diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index a688be130711c..0000000000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM julia:latest - -RUN apt-get update && apt-get install -y build-essential libatomic1 python gfortran perl wget m4 cmake pkg-config git diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a3747ca019694..455f8bea3e952 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,8 +1,12 @@ { - "extensions": [ - "julialang.language-julia", - "ms-vscode.cpptools" - ], - - "dockerFile": "Dockerfile" + "image": "docker.io/library/julia:latest", + "customizations": { + "vscode": { + "extensions": [ + "julialang.language-julia", + "ms-vscode.cpptools" + ] + } + }, + "onCreateCommand": "apt-get update && apt-get install -y build-essential libatomic1 python3 gfortran perl wget m4 cmake pkg-config git" } diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 3af8ba86153a1..bf6e580ace8cf 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -7,3 +7,7 @@ e66bfa5dd32f93e76068c00ad882c1fc839c5af8 100a741e7ab38c91d48cc929bb001afc8e09261f # whitespace: replace tabs => space b03e8ab9c7bd3e001add519571858fa04d6a249b +# whitespace: replace 2-space => 4-space for indentation +f1b567507731129f90ca0dffc8fbc0ed98b6a15d +# whitespace: replace multiple spaces after period with a single space +f942c29bb0d02cc24f19712c642ac72ffc85a26b diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000000..2ad7fdc1efa0a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + open-pull-requests-limit: 100 + labels: + - "dependencies" + - "github-actions" + - "domain:ci" diff --git a/.github/workflows/LabelCheck.yml b/.github/workflows/LabelCheck.yml index 194b0c92065c9..c966e478e3fe0 100644 --- a/.github/workflows/LabelCheck.yml +++ b/.github/workflows/LabelCheck.yml @@ -11,9 +11,9 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 2 steps: - - uses: yogevbd/enforce-label-action@2.2.2 + - uses: yogevbd/enforce-label-action@a3c219da6b8fa73f6ba62b68ff09c469b3a1c024 # 2.2.2 with: # REQUIRED_LABELS_ANY: "bug,enhancement,skip-changelog" # REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['bug','enhancement','skip-changelog']" - BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,DO NOT MERGE" - BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `DO NOT MERGE` labels" + BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,needs decision,DO NOT MERGE,status:DO NOT MERGE" + BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `status:DO NOT MERGE` labels" diff --git a/.github/workflows/Typos.yml b/.github/workflows/Typos.yml new file mode 100644 index 0000000000000..6c9eeacc21800 --- /dev/null +++ b/.github/workflows/Typos.yml @@ -0,0 +1,70 @@ +name: Typos + +permissions: {} + +on: [pull_request] + +jobs: + typos-check: + name: Check for new typos + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout the JuliaLang/julia repository + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + - name: Check spelling with typos + #uses: 
crate-ci/typos@c7af4712eda24dd1ef54bd8212973888489eb0ce # v1.23.5 + env: + GH_TOKEN: "${{ github.token }}" + run: | + git fetch --depth=1 origin ${{ github.base_ref }} + OLD_FILES=$(git diff-index --name-only --diff-filter=ad FETCH_HEAD) + NEW_FILES=$(git diff-index --name-only --diff-filter=d FETCH_HEAD) + + # This is necessary because the typos command interprets the + # empty string as "check all files" rather than "check no files". + if [ -z "$NEW_FILES" ]; then + echo "All edited files were deleted. Skipping typos check." + exit 0 + fi + + mkdir -p "${{ runner.temp }}/typos" + RELEASE_ASSET_URL="$( + gh api /repos/crate-ci/typos/releases/latest \ + --jq '."assets"[] | select(."name" | test("^typos-.+-x86_64-unknown-linux-musl\\.tar\\.gz$")) | ."browser_download_url"' + )" + wget --secure-protocol=TLSv1_3 --max-redirect=1 --retry-on-host-error --retry-connrefused --tries=3 \ + --quiet --output-document=- "${RELEASE_ASSET_URL}" \ + | tar -xz -C "${{ runner.temp }}/typos" ./typos + "${{ runner.temp }}/typos/typos" --version + + echo -n $NEW_FILES | xargs "${{ runner.temp }}/typos/typos" --format json >> ${{ runner.temp }}/new_typos.jsonl || true + git checkout FETCH_HEAD -- $OLD_FILES + if [ -z "$OLD_FILES" ]; then + touch "${{ runner.temp }}/old_typos.jsonl" # No old files, so no old typos. + else + echo -n $OLD_FILES | xargs "${{ runner.temp }}/typos/typos" --format json >> ${{ runner.temp }}/old_typos.jsonl || true + fi + + + python -c ' + import sys, json + old = set() + with open(sys.argv[1]) as old_file: + for line in old_file: + j = json.loads(line) + if j["type"] == "typo": + old.add(j["typo"]) + clean = True + with open(sys.argv[2]) as new_file: + for line in new_file: + new = json.loads(line) + if new["type"] == "typo" and new["typo"] not in old: + if len(new["typo"]) > 6: # Short typos might be false positives. Long are probably real. 
+ clean = False + print("::warning file={},line={},col={}::perhaps \"{}\" should be \"{}\".".format( + new["path"], new["line_num"], new["byte_offset"], + new["typo"], " or ".join(new["corrections"]))) + sys.exit(1 if not clean else 0)' "${{ runner.temp }}/old_typos.jsonl" "${{ runner.temp }}/new_typos.jsonl" diff --git a/.github/workflows/Whitespace.yml b/.github/workflows/Whitespace.yml new file mode 100644 index 0000000000000..37c9dbfd39a3c --- /dev/null +++ b/.github/workflows/Whitespace.yml @@ -0,0 +1,26 @@ +name: Whitespace + +permissions: {} + +on: + push: + branches: + - master + pull_request: + +jobs: + whitespace: + name: Check whitespace + runs-on: ubuntu-latest + timeout-minutes: 2 + steps: + - name: Checkout the JuliaLang/julia repository + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + - uses: julia-actions/setup-julia@9b79636afcfb07ab02c256cede01fe2db6ba808c # v2.6.0 + with: + version: '1' + - name: Check whitespace + run: | + contrib/check-whitespace.jl diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml new file mode 100644 index 0000000000000..4c9debb246f3f --- /dev/null +++ b/.github/workflows/cffconvert.yml @@ -0,0 +1,33 @@ +name: cffconvert + +on: + push: + branches: + - 'master' + - 'release-*' + paths: + - CITATION.cff + pull_request: + branches: + - 'master' + - 'release-*' + paths: + - CITATION.cff + +permissions: + contents: read + +jobs: + validate: + name: "validate" + runs-on: ubuntu-latest + steps: + - name: Check out a copy of the repository + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + persist-credentials: false + + - name: Check whether the citation metadata from CITATION.cff is valid + uses: citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084 # 2.0.0 + with: + args: "--validate" diff --git a/.gitignore b/.gitignore index f0072fec9c91e..80bdd67619454 100644 --- a/.gitignore +++ b/.gitignore @@ -34,11 +34,15 @@ .DS_Store .idea/* .vscode/* +.zed/* *.heapsnapshot .cache # Buildkite: Ignore the entire .buildkite directory /.buildkite +# Builtkite: json test data +/test/results.json + # Buildkite: Ignore the unencrypted repo_key repo_key diff --git a/.mailmap b/.mailmap index e91501651d065..e278160d7381b 100644 --- a/.mailmap +++ b/.mailmap @@ -284,9 +284,9 @@ Daniel Karrasch Roger Luo Roger Luo -Frames Catherine White -Frames Catherine White -Frames Catherine White +Frames White +Frames White +Frames White Claire Foster @@ -295,3 +295,16 @@ Jishnu Bhattacharya Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> + +inky +inky + +Lilith Orion Hafner +Lilith Orion Hafner <60898866+LilithHafner@users.noreply.github.com> + +Timothy + +Bhuminjay Soni +Bhuminjay Soni <76656712+11happy@users.noreply.github.com> + +Florian Atteneder diff --git a/CITATION.cff b/CITATION.cff index c88727bcfa311..878ab94a4d86a 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,3 +1,4 @@ +# Official format description at https://citation-file-format.github.io cff-version: 1.2.0 message: "Cite this paper whenever you use Julia" authors: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0131dcbc4a278..9a3fe2cd441b3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -164,7 +164,7 @@ To run doctests you need to run `make -C doc doctest=true` from the root directo #### News-worthy changes -For new functionality and other substantial changes, add a brief summary to `NEWS.md`. 
The news item should cross reference the pull request (PR) parenthetically, in the form `([#pr])`. To add the PR reference number, first create the PR, then push an additional commit updating `NEWS.md` with the PR reference number. We periodically run `./julia doc/NEWS-update.jl` from the julia directory to update the cross-reference links, but this should not be done in a typical PR in order to avoid conflicting commits. +For new functionality and other substantial changes, add a brief summary to `NEWS.md`. The news item should cross reference the pull request (PR) parenthetically, in the form `([#pr])`. To add the PR reference number, first create the PR, then push an additional commit updating `NEWS.md` with the PR reference number. We periodically run `./julia doc/NEWS-update.jl` from the julia directory to update the cross-reference links, but this should not be done in a typical PR in order to avoid conflicting commits. #### Annotations for new features, deprecations and behavior changes @@ -278,8 +278,8 @@ Be sure to change the UUID value back before making the pull request. The process of [creating a patch release](https://docs.julialang.org/en/v1/devdocs/build/distributing/#Point-releasing-101) is roughly as follows: -1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release - branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well. +1. Create a new branch (e.g. `backports-release-1.10`) against the relevant minor release + branch (e.g. `release-1.10`). Usually a corresponding pull request is created as well. 2. Add commits, nominally from `master` (hence "backports"), to that branch. See below for more information on this process. @@ -291,8 +291,8 @@ The process of [creating a patch release](https://docs.julialang.org/en/v1/devdo the pull request associated with the backports branch. Fix any issues. 4. Once all test and benchmark reports look good, merge the backports branch into - the corresponding release branch (e.g. merge `backports-release-1.6` into - `release-1.6`). + the corresponding release branch (e.g. merge `backports-release-1.10` into + `release-1.10`). 5. Open a pull request that bumps the version of the relevant minor release to the next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718). @@ -347,7 +347,7 @@ please remove the `backport-X.Y` tag from the originating pull request for the c ### Git Recommendations For Pull Requests - Avoid working from the `master` branch of your fork, creating a new branch will make it easier if Julia's `master` changes and you need to update your pull request. - - Try to [squash](http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) together small commits that make repeated changes to the same section of code so your pull request is easier to review. A reasonable number of separate well-factored commits is fine, especially for larger changes. + - Try to [squash](https://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) together small commits that make repeated changes to the same section of code so your pull request is easier to review. A reasonable number of separate well-factored commits is fine, especially for larger changes. 
- If any conflicts arise due to changes in Julia's `master`, prefer updating your pull request branch with `git rebase` versus `git merge` or `git pull`, since the latter will introduce merge commits that clutter the git history with noise that makes your changes more difficult to review. - Descriptive commit messages are good. - Using `git add -p` or `git add -i` can be useful to avoid accidentally committing unrelated changes. diff --git a/Compiler/LICENSE.md b/Compiler/LICENSE.md new file mode 100644 index 0000000000000..028a39923ef04 --- /dev/null +++ b/Compiler/LICENSE.md @@ -0,0 +1,26 @@ +MIT License + +Copyright (c) 2009-2024: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +end of terms and conditions + +Please see [THIRDPARTY.md](../THIRDPARTY.md) for license information for other software used in this project. diff --git a/Compiler/Project.toml b/Compiler/Project.toml new file mode 100644 index 0000000000000..994634f5a8b78 --- /dev/null +++ b/Compiler/Project.toml @@ -0,0 +1,15 @@ +name = "Compiler" +uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" +version = "0.0.3" + +[compat] +julia = "1.10" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[targets] +test = ["Test", "InteractiveUtils", "Random", "Libdl"] diff --git a/Compiler/extras/CompilerDevTools/Manifest.toml b/Compiler/extras/CompilerDevTools/Manifest.toml new file mode 100644 index 0000000000000..bcc78f1ded34a --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Manifest.toml @@ -0,0 +1,15 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "84f495a1bf065c95f732a48af36dd0cd2cefb9d5" + +[[deps.Compiler]] +path = "../.." +uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" +version = "0.0.2" + +[[deps.CompilerDevTools]] +path = "." 
+uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" +version = "0.0.0" diff --git a/Compiler/extras/CompilerDevTools/Project.toml b/Compiler/extras/CompilerDevTools/Project.toml new file mode 100644 index 0000000000000..a2749a9a56a84 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Project.toml @@ -0,0 +1,5 @@ +name = "CompilerDevTools" +uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" + +[deps] +Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl new file mode 100644 index 0000000000000..5d0df5ccaa4e4 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -0,0 +1,48 @@ +module CompilerDevTools + +using Compiler +using Core.IR + +struct SplitCacheOwner; end +struct SplitCacheInterp <: Compiler.AbstractInterpreter + world::UInt + inf_params::Compiler.InferenceParams + opt_params::Compiler.OptimizationParams + inf_cache::Vector{Compiler.InferenceResult} + function SplitCacheInterp(; + world::UInt = Base.get_world_counter(), + inf_params::Compiler.InferenceParams = Compiler.InferenceParams(), + opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(), + inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[]) + new(world, inf_params, opt_params, inf_cache) + end +end + +Compiler.InferenceParams(interp::SplitCacheInterp) = interp.inf_params +Compiler.OptimizationParams(interp::SplitCacheInterp) = interp.opt_params +Compiler.get_inference_world(interp::SplitCacheInterp) = interp.world +Compiler.get_inference_cache(interp::SplitCacheInterp) = interp.inf_cache +Compiler.cache_owner(::SplitCacheInterp) = SplitCacheOwner() + +import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge +@eval @noinline typeinf(::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) + +@eval @noinline function typeinf_edge(::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8) + # TODO: This isn't quite right, we're just sketching things for now + interp = SplitCacheInterp(; world) + Compiler.typeinf_edge(interp, mi.def, mi.specTypes, Core.svec(), parent_frame, false, false) +end + +function with_new_compiler(f, args...) + mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance} + world = Base.tls_world_age() + new_compiler_ci = Core.OptimizedGenerics.CompilerPlugins.typeinf( + SplitCacheOwner(), mi, Compiler.SOURCE_MODE_ABI + ) + invoke(f, new_compiler_ci, args...) +end + +export with_new_compiler + +end diff --git a/Compiler/src/Compiler.jl b/Compiler/src/Compiler.jl new file mode 100644 index 0000000000000..ea939f86422c5 --- /dev/null +++ b/Compiler/src/Compiler.jl @@ -0,0 +1,209 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +if isdefined(Base, :end_base_include) && !isdefined(Base, :Compiler) + +# Define a dummy `Compiler` module to make it installable even on Julia versions where +# Compiler.jl is not available as a standard library. +@eval module Compiler + function __init__() + println(""" + The `Compiler` standard library is not available for this version of Julia. + Use Julia version `v"1.12.0-DEV.1581"` or later. 
+ """) + end +end + +# When generating an incremental precompile file, we first check whether we +# already have a copy of this *exact* code in the system image. If so, we +# simply generates a pkgimage that has the dependency edges we recorded in +# the system image and simply returns that copy of the compiler. If not, +# we proceed to load/precompile this as an ordinary package. +elseif (isdefined(Base, :generating_output) && Base.generating_output(true) && + Base.samefile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, Base._compiler_require_dependencies[1][2]), @eval @__FILE__) && + !Base.any_includes_stale( + map(Base.compiler_chi, Base._compiler_require_dependencies), + "sysimg", nothing)) + + Base.prepare_compiler_stub_image!() + append!(Base._require_dependencies, map(Base.expand_compiler_path, Base._compiler_require_dependencies)) + # There isn't much point in precompiling native code - downstream users will + # specialize their own versions of the compiler code and we don't activate + # the compiler by default anyway, so let's save ourselves some disk space. + ccall(:jl_suppress_precompile, Cvoid, (Cint,), 1) + +else + +@eval baremodule Compiler + +# Needs to match UUID defined in Project.toml +ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Compiler, + (0x807dbc54_b67e_4c79, 0x8afb_eafe4df6f2e1)) + +using Core.Intrinsics, Core.IR + +using Core: ABIOverride, Builtin, CodeInstance, IntrinsicFunction, MethodInstance, MethodMatch, + MethodTable, PartialOpaque, SimpleVector, TypeofVararg, + _apply_iterate, apply_type, compilerbarrier, donotdelete, memoryref_isassigned, + memoryrefget, memoryrefnew, memoryrefoffset, memoryrefset!, print, println, show, svec, + typename, unsafe_write, write + +using Base +using Base: @_foldable_meta, @_gc_preserve_begin, @_gc_preserve_end, @nospecializeinfer, + BINDING_KIND_GLOBAL, BINDING_KIND_UNDEF_CONST, Base, BitVector, Bottom, Callable, DataTypeFieldDesc, + EffectsOverride, Filter, Generator, IteratorSize, JLOptions, NUM_EFFECTS_OVERRIDES, + OneTo, Ordering, RefValue, SizeUnknown, _NAMEDTUPLE_NAME, + _array_for, _bits_findnext, _methods_by_ftype, _uniontypes, all, allocatedinline, any, + argument_datatype, binding_kind, cconvert, copy_exprargs, datatype_arrayelem, + datatype_fieldcount, datatype_fieldtypes, datatype_layoutsize, datatype_nfields, + datatype_pointerfree, decode_effects_override, diff_names, fieldindex, + generating_output, get_nospecializeinfer_sig, get_world_counter, has_free_typevars, + hasgenerator, hasintersect, indexed_iterate, isType, is_file_tracked, is_function_def, + is_meta_expr, is_meta_expr_head, is_nospecialized, is_nospecializeinfer, is_defined_const_binding, + is_some_const_binding, is_some_guard, is_some_imported, is_valid_intrinsic_elptr, + isbitsunion, isconcretedispatch, isdispatchelem, isexpr, isfieldatomic, isidentityfree, + iskindtype, ismutabletypename, ismutationfree, issingletontype, isvarargtype, isvatuple, + kwerr, lookup_binding_partition, may_invoke_generator, methods, midpoint, moduleroot, + partition_restriction, quoted, rename_unionall, rewrap_unionall, specialize_method, + structdiff, tls_world_age, unconstrain_vararg_length, unionlen, uniontype_layout, + uniontypes, unsafe_convert, unwrap_unionall, unwrapva, vect, widen_diagonal, + _uncompressed_ir +using Base.Order + +import Base: ==, _topmod, append!, convert, copy, copy!, findall, first, get, get!, + getindex, haskey, in, isempty, isready, iterate, iterate, last, length, max_world, + min_world, popfirst!, push!, resize!, setindex!, size + 
+const getproperty = Core.getfield +const setproperty! = Core.setfield! +const swapproperty! = Core.swapfield! +const modifyproperty! = Core.modifyfield! +const replaceproperty! = Core.replacefield! +const _DOCS_ALIASING_WARNING = "" + +ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false) + +eval(x) = Core.eval(Compiler, x) +eval(m, x) = Core.eval(m, x) + +function include(x::String) + if !isdefined(Base, :end_base_include) + # During bootstrap, all includes are relative to `base/` + x = Base.strcat(Base.strcat(Base.DATAROOT, "julia/Compiler/src/"), x) + end + Base.include(Compiler, x) +end + +function include(mod::Module, x::String) + if !isdefined(Base, :end_base_include) + x = Base.strcat(Base.strcat(Base.DATAROOT, "julia/Compiler/src/"), x) + end + Base.include(mod, x) +end + +macro _boundscheck() Expr(:boundscheck) end + +function return_type end +function is_return_type(Core.@nospecialize(f)) + f === return_type && return true + if isdefined(Base, :Compiler) && Compiler !== Base.Compiler + # Also model the return_type function of the builtin Compiler the same. + # This isn't completely sound. We don't actually have any idea what the + # base compiler will do at runtime. In the fullness of time, we should + # re-work the semantics to make the cache primary and thus avoid having + # to reason about what the compiler may do at runtime, but we're not + # fully there yet. + return f === Base.Compiler.return_type + end + return false +end + +include("sort.jl") + +# We don't include some.jl, but this definition is still useful. +something(x::Nothing, y...) = something(y...) +something(x::Any, y...) = x + +############ +# compiler # +############ + +baremodule BuildSettings +using Core: ARGS, include +using ..Compiler: >, getindex, length + +global MAX_METHODS::Int = 3 + +if length(ARGS) > 2 && ARGS[2] === "--buildsettings" + include(BuildSettings, ARGS[3]) +end +end + +if !isdefined(Base, :end_base_include) + macro show(ex...) + blk = Expr(:block) + for s in ex + push!(blk.args, :(println(stdout, $(QuoteNode(s)), " = ", + begin local value = $(esc(s)) end))) + end + isempty(ex) || push!(blk.args, :value) + blk + end +else + using Base: @show +end + +include("cicache.jl") +include("methodtable.jl") +include("effects.jl") +include("types.jl") +include("utilities.jl") +include("validation.jl") + +include("ssair/basicblock.jl") +include("ssair/domtree.jl") +include("ssair/ir.jl") +include("ssair/tarjan.jl") + +include("abstractlattice.jl") +include("stmtinfo.jl") +include("inferenceresult.jl") +include("inferencestate.jl") + +include("typeutils.jl") +include("typelimits.jl") +include("typelattice.jl") +include("tfuncs.jl") + +include("abstractinterpretation.jl") +include("typeinfer.jl") +include("optimize.jl") + +include("bootstrap.jl") +include("reflection_interface.jl") +include("opaque_closure.jl") + +macro __SOURCE_FILE__() + __source__.file === nothing && return nothing + return QuoteNode(__source__.file::Symbol) +end + +module IRShow end +function load_irshow!() + if isdefined(Base, :end_base_include) + # This code path is exclusively for Revise, which may want to re-run this + # after bootstrap. 
+ include(IRShow, Base.joinpath(Base.dirname(Base.String(@__SOURCE_FILE__)), "ssair/show.jl")) + else + include(IRShow, "ssair/show.jl") + end +end +if !isdefined(Base, :end_base_include) + # During bootstrap, skip including this file and defer it to base/show.jl to include later +else + # When this module is loaded as the standard library, include this file as usual + load_irshow!() +end + +end # baremodule Compiler + +end # if isdefined(Base, :generating_output) && ... diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl new file mode 100644 index 0000000000000..59510dbfbb65a --- /dev/null +++ b/Compiler/src/abstractinterpretation.jl @@ -0,0 +1,4312 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +struct SlotRefinement + slot::SlotNumber + typ::Any + SlotRefinement(slot::SlotNumber, @nospecialize(typ)) = new(slot, typ) +end + +# See if the inference result of the current statement's result value might affect +# the final answer for the method (aside from optimization potential and exceptions). +# To do that, we need to check both for slot assignment and SSA usage. +call_result_unused(sv::InferenceState, currpc::Int) = + isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc]) +call_result_unused(si::StmtInfo) = !si.used + +is_const_bool_or_bottom(@nospecialize(b)) = (isa(b, Const) && isa(b.val, Bool)) || b == Bottom +function can_propagate_conditional(@nospecialize(rt), argtypes::Vector{Any}) + isa(rt, InterConditional) || return false + if rt.slot > length(argtypes) + # In the vararg tail - can't be conditional + @assert isvarargtype(argtypes[end]) + return false + end + return isa(argtypes[rt.slot], Conditional) && + is_const_bool_or_bottom(rt.thentype) && is_const_bool_or_bottom(rt.thentype) +end + +function propagate_conditional(rt::InterConditional, cond::Conditional) + new_thentype = rt.thentype === Const(false) ? cond.elsetype : cond.thentype + new_elsetype = rt.elsetype === Const(true) ? 
cond.thentype : cond.elsetype + if rt.thentype == Bottom + @assert rt.elsetype != Bottom + return Conditional(cond.slot, Bottom, new_elsetype) + elseif rt.elsetype == Bottom + @assert rt.thentype != Bottom + return Conditional(cond.slot, new_thentype, Bottom) + end + return Conditional(cond.slot, new_thentype, new_elsetype) +end + +mutable struct SafeBox{T} + x::T + SafeBox{T}(x::T) where T = new{T}(x) + SafeBox(@nospecialize x) = new{Any}(x) +end +getindex(box::SafeBox) = box.x +setindex!(box::SafeBox{T}, x::T) where T = setfield!(box, :x, x) + +struct FailedMethodMatch + reason::String +end + +struct MethodMatchTarget + match::MethodMatch + edges::Vector{Union{Nothing,CodeInstance}} + edge_idx::Int +end + +struct MethodMatches + applicable::Vector{MethodMatchTarget} + info::MethodMatchInfo + valid_worlds::WorldRange +end +any_ambig(result::MethodLookupResult) = result.ambig +any_ambig(info::MethodMatchInfo) = any_ambig(info.results) +any_ambig(m::MethodMatches) = any_ambig(m.info) +fully_covering(info::MethodMatchInfo) = info.fullmatch +fully_covering(m::MethodMatches) = fully_covering(m.info) + +struct UnionSplitMethodMatches + applicable::Vector{MethodMatchTarget} + applicable_argtypes::Vector{Vector{Any}} + info::UnionSplitInfo + valid_worlds::WorldRange +end +any_ambig(info::UnionSplitInfo) = any(any_ambig, info.split) +any_ambig(m::UnionSplitMethodMatches) = any_ambig(m.info) +fully_covering(info::UnionSplitInfo) = all(fully_covering, info.split) +fully_covering(m::UnionSplitMethodMatches) = fully_covering(m.info) + +nmatches(info::MethodMatchInfo) = length(info.results) +function nmatches(info::UnionSplitInfo) + n = 0 + for mminfo in info.split + n += nmatches(mminfo) + end + return n +end + +# intermediate state for computing gfresult +mutable struct CallInferenceState + inferidx::Int + rettype + exctype + all_effects::Effects + const_results::Union{Nothing,Vector{Union{Nothing,ConstResult}}} # keeps the results of inference with the extended lattice elements (if happened) + conditionals::Union{Nothing,Tuple{Vector{Any},Vector{Any}}} # keeps refinement information of call argument types when the return type is boolean + slotrefinements::Union{Nothing,Vector{Any}} # keeps refinement information on slot types obtained from call signature + + # some additional fields for untyped objects (just to avoid capturing) + func + matches::Union{MethodMatches,UnionSplitMethodMatches} + function CallInferenceState(@nospecialize(func), matches::Union{MethodMatches,UnionSplitMethodMatches}) + return new(#=inferidx=#1, #=rettype=#Bottom, #=exctype=#Bottom, #=all_effects=#EFFECTS_TOTAL, + #=const_results=#nothing, #=conditionals=#nothing, #=slotrefinements=#nothing, + func, matches) + end +end + +function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(func), + arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype), + sv::AbsIntState, max_methods::Int) + 𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp) + ⊑ₚ, ⋤ₚ, ⊔ₚ, ⊔ᵢ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ), join(𝕃ᵢ) + argtypes = arginfo.argtypes + if si.saw_latestworld + add_remark!(interp, sv, "Cannot infer call, because we previously saw :latestworld") + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + end + matches = find_method_matches(interp, argtypes, atype; max_methods) + if isa(matches, FailedMethodMatch) + add_remark!(interp, sv, matches.reason) + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + end + + (; valid_worlds, applicable) = matches + update_valid_age!(sv, 
valid_worlds) # need to record the negative world now, since even if we don't generate any useful information, inlining might want to add an invoke edge and it won't have this information anymore + if bail_out_toplevel_call(interp, sv) + local napplicable = length(applicable) + for i = 1:napplicable + local sig = applicable[i].match.spec_types + if !isdispatchtuple(sig) + # only infer fully concrete call sites in top-level expressions (ignoring even isa_compileable_sig matches) + add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression") + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + end + end + end + + # final result + gfresult = Future{CallMeta}() + state = CallInferenceState(func, matches) + + # split the for loop off into a function, so that we can pause and restart it at will + function infercalls(interp, sv) + local napplicable = length(applicable) + local multiple_matches = napplicable > 1 + while state.inferidx <= napplicable + (; match, edges, edge_idx) = applicable[state.inferidx] + local method = match.method + local sig = match.spec_types + if bail_out_call(interp, InferenceLoopState(state.rettype, state.all_effects), sv) + add_remark!(interp, sv, "Call inference reached maximally imprecise information: bailing on doing more abstract inference.") + break + end + # TODO: this is unmaintained now as it didn't seem to improve things, though it does avoid hard-coding the union split at the higher level, + # it also can hurt infer-ability of some constrained parameter types (e.g. quacks like a duck) + # sigtuple = unwrap_unionall(sig)::DataType + # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting + #if splitunions + # splitsigs = switchtupleunion(sig) + # for sig_n in splitsigs + # result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)::Future + # handle1(...) + # end + #end + mresult = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)::Future + function handle1(interp, sv) + local (; rt, exct, effects, edge, volatile_inf_result) = mresult[] + this_conditional = ignorelimited(rt) + this_rt = widenwrappedconditional(rt) + this_exct = exct + # try constant propagation with argtypes for this match + # this is in preparation for inlining, or improving the return result + local matches = state.matches + this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[state.inferidx] + this_arginfo = ArgInfo(arginfo.fargs, this_argtypes) + const_call_result = abstract_call_method_with_const_args(interp, + mresult[], state.func, this_arginfo, si, match, sv) + const_result = volatile_inf_result + if const_call_result !== nothing + this_const_conditional = ignorelimited(const_call_result.rt) + this_const_rt = widenwrappedconditional(const_call_result.rt) + const_edge = nothing + if this_const_rt ⊑ₚ this_rt + # As long as the const-prop result we have is not *worse* than + # what we found out on types, we'd like to use it. Even if the + # end result is exactly equivalent, it is likely that the IR + # we produced while constproping is better than that with + # generic types. + # Return type of const-prop' inference can be wider than that of non const-prop' inference + # e.g. 
in cases when there are cycles but cached result is still accurate + this_conditional = this_const_conditional + this_rt = this_const_rt + (; effects, const_result, const_edge) = const_call_result + elseif is_better_effects(const_call_result.effects, effects) + (; effects, const_result, const_edge) = const_call_result + else + add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") + end + # Treat the exception type separately. Currently, constprop often cannot determine the exception type + # because consistent-cy does not apply to exceptions. + if const_call_result.exct ⋤ this_exct + this_exct = const_call_result.exct + (; const_result, const_edge) = const_call_result + else + add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference") + end + if const_edge !== nothing + edge = const_edge + end + end + + state.all_effects = merge_effects(state.all_effects, effects) + if const_result !== nothing + local const_results = state.const_results + if const_results === nothing + const_results = state.const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing) + end + const_results[state.inferidx] = const_result + end + @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context" + if can_propagate_conditional(this_conditional, argtypes) + # The only case where we need to keep this in rt is where + # we can directly propagate the conditional to a slot argument + # that is not one of our arguments, otherwise we keep all the + # relevant information in `conditionals` below. + this_rt = this_conditional + end + + state.rettype = state.rettype ⊔ₚ this_rt + state.exctype = state.exctype ⊔ₚ this_exct + if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, state.rettype) && arginfo.fargs !== nothing + local conditionals = state.conditionals + if conditionals === nothing + conditionals = state.conditionals = ( + Any[Bottom for _ in 1:length(argtypes)], + Any[Bottom for _ in 1:length(argtypes)]) + end + for i = 1:length(argtypes) + cnd = conditional_argtype(𝕃ᵢ, this_conditional, match.spec_types, argtypes, i) + conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype + conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype + end + end + edges[edge_idx] = edge + + state.inferidx += 1 + return true + end # function handle1 + if isready(mresult) && handle1(interp, sv) + continue + else + push!(sv.tasks, handle1) + return false + end + end # while + + seenall = state.inferidx > napplicable + retinfo = state.matches.info + if seenall # small optimization to skip some work that is already implied + local const_results = state.const_results + if const_results !== nothing + @assert napplicable == nmatches(retinfo) == length(const_results) + retinfo = ConstCallInfo(retinfo, const_results) + end + if !fully_covering(state.matches) || any_ambig(state.matches) + # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. 
+ state.all_effects = Effects(state.all_effects; nothrow=false) + state.exctype = state.exctype ⊔ₚ MethodError + end + local fargs = arginfo.fargs + if sv isa InferenceState && fargs !== nothing + state.slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv) + end + state.rettype = from_interprocedural!(interp, state.rettype, sv, arginfo, state.conditionals) + if call_result_unused(si) && !(state.rettype === Bottom) + add_remark!(interp, sv, "Call result type was widened because the return value is unused") + # We're mainly only here because the optimizer might want this code, + # but we ourselves locally don't typically care about it locally + # (beyond checking if it always throws). + # So avoid adding an edge, since we don't want to bother attempting + # to improve our result even if it does change (to always throw), + # and avoid keeping track of a more complex result type. + state.rettype = Any + end + # if from_interprocedural added any pclimitations to the set inherited from the arguments, + # some of those may be part of our cycles, so those can be deleted now + # TODO: and those might need to be deleted later too if the cycle grows to include them? + if isa(sv, InferenceState) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works just because currently the `:terminate` condition guarantees that + # irinterp doesn't fail into unresolved cycles, but it's not a good solution. + # We should revisit this once we have a better story for handling cycles in irinterp. + if !isempty(sv.pclimitations) # remove self, if present + delete!(sv.pclimitations, sv) + for caller in callers_in_cycle(sv) + delete!(sv.pclimitations, caller) + end + end + end + else + # there is unanalyzed candidate, widen type and effects to the top + state.rettype = state.exctype = Any + state.all_effects = Effects() + state.const_results = nothing + end + + # Also considering inferring the compilation signature for this method, so + # it is available to the compiler in case it ends up needing it for the invoke. 
+ if (isa(sv, InferenceState) && infer_compilation_signature(interp) && + (!is_removable_if_unused(state.all_effects) || !call_result_unused(si))) + inferidx = SafeBox{Int}(1) + function infercalls2(interp, sv) + local napplicable = length(applicable) + local multiple_matches = napplicable > 1 + while inferidx[] <= napplicable + (; match, edges, edge_idx) = applicable[inferidx[]] + inferidx[] += 1 + local method = match.method + local sig = match.spec_types + mi = specialize_method(match; preexisting=true) + if mi === nothing || !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) + csig = get_compileable_sig(method, sig, match.sparams) + if csig !== nothing && (!seenall || csig !== sig) # corresponds to whether the first look already looked at this, so repeating abstract_call_method is not useful + #println(sig, " changed to ", csig, " for ", method) + sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), csig, method.sig)::SimpleVector + sparams = sp_[2]::SimpleVector + mresult = abstract_call_method(interp, method, csig, sparams, multiple_matches, StmtInfo(false, false), sv)::Future + isready(mresult) || return false # wait for mresult Future to resolve off the callstack before continuing + end + end + end + return true + end + # start making progress on the first call + infercalls2(interp, sv) || push!(sv.tasks, infercalls2) + end + + gfresult[] = CallMeta(state.rettype, state.exctype, state.all_effects, retinfo, state.slotrefinements) + return true + end # function infercalls + # start making progress on the first call + infercalls(interp, sv) || push!(sv.tasks, infercalls) + return gfresult +end + +function find_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, @nospecialize(atype); + max_union_splitting::Int = InferenceParams(interp).max_union_splitting, + max_methods::Int = InferenceParams(interp).max_methods) + if is_union_split_eligible(typeinf_lattice(interp), argtypes, max_union_splitting) + return find_union_split_method_matches(interp, argtypes, atype, max_methods) + end + return find_simple_method_matches(interp, atype, max_methods) +end + +# NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type +is_union_split_eligible(𝕃::AbstractLattice, argtypes::Vector{Any}, max_union_splitting::Int) = + 1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting + +function find_union_split_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, + @nospecialize(atype), max_methods::Int) + split_argtypes = switchtupleunion(typeinf_lattice(interp), argtypes) + infos = MethodMatchInfo[] + applicable = MethodMatchTarget[] + applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match + valid_worlds = WorldRange() + for i in 1:length(split_argtypes) + arg_n = split_argtypes[i]::Vector{Any} + sig_n = argtypes_to_type(arg_n) + mt = ccall(:jl_method_table_for, Any, (Any,), sig_n) + mt === nothing && return FailedMethodMatch("Could not identify method table for call") + mt = mt::MethodTable + thismatches = findall(sig_n, method_table(interp); limit = max_methods) + if thismatches === nothing + return FailedMethodMatch("For one of the union split cases, too many methods matched") + end + valid_worlds = intersect(valid_worlds, thismatches.valid_worlds) + thisfullmatch = any(match::MethodMatch->match.fully_covers, thismatches) + thisinfo = MethodMatchInfo(thismatches, mt, sig_n, thisfullmatch) + push!(infos, thisinfo) + for idx = 1:length(thismatches) + push!(applicable, 
MethodMatchTarget(thismatches[idx], thisinfo.edges, idx)) + push!(applicable_argtypes, arg_n) + end + end + info = UnionSplitInfo(infos) + return UnionSplitMethodMatches( + applicable, applicable_argtypes, info, valid_worlds) +end + +function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(atype), max_methods::Int) + mt = ccall(:jl_method_table_for, Any, (Any,), atype) + if mt === nothing + return FailedMethodMatch("Could not identify method table for call") + end + mt = mt::MethodTable + matches = findall(atype, method_table(interp); limit = max_methods) + if matches === nothing + # this means too many methods matched + # (assume this will always be true, so we don't compute / update valid age in this case) + return FailedMethodMatch("Too many methods matched") + end + fullmatch = any(match::MethodMatch->match.fully_covers, matches) + info = MethodMatchInfo(matches, mt, atype, fullmatch) + applicable = MethodMatchTarget[MethodMatchTarget(matches[idx], info.edges, idx) for idx = 1:length(matches)] + return MethodMatches(applicable, info, matches.valid_worlds) +end + +""" + from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState, + arginfo::ArgInfo, maybecondinfo) -> newrt + +Converts inter-procedural return type `rt` into a local lattice element `newrt`, +that is appropriate in the context of current local analysis frame `sv`, especially: +- unwraps `rt::LimitedAccuracy` and collects its limitations into the current frame `sv` +- converts boolean `rt` to new boolean `newrt` in a way `newrt` can propagate extra conditional + refinement information, e.g. translating `rt::InterConditional` into `newrt::Conditional` + that holds a type constraint information about a variable in `sv` + +This function _should_ be used wherever we propagate results returned from +`abstract_call_method` or `abstract_call_method_with_const_args`. + +When `maybecondinfo !== nothing`, this function also tries extra conditional argument type refinement. +In such cases `maybecondinfo` should be either of: +- `maybecondinfo::Tuple{Vector{Any},Vector{Any}}`: precomputed argument type refinement information +- method call signature tuple type +When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by +`tmerge`ing argument signature type of each method call. 
+""" +function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) + rt = collect_limitations!(rt, sv) + if isa(rt, InterMustAlias) + rt = from_intermustalias(typeinf_lattice(interp), rt, arginfo, sv) + elseif is_lattice_bool(ipo_lattice(interp), rt) + if maybecondinfo === nothing + rt = widenconditional(rt) + else + rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo) + end + end + @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context" + return rt +end + +function collect_limitations!(@nospecialize(typ), sv::InferenceState) + if isa(typ, LimitedAccuracy) + union!(sv.pclimitations, typ.causes) + return typ.typ + end + return typ +end + +function from_intermustalias(𝕃ᵢ::AbstractLattice, rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState) + fargs = arginfo.fargs + if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs) + arg = ssa_def_slot(fargs[rt.slot], sv) + if isa(arg, SlotNumber) + argtyp = widenslotwrapper(arginfo.argtypes[rt.slot]) + ⊑ = partialorder(𝕃ᵢ) + if rt.vartyp ⊑ argtyp + return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp) + else + # TODO optimize this case? + end + end + end + return widenmustalias(rt) +end + +function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) + has_conditional(𝕃ᵢ, sv) || return widenconditional(rt) + (; fargs, argtypes) = arginfo + fargs === nothing && return widenconditional(rt) + if can_propagate_conditional(rt, argtypes) + return propagate_conditional(rt, argtypes[rt.slot]::Conditional) + end + slot = 0 + alias = nothing + thentype = elsetype = Any + condval = maybe_extract_const_bool(rt) + ⊑, ⋤, ⊓ = partialorder(𝕃ᵢ), strictneqpartialorder(𝕃ᵢ), meet(𝕃ᵢ) + for i in 1:length(fargs) + # find the first argument which supports refinement, + # and intersect all equivalent arguments with it + argtyp = argtypes[i] + if alias === nothing + arg = ssa_def_slot(fargs[i], sv) + if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type + old = argtyp + id = slot_id(arg) + elseif argtyp isa MustAlias + old = argtyp.fldtyp + id = argtyp.slot + else + continue # unlikely to refine + end + elseif argtyp isa MustAlias && issubalias(argtyp, alias) + arg = nothing + old = alias.fldtyp + id = alias.slot + else + continue + end + if slot == 0 || id == slot + if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}}) + # if we have already computed argument refinement information, apply that now to get the result + new_thentype = maybecondinfo[1][i] + new_elsetype = maybecondinfo[2][i] + else + # otherwise compute it on the fly + cnd = conditional_argtype(𝕃ᵢ, rt, maybecondinfo, argtypes, i) + new_thentype = cnd.thentype + new_elsetype = cnd.elsetype + end + if condval === false + thentype = Bottom + elseif new_thentype ⊑ thentype + thentype = new_thentype + else + thentype = thentype ⊓ widenconst(new_thentype) + end + if condval === true + elsetype = Bottom + elseif new_elsetype ⊑ elsetype + elsetype = new_elsetype + else + elsetype = elsetype ⊓ widenconst(new_elsetype) + end + if (slot > 0 || condval !== false) && thentype ⋤ old + slot = id + if !(arg isa SlotNumber) && argtyp isa MustAlias + alias = argtyp + end + elseif (slot > 0 || condval !== true) && elsetype ⋤ old + slot = id + if !(arg isa SlotNumber) && argtyp isa MustAlias + alias = argtyp + end + else # reset: no new useful information for this slot 
+ slot = 0 + alias = nothing + thentype = elsetype = Any + end + end + end + if thentype === Bottom && elsetype === Bottom + return Bottom # accidentally proved this call to be dead / throw ! + elseif slot > 0 + if alias !== nothing + return form_mustalias_conditional(alias, thentype, elsetype) + end + return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot + end + return widenconditional(rt) +end + +function conditional_argtype(𝕃ᵢ::AbstractLattice, @nospecialize(rt), @nospecialize(sig), + argtypes::Vector{Any}, i::Int) + if isa(rt, InterConditional) && rt.slot == i + return rt + else + argt = widenslotwrapper(argtypes[i]) + if isvarargtype(argt) + @assert fieldcount(sig) == i + argt = unwrapva(argt) + end + thentype = elsetype = tmeet(𝕃ᵢ, argt, fieldtype(sig, i)) + condval = maybe_extract_const_bool(rt) + condval === true && (elsetype = Bottom) + condval === false && (thentype = Bottom) + return InterConditional(i, thentype, elsetype) + end +end + +function collect_slot_refinements(𝕃ᵢ::AbstractLattice, applicable::Vector{MethodMatchTarget}, + argtypes::Vector{Any}, fargs::Vector{Any}, sv::InferenceState) + ⊏, ⊔ = strictpartialorder(𝕃ᵢ), join(𝕃ᵢ) + slotrefinements = nothing + for i = 1:length(fargs) + fargᵢ = fargs[i] + if fargᵢ isa SlotNumber + fidx = slot_id(fargᵢ) + argt = widenslotwrapper(argtypes[i]) + if isvarargtype(argt) + argt = unwrapva(argt) + end + sigt = Bottom + for j = 1:length(applicable) + (;match) = applicable[j] + valid_as_lattice(match.spec_types, true) || continue + sigt = sigt ⊔ fieldtype(match.spec_types, i) + end + if sigt ⊏ argt # i.e. signature type is strictly more specific than the type of the argument slot + if slotrefinements === nothing + slotrefinements = fill!(Vector{Any}(undef, length(sv.slottypes)), nothing) + end + slotrefinements[fidx] = sigt + end + end + end + return slotrefinements +end + +const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result." +const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence." +const RECURSION_MSG_HARDLIMIT = "Bounded recursion detected under hardlimit. Call was widened to force convergence." + +function abstract_call_method(interp::AbstractInterpreter, + method::Method, @nospecialize(sig), sparams::SimpleVector, + hardlimit::Bool, si::StmtInfo, sv::AbsIntState) + sigtuple = unwrap_unionall(sig) + sigtuple isa DataType || + return Future(MethodCallResult(Any, Any, Effects(), nothing, false, false)) + all(@nospecialize(x) -> valid_as_lattice(unwrapva(x), true), sigtuple.parameters) || + return Future(MethodCallResult(Union{}, Any, EFFECTS_THROWS, nothing, false, false)) # catch bad type intersections early + + if is_nospecializeinfer(method) + sig = get_nospecializeinfer_sig(method, sig, sparams) + end + + # Limit argument type tuple growth of functions: + # look through the parents list to see if there's a call to the same method + # and from the same method. + # Returns the topmost occurrence of that repeated edge. 
+ edgecycle = edgelimited = false + topmost = nothing + + for sv′ in AbsIntStackUnwind(sv) + infmi = frame_instance(sv′) + if method === infmi.def + if infmi.specTypes::Type == sig::Type + # avoid widening when detecting self-recursion + # TODO: merge call cycle and return right away + topmost = nothing + edgecycle = true + break + end + topmost === nothing || continue + if edge_matches_sv(interp, sv′, method, sig, sparams, hardlimit, sv) + topmost = sv′ + edgecycle = true + end + end + end + washardlimit = hardlimit + + if topmost !== nothing + msig = unwrap_unionall(method.sig)::DataType + spec_len = length(msig.parameters) + 1 + mi = frame_instance(sv) + + if isdefined(method, :recursion_relation) + # We don't require the recursion_relation to be transitive, so + # apply a hard limit + hardlimit = true + end + + if method === mi.def + # Under direct self-recursion, permit much greater use of reducers. + # here we assume that complexity(specTypes) :>= complexity(sig) + comparison = mi.specTypes + l_comparison = length((unwrap_unionall(comparison)::DataType).parameters) + spec_len = max(spec_len, l_comparison) + elseif !hardlimit && isa(topmost, InferenceState) + # Without a hardlimit, permit use of reducers too. + comparison = frame_instance(topmost).specTypes + # n.b. currently don't allow vararg reducers + #l_comparison = length((unwrap_unionall(comparison)::DataType).parameters) + #spec_len = max(spec_len, l_comparison) + else + comparison = method.sig + end + + # see if the type is actually too big (relative to the caller), and limit it if required + newsig = limit_type_size(sig, comparison, hardlimit ? comparison : mi.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len) + + if newsig !== sig + # continue inference, but note that we've limited parameter complexity + # on this call (to ensure convergence), so that we don't cache this result + if call_result_unused(si) + add_remark!(interp, sv, RECURSION_UNUSED_MSG) + # if we don't (typically) actually care about this result, + # don't bother trying to examine some complex abstract signature + # since it's very unlikely that we'll try to inline this, + # or want make an invoke edge to its calling convention return type. + # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) + return Future(MethodCallResult(Any, Any, Effects(), nothing, true, true)) + end + add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works just because currently the `:terminate` condition usually means this is unreachable here + # for irinterp because there are not unresolved cycles, but it's not a good solution. + # We should revisit this once we have a better story for handling cycles in irinterp. + if isa(sv, InferenceState) + # since the hardlimit is against the edge to the parent frame, + # we should try to poison the whole edge, not just the topmost frame + parentframe = frame_parent(topmost) + while !isa(parentframe, InferenceState) + # attempt to find a parent frame that can handle this LimitedAccuracy result correctly + # so we don't try to cache this incomplete intermediate result + parentframe === nothing && break + parentframe = frame_parent(parentframe) + end + if isa(parentframe, InferenceState) + poison_callstack!(sv, parentframe) + elseif isa(topmost, InferenceState) + poison_callstack!(sv, topmost) + end + end + # n.b. 
this heuristic depends on the non-local state, so we must record the limit later + sig = newsig + sparams = svec() + edgelimited = true + end + end + + # if sig changed, may need to recompute the sparams environment + if isa(method.sig, UnionAll) && isempty(sparams) + recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), sig, method.sig)::SimpleVector + #@assert recomputed[1] !== Bottom + # We must not use `sig` here, since that may re-introduce structural complexity that + # our limiting heuristic sought to eliminate. The alternative would be to not increment depth over covariant contexts, + # but we prefer to permit inference of tuple-destructuring, so we don't do that right now + # For example, with a signature such as `Tuple{T, Ref{T}} where {T <: S}` + # we might want to limit this to `Tuple{S, Ref}`, while type-intersection can instead give us back the original type + # (which moves `S` back up to a lower comparison depth) + # Optionally, we could try to drive this to a fixed point, but I think this is getting too complex, + # and this would only cause more questions and more problems + # (the following is only an example, most of the statements are probable in the wrong order): + # newsig = sig + # seen = IdSet() + # while !(newsig in seen) + # push!(seen, newsig) + # lsig = length((unwrap_unionall(sig)::DataType).parameters) + # newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, lsig) + # recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector + # newsig = recomputed[2] + # end + # sig = ? + sparams = recomputed[2]::SimpleVector + end + + return typeinf_edge(interp, method, sig, sparams, sv, edgecycle, edgelimited) +end + +function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState, + method::Method, @nospecialize(sig), sparams::SimpleVector, + hardlimit::Bool, sv::AbsIntState) + # The `method_for_inference_heuristics` will expand the given method's generator if + # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists. + # The other `CodeInfo`s we inspect will already have this field inflated, so we just + # access it directly instead (to avoid regeneration). + world = get_inference_world(interp) + callee_method2 = method_for_inference_heuristics(method, sig, sparams, world) + inf_method2 = method_for_inference_limit_heuristics(frame) + if callee_method2 !== inf_method2 # limit only if user token match + return false + end + if isa(frame, InferenceState) && cache_owner(frame.interp) !== cache_owner(interp) + # Don't assume that frames in different interpreters are the same + return false + end + if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit + # if this is a soft limit, + # also inspect the parent of this edge, + # to see if they are the same Method as sv + # in which case we'll need to ensure it is convergent + # otherwise, we don't + + # check in the cycle list first + # all items in here are considered mutual parents of all others + if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame)) + let parent = frame_parent(frame) + parent === nothing && return false + (is_cached(parent) || frame_parent(parent) !== nothing) || return false + matches_sv(parent, sv) || return false + end + end + + # If the method defines a recursion relation, give it a chance + # to tell us that this recursion is actually ok. 
+ if isdefined(method, :recursion_relation) + if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame_instance(frame).specTypes]) + return false + end + end + end + return true +end + +# This function is used for computing alternate limit heuristics +function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt) + if (hasgenerator(method) && !(method.generator isa Core.GeneratedFunctionStub) && + may_invoke_generator(method, sig, sparams)) + mi = specialize_method(method, sig, sparams) + cinfo = get_staged(mi, world) + if isa(cinfo, CodeInfo) + method2 = cinfo.method_for_inference_limit_heuristics + if method2 isa Method + return method2 + end + end + end + return nothing +end + +function matches_sv(parent::AbsIntState, sv::AbsIntState) + # limit only if user token match + return (frame_instance(parent).def === frame_instance(sv).def && + method_for_inference_limit_heuristics(sv) === method_for_inference_limit_heuristics(parent)) +end + +function is_edge_recursed(edge::CodeInstance, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return edge.def === frame_instance(sv) + end +end + +function is_method_recursed(method::Method, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return method === frame_instance(sv).def + end +end + +function is_constprop_edge_recursed(edge::MethodInstance, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return edge === frame_instance(sv) && is_constproped(sv) + end +end + +function is_constprop_method_recursed(method::Method, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return method === frame_instance(sv).def && is_constproped(sv) + end +end + +# keeps result and context information of abstract_method_call, which will later be used for +# backedge computation, and concrete evaluation or constant-propagation +struct MethodCallResult + rt + exct + effects::Effects + edge::Union{Nothing,CodeInstance} + edgecycle::Bool + edgelimited::Bool + volatile_inf_result::Union{Nothing,VolatileInferenceResult} + function MethodCallResult(@nospecialize(rt), @nospecialize(exct), effects::Effects, + edge::Union{Nothing,CodeInstance}, edgecycle::Bool, edgelimited::Bool, + volatile_inf_result::Union{Nothing,VolatileInferenceResult}=nothing) + return new(rt, exct, effects, edge, edgecycle, edgelimited, volatile_inf_result) + end +end + +struct InvokeCall + types # ::Type + InvokeCall(@nospecialize(types)) = new(types) +end + +struct ConstCallResult + rt::Any + exct::Any + const_result::ConstResult + effects::Effects + const_edge::Union{Nothing,CodeInstance} + function ConstCallResult( + @nospecialize(rt), @nospecialize(exct), + const_result::ConstResult, effects::Effects, + const_edge::Union{Nothing,CodeInstance}) + return new(rt, exct, const_result, effects, const_edge) + end +end + +function abstract_call_method_with_const_args(interp::AbstractInterpreter, + result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, + match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) + if bail_out_const_call(interp, result, si, match, sv) + return nothing + end + eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv) + concrete_eval_result = nothing + if eligibility === :concrete_eval + concrete_eval_result = concrete_eval_call(interp, f, result, arginfo, sv, invokecall) + # if we don't inline the result of this 
concrete evaluation, + # give const-prop' a chance to inline a better method body + if !may_optimize(interp) || ( + may_inline_concrete_result(concrete_eval_result.const_result::ConcreteResult) || + concrete_eval_result.rt === Bottom) # unless this call deterministically throws and thus is non-inlineable + return concrete_eval_result + end + # TODO allow semi-concrete interp for this call? + end + mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) + mi === nothing && return concrete_eval_result + if is_constprop_recursed(result, mi, sv) + add_remark!(interp, sv, "[constprop] Edge cycle encountered") + return nothing + end + # try semi-concrete evaluation + if eligibility === :semi_concrete_eval + irinterp_result = semi_concrete_eval_call(interp, mi, result, arginfo, sv) + if irinterp_result !== nothing + return irinterp_result + end + end + # try constant prop' + return const_prop_call(interp, mi, result, arginfo, sv, concrete_eval_result) +end + +function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult, + si::StmtInfo, match::MethodMatch, sv::AbsIntState) + if !InferenceParams(interp).ipo_constant_propagation + add_remark!(interp, sv, "[constprop] Disabled by parameter") + return true + end + if is_no_constprop(match.method) + add_remark!(interp, sv, "[constprop] Disabled by method parameter") + return true + end + if is_removable_if_unused(result.effects) + if isa(result.rt, Const) + add_remark!(interp, sv, "[constprop] No more information to be gained (const)") + return true + elseif call_result_unused(si) + add_remark!(interp, sv, "[constprop] No more information to be gained (unused result)") + return true + end + end + if result.rt === Bottom + if is_terminates(result.effects) && is_effect_free(result.effects) + # In the future, we may want to add `&& isa(result.exct, Const)` to + # the list of conditions here, but currently, our effect system isn't + # precise enough to let us determine :consistency of `exct`, so we + # would have to force constprop just to determine this, which is too + # expensive. + add_remark!(interp, sv, "[constprop] No more information to be gained (bottom)") + return true + end + end + return false +end + +function concrete_eval_eligible(interp::AbstractInterpreter, + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + (;effects) = result + if inbounds_option() === :off + if !is_nothrow(effects) + # Disable concrete evaluation in `--check-bounds=no` mode, + # unless it is known to not throw. + return :none + end + end + if result.edge !== nothing && is_foldable(effects, #=check_rtcall=#true) + if f !== nothing && is_all_const_arg(arginfo, #=start=#2) + if (is_nonoverlayed(interp) || is_nonoverlayed(effects) || + # Even if overlay methods are involved, when `:consistent_overlay` is + # explicitly applied, we can still perform concrete evaluation using the + # original methods for executing them. 
+ # While there's a chance that the non-overlayed counterparts may raise + # non-egal exceptions, it will not impact the compilation validity, since: + # - the results of the concrete evaluation will not be inlined + # - the exception types from the concrete evaluation will not be propagated + is_consistent_overlay(effects)) + return :concrete_eval + end + # disable concrete-evaluation if this function call is tainted by some overlayed + # method since currently there is no easy way to execute overlayed methods + add_remark!(interp, sv, "[constprop] Concrete eval disabled for overlayed methods") + end + if !any_conditional(arginfo) + if may_optimize(interp) + return :semi_concrete_eval + else + # disable irinterp if optimization is disabled, since it requires optimized IR + add_remark!(interp, sv, "[constprop] Semi-concrete interpretation disabled for non-optimizing interpreter") + end + end + end + return :none +end + +is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int) +function is_all_const_arg(argtypes::Vector{Any}, start::Int) + for i = start:length(argtypes) + argtype = widenslotwrapper(argtypes[i]) + is_const_argtype(argtype) || return false + end + return true +end + +is_const_argtype(@nospecialize argtype) = isa(argtype, Const) || isconstType(argtype) || issingletontype(argtype) + +any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) +any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) + +collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start) +function collect_const_args(argtypes::Vector{Any}, start::Int) + return Any[ let a = widenslotwrapper(argtypes[i]) + isa(a, Const) ? a.val : + isconstType(a) ? a.parameters[1] : + (a::DataType).instance + end for i = start:length(argtypes) ] +end + +function concrete_eval_call(interp::AbstractInterpreter, + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState, + invokecall::Union{InvokeCall,Nothing}=nothing) + args = collect_const_args(arginfo, #=start=#2) + if invokecall !== nothing + # this call should be `invoke`d, rewrite `args` back now + pushfirst!(args, f, invokecall.types) + f = invoke + end + world = get_inference_world(interp) + edge = result.edge::CodeInstance + value = try + Core._call_in_world_total(world, f, args...) + catch e + # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime. 
+ # However, at present, :consistency does not mandate the type of the exception + concrete_result = ConcreteResult(edge, result.effects) + return ConstCallResult(Bottom, Any, concrete_result, result.effects, #=const_edge=#nothing) + end + concrete_result = ConcreteResult(edge, EFFECTS_TOTAL, value) + return ConstCallResult(Const(value), Bottom, concrete_result, EFFECTS_TOTAL, #=const_edge=#nothing) +end + +# check if there is a cycle and duplicated inference of `mi` +function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv::AbsIntState) + result.edgecycle || return false + if result.edgelimited + return is_constprop_method_recursed(mi.def::Method, sv) + else + # if the type complexity limiting didn't decide to limit the call signature (as + # indicated by `result.edgelimited === false`), we can relax the cycle detection + # by comparing `MethodInstance`s and allow inference to propagate different + # constant elements if the recursion is finite over the lattice + return is_constprop_edge_recursed(mi, sv) + end +end + +# if there's a possibility we could get a better result with these constant arguments +# (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise +function maybe_get_const_prop_profitable(interp::AbstractInterpreter, + result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, + match::MethodMatch, sv::AbsIntState) + method = match.method + force = force_const_prop(interp, f, method) + if !const_prop_rettype_heuristic(interp, result, si, sv, force) + # N.B. remarks are emitted within `const_prop_rettype_heuristic` + return nothing + end + if !const_prop_argument_heuristic(interp, arginfo, sv) + add_remark!(interp, sv, "[constprop] Disabled by argument heuristics") + return nothing + end + all_overridden = is_all_overridden(interp, arginfo, sv) + if !force && !const_prop_function_heuristic(interp, f, arginfo, all_overridden, sv) + add_remark!(interp, sv, "[constprop] Disabled by function heuristic") + return nothing + end + force |= all_overridden + mi = specialize_method(match; preexisting=!force) + if mi === nothing + add_remark!(interp, sv, "[constprop] Failed to specialize") + return nothing + end + mi = mi::MethodInstance + if !force && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) + add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic") + return nothing + end + return mi +end + +function const_prop_rettype_heuristic(interp::AbstractInterpreter, result::MethodCallResult, + si::StmtInfo, sv::AbsIntState, force::Bool) + rt = result.rt + if rt isa LimitedAccuracy + # optimizations like inlining are disabled for limited frames, + # thus there won't be much benefit in constant-prop' here + # N.B. don't allow forced constprop' for safety (xref #52763) + add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (limited accuracy)") + return false + elseif force + return true + elseif call_result_unused(si) && result.edgecycle + add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (edgecycle with unused result)") + return false + end + # check if this return type is improvable (i.e.
whether it's possible that with more + # information, we might get a more precise type) + if isa(rt, Type) + # could always be improved to `Const`, `PartialStruct` or just a more precise type, + # unless we're already at `Bottom` + if rt === Bottom + add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (erroneous result)") + return false + end + return true + elseif isa(rt, PartialStruct) || isa(rt, InterConditional) || isa(rt, InterMustAlias) + # could be improved to `Const` or a more precise wrapper + return true + elseif isa(rt, Const) + if is_nothrow(result.effects) + add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (nothrow const)") + return false + end + # Could still be improved to Bottom (or at least could see the effects improved) + return true + else + add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (unimprovable result)") + return false + end +end + +# determines heuristically whether constant propagation can be worthwhile +# by checking if any of the given `argtypes` is "interesting" enough to be propagated +function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + argtypes = arginfo.argtypes + for i in 1:length(argtypes) + a = argtypes[i] + if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && arginfo.fargs !== nothing + is_const_prop_profitable_conditional(a, arginfo.fargs, sv) && return true + else + a = widenslotwrapper(a) + has_nontrivial_extended_info(𝕃ᵢ, a) && is_const_prop_profitable_arg(𝕃ᵢ, a) && return true + end + end + return false +end + +function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState) + slotid = find_constrained_arg(cnd, fargs, sv) + if slotid !== nothing + return true + end + # as a minor optimization, we just check the result is a constant or not, + # since both `has_nontrivial_extended_info`/`is_const_prop_profitable_arg` return `true` + # for `Const(::Bool)` + return isa(widenconditional(cnd), Const) +end + +function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState) + slot = cnd.slot + for i in 1:length(fargs) + arg = ssa_def_slot(fargs[i], sv) + if isa(arg, SlotNumber) && slot_id(arg) == slot + return i + end + end + return nothing +end + +# checks if all argtypes have additional information other than what `Type` can provide +function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + for i in 1:length(argtypes) + a = argtypes[i] + if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && fargs !== nothing + is_const_prop_profitable_conditional(a, fargs, sv) || return false + else + is_forwardable_argtype(𝕃ᵢ, widenslotwrapper(a)) || return false + end + end + return true +end + +function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method) + return is_aggressive_constprop(method) || + InferenceParams(interp).aggressive_constant_propagation || + typename(typeof(f)).constprop_heuristic === Core.FORCE_CONST_PROP +end + +function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f), + arginfo::ArgInfo, all_overridden::Bool, sv::AbsIntState) + argtypes = arginfo.argtypes + heuristic = typename(typeof(f)).constprop_heuristic + if length(argtypes) > 1 + 𝕃ᵢ = typeinf_lattice(interp) + if heuristic === Core.ARRAY_INDEX_HEURISTIC + arrty = argtypes[2] + # don't propagate constant index into indexing of non-constant array + if arrty isa Type
&& arrty <: AbstractArray && !issingletontype(arrty) + # For static arrays, allow the constprop if we could possibly + # deduce nothrow as a result. + still_nothrow = isa(sv, InferenceState) ? is_nothrow(sv.ipo_effects) : false + if !still_nothrow || ismutabletype(arrty) + return false + end + elseif ⊑(𝕃ᵢ, arrty, Array) || ⊑(𝕃ᵢ, arrty, GenericMemory) + return false + end + elseif heuristic === Core.ITERATE_HEURISTIC + itrty = argtypes[2] + if ⊑(𝕃ᵢ, itrty, Array) || ⊑(𝕃ᵢ, itrty, GenericMemory) + return false + end + end + end + if !all_overridden && heuristic === Core.SAMETYPE_HEURISTIC + # it is almost useless to inline the op when all the same type, + # but highly worthwhile to inline promote of a constant + length(argtypes) > 2 || return false + t1 = widenconst(argtypes[2]) + for i in 3:length(argtypes) + at = argtypes[i] + ty = isvarargtype(at) ? unwraptv(at) : widenconst(at) + if ty !== t1 + return true + end + end + return false + end + return true +end + +# This is a heuristic to avoid trying to const prop through complicated functions +# where we would spend a lot of time, but are probably unlikely to get an improved +# result anyway. +function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, + mi::MethodInstance, arginfo::ArgInfo, sv::AbsIntState) + method = mi.def::Method + if method.is_for_opaque_closure + # Not inlining an opaque closure can be very expensive, so be generous + # with the const-prop-ability. It is quite possible that we can't infer + # anything at all without const-propping, so the inlining check below + # isn't particularly helpful here. + return true + end + # now check if the source of this method instance is inlineable, since the extended type + # information we have here would be discarded if it is not inlined into a callee context + # (modulo the inferred return type that can be potentially refined) + if is_declared_inline(method) + # this method is declared as `@inline` and will be inlined + return true + end + flag = get_curr_ssaflag(sv) + if is_stmt_inline(flag) + # force constant propagation for a call that is going to be inlined + # since the inliner will try to find this constant result + # if these constant arguments arrive there + return true + elseif is_stmt_noinline(flag) + # this call won't be inlined, thus this constant-prop' will most likely be unfruitful + return false + else + # Peek at the inferred result for the method to determine if the optimizer + # was able to cut it down to something simple (inlineable in particular). + # If so, there will be a good chance we might be able to const prop + # all the way through and learn something new. 
+ code = get(code_cache(interp), mi, nothing) + if isa(code, CodeInstance) + inferred = @atomic :monotonic code.inferred + # TODO propagate a specific `CallInfo` that conveys information about this call + if src_inlining_policy(interp, inferred, NoCallInfo(), IR_FLAG_NULL) + return true + end + end + end + return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful +end + +function semi_concrete_eval_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + world = frame_world(sv) + mi_cache = WorldView(code_cache(interp), world) + codeinst = get(mi_cache, mi, nothing) + if codeinst !== nothing + irsv = IRInterpretationState(interp, codeinst, mi, arginfo.argtypes, world) + if irsv !== nothing + assign_parentchild!(irsv, sv) + rt, (nothrow, noub) = ir_abstract_constant_propagation(interp, irsv) + @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp" + if !(isa(rt, Type) && hasintersect(rt, Bool)) + ir = irsv.ir + # TODO (#48913) enable double inlining pass when there are any calls + # that are newly resolved by irinterp + # state = InliningState(interp) + # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) + effects = result.effects + if nothrow + effects = Effects(effects; nothrow=true) + end + if noub + effects = Effects(effects; noub=ALWAYS_TRUE) + end + exct = refine_exception_type(result.exct, effects) + semi_concrete_result = SemiConcreteResult(codeinst, ir, effects, spec_info(irsv)) + const_edge = nothing # TODO use the edges from irsv? + return ConstCallResult(rt, exct, semi_concrete_result, effects, const_edge) + end + end + end + return nothing +end + +function const_prop_result(inf_result::InferenceResult) + @assert isdefined(inf_result, :ci_as_edge) "InferenceResult without ci_as_edge" + const_prop_result = ConstPropResult(inf_result) + return ConstCallResult(inf_result.result, inf_result.exc_result, const_prop_result, + inf_result.ipo_effects, inf_result.ci_as_edge) +end + +# return cached result of constant analysis +return_localcache_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) = + const_prop_result(inf_result) + +function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + return has_conditional(𝕃ᵢ, sv) ? 
ConditionalSimpleArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) +end + +function const_prop_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState, + concrete_eval_result::Union{Nothing,ConstCallResult}=nothing) + inf_cache = get_inference_cache(interp) + 𝕃ᵢ = typeinf_lattice(interp) + forwarded_argtypes = compute_forwarded_argtypes(interp, arginfo, sv) + # use `cache_argtypes` that has been constructed for fresh regular inference if available + volatile_inf_result = result.volatile_inf_result + if volatile_inf_result !== nothing + cache_argtypes = volatile_inf_result.inf_result.argtypes + else + cache_argtypes = matching_cache_argtypes(𝕃ᵢ, mi) + end + argtypes = matching_cache_argtypes(𝕃ᵢ, mi, forwarded_argtypes, cache_argtypes) + inf_result = cache_lookup(𝕃ᵢ, mi, argtypes, inf_cache) + if inf_result !== nothing + # found the cache for this constant prop' + if inf_result.result === nothing + add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") + return nothing + end + @assert inf_result.linfo === mi "MethodInstance for cached inference result does not match" + return return_localcache_result(interp, inf_result, sv) + end + overridden_by_const = falses(length(argtypes)) + for i = 1:length(argtypes) + if argtypes[i] !== argtype_by_index(cache_argtypes, i) + overridden_by_const[i] = true + end + end + if !any(overridden_by_const) + add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") + return nothing + end + # perform fresh constant prop' + inf_result = InferenceResult(mi, argtypes, overridden_by_const) + frame = InferenceState(inf_result, #=cache_mode=#:local, interp) # TODO: this should also be converted to a stackless Future + if frame === nothing + add_remark!(interp, sv, "[constprop] Could not retrieve the source") + return nothing # this is probably a bad generated function (unsound), but just ignore it + end + assign_parentchild!(frame, sv) + if !typeinf(interp, frame) + add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") + @assert frame.frameid != 0 && frame.cycleid == frame.frameid + callstack = frame.callstack::Vector{AbsIntState} + @assert callstack[end] === frame && length(callstack) == frame.frameid + pop!(callstack) + return nothing + end + existing_edge = result.edge + inf_result.ci_as_edge = codeinst_as_edge(interp, frame, existing_edge) + @assert frame.frameid != 0 && frame.cycleid == frame.frameid + @assert frame.parentid == sv.frameid + @assert inf_result.result !== nothing + # ConditionalSimpleArgtypes is allowed, because the only case in which it modifies + # the argtypes is when one of the argtypes is a `Conditional`, in which case + # concrete_eval_result will not be available.
+ if concrete_eval_result !== nothing && isa(forwarded_argtypes, Union{SimpleArgtypes, ConditionalSimpleArgtypes}) + # override return type and effects with concrete evaluation result if available + inf_result.result = concrete_eval_result.rt + inf_result.ipo_effects = concrete_eval_result.effects + end + return const_prop_result(inf_result) +end + +# TODO implement MustAlias forwarding + +struct ConditionalSimpleArgtypes + arginfo::ArgInfo + sv::InferenceState +end + +function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance, + conditional_argtypes::ConditionalSimpleArgtypes, + cache_argtypes::Vector{Any}) + (; arginfo, sv) = conditional_argtypes + (; fargs, argtypes) = arginfo + given_argtypes = Vector{Any}(undef, length(argtypes)) + def = mi.def::Method + nargs = Int(def.nargs) + for i in 1:length(argtypes) + argtype = argtypes[i] + # forward `Conditional` if it conveys a constraint on any other argument + if isa(argtype, Conditional) && fargs !== nothing + cnd = argtype + slotid = find_constrained_arg(cnd, fargs, sv) + if slotid !== nothing + # using union-split signature, we may be able to narrow down `Conditional` + sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) + thentype = tmeet(cnd.thentype, sigt) + elsetype = tmeet(cnd.elsetype, sigt) + if thentype === Bottom && elsetype === Bottom + # we accidentally proved this method match is impossible + # TODO bail out here immediately rather than just propagating Bottom ? + given_argtypes[i] = Bottom + else + given_argtypes[i] = Conditional(slotid, thentype, elsetype) + end + continue + end + end + given_argtypes[i] = widenslotwrapper(argtype) + end + return pick_const_args!(𝕃, given_argtypes, cache_argtypes) +end + +# This is only for use with `Conditional`. +# In general, usage of this is wrong. +function ssa_def_slot(@nospecialize(arg), sv::InferenceState) + code = sv.src.code + init = sv.currpc + while isa(arg, SSAValue) + init = arg.id + arg = code[init] + end + if arg isa SlotNumber + # found this kind of pattern: + # %init = SlotNumber(x) + # [...] + # goto if not isa(%init, T) + # now conservatively make sure there isn't potentially another conflicting assignment + # to the same slot between the def and usage + # we can assume the IR is sorted, since the front-end only creates SSA values in order + for i = init:(sv.currpc-1) + e = code[i] + if isexpr(e, :(=)) && e.args[1] === arg + return nothing + end + end + else + # there might still be the following kind of pattern (see #45499): + # %init = ... + # [...] + # SlotNumber(x) = %init + # [...] + # goto if not isa(%init, T) + # let's check if there is a slot assigned to the def SSA value but also there isn't + # any potentially conflicting assignment to the same slot + arg = nothing + def = SSAValue(init) + for i = (init+1):(sv.currpc-1) + e = code[i] + if isexpr(e, :(=)) + lhs = e.args[1] + if isa(lhs, SlotNumber) + lhs === arg && return nothing + rhs = e.args[2] + if rhs === def + arg = lhs + end + end + end + end + end + return arg +end + +# No slots in irinterp +ssa_def_slot(@nospecialize(arg), sv::IRInterpretationState) = nothing + +struct AbstractIterationResult + cti::Vector{Any} + info::MaybeAbstractIterationInfo + ai_effects::Effects +end +AbstractIterationResult(cti::Vector{Any}, info::MaybeAbstractIterationInfo) = + AbstractIterationResult(cti, info, EFFECTS_TOTAL) + +# `typ` is the inferred type for expression `arg`. +# if the expression constructs a container (e.g. 
`svec(x,y,z)`), +# refine its type to an array of element types. +# Union of Tuples of the same length is converted to Tuple of Unions. +# returns an array of types +function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ), + sv::AbsIntState) + if isa(typ, PartialStruct) + widet = typ.typ + if isa(widet, DataType) + if widet.name === Tuple.name + return Future(AbstractIterationResult(typ.fields, nothing)) + elseif widet.name === _NAMEDTUPLE_NAME + return Future(AbstractIterationResult(typ.fields, nothing)) + end + end + end + + if isa(typ, Const) + val = typ.val + if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple) + return Future(AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing)) # avoid making a tuple Generator here! + end + end + + tti0 = widenconst(typ) + tti = unwrap_unionall(tti0) + if isa(tti, DataType) && tti.name === _NAMEDTUPLE_NAME + # A NamedTuple iteration is the same as the iteration of its Tuple parameter: + # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type + tti = unwraptv(tti.parameters[2]) + tti0 = rewrap_unionall(tti, tti0) + end + if isa(tti, Union) + utis = uniontypes(tti) + # refine the Union to remove elements that are not valid tags for objects + filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis) + if length(utis) == 0 + return Future(AbstractIterationResult(Any[], nothing)) # oops, this statement was actually unreachable + elseif length(utis) == 1 + tti = utis[1] + tti0 = rewrap_unionall(tti, tti0) + else + if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())) + end + ltp = length((utis[1]::DataType).parameters) + for t in utis + if length((t::DataType).parameters) != ltp + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing)) + end + end + result = Any[ Union{} for _ in 1:ltp ] + for t in utis + tps = (t::DataType).parameters + for j in 1:ltp + @assert valid_as_lattice(tps[j], true) + result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0)) + end + end + return Future(AbstractIterationResult(result, nothing)) + end + end + if tti0 <: Tuple + if isa(tti0, DataType) + return Future(AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing)) + elseif !isa(tti, DataType) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing)) + else + len = length(tti.parameters) + last = tti.parameters[len] + va = isvarargtype(last) + elts = Any[ fieldtype(tti0, i) for i = 1:len ] + if va + if elts[len] === Union{} + pop!(elts) + else + elts[len] = Vararg{elts[len]} + end + end + return Future(AbstractIterationResult(elts, nothing)) + end + elseif tti0 === SimpleVector + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing)) + elseif tti0 === Any + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())) + elseif tti0 <: Array || tti0 <: GenericMemory + if eltype(tti0) === Union{} + return Future(AbstractIterationResult(Any[], nothing)) + end + return Future(AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)) + else + return abstract_iteration(interp, itft, typ, sv) + end +end + +# simulate iteration protocol on container type up to fixpoint +function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::AbsIntState) + if isa(itft, Const) + iteratef = itft.val + else + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, 
Effects())) + end + @assert !isvarargtype(itertype) + + iterateresult = Future{AbstractIterationResult}() + call1future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true, false), sv)::Future + function inferiterate(interp, sv) + call1 = call1future[] + stateordonet = call1.rt + # Return Bottom if this is not an iterator. + # WARNING: Changes to the iteration protocol must be reflected here, + # this is not just an optimization. + # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol + if stateordonet === Bottom + iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call1.effects, call1.info)], true)) + return true + end + stateordonet_widened = widenconst(stateordonet) + calls = CallMeta[call1] + valtype = statetype = Bottom + ret = Any[] + 𝕃ᵢ = typeinf_lattice(interp) + may_have_terminated = false + local call2future::Future{CallMeta} + + nextstate::UInt8 = 0x0 + function inferiterate_2arg(interp, sv) + if nextstate === 0x1 + nextstate = 0xff + @goto state1 + elseif nextstate === 0x2 + nextstate = 0xff + @goto state2 + else + @assert nextstate === 0x0 + nextstate = 0xff + end + + # Try to unroll the iteration up to max_tuple_splat, which covers any finite + # length iterators, or interesting prefix + while true + if stateordonet_widened === Nothing + iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, true)) + return true + end + if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat + break + end + if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2 + break + end + nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2)) + # If there's no new information in this statetype, don't bother continuing, + # the iterator won't be finite. + if ⊑(𝕃ᵢ, nstatetype, statetype) + iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS) + return true + end + valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1)) + push!(ret, valtype) + statetype = nstatetype + call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true, false), sv)::Future + if !isready(call2future) + nextstate = 0x1 + return false + @label state1 + end + let call = call2future[] + push!(calls, call) + stateordonet = call.rt + stateordonet_widened = widenconst(stateordonet) + end + end + # From here on, we start asking for results on the widened types, rather than + # the precise (potentially const) state type + # statetype and valtype are reinitialized in the first iteration below from the + # (widened) stateordonet, which has not yet been fully analyzed in the loop above + valtype = statetype = Bottom + may_have_terminated = Nothing <: stateordonet_widened + while valtype !== Any + nounion = typeintersect(stateordonet_widened, Tuple{Any,Any}) + if nounion !== Union{} && !isa(nounion, DataType) + # nounion is of a type we cannot handle + valtype = Any + break + end + if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype) + # reached a fixpoint or iterator failed/gave invalid answer + if !hasintersect(stateordonet_widened, Nothing) + # ... but cannot terminate + if may_have_terminated + # ... 
and iterator may have terminated prior to this loop, but not during it + valtype = Bottom + else + # ... or cannot have terminated prior to this loop + iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects()) + return true + end + end + break + end + valtype = tmerge(valtype, nounion.parameters[1]) + statetype = tmerge(statetype, nounion.parameters[2]) + call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true, false), sv)::Future + if !isready(call2future) + nextstate = 0x2 + return false + @label state2 + end + let call = call2future[] + push!(calls, call) + stateordonet = call.rt + stateordonet_widened = widenconst(stateordonet) + end + end + if valtype !== Union{} + push!(ret, Vararg{valtype}) + end + iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, false)) + return true + end # function inferiterate_2arg + # continue making progress as much as possible, on iterate(arg, state) + inferiterate_2arg(interp, sv) || push!(sv.tasks, inferiterate_2arg) + return true + end # inferiterate + # continue making progress as soon as possible, on iterate(arg) + if !(isready(call1future) && inferiterate(interp, sv)) + push!(sv.tasks, inferiterate) + end + return iterateresult +end + +# do apply(af, fargs...), where af is a function value +function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, + sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv)) + itft = Core.Box(argtype_by_index(argtypes, 2)) + aft = argtype_by_index(argtypes, 3) + (itft.contents === Bottom || aft === Bottom) && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + aargtypes = argtype_tail(argtypes, 4) + aftw = widenconst(aft) + if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw)) + if !isconcretetype(aftw) || (aftw <: Builtin) + add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type") + # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting + # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + end + end + res = Union{} + splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum + ctypes::Vector{Vector{Any}} = [Any[aft]] + infos::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]] + all_effects::Effects = EFFECTS_TOTAL + retinfos = ApplyCallInfo[] + retinfo = UnionSplitApplyCallInfo(retinfos) + exctype = Union{} + ctypes´ = Vector{Any}[] + infos´ = Vector{MaybeAbstractIterationInfo}[] + local ti, argtypesi + local ctfuture::Future{AbstractIterationResult} + local callfuture::Future{CallMeta} + + applyresult = Future{CallMeta}() + # split the rest into a resumable state machine + i::Int = 1 + j::Int = 1 + nextstate::UInt8 = 0x0 + function infercalls(interp, sv) + # n.b. Remember that variables will lose their values across restarts, + # so be sure to manually hoist any values that must be preserved and do + # not rely on program order. 
+ # This is a little more complex than the closure continuations often used elsewhere, but avoids needing to manage all of that indentation + if nextstate === 0x1 + nextstate = 0xff + @goto state1 + elseif nextstate === 0x2 + nextstate = 0xff + @goto state2 + elseif nextstate === 0x3 + nextstate = 0xff + @goto state3 + else + @assert nextstate === 0x0 + nextstate = 0xff + end + while i <= length(aargtypes) + argtypesi = (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]]) + i += 1 + j = 1 + while j <= length(argtypesi) + ti = argtypesi[j] + j += 1 + if !isvarargtype(ti) + ctfuture = precise_container_type(interp, itft.contents, ti, sv)::Future + if !isready(ctfuture) + nextstate = 0x1 + return false + @label state1 + end + (;cti, info, ai_effects) = ctfuture[] + else + ctfuture = precise_container_type(interp, itft.contents, unwrapva(ti), sv)::Future + if !isready(ctfuture) + nextstate = 0x2 + return false + @label state2 + end + (;cti, info, ai_effects) = ctfuture[] + # We can't represent a repeating sequence of the same types, + # so tmerge everything together to get one type that represents + # everything. + argt = cti[end] + if isvarargtype(argt) + argt = unwrapva(argt) + end + for k in 1:(length(cti)-1) + argt = tmerge(argt, cti[k]) + end + cti = Any[Vararg{argt}] + end + all_effects = merge_effects(all_effects, ai_effects) + if info !== nothing + for call in info.each + all_effects = merge_effects(all_effects, call.effects) + end + end + if any(@nospecialize(t) -> t === Bottom, cti) + continue + end + for k = 1:length(ctypes) + ct = ctypes[k] + if isvarargtype(ct[end]) + # This is vararg, we're not gonna be able to do any inlining, + # drop the info + info = nothing + tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti) + push!(ctypes´, push!(ct[1:(end - 1)], tail)) + else + push!(ctypes´, append!(ct[:], cti)) + end + push!(infos´, push!(copy(infos[k]), info)) + end + end + # swap for the new array and empty the temporary one + ctypes´, ctypes = ctypes, ctypes´ + infos´, infos = infos, infos´ + empty!(ctypes´) + empty!(infos´) + end + all_effects.nothrow || (exctype = Any) + + i = 1 + while i <= length(ctypes) + ct = ctypes[i] + if bail_out_apply(interp, InferenceLoopState(res, all_effects), sv) + add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information: bailing on analysis of more methods.") + # there is unanalyzed candidate, widen type and effects to the top + let retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing + applyresult[] = CallMeta(Any, Any, Effects(), retinfo) + return true + end + end + lct = length(ct) + # truncate argument list at the first Vararg + for k = 1:lct-1 + cti = ct[k] + if isvarargtype(cti) + ct[k] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(k+1):lct]) + resize!(ct, k) + break + end + end + callfuture = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)::Future + if !isready(callfuture) + nextstate = 0x3 + return false + @label state3 + end + let (; info, rt, exct, effects) = callfuture[] + push!(retinfos, ApplyCallInfo(info, infos[i])) + res = tmerge(typeinf_lattice(interp), res, rt) + exctype = tmerge(typeinf_lattice(interp), exctype, exct) + all_effects = merge_effects(all_effects, effects) + end + i += 1 + end + # TODO: Add a special info type to capture all the iteration info. 
+ # For now, only propagate info if we don't also union-split the iteration + applyresult[] = CallMeta(res, exctype, all_effects, retinfo) + return true + end # function infercalls + # start making progress on the first call + infercalls(interp, sv) || push!(sv.tasks, infercalls) + return applyresult +end + +function argtype_by_index(argtypes::Vector{Any}, i::Int) + n = length(argtypes) + na = argtypes[n] + if isvarargtype(na) + return i >= n ? unwrapva(na) : argtypes[i] + else + return i > n ? Bottom : argtypes[i] + end +end + +function argtype_tail(argtypes::Vector{Any}, i::Int) + n = length(argtypes) + if isvarargtype(argtypes[n]) && i > n + i = n + end + return argtypes[i:n] +end + +struct ConditionalTypes + thentype + elsetype + ConditionalTypes(thentype, elsetype) = (@nospecialize; new(thentype, elsetype)) +end + +@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int, + @nospecialize(rt)) + if isa(rt, Const) + xt = widenslotwrapper(xt) + if rt.val === false + return ConditionalTypes(Bottom, xt) + elseif rt.val === true + return ConditionalTypes(xt, Bottom) + end + end + return isa_condition(xt, ty, max_union_splitting) +end +@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int) + tty_ub, isexact_tty = instanceof_tfunc(ty, true) + tty = widenconst(xt) + if isexact_tty && !isa(tty_ub, TypeVar) + tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info + if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub) + thentype = typeintersect(tty, tty_ub) + if iskindtype(tty_ub) && thentype !== Bottom + # `typeintersect` may be unable narrow down `Type`-type + thentype = tty_ub + end + valid_as_lattice(thentype, true) || (thentype = Bottom) + elsetype = typesubtract(tty, tty_lb, max_union_splitting) + return ConditionalTypes(thentype, elsetype) + end + end + return nothing +end + +@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int, + @nospecialize(rt)) + thentype = c + elsetype = widenslotwrapper(xt) + if rt === Const(false) + thentype = Bottom + elseif rt === Const(true) + elsetype = Bottom + elseif elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton + elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting) + end + return ConditionalTypes(thentype, elsetype) +end +@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int) + thentype = c + elsetype = widenslotwrapper(xt) + if elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton + elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting) + end + return ConditionalTypes(thentype, elsetype) +end + +function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo, + sv::AbsIntState) + @nospecialize f + la = length(argtypes) + 𝕃ᵢ = typeinf_lattice(interp) + ⊑, ⊏, ⊔, ⊓ = partialorder(𝕃ᵢ), strictpartialorder(𝕃ᵢ), join(𝕃ᵢ), meet(𝕃ᵢ) + if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4 + cnd = argtypes[2] + if isa(cnd, Conditional) + newcnd = widenconditional(cnd) + tx = argtypes[3] + ty = argtypes[4] + if isa(newcnd, Const) + # if `cnd` is constant, we should just respect its constantness to keep inference accuracy + return newcnd.val::Bool ? tx : ty + else + # try to simulate this as a real conditional (`cnd ? 
x : y`), so that the penalty for using `ifelse` instead isn't too high + a = ssa_def_slot(fargs[3], sv) + b = ssa_def_slot(fargs[4], sv) + if isa(a, SlotNumber) && cnd.slot == slot_id(a) + tx = (cnd.thentype ⊑ tx ? cnd.thentype : tx ⊓ widenconst(cnd.thentype)) + end + if isa(b, SlotNumber) && cnd.slot == slot_id(b) + ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : ty ⊓ widenconst(cnd.elsetype)) + end + return tx ⊔ ty + end + end + end + ft = popfirst!(argtypes) + rt = builtin_tfunction(interp, f, argtypes, sv) + pushfirst!(argtypes, ft) + if has_mustalias(𝕃ᵢ) && f === getfield && isa(fargs, Vector{Any}) && la ≥ 3 + a3 = argtypes[3] + if isa(a3, Const) + if rt !== Bottom && !isalreadyconst(rt) + var = ssa_def_slot(fargs[2], sv) + if isa(var, SlotNumber) + vartyp = widenslotwrapper(argtypes[2]) + fldidx = maybe_const_fldidx(vartyp, a3.val) + if fldidx !== nothing + # wrap this aliasable field into `MustAlias` for possible constraint propagations + return MustAlias(var, vartyp, fldidx, rt) + end + end + end + end + elseif has_conditional(𝕃ᵢ, sv) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any}) + # perform very limited back-propagation of type information for `is` and `isa` + if f === isa + # try splitting value argument, based on types + a = ssa_def_slot(fargs[2], sv) + a2 = argtypes[2] + a3 = argtypes[3] + if isa(a, SlotNumber) + cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting, rt) + if cndt !== nothing + return Conditional(a, cndt.thentype, cndt.elsetype) + end + end + if isa(a2, MustAlias) + if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting) + if cndt !== nothing + return form_mustalias_conditional(a2, cndt.thentype, cndt.elsetype) + end + end + end + # try splitting type argument, based on value + if isdispatchelem(widenconst(a2)) && a3 isa Union && !has_free_typevars(a3) && !isa(rt, Const) + b = ssa_def_slot(fargs[3], sv) + if isa(b, SlotNumber) + # !(x isa T) implies !(Type{a2} <: T) + # TODO: complete splitting, based on which portions of the Union a3 for which isa_tfunc returns Const(true) or Const(false) instead of Bool + elsetype = typesubtract(a3, Type{widenconst(a2)}, InferenceParams(interp).max_union_splitting) + return Conditional(b, a3, elsetype) + end + end + elseif f === (===) + a = ssa_def_slot(fargs[2], sv) + b = ssa_def_slot(fargs[3], sv) + aty = argtypes[2] + bty = argtypes[3] + # if doing a comparison to a singleton, consider returning a `Conditional` instead + if isa(aty, Const) + if isa(b, SlotNumber) + cndt = egal_condition(aty, bty, InferenceParams(interp).max_union_splitting, rt) + return Conditional(b, cndt.thentype, cndt.elsetype) + elseif isa(bty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + cndt = egal_condition(aty, bty.fldtyp, InferenceParams(interp).max_union_splitting) + return form_mustalias_conditional(bty, cndt.thentype, cndt.elsetype) + end + elseif isa(bty, Const) + if isa(a, SlotNumber) + cndt = egal_condition(bty, aty, InferenceParams(interp).max_union_splitting, rt) + return Conditional(a, cndt.thentype, cndt.elsetype) + elseif isa(aty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + cndt = egal_condition(bty, aty.fldtyp, InferenceParams(interp).max_union_splitting) + return form_mustalias_conditional(aty, cndt.thentype, cndt.elsetype) + end + end + # TODO enable 
multiple constraints propagation here, there are two possible improvements: + # 1. propagate constraints for both lhs and rhs + # 2. we can propagate both constraints on aliased fields and slots + # As for 2, for now, we prioritize constraints on aliased fields, since currently + # different slots that represent the same object can't share same field constraint, + # and thus binding `MustAlias` to the other slot is less likely useful + if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + if isa(bty, MustAlias) + thentype = widenslotwrapper(aty) + elsetype = bty.fldtyp + if thentype ⊏ elsetype + return form_mustalias_conditional(bty, thentype, elsetype) + end + elseif isa(aty, MustAlias) + thentype = widenslotwrapper(bty) + elsetype = aty.fldtyp + if thentype ⊏ elsetype + return form_mustalias_conditional(aty, thentype, elsetype) + end + end + end + # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge + if isa(b, SlotNumber) + thentype = rt === Const(false) ? Bottom : widenslotwrapper(bty) + elsetype = rt === Const(true) ? Bottom : widenslotwrapper(bty) + return Conditional(b, thentype, elsetype) + elseif isa(a, SlotNumber) + thentype = rt === Const(false) ? Bottom : widenslotwrapper(aty) + elsetype = rt === Const(true) ? Bottom : widenslotwrapper(aty) + return Conditional(a, thentype, elsetype) + end + elseif f === Core.Intrinsics.not_int + aty = argtypes[2] + if isa(aty, Conditional) + thentype = rt === Const(false) ? Bottom : aty.elsetype + elsetype = rt === Const(true) ? Bottom : aty.thentype + return Conditional(aty.slot, thentype, elsetype) + end + elseif f === isdefined + a = ssa_def_slot(fargs[2], sv) + if isa(a, SlotNumber) + argtype2 = argtypes[2] + if isa(argtype2, Union) + fld = argtypes[3] + thentype = Bottom + elsetype = Bottom + for ty in uniontypes(argtype2) + cnd = isdefined_tfunc(𝕃ᵢ, ty, fld) + if isa(cnd, Const) + if cnd.val::Bool + thentype = thentype ⊔ ty + else + elsetype = elsetype ⊔ ty + end + else + thentype = thentype ⊔ ty + elsetype = elsetype ⊔ ty + end + end + return Conditional(a, thentype, elsetype) + else + thentype = form_partially_defined_struct(argtype2, argtypes[3]) + if thentype !== nothing + elsetype = argtype2 + if rt === Const(false) + thentype = Bottom + elseif rt === Const(true) + elsetype = Bottom + end + return Conditional(a, thentype, elsetype) + end + end + end + end + end + @assert !isa(rt, TypeVar) "unhandled TypeVar" + return rt +end + +function form_partially_defined_struct(@nospecialize(obj), @nospecialize(name)) + obj isa Const && return nothing # nothing to refine + name isa Const || return nothing + objt0 = widenconst(obj) + objt = unwrap_unionall(objt0) + objt isa DataType || return nothing + isabstracttype(objt) && return nothing + fldidx = try_compute_fieldidx(objt, name.val) + fldidx === nothing && return nothing + nminfld = datatype_min_ninitialized(objt) + if ismutabletype(objt) + # A mutable struct can have non-contiguous undefined fields, but `PartialStruct` cannot + # model such a state. So here `PartialStruct` can be used to represent only the + # objects where the field following the minimum initialized fields is also defined. 
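+ # (illustrative reading of the check below: with `nminfld` guaranteed-initialized fields, an `isdefined` + # query on field `nminfld+1` is representable directly, while a query on a later field is only representable + # when `obj` is already a `PartialStruct` covering every preceding field)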
+ if fldidx ≠ nminfld+1 + # if it is already represented as a `PartialStruct`, we can add one more + # `isdefined`-field information on top of those implied by its `fields` + if !(obj isa PartialStruct && fldidx == length(obj.fields)+1) + return nothing + end + end + else + fldidx > nminfld || return nothing + end + return PartialStruct(fallback_lattice, objt0, Any[obj isa PartialStruct && i≤length(obj.fields) ? + obj.fields[i] : fieldtype(objt0,i) for i = 1:fldidx]) +end + +function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any}, call::CallMeta) + na = length(argtypes) + if isvarargtype(argtypes[end]) + if na ≤ 2 + return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + elseif na > 4 + return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + end + a2 = argtypes[2] + a3 = unwrapva(argtypes[3]) + nothrow = false + elseif na == 3 + a2 = argtypes[2] + a3 = argtypes[3] + ⊑ = partialorder(typeinf_lattice(interp)) + nothrow = a2 ⊑ TypeVar && (a3 ⊑ Type || a3 ⊑ TypeVar) + else + return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + end + canconst = true + if isa(a3, Const) + body = a3.val + elseif isType(a3) + body = a3.parameters[1] + canconst = false + else + return CallMeta(Any, Any, Effects(EFFECTS_TOTAL; nothrow), call.info) + end + if !(isa(body, Type) || isa(body, TypeVar)) + return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + end + if has_free_typevars(body) + if isa(a2, Const) + tv = a2.val + elseif isa(a2, PartialTypeVar) + tv = a2.tv + canconst = false + else + return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + end + isa(tv, TypeVar) || return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + body = UnionAll(tv, body) + end + ret = canconst ? Const(body) : Type{body} + return CallMeta(ret, Any, Effects(EFFECTS_TOTAL; nothrow), call.info) +end + +function ci_abi(ci::CodeInstance) + def = ci.def + isa(def, ABIOverride) && return def.abi + (def::MethodInstance).specTypes +end + +function get_ci_mi(ci::CodeInstance) + def = ci.def + isa(def, ABIOverride) && return def.def + return def::MethodInstance +end + +function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState) + argtypes = arginfo.argtypes + ft′ = argtype_by_index(argtypes, 2) + ft = widenconst(ft′) + ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + types = argtype_by_index(argtypes, 3) + if types isa Const && types.val isa Union{Method, CodeInstance} + method_or_ci = types.val + if isa(method_or_ci, CodeInstance) + our_world = sv.world.this + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + specsig = ci_abi(method_or_ci) + defdef = get_ci_mi(method_or_ci).def + exct = method_or_ci.exctype + if !hasintersect(argtype, specsig) + return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + elseif !(argtype <: specsig) || ((!isa(method_or_ci.def, ABIOverride) && isa(defdef, Method)) && !(argtype <: defdef.sig)) + exct = Union{exct, TypeError} + end + callee_valid_range = WorldRange(method_or_ci.min_world, method_or_ci.max_world) + if !(our_world in callee_valid_range) + if our_world < first(callee_valid_range) + update_valid_age!(sv, WorldRange(first(sv.world.valid_worlds), first(callee_valid_range)-1)) + else + update_valid_age!(sv, WorldRange(last(callee_valid_range)+1, last(sv.world.valid_worlds))) + end + return Future(CallMeta(Bottom, ErrorException, EFFECTS_THROWS, NoCallInfo())) + end + # TODO: When we add curing, we may want to assume this is nothrow + if 
(method_or_ci.owner === Nothing && method_or_ci.def.def isa Method) + exct = Union{exct, ErrorException} + end + update_valid_age!(sv, callee_valid_range) + return Future(CallMeta(method_or_ci.rettype, exct, Effects(decode_effects(method_or_ci.ipo_purity_bits), nothrow=(exct===Bottom)), + InvokeCICallInfo(method_or_ci))) + else + method = method_or_ci::Method + types = method # argument value + lookupsig = method.sig # edge kind + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + nargtype = typeintersect(lookupsig, argtype) + nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + # Fall through to generic invoke handling + end + else + hasintersect(widenconst(types), Union{Method, CodeInstance}) && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) + isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + unwrapped = unwrap_unionall(types) + types === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + if !(unwrapped isa DataType && unwrapped.name === Tuple.name) + return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + end + argtype = argtypes_to_type(argtype_tail(argtypes, 4)) + nargtype = typeintersect(types, argtype) + nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + isdispatchelem(ft) || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below + ft = ft::DataType + lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type + nargtype = Tuple{ft, nargtype.parameters...} + argtype = Tuple{ft, argtype.parameters...} + matched, valid_worlds = findsup(lookupsig, method_table(interp)) + matched === nothing && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + update_valid_age!(sv, valid_worlds) + method = matched.method + end + tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector + ti = tienv[1] + env = tienv[2]::SimpleVector + mresult = abstract_call_method(interp, method, ti, env, false, si, sv)::Future + match = MethodMatch(ti, env, method, argtype <: method.sig) + ft′_box = Core.Box(ft′) + lookupsig_box = Core.Box(lookupsig) + invokecall = InvokeCall(types) + return Future{CallMeta}(mresult, interp, sv) do result, interp, sv + (; rt, exct, effects, edge, volatile_inf_result) = result + local ft′ = ft′_box.contents + sig = match.spec_types + argtypes′ = invoke_rewrite(arginfo.argtypes) + fargs = arginfo.fargs + fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs) + arginfo′ = ArgInfo(fargs′, argtypes′) + # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons + # for i in 1:length(argtypes′) + # t, a = ti.parameters[i], argtypes′[i] + # argtypes′[i] = t ⊑ a ?
t : a + # end + 𝕃ₚ = ipo_lattice(interp) + ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ) + f = singleton_type(ft′) + const_call_result = abstract_call_method_with_const_args(interp, + result, f, arginfo′, si, match, sv, invokecall) + const_result = volatile_inf_result + if const_call_result !== nothing + const_edge = nothing + if const_call_result.rt ⊑ rt + (; rt, effects, const_result, const_edge) = const_call_result + end + if const_call_result.exct ⋤ exct + (; exct, const_result, const_edge) = const_call_result + end + if const_edge !== nothing + edge = const_edge + end + end + rt = from_interprocedural!(interp, rt, sv, arginfo′, sig) + info = InvokeCallInfo(edge, match, const_result, lookupsig_box.contents) + if !match.fully_covers + effects = Effects(effects; nothrow=false) + exct = exct ⊔ TypeError + end + return CallMeta(rt, exct, effects, info) + end +end + +function invoke_rewrite(xs::Vector{Any}) + x0 = xs[2] + newxs = xs[3:end] + newxs[1] = x0 + return newxs +end + +function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState) + if length(argtypes) == 3 + finalizer_argvec = Any[argtypes[2], argtypes[3]] + call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false, false), sv, #=max_methods=#1)::Future + return Future{CallMeta}(call, interp, sv) do call, interp, sv + return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects)) + end + end + return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo())) +end + +function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState) + na = length(argtypes) + ⊔ = join(typeinf_lattice(interp)) + if na == 2 + argtype2 = argtypes[2] + if isvarargtype(argtype2) + exct = unwrapva(argtype2) ⊔ ArgumentError + else + exct = argtype2 + end + elseif na == 3 && isvarargtype(argtypes[3]) + exct = argtypes[2] ⊔ ArgumentError + else + exct = ArgumentError + end + return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())) +end + +function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState) + exct = if length(argtypes) == 1 + ArgumentError + elseif !isvarargtype(argtypes[2]) + MethodError + else + Union{MethodError, ArgumentError} + end + return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())) +end + +const generic_getglobal_effects = Effects(EFFECTS_THROWS, consistent=ALWAYS_FALSE, inaccessiblememonly=ALWAYS_FALSE) +const generic_getglobal_exct = Union{ArgumentError, TypeError, ConcurrencyViolationError, UndefVarError} +function abstract_eval_getglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s)) + ⊑ = partialorder(typeinf_lattice(interp)) + if M isa Const && s isa Const + M, s = M.val, s.val + if M isa Module && s isa Symbol + (ret, bpart) = abstract_eval_globalref(interp, GlobalRef(M, s), saw_latestworld, sv) + return CallMeta(ret, bpart === nothing ? 
NoCallInfo() : GlobalAccessInfo(bpart)) + end + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + elseif !hasintersect(widenconst(M), Module) || !hasintersect(widenconst(s), Symbol) + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + elseif M ⊑ Module && s ⊑ Symbol + return CallMeta(Any, UndefVarError, generic_getglobal_effects, NoCallInfo()) + end + return CallMeta(Any, Union{UndefVarError, TypeError}, generic_getglobal_effects, NoCallInfo()) +end + +function merge_exct(cm::CallMeta, @nospecialize(exct)) + if exct !== Bottom + cm = CallMeta(cm.rt, Union{cm.exct, exct}, Effects(cm.effects; nothrow=false), cm.info) + end + return cm +end + +function abstract_eval_getglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s), @nospecialize(order)) + goe = global_order_exct(order, #=loading=#true, #=storing=#false) + cm = abstract_eval_getglobal(interp, sv, saw_latestworld, M, s) + return merge_exct(cm, goe) +end + +function abstract_eval_getglobal(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) + if length(argtypes) == 3 + return abstract_eval_getglobal(interp, sv, saw_latestworld, argtypes[2], argtypes[3]) + elseif length(argtypes) == 4 + return abstract_eval_getglobal(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4]) + elseif !isvarargtype(argtypes[end]) || length(argtypes) > 5 + return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo()) + else + return CallMeta(Any, generic_getglobal_exct, generic_getglobal_effects, NoCallInfo()) + end +end + +@nospecs function abstract_eval_get_binding_type(interp::AbstractInterpreter, sv::AbsIntState, M, s) + ⊑ = partialorder(typeinf_lattice(interp)) + if isa(M, Const) && isa(s, Const) + (M, s) = (M.val, s.val) + if !isa(M, Module) || !isa(s, Symbol) + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + end + partition = abstract_eval_binding_partition!(interp, GlobalRef(M, s), sv) + + if is_some_guard(binding_kind(partition)) + # We do not currently assume an invalidation for guard -> defined transitions + # rt = Const(nothing) + rt = Type + elseif is_some_const_binding(binding_kind(partition)) + rt = Const(Any) + else + rt = Const(partition_restriction(partition)) + end + return CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo()) + elseif !hasintersect(widenconst(M), Module) || !hasintersect(widenconst(s), Symbol) + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + elseif M ⊑ Module && s ⊑ Symbol + return CallMeta(Type, Union{}, EFFECTS_TOTAL, NoCallInfo()) + end + return CallMeta(Type, TypeError, EFFECTS_THROWS, NoCallInfo()) +end + +function abstract_eval_get_binding_type(interp::AbstractInterpreter, sv::AbsIntState, argtypes::Vector{Any}) + if length(argtypes) == 3 + return abstract_eval_get_binding_type(interp, sv, argtypes[2], argtypes[3]) + elseif !isvarargtype(argtypes[end]) || length(argtypes) > 4 + return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo()) + end + return CallMeta(Type, Union{TypeError, ArgumentError}, EFFECTS_THROWS, NoCallInfo()) +end + +const setglobal!_effects = Effects(EFFECTS_TOTAL; effect_free=ALWAYS_FALSE, nothrow=false, inaccessiblememonly=ALWAYS_FALSE) + +function abstract_eval_setglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s), @nospecialize(v)) + if isa(M, Const) && isa(s, Const) + M, s = M.val, s.val + if M isa Module && s isa Symbol + (rt, exct), 
partition = global_assignment_rt_exct(interp, sv, saw_latestworld, GlobalRef(M, s), v) + return CallMeta(rt, exct, Effects(setglobal!_effects, nothrow=exct===Bottom), GlobalAccessInfo(partition)) + end + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + end + ⊑ = partialorder(typeinf_lattice(interp)) + if !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol)) + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + elseif M ⊑ Module && s ⊑ Symbol + return CallMeta(v, ErrorException, setglobal!_effects, NoCallInfo()) + end + return CallMeta(v, Union{TypeError, ErrorException}, setglobal!_effects, NoCallInfo()) +end + +function abstract_eval_setglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, @nospecialize(M), @nospecialize(s), @nospecialize(v), @nospecialize(order)) + goe = global_order_exct(order, #=loading=#false, #=storing=#true) + cm = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v) + return merge_exct(cm, goe) +end + +const generic_setglobal!_exct = Union{ArgumentError, TypeError, ErrorException, ConcurrencyViolationError} + +function abstract_eval_setglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) + if length(argtypes) == 4 + return abstract_eval_setglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4]) + elseif length(argtypes) == 5 + return abstract_eval_setglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4], argtypes[5]) + elseif !isvarargtype(argtypes[end]) || length(argtypes) > 6 + return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo()) + else + return CallMeta(Any, generic_setglobal!_exct, setglobal!_effects, NoCallInfo()) + end +end + +function abstract_eval_swapglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, + @nospecialize(M), @nospecialize(s), @nospecialize(v)) + scm = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v) + scm.rt === Bottom && return scm + gcm = abstract_eval_getglobal(interp, sv, saw_latestworld, M, s) + return CallMeta(gcm.rt, Union{scm.exct,gcm.exct}, merge_effects(scm.effects, gcm.effects), scm.info) +end + +function abstract_eval_swapglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, + @nospecialize(M), @nospecialize(s), @nospecialize(v), @nospecialize(order)) + scm = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v, order) + scm.rt === Bottom && return scm + gcm = abstract_eval_getglobal(interp, sv, saw_latestworld, M, s, order) + return CallMeta(gcm.rt, Union{scm.exct,gcm.exct}, merge_effects(scm.effects, gcm.effects), scm.info) +end + +function abstract_eval_swapglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) + if length(argtypes) == 4 + return abstract_eval_swapglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4]) + elseif length(argtypes) == 5 + return abstract_eval_swapglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4], argtypes[5]) + elseif !isvarargtype(argtypes[end]) || length(argtypes) > 6 + return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo()) + else + return CallMeta(Any, Union{generic_getglobal_exct,generic_setglobal!_exct}, setglobal!_effects, NoCallInfo()) + end +end + +function abstract_eval_setglobalonce!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) + if length(argtypes) in (4, 5, 6) + cm = 
abstract_eval_setglobal!(interp, sv, saw_latestworld, argtypes[2], argtypes[3], argtypes[4]) + if length(argtypes) >= 5 + goe = global_order_exct(argtypes[5], #=loading=#true, #=storing=#true) + cm = merge_exct(cm, goe) + end + if length(argtypes) == 6 + goe = global_order_exct(argtypes[6], #=loading=#true, #=storing=#false) + cm = merge_exct(cm, goe) + end + return CallMeta(Bool, cm.exct, cm.effects, cm.info) + elseif !isvarargtype(argtypes[end]) || length(argtypes) > 6 + return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo()) + else + return CallMeta(Bool, generic_setglobal!_exct, setglobal!_effects, NoCallInfo()) + end +end + +function abstract_eval_replaceglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) + if length(argtypes) in (5, 6, 7) + (M, s, x, v) = argtypes[2], argtypes[3], argtypes[4], argtypes[5] + T = nothing + if isa(M, Const) && isa(s, Const) + M, s = M.val, s.val + M isa Module || return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + s isa Symbol || return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + partition = abstract_eval_binding_partition!(interp, GlobalRef(M, s), sv) + rte = abstract_eval_partition_load(interp, partition) + if binding_kind(partition) == BINDING_KIND_GLOBAL + T = partition_restriction(partition) + end + exct = Union{rte.exct, global_assignment_binding_rt_exct(interp, partition, v)[2]} + effects = merge_effects(rte.effects, Effects(setglobal!_effects, nothrow=exct===Bottom)) + sg = CallMeta(Any, exct, effects, GlobalAccessInfo(partition)) + else + sg = abstract_eval_setglobal!(interp, sv, saw_latestworld, M, s, v) + end + if length(argtypes) >= 6 + goe = global_order_exct(argtypes[6], #=loading=#true, #=storing=#true) + sg = merge_exct(sg, goe) + end + if length(argtypes) == 7 + goe = global_order_exct(argtypes[7], #=loading=#true, #=storing=#false) + sg = merge_exct(sg, goe) + end + rt = T === nothing ? + ccall(:jl_apply_cmpswap_type, Any, (Any,), S) where S : + ccall(:jl_apply_cmpswap_type, Any, (Any,), T) + return CallMeta(rt, sg.exct, sg.effects, sg.info) + elseif !isvarargtype(argtypes[end]) || length(argtypes) > 8 + return CallMeta(Union{}, ArgumentError, EFFECTS_THROWS, NoCallInfo()) + else + return CallMeta(Any, Union{generic_getglobal_exct,generic_setglobal!_exct}, setglobal!_effects, NoCallInfo()) + end +end + +function argtypes_are_actually_getglobal(argtypes::Vector{Any}) + length(argtypes) in (3, 4) || return false + M = argtypes[2] + s = argtypes[3] + isa(M, Const) || return false + isa(s, Const) || return false + return isa(M.val, Module) && isa(s.val, Symbol) +end + +# call where the function is known exactly +function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), + arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, + max_methods::Int = get_max_methods(interp, f, sv)) + (; fargs, argtypes) = arginfo + la = length(argtypes) + 𝕃ᵢ = typeinf_lattice(interp) + if isa(f, Builtin) + if f === _apply_iterate + return abstract_apply(interp, argtypes, si, sv, max_methods) + elseif f === invoke + return abstract_invoke(interp, arginfo, si, sv) + elseif f === modifyfield! || f === Core.modifyglobal! || + f === Core.memoryrefmodify! 
|| f === atomic_pointermodify + return abstract_modifyop!(interp, f, argtypes, si, sv) + elseif f === Core.finalizer + return abstract_finalizer(interp, argtypes, sv) + elseif f === applicable + return abstract_applicable(interp, argtypes, sv, max_methods) + elseif f === throw + return abstract_throw(interp, argtypes, sv) + elseif f === Core.throw_methoderror + return abstract_throw_methoderror(interp, argtypes, sv) + elseif f === Core.getglobal + return Future(abstract_eval_getglobal(interp, sv, si.saw_latestworld, argtypes)) + elseif f === Core.setglobal! + return Future(abstract_eval_setglobal!(interp, sv, si.saw_latestworld, argtypes)) + elseif f === Core.swapglobal! + return Future(abstract_eval_swapglobal!(interp, sv, si.saw_latestworld, argtypes)) + elseif f === Core.setglobalonce! + return Future(abstract_eval_setglobalonce!(interp, sv, si.saw_latestworld, argtypes)) + elseif f === Core.replaceglobal! + return Future(abstract_eval_replaceglobal!(interp, sv, si.saw_latestworld, argtypes)) + elseif f === Core.getfield && argtypes_are_actually_getglobal(argtypes) + return Future(abstract_eval_getglobal(interp, sv, si.saw_latestworld, argtypes)) + elseif f === Core.isdefined && argtypes_are_actually_getglobal(argtypes) + return Future(abstract_eval_isdefinedglobal(interp, argtypes[2], argtypes[3], Const(true), + length(argtypes) == 4 ? argtypes[4] : Const(:unordered), + si.saw_latestworld, sv)) + elseif f === Core.isdefinedglobal && 3 <= length(argtypes) <= 5 + return Future(abstract_eval_isdefinedglobal(interp, argtypes[2], argtypes[3], + length(argtypes) >= 4 ? argtypes[4] : Const(true), + length(argtypes) >= 5 ? argtypes[5] : Const(:unordered), + si.saw_latestworld, sv)) + elseif f === Core.get_binding_type + return Future(abstract_eval_get_binding_type(interp, sv, argtypes)) + end + rt = abstract_call_builtin(interp, f, arginfo, sv) + ft = popfirst!(argtypes) + effects = builtin_effects(𝕃ᵢ, f, argtypes, rt) + if effects.nothrow + exct = Union{} + else + exct = builtin_exct(𝕃ᵢ, f, argtypes, rt) + end + pushfirst!(argtypes, ft) + refinements = nothing + if sv isa InferenceState && f === typeassert + # perform very limited back-propagation of invariants after this type assertion + if rt !== Bottom && isa(fargs, Vector{Any}) + farg2 = ssa_def_slot(fargs[2], sv) + if farg2 isa SlotNumber + refinements = SlotRefinement(farg2, rt) + end + end + end + return Future(CallMeta(rt, exct, effects, NoCallInfo(), refinements)) + elseif isa(f, Core.OpaqueClosure) + # calling an OpaqueClosure about which we have no information returns no information + return Future(CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo())) + elseif f === TypeVar && !isvarargtype(argtypes[end]) + # Manually look through the definition of TypeVar to + # make sure to be able to get `PartialTypeVar`s out. 
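+ # (for reference, illustrative only) the constructor forms are `TypeVar(:T)` (la == 2), + # `TypeVar(:T, ub)` (la == 3) and `TypeVar(:T, lb, ub)` (la == 4), which is what the + # bounds extraction in the `la == 3` / `la == 4` branches below assumes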
+ 2 ≤ la ≤ 4 || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + # make sure generic code is prepared for inlining if needed later + let T = Any[Type{TypeVar}, Any, Any, Any] + resize!(T, la) + atype = Tuple{T...} + T[1] = Const(TypeVar) + let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)::Future + return Future{CallMeta}(call, interp, sv) do call, interp, sv + n = argtypes[2] + ub_var = Const(Any) + lb_var = Const(Union{}) + if la == 4 + ub_var = argtypes[4] + lb_var = argtypes[3] + elseif la == 3 + ub_var = argtypes[3] + end + pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var) + typevar_argtypes = Any[n, lb_var, ub_var] + effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT) + if effects.nothrow + exct = Union{} + else + exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT) + end + return CallMeta(pT, exct, effects, call.info) + end + end + end + elseif f === UnionAll + let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)::Future + return Future{CallMeta}(call, interp, sv) do call, interp, sv + return abstract_call_unionall(interp, argtypes, call) + end + end + elseif f === Tuple && la == 2 + aty = argtypes[2] + ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty) + if !isconcretetype(ty) + return Future(CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo())) + end + elseif is_return_type(f) + return return_type_tfunc(interp, argtypes, si, sv) + elseif la == 3 && f === Core.:(!==) + # mark !== as exactly a negated call to === + let callfuture = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)::Future, + rtfuture = abstract_call_known(interp, (===), arginfo, si, sv, max_methods)::Future + return Future{CallMeta}(isready(callfuture) && isready(rtfuture), interp, sv) do interp, sv + local rty = rtfuture[].rt + if isa(rty, Conditional) + return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else + elseif isa(rty, Const) + return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure()) + end + return callfuture[] + end + end + elseif la == 3 && f === Core.:(>:) + # mark issupertype as a exact alias for issubtype + # swap T1 and T2 arguments and call <: + if fargs !== nothing && length(fargs) == 3 + fargs = Any[<:, fargs[3], fargs[2]] + else + fargs = nothing + end + argtypes = Any[typeof(<:), argtypes[3], argtypes[2]] + return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods) + elseif la == 2 && f === Core.typename + return Future(CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure())) + elseif f === Core._hasmethod + return Future(_hasmethod_tfunc(interp, argtypes, sv)) + end + atype = argtypes_to_type(argtypes) + return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)::Future +end + +function abstract_call_opaque_closure(interp::AbstractInterpreter, + closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, check::Bool=true) + sig = argtypes_to_type(arginfo.argtypes) + tt = closure.typ + ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt) + ocargsig′ = unwrap_unionall(ocargsig) + ocargsig′ isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig) + 
hasintersect(sig, ocsig) || return Future(CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo())) + ocmethod = closure.source::Method + if !isdefined(ocmethod, :source) + # This opaque closure was created from optimized source. We cannot infer it further. + ocrt = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[2], tt) + if isa(ocrt, DataType) + return Future(CallMeta(ocrt, Any, Effects(), NoCallInfo())) + end + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + end + match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig) + mresult = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv) + ocsig_box = Core.Box(ocsig) + return Future{CallMeta}(mresult, interp, sv) do result, interp, sv + (; rt, exct, effects, volatile_inf_result, edge, edgecycle) = result + 𝕃ₚ = ipo_lattice(interp) + ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ) + const_result = volatile_inf_result + if !edgecycle + const_call_result = abstract_call_method_with_const_args(interp, result, + #=f=#nothing, arginfo, si, match, sv) + if const_call_result !== nothing + const_edge = nothing + if const_call_result.rt ⊑ rt + (; rt, effects, const_result, const_edge) = const_call_result + end + if const_call_result.exct ⋤ exct + (; exct, const_result, const_edge) = const_call_result + end + if const_edge !== nothing + edge = const_edge + end + end + end + if check # analyze implicit type asserts on argument and return type + ftt = closure.typ + rty = (unwrap_unionall(ftt)::DataType).parameters[2] + rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, ftt) + if !(rt ⊑ rty && sig ⊑ ocsig_box.contents) + effects = Effects(effects; nothrow=false) + exct = exct ⊔ TypeError + end + end + rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) + info = OpaqueClosureCallInfo(edge, match, const_result) + return CallMeta(rt, exct, effects, info) + end +end + +function most_general_argtypes(closure::PartialOpaque) + ret = Any[] + cc = widenconst(closure) + argt = (unwrap_unionall(cc)::DataType).parameters[1] + if !isa(argt, DataType) || argt.name !== typename(Tuple) + argt = Tuple + end + return Any[argt.parameters...] 
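+ # (illustrative) e.g. for a closure whose widened type is `Core.OpaqueClosure{Tuple{Int,String}, Bool}`, + # the argument tuple is `Tuple{Int,String}`, so this returns `Any[Int, String]`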
+end + +function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft), + arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, + max_methods::Int) + if isa(ft, PartialOpaque) + newargtypes = copy(arginfo.argtypes) + newargtypes[1] = ft.env + return abstract_call_opaque_closure(interp, + ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) + end + wft = widenconst(ft) + if hasintersect(wft, Builtin) + add_remark!(interp, sv, "Could not identify method table for call") + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + elseif hasintersect(wft, Core.OpaqueClosure) + uft = unwrap_unionall(wft) + if isa(uft, DataType) + return Future(CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo())) + end + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + end + # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic + atype = argtypes_to_type(arginfo.argtypes) + return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)::Future +end + +# call where the function is any lattice element +function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, + sv::AbsIntState, max_methods::Int=typemin(Int)) + ft = widenslotwrapper(arginfo.argtypes[1]) + f = singleton_type(ft) + if f === nothing + max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods + return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods) + end + max_methods = max_methods == typemin(Int) ? get_max_methods(interp, f, sv) : max_methods + return abstract_call_known(interp, f, arginfo, si, sv, max_methods) +end + +function sp_type_rewrap(@nospecialize(T), mi::MethodInstance, isreturn::Bool) + isref = false + if unwrapva(T) === Bottom + return Bottom + elseif isa(T, Type) + if isa(T, DataType) && (T::DataType).name === Ref.body.name + isref = true + T = T.parameters[1] + if isreturn && T === Any + return Bottom # a return type of Ref{Any} is invalid + end + end + else + return Any + end + if isa(mi.def, Method) + spsig = mi.def.sig + if isa(spsig, UnionAll) + if !isempty(mi.sparam_vals) + sparam_vals = Any[isvarargtype(v) ? TypeVar(:N, Union{}, Any) : + v for v in mi.sparam_vals] + T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals) + isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any + for v in sparam_vals + if isa(v, TypeVar) + T = UnionAll(v, T) + end + end + if has_free_typevars(T) + fv = ccall(:jl_find_free_typevars, Vector{Any}, (Any,), T) + for v in fv + T = UnionAll(v, T) + end + end + else + T = rewrap_unionall(T, spsig) + end + end + end + return unwraptv(T) +end + +function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, sstate::StatementState, sv::AbsIntState) + f = abstract_eval_value(interp, e.args[2], sstate, sv) + # rt = sp_type_rewrap(e.args[3], sv.linfo, true) # verify that the result type make sense? 
+ # rt === Bottom && return RTEffects(Union{}, Any, EFFECTS_UNKNOWN) + atv = e.args[4]::SimpleVector + at = Vector{Any}(undef, length(atv) + 1) + at[1] = f + for i = 1:length(atv) + atᵢ = at[i + 1] = sp_type_rewrap(atv[i], frame_instance(sv), false) + atᵢ === Bottom && return RTEffects(Union{}, Any, EFFECTS_UNKNOWN) + end + # this may be the wrong world for the call, + # but some of the result is likely to be valid anyways + # and that may help generate better codegen + abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false, false), sv)::Future + rt = e.args[1] + isconcretetype(rt) || (rt = Any) + return RTEffects(rt, Any, EFFECTS_UNKNOWN) +end + +function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), sstate::StatementState, sv::AbsIntState) + if isa(e, SSAValue) + return RTEffects(abstract_eval_ssavalue(e, sv), Union{}, EFFECTS_TOTAL) + elseif isa(e, SlotNumber) + if sstate.vtypes !== nothing + vtyp = sstate.vtypes[slot_id(e)] + if !vtyp.undef + return RTEffects(vtyp.typ, Union{}, EFFECTS_TOTAL) + end + return RTEffects(vtyp.typ, UndefVarError, EFFECTS_THROWS) + end + return RTEffects(Any, UndefVarError, EFFECTS_THROWS) + elseif isa(e, Argument) + if sstate.vtypes !== nothing + return RTEffects(sstate.vtypes[slot_id(e)].typ, Union{}, EFFECTS_TOTAL) + else + @assert isa(sv, IRInterpretationState) + return RTEffects(sv.ir.argtypes[e.n], Union{}, EFFECTS_TOTAL) # TODO frame_argtypes(sv)[e.n] and remove the assertion + end + elseif isa(e, GlobalRef) + # No need for an edge since an explicit GlobalRef will be picked up by the source scan + return abstract_eval_globalref(interp, e, sstate.saw_latestworld, sv)[1] + end + if isa(e, QuoteNode) + e = e.value + end + effects = Effects(EFFECTS_TOTAL; + inaccessiblememonly = is_mutation_free_argtype(typeof(e)) ? ALWAYS_TRUE : ALWAYS_FALSE) + return RTEffects(Const(e), Union{}, effects) +end + +function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsIntState) + if e.head === :call && length(e.args) ≥ 1 + # TODO: We still have non-linearized cglobal + @assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple) + else + @assert e.head !== :(=) + # Some of our tests expect us to handle invalid IR here and error later + # - permit that for now. 
+ # @assert false "Unexpected EXPR head in value position" + merge_effects!(interp, sv, EFFECTS_UNKNOWN) + end + return Any +end + +function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), sstate::StatementState, sv::AbsIntState) + if isa(e, Expr) + return abstract_eval_value_expr(interp, e, sv) + else + (;rt, effects) = abstract_eval_special_value(interp, e, sstate, sv) + merge_effects!(interp, sv, effects) + return collect_limitations!(rt, sv) + end +end + +function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, sstate::StatementState, sv::AbsIntState) + n = length(ea) + argtypes = Vector{Any}(undef, n) + @inbounds for i = 1:n + ai = abstract_eval_value(interp, ea[i], sstate, sv) + if ai === Bottom + return nothing + end + argtypes[i] = ai + end + return argtypes +end + +struct RTEffects + rt::Any + exct::Any + effects::Effects + refinements # ::Union{Nothing,SlotRefinement,Vector{Any}} + function RTEffects(rt, exct, effects::Effects, refinements=nothing) + @nospecialize rt exct refinements + return new(rt, exct, effects, refinements) + end +end + +CallMeta(rte::RTEffects, info::CallInfo) = + CallMeta(rte.rt, rte.exct, rte.effects, info, rte.refinements) + +function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sstate::StatementState, sv::InferenceState) + unused = call_result_unused(sv, sv.currpc) + if unused + add_curr_ssaflag!(sv, IR_FLAG_UNUSED) + end + si = StmtInfo(!unused, sstate.saw_latestworld) + call = abstract_call(interp, arginfo, si, sv)::Future + Future{Any}(call, interp, sv) do call, interp, sv + # this only is needed for the side-effect, sequenced before any task tries to consume the return value, + # which this will do even without returning this Future + sv.stmt_info[sv.currpc] = call.info + nothing + end + return call +end + +function abstract_eval_call(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState) + ea = e.args + argtypes = collect_argtypes(interp, ea, sstate, sv) + if argtypes === nothing + return Future(RTEffects(Bottom, Any, Effects())) + end + arginfo = ArgInfo(ea, argtypes) + call = abstract_call(interp, arginfo, sstate, sv)::Future + return Future{RTEffects}(call, interp, sv) do call, interp, sv + (; rt, exct, effects, refinements) = call + return RTEffects(rt, exct, effects, refinements) + end +end + + +function abstract_eval_new(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + rt, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], sstate, sv), true) + ut = unwrap_unionall(rt) + exct = Union{ErrorException,TypeError} + if isa(ut, DataType) && !isabstracttype(ut) + ismutable = ismutabletype(ut) + fcount = datatype_fieldcount(ut) + nargs = length(e.args) - 1 + has_any_uninitialized = (fcount === nothing || (fcount > nargs && (let t = rt + any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount) + end))) + if has_any_uninitialized + # allocation with undefined field is inconsistent always + consistent = ALWAYS_FALSE + elseif ismutable + # mutable allocation isn't `:consistent`, but we still have a chance that + # return type information later refines the `:consistent`-cy of the method + consistent = CONSISTENT_IF_NOTRETURNED + else + consistent = ALWAYS_TRUE # immutable allocation is consistent + end + if isconcretedispatch(rt) + nothrow = true + @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end + ats = Vector{Any}(undef, 
nargs) + local anyrefine = false + local allconst = true + for i = 1:nargs + at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], sstate, sv)) + ft = fieldtype(rt, i) + nothrow && (nothrow = ⊑(𝕃ᵢ, at, ft)) + at = tmeet(𝕃ᵢ, at, ft) + at === Bottom && return RTEffects(Bottom, TypeError, EFFECTS_THROWS) + if ismutable && !isconst(rt, i) + ats[i] = ft # can't constrain this field (as it may be modified later) + continue + end + allconst &= isa(at, Const) + if !anyrefine + anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information + ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type + end + ats[i] = at + end + if fcount == nargs && consistent === ALWAYS_TRUE && allconst + argvals = Vector{Any}(undef, nargs) + for j in 1:nargs + argvals[j] = (ats[j]::Const).val + end + rt = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), rt, argvals, nargs)) + elseif anyrefine || nargs > datatype_min_ninitialized(rt) + # propagate partially initialized struct as `PartialStruct` when: + # - any refinement information is available (`anyrefine`), or when + # - `nargs` is greater than `n_initialized` derived from the struct type + # information alone + rt = PartialStruct(𝕃ᵢ, rt, ats) + end + else + rt = refine_partial_type(rt) + nothrow = false + end + else + consistent = ALWAYS_FALSE + nothrow = false + end + nothrow && (exct = Union{}) + effects = Effects(EFFECTS_TOTAL; consistent, nothrow) + return RTEffects(rt, exct, effects) +end + +function abstract_eval_splatnew(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + rt, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], sstate, sv), true) + nothrow = false + if length(e.args) == 2 && isconcretedispatch(rt) && !ismutabletype(rt) + at = abstract_eval_value(interp, e.args[2], sstate, sv) + n = fieldcount(rt) + if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) && + (let t = rt, at = at + all(i::Int -> getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n) + end)) + nothrow = isexact + rt = Const(ccall(:jl_new_structt, Any, (Any, Any), rt, at.val)) + elseif (isa(at, PartialStruct) && ⊑(𝕃ᵢ, at, Tuple) && n > 0 && + n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) && + (let t = rt, at = at + all(i::Int -> ⊑(𝕃ᵢ, (at.fields::Vector{Any})[i], fieldtype(t, i)), 1:n) + end)) + nothrow = isexact + rt = PartialStruct(𝕃ᵢ, rt, at.fields::Vector{Any}) + end + else + rt = refine_partial_type(rt) + end + consistent = !ismutabletype(rt) ? ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED + effects = Effects(EFFECTS_TOTAL; consistent, nothrow) + return RTEffects(rt, Any, effects) +end + +function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + rt = Union{} + effects = Effects() # TODO + if length(e.args) >= 5 + ea = e.args + argtypes = collect_argtypes(interp, ea, sstate, sv) + if argtypes === nothing + rt = Bottom + effects = EFFECTS_THROWS + else + mi = frame_instance(sv) + rt = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3], + argtypes[5], argtypes[6:end], mi) + if ea[4] !== true && isa(rt, PartialOpaque) + rt = widenconst(rt) + # Propagation of PartialOpaque disabled + end + if isa(rt, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc) + # Infer this now so that the specialization is available to + # optimization. 
+ argtypes = most_general_argtypes(rt) + pushfirst!(argtypes, rt.env) + callinfo = abstract_call_opaque_closure(interp, rt, + ArgInfo(nothing, argtypes), StmtInfo(true, false), sv, #=check=#false)::Future + Future{Any}(callinfo, interp, sv) do callinfo, interp, sv + sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo) + nothing + end + end + end + end + return Future(RTEffects(rt, Any, effects)) +end + +function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState) + effects = EFFECTS_UNKNOWN + rt = abstract_eval_value(interp, e.args[1], sstate, sv) + if rt isa Const && rt.val isa Expr + # `copyast` makes copies of Exprs + rt = Expr + end + return RTEffects(rt, Any, effects) +end + +function abstract_eval_isdefined_expr(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState) + sym = e.args[1] + if isa(sym, SlotNumber) && sstate.vtypes !== nothing + vtyp = sstate.vtypes[slot_id(sym)] + if vtyp.typ === Bottom + rt = Const(false) # never assigned previously + elseif !vtyp.undef + rt = Const(true) # definitely assigned previously + else # form `Conditional` to refine `vtyp.undef` in the then branch + rt = Conditional(sym, vtyp.typ, vtyp.typ; isdefined=true) + end + return RTEffects(rt, Union{}, EFFECTS_TOTAL) + end + rt = Bool + effects = EFFECTS_TOTAL + exct = Union{} + if isexpr(sym, :static_parameter) + n = sym.args[1]::Int + if 1 <= n <= length(sv.sptypes) + sp = sv.sptypes[n] + if !sp.undef + rt = Const(true) + elseif sp.typ === Bottom + rt = Const(false) + end + end + else + effects = EFFECTS_UNKNOWN + exct = Any + end + return RTEffects(rt, exct, effects) +end + +const generic_isdefinedglobal_effects = Effects(EFFECTS_TOTAL, consistent=ALWAYS_FALSE, nothrow=false) +function abstract_eval_isdefinedglobal(interp::AbstractInterpreter, mod::Module, sym::Symbol, allow_import::Union{Bool, Nothing}, saw_latestworld::Bool, sv::AbsIntState) + rt = Bool + if saw_latestworld + return CallMeta(RTEffects(rt, Union{}, Effects(generic_isdefinedglobal_effects, nothrow=true)), NoCallInfo()) + end + + effects = EFFECTS_TOTAL + partition = lookup_binding_partition!(interp, GlobalRef(mod, sym), sv) + if allow_import !== true && is_some_imported(binding_kind(partition)) + if allow_import === false + rt = Const(false) + else + effects = Effects(generic_isdefinedglobal_effects, nothrow=true) + end + else + partition = walk_binding_partition!(interp, partition, sv) + rte = abstract_eval_partition_load(interp, partition) + if rte.exct == Union{} + rt = Const(true) + elseif rte.rt === Union{} && rte.exct === UndefVarError + rt = Const(false) + else + effects = Effects(generic_isdefinedglobal_effects, nothrow=true) + end + end + return CallMeta(RTEffects(rt, Union{}, effects), GlobalAccessInfo(partition)) +end + +function abstract_eval_isdefinedglobal(interp::AbstractInterpreter, @nospecialize(M), @nospecialize(s), @nospecialize(allow_import_arg), @nospecialize(order_arg), saw_latestworld::Bool, sv::AbsIntState) + exct = Bottom + allow_import = true + if allow_import_arg !== nothing + if !isa(allow_import_arg, Const) + allow_import = nothing + if widenconst(allow_import_arg) != Bool + exct = Union{exct, TypeError} + end + else + allow_import = allow_import_arg.val + end + end + if order_arg !== nothing + exct = global_order_exct(order_arg, #=loading=#true, #=storing=#false) + if !(isa(order_arg, Const) && get_atomic_order(order_arg.val, #=loading=#true, #=storing=#false).x >= MEMORY_ORDER_UNORDERED.x) + exct = Union{exct, 
ConcurrencyViolationError} + end + end + if M isa Const && s isa Const + M, s = M.val, s.val + if M isa Module && s isa Symbol + return merge_exct(abstract_eval_isdefinedglobal(interp, M, s, allow_import, saw_latestworld, sv), exct) + end + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + elseif !hasintersect(widenconst(M), Module) || !hasintersect(widenconst(s), Symbol) + return CallMeta(Union{}, TypeError, EFFECTS_THROWS, NoCallInfo()) + elseif M ⊑ Module && s ⊑ Symbol + return CallMeta(Bool, Union{exct, UndefVarError}, generic_isdefinedglobal_effects, NoCallInfo()) + end + return CallMeta(Bool, Union{exct, TypeError, UndefVarError}, generic_isdefinedglobal_effects, NoCallInfo()) +end + +function abstract_eval_throw_undef_if_not(interp::AbstractInterpreter, e::Expr, sstate::StatementState, sv::AbsIntState) + condt = abstract_eval_value(interp, e.args[2], sstate, sv) + condval = maybe_extract_const_bool(condt) + rt = Nothing + exct = UndefVarError + effects = EFFECTS_THROWS + if condval isa Bool + if condval + effects = EFFECTS_TOTAL + exct = Union{} + else + rt = Union{} + end + elseif !hasintersect(widenconst(condt), Bool) + rt = Union{} + end + return RTEffects(rt, exct, effects) +end + +function abstract_eval_the_exception(::AbstractInterpreter, sv::InferenceState) + (;handler_info) = sv + if handler_info === nothing + return the_exception_info(Any) + end + (;handlers, handler_at) = handler_info + handler_id = handler_at[sv.currpc][2] + if handler_id === 0 + return the_exception_info(Any) + end + return the_exception_info(handlers[handler_id].exct) +end +abstract_eval_the_exception(::AbstractInterpreter, ::IRInterpretationState) = the_exception_info(Any) +the_exception_info(@nospecialize t) = RTEffects(t, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)) + +function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsIntState) + n = e.args[1]::Int + nothrow = false + if 1 <= n <= length(sv.sptypes) + sp = sv.sptypes[n] + rt = sp.typ + nothrow = !sp.undef + else + rt = Any + end + exct = nothrow ? Union{} : UndefVarError + effects = Effects(EFFECTS_TOTAL; nothrow) + return RTEffects(rt, exct, effects) +end + +function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, sstate::StatementState, + sv::AbsIntState)::Future{RTEffects} + ehead = e.head + if ehead === :call + return abstract_eval_call(interp, e, sstate, sv) + elseif ehead === :new + return abstract_eval_new(interp, e, sstate, sv) + elseif ehead === :splatnew + return abstract_eval_splatnew(interp, e, sstate, sv) + elseif ehead === :new_opaque_closure + return abstract_eval_new_opaque_closure(interp, e, sstate, sv) + elseif ehead === :foreigncall + return abstract_eval_foreigncall(interp, e, sstate, sv) + elseif ehead === :cfunction + return abstract_eval_cfunction(interp, e, sstate, sv) + elseif ehead === :method + rt = (length(e.args) == 1) ? 
Any : Nothing + return RTEffects(rt, Any, EFFECTS_UNKNOWN) + elseif ehead === :copyast + return abstract_eval_copyast(interp, e, sstate, sv) + elseif ehead === :invoke || ehead === :invoke_modify + error("type inference data-flow error: tried to double infer a function") + elseif ehead === :isdefined + return abstract_eval_isdefined_expr(interp, e, sstate, sv) + elseif ehead === :throw_undef_if_not + return abstract_eval_throw_undef_if_not(interp, e, sstate, sv) + elseif ehead === :boundscheck + return RTEffects(Bool, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)) + elseif ehead === :the_exception + return abstract_eval_the_exception(interp, sv) + elseif ehead === :static_parameter + return abstract_eval_static_parameter(interp, e, sv) + elseif ehead === :gc_preserve_begin || ehead === :aliasscope + return RTEffects(Any, Union{}, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE, effect_free=EFFECT_FREE_GLOBALLY)) + elseif ehead === :gc_preserve_end || ehead === :leave || ehead === :pop_exception || + ehead === :global || ehead === :popaliasscope + return RTEffects(Nothing, Union{}, Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY)) + elseif ehead === :globaldecl + return RTEffects(Nothing, Any, EFFECTS_UNKNOWN) + elseif ehead === :thunk + return RTEffects(Any, Any, Effects()) + end + # N.B.: abstract_eval_value_expr can modify the global effects, but + # we move out any arguments with effects during SSA construction later + # and recompute the effects. + rt = abstract_eval_value_expr(interp, e, sv) + return RTEffects(rt, Any, EFFECTS_TOTAL) +end + +# refine the result of instantiation of partially-known type `t` if some invariant can be assumed +function refine_partial_type(@nospecialize t) + t′ = unwrap_unionall(t) + if isa(t′, DataType) && t′.name === _NAMEDTUPLE_NAME && length(t′.parameters) == 2 && + (t′.parameters[1] === () || t′.parameters[2] === Tuple{}) + # if the first/second parameter of `NamedTuple` is known to be empty, + # the second/first argument should also be empty tuple type, + # so refine it here + return Const((;)) + end + return t +end + +function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, sstate::StatementState, sv::AbsIntState) + mi = frame_instance(sv) + t = sp_type_rewrap(e.args[2], mi, true) + for i = 3:length(e.args) + if abstract_eval_value(interp, e.args[i], sstate, sv) === Bottom + return RTEffects(Bottom, Any, EFFECTS_THROWS) + end + end + effects = foreigncall_effects(e) do @nospecialize x + abstract_eval_value(interp, x, sstate, sv) + end + cconv = e.args[5] + if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt16})) + override = decode_effects_override(v[2]) + effects = override_effects(effects, override) + end + return RTEffects(t, Any, effects) +end + +function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, sstate::StatementState, sv::AbsIntState) + rt = Union{} + for i in 1:length(phi.values) + isassigned(phi.values, i) || continue + val = phi.values[i] + # N.B.: Phi arguments are restricted to not have effects, so we can drop + # them here safely. 
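+ # (illustrative) e.g. merging incoming values typed `Const(1)` and `Float64` yields + # roughly `Union{Float64, Int64}` under the default inference lattice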
+ thisval = abstract_eval_special_value(interp, val, sstate, sv).rt + rt = tmerge(typeinf_lattice(interp), rt, thisval) + end + return rt +end + +function stmt_taints_inbounds_consistency(sv::AbsIntState) + propagate_inbounds(sv) && return true + return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS) +end + +function merge_override_effects!(interp::AbstractInterpreter, effects::Effects, sv::InferenceState) + # N.B.: This only applies to the effects of the statement itself. + # It is possible for arguments (GlobalRef/:static_parameter) to throw, + # but these will be recomputed during SSA construction later. + override = decode_statement_effects_override(sv) + effects = override_effects(effects, override) + set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS) + merge_effects!(interp, sv, effects) + return effects +end + +function override_effects(effects::Effects, override::EffectsOverride) + return Effects(effects; + consistent = override.consistent ? ALWAYS_TRUE : effects.consistent, + effect_free = override.effect_free ? ALWAYS_TRUE : effects.effect_free, + nothrow = override.nothrow ? true : effects.nothrow, + terminates = override.terminates_globally ? true : effects.terminates, + notaskstate = override.notaskstate ? true : effects.notaskstate, + inaccessiblememonly = override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly, + noub = override.noub ? ALWAYS_TRUE : + (override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE) ? NOUB_IF_NOINBOUNDS : + effects.noub, + nortcall = override.nortcall ? true : effects.nortcall) +end + +world_range(ir::IRCode) = ir.valid_worlds +world_range(ci::CodeInfo) = WorldRange(ci.min_world, ci.max_world) +world_range(compact::IncrementalCompact) = world_range(compact.ir) + +function force_binding_resolution!(g::GlobalRef) + # Force resolution of the binding + # TODO: This will go away once we switch over to fully partitioned semantics + ccall(:jl_globalref_boundp, Cint, (Any,), g) + return nothing +end + +function abstract_eval_globalref_type(g::GlobalRef, src::Union{CodeInfo, IRCode, IncrementalCompact}, retry_after_resolve::Bool=true) + worlds = world_range(src) + partition = lookup_binding_partition(min_world(worlds), g) + partition.max_world < max_world(worlds) && return Any + while is_some_imported(binding_kind(partition)) + imported_binding = partition_restriction(partition)::Core.Binding + partition = lookup_binding_partition(min_world(worlds), imported_binding) + partition.max_world < max_world(worlds) && return Any + end + if is_some_guard(binding_kind(partition)) + if retry_after_resolve + # This method is surprisingly hot. For performance, don't ask the runtime to resolve + # the binding unless necessary - doing so triggers an additional lookup, which though + # not super expensive is hot enough to show up in benchmarks. 
+ force_binding_resolution!(g) + return abstract_eval_globalref_type(g, src, false) + end + # return Union{} + return Any + end + if is_some_const_binding(binding_kind(partition)) + return Const(partition_restriction(partition)) + end + return partition_restriction(partition) +end + +function lookup_binding_partition!(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState) + force_binding_resolution!(g) + partition = lookup_binding_partition(get_inference_world(interp), g) + update_valid_age!(sv, WorldRange(partition.min_world, partition.max_world)) + partition +end + +function walk_binding_partition!(interp::AbstractInterpreter, partition::Core.BindingPartition, sv::AbsIntState) + while is_some_imported(binding_kind(partition)) + imported_binding = partition_restriction(partition)::Core.Binding + partition = lookup_binding_partition(get_inference_world(interp), imported_binding) + update_valid_age!(sv, WorldRange(partition.min_world, partition.max_world)) + end + return partition +end + +function abstract_eval_binding_partition!(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState) + partition = lookup_binding_partition!(interp, g, sv) + partition = walk_binding_partition!(interp, partition, sv) + return partition +end + +function abstract_eval_partition_load(interp::AbstractInterpreter, partition::Core.BindingPartition) + kind = binding_kind(partition) + if is_some_guard(kind) || kind == BINDING_KIND_UNDEF_CONST + if InferenceParams(interp).assume_bindings_static + return RTEffects(Union{}, UndefVarError, EFFECTS_THROWS) + else + # We do not currently assume an invalidation for guard -> defined transitions + # return RTEffects(Union{}, UndefVarError, EFFECTS_THROWS) + return RTEffects(Any, UndefVarError, generic_getglobal_effects) + end + end + + if is_defined_const_binding(kind) + rt = Const(partition_restriction(partition)) + return RTEffects(rt, Union{}, Effects(EFFECTS_TOTAL, inaccessiblememonly=is_mutation_free_argtype(rt) ? ALWAYS_TRUE : ALWAYS_FALSE)) + end + + rt = partition_restriction(partition) + return RTEffects(rt, UndefVarError, generic_getglobal_effects) +end + +function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, saw_latestworld::Bool, sv::AbsIntState) + if saw_latestworld + return Pair{RTEffects, Union{Nothing, Core.BindingPartition}}(RTEffects(Any, Any, generic_getglobal_effects), nothing) + end + partition = abstract_eval_binding_partition!(interp, g, sv) + ret = abstract_eval_partition_load(interp, partition) + if ret.rt !== Union{} && ret.exct === UndefVarError && InferenceParams(interp).assume_bindings_static + if isdefined(g, :binding) && isdefined(g.binding, :value) + ret = RTEffects(ret.rt, Union{}, Effects(generic_getglobal_effects, nothrow=true)) + end + # We do not assume in general that assigned global bindings remain assigned. + # The existence of pkgimages allows them to revert in practice. 
+ end + return Pair{RTEffects, Union{Nothing, Core.BindingPartition}}(ret, partition) +end + +function global_assignment_rt_exct(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, g::GlobalRef, @nospecialize(newty)) + if saw_latestworld + return Pair{Pair{Any,Any}, Union{Core.BindingPartition, Nothing}}( + Pair{Any,Any}(newty, Union{ErrorException, TypeError}), nothing) + end + partition = abstract_eval_binding_partition!(interp, g, sv) + return Pair{Pair{Any,Any}, Union{Core.BindingPartition, Nothing}}( + global_assignment_binding_rt_exct(interp, partition, newty), + partition) +end + +function global_assignment_binding_rt_exct(interp::AbstractInterpreter, partition::Core.BindingPartition, @nospecialize(newty)) + kind = binding_kind(partition) + if is_some_guard(kind) + return Pair{Any,Any}(newty, ErrorException) + elseif is_some_const_binding(kind) + return Pair{Any,Any}(Bottom, ErrorException) + end + ty = partition_restriction(partition) + wnewty = widenconst(newty) + if !hasintersect(wnewty, ty) + return Pair{Any,Any}(Bottom, TypeError) + elseif !(wnewty <: ty) + retty = tmeet(typeinf_lattice(interp), newty, ty) + return Pair{Any,Any}(retty, TypeError) + end + return Pair{Any,Any}(newty, Bottom) +end + +abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.ssavaluetypes) + +function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any}) + (1 ≤ s.id ≤ length(ssavaluetypes)) || throw(InvalidIRError()) + typ = ssavaluetypes[s.id] + if typ === NOT_FOUND + return Bottom + end + return typ +end + +struct BestguessInfo{Interp<:AbstractInterpreter} + interp::Interp + bestguess + nargs::Int + slottypes::Vector{Any} + changes::VarTable + function BestguessInfo(interp::Interp, @nospecialize(bestguess), nargs::Int, + slottypes::Vector{Any}, changes::VarTable) where Interp<:AbstractInterpreter + new{Interp}(interp, bestguess, nargs, slottypes, changes) + end +end + +@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo) + return widenreturn(typeinf_lattice(info.interp), rt, info) +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info) +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) + if isa(rt, MustAlias) + if 1 ≤ rt.slot ≤ info.nargs + rt = InterMustAlias(rt) + else + rt = widenmustalias(rt) + end + end + isa(rt, InterMustAlias) && return rt + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo) + ⊑ᵢ = ⊑(𝕃ᵢ) + if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool + # give up inter-procedural constraint back-propagation + # when tmerge would widen the result anyways (as an optimization) + rt = widenconditional(rt) + else + if isa(rt, Conditional) + id = rt.slot + if 1 ≤ id ≤ info.nargs + old_id_type = widenconditional(info.slottypes[id]) # same as `(states[1]::VarTable)[id].typ` + if (!(rt.thentype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.thentype) && + (!(rt.elsetype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.elsetype) + # discard this `Conditional` since it imposes + # no new constraint on the argument type + # (the caller will recreate it if 
needed) + rt = widenconditional(rt) + end + else + # discard this `Conditional` imposed on non-call arguments, + # since it's not interesting in inter-procedural context; + # we may give constraints on other call argument + rt = widenconditional(rt) + end + end + if isa(rt, Conditional) + rt = InterConditional(rt.slot, rt.thentype, rt.elsetype) + elseif is_lattice_bool(𝕃ᵢ, rt) + rt = bool_rt_to_conditional(rt, info) + end + end + if isa(rt, Conditional) + rt = InterConditional(rt) + end + isa(rt, InterConditional) && return rt + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) + bestguess = info.bestguess + if isa(bestguess, InterConditional) + # if the bestguess so far is already `Conditional`, try to convert + # this `rt` into `Conditional` on the slot to avoid overapproximation + # due to conflict of different slots + rt = bool_rt_to_conditional(rt, bestguess.slot, info) + else + # pick up the first "interesting" slot, convert `rt` to its `Conditional` + # TODO: ideally we want `Conditional` and `InterConditional` to convey + # constraints on multiple slots + for slot_id = 1:Int(info.nargs) + rt = bool_rt_to_conditional(rt, slot_id, info) + rt isa InterConditional && break + end + end + return rt +end +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo) + ⊑ᵢ = ⊑(typeinf_lattice(info.interp)) + old = info.slottypes[slot_id] + new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional + if isvarargtype(old) || isvarargtype(new) + return rt + end + if new ⊑ᵢ old && !(old ⊑ᵢ new) + if isa(rt, Const) + val = rt.val + if val === true + return InterConditional(slot_id, new, Bottom) + elseif val === false + return InterConditional(slot_id, Bottom, new) + end + elseif rt === Bool + return InterConditional(slot_id, new, new) + end + end + return rt +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn_partials(𝕃ᵢ, rt, info) +end +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn_partials(𝕃ᵢ, rt, info) +end +@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) + if isa(rt, PartialStruct) + fields = copy(rt.fields) + anyrefine = !isvarargtype(rt.fields[end]) && + length(rt.fields) > datatype_min_ninitialized(rt.typ) + 𝕃 = typeinf_lattice(info.interp) + ⊏ = strictpartialorder(𝕃) + for i in 1:length(fields) + a = fields[i] + a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info) + if !anyrefine + # TODO: consider adding && const_prop_profitable(a) here? 
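+ # (illustrative) e.g. a returned `PartialStruct` of `Some{Any}` with fields `Any[Const(1)]` keeps + # its precision here, since `Const(1)` is strictly more precise than the declared field type `Any`; + # if no field refines its declared type, the `PartialStruct` is widened to its plain type below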
+ anyrefine = has_extended_info(a) || a ⊏ fieldtype(rt.typ, i) + end + fields[i] = a + end + anyrefine && return PartialStruct(𝕃ᵢ, rt.typ, fields) + end + if isa(rt, PartialOpaque) + return rt # XXX: this case was missed in #39512 + end + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end + +@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) + return widenreturn_consts(rt) +end +@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) + return widenreturn_consts(rt) +end +@nospecializeinfer function widenreturn_consts(@nospecialize(rt)) + isa(rt, Const) && return rt + return widenconst(rt) +end + +@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) + return widenconst(rt) +end +@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) + return widenconst(rt) +end + +function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceState, from::Int, to::Int) + if from > to + if is_effect_overridden(frame, :terminates_locally) + # this backedge is known to terminate + else + merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; terminates=false)) + end + end + return nothing +end + +function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable, saw_latestworld::Bool) + frame.bb_saw_latestworld[bb] |= saw_latestworld + bbtable = frame.bb_vartables[bb] + if bbtable === nothing + # if a basic block hasn't been analyzed yet, + # we can update its state a bit more aggressively + frame.bb_vartables[bb] = copy(vartable) + return true + else + return stupdate!(𝕃ᵢ, bbtable, vartable) + end +end + +function init_vartable!(vartable::VarTable, frame::InferenceState) + nargtypes = length(frame.result.argtypes) + for i = 1:length(vartable) + vartable[i] = VarState(Bottom, i > nargtypes) + end + return vartable +end + +function update_bestguess!(interp::AbstractInterpreter, frame::InferenceState, + currstate::VarTable, @nospecialize(rt)) + bestguess = frame.bestguess + nargs = narguments(frame, #=include_va=#false) + slottypes = frame.slottypes + rt = widenreturn(rt, BestguessInfo(interp, bestguess, nargs, slottypes, currstate)) + # narrow representation of bestguess slightly to prepare for tmerge with rt + if rt isa InterConditional && bestguess isa Const + slot_id = rt.slot + old_id_type = widenconditional(slottypes[slot_id]) + if bestguess.val === true && rt.elsetype !== Bottom + bestguess = InterConditional(slot_id, old_id_type, Bottom) + elseif bestguess.val === false && rt.thentype !== Bottom + bestguess = InterConditional(slot_id, Bottom, old_id_type) + end + end + # copy limitations to return value + if !isempty(frame.pclimitations) + union!(frame.limitations, frame.pclimitations) + empty!(frame.pclimitations) + end + if !isempty(frame.limitations) + rt = LimitedAccuracy(rt, copy(frame.limitations)) + end + 𝕃ₚ = ipo_lattice(interp) + if !⊑(𝕃ₚ, rt, bestguess) + # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end + frame.bestguess = tmerge(𝕃ₚ, bestguess, rt) # new (wider) return type for frame + return true + else + return false + end +end + +function update_exc_bestguess!(interp::AbstractInterpreter, @nospecialize(exct), frame::InferenceState) + 𝕃ₚ = ipo_lattice(interp) + handler = gethandler(frame) + if handler === nothing + if !⊑(𝕃ₚ, exct, frame.exc_bestguess) + frame.exc_bestguess = tmerge(𝕃ₚ, 
frame.exc_bestguess, exct) + update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int + caller_handler = gethandler(caller, caller_pc) + caller_exct = caller_handler === nothing ? + caller.exc_bestguess : caller_handler.exct + return caller_exct !== Any + end + end + else + if !⊑(𝕃ₚ, exct, handler.exct) + handler.exct = tmerge(𝕃ₚ, handler.exct, exct) + enter = frame.src.code[handler.enter_idx]::EnterNode + exceptbb = block_for_inst(frame.cfg, enter.catch_dest) + push!(frame.ip, exceptbb) + end + end +end + +function propagate_to_error_handler!(currstate::VarTable, currsaw_latestworld::Bool, frame::InferenceState, 𝕃ᵢ::AbstractLattice) + # If this statement potentially threw, propagate the currstate to the + # exception handler, BEFORE applying any state changes. + curr_hand = gethandler(frame) + if curr_hand !== nothing + enter = frame.src.code[curr_hand.enter_idx]::EnterNode + exceptbb = block_for_inst(frame.cfg, enter.catch_dest) + if update_bbstate!(𝕃ᵢ, frame, exceptbb, currstate, currsaw_latestworld) + push!(frame.ip, exceptbb) + end + end +end + +function update_cycle_worklists!(callback, frame::InferenceState) + for (caller, caller_pc) in frame.cycle_backedges + if callback(caller, caller_pc) + push!(caller.ip, block_for_inst(caller.cfg, caller_pc)) + end + end +end + +# make as much progress on `frame` as possible (without handling cycles) +struct CurrentState + result::Future + currstate::VarTable + currsaw_latestworld::Bool + bbstart::Int + bbend::Int + CurrentState(result::Future, currstate::VarTable, currsaw_latestworld::Bool, bbstart::Int, bbend::Int) = new(result, currstate, currsaw_latestworld, bbstart, bbend) + CurrentState() = new() +end +function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextresult::CurrentState) + @assert !is_inferred(frame) + W = frame.ip + ssavaluetypes = frame.ssavaluetypes + bbs = frame.cfg.blocks + nbbs = length(bbs) + 𝕃ᵢ = typeinf_lattice(interp) + states = frame.bb_vartables + saw_latestworld = frame.bb_saw_latestworld + currbb = frame.currbb + currpc = frame.currpc + + if isdefined(nextresult, :result) + # for reasons that are fairly unclear, some state is arbitrarily on the stack instead in the InferenceState as normal + bbstart = nextresult.bbstart + bbend = nextresult.bbend + currstate = nextresult.currstate + currsaw_latestworld = nextresult.currsaw_latestworld + @goto injectresult + end + + if currbb != 1 + currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block + end + currstate = copy(states[currbb]::VarTable) + currsaw_latestworld = saw_latestworld[currbb] + while currbb <= nbbs + delete!(W, currbb) + bbstart = first(bbs[currbb].stmts) + bbend = last(bbs[currbb].stmts) + + currpc = bbstart - 1 + while currpc < bbend + currpc += 1 + frame.currpc = currpc + stmt = frame.src.code[currpc] + # If we're at the end of the basic block ... 
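+ # ... dispatch on the terminator: GotoNode and GotoIfNot update the successor work set,
+ # ReturnNode merges into the frame's bestguess, EnterNode records the handler's
+ # exception/scope types, and anything else falls through as an ordinary statement.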
+ if currpc == bbend + # Handle control flow + if isa(stmt, GotoNode) + succs = bbs[currbb].succs + @assert length(succs) == 1 + nextbb = succs[1] + ssavaluetypes[currpc] = Any + handle_control_backedge!(interp, frame, currpc, stmt.label) + add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) + @goto branch + elseif isa(stmt, GotoIfNot) + condx = stmt.cond + condslot = ssa_def_slot(condx, frame) + condt = abstract_eval_value(interp, condx, StatementState(currstate, currsaw_latestworld), frame) + if condt === Bottom + ssavaluetypes[currpc] = Bottom + empty!(frame.pclimitations) + @goto find_next_bb + end + orig_condt = condt + if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condslot, SlotNumber) + # if this non-`Conditional` object is a slot, we form and propagate + # the conditional constraint on it + condt = Conditional(condslot, Const(true), Const(false)) + end + condval = maybe_extract_const_bool(condt) + nothrow = (condval !== nothing) || ⊑(𝕃ᵢ, orig_condt, Bool) + if nothrow + add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) + else + update_exc_bestguess!(interp, TypeError, frame) + propagate_to_error_handler!(currstate, currsaw_latestworld, frame, 𝕃ᵢ) + merge_effects!(interp, frame, EFFECTS_THROWS) + end + + if !isempty(frame.pclimitations) + # we can't model the possible effect of control + # dependencies on the return + # directly to all the return values (unless we error first) + condval isa Bool || union!(frame.limitations, frame.pclimitations) + empty!(frame.pclimitations) + end + ssavaluetypes[currpc] = Any + if condval === true + @goto fallthrough + else + if !nothrow && !hasintersect(widenconst(orig_condt), Bool) + ssavaluetypes[currpc] = Bottom + @goto find_next_bb + end + + succs = bbs[currbb].succs + if length(succs) == 1 + @assert condval === false || (stmt.dest === currpc + 1) + nextbb = succs[1] + @goto branch + end + @assert length(succs) == 2 + truebb = currbb + 1 + falsebb = succs[1] == truebb ? succs[2] : succs[1] + if condval === false + nextbb = falsebb + handle_control_backedge!(interp, frame, currpc, stmt.dest) + @goto branch + end + + # We continue with the true branch, but process the false + # branch here. 
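+ # When `condt` is a `Conditional`, the `elsetype` refinement is applied to a copy of the
+ # state and merged into the false successor, while the `thentype` refinement is applied
+ # in place to `currstate` for the fallthrough path.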
+ if isa(condt, Conditional) + else_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#false) + if else_change !== nothing + elsestate = copy(currstate) + stoverwrite1!(elsestate, else_change) + elseif condslot isa SlotNumber + elsestate = copy(currstate) + else + elsestate = currstate + end + if condslot isa SlotNumber # refine the type of this conditional object itself for this else branch + stoverwrite1!(elsestate, condition_object_change(currstate, condt, condslot, #=then_or_else=#false)) + end + else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, elsestate, currsaw_latestworld) + then_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#true) + thenstate = currstate + if then_change !== nothing + stoverwrite1!(thenstate, then_change) + end + if condslot isa SlotNumber # refine the type of this conditional object itself for this then branch + stoverwrite1!(thenstate, condition_object_change(currstate, condt, condslot, #=then_or_else=#true)) + end + else + else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate, currsaw_latestworld) + end + if else_changed + handle_control_backedge!(interp, frame, currpc, stmt.dest) + push!(W, falsebb) + end + @goto fallthrough + end + elseif isa(stmt, ReturnNode) + rt = abstract_eval_value(interp, stmt.val, StatementState(currstate, currsaw_latestworld), frame) + if update_bestguess!(interp, frame, currstate, rt) + update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int + # no reason to revisit if that call-site doesn't affect the final result + return caller.ssavaluetypes[caller_pc] !== Any + end + end + ssavaluetypes[currpc] = Any + @goto find_next_bb + elseif isa(stmt, EnterNode) + ssavaluetypes[currpc] = Any + add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) + if isdefined(stmt, :scope) + scopet = abstract_eval_value(interp, stmt.scope, StatementState(currstate, currsaw_latestworld), frame) + handler = gethandler(frame, currpc + 1)::TryCatchFrame + @assert handler.scopet !== nothing + if !⊑(𝕃ᵢ, scopet, handler.scopet) + handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet) + if isdefined(handler, :scope_uses) + for bb in handler.scope_uses + push!(W, bb) + end + end + end + end + @goto fallthrough + elseif isexpr(stmt, :leave) + ssavaluetypes[currpc] = Any + @goto fallthrough + end + # Fall through terminator - treat as regular stmt + end + # Process non control-flow statements + @assert isempty(frame.tasks) + rt = nothing + exct = Bottom + changes = nothing + refinements = nothing + effects = nothing + if isa(stmt, NewvarNode) + changes = StateUpdate(stmt.slot, VarState(Bottom, true)) + elseif isa(stmt, PhiNode) + add_curr_ssaflag!(frame, IR_FLAGS_REMOVABLE) + # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over + # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge + # over the first and last iterations. By tmerging in the current old_rt, we ensure that + # we will not lose an intermediate value. + rt = abstract_eval_phi(interp, stmt, StatementState(currstate, currsaw_latestworld), frame) + old_rt = frame.ssavaluetypes[currpc] + rt = old_rt === NOT_FOUND ? 
rt : tmerge(typeinf_lattice(interp), old_rt, rt) + else + lhs = nothing + if isexpr(stmt, :(=)) + lhs = stmt.args[1] + stmt = stmt.args[2] + end + if !isa(stmt, Expr) + (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, stmt, StatementState(currstate, currsaw_latestworld), frame) + else + hd = stmt.head + if hd === :method + fname = stmt.args[1] + if isa(fname, SlotNumber) + changes = StateUpdate(fname, VarState(Any, false)) + end + elseif (hd === :code_coverage_effect || ( + hd !== :boundscheck && # :boundscheck can be narrowed to Bool + is_meta_expr(stmt))) + rt = Nothing + elseif hd === :latestworld + currsaw_latestworld = true + rt = Nothing + else + result = abstract_eval_statement_expr(interp, stmt, StatementState(currstate, currsaw_latestworld), frame)::Future + if !isready(result) || !isempty(frame.tasks) + return CurrentState(result, currstate, currsaw_latestworld, bbstart, bbend) + @label injectresult + # reload local variables + stmt = frame.src.code[currpc] + changes = nothing + lhs = nothing + if isexpr(stmt, :(=)) + lhs = stmt.args[1] + stmt = stmt.args[2] + end + result = nextresult.result::Future{RTEffects} + end + result = result[] + (; rt, exct, effects, refinements) = result + if effects.noub === NOUB_IF_NOINBOUNDS + if has_curr_ssaflag(frame, IR_FLAG_INBOUNDS) + effects = Effects(effects; noub=ALWAYS_FALSE) + elseif !propagate_inbounds(frame) + # The callee read our inbounds flag, but unless we propagate inbounds, + # we ourselves don't read our parent's inbounds. + effects = Effects(effects; noub=ALWAYS_TRUE) + end + end + @assert !isa(rt, TypeVar) "unhandled TypeVar" + rt = maybe_singleton_const(rt) + if !isempty(frame.pclimitations) + if rt isa Const || rt === Union{} + empty!(frame.pclimitations) + else + rt = LimitedAccuracy(rt, frame.pclimitations) + frame.pclimitations = IdSet{InferenceState}() + end + end + end + end + effects === nothing || merge_override_effects!(interp, effects, frame) + if lhs !== nothing && rt !== Bottom + changes = StateUpdate(lhs::SlotNumber, VarState(rt, false)) + end + end + if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW) + if exct !== Union{} + update_exc_bestguess!(interp, exct, frame) + # TODO: assert that these conditions match. For now, we assume the `nothrow` flag + # to be correct, but allow the exct to be an over-approximation. 
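+ # Regardless of how precise `exct` is, the pre-statement state is still propagated to the
+ # enclosing handler (just below), since the handler must account for every
+ # potentially-throwing program point.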
+ end + propagate_to_error_handler!(currstate, currsaw_latestworld, frame, 𝕃ᵢ) + end + if rt === Bottom + ssavaluetypes[currpc] = Bottom + # Special case: Bottom-typed PhiNodes do not error (but must also be unused) + if isa(stmt, PhiNode) + continue + end + @goto find_next_bb + end + if changes !== nothing + stoverwrite1!(currstate, changes) + end + if refinements isa SlotRefinement + apply_refinement!(𝕃ᵢ, refinements.slot, refinements.typ, currstate, changes) + elseif refinements isa Vector{Any} + for i = 1:length(refinements) + newtyp = refinements[i] + newtyp === nothing && continue + apply_refinement!(𝕃ᵢ, SlotNumber(i), newtyp, currstate, changes) + end + end + if rt === nothing + ssavaluetypes[currpc] = Any + continue + end + record_ssa_assign!(𝕃ᵢ, currpc, rt, frame) + end # for currpc in bbstart:bbend + + # Case 1: Fallthrough termination + begin @label fallthrough + nextbb = currbb + 1 + end + + # Case 2: Directly branch to a different BB + begin @label branch + if update_bbstate!(𝕃ᵢ, frame, nextbb, currstate, currsaw_latestworld) + push!(W, nextbb) + end + end + + # Case 3: Control flow ended along the current path (converged, return or throw) + begin @label find_next_bb + currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block + currbb == -1 && break # the working set is empty + currbb > nbbs && break + + nexttable = states[currbb] + if nexttable === nothing + init_vartable!(currstate, frame) + else + stoverwrite!(currstate, nexttable) + end + end + end # while currbb <= nbbs + + return CurrentState() +end + +function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp), + currstate::VarTable, currchanges::Union{Nothing,StateUpdate}) + if currchanges !== nothing && currchanges.var == slot + return # type propagation from statement (like assignment) should take precedence + end + vtype = currstate[slot_id(slot)] + oldtyp = vtype.typ + ⊏ = strictpartialorder(𝕃ᵢ) + if newtyp ⊏ oldtyp + stmtupdate = StateUpdate(slot, VarState(newtyp, vtype.undef)) + stoverwrite1!(currstate, stmtupdate) + end +end + +function conditional_change(𝕃ᵢ::AbstractLattice, currstate::VarTable, condt::Conditional, then_or_else::Bool) + vtype = currstate[condt.slot] + oldtyp = vtype.typ + newtyp = then_or_else ? condt.thentype : condt.elsetype + if iskindtype(newtyp) + # this code path corresponds to the special handling for `isa(x, iskindtype)` check + # implemented within `abstract_call_builtin` + elseif ⊑(𝕃ᵢ, ignorelimited(newtyp), ignorelimited(oldtyp)) + # approximate test for `typ ∩ oldtyp` being better than `oldtyp` + # since we probably formed these types with `typesubtract`, + # the comparison is likely simple + else + return nothing + end + if oldtyp isa LimitedAccuracy + # typ is better unlimited, but we may still need to compute the tmeet with the limit + # "causes" since we ignored those in the comparison + newtyp = tmerge(𝕃ᵢ, newtyp, LimitedAccuracy(Bottom, oldtyp.causes)) + end + # if this `Conditional` is from `@isdefined condt.slot`, refine its `undef` information + newundef = condt.isdefined ? !then_or_else : vtype.undef + return StateUpdate(SlotNumber(condt.slot), VarState(newtyp, newundef), #=conditional=#true) +end + +function condition_object_change(currstate::VarTable, condt::Conditional, + condslot::SlotNumber, then_or_else::Bool) + vtype = currstate[slot_id(condslot)] + newcondt = Conditional(condt.slot, + then_or_else ? condt.thentype : Union{}, + then_or_else ?
Union{} : condt.elsetype) + return StateUpdate(condslot, VarState(newcondt, vtype.undef)) +end + +# make as much progress on `frame` as possible (by handling cycles) +warnlength::Int = 2500 +function typeinf(interp::AbstractInterpreter, frame::InferenceState) + callstack = frame.callstack::Vector{AbsIntState} + nextstates = CurrentState[] + takenext = frame.frameid + minwarn = warnlength + takeprev = 0 + while takenext >= frame.frameid + callee = takenext == 0 ? frame : callstack[takenext]::InferenceState + if !isempty(callstack) + if length(callstack) - frame.frameid >= minwarn + topmethod = callstack[1].linfo + topmethod.def isa Method || (topmethod = callstack[2].linfo) + print(Core.stderr, "info: inference of ", topmethod, " exceeding ", length(callstack), " frames (may be slow).\n") + minwarn *= 2 + end + topcallee = (callstack[end]::InferenceState) + if topcallee.cycleid != callee.cycleid + callee = topcallee + takenext = length(callstack) + end + end + interp = callee.interp + nextstateid = takenext + 1 - frame.frameid + while length(nextstates) < nextstateid + push!(nextstates, CurrentState()) + end + if doworkloop(interp, callee) + # First drain the workloop. Note that since some scheduled work doesn't + # affect the result (e.g. cfunction or abstract_call_method on + # get_compileable_sig), but still must be finished up since it may see and + # change the local variables of the InferenceState at currpc, we do this + # even if the nextresult status is already completed. + continue + elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip) + # Next make progress on this frame + prev = length(callee.tasks) + 1 + nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid]) + reverse!(callee.tasks, prev) + elseif callee.cycleid == length(callstack) + # With no active ip's and no cycles, frame is done + finish_nocycle(interp, callee) + callee.frameid == 0 && break + takenext = length(callstack) + nextstateid = takenext + 1 - frame.frameid + #@assert length(nextstates) == nextstateid + 1 + #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates)) + resize!(nextstates, nextstateid) + elseif callee.cycleid == callee.frameid + # If the current frame is the top part of a cycle, check if the whole cycle + # is done, and if not, pick the next item to work on. 
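+ # The cycle is complete only when every frame in callstack[cycleid:end] has an empty
+ # work queue, no pending tasks and no partially evaluated statement; otherwise work
+ # resumes from the frame nearest the top of the callstack.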
+ no_active_ips_in_cycle = true + for i = callee.cycleid:length(callstack) + caller = callstack[i]::InferenceState + @assert caller.cycleid == callee.cycleid + if !isempty(caller.tasks) || isdefined(nextstates[i+1-frame.frameid], :result) || !isempty(caller.ip) + no_active_ips_in_cycle = false + break + end + end + if no_active_ips_in_cycle + finish_cycle(interp, callstack, callee.cycleid) + end + takenext = length(callstack) + nextstateid = takenext + 1 - frame.frameid + if no_active_ips_in_cycle + #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates)) + resize!(nextstates, nextstateid) + else + #@assert length(nextstates) == nextstateid + end + else + # Continue to the next frame in this cycle + takenext = takenext - 1 + end + end + #@assert all(nextresult -> !isdefined(nextresult, :result), nextstates) + return is_inferred(frame) +end diff --git a/base/compiler/abstractlattice.jl b/Compiler/src/abstractlattice.jl similarity index 91% rename from base/compiler/abstractlattice.jl rename to Compiler/src/abstractlattice.jl index 3c6c874a9a09c..7a9cff8918175 100644 --- a/base/compiler/abstractlattice.jl +++ b/Compiler/src/abstractlattice.jl @@ -98,8 +98,10 @@ is_valid_lattice_norec(::InferenceLattice, @nospecialize(elem)) = isa(elem, Limi """ tmeet(𝕃::AbstractLattice, a, b::Type) -Compute the lattice meet of lattice elements `a` and `b` over the lattice `𝕃`. -If `𝕃` is `JLTypeLattice`, this is equivalent to type intersection. +Compute the lattice meet of lattice elements `a` and `b` over the lattice `𝕃`, +dropping any results that will not be inhabited at runtime. +If `𝕃` is `JLTypeLattice`, this is equivalent to type intersection plus the +elimination of results that have no concrete subtypes. Note that currently `b` is restricted to being a type (interpreted as a lattice element in the `JLTypeLattice` sub-lattice of `𝕃`). """ @@ -107,7 +109,7 @@ function tmeet end function tmeet(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) ti = typeintersect(a, b) - valid_as_lattice(ti) || return Bottom + valid_as_lattice(ti, true) || return Bottom return ti end @@ -227,7 +229,7 @@ end if isa(t, Const) # don't consider mutable values useful constants val = t.val - return isa(val, Symbol) || isa(val, Type) || !ismutable(val) + return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || isa(val, CodeInstance) || !ismutable(val) end isa(t, PartialTypeVar) && return false # this isn't forwardable return is_const_prop_profitable_arg(widenlattice(𝕃), t) @@ -249,9 +251,7 @@ end isa(x, Const) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -@nospecializeinfer function is_forwardable_argtype(::JLTypeLattice, @nospecialize x) - return false -end +@nospecializeinfer is_forwardable_argtype(::JLTypeLattice, @nospecialize x) = false """ widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) -> new_bestguess @@ -260,7 +260,7 @@ end Appropriately converts inferred type of a return value `rt` to such a type that we know we can store in the cache and is valid and good inter-procedurally, E.g. if `rt isa Conditional` then `rt` should be converted to `InterConditional` -or the other cachable lattice element. +or the other cacheable lattice element. 
External lattice `𝕃ᵢ::ExternalLattice` may overload: - `widenreturn(𝕃ᵢ::ExternalLattice, @nospecialize(rt), info::BestguessInfo)` @@ -285,9 +285,16 @@ has_extended_unionsplit(::AnyMustAliasesLattice) = true has_extended_unionsplit(::JLTypeLattice) = false # Curried versions -⊑(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(lattice, a, b) -⊏(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(lattice, a, b) -⋤(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(lattice, a, b) +⊑(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(𝕃, a, b) +⊏(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(𝕃, a, b) +⋤(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(𝕃, a, b) +tmerge(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmerge(𝕃, a, b) +tmeet(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmeet(𝕃, a, b) +partialorder(𝕃::AbstractLattice) = ⊑(𝕃) +strictpartialorder(𝕃::AbstractLattice) = ⊏(𝕃) +strictneqpartialorder(𝕃::AbstractLattice) = ⋤(𝕃) +join(𝕃::AbstractLattice) = tmerge(𝕃) +meet(𝕃::AbstractLattice) = tmeet(𝕃) # Fallbacks for external packages using these methods const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance) diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl new file mode 100644 index 0000000000000..26fcfde5f256f --- /dev/null +++ b/Compiler/src/bootstrap.jl @@ -0,0 +1,96 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# make sure that typeinf is executed before turning on typeinf_ext +# this ensures that typeinf_ext doesn't recurse before it can add the item to the workq +# especially try to make sure any recursive and leaf functions have concrete signatures, +# since we won't be able to specialize & infer them at runtime + +function activate_codegen!() + ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) + Core.eval(Compiler, quote + let typeinf_world_age = Base.tls_world_age() + @eval Core.OptimizedGenerics.CompilerPlugins.typeinf(::Nothing, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world($(Expr(:$, :typeinf_world_age)), typeinf_ext_toplevel, mi, Base.tls_world_age(), source_mode) + end + end) +end + +global bootstrapping_compiler::Bool = false +function bootstrap!() + global bootstrapping_compiler = true + let time() = ccall(:jl_clock_now, Float64, ()) + println("Compiling the compiler. 
This may take several minutes ...") + + ssa_inlining_pass!_tt = Tuple{typeof(ssa_inlining_pass!), IRCode, InliningState{NativeInterpreter}, Bool} + optimize_tt = Tuple{typeof(optimize), NativeInterpreter, OptimizationState{NativeInterpreter}, InferenceResult} + typeinf_ext_tt = Tuple{typeof(typeinf_ext), NativeInterpreter, MethodInstance, UInt8} + typeinf_tt = Tuple{typeof(typeinf), NativeInterpreter, InferenceState} + typeinf_edge_tt = Tuple{typeof(typeinf_edge), NativeInterpreter, Method, Any, SimpleVector, InferenceState, Bool, Bool} + fs = Any[ + # we first create caches for the optimizer, because they contain many loop constructions + # and they're better to not run in interpreter even during bootstrapping + compact!, ssa_inlining_pass!_tt, optimize_tt, + # then we create caches for inference entries + typeinf_ext_tt, typeinf_tt, typeinf_edge_tt, + ] + # tfuncs can't be inferred from the inference entries above, so here we infer them manually + for x in T_FFUNC_VAL + push!(fs, x[3]) + end + for i = 1:length(T_IFUNC) + if isassigned(T_IFUNC, i) + x = T_IFUNC[i] + push!(fs, x[3]) + else + println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i))) + end + end + starttime = time() + methods = Any[] + world = get_world_counter() + for f in fs + if isa(f, DataType) && f.name === typename(Tuple) + tt = f + else + tt = Tuple{typeof(f), Vararg{Any}} + end + matches = _methods_by_ftype(tt, 10, world)::Vector + if isempty(matches) + println(stderr, "WARNING: no matching method found for `", tt, "`") + else + for m in matches + # remove any TypeVars from the intersection + m = m::MethodMatch + params = Any[m.spec_types.parameters...] + for i = 1:length(params) + params[i] = unwraptv(params[i]) + end + mi = specialize_method(m.method, Tuple{params...}, m.sparams) + #isa_compileable_sig(mi) || println(stderr, "WARNING: inferring `", mi, "` which isn't expected to be called.") + push!(methods, mi) + end + end + end + codeinfos = typeinf_ext_toplevel(methods, [world], false) + for i = 1:2:length(codeinfos) + ci = codeinfos[i]::CodeInstance + src = codeinfos[i + 1]::CodeInfo + isa_compileable_sig(ci.def) || continue # println(stderr, "WARNING: compiling `", ci.def, "` which isn't expected to be called.") + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, src) + end + endtime = time() + println("Base.Compiler ──── ", sub_float(endtime,starttime), " seconds") + end + activate_codegen!() + global bootstrapping_compiler = false + nothing +end + +function activate!(; reflection=true, codegen=false) + if reflection + Base.REFLECTION_COMPILER[] = Compiler + end + if codegen + activate_codegen!() + end +end diff --git a/base/compiler/cicache.jl b/Compiler/src/cicache.jl similarity index 78% rename from base/compiler/cicache.jl rename to Compiler/src/cicache.jl index 8332777e6d5bc..2893be2787b29 100644 --- a/base/compiler/cicache.jl +++ b/Compiler/src/cicache.jl @@ -7,15 +7,20 @@ Internally, each `MethodInstance` keep a unique global cache of code instances that have been created for the given method instance, stratified by world age ranges. This struct abstracts over access to this cache. 
""" -struct InternalCodeCache end +struct InternalCodeCache + owner::Any # `jl_egal` is used for comparison +end function setindex!(cache::InternalCodeCache, ci::CodeInstance, mi::MethodInstance) + @assert ci.owner === cache.owner + m = mi.def + if isa(m, Method) && m.module != Core + ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci) + end ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, ci) return cache end -const GLOBAL_CI_CACHE = InternalCodeCache() - struct WorldRange min_world::UInt max_world::UInt @@ -26,6 +31,8 @@ WorldRange(r::UnitRange) = WorldRange(first(r), last(r)) first(wr::WorldRange) = wr.min_world last(wr::WorldRange) = wr.max_world in(world::UInt, wr::WorldRange) = wr.min_world <= world <= wr.max_world +min_world(wr::WorldRange) = first(wr) +max_world(wr::WorldRange) = last(wr) function intersect(a::WorldRange, b::WorldRange) ret = WorldRange(max(a.min_world, b.min_world), min(a.max_world, b.max_world)) @@ -49,11 +56,11 @@ WorldView(wvc::WorldView, wr::WorldRange) = WorldView(wvc.cache, wr) WorldView(wvc::WorldView, args...) = WorldView(wvc.cache, args...) function haskey(wvc::WorldView{InternalCodeCache}, mi::MethodInstance) - return ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds)) !== nothing + return ccall(:jl_rettype_inferred, Any, (Any, Any, UInt, UInt), wvc.cache.owner, mi, first(wvc.worlds), last(wvc.worlds)) !== nothing end function get(wvc::WorldView{InternalCodeCache}, mi::MethodInstance, default) - r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds)) + r = ccall(:jl_rettype_inferred, Any, (Any, Any, UInt, UInt), wvc.cache.owner, mi, first(wvc.worlds), last(wvc.worlds)) if r === nothing return default end diff --git a/Compiler/src/effects.jl b/Compiler/src/effects.jl new file mode 100644 index 0000000000000..9aea4cb204ec6 --- /dev/null +++ b/Compiler/src/effects.jl @@ -0,0 +1,365 @@ +const effects_key_string = """ +## Key for `show` output of Effects: + +The output represents the state of different effect properties in the following order: + +1. `consistent` (`c`): + - `+c` (green): `ALWAYS_TRUE` + - `-c` (red): `ALWAYS_FALSE` + - `?c` (yellow): `CONSISTENT_IF_NOTRETURNED` and/or `CONSISTENT_IF_INACCESSIBLEMEMONLY` +2. `effect_free` (`e`): + - `+e` (green): `ALWAYS_TRUE` + - `-e` (red): `ALWAYS_FALSE` + - `?e` (yellow): `EFFECT_FREE_IF_INACCESSIBLEMEMONLY` +3. `nothrow` (`n`): + - `+n` (green): `true` + - `-n` (red): `false` +4. `terminates` (`t`): + - `+t` (green): `true` + - `-t` (red): `false` +5. `notaskstate` (`s`): + - `+s` (green): `true` + - `-s` (red): `false` +6. `inaccessiblememonly` (`m`): + - `+m` (green): `ALWAYS_TRUE` + - `-m` (red): `ALWAYS_FALSE` + - `?m` (yellow): `INACCESSIBLEMEM_OR_ARGMEMONLY` +7. `noub` (`u`): + - `+u` (green): `true` + - `-u` (red): `false` + - `?u` (yellow): `NOUB_IF_NOINBOUNDS` +8. `:nonoverlayed` (`o`): + - `+o` (green): `ALWAYS_TRUE` + - `-o` (red): `ALWAYS_FALSE` + - `?o` (yellow): `CONSISTENT_OVERLAY` +9. `:nortcall` (`r`): + - `+r` (green): `true` + - `-r` (red): `false` +""" + +""" + effects::Effects + +Represents computational effects of a method call. + +The effects are a composition of different effect bits that represent some program property +of the method being analyzed. They are represented as `Bool` or `UInt8` bits with the +following meanings: +- `consistent::UInt8`: + * `ALWAYS_TRUE`: this method is guaranteed to return or terminate consistently. 
+ * `ALWAYS_FALSE`: this method may not return or terminate consistently, and there is + no need for further analysis with respect to this effect property as this conclusion + will not be refined anyway. + * `CONSISTENT_IF_NOTRETURNED`: the `:consistent`-cy of this method can later be refined to + `ALWAYS_TRUE` in a case when the return value of this method never involves newly + allocated mutable objects. + * `CONSISTENT_IF_INACCESSIBLEMEMONLY`: the `:consistent`-cy of this method can later be + refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven. +- `effect_free::UInt8`: + * `ALWAYS_TRUE`: this method is free from externally semantically visible side effects. + * `ALWAYS_FALSE`: this method may not be free from externally semantically visible side effects, and there is + no need for further analysis with respect to this effect property as this conclusion + will not be refined anyway. + * `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`: the `:effect-free`-ness of this method can later be + refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven. +- `nothrow::Bool`: this method is guaranteed to not throw an exception. + If the execution of this method may raise `MethodError`s and similar exceptions, then + the method is not considered as `:nothrow`. + However, note that environment-dependent errors like `StackOverflowError` or `InterruptException` + are not modeled by this effect and thus a method that may result in `StackOverflowError` + does not necessarily need to taint `:nothrow` (although it should usually taint `:terminates` too). +- `terminates::Bool`: this method is guaranteed to terminate. +- `notaskstate::Bool`: this method does not access any state bound to the current + task and may thus be moved to a different task without changing observable + behavior. Note that this currently implies `noyield` as well, since + yielding modifies the state of the current task, though this may be split + in the future. +- `inaccessiblememonly::UInt8`: + * `ALWAYS_TRUE`: this method does not access or modify externally accessible mutable memory. + This state corresponds to LLVM's `inaccessiblememonly` function attribute. + * `ALWAYS_FALSE`: this method may access or modify externally accessible mutable memory. + * `INACCESSIBLEMEM_OR_ARGMEMONLY`: this method does not access or modify externally accessible mutable memory, + except that it may access or modify mutable memory pointed to by its call arguments. + This may later be refined to `ALWAYS_TRUE` in a case when call arguments are known to be immutable. + This state corresponds to LLVM's `inaccessiblemem_or_argmemonly` function attribute. +- `noub::UInt8`: + * `ALWAYS_TRUE`: this method is guaranteed to not execute any undefined behavior (for any input). + * `ALWAYS_FALSE`: this method may execute undefined behavior. + * `NOUB_IF_NOINBOUNDS`: this method is guaranteed to not execute any undefined behavior + under the assumption that its `@boundscheck` code is not elided (which happens when the + caller does not set nor propagate the `@inbounds` context). + Note that undefined behavior may technically cause the method to violate any other effect + assertions (such as `:consistent` or `:effect_free`) as well, but we do not model this, + and they assume the absence of undefined behavior. +- `nonoverlayed::UInt8`: + * `ALWAYS_TRUE`: this method is guaranteed to not invoke any methods that are defined in an + [overlayed method table](@ref OverlayMethodTable).
+ * `CONSISTENT_OVERLAY`: this method may invoke overlayed methods, but all such overlayed + methods are `:consistent` with their non-overlayed original counterparts + (see [`Base.@assume_effects`](@ref) for the exact definition of `:consistent`-cy). + * `ALWAYS_FALSE`: this method may invoke overlayed methods. +- `nortcall::Bool`: this method does not call `Core.Compiler.return_type`, + and it is guaranteed that any other methods this method might call also do not call + `Core.Compiler.return_type`. + +Note that the representations above are just internal implementation details and thus likely +to change in the future. See [`Base.@assume_effects`](@ref) for a more detailed explanation +of the definitions of these properties. + +During abstract interpretation, `Effects` at each statement are analyzed locally and they +are merged into the single global `Effects` that represents the entire effects of the +analyzed method (see the implementation of `merge_effects!`). Each effect property is +initialized with `ALWAYS_TRUE`/`true` and then transitioned towards `ALWAYS_FALSE`/`false`. +Note that within the current flow-insensitive analysis design, effects detected by local +analysis on each statement usually taint the global conclusion conservatively. + + +$(effects_key_string) +""" +struct Effects + consistent::UInt8 + effect_free::UInt8 + nothrow::Bool + terminates::Bool + notaskstate::Bool + inaccessiblememonly::UInt8 + noub::UInt8 + nonoverlayed::UInt8 + nortcall::Bool + function Effects( + consistent::UInt8, + effect_free::UInt8, + nothrow::Bool, + terminates::Bool, + notaskstate::Bool, + inaccessiblememonly::UInt8, + noub::UInt8, + nonoverlayed::UInt8, + nortcall::Bool) + return new( + consistent, + effect_free, + nothrow, + terminates, + notaskstate, + inaccessiblememonly, + noub, + nonoverlayed, + nortcall) + end +end + +const ALWAYS_TRUE = 0x00 +const ALWAYS_FALSE = 0x01 + +# :consistent-cy bits +const CONSISTENT_IF_NOTRETURNED = 0x01 << 1 +const CONSISTENT_IF_INACCESSIBLEMEMONLY = 0x01 << 2 + +# :effect_free-ness bits +const EFFECT_FREE_IF_INACCESSIBLEMEMONLY = 0x02 + +""" +`EFFECT_FREE_GLOBALLY` means that the statement is `:effect-free` and does not have a +caller-visible effect, but may not be removed from the function itself. This may e.g. +be used for effects that last only for the scope of the current function. +""" +const EFFECT_FREE_GLOBALLY = 0x03 + +# :inaccessiblememonly bits +const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1 + +# :noub bits +const NOUB_IF_NOINBOUNDS = 0x01 << 1 + +# :nonoverlayed bits +const CONSISTENT_OVERLAY = 0x01 << 1 + +const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, true, true, true, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true) +const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, false, true, true, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true) +const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_TRUE, false) # unknown mostly, but it's not overlayed at least (e.g.
it's not a call) + +function Effects(effects::Effects=Effects( + ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, false); + consistent::UInt8 = effects.consistent, + effect_free::UInt8 = effects.effect_free, + nothrow::Bool = effects.nothrow, + terminates::Bool = effects.terminates, + notaskstate::Bool = effects.notaskstate, + inaccessiblememonly::UInt8 = effects.inaccessiblememonly, + noub::UInt8 = effects.noub, + nonoverlayed::UInt8 = effects.nonoverlayed, + nortcall::Bool = effects.nortcall) + return Effects( + consistent, + effect_free, + nothrow, + terminates, + notaskstate, + inaccessiblememonly, + noub, + nonoverlayed, + nortcall) +end + +function is_better_effects(new::Effects, old::Effects) + any_improved = false + if new.consistent == ALWAYS_TRUE + any_improved |= old.consistent != ALWAYS_TRUE + else + if !iszero(new.consistent & CONSISTENT_IF_NOTRETURNED) + old.consistent == ALWAYS_TRUE && return false + any_improved |= iszero(old.consistent & CONSISTENT_IF_NOTRETURNED) + elseif !iszero(new.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) + old.consistent == ALWAYS_TRUE && return false + any_improved |= iszero(old.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) + else + return false + end + end + if new.effect_free == ALWAYS_TRUE + any_improved |= old.consistent != ALWAYS_TRUE + elseif new.effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY + old.effect_free == ALWAYS_TRUE && return false + any_improved |= old.effect_free != EFFECT_FREE_IF_INACCESSIBLEMEMONLY + elseif new.effect_free != old.effect_free + return false + end + if new.nothrow + any_improved |= !old.nothrow + elseif new.nothrow != old.nothrow + return false + end + if new.terminates + any_improved |= !old.terminates + elseif new.terminates != old.terminates + return false + end + if new.notaskstate + any_improved |= !old.notaskstate + elseif new.notaskstate != old.notaskstate + return false + end + if new.inaccessiblememonly == ALWAYS_TRUE + any_improved |= old.inaccessiblememonly != ALWAYS_TRUE + elseif new.inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY + old.inaccessiblememonly == ALWAYS_TRUE && return false + any_improved |= old.inaccessiblememonly != INACCESSIBLEMEM_OR_ARGMEMONLY + elseif new.inaccessiblememonly != old.inaccessiblememonly + return false + end + if new.noub == ALWAYS_TRUE + any_improved |= old.noub != ALWAYS_TRUE + elseif new.noub == NOUB_IF_NOINBOUNDS + old.noub == ALWAYS_TRUE && return false + any_improved |= old.noub != NOUB_IF_NOINBOUNDS + elseif new.noub != old.noub + return false + end + if new.nonoverlayed == ALWAYS_TRUE + any_improved |= old.nonoverlayed != ALWAYS_TRUE + elseif new.nonoverlayed == CONSISTENT_OVERLAY + old.nonoverlayed == ALWAYS_TRUE && return false + any_improved |= old.nonoverlayed != CONSISTENT_OVERLAY + elseif new.nonoverlayed != old.nonoverlayed + return false + end + if new.nortcall + any_improved |= !old.nortcall + elseif new.nortcall != old.nortcall + return false + end + return any_improved +end + +function merge_effects(old::Effects, new::Effects) + return Effects( + merge_effectbits(old.consistent, new.consistent), + merge_effectbits(old.effect_free, new.effect_free), + merge_effectbits(old.nothrow, new.nothrow), + merge_effectbits(old.terminates, new.terminates), + merge_effectbits(old.notaskstate, new.notaskstate), + merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly), + merge_effectbits(old.noub, new.noub), + merge_effectbits(old.nonoverlayed, new.nonoverlayed), + merge_effectbits(old.nortcall, 
new.nortcall)) +end + +function merge_effectbits(old::UInt8, new::UInt8) + if old === ALWAYS_FALSE || new === ALWAYS_FALSE + return ALWAYS_FALSE + end + return old | new +end +merge_effectbits(old::Bool, new::Bool) = old & new + +is_consistent(effects::Effects) = effects.consistent === ALWAYS_TRUE +is_effect_free(effects::Effects) = effects.effect_free === ALWAYS_TRUE +is_nothrow(effects::Effects) = effects.nothrow +is_terminates(effects::Effects) = effects.terminates +is_notaskstate(effects::Effects) = effects.notaskstate +is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAYS_TRUE +is_noub(effects::Effects) = effects.noub === ALWAYS_TRUE +is_noub_if_noinbounds(effects::Effects) = effects.noub === NOUB_IF_NOINBOUNDS +is_nonoverlayed(effects::Effects) = effects.nonoverlayed === ALWAYS_TRUE +is_nortcall(effects::Effects) = effects.nortcall + +# implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here +is_foldable(effects::Effects, check_rtcall::Bool=false) = + is_consistent(effects) && + (is_noub(effects) || is_noub_if_noinbounds(effects)) && + is_effect_free(effects) && + is_terminates(effects) && + (!check_rtcall || is_nortcall(effects)) + +is_foldable_nothrow(effects::Effects, check_rtcall::Bool=false) = + is_foldable(effects, check_rtcall) && + is_nothrow(effects) + +# TODO add `is_noub` here? +is_removable_if_unused(effects::Effects) = + is_effect_free(effects) && + is_terminates(effects) && + is_nothrow(effects) + +is_finalizer_inlineable(effects::Effects) = + is_nothrow(effects) && + is_notaskstate(effects) + +is_consistent_if_notreturned(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_NOTRETURNED) +is_consistent_if_inaccessiblememonly(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) + +is_effect_free_if_inaccessiblememonly(effects::Effects) = !iszero(effects.effect_free & EFFECT_FREE_IF_INACCESSIBLEMEMONLY) + +is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly === INACCESSIBLEMEM_OR_ARGMEMONLY + +is_consistent_overlay(effects::Effects) = effects.nonoverlayed === CONSISTENT_OVERLAY + +# (sync this with codegen.cpp and staticdata.c effects_foldable functions) +function encode_effects(e::Effects) + return ((e.consistent % UInt32) << 0) | + ((e.effect_free % UInt32) << 3) | + ((e.nothrow % UInt32) << 5) | + ((e.terminates % UInt32) << 6) | + ((e.notaskstate % UInt32) << 7) | + ((e.inaccessiblememonly % UInt32) << 8) | + ((e.noub % UInt32) << 10) | + ((e.nonoverlayed % UInt32) << 12) | + ((e.nortcall % UInt32) << 14) +end + +function decode_effects(e::UInt32) + return Effects( + UInt8((e >> 0) & 0x07), + UInt8((e >> 3) & 0x03), + Bool((e >> 5) & 0x01), + Bool((e >> 6) & 0x01), + Bool((e >> 7) & 0x01), + UInt8((e >> 8) & 0x03), + UInt8((e >> 10) & 0x03), + UInt8((e >> 12) & 0x03), + Bool((e >> 14) & 0x01)) +end + +decode_statement_effects_override(ssaflag::UInt32) = + decode_effects_override(UInt16((ssaflag >> NUM_IR_FLAGS) & (1 << NUM_EFFECTS_OVERRIDES - 1))) diff --git a/Compiler/src/inferenceresult.jl b/Compiler/src/inferenceresult.jl new file mode 100644 index 0000000000000..7da96c4cc2e93 --- /dev/null +++ b/Compiler/src/inferenceresult.jl @@ -0,0 +1,199 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance) + (; def, specTypes) = mi + return most_general_argtypes(isa(def, Method) ? 
def : nothing, specTypes) +end + +struct SimpleArgtypes + argtypes::Vector{Any} +end + +# Like `SimpleArgtypes`, but allows the argtypes to be wider than the current call. +# As a result, it is not legal to refine the cache result with information more +# precise than was it deducible from the `WidenedSimpleArgtypes`. +struct WidenedArgtypes + argtypes::Vector{Any} +end + +function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance, + simple_argtypes::Union{SimpleArgtypes, WidenedArgtypes}, + cache_argtypes::Vector{Any}) + (; argtypes) = simple_argtypes + given_argtypes = Vector{Any}(undef, length(argtypes)) + for i = 1:length(argtypes) + given_argtypes[i] = widenslotwrapper(argtypes[i]) + end + return pick_const_args!(𝕃, given_argtypes, cache_argtypes) +end + +function pick_const_arg(𝕃::AbstractLattice, @nospecialize(given_argtype), @nospecialize(cache_argtype)) + if !is_argtype_match(𝕃, given_argtype, cache_argtype, false) + # prefer the argtype we were given over the one computed from `mi` + if (isa(given_argtype, PartialStruct) && isa(cache_argtype, Type) && + !⊏(𝕃, given_argtype, cache_argtype)) + # if the type information of this `PartialStruct` is less strict than + # declared method signature, narrow it down using `tmeet` + given_argtype = tmeet(𝕃, given_argtype, cache_argtype) + end + return given_argtype + else + return cache_argtype + end +end + +function pick_const_args!(𝕃::AbstractLattice, given_argtypes::Vector{Any}, cache_argtypes::Vector{Any}) + ngiven = length(given_argtypes) + ncache = length(cache_argtypes) + if ngiven == 0 || ncache == 0 + return Any[] + end + given_va = given_argtypes[end] + cache_va = cache_argtypes[end] + if isvarargtype(given_va) + va = unwrapva(given_va) + if isvarargtype(cache_va) + # Process the common prefix, then join + nprocessargs = max(ngiven-1, ncache-1) + resize!(given_argtypes, nprocessargs+1) + given_argtypes[end] = Vararg{pick_const_arg(𝕃, va, unwrapva(cache_va))} + else + nprocessargs = ncache + resize!(given_argtypes, nprocessargs) + end + for i = ngiven:nprocessargs + given_argtypes[i] = va + end + elseif isvarargtype(cache_va) + nprocessargs = ngiven + else + @assert ngiven == ncache + nprocessargs = ngiven + end + for i = 1:nprocessargs + given_argtype = given_argtypes[i] + cache_argtype = argtype_by_index(cache_argtypes, i) + given_argtypes[i] = pick_const_arg(𝕃, given_argtype, cache_argtype) + end + return given_argtypes +end + +function is_argtype_match(𝕃::AbstractLattice, + @nospecialize(given_argtype), + @nospecialize(cache_argtype), + overridden_by_const::Bool) + if is_forwardable_argtype(𝕃, given_argtype) + return is_lattice_equal(𝕃, given_argtype, cache_argtype) + else + return !overridden_by_const + end +end + +function va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, nargs::UInt, isva::Bool) + nargs = Int(nargs) + if isva || (!isempty(given_argtypes) && isvarargtype(given_argtypes[end])) + isva_given_argtypes = Vector{Any}(undef, nargs) + for i = 1:(nargs-isva) + newarg = argtype_by_index(given_argtypes, i) + if isva && has_conditional(𝕃) && isa(newarg, Conditional) + if newarg.slot > (nargs-isva) + newarg = widenconditional(newarg) + end + end + isva_given_argtypes[i] = newarg + end + if isva + if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end]) + last = length(given_argtypes) + else + last = nargs + if has_conditional(𝕃) + for i = last:length(given_argtypes) + newarg = given_argtypes[i] + if isa(newarg, Conditional) && newarg.slot > (nargs-isva) + given_argtypes[i] = 
widenconditional(newarg) + end + end + end + end + isva_given_argtypes[nargs] = tuple_tfunc(𝕃, given_argtypes[last:end]) + end + return isva_given_argtypes + end + @assert length(given_argtypes) == nargs "invalid `given_argtypes` for `mi`" + return given_argtypes +end + +function most_general_argtypes(method::Union{Method,Nothing}, @nospecialize(specTypes)) + mi_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...] + nargtypes = length(mi_argtypes) + nargs = isa(method, Method) ? Int(method.nargs) : 0 + if length(mi_argtypes) < nargs && isvarargtype(mi_argtypes[end]) + resize!(mi_argtypes, nargs) + end + # Now, we propagate type info from `mi_argtypes` into `cache_argtypes`, improving some + # type info as we go (where possible). Note that if we're dealing with a varargs method, + # we already handled the last element of `cache_argtypes` (and decremented `nargs` so that + # we don't overwrite the result of that work here). + tail_index = min(nargtypes, nargs) + local lastatype + for i = 1:nargtypes + atyp = mi_argtypes[i] + wasva = false + if i == nargtypes && isvarargtype(atyp) + wasva = true + atyp = unwrapva(atyp) + end + atyp = unwraptv(atyp) + if issingletontype(atyp) + # replace singleton types with their equivalent Const object + atyp = Const(atyp.instance) + elseif isconstType(atyp) + atyp = Const(atyp.parameters[1]) + else + atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes)) + end + mi_argtypes[i] = atyp + if wasva + lastatype = atyp + mi_argtypes[end] = Vararg{widenconst(atyp)} + end + end + for i = (tail_index+1):(nargs-1) + mi_argtypes[i] = lastatype + end + return mi_argtypes +end + +# eliminate free `TypeVar`s in order to make the life much easier down the road: +# at runtime only `Type{...}::DataType` can contain invalid type parameters, and other +# malformed types here are user-constructed type arguments given at an inference entry +# so this function will replace only the malformed `Type{...}::DataType` with `Type` +# and simply replace other possibilities with `Any` +function elim_free_typevars(@nospecialize t) + if has_free_typevars(t) + return isType(t) ? Type : Any + else + return t + end +end + +function cache_lookup(𝕃::AbstractLattice, mi::MethodInstance, given_argtypes::Vector{Any}, + cache::Vector{InferenceResult}) + method = mi.def::Method + nargtypes = length(given_argtypes) + for cached_result in cache + cached_result.linfo === mi || @goto next_cache + cache_argtypes = cached_result.argtypes + @assert length(cache_argtypes) == nargtypes "invalid `cache_argtypes` for `mi`" + cache_overridden_by_const = cached_result.overridden_by_const::BitVector + for i in 1:nargtypes + if !is_argtype_match(𝕃, given_argtypes[i], cache_argtypes[i], cache_overridden_by_const[i]) + @goto next_cache + end + end + return cached_result + @label next_cache + end + return nothing +end diff --git a/Compiler/src/inferencestate.jl b/Compiler/src/inferencestate.jl new file mode 100644 index 0000000000000..0ea0fc684b689 --- /dev/null +++ b/Compiler/src/inferencestate.jl @@ -0,0 +1,1214 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# data structures +# =============== + +mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int} + elems::BitSet + min::Int + # Stores whether min is exact or a lower bound + # If exact, it is not set in elems + min_exact::Bool + max::Int +end + +function BitSetBoundedMinPrioritySet(max::Int) + bs = BitSet() + bs.offset = 0 + BitSetBoundedMinPrioritySet(bs, max+1, true, max) +end + +@noinline function _advance_bsbmp!(bsbmp::BitSetBoundedMinPrioritySet) + @assert !bsbmp.min_exact + bsbmp.min = _bits_findnext(bsbmp.elems.bits, bsbmp.min)::Int + bsbmp.min < 0 && (bsbmp.min = bsbmp.max + 1) + bsbmp.min_exact = true + delete!(bsbmp.elems, bsbmp.min) + return nothing +end + +function isempty(bsbmp::BitSetBoundedMinPrioritySet) + if bsbmp.min > bsbmp.max + return true + end + bsbmp.min_exact && return false + _advance_bsbmp!(bsbmp) + return bsbmp.min > bsbmp.max +end + +function popfirst!(bsbmp::BitSetBoundedMinPrioritySet) + bsbmp.min_exact || _advance_bsbmp!(bsbmp) + m = bsbmp.min + m > bsbmp.max && throw(ArgumentError("BitSetBoundedMinPrioritySet must be non-empty")) + bsbmp.min = m+1 + bsbmp.min_exact = false + return m +end + +function push!(bsbmp::BitSetBoundedMinPrioritySet, idx::Int) + if idx <= bsbmp.min + if bsbmp.min_exact && bsbmp.min < bsbmp.max && idx != bsbmp.min + push!(bsbmp.elems, bsbmp.min) + end + bsbmp.min = idx + bsbmp.min_exact = true + return nothing + end + push!(bsbmp.elems, idx) + return nothing +end + +function in(idx::Int, bsbmp::BitSetBoundedMinPrioritySet) + if bsbmp.min_exact && idx == bsbmp.min + return true + end + return idx in bsbmp.elems +end + +iterate(bsbmp::BitSetBoundedMinPrioritySet, s...) = iterate(bsbmp.elems, s...) + +function append!(bsbmp::BitSetBoundedMinPrioritySet, itr) + for val in itr + push!(bsbmp, val) + end +end + +mutable struct TwoPhaseVectorView <: AbstractVector{Int} + const data::Vector{Int} + count::Int + const range::UnitRange{Int} +end +size(tpvv::TwoPhaseVectorView) = (tpvv.count,) +function getindex(tpvv::TwoPhaseVectorView, i::Int) + checkbounds(tpvv, i) + @inbounds tpvv.data[first(tpvv.range) + i - 1] +end +function push!(tpvv::TwoPhaseVectorView, v::Int) + tpvv.count += 1 + tpvv.data[first(tpvv.range) + tpvv.count - 1] = v + return nothing +end + +""" + mutable struct TwoPhaseDefUseMap + +This struct is intended as a memory- and GC-pressure-efficient mechanism +for incrementally computing def-use maps. The idea is that the def-use map +is constructed in two passes over the IR. In the first, we simply count the +number of uses, computing the number of uses for each def as well as the +total number of uses. In the second pass, we actually fill in the def-use +information. + +The idea is that either of these two phases can be combined with other useful +work that needs to scan the instruction stream anyway, while avoiding the +significant allocation pressure of e.g. allocating an array for every SSA value +or attempting to dynamically move things around as new uses are discovered. + +The def-use map is presented as a vector of vectors. For every def, indexing +into the map will return a vector of uses.
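+
+A rough usage sketch (illustrative only; the `ir::IRCode` traversal shown here is an
+assumption of the example, not part of this struct's API):
+
+    tpdum = TwoPhaseDefUseMap(length(ir.stmts))
+    for stmt in ir.stmts.stmt, op in userefs(stmt)                  # phase 1: count uses
+        op[] isa SSAValue && count!(tpdum, op[])
+    end
+    complete!(tpdum)
+    for (idx, stmt) in enumerate(ir.stmts.stmt), op in userefs(stmt)  # phase 2: record uses
+        op[] isa SSAValue && push!(tpdum[op[].id], idx)
+    end
+    tpdum[defidx]   # statement indices that use SSA value `defidx`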
+""" +mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView} + ssa_uses::Vector{Int} + data::Vector{Int} + complete::Bool +end + +function complete!(tpdum::TwoPhaseDefUseMap) + cumsum = 0 + for i = 1:length(tpdum.ssa_uses) + this_val = cumsum + 1 + cumsum += tpdum.ssa_uses[i] + tpdum.ssa_uses[i] = this_val + end + resize!(tpdum.data, cumsum) + fill!(tpdum.data, 0) + tpdum.complete = true +end + +function TwoPhaseDefUseMap(nssas::Int) + ssa_uses = zeros(Int, nssas) + data = Int[] + complete = false + return TwoPhaseDefUseMap(ssa_uses, data, complete) +end + +function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue) + @assert !tpdum.complete + tpdum.ssa_uses[arg.id] += 1 +end + +function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int) + if !tpdum.complete + tpdum.ssa_uses[def] -= 1 + else + range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1)) + # TODO: Sorted + useidx = findfirst(idx->tpdum.data[idx] == use, range) + @assert useidx !== nothing + idx = range[useidx] + while idx < lastindex(range) + ndata = tpdum.data[idx+1] + ndata == 0 && break + tpdum.data[idx] = ndata + idx += 1 + end + tpdum.data[idx] = 0 + end +end +kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) = + kill_def_use!(tpdum, def.id, use) + +function getindex(tpdum::TwoPhaseDefUseMap, idx::Int) + @assert tpdum.complete + range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1)) + # TODO: Make logarithmic + nelems = 0 + for i in range + tpdum.data[i] == 0 && break + nelems += 1 + end + return TwoPhaseVectorView(tpdum.data, nelems, range) +end + +mutable struct LazyCFGReachability + ir::IRCode + reachability::CFGReachability + LazyCFGReachability(ir::IRCode) = new(ir) +end +function get!(x::LazyCFGReachability) + isdefined(x, :reachability) && return x.reachability + domtree = construct_domtree(x.ir) + return x.reachability = CFGReachability(x.ir.cfg, domtree) +end + +mutable struct LazyGenericDomtree{IsPostDom} + ir::IRCode + domtree::GenericDomTree{IsPostDom} + LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir) +end +function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom} + isdefined(x, :domtree) && return x.domtree + return @timeit "domtree 2" x.domtree = IsPostDom ? + construct_postdomtree(x.ir) : + construct_domtree(x.ir) +end + +const LazyDomtree = LazyGenericDomtree{false} +const LazyPostDomtree = LazyGenericDomtree{true} + +# InferenceState +# ============== + +""" + const VarTable = Vector{VarState} + +The extended lattice that maps local variables to inferred type represented as `AbstractLattice`. +Each index corresponds to the `id` of `SlotNumber` which identifies each local variable. +Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement +to enable flow-sensitive analysis. 
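+
+For example (purely illustrative), in the lowered code of `f(x) = (y = x + 1; y)`, slot 1 is
+`#self#`, slot 2 is `x` and slot 3 is `y`, so `vartable[3]` holds the `VarState` currently
+inferred for `y`.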
+""" +const VarTable = Vector{VarState} + +struct StatementState + vtypes::Union{VarTable,Nothing} + saw_latestworld::Bool +end + +const CACHE_MODE_NULL = 0x00 # not cached, optimization optional +const CACHE_MODE_GLOBAL = 0x01 << 0 # cached globally, optimization required +const CACHE_MODE_LOCAL = 0x01 << 1 # cached locally, optimization required +const CACHE_MODE_VOLATILE = 0x01 << 2 # not cached, optimization required + +abstract type Handler end +get_enter_idx(handler::Handler) = get_enter_idx_impl(handler)::Int + +mutable struct TryCatchFrame <: Handler + exct + scopet + const enter_idx::Int + scope_uses::Vector{Int} + TryCatchFrame(@nospecialize(exct), @nospecialize(scopet), enter_idx::Int) = + new(exct, scopet, enter_idx) +end +TryCatchFrame(stmt::EnterNode, pc::Int) = + TryCatchFrame(Bottom, isdefined(stmt, :scope) ? Bottom : nothing, pc) +get_enter_idx_impl((; enter_idx)::TryCatchFrame) = enter_idx + +struct SimpleHandler <: Handler + enter_idx::Int +end +SimpleHandler(::EnterNode, pc::Int) = SimpleHandler(pc) +get_enter_idx_impl((; enter_idx)::SimpleHandler) = enter_idx + +struct HandlerInfo{T<:Handler} + handlers::Vector{T} + handler_at::Vector{Tuple{Int,Int}} # tuple of current (handler, exception stack) value at the pc +end + +struct WorldWithRange + this::UInt + valid_worlds::WorldRange + function WorldWithRange(world::UInt, valid_worlds::WorldRange) + if !(world in valid_worlds) + error("invalid age range update") + end + return new(world, valid_worlds) + end +end + +intersect(world::WorldWithRange, valid_worlds::WorldRange) = + WorldWithRange(world.this, intersect(world.valid_worlds, valid_worlds)) + +mutable struct InferenceState + #= information about this method instance =# + linfo::MethodInstance + world::WorldWithRange + mod::Module + sptypes::Vector{VarState} + slottypes::Vector{Any} + src::CodeInfo + cfg::CFG + spec_info::SpecInfo + + #= intermediate states for local abstract interpretation =# + currbb::Int + currpc::Int + ip::BitSet#=TODO BoundedMinPrioritySet=# # current active instruction pointers + handler_info::Union{Nothing,HandlerInfo{TryCatchFrame}} + ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info + # TODO: Could keep this sparsely by doing structural liveness analysis ahead of time. 
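+ # entry state for each basic block; the in-progress state of the block currently being
+ # visited lives in a local `currstate` inside `typeinf_local` and is merged back into
+ # successors via `update_bbstate!`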
+ bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet + bb_saw_latestworld::Vector{Bool} + ssavaluetypes::Vector{Any} + ssaflags::Vector{UInt32} + edges::Vector{Any} + stmt_info::Vector{CallInfo} + + #= intermediate states for interprocedural abstract interpretation =# + tasks::Vector{WorkThunk} + pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue + limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return + cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller + + # IPO tracking of in-process work, shared with all frames given AbstractInterpreter + callstack #::Vector{AbsIntState} + parentid::Int # index into callstack of the parent frame that originally added this frame (call frame_parent to extract the current parent of the SCC) + frameid::Int # index into callstack at which this object is found (or zero, if this is not a cached frame and has no parent) + cycleid::Int # index into the callstack of the topmost frame in the cycle (all frames in the same cycle share the same cycleid) + + #= results =# + result::InferenceResult # remember where to put the result + unreachable::BitSet # statements that were found to be statically unreachable + bestguess #::Type + exc_bestguess + ipo_effects::Effects + + #= flags =# + # Whether to restrict inference of abstract call sites to avoid excessive work + # Set by default for toplevel frame. + restrict_abstract_call_sites::Bool + cache_mode::UInt8 # TODO move this to InferenceResult? + insert_coverage::Bool + + # The interpreter that created this inference state. Not looked at by + # NativeInterpreter. But other interpreters may use this to detect cycles + interp::AbstractInterpreter + + # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results + function InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::UInt8, + interp::AbstractInterpreter) + mi = result.linfo + world = get_inference_world(interp) + if world == typemax(UInt) + error("Entering inference from a generated function with an invalid world") + end + def = mi.def + mod = isa(def, Method) ? def.module : def + sptypes = sptypes_from_meth_instance(mi) + code = src.code::Vector{Any} + cfg = compute_basic_blocks(code) + spec_info = SpecInfo(src) + + currbb = currpc = 1 + ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1) + handler_info = ComputeTryCatch{TryCatchFrame}()(code) + nssavalues = src.ssavaluetypes::Int + ssavalue_uses = find_ssavalue_uses(code, nssavalues) + nstmts = length(code) + edges = [] + stmt_info = CallInfo[ NoCallInfo() for i = 1:nstmts ] + + nslots = length(src.slotflags) + slottypes = Vector{Any}(undef, nslots) + bb_saw_latestworld = Bool[false for i = 1:length(cfg.blocks)] + bb_vartables = Union{Nothing,VarTable}[ nothing for i = 1:length(cfg.blocks) ] + bb_vartable1 = bb_vartables[1] = VarTable(undef, nslots) + argtypes = result.argtypes + + argtypes = va_process_argtypes(typeinf_lattice(interp), argtypes, src.nargs, src.isva) + + nargtypes = length(argtypes) + for i = 1:nslots + argtyp = (i > nargtypes) ? 
Bottom : argtypes[i] + if argtyp === Bool && has_conditional(typeinf_lattice(interp)) + argtyp = Conditional(i, Const(true), Const(false)) + end + slottypes[i] = argtyp + bb_vartable1[i] = VarState(argtyp, i > nargtypes) + end + src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ] + ssaflags = copy(src.ssaflags) + + unreachable = BitSet() + pclimitations = IdSet{InferenceState}() + limitations = IdSet{InferenceState}() + cycle_backedges = Tuple{InferenceState,Int}[] + callstack = AbsIntState[] + tasks = WorkThunk[] + + valid_worlds = WorldRange(1, get_world_counter()) + bestguess = Bottom + exc_bestguess = Bottom + ipo_effects = EFFECTS_TOTAL + + insert_coverage = should_insert_coverage(mod, src.debuginfo) + if insert_coverage + ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE) + end + + if def isa Method + nonoverlayed = is_nonoverlayed(def) ? ALWAYS_TRUE : + is_effect_overridden(def, :consistent_overlay) ? CONSISTENT_OVERLAY : + ALWAYS_FALSE + ipo_effects = Effects(ipo_effects; nonoverlayed) + end + + restrict_abstract_call_sites = isa(def, Module) + + parentid = frameid = cycleid = 0 + + this = new( + mi, WorldWithRange(world, valid_worlds), mod, sptypes, slottypes, src, cfg, spec_info, + currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, ssaflags, edges, stmt_info, + tasks, pclimitations, limitations, cycle_backedges, callstack, parentid, frameid, cycleid, + result, unreachable, bestguess, exc_bestguess, ipo_effects, + restrict_abstract_call_sites, cache_mode, insert_coverage, + interp) + + # some more setups + if !iszero(cache_mode & CACHE_MODE_LOCAL) + push!(get_inference_cache(interp), result) + end + if !iszero(cache_mode & CACHE_MODE_GLOBAL) + push!(callstack, this) + this.cycleid = this.frameid = length(callstack) + end + + # Apply generated function restrictions + if src.min_world != 1 || src.max_world != typemax(UInt) + # From generated functions + update_valid_age!(this, WorldRange(src.min_world, src.max_world)) + end + + return this + end +end + +gethandler(frame::InferenceState, pc::Int=frame.currpc) = gethandler(frame.handler_info, pc) +gethandler(::Nothing, ::Int) = nothing +function gethandler(handler_info::HandlerInfo, pc::Int) + handler_idx = handler_info.handler_at[pc][1] + handler_idx == 0 && return nothing + return handler_info.handlers[handler_idx] +end + +is_nonoverlayed(m::Method) = !isdefined(m, :external_mt) +is_nonoverlayed(interp::AbstractInterpreter) = !isoverlayed(method_table(interp)) +isoverlayed(::MethodTableView) = error("unsatisfied MethodTableView interface") +isoverlayed(::InternalMethodTable) = false +isoverlayed(::OverlayMethodTable) = true +isoverlayed(mt::CachedMethodTable) = isoverlayed(mt.table) + +is_inferred(sv::InferenceState) = is_inferred(sv.result) +is_inferred(result::InferenceResult) = result.result !== nothing + +was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND + +struct ComputeTryCatch{T<:Handler} end + +const compute_trycatch = ComputeTryCatch{SimpleHandler}() + +(compute_trycatch::ComputeTryCatch{SimpleHandler})(ir::IRCode) = + compute_trycatch(ir.stmts.stmt, ir.cfg.blocks) + +""" + (::ComputeTryCatch{Handler})(code, [, bbs]) -> handler_info::Union{Nothing,HandlerInfo{Handler}} + const compute_trycatch = ComputeTryCatch{SimpleHandler}() + +Given the code of a function, compute, at every statement, the current +try/catch handler, and the current exception stack top. This function returns +a tuple of: + + 1. 
`handler_info.handler_at`: A statement length vector of tuples
+       `(catch_handler, exception_stack)`, which are indices into `handlers`
+
+    2. `handler_info.handlers`: A `Handler` vector of handlers
+"""
+function (::ComputeTryCatch{Handler})(code::Vector{Any}, bbs::Union{Vector{BasicBlock},Nothing}=nothing) where Handler
+    # The goal initially is to record the frame like this for the state at exit:
+    # 1: (enter 3) # == 0
+    # 3: (expr)    # == 1
+    # 3: (leave %1) # == 1
+    # 4: (expr)    # == 0
+    # then we can find all `try`s by walking backwards from :enter statements,
+    # and all `catch`es by looking at the statement after the :enter
+    n = length(code)
+    ip = BitSet()
+    ip.offset = 0 # for _bits_findnext
+    push!(ip, n + 1)
+    handler_info = nothing
+
+    # start from all :enter statements and record the location of the try
+    for pc = 1:n
+        stmt = code[pc]
+        if isa(stmt, EnterNode)
+            (;handlers, handler_at) = handler_info =
+                (handler_info === nothing ? HandlerInfo{Handler}(Handler[], fill((0, 0), n)) : handler_info)
+            l = stmt.catch_dest
+            (bbs !== nothing) && (l != 0) && (l = first(bbs[l].stmts))
+            push!(handlers, Handler(stmt, pc))
+            handler_id = length(handlers)
+            handler_at[pc + 1] = (handler_id, 0)
+            push!(ip, pc + 1)
+            if l != 0
+                handler_at[l] = (0, handler_id)
+                push!(ip, l)
+            end
+        end
+    end
+
+    if handler_info === nothing
+        return nothing
+    end
+
+    # now forward those marks to all :leave statements
+    (;handlers, handler_at) = handler_info
+    while true
+        # make progress on the active ip set
+        pc = _bits_findnext(ip.bits, 0)::Int
+        pc > n && break
+        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
+            pc´ = pc + 1 # next program-counter (after executing instruction)
+            delete!(ip, pc)
+            cur_stacks = handler_at[pc]
+            @assert cur_stacks != (0, 0) "unbalanced try/catch"
+            stmt = code[pc]
+            if isa(stmt, GotoNode)
+                pc´ = stmt.label
+                (bbs !== nothing) && (pc´ = first(bbs[pc´].stmts))
+            elseif isa(stmt, GotoIfNot)
+                l = stmt.dest::Int
+                (bbs !== nothing) && (l = first(bbs[l].stmts))
+                if handler_at[l] != cur_stacks
+                    @assert handler_at[l][1] == 0 || handler_at[l][1] == cur_stacks[1] "unbalanced try/catch"
+                    handler_at[l] = cur_stacks
+                    push!(ip, l)
+                end
+            elseif isa(stmt, ReturnNode)
+                @assert !isdefined(stmt, :val) || cur_stacks[1] == 0 "unbalanced try/catch"
+                break
+            elseif isa(stmt, EnterNode)
+                l = stmt.catch_dest
+                (bbs !== nothing) && (l != 0) && (l = first(bbs[l].stmts))
+                # We assigned a handler number above. Here we just merge that
+                # with our current handler information.
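+                # (the catch destination `l` inherits the enclosing handler while keeping
+                #  the exception-stack entry recorded for it in the first pass above)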
+ if l != 0 + handler_at[l] = (cur_stacks[1], handler_at[l][2]) + end + cur_stacks = (handler_at[pc´][1], cur_stacks[2]) + elseif isa(stmt, Expr) + head = stmt.head + if head === :leave + l = 0 + for j = 1:length(stmt.args) + arg = stmt.args[j] + if arg === nothing + continue + else + enter_stmt = code[(arg::SSAValue).id] + if enter_stmt === nothing + continue + end + @assert isa(enter_stmt, EnterNode) "malformed :leave" + end + l += 1 + end + cur_hand = cur_stacks[1] + for i = 1:l + cur_hand = handler_at[get_enter_idx(handlers[cur_hand])][1] + end + cur_stacks = (cur_hand, cur_stacks[2]) + cur_stacks == (0, 0) && break + elseif head === :pop_exception + cur_stacks = (cur_stacks[1], handler_at[(stmt.args[1]::SSAValue).id][2]) + cur_stacks == (0, 0) && break + end + end + + pc´ > n && break # can't proceed with the fast-path fall-through + if handler_at[pc´] != cur_stacks + handler_at[pc´] = cur_stacks + elseif !in(pc´, ip) + break # already visited + end + pc = pc´ + end + end + + @assert first(ip) == n + 1 + return handler_info +end + +# check if coverage mode is enabled +function should_insert_coverage(mod::Module, debuginfo::DebugInfo) + coverage_enabled(mod) && return true + JLOptions().code_coverage == 3 || return false + # path-specific coverage mode: if any line falls in a tracked file enable coverage for all + return _should_insert_coverage(debuginfo) +end + +_should_insert_coverage(mod::Symbol) = is_file_tracked(mod) +_should_insert_coverage(mod::Method) = _should_insert_coverage(mod.file) +_should_insert_coverage(mod::MethodInstance) = _should_insert_coverage(mod.def) +_should_insert_coverage(mod::Module) = false +function _should_insert_coverage(info::DebugInfo) + linetable = info.linetable + linetable === nothing || (_should_insert_coverage(linetable) && return true) + _should_insert_coverage(info.def) && return true + return false +end + +function InferenceState(result::InferenceResult, cache_mode::UInt8, interp::AbstractInterpreter) + # prepare an InferenceState object for inferring lambda + world = get_inference_world(interp) + mi = result.linfo + src = retrieve_code_info(mi, world) + src === nothing && return nothing + maybe_validate_code(mi, src, "lowered") + return InferenceState(result, src, cache_mode, interp) +end +InferenceState(result::InferenceResult, cache_mode::Symbol, interp::AbstractInterpreter) = + InferenceState(result, convert_cache_mode(cache_mode), interp) +InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::Symbol, interp::AbstractInterpreter) = + InferenceState(result, src, convert_cache_mode(cache_mode), interp) + +function convert_cache_mode(cache_mode::Symbol) + if cache_mode === :global + return CACHE_MODE_GLOBAL + elseif cache_mode === :local + return CACHE_MODE_LOCAL + elseif cache_mode === :volatile + return CACHE_MODE_VOLATILE + elseif cache_mode === :no + return CACHE_MODE_NULL + end + error("unexpected `cache_mode` is given") +end + +""" + constrains_param(var::TypeVar, sig, covariant::Bool, type_constrains::Bool) + +Check if `var` will be constrained to have a definite value +in any concrete leaftype subtype of `sig`. + +It is used as a helper to determine whether type intersection is guaranteed to be able to +find a value for a particular type parameter. +A necessary condition for type intersection to not assign a parameter is that it only +appears in a `Union[All]` and during subtyping some other union component (that does not +constrain the type parameter) is selected. 
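+
+For a rough, illustrative sketch (not part of the implementation):
+
+    tv = TypeVar(:T)
+    constrains_param(tv, Tuple{typeof(sin), tv}, true)              # true: a concrete subtype must fix `T`
+    constrains_param(tv, Tuple{typeof(sin), Union{tv, Int}}, true)  # false: the `Int` component may be selected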
+ +The `type_constrains` flag determines whether Type{T} is considered to be constraining +`T`. This is not true in general, because of the existence of types with free type +parameters, however, some callers would like to ignore this corner case. +""" +function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool, type_constrains::Bool=false) + typ === var && return true + while typ isa UnionAll + covariant && constrains_param(var, typ.var.ub, covariant, type_constrains) && return true + # typ.var.lb doesn't constrain var + typ = typ.body + end + if typ isa Union + # for unions, verify that both options would constrain var + ba = constrains_param(var, typ.a, covariant, type_constrains) + bb = constrains_param(var, typ.b, covariant, type_constrains) + (ba && bb) && return true + elseif typ isa DataType + # return true if any param constrains var + fc = length(typ.parameters) + if fc > 0 + if typ.name === Tuple.name + # vararg tuple needs special handling + for i in 1:(fc - 1) + p = typ.parameters[i] + constrains_param(var, p, covariant, type_constrains) && return true + end + lastp = typ.parameters[fc] + vararg = unwrap_unionall(lastp) + if vararg isa Core.TypeofVararg && isdefined(vararg, :N) + constrains_param(var, vararg.N, covariant, type_constrains) && return true + # T = vararg.parameters[1] doesn't constrain var + else + constrains_param(var, lastp, covariant, type_constrains) && return true + end + else + if typ.name === typename(Type) && typ.parameters[1] === var && var.ub === Any + # Types with free type parameters are <: Type cause the typevar + # to be unconstrained because Type{T} with free typevars is illegal + return type_constrains + end + for i in 1:fc + p = typ.parameters[i] + constrains_param(var, p, false, type_constrains) && return true + end + end + end + end + return false +end + +const EMPTY_SPTYPES = VarState[] + +function sptypes_from_meth_instance(mi::MethodInstance) + def = mi.def + isa(def, Method) || return EMPTY_SPTYPES # toplevel + sig = def.sig + if isempty(mi.sparam_vals) + isa(sig, UnionAll) || return EMPTY_SPTYPES + # mi is unspecialized + spvals = Any[] + sig′ = sig + while isa(sig′, UnionAll) + push!(spvals, sig′.var) + sig′ = sig′.body + end + else + spvals = mi.sparam_vals + end + nvals = length(spvals) + sptypes = Vector{VarState}(undef, nvals) + for i = 1:nvals + v = spvals[i] + if v isa TypeVar + temp = sig + for j = 1:i-1 + temp = temp.body + end + vᵢ = (temp::UnionAll).var + sigtypes = (unwrap_unionall(temp)::DataType).parameters + for j = 1:length(sigtypes) + sⱼ = sigtypes[j] + if isType(sⱼ) && sⱼ.parameters[1] === vᵢ + # if this parameter came from `arg::Type{T}`, + # then `arg` is more precise than `Type{T} where lb<:T<:ub` + ty = fieldtype(mi.specTypes, j) + @goto ty_computed + elseif (va = va_from_vatuple(sⱼ)) !== nothing + # if this parameter came from `::Tuple{.., Vararg{T,vᵢ}}`, + # then `vᵢ` is known to be `Int` + if isdefined(va, :N) && va.N === vᵢ + ty = Int + @goto ty_computed + end + end + end + ub = unwraptv_ub(v) + if has_free_typevars(ub) + ub = Any + end + lb = unwraptv_lb(v) + if has_free_typevars(lb) + lb = Bottom + end + if Any === ub && lb === Bottom + ty = Any + else + tv = TypeVar(v.name, lb, ub) + ty = UnionAll(tv, Type{tv}) + end + @label ty_computed + undef = !(let sig=sig + # if the specialized signature `linfo.specTypes` doesn't contain any free + # type variables, we can use it for a more accurate analysis of whether `v` + # is constrained or not, otherwise we should use `def.sig` which always + # 
doesn't contain any free type variables + if !has_free_typevars(mi.specTypes) + sig = mi.specTypes + end + @assert !has_free_typevars(sig) + constrains_param(v, sig, #=covariant=#true) + end) + elseif isvarargtype(v) + # if this parameter came from `func(..., ::Vararg{T,v})`, + # so the type is known to be `Int` + ty = Int + undef = false + else + ty = Const(v) + undef = false + end + sptypes[i] = VarState(ty, undef) + end + return sptypes +end + +function va_from_vatuple(@nospecialize(t)) + @_foldable_meta + t = unwrap_unionall(t) + if isa(t, DataType) + n = length(t.parameters) + if n > 0 + va = t.parameters[n] + if isvarargtype(va) + return va + end + end + end + return nothing +end + +_topmod(sv::InferenceState) = _topmod(frame_module(sv)) + +function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize(new), frame::InferenceState) + ssavaluetypes = frame.ssavaluetypes + old = ssavaluetypes[ssa_id] + if old === NOT_FOUND || !is_lattice_equal(𝕃ᵢ, new, old) + ssavaluetypes[ssa_id] = new + W = frame.ip + for r in frame.ssavalue_uses[ssa_id] + if was_reached(frame, r) + usebb = block_for_inst(frame.cfg, r) + if usebb != frame.currbb || r < ssa_id + push!(W, usebb) + end + end + end + end + return nothing +end + +function narguments(sv::InferenceState, include_va::Bool=true) + nargs = Int(sv.src.nargs) + if !include_va + nargs -= sv.src.isva + end + return nargs +end + +# IRInterpretationState +# ===================== + +# TODO add `result::InferenceResult` and put the irinterp result into the inference cache? +mutable struct IRInterpretationState + const spec_info::SpecInfo + const ir::IRCode + const mi::MethodInstance + world::WorldWithRange + curridx::Int + const argtypes_refined::Vector{Bool} + const sptypes::Vector{VarState} + const tpdum::TwoPhaseDefUseMap + const ssa_refined::BitSet + const lazyreachability::LazyCFGReachability + const tasks::Vector{WorkThunk} + const edges::Vector{Any} + callstack #::Vector{AbsIntState} + frameid::Int + parentid::Int + + function IRInterpretationState(interp::AbstractInterpreter, + spec_info::SpecInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any}, + world::UInt, min_world::UInt, max_world::UInt) + curridx = 1 + given_argtypes = Vector{Any}(undef, length(argtypes)) + for i = 1:length(given_argtypes) + given_argtypes[i] = widenslotwrapper(argtypes[i]) + end + if isa(mi.def, Method) + argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i]) + for i = 1:length(given_argtypes)] + else + argtypes_refined = Bool[false for i = 1:length(given_argtypes)] + end + empty!(ir.argtypes) + append!(ir.argtypes, given_argtypes) + tpdum = TwoPhaseDefUseMap(length(ir.stmts)) + ssa_refined = BitSet() + lazyreachability = LazyCFGReachability(ir) + valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? 
get_world_counter() : max_world) + tasks = WorkThunk[] + edges = Any[] + callstack = AbsIntState[] + return new(spec_info, ir, mi, WorldWithRange(world, valid_worlds), curridx, argtypes_refined, ir.sptypes, tpdum, + ssa_refined, lazyreachability, tasks, edges, callstack, 0, 0) + end +end + +function IRInterpretationState(interp::AbstractInterpreter, + codeinst::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt) + @assert codeinst.def === mi "method instance is not synced with code instance" + src = @atomic :monotonic codeinst.inferred + if isa(src, String) + src = _uncompressed_ir(codeinst, src) + else + isa(src, CodeInfo) || return nothing + end + spec_info = SpecInfo(src) + ir = inflate_ir(src, mi) + argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, src.nargs, src.isva) + return IRInterpretationState(interp, spec_info, ir, mi, argtypes, world, + codeinst.min_world, codeinst.max_world) +end + +# AbsIntState +# =========== + +const AbsIntState = Union{InferenceState,IRInterpretationState} + +function print_callstack(frame::AbsIntState) + print("=================== Callstack: ==================\n") + frames = frame.callstack::Vector{AbsIntState} + for idx = (frame.frameid == 0 ? 0 : 1):length(frames) + sv = (idx == 0 ? frame : frames[idx]) + idx == frame.frameid && print("*") + print("[") + print(idx) + if sv isa InferenceState && !isa(sv.interp, NativeInterpreter) + print(", ") + print(typeof(sv.interp)) + end + print("] ") + print(frame_instance(sv)) + is_cached(sv) || print(" [uncached]") + sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]") + isempty(callers_in_cycle(sv)) || print(" [cycle=", sv.cycleid, "]") + println() + @assert sv.frameid == idx + end + print("================= End callstack ==================\n") +end + +frame_instance(sv::InferenceState) = sv.linfo +frame_instance(sv::IRInterpretationState) = sv.mi + +function frame_module(sv::AbsIntState) + mi = frame_instance(sv) + def = mi.def + isa(def, Module) && return def + return def.module +end + +function frame_parent(sv::InferenceState) + sv.parentid == 0 && return nothing + callstack = sv.callstack::Vector{AbsIntState} + sv = callstack[sv.cycleid]::InferenceState + sv.parentid == 0 && return nothing + return callstack[sv.parentid] +end +frame_parent(sv::IRInterpretationState) = sv.parentid == 0 ? 
nothing : (sv.callstack::Vector{AbsIntState})[sv.parentid] + +# add the orphan child to the parent and the parent to the child +function assign_parentchild!(child::InferenceState, parent::AbsIntState) + @assert child.frameid in (0, 1) + child.callstack = callstack = parent.callstack::Vector{AbsIntState} + child.parentid = parent.frameid + push!(callstack, child) + child.cycleid = child.frameid = length(callstack) + nothing +end +function assign_parentchild!(child::IRInterpretationState, parent::AbsIntState) + @assert child.frameid in (0, 1) + child.callstack = callstack = parent.callstack::Vector{AbsIntState} + child.parentid = parent.frameid + push!(callstack, child) + child.frameid = length(callstack) + nothing +end + +function is_constproped(sv::InferenceState) + (;overridden_by_const) = sv.result + return overridden_by_const !== nothing +end +is_constproped(::IRInterpretationState) = true + +is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL) +is_cached(::IRInterpretationState) = false + +spec_info(sv::InferenceState) = sv.spec_info +spec_info(sv::IRInterpretationState) = sv.spec_info + +propagate_inbounds(sv::AbsIntState) = spec_info(sv).propagate_inbounds +method_for_inference_limit_heuristics(sv::AbsIntState) = spec_info(sv).method_for_inference_limit_heuristics + +frame_world(sv::InferenceState) = sv.world.this +frame_world(sv::IRInterpretationState) = sv.world.this + +function is_effect_overridden(sv::AbsIntState, effect::Symbol) + if is_effect_overridden(frame_instance(sv), effect) + return true + elseif is_effect_overridden(decode_statement_effects_override(sv), effect) + return true + end + return false +end +function is_effect_overridden(mi::MethodInstance, effect::Symbol) + def = mi.def + return isa(def, Method) && is_effect_overridden(def, effect) +end +is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect) +is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect) + +has_conditional(𝕃::AbstractLattice, ::InferenceState) = has_conditional(𝕃) +has_conditional(::AbstractLattice, ::IRInterpretationState) = false + +# work towards converging the valid age range for sv +function update_valid_age!(sv::AbsIntState, valid_worlds::WorldRange) + sv.world = intersect(sv.world, valid_worlds) + return sv.world.valid_worlds +end + +""" + AbsIntStackUnwind(sv::AbsIntState) + +Iterate through all callers of the given `AbsIntState` in the abstract interpretation stack +(including the given `AbsIntState` itself), visiting children before their parents (i.e. +ascending the tree from the given `AbsIntState`). +Note that cycles may be visited in any order. +""" +struct AbsIntStackUnwind + sv::AbsIntState +end +iterate(unw::AbsIntStackUnwind) = (unw.sv, length(unw.sv.callstack::Vector{AbsIntState})) +function iterate(unw::AbsIntStackUnwind, frame::Int) + frame == 0 && return nothing + return ((unw.sv.callstack::Vector{AbsIntState})[frame], frame - 1) +end + +struct AbsIntCycle + frames::Vector{AbsIntState} + cycleid::Int + cycletop::Int +end +iterate(unw::AbsIntCycle) = unw.cycleid == 0 ? 
nothing : (unw.frames[unw.cycletop], unw.cycletop) +function iterate(unw::AbsIntCycle, frame::Int) + frame == unw.cycleid && return nothing + return (unw.frames[frame - 1], frame - 1) +end + +""" + callers_in_cycle(sv::AbsIntState) + +Iterate through all callers of the given `AbsIntState` in the abstract +interpretation stack (including the given `AbsIntState` itself) that are part +of the same cycle, only if it is part of a cycle with multiple frames. +""" +function callers_in_cycle(sv::InferenceState) + callstack = sv.callstack::Vector{AbsIntState} + cycletop = cycleid = sv.cycleid + while cycletop < length(callstack) + frame = callstack[cycletop + 1] + frame isa InferenceState || break + frame.cycleid == cycleid || break + cycletop += 1 + end + return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop) +end +callers_in_cycle(sv::IRInterpretationState) = AbsIntCycle(sv.callstack::Vector{AbsIntState}, 0, 0) + +get_curr_ssaflag(sv::InferenceState) = sv.ssaflags[sv.currpc] +get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag] + +has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.ssaflags[sv.currpc], flag) +has_curr_ssaflag(sv::IRInterpretationState, flag::UInt32) = has_flag(sv.ir.stmts[sv.curridx][:flag], flag) + +function set_curr_ssaflag!(sv::InferenceState, flag::UInt32, mask::UInt32=typemax(UInt32)) + curr_flag = sv.ssaflags[sv.currpc] + sv.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag + nothing +end + +add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] |= flag +add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = add_flag!(sv.ir.stmts[sv.curridx], flag) + +sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] &= ~flag +sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = sub_flag!(sv.ir.stmts[sv.curridx], flag) + +function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) + if effects.effect_free === EFFECT_FREE_GLOBALLY + # This tracks the global effects + effects = Effects(effects; effect_free=ALWAYS_TRUE) + end + caller.ipo_effects = merge_effects(caller.ipo_effects, effects) + nothing +end +merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return + +decode_statement_effects_override(sv::InferenceState) = decode_statement_effects_override(sv.src.ssaflags[sv.currpc]) +decode_statement_effects_override(sv::IRInterpretationState) = decode_statement_effects_override(UInt32(0)) + +struct InferenceLoopState + rt + effects::Effects + function InferenceLoopState(@nospecialize(rt), effects::Effects) + new(rt, effects) + end +end + +bail_out_toplevel_call(::AbstractInterpreter, sv::InferenceState) = sv.restrict_abstract_call_sites +bail_out_toplevel_call(::AbstractInterpreter, ::IRInterpretationState) = false + +bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) = + state.rt === Any && !is_foldable(state.effects) +bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = + state.rt === Any && !is_foldable(state.effects) + +bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) = + state.rt === Any +bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = + state.rt === Any + +add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return +add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return + +function get_max_methods(interp::AbstractInterpreter, 
@nospecialize(f), sv::AbsIntState) + fmax = get_max_methods_for_func(f) + fmax !== nothing && return fmax + return get_max_methods(interp, sv) +end +function get_max_methods(interp::AbstractInterpreter, @nospecialize(f)) + fmax = get_max_methods_for_func(f) + fmax !== nothing && return fmax + return get_max_methods(interp) +end +function get_max_methods(interp::AbstractInterpreter, sv::AbsIntState) + mmax = get_max_methods_for_module(sv) + mmax !== nothing && return mmax + return get_max_methods(interp) +end +get_max_methods(interp::AbstractInterpreter) = InferenceParams(interp).max_methods + +function get_max_methods_for_func(@nospecialize(f)) + if f !== nothing + fmm = typeof(f).name.max_methods + fmm !== UInt8(0) && return Int(fmm) + end + return nothing +end +get_max_methods_for_module(sv::AbsIntState) = get_max_methods_for_module(frame_module(sv)) +function get_max_methods_for_module(mod::Module) + max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int + max_methods < 0 && return nothing + return max_methods +end + +""" + Future{T} + +Assign-once delayed return value for a value of type `T`, similar to RefValue{T}. +Can be constructed in one of three ways: + +1. With an immediate as `Future{T}(val)` +2. As an assign-once storage location with `Future{T}()`. Assigned (once) using `f[] = val`. +3. As a delayed computation with `Future{T}(callback, dep, interp, sv)` to have + `sv` arrange to call the `callback` with the result of `dep` when it is ready. + +Use `isready` to check if the value is ready, and `getindex` to get the value. +""" +struct Future{T} + later::Union{Nothing,RefValue{T}} + now::Union{Nothing,T} + function Future{T}() where {T} + later = RefValue{T}() + @assert !isassigned(later) "Future{T}() is not allowed for inlinealloc T" + new{T}(later, nothing) + end + Future{T}(x) where {T} = new{T}(nothing, x) + Future(x::T) where {T} = new{T}(nothing, x) +end +isready(f::Future) = f.later === nothing || isassigned(f.later) +getindex(f::Future{T}) where {T} = (later = f.later; later === nothing ? f.now::T : later[]) +function setindex!(f::Future, v) + later = something(f.later) + @assert !isassigned(later) + later[] = v + return f +end +convert(::Type{Future{T}}, x) where {T} = Future{T}(x) # support return type conversion +convert(::Type{Future{T}}, x::Future) where {T} = x::Future{T} +function Future{T}(f, immediate::Bool, interp::AbstractInterpreter, sv::AbsIntState) where {T} + if immediate + return Future{T}(f(interp, sv)) + else + @assert applicable(f, interp, sv) + result = Future{T}() + push!(sv.tasks, function (interp, sv) + result[] = f(interp, sv) + return true + end) + return result + end +end +function Future{T}(f, prev::Future{S}, interp::AbstractInterpreter, sv::AbsIntState) where {T, S} + later = prev.later + if later === nothing + return Future{T}(f(prev[], interp, sv)) + else + @assert Core._hasmethod(Tuple{Core.Typeof(f), S, typeof(interp), typeof(sv)}) + result = Future{T}() + @assert !isa(sv, InferenceState) || interp === sv.interp + push!(sv.tasks, function (interp, sv) + result[] = f(later[], interp, sv) # capture just later, instead of all of prev + return true + end) + return result + end +end + +""" + doworkloop(args...) + +Run a tasks inside the abstract interpreter, returning false if there are none. +Tasks will be run in DFS post-order tree order, such that all child tasks will +be run in the order scheduled, prior to running any subsequent tasks. 
This +allows tasks to generate more child tasks, which will be run before anything else. +Each task will be run repeatedly when returning `false`, until it returns `true`. +""" +function doworkloop(interp::AbstractInterpreter, sv::AbsIntState) + tasks = sv.tasks + prev = length(tasks) + prevcallstack = length(sv.callstack) + prev == 0 && return false + task = pop!(tasks) + completed = task(interp, sv) + tasks = sv.tasks # allow dropping gc root over the previous call + completed isa Bool || throw(TypeError(:return, "", Bool, task)) # print the task on failure as part of the error message, instead of just "@ workloop:line" + if !completed + @assert (length(tasks) >= prev || length(sv.callstack) > prevcallstack) "Task did not complete, but also did not create any child tasks" + push!(tasks, task) + end + # efficient post-order visitor: items pushed are executed in reverse post order such + # that later items are executed before earlier ones, but are fully executed + # (including any dependencies scheduled by them) before going on to the next item + reverse!(tasks, #=start=#prev) + return true +end + + +#macro workthunk(name::Symbol, body) +# name = esc(name) +# body = esc(body) +# return replace_linenums!( +# :(function $name($(esc(interp)), $(esc(sv))) +# $body +# end), __source__) +#end diff --git a/base/compiler/methodtable.jl b/Compiler/src/methodtable.jl similarity index 78% rename from base/compiler/methodtable.jl rename to Compiler/src/methodtable.jl index 8c79b2d8a8468..24a8b1ecf8242 100644 --- a/base/compiler/methodtable.jl +++ b/Compiler/src/methodtable.jl @@ -16,10 +16,7 @@ function iterate(result::MethodLookupResult, args...) end getindex(result::MethodLookupResult, idx::Int) = getindex(result.matches, idx)::MethodMatch -struct MethodMatchResult - matches::MethodLookupResult - overlayed::Bool -end +abstract type MethodTableView end """ struct InternalMethodTable <: MethodTableView @@ -55,14 +52,14 @@ Overlays another method table view with an additional local fast path cache that can respond to repeated, identical queries faster than the original method table. """ struct CachedMethodTable{T<:MethodTableView} <: MethodTableView - cache::IdDict{MethodMatchKey, Union{Nothing,MethodMatchResult}} + cache::IdDict{MethodMatchKey, Union{Nothing,MethodLookupResult}} table::T end -CachedMethodTable(table::T) where T = CachedMethodTable{T}(IdDict{MethodMatchKey, Union{Nothing,MethodMatchResult}}(), table) +CachedMethodTable(table::T) where T = CachedMethodTable{T}(IdDict{MethodMatchKey, Union{Nothing,MethodLookupResult}}(), table) """ findall(sig::Type, view::MethodTableView; limit::Int=-1) -> - MethodMatchResult(matches::MethodLookupResult, overlayed::Bool) or nothing + matches::MethodLookupResult or nothing Find all methods in the given method table `view` that are applicable to the given signature `sig`. If no applicable methods are found, an empty result is returned. @@ -70,11 +67,8 @@ If the number of applicable methods exceeded the specified `limit`, `nothing` is Note that the default setting `limit=-1` does not limit the number of applicable methods. `overlayed` indicates if any of the matching methods comes from an overlayed method table. 
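+
+For a rough usage sketch (illustrative only, assuming the names defined in this file):
+
+    table = InternalMethodTable(get_world_counter())
+    result = findall(Tuple{typeof(sin),Float64}, table; limit=3)
+    result === nothing || result[1]  # a `MethodMatch`; `nothing` only when more than `limit` methods match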
""" -function findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=-1) - result = _findall(sig, nothing, table.world, limit) - result === nothing && return nothing - return MethodMatchResult(result, false) -end +findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=-1) = + _findall(sig, nothing, table.world, limit) function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int=-1) result = _findall(sig, table.mt, table.world, limit) @@ -82,20 +76,18 @@ function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int nr = length(result) if nr ≥ 1 && result[nr].fully_covers # no need to fall back to the internal method table - return MethodMatchResult(result, true) + return result end # fall back to the internal method table fallback_result = _findall(sig, nothing, table.world, limit) fallback_result === nothing && return nothing # merge the fallback match results with the internal method table - return MethodMatchResult( - MethodLookupResult( - vcat(result.matches, fallback_result.matches), - WorldRange( - max(result.valid_worlds.min_world, fallback_result.valid_worlds.min_world), - min(result.valid_worlds.max_world, fallback_result.valid_worlds.max_world)), - result.ambig | fallback_result.ambig), - !isempty(result)) + return MethodLookupResult( + vcat(result.matches, fallback_result.matches), + WorldRange( + max(result.valid_worlds.min_world, fallback_result.valid_worlds.min_world), + min(result.valid_worlds.max_world, fallback_result.valid_worlds.max_world)), + result.ambig | fallback_result.ambig) end function _findall(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt, limit::Int) @@ -122,7 +114,7 @@ end """ findsup(sig::Type, view::MethodTableView) -> - (match::MethodMatch, valid_worlds::WorldRange, overlayed::Bool) or nothing + (match::Union{MethodMatch,Nothing}, valid_worlds::WorldRange, overlayed::Bool) Find the (unique) method such that `sig <: match.method.sig`, while being more specific than any other method with the same property. In other words, find the method @@ -138,21 +130,19 @@ In both cases `nothing` is returned. `overlayed` indicates if any of the matching methods comes from an overlayed method table. 
""" -function findsup(@nospecialize(sig::Type), table::InternalMethodTable) - return (_findsup(sig, nothing, table.world)..., false) -end +findsup(@nospecialize(sig::Type), table::InternalMethodTable) = + _findsup(sig, nothing, table.world) function findsup(@nospecialize(sig::Type), table::OverlayMethodTable) match, valid_worlds = _findsup(sig, table.mt, table.world) - match !== nothing && return match, valid_worlds, true + match !== nothing && return match, valid_worlds # fall back to the internal method table fallback_match, fallback_valid_worlds = _findsup(sig, nothing, table.world) return ( fallback_match, WorldRange( max(valid_worlds.min_world, fallback_valid_worlds.min_world), - min(valid_worlds.max_world, fallback_valid_worlds.max_world)), - false) + min(valid_worlds.max_world, fallback_valid_worlds.max_world))) end function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt) @@ -166,8 +156,3 @@ end # This query is not cached findsup(@nospecialize(sig::Type), table::CachedMethodTable) = findsup(sig, table.table) - -isoverlayed(::MethodTableView) = error("unsatisfied MethodTableView interface") -isoverlayed(::InternalMethodTable) = false -isoverlayed(::OverlayMethodTable) = true -isoverlayed(mt::CachedMethodTable) = isoverlayed(mt.table) diff --git a/Compiler/src/opaque_closure.jl b/Compiler/src/opaque_closure.jl new file mode 100644 index 0000000000000..d0a375c2a54b5 --- /dev/null +++ b/Compiler/src/opaque_closure.jl @@ -0,0 +1,56 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +function compute_ir_rettype(ir::IRCode) + rt = Union{} + for i = 1:length(ir.stmts) + stmt = ir[SSAValue(i)][:stmt] + if isa(stmt, Core.ReturnNode) && isdefined(stmt, :val) + rt = Compiler.tmerge(Compiler.argextype(stmt.val, ir), rt) + end + end + return Compiler.widenconst(rt) +end + +function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool) + argtypes = Vector{Any}(undef, nargs) + for i = 1:nargs + argtypes[i] = Compiler.widenconst(ir.argtypes[i+1]) + end + if isva + lastarg = pop!(argtypes) + if lastarg <: Tuple + append!(argtypes, lastarg.parameters) + else + push!(argtypes, Vararg{Any}) + end + end + return Tuple{argtypes...} +end + +function Core.OpaqueClosure(ir::IRCode, @nospecialize env...; + isva::Bool = false, + slotnames::Union{Nothing,Vector{Symbol}}=nothing, + kwargs...) + # NOTE: we need ir.argtypes[1] == typeof(env) + ir = Core.Compiler.copy(ir) + # if the user didn't specify a definition MethodInstance or filename Symbol to use for the debuginfo, set a filename now + ir.debuginfo.def === nothing && (ir.debuginfo.def = :var"generated IR for OpaqueClosure") + nargtypes = length(ir.argtypes) + nargs = nargtypes-1 + sig = compute_oc_signature(ir, nargs, isva) + rt = compute_ir_rettype(ir) + src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) + if slotnames === nothing + src.slotnames = fill(:none, nargtypes) + else + length(slotnames) == nargtypes || error("mismatched `argtypes` and `slotnames`") + src.slotnames = slotnames + end + src.slotflags = fill(zero(UInt8), nargtypes) + src.slottypes = copy(ir.argtypes) + src.isva = isva + src.nargs = UInt(nargtypes) + src = ir_to_codeinf!(src, ir) + src.rettype = rt + return Base.Experimental.generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; kwargs...) 
+end diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl new file mode 100644 index 0000000000000..12b2f3c9a269f --- /dev/null +++ b/Compiler/src/optimize.jl @@ -0,0 +1,1583 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +############# +# constants # +############# + +# The slot has uses that are not statically dominated by any assignment +# This is implied by `SLOT_USEDUNDEF`. +# If this is not set, all the uses are (statically) dominated by the defs. +# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA. +const SLOT_STATICUNDEF = 1 # slot might be used before it is defined (structurally) +const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once +const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError +# const SLOT_CALLED = 64 + +# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c + +const IR_FLAG_NULL = zero(UInt32) +# This statement is marked as @inbounds by user. +# If replaced by inlining, any contained boundschecks may be removed. +const IR_FLAG_INBOUNDS = one(UInt32) << 0 +# This statement is marked as @inline by user +const IR_FLAG_INLINE = one(UInt32) << 1 +# This statement is marked as @noinline by user +const IR_FLAG_NOINLINE = one(UInt32) << 2 +# This statement is proven :consistent +const IR_FLAG_CONSISTENT = one(UInt32) << 3 +# This statement is proven :effect_free +const IR_FLAG_EFFECT_FREE = one(UInt32) << 4 +# This statement is proven :nothrow +const IR_FLAG_NOTHROW = one(UInt32) << 5 +# This statement is proven :terminates_globally +const IR_FLAG_TERMINATES = one(UInt32) << 6 +#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7 +#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8 +#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9 +const IR_FLAG_NOUB = one(UInt32) << 10 +#const IR_FLAG_NOUBINIB = one(UInt32) << 11 +#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12 +# This statement is :nortcall +const IR_FLAG_NORTCALL = one(UInt32) << 13 +# An optimization pass has updated this statement in a way that may +# have exposed information that inference did not see. Re-running +# inference on this statement may be profitable. 
+const IR_FLAG_REFINED = one(UInt32) << 16 +# This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE +const IR_FLAG_UNUSED = one(UInt32) << 17 +# TODO: Both of these next two should eventually go away once +# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY +const IR_FLAG_EFIIMO = one(UInt32) << 18 +# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY +const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19 + +const NUM_IR_FLAGS = 3 # sync with julia.h + +const IR_FLAGS_EFFECTS = + IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | + IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL + +const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES + +const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM + +has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag + +function iscallstmt(@nospecialize stmt) + stmt isa Expr || return false + head = stmt.head + return head === :call || head === :invoke || head === :foreigncall +end + +function flags_for_effects(effects::Effects) + flags = zero(UInt32) + if is_consistent(effects) + flags |= IR_FLAG_CONSISTENT + end + if is_effect_free(effects) + flags |= IR_FLAG_EFFECT_FREE + elseif is_effect_free_if_inaccessiblememonly(effects) + flags |= IR_FLAG_EFIIMO + end + if is_nothrow(effects) + flags |= IR_FLAG_NOTHROW + end + if is_terminates(effects) + flags |= IR_FLAG_TERMINATES + end + if is_inaccessiblemem_or_argmemonly(effects) + flags |= IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM + end + if is_noub(effects) + flags |= IR_FLAG_NOUB + end + if is_nortcall(effects) + flags |= IR_FLAG_NORTCALL + end + return flags +end + +const TOP_TUPLE = GlobalRef(Core, :tuple) + +# This corresponds to the type of `CodeInfo`'s `inlining_cost` field +const InlineCostType = UInt16 +const MAX_INLINE_COST = typemax(InlineCostType) +const MIN_INLINE_COST = InlineCostType(10) +const MaybeCompressed = Union{CodeInfo, String} + +is_inlineable(@nospecialize src::MaybeCompressed) = + ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST +set_inlineable!(src::CodeInfo, val::Bool) = + src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST) + +function inline_cost_clamp(x::Int) + x > MAX_INLINE_COST && return MAX_INLINE_COST + x < MIN_INLINE_COST && return MIN_INLINE_COST + return convert(InlineCostType, x) +end + +is_declared_inline(@nospecialize src::MaybeCompressed) = + ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1 + +is_declared_noinline(@nospecialize src::MaybeCompressed) = + ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2 + +##################### +# OptimizationState # +##################### + +# return whether this src should be inlined. 
If so, retrieve_ir_for_inlining must return an IRCode from it +function src_inlining_policy(interp::AbstractInterpreter, + @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32) + if isa(src, MaybeCompressed) + src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src) + return src_inlineable + elseif isa(src, IRCode) + return true + end + @assert !isa(src, CodeInstance) # handled by caller + return false +end + +struct InliningState{Interp<:AbstractInterpreter} + edges::Vector{Any} + world::UInt + interp::Interp +end +function InliningState(sv::InferenceState, interp::AbstractInterpreter) + return InliningState(sv.edges, frame_world(sv), interp) +end +function InliningState(interp::AbstractInterpreter) + return InliningState(Any[], get_inference_world(interp), interp) +end + +# get `code_cache(::AbstractInterpreter)` from `state::InliningState` +code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.world) + +mutable struct OptimizationState{Interp<:AbstractInterpreter} + linfo::MethodInstance + src::CodeInfo + ir::Union{Nothing, IRCode} + stmt_info::Vector{CallInfo} + mod::Module + sptypes::Vector{VarState} + slottypes::Vector{Any} + inlining::InliningState{Interp} + cfg::CFG + unreachable::BitSet + bb_vartables::Vector{Union{Nothing,VarTable}} + insert_coverage::Bool +end +function OptimizationState(sv::InferenceState, interp::AbstractInterpreter) + inlining = InliningState(sv, interp) + return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod, + sv.sptypes, sv.slottypes, inlining, sv.cfg, + sv.unreachable, sv.bb_vartables, sv.insert_coverage) +end +function OptimizationState(mi::MethodInstance, src::CodeInfo, interp::AbstractInterpreter) + # prepare src for running optimization passes if it isn't already + nssavalues = src.ssavaluetypes + if nssavalues isa Int + src.ssavaluetypes = Any[ Any for i = 1:nssavalues ] + else + nssavalues = length(src.ssavaluetypes::Vector{Any}) + end + sptypes = sptypes_from_meth_instance(mi) + nslots = length(src.slotflags) + slottypes = src.slottypes + if slottypes === nothing + slottypes = Any[ Any for i = 1:nslots ] + end + stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ] + # cache some useful state computations + def = mi.def + mod = isa(def, Method) ? def.module : def + # Allow using the global MI cache, but don't track edges. 
+ # This method is mostly used for unit testing the optimizer + inlining = InliningState(interp) + cfg = compute_basic_blocks(src.code) + unreachable = BitSet() + bb_vartables = Union{VarTable,Nothing}[] + for block = 1:length(cfg.blocks) + push!(bb_vartables, VarState[ + VarState(slottypes[slot], src.slotflags[slot] & SLOT_USEDUNDEF != 0) + for slot = 1:nslots + ]) + end + return OptimizationState(mi, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false) +end +function OptimizationState(mi::MethodInstance, interp::AbstractInterpreter) + world = get_inference_world(interp) + src = retrieve_code_info(mi, world) + src === nothing && return nothing + return OptimizationState(mi, src, interp) +end + +function argextype end # imported by EscapeAnalysis +function try_compute_field end # imported by EscapeAnalysis + +include("ssair/heap.jl") +include("ssair/slot2ssa.jl") +include("ssair/inlining.jl") +include("ssair/verify.jl") +include("ssair/legacy.jl") +include("ssair/EscapeAnalysis.jl") +include("ssair/passes.jl") +include("ssair/irinterp.jl") + +function ir_to_codeinf!(opt::OptimizationState) + (; linfo, src) = opt + src = ir_to_codeinf!(src, opt.ir::IRCode) + src.edges = Core.svec(opt.inlining.edges...) + opt.ir = nothing + maybe_validate_code(linfo, src, "optimized") + return src +end + +function ir_to_codeinf!(src::CodeInfo, ir::IRCode) + replace_code_newstyle!(src, ir) + widen_all_consts!(src) + return src +end + +# widen all Const elements in type annotations +function widen_all_consts!(src::CodeInfo) + ssavaluetypes = src.ssavaluetypes::Vector{Any} + for i = 1:length(ssavaluetypes) + ssavaluetypes[i] = widenconst(ssavaluetypes[i]) + end + + for i = 1:length(src.code) + x = src.code[i] + if isa(x, PiNode) + src.code[i] = PiNode(x.val, widenconst(x.typ)) + end + end + + return src +end + +######### +# logic # +######### + +_topmod(sv::OptimizationState) = _topmod(sv.mod) + +is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE) +is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE) + +function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing) + Targ = args[1] + atyp = argextype(Targ, src) + # `Expr(:new)` of unknown type could raise arbitrary TypeError. + typ, isexact = instanceof_tfunc(atyp, true) + if !isexact + atyp = unwrap_unionall(widenconst(atyp)) + if isType(atyp) && isTypeDataType(atyp.parameters[1]) + typ = atyp.parameters[1] + else + return (false, false, false) + end + isabstracttype(typ) && return (false, false, false) + else + isconcretedispatch(typ) || return (false, false, false) + end + typ = typ::DataType + fcount = datatype_fieldcount(typ) + fcount === nothing && return (false, false, false) + fcount >= length(args) - 1 || return (false, false, false) + for fidx in 1:(length(args) - 1) + farg = args[fidx + 1] + eT = argextype(farg, src) + fT = fieldtype(typ, fidx) + if !isexact && has_free_typevars(fT) + if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg) + continue + end + return (false, false, false) + end + ⊑(𝕃ₒ, eT, fT) || return (false, false, false) + end + return (false, true, true) +end + +# Returns a tuple of `(:consistent, :removable, :nothrow)` flags for a given statement. +function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact}) + # TODO: We're duplicating analysis from inference here. 
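+    # For example, a `PhiNode` is consistent, removable, and nothrow, while a
+    # `GotoNode` is consistent and nothrow but not removable (it affects control
+    # flow), hence the `(true, false, true)` triple below.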
+ isa(stmt, PiNode) && return (true, true, true) + isa(stmt, PhiNode) && return (true, true, true) + isa(stmt, ReturnNode) && return (true, false, true) + isa(stmt, EnterNode) && return (true, false, true) + isa(stmt, GotoNode) && return (true, false, true) + isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool)) + if isa(stmt, GlobalRef) + # Modeled more precisely in abstract_eval_globalref. In general, if a + # GlobalRef was moved to statement position, it is probably not `const`, + # so we can't say much about it anyway. + return (false, false, false) + elseif isa(stmt, Expr) + (; head, args) = stmt + if head === :static_parameter + # if we aren't certain enough about the type, it might be an UndefVarError at runtime + sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes + nothrow = !sptypes[args[1]::Int].undef + return (true, nothrow, nothrow) + end + if head === :call + f = argextype(args[1], src) + f = singleton_type(f) + f === nothing && return (false, false, false) + if f === Intrinsics.cglobal || f === Intrinsics.llvmcall + # TODO: these are not yet linearized + return (false, false, false) + end + isa(f, Builtin) || return (false, false, false) + # Needs to be handled in inlining to look at the callee effects + f === Core._apply_iterate && return (false, false, false) + argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)] + effects = builtin_effects(𝕃ₒ, f, argtypes, rt) + consistent = is_consistent(effects) + effect_free = is_effect_free(effects) + nothrow = is_nothrow(effects) + terminates = is_terminates(effects) + removable = effect_free & nothrow & terminates + return (consistent, removable, nothrow) + elseif head === :new + return new_expr_effect_flags(𝕃ₒ, args, src) + elseif head === :foreigncall + effects = foreigncall_effects(stmt) do @nospecialize x + argextype(x, src) + end + consistent = is_consistent(effects) + effect_free = is_effect_free(effects) + nothrow = is_nothrow(effects) + terminates = is_terminates(effects) + removable = effect_free & nothrow & terminates + return (consistent, removable, nothrow) + elseif head === :new_opaque_closure + length(args) < 4 && return (false, false, false) + typ = argextype(args[1], src) + typ, isexact = instanceof_tfunc(typ, true) + isexact || return (false, false, false) + ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false) + rt_lb = argextype(args[2], src) + rt_ub = argextype(args[3], src) + source = argextype(args[5], src) + if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method)) + return (false, false, false) + end + return (false, true, true) + elseif head === :inbounds + return (true, true, true) + elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast + return (false, true, true) + else + # e.g. :loopinfo + return (false, false, false) + end + end + isa(stmt, SlotNumber) && error("unexpected IR elements") + return (true, true, true) +end + +function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), + src::Union{IRCode,IncrementalCompact}) + flag = IR_FLAG_NULL + (consistent, removable, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, src) + if consistent + flag |= IR_FLAG_CONSISTENT + end + if removable + flag |= IR_FLAGS_REMOVABLE + elseif nothrow + flag |= IR_FLAG_NOTHROW + end + if !iscallstmt(stmt) + # There is a bit of a subtle point here, which is that some non-call + # statements (e.g. 
PiNode) can be UB:, however, we consider it + # illegal to introduce such statements that actually cause UB (for any + # input). Ideally that'd be handled at insertion time (TODO), but for + # the time being just do that here. + flag |= IR_FLAG_NOUB + end + return flag +end + +""" + argextype(x, src::Union{IRCode,IncrementalCompact}) -> t + argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t + +Return the type of value `x` in the context of inferred source `src`. +Note that `t` might be an extended lattice element. +Use `widenconst(t)` to get the native Julia type of `x`. +""" +argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) = + argextype(x, ir, sptypes, ir.argtypes) +function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes) + isa(x, AnySSAValue) && return types(compact)[x] + return argextype(x, compact, sptypes, compact.ir.argtypes) +end +function argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState}) + return argextype(x, src, sptypes, src.slottypes::Union{Vector{Any},Nothing}) +end +function argextype( + @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo}, + sptypes::Vector{VarState}, slottypes::Union{Vector{Any},Nothing}) + if isa(x, Expr) + if x.head === :static_parameter + idx = x.args[1]::Int + (1 ≤ idx ≤ length(sptypes)) || throw(InvalidIRError()) + return sptypes[idx].typ + elseif x.head === :boundscheck + return Bool + elseif x.head === :copyast + length(x.args) == 0 && throw(InvalidIRError()) + return argextype(x.args[1], src, sptypes, slottypes) + end + Core.println("argextype called on Expr with head ", x.head, + " which is not valid for IR in argument-position.") + @assert false + elseif isa(x, SlotNumber) + slottypes === nothing && return Any + (1 ≤ x.id ≤ length(slottypes)) || throw(InvalidIRError()) + return slottypes[x.id] + elseif isa(x, SSAValue) + return abstract_eval_ssavalue(x, src) + elseif isa(x, Argument) + slottypes === nothing && return Any + (1 ≤ x.n ≤ length(slottypes)) || throw(InvalidIRError()) + return slottypes[x.n] + elseif isa(x, QuoteNode) + return Const(x.value) + elseif isa(x, GlobalRef) + return abstract_eval_globalref_type(x, src) + elseif isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode) + return Any + elseif isa(x, PiNode) + return x.typ + else + return Const(x) + end +end +function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) + ssavaluetypes = src.ssavaluetypes + if ssavaluetypes isa Int + (1 ≤ s.id ≤ ssavaluetypes) || throw(InvalidIRError()) + return Any + else + return abstract_eval_ssavalue(s, ssavaluetypes::Vector{Any}) + end +end +abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s] + +""" + finish(interp::AbstractInterpreter, opt::OptimizationState, + ir::IRCode, caller::InferenceResult) + +Post-process information derived by Julia-level optimizations for later use. +In particular, this function determines the inlineability of the optimized code. 
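+
+As a condensed, illustrative sketch of the cost-threshold policy implemented below
+(using the names from this function; the tuple-return and well-known-function
+bonuses are omitted here):
+
+    cost_threshold = params.inline_cost_threshold
+    if is_declared_inline(src)
+        cost_threshold += 19 * params.inline_cost_threshold  # `@inline` raises the threshold 20x
+    end
+    src.inlining_cost = inline_cost(ir, params, cost_threshold)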
+""" +function finish(interp::AbstractInterpreter, opt::OptimizationState, + ir::IRCode, caller::InferenceResult) + (; src, linfo) = opt + (; def, specTypes) = linfo + + force_noinline = is_declared_noinline(src) + + # compute inlining and other related optimizations + result = caller.result + @assert !(result isa LimitedAccuracy) + result = widenslotwrapper(result) + + opt.ir = ir + + # determine and cache inlineability + if !force_noinline + sig = unwrap_unionall(specTypes) + if !(isa(sig, DataType) && sig.name === Tuple.name) + force_noinline = true + end + if !is_declared_inline(src) && result === Bottom + force_noinline = true + end + end + if force_noinline + set_inlineable!(src, false) + elseif isa(def, Method) + if is_declared_inline(src) && isdispatchtuple(specTypes) + # obey @inline declaration if a dispatch barrier would not help + set_inlineable!(src, true) + else + # compute the cost (size) of inlining this code + params = OptimizationParams(interp) + cost_threshold = default = params.inline_cost_threshold + if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result)) + cost_threshold += params.inline_tupleret_bonus + end + # if the method is declared as `@inline`, increase the cost threshold 20x + if is_declared_inline(src) + cost_threshold += 19*default + end + # a few functions get special treatment + if def.module === _topmod(def.module) + name = def.name + if name === :iterate || name === :unsafe_convert || name === :cconvert + cost_threshold += 4*default + end + end + src.inlining_cost = inline_cost(ir, params, cost_threshold) + end + end + return nothing +end + +function visit_bb_phis!(callback, ir::IRCode, bb::Int) + stmts = ir.cfg.blocks[bb].stmts + for idx in stmts + stmt = ir[SSAValue(idx)][:stmt] + if !isa(stmt, PhiNode) + if !is_valid_phiblock_stmt(stmt) + return + end + else + callback(idx) + end + end +end + +function any_stmt_may_throw(ir::IRCode, bb::Int) + for idx in ir.cfg.blocks[bb].stmts + if !has_flag(ir[SSAValue(idx)], IR_FLAG_NOTHROW) + return true + end + end + return false +end + +visit_conditional_successors(callback, ir::IRCode, bb::Int) = # used for test + visit_conditional_successors(callback, LazyPostDomtree(ir), ir, bb) +function visit_conditional_successors(callback, lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int) + visited = BitSet((bb,)) + worklist = Int[bb] + while !isempty(worklist) + thisbb = popfirst!(worklist) + for succ in ir.cfg.blocks[thisbb].succs + succ in visited && continue + push!(visited, succ) + if postdominates(get!(lazypostdomtree), succ, bb) + # this successor is not conditional, so no need to visit it further + continue + elseif callback(succ) + return true + else + push!(worklist, succ) + end + end + end + return false +end + +struct AugmentedDomtree + cfg::CFG + domtree::DomTree +end + +mutable struct LazyAugmentedDomtree + const ir::IRCode + agdomtree::AugmentedDomtree + LazyAugmentedDomtree(ir::IRCode) = new(ir) +end + +function get!(lazyagdomtree::LazyAugmentedDomtree) + isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree + ir = lazyagdomtree.ir + cfg = copy(ir.cfg) + # Add a virtual basic block to represent the exit + push!(cfg.blocks, BasicBlock(StmtRange(0:-1))) + for bb = 1:(length(cfg.blocks)-1) + terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt] + if isa(terminator, ReturnNode) && isdefined(terminator, :val) + cfg_insert_edge!(cfg, bb, length(cfg.blocks)) + end + end + domtree = construct_domtree(cfg) + return lazyagdomtree.agdomtree = 
AugmentedDomtree(cfg, domtree) +end + +mutable struct PostOptAnalysisState + const result::InferenceResult + const ir::IRCode + const inconsistent::BitSetBoundedMinPrioritySet + const tpdum::TwoPhaseDefUseMap + const lazypostdomtree::LazyPostDomtree + const lazyagdomtree::LazyAugmentedDomtree + const ea_analysis_pending::Vector{Int} + all_retpaths_consistent::Bool + all_effect_free::Bool + effect_free_if_argmem_only::Union{Nothing,Bool} + all_nothrow::Bool + all_noub::Bool + any_conditional_ub::Bool + nortcall::Bool + function PostOptAnalysisState(result::InferenceResult, ir::IRCode) + inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts)) + tpdum = TwoPhaseDefUseMap(length(ir.stmts)) + lazypostdomtree = LazyPostDomtree(ir) + lazyagdomtree = LazyAugmentedDomtree(ir) + return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[], + true, true, nothing, true, true, false, true) + end +end + +give_up_refinements!(sv::PostOptAnalysisState) = + sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only = + sv.all_nothrow = sv.all_noub = sv.nortcall = false + +function any_refinable(sv::PostOptAnalysisState) + effects = sv.result.ipo_effects + return ((!is_consistent(effects) & sv.all_retpaths_consistent) | + (!is_effect_free(effects) & sv.all_effect_free) | + (!is_nothrow(effects) & sv.all_nothrow) | + (!is_noub(effects) & sv.all_noub) | + (!is_nortcall(effects) & sv.nortcall)) +end + +struct GetNativeEscapeCache{CodeCache} + code_cache::CodeCache + GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache) +end +GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp)) +function ((; code_cache)::GetNativeEscapeCache)(codeinst::Union{CodeInstance,MethodInstance}) + if codeinst isa MethodInstance + codeinst = get(code_cache, codeinst, nothing) + codeinst isa CodeInstance || return false + end + argescapes = traverse_analysis_results(codeinst) do @nospecialize result + return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing + end + if argescapes !== nothing + return argescapes + end + effects = decode_effects(codeinst.ipo_purity_bits) + if is_effect_free(effects) && is_inaccessiblememonly(effects) + # We might not have run EA on simple frames without any escapes (e.g. when optimization + # is skipped when result is constant-folded by abstract interpretation). If those + # frames aren't inlined, the accuracy of EA for caller context takes a big hit. + # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal. + return true + end + return false +end + +function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState) + if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending) + ir = sv.ir + nargs = Int(opt.src.nargs) + estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp)) + argescapes = EscapeAnalysis.ArgEscapeCache(estate) + stack_analysis_result!(sv.result, argescapes) + validate_mutable_arg_escapes!(estate, sv) + end + + any_refinable(sv) || return false + effects = sv.result.ipo_effects + sv.result.ipo_effects = Effects(effects; + consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent, + effect_free = sv.all_effect_free ? ALWAYS_TRUE : + sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free, + nothrow = sv.all_nothrow ? 
true : effects.nothrow, + noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub, + nortcall = sv.nortcall ? true : effects.nortcall) + return true +end + +function is_ipo_dataflow_analysis_profitable(effects::Effects) + return !(is_consistent(effects) && is_effect_free(effects) && + is_nothrow(effects) && is_noub(effects)) +end + +function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState) + isexpr(stmt, :call) || return false + ft = argextype(stmt.args[1], sv.ir) + f = singleton_type(ft) + f === nothing && return false + if f === getfield + nargs = 4 + elseif f === memoryrefnew || f === memoryrefget || f === memoryref_isassigned + nargs = 4 + elseif f === memoryrefset! + nargs = 5 + else + return false + end + length(stmt.args) < nargs && return false + boundscheck = stmt.args[end] + argextype(boundscheck, sv.ir) === Bool || return false + isa(boundscheck, SSAValue) || return false + return true +end + +function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt), + estate::EscapeAnalysis.EscapeState) + stmt isa Expr || return false + if isexpr(stmt, :invoke) + startidx = 2 + elseif isexpr(stmt, :new) + startidx = 1 + else + return false + end + has_no_escape(x::EscapeAnalysis.EscapeInfo) = + EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x)) + for i = startidx:length(stmt.args) + arg = stmt.args[i] + argt = argextype(arg, ir) + if is_mutation_free_argtype(argt) + continue + end + # See if we can find the allocation + if isa(arg, Argument) + if has_no_escape(estate[arg]) + # Even if we prove everything else effect_free, the best we can + # say is :effect_free_if_argmem_only + if sv.effect_free_if_argmem_only === nothing + sv.effect_free_if_argmem_only = true + end + else + sv.effect_free_if_argmem_only = false + end + return false + elseif isa(arg, SSAValue) + has_no_escape(estate[arg]) || return false + check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false + else + return false + end + end + return true +end + +function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState) + ir = sv.ir + for idx in sv.ea_analysis_pending + # See if any mutable memory was allocated in this function and determined + # not to escape. + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] + if !check_all_args_noescape!(sv, ir, stmt, estate) + return sv.all_effect_free = false + end + end + return true +end + +function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState) + stmt = inst[:stmt] + iscall_with_boundscheck(stmt, sv) || return false + barg = stmt.args[end]::SSAValue + bstmt = sv.ir[barg][:stmt] + isexpr(bstmt, :boundscheck) || return false + # If IR_FLAG_INBOUNDS is already set, no more conditional ub + (!isempty(bstmt.args) && bstmt.args[1] === false) && return false + return true +end + +function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState) + flag = inst[:flag] + # If we can prove that the argmem does not escape the current function, we can + # refine this to :effect_free. 
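    # Illustrative aside (not part of this diff hunk): the flag scan here, together with
    # the argmem-escape refinement described in the comment above, is what lets
    # `refine_effects!` upgrade `result.ipo_effects`. A minimal way to observe the
    # outcome, assuming a recent Julia where `Base.infer_effects` is available:
    #
    #     mutable struct Counter; n::Int; end
    #     function bump()
    #         c = Counter(0)
    #         c.n += 1      # `setfield!` on memory that never leaves this frame
    #         return c.n
    #     end
    #     Base.infer_effects(bump, ())
    #
    # If every scanned statement keeps `IR_FLAG_NOTHROW`/`IR_FLAG_EFFECT_FREE`, or the
    # escape check succeeds for the pending `setfield!`, the reported `Effects` satisfy
    # `Core.Compiler.is_nothrow` and `Core.Compiler.is_effect_free`.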
+ needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA) + stmt = inst[:stmt] + if !needs_ea_validation + if !isterminator(stmt) && stmt !== nothing + # ignore control flow node – they are not removable on their own and thus not + # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of + # the whole method invocation + sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE) + end + elseif sv.all_effect_free + if (isexpr(stmt, :invoke) || isexpr(stmt, :new) || + # HACK for performance: limit the scope of EA to code with object field access only, + # since its abilities to reason about e.g. arrays are currently very limited anyways. + is_known_call(stmt, setfield!, sv.ir)) + push!(sv.ea_analysis_pending, inst.idx) + else + sv.all_effect_free = false + end + end + sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW) + if !has_flag(flag, IR_FLAG_NOUB) + # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds` + if is_conditional_noub(inst, sv) + sv.any_conditional_ub = true + else + sv.all_noub = false + end + end + if !has_flag(flag, IR_FLAG_NORTCALL) + # if a function call that might invoke `Core.Compiler.return_type` has been deleted, + # there's no need to taint with `:nortcall`, allowing concrete evaluation + if iscallstmt(stmt) + sv.nortcall = false + end + end + nothing +end + +function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState) + flag = inst[:flag] + stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT) + stmt = inst[:stmt] + # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument + (; inconsistent, tpdum) = sv + if iscall_with_boundscheck(stmt, sv) + for i = 1:(length(stmt.args)-1) + val = stmt.args[i] + if isa(val, SSAValue) + stmt_inconsistent |= val.id in inconsistent + count!(tpdum, val) + end + end + else + for ur in userefs(stmt) + val = ur[] + if isa(val, SSAValue) + stmt_inconsistent |= val.id in inconsistent + count!(tpdum, val) + end + end + end + stmt_inconsistent && push!(inconsistent, inst.idx) + return stmt_inconsistent +end + +struct ScanStmt + sv::PostOptAnalysisState +end + +function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int) + stmt = inst[:stmt] + + if isa(stmt, EnterNode) + # try/catch not yet modeled + give_up_refinements!(sv) + return nothing + end + + scan_non_dataflow_flags!(inst, sv) + + stmt_inconsistent = scan_inconsistency!(inst, sv) + + if stmt_inconsistent + if !has_flag(inst[:flag], IR_FLAG_NOTHROW) + # Taint :consistent if this statement may raise since :consistent requires + # consistent termination. TODO: Separate :consistent_return and :consistent_termination from :consistent. + sv.all_retpaths_consistent = false + end + if inst.idx == lstmt + if isa(stmt, ReturnNode) && isdefined(stmt, :val) + sv.all_retpaths_consistent = false + elseif isa(stmt, GotoIfNot) + # Conditional Branch with inconsistent condition. + # If we do not know this function terminates, taint consistency, now, + # :consistent requires consistent termination. TODO: Just look at the + # inconsistent region. 
+ if !sv.result.ipo_effects.terminates + sv.all_retpaths_consistent = false + elseif visit_conditional_successors(sv.lazypostdomtree, sv.ir, bb) do succ::Int + return any_stmt_may_throw(sv.ir, succ) + end + # check if this `GotoIfNot` leads to conditional throws, which taints consistency + sv.all_retpaths_consistent = false + else + (; cfg, domtree) = get!(sv.lazyagdomtree) + for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree) + if succ == length(cfg.blocks) + # Phi node in the virtual exit -> We have a conditional + # return. TODO: Check if all the retvals are egal. + sv.all_retpaths_consistent = false + else + visit_bb_phis!(sv.ir, succ) do phiidx::Int + push!(sv.inconsistent, phiidx) + end + end + end + end + end + end + end + + # bail out early if there are no possibilities to refine the effects + if !any_refinable(sv) + return nothing + end + + return true +end + +function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner) + (; ir, inconsistent, tpdum) = sv + + scan!(ScanStmt(sv), scanner, false) + complete!(tpdum); push!(scanner.bb_ip, 1) + populate_def_use_map!(tpdum, scanner) + + stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts)) + for def in inconsistent + for use in tpdum[def] + if !(use in inconsistent) + push!(inconsistent, use) + append!(stmt_ip, tpdum[use]) + end + end + end + lazydomtree = LazyDomtree(ir) + while !isempty(stmt_ip) + idx = popfirst!(stmt_ip) + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] + if iscall_with_boundscheck(stmt, sv) + any_non_boundscheck_inconsistent = false + for i = 1:(length(stmt.args)-1) + val = stmt.args[i] + if isa(val, SSAValue) + any_non_boundscheck_inconsistent |= val.id in inconsistent + any_non_boundscheck_inconsistent && break + end + end + any_non_boundscheck_inconsistent || continue + elseif isa(stmt, ReturnNode) + sv.all_retpaths_consistent = false + elseif isa(stmt, GotoIfNot) + bb = block_for_inst(ir, idx) + cfg = ir.cfg + blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing) + for succ in iterated_dominance_frontier(cfg, blockliveness, get!(lazydomtree)) + visit_bb_phis!(ir, succ) do phiidx::Int + push!(inconsistent, phiidx) + push!(stmt_ip, phiidx) + end + end + end + sv.all_retpaths_consistent || break + append!(inconsistent, tpdum[idx]) + append!(stmt_ip, tpdum[idx]) + end +end + +function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState, + ir::IRCode, result::InferenceResult) + if !is_ipo_dataflow_analysis_profitable(result.ipo_effects) + return false + end + + @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis" + + sv = PostOptAnalysisState(result, ir) + scanner = BBScanner(ir) + + completed_scan = scan!(ScanStmt(sv), scanner, true) + + if !completed_scan + if sv.all_retpaths_consistent + check_inconsistentcy!(sv, scanner) + else + # No longer any dataflow concerns, just scan the flags + scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int + scan_non_dataflow_flags!(inst, sv) + # bail out early if there are no possibilities to refine the effects + if !any_refinable(sv) + return nothing + end + return true + end + end + end + + return refine_effects!(interp, opt, sv) +end + +# run the optimization work +function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult) + @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt) + ipo_dataflow_analysis!(interp, opt, ir, caller) + return finish(interp, opt, ir, caller) +end + +macro pass(name, expr) 
+ optimize_until = esc(:optimize_until) + stage = esc(:__stage__) + macrocall = :(@timeit $(esc(name)) $(esc(expr))) + macrocall.args[2] = __source__ # `@timeit` may want to use it + quote + $macrocall + matchpass($optimize_until, ($stage += 1), $(esc(name))) && $(esc(:(@goto __done__))) + end +end + +matchpass(optimize_until::Int, stage, _) = optimize_until == stage +matchpass(optimize_until::String, _, name) = optimize_until == name +matchpass(::Nothing, _, _) = false + +function run_passes_ipo_safe( + ci::CodeInfo, + sv::OptimizationState, + optimize_until = nothing, # run all passes by default +) + __stage__ = 0 # used by @pass + # NOTE: The pass name MUST be unique for `optimize_until::String` to work + @pass "convert" ir = convert_to_ircode(ci, sv) + @pass "slot2reg" ir = slot2reg(ir, ci, sv) + # TODO: Domsorting can produce an updated domtree - no need to recompute here + @pass "compact 1" ir = compact!(ir) + @pass "Inlining" ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds) + # @timeit "verify 2" verify_ir(ir) + @pass "compact 2" ir = compact!(ir) + @pass "SROA" ir = sroa_pass!(ir, sv.inlining) + @pass "ADCE" (ir, made_changes) = adce_pass!(ir, sv.inlining) + if made_changes + @pass "compact 3" ir = compact!(ir, true) + end + if is_asserts() + @timeit "verify 3" begin + verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp), sv.linfo) + verify_linetable(ir.debuginfo, length(ir.stmts)) + end + end + @label __done__ # used by @pass + return ir +end + +function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo}) + # Remove `nothing`s at the end, we don't handle them well + # (we expect the last instruction to be a terminator) + codelocs = debuginfo.codelocs + for i = length(code):-1:1 + if code[i] !== nothing + resize!(code, i) + resize!(ssavaluetypes, i) + resize!(codelocs, 3i) + resize!(info, i) + resize!(ssaflags, i) + break + end + end + # If the last instruction is not a terminator, add one. This can + # happen for implicit return on dead branches. 
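    # Illustrative aside (not part of this diff hunk): the `optimize_until` plumbing in
    # `run_passes_ipo_safe`/`matchpass` above is reachable from ordinary reflection, which
    # is convenient when debugging a single pass. A sketch, assuming `Base.code_ircode`
    # forwards the `optimize_until` keyword as in recent Julia versions:
    #
    #     f(x) = x === nothing ? 0 : x + 1
    #     # stop right after SSA conversion; the name must match the `@pass` label exactly
    #     Base.code_ircode(f, (Union{Nothing,Int},); optimize_until = "slot2reg")
    #     # or stop after the N-th pass by index instead of by name
    #     Base.code_ircode(f, (Union{Nothing,Int},); optimize_until = 2)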
+ term = code[end] + if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode) + push!(code, ReturnNode()) + push!(ssavaluetypes, Union{}) + push!(codelocs, 0, 0, 0) + push!(info, NoCallInfo()) + push!(ssaflags, IR_FLAG_NOTHROW) + + # Update CFG to include appended terminator + old_range = cfg.blocks[end].stmts + new_range = StmtRange(first(old_range), last(old_range) + 1) + cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range) + (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1) + end + nothing +end + +function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int) + while true + next = getdebugidx(di, codeloc) + line = next[1] + line < 0 && return false # invalid info + line == 0 && next[2] == 0 && return false # no new info + prevloc <= 0 && return true # no old info + prev = getdebugidx(di, prevloc) + next === prev && return false # exactly identical + prevline = prev[1] + prevline < 0 && return true # previous invalid info, now valid + edge = next[2] + edge === prev[2] || return true # change to this edge + linetable = di.linetable + # check for change to line number here + if linetable === nothing || line == 0 + line == prevline || return true + else + changed_lineinfo(linetable::DebugInfo, Int(line), Int(prevline)) && return true + end + # check for change to edge here + edge == 0 && return false # no edge here + di = di.edges[Int(edge)]::DebugInfo + codeloc = Int(next[3]) + prevloc = Int(prev[3]) + end +end + +function convert_to_ircode(ci::CodeInfo, sv::OptimizationState) + # Update control-flow to reflect any unreachable branches. + ssavaluetypes = ci.ssavaluetypes::Vector{Any} + ci.code = code = copy_exprargs(ci.code) + di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code)) + codelocs = di.codelocs + ssaflags = ci.ssaflags + for i = 1:length(code) + expr = code[i] + if !(i in sv.unreachable) + if isa(expr, GotoIfNot) + # Replace this live GotoIfNot with: + # - no-op if :nothrow and the branch target is unreachable + # - cond if :nothrow and both targets are unreachable + # - typeassert if must-throw + block = block_for_inst(sv.cfg, i) + if ssavaluetypes[i] === Bottom + destblock = block_for_inst(sv.cfg, expr.dest) + cfg_delete_edge!(sv.cfg, block, block + 1) + ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock) + expr = Expr(:call, Core.typeassert, expr.cond, Bool) + elseif i + 1 in sv.unreachable + @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW) + cfg_delete_edge!(sv.cfg, block, block + 1) + expr = GotoNode(expr.dest) + elseif expr.dest in sv.unreachable + @assert has_flag(ssaflags[i], IR_FLAG_NOTHROW) + cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest)) + expr = nothing + end + code[i] = expr + elseif isa(expr, EnterNode) + catchdest = expr.catch_dest + if catchdest in sv.unreachable + cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest)) + if isdefined(expr, :scope) + # We've proven that nothing inside the enter region throws, + # but we don't yet know whether something might read the scope, + # so we need to retain this enter for the time being. However, + # we use the special marker `0` to indicate that setting up + # the try/catch frame is not required. 
+ code[i] = EnterNode(expr, 0) + else + code[i] = nothing + end + end + elseif isa(expr, PhiNode) + new_edges = Int32[] + new_vals = Any[] + for j = 1:length(expr.edges) + edge = expr.edges[j] + (edge in sv.unreachable || (ssavaluetypes[edge] === Union{} && !isa(code[edge], PhiNode))) && continue + push!(new_edges, edge) + if isassigned(expr.values, j) + push!(new_vals, expr.values[j]) + else + resize!(new_vals, length(new_edges)) + end + end + code[i] = PhiNode(new_edges, new_vals) + end + end + end + + # Go through and add an unreachable node after every + # Union{} call. Then reindex labels. + stmtinfo = sv.stmt_info + meta = Expr[] + idx = 1 + oldidx = 1 + nstmts = length(code) + ssachangemap = labelchangemap = blockchangemap = nothing + prevloc = 0 + while idx <= length(code) + if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc) + # insert a side-effect instruction before the current instruction in the same basic block + insert!(code, idx, Expr(:code_coverage_effect)) + splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])) + insert!(ssavaluetypes, idx, Nothing) + insert!(stmtinfo, idx, NoCallInfo()) + insert!(ssaflags, idx, IR_FLAG_NULL) + if ssachangemap === nothing + ssachangemap = fill(0, nstmts) + end + if labelchangemap === nothing + labelchangemap = fill(0, nstmts) + end + ssachangemap[oldidx] += 1 + if oldidx < length(labelchangemap) + labelchangemap[oldidx + 1] += 1 + end + if blockchangemap === nothing + blockchangemap = fill(0, length(sv.cfg.blocks)) + end + blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1 + idx += 1 + prevloc = oldidx + end + if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode) + # We should have converted any must-throw terminators to an equivalent w/o control-flow edges + @assert !isterminator(code[idx]) + + block = block_for_inst(sv.cfg, oldidx) + block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx) + + # Delete all successors to this basic block + for succ in sv.cfg.blocks[block].succs + preds = sv.cfg.blocks[succ].preds + deleteat!(preds, findfirst(x::Int->x==block, preds)::Int) + end + empty!(sv.cfg.blocks[block].succs) + + if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val)) + # Any statements from here to the end of the block have been wrapped in Core.Const(...) + # by type inference (effectively deleting them). Only task left is to replace the block + # terminator with an explicit `unreachable` marker. + + if block_end > idx + if is_asserts() + # Verify that type-inference did its job + for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts) + @assert i in sv.unreachable + end + end + code[block_end] = ReturnNode() + codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]) + ssavaluetypes[block_end] = Union{} + stmtinfo[block_end] = NoCallInfo() + ssaflags[block_end] = IR_FLAG_NOTHROW + idx += block_end - idx + else + insert!(code, idx + 1, ReturnNode()) + splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])) + insert!(ssavaluetypes, idx + 1, Union{}) + insert!(stmtinfo, idx + 1, NoCallInfo()) + insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW) + if ssachangemap === nothing + ssachangemap = fill(0, nstmts) + end + if labelchangemap === nothing + labelchangemap = sv.insert_coverage ? 
fill(0, nstmts) : ssachangemap + end + if oldidx < length(ssachangemap) + ssachangemap[oldidx + 1] += 1 + sv.insert_coverage && (labelchangemap[oldidx + 1] += 1) + end + if blockchangemap === nothing + blockchangemap = fill(0, length(sv.cfg.blocks)) + end + blockchangemap[block] += 1 + idx += 1 + end + oldidx = last(sv.cfg.blocks[block].stmts) + end + end + idx += 1 + oldidx += 1 + end + empty!(sv.unreachable) + + if ssachangemap !== nothing && labelchangemap !== nothing + renumber_ir_elements!(code, ssachangemap, labelchangemap) + end + if blockchangemap !== nothing + renumber_cfg_stmts!(sv.cfg, blockchangemap) + end + + for i = 1:length(code) + code[i] = process_meta!(meta, code[i]) + end + strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo) + types = Any[] + stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags) + # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the + # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form + # and eliminates slots (see below) + argtypes = sv.slottypes + return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes, WorldRange(ci.min_world, ci.max_world)) +end + +function process_meta!(meta::Vector{Expr}, @nospecialize stmt) + if isexpr(stmt, :meta) && length(stmt.args) ≥ 1 + push!(meta, stmt) + return nothing + end + return stmt +end + +function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState) + # need `ci` for the slot metadata, IR for the code + svdef = sv.linfo.def + @timeit "domtree 1" domtree = construct_domtree(ir) + defuse_insts = scan_slot_def_use(Int(ci.nargs), ci, ir.stmts.stmt) + 𝕃ₒ = optimizer_lattice(sv.inlining.interp) + @timeit "construct_ssa" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir` + # NOTE now we have converted `ir` to the SSA form and eliminated slots + # let's resize `argtypes` now and remove unnecessary types for the eliminated slots + resize!(ir.argtypes, ci.nargs) + return ir +end + +## Computing the cost of a function body + +# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below +plus_saturate(x::Int, y::Int) = max(x, y, x+y) + +# known return type +isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T)) + +function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, + params::OptimizationParams) + #=const=# UNKNOWN_CALL_COST = 20 + head = ex.head + if is_meta_expr_head(head) + return 0 + elseif head === :call + farg = ex.args[1] + ftyp = argextype(farg, src, sptypes) + if ftyp === IntrinsicFunction && farg isa SSAValue + # if this comes from code that was already inlined into another function, + # Consts have been widened. try to recover in simple cases. + farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt] + if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter) + ftyp = argextype(farg, src, sptypes) + end + end + f = singleton_type(ftyp) + if isa(f, IntrinsicFunction) + iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 + if isassigned(T_IFUNC, iidx) + minarg, maxarg, = T_IFUNC[iidx] + nargs = length(ex.args) + if minarg + 1 <= nargs <= maxarg + 1 + # With mostly constant arguments, all Intrinsics tend to become very cheap + # and are likely to combine with the operations around them, + # so reduce their cost by half. 
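    # Illustrative aside (not part of this diff hunk): the per-statement values charged
    # below flow through `plus_saturate` into `inline_cost`, and they can be inspected
    # interactively. A sketch, assuming the reflection helper `Base.print_statement_costs`
    # is available as in recent Julia versions:
    #
    #     Base.print_statement_costs(stdout, map, (typeof(sqrt), Tuple{Int}))
    #
    # Each printed line is prefixed with the cost assigned by `statement_or_branch_cost`;
    # `plus_saturate` keeps the running total from wrapping around `typemax(Int)`.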
+ cost = T_IFUNC_COST[iidx] + if cost == 0 || nargs < 3 || + (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them + return cost + end + aty2 = widenconditional(argextype(ex.args[2], src, sptypes)) + nconst = Int(aty2 isa Const) + for i = 3:nargs + aty = widenconditional(argextype(ex.args[i], src, sptypes)) + if widenconst(aty) != widenconst(aty2) + nconst = 0 + break + end + nconst += aty isa Const + end + if nconst + 2 >= nargs + cost = (cost - 1) ÷ 2 + end + return cost + end + end + # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining + return UNKNOWN_CALL_COST + end + if isa(f, Builtin) && f !== invoke + # The efficiency of operations like a[i] and s.b + # depend strongly on whether the result can be + # inferred, so check the type of ex + if f === Core.getfield || f === Core.tuple || f === Core.getglobal + # we might like to penalize non-inferrability, but + # tuple iteration/destructuring makes that impossible + # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty) + return 0 + elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3 + atyp = argextype(ex.args[2], src, sptypes) + return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty + elseif f === Core.memoryrefset! && length(ex.args) >= 3 + atyp = argextype(ex.args[2], src, sptypes) + return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty + elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes))) + return 1 + end + fidx = find_tfunc(f) + if fidx === nothing + # unknown/unhandled builtin + # Use the generic cost of a direct function call + return UNKNOWN_CALL_COST + end + return T_FFUNC_COST[fidx] + end + extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) + if extyp === Union{} + return 0 + end + return params.inline_nonleaf_penalty + elseif head === :foreigncall + foreigncall = ex.args[1] + if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr + return 1 + end + return 20 + elseif head === :invoke || head === :invoke_modify + # Calls whose "return type" is Union{} do not actually return: + # they are errors. Since these are not part of the typical + # run-time of the function, we omit them from + # consideration. This way, non-inlined error branches do not + # prevent inlining. + extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) + return extyp === Union{} ? 0 : UNKNOWN_CALL_COST + elseif head === :(=) + return statement_cost(ex.args[2], -1, src, sptypes, params) + elseif head === :copyast + return 100 + end + return 0 +end + +function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, + params::OptimizationParams) + thiscost = 0 + dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt + if stmt isa Expr + thiscost = statement_cost(stmt, line, src, sptypes, params)::Int + elseif stmt isa GotoNode + # loops are generally always expensive + # but assume that forward jumps are already counted for from + # summing the cost of the not-taken branch + thiscost = dst(stmt.label) < line ? 40 : 0 + elseif stmt isa GotoIfNot + thiscost = dst(stmt.dest) < line ? 
40 : 0 + elseif stmt isa EnterNode + # try/catch is a couple function calls, + # but don't inline functions with try/catch + # since these aren't usually performance-sensitive functions, + # and llvm is more likely to miscompile them when these functions get large + thiscost = typemax(Int) + end + return thiscost +end + +function inline_cost(ir::IRCode, params::OptimizationParams, cost_threshold::Int) + bodycost = 0 + for i = 1:length(ir.stmts) + stmt = ir[SSAValue(i)][:stmt] + thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params) + bodycost = plus_saturate(bodycost, thiscost) + if bodycost > cost_threshold + return MAX_INLINE_COST + end + end + return inline_cost_clamp(bodycost) +end + +function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams) + maxcost = 0 + for line = 1:length(body) + stmt = body[line] + thiscost = statement_or_branch_cost(stmt, line, src, sptypes, + params) + cost[line] = thiscost + if thiscost > maxcost + maxcost = thiscost + end + end + return maxcost +end + +function renumber_ir_elements!(body::Vector{Any}, cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int}) + return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap) +end + +function cumsum_ssamap!(ssachangemap::Vector{Int}) + any_change = false + rel_change = 0 + for i = 1:length(ssachangemap) + val = ssachangemap[i] + any_change |= val ≠ 0 + rel_change += val + if val == -1 + # Keep a marker that this statement was deleted + ssachangemap[i] = typemin(Int) + else + ssachangemap[i] = rel_change + end + end + return any_change +end + +function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int}) + any_change = cumsum_ssamap!(labelchangemap) + if ssachangemap !== labelchangemap + any_change |= cumsum_ssamap!(ssachangemap) + end + any_change || return + for i = 1:length(body) + el = body[i] + if isa(el, GotoNode) + body[i] = GotoNode(el.label + labelchangemap[el.label]) + elseif isa(el, GotoIfNot) + cond = el.cond + if isa(cond, SSAValue) + cond = SSAValue(cond.id + ssachangemap[cond.id]) + end + was_deleted = labelchangemap[el.dest] == typemin(Int) + body[i] = was_deleted ? 
cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest]) + elseif isa(el, ReturnNode) + if isdefined(el, :val) + val = el.val + if isa(val, SSAValue) + body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id])) + end + end + elseif isa(el, SSAValue) + body[i] = SSAValue(el.id + ssachangemap[el.id]) + elseif isa(el, PhiNode) + i = 1 + edges = el.edges + values = el.values + while i <= length(edges) + was_deleted = ssachangemap[edges[i]] == typemin(Int) + if was_deleted + deleteat!(edges, i) + deleteat!(values, i) + else + edges[i] += ssachangemap[edges[i]] + val = values[i] + if isa(val, SSAValue) + values[i] = SSAValue(val.id + ssachangemap[val.id]) + end + i += 1 + end + end + elseif isa(el, EnterNode) + tgt = el.catch_dest + if tgt != 0 + was_deleted = labelchangemap[tgt] == typemin(Int) + if was_deleted + @assert !isdefined(el, :scope) + body[i] = nothing + else + if isdefined(el, :scope) && isa(el.scope, SSAValue) + body[i] = EnterNode(tgt + labelchangemap[tgt], SSAValue(el.scope.id + ssachangemap[el.scope.id])) + else + body[i] = EnterNode(el, tgt + labelchangemap[tgt]) + end + end + end + elseif isa(el, Expr) + if el.head === :(=) && el.args[2] isa Expr + el = el.args[2]::Expr + end + if !is_meta_expr_head(el.head) + args = el.args + for i = 1:length(args) + el = args[i] + if isa(el, SSAValue) + args[i] = SSAValue(el.id + ssachangemap[el.id]) + end + end + end + end + end +end + +function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int}) + cumsum_ssamap!(blockchangemap) || return + for i = 1:length(cfg.blocks) + old_range = cfg.blocks[i].stmts + new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0), + last(old_range) + blockchangemap[i]) + cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range) + if i <= length(cfg.index) + cfg.index[i] = cfg.index[i] + blockchangemap[i] + end + end +end diff --git a/Compiler/src/reflection_interface.jl b/Compiler/src/reflection_interface.jl new file mode 100644 index 0000000000000..3fc182685e598 --- /dev/null +++ b/Compiler/src/reflection_interface.jl @@ -0,0 +1,58 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +_findall_matches(interp::AbstractInterpreter, @nospecialize(tt)) = findall(tt, method_table(interp)) +_default_interp(world::UInt) = NativeInterpreter(world) + +_may_throw_methoderror(matches::MethodLookupResult) = + matches.ambig || !any(match::Core.MethodMatch->match.fully_covers, matches.matches) + +function _infer_exception_type(interp::AbstractInterpreter, @nospecialize(tt), optimize::Bool) + matches = _findall_matches(interp, tt) + matches === nothing && return nothing + exct = Union{} + if _may_throw_methoderror(matches) + # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. + exct = MethodError + end + for match in matches.matches + match = match::Core.MethodMatch + frame = typeinf_frame(interp, match, #=run_optimizer=#optimize) + frame === nothing && return Any + exct = tmerge(exct, widenconst(frame.result.exc_result)) + end + return exct +end + +function _infer_effects(interp::AbstractInterpreter, @nospecialize(tt), optimize::Bool) + matches = _findall_matches(interp, tt) + matches === nothing && return nothing + effects = EFFECTS_TOTAL + if _may_throw_methoderror(matches) + # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. 
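    # Illustrative aside (not part of this diff hunk): this MethodError accounting is what
    # the user-facing reflection entry points surface. A sketch, assuming
    # `Base.infer_effects` and `Base.infer_exception_type` are backed by these helpers:
    #
    #     Base.infer_exception_type(sin, (Float64,))  # fully covered: no MethodError folded in
    #     Base.infer_exception_type(sin, (Any,))      # not fully covered: MethodError is unioned in
    #     Base.infer_effects(sin, (Any,))             # correspondingly cannot be `nothrow`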
+ effects = Effects(effects; nothrow=false) + end + for match in matches.matches + match = match::Core.MethodMatch + frame = typeinf_frame(interp, match, #=run_optimizer=#optimize) + frame === nothing && return Effects() + effects = merge_effects(effects, frame.result.ipo_effects) + end + return effects +end + +function statement_costs!(interp::AbstractInterpreter, cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, match::Core.MethodMatch) + params = OptimizationParams(interp) + sptypes = VarState[VarState(sp, false) for sp in match.sparams] + return statement_costs!(cost, body, src, sptypes, params) +end + +function findsup_mt(@nospecialize(tt), world, method_table) + if method_table === nothing + table = InternalMethodTable(world) + elseif method_table isa Core.MethodTable + table = OverlayMethodTable(world, method_table) + else + table = method_table + end + return findsup(tt, table) +end diff --git a/base/compiler/sort.jl b/Compiler/src/sort.jl similarity index 97% rename from base/compiler/sort.jl rename to Compiler/src/sort.jl index 71d2f8a51cd59..6c8571f6198e6 100644 --- a/base/compiler/sort.jl +++ b/Compiler/src/sort.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license # reference on sorted binary search: -# http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary +# https://www.tbray.org/ongoing/When/200x/2003/03/22/Binary # index of the first value of vector a that is greater than or equal to x; # returns lastindex(v)+1 if x is greater than all values in v. diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/Compiler/src/ssair/EscapeAnalysis.jl similarity index 55% rename from base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl rename to Compiler/src/ssair/EscapeAnalysis.jl index 2469507fd3699..af8e9b1a4959e 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/Compiler/src/ssair/EscapeAnalysis.jl @@ -10,59 +10,55 @@ export has_thrown_escape, has_all_escape -const _TOP_MOD = ccall(:jl_base_relative_to, Any, (Any,), EscapeAnalysis)::Module +using Base: Base # imports -import ._TOP_MOD: ==, getindex, setindex! +import Base: ==, copy, getindex, setindex! # usings -import Core: - MethodInstance, Const, Argument, SSAValue, PiNode, PhiNode, UpsilonNode, PhiCNode, - ReturnNode, GotoNode, GotoIfNot, SimpleVector, MethodMatch, CodeInstance, - sizeof, ifelse, arrayset, arrayref, arraysize -import ._TOP_MOD: # Base definitions - @__MODULE__, @eval, @assert, @specialize, @nospecialize, @inbounds, @inline, @noinline, - @label, @goto, !, !==, !=, ≠, +, -, *, ≤, <, ≥, >, &, |, <<, error, missing, copy, - Vector, BitSet, IdDict, IdSet, UnitRange, Csize_t, Callable, ∪, ⊆, ∩, :, ∈, ∉, =>, - in, length, get, first, last, haskey, keys, get!, isempty, isassigned, - pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall, - ismutabletype -import Core.Compiler: # Core.Compiler specific definitions - Bottom, InferenceResult, IRCode, IR_FLAG_NOTHROW, SimpleInferenceLattice, - isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type, - fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑, - intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck, - setfield!_nothrow, alloc_array_ndims, stmt_effect_free, check_effect_free! 
- -include(x) = _TOP_MOD.include(@__MODULE__, x) -if _TOP_MOD === Core.Compiler - include("compiler/ssair/EscapeAnalysis/disjoint_set.jl") -else - include("disjoint_set.jl") -end +using Core: Builtin, IntrinsicFunction, SimpleVector, ifelse, sizeof +using Core.IR +using Base: # Base definitions + @__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline, + @nospecialize, @specialize, BitSet, IdDict, IdSet, UnitRange, Vector, + delete!, empty!, enumerate, first, get, get!, hasintersect, haskey, isassigned, + isempty, length, max, min, missing, println, push!, pushfirst!, + !, !==, &, *, +, -, :, <, <<, >, |, ∈, ∉, ∩, ∪, ≠, ≤, ≥, ⊆ +using ..Compiler: # Compiler specific definitions + AbstractLattice, Compiler, IRCode, IR_FLAG_NOTHROW, + argextype, fieldcount_noerror, has_flag, intrinsic_nothrow, is_meta_expr_head, + is_identity_free_argtype, isexpr, setfield!_nothrow, singleton_type, try_compute_field, + try_compute_fieldidx, widenconst + +function include(x::String) + if !isdefined(Base, :end_base_include) + # During bootstrap, all includes are relative to `base/` + x = ccall(:jl_prepend_string, Ref{String}, (Any, Any), "ssair/", x) + end + Compiler.include(@__MODULE__, x) +end + +include("disjoint_set.jl") const AInfo = IdSet{Any} -const LivenessSet = BitSet -const 𝕃ₒ = SimpleInferenceLattice.instance """ x::EscapeInfo A lattice for escape information, which holds the following properties: -- `x.Analyzed::Bool`: not formally part of the lattice, only indicates `x` has not been analyzed or not +- `x.Analyzed::Bool`: not formally part of the lattice, only indicates whether `x` has been analyzed - `x.ReturnEscape::Bool`: indicates `x` can escape to the caller via return - `x.ThrownEscape::BitSet`: records SSA statement numbers where `x` can be thrown as exception: * `isempty(x.ThrownEscape)`: `x` will never be thrown in this call frame (the bottom) * `pc ∈ x.ThrownEscape`: `x` may be thrown at the SSA statement at `pc` * `-1 ∈ x.ThrownEscape`: `x` may be thrown at arbitrary points of this call frame (the top) This information will be used by `escape_exception!` to propagate potential escapes via exception. -- `x.AliasInfo::Union{Bool,IndexableFields,IndexableElements,Unindexable}`: maintains all possible values +- `x.AliasInfo::Union{Bool,IndexableFields,Unindexable}`: maintains all possible values that can be aliased to fields or array elements of `x`: * `x.AliasInfo === false` indicates the fields/elements of `x` aren't analyzed yet * `x.AliasInfo === true` indicates the fields/elements of `x` can't be analyzed, e.g. the type of `x` is not known or is not concrete and thus its fields/elements can't be known precisely * `x.AliasInfo::IndexableFields` records all the possible values that can be aliased to fields of object `x` with precise index information - * `x.AliasInfo::IndexableElements` records all the possible values that can be aliased to elements of array `x` with precise index information * `x.AliasInfo::Unindexable` records all the possible values that can be aliased to fields/elements of `x` without precise index information - `x.Liveness::BitSet`: records SSA statement numbers where `x` should be live, e.g. 
to be used as a call argument, to be returned to a caller, or preserved for `:foreigncall`: @@ -88,78 +84,74 @@ An abstract state will be initialized with the bottom(-like) elements: struct EscapeInfo Analyzed::Bool ReturnEscape::Bool - ThrownEscape::LivenessSet - AliasInfo #::Union{IndexableFields,IndexableElements,Unindexable,Bool} - Liveness::LivenessSet + ThrownEscape::BitSet + AliasInfo #::Union{IndexableFields,Unindexable,Bool} + Liveness::BitSet function EscapeInfo( Analyzed::Bool, ReturnEscape::Bool, - ThrownEscape::LivenessSet, - AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=#, - Liveness::LivenessSet, - ) + ThrownEscape::BitSet, + AliasInfo#=::Union{IndexableFields,Unindexable,Bool}=#, + Liveness::BitSet) @nospecialize AliasInfo return new( Analyzed, ReturnEscape, ThrownEscape, AliasInfo, - Liveness, - ) + Liveness) end function EscapeInfo( x::EscapeInfo, # non-concrete fields should be passed as default arguments # in order to avoid allocating non-concrete `NamedTuple`s - AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=# = x.AliasInfo; + AliasInfo#=::Union{IndexableFields,Unindexable,Bool}=# = x.AliasInfo; Analyzed::Bool = x.Analyzed, ReturnEscape::Bool = x.ReturnEscape, - ThrownEscape::LivenessSet = x.ThrownEscape, - Liveness::LivenessSet = x.Liveness, - ) + ThrownEscape::BitSet = x.ThrownEscape, + Liveness::BitSet = x.Liveness) @nospecialize AliasInfo return new( Analyzed, ReturnEscape, ThrownEscape, AliasInfo, - Liveness, - ) + Liveness) end end # precomputed default values in order to eliminate computations at each callsite -const BOT_THROWN_ESCAPE = LivenessSet() +const BOT_THROWN_ESCAPE = BitSet() # NOTE the lattice operations should try to avoid actual set computations on this top value, -# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations -const TOP_THROWN_ESCAPE = LivenessSet(-1) +# and e.g. BitSet(0:1000000) should also work without incurring excessive computations +const TOP_THROWN_ESCAPE = BitSet(-1) -const BOT_LIVENESS = LivenessSet() +const BOT_LIVENESS = BitSet() # NOTE the lattice operations should try to avoid actual set computations on this top value, -# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations -const TOP_LIVENESS = LivenessSet(-1:0) -const ARG_LIVENESS = LivenessSet(0) +# and e.g. 
BitSet(0:1000000) should also work without incurring excessive computations +const TOP_LIVENESS = BitSet(-1:0) +const ARG_LIVENESS = BitSet(0) # the constructors NotAnalyzed() = EscapeInfo(false, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS) # not formally part of the lattice NoEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS) ArgEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, true, ARG_LIVENESS) -ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, LivenessSet(pc)) +ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, BitSet(pc)) AllReturnEscape() = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, TOP_LIVENESS) -ThrownEscape(pc::Int) = EscapeInfo(true, false, LivenessSet(pc), false, BOT_LIVENESS) +ThrownEscape(pc::Int) = EscapeInfo(true, false, BitSet(pc), false, BOT_LIVENESS) AllEscape() = EscapeInfo(true, true, TOP_THROWN_ESCAPE, true, TOP_LIVENESS) const ⊥, ⊤ = NotAnalyzed(), AllEscape() # Convenience names for some ⊑ₑ queries has_no_escape(x::EscapeInfo) = !x.ReturnEscape && isempty(x.ThrownEscape) && 0 ∉ x.Liveness -has_arg_escape(x::EscapeInfo) = 0 in x.Liveness +has_arg_escape(x::EscapeInfo) = 0 ∈ x.Liveness has_return_escape(x::EscapeInfo) = x.ReturnEscape -has_return_escape(x::EscapeInfo, pc::Int) = x.ReturnEscape && (-1 ∈ x.Liveness || pc in x.Liveness) +has_return_escape(x::EscapeInfo, pc::Int) = x.ReturnEscape && (-1 ∈ x.Liveness || pc ∈ x.Liveness) has_thrown_escape(x::EscapeInfo) = !isempty(x.ThrownEscape) -has_thrown_escape(x::EscapeInfo, pc::Int) = -1 ∈ x.ThrownEscape || pc in x.ThrownEscape +has_thrown_escape(x::EscapeInfo, pc::Int) = -1 ∈ x.ThrownEscape || pc ∈ x.ThrownEscape has_all_escape(x::EscapeInfo) = ⊤ ⊑ₑ x # utility lattice constructors @@ -172,14 +164,13 @@ ignore_liveness(x::EscapeInfo) = EscapeInfo(x; Liveness=BOT_LIVENESS) struct IndexableFields infos::Vector{AInfo} end -struct IndexableElements - infos::IdDict{Int,AInfo} -end struct Unindexable info::AInfo end IndexableFields(nflds::Int) = IndexableFields(AInfo[AInfo() for _ in 1:nflds]) Unindexable() = Unindexable(AInfo()) +copy(AliasInfo::IndexableFields) = IndexableFields(AInfo[copy(info) for info in AliasInfo.infos]) +copy(AliasInfo::Unindexable) = Unindexable(copy(AliasInfo.info)) merge_to_unindexable(AliasInfo::IndexableFields) = Unindexable(merge_to_unindexable(AliasInfo.infos)) merge_to_unindexable(AliasInfo::Unindexable, AliasInfos::IndexableFields) = Unindexable(merge_to_unindexable(AliasInfo.info, AliasInfos.infos)) @@ -190,15 +181,6 @@ function merge_to_unindexable(info::AInfo, infos::Vector{AInfo}) end return info end -merge_to_unindexable(AliasInfo::IndexableElements) = Unindexable(merge_to_unindexable(AliasInfo.infos)) -merge_to_unindexable(AliasInfo::Unindexable, AliasInfos::IndexableElements) = Unindexable(merge_to_unindexable(AliasInfo.info, AliasInfos.infos)) -merge_to_unindexable(infos::IdDict{Int,AInfo}) = merge_to_unindexable(AInfo(), infos) -function merge_to_unindexable(info::AInfo, infos::IdDict{Int,AInfo}) - for idx in keys(infos) - info = info ∪ infos[idx] - end - return info -end # we need to make sure this `==` operator corresponds to lattice equality rather than object equality, # otherwise `propagate_changes` can't detect the convergence @@ -221,9 +203,6 @@ x::EscapeInfo == y::EscapeInfo = begin elseif isa(xa, IndexableFields) isa(ya, IndexableFields) || return false xa.infos == ya.infos || return false - elseif isa(xa, IndexableElements) - isa(ya, IndexableElements) || return false - xa.infos == ya.infos 
|| return false else xa = xa::Unindexable isa(ya, Unindexable) || return false @@ -275,8 +254,6 @@ x::EscapeInfo ⊑ₑ y::EscapeInfo = begin for i in 1:xn xinfos[i] ⊆ yinfos[i] || return false end - elseif isa(ya, IndexableElements) - return false elseif isa(ya, Unindexable) xinfos, yinfo = xa.infos, ya.info for i = length(xinfos) @@ -285,23 +262,6 @@ x::EscapeInfo ⊑ₑ y::EscapeInfo = begin else ya === true || return false end - elseif isa(xa, IndexableElements) - if isa(ya, IndexableElements) - xinfos, yinfos = xa.infos, ya.infos - keys(xinfos) ⊆ keys(yinfos) || return false - for idx in keys(xinfos) - xinfos[idx] ⊆ yinfos[idx] || return false - end - elseif isa(ya, IndexableFields) - return false - elseif isa(ya, Unindexable) - xinfos, yinfo = xa.infos, ya.info - for idx in keys(xinfos) - xinfos[idx] ⊆ yinfo || return false - end - else - ya === true || return false - end else xa = xa::Unindexable if isa(ya, Unindexable) @@ -407,33 +367,10 @@ function merge_alias_info(@nospecialize(xa), @nospecialize(ya)) else return true # handle conflicting case conservatively end - elseif isa(xa, IndexableElements) - if isa(ya, IndexableElements) - xinfos, yinfos = xa.infos, ya.infos - infos = IdDict{Int,AInfo}() - for idx in keys(xinfos) - if !haskey(yinfos, idx) - infos[idx] = xinfos[idx] - else - infos[idx] = xinfos[idx] ∪ yinfos[idx] - end - end - for idx in keys(yinfos) - haskey(xinfos, idx) && continue # unioned already - infos[idx] = yinfos[idx] - end - return IndexableElements(infos) - elseif isa(ya, Unindexable) - return merge_to_unindexable(ya, xa) - else - return true # handle conflicting case conservatively - end else xa = xa::Unindexable if isa(ya, IndexableFields) return merge_to_unindexable(xa, ya) - elseif isa(ya, IndexableElements) - return merge_to_unindexable(xa, ya) else ya = ya::Unindexable xinfo, yinfo = xa.info, ya.info @@ -445,8 +382,6 @@ end const AliasSet = IntDisjointSet{Int} -const ArrayInfo = IdDict{Int,Vector{Int}} - """ estate::EscapeState @@ -457,13 +392,12 @@ struct EscapeState escapes::Vector{EscapeInfo} aliasset::AliasSet nargs::Int - arrayinfo::Union{Nothing,ArrayInfo} end -function EscapeState(nargs::Int, nstmts::Int, arrayinfo::Union{Nothing,ArrayInfo}) +function EscapeState(nargs::Int, nstmts::Int) escapes = EscapeInfo[ 1 ≤ i ≤ nargs ? ArgEscape() : ⊥ for i in 1:(nargs+nstmts)] aliasset = AliasSet(nargs+nstmts) - return EscapeState(escapes, aliasset, nargs, arrayinfo) + return EscapeState(escapes, aliasset, nargs) end function getindex(estate::EscapeState, @nospecialize(x)) xidx = iridx(x, estate) @@ -509,8 +443,7 @@ that is analyzable in the context of `estate`. `iridx(irval(xidx, state), state) === xidx`. """ function irval(xidx::Int, estate::EscapeState) - x = xidx > estate.nargs ? SSAValue(xidx-estate.nargs) : Argument(xidx) - return x + return xidx > estate.nargs ? 
SSAValue(xidx-estate.nargs) : Argument(xidx) end function getaliases(x::Union{Argument,SSAValue}, estate::EscapeState) @@ -543,14 +476,14 @@ isaliased(xidx::Int, yidx::Int, estate::EscapeState) = in_same_set(estate.aliasset, xidx, yidx) struct ArgEscapeInfo - EscapeBits::UInt8 + escape_bits::UInt8 end function ArgEscapeInfo(x::EscapeInfo) x === ⊤ && return ArgEscapeInfo(ARG_ALL_ESCAPE) - EscapeBits = 0x00 - has_return_escape(x) && (EscapeBits |= ARG_RETURN_ESCAPE) - has_thrown_escape(x) && (EscapeBits |= ARG_THROWN_ESCAPE) - return ArgEscapeInfo(EscapeBits) + escape_bits = 0x00 + has_return_escape(x) && (escape_bits |= ARG_RETURN_ESCAPE) + has_thrown_escape(x) && (escape_bits |= ARG_THROWN_ESCAPE) + return ArgEscapeInfo(escape_bits) end const ARG_ALL_ESCAPE = 0x01 << 0 @@ -558,9 +491,9 @@ const ARG_RETURN_ESCAPE = 0x01 << 1 const ARG_THROWN_ESCAPE = 0x01 << 2 has_no_escape(x::ArgEscapeInfo) = !has_all_escape(x) && !has_return_escape(x) && !has_thrown_escape(x) -has_all_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_ALL_ESCAPE ≠ 0 -has_return_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_RETURN_ESCAPE ≠ 0 -has_thrown_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_THROWN_ESCAPE ≠ 0 +has_all_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_ALL_ESCAPE ≠ 0 +has_return_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_RETURN_ESCAPE ≠ 0 +has_thrown_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_THROWN_ESCAPE ≠ 0 struct ArgAliasing aidx::Int @@ -570,46 +503,22 @@ end struct ArgEscapeCache argescapes::Vector{ArgEscapeInfo} argaliases::Vector{ArgAliasing} -end - -function ArgEscapeCache(estate::EscapeState) - nargs = estate.nargs - argescapes = Vector{ArgEscapeInfo}(undef, nargs) - argaliases = ArgAliasing[] - for i = 1:nargs - info = estate.escapes[i] - @assert info.AliasInfo === true - argescapes[i] = ArgEscapeInfo(info) - for j = (i+1):nargs - if isaliased(i, j, estate) - push!(argaliases, ArgAliasing(i, j)) + function ArgEscapeCache(estate::EscapeState) + nargs = estate.nargs + argescapes = Vector{ArgEscapeInfo}(undef, nargs) + argaliases = ArgAliasing[] + for i = 1:nargs + info = estate.escapes[i] + @assert info.AliasInfo === true + argescapes[i] = ArgEscapeInfo(info) + for j = (i+1):nargs + if isaliased(i, j, estate) + push!(argaliases, ArgAliasing(i, j)) + end end end + return new(argescapes, argaliases) end - return ArgEscapeCache(argescapes, argaliases) -end - -""" - is_ipo_profitable(ir::IRCode, nargs::Int) -> Bool - -Heuristically checks if there is any profitability to run the escape analysis on `ir` -and generate IPO escape information cache. Specifically, this function examines -if any call argument is "interesting" in terms of their escapability. 
-""" -function is_ipo_profitable(ir::IRCode, nargs::Int) - for i = 1:nargs - t = unwrap_unionall(widenconst(ir.argtypes[i])) - t <: IO && return false # bail out IO-related functions - is_ipo_profitable_type(t) && return true - end - return false -end -function is_ipo_profitable_type(@nospecialize t) - if isa(t, Union) - return is_ipo_profitable_type(t.a) && is_ipo_profitable_type(t.b) - end - (t === String || t === Symbol || t === Module || t === SimpleVector) && return false - return ismutabletype(t) end abstract type Change end @@ -631,68 +540,47 @@ struct LivenessChange <: Change end const Changes = Vector{Change} -struct AnalysisState{T<:Callable} +struct AnalysisState{GetEscapeCache, Lattice<:AbstractLattice} ir::IRCode estate::EscapeState changes::Changes - get_escape_cache::T -end - -function getinst(ir::IRCode, idx::Int) - nstmts = length(ir.stmts) - if idx ≤ nstmts - return ir.stmts[idx] - else - return ir.new_nodes.stmts[idx - nstmts] - end + 𝕃ₒ::Lattice + get_escape_cache::GetEscapeCache end """ - analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::Callable) - -> estate::EscapeState + analyze_escapes(ir::IRCode, nargs::Int, get_escape_cache) -> estate::EscapeState Analyzes escape information in `ir`: - `nargs`: the number of actual arguments of the analyzed call -- `call_resolved`: if interprocedural calls are already resolved by `ssa_inlining_pass!` -- `get_escape_cache(::Union{InferenceResult,MethodInstance}) -> Union{Nothing,ArgEscapeCache}`: +- `get_escape_cache(::MethodInstance) -> Union{Bool,ArgEscapeCache}`: retrieves cached argument escape information """ -function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::T) where T<:Callable +function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_escape_cache) stmts = ir.stmts nstmts = length(stmts) + length(ir.new_nodes.stmts) - tryregions, arrayinfo, callinfo = compute_frameinfo(ir, call_resolved) - estate = EscapeState(nargs, nstmts, arrayinfo) + tryregions = compute_frameinfo(ir) + estate = EscapeState(nargs, nstmts) changes = Changes() # keeps changes that happen at current statement - astate = AnalysisState(ir, estate, changes, get_escape_cache) + astate = AnalysisState(ir, estate, changes, 𝕃ₒ, get_escape_cache) local debug_itr_counter = 0 while true local anyupdate = false for pc in nstmts:-1:1 - stmt = getinst(ir, pc)[:inst] + stmt = ir[SSAValue(pc)][:stmt] # collect escape information if isa(stmt, Expr) head = stmt.head if head === :call - if callinfo !== nothing - escape_call!(astate, pc, stmt.args, callinfo) - else - escape_call!(astate, pc, stmt.args) - end + escape_call!(astate, pc, stmt.args) elseif head === :invoke escape_invoke!(astate, pc, stmt.args) elseif head === :new || head === :splatnew escape_new!(astate, pc, stmt.args) - elseif head === :(=) - lhs, rhs = stmt.args - if isa(lhs, GlobalRef) # global store - add_escape_change!(astate, rhs, ⊤) - else - unexpected_assignment!(ir, pc) - end elseif head === :foreigncall escape_foreigncall!(astate, pc, stmt.args) elseif head === :throw_undef_if_not # XXX when is this expression inserted ? 
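# Illustrative aside (not part of this patch): given the signature documented above, a
# minimal standalone driver could look like the sketch below. The module path
# (`Core.Compiler` vs. the `Compiler` stdlib), the use of `Base.code_ircode` to obtain
# post-inlining IR, and `Returns(false)` as a "no interprocedural cache" callback are
# assumptions made for illustration only.

const CC = Core.Compiler

function escape_summary(f, argtypes)
    EA = CC.EscapeAnalysis
    (ir, rt) = only(Base.code_ircode(f, argtypes))   # optimized (post-inlining) IR
    nargs = length(ir.argtypes)                      # includes the function object itself
    𝕃ₒ = CC.optimizer_lattice(CC.NativeInterpreter())
    estate = EA.analyze_escapes(ir, nargs, 𝕃ₒ, Returns(false))
    # report, per argument, whether anything beyond the usual argument liveness escapes
    return [EA.has_no_escape(EA.ignore_argescape(estate[Core.Argument(i)])) for i in 1:nargs]
end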
@@ -700,20 +588,25 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape elseif is_meta_expr_head(head) # meta expressions doesn't account for any usages continue - elseif head === :enter || head === :leave || head === :the_exception || head === :pop_exception + elseif head === :leave || head === :the_exception || head === :pop_exception # ignore these expressions since escapes via exceptions are handled by `escape_exception!` # `escape_exception!` conservatively propagates `AllEscape` anyway, # and so escape information imposed on `:the_exception` isn't computed continue + elseif head === :gc_preserve_begin + # GC preserve is handled by `escape_gc_preserve!` + elseif head === :gc_preserve_end + escape_gc_preserve!(astate, pc, stmt.args) elseif head === :static_parameter || # this exists statically, not interested in its escape - head === :copyast || # XXX can this account for some escapes? - head === :isdefined || # just returns `Bool`, nothing accounts for any escapes - head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere - head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere + head === :copyast || # XXX escape something? + head === :isdefined # just returns `Bool`, nothing accounts for any escapes continue else add_conservative_changes!(astate, pc, stmt.args) end + elseif isa(stmt, EnterNode) + # Handled via escape_exception! + continue elseif isa(stmt, ReturnNode) if isdefined(stmt, :val) add_escape_change!(astate, stmt.val, ReturnEscape(pc)) @@ -758,121 +651,29 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape end """ - compute_frameinfo(ir::IRCode, call_resolved::Bool) -> (tryregions, arrayinfo, callinfo) - -A preparatory linear scan before the escape analysis on `ir` to find: -- `tryregions::Union{Nothing,Vector{UnitRange{Int}}}`: regions in which potential `throw`s can be caught (used by `escape_exception!`) -- `arrayinfo::Union{Nothing,IdDict{Int,Vector{Int}}}`: array allocations whose dimensions are known precisely (with some very simple local analysis) -- `callinfo::`: when `!call_resolved`, `compute_frameinfo` additionally returns `callinfo::Vector{Union{MethodInstance,InferenceResult}}`, - which contains information about statically resolved callsites. - The inliner will use essentially equivalent interprocedural information to inline callees as well as resolve static callsites, - this additional information won't be required when analyzing post-inlining IR. - -!!! note - This array dimension analysis to compute `arrayinfo` is very local and doesn't account - for flow-sensitivity nor complex aliasing. - Ideally this dimension analysis should be done as a part of type inference that - propagates array dimensions in a flow sensitive way. 
+ compute_frameinfo(ir::IRCode) -> tryregions + +A preparatory linear scan before the escape analysis on `ir` to find +`tryregions::Union{Nothing,Vector{UnitRange{Int}}}`, that represent regions in which +potential `throw`s can be caught (used by `escape_exception!`) """ -function compute_frameinfo(ir::IRCode, call_resolved::Bool) +function compute_frameinfo(ir::IRCode) nstmts, nnewnodes = length(ir.stmts), length(ir.new_nodes.stmts) - tryregions, arrayinfo = nothing, nothing - if !call_resolved - callinfo = Vector{Any}(undef, nstmts+nnewnodes) - else - callinfo = nothing - end + tryregions = nothing for idx in 1:nstmts+nnewnodes - inst = getinst(ir, idx) - stmt = inst[:inst] - if !call_resolved - # TODO don't call `check_effect_free!` in the inlinear - check_effect_free!(ir, idx, stmt, inst[:type], 𝕃ₒ) - end - if callinfo !== nothing && isexpr(stmt, :call) - # TODO: pass effects here - callinfo[idx] = resolve_call(ir, stmt, inst[:info]) - elseif isexpr(stmt, :enter) - @assert idx ≤ nstmts "try/catch inside new_nodes unsupported" - tryregions === nothing && (tryregions = UnitRange{Int}[]) - leave_block = stmt.args[1]::Int - leave_pc = first(ir.cfg.blocks[leave_block].stmts) - push!(tryregions, idx:leave_pc) - elseif isexpr(stmt, :foreigncall) - args = stmt.args - name = args[1] - nn = normalize(name) - isa(nn, Symbol) || @goto next_stmt - ndims = alloc_array_ndims(nn) - ndims === nothing && @goto next_stmt - if ndims ≠ 0 - length(args) ≥ ndims+6 || @goto next_stmt - dims = Int[] - for i in 1:ndims - dim = argextype(args[i+6], ir) - isa(dim, Const) || @goto next_stmt - dim = dim.val - isa(dim, Int) || @goto next_stmt - push!(dims, dim) - end - else - length(args) ≥ 7 || @goto next_stmt - dims = argextype(args[7], ir) - if isa(dims, Const) - dims = dims.val - isa(dims, Tuple{Vararg{Int}}) || @goto next_stmt - dims = collect(Int, dims) - else - dims === Tuple{} || @goto next_stmt - dims = Int[] - end - end - if arrayinfo === nothing - arrayinfo = ArrayInfo() - end - arrayinfo[idx] = dims - elseif arrayinfo !== nothing - # TODO this super limited alias analysis is able to handle only very simple cases - # this should be replaced with a proper forward dimension analysis - if isa(stmt, PhiNode) - values = stmt.values - local dims = nothing - for i = 1:length(values) - if isassigned(values, i) - val = values[i] - if isa(val, SSAValue) && haskey(arrayinfo, val.id) - if dims === nothing - dims = arrayinfo[val.id] - continue - elseif dims == arrayinfo[val.id] - continue - end - end - end - @goto next_stmt - end - if dims !== nothing - arrayinfo[idx] = dims - end - elseif isa(stmt, PiNode) - if isdefined(stmt, :val) - val = stmt.val - if isa(val, SSAValue) && haskey(arrayinfo, val.id) - arrayinfo[idx] = arrayinfo[val.id] - end - end + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] + if isa(stmt, EnterNode) + leave_block = stmt.catch_dest + if leave_block ≠ 0 + @assert idx ≤ nstmts "try/catch inside new_nodes unsupported" + tryregions === nothing && (tryregions = UnitRange{Int}[]) + leave_pc = first(ir.cfg.blocks[leave_block].stmts) + push!(tryregions, idx:leave_pc) end end - @label next_stmt end - return tryregions, arrayinfo, callinfo -end - -# define resolve_call -if _TOP_MOD === Core.Compiler - include("compiler/ssair/EscapeAnalysis/interprocedural.jl") -else - include("interprocedural.jl") + return tryregions end # propagate changes, and check convergence @@ -920,13 +721,13 @@ end return false end -# propagate Liveness changes separately in order to avoid constructing too many LivenessSet +# 
propagate Liveness changes separately in order to avoid constructing too many BitSet @inline function propagate_liveness_change!(estate::EscapeState, change::LivenessChange) (; xidx, livepc) = change info = estate.escapes[xidx] Liveness = info.Liveness Liveness === TOP_LIVENESS && return false - livepc in Liveness && return false + livepc ∈ Liveness && return false if Liveness === BOT_LIVENESS || Liveness === ARG_LIVENESS # if this Liveness is a constant, we shouldn't modify it and propagate this change as a new EscapeInfo Liveness = copy(Liveness) @@ -958,7 +759,7 @@ function add_escape_change!(astate::AnalysisState, @nospecialize(x), xinfo::Esca xinfo === ⊥ && return nothing # performance optimization xidx = iridx(x, astate.estate) if xidx !== nothing - if force || !isbitstype(widenconst(argextype(x, astate.ir))) + if force || !is_identity_free_argtype(argextype(x, astate.ir)) push!(astate.changes, EscapeChange(xidx, xinfo)) end end @@ -968,7 +769,7 @@ end function add_liveness_change!(astate::AnalysisState, @nospecialize(x), livepc::Int) xidx = iridx(x, astate.estate) if xidx !== nothing - if !isbitstype(widenconst(argextype(x, astate.ir))) + if !is_identity_free_argtype(argextype(x, astate.ir)) push!(astate.changes, LivenessChange(xidx, livepc)) end end @@ -1074,25 +875,7 @@ function escape_unanalyzable_obj!(astate::AnalysisState, @nospecialize(obj), obj return objinfo end -@noinline function unexpected_assignment!(ir::IRCode, pc::Int) - @eval Main (ir = $ir; pc = $pc) - error("unexpected assignment found: inspect `Main.pc` and `Main.pc`") -end - -is_nothrow(ir::IRCode, pc::Int) = getinst(ir, pc)[:flag] & IR_FLAG_NOTHROW ≠ 0 - -# NOTE if we don't maintain the alias set that is separated from the lattice state, we can do -# something like below: it essentially incorporates forward escape propagation in our default -# backward propagation, and leads to inefficient convergence that requires more iterations -# # lhs = rhs: propagate escape information of `rhs` to `lhs` -# function escape_alias!(astate::AnalysisState, @nospecialize(lhs), @nospecialize(rhs)) -# if isa(rhs, SSAValue) || isa(rhs, Argument) -# vinfo = astate.estate[rhs] -# else -# return -# end -# add_escape_change!(astate, lhs, vinfo) -# end +is_nothrow(ir::IRCode, pc::Int) = has_flag(ir[SSAValue(pc)], IR_FLAG_NOTHROW) """ escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{Int}}) @@ -1140,6 +923,7 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I # NOTE if `:the_exception` is the only way to access the exception, we can do: # exc = SSAValue(pc) # excinfo = estate[exc] + # TODO? set up a special effect bit that checks the existence of `rethrow` and `current_exceptions` and use it here excinfo = ⊤ escapes = estate.escapes for i in 1:length(escapes) @@ -1148,7 +932,7 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I xt === TOP_THROWN_ESCAPE && @goto propagate_exception_escape # fast pass for pc in xt for region in tryregions - pc in region && @goto propagate_exception_escape # early break because of AllEscape + pc ∈ region && @goto propagate_exception_escape # early break because of AllEscape end end continue @@ -1159,25 +943,40 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I end # escape statically-resolved call, i.e. 
`Expr(:invoke, ::MethodInstance, ...)` -escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) = - escape_invoke!(astate, pc, args, first(args)::MethodInstance, 2) - -function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}, - linfo::Linfo, first_idx::Int, last_idx::Int = length(args)) - if isa(linfo, InferenceResult) - cache = astate.get_escape_cache(linfo) - linfo = linfo.linfo +function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) + codeinst = first(args) + if codeinst isa MethodInstance + mi = codeinst else - cache = astate.get_escape_cache(linfo) - end - if cache === nothing - return add_conservative_changes!(astate, pc, args, 2) - else - cache = cache::ArgEscapeCache + mi = (codeinst::CodeInstance).def end + first_idx, last_idx = 2, length(args) + add_liveness_changes!(astate, pc, args, first_idx, last_idx) + # TODO inspect `astate.ir.stmts[pc][:info]` and use const-prop'ed `InferenceResult` if available + cache = astate.get_escape_cache(codeinst) ret = SSAValue(pc) + if cache isa Bool + if cache + # This method call is very simple and has good effects, so there's no need to + # escape its arguments. However, since the arguments might be returned, we need + # to consider the possibility of aliasing between them and the return value. + for argidx = first_idx:last_idx + arg = args[argidx] + if arg isa GlobalRef + continue # :effect_free guarantees that nothings escapes to the global scope + end + if !is_identity_free_argtype(argextype(arg, astate.ir)) + add_alias_change!(astate, ret, arg) + end + end + return nothing + else + return add_conservative_changes!(astate, pc, args, 2) + end + end + cache = cache::ArgEscapeCache retinfo = astate.estate[ret] # escape information imposed on the call statement - method = linfo.def::Method + method = mi.def::Method nargs = Int(method.nargs) for (i, argidx) in enumerate(first_idx:last_idx) arg = args[argidx] @@ -1186,46 +985,40 @@ function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}, # COMBAK will this be invalid once we take alias information into account? 
i = nargs end - arginfo = cache.argescapes[i] - info = from_interprocedural(arginfo, pc) - if has_return_escape(arginfo) - # if this argument can be "returned", in addition to propagating - # the escape information imposed on this call argument within the callee, - # we should also account for possible aliasing of this argument and the returned value - add_escape_change!(astate, arg, info) + argescape = cache.argescapes[i] + info = from_interprocedural(argescape, pc) + # propagate the escape information imposed on this call argument by the callee + add_escape_change!(astate, arg, info) + if has_return_escape(argescape) + # if this argument can be "returned", we should also account for possible + # aliasing between this argument and the returned value add_alias_change!(astate, ret, arg) - else - # if this is simply passed as the call argument, we can just propagate - # the escape information imposed on this call argument within the callee - add_escape_change!(astate, arg, info) end end for (; aidx, bidx) in cache.argaliases - add_alias_change!(astate, args[aidx-(first_idx-1)], args[bidx-(first_idx-1)]) + add_alias_change!(astate, args[aidx+(first_idx-1)], args[bidx+(first_idx-1)]) end # we should disable the alias analysis on this newly introduced object add_escape_change!(astate, ret, EscapeInfo(retinfo, true)) end """ - from_interprocedural(arginfo::ArgEscapeInfo, pc::Int) -> x::EscapeInfo + from_interprocedural(argescape::ArgEscapeInfo, pc::Int) -> x::EscapeInfo -Reinterprets the escape information imposed on the call argument which is cached as `arginfo` +Reinterprets the escape information imposed on the call argument which is cached as `argescape` in the context of the caller frame, where `pc` is the SSA statement number of the return value. """ -function from_interprocedural(arginfo::ArgEscapeInfo, pc::Int) - has_all_escape(arginfo) && return ⊤ - - ThrownEscape = has_thrown_escape(arginfo) ? LivenessSet(pc) : BOT_THROWN_ESCAPE - - return EscapeInfo( - #=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape, - # FIXME implement interprocedural memory effect-analysis - # currently, this essentially disables the entire field analysis - # it might be okay from the SROA point of view, since we can't remove the allocation - # as far as it's passed to a callee anyway, but still we may want some field analysis - # for e.g. stack allocation or some other IPO optimizations - #=AliasInfo=#true, #=Liveness=#LivenessSet(pc)) +function from_interprocedural(argescape::ArgEscapeInfo, pc::Int) + has_all_escape(argescape) && return ⊤ + ThrownEscape = has_thrown_escape(argescape) ? BitSet(pc) : BOT_THROWN_ESCAPE + # TODO implement interprocedural memory effect-analysis: + # currently, this essentially disables the entire field analysis–it might be okay from + # the SROA point of view, since we can't remove the allocation as far as it's passed to + # a callee anyway, but still we may want some field analysis for e.g. 
stack allocation + # or some other IPO optimizations + AliasInfo = true + Liveness = BitSet(pc) + return EscapeInfo(#=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape, AliasInfo, Liveness) end # escape every argument `(args[6:length(args[3])])` and the name `args[1]` @@ -1240,25 +1033,6 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any}) argtypes = args[3]::SimpleVector nargs = length(argtypes) name = args[1] - nn = normalize(name) - if isa(nn, Symbol) - boundserror_ninds = array_resize_info(nn) - if boundserror_ninds !== nothing - boundserror, ninds = boundserror_ninds - escape_array_resize!(boundserror, ninds, astate, pc, args) - return - end - if is_array_copy(nn) - escape_array_copy!(astate, pc, args) - return - elseif is_array_isassigned(nn) - escape_array_isassigned!(astate, pc, args) - return - end - # if nn === :jl_gc_add_finalizer_th - # # TODO add `FinalizerEscape` ? - # end - end # NOTE array allocations might have been proven as nothrow (https://github.com/JuliaLang/julia/pull/43565) nothrow = is_nothrow(astate.ir, pc) name_info = nothrow ? ⊥ : ThrownEscape(pc) @@ -1282,79 +1056,57 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any}) end end -normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x - -function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}, callinfo::Vector{Any}) - info = callinfo[pc] - if isa(info, Bool) - info && return # known to be no escape - # now cascade to the builtin handling - escape_call!(astate, pc, args) - return - elseif isa(info, EACallInfo) - for linfo in info.linfos - escape_invoke!(astate, pc, args, linfo, 1) - end - # accounts for a potential escape via MethodError - info.nothrow || add_thrown_escapes!(astate, pc, args) - return - else - @assert info === missing - # if this call couldn't be analyzed, escape it conservatively - add_conservative_changes!(astate, pc, args) - end +function escape_gc_preserve!(astate::AnalysisState, pc::Int, args::Vector{Any}) + @assert length(args) == 1 "invalid :gc_preserve_end" + val = args[1] + @assert val isa SSAValue "invalid :gc_preserve_end" + beginstmt = astate.ir[val][:stmt] + @assert isexpr(beginstmt, :gc_preserve_begin) "invalid :gc_preserve_end" + beginargs = beginstmt.args + # COMBAK we might need to add liveness for all statements from `:gc_preserve_begin` to `:gc_preserve_end` + add_liveness_changes!(astate, pc, beginargs) end function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}) - ir = astate.ir - ft = argextype(first(args), ir, ir.sptypes, ir.argtypes) + ft = argextype(first(args), astate.ir) f = singleton_type(ft) - if isa(f, Core.IntrinsicFunction) - # XXX somehow `:call` expression can creep in here, ideally we should be able to do: - # argtypes = Any[argextype(args[i], astate.ir) for i = 2:length(args)] - argtypes = Any[] - for i = 2:length(args) - arg = args[i] - push!(argtypes, isexpr(arg, :call) ? Any : argextype(arg, ir)) - end - if intrinsic_nothrow(f, argtypes) + if f isa IntrinsicFunction + if is_nothrow(astate.ir, pc) add_liveness_changes!(astate, pc, args, 2) else add_fallback_changes!(astate, pc, args, 2) end - return # TODO accounts for pointer operations? 
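# `escape_call!` in this hunk keys its behavior off the per-builtin handler's return value:
# `missing` means "not handled, be conservative", `true` means "handled, only liveness is
# left to record", and `false` means "handled, but throwness still has to be checked".
# A tiny standalone dispatcher in the same style; the handlers and the symbols returned
# here are made up for illustration.
toy_handler(@nospecialize(f), args...) = missing   # default: unknown call
toy_handler(::typeof(isa), args...) = false        # safe, but throwness is checked separately
toy_handler(::typeof(tuple), args...) = true       # fully handled in this toy

function classify_call(@nospecialize(f), args...)
    result = toy_handler(f, args...)
    if result === missing
        return :conservative       # escape everything this call touches
    elseif result === true
        return :liveness_only      # arguments only need liveness records
    else
        return :check_throwness    # fall back to the nothrow check
    end
end

@assert classify_call(println, "x") === :conservative
@assert classify_call(tuple, 1, 2)  === :liveness_only
@assert classify_call(isa, 1, Int)  === :check_throwness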
- end - result = escape_builtin!(f, astate, pc, args) - if result === missing - # if this call hasn't been handled by any of pre-defined handlers, escape it conservatively - add_conservative_changes!(astate, pc, args) - return - elseif result === true - add_liveness_changes!(astate, pc, args, 2) - return # ThrownEscape is already checked - else - # we escape statements with the `ThrownEscape` property using the effect-freeness - # computed by `stmt_effect_flags` invoked within inlining - # TODO throwness ≠ "effect-free-ness" - if is_nothrow(astate.ir, pc) + # TODO needs to account for pointer operations? + elseif f isa Builtin + result = escape_builtin!(f, astate, pc, args) + if result === missing + # if this call hasn't been handled by any of pre-defined handlers, escape it conservatively + add_conservative_changes!(astate, pc, args) + elseif result === true + add_liveness_changes!(astate, pc, args, 2) + elseif is_nothrow(astate.ir, pc) add_liveness_changes!(astate, pc, args, 2) else add_fallback_changes!(astate, pc, args, 2) end - return + else + # escape this generic function or unknown function call conservatively + add_conservative_changes!(astate, pc, args) end end -escape_builtin!(@nospecialize(f), _...) = return missing +escape_builtin!(@nospecialize(f), _...) = missing # safe builtins -escape_builtin!(::typeof(isa), _...) = return false -escape_builtin!(::typeof(typeof), _...) = return false -escape_builtin!(::typeof(sizeof), _...) = return false -escape_builtin!(::typeof(===), _...) = return false +escape_builtin!(::typeof(isa), _...) = false +escape_builtin!(::typeof(typeof), _...) = false +escape_builtin!(::typeof(sizeof), _...) = false +escape_builtin!(::typeof(===), _...) = false +escape_builtin!(::typeof(Core.donotdelete), _...) = false # not really safe, but `ThrownEscape` will be imposed later -escape_builtin!(::typeof(isdefined), _...) = return false -escape_builtin!(::typeof(throw), _...) = return false +escape_builtin!(::typeof(isdefined), _...) = false +escape_builtin!(::typeof(throw), _...) = false +escape_builtin!(::typeof(Core.throw_methoderror), _...) 
= false function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any}) length(args) == 4 || return false @@ -1400,6 +1152,7 @@ function escape_new!(astate::AnalysisState, pc::Int, args::Vector{Any}) @goto escape_indexable_def end elseif isa(AliasInfo, IndexableFields) + AliasInfo = copy(AliasInfo) @label escape_indexable_def # fields are known precisely: propagate escape information imposed on recorded possibilities to the exact field values infos = AliasInfo.infos @@ -1416,6 +1169,7 @@ function escape_new!(astate::AnalysisState, pc::Int, args::Vector{Any}) end add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo elseif isa(AliasInfo, Unindexable) + AliasInfo = copy(AliasInfo) @label escape_unindexable_def # fields are known partially: propagate escape information imposed on recorded possibilities to all fields values info = AliasInfo.info @@ -1469,7 +1223,7 @@ function analyze_fields(ir::IRCode, @nospecialize(typ), @nospecialize(fld)) return IndexableFields(nflds), fidx end -function reanalyze_fields(ir::IRCode, AliasInfo::IndexableFields, @nospecialize(typ), @nospecialize(fld)) +function reanalyze_fields(AliasInfo::IndexableFields, ir::IRCode, @nospecialize(typ), @nospecialize(fld)) nflds = fieldcount_noerror(typ) if nflds === nothing return merge_to_unindexable(AliasInfo), 0 @@ -1483,6 +1237,7 @@ function reanalyze_fields(ir::IRCode, AliasInfo::IndexableFields, @nospecialize( if fidx === nothing return merge_to_unindexable(AliasInfo), 0 end + AliasInfo = copy(AliasInfo) infos = AliasInfo.infos ninfos = length(infos) if nflds > ninfos @@ -1519,12 +1274,13 @@ function escape_builtin!(::typeof(getfield), astate::AnalysisState, pc::Int, arg @goto record_unindexable_use end elseif isa(AliasInfo, IndexableFields) - AliasInfo, fidx = reanalyze_fields(ir, AliasInfo, typ, args[3]) + AliasInfo, fidx = reanalyze_fields(AliasInfo, ir, typ, args[3]) isa(AliasInfo, Unindexable) && @goto record_unindexable_use @label record_indexable_use push!(AliasInfo.infos[fidx], LocalUse(pc)) add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo elseif isa(AliasInfo, Unindexable) + AliasInfo = copy(AliasInfo) @label record_unindexable_use push!(AliasInfo.info, LocalUse(pc)) add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo @@ -1565,7 +1321,7 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar end elseif isa(AliasInfo, IndexableFields) typ = widenconst(argextype(obj, ir)) - AliasInfo, fidx = reanalyze_fields(ir, AliasInfo, typ, args[3]) + AliasInfo, fidx = reanalyze_fields(AliasInfo, ir, typ, args[3]) isa(AliasInfo, Unindexable) && @goto escape_unindexable_def @label escape_indexable_def add_alias_escapes!(astate, val, AliasInfo.infos[fidx]) @@ -1575,7 +1331,7 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar # propagate the escape information of this object ignoring field information add_escape_change!(astate, val, ignore_aliasinfo(objinfo)) elseif isa(AliasInfo, Unindexable) - info = AliasInfo.info + AliasInfo = copy(AliasInfo) @label escape_unindexable_def add_alias_escapes!(astate, val, AliasInfo.info) push!(AliasInfo.info, LocalDef(pc)) @@ -1597,10 +1353,10 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar add_escape_change!(astate, val, ssainfo) # compute the throwness of this setfield! 
call here since builtin_nothrow doesn't account for that @label add_thrown_escapes - if length(args) == 4 && setfield!_nothrow(𝕃ₒ, + if length(args) == 4 && setfield!_nothrow(astate.𝕃ₒ, argextype(args[2], ir), argextype(args[3], ir), argextype(args[4], ir)) return true - elseif length(args) == 3 && setfield!_nothrow(𝕃ₒ, + elseif length(args) == 3 && setfield!_nothrow(astate.𝕃ₒ, argextype(args[2], ir), argextype(args[3], ir)) return true else @@ -1609,314 +1365,12 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar end end -function escape_builtin!(::typeof(arrayref), astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 4 || return false - # check potential thrown escapes from this arrayref call - argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)] - boundcheckt = argtypes[1] - aryt = argtypes[2] - if !array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 3) - add_thrown_escapes!(astate, pc, args, 2) - end - ary = args[3] - inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool - inbounds || add_escape_change!(astate, ary, ThrownEscape(pc)) - # we don't track precise index information about this array and thus don't know what values - # can be referenced here: directly propagate the escape information imposed on the return - # value of this `arrayref` call to the array itself as the most conservative propagation - # but also with updated index information - estate = astate.estate - if isa(ary, SSAValue) || isa(ary, Argument) - aryinfo = estate[ary] - else - # unanalyzable object, so the return value is also unanalyzable - add_escape_change!(astate, SSAValue(pc), ⊤) - return true - end - AliasInfo = aryinfo.AliasInfo - if isa(AliasInfo, Bool) - AliasInfo && @goto conservative_propagation - # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now - idx = array_nd_index(astate, ary, args[4:end]) - if isa(idx, Int) - AliasInfo = IndexableElements(IdDict{Int,AInfo}()) - @goto record_indexable_use - end - AliasInfo = Unindexable() - @goto record_unindexable_use - elseif isa(AliasInfo, IndexableElements) - idx = array_nd_index(astate, ary, args[4:end]) - if !isa(idx, Int) - AliasInfo = merge_to_unindexable(AliasInfo) - @goto record_unindexable_use - end - @label record_indexable_use - info = get!(()->AInfo(), AliasInfo.infos, idx) - push!(info, LocalUse(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - elseif isa(AliasInfo, Unindexable) - @label record_unindexable_use - push!(AliasInfo.info, LocalUse(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - else - # this object has been used as struct, but it is used as array here (thus should throw) - # update ary's element information and just handle this case conservatively - aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo) - @label conservative_propagation - # at the extreme case, an element of `ary` may point to `ary` itself - # so add the alias change here as the most conservative propagation - add_alias_change!(astate, ary, SSAValue(pc)) - end - return true -end - -function escape_builtin!(::typeof(arrayset), astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 5 || return false - # check potential escapes from this arrayset call - # NOTE here we essentially only need to account for TypeError, assuming that - # UndefRefError or BoundsError don't capture any of the arguments here - argtypes = Any[argextype(args[i], astate.ir) for i in 
2:length(args)] - boundcheckt = argtypes[1] - aryt = argtypes[2] - valt = argtypes[3] - if !(array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 4) && - arrayset_typecheck(aryt, valt)) - add_thrown_escapes!(astate, pc, args, 2) - end - ary = args[3] - val = args[4] - inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool - inbounds || add_escape_change!(astate, ary, ThrownEscape(pc)) - # we don't track precise index information about this array and won't record what value - # is being assigned here: directly propagate the escape information of this array to - # the value being assigned as the most conservative propagation - estate = astate.estate - if isa(ary, SSAValue) || isa(ary, Argument) - aryinfo = estate[ary] - else - # unanalyzable object (e.g. obj::GlobalRef): escape field value conservatively - add_escape_change!(astate, val, ⊤) - return true +function escape_builtin!(::typeof(Core.finalizer), astate::AnalysisState, pc::Int, args::Vector{Any}) + if length(args) ≥ 3 + obj = args[3] + add_liveness_change!(astate, obj, pc) # TODO setup a proper FinalizerEscape? end - AliasInfo = aryinfo.AliasInfo - if isa(AliasInfo, Bool) - AliasInfo && @goto conservative_propagation - # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now - idx = array_nd_index(astate, ary, args[5:end]) - if isa(idx, Int) - AliasInfo = IndexableElements(IdDict{Int,AInfo}()) - @goto escape_indexable_def - end - AliasInfo = Unindexable() - @goto escape_unindexable_def - elseif isa(AliasInfo, IndexableElements) - idx = array_nd_index(astate, ary, args[5:end]) - if !isa(idx, Int) - AliasInfo = merge_to_unindexable(AliasInfo) - @goto escape_unindexable_def - end - @label escape_indexable_def - info = get!(()->AInfo(), AliasInfo.infos, idx) - add_alias_escapes!(astate, val, info) - push!(info, LocalDef(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - # propagate the escape information of this array ignoring elements information - add_escape_change!(astate, val, ignore_aliasinfo(aryinfo)) - elseif isa(AliasInfo, Unindexable) - @label escape_unindexable_def - add_alias_escapes!(astate, val, AliasInfo.info) - push!(AliasInfo.info, LocalDef(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - # propagate the escape information of this array ignoring elements information - add_escape_change!(astate, val, ignore_aliasinfo(aryinfo)) - else - # this object has been used as struct, but it is used as array here (thus should throw) - # update ary's element information and just handle this case conservatively - aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo) - @label conservative_propagation - add_alias_change!(astate, val, ary) - end - # also propagate escape information imposed on the return value of this `arrayset` - ssainfo = estate[SSAValue(pc)] - add_escape_change!(astate, ary, ssainfo) - return true -end - -# NOTE this function models and thus should be synced with the implementation of: -# size_t array_nd_index(jl_array_t *a, jl_value_t **args, size_t nidxs, ...) 
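# The removed `array_nd_index` helper below mirrors the C routine named in the comment
# above: it folds 1-based n-dimensional indices into a 0-based linear offset and bails out
# on any out-of-bounds index. A standalone sketch of that arithmetic (column-major, as in
# Julia arrays; the function name is illustrative only):
function linear_index_sketch(dims::Vector{Int}, idxs::Vector{Int})
    i, stride = 0, 1
    nd = length(dims)
    for k in 1:length(idxs)
        ii = idxs[k] - 1                        # 0-based component
        d = k > nd ? 1 : dims[k]
        # every index except the last must stay inside its dimension
        k < length(idxs) && !(0 ≤ ii < d) && return nothing
        i += ii * stride
        stride *= d
    end
    for k in (length(idxs)+1):nd                # account for trailing dimensions
        stride *= dims[k]
    end
    return (0 ≤ i < stride) ? i : nothing       # final check against the full length
end

@assert linear_index_sketch([2, 3], [2, 3]) == 5        # last element of a 2×3 array
@assert linear_index_sketch([2, 3], [3, 1]) === nothing # first index out of bounds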
-function array_nd_index(astate::AnalysisState, @nospecialize(ary), args::Vector{Any}, nidxs::Int = length(args)) - isa(ary, SSAValue) || return nothing - aryid = ary.id - arrayinfo = astate.estate.arrayinfo - isa(arrayinfo, ArrayInfo) || return nothing - haskey(arrayinfo, aryid) || return nothing - dims = arrayinfo[aryid] - local i = 0 - local k, stride = 0, 1 - local nd = length(dims) - while k < nidxs - arg = args[k+1] - argval = argextype(arg, astate.ir) - isa(argval, Const) || return nothing - argval = argval.val - isa(argval, Int) || return nothing - ii = argval - 1 - i += ii * stride - d = k ≥ nd ? 1 : dims[k+1] - k < nidxs - 1 && ii ≥ d && return nothing # BoundsError - stride *= d - k += 1 - end - while k < nd - stride *= dims[k+1] - k += 1 - end - i ≥ stride && return nothing # BoundsError - return i -end - -function escape_builtin!(::typeof(arraysize), astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) == 3 || return false - ary = args[2] - dim = args[3] - if !arraysize_typecheck(ary, dim, astate.ir) - add_escape_change!(astate, ary, ThrownEscape(pc)) - add_escape_change!(astate, dim, ThrownEscape(pc)) - end - # NOTE we may still see "arraysize: dimension out of range", but it doesn't capture anything - return true -end - -function arraysize_typecheck(@nospecialize(ary), @nospecialize(dim), ir::IRCode) - aryt = argextype(ary, ir) - aryt ⊑ Array || return false - dimt = argextype(dim, ir) - dimt ⊑ Int || return false - return true -end - -# returns nothing if this isn't array resizing operation, -# otherwise returns true if it can throw BoundsError and false if not -function array_resize_info(name::Symbol) - if name === :jl_array_grow_beg || name === :jl_array_grow_end - return false, 1 - elseif name === :jl_array_del_beg || name === :jl_array_del_end - return true, 1 - elseif name === :jl_array_grow_at || name === :jl_array_del_at - return true, 2 - else - return nothing - end -end - -# NOTE may potentially throw "cannot resize array with shared data" error, -# but just ignore it since it doesn't capture anything -function escape_array_resize!(boundserror::Bool, ninds::Int, - astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 6+ninds || return add_fallback_changes!(astate, pc, args) - ary = args[6] - aryt = argextype(ary, astate.ir) - aryt ⊑ Array || return add_fallback_changes!(astate, pc, args) - for i in 1:ninds - ind = args[i+6] - indt = argextype(ind, astate.ir) - indt ⊑ Integer || return add_fallback_changes!(astate, pc, args) - end - if boundserror - # this array resizing can potentially throw `BoundsError`, impose it now - add_escape_change!(astate, ary, ThrownEscape(pc)) - end - # give up indexing analysis whenever we see array resizing - # (since we track array dimensions only globally) - mark_unindexable!(astate, ary) - add_liveness_changes!(astate, pc, args, 6) -end - -function mark_unindexable!(astate::AnalysisState, @nospecialize(ary)) - isa(ary, SSAValue) || return - aryinfo = astate.estate[ary] - AliasInfo = aryinfo.AliasInfo - isa(AliasInfo, IndexableElements) || return - AliasInfo = merge_to_unindexable(AliasInfo) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) -end - -is_array_copy(name::Symbol) = name === :jl_array_copy - -# FIXME this implementation is very conservative, improve the accuracy and solve broken test cases -function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 6 || return add_fallback_changes!(astate, pc, args) - ary = args[6] - aryt = argextype(ary, 
astate.ir) - aryt ⊑ Array || return add_fallback_changes!(astate, pc, args) - if isa(ary, SSAValue) || isa(ary, Argument) - newary = SSAValue(pc) - aryinfo = astate.estate[ary] - newaryinfo = astate.estate[newary] - add_escape_change!(astate, newary, aryinfo) - add_escape_change!(astate, ary, newaryinfo) - end - add_liveness_changes!(astate, pc, args, 6) -end - -is_array_isassigned(name::Symbol) = name === :jl_array_isassigned - -function escape_array_isassigned!(astate::AnalysisState, pc::Int, args::Vector{Any}) - if !array_isassigned_nothrow(args, astate.ir) - add_thrown_escapes!(astate, pc, args) - end - add_liveness_changes!(astate, pc, args, 6) -end - -function array_isassigned_nothrow(args::Vector{Any}, src::IRCode) - # if !validate_foreigncall_args(args, - # :jl_array_isassigned, Cint, svec(Any,Csize_t), 0, :ccall) - # return false - # end - length(args) ≥ 7 || return false - arytype = argextype(args[6], src) - arytype ⊑ Array || return false - idxtype = argextype(args[7], src) - idxtype ⊑ Csize_t || return false - return true -end - -# # COMBAK do we want to enable this (and also backport this to Base for array allocations?) -# import Core.Compiler: Cint, svec -# function validate_foreigncall_args(args::Vector{Any}, -# name::Symbol, @nospecialize(rt), argtypes::SimpleVector, nreq::Int, convention::Symbol) -# length(args) ≥ 5 || return false -# normalize(args[1]) === name || return false -# args[2] === rt || return false -# args[3] === argtypes || return false -# args[4] === vararg || return false -# normalize(args[5]) === convention || return false -# return true -# end - -if isdefined(Core, :ImmutableArray) - -import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw - -escape_builtin!(::typeof(arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) = - is_safe_immutable_array_op(Array, astate, args) -escape_builtin!(::typeof(mutating_arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) = - is_safe_immutable_array_op(Array, astate, args) -escape_builtin!(::typeof(arraythaw), astate::AnalysisState, pc::Int, args::Vector{Any}) = - is_safe_immutable_array_op(ImmutableArray, astate, args) -function is_safe_immutable_array_op(@nospecialize(arytype), astate::AnalysisState, args::Vector{Any}) - length(args) == 2 || return false - argextype(args[2], astate.ir) ⊑ arytype || return false - return true -end - -end # if isdefined(Core, :ImmutableArray) - -if _TOP_MOD !== Core.Compiler - # NOTE define fancy package utilities when developing EA as an external package - include("EAUtils.jl") - using .EAUtils - export code_escapes, @code_escapes, __clear_cache! + return false end end # baremodule EscapeAnalysis diff --git a/base/compiler/ssair/basicblock.jl b/Compiler/src/ssair/basicblock.jl similarity index 100% rename from base/compiler/ssair/basicblock.jl rename to Compiler/src/ssair/basicblock.jl diff --git a/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl b/Compiler/src/ssair/disjoint_set.jl similarity index 91% rename from base/compiler/ssair/EscapeAnalysis/disjoint_set.jl rename to Compiler/src/ssair/disjoint_set.jl index 915bc214d7c3c..e000d7e8a582f 100644 --- a/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl +++ b/Compiler/src/ssair/disjoint_set.jl @@ -3,14 +3,9 @@ # under the MIT license: https://github.com/JuliaCollections/DataStructures.jl/blob/master/License.md # imports -import ._TOP_MOD: - length, - eltype, - union!, - push! +import Base: length, eltype, union!, push! 
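# `disjoint_set.jl` (the hunk continuing below) provides the integer union-find behind
# `estate.aliasset` / `in_same_set` earlier in this diff, with path compression in
# `find_root_impl!` and a root-merging `union!`. A compact standalone sketch of the same
# structure, simplified to union-by-size; all names here are illustrative.
mutable struct ToyDisjointSet
    parents::Vector{Int}
    sizes::Vector{Int}
end
ToyDisjointSet(n::Int) = ToyDisjointSet(collect(1:n), ones(Int, n))

function find_root!(s::ToyDisjointSet, x::Int)
    p = s.parents[x]
    if s.parents[p] ≠ p
        s.parents[x] = p = find_root!(s, p)     # path compression: point x at the root
    end
    return p
end

function union_sets!(s::ToyDisjointSet, x::Int, y::Int)
    xr, yr = find_root!(s, x), find_root!(s, y)
    xr == yr && return xr
    if s.sizes[xr] < s.sizes[yr]
        xr, yr = yr, xr
    end
    s.parents[yr] = xr                          # hang the smaller tree under the larger
    s.sizes[xr] += s.sizes[yr]
    return xr
end

# usage: {1,2} and {3,4} end up in two distinct sets
toy_ds = ToyDisjointSet(4)
union_sets!(toy_ds, 1, 2); union_sets!(toy_ds, 3, 4)
@assert find_root!(toy_ds, 1) == find_root!(toy_ds, 2)
@assert find_root!(toy_ds, 1) ≠ find_root!(toy_ds, 3)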
# usings -import ._TOP_MOD: - OneTo, collect, zero, zeros, one, typemax +using Base: OneTo, collect, zero, zeros, one, typemax # Disjoint-Set @@ -27,7 +22,8 @@ import ._TOP_MOD: # ############################################################ -_intdisjointset_bounds_err_msg(T) = "the maximum number of elements in IntDisjointSet{$T} is $(typemax(T))" +_intdisjointset_bounds_err_msg(@nospecialize T) = + "the maximum number of elements in IntDisjointSet{$T} is $(typemax(T))" """ IntDisjointSet{T<:Integer}(n::Integer) @@ -59,7 +55,7 @@ eltype(::Type{IntDisjointSet{T}}) where {T<:Integer} = T # path compression is implemented here function find_root_impl!(parents::Vector{T}, x::Integer) where {T<:Integer} p = parents[x] - @inbounds if parents[p] != p + @inbounds if parents[p] ≠ p parents[x] = p = _find_root_impl!(parents, p) end return p @@ -68,7 +64,7 @@ end # unsafe version of the above function _find_root_impl!(parents::Vector{T}, x::Integer) where {T<:Integer} @inbounds p = parents[x] - @inbounds if parents[p] != p + @inbounds if parents[p] ≠ p parents[x] = p = _find_root_impl!(parents, p) end return p @@ -99,7 +95,7 @@ function union!(s::IntDisjointSet{T}, x::T, y::T) where {T<:Integer} parents = s.parents xroot = find_root_impl!(parents, x) yroot = find_root_impl!(parents, y) - return xroot != yroot ? root_union!(s, xroot, yroot) : xroot + return xroot ≠ yroot ? root_union!(s, xroot, yroot) : xroot end """ diff --git a/base/compiler/ssair/domtree.jl b/Compiler/src/ssair/domtree.jl similarity index 96% rename from base/compiler/ssair/domtree.jl rename to Compiler/src/ssair/domtree.jl index 1edb8d2d5c6d4..f6a30cdee4f17 100644 --- a/base/compiler/ssair/domtree.jl +++ b/Compiler/src/ssair/domtree.jl @@ -82,6 +82,8 @@ struct DFSTree # (preorder number -> preorder number) # Storing it this way saves a few lookups in the snca_compress! algorithm to_parent_pre::Vector{PreNumber} + + _worklist::Vector{Tuple{BBNumber, PreNumber, Bool}} end function DFSTree(n_blocks::Int) @@ -89,14 +91,16 @@ function DFSTree(n_blocks::Int) Vector{BBNumber}(undef, n_blocks), zeros(PostNumber, n_blocks), Vector{BBNumber}(undef, n_blocks), - zeros(PreNumber, n_blocks)) + zeros(PreNumber, n_blocks), + Vector{Tuple{BBNumber, PreNumber, Bool}}()) end copy(D::DFSTree) = DFSTree(copy(D.to_pre), copy(D.from_pre), copy(D.to_post), copy(D.from_post), - copy(D.to_parent_pre)) + copy(D.to_parent_pre), + copy(D._worklist)) function copy!(dst::DFSTree, src::DFSTree) copy!(dst.to_pre, src.to_pre) @@ -106,17 +110,26 @@ function copy!(dst::DFSTree, src::DFSTree) copy!(dst.to_parent_pre, src.to_parent_pre) return dst end +function resize!(D::DFSTree, n::Integer) + resize!(D.to_pre, n) + resize!(D.from_pre, n) + resize!(D.to_post, n) + resize!(D.from_post, n) + resize!(D.to_parent_pre, n) +end length(D::DFSTree) = length(D.from_pre) function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool) - copy!(D, DFSTree(length(blocks))) + resize!(D, length(blocks)) + fill!(D.to_pre, 0) + to_visit = D._worklist # always starts empty if is_post_dominator # TODO: We're using -1 as the virtual exit node here. Would it make # sense to actually have a real BB for the exit always? - to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)] + push!(to_visit, (-1, 0, false)) else - to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] + push!(to_visit, (1, 0, false)) end pre_num = is_post_dominator ? 
0 : 1 post_num = 1 @@ -189,7 +202,7 @@ DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0) """ Keeps the per-BB state of the Semi NCA algorithm. In the original formulation, there are three separate length `n` arrays, `label`, `semi` and `ancestor`. -Instead, for efficiency, we use one array in a array-of-structs style setup. +Instead, for efficiency, we use one array in an array-of-structs style setup. """ struct SNCAData semi::PreNumber @@ -332,10 +345,7 @@ function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, m ancestors = copy(D.to_parent_pre) relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre) for w::PreNumber in reverse(relevant_blocks) - # LLVM initializes this to the parent, the paper initializes this to - # `w`, but it doesn't really matter (the parent is a predecessor, so at - # worst we'll discover it below). Save a memory reference here. - semi_w = typemax(PreNumber) + semi_w = ancestors[w] last_linked = PreNumber(w + 1) for v ∈ dom_edges(domtree, blocks, D.from_pre[w]) # For the purpose of the domtree, ignore virtual predecessors into @@ -596,7 +606,7 @@ dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) = _dominates(domtree, bb1, bb2) """ - postdominates(domtree::DomTree, bb1::Int, bb2::Int) -> Bool + postdominates(domtree::PostDomTree, bb1::Int, bb2::Int) -> Bool Checks if `bb1` post-dominates `bb2`. `bb1` and `bb2` are indexes into the `CFG` blocks. @@ -647,6 +657,8 @@ end Compute the nearest common (post-)dominator of `a` and `b`. """ function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + a == 0 && return a + b == 0 && return b alevel = domtree.nodes[a].level blevel = domtree.nodes[b].level # W.l.g. assume blevel <= alevel diff --git a/base/compiler/ssair/heap.jl b/Compiler/src/ssair/heap.jl similarity index 99% rename from base/compiler/ssair/heap.jl rename to Compiler/src/ssair/heap.jl index 6e9883bc4ec60..1afb4eb5b2ffc 100644 --- a/base/compiler/ssair/heap.jl +++ b/Compiler/src/ssair/heap.jl @@ -3,13 +3,11 @@ # Heap operations on flat vectors # ------------------------------- - # Binary heap indexing heapleft(i::Integer) = 2i heapright(i::Integer) = 2i + 1 heapparent(i::Integer) = div(i, 2) - # Binary min-heap percolate down. function percolate_down!(xs::Vector, i::Integer, x, o::Ordering, len::Integer=length(xs)) @inbounds while (l = heapleft(i)) <= len @@ -60,7 +58,6 @@ function heappush!(xs::Vector, x, o::Ordering) return xs end - """ heapify!(v, ord::Ordering) diff --git a/base/compiler/ssair/inlining.jl b/Compiler/src/ssair/inlining.jl similarity index 64% rename from base/compiler/ssair/inlining.jl rename to Compiler/src/ssair/inlining.jl index 170725f231761..0c0d14bf8f25a 100644 --- a/base/compiler/ssair/inlining.jl +++ b/Compiler/src/ssair/inlining.jl @@ -12,19 +12,24 @@ struct InliningTodo mi::MethodInstance # The IR of the inlinee ir::IRCode + # The SpecInfo for the inlinee + spec_info::SpecInfo + # The DebugInfo table for the inlinee + di::DebugInfo # If the function being inlined is a single basic block we can use a # simpler inlining algorithm. 
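# The `domtree.jl` hunk above makes the iterative DFS reuse a preallocated `_worklist`
# (plus `resize!`/`fill!` instead of rebuilding the tree) so repeated recomputation does
# not allocate a fresh stack each time. A standalone sketch of that pattern on a plain
# adjacency list; the names and the toy CFG are illustrative only.
function preorder!(to_pre::Vector{Int}, worklist::Vector{Int}, succs::Vector{Vector{Int}})
    fill!(to_pre, 0)                    # 0 means "not visited yet"
    empty!(worklist)                    # the reusable stack always starts empty
    push!(worklist, 1)                  # start from the entry block
    pre = 0
    while !isempty(worklist)
        bb = pop!(worklist)
        to_pre[bb] == 0 || continue     # already numbered
        pre += 1
        to_pre[bb] = pre
        for succ in succs[bb]
            to_pre[succ] == 0 && push!(worklist, succ)
        end
    end
    return to_pre
end

# usage: a small diamond-shaped CFG; the worklist is allocated once and reused across calls
toy_succs = [[2, 3], [4], [4], Int[]]
toy_worklist = Int[]
toy_pre = preorder!(zeros(Int, 4), toy_worklist, toy_succs)
@assert toy_pre[1] == 1 && all(>(0), toy_pre)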
This flag determines whether that's allowed linear_inline_eligible::Bool # Effects of the call statement effects::Effects end -function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) - return InliningTodo(mi, ir, linear_inline_eligible(ir), effects) +function InliningTodo(mi::MethodInstance, ir::IRCode, spec_info::SpecInfo, di::DebugInfo, effects::Effects) + return InliningTodo(mi, ir, spec_info, di, linear_inline_eligible(ir), effects) end struct ConstantCase val::Any - ConstantCase(@nospecialize val) = new(val) + edge::CodeInstance + ConstantCase(@nospecialize(val), edge::CodeInstance) = new(val, edge) end struct SomeCase @@ -33,7 +38,7 @@ struct SomeCase end struct InvokeCase - invoke::MethodInstance + invoke::Union{CodeInstance,MethodInstance} effects::Effects info::CallInfo end @@ -48,29 +53,22 @@ struct InliningCase end struct UnionSplit - fully_covered::Bool + handled_all_cases::Bool # All possible dispatches are included in the cases + fully_covered::Bool # All handled cases are fully covering atype::DataType cases::Vector{InliningCase} bbs::Vector{Int} - UnionSplit(fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) = - new(fully_covered, atype, cases, Int[]) + UnionSplit(handled_all_cases::Bool, fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) = + new(handled_all_cases, fully_covered, atype, cases, Int[]) end struct InliningEdgeTracker edges::Vector{Any} - invokesig::Union{Nothing,Vector{Any}} - InliningEdgeTracker(state::InliningState, invokesig::Union{Nothing,Vector{Any}}=nothing) = - new(state.edges, invokesig) + InliningEdgeTracker(state::InliningState) = new(state.edges) end -function add_inlining_backedge!((; edges, invokesig)::InliningEdgeTracker, mi::MethodInstance) - if invokesig === nothing - push!(edges, mi) - else # invoke backedge - push!(edges, invoke_signature(invokesig), mi) - end - return nothing -end +add_inlining_edge!(et::InliningEdgeTracker, edge::CodeInstance) = add_inlining_edge!(et.edges, edge) +add_inlining_edge!(et::InliningEdgeTracker, edge::MethodInstance) = add_inlining_edge!(et.edges, edge) function ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds::Bool) # Go through the function, performing simple inlining (e.g. replacing call by constants @@ -78,7 +76,7 @@ function ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds @timeit "analysis" todo = assemble_inline_todo!(ir, state) isempty(todo) && return ir # Do the actual inlining for every call we identified - @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, OptimizationParams(state.interp)) + @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, state.interp) return ir end @@ -135,7 +133,7 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn last_block_idx = last(state.cfg.blocks[block].stmts) if false # TODO: ((idx+1) == last_block_idx && isa(ir[SSAValue(last_block_idx)], GotoNode)) need_split = false - post_bb_id = -ir[SSAValue(last_block_idx)][:inst].label + post_bb_id = -ir[SSAValue(last_block_idx)][:stmt].label else post_bb_id = length(state.new_cfg_blocks) + length(inlinee_cfg.blocks) + (need_split_before ? 
1 : 0) need_split = true #!(idx == last_block_idx) @@ -196,7 +194,7 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn for (old_block, new_block) in enumerate(bb_rename_range) if (length(state.new_cfg_blocks[new_block].succs) == 0) terminator_idx = last(inlinee_cfg.blocks[old_block].stmts) - terminator = todo.ir[SSAValue(terminator_idx)][:inst] + terminator = todo.ir[SSAValue(terminator_idx)][:stmt] if isa(terminator, ReturnNode) && isdefined(terminator, :val) any_edges = true push!(state.new_cfg_blocks[new_block].succs, post_bb_id) @@ -211,10 +209,9 @@ function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGIn return nothing end -function cfg_inline_unionsplit!(ir::IRCode, idx::Int, - (; fully_covered, #=atype,=# cases, bbs)::UnionSplit, - state::CFGInliningState, - params::OptimizationParams) +function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit, + state::CFGInliningState, params::OptimizationParams) + (; handled_all_cases, fully_covered, #=atype,=# cases, bbs) = union_split inline_into_block!(state, block_for_inst(ir, idx)) from_bbs = Int[] delete!(state.split_targets, length(state.new_cfg_blocks)) @@ -234,9 +231,7 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, end end push!(from_bbs, length(state.new_cfg_blocks)) - # TODO: Right now we unconditionally generate a fallback block - # in case of subtyping errors - This is probably unnecessary. - if i != length(cases) || (!fully_covered || (!params.trust_inference)) + if !(i == length(cases) && (handled_all_cases && fully_covered)) # This block will have the next condition or the final else case push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx))) push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks)) @@ -245,7 +240,10 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, end end # The edge from the fallback block. - fully_covered || push!(from_bbs, length(state.new_cfg_blocks)) + # NOTE This edge is only required for `!handled_all_cases` and not `!fully_covered`, + # since in the latter case we inline `Core.throw_methoderror` into the fallback + # block, which is must-throw, making the subsequent code path unreachable. 
+ !handled_all_cases && push!(from_bbs, length(state.new_cfg_blocks)) # This block will be the block everyone returns to push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx), from_bbs, orig_succs)) join_bb = length(state.new_cfg_blocks) @@ -301,79 +299,39 @@ function finish_cfg_inline!(state::CFGInliningState) end end -# duplicated from IRShow -function normalize_method_name(m) - if m isa Method - return m.name - elseif m isa MethodInstance - return (m.def::Method).name - elseif m isa Symbol - return m - else - return Symbol("") - end -end -@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method) - -inline_node_is_duplicate(topline::LineInfoNode, line::LineInfoNode) = - topline.module === line.module && - method_name(topline) === method_name(line) && - topline.file === line.file && - topline.line === line.line - -function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode, - inlinee::MethodInstance, - inlined_at::Int32) - inlinee_def = inlinee.def::Method - coverage = coverage_enabled(inlinee_def.module) - linetable_offset::Int32 = length(linetable) - # Append the linetable of the inlined function to our line table - topline::Int32 = linetable_offset + Int32(1) - coverage_by_path = JLOptions().code_coverage == 3 - push!(linetable, LineInfoNode(inlinee_def.module, inlinee, inlinee_def.file, inlinee_def.line, inlined_at)) - oldlinetable = inlinee_ir.linetable - extra_coverage_line = zero(Int32) - for oldline in eachindex(oldlinetable) - entry = oldlinetable[oldline] - if !coverage && coverage_by_path && is_file_tracked(entry.file) - # include topline coverage entry if in path-specific coverage mode, and any file falls under path - coverage = true +# TODO append `inlinee_debuginfo` to inner linetable when `inlined_at[2] ≠ 0` +function ir_inline_linetable!(debuginfo::DebugInfoStream, inlinee_debuginfo::DebugInfo, inlined_at::NTuple{3,Int32}) + # Append the linetable of the inlined function to our edges table + linetable_offset = 1 + while true + if linetable_offset > length(debuginfo.edges) + push!(debuginfo.edges, inlinee_debuginfo) + break + elseif debuginfo.edges[linetable_offset] === inlinee_debuginfo + break end - newentry = LineInfoNode(entry.module, entry.method, entry.file, entry.line, - (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at)) - if oldline == 1 - # check for a duplicate on the first iteration (likely true) - if inline_node_is_duplicate(linetable[topline], newentry) - continue - else - linetable_offset += 1 - end - end - push!(linetable, newentry) - end - if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline - extra_coverage_line = topline + linetable_offset += 1 end - return linetable_offset, extra_coverage_line + return (inlined_at[1], Int32(linetable_offset), Int32(0)) end function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact}, - linetable::Vector{LineInfoNode}, ir′::IRCode, sparam_vals::SimpleVector, - mi::MethodInstance, inlined_at::Int32, argexprs::Vector{Any}) + ir::IRCode, spec_info::SpecInfo, di::DebugInfo, mi::MethodInstance, + inlined_at::NTuple{3,Int32}, argexprs::Vector{Any}) def = mi.def::Method - topline::Int32 = length(linetable) + Int32(1) - linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, ir′, mi, inlined_at) - if extra_coverage_line != 0 - insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) + debuginfo = inline_target isa IRCode ? 
inline_target.debuginfo : inline_target.ir.debuginfo + topline = new_inlined_at = ir_inline_linetable!(debuginfo, di, inlined_at) + if should_insert_coverage(def.module, di) + insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, topline)) end - sp_ssa = nothing - if !validate_sparams(sparam_vals) + spvals_ssa = nothing + if !validate_sparams(mi.sparam_vals) # N.B. This works on the caller-side argexprs, (i.e. before the va fixup below) - sp_ssa = insert_node!( - effect_free_and_nothrow(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) + spvals_ssa = insert_node!( + removable_if_unused(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) end - if def.isva - nargs_def = Int(def.nargs::Int32) + if spec_info.isva + nargs_def = spec_info.nargs if nargs_def > 0 argexprs = fix_va_argexprs!(insert_node!, inline_target, argexprs, nargs_def, topline) end @@ -382,43 +340,46 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod # Replace the first argument by a load of the capture environment argexprs[1] = insert_node!( NewInstruction(Expr(:call, GlobalRef(Core, :getfield), argexprs[1], QuoteNode(:captures)), - ir′.argtypes[1], topline)) + ir.argtypes[1], topline)) end - return (Pair{Union{Nothing, SSAValue}, Vector{Any}}(sp_ssa, argexprs), linetable_offset) + return SSASubstitute(mi, argexprs, spvals_ssa, new_inlined_at) +end + +function adjust_boundscheck!(inline_compact::IncrementalCompact, idx′::Int, stmt::Expr, boundscheck::Symbol) + if boundscheck === :off + isempty(stmt.args) && push!(stmt.args, false) + elseif boundscheck !== :propagate + isempty(stmt.args) && push!(stmt.args, true) + end + return nothing end function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, - linetable::Vector{LineInfoNode}, item::InliningTodo, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) # Ok, do the inlining here - sparam_vals = item.mi.sparam_vals inlined_at = compact.result[idx][:line] + ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.spec_info, item.di, item.mi, inlined_at, argexprs) + boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? :off : boundscheck - ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertHere(compact), - compact, linetable, item.ir, sparam_vals, item.mi, inlined_at, argexprs) - - if boundscheck === :default || boundscheck === :propagate - if (compact.result[idx][:flag] & IR_FLAG_INBOUNDS) != 0 - boundscheck = :off - end - end # If the iterator already moved on to the next basic block, - # temporarily re-open in again. + # temporarily re-open it again. local return_value - def = item.mi.def::Method - sig = def.sig # Special case inlining that maintains the current basic block if there's only one BB in the target new_new_offset = length(compact.new_new_nodes) late_fixup_offset = length(compact.late_fixup) if item.linear_inline_eligible #compact[idx] = nothing inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) - for ((_, idx′), stmt′) in inline_compact + @assert isempty(inline_compact.perm) && isempty(inline_compact.pending_perm) "linetable not in canonical form (missing compact call)" + for ((lineidx, idx′), stmt′) in inline_compact # This dance is done to maintain accurate usage counts in the # face of rename_arguments! 
mutating in place - should figure out # something better eventually. inline_compact[idx′] = nothing - stmt′ = ssa_substitute!(InsertBefore(inline_compact, SSAValue(idx′)), inline_compact[SSAValue(idx′)], stmt′, argexprs, sig, sparam_vals, sp_ssa, linetable_offset, boundscheck) + # alter the line number information for InsertBefore to point to the current instruction in the new linetable + inline_compact[SSAValue(idx′)][:line] = (ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(lineidx)) + insert_node! = InsertBefore(inline_compact, SSAValue(idx′)) + stmt′ = ssa_substitute_op!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, ssa_substitute) if isa(stmt′, ReturnNode) val = stmt′.val return_value = SSAValue(idx′) @@ -426,8 +387,10 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector inline_compact.result[idx′][:type] = argextype(val, isa(val, Argument) || isa(val, Expr) ? compact : inline_compact) # Everything legal in value position is guaranteed to be effect free in stmt position - inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW + inline_compact.result[idx′][:flag] = IR_FLAGS_REMOVABLE break + elseif isexpr(stmt′, :boundscheck) + adjust_boundscheck!(inline_compact, idx′, stmt′, boundscheck) end inline_compact[idx′] = stmt′ end @@ -443,9 +406,12 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector pn = PhiNode() #compact[idx] = nothing inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) - for ((_, idx′), stmt′) in inline_compact + @assert isempty(inline_compact.perm) && isempty(inline_compact.pending_perm) "linetable not in canonical form (missing compact call)" + for ((lineidx, idx′), stmt′) in inline_compact inline_compact[idx′] = nothing - stmt′ = ssa_substitute!(InsertBefore(inline_compact, SSAValue(idx′)), inline_compact[SSAValue(idx′)], stmt′, argexprs, sig, sparam_vals, sp_ssa, linetable_offset, boundscheck) + inline_compact[SSAValue(idx′)][:line] = (ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(lineidx)) + insert_node! = InsertBefore(inline_compact, SSAValue(idx′)) + stmt′ = ssa_substitute_op!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, ssa_substitute) if isa(stmt′, ReturnNode) if isdefined(stmt′, :val) val = stmt′.val @@ -456,12 +422,14 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector end elseif isa(stmt′, GotoNode) stmt′ = GotoNode(stmt′.label + bb_offset) - elseif isa(stmt′, Expr) && stmt′.head === :enter - stmt′ = Expr(:enter, stmt′.args[1]::Int + bb_offset) + elseif isa(stmt′, EnterNode) + stmt′ = EnterNode(stmt′, stmt′.catch_dest == 0 ? 
0 : stmt′.catch_dest + bb_offset)
         elseif isa(stmt′, GotoIfNot)
             stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset)
         elseif isa(stmt′, PhiNode)
             stmt′ = PhiNode(Int32[edge+bb_offset for edge in stmt′.edges], stmt′.values)
+        elseif isexpr(stmt′, :boundscheck)
+            adjust_boundscheck!(inline_compact, idx′, stmt′, boundscheck)
         end
         inline_compact[idx′] = stmt′
     end
@@ -479,7 +447,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
 end
 function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
-    argexprs::Vector{Any}, nargs_def::Int, line_idx::Int32)
+    argexprs::Vector{Any}, nargs_def::Int, line_idx::NTuple{3,Int32})
     newargexprs = argexprs[1:(nargs_def-1)]
     tuple_call = Expr(:call, TOP_TUPLE)
     tuple_typs = Any[]
@@ -494,55 +462,48 @@ function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, I
     return newargexprs
 end
-const FATAL_TYPE_BOUND_ERROR = ErrorException("fatal error in type inference (type bound)")
-
 """
     ir_inline_unionsplit!
-The core idea of this function is to simulate the dispatch semantics by generating
-(flat) `isa`-checks corresponding to the signatures of union-split dispatch candidates,
-and then inline their bodies into each `isa`-conditional block.
-This `isa`-based virtual dispatch requires few pre-conditions to hold in order to simulate
-the actual semantics correctly.
+The primary purpose of this function is to emulate the dispatch behavior by generating flat
+`isa`-checks that correspond to the signatures of union-split dispatch candidates.
+These checks allow us to inline the method bodies into the respective `isa`-conditional blocks.
+
+Note that two pre-conditions are required for this emulation to work correctly:
+
+1. Ordered Dispatch Candidates
-The first one is that these dispatch candidates need to be processed in order of their specificity,
-and the corresponding `isa`-checks should reflect the method specificities, since now their
-signatures are not necessarily concrete.
-For example, given the following definitions:
+The dispatch candidates must be processed in order of their specificity.
+The generated `isa`-checks should reflect this order,
+especially since the method signatures may not be concrete.
+For instance, with the methods:
     f(x::Int) = ...
     f(x::Number) = ...
    f(x::Any) = ...
-and a callsite:
-
-    f(x::Any)
-
-then a correct `isa`-based virtual dispatch would be:
+A correct `isa`-based dispatch emulation for the call site `f(x::Any)` would look like:
     if isa(x, Int)
        [inlined/resolved f(x::Int)]
    elseif isa(x, Number)
       [inlined/resolved f(x::Number)]
-    else # implies `isa(x, Any)`, which fully covers this call signature,
-         # otherwise we need to insert a fallback dynamic dispatch case also
+    else
       [inlined/resolved f(x::Any)]
   end
-Fortunately, `ml_matches` should already sorted them in that way, except cases when there is
-any ambiguity, from which we already bail out at this point.
+`ml_matches` should already sort the matched method candidates correctly,
+except in ambiguous cases, which we've already excluded at this stage.
-Another consideration is type equality constraint from type variables: the `isa`-checks are
-not enough to simulate the dispatch semantics in cases like:
-Given a definition:
+2. Type Equality Constraints
-    g(x::T, y::T) where T<:Integer = ...
-transform a callsite:
+Another factor is the type equality constraint imposed by type variables.
+Simple `isa`-checks are insufficient to capture the semantics in some cases.
+For example, given the following method definition:
-    g(x::Any, y::Any)
+    g(x::T, y::T) where T<:Integer = ...
-into the optimized form:
+it is _invalid_ to optimize a call site like `g(x::Any, y::Any)` into:
    if isa(x, Integer) && isa(y, Integer)
        [inlined/resolved g(x::Integer, y::Integer)]
@@ -550,18 +511,20 @@ into the optimized form:
        g(x, y) # fallback dynamic dispatch
    end
-But again, we should already bail out from such cases at this point, essentially by
-excluding cases where `case.sig::UnionAll`.
+since we also need to check that `x` and `y` are equal types.
+
+But, we've already excluded such cases at this point,
+mainly by filtering out `case.sig::UnionAll`,
+so there is no need to worry about type equality at this point.
-In short, here we can process the dispatch candidates in order, assuming we haven't changed
-their order somehow somewhere up to this point.
+In essence, we can process the dispatch candidates sequentially,
+assuming their order stays the same post-discovery in `ml_matches`.
"""
-function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
-    argexprs::Vector{Any}, linetable::Vector{LineInfoNode},
-    (; fully_covered, atype, cases, bbs)::UnionSplit,
-    boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}},
-    params::OptimizationParams)
-    stmt, typ, line = compact.result[idx][:inst], compact.result[idx][:type], compact.result[idx][:line]
+function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
+        union_split::UnionSplit, boundscheck::Symbol,
+        todo_bbs::Vector{Tuple{Int,Int}}, interp::AbstractInterpreter)
+    (; handled_all_cases, fully_covered, atype, cases, bbs) = union_split
+    stmt, typ, line = compact.result[idx][:stmt], compact.result[idx][:type], compact.result[idx][:line]
    join_bb = bbs[end]
    pn = PhiNode()
    local bb = compact.active_result_bb
@@ -575,22 +538,24 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
            cond = true
            nparams = fieldcount(atype)
            @assert nparams == fieldcount(mtype)
-            if i != ncases || !fully_covered || !params.trust_inference
+            if !(i == ncases && fully_covered && handled_all_cases)
                for i = 1:nparams
-                    a, m = fieldtype(atype, i), fieldtype(mtype, i)
+                    aft, mft = fieldtype(atype, i), fieldtype(mtype, i)
                    # If this is always true, we don't need to check for it
-                    a <: m && continue
+                    aft <: mft && continue
                    # Generate isa check
-                    isa_expr = Expr(:call, isa, argexprs[i], m)
-                    ssa = insert_node_here!(compact, NewInstruction(isa_expr, Bool, line))
+                    isa_expr = Expr(:call, isa, argexprs[i], mft)
+                    isa_type = isa_tfunc(optimizer_lattice(interp), argextype(argexprs[i], compact), Const(mft))
+                    ssa = insert_node_here!(compact, NewInstruction(isa_expr, isa_type, line))
                    if cond === true
                        cond = ssa
                    else
                        and_expr = Expr(:call, and_int, cond, ssa)
-                        cond = insert_node_here!(compact, NewInstruction(and_expr, Bool, line))
+                        and_type = and_int_tfunc(optimizer_lattice(interp), argextype(cond, compact), isa_type)
+                        cond = insert_node_here!(compact, NewInstruction(and_expr, and_type, line))
                    end
                end
-            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Union{}, line))
+            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Any, line))
            end
            bb = next_cond_bb - 1
            finish_current_bb!(compact, 0)
@@ -600,19 +565,21 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
            for i = 1:nparams
                argex = argexprs[i]
                (isa(argex, SSAValue) || isa(argex, Argument)) || continue
-                a, m = fieldtype(atype, i), fieldtype(mtype, i)
-                if !(a <: m)
+                aft, mft =
fieldtype(atype, i), fieldtype(mtype, i) + if !(aft <: mft) + 𝕃ₒ = optimizer_lattice(interp) + narrowed_type = tmeet(𝕃ₒ, argextype(argex, compact), mft) argexprs′[i] = insert_node_here!(compact, - NewInstruction(PiNode(argex, m), m, line)) + NewInstruction(PiNode(argex, mft), narrowed_type, line)) end end end if isa(case, InliningTodo) - val = ir_inline_item!(compact, idx, argexprs′, linetable, case, boundscheck, todo_bbs) + val = ir_inline_item!(compact, idx, argexprs′, case, boundscheck, todo_bbs) elseif isa(case, InvokeCase) - inst = Expr(:invoke, case.invoke, argexprs′...) + invoke_stmt = Expr(:invoke, case.invoke, argexprs′...) flag = flags_for_effects(case.effects) - val = insert_node_here!(compact, NewInstruction(inst, typ, case.info, line, flag)) + val = insert_node_here!(compact, NewInstruction(invoke_stmt, typ, case.info, line, flag)) else case = case::ConstantCase val = case.val @@ -621,7 +588,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, push!(pn.edges, bb) push!(pn.values, val) insert_node_here!(compact, - NewInstruction(GotoNode(join_bb), Union{}, line)) + NewInstruction(GotoNode(join_bb), Any, line)) else insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line)) @@ -630,26 +597,24 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end bb += 1 # We're now in the fall through block, decide what to do - if fully_covered - if !params.trust_inference - e = Expr(:call, GlobalRef(Core, :throw), FATAL_TYPE_BOUND_ERROR) - insert_node_here!(compact, NewInstruction(e, Union{}, line)) - insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line)) - finish_current_bb!(compact, 0) - end - else + if !handled_all_cases ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line)) push!(pn.edges, bb) push!(pn.values, ssa) - insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Union{}, line)) + insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Any, line)) finish_current_bb!(compact, 0) + elseif !fully_covered + insert_node_here!(compact, NewInstruction(Expr(:call, GlobalRef(Core, :throw_methoderror), argexprs...), Union{}, line)) + insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line)) + finish_current_bb!(compact, 0) + ncases == 0 && return insert_node_here!(compact, NewInstruction(nothing, Any, line)) end - # We're now in the join block. return insert_node_here!(compact, NewInstruction(pn, typ, line)) end -function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, params::OptimizationParams) +function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, interp::AbstractInterpreter) + params = OptimizationParams(interp) # Compute the new CFG first (modulo statement ranges, which will be computed below) state = CFGInliningState(ir) for (idx, item) in todo @@ -664,10 +629,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun end finish_cfg_inline!(state) - boundscheck = inbounds_option() - if boundscheck === :default && propagate_inbounds - boundscheck = :propagate - end + boundscheck = propagate_inbounds ? 
:propagate : :default let compact = IncrementalCompact(ir, CFGTransformState!(state.new_cfg_blocks, false)) # This needs to be a minimum and is more of a size hint @@ -694,22 +656,10 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun compact.active_result_bb -= 1 refinish = true end - # It is possible for GlobalRefs and Exprs to be in argument position - # at this point in the IR, though in that case they are required - # to be effect-free. However, we must still move them out of argument - # position, since `Argument` is allowed in PhiNodes, but `GlobalRef` - # and `Expr` are not, so a substitution could anger the verifier. - for aidx in 1:length(argexprs) - aexpr = argexprs[aidx] - if isa(aexpr, Expr) || isa(aexpr, GlobalRef) - ninst = effect_free_and_nothrow(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) - argexprs[aidx] = insert_node_here!(compact, ninst) - end - end if isa(item, InliningTodo) - compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, ir.linetable, item, boundscheck, state.todo_bbs) + compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, item, boundscheck, state.todo_bbs) elseif isa(item, UnionSplit) - compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, ir.linetable, item, boundscheck, state.todo_bbs, params) + compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, item, boundscheck, state.todo_bbs, interp) end compact[idx] = nothing refinish && finish_current_bb!(compact, 0) @@ -720,8 +670,8 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun end elseif isa(stmt, GotoNode) compact[idx] = GotoNode(state.bb_rename[stmt.label]) - elseif isa(stmt, Expr) && stmt.head === :enter - compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]::Int]) + elseif isa(stmt, EnterNode) + compact[idx] = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : state.bb_rename[stmt.catch_dest]) elseif isa(stmt, GotoIfNot) compact[idx] = GotoIfNot(stmt.cond, state.bb_rename[stmt.dest]) elseif isa(stmt, PhiNode) @@ -763,7 +713,7 @@ function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}}, ti = ti.parameters[2]::DataType # checked by `is_valid_type_for_apply_rewrite` end for p in ti.parameters - if isa(p, DataType) && isdefined(p, :instance) + if issingletontype(p) # replace singleton types with their equivalent Const object p = Const(p.instance) elseif isconstType(p) @@ -814,19 +764,20 @@ function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}}, return new_argtypes end -function compileable_specialization(mi::MethodInstance, effects::Effects, - et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true) +function compileable_specialization(code::Union{MethodInstance,CodeInstance}, effects::Effects, + et::InliningEdgeTracker, @nospecialize(info::CallInfo), state::InliningState) + mi = code isa CodeInstance ? 
code.def : code mi_invoke = mi - if compilesig_invokes - method, atype, sparams = mi.def::Method, mi.specTypes, mi.sparam_vals + method, atype, sparams = mi.def::Method, mi.specTypes, mi.sparam_vals + if OptimizationParams(state.interp).compilesig_invokes new_atype = get_compileable_sig(method, atype, sparams) new_atype === nothing && return nothing if atype !== new_atype sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), new_atype, method.sig)::SimpleVector - if sparams === sp_[2]::SimpleVector - mi_invoke = specialize_method(method, new_atype, sparams) - mi_invoke === nothing && return nothing - end + sparams = sp_[2]::SimpleVector + mi_invoke = specialize_method(method, new_atype, sparams) + mi_invoke === nothing && return nothing + code = mi_invoke end else # If this caller does not want us to optimize calls to use their @@ -836,80 +787,96 @@ function compileable_specialization(mi::MethodInstance, effects::Effects, return nothing end end - add_inlining_backedge!(et, mi) - return InvokeCase(mi_invoke, effects, info) -end - -function compileable_specialization(match::MethodMatch, effects::Effects, - et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true) - mi = specialize_method(match) - return compileable_specialization(mi, effects, et, info; compilesig_invokes) + # prefer using a CodeInstance gotten from the cache, since that is where the invoke target should get compiled to normally + # TODO: can this code be gotten directly from inference sometimes? + code = get(code_cache(state), mi_invoke, nothing) + if !isa(code, CodeInstance) + #println("missing code for ", mi_invoke, " for ", mi) + code = mi_invoke + end + add_inlining_edge!(et, code) # to the code and edges + return InvokeCase(code, effects, info) end -struct CachedResult - src::Any +struct InferredResult + src::Any # CodeInfo or IRCode effects::Effects - CachedResult(@nospecialize(src), effects::Effects) = new(src, effects) + edge::CodeInstance + InferredResult(@nospecialize(src), effects::Effects, edge::CodeInstance) = new(src, effects, edge) end @inline function get_cached_result(state::InliningState, mi::MethodInstance) code = get(code_cache(state), mi, nothing) if code isa CodeInstance if use_const_api(code) # in this case function can be inlined to a constant - return ConstantCase(quoted(code.rettype_const)) - else - src = @atomic :monotonic code.inferred + return ConstantCase(quoted(code.rettype_const), code) + end + return code + end + return nothing +end +@inline function get_local_result(inf_result::InferenceResult) + @assert isdefined(inf_result, :ci_as_edge) "InferenceResult without ci_as_edge" + effects = inf_result.ipo_effects + if is_foldable_nothrow(effects) + res = inf_result.result + if isa(res, Const) && is_inlineable_constant(res.val) + # use constant calling convention + return ConstantCase(quoted(res.val), inf_result.ci_as_edge) end - effects = decode_effects(code.ipo_purity_bits) - return CachedResult(src, effects) end - return CachedResult(nothing, Effects()) + return InferredResult(inf_result.src, effects, inf_result.ci_as_edge) end # the general resolver for usual and const-prop'ed calls -function resolve_todo(mi::MethodInstance, result::Union{MethodMatch,InferenceResult}, - argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8, - state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing) - et = InliningEdgeTracker(state, invokesig) +function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult,VolatileInferenceResult}, + 
@nospecialize(info::CallInfo), flag::UInt32, state::InliningState) + et = InliningEdgeTracker(state) + preserve_local_sources = true if isa(result, InferenceResult) - src = result.src - effects = result.ipo_effects - if is_foldable_nothrow(effects) - res = result.result - if isa(res, Const) && is_inlineable_constant(res.val) - # use constant calling convention - add_inlining_backedge!(et, mi) - return ConstantCase(quoted(res.val)) - end - end + inferred_result = get_local_result(result) + elseif isa(result, VolatileInferenceResult) + inferred_result = get_local_result(result.inf_result) + # volatile inference result can be inlined destructively + preserve_local_sources = !result.inf_result.is_src_volatile | OptimizationParams(state.interp).preserve_local_sources else - cached_result = get_cached_result(state, mi) - if cached_result isa ConstantCase - add_inlining_backedge!(et, mi) - return cached_result - end - (; src, effects) = cached_result + inferred_result = get_cached_result(state, mi) + end + if inferred_result isa ConstantCase + add_inlining_edge!(et, inferred_result.edge) + return inferred_result + elseif inferred_result isa InferredResult + (; src, effects, edge) = inferred_result + elseif inferred_result isa CodeInstance + src = @atomic :monotonic inferred_result.inferred + effects = decode_effects(inferred_result.ipo_purity_bits) + edge = inferred_result + else # there is no cached source available for this, but there might be code for the compilation sig + return compileable_specialization(mi, Effects(), et, info, state) end # the duplicated check might have been done already within `analyze_method!`, but still # we need it here too since we may come here directly using a constant-prop' result if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) - return compileable_specialization(mi, effects, et, info; - compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) + return compileable_specialization(edge, effects, et, info, state) end - src = inlining_policy(state.interp, src, info, flag, mi, argtypes) - src === nothing && return compileable_specialization(mi, effects, et, info; - compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) + src_inlining_policy(state.interp, src, info, flag) || + return compileable_specialization(edge, effects, et, info, state) - add_inlining_backedge!(et, mi) - return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) + add_inlining_edge!(et, edge) + if inferred_result isa CodeInstance + ir, spec_info, debuginfo = retrieve_ir_for_inlining(inferred_result, src) + else + ir, spec_info, debuginfo = retrieve_ir_for_inlining(mi, src, preserve_local_sources) + end + return InliningTodo(mi, ir, spec_info, debuginfo, effects) end # the special resolver for :invoke-d call -function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, - @nospecialize(info::CallInfo), flag::UInt8, state::InliningState) +function resolve_todo(mi::MethodInstance, @nospecialize(info::CallInfo), flag::UInt32, + state::InliningState) if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) return nothing end @@ -918,17 +885,19 @@ function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, cached_result = get_cached_result(state, mi) if cached_result isa ConstantCase - add_inlining_backedge!(et, mi) + add_inlining_edge!(et, cached_result.edge) return cached_result + elseif cached_result isa CodeInstance + src = @atomic :monotonic cached_result.inferred + effects = 
decode_effects(cached_result.ipo_purity_bits) + else # there is no cached source available, bail out + return nothing end - (; src, effects) = cached_result - - src = inlining_policy(state.interp, src, info, flag, mi, argtypes) - - src === nothing && return nothing - add_inlining_backedge!(et, mi) - return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) + src_inlining_policy(state.interp, src, info, flag) || return nothing + ir, spec_info, debuginfo = retrieve_ir_for_inlining(cached_result, src) + add_inlining_edge!(et, cached_result) + return InliningTodo(mi, ir, spec_info, debuginfo, effects) end function validate_sparams(sparams::SimpleVector) @@ -947,10 +916,10 @@ function may_have_fcalls(m::Method) end function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, - @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; - allow_typevars::Bool, invokesig::Union{Nothing,Vector{Any}}=nothing) + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_typevars::Bool, + volatile_inf_result::Union{Nothing,VolatileInferenceResult}=nothing) method = match.method - spec_types = match.spec_types # Check that we have the correct number of arguments na = Int(method.nargs) @@ -965,7 +934,9 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, if !match.fully_covers # type-intersection was not able to give us a simple list of types, so # ir_inline_unionsplit won't be able to deal with inlining this - if !(spec_types isa DataType && length(spec_types.parameters) == length(argtypes) && !isvarargtype(spec_types.parameters[end])) + spec_types = match.spec_types + if !(spec_types isa DataType && length(spec_types.parameters) == npassedargs && + !isvarargtype(spec_types.parameters[end])) return nothing end end @@ -977,41 +948,49 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, # Get the specialization for this method signature # (later we will decide what to do with it) mi = specialize_method(match) - return resolve_todo(mi, match, argtypes, info, flag, state; invokesig) + return resolve_todo(mi, volatile_inf_result, info, flag, state) end -function retrieve_ir_for_inlining(mi::MethodInstance, src::String) - src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo - return inflate_ir!(src, mi) +function retrieve_ir_for_inlining(cached_result::CodeInstance, src::String) + src = _uncompressed_ir(cached_result, src) + return inflate_ir!(src, cached_result.def), SpecInfo(src), src.debuginfo end -retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi) -retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir) - -function flags_for_effects(effects::Effects) - flags::UInt8 = 0 - if is_consistent(effects) - flags |= IR_FLAG_CONSISTENT +function retrieve_ir_for_inlining(cached_result::CodeInstance, src::CodeInfo) + return inflate_ir!(copy(src), cached_result.def), SpecInfo(src), src.debuginfo +end +function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool) + if preserve_local_sources + src = copy(src) end - if is_effect_free(effects) - flags |= IR_FLAG_EFFECT_FREE + return inflate_ir!(src, mi), SpecInfo(src), src.debuginfo +end +function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local_sources::Bool) + if preserve_local_sources + ir = copy(ir) end - if is_nothrow(effects) - flags |= IR_FLAG_NOTHROW + # COMBAK this is not correct, we should make `InferenceResult` propagate `SpecInfo` + spec_info = 
let m = mi.def::Method + SpecInfo(Int(m.nargs), m.isva, false, nothing) end - return flags + ir.debuginfo.def = mi + return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts)) end function handle_single_case!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), isinvoke::Bool = false) if isa(case, ConstantCase) - ir[SSAValue(idx)][:inst] = case.val + ir[SSAValue(idx)][:stmt] = case.val elseif isa(case, InvokeCase) is_foldable_nothrow(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing isinvoke && rewrite_invoke_exprargs!(stmt) - stmt.head = :invoke - pushfirst!(stmt.args, case.invoke) - ir[SSAValue(idx)][:flag] |= flags_for_effects(case.effects) + if stmt.head === :invoke + stmt.args[1] = case.invoke + else + stmt.head = :invoke + pushfirst!(stmt.args, case.invoke) + end + add_flag!(ir[SSAValue(idx)], flags_for_effects(case.effects)) elseif case === nothing # Do, well, nothing else @@ -1121,7 +1100,7 @@ function inline_apply!(todo::Vector{Pair{Int,Any}}, # e.g. rewrite `((t::Tuple)...,)` to `t` nonempty_idx = 0 𝕃ₒ = optimizer_lattice(state.interp) - for i = (arg_start + 1):length(argtypes) + for i = (arg_start+1):length(argtypes) ti = argtypes[i] ⊑(𝕃ₒ, ti, Tuple{}) && continue if ⊑(𝕃ₒ, ti, Tuple) && nonempty_idx == 0 @@ -1132,14 +1111,14 @@ function inline_apply!(todo::Vector{Pair{Int,Any}}, break end if nonempty_idx != 0 - ir.stmts[idx][:inst] = stmt.args[nonempty_idx] + ir[SSAValue(idx)][:stmt] = stmt.args[nonempty_idx] return nothing end end # Try to figure out the signature of the function being called # and if rewrite_apply_exprargs can deal with this form arginfos = MaybeAbstractIterationInfo[] - for i = (arg_start + 1):length(argtypes) + for i = (arg_start+1):length(argtypes) thisarginfo = nothing if !is_valid_type_for_apply_rewrite(argtypes[i], OptimizationParams(state.interp)) isa(info, ApplyCallInfo) || return nothing @@ -1172,7 +1151,7 @@ function is_builtin(𝕃ₒ::AbstractLattice, s::Signature) end function handle_invoke_call!(todo::Vector{Pair{Int,Any}}, - ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt8, + ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt32, sig::Signature, state::InliningState) match = info.match if !match.fully_covers @@ -1180,28 +1159,30 @@ function handle_invoke_call!(todo::Vector{Pair{Int,Any}}, return nothing end result = info.result - invokesig = sig.argtypes if isa(result, ConcreteResult) - item = concrete_result_item(result, info, state; invokesig) + item = concrete_result_item(result, info, state) + elseif isa(result, SemiConcreteResult) + item = semiconcrete_result_item(result, info, flag, state) else argtypes = invoke_rewrite(sig.argtypes) if isa(result, ConstPropResult) mi = result.result.linfo validate_sparams(mi.sparam_vals) || return nothing if Union{} !== argtypes_to_type(argtypes) <: mi.def.sig - item = resolve_todo(mi, result.result, argtypes, info, flag, state; invokesig) + item = resolve_todo(mi, result.result, info, flag, state) handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end end - item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig) + volatile_inf_result = result isa VolatileInferenceResult ? 
result : nothing
+        item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, volatile_inf_result)
    end
    handle_single_case!(todo, ir, idx, stmt, item, true)
    return nothing
end
function invoke_signature(argtypes::Vector{Any})
-    ft, argtyps = widenconst(argtypes[2]), instanceof_tfunc(widenconst(argtypes[3]))[1]
+    ft, argtyps = widenconst(argtypes[2]), instanceof_tfunc(widenconst(argtypes[3]), false)[1]
    return rewrap_unionall(Tuple{ft, unwrap_unionall(argtyps).parameters...}, argtyps)
end
@@ -1228,44 +1209,38 @@ end
# As a matter of convenience, this pass also computes effect-freeness.
# For primitives, we do that right here. For proper calls, we will
# discover this when we consult the caches.
-function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt), state::InliningState)
-    return check_effect_free!(ir, idx, stmt, rt, optimizer_lattice(state.interp))
-end
-function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt), 𝕃ₒ::AbstractLattice)
-    (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, ir)
-    if consistent
-        ir.stmts[idx][:flag] |= IR_FLAG_CONSISTENT
-    end
-    if effect_free_and_nothrow
-        ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-    elseif nothrow
-        ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
-    end
-    return effect_free_and_nothrow
+add_inst_flag!(inst::Instruction, ir::IRCode, state::InliningState) =
+    add_inst_flag!(inst, ir, optimizer_lattice(state.interp))
+function add_inst_flag!(inst::Instruction, ir::IRCode, 𝕃ₒ::AbstractLattice)
+    flags = recompute_effects_flags(𝕃ₒ, inst[:stmt], inst[:type], ir)
+    add_flag!(inst, flags)
+    return !iszero(flags & IR_FLAGS_REMOVABLE)
end
# Handles all analysis and inlining of intrinsics and builtins. In particular,
# this method does not access the method table or otherwise process generic
# functions.
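As a rough, user-level illustration of the splat rewriting that `inline_apply!` performs above (turning `Core._apply_iterate` calls such as `((t::Tuple)...,)` into plain calls when the splatted argument types are statically known), here is a minimal sketch; the function names `g` and `h` are hypothetical and only meant to show the shape of the transformation:

    # When `t` has the concrete type Tuple{Int,Int}, the optimizer can replace the
    # splat call `g(t...)` with a direct call on the extracted tuple elements, which
    # then becomes eligible for ordinary inlining like any other call.
    g(a, b) = a + b
    h(t::Tuple{Int,Int}) = g(t...)

    # Inspecting the optimized IR, e.g. with `code_typed(h, (Tuple{Int,Int},))`,
    # should show no remaining `Core._apply_iterate` call.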
-function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, state::InliningState) - stmt = ir.stmts[idx][:inst] - rt = ir.stmts[idx][:type] +function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, flag::UInt32, + state::InliningState) + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] if !(stmt isa Expr) - check_effect_free!(ir, idx, stmt, rt, state) + add_inst_flag!(inst, ir, state) return nothing end + rt = inst[:type] head = stmt.head if head !== :call if head === :splatnew inline_splatnew!(ir, idx, stmt, rt, state) elseif head === :new_opaque_closure - narrow_opaque_closure!(ir, stmt, ir.stmts[idx][:info], state) + narrow_opaque_closure!(ir, stmt, inst[:info], state) elseif head === :invoke sig = call_sig(ir, stmt) sig === nothing && return nothing return stmt, sig end - check_effect_free!(ir, idx, stmt, rt, state) + add_inst_flag!(inst, ir, state) return nothing end @@ -1277,31 +1252,39 @@ function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stat sig === nothing && return nothing # Check if we match any of the early inliners - earlyres = early_inline_special_case(ir, stmt, rt, sig, state) + earlyres = early_inline_special_case(ir, stmt, flag, rt, sig, state) if isa(earlyres, SomeCase) - ir.stmts[idx][:inst] = earlyres.val + inst[:stmt] = earlyres.val return nothing end - if check_effect_free!(ir, idx, stmt, rt, state) + if add_inst_flag!(inst, ir, state) if sig.f === typeassert || ⊑(optimizer_lattice(state.interp), sig.ft, typeof(typeassert)) # typeassert is a no-op if effect free - ir.stmts[idx][:inst] = stmt.args[2] + inst[:stmt] = stmt.args[2] return nothing end end - if (sig.f !== Core.invoke && sig.f !== Core.finalizer && sig.f !== modifyfield!) && - is_builtin(optimizer_lattice(state.interp), sig) - # No inlining for builtins (other invoke/apply/typeassert/finalizer) - return nothing + if is_builtin(optimizer_lattice(state.interp), sig) + let f = sig.f + if (f !== Core.invoke && + f !== Core.finalizer && + f !== modifyfield! && + f !== Core.modifyglobal! && + f !== Core.memoryrefmodify! 
&& + f !== atomic_pointermodify) + # No inlining defined for most builtins (just invoke/apply/typeassert/finalizer), so attempt an early exit for them + return nothing + end + end end # Special case inliners for regular functions - lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state) + lateres = late_inline_special_case!(ir, idx, stmt, flag, rt, sig, state) if isa(lateres, SomeCase) - ir[SSAValue(idx)][:inst] = lateres.val - check_effect_free!(ir, idx, lateres.val, rt, state) + inst[:stmt] = lateres.val + add_inst_flag!(inst, ir, state) return nothing end @@ -1310,22 +1293,17 @@ end function handle_any_const_result!(cases::Vector{InliningCase}, @nospecialize(result), match::MethodMatch, argtypes::Vector{Any}, - @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; - allow_abstract::Bool, allow_typevars::Bool) + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_typevars::Bool) if isa(result, ConcreteResult) - return handle_concrete_result!(cases, result, info, state) - end - if isa(result, SemiConcreteResult) - result = inlining_policy(state.interp, result, info, flag, result.mi, argtypes) - if isa(result, SemiConcreteResult) - return handle_semi_concrete_result!(cases, result, info, flag, state; allow_abstract) - end - end - if isa(result, ConstPropResult) - return handle_const_prop_result!(cases, result, argtypes, info, flag, state; allow_abstract, allow_typevars) + return handle_concrete_result!(cases, result, match, info, state) + elseif isa(result, SemiConcreteResult) + return handle_semi_concrete_result!(cases, result, match, info, flag, state) + elseif isa(result, ConstPropResult) + return handle_const_prop_result!(cases, result, match, info, flag, state; allow_typevars) else - @assert result === nothing - return handle_match!(cases, match, argtypes, info, flag, state; allow_abstract, allow_typevars) + @assert result === nothing || result isa VolatileInferenceResult + return handle_match!(cases, match, argtypes, info, flag, state; allow_typevars, volatile_inf_result = result) end end @@ -1348,253 +1326,222 @@ function info_effects(@nospecialize(result), match::MethodMatch, state::Inlining end end -function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt8, sig::Signature, +function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig::Signature, state::InliningState) nunion = nsplit(info) nunion === nothing && return nothing cases = InliningCase[] argtypes = sig.argtypes - local handled_all_cases::Bool = true + local handled_all_cases = local fully_covered = true local revisit_idx = nothing - local only_method = nothing - local meth::MethodLookupResult local all_result_count = 0 - local joint_effects::Effects = EFFECTS_TOTAL - local fully_covered::Bool = true + local joint_effects = EFFECTS_TOTAL for i = 1:nunion meth = getsplit(info, i) if meth.ambig # Too many applicable methods # Or there is a (partial?) 
ambiguity return nothing - elseif length(meth) == 0 - # No applicable methods; try next union split - handled_all_cases = false - continue - else - if length(meth) == 1 && only_method !== false - if only_method === nothing - only_method = meth[1].method - elseif only_method !== meth[1].method - only_method = false - end - else - only_method = false - end end - local split_fully_covered::Bool = false + local split_fully_covered = false for (j, match) in enumerate(meth) all_result_count += 1 result = getresult(info, all_result_count) joint_effects = merge_effects(joint_effects, info_effects(result, match, state)) split_fully_covered |= match.fully_covers if !validate_sparams(match.sparams) - if !match.fully_covers - handled_all_cases = false - continue - end - if revisit_idx === nothing - revisit_idx = (i, j, all_result_count) + if match.fully_covers + if revisit_idx === nothing + revisit_idx = (i, j, all_result_count) + else + handled_all_cases = false + revisit_idx = nothing + end else handled_all_cases = false - revisit_idx = nothing end + elseif !(match.spec_types <: match.method.sig) # the requirement for correct union-split + handled_all_cases = false else handled_all_cases &= handle_any_const_result!(cases, - result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=false) + result, match, argtypes, info, flag, state; allow_typevars=false) end end fully_covered &= split_fully_covered end - fully_covered || (joint_effects = Effects(joint_effects; nothrow=false)) - - if handled_all_cases && revisit_idx !== nothing - # we handled everything except one match with unmatched sparams, - # so try to handle it by bypassing validate_sparams - (i, j, k) = revisit_idx - match = getsplit(info, i)[j] - result = getresult(info, k) - handled_all_cases &= handle_any_const_result!(cases, - result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true) - elseif length(cases) == 0 && only_method isa Method - # if the signature is fully covered and there is only one applicable method, - # we can try to inline it even in the presence of unmatched sparams - # -- But don't try it if we already tried to handle the match in the revisit_idx - # case, because that'll (necessarily) be the same method. 
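To make the union-split bookkeeping below (`handled_all_cases`, `fully_covered`, one `InliningCase` per match) more concrete, here is a hedged user-level sketch of the kind of call site `compute_inlining_cases` splits; the method definitions are hypothetical:

    # A call whose argument is inferred as a small Union is split into one case per
    # applicable method; `ir_inline_unionsplit!` then guards each inlined body with
    # the corresponding `isa` check.
    f(x::Int)     = x + 1
    f(x::Float64) = x * 2.0

    function caller(c::Bool)
        x = c ? 1 : 2.0   # x::Union{Int, Float64}
        return f(x)       # split into an `isa(x, Int)` case and an `isa(x, Float64)` case
    end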
-        if nsplit(info)::Int > 1
-            atype = argtypes_to_type(argtypes)
-            (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), atype, only_method.sig)::SimpleVector
-            match = MethodMatch(metharg, methsp::SimpleVector, only_method, true)
-            result = nothing
-        else
-            @assert length(meth) == 1
-            match = meth[1]
-            result = getresult(info, 1)
+    (handled_all_cases & fully_covered) || (joint_effects = Effects(joint_effects; nothrow=false))
+
+    if handled_all_cases
+        if revisit_idx !== nothing
+            # we handled everything except one match with unmatched sparams,
+            # so try to handle it by bypassing validate_sparams
+            (i, j, k) = revisit_idx
+            match = getsplit(info, i)[j]
+            result = getresult(info, k)
+            handled_all_cases &= handle_any_const_result!(cases,
+                result, match, argtypes, info, flag, state; allow_typevars=true)
        end
-        handle_any_const_result!(cases,
-            result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true)
-        fully_covered = handled_all_cases = match.fully_covers
-    elseif !handled_all_cases
+        if !fully_covered
+            # We will emit an inline MethodError in this case, but that info already came from inference, so we must already have the uncovered edge for it
+        end
+    elseif !isempty(cases)
        # if we've not seen all candidates, union split is valid only for dispatch tuples
        filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
    end
-
-    return cases, (handled_all_cases & fully_covered), joint_effects
+    return cases, handled_all_cases, fully_covered, joint_effects
end
function handle_call!(todo::Vector{Pair{Int,Any}},
-    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt8, sig::Signature,
+    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt32, sig::Signature,
    state::InliningState)
    cases = compute_inlining_cases(info, flag, sig, state)
    cases === nothing && return nothing
-    cases, all_covered, joint_effects = cases
-    handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
-        all_covered, joint_effects)
+    cases, handled_all_cases, fully_covered, joint_effects = cases
+    atype = argtypes_to_type(sig.argtypes)
+    handle_cases!(todo, ir, idx, stmt, atype, cases, handled_all_cases, fully_covered, joint_effects)
end
function handle_match!(cases::Vector{InliningCase},
-    match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8,
+    match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt32,
    state::InliningState;
-    allow_abstract::Bool, allow_typevars::Bool)
-    spec_types = match.spec_types
-    allow_abstract || isdispatchtuple(spec_types) || return false
+    allow_typevars::Bool, volatile_inf_result::Union{Nothing,VolatileInferenceResult})
    # We may see duplicated dispatch signatures here when a signature gets widened
    # during abstract interpretation: for the purpose of inlining, we can just skip
    # processing this dispatch candidate (unless unmatched type parameters are present)
-    !allow_typevars && _any(case->case.sig === spec_types, cases) && return true
-    item = analyze_method!(match, argtypes, info, flag, state; allow_typevars)
+    !allow_typevars && any(case::InliningCase->case.sig === match.spec_types, cases) && return true
+    item = analyze_method!(match, argtypes, info, flag, state; allow_typevars, volatile_inf_result)
    item === nothing && return false
-    push!(cases, InliningCase(spec_types, item))
+    push!(cases, InliningCase(match.spec_types, item))
    return true
end
-function handle_const_prop_result!(cases::Vector{InliningCase},
-    result::ConstPropResult,
argtypes::Vector{Any}, @nospecialize(info::CallInfo), - flag::UInt8, state::InliningState; - allow_abstract::Bool, allow_typevars::Bool) +function handle_const_prop_result!(cases::Vector{InliningCase}, result::ConstPropResult, + match::MethodMatch, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_typevars::Bool) mi = result.result.linfo - spec_types = mi.specTypes - allow_abstract || isdispatchtuple(spec_types) || return false if !validate_sparams(mi.sparam_vals) (allow_typevars && !may_have_fcalls(mi.def::Method)) || return false end - item = resolve_todo(mi, result.result, argtypes, info, flag, state) + item = resolve_todo(mi, result.result, info, flag, state) item === nothing && return false - push!(cases, InliningCase(spec_types, item)) + push!(cases, InliningCase(match.spec_types, item)) return true end function semiconcrete_result_item(result::SemiConcreteResult, - @nospecialize(info::CallInfo), flag::UInt8, state::InliningState) - mi = result.mi - if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) - et = InliningEdgeTracker(state) - return compileable_specialization(mi, result.effects, et, info; - compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) - else - return InliningTodo(mi, retrieve_ir_for_inlining(mi, result.ir), result.effects) + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState) + code = result.edge + mi = code.def + et = InliningEdgeTracker(state) + + if (!OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) || + # For `NativeInterpreter`, `SemiConcreteResult` may be produced for + # a `@noinline`-declared method when it's marked as `@constprop :aggressive`. + # Suppress the inlining here (unless inlining is requested at the callsite). 
+ (is_declared_noinline(mi.def::Method) && !is_stmt_inline(flag))) + return compileable_specialization(code, result.effects, et, info, state) end + src_inlining_policy(state.interp, result.ir, info, flag) || + return compileable_specialization(code, result.effects, et, info, state) + + add_inlining_edge!(et, result.edge) + preserve_local_sources = OptimizationParams(state.interp).preserve_local_sources + ir, _, debuginfo = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources) + return InliningTodo(mi, ir, result.spec_info, debuginfo, result.effects) end function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult, - @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; - allow_abstract::Bool) - mi = result.mi - spec_types = mi.specTypes - allow_abstract || isdispatchtuple(spec_types) || return false + match::MethodMatch, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState) + mi = result.edge.def validate_sparams(mi.sparam_vals) || return false item = semiconcrete_result_item(result, info, flag, state) item === nothing && return false - push!(cases, InliningCase(spec_types, item)) + push!(cases, InliningCase(match.spec_types, item)) return true end -function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState) +function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, + match::MethodMatch, @nospecialize(info::CallInfo), state::InliningState) case = concrete_result_item(result, info, state) case === nothing && return false - push!(cases, InliningCase(result.mi.specTypes, case)) + push!(cases, InliningCase(match.spec_types, case)) return true end may_inline_concrete_result(result::ConcreteResult) = isdefined(result, :result) && is_inlineable_constant(result.result) -function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState; - invokesig::Union{Nothing,Vector{Any}}=nothing) +function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState) if !may_inline_concrete_result(result) - et = InliningEdgeTracker(state, invokesig) - return compileable_specialization(result.mi, result.effects, et, info; - compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) + et = InliningEdgeTracker(state) + return compileable_specialization(result.edge, result.effects, et, info, state) end @assert result.effects === EFFECTS_TOTAL - return ConstantCase(quoted(result.result)) + return ConstantCase(quoted(result.result), result.edge) end function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, - @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool, + @nospecialize(atype), cases::Vector{InliningCase}, handled_all_cases::Bool, fully_covered::Bool, joint_effects::Effects) # If we only have one case and that case is fully covered, we may either # be able to do the inlining now (for constant cases), or push it directly # onto the todo list - if fully_covered && length(cases) == 1 + if fully_covered && handled_all_cases && length(cases) == 1 handle_single_case!(todo, ir, idx, stmt, cases[1].item) - elseif length(cases) > 0 + elseif length(cases) > 0 || handled_all_cases isa(atype, DataType) || return nothing for case in cases isa(case.sig, DataType) || return nothing end - push!(todo, idx=>UnionSplit(fully_covered, atype, cases)) + push!(todo, idx=>UnionSplit(handled_all_cases, fully_covered, atype, 
cases))
    else
-        ir[SSAValue(idx)][:flag] |= flags_for_effects(joint_effects)
+        add_flag!(ir[SSAValue(idx)], flags_for_effects(joint_effects))
    end
    return nothing
end
function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}},
    ir::IRCode, idx::Int, stmt::Expr, info::OpaqueClosureCallInfo,
-    flag::UInt8, sig::Signature, state::InliningState)
+    flag::UInt32, sig::Signature, state::InliningState)
    result = info.result
    if isa(result, ConstPropResult)
        mi = result.result.linfo
        validate_sparams(mi.sparam_vals) || return nothing
-        item = resolve_todo(mi, result.result, sig.argtypes, info, flag, state)
+        item = resolve_todo(mi, result.result, info, flag, state)
    elseif isa(result, ConcreteResult)
        item = concrete_result_item(result, info, state)
+    elseif isa(result, SemiConcreteResult)
+        item = semiconcrete_result_item(result, info, flag, state)
    else
-        if isa(result, SemiConcreteResult)
-            result = inlining_policy(state.interp, result, info, flag, result.mi, sig.argtypes)
-        end
-        if isa(result, SemiConcreteResult)
-            item = semiconcrete_result_item(result, info, flag, state)
-        else
-            item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false)
-        end
+        @assert result === nothing || result isa VolatileInferenceResult
+        volatile_inf_result = result
+        item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false, volatile_inf_result)
    end
    handle_single_case!(todo, ir, idx, stmt, item)
    return nothing
end
-function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::ModifyFieldInfo, state::InliningState)
+function handle_modifyop!_call!(ir::IRCode, idx::Int, stmt::Expr, info::ModifyOpInfo, state::InliningState)
    info = info.info
    info isa MethodResultPure && (info = info.info)
    info isa ConstCallInfo && (info = info.call)
    info isa MethodMatchInfo || return nothing
-    length(info.results) == 1 || return nothing
+    length(info.edges) == length(info.results) == 1 || return nothing
    match = info.results[1]::MethodMatch
    match.fully_covers || return nothing
-    case = compileable_specialization(match, Effects(), InliningEdgeTracker(state), info;
-        compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
+    edge = info.edges[1]
+    edge === nothing && return nothing
+    case = compileable_specialization(edge, Effects(), InliningEdgeTracker(state), info, state)
    case === nothing && return nothing
    stmt.head = :invoke_modify
    pushfirst!(stmt.args, case.invoke)
-    ir.stmts[idx][:inst] = stmt
+    ir[SSAValue(idx)][:stmt] = stmt
    return nothing
end
function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::FinalizerInfo,
-                                state::InliningState)
-
+                                state::InliningState)
    # Finalizers don't return values, so if their execution is not observable,
    # we can just not register them
    if is_removable_if_unused(info.effects)
@@ -1614,7 +1561,7 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize
    argtypes[2] = argextype(stmt.args[3], ir)
    sig = Signature(f, ft, argtypes)
-    cases = compute_inlining_cases(info.info, #=flag=#UInt8(0), sig, state)
+    cases = compute_inlining_cases(info.info, #=flag=#UInt32(0), sig, state)
    cases === nothing && return nothing
    cases, all_covered, _ = cases
    if all_covered && length(cases) == 1
@@ -1622,36 +1569,39 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize
        # `Core.Compiler` data structure into the global cache
        item1 = cases[1].item
        if isa(item1, InliningTodo)
-            push!(stmt.args, true)
-            push!(stmt.args, item1.mi)
+            code = get(code_cache(state), item1.mi,
nothing) # COMBAK: this seems like a bad design, can we use stmt_info instead to store the correct info? + if code isa CodeInstance + push!(stmt.args, true) + push!(stmt.args, code) + end elseif isa(item1, InvokeCase) push!(stmt.args, false) push!(stmt.args, item1.invoke) elseif isa(item1, ConstantCase) push!(stmt.args, nothing) - push!(stmt.args, item1.val) end end return nothing end -function handle_invoke_expr!(todo::Vector{Pair{Int,Any}}, - idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt8, sig::Signature, state::InliningState) - mi = stmt.args[1]::MethodInstance - case = resolve_todo(mi, sig.argtypes, info, flag, state) - if case !== nothing - push!(todo, idx=>(case::InliningTodo)) +function handle_invoke_expr!(todo::Vector{Pair{Int,Any}}, ir::IRCode, + idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt32, sig::Signature, state::InliningState) + mi = stmt.args[1] + if !(mi isa MethodInstance) + mi = (mi::CodeInstance).def end + case = resolve_todo(mi, info, flag, state) + handle_single_case!(todo, ir, idx, stmt, case, false) return nothing end function inline_const_if_inlineable!(inst::Instruction) rt = inst[:type] if rt isa Const && is_inlineable_constant(rt.val) - inst[:inst] = quoted(rt.val) + inst[:stmt] = quoted(rt.val) return true end - inst[:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW + add_flag!(inst, IR_FLAGS_REMOVABLE) return false end @@ -1659,17 +1609,18 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) todo = Pair{Int, Any}[] for idx in 1:length(ir.stmts) - simpleres = process_simple!(todo, ir, idx, state) + flag = ir.stmts[idx][:flag] + + simpleres = process_simple!(todo, ir, idx, flag, state) simpleres === nothing && continue stmt, sig = simpleres - flag = ir.stmts[idx][:flag] info = ir.stmts[idx][:info] # `NativeInterpreter` won't need this, but provide a support for `:invoke` exprs here # for external `AbstractInterpreter`s that may run the inlining pass multiple times if isexpr(stmt, :invoke) - handle_invoke_expr!(todo, idx, stmt, info, flag, sig, state) + handle_invoke_expr!(todo, ir, idx, stmt, info, flag, sig, state) continue end @@ -1686,8 +1637,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) # handle special cased builtins if isa(info, OpaqueClosureCallInfo) handle_opaque_closure_call!(todo, ir, idx, stmt, info, flag, sig, state) - elseif isa(info, ModifyFieldInfo) - handle_modifyfield!_call!(ir, idx, stmt, info, state) + elseif isa(info, ModifyOpInfo) + handle_modifyop!_call!(ir, idx, stmt, info, state) elseif isa(info, InvokeCallInfo) handle_invoke_call!(todo, ir, idx, stmt, info, flag, sig, state) elseif isa(info, FinalizerInfo) @@ -1703,15 +1654,14 @@ end function linear_inline_eligible(ir::IRCode) length(ir.cfg.blocks) == 1 || return false - terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:inst] + terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:stmt] isa(terminator, ReturnNode) || return false isdefined(terminator, :val) || return false return true end -function early_inline_special_case( - ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature, - state::InliningState) +function early_inline_special_case(ir::IRCode, stmt::Expr, flag::UInt32, + @nospecialize(type), sig::Signature, state::InliningState) OptimizationParams(state.interp).inlining || return nothing (; f, ft, argtypes) = sig @@ -1719,18 +1669,13 @@ function early_inline_special_case( val = type.val is_inlineable_constant(val) || return nothing if isa(f, IntrinsicFunction) - if is_pure_intrinsic_infer(f) 
&& intrinsic_nothrow(f, argtypes[2:end]) + if is_pure_intrinsic_infer(f) && has_flag(flag, IR_FLAG_NOTHROW) return SomeCase(quoted(val)) end elseif contains_is(_PURE_BUILTINS, f) return SomeCase(quoted(val)) elseif contains_is(_EFFECT_FREE_BUILTINS, f) - if _builtin_nothrow(optimizer_lattice(state.interp), f, argtypes[2:end], type) - return SomeCase(quoted(val)) - end - elseif f === Core.get_binding_type - length(argtypes) == 3 || return nothing - if get_binding_type_effect_free(argtypes[2], argtypes[3]) + if has_flag(flag, IR_FLAG_NOTHROW) return SomeCase(quoted(val)) end end @@ -1743,7 +1688,9 @@ function early_inline_special_case( isa(setting, Const) || return nothing setting = setting.val isa(setting, Symbol) || return nothing - setting === :const || setting === :conditional || setting === :type || return nothing + # setting === :const || setting === :type barrier const evaluation, + # so they can't be eliminated at IPO time + setting === :conditional || return nothing # barriered successfully already, eliminate it return SomeCase(stmt.args[3]) elseif f === Core.ifelse && length(argtypes) == 4 @@ -1754,6 +1701,8 @@ function early_inline_special_case( elseif cond.val === false return SomeCase(stmt.args[4]) end + elseif ⊑(optimizer_lattice(state.interp), cond, Bool) && stmt.args[3] === stmt.args[4] + return SomeCase(stmt.args[3]) end end return nothing @@ -1762,25 +1711,24 @@ end # special-case some regular method calls whose results are not folded within `abstract_call_known` # (and thus `early_inline_special_case` doesn't handle them yet) # NOTE we manually inline the method bodies, and so the logic here needs to precisely sync with their definitions -function late_inline_special_case!( - ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature, - state::InliningState) +function late_inline_special_case!(ir::IRCode, idx::Int, stmt::Expr, flag::UInt32, + @nospecialize(type), sig::Signature, state::InliningState) OptimizationParams(state.interp).inlining || return nothing (; f, ft, argtypes) = sig - if length(argtypes) == 3 && istopfunction(f, :!==) + if length(argtypes) == 3 && f === Core.:(!==) # special-case inliner for !== that precedes _methods_by_ftype union splitting # and that works, even though inference generally avoids inferring the `!==` Method if isa(type, Const) return SomeCase(quoted(type.val)) end cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3]) - cmp_call_ssa = insert_node!(ir, idx, effect_free_and_nothrow(NewInstruction(cmp_call, Bool))) + cmp_call_ssa = insert_node!(ir, idx, removable_if_unused(NewInstruction(cmp_call, Bool))) not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa) return SomeCase(not_call) - elseif length(argtypes) == 3 && istopfunction(f, :(>:)) + elseif length(argtypes) == 3 && f === Core.:(>:) # special-case inliner for issupertype # that works, even though inference generally avoids inferring the `>:` Method - if isa(type, Const) && _builtin_nothrow(optimizer_lattice(state.interp), <:, Any[argtypes[3], argtypes[2]], type) + if isa(type, Const) && has_flag(flag, IR_FLAG_NOTHROW) return SomeCase(quoted(type.val)) end subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2]) @@ -1804,50 +1752,44 @@ function late_inline_special_case!( return nothing end -function ssa_substitute!(insert_node!::Inserter, - subst_inst::Instruction, @nospecialize(val), arg_replacements::Vector{Any}, - @nospecialize(spsig), spvals::SimpleVector, - spvals_ssa::Union{Nothing, SSAValue}, - 
linetable_offset::Int32, boundscheck::Symbol) - subst_inst[:flag] &= ~IR_FLAG_INBOUNDS - subst_inst[:line] += linetable_offset - return ssa_substitute_op!(insert_node!, subst_inst, - val, arg_replacements, spsig, spvals, spvals_ssa, boundscheck) +struct SSASubstitute + mi::MethodInstance + arg_replacements::Vector{Any} + spvals_ssa::Union{Nothing,SSAValue} + inlined_at::NTuple{3,Int32} # TODO: add a map also, so that ssaidx doesn't need to equal inlined_idx? end function insert_spval!(insert_node!::Inserter, spvals_ssa::SSAValue, spidx::Int, do_isdefined::Bool) ret = insert_node!( - effect_free_and_nothrow(NewInstruction(Expr(:call, Core._svec_ref, false, spvals_ssa, spidx), Any))) + removable_if_unused(NewInstruction(Expr(:call, Core._svec_ref, spvals_ssa, spidx), Any))) tcheck_not = nothing if do_isdefined tcheck = insert_node!( - effect_free_and_nothrow(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) + removable_if_unused(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) tcheck_not = insert_node!( - effect_free_and_nothrow(NewInstruction(Expr(:call, not_int, tcheck), Bool))) + removable_if_unused(NewInstruction(Expr(:call, not_int, tcheck), Bool))) end return (ret, tcheck_not) end -function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, - @nospecialize(val), arg_replacements::Vector{Any}, - @nospecialize(spsig), spvals::SimpleVector, - spvals_ssa::Union{Nothing, SSAValue}, - boundscheck::Symbol) +function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, @nospecialize(val), + ssa_substitute::SSASubstitute) if isa(val, Argument) - return arg_replacements[val.n] + return ssa_substitute.arg_replacements[val.n] end if isa(val, Expr) e = val::Expr head = e.head + sparam_vals = ssa_substitute.mi.sparam_vals if head === :static_parameter spidx = e.args[1]::Int - val = spvals[spidx] + val = sparam_vals[spidx] if !isa(val, TypeVar) && val !== Vararg return quoted(val) else flag = subst_inst[:flag] - maybe_undef = (flag & IR_FLAG_NOTHROW) == 0 && isa(val, TypeVar) - (ret, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, maybe_undef) + maybe_undef = !has_flag(flag, IR_FLAG_NOTHROW) && isa(val, TypeVar) + (ret, tcheck_not) = insert_spval!(insert_node!, ssa_substitute.spvals_ssa::SSAValue, spidx, maybe_undef) if maybe_undef insert_node!( NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing)) @@ -1856,44 +1798,38 @@ function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, end elseif head === :isdefined && isa(e.args[1], Expr) && e.args[1].head === :static_parameter spidx = (e.args[1]::Expr).args[1]::Int - val = spvals[spidx] + val = sparam_vals[spidx] if !isa(val, TypeVar) return true else - (_, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, true) + (_, tcheck_not) = insert_spval!(insert_node!, ssa_substitute.spvals_ssa::SSAValue, spidx, true) return tcheck_not end - elseif head === :cfunction && spvals_ssa === nothing - @assert !isa(spsig, UnionAll) || !isempty(spvals) - e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], spsig, spvals) + elseif head === :cfunction && ssa_substitute.spvals_ssa === nothing + msig = (ssa_substitute.mi.def::Method).sig + @assert !isa(msig, UnionAll) || !isempty(sparam_vals) + e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], msig, sparam_vals) e.args[4] = svec(Any[ - ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals) 
+ ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, msig, sparam_vals) for argt in e.args[4]::SimpleVector ]...) - elseif head === :foreigncall && spvals_ssa === nothing - @assert !isa(spsig, UnionAll) || !isempty(spvals) + elseif head === :foreigncall && ssa_substitute.spvals_ssa === nothing + msig = (ssa_substitute.mi.def::Method).sig + @assert !isa(msig, UnionAll) || !isempty(sparam_vals) for i = 1:length(e.args) if i == 2 - e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, spvals) + e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], msig, sparam_vals) elseif i == 3 e.args[3] = svec(Any[ - ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals) + ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, msig, sparam_vals) for argt in e.args[3]::SimpleVector ]...) end end - elseif head === :boundscheck - if boundscheck === :off # inbounds == true - return false - elseif boundscheck === :propagate - return e - else # on or default - return true - end end end - isa(val, Union{SSAValue, NewSSAValue}) && return val # avoid infinite loop + isa(val, AnySSAValue) && return val # avoid infinite loop urs = userefs(val) for op in urs - op[] = ssa_substitute_op!(insert_node!, subst_inst, op[], arg_replacements, spsig, spvals, spvals_ssa, boundscheck) + op[] = ssa_substitute_op!(insert_node!, subst_inst, op[], ssa_substitute) end return urs[] end diff --git a/base/compiler/ssair/ir.jl b/Compiler/src/ssair/ir.jl similarity index 73% rename from base/compiler/ssair/ir.jl rename to Compiler/src/ssair/ir.jl index 5c6751c1e1dda..9103dba04fa54 100644 --- a/base/compiler/ssair/ir.jl +++ b/Compiler/src/ssair/ir.jl @@ -1,8 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -Core.PhiNode() = Core.PhiNode(Int32[], Any[]) - -isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) +isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || + isa(stmt, ReturnNode) || isa(stmt, EnterNode) || isexpr(stmt, :leave) struct CFG blocks::Vector{BasicBlock} @@ -60,16 +59,18 @@ block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst) # This is a fake dest to force the next stmt to start a bb idx < length(stmts) && push!(jump_dests, idx+1) push!(jump_dests, stmt.label) + elseif isa(stmt, EnterNode) + # :enter starts/ends a BB + push!(jump_dests, idx) + push!(jump_dests, idx+1) + # The catch block is a jump dest + if stmt.catch_dest != 0 + push!(jump_dests, stmt.catch_dest) + end elseif isa(stmt, Expr) if stmt.head === :leave # :leave terminates a BB push!(jump_dests, idx+1) - elseif stmt.head === :enter - # :enter starts/ends a BB - push!(jump_dests, idx) - push!(jump_dests, idx+1) - # The catch block is a jump dest - push!(jump_dests, stmt.args[1]::Int) end end if isa(stmt, PhiNode) @@ -80,7 +81,7 @@ block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst) end end end - # and add add one more basic block start after the last statement + # and add one more basic block start after the last statement for i = length(stmts):-1:1 if stmts[i] !== nothing push!(jump_dests, i+1) @@ -125,14 +126,16 @@ function compute_basic_blocks(stmts::Vector{Any}) push!(blocks[block′].preds, num) push!(b.succs, block′) end - elseif isexpr(terminator, :enter) + elseif isa(terminator, EnterNode) # :enter gets a virtual edge to the exception handler and # the exception handler gets a virtual edge from outside # the function. 
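# Sketch of the jump-destination rule added above for EnterNode (toy stand-in
# structs; the real code walks Core IR statements): an :enter both starts and
# ends a basic block, and its catch destination, when present, is a jump target.
struct ToyEnter
    catch_dest::Int
end
struct ToyGoto
    label::Int
end

function toy_jump_dests(stmts::Vector{Any})
    dests = Int[]
    for (idx, stmt) in enumerate(stmts)
        if stmt isa ToyEnter
            push!(dests, idx, idx + 1)                      # :enter starts/ends a BB
            stmt.catch_dest != 0 && push!(dests, stmt.catch_dest)
        elseif stmt isa ToyGoto
            idx < length(stmts) && push!(dests, idx + 1)    # force the next stmt to start a BB
            push!(dests, stmt.label)
        end
    end
    return sort!(unique!(dests))
end

toy_jump_dests(Any[ToyEnter(4), :work, ToyGoto(5), :catch_blk, :done])  # [1, 2, 4, 5]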
- block′ = block_for_inst(basic_block_index, terminator.args[1]::Int) - push!(blocks[block′].preds, num) - push!(blocks[block′].preds, 0) - push!(b.succs, block′) + if terminator.catch_dest != 0 + block′ = block_for_inst(basic_block_index, terminator.catch_dest) + push!(blocks[block′].preds, num) + push!(blocks[block′].preds, 0) + push!(b.succs, block′) + end end # statement fall-through if num + 1 <= length(blocks) @@ -144,16 +147,78 @@ function compute_basic_blocks(stmts::Vector{Any}) end # this function assumes insert position exists +function is_valid_phiblock_stmt(@nospecialize(stmt)) + isa(stmt, PhiNode) && return true + isa(stmt, Union{UpsilonNode, PhiCNode, SSAValue}) && return false + isa(stmt, Expr) && return is_value_pos_expr_head(stmt.head) + return true +end + function first_insert_for_bb(code::Vector{Any}, cfg::CFG, block::Int) - for idx in cfg.blocks[block].stmts + stmts = cfg.blocks[block].stmts + lastnonphiidx = first(stmts) + for idx in stmts stmt = code[idx] if !isa(stmt, PhiNode) - return idx + if !is_valid_phiblock_stmt(stmt) + return lastnonphiidx + end + else + lastnonphiidx = idx + 1 + end + end + if lastnonphiidx > last(stmts) + error("any insert position isn't found") + end + return lastnonphiidx +end + +# mutable version of the compressed DebugInfo +mutable struct DebugInfoStream + def::Union{MethodInstance,Symbol,Nothing} + linetable::Union{Nothing,DebugInfo} + edges::Vector{DebugInfo} + firstline::Int32 # the starting line for this block (specified by having an index of 0) + codelocs::Vector{Int32} # for each statement: + # index into linetable (if defined), else a line number (in the file represented by def) + # then index into edges + # then index into edges[linetable] + function DebugInfoStream(codelocs::Vector{Int32}) + return new(nothing, nothing, DebugInfo[], 0, codelocs) + end + # DebugInfoStream(def::Union{MethodInstance,Nothing}, di::DebugInfo, nstmts::Int) = + # if debuginfo_file1(di.def) === debuginfo_file1(di.def) + # new(def, di.linetable, Core.svec(di.edges...), getdebugidx(di, 0), + # ccall(:jl_uncompress_codelocs, Any, (Any, Int), di.codelocs, nstmts)::Vector{Int32}) + # else + function DebugInfoStream(def::Union{MethodInstance,Nothing}, di::DebugInfo, nstmts::Int) + codelocs = zeros(Int32, nstmts * 3) + for i = 1:nstmts + codelocs[3i - 2] = i end + return new(def, di, DebugInfo[], 0, codelocs) end - error("any insert position isn't found") + global copy(di::DebugInfoStream) = new(di.def, di.linetable, di.edges, di.firstline, di.codelocs) end +Core.DebugInfo(di::DebugInfoStream, nstmts::Int) = + DebugInfo(something(di.def), di.linetable, Core.svec(di.edges...), + ccall(:jl_compress_codelocs, Any, (Int32, Any, Int), di.firstline, di.codelocs, nstmts)::String) + +getdebugidx(debuginfo::DebugInfo, pc::Int) = + ccall(:jl_uncompress1_codeloc, NTuple{3,Int32}, (Any, Int), debuginfo.codelocs, pc) + +function getdebugidx(debuginfo::DebugInfoStream, pc::Int) + if 3 <= 3pc <= length(debuginfo.codelocs) + return (debuginfo.codelocs[3pc-2], debuginfo.codelocs[3pc-1], debuginfo.codelocs[3pc-0]) + elseif pc == 0 + return (Int32(debuginfo.firstline), Int32(0), Int32(0)) + else + return (Int32(-1), Int32(0), Int32(0)) + end +end + + # SSA values that need renaming struct OldSSAValue id::Int @@ -183,27 +248,30 @@ end const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue} - # SSA-indexed nodes struct InstructionStream - inst::Vector{Any} + stmt::Vector{Any} type::Vector{Any} info::Vector{CallInfo} line::Vector{Int32} - flag::Vector{UInt8} + 
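# Sketch of the 3-slots-per-statement codelocs layout that DebugInfoStream above
# introduces: for statement `pc`, slot 3pc-2 holds the line (or linetable index),
# 3pc-1 selects an inlining edge, and 3pc is the index within that edge, while
# pc == 0 encodes the block's starting line. Toy mirror, not the Compiler function:
function toy_getdebugidx(codelocs::Vector{Int32}, firstline::Int32, pc::Int)
    if 1 <= pc <= length(codelocs) ÷ 3
        return (codelocs[3pc-2], codelocs[3pc-1], codelocs[3pc])
    elseif pc == 0
        return (firstline, Int32(0), Int32(0))
    else
        return (Int32(-1), Int32(0), Int32(0))
    end
end

codelocs = Int32[10, 0, 0,   # statement 1: line 10, no inlining info
                 11, 1, 2]   # statement 2: line 11, via inlining edge 1, index 2 there
toy_getdebugidx(codelocs, Int32(9), 2)  # (11, 1, 2)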
flag::Vector{UInt32} + function InstructionStream(stmts::Vector{Any}, type::Vector{Any}, info::Vector{CallInfo}, line::Vector{Int32}, flag::Vector{UInt32}) + return new(stmts, type, info, line, flag) + end end function InstructionStream(len::Int) - insts = Vector{Any}(undef, len) + stmts = Vector{Any}(undef, len) types = Vector{Any}(undef, len) info = Vector{CallInfo}(undef, len) fill!(info, NoCallInfo()) - lines = fill(Int32(0), len) + lines = fill(Int32(0), 3len) flags = fill(IR_FLAG_NULL, len) - return InstructionStream(insts, types, info, lines, flags) + return InstructionStream(stmts, types, info, lines, flags) end InstructionStream() = InstructionStream(0) -length(is::InstructionStream) = length(is.inst) -isempty(is::InstructionStream) = isempty(is.inst) +length(is::InstructionStream) = length(is.stmt) +iterate(is::Compiler.InstructionStream, st::Int=1) = (st <= Compiler.length(is)) ? (is[st], st + 1) : nothing +isempty(is::InstructionStream) = isempty(is.stmt) function add_new_idx!(is::InstructionStream) ninst = length(is) + 1 resize!(is, ninst) @@ -211,7 +279,7 @@ function add_new_idx!(is::InstructionStream) end function copy(is::InstructionStream) return InstructionStream( - copy_exprargs(is.inst), + copy_exprargs(is.stmt), copy(is.type), copy(is.info), copy(is.line), @@ -219,13 +287,13 @@ function copy(is::InstructionStream) end function resize!(stmts::InstructionStream, len) old_length = length(stmts) - resize!(stmts.inst, len) + resize!(stmts.stmt, len) resize!(stmts.type, len) resize!(stmts.info, len) - resize!(stmts.line, len) + resize!(stmts.line, 3len) resize!(stmts.flag, len) for i in (old_length + 1):len - stmts.line[i] = 0 + stmts.line[3i-2], stmts.line[3i-1], stmts.line[3i] = NoLineUpdate stmts.flag[i] = IR_FLAG_NULL stmts.info[i] = NoCallInfo() end @@ -239,25 +307,37 @@ end Instruction(is::InstructionStream) = Instruction(is, add_new_idx!(is)) @inline function getindex(node::Instruction, fld::Symbol) + (fld === :inst) && (fld = :stmt) # deprecated isdefined(node, fld) && return getfield(node, fld) - return getfield(getfield(node, :data), fld)[getfield(node, :idx)] + fldarray = getfield(getfield(node, :data), fld) + fldidx = getfield(node, :idx) + (fld === :line) && return (fldarray[3fldidx-2], fldarray[3fldidx-1], fldarray[3fldidx-0]) + (1 ≤ fldidx ≤ length(fldarray)) || throw(InvalidIRError()) + return fldarray[fldidx] end @inline function setindex!(node::Instruction, @nospecialize(val), fld::Symbol) - getfield(getfield(node, :data), fld)[getfield(node, :idx)] = val + (fld === :inst) && (fld = :stmt) # deprecated + fldarray = getfield(getfield(node, :data), fld) + fldidx = getfield(node, :idx) + if fld === :line + (fldarray[3fldidx-2], fldarray[3fldidx-1], fldarray[3fldidx-0]) = val::NTuple{3,Int32} + else + fldarray[fldidx] = val + end return node end @inline getindex(is::InstructionStream, idx::Int) = Instruction(is, idx) function setindex!(is::InstructionStream, newval::Instruction, idx::Int) - is.inst[idx] = newval[:inst] + is.stmt[idx] = newval[:stmt] is.type[idx] = newval[:type] is.info[idx] = newval[:info] - is.line[idx] = newval[:line] + (is.line[3idx-2], is.line[3idx-1], is.line[3idx-0]) = newval[:line] is.flag[idx] = newval[:flag] return is end function setindex!(is::InstructionStream, newval::Union{AnySSAValue, Nothing}, idx::Int) - is.inst[idx] = newval + is.stmt[idx] = newval return is end function setindex!(node::Instruction, newval::Instruction) @@ -265,6 +345,10 @@ function setindex!(node::Instruction, newval::Instruction) return node end 
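# Sketch of the field-name shim in the Instruction accessors above: requests for
# the old `:inst` field are redirected to the renamed `:stmt` storage so existing
# callers keep working. Toy stand-in with a NamedTuple as the backing store:
function toy_getfield(data::NamedTuple, fld::Symbol)
    fld === :inst && (fld = :stmt)    # deprecated alias for the renamed field
    return getfield(data, fld)
end

toy_getfield((stmt = :(x + 1), type = Any), :inst)  # :(x + 1)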
+has_flag(inst::Instruction, flag::UInt32) = has_flag(inst[:flag], flag) +add_flag!(inst::Instruction, flag::UInt32) = inst[:flag] |= flag +sub_flag!(inst::Instruction, flag::UInt32) = inst[:flag] &= ~flag + struct NewNodeInfo # Insertion position (interpretation depends on which array this is in) pos::Int @@ -288,14 +372,15 @@ struct NewInstruction stmt::Any type::Any info::CallInfo - line::Union{Int32,Nothing} # if nothing, copy the line from previous statement in the insertion location - flag::Union{UInt8,Nothing} # if nothing, IR flags will be recomputed on insertion + line::Union{NTuple{3,Int32},Nothing} # if nothing, copy the line from previous statement in the insertion location + flag::Union{UInt32,Nothing} # if nothing, IR flags will be recomputed on insertion function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), - line::Union{Int32,Nothing}, flag::Union{UInt8,Nothing}) + line::Union{NTuple{3,Int32},Int32,Nothing}, flag::Union{UInt32,Nothing}) + line isa Int32 && (line = (line, zero(Int32), zero(Int32))) return new(stmt, type, info, line, flag) end end -function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Int32,Nothing}=nothing) +function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{NTuple{3,Int32},Int32,Nothing}=nothing) return NewInstruction(stmt, type, NoCallInfo(), line, nothing) end @nospecialize @@ -303,50 +388,67 @@ function NewInstruction(newinst::NewInstruction; stmt::Any=newinst.stmt, type::Any=newinst.type, info::CallInfo=newinst.info, - line::Union{Int32,Nothing}=newinst.line, - flag::Union{UInt8,Nothing}=newinst.flag) + line::Union{NTuple{3,Int32},Int32,Nothing}=newinst.line, + flag::Union{UInt32,Nothing}=newinst.flag) return NewInstruction(stmt, type, info, line, flag) end function NewInstruction(inst::Instruction; - stmt::Any=inst[:inst], + stmt::Any=inst[:stmt], type::Any=inst[:type], info::CallInfo=inst[:info], - line::Union{Int32,Nothing}=inst[:line], - flag::Union{UInt8,Nothing}=inst[:flag]) + line::Union{NTuple{3,Int32},Int32,Nothing}=inst[:line], + flag::Union{UInt32,Nothing}=inst[:flag]) return NewInstruction(stmt, type, info, line, flag) end @specialize -effect_free_and_nothrow(newinst::NewInstruction) = NewInstruction(newinst; flag=add_flag(newinst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) -with_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=add_flag(newinst, flags)) -without_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=sub_flag(newinst, flags)) -function add_flag(newinst::NewInstruction, newflag::UInt8) +removable_if_unused(newinst::NewInstruction) = add_flag(newinst, IR_FLAGS_REMOVABLE) +function add_flag(newinst::NewInstruction, newflag::UInt32) flag = newinst.flag - flag === nothing && return newflag - return flag | newflag + if flag === nothing + flag = newflag + else + flag |= newflag + end + return NewInstruction(newinst; flag) end -function sub_flag(newinst::NewInstruction, newflag::UInt8) +function sub_flag(newinst::NewInstruction, newflag::UInt32) flag = newinst.flag - flag === nothing && return IR_FLAG_NULL - return flag & ~newflag + if flag === nothing + flag = IR_FLAG_NULL + else + flag &= ~newflag + end + return NewInstruction(newinst; flag) end struct IRCode stmts::InstructionStream argtypes::Vector{Any} sptypes::Vector{VarState} - linetable::Vector{LineInfoNode} + debuginfo::DebugInfoStream cfg::CFG new_nodes::NewNodeStream meta::Vector{Expr} + valid_worlds::WorldRange - function 
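# Sketch of the widened UInt32 flag helpers defined above (has_flag / add_flag! /
# sub_flag! act on an Instruction's flag field; plain UInt32 values stand in for
# that field here, and the TOY_* bit names are made up for the example):
const TOY_FLAG_NOTHROW     = UInt32(1) << 0
const TOY_FLAG_EFFECT_FREE = UInt32(1) << 1

toy_has_flag(flags::UInt32, f::UInt32) = (flags & f) == f
toy_add_flag(flags::UInt32, f::UInt32) = flags | f
toy_sub_flag(flags::UInt32, f::UInt32) = flags & ~f

flags = toy_add_flag(UInt32(0), TOY_FLAG_NOTHROW | TOY_FLAG_EFFECT_FREE)
toy_has_flag(flags, TOY_FLAG_NOTHROW)                                    # true
toy_has_flag(toy_sub_flag(flags, TOY_FLAG_NOTHROW), TOY_FLAG_NOTHROW)    # false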
IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState}) - return new(stmts, argtypes, sptypes, linetable, cfg, NewNodeStream(), meta) + function IRCode(stmts::InstructionStream, cfg::CFG, debuginfo::DebugInfoStream, + argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState}, + valid_worlds=WorldRange(typemin(UInt), typemax(UInt))) + return new(stmts, argtypes, sptypes, debuginfo, cfg, NewNodeStream(), meta) end function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream) - return new(stmts, ir.argtypes, ir.sptypes, ir.linetable, cfg, new_nodes, ir.meta) + di = ir.debuginfo + @assert di.codelocs === stmts.line + return new(stmts, ir.argtypes, ir.sptypes, di, cfg, new_nodes, ir.meta, ir.valid_worlds) + end + global function copy(ir::IRCode) + di = ir.debuginfo + stmts = copy(ir.stmts) + di = copy(di) + di.edges = copy(di.edges) + di.codelocs = stmts.line + return new(stmts, copy(ir.argtypes), copy(ir.sptypes), di, copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta), ir.valid_worlds) end - global copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes), - copy(ir.linetable), copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta)) end """ @@ -357,26 +459,42 @@ for debugging and unit testing of IRCode APIs. The compiler itself should genera from the frontend or one of the caches. """ function IRCode() - ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], VarState[]) - ir[SSAValue(1)][:inst] = ReturnNode(nothing) + stmts = InstructionStream(1) + debuginfo = DebugInfoStream(stmts.line) + stmts.line[1] = 1 + ir = IRCode(stmts, CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), debuginfo, Any[], Expr[], VarState[]) + ir[SSAValue(1)][:stmt] = ReturnNode(nothing) ir[SSAValue(1)][:type] = Nothing ir[SSAValue(1)][:flag] = 0x00 - ir[SSAValue(1)][:line] = Int32(0) + ir[SSAValue(1)][:line] = NoLineUpdate return ir end +construct_domtree(ir::IRCode) = construct_domtree(ir.cfg) +construct_domtree(cfg::CFG) = construct_domtree(cfg.blocks) + +construct_postdomtree(ir::IRCode) = construct_postdomtree(ir.cfg) +construct_postdomtree(cfg::CFG) = construct_postdomtree(cfg.blocks) + function block_for_inst(ir::IRCode, inst::Int) if inst > length(ir.stmts) inst = ir.new_nodes.info[inst - length(ir.stmts)].pos end block_for_inst(ir.cfg, inst) end +block_for_inst(ir::IRCode, ssa::SSAValue) = block_for_inst(ir, ssa.id) -function getindex(x::IRCode, s::SSAValue) - if s.id <= length(x.stmts) - return x.stmts[s.id] +function getindex(ir::IRCode, s::SSAValue) + id = s.id + (id ≥ 1) || throw(InvalidIRError()) + nstmts = length(ir.stmts) + if id <= nstmts + return ir.stmts[id] else - return x.new_nodes.stmts[s.id - length(x.stmts)] + id -= nstmts + stmts = ir.new_nodes.stmts + (id ≤ length(stmts)) || throw(InvalidIRError()) + return stmts[id] end end @@ -427,11 +545,15 @@ struct UndefToken end; const UNDEF_TOKEN = UndefToken() isdefined(stmt, :val) || return OOB_TOKEN op == 1 || return OOB_TOKEN return stmt.val + elseif isa(stmt, EnterNode) + isdefined(stmt, :scope) || return OOB_TOKEN + op == 1 || return OOB_TOKEN + return stmt.scope elseif isa(stmt, PiNode) isdefined(stmt, :val) || return OOB_TOKEN op == 1 || return OOB_TOKEN return stmt.val - elseif isa(stmt, Union{SSAValue, NewSSAValue, GlobalRef}) + elseif isa(stmt, Union{AnySSAValue, GlobalRef}) op == 1 || return OOB_TOKEN return stmt elseif isa(stmt, UpsilonNode) @@ -459,6 +581,7 
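# Sketch of the bounds-checked, two-segment SSA lookup in the getindex hunk above:
# ids within 1:length(stmts) index the main stream, larger ids fall through to the
# new-nodes stream, and anything out of range raises an error (the real code throws
# InvalidIRError). Toy vectors stand in for the instruction streams:
function toy_lookup(stmts::Vector{Any}, new_nodes::Vector{Any}, id::Int)
    id >= 1 || error("invalid IR: nonpositive SSA id")
    id <= length(stmts) && return stmts[id]
    id -= length(stmts)
    id <= length(new_nodes) || error("invalid IR: SSA id out of range")
    return new_nodes[id]
end

toy_lookup(Any[:a, :b], Any[:inserted], 3)  # :inserted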
@@ function is_relevant_expr(e::Expr) :foreigncall, :isdefined, :copyast, :throw_undef_if_not, :cfunction, :method, :pop_exception, + :leave, :new_opaque_closure) end @@ -482,16 +605,19 @@ end stmt = GotoIfNot(v, stmt.dest) elseif isa(stmt, ReturnNode) op == 1 || throw(BoundsError()) - stmt = typeof(stmt)(v) - elseif isa(stmt, Union{SSAValue, NewSSAValue, GlobalRef}) + stmt = ReturnNode(v) + elseif isa(stmt, EnterNode) + op == 1 || throw(BoundsError()) + stmt = EnterNode(stmt.catch_dest, v) + elseif isa(stmt, Union{AnySSAValue, GlobalRef}) op == 1 || throw(BoundsError()) stmt = v elseif isa(stmt, UpsilonNode) op == 1 || throw(BoundsError()) - stmt = typeof(stmt)(v) + stmt = UpsilonNode(v) elseif isa(stmt, PiNode) op == 1 || throw(BoundsError()) - stmt = typeof(stmt)(v, stmt.typ) + stmt = PiNode(v, stmt.typ) elseif isa(stmt, PhiNode) op > length(stmt.values) && throw(BoundsError()) isassigned(stmt.values, op) || throw(BoundsError()) @@ -513,8 +639,8 @@ end function userefs(@nospecialize(x)) relevant = (isa(x, Expr) && is_relevant_expr(x)) || - isa(x, GotoIfNot) || isa(x, ReturnNode) || isa(x, SSAValue) || isa(x, NewSSAValue) || - isa(x, PiNode) || isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode) + isa(x, GotoIfNot) || isa(x, ReturnNode) || isa(x, SSAValue) || isa(x, OldSSAValue) || isa(x, NewSSAValue) || + isa(x, PiNode) || isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode) || isa(x, EnterNode) return UseRefIterator(x, relevant) end @@ -554,7 +680,7 @@ function insert_node!(ir::IRCode, pos::SSAValue, newinst::NewInstruction, attach end node = add_inst!(ir.new_nodes, posid, attach_after) newline = something(newinst.line, ir[pos][:line]) - newflag = recompute_inst_flag(newinst, ir) + newflag = recompute_newinst_flag(newinst, ir) node = inst_from_newinst!(node, newinst, newline, newflag) return SSAValue(length(ir.stmts) + node.idx) end @@ -567,6 +693,7 @@ struct CFGTransformState result_bbs::Vector{BasicBlock} bb_rename_pred::Vector{Int} bb_rename_succ::Vector{Int} + domtree::Union{Nothing, DomTree} end # N.B.: Takes ownership of the CFG array @@ -602,11 +729,14 @@ function CFGTransformState!(blocks::Vector{BasicBlock}, allow_cfg_transforms::Bo let blocks = blocks, bb_rename = bb_rename result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] end + # TODO: This could be done by just renaming the domtree + domtree = construct_domtree(result_bbs) else bb_rename = Vector{Int}() result_bbs = blocks + domtree = nothing end - return CFGTransformState(allow_cfg_transforms, allow_cfg_transforms, result_bbs, bb_rename, bb_rename) + return CFGTransformState(allow_cfg_transforms, allow_cfg_transforms, result_bbs, bb_rename, bb_rename, domtree) end mutable struct IncrementalCompact @@ -639,6 +769,7 @@ mutable struct IncrementalCompact perm = sort!(collect(eachindex(info)); by=i::Int->(2info[i].pos+info[i].attach_after, i)) new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) + code.debuginfo.codelocs = result.line used_ssas = fill(0, new_len) new_new_used_ssas = Vector{Int}() blocks = code.cfg.blocks @@ -661,7 +792,7 @@ mutable struct IncrementalCompact bb_rename = Vector{Int}() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, parent.result, CFGTransformState(false, false, parent.cfg_transform.result_bbs, bb_rename, bb_rename), + return new(code, parent.result, CFGTransformState(false, false, parent.cfg_transform.result_bbs, bb_rename, bb_rename, nothing), ssa_rename, parent.used_ssas, parent.late_fixup, perm, 1, 
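# Sketch of the operand-write pattern in the UseRef setindex! hunk above: IR nodes
# are immutable, so replacing a use rebuilds the node around the new value (for an
# EnterNode, the catch destination is kept and only the scope operand changes).
# Toy node, not Core's EnterNode:
struct ToyEnterNode
    catch_dest::Int
    scope::Any
end

toy_replace_scope(stmt::ToyEnterNode, v) = ToyEnterNode(stmt.catch_dest, v)

toy_replace_scope(ToyEnterNode(3, :old_scope), :new_scope)  # ToyEnterNode(3, :new_scope)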
parent.new_new_nodes, parent.new_new_used_ssas, pending_nodes, pending_perm, @@ -679,12 +810,13 @@ end types(ir::Union{IRCode, IncrementalCompact}) = TypesView(ir) function getindex(compact::IncrementalCompact, ssa::SSAValue) - @assert ssa.id < compact.result_idx + (1 ≤ ssa.id ≤ compact.result_idx) || throw(InvalidIRError()) return compact.result[ssa.id] end function getindex(compact::IncrementalCompact, ssa::OldSSAValue) id = ssa.id + (id ≥ 1) || throw(InvalidIRError()) if id < compact.idx new_idx = compact.ssa_rename[id]::Int return compact.result[new_idx] @@ -696,12 +828,15 @@ function getindex(compact::IncrementalCompact, ssa::OldSSAValue) return compact.ir.new_nodes.stmts[id] end id -= length(compact.ir.new_nodes) + (id ≤ length(compact.pending_nodes.stmts)) || throw(InvalidIRError()) return compact.pending_nodes.stmts[id] end function getindex(compact::IncrementalCompact, ssa::NewSSAValue) if ssa.id < 0 - return compact.new_new_nodes.stmts[-ssa.id] + stmts = compact.new_new_nodes.stmts + (-ssa.id ≤ length(stmts)) || throw(InvalidIRError()) + return stmts[-ssa.id] else return compact[SSAValue(ssa.id)] end @@ -740,6 +875,16 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV xb = block_for_inst(compact, x) yb = block_for_inst(compact, y) if xb == yb + if isa(compact[x][:stmt], PhiNode) + if isa(compact[y][:stmt], PhiNode) + # A node dominates another only if it dominates all uses of that note. + # Usually that is not a distinction. However, for phi nodes, the use + # occurs on the edge to the predecessor block. Thus, by definition, for + # any other PhiNode in the same BB there must be (at least) one edge + # that this phi node does not dominate. + return false + end + end xinfo = yinfo = nothing if isa(x, OldSSAValue) x′ = compact.ssa_rename[x.id]::SSAValue @@ -765,7 +910,7 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV else y′ = y end - if x′.id == y′.id && (xinfo !== nothing || yinfo !== nothing) + if x′.id == y′.id if xinfo !== nothing && yinfo !== nothing if xinfo.attach_after == yinfo.attach_after return x.id < y.id @@ -773,8 +918,8 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV return yinfo.attach_after elseif xinfo !== nothing return !xinfo.attach_after - else - return (yinfo::NewNodeInfo).attach_after + elseif yinfo !== nothing + return yinfo.attach_after end end return x′.id < y′.id @@ -809,8 +954,8 @@ function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool) end function inst_from_newinst!(node::Instruction, newinst::NewInstruction, - newline::Int32=newinst.line::Int32, newflag::UInt8=newinst.flag::UInt8) - node[:inst] = newinst.stmt + newline::NTuple{3,Int32}=newinst.line::NTuple{3,Int32}, newflag::UInt32=newinst.flag::UInt32) + node[:stmt] = newinst.stmt node[:type] = newinst.type node[:info] = newinst.info node[:line] = newline @@ -818,25 +963,14 @@ function inst_from_newinst!(node::Instruction, newinst::NewInstruction, return node end -function recompute_inst_flag(newinst::NewInstruction, src::Union{IRCode,IncrementalCompact}) +function recompute_newinst_flag(newinst::NewInstruction, src::Union{IRCode,IncrementalCompact}) flag = newinst.flag flag !== nothing && return flag - flag = IR_FLAG_NULL - (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags( - fallback_lattice, newinst.stmt, newinst.type, src) - if consistent - flag |= IR_FLAG_CONSISTENT - end - if effect_free_and_nothrow - flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - 
elseif nothrow - flag |= IR_FLAG_NOTHROW - end - return flag + return recompute_effects_flags(fallback_lattice, newinst.stmt, newinst.type, src) end function insert_node!(compact::IncrementalCompact, @nospecialize(before), newinst::NewInstruction, attach_after::Bool=false) - newflag = recompute_inst_flag(newinst, compact) + newflag = recompute_newinst_flag(newinst, compact) if isa(before, SSAValue) if before.id < compact.result_idx count_added_node!(compact, newinst.stmt) @@ -879,6 +1013,9 @@ function insert_node!(compact::IncrementalCompact, @nospecialize(before), newins return os end elseif isa(before, NewSSAValue) + # As above, new_new_nodes must get counted. We don't visit them during our compact, + # so they're immediately considered reified. + count_added_node!(compact, newinst.stmt) # TODO: This is incorrect and does not maintain ordering among the new nodes before_entry = compact.new_new_nodes.info[-before.id] newline = something(newinst.line, compact.new_new_nodes.stmts[-before.id][:line]) @@ -891,22 +1028,33 @@ function insert_node!(compact::IncrementalCompact, @nospecialize(before), newins end end -function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false) - newline = newinst.line::Int32 - refinish = false +function did_just_finish_bb(compact) result_idx = compact.result_idx result_bbs = compact.cfg_transform.result_bbs - if reverse_affinity && - ((compact.active_result_bb == length(result_bbs) + 1) || - result_idx == first(result_bbs[compact.active_result_bb].stmts)) + (compact.active_result_bb == length(result_bbs) + 1) || + (result_idx == first(result_bbs[compact.active_result_bb].stmts) && + compact.active_result_bb != 1) +end + +function maybe_reopen_bb!(compact) + if did_just_finish_bb(compact) compact.active_result_bb -= 1 - refinish = true + return true end + return false +end + +function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false) + newline = newinst.line::NTuple{3,Int32} + refinish = false + result_idx = compact.result_idx + result_bbs = compact.cfg_transform.result_bbs + refinish = reverse_affinity && maybe_reopen_bb!(compact) if result_idx > length(compact.result) @assert result_idx == length(compact.result) + 1 resize!(compact, result_idx) end - newflag = recompute_inst_flag(newinst, compact) + newflag = recompute_newinst_flag(newinst, compact) node = inst_from_newinst!(compact.result[result_idx], newinst, newline, newflag) count_added_node!(compact, newinst.stmt) && push!(compact.late_fixup, result_idx) compact.result_idx = result_idx + 1 @@ -915,10 +1063,26 @@ function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, return inst end +function delete_inst_here!(compact::IncrementalCompact) + # If we already closed this bb, reopen it for our modification + refinish = maybe_reopen_bb!(compact) + + # Delete the statement, update refcounts etc + compact[SSAValue(compact.result_idx-1)] = nothing + + # Pretend that we never compacted this statement in the first place + compact.result_idx -= 1 + + refinish && finish_current_bb!(compact, 0) + + return nothing +end + function getindex(view::TypesView, v::OldSSAValue) id = v.id ir = view.ir.ir stmts = ir.stmts + (id ≥ 1) || throw(InvalidIRError()) if id <= length(stmts) return stmts[id][:type] end @@ -927,7 +1091,9 @@ function getindex(view::TypesView, v::OldSSAValue) return ir.new_nodes.stmts[id][:type] end id -= length(ir.new_nodes) - return view.ir.pending_nodes.stmts[id][:type] + stmts = 
view.ir.pending_nodes.stmts + (id ≤ length(stmts)) || throw(InvalidIRError()) + return stmts[id][:type] end function kill_current_use!(compact::IncrementalCompact, @nospecialize(val)) @@ -947,14 +1113,13 @@ function kill_current_uses!(compact::IncrementalCompact, @nospecialize(stmt)) end end -function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue) - @assert idx.id < compact.result_idx - (compact.result[idx.id][:inst] === v) && return compact +function setindex!(compact::IncrementalCompact, @nospecialize(v), ssa::Union{SSAValue, NewSSAValue}) + (compact[ssa][:stmt] === v) && return compact # Kill count for current uses - kill_current_uses!(compact, compact.result[idx.id][:inst]) - compact.result[idx.id][:inst] = v + kill_current_uses!(compact, compact[ssa][:stmt]) + compact[ssa][:stmt] = v # Add count for new use - count_added_node!(compact, v) && push!(compact.late_fixup, idx.id) + count_added_node!(compact, v) && isa(ssa, SSAValue) && push!(compact.late_fixup, ssa.id) return compact end @@ -962,22 +1127,22 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::OldSSAVal id = idx.id if id < compact.idx new_idx = compact.ssa_rename[id]::Int - (compact.result[new_idx][:inst] === v) && return compact - kill_current_uses!(compact, compact.result[new_idx][:inst]) - compact.result[new_idx][:inst] = v + (compact.result[new_idx][:stmt] === v) && return compact + kill_current_uses!(compact, compact.result[new_idx][:stmt]) + compact.result[new_idx][:stmt] = v count_added_node!(compact, v) && push!(compact.late_fixup, new_idx) return compact elseif id <= length(compact.ir.stmts) # ir.stmts, new_nodes, and pending_nodes uses aren't counted yet, so no need to adjust - compact.ir.stmts[id][:inst] = v + compact.ir.stmts[id][:stmt] = v return compact end id -= length(compact.ir.stmts) if id <= length(compact.ir.new_nodes) - compact.ir.new_nodes.stmts[id][:inst] = v + compact.ir.new_nodes.stmts[id][:stmt] = v return compact end id -= length(compact.ir.new_nodes) - compact.pending_nodes.stmts[id][:inst] = v + compact.pending_nodes.stmts[id][:stmt] = v return compact end @@ -985,7 +1150,7 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::Int) if idx < compact.result_idx compact[SSAValue(idx)] = v else - compact.ir.stmts[idx][:inst] = v + compact.ir.stmts[idx][:stmt] = v end return compact end @@ -997,7 +1162,7 @@ should_check_ssa_counts() = __check_ssa_counts__[] # specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) function find_ssavalue_uses1(compact::IncrementalCompact) - body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst + body1, body2 = compact.result.stmt, compact.new_new_nodes.stmts.stmt nvals = length(compact.used_ssas) nvalsnew = length(compact.new_new_used_ssas) nbody1 = compact.result_idx @@ -1031,7 +1196,7 @@ function find_ssavalue_uses1(compact::IncrementalCompact) end function _oracle_check(compact::IncrementalCompact) - (observed_used_ssas, observed_used_newssas) = Core.Compiler.find_ssavalue_uses1(compact) + (observed_used_ssas, observed_used_newssas) = find_ssavalue_uses1(compact) for i = 1:length(observed_used_ssas) if observed_used_ssas[i] != compact.used_ssas[i] return (observed_used_ssas, observed_used_newssas, SSAValue(i)) @@ -1055,20 +1220,27 @@ end getindex(view::TypesView, idx::SSAValue) = getindex(view, idx.id) function getindex(view::TypesView, idx::Int) + (idx ≥ 1) || throw(InvalidIRError()) if isa(view.ir, 
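# Sketch of the use-count bookkeeping in the compact setindex! hunk above:
# replacing a statement first decrements the counts of whatever the old statement
# used, then counts the uses of the new statement. Toy statements that just list
# the names they use:
struct ToyCall
    uses::Vector{Symbol}
end

toy_uses(x) = x isa ToyCall ? x.uses : Symbol[]

function toy_replace!(used::Dict{Symbol,Int}, stmts::Vector{Any}, idx::Int, newstmt)
    old = stmts[idx]
    old === newstmt && return stmts
    for u in toy_uses(old)
        used[u] -= 1                     # kill counts for the old uses
    end
    for u in toy_uses(newstmt)
        used[u] = get(used, u, 0) + 1    # add counts for the new uses
    end
    stmts[idx] = newstmt
    return stmts
end

used = Dict(:a => 1, :b => 0)
toy_replace!(used, Any[ToyCall([:a])], 1, ToyCall([:b]))
used   # Dict(:a => 0, :b => 1)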
IncrementalCompact) && idx < view.ir.result_idx return view.ir.result[idx][:type] elseif isa(view.ir, IncrementalCompact) && view.ir.renamed_new_nodes if idx <= length(view.ir.result) return view.ir.result[idx][:type] else - return view.ir.new_new_nodes.stmts[idx - length(view.ir.result)][:type] + idx -= length(view.ir.result) + stmts = view.ir.new_new_nodes.stmts + (idx ≤ length(stmts)) || throw(InvalidIRError()) + return stmts[idx][:type] end else ir = isa(view.ir, IncrementalCompact) ? view.ir.ir : view.ir if idx <= length(ir.stmts) return ir.stmts[idx][:type] else - return ir.new_nodes.stmts[idx - length(ir.stmts)][:type] + idx -= length(ir.stmts) + stmts = ir.new_nodes.stmts + (idx ≤ length(stmts)) || throw(InvalidIRError()) + return stmts[idx][:type] end end end @@ -1079,13 +1251,13 @@ end # N.B.: Don't make this <: Function to avoid ::Function deopt struct Refiner - result_flags::Vector{UInt8} + result_flags::Vector{UInt32} result_idx::Int end (this::Refiner)() = (this.result_flags[this.result_idx] |= IR_FLAG_REFINED; nothing) function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}, - processed_idx::Int, result_idx::Int, + already_inserted, result_idx::Int, ssa_rename::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, @@ -1096,7 +1268,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} val = old_values[i] if isa(val, SSAValue) if do_rename_ssa - if val.id > processed_idx + if !already_inserted(i, OldSSAValue(val.id)) push!(late_fixup, result_idx) val = OldSSAValue(val.id) else @@ -1106,7 +1278,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} used_ssas[val.id] += 1 end elseif isa(val, OldSSAValue) - if val.id > processed_idx + if !already_inserted(i, val) push!(late_fixup, result_idx) else # Always renumber these. do_rename_ssa applies only to actual SSAValues @@ -1114,13 +1286,15 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} end elseif isa(val, NewSSAValue) if val.id < 0 - push!(late_fixup, result_idx) new_new_used_ssas[-val.id] += 1 else @assert do_rename_ssa val = SSAValue(val.id) end end + if isa(val, NewSSAValue) + push!(late_fixup, result_idx) + end values[i] = val end return values @@ -1141,6 +1315,9 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{In end if isa(val, SSAValue) used_ssas[val.id] += 1 + elseif isa(val, NewSSAValue) + @assert val.id < 0 + new_new_used_ssas[-val.id] += 1 end return val end @@ -1195,26 +1372,32 @@ end # N.B.: from and to are non-renamed indices function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int) - # Note: We recursively kill as many edges as are obviously dead. However, this - # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or - # worstcase during codegen). - (; bb_rename_pred, bb_rename_succ, result_bbs) = compact.cfg_transform + # Note: We recursively kill as many edges as are obviously dead. 
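# Sketch of the recursive edge removal that kill_edge! above performs: deleting an
# edge that leaves a block with no remaining predecessors recursively removes that
# block's outgoing edges as well. Toy adjacency lists, with no block renaming,
# phi fixups, or domtree updates:
function toy_kill_edge!(preds::Vector{Vector{Int}}, succs::Vector{Vector{Int}}, from::Int, to::Int)
    deleteat!(preds[to], findfirst(==(from), preds[to])::Int)
    deleteat!(succs[from], findfirst(==(to), succs[from])::Int)
    if isempty(preds[to])                 # `to` just became unreachable
        for succ in copy(succs[to])
            toy_kill_edge!(preds, succs, to, succ)
        end
    end
    return nothing
end

# CFG with edges 1→2, 1→3 and 2→3: killing 1→2 makes block 2 unreachable,
# which also removes 2→3 and leaves only the direct edge 1→3.
preds = [Int[], [1], [1, 2]]
succs = [[2, 3], [3], Int[]]
toy_kill_edge!(preds, succs, 1, 2)
(preds, succs)   # ([Int[], Int[], [1]], [[3], Int[], Int[]])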
+ (; bb_rename_pred, bb_rename_succ, result_bbs, domtree) = compact.cfg_transform preds = result_bbs[bb_rename_succ[to]].preds succs = result_bbs[bb_rename_pred[from]].succs deleteat!(preds, findfirst(x::Int->x==bb_rename_pred[from], preds)::Int) deleteat!(succs, findfirst(x::Int->x==bb_rename_succ[to], succs)::Int) + if domtree !== nothing + domtree_delete_edge!(domtree, result_bbs, bb_rename_pred[from], bb_rename_succ[to]) + end # Check if the block is now dead - if length(preds) == 0 - for succ in copy(result_bbs[bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x::Int->x==succ, bb_rename_pred)::Int) + if length(preds) == 0 || (domtree !== nothing && bb_unreachable(domtree, bb_rename_succ[to])) + to_succs = result_bbs[bb_rename_succ[to]].succs + for succ in copy(to_succs) + new_succ = findfirst(x::Int->x==succ, bb_rename_pred) + new_succ === nothing && continue + kill_edge!(compact, active_bb, to, new_succ) end + empty!(preds) + empty!(to_succs) if to < active_bb # Kill all statements in the block stmts = result_bbs[bb_rename_succ[to]].stmts for stmt in stmts - compact.result[stmt][:inst] = nothing + compact.result[stmt][:stmt] = nothing end - compact.result[last(stmts)][:inst] = ReturnNode() + compact.result[last(stmts)][:stmt] = ReturnNode() else # Tell compaction to not schedule this block. A value of -2 here # indicates that the block is not to be scheduled, but there should @@ -1230,7 +1413,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: stmts = result_bbs[bb_rename_succ[to]].stmts idx = first(stmts) while idx <= last(stmts) - stmt = compact.result[idx][:inst] + stmt = compact.result[idx][:stmt] stmt === nothing && continue isa(stmt, PhiNode) || break i = findfirst(x::Int32->x==bb_rename_pred[from], stmt.edges) @@ -1243,8 +1426,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: else stmts = compact.ir.cfg.blocks[to].stmts for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) - stmt === nothing && continue - isa(stmt, PhiNode) || break + is_valid_phiblock_stmt(stmt) || break + isa(stmt, PhiNode) || continue i = findfirst(x::Int32->x==from, stmt.edges) if i !== nothing deleteat!(stmt.edges, i) @@ -1262,37 +1445,39 @@ struct Refined end function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool) - stmt = inst[:inst] + stmt = inst[:stmt] (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact (; cfg_transforms_enabled, fold_constant_branches, bb_rename_succ, bb_rename_pred, result_bbs) = compact.cfg_transform mark_refined! 
= Refiner(result.flag, result_idx) + already_inserted_phi_arg = already_inserted_ssa(compact, processed_idx) if stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) ssa_rename[idx] = ssa_rename[stmt.id] elseif isa(stmt, GotoNode) && cfg_transforms_enabled + stmt.label < 0 && (println(stmt); println(compact)) label = bb_rename_succ[stmt.label] @assert label > 0 ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = GotoNode(label) + result[result_idx][:stmt] = GotoNode(label) result_idx += 1 elseif isa(stmt, GlobalRef) total_flags = IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE flag = result[result_idx][:flag] - if (flag & total_flags) == total_flags + if has_flag(flag, total_flags) ssa_rename[idx] = stmt else ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = stmt + result[result_idx][:stmt] = stmt result_idx += 1 end elseif isa(stmt, GotoNode) ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = stmt + result[result_idx][:stmt] = stmt result_idx += 1 elseif isa(stmt, GotoIfNot) && cfg_transforms_enabled stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::GotoIfNot - result[result_idx][:inst] = stmt + result[result_idx][:stmt] = stmt cond = stmt.cond if fold_constant_branches if !isa(cond, Bool) @@ -1304,14 +1489,14 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr end if cond ssa_rename[idx] = nothing - result[result_idx][:inst] = nothing + result[result_idx][:stmt] = nothing kill_edge!(compact, active_bb, active_bb, stmt.dest) # Don't increment result_idx => Drop this statement else label = bb_rename_succ[stmt.dest] @assert label > 0 ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = GotoNode(label) + result[result_idx][:stmt] = GotoNode(label) kill_edge!(compact, active_bb, active_bb, active_bb+1) result_idx += 1 end @@ -1320,16 +1505,23 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr label = bb_rename_succ[stmt.dest] @assert label > 0 ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = GotoIfNot(cond, label) + result[result_idx][:stmt] = GotoIfNot(cond, label) result_idx += 1 end + elseif cfg_transforms_enabled && isa(stmt, EnterNode) + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::EnterNode + if stmt.catch_dest != 0 + label = bb_rename_succ[stmt.catch_dest] + @assert label > 0 + result[result_idx][:stmt] = EnterNode(stmt, label) + else + result[result_idx][:stmt] = stmt + end + ssa_rename[idx] = SSAValue(result_idx) + result_idx += 1 elseif isa(stmt, Expr) stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::Expr - if cfg_transforms_enabled && isexpr(stmt, :enter) - label = bb_rename_succ[stmt.args[1]::Int] - @assert label > 0 - stmt.args[1] = label - elseif isexpr(stmt, :throw_undef_if_not) + if isexpr(stmt, :throw_undef_if_not) cond = stmt.args[2] if isa(cond, Bool) && cond === true # cond was folded to true - this statement @@ -1337,9 +1529,29 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr ssa_rename[idx] = nothing return result_idx end + elseif isexpr(stmt, :leave) + let i = 1 + while i <= length(stmt.args) + if stmt.args[i] === nothing + deleteat!(stmt.args, i) + else + i += 1 + end + end + end + if isempty(stmt.args) + # This :leave is dead + ssa_rename[idx] = 
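# Sketch of the constant-branch folding in the GotoIfNot case above: when the
# condition is a known Bool the branch either disappears (and the taken edge is
# killed) or becomes an unconditional goto (and the fall-through edge is killed).
# Toy statement types; the returned pair is (new statement, killed edge):
struct ToyGotoIfNot
    cond::Any
    dest::Int
end
struct ToyGotoNode
    label::Int
end

function toy_fold_branch(stmt::ToyGotoIfNot, curr_bb::Int)
    cond = stmt.cond
    cond isa Bool || return (stmt, nothing)                      # unknown: keep the branch
    if cond
        return (nothing, (curr_bb, stmt.dest))                   # drop it, kill the taken edge
    else
        return (ToyGotoNode(stmt.dest), (curr_bb, curr_bb + 1))  # goto, kill the fall-through
    end
end

toy_fold_branch(ToyGotoIfNot(false, 7), 2)  # (ToyGotoNode(7), (2, 3))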
nothing + return result_idx + end end - ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = stmt + typ = inst[:type] + if isa(typ, Const) && is_inlineable_constant(typ.val) + ssa_rename[idx] = quoted(typ.val) + else + ssa_rename[idx] = SSAValue(result_idx) + end + result[result_idx][:stmt] = stmt result_idx += 1 elseif isa(stmt, PiNode) # As an optimization, we eliminate any trivial pinodes. For performance, we use === @@ -1359,18 +1571,19 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr return result_idx end elseif !isa(pi_val, AnySSAValue) && !isa(pi_val, GlobalRef) - valtyp = isa(pi_val, QuoteNode) ? typeof(pi_val.value) : typeof(pi_val) - if valtyp === stmt.typ + pi_val′ = isa(pi_val, QuoteNode) ? pi_val.value : pi_val + stmttyp = stmt.typ + if isa(stmttyp, Const) ? pi_val′ === stmttyp.val : typeof(pi_val′) === stmttyp ssa_rename[idx] = pi_val return result_idx end end ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = stmt + result[result_idx][:stmt] = stmt result_idx += 1 - elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) + elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) || isa(stmt, EnterNode) ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!) + result[result_idx][:stmt] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!) result_idx += 1 elseif isa(stmt, PhiNode) # N.B.: For PhiNodes, this needs to be at the top, since PhiNodes @@ -1379,7 +1592,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if cfg_transforms_enabled # Rename phi node edges let bb_rename_pred=bb_rename_pred - map!(i::Int32->bb_rename_pred[i], stmt.edges, stmt.edges) + map!(i::Int32->i == 0 ? 0 : bb_rename_pred[i], stmt.edges, stmt.edges) end # Remove edges and values associated with dead blocks. Entries in @@ -1413,18 +1626,16 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr values = stmt.values end - values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) - # Don't remove the phi node if it is before the definition of its value - # because doing so can create forward references. This should only - # happen with dead loops, but can cause problems when optimization - # passes look at all code, dead or not. This check should be - # unnecessary when DCE can remove those dead loops entirely, so this is - # just to be safe. - before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id - if length(edges) == 1 && isassigned(values, 1) && !before_def && - length(cfg_transforms_enabled ? - result_bbs[bb_rename_succ[active_bb]].preds : - compact.ir.cfg.blocks[active_bb].preds) == 1 + values = process_phinode_values(values, late_fixup, already_inserted_phi_arg, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) + + # Quick egality check for PhiNode that may be replaced with its incoming + # value without needing to set the `Refined` flag. We can't do the actual + # refinement check, because we do not have access to the lattice here. + # Users may call `reprocess_phi_node!` inside the compaction loop to + # revisit PhiNodes with the proper lattice refinement check. 
+ if may_replace_phi(values, cfg_transforms_enabled ? + result_bbs[bb_rename_succ[active_bb]] : + compact.ir.cfg.blocks[active_bb], idx) && argextype(values[1], compact) === inst[:type] # There's only one predecessor left - just replace it v = values[1] @assert !isa(v, NewSSAValue) @@ -1433,12 +1644,26 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr end ssa_rename[idx] = v else - result[result_idx][:inst] = PhiNode(edges, values) + result[result_idx][:stmt] = PhiNode(edges, values) result_idx += 1 end elseif isa(stmt, PhiCNode) ssa_rename[idx] = SSAValue(result_idx) - result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)) + values = stmt.values + if cfg_transforms_enabled + # Filter arguments that come from dead blocks + values = Any[] + for value in stmt.values + if isa(value, SSAValue) + blk = block_for_inst(compact.ir.cfg, value.id) + if bb_rename_pred[blk] < 0 + continue + end + end + push!(values, value) + end + end + result[result_idx][:stmt] = PhiCNode(process_phinode_values(values, late_fixup, already_inserted_phi_arg, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)) result_idx += 1 else if isa(stmt, SSAValue) @@ -1451,7 +1676,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr else # Constant assign, replace uses of this ssa value with its result end - if (inst[:flag] & IR_FLAG_REFINED) != 0 && !isa(stmt, Refined) + if has_flag(inst, IR_FLAG_REFINED) && !isa(stmt, Refined) # If we're compacting away an instruction that was marked as refined, # leave a marker in the ssa_rename, so we can taint any users. stmt = Refined(stmt) @@ -1461,6 +1686,38 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr return result_idx end +function may_replace_phi(values::Vector{Any}, phi_bb::BasicBlock, idx::Int) + length(values) == 1 || return false + isassigned(values, 1) || return false + length(phi_bb.preds) == 1 || return false + + # Don't remove the phi node if it is before the definition of its value + # because doing so can create forward references. This should only + # happen with dead loops, but can cause problems when optimization + # passes look at all code, dead or not. This check should be + # unnecessary when DCE can remove those dead loops entirely, so this is + # just to be safe. 
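# Sketch of the guard that may_replace_phi above implements: a PhiNode whose block
# has exactly one remaining predecessor can be replaced by its single incoming
# value, unless that value is defined later than the phi (a forward reference that
# only dead loops can produce). Toy check where plain Ints stand in for old SSA ids:
function toy_may_replace_phi(values::Vector{Any}, npreds::Int, phi_idx::Int)
    length(values) == 1 || return false
    isassigned(values, 1) || return false
    npreds == 1 || return false
    v = values[1]
    return !(v isa Int && phi_idx < v)   # don't reference a not-yet-seen definition
end

toy_may_replace_phi(Any[5], 1, 10)   # true:  the value is defined before the phi
toy_may_replace_phi(Any[15], 1, 10)  # false: would forward-reference statement 15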
+ v = values[1] + before_def = isa(v, OldSSAValue) && idx < v.id + return !before_def +end + +function reprocess_phi_node!(𝕃ₒ::AbstractLattice, compact::IncrementalCompact, phi::PhiNode, old_idx::Int) + phi_bb = compact.active_result_bb + did_just_finish_bb(compact) && (phi_bb -= 1) + may_replace_phi(phi.values, compact.cfg_transform.result_bbs[phi_bb], compact.idx) || return false + + # There's only one predecessor left - just replace it + v = phi.values[1] + if !⊑(𝕃ₒ, compact[compact.ssa_rename[old_idx]][:type], argextype(v, compact)) + v = Refined(v) + end + compact.ssa_rename[old_idx] = v + + delete_inst_here!(compact) + return true +end + function resize!(compact::IncrementalCompact, nnewnodes::Int) old_length = length(compact.result) resize!(compact.result, nnewnodes) @@ -1471,6 +1728,8 @@ function resize!(compact::IncrementalCompact, nnewnodes::Int) return compact end +const NoLineUpdate = (Int32(0), Int32(0), Int32(0)) + function finish_current_bb!(compact::IncrementalCompact, active_bb::Int, old_result_idx::Int=compact.result_idx, unreachable::Bool=false) (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform @@ -1487,9 +1746,9 @@ function finish_current_bb!(compact::IncrementalCompact, active_bb::Int, length(compact.result) < old_result_idx && resize!(compact, old_result_idx) node = compact.result[old_result_idx] if unreachable - node[:inst], node[:type], node[:line] = ReturnNode(), Union{}, 0 + node[:stmt], node[:type], node[:line] = ReturnNode(), Union{}, NoLineUpdate else - node[:inst], node[:type], node[:line] = nothing, Nothing, 0 + node[:stmt], node[:type], node[:line], node[:flag] = nothing, Nothing, NoLineUpdate, IR_FLAGS_EFFECTS end compact.result_idx = old_result_idx + 1 elseif cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts) @@ -1508,10 +1767,20 @@ function finish_current_bb!(compact::IncrementalCompact, active_bb::Int, return skipped end -function attach_after_stmt_after(compact::IncrementalCompact, idx::Int) - compact.new_nodes_idx > length(compact.perm) && return false - entry = compact.ir.new_nodes.info[compact.perm[compact.new_nodes_idx]] - return entry.pos == idx && entry.attach_after +""" + stmts_awaiting_insertion(compact::IncrementalCompact, idx::Int) + +Returns true if there are new/pending instructions enqueued for insertion into +`compact` on any instruction in the range `1:idx`. Otherwise, returns false. +""" +function stmts_awaiting_insertion(compact::IncrementalCompact, idx::Int) + + new_node_waiting = compact.new_nodes_idx <= length(compact.perm) && + compact.ir.new_nodes.info[compact.perm[compact.new_nodes_idx]].pos <= idx + pending_node_waiting = !isempty(compact.pending_perm) && + compact.pending_nodes.info[compact.pending_perm[1]].pos <= idx + + return new_node_waiting || pending_node_waiting end function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_entry::Instruction, new_node_info::NewNodeInfo, idx::Int, active_bb::Int, do_rename_ssa::Bool) @@ -1523,7 +1792,7 @@ function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_en compact.result_idx = result_idx # If this instruction has reverse affinity and we were at the end of a basic block, # finish it now. 
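# Sketch of the check documented for stmts_awaiting_insertion above: with pending
# insertion positions processed in sorted order, any still-enqueued position at or
# before `idx` means the range 1:idx has not been fully materialized yet. Toy
# version over one sorted position list and a cursor:
toy_awaiting(positions::Vector{Int}, next::Int, idx::Int) =
    next <= length(positions) && positions[next] <= idx

toy_awaiting([2, 5, 9], 2, 4)  # false: the next pending insertion is at position 5
toy_awaiting([2, 5, 9], 2, 6)  # true:  position 5 still awaits insertion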
- if new_node_info.attach_after && idx == last(bb.stmts)+1 && !attach_after_stmt_after(compact, idx-1) + if new_node_info.attach_after && idx == last(bb.stmts)+1 && !stmts_awaiting_insertion(compact, idx-1) active_bb += 1 finish_current_bb!(compact, active_bb, old_result_idx) end @@ -1534,13 +1803,27 @@ struct CompactPeekIterator compact::IncrementalCompact start_idx::Int end_idx::Int + include_stmts_before_start::Bool end +CompactPeekIterator(compact::IncrementalCompact, start_idx::Int, end_idx::Int) = + CompactPeekIterator(compact, start_idx, end_idx, false) + function CompactPeekIterator(compact::IncrementalCompact, start_idx::Int) return CompactPeekIterator(compact, start_idx, 0) end -entry_at_idx(entry::NewNodeInfo, idx::Int) = entry.attach_after ? entry.pos == idx - 1 : entry.pos == idx +function entry_at_idx(entry::NewNodeInfo, idx::Int, start_idx::Int, include_stmts_before_start::Bool) + if entry.attach_after + if !include_stmts_before_start + entry.pos >= start_idx || return false + end + return entry.pos == idx - 1 + else + return entry.pos == idx + end +end + function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.start_idx, it.compact.new_nodes_idx, 1)) if it.end_idx > 0 && idx > it.end_idx return nothing @@ -1552,22 +1835,22 @@ function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it. if compact.new_nodes_idx <= length(compact.perm) new_nodes = compact.ir.new_nodes for eidx in aidx:length(compact.perm) - if entry_at_idx(new_nodes.info[compact.perm[eidx]], idx) + if entry_at_idx(new_nodes.info[compact.perm[eidx]], idx, it.start_idx, it.include_stmts_before_start) entry = new_nodes.stmts[compact.perm[eidx]] - return (entry[:inst], (idx, eidx+1, bidx)) + return (entry[:stmt], (idx, eidx+1, bidx)) end end end if !isempty(compact.pending_perm) for eidx in bidx:length(compact.pending_perm) - if entry_at_idx(compact.pending_nodes.info[compact.pending_perm[eidx]], idx) + if entry_at_idx(compact.pending_nodes.info[compact.pending_perm[eidx]], idx, it.start_idx, it.include_stmts_before_start) entry = compact.pending_nodes.stmts[compact.pending_perm[eidx]] - return (entry[:inst], (idx, aidx, eidx+1)) + return (entry[:stmt], (idx, aidx, eidx+1)) end end end idx > length(compact.ir.stmts) && return nothing - return (compact.ir.stmts[idx][:inst], (idx + 1, aidx, bidx)) + return (compact.ir.stmts[idx][:stmt], (idx + 1, aidx, bidx)) end # the returned Union{Nothing, Pair{Pair{Int,Int},Any}} cannot be stack allocated, @@ -1576,7 +1859,7 @@ end idxs = iterate_compact(compact) idxs === nothing && return nothing old_result_idx = idxs[2] - return Pair{Pair{Int,Int},Any}(idxs, compact.result[old_result_idx][:inst]), nothing + return Pair{Pair{Int,Int},Any}(idxs, compact.result[old_result_idx][:stmt]), nothing end function iterate_compact(compact::IncrementalCompact) @@ -1653,7 +1936,7 @@ function iterate_compact(compact::IncrementalCompact) compact.result[old_result_idx] = compact.ir.stmts[idx] result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true) compact.result_idx = result_idx - if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx) + if idx == last(bb.stmts) && !stmts_awaiting_insertion(compact, idx) finish_current_bb!(compact, active_bb, old_result_idx) active_bb += 1 end @@ -1663,7 +1946,7 @@ function iterate_compact(compact::IncrementalCompact) idx += 1 @goto restart end - @assert isassigned(compact.result.inst, old_result_idx) + @assert isassigned(compact.result.stmt, old_result_idx) 
return Pair{Int,Int}(compact.idx-1, old_result_idx) end @@ -1673,11 +1956,13 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id in_worklist::Bool, extra_worklist::Vector{Int}) nresult = length(compact.result) inst = idx ≤ nresult ? compact.result[idx] : compact.new_new_nodes.stmts[idx-nresult] - stmt = inst[:inst] + stmt = inst[:stmt] stmt === nothing && return false inst[:type] === Bottom && return false - effect_free = (inst[:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - effect_free || return false + if !has_flag(inst, IR_FLAGS_REMOVABLE) + add_flag!(inst, IR_FLAG_UNUSED) + return false + end foreachssa(stmt) do val::SSAValue if compact.used_ssas[val.id] == 1 if val.id < idx || in_worklist @@ -1687,7 +1972,7 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id compact.used_ssas[val.id] -= 1 callback(val) end - inst[:inst] = nothing + inst[:stmt] = nothing return true end @@ -1725,14 +2010,22 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt), reify_new_ return FixedNode(stmt, true) end elseif isa(stmt, OldSSAValue) - val = compact.ssa_rename[stmt.id] - if isa(val, Refined) - val = val.val + node = compact.ssa_rename[stmt.id] + if isa(node, Refined) + node = node.val end - if isa(val, SSAValue) - compact.used_ssas[val.id] += 1 + needs_fixup = false + if isa(node, NewSSAValue) + (;node, needs_fixup) = fixup_node(compact, node, reify_new_nodes) end - return FixedNode(val, false) + if isa(node, SSAValue) + compact.used_ssas[node.id] += 1 + elseif isa(node, NewSSAValue) + compact.new_new_used_ssas[-node.id] += 1 + elseif isa(node, OldSSAValue) + return fixup_node(compact, node, reify_new_nodes) + end + return FixedNode(node, needs_fixup) else urs = userefs(stmt) fixup = false @@ -1758,9 +2051,9 @@ function just_fixup!(compact::IncrementalCompact, new_new_nodes_offset::Union{In set_off = off for i in off:length(compact.late_fixup) idx = compact.late_fixup[i] - stmt = compact.result[idx][:inst] + stmt = compact.result[idx][:stmt] (;node, needs_fixup) = fixup_node(compact, stmt, late_fixup_offset === nothing) - (stmt === node) || (compact.result[idx][:inst] = node) + (stmt === node) || (compact.result[idx][:stmt] = node) if needs_fixup compact.late_fixup[set_off] = idx set_off += 1 @@ -1772,10 +2065,10 @@ function just_fixup!(compact::IncrementalCompact, new_new_nodes_offset::Union{In off = new_new_nodes_offset === nothing ? 
1 : (new_new_nodes_offset+1) for idx in off:length(compact.new_new_nodes) new_node = compact.new_new_nodes.stmts[idx] - stmt = new_node[:inst] + stmt = new_node[:stmt] (;node) = fixup_node(compact, stmt, late_fixup_offset === nothing) if node !== stmt - new_node[:inst] = node + new_node[:stmt] = node end end end @@ -1800,9 +2093,11 @@ function non_dce_finish!(compact::IncrementalCompact) result_idx = compact.result_idx resize!(compact.result, result_idx - 1) just_fixup!(compact) - bb = compact.cfg_transform.result_bbs[end] - compact.cfg_transform.result_bbs[end] = BasicBlock(bb, - StmtRange(first(bb.stmts), result_idx-1)) + if !did_just_finish_bb(compact) + # Finish the bb now + finish_current_bb!(compact, 0) + end + result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1) compact.renamed_new_nodes = true nothing end @@ -1814,7 +2109,7 @@ function finish(compact::IncrementalCompact) end function complete(compact::IncrementalCompact) - result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1) + result_bbs = compact.cfg_transform.result_bbs cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) if should_check_ssa_counts() oracle_check(compact) @@ -1823,7 +2118,7 @@ function complete(compact::IncrementalCompact) # trim trailing undefined statements due to copy propagation nundef = 0 for i in length(compact.result):-1:1 - if isassigned(compact.result.inst, i) + if isassigned(compact.result.stmt, i) break end nundef += 1 diff --git a/Compiler/src/ssair/irinterp.jl b/Compiler/src/ssair/irinterp.jl new file mode 100644 index 0000000000000..a4969e81828cc --- /dev/null +++ b/Compiler/src/ssair/irinterp.jl @@ -0,0 +1,466 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +function collect_limitations!(@nospecialize(typ), ::IRInterpretationState) + @assert !isa(typ, LimitedAccuracy) "irinterp is unable to handle heavy recursion" + return typ +end + +function concrete_eval_invoke(interp::AbstractInterpreter, ci::CodeInstance, argtypes::Vector{Any}, parent::IRInterpretationState) + world = frame_world(parent) + effects = decode_effects(ci.ipo_purity_bits) + if (is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1) && + (is_nonoverlayed(interp) || is_nonoverlayed(effects))) + args = collect_const_args(argtypes, #=start=#1) + value = try + Core._call_in_world_total(world, args...) 
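# Sketch of the concrete-evaluation guard in concrete_eval_invoke above: the call
# is only executed when the callee is known foldable and every argument is a known
# constant, and an exception during evaluation makes the result Bottom (here
# `nothing` stands in for Bottom, `missing` for "cannot fold"). Toy version over
# plain values, with Symbols playing the role of non-constant arguments:
function toy_concrete_eval(f, args::Vector{Any}, foldable::Bool)
    foldable || return missing
    all(a -> !(a isa Symbol), args) || return missing
    try
        return Some(f(args...))
    catch
        return nothing                  # evaluation threw: the result type is Bottom
    end
end

toy_concrete_eval(+, Any[1, 2], true)   # Some(3)
toy_concrete_eval(÷, Any[1, 0], true)   # nothing (DivideError)
toy_concrete_eval(+, Any[:x, 2], true)  # missing (not all arguments are constants)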
+ catch + return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, is_noub(effects))) + end + return Pair{Any,Tuple{Bool,Bool}}(Const(value), (true, true)) + else + mi = ci.def + if is_constprop_edge_recursed(mi, parent) + return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects))) + end + newirsv = IRInterpretationState(interp, ci, mi, argtypes, world) + if newirsv !== nothing + assign_parentchild!(newirsv, parent) + return ir_abstract_constant_propagation(interp, newirsv) + end + return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects))) + end +end + +function abstract_eval_invoke_inst(interp::AbstractInterpreter, inst::Instruction, irsv::IRInterpretationState) + stmt = inst[:stmt] + ci = stmt.args[1] + if ci isa MethodInstance + world = frame_world(irsv) + mi_cache = WorldView(code_cache(interp), world) + code = get(mi_cache, ci, nothing) + code === nothing && return Pair{Any,Tuple{Bool,Bool}}(nothing, (false, false)) + else + code = ci::CodeInstance + end + argtypes = collect_argtypes(interp, stmt.args[2:end], StatementState(nothing, false), irsv) + argtypes === nothing && return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, false)) + return concrete_eval_invoke(interp, code, argtypes, irsv) +end + +abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir) + +function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState) + return abstract_eval_phi(interp, phi, StatementState(nothing, false), irsv) +end + +function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sstate::StatementState, irsv::IRInterpretationState) + si = StmtInfo(true, sstate.saw_latestworld) # TODO better job here? + call = abstract_call(interp, arginfo, si, irsv)::Future + Future{Any}(call, interp, irsv) do call, interp, irsv + irsv.ir.stmts[irsv.curridx][:info] = call.info + nothing + end + return call +end + +function kill_block!(ir::IRCode, bb::Int) + # Kill the entire block + stmts = ir.cfg.blocks[bb].stmts + for bidx = stmts + inst = ir[SSAValue(bidx)] + inst[:stmt] = nothing + inst[:type] = Bottom + inst[:flag] = IR_FLAGS_REMOVABLE + end + ir[SSAValue(last(stmts))][:stmt] = ReturnNode() + return +end +kill_block!(ir::IRCode) = (bb::Int)->kill_block!(ir, bb) + +function update_phi!(irsv::IRInterpretationState, from::Int, to::Int) + ir = irsv.ir + if length(ir.cfg.blocks[to].preds) == 0 + kill_block!(ir, to) + end + for sidx = ir.cfg.blocks[to].stmts + stmt = ir[SSAValue(sidx)][:stmt] + isa(stmt, Nothing) && continue # allowed between `PhiNode`s + isa(stmt, PhiNode) || break + for (eidx, edge) in enumerate(stmt.edges) + if edge == from + deleteat!(stmt.edges, eidx) + deleteat!(stmt.values, eidx) + push!(irsv.ssa_refined, sidx) + break + end + end + end +end +update_phi!(irsv::IRInterpretationState) = (from::Int, to::Int)->update_phi!(irsv, from, to) + +function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::Int=block_for_inst(irsv.ir, term_idx)) + ir = irsv.ir + stmt = ir[SSAValue(term_idx)][:stmt] + if isa(stmt, GotoIfNot) + kill_edge!(irsv, bb, stmt.dest) + kill_edge!(irsv, bb, bb+1) + elseif isa(stmt, GotoNode) + kill_edge!(irsv, bb, stmt.label) + elseif isa(stmt, ReturnNode) + # Nothing to do + else + @assert !isa(stmt, EnterNode) + kill_edge!(irsv, bb, bb+1) + end +end + +function kill_edge!(irsv::IRInterpretationState, from::Int, to::Int) + kill_edge!(get!(irsv.lazyreachability), irsv.ir.cfg, from, to, + update_phi!(irsv), kill_block!(irsv.ir)) 
+end + +function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction, idx::Int, + bb::Union{Int,Nothing}, irsv::IRInterpretationState) + ir = irsv.ir + stmt = inst[:stmt] + if isa(stmt, GotoIfNot) + cond = stmt.cond + condval = maybe_extract_const_bool(argextype(cond, ir)) + if condval isa Bool + if isa(cond, SSAValue) + kill_def_use!(irsv.tpdum, cond, idx) + end + if bb === nothing + bb = block_for_inst(ir, idx) + end + add_flag!(inst, IR_FLAG_NOTHROW) + if condval + inst[:stmt] = nothing + kill_edge!(irsv, bb, stmt.dest) + else + inst[:stmt] = GotoNode(stmt.dest) + kill_edge!(irsv, bb, bb+1) + end + return true + end + return false + end + rt = nothing + if isa(stmt, Expr) + head = stmt.head + if (head === :call || head === :foreigncall || head === :new || head === :splatnew || + head === :static_parameter || head === :isdefined || head === :boundscheck) + @assert isempty(irsv.tasks) # TODO: this whole function needs to be converted to a stackless design to be a valid AbsIntState, but this should work here for now + result = abstract_eval_statement_expr(interp, stmt, StatementState(nothing, false), irsv) + reverse!(irsv.tasks) + while true + if length(irsv.callstack) > irsv.frameid + typeinf(interp, irsv.callstack[irsv.frameid + 1]) + elseif !doworkloop(interp, irsv) + break + end + end + @assert length(irsv.callstack) == irsv.frameid && isempty(irsv.tasks) + result isa Future && (result = result[]) + (; rt, effects) = result + add_flag!(inst, flags_for_effects(effects)) + elseif head === :invoke # COMBAK: || head === :invoke_modifyfield (similar to call, but for args[2:end]) + rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv) + if nothrow + add_flag!(inst, IR_FLAG_NOTHROW) + end + if noub + add_flag!(inst, IR_FLAG_NOUB) + end + elseif head === :throw_undef_if_not + condval = maybe_extract_const_bool(argextype(stmt.args[2], ir)) + condval isa Bool || return false + if condval + inst[:stmt] = nothing + # We simplified the IR, but we did not update the type + return false + end + rt = Union{} + elseif head === :gc_preserve_begin || + head === :gc_preserve_end + return false + elseif head === :leave + return false + else + Core.println(stmt) + error("reprocess_instruction!: unhandled expression found") + end + elseif isa(stmt, PhiNode) + rt = abstract_eval_phi_stmt(interp, stmt, idx, irsv) + elseif isa(stmt, UpsilonNode) + rt = argextype(stmt.val, irsv.ir) + elseif isa(stmt, PhiCNode) + # Currently not modeled + return false + elseif isa(stmt, EnterNode) + # TODO: Propagate scope type changes + return false + elseif isa(stmt, ReturnNode) + # Handled at the very end + return false + elseif isa(stmt, PiNode) + rt = tmeet(typeinf_lattice(interp), argextype(stmt.val, ir), widenconst(stmt.typ)) + elseif stmt === nothing + return false + elseif isa(stmt, GlobalRef) + # GlobalRef is not refinable + else + rt = argextype(stmt, irsv.ir) + end + if rt !== nothing + if has_flag(inst, IR_FLAG_UNUSED) + # Don't bother checking the type if we know it's unused + if has_flag(inst, IR_FLAGS_REMOVABLE) + inst[:stmt] = nothing + end + return false + end + if isa(rt, Const) + inst[:type] = rt + if is_inlineable_constant(rt.val) && has_flag(inst, IR_FLAGS_REMOVABLE) + inst[:stmt] = quoted(rt.val) + end + return true + elseif !⊑(typeinf_lattice(interp), inst[:type], rt) + inst[:type] = rt + return true + end + end + return false +end + +# Process the terminator and add the successor to `bb_ip`. Returns whether a backedge was seen. 
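# (Illustrative example, not part of the original diff: while scanning block 5, a terminator
# `GotoNode(3)` targets an already-visited block, so it is reported as a backedge and is not
# pushed onto `bb_ip`; a `GotoNode(7)` would simply be enqueued and the scan would continue.)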
+function process_terminator!(@nospecialize(stmt), bb::Int, bb_ip::BitSetBoundedMinPrioritySet) + if isa(stmt, ReturnNode) + return false + elseif isa(stmt, GotoNode) + backedge = stmt.label <= bb + backedge || push!(bb_ip, stmt.label) + return backedge + elseif isa(stmt, GotoIfNot) + backedge = stmt.dest <= bb + backedge || push!(bb_ip, stmt.dest) + push!(bb_ip, bb+1) + return backedge + elseif isa(stmt, EnterNode) + dest = stmt.catch_dest + if dest ≠ 0 + @assert dest > bb + push!(bb_ip, dest) + end + push!(bb_ip, bb+1) + return false + else + push!(bb_ip, bb+1) + return false + end +end + +struct BBScanner + ir::IRCode + bb_ip::BitSetBoundedMinPrioritySet +end + +function BBScanner(ir::IRCode) + bbs = ir.cfg.blocks + bb_ip = BitSetBoundedMinPrioritySet(length(bbs)) + push!(bb_ip, 1) + return BBScanner(ir, bb_ip) +end + +function scan!(callback, scanner::BBScanner, forwards_only::Bool) + (; bb_ip, ir) = scanner + bbs = ir.cfg.blocks + while !isempty(bb_ip) + bb = popfirst!(bb_ip) + stmts = bbs[bb].stmts + lstmt = last(stmts) + for idx = stmts + inst = ir[SSAValue(idx)] + ret = callback(inst, lstmt, bb) + ret === nothing && return true + ret::Bool || break + idx == lstmt && process_terminator!(inst[:stmt], bb, bb_ip) && forwards_only && return false + end + end + return true +end + +function populate_def_use_map!(tpdum::TwoPhaseDefUseMap, scanner::BBScanner) + scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int + for ur in userefs(inst) + val = ur[] + if isa(val, SSAValue) + push!(tpdum[val.id], inst.idx) + end + end + return true + end +end +populate_def_use_map!(tpdum::TwoPhaseDefUseMap, ir::IRCode) = + populate_def_use_map!(tpdum, BBScanner(ir)) + +function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irsv::IRInterpretationState) + isexpr(stmt, :call) || return false + @inbounds for i = 2:length(stmt.args) + argtype = abstract_eval_value(interp, stmt.args[i], StatementState(nothing, false), irsv) + is_const_argtype(argtype) || return false + end + return true +end + +function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; + externally_refined::Union{Nothing,BitSet} = nothing) + (; ir, tpdum, ssa_refined) = irsv + + @assert isempty(ir.new_nodes) "IRCode should be compacted before irinterp" + + all_rets = Int[] + scanner = BBScanner(ir) + + check_ret!(@nospecialize(stmt), idx::Int) = isa(stmt, ReturnNode) && isdefined(stmt, :val) && push!(all_rets, idx) + + # Fast path: Scan both use counts and refinement in one single pass of + # of the instructions. In the absence of backedges, this will + # converge. 
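# (Sketch, not part of the original diff: for loop-free IR such as
#      %1 = f()
#      %2 = g(%1)
#      return %2
#  a single forward pass refines each statement at most once, so `completed_scan` below is true.
#  Any backedge (i.e. a loop) aborts the forwards-only scan and forces the slow path further
#  down, which builds the def-use map and iterates to a fixed point.)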
+ completed_scan = scan!(scanner, true) do inst::Instruction, lstmt::Int, bb::Int + idx = inst.idx + irsv.curridx = idx + stmt = inst[:stmt] + typ = inst[:type] + flag = inst[:flag] + any_refined = false + if has_flag(flag, IR_FLAG_REFINED) + any_refined = true + sub_flag!(inst, IR_FLAG_REFINED) + elseif is_all_const_call(stmt, interp, irsv) + # force reinference on calls with all constant arguments + any_refined = true + end + for ur in userefs(stmt) + val = ur[] + if isa(val, Argument) + any_refined |= irsv.argtypes_refined[val.n] + elseif isa(val, SSAValue) + any_refined |= val.id in ssa_refined + count!(tpdum, val) + end + end + if isa(stmt, PhiNode) && idx in ssa_refined + any_refined = true + delete!(ssa_refined, idx) + end + check_ret!(stmt, idx) + is_terminator_or_phi = (isa(stmt, PhiNode) || stmt === nothing || isterminator(stmt)) + if typ === Bottom && !(idx == lstmt && is_terminator_or_phi) + return true + end + if (any_refined && reprocess_instruction!(interp, inst, idx, bb, irsv)) || + (externally_refined !== nothing && idx in externally_refined) + push!(ssa_refined, idx) + stmt = inst[:stmt] + typ = inst[:type] + end + if typ === Bottom && !is_terminator_or_phi + kill_terminator_edges!(irsv, lstmt, bb) + if idx != lstmt + for idx2 in (idx+1:lstmt-1) + ir[SSAValue(idx2)] = nothing + end + ir[SSAValue(lstmt)][:stmt] = ReturnNode() + end + return false + end + return true + end + + if !completed_scan + # Slow path + stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts)) + + # Slow Path Phase 1.A: Complete use scanning + scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int + idx = inst.idx + irsv.curridx = idx + stmt = inst[:stmt] + flag = inst[:flag] + if has_flag(flag, IR_FLAG_REFINED) + sub_flag!(inst, IR_FLAG_REFINED) + push!(stmt_ip, idx) + end + check_ret!(stmt, idx) + for ur in userefs(stmt) + val = ur[] + if isa(val, Argument) + if irsv.argtypes_refined[val.n] + push!(stmt_ip, idx) + end + elseif isa(val, SSAValue) + count!(tpdum, val) + end + end + return true + end + + # Slow Path Phase 1.B: Assemble def-use map + complete!(tpdum); push!(scanner.bb_ip, 1) + populate_def_use_map!(tpdum, scanner) + + # Slow Path Phase 2: Use def-use map to converge cycles. + # TODO: It would be possible to return to the fast path after converging + # each cycle, but that's somewhat complicated. + for val in ssa_refined + for use in tpdum[val] + if !(use in ssa_refined) + push!(stmt_ip, use) + end + end + end + while !isempty(stmt_ip) + idx = popfirst!(stmt_ip) + irsv.curridx = idx + inst = ir[SSAValue(idx)] + if reprocess_instruction!(interp, inst, idx, nothing, irsv) + append!(stmt_ip, tpdum[idx]) + end + end + end + + ultimate_rt = Bottom + for idx in all_rets + bb = block_for_inst(ir.cfg, idx) + if bb != 1 && length(ir.cfg.blocks[bb].preds) == 0 + # Could have discovered this block is dead after the initial scan + continue + end + inst = ir[SSAValue(idx)][:stmt]::ReturnNode + rt = argextype(inst.val, ir) + ultimate_rt = tmerge(typeinf_lattice(interp), ultimate_rt, rt) + end + + nothrow = noub = true + for idx = 1:length(ir.stmts) + if ir[SSAValue(idx)][:stmt] === nothing + # skip `nothing` statement, which might be inserted as a dummy node, + # e.g. 
by `finish_current_bb!` without explicitly marking it as `:nothrow` + continue + end + flag = ir[SSAValue(idx)][:flag] + nothrow &= has_flag(flag, IR_FLAG_NOTHROW) + noub &= has_flag(flag, IR_FLAG_NOUB) + (nothrow | noub) || break + end + + if irsv.frameid != 0 + callstack = irsv.callstack::Vector{AbsIntState} + @assert callstack[end] === irsv && length(callstack) == irsv.frameid + pop!(callstack) + end + + return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub)) +end diff --git a/base/compiler/ssair/legacy.jl b/Compiler/src/ssair/legacy.jl similarity index 68% rename from base/compiler/ssair/legacy.jl rename to Compiler/src/ssair/legacy.jl index e2c924d60cb83..675ca2dea9b32 100644 --- a/base/compiler/ssair/legacy.jl +++ b/Compiler/src/ssair/legacy.jl @@ -1,16 +1,22 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license """ - inflate_ir!(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode + inflate_ir!(ci::CodeInfo, mi::MethodInstance) -> ir::IRCode inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode Inflates `ci::CodeInfo`-IR to `ir::IRCode`-format. This should be used with caution as it is a in-place transformation where the fields of the original `ci::CodeInfo` are modified. """ -function inflate_ir!(ci::CodeInfo, linfo::MethodInstance) - sptypes = sptypes_from_meth_instance(linfo) - argtypes, _ = matching_cache_argtypes(fallback_lattice, linfo) +function inflate_ir!(ci::CodeInfo, mi::MethodInstance) + sptypes = sptypes_from_meth_instance(mi) + if ci.slottypes === nothing + argtypes = va_process_argtypes(fallback_lattice, + matching_cache_argtypes(fallback_lattice, mi), + ci.nargs, ci.isva) + else + argtypes = ci.slottypes[1:ci.nargs] + end return inflate_ir!(ci, sptypes, argtypes) end function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) @@ -25,9 +31,8 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{A code[i] = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest)) elseif isa(stmt, PhiNode) code[i] = PhiNode(Int32[block_for_inst(cfg, Int(edge)) for edge in stmt.edges], stmt.values) - elseif isexpr(stmt, :enter) - stmt.args[1] = block_for_inst(cfg, stmt.args[1]::Int) - code[i] = stmt + elseif isa(stmt, EnterNode) + code[i] = EnterNode(stmt, stmt.catch_dest == 0 ? 
0 : block_for_inst(cfg, stmt.catch_dest)) end end nstmts = length(code) @@ -36,24 +41,23 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{A ssavaluetypes = Any[ Any for i = 1:ssavaluetypes::Int ] end info = CallInfo[NoCallInfo() for i = 1:nstmts] - stmts = InstructionStream(code, ssavaluetypes, info, ci.codelocs, ci.ssaflags) - linetable = ci.linetable - if !isa(linetable, Vector{LineInfoNode}) - linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode} - end + di = DebugInfoStream(nothing, ci.debuginfo, nstmts) + stmts = InstructionStream(code, ssavaluetypes, info, di.codelocs, ci.ssaflags) meta = Expr[] - return IRCode(stmts, cfg, linetable, argtypes, meta, sptypes) + return IRCode(stmts, cfg, di, argtypes, meta, sptypes, WorldRange(ci.min_world, ci.max_world)) end """ - inflate_ir(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode - inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode inflate_ir(ci::CodeInfo) -> ir::IRCode + inflate_ir(ci::CodeInfo, mi::MethodInstance) -> ir::IRCode + inflate_ir(ci::CodeInfo, argtypes::Vector{Any}) -> ir::IRCode + inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode Non-destructive version of `inflate_ir!`. Mainly used for testing or interactive use. """ -inflate_ir(ci::CodeInfo, linfo::MethodInstance) = inflate_ir!(copy(ci), linfo) +inflate_ir(ci::CodeInfo, mi::MethodInstance) = inflate_ir!(copy(ci), mi) +inflate_ir(ci::CodeInfo, argtypes::Vector{Any}) = inflate_ir(ci, VarState[], argtypes) inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes) function inflate_ir(ci::CodeInfo) parent = ci.parent @@ -72,17 +76,19 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode) resize!(ci.slotflags, nargs) resize!(ci.slottypes, nargs) stmts = ir.stmts - code = ci.code = stmts.inst + code = ci.code = stmts.stmt ssavaluetypes = ci.ssavaluetypes = stmts.type - codelocs = ci.codelocs = stmts.line + codelocs = stmts.line ssaflags = ci.ssaflags = stmts.flag - linetable = ci.linetable = ir.linetable + debuginfo = ir.debuginfo for metanode in ir.meta push!(code, metanode) - push!(codelocs, 1) + push!(codelocs, 1, 0, 0) push!(ssavaluetypes, Any) push!(ssaflags, IR_FLAG_NULL) end + @assert debuginfo.codelocs === stmts.line "line table not from debuginfo" + ci.debuginfo = DebugInfo(debuginfo, length(code)) # Translate BB Edges to statement edges # (and undo normalization for now) for i = 1:length(code) @@ -93,9 +99,8 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode) code[i] = GotoIfNot(stmt.cond, first(ir.cfg.blocks[stmt.dest].stmts)) elseif isa(stmt, PhiNode) code[i] = PhiNode(Int32[edge == 0 ? 0 : last(ir.cfg.blocks[edge].stmts) for edge in stmt.edges], stmt.values) - elseif isexpr(stmt, :enter) - stmt.args[1] = first(ir.cfg.blocks[stmt.args[1]::Int].stmts) - code[i] = stmt + elseif isa(stmt, EnterNode) + code[i] = EnterNode(stmt, stmt.catch_dest == 0 ? 
0 : first(ir.cfg.blocks[stmt.catch_dest].stmts)) end end end diff --git a/base/compiler/ssair/passes.jl b/Compiler/src/ssair/passes.jl similarity index 64% rename from base/compiler/ssair/passes.jl rename to Compiler/src/ssair/passes.jl index 9a312bec8f202..ff333b9b0a129 100644 --- a/base/compiler/ssair/passes.jl +++ b/Compiler/src/ssair/passes.jl @@ -6,6 +6,13 @@ function is_known_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,I return singleton_type(ft) === func end +function is_known_invoke_or_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,IncrementalCompact}) + isinvoke = isexpr(x, :invoke) + (isinvoke || isexpr(x, :call)) || return false + ft = argextype(x.args[isinvoke ? 2 : 1], ir) + return singleton_type(ft) === func +end + struct SSAUse kind::Symbol idx::Int @@ -64,7 +71,7 @@ function try_compute_field(ir::Union{IncrementalCompact,IRCode}, @nospecialize(f end # assume `stmt` is a call of `getfield`/`setfield!`/`isdefined` -function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, typ::DataType) +function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, @nospecialize(typ)) field = try_compute_field(ir, stmt.args[3]) return try_compute_fieldidx(typ, field) end @@ -72,19 +79,18 @@ end function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order - while !(curblock in allblocks) && curblock !== 0 + while curblock ∉ allblocks && curblock ≠ 0 curblock = domtree.idoms_bb[curblock] end return curblock end function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) - ex = ir[SSAValue(def)][:inst] + ex = ir[SSAValue(def)][:stmt] if isexpr(ex, :new) return ex.args[1+fidx] else - @assert isa(ex, Expr) - # The use is whatever the setfield was + @assert is_known_call(ex, setfield!, ir) return ex.args[4] end end @@ -184,8 +190,21 @@ function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospec return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ) end -function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), - callback = (@nospecialize(pi), @nospecialize(idx)) -> false) +abstract type WalkerCallback end + +struct TrivialWalker <: WalkerCallback end +(::TrivialWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) = nothing + +struct PiWalker <: WalkerCallback end +function (::PiWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) + if isa(def, PiNode) + return LiftedValue(def.val) + end + return nothing +end + +function simple_walk(compact::IncrementalCompact, @nospecialize(defssa::AnySSAValue), + walker_callback::WalkerCallback=TrivialWalker()) while true if isa(defssa, OldSSAValue) if already_inserted(compact, defssa) @@ -200,41 +219,46 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA return rename end end - def = compact[defssa][:inst] - if isa(def, PiNode) - if callback(def, defssa) - return defssa - end - def = def.val - if isa(def, SSAValue) - is_old(compact, defssa) && (def = OldSSAValue(def.id)) - else - return def - end - defssa = def - elseif isa(def, AnySSAValue) - callback(def, defssa) + def = compact[defssa][:stmt] + if isa(def, AnySSAValue) + walker_callback(def, defssa) if isa(def, SSAValue) is_old(compact, defssa) && (def = OldSSAValue(def.id)) end defssa = def - elseif isa(def, Union{PhiNode, PhiCNode, Expr, GlobalRef}) + elseif isa(def, Union{PhiNode, 
PhiCNode, GlobalRef}) return defssa else - return def + new_def = walker_callback(def, defssa) + if new_def === nothing + return defssa + end + new_def = new_def.val + if !isa(new_def, AnySSAValue) + return new_def + elseif isa(new_def, SSAValue) + is_old(compact, defssa) && (new_def = OldSSAValue(new_def.id)) + end + defssa = new_def end end end -function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), - @nospecialize(typeconstraint)) - callback = function (@nospecialize(pi), @nospecialize(idx)) - if isa(pi, PiNode) - typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ)) - end - return false +mutable struct TypeConstrainingWalker <: WalkerCallback + typeconstraint::Any + TypeConstrainingWalker(@nospecialize(typeconstraint::Any)) = new(typeconstraint) +end +function (walker_callback::TypeConstrainingWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) + if isa(def, PiNode) + walker_callback.typeconstraint = + typeintersect(walker_callback.typeconstraint, widenconst(def.typ)) + return LiftedValue(def.val) end - def = simple_walk(compact, defssa, callback) + return nothing +end +function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(val::AnySSAValue), + @nospecialize(typeconstraint)) + def = simple_walk(compact, val, TypeConstrainingWalker(typeconstraint)) return Pair{Any, Any}(def, typeconstraint) end @@ -250,7 +274,7 @@ predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwi function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice) visited_philikes = AnySSAValue[] isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes - def = compact[defssa][:inst] + def = compact[defssa][:stmt] if predecessors(def, compact) === nothing return Any[defssa], visited_philikes end @@ -264,10 +288,12 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe defssa = pop!(worklist_defs) typeconstraint = pop!(worklist_constraints) visited_constraints[defssa] = typeconstraint - def = compact[defssa][:inst] + def = compact[defssa][:stmt] values = predecessors(def, compact) if values !== nothing - push!(visited_philikes, defssa) + if isa(def, PhiNode) || length(values) > 1 + push!(visited_philikes, defssa) + end possible_predecessors = Int[] for n in 1:length(values) @@ -330,17 +356,23 @@ function record_immutable_preserve!(new_preserves::Vector{Any}, def::Expr, compa end function already_inserted(compact::IncrementalCompact, old::OldSSAValue) - id = old.id - if id < length(compact.ir.stmts) - return id < compact.idx - end - id -= length(compact.ir.stmts) - if id < length(compact.ir.new_nodes) - return already_inserted(compact, OldSSAValue(compact.ir.new_nodes.info[id].pos)) + already_inserted_ssa(compact, compact.idx-1)(0, old) +end + +function already_inserted_ssa(compact::IncrementalCompact, processed_idx::Int) + return function did_already_insert(phi_arg::Int, old::OldSSAValue) + id = old.id + if id <= length(compact.ir.stmts) + return id <= processed_idx + end + id -= length(compact.ir.stmts) + if id <= length(compact.ir.new_nodes) + return did_already_insert(phi_arg, OldSSAValue(compact.ir.new_nodes.info[id].pos)) + end + id -= length(compact.ir.new_nodes) + @assert id <= length(compact.pending_nodes) + return !(id in compact.pending_perm) end - id -= length(compact.ir.new_nodes) - @assert id <= length(compact.pending_nodes) - return !(id in compact.pending_perm) end function 
is_pending(compact::IncrementalCompact, old::OldSSAValue) @@ -383,7 +415,9 @@ function lift_leaves(compact::IncrementalCompact, field::Int, elseif isexpr(def, :new) typ = unwrap_unionall(widenconst(types(compact)[leaf])) (isa(typ, DataType) && !isabstracttype(typ)) || return nothing - @assert !ismutabletype(typ) + if ismutabletype(typ) + isconst(typ, field) || return nothing + end if length(def.args) < 1+field if field > fieldcount(typ) return nothing @@ -404,7 +438,7 @@ function lift_leaves(compact::IncrementalCompact, field::Int, # `:new` expressions by the inlinear # elseif isexpr(def, :splatnew) && length(def.args) == 2 && isa(def.args[2], AnySSAValue) # tplssa = def.args[2]::AnySSAValue - # tplexpr = compact[tplssa][:inst] + # tplexpr = compact[tplssa][:stmt] # if is_known_call(tplexpr, tuple, compact) && 1 ≤ field < length(tplexpr.args) # lift_arg!(compact, tplssa, cache_key, tplexpr, 1+field, lifted_leaves) # continue @@ -417,8 +451,8 @@ function lift_leaves(compact::IncrementalCompact, field::Int, ocleaf = simple_walk(compact, ocleaf) end ocdef, _ = walk_to_def(compact, ocleaf) - if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-4 - lift_arg!(compact, leaf, cache_key, ocdef, 4+field, lifted_leaves) + if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-5 + lift_arg!(compact, leaf, cache_key, ocdef, 5+field, lifted_leaves) continue end return nothing @@ -440,9 +474,9 @@ function lift_leaves(compact::IncrementalCompact, field::Int, elseif isa(leaf, QuoteNode) leaf = leaf.value elseif isa(leaf, GlobalRef) - mod, name = leaf.mod, leaf.name - if isdefined(mod, name) && isconst(mod, name) - leaf = getglobal(mod, name) + typ = argextype(leaf, compact) + if isa(typ, Const) + leaf = typ.val else return nothing end @@ -465,19 +499,15 @@ function lift_arg!( if is_old(compact, leaf) && isa(lifted, SSAValue) lifted = OldSSAValue(lifted.id) if already_inserted(compact, lifted) - lifted = compact.ssa_rename[lifted.id] - if isa(lifted, Refined) - lifted = lifted.val + new_lifted = compact.ssa_rename[lifted.id] + if isa(new_lifted, Refined) + new_lifted = new_lifted.val + end + # Special case: If lifted happens to be the statement we're currently processing, + # leave it as old SSAValue in case we decide to handle this in the renamer + if !isa(new_lifted, SSAValue) || new_lifted != SSAValue(compact.result_idx-1) + lifted = new_lifted end - end - end - if isa(lifted, GlobalRef) || isa(lifted, Expr) - lifted = insert_node!(compact, leaf, effect_free_and_nothrow(NewInstruction(lifted, argextype(lifted, compact)))) - compact[leaf] = nothing - stmt.args[argidx] = lifted - compact[leaf] = stmt - if isa(leaf, SSAValue) && leaf.id < compact.result_idx - push!(compact.late_fixup, leaf.id) end end lifted_leaves[cache_key] = LiftedValue(lifted) @@ -494,12 +524,12 @@ function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) leaf = simple_walk(compact, leaf) end if isa(leaf, AnySSAValue) - def = compact[leaf][:inst] + def = compact[leaf][:stmt] else def = leaf end elseif isa(leaf, AnySSAValue) - def = compact[leaf][:inst] + def = compact[leaf][:stmt] else def = leaf end @@ -522,8 +552,7 @@ end function lift_comparison! 
end function lift_comparison!(::typeof(===), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - 𝕃ₒ::AbstractLattice) + idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) args = stmt.args length(args) == 3 || return lhs, rhs = args[2], args[3] @@ -539,46 +568,44 @@ function lift_comparison!(::typeof(===), compact::IncrementalCompact, else return end - lift_comparison_leaves!(egal_tfunc, compact, val, cmp, lifting_cache, idx, 𝕃ₒ) + lift_comparison_leaves!(egal_tfunc, compact, val, cmp, idx, 𝕃ₒ) end function lift_comparison!(::typeof(isa), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - 𝕃ₒ::AbstractLattice) + idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) args = stmt.args length(args) == 3 || return cmp = argextype(args[3], compact) val = args[2] - lift_comparison_leaves!(isa_tfunc, compact, val, cmp, lifting_cache, idx, 𝕃ₒ) + lift_comparison_leaves!(isa_tfunc, compact, val, cmp, idx, 𝕃ₒ) end function lift_comparison!(::typeof(isdefined), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - 𝕃ₒ::AbstractLattice) + idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) args = stmt.args length(args) == 3 || return cmp = argextype(args[3], compact) isa(cmp, Const) || return # `isdefined_tfunc` won't return Const val = args[2] - lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, lifting_cache, idx, 𝕃ₒ) + lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, idx, 𝕃ₒ) +end + +function phi_or_ifelse_predecessors(@nospecialize(def), compact::IncrementalCompact) + isa(def, PhiNode) && return def.values + is_known_call(def, Core.ifelse, compact) && return def.args[3:4] + return nothing end function lift_comparison_leaves!(@specialize(tfunc), compact::IncrementalCompact, @nospecialize(val), @nospecialize(cmp), - lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, idx::Int, - 𝕃ₒ::AbstractLattice) + idx::Int, 𝕃ₒ::AbstractLattice) typeconstraint = widenconst(argextype(val, compact)) if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end isa(typeconstraint, Union) || return # bail out if there won't be a good chance for lifting - predecessors = function (@nospecialize(def), compact::IncrementalCompact) - isa(def, PhiNode) && return def.values - is_known_call(def, Core.ifelse, compact) && return def.args[3:4] - return nothing - end - leaves, visited_philikes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ, predecessors) + leaves, visited_philikes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ, phi_or_ifelse_predecessors) length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves # check if we can evaluate the comparison for each one of the leaves @@ -597,11 +624,12 @@ function lift_comparison_leaves!(@specialize(tfunc), end # perform lifting - lifted_val = perform_lifting!(compact, - visited_philikes, cmp, lifting_cache, Bool, - lifted_leaves::LiftedLeaves, val, nothing)::LiftedValue + (lifted_val, nest) = perform_lifting!(compact, + visited_philikes, cmp, Bool, lifted_leaves::LiftedLeaves, val, nothing) - compact[idx] = lifted_val.val + compact[idx] = (lifted_val::LiftedValue).val + + finish_phi_nest!(compact, nest) end struct IfElseCall @@ -618,14 +646,17 @@ end struct SkipToken end; const SKIP_TOKEN = SkipToken() -function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value), 
- lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int}) +function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa::AnySSAValue), + @nospecialize(old_value), lifted_philikes::Vector{LiftedPhilike}, + lifted_leaves::Union{LiftedLeaves, LiftedDefs}, + reverse_mapping::IdDict{AnySSAValue, Int}, + walker_callback::WalkerCallback) val = old_value if is_old(compact, old_node_ssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end if isa(val, AnySSAValue) - val = simple_walk(compact, val) + val = simple_walk(compact, val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback)) end if val in keys(lifted_leaves) lifted_val = lifted_leaves[val] @@ -635,8 +666,7 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=: lifted_val === nothing && return UNDEF_TOKEN val = lifted_val.val if isa(val, AnySSAValue) - callback = (@nospecialize(pi), @nospecialize(idx)) -> true - val = simple_walk(compact, val, callback) + val = simple_walk(compact, val, PiWalker()) end return val elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) @@ -647,95 +677,23 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=: end function is_old(compact, @nospecialize(old_node_ssa)) - isa(old_node_ssa, OldSSAValue) && - !is_pending(compact, old_node_ssa) && - !already_inserted(compact, old_node_ssa) + isa(old_node_ssa, OldSSAValue) || return false + is_pending(compact, old_node_ssa) && return false + already_inserted(compact, old_node_ssa) && return false + return true end -function perform_lifting!(compact::IncrementalCompact, - visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key), - lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val), - lazydomtree::Union{LazyDomtree,Nothing}) - reverse_mapping = IdDict{AnySSAValue, Int}() - for id in 1:length(visited_philikes) - reverse_mapping[visited_philikes[id]] = id - end - - # Check if all the lifted leaves are the same - local the_leaf - all_same = true - for (_, val) in lifted_leaves - if !@isdefined(the_leaf) - the_leaf = val - continue - end - if val !== the_leaf - all_same = false - end - end - - the_leaf_val = isa(the_leaf, LiftedValue) ? 
the_leaf.val : nothing - if !isa(the_leaf_val, SSAValue) - all_same = false - end - - if all_same - dominates_all = true - if lazydomtree !== nothing - domtree = get!(lazydomtree) - for item in visited_philikes - if !dominates_ssa(compact, domtree, the_leaf_val, item) - dominates_all = false - break - end - end - if dominates_all - return the_leaf - end - end - end - - # Insert PhiNodes - nphilikes = length(visited_philikes) - lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes) - for i = 1:nphilikes - old_ssa = visited_philikes[i] - old_inst = compact[old_ssa] - old_node = old_inst[:inst]::Union{PhiNode,Expr} - # FIXME this cache is broken somehow - # ckey = Pair{AnySSAValue, Any}(old_ssa, cache_key) - # cached = ckey in keys(lifting_cache) - cached = false - if cached - ssa = lifting_cache[ckey] - if isa(old_node, PhiNode) - lifted_philikes[i] = LiftedPhilike(ssa, old_node, false) - else - lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(old_node), false) - end - continue - end - if isa(old_node, PhiNode) - new_node = PhiNode() - ssa = insert_node!(compact, old_ssa, effect_free_and_nothrow(NewInstruction(new_node, result_t))) - lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) - else - @assert is_known_call(old_node, Core.ifelse, compact) - ifelse_func, condition = old_node.args - if is_old(compact, old_ssa) && isa(condition, SSAValue) - condition = OldSSAValue(condition.id) - end - - new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below - new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) - - ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true) - lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true) - end - # lifting_cache[ckey] = ssa - end +struct PhiNest{C<:WalkerCallback} + visited_philikes::Vector{AnySSAValue} + lifted_philikes::Vector{LiftedPhilike} + lifted_leaves::Union{LiftedLeaves, LiftedDefs} + reverse_mapping::IdDict{AnySSAValue, Int} + walker_callback::C +end +function finish_phi_nest!(compact::IncrementalCompact, nest::PhiNest) + (;visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback) = nest + nphilikes = length(lifted_philikes) # Fix up arguments for i = 1:nphilikes (old_node_ssa, lf) = visited_philikes[i], lifted_philikes[i] @@ -744,12 +702,12 @@ function perform_lifting!(compact::IncrementalCompact, lfnode = lf.node if isa(lfnode, PhiNode) - old_node = compact[old_node_ssa][:inst]::PhiNode + old_node = compact[old_node_ssa][:stmt]::PhiNode new_node = lfnode for i = 1:length(old_node.values) isassigned(old_node.values, i) || continue val = lifted_value(compact, old_node_ssa, old_node.values[i], - lifted_philikes, lifted_leaves, reverse_mapping) + lifted_philikes, lifted_leaves, reverse_mapping, walker_callback) val !== SKIP_TOKEN && push!(new_node.edges, old_node.edges[i]) if val === UNDEF_TOKEN resize!(new_node.values, length(new_node.values)+1) @@ -759,13 +717,13 @@ function perform_lifting!(compact::IncrementalCompact, end end elseif isa(lfnode, IfElseCall) - old_node = compact[old_node_ssa][:inst]::Expr + old_node = compact[old_node_ssa][:stmt]::Expr then_result, else_result = old_node.args[3], old_node.args[4] then_result = lifted_value(compact, old_node_ssa, then_result, - lifted_philikes, lifted_leaves, reverse_mapping) + lifted_philikes, lifted_leaves, reverse_mapping, walker_callback) else_result = lifted_value(compact, old_node_ssa, else_result, - lifted_philikes, lifted_leaves, reverse_mapping) + 
lifted_philikes, lifted_leaves, reverse_mapping, walker_callback) # In cases where the Core.ifelse condition is statically-known, e.g., thanks # to a PiNode from a guarding conditional, replace with the remaining branch. @@ -773,8 +731,7 @@ function perform_lifting!(compact::IncrementalCompact, only_result = (then_result === SKIP_TOKEN) ? else_result : then_result # Replace Core.ifelse(%cond, %a, %b) with %a - compact[lf.ssa][:inst] = only_result - should_count && _count_added_node!(compact, only_result) + compact[lf.ssa] = only_result # Note: Core.ifelse(%cond, %a, %b) has observable effects (!nothrow), but since # we have not deleted the preceding statement that this was derived from, this @@ -794,26 +751,132 @@ function perform_lifting!(compact::IncrementalCompact, push!(lfnode.call.args, else_result) end end +end + +struct LiftedLeaveWalker{C<:WalkerCallback} <: WalkerCallback + lifted_leaves::Union{LiftedLeaves, LiftedDefs} + reverse_mapping::IdDict{AnySSAValue, Int} + inner_walker_callback::C + function LiftedLeaveWalker(@nospecialize(lifted_leaves::Union{LiftedLeaves, LiftedDefs}), + @nospecialize(reverse_mapping::IdDict{AnySSAValue, Int}), + inner_walker_callback::C) where C<:WalkerCallback + return new{C}(lifted_leaves, reverse_mapping, inner_walker_callback) + end +end +function (walker_callback::LiftedLeaveWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) + (; lifted_leaves, reverse_mapping, inner_walker_callback) = walker_callback + if defssa in keys(lifted_leaves) || defssa in keys(reverse_mapping) + return nothing + end + isa(def, PiNode) && return LiftedValue(def.val) + return inner_walker_callback(def, defssa) +end + +function perform_lifting!(compact::IncrementalCompact, + visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key), + @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val), + lazydomtree::Union{LazyDomtree,Nothing}, walker_callback::WalkerCallback = TrivialWalker()) + reverse_mapping = IdDict{AnySSAValue, Int}() + for id in 1:length(visited_philikes) + reverse_mapping[visited_philikes[id]] = id + end + + # Check if all the lifted leaves are the same + local the_leaf + all_same = true + for (_, val) in lifted_leaves + if !@isdefined(the_leaf) + the_leaf = val + continue + end + if val !== the_leaf + all_same = false + end + end + + if all_same && isa(the_leaf, LiftedValue) + dominates_all = true + the_leaf_val = the_leaf.val + if isa(the_leaf_val, AnySSAValue) + if lazydomtree === nothing + # Must conservatively assume this + dominates_all = false + else + # This code guards against the possibility of accidentally forwarding a value from a + # previous iteration. Consider for example: + # + # %p = phi(%arg, %t) + # %b = <...> + # %c = getfield(%p, 1) + # %t = tuple(%b) + # + # It would be incorrect to replace `%c` by `%b`, because that would read the value of + # `%b` in the *current* iteration, while the value of `%b` that comes in via `%p` is + # that of the previous iteration. 
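# (Contrast sketch, not part of the original diff: forwarding is fine when the single
#  lifted leaf dominates every visited phi, e.g.
#      %b = <...>
#      %t = tuple(%b)
#      %p = phi(%t, %t)
#      %c = getfield(%p, 1)    # may be rewritten to use %b directly
#  and that is what the `dominates_ssa` loop below checks.)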
+ domtree = get!(lazydomtree) + for item in visited_philikes + if !dominates_ssa(compact, domtree, the_leaf_val, item) + dominates_all = false + break + end + end + end + end + if dominates_all + if isa(the_leaf_val, OldSSAValue) + the_leaf = LiftedValue(simple_walk(compact, the_leaf_val)) + end + return Pair{Any, PhiNest}(the_leaf, PhiNest(visited_philikes, Vector{LiftedPhilike}(undef, 0), lifted_leaves, reverse_mapping, walker_callback)) + end + end + + # Insert PhiNodes + nphilikes = length(visited_philikes) + lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes) + for i = 1:nphilikes + old_ssa = visited_philikes[i] + old_inst = compact[old_ssa] + old_node = old_inst[:stmt]::Union{PhiNode,Expr} + if isa(old_node, PhiNode) + new_node = PhiNode() + ssa = insert_node!(compact, old_ssa, removable_if_unused(NewInstruction(new_node, result_t))) + lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) + else + @assert is_known_call(old_node, Core.ifelse, compact) + ifelse_func, condition = old_node.args + if is_old(compact, old_ssa) && isa(condition, SSAValue) + condition = OldSSAValue(condition.id) + end + + new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below + new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) + + ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true) + lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true) + end + end # Fixup the stmt itself if isa(stmt_val, Union{SSAValue, OldSSAValue}) - stmt_val = simple_walk(compact, stmt_val) + stmt_val = simple_walk(compact, stmt_val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback)) end if stmt_val in keys(lifted_leaves) - return lifted_leaves[stmt_val] + stmt_val = lifted_leaves[stmt_val] elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping) - return LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa) + stmt_val = LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa) + else + error() end - return stmt_val # N.B. 
should never happen + return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback)) end function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr) - length(stmt.args) != 4 && return + length(stmt.args) != 3 && return - vec = stmt.args[3] - val = stmt.args[4] + vec = stmt.args[2] + val = stmt.args[3] valT = argextype(val, compact) (isa(valT, Const) && isa(valT.val, Int)) || return valI = valT.val::Int @@ -823,7 +886,7 @@ function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr) valI <= length(vec) || return compact[idx] = quoted(vec[valI]) elseif isa(vec, SSAValue) - def = compact[vec][:inst] + def = compact[vec][:stmt] if is_known_call(def, Core.svec, compact) valI <= length(def.args) - 1 || return compact[idx] = def.args[valI+1] @@ -837,7 +900,116 @@ function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr) return end -# TODO: We could do the whole lifing machinery here, but really all +function lift_leaves_keyvalue(compact::IncrementalCompact, @nospecialize(key), + leaves::Vector{Any}, 𝕃ₒ::AbstractLattice) + # For every leaf, the lifted value + lifted_leaves = LiftedLeaves() + for i = 1:length(leaves) + leaf = leaves[i] + cache_key = leaf + if isa(leaf, AnySSAValue) + (def, leaf) = walk_to_def(compact, leaf) + if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact) + @assert isexpr(def, :invoke) + if length(def.args) in (5, 6) + set_key = def.args[end-1] + set_val_idx = length(def.args) + elseif length(def.args) == 4 + # Key is deleted + # TODO: Model this + return nothing + elseif length(def.args) == 3 + # The whole collection is deleted + # TODO: Model this + return nothing + else + return nothing + end + if set_key === key || (egal_tfunc(𝕃ₒ, argextype(key, compact), argextype(set_key, compact)) == Const(true)) + lift_arg!(compact, leaf, cache_key, def, set_val_idx, lifted_leaves) + continue + end + end + end + return nothing + end + return lifted_leaves +end + +function keyvalue_predecessors(@nospecialize(key), 𝕃ₒ::AbstractLattice) + function(@nospecialize(def), compact::IncrementalCompact) + if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact) + @assert isexpr(def, :invoke) + if length(def.args) in (5, 6) + collection = def.args[end-2] + set_key = def.args[end-1] + set_val_idx = length(def.args) + elseif length(def.args) == 4 + collection = def.args[end-1] + # Key is deleted + # TODO: Model this + return nothing + elseif length(def.args) == 3 + collection = def.args[end] + # The whole collection is deleted + # TODO: Model this + return nothing + else + return nothing + end + if set_key === key || (egal_tfunc(𝕃ₒ, argextype(key, compact), argextype(set_key, compact)) == Const(true)) + # This is an actual def + return nothing + end + return Any[collection] + end + return phi_or_ifelse_predecessors(def, compact) + end +end + +struct KeyValueWalker <: WalkerCallback + compact::IncrementalCompact +end +function (walker_callback::KeyValueWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) + if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, walker_callback.compact) + @assert length(def.args) in (5, 6) + return LiftedValue(def.args[end-2]) + end + return nothing +end + +function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) + collection = stmt.args[end-1] + key = stmt.args[end] + + leaves, visited_philikes = collect_leaves(compact, collection, Any, 𝕃ₒ, 
keyvalue_predecessors(key, 𝕃ₒ)) + isempty(leaves) && return + + lifted_leaves = lift_leaves_keyvalue(compact, key, leaves, 𝕃ₒ) + lifted_leaves === nothing && return + + result_t = Union{} + for v in values(lifted_leaves) + v === nothing && return + result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact)) + end + + (lifted_val, nest) = perform_lifting!(compact, + visited_philikes, key, result_t, lifted_leaves, collection, nothing, + KeyValueWalker(compact)) + + compact[idx] = lifted_val === nothing ? nothing : Expr(:call, GlobalRef(Core, :tuple), lifted_val.val) + finish_phi_nest!(compact, nest) + if lifted_val !== nothing + if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], tuple_tfunc(𝕃ₒ, Any[result_t])) + add_flag!(compact[SSAValue(idx)], IR_FLAG_REFINED) + end + end + + return +end + +# TODO: We could do the whole lifting machinery here, but really all # we want to do is clean this up when it got inserted by inlining, # which always targets simple `svec` call or `_compute_sparams`, # so this specialized lifting would be enough @@ -869,11 +1041,11 @@ end rarg = def.args[2 + i] isa(rarg, SSAValue) || return nothing - argdef = compact[rarg][:inst] + argdef = compact[rarg][:stmt] if isexpr(argdef, :new) rarg = argdef.args[1] isa(rarg, SSAValue) || return nothing - argdef = compact[rarg][:inst] + argdef = compact[rarg][:stmt] else isType(arg) || return nothing arg = arg.parameters[1] @@ -906,16 +1078,117 @@ end return nothing end +struct IsEgal <: Function + x::Any + IsEgal(@nospecialize(x)) = new(x) +end +(x::IsEgal)(@nospecialize(y)) = x.x === y + +# This tries to match patterns of the form +# %ft = typeof(%farg) +# %Targ = apply_type(Foo, ft) +# %x = new(%Targ, %farg) +# +# and if possible refines the nothrowness of the new expr based on it. +function pattern_match_typeof(compact::IncrementalCompact, typ::DataType, fidx::Int, + @nospecialize(Targ), @nospecialize(farg)) + isa(Targ, SSAValue) || return false + + Tdef = compact[Targ][:stmt] + is_known_call(Tdef, Core.apply_type, compact) || return false + length(Tdef.args) ≥ 2 || return false + + applyT = argextype(Tdef.args[2], compact) + isa(applyT, Const) || return false + + applyT = applyT.val + tvars = Any[] + while isa(applyT, UnionAll) + applyTvar = applyT.var + applyT = applyT.body + push!(tvars, applyTvar) + end + + @assert applyT.name === typ.name + fT = fieldtype(applyT, fidx) + idx = findfirst(IsEgal(fT), tvars) + idx === nothing && return false + checkbounds(Bool, Tdef.args, 2+idx) || return false + valarg = Tdef.args[2+idx] + isa(valarg, SSAValue) || return false + valdef = compact[valarg][:stmt] + is_known_call(valdef, typeof, compact) || return false + + return valdef.args[2] === farg +end + +function refine_new_effects!(𝕃ₒ::AbstractLattice, compact::IncrementalCompact, idx::Int, stmt::Expr) + inst = compact[SSAValue(idx)] + if has_flag(inst, IR_FLAGS_REMOVABLE) + return # already accurate + end + (consistent, removable, nothrow) = new_expr_effect_flags(𝕃ₒ, stmt.args, compact, pattern_match_typeof) + if consistent + add_flag!(inst, IR_FLAG_CONSISTENT) + end + if removable + add_flag!(inst, IR_FLAGS_REMOVABLE) + elseif nothrow + add_flag!(inst, IR_FLAG_NOTHROW) + end + return nothing +end + +function fold_ifelse!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) + length(stmt.args) == 4 || return false + condarg = stmt.args[2] + condtyp = argextype(condarg, compact) + if isa(condtyp, Const) + if condtyp.val === true + compact[idx] = stmt.args[3] + return true + elseif condtyp.val === false + compact[idx] = 
stmt.args[4] + return true + end + elseif ⊑(𝕃ₒ, condtyp, Bool) && stmt.args[3] === stmt.args[4] + compact[idx] = stmt.args[3] + return true + end + return false +end + # NOTE we use `IdSet{Int}` instead of `BitSet` for in these passes since they work on IR after inlining, # which can be very large sometimes, and program counters in question are often very sparse const SPCSet = IdSet{Int} -struct IntermediaryCollector +struct IntermediaryCollector <: WalkerCallback intermediaries::SPCSet end -function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa)) - push!(this.intermediaries, ssa.id) - return false +function (walker_callback::IntermediaryCollector)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) + if !(def isa Expr) + push!(walker_callback.intermediaries, defssa.id) + if def isa PiNode + return LiftedValue(def.val) + end + end + return nothing +end + +function update_scope_mapping!(scope_mapping, bb, val) + current_mapping = scope_mapping[bb] + if current_mapping != SSAValue(0) + if val == SSAValue(0) + # Unreachable bbs will have SSAValue(0), but can branch into + # try/catch regions. We could validate with the domtree, but that's + # quite expensive for a debug check, so simply allow this without + # making any changes to mapping. + return + end + @assert current_mapping == val + return + end + scope_mapping[bb] = val end """ @@ -940,13 +1213,61 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations - lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() - def_lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() # initialization of domtree is delayed to avoid the expensive computation in many cases lazydomtree = LazyDomtree(ir) - for ((_, idx), stmt) in compact + scope_mapping::Union{Vector{SSAValue}, Nothing} = nothing + for ((old_idx, idx), stmt) in compact + # If we encounter any EnterNode with set :scope, propagate the current scope for all basic blocks, so + # we have easy access for current_scope folding below. 
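# (Illustrative sketch, not part of the original diff; the `@with` reference is an assumption
#  about typical frontend output. Given IR of the form
#      %1 = EnterNode(catch_dest, %scope)    # e.g. emitted for `Base.ScopedValues.@with`
#      ...
#      %4 = Core.current_scope()
#  every block inside the enter region records %1 in `scope_mapping`, which lets the
#  `Core.current_scope` case further down fold %4 to the EnterNode's `scope` operand instead
#  of a runtime lookup.)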
+ if !isa(stmt, Expr) + bb = compact.active_result_bb - 1 + if scope_mapping !== nothing && did_just_finish_bb(compact) + this_scope = scope_mapping[bb] + if isa(stmt, GotoIfNot) + update_scope_mapping!(scope_mapping, stmt.dest, this_scope) + update_scope_mapping!(scope_mapping, bb+1, this_scope) + elseif isa(stmt, GotoNode) + update_scope_mapping!(scope_mapping, stmt.label, this_scope) + elseif isa(stmt, EnterNode) + if stmt.catch_dest != 0 + update_scope_mapping!(scope_mapping, stmt.catch_dest, this_scope) + end + isdefined(stmt, :scope) || update_scope_mapping!(scope_mapping, bb+1, this_scope) + elseif !isa(stmt, ReturnNode) + update_scope_mapping!(scope_mapping, bb+1, this_scope) + end + end + if isa(stmt, EnterNode) + if isdefined(stmt, :scope) + if scope_mapping === nothing + scope_mapping = SSAValue[SSAValue(0) for i = 1:length(compact.cfg_transform.result_bbs)] + end + update_scope_mapping!(scope_mapping, bb+1, SSAValue(idx)) + end + end + continue + end + if scope_mapping !== nothing && did_just_finish_bb(compact) + bb = compact.active_result_bb - 1 + bbs = scope_mapping[bb] + if isexpr(stmt, :leave) && bbs != SSAValue(0) + # Here we want to count the number of scopes that we're leaving, + # which is the same as the number of EnterNodes being referenced + # by `stmt.args` which have :scope set. In practice, the frontend + # does emit these in order, so we could simply go to the last one, + # but we want to avoid making that semantic assumption. + for i = 1:length(stmt.args) + scope = stmt.args[i] + scope === nothing && continue + enter = compact[scope][:stmt] + @assert isa(enter, EnterNode) + isdefined(enter, :scope) || continue + bbs = scope_mapping[block_for_inst(compact, bbs)] + end + end + update_scope_mapping!(scope_mapping, bb+1, bbs) + end # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement) - isa(stmt, Expr) || continue is_setfield = is_isdefined = is_finalizer = false field_ordering = :unspecified if is_known_call(stmt, setfield!, compact) @@ -979,7 +1300,13 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) # Inlining performs legality checks on the finalizer to determine # whether or not we may inline it. If so, it appends extra arguments # at the end of the intrinsic. Detect that here.
- length(stmt.args) == 5 || continue + if length(stmt.args) == 4 && stmt.args[4] === nothing + # constant case + elseif length(stmt.args) == 5 && stmt.args[4] isa Bool && stmt.args[5] isa Core.CodeInstance + # inlining case + else + continue + end end is_finalizer = true elseif isexpr(stmt, :foreigncall) @@ -990,14 +1317,10 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) preserved_arg = stmt.args[pidx] isa(preserved_arg, SSAValue) || continue let intermediaries = SPCSet() - callback = function (@nospecialize(pi), @nospecialize(ssa)) - push!(intermediaries, ssa.id) - return false - end - def = simple_walk(compact, preserved_arg, callback) + def = simple_walk(compact, preserved_arg, IntermediaryCollector(intermediaries)) isa(def, SSAValue) || continue defidx = def.id - def = compact[def][:inst] + def = compact[def][:stmt] if is_known_call(def, tuple, compact) record_immutable_preserve!(new_preserves, def, compact) push!(preserved, preserved_arg.id) @@ -1032,9 +1355,24 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) elseif is_known_call(stmt, Core._svec_ref, compact) lift_svec_ref!(compact, idx, stmt) elseif is_known_call(stmt, (===), compact) - lift_comparison!(===, compact, idx, stmt, lifting_cache, 𝕃ₒ) + lift_comparison!(===, compact, idx, stmt, 𝕃ₒ) elseif is_known_call(stmt, isa, compact) - lift_comparison!(isa, compact, idx, stmt, lifting_cache, 𝕃ₒ) + lift_comparison!(isa, compact, idx, stmt, 𝕃ₒ) + elseif is_known_call(stmt, Core.ifelse, compact) + fold_ifelse!(compact, idx, stmt, 𝕃ₒ) + elseif is_known_invoke_or_call(stmt, Core.OptimizedGenerics.KeyValue.get, compact) + 2 == (length(stmt.args) - (isexpr(stmt, :invoke) ? 2 : 1)) || continue + lift_keyvalue_get!(compact, idx, stmt, 𝕃ₒ) + elseif is_known_call(stmt, Core.current_scope, compact) + length(stmt.args) == 1 || continue + scope_mapping !== nothing || continue + bb = compact.active_result_bb + did_just_finish_bb(compact) && (bb -= 1) + enter_ssa = scope_mapping[bb] + enter_ssa == SSAValue(0) && continue + compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope + elseif isexpr(stmt, :new) + refine_new_effects!(𝕃ₒ, compact, idx, stmt) end continue end @@ -1045,29 +1383,27 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) # analyze `getfield` / `isdefined` / `setfield!` call val = stmt.args[2] end - struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) - if isa(struct_typ, Union) && struct_typ <: Tuple - struct_typ = unswitchtupleunion(struct_typ) - end - if isa(struct_typ, Union) && is_isdefined - lift_comparison!(isdefined, compact, idx, stmt, lifting_cache, 𝕃ₒ) + struct_typ = widenconst(argextype(val, compact)) + struct_argtyp = argument_datatype(struct_typ) + if struct_argtyp === nothing + if isa(struct_typ, Union) && is_isdefined + lift_comparison!(isdefined, compact, idx, stmt, 𝕃ₒ) + end continue end - isa(struct_typ, DataType) || continue + struct_typ_name = struct_argtyp.name - struct_typ.name.atomicfields == C_NULL || continue # TODO: handle more + struct_typ_name.atomicfields == C_NULL || continue # TODO: handle more if !((field_ordering === :unspecified) || (field_ordering isa Const && field_ordering.val === :not_atomic)) continue end - # analyze this mutable struct here for the later pass - if ismutabletype(struct_typ) + if ismutabletypename(struct_typ_name) isa(val, SSAValue) || continue let intermediaries = SPCSet() - callback = IntermediaryCollector(intermediaries) - def = simple_walk(compact, 
val, callback) + def = simple_walk(compact, val, IntermediaryCollector(intermediaries)) # Mutable stuff here isa(def, SSAValue) || continue if defuses === nothing @@ -1093,11 +1429,10 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) end # perform SROA on immutable structs here on - field = try_compute_fieldidx_stmt(compact, stmt, struct_typ) field === nothing && continue - leaves, visited_philikes = collect_leaves(compact, val, struct_typ, 𝕃ₒ) + leaves, visited_philikes = collect_leaves(compact, val, struct_typ, 𝕃ₒ, phi_or_ifelse_predecessors) isempty(leaves) && continue lifted_result = lift_leaves(compact, field, leaves, 𝕃ₒ) @@ -1110,8 +1445,25 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact)) end - lifted_val = perform_lifting!(compact, - visited_philikes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) + (lifted_val, nest) = perform_lifting!(compact, + visited_philikes, field, result_t, lifted_leaves, val, lazydomtree) + + should_delete_node = false + line = compact[SSAValue(idx)][:line] + if lifted_val !== nothing && !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t) + compact[idx] = lifted_val === nothing ? nothing : lifted_val.val + add_flag!(compact[SSAValue(idx)], IR_FLAG_REFINED) + elseif lifted_val === nothing || isa(lifted_val.val, AnySSAValue) + # Save some work in a later compaction, by inserting this into the renamer now, + # but only do this if we didn't set the REFINED flag, to save work for irinterp + # in revisiting only the renamings that came through *this* idx. + compact.ssa_rename[old_idx] = lifted_val === nothing ? nothing : lifted_val.val + should_delete_node = true + else + compact[idx] = lifted_val === nothing ? nothing : lifted_val.val + end + + finish_phi_nest!(compact, nest) # Insert the undef check if necessary if any_undef @@ -1122,23 +1474,29 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) for (k, v) in pairs(lifted_leaves) lifted_leaves_def[k] = v === nothing ? false : true end - def_val = perform_lifting!(compact, - visited_philikes, field, def_lifting_cache, Bool, lifted_leaves_def, val, lazydomtree).val + (def_val, nest) = perform_lifting!(compact, + visited_philikes, field, Bool, lifted_leaves_def, val, lazydomtree) + def_val = (def_val::LiftedValue).val + finish_phi_nest!(compact, nest) + end + throw_expr = Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val) + if should_delete_node + # Replace the node we already have rather than deleting/re-inserting. + # This way it is easier to handle BB boundary corner cases. + compact[SSAValue(idx)] = throw_expr + compact[SSAValue(idx)][:type] = Nothing + compact[SSAValue(idx)][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_CONSISTENT | IR_FLAG_NOUB + should_delete_node = false + else + ni = NewInstruction(throw_expr, Nothing, line) + insert_node!(compact, SSAValue(idx), ni) end - insert_node!(compact, SSAValue(idx), NewInstruction( - Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing)) - else # val must be defined @assert lifted_val !== nothing end - compact[idx] = lifted_val === nothing ? 
nothing : lifted_val.val - if lifted_val !== nothing - if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t) - compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED - end - end + should_delete_node && delete_inst_here!(compact) end non_dce_finish!(compact) @@ -1164,61 +1522,60 @@ end # NOTE we resolve the inlining source here as we don't want to serialize `Core.Compiler` # data structure into the global cache (see the comment in `handle_finalizer_call!`) function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, - mi::MethodInstance, @nospecialize(info::CallInfo), inlining::InliningState, + code::CodeInstance, @nospecialize(info::CallInfo), inlining::InliningState, attach_after::Bool) - code = get(code_cache(inlining), mi, nothing) + mi = code.def et = InliningEdgeTracker(inlining) if code isa CodeInstance if use_const_api(code) # No code in the function - Nothing to do - add_inlining_backedge!(et, mi) + add_inlining_edge!(et, code) return true end src = @atomic :monotonic code.inferred else - src = nothing + return false end - src = inlining_policy(inlining.interp, src, info, IR_FLAG_NULL, mi, Any[]) - src === nothing && return false - src = retrieve_ir_for_inlining(mi, src) + src_inlining_policy(inlining.interp, src, info, IR_FLAG_NULL) || return false + src, spec_info, di = retrieve_ir_for_inlining(code, src) # For now: Require finalizer to only have one basic block length(src.cfg.blocks) == 1 || return false # Ok, we're committed to inlining the finalizer - add_inlining_backedge!(et, mi) + add_inlining_edge!(et, code) # TODO: Should there be a special line number node for inlined finalizers? - inlined_at = ir[SSAValue(idx)][:line] - ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, - ir.linetable, src, mi.sparam_vals, mi, inlined_at, argexprs) + inline_at = ir[SSAValue(idx)][:line] + ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, spec_info, di, mi, inline_at, argexprs) # TODO: Use the actual inliner here rather than open coding this special purpose inliner. 
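# A minimal illustrative sketch (not part of this diff) of the kind of finalizer this
# fast path accepts: a straight-line body, so its IR has exactly one basic block --
# the precondition `length(src.cfg.blocks) == 1` checked above. `Handle`, `release`
# and `use_handle` are hypothetical names; whether inlining actually fires depends on
# inference and escape analysis in the running Julia build.
mutable struct Handle
    ptr::Ptr{Cvoid}
end

release(h::Handle) = (h.ptr = C_NULL; nothing)   # single-basic-block finalizer body

function use_handle()
    h = Handle(C_NULL)
    finalizer(release, h)   # the registration the optimizer tries to resolve
    return h.ptr            # last use of `h`; the finalizer body may be inlined after it
end

use_handle()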
- spvals = mi.sparam_vals ssa_rename = Vector{Any}(undef, length(src.stmts)) for idx′ = 1:length(src.stmts) inst = src[SSAValue(idx′)] - stmt′ = inst[:inst] + stmt′ = inst[:stmt] isa(stmt′, ReturnNode) && continue stmt′ = ssamap(stmt′) do ssa::SSAValue ssa_rename[ssa.id] end - stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, argexprs, mi.specTypes, mi.sparam_vals, sp_ssa, :default) + stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, ssa_substitute) ssa_rename[idx′] = insert_node!(ir, idx, - NewInstruction(inst; stmt=stmt′, line=inst[:line]+linetable_offset), + NewInstruction(inst; stmt=stmt′, line=(ssa_substitute.inlined_at[1], ssa_substitute.inlined_at[2], Int32(idx′))), attach_after) end return true end -is_nothrow(ir::IRCode, ssa::SSAValue) = (ir[ssa][:flag] & IR_FLAG_NOTHROW) ≠ 0 +is_nothrow(ir::IRCode, ssa::SSAValue) = has_flag(ir[ssa], IR_FLAG_NOTHROW) -function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = nothing) +function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int) worklist = Int[from_bb] visited = BitSet(from_bb) - if to_bb !== nothing + if to_bb == from_bb + return visited + else push!(visited, to_bb) end function visit!(bb::Int) @@ -1233,166 +1590,178 @@ function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = no return visited end -function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, +function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int, defuse::SSADefUse, inlining::InliningState, lazydomtree::LazyDomtree, lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo)) # For now, require that: # 1. The allocation dominates the finalizer registration - # 2. The finalizer registration dominates all uses reachable from the - # finalizer registration. - # 3. The insertion block for the finalizer is the post-dominator of all - # uses and the finalizer registration block. The insertion block must - # be dominated by the finalizer registration block. - # 4. The path from the finalizer registration to the finalizer inlining + # 2. The insertion block for the finalizer is the post-dominator of all + # uses (including the finalizer registration). + # 3. The path from the finalizer registration to the finalizer inlining # location is nothrow # - # TODO: We could relax item 3, by inlining the finalizer multiple times. + # TODO: We could relax the check 2, by inlining the finalizer multiple times. # Check #1: The allocation dominates the finalizer registration domtree = get!(lazydomtree) finalizer_bb = block_for_inst(ir, finalizer_idx) - alloc_bb = block_for_inst(ir, idx) + alloc_bb = block_for_inst(ir, alloc_idx) dominates(domtree, alloc_bb, finalizer_bb) || return nothing - bb_insert_block::Int = finalizer_bb - bb_insert_idx::Union{Int,Nothing} = finalizer_idx - function note_block_use!(usebb::Int, useidx::Int) - new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree), - bb_insert_block, usebb) - if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing - bb_insert_idx = max(bb_insert_idx::Int, useidx) - elseif new_bb_insert_block == usebb - bb_insert_idx = useidx + # Check #2: The insertion block for the finalizer is the post-dominator of all uses + insert_bb::Int = finalizer_bb + insert_idx::Union{Int,Nothing} = finalizer_idx + function note_defuse!(x::Union{Int,SSAUse}) + defuse_idx = x isa SSAUse ? 
x.idx : x + defuse_idx == finalizer_idx && return nothing + defuse_bb = block_for_inst(ir, defuse_idx) + new_insert_bb = nearest_common_dominator(get!(lazypostdomtree), + insert_bb, defuse_bb) + if new_insert_bb == insert_bb && insert_idx !== nothing + insert_idx = max(insert_idx::Int, defuse_idx) + elseif new_insert_bb == defuse_bb + insert_idx = defuse_idx else - bb_insert_idx = nothing + insert_idx = nothing end - bb_insert_block = new_bb_insert_block + insert_bb = new_insert_bb nothing end - - # Collect all reachable blocks between the finalizer registration and the - # insertion point - blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb) - - # Check #2 - function check_defuse(x::Union{Int,SSAUse}) - duidx = x isa SSAUse ? x.idx : x - duidx == finalizer_idx && return true - bb = block_for_inst(ir, duidx) - # Not reachable from finalizer registration - we're ok - bb ∉ blocks && return true - note_block_use!(bb, duidx) - if dominates(domtree, finalizer_bb, bb) - return true - else - return false - end - end - all(check_defuse, defuse.uses) || return nothing - all(check_defuse, defuse.defs) || return nothing - - # Check #3 - dominates(domtree, finalizer_bb, bb_insert_block) || return nothing + foreach(note_defuse!, defuse.uses) + foreach(note_defuse!, defuse.defs) + insert_bb != 0 || return nothing # verify post-dominator of all uses exists if !OptimizationParams(inlining.interp).assume_fatal_throw # Collect all reachable blocks between the finalizer registration and the # insertion point - blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] : - reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block) + blocks = reachable_blocks(ir.cfg, finalizer_bb, insert_bb) - # Check #4 - function check_range_nothrow(ir::IRCode, s::Int, e::Int) + # Check #3 + function check_range_nothrow(s::Int, e::Int) return all(s:e) do sidx::Int sidx == finalizer_idx && return true - sidx == idx && return true + sidx == alloc_idx && return true return is_nothrow(ir, SSAValue(sidx)) end end for bb in blocks range = ir.cfg.blocks[bb].stmts s, e = first(range), last(range) - if bb == bb_insert_block - bb_insert_idx === nothing && continue - e = bb_insert_idx + if bb == insert_bb + insert_idx === nothing && continue + e = insert_idx end if bb == finalizer_bb s = finalizer_idx end - check_range_nothrow(ir, s, e) || return nothing + check_range_nothrow(s, e) || return nothing end end # Ok, legality check complete. Figure out the exact statement where we're - # gonna inline the finalizer. - loc = bb_insert_idx === nothing ? first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx::Int - attach_after = bb_insert_idx !== nothing + # going to inline the finalizer. + loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int + attach_after = insert_idx !== nothing - finalizer_stmt = ir[SSAValue(finalizer_idx)][:inst] + finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt] argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]] - flags = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL + flag = info isa FinalizerInfo ? 
flags_for_effects(info.effects) : IR_FLAG_NULL if length(finalizer_stmt.args) >= 4 inline = finalizer_stmt.args[4] if inline === nothing # No code in the function - Nothing to do else - mi = finalizer_stmt.args[5]::MethodInstance - if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, mi, info, inlining, attach_after) + ci = finalizer_stmt.args[5]::CodeInstance + if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, ci, info, inlining, attach_after) # the finalizer body has been inlined else - insert_node!(ir, loc, with_flags(NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), flags), attach_after) + newinst = add_flag(NewInstruction(Expr(:invoke, ci, argexprs...), Nothing), flag) + insert_node!(ir, loc, newinst, attach_after) end end else - insert_node!(ir, loc, with_flags(NewInstruction(Expr(:call, argexprs...), Nothing), flags), attach_after) + newinst = add_flag(NewInstruction(Expr(:call, argexprs...), Nothing), flag) + insert_node!(ir, loc, newinst, attach_after) end # Erase the call to `finalizer` - ir[SSAValue(finalizer_idx)][:inst] = nothing + ir[SSAValue(finalizer_idx)][:stmt] = nothing return nothing end -function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) +function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState}) 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) lazypostdomtree = LazyPostDomtree(ir) - for (idx, (intermediaries, defuse)) in defuses - intermediaries = collect(intermediaries) - # Check if there are any uses we did not account for. If so, the variable - # escapes and we cannot eliminate the allocation. This works, because we're guaranteed - # not to include any intermediaries that have dead uses. As a result, missing uses will only ever - # show up in the nuses_total count. - nleaves = length(defuse.uses) + length(defuse.defs) - nuses = 0 - for idx in intermediaries - nuses += used_ssas[idx] + function find_finalizer_useidx(defuse::SSADefUse) + finalizer_useidx = nothing + for (useidx, use) in enumerate(defuse.uses) + if use.kind === :finalizer + # For now: Only allow one finalizer per allocation + finalizer_useidx !== nothing && return false + finalizer_useidx = useidx + end end - nuses_total = used_ssas[idx] + nuses - length(intermediaries) - nleaves == nuses_total || continue + if finalizer_useidx === nothing || inlining === nothing + return true + end + return finalizer_useidx + end + for (defidx, (intermediaries, defuse)) in defuses # Find the type for this allocation - defexpr = ir[SSAValue(idx)][:inst] + defexpr = ir[SSAValue(defidx)][:stmt] isexpr(defexpr, :new) || continue - newidx = idx - typ = unwrap_unionall(ir.stmts[newidx][:type]) + typ = unwrap_unionall(ir.stmts[defidx][:type]) # Could still end up here if we tried to setfield! on an immutable, which would # error at runtime, but is not illegal to have in the IR. typ = widenconst(typ) ismutabletype(typ) || continue typ = typ::DataType - # First check for any finalizer calls - finalizer_idx = nothing - for use in defuse.uses - if use.kind === :finalizer - # For now: Only allow one finalizer per allocation - finalizer_idx !== nothing && @goto skip - finalizer_idx = use.idx - end + # Check if there are any uses we did not account for. If so, the variable + # escapes and we cannot eliminate the allocation. 
This works, because we're guaranteed + # not to include any intermediaries that have dead uses. As a result, missing uses will only ever + # show up in the nuses_total count. + nleaves = length(defuse.uses) + length(defuse.defs) + nuses = 0 + for iidx in intermediaries + nuses += used_ssas[iidx] end - if finalizer_idx !== nothing && inlining !== nothing - try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining, - lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) + nuses_total = used_ssas[defidx] + nuses - length(intermediaries) + all_eliminated = all_forwarded = true + if nleaves ≠ nuses_total + finalizer_useidx = find_finalizer_useidx(defuse) + if finalizer_useidx isa Int + nargs = length(ir.argtypes) # COMBAK this might need to be `Int(opt.src.nargs)` + estate = EscapeAnalysis.analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache(inlining.interp)) + # disable finalizer inlining when this allocation is aliased to somewhere, + # mostly likely to edges of `PhiNode` + hasaliases = EscapeAnalysis.getaliases(SSAValue(defidx), estate) !== nothing + einfo = estate[SSAValue(defidx)] + if !hasaliases && EscapeAnalysis.has_no_escape(einfo) + already = BitSet(use.idx for use in defuse.uses) + for idx = einfo.Liveness + if idx ∉ already + push!(defuse.uses, SSAUse(:EALiveness, idx)) + end + end + finalizer_idx = defuse.uses[finalizer_useidx].idx + try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState, + lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) + end + end continue + else + finalizer_useidx = find_finalizer_useidx(defuse) + if finalizer_useidx isa Int + finalizer_idx = defuse.uses[finalizer_useidx].idx + try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState, + lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) + deleteat!(defuse.uses, finalizer_useidx) + all_eliminated = all_forwarded = false # can't eliminate `setfield!` calls safely + elseif !finalizer_useidx + continue + end end # Partition defuses by field fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)] - all_eliminated = all_forwarded = true for use in defuse.uses if use.kind === :preserve for du in fielddefuse @@ -1400,7 +1769,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end continue end - stmt = ir[SSAValue(use.idx)][:inst] # == `getfield`/`isdefined` call + stmt = ir[SSAValue(use.idx)][:stmt] # == `getfield`/`isdefined` call # We may have discovered above that this use is dead # after the getfield elim of immutables. In that case, # it would have been deleted. That's fine, just ignore @@ -1414,7 +1783,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse push!(fielddefuse[field].uses, use) end for def in defuse.defs - stmt = ir[SSAValue(def)][:inst]::Expr # == `setfield!` call + stmt = ir[SSAValue(def)][:stmt]::Expr # == `setfield!` call field = try_compute_fieldidx_stmt(ir, stmt, typ) field === nothing && @goto skip isconst(typ, field) && @goto skip # we discovered an attempt to mutate a const field, which must error @@ -1425,11 +1794,11 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # but we should come up with semantics for well defined semantics # for uninitialized fields first. 
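# A minimal, user-level sketch (not part of this diff) of what `sroa_mutables!` aims to
# enable: a mutable allocation that never escapes, whose `setfield!`/`getfield` pairs can
# be forwarded field by field. `Counter` and `bump` are hypothetical names; whether the
# allocation is fully eliminated also depends on inference and escape analysis in the
# running build.
mutable struct Counter
    n::Int
end

function bump(x::Int)
    c = Counter(0)   # allocation: candidate for elimination
    c.n = x          # `setfield!` def of field `n`
    c.n += 1         # `getfield` use followed by another def
    return c.n       # final use; `c` itself never escapes
end

bump(41)                      # => 42
# code_typed(bump, (Int,))    # inspect whether the `Counter` allocation survives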
ndefuse = length(fielddefuse) - blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse) + blocks = Vector{Tuple{#=phiblocks=#Vector{Int},#=allblocks=#BitSet}}(undef, ndefuse) for fidx in 1:ndefuse du = fielddefuse[fidx] isempty(du.uses) && continue - push!(du.defs, newidx) + push!(du.defs, defidx) ldu = compute_live_ins(ir.cfg, du) if isempty(ldu.live_in_bbs) phiblocks = Int[] @@ -1442,8 +1811,8 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse for i = 1:length(du.uses) use = du.uses[i] if use.kind === :isdefined - if has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) - ir[SSAValue(use.idx)][:inst] = true + if has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx) + ir[SSAValue(use.idx)][:stmt] = true else all_eliminated = false end @@ -1455,13 +1824,13 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse continue end end - has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip + has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx) || @goto skip end else # always have some definition at the allocation site for i = 1:length(du.uses) use = du.uses[i] if use.kind === :isdefined - ir[SSAValue(use.idx)][:inst] = true + ir[SSAValue(use.idx)][:stmt] = true end end end @@ -1485,9 +1854,10 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # Now go through all uses and rewrite them for use in du.uses if use.kind === :getfield - ir[SSAValue(use.idx)][:inst] = compute_value_for_use(ir, domtree, allblocks, + inst = ir[SSAValue(use.idx)] + inst[:stmt] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use.idx) - ir[SSAValue(use.idx)][:flag] |= IR_FLAG_REFINED + add_flag!(inst, IR_FLAG_REFINED) elseif use.kind === :isdefined continue # already rewritten if possible elseif use.kind === :nopreserve @@ -1506,7 +1876,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end end for b in phiblocks - n = ir[phinodes[b]][:inst]::PhiNode + n = ir[phinodes[b]][:stmt]::PhiNode result_t = Bottom for p in ir.cfg.blocks[b].preds push!(n.edges, p) @@ -1521,19 +1891,19 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # all "usages" (i.e. `getfield` and `isdefined` calls) are eliminated, # now eliminate "definitions" (i.e. `setfield!`) calls # (NOTE the allocation itself will be eliminated by DCE pass later) - for idx in du.defs - idx == newidx && continue # this is allocation + for didx in du.defs + didx == defidx && continue # this is allocation # verify this statement won't throw, otherwise it can't be eliminated safely - ssa = SSAValue(idx) - if is_nothrow(ir, ssa) - ir[ssa][:inst] = nothing + setfield_ssa = SSAValue(didx) + if is_nothrow(ir, setfield_ssa) + ir[setfield_ssa][:stmt] = nothing else # We can't eliminate this statement, because it might still # throw an error, but we can mark it as effect-free since we # know we have removed all uses of the mutable allocation. # As a result, if we ever do prove nothrow, we can delete # this statement then. - ir[ssa][:flag] |= IR_FLAG_EFFECT_FREE + add_flag!(ir[setfield_ssa], IR_FLAG_EFFECT_FREE) end end end @@ -1542,11 +1912,11 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # this means all ccall preserves have been replaced with forwarded loads # so we can potentially eliminate the allocation, otherwise we must preserve # the whole allocation. 
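# A small sketch (not part of this diff) of the preserve rewrite this feeds into
# (`form_new_preserves`, below): once a wrapper aggregate is scalar-replaced, it suffices
# to keep its field values rooted across the foreign call instead of the wrapper itself.
# `CStr` and `c_length` are hypothetical; the explicit `GC.@preserve` + `pointer` pattern
# is what produces the `gc_preserve_begin`/`gc_preserve_end` intrinsics handled here.
mutable struct CStr
    data::Vector{UInt8}
end

function c_length(bytes::Vector{UInt8})
    s = CStr(bytes)           # local wrapper allocation: SROA candidate
    GC.@preserve s begin      # the source preserves the wrapper; if `s` is scalar-replaced,
                              # the preserve may be rewritten to root its field (`bytes`) instead
        ccall(:strlen, Csize_t, (Ptr{UInt8},), pointer(s.data))
    end
end

c_length(UInt8['h', 'i', 0x00])   # => 2 (as a Csize_t)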
- push!(intermediaries, newidx) + push!(intermediaries, defidx) end # Insert the new preserves for (useidx, new_preserves) in preserve_uses - ir[SSAValue(useidx)][:inst] = form_new_preserves(ir[SSAValue(useidx)][:inst]::Expr, + ir[SSAValue(useidx)][:stmt] = form_new_preserves(ir[SSAValue(useidx)][:stmt]::Expr, intermediaries, new_preserves) end @@ -1554,7 +1924,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end end -function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preserves::Vector{Any}) +function form_new_preserves(origex::Expr, intermediaries::Union{Vector{Int},SPCSet}, new_preserves::Vector{Any}) newex = Expr(:foreigncall) nccallargs = length(origex.args[3]::SimpleVector) for i in 1:(6+nccallargs-1) @@ -1563,7 +1933,7 @@ function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preser for i in (6+nccallargs):length(origex.args) x = origex.args[i] # don't need to preserve intermediaries - if isa(x, SSAValue) && x.id in intermediates + if isa(x, SSAValue) && x.id in intermediaries continue end push!(newex.args, x) @@ -1597,7 +1967,7 @@ end function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, in_worklist::Bool) # return whether this made a change - if isa(compact.result[idx][:inst], PhiNode) + if isa(compact.result[idx][:stmt], PhiNode) return maybe_erase_unused!(compact, idx, in_worklist, extra_worklist) do val::SSAValue phi_uses[val.id] -= 1 end @@ -1612,10 +1982,10 @@ function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::I while !isempty(worklist) phi = pop!(worklist) push!(safe_phis, phi) - for ur in userefs(compact.result[phi][:inst]) + for ur in userefs(compact.result[phi][:stmt]) val = ur[] isa(val, SSAValue) || continue - isa(compact[val][:inst], PhiNode) || continue + isa(compact[val][:stmt], PhiNode) || continue (val.id in safe_phis) && continue push!(worklist, val.id) end @@ -1628,7 +1998,7 @@ end function is_union_phi(compact::IncrementalCompact, idx::Int) inst = compact.result[idx] - isa(inst[:inst], PhiNode) || return false + isa(inst[:stmt], PhiNode) || return false return is_some_union(inst[:type]) end @@ -1680,9 +2050,15 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes)) all_phis = Int[] unionphis = Pair{Int,Any}[] # sorted - compact = IncrementalCompact(ir) - for ((_, idx), stmt) in compact + compact = IncrementalCompact(ir, true) + made_changes = false + for ((old_idx, idx), stmt) in compact if isa(stmt, PhiNode) + if reprocess_phi_node!(𝕃ₒ, compact, stmt, old_idx) + # Phi node has a single predecessor and was deleted + made_changes = true + continue + end push!(all_phis, idx) if is_some_union(compact.result[idx][:type]) push!(unionphis, Pair{Int,Any}(idx, Union{})) @@ -1700,9 +2076,9 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) else if is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3 # nullify safe `typeassert` calls - ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact)) + ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact), true) if isexact && ⊑(𝕃ₒ, argextype(stmt.args[2], compact), ty) - compact[idx] = nothing + delete_inst_here!(compact) continue end end @@ -1720,11 +2096,11 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) non_dce_finish!(compact) for phi in all_phis inst = compact.result[phi] - for ur 
in userefs(inst[:inst]::PhiNode) + for ur in userefs(inst[:stmt]::PhiNode) use = ur[] if isa(use, SSAValue) phi_uses[use.id] += 1 - stmt = compact.result[use.id][:inst] + stmt = compact.result[use.id][:stmt] if isa(stmt, PhiNode) r = searchsorted(unionphis, use.id; by=first) if !isempty(r) @@ -1741,17 +2117,19 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) unionphi = unionphis[i] phi = unionphi[1] t = unionphi[2] + inst = compact.result[phi] if t === Union{} - stmt = compact[SSAValue(phi)][:inst]::PhiNode + stmt = inst[:stmt]::PhiNode kill_phi!(compact, phi_uses, 1:length(stmt.values), SSAValue(phi), stmt, true) + made_changes = true continue elseif t === Any continue - elseif ⊑(𝕃ₒ, compact.result[phi][:type], t) - continue end + ⊏ = strictpartialorder(𝕃ₒ) + t ⊏ inst[:type] || continue to_drop = Int[] - stmt = compact[SSAValue(phi)][:inst] + stmt = inst[:stmt] stmt === nothing && continue stmt = stmt::PhiNode for i = 1:length(stmt.values) @@ -1763,18 +2141,20 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) push!(to_drop, i) end end - compact.result[phi][:type] = t + inst[:type] = t + add_flag!(inst, IR_FLAG_REFINED) # t ⊏ inst[:type] kill_phi!(compact, phi_uses, to_drop, SSAValue(phi), stmt, false) + made_changes = true end # Perform simple DCE for unused values extra_worklist = Int[] for (idx, nused) in Iterators.enumerate(compact.used_ssas) idx >= compact.result_idx && break nused == 0 || continue - adce_erase!(phi_uses, extra_worklist, compact, idx, false) + made_changes |= adce_erase!(phi_uses, extra_worklist, compact, idx, false) end while !isempty(extra_worklist) - adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true) + made_changes |= adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true) end # Go back and erase any phi cycles changed = true @@ -1795,16 +2175,18 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) while !isempty(extra_worklist) if adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true) changed = true + made_changes = true end end end - return complete(compact) + + return Pair{IRCode, Bool}(complete(compact), made_changes) end function is_bb_empty(ir::IRCode, bb::BasicBlock) isempty(bb.stmts) && return true if length(bb.stmts) == 1 - stmt = ir[SSAValue(first(bb.stmts))][:inst] + stmt = ir[SSAValue(first(bb.stmts))][:stmt] return stmt === nothing || isa(stmt, GotoNode) end return false @@ -1814,14 +2196,6 @@ end function is_legal_bb_drop(ir::IRCode, bbidx::Int, bb::BasicBlock) # For the time being, don't drop the first bb, because it has special predecessor semantics. bbidx == 1 && return false - # If the block we're going to is the same as the fallthrow, it's always legal to drop - # the block. - length(bb.stmts) == 0 && return true - if length(bb.stmts) == 1 - stmt = ir[SSAValue(first(bb.stmts))][:inst] - stmt === nothing && return true - ((stmt::GotoNode).label == bbidx + 1) && return true - end return true end @@ -1834,23 +2208,23 @@ function legalize_bb_drop_pred!(ir::IRCode, bb::BasicBlock, bbidx::Int, bbs::Vec dbi -= 1 end last_fallthrough_term_ssa = SSAValue(last(bbs[last_fallthrough].stmts)) - terminator = ir[last_fallthrough_term_ssa][:inst] + terminator = ir[last_fallthrough_term_ssa][:stmt] if isa(terminator, GotoIfNot) if terminator.dest != bbidx # The previous terminator's destination matches our fallthrough. # If we're also a fallthrough terminator, then we just have # to delete the GotoIfNot. 
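# A source-level sketch (not part of this diff) of the CFG shape this fixup targets: an
# empty conditional arm leaves behind an empty basic block whose predecessor ends in a
# `goto ... if not`; once the empty block is dropped, both edges of that branch reach the
# same target, and the conditional jump can be deleted or turned into a plain goto.
# `converge` is a hypothetical name; the exact IR depends on the running build.
function converge(x::Bool)
    if x
        # empty arm: lowers to a basic block with no useful statements
    end
    return 1
end

converge(true)                    # => 1
# code_typed(converge, (Bool,))   # inspect whether the branch survives optimization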
- our_terminator = ir[SSAValue(last(bb.stmts))][:inst] + our_terminator = ir[SSAValue(last(bb.stmts))][:stmt] if terminator.dest != (isa(our_terminator, GotoNode) ? our_terminator.label : bbidx + 1) return false end end ir[last_fallthrough_term_ssa] = nothing kill_edge!(bbs, last_fallthrough, terminator.dest) - elseif isexpr(terminator, :enter) - return false elseif isa(terminator, GotoNode) return true + elseif isterminator(terminator) + return false end # Hack, but effective. If we have a predecessor with a fall-through terminator, change the # instruction numbering to merge the blocks now such that below processing will properly @@ -1859,59 +2233,109 @@ function legalize_bb_drop_pred!(ir::IRCode, bb::BasicBlock, bbidx::Int, bbs::Vec return true end -is_terminator(@nospecialize(inst)) = isa(inst, GotoNode) || isa(inst, GotoIfNot) || isexpr(inst, :enter) +function follow_map(map::Vector{Int}, idx::Int) + while map[idx] ≠ 0 + idx = map[idx] + end + return idx +end -function cfg_simplify!(ir::IRCode) - bbs = ir.cfg.blocks - merge_into = zeros(Int, length(bbs)) - merged_succ = zeros(Int, length(bbs)) - dropped_bbs = Vector{Int}() # sorted - function follow_merge_into(idx::Int) - while merge_into[idx] != 0 - idx = merge_into[idx] - end - return idx +function ascend_eliminated_preds(bbs::Vector{BasicBlock}, pred::Int) + pred == 0 && return pred + while pred != 1 && length(bbs[pred].preds) == 1 && length(bbs[pred].succs) == 1 + pred = bbs[pred].preds[1] end - function follow_merged_succ(idx::Int) - while merged_succ[idx] != 0 - idx = merged_succ[idx] + return pred +end + +# Compute (renamed) successors and predecessors given (renamed) block +function compute_succs(merged_succ::Vector{Int}, bbs::Vector{BasicBlock}, result_bbs::Vector{Int}, bb_rename_succ::Vector{Int}, i::Int) + orig_bb = follow_map(merged_succ, result_bbs[i]) + return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs] +end + +function compute_preds(bbs::Vector{BasicBlock}, result_bbs::Vector{Int}, bb_rename_pred::Vector{Int}, i::Int) + orig_bb = result_bbs[i] + preds = copy(bbs[orig_bb].preds) + res = Int[] + while !isempty(preds) + pred = popfirst!(preds) + if pred == 0 + push!(res, 0) + continue + end + r = bb_rename_pred[pred] + (r == -2 || r == -1) && continue + if r == -3 + prepend!(preds, bbs[pred].preds) + else + push!(res, r) end - return idx end - function ascend_eliminated_preds(pred) - while pred != 1 && length(bbs[pred].preds) == 1 && length(bbs[pred].succs) == 1 - pred = bbs[pred].preds[1] + return res +end + +function add_preds!(all_new_preds::Vector{Int32}, bbs::Vector{BasicBlock}, bb_rename_pred::Vector{Int}, old_edge::Int32) + preds = copy(bbs[old_edge].preds) + while !isempty(preds) + old_edge′ = popfirst!(preds) + if old_edge′ == 0 + push!(all_new_preds, old_edge′) + continue + end + new_edge = bb_rename_pred[old_edge′] + if new_edge > 0 && new_edge ∉ all_new_preds + push!(all_new_preds, Int32(new_edge)) + elseif new_edge == -3 + prepend!(preds, bbs[old_edge′].preds) end - return pred end +end + +function cfg_simplify!(ir::IRCode) + bbs = ir.cfg.blocks + merge_into = zeros(Int, length(bbs)) + merged_succ = zeros(Int, length(bbs)) + dropped_bbs = Vector{Int}() # sorted # Walk the CFG from the entry block and aggressively combine blocks for (idx, bb) in enumerate(bbs) if length(bb.succs) == 1 succ = bb.succs[1] if length(bbs[succ].preds) == 1 && succ != 1 - # Can't merge blocks with :enter terminator even if they - # only have one successor. 
- if isexpr(ir[SSAValue(last(bb.stmts))][:inst], :enter) + # Can't merge blocks with a non-GotoNode terminator, even if they + # only have one successor, because it would not be legal to have that + # terminator in the middle of a basic block. + terminator = ir[SSAValue(last(bb.stmts))][:stmt] + if !isa(terminator, GotoNode) && isterminator(terminator) continue end # Prevent cycles by making sure we don't end up back at `idx` # by following what is to be merged into `succ` - if follow_merged_succ(succ) != idx + if follow_map(merged_succ, succ) != idx merge_into[succ] = idx merged_succ[idx] = succ end elseif merge_into[idx] == 0 && is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb) # If this BB is empty, we can still merge it as long as none of our successor's phi nodes # reference our predecessors. + # + # This is for situations like: + # #1 - ... + # goto #3 if not ... + # #2 - (empty) + # #3 - ϕ(#2 => true, #1 => false) + # + # where we rely on the empty basic block to disambiguate the ϕ-node's value + found_interference = false - preds = Int[ascend_eliminated_preds(pred) for pred in bb.preds] + preds = Int[ascend_eliminated_preds(bbs, pred) for pred in bb.preds] for idx in bbs[succ].stmts - stmt = ir[SSAValue(idx)][:inst] + stmt = ir[SSAValue(idx)][:stmt] stmt === nothing && continue isa(stmt, PhiNode) || break for edge in stmt.edges - edge = ascend_eliminated_preds(edge) + edge = ascend_eliminated_preds(bbs, Int(edge)) for pred in preds if pred == edge found_interference = true @@ -1930,14 +2354,14 @@ function cfg_simplify!(ir::IRCode) # Assign new BB numbers in DFS order, dropping unreachable blocks max_bb_num = 1 - bb_rename_succ = fill(0, length(bbs)) + bb_rename_succ = zeros(Int, length(bbs)) worklist = BitSetBoundedMinPrioritySet(length(bbs)) push!(worklist, 1) while !isempty(worklist) i = popfirst!(worklist) # Drop blocks that will be merged away if merge_into[i] != 0 - bb_rename_succ[i] = -1 + bb_rename_succ[i] = typemin(Int) end # Mark dropped blocks for fixup if !isempty(searchsorted(dropped_bbs, i)) @@ -1957,20 +2381,36 @@ function cfg_simplify!(ir::IRCode) # we have to schedule that block next while merged_succ[curr] != 0 if bb_rename_succ[curr] == 0 - bb_rename_succ[curr] = -1 + bb_rename_succ[curr] = typemin(Int) end curr = merged_succ[curr] end - terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst] - if isa(terminator, GotoNode) || isa(terminator, ReturnNode) - break - elseif isa(terminator, GotoIfNot) + terminator = ir[SSAValue(bbs[curr].stmts[end])][:stmt] + + if isa(terminator, GotoIfNot) if bb_rename_succ[terminator.dest] == 0 push!(worklist, terminator.dest) end - elseif isexpr(terminator, :enter) - if bb_rename_succ[terminator.args[1]] == 0 - push!(worklist, terminator.args[1]) + elseif isa(terminator, EnterNode) + catchbb = terminator.catch_dest + if catchbb ≠ 0 + if bb_rename_succ[catchbb] == 0 + push!(worklist, catchbb) + end + end + elseif isa(terminator, GotoNode) || isa(terminator, ReturnNode) + # No implicit fall through. Schedule from work list. + break + else + is_bottom = ir[SSAValue(bbs[curr].stmts[end])][:type] === Union{} + if is_bottom && !isa(terminator, PhiNode) && terminator !== nothing + # If this is a regular statement (not PhiNode/GotoNode/GotoIfNot + # or the `nothing` special case deletion marker), + # and the type is Union{}, then this may be a terminator. + # Ordinarily we normalize with ReturnNode(), but this is not + # required. In any case, we do not fall through, so we + # do not need to schedule the fall-through block. 
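# A source-level sketch (not part of this diff) of the `Union{}` case above: a call
# inferred to return `Union{}` (here `error`) never continues to the next statement, so
# its block has no fall-through successor to schedule; in optimized IR such a call is
# typically followed by an `unreachable` marker rather than a jump. `checked_div` is a
# hypothetical name.
function checked_div(a::Int, b::Int)
    b == 0 && error("division by zero")   # `error(...)` is inferred as `Union{}`
    return a ÷ b
end

checked_div(10, 2)   # => 5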
+ break end end ncurr = curr + 1 @@ -1996,9 +2436,9 @@ function cfg_simplify!(ir::IRCode) resolved_all = true for bb in dropped_bbs obb = bb_rename_succ[bb] - if obb < -1 + if obb < 0 && obb != typemin(Int) nsucc = bb_rename_succ[-obb] - if nsucc == -1 + if nsucc == typemin(Int) nsucc = -merge_into[-obb] end bb_rename_succ[bb] = nsucc @@ -2011,8 +2451,10 @@ function cfg_simplify!(ir::IRCode) bb_rename_pred = zeros(Int, length(bbs)) for i = 1:length(bbs) if bb_rename_succ[i] == 0 - bb_rename_succ[i] = -1 + bb_rename_succ[i] = -2 bb_rename_pred[i] = -2 + elseif bb_rename_succ[i] == typemin(Int) + bb_rename_succ[i] = -2 end end @@ -2056,7 +2498,7 @@ function cfg_simplify!(ir::IRCode) elseif is_multi bb_rename_pred[i] = -3 else - bbnum = follow_merge_into(pred) + bbnum = follow_map(merge_into, pred) bb_rename_pred[i] = bb_rename_succ[bbnum] end end @@ -2078,59 +2520,23 @@ function cfg_simplify!(ir::IRCode) bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i] end - cresult_bbs = let result_bbs = result_bbs, - merged_succ = merged_succ, - merge_into = merge_into, - bbs = bbs, - bb_rename_succ = bb_rename_succ - - # Compute (renamed) successors and predecessors given (renamed) block - function compute_succs(i::Int) - orig_bb = follow_merged_succ(result_bbs[i]) - return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs] - end - function compute_preds(i::Int) - orig_bb = result_bbs[i] - preds = bbs[orig_bb].preds - res = Int[] - function scan_preds!(preds::Vector{Int}) - for pred in preds - if pred == 0 - push!(res, 0) - continue - end - r = bb_rename_pred[pred] - (r == -2 || r == -1) && continue - if r == -3 - scan_preds!(bbs[pred].preds) - else - push!(res, r) - end - end - end - scan_preds!(preds) - return res - end - - BasicBlock[ - BasicBlock(StmtRange(bb_starts[i], - i+1 > length(bb_starts) ? - length(compact.result) : bb_starts[i+1]-1), - compute_preds(i), - compute_succs(i)) - for i = 1:length(result_bbs)] - end + cresult_bbs = BasicBlock[ + BasicBlock(StmtRange(bb_starts[i], + i+1 > length(bb_starts) ? length(compact.result) : bb_starts[i+1]-1), + compute_preds(bbs, result_bbs, bb_rename_pred, i), + compute_succs(merged_succ, bbs, result_bbs, bb_rename_succ, i)) + for i = 1:length(result_bbs)] # Fixup terminators for any blocks that would have caused double edges for (bbidx, (new_bb, old_bb)) in enumerate(zip(cresult_bbs, result_bbs)) @assert length(new_bb.succs) <= 2 length(new_bb.succs) <= 1 && continue if new_bb.succs[1] == new_bb.succs[2] - old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred) + old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred)::Int terminator = ir[SSAValue(last(bbs[old_bb2].stmts))] - @assert terminator[:inst] isa GotoIfNot + @assert terminator[:stmt] isa GotoIfNot # N.B.: The dest will be renamed in process_node! 
below - terminator[:inst] = GotoNode(terminator[:inst].dest) + terminator[:stmt] = GotoNode(terminator[:stmt].dest::Int) pop!(new_bb.succs) new_succ = cresult_bbs[new_bb.succs[1]] for (i, nsp) in enumerate(new_succ.preds) @@ -2145,25 +2551,32 @@ function cfg_simplify!(ir::IRCode) # Run instruction compaction to produce the result, # but we're messing with the CFG # so we don't want compaction to do so independently - compact = IncrementalCompact(ir, CFGTransformState(true, false, cresult_bbs, bb_rename_pred, bb_rename_succ)) + compact = IncrementalCompact(ir, CFGTransformState(true, false, cresult_bbs, bb_rename_pred, bb_rename_succ, nothing)) result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb bb_start = true while ms != 0 - for i in bbs[ms].stmts + old_bb_stmts = bbs[ms].stmts + for i in old_bb_stmts node = ir.stmts[i] compact.result[compact.result_idx] = node - if isa(node[:inst], GotoNode) && merged_succ[ms] != 0 + stmt = node[:stmt] + if isa(stmt, GotoNode) && merged_succ[ms] != 0 # If we merged a basic block, we need remove the trailing GotoNode (if any) - compact.result[compact.result_idx][:inst] = nothing - elseif isa(node[:inst], PhiNode) - phi = node[:inst] + compact.result[compact.result_idx][:stmt] = nothing + elseif isa(stmt, PhiNode) + phi = stmt values = phi.values (; ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact ssa_rename[i] = SSAValue(compact.result_idx) - processed_idx = i - renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing) + already_inserted = function (i::Int, val::OldSSAValue) + if val.id in old_bb_stmts + return val.id <= i + end + return bb_rename_pred[phi.edges[i]] < idx + end + renamed_values = process_phinode_values(values, late_fixup, already_inserted, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing) edges = Int32[] values = Any[] sizehint!(edges, length(phi.edges)); sizehint!(values, length(renamed_values)) @@ -2177,20 +2590,16 @@ function cfg_simplify!(ir::IRCode) else resize!(values, length(values)+1) end + elseif new_edge == -1 + @assert length(phi.edges) == 1 + if isassigned(renamed_values, old_index) + push!(edges, -1) + push!(values, renamed_values[old_index]) + end elseif new_edge == -3 # Multiple predecessors, we need to expand out this phi all_new_preds = Int32[] - function add_preds!(old_edge) - for old_edge′ in bbs[old_edge].preds - new_edge = bb_rename_pred[old_edge′] - if new_edge > 0 && !in(new_edge, all_new_preds) - push!(all_new_preds, new_edge) - elseif new_edge == -3 - add_preds!(old_edge′) - end - end - end - add_preds!(old_edge) + add_preds!(all_new_preds, bbs, bb_rename_pred, old_edge) append!(edges, all_new_preds) if isassigned(renamed_values, old_index) val = renamed_values[old_index] @@ -2209,19 +2618,19 @@ function cfg_simplify!(ir::IRCode) end end if length(edges) == 0 || (length(edges) == 1 && !isassigned(values, 1)) - compact.result[compact.result_idx][:inst] = nothing + compact.result[compact.result_idx][:stmt] = nothing elseif length(edges) == 1 && !bb_start - compact.result[compact.result_idx][:inst] = values[1] + compact.result[compact.result_idx][:stmt] = values[1] else @assert bb_start - compact.result[compact.result_idx][:inst] = PhiNode(edges, values) + compact.result[compact.result_idx][:stmt] = PhiNode(edges, values) end else ri = process_node!(compact, compact.result_idx, node, i, i, ms, true) if ri == compact.result_idx # process_node! 
wanted this statement dropped. We don't do this, # but we still need to erase the node - compact.result[compact.result_idx][:inst] = nothing + compact.result[compact.result_idx][:stmt] = nothing end end # We always increase the result index to ensure a predicatable diff --git a/base/compiler/ssair/show.jl b/Compiler/src/ssair/show.jl similarity index 64% rename from base/compiler/ssair/show.jl rename to Compiler/src/ssair/show.jl index b420eb32b1205..e63d7b5cf640e 100644 --- a/base/compiler/ssair/show.jl +++ b/Compiler/src/ssair/show.jl @@ -1,19 +1,20 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# This file is not loaded into `Core.Compiler` but rather loaded into the context of -# `Base.IRShow` and thus does not participate in bootstrapping. +# This file does not participate in bootstrapping, but is included in the system image by +# being loaded from `base/show.jl`. Compiler.jl as the standard library will simply include +# this file in the context of `Compiler.IRShow`. -@nospecialize +using Base, Core.IR -if Pair != Base.Pair -import Base: Base, IOContext, string, join, sprint -IOContext(io::IO, KV::Pair) = IOContext(io, Base.Pair(KV[1], KV[2])) -length(s::String) = Base.length(s) -^(s::String, i::Int) = Base.:^(s, i) -end +import Base: show +using Base: isexpr, prec_decl, show_unquoted, with_output_color +using .Compiler: ALWAYS_FALSE, ALWAYS_TRUE, argextype, BasicBlock, block_for_inst, + CachedMethodTable, CFG, compute_basic_blocks, DebugInfoStream, Effects, + EMPTY_SPTYPES, getdebugidx, IncrementalCompact, InferenceResult, InferenceState, + InvalidIRError, IRCode, LimitedAccuracy, NativeInterpreter, scan_ssa_use!, + singleton_type, sptypes_from_meth_instance, StmtRange, Timings, VarState, widenconst -import Base: show_unquoted -using Base: printstyled, with_output_color, prec_decl, @invoke +@nospecialize function Base.show(io::IO, cfg::CFG) print(io, "CFG with $(length(cfg.blocks)) blocks:") @@ -31,7 +32,50 @@ function Base.show(io::IO, cfg::CFG) end end -function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxlength_idx::Int, color::Bool, show_type::Bool) +function maybe_argextype( + @nospecialize(x), + src::Union{IRCode,IncrementalCompact,CodeInfo}, + sptypes::Vector{VarState}, +) + return try + argextype(x, src, sptypes) + catch err + !(err isa InvalidIRError) && rethrow() + nothing + end +end + +const inlined_apply_iterate_types = Union{Array,Memory,Tuple,NamedTuple,Core.SimpleVector} + +function builtin_call_has_dispatch( + @nospecialize(f), + args::Vector{Any}, + src::Union{IRCode,IncrementalCompact,CodeInfo}, + sptypes::Vector{VarState}, +) + if f === Core._apply_iterate && length(args) >= 3 + # The implementation of _apply_iterate has hand-inlined implementations + # for (v::Union{Tuple,NamedTuple,Memory,Array,SimpleVector}...) 
+ # which perform no dynamic dispatch + constructort = maybe_argextype(args[3], src, sptypes) + if constructort === nothing || !(widenconst(constructort) <: Core.Builtin) + return true + end + for arg in args[4:end] + argt = maybe_argextype(arg, src, sptypes) + if argt === nothing || !(widenconst(argt) <: inlined_apply_iterate_types) + return true + end + end + elseif (f === Core._apply_pure || f === Core._call_in_world || f === Core._call_in_world_total || f === Core._call_latest) + # These apply-like builtins are effectively dynamic calls + return true + end + return false +end + +function print_stmt(io::IO, idx::Int, @nospecialize(stmt), code::Union{IRCode,CodeInfo,IncrementalCompact}, + sptypes::Vector{VarState}, used::BitSet, maxlength_idx::Int, color::Bool, show_type::Bool, label_dynamic_calls::Bool) if idx in used idx_s = string(idx) pad = " "^(maxlength_idx - length(idx_s) + 1) @@ -48,16 +92,25 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng print(io, ", ") print(io, stmt.typ) print(io, ")") - elseif isexpr(stmt, :invoke) && length(stmt.args) >= 2 && isa(stmt.args[1], MethodInstance) + elseif isexpr(stmt, :invoke) && length(stmt.args) >= 2 && isa(stmt.args[1], Union{MethodInstance,CodeInstance}) stmt = stmt::Expr # TODO: why is this here, and not in Base.show_unquoted - print(io, "invoke ") - linfo = stmt.args[1]::Core.MethodInstance + printstyled(io, " invoke "; color = :light_black) + mi = stmt.args[1] + if !(mi isa Core.MethodInstance) + mi = (mi::Core.CodeInstance).def + end + if isa(mi, Core.ABIOverride) + abi = mi.abi + mi = mi.def + else + abi = mi.specTypes + end show_unquoted(io, stmt.args[2], indent) print(io, "(") # XXX: this is wrong if `sig` is not a concretetype method # more correct would be to use `fieldtype(sig, i)`, but that would obscure / discard Varargs information in show - sig = linfo.specTypes == Tuple ? Core.svec() : Base.unwrap_unionall(linfo.specTypes).parameters::Core.SimpleVector + sig = abi == Tuple ? Core.svec() : Base.unwrap_unionall(abi).parameters::Core.SimpleVector print_arg(i) = sprint(; context=io) do io show_unquoted(io, stmt.args[i], indent) if (i - 1) <= length(sig) @@ -66,19 +119,42 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng end join(io, (print_arg(i) for i = 3:length(stmt.args)), ", ") print(io, ")") + # TODO: if we have a CodeInstance, should we print that rettype info here, which may differ (wider or narrower than the ssavaluetypes) + elseif isexpr(stmt, :call) && length(stmt.args) >= 1 && label_dynamic_calls + ft = maybe_argextype(stmt.args[1], code, sptypes) + f = singleton_type(ft) + if isa(f, Core.IntrinsicFunction) + printstyled(io, "intrinsic "; color = :light_black) + elseif isa(f, Core.Builtin) + if builtin_call_has_dispatch(f, stmt.args, code, sptypes) + printstyled(io, "dynamic builtin "; color = :yellow) + else + printstyled(io, " builtin "; color = :light_black) + end + elseif ft === nothing + # This should only happen when, e.g., printing a call that targets + # an out-of-bounds SSAValue or similar + # (i.e. under normal circumstances, dead code) + printstyled(io, " unknown "; color = :light_black) + elseif widenconst(ft) <: Core.Builtin + printstyled(io, "dynamic builtin "; color = :yellow) + else + printstyled(io, " dynamic "; color = :yellow) + end + show_unquoted(io, stmt, indent, show_type ? 
prec_decl : 0) # given control flow information, we prefer to print these with the basic block #, instead of the ssa % - elseif isexpr(stmt, :enter) && length((stmt::Expr).args) == 1 && (stmt::Expr).args[1] isa Int - print(io, "\$(Expr(:enter, #", (stmt::Expr).args[1]::Int, "))") + elseif isa(stmt, EnterNode) + print(io, "enter #", stmt.catch_dest, "") + if isdefined(stmt, :scope) + print(io, " with scope ") + show_unquoted(io, stmt.scope, indent) + end elseif stmt isa GotoNode print(io, "goto #", stmt.label) elseif stmt isa PhiNode - show_unquoted_phinode(io, stmt, indent, "#") + Base.show_unquoted_phinode(io, stmt, indent, "#") elseif stmt isa GotoIfNot - show_unquoted_gotoifnot(io, stmt, indent, "#") - elseif stmt isa TypedSlot - # call `show` with the type set to Any so it will not be shown, since - # we will show the type ourselves. - show_unquoted(io, SlotNumber(stmt.id), indent, show_type ? prec_decl : 0) + Base.show_unquoted_gotoifnot(io, stmt, indent, "#") # everything else in the IR, defer to the generic AST printer else show_unquoted(io, stmt, indent, show_type ? prec_decl : 0) @@ -86,83 +162,13 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng nothing end -show_unquoted(io::IO, val::Argument, indent::Int, prec::Int) = show_unquoted(io, Core.SlotNumber(val.n), indent, prec) - -show_unquoted(io::IO, stmt::PhiNode, indent::Int, ::Int) = show_unquoted_phinode(io, stmt, indent, "%") -function show_unquoted_phinode(io::IO, stmt::PhiNode, indent::Int, prefix::String) - args = String[let - e = stmt.edges[i] - v = !isassigned(stmt.values, i) ? "#undef" : - sprint(; context=io) do io′ - show_unquoted(io′, stmt.values[i], indent) - end - "$prefix$e => $v" - end for i in 1:length(stmt.edges) - ] - print(io, "φ ", '(') - join(io, args, ", ") - print(io, ')') -end - -function show_unquoted(io::IO, stmt::PhiCNode, indent::Int, ::Int) - print(io, "φᶜ (") - first = true - for v in stmt.values - first ? (first = false) : print(io, ", ") - show_unquoted(io, v, indent) - end - print(io, ")") -end - -function show_unquoted(io::IO, stmt::PiNode, indent::Int, ::Int) - print(io, "π (") - show_unquoted(io, stmt.val, indent) - print(io, ", ") - printstyled(io, stmt.typ, color=:cyan) - print(io, ")") -end - -function show_unquoted(io::IO, stmt::UpsilonNode, indent::Int, ::Int) - print(io, "ϒ (") - isdefined(stmt, :val) ? 
- show_unquoted(io, stmt.val, indent) : - print(io, "#undef") - print(io, ")") -end - -function show_unquoted(io::IO, stmt::ReturnNode, indent::Int, ::Int) - if !isdefined(stmt, :val) - print(io, "unreachable") - else - print(io, "return ") - show_unquoted(io, stmt.val, indent) - end -end - -show_unquoted(io::IO, stmt::GotoIfNot, indent::Int, ::Int) = show_unquoted_gotoifnot(io, stmt, indent, "%") -function show_unquoted_gotoifnot(io::IO, stmt::GotoIfNot, indent::Int, prefix::String) - print(io, "goto ", prefix, stmt.dest, " if not ") - show_unquoted(io, stmt.cond, indent) -end - -function compute_inlining_depth(linetable::Vector, iline::Int32) - iline == 0 && return 1 - depth = -1 - while iline != 0 - depth += 1 - lineinfo = linetable[iline]::LineInfoNode - iline = lineinfo.inlined_at - end - return depth -end - function should_print_ssa_type(@nospecialize node) if isa(node, Expr) - return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :enter, :leave)) + return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :leave)) end return !isa(node, PiNode) && !isa(node, GotoIfNot) && !isa(node, GotoNode) && !isa(node, ReturnNode) && - !isa(node, QuoteNode) + !isa(node, QuoteNode) && !isa(node, EnterNode) end function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show_type::Bool=true, _...) @@ -171,32 +177,27 @@ function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show return nothing end -function normalize_method_name(m) +function method_name(@nospecialize m) + if m isa LineInfoNode + m = m.method + end + if m isa MethodInstance + m = m.def + end if m isa Method - return m.name - elseif m isa MethodInstance - return (m.def::Method).name - elseif m isa Symbol + m = m.name + end + if m isa Module + return :var"top-level scope" + end + if m isa Symbol return m - else - return Symbol("") end + return :var"" end -@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method) - -# converts the linetable for line numbers -# into a list in the form: -# 1 outer-most-frame -# 2 inlined-frame -# 3 innermost-frame -function compute_loc_stack(linetable::Vector, line::Int32) - stack = Int[] - while line != 0 - entry = linetable[line]::LineInfoNode - pushfirst!(stack, line) - line = entry.inlined_at - end - return stack +@noinline function normalize_method_name(@nospecialize m) + name = method_name(m) + return name === :var"" ? :none : name end """ @@ -254,7 +255,7 @@ We get: └── return %3 │ ``` -Even though we were in the `f` scope since the first statement, it tooks us two statements +Even though we were in the `f` scope since the first statement, it took us two statements to catch up and print the intermediate scopes. Which scope is printed is indicated both by the indentation of the method name and by an increased thickness of the appropriate line for the scope. @@ -265,83 +266,72 @@ function compute_ir_line_annotations(code::IRCode) loc_lineno = String[] cur_group = 1 last_lineno = 0 - last_stack = Int[] + last_stack = LineInfoNode[] # nb. 
only file, line, and method are populated in this last_printed_depth = 0 - linetable = code.linetable - lines = code.stmts.line - last_line = zero(eltype(lines)) - for idx in 1:length(lines) + debuginfo = code.debuginfo + def = :var"unknown scope" + for idx in 1:length(code.stmts) buf = IOBuffer() - line = lines[idx] print(buf, "│") - depth = compute_inlining_depth(linetable, line) - iline = line - lineno = 0 + stack = buildLineInfoNode(debuginfo, def, idx) + lineno::Int = 0 loc_method = "" - if line != 0 - stack = compute_loc_stack(linetable, line) - lineno = linetable[stack[1]].line + isempty(stack) && (stack = last_stack) + if !isempty(stack) + lineno = stack[1].line x = min(length(last_stack), length(stack)) - if length(stack) != 0 - # Compute the last depth that was in common - first_mismatch = let last_stack=last_stack - findfirst(i->last_stack[i] != stack[i], 1:x) - end - # If the first mismatch is the last stack frame, that might just - # be a line number mismatch in inner most frame. Ignore those - if length(last_stack) == length(stack) && first_mismatch == length(stack) - last_entry, entry = linetable[last_stack[end]], linetable[stack[end]] - if method_name(last_entry) === method_name(entry) && last_entry.file === entry.file - first_mismatch = nothing - end + depth = length(stack) - 1 + # Compute the last depth that was in common + first_mismatch = let last_stack=last_stack + findfirst(i->last_stack[i] != stack[i], 1:x) + end + # If the first mismatch is the last stack frame, that might just + # be a line number mismatch in inner most frame. Ignore those + if length(last_stack) == length(stack) && first_mismatch == length(stack) + last_entry, entry = last_stack[end], stack[end] + if method_name(last_entry) === method_name(entry) && last_entry.file === entry.file + first_mismatch = nothing end - last_depth = something(first_mismatch, x+1)-1 - if min(depth, last_depth) > last_printed_depth - printing_depth = min(depth, last_printed_depth + 1) - last_printed_depth = printing_depth - elseif length(stack) > length(last_stack) || first_mismatch !== nothing - printing_depth = min(depth, last_depth + 1) - last_printed_depth = printing_depth - else - printing_depth = 0 + end + last_depth = something(first_mismatch, x+1)-1 + if min(depth, last_depth) > last_printed_depth + printing_depth = min(depth, last_printed_depth + 1) + last_printed_depth = printing_depth + elseif length(stack) > length(last_stack) || first_mismatch !== nothing + printing_depth = min(depth, last_depth + 1) + last_printed_depth = printing_depth + else + printing_depth = 0 + end + stole_one = false + if printing_depth != 0 + for _ in 1:(printing_depth-1) + print(buf, "│") end - stole_one = false - if printing_depth != 0 - for _ in 1:(printing_depth-1) + if printing_depth <= last_depth-1 && first_mismatch === nothing + print(buf, "┃") + for _ in printing_depth+1:min(depth, last_depth) print(buf, "│") end - if printing_depth <= last_depth-1 && first_mismatch === nothing - print(buf, "┃") - for _ in printing_depth+1:min(depth, last_depth) - print(buf, "│") - end - else - stole_one = true - print(buf, "╻") - end else - for _ in 1:min(depth, last_depth) - print(buf, "│") - end + stole_one = true + print(buf, "╻") end - print(buf, "╷"^max(0, depth - last_depth - stole_one)) - if printing_depth != 0 - if length(stack) == printing_depth - loc_method = line - else - loc_method = stack[printing_depth + 1] - end - loc_method = method_name(linetable[loc_method]) + else + for _ in 1:min(depth, last_depth) + print(buf, "│") end - 
loc_method = string(" "^printing_depth, loc_method) end + print(buf, "╷"^max(0, depth - last_depth - stole_one)) + if printing_depth != 0 + loc_method = normalize_method_name(stack[printing_depth + 1]) + end + loc_method = string(" "^printing_depth, loc_method) last_stack = stack - entry = linetable[line] end push!(loc_annotations, String(take!(buf))) push!(loc_lineno, (lineno != 0 && lineno != last_lineno) ? string(lineno) : "") push!(loc_methods, loc_method) - last_line = line (lineno != 0) && (last_lineno = lineno) nothing end @@ -350,19 +340,87 @@ end Base.show(io::IO, code::Union{IRCode, IncrementalCompact}) = show_ir(io, code) +# A line_info_preprinter for disabling line info printing lineinfo_disabled(io::IO, linestart::String, idx::Int) = "" -function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false) +# utility function to extract the file name from a DebugInfo object +function debuginfo_file1(debuginfo::Union{DebugInfo,DebugInfoStream}) + def = debuginfo.def + if def isa MethodInstance + def = def.def + end + if def isa Method + def = def.file + end + if def isa Symbol + return def + end + return :var"" +end + +# utility function to extract the first line number and file of a block of code +function debuginfo_firstline(debuginfo::Union{DebugInfo,DebugInfoStream}) + linetable = debuginfo.linetable + while linetable != nothing + debuginfo = linetable + linetable = debuginfo.linetable + end + codeloc = getdebugidx(debuginfo, 0) + return debuginfo_file1(debuginfo), codeloc[1] +end + +struct LineInfoNode + method # ::Union{Method,MethodInstance,Symbol} + file::Symbol + line::Int32 +end + +# utility function for converting a debuginfo object a particular pc to list of LineInfoNodes representing the inlining info at that pc for function `def` +# which is either `nothing` (macro-expand), a module (top-level), a Method (unspecialized code) or a MethodInstance (specialized code) +# Returns `false` if the line info should not be updated with this info because this +# statement has no effect on the line numbers. The `scopes` will still be populated however +# with as much information as was available about the inlining at that statement. 
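The comment above gives the contract of `append_scopes!`: starting from a statement index, walk the nested debug-info structure and push one `LineInfoNode` per enclosing scope, outermost scope first, following inlining edges inward. The snippet below is a minimal, self-contained model of that walk; `ToyDebugInfo`, `ToyFrame`, and `toy_scopes` are made-up illustrative names, not the real `DebugInfo`/`getdebugidx` API.

```
# Toy model of the inlining-stack walk. Each scope records a method name, a
# file, one line number per statement, and (optionally) the inlined callee
# reached from that statement.
struct ToyFrame
    method::Symbol
    file::Symbol
    line::Int
end

struct ToyDebugInfo
    def::Symbol                        # method name of this scope
    file::Symbol
    lines::Vector{Int}                 # line number per statement
    callees::Vector{Union{Nothing, Tuple{ToyDebugInfo, Int}}}  # inlinee and its pc, if any
end

# Collect the scopes covering statement `pc`, outermost first.
function toy_scopes(di::ToyDebugInfo, pc::Int)
    stack = ToyFrame[]
    while true
        push!(stack, ToyFrame(di.def, di.file, di.lines[pc]))
        callee = di.callees[pc]
        callee === nothing && return stack
        di, pc = callee                # descend into the inlined callee
    end
end

inner = ToyDebugInfo(:g, Symbol("inner.jl"), [10], [nothing])
outer = ToyDebugInfo(:f, Symbol("outer.jl"), [3], [(inner, 1)])
toy_scopes(outer, 1)   # two frames: f at outer.jl:3, then g at inner.jl:10
```

In user-facing terms this is the same information that, for example, `code_typed(f, argtypes; debuginfo=:source)` renders in the per-statement location column.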
+function append_scopes!(scopes::Vector{LineInfoNode}, pc::Int, debuginfo, @nospecialize(def)) + doupdate = true + while true + debuginfo.def isa Symbol || (def = debuginfo.def) + codeloc = getdebugidx(debuginfo, pc) + line::Int = codeloc[1] + inl_to::Int = codeloc[2] + doupdate &= line != 0 || inl_to != 0 # disabled debug info--no update + if debuginfo.linetable === nothing || pc <= 0 || line < 0 + line < 0 && (doupdate = false; line = 0) # broken debug info + push!(scopes, LineInfoNode(def, debuginfo_file1(debuginfo), Int32(line))) + else + doupdate = append_scopes!(scopes, line, debuginfo.linetable::DebugInfo, def) && doupdate + end + inl_to == 0 && return doupdate + def = :var"macro expansion" + debuginfo = debuginfo.edges[inl_to] + pc::Int = codeloc[3] + end +end + +# utility wrapper around `append_scopes!` that returns an empty list instead of false +# when there is no applicable line update +function buildLineInfoNode(debuginfo, @nospecialize(def), pc::Int) + DI = LineInfoNode[] + append_scopes!(DI, pc, debuginfo, def) || empty!(DI) + return DI +end + +# A default line_info_preprinter for printing accurate line number information +function DILineInfoPrinter(debuginfo, def, showtypes::Bool=false) context = LineInfoNode[] context_depth = Ref(0) indent(s::String) = s^(max(context_depth[], 1) - 1) - function emit_lineinfo_update(io::IO, linestart::String, lineidx::Int32) + function emit_lineinfo_update(io::IO, linestart::String, pc::Int) # internal configuration options: linecolor = :yellow collapse = showtypes ? false : true indent_all = true - # convert lineidx to a vector - if lineidx == typemin(Int32) + # convert pc to a vector + if pc == 0 # sentinel value: reset internal (and external) state pops = indent("└") if !isempty(pops) @@ -372,13 +430,10 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false) end empty!(context) context_depth[] = 0 - elseif lineidx > 0 # just skip over lines with no debug info at all - DI = LineInfoNode[] - while lineidx != 0 - entry = linetable[lineidx]::LineInfoNode - push!(DI, entry) - lineidx = entry.inlined_at - end + return "" + end + DI = reverse!(buildLineInfoNode(debuginfo, def, pc)) + if !isempty(DI) # FOR DEBUGGING, or if you just like very excessive output: # this prints out the context in full for every statement #empty!(context) @@ -460,7 +515,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false) started::Bool = false if !update_line_only && showtypes && !isa(frame.method, Symbol) && nctx != 1 print(io, linestart) - Base.with_output_color(linecolor, io) do io + with_output_color(linecolor, io) do io print(io, indent("│")) print(io, "┌ invoke ", frame.method) println(io) @@ -468,7 +523,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false) started = true end print(io, linestart) - Base.with_output_color(linecolor, io) do io + with_output_color(linecolor, io) do io print(io, indent("│")) push!(context, frame) if update_line_only @@ -508,6 +563,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false) #end end indent_all || return "" + context_depth[] <= 1 && return "" return sprint(io -> printstyled(io, indent("│"), color=linecolor), context=io) end return emit_lineinfo_update @@ -524,16 +580,28 @@ end - `should_print_stmt(idx::Int) -> Bool`: whether the statement at index `idx` should be printed as part of the IR or not - `bb_color`: color used for printing the basic block brackets on the left +- `label_dynamic_calls`: whether to label calls as dynamic / builtin / 
intrinsic """ struct IRShowConfig line_info_preprinter line_info_postprinter should_print_stmt bb_color::Symbol - function IRShowConfig(line_info_preprinter, line_info_postprinter=default_expr_type_printer; - should_print_stmt=Returns(true), bb_color::Symbol=:light_black) - return new(line_info_preprinter, line_info_postprinter, should_print_stmt, bb_color) - end + label_dynamic_calls::Bool + + IRShowConfig( + line_info_preprinter, + line_info_postprinter=default_expr_type_printer; + should_print_stmt=Returns(true), + bb_color::Symbol=:light_black, + label_dynamic_calls=true + ) = new( + line_info_preprinter, + line_info_postprinter, + should_print_stmt, + bb_color, + label_dynamic_calls + ) end struct _UNDEF @@ -542,11 +610,11 @@ end function _stmt(code::IRCode, idx::Int) stmts = code.stmts - return isassigned(stmts.inst, idx) ? stmts[idx][:inst] : UNDEF + return isassigned(stmts.stmt, idx) ? stmts[idx][:stmt] : UNDEF end function _stmt(compact::IncrementalCompact, idx::Int) stmts = compact.result - return isassigned(stmts.inst, idx) ? stmts[idx][:inst] : UNDEF + return isassigned(stmts.stmt, idx) ? stmts[idx][:stmt] : UNDEF end function _stmt(code::CodeInfo, idx::Int) code = code.code @@ -569,10 +637,8 @@ end function statement_indices_to_labels(stmt, cfg::CFG) # convert statement index to labels, as expected by print_stmt - if stmt isa Expr - if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int - stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int)) - end + if stmt isa EnterNode + stmt = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest)) elseif isa(stmt, GotoIfNot) stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest)) elseif stmt isa GotoNode @@ -591,13 +657,14 @@ end # at index `idx`. This function is repeatedly called until it returns `nothing`. # to iterate nodes that are to be inserted after the statement, set `attach_after=true`. function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, config::IRShowConfig, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false) + sptypes::Vector{VarState}, used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false) return show_ir_stmt(io, code, idx, config.line_info_preprinter, config.line_info_postprinter, - used, cfg, bb_idx; pop_new_node!, only_after, config.bb_color) + sptypes, used, cfg, bb_idx; pop_new_node!, only_after, config.bb_color, config.label_dynamic_calls) end function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, line_info_preprinter, line_info_postprinter, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false, bb_color=:light_black) + sptypes::Vector{VarState}, used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! 
= Returns(nothing), only_after::Bool=false, + bb_color=:light_black, label_dynamic_calls::Bool=true) stmt = _stmt(code, idx) type = _type(code, idx) max_bb_idx_size = length(string(length(cfg.blocks))) @@ -656,7 +723,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, show_type = should_print_ssa_type(new_node_inst) let maxlength_idx=maxlength_idx, show_type=show_type with_output_color(:green, io) do io′ - print_stmt(io′, node_idx, new_node_inst, used, maxlength_idx, false, show_type) + print_stmt(io′, node_idx, new_node_inst, code, sptypes, used, maxlength_idx, false, show_type, label_dynamic_calls) end end @@ -685,7 +752,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, stmt = statement_indices_to_labels(stmt, cfg) end show_type = type !== nothing && should_print_ssa_type(stmt) - print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type) + print_stmt(io, idx, stmt, code, sptypes, used, maxlength_idx, true, show_type, label_dynamic_calls) if type !== nothing # ignore types for pre-inference code if type === UNDEF # This is an error, but can happen if passes don't update their type information @@ -717,7 +784,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, end function _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx) - new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes)) + new_nodes_perm = filter(i -> isassigned(new_nodes.stmt, i), 1:length(new_nodes)) sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after))) # separate iterators for the nodes that are inserted before resp. after each statement @@ -745,7 +812,7 @@ function _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx) iter[] += 1 new_node = new_nodes[node_idx] - new_node_inst = isassigned(new_nodes.inst, node_idx) ? new_node[:inst] : UNDEF + new_node_inst = isassigned(new_nodes.stmt, node_idx) ? new_node[:stmt] : UNDEF new_node_type = isassigned(new_nodes.type, node_idx) ? new_node[:type] : UNDEF node_idx += length(stmts) return node_idx, new_node_inst, new_node_type @@ -805,30 +872,20 @@ end _strip_color(s::String) = replace(s, r"\e\[\d+m"a => "") -function statementidx_lineinfo_printer(f, code::IRCode) - printer = f(code.linetable) - function (io::IO, indent::String, idx::Int) - printer(io, indent, idx > 0 ? code.stmts[idx][:line] : typemin(Int32)) - end -end -function statementidx_lineinfo_printer(f, code::CodeInfo) - printer = f(code.linetable) - function (io::IO, indent::String, idx::Int) - printer(io, indent, idx > 0 ? 
code.codelocs[idx] : typemin(Int32)) - end -end +statementidx_lineinfo_printer(f, code::IRCode) = f(code.debuginfo, :var"unknown scope") +statementidx_lineinfo_printer(f, code::CodeInfo) = f(code.debuginfo, :var"unknown scope") statementidx_lineinfo_printer(code) = statementidx_lineinfo_printer(DILineInfoPrinter, code) function stmts_used(io::IO, code::IRCode, warn_unset_entry=true) - stmts = code.stmts + insts = code.stmts used = BitSet() - for stmt in stmts - scan_ssa_use!(push!, used, stmt[:inst]) + for inst in insts + scan_ssa_use!(push!, used, inst[:stmt]) end new_nodes = code.new_nodes.stmts for nn in 1:length(new_nodes) - if isassigned(new_nodes.inst, nn) - scan_ssa_use!(push!, used, new_nodes[nn][:inst]) + if isassigned(new_nodes.stmt, nn) + scan_ssa_use!(push!, used, new_nodes[nn][:stmt]) elseif warn_unset_entry printstyled(io, "ERROR: New node array has unset entry\n", color=:red) warn_unset_entry = false @@ -854,10 +911,10 @@ end default_config(code::CodeInfo) = IRShowConfig(statementidx_lineinfo_printer(code)) function show_ir_stmts(io::IO, ir::Union{IRCode, CodeInfo, IncrementalCompact}, inds, config::IRShowConfig, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing)) + sptypes::Vector{VarState}, used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing)) for idx in inds if config.should_print_stmt(ir, idx, used) - bb_idx = show_ir_stmt(io, ir, idx, config, used, cfg, bb_idx; pop_new_node!) + bb_idx = show_ir_stmt(io, ir, idx, config, sptypes, used, cfg, bb_idx; pop_new_node!) elseif bb_idx <= length(cfg.blocks) && idx == cfg.blocks[bb_idx].stmts.stop bb_idx += 1 end @@ -875,9 +932,9 @@ function show_ir(io::IO, ir::IRCode, config::IRShowConfig=default_config(ir); pop_new_node! = new_nodes_iter(ir)) used = stmts_used(io, ir) cfg = ir.cfg - maxssaid = length(ir.stmts) + Core.Compiler.length(ir.new_nodes) + maxssaid = length(ir.stmts) + length(ir.new_nodes) let io = IOContext(io, :maxssaid=>maxssaid) - show_ir_stmts(io, ir, 1:length(ir.stmts), config, used, cfg, 1; pop_new_node!) + show_ir_stmts(io, ir, 1:length(ir.stmts), config, ir.sptypes, used, cfg, 1; pop_new_node!) end finish_show_ir(io, cfg, config) end @@ -886,8 +943,12 @@ function show_ir(io::IO, ci::CodeInfo, config::IRShowConfig=default_config(ci); pop_new_node! = Returns(nothing)) used = stmts_used(io, ci) cfg = compute_basic_blocks(ci.code) + parent = ci.parent + sptypes = if parent isa MethodInstance + sptypes_from_meth_instance(parent) + else EMPTY_SPTYPES end let io = IOContext(io, :maxssaid=>length(ci.code)) - show_ir_stmts(io, ci, 1:length(ci.code), config, used, cfg, 1; pop_new_node!) + show_ir_stmts(io, ci, 1:length(ci.code), config, sptypes, used, cfg, 1; pop_new_node!) end finish_show_ir(io, cfg, config) end @@ -928,16 +989,16 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau still_to_be_inserted = (last(input_bb.stmts) - compact.idx) + count result_bb = result_bbs[compact.active_result_bb] - result_bbs[compact.active_result_bb] = Core.Compiler.BasicBlock(result_bb, - Core.Compiler.StmtRange(first(result_bb.stmts), compact.result_idx+still_to_be_inserted)) + result_bbs[compact.active_result_bb] = BasicBlock(result_bb, + StmtRange(first(result_bb.stmts), compact.result_idx+still_to_be_inserted)) end compact_cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) pop_new_node! 
= new_nodes_iter(compact) - maxssaid = length(compact.result) + Core.Compiler.length(compact.new_new_nodes) + maxssaid = length(compact.result) + length(compact.new_new_nodes) bb_idx = let io = IOContext(io, :maxssaid=>maxssaid) - show_ir_stmts(io, compact, 1:compact.result_idx-1, config, used_compacted, - compact_cfg, 1; pop_new_node!) + show_ir_stmts(io, compact, 1:compact.result_idx-1, config, compact.ir.sptypes, + used_compacted, compact_cfg, 1; pop_new_node!) end @@ -955,8 +1016,8 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau inputs_bbs = copy(cfg.blocks) for (i, bb) in enumerate(inputs_bbs) if bb.stmts.stop < bb.stmts.start - inputs_bbs[i] = Core.Compiler.BasicBlock(bb, - Core.Compiler.StmtRange(last(bb.stmts), last(bb.stmts))) + inputs_bbs[i] = BasicBlock(bb, + StmtRange(last(bb.stmts), last(bb.stmts))) # this is not entirely correct, and will result in the bb starting again, # but is the best we can do without changing how `finish_current_bb!` works. end @@ -964,17 +1025,17 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau uncompacted_cfg = CFG(inputs_bbs, Int[first(inputs_bbs[i].stmts) for i in 2:length(inputs_bbs)]) pop_new_node! = new_nodes_iter(compact.ir, compact.new_nodes_idx) - maxssaid = length(compact.ir.stmts) + Core.Compiler.length(compact.ir.new_nodes) + maxssaid = length(compact.ir.stmts) + length(compact.ir.new_nodes) let io = IOContext(io, :maxssaid=>maxssaid) # first show any new nodes to be attached after the last compacted statement if compact.idx > 1 - show_ir_stmt(io, compact.ir, compact.idx-1, config, used_uncompacted, - uncompacted_cfg, bb_idx; pop_new_node!, only_after=true) + show_ir_stmt(io, compact.ir, compact.idx-1, config, compact.ir.sptypes, + used_uncompacted, uncompacted_cfg, bb_idx; pop_new_node!, only_after=true) end # then show the actual uncompacted IR - show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, used_uncompacted, - uncompacted_cfg, bb_idx; pop_new_node!) + show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, compact.ir.sptypes, + used_uncompacted, uncompacted_cfg, bb_idx; pop_new_node!) 
end finish_show_ir(io, uncompacted_cfg, config) @@ -1020,9 +1081,77 @@ function Base.show(io::IO, e::Effects) print(io, ',') printstyled(io, effectbits_letter(e, :inaccessiblememonly, 'm'); color=effectbits_color(e, :inaccessiblememonly)) print(io, ',') - printstyled(io, effectbits_letter(e, :noinbounds, 'i'); color=effectbits_color(e, :noinbounds)) + printstyled(io, effectbits_letter(e, :noub, 'u'); color=effectbits_color(e, :noub)) + print(io, ',') + printstyled(io, effectbits_letter(e, :nonoverlayed, 'o'); color=effectbits_color(e, :nonoverlayed)) + print(io, ',') + printstyled(io, effectbits_letter(e, :nortcall, 'r'); color=effectbits_color(e, :nortcall)) print(io, ')') - e.nonoverlayed || printstyled(io, '′'; color=:red) +end + +function Base.show(io::IO, inferred::InferenceResult) + mi = inferred.linfo + tt = mi.specTypes.parameters[2:end] + tts = join(["::$(t)" for t in tt], ", ") + rettype = inferred.result + if isa(rettype, InferenceState) + rettype = rettype.bestguess + end + if isa(mi.def, Method) + print(io, mi.def.name, "(", tts, " => ", rettype, ")") + else + print(io, "Toplevel MethodInstance thunk from ", mi.def, " => ", rettype) + end +end + +Base.show(io::IO, sv::InferenceState) = + (print(io, "InferenceState for "); show(io, sv.linfo)) + +Base.show(io::IO, ::NativeInterpreter) = + print(io, "Compiler.NativeInterpreter(...)") + +Base.show(io::IO, cache::CachedMethodTable) = + print(io, typeof(cache), "(", length(cache.cache), " entries)") + +function Base.show(io::IO, limited::LimitedAccuracy) + print(io, "LimitedAccuracy(") + show(io, limited.typ) + print(io, ", #= ", length(limited.causes), " cause(s) =#)") +end + +# These sometimes show up as Const-values in InferenceFrameInfo signatures +function Base.show(io::IO, mi_info::Timings.InferenceFrameInfo) + mi = mi_info.mi + def = mi.def + if isa(def, Method) + if isdefined(def, :generator) && mi === def.generator + print(io, "InferenceFrameInfo generator for ") + show(io, def) + else + print(io, "InferenceFrameInfo for ") + argnames = [isa(a, Core.Const) ? (isa(a.val, Type) ? "" : a.val) : "" for a in mi_info.slottypes[1:mi_info.nargs]] + show_tuple_as_call(io, def.name, mi.specTypes; argnames, qualified=true) + end + else + di = mi.cache.inferred.debuginfo + file, line = debuginfo_firstline(di) + file = string(file) + line = isempty(file) || line < 0 ? "" : "$file:$line" + print(io, "Toplevel InferenceFrameInfo thunk from ", def, " starting at ", line) + end +end + +function Base.show(io::IO, tinf::Timings.Timing) + print(io, "Compiler.Timings.Timing(", tinf.mi_info, ") with ", length(tinf.children), " children") end @specialize + +const __debuginfo = Dict{Symbol, Any}( + # :full => src -> statementidx_lineinfo_printer(src), # and add variable slot information + :source => src -> statementidx_lineinfo_printer(src), + # :oneliner => src -> statementidx_lineinfo_printer(PartialLineInfoPrinter, src), + :none => src -> lineinfo_disabled, + ) +const default_debuginfo = Ref{Symbol}(:none) +debuginfo(sym) = sym === :default ? default_debuginfo[] : sym diff --git a/base/compiler/ssair/slot2ssa.jl b/Compiler/src/ssair/slot2ssa.jl similarity index 73% rename from base/compiler/ssair/slot2ssa.jl rename to Compiler/src/ssair/slot2ssa.jl index 73bdb51702ded..80dffdab23243 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/Compiler/src/ssair/slot2ssa.jl @@ -1,13 +1,5 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -struct TypedSlot - id::Int - typ - TypedSlot(id::Int, @nospecialize(typ)) = new(id, typ) -end - -const UnoptSlot = Union{SlotNumber, TypedSlot} - mutable struct SlotInfo defs::Vector{Int} uses::Vector{Int} @@ -29,13 +21,13 @@ function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt)) end stmt = stmt.args[2] end - if isa(stmt, UnoptSlot) + if isa(stmt, SlotNumber) push!(result[slot_id(stmt)].uses, idx) return end for op in userefs(stmt) val = op[] - if isa(val, UnoptSlot) + if isa(val, SlotNumber) push!(result[slot_id(val)].uses, idx) end end @@ -89,28 +81,26 @@ function new_to_regular(@nospecialize(stmt), new_offset::Int) return urs[] end -function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, stmt::UnoptSlot, @nospecialize(ssa), @nospecialize(def_ssa)) +function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(ssa), @nospecialize(def_ssa)) # We don't really have the information here to get rid of these. # We'll do so later if ssa === UNDEF_TOKEN insert_node!(ir, idx, NewInstruction( Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any)) return UNDEF_TOKEN + elseif has_flag(ir.stmts[idx], IR_FLAG_NOTHROW) + # if the `isdefined`-ness of this slot is guaranteed by abstract interpretation, + # there is no need to form a `:throw_undef_if_not` elseif def_ssa !== true insert_node!(ir, idx, NewInstruction( Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any)) end - if isa(stmt, SlotNumber) - return ssa - elseif isa(stmt, TypedSlot) - return NewSSAValue(insert_node!(ir, idx, NewInstruction(PiNode(ssa, stmt.typ), stmt.typ)).id - length(ir.stmts)) - end - @assert false # unreachable + return ssa end function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt)) - if isa(stmt, UnoptSlot) && slot_filter(stmt) - return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename_slot(stmt)...) + if isa(stmt, SlotNumber) && slot_filter(stmt) + return fixup_slot!(ir, ci, idx, slot_id(stmt), rename_slot(stmt)...) end if isexpr(stmt, :(=)) stmt.args[2] = fixemup!(slot_filter, rename_slot, ir, ci, idx, stmt.args[2]) @@ -120,37 +110,27 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode for i = 1:length(stmt.edges) isassigned(stmt.values, i) || continue val = stmt.values[i] - isa(val, UnoptSlot) || continue + isa(val, SlotNumber) || continue slot_filter(val) || continue bb_idx = block_for_inst(ir.cfg, Int(stmt.edges[i])) from_bb_terminator = last(ir.cfg.blocks[bb_idx].stmts) - stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename_slot(val)...) + stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), rename_slot(val)...) end return stmt end if isexpr(stmt, :isdefined) val = stmt.args[1] - if isa(val, UnoptSlot) - slot = slot_id(val) - if (ci.slotflags[slot] & SLOT_USEDUNDEF) == 0 - return true - else - ssa, undef_ssa = rename_slot(val) - if ssa === UNDEF_TOKEN - return false - elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue) - return true - end - return undef_ssa - end + if isa(val, SlotNumber) + ssa, undef_ssa = rename_slot(val) + return undef_ssa end return stmt end urs = userefs(stmt) for op in urs val = op[] - if isa(val, UnoptSlot) && slot_filter(val) - x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename_slot(val)...) + if isa(val, SlotNumber) && slot_filter(val) + x = fixup_slot!(ir, ci, idx, slot_id(val), rename_slot(val)...) 
# We inserted an undef error node. Delete subsequent statement # to avoid confusing the optimizer if x === UNDEF_TOKEN @@ -158,10 +138,11 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode end op[] = x elseif isa(val, GlobalRef) && !(isdefined(val.mod, val.name) && isconst(val.mod, val.name)) - op[] = NewSSAValue(insert_node!(ir, idx, - NewInstruction(val, typ_for_val(val, ci, ir.sptypes, idx, Any[]))).id - length(ir.stmts)) + typ = typ_for_val(val, ci, ir, idx, Any[]) + new_inst = NewInstruction(val, typ) + op[] = NewSSAValue(insert_node!(ir, idx, new_inst).id - length(ir.stmts)) elseif isexpr(val, :static_parameter) - ty = typ_for_val(val, ci, ir.sptypes, idx, Any[]) + ty = typ_for_val(val, ci, ir, idx, Any[]) if isa(ty, Const) inst = NewInstruction(quoted(ty.val), ty) else @@ -175,70 +156,38 @@ end function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa)) for use in uses - code[use] = fixemup!(x::UnoptSlot->slot_id(x)==slot, stmt::UnoptSlot->(ssa, true), ir, ci, use, code[use]) + code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, ::SlotNumber->Pair{Any,Any}(ssa, true), ir, ci, use, code[use]) end end function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}}) - return fixemup!(stmt::UnoptSlot->true, stmt::UnoptSlot->renames[slot_id(stmt)], ir, ci, idx, stmt) -end - -function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{CallInfo}) - # Remove `nothing`s at the end, we don't handle them well - # (we expect the last instruction to be a terminator) - ssavaluetypes = ci.ssavaluetypes::Vector{Any} - (; codelocs, ssaflags) = ci - for i = length(code):-1:1 - if code[i] !== nothing - resize!(code, i) - resize!(ssavaluetypes, i) - resize!(codelocs, i) - resize!(info, i) - resize!(ssaflags, i) - break - end - end - # If the last instruction is not a terminator, add one. This can - # happen for implicit return on dead branches. - term = code[end] - if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode) - push!(code, ReturnNode()) - push!(ssavaluetypes, Union{}) - push!(codelocs, 0) - push!(info, NoCallInfo()) - push!(ssaflags, IR_FLAG_NOTHROW) - end - nothing -end - -struct DelayedTyp - phi::NewSSAValue + return fixemup!(::SlotNumber->true, x::SlotNumber->renames[slot_id(x)], ir, ci, idx, stmt) end # maybe use expr_type? 
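The `fixemup!`/`fixup_slot!` machinery above rewrites `SlotNumber` uses into SSA values and inserts a `:throw_undef_if_not` guard where a slot may be used before it is assigned. As an aside (not part of the patch), the end-to-end effect of this conversion is visible with standard reflection; `h` below is only an illustrative function.

```
# A slot that is only conditionally assigned:
function h(c::Bool)
    local x
    if c
        x = 1
    end
    return x        # undefined when `c` is false
end

# Lowered code is still slot-based: `x` appears as a mutable slot, not an SSA value.
code_lowered(h, (Bool,))

# Typed (post-slot2ssa) code is in SSA form: the slot is replaced by SSA values
# and phi nodes, and the maybe-undefined use is typically protected by a
# `throw_undef_if_not`-style guard.
code_typed(h, (Bool,))
```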
-function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{VarState}, idx::Int, slottypes::Vector{Any}) +function typ_for_val(@nospecialize(x), ci::CodeInfo, ir::IRCode, idx::Int, slottypes::Vector{Any}) if isa(x, Expr) if x.head === :static_parameter - return sptypes[x.args[1]::Int].typ + return ir.sptypes[x.args[1]::Int].typ elseif x.head === :boundscheck return Bool elseif x.head === :copyast - return typ_for_val(x.args[1], ci, sptypes, idx, slottypes) + return typ_for_val(x.args[1], ci, ir, idx, slottypes) end return (ci.ssavaluetypes::Vector{Any})[idx] end - isa(x, GlobalRef) && return abstract_eval_globalref(x) + isa(x, GlobalRef) && return abstract_eval_globalref_type(x, ci) isa(x, SSAValue) && return (ci.ssavaluetypes::Vector{Any})[x.id] isa(x, Argument) && return slottypes[x.n] - isa(x, NewSSAValue) && return DelayedTyp(x) + isa(x, NewSSAValue) && return types(ir)[new_to_regular(x, length(ir.stmts))] isa(x, QuoteNode) && return Const(x.value) - isa(x, Union{Symbol, PiNode, PhiNode, UnoptSlot}) && error("unexpected val type") + isa(x, Union{Symbol, PiNode, PhiNode, SlotNumber}) && error("unexpected val type") return Const(x) end struct BlockLiveness def_bbs::Vector{Int} - live_in_bbs::Vector{Int} + live_in_bbs::Union{Vector{Int}, Nothing} end """ @@ -249,7 +198,7 @@ Run iterated dominance frontier. The algorithm we have here essentially follows LLVM, which itself is a a cleaned up version of the linear-time algorithm described in [^SG95]. -The algorithm here, is quite straightforward. Suppose we have a CFG: +The algorithm here is quite straightforward. Suppose we have a CFG: A -> B -> D -> F \\-> C ------>/ @@ -301,7 +250,8 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree: push!(worklist, node) while !isempty(worklist) active = pop!(worklist) - for succ in cfg.blocks[active].succs + succs = cfg.blocks[active].succs + for succ in succs # Check whether the current root (`node`) dominates succ. # We are guaranteed that `node` dominates `active`, since # we've arrived at `active` by following dominator tree edges. @@ -316,7 +266,7 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree: # unless liveness said otherwise. succ in processed && continue push!(processed, succ) - if !(succ in liveness.live_in_bbs) + if liveness.live_in_bbs !== nothing && !(succ in liveness.live_in_bbs) continue end push!(phiblocks, succ) @@ -389,43 +339,58 @@ RPO traversal and in particular, any use of an SSA value must come after (by linear order) its definition. 
""" function domsort_ssa!(ir::IRCode, domtree::DomTree) - # First compute the new order of basic blocks + # Mapping from new → old BB index + # An "old" index of 0 means that this was a BB inserted as part of a fixup (see below) result_order = Int[] - stack = Int[] + + # Mapping from old → new BB index bb_rename = fill(-1, length(ir.cfg.blocks)) - node = 1 - ncritbreaks = 0 - nnewfallthroughs = 0 - while node !== -1 - push!(result_order, node) - bb_rename[node] = length(result_order) - cs = domtree.nodes[node].children - terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst] - next_node = node + 1 - node = -1 + + # The number of GotoNodes we need to insert to preserve control-flow after sorting + nfixupstmts = 0 + + # node queued up for scheduling (-1 === nothing) + node_to_schedule = 1 + worklist = Int[] + while node_to_schedule !== -1 + # First assign a new BB index to `node_to_schedule` + push!(result_order, node_to_schedule) + bb_rename[node_to_schedule] = length(result_order) + cs = domtree.nodes[node_to_schedule].children + terminator = ir[SSAValue(last(ir.cfg.blocks[node_to_schedule].stmts))][:stmt] + fallthrough = node_to_schedule + 1 + node_to_schedule = -1 + # Adding the nodes in reverse sorted order attempts to retain # the original source order of the nodes as much as possible. # This is not required for correctness, but is easier on the humans - for child in Iterators.Reverse(cs) - if child == next_node + for node in Iterators.Reverse(cs) + if node == fallthrough # Schedule the fall through node first, # so we can retain the fall through - node = next_node + node_to_schedule = node else - push!(stack, child) + push!(worklist, node) end end - if node == -1 && !isempty(stack) - node = pop!(stack) + if node_to_schedule == -1 && !isempty(worklist) + node_to_schedule = pop!(worklist) end - if node != next_node && !isa(terminator, Union{GotoNode, ReturnNode}) + # If a fallthrough successor is no longer the fallthrough after sorting, we need to + # add a GotoNode (and either extend or split the basic block as necessary) + if node_to_schedule != fallthrough && !isa(terminator, Union{GotoNode, ReturnNode}) if isa(terminator, GotoIfNot) # Need to break the critical edge - ncritbreaks += 1 + push!(result_order, 0) + elseif isa(terminator, EnterNode) || isexpr(terminator, :leave) + # Cannot extend the BasicBlock with a goto, have to split it push!(result_order, 0) else - nnewfallthroughs += 1 + # No need for a new block, just extend + @assert !isterminator(terminator) end + # Reserve space for the fixup goto + nfixupstmts += 1 end end new_bbs = Vector{BasicBlock}(undef, length(result_order)) @@ -435,7 +400,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) nstmts += length(ir.cfg.blocks[i].stmts) end end - result = InstructionStream(nstmts + ncritbreaks + nnewfallthroughs) + result = InstructionStream(nstmts + nfixupstmts) inst_rename = Vector{SSAValue}(undef, length(ir.stmts) + length(ir.new_nodes)) @inbounds for i = 1:length(ir.stmts) inst_rename[i] = SSAValue(-1) @@ -444,14 +409,13 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) inst_rename[i + length(ir.stmts)] = SSAValue(i + length(result)) end bb_start_off = 0 - crit_edge_breaks_fixup = Tuple{Int, Int}[] for (new_bb, bb) in pairs(result_order) if bb == 0 nidx = bb_start_off + 1 - inst = result[nidx][:inst] - @assert isa(inst, GotoNode) + stmt = result[nidx][:stmt] + @assert isa(stmt, GotoNode) # N.B.: The .label has already been renamed when it was created. 
- new_bbs[new_bb] = BasicBlock(nidx:nidx, [new_bb - 1], [inst.label]) + new_bbs[new_bb] = BasicBlock(nidx:nidx, [new_bb - 1], [stmt.label]) bb_start_off += 1 continue end @@ -459,44 +423,45 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) inst_range = (bb_start_off+1):(bb_start_off+length(old_inst_range)) for (nidx, idx) in zip(inst_range, old_inst_range) inst_rename[idx] = SSAValue(nidx) - @assert !isassigned(result.inst, nidx) + @assert !isassigned(result.stmt, nidx) node = result[nidx] node[] = ir.stmts[idx] - inst = node[:inst] - if isa(inst, PhiNode) - node[:inst] = rename_phinode_edges(inst, bb, result_order, bb_rename) + stmt = node[:stmt] + if isa(stmt, PhiNode) + node[:stmt] = rename_phinode_edges(stmt, bb, result_order, bb_rename) end end # Now fix up the terminator - terminator = result[inst_range[end]][:inst] + terminator = result[inst_range[end]][:stmt] if isa(terminator, GotoNode) # Convert to implicit fall through if bb_rename[terminator.label] == new_bb + 1 - result[inst_range[end]][:inst] = nothing + result[inst_range[end]][:stmt] = nothing else - result[inst_range[end]][:inst] = GotoNode(bb_rename[terminator.label]) + result[inst_range[end]][:stmt] = GotoNode(bb_rename[terminator.label]) end - elseif isa(terminator, GotoIfNot) - # Check if we need to break the critical edge + elseif isa(terminator, GotoIfNot) || isa(terminator, EnterNode) || isexpr(terminator, :leave) + # Check if we need to break the critical edge or split the block if bb_rename[bb + 1] != new_bb + 1 @assert result_order[new_bb + 1] == 0 # Add an explicit goto node in the next basic block (we accounted for this above) nidx = inst_range[end] + 1 node = result[nidx] - node[:inst], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0 + node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate end - result[inst_range[end]][:inst] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) - elseif !isa(terminator, ReturnNode) - if isa(terminator, Expr) - if terminator.head === :enter - terminator.args[1] = bb_rename[terminator.args[1]] - end + if isa(terminator, GotoIfNot) + result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) + elseif isa(terminator, EnterNode) + result[inst_range[end]][:stmt] = EnterNode(terminator, terminator.catch_dest == 0 ? 
0 : bb_rename[terminator.catch_dest]) + else + @assert isexpr(terminator, :leave) end + elseif !isa(terminator, ReturnNode) if bb_rename[bb + 1] != new_bb + 1 # Add an explicit goto node nidx = inst_range[end] + 1 node = result[nidx] - node[:inst], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0 + node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, NoLineUpdate inst_range = first(inst_range):(last(inst_range) + 1) end end @@ -504,12 +469,12 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) local new_preds, new_succs let bb = bb, bb_rename = bb_rename, result_order = result_order new_preds = Int[bb for bb in (rename_incoming_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].preds) if bb != -1] - new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] + new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] end new_bbs[new_bb] = BasicBlock(inst_range, new_preds, new_succs) end for i in 1:length(result) - result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true) + result[i][:stmt] = renumber_ssa!(result[i][:stmt], inst_rename, true) end cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]]) new_new_nodes = NewNodeStream(length(ir.new_nodes)) @@ -519,12 +484,13 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) new_new_nodes.info[i] = new_new_info new_node = new_new_nodes.stmts[i] new_node[] = ir.new_nodes.stmts[i] - new_node_inst = new_node[:inst] + new_node_inst = new_node[:stmt] if isa(new_node_inst, PhiNode) new_node_inst = rename_phinode_edges(new_node_inst, block_for_inst(ir.cfg, new_info.pos), result_order, bb_rename) end - new_node[:inst] = renumber_ssa!(new_node_inst, inst_rename, true) + new_node[:stmt] = renumber_ssa!(new_node_inst, inst_rename, true) end + ir.debuginfo.codelocs = result.line new_ir = IRCode(ir, result, cfg, new_new_nodes) return new_ir end @@ -569,22 +535,6 @@ function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int}) BlockLiveness(bb_defs, bb_uses) end -function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode, - sptypes::Vector{VarState}, slottypes::Vector{Any}, nstmts::Int, 𝕃ₒ::AbstractLattice) - new_typ = Union{} - for i = 1:length(node.values) - if isa(node, PhiNode) && !isassigned(node.values, i) - continue - end - typ = typ_for_val(node.values[i], ci, sptypes, -1, slottypes) - while isa(typ, DelayedTyp) - typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] - end - new_typ = tmerge(𝕃ₒ, new_typ, typ) - end - return new_typ -end - struct TryCatchRegion enter_block::Int leave_block::Int @@ -592,7 +542,7 @@ end struct NewSlotPhi{Phi} ssaval::NewSSAValue node::Phi - undef_ssaval::Union{NewSSAValue, Nothing} + undef_ssaval::Union{NewSSAValue, Bool} undef_node::Union{Phi, Nothing} end @@ -603,33 +553,26 @@ struct NewPhiCNode2 insert::NewSlotPhi{PhiCNode} end -function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, - defuses::Vector{SlotInfo}, slottypes::Vector{Any}, +function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState, + domtree::DomTree, defuses::Vector{SlotInfo}, 𝕃ₒ::AbstractLattice) - code = ir.stmts.inst + code = ir.stmts.stmt cfg = ir.cfg catch_entry_blocks = TryCatchRegion[] for idx in 1:length(code) stmt = code[idx] - if isexpr(stmt, :enter) + if isa(stmt, EnterNode) push!(catch_entry_blocks, TryCatchRegion( block_for_inst(cfg, idx), - block_for_inst(cfg, stmt.args[1]::Int))) + block_for_inst(cfg, 
stmt.catch_dest))) end end - exc_handlers = IdDict{Int, TryCatchRegion}() - # Record the correct exception handler for all cricitcal sections - for catch_entry_block in catch_entry_blocks - (; enter_block, leave_block) = catch_entry_block - exc_handlers[enter_block+1] = catch_entry_block - # TODO: Cut off here if the terminator is a leave corresponding to this enter - for block in dominated(domtree, enter_block+1) - exc_handlers[block] = catch_entry_block - end - end + # Record the correct exception handler for all critical sections + handler_info = compute_trycatch(code) phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] + live_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] new_phi_nodes = Vector{NewPhiNode2}[NewPhiNode2[] for _ = 1:length(cfg.blocks)] new_phic_nodes = IdDict{Int, Vector{NewPhiCNode2}}() for (; leave_block) in catch_entry_blocks @@ -641,7 +584,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, # TODO: Restore this optimization if false # length(slot.defs) == 1 && slot.any_newvar if slot.defs[] == 0 - typ = slottypes[idx] + typ = sv.slottypes[idx] ssaval = Argument(idx) fixup_uses!(ir, ci, code, slot.uses, idx, ssaval) elseif isa(code[slot.defs[]], NewvarNode) @@ -654,26 +597,37 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, fixup_uses!(ir, ci, code, slot.uses, idx, nothing) else val = code[slot.defs[]].args[2] - typ = typ_for_val(val, ci, ir.sptypes, slot.defs[], slottypes) + typ = typ_for_val(val, ci, ir, slot.defs[], sv.slottypes) ssaval = make_ssa!(ci, code, slot.defs[], typ) fixup_uses!(ir, ci, code, slot.uses, idx, ssaval) end continue end + @timeit "liveness" (live = compute_live_ins(cfg, slot)) for li in live.live_in_bbs + push!(live_slots[li], idx) cidx = findfirst(x::TryCatchRegion->x.leave_block==li, catch_entry_blocks) if cidx !== nothing # The slot is live-in into this block. 
We need to # Create a PhiC node in the catch entry block and # an upsilon node in the corresponding enter block + varstate = sv.bb_vartables[li] + if varstate === nothing + continue + end node = PhiCNode(Any[]) insertpoint = first_insert_for_bb(code, cfg, li) + vt = varstate[idx] phic_ssa = NewSSAValue( insert_node!(ir, insertpoint, - NewInstruction(node, Union{})).id - length(ir.stmts)) + NewInstruction(node, vt.typ)).id - length(ir.stmts)) undef_node = undef_ssaval = nothing - if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0 + if vt.typ === Union{} + undef_ssaval = false + elseif !vt.undef + undef_ssaval = true + else undef_node = PhiCNode(Any[]) undef_ssaval = NewSSAValue(insert_node!(ir, insertpoint, NewInstruction(undef_node, Bool)).id - length(ir.stmts)) @@ -690,10 +644,17 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, for block in phiblocks push!(phi_slots[block], idx) node = PhiNode() + varstate = sv.bb_vartables[block] + @assert varstate !== nothing + vt = varstate[idx] ssaval = NewSSAValue(insert_node!(ir, - first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts)) + first_insert_for_bb(code, cfg, block), NewInstruction(node, vt.typ)).id - length(ir.stmts)) undef_node = undef_ssaval = nothing - if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0 + if vt.typ === Union{} + undef_ssaval = false + elseif !vt.undef + undef_ssaval = true + else undef_node = PhiNode() undef_ssaval = NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, block), NewInstruction(undef_node, Bool)).id - length(ir.stmts)) @@ -713,10 +674,12 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, ] worklist = Tuple{Int, Int, Vector{Pair{Any, Any}}}[(1, 0, initial_incoming_vals)] visited = BitSet() - type_refine_phi = BitSet() new_nodes = ir.new_nodes @timeit "SSA Rename" while !isempty(worklist) - (item::Int, pred, incoming_vals) = pop!(worklist) + (item, pred, incoming_vals) = pop!(worklist) + if sv.bb_vartables[item] === nothing + continue + end # Rename existing phi nodes first, because their uses occur on the edge # TODO: This isn't necessary if inlining stops replacing arguments by slots. for idx in cfg.blocks[item].stmts @@ -748,33 +711,38 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, else push!(node.values, incoming_val) end - outgoing_def = true - if (ci.slotflags[slot] & SLOT_USEDUNDEF) != 0 + if undef_node !== nothing push!(undef_node.edges, pred) push!(undef_node.values, incoming_def) - outgoing_def = undef_ssaval - end - # TODO: Remove the next line, it shouldn't be necessary - push!(type_refine_phi, ssaval.id) - if isa(incoming_val, NewSSAValue) - push!(type_refine_phi, ssaval.id) end - typ = incoming_val === UNDEF_TOKEN ? Union{} : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes) - old_entry = new_nodes.stmts[ssaval.id] - if isa(typ, DelayedTyp) - push!(type_refine_phi, ssaval.id) - end - new_typ = isa(typ, DelayedTyp) ? Union{} : tmerge(𝕃ₒ, old_entry[:type], typ) - old_entry[:type] = new_typ - old_entry[:inst] = node - incoming_vals[slot] = Pair{Any, Any}(ssaval, outgoing_def) + + incoming_vals[slot] = Pair{Any, Any}(ssaval, undef_ssaval) end (item in visited) && continue # Record phi_C nodes if necessary if haskey(new_phic_nodes, item) for (; slot, insert) in new_phic_nodes[item] (; ssaval, undef_ssaval) = insert - incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval === nothing ? 
true : undef_ssaval) + incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval) + end + end + # Record Pi nodes if necessary + has_pinode = fill(false, length(sv.slottypes)) + for slot in live_slots[item] + (ival, idef) = incoming_vals[slot] + (ival === SSAValue(-1)) && continue + (ival === SSAValue(-2)) && continue + (ival === UNDEF_TOKEN) && continue + + varstate = sv.bb_vartables[item] + @assert varstate !== nothing + typ = varstate[slot].typ + if !⊑(𝕃ₒ, sv.slottypes[slot], typ) + node = PiNode(ival, typ) + ival = NewSSAValue(insert_node!(ir, + first_insert_for_bb(code, cfg, item), NewInstruction(node, typ)).id - length(ir.stmts)) + incoming_vals[slot] = Pair{Any, Any}(ival, idef) + has_pinode[slot] = true end end # Record initial upsilon nodes if necessary @@ -785,7 +753,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, (ival, idef) = incoming_vals[slot_id(slot)] ivalundef = ival === UNDEF_TOKEN Υ = NewInstruction(ivalundef ? UpsilonNode() : UpsilonNode(ival), - ivalundef ? Union{} : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)) + ivalundef ? Union{} : typ_for_val(ival, ci, ir, -1, sv.slottypes)) insertpos = first_insert_for_bb(code, cfg, item) # insert `UpsilonNode` immediately before the `:enter` expression Υssa = insert_node!(ir, insertpos, Υ) @@ -803,6 +771,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue if isa(stmt, NewvarNode) incoming_vals[slot_id(stmt.slot)] = Pair{Any, Any}(UNDEF_TOKEN, false) + has_pinode[slot_id(stmt.slot)] = false code[idx] = nothing else stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals) @@ -817,7 +786,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, if isa(arg1, SlotNumber) id = slot_id(arg1) val = stmt.args[2] - typ = typ_for_val(val, ci, ir.sptypes, idx, slottypes) + typ = typ_for_val(val, ci, ir, idx, sv.slottypes) # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch. # Do something reasonable here, by marking the LHS as undef as well. if val !== UNDEF_TOKEN @@ -829,10 +798,14 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, thisdef = false end incoming_vals[id] = Pair{Any, Any}(thisval, thisdef) - enter_block = item - while haskey(exc_handlers, enter_block) - (; enter_block, leave_block) = exc_handlers[enter_block] - cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, new_phic_nodes[leave_block]) + has_pinode[id] = false + enter_idx = idx + while (handler = gethandler(handler_info, enter_idx)) !== nothing + enter_idx = get_enter_idx(handler) + enter_node = code[enter_idx]::EnterNode + leave_block = block_for_inst(cfg, enter_node.catch_dest) + cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, + new_phic_nodes[leave_block]) if cidx !== nothing node = thisdef ? UpsilonNode(thisval) : UpsilonNode() if incoming_vals[id] === UNDEF_TOKEN @@ -852,6 +825,14 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end end end + # Unwrap any PiNodes before continuing, since they weren't considered during our + # dominance frontier calculation and so have to be used locally in each BB. 
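The comment above is why the loop that follows strips `PiNode`s out of `incoming_vals` before the values flow to successor blocks: a `PiNode` asserts a refined type for an existing SSA value on a particular control-flow path, and since these nodes were not part of the dominance-frontier computation they are only used locally within each block, with the underlying value propagated onward. For background (not part of the patch), π nodes are visible in ordinary printed IR; `g` is only an example function.

```
# On the branch where `x !== nothing` holds, the optimizer typically narrows
# `x` with a PiNode, printed as something like `π (_2, Int64)`, before the `+`.
g(x::Union{Int, Nothing}) = x === nothing ? 0 : x + 1
code_typed(g, (Union{Int, Nothing},))
```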
+ for (i, (ival, idef)) in enumerate(incoming_vals) + if has_pinode[i] + stmt = ir[new_to_regular(ival::NewSSAValue, length(ir.stmts))][:stmt] + incoming_vals[i] = Pair{Any, Any}(stmt.val, idef) + end + end for succ in cfg.blocks[item].succs push!(worklist, (succ, item, copy(incoming_vals))) end @@ -871,7 +852,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, nstmts = length(ir.stmts) new_code = Vector{Any}(undef, nstmts) ssavalmap = fill(SSAValue(-1), length(ssavaluetypes) + 1) - result_types = Any[Any for _ in 1:nstmts] # Detect statement positions for assignments and construct array for (bb, idx) in bbidxiter(ir) stmt = code[idx] @@ -886,15 +866,15 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, else new_code[idx] = GotoIfNot(stmt.cond, new_dest) end - elseif isexpr(stmt, :enter) - new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int)) + elseif isa(stmt, EnterNode) + except_bb = stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest) + new_code[idx] = EnterNode(stmt, except_bb) ssavalmap[idx] = SSAValue(idx) # Slot to store token for pop_exception elseif isexpr(stmt, :leave) || isexpr(stmt, :(=)) || isa(stmt, ReturnNode) || isexpr(stmt, :meta) || isa(stmt, NewvarNode) new_code[idx] = stmt else ssavalmap[idx] = SSAValue(idx) - result_types[idx] = ssavaluetypes[idx] if isa(stmt, PhiNode) edges = Int32[edge == 0 ? 0 : block_for_inst(cfg, Int(edge)) for edge in stmt.edges] new_code[idx] = PhiNode(edges, stmt.values) @@ -903,64 +883,17 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end end end - for (_, nodes) in new_phic_nodes - for (; insert) in nodes - (; node, ssaval) = insert - new_typ = Union{} - # TODO: This could just be the ones that depend on other phis - push!(type_refine_phi, ssaval.id) - new_idx = ssaval.id - node = new_nodes.stmts[new_idx] - phic_values = (node[:inst]::PhiCNode).values - for i = 1:length(phic_values) - orig_typ = typ = typ_for_val(phic_values[i], ci, ir.sptypes, -1, slottypes) - while isa(typ, DelayedTyp) - typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] - end - new_typ = tmerge(𝕃ₒ, new_typ, typ) - end - node[:type] = new_typ - end - end - # This is a bit awkward, because it basically duplicates what type - # inference does. 
Ideally, we'd just use this representation earlier - # to make sure phi nodes have accurate types - changed = true - while changed - changed = false - for new_idx in type_refine_phi - node = new_nodes.stmts[new_idx] - new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes, nstmts, 𝕃ₒ) - if !⊑(𝕃ₒ, node[:type], new_typ) || !⊑(𝕃ₒ, new_typ, node[:type]) - node[:type] = new_typ - changed = true - end - end - end - for i in 1:length(result_types) - rt_i = result_types[i] - if rt_i isa DelayedTyp - result_types[i] = types(ir)[new_to_regular(rt_i.phi::NewSSAValue, nstmts)] - end - end - for i = 1:length(new_nodes) - local node = new_nodes.stmts[i] - local typ = node[:type] - if isa(typ, DelayedTyp) - node[:type] = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] - end - end # Renumber SSA values @assert isempty(ir.stmts.type) resize!(ir.stmts.type, nstmts) for i in 1:nstmts local node = ir.stmts[i] - node[:inst] = new_to_regular(renumber_ssa!(new_code[i], ssavalmap), nstmts) - node[:type] = result_types[i] + node[:stmt] = new_to_regular(renumber_ssa!(new_code[i], ssavalmap), nstmts) + node[:type] = ssavaluetypes[i] end for i = 1:length(new_nodes) local node = new_nodes.stmts[i] - node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts) + node[:stmt] = new_to_regular(renumber_ssa!(node[:stmt], ssavalmap), nstmts) end @timeit "domsort" ir = domsort_ssa!(ir, domtree) return ir diff --git a/Compiler/src/ssair/tarjan.jl b/Compiler/src/ssair/tarjan.jl new file mode 100644 index 0000000000000..e73039868c367 --- /dev/null +++ b/Compiler/src/ssair/tarjan.jl @@ -0,0 +1,313 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using .Compiler: DomTree, CFG, BasicBlock, StmtRange, dominates + +struct SCCStackItem + v::Int32 + # which child of `v` to scan + child::Int32 + # the location of `parent` in the stack + parent::Int32 + # the index in the (pre-order traversal of the) DFS tree + preorder::Int32 + # the minimum node (by pre-order index) reachable from any node in the DFS sub-tree rooted at `v` + minpreorder::Int32 + # whether this node is reachable from BasicBlock #1 + live::Bool +end + +function SCCStackItem(item::SCCStackItem; child=item.child, + minpreorder=item.minpreorder, live=item.live) + return SCCStackItem( + item.v, # v + child, # child + item.parent, # parent + item.preorder, # preorder + minpreorder, # minpreorder + live, # live + ) +end + +struct CFGReachability + irreducible::BitVector # BBNumber -> Bool + scc::Vector{Int} # BBNumber -> SCCNumber + domtree::DomTree + + _worklist::Vector{Int} # for node removal + _stack::Vector{SCCStackItem} # for Tarjan's SCC algorithm +end + +function CFGReachability(cfg::CFG, domtree::DomTree) + n_blocks = length(cfg.blocks) + reachability = CFGReachability( + BitVector(undef, n_blocks), # irreducible + zeros(Int, n_blocks), # scc + domtree, # domtree + Int[], # _worklist + SCCStackItem[], # _stack + ) + tarjan!(reachability, cfg; + # reducible back-edges don't need to be considered for reachability + filter = (from::Int,to::Int)->!dominates(domtree, to, from) + ) + return reachability +end + +bb_unreachable(reach::CFGReachability, bb::Int) = reach.scc[bb] == 0 + +bb_in_irreducible_loop(reach::CFGReachability, bb::Int) = reach.irreducible[bb] + +# Returns `true` if a node is 'rooted' as reachable, i.e. it is has an incoming +# edge from a resolved SCC other than its own (or it is BasicBlock #1). 
+# +# `tarjan!` takes the transitive closure of this relation in order to detect +# which BasicBlocks are unreachable. +function _bb_externally_reachable(reach::CFGReachability, cfg::CFG, bb::Int; filter) + (; scc) = reach + bb == 1 && return true + for pred in cfg.blocks[bb].preds + scc[pred] <= 0 && continue + !filter(pred, bb) && continue + @assert scc[pred] != scc[bb] + return true + end + return false +end + +""" + tarjan!(reach::CFGReachability, cfg::CFG, root::Int=1) + +Tarjan's strongly-connected components algorithm. Traverses the CFG starting at `root`, ignoring +nodes with resolved SCC's and updating outputs for all un-resolved nodes. + +Returns true if any node was discovered to be unreachable, false otherwise. + +Outputs: + - `reach.scc`: strongly-connected components, ignoring backedges to (natural) loops + - `reach.irreducible`: true iff a BasicBlock is part of a (non-trivial) SCC / irreducible loop + - `reach._worklist`: if performing an incremental update (`root != 1`), any traversed nodes that + are unreachable from BasicBlock #1 are enqueued to this worklist +""" +function tarjan!(reach::CFGReachability, cfg::CFG; root::Int=1, + filter = (from::Int,to::Int)->true, +) + (; scc, irreducible) = reach + scc[root] != 0 && return scc + live = _bb_externally_reachable(reach, cfg, root; filter) + + # the original algorithm has a separate stack and worklist (unrelated to `reach._worklist`) + # here we use a single combined stack for improved memory/cache efficiency + stack = reach._stack + push!(stack, SCCStackItem( + root, # v + 1, # child + 0, # parent + 1, # preorder + 1, # minpreorder + live, # live + )) + scc[root] = -1 + cursor = length(stack) + + # worklist length before any new unreachable nodes are added + worklist_len = length(reach._worklist) + + # last (pre-order) DFS label assigned to a node + preorder_id = 1 + while true + (; v, child, minpreorder, live) = item = stack[cursor] + + bb = cfg.blocks[v] + if child <= length(bb.succs) # queue next child + stack[cursor] = item = SCCStackItem(item; child=child+1) + succ = bb.succs[child] + + # ignore any edges that don't pass the filter + !filter(convert(Int, v), succ) && continue + + if scc[succ] < 0 + # next child is already in DFS tree + child_preorder = stack[-scc[succ]].preorder + + # only need to update `minpreorder` for `v` + stack[cursor] = item = SCCStackItem(item; + minpreorder=min(minpreorder, child_preorder)) + elseif scc[succ] == 0 + # next child is a new element in DFS tree + preorder_id += 1 + live = live || _bb_externally_reachable(reach, cfg, succ; filter) + push!(stack, SCCStackItem( + succ, # v + 1, # child + cursor, # parent (index in stack) + preorder_id, # preorder + preorder_id, # minpreorder + live, # live + )) + scc[succ] = -length(stack) + cursor = length(stack) + else end # next child is a resolved SCC (do nothing) + else # v's children are processed, finalize v + if item.minpreorder == item.preorder + has_one_element = stack[end].v == v + while true + item = pop!(stack) + if live + scc[item.v] = v + scan_subgraph!(reach, cfg, convert(Int, item.v), + #= filter =# (pred,x)->(filter(pred, x) && scc[x] > typemax(Int)÷2), + #= action =# (x)->(scc[x] -= typemax(Int)÷2;), + ) + else # this offset marks a node as 'maybe-dead' + scc[item.v] = v + typemax(Int)÷2 + push!(reach._worklist, item.v) + end + irreducible[item.v] = !has_one_element + (item.v == v) && break + end + item.parent == 0 && break # all done + elseif live + stack[item.parent] = SCCStackItem(stack[item.parent]; live=true) + end + + # 
update `minpreorder` for parent + parent = stack[item.parent] + minpreorder = min(parent.minpreorder, item.minpreorder) + stack[item.parent] = SCCStackItem(parent; minpreorder) + + cursor = item.parent + end + end + + worklist = reach._worklist + + # filter the worklist, leaving any nodes not proven to be reachable from BB #1 + n_popped = 0 + for i = (worklist_len + 1):length(worklist) + @assert worklist[i] != 1 + @assert scc[worklist[i]] > 0 + if scc[worklist[i]] > typemax(Int)÷2 + # node is unreachable, enqueue it + scc[worklist[i]] = 0 + worklist[i - n_popped] = worklist[i] + else + n_popped += 1 + end + end + resize!(worklist, length(worklist) - n_popped) + + return length(worklist) > worklist_len # if true, a (newly) unreachable node was enqueued +end + +""" +Scan the subtree rooted at `root`, excluding `root` itself + +Note: This function will not detect cycles for you. The `filter` provided must + protect against infinite cycle traversal. +""" +function scan_subgraph!(reach::CFGReachability, cfg::CFG, root::Int, filter, action) + worklist = reach._worklist + start_len = length(worklist) + + push!(worklist, root) + while length(worklist) > start_len + v = pop!(worklist) + for succ in cfg.blocks[v].succs + !filter(v, succ) && continue + action(succ) + push!(worklist, succ) + end + end +end + +function enqueue_if_unreachable!(reach::CFGReachability, cfg::CFG, bb::Int) + (; domtree, scc) = reach + @assert scc[bb] != 0 + + bb == 1 && return false + if bb_in_irreducible_loop(reach, bb) + # irreducible CFG + # this requires a full scan of the irreducible loop + + # any reducible back-edges do not need to be considered as part of reachability + # (very important optimization, since it means reducible CFGs will have no SCCs) + filter = (from::Int, to::Int)->!dominates(domtree, to, from) + + scc′ = scc[bb] + scc[bb] = 0 + scan_subgraph!(reach, cfg, bb, # set this SCC to 0 + #= filter =# (pred,x)->(filter(pred, x) && scc[x] == scc′), + #= action =# (x)->(scc[x] = 0;), + ) + + # re-compute the SCC's for this portion of the CFG, adding any freshly + # unreachable nodes to `reach._worklist` + return tarjan!(reach, cfg; root=bb, filter) + else + # target is a reducible CFG node + # this node lives iff it still has an incoming forward edge + for pred in cfg.blocks[bb].preds + # virtual edge does not count - if the enter is dead, that edge is + # not taken. + pred == 0 && continue + !dominates(domtree, bb, pred) && return false # forward-edge + end + scc[bb] = 0 + push!(reach._worklist, bb) + return true + end +end + +function kill_cfg_edge!(cfg::CFG, from::Int, to::Int) + preds, succs = cfg.blocks[to].preds, cfg.blocks[from].succs + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) + return nothing +end + +""" +Remove from `cfg` and `reach` the edge (from → to), as well as any blocks/edges +this causes to become unreachable. + +Calls: + - `block_callback` for every unreachable block. + - `edge_callback` for every unreachable edge into a reachable block (may also + be called for blocks which are later discovered to be unreachable). 
+""" +function kill_edge!(reach::CFGReachability, cfg::CFG, from::Int, to::Int, + edge_callback=nothing, block_callback=nothing) + (reach.scc[from] == 0) && return # source is already unreachable + @assert reach.scc[to] != 0 + + # delete (from → to) edge + kill_cfg_edge!(cfg, from, to) + + # check for unreachable target + enqueued = enqueue_if_unreachable!(reach, cfg, to) + if !enqueued && edge_callback !== nothing + edge_callback(from, to) + end + while !isempty(reach._worklist) + node = convert(Int, pop!(reach._worklist)) + + # already marked unreachable, just need to notify + @assert reach.scc[node] == 0 && node != 1 + if block_callback !== nothing + block_callback(node) + end + + for succ in cfg.blocks[node].succs + # delete (node → succ) edge + preds = cfg.blocks[succ].preds + deleteat!(preds, findfirst(x::Int->x==node, preds)::Int) + + # check for newly unreachable target + reach.scc[succ] == 0 && continue + enqueued = enqueue_if_unreachable!(reach, cfg, succ) + if !enqueued && edge_callback !== nothing + edge_callback(node, succ) + end + end + empty!(cfg.blocks[node].succs) + end +end diff --git a/base/compiler/ssair/verify.jl b/Compiler/src/ssair/verify.jl similarity index 59% rename from base/compiler/ssair/verify.jl rename to Compiler/src/ssair/verify.jl index 39f56a47e1908..12eb09be693f3 100644 --- a/base/compiler/ssair/verify.jl +++ b/Compiler/src/ssair/verify.jl @@ -1,9 +1,13 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license function maybe_show_ir(ir::IRCode) - if isdefined(Core, :Main) - Core.Main.Base.display(ir) + if isdefined(Core, :Main) && isdefined(Core.Main, :Base) + # ensure we use I/O that does not yield, as this gets called during compilation + invokelatest(Core.Main.Base.show, Core.stdout, "text/plain", ir) + else + Core.show(ir) end + Core.println(Core.stdout) end if !isdefined(@__MODULE__, Symbol("@verify_error")) @@ -20,9 +24,12 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end -is_value_pos_expr_head(head::Symbol) = head === :boundscheck -function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) +is_toplevel_expr_head(head::Symbol) = head === :global || head === :method || head === :thunk +is_value_pos_expr_head(head::Symbol) = head === :static_parameter +function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, + allow_frontend_forms::Bool, @nospecialize(raise_error)) if isa(op, SSAValue) + op.id > 0 || @verify_error "Def ($(op.id)) is invalid in final IR" if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos) else @@ -34,26 +41,29 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, else if op.id >= use_idx @verify_error "Def ($(op.id)) does not dominate use ($(use_idx)) in same BB" - error("") + raise_error() end end else if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) # At the moment, we allow GC preserve tokens outside the standard domination notion @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value %$(op.id) at %$(printed_use_idx))" - error("") + raise_error() end end use_inst = ir[op] - if isa(use_inst[:inst], Union{GotoIfNot, GotoNode, ReturnNode}) + if isa(use_inst[:stmt], Union{GotoIfNot, 
GotoNode, ReturnNode}) && !(isa(use_inst[:stmt], ReturnNode) && !isdefined(use_inst[:stmt], :val)) + # Allow uses of `unreachable`, which may have been inserted when + # an earlier block got deleted, but for some reason we didn't figure + # out yet that this entire block is dead also. @verify_error "At statement %$use_idx: Invalid use of value statement or terminator %$(op.id)" - error("") + raise_error() end elseif isa(op, GlobalRef) if !isdefined(op.mod, op.name) || !isconst(op.mod, op.name) @verify_error "Unbound GlobalRef not allowed in value position" - error("") + raise_error() end elseif isa(op, Expr) # Only Expr(:boundscheck) is allowed in value position @@ -64,15 +74,15 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, elseif !is_value_pos_expr_head(op.head) if !allow_frontend_forms || op.head !== :opaque_closure_method @verify_error "Expr not allowed in value position" - error("") + raise_error() end end elseif isa(op, Union{OldSSAValue, NewSSAValue}) - @verify_error "Left over SSA marker" - error("") - elseif isa(op, UnoptSlot) + @verify_error "At statement %$use_idx: Left over SSA marker ($op)" + raise_error() + elseif isa(op, SlotNumber) @verify_error "Left over slot detected in converted IR" - error("") + raise_error() end end @@ -88,7 +98,50 @@ end function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=false, - 𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance) + 𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance, + mi::Union{Nothing,MethodInstance}=nothing) + function raise_error() + error_args = Any["IR verification failed."] + if isdefined(Core, :Main) && isdefined(Core.Main, :Base) + # ensure we use I/O that does not yield, as this gets called during compilation + firstline = invokelatest(IRShow.debuginfo_firstline, ir.debuginfo) + else + firstline = nothing + end + if firstline !== nothing + file, line = firstline + push!(error_args, "\n", " Code location: ", file, ":", line) + end + if mi !== nothing + push!(error_args, "\n", " Method instance: ", mi) + end + error(error_args...) + end + # Verify CFG graph. 
Must be well formed to construct domtree + if !(length(ir.cfg.blocks) - 1 <= length(ir.cfg.index) <= length(ir.cfg.blocks)) + @verify_error "CFG index length ($(length(ir.cfg.index))) does not correspond to # of blocks $(length(ir.cfg.blocks))" + raise_error() + end + if length(ir.stmts.stmt) != length(ir.stmts) + @verify_error "IR stmt length is invalid $(length(ir.stmts.stmt)) / $(length(ir.stmts))" + raise_error() + end + if length(ir.stmts.type) != length(ir.stmts) + @verify_error "IR type length is invalid $(length(ir.stmts.type)) / $(length(ir.stmts))" + raise_error() + end + if length(ir.stmts.info) != length(ir.stmts) + @verify_error "IR info length is invalid $(length(ir.stmts.info)) / $(length(ir.stmts))" + raise_error() + end + if length(ir.stmts.line) != length(ir.stmts) * 3 + @verify_error "IR line length is invalid $(length(ir.stmts.line)) / $(length(ir.stmts) * 3)" + raise_error() + end + if length(ir.stmts.flag) != length(ir.stmts) + @verify_error "IR flag length is invalid $(length(ir.stmts.flag)) / $(length(ir.stmts))" + raise_error() + end # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG @@ -99,32 +152,44 @@ function verify_ir(ir::IRCode, print::Bool=true, p == 0 && continue if !(1 <= p <= length(ir.cfg.blocks)) @verify_error "Predecessor $p of block $idx out of bounds for IR" - error("") + raise_error() end c = count_int(idx, ir.cfg.blocks[p].succs) if c == 0 @verify_error "Predecessor $p of block $idx not in successor list" - error("") + raise_error() elseif c == 2 if count_int(p, block.preds) != 2 @verify_error "Double edge from $p to $idx not correctly accounted" - error("") + raise_error() end end end for s in block.succs if !(1 <= s <= length(ir.cfg.blocks)) @verify_error "Successor $s of block $idx out of bounds for IR" - error("") + raise_error() end if !(idx in ir.cfg.blocks[s].preds) #Base.@show ir.cfg #Base.@show ir #Base.@show ir.argtypes @verify_error "Successor $s of block $idx not in predecessor list" - error("") + raise_error() end end + if !(1 <= first(block.stmts) <= length(ir.stmts)) + @verify_error "First statement of BB $idx ($(first(block.stmts))) out of bounds for IR (length=$(length(ir.stmts)))" + raise_error() + end + if !(1 <= last(block.stmts) <= length(ir.stmts)) + @verify_error "Last statement of BB $idx ($(last(block.stmts))) out of bounds for IR (length=$(length(ir.stmts)))" + raise_error() + end + if idx <= length(ir.cfg.index) && last(block.stmts) + 1 != ir.cfg.index[idx] + @verify_error "End of BB $idx ($(last(block.stmts))) is not one less than CFG index ($(ir.cfg.index[idx]))" + raise_error() + end end # Verify statements domtree = construct_domtree(ir.cfg.blocks) @@ -132,44 +197,49 @@ function verify_ir(ir::IRCode, print::Bool=true, if first(block.stmts) != last_end + 1 #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)" - error("") + raise_error() end last_end = last(block.stmts) - terminator = ir.stmts[last_end][:inst] + terminator = ir[SSAValue(last_end)][:stmt] bb_unreachable(domtree, idx) && continue if isa(terminator, ReturnNode) if !isempty(block.succs) @verify_error "Block $idx ends in return or unreachable, but has successors" - error("") + raise_error() end elseif isa(terminator, GotoNode) if length(block.succs) != 1 || block.succs[1] != terminator.label - @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator" - error("") 
+ @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator ($(terminator.label))" + raise_error() end elseif isa(terminator, GotoIfNot) if terminator.dest == idx + 1 @verify_error "Block $idx terminator forms a double edge to block $(idx+1)" - error("") + raise_error() end if length(block.succs) != 2 || (block.succs != [terminator.dest, idx+1] && block.succs != [idx+1, terminator.dest]) @verify_error "Block $idx successors ($(block.succs)), does not match GotoIfNot terminator" - error("") + raise_error() end - elseif isexpr(terminator, :enter) + elseif isa(terminator, EnterNode) @label enter_check - if length(block.succs) != 2 || (block.succs != Int[terminator.args[1], idx+1] && block.succs != Int[idx+1, terminator.args[1]]) + if length(block.succs) == 1 + if terminator.catch_dest != 0 + @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator" + raise_error() + end + elseif (block.succs != Int[terminator.catch_dest, idx+1] && block.succs != Int[idx+1, terminator.catch_dest]) @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator" - error("") + raise_error() end else if length(block.succs) != 1 || block.succs[1] != idx + 1 # As a special case, we allow extra statements in the BB of an :enter # statement, until we can do proper CFG manipulations during compaction. - for idx in first(block.stmts):last(block.stmts) - stmt = ir.stmts[idx][:inst] - if isexpr(stmt, :enter) + for stmt_idx in first(block.stmts):last(block.stmts) + stmt = ir[SSAValue(stmt_idx)][:stmt] + if isa(stmt, EnterNode) terminator = stmt @goto enter_check end @@ -183,11 +253,15 @@ function verify_ir(ir::IRCode, print::Bool=true, # here, but that isn't always possible. else @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator %$termidx ($terminator)::$stmttyp" - error("") + raise_error() end end end end + if length(ir.stmts) != last(ir.cfg.blocks[end].stmts) + @verify_error "End of last BB $(last(ir.cfg.blocks[end].stmts)) does not match last IR statement $(length(ir.stmts))" + raise_error() + end lastbb = 0 is_phinode_block = false firstidx = 1 @@ -201,12 +275,12 @@ function verify_ir(ir::IRCode, print::Bool=true, # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. bb_unreachable(domtree, bb) && continue - stmt = ir.stmts[idx][:inst] + stmt = ir[SSAValue(idx)][:stmt] stmt === nothing && continue if isa(stmt, PhiNode) if !is_phinode_block @verify_error "φ node $idx is not at the beginning of the basic block $bb" - error("") + raise_error() end lastphi = idx @assert length(stmt.edges) == length(stmt.values) @@ -217,20 +291,20 @@ function verify_ir(ir::IRCode, print::Bool=true, if edge == edge′ # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is always unique. @verify_error "Edge list φ node $idx in bb $bb not unique (double edge?)" - error("") + raise_error() end end if !(edge == 0 && bb == 1) && !(edge in ir.cfg.blocks[bb].preds) #Base.@show ir.argtypes #Base.@show ir @verify_error "Edge $edge of φ node $idx not in predecessor list" - error("") + raise_error() end edge == 0 && continue if bb_unreachable(domtree, Int(edge)) # TODO: Disallow? 
#@verify_error "Unreachable edge from #$edge should have been cleaned up at idx $idx" - #error("") + #raise_error() continue end isassigned(stmt.values, i) || continue @@ -243,21 +317,23 @@ function verify_ir(ir::IRCode, print::Bool=true, # PhiNode type was $phiT # Value type was $(ir.stmts[val.id][:type]) #""" - #error("") + #raise_error() end end - check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms) + check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, + allow_frontend_forms, raise_error) end continue end - if is_phinode_block && isa(stmt, Union{Expr, UpsilonNode, PhiCNode, SSAValue}) + if is_phinode_block && !is_valid_phiblock_stmt(stmt) if !isa(stmt, Expr) || !is_value_pos_expr_head(stmt.head) # Go back and check that all non-PhiNodes are valid value-position for validate_idx in firstidx:(lastphi-1) - validate_stmt = ir.stmts[validate_idx][:inst] + validate_stmt = ir[SSAValue(validate_idx)][:stmt] isa(validate_stmt, PhiNode) && continue - check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0, allow_frontend_forms) + check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0, + allow_frontend_forms, raise_error) end is_phinode_block = false end @@ -267,40 +343,35 @@ function verify_ir(ir::IRCode, print::Bool=true, val = stmt.values[i] if !isa(val, SSAValue) @verify_error "Operand $i of PhiC node $idx must be an SSA Value." - error("") + raise_error() end - if !isa(ir[val][:inst], UpsilonNode) + if !isa(ir[val][:stmt], UpsilonNode) @verify_error "Operand $i of PhiC node $idx must reference an Upsilon node." - error("") + raise_error() end end - elseif (isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isexpr(stmt, :enter)) && idx != last(ir.cfg.blocks[bb].stmts) - @verify_error "Terminator $idx in bb $bb is not the last statement in the block" - error("") - else - if isa(stmt, Expr) || isa(stmt, ReturnNode) # TODO: make sure everything has line info - if (stmt isa ReturnNode) - if isdefined(stmt, :val) - # TODO: Disallow unreachable returns? 
- # bb_unreachable(domtree, Int64(edge)) - else - #@verify_error "Missing line number information for statement $idx of $ir" - end - end - if !(stmt isa ReturnNode && !isdefined(stmt, :val)) # not actually a return node, but an unreachable marker - if ir.stmts[idx][:line] <= 0 - end - end + elseif isterminator(stmt) + if idx != last(ir.cfg.blocks[bb].stmts) + @verify_error "Terminator $idx in bb $bb is not the last statement in the block" + raise_error() end + if !isa(stmt, ReturnNode) && ir[SSAValue(idx)][:type] !== Any + @verify_error "Explicit terminators (other than ReturnNode) must have `Any` type" + raise_error() + end + else isforeigncall = false if isa(stmt, Expr) if stmt.head === :(=) - if stmt.args[1] isa SSAValue - @verify_error "SSAValue as assignment LHS" - error("") + @verify_error "Assignment should have been removed during SSA conversion" + raise_error() + elseif stmt.head === :isdefined + if length(stmt.args) > 2 + @verify_error "malformed isdefined" + raise_error() end - if stmt.args[2] isa GlobalRef - # undefined GlobalRef as assignment RHS is OK + if stmt.args[1] isa GlobalRef + # undefined GlobalRef is OK in isdefined continue end elseif stmt.head === :gc_preserve_end @@ -311,7 +382,7 @@ function verify_ir(ir::IRCode, print::Bool=true, elseif stmt.head === :foreigncall isforeigncall = true elseif stmt.head === :isdefined && length(stmt.args) == 1 && - (stmt.args[1] isa GlobalRef || isexpr(stmt.args[1], :static_parameter)) + isexpr(stmt.args[1], :static_parameter) # a GlobalRef or static_parameter isdefined check does not evaluate its argument continue elseif stmt.head === :call @@ -320,24 +391,39 @@ function verify_ir(ir::IRCode, print::Bool=true, # TODO: these are not yet linearized continue end + elseif stmt.head === :leave + for i in 1:length(stmt.args) + arg = stmt.args[i] + if !isa(arg, Union{Nothing, SSAValue}) + @verify_error "Malformed :leave - Expected `Nothing` or SSAValue" + raise_error() + elseif isa(arg, SSAValue) + enter_stmt = ir[arg::SSAValue][:stmt] + if !isa(enter_stmt, Nothing) && !isa(enter_stmt, EnterNode) + @verify_error "Malformed :leave - argument ssavalue should point to `nothing` or :enter" + raise_error() + end + end + end end end n = 1 for op in userefs(stmt) op = op[] - check_op(ir, domtree, op, bb, idx, idx, print, isforeigncall, n, allow_frontend_forms) + check_op(ir, domtree, op, bb, idx, idx, print, isforeigncall, n, + allow_frontend_forms, raise_error) n += 1 end end end end -function verify_linetable(linetable::Vector{LineInfoNode}, print::Bool=true) - for i in 1:length(linetable) - line = linetable[i] - if i <= line.inlined_at - @verify_error "Misordered linetable" - error("") +function verify_linetable(di::DebugInfoStream, nstmts::Int, print::Bool=true) + @assert 3nstmts == length(di.codelocs) + for i in 1:nstmts + edge = di.codelocs[3i-1] + if !(edge == 0 || get(di.edges, edge, nothing) isa DebugInfo) + @verify_error "Malformed debuginfo index into edges" end end end diff --git a/Compiler/src/stmtinfo.jl b/Compiler/src/stmtinfo.jl new file mode 100644 index 0000000000000..e3f8e2f56c86b --- /dev/null +++ b/Compiler/src/stmtinfo.jl @@ -0,0 +1,498 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +@nospecialize + +""" + call::CallMeta + +A simple struct that captures both the return type (`call.rt`) +and any additional information (`call.info`) for a given generic call. 
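For illustration only (not part of this change): a `CallMeta` for a call inferred
to return `Int`, raise no exception, and have fully clean effects, with no extra
call info, could be built as

    meta = CallMeta(Int, Union{}, EFFECTS_TOTAL, NoCallInfo())
    meta.rt      # Int
    meta.exct    # Union{}
    meta.effects # EFFECTS_TOTAL

where `EFFECTS_TOTAL` is assumed to be the compiler's constant for fully
known-clean effects.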
+""" +struct CallMeta + rt::Any + exct::Any + effects::Effects + info::CallInfo + refinements # ::Union{Nothing,SlotRefinement,Vector{Any}} + function CallMeta(rt::Any, exct::Any, effects::Effects, info::CallInfo, + refinements=nothing) + @nospecialize rt exct info + return new(rt, exct, effects, info, refinements) + end +end + +struct NoCallInfo <: CallInfo end +add_edges_impl(::Vector{Any}, ::NoCallInfo) = nothing + +""" + info::MethodMatchInfo <: CallInfo + +Captures the essential arguments and result of a `:jl_matching_methods` lookup +for the given call (`info.results`). This info may then be used by the +optimizer, without having to re-consult the method table. +This info is illegal on any statement that is not a call to a generic function. +""" +struct MethodMatchInfo <: CallInfo + results::MethodLookupResult + mt::MethodTable + atype + fullmatch::Bool + edges::Vector{Union{Nothing,CodeInstance}} + function MethodMatchInfo( + results::MethodLookupResult, mt::MethodTable, @nospecialize(atype), fullmatch::Bool) + edges = fill!(Vector{Union{Nothing,CodeInstance}}(undef, length(results)), nothing) + return new(results, mt, atype, fullmatch, edges) + end +end +add_edges_impl(edges::Vector{Any}, info::MethodMatchInfo) = _add_edges_impl(edges, info) +function _add_edges_impl(edges::Vector{Any}, info::MethodMatchInfo, mi_edge::Bool=false) + if !fully_covering(info) + # add legacy-style missing backedge info also + exists = false + for i in 2:length(edges) + if edges[i] === info.mt && edges[i-1] == info.atype + exists = true + break + end + end + if !exists + push!(edges, info.atype) + push!(edges, info.mt) + end + end + nmatches = length(info.results) + if nmatches == length(info.edges) == 1 + # try the optimized format for the representation, if possible and applicable + # if this doesn't succeed, the backedge will be less precise, + # but the forward edge will maintain the precision + edge = info.edges[1] + m = info.results[1] + if edge === nothing + mi = specialize_method(m) # don't allow `Method`-edge for this optimized format + edge = mi + else + mi = edge.def::MethodInstance + end + if mi.specTypes === m.spec_types + add_one_edge!(edges, edge) + return nothing + end + end + # add check for whether this lookup already existed in the edges list + for i in 1:length(edges) + if edges[i] === nmatches && edges[i+1] == info.atype + # TODO: must also verify the CodeInstance match too + return nothing + end + end + push!(edges, nmatches, info.atype) + for i = 1:nmatches + edge = info.edges[i] + m = info.results[i] + if edge === nothing + edge = mi_edge ? 
specialize_method(m) : m.method + else + @assert edge.def.def === m.method + end + push!(edges, edge) + end + nothing +end +function add_one_edge!(edges::Vector{Any}, edge::MethodInstance) + i = 1 + while i <= length(edges) + edgeᵢ = edges[i] + edgeᵢ isa Int && (i += 2 + edgeᵢ; continue) + edgeᵢ isa CodeInstance && (edgeᵢ = get_ci_mi(edgeᵢ)) + edgeᵢ isa MethodInstance || (i += 1; continue) + if edgeᵢ === edge && !(i > 1 && edges[i-1] isa Type) + return # found existing covered edge + end + i += 1 + end + push!(edges, edge) + nothing +end +function add_one_edge!(edges::Vector{Any}, edge::CodeInstance) + i = 1 + while i <= length(edges) + edgeᵢ_orig = edgeᵢ = edges[i] + edgeᵢ isa Int && (i += 2 + edgeᵢ; continue) + edgeᵢ isa CodeInstance && (edgeᵢ = get_ci_mi(edgeᵢ)) + edgeᵢ isa MethodInstance || (i += 1; continue) + if edgeᵢ === edge.def && !(i > 1 && edges[i-1] isa Type) + if edgeᵢ_orig isa MethodInstance + # found edge we can upgrade + edges[i] = edge + return + elseif true # XXX compare `CodeInstance` identify? + return + end + end + i += 1 + end + push!(edges, edge) + nothing +end +nsplit_impl(info::MethodMatchInfo) = 1 +getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results) +getresult_impl(::MethodMatchInfo, ::Int) = nothing + +""" + info::UnionSplitInfo <: CallInfo + +If inference decides to partition the method search space by splitting unions, +it will issue a method lookup query for each such partition. This info indicates +that such partitioning happened and wraps the corresponding `MethodMatchInfo` for +each partition (`info.matches::Vector{MethodMatchInfo}`). +This info is illegal on any statement that is not a call to a generic function. +""" +struct UnionSplitInfo <: CallInfo + split::Vector{MethodMatchInfo} +end +add_edges_impl(edges::Vector{Any}, info::UnionSplitInfo) = + _add_edges_impl(edges, info) +_add_edges_impl(edges::Vector{Any}, info::UnionSplitInfo, mi_edge::Bool=false) = + for split in info.split; _add_edges_impl(edges, split, mi_edge); end +nsplit_impl(info::UnionSplitInfo) = length(info.split) +getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit(info.split[idx], 1) +getresult_impl(::UnionSplitInfo, ::Int) = nothing + +abstract type ConstResult end + +struct ConstPropResult <: ConstResult + result::InferenceResult +end + +struct ConcreteResult <: ConstResult + edge::CodeInstance + effects::Effects + result + ConcreteResult(edge::CodeInstance, effects::Effects) = new(edge, effects) + ConcreteResult(edge::CodeInstance, effects::Effects, @nospecialize val) = new(edge, effects, val) +end + +struct SemiConcreteResult <: ConstResult + edge::CodeInstance + ir::IRCode + effects::Effects + spec_info::SpecInfo +end + +# XXX Technically this does not represent a result of constant inference, but rather that of +# regular edge inference. It might be more appropriate to rename `ConstResult` and +# `ConstCallInfo` to better reflect the fact that they represent either of local or +# volatile inference result. +struct VolatileInferenceResult <: ConstResult + inf_result::InferenceResult +end + +""" + info::ConstCallInfo <: CallInfo + +The precision of this call was improved using constant information. +In addition to the original call information `info.call`, this info also keeps the results +of constant inference `info.results::Vector{Union{Nothing,ConstResult}}`. 
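For illustration only (not part of this change): each entry of `info.results`
corresponds to one matched method of the wrapped call, so a consumer can inspect
which matches were refined by constant inference roughly like

    for (i, r) in enumerate(info.results)
        r === nothing && continue        # this match was not refined
        if r isa ConstPropResult
            # r.result::InferenceResult holds the const-propagated result
        elseif r isa ConcreteResult
            # r.edge::CodeInstance was concretely evaluated, with effects r.effects
        end
    end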
+""" +struct ConstCallInfo <: CallInfo + call::Union{MethodMatchInfo,UnionSplitInfo} + results::Vector{Union{Nothing,ConstResult}} +end +add_edges_impl(edges::Vector{Any}, info::ConstCallInfo) = add_edges!(edges, info.call) +nsplit_impl(info::ConstCallInfo) = nsplit(info.call) +getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx) +getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx] + +""" + info::MethodResultPure <: CallInfo + +This struct represents a method result constant was proven to be effect-free. +""" +struct MethodResultPure <: CallInfo + info::CallInfo +end +let instance = MethodResultPure(NoCallInfo()) + global MethodResultPure + MethodResultPure() = instance +end +add_edges_impl(edges::Vector{Any}, info::MethodResultPure) = add_edges!(edges, info.info) + +""" + ainfo::AbstractIterationInfo + +Captures all the information for abstract iteration analysis of a single value. +Each (abstract) call to `iterate`, corresponds to one entry in `ainfo.each::Vector{CallMeta}`. +""" +struct AbstractIterationInfo + each::Vector{CallMeta} + complete::Bool +end + +const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo} + +""" + info::ApplyCallInfo <: CallInfo + +This info applies to any call of `_apply_iterate(...)` and captures both the +info of the actual call being applied and the info for any implicit call +to the `iterate` function. Note that it is possible for the call itself +to be yet another `_apply_iterate`, in which case the `info.call` field will +be another `ApplyCallInfo`. This info is illegal on any statement that is +not an `_apply_iterate` call. +""" +struct ApplyCallInfo <: CallInfo + # The info for the call itself + call::CallInfo + # AbstractIterationInfo for each argument, if applicable + arginfo::Vector{MaybeAbstractIterationInfo} +end +function add_edges_impl(edges::Vector{Any}, info::ApplyCallInfo) + add_edges!(edges, info.call) + for arg in info.arginfo + arg === nothing && continue + for edge in arg.each + add_edges!(edges, edge.info) + end + end +end + +""" + info::UnionSplitApplyCallInfo <: CallInfo + +Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`. +This info is illegal on any statement that is not an `_apply_iterate` call. +""" +struct UnionSplitApplyCallInfo <: CallInfo + infos::Vector{ApplyCallInfo} +end +add_edges_impl(edges::Vector{Any}, info::UnionSplitApplyCallInfo) = + for split in info.infos; add_edges!(edges, split); end + +""" + info::InvokeCICallInfo + +Represents a resolved call to `Core.invoke` targeting a `Core.CodeInstance` +""" +struct InvokeCICallInfo <: CallInfo + edge::CodeInstance +end +add_edges_impl(edges::Vector{Any}, info::InvokeCICallInfo) = + add_inlining_edge!(edges, info.edge) + +""" + info::InvokeCallInfo + +Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMatch` of +the method that has been processed. +Optionally keeps `info.result::InferenceResult` that keeps constant information. +""" +struct InvokeCallInfo <: CallInfo + edge::Union{Nothing,CodeInstance} + match::MethodMatch + result::Union{Nothing,ConstResult} + atype # ::Type +end +add_edges_impl(edges::Vector{Any}, info::InvokeCallInfo) = + _add_edges_impl(edges, info) +function _add_edges_impl(edges::Vector{Any}, info::InvokeCallInfo, mi_edge::Bool=false) + edge = info.edge + if edge === nothing + edge = mi_edge ? 
specialize_method(info.match) : info.match.method + end + add_invoke_edge!(edges, info.atype, edge) + nothing +end +function add_invoke_edge!(edges::Vector{Any}, @nospecialize(atype), edge::Union{MethodInstance,Method}) + for i in 2:length(edges) + edgeᵢ = edges[i] + edgeᵢ isa CodeInstance && (edgeᵢ = edgeᵢ.def) + edgeᵢ isa MethodInstance || edgeᵢ isa Method || continue + if edgeᵢ === edge + edge_minus_1 = edges[i-1] + if edge_minus_1 isa Type && edge_minus_1 == atype + return # found existing covered edge + end + end + end + push!(edges, atype) + push!(edges, edge) + nothing +end +function add_invoke_edge!(edges::Vector{Any}, @nospecialize(atype), edge::CodeInstance) + for i in 2:length(edges) + edgeᵢ_orig = edgeᵢ = edges[i] + edgeᵢ isa CodeInstance && (edgeᵢ = edgeᵢ.def) + if ((edgeᵢ isa MethodInstance && edgeᵢ === edge.def) || + (edgeᵢ isa Method && edgeᵢ === edge.def.def)) + edge_minus_1 = edges[i-1] + if edge_minus_1 isa Type && edge_minus_1 == atype + if edgeᵢ_orig isa MethodInstance || edgeᵢ_orig isa Method + # found edge we can upgrade + edges[i] = edge + return + elseif true # XXX compare `CodeInstance` identify? + return + end + end + end + end + push!(edges, atype) + push!(edges, edge) + nothing +end + +function add_inlining_edge!(edges::Vector{Any}, edge::MethodInstance) + # check if we already have an edge to this code + i = 1 + while i <= length(edges) + edgeᵢ = edges[i] + if edgeᵢ isa Method && edgeᵢ === edge.def + # found edge we can upgrade + edges[i] = edge + return + end + edgeᵢ isa CodeInstance && (edgeᵢ = edgeᵢ.def) + if edgeᵢ isa MethodInstance && edgeᵢ === edge + return # found existing covered edge + end + i += 1 + end + # add_invoke_edge alone + push!(edges, (edge.def::Method).sig) + push!(edges, edge) + nothing +end +function add_inlining_edge!(edges::Vector{Any}, edge::CodeInstance) + # check if we already have an edge to this code + i = 1 + while i <= length(edges) + edgeᵢ = edges[i] + if edgeᵢ isa Method && edgeᵢ === edge.def.def + # found edge we can upgrade + edges[i] = edge + return + end + if edgeᵢ isa MethodInstance && edgeᵢ === edge.def + # found edge we can upgrade + edges[i] = edge + return + end + if edgeᵢ isa CodeInstance && edgeᵢ.def === edge.def + # found existing edge + # XXX compare `CodeInstance` identify? + return + end + i += 1 + end + # add_invoke_edge alone + push!(edges, (get_ci_mi(edge).def::Method).sig) + push!(edges, edge) + nothing +end + + +""" + info::OpaqueClosureCallInfo + +Represents a resolved call of opaque closure, carrying the `info.match::MethodMatch` of +the method that has been processed. +Optionally keeps `info.result::InferenceResult` that keeps constant information. +""" +struct OpaqueClosureCallInfo <: CallInfo + edge::Union{Nothing,CodeInstance} + match::MethodMatch + result::Union{Nothing,ConstResult} +end +function add_edges_impl(edges::Vector{Any}, info::OpaqueClosureCallInfo) + edge = info.edge + if edge !== nothing + add_one_edge!(edges, edge) + end + nothing +end + +""" + info::OpaqueClosureCreateInfo <: CallInfo + +This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta` +carrying out inference result of an unreal, partially specialized call (i.e. specialized on +the closure environment, but not on the argument types of the opaque closure) in order to +allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it. 
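For illustration only (not part of this change), a consumer of this info would
typically read the inferred return type of that partially specialized call:

    if info isa OpaqueClosureCreateInfo
        oc_rt = info.unspec.rt  # return type the optimizer may narrow the OpaqueClosure to
    end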
+""" +struct OpaqueClosureCreateInfo <: CallInfo + unspec::CallMeta + function OpaqueClosureCreateInfo(unspec::CallMeta) + @assert isa(unspec.info, Union{OpaqueClosureCallInfo, NoCallInfo}) + return new(unspec) + end +end +# merely creating the object implies edges for OC, unlike normal objects, +# since calling them doesn't normally have edges in contrast +add_edges_impl(edges::Vector{Any}, info::OpaqueClosureCreateInfo) = add_edges!(edges, info.unspec.info) + +# Stmt infos that are used by external consumers, but not by optimization. +# These are not produced by default and must be explicitly opted into by +# the AbstractInterpreter. + +""" + info::ReturnTypeCallInfo <: CallInfo + +Represents a resolved call of `Core.Compiler.return_type`. +`info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call +was supposed to analyze. +""" +struct ReturnTypeCallInfo <: CallInfo + info::CallInfo +end +add_edges_impl(edges::Vector{Any}, info::ReturnTypeCallInfo) = add_edges!(edges, info.info) + +""" + info::FinalizerInfo <: CallInfo + +Represents the information of a potential (later) call to the finalizer on the given +object type. +""" +struct FinalizerInfo <: CallInfo + info::CallInfo # the callinfo for the finalizer call + effects::Effects # the effects for the finalizer call +end +# merely allocating a finalizer does not imply edges (unless it gets inlined later) +add_edges_impl(::Vector{Any}, ::FinalizerInfo) = nothing + +""" + info::ModifyOpInfo <: CallInfo + +Represents a resolved call of one of: + - `modifyfield!(obj, name, op, x, [order])` + - `modifyglobal!(mod, var, op, x, order)` + - `memoryrefmodify!(memref, op, x, order, boundscheck)` + - `Intrinsics.atomic_pointermodify(ptr, op, x, order)` + +`info.info` wraps the call information of `op(getval(), x)`. +""" +struct ModifyOpInfo <: CallInfo + info::CallInfo # the callinfo for the `op(getval(), x)` call +end +add_edges_impl(edges::Vector{Any}, info::ModifyOpInfo) = add_edges!(edges, info.info) + +struct VirtualMethodMatchInfo <: CallInfo + info::Union{MethodMatchInfo,UnionSplitInfo,InvokeCallInfo} +end +add_edges_impl(edges::Vector{Any}, info::VirtualMethodMatchInfo) = + _add_edges_impl(edges, info.info, #=mi_edge=#true) + +""" + info::GlobalAccessInfo <: CallInfo + +Represents access to a global through runtime reflection, rather than as a manifest +`GlobalRef` in the source code. Used for builtins (getglobal/setglobal/etc.) that +perform such accesses. +""" +struct GlobalAccessInfo <: CallInfo + bpart::Core.BindingPartition +end +GlobalAccessInfo(::Nothing) = NoCallInfo() +add_edges_impl(edges::Vector{Any}, info::GlobalAccessInfo) = + push!(edges, info.bpart) + +@specialize diff --git a/base/compiler/tfuncs.jl b/Compiler/src/tfuncs.jl similarity index 65% rename from base/compiler/tfuncs.jl rename to Compiler/src/tfuncs.jl index f5690f4e5b8d6..cfb865b06e9e5 100644 --- a/base/compiler/tfuncs.jl +++ b/Compiler/src/tfuncs.jl @@ -42,11 +42,10 @@ macro nospecs(ex) push!(names, arg) end @assert isexpr(body, :block) - if !isempty(names) - lin = first(body.args)::LineNumberNode - nospec = Expr(:macrocall, Symbol("@nospecialize"), lin, names...) - insert!(body.args, 2, nospec) - end + isempty(names) && throw(ArgumentError("no arguments for @nospec")) + lin = first(body.args)::LineNumberNode + nospec = Expr(:macrocall, GlobalRef(@__MODULE__, :var"@nospecialize"), lin, names...) 
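# (Editorial note, not part of this patch.) At this point `nospec` is the
# expression `@nospecialize(name1, name2, ...)` built from the collected
# argument names; inserting it as the second statement of the body, right after
# the leading LineNumberNode, has the same effect as writing `@nospecialize`
# by hand at the top of the function definition.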
+ insert!(body.args, 2, nospec) return esc(ex) end @@ -89,31 +88,38 @@ function add_tfunc(@nospecialize(f::Builtin), minarg::Int, maxarg::Int, @nospeci end add_tfunc(throw, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0) +add_tfunc(Core.throw_methoderror, 1, INT_INF, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0) # the inverse of typeof_tfunc # returns (type, isexact, isconcrete, istype) # if isexact is false, the actual runtime type may (will) be a subtype of t # if isconcrete is true, the actual runtime type is definitely concrete (unreachable if not valid as a typeof) # if istype is true, the actual runtime value will definitely be a type (e.g. this is false for Union{Type{Int}, Int}) -function instanceof_tfunc(@nospecialize(t)) +function instanceof_tfunc(@nospecialize(t), astag::Bool=false, @nospecialize(troot) = t) if isa(t, Const) - if isa(t.val, Type) && valid_as_lattice(t.val) + if isa(t.val, Type) && valid_as_lattice(t.val, astag) return t.val, true, isconcretetype(t.val), true end return Bottom, true, false, false # runtime throws on non-Type end t = widenconst(t) + troot = widenconst(troot) if t === Bottom return Bottom, true, true, false # runtime unreachable elseif t === typeof(Bottom) || !hasintersect(t, Type) return Bottom, true, false, false # literal Bottom or non-Type elseif isType(t) tp = t.parameters[1] - valid_as_lattice(tp) || return Bottom, true, false, false # runtime unreachable / throws on non-Type + valid_as_lattice(tp, astag) || return Bottom, true, false, false # runtime unreachable / throws on non-Type + if troot isa UnionAll + # Free `TypeVar`s inside `Type` has violated the "diagonal" rule. + # Widen them before `UnionAll` rewraping to relax concrete constraint. + tp = widen_diagonal(tp, troot) + end return tp, !has_free_typevars(tp), isconcretetype(tp), true elseif isa(t, UnionAll) t′ = unwrap_unionall(t) - t′′, isexact, isconcrete, istype = instanceof_tfunc(t′) + t′′, isexact, isconcrete, istype = instanceof_tfunc(t′, astag, rewrap_unionall(t, troot)) tr = rewrap_unionall(t′′, t) if t′′ isa DataType && t′′.name !== Tuple.name && !has_free_typevars(tr) # a real instance must be within the declared bounds of the type, @@ -128,8 +134,8 @@ function instanceof_tfunc(@nospecialize(t)) end return tr, isexact, isconcrete, istype elseif isa(t, Union) - ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a) - tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b) + ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(unwraptv(t.a), astag, troot) + tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(unwraptv(t.b), astag, troot) isconcrete = isconcrete_a && isconcrete_b istype = istype_a && istype_b # most users already handle the Union case, so here we assume that @@ -149,14 +155,14 @@ end # ---------- @nospecs bitcast_tfunc(𝕃::AbstractLattice, t, x) = bitcast_tfunc(widenlattice(𝕃), t, x) -@nospecs bitcast_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t)[1] +@nospecs bitcast_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t, true)[1] @nospecs conversion_tfunc(𝕃::AbstractLattice, t, x) = conversion_tfunc(widenlattice(𝕃), t, x) -@nospecs conversion_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t)[1] +@nospecs conversion_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t, true)[1] -add_tfunc(bitcast, 2, 2, bitcast_tfunc, 1) -add_tfunc(sext_int, 2, 2, conversion_tfunc, 1) -add_tfunc(zext_int, 2, 2, conversion_tfunc, 1) -add_tfunc(trunc_int, 2, 2, conversion_tfunc, 1) +add_tfunc(bitcast, 2, 2, bitcast_tfunc, 0) +add_tfunc(sext_int, 2, 2, 
conversion_tfunc, 0) +add_tfunc(zext_int, 2, 2, conversion_tfunc, 0) +add_tfunc(trunc_int, 2, 2, conversion_tfunc, 0) add_tfunc(fptoui, 2, 2, conversion_tfunc, 1) add_tfunc(fptosi, 2, 2, conversion_tfunc, 1) add_tfunc(uitofp, 2, 2, conversion_tfunc, 1) @@ -170,41 +176,74 @@ add_tfunc(fpext, 2, 2, conversion_tfunc, 1) @nospecs math_tfunc(𝕃::AbstractLattice, args...) = math_tfunc(widenlattice(𝕃), args...) @nospecs math_tfunc(::JLTypeLattice, x, xs...) = widenconst(x) -add_tfunc(neg_int, 1, 1, math_tfunc, 1) +add_tfunc(neg_int, 1, 1, math_tfunc, 0) add_tfunc(add_int, 2, 2, math_tfunc, 1) add_tfunc(sub_int, 2, 2, math_tfunc, 1) -add_tfunc(mul_int, 2, 2, math_tfunc, 4) -add_tfunc(sdiv_int, 2, 2, math_tfunc, 30) -add_tfunc(udiv_int, 2, 2, math_tfunc, 30) -add_tfunc(srem_int, 2, 2, math_tfunc, 30) -add_tfunc(urem_int, 2, 2, math_tfunc, 30) -add_tfunc(add_ptr, 2, 2, math_tfunc, 1) -add_tfunc(sub_ptr, 2, 2, math_tfunc, 1) +add_tfunc(mul_int, 2, 2, math_tfunc, 3) +add_tfunc(sdiv_int, 2, 2, math_tfunc, 20) +add_tfunc(udiv_int, 2, 2, math_tfunc, 20) +add_tfunc(srem_int, 2, 2, math_tfunc, 20) +add_tfunc(urem_int, 2, 2, math_tfunc, 20) add_tfunc(neg_float, 1, 1, math_tfunc, 1) -add_tfunc(add_float, 2, 2, math_tfunc, 1) -add_tfunc(sub_float, 2, 2, math_tfunc, 1) -add_tfunc(mul_float, 2, 2, math_tfunc, 4) -add_tfunc(div_float, 2, 2, math_tfunc, 20) -add_tfunc(fma_float, 3, 3, math_tfunc, 5) -add_tfunc(muladd_float, 3, 3, math_tfunc, 5) +add_tfunc(add_float, 2, 2, math_tfunc, 2) +add_tfunc(sub_float, 2, 2, math_tfunc, 2) +add_tfunc(mul_float, 2, 2, math_tfunc, 8) +add_tfunc(div_float, 2, 2, math_tfunc, 10) +add_tfunc(min_float, 2, 2, math_tfunc, 1) +add_tfunc(max_float, 2, 2, math_tfunc, 1) +add_tfunc(fma_float, 3, 3, math_tfunc, 8) +add_tfunc(muladd_float, 3, 3, math_tfunc, 8) # fast arithmetic add_tfunc(neg_float_fast, 1, 1, math_tfunc, 1) -add_tfunc(add_float_fast, 2, 2, math_tfunc, 1) -add_tfunc(sub_float_fast, 2, 2, math_tfunc, 1) -add_tfunc(mul_float_fast, 2, 2, math_tfunc, 2) +add_tfunc(add_float_fast, 2, 2, math_tfunc, 2) +add_tfunc(sub_float_fast, 2, 2, math_tfunc, 2) +add_tfunc(mul_float_fast, 2, 2, math_tfunc, 8) add_tfunc(div_float_fast, 2, 2, math_tfunc, 10) +add_tfunc(min_float_fast, 2, 2, math_tfunc, 1) +add_tfunc(max_float_fast, 2, 2, math_tfunc, 1) # bitwise operators # ----------------- +@nospecs and_int_tfunc(𝕃::AbstractLattice, x, y) = and_int_tfunc(widenlattice(𝕃), x, y) +@nospecs function and_int_tfunc(𝕃::ConstsLattice, x, y) + if isa(x, Const) && x.val === false && widenconst(y) === Bool + return Const(false) + elseif isa(y, Const) && y.val === false && widenconst(x) === Bool + return Const(false) + end + return and_int_tfunc(widenlattice(𝕃), x, y) +end +@nospecs and_int_tfunc(::JLTypeLattice, x, y) = widenconst(x) + +@nospecs or_int_tfunc(𝕃::AbstractLattice, x, y) = or_int_tfunc(widenlattice(𝕃), x, y) +@nospecs function or_int_tfunc(𝕃::ConstsLattice, x, y) + if isa(x, Const) && x.val === true && widenconst(y) === Bool + return Const(true) + elseif isa(y, Const) && y.val === true && widenconst(x) === Bool + return Const(true) + end + return or_int_tfunc(widenlattice(𝕃), x, y) +end +@nospecs or_int_tfunc(::JLTypeLattice, x, y) = widenconst(x) + @nospecs shift_tfunc(𝕃::AbstractLattice, x, y) = shift_tfunc(widenlattice(𝕃), x, y) @nospecs shift_tfunc(::JLTypeLattice, x, y) = widenconst(x) -add_tfunc(and_int, 2, 2, math_tfunc, 1) -add_tfunc(or_int, 2, 2, math_tfunc, 1) +function not_tfunc(𝕃::AbstractLattice, @nospecialize(b)) + if isa(b, Conditional) + return Conditional(b.slot, 
b.elsetype, b.thentype) + elseif isa(b, Const) + return Const(not_int(b.val)) + end + return math_tfunc(𝕃, b) +end + +add_tfunc(and_int, 2, 2, and_int_tfunc, 1) +add_tfunc(or_int, 2, 2, or_int_tfunc, 1) add_tfunc(xor_int, 2, 2, math_tfunc, 1) -add_tfunc(not_int, 1, 1, math_tfunc, 0) # usually used as not_int(::Bool) to negate a condition +add_tfunc(not_int, 1, 1, not_tfunc, 0) # usually used as not_int(::Bool) to negate a condition add_tfunc(shl_int, 2, 2, shift_tfunc, 1) add_tfunc(lshr_int, 2, 2, shift_tfunc, 1) add_tfunc(ashr_int, 2, 2, shift_tfunc, 1) @@ -258,12 +297,12 @@ add_tfunc(le_float_fast, 2, 2, cmp_tfunc, 1) @nospecs chk_tfunc(𝕃::AbstractLattice, x, y) = chk_tfunc(widenlattice(𝕃), x, y) @nospecs chk_tfunc(::JLTypeLattice, x, y) = Tuple{widenconst(x), Bool} -add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10) +add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 5) +add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 5) # other, misc # ----------- @@ -281,7 +320,6 @@ end add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5) add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bool), 1) -add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Int), 4) # builtin functions # ================= @@ -304,7 +342,7 @@ end add_tfunc(Core.ifelse, 3, 3, ifelse_tfunc, 1) @nospecs function ifelse_nothrow(𝕃::AbstractLattice, cond, x, y) - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) return cond ⊑ Bool end @@ -353,7 +391,7 @@ function isdefined_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}) return isdefined_nothrow(𝕃, argtypes[1], argtypes[2]) end @nospecs function isdefined_nothrow(𝕃::AbstractLattice, x, name) - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) isvarargtype(x) && return false isvarargtype(name) && return false if hasintersect(widenconst(x), Module) @@ -367,22 +405,12 @@ end return isdefined_tfunc(𝕃, arg1, sym) end @nospecs function isdefined_tfunc(𝕃::AbstractLattice, arg1, sym) - if isa(arg1, Const) - arg1t = typeof(arg1.val) - else - arg1t = widenconst(arg1) - end - if isType(arg1t) - return Bool - end + arg1t = arg1 isa Const ? typeof(arg1.val) : isconstType(arg1) ? 
typeof(arg1.parameters[1]) : widenconst(arg1) a1 = unwrap_unionall(arg1t) if isa(a1, DataType) && !isabstracttype(a1) if a1 === Module hasintersect(widenconst(sym), Symbol) || return Bottom - if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) && - isdefined(arg1.val::Module, sym.val::Symbol) - return Const(true) - end + # isa(sym, Const) case intercepted in abstract interpretation elseif isa(sym, Const) val = sym.val if isa(val, Symbol) @@ -392,7 +420,7 @@ end else return Bottom end - if 1 <= idx <= datatype_min_ninitialized(a1) + if 1 ≤ idx ≤ datatype_min_ninitialized(a1) return Const(true) elseif a1.name === _NAMEDTUPLE_NAME if isconcretetype(a1) @@ -400,15 +428,20 @@ end else ns = a1.parameters[1] if isa(ns, Tuple) - return Const(1 <= idx <= length(ns)) + return Const(1 ≤ idx ≤ length(ns)) end end - elseif idx <= 0 || (!isvatuple(a1) && idx > fieldcount(a1)) + elseif idx ≤ 0 || (!isvatuple(a1) && idx > fieldcount(a1)) return Const(false) elseif isa(arg1, Const) - arg1v = (arg1::Const).val - if !ismutable(arg1v) || isdefined(arg1v, idx) || isconst(typeof(arg1v), idx) - return Const(isdefined(arg1v, idx)) + if !ismutabletype(a1) || isconst(a1, idx) + return Const(isdefined(arg1.val, idx)) + end + elseif isa(arg1, PartialStruct) + if !isvarargtype(arg1.fields[end]) + if 1 ≤ idx ≤ length(arg1.fields) + return Const(true) + end end elseif !isvatuple(a1) fieldT = fieldtype(a1, idx) @@ -439,7 +472,7 @@ function sizeof_nothrow(@nospecialize(x)) return sizeof_nothrow(rewrap_unionall(xu.a, x)) && sizeof_nothrow(rewrap_unionall(xu.b, x)) end - t, exact, isconcrete = instanceof_tfunc(x) + t, exact, isconcrete = instanceof_tfunc(x, false) if t === Bottom # x must be an instance (not a Type) or is the Bottom type object x = widenconst(x) @@ -466,8 +499,8 @@ function sizeof_nothrow(@nospecialize(x)) end function _const_sizeof(@nospecialize(x)) - # Constant Vector does not have constant size - isa(x, Vector) && return Int + # Constant GenericMemory does not have constant size + isa(x, GenericMemory) && return Int size = try Core.sizeof(x) catch ex @@ -491,7 +524,7 @@ end end # Core.sizeof operates on either a type or a value. First check which # case we're in. - t, exact = instanceof_tfunc(x) + t, exact = instanceof_tfunc(x, false) if t !== Bottom # The value corresponding to `x` at runtime could be a type. # Normalize the query to ask about that type. 
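As an aside (not part of this patch), the `(type, isexact, isconcrete, istype)`
convention documented above `instanceof_tfunc` can be seen on a few simple
inputs; these calls assume the compiler module is in scope under the alias `CC`:

    CC.instanceof_tfunc(CC.Const(Int))  # (Int64, true, true, true)
    CC.instanceof_tfunc(Type{Int})      # (Int64, true, true, true)
    CC.instanceof_tfunc(CC.Const(1))    # (Union{}, true, false, false): throws at runtime on a non-Type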
@@ -530,9 +563,9 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1) end end if isa(x, Union) - na = nfields_tfunc(𝕃, x.a) + na = nfields_tfunc(𝕃, unwraptv(x.a)) na === Int && return Int - return tmerge(na, nfields_tfunc(𝕃, x.b)) + return tmerge(𝕃, na, nfields_tfunc(𝕃, unwraptv(x.b))) end return Int end @@ -568,12 +601,21 @@ add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVec return TypeVar end end - tv = TypeVar(nval, lb, ub) - return PartialTypeVar(tv, lb_certain, ub_certain) + lb_valid = lb isa Type || lb isa TypeVar + ub_valid = ub isa Type || ub isa TypeVar + if lb_valid && ub_valid + tv = TypeVar(nval, lb, ub) + return PartialTypeVar(tv, lb_certain, ub_certain) + elseif !lb_valid && lb_certain + return Union{} + elseif !ub_valid && ub_certain + return Union{} + end end return TypeVar end -@nospecs function typebound_nothrow(b) +@nospecs function typebound_nothrow(𝕃::AbstractLattice, b) + ⊑ = partialorder(𝕃) b = widenconst(b) (b ⊑ TypeVar) && return true if isType(b) @@ -582,30 +624,14 @@ end return false end @nospecs function typevar_nothrow(𝕃::AbstractLattice, n, lb, ub) - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) n ⊑ Symbol || return false - typebound_nothrow(lb) || return false - typebound_nothrow(ub) || return false + typebound_nothrow(𝕃, lb) || return false + typebound_nothrow(𝕃, ub) || return false return true end add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100) -@nospecs function arraysize_tfunc(𝕃::AbstractLattice, ary, dim) - hasintersect(widenconst(ary), Array) || return Bottom - hasintersect(widenconst(dim), Int) || return Bottom - return Int -end -add_tfunc(arraysize, 2, 2, arraysize_tfunc, 4) - -@nospecs function arraysize_nothrow(ary, dim) - ary ⊑ Array || return false - if isa(dim, Const) - dimval = dim.val - return isa(dimval, Int) && dimval > 0 - end - return false -end - struct MemoryOrder x::Cint end const MEMORY_ORDER_UNSPECIFIED = MemoryOrder(-2) const MEMORY_ORDER_INVALID = MemoryOrder(-1) @@ -643,13 +669,16 @@ function pointer_eltype(@nospecialize(ptr)) unw = unwrap_unionall(a) if isa(unw, DataType) && unw.name === Ptr.body.name T = unw.parameters[1] - valid_as_lattice(T) || return Bottom + valid_as_lattice(T, true) || return Bottom return rewrap_unionall(T, a) end end return Any end +@nospecs function pointerarith_tfunc(𝕃::AbstractLattice, ptr, offset) + return ptr +end @nospecs function pointerref_tfunc(𝕃::AbstractLattice, a, i, align) return pointer_eltype(a) end @@ -675,7 +704,7 @@ end if isa(unw, DataType) && unw.name === Ptr.body.name T = unw.parameters[1] # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T` - valid_as_lattice(T) || return Bottom + valid_as_lattice(T, true) || return Bottom return rewrap_unionall(Pair{T, T}, a) end end @@ -693,6 +722,8 @@ end end return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T end +add_tfunc(add_ptr, 2, 2, pointerarith_tfunc, 1) +add_tfunc(sub_ptr, 2, 2, pointerarith_tfunc, 1) add_tfunc(pointerref, 3, 3, pointerref_tfunc, 4) add_tfunc(pointerset, 4, 4, pointerset_tfunc, 5) add_tfunc(atomic_fence, 1, 1, atomic_fence_tfunc, 4) @@ -731,8 +762,12 @@ function typeof_concrete_vararg(t::DataType) for i = 1:np p = t.parameters[i] if i == np && isvarargtype(p) - if isdefined(p, :T) && !isdefined(p, :N) && isconcretetype(p.T) - return Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N + if isdefined(p, :T) && isconcretetype(p.T) + t = Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N + if isdefined(p, :N) + return t{p.N} + end + return t 
end elseif !isconcretetype(p) break @@ -791,15 +826,15 @@ end add_tfunc(typeof, 1, 1, typeof_tfunc, 1) @nospecs function typeassert_tfunc(𝕃::AbstractLattice, v, t) - t = instanceof_tfunc(t)[1] + t = instanceof_tfunc(t, true)[1] t === Any && return v return tmeet(𝕃, v, t) end add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4) @nospecs function typeassert_nothrow(𝕃::AbstractLattice, v, t) - ⊑ = Core.Compiler.:⊑(𝕃) - # ty, exact = instanceof_tfunc(t) + ⊑ = partialorder(𝕃) + # ty, exact = instanceof_tfunc(t, true) # return exact && v ⊑ ty if (isType(t) && !has_free_typevars(t) && v ⊑ t.parameters[1]) || (isa(t, Const) && isa(t.val, Type) && v ⊑ t.val) @@ -809,7 +844,7 @@ add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4) end @nospecs function isa_tfunc(𝕃::AbstractLattice, v, tt) - t, isexact = instanceof_tfunc(tt) + t, isexact = instanceof_tfunc(tt, true) if t === Bottom # check if t could be equivalent to typeof(Bottom), since that's valid in `isa`, but the set of `v` is empty # if `t` cannot have instances, it's also invalid on the RHS of isa @@ -844,13 +879,13 @@ end add_tfunc(isa, 2, 2, isa_tfunc, 1) @nospecs function isa_nothrow(𝕃::AbstractLattice, obj, typ) - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) return typ ⊑ Type end @nospecs function subtype_tfunc(𝕃::AbstractLattice, a, b) - a, isexact_a = instanceof_tfunc(a) - b, isexact_b = instanceof_tfunc(b) + a, isexact_a = instanceof_tfunc(a, false) + b, isexact_b = instanceof_tfunc(b, false) if !has_free_typevars(a) && !has_free_typevars(b) if a <: b if isexact_b || a === Bottom @@ -867,7 +902,7 @@ end add_tfunc(<:, 2, 2, subtype_tfunc, 10) @nospecs function subtype_nothrow(𝕃::AbstractLattice, lty, rty) - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) return lty ⊑ Type && rty ⊑ Type end @@ -877,13 +912,10 @@ function fieldcount_noerror(@nospecialize t) if t === nothing return nothing end - t = t::DataType elseif t === Union{} return 0 end - if !(t isa DataType) - return nothing - end + t isa DataType || return nothing if t.name === _NAMEDTUPLE_NAME names, types = t.parameters if names isa Tuple @@ -892,17 +924,16 @@ function fieldcount_noerror(@nospecialize t) if types isa DataType && types <: Tuple return fieldcount_noerror(types) end - abstr = true - else - abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) - end - if abstr + return nothing + elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) return nothing end return isdefined(t, :types) ? 
length(t.types) : length(t.name.names) end -function try_compute_fieldidx(typ::DataType, @nospecialize(field)) +function try_compute_fieldidx(@nospecialize(typ), @nospecialize(field)) + typ = argument_datatype(typ) + typ === nothing && return nothing if isa(field, Symbol) field = fieldindex(typ, field, false) field == 0 && return nothing @@ -917,47 +948,43 @@ function try_compute_fieldidx(typ::DataType, @nospecialize(field)) return field end -function getfield_boundscheck((; fargs, argtypes)::ArgInfo) # Symbol - farg = nothing - if length(argtypes) == 3 +function getfield_boundscheck(argtypes::Vector{Any}) + if length(argtypes) == 2 + isvarargtype(argtypes[2]) && return :unsafe return :on - elseif length(argtypes) == 4 - fargs !== nothing && (farg = fargs[4]) - boundscheck = argtypes[4] - isvarargtype(boundscheck) && return :unknown + elseif length(argtypes) == 3 + boundscheck = argtypes[3] + isvarargtype(boundscheck) && return :unsafe if widenconst(boundscheck) === Symbol return :on end - elseif length(argtypes) == 5 - fargs !== nothing && (farg = fargs[5]) - boundscheck = argtypes[5] + elseif length(argtypes) == 4 + boundscheck = argtypes[4] + isvarargtype(boundscheck) && return :unsafe else - return :unknown + return :unsafe end - isvarargtype(boundscheck) && return :unknown boundscheck = widenconditional(boundscheck) if widenconst(boundscheck) === Bool if isa(boundscheck, Const) return boundscheck.val::Bool ? :on : :off - elseif farg !== nothing && isexpr(farg, :boundscheck) - return :boundscheck end + return :unknown # including a case when specified as `:boundscheck` end - return :unknown + return :unsafe end -function getfield_nothrow(𝕃::AbstractLattice, arginfo::ArgInfo, boundscheck::Symbol=getfield_boundscheck(arginfo)) - (;argtypes) = arginfo - boundscheck === :unknown && return false +function getfield_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, boundscheck::Symbol=getfield_boundscheck(argtypes)) + boundscheck === :unsafe && return false ordering = Const(:not_atomic) - if length(argtypes) == 4 - isvarargtype(argtypes[4]) && return false - if widenconst(argtypes[4]) !== Bool - ordering = argtypes[4] - end - elseif length(argtypes) == 5 - ordering = argtypes[5] - elseif length(argtypes) != 3 + if length(argtypes) == 3 + isvarargtype(argtypes[3]) && return false + if widenconst(argtypes[3]) !== Bool + ordering = argtypes[3] + end + elseif length(argtypes) == 4 + ordering = argtypes[3] + elseif length(argtypes) ≠ 2 return false end isa(ordering, Const) || return false @@ -966,7 +993,7 @@ function getfield_nothrow(𝕃::AbstractLattice, arginfo::ArgInfo, boundscheck:: if ordering !== :not_atomic # TODO: this is assuming not atomic return false end - return getfield_nothrow(𝕃, argtypes[2], argtypes[3], !(boundscheck === :off)) + return getfield_nothrow(𝕃, argtypes[1], argtypes[2], !(boundscheck === :off)) end @nospecs function getfield_nothrow(𝕃::AbstractLattice, s00, name, boundscheck::Bool) # If we don't have boundscheck off and don't know the field, don't even bother @@ -974,30 +1001,43 @@ end isa(name, Const) || return false end - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) # If we have s00 being a const, we can potentially refine our type-based analysis above - if isa(s00, Const) || isconstType(s00) - if !isa(s00, Const) - sv = s00.parameters[1] - else + if isa(s00, Const) || isconstType(s00) || isa(s00, PartialStruct) + if isa(s00, Const) sv = s00.val + sty = typeof(sv) + nflds = nfields(sv) + ismod = sv isa Module + elseif isa(s00, PartialStruct) + sty = 
unwrap_unionall(s00.typ) + nflds = fieldcount_noerror(sty) + ismod = false + else + sv = (s00::DataType).parameters[1] + sty = typeof(sv) + nflds = nfields(sv) + ismod = sv isa Module end if isa(name, Const) nval = name.val if !isa(nval, Symbol) - isa(sv, Module) && return false + ismod && return false isa(nval, Int) || return false end - return isdefined(sv, nval) + return isdefined_tfunc(𝕃, s00, name) === Const(true) end - boundscheck && return false + # If bounds checking is disabled and all fields are assigned, # we may assume that we don't throw - isa(sv, Module) && return false + @assert !boundscheck + ismod && return false name ⊑ Int || name ⊑ Symbol || return false - for i = 1:fieldcount(typeof(sv)) - isdefined(sv, i) || return false + sty.name.n_uninitialized == 0 && return true + nflds === nothing && return false + for i = (datatype_min_ninitialized(sty)+1):nflds + isdefined_tfunc(𝕃, s00, Const(i)) === Const(true) || return false end return true end @@ -1094,7 +1134,7 @@ end end @nospecs function _getfield_tfunc(𝕃::AnyMustAliasesLattice, s00, name, setfield::Bool) - return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), name, setfield) + return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), widenmustalias(name), setfield) end @nospecs function _getfield_tfunc(𝕃::PartialsLattice, s00, name, setfield::Bool) @@ -1120,7 +1160,9 @@ end if isa(sv, Module) setfield && return Bottom if isa(nv, Symbol) - return abstract_eval_global(sv, nv) + # In ordinary inference, this case is intercepted early and + # re-routed to `getglobal`. + return Any end return Bottom end @@ -1202,94 +1244,48 @@ end return Bottom end if nf == 1 - return rewrap_unionall(unwrapva(ftypes[1]), s00) - end - # union together types of all fields - t = Bottom - for i in 1:nf - _ft = ftypes[i] - setfield && isconst(s, i) && continue - t = tmerge(t, rewrap_unionall(unwrapva(_ft), s00)) - t === Any && break + fld = 1 + else + # union together types of all fields + t = Bottom + for i in 1:nf + _ft = unwrapva(ftypes[i]) + valid_as_lattice(_ft, true) || continue + setfield && isconst(s, i) && continue + t = tmerge(t, rewrap_unionall(_ft, s00)) + t === Any && break + end + return t end - return t + else + fld = _getfield_fieldindex(s, name) + fld === nothing && return Bottom end - fld = _getfield_fieldindex(s, name) - fld === nothing && return Bottom if s <: Tuple && fld >= nf && isvarargtype(ftypes[nf]) - return rewrap_unionall(unwrapva(ftypes[nf]), s00) - end - if fld < 1 || fld > nf - return Bottom - elseif setfield && isconst(s, fld) - return Bottom - end - R = ftypes[fld] - if isempty(s.parameters) - return R + R = unwrapva(ftypes[nf]) + else + if fld < 1 || fld > nf + return Bottom + elseif setfield && isconst(s, fld) + return Bottom + end + R = ftypes[fld] + valid_as_lattice(R, true) || return Bottom + if isempty(s.parameters) + return R + end end return rewrap_unionall(R, s00) end -@nospecs function getfield_notundefined(typ0, name) - if isa(typ0, Const) && isa(name, Const) - typv = typ0.val - namev = name.val - isa(typv, Module) && return true - if isa(namev, Symbol) || isa(namev, Int) - # Fields are not allowed to transition from defined to undefined, so - # even if the field is not const, all we need to check here is that - # it is defined here. 
- return isdefined(typv, namev) - end - end - typ0 = widenconst(typ0) - typ = unwrap_unionall(typ0) - if isa(typ, Union) - return getfield_notundefined(rewrap_unionall(typ.a, typ0), name) && - getfield_notundefined(rewrap_unionall(typ.b, typ0), name) - end - isa(typ, DataType) || return false - if typ.name === Tuple.name || typ.name === _NAMEDTUPLE_NAME - # tuples and named tuples can't be instantiated with undefined fields, - # so we don't need to be conservative here - return true - end - if !isa(name, Const) - isvarargtype(name) && return false - if !hasintersect(widenconst(name), Union{Int,Symbol}) - return true # no undefined behavior if thrown - end - # field isn't known precisely, but let's check if all the fields can't be - # initialized with undefined value so to avoid being too conservative - fcnt = fieldcount_noerror(typ) - fcnt === nothing && return false - all(i::Int->is_undefref_fieldtype(fieldtype(typ,i)), (datatype_min_ninitialized(typ)+1):fcnt) && return true - return false - end - name = name.val - if isa(name, Symbol) - fidx = fieldindex(typ, name, false) - fidx === nothing && return true # no undefined behavior if thrown - elseif isa(name, Int) - fidx = name - else - return true # no undefined behavior if thrown - end - fcnt = fieldcount_noerror(typ) - fcnt === nothing && return false - 0 < fidx ≤ fcnt || return true # no undefined behavior if thrown - ftyp = fieldtype(typ, fidx) - is_undefref_fieldtype(ftyp) && return true - return fidx ≤ datatype_min_ninitialized(typ) -end -# checks if a field of this type will not be initialized with undefined value -# and the access to that uninitialized field will cause and `UndefRefError`, e.g., +# checks if a field of this type is guaranteed to be defined to a value +# and that access to an uninitialized field will cause an `UndefRefError` or return zero # - is_undefref_fieldtype(String) === true # - is_undefref_fieldtype(Integer) === true # - is_undefref_fieldtype(Any) === true # - is_undefref_fieldtype(Int) === false # - is_undefref_fieldtype(Union{Int32,Int64}) === false +# - is_undefref_fieldtype(T) === false function is_undefref_fieldtype(@nospecialize ftyp) return !has_free_typevars(ftyp) && !allocatedinline(ftyp) end @@ -1326,7 +1322,6 @@ end return setfield!_nothrow(𝕃, s00, name, v) end @nospecs function setfield!_nothrow(𝕃::AbstractLattice, s00, name, v) - ⊑ = Core.Compiler.:⊑(𝕃) s0 = widenconst(s00) s = unwrap_unionall(s0) if isa(s, Union) @@ -1343,81 +1338,128 @@ end isconst(s, field) && return false isfieldatomic(s, field) && return false # TODO: currently we're only testing for ordering === :not_atomic v_expected = fieldtype(s0, field) + ⊑ = partialorder(𝕃) return v ⊑ v_expected end return false end -@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v, order) - return getfield_tfunc(𝕃, o, f) -end -@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v) +@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v, order=Symbol) + setfield!_tfunc(𝕃, o, f, v) === Bottom && return Bottom return getfield_tfunc(𝕃, o, f) end -@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v, order) - return modifyfield!_tfunc(𝕃, o, f, op, v) -end -@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v) - T = _fieldtype_tfunc(𝕃, o, f, isconcretetype(o)) +@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v, order=Symbol) + o′ = widenconst(o) + T = _fieldtype_tfunc(𝕃, o′, f, isconcretetype(o′)) T === Bottom && return Bottom PT = Const(Pair) - return 
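# [Editor's sketch — illustrative note, not part of the patch above.]
# Surface semantics modeled by swapfield!_tfunc / modifyfield!_tfunc: the swap
# returns the *old* value, the modify returns an old => new Pair. `Counter` is
# a hypothetical type used only for illustration.
mutable struct Counter; n::Int; end
c = Counter(0)
Core.swapfield!(c, :n, 5)        # 0       (c.n is now 5)
Core.modifyfield!(c, :n, +, 2)   # 5 => 7  (c.n is now 7)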
instanceof_tfunc(apply_type_tfunc(𝕃, PT, T, T))[1] + return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T, T]), true)[1] end -function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState) +@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order=Symbol, failure_order=Symbol) + o′ = widenconst(o) + T = _fieldtype_tfunc(𝕃, o′, f, isconcretetype(o′)) + T === Bottom && return Bottom + PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T) + return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T]), true)[1] +end +@nospecs function setfieldonce!_tfunc(𝕃::AbstractLattice, o, f, v, success_order=Symbol, failure_order=Symbol) + setfield!_tfunc(𝕃, o, f, v) === Bottom && return Bottom + isdefined_tfunc(𝕃, o, f) === Const(true) && return Const(false) + return Bool +end + +@nospecs function abstract_modifyop!(interp::AbstractInterpreter, ff, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState) + if ff === modifyfield! + minargs = 5 + maxargs = 6 + op_argi = 4 + v_argi = 5 + elseif ff === Core.modifyglobal! + minargs = 5 + maxargs = 6 + op_argi = 4 + v_argi = 5 + elseif ff === Core.memoryrefmodify! + minargs = 6 + maxargs = 6 + op_argi = 3 + v_argi = 4 + elseif ff === atomic_pointermodify + minargs = 5 + maxargs = 5 + op_argi = 3 + v_argi = 4 + else + @assert false "unreachable" + end + nargs = length(argtypes) if !isempty(argtypes) && isvarargtype(argtypes[nargs]) - nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - nargs > 3 || return CallMeta(Any, Effects(), NoCallInfo()) + nargs - 1 <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + nargs + 1 >= op_argi || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) else - 5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) + minargs <= nargs <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) end 𝕃ᵢ = typeinf_lattice(interp) - o = unwrapva(argtypes[2]) - f = unwrapva(argtypes[3]) - RT = modifyfield!_tfunc(𝕃ᵢ, o, f, Any, Any) + if ff === modifyfield! + o = unwrapva(argtypes[2]) + f = unwrapva(argtypes[3]) + RT = modifyfield!_tfunc(𝕃ᵢ, o, f, Any, Any, Symbol) + TF = getfield_tfunc(𝕃ᵢ, o, f) + elseif ff === Core.modifyglobal! + o = unwrapva(argtypes[2]) + f = unwrapva(argtypes[3]) + GT = abstract_eval_get_binding_type(interp, sv, o, f).rt + RT = isa(GT, Const) ? Pair{GT.val, GT.val} : Pair + TF = isa(GT, Const) ? GT.val : Any + elseif ff === Core.memoryrefmodify! 
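# [Editor's sketch — illustrative note, not part of the patch above.]
# replacefield! is a compare-and-swap returning an (old, success) record and
# the new setfieldonce! returns a Bool, matching replacefield!_tfunc /
# setfieldonce!_tfunc. The @atomic macros are the usual entry points
# (assumes Julia ≥ 1.11 for @atomiconce); `Slot` is a hypothetical type.
mutable struct Slot
    @atomic x::Int
    @atomic y            # left unset by the constructor
    Slot(x) = new(x)
end
s = Slot(1)
@atomicreplace s.x 1 => 2   # (old = 1, success = true)
@atomicreplace s.x 1 => 3   # (old = 2, success = false)
@atomiconce s.y = 10        # true  — the field was unset
@atomiconce s.y = 11        # false — already set; the value stays 10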
+ o = unwrapva(argtypes[2]) + RT = memoryrefmodify!_tfunc(𝕃ᵢ, o, Any, Any, Symbol, Bool) + TF = memoryrefget_tfunc(𝕃ᵢ, o, Symbol, Bool) + elseif ff === atomic_pointermodify + o = unwrapva(argtypes[2]) + RT = atomic_pointermodify_tfunc(𝕃ᵢ, o, Any, Any, Symbol) + TF = atomic_pointerref_tfunc(𝕃ᵢ, o, Symbol) + else + @assert false "unreachable" + end info = NoCallInfo() - if nargs >= 5 && RT !== Bottom + if nargs >= v_argi && RT !== Bottom # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T` # as well as compute the info for the method matches - op = unwrapva(argtypes[4]) - v = unwrapva(argtypes[5]) - TF = getfield_tfunc(𝕃ᵢ, o, f) - callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1) - TF2 = tmeet(callinfo.rt, widenconst(TF)) - if TF2 === Bottom - RT = Bottom - elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct - RT = PartialStruct(RT, Any[TF, TF2]) + op = unwrapva(argtypes[op_argi]) + v = unwrapva(argtypes[v_argi]) + callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true, si.saw_latestworld), sv, #=max_methods=#1) + TF = Core.Box(TF) + RT = Core.Box(RT) + return Future{CallMeta}(callinfo, interp, sv) do callinfo, interp, sv + TF = TF.contents + RT = RT.contents + TF2 = tmeet(ipo_lattice(interp), callinfo.rt, widenconst(TF)) + if TF2 === Bottom + RT = Bottom + elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct + RT = PartialStruct(fallback_lattice, RT, Any[TF, TF2]) + end + info = ModifyOpInfo(callinfo.info) + return CallMeta(RT, Any, Effects(), info) end - info = ModifyFieldInfo(callinfo.info) end - return CallMeta(RT, Effects(), info) -end -@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order, failure_order) - return replacefield!_tfunc(𝕃, o, f, x, v) -end -@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order) - return replacefield!_tfunc(𝕃, o, f, x, v) -end -@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v) - T = _fieldtype_tfunc(𝕃, o, f, isconcretetype(o)) - T === Bottom && return Bottom - PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T) - return instanceof_tfunc(apply_type_tfunc(𝕃, PT, T))[1] + return Future(CallMeta(RT, Any, Effects(), info)) end # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial add_tfunc(getfield, 2, 4, getfield_tfunc, 1) add_tfunc(setfield!, 3, 4, setfield!_tfunc, 3) - add_tfunc(swapfield!, 3, 4, swapfield!_tfunc, 3) add_tfunc(modifyfield!, 4, 5, modifyfield!_tfunc, 3) add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3) +add_tfunc(setfieldonce!, 3, 5, setfieldonce!_tfunc, 3) @nospecs function fieldtype_nothrow(𝕃::AbstractLattice, s0, name) s0 === Bottom && return true # unreachable - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0 # We have no idea return false @@ -1435,7 +1477,7 @@ add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3) fieldtype_nothrow(𝕃, rewrap_unionall(su.b, s0), name) end - s, exact = instanceof_tfunc(s0) + s, exact = instanceof_tfunc(s0, false) s === Bottom && return false # always return _fieldtype_nothrow(s, exact, name) end @@ -1500,7 +1542,7 @@ end fieldtype_tfunc(𝕃, rewrap_unionall(su.b, s0), name)) end - s, exact = instanceof_tfunc(s0) + s, exact = instanceof_tfunc(s0, false) s 
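# [Editor's sketch — illustrative note, not part of the patch above.]
# abstract_modifyop! infers the nested `op(old, v)` call to refine the Pair
# returned by the modify-style builtins; `Acc` is a hypothetical struct.
mutable struct Acc; @atomic n::Int; end
Base.return_types((a, v) -> Core.modifyfield!(a, :n, +, v, :sequentially_consistent),
                  (Acc, Int))   # typically Any[Pair{Int, Int}]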
=== Bottom && return Bottom return _fieldtype_tfunc(𝕃, s, name, exact) end @@ -1513,8 +1555,8 @@ end tb0 = _fieldtype_tfunc(𝕃, rewrap_unionall(u.b, s), name, exact) ta0 ⊑ tb0 && return tb0 tb0 ⊑ ta0 && return ta0 - ta, exacta, _, istypea = instanceof_tfunc(ta0) - tb, exactb, _, istypeb = instanceof_tfunc(tb0) + ta, exacta, _, istypea = instanceof_tfunc(ta0, false) + tb, exactb, _, istypeb = instanceof_tfunc(tb0, false) if exact && exacta && exactb return Const(Union{ta, tb}) end @@ -1629,7 +1671,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe (headtype === Union) && return true isa(rt, Const) && return true u = headtype - # TODO: implement optimization for isvarargtype(u) and istuple occurences (which are valid but are not UnionAll) + # TODO: implement optimization for isvarargtype(u) and istuple occurrences (which are valid but are not UnionAll) for i = 2:length(argtypes) isa(u, UnionAll) || return false ai = widenconditional(argtypes[i]) @@ -1648,7 +1690,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe return false end else - T, exact, _, istype = instanceof_tfunc(ai) + T, exact, _, istype = instanceof_tfunc(ai, false) if T === Bottom if !(u.var.lb === Union{} && u.var.ub === Any) return false @@ -1674,8 +1716,12 @@ end const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, :_L, :_M, :_N, :_O, :_P, :_Q, :_R, :_S, :_T, :_U, :_V, :_W, :_X, :_Y, :_Z] -# TODO: handle e.g. apply_type(T, R::Union{Type{Int32},Type{Float64}}) -@nospecs function apply_type_tfunc(𝕃::AbstractLattice, headtypetype, args...) +function apply_type_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any}; + max_union_splitting::Int=InferenceParams().max_union_splitting) + if isempty(argtypes) + return Bottom + end + headtypetype = argtypes[1] headtypetype = widenslotwrapper(headtypetype) if isa(headtypetype, Const) headtype = headtypetype.val @@ -1684,15 +1730,15 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, else return Any end - if !isempty(args) && isvarargtype(args[end]) + largs = length(argtypes) + if largs > 1 && isvarargtype(argtypes[end]) return isvarargtype(headtype) ? TypeofVararg : Type end - largs = length(args) if headtype === Union - largs == 0 && return Const(Bottom) + largs == 1 && return Const(Bottom) hasnonType = false - for i = 1:largs - ai = args[i] + for i = 2:largs + ai = argtypes[i] if isa(ai, Const) if !isa(ai.val, Type) if isa(ai.val, TypeVar) @@ -1711,16 +1757,14 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, end end end - if largs == 1 # Union{T} --> T - u1 = typeintersect(widenconst(args[1]), Union{Type,TypeVar}) - valid_as_lattice(u1) || return Bottom - return u1 + if largs == 2 # Union{T} --> T + return tmeet(widenconst(argtypes[2]), Union{Type,TypeVar}) end hasnonType && return Type ty = Union{} allconst = true - for i = 1:largs - ai = args[i] + for i = 2:largs + ai = argtypes[i] if isType(ai) aty = ai.parameters[1] allconst &= hasuniquerep(aty) @@ -1731,7 +1775,19 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, end return allconst ? 
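# [Editor's sketch — illustrative note, not part of the patch above.]
# The fieldtype tfuncs model the runtime `fieldtype` query, including Vararg
# fields of tuple types:
fieldtype(Complex{Float64}, :re)           # Float64
fieldtype(Tuple{Int, Vararg{String}}, 3)   # String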
Const(ty) : Type{ty} end - istuple = isa(headtype, Type) && (headtype == Tuple) + if 1 < unionsplitcost(𝕃, argtypes) ≤ max_union_splitting + rt = Bottom + for split_argtypes = switchtupleunion(𝕃, argtypes) + this_rt = widenconst(_apply_type_tfunc(𝕃, headtype, split_argtypes)) + rt = Union{rt, this_rt} + end + return rt + end + return _apply_type_tfunc(𝕃, headtype, argtypes) +end +@nospecs function _apply_type_tfunc(𝕃::AbstractLattice, headtype, argtypes::Vector{Any}) + largs = length(argtypes) + istuple = headtype === Tuple if !istuple && !isa(headtype, UnionAll) && !isvarargtype(headtype) return Union{} end @@ -1744,20 +1800,20 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, # first push the tailing vars from headtype into outervars outer_start, ua = 0, headtype while isa(ua, UnionAll) - if (outer_start += 1) > largs + if (outer_start += 1) > largs - 1 push!(outervars, ua.var) end ua = ua.body end - if largs > outer_start && isa(headtype, UnionAll) # e.g. !isvarargtype(ua) && !istuple + if largs - 1 > outer_start && isa(headtype, UnionAll) # e.g. !isvarargtype(ua) && !istuple return Bottom # too many arguments end - outer_start = outer_start - largs + 1 + outer_start = outer_start - largs + 2 varnamectr = 1 ua = headtype - for i = 1:largs - ai = widenslotwrapper(args[i]) + for i = 2:largs + ai = widenslotwrapper(argtypes[i]) if isType(ai) aip1 = ai.parameters[1] canconst &= !has_free_typevars(aip1) @@ -1799,7 +1855,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, elseif !isT # if we didn't have isType to compute ub directly, try to use instanceof_tfunc to refine this guess ai_w = widenconst(ai) - ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai)[1] : Any + ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai, false)[1] : Any end if istuple # in the last parameter of a Tuple type, if the upper bound is Any @@ -1831,7 +1887,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, # If the names are known, keep the upper bound, but otherwise widen to Tuple. # This is a widening heuristic to avoid keeping type information # that's unlikely to be useful. - if !(uw.parameters[1] isa Tuple || (i == 2 && tparams[1] isa Tuple)) + if !(uw.parameters[1] isa Tuple || (i == 3 && tparams[1] isa Tuple)) ub = Any end else @@ -1853,6 +1909,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, try appl = apply_type(headtype, tparams...) catch ex + ex isa InterruptException && rethrow() # type instantiation might fail if one of the type parameters doesn't # match, which could happen only if a type estimate is too coarse # and might guess a concrete value while the actual type for it is Bottom @@ -1872,7 +1929,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, # throwing errors. appl = headtype if isa(appl, UnionAll) - for _ = 1:largs + for _ = 2:largs appl = appl::UnionAll push!(outervars, appl.var) appl = appl.body @@ -1892,6 +1949,8 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, end return ans end +@nospecs apply_type_tfunc(𝕃::AbstractLattice, headtypetype, args...) = + apply_type_tfunc(𝕃, Any[i == 0 ? 
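# [Editor's sketch — illustrative note, not part of the patch above.]
# apply_type_tfunc models Core.apply_type, i.e. the `T{...}` type-application
# syntax, and the union splitting added here handles a Union of type arguments
# case by case. The refined result shown is the intended outcome, not a
# guaranteed printout on every Julia version.
Core.apply_type(Vector, Int) === Vector{Int}    # true
Base.return_types(T -> Vector{T}, (Union{Type{Int}, Type{Float64}},))
# with the splitting: Any[Union{Type{Vector{Float64}}, Type{Vector{Int}}}]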
headtypetype : args[i] for i in 0:length(args)]) add_tfunc(apply_type, 1, INT_INF, apply_type_tfunc, 10) # convert the dispatch tuple type argtype to the real (concrete) type of @@ -1958,54 +2017,141 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any}) typ = Tuple{params...} # replace a singleton type with its equivalent Const object issingletontype(typ) && return Const(typ.instance) - return anyinfo ? PartialStruct(typ, argtypes) : typ + return anyinfo ? PartialStruct(𝕃, typ, argtypes) : typ +end + +@nospecs function memorynew_tfunc(𝕃::AbstractLattice, memtype, memlen) + hasintersect(widenconst(memlen), Int) || return Bottom + memt = tmeet(𝕃, instanceof_tfunc(memtype, true)[1], GenericMemory) + memt == Union{} && return memt + # PartialStruct so that loads of Const `length` get inferred + return PartialStruct(𝕃, memt, Any[memlen, Ptr{Nothing}]) end +add_tfunc(Core.memorynew, 2, 2, memorynew_tfunc, 10) -@nospecs function arrayref_tfunc(𝕃::AbstractLattice, boundscheck, ary, idxs...) - return _arrayref_tfunc(𝕃, boundscheck, ary, idxs) +@nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom + return memoryref_elemtype(mem) +end +@nospecs function memoryrefset!_tfunc(𝕃::AbstractLattice, mem, item, order, boundscheck) + hasintersect(widenconst(item), memoryrefget_tfunc(𝕃, mem, order, boundscheck)) || return Bottom + return item +end +@nospecs function memoryrefswap!_tfunc(𝕃::AbstractLattice, mem, v, order, boundscheck) + memoryrefset!_tfunc(𝕃, mem, v, order, boundscheck) === Bottom && return Bottom + return memoryrefget_tfunc(𝕃, mem, order, boundscheck) +end +@nospecs function memoryrefmodify!_tfunc(𝕃::AbstractLattice, mem, op, v, order, boundscheck) + memoryrefget_tfunc(𝕃, mem, order, boundscheck) === Bottom && return Bottom + T = _memoryref_elemtype(mem) + T === Bottom && return Bottom + PT = Const(Pair) + return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T, T]), true)[1] +end +@nospecs function memoryrefreplace!_tfunc(𝕃::AbstractLattice, mem, x, v, success_order, failure_order, boundscheck) + memoryrefset!_tfunc(𝕃, mem, v, success_order, boundscheck) === Bottom && return Bottom + hasintersect(widenconst(failure_order), Symbol) || return Bottom + T = _memoryref_elemtype(mem) + T === Bottom && return Bottom + PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T) + return instanceof_tfunc(apply_type_tfunc(𝕃, Any[PT, T]), true)[1] end -@nospecs function _arrayref_tfunc(𝕃::AbstractLattice, boundscheck, ary, @nospecialize idxs::Tuple) - isempty(idxs) && return Bottom - array_builtin_common_errorcheck(boundscheck, ary, idxs) || return Bottom - return array_elmtype(ary) +@nospecs function memoryrefsetonce!_tfunc(𝕃::AbstractLattice, mem, v, success_order, failure_order, boundscheck) + memoryrefset!_tfunc(𝕃, mem, v, success_order, boundscheck) === Bottom && return Bottom + hasintersect(widenconst(failure_order), Symbol) || return Bottom + return Bool end -add_tfunc(arrayref, 3, INT_INF, arrayref_tfunc, 20) -add_tfunc(const_arrayref, 3, INT_INF, arrayref_tfunc, 20) -@nospecs function arrayset_tfunc(𝕃::AbstractLattice, boundscheck, ary, item, idxs...) 
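# [Editor's sketch — illustrative note, not part of the patch above.]
# tuple_tfunc keeps per-element information via PartialStruct; at the surface
# this is simply the precise tuple type of the construction. memorynew_tfunc
# likewise carries the length field, which is what can let
# `length(Memory{Int}(undef, 4))` fold to a constant when the requested length
# is a literal (assumes constant propagation reaches the builtin).
Base.return_types(x -> (x, 1.0), (Int,))   # Any[Tuple{Int, Float64}]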
- hasintersect(widenconst(item), _arrayref_tfunc(𝕃, boundscheck, ary, idxs)) || return Bottom - return ary +add_tfunc(Core.memoryrefget, 3, 3, memoryrefget_tfunc, 20) +add_tfunc(Core.memoryrefset!, 4, 4, memoryrefset!_tfunc, 20) +add_tfunc(Core.memoryrefswap!, 4, 4, memoryrefswap!_tfunc, 20) +add_tfunc(Core.memoryrefmodify!, 5, 5, memoryrefmodify!_tfunc, 20) +add_tfunc(Core.memoryrefreplace!, 6, 6, memoryrefreplace!_tfunc, 20) +add_tfunc(Core.memoryrefsetonce!, 5, 5, memoryrefsetonce!_tfunc, 20) + +@nospecs function memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + return _memoryref_isassigned_tfunc(𝕃, mem, order, boundscheck) +end +@nospecs function _memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom + return Bool end -add_tfunc(arrayset, 4, INT_INF, arrayset_tfunc, 20) +add_tfunc(memoryref_isassigned, 3, 3, memoryref_isassigned_tfunc, 20) -@nospecs function array_builtin_common_errorcheck(boundscheck, ary, @nospecialize idxs::Tuple) - hasintersect(widenconst(boundscheck), Bool) || return false - hasintersect(widenconst(ary), Array) || return false - for i = 1:length(idxs) - idx = getfield(idxs, i) - idx = isvarargtype(idx) ? unwrapva(idx) : widenconst(idx) - hasintersect(idx, Int) || return false +@nospecs function memoryref_tfunc(𝕃::AbstractLattice, mem) + a = widenconst(unwrapva(mem)) + if !has_free_typevars(a) + unw = unwrap_unionall(a) + if isa(unw, DataType) && unw.name === GenericMemory.body.body.body.name + A = unw.parameters[1] + T = unw.parameters[2] + AS = unw.parameters[3] + T isa Type || T isa TypeVar || return Bottom + return rewrap_unionall(GenericMemoryRef{A, T, AS}, a) + end end + return GenericMemoryRef +end +@nospecs function memoryref_tfunc(𝕃::AbstractLattice, ref, idx) + if isvarargtype(idx) + idx = unwrapva(idx) + end + return memoryref_tfunc(𝕃, ref, idx, Const(true)) +end +@nospecs function memoryref_tfunc(𝕃::AbstractLattice, ref, idx, boundscheck) + memoryref_builtin_common_errorcheck(ref, Const(:not_atomic), boundscheck) || return Bottom + hasintersect(widenconst(idx), Int) || return Bottom + return ref +end +add_tfunc(memoryrefnew, 1, 3, memoryref_tfunc, 1) + +@nospecs function memoryrefoffset_tfunc(𝕃::AbstractLattice, mem) + hasintersect(widenconst(mem), GenericMemoryRef) || return Bottom + return Int +end +add_tfunc(memoryrefoffset, 1, 1, memoryrefoffset_tfunc, 5) + +@nospecs function memoryref_builtin_common_errorcheck(mem, order, boundscheck) + hasintersect(widenconst(mem), GenericMemoryRef) || return false + hasintersect(widenconst(order), Symbol) || return false + hasintersect(widenconst(unwrapva(boundscheck)), Bool) || return false return true end -function array_elmtype(@nospecialize ary) - a = widenconst(ary) - if !has_free_typevars(a) && a <: Array - a0 = a - if isa(a, UnionAll) - a = unwrap_unionall(a0) +@nospecs function memoryref_elemtype(mem) + m = widenconst(mem) + if !has_free_typevars(m) && m <: GenericMemoryRef + m0 = m + if isa(m, UnionAll) + m = unwrap_unionall(m0) end - if isa(a, DataType) - T = a.parameters[1] - valid_as_lattice(T) || return Bottom - return rewrap_unionall(T, a0) + if isa(m, DataType) + T = m.parameters[2] + valid_as_lattice(T, true) || return Bottom + return rewrap_unionall(T, m0) end end return Any end -@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, linfo::MethodInstance) +@nospecs function _memoryref_elemtype(mem) + m = widenconst(mem) + if 
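# [Editor's sketch — illustrative note, not part of the patch above.]
# The GenericMemory/MemoryRef builtins registered above back the `Memory` type
# (Julia ≥ 1.11); ordinary indexing goes through memoryrefnew plus
# memoryrefget/memoryrefset!:
m = Memory{Int}(undef, 4)
m[1] = 42    # roughly memoryrefset!(memoryrefnew(m, 1, true), 42, :not_atomic, true)
m[1]         # 42, via memoryrefget
length(m)    # 4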
!has_free_typevars(m) && m <: GenericMemoryRef + m0 = m + if isa(m, UnionAll) + m = unwrap_unionall(m0) + end + if isa(m, DataType) + T = m.parameters[2] + valid_as_lattice(T, true) || return Bottom + has_free_typevars(T) || return Const(T) + return rewrap_unionall(Type{T}, m0) + end + end + return Type +end + +@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, mi::MethodInstance) argt, argt_exact = instanceof_tfunc(arg) lbt, lb_exact = instanceof_tfunc(lb) if !lb_exact @@ -2019,91 +2165,138 @@ end (isa(source, Const) && isa(source.val, Method)) || return t - return PartialOpaque(t, tuple_tfunc(𝕃, env), linfo, source.val) + return PartialOpaque(t, tuple_tfunc(𝕃, env), mi, source.val) end # whether getindex for the elements can potentially throw UndefRef -function array_type_undefable(@nospecialize(arytype)) +@nospecs function array_type_undefable(arytype) + arytype = unwrap_unionall(arytype) if isa(arytype, Union) return array_type_undefable(arytype.a) || array_type_undefable(arytype.b) - elseif isa(arytype, UnionAll) - return true + elseif arytype isa DataType + elmtype = memoryref_elemtype(arytype) + # TODO: use arraytype layout instead to derive this + return !((elmtype isa DataType && isbitstype(elmtype)) || (elmtype isa Union && isbitsunion(elmtype))) + end + return true +end + +@nospecs function memoryset_typecheck(𝕃::AbstractLattice, memtype, elemtype) + # Check that we can determine the element type + isa(memtype, DataType) || return false + elemtype_expected = memoryref_elemtype(memtype) + elemtype_expected === Union{} && return false + # Check that the element type is compatible with the element we're assigning + ⊑ = partialorder(𝕃) + elemtype ⊑ elemtype_expected || return false + return true +end + +function memoryref_builtin_common_nothrow(argtypes::Vector{Any}) + if length(argtypes) == 1 + memtype = widenconst(argtypes[1]) + return memtype ⊑ GenericMemory else - elmtype = (arytype::DataType).parameters[1] - return !(elmtype isa Type && (isbitstype(elmtype) || isbitsunion(elmtype))) + if length(argtypes) == 2 + boundscheck = Const(true) + elseif length(argtypes) == 3 + boundscheck = argtypes[3] + else + return false + end + memtype = widenconst(argtypes[1]) + idx = widenconst(argtypes[2]) + idx ⊑ Int || return false + boundscheck ⊑ Bool || return false + memtype ⊑ GenericMemoryRef || return false + # If we have @inbounds (last argument is false), we're allowed to assume + # we don't throw bounds errors. + if isa(boundscheck, Const) + boundscheck.val::Bool || return true + end + # Else we can't really say anything here + # TODO: In the future we may be able to track the minimum length though inference. + return false end end -function array_builtin_common_nothrow(argtypes::Vector{Any}, isarrayref::Bool) - first_idx_idx = isarrayref ? 3 : 4 - length(argtypes) ≥ first_idx_idx || return false - boundscheck = argtypes[1] - arytype = argtypes[2] - array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false - if isarrayref +function memoryrefop_builtin_common_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospecialize f) + ismemoryset = f === memoryrefset! + nargs = ismemoryset ? 
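# [Editor's sketch — illustrative note, not part of the patch above.]
# array_type_undefable decides whether a load may observe an unset element:
# isbits element types always hold *some* value, while reference-like element
# types can throw UndefRefError, so memoryrefget is not :nothrow for them.
mi = Memory{Int}(undef, 1)
isassigned(mi, 1)    # true  — contents are arbitrary but defined
ms = Memory{String}(undef, 1)
isassigned(ms, 1)    # false — reading ms[1] would throw UndefRefError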
4 : 3 + length(argtypes) == nargs || return false + order = argtypes[2 + ismemoryset] + boundscheck = argtypes[3 + ismemoryset] + memtype = widenconst(argtypes[1]) + memoryref_builtin_common_typecheck(𝕃, boundscheck, memtype, order) || return false + if ismemoryset + # Additionally check element type compatibility + memoryset_typecheck(𝕃, memtype, argtypes[2]) || return false + elseif f === memoryrefget # If we could potentially throw undef ref errors, bail out now. - arytype = widenconst(arytype) - array_type_undefable(arytype) && return false + array_type_undefable(memtype) && return false end - # If we have @inbounds (first argument is false), we're allowed to assume + # If we have @inbounds (last argument is false), we're allowed to assume # we don't throw bounds errors. if isa(boundscheck, Const) boundscheck.val::Bool || return true end # Else we can't really say anything here - # TODO: In the future we may be able to track the shapes of arrays though - # inference. + # TODO: In the future we may be able to track the minimum length though inference. return false end -@nospecs function array_builtin_common_typecheck(boundscheck, arytype, - argtypes::Vector{Any}, first_idx_idx::Int) - (boundscheck ⊑ Bool && arytype ⊑ Array) || return false - for i = first_idx_idx:length(argtypes) - argtypes[i] ⊑ Int || return false - end - return true +@nospecs function memoryref_builtin_common_typecheck(𝕃::AbstractLattice, boundscheck, memtype, order) + ⊑ = partialorder(𝕃) + return boundscheck ⊑ Bool && memtype ⊑ GenericMemoryRef && order ⊑ Symbol end -@nospecs function arrayset_typecheck(arytype, elmtype) - # Check that we can determine the element type - arytype = widenconst(arytype) - isa(arytype, DataType) || return false - elmtype_expected = arytype.parameters[1] - isa(elmtype_expected, Type) || return false - # Check that the element type is compatible with the element we're assigning - elmtype ⊑ elmtype_expected || return false - return true +function memorynew_nothrow(argtypes::Vector{Any}) + if !(argtypes[1] isa Const && argtypes[2] isa Const) + return false + end + MemT = argtypes[1].val + if !(isconcretetype(MemT) && MemT <: GenericMemory) + return false + end + len = argtypes[2].val + if !(len isa Int && 0 <= len < typemax(Int)) + return false + end + elsz = datatype_layoutsize(MemT) + overflows = checked_smul_int(len, elsz)[2] + return !overflows end -# Query whether the given builtin is guaranteed not to throw given the argtypes -@nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt) - ⊑ = Core.Compiler.:⊑(𝕃) - if f === arrayset - array_builtin_common_nothrow(argtypes, #=isarrayref=#false) || return false - # Additionally check element type compatibility - return arrayset_typecheck(argtypes[2], argtypes[3]) - elseif f === arrayref || f === const_arrayref - return array_builtin_common_nothrow(argtypes, #=isarrayref=#true) +# Query whether the given builtin is guaranteed not to throw given the `argtypes`. +# `argtypes` can be assumed not to contain varargs. +function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, + @nospecialize(rt)) + ⊑ = partialorder(𝕃) + na = length(argtypes) + if f === Core.memorynew + return memorynew_nothrow(argtypes) + elseif f === memoryrefnew + return memoryref_builtin_common_nothrow(argtypes) + elseif f === memoryrefoffset + length(argtypes) == 1 || return false + memtype = widenconst(argtypes[1]) + return memtype ⊑ GenericMemoryRef + elseif f === memoryrefset! 
+ return memoryrefop_builtin_common_nothrow(𝕃, argtypes, f) + elseif f === memoryrefget + return memoryrefop_builtin_common_nothrow(𝕃, argtypes, f) + elseif f === memoryref_isassigned + return memoryrefop_builtin_common_nothrow(𝕃, argtypes, f) elseif f === Core._expr length(argtypes) >= 1 || return false return argtypes[1] ⊑ Symbol - end - - # These builtins are not-vararg, so if we have varars, here, we can't guarantee - # the correct number of arguments. - na = length(argtypes) - (na ≠ 0 && isvarargtype(argtypes[end])) && return false - if f === arraysize - na == 2 || return false - return arraysize_nothrow(argtypes[1], argtypes[2]) elseif f === Core._typevar na == 3 || return false return typevar_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3]) elseif f === invoke return false elseif f === getfield - return getfield_nothrow(𝕃, ArgInfo(nothing, Any[Const(f), argtypes...])) + return getfield_nothrow(𝕃, argtypes) elseif f === setfield! if na == 3 return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3]) @@ -2122,8 +2315,6 @@ end elseif f === (<:) na == 2 || return false return subtype_nothrow(𝕃, argtypes[1], argtypes[2]) - elseif f === UnionAll - return na == 2 && (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type) elseif f === isdefined return isdefined_nothrow(𝕃, argtypes) elseif f === Core.sizeof @@ -2135,20 +2326,6 @@ end elseif f === typeassert na == 2 || return false return typeassert_nothrow(𝕃, argtypes[1], argtypes[2]) - elseif f === getglobal - if na == 2 - return getglobal_nothrow(argtypes[1], argtypes[2]) - elseif na == 3 - return getglobal_nothrow(argtypes[1], argtypes[2], argtypes[3]) - end - return false - elseif f === setglobal! - if na == 3 - return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3]) - elseif na == 4 - return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3], argtypes[4]) - end - return false elseif f === Core.get_binding_type na == 2 || return false return get_binding_type_nothrow(𝕃, argtypes[1], argtypes[2]) @@ -2189,6 +2366,7 @@ const _CONSISTENT_BUILTINS = Any[ (<:), typeassert, throw, + Core.throw_methoderror, setfield!, donotdelete ] @@ -2200,9 +2378,11 @@ const _EFFECT_FREE_BUILTINS = [ isa, UnionAll, getfield, - arrayref, - arraysize, - const_arrayref, + Core.memorynew, + memoryrefnew, + memoryrefoffset, + memoryrefget, + memoryref_isassigned, isdefined, Core.sizeof, Core.ifelse, @@ -2210,6 +2390,7 @@ const _EFFECT_FREE_BUILTINS = [ (<:), typeassert, throw, + Core.throw_methoderror, getglobal, compilerbarrier, ] @@ -2223,21 +2404,24 @@ const _INACCESSIBLEMEM_BUILTINS = Any[ svec, fieldtype, isa, - isdefined, nfields, throw, + Core.throw_methoderror, tuple, typeassert, typeof, compilerbarrier, Core._typevar, - donotdelete + donotdelete, + Core.memorynew, ] const _ARGMEM_BUILTINS = Any[ - arrayref, - arrayset, - arraysize, + memoryrefnew, + memoryrefoffset, + memoryrefget, + memoryref_isassigned, + memoryrefset!, modifyfield!, replacefield!, setfield!, @@ -2274,25 +2458,16 @@ function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any}) # consistent if the first arg is immutable na = length(argtypes) 2 ≤ na ≤ 3 || return EFFECTS_THROWS - obj, sym = argtypes - wobj = unwrapva(obj) + wobj, sym = argtypes + wobj = unwrapva(wobj) + sym = unwrapva(sym) consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY if is_immutable_argtype(wobj) consistent = ALWAYS_TRUE - else - # Bindings/fields are not allowed to transition from defined to undefined, so even - # if the object is not immutable, we can prove `:consistent`-cy if it is defined: - if 
isa(wobj, Const) && isa(sym, Const) - objval = wobj.val - symval = sym.val - if isa(objval, Module) - if isa(symval, Symbol) && isdefined(objval, symval) - consistent = ALWAYS_TRUE - end - elseif (isa(symval, Symbol) || isa(symval, Int)) && isdefined(objval, symval) - consistent = ALWAYS_TRUE - end - end + elseif isdefined_tfunc(𝕃, wobj, sym) isa Const + # Some bindings/fields are not allowed to transition from defined to undefined or the reverse, so even + # if the object is not immutable, we can prove `:consistent`-cy of this: + consistent = ALWAYS_TRUE end nothrow = isdefined_nothrow(𝕃, argtypes) if hasintersect(widenconst(wobj), Module) @@ -2305,105 +2480,115 @@ function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any}) return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly) end -function getfield_effects(𝕃::AbstractLattice, arginfo::ArgInfo, @nospecialize(rt)) - (;argtypes) = arginfo - # consistent if the argtype is immutable - length(argtypes) < 3 && return EFFECTS_THROWS - obj = argtypes[2] - isvarargtype(obj) && return Effects(EFFECTS_THROWS; consistent=ALWAYS_FALSE) +function getfield_effects(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospecialize(rt)) + length(argtypes) < 2 && return EFFECTS_THROWS + obj = argtypes[1] + if isvarargtype(obj) + return Effects(EFFECTS_TOTAL; + consistent=CONSISTENT_IF_INACCESSIBLEMEMONLY, + nothrow=false, + inaccessiblememonly=ALWAYS_FALSE, + noub=ALWAYS_FALSE) + end + # :consistent if the argtype is immutable consistent = (is_immutable_argtype(obj) || is_mutation_free_argtype(obj)) ? ALWAYS_TRUE : CONSISTENT_IF_INACCESSIBLEMEMONLY - # access to `isbitstype`-field initialized with undefined value leads to undefined behavior - # so should taint `:consistent`-cy while access to uninitialized non-`isbitstype` field - # throws `UndefRefError` so doesn't need to taint it - # NOTE `getfield_notundefined` conservatively checks if this field is never initialized - # with undefined value so that we don't taint `:consistent`-cy too aggressively here - if !(length(argtypes) ≥ 3 && getfield_notundefined(obj, argtypes[3])) - consistent = ALWAYS_FALSE - end - bcheck = getfield_boundscheck(arginfo) - nothrow = getfield_nothrow(𝕃, arginfo, bcheck) + noub = ALWAYS_TRUE + bcheck = getfield_boundscheck(argtypes) + nothrow = getfield_nothrow(𝕃, argtypes, bcheck) if !nothrow - if !(bcheck === :on || bcheck === :boundscheck) - # If we cannot independently prove inboundsness, taint consistency. - # The inbounds-ness assertion requires dynamic reachability, while - # :consistent needs to be true for all input values. + if bcheck !== :on + # If we cannot independently prove inboundsness, taint `:noub`. + # The inbounds-ness assertion requires dynamic reachability, + # while `:noub` needs to be true for all input values. # However, as a special exception, we do allow literal `:boundscheck`. - # `:consistent`-cy will be tainted in any caller using `@inbounds` based - # on the `:noinbounds` effect. - # N.B. We do not taint for `--check-bounds=no` here. That is handled - # in concrete evaluation. - consistent = ALWAYS_FALSE + # `:noub` will be tainted in any caller using `@inbounds` + # based on the `:noinbounds` effect. + # N.B. We do not taint for `--check-bounds=no` here. + # That is handled in concrete evaluation. 
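# [Editor's sketch — illustrative note, not part of the patch above.]
# isdefined_effects can mark a call :consistent whenever isdefined_tfunc folds
# it to a constant, because a field never transitions back from defined to
# undefined. `Lazy` is a hypothetical type with an optionally-set field.
mutable struct Lazy
    x
    Lazy() = new()   # leaves x unset
end
l = Lazy()
isdefined(l, :x)   # false
l.x = 1
isdefined(l, :x)   # true, and it stays true from here on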
+ noub = ALWAYS_FALSE end end if hasintersect(widenconst(obj), Module) - inaccessiblememonly = getglobal_effects(argtypes[2:end], rt).inaccessiblememonly + # Modeled more precisely in abstract_eval_getglobal + inaccessiblememonly = ALWAYS_FALSE elseif is_mutation_free_argtype(obj) inaccessiblememonly = ALWAYS_TRUE else inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY end - return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly) + return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly, noub) end -function getglobal_effects(argtypes::Vector{Any}, @nospecialize(rt)) - consistent = inaccessiblememonly = ALWAYS_FALSE - nothrow = false - if length(argtypes) ≥ 2 - M, s = argtypes[1], argtypes[2] - if getglobal_nothrow(M, s) - nothrow = true - # typeasserts below are already checked in `getglobal_nothrow` - Mval, sval = (M::Const).val::Module, (s::Const).val::Symbol - if isconst(Mval, sval) - consistent = ALWAYS_TRUE - if is_mutation_free_argtype(rt) - inaccessiblememonly = ALWAYS_TRUE - end - end - end - end - return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly) -end +""" + builtin_effects(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt) -> Effects -function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), arginfo::ArgInfo, @nospecialize(rt)) +Compute the effects of a builtin function call. `argtypes` should not include `f` itself. +""" +function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, @nospecialize(rt)) if isa(f, IntrinsicFunction) - return intrinsic_effects(f, arginfo.argtypes[2:end]) + return intrinsic_effects(f, argtypes) end @assert !contains_is(_SPECIAL_BUILTINS, f) if f === getfield - return getfield_effects(𝕃, arginfo, rt) + return getfield_effects(𝕃, argtypes, rt) end - argtypes = arginfo.argtypes[2:end] + + # if this builtin call deterministically throws, + # don't bother to taint the other effects other than :nothrow: + # note this is safe only if we accounted for :noub already + rt === Bottom && return EFFECTS_THROWS if f === isdefined return isdefined_effects(𝕃, argtypes) elseif f === getglobal - return getglobal_effects(argtypes, rt) + 2 ≤ length(argtypes) ≤ 3 || return EFFECTS_THROWS + # Modeled more precisely in abstract_eval_getglobal + return generic_getglobal_effects elseif f === Core.get_binding_type length(argtypes) == 2 || return EFFECTS_THROWS - effect_free = get_binding_type_effect_free(argtypes[1], argtypes[2]) ? ALWAYS_TRUE : ALWAYS_FALSE - return Effects(EFFECTS_TOTAL; effect_free) + # Modeled more precisely in abstract_eval_get_binding_type + return Effects(EFFECTS_TOTAL; nothrow=get_binding_type_nothrow(𝕃, argtypes[1], argtypes[2])) + elseif f === compilerbarrier + length(argtypes) == 2 || return Effects(EFFECTS_THROWS; consistent=ALWAYS_FALSE) + setting = argtypes[1] + return Effects(EFFECTS_TOTAL; + consistent = (isa(setting, Const) && setting.val === :conditional) ? 
ALWAYS_TRUE : ALWAYS_FALSE, + nothrow = compilerbarrier_nothrow(setting, nothing)) + elseif f === Core.current_scope + nothrow = true + if length(argtypes) != 0 + if length(argtypes) != 1 || !isvarargtype(argtypes[1]) + return EFFECTS_THROWS + end + nothrow = false + end + return Effects(EFFECTS_TOTAL; + consistent = ALWAYS_FALSE, + notaskstate = false, + nothrow) else if contains_is(_CONSISTENT_BUILTINS, f) consistent = ALWAYS_TRUE - elseif f === arrayref || f === arrayset || f === arraysize + elseif f === memoryrefnew || f === memoryrefoffset + consistent = ALWAYS_TRUE + elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY - elseif f === Core._typevar + elseif f === Core._typevar || f === Core.memorynew consistent = CONSISTENT_IF_NOTRETURNED else consistent = ALWAYS_FALSE end - if f === setfield! || f === arrayset + if f === setfield! || f === memoryrefset! effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY elseif contains_is(_EFFECT_FREE_BUILTINS, f) || contains_is(_PURE_BUILTINS, f) effect_free = ALWAYS_TRUE else effect_free = ALWAYS_FALSE end - nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt) + nothrow = builtin_nothrow(𝕃, f, argtypes, rt) if contains_is(_INACCESSIBLEMEM_BUILTINS, f) inaccessiblememonly = ALWAYS_TRUE elseif contains_is(_ARGMEM_BUILTINS, f) @@ -2411,37 +2596,130 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argin else inaccessiblememonly = ALWAYS_FALSE end - return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly) + if f === memoryrefnew || f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned + noub = memoryop_noub(f, argtypes) ? ALWAYS_TRUE : ALWAYS_FALSE + else + noub = ALWAYS_TRUE + end + return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly, noub) end end +function memoryop_noub(@nospecialize(f), argtypes::Vector{Any}) + nargs = length(argtypes) + nargs == 0 && return true # must throw and noub + lastargtype = argtypes[end] + isva = isvarargtype(lastargtype) + if f === memoryrefnew + if nargs == 1 && !isva + return true + elseif nargs == 2 && !isva + return true + end + expected_nargs = 3 + elseif f === memoryrefget || f === memoryref_isassigned + expected_nargs = 3 + else + @assert f === memoryrefset! 
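# [Editor's sketch — illustrative note, not part of the patch above.]
# compilerbarrier(:type, x) hides the value's type from inference, which is
# why builtin_effects treats only the :conditional setting as :consistent:
f(x) = Core.compilerbarrier(:type, x)
Base.return_types(f, (Int,))   # Any[Any] — the barrier blocks type information
f(1)                           # still 1 at run time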
"unexpected memoryop is given" + expected_nargs = 4 + end + if nargs == expected_nargs && !isva + boundscheck = widenconditional(lastargtype) + hasintersect(widenconst(boundscheck), Bool) || return true # must throw and noub + boundscheck isa Const && boundscheck.val === true && return true + elseif nargs > expected_nargs + 1 + return true # must throw and noub + elseif !isva + return true # must throw and noub + end + return false +end + +function current_scope_tfunc(interp::AbstractInterpreter, sv::InferenceState) + pc = sv.currpc + handler_info = sv.handler_info + while true + pchandler = gethandler(sv, pc) + if pchandler === nothing + # No local scope available - inherited from the outside + return Any + end + # Remember that we looked at this handler, so we get re-scheduled + # if the scope information changes + isdefined(pchandler, :scope_uses) || (pchandler.scope_uses = Int[]) + pcbb = block_for_inst(sv.cfg, pc) + if findfirst(==(pcbb), pchandler.scope_uses) === nothing + push!(pchandler.scope_uses, pcbb) + end + scope = pchandler.scopet + if scope !== nothing + # Found the scope - forward it + return scope + end + pc = pchandler.enter_idx + end +end +current_scope_tfunc(interp::AbstractInterpreter, sv) = Any + +hasvarargtype(argtypes::Vector{Any}) = !isempty(argtypes) && isvarargtype(argtypes[end]) + +""" + builtin_nothrow(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt) -> Bool + +Compute throw-ness of a builtin function call. `argtypes` should not include `f` itself. +""" function builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f), argtypes::Vector{Any}, @nospecialize(rt)) rt === Bottom && return false - contains_is(_PURE_BUILTINS, f) && return true + if f === tuple || f === svec + return true + elseif hasvarargtype(argtypes) + return false + elseif contains_is(_PURE_BUILTINS, f) + return true + end return _builtin_nothrow(𝕃, f, argtypes, rt) end function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any}, sv::Union{AbsIntState, Nothing}) 𝕃ᵢ = typeinf_lattice(interp) - if f === tuple - return tuple_tfunc(𝕃ᵢ, argtypes) - end if isa(f, IntrinsicFunction) if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes) argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes) try + # unroll a few cases which have specialized codegen + if length(argvals) == 1 + return Const(f(argvals[1])) + elseif length(argvals) == 2 + return Const(f(argvals[1], argvals[2])) + elseif length(argvals) == 3 + return Const(f(argvals[1], argvals[2], argvals[3])) + end return Const(f(argvals...)) - catch + catch ex # expected ErrorException, TypeError, ConcurrencyViolationError, DivideError etc. 
+ ex isa InterruptException && rethrow() + return Bottom end end - iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 + iidx = Int(reinterpret(Int32, f)) + 1 if iidx < 0 || iidx > length(T_IFUNC) - # invalid intrinsic + # unknown intrinsic return Any end tf = T_IFUNC[iidx] else + if f === tuple + return tuple_tfunc(𝕃ᵢ, argtypes) + elseif f === Core.current_scope + if length(argtypes) != 0 + if length(argtypes) != 1 || !isvarargtype(argtypes[1]) + return Bottom + end + end + return current_scope_tfunc(interp, sv) + elseif f === Core.apply_type + return apply_type_tfunc(𝕃ᵢ, argtypes; max_union_splitting=InferenceParams(interp).max_union_splitting) + end fidx = find_tfunc(f) if fidx === nothing # unknown/unhandled builtin function @@ -2449,8 +2727,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp end tf = T_FFUNC_VAL[fidx] end - tf = tf::Tuple{Int, Int, Any} - if !isempty(argtypes) && isvarargtype(argtypes[end]) + if hasvarargtype(argtypes) if length(argtypes) - 1 > tf[2] # definitely too many arguments return Bottom @@ -2480,83 +2757,143 @@ _iszero(@nospecialize x) = x === Intrinsics.xor_int(x, x) _isneg1(@nospecialize x) = _iszero(Intrinsics.not_int(x)) _istypemin(@nospecialize x) = !_iszero(x) && Intrinsics.neg_int(x) === x -function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Vector{Any}) +function builtin_exct(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, @nospecialize(rt)) + if isa(f, IntrinsicFunction) + return intrinsic_exct(𝕃, f, argtypes) + end + return Any +end + +function div_nothrow(f::IntrinsicFunction, @nospecialize(arg1), @nospecialize(arg2)) + isa(arg2, Const) || return false + den_val = arg2.val + _iszero(den_val) && return false + f !== Intrinsics.checked_sdiv_int && return true + # Nothrow as long as we additionally don't do typemin(T)/-1 + return !_isneg1(den_val) || (isa(arg1, Const) && !_istypemin(arg1.val)) +end + +function known_is_valid_intrinsic_elptr(𝕃::AbstractLattice, @nospecialize(ptr)) + ptrT = typeof_tfunc(𝕃, ptr) + isa(ptrT, Const) || return false + return is_valid_intrinsic_elptr(ptrT.val) +end + +function intrinsic_exct(𝕃::AbstractLattice, f::IntrinsicFunction, argtypes::Vector{Any}) + if hasvarargtype(argtypes) + return Any + end + # First check that we have the correct number of arguments - iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 + iidx = Int(reinterpret(Int32, f)) + 1 if iidx < 1 || iidx > length(T_IFUNC) - # invalid intrinsic - return false + # invalid intrinsic (system will crash) + return Any end tf = T_IFUNC[iidx] - tf = tf::Tuple{Int, Int, Any} if !(tf[1] <= length(argtypes) <= tf[2]) # wrong # of args - return false + return ArgumentError end + # TODO: We could do better for cglobal - f === Intrinsics.cglobal && return false + f === Intrinsics.cglobal && return Any # TODO: We can't know for sure, but the user should have a way to assert # that it won't - f === Intrinsics.llvmcall && return false - if f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int || f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int + f === Intrinsics.llvmcall && return Any + + if (f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int || + f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int) # Nothrow as long as the second argument is guaranteed not to be zero - arg2 = argtypes[2] - isa(arg2, Const) || return false arg1 = argtypes[1] + arg2 = argtypes[2] warg1 = widenconst(arg1) warg2 = widenconst(arg2) - (warg1 === 
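# [Editor's sketch — illustrative note, not part of the patch above.]
# div_nothrow captures the two ways checked signed division throws, observable
# through ordinary integer `div`:
div(7, 2)                 # 3 — nonzero divisor other than -1, so no throw
# div(1, 0)               # DivideError: the zero-divisor case
# div(typemin(Int), -1)   # DivideError: the typemin ÷ -1 overflow case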
warg2 && isprimitivetype(warg1)) || return false - den_val = arg2.val - _iszero(den_val) && return false - f !== Intrinsics.checked_sdiv_int && return true - # Nothrow as long as we additionally don't do typemin(T)/-1 - return !_isneg1(den_val) || (isa(arg1, Const) && !_istypemin(arg1.val)) + if !(warg1 === warg2 && isprimitivetype(warg1)) + return Union{TypeError, DivideError} + end + if !div_nothrow(f, arg1, arg2) + return DivideError + end + return Union{} end + if f === Intrinsics.pointerref # Nothrow as long as the types are ok. N.B.: dereferencability is not # modeled here, but can cause errors (e.g. ReadOnlyMemoryError). We follow LLVM here # in that it is legal to remove unused non-volatile loads. - length(argtypes) == 3 || return false - return argtypes[1] ⊑ Ptr && argtypes[2] ⊑ Int && argtypes[3] ⊑ Int + if !(argtypes[1] ⊑ Ptr && argtypes[2] ⊑ Int && argtypes[3] ⊑ Int) + return Union{TypeError, ErrorException} + end + if !known_is_valid_intrinsic_elptr(𝕃, argtypes[1]) + return ErrorException + end + return Union{} end + if f === Intrinsics.pointerset eT = pointer_eltype(argtypes[1]) - isprimitivetype(eT) || return false - return argtypes[2] ⊑ eT && argtypes[3] ⊑ Int && argtypes[4] ⊑ Int - end - if f === Intrinsics.arraylen - return argtypes[1] ⊑ Array + if !known_is_valid_intrinsic_elptr(𝕃, argtypes[1]) + return Union{TypeError, ErrorException} + end + if !(argtypes[2] ⊑ eT && argtypes[3] ⊑ Int && argtypes[4] ⊑ Int) + return TypeError + end + return Union{} end + if f === Intrinsics.bitcast - ty, isexact, isconcrete = instanceof_tfunc(argtypes[1]) + ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true) xty = widenconst(argtypes[2]) - return isconcrete && isprimitivetype(ty) && isprimitivetype(xty) && Core.sizeof(ty) === Core.sizeof(xty) + if !isconcrete + return Union{ErrorException, TypeError} + end + if !(isprimitivetype(ty) && isprimitivetype(xty) && Core.sizeof(ty) === Core.sizeof(xty)) + return ErrorException + end + return Union{} end + if f in (Intrinsics.sext_int, Intrinsics.zext_int, Intrinsics.trunc_int, Intrinsics.fptoui, Intrinsics.fptosi, Intrinsics.uitofp, Intrinsics.sitofp, Intrinsics.fptrunc, Intrinsics.fpext) # If !isconcrete, `ty` may be Union{} at runtime even if we have # isprimitivetype(ty). - ty, isexact, isconcrete = instanceof_tfunc(argtypes[1]) + ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true) + if !isconcrete + return Union{ErrorException, TypeError} + end xty = widenconst(argtypes[2]) - return isconcrete && isprimitivetype(ty) && isprimitivetype(xty) + if !(isprimitivetype(ty) && isprimitivetype(xty)) + return ErrorException + end + return Union{} end + if f === Intrinsics.have_fma - ty, isexact, isconcrete = instanceof_tfunc(argtypes[1]) - return isconcrete && isprimitivetype(ty) + ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true) + if !(isconcrete && isprimitivetype(ty)) + return TypeError + end + return Union{} end + # The remaining intrinsics are math/bits/comparison intrinsics. They work on all # primitive types of the same type. isshift = f === shl_int || f === lshr_int || f === ashr_int argtype1 = widenconst(argtypes[1]) - isprimitivetype(argtype1) || return false + isprimitivetype(argtype1) || return ErrorException for i = 2:length(argtypes) argtype = widenconst(argtypes[i]) if isshift ? 
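# [Editor's sketch — illustrative note, not part of the patch above.]
# The bitcast branch above: same-size primitive types succeed, mismatched
# sizes raise the ErrorException it returns.
Core.Intrinsics.bitcast(UInt64, 1.0)     # 0x3ff0000000000000
# Core.Intrinsics.bitcast(UInt32, 1.0)   # ErrorException: argument size does not match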
!isprimitivetype(argtype) : argtype !== argtype1 - return false + return ErrorException end end - return true + return Union{} +end + +function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Vector{Any}) + return intrinsic_exct(SimpleInferenceLattice.instance, f, argtypes) === Union{} end # whether `f` is pure for inference @@ -2564,7 +2901,6 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction) return !(f === Intrinsics.pointerref || # this one is volatile f === Intrinsics.pointerset || # this one is never effect-free f === Intrinsics.llvmcall || # this one is never effect-free - f === Intrinsics.arraylen || # this one is volatile f === Intrinsics.sqrt_llvm_fast || # this one may differ at runtime (by a few ulps) f === Intrinsics.have_fma || # this one depends on the runtime environment f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime @@ -2582,21 +2918,14 @@ function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any}) # llvmcall can do arbitrary things return Effects() end - if contains_is(_INCONSISTENT_INTRINSICS, f) consistent = ALWAYS_FALSE - elseif f === arraylen - consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY else consistent = ALWAYS_TRUE end effect_free = !(f === Intrinsics.pointerset) ? ALWAYS_TRUE : ALWAYS_FALSE - nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && intrinsic_nothrow(f, argtypes) - if f === arraylen - inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY - else - inaccessiblememonly = ALWAYS_TRUE - end + nothrow = intrinsic_nothrow(f, argtypes) + inaccessiblememonly = ALWAYS_TRUE return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly) end @@ -2604,34 +2933,40 @@ end # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type, # while this assumes that it is an absolutely precise and accurate and exact model of both function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState) - UNKNOWN = CallMeta(Type, EFFECTS_THROWS, NoCallInfo()) + UNKNOWN = CallMeta(Type, Any, Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo()) if !(2 <= length(argtypes) <= 3) - return UNKNOWN + return Future(UNKNOWN) end tt = widenslotwrapper(argtypes[end]) if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt)) - return UNKNOWN + return Future(UNKNOWN) end af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1] if !isa(af_argtype, DataType) || !(af_argtype <: Tuple) - return UNKNOWN + return Future(UNKNOWN) end if length(argtypes) == 3 aft = widenslotwrapper(argtypes[2]) - if !isa(aft, Const) && !(isType(aft) && !has_free_typevars(aft)) && - !(isconcretetype(aft) && !(aft <: Builtin)) - return UNKNOWN - end argtypes_vec = Any[aft, af_argtype.parameters...] else argtypes_vec = Any[af_argtype.parameters...] 
+ isempty(argtypes_vec) && push!(argtypes_vec, Union{}) + aft = argtypes_vec[1] end + if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) || + (isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft))) + return Future(UNKNOWN) + end + + # effects are not an issue if we know this statement will get removed, but if it does not get removed, + # then this could be recursively re-entering inference (via concrete-eval), which will not terminate + RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false) if contains_is(argtypes_vec, Union{}) - return CallMeta(Const(Union{}), EFFECTS_TOTAL, NoCallInfo()) + return Future(CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo())) end # Run the abstract_call without restricting abstract call @@ -2640,82 +2975,68 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s if isa(sv, InferenceState) old_restrict = sv.restrict_abstract_call_sites sv.restrict_abstract_call_sites = false - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) - sv.restrict_abstract_call_sites = old_restrict - else - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) - end - info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() - rt = widenslotwrapper(call.rt) - if isa(rt, Const) - # output was computed to be constant - return CallMeta(Const(typeof(rt.val)), EFFECTS_TOTAL, info) - end - rt = widenconst(rt) - if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt)) - # output cannot be improved so it is known for certain - return CallMeta(Const(rt), EFFECTS_TOTAL, info) - elseif isa(sv, InferenceState) && !isempty(sv.pclimitations) - # conservatively express uncertainty of this result - # in two ways: both as being a subtype of this, and - # because of LimitedAccuracy causes - return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info) - elseif (isa(tt, Const) || isconstType(tt)) && - (isa(aft, Const) || isconstType(aft)) - # input arguments were known for certain - # XXX: this doesn't imply we know anything about rt - return CallMeta(Const(rt), EFFECTS_TOTAL, info) - elseif isType(rt) - return CallMeta(Type{rt}, EFFECTS_TOTAL, info) - else - return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info) + end + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) + tt = Core.Box(tt) + return Future{CallMeta}(call, interp, sv) do call, interp, sv + if isa(sv, InferenceState) + sv.restrict_abstract_call_sites = old_restrict + end + info = MethodResultPure(ReturnTypeCallInfo(call.info)) + rt = widenslotwrapper(call.rt) + if isa(rt, Const) + # output was computed to be constant + return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info) + end + rt = widenconst(rt) + if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt)) + # output cannot be improved so it is known for certain + return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info) + elseif isa(sv, InferenceState) && !isempty(sv.pclimitations) + # conservatively express uncertainty of this result + # in two ways: both as being a subtype of this, and + # because of LimitedAccuracy causes + return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info) + elseif isa(tt.contents, Const) || isconstType(tt.contents) + # input arguments were known for certain + # XXX: this doesn't imply we know anything about rt + return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info) + elseif isType(rt) + return CallMeta(Type{rt}, Union{}, 
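# [Editor's sketch — illustrative note, not part of the patch above.]
# return_type_tfunc models calls to Core.Compiler.return_type (used, e.g., by
# Base.promote_op) when they appear inside code being inferred:
Core.Compiler.return_type(+, Tuple{Int, Int})   # Int
Base.promote_op(+, Int, Float64)                # Float64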
RT_CALL_EFFECTS, info) + else + return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info) + end end end # a simplified model of abstract_call_gf_by_type for applicable function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState, max_methods::Int) - length(argtypes) < 2 && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - isvarargtype(argtypes[2]) && return CallMeta(Bool, EFFECTS_UNKNOWN, NoCallInfo()) + length(argtypes) < 2 && return Future(CallMeta(Bottom, ArgumentError, EFFECTS_THROWS, NoCallInfo())) + isvarargtype(argtypes[2]) && return Future(CallMeta(Bool, ArgumentError, EFFECTS_THROWS, NoCallInfo())) argtypes = argtypes[2:end] atype = argtypes_to_type(argtypes) - matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp), - InferenceParams(interp).max_union_splitting, max_methods) + matches = find_method_matches(interp, argtypes, atype; max_methods) + info = NoCallInfo() if isa(matches, FailedMethodMatch) rt = Bool # too many matches to analyze else (; valid_worlds, applicable) = matches update_valid_age!(sv, valid_worlds) - - # also need an edge to the method table in case something gets - # added that did not intersect with any existing method - if isa(matches, MethodMatches) - matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype) - else - for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts) - thisfullmatch || add_mt_backedge!(sv, mt, atype) - end - end - napplicable = length(applicable) if napplicable == 0 rt = Const(false) # never any matches + elseif !fully_covering(matches) || any_ambig(matches) + # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. + rt = Bool else rt = Const(true) # has applicable matches - for i in 1:napplicable - match = applicable[i]::MethodMatch - edge = specialize_method(match)::MethodInstance - add_backedge!(sv, edge) - end - - if isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) : - (!all(matches.fullmatches) || any_ambig(matches)) - # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. 
- rt = Bool - end + end + if rt !== Bool + info = VirtualMethodMatchInfo(matches.info) end end - return CallMeta(rt, EFFECTS_TOTAL, NoCallInfo()) + return Future(CallMeta(rt, Union{}, EFFECTS_TOTAL, info)) end add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40) @@ -2724,41 +3045,41 @@ function _hasmethod_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, sv if length(argtypes) == 3 && !isvarargtype(argtypes[3]) ft′ = argtype_by_index(argtypes, 2) ft = widenconst(ft′) - ft === Bottom && return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo()) + ft === Bottom && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) typeidx = 3 elseif length(argtypes) == 2 && !isvarargtype(argtypes[2]) typeidx = 2 else - return CallMeta(Any, Effects(), NoCallInfo()) + return CallMeta(Any, Any, Effects(), NoCallInfo()) end - (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, typeidx)) - isexact || return CallMeta(Bool, Effects(), NoCallInfo()) + (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, typeidx), false) + isexact || return CallMeta(Bool, Any, Effects(), NoCallInfo()) unwrapped = unwrap_unionall(types) if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name - return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo()) + return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) end if typeidx == 3 - isdispatchelem(ft) || return CallMeta(Bool, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below + isdispatchelem(ft) || return CallMeta(Bool, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below types = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type end mt = ccall(:jl_method_table_for, Any, (Any,), types) if !isa(mt, MethodTable) - return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo()) + return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) end - match, valid_worlds, overlayed = findsup(types, method_table(interp)) + match, valid_worlds = findsup(types, method_table(interp)) update_valid_age!(sv, valid_worlds) if match === nothing rt = Const(false) - add_mt_backedge!(sv, mt, types) # this should actually be an invoke-type backedge + vresults = MethodLookupResult(Any[], valid_worlds, true) + vinfo = MethodMatchInfo(vresults, mt, types, false) # XXX: this should actually be an info with invoke-type edge else rt = Const(true) - edge = specialize_method(match)::MethodInstance - add_invoke_backedge!(sv, types, edge) + vinfo = InvokeCallInfo(nothing, match, nothing, types) end - return CallMeta(rt, EFFECTS_TOTAL, NoCallInfo()) + info = VirtualMethodMatchInfo(vinfo) + return CallMeta(rt, Union{}, EFFECTS_TOTAL, info) end - # N.B.: typename maps type equivalence classes to a single value function typename_static(@nospecialize(t)) t isa Const && return _typename(t.val) @@ -2767,92 +3088,42 @@ function typename_static(@nospecialize(t)) return isType(t) ? 
_typename(t.parameters[1]) : Core.TypeName end -function global_order_nothrow(@nospecialize(o), loading::Bool, storing::Bool) - o isa Const || return false +function global_order_exct(@nospecialize(o), loading::Bool, storing::Bool) + if !(o isa Const) + if o === Symbol + return ConcurrencyViolationError + elseif !hasintersect(o, Symbol) + return TypeError + else + return Union{ConcurrencyViolationError, TypeError} + end + end sym = o.val if sym isa Symbol order = get_atomic_order(sym, loading, storing) - return order !== MEMORY_ORDER_INVALID && order !== MEMORY_ORDER_NOTATOMIC - end - return false -end -@nospecs function getglobal_nothrow(M, s, o) - global_order_nothrow(o, #=loading=#true, #=storing=#false) || return false - return getglobal_nothrow(M, s) -end -@nospecs function getglobal_nothrow(M, s) - if M isa Const && s isa Const - M, s = M.val, s.val - if M isa Module && s isa Symbol - return isdefined(M, s) - end - end - return false -end -@nospecs function getglobal_tfunc(𝕃::AbstractLattice, M, s, order=Symbol) - if M isa Const && s isa Const - M, s = M.val, s.val - if M isa Module && s isa Symbol - return abstract_eval_global(M, s) - end - return Bottom - elseif !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol)) - return Bottom - end - return Any -end -@nospecs function setglobal!_tfunc(𝕃::AbstractLattice, M, s, v, order=Symbol) - if !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol)) - return Bottom - end - return v -end -add_tfunc(getglobal, 2, 3, getglobal_tfunc, 1) -add_tfunc(setglobal!, 3, 4, setglobal!_tfunc, 3) -@nospecs function setglobal!_nothrow(M, s, newty, o) - global_order_nothrow(o, #=loading=#false, #=storing=#true) || return false - return setglobal!_nothrow(M, s, newty) -end -@nospecs function setglobal!_nothrow(M, s, newty) - if M isa Const && s isa Const - M, s = M.val, s.val - if isa(M, Module) && isa(s, Symbol) - return global_assignment_nothrow(M, s, newty) - end - end - return false -end - -function global_assignment_nothrow(M::Module, s::Symbol, @nospecialize(newty)) - if isdefined(M, s) && !isconst(M, s) - ty = ccall(:jl_get_binding_type, Any, (Any, Any), M, s) - return ty === nothing || newty ⊑ ty - end - return false -end - -@nospecs function get_binding_type_effect_free(M, s) - if M isa Const && s isa Const - M, s = M.val, s.val - if M isa Module && s isa Symbol - return ccall(:jl_get_binding_type, Any, (Any, Any), M, s) !== nothing + if order !== MEMORY_ORDER_INVALID && order !== MEMORY_ORDER_NOTATOMIC + return Union{} + else + return ConcurrencyViolationError end + else + return TypeError end - return false -end -@nospecs function get_binding_type_tfunc(𝕃::AbstractLattice, M, s) - if get_binding_type_effect_free(M, s) - return Const(Core.get_binding_type((M::Const).val, (s::Const).val)) - end - return Type end -add_tfunc(Core.get_binding_type, 2, 2, get_binding_type_tfunc, 0) @nospecs function get_binding_type_nothrow(𝕃::AbstractLattice, M, s) - ⊑ = Core.Compiler.:⊑(𝕃) + ⊑ = partialorder(𝕃) return M ⊑ Module && s ⊑ Symbol end +add_tfunc(getglobal, 2, 3, @nospecs((𝕃::AbstractLattice, args...)->Any), 1) +add_tfunc(setglobal!, 3, 4, @nospecs((𝕃::AbstractLattice, args...)->Any), 3) +add_tfunc(swapglobal!, 3, 4, @nospecs((𝕃::AbstractLattice, args...)->Any), 3) +add_tfunc(modifyglobal!, 4, 5, @nospecs((𝕃::AbstractLattice, args...)->Any), 3) +add_tfunc(replaceglobal!, 4, 6, @nospecs((𝕃::AbstractLattice, args...)->Any), 3) +add_tfunc(setglobalonce!, 3, 5, @nospecs((𝕃::AbstractLattice, args...)->Bool), 3) 
+add_tfunc(Core.get_binding_type, 2, 2, @nospecs((𝕃::AbstractLattice, args...)->Type), 0) + # foreigncall # =========== @@ -2861,94 +3132,31 @@ end const FOREIGNCALL_ARG_START = 6 -function foreigncall_effects(@specialize(abstract_eval), e::Expr) - args = e.args - name = args[1] - isa(name, QuoteNode) && (name = name.value) - isa(name, Symbol) || return EFFECTS_UNKNOWN - ndims = alloc_array_ndims(name) - if ndims !== nothing - if ndims ≠ 0 - return alloc_array_effects(abstract_eval, args, ndims) - else - return new_array_effects(abstract_eval, args) - end - end - if is_array_resize(name) - return array_resize_effects() - end +function foreigncall_effects(@nospecialize(abstract_eval), e::Expr) + # `:foreigncall` can potentially perform all sorts of operations, including calling + # overlay methods, but the `:foreigncall` itself is not dispatched, and there is no + # concern that the method calls that potentially occur within the `:foreigncall` will + # be executed using the wrong method table due to concrete evaluation, so using + # `EFFECTS_UNKNOWN` here and not tainting with `:nonoverlayed` is fine return EFFECTS_UNKNOWN end -function is_array_resize(name::Symbol) - return name === :jl_array_grow_beg || name === :jl_array_grow_end || - name === :jl_array_del_beg || name === :jl_array_del_end || - name === :jl_array_grow_at || name === :jl_array_del_at -end - -function array_resize_effects() - return Effects(EFFECTS_TOTAL; - effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY, - nothrow = false, - inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY) -end - -function alloc_array_ndims(name::Symbol) - if name === :jl_alloc_array_1d - return 1 - elseif name === :jl_alloc_array_2d - return 2 - elseif name === :jl_alloc_array_3d - return 3 - elseif name === :jl_new_array - return 0 - end - return nothing -end - -function alloc_array_effects(@specialize(abstract_eval), args::Vector{Any}, ndims::Int) - nothrow = alloc_array_nothrow(abstract_eval, args, ndims) - return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow) -end - -function alloc_array_nothrow(@specialize(abstract_eval), args::Vector{Any}, ndims::Int) - length(args) ≥ ndims+FOREIGNCALL_ARG_START || return false - atype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1] - dims = Csize_t[] - for i in 1:ndims - dim = abstract_eval(args[i+FOREIGNCALL_ARG_START]) - isa(dim, Const) || return false - dimval = dim.val - isa(dimval, Int) || return false - push!(dims, reinterpret(Csize_t, dimval)) - end - return _new_array_nothrow(atype, ndims, dims) -end - -function new_array_effects(@specialize(abstract_eval), args::Vector{Any}) - nothrow = new_array_nothrow(abstract_eval, args) - return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow) -end - -function new_array_nothrow(@specialize(abstract_eval), args::Vector{Any}) - length(args) ≥ FOREIGNCALL_ARG_START+1 || return false - atype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1] - dims = abstract_eval(args[FOREIGNCALL_ARG_START+1]) - isa(dims, Const) || return dims === Tuple{} - dimsval = dims.val - isa(dimsval, Tuple{Vararg{Int}}) || return false - ndims = nfields(dimsval) - isa(ndims, Int) || return false - dims = Csize_t[reinterpret(Csize_t, dimval) for dimval in dimsval] - return _new_array_nothrow(atype, ndims, dims) -end - -function _new_array_nothrow(@nospecialize(atype), ndims::Int, dims::Vector{Csize_t}) - isa(atype, DataType) || return false - eltype = atype.parameters[1] - iskindtype(typeof(eltype)) || return false 
-    elsz = aligned_sizeof(eltype)
-    return ccall(:jl_array_validate_dims, Cint,
-        (Ptr{Csize_t}, Ptr{Csize_t}, UInt32, Ptr{Csize_t}, Csize_t),
-        #=nel=#RefValue{Csize_t}(), #=tot=#RefValue{Csize_t}(), ndims, dims, elsz) == 0
+function new_genericmemory_nothrow(@nospecialize(abstract_eval), args::Vector{Any})
+    length(args) ≥ 1+FOREIGNCALL_ARG_START || return false
+    mtype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1]
+    isa(mtype, DataType) || return false
+    isdefined(mtype, :instance) || return false
+    elsz = Int(datatype_layoutsize(mtype))
+    arrayelem = datatype_arrayelem(mtype)
+    dim = abstract_eval(args[1+FOREIGNCALL_ARG_START])
+    isa(dim, Const) || return false
+    dimval = dim.val
+    isa(dimval, Int) || return false
+    0 < dimval < typemax(Int) || return false
+    tot, ovflw = Intrinsics.checked_smul_int(dimval, elsz)
+    ovflw && return false
+    isboxed = 1; isunion = 2
+    tot, ovflw = Intrinsics.checked_sadd_int(tot, arrayelem == isunion ? 1 + dimval : 1)
+    ovflw && return false
+    return true
 end
diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl
new file mode 100644
index 0000000000000..e3896870d82b8
--- /dev/null
+++ b/Compiler/src/typeinfer.jl
@@ -0,0 +1,1358 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
+can be used to measure the exclusive time spent inferring each method instance that is
+recursively inferred during type inference.
+
+This is meant to be internal to the compiler, and makes some specific assumptions about
+being used for this purpose alone.
+"""
+module Timings
+
+using ..Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
+    @inbounds, copy, backtrace
+
+# What we record for any given frame we infer during type inference.
+struct InferenceFrameInfo
+    mi::Core.MethodInstance
+    world::UInt64
+    sptypes::Vector{Compiler.VarState}
+    slottypes::Vector{Any}
+    nargs::Int
+end
+
+function _typeinf_identifier(frame::Compiler.InferenceState)
+    mi_info = InferenceFrameInfo(
+        frame.linfo,
+        frame_world(frame),
+        copy(frame.sptypes),
+        copy(frame.slottypes),
+        length(frame.result.argtypes),
+    )
+    return mi_info
+end
+
+_typeinf_identifier(frame::InferenceFrameInfo) = frame
+
+"""
+    Compiler.Timing(mi_info, start_time, ...)
+
+Internal type containing the timing result for running type inference on a single
+MethodInstance.
+"""
+struct Timing
+    mi_info::InferenceFrameInfo
+    start_time::UInt64
+    cur_start_time::UInt64
+    time::UInt64
+    children::Core.Array{Timing,1}
+    bt # backtrace collected upon initial entry to typeinf
+end
+Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
+Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
+
+_time_ns() = ccall(:jl_hrtime, UInt64, ())
+
+# We keep a stack of the Timings for each of the MethodInstances currently being timed.
+# Since type inference currently operates via a depth-first search (during abstract
+# evaluation), this vector operates like a call stack. The last node in _timings is the
+# node currently being inferred, and its parent is directly before it, etc.
+# Each Timing also contains its own vector for all of its children, so that the tree
+# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
+# because we create a new node for duplicates.)
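# A minimal sketch of how the recorded tree could be consumed (illustration only; the
# helper `subtree_time` below is hypothetical and not part of this file). Per the module
# docstring, each node's `time` holds the exclusive time of one inference frame, so the
# inclusive time of a subtree is its own `time` plus that of all of its children:
function subtree_time(t::Timing)
    total = t.time               # exclusive time spent in this frame
    for child in t.children      # recurse into the callees recorded under this node
        total += subtree_time(child)
    end
    return total
end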
+const _timings = Timing[] +# ROOT() is an empty function used as the top-level Timing node to measure all time spent +# *not* in type inference during a given recording trace. It is used as a "dummy" node. +function ROOT() end +const ROOTmi = Compiler.specialize_method( + first(Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec()) +""" + Compiler.reset_timings() + +Empty out the previously recorded type inference timings (`Compiler._timings`), and +start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference. +""" +function reset_timings() end +push!(_timings, Timing( + # The MethodInstance for ROOT(), and default empty values for other fields. + InferenceFrameInfo(ROOTmi, 0x0, Compiler.VarState[], Any[Core.Const(ROOT)], 1), + _time_ns())) +function close_current_timer() end +function enter_new_timer(frame) end +function exit_current_timer(_expected_frame_) end + +end # module Timings + +""" + Compiler.__set_measure_typeinf(onoff::Bool) + +If set to `true`, record per-method-instance timings within type inference in the Compiler. +""" +__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff +const __measure_typeinf__ = RefValue{Bool}(false) + +function finish!(interp::AbstractInterpreter, caller::InferenceState) + result = caller.result + opt = result.src + if opt isa OptimizationState + src = ir_to_codeinf!(opt) + edges = src.edges::SimpleVector + caller.src = result.src = src + else + edges = Core.svec(caller.edges...) + caller.src.edges = edges + end + #@assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges) + if isdefined(result, :ci) + ci = result.ci + # if we aren't cached, we don't need this edge + # but our caller might, so let's just make it anyways + if last(result.valid_worlds) >= get_world_counter() + # TODO: this should probably come after all store_backedges (after optimizations) for the entire graph in finish_cycle + # since we should be requiring that all edges first get their backedges set, as a batch + result.valid_worlds = WorldRange(first(result.valid_worlds), typemax(UInt)) + end + if last(result.valid_worlds) == typemax(UInt) + # if we can record all of the backedges in the global reverse-cache, + # we can now widen our applicability in the global cache too + store_backedges(ci, edges) + end + inferred_result = nothing + uncompressed = inferred_result + const_flag = is_result_constabi_eligible(result) + discard_src = caller.cache_mode === CACHE_MODE_NULL || const_flag + if !discard_src + inferred_result = transform_result_for_cache(interp, result) + # TODO: do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)? 
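    # In the branch below, a `CodeInfo` result may additionally be compressed to a
    # `String` by `maybe_compress_codeinfo` before being cached, while any other inferred
    # representation is discarded when targeting the native cache (`ci.owner === nothing`),
    # since only `CodeInfo`/`String` entries are understood by codegen there.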
+ if inferred_result isa CodeInfo + if may_compress(interp) + nslots = length(inferred_result.slotflags) + resize!(inferred_result.slottypes::Vector{Any}, nslots) + resize!(inferred_result.slotnames, nslots) + end + di = inferred_result.debuginfo + uncompressed = inferred_result + inferred_result = maybe_compress_codeinfo(interp, result.linfo, inferred_result) + result.is_src_volatile = false + elseif ci.owner === nothing + # The global cache can only handle objects that codegen understands + inferred_result = nothing + end + end + if !@isdefined di + di = DebugInfo(result.linfo) + end + ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any), + ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects), + result.analysis_results, di, edges) + engine_reject(interp, ci) + if !discard_src && isdefined(interp, :codegen) && uncompressed isa CodeInfo + # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work + interp.codegen[ci] = uncompressed + if bootstrapping_compiler && inferred_result == nothing + # This is necessary to get decent bootstrapping performance + # when compiling the compiler to inject everything eagerly + # where codegen can start finding and using it right away + mi = result.linfo + if mi.def isa Method && isa_compileable_sig(mi) + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, uncompressed) + end + end + end + end + return nothing +end + +function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstance, src::CodeInfo) + user_edges = src.edges + edges = user_edges isa SimpleVector ? user_edges : user_edges === nothing ? Core.svec() : Core.svec(user_edges...) 
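    # Note: this `finish!` method records `src` without running inference on it, so the
    # resulting CodeInstance is deliberately imprecise: the return and exception types
    # stay `Any`, no constant ABI is claimed (`const_flags = 0x0`), and the encoded
    # effects bits are simply zero.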
+ const_flag = false + di = src.debuginfo + rettype = Any + exctype = Any + rettype_const = nothing + const_flags = 0x0 + ipo_effects = zero(UInt32) + min_world = src.min_world + max_world = src.max_world + if max_world >= get_world_counter() + max_world = typemax(UInt) + end + if max_world == typemax(UInt) + # if we can record all of the backedges in the global reverse-cache, + # we can now widen our applicability in the global cache too + store_backedges(ci, edges) + end + ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any), + ci, rettype, exctype, nothing, const_flags, min_world, max_world, ipo_effects, nothing, di, edges) + ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any), + ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, di, edges) + code_cache(interp)[mi] = ci + if isdefined(interp, :codegen) + interp.codegen[ci] = src + end + engine_reject(interp, ci) + return nothing +end + +function finish_nocycle(::AbstractInterpreter, frame::InferenceState) + finishinfer!(frame, frame.interp) + opt = frame.result.src + if opt isa OptimizationState # implies `may_optimize(caller.interp) === true` + optimize(frame.interp, opt, frame.result) + end + finish!(frame.interp, frame) + if frame.cycleid != 0 + frames = frame.callstack::Vector{AbsIntState} + @assert frames[end] === frame + pop!(frames) + end + return nothing +end + +function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int) + cycle_valid_worlds = WorldRange() + cycle_valid_effects = EFFECTS_TOTAL + for frameid = cycleid:length(frames) + caller = frames[frameid]::InferenceState + @assert caller.cycleid == cycleid + # converge the world age range and effects for this cycle here: + # all frames in the cycle should have the same bits of `valid_worlds` and `effects` + # that are simply the intersection of each partial computation, without having + # dependencies on each other (unlike rt and exct) + cycle_valid_worlds = intersect(cycle_valid_worlds, caller.world.valid_worlds) + cycle_valid_effects = merge_effects(cycle_valid_effects, caller.ipo_effects) + end + for frameid = cycleid:length(frames) + caller = frames[frameid]::InferenceState + adjust_cycle_frame!(caller, cycle_valid_worlds, cycle_valid_effects) + finishinfer!(caller, caller.interp) + end + for frameid = cycleid:length(frames) + caller = frames[frameid]::InferenceState + opt = caller.result.src + if opt isa OptimizationState # implies `may_optimize(caller.interp) === true` + optimize(caller.interp, opt, caller.result) + end + end + for frameid = cycleid:length(frames) + caller = frames[frameid]::InferenceState + finish!(caller.interp, caller) + end + resize!(frames, cycleid - 1) + return nothing +end + +function adjust_cycle_frame!(sv::InferenceState, cycle_valid_worlds::WorldRange, cycle_valid_effects::Effects) + update_valid_age!(sv, cycle_valid_worlds) + sv.ipo_effects = cycle_valid_effects + # traverse the callees of this cycle that are tracked within `sv.cycle_backedges` + # and adjust their statements so that they are consistent with the new `cycle_valid_effects` + new_flags = flags_for_effects(cycle_valid_effects) + for (callee, pc) in sv.cycle_backedges + old_currpc = callee.currpc + callee.currpc = pc + set_curr_ssaflag!(callee, new_flags, IR_FLAGS_EFFECTS) + callee.currpc = old_currpc + end + return nothing +end + +function is_result_constabi_eligible(result::InferenceResult) + result_type = result.result + return isa(result_type, Const) && 
is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val) +end + +transform_result_for_cache(::AbstractInterpreter, result::InferenceResult) = result.src + +function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo) + def = mi.def + isa(def, Method) || return ci # don't compress toplevel code + can_discard_trees = may_discard_trees(interp) + cache_the_tree = !can_discard_trees || is_inlineable(ci) + if cache_the_tree + if may_compress(interp) + return ccall(:jl_compress_ir, String, (Any, Any), def, ci) + else + return ci + end + else + return nothing + end +end + +function cache_result!(interp::AbstractInterpreter, result::InferenceResult, ci::CodeInstance) + @assert isdefined(ci, :inferred) + # check if the existing linfo metadata is also sufficient to describe the current inference result + # to decide if it is worth caching this right now + mi = result.linfo + cache = WorldView(code_cache(interp), result.valid_worlds) + if haskey(cache, mi) + ci = cache[mi] + # n.b.: accurate edge representation might cause the CodeInstance for this to be constructed later + @assert isdefined(ci, :inferred) + return false + end + code_cache(interp)[mi] = ci + return true +end + +function cycle_fix_limited(@nospecialize(typ), sv::InferenceState) + if typ isa LimitedAccuracy + if sv.parentid === 0 + # we might have introduced a limit marker, but we should know it must be sv and other callers_in_cycle + #@assert !isempty(callers_in_cycle(sv)) + # FIXME: this assert fails, appearing to indicate there is a bug in filtering this list earlier. + # In particular (during doctests for example), during inference of + # show(Base.IOContext{Base.GenericIOBuffer{Memory{UInt8}}}, Base.Multimedia.MIME{:var"text/plain"}, LinearAlgebra.BunchKaufman{Float64, Array{Float64, 2}, Array{Int64, 1}}) + # we observed one of the ssavaluetypes here to be Core.Compiler.LimitedAccuracy(typ=Any, causes=Core.Compiler.IdSet(getproperty(LinearAlgebra.BunchKaufman{Float64, Array{Float64, 2}, Array{Int64, 1}}, Symbol))) + return typ.typ + end + causes = copy(typ.causes) + delete!(causes, sv) + for caller in callers_in_cycle(sv) + delete!(causes, caller) + end + if isempty(causes) + return typ.typ + end + if length(causes) != length(typ.causes) + return LimitedAccuracy(typ.typ, causes) + end + end + return typ +end + +function adjust_effects(ipo_effects::Effects, def::Method) + # override the analyzed effects using manually annotated effect settings + override = decode_effects_override(def.purity) + if is_effect_overridden(override, :consistent) + ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) + end + if is_effect_overridden(override, :effect_free) + ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE) + end + if is_effect_overridden(override, :nothrow) + ipo_effects = Effects(ipo_effects; nothrow=true) + end + if is_effect_overridden(override, :terminates_globally) + ipo_effects = Effects(ipo_effects; terminates=true) + end + if is_effect_overridden(override, :notaskstate) + ipo_effects = Effects(ipo_effects; notaskstate=true) + end + if is_effect_overridden(override, :inaccessiblememonly) + ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE) + end + if is_effect_overridden(override, :noub) + ipo_effects = Effects(ipo_effects; noub=ALWAYS_TRUE) + elseif is_effect_overridden(override, :noub_if_noinbounds) && ipo_effects.noub !== ALWAYS_TRUE + ipo_effects = Effects(ipo_effects; noub=NOUB_IF_NOINBOUNDS) + end + if is_effect_overridden(override, 
:consistent_overlay) + ipo_effects = Effects(ipo_effects; nonoverlayed=CONSISTENT_OVERLAY) + end + if is_effect_overridden(override, :nortcall) + ipo_effects = Effects(ipo_effects; nortcall=true) + end + return ipo_effects +end + +function adjust_effects(sv::InferenceState) + ipo_effects = sv.ipo_effects + + # refine :consistent-cy effect using the return type information + # TODO this adjustment tries to compromise imprecise :consistent-cy information, + # that is currently modeled in a flow-insensitive way: ideally we want to model it + # with a proper dataflow analysis instead + rt = sv.bestguess + if rt === Bottom + # always throwing an error counts or never returning both count as consistent + ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) + end + if sv.exc_bestguess === Bottom + # if the exception type of this frame is known to be `Bottom`, + # this frame is known to be safe + ipo_effects = Effects(ipo_effects; nothrow=true) + end + if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int + return is_mutation_free_argtype(sv.slottypes[i]) + end + ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE) + end + if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt) + # in a case when the :consistent-cy here is only tainted by mutable allocations + # (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return + # type guarantees that the allocations are never returned + consistent = ipo_effects.consistent & ~CONSISTENT_IF_NOTRETURNED + ipo_effects = Effects(ipo_effects; consistent) + end + if is_consistent_if_inaccessiblememonly(ipo_effects) + if is_inaccessiblememonly(ipo_effects) + consistent = ipo_effects.consistent & ~CONSISTENT_IF_INACCESSIBLEMEMONLY + ipo_effects = Effects(ipo_effects; consistent) + elseif is_inaccessiblemem_or_argmemonly(ipo_effects) + else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this + ipo_effects = Effects(ipo_effects; consistent=ALWAYS_FALSE) + end + end + if is_effect_free_if_inaccessiblememonly(ipo_effects) + if is_inaccessiblememonly(ipo_effects) + effect_free = ipo_effects.effect_free & ~EFFECT_FREE_IF_INACCESSIBLEMEMONLY + ipo_effects = Effects(ipo_effects; effect_free) + elseif is_inaccessiblemem_or_argmemonly(ipo_effects) + else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this + ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_FALSE) + end + end + + # override the analyzed effects using manually annotated effect settings + def = sv.linfo.def + if isa(def, Method) + ipo_effects = adjust_effects(ipo_effects, def) + end + + return ipo_effects +end + +function refine_exception_type(@nospecialize(exc_bestguess), ipo_effects::Effects) + ipo_effects.nothrow && return Bottom + return exc_bestguess +end + +const empty_edges = Core.svec() + +# inference completed on `me` +# update the MethodInstance +function finishinfer!(me::InferenceState, interp::AbstractInterpreter) + # prepare to run optimization passes on fulltree + @assert isempty(me.ip) + # inspect whether our inference had a limited result accuracy, + # else it may be suitable to cache + bestguess = me.bestguess = cycle_fix_limited(me.bestguess, me) + exc_bestguess = me.exc_bestguess = cycle_fix_limited(me.exc_bestguess, me) + limited_ret = bestguess isa LimitedAccuracy || exc_bestguess isa LimitedAccuracy + limited_src = false + if !limited_ret + gt = me.ssavaluetypes + for j = 1:length(gt) + gt[j] = gtj = 
cycle_fix_limited(gt[j], me) + if gtj isa LimitedAccuracy && me.parentid != 0 + limited_src = true + break + end + end + end + result = me.result + result.valid_worlds = me.world.valid_worlds + result.result = bestguess + ipo_effects = result.ipo_effects = me.ipo_effects = adjust_effects(me) + result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects) + me.src.rettype = widenconst(ignorelimited(bestguess)) + me.src.ssaflags = me.ssaflags + me.src.min_world = first(me.world.valid_worlds) + me.src.max_world = last(me.world.valid_worlds) + istoplevel = !(me.linfo.def isa Method) + istoplevel || compute_edges!(me) # don't add backedges to toplevel method instance + + if limited_ret + # a parent may be cached still, but not this intermediate work: + # we can throw everything else away now + result.src = nothing + me.cache_mode = CACHE_MODE_NULL + set_inlineable!(me.src, false) + elseif limited_src + # a type result will be cached still, but not this intermediate work: + # we can throw everything else away now + result.src = nothing + set_inlineable!(me.src, false) + else + # annotate fulltree with type information, + # either because we are the outermost code, or we might use this later + type_annotate!(interp, me) + mayopt = may_optimize(interp) + doopt = mayopt && + # disable optimization if we don't use this later (because it is not cached) + me.cache_mode != CACHE_MODE_NULL && + # disable optimization if we've already obtained very accurate result + !result_is_constabi(interp, result) + if doopt + result.src = OptimizationState(me, interp) + else + result.src = me.src # for reflection etc. + end + end + + maybe_validate_code(me.linfo, me.src, "inferred") + + # finish populating inference results into the CodeInstance if possible, and maybe cache that globally for use elsewhere + if isdefined(result, :ci) + result_type = result.result + result_type isa LimitedAccuracy && (result_type = result_type.typ) + @assert !(result_type === nothing) + if isa(result_type, Const) + rettype_const = result_type.val + const_flags = is_result_constabi_eligible(result) ? 
0x3 : 0x2 + elseif isa(result_type, PartialOpaque) + rettype_const = result_type + const_flags = 0x2 + elseif isconstType(result_type) + rettype_const = result_type.parameters[1] + const_flags = 0x2 + elseif isa(result_type, PartialStruct) + rettype_const = result_type.fields + const_flags = 0x2 + elseif isa(result_type, InterConditional) + rettype_const = result_type + const_flags = 0x2 + elseif isa(result_type, InterMustAlias) + rettype_const = result_type + const_flags = 0x2 + else + rettype_const = nothing + const_flags = 0x0 + end + di = nothing + edges = empty_edges # `edges` will be updated within `finish!` + ci = result.ci + ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any), + ci, widenconst(result_type), widenconst(result.exc_result), rettype_const, const_flags, + first(result.valid_worlds), last(result.valid_worlds), + encode_effects(result.ipo_effects), result.analysis_results, di, edges) + if is_cached(me) # CACHE_MODE_GLOBAL + cached_result = cache_result!(me.interp, result, ci) + if !cached_result + me.cache_mode = CACHE_MODE_VOLATILE + end + end + end + nothing +end + +# record the backedges +function store_backedges(caller::CodeInstance, edges::SimpleVector) + isa(caller.def.def, Method) || return # don't add backedges to toplevel method instance + i = 1 + while true + i > length(edges) && return nothing + item = edges[i] + if item isa Int + i += 2 + continue # ignore the query information if present but process the contents + elseif isa(item, Method) + # ignore `Method`-edges (from e.g. failed `abstract_call_method`) + i += 1 + continue + elseif isa(item, Core.BindingPartition) + i += 1 + continue + end + if isa(item, CodeInstance) + item = item.def + end + if isa(item, MethodInstance) # regular dispatch + ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), item, nothing, caller) + i += 1 + else + callee = edges[i+1] + if isa(callee, MethodTable) # abstract dispatch (legacy style edges) + ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), callee, item, caller) + i += 2 + continue + elseif isa(callee, Method) + # ignore `Method`-edges (from e.g. 
failed `abstract_call_method`) + i += 2 + continue + # `invoke` edge + elseif isa(callee, CodeInstance) + callee = get_ci_mi(callee) + end + ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), callee, item, caller) + i += 2 + end + end + nothing +end + +function compute_edges!(sv::InferenceState) + edges = sv.edges + for i in 1:length(sv.stmt_info) + add_edges!(edges, sv.stmt_info[i]) + end + user_edges = sv.src.edges + if user_edges !== nothing && user_edges !== empty_edges + append!(edges, user_edges) + end + nothing +end + +function record_slot_assign!(sv::InferenceState) + # look at all assignments to slots + # and union the set of types stored there + # to compute a lower bound on the storage required + body = sv.src.code::Vector{Any} + slottypes = sv.slottypes::Vector{Any} + ssavaluetypes = sv.ssavaluetypes + for i = 1:length(body) + expr = body[i] + # find all reachable assignments to locals + if was_reached(sv, i) && isexpr(expr, :(=)) + lhs = expr.args[1] + if isa(lhs, SlotNumber) + typ = ssavaluetypes[i] + @assert typ !== NOT_FOUND "active slot in unreached region" + vt = widenconst(typ) + if vt !== Bottom + id = slot_id(lhs) + otherTy = slottypes[id] + if otherTy === Bottom + slottypes[id] = vt + elseif otherTy === Any + slottypes[id] = Any + else + slottypes[id] = tmerge(otherTy, vt) + end + end + end + end + end + sv.src.slottypes = slottypes + return nothing +end + +# find the dominating assignment to the slot `id` in the block containing statement `idx`, +# returns `nothing` otherwise +function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState) + block = block_for_inst(sv.cfg, idx) + for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignment is dominating this block + pc < idx || continue # N.B. needs pc ≠ idx as `id` can be assigned at `idx` + stmt = sv.src.code[pc] + isexpr(stmt, :(=)) || continue + lhs = stmt.args[1] + isa(lhs, SlotNumber) || continue + slot_id(lhs) == id || continue + return pc + end + return nothing +end + +# annotate types of all symbols in AST, preparing for optimization +function type_annotate!(interp::AbstractInterpreter, sv::InferenceState) + # widen `Conditional`s from `slottypes` + slottypes = sv.slottypes + for i = 1:length(slottypes) + slottypes[i] = widenconditional(slottypes[i]) + end + + # compute the required type for each slot + # to hold all of the items assigned into it + record_slot_assign!(sv) + + # annotate variables load types + src = sv.src + stmts = src.code + nstmt = length(stmts) + ssavaluetypes = sv.ssavaluetypes + nslots = length(src.slotflags) + + # widen slot wrappers (`Conditional` and `MustAlias`) and remove `NOT_FOUND` from `ssavaluetypes` + # and mark any unreachable statements by wrapping them in Const(...), to distinguish them from + # must-throw statements which also have type Bottom + for i = 1:nstmt + expr = stmts[i] + if was_reached(sv, i) + ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 3 + else # i.e. 
any runtime execution will never reach this statement + push!(sv.unreachable, i) + if is_meta_expr(expr) # keep any lexically scoped expressions + ssavaluetypes[i] = Any # 3 + else + ssavaluetypes[i] = Bottom # 3 + # annotate that this statement actually is dead + stmts[i] = Const(expr) + end + end + end + + # widen slot wrappers (`Conditional` and `MustAlias`) in `bb_vartables` + for varstate in sv.bb_vartables + if varstate !== nothing + for slot in 1:nslots + vt = varstate[slot] + widened_type = widenslotwrapper(ignorelimited(vt.typ)) + varstate[slot] = VarState(widened_type, vt.undef) + end + end + end + + return nothing +end + +function merge_call_chain!(::AbstractInterpreter, parent::InferenceState, child::InferenceState) + # add backedge of parent <- child + # then add all backedges of parent <- parent.parent + frames = parent.callstack::Vector{AbsIntState} + @assert child.callstack === frames + ancestorid = child.cycleid + while true + add_cycle_backedge!(parent, child) + parent.cycleid === ancestorid && break + child = parent + parent = frame_parent(child)::InferenceState + end + # ensure that walking the callstack has the same cycleid (DAG) + for frameid = reverse(ancestorid:length(frames)) + frame = frames[frameid]::InferenceState + frame.cycleid == ancestorid && break + @assert frame.cycleid > ancestorid + frame.cycleid = ancestorid + end +end + +function add_cycle_backedge!(caller::InferenceState, frame::InferenceState) + update_valid_age!(caller, frame.world.valid_worlds) + backedge = (caller, caller.currpc) + contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge) + return frame +end + +function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState) + return mi === frame_instance(frame) && cache_owner(interp) === cache_owner(frame.interp) +end + +function poison_callstack!(infstate::InferenceState, topmost::InferenceState) + push!(infstate.pclimitations, topmost) + nothing +end + +# Walk through `mi`'s upstream call chain, starting at `parent`. If a parent +# frame matching `mi` is encountered, then there is a cycle in the call graph +# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle, +# we "resolve" it by merging the call chain, which entails updating each intermediary +# frame's `cycleid` field and adding the appropriate backedges. Finally, +# we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is +# returned instead. +function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works currently just because the irinterp code doesn't get used much with + # `@assume_effects`, so it never sees a cycle normally, but that may not be a sustainable solution. 
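    # Return convention (as consumed by `typeinf_edge`): `false` means no cycle was found,
    # `true` means an unresolvable self-cycle was detected and the call stack was poisoned
    # via `poison_callstack!`, and an `InferenceState` is the pre-existing frame of a cycle
    # that was successfully merged.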
+ parent isa InferenceState || return false + frames = parent.callstack::Vector{AbsIntState} + uncached = false + for frameid = reverse(1:length(frames)) + frame = frames[frameid] + isa(frame, InferenceState) || break + uncached |= !is_cached(frame) # ensure we never add a (globally) uncached frame to a cycle + if is_same_frame(interp, mi, frame) + if uncached + # our attempt to speculate into a constant call lead to an undesired self-cycle + # that cannot be converged: poison our call-stack (up to the discovered duplicate frame) + # with the limited flag and abort (set return type to Any) now + poison_callstack!(parent, frame) + return true + end + merge_call_chain!(interp, parent, frame) + return frame + end + end + return false +end + +ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits) + +# return cached result of regular inference +function return_cached_result(interp::AbstractInterpreter, method::Method, codeinst::CodeInstance, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool) + rt = cached_return_type(codeinst) + exct = codeinst.exctype + effects = ipo_effects(codeinst) + edge = codeinst + update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst))) + return Future(MethodCallResult(interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited)) +end + +function MethodCallResult(::AbstractInterpreter, sv::AbsIntState, method::Method, + @nospecialize(rt), @nospecialize(exct), effects::Effects, + edge::Union{Nothing,CodeInstance}, edgecycle::Bool, edgelimited::Bool, + volatile_inf_result::Union{Nothing,VolatileInferenceResult}=nothing) + if edge === nothing + edgecycle = edgelimited = true + end + + # we look for the termination effect override here as well, since the :terminates effect + # may have been tainted due to recursion at this point even if it's overridden + if is_effect_overridden(sv, :terminates_globally) + # this frame is known to terminate + effects = Effects(effects, terminates=true) + elseif is_effect_overridden(method, :terminates_globally) + # this edge is known to terminate + effects = Effects(effects; terminates=true) + elseif edgecycle + # Some sort of recursion was detected. + if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv) + # no `MethodInstance` cycles -- don't taint :terminate + else + # we cannot guarantee that the call will terminate + effects = Effects(effects; terminates=false) + end + end + + return MethodCallResult(rt, exct, effects, edge, edgecycle, edgelimited, volatile_inf_result) +end + +# allocate a dummy `edge::CodeInstance` to be added by `add_edges!`, reusing an existing_edge if possible +# TODO: fill this in fully correctly (currently IPO info such as effects and return types are lost) +function codeinst_as_edge(interp::AbstractInterpreter, sv::InferenceState, @nospecialize existing_edge) + mi = sv.linfo + min_world, max_world = first(sv.world.valid_worlds), last(sv.world.valid_worlds) + if max_world >= get_world_counter() + max_world = typemax(UInt) + end + edges = Core.svec(sv.edges...) 
+ if existing_edge isa CodeInstance + # return an existing_edge, if the existing edge has more restrictions already (more edges and narrower worlds) + if existing_edge.min_world >= min_world && + existing_edge.max_world <= max_world && + existing_edge.edges == edges + return existing_edge + end + end + ci = CodeInstance(mi, cache_owner(interp), Any, Any, nothing, nothing, zero(Int32), + min_world, max_world, zero(UInt32), nothing, nothing, edges) + if max_world == typemax(UInt) + # if we can record all of the backedges in the global reverse-cache, + # we can now widen our applicability in the global cache too + # TODO: this should probably come after we decide this edge is even useful + store_backedges(ci, edges) + end + return ci +end + +# compute (and cache) an inferred AST and return the current best estimate of the result type +function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool) + mi = specialize_method(method, atype, sparams) + cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default + force_inline = is_stmt_inline(get_curr_ssaflag(caller)) + edge_ci = nothing + # check cache with SOURCE_MODE_NOT_REQUIRED source_mode + let codeinst = get(code_cache(interp), mi, nothing) + if codeinst isa CodeInstance # return existing rettype if the code is already inferred + inferred = @atomic :monotonic codeinst.inferred + if inferred === nothing && force_inline + # we already inferred this edge before and decided to discard the inferred code, + # nevertheless we re-infer it here again in order to propagate the re-inferred + # source to the inliner as a volatile result + cache_mode = CACHE_MODE_VOLATILE + edge_ci = codeinst + else + @assert codeinst.def === mi "MethodInstance for cached edge does not match" + return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited) + end + end + end + if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 + add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module") + return Future(MethodCallResult(interp, caller, method, Any, Any, Effects(), nothing, edgecycle, edgelimited)) + end + if !is_cached(caller) && frame_parent(caller) === nothing + # this caller exists to return to the user + # (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge) + frame = false + else + frame = resolve_call_cycle!(interp, mi, caller) + end + if frame === false + # completely new, but check again after reserving in the engine + if cache_mode == CACHE_MODE_GLOBAL + ci_from_engine = engine_reserve(interp, mi) + edge_ci = ci_from_engine + codeinst = get(code_cache(interp), mi, nothing) + if codeinst isa CodeInstance # return existing rettype if the code is already inferred + engine_reject(interp, ci_from_engine) + ci_from_engine = nothing + inferred = @atomic :monotonic codeinst.inferred + if inferred === nothing && force_inline + cache_mode = CACHE_MODE_VOLATILE + edge_ci = codeinst + else + @assert codeinst.def === mi "MethodInstance for cached edge does not match" + return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited) + end + end + else + ci_from_engine = nothing + end + result = InferenceResult(mi, typeinf_lattice(interp)) + if ci_from_engine !== nothing + result.ci = ci_from_engine + end + frame = InferenceState(result, cache_mode, interp) # always use the cache for edge targets + if frame === nothing + 
add_remark!(interp, caller, "[typeinf_edge] Failed to retrieve source") + # can't get the source for this, so we know nothing + if ci_from_engine !== nothing + engine_reject(interp, ci_from_engine) + end + return Future(MethodCallResult(interp, caller, method, Any, Any, Effects(), nothing, edgecycle, edgelimited)) + end + assign_parentchild!(frame, caller) + # the actual inference task for this edge is going to be scheduled within `typeinf_local` via the callstack queue + # while splitting off the rest of the work for this caller into a separate workq thunk + let mresult = Future{MethodCallResult}() + push!(caller.tasks, function get_infer_result(interp, caller) + update_valid_age!(caller, frame.world.valid_worlds) + local isinferred = is_inferred(frame) + local edge = isinferred ? edge_ci : nothing + local effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects + adjust_effects(effects_for_cycle(frame.ipo_effects), method) + local bestguess = frame.bestguess + local exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) + # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization: + # note that this result is cached globally exclusively, so we can use this local result destructively + local volatile_inf_result = if isinferred && edge_ci isa CodeInstance + result.ci_as_edge = edge_ci # set the edge for the inliner usage + VolatileInferenceResult(result) + end + mresult[] = MethodCallResult(interp, caller, method, bestguess, exc_bestguess, effects, + edge, edgecycle, edgelimited, volatile_inf_result) + return true + end) + return mresult + end + elseif frame === true + # unresolvable cycle + add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle") + return Future(MethodCallResult(interp, caller, method, Any, Any, Effects(), nothing, edgecycle, edgelimited)) + end + # return the current knowledge about this cycle + frame = frame::InferenceState + update_valid_age!(caller, frame.world.valid_worlds) + effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method) + bestguess = frame.bestguess + exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) + return Future(MethodCallResult(interp, caller, method, bestguess, exc_bestguess, effects, nothing, edgecycle, edgelimited)) +end + +# The `:terminates` effect bit must be conservatively tainted unless recursion cycle has +# been fully resolved. As for other effects, there's no need to taint them at this moment +# because they will be tainted as we try to resolve the cycle. 
+effects_for_cycle(effects::Effects) = Effects(effects; terminates=false) + +function cached_return_type(code::CodeInstance) + rettype = code.rettype + isdefined(code, :rettype_const) || return rettype + rettype_const = code.rettype_const + # the second subtyping/egal conditions are necessary to distinguish usual cases + # from rare cases when `Const` wrapped those extended lattice type objects + if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype) + return PartialStruct(fallback_lattice, rettype, rettype_const) + elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure + return rettype_const + elseif isa(rettype_const, InterConditional) && rettype !== InterConditional + return rettype_const + elseif isa(rettype_const, InterMustAlias) && rettype !== InterMustAlias + return rettype_const + else + return Const(rettype_const) + end +end + +#### entry points for inferring a MethodInstance given a type signature #### + +""" + codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, worlds::WorldRange, @nospecialize(val)) + +Return a fake CodeInfo that just contains `return \$val`. This function is used in various reflection APIs when asking +for the code of a function that inference has found to just return a constant. For such functions, no code is actually +stored - the constant is used directly. However, because this is an ABI implementation detail, it is nice to maintain +consistency and just synthesize a CodeInfo when the reflection APIs ask for them - this function does that. +""" +function codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, @nospecialize(val)) + method = mi.def::Method + tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) + tree.code = Any[ ReturnNode(quoted(val)) ] + nargs = Int(method.nargs) + tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms) + tree.slotflags = fill(0x00, nargs) + tree.ssavaluetypes = 1 + tree.debuginfo = DebugInfo(mi) + tree.ssaflags = [IR_FLAG_NULL] + tree.rettype = Core.Typeof(val) + tree.edges = Core.svec() + set_inlineable!(tree, true) + tree.parent = mi + return tree +end + +result_is_constabi(interp::AbstractInterpreter, result::InferenceResult) = + may_discard_trees(interp) && is_result_constabi_eligible(result) + +# compute an inferred AST and return type +typeinf_code(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) = + typeinf_code(interp, specialize_method(match), run_optimizer) +typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, + run_optimizer::Bool) = + typeinf_code(interp, specialize_method(method, atype, sparams), run_optimizer) +function typeinf_code(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool) + frame = typeinf_frame(interp, mi, run_optimizer) + frame === nothing && return nothing + return frame.src +end + +""" + typeinf_ircode(interp::AbstractInterpreter, match::MethodMatch, + optimize_until::Union{Int,String,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type) + typeinf_ircode(interp::AbstractInterpreter, + method::Method, atype, sparams::SimpleVector, + optimize_until::Union{Int,String,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type) + typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance, + optimize_until::Union{Int,String,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type) + +Infer a `method` and return an `IRCode` with inferred `returntype` on success. 
+""" +typeinf_ircode(interp::AbstractInterpreter, match::MethodMatch, + optimize_until::Union{Int,String,Nothing}) = + typeinf_ircode(interp, specialize_method(match), optimize_until) +typeinf_ircode(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, + optimize_until::Union{Int,String,Nothing}) = + typeinf_ircode(interp, specialize_method(method, atype, sparams), optimize_until) +function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance, + optimize_until::Union{Int,String,Nothing}) + frame = typeinf_frame(interp, mi, false) + if frame === nothing + return nothing, Any + end + (; result) = frame + opt = OptimizationState(frame, interp) + ir = run_passes_ipo_safe(opt.src, opt, optimize_until) + rt = widenconst(ignorelimited(result.result)) + return ir, rt +end + +# compute an inferred frame +typeinf_frame(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) = + typeinf_frame(interp, specialize_method(match), run_optimizer) +typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, + run_optimizer::Bool) = + typeinf_frame(interp, specialize_method(method, atype, sparams), run_optimizer) +function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool) + result = InferenceResult(mi, typeinf_lattice(interp)) + frame = InferenceState(result, #=cache_mode=#:no, interp) + frame === nothing && return nothing + typeinf(interp, frame) + is_inferred(frame) || return nothing + if run_optimizer + if result_is_constabi(interp, frame.result) + rt = frame.result.result::Const + src = codeinfo_for_const(interp, frame.linfo, rt.val) + else + opt = OptimizationState(frame, interp) + optimize(interp, opt, frame.result) + src = ir_to_codeinf!(opt) + end + result.src = frame.src = src + end + return frame +end + +# N.B.: These need to be aligned with the C side headers +""" + SOURCE_MODE_NOT_REQUIRED + +Indicates to inference that the source is not required and the only fields +of the resulting `CodeInstance` that the caller is interested in are types +and effects. Inference is still free to create a CodeInstance with source, +but is not required to do so. +""" +const SOURCE_MODE_NOT_REQUIRED = 0x0 + +""" + SOURCE_MODE_ABI + +Indicates to inference that it should return a CodeInstance that can +either be `->invoke`'d (because it has already been compiled or because +it has constabi) or one that can be made so by compiling its `->inferred` +field. + +N.B.: The `->inferred` field is volatile and the compiler may delete it. +""" +const SOURCE_MODE_ABI = 0x1 + +""" + ci_has_abi(code::CodeInstance) + +Determine whether this CodeInstance is something that could be invoked if we gave it +to the runtime system (either because it already has an ->invoke ptr, or +because it has source that could be compiled). Note that this information may +be stale by the time the user see it, so the user will need to perform their +own checks if they actually need the abi from it. +""" +function ci_has_abi(code::CodeInstance) + (@atomic :acquire code.invoke) !== C_NULL && return true + inf = @atomic :monotonic code.inferred + if code.owner === nothing ? 
(isa(inf, CodeInfo) || isa(inf, String)) : inf !== nothing + # interp.codegen[code] = maybe_uncompress(code, inf) # TODO: the correct way to ensure this information doesn't become stale would be to push it into the stable codegen cache + return true + end + return false +end + +function ci_has_invoke(code::CodeInstance) + return (@atomic :monotonic code.invoke) !== C_NULL +end + +function ci_meets_requirement(code::CodeInstance, source_mode::UInt8) + source_mode == SOURCE_MODE_NOT_REQUIRED && return true + source_mode == SOURCE_MODE_ABI && return ci_has_abi(code) + return false +end + +# compute (and cache) an inferred AST and return type +function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8) + start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) + let code = get(code_cache(interp), mi, nothing) + if code isa CodeInstance + # see if this code already exists in the cache + if ci_meets_requirement(code, source_mode) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return code + end + end + end + def = mi.def + ci = engine_reserve(interp, mi) + # check cache again if it is still new after reserving in the engine + let code = get(code_cache(interp), mi, nothing) + if code isa CodeInstance + # see if this code already exists in the cache + if ci_meets_requirement(code, source_mode) + engine_reject(interp, ci) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return code + end + end + end + if isa(def, Method) && ccall(:jl_get_module_infer, Cint, (Any,), def.module) == 0 + src = retrieve_code_info(mi, get_inference_world(interp)) + if src isa CodeInfo + finish!(interp, mi, ci, src) + else + engine_reject(interp, ci) + end + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return ci + end + result = InferenceResult(mi, typeinf_lattice(interp)) + result.ci = ci + frame = InferenceState(result, #=cache_mode=#:global, interp) + if frame === nothing + engine_reject(interp, ci) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return nothing + end + typeinf(interp, frame) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + + ci = result.ci # reload from result in case it changed + @assert frame.cache_mode != CACHE_MODE_NULL + @assert is_result_constabi_eligible(result) || (!isdefined(interp, :codegen) || haskey(interp.codegen, ci)) + @assert is_result_constabi_eligible(result) == use_const_api(ci) + @assert isdefined(ci, :inferred) "interpreter did not fulfill our expectations" + if !is_cached(frame) && source_mode == SOURCE_MODE_ABI + # XXX: jl_type_infer somewhat ambiguously assumes this must be cached + # XXX: this should be using the CI from the cache, if possible instead: haskey(cache, mi) && (ci = cache[mi]) + code_cache(interp)[mi] = ci + end + return ci +end + +# compute (and cache) an inferred AST and return the inferred return type +function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector) + if contains_is(unwrap_unionall(atype).parameters, Union{}) + return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap + end + return typeinf_type(interp, specialize_method(method, atype, sparams)) +end +typeinf_type(interp::AbstractInterpreter, match::MethodMatch) = + typeinf_type(interp, specialize_method(match)) +function typeinf_type(interp::AbstractInterpreter, mi::MethodInstance) + # n.b.: this could be replaced with @something(typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED), return nothing).rettype 
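    # spelled out, that alternative would read roughly as follows (sketch only, not enabled here):
    #   ci = typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED)
    #   ci === nothing && return nothing
    #   return ci.rettype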
+ start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) + let code = get(code_cache(interp), mi, nothing) + if code isa CodeInstance + # see if this rettype already exists in the cache + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return code.rettype + end + end + ci = engine_reserve(interp, mi) + let code = get(code_cache(interp), mi, nothing) + if code isa CodeInstance + engine_reject(interp, ci) + # see if this rettype already exists in the cache + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return code.rettype + end + end + result = InferenceResult(mi, typeinf_lattice(interp)) + result.ci = ci + frame = InferenceState(result, #=cache_mode=#:global, interp) + if frame === nothing + engine_reject(interp, ci) + return nothing + end + typeinf(interp, frame) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + is_inferred(result) || return nothing + return widenconst(ignorelimited(result.result)) +end + +# collect a list of all code that is needed along with CodeInstance to codegen it fully +function collectinvokes!(wq::Vector{CodeInstance}, ci::CodeInfo) + src = ci.code + for i = 1:length(src) + stmt = src[i] + isexpr(stmt, :(=)) && (stmt = stmt.args[2]) + if isexpr(stmt, :invoke) || isexpr(stmt, :invoke_modify) + edge = stmt.args[1] + edge isa CodeInstance && isdefined(edge, :inferred) && push!(wq, edge) + end + # TODO: handle other StmtInfo like @cfunction and OpaqueClosure? + end +end + +# This is a bridge for the C code calling `jl_typeinf_func()` on a single Method match +function typeinf_ext_toplevel(mi::MethodInstance, world::UInt, source_mode::UInt8) + interp = NativeInterpreter(world) + ci = typeinf_ext(interp, mi, source_mode) + if source_mode == SOURCE_MODE_ABI && ci isa CodeInstance && !ci_has_invoke(ci) + inspected = IdSet{CodeInstance}() + tocompile = Vector{CodeInstance}() + push!(tocompile, ci) + while !isempty(tocompile) + # ci_has_real_invoke(ci) && return ci # optimization: cease looping if ci happens to get compiled (not just jl_fptr_wait_for_compiled, but fully jl_is_compiled_codeinst) + callee = pop!(tocompile) + ci_has_invoke(callee) && continue + callee in inspected && continue + src = get(interp.codegen, callee, nothing) + if !isa(src, CodeInfo) + src = @atomic :monotonic callee.inferred + if isa(src, String) + src = _uncompressed_ir(callee, src) + end + if !isa(src, CodeInfo) + newcallee = typeinf_ext(interp, callee.def, source_mode) + if newcallee isa CodeInstance + callee === ci && (ci = newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee + push!(tocompile, newcallee) + #else + # println("warning: could not get source code for ", callee.def) + end + continue + end + end + push!(inspected, callee) + collectinvokes!(tocompile, src) + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) + end + end + return ci +end + +# This is a bridge for the C code calling `jl_typeinf_func()` on set of Method matches +function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim::Bool) + inspected = IdSet{CodeInstance}() + tocompile = Vector{CodeInstance}() + codeinfos = [] + # first compute the ABIs of everything + for this_world in reverse(sort!(worlds)) + interp = NativeInterpreter(this_world) + for i = 1:length(methods) + # each item in this list is either a MethodInstance indicating something + # to compile, or an svec(rettype, sig) describing a C-callable alias to create. 
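            # e.g. (hypothetical entries, for illustration only) `methods` may hold a
            # `MethodInstance` for some `f(::Int)` alongside
            # `Core.svec(Cvoid, Tuple{typeof(my_c_callback), Cint})` describing a C-callable alias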
+ item = methods[i] + if item isa MethodInstance + # if this method is generally visible to the current compilation world, + # and this is either the primary world, or not applicable in the primary world + # then we want to compile and emit this + if item.def.primary_world <= this_world <= item.def.deleted_world + ci = typeinf_ext(interp, item, SOURCE_MODE_NOT_REQUIRED) + ci isa CodeInstance && !use_const_api(ci) && push!(tocompile, ci) + end + elseif item isa SimpleVector + push!(codeinfos, item[1]::Type) + push!(codeinfos, item[2]::Type) + end + end + while !isempty(tocompile) + callee = pop!(tocompile) + callee in inspected && continue + push!(inspected, callee) + # now make sure everything has source code, if desired + mi = get_ci_mi(callee) + def = mi.def + if use_const_api(callee) + src = codeinfo_for_const(interp, mi, code.rettype_const) + elseif haskey(interp.codegen, callee) + src = interp.codegen[callee] + elseif isa(def, Method) && ccall(:jl_get_module_infer, Cint, (Any,), def.module) == 0 && !trim + src = retrieve_code_info(mi, get_inference_world(interp)) + else + # TODO: typeinf_code could return something with different edges/ages/owner/abi (needing an update to callee), which we don't handle here + src = typeinf_code(interp, mi, true) + end + if src isa CodeInfo + collectinvokes!(tocompile, src) + # It is somewhat ambiguous if typeinf_ext might have callee in the caches, + # but for the purpose of native compile, we always want them put there. + if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee)) + code_cache(interp)[mi] = callee + end + push!(codeinfos, callee) + push!(codeinfos, src) + elseif trim + println("warning: failed to get code for ", mi) + end + end + end + return codeinfos +end + +function return_type(@nospecialize(f), t::DataType) # this method has a special tfunc + world = tls_world_age() + args = Any[_return_type, NativeInterpreter(world), Tuple{Core.Typeof(f), t.parameters...}] + return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), args, length(args)) +end + +function return_type(@nospecialize(f), t::DataType, world::UInt) + return return_type(Tuple{Core.Typeof(f), t.parameters...}, world) +end + +function return_type(t::DataType) + world = tls_world_age() + return return_type(t, world) +end + +function return_type(t::DataType, world::UInt) + args = Any[_return_type, NativeInterpreter(world), t] + return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), args, length(args)) +end + +function _return_type(interp::AbstractInterpreter, t::DataType) + rt = Union{} + f = singleton_type(t.parameters[1]) + if isa(f, Builtin) + args = Any[t.parameters...] + popfirst!(args) + rt = builtin_tfunction(interp, f, args, nothing) + rt = widenconst(rt) + else + for match in _methods_by_ftype(t, -1, get_inference_world(interp))::Vector + ty = typeinf_type(interp, match::MethodMatch) + ty === nothing && return Any + rt = tmerge(rt, ty) + rt === Any && break + end + end + return rt +end diff --git a/base/compiler/typelattice.jl b/Compiler/src/typelattice.jl similarity index 86% rename from base/compiler/typelattice.jl rename to Compiler/src/typelattice.jl index 324f2b600cc44..bd0d24167b75a 100644 --- a/base/compiler/typelattice.jl +++ b/Compiler/src/typelattice.jl @@ -6,23 +6,7 @@ # N.B.: Const/PartialStruct/InterConditional are defined in Core, to allow them to be used # inside the global code cache. 
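# For orientation, two illustrative lattice elements (the concrete values are assumptions, not part of this change):
#   Const(1)                                                    # the value is known to be exactly 1
#   PartialStruct(fallback_lattice, Some{Any}, Any[Const(1)])   # a Some{Any} whose field is known to be 1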
-# -# # The type of a value might be constant -# struct Const -# val -# end -# -# struct PartialStruct -# typ -# fields::Vector{Any} # elements are other type lattice members -# end -import Core: Const, PartialStruct -function PartialStruct(@nospecialize(typ), fields::Vector{Any}) - for i = 1:length(fields) - assert_nested_slotwrapper(fields[i]) - end - return Core._PartialStruct(typ, fields) -end +import Core: Const, InterConditional, PartialStruct """ cnd::Conditional @@ -48,34 +32,19 @@ struct Conditional slot::Int thentype elsetype - function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) + # `isdefined` indicates this `Conditional` is from `@isdefined slot`, implying that + # the `undef` information of `slot` can be improved in the then branch. + # Since this is only beneficial for local inference, it is not translated into `InterConditional`. + isdefined::Bool + function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype); + isdefined::Bool=false) assert_nested_slotwrapper(thentype) assert_nested_slotwrapper(elsetype) - return new(slot, thentype, elsetype) + return new(slot, thentype, elsetype, isdefined) end end -Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) = - Conditional(slot_id(var), thentype, elsetype) - -""" - cnd::InterConditional - -Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments. -This is separate from `Conditional` to catch logic errors: the lattice element name is `InterConditional` -while processing a call, then `Conditional` everywhere else. Thus `InterConditional` does not appear in -`CompilerTypes`—these type's usages are disjoint—though we define the lattice for `InterConditional`. -""" -:(InterConditional) -import Core: InterConditional -# struct InterConditional -# slot::Int -# thentype -# elsetype -# InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) = -# new(slot, thentype, elsetype) -# end -InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) = - InterConditional(slot_id(var), thentype, elsetype) +Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype); isdefined::Bool=false) = + Conditional(slot_id(var), thentype, elsetype; isdefined) const AnyConditional = Union{Conditional,InterConditional} Conditional(cnd::InterConditional) = Conditional(cnd.slot, cnd.thentype, cnd.elsetype) @@ -120,8 +89,6 @@ end MustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) = MustAlias(slot_id(var), vartyp, fldidx, fldtyp) -_uniontypes(x::MustAlias, ts) = _uniontypes(widenconst(x), ts) - """ alias::InterMustAlias @@ -159,8 +126,8 @@ end struct StateUpdate var::SlotNumber vtype::VarState - state::VarTable conditional::Bool + StateUpdate(var::SlotNumber, vtype::VarState, conditional::Bool=false) = new(var, vtype, conditional) end """ @@ -224,11 +191,6 @@ struct NotFound end const NOT_FOUND = NotFound() -const CompilerTypes = Union{Const, Conditional, MustAlias, NotFound, PartialStruct} -==(x::CompilerTypes, y::CompilerTypes) = x === y -==(x::Type, y::CompilerTypes) = false -==(x::CompilerTypes, y::Type) = false - ################# # lattice logic # ################# @@ -286,11 +248,17 @@ end # `Conditional` and `InterConditional` are valid in opposite contexts # (i.e. 
local inference and inter-procedural call), as such they will never be compared -@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional} +@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::Conditional, b::Conditional) = + _issubconditional(𝕃, a, b, #=check_isdefined=#true) +@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::InterConditional, b::InterConditional) = + _issubconditional(𝕃, a, b, #=check_isdefined=#false) +@nospecializeinfer function _issubconditional(𝕃::AbstractLattice, a::C, b::C, check_isdefined::Bool) where C<:AnyConditional if is_same_conditionals(a, b) - if ⊑(lattice, a.thentype, b.thentype) - if ⊑(lattice, a.elsetype, b.elsetype) - return true + if ⊑(𝕃, a.thentype, b.thentype) + if ⊑(𝕃, a.elsetype, b.elsetype) + if !check_isdefined || a.isdefined ≥ b.isdefined + return true + end end end end @@ -323,7 +291,7 @@ end @nospecializeinfer function isalreadyconst(@nospecialize t) isa(t, Const) && return true - isa(t, DataType) && isdefined(t, :instance) && return true + issingletontype(t) && return true return isconstType(t) end @@ -357,8 +325,8 @@ end end end return Conditional(slot, - thenfields === nothing ? Bottom : PartialStruct(vartyp.typ, thenfields), - elsefields === nothing ? Bottom : PartialStruct(vartyp.typ, elsefields)) + thenfields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp.typ, thenfields), + elsefields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp.typ, elsefields)) else vartyp_widened = widenconst(vartyp) thenfields = thentype === Bottom ? nothing : Any[] @@ -374,8 +342,8 @@ end end end return Conditional(slot, - thenfields === nothing ? Bottom : PartialStruct(vartyp_widened, thenfields), - elsefields === nothing ? Bottom : PartialStruct(vartyp_widened, elsefields)) + thenfields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp_widened, thenfields), + elsefields === nothing ? Bottom : PartialStruct(fallback_lattice, vartyp_widened, elsefields)) end end @@ -394,8 +362,8 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ # ============= @nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) - r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) - r || return false + ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) || return false + isa(b, LimitedAccuracy) || return true # We've found that ignorelimited(a) ⊑ ignorelimited(b). @@ -448,8 +416,13 @@ end @nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) if isa(b, PartialStruct) - if !(length(a.fields) == length(b.fields) && a.typ <: b.typ) - return false + a.typ <: b.typ || return false + if length(a.fields) ≠ length(b.fields) + if !(isvarargtype(a.fields[end]) || isvarargtype(b.fields[end])) + length(a.fields) ≥ length(b.fields) || return false + else + return false + end end for i in 1:length(b.fields) af = a.fields[i] @@ -472,19 +445,25 @@ end return isa(b, Type) && a.typ <: b elseif isa(b, PartialStruct) if isa(a, Const) - nf = nfields(a.val) - nf == length(b.fields) || return false widea = widenconst(a)::DataType wideb = widenconst(b) wideb′ = unwrap_unionall(wideb)::DataType widea.name === wideb′.name || return false - # We can skip the subtype check if b is a Tuple, since in that - # case, the ⊑ of the elements is sufficient. 
- if wideb′.name !== Tuple.name && !(widea <: wideb) - return false + if wideb′.name === Tuple.name + # We can skip the subtype check if b is a Tuple, since in that + # case, the ⊑ of the elements is sufficient. + # But for tuple comparisons, we need their lengths to be the same for now. + # TODO improve accuracy for cases when `b` contains vararg element + nfields(a.val) == length(b.fields) || return false + else + widea <: wideb || return false + # for structs we need to check that `a` has more information than `b` that may be partially initialized + n_initialized(a) ≥ length(b.fields) || return false end + nf = nfields(a.val) for i in 1:nf isdefined(a.val, i) || continue # since ∀ T Union{} ⊑ T + i > length(b.fields) && break # `a` has more information than `b` that is partially initialized struct bfᵢ = b.fields[i] if i == nf bfᵢ = unwrapva(bfᵢ) @@ -607,7 +586,7 @@ end if ti === widev return v end - valid_as_lattice(ti) || return Bottom + valid_as_lattice(ti, true) || return Bottom if widev <: Tuple new_fields = Vector{Any}(undef, length(v.fields)) for i = 1:length(new_fields) @@ -631,7 +610,7 @@ end return v end ti = typeintersect(widev, t) - valid_as_lattice(ti) || return Bottom + valid_as_lattice(ti, true) || return Bottom return PartialOpaque(ti, v.env, v.parent, v.source) end return tmeet(widenlattice(lattice), v, t) @@ -692,7 +671,7 @@ widenconst(::AnyConditional) = Bool widenconst(a::AnyMustAlias) = widenconst(widenmustalias(a)) widenconst(c::Const) = (v = c.val; isa(v, Type) ? Type{v} : typeof(v)) widenconst(::PartialTypeVar) = TypeVar -widenconst(t::PartialStruct) = t.typ +widenconst(t::Core.PartialStruct) = t.typ widenconst(t::PartialOpaque) = t.typ @nospecializeinfer widenconst(@nospecialize t::Type) = t widenconst(::TypeVar) = error("unhandled TypeVar") @@ -724,28 +703,6 @@ function invalidate_slotwrapper(vt::VarState, changeid::Int, ignore_conditional: return nothing end -function stupdate!(lattice::AbstractLattice, state::VarTable, changes::StateUpdate) - changed = false - changeid = slot_id(changes.var) - for i = 1:length(state) - if i == changeid - newtype = changes.vtype - else - newtype = changes.state[i] - end - invalidated = invalidate_slotwrapper(newtype, changeid, changes.conditional) - if invalidated !== nothing - newtype = invalidated - end - oldtype = state[i] - if schanged(lattice, newtype, oldtype) - state[i] = smerge(lattice, oldtype, newtype) - changed = true - end - end - return changed -end - function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable) changed = false for i = 1:length(state) @@ -759,24 +716,6 @@ function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable) return changed end -function stupdate1!(lattice::AbstractLattice, state::VarTable, change::StateUpdate) - changeid = slot_id(change.var) - for i = 1:length(state) - invalidated = invalidate_slotwrapper(state[i], changeid, change.conditional) - if invalidated !== nothing - state[i] = invalidated - end - end - # and update the type of it - newtype = change.vtype - oldtype = state[changeid] - if schanged(lattice, newtype, oldtype) - state[changeid] = smerge(lattice, oldtype, newtype) - return true - end - return false -end - function stoverwrite!(state::VarTable, newstate::VarTable) for i = 1:length(state) state[i] = newstate[i] @@ -797,3 +736,13 @@ function stoverwrite1!(state::VarTable, change::StateUpdate) state[changeid] = newtype return state end + +# The ::AbstractLattice argument is unused and simply serves to disambiguate +# different instances 
of the compiler that may share the `Core.PartialStruct` +# type. +function Core.PartialStruct(::AbstractLattice, @nospecialize(typ), fields::Vector{Any}) + for i = 1:length(fields) + assert_nested_slotwrapper(fields[i]) + end + return Core._PartialStruct(typ, fields) +end diff --git a/base/compiler/typelimits.jl b/Compiler/src/typelimits.jl similarity index 79% rename from base/compiler/typelimits.jl rename to Compiler/src/typelimits.jl index b648144ea3bd1..536b5fb34d1b1 100644 --- a/base/compiler/typelimits.jl +++ b/Compiler/src/typelimits.jl @@ -116,15 +116,31 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec return Union{a, b} end elseif isa(t, DataType) - if isType(t) # see equivalent case in type_more_complex - tt = unwrap_unionall(t.parameters[1]) - if isa(tt, Union) || isa(tt, TypeVar) || isType(tt) - is_derived_type_from_any(tt, sources, depth + 1) && return t + if isType(t) + # Type is fairly important, so do not widen it as fast as other types if avoidable + tt = t.parameters[1] + ttu = unwrap_unionall(tt) # TODO: use argument_datatype(tt) after #50692 fixed + # must forbid nesting through this if we detect that potentially occurring + # we already know !is_derived_type_from_any so refuse to recurse here + if !isa(ttu, DataType) + return Type + elseif isType(ttu) + return Type{<:Type} + end + # try to peek into c to get a comparison object, but if we can't perhaps t is already simple enough on its own + if isType(c) + ct = c.parameters[1] else - isType(c) && (c = unwrap_unionall(c.parameters[1])) - type_more_complex(tt, c, sources, depth, 0, 0) || return t + ct = Union{} end - return Type + Qt = __limit_type_size(tt, ct, sources, depth + 1, 0) + Qt === tt && return t + Qt === Any && return Type + # Can't form Type{<:Qt} just yet, without first make sure we limited the depth + # enough, since this moves Qt outside of Type for is_derived_type_from_any + Qt = __limit_type_size(tt, ct, sources, depth + 2, 0) + Qt === Any && return Type + return Type{<:Qt} elseif isa(c, DataType) tP = t.parameters cP = c.parameters @@ -157,10 +173,11 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec end end if allowed_tuplelen < 1 && t.name === Tuple.name + # forbid nesting Tuple{Tuple{Tuple...}} through this return Any end widert = t.name.wrapper - if !(t <: widert) + if !(t <: widert) # XXX: we should call has_free_typevars(t) here, but usually t does not have those wrappers by the time it got here # This can happen when a typevar has bounds too wide for its context, e.g. # `Complex{T} where T` is not a subtype of `Complex`. In that case widen even # faster to something safe to ensure the result is a supertype of the input. 
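# A much-simplified illustration of the nesting that the `Type{...}` handling above guards
# against (the helper below is hypothetical, illustration only): each round of inference can
# wrap one more `Type{...}` layer, so the widening to `Type{<:Qt}`/`Type` above caps that growth.
nest(T, n) = n == 0 ? T : nest(Type{T}, n - 1)
nest(Int, 3) === Type{Type{Type{Int}}}    # true: three extra layers after three rounds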
@@ -211,20 +228,22 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe return false # Bottom is as simple as they come elseif isa(t, DataType) && isempty(t.parameters) return false # fastpath: unparameterized types are always finite - elseif tupledepth > 0 && is_derived_type_from_any(unwrap_unionall(t), sources, depth) + elseif is_derived_type_from_any(unwrap_unionall(t), sources, depth) return false # t isn't something new end # peel off wrappers isvarargtype(t) && (t = unwrapva(t)) isvarargtype(c) && (c = unwrapva(c)) if isa(c, UnionAll) - # allow wrapping type with fewer UnionAlls than comparison if in a covariant context + # allow wrapping type with fewer UnionAlls than comparison only if in a covariant context if !isa(t, UnionAll) && tupledepth == 0 return true end - t = unwrap_unionall(t) c = unwrap_unionall(c) end + if isa(t, UnionAll) + t = unwrap_unionall(t) + end # rules for various comparison types if isa(c, TypeVar) tupledepth = 1 @@ -248,16 +267,22 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe if isa(t, DataType) tP = t.parameters if isType(t) - # Treat Type{T} and T as equivalent to allow taking typeof any - # source type (DataType) anywhere as Type{...}, as long as it isn't - # nesting as Type{Type{...}} - tt = unwrap_unionall(t.parameters[1]) - if isa(tt, Union) || isa(tt, TypeVar) || isType(tt) - return !is_derived_type_from_any(tt, sources, depth + 1) + # Type is fairly important, so do not widen it as fast as other types if avoidable + tt = tP[1] + ttu = unwrap_unionall(tt) # TODO: use argument_datatype(tt) after #50692 fixed + if isType(c) + ct = c.parameters[1] else - isType(c) && (c = unwrap_unionall(c.parameters[1])) - return type_more_complex(tt, c, sources, depth, 0, 0) + ct = Union{} + tupledepth == 0 && return true # cannot allow nesting end + # allow creating variation within a nested Type, but not very deep + if tupledepth > 1 + tupledepth = 1 + else + tupledepth = 0 + end + return type_more_complex(tt, ct, sources, depth + 1, tupledepth, 0) elseif isa(c, DataType) && t.name === c.name cP = c.parameters length(cP) < length(tP) && return true @@ -270,22 +295,9 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe else tupledepth = 0 end - isgenerator = (t.name.name === :Generator && t.name.module === _topmod(t.name.module)) for i = 1:length(tP) tPi = tP[i] cPi = cP[i + ntail] - if isgenerator - let tPi = unwrap_unionall(tPi), - cPi = unwrap_unionall(cPi) - if isa(tPi, DataType) && isa(cPi, DataType) && - !isabstracttype(tPi) && !isabstracttype(cPi) && - sym_isless(cPi.name.name, tPi.name.name) - # allow collect on (anonymous) Generators to nest, provided that their functions are appropriately ordered - # TODO: is there a better way? 
- continue - end - end - end type_more_complex(tPi, cPi, sources, depth + 1, tupledepth, 0) && return true end return false @@ -298,10 +310,22 @@ union_count_abstract(x::Union) = union_count_abstract(x.a) + union_count_abstrac union_count_abstract(@nospecialize(x)) = !isdispatchelem(x) function issimpleenoughtype(@nospecialize t) - return unionlen(t) + union_count_abstract(t) <= MAX_TYPEUNION_LENGTH && + ut = unwrap_unionall(t) + ut isa DataType && ut.name.wrapper == t && return true + return max(unionlen(t), union_count_abstract(t) + 1) <= MAX_TYPEUNION_LENGTH && unioncomplexity(t) <= MAX_TYPEUNION_COMPLEXITY end +# We may want to apply a stricter limit than issimpleenoughtype to +# tupleelements individually, to try to keep the whole tuple under the limit, +# even after complicated recursion and other operations on it elsewhere +const issimpleenoughtupleelem = issimpleenoughtype + +function n_initialized(t::Const) + nf = nfields(t.val) + return something(findfirst(i::Int->!isdefined(t.val,i), 1:nf), nf+1)-1 +end + # A simplified type_more_complex query over the extended lattice # (assumes typeb ⊑ typea) @nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) @@ -309,6 +333,13 @@ end typea === typeb && return true if typea isa PartialStruct aty = widenconst(typea) + if typeb isa Const + @assert length(typea.fields) ≤ n_initialized(typeb) "typeb ⊑ typea is assumed" + elseif typeb isa PartialStruct + @assert length(typea.fields) ≤ length(typeb.fields) "typeb ⊑ typea is assumed" + else + return false + end for i = 1:length(typea.fields) ai = unwrapva(typea.fields[i]) bi = fieldtype(aty, i) @@ -553,34 +584,35 @@ end # N.B. This can also be called with both typea::Const and typeb::Const to # to recover PartialStruct from `Const`s with overlapping fields. -@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge_partial_struct(𝕃::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) aty = widenconst(typea) bty = widenconst(typeb) - if aty === bty - # must have egal here, since we do not create PartialStruct for non-concrete types - typea_nfields = nfields_tfunc(lattice, typea) - typeb_nfields = nfields_tfunc(lattice, typeb) - isa(typea_nfields, Const) || return nothing - isa(typeb_nfields, Const) || return nothing - type_nfields = typea_nfields.val::Int - type_nfields === typeb_nfields.val::Int || return nothing - type_nfields == 0 && return nothing - fields = Vector{Any}(undef, type_nfields) - anyrefine = false - for i = 1:type_nfields - ai = getfield_tfunc(lattice, typea, Const(i)) - bi = getfield_tfunc(lattice, typeb, Const(i)) - # N.B.: We're assuming here that !isType(aty), because that case - # only arises when typea === typeb, which should have been caught - # before calling this. 
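    # Illustrative expectation (hypothetical 2-field struct `P(x, y)` with untyped fields, not
    # part of this change): merging `Const(P(1, 2))` with `Const(P(1, 3))` can recover roughly
    # `PartialStruct(𝕃, P, Any[Const(1), Int])`, keeping the shared first field rather than
    # widening all the way to `P`.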
+ if aty === bty && !isType(aty) + if typea isa PartialStruct + if typeb isa PartialStruct + nflds = min(length(typea.fields), length(typeb.fields)) + else + nflds = min(length(typea.fields), n_initialized(typeb::Const)) + end + elseif typeb isa PartialStruct + nflds = min(n_initialized(typea::Const), length(typeb.fields)) + else + nflds = min(n_initialized(typea::Const), n_initialized(typeb::Const)) + end + nflds == 0 && return nothing + fields = Vector{Any}(undef, nflds) + anyrefine = nflds > datatype_min_ninitialized(aty) + for i = 1:nflds + ai = getfield_tfunc(𝕃, typea, Const(i)) + bi = getfield_tfunc(𝕃, typeb, Const(i)) ft = fieldtype(aty, i) - if is_lattice_equal(lattice, ai, bi) || is_lattice_equal(lattice, ai, ft) + if is_lattice_equal(𝕃, ai, bi) || is_lattice_equal(𝕃, ai, ft) # Since ai===bi, the given type has no restrictions on complexity. # and can be used to refine ft tyi = ai - elseif is_lattice_equal(lattice, bi, ft) + elseif is_lattice_equal(𝕃, bi, ft) tyi = bi - elseif (tyi′ = tmerge_field(lattice, ai, bi); tyi′ !== nothing) + elseif (tyi′ = tmerge_field(𝕃, ai, bi); tyi′ !== nothing) # allow external lattice implementation to provide a custom field-merge strategy tyi = tyi′ else @@ -602,11 +634,11 @@ end end fields[i] = tyi if !anyrefine - anyrefine = has_nontrivial_extended_info(lattice, tyi) || # extended information - ⋤(lattice, tyi, ft) # just a type-level information, but more precise than the declared type + anyrefine = has_nontrivial_extended_info(𝕃, tyi) || # extended information + ⋤(𝕃, tyi, ft) # just a type-level information, but more precise than the declared type end end - anyrefine && return PartialStruct(aty, fields) + anyrefine && return PartialStruct(𝕃, aty, fields) end return nothing end @@ -669,7 +701,7 @@ end return tmerge(wl, typea, typeb) end -@nospecializeinfer function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) +@nospecializeinfer function tmerge(lattice::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) # it's always ok to form a Union of two concrete types act = isconcretetype(typea) bct = isconcretetype(typeb) @@ -680,11 +712,38 @@ end if (act || isType(typea)) && (bct || isType(typeb)) return Union{typea, typeb} end - typea <: typeb && return typeb - typeb <: typea && return typea + u = tmerge_fast_path(lattice, typea, typeb) + u === nothing || return u return tmerge_types_slow(typea, typeb) end +@nospecializeinfer @noinline function tname_intersect(aname::Core.TypeName, bname::Core.TypeName) + aname === bname && return aname + if !isabstracttype(aname.wrapper) && !isabstracttype(bname.wrapper) + return nothing # fast path + end + Any.name === aname && return aname + a = unwrap_unionall(aname.wrapper) + heighta = 0 + while a !== Any + heighta += 1 + a = a.super + end + b = unwrap_unionall(bname.wrapper) + heightb = 0 + while b !== Any + b.name === aname && return aname + heightb += 1 + b = b.super + end + a = unwrap_unionall(aname.wrapper) + while heighta > heightb + a = a.super + heighta -= 1 + end + return a.name === bname ? 
bname : nothing +end + @nospecializeinfer @noinline function tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type)) # collect the list of types from past tmerge calls returning Union # and then reduce over that list @@ -708,73 +767,97 @@ end # see if any of the union elements have the same TypeName # in which case, simplify this tmerge by replacing it with # the widest possible version of itself (the wrapper) + simplify = falses(length(types)) for i in 1:length(types) + typenames[i] === Any.name && continue ti = types[i] for j in (i + 1):length(types) - if typenames[i] === typenames[j] + typenames[j] === Any.name && continue + ijname = tname_intersect(typenames[i], typenames[j]) + if !(ijname === nothing) tj = types[j] if ti <: tj types[i] = Union{} typenames[i] = Any.name + simplify[i] = false + simplify[j] = true break elseif tj <: ti types[j] = Union{} typenames[j] = Any.name + simplify[j] = false + simplify[i] = true else - if typenames[i] === Tuple.name + if ijname === Tuple.name # try to widen Tuple slower: make a single non-concrete Tuple containing both # converge the Tuple element-wise if they are the same length # see 4ee2b41552a6bc95465c12ca66146d69b354317b, be59686f7613a2ccfd63491c7b354d0b16a95c05, widen = tuplemerge(unwrap_unionall(ti)::DataType, unwrap_unionall(tj)::DataType) widen = rewrap_unionall(rewrap_unionall(widen, ti), tj) + simplify[j] = false else - wr = typenames[i].wrapper + wr = ijname.wrapper uw = unwrap_unionall(wr)::DataType ui = unwrap_unionall(ti)::DataType + while ui.name !== ijname + ui = ui.super + end uj = unwrap_unionall(tj)::DataType - merged = wr + while uj.name !== ijname + uj = uj.super + end + p = Vector{Any}(undef, length(uw.parameters)) + usep = true + widen = wr for k = 1:length(uw.parameters) ui_k = ui.parameters[k] if ui_k === uj.parameters[k] && !has_free_typevars(ui_k) - merged = merged{ui_k} + p[k] = ui_k + usep = true else - merged = merged{uw.parameters[k]} + p[k] = uw.parameters[k] end end - widen = rewrap_unionall(merged, wr) + if usep + widen = rewrap_unionall(wr{p...}, wr) + widen <: wr || (widen = wr) # sometimes there are cross-constraints on wr that we may lose in this process, but that would cause future calls to this to need to return Any, which is undesirable + end + simplify[j] = !usep end types[i] = Union{} typenames[i] = Any.name + simplify[i] = false types[j] = widen + typenames[j] = ijname break end end end end - u = Union{types...} - # don't let type unions get too big, if the above didn't reduce it enough - if issimpleenoughtype(u) - return u - end - # don't let the slow widening of Tuple cause the whole type to grow too fast + # don't let elements of the union get too big, if the above didn't reduce something enough + # Specifically widen Tuple{..., Union{lots of stuff}...} to Tuple{..., Any, ...} + # Don't let Val{<:Val{<:Val}} keep nesting abstract levels either for i in 1:length(types) + simplify[i] || continue + ti = types[i] + issimpleenoughtype(ti) && continue if typenames[i] === Tuple.name - widen = unwrap_unionall(types[i]) - if isa(widen, DataType) && !isvatuple(widen) - widen = NTuple{length(widen.parameters), Any} - else - widen = Tuple - end - types[i] = widen - u = Union{types...} - if issimpleenoughtype(u) - return u + # otherwise we need to do a simple version of tuplemerge for one element now + tip = (unwrap_unionall(ti)::DataType).parameters + lt = length(tip) + p = Vector{Any}(undef, lt) + for j = 1:lt + ui = tip[j] + p[j] = issimpleenoughtupleelem(unwrapva(ui)) ? 
ui : isvarargtype(ui) ? Vararg : Any end - break + types[i] = rewrap_unionall(Tuple{p...}, ti) + else + # this element is not simple enough yet, make it so now + types[i] = typenames[i].wrapper end end - # finally, just return the widest possible type - return Any + u = Union{types...} + return u end # the inverse of switchtupleunion, with limits on max element union size @@ -796,7 +879,7 @@ function tuplemerge(a::DataType, b::DataType) p = Vector{Any}(undef, lt + vt) for i = 1:lt ui = Union{ap[i], bp[i]} - p[i] = issimpleenoughtype(ui) ? ui : Any + p[i] = issimpleenoughtupleelem(ui) ? ui : Any end # merge the remaining tail into a single, simple Tuple{Vararg{T}} (#22120) if vt @@ -814,8 +897,10 @@ function tuplemerge(a::DataType, b::DataType) # or (equivalently?) iteratively took super-types until reaching a common wrapper # e.g. consider the results of `tuplemerge(Tuple{Complex}, Tuple{Number, Int})` and of # `tuplemerge(Tuple{Int}, Tuple{String}, Tuple{Int, String})` - if !(ti <: tail) - if tail <: ti + # c.f. tname_intersect in the algorithm above + hasfree = has_free_typevars(ti) + if hasfree || !(ti <: tail) + if !hasfree && tail <: ti tail = ti # widen to ti else uw = unwrap_unionall(tail) @@ -843,11 +928,16 @@ function tuplemerge(a::DataType, b::DataType) end end end - tail === Any && return Tuple # short-circuit loop + tail === Any && return Tuple # short-circuit loops end end @assert !(tail === Union{}) - p[lt + 1] = Vararg{tail} + if !issimpleenoughtupleelem(tail) || tail === Any + p[lt + 1] = Vararg + lt == 0 && return Tuple + else + p[lt + 1] = Vararg{tail} + end end return Tuple{p...} end diff --git a/base/compiler/types.jl b/Compiler/src/types.jl similarity index 65% rename from base/compiler/types.jl rename to Compiler/src/types.jl index 14f1c90dca0e9..6ffb5402682f3 100644 --- a/base/compiler/types.jl +++ b/Compiler/src/types.jl @@ -1,4 +1,12 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# + +const WorkThunk = Any +# #@eval struct WorkThunk +# thunk::Core.OpaqueClosure{Tuple{Vector{Tasks}}, Bool} +# WorkThunk(work) = new($(Expr(:opaque_closure, :(Tuple{Vector{Tasks}}), :Bool, :Bool, :((tasks) -> work(tasks))))) # @opaque Vector{Tasks}->Bool (tasks)->work(tasks) +# end +# (p::WorkThunk)() = p.thunk() """ AbstractInterpreter @@ -12,14 +20,16 @@ If `interp::NewInterpreter` is an `AbstractInterpreter`, it is expected to provi the following methods to satisfy the `AbstractInterpreter` API requirement: - `InferenceParams(interp::NewInterpreter)` - return an `InferenceParams` instance - `OptimizationParams(interp::NewInterpreter)` - return an `OptimizationParams` instance -- `get_world_counter(interp::NewInterpreter)` - return the world age for this interpreter +- `get_inference_world(interp::NewInterpreter)` - return the world age for this interpreter - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache -- `code_cache(interp::NewInterpreter)` - return the global inference cache +- `cache_owner(interp::NewInterpreter)` - return the owner of any new cache entries """ -:(AbstractInterpreter) +abstract type AbstractInterpreter end abstract type AbstractLattice end +struct InvalidIRError <: Exception end + struct ArgInfo fargs::Union{Nothing,Vector{Any}} argtypes::Vector{Any} @@ -31,13 +41,17 @@ struct StmtInfo need thus not be computed. 
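# A hypothetical minimal interpreter satisfying the `AbstractInterpreter` API described
# earlier in this file (the type and field names here are illustrative assumptions):
#   struct MyInterp <: AbstractInterpreter
#       world::UInt
#       inf_cache::Vector{InferenceResult}
#   end
#   InferenceParams(::MyInterp) = InferenceParams()
#   OptimizationParams(::MyInterp) = OptimizationParams()
#   get_inference_world(interp::MyInterp) = interp.world
#   get_inference_cache(interp::MyInterp) = interp.inf_cache
#   cache_owner(::MyInterp) = :my_cache_owner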
""" used::Bool + saw_latestworld::Bool end -struct MethodInfo +struct SpecInfo + nargs::Int + isva::Bool propagate_inbounds::Bool method_for_inference_limit_heuristics::Union{Nothing,Method} end -MethodInfo(src::CodeInfo) = MethodInfo( +SpecInfo(src::CodeInfo) = SpecInfo( + Int(src.nargs), src.isva, src.propagate_inbounds, src.method_for_inference_limit_heuristics::Union{Nothing,Method}) @@ -57,38 +71,73 @@ struct VarState VarState(@nospecialize(typ), undef::Bool) = new(typ, undef) end -abstract type ForwardableArgtypes end +struct AnalysisResults + result + next::AnalysisResults + AnalysisResults(@nospecialize(result), next::AnalysisResults) = new(result, next) + AnalysisResults(@nospecialize(result)) = new(result) + # NullAnalysisResults() = new(nothing) + # global const NULL_ANALYSIS_RESULTS = NullAnalysisResults() +end +const NULL_ANALYSIS_RESULTS = AnalysisResults(nothing) """ - InferenceResult(linfo::MethodInstance, [argtypes::ForwardableArgtypes, 𝕃::AbstractLattice]) + result::InferenceResult A type that represents the result of running type inference on a chunk of code. - -See also [`matching_cache_argtypes`](@ref). +There are two constructor available: +- `InferenceResult(mi::MethodInstance, [𝕃::AbstractLattice])` for regular inference, + without extended lattice information included in `result.argtypes`. +- `InferenceResult(mi::MethodInstance, argtypes::Vector{Any}, overridden_by_const::BitVector)` + for constant inference, with extended lattice information included in `result.argtypes`. """ mutable struct InferenceResult + #=== constant fields ===# const linfo::MethodInstance const argtypes::Vector{Any} - const overridden_by_const::BitVector - result # extended lattice element if inferred, nothing otherwise - src # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise - valid_worlds::WorldRange # if inference and optimization is finished - ipo_effects::Effects # if inference is finished - effects::Effects # if optimization is finished - argescapes # ::ArgEscapeCache if optimized, nothing otherwise - must_be_codeinf::Bool # if this must come out as CodeInfo or leaving it as IRCode is ok - function InferenceResult(linfo::MethodInstance, cache_argtypes::Vector{Any}, overridden_by_const::BitVector) - # def = linfo.def - # nargs = def isa Method ? Int(def.nargs) : 0 - # @assert length(cache_argtypes) == nargs - return new(linfo, cache_argtypes, overridden_by_const, nothing, nothing, - WorldRange(), Effects(), Effects(), nothing, true) + const overridden_by_const::Union{Nothing,BitVector} + + #=== mutable fields ===# + result # extended lattice element if inferred, nothing otherwise + exc_result # like `result`, but for the thrown value + src # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise + valid_worlds::WorldRange # if inference and optimization is finished + ipo_effects::Effects # if inference is finished + effects::Effects # if optimization is finished + analysis_results::AnalysisResults # AnalysisResults with e.g. 
result::ArgEscapeCache if optimized, otherwise NULL_ANALYSIS_RESULTS + is_src_volatile::Bool # `src` has been cached globally as the compressed format already, allowing `src` to be used destructively + + #=== uninitialized fields ===# + ci::CodeInstance # CodeInstance if this result may be added to the cache + ci_as_edge::CodeInstance # CodeInstance as the edge representing locally cached result + function InferenceResult(mi::MethodInstance, argtypes::Vector{Any}, overridden_by_const::Union{Nothing,BitVector}) + result = exc_result = src = nothing + valid_worlds = WorldRange() + ipo_effects = effects = Effects() + analysis_results = NULL_ANALYSIS_RESULTS + return new(mi, argtypes, overridden_by_const, result, exc_result, src, + valid_worlds, ipo_effects, effects, analysis_results, #=is_src_volatile=#false) + end +end +function InferenceResult(mi::MethodInstance, 𝕃::AbstractLattice=fallback_lattice) + argtypes = matching_cache_argtypes(𝕃, mi) + return InferenceResult(mi, argtypes, #=overridden_by_const=#nothing) +end + +function stack_analysis_result!(inf_result::InferenceResult, @nospecialize(result)) + return inf_result.analysis_results = AnalysisResults(result, inf_result.analysis_results) +end + +function traverse_analysis_results(callback, (;analysis_results)::Union{InferenceResult,CodeInstance}) + analysis_results isa AnalysisResults || return nothing + while isdefined(analysis_results, :next) + if (result = callback(analysis_results.result)) !== nothing + return result + end + analysis_results = analysis_results.next end + return nothing end -InferenceResult(linfo::MethodInstance, 𝕃::AbstractLattice=fallback_lattice) = - InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo)...) -InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes, 𝕃::AbstractLattice=fallback_lattice) = - InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo, argtypes)...) """ inf_params::InferenceParams @@ -131,11 +180,6 @@ Parameters that control abstract interpretation-based type inference operation. information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a more fine-grained control on this configuration with per-method annotation basis. --- -- `inf_params.unoptimize_throw_blocks::Bool = true`\\ - If `true`, skips inferring calls that are in a block that is known to `throw`. - It may improve the compiler latency without sacrificing the runtime performance - in common situations. ---- - `inf_params.assume_bindings_static::Bool = false`\\ If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at inference time can be assumed to always not exist at runtime (and thus e.g. 
any access to @@ -151,7 +195,6 @@ struct InferenceParams tuple_complexity_limit_depth::Int ipo_constant_propagation::Bool aggressive_constant_propagation::Bool - unoptimize_throw_blocks::Bool assume_bindings_static::Bool ignore_recursion_hardlimit::Bool @@ -163,7 +206,6 @@ struct InferenceParams tuple_complexity_limit_depth::Int, ipo_constant_propagation::Bool, aggressive_constant_propagation::Bool, - unoptimize_throw_blocks::Bool, assume_bindings_static::Bool, ignore_recursion_hardlimit::Bool) return new( @@ -174,21 +216,19 @@ struct InferenceParams tuple_complexity_limit_depth, ipo_constant_propagation, aggressive_constant_propagation, - unoptimize_throw_blocks, assume_bindings_static, ignore_recursion_hardlimit) end end function InferenceParams( params::InferenceParams = InferenceParams( # default constructor - #=max_methods::Int=# 3, + #=max_methods::Int=# BuildSettings.MAX_METHODS, #=max_union_splitting::Int=# 4, #=max_apply_union_enum::Int=# 8, #=max_tuple_splat::Int=# 32, #=tuple_complexity_limit_depth::Int=# 3, #=ipo_constant_propagation::Bool=# true, #=aggressive_constant_propagation::Bool=# false, - #=unoptimize_throw_blocks::Bool=# true, #=assume_bindings_static::Bool=# false, #=ignore_recursion_hardlimit::Bool=# false); max_methods::Int = params.max_methods, @@ -198,7 +238,6 @@ function InferenceParams( tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth, ipo_constant_propagation::Bool = params.ipo_constant_propagation, aggressive_constant_propagation::Bool = params.aggressive_constant_propagation, - unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks, assume_bindings_static::Bool = params.assume_bindings_static, ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit) return InferenceParams( @@ -209,7 +248,6 @@ function InferenceParams( tuple_complexity_limit_depth, ipo_constant_propagation, aggressive_constant_propagation, - unoptimize_throw_blocks, assume_bindings_static, ignore_recursion_hardlimit) end @@ -234,10 +272,6 @@ Parameters that control optimizer operation. tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will be added to `opt_params.inline_cost_threshold` when making inlining decision. --- -- `opt_params.inline_error_path_cost::Int = 20`\\ - Specifies the penalty cost for an un-optimized dynamic call in a block that is known to - `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams). ---- - `opt_params.max_tuple_splat::Int = 32`\\ When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple contains more than this many elements. @@ -247,48 +281,46 @@ Parameters that control optimizer operation. generating `:invoke` expression based on the [`@nospecialize`](@ref) annotation, in order to avoid over-specialization. --- -- `opt_params.trust_inference::Bool = false`\\ - If `false`, the inliner will unconditionally generate a fallback block when union-splitting - a callsite, in case of existing subtyping bugs. This option may be removed in the future. ---- - `opt_params.assume_fatal_throw::Bool = false`\\ If `true`, gives the optimizer license to assume that any `throw` is fatal and thus the state after a `throw` is not externally observable. In particular, this gives the optimizer license to move side effects (that are proven not observed within a particular code path) across a throwing call. Defaults to `false`. 
--- +- `opt_params.preserve_local_sources::Bool = false`\\ + If `true`, the inliner is restricted from modifying locally-cached sources that are + retained in `CallInfo` objects and always makes their copies before inlining them into + caller context. Defaults to `false`. +--- """ struct OptimizationParams inlining::Bool inline_cost_threshold::Int inline_nonleaf_penalty::Int inline_tupleret_bonus::Int - inline_error_path_cost::Int max_tuple_splat::Int compilesig_invokes::Bool - trust_inference::Bool assume_fatal_throw::Bool + preserve_local_sources::Bool function OptimizationParams( inlining::Bool, inline_cost_threshold::Int, inline_nonleaf_penalty::Int, inline_tupleret_bonus::Int, - inline_error_path_cost::Int, max_tuple_splat::Int, compilesig_invokes::Bool, - trust_inference::Bool, - assume_fatal_throw::Bool) + assume_fatal_throw::Bool, + preserve_local_sources::Bool) return new( inlining, inline_cost_threshold, inline_nonleaf_penalty, inline_tupleret_bonus, - inline_error_path_cost, max_tuple_splat, compilesig_invokes, - trust_inference, - assume_fatal_throw) + assume_fatal_throw, + preserve_local_sources) end end function OptimizationParams( @@ -297,30 +329,27 @@ function OptimizationParams( #=inline_cost_threshold::Int=# 100, #=inline_nonleaf_penalty::Int=# 1000, #=inline_tupleret_bonus::Int=# 250, - #=inline_error_path_cost::Int=# 20, #=max_tuple_splat::Int=# 32, #=compilesig_invokes::Bool=# true, - #=trust_inference::Bool=# false, - #=assume_fatal_throw::Bool=# false); + #=assume_fatal_throw::Bool=# false, + #=preserve_local_sources::Bool=# false); inlining::Bool = params.inlining, inline_cost_threshold::Int = params.inline_cost_threshold, inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty, inline_tupleret_bonus::Int = params.inline_tupleret_bonus, - inline_error_path_cost::Int = params.inline_error_path_cost, max_tuple_splat::Int = params.max_tuple_splat, compilesig_invokes::Bool = params.compilesig_invokes, - trust_inference::Bool = params.trust_inference, - assume_fatal_throw::Bool = params.assume_fatal_throw) + assume_fatal_throw::Bool = params.assume_fatal_throw, + preserve_local_sources::Bool = params.preserve_local_sources) return OptimizationParams( inlining, inline_cost_threshold, inline_nonleaf_penalty, inline_tupleret_bonus, - inline_error_path_cost, max_tuple_splat, compilesig_invokes, - trust_inference, - assume_fatal_throw) + assume_fatal_throw, + preserve_local_sources) end """ @@ -337,82 +366,46 @@ struct NativeInterpreter <: AbstractInterpreter # Cache of inference results for this particular interpreter inf_cache::Vector{InferenceResult} + codegen::IdDict{CodeInstance,CodeInfo} # Parameters for inference and optimization inf_params::InferenceParams opt_params::OptimizationParams - - # a boolean flag to indicate if this interpreter is performing semi concrete interpretation - irinterp::Bool end function NativeInterpreter(world::UInt = get_world_counter(); inf_params::InferenceParams = InferenceParams(), opt_params::OptimizationParams = OptimizationParams()) + curr_max_world = get_world_counter() # Sometimes the caller is lazy and passes typemax(UInt). # we cap it to the current world age for correctness if world == typemax(UInt) - world = get_world_counter() + world = curr_max_world end - # If they didn't pass typemax(UInt) but passed something more subtly # incorrect, fail out loudly. 
- @assert world <= get_world_counter() - + @assert world <= curr_max_world method_table = CachedMethodTable(InternalMethodTable(world)) - inf_cache = Vector{InferenceResult}() # Initially empty cache - - return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params, #=irinterp=#false) -end - -function NativeInterpreter(interp::NativeInterpreter; - world::UInt = interp.world, - method_table::CachedMethodTable{InternalMethodTable} = interp.method_table, - inf_cache::Vector{InferenceResult} = interp.inf_cache, - inf_params::InferenceParams = interp.inf_params, - opt_params::OptimizationParams = interp.opt_params, - irinterp::Bool = interp.irinterp) - return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params, irinterp) + codegen = IdDict{CodeInstance,CodeInfo}() + return NativeInterpreter(world, method_table, inf_cache, codegen, inf_params, opt_params) end # Quickly and easily satisfy the AbstractInterpreter API contract InferenceParams(interp::NativeInterpreter) = interp.inf_params OptimizationParams(interp::NativeInterpreter) = interp.opt_params -get_world_counter(interp::NativeInterpreter) = interp.world +get_inference_world(interp::NativeInterpreter) = interp.world get_inference_cache(interp::NativeInterpreter) = interp.inf_cache -code_cache(interp::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(interp)) - -""" - already_inferred_quick_test(::AbstractInterpreter, ::MethodInstance) - -For the `NativeInterpreter`, we don't need to do an actual cache query to know if something -was already inferred. If we reach this point, but the inference flag has been turned off, -then it's in the cache. This is purely for a performance optimization. -""" -already_inferred_quick_test(interp::NativeInterpreter, mi::MethodInstance) = !mi.inInference -already_inferred_quick_test(interp::AbstractInterpreter, mi::MethodInstance) = false +cache_owner(interp::NativeInterpreter) = nothing -""" - lock_mi_inference(::AbstractInterpreter, mi::MethodInstance) - -Hint that `mi` is in inference to help accelerate bootstrapping. -This is particularly used by `NativeInterpreter` and helps us limit the amount of wasted -work we might do when inference is working on initially inferring itself by letting us -detect when inference is already in progress and not running a second copy on it. -This creates a data-race, but the entry point into this code from C (`jl_type_infer`) -already includes detection and restriction on recursion, so it is hopefully mostly a -benign problem, since it should really only happen during the first phase of bootstrapping -that we encounter this flag. -""" -lock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing) -lock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return +engine_reserve(interp::AbstractInterpreter, mi::MethodInstance) = engine_reserve(mi, cache_owner(interp)) +engine_reserve(mi::MethodInstance, @nospecialize owner) = ccall(:jl_engine_reserve, Any, (Any, Any), mi, owner)::CodeInstance +# engine_fulfill(::AbstractInterpreter, ci::CodeInstance, src::CodeInfo) = ccall(:jl_engine_fulfill, Cvoid, (Any, Any), ci, src) # currently the same as engine_reject, so just use that one +engine_reject(::AbstractInterpreter, ci::CodeInstance) = ccall(:jl_engine_fulfill, Cvoid, (Any, Ptr{Cvoid}), ci, C_NULL) -""" -See `lock_mi_inference`. 
-""" -unlock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing) -unlock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return +function already_inferred_quick_test end +function lock_mi_inference end +function unlock_mi_inference end """ add_remark!(::AbstractInterpreter, sv::InferenceState, remark) @@ -426,7 +419,6 @@ function add_remark! end may_optimize(::AbstractInterpreter) = true may_compress(::AbstractInterpreter) = true may_discard_trees(::AbstractInterpreter) = true -verbose_stmt_info(::AbstractInterpreter) = false """ method_table(interp::AbstractInterpreter) -> MethodTableView @@ -435,7 +427,7 @@ Returns a method table this `interp` uses for method lookup. External `AbstractInterpreter` can optionally return `OverlayMethodTable` here to incorporate customized dispatches for the overridden methods. """ -method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp)) +method_table(interp::AbstractInterpreter) = InternalMethodTable(get_inference_world(interp)) method_table(interp::NativeInterpreter) = interp.method_table """ @@ -467,44 +459,35 @@ typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.i ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance) optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance -typeinf_lattice(interp::NativeInterpreter) = interp.irinterp ? - InferenceLattice(SimpleInferenceLattice.instance) : - InferenceLattice(BaseInferenceLattice.instance) -ipo_lattice(interp::NativeInterpreter) = interp.irinterp ? - InferenceLattice(SimpleInferenceLattice.instance) : - InferenceLattice(IPOResultLattice.instance) -optimizer_lattice(interp::NativeInterpreter) = SimpleInferenceLattice.instance - -""" - switch_to_irinterp(interp::AbstractInterpreter) -> irinterp::AbstractInterpreter - -This interface allows `ir_abstract_constant_propagation` to convert `interp` to a new -`irinterp::AbstractInterpreter` to perform semi-concrete interpretation. -`NativeInterpreter` uses this interface to switch its lattice to `optimizer_lattice` during -semi-concrete interpretation on `IRCode`. -""" -switch_to_irinterp(interp::AbstractInterpreter) = interp -switch_to_irinterp(interp::NativeInterpreter) = NativeInterpreter(interp; irinterp=true) - -""" - switch_from_irinterp(irinterp::AbstractInterpreter) -> interp::AbstractInterpreter +function code_cache(interp::AbstractInterpreter) + cache = InternalCodeCache(cache_owner(interp)) + worlds = WorldRange(get_inference_world(interp)) + return WorldView(cache, worlds) +end -The inverse operation of `switch_to_irinterp`, allowing `typeinf` to convert `irinterp` back -to a new `interp::AbstractInterpreter` to perform ordinary abstract interpretation. 
-""" -switch_from_irinterp(irinterp::AbstractInterpreter) = irinterp -switch_from_irinterp(irinterp::NativeInterpreter) = NativeInterpreter(irinterp; irinterp=false) +get_escape_cache(interp::AbstractInterpreter) = GetNativeEscapeCache(interp) abstract type CallInfo end @nospecialize +function add_edges!(edges::Vector{Any}, info::CallInfo) + if info === NoCallInfo() + return nothing # just a minor optimization to avoid dynamic dispatch + end + add_edges_impl(edges, info) + nothing +end nsplit(info::CallInfo) = nsplit_impl(info)::Union{Nothing,Int} getsplit(info::CallInfo, idx::Int) = getsplit_impl(info, idx)::MethodLookupResult -getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx) +getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx)#=::Union{Nothing,ConstResult}=# +add_edges_impl(::Vector{Any}, ::CallInfo) = error(""" + All `CallInfo` is required to implement `add_edges_impl(::Vector{Any}, ::CallInfo)`""") nsplit_impl(::CallInfo) = nothing -getsplit_impl(::CallInfo, ::Int) = error("unexpected call into `getsplit`") +getsplit_impl(::CallInfo, ::Int) = error(""" + A `info::CallInfo` that implements `nsplit_impl(info::CallInfo) -> Int` must implement `getsplit_impl(info::CallInfo, idx::Int) -> MethodLookupResult` + in order to correctly opt in to inlining""") getresult_impl(::CallInfo, ::Int) = nothing @specialize diff --git a/base/compiler/typeutils.jl b/Compiler/src/typeutils.jl similarity index 92% rename from base/compiler/typeutils.jl rename to Compiler/src/typeutils.jl index 2ecc077228264..5175e00612270 100644 --- a/base/compiler/typeutils.jl +++ b/Compiler/src/typeutils.jl @@ -18,7 +18,7 @@ function hasuniquerep(@nospecialize t) iskindtype(typeof(t)) || return true # non-types are always compared by egal in the type system isconcretetype(t) && return true # these are also interned and pointer comparable if isa(t, DataType) && t.name !== Tuple.name && !isvarargtype(t) # invariant DataTypes - return _all(hasuniquerep, t.parameters) + return all(hasuniquerep, t.parameters) end return false end @@ -64,7 +64,9 @@ end # Compute the minimum number of initialized fields for a particular datatype # (therefore also a lower bound on the number of fields) -function datatype_min_ninitialized(t::DataType) +function datatype_min_ninitialized(@nospecialize t0) + t = unwrap_unionall(t0) + t isa DataType || return 0 isabstracttype(t) && return 0 if t.name === _NAMEDTUPLE_NAME names, types = t.parameters[1], t.parameters[2] @@ -95,12 +97,13 @@ end has_concrete_subtype(d::DataType) = d.flags & 0x0020 == 0x0020 # n.b. 
often computed only after setting the type and layout fields -# determine whether x is a valid lattice element tag +# determine whether x is a valid lattice element # For example, Type{v} is not valid if v is a value -# Accepts TypeVars also, since it assumes the user will rewrap it correctly -function valid_as_lattice(@nospecialize(x)) +# Accepts TypeVars and has_free_typevar also, since it assumes the user will rewrap it correctly +# If astag is true, then also requires that it be a possible type tag for a valid object +function valid_as_lattice(@nospecialize(x), astag::Bool=false) x === Bottom && false - x isa TypeVar && return valid_as_lattice(x.ub) + x isa TypeVar && return valid_as_lattice(x.ub, astag) x isa UnionAll && (x = unwrap_unionall(x)) if x isa Union # the Union constructor ensures this (and we'll recheck after @@ -111,6 +114,9 @@ function valid_as_lattice(@nospecialize(x)) if isType(x) p = x.parameters[1] p isa Type || p isa TypeVar || return false + elseif astag && isstructtype(x) + datatype_fieldtypes(x) # force computation of has_concrete_subtype to be updated now + return has_concrete_subtype(x) end return true end @@ -149,6 +155,7 @@ function compatible_vatuple(a::DataType, b::DataType) end # return an upper-bound on type `a` with type `b` removed +# and also any contents that are not valid type tags on any objects # such that `return <: a` && `Union{return, b} == Union{a, b}` function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::Int) if a <: b && isnotbrokensubtype(a, b) @@ -158,8 +165,8 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I if isa(ua, Union) uua = typesubtract(rewrap_unionall(ua.a, a), b, max_union_splitting) uub = typesubtract(rewrap_unionall(ua.b, a), b, max_union_splitting) - return Union{valid_as_lattice(uua) ? uua : Union{}, - valid_as_lattice(uub) ? uub : Union{}} + return Union{valid_as_lattice(uua, true) ? uua : Union{}, + valid_as_lattice(uub, true) ? uub : Union{}} elseif a isa DataType ub = unwrap_unionall(b) if ub isa DataType @@ -197,8 +204,6 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I return a # TODO: improve this bound? end -hasintersect(@nospecialize(a), @nospecialize(b)) = typeintersect(a, b) !== Bottom - _typename(@nospecialize a) = Union{} _typename(a::TypeVar) = Core.TypeName function _typename(a::Union) @@ -291,7 +296,7 @@ end unioncomplexity(@nospecialize x) = _unioncomplexity(x)::Int function _unioncomplexity(@nospecialize x) if isa(x, DataType) - x.name === Tuple.name || isvarargtype(x) || return 0 + x.name === Tuple.name || return 0 c = 0 for ti in x.parameters c = max(c, unioncomplexity(ti)) @@ -302,7 +307,7 @@ function _unioncomplexity(@nospecialize x) elseif isa(x, UnionAll) return max(unioncomplexity(x.body), unioncomplexity(x.var.ub)) elseif isa(x, TypeofVararg) - return isdefined(x, :T) ? unioncomplexity(x.T) : 0 + return isdefined(x, :T) ? 
unioncomplexity(x.T) + 1 : 1 else return 0 end @@ -317,24 +322,6 @@ function unionall_depth(@nospecialize ua) # aka subtype_env_size return depth end -# convert a Union of Tuple types to a Tuple of Unions -function unswitchtupleunion(u::Union) - ts = uniontypes(u) - n = -1 - for t in ts - if t isa DataType && t.name === Tuple.name && length(t.parameters) != 0 && !isvarargtype(t.parameters[end]) - if n == -1 - n = length(t.parameters) - elseif n != length(t.parameters) - return u - end - else - return u - end - end - Tuple{Any[ Union{Any[(t::DataType).parameters[i] for t in ts]...} for i in 1:n ]...} -end - function unwraptv_ub(@nospecialize t) while isa(t, TypeVar) t = t.ub diff --git a/Compiler/src/utilities.jl b/Compiler/src/utilities.jl new file mode 100644 index 0000000000000..c322d1062cea1 --- /dev/null +++ b/Compiler/src/utilities.jl @@ -0,0 +1,334 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +########### +# generic # +########### + +if !@isdefined(var"@timeit") + # This is designed to allow inserting timers when loading a second copy + # of inference for performing performance experiments. + macro timeit(args...) + esc(args[end]) + end +end + +function contains_is(itr, @nospecialize(x)) + for y in itr + if y === x + return true + end + end + return false +end + +anymap(f::Function, a::Array{Any,1}) = Any[ f(a[i]) for i in 1:length(a) ] + +############ +# inlining # +############ + +const MAX_INLINE_CONST_SIZE = 256 + +function count_const_size(@nospecialize(x), count_self::Bool = true) + (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0 + if ismutable(x) + # No definite size + (isa(x, GenericMemory) || isa(x, String) || isa(x, SimpleVector)) && + return MAX_INLINE_CONST_SIZE + 1 + if isa(x, Module) || isa(x, Method) || isa(x, CodeInstance) + # We allow modules, methods and CodeInstance, because we already assume they are externally + # rooted, so we count their contents as 0 size. + return sizeof(Ptr{Cvoid}) + end + # We allow mutable types with no mutable fields (i.e. those mutable + # types used for identity only). The intent of this function is to + # prevent the rooting of large amounts of data that may have been + # speculatively computed. If the struct can get mutated later, we + # cannot assess how much data we might end up rooting. However, if + # the struct is mutable only for identity, the query still works. + for i = 1:nfields(x) + if !isconst(typeof(x), i) + return MAX_INLINE_CONST_SIZE + 1 + end + end + end + isbits(x) && return Core.sizeof(x) + dt = typeof(x) + sz = count_self ? sizeof(dt) : 0 + sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1 + dtfd = DataTypeFieldDesc(dt) + for i = 1:Int(datatype_nfields(dt)) + isdefined(x, i) || continue + f = getfield(x, i) + if !dtfd[i].isptr && datatype_pointerfree(typeof(f)) + continue + end + sz += count_const_size(f, dtfd[i].isptr) + sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1 + end + return sz +end + +function is_inlineable_constant(@nospecialize(x)) + return count_const_size(x) <= MAX_INLINE_CONST_SIZE +end + +########################### +# MethodInstance/CodeInfo # +########################### + +invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li) +use_const_api(li::CodeInstance) = invoke_api(li) == 2 + +function get_staged(mi::MethodInstance, world::UInt) + may_invoke_generator(mi) || return nothing + cache_ci = (mi.def::Method).generator isa Core.CachedGenerator ? 
+ RefValue{CodeInstance}() : nothing + try + return call_get_staged(mi, world, cache_ci) + catch # user code might throw errors – ignore them + return nothing + end +end + +# enable caching of unoptimized generated code if the generator is `CachedGenerator` +function call_get_staged(mi::MethodInstance, world::UInt, cache_ci::RefValue{CodeInstance}) + token = @_gc_preserve_begin cache_ci + cache_ci_ptr = pointer_from_objref(cache_ci) + src = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{CodeInstance}), mi, world, cache_ci_ptr) + @_gc_preserve_end token + return src +end +function call_get_staged(mi::MethodInstance, world::UInt, ::Nothing) + return ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), mi, world, C_NULL) +end + +function get_cached_uninferred(mi::MethodInstance, world::UInt) + ccall(:jl_cached_uninferred, Any, (Any, UInt), mi.cache, world)::CodeInstance +end + +function retrieve_code_info(mi::MethodInstance, world::UInt) + def = mi.def + if !isa(def, Method) + ci = get_cached_uninferred(mi, world) + src = ci.inferred + # Inference may corrupt the src, which is fine, because this is a + # (short-lived) top-level thunk, but set it to NULL anyway, so we + # can catch it if somebody tries to read it again by accident. + # @atomic ci.inferred = C_NULL + return src + end + c = hasgenerator(def) ? get_staged(mi, world) : nothing + if c === nothing && isdefined(def, :source) + src = def.source + if src === nothing + # can happen in images built with --strip-ir + return nothing + elseif isa(src, String) + c = ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Ptr{Cvoid}, Any), def, C_NULL, src) + else + c = copy(src::CodeInfo) + end + end + if c isa CodeInfo + c.parent = mi + return c + end + return nothing +end + +function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) + isa(atype, DataType) || return nothing + mt = ccall(:jl_method_get_table, Any, (Any,), method) + mt === nothing && return nothing + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#1) +end + + +isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) = + !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any, Any), atype, sparams, method)) + +isa_compileable_sig(m::MethodInstance) = (def = m.def; !isa(def, Method) || isa_compileable_sig(m.specTypes, m.sparam_vals, def)) +isa_compileable_sig(m::ABIOverride) = false + +has_typevar(@nospecialize(t), v::TypeVar) = ccall(:jl_has_typevar, Cint, (Any, Any), t, v) != 0 + +""" + is_declared_inline(method::Method) -> Bool + +Check if `method` is declared as `@inline`. +""" +is_declared_inline(method::Method) = _is_declared_inline(method, true) + +""" + is_declared_noinline(method::Method) -> Bool + +Check if `method` is declared as `@noinline`. +""" +is_declared_noinline(method::Method) = _is_declared_inline(method, false) + +function _is_declared_inline(method::Method, inline::Bool) + isdefined(method, :source) || return false + src = method.source + isa(src, MaybeCompressed) || return false + return (inline ? is_declared_inline : is_declared_noinline)(src) +end + +""" + is_aggressive_constprop(method::Union{Method,CodeInfo}) -> Bool + +Check if `method` is declared as `Base.@constprop :aggressive`. +""" +is_aggressive_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x01 + +""" + is_no_constprop(method::Union{Method,CodeInfo}) -> Bool + +Check if `method` is declared as `Base.@constprop :none`. 
+""" +is_no_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x02 + +######### +# types # +######### + +@nospecializeinfer function singleton_type(@nospecialize(ft)) + ft = widenslotwrapper(ft) + if isa(ft, Const) + return ft.val + elseif isconstType(ft) + return ft.parameters[1] + elseif issingletontype(ft) + return ft.instance + end + return nothing +end + +@nospecializeinfer function maybe_singleton_const(@nospecialize(t)) + if isa(t, DataType) + if issingletontype(t) + return Const(t.instance) + elseif isconstType(t) + return Const(t.parameters[1]) + end + end + return t +end + +################### +# SSAValues/Slots # +################### + +function ssamap(f, @nospecialize(stmt)) + urs = userefs(stmt) + for op in urs + val = op[] + if isa(val, SSAValue) + op[] = f(val) + end + end + return urs[] +end + +function foreachssa(@specialize(f), @nospecialize(stmt)) + urs = userefs(stmt) + for op in urs + val = op[] + if isa(val, SSAValue) + f(val) + end + end +end + +function foreach_anyssa(@specialize(f), @nospecialize(stmt)) + urs = userefs(stmt) + for op in urs + val = op[] + if isa(val, AnySSAValue) + f(val) + end + end +end + +function find_ssavalue_uses(body::Vector{Any}, nvals::Int) + uses = BitSet[ BitSet() for i = 1:nvals ] + for line in 1:length(body) + e = body[line] + if isa(e, ReturnNode) + isdefined(e, :val) || continue + e = e.val + elseif isa(e, GotoIfNot) + e = e.cond + end + if isa(e, SSAValue) + push!(uses[e.id], line) + elseif isa(e, Expr) + find_ssavalue_uses!(uses, e, line) + elseif isa(e, PhiNode) + find_ssavalue_uses!(uses, e, line) + end + end + return uses +end + +function find_ssavalue_uses!(uses::Vector{BitSet}, e::Expr, line::Int) + head = e.head + is_meta_expr_head(head) && return + skiparg = (head === :(=)) + for a in e.args + if skiparg + skiparg = false + elseif isa(a, SSAValue) + push!(uses[a.id], line) + elseif isa(a, Expr) + find_ssavalue_uses!(uses, a, line) + end + end +end + +function find_ssavalue_uses!(uses::Vector{BitSet}, e::PhiNode, line::Int) + values = e.values + for i = 1:length(values) + isassigned(values, i) || continue + val = values[i] + if isa(val, SSAValue) + push!(uses[val.id], line) + end + end +end + +# using a function to ensure we can infer this +@inline function slot_id(s) + isa(s, SlotNumber) && return s.id + return (s::Argument).n +end + +########### +# options # +########### + +inlining_enabled() = (JLOptions().can_inline == 1) + +function coverage_enabled(m::Module) + generating_output() && return false # don't alter caches + cov = JLOptions().code_coverage + if cov == 1 # user + m = moduleroot(m) + m === Core && return false + isdefined(Main, :Base) && m === Main.Base && return false + return true + elseif cov == 2 # all + return true + end + return false +end + +function inbounds_option() + opt_check_bounds = JLOptions().check_bounds + opt_check_bounds == 0 && return :default + opt_check_bounds == 1 && return :on + return :off +end + +is_asserts() = ccall(:jl_is_assertsbuild, Cint, ()) == 1 diff --git a/base/compiler/validation.jl b/Compiler/src/validation.jl similarity index 88% rename from base/compiler/validation.jl rename to Compiler/src/validation.jl index 68eb2ab15c59d..6700aa8d4508f 100644 --- a/base/compiler/validation.jl +++ b/Compiler/src/validation.jl @@ -9,23 +9,23 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}( :(&) => 1:1, :(=) => 2:2, :method => 1:4, - :const => 1:1, + :const => 1:2, :new => 1:typemax(Int), :splatnew => 2:2, :the_exception => 0:0, - :enter => 1:1, - :leave => 1:1, + 
:leave => 1:typemax(Int), :pop_exception => 1:1, :inbounds => 1:1, :inline => 1:1, :noinline => 1:1, - :boundscheck => 0:0, + :boundscheck => 0:1, :copyast => 1:1, :meta => 0:typemax(Int), :global => 1:1, + :globaldecl => 2:2, :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, (cconv, effects), args..., roots... :cfunction => 5:5, - :isdefined => 1:1, + :isdefined => 1:2, :code_coverage_effect => 0:0, :loopinfo => 0:typemax(Int), :gc_preserve_begin => 0:typemax(Int), @@ -34,7 +34,12 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}( :throw_undef_if_not => 2:2, :aliasscope => 0:0, :popaliasscope => 0:0, - :new_opaque_closure => 4:typemax(Int) + :new_opaque_closure => 5:typemax(Int), + :import => 1:typemax(Int), + :using => 1:typemax(Int), + :export => 1:typemax(Int), + :public => 1:typemax(Int), + :latestworld => 0:0, ) # @enum isn't defined yet, otherwise I'd use it for this @@ -61,20 +66,20 @@ struct InvalidCodeError <: Exception end InvalidCodeError(kind::AbstractString) = InvalidCodeError(kind, nothing) -function validate_code_in_debug_mode(linfo::MethodInstance, src::CodeInfo, kind::String) - if JLOptions().debug_level == 2 - # this is a debug build of julia, so let's validate linfo - errors = validate_code(linfo, src) +function maybe_validate_code(mi::MethodInstance, src::CodeInfo, kind::String) + if is_asserts() + errors = validate_code(mi, src) if !isempty(errors) for e in errors - if linfo.def isa Method + if mi.def isa Method println(stderr, "WARNING: Encountered invalid ", kind, " code for method ", - linfo.def, ": ", e) + mi.def, ": ", e) else println(stderr, "WARNING: Encountered invalid ", kind, " code for top level expression in ", - linfo.def, ": ", e) + mi.def, ": ", e) end end + error("") end end end @@ -144,7 +149,7 @@ function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_le elseif head === :call || head === :invoke || x.head === :invoke_modify || head === :gc_preserve_end || head === :meta || head === :inbounds || head === :foreigncall || head === :cfunction || - head === :const || head === :enter || head === :leave || head === :pop_exception || + head === :const || head === :leave || head === :pop_exception || head === :method || head === :global || head === :static_parameter || head === :new || head === :splatnew || head === :thunk || head === :loopinfo || head === :throw_undef_if_not || head === :code_coverage_effect || head === :inline || head === :noinline @@ -160,6 +165,13 @@ function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_le push!(errors, InvalidCodeError(INVALID_CALL_ARG, x.cond)) end validate_val!(x.cond) + elseif isa(x, EnterNode) + if isdefined(x, :scope) + if !is_valid_argument(x.scope) + push!(errors, InvalidCodeError(INVALID_CALL_ARG, x.scope)) + end + validate_val!(x.scope) + end elseif isa(x, ReturnNode) if isdefined(x, :val) if !is_valid_return(x.val) @@ -230,11 +242,11 @@ end validate_code(args...) = validate_code!(Vector{InvalidCodeError}(), args...) 
-is_valid_lvalue(@nospecialize(x)) = isa(x, UnoptSlot) || isa(x, GlobalRef) +is_valid_lvalue(@nospecialize(x)) = isa(x, SlotNumber) || isa(x, GlobalRef) function is_valid_argument(@nospecialize(x)) - if isa(x, UnoptSlot) || isa(x, Argument) || isa(x, SSAValue) || - isa(x, GlobalRef) || isa(x, QuoteNode) || isexpr(x, (:static_parameter, :boundscheck)) || + if isa(x, SlotNumber) || isa(x, Argument) || isa(x, SSAValue) || + isa(x, GlobalRef) || isa(x, QuoteNode) || (isa(x, Expr) && is_value_pos_expr_head(x.head)) || isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) || isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing return true @@ -246,12 +258,12 @@ end function is_valid_rvalue(@nospecialize(x)) is_valid_argument(x) && return true - if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast) + if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, + :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast, + :new_opaque_closure) return true end return false end is_valid_return(@nospecialize(x)) = is_valid_argument(x) || (isa(x, Expr) && x.head === :lambda) - -is_flag_set(byte::UInt8, flag::UInt8) = (byte & flag) == flag diff --git a/Compiler/test/AbstractInterpreter.jl b/Compiler/test/AbstractInterpreter.jl new file mode 100644 index 0000000000000..533eaf93937a3 --- /dev/null +++ b/Compiler/test/AbstractInterpreter.jl @@ -0,0 +1,536 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test + +include("irutils.jl") +include("newinterp.jl") + +# interpreter that performs abstract interpretation only +# (semi-concrete interpretation should be disabled automatically) +@newinterp AbsIntOnlyInterp1 +Compiler.may_optimize(::AbsIntOnlyInterp1) = false +@test Base.infer_return_type(Base.init_stdio, (Ptr{Cvoid},); interp=AbsIntOnlyInterp1()) >: IO + +# it should work even if the interpreter discards inferred source entirely +@newinterp AbsIntOnlyInterp2 +Compiler.may_optimize(::AbsIntOnlyInterp2) = false +Compiler.transform_result_for_cache(::AbsIntOnlyInterp2, ::Compiler.InferenceResult) = nothing +@test Base.infer_return_type(Base.init_stdio, (Ptr{Cvoid},); interp=AbsIntOnlyInterp2()) >: IO + +# OverlayMethodTable +# ================== + +using Base.Experimental: @MethodTable, @overlay, @consistent_overlay + +# @overlay method with return type annotation +@MethodTable RT_METHOD_DEF +@overlay RT_METHOD_DEF Base.sin(x::Float64)::Float64 = cos(x) +@overlay RT_METHOD_DEF function Base.sin(x::T)::T where T<:AbstractFloat + cos(x) +end + +@newinterp MTOverlayInterp +@MethodTable OVERLAY_MT +Compiler.method_table(interp::MTOverlayInterp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), OVERLAY_MT) + +function Compiler.add_remark!(interp::MTOverlayInterp, ::Compiler.InferenceState, remark) + if interp.meta !== nothing + # Core.println(remark) + push!(interp.meta, remark) + end + return nothing +end + +struct StrangeSinError end +strangesin(x) = sin(x) +@overlay OVERLAY_MT strangesin(x::Float64) = + iszero(x) ? throw(StrangeSinError()) : x < 0 ? 
nothing : cos(x) + +# inference should use the overlayed method table +@test Base.return_types((Float64,); interp=MTOverlayInterp()) do x + strangesin(x) +end |> only === Union{Float64,Nothing} +@test Base.return_types((Any,); interp=MTOverlayInterp()) do x + @invoke strangesin(x::Float64) +end |> only === Union{Float64,Nothing} +@test only(Base.return_types(strangesin, (Float64,); interp=MTOverlayInterp())) === Union{Float64,Nothing} +@test Base.infer_exception_type(strangesin, (Float64,); interp=MTOverlayInterp()) === Union{StrangeSinError,DomainError} +@test only(Base.infer_exception_types(strangesin, (Float64,); interp=MTOverlayInterp())) === Union{StrangeSinError,DomainError} +@test last(only(code_typed(strangesin, (Float64,); interp=MTOverlayInterp()))) === Union{Float64,Nothing} +@test last(only(Base.code_ircode(strangesin, (Float64,); interp=MTOverlayInterp()))) === Union{Float64,Nothing} + +# effect analysis should figure out that the overlayed method is used +@test Base.infer_effects((Float64,); interp=MTOverlayInterp()) do x + strangesin(x) +end |> !Compiler.is_nonoverlayed +@test Base.infer_effects((Any,); interp=MTOverlayInterp()) do x + @invoke strangesin(x::Float64) +end |> !Compiler.is_nonoverlayed + +# account for overlay possibility in unanalyzed matching method +callstrange(::Float64) = strangesin(x) +callstrange(::Number) = Core.compilerbarrier(:type, nothing) # trigger inference bail out +callstrange(::Any) = 1.0 +callstrange_entry(x) = callstrange(x) # needs to be defined here because of world age +let interp = MTOverlayInterp(Set{Any}()) + matches = Compiler.findall(Tuple{typeof(callstrange),Any}, Compiler.method_table(interp)) + @test matches !== nothing + @test Compiler.length(matches) == 3 + @test Base.infer_effects(callstrange_entry, (Any,); interp) |> !Compiler.is_nonoverlayed + @test "Call inference reached maximally imprecise information: bailing on doing more abstract inference." in interp.meta +end + +# but it should never apply for the native compilation +@test Base.infer_effects((Float64,)) do x + strangesin(x) +end |> Compiler.is_nonoverlayed +@test Base.infer_effects((Any,)) do x + @invoke strangesin(x::Float64) +end |> Compiler.is_nonoverlayed + +# fallback to the internal method table +@test Base.return_types((Int,); interp=MTOverlayInterp()) do x + cos(x) +end |> only === Float64 +@test Base.return_types((Any,); interp=MTOverlayInterp()) do x + @invoke cos(x::Float64) +end |> only === Float64 + +# not fully covered overlay method match +overlay_match(::Any) = nothing +@overlay OVERLAY_MT overlay_match(::Int) = missing +@test Base.return_types((Any,); interp=MTOverlayInterp()) do x + overlay_match(x) +end |> only === Union{Nothing,Missing} + +# partial concrete evaluation +@test Base.return_types(; interp=MTOverlayInterp()) do + isbitstype(Int) ? nothing : missing +end |> only === Nothing +Base.@assume_effects :terminates_locally function issue41694(x) + res = 1 + 0 ≤ x < 20 || error("bad fact") + while x > 1 + res *= x + x -= 1 + end + return res +end +@test Base.return_types(; interp=MTOverlayInterp()) do + issue41694(3) == 6 ? nothing : missing +end |> only === Nothing + +# disable partial concrete evaluation when tainted by any overlayed call +Base.@assume_effects :total totalcall(f, args...) = f(args...) 
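# NOTE (illustrative annotation, not one of this file's assertions): the `@assume_effects
# :total` on `totalcall` would normally make a call such as `totalcall(strangesin, 1.0)`
# eligible for concrete evaluation, which executes compiled native code and would therefore
# hit the *native* `strangesin` rather than the overlay. The test below checks that the
# overlay taint keeps concrete evaluation disabled while inference still resolves the branch
# to `nothing` (presumably via constant propagation through the overlayed method). A minimal
# standalone sketch of the same mechanism, using hypothetical names (`SKETCH_MT`, `mydiv`):
#
#     using Base.Experimental: @MethodTable, @overlay
#     @MethodTable SKETCH_MT
#     mydiv(x, y) = x / y
#     @overlay SKETCH_MT mydiv(x::Float64, y::Float64) = iszero(y) ? nothing : x / y
#
# An interpreter whose `Compiler.method_table` returns an `OverlayMethodTable` wrapping
# `SKETCH_MT` (the same pattern as `MTOverlayInterp` above) would infer
# `mydiv(::Float64, ::Float64)` as `Union{Nothing,Float64}`, while native inference,
# which never consults the overlay table, still sees plain `Float64`.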
+@test Base.return_types(; interp=MTOverlayInterp()) do + if totalcall(strangesin, 1.0) == cos(1.0) + return nothing + else + return missing + end +end |> only === Nothing + +# override `:native_executable` to allow concrete-eval for overlay-ed methods +function myfactorial(x::Int, raise) + res = 1 + 0 ≤ x < 20 || raise("x is too big") + Base.@assume_effects :terminates_locally while x > 1 + res *= x + x -= 1 + end + return res +end +raise_on_gpu1(x) = error(x) +@overlay OVERLAY_MT @noinline raise_on_gpu1(x) = #=do something with GPU=# error(x) +raise_on_gpu2(x) = error(x) +@consistent_overlay OVERLAY_MT @noinline raise_on_gpu2(x) = #=do something with GPU=# error(x) +raise_on_gpu3(x) = error(x) +@consistent_overlay OVERLAY_MT @noinline Base.@assume_effects :foldable raise_on_gpu3(x) = #=do something with GPU=# error_on_gpu(x) +cpu_factorial(x::Int) = myfactorial(x, error) +gpu_factorial1(x::Int) = myfactorial(x, raise_on_gpu1) +gpu_factorial2(x::Int) = myfactorial(x, raise_on_gpu2) +gpu_factorial3(x::Int) = myfactorial(x, raise_on_gpu3) + +@test Base.infer_effects(cpu_factorial, (Int,); interp=MTOverlayInterp()) |> Compiler.is_nonoverlayed +@test Base.infer_effects(gpu_factorial1, (Int,); interp=MTOverlayInterp()) |> !Compiler.is_nonoverlayed +@test Base.infer_effects(gpu_factorial2, (Int,); interp=MTOverlayInterp()) |> Compiler.is_consistent_overlay +let effects = Base.infer_effects(gpu_factorial3, (Int,); interp=MTOverlayInterp()) + # check if `@consistent_overlay` together works with `@assume_effects` + # N.B. the overlaid `raise_on_gpu3` is not :foldable otherwise since `error_on_gpu` is (intetionally) undefined. + @test Compiler.is_consistent_overlay(effects) + @test Compiler.is_foldable(effects) +end +@test Base.infer_return_type(; interp=MTOverlayInterp()) do + Val(gpu_factorial2(3)) +end == Val{6} +@test Base.infer_return_type(; interp=MTOverlayInterp()) do + Val(gpu_factorial3(3)) +end == Val{6} + +# GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror` +# https://github.com/JuliaLang/julia/issues/48097 +@newinterp Issue48097Interp +@MethodTable ISSUE_48097_MT +Compiler.method_table(interp::Issue48097Interp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), ISSUE_48097_MT) +function Compiler.concrete_eval_eligible(interp::Issue48097Interp, + @nospecialize(f), result::Compiler.MethodCallResult, arginfo::Compiler.ArgInfo, sv::Compiler.AbsIntState) + ret = @invoke Compiler.concrete_eval_eligible(interp::Compiler.AbstractInterpreter, + f::Any, result::Compiler.MethodCallResult, arginfo::Compiler.ArgInfo, sv::Compiler.AbsIntState) + if ret === :semi_concrete_eval + # disable semi-concrete interpretation + return :none + end + return ret +end +@overlay ISSUE_48097_MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return +issue48097(; kwargs...) = return 42 +@test fully_eliminated(; interp=Issue48097Interp(), retval=42) do + issue48097(; a=1f0, b=1.0) +end + +# https://github.com/JuliaLang/julia/issues/52938 +@newinterp Issue52938Interp +@MethodTable ISSUE_52938_MT +Compiler.method_table(interp::Issue52938Interp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), ISSUE_52938_MT) +inner52938(x, types::Type, args...; kwargs...) 
= x +outer52938(x) = @inline inner52938(x, Tuple{}; foo=Ref(42), bar=1) +@test fully_eliminated(outer52938, (Any,); interp=Issue52938Interp(), retval=Argument(2)) + +# https://github.com/JuliaGPU/CUDA.jl/issues/2241 +@newinterp Cuda2241Interp +@MethodTable CUDA_2241_MT +Compiler.method_table(interp::Cuda2241Interp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), CUDA_2241_MT) +inner2241(f, types::Type, args...; kwargs...) = nothing +function outer2241(f) + @inline inner2241(f, Tuple{}; foo=Ref(42), bar=1) + return nothing +end +# NOTE CUDA.jl overlays `throw_boundserror` in a way that causes effects, but these effects +# are ignored for this call graph at the `@assume_effects` annotation on `typejoin`. +# Here it's important to use `@consistent_overlay` to avoid tainting the `:nonoverlayed` bit. +const cuda_kernel_state = Ref{Any}() +@consistent_overlay CUDA_2241_MT @inline Base.throw_boundserror(A, I) = + (cuda_kernel_state[] = (A, I); error()) +@test fully_eliminated(outer2241, (Nothing,); interp=Cuda2241Interp(), retval=nothing) + +# Should not concrete-eval overlayed methods in semi-concrete interpretation +@newinterp OverlaySinInterp +@MethodTable OVERLAY_SIN_MT +Compiler.method_table(interp::OverlaySinInterp) = Compiler.OverlayMethodTable(Compiler.get_inference_world(interp), OVERLAY_SIN_MT) +overlay_sin1(x) = error("Not supposed to be called.") +@overlay OVERLAY_SIN_MT overlay_sin1(x) = cos(x) +@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = overlay_sin1(x) +let ir = Base.code_ircode(; interp=OverlaySinInterp()) do + sin(0.) + end |> only |> first + ir.argtypes[1] = Tuple{} + oc = Core.OpaqueClosure(ir) + @test oc() == cos(0.) +end +@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin1(x) +let ir = Base.code_ircode(; interp=OverlaySinInterp()) do + sin(0.) + end |> only |> first + ir.argtypes[1] = Tuple{} + oc = Core.OpaqueClosure(ir) + @test oc() == cos(0.) +end +_overlay_sin2(x) = error("Not supposed to be called.") +@overlay OVERLAY_SIN_MT _overlay_sin2(x) = cos(x) +overlay_sin2(x) = _overlay_sin2(x) +@overlay OVERLAY_SIN_MT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin2(x) +let ir = Base.code_ircode(; interp=OverlaySinInterp()) do + sin(0.) + end |> only |> first + ir.argtypes[1] = Tuple{} + oc = Core.OpaqueClosure(ir) + @test oc() == cos(0.) 
+end + +# AbstractLattice +# =============== + +using Core: SlotNumber, Argument +using .Compiler: slot_id, tmerge_fast_path +import .Compiler: + AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, + widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice, + widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn + +@newinterp TaintInterpreter +struct TaintLattice{PL<:AbstractLattice} <: Compiler.AbstractLattice + parent::PL +end +Compiler.widenlattice(𝕃::TaintLattice) = 𝕃.parent +Compiler.is_valid_lattice_norec(::TaintLattice, @nospecialize(elm)) = isa(elm, Taint) + +struct InterTaintLattice{PL<:AbstractLattice} <: Compiler.AbstractLattice + parent::PL +end +Compiler.widenlattice(𝕃::InterTaintLattice) = 𝕃.parent +Compiler.is_valid_lattice_norec(::InterTaintLattice, @nospecialize(elm)) = isa(elm, InterTaint) + +const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}} + +Compiler.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance)) +Compiler.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance)) +Compiler.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance) + +struct Taint + typ + slots::BitSet + function Taint(@nospecialize(typ), slots::BitSet) + if typ isa Taint + slots = typ.slots ∪ slots + typ = typ.typ + end + return new(typ, slots) + end +end +Taint(@nospecialize(typ), id::Int) = Taint(typ, push!(BitSet(), id)) +function Base.:(==)(a::Taint, b::Taint) + return a.typ == b.typ && a.slots == b.slots +end + +struct InterTaint + typ + slots::BitSet + function InterTaint(@nospecialize(typ), slots::BitSet) + if typ isa InterTaint + slots = typ.slots ∪ slots + typ = typ.typ + end + return new(typ, slots) + end +end +InterTaint(@nospecialize(typ), id::Int) = InterTaint(typ, push!(BitSet(), id)) +function Base.:(==)(a::InterTaint, b::InterTaint) + return a.typ == b.typ && a.slots == b.slots +end + +const AnyTaint = Union{Taint, InterTaint} + +function Compiler.tmeet(𝕃::AnyTaintLattice, @nospecialize(v), @nospecialize(t::Type)) + T = isa(𝕃, TaintLattice) ? Taint : InterTaint + if isa(v, T) + v = v.typ + end + return tmeet(widenlattice(𝕃), v, t) +end +function Compiler.tmerge(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb)) + r = tmerge_fast_path(𝕃, typea, typeb) + r !== nothing && return r + # type-lattice for Taint + T = isa(𝕃, TaintLattice) ? Taint : InterTaint + if isa(typea, T) + if isa(typeb, T) + return T( + tmerge(widenlattice(𝕃), typea.typ, typeb.typ), + typea.slots ∪ typeb.slots) + else + typea = typea.typ + end + elseif isa(typeb, T) + typeb = typeb.typ + end + return tmerge(widenlattice(𝕃), typea, typeb) +end +function Compiler.:⊑(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb)) + T = isa(𝕃, TaintLattice) ? 
Taint : InterTaint + if isa(typea, T) + if isa(typeb, T) + typea.slots ⊆ typeb.slots || return false + return ⊑(widenlattice(𝕃), typea.typ, typeb.typ) + end + typea = typea.typ + elseif isa(typeb, T) + return false + end + return ⊑(widenlattice(𝕃), typea, typeb) +end +Compiler.widenconst(taint::AnyTaint) = widenconst(taint.typ) + +function Compiler.abstract_eval_special_value(interp::TaintInterpreter, + @nospecialize(e), sstate::Compiler.StatementState, sv::Compiler.InferenceState) + ret = @invoke Compiler.abstract_eval_special_value(interp::Compiler.AbstractInterpreter, + e::Any, sstate::Compiler.StatementState, sv::Compiler.InferenceState) + if isa(e, SlotNumber) || isa(e, Argument) + return Taint(ret, slot_id(e)) + end + return ret +end + +function Compiler.widenreturn(𝕃::InferenceLattice{<:InterTaintLattice}, @nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::Compiler.VarTable) + if isa(rt, Taint) + return InterTaint(rt.typ, BitSet((id for id in rt.slots if id ≤ nargs))) + end + return Compiler.widenreturn(widenlattice(𝕃), rt, bestguess, nargs, slottypes, changes) +end + +@test Compiler.tmerge(typeinf_lattice(TaintInterpreter()), Taint(Int, 1), Taint(Int, 2)) == Taint(Int, BitSet(1:2)) + +# code_typed(ifelse, (Bool, Int, Int); interp=TaintInterpreter()) + +# External lattice without `Conditional` + +import .Compiler: + AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, + typeinf_lattice, ipo_lattice, optimizer_lattice + +@newinterp NonconditionalInterpreter +Compiler.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) +Compiler.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) +Compiler.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice()) + +@test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x + c = isa(x, Int) || isa(x, Float64) + if c + return x + else + return nothing + end +end |> only === Any + +# CallInfo × inlining +# =================== + +@newinterp NoinlineInterpreter +noinline_modules(interp::NoinlineInterpreter) = interp.meta::Set{Module} + +import .Compiler: CallInfo + +struct NoinlineCallInfo <: CallInfo + info::CallInfo # wrapped call +end +Compiler.add_edges_impl(edges::Vector{Any}, info::NoinlineCallInfo) = Compiler.add_edges!(edges, info.info) +Compiler.nsplit_impl(info::NoinlineCallInfo) = Compiler.nsplit(info.info) +Compiler.getsplit_impl(info::NoinlineCallInfo, idx::Int) = Compiler.getsplit(info.info, idx) +Compiler.getresult_impl(info::NoinlineCallInfo, idx::Int) = Compiler.getresult(info.info, idx) + +function Compiler.abstract_call(interp::NoinlineInterpreter, + arginfo::Compiler.ArgInfo, si::Compiler.StmtInfo, sv::Compiler.InferenceState, max_methods::Int) + ret = @invoke Compiler.abstract_call(interp::Compiler.AbstractInterpreter, + arginfo::Compiler.ArgInfo, si::Compiler.StmtInfo, sv::Compiler.InferenceState, max_methods::Int) + return Compiler.Future{Compiler.CallMeta}(ret, interp, sv) do ret, interp, sv + if sv.mod in noinline_modules(interp) + (;rt, exct, effects, info) = ret + return Compiler.CallMeta(rt, exct, effects, NoinlineCallInfo(info)) + end + return ret + end +end +function Compiler.src_inlining_policy(interp::NoinlineInterpreter, + @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32) + if isa(info, NoinlineCallInfo) + return false + end + return @invoke Compiler.src_inlining_policy(interp::Compiler.AbstractInterpreter, + src::Any, 
info::CallInfo, stmt_flag::UInt32) +end + +@inline function inlined_usually(x, y, z) + return x * y + z +end +foo_split(x::Float64) = 1 +foo_split(x::Int) = 2 + +# check if the inlining algorithm works as expected +let src = code_typed1((Float64,Float64,Float64)) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 +end +let NoinlineModule = Module() + OtherModule = Module() + main_func(x, y, z) = inlined_usually(x, y, z) + @eval NoinlineModule noinline_func(x, y, z) = $inlined_usually(x, y, z) + @eval OtherModule other_func(x, y, z) = $inlined_usually(x, y, z) + @eval NoinlineModule bar_split_error() = $foo_split(Core.compilerbarrier(:type, nothing)) + + interp = NoinlineInterpreter(Set((NoinlineModule,))) + + # this anonymous function's context is Main -- it should be inlined as usual + let src = code_typed1(main_func, (Float64,Float64,Float64); interp) + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # it should work for cached results + method = only(methods(inlined_usually, (Float64,Float64,Float64,))) + mi = Compiler.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec()) + @test Compiler.haskey(Compiler.code_cache(interp), mi) + let src = code_typed1(main_func, (Float64,Float64,Float64); interp) + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # now the context module is `NoinlineModule` -- it should not be inlined + let src = code_typed1(NoinlineModule.noinline_func, (Float64,Float64,Float64); interp) + @test count(isinvoke(:inlined_usually), src.code) == 1 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # the context module is totally irrelevant -- it should be inlined as usual + let src = code_typed1(OtherModule.other_func, (Float64,Float64,Float64); interp) + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + let src = code_typed1(NoinlineModule.bar_split_error) + @test count(iscall((src, foo_split)), src.code) == 0 + @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 + end +end + +# custom inferred data +# ==================== + +@newinterp CustomDataInterp +struct CustomDataInterpToken end +Compiler.cache_owner(::CustomDataInterp) = CustomDataInterpToken() +struct CustomData + inferred + CustomData(@nospecialize inferred) = new(inferred) +end +function Compiler.transform_result_for_cache(interp::CustomDataInterp, result::Compiler.InferenceResult) + inferred_result = @invoke Compiler.transform_result_for_cache( + interp::Compiler.AbstractInterpreter, result::Compiler.InferenceResult) + return CustomData(inferred_result) +end +function Compiler.src_inlining_policy(interp::CustomDataInterp, @nospecialize(src), + @nospecialize(info::Compiler.CallInfo), stmt_flag::UInt32) + if src isa CustomData + src = src.inferred + end + return @invoke Compiler.src_inlining_policy(interp::Compiler.AbstractInterpreter, src::Any, + info::Compiler.CallInfo, stmt_flag::UInt32) +end +Compiler.retrieve_ir_for_inlining(cached_result::CodeInstance, src::CustomData) = + Compiler.retrieve_ir_for_inlining(cached_result, src.inferred) +Compiler.retrieve_ir_for_inlining(mi::MethodInstance, src::CustomData, preserve_local_sources::Bool) = + Compiler.retrieve_ir_for_inlining(mi, src.inferred, 
preserve_local_sources) +let src = code_typed((Int,); interp=CustomDataInterp()) do x + return sin(x) + cos(x) + end |> only |> first + @test count(isinvoke(:sin), src.code) == 1 + @test count(isinvoke(:cos), src.code) == 1 + @test count(isinvoke(:+), src.code) == 0 +end + +# ephemeral cache mode +@newinterp DebugInterp #=ephemeral_cache=#true +func_ext_cache1(a) = func_ext_cache2(a) * cos(a) +func_ext_cache2(a) = sin(a) +let interp = DebugInterp() + @test Base.infer_return_type(func_ext_cache1, (Float64,); interp) === Float64 + @test isdefined(interp, :code_cache) + found = false + for (mi, codeinst) in interp.code_cache.dict + if mi.def.name === :func_ext_cache2 + found = true + break + end + end + @test found +end diff --git a/Compiler/test/CompilerLoadingTest/Manifest.toml b/Compiler/test/CompilerLoadingTest/Manifest.toml new file mode 100644 index 0000000000000..7fb3452a61017 --- /dev/null +++ b/Compiler/test/CompilerLoadingTest/Manifest.toml @@ -0,0 +1,16 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "10c2816629fed766649b89eb6670e7001df6ea18" + +[[deps.Compiler]] +path = "../.." +uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" +version = "0.0.1" + +[[deps.CompilerLoadingTest]] +deps = ["Compiler"] +path = "." +uuid = "95defb8a-f82d-44d7-b2c9-37d658f648c1" +version = "0.0.0" diff --git a/Compiler/test/CompilerLoadingTest/Project.toml b/Compiler/test/CompilerLoadingTest/Project.toml new file mode 100644 index 0000000000000..5dca932dc7997 --- /dev/null +++ b/Compiler/test/CompilerLoadingTest/Project.toml @@ -0,0 +1,5 @@ +name = "CompilerLoadingTest" +uuid = "95defb8a-f82d-44d7-b2c9-37d658f648c1" + +[deps] +Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" diff --git a/Compiler/test/CompilerLoadingTest/compiler_loading_test.jl b/Compiler/test/CompilerLoadingTest/compiler_loading_test.jl new file mode 100644 index 0000000000000..a09f7751912b8 --- /dev/null +++ b/Compiler/test/CompilerLoadingTest/compiler_loading_test.jl @@ -0,0 +1,12 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test, UUIDs + +# This file is loaded as part of special_loading.jl +Base.compilecache(Base.PkgId(UUID(0x95defb8a_f82d_44d7_b2c9_37d658f648c1), "CompilerLoadingTest")) + +using CompilerLoadingTest +@test Base.maybe_loaded_precompile(Base.PkgId(UUID(0x807dbc54_b67e_4c79_8afb_eafe4df6f2e1), "Compiler"), Base.module_build_id(Base.Compiler)) !== nothing + +using Compiler +@test CompilerLoadingTest.Compiler === Compiler === Base.Compiler diff --git a/stdlib/LinearAlgebra/test/runtests.jl b/Compiler/test/CompilerLoadingTest/src/CompilerLoadingTest.jl similarity index 50% rename from stdlib/LinearAlgebra/test/runtests.jl rename to Compiler/test/CompilerLoadingTest/src/CompilerLoadingTest.jl index 29581313c18d5..61f8417a23251 100644 --- a/stdlib/LinearAlgebra/test/runtests.jl +++ b/Compiler/test/CompilerLoadingTest/src/CompilerLoadingTest.jl @@ -1,5 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -for file in readlines(joinpath(@__DIR__, "testgroups")) - include(file * ".jl") +module CompilerLoadingTest + using Compiler end diff --git a/Compiler/test/EAUtils.jl b/Compiler/test/EAUtils.jl new file mode 100644 index 0000000000000..990a7de3b8141 --- /dev/null +++ b/Compiler/test/EAUtils.jl @@ -0,0 +1,359 @@ +module EAUtils + +export code_escapes, @code_escapes, __clear_cache! 
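# NOTE (illustrative usage sketch, not part of the module itself): the exported entry
# points are intended to be used roughly as follows; the exact report format is defined by
# `EscapeResult`/`print_with_info` further down in this file.
#
#     using .EAUtils
#     # analyze a closure over the given argument types
#     result = code_escapes((String,)) do s
#         r = Ref(s)      # does this allocation escape?
#         return r[]
#     end
#     # or analyze an existing function, as the test suite does
#     result = code_escapes(sin, (Int,))
#     # or let the macro extract the argument types from a call
#     x = 42
#     @code_escapes sin(x)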
+ +include("setup_Compiler.jl") + +using .Compiler: EscapeAnalysis as EA + +# AbstractInterpreter +# ------------------- + +# imports +import .Compiler: + AbstractInterpreter, NativeInterpreter, WorldView, WorldRange, InferenceParams, + OptimizationParams, get_world_counter, get_inference_cache, ipo_dataflow_analysis! +# usings +using Core.IR +using .Compiler: InferenceResult, InferenceState, OptimizationState, IRCode +using .EA: analyze_escapes, ArgEscapeCache, ArgEscapeInfo, EscapeInfo, EscapeState + +mutable struct EscapeAnalyzerCacheToken end +global GLOBAL_EA_CACHE_TOKEN::EscapeAnalyzerCacheToken = EscapeAnalyzerCacheToken() + +struct EscapeResultForEntry + ir::IRCode + estate::EscapeState + mi::MethodInstance +end + +mutable struct EscapeAnalyzer <: AbstractInterpreter + const world::UInt + const inf_params::InferenceParams + const opt_params::OptimizationParams + const inf_cache::Vector{InferenceResult} + const token::EscapeAnalyzerCacheToken + const entry_mi::Union{Nothing,MethodInstance} + result::EscapeResultForEntry + function EscapeAnalyzer(world::UInt, cache_token::EscapeAnalyzerCacheToken; + entry_mi::Union{Nothing,MethodInstance}=nothing) + inf_params = InferenceParams() + opt_params = OptimizationParams() + inf_cache = InferenceResult[] + return new(world, inf_params, opt_params, inf_cache, cache_token, entry_mi) + end +end + +Compiler.InferenceParams(interp::EscapeAnalyzer) = interp.inf_params +Compiler.OptimizationParams(interp::EscapeAnalyzer) = interp.opt_params +Compiler.get_inference_world(interp::EscapeAnalyzer) = interp.world +Compiler.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache +Compiler.cache_owner(interp::EscapeAnalyzer) = interp.token +Compiler.get_escape_cache(::EscapeAnalyzer) = GetEscapeCache() + +function Compiler.ipo_dataflow_analysis!(interp::EscapeAnalyzer, opt::OptimizationState, + ir::IRCode, caller::InferenceResult) + # run EA on all frames that have been optimized + nargs = Int(opt.src.nargs) + 𝕃ₒ = Compiler.optimizer_lattice(interp) + estate = try + analyze_escapes(ir, nargs, 𝕃ₒ, GetEscapeCache()) + catch err + @error "error happened within EA, inspect `Main.failedanalysis`" + failedanalysis = FailedAnalysis(caller, ir, nargs) + Core.eval(Main, :(failedanalysis = $failedanalysis)) + rethrow(err) + end + if caller.linfo === interp.entry_mi + # return back the result + interp.result = EscapeResultForEntry(Compiler.copy(ir), estate, caller.linfo) + end + record_escapes!(caller, estate, ir) + + @invoke Compiler.ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState, + ir::IRCode, caller::InferenceResult) +end + +# cache entire escape state for inspection and debugging +struct EscapeCacheInfo + argescapes::ArgEscapeCache + state::EscapeState # preserved just for debugging purpose + ir::IRCode # preserved just for debugging purpose +end + +function record_escapes!(caller::InferenceResult, estate::EscapeState, ir::IRCode) + argescapes = ArgEscapeCache(estate) + ecacheinfo = EscapeCacheInfo(argescapes, estate, ir) + return Compiler.stack_analysis_result!(caller, ecacheinfo) +end + +struct GetEscapeCache end +function (::GetEscapeCache)(codeinst::Union{CodeInstance,MethodInstance}) + codeinst isa CodeInstance || return false + ecacheinfo = Compiler.traverse_analysis_results(codeinst) do @nospecialize result + return result isa EscapeCacheInfo ? result : nothing + end + return ecacheinfo === nothing ? 
false : ecacheinfo.argescapes +end + +struct FailedAnalysis + caller::InferenceResult + ir::IRCode + nargs::Int +end + +# printing +# -------- + +using Core: Argument, SSAValue +using .Compiler: widenconst, singleton_type + +function get_name_color(x::EscapeInfo, symbol::Bool = false) + getname(x) = string(nameof(x)) + if x === EA.⊥ + name, color = (getname(EA.NotAnalyzed), "◌"), :plain + elseif EA.has_no_escape(EA.ignore_argescape(x)) + if EA.has_arg_escape(x) + name, color = (getname(EA.ArgEscape), "✓"), :cyan + else + name, color = (getname(EA.NoEscape), "✓"), :green + end + elseif EA.has_all_escape(x) + name, color = (getname(EA.AllEscape), "X"), :red + elseif EA.has_return_escape(x) + name = (getname(EA.ReturnEscape), "↑") + color = EA.has_thrown_escape(x) ? :yellow : :blue + else + name = (nothing, "*") + color = EA.has_thrown_escape(x) ? :yellow : :bold + end + name = symbol ? last(name) : first(name) + if name !== nothing && !isa(x.AliasInfo, Bool) + name = string(name, "′") + end + return name, color +end + +# pcs = sprint(show, collect(x.EscapeSites); context=:limit=>true) +function Base.show(io::IO, x::EscapeInfo) + name, color = get_name_color(x) + if isnothing(name) + @invoke show(io::IO, x::Any) + else + printstyled(io, name; color) + end +end + +function get_sym_color(x::ArgEscapeInfo) + escape_bits = x.escape_bits + if escape_bits == EA.ARG_ALL_ESCAPE + color, sym = :red, "X" + elseif escape_bits == 0x00 + color, sym = :green, "✓" + else + color, sym = :bold, "*" + if !iszero(escape_bits & EA.ARG_RETURN_ESCAPE) + color, sym = :blue, "↑" + end + if !iszero(escape_bits & EA.ARG_THROWN_ESCAPE) + color = :yellow + end + end + return sym, color +end + +function Base.show(io::IO, x::ArgEscapeInfo) + escape_bits = x.escape_bits + if escape_bits == EA.ARG_ALL_ESCAPE + color, sym = :red, "X" + elseif escape_bits == 0x00 + color, sym = :green, "✓" + else + color, sym = :bold, "*" + if !iszero(escape_bits & EA.ARG_RETURN_ESCAPE) + color, sym = :blue, "↑" + end + if !iszero(escape_bits & EA.ARG_THROWN_ESCAPE) + color = :yellow + end + end + printstyled(io, "ArgEscapeInfo(", sym, ")"; color) +end + +struct EscapeResult + ir::IRCode + state::EscapeState + mi::Union{Nothing,MethodInstance} + slotnames::Union{Nothing,Vector{Symbol}} + source::Bool + interp::Union{Nothing,EscapeAnalyzer} + function EscapeResult(ir::IRCode, state::EscapeState, + mi::Union{Nothing,MethodInstance}=nothing, + slotnames::Union{Nothing,Vector{Symbol}}=nothing, + source::Bool=false, + interp::Union{Nothing,EscapeAnalyzer}=nothing) + return new(ir, state, mi, slotnames, source, interp) + end +end +Base.show(io::IO, result::EscapeResult) = print_with_info(io, result) +@eval Base.iterate(res::EscapeResult, state=1) = + return state > $(fieldcount(EscapeResult)) ? nothing : (getfield(res, state), state+1) + +Base.show(io::IO, ecacheinfo::EscapeCacheInfo) = show(io, EscapeResult(ecacheinfo.ir, ecacheinfo.state)) + +# adapted from https://github.com/JuliaDebug/LoweredCodeUtils.jl/blob/4612349432447e868cf9285f647108f43bd0a11c/src/codeedges.jl#L881-L897 +function print_with_info(io::IO, result::EscapeResult) + (; ir, state, mi, slotnames, source) = result + # print escape information on SSA values + function preprint(io::IO) + ft = ir.argtypes[1] + f = singleton_type(ft) + if f === nothing + f = widenconst(ft) + end + print(io, f, '(') + for i in 1:state.nargs + arg = state[Argument(i)] + i == 1 && continue + c, color = get_name_color(arg, true) + slot = isnothing(slotnames) ? 
"_$i" : slotnames[i] + printstyled(io, c, ' ', slot, "::", ir.argtypes[i]; color) + i ≠ state.nargs && print(io, ", ") + end + print(io, ')') + if !isnothing(mi) + def = mi.def + printstyled(io, " in ", (isa(def, Module) ? (def,) : (def.module, " at ", def.file, ':', def.line))...; color=:bold) + end + println(io) + end + + # print escape information on SSA values + # nd = ndigits(length(ssavalues)) + function preprint(io::IO, idx::Int) + c, color = get_name_color(state[SSAValue(idx)], true) + # printstyled(io, lpad(idx, nd), ' ', c, ' '; color) + printstyled(io, rpad(c, 2), ' '; color) + end + + print_with_info(preprint, (args...)->nothing, io, ir, source) +end + +function print_with_info(preprint, postprint, io::IO, ir::IRCode, source::Bool) + io = IOContext(io, :displaysize=>displaysize(io)) + used = Compiler.IRShow.stmts_used(io, ir) + if source + line_info_preprinter = function (io::IO, indent::String, idx::Int) + r = Compiler.IRShow.inline_linfo_printer(ir)(io, indent, idx) + idx ≠ 0 && preprint(io, idx) + return r + end + else + line_info_preprinter = Compiler.IRShow.lineinfo_disabled + end + line_info_postprinter = Compiler.IRShow.default_expr_type_printer + preprint(io) + bb_idx_prev = bb_idx = 1 + for idx = 1:length(ir.stmts) + preprint(io, idx) + bb_idx = Compiler.IRShow.show_ir_stmt(io, ir, idx, line_info_preprinter, line_info_postprinter, ir.sptypes, used, ir.cfg, bb_idx) + postprint(io, idx, bb_idx != bb_idx_prev) + bb_idx_prev = bb_idx + end + max_bb_idx_size = ndigits(length(ir.cfg.blocks)) + line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0) + postprint(io) + return nothing +end + +# entries +# ------- + +using InteractiveUtils: gen_call_with_extracted_types_and_kwargs + +""" + @code_escapes [options...] f(args...) + +Evaluates the arguments to the function call, determines its types, and then calls +[`code_escapes`](@ref) on the resulting expression. +As with `@code_typed` and its family, any of `code_escapes` keyword arguments can be given +as the optional arguments like `@code_escapes optimize=false myfunc(myargs...)`. +""" +macro code_escapes(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :code_escapes, ex0) +end + +""" + code_escapes(f, argtypes=Tuple{}; [world::UInt], [debuginfo::Symbol]) -> result::EscapeResult + code_escapes(mi::MethodInstance; [world::UInt], [interp::EscapeAnalyzer], [debuginfo::Symbol]) -> result::EscapeResult + +Runs the escape analysis on optimized IR of a generic function call with the given type signature, +while caching the analysis results. + +# Keyword Arguments + +- `world::UInt = Base.get_world_counter()`: + controls the world age to use when looking up methods, use current world age if not specified. +- `cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN`: + specifies the cache token to use, by default a global token is used so that the analysis + can use the caches from previous invocations. If you with to use a fresh cache and perform + a new analysis, specify a new `EscapeAnalyzerCacheToken` instance. +- `interp::EscapeAnalyzer = EscapeAnalyzer(world, cache_token)`: + specifies the escape analyzer to use. +- `debuginfo::Symbol = :none`: + controls the amount of code metadata present in the output, possible options are `:none` or `:source`. 
+""" +function code_escapes(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); + world::UInt = get_world_counter(), + cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN, + debuginfo::Symbol = :none) + tt = Base.signature_type(f, types) + match = Base._which(tt; world, raise=true) + mi = Compiler.specialize_method(match) + return code_escapes(mi; world, cache_token, debuginfo) +end + +function code_escapes(mi::MethodInstance; + world::UInt = get_world_counter(), + cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN, + interp::EscapeAnalyzer=EscapeAnalyzer(world, cache_token; entry_mi=mi), + debuginfo::Symbol = :none) + frame = Compiler.typeinf_frame(interp, mi, #=run_optimizer=#true) + isdefined(interp, :result) || error("optimization didn't happen: maybe everything has been constant folded?") + slotnames = let src = frame.src + src isa CodeInfo ? src.slotnames : nothing + end + return EscapeResult(interp.result.ir, interp.result.estate, interp.result.mi, + slotnames, debuginfo === :source, interp) +end + +""" + code_escapes(ir::IRCode, nargs::Int; [world::UInt], [interp::AbstractInterpreter]) -> result::EscapeResult + +Runs the escape analysis on `ir::IRCode`. +`ir` is supposed to be optimized already, specifically after inlining has been applied. +Note that this version does not cache the analysis results. + +# Keyword Arguments + +- `world::UInt = Base.get_world_counter()`: + controls the world age to use when looking up methods, use current world age if not specified. +- `cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN`: + specifies the cache token to use, by default a global token is used so that the analysis + can use the caches from previous invocations. If you with to use a fresh cache and perform + a new analysis, specify a new `EscapeAnalyzerCacheToken` instance. +- `interp::AbstractInterpreter = EscapeAnalyzer(world, cache_token)`: + specifies the abstract interpreter to use, by default a new `EscapeAnalyzer` with an empty cache is created. +""" +function code_escapes(ir::IRCode, nargs::Int; + world::UInt = get_world_counter(), + cache_token::EscapeAnalyzerCacheToken = GLOBAL_EA_CACHE_TOKEN, + interp::AbstractInterpreter=EscapeAnalyzer(world, cache_token)) + estate = analyze_escapes(ir, nargs, Compiler.optimizer_lattice(interp), Compiler.get_escape_cache(interp)) + return EscapeResult(ir, estate) # return back the result +end + +# in order to run a whole analysis from ground zero (e.g. for benchmarking, etc.) 
+__clear_cache!() = empty!(GLOBAL_EA_CODE_CACHE) + +end # module EAUtils diff --git a/Compiler/test/EscapeAnalysis.jl b/Compiler/test/EscapeAnalysis.jl new file mode 100644 index 0000000000000..60364769c95a8 --- /dev/null +++ b/Compiler/test/EscapeAnalysis.jl @@ -0,0 +1,1711 @@ +module test_EA + +include("irutils.jl") + +const EscapeAnalysis = Compiler.EscapeAnalysis + +include("EAUtils.jl") + +using Test, .EscapeAnalysis, .EAUtils +using .EscapeAnalysis: ignore_argescape + +let utils_ex = quote + mutable struct SafeRef{T} + x::T + end + Base.getindex(s::SafeRef) = getfield(s, 1) + Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x) + + mutable struct SafeRefs{S,T} + x1::S + x2::T + end + Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx) + Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x) + + global GV::Any + const global GR = Ref{Any}() + end + global function EATModule(utils_ex = utils_ex) + M = Module() + Core.eval(M, utils_ex) + return M + end + Core.eval(@__MODULE__, utils_ex) +end + +using .EscapeAnalysis: EscapeInfo, IndexableFields + +isϕ(@nospecialize x) = isa(x, Core.PhiNode) +""" + is_load_forwardable(x::EscapeInfo) -> Bool + +Queries if `x` is elibigle for store-to-load forwarding optimization. +""" +function is_load_forwardable(x::EscapeInfo) + AliasInfo = x.AliasInfo + # NOTE technically we also need to check `!has_thrown_escape(x)` here as well, + # but we can also do equivalent check during forwarding + return isa(AliasInfo, IndexableFields) +end + +@testset "EAUtils" begin + @test_throws "everything has been constant folded" code_escapes() do; sin(42); end + @test code_escapes(sin, (Int,)) isa EAUtils.EscapeResult + @test code_escapes(sin, (Int,)) isa EAUtils.EscapeResult +end + +@testset "basics" begin + let # arg return + result = code_escapes((Any,)) do a # return to caller + println("prevent ConstABI") + return nothing + end + @test has_arg_escape(result.state[Argument(2)]) + # return + result = code_escapes((Any,)) do a + println("prevent ConstABI") + return a + end + i = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_arg_escape(result.state[Argument(1)]) # self + @test !has_return_escape(result.state[Argument(1)], i) # self + @test has_arg_escape(result.state[Argument(2)]) # a + @test has_return_escape(result.state[Argument(2)], i) # a + end + let # global store + result = code_escapes((Any,)) do a + global GV = a + nothing + end + @test has_all_escape(result.state[Argument(2)]) + end + let # global load + result = code_escapes() do + global GV + return GV + end + i = only(findall(has_return_escape, map(i->result.state[SSAValue(i)], 1:length(result.ir.stmts)))) + @test has_all_escape(result.state[SSAValue(i)]) + end + let # global store / load (https://github.com/aviatesk/EscapeAnalysis.jl/issues/56) + result = code_escapes((Any,)) do s + global GV + GV = s + return GV + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + end + let # :gc_preserve_begin / :gc_preserve_end + result = code_escapes((String,)) do s + m = SafeRef(s) + GC.@preserve m begin + println(s) + return nothing + end + end + i = findfirst(==(SafeRef{String}), result.ir.stmts.type) # find allocation statement + @test !isnothing(i) + @test has_no_escape(result.state[SSAValue(i)]) + end + let # :isdefined + result = code_escapes((String, Bool,)) do a, b + if b + s = Ref(a) + end + return @isdefined(s) + end + i = findfirst(==(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement + @test 
isnothing(i) || has_no_escape(result.state[SSAValue(i)]) + end + let # ϕ-node + result = code_escapes((Bool,Any,Any)) do cond, a, b + c = cond ? a : b # ϕ(a, b) + return c + end + @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.stmt) + i = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(3)], i) # a + @test has_return_escape(result.state[Argument(4)], i) # b + end + let # π-node + result = code_escapes((Any,)) do a + if isa(a, Regex) # a::π(Regex) + return a + end + return nothing + end + @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.stmt) + @test any(findall(isreturn, result.ir.stmts.stmt)) do i + has_return_escape(result.state[Argument(2)], i) + end + end + let # φᶜ-node / ϒ-node + result = code_escapes((Any,String)) do a, b + local x::String + try + x = a + catch err + x = b + end + return x + end + @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.stmt) + @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.stmt) + i = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], i) + @test has_return_escape(result.state[Argument(3)], i) + end + let # branching + result = code_escapes((Any,Bool,)) do a, c + if c + return nothing # a doesn't escape in this branch + else + return a # a escapes to a caller + end + end + @test has_return_escape(result.state[Argument(2)]) + end + let # loop + result = code_escapes((Int,)) do n + c = SafeRef{Bool}(false) + while n > 0 + rand(Bool) && return c + end + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)]) + end + let # try/catch + result = code_escapes((Any,)) do a + try + println("prevent ConstABI") + nothing + catch err + return a # return escape + end + end + @test has_return_escape(result.state[Argument(2)]) + end + let result = code_escapes((Any,)) do a + try + println("prevent ConstABI") + nothing + finally + return a # return escape + end + end + @test has_return_escape(result.state[Argument(2)]) + end + let # :foreigncall + result = code_escapes((Any,)) do x + ccall(:some_ccall, Any, (Any,), x) + end + @test has_all_escape(result.state[Argument(2)]) + end +end + +@testset "builtins" begin + let # throw + r = code_escapes((Any,)) do a + throw(a) + end + @test has_thrown_escape(r.state[Argument(2)]) + end + + let # implicit throws + r = code_escapes((Any,)) do a + getfield(a, :may_not_field) + end + @test has_thrown_escape(r.state[Argument(2)]) + + r = code_escapes((Any,)) do a + sizeof(a) + end + @test has_thrown_escape(r.state[Argument(2)]) + end + + let # :=== + result = code_escapes((Bool, SafeRef{String})) do cond, s + m = cond ? 
s : nothing + c = m === nothing + return c + end + @test has_no_escape(ignore_argescape(result.state[Argument(2)])) + end + + let # sizeof + result = code_escapes((Vector{Any},)) do xs + sizeof(xs) + end + @test has_no_escape(ignore_argescape(result.state[Argument(2)])) + end + + let # ifelse + result = code_escapes((Bool,)) do c + r = ifelse(c, Ref("yes"), Ref("no")) + return r + end + inds = findall(isnew, result.ir.stmts.stmt) + @assert !isempty(inds) + for i in inds + @test has_return_escape(result.state[SSAValue(i)]) + end + end + let # ifelse (with constant condition) + result = code_escapes() do + r = ifelse(true, Ref("yes"), Ref(nothing)) + return r + end + for i in 1:length(result.ir.stmts) + if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Base.RefValue{String} + @test has_return_escape(result.state[SSAValue(i)]) + elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Base.RefValue{Nothing} + @test has_no_escape(result.state[SSAValue(i)]) + end + end + end + + let # typeassert + result = code_escapes((Any,)) do x + y = x::Base.RefValue{Any} + return y + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + @test !has_all_escape(result.state[Argument(2)]) + end + + let # isdefined + result = code_escapes((Any,)) do x + isdefined(x, :foo) ? x : throw("undefined") + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + @test !has_all_escape(result.state[Argument(2)]) + end +end + +@testset "flow-sensitivity" begin + # ReturnEscape + let result = code_escapes((Bool,)) do cond + r = Ref("foo") + if cond + return cond + end + return r + end + i = only(findall(isnew, result.ir.stmts.stmt)) + rts = findall(isreturn, result.ir.stmts.stmt) + @assert length(rts) == 2 + @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 1 + end + let result = code_escapes((Bool,)) do cond + r = Ref("foo") + cnt = 0 + while rand(Bool) + cnt += 1 + rand(Bool) && return r + end + rand(Bool) && return r + return cnt + end + i = only(findall(isnew, result.ir.stmts.stmt)) + rts = findall(isreturn, result.ir.stmts.stmt) # return statement + @assert length(rts) == 3 + @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 2 + end +end + +@testset "escape through exceptions" begin + M = @eval Module() begin + unsafeget(x) = isassigned(x) ? 
x[] : throw(x) + @noinline function escape_rethrow!() + try + rethrow() + catch err + GR[] = err + end + end + @noinline function escape_current_exceptions!() + excs = Base.current_exceptions() + GR[] = excs + end + const GR = Ref{Any}() + @__MODULE__ + end + + let # simple: return escape + result = @eval M $code_escapes() do + r = Ref{String}() + local ret + try + s = unsafeget(r) + ret = sizeof(s) + catch err + ret = err + end + return ret + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)]) + end + + let # simple: global escape + result = @eval M $code_escapes() do + r = Ref{String}() + local ret # prevent DCE + try + s = unsafeget(r) + ret = sizeof(s) + catch err + global GV = err + end + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + + let # account for possible escapes via nested throws + result = @eval M $code_escapes() do + r = Ref{String}() + try + try + unsafeget(r) + catch err1 + throw(err1) + end + catch err2 + GR[] = err2 + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + let # account for possible escapes via `rethrow` + result = @eval M $code_escapes() do + r = Ref{String}() + try + try + unsafeget(r) + catch err1 + rethrow(err1) + end + catch err2 + GR[] = err2 + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + let # account for possible escapes via `rethrow` + result = @eval M $code_escapes() do + try + r = Ref{String}() + unsafeget(r) + catch + escape_rethrow!() + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + let # account for possible escapes via `rethrow` + result = @eval M $code_escapes() do + local t + try + r = Ref{String}() + t = unsafeget(r) + catch err + t = typeof(err) + escape_rethrow!() + end + return t + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + let # account for possible escapes via `Base.current_exceptions` + result = @eval M $code_escapes() do + try + r = Ref{String}() + unsafeget(r) + catch + GR[] = Base.current_exceptions() + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + let # account for possible escapes via `Base.current_exceptions` + result = @eval M $code_escapes() do + try + r = Ref{String}() + unsafeget(r) + catch + escape_current_exceptions!() + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + end + + let # contextual: escape information imposed on `err` shouldn't propagate to `r2`, but only to `r1` + result = @eval M $code_escapes() do + r1 = Ref{String}() + r2 = Ref{String}() + local ret + try + s1 = unsafeget(r1) + ret = sizeof(s1) + catch err + global GV = err + end + s2 = unsafeget(r2) + return s2, r2 + end + is = findall(isnew, result.ir.stmts.stmt) + @test length(is) == 2 + i1, i2 = is + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i1)]) + @test !has_all_escape(result.state[SSAValue(i2)]) + @test has_return_escape(result.state[SSAValue(i2)], r) + end + + # XXX test cases below are currently broken because of the technical reason described in `escape_exception!` + + let # limited propagation: exception is caught within a frame => doesn't escape to a caller + result 
= @eval M $code_escapes() do + r = Ref{String}() + local ret + try + s = unsafeget(r) + ret = sizeof(s) + catch + ret = nothing + end + return ret + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)], r) # TODO? see `escape_exception!` + end + let # sequential: escape information imposed on `err1` and `err2 should propagate separately + result = @eval M $code_escapes() do + r1 = Ref{String}() + r2 = Ref{String}() + local ret + try + s1 = unsafeget(r1) + ret = sizeof(s1) + catch err1 + global GV = err1 + end + try + s2 = unsafeget(r2) + ret = sizeof(s2) + catch err2 + ret = err2 + end + return ret + end + is = findall(isnew, result.ir.stmts.stmt) + @test length(is) == 2 + i1, i2 = is + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i1)]) + @test has_return_escape(result.state[SSAValue(i2)], r) + @test_broken !has_all_escape(result.state[SSAValue(i2)]) # TODO? see `escape_exception!` + end + let # nested: escape information imposed on `inner` shouldn't propagate to `s` + result = @eval M $code_escapes() do + r = Ref{String}() + local ret + try + s = unsafeget(r) + try + ret = sizeof(s) + catch inner + return inner + end + catch outer + ret = nothing + end + return ret + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)]) + end + let # merge: escape information imposed on `err1` and `err2 should be merged + result = @eval M $code_escapes() do + r = Ref{String}() + local ret + try + s = unsafeget(r) + ret = sizeof(s) + catch err1 + return err1 + end + try + s = unsafeget(r) + ret = sizeof(s) + catch err2 + return err2 + end + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + rs = findall(isreturn, result.ir.stmts.stmt) + @test_broken !has_all_escape(result.state[SSAValue(i)]) + for r in rs + @test has_return_escape(result.state[SSAValue(i)], r) + end + end + let # no exception handling: should keep propagating the escape + result = @eval M $code_escapes() do + r = Ref{String}() + local ret + try + s = unsafeget(r) + ret = sizeof(s) + finally + if !@isdefined(ret) + ret = 42 + end + end + return ret + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)], r) + end +end + +@testset "field analysis / alias analysis" begin + # escaped allocations + # ------------------- + + # escaped object should escape its fields as well + let result = code_escapes((Any,)) do a + global GV = SafeRef{Any}(a) + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + @test has_all_escape(result.state[Argument(2)]) + end + let result = code_escapes((Any,)) do a + global GV = (a,) + nothing + end + i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + @test has_all_escape(result.state[Argument(2)]) + end + let result = code_escapes((Any,)) do a + o0 = SafeRef{Any}(a) + global GV = SafeRef(o0) + nothing + end + is = findall(isnew, result.ir.stmts.stmt) + @test length(is) == 2 + i0, i1 = is + @test has_all_escape(result.state[SSAValue(i0)]) + @test has_all_escape(result.state[SSAValue(i1)]) + @test has_all_escape(result.state[Argument(2)]) + end + let result = code_escapes((Any,)) do a + t0 = (a,) + global GV = (t0,) + nothing + end + inds = 
findall(iscall((result.ir, tuple)), result.ir.stmts.stmt) + @assert length(inds) == 2 + for i in inds; @test has_all_escape(result.state[SSAValue(i)]); end + @test has_all_escape(result.state[Argument(2)]) + end + # global escape through `setfield!` + let result = code_escapes((Any,)) do a + r = SafeRef{Any}(:init) + global GV = r + r[] = a + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + @test has_all_escape(result.state[Argument(2)]) + end + let result = code_escapes((Any,Any)) do a, b + r = SafeRef{Any}(a) + global GV = r + r[] = b + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) + @test has_all_escape(result.state[Argument(2)]) # a + @test has_all_escape(result.state[Argument(3)]) # b + end + let result = @eval EATModule() begin + const Rx = SafeRef(Ref("")) + $code_escapes((Base.RefValue{String},)) do s + Rx[] = s + Core.sizeof(Rx[]) + end + end + @test has_all_escape(result.state[Argument(2)]) + end + let result = @eval EATModule() begin + const Rx = SafeRef{Any}(nothing) + $code_escapes((Base.RefValue{String},)) do s + setfield!(Rx, :x, s) + Core.sizeof(Rx[]) + end + end + @test has_all_escape(result.state[Argument(2)]) + end + let M = EATModule() + @eval M module ___xxx___ + import ..SafeRef + const Rx = SafeRef("Rx") + end + result = @eval M begin + $code_escapes((String,)) do s + rx = getfield(___xxx___, :Rx) + rx[] = s + nothing + end + end + @test has_all_escape(result.state[Argument(2)]) + end + + # field escape + # ------------ + + # field escape should propagate to :new arguments + let result = code_escapes((Base.RefValue{String},)) do a + o = SafeRef(a) + Core.donotdelete(o) + return o[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + let result = code_escapes((Base.RefValue{String},)) do a + t = SafeRef((a,)) + f = t[][1] + return f + end + i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + let result = code_escapes((Base.RefValue{String}, Base.RefValue{String})) do a, b + obj = SafeRefs(a, b) + Core.donotdelete(obj) + fld1 = obj[1] + fld2 = obj[2] + return (fld1, fld2) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test has_return_escape(result.state[Argument(3)], r) # b + @test is_load_forwardable(result.state[SSAValue(i)]) + end + + # field escape should propagate to `setfield!` argument + let result = code_escapes((Base.RefValue{String},)) do a + o = SafeRef(Ref("foo")) + Core.donotdelete(o) + o[] = a + return o[] + end + i = last(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + # propagate escape information imposed on return value of `setfield!` call + let result = code_escapes((Base.RefValue{String},)) do a + obj = SafeRef(Ref("foo")) + Core.donotdelete(obj) + return (obj[] = a) + end + i = last(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test 
has_return_escape(result.state[Argument(2)], r) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + + # nested allocations + let result = code_escapes((Base.RefValue{String},)) do a + o1 = SafeRef(a) + o2 = SafeRef(o1) + return o2[] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + for i in 1:length(result.ir.stmts) + if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == SafeRef{String} + @test has_return_escape(result.state[SSAValue(i)], r) + elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == SafeRef{SafeRef{String}} + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + end + let result = code_escapes((Base.RefValue{String},)) do a + o1 = (a,) + o2 = (o1,) + return o2[1] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + for i in 1:length(result.ir.stmts) + if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Tuple{String} + @test has_return_escape(result.state[SSAValue(i)], r) + elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Tuple{Tuple{String}} + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + end + let result = code_escapes((Base.RefValue{String},)) do a + o1 = SafeRef(a) + o2 = SafeRef(o1) + o1′ = o2[] + a′ = o1′[] + return a′ + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + for i in findall(isnew, result.ir.stmts.stmt) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + let result = code_escapes() do + o1 = SafeRef("foo") + o2 = SafeRef(o1) + return o2 + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + for i in findall(isnew, result.ir.stmts.stmt) + @test has_return_escape(result.state[SSAValue(i)], r) + end + end + let result = code_escapes() do + o1 = SafeRef("foo") + o2′ = SafeRef(nothing) + o2 = SafeRef{SafeRef}(o2′) + o2[] = o1 + return o2 + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + findall(1:length(result.ir.stmts)) do i + if isnew(result.ir.stmts[i][:stmt]) + t = result.ir.stmts[i][:type] + return t === SafeRef{String} || # o1 + t === SafeRef{SafeRef} # o2 + end + return false + end |> x->foreach(x) do i + @test has_return_escape(result.state[SSAValue(i)], r) + end + end + let result = code_escapes((Base.RefValue{String},)) do x + o = Ref(x) + Core.donotdelete(o) + broadcast(identity, o) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + + # ϕ-node allocations + let result = code_escapes((Bool,Any,Any)) do cond, x, y + if cond + ϕ = SafeRef{Any}(x) + else + ϕ = SafeRef{Any}(y) + end + return ϕ[] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(3)], r) # x + @test has_return_escape(result.state[Argument(4)], r) # y + i = only(findall(isϕ, result.ir.stmts.stmt)) + @test is_load_forwardable(result.state[SSAValue(i)]) + for i in findall(isnew, result.ir.stmts.stmt) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + let result = code_escapes((Bool,Any,Any)) do cond, x, y + if cond + ϕ2 = ϕ1 = SafeRef{Any}(x) + else + ϕ2 = ϕ1 = SafeRef{Any}(y) + end + return ϕ1[], ϕ2[] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(3)], r) # x + @test 
has_return_escape(result.state[Argument(4)], r) # y + for i in findall(isϕ, result.ir.stmts.stmt) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + for i in findall(isnew, result.ir.stmts.stmt) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + # when ϕ-node merges values with different types + let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},Base.RefValue{String})) do cond, x, y, z + local out + if cond + ϕ = SafeRef(x) + out = ϕ[] + else + ϕ = SafeRefs(z, y) + end + return @isdefined(out) ? out : throw(ϕ) + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + t = only(findall(iscall((result.ir, throw)), result.ir.stmts.stmt)) + ϕ = only(findall(==(Union{SafeRef{Base.RefValue{String}},SafeRefs{Base.RefValue{String},Base.RefValue{String}}}), result.ir.stmts.type)) + @test has_return_escape(result.state[Argument(3)], r) # x + @test !has_return_escape(result.state[Argument(4)], r) # y + @test has_return_escape(result.state[Argument(5)], r) # z + @test has_thrown_escape(result.state[SSAValue(ϕ)], t) + end + + # alias analysis + # -------------- + + # alias via getfield & Expr(:new) + let result = code_escapes((String,)) do s + r = SafeRef(s) + Core.donotdelete(r) + return r[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test isaliased(Argument(2), val, result.state) + @test !isaliased(Argument(2), SSAValue(i), result.state) + end + let result = code_escapes((String,)) do s + r1 = SafeRef(s) + r2 = SafeRef(r1) + Core.donotdelete(r1, r2) + return r2[] + end + i1, i2 = findall(isnew, result.ir.stmts.stmt) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test !isaliased(SSAValue(i1), SSAValue(i2), result.state) + @test isaliased(SSAValue(i1), val, result.state) + @test !isaliased(SSAValue(i2), val, result.state) + end + let result = code_escapes((String,)) do s + r1 = SafeRef(s) + r2 = SafeRef(r1) + Core.donotdelete(r1, r2) + return r2[][] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test isaliased(Argument(2), val, result.state) + for i in findall(isnew, result.ir.stmts.stmt) + @test !isaliased(SSAValue(i), val, result.state) + end + end + let result = @eval EATModule() begin + const Rx = SafeRef("Rx") + $code_escapes((String,)) do s + r = SafeRef(Rx) + Core.donotdelete(r) + rx = r[] # rx aliased to Rx + rx[] = s + nothing + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[Argument(2)]) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + # alias via getfield & setfield! 
+ let result = code_escapes((String,)) do s + r = Ref{String}() + Core.donotdelete(r) + r[] = s + return r[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test isaliased(Argument(2), val, result.state) + @test !isaliased(Argument(2), SSAValue(i), result.state) + end + let result = code_escapes((String,)) do s + r1 = Ref(s) + r2 = Ref{Base.RefValue{String}}() + Core.donotdelete(r1, r2) + r2[] = r1 + return r2[] + end + i1, i2 = findall(isnew, result.ir.stmts.stmt) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test !isaliased(SSAValue(i1), SSAValue(i2), result.state) + @test isaliased(SSAValue(i1), val, result.state) + @test !isaliased(SSAValue(i2), val, result.state) + end + let result = code_escapes((String,)) do s + r1 = Ref{String}() + r2 = Ref{Base.RefValue{String}}() + Core.donotdelete(r1, r2) + r2[] = r1 + r1[] = s + return r2[][] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test isaliased(Argument(2), val, result.state) + for i in findall(isnew, result.ir.stmts.stmt) + @test !isaliased(SSAValue(i), val, result.state) + end + result = code_escapes((String,)) do s + r1 = Ref{String}() + r2 = Ref{Base.RefValue{String}}() + r1[] = s + r2[] = r1 + return r2[][] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test isaliased(Argument(2), val, result.state) + for i in findall(isnew, result.ir.stmts.stmt) + @test !isaliased(SSAValue(i), val, result.state) + end + end + let result = @eval EATModule() begin + const Rx = SafeRef("Rx") + $code_escapes((SafeRef{String}, String,)) do _rx, s + r = SafeRef(_rx) + Core.donotdelete(r) + r[] = Rx + rx = r[] # rx aliased to Rx + rx[] = s + nothing + end + end + i = findfirst(isnew, result.ir.stmts.stmt) + @test has_all_escape(result.state[Argument(3)]) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + # alias via typeassert + let result = code_escapes((Any,)) do a + r = a::Base.RefValue{String} + return r + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test has_return_escape(result.state[Argument(2)], r) # a + @test isaliased(Argument(2), val, result.state) # a <-> r + end + let result = code_escapes((Any,)) do a + global GV + (g::SafeRef{Any})[] = a + nothing + end + @test has_all_escape(result.state[Argument(2)]) + end + # alias via ifelse + let result = code_escapes((Bool,Any,Any)) do c, a, b + r = ifelse(c, a, b) + return r + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test has_return_escape(result.state[Argument(3)], r) # a + @test has_return_escape(result.state[Argument(4)], r) # b + @test !isaliased(Argument(2), val, result.state) # c r + @test isaliased(Argument(3), val, result.state) # a <-> r + @test isaliased(Argument(4), val, result.state) # b <-> r + end + let result = @eval EATModule() begin + const Lx, Rx = SafeRef("Lx"), SafeRef("Rx") + $code_escapes((Bool,String,)) do c, a + r = ifelse(c, Lx, Rx) + r[] = a + nothing + end + end + @test has_all_escape(result.state[Argument(3)]) # a + end + # alias via ϕ-node + let result = code_escapes((Bool,Base.RefValue{String})) do cond, x + if cond + ϕ2 = ϕ1 = SafeRef(Ref("foo")) + else + ϕ2 = ϕ1 = 
SafeRef(Ref("bar")) + end + ϕ2[] = x + return ϕ1[] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test has_return_escape(result.state[Argument(3)], r) # x + @test isaliased(Argument(3), val, result.state) # x + for i in findall(isϕ, result.ir.stmts.stmt) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + for i in findall(isnew, result.ir.stmts.stmt) + if result.ir[SSAValue(i)][:type] <: SafeRef + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + end + let result = code_escapes((Bool,Bool,Base.RefValue{String})) do cond1, cond2, x + if cond1 + ϕ2 = ϕ1 = SafeRef(Ref("foo")) + else + ϕ2 = ϕ1 = SafeRef(Ref("bar")) + end + cond2 && (ϕ2[] = x) + return ϕ1[] + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test has_return_escape(result.state[Argument(4)], r) # x + @test isaliased(Argument(4), val, result.state) # x + for i in findall(isϕ, result.ir.stmts.stmt) + @test is_load_forwardable(result.state[SSAValue(i)]) + end + for i in findall(isnew, result.ir.stmts.stmt) + if result.ir[SSAValue(i)][:type] <: SafeRef + @test is_load_forwardable(result.state[SSAValue(i)]) + end + end + end + # alias via π-node + let result = code_escapes((Any,)) do x + if isa(x, Base.RefValue{String}) + return x + end + throw("error!") + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + rval = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue + @test has_return_escape(result.state[Argument(2)], r) # x + @test isaliased(Argument(2), rval, result.state) + end + let result = code_escapes((String,)) do x + global GV + l = g + if isa(l, SafeRef{String}) + l[] = x + end + nothing + end + @test has_all_escape(result.state[Argument(2)]) # x + end + # circular reference + let result = code_escapes() do + x = Ref{Any}() + x[] = x + return x[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) + end + let result = @eval Module() begin + const Rx = Ref{Any}() + Rx[] = Rx + $code_escapes() do + r = Rx[]::Base.RefValue{Any} + return r[] + end + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + for i in findall(iscall((result.ir, getfield)), result.ir.stmts.stmt) + @test has_return_escape(result.state[SSAValue(i)], r) + end + end + let result = @eval Module() begin + @noinline function genr() + r = Ref{Any}() + r[] = r + return r + end + $code_escapes() do + x = genr() + return x[] + end + end + i = only(findall(isinvoke(:genr), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) + end + + # dynamic semantics + # ----------------- + + # conservatively handle untyped objects + let result = @eval code_escapes((Any,Any,)) do T, x + obj = $(Expr(:new, :T, :x)) + end + t = only(findall(isnew, result.ir.stmts.stmt)) + @test #=T=# has_thrown_escape(result.state[Argument(2)], t) # T + @test #=x=# has_thrown_escape(result.state[Argument(3)], t) # x + end + let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z + obj = $(Expr(:new, :T, :x, :y)) + return getfield(obj, :x) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test #=x=# has_return_escape(result.state[Argument(3)], r) + @test #=y=# has_return_escape(result.state[Argument(4)], r) + @test #=z=# !has_return_escape(result.state[Argument(5)], 
r) + end + let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z + obj = $(Expr(:new, :T, :x)) + setfield!(obj, :x, y) + return getfield(obj, :x) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test #=x=# has_return_escape(result.state[Argument(3)], r) + @test #=y=# has_return_escape(result.state[Argument(4)], r) + @test #=z=# !has_return_escape(result.state[Argument(5)], r) + end + + # conservatively handle unknown field: + # all fields should be escaped, but the allocation itself doesn't need to be escaped + let result = code_escapes((Base.RefValue{String}, Symbol)) do a, fld + obj = SafeRef(a) + return getfield(obj, fld) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test !is_load_forwardable(result.state[SSAValue(i)]) # obj + end + let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Symbol)) do a, b, fld + obj = SafeRefs(a, b) + return getfield(obj, fld) # should escape both `a` and `b` + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test has_return_escape(result.state[Argument(3)], r) # b + @test !is_load_forwardable(result.state[SSAValue(i)]) # obj + end + let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Int)) do a, b, idx + obj = SafeRefs(a, b) + return obj[idx] # should escape both `a` and `b` + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test has_return_escape(result.state[Argument(3)], r) # b + @test !is_load_forwardable(result.state[SSAValue(i)]) # obj + end + let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Symbol)) do a, b, fld + obj = SafeRefs(Ref("a"), Ref("b")) + setfield!(obj, fld, a) + return obj[2] # should escape `a` + end + i = last(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test !has_return_escape(result.state[Argument(3)], r) # b + @test !is_load_forwardable(result.state[SSAValue(i)]) # obj + end + let result = code_escapes((Base.RefValue{String}, Symbol)) do a, fld + obj = SafeRefs(Ref("a"), Ref("b")) + setfield!(obj, fld, a) + return obj[1] # this should escape `a` + end + i = last(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test !is_load_forwardable(result.state[SSAValue(i)]) # obj + end + let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Int)) do a, b, idx + obj = SafeRefs(Ref("a"), Ref("b")) + obj[idx] = a + return obj[2] # should escape `a` + end + i = last(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) # a + @test !has_return_escape(result.state[Argument(3)], r) # b + @test !is_load_forwardable(result.state[SSAValue(i)]) # obj + end + + # interprocedural + # --------------- + + let result = @eval EATModule() begin + @noinline getx(obj) = obj[] + $code_escapes((Base.RefValue{String},)) do a + obj = SafeRef(a) + fld = getx(obj) + return fld + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = 
only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(2)], r) + # NOTE we can't scalar replace `obj`, but still we may want to stack allocate it + @test_broken is_load_forwardable(result.state[SSAValue(i)]) + end + + # TODO interprocedural alias analysis + let result = code_escapes((SafeRef{Base.RefValue{String}},)) do s + s[] = Ref("bar") + global GV = s[] + nothing + end + @test_broken !has_all_escape(result.state[Argument(2)]) + end + + # aliasing between arguments + let result = @eval EATModule() begin + @noinline setxy!(x, y) = x[] = y + $code_escapes((String,)) do y + x = SafeRef("init") + setxy!(x, y) + return x + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) + @test has_return_escape(result.state[Argument(2)], r) # y + end + let result = @eval EATModule() begin + @noinline setxy!(x, y) = x[] = y + $code_escapes((String,)) do y + x1 = SafeRef("init") + x2 = SafeRef(y) + Core.donotdelete(x1, x2) + setxy!(x1, x2[]) + return x1 + end + end + i1, i2 = findall(isnew, result.ir.stmts.stmt) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i1)], r) + @test !has_return_escape(result.state[SSAValue(i2)], r) + @test has_return_escape(result.state[Argument(2)], r) # y + end + let result = @eval EATModule() begin + @noinline mysetindex!(x, a) = x[1] = a + const Ax = Vector{Any}(undef, 1) + $code_escapes((Base.RefValue{String},)) do s + mysetindex!(Ax, s) + end + end + @test has_all_escape(result.state[Argument(2)]) # s + end + + # TODO flow-sensitivity? + # ---------------------- + + let result = code_escapes((Any,Any)) do a, b + r = SafeRef{Any}(a) + Core.donotdelete(r) + r[] = b + return r[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[Argument(2)], r) # a + @test has_return_escape(result.state[Argument(3)], r) # b + @test is_load_forwardable(result.state[SSAValue(i)]) + end + let result = code_escapes((Any,Any)) do a, b + r = SafeRef{Any}(:init) + Core.donotdelete(r) + r[] = a + r[] = b + return r[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[Argument(2)], r) # a + @test has_return_escape(result.state[Argument(3)], r) # b + @test is_load_forwardable(result.state[SSAValue(i)]) + end + let result = code_escapes((Any,Any,Bool)) do a, b, cond + r = SafeRef{Any}(:init) + Core.donotdelete(r) + if cond + r[] = a + return r[] + else + r[] = b + return nothing + end + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test is_load_forwardable(result.state[SSAValue(i)]) + r = only(findall(result.ir.stmts.stmt) do @nospecialize x + isreturn(x) && isa(x.val, Core.SSAValue) + end) + @test has_return_escape(result.state[Argument(2)], r) # a + @test_broken !has_return_escape(result.state[Argument(3)], r) # b + end + + # handle conflicting field information correctly + let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},)) do cnd, baz, qux + if cnd + o = SafeRef(Ref("foo")) + else + o = SafeRefs(Ref("bar"), baz) + r = getfield(o, 2) + end + if cnd + o = o::SafeRef + setfield!(o, 1, qux) + r = getfield(o, 1) + end + r + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(3)], r) # baz + @test 
has_return_escape(result.state[Argument(4)], r) # qux + for new in findall(isnew, result.ir.stmts.stmt) + if !(result.ir[SSAValue(new)][:type] <: Base.RefValue) + @test is_load_forwardable(result.state[SSAValue(new)]) + end + end + end + let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},)) do cnd, baz, qux + if cnd + o = SafeRefs(Ref("foo"), Ref("bar")) + r = setfield!(o, 2, baz) + else + o = SafeRef(qux) + end + if !cnd + o = o::SafeRef + r = getfield(o, 1) + end + r + end + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[Argument(3)], r) # baz + @test has_return_escape(result.state[Argument(4)], r) # qux + end + + # foreigncall should disable field analysis + let result = code_escapes((Any,Nothing,Int,UInt)) do t, mt, lim, world + ambig = false + min = Ref{UInt}(typemin(UInt)) + max = Ref{UInt}(typemax(UInt)) + has_ambig = Ref{Int32}(0) + mt = ccall(:jl_matching_methods, Any, + (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ref{Int32}), + t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool} + return mt, has_ambig[] + end + for i in findall(isnew, result.ir.stmts.stmt) + @test !is_load_forwardable(result.state[SSAValue(i)]) + end + end +end + +# demonstrate the power of our field / alias analysis with a realistic end to end example +abstract type AbstractPoint{T} end +mutable struct MPoint{T} <: AbstractPoint{T} + x::T + y::T +end +add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y) +function compute(T, ax, ay, bx, by) + a = T(ax, ay) + b = T(bx, by) + for i in 0:(100000000-1) + c = add(a, b) # replaceable + a = add(c, b) # replaceable + end + a.x, a.y +end +let result = @code_escapes compute(MPoint, 1+.5im, 2+.5im, 2+.25im, 4+.75im) + for i in findall(1:length(result.ir.stmts)) do idx + inst = result.ir[SSAValue(idx)] + stmt = inst[:stmt] + return (isnew(stmt) || isϕ(stmt)) && inst[:type] <: MPoint + end + @test is_load_forwardable(result.state[SSAValue(i)]) + end +end +function compute(a, b) + for i in 0:(100000000-1) + c = add(a, b) # replaceable + a = add(c, b) # unreplaceable (aliased to the call argument `a`) + end + a.x, a.y +end +# let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) +# idxs = findall(1:length(result.ir.stmts)) do idx +# inst = result.ir[SSAValue(idx)] +# stmt = inst[:stmt] +# return isnew(stmt) && inst[:type] <: MPoint +# end +# @assert length(idxs) == 2 +# @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1 +# end +function compute!(a, b) + for i in 0:(100000000-1) + c = add(a, b) # replaceable + a′ = add(c, b) # replaceable + a.x = a′.x + a.y = a′.y + end +end +let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) + for i in findall(1:length(result.ir.stmts)) do idx + inst = result.ir[SSAValue(idx)] + stmt = inst[:stmt] + return isnew(stmt) && inst[:type] <: MPoint + end + @test is_load_forwardable(result.state[SSAValue(i)]) + end +end + +# demonstrate a simple type level analysis can sometimes improve the analysis accuracy +# by compensating the lack of yet unimplemented analyses +@testset "special-casing bitstype" begin + let result = code_escapes((Nothing,)) do a + global GV = a + end + @test !(has_all_escape(result.state[Argument(2)])) + end + + let result = code_escapes((Int,)) do a + o = SafeRef(a) + Core.donotdelete(o) + return o[] + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test 
!has_return_escape(result.state[SSAValue(i)], r) + end + + # an escaped tuple stmt will not propagate to its Int argument (since `Int` is of bitstype) + let result = code_escapes((Int,Any,)) do a, b + t = tuple(a, b) + return t + end + i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test !has_return_escape(result.state[Argument(2)], r) + @test has_return_escape(result.state[Argument(3)], r) + end +end + +# interprocedural analysis +# ======================== + +# propagate escapes imposed on call arguments +@noinline broadcast_noescape2(b) = broadcast(identity, b) +let result = code_escapes() do + broadcast_noescape2(Ref(Ref("Hi"))) + end + i = last(findall(isnew, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis + @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # IDEA embed const-prop'ed `CodeInstance` for `:invoke`? +end +let result = code_escapes((Base.RefValue{Base.RefValue{String}},)) do x + out1 = broadcast_noescape2(Ref(Ref("Hi"))) + out2 = broadcast_noescape2(x) + return out1, out2 + end + i = last(findall(isnew, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis + @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # IDEA embed const-prop'ed `CodeInstance` for `:invoke`? + @test has_thrown_escape(result.state[Argument(2)]) +end +@noinline allescape_argument(a) = (global GV = a) # obvious escape +let result = code_escapes() do + allescape_argument(Ref("Hi")) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) +end +# if we can't determine the matching method statically, we should be conservative +let result = code_escapes((Ref{Any},)) do a + may_exist(a) + end + @test has_all_escape(result.state[Argument(2)]) +end +let result = code_escapes((Ref{Any},)) do a + Base.@invokelatest broadcast_noescape1(a) + end + @test has_all_escape(result.state[Argument(2)]) +end + +# handling of simple union-split (just exploit the inliner's effort) +@noinline unionsplit_noescape(a) = string(nothing) +@noinline unionsplit_noescape(a::Int) = a + 10 +let result = code_escapes((Union{Int,Nothing},)) do x + s = SafeRef{Union{Int,Nothing}}(x) + unionsplit_noescape(s[]) + return nothing + end + inds = findall(isnew, result.ir.stmts.stmt) # find allocation statement + @assert !isempty(inds) + for i in inds + @test has_no_escape(result.state[SSAValue(i)]) + end +end + +@noinline unused_argument(a) = (println("prevent inlining"); nothing) +let result = code_escapes() do + a = Ref("foo") # shouldn't be "return escape" + b = unused_argument(a) + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_no_escape(result.state[SSAValue(i)]) + + result = code_escapes() do + a = Ref("foo") # still should be "return escape" + b = unused_argument(a) + return a + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) +end + +# should propagate escape information imposed on return value to the aliased call argument +@noinline returnescape_argument(a) = (println("prevent inlining"); a) +let result = code_escapes() do + obj = Ref("foo") # should be "return escape" + ret = returnescape_argument(obj) + return ret # alias of `obj` + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, 
result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) +end +@noinline noreturnescape_argument(a) = (println("prevent inlining"); identity("hi")) +let result = code_escapes() do + obj = Ref("foo") # better to not be "return escape" + ret = noreturnescape_argument(obj) + return ret # must not alias to `obj` + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_no_escape(result.state[SSAValue(i)]) +end + +function with_self_aliased(from_bb::Int, succs::Vector{Int}) + worklist = Int[from_bb] + visited = BitSet(from_bb) + function visit!(bb::Int) + if bb ∉ visited + push!(visited, bb) + push!(worklist, bb) + end + end + while !isempty(worklist) + foreach(visit!, succs) + end + return visited +end +@test code_escapes(with_self_aliased) isa EAUtils.EscapeResult + +# accounts for ThrownEscape via potential MethodError + +# no method error +@noinline identity_if_string(x::SafeRef{<:AbstractString}) = (println("preventing inlining"); nothing) +let result = code_escapes((SafeRef{String},)) do x + identity_if_string(x) + end + @test has_no_escape(ignore_argescape(result.state[Argument(2)])) +end +let result = code_escapes((SafeRef,)) do x + identity_if_string(x) + end + i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_thrown_escape(result.state[Argument(2)], i) + @test_broken !has_return_escape(result.state[Argument(2)], r) +end +let result = code_escapes((SafeRef{String},)) do x + try + identity_if_string(x) + catch err + global GV = err + end + return nothing + end + @test !has_all_escape(result.state[Argument(2)]) +end +let result = code_escapes((Union{SafeRef{String},Vector{String}},)) do x + try + identity_if_string(x) + catch err + global GV = err + end + return nothing + end + @test has_all_escape(result.state[Argument(2)]) +end +# method ambiguity error +@noinline ambig_error_test(a::SafeRef, b) = (println("preventing inlining"); nothing) +@noinline ambig_error_test(a, b::SafeRef) = (println("preventing inlining"); nothing) +@noinline ambig_error_test(a, b) = (println("preventing inlining"); nothing) +let result = code_escapes((SafeRef{String},Any)) do x, y + ambig_error_test(x, y) + end + i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_thrown_escape(result.state[Argument(2)], i) # x + @test has_thrown_escape(result.state[Argument(3)], i) # y + @test_broken !has_return_escape(result.state[Argument(2)], r) # x + @test_broken !has_return_escape(result.state[Argument(3)], r) # y +end +let result = code_escapes((SafeRef{String},Any)) do x, y + try + ambig_error_test(x, y) + catch err + global GV = err + end + end + @test has_all_escape(result.state[Argument(2)]) # x + @test has_all_escape(result.state[Argument(3)]) # y +end + +@eval function scope_folding() + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), 2), + :(return Core.current_scope())), + :(), 1)) +end +@eval function scope_folding_opt() + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), :(Base.inferencebarrier(2))), + :(return Core.current_scope())), + :(), :(Base.inferencebarrier(1)))) +end +@test (@code_escapes scope_folding()) isa EAUtils.EscapeResult +@test (@code_escapes scope_folding_opt()) isa EAUtils.EscapeResult + +end # module test_EA diff --git a/Compiler/test/abioverride.jl b/Compiler/test/abioverride.jl new file mode 100644 index 0000000000000..da9b1f92786e5 --- 
/dev/null +++ b/Compiler/test/abioverride.jl @@ -0,0 +1,60 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Base.Meta +include("irutils.jl") + +# In this test, we will manually construct a CodeInstance that specializes the `myplus` +# method on a constant for the second argument and test various, interfaces surrounding +# CodeInstances with ABI overrides. +myplus(x::Int, y::Int) = x + y + +struct SecondArgConstOverride + arg2::Int +end + +function is_known_call(@nospecialize(x), @nospecialize(func), src::Core.CodeInfo) + isexpr(x, :call) || return false + ft = Compiler.argextype(x.args[1], src, Compiler.VarState[]) + return Compiler.singleton_type(ft) === func +end + + +# Construct a CodeInstance with an ABI override +let world = Base.tls_world_age() + # Get some inferred source code to give to the compiler + # Do not look at a CodeInstance here, since those fields are only valid to + # use while attached to a cache, and are thus invalid to make copies of + # (since you'd have to have made the copy to insert into the cache before + # making the original CodeInstance to copy from, which is obviously + # rather temporally-challenged) + new_source = only(code_typed(myplus, (Int, Int)))[1] + mi = new_source.parent + ## Sanity check + @assert length(new_source.code) == 2 + add = new_source.code[1] + @assert is_known_call(add, Core.Intrinsics.add_int, new_source) && add.args[3] == Core.Argument(3) + + ## Replace x + y by x + 1 + add.args[3] = 1 + + ## Remove the argument + resize!(new_source.slotnames, 2) + resize!(new_source.slotflags, 2) + + # Construct the CodeInstance from the modified CodeInfo data + global new_ci = Core.CodeInstance(Core.ABIOverride(Tuple{typeof(myplus), Int}, mi), + #=owner=#SecondArgConstOverride(1), new_source.rettype, Any#=new_source.exctype is missing=#, + #=inferred_const=#nothing, #=code=#nothing, #=const_flags=#Int32(0), + new_source.min_world, new_source.max_world, #=new_source.ipo_purity_bits is missing=#UInt32(0), + #=analysis_results=#nothing, new_source.debuginfo, new_source.edges) + + # Poke the CI into the global cache + # This isn't necessary, but does conveniently give it the mandatory permanent GC-root before calling `invoke` + ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, new_ci) + + # Poke the source code into the JIT for it + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), new_ci, new_source) +end + +@test contains(repr(new_ci), "ABI Overridden") +@test invoke(myplus, new_ci, 10) == 11 diff --git a/test/compiler/codegen.jl b/Compiler/test/codegen.jl similarity index 67% rename from test/compiler/codegen.jl rename to Compiler/test/codegen.jl index e93ecd232498f..9b92f560c64fc 100644 --- a/test/compiler/codegen.jl +++ b/Compiler/test/codegen.jl @@ -5,6 +5,9 @@ using Random using InteractiveUtils using Libdl +using Test + +include("setup_Compiler.jl") const opt_level = Base.JLOptions().opt_level const coverage = (Base.JLOptions().code_coverage > 0) || (Base.JLOptions().malloc_log > 0) @@ -17,14 +20,19 @@ end # The tests below assume a certain format and safepoint_on_entry=true breaks that. 
function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) - params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false) + params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false, debug_info_level=Cint(2)) d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params) sprint(print, d) end +# Some tests assume calls should be stripped out, +# so strip out the calls to debug intrinsics that +# are not actually materialized as call instructions. +strip_debug_calls(ir) = replace(ir, r"call void @llvm\.dbg\.declare.*\n" => "", r"call void @llvm\.dbg\.value.*\n" => "") + if !is_debug_build && opt_level > 0 # Make sure getptls call is removed at IR level with optimization on - @test !occursin(" call ", get_llvm(identity, Tuple{String})) + @test !occursin(" call ", strip_debug_calls(get_llvm(identity, Tuple{String}))) end jl_string_ptr(s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s) @@ -114,22 +122,27 @@ end if !is_debug_build && opt_level > 0 # Make sure `jl_string_ptr` is inlined - @test !occursin(" call ", get_llvm(jl_string_ptr, Tuple{String})) + @test !occursin(" call ", strip_debug_calls(get_llvm(jl_string_ptr, Tuple{String}))) # Make sure `Core.sizeof` call is inlined s = "aaa" @test jl_string_ptr(s) == pointer_from_objref(s) + sizeof(Int) # String - test_loads_no_call(get_llvm(core_sizeof, Tuple{String}), [Iptr]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{String})), [Iptr]) # String - test_loads_no_call(get_llvm(core_sizeof, Tuple{Core.SimpleVector}), [Iptr]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Core.SimpleVector})), [Iptr]) # Array - test_loads_no_call(get_llvm(core_sizeof, Tuple{Vector{Int}}), [Iptr]) + test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Vector{Int}})), [Iptr]) + # As long as the eltype is known we don't need to load the elsize, but do need to check isvector + @test_skip test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic $Iptr", "ptr", "ptr", Iptr, Iptr, "ptr", Iptr]) + # Memory + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Int}})), [Iptr]) # As long as the eltype is known we don't need to load the elsize - test_loads_no_call(get_llvm(core_sizeof, Tuple{Array{Any}}), [Iptr]) - # Check that we load the elsize - test_loads_no_call(get_llvm(core_sizeof, Tuple{Vector}), [Iptr, "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Any}})), [Iptr]) + # Check that we load the elsize and isunion from the typeof layout + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "ptr", "i32", "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "ptr", "i32", "i16"]) # Primitive Type size should be folded to a constant - test_loads_no_call(get_llvm(core_sizeof, Tuple{Ptr}), String[]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Ptr})), String[]) test_jl_dump_compiles() test_jl_dump_compiles_toplevel_thunks() @@ -171,15 +184,15 @@ end breakpoint_mutable(a::MutableStruct) = ccall(:jl_breakpoint, Cvoid, (Ref{MutableStruct},), a) # Allocation with uninitialized field as gcroot -mutable struct BadRef +mutable struct BadRefMutableStruct x::MutableStruct y::MutableStruct - BadRef(x) = new(x) + BadRefMutableStruct(x) = new(x) end -Base.cconvert(::Type{Ptr{BadRef}}, a::MutableStruct) = BadRef(a) 
-Base.unsafe_convert(::Type{Ptr{BadRef}}, ar::BadRef) = Ptr{BadRef}(pointer_from_objref(ar.x)) +Base.cconvert(::Type{Ptr{BadRefMutableStruct}}, a::MutableStruct) = BadRefMutableStruct(a) +Base.unsafe_convert(::Type{Ptr{BadRefMutableStruct}}, ar::BadRefMutableStruct) = Ptr{BadRefMutableStruct}(pointer_from_objref(ar.x)) -breakpoint_badref(a::MutableStruct) = ccall(:jl_breakpoint, Cvoid, (Ptr{BadRef},), a) +breakpoint_badref(a::MutableStruct) = ccall(:jl_breakpoint, Cvoid, (Ptr{BadRefMutableStruct},), a) struct PtrStruct a::Ptr{Cvoid} @@ -212,18 +225,18 @@ if opt_level > 0 @test occursin("call i32 @memcmp(", compare_large_struct_ir) || occursin("call i32 @bcmp(", compare_large_struct_ir) @test !occursin("%gcframe", compare_large_struct_ir) - @test occursin("jl_gc_pool_alloc", get_llvm(MutableStruct, Tuple{})) + @test occursin("jl_gc_small_alloc", get_llvm(MutableStruct, Tuple{})) breakpoint_mutable_ir = get_llvm(breakpoint_mutable, Tuple{MutableStruct}) @test !occursin("%gcframe", breakpoint_mutable_ir) - @test !occursin("jl_gc_pool_alloc", breakpoint_mutable_ir) + @test !occursin("jl_gc_small_alloc", breakpoint_mutable_ir) breakpoint_badref_ir = get_llvm(breakpoint_badref, Tuple{MutableStruct}) @test !occursin("%gcframe", breakpoint_badref_ir) - @test !occursin("jl_gc_pool_alloc", breakpoint_badref_ir) + @test !occursin("jl_gc_small_alloc", breakpoint_badref_ir) breakpoint_ptrstruct_ir = get_llvm(breakpoint_ptrstruct, Tuple{RealStruct}) @test !occursin("%gcframe", breakpoint_ptrstruct_ir) - @test !occursin("jl_gc_pool_alloc", breakpoint_ptrstruct_ir) + @test !occursin("jl_gc_small_alloc", breakpoint_ptrstruct_ir) end function two_breakpoint(a::Float64) @@ -241,22 +254,22 @@ end if opt_level > 0 breakpoint_f64_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Float64},), a), Tuple{Float64}) - @test !occursin("jl_gc_pool_alloc", breakpoint_f64_ir) + @test !occursin("jl_gc_small_alloc", breakpoint_f64_ir) breakpoint_any_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Any},), a), Tuple{Float64}) - @test occursin("jl_gc_pool_alloc", breakpoint_any_ir) + @test occursin("jl_gc_small_alloc", breakpoint_any_ir) two_breakpoint_ir = get_llvm(two_breakpoint, Tuple{Float64}) - @test !occursin("jl_gc_pool_alloc", two_breakpoint_ir) + @test !occursin("jl_gc_small_alloc", two_breakpoint_ir) @test occursin("llvm.lifetime.end", two_breakpoint_ir) @test load_dummy_ref(1234) === 1234 load_dummy_ref_ir = get_llvm(load_dummy_ref, Tuple{Int}) - @test !occursin("jl_gc_pool_alloc", load_dummy_ref_ir) + @test !occursin("jl_gc_small_alloc", load_dummy_ref_ir) # Hopefully this is reliable enough. LLVM should be able to optimize this to a direct return. - @test occursin("ret $Iptr %0", load_dummy_ref_ir) + @test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir) end -# Issue 22770 +# Issue JuliaLang/julia#22770 let was_gced = false @noinline make_tuple(x) = tuple(x) @noinline use(x) = ccall(:jl_breakpoint, Cvoid, ()) @@ -304,8 +317,8 @@ end # PR #23595 @generated f23595(g, args...) 
= Expr(:call, :g, Expr(:(...), :args)) -x23595 = rand(1) -@test f23595(Core.arrayref, true, x23595, 1) == x23595[] +x23595 = rand(1).ref +@test f23595(Core.memoryrefget, x23595, :not_atomic, true) == x23595[] # Issue #22421 @noinline f22421_1(x) = x[] + 1 @@ -362,26 +375,9 @@ mktemp() do f_22330, _ end # Alias scope -macro aliasscope(body) - sym = gensym() - esc(quote - $(Expr(:aliasscope)) - $sym = $body - $(Expr(:popaliasscope)) - $sym - end) -end - -struct ConstAliasScope{T<:Array} - a::T -end - -@eval Base.getindex(A::ConstAliasScope, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1) -@eval Base.getindex(A::ConstAliasScope, i1::Int, i2::Int, I::Int...) = (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...)) - function foo31018!(a, b) - @aliasscope for i in eachindex(a, b) - a[i] = ConstAliasScope(b)[i] + @Base.Experimental.aliasscope for i in eachindex(a, b) + a[i] = Base.Experimental.Const(b)[i] end end io = IOBuffer() @@ -413,7 +409,7 @@ function g_dict_hash_alloc() end # Warm up f_dict_hash_alloc(); g_dict_hash_alloc(); -@test (@allocated f_dict_hash_alloc()) == (@allocated g_dict_hash_alloc()) +@test abs((@allocated f_dict_hash_alloc()) / (@allocated g_dict_hash_alloc()) - 1) < 0.1 # less that 10% difference # returning an argument shouldn't alloc a new box @noinline f33829(x) = (global called33829 = true; x) @@ -446,7 +442,7 @@ function f1_30093(r) end end -@test f1_30093(Ref(0)) == nothing +@test f1_30093(Ref(0)) === nothing # issue 33590 function f33590(b, x) @@ -493,20 +489,23 @@ function f37262(x) catch GC.safepoint() end + local a try GC.gc() - return g37262(x) + a = g37262(x) + Base.inferencebarrier(false) && error() + return a catch ex GC.gc() finally + @isdefined(a) && Base.donotdelete(a) GC.gc() end end @testset "#37262" begin - str = "store volatile { i8, {}*, {}*, {}*, {}* } zeroinitializer, { i8, {}*, {}*, {}*, {}* }* %phic" - str_opaque = "store volatile { i8, ptr, ptr, ptr, ptr } zeroinitializer, ptr %phic" + str_opaque = "getelementptr inbounds i8, ptr %.roots.phic, i32 8\n store volatile ptr null" llvmstr = get_llvm(f37262, (Bool,), false, false, false) - @test (contains(llvmstr, str) || contains(llvmstr, str_opaque)) || llvmstr + @test contains(llvmstr, str_opaque) @test f37262(Base.inferencebarrier(true)) === nothing end @@ -564,6 +563,7 @@ end function f1(cond) val = [1] GC.@preserve val begin end + return cond end @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f1, Tuple{Bool}, true, false, false)) @@ -571,19 +571,22 @@ end function f3(cond) val = ([1],) GC.@preserve val begin end + return cond end @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f3, Tuple{Bool}, true, false, false)) - # unions of immutables (JuliaLang/julia#39501) + # PhiNode of unions of immutables (JuliaLang/julia#39501) function f2(cond) - val = cond ? 1 : 1f0 + val = cond ? 1 : "" GC.@preserve val begin end + return cond end - @test !occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false)) + @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false)) # make sure the fix for the above doesn't regress #34241 function f4(cond) val = cond ? 
([1],) : ([1f0],) GC.@preserve val begin end + return cond end @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f4, Tuple{Bool}, true, false, false)) end @@ -619,10 +622,10 @@ g40612(a, b) = a[]|a[] === b[]|b[] # issue #41438 struct A41438{T} - x::Ptr{T} + x::Ptr{T} end struct B41438{T} - x::T + x::T end f41438(y) = y[].x @test A41438.body.layout != C_NULL @@ -695,7 +698,7 @@ mktempdir() do pfx libs_deleted += 1 end @test libs_deleted > 0 - @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'print("no codegen!\n")'`) == "no codegen!" + @test readchomp(`$pfx/bin/$(Base.julia_exename()) --startup-file=no -e 'print("no codegen!\n")'`) == "no codegen!" # PR #47343 libs_emptied = 0 @@ -720,14 +723,14 @@ mutable struct A42645{T} end end mutable struct B42645{T} - y::A42645{T} + y::A42645{T} end x42645 = 1 function f42645() - res = B42645(A42645([x42645])) - res.y = A42645([x42645]) - res.y.x = true - res + res = B42645(A42645([x42645])) + res.y = A42645([x42645]) + res.y.x = true + res end @test ((f42645()::B42645).y::A42645{Int}).x @@ -787,11 +790,11 @@ f47247(a::Ref{Int}, b::Nothing) = setfield!(a, :x, b) @test_throws TypeError f47247(Ref(5), nothing) f48085(@nospecialize x...) = length(x) -@test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Vararg{Int}}, Core.svec()) === nothing -@test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Int, Vararg{Int}}, Core.svec()) === Tuple{typeof(f48085), Any, Vararg{Any}} +@test Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Vararg{Int}}, Core.svec()) === nothing +@test Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Int, Vararg{Int}}, Core.svec()) === Tuple{typeof(f48085), Any, Vararg{Any}} # Make sure that the bounds check is elided in tuple iteration -@test !occursin("call void @", get_llvm(iterate, Tuple{NTuple{4, Float64}, Int})) +@test !occursin("call void @", strip_debug_calls(get_llvm(iterate, Tuple{NTuple{4, Float64}, Int}))) # issue #34459 function f34459(args...) @@ -820,3 +823,221 @@ end # issue 48917, hoisting load to above the parent f48917(x, w) = (y = (a=1, b=x); z = (; a=(a=(1, w), b=(3, y)))) @test f48917(1,2) == (a = (a = (1, 2), b = (3, (a = 1, b = 1))),) + +# https://github.com/JuliaLang/julia/issues/50317 getproperty allocation on struct with 1 field +struct Wrapper50317 + lock::ReentrantLock +end +const MONITOR50317 = Wrapper50317(ReentrantLock()) +issue50317() = @noinline MONITOR50317.lock +issue50317() +let res = @timed issue50317() + @test res.bytes == 0 + return res # must return otherwise the compiler may eliminate the result entirely +end +struct Wrapper50317_2 + lock::ReentrantLock + fun::Vector{Int} +end +const MONITOR50317_2 = Wrapper50317_2(ReentrantLock(),[1]) +issue50317_2() = @noinline MONITOR50317.lock +issue50317_2() +let res = @timed issue50317_2() + @test res.bytes == 0 + return res +end +const a50317 = (b=3,) +let res = @timed a50317[:b] + @test res.bytes == 0 + return res +end + +# https://github.com/JuliaLang/julia/issues/50964 +@noinline bar50964(x::Core.Const) = Base.inferencebarrier(1) +@noinline bar50964(x::DataType) = Base.inferencebarrier(2) +foo50964(x) = bar50964(Base.inferencebarrier(Core.Const(x))) +foo50964(1) # Shouldn't assert! 
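# Hypothetical sketch of the zero-allocation idiom used in the issue-50317 tests above
# (the struct and function names below are invented for illustration): warm the call up
# once so compilation cost is not measured, then check the `bytes` field reported by `@timed`.
using Test
struct WrappedLock50317              # illustrative single-field wrapper, not from the patch
    lock::ReentrantLock
end
const GLOBAL_WRAPPED50317 = WrappedLock50317(ReentrantLock())
getlock50317() = @noinline GLOBAL_WRAPPED50317.lock
getlock50317()                       # warm-up call so compilation is excluded from the measurement
let res = @timed getlock50317()
    @test res.bytes == 0             # getproperty on a const single-field wrapper should not allocate
end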
+ +# https://github.com/JuliaLang/julia/issues/51233 +obj51233 = (1,) +@test_throws FieldError obj51233.x + +# Very specific test for multiversioning +if Sys.ARCH === :x86_64 + foo52079() = Core.Intrinsics.have_fma(Float64) + if foo52079() == true + let io = IOBuffer() + code_native(io,Base.Math.exp_impl,(Float64,Float64,Val{:ℯ}), dump_module=false) + str = String(take!(io)) + @test !occursin("fma_emulated", str) + @test occursin("vfmadd", str) + end + end +end + +#Check if we aren't emitting the store with the wrong TBAA metadata + +foo54166(x,i,y) = x[i] = y +let io = IOBuffer() + code_llvm(io,foo54166, (Vector{Union{Missing,Int}}, Int, Int), dump_module=true, raw=true) + str = String(take!(io)) + @test !occursin("jtbaa_unionselbyte", str) + @test occursin("jtbaa_arrayselbyte", str) +end + +ex54166 = Union{Missing, Int64}[missing -2; missing -2]; +dims54166 = (1,2) +@test (minimum(ex54166; dims=dims54166)[1] === missing) + +# #54109 - Excessive LLVM time for egal +struct DefaultOr54109{T} + x::T + default::Bool +end + +@eval struct Torture1_54109 + $((Expr(:(::), Symbol("x$i"), DefaultOr54109{Float64}) for i = 1:897)...) +end +Torture1_54109() = Torture1_54109((DefaultOr54109(1.0, false) for i = 1:897)...) + +@eval struct Torture2_54109 + $((Expr(:(::), Symbol("x$i"), DefaultOr54109{Float64}) for i = 1:400)...) + $((Expr(:(::), Symbol("x$(i+400)"), DefaultOr54109{Int16}) for i = 1:400)...) +end +Torture2_54109() = Torture2_54109((DefaultOr54109(1.0, false) for i = 1:400)..., (DefaultOr54109(Int16(1), false) for i = 1:400)...) + +@noinline egal_any54109(x, @nospecialize(y::Any)) = x === Base.compilerbarrier(:type, y) + +let ir1 = get_llvm(egal_any54109, Tuple{Torture1_54109, Any}), + ir2 = get_llvm(egal_any54109, Tuple{Torture2_54109, Any}) + + # We can't really do timing on CI, so instead, let's look at the length of + # the optimized IR. The original version had tens of thousands of lines and + # was slower, so just check here that we only have < 500 lines. If somebody, + # implements a better comparison that's larger than that, just re-benchmark + # this and adjust the threshold. + + @test count(==('\n'), ir1) < 500 + @test count(==('\n'), ir2) < 500 +end + +## Regression test for egal of a struct of this size without padding, but with +## non-bitsegal, to make sure that it doesn't accidentally go down the accelerated +## path. +@eval struct BigStructAnyInt + $((Expr(:(::), Symbol("x$i"), Pair{Any, Int}) for i = 1:33)...) +end +BigStructAnyInt() = BigStructAnyInt((Union{Base.inferencebarrier(Float64), Int}=>i for i = 1:33)...) +@test egal_any54109(BigStructAnyInt(), BigStructAnyInt()) + +## For completeness, also test correctness, since we don't have a lot of +## large-struct tests. + +# The two allocations of the same struct will likely have different padding, +# we want to make sure we find them egal anyway - a naive memcmp would +# accidentally look at it. +@test egal_any54109(Torture1_54109(), Torture1_54109()) +@test egal_any54109(Torture2_54109(), Torture2_54109()) +@test !egal_any54109(Torture1_54109(), Torture1_54109((DefaultOr54109(2.0, false) for i = 1:897)...)) + +bar54599() = Base.inferencebarrier(true) ? (Base.PkgId(Main),1) : nothing + +function foo54599() + pkginfo = @noinline bar54599() + pkgid = pkginfo !== nothing ? pkginfo[1] : nothing + @noinline println(devnull, pkgid) + pkgid.uuid !== nothing ? pkgid.uuid : false +end + +#this function used to crash allocopt due to a no predecessors bug +barnopreds() = Base.inferencebarrier(true) ? 
(Base.PkgId(Test),1) : nothing +function foonopreds() + pkginfo = @noinline barnopreds() + pkgid = pkginfo !== nothing ? pkginfo[1] : nothing + pkgid.uuid !== nothing ? pkgid.uuid : false +end +@test foonopreds() !== nothing + +# issue 55396 +struct Incomplete55396 + x::Tuple{Int} + y::Int + @noinline Incomplete55396(x::Int) = new((x,)) +end +let x = Incomplete55396(55396) + @test x.x === (55396,) +end + +# Core.getptls() special handling +@test !occursin("call ptr @jlplt", get_llvm(Core.getptls, Tuple{})) #It should lower to a direct load of the ptls and not a ccall + +# issue 55208 +@noinline function f55208(x, i) + z = (i == 0 ? x[1] : x[i]) + return z isa Core.TypeofBottom +end +@test f55208((Union{}, 5, 6, 7), 0) + +@noinline function g55208(x, i) + z = (i == 0 ? x[1] : x[i]) + typeof(z) +end +@test g55208((Union{}, true, true), 0) === typeof(Union{}) + +@test string((Core.Union{}, true, true, true)) == "(Union{}, true, true, true)" + +# Issue #55558 +for (T, StructName) in ((Int128, :Issue55558), (UInt128, :UIssue55558)) + @eval begin + struct $(StructName) + a::$(T) + b::Int64 + c::$(T) + end + local broken_i128 = Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "powerpc64le" + @test fieldoffset($(StructName), 2) == 16 + @test fieldoffset($(StructName), 3) == 32 broken=broken_i128 + @test sizeof($(StructName)) == 48 broken=broken_i128 + end +end + +@noinline Base.@nospecializeinfer f55768(@nospecialize z::UnionAll) = z === Vector +@test f55768(Vector) +@test f55768(Vector{T} where T) +@test !f55768(Vector{S} where S) + +# test that values get rooted correctly over throw +for a in ((@noinline Ref{Int}(2)), + (@noinline Ref{Int}(3)), + 5, + (@noinline Ref{Int}(4)), + 6) + @test a[] != 0 + try + b = (@noinline Ref{Int}(5), + @noinline Ref{Int}(6), + @noinline Ref{Int}(7), + @noinline Ref{Int}(8), + @noinline Ref{Int}(9), + @noinline Ref{Int}(10), + @noinline Ref{Int}(11)) + GC.gc(true) + GC.@preserve b throw(a) + catch ex + @test ex === a + end +end + +# Make sure that code that has unbound sparams works +#https://github.com/JuliaLang/julia/issues/56739 + +f56739(a) where {T} = a + +@test f56739(1) == 1 +g56739(x) = @noinline f56739(x) +@test g56739(1) == 1 + +struct Vec56937 x::NTuple{8, VecElement{Int}} end + +x56937 = Ref(Vec56937(ntuple(_->VecElement(1),8))) +@test x56937[].x[1] == VecElement{Int}(1) # shouldn't crash diff --git a/Compiler/test/compact.jl b/Compiler/test/compact.jl new file mode 100644 index 0000000000000..b01e209d5ce9b --- /dev/null +++ b/Compiler/test/compact.jl @@ -0,0 +1,57 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test + +include("irutils.jl") + +using .Compiler: IncrementalCompact, insert_node_here!, finish, + NewInstruction, verify_ir, ReturnNode, SSAValue + +foo_test_function(i) = i == 1 ? 
1 : 2 + +@testset "IncrementalCompact statefulness" begin + ir = only(Base.code_ircode(foo_test_function, (Int,)))[1] + compact = IncrementalCompact(ir) + + # set up first iterator + x = Compiler.iterate(compact) + x = Compiler.iterate(compact, x[2]) + + # set up second iterator + x = Compiler.iterate(compact) + + # consume remainder + while x !== nothing + x = Compiler.iterate(compact, x[2]) + end + + ir = finish(compact) + @test Compiler.verify_ir(ir) === nothing +end + +# Test early finish of IncrementalCompact +@testset "IncrementalCompact early finish" begin + ir = only(Base.code_ircode(foo_test_function, (Int,)))[1] + compact = IncrementalCompact(ir) + + insert_node_here!(compact, NewInstruction(ReturnNode(1), Union{}, ir[SSAValue(1)][:line])) + new_ir = finish(compact) + # TODO: Should IncrementalCompact be doing this internally? + empty!(new_ir.cfg.blocks[1].succs) + verify_ir(new_ir) + @test length(new_ir.cfg.blocks) == 1 +end + +# Test reverse affinity insert at start of compact +@testset "IncrementalCompact reverse affinity insert" begin + ir = only(Base.code_ircode(foo_test_function, (Int,)))[1] + compact = IncrementalCompact(ir) + @test !Compiler.did_just_finish_bb(compact) + + insert_node_here!(compact, NewInstruction(ReturnNode(1), Union{}, ir[SSAValue(1)][:line]), true) + new_ir = finish(compact) + # TODO: Should IncrementalCompact be doing this internally? + empty!(new_ir.cfg.blocks[1].succs) + verify_ir(new_ir) + @test length(new_ir.cfg.blocks) == 1 +end diff --git a/Compiler/test/contextual.jl b/Compiler/test/contextual.jl new file mode 100644 index 0000000000000..a9c63ab34c0c0 --- /dev/null +++ b/Compiler/test/contextual.jl @@ -0,0 +1,126 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# N.B.: This file is also run from interpreter.jl, so needs to be standalone-executable +using Test +include("setup_Compiler.jl") + +# Cassette +# ======== + +module MiniCassette + # A minimal demonstration of the cassette mechanism. Doesn't support all the + # fancy features, but sufficient to exercise this code path in the compiler. + + using Core.IR + using ..Compiler + using ..Compiler: retrieve_code_info, quoted, anymap + using Base.Meta: isexpr + + export Ctx, overdub + + struct Ctx; end + + # A no-op cassette-like transform + function transform_expr(expr, map_slot_number, map_ssa_value, sparams::Core.SimpleVector) + @nospecialize expr + transform(@nospecialize expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams) + if isexpr(expr, :call) + return Expr(:call, overdub, SlotNumber(2), anymap(transform, expr.args)...) + elseif isa(expr, GotoIfNot) + return GotoIfNot(transform(expr.cond), map_ssa_value(SSAValue(expr.dest)).id) + elseif isexpr(expr, :static_parameter) + return quoted(sparams[expr.args[1]]) + elseif isa(expr, ReturnNode) + return ReturnNode(transform(expr.val)) + elseif isa(expr, Expr) + return Expr(expr.head, anymap(transform, expr.args)...) + elseif isa(expr, GotoNode) + return GotoNode(map_ssa_value(SSAValue(expr.label)).id) + elseif isa(expr, SlotNumber) + return map_slot_number(expr.id) + elseif isa(expr, SSAValue) + return map_ssa_value(expr) + else + return expr + end + end + + function transform!(mi::MethodInstance, ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector) + code = ci.code + di = Compiler.DebugInfoStream(mi, ci.debuginfo, length(code)) + ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+1:end]...] + ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+1:end]...] 
+ # Insert one SSAValue for every argument statement + prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs]) + prepend!(di.codelocs, fill(Int32(0), 3nargs)) + prepend!(ci.ssaflags, fill(0x00, nargs)) + ci.debuginfo = Core.DebugInfo(di, length(code)) + ci.ssavaluetypes += nargs + function map_slot_number(slot::Int) + if slot == 1 + # self in the original function is now `f` + return SlotNumber(3) + elseif 2 <= slot <= nargs + 1 + # Arguments get inserted as ssa values at the top of the function + return SSAValue(slot - 1) + else + # The first non-argument slot will be 5 + return SlotNumber(slot - (nargs + 1) + 4) + end + end + map_ssa_value(ssa::SSAValue) = SSAValue(ssa.id + nargs) + for i = (nargs+1:length(code)) + code[i] = transform_expr(code[i], map_slot_number, map_ssa_value, sparams) + end + end + + function overdub_generator(world::UInt, source, self, ctx, f, args) + @nospecialize + if !Base.issingletontype(f) + # (c, f, args..) -> f(args...) + ex = :(return f(args...)) + return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :ctx, :f, :args), Core.svec())(world, source, ex) + end + + tt = Tuple{f, args...} + match = Base._which(tt; world) + mi = Base.specialize_method(match) + # Unsupported in this mini-cassette + @assert !mi.def.isva + src = retrieve_code_info(mi, world) + @assert isa(src, CodeInfo) + src = copy(src) + @assert src.edges === Core.svec() + src.edges = Any[mi] + transform!(mi, src, length(args), match.sparams) + # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[]) + # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[]) + # Match the generator, since that's what our transform! does + src.nargs = 4 + src.isva = true + return src + end + + @inline overdub(::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...) = f(args...) + + @eval function overdub(ctx::Ctx, f, args...) + $(Expr(:meta, :generated_only)) + $(Expr(:meta, :generated, overdub_generator)) + end +end + +using .MiniCassette + +# Test #265 for Cassette +f() = 1 +@test overdub(Ctx(), f) === 1 +f() = 2 +@test overdub(Ctx(), f) === 2 + +# Test that MiniCassette is at least somewhat capable by overdubbing gcd +@test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20) + +@generated bar(::Val{align}) where {align} = :(42) +foo(i) = i+bar(Val(1)) + +@test @inferred(overdub(Ctx(), foo, 1)) == 43 diff --git a/Compiler/test/datastructures.jl b/Compiler/test/datastructures.jl new file mode 100644 index 0000000000000..608e4e770998a --- /dev/null +++ b/Compiler/test/datastructures.jl @@ -0,0 +1,115 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +include("setup_Compiler.jl") + +@testset "CachedMethodTable" begin + # cache result should be separated per `limit` and `sig` + # https://github.com/JuliaLang/julia/pull/46799 + interp = Compiler.NativeInterpreter() + table = Compiler.method_table(interp) + sig = Tuple{typeof(*), Any, Any} + result1 = Compiler.findall(sig, table; limit=-1) + result2 = Compiler.findall(sig, table; limit=Compiler.InferenceParams().max_methods) + @test result1 !== nothing && !Compiler.isempty(result1) + @test result2 === nothing +end + +@testset "BitSetBoundedMinPrioritySet" begin + bsbmp = Compiler.BitSetBoundedMinPrioritySet(5) + Compiler.push!(bsbmp, 2) + Compiler.push!(bsbmp, 2) + iterateok = true + cnt = 0 + @eval Compiler for v in $bsbmp + if cnt == 0 + iterateok &= v == 2 + elseif cnt == 1 + iterateok &= v == 5 + else + iterateok = false + end + cnt += 1 + end + @test iterateok + @test Compiler.popfirst!(bsbmp) == 2 + Compiler.push!(bsbmp, 1) + @test Compiler.popfirst!(bsbmp) == 1 + @test Compiler.isempty(bsbmp) +end + +@testset "basic heap functionality" begin + v = [2,3,1] + @test Compiler.heapify!(v, Compiler.Forward) === v + @test Compiler.heappop!(v, Compiler.Forward) === 1 + @test Compiler.heappush!(v, 4, Compiler.Forward) === v + @test Compiler.heappop!(v, Compiler.Forward) === 2 + @test Compiler.heappop!(v, Compiler.Forward) === 3 + @test Compiler.heappop!(v, Compiler.Forward) === 4 +end + +@testset "randomized heap correctness tests" begin + order = Compiler.By(x -> -x[2]) + for i in 1:6 + heap = Tuple{Int, Int}[(rand(1:i), rand(1:i)) for _ in 1:2i] + mock = copy(heap) + @test Compiler.heapify!(heap, order) === heap + sort!(mock, by=last) + + for _ in 1:6i + if rand() < .5 && !isempty(heap) + # The first entries may differ because heaps are not stable + @test last(Compiler.heappop!(heap, order)) === last(pop!(mock)) + else + new = (rand(1:i), rand(1:i)) + Compiler.heappush!(heap, new, order) + push!(mock, new) + sort!(mock, by=last) + end + end + end +end + +@testset "searchsorted" begin + @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Compiler.UnitRange(1, 0) + @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Compiler.UnitRange(1, 2) + @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Compiler.UnitRange(3, 4) + @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Compiler.UnitRange(7, 6) + @test Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Compiler.UnitRange(5, 4) + + @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 0) === Compiler.UnitRange(1, 0) + @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 1) === Compiler.UnitRange(1, 1) + @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 2) === Compiler.UnitRange(2, 2) + @test Compiler.searchsorted(Compiler.UnitRange(1, 3), 4) === Compiler.UnitRange(4, 3) + + @test Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Compiler.UnitRange(1, 4) + @test Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Compiler.UnitRange(5, 10) + @test Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Compiler.Forward) === Compiler.UnitRange(6, 6) + @test Compiler.searchsorted(fill(1, 15), 1, 6, 10, Compiler.Forward) === Compiler.UnitRange(6, 10) + + for (rg,I) in Any[(Compiler.UnitRange(49, 57), 47:59), + (Compiler.StepRange(1, 2, 17), -1:19)] + rg_r = Compiler.reverse(rg) + rgv, rgv_r = Compiler.collect(rg), Compiler.collect(rg_r) + for i = I + @test Compiler.searchsorted(rg,i) === Compiler.searchsorted(rgv,i) + @test Compiler.searchsorted(rg_r,i,rev=true) === 
Compiler.searchsorted(rgv_r,i,rev=true) + end + end +end + +@testset "basic sort" begin + v = [3,1,2] + @test v == [3,1,2] + @test Compiler.sort!(v) === v == [1,2,3] + @test Compiler.sort!(v, by = x -> -x) === v == [3,2,1] + @test Compiler.sort!(v, by = x -> -x, < = >) === v == [1,2,3] +end + +@testset "randomized sorting tests" begin + for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n] + v = rand(-1:k, n) + for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >] + @test sort(v; by, lt) == Compiler.sort!(copy(v); by, < = lt) + end + end +end diff --git a/Compiler/test/effects.jl b/Compiler/test/effects.jl new file mode 100644 index 0000000000000..a7a1d18159137 --- /dev/null +++ b/Compiler/test/effects.jl @@ -0,0 +1,1386 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test +include("irutils.jl") + +# Test that the Core._apply_iterate bail path taints effects +function f_apply_bail(f) + f(()...) + return nothing +end +@test !Compiler.is_removable_if_unused(Base.infer_effects(f_apply_bail)) +@test !fully_eliminated((Function,)) do f + f_apply_bail(f) + nothing +end + +# Test that effect modeling for return_type doesn't incorrectly pick +# up the effects of the function being analyzed +f_throws() = error() +@noinline function return_type_unused(x) + Compiler.return_type(f_throws, Tuple{}) + return x+1 +end +@test Compiler.is_removable_if_unused(Base.infer_effects(return_type_unused, (Int,))) +@test fully_eliminated((Int,)) do x + return_type_unused(x) + return nothing +end + +# Test that ambiguous calls don't accidentally get nothrow effect +ambig_effects_test(a::Int, b) = 1 +ambig_effects_test(a, b::Int) = 1 +ambig_effects_test(a, b) = 1 +@test !Compiler.is_nothrow(Base.infer_effects(ambig_effects_test, (Int, Any))) +global ambig_unknown_type_global::Any = 1 +@noinline function conditionally_call_ambig(b::Bool, a) + if b + ambig_effects_test(a, ambig_unknown_type_global) + end + return 0 +end +@test !fully_eliminated((Bool,)) do b + conditionally_call_ambig(b, 1) + return nothing +end + +# Test that a missing methtable identification gets tainted +# appropriately +struct FCallback; f::Union{Nothing, Function}; end +f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end +@test !Compiler.is_removable_if_unused(Base.infer_effects(f_invoke_callback, (FCallback,))) +@test !fully_eliminated((FCallback,)) do fc + f_invoke_callback(fc) + return nothing +end + +# @assume_effects override +const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z')) +Base.@assume_effects :foldable concrete_eval( + f, args...; kwargs...) = f(args...; kwargs...) +@test fully_eliminated() do + concrete_eval(getindex, ___CONST_DICT___, :a) +end + +# :removable override +Base.@assume_effects :removable removable_call( + f, args...; kwargs...) = f(args...; kwargs...) 
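# Hypothetical standalone illustration of the `Base.@assume_effects` overrides exercised in
# this file (the dictionary and function names below are invented): asserting `:foldable` on
# a wrapper around a `const` Dict lookup makes a call with constant arguments eligible for
# concrete evaluation, which is what the `fully_eliminated` checks in this file rely on.
const LOOKUP_SKETCH = Dict(:a => 1, :b => 2)
Base.@assume_effects :foldable lookup_sketch(key::Symbol) = LOOKUP_SKETCH[key]
# With a constant argument the caller below should fold down to returning the constant 1:
code_typed(() -> lookup_sketch(:a), ())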
+@test fully_eliminated() do + @noinline removable_call(getindex, ___CONST_DICT___, :a) + nothing +end + +# terminates_globally override +# https://github.com/JuliaLang/julia/issues/41694 +Base.@assume_effects :terminates_globally function issue41694(x) + res = 1 + 0 ≤ x < 20 || error("bad fact") + while x > 1 + res *= x + x -= 1 + end + return res +end +@test Compiler.is_foldable(Base.infer_effects(issue41694, (Int,))) +@test fully_eliminated() do + issue41694(2) +end + +Base.@assume_effects :terminates_globally function recur_termination1(x) + x == 0 && return 1 + 0 ≤ x < 20 || error("bad fact") + return x * recur_termination1(x-1) +end +@test Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,))) +@test Compiler.is_terminates(Base.infer_effects(recur_termination1, (Int,))) +function recur_termination2() + Base.@assume_effects :total !:terminates_globally + recur_termination1(12) +end +@test fully_eliminated(recur_termination2) +@test fully_eliminated() do; recur_termination2(); end + +Base.@assume_effects :terminates_globally function recur_termination21(x) + x == 0 && return 1 + 0 ≤ x < 20 || error("bad fact") + return recur_termination22(x) +end +recur_termination22(x) = x * recur_termination21(x-1) +@test Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,))) +@test Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,))) +@test Compiler.is_terminates(Base.infer_effects(recur_termination21, (Int,))) +@test Compiler.is_terminates(Base.infer_effects(recur_termination22, (Int,))) +function recur_termination2x() + Base.@assume_effects :total !:terminates_globally + recur_termination21(12) + recur_termination22(12) +end +@test fully_eliminated(recur_termination2x) +@test fully_eliminated() do; recur_termination2x(); end + +# anonymous function support for `@assume_effects` +@test fully_eliminated() do + map((2,3,4)) do x + # this :terminates_locally allows this anonymous function to be constant-folded + Base.@assume_effects :terminates_locally + res = 1 + 0 ≤ x < 20 || error("bad fact") + while x > 1 + res *= x + x -= 1 + end + return res + end +end + +# control flow backedge should taint `terminates` +@test Base.infer_effects((Int,)) do n + for i = 1:n; end +end |> !Compiler.is_terminates + +# interprocedural-recursion should taint `terminates` **appropriately** +function sumrecur(a, x) + isempty(a) && return x + return sumrecur(Base.tail(a), x + first(a)) +end +@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int},Int)) |> Compiler.is_terminates +@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int,Vararg{Int}},Int)) |> !Compiler.is_terminates + +# https://github.com/JuliaLang/julia/issues/45781 +@test Base.infer_effects((Float32,)) do a + out1 = promote_type(Irrational{:π}, Bool) + out2 = sin(a) + out1, out2 +end |> Compiler.is_terminates + +# refine :consistent-cy effect inference using the return type information +@test Base.infer_effects((Any,)) do x + taint = Ref{Any}(x) # taints :consistent-cy, but will be adjusted + throw(taint) +end |> Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + if x < 0 + taint = Ref(x) # taints :consistent-cy, but will be adjusted + throw(DomainError(x, taint)) + end + return nothing +end |> Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + if x < 0 + taint = Ref(x) # taints :consistent-cy, but will be adjusted + throw(DomainError(x, taint)) + end + return x == 0 ? 
nothing : x # should `Union` of isbitstype objects nicely +end |> Compiler.is_consistent +@test Base.infer_effects((Symbol,Any)) do s, x + if s === :throw + taint = Ref{Any}(":throw option given") # taints :consistent-cy, but will be adjusted + throw(taint) + end + return s # should handle `Symbol` nicely +end |> Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + return Ref(x) +end |> !Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + return x < 0 ? Ref(x) : nothing +end |> !Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + if x < 0 + throw(DomainError(x, lazy"$x is negative")) + end + return nothing +end |> Compiler.is_foldable + +# :the_exception expression should taint :consistent-cy +global inconsistent_var::Int = 42 +function throw_inconsistent() # this is still :consistent + throw(inconsistent_var) +end +function catch_inconsistent() + try + throw_inconsistent() + catch err + err + end +end +@test !Compiler.is_consistent(Base.infer_effects(catch_inconsistent)) +cache_inconsistent() = catch_inconsistent() +function compare_inconsistent() + a = cache_inconsistent() + global inconsistent_var = 0 + b = cache_inconsistent() + global inconsistent_var = 42 + return a === b +end +@test !compare_inconsistent() +# return type information shouldn't be able to refine it also +function catch_inconsistent(x::T) where T + v = x + try + throw_inconsistent() + catch err + v = err::T + end + return v +end +@test !Compiler.is_consistent(Base.infer_effects(catch_inconsistent, (Int,))) +cache_inconsistent(x) = catch_inconsistent(x) +function compare_inconsistent(x::T) where T + x = one(T) + a = cache_inconsistent(x) + global inconsistent_var = 0 + b = cache_inconsistent(x) + global inconsistent_var = 42 + return a === b +end +@test !compare_inconsistent(3) + +# Effect modeling for Core.compilerbarrier +@test Base.infer_effects(Base.inferencebarrier, Tuple{Any}) |> Compiler.is_removable_if_unused + +# effects modeling for allocation/access of uninitialized fields +struct Maybe{T} + x::T + Maybe{T}() where T = new{T}() + Maybe{T}(x) where T = new{T}(x) + Maybe(x::T) where T = new{T}(x) +end +Base.getindex(x::Maybe) = x.x +struct SyntacticallyDefined{T} + x::T +end +@test Base.infer_effects() do + Maybe{Int}() +end |> !Compiler.is_consistent +@test Base.infer_effects() do + Maybe{Int}()[] +end |> !Compiler.is_consistent +@test !fully_eliminated() do + Maybe{Int}()[] +end +@test Base.infer_effects() do + Maybe{String}() +end |> Compiler.is_consistent +@test Base.infer_effects() do + Maybe{String}()[] +end |> Compiler.is_consistent +let f() = Maybe{String}()[] + @test Base.return_types() do + f() # this call should be concrete evaluated + end |> only === Union{} +end +@test Base.infer_effects() do + Ref{Int}() +end |> !Compiler.is_consistent +@test Base.infer_effects() do + Ref{Int}()[] +end |> !Compiler.is_consistent +@test !fully_eliminated() do + Ref{Int}()[] +end +@test Base.infer_effects() do + Ref{String}()[] +end |> Compiler.is_consistent +let f() = Ref{String}()[] + @test Base.return_types() do + f() # this call should be concrete evaluated + end |> only === Union{} +end +@test Base.infer_effects((SyntacticallyDefined{Float64}, Symbol)) do w, s + getfield(w, s) +end |> Compiler.is_foldable + +# effects propagation for `Core.invoke` calls +# https://github.com/JuliaLang/julia/issues/44763 +global x44763::Int = 0 +increase_x44763!(n) = (global x44763; x44763 += n) +invoke44763(x) = @invoke increase_x44763!(x) +@test Base.return_types() do + invoke44763(42) +end 
|> only === Int +@test x44763 == 0 + +# `@inbounds`/`@boundscheck` expression should taint :consistent correctly +# https://github.com/JuliaLang/julia/issues/48099 +function A1_inbounds() + r = 0 + @inbounds begin + @boundscheck r += 1 + end + return r +end +@test !Compiler.is_consistent(Base.infer_effects(A1_inbounds)) + +# Test that purity doesn't try to accidentally run unreachable code due to +# boundscheck elimination +function f_boundscheck_elim(n) + # Inbounds here assumes that this is only ever called with `n==0`, but of + # course the compiler has no way of knowing that, so it must not attempt + # to run the `@inbounds getfield(sin, 1)` that `ntuple` generates. + ntuple(x->(@inbounds ()[x]), n) +end +@test !Compiler.is_noub(Base.infer_effects(f_boundscheck_elim, (Int,))) +@test Tuple{} <: only(Base.return_types(f_boundscheck_elim, (Int,))) + +# Test that purity modeling doesn't accidentally introduce new world age issues +f_redefine_me(x) = x+1 +f_call_redefine() = f_redefine_me(0) +f_mk_opaque() = Base.Experimental.@opaque ()->Base.inferencebarrier(f_call_redefine)() +const op_capture_world = f_mk_opaque() +f_redefine_me(x) = x+2 +@test op_capture_world() == 1 +@test f_mk_opaque()() == 2 + +# backedge insertion for Any-typed, effect-free frame +const CONST_DICT = let d = Dict() + for c in 'A':'z' + push!(d, c => Int(c)) + end + d +end +Base.@assume_effects :foldable getcharid(c) = CONST_DICT[c] +@noinline callf(f, args...) = f(args...) +function entry_to_be_invalidated(c) + return callf(getcharid, c) +end +@test Base.infer_effects((Char,)) do x + entry_to_be_invalidated(x) +end |> Compiler.is_foldable +@test fully_eliminated(; retval=97) do + entry_to_be_invalidated('a') +end +getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation +@test Base.infer_effects((Char,)) do x + entry_to_be_invalidated(x) +end |> !Compiler.is_foldable +@test !fully_eliminated() do + entry_to_be_invalidated('a') +end + +@test !Compiler.builtin_nothrow(Compiler.fallback_lattice, Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any) + +# effects modeling for assignment to globals +global glob_assign_int::Int = 0 +f_glob_assign_int() = global glob_assign_int = 1 +let effects = Base.infer_effects(f_glob_assign_int, (); optimize=false) + @test Compiler.is_consistent(effects) + @test !Compiler.is_effect_free(effects) + @test Compiler.is_nothrow(effects) +end +# effects modeling for for setglobal! 
+global SETGLOBAL!_NOTHROW::Int = 0 +let effects = Base.infer_effects(; optimize=false) do + setglobal!(@__MODULE__, :SETGLOBAL!_NOTHROW, 42) + end + @test Compiler.is_consistent(effects) + @test !Compiler.is_effect_free(effects) + @test Compiler.is_nothrow(effects) +end + +# we should taint `nothrow` if the binding doesn't exist and isn't fixed yet, +# as the cached effects can be easily wrong otherwise +# since the inference currently doesn't track "world-age" of global variables +@eval global_assignment_undefinedyet() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET)) = 42 +setglobal!_nothrow_undefinedyet() = setglobal!(@__MODULE__, :UNDEFINEDYET, 42) +let effects = Base.infer_effects() do + global_assignment_undefinedyet() + end + @test !Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + setglobal!_nothrow_undefinedyet() + end + @test !Compiler.is_nothrow(effects) +end +global UNDEFINEDYET::String = "0" +let effects = Base.infer_effects() do + global_assignment_undefinedyet() + end + @test !Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + setglobal!_nothrow_undefinedyet() + end + @test !Compiler.is_nothrow(effects) +end +@test_throws Union{ErrorException,TypeError} setglobal!_nothrow_undefinedyet() # TODO: what kind of error should this be? + +# Nothrow for setfield! +mutable struct SetfieldNothrow + x::Int +end +f_setfield_nothrow() = SetfieldNothrow(0).x = 1 +let effects = Base.infer_effects(f_setfield_nothrow, ()) + @test Compiler.is_nothrow(effects) + @test Compiler.is_effect_free(effects) # see EFFECT_FREE_IF_INACCESSIBLEMEMONLY +end + +# even if 2-arg `getfield` may throw, it should be still `:consistent` +@test Compiler.is_consistent(Base.infer_effects(getfield, (NTuple{5, Float64}, Int))) + +# SimpleVector allocation is consistent +@test Compiler.is_consistent(Base.infer_effects(Core.svec)) +@test Base.infer_effects() do + Core.svec(nothing, 1, "foo") +end |> Compiler.is_consistent + +# fastmath operations are in-`:consistent` +@test !Compiler.is_consistent(Base.infer_effects((a,b)->@fastmath(a+b), (Float64,Float64))) + +# issue 46122: @assume_effects for @ccall +@test Base.infer_effects((Vector{Int},)) do a + Base.@assume_effects :effect_free @ccall this_call_does_not_really_exist(a::Any)::Ptr{Int} +end |> Compiler.is_effect_free + +# `getfield_effects` handles access to union object nicely +let 𝕃 = Compiler.fallback_lattice + getfield_effects = Compiler.getfield_effects + @test Compiler.is_consistent(getfield_effects(𝕃, Any[Some{String}, Core.Const(:value)], String)) + @test Compiler.is_consistent(getfield_effects(𝕃, Any[Some{Symbol}, Core.Const(:value)], Symbol)) + @test Compiler.is_consistent(getfield_effects(𝕃, Any[Union{Some{Symbol},Some{String}}, Core.Const(:value)], Union{Symbol,String})) +end +@test Base.infer_effects((Bool,)) do c + obj = c ? 
Some{String}("foo") : Some{Symbol}(:bar) + return getfield(obj, :value) +end |> Compiler.is_consistent + +# getfield is nothrow when bounds checking is turned off +@test Base.infer_effects((Tuple{Int,Int},Int)) do t, i + getfield(t, i, false) +end |> Compiler.is_nothrow +@test Base.infer_effects((Tuple{Int,Int},Symbol)) do t, i + getfield(t, i, false) +end |> Compiler.is_nothrow +@test Base.infer_effects((Tuple{Int,Int},String)) do t, i + getfield(t, i, false) # invalid name type +end |> !Compiler.is_nothrow + +@test Base.infer_effects((Some{Any},)) do some + getfield(some, 1, :not_atomic) +end |> Compiler.is_nothrow +@test Base.infer_effects((Some{Any},)) do some + getfield(some, 1, :invalid_atomic_spec) +end |> !Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck + getfield(some, 1, boundscheck) +end |> Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck + getfield(some, 1, :not_atomic, boundscheck) +end |> Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck + getfield(some, 1, :invalid_atomic_spec, boundscheck) +end |> !Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Any)) do some, boundscheck + getfield(some, 1, :not_atomic, boundscheck) +end |> !Compiler.is_nothrow + +@test Compiler.is_consistent(Base.infer_effects(setindex!, (Base.RefValue{Int}, Int))) + +# :inaccessiblememonly effect +const global constant_global::Int = 42 +const global ConstantType = Ref +global nonconstant_global::Int = 42 +const global constant_mutable_global = Ref(0) +const global constant_global_nonisbits = Some(:foo) +@test Base.infer_effects() do + constant_global +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConstantType +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConstantType{Any}() +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + constant_global_nonisbits +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :constant_global) +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + nonconstant_global +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :nonconstant_global) +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects((Symbol,)) do name + getglobal(@__MODULE__, name) +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects((Int,)) do v + global nonconstant_global = v +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects((Int,)) do v + setglobal!(@__MODULE__, :nonconstant_global, v) +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects((Int,)) do v + constant_mutable_global[] = v +end |> !Compiler.is_inaccessiblememonly +module ConsistentModule +const global constant_global::Int = 42 +const global ConstantType = Ref +end # module +@test Base.infer_effects() do + ConsistentModule.constant_global +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConsistentModule.ConstantType +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConsistentModule.ConstantType{Any}() +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :ConsistentModule).constant_global +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :ConsistentModule).ConstantType +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, 
:ConsistentModule).ConstantType{Any}() +end |> Compiler.is_inaccessiblememonly +@test Base.infer_effects((Module,)) do M + M.constant_global +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects((Module,)) do M + M.ConstantType +end |> !Compiler.is_inaccessiblememonly +@test Base.infer_effects() do M + M.ConstantType{Any}() +end |> !Compiler.is_inaccessiblememonly + +# the `:inaccessiblememonly` helper effect allows us to prove `:consistent`-cy of frames +# including `getfield` / `isdefined` accessing to local mutable object + +mutable struct SafeRef{T} + x::T +end +Base.getindex(x::SafeRef) = x.x; +Base.setindex!(x::SafeRef, v) = x.x = v; +Base.isassigned(x::SafeRef) = true; + +function mutable_consistent(s) + SafeRef(s)[] +end +@test Compiler.is_inaccessiblememonly(Base.infer_effects(mutable_consistent, (Symbol,))) +@test fully_eliminated(; retval=:foo) do + mutable_consistent(:foo) +end + +function nested_mutable_consistent(s) + SafeRef(SafeRef(SafeRef(SafeRef(SafeRef(s)))))[][][][][] +end +@test Compiler.is_inaccessiblememonly(Base.infer_effects(nested_mutable_consistent, (Symbol,))) +@test fully_eliminated(; retval=:foo) do + nested_mutable_consistent(:foo) +end + +const consistent_global = Some(:foo) +@test Base.infer_effects() do + consistent_global.value +end |> Compiler.is_consistent +const inconsistent_global = SafeRef(:foo) +@test Base.infer_effects() do + inconsistent_global[] +end |> !Compiler.is_consistent +const inconsistent_condition_ref = Ref{Bool}(false) +@test Base.infer_effects() do + if inconsistent_condition_ref[] + return 0 + else + return 1 + end +end |> !Compiler.is_consistent + +# should handle va-method properly +callgetfield1(xs...) = getfield(getfield(xs, 1), 1) +@test !Compiler.is_inaccessiblememonly(Base.infer_effects(callgetfield1, (Base.RefValue{Symbol},))) +const GLOBAL_XS = Ref(:julia) +global_getfield() = callgetfield1(GLOBAL_XS) +@test let + Base.Experimental.@force_compile + global_getfield() +end === :julia +GLOBAL_XS[] = :julia2 +@test let + Base.Experimental.@force_compile + global_getfield() +end === :julia2 + +# the `:inaccessiblememonly` helper effect allows us to prove `:effect_free`-ness of frames +# including `setfield!` modifying local mutable object + +const global_ref = Ref{Any}() +global const global_bit::Int = 42 +makeref() = Ref{Any}() +setref!(ref, @nospecialize v) = ref[] = v + +@noinline function removable_if_unused1() + x = makeref() + setref!(x, 42) + x +end +@noinline function removable_if_unused2() + x = makeref() + setref!(x, global_bit) + x +end +for f = Any[removable_if_unused1, removable_if_unused2] + effects = Base.infer_effects(f) + @test Compiler.is_inaccessiblememonly(effects) + @test Compiler.is_effect_free(effects) + @test Compiler.is_removable_if_unused(effects) + @test @eval fully_eliminated() do + $f() + nothing + end +end +@noinline function removable_if_unused3(v) + x = makeref() + setref!(x, v) + x +end +let effects = Base.infer_effects(removable_if_unused3, (Int,)) + @test Compiler.is_inaccessiblememonly(effects) + @test Compiler.is_effect_free(effects) + @test Compiler.is_removable_if_unused(effects) +end +@test fully_eliminated((Int,)) do v + removable_if_unused3(v) + nothing +end + +@noinline function unremovable_if_unused1!(x) + setref!(x, 42) +end +@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (typeof(global_ref),))) +@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (Any,))) + +@noinline function unremovable_if_unused2!() + 
setref!(global_ref, 42) +end +@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused2!)) + +@noinline function unremovable_if_unused3!() + getfield(@__MODULE__, :global_ref)[] = nothing +end +@test !Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused3!)) + +# array ops +# ========= + +# allocation +# ---------- + +# low-level constructor +@noinline construct_array(@nospecialize(T), args...) = Array{T}(undef, args...) +# should eliminate safe but dead allocations +let good_dims = [1, 2, 3, 4, 10] + Ns = [1, 2, 3, 4, 10] + for dim = good_dims, N = Ns + Int64(dim)^N > typemax(Int) && continue + dims = ntuple(i->dim, N) + @test @eval Base.infer_effects() do + construct_array(Int, $(dims...)) + end |> Compiler.is_removable_if_unused + @test @eval fully_eliminated() do + construct_array(Int, $(dims...)) + nothing + end + end +end +# should analyze throwness correctly +let bad_dims = [-1, typemax(Int)] + for dim in bad_dims, N in [1, 2, 3, 4, 10] + for T in Any[Int, Union{Missing,Nothing}, Missing, Any] + dims = ntuple(i->dim, N) + @test @eval Base.infer_effects() do + construct_array($T, $(dims...)) + end |> !Compiler.is_removable_if_unused + @test @eval !fully_eliminated() do + construct_array($T, $(dims...)) + nothing + end + @test_throws "invalid " @eval construct_array($T, $(dims...)) + end + end +end + +# high-level interfaces +# getindex +for safesig = Any[ + (Type{Int},) + (Type{Int}, Int) + (Type{Int}, Int, Int) + (Type{Number},) + (Type{Number}, Number) + (Type{Number}, Int) + (Type{Any},) + (Type{Any}, Any,) + (Type{Any}, Any, Any) + ] + let effects = Base.infer_effects(getindex, safesig) + @test Compiler.is_consistent_if_notreturned(effects) + @test Compiler.is_removable_if_unused(effects) + end +end +for unsafesig = Any[ + (Type{Int}, String) + (Type{Int}, Any) + (Type{Number}, AbstractString) + (Type{Number}, Any) + ] + let effects = Base.infer_effects(getindex, unsafesig) + @test !Compiler.is_nothrow(effects) + end +end +# vect +for safesig = Any[ + () + (Int,) + (Int, Int) + ] + let effects = Base.infer_effects(Base.vect, safesig) + @test Compiler.is_consistent_if_notreturned(effects) + @test Compiler.is_removable_if_unused(effects) + end +end + +# array getindex +let tt = (MemoryRef{Any},Symbol,Bool) + @testset let effects = Base.infer_effects(Core.memoryrefget, tt) + @test Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Compiler.is_effect_free(effects) + @test !Compiler.is_nothrow(effects) + @test Compiler.is_terminates(effects) + end +end + +# array setindex! 
+let tt = (MemoryRef{Any},Any,Symbol,Bool) + @testset let effects = Base.infer_effects(Core.memoryrefset!, tt) + @test Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Compiler.is_effect_free_if_inaccessiblememonly(effects) + @test !Compiler.is_nothrow(effects) + @test Compiler.is_terminates(effects) + end +end +# nothrow for arrayset +@test Base.infer_effects((MemoryRef{Int},Int)) do a, v + Core.memoryrefset!(a, v, :not_atomic, true) +end |> !Compiler.is_nothrow +@test Base.infer_effects((MemoryRef{Int},Int)) do a, v + a[] = v # may throw +end |> !Compiler.is_nothrow +# when bounds checking is turned off, it should be safe +@test Base.infer_effects((MemoryRef{Int},Int)) do a, v + Core.memoryrefset!(a, v, :not_atomic, false) +end |> Compiler.is_nothrow +@test Base.infer_effects((MemoryRef{Number},Number)) do a, v + Core.memoryrefset!(a, v, :not_atomic, false) +end |> Compiler.is_nothrow + +# arraysize +# --------- + +let effects = Base.infer_effects(size, (Array,Int)) + @test Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Compiler.is_effect_free(effects) + @test !Compiler.is_nothrow(effects) + @test Compiler.is_terminates(effects) +end +# Test that arraysize has proper effect modeling +@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},)) + +# arraylen +# -------- + +let effects = Base.infer_effects(length, (Vector{Any},)) + @test Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Compiler.is_effect_free(effects) + @test Compiler.is_nothrow(effects) + @test Compiler.is_terminates(effects) +end + +# resize +# ------ + +#for op = Any[ +# Base._growbeg!, +# Base._growend!, +# Base._deletebeg!, +# Base._deleteend!, +# ] +# let effects = Base.infer_effects(op, (Vector, Int)) +# @test Compiler.is_effect_free_if_inaccessiblememonly(effects) +# @test Compiler.is_terminates(effects) +# @test !Compiler.is_nothrow(effects) +# end +#end + +@test Compiler.is_noub(Base.infer_effects(Base._growbeg!, (Vector{Int}, Int))) +@test Compiler.is_noub(Base.infer_effects(Base._growbeg!, (Vector{Any}, Int))) +@test Compiler.is_noub(Base.infer_effects(Base._growend!, (Vector{Int}, Int))) +@test Compiler.is_noub(Base.infer_effects(Base._growend!, (Vector{Any}, Int))) + +# tuple indexing +# -------------- + +@test Compiler.is_foldable(Base.infer_effects(iterate, Tuple{Tuple{Int, Int}, Int})) + +# end to end +# ---------- + +#function simple_vec_ops(T, op!, op, xs...) +# a = T[] +# op!(a, xs...) +# return op(a) +#end +#for T = Any[Int,Any], op! 
= Any[push!,pushfirst!], op = Any[length,size], +# xs = Any[(Int,), (Int,Int,)] +# let effects = Base.infer_effects(simple_vec_ops, (Type{T},typeof(op!),typeof(op),xs...)) +# @test Compiler.is_foldable(effects) +# end +#end + +# Test that builtin_effects handles vararg correctly +@test !Compiler.is_nothrow(Compiler.builtin_effects(Compiler.fallback_lattice, Core.isdefined, + Any[String, Vararg{Any}], Bool)) + +# Test that :new can be eliminated even if an sparam is unknown +struct SparamUnused{T} + x + SparamUnused(x::T) where {T} = new{T}(x) +end +mksparamunused(x) = (SparamUnused(x); nothing) +let src = code_typed1(mksparamunused, (Any,)) + @test count(isnew, src.code) == 0 +end + +struct WrapperOneField{T} + x::T +end + +# Effects for getfield of type instance +@test Base.infer_effects(Tuple{Nothing}) do x + WrapperOneField{typeof(x)}.instance +end |> Compiler.is_foldable_nothrow +@test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s + getfield(w, s) +end |> Compiler.is_foldable +@test Base.infer_effects(Tuple{WrapperOneField{Symbol}, Symbol}) do w, s + getfield(w, s) +end |> Compiler.is_foldable + +# Flow-sensitive consistent for _typevar +@test Base.infer_effects() do + return WrapperOneField == (WrapperOneField{T} where T) +end |> Compiler.is_foldable_nothrow + +# Test that dead `@inbounds` does not taint consistency +# https://github.com/JuliaLang/julia/issues/48243 +@test Base.infer_effects(Tuple{Int64}) do i + false && @inbounds (1,2,3)[i] + return 1 +end |> Compiler.is_foldable_nothrow + +@test Base.infer_effects(Tuple{Int64}) do i + @inbounds (1,2,3)[i] +end |> !Compiler.is_noub + +@test Base.infer_effects(Tuple{Tuple{Int64}}) do x + @inbounds x[1] +end |> Compiler.is_foldable_nothrow + +# Test that :new of non-concrete, but otherwise known type +# does not taint consistency. +@eval struct ImmutRef{T} + x::T + ImmutRef(x) = $(Expr(:new, :(ImmutRef{typeof(x)}), :x)) +end +@test Compiler.is_foldable(Base.infer_effects(ImmutRef, Tuple{Any})) + +@test Compiler.is_foldable_nothrow(Base.infer_effects(typejoin, ())) + +# nothrow-ness of subtyping operations +# https://github.com/JuliaLang/julia/pull/48566 +@test !Compiler.is_nothrow(Base.infer_effects((A,B)->A<:B, (Any,Any))) +@test !Compiler.is_nothrow(Base.infer_effects((A,B)->A>:B, (Any,Any))) + +# GotoIfNot should properly mark itself as throwing when given a non-Bool +# https://github.com/JuliaLang/julia/pull/48583 +gotoifnot_throw_check_48583(x) = x ? 
x : 0 +@test !Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Missing,))) +@test !Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Any,))) +@test Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Bool,))) + +# unknown :static_parameter should taint :nothrow +# https://github.com/JuliaLang/julia/issues/46771 +unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = (T; nothing) +unknown_sparam_nothrow1(x::Ref{T}) where T = (T; nothing) +unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing) +@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{Int},))) +@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{<:Integer},))) +@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type,))) +@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Nothing,))) +@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Union{Type{Int},Nothing},))) +@test !Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Any,))) +@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow1, (Ref,))) +@test Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) + +# purely abstract recursion should not taint :terminates +# https://github.com/JuliaLang/julia/issues/48983 +abstractly_recursive1() = abstractly_recursive2() +abstractly_recursive2() = (Base._return_type(abstractly_recursive1, Tuple{}); 1) +abstractly_recursive3() = abstractly_recursive2() +@test_broken Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ())) +actually_recursive1(x) = actually_recursive2(x) +actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1) +actually_recursive3(x) = actually_recursive2(x) +@test !Compiler.is_terminates(Base.infer_effects(actually_recursive3, (Int,))) + +# `isdefined` effects +struct MaybeSome{T} + value::T + MaybeSome(x::T) where T = new{T}(x) + MaybeSome{T}(x::T) where T = new{T}(x) + MaybeSome{T}() where T = new{T}() +end +const undefined_ref = Ref{String}() +const defined_ref = Ref{String}("julia") +const undefined_some = MaybeSome{String}() +const defined_some = MaybeSome{String}("julia") +let effects = Base.infer_effects() do + isdefined(undefined_ref, :x) + end + @test !Compiler.is_consistent(effects) + @test Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + isdefined(defined_ref, :x) + end + @test !Compiler.is_consistent(effects) + @test Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + isdefined(undefined_some, :value) + end + @test Compiler.is_consistent(effects) + @test Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + isdefined(defined_some, :value) + end + @test Compiler.is_consistent(effects) + @test Compiler.is_nothrow(effects) +end +# high-level interface test +isassigned_effects(s) = isassigned(Ref(s)) +@test Compiler.is_consistent(Base.infer_effects(isassigned_effects, (Symbol,))) +@test fully_eliminated(; retval=true) do + isassigned_effects(:foo) +end + +# inference on throw block should be disabled only when the effects are already known to be +# concrete-eval ineligible: +function optimize_throw_block_for_effects(x) + a = [x] + if x < 0 + throw(ArgumentError(lazy"negative number given: $x")) + end + return a +end +let effects = Base.infer_effects(optimize_throw_block_for_effects, (Int,)) + @test Compiler.is_consistent_if_notreturned(effects) + @test Compiler.is_effect_free(effects) + @test 
!Compiler.is_nothrow(effects) + @test Compiler.is_terminates(effects) +end + +# :isdefined effects +@test @eval Base.infer_effects() do + @isdefined($(gensym("some_undef_symbol"))) +end |> !Compiler.is_consistent + +# Effects of Base.hasfield (#50198) +hf50198(s) = hasfield(typeof((;x=1, y=2)), s) +f50198() = (hf50198(Ref(:x)[]); nothing) +@test fully_eliminated(f50198) + +# Effects properly applied to flags by irinterp (#50311) +f50311(x, s) = Symbol(s) +g50311(x) = Val{f50311((1.0, x), "foo")}() +@test fully_eliminated(g50311, Tuple{Float64}) + +# getglobal effects +const my_defined_var = 42 +@test Base.infer_effects() do + getglobal(@__MODULE__, :my_defined_var, :monotonic) +end |> Compiler.is_foldable_nothrow +@test Base.infer_effects() do + getglobal(@__MODULE__, :my_defined_var, :foo) +end |> !Compiler.is_nothrow +@test Base.infer_effects() do + getglobal(@__MODULE__, :my_defined_var, :foo, nothing) +end |> !Compiler.is_nothrow + +# irinterp should refine `:nothrow` information only if profitable +Base.@assume_effects :nothrow function irinterp_nothrow_override(x, y) + z = sin(y) + if x + return "julia" + end + return z +end +@test Base.infer_effects((Float64,)) do y + isinf(y) && return zero(y) + irinterp_nothrow_override(true, y) +end |> Compiler.is_nothrow + +# Effects for :compilerbarrier +f1_compilerbarrier(b) = Base.compilerbarrier(:type, b) +f2_compilerbarrier(b) = Base.compilerbarrier(:conditional, b) + +@test !Compiler.is_consistent(Base.infer_effects(f1_compilerbarrier, (Bool,))) +@test Compiler.is_consistent(Base.infer_effects(f2_compilerbarrier, (Bool,))) + +# Optimizer-refined effects +function f1_optrefine(b) + if Base.inferencebarrier(b) + error() + end + return b +end +@test !Compiler.is_consistent(Base.infer_effects(f1_optrefine, (Bool,))) + +function f2_optrefine() + if Ref(false)[] + error() + end + return true +end +@test !Compiler.is_nothrow(Base.infer_effects(f2_optrefine; optimize=false)) +@test Compiler.is_nothrow(Base.infer_effects(f2_optrefine)) + +function f3_optrefine(x) + @fastmath sqrt(x) + return x +end +@test !Compiler.is_consistent(Base.infer_effects(f3_optrefine; optimize=false)) +@test Compiler.is_consistent(Base.infer_effects(f3_optrefine, (Float64,))) + +# Check that :consistent is properly modeled for throwing statements +const GLOBAL_MUTABLE_SWITCH = Ref{Bool}(false) + +check_switch(switch::Base.RefValue{Bool}) = (switch[] && error(); return nothing) +check_switch2() = check_switch(GLOBAL_MUTABLE_SWITCH) + +@test (Base.return_types(check_switch2) |> only) === Nothing +GLOBAL_MUTABLE_SWITCH[] = true +# Check that flipping the switch doesn't accidentally change the return type +@test (Base.return_types(check_switch2) |> only) === Nothing + +@test !Compiler.is_consistent(Base.infer_effects(check_switch, (Base.RefValue{Bool},))) + +# post-opt IPO analysis refinement of `:effect_free`-ness +function post_opt_refine_effect_free(y, c=true) + x = Ref(c) + if x[] + return true + else + r = y[] isa Number + y[] = nothing + end + return r +end +@test Compiler.is_effect_free(Base.infer_effects(post_opt_refine_effect_free, (Base.RefValue{Any},))) +@test Base.infer_effects((Base.RefValue{Any},)) do y + post_opt_refine_effect_free(y, true) +end |> Compiler.is_effect_free + +# Check EA-based refinement of :effect_free +Base.@assume_effects :nothrow @noinline _noinline_set!(x) = (x[] = 1; nothing) + +function set_ref_with_unused_arg_1(_) + x = Ref(0) + _noinline_set!(x) + return nothing +end +function set_ref_with_unused_arg_2(_) + x = @noinline Ref(0) + 
_noinline_set!(x) + return nothing +end +function set_arg_ref!(x) + _noinline_set!(x) + y = Ref(false) + y[] && (Main.x = x) + return nothing +end + +function set_arr_with_unused_arg_1(_) + x = Int[0] + _noinline_set!(x) + return nothing +end +function set_arr_with_unused_arg_2(_) + x = @noinline Int[0] + _noinline_set!(x) + return nothing +end +function set_arg_arr!(x) + _noinline_set!(x) + y = Bool[false] + y[] && (Main.x = x) + return nothing +end + +# This is inferable by type analysis only since the arguments have no mutable memory +@test Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(_noinline_set!, (Base.RefValue{Int},))) +@test Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(_noinline_set!, (Vector{Int},))) +for func in (set_ref_with_unused_arg_1, set_ref_with_unused_arg_2, + set_arr_with_unused_arg_1, set_arr_with_unused_arg_2) + effects = Base.infer_effects(func, (Nothing,)) + @test Compiler.is_inaccessiblememonly(effects) + @test Compiler.is_effect_free(effects) +end + +# These need EA +@test Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_1, (Base.RefValue{Int},))) +@test Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},))) +@test Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_ref!, (Base.RefValue{Int},))) +@test_broken Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_1, (Vector{Int},))) +@test_broken Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},))) +@test_broken Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_arr!, (Vector{Int},))) + +# EA-based refinement of :effect_free +function f_EA_refine(ax, b) + bx = Ref{Any}() + @noinline bx[] = b + return ax[] + b +end +@test Compiler.is_effect_free(Base.infer_effects(f_EA_refine, (Base.RefValue{Int},Int))) + +function issue51837(; openquotechar::Char, newlinechar::Char) + ncodeunits(openquotechar) == 1 || throw(ArgumentError("`openquotechar` must be a single-byte character")) + if !isnothing(newlinechar) + ncodeunits(newlinechar) > 1 && throw(ArgumentError("`newlinechar` must be a single-byte character.")) + end + return nothing +end +@test Base.infer_effects() do openquotechar::Char, newlinechar::Char + issue51837(; openquotechar, newlinechar) +end |> !Compiler.is_nothrow +@test_throws ArgumentError issue51837(; openquotechar='α', newlinechar='\n') + +# idempotency of effects derived by post-opt analysis +callgetfield(x, f) = getfield(x, f, Base.@_boundscheck) +@test Base.infer_effects(callgetfield, (Some{Any},Symbol)).noub === Compiler.NOUB_IF_NOINBOUNDS +callgetfield1(x, f) = getfield(x, f, Base.@_boundscheck) +callgetfield_simple(x, f) = callgetfield1(x, f) +@test Base.infer_effects(callgetfield_simple, (Some{Any},Symbol)).noub === + Base.infer_effects(callgetfield_simple, (Some{Any},Symbol)).noub === + Compiler.ALWAYS_TRUE +callgetfield2(x, f) = getfield(x, f, Base.@_boundscheck) +callgetfield_inbounds(x, f) = @inbounds callgetfield2(x, f) +@test Base.infer_effects(callgetfield_inbounds, (Some{Any},Symbol)).noub === + Base.infer_effects(callgetfield_inbounds, (Some{Any},Symbol)).noub === + Compiler.ALWAYS_FALSE + +# noub modeling for memory ops +let (memoryrefnew, memoryrefget, memoryref_isassigned, memoryrefset!) = + (Core.memoryrefnew, Core.memoryrefget, Core.memoryref_isassigned, Core.memoryrefset!) + function builtin_effects(@nospecialize xs...) 
+ interp = Compiler.NativeInterpreter() + 𝕃 = Compiler.typeinf_lattice(interp) + rt = Compiler.builtin_tfunction(interp, xs..., nothing) + return Compiler.builtin_effects(𝕃, xs..., rt) + end + @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[Memory,])) + @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int])) + @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Core.Const(true)])) + @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Core.Const(false)])) + @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Bool])) + @test Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Int])) + @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Int,Vararg{Bool}])) + @test !Compiler.is_noub(builtin_effects(memoryrefnew, Any[MemoryRef,Vararg{Any}])) + @test Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(true)])) + @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(false)])) + @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Bool])) + @test Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Int])) + @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Vararg{Bool}])) + @test !Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Vararg{Any}])) + @test Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Core.Const(true)])) + @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Core.Const(false)])) + @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Bool])) + @test Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Int])) + @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Vararg{Bool}])) + @test !Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Vararg{Any}])) + @test Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Core.Const(true)])) + @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Core.Const(false)])) + @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Bool])) + @test Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Int])) + @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Vararg{Bool}])) + @test !Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Vararg{Any}])) + # `:boundscheck` taint should be refined by post-opt analysis + @test Base.infer_effects() do xs::Vector{Any}, i::Int + memoryrefget(memoryrefnew(getfield(xs, :ref), i, Base.@_boundscheck), :not_atomic, Base.@_boundscheck) + end |> Compiler.is_noub_if_noinbounds +end + +# high level tests +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex, (Vector{Int},Int))) +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex, (Vector{Any},Int))) +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(setindex!, (Vector{Int},Int,Int))) +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(Base._setindex!, (Vector{Any},Any,Int))) +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(isassigned, (Vector{Int},Int))) +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(isassigned, (Vector{Any},Int))) +@test Base.infer_effects((Vector{Int},Int)) do xs, i + xs[i] +end |> Compiler.is_noub +@test Base.infer_effects((Vector{Any},Int)) do xs, i + xs[i] +end |> 
Compiler.is_noub +@test Base.infer_effects((Vector{Int},Int,Int)) do xs, x, i + xs[i] = x +end |> Compiler.is_noub +@test Base.infer_effects((Vector{Any},Any,Int)) do xs, x, i + xs[i] = x +end |> Compiler.is_noub +@test Base.infer_effects((Vector{Int},Int)) do xs, i + @inbounds xs[i] +end |> !Compiler.is_noub +@test Base.infer_effects((Vector{Any},Int)) do xs, i + @inbounds xs[i] +end |> !Compiler.is_noub +Base.@propagate_inbounds getindex_propagate(xs, i) = xs[i] +getindex_dont_propagate(xs, i) = xs[i] +@test Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex_propagate, (Vector{Any},Int))) +@test Compiler.is_noub(Base.infer_effects(getindex_dont_propagate, (Vector{Any},Int))) +@test Base.infer_effects((Vector{Any},Int)) do xs, i + @inbounds getindex_propagate(xs, i) +end |> !Compiler.is_noub +@test Base.infer_effects((Vector{Any},Int)) do xs, i + @inbounds getindex_dont_propagate(xs, i) +end |> Compiler.is_noub + +# refine `:nothrow` when `exct` is known to be `Bottom` +@test Base.infer_exception_type(getindex, (Vector{Int},Int)) == BoundsError +function getindex_nothrow(xs::Vector{Int}, i::Int) + try + return xs[i] + catch err + err isa BoundsError && return nothing + rethrow(err) + end +end +@test Compiler.is_nothrow(Base.infer_effects(getindex_nothrow, (Vector{Int}, Int))) + +# callsite `@assume_effects` annotation +let ast = code_lowered((Int,)) do x + Base.@assume_effects :total identity(x) + end |> only + ssaflag = ast.ssaflags[findfirst(!iszero, ast.ssaflags)::Int] + override = Compiler.decode_statement_effects_override(ssaflag) + # if this gets broken, check if this is synced with expr.jl + @test override.consistent && override.effect_free && override.nothrow && + override.terminates_globally && !override.terminates_locally && + override.notaskstate && override.inaccessiblememonly && + override.noub && !override.noub_if_noinbounds +end +@test Base.infer_effects((Float64,)) do x + isinf(x) && return 0.0 + return Base.@assume_effects :nothrow sin(x) +end |> Compiler.is_nothrow +let effects = Base.infer_effects((Vector{Float64},)) do xs + isempty(xs) && return 0.0 + Base.@assume_effects :nothrow begin + x = Base.@assume_effects :noub @inbounds xs[1] + isinf(x) && return 0.0 + return sin(x) + end + end + # all nested overrides should be applied + @test Compiler.is_nothrow(effects) + @test Compiler.is_noub(effects) +end +@test Base.infer_effects((Int,)) do x + res = 1 + 0 ≤ x < 20 || error("bad fact") + Base.@assume_effects :terminates_locally while x > 1 + res *= x + x -= 1 + end + return res +end |> Compiler.is_terminates + +# https://github.com/JuliaLang/julia/issues/52531 +const a52531 = Core.Ref(1) +@eval getref52531() = $(QuoteNode(a52531)).x +@test !Compiler.is_consistent(Base.infer_effects(getref52531)) +let + global set_a52531!, get_a52531 + _a::Int = -1 + set_a52531!(a::Int) = (_a = a; return get_a52531()) + get_a52531() = _a +end +@test !Compiler.is_consistent(Base.infer_effects(set_a52531!, (Int,))) +@test !Compiler.is_consistent(Base.infer_effects(get_a52531, ())) +@test get_a52531() == -1 +@test set_a52531!(1) == 1 +@test get_a52531() == 1 + +let + global is_initialized52531, set_initialized52531! 
+ _is_initialized = false + set_initialized52531!(flag::Bool) = (_is_initialized = flag) + is_initialized52531() = _is_initialized +end +top_52531(_) = (set_initialized52531!(true); nothing) +@test !Compiler.is_consistent(Base.infer_effects(is_initialized52531)) +@test !Compiler.is_removable_if_unused(Base.infer_effects(set_initialized52531!, (Bool,))) +@test !is_initialized52531() +top_52531(0) +@test is_initialized52531() + +const ref52843 = Ref{Int}() +@eval func52843() = ($ref52843[] = 1; nothing) +@test !Compiler.is_foldable(Base.infer_effects(func52843)) +let; Base.Experimental.@force_compile; func52843(); end +@test ref52843[] == 1 + +@test Compiler.is_inaccessiblememonly(Base.infer_effects(identity∘identity, Tuple{Any})) +@test Compiler.is_inaccessiblememonly(Base.infer_effects(()->Vararg, Tuple{})) + +# pointerref nothrow for invalid pointer +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.pointerref, Any[Type{Ptr{Vector{Int64}}}, Int, Int]) +@test !Compiler.intrinsic_nothrow(Core.Intrinsics.pointerref, Any[Type{Ptr{T}} where T, Int, Int]) + +# post-opt :consistent-cy analysis correctness +# https://github.com/JuliaLang/julia/issues/53508 +@test !Compiler.is_consistent(Base.infer_effects(getindex, (UnitRange{Int},Int))) +@test !Compiler.is_consistent(Base.infer_effects(getindex, (Base.OneTo{Int},Int))) + +@noinline f53613() = @assert isdefined(@__MODULE__, :v53613) +g53613() = f53613() +h53613() = g53613() +@test !Compiler.is_consistent(Base.infer_effects(f53613)) +@test !Compiler.is_consistent(Base.infer_effects(g53613)) +@test_throws AssertionError f53613() +@test_throws AssertionError g53613() +@test_throws AssertionError h53613() +global v53613 = nothing +@test f53613() === nothing +@test g53613() === nothing +@test h53613() === nothing + +# tuple/svec effects +@test Base.infer_effects((Vector{Any},)) do xs + Core.tuple(xs...) +end |> Compiler.is_nothrow +@test Base.infer_effects((Vector{Any},)) do xs + Core.svec(xs...) +end |> Compiler.is_nothrow + +# effects for unknown `:foreigncall`s +@test Base.infer_effects() do + @ccall unsafecall()::Cvoid +end == Compiler.EFFECTS_UNKNOWN diff --git a/test/compiler/inference.jl b/Compiler/test/inference.jl similarity index 69% rename from test/compiler/inference.jl rename to Compiler/test/inference.jl index ded9438037733..21d29c376bb27 100644 --- a/test/compiler/inference.jl +++ b/Compiler/test/inference.jl @@ -1,14 +1,16 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# tests for Core.Compiler correctness and precision -import Core.Compiler: Const, Conditional, ⊑, ReturnNode, GotoIfNot -isdispatchelem(@nospecialize x) = !isa(x, Type) || Core.Compiler.isdispatchelem(x) - -using Random, Core.IR -using InteractiveUtils: code_llvm +using Test include("irutils.jl") +# tests for Compiler correctness and precision +using .Compiler: Conditional, ⊑ +isdispatchelem(@nospecialize x) = !isa(x, Type) || Compiler.isdispatchelem(x) + +using Random, Core.IR +using InteractiveUtils + f39082(x::Vararg{T}) where {T <: Number} = x[1] let ast = only(code_typed(f39082, Tuple{Vararg{Rational}}))[1] @test ast.slottypes == Any[Const(f39082), Tuple{Vararg{Rational}}] @@ -18,161 +20,245 @@ let ast = only(code_typed(f39082, Tuple{Rational, Vararg{Rational}}))[1] end # demonstrate some of the type-size limits -@test Core.Compiler.limit_type_size(Ref{Complex{T} where T}, Ref, Ref, 100, 0) == Ref -@test Core.Compiler.limit_type_size(Ref{Complex{T} where T}, Ref{Complex{T} where T}, Ref, 100, 0) == Ref{Complex{T} where T} +@test Compiler.limit_type_size(Ref{Complex{T} where T}, Ref, Ref, 100, 0) == Ref +@test Compiler.limit_type_size(Ref{Complex{T} where T}, Ref{Complex{T} where T}, Ref, 100, 0) == Ref{Complex{T} where T} let comparison = Tuple{X, X} where X<:Tuple sig = Tuple{X, X} where X<:comparison ref = Tuple{X, X} where X - @test Core.Compiler.limit_type_size(sig, comparison, comparison, 100, 100) == Tuple{Tuple, Tuple} - @test Core.Compiler.limit_type_size(sig, ref, comparison, 100, 100) == Tuple{Any, Any} - @test Core.Compiler.limit_type_size(Tuple{sig}, Tuple{ref}, comparison, 100, 100) == Tuple{Tuple{Any, Any}} - @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref + @test Compiler.limit_type_size(sig, comparison, comparison, 100, 100) == Tuple{Tuple, Tuple} + @test Compiler.limit_type_size(sig, ref, comparison, 100, 100) == Tuple{Any, Any} + @test Compiler.limit_type_size(Tuple{sig}, Tuple{ref}, comparison, 100, 100) == Tuple{Tuple{Any, Any}} + @test Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref end let ref = Tuple{T, Val{T}} where T<:Val sig = Tuple{T, Val{T}} where T<:(Val{T} where T<:Val) - @test Core.Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val} - @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref + @test Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val} + @test Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref end let ref = Tuple{T, Val{T}} where T<:(Val{T} where T<:(Val{T} where T<:(Val{T} where T<:Val))) sig = Tuple{T, Val{T}} where T<:(Val{T} where T<:(Val{T} where T<:(Val{T} where T<:(Val{T} where T<:Val)))) - @test Core.Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val} - @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref + @test Compiler.limit_type_size(sig, ref, Union{}, 100, 100) == Tuple{Val, Val} + @test Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref end let t = Tuple{Ref{T},T,T} where T, c = Tuple{Ref, T, T} where T # #36407 - @test t <: Core.Compiler.limit_type_size(t, c, Union{}, 1, 100) + @test t <: Compiler.limit_type_size(t, c, Union{}, 1, 100) end # obtain Vararg with 2 undefined fields let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tuple{Tuple{Vararg{Any, N}}} where N)[2][1] - @test Core.Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple + @test Compiler.__limit_type_size(Tuple, 
va, Core.svec(va, Union{}), 2, 2) === Tuple end mutable struct TS14009{T}; end let A = TS14009{TS14009{TS14009{TS14009{TS14009{T}}}}} where {T}, B = Base.rewrap_unionall(TS14009{Base.unwrap_unionall(A)}, A) - @test Core.Compiler.Compiler.limit_type_size(B, A, A, 2, 2) == TS14009 + @test Compiler.Compiler.limit_type_size(B, A, A, 2, 2) == TS14009 end # issue #42835 -@test !Core.Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Type{Int}}}, Type{Type{Int}}, Core.svec(Type{Type{Int}}), 1, 1, 1) - -@test Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(Type{ComplexF32}), 1, 1, 1) -@test Core.Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Type{ComplexF32}}}, Type{Type{ComplexF32}}, Core.svec(Type{ComplexF32}), 1, 1, 1) +@test !Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1) +@test !Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1) +@test !Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1) +@test Compiler.limit_type_size(Type{Int}, Any, Union{}, 0, 0) == Type{Int} +@test Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Type{Int}}}, Type{Type{Int}}, Core.svec(Type{Type{Int}}), 1, 1, 1) + +@test Compiler.type_more_complex(ComplexF32, Any, Core.svec(), 1, 1, 1) +@test !Compiler.type_more_complex(ComplexF32, Any, Core.svec(Type{ComplexF32}), 1, 1, 1) +@test Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1) +@test !Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1) +@test Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1) +@test Compiler.limit_type_size(Type{ComplexF32}, ComplexF32, Union{}, 1, 1) == Type{<:Complex} +@test Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1) 
+@test Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Type{ComplexF32}}}, Type{Type{ComplexF32}}, Core.svec(Type{ComplexF32}), 1, 1, 1) # n.b. Type{Type{Union{}} === Type{Core.TypeofBottom} -@test !Core.Compiler.type_more_complex(Type{Union{}}, Any, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{Type{Union{}}}, Any, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Any, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Type{Type{Union{}}}, Core.svec(Type{Type{Union{}}}), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Type{Type{Type{Union{}}}}}, Type{Type{Type{Union{}}}}, Core.svec(Type{Type{Type{Union{}}}}), 1, 1, 1) - -@test !Core.Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 0, 1, 1) -@test Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1) -@test Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1) +@test !Compiler.type_more_complex(Type{Union{}}, Any, Core.svec(), 1, 1, 1) +@test !Compiler.type_more_complex(Type{Type{Union{}}}, Any, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Any, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Type{Type{Union{}}}, Core.svec(Type{Type{Union{}}}), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Type{Type{Union{}}}}}, Type{Type{Type{Union{}}}}, Core.svec(Type{Type{Type{Union{}}}}), 1, 1, 1) + +@test !Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1) +@test Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Union{Float32,Float64}}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1) +@test Compiler.type_more_complex(Type{Type{Union{Float32,Float64}}}, Type{Union{Float32,Float64}}, Core.svec(Type{Union{Float32,Float64}}), 1, 1, 1) +@test Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1) +@test Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1) # issue #49287 -@test !Core.Compiler.type_more_complex(Tuple{Vararg{Tuple{}}}, Tuple{Vararg{Tuple}}, Core.svec(), 0, 0, 0) -@test Core.Compiler.type_more_complex(Tuple{Vararg{Tuple}}, Tuple{Vararg{Tuple{}}}, Core.svec(), 0, 0, 0) +@test !Compiler.type_more_complex(Tuple{Vararg{Tuple{}}}, Tuple{Vararg{Tuple}}, Core.svec(), 0, 0, 0) +@test Compiler.type_more_complex(Tuple{Vararg{Tuple}}, Tuple{Vararg{Tuple{}}}, Core.svec(), 0, 0, 0) + +# issue #51694 +@test Compiler.type_more_complex( + Base.Generator{Base.Iterators.Flatten{Array{Bool, 1}}, typeof(identity)}, + Base.Generator{Array{Bool, 1}, typeof(identity)}, + Core.svec(), 0, 0, 0) +@test Compiler.type_more_complex( + Base.Generator{Base.Iterators.Flatten{Base.Generator{Array{Bool, 1}, typeof(identity)}}, typeof(identity)}, + Base.Generator{Array{Bool, 1}, typeof(identity)}, + 
Core.svec(), 0, 0, 0) let # 40336 - t = Type{Type{Int}} - c = Type{Int} - r = Core.Compiler.limit_type_size(t, c, c, 100, 100) + t = Type{Type{Type{Int}}} + c = Type{Type{Int}} + r = Compiler.limit_type_size(t, c, c, 100, 100) @test t !== r && t <: r end -@test Core.Compiler.unionlen(Union{}) == 1 -@test Core.Compiler.unionlen(Int8) == 1 -@test Core.Compiler.unionlen(Union{Int8, Int16}) == 2 -@test Core.Compiler.unionlen(Union{Int8, Int16, Int32, Int64}) == 4 -@test Core.Compiler.unionlen(Tuple{Union{Int8, Int16, Int32, Int64}}) == 1 -@test Core.Compiler.unionlen(Union{Int8, Int16, Int32, T} where T) == 1 - -@test Core.Compiler.unioncomplexity(Union{}) == 0 -@test Core.Compiler.unioncomplexity(Int8) == 0 -@test Core.Compiler.unioncomplexity(Val{Union{Int8, Int16, Int32, Int64}}) == 0 -@test Core.Compiler.unioncomplexity(Union{Int8, Int16}) == 1 -@test Core.Compiler.unioncomplexity(Union{Int8, Int16, Int32, Int64}) == 3 -@test Core.Compiler.unioncomplexity(Tuple{Union{Int8, Int16, Int32, Int64}}) == 3 -@test Core.Compiler.unioncomplexity(Union{Int8, Int16, Int32, T} where T) == 3 -@test Core.Compiler.unioncomplexity(Tuple{Val{T}, Union{Int8, Int16}, Int8} where T<:Union{Int8, Int16, Int32, Int64}) == 3 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Tuple{Union{Int8, Int16}}}}) == 1 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Symbol}}) == 0 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}) == 1 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}) == 2 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}}}}) == 3 +@test Compiler.limit_type_size(Type{Type{Type{Int}}}, Type, Union{}, 0, 0) == Type{<:Type} +@test Compiler.limit_type_size(Type{Type{Int}}, Type, Union{}, 0, 0) == Type{<:Type} +@test Compiler.limit_type_size(Type{Int}, Type, Union{}, 0, 0) == Type{Int} +@test Compiler.limit_type_size(Type{<:Int}, Type, Union{}, 0, 0) == Type{<:Int} +@test Compiler.limit_type_size(Type{ComplexF32}, ComplexF32, Union{}, 0, 0) == Type{<:Complex} # added nesting +@test Compiler.limit_type_size(Type{ComplexF32}, Type{ComplexF64}, Union{}, 0, 0) == Type{ComplexF32} # base matches +@test Compiler.limit_type_size(Type{ComplexF32}, Type, Union{}, 0, 0) == Type{<:Complex} +@test_broken Compiler.limit_type_size(Type{<:ComplexF64}, Type, Union{}, 0, 0) == Type{<:Complex} +@test Compiler.limit_type_size(Type{<:ComplexF64}, Type, Union{}, 0, 0) == Type #50692 +@test Compiler.limit_type_size(Type{Union{ComplexF32,ComplexF64}}, Type, Union{}, 0, 0) == Type +@test_broken Compiler.limit_type_size(Type{Union{ComplexF32,ComplexF64}}, Type, Union{}, 0, 0) == Type{<:Complex} #50692 +@test Compiler.limit_type_size(Type{Union{Float32,Float64}}, Type, Union{}, 0, 0) == Type +@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Type{Int}}, Union{}, 0, 0) == Type +@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Union{Type{Int},Type{Type{Int}}}, Union{}, 0, 0) == Type +@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Union{Type{Int},Type{Type{Int}}}}, Union{}, 0, 0) == Type{Union{Int, Type{Int}}} +@test Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Type{Int}}, Union{}, 0, 0) == Type + + +@test Compiler.limit_type_size(Type{Any}, Union{}, Union{}, 0, 0) == + Compiler.limit_type_size(Type{Any}, Any, Union{}, 0, 0) == + Compiler.limit_type_size(Type{Any}, Type, Union{}, 0, 0) 
== + Type{Any} + +# issue #43296 +struct C43296{t,I} end +r43296(b) = r43296(typeof(b)) +r43296(::Type) = nothing +r43296(::Nothing) = nonexistent +r43296(::Type{C43296{c,d}}) where {c,d} = f43296(r43296(c), e) +f43296(::Nothing, :) = nothing +f43296(g, :) = h +k43296(b, j, :) = l +k43296(b, j, ::Nothing) = b +i43296(b, j) = k43296(b, j, r43296(j)) +@test only(Base.return_types(i43296, (Int, C43296{C43296{C43296{Val, Tuple}}}))) <: Int +@test only(Base.return_types(i43296, (Int, C43296{C43296{C43296{Val, <:Tuple}}}))) <: Int + +abstract type e43296{a, j} <: AbstractArray{a, j} end +abstract type b43296{a, j, c, d} <: e43296{a, j} end +struct h43296{a, j, f, d, i} <: b43296{a, j, f, d} end +Base.ndims(::Type{f}) where {f<:e43296} = ndims(supertype(f)) +Base.ndims(g::e43296) = ndims(typeof(g)) +@test only(Base.return_types(ndims, (h43296{Any, 0, Any, Int, Any},))) == Int + +@test Compiler.unionlen(Union{}) == 1 +@test Compiler.unionlen(Int8) == 1 +@test Compiler.unionlen(Union{Int8, Int16}) == 2 +@test Compiler.unionlen(Union{Int8, Int16, Int32, Int64}) == 4 +@test Compiler.unionlen(Tuple{Union{Int8, Int16, Int32, Int64}}) == 1 +@test Compiler.unionlen(Union{Int8, Int16, Int32, T} where T) == 1 + +@test Compiler.unioncomplexity(Union{}) == 0 +@test Compiler.unioncomplexity(Int8) == 0 +@test Compiler.unioncomplexity(Val{Union{Int8, Int16, Int32, Int64}}) == 0 +@test Compiler.unioncomplexity(Union{Int8, Int16}) == 1 +@test Compiler.unioncomplexity(Union{Int8, Int16, Int32, Int64}) == 3 +@test Compiler.unioncomplexity(Tuple{Union{Int8, Int16, Int32, Int64}}) == 3 +@test Compiler.unioncomplexity(Union{Int8, Int16, Int32, T} where T) == 3 +@test Compiler.unioncomplexity(Tuple{Val{T}, Union{Int8, Int16}, Int8} where T<:Union{Int8, Int16, Int32, Int64}) == 3 +@test Compiler.unioncomplexity(Tuple{Vararg{Tuple{Union{Int8, Int16}}}}) == 2 +@test Compiler.unioncomplexity(Tuple{Vararg{Symbol}}) == 1 +@test Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}) == 3 +@test Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}) == 5 +@test Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}}}}) == 7 # PR 22120 -function tmerge_test(a, b, r, commutative=true) - @test r == Core.Compiler.tuplemerge(a, b) - if commutative - @test r == Core.Compiler.tuplemerge(b, a) - else - @test_broken r == Core.Compiler.tuplemerge(b, a) - end -end -tmerge_test(Tuple{Int}, Tuple{String}, Tuple{Union{Int, String}}) -tmerge_test(Tuple{Int}, Tuple{String, String}, Tuple) -tmerge_test(Tuple{Vararg{Int}}, Tuple{String}, Tuple) -tmerge_test(Tuple{Int}, Tuple{Int, Int}, +function tuplemerge_test(a, b, r, commutative=true) + @test r == Compiler.tuplemerge(a, b) + @test r == Compiler.tuplemerge(b, a) broken=!commutative +end +tuplemerge_test(Tuple{Int}, Tuple{String}, Tuple{Union{Int, String}}) +tuplemerge_test(Tuple{Int}, Tuple{String, String}, Tuple) +tuplemerge_test(Tuple{Vararg{Int}}, Tuple{String}, Tuple) +tuplemerge_test(Tuple{Int}, Tuple{Int, Int}, Tuple{Vararg{Int}}) -tmerge_test(Tuple{Integer}, Tuple{Int, Int}, +tuplemerge_test(Tuple{Integer}, Tuple{Int, Int}, Tuple{Vararg{Integer}}) -tmerge_test(Tuple{}, Tuple{Int, Int}, +tuplemerge_test(Tuple{}, Tuple{Int, Int}, Tuple{Vararg{Int}}) -tmerge_test(Tuple{}, Tuple{Complex}, +tuplemerge_test(Tuple{}, Tuple{Complex}, Tuple{Vararg{Complex}}) -tmerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF64}, 
+tuplemerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF64}, Tuple{Vararg{Complex}}) -tmerge_test(Tuple{Vararg{ComplexF32}}, Tuple{Vararg{ComplexF64}}, +tuplemerge_test(Tuple{Vararg{ComplexF32}}, Tuple{Vararg{ComplexF64}}, Tuple{Vararg{Complex}}) -tmerge_test(Tuple{}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{ComplexF32, ComplexF32, ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{ComplexF32, ComplexF32, ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{}, Tuple{Union{ComplexF64, ComplexF32}, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{}, Tuple{Union{ComplexF64, ComplexF32}, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Complex}}, false) -tmerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Complex}}) -@test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == +@test Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == Union{Nothing, Tuple{}, Tuple{ComplexF32, ComplexF32}} -@test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}) == +@test Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}) == Union{Nothing, Tuple{Vararg{ComplexF32}}} -@test Core.Compiler.tmerge(Union{Nothing, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == +@test Compiler.tmerge(Union{Nothing, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}} -@test Core.Compiler.tmerge(Union{Nothing, Tuple{}, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == +@test Compiler.tmerge(Union{Nothing, Tuple{}, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == Union{Nothing, Tuple{Vararg{ComplexF32}}} -@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Vector{Bool})) == +@test Compiler.tmerge(Vector{Int}, Compiler.tmerge(Vector{String}, Vector{Bool})) == Union{Vector{Bool}, Vector{Int}, Vector{String}} -@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Union{Vector{Bool}, Vector{Symbol}})) == Vector -@test Core.Compiler.tmerge(Base.BitIntegerType, Union{}) === Base.BitIntegerType -@test Core.Compiler.tmerge(Union{}, Base.BitIntegerType) === Base.BitIntegerType -@test Core.Compiler.tmerge(Core.Compiler.fallback_ipo_lattice, Core.Compiler.InterConditional(1, Int, Union{}), Core.Compiler.InterConditional(2, String, Union{})) === Core.Compiler.Const(true) +@test Compiler.tmerge(Vector{Int}, Compiler.tmerge(Vector{String}, Union{Vector{Bool}, Vector{Symbol}})) == Vector +@test 
Compiler.tmerge(Base.BitIntegerType, Union{}) === Base.BitIntegerType +@test Compiler.tmerge(Union{}, Base.BitIntegerType) === Base.BitIntegerType +@test Compiler.tmerge(Compiler.fallback_ipo_lattice, Compiler.InterConditional(1, Int, Union{}), Compiler.InterConditional(2, String, Union{})) === Compiler.Const(true) +# test issue behind https://github.com/JuliaLang/julia/issues/50458 +@test Compiler.tmerge(Nothing, Tuple{Base.BitInteger, Int}) == Union{Nothing, Tuple{Base.BitInteger, Int}} +@test Compiler.tmerge(Union{Nothing, Tuple{Int, Int}}, Tuple{Base.BitInteger, Int}) == Union{Nothing, Tuple{Any, Int}} +@test Compiler.tmerge(Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}) == Union{Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}} +@test Compiler.tmerge(Union{Nothing, Tuple{Char, Int}}, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}) == Union{Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}} +@test Compiler.tmerge(Nothing, Tuple{Integer, Int}) == Union{Nothing, Tuple{Integer, Int}} +@test Compiler.tmerge(Union{Nothing, Tuple{Int, Int}}, Tuple{Integer, Int}) == Union{Nothing, Tuple{Integer, Int}} +@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Vector) == Union{Nothing, Int, AbstractVector} +@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Matrix) == Union{Nothing, Int, AbstractArray} +@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Matrix{Int}) == Union{Nothing, Int, AbstractArray{Int}} +@test Compiler.tmerge(Union{Nothing, Int, AbstractVector{Int}}, Array) == Union{Nothing, Int, AbstractArray} +@test Compiler.tmerge(Union{Nothing, Int, AbstractArray{Int}}, Vector) == Union{Nothing, Int, AbstractArray} +@test Compiler.tmerge(Union{Nothing, Int, AbstractVector}, Matrix{Int}) == Union{Nothing, Int, AbstractArray} +@test Compiler.tmerge(Union{Nothing, AbstractFloat}, Integer) == Union{Nothing, AbstractFloat, Integer} +@test Compiler.tmerge(AbstractVector, AbstractMatrix) == Union{AbstractVector, AbstractMatrix} +@test Compiler.tmerge(Union{AbstractVector, Nothing}, AbstractMatrix) == Union{Nothing, AbstractVector, AbstractMatrix} +@test Compiler.tmerge(Union{AbstractVector, Int}, AbstractMatrix) == Union{Int, AbstractVector, AbstractMatrix} +@test Compiler.tmerge(Union{AbstractVector, Integer}, AbstractMatrix) == Union{Integer, AbstractArray} +@test Compiler.tmerge(Union{AbstractVector, Nothing, Int}, AbstractMatrix) == Union{Nothing, Int, AbstractArray} + +# test that recursively more complicated types don't widen all the way to Any when there is a useful valid type upper bound +# Specifically test with base types of a trivial type, a simple union, a complicated union, and a tuple. 
+for T in (Nothing, Base.BitInteger, Union{Int, Int32, Int16, Int8}, Tuple{Int, Int}) + Ta, Tb = T, T + for i in 1:10 + Ta = Union{Tuple{Ta}, Nothing} + Tb = Compiler.tmerge(Tuple{Tb}, Nothing) + @test Ta <: Tb <: Union{Nothing, Tuple} + end +end struct SomethingBits x::Base.BitIntegerType @@ -282,9 +368,9 @@ barTuple2() = fooTuple{tuple(:y)}() @test Base.return_types(barTuple1,Tuple{})[1] == Base.return_types(barTuple2,Tuple{})[1] == fooTuple{(:y,)} # issue #6050 -@test Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, +@test Compiler.getfield_tfunc(Compiler.fallback_lattice, Dict{Int64,Tuple{UnitRange{Int64},UnitRange{Int64}}}, - Core.Compiler.Const(:vals)) == Array{Tuple{UnitRange{Int64},UnitRange{Int64}},1} + Compiler.Const(:vals)) == Memory{Tuple{UnitRange{Int64},UnitRange{Int64}}} # assert robustness of `getfield_tfunc` struct GetfieldRobustness @@ -344,8 +430,7 @@ code_llvm(devnull, f14009, (Int,)) mutable struct B14009{T}; end g14009(a) = g14009(B14009{a}) code_typed(g14009, (Type{Int},)) -code_llvm(devnull, f14009, (Int,)) - +code_llvm(devnull, g14009, (Type{Int},)) # issue #9232 arithtype9232(::Type{T},::Type{T}) where {T<:Real} = arithtype9232(T) @@ -396,7 +481,7 @@ end @test f15259(1,2) == (1,2,1,2) # check that error cases are still correct @eval g15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); a.z) -@test_throws ErrorException g15259(1,1) +@test_throws FieldError g15259(1,1) @eval h15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); getfield(a, 3)) @test_throws BoundsError h15259(1,1) @@ -564,7 +649,7 @@ f18450() = ifelse(true, Tuple{Vararg{Int}}, Tuple{Vararg}) @test f18450() == Tuple{Vararg{Int}} # issue #18569 -@test !Core.Compiler.isconstType(Type{Tuple}) +@test !Compiler.isconstType(Type{Tuple}) # issue #10880 function cat10880(a, b) @@ -593,7 +678,6 @@ end function test_inferred_static(arrow::Pair, all_ssa) code, rt = arrow @test isdispatchelem(rt) - @test code.inferred for i = 1:length(code.code) e = code.code[i] test_inferred_static(e) @@ -649,7 +733,7 @@ for (codetype, all_ssa) in Any[ test_inferred_static(codetype, all_ssa) end @test f18679() === () -@test_throws UndefVarError(:any_undef_global) g18679() +@test_throws UndefVarError(:any_undef_global, @__MODULE__) g18679() @test h18679() === nothing @@ -696,9 +780,9 @@ end f_infer_abstract_fieldtype() = fieldtype(HasAbstractlyTypedField, :x) @test Base.return_types(f_infer_abstract_fieldtype, ()) == Any[Type{Union{Int,String}}] let fieldtype_tfunc(@nospecialize args...) 
= - Core.Compiler.fieldtype_tfunc(Core.Compiler.fallback_lattice, args...), - fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Core.Compiler.fieldtype_nothrow( - Core.Compiler.SimpleInferenceLattice.instance, s0, name) + Compiler.fieldtype_tfunc(Compiler.fallback_lattice, args...), + fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Compiler.fieldtype_nothrow( + Compiler.SimpleInferenceLattice.instance, s0, name) @test fieldtype_tfunc(Union{}, :x) == Union{} @test fieldtype_tfunc(Union{Type{Int32}, Int32}, Const(:x)) == Union{} @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Array}, Const(:x)) == Type{<:Array} @@ -741,7 +825,7 @@ end # Issue 19641 foo19641() = let a = 1.0 - Core.Compiler.return_type(x -> x + a, Tuple{Float64}) + Base._return_type(x -> x + a, Tuple{Float64}) end @inferred foo19641() @@ -895,15 +979,15 @@ test_no_apply(::Any) = true # issue #20033 # check return_type_tfunc for calls where no method matches -bcast_eltype_20033(f, A) = Core.Compiler.return_type(f, Tuple{eltype(A)}) +bcast_eltype_20033(f, A) = Base._return_type(f, Tuple{eltype(A)}) err20033(x::Float64...) = prod(x) @test bcast_eltype_20033(err20033, [1]) === Union{} @test Base.return_types(bcast_eltype_20033, (typeof(err20033), Vector{Int},)) == Any[Type{Union{}}] # return_type on builtins -@test Core.Compiler.return_type(tuple, Tuple{Int,Int8,Int}) === Tuple{Int,Int8,Int} +@test Base._return_type(tuple, Tuple{Int,Int8,Int}) === Tuple{Int,Int8,Int} # issue #21088 -@test Core.Compiler.return_type(typeof, Tuple{Int}) == Type{Int} +@test Base._return_type(typeof, Tuple{Int}) == Type{Int} # Inference of constant svecs @eval fsvecinf() = $(QuoteNode(Core.svec(Tuple{Int,Int}, Int)))[1] @@ -983,7 +1067,7 @@ gl_17003 = [1, 2, 3] f2_17003(item::AVector_17003) = nothing f2_17003(::Any) = f2_17003(NArray_17003(gl_17003)) -@test f2_17003(1) == nothing +@test f2_17003(1) === nothing # issue #20847 function segfaultfunction_20847(A::Vector{NTuple{N, T}}) where {N, T} @@ -994,7 +1078,7 @@ end tuplevec_20847 = Tuple{Float64, Float64}[(0.0,0.0), (1.0,0.0)] for A in (1,) - @test segfaultfunction_20847(tuplevec_20847) == nothing + @test segfaultfunction_20847(tuplevec_20847) === nothing end # Issue #20902, check that this doesn't error. @@ -1015,7 +1099,7 @@ f21771(::Val{U}) where {U} = Tuple{g21771(U)} # PR #28284, check that constants propagate through calls to new struct t28284 - x::Int + x::Int end f28284() = Val(t28284(1)) @inferred f28284() @@ -1078,7 +1162,7 @@ end struct UnionIsdefinedA; x; end struct UnionIsdefinedB; x; end let isdefined_tfunc(@nospecialize xs...) = - Core.Compiler.isdefined_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.isdefined_tfunc(Compiler.fallback_lattice, xs...) @test isdefined_tfunc(typeof(NamedTuple()), Const(0)) === Const(false) @test isdefined_tfunc(typeof(NamedTuple()), Const(1)) === Const(false) @test isdefined_tfunc(typeof((a=1,b=2)), Const(:a)) === Const(true) @@ -1102,14 +1186,9 @@ let isdefined_tfunc(@nospecialize xs...) 
= @test isdefined_tfunc(ComplexF32, Const(0)) === Const(false) @test isdefined_tfunc(SometimesDefined, Const(:x)) == Bool @test isdefined_tfunc(SometimesDefined, Const(:y)) === Const(false) - @test isdefined_tfunc(Const(Base), Const(:length)) === Const(true) - @test isdefined_tfunc(Const(Base), Symbol) == Bool - @test isdefined_tfunc(Const(Base), Const(:NotCurrentlyDefinedButWhoKnows)) == Bool @test isdefined_tfunc(Core.SimpleVector, Const(1)) === Const(false) @test Const(false) ⊑ isdefined_tfunc(Const(:x), Symbol) @test Const(false) ⊑ isdefined_tfunc(Const(:x), Const(:y)) - @test isdefined_tfunc(Vector{Int}, Const(1)) == Const(false) - @test isdefined_tfunc(Vector{Any}, Const(1)) == Const(false) @test isdefined_tfunc(Module, Int) === Union{} @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(0)) === Const(false) @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(1)) === Const(true) @@ -1180,18 +1259,18 @@ function get_linfo(@nospecialize(f), @nospecialize(t)) # get the MethodInstance for the method match match = Base._which(Base.signature_type(f, t)) precompile(match.spec_types) - return Core.Compiler.specialize_method(match) + return Compiler.specialize_method(match) end function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val)) - interp = Core.Compiler.NativeInterpreter() - linfo = Core.Compiler.getindex(Core.Compiler.code_cache(interp), get_linfo(f, t)) + interp = Compiler.NativeInterpreter() + linfo = Compiler.getindex(Compiler.code_cache(interp), get_linfo(f, t)) # If coverage is not enabled, make the check strict by requiring constant ABI # Otherwise, check the typed AST to make sure we return a constant. if Base.JLOptions().code_coverage == 0 - @test Core.Compiler.invoke_api(linfo) == 2 + @test Compiler.invoke_api(linfo) == 2 end - if Core.Compiler.invoke_api(linfo) == 2 + if Compiler.invoke_api(linfo) == 2 @test linfo.rettype_const == val return end @@ -1211,7 +1290,7 @@ function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val @test ret === val || (isa(ret, QuoteNode) && (ret::QuoteNode).value === val) continue elseif isa(ex, Expr) - if Core.Compiler.is_meta_expr_head(ex.head) + if Compiler.is_meta_expr_head(ex.head) continue end end @@ -1231,7 +1310,7 @@ function find_call(code::Core.CodeInfo, @nospecialize(func), narg) farg = typeof(getfield(farg.mod, farg.name)) end elseif isa(farg, Core.SSAValue) - farg = Core.Compiler.widenconst(code.ssavaluetypes[farg.id]) + farg = Compiler.widenconst(code.ssavaluetypes[farg.id]) else farg = typeof(farg) end @@ -1249,33 +1328,36 @@ test_const_return(()->sizeof(1), Tuple{}, sizeof(Int)) test_const_return(()->sizeof(DataType), Tuple{}, sizeof(DataType)) test_const_return(()->sizeof(1 < 2), Tuple{}, 1) test_const_return(()->fieldtype(Dict{Int64,Nothing}, :age), Tuple{}, UInt) -test_const_return(@eval(()->Core.sizeof($(Array{Int,0}(undef)))), Tuple{}, sizeof(Int)) -test_const_return(@eval(()->Core.sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * 2 * 2) +test_const_return(@eval(()->Core.sizeof($(Array{Int,0}(undef)))), Tuple{}, 2 * sizeof(Int)) +test_const_return(@eval(()->Core.sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * sizeof(Int)) +# TODO: do we want to implement these? 
+# test_const_return(@eval(()->sizeof($(Array{Int,0}(undef)))), Tuple{}, sizeof(Int)) +# test_const_return(@eval(()->sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * 2 * 2) +# test_const_return(@eval(()->Core.sizeof($(Memory{Int}(undef, 0)))), Tuple{}, 0) # Make sure Core.sizeof with a ::DataType as inferred input type is inferred but not constant. function sizeof_typeref(typeref) return Core.sizeof(typeref[]) end @test @inferred(sizeof_typeref(Ref{DataType}(Int))) == sizeof(Int) -@test find_call(first(code_typed(sizeof_typeref, (Ref{DataType},))[1]), Core.sizeof, 2) +@test find_call(only(code_typed(sizeof_typeref, (Ref{DataType},)))[1], Core.sizeof, 2) # Constant `Vector` can be resized and shouldn't be optimized to a constant. const constvec = [1, 2, 3] @eval function sizeof_constvec() - return Core.sizeof($constvec) + return sizeof($constvec) end @test @inferred(sizeof_constvec()) == sizeof(Int) * 3 -@test find_call(first(code_typed(sizeof_constvec, ())[1]), Core.sizeof, 2) push!(constvec, 10) -@test @inferred(sizeof_constvec()) == sizeof(Int) * 4 +@test sizeof_constvec() == sizeof(Int) * 4 test_const_return(x->isdefined(x, :re), Tuple{ComplexF64}, true) isdefined_f3(x) = isdefined(x, 3) @test @inferred(isdefined_f3(())) == false -@test find_call(first(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}})[1]), isdefined, 3) +@test find_call(only(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}}))[1], isdefined, 3) let isa_tfunc(@nospecialize xs...) = - Core.Compiler.isa_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.isa_tfunc(Compiler.fallback_lattice, xs...) @test isa_tfunc(Array, Const(AbstractArray)) === Const(true) @test isa_tfunc(Array, Type{AbstractArray}) === Const(true) @test isa_tfunc(Array, Type{AbstractArray{Int}}) == Bool @@ -1315,7 +1397,7 @@ let isa_tfunc(@nospecialize xs...) = end let subtype_tfunc(@nospecialize xs...) = - Core.Compiler.subtype_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.subtype_tfunc(Compiler.fallback_lattice, xs...) @test subtype_tfunc(Type{<:Array}, Const(AbstractArray)) === Const(true) @test subtype_tfunc(Type{<:Array}, Type{AbstractArray}) === Const(true) @test subtype_tfunc(Type{<:Array}, Type{AbstractArray{Int}}) == Bool @@ -1367,9 +1449,9 @@ end let egal_tfunc function egal_tfunc(a, b) - 𝕃 = Core.Compiler.fallback_lattice - r = Core.Compiler.egal_tfunc(𝕃, a, b) - @test r === Core.Compiler.egal_tfunc(𝕃, b, a) + 𝕃 = Compiler.fallback_lattice + r = Compiler.egal_tfunc(𝕃, a, b) + @test r === Compiler.egal_tfunc(𝕃, b, a) return r end @test egal_tfunc(Const(12345.12345), Const(12344.12345 + 1)) == Const(true) @@ -1438,11 +1520,11 @@ egal_conditional_lattice3(x, y) = x === y + y ? "" : 1 @test Base.return_types(egal_conditional_lattice3, (Int32, Int64)) == Any[Int] let nfields_tfunc(@nospecialize xs...) = - Core.Compiler.nfields_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.nfields_tfunc(Compiler.fallback_lattice, xs...) sizeof_tfunc(@nospecialize xs...) = - Core.Compiler.sizeof_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.sizeof_tfunc(Compiler.fallback_lattice, xs...) sizeof_nothrow(@nospecialize xs...) = - Core.Compiler.sizeof_nothrow(xs...) + Compiler.sizeof_nothrow(xs...) @test sizeof_tfunc(Const(Ptr)) === sizeof_tfunc(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) === Const(Sys.WORD_SIZE ÷ 8) @test sizeof_tfunc(Type{Ptr}) === Const(sizeof(Ptr)) @test sizeof_nothrow(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) @@ -1450,12 +1532,12 @@ let nfields_tfunc(@nospecialize xs...) 
= @test sizeof_nothrow(Type{Ptr}) @test sizeof_nothrow(Type{Union{Ptr{Int}, Int}}) @test !sizeof_nothrow(Const(Tuple)) - @test !sizeof_nothrow(Type{Vector{Int}}) + @test sizeof_nothrow(Type{Vector{Int}}) @test !sizeof_nothrow(Type{Union{Int, String}}) @test sizeof_nothrow(String) @test !sizeof_nothrow(Type{String}) @test sizeof_tfunc(Type{Union{Int64, Int32}}) == Const(Core.sizeof(Union{Int64, Int32})) - let PT = Core.Compiler.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64]) + let PT = Core.PartialStruct(Compiler.fallback_lattice, Tuple{Int64,UInt64}, Any[Const(10), UInt64]) @test sizeof_tfunc(PT) === Const(16) @test nfields_tfunc(PT) === Const(2) @test sizeof_nothrow(PT) @@ -1483,7 +1565,7 @@ let nfields_tfunc(@nospecialize xs...) = end let typeof_tfunc(@nospecialize xs...) = - Core.Compiler.typeof_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.typeof_tfunc(Compiler.fallback_lattice, xs...) @test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N @test typeof_tfunc(Tuple{Any}) == Type{<:Tuple{Any}} @test typeof_tfunc(Type{Array}) === DataType @@ -1496,41 +1578,77 @@ end f_typeof_tfunc(x) = typeof(x) @test Base.return_types(f_typeof_tfunc, (Union{<:T, Int} where T<:Complex,)) == Any[Union{Type{Int}, Type{Complex{T}} where T<:Real}] -# arrayref / arrayset / arraysize -import Core.Compiler: Const -let arrayref_tfunc(@nospecialize xs...) = Core.Compiler.arrayref_tfunc(Core.Compiler.fallback_lattice, xs...) - arrayset_tfunc(@nospecialize xs...) = Core.Compiler.arrayset_tfunc(Core.Compiler.fallback_lattice, xs...) - arraysize_tfunc(@nospecialize xs...) = Core.Compiler.arraysize_tfunc(Core.Compiler.fallback_lattice, xs...) - @test arrayref_tfunc(Const(true), Vector{Int}, Int) === Int - @test arrayref_tfunc(Const(true), Vector{<:Integer}, Int) === Integer - @test arrayref_tfunc(Const(true), Vector, Int) === Any - @test arrayref_tfunc(Const(true), Vector{Int}, Int, Vararg{Int}) === Int - @test arrayref_tfunc(Const(true), Vector{Int}, Vararg{Int}) === Int - @test arrayref_tfunc(Const(true), Vector{Int}) === Union{} - @test arrayref_tfunc(Const(true), String, Int) === Union{} - @test arrayref_tfunc(Const(true), Vector{Int}, Float64) === Union{} - @test arrayref_tfunc(Int, Vector{Int}, Int) === Union{} - @test arrayset_tfunc(Const(true), Vector{Int}, Int, Int) === Vector{Int} - let ua = Vector{<:Integer} - @test arrayset_tfunc(Const(true), ua, Int, Int) === ua - end - @test arrayset_tfunc(Const(true), Vector, Int, Int) === Vector - @test arrayset_tfunc(Const(true), Any, Int, Int) === Any - @test arrayset_tfunc(Const(true), Vector{String}, String, Int, Vararg{Int}) === Vector{String} - @test arrayset_tfunc(Const(true), Vector{String}, String, Vararg{Int}) === Vector{String} - @test arrayset_tfunc(Const(true), Vector{String}, String) === Union{} - @test arrayset_tfunc(Const(true), String, Char, Int) === Union{} - @test arrayset_tfunc(Const(true), Vector{Int}, Int, Float64) === Union{} - @test arrayset_tfunc(Int, Vector{Int}, Int, Int) === Union{} - @test arrayset_tfunc(Const(true), Vector{Int}, Float64, Int) === Union{} - @test arraysize_tfunc(Vector, Int) === Int - @test arraysize_tfunc(Vector, Float64) === Union{} - @test arraysize_tfunc(String, Int) === Union{} +# memoryref_tfunc, memoryrefget_tfunc, memoryrefset!_tfunc, memoryref_isassigned, memoryrefoffset_tfunc +let memoryref_tfunc(@nospecialize xs...) = Compiler.memoryref_tfunc(Compiler.fallback_lattice, xs...) + memoryrefget_tfunc(@nospecialize xs...) 
= Compiler.memoryrefget_tfunc(Compiler.fallback_lattice, xs...) + memoryref_isassigned_tfunc(@nospecialize xs...) = Compiler.memoryref_isassigned_tfunc(Compiler.fallback_lattice, xs...) + memoryrefset!_tfunc(@nospecialize xs...) = Compiler.memoryrefset!_tfunc(Compiler.fallback_lattice, xs...) + memoryrefoffset_tfunc(@nospecialize xs...) = Compiler.memoryrefoffset_tfunc(Compiler.fallback_lattice, xs...) + interp = Compiler.NativeInterpreter() + builtin_tfunction(@nospecialize xs...) = Compiler.builtin_tfunction(interp, xs..., nothing) + @test memoryref_tfunc(Memory{Int}) == MemoryRef{Int} + @test memoryref_tfunc(Memory{Integer}) == MemoryRef{Integer} + @test memoryref_tfunc(MemoryRef{Int}, Int) == MemoryRef{Int} + @test memoryref_tfunc(MemoryRef{Int}, Vararg{Int}) == MemoryRef{Int} + @test memoryref_tfunc(MemoryRef{Int}, Int, Symbol) == Union{} + @test memoryref_tfunc(MemoryRef{Int}, Int, Bool) == MemoryRef{Int} + @test memoryref_tfunc(MemoryRef{Int}, Int, Vararg{Bool}) == MemoryRef{Int} + @test memoryref_tfunc(Memory{Int}, Int) == Union{} + @test memoryref_tfunc(Any, Any, Any) == Any # also probably could be GenericMemoryRef + @test memoryref_tfunc(Any, Any) == Any # also probably could be GenericMemoryRef + @test memoryref_tfunc(Any) == GenericMemoryRef + @test memoryrefget_tfunc(MemoryRef{Int}, Symbol, Bool) === Int + @test memoryrefget_tfunc(MemoryRef{Int}, Any, Any) === Int + @test memoryrefget_tfunc(MemoryRef{<:Integer}, Symbol, Bool) === Integer + @test memoryrefget_tfunc(GenericMemoryRef, Symbol, Bool) === Any + @test memoryrefget_tfunc(GenericMemoryRef{:not_atomic}, Symbol, Bool) === Any + @test memoryrefget_tfunc(Vector{Int}, Symbol, Bool) === Union{} + @test memoryrefget_tfunc(String, Symbol, Bool) === Union{} + @test memoryrefget_tfunc(MemoryRef{Int}, String, Bool) === Union{} + @test memoryrefget_tfunc(MemoryRef{Int}, Symbol, String) === Union{} + @test memoryrefget_tfunc(Any, Any, Any) === Any + @test builtin_tfunction(Core.memoryrefget, Any[MemoryRef{Int}, Vararg{Any}]) == Int + @test builtin_tfunction(Core.memoryrefget, Any[MemoryRef{Int}, Symbol, Bool, Vararg{Bool}]) == Int + @test memoryref_isassigned_tfunc(MemoryRef{Any}, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(MemoryRef{Any}, Any, Any) === Bool + @test memoryref_isassigned_tfunc(MemoryRef{<:Integer}, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(GenericMemoryRef, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(GenericMemoryRef{:not_atomic}, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(Vector{Int}, Symbol, Bool) === Union{} + @test memoryref_isassigned_tfunc(String, Symbol, Bool) === Union{} + @test memoryref_isassigned_tfunc(MemoryRef{Int}, String, Bool) === Union{} + @test memoryref_isassigned_tfunc(MemoryRef{Int}, Symbol, String) === Union{} + @test memoryref_isassigned_tfunc(Any, Any, Any) === Bool + @test builtin_tfunction(Core.memoryref_isassigned, Any[MemoryRef{Int}, Vararg{Any}]) == Bool + @test builtin_tfunction(Core.memoryref_isassigned, Any[MemoryRef{Int}, Symbol, Bool, Vararg{Bool}]) == Bool + @test memoryrefset!_tfunc(MemoryRef{Int}, Int, Symbol, Bool) === Int + let ua = MemoryRef{<:Integer} + @test memoryrefset!_tfunc(ua, Int, Symbol, Bool) === Int + end + @test memoryrefset!_tfunc(GenericMemoryRef, Int, Symbol, Bool) === Int + @test memoryrefset!_tfunc(GenericMemoryRef{:not_atomic}, Int, Symbol, Bool) === Int + @test memoryrefset!_tfunc(Any, Int, Symbol, Bool) === Int + @test memoryrefset!_tfunc(MemoryRef{String}, Int, Symbol, Bool) === Union{} + @test 
memoryrefset!_tfunc(String, Char, Symbol, Bool) === Union{} + @test memoryrefset!_tfunc(MemoryRef{Int}, Any, Symbol, Bool) === Any # could improve this to Int + @test memoryrefset!_tfunc(MemoryRef{Int}, Any, Any, Any) === Any # could improve this to Int + @test memoryrefset!_tfunc(GenericMemoryRef{:not_atomic}, Any, Any, Any) === Any + @test memoryrefset!_tfunc(GenericMemoryRef, Any, Any, Any) === Any + @test memoryrefset!_tfunc(Any, Any, Any, Any) === Any + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Vararg{Any}]) == Any + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Vararg{Symbol}]) == Union{} + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Any, Symbol, Vararg{Bool}]) === Any # could improve this to Int + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Any, Symbol, Bool, Vararg{Any}]) === Any # could improve this to Int + @test memoryrefoffset_tfunc(MemoryRef) == memoryrefoffset_tfunc(GenericMemoryRef) == Int + @test memoryrefoffset_tfunc(Memory) == memoryrefoffset_tfunc(GenericMemory) == Union{} + @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{MemoryRef}]) == Int + @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{Any}]) == Int + @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{Memory}]) == Union{} end let tuple_tfunc(@nospecialize xs...) = - Core.Compiler.tuple_tfunc(Core.Compiler.fallback_lattice, Any[xs...]) - @test Core.Compiler.widenconst(tuple_tfunc(Type{Int})) === Tuple{DataType} + Compiler.tuple_tfunc(Compiler.fallback_lattice, Any[xs...]) + @test Compiler.widenconst(tuple_tfunc(Type{Int})) === Tuple{DataType} # https://github.com/JuliaLang/julia/issues/44705 @test tuple_tfunc(Union{Type{Int32},Type{Int64}}) === Tuple{Type} @test tuple_tfunc(DataType) === Tuple{DataType} @@ -1546,8 +1664,8 @@ g23024(TT::Tuple{DataType}) = f23024(TT[1], v23024) @test Base.return_types(g23024, (Tuple{DataType},)) == Any[Int] @test g23024((UInt8,)) === 2 -@test !Core.Compiler.isconstType(Type{typeof(Union{})}) # could be Core.TypeofBottom or Type{Union{}} at runtime -@test !isa(Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, Type{Core.TypeofBottom}, Core.Compiler.Const(:name)), Core.Compiler.Const) +@test !Compiler.isconstType(Type{typeof(Union{})}) # could be Core.TypeofBottom or Type{Union{}} at runtime +@test !isa(Compiler.getfield_tfunc(Compiler.fallback_lattice, Type{Core.TypeofBottom}, Compiler.Const(:name)), Compiler.Const) @test Base.return_types(supertype, (Type{typeof(Union{})},)) == Any[Any] # issue #23685 @@ -1573,19 +1691,18 @@ gg13183(x::X...) where {X} = (_false13183 ? 
gg13183(x, x) : 0) # test the external OptimizationState constructor let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}), world = UInt(23) # some small-numbered world that should be valid - interp = Core.Compiler.NativeInterpreter() - opt = Core.Compiler.OptimizationState(linfo, interp) + interp = Compiler.NativeInterpreter() + opt = Compiler.OptimizationState(linfo, interp) # make sure the state of the properties look reasonable @test opt.src !== linfo.def.source @test length(opt.src.slotflags) == linfo.def.nargs <= length(opt.src.slotnames) @test opt.src.ssavaluetypes isa Vector{Any} - @test !opt.src.inferred @test opt.mod === Base end # approximate static parameters due to unions let T1 = Array{Float64}, T2 = Array{_1,2} where _1 - inference_test_copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a) + inference_test_copy(a::T) where {T<:Array} = ccall(:array_copy_like, Ref{T}, (Any,), a) rt = Base.return_types(inference_test_copy, (Union{T1,T2},))[1] @test rt >: T1 && rt >: T2 @@ -1606,12 +1723,12 @@ g_test_constant() = (f_constant(3) == 3 && f_constant(4) == 4 ? true : "BAD") f_pure_add() = (1 + 1 == 2) ? true : "FAIL" @test @inferred f_pure_add() -import Core: Const +using Core: Const mutable struct ARef{T} @atomic x::T end let getfield_tfunc(@nospecialize xs...) = - Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.getfield_tfunc(Compiler.fallback_lattice, xs...) # inference of `T.mutable` @test getfield_tfunc(Const(Int.name), Const(:flags)) == Const(0x4) @@ -1647,7 +1764,7 @@ let getfield_tfunc(@nospecialize xs...) = @test getfield_tfunc(ARef{Int},Const(:x),Bool,Bool) === Union{} end -import Core.Compiler: Const +using Core: Const mutable struct XY{X,Y} x::X y::Y @@ -1659,7 +1776,7 @@ mutable struct ABCDconst const d::Union{Int,Nothing} end let setfield!_tfunc(@nospecialize xs...) = - Core.Compiler.setfield!_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.setfield!_tfunc(Compiler.fallback_lattice, xs...) @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int) === Int @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int, Symbol) === Int @test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int) === Int @@ -1719,7 +1836,7 @@ let setfield!_tfunc(@nospecialize xs...) = @test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{} end let setfield!_nothrow(@nospecialize xs...) = - Core.Compiler.setfield!_nothrow(Core.Compiler.SimpleInferenceLattice.instance, xs...) + Compiler.setfield!_nothrow(Compiler.SimpleInferenceLattice.instance, xs...) 
@test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int) @test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int) @test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int) @@ -1819,6 +1936,8 @@ function f24852_kernel_cinfo(world::UInt, source, fsig::Type) end pushfirst!(code_info.slotnames, Symbol("#self#")) pushfirst!(code_info.slotflags, 0x00) + code_info.nargs = 4 + code_info.isva = false # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[]) # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[]) return match.method, code_info @@ -1915,7 +2034,7 @@ function foo25261() next = f25261(Core.getfield(next, 2)) end end -let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code +let opt25261 = code_typed(foo25261, Tuple{}, optimize=true)[1].first.code i = 1 # Skip to after the branch while !isa(opt25261[i], GotoIfNot) @@ -1923,7 +2042,7 @@ let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code end foundslot = false for expr25261 in opt25261[i:end] - if expr25261 isa Core.Compiler.TypedSlot && expr25261.typ === Tuple{Int, Int} + if expr25261 isa Core.PiNode && expr25261.typ === Tuple{Int, Int} # This should be the assignment to the SSAValue into the getfield # call - make sure it's a TypedSlot foundslot = true @@ -2014,12 +2133,12 @@ end # handle edge case @test (@eval Module() begin - edgecase(_) = $(Core.Compiler.InterConditional(2, Int, Any)) + edgecase(_) = $(Compiler.InterConditional(2, Int, Any)) Base.return_types(edgecase, (Any,)) # create cache Base.return_types((Any,)) do x edgecase(x) end - end) == Any[Core.Compiler.InterConditional] + end) == Any[Compiler.InterConditional] # a tricky case: if constant inference derives `Const` while non-constant inference has # derived `InterConditional`, we should not discard that constant information @@ -2031,78 +2150,75 @@ end @testset "branching on conditional object" begin # simple - @test Base.return_types((Union{Nothing,Int},)) do a + @test Base.infer_return_type((Union{Nothing,Int},)) do a b = a === nothing return b ? 0 : a # ::Int - end == Any[Int] + end == Int # can use multiple times (as far as the subject of condition hasn't changed) - @test Base.return_types((Union{Nothing,Int},)) do a + @test Base.infer_return_type((Union{Nothing,Int},)) do a b = a === nothing c = b ? 0 : a # c::Int d = !b ? a : 0 # d::Int return c, d # ::Tuple{Int,Int} - end == Any[Tuple{Int,Int}] + end == Tuple{Int,Int} # should invalidate old constraint when the subject of condition has changed - @test Base.return_types((Union{Nothing,Int},)) do a + @test Base.infer_return_type((Union{Nothing,Int},)) do a cond = a === nothing r1 = cond ? 0 : a # r1::Int a = 0 r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int} return r1, r2 # ::Tuple{Int,Int} - end == Any[Tuple{Int,Int}] + end == Tuple{Int,Int} end # https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851 # `PartialStruct` shouldn't wrap `Conditional` -let M = Module() - @eval M begin - struct BePartialStruct - val::Int - cond - end - end - - rt = @eval M begin - Base.return_types((Union{Nothing,Int},)) do a - cond = a === nothing - obj = $(Expr(:new, M.BePartialStruct, 42, :cond)) - r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional) - a = $(gensym(:anyvar))::Any - r2 = getfield(obj, :cond) ? 
a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here) - return r1, r2 # ::Tuple{Union{Nothing,Int},Any} - end |> only - end - @test rt == Tuple{Union{Nothing,Int},Any} +struct BePartialStruct + val::Int + cond +end +@test Tuple{Union{Nothing,Int},Any} == @eval Base.infer_return_type((Union{Nothing,Int},)) do a + cond = a === nothing + obj = $(Expr(:new, BePartialStruct, 42, :cond)) + r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional) + a = $(gensym(:anyvar))::Any + r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here) + return r1, r2 # ::Tuple{Union{Nothing,Int},Any} end # make sure we never form nested `Conditional` (https://github.com/JuliaLang/julia/issues/46207) -@test Base.return_types((Any,)) do a +@test Base.infer_return_type((Any,)) do a c = isa(a, Integer) 42 === c ? :a : "b" -end |> only === String -@test Base.return_types((Any,)) do a +end == String +@test Base.infer_return_type((Any,)) do a c = isa(a, Integer) c === 42 ? :a : "b" -end |> only === String +end == String -@testset "conditional constraint propagation from non-`Conditional` object" begin - @test Base.return_types((Bool,)) do b - if b - return !b ? nothing : 1 # ::Int - else - return 0 - end - end == Any[Int] - - @test Base.return_types((Any,)) do b - if b - return b # ::Bool - else - return nothing - end - end == Any[Union{Bool,Nothing}] +function condition_object_update1(cond) + if cond # `cond` is known to be `Const(true)` within this branch + return !cond ? nothing : 1 # ::Int + else + return cond ? nothing : 1 # ::Int + end +end +function condition_object_update2(x) + cond = x isa Int + if cond # `cond` is known to be `Const(true)` within this branch + return !cond ? nothing : x # ::Int + else + return cond ? 
nothing : 1 # ::Int + end +end +@testset "state update for condition object" begin + # refine the type of condition object into constant boolean values on branching + @test Base.infer_return_type(condition_object_update1, (Bool,)) == Int + @test Base.infer_return_type(condition_object_update1, (Any,)) == Int + # refine even when their original type is `Conditional` + @test Base.infer_return_type(condition_object_update2, (Any,)) == Int end @testset "`from_interprocedural!`: translate inter-procedural information" begin @@ -2121,7 +2237,7 @@ end end |> only == Int # the `fargs = nothing` edge case @test Base.return_types((Any,)) do a - Core.Compiler.return_type(invoke, Tuple{typeof(ispositive), Type{Tuple{Any}}, Any}) + Base._return_type(invoke, Tuple{typeof(ispositive), Type{Tuple{Any}}, Any}) end |> only == Type{Bool} # `InterConditional` handling: `abstract_call_opaque_closure` @@ -2150,27 +2266,25 @@ mutable struct AliasableConstField{S,T} f2::T end -import Core.Compiler: +import .Compiler: InferenceLattice, MustAliasesLattice, InterMustAliasesLattice, BaseInferenceLattice, SimpleInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice include("newinterp.jl") @newinterp MustAliasInterpreter -let CC = Core.Compiler - CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) - CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance)) - CC.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance -end +Compiler.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) +Compiler.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance)) +Compiler.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance # lattice # ------- -import Core.Compiler: MustAlias, Const, PartialStruct, ⊑, tmerge +import .Compiler: MustAlias, Const, PartialStruct, ⊑, tmerge let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) - ⊑(@nospecialize(a), @nospecialize(b)) = Core.Compiler.:⊑(𝕃ᵢ, a, b) - tmerge(@nospecialize(a), @nospecialize(b)) = Core.Compiler.tmerge(𝕃ᵢ, a, b) - isa_tfunc(@nospecialize xs...) = Core.Compiler.isa_tfunc(𝕃ᵢ, xs...) - ifelse_tfunc(@nospecialize xs...) = Core.Compiler.ifelse_tfunc(𝕃ᵢ, xs...) + ⊑(@nospecialize(a), @nospecialize(b)) = Compiler.:⊑(𝕃ᵢ, a, b) + tmerge(@nospecialize(a), @nospecialize(b)) = Compiler.tmerge(𝕃ᵢ, a, b) + isa_tfunc(@nospecialize xs...) = Compiler.isa_tfunc(𝕃ᵢ, xs...) + ifelse_tfunc(@nospecialize xs...) = Compiler.ifelse_tfunc(𝕃ᵢ, xs...) 
@test (MustAlias(2, AliasableField{Any}, 1, Int) ⊑ Int) @test !(Int ⊑ MustAlias(2, AliasableField{Any}, 1, Int)) @@ -2181,6 +2295,7 @@ let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance) @test tmerge(MustAlias(2, AliasableField{Any}, 1, Int), Const(nothing)) === Union{Int,Nothing} @test tmerge(Const(nothing), MustAlias(2, AliasableField{Any}, 1, Any)) === Any @test tmerge(Const(nothing), MustAlias(2, AliasableField{Any}, 1, Int)) === Union{Int,Nothing} + tmerge(Const(AbstractVector{<:Any}), Const(AbstractVector{T} where {T})) # issue #56913 @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Const(Bool)) === Const(true) @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Type{Bool}) === Const(true) @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Int), Type{Bool}) === Const(false) @@ -2188,6 +2303,12 @@ let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance) @test ifelse_tfunc(MustAlias(2, AliasableField{Any}, 1, Int), Int, Int) === Union{} end +@testset "issue #56913: `BoundsError` in type inference" begin + R = UnitRange{Int} + @test Type{AbstractVector} == Base.infer_return_type(Base.promote_typeof, Tuple{R, R, Vector{Any}, Vararg{R}}) + @test Type{AbstractVector} == Base.infer_return_type(Base.promote_typeof, Tuple{R, R, Vector{Any}, R, Vararg{R}}) +end + maybeget_mustalias_tmerge(x::AliasableField) = x.f maybeget_mustalias_tmerge(x) = x @test Base.return_types((Union{Nothing,AliasableField{Any}},); interp=MustAliasInterpreter()) do x @@ -2331,7 +2452,7 @@ isaint(a) = isa(a, Int) end return 0 end |> only === Int -# handle multiple call-site refinment targets +# handle multiple call-site refinement targets isasome(_) = true isasome(::Nothing) = false @test_broken Base.return_types((AliasableField{Union{Int,Nothing}},); interp=MustAliasInterpreter()) do a @@ -2439,11 +2560,19 @@ end |> only === Int end |> only === Some{Int} # handle the edge case -@eval intermustalias_edgecase(_) = $(Core.Compiler.InterMustAlias(2, Some{Any}, 1, Int)) +@eval intermustalias_edgecase(_) = $(Compiler.InterMustAlias(2, Some{Any}, 1, Int)) Base.return_types(intermustalias_edgecase, (Any,); interp=MustAliasInterpreter()) # create cache @test Base.return_types((Any,); interp=MustAliasInterpreter()) do x intermustalias_edgecase(x) -end |> only === Core.Compiler.InterMustAlias +end |> only === Compiler.InterMustAlias + +@test Base.infer_return_type((AliasableField,Integer,); interp=MustAliasInterpreter()) do a, x + s = (;x) + if getfield(a, :f) isa Symbol + return getfield(s, getfield(a, :f)) + end + return 0 +end == Integer function f25579(g) h = g[] @@ -2471,7 +2600,7 @@ function h25579(g) return t ? typeof(h) : typeof(h) end @test Base.return_types(h25579, (Base.RefValue{Union{Nothing, Int}},)) == - Any[Union{Type{Float64}, Type{Int}, Type{Nothing}}] + Any[Type{Float64}] f26172(v) = Val{length(Base.tail(ntuple(identity, v)))}() # Val(M-1) g26172(::Val{0}) = () @@ -2522,7 +2651,7 @@ g26826(x) = getfield26826(x, :a, :b) # If this test is broken (especially if inference is getting a correct, but loose result, # like a Union) then it's potentially an indication that the optimizer isn't hitting the # InferenceResult cache properly for varargs methods. 
-let ct = Core.Compiler.code_typed(f26826, (Float64,))[1] +let ct = code_typed(f26826, (Float64,))[1] typed_code, retty = ct.first, ct.second found_poorly_typed_getfield_call = false for i = 1:length(typed_code.code) @@ -2645,10 +2774,10 @@ end |> only === Int # `apply_type_tfunc` accuracy for constrained type construction # https://github.com/JuliaLang/julia/issues/47089 -import Core: Const -import Core.Compiler: apply_type_tfunc struct Issue47089{A<:Number,B<:Number} end -let 𝕃 = Core.Compiler.fallback_lattice +let apply_type_tfunc = Compiler.apply_type_tfunc + 𝕃 = Compiler.fallback_lattice + Const = Core.Const A = Type{<:Integer} @test apply_type_tfunc(𝕃, Const(Issue47089), A, A) <: (Type{Issue47089{A,B}} where {A<:Integer, B<:Integer}) @test apply_type_tfunc(𝕃, Const(Issue47089), Const(Int), Const(Int), Const(Int)) === Union{} @@ -2667,7 +2796,7 @@ end @test only(Base.return_types(Base.afoldl, (typeof((m, n) -> () -> Returns(nothing)(m, n)), Function, Function, Vararg{Function}))) === Function let A = Tuple{A,B,C,D,E,F,G,H} where {A,B,C,D,E,F,G,H} - B = Core.Compiler.rename_unionall(A) + B = Compiler.rename_unionall(A) for i in 1:8 @test A.var != B.var && (i == 1 ? A == B : A != B) A, B = A.body, B.body @@ -2831,7 +2960,7 @@ end # issue #27316 - inference shouldn't hang on these f27316(::Vector) = nothing f27316(::Any) = f27316(Any[][1]), f27316(Any[][1]) -let expected = NTuple{2, Union{Nothing, NTuple{2, Union{Nothing, Tuple{Any, Any}}}}} +let expected = NTuple{2, Union{Nothing, Tuple{Any, Any}}} @test Tuple{Nothing, Nothing} <: only(Base.return_types(f27316, Tuple{Int})) == expected # we may be able to improve this bound in the future end function g27316() @@ -2908,13 +3037,15 @@ end @test ig27907(Int, Int, 1, 0) == 0 # issue #28279 -function f28279(b::Bool) - i = 1 - while i > b - i -= 1 +# ensure that lowering doesn't move these into statement position, which would require renumbering +@eval function f28279(b::Bool) + let i = 1 + while $(>)(i, b) + i = $(-)(i, 1) + end + if b end + return $(+)(i, 1) end - if b end - return i + 1 end code28279 = code_lowered(f28279, (Bool,))[1].code oldcode28279 = deepcopy(code28279) @@ -2931,7 +3062,7 @@ let i end end end -Core.Compiler.renumber_ir_elements!(code28279, ssachangemap, labelchangemap) +Compiler.renumber_ir_elements!(code28279, ssachangemap, labelchangemap) @test length(code28279) === length(oldcode28279) offset = 1 let i @@ -2954,11 +3085,11 @@ end # issue #28356 # unit test to make sure countunionsplit overflows gracefully # we don't care what number is returned as long as it's large -@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int32, Int64} for i=1:80]) > 100000 -@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}]) == 2 -@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8 -@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6 -@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6 +@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int32, Int64} for i=1:80]) > 100000 +@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}]) == 2 +@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, 
Union{Int8, Int16, Int32, Int64}, Int8]) == 8 +@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6 +@test Compiler.unionsplitcost(Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6 # make sure compiler doesn't hang in union splitting @@ -3201,8 +3332,8 @@ _rttf_test(::Int16) = 0 _rttf_test(::Int32) = 0 _rttf_test(::Int64) = 0 _rttf_test(::Int128) = 0 -_call_rttf_test() = Core.Compiler.return_type(_rttf_test, Tuple{Any}) -@test Core.Compiler.return_type(_rttf_test, Tuple{Any}) === Int +_call_rttf_test() = Base._return_type(_rttf_test, Tuple{Any}) +@test Base._return_type(_rttf_test, Tuple{Any}) === Int @test _call_rttf_test() === Int f_with_Type_arg(::Type{T}) where {T} = T @@ -3225,7 +3356,10 @@ end call_ntuple(a, b) = my_ntuple(i->(a+b; i), Val(4)) @test Base.return_types(call_ntuple, Tuple{Any,Any}) == [NTuple{4, Int}] @test length(code_typed(my_ntuple, Tuple{Any, Val{4}})) == 1 -@test_throws ErrorException code_typed(my_ntuple, Tuple{Any, Val}) +let (src, rt) = only(code_typed(my_ntuple, Tuple{Any, Val})) + @test src isa CodeInfo + @test rt == Tuple +end @generated unionall_sig_generated(::Vector{T}, b::Vector{S}) where {T, S} = :($b) @test length(code_typed(unionall_sig_generated, Tuple{Any, Vector{Int}})) == 1 @@ -3247,15 +3381,15 @@ end @test @inferred(foo30783(2)) == Val(1) # PartialStruct tmerge -using Core.Compiler: PartialStruct, tmerge, Const, ⊑ +using .Compiler: PartialStruct, tmerge, Const, ⊑ struct FooPartial a::Int b::Int c::Int end -let PT1 = PartialStruct(FooPartial, Any[Const(1), Const(2), Int]), - PT2 = PartialStruct(FooPartial, Any[Const(1), Int, Int]), - PT3 = PartialStruct(FooPartial, Any[Const(1), Int, Const(3)]) +let PT1 = PartialStruct(Compiler.fallback_lattice, FooPartial, Any[Const(1), Const(2), Int]), + PT2 = PartialStruct(Compiler.fallback_lattice, FooPartial, Any[Const(1), Int, Int]), + PT3 = PartialStruct(Compiler.fallback_lattice, FooPartial, Any[Const(1), Int, Const(3)]) @test PT1 ⊑ PT2 @test !(PT1 ⊑ PT3) && !(PT2 ⊑ PT1) @@ -3381,14 +3515,14 @@ const DenseIdx = Union{IntRange,Integer} @test @inferred(foo_26724((), 1:4, 1:5, 1:6)) === (4, 5, 6) # Non uniformity in expressions with PartialTypeVar -@test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar) +@test Compiler.:⊑(Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar) let N = TypeVar(:N) - 𝕃 = Core.Compiler.SimpleInferenceLattice.instance - argtypes = Any[Core.Compiler.Const(NTuple), - Core.Compiler.PartialTypeVar(N, true, true), - Core.Compiler.Const(Any)] + 𝕃 = Compiler.SimpleInferenceLattice.instance + argtypes = Any[Compiler.Const(NTuple), + Compiler.PartialTypeVar(N, true, true), + Compiler.Const(Any)] rt = Type{Tuple{Vararg{Any,N}}} - @test Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) + @test Compiler.apply_type_nothrow(𝕃, argtypes, rt) end # issue #33768 @@ -3410,8 +3544,12 @@ end @test Base.return_types(h33768, ()) == Any[Union{}] # constant prop of `Symbol("")` -f_getf_computed_symbol(p) = getfield(p, Symbol("first")) -@test Base.return_types(f_getf_computed_symbol, Tuple{Pair{Int8,String}}) == [Int8] +@test Base.return_types() do + Val(Symbol("julia")) +end |> only == Val{:julia} +@test Base.return_types() do p::Pair{Int8,String} + getfield(p, Symbol("first")) +end |> only == Int8 # issue #33954 struct X33954 @@ -3442,8 +3580,20 @@ function pickvarnames(x::Vector{Any}) end @test pickvarnames(:a) === :a @test 
pickvarnames(Any[:a, :b]) === (:a, :b) -@test only(Base.return_types(pickvarnames, (Vector{Any},))) == Tuple{Vararg{Union{Symbol, Tuple}}} -@test only(Base.code_typed(pickvarnames, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple}}}}}} +@test only(Base.return_types(pickvarnames, (Vector{Any},))) == Tuple +@test only(Base.code_typed(pickvarnames, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple}}} + +# make sure this converges in a reasonable amount of time +function pickvarnames2(x::Vector{Any}) + varnames = () + for a in x + varnames = (varnames..., pickvarnames(a) ) + end + return varnames +end +@test only(Base.return_types(pickvarnames2, (Vector{Any},))) == Tuple{Vararg{Union{Symbol, Tuple}}} +@test only(Base.code_typed(pickvarnames2, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple}}} + @test map(>:, [Int], [Int]) == [true] @@ -3485,29 +3635,29 @@ end f() = _foldl_iter(step, (Missing[],), [0.0], 1) end -@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 0) == Tuple{Int} -@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 1) == Tuple{Int} -@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 2) == Tuple{Int} -@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 0) == +@test Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 0) == Tuple{Int} +@test Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 1) == Tuple{Int} +@test Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 2) == Tuple{Int} +@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 0) == Tuple{Int, Union{Char, Int}, Union{Char, Int}} -@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 10) == +@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 10) == Union{Tuple{Int, Char, Char}, Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}} -@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 0) == +@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 0) == NTuple{3, Union{Int, Char}} -@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 10) == +@test Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 10) == Union{Tuple{Char, Char, Int}, Tuple{Char, Int, Char}, Tuple{Char, Int, Int}, Tuple{Int, Char, Char}, Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}} # Test that these don't throw -@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}} -@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{} -@test Core.Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{} -@test Core.Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == Tuple{String,Vararg{Int}} -@test Core.Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real} -@test Core.Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}} +@test Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}} +@test Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{} +@test Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{} +@test Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == 
Tuple{String,Vararg{Int}} +@test Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real} +@test Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}} -@test Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}}) -@test !Core.Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}}) -@test !Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int}) +@test Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}}) +@test !Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}}) +@test !Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int}) @test Base.return_types(Issue35566.f) == [Val{:expected}] @@ -3664,8 +3814,8 @@ f_generator_splat(t::Tuple) = tuple((identity(l) for l in t)...) # Issue #36710 - sizeof(::UnionAll) tfunc correctness @test (sizeof(Ptr),) == sizeof.((Ptr,)) == sizeof.((Ptr{Cvoid},)) -@test Core.Compiler.sizeof_tfunc(Core.Compiler.fallback_lattice, UnionAll) === Int -@test !Core.Compiler.sizeof_nothrow(UnionAll) +@test Compiler.sizeof_tfunc(Compiler.fallback_lattice, UnionAll) === Int +@test !Compiler.sizeof_nothrow(UnionAll) @test only(Base.return_types(Core._expr)) === Expr @test only(Base.return_types(Core.svec, (Any,))) === Core.SimpleVector @@ -3734,119 +3884,12 @@ f_apply_cglobal(args...) = cglobal(args...) @test only(Base.return_types(f_apply_cglobal, Tuple{Any, Type{Int}, Type{Int}, Vararg{Type{Int}}})) == Union{} # issue #37532 -@test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{Int}}, Int]) -@test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{T}} where T, Ptr]) -@test !Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr}, Ptr]) +@test Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{Int}}, Int]) +@test Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{T}} where T, Ptr]) +@test !Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr}, Ptr]) f37532(T, x) = (Core.bitcast(Ptr{T}, x); x) @test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int] -# PR #37749 -# Helper functions for Core.Compiler.Timings. These are normally accessed via a package - -# usually (SnoopCompileCore). -function time_inference(f) - Core.Compiler.Timings.reset_timings() - Core.Compiler.__set_measure_typeinf(true) - f() - Core.Compiler.__set_measure_typeinf(false) - Core.Compiler.Timings.close_current_timer() - return Core.Compiler.Timings._timings[1] -end -function depth(t::Core.Compiler.Timings.Timing) - maximum(depth.(t.children), init=0) + 1 -end -function flatten_times(t::Core.Compiler.Timings.Timing) - collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...])) -end -# Some very limited testing of timing the type inference (#37749). -@testset "Core.Compiler.Timings" begin - # Functions that call each other - @eval module M1 - i(x) = x+5 - i2(x) = x+2 - h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2 - g(y::Integer, x) = h(Any[y]) + Int(x) - end - timing1 = time_inference() do - @eval M1.g(2, 3.0) - end - @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1)) - # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with - # their concrete types. 
- @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2] - @test all(child->isa(child.bt, Vector), timing1.children) - @test all(child->child.bt===nothing, timing1.children[1].children) - # Test the stacktrace - @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame}) - # Test that inference has cached some of the Method Instances - timing2 = time_inference() do - @eval M1.g(2, 3.0) - end - @test length(flatten_times(timing2)) < length(flatten_times(timing1)) - # Printing of InferenceFrameInfo for mi.def isa Module - @eval module M2 - i(x) = x+5 - i2(x) = x+2 - h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2 - g(y::Integer, x) = h(Any[y]) + Int(x) - end - # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed) - timingmod = time_inference() do - @eval @testset "Outer" begin - @testset "Inner" begin - for i = 1:2 M2.g(2, 3.0) end - end - end - end - @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children)) - # END LINE NUMBER SENSITIVITY - - # Recursive function - @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end - timing = time_inference() do - @eval _Recursive.f(Base.inferencebarrier(5)) - end - @test 2 <= depth(timing) <= 3 # root -> f (-> +) - @test 2 <= length(flatten_times(timing)) <= 3 # root, f, + - - # Functions inferred with multiple constants - @eval module C - i(x) = x === 0 ? 0 : 1 / x - a(x) = i(0) * i(x) - b() = i(0) * i(1) * i(0) - function loopc(n) - s = 0 - for i = 1:n - s += i - end - return s - end - call_loopc() = loopc(5) - myfloor(::Type{T}, x) where T = floor(T, x) - d(x) = myfloor(Int16, x) - end - timing = time_inference() do - @eval C.a(2) - @eval C.b() - @eval C.call_loopc() - @eval C.d(3.2) - end - ft = flatten_times(timing) - @test !isempty(ft) - str = sprint(show, ft) - @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str) # inference constants - @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified - # loopc has internal slots, check constant printing in this case - sel = filter(ti -> ti.second.mi.def.name === :loopc, ft) - ifi = sel[end].second - @test length(ifi.slottypes) > ifi.nargs - str = sprint(show, sel) - @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str) - # check that types aren't double-printed as `T::Type{T}` - sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft) - str = sprint(show, sel) - @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str) -end - # issue #37638 @test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type @@ -3887,16 +3930,16 @@ Base.@constprop :aggressive @noinline f_constprop_aggressive_noinline(f, x) = (f Base.@constprop :none f_constprop_none(f, x) = (f(x); Val{x}()) Base.@constprop :none @inline f_constprop_none_inline(f, x) = (f(x); Val{x}()) -@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_simple))) -@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_simple))) -@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive))) -@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive))) -@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive_noinline))) -@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive_noinline))) -@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none))) -@test 
Core.Compiler.is_no_constprop(only(methods(f_constprop_none))) -@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none_inline))) -@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none_inline))) +@test !Compiler.is_aggressive_constprop(only(methods(f_constprop_simple))) +@test !Compiler.is_no_constprop(only(methods(f_constprop_simple))) +@test Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive))) +@test !Compiler.is_no_constprop(only(methods(f_constprop_aggressive))) +@test Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive_noinline))) +@test !Compiler.is_no_constprop(only(methods(f_constprop_aggressive_noinline))) +@test !Compiler.is_aggressive_constprop(only(methods(f_constprop_none))) +@test Compiler.is_no_constprop(only(methods(f_constprop_none))) +@test !Compiler.is_aggressive_constprop(only(methods(f_constprop_none_inline))) +@test Compiler.is_no_constprop(only(methods(f_constprop_none_inline))) # make sure that improvements to the compiler don't render the annotation effectless. @test Base.return_types((Function,)) do f @@ -3952,12 +3995,12 @@ end @testset "switchtupleunion" begin # signature tuple let - tunion = Core.Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Nothing}) + tunion = Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Nothing}) @test Tuple{Int32, Nothing} in tunion @test Tuple{Int64, Nothing} in tunion end let - tunion = Core.Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Union{Float32,Float64}, Nothing}) + tunion = Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Union{Float32,Float64}, Nothing}) @test Tuple{Int32, Float32, Nothing} in tunion @test Tuple{Int32, Float64, Nothing} in tunion @test Tuple{Int64, Float32, Nothing} in tunion @@ -3966,13 +4009,13 @@ end # argtypes let - tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Core.Const(nothing)]) + tunion = Compiler.switchtupleunion(Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Core.Const(nothing)]) @test length(tunion) == 2 @test Any[Int32, Core.Const(nothing)] in tunion @test Any[Int64, Core.Const(nothing)] in tunion end let - tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)]) + tunion = Compiler.switchtupleunion(Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)]) @test length(tunion) == 4 @test Any[Int32, Float32, Core.Const(nothing)] in tunion @test Any[Int32, Float64, Core.Const(nothing)] in tunion @@ -4053,6 +4096,126 @@ end end end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}] +@testset "constraint back-propagation from typeassert" begin + @test Base.infer_return_type((Any,)) do a + typeassert(a, Int) + return a + end == Int + + @test Base.infer_return_type((Any,Bool)) do a, b + if b + typeassert(a, Int64) + else + typeassert(a, Int32) + end + return a + end == Union{Int32,Int64} + + @test Base.infer_return_type((Vector{Any},)) do args + codeinst = first(args) + if codeinst isa Core.MethodInstance + mi = codeinst + else + codeinst::Core.CodeInstance + def = codeinst.def + if isa(def, Core.ABIOverride) + mi = def.def + else + mi = def::Core.MethodInstance + end + end + return mi + end == Core.MethodInstance +end + +callsig_backprop_basic(::Int) = nothing +callsig_backprop_unionsplit(::Int32) = nothing +callsig_backprop_unionsplit(::Int64) = nothing +callsig_backprop_multi(::Int32, ::Int64) = nothing +callsig_backprop_any(::Any) = nothing 
+callsig_backprop_lhs(::Int) = nothing +callsig_backprop_bailout(::Val{0}) = 0 +callsig_backprop_bailout(::Val{1}) = undefvar # undefvar::Any triggers `bail_out_call` +callsig_backprop_bailout(::Val) = 2 +callsig_backprop_addinteger(a::Integer, b::Integer) = a + b # results in too many matching methods and triggers `bail_out_call`) +@test Base.infer_return_type(callsig_backprop_addinteger) == Any +let effects = Base.infer_effects(callsig_backprop_addinteger) + @test !Compiler.is_consistent(effects) + @test !Compiler.is_effect_free(effects) + @test !Compiler.is_nothrow(effects) + @test !Compiler.is_terminates(effects) +end +callsig_backprop_anti(::Any) = :any +callsig_backprop_anti(::Int) = :int + +@testset "constraint back-propagation from call signature" begin + # basic case + @test Base.infer_return_type(a->(callsig_backprop_basic(a); return a), (Any,)) == Int + + # union-split case + @test Base.infer_return_type(a->(callsig_backprop_unionsplit(a); return a), (Any,)) == Union{Int32,Int64} + + # multiple arguments updates + @test Base.infer_return_type((Any,Any)) do a, b + callsig_backprop_multi(a, b) + return a, b + end == Tuple{Int32,Int64} + + # refinement should happen only when it's worthwhile + @test Base.infer_return_type(a->(callsig_backprop_any(a); return a), (Integer,)) == Integer + + # state update on lhs slot (assignment effect should have the precedence) + @test Base.infer_return_type((Any,)) do a + a = callsig_backprop_lhs(a) + return a + end == Nothing + + # make sure to throw away an intermediate refinement information when we bail out early + # (inference would bail out on `callsig_backprop_bailout(::Val{1})`) + @test Base.infer_return_type(a->(callsig_backprop_bailout(a); return a), (Any,)) == Any + + # if we see all the matching methods, we don't need to throw away refinement information + # even if it's caught by `bail_out_call` check + @test Base.infer_return_type((Any,Any)) do a, b + callsig_backprop_addinteger(a, b) + return a, b + end == Tuple{Integer,Integer} + + # anti case + @test Base.infer_return_type((Any,)) do x + callsig_backprop_anti(x) + return x + end == Any +end + +# make sure to add backedges when we use call signature constraint +function callsig_backprop_invalidation_outer(a) + callsig_backprop_invalidation_inner!(a) + return a +end +@eval callsig_backprop_invalidation_inner!(::Int) = $(gensym(:undefvar)) # ::Any +@test Base.infer_return_type((Any,)) do a + callsig_backprop_invalidation_outer(a) +end == Int +# new definition of `callsig_backprop_invalidation_inner!` should invalidate `callsig_backprop_invalidation_outer` +# (even if the previous return type is annotated as `Any`) +@eval callsig_backprop_invalidation_inner!(::Nothing) = $(gensym(:undefvar)) # ::Any +@test Base.infer_return_type((Any,)) do a + # since inference will bail out at the first matched `_inner!` and so call signature constraint won't be available + callsig_backprop_invalidation_outer(a) +end ≠ Int + +# https://github.com/JuliaLang/julia/issues/37866 +function issue37866(v::Vector{Union{Nothing,Float64}}) + for x in v + if x > 5.0 + return x # x > 5.0 is MethodError for Nothing so can assume ::Float64 + end + end + return 0.0 +end +@test Base.infer_return_type(issue37866, (Vector{Union{Nothing,Float64}},)) == Float64 + # make sure inference on a recursive call graph with nested `Type`s terminates # https://github.com/JuliaLang/julia/issues/40336 f40336(@nospecialize(t)) = f40336(Type{t}) @@ -4108,22 +4271,22 @@ end let # Test the presence of PhiNodes in lowered IR by taking 
the above function, # running it through SSA conversion and then putting it into an opaque # closure. - mi = Core.Compiler.specialize_method(first(methods(f_convert_me_to_ir)), + mi = Compiler.specialize_method(first(methods(f_convert_me_to_ir)), Tuple{Bool, Float64}, Core.svec()) ci = Base.uncompressed_ast(mi.def) ci.slottypes = Any[ Any for i = 1:length(ci.slotflags) ] ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes] - sv = Core.Compiler.OptimizationState(mi, Core.Compiler.NativeInterpreter()) - ir = Core.Compiler.convert_to_ircode(ci, sv) - ir = Core.Compiler.slot2reg(ir, ci, sv) - ir = Core.Compiler.compact!(ir) - Core.Compiler.replace_code_newstyle!(ci, ir) + sv = Compiler.OptimizationState(mi, Compiler.NativeInterpreter()) + ir = Compiler.convert_to_ircode(ci, sv) + ir = Compiler.slot2reg(ir, ci, sv) + ir = Compiler.compact!(ir) + Compiler.replace_code_newstyle!(ci, ir) ci.ssavaluetypes = length(ci.ssavaluetypes) @test any(x->isa(x, Core.PhiNode), ci.code) - oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, + oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, true, Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(b, 1.0) @test Base.return_types(oc, Tuple{Bool}) == Any[Float64] - oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, + oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, true, Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(true, 1.0) @test Base.return_types(oc, Tuple{}) == Any[Float64] end @@ -4275,30 +4438,28 @@ g41908() = f41908(Any[1][1]) # issue #42022 let x = Tuple{Int,Any}[ #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1)) - #= 2=# (0, Expr(:enter, 18)) + #= 2=# (0, EnterNode(17)) #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0)) - #= 4=# (2, Expr(:enter, 12)) + #= 4=# (2, EnterNode(12)) #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3')) #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9)) - #= 7=# (4, Expr(:leave, 2)) + #= 7=# (4, Expr(:leave, Core.SSAValue(4), Core.SSAValue(2))) #= 8=# (0, Core.ReturnNode(1)) #= 9=# (4, Expr(:call, GlobalRef(Main, :throw))) - #=10=# (4, Expr(:leave, 1)) - #=11=# (2, Core.GotoNode(16)) - #=12=# (4, Expr(:leave, 1)) - #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception))) - #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow))) - #=15=# (2, Expr(:pop_exception, Core.SSAValue(4))) - #=16=# (2, Expr(:leave, 1)) - #=17=# (0, Core.GotoNode(22)) - #=18=# (2, Expr(:leave, 1)) - #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception))) - #=20=# (0, nothing) - #=21=# (0, Expr(:pop_exception, Core.SSAValue(2))) - #=22=# (0, Core.ReturnNode(Core.SlotNumber(3))) + #=10=# (4, Expr(:leave, Core.SSAValue(4))) + #=11=# (2, Core.GotoNode(15)) + #=12=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception))) + #=13=# (2, Expr(:call, GlobalRef(Main, :rethrow))) + #=14=# (2, Expr(:pop_exception, Core.SSAValue(4))) + #=15=# (2, Expr(:leave, Core.SSAValue(2))) + #=16=# (0, Core.GotoNode(20)) + #=17=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception))) + #=18=# (0, nothing) + #=19=# (0, Expr(:pop_exception, Core.SSAValue(2))) + #=20=# (0, Core.ReturnNode(Core.SlotNumber(3))) ] - handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet()) - @test handler_at == first.(x) + (;handler_at, handlers) = Compiler.compute_trycatch(last.(x)) + @test map(x->x[1] == 0 ? 
0 : Compiler.get_enter_idx(handlers[x[1]]), handler_at) == first.(x) end @test only(Base.return_types((Bool,)) do y @@ -4315,7 +4476,7 @@ end nothing end return x - end) === Union{Int, Float64, Char} + end) === Union{Int, Char} # issue #42097 struct Foo42097{F} end @@ -4347,8 +4508,10 @@ let # Vararg #=va=# Bound, unbound, # => Tuple{Integer,Integer} (invalid `TypeVar` widened beforehand) } where Bound<:Integer - argtypes = Core.Compiler.most_general_argtypes(method, specTypes, true) + argtypes = Compiler.most_general_argtypes(method, specTypes) popfirst!(argtypes) + # N.B.: `argtypes` do not have va processing applied yet + @test length(argtypes) == 12 @test argtypes[1] == Integer @test argtypes[2] == Integer @test argtypes[3] == Type{Bound} where Bound<:Integer @@ -4359,7 +4522,8 @@ let @test argtypes[8] == Any @test argtypes[9] == Union{Nothing,Bound} where Bound<:Integer @test argtypes[10] == Any - @test argtypes[11] == Tuple{Integer,Integer} + @test argtypes[11] == Integer + @test argtypes[12] == Integer end # make sure not to call `widenconst` on `TypeofVararg` objects @@ -4414,7 +4578,8 @@ end |> only == Tuple{Int,Int} end |> only == Int # form PartialStruct for mutables with `const` field -import Core.Compiler: Const, ⊑ +using Core: Const +using .Compiler: ⊑ mutable struct PartialMutable{S,T} const s::S t::T @@ -4491,32 +4656,80 @@ end # issue #43784 @testset "issue #43784" begin - init = Base.ImmutableDict{Any,Any}() - a = Const(init) - b = Core.PartialStruct(typeof(init), Any[Const(init), Any, Any]) - c = Core.Compiler.tmerge(a, b) - @test ⊑(a, c) - @test ⊑(b, c) - - init = Base.ImmutableDict{Number,Number}() - a = Const(init) - b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64]) - c = Core.Compiler.tmerge(a, b) - @test ⊑(a, c) && ⊑(b, c) - @test c === typeof(init) - - a = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64]) - c = Core.Compiler.tmerge(a, b) - @test ⊑(a, c) && ⊑(b, c) - @test c.fields[2] === Any # or Number - @test c.fields[3] === ComplexF64 - - b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}]) - c = Core.Compiler.tmerge(a, b) - @test ⊑(a, c) - @test ⊑(b, c) - @test c.fields[2] === Complex - @test c.fields[3] === Complex + ⊑ = Compiler.partialorder(Compiler.fallback_lattice) + ⊔ = Compiler.join(Compiler.fallback_lattice) + 𝕃 = Compiler.fallback_lattice + Const, PartialStruct = Core.Const, Core.PartialStruct + let init = Base.ImmutableDict{Any,Any}() + a = Const(init) + b = PartialStruct(𝕃, typeof(init), Any[Const(init), Any, Any]) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c === typeof(init) + end + let init = Base.ImmutableDict{Any,Any}(1,2) + a = Const(init) + b = PartialStruct(𝕃, typeof(init), Any[Const(getfield(init,1)), Any, Any]) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c isa PartialStruct + @test length(c.fields) == 3 + end + let init = Base.ImmutableDict{Number,Number}() + a = Const(init) + b = PartialStruct(𝕃, typeof(init), Any[Const(init), Number, ComplexF64]) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c === typeof(init) + end + let init = Base.ImmutableDict{Number,Number}() + a = PartialStruct(𝕃, typeof(init), Any[Const(init), ComplexF64, ComplexF64]) + b = PartialStruct(𝕃, typeof(init), Any[Const(init), Number, ComplexF64]) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c isa PartialStruct + @test c.fields[2] === Number + @test c.fields[3] === ComplexF64 + end + let init = Base.ImmutableDict{Number,Number}() + a = PartialStruct(𝕃, 
typeof(init), Any[Const(init), ComplexF64, ComplexF64]) + b = PartialStruct(𝕃, typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}]) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c isa PartialStruct + @test c.fields[2] === Complex + @test c.fields[3] === Complex + end + let T = Base.ImmutableDict{Number,Number} + a = PartialStruct(𝕃, T, Any[T]) + b = PartialStruct(𝕃, T, Any[T, Number, Number]) + @test b ⊑ a + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c isa PartialStruct + @test length(c.fields) == 1 + end + let T = Base.ImmutableDict{Number,Number} + a = PartialStruct(𝕃, T, Any[T]) + b = Const(T()) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c === T + end + let T = Base.ImmutableDict{Number,Number} + a = Const(T()) + b = PartialStruct(𝕃, T, Any[T]) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c === T + end + let T = Base.ImmutableDict{Number,Number} + a = Const(T()) + b = Const(T(1,2)) + c = a ⊔ b + @test a ⊑ c && b ⊑ c + @test c === T + end global const ginit43784 = Base.ImmutableDict{Any,Any}() @test Base.return_types() do @@ -4525,6 +4738,55 @@ end g = Base.ImmutableDict(g, 1=>2) end end |> only === Union{} + + a = Val{Union{}} + a = Compiler.tmerge(Union{a, Val{a}}, a) + @test a == Union{Val{Union{}}, Val{Val{Union{}}}} + a = Compiler.tmerge(Union{a, Val{a}}, a) + @test a == Union{Val{Union{}}, Val{Val{Union{}}}, Val{Union{Val{Union{}}, Val{Val{Union{}}}}}} + a = Compiler.tmerge(Union{a, Val{a}}, a) + @test a == Val + + a = Val{Union{}} + a = Compiler.tmerge(Compiler.JLTypeLattice(), Val{<:a}, a) + @test_broken a != Val{<:Val{Union{}}} + @test_broken a == Val{<:Val} || a == Val + + a = Tuple{Vararg{Tuple{}}} + a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Union{Tuple{Tuple{Vararg{Tuple{}}}}, Tuple{Vararg{Tuple{}}}} + a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple{Vararg{Union{Tuple{Tuple{Vararg{Tuple{}}}}, Tuple{Vararg{Tuple{}}}}}} + a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple + a = Compiler.tmerge(Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple +end + +let ⊑ = Compiler.partialorder(Compiler.fallback_lattice) + ⊔ = Compiler.join(Compiler.fallback_lattice) + 𝕃 = Compiler.fallback_lattice + Const, PartialStruct = Core.Const, Core.PartialStruct + + @test (Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int])) + @test !(Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int])) + @test !(Const((1,2,3)) ⊑ PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int])) + @test (Const((1,2,3)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int])) + @test (Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}])) + @test (Const((1,2)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}])) broken=true + @test (Const((1,2,3)) ⊑ PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}])) + @test !(PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2))) + @test !(PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2))) + @test !(PartialStruct(𝕃, Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2,3))) + @test !(PartialStruct(𝕃, Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2,3))) + @test !(PartialStruct(𝕃, Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]) ⊑ Const((1,2))) + @test !(PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2))) + @test !(PartialStruct(𝕃, Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ 
Const((1,2,3))) + + t = Const((false, false)) ⊔ Const((false, true)) + @test t isa PartialStruct && length(t.fields) == 2 && t.fields[1] === Const(false) + t = t ⊔ Const((false, false, 0)) + @test t ⊑ Union{Tuple{Bool,Bool},Tuple{Bool,Bool,Int}} end # Test that a function-wise `@max_methods` works as expected @@ -4550,18 +4812,18 @@ end # at top level. @test let Base.Experimental.@force_compile - Core.Compiler.return_type(+, NTuple{2, Rational}) + Base._return_type(+, NTuple{2, Rational}) end == Rational # vararg-tuple comparison within `Compiler.PartialStruct` # https://github.com/JuliaLang/julia/issues/44965 -let 𝕃ᵢ = Core.Compiler.fallback_lattice - t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Any}]) - @test Core.Compiler.issimplertype(𝕃ᵢ, t, t) +let 𝕃ᵢ = Compiler.fallback_lattice + t = Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Any}]) + @test Compiler.issimplertype(𝕃ᵢ, t, t) - t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Union{}}]) + t = Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Union{}}]) @test t === Const((42,)) - t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Int, Vararg{Union{}}]) + t = Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Int, Vararg{Union{}}]) @test t.typ === Tuple{Int, Int} @test t.fields == Any[Const(42), Int] end @@ -4633,7 +4895,7 @@ end |> only === Union{Int,Nothing} @test Base.return_types((Symbol,Int)) do setting, val compilerbarrier(setting, val) end |> only === Any # XXX we may want to have "compile-time" error for this instead -for setting = (:type, :const, :conditional) +for setting = (#=:type, :const,=# :conditional,) # a successful barrier on abstract interpretation should be eliminated at the optimization @test @eval fully_eliminated((Int,)) do a compilerbarrier($(QuoteNode(setting)), 42) @@ -4661,7 +4923,7 @@ let src = code_typed1() do end # Test that Const ⊑ PartialStruct respects vararg -@test Const((1,2)) ⊑ PartialStruct(Tuple{Vararg{Int}}, [Const(1), Vararg{Int}]) +@test Const((1,2)) ⊑ PartialStruct(Compiler.fallback_lattice, Tuple{Vararg{Int}}, [Const(1), Vararg{Int}]) # Test that semi-concrete interpretation doesn't break on functions with while loops in them. Base.@assume_effects :consistent :effect_free :terminates_globally function pure_annotated_loop(x::Int, y::Int) @@ -4687,7 +4949,7 @@ invoke_concretized1(a::Integer) = a > 0 ? "integer" : nothing # check if `invoke(invoke_concretized1, Tuple{Integer}, ::Int)` is foldable @test Base.infer_effects((Int,)) do a @invoke invoke_concretized1(a::Integer) -end |> Core.Compiler.is_foldable +end |> Compiler.is_foldable @test Base.return_types() do @invoke invoke_concretized1(42::Integer) end |> only === String @@ -4697,7 +4959,7 @@ invoke_concretized2(a::Integer) = a > 0 ? :integer : nothing # check if `invoke(invoke_concretized2, Tuple{Integer}, ::Int)` is foldable @test Base.infer_effects((Int,)) do a @invoke invoke_concretized2(a::Integer) -end |> Core.Compiler.is_foldable +end |> Compiler.is_foldable @test let Base.Experimental.@force_compile @invoke invoke_concretized2(42::Integer) @@ -4775,34 +5037,32 @@ g() = empty_nt_values(Base.inferencebarrier(Tuple{})) # This is somewhat sensitive to the exact recursion level that inference is willing to do, but the intention # is to test the case where inference limited a recursion, but then a forced constprop nevertheless managed # to terminate the call. 
+@newinterp RecurseInterpreter +function Compiler.const_prop_rettype_heuristic( + interp::RecurseInterpreter, result::Compiler.MethodCallResult, + si::Compiler.StmtInfo, sv::Compiler.AbsIntState, force::Bool) + if result.rt isa Compiler.LimitedAccuracy + return force # allow forced constprop to recurse into unresolved cycles + end + return @invoke Compiler.const_prop_rettype_heuristic( + interp::Compiler.AbstractInterpreter, result::Compiler.MethodCallResult, + si::Compiler.StmtInfo, sv::Compiler.AbsIntState, force::Bool) +end Base.@constprop :aggressive type_level_recurse1(x...) = x[1] == 2 ? 1 : (length(x) > 100 ? x : type_level_recurse2(x[1] + 1, x..., x...)) Base.@constprop :aggressive type_level_recurse2(x...) = type_level_recurse1(x...) type_level_recurse_entry() = Val{type_level_recurse1(1)}() -@test Base.return_types(type_level_recurse_entry, ()) |> only == Val{1} +@test Base.infer_return_type(type_level_recurse_entry, (); interp=RecurseInterpreter()) == Val{1} # Test that inference doesn't give up if it can potentially refine effects, # even if the return type is Any. f_no_bail_effects_any(x::Any) = x f_no_bail_effects_any(x::NamedTuple{(:x,), Tuple{Any}}) = getfield(x, 1) g_no_bail_effects_any(x::Any) = f_no_bail_effects_any(x) -@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(g_no_bail_effects_any, Tuple{Any})) +@test Compiler.is_foldable_nothrow(Base.infer_effects(g_no_bail_effects_any, Tuple{Any})) # issue #48374 @test (() -> Union{<:Nothing})() == Nothing -# :static_parameter accuracy -unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = @isdefined(T) ? T::Type : nothing -unknown_sparam_nothrow1(x::Ref{T}) where T = @isdefined(T) ? T::Type : nothing -unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = @isdefined(T) ? T::Type : nothing -@test only(Base.return_types(unknown_sparam_throw, (Type{Int},))) == Type{Int} -@test only(Base.return_types(unknown_sparam_throw, (Type{<:Integer},))) == Type{<:Integer} -@test only(Base.return_types(unknown_sparam_throw, (Type,))) == Union{Nothing, Type} -@test_broken only(Base.return_types(unknown_sparam_throw, (Nothing,))) === Nothing -@test_broken only(Base.return_types(unknown_sparam_throw, (Union{Type{Int},Nothing},))) === Union{Nothing,Type{Int}} -@test only(Base.return_types(unknown_sparam_throw, (Any,))) === Union{Nothing,Type} -@test only(Base.return_types(unknown_sparam_nothrow1, (Ref,))) === Type -@test only(Base.return_types(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) === Type - struct Issue49027{Ty<:Number} x::Ty end @@ -4950,9 +5210,9 @@ end |> only === Tuple{Int,Symbol} end end) == Type{Nothing} -# Test that Core.Compiler.return_type inference works for the 1-arg version +# Test that Base._return_type inference works for the 1-arg version @test Base.return_types() do - Core.Compiler.return_type(Tuple{typeof(+), Int, Int}) + Base._return_type(Tuple{typeof(+), Int, Int}) end |> only == Type{Int} # Test that NamedTuple abstract iteration works for PartialStruct/Const @@ -5002,15 +5262,903 @@ let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do end struct Issue49785{S, T<:S} end -let 𝕃 = Core.Compiler.SimpleInferenceLattice.instance - argtypes = Any[Core.Compiler.Const(Issue49785), +let 𝕃 = Compiler.SimpleInferenceLattice.instance + argtypes = Any[Compiler.Const(Issue49785), Union{Type{String},Type{Int}}, Union{Type{String},Type{Int}}] rt = Type{Issue49785{<:Any, Int}} # the following should not throw - @test !Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) + @test 
!Compiler.apply_type_nothrow(𝕃, argtypes, rt) @test code_typed() do S = Union{Type{String},Type{Int}}[Int][1] map(T -> Issue49785{S,T}, (a = S,)) end isa Vector end + +# `getindex(::SimpleVector, ::Int)` should be concrete-evaluated +@eval Base.return_types() do + $(Core.svec(1,Int,nothing))[2] +end |> only == Type{Int} +# https://github.com/JuliaLang/julia/issues/50544 +struct Issue50544{T<:Tuple} + t::T +end +Base.@propagate_inbounds f_issue50544(x, i, ii...) = f_issue50544(f_issue50544(x, i), ii...) +Base.@propagate_inbounds f_issue50544(::Type{Issue50544{T}}, i) where T = T.parameters[i] +g_issue50544(T...) = Issue50544{Tuple{T...}} +h_issue50544(x::T) where T = g_issue50544(f_issue50544(T, 1), f_issue50544(T, 2, 1)) +let x = Issue50544((1, Issue50544((2.0, 'x')))) + @test only(Base.return_types(h_issue50544, (typeof(x),))) == Type{Issue50544{Tuple{Int,Float64}}} +end + +# refine const-prop'ed `PartialStruct` with declared method signature type +Base.@constprop :aggressive function refine_partial_struct1((a, b)::Tuple{String,Int}) + if iszero(b) + println("b=0") # to prevent semi-concrete eval + return nothing + else + return a + end +end +@test Base.return_types() do s::AbstractString + refine_partial_struct1((s, 42)) +end |> only === String + +function refine_partial_struct2(xs::Union{Int,String,Symbol}...) + first(xs) isa Int && iszero(first(xs)) && return nothing + for x in xs[2:end] + if x isa String + continue + else + return nothing + end + end + return string(length(xs)) +end +@test Base.return_types() do s::AbstractString + refine_partial_struct2(42, s) +end |> only === String +# JET.test_call(s::AbstractString->Base._string(s, 'c')) + +# issue #45759 #46557 +g45759(x::Tuple{Any,Vararg}) = x[1] + _g45759(x[2:end]) +g45759(x::Tuple{}) = 0 +_g45759(x) = g45759(x) +@test only(Base.return_types(g45759, Tuple{Tuple{Int,Int,Int,Int,Int,Int,Int}})) == Int + +h45759(x::Tuple{Any,Vararg}; kwargs...) = x[1] + h45759(x[2:end]; kwargs...) +h45759(x::Tuple{}; kwargs...) 
= 0 +@test only(Base.return_types(h45759, Tuple{Tuple{Int,Int,Int,Int,Int,Int,Int}})) == Int + +# issue #50709 +@test Base.code_typed_by_type(Tuple{Type{Vector{S}} where {T, S<:AbstractVector{T}}, UndefInitializer, Int})[1][2] == Vector{<:AbstractVector{T}} where T + +@test only(Base.return_types((typeof([[[1]]]),)) do x + sum(x) do v + sum(length, v) + end +end) == Int + +struct FunctionSum{Tf} + functions::Tf +end +(F::FunctionSum)(x) = sum(f -> f(x), F.functions) +F = FunctionSum((x -> sqrt(x), FunctionSum((x -> x^2, x -> x^3)))) +@test @inferred(F(1.)) === 3.0 + +f31485(arr::AbstractArray{T, 0}) where {T} = arr +indirect31485(arr) = f31485(arr) +f31485(arr::AbstractArray{T, N}) where {T, N} = indirect31485(view(arr, 1, ntuple(i -> :, Val(N-1))...)) +@test @inferred(f31485(zeros(3,3,3,3,3),)) == fill(0.0) + +# override const-prop' return type with the concrete-eval result +# if concrete-eval returns non-inlineable constant +Base.@assume_effects :foldable function continue_const_prop(i, j) + chars = map(Char, i:j) + String(chars) +end +@test Base.return_types() do + Val(length(continue_const_prop(1, 5))) +end |> only === Val{5} +@test fully_eliminated() do + length(continue_const_prop(1, 5)) +end + +# issue #51090 +@noinline function bar51090(b) + b == 0 && return + r = foo51090(b - 1) + Base.donotdelete(b) + return r +end +foo51090(b) = return bar51090(b) +@test !fully_eliminated(foo51090, (Int,)) + +Base.@assume_effects :terminates_globally @noinline function bar51090_terminates(b) + b == 0 && return + r = foo51090_terminates(b - 1) + Base.donotdelete(b) + return r +end +foo51090_terminates(b) = return bar51090_terminates(b) +@test !fully_eliminated(foo51090_terminates, (Int,)) + +# exploit throwness from concrete eval for intrinsics +@test Base.return_types() do + Base.or_int(true, 1) +end |> only === Union{} + +# [add|or]_int tfuncs +@test Base.return_types((Bool,)) do b + Val(Core.Intrinsics.and_int(b, false)) +end |> only == Val{false} +@test Base.return_types((Bool,)) do b + Val(Core.Intrinsics.or_int(true, b)) +end |> only == Val{true} + +# https://github.com/JuliaLang/julia/issues/51310 +@test code_typed() do + b{c} = d... +end |> only |> first isa Core.CodeInfo + +abstract_call_unionall_vararg(some::Some{Any}) = UnionAll(some.value...) 
+@test only(Base.return_types(abstract_call_unionall_vararg)) !== Union{} +let TV = TypeVar(:T) + t = Vector{TV} + some = Some{Any}((TV, t)) + @test abstract_call_unionall_vararg(some) isa UnionAll +end + +# use `Vararg` type constraints +use_vararg_constraint1(args::Vararg{T,N}) where {T,N} = Val(T), Val(N) +@test only(Base.return_types(use_vararg_constraint1, Tuple{Int,Int})) == Tuple{Val{Int},Val{2}} +use_vararg_constraint2(args::Vararg{T,N}) where {T,N} = Val(T), N +@test only(Base.return_types(use_vararg_constraint2, Tuple{Vararg{Int}})) == Tuple{Val{Int},Int} +use_vararg_constraint3(args::NTuple{N,T}) where {T,N} = Val(T), Val(N) +@test only(Base.return_types(use_vararg_constraint3, Tuple{Tuple{Int,Int}})) == Tuple{Val{Int},Val{2}} +use_vararg_constraint4(args::NTuple{N,T}) where {T,N} = Val(T), N +@test only(Base.return_types(use_vararg_constraint4, Tuple{NTuple{N,Int}} where N)) == Tuple{Val{Int},Int} + +# issue 51228 +global whatever_unknown_value51228 +f51228() = f51228(whatever_unknown_value51228) +f51228(x) = 1 +f51228(::Vararg{T,T}) where {T} = "2" +@test only(Base.return_types(f51228, ())) == Int + +struct A51317 + b::Tuple{1} + A1() = new() +end +struct An51317 + a::Int + b::Tuple{1} + An51317() = new() +end +@test only(Base.return_types((x,f) -> getfield(x, f), (A51317, Symbol))) === Union{} +@test only(Base.return_types((x,f) -> getfield(x, f), (An51317, Symbol))) === Int +@test only(Base.return_types(x -> getfield(x, :b), (A51317,))) === Union{} +@test only(Base.return_types(x -> getfield(x, :b), (An51317,))) === Union{} + +# Don't visit the catch block for empty try/catch +function completely_dead_try_catch() + try + catch + return 2.0 + end + return 1 +end +@test Base.return_types(completely_dead_try_catch) |> only === Int +@test fully_eliminated(completely_dead_try_catch) + +function nothrow_try_catch() + try + 1+1 + catch + return 2.0 + end + return 1 +end +@test Base.return_types(nothrow_try_catch) |> only === Int +@test fully_eliminated(nothrow_try_catch) + +may_error(b) = Base.inferencebarrier(b) && error() +function phic_type1() + a = 1 + try + may_error(false) + a = 1.0 + catch + return a + end + return 2 +end +@test Base.return_types(phic_type1) |> only === Int +@test phic_type1() === 2 + +function phic_type2() + a = 1 + try + may_error(false) + a = 1.0 + may_error(false) + catch + return a + end + return 2 +end +@test Base.return_types(phic_type2) |> only === Union{Int, Float64} +@test phic_type2() === 2 + +function phic_type3() + a = 1 + try + may_error(false) + a = 1.0 + may_error(false) + if Base.inferencebarrier(false) + a = Ref(1) + elseif Base.inferencebarrier(false) + a = nothing + end + catch + return a + end + return 2 +end +@test Base.return_types(phic_type3) |> only === Union{Int, Float64} +@test phic_type3() === 2 + +# Issue #51852 +function phic_type4() + a = (;progress = "a") + try + may_error(false) + let b = Base.inferencebarrier(true) ? (;progress = 1.0) : a + a = b + end + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type4) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::String}} +@test phic_type4() === (;progress = 1.0) + +function phic_type5() + a = (;progress = "a") + try + vals = (a, (progress=1.0,)) + may_error(false) + a = vals[Base.inferencebarrier(false) ? 
1 : 2] + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type5) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::String}} +@test phic_type5() === (;progress = 1.0) + +function phic_type6() + a = Base.inferencebarrier(true) ? (;progress = "a") : (;progress = Ref{Any}(0)) + try + may_error(false) + let b = Base.inferencebarrier(true) ? (;progress = 1.0) : a + a = b + end + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type6) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::Base.RefValue{Any}}, @NamedTuple{progress::String}} +@test phic_type6() === (;progress = 1.0) + +function phic_type7() + a = Base.inferencebarrier(true) ? (;progress = "a") : (;progress = Ref{Any}(0)) + try + vals = (a, (progress=1.0,)) + may_error(false) + a = vals[Base.inferencebarrier(false) ? 1 : 2] + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type7) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::Base.RefValue{Any}}, @NamedTuple{progress::String}} +@test phic_type7() === (;progress = 1.0) + +function phic_type8() + local a + try + may_error(true) + a = Base.inferencebarrier(1) + catch + end + + try + a = 2 + may_error(true) + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type8) |> only === Int +@test phic_type8() === 2 + +function phic_type9() + local a + try + may_error(false) + a = Base.inferencebarrier(false) ? 1 : nothing + catch + end + + try + a = 2 + may_error(true) + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type9) |> only === Int +@test phic_type9() === 2 + +function phic_type10() + local a + try + may_error(false) + a = Base.inferencebarrier(true) ? missing : nothing + catch + end + + try + Base.inferencebarrier(true) && (a = 2) + may_error(true) + catch + end + GC.gc() + return a::Int +end +@test Base.return_types(phic_type10) |> only === Int +@test phic_type10() === 2 + +undef_trycatch() = try (a_undef_trycatch = a_undef_trycatch, b = 2); return 1 catch end +# `global a_undef_trycatch` could be defined dynamically, so both paths must be allowed +@test Base.return_types(undef_trycatch) |> only === Union{Nothing, Int} +@test undef_trycatch() === nothing + +# Test that `exit` returns `Union{}` (issue #51856) +function test_exit_bottom(s) + n = tryparse(Int, s) + isnothing(n) && exit() + n +end +@test only(Base.return_types(test_exit_bottom, Tuple{String})) == Int + +function foo_typed_throw_error() + try + error() + catch e + if isa(e, ErrorException) + return 1.0 + end + end + return 1 +end +@test Base.return_types(foo_typed_throw_error) |> only === Float64 + +will_throw_no_method(x::Int) = 1 +function foo_typed_throw_metherr() + try + will_throw_no_method(1.0) + catch e + if isa(e, MethodError) + return 1.0 + end + end + return 1 +end +@test Base.return_types(foo_typed_throw_metherr) |> only === Float64 + +# refine `exct` when `:nothrow` is proven +Base.@assume_effects :nothrow function sin_nothrow(x::Float64) + x == Inf && return zero(x) + return sin(x) +end +@test Base.infer_exception_type(sin_nothrow, (Float64,)) == Union{} +@test Base.return_types((Float64,)) do x + try + return sin_nothrow(x) + catch err + return err + end +end |> only === Float64 +# for semi-concrete interpretation result too +Base.@constprop :aggressive function sin_maythrow(x::Float64, maythrow::Bool) + if maythrow + return sin(x) + else + return @noinline sin_nothrow(x) + end +end +@test Base.return_types((Float64,)) do x + try + return 
sin_maythrow(x, false) + catch err + return err + end +end |> only === Float64 + +# exception type from GotoIfNot +@test Base.infer_exception_type(c::Bool -> c ? 1 : 2) == Union{} +@test Base.infer_exception_type(c::Missing -> c ? 1 : 2) == TypeError +@test Base.infer_exception_type(c::Any -> c ? 1 : 2) == TypeError + +# exception type inference for `:new` +struct NewExctInference + a::Int + @eval NewExctInference(a) = $(Expr(:new, :NewExctInference, :a)) +end +@test Base.infer_exception_type(NewExctInference, (Float64,)) == TypeError + +# semi-concrete interpretation accuracy +# https://github.com/JuliaLang/julia/issues/50037 +@inline countvars50037(bitflags::Int, var::Int) = bitflags >> 0 +@test Base.infer_return_type() do var::Int + Val(countvars50037(1, var)) +end == Val{1} + +# Issue #52168 +f52168(x, t::Type) = x::NTuple{2, Base.inferencebarrier(t)::Type} +@test f52168((1, 2.), Any) === (1, 2.) + +# Issue #27031 +let x = 1, _Any = Any + @noinline bar27031(tt::Tuple{T,T}, ::Type{Val{T}}) where {T} = notsame27031(tt) + @noinline notsame27031(tt::Tuple{T, T}) where {T} = error() + @noinline notsame27031(tt::Tuple{T, S}) where {T, S} = "OK" + foo27031() = bar27031((x, 1.0), Val{_Any}) + @test foo27031() == "OK" +end + +# Issue #51927 +let apply_type_tfunc = Compiler.apply_type_tfunc + 𝕃 = Compiler.fallback_lattice + @test apply_type_tfunc(𝕃, Const(Tuple{Vararg{Any,N}} where N), Int) == Type{NTuple{_A, Any}} where _A +end + +# Issue #52613 +@test (code_typed((Any,)) do x; TypeVar(x...); end)[1][2] === TypeVar + +# https://github.com/JuliaLang/julia/issues/53590 +func53590(b) = b ? Int : Float64 +function issue53590(b1, b2) + T1 = func53590(b1) + T2 = func53590(b2) + return typejoin(T1, T2) +end +@test issue53590(true, true) == Int +@test issue53590(true, false) == Real +@test issue53590(false, false) == Float64 +@test issue53590(false, true) == Real + +# Expr(:throw_undef_if_not) handling +@eval function has_tuin() + $(Expr(:throw_undef_if_not, :x, false)) +end +@test Base.infer_return_type(has_tuin, Tuple{}) === Union{} +@test_throws UndefVarError has_tuin() + +function gen_tuin_from_arg(world::UInt, source, _, _) + ci = make_codeinfo(Any[ + Expr(:throw_undef_if_not, :x, Core.Argument(2)), + ReturnNode(true), + ]; slottypes=Any[Any, Bool]) + ci.slotnames = Symbol[:var"#self#", :def] + ci.nargs = 2 + ci.isva = false + ci +end + +@eval function has_tuin2(def) + $(Expr(:meta, :generated, gen_tuin_from_arg)) + $(Expr(:meta, :generated_only)) +end +@test_throws UndefVarError has_tuin2(false) +@test has_tuin2(true) + +# issue #53585 +let t = ntuple(i -> i % 8 == 1 ? 
Int64 : Float64, 4000) + @test only(Base.return_types(Base.promote_typeof, t)) == Type{Float64} + @test only(Base.return_types(vcat, t)) == Vector{Float64} +end + +# Infinite loop in inference on SSA assignment +const stop_infinite_loop::Base.Threads.Atomic{Bool} = Base.Threads.Atomic{Bool}(false) +function gen_infinite_loop_ssa_generator(world::UInt, source, _) + ci = make_codeinfo(Any[ + # Block 1 + (), + # Block 2 + PhiNode(Int32[1, 5], Any[SSAValue(1), SSAValue(3)]), + Expr(:call, tuple, SSAValue(2)), + Expr(:call, getindex, GlobalRef(@__MODULE__, :stop_infinite_loop)), + GotoIfNot(SSAValue(4), 2), + # Block 3 + ReturnNode(SSAValue(2)) + ]; slottypes=Any[Any]) + ci.slotnames = Symbol[:var"#self#"] + ci.nargs = 1 + ci.isva = false + ci +end + +@eval function gen_infinite_loop_ssa() + $(Expr(:meta, :generated, gen_infinite_loop_ssa_generator)) + $(Expr(:meta, :generated_only)) + #= no body =# +end + +# We want to make sure that both this returns `Tuple` and that +# it doesn't infinite loop inside inference. +@test Base.infer_return_type(gen_infinite_loop_ssa, Tuple{}) === Tuple + +# inference local cache lookup with extended lattice elements that may be transformed +# by `matching_cache_argtypes` +@newinterp CachedConditionalInterp +Base.@constprop :aggressive function func_cached_conditional(x, y) + if x + @noinline sin(y) + else + 0.0 + end +end; +function test_func_cached_conditional(y) + y₁ = func_cached_conditional(isa(y, Float64), y) + y₂ = func_cached_conditional(isa(y, Float64), y) + return y₁, y₂ +end; +let interp = CachedConditionalInterp(); + @test Base.infer_return_type(test_func_cached_conditional, (Any,); interp) == Tuple{Float64, Float64} + @test count(interp.inf_cache) do result + result.linfo.def.name === :func_cached_conditional + end == 1 +end + +# fieldcount on `Tuple` should constant fold, even though `.fields` not const +@test fully_eliminated(Base.fieldcount, Tuple{Type{Tuple{Nothing, Int, Int}}}) + +# Vararg-constprop regression from MutableArithmetics (#54341) +global SIDE_EFFECT54341::Int +function foo54341(a, b, c, d, args...) + # Side effect to force constprop rather than semi-concrete + global SIDE_EFFECT54341 = a + b + c + d + return SIDE_EFFECT54341 +end +bar54341(args...) = foo54341(4, args...) + +@test Base.infer_return_type(bar54341, Tuple{Vararg{Int}}) === Int + +# `PartialStruct` for partially initialized structs: +struct PartiallyInitialized1 + a; b; c + PartiallyInitialized1(a) = (@nospecialize; new(a)) + PartiallyInitialized1(a, b) = (@nospecialize; new(a, b)) + PartiallyInitialized1(a, b, c) = (@nospecialize; new(a, b, c)) +end +mutable struct PartiallyInitialized2 + a; b; c + PartiallyInitialized2(a) = (@nospecialize; new(a)) + PartiallyInitialized2(a, b) = (@nospecialize; new(a, b)) + PartiallyInitialized2(a, b, c) = (@nospecialize; new(a, b, c)) +end + +# 1. 
isdefined modeling for partial struct +@test Base.infer_return_type((Any,Any)) do a, b + Val(isdefined(PartiallyInitialized1(a, b), :b)) +end == Val{true} +@test Base.infer_return_type((Any,Any,)) do a, b + Val(isdefined(PartiallyInitialized1(a, b), :c)) +end >: Val{false} +@test Base.infer_return_type((PartiallyInitialized1,)) do x + @assert isdefined(x, :a) + return Val(isdefined(x, :c)) +end == Val +@test Base.infer_return_type((Any,Any,Any)) do a, b, c + Val(isdefined(PartiallyInitialized1(a, b, c), :c)) +end == Val{true} +@test Base.infer_return_type((Any,Any)) do a, b + Val(isdefined(PartiallyInitialized2(a, b), :b)) +end == Val{true} +@test Base.infer_return_type((Any,Any,)) do a, b + Val(isdefined(PartiallyInitialized2(a, b), :c)) +end >: Val{false} +@test Base.infer_return_type((Any,Any,Any)) do a, b, c + s = PartiallyInitialized2(a, b) + s.c = c + Val(isdefined(s, :c)) +end >: Val{true} +@test Base.infer_return_type((Any,Any,Any)) do a, b, c + Val(isdefined(PartiallyInitialized2(a, b, c), :c)) +end == Val{true} +@test Base.infer_return_type((Vector{Int},)) do xs + Val(isdefined(tuple(1, xs...), 1)) +end == Val{true} +@test Base.infer_return_type((Vector{Int},)) do xs + Val(isdefined(tuple(1, xs...), 2)) +end == Val + +# 2. getfield modeling for partial struct +@test Base.infer_effects((Any,Any); optimize=false) do a, b + getfield(PartiallyInitialized1(a, b), :b) +end |> Compiler.is_nothrow +@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f + getfield(PartiallyInitialized1(a, b), f, #=boundscheck=#false) +end |> !Compiler.is_nothrow +@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c + getfield(PartiallyInitialized1(a, b, c), :c) +end |> Compiler.is_nothrow +@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f + getfield(PartiallyInitialized1(a, b, c), f, #=boundscheck=#false) +end |> Compiler.is_nothrow +@test Base.infer_effects((Any,Any); optimize=false) do a, b + getfield(PartiallyInitialized2(a, b), :b) +end |> Compiler.is_nothrow +@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f + getfield(PartiallyInitialized2(a, b), f, #=boundscheck=#false) +end |> !Compiler.is_nothrow +@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c + getfield(PartiallyInitialized2(a, b, c), :c) +end |> Compiler.is_nothrow +@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f + getfield(PartiallyInitialized2(a, b, c), f, #=boundscheck=#false) +end |> Compiler.is_nothrow + +# isdefined-Conditionals +@test Base.infer_effects((Base.RefValue{Any},)) do x + if isdefined(x, :x) + return getfield(x, :x) + end +end |> Compiler.is_nothrow +@test Base.infer_effects((Base.RefValue{Any},)) do x + if isassigned(x) + return x[] + end +end |> Compiler.is_nothrow +@test Base.infer_effects((Any,Any); optimize=false) do a, c + x = PartiallyInitialized2(a) + x.c = c + if isdefined(x, :c) + return x.b + end +end |> !Compiler.is_nothrow +@test Base.infer_effects((PartiallyInitialized2,); optimize=false) do x + if isdefined(x, :b) + if isdefined(x, :c) + return x.c + end + return x.b + end + return nothing +end |> Compiler.is_nothrow +@test Base.infer_effects((Bool,Int,); optimize=false) do c, b + x = c ? 
PartiallyInitialized1(true) : PartiallyInitialized1(true, b) + if isdefined(x, :b) + return Val(x.a), x.b + end + return nothing +end |> Compiler.is_nothrow + +# refine `undef` information from `@isdefined` check +function isdefined_nothrow(c, x) + local val + if c + val = x + end + if @isdefined val + return val + end + return zero(Int) +end +@test Compiler.is_nothrow(Base.infer_effects(isdefined_nothrow, (Bool,Int))) +@test !any(first(only(code_typed(isdefined_nothrow, (Bool,Int)))).code) do @nospecialize x + Meta.isexpr(x, :throw_undef_if_not) +end + +# End to end test case for the partially initialized struct with `PartialStruct` +@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing) +@test fully_eliminated() do + broadcast_noescape1(Ref("x")) +end + +# InterConditional rt with Vararg argtypes +fcondvarargs(a, b, c, d) = isa(d, Int64) +gcondvarargs(a, x...) = return fcondvarargs(a, x...) ? isa(a, Int64) : !isa(a, Int64) +@test Base.infer_return_type(gcondvarargs, Tuple{Vararg{Any}}) === Bool + +# JuliaLang/julia#55627: argtypes check in `abstract_call_opaque_closure` +issue55627_make_oc() = Base.Experimental.@opaque (x::Int) -> 2x +@test Base.infer_return_type() do + f = issue55627_make_oc() + return f(1), f() +end == Union{} +@test Base.infer_return_type((Vector{Int},)) do xs + f = issue55627_make_oc() + return f(1), f(xs...) +end == Tuple{Int,Int} +@test Base.infer_exception_type() do + f = issue55627_make_oc() + return f(1), f() +end >: MethodError +@test Base.infer_exception_type() do + f = issue55627_make_oc() + return f(1), f('1') +end >: TypeError + +# `exct` modeling for opaque closure +oc_exct_1() = Base.Experimental.@opaque (x) -> x < 0 ? throw(x) : x +@test Base.infer_exception_type((Int,)) do x + oc_exct_1()(x) +end == Int +oc_exct_2() = Base.Experimental.@opaque Tuple{Number}->Number (x) -> '1' +@test Base.infer_exception_type((Int,)) do x + oc_exct_2()(x) +end == TypeError + +# nothrow modeling for `invoke` calls +f_invoke_nothrow(::Number) = :number +f_invoke_nothrow(::Int) = :int +@test Base.infer_effects((Int,)) do x + @invoke f_invoke_nothrow(x::Number) +end |> Compiler.is_nothrow +@test Base.infer_effects((Char,)) do x + @invoke f_invoke_nothrow(x::Number) +end |> !Compiler.is_nothrow +@test Base.infer_effects((Union{Nothing,Int},)) do x + @invoke f_invoke_nothrow(x::Number) +end |> !Compiler.is_nothrow + +# `exct` modeling for `invoke` calls +f_invoke_exct(x::Number) = x < 0 ? throw(x) : x +f_invoke_exct(x::Int) = x +@test Base.infer_exception_type((Int,)) do x + @invoke f_invoke_exct(x::Number) +end == Int +@test Base.infer_exception_type() do + @invoke f_invoke_exct(42::Number) +end == Union{} +@test Base.infer_exception_type((Union{Nothing,Int},)) do x + @invoke f_invoke_exct(x::Number) +end == Union{Int,TypeError} +@test Base.infer_exception_type((Int,)) do x + invoke(f_invoke_exct, Number, x) +end == TypeError +@test Base.infer_exception_type((Char,)) do x + invoke(f_invoke_exct, Tuple{Number}, x) +end == TypeError + +@test Base.infer_exception_type((Vector{Any},)) do args + Core.throw_methoderror(args...) 
+end == Union{MethodError,ArgumentError} + +# Issue https://github.com/JuliaLang/julia/issues/55751 + +abstract type AbstractGrid55751{T, N} <: AbstractArray{T, N} end +struct Grid55751{T, N, AT} <: AbstractGrid55751{T, N} + axes::AT +end + +t155751 = Union{AbstractArray{UInt8, 4}, Array{Float32, 4}, Grid55751{Float32, 3, _A} where _A} +t255751 = Array{Float32, 3} +@test Compiler.tmerge_types_slow(t155751,t255751) == AbstractArray # shouldn't hang + +issue55882_nfields(x::Union{T,Nothing}) where T<:Number = nfields(x) +@test Base.infer_return_type(issue55882_nfields) <: Int + +# issue #55916 +f55916(x) = 1 +f55916(::Vararg{T,T}) where {T} = "2" +g55916(x) = f55916(x) +# this shouldn't error +@test only(code_typed(g55916, (Any,); optimize=false))[2] == Int + +# JuliaLang/julia#56248 +@test Base.infer_return_type() do + TypeVar(:Issue56248, 1) +end === Union{} +@test Base.infer_return_type() do + TypeVar(:Issue56248, Any, 1) +end === Union{} + +@test Base.infer_return_type((Nothing,)) do x + @atomic x.count += 1 +end == Union{} +@test Base.infer_return_type((Nothing,)) do x + @atomicreplace x.count 0 => 1 +end == Union{} +mutable struct AtomicModifySafety + @atomic count::Int +end +let src = code_typed((Union{Nothing,AtomicModifySafety},)) do x + @atomic x.count += 1 + end |> only |> first + @test any(@nospecialize(x)->Meta.isexpr(x, :invoke_modify), src.code) +end + +function issue56387(nt::NamedTuple, field::Symbol=:a) + NT = typeof(nt) + names = fieldnames(NT) + types = fieldtypes(NT) + index = findfirst(==(field), names) + if index === nothing + throw(ArgumentError("Field $field not found")) + end + types[index] +end +@test Base.infer_return_type(issue56387, (typeof((;a=1)),)) == Type{Int} + +# `apply_type_tfunc` with `Union` in its arguments +let apply_type_tfunc = Compiler.apply_type_tfunc + 𝕃 = Compiler.fallback_lattice + Const = Core.Const + @test apply_type_tfunc(𝕃, Any[Const(Vector), Union{Type{Int},Type{Nothing}}]) == Union{Type{Vector{Int}},Type{Vector{Nothing}}} +end + +@test Base.infer_return_type((Bool,Int,)) do b, y + x = b ? 
1 : missing + inner = y -> x + y + return inner(y) +end == Union{Int,Missing} + +function issue31909(ys) + x = if @noinline rand(Bool) + 1 + else + missing + end + map(y -> x + y, ys) +end +@test Base.infer_return_type(issue31909, (Vector{Int},)) == Union{Vector{Int},Vector{Missing}} + +global setglobal!_refine::Int +@test Base.infer_return_type((Integer,)) do x + setglobal!(@__MODULE__, :setglobal!_refine, x) +end === Int +global setglobal!_must_throw::Int = 42 +@test Base.infer_return_type((String,)) do x + setglobal!(@__MODULE__, :setglobal!_must_throw, x) +end === Union{} + +global swapglobal!_xxx::Int = 42 +@test Base.infer_return_type((Int,)) do x + swapglobal!(@__MODULE__, :swapglobal!_xxx, x) +end === Int +@test Base.infer_return_type((String,)) do x + swapglobal!(@__MODULE__, :swapglobal!_xxx, x) +end === Union{} + +global swapglobal!_must_throw +@newinterp SwapGlobalInterp +Compiler.InferenceParams(::SwapGlobalInterp) = Compiler.InferenceParams(; assume_bindings_static=true) +function func_swapglobal!_must_throw(x) + swapglobal!(@__MODULE__, :swapglobal!_must_throw, x) +end +@test Base.infer_return_type(func_swapglobal!_must_throw, (Int,); interp=SwapGlobalInterp()) === Union{} +@test !Compiler.is_effect_free(Base.infer_effects(func_swapglobal!_must_throw, (Int,); interp=SwapGlobalInterp()) ) + +@eval get_exception() = $(Expr(:the_exception)) +@test Base.infer_return_type() do + get_exception() +end <: Any +@test @eval Base.infer_return_type((Float64,)) do x + out = $(Expr(:the_exception)) + try + out = sin(x) + catch + out = $(Expr(:the_exception)) + end + return out +end == Union{Float64,DomainError} diff --git a/test/compiler/inline.jl b/Compiler/test/inline.jl similarity index 78% rename from test/compiler/inline.jl rename to Compiler/test/inline.jl index be821a88f00cc..0fc00de457f24 100644 --- a/test/compiler/inline.jl +++ b/Compiler/test/inline.jl @@ -71,7 +71,7 @@ function bar12620() foo_inl(i==1) end end -@test_throws UndefVarError(:y) bar12620() +@test_throws UndefVarError(:y, :local) bar12620() # issue #16165 @inline f16165(x) = (x = UInt(x) + 1) @@ -121,7 +121,7 @@ f29083(;μ,σ) = μ + σ*randn() g29083() = f29083(μ=2.0,σ=0.1) let c = code_typed(g29083, ())[1][1].code # make sure no call to kwfunc remains - @test !any(e->(isa(e,Expr) && (e.head === :invoke && e.args[1].def.name === :kwfunc)), c) + @test !any(e->(isa(e,Expr) && (e.head === :invoke && e.args[1].def.def.name === :kwfunc)), c) end @testset "issue #19122: [no]inline of short func. def. 
with return type annotation" begin @@ -147,8 +147,10 @@ end s end - (src, _) = code_typed(sum27403, Tuple{Vector{Int}})[1] - @test !any(x -> x isa Expr && x.head === :invoke, src.code) + (src, _) = only(code_typed(sum27403, Tuple{Vector{Int}})) + @test !any(src.code) do x + x isa Expr && x.head === :invoke && x.args[2] !== Core.GlobalRef(Base, :throw_boundserror) + end end # check that ismutabletype(type) can be fully eliminated @@ -252,7 +254,7 @@ let code = code_typed(f_pointerref, Tuple{Type{Int}})[1][1].code @test !any_ptrref end -# Test that inlining can inline _applys of builtins/_applys on SimpleVectors +# Test that inlining can inline _apply_iterate of builtins/_apply_iterate on SimpleVectors function foo_apply_apply_type_svec() A = (Tuple, Float32) B = Tuple{Float32, Float32} @@ -274,7 +276,7 @@ f34900(x, y::Int) = y f34900(x::Int, y::Int) = invoke(f34900, Tuple{Int, Any}, x, y) @test fully_eliminated(f34900, Tuple{Int, Int}; retval=Core.Argument(2)) -using Core.Compiler: is_declared_inline, is_declared_noinline +using .Compiler: is_declared_inline, is_declared_noinline @testset "is_declared_[no]inline" begin @test is_declared_inline(only(methods(@inline x -> x))) @@ -295,7 +297,7 @@ using Core.Compiler: is_declared_inline, is_declared_noinline @test !is_declared_noinline(only(methods() do x x end)) end -using Core.Compiler: is_inlineable, set_inlineable! +using .Compiler: is_inlineable, set_inlineable! @testset "basic set_inlineable! functionality" begin ci = code_typed1() do @@ -311,7 +313,7 @@ end const _a_global_array = [1] f_inline_global_getindex() = _a_global_array[1] let ci = code_typed(f_inline_global_getindex, Tuple{})[1].first - @test any(x->(isexpr(x, :call) && x.args[1] === GlobalRef(Base, :arrayref)), ci.code) + @test any(x->(isexpr(x, :call) && x.args[1] === GlobalRef(Base, :memoryrefget)), ci.code) end # Issue #29114 & #36087 - Inlining of non-tuple splats @@ -343,8 +345,8 @@ struct NonIsBitsDimsUndef dims::NTuple{N, Int} where N NonIsBitsDimsUndef() = new() end -@test Core.Compiler.is_inlineable_constant(NonIsBitsDimsUndef()) -@test !Core.Compiler.is_inlineable_constant((("a"^1000, "b"^1000), nothing)) +@test Compiler.is_inlineable_constant(NonIsBitsDimsUndef()) +@test !Compiler.is_inlineable_constant((("a"^1000, "b"^1000), nothing)) # More nothrow modeling for apply_type f_apply_type_typeof(x) = (Ref{typeof(x)}; nothing) @@ -505,6 +507,17 @@ end Base.@constprop :aggressive noinlined_constprop_implicit(a) = a+g force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0) + function force_inline_constprop_cached1() + r1 = noinlined_constprop_implicit(0) + r2 = @inline noinlined_constprop_implicit(0) + return (r1, r2) + end + function force_inline_constprop_cached2() + r1 = @inline noinlined_constprop_implicit(0) + r2 = noinlined_constprop_implicit(0) + return (r1, r2) + end + @inline Base.@constprop :aggressive inlined_constprop_explicit(a) = a+g force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0) @inline Base.@constprop :aggressive inlined_constprop_implicit(a) = a+g @@ -555,6 +568,12 @@ end let code = get_code(M.force_inline_constprop_implicit) @test all(!isinvoke(:noinlined_constprop_implicit), code) end + let code = get_code(M.force_inline_constprop_cached1) + @test count(isinvoke(:noinlined_constprop_implicit), code) == 1 + end + let code = get_code(M.force_inline_constprop_cached2) + @test count(isinvoke(:noinlined_constprop_implicit), code) == 1 + end let code = get_code(M.force_noinline_constprop_explicit) @test 
any(isinvoke(:inlined_constprop_explicit), code) @@ -568,6 +587,18 @@ end end end +@noinline fresh_edge_noinlined(a::Integer) = unresolvable(a) +let src = code_typed1((Integer,)) do x + @inline fresh_edge_noinlined(x) + end + @test count(iscall((src, fresh_edge_noinlined)), src.code) == 0 +end +let src = code_typed1((Integer,)) do x + @inline fresh_edge_noinlined(x) + end + @test count(iscall((src, fresh_edge_noinlined)), src.code) == 0 # should be idempotent +end + # force constant-prop' for `setproperty!` # https://github.com/JuliaLang/julia/pull/41882 let code = @eval Module() begin @@ -598,9 +629,9 @@ g41299(f::Tf, args::Vararg{Any,N}) where {Tf,N} = f(args...) # https://github.com/JuliaLang/julia/issues/42078 # idempotency of callsite inlining function getcache(mi::Core.MethodInstance) - cache = Core.Compiler.code_cache(Core.Compiler.NativeInterpreter()) - codeinf = Core.Compiler.get(cache, mi, nothing) - return isnothing(codeinf) ? nothing : codeinf + cache = Compiler.code_cache(Compiler.NativeInterpreter()) + codeinst = Compiler.get(cache, mi, nothing) + return isnothing(codeinst) ? nothing : codeinst end @noinline f42078(a) = sum(sincos(a)) let @@ -618,8 +649,8 @@ let end let # make sure to discard the inferred source mi = only(methods(f42078)).specializations::Core.MethodInstance - codeinf = getcache(mi)::Core.CodeInstance - @atomic codeinf.inferred = nothing + codeinst = getcache(mi)::Core.CodeInstance + @atomic codeinst.inferred = nothing end let # inference should re-infer `f42078(::Int)` and we should get the same code @@ -692,7 +723,7 @@ mktempdir() do dir ci, rt = only(code_typed(issue42246)) if any(ci.code) do stmt Meta.isexpr(stmt, :invoke) && - stmt.args[1].def.name === nameof(IOBuffer) + stmt.args[1].def.def.name === nameof(IOBuffer) end exit(0) else @@ -730,7 +761,7 @@ end let f(x) = (x...,) # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating new `iterate` calls # in inlining. For this particular case, we're relying on `iterate(::CaretesianIndex)` throwing an error, such - # the the original apply call is not union-split, but the inserted `iterate` call is. + # that the original apply call is not union-split, but the inserted `iterate` call is. 
@test code_typed(f, Tuple{Union{Int64, CartesianIndex{1}, CartesianIndex{3}}})[1][2] == Tuple{Int64} end @@ -777,8 +808,8 @@ end let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs g42840(xs, 2) end |> only |> first - # `(xs::Vector{Int})[a::Const(2)]` => `Base.arrayref(true, xs, 2)` - @test count(iscall((src, Base.arrayref)), src.code) == 1 + # `(xs::Vector{Int})[a::Const(2)]` + @test count(iscall((src, Base.memoryrefget)), src.code) == 1 @test count(isinvoke(:g42840), src.code) == 1 end @@ -845,7 +876,7 @@ let src = code_typed1((Any,)) do x abstract_unionsplit_fallback(x) end @test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2 - @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end let src = code_typed1((Union{Type,Number},)) do x abstract_unionsplit_fallback(x) @@ -881,7 +912,7 @@ let src = code_typed1((Any,)) do x @test count(iscall((src, typeof)), src.code) == 2 @test count(isinvoke(:println), src.code) == 0 @test count(iscall((src, println)), src.code) == 0 - @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end let src = code_typed1((Union{Type,Number},)) do x abstract_unionsplit_fallback(false, x) @@ -920,34 +951,34 @@ end end # issue 43104 - +_has_free_typevars(t) = ccall(:jl_has_free_typevars, Cint, (Any,), t) != 0 @inline isGoodType(@nospecialize x::Type) = - x !== Any && !(@noinline Base.has_free_typevars(x)) + x !== Any && !(@noinline _has_free_typevars(x)) let # aggressive inlining of single, abstract method match src = code_typed((Type, Any,)) do x, y isGoodType(x), isGoodType(y) end |> only |> first # both callsites should be inlined - @test count(isinvoke(:has_free_typevars), src.code) == 2 - # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted - @test count(iscall((src,isGoodType)), src.code) == 1 + @test count(isinvoke(:_has_free_typevars), src.code) == 2 + # `isGoodType(y::Any)` isn't fully covered, so the fallback is a method error + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end @inline isGoodType2(cnd, @nospecialize x::Type) = - x !== Any && !(@noinline (cnd ? Core.Compiler.isType : Base.has_free_typevars)(x)) + x !== Any && !(@noinline (cnd ? 
Compiler.isType : _has_free_typevars)(x)) let # aggressive inlining of single, abstract method match (with constant-prop'ed) src = code_typed((Type, Any,)) do x, y isGoodType2(true, x), isGoodType2(true, y) end |> only |> first # both callsite should be inlined with constant-prop'ed result @test count(isinvoke(:isType), src.code) == 2 - @test count(isinvoke(:has_free_typevars), src.code) == 0 - # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted - @test count(iscall((src,isGoodType2)), src.code) == 1 + @test count(isinvoke(:_has_free_typevars), src.code) == 0 + # `isGoodType(y::Any)` isn't fully covered, thus a MethodError gets inserted + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end @noinline function checkBadType!(@nospecialize x::Type) - if x === Any || Base.has_free_typevars(x) + if x === Any || _has_free_typevars(x) println(x) end return nothing @@ -958,8 +989,8 @@ let # aggressive static dispatch of single, abstract method match end |> only |> first # both callsites should be resolved statically @test count(isinvoke(:checkBadType!), src.code) == 2 - # `checkBadType!(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted - @test count(iscall((src,checkBadType!)), src.code) == 1 + # `checkBadType!(y::Any)` isn't fully covered, thus a MethodError gets inserted + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end @testset "late_inline_special_case!" begin @@ -974,6 +1005,14 @@ end end |> only |> first @test count(iscall((src,UnionAll)), src.code) == 0 end + # test >: + let src = code_typed((Any,Any)) do x, y + x >: y + end |> only |> first + idx = findfirst(iscall((src,<:)), src.code) + @test idx !== nothing + @test src.code[idx].args[2:3] == Any[#=y=#Argument(3), #=x=#Argument(2)] + end end # have_fma elimination inside ^ @@ -1130,7 +1169,7 @@ function f44200() x44200 end let src = code_typed1(f44200) - @test_broken count(x -> isa(x, Core.PiNode), src.code) == 0 + @test count(x -> isa(x, Core.PiNode), src.code) == 0 end # Test that peeling off one case from (::Any) doesn't introduce @@ -1164,7 +1203,7 @@ end end # Test that inlining doesn't accidentally delete a bad return_type call -f_bad_return_type() = Core.Compiler.return_type(+, 1, 2) +f_bad_return_type() = Compiler.return_type(+, 1, 2) @test_throws MethodError f_bad_return_type() # Test that inlining doesn't leave useless globalrefs around @@ -1179,7 +1218,7 @@ end # Test that we can inline a finalizer for a struct that does not otherwise escape @noinline nothrow_side_effect(x) = Base.@assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid -@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(nothrow_side_effect, (Nothing,))) +@test Compiler.is_finalizer_inlineable(Base.infer_effects(nothrow_side_effect, (Nothing,))) mutable struct DoAllocNoEscape function DoAllocNoEscape() @@ -1364,7 +1403,7 @@ init_finalization_count!() = FINALIZATION_COUNT[] = 0 get_finalization_count() = FINALIZATION_COUNT[] @noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x @noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) 
= (@nospecialize; print(io, x...)) -@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,))) +@test Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,))) mutable struct DoAllocWithField x::Int @@ -1531,7 +1570,6 @@ let @test get_finalization_count() == 1000 end - function cfg_finalization7(io) for i = -999:1000 o = DoAllocWithField(0) @@ -1558,24 +1596,51 @@ let @test get_finalization_count() == 1000 end +# Load forwarding with `finalizer` elision +let src = code_typed1((Int,)) do x + xs = finalizer(Ref(x)) do obj + @noinline + Base.@assume_effects :nothrow :notaskstate + Core.println("finalizing: ", obj[]) + end + Base.@assume_effects :nothrow @noinline println("xs[] = ", @inline xs[]) + return xs[] + end + @test count(iscall((src, getfield)), src.code) == 0 +end +let src = code_typed1((Int,)) do x + xs = finalizer(Ref(x)) do obj + @noinline + Base.@assume_effects :nothrow :notaskstate + Core.println("finalizing: ", obj[]) + end + Base.@assume_effects :nothrow @noinline println("xs[] = ", @inline xs[]) + xs[] += 1 + return xs[] + end + @test count(iscall((src, getfield)), src.code) == 0 + @test count(iscall((src, setfield!)), src.code) == 1 +end # optimize `[push!|pushfirst!](::Vector{Any}, x...)` @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!] @eval begin - let src = code_typed1((Vector{Any}, Any)) do xs, x - $f(xs, x) + for T in [Int, Any] + let src = code_typed1((Vector{T}, T)) do xs, x + $f(xs, x) + end + @test count(iscall((src, $f)), src.code) == 0 end - @test count(iscall((src, $f)), src.code) == 0 - @test count(src.code) do @nospecialize x - isa(x, Core.GotoNode) || - isa(x, Core.GotoIfNot) || - iscall((src, getfield))(x) - end == 0 # no loop should be involved for the common single arg case - end - let src = code_typed1((Vector{Any}, Any, Any)) do xs, x, y - $f(xs, x, y) + let effects = Base.infer_effects((Vector{T}, T)) do xs, x + $f(xs, x) + end + @test Compiler.Compiler.is_terminates(effects) + end + let src = code_typed1((Vector{T}, T, T)) do xs, x, y + $f(xs, x, y) + end + @test count(iscall((src, $f)), src.code) == 0 end - @test count(iscall((src, $f)), src.code) == 0 end let xs = Any[] $f(xs, :x, "y", 'z') @@ -1586,7 +1651,7 @@ end end end -using Core.Compiler: is_declared_inline, is_declared_noinline +using .Compiler: is_declared_inline, is_declared_noinline # https://github.com/JuliaLang/julia/issues/45050 @testset "propagate :meta annotations to keyword sorter methods" begin @@ -1600,12 +1665,12 @@ using Core.Compiler: is_declared_inline, is_declared_noinline @test is_declared_noinline(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) end let Base.@constprop :aggressive f(::Any; x::Int=1) = 2x - @test Core.Compiler.is_aggressive_constprop(only(methods(f))) - @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + @test Compiler.is_aggressive_constprop(only(methods(f))) + @test Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) end let Base.@constprop :none f(::Any; x::Int=1) = 2x - @test Core.Compiler.is_no_constprop(only(methods(f))) - @test Core.Compiler.is_no_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + @test Compiler.is_no_constprop(only(methods(f))) + @test Compiler.is_no_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) end # @nospecialize let f(@nospecialize(A::Any); x::Int=1) = 2x @@ -1618,19 +1683,19 @@ using Core.Compiler: is_declared_inline, 
is_declared_noinline end # Base.@assume_effects let Base.@assume_effects :notaskstate f(::Any; x::Int=1) = 2x - @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate - @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate + @test Compiler.decode_effects_override(only(methods(f)).purity).notaskstate + @test Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate end # propagate multiple metadata also let @inline Base.@assume_effects :notaskstate Base.@constprop :aggressive f(::Any; x::Int=1) = (@nospecialize; 2x) @test is_declared_inline(only(methods(f))) - @test Core.Compiler.is_aggressive_constprop(only(methods(f))) + @test Compiler.is_aggressive_constprop(only(methods(f))) @test is_declared_inline(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) - @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + @test Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) @test only(methods(f)).nospecialize == -1 @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == -1 - @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate - @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate + @test Compiler.decode_effects_override(only(methods(f)).purity).notaskstate + @test Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate end end @@ -1642,13 +1707,12 @@ function oc_capture_oc(z) end @test fully_eliminated(oc_capture_oc, (Int,)) +# inlining with unmatched type parameters @eval struct OldVal{T} - x::T (OV::Type{OldVal{T}})() where T = $(Expr(:new, :OV)) end -with_unmatched_typeparam1(x::OldVal{i}) where {i} = i -with_unmatched_typeparam2() = [ Base.donotdelete(OldVal{i}()) for i in 1:10000 ] -function with_unmatched_typeparam3() +@test OldVal{0}() === OldVal{0}.instance +function with_unmatched_typeparam() f(x::OldVal{i}) where {i} = i r = 0 for i = 1:10000 @@ -1656,17 +1720,15 @@ function with_unmatched_typeparam3() end return r end - -@testset "Inlining with unmatched type parameters" begin - let src = code_typed1(with_unmatched_typeparam1, (Any,)) - @test !any(@nospecialize(x) -> isexpr(x, :call) && length(x.args) == 1, src.code) - end - let src = code_typed1(with_unmatched_typeparam2) - @test !any(@nospecialize(x) -> isexpr(x, :call) && length(x.args) == 1, src.code) - end - let src = code_typed1(with_unmatched_typeparam3) - @test !any(@nospecialize(x) -> isexpr(x, :call) && length(x.args) == 1, src.code) +let src = code_typed1(with_unmatched_typeparam) + found = nothing + for x in src.code + if isexpr(x, :call) && length(x.args) == 1 + found = x + break + end end + @test isnothing(found) || (source=src, statement=found) end function twice_sitofp(x::Int, y::Int) @@ -1704,13 +1766,13 @@ end # Test getfield modeling of Type{Ref{_A}} where _A let getfield_tfunc(@nospecialize xs...) = - Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, xs...) + Compiler.getfield_tfunc(Compiler.fallback_lattice, xs...) 
@test getfield_tfunc(Type, Core.Const(:parameters)) !== Union{} @test !isa(getfield_tfunc(Type{Tuple{Union{Int, Float64}, Int}}, Core.Const(:name)), Core.Const) end @test fully_eliminated(Base.ismutable, Tuple{Base.RefValue}) -# TODO: Remove compute sparams for vararg_retrival +# TODO: Remove compute sparams for vararg_retrieval fvarargN_inline(x::Tuple{Vararg{Int, N}}) where {N} = N fvarargN_inline(args...) = fvarargN_inline(args) let src = code_typed1(fvarargN_inline, (Tuple{Vararg{Int}},)) @@ -1735,7 +1797,7 @@ end isinvokemodify(y) = @nospecialize(x) -> isinvokemodify(y, x) isinvokemodify(sym::Symbol, @nospecialize(x)) = isinvokemodify(mi->mi.def.name===sym, x) -isinvokemodify(pred::Function, @nospecialize(x)) = isexpr(x, :invoke_modify) && pred(x.args[1]::MethodInstance) +isinvokemodify(pred::Function, @nospecialize(x)) = isexpr(x, :invoke_modify) && pred((x.args[1]::CodeInstance).def) mutable struct Atomic{T} @atomic x::T @@ -1763,22 +1825,72 @@ let src = code_typed1((Atomic{Int},Union{Int,Float64})) do a, b end @test count(isinvokemodify(:mymax), src.code) == 2 end +global x_global_inc::Int = 1 +let src = code_typed1(()) do + @atomic (@__MODULE__).x_global_inc += 1 + end + @test count(isinvokemodify(:+), src.code) == 1 +end +let src = code_typed1((Ptr{Int},)) do a + unsafe_modify!(a, +, 1) + end + @test count(isinvokemodify(:+), src.code) == 1 +end +let src = code_typed1((AtomicMemoryRef{Int},)) do a + Core.memoryrefmodify!(a, +, 1, :sequentially_consistent, true) + end + @test count(isinvokemodify(:+), src.code) == 1 +end # apply `ssa_inlining_pass` multiple times -let interp = Core.Compiler.NativeInterpreter() +func_mul_int(a::Int, b::Int) = Core.Intrinsics.mul_int(a, b) +multi_inlining1(a::Int, b::Int) = @noinline func_mul_int(a, b) +let i::Int, continue_::Bool + interp = Compiler.NativeInterpreter() # check if callsite `@noinline` annotation works - ir, = Base.code_ircode((Int,Int); optimize_until="inlining", interp) do a, b - @noinline a*b - end |> only - i = findfirst(isinvoke(:*), ir.stmts.inst) + ir, = only(Base.code_ircode(multi_inlining1, (Int,Int); optimize_until="inlining", interp)) + i = findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) @test i !== nothing - - # ok, now delete the callsite flag, and see the second inlining pass can inline the call - @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE - inlining = Core.Compiler.InliningState(interp) - ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false) - @test count(isinvoke(:*), ir.stmts.inst) == 0 - @test count(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.inst) == 1 + # now delete the callsite flag, and see the second inlining pass can inline the call + @eval Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE + inlining = Compiler.InliningState(interp) + ir = Compiler.ssa_inlining_pass!(ir, inlining, false) + @test findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) === nothing + @test (i = findfirst(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt)) !== nothing + lins = Compiler.IRShow.buildLineInfoNode(ir.debuginfo, nothing, i) + @test (continue_ = length(lins) == 2) # :multi_inlining1 -> :func_mul_int + if continue_ + def1 = lins[1].method + @test def1 isa Core.MethodInstance && def1.def.name === :multi_inlining1 + def2 = lins[2].method + @test def2 isa Core.MethodInstance && def2.def.name === :func_mul_int + end +end + +call_func_mul_int(a::Int, b::Int) = @noinline func_mul_int(a, b) +multi_inlining2(a::Int, b::Int) = call_func_mul_int(a, b) +let i::Int, continue_::Bool + interp = 
Compiler.NativeInterpreter() + # check if callsite `@noinline` annotation works + ir, = only(Base.code_ircode(multi_inlining2, (Int,Int); optimize_until="inlining", interp)) + i = findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) + @test i !== nothing + # now delete the callsite flag, and see the second inlining pass can inline the call + @eval Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE + inlining = Compiler.InliningState(interp) + ir = Compiler.ssa_inlining_pass!(ir, inlining, false) + @test findfirst(isinvoke(:func_mul_int), ir.stmts.stmt) === nothing + @test (i = findfirst(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt)) !== nothing + lins = Compiler.IRShow.buildLineInfoNode(ir.debuginfo, nothing, i) + @test_broken (continue_ = length(lins) == 3) # see TODO in `ir_inline_linetable!` + if continue_ + def1 = lins[1].method + @test def1 isa Core.MethodInstance && def1.def.name === :multi_inlining2 + def2 = lins[2].method + @test def2 isa Core.MethodInstance && def2.def.name === :call_func_mul_int + def3 = lins[3].method + @test def3 isa Core.MethodInstance && def3.def.name === :call_func_mul_int + end end # Test special purpose inliner for Core.ifelse @@ -1803,30 +1915,30 @@ end # optimize away `NamedTuple`s used for handling `@nospecialize`d keyword-argument # https://github.com/JuliaLang/julia/pull/47059 -abstract type CallInfo end -struct NewInstruction +abstract type TestCallInfo end +struct TestNewInstruction stmt::Any type::Any - info::CallInfo + info::TestCallInfo line::Int32 flag::UInt8 - function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), + function TestNewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::TestCallInfo), line::Int32, flag::UInt8) return new(stmt, type, info, line, flag) end end @nospecialize -function NewInstruction(newinst::NewInstruction; +function TestNewInstruction(newinst::TestNewInstruction; stmt=newinst.stmt, type=newinst.type, - info::CallInfo=newinst.info, + info::TestCallInfo=newinst.info, line::Int32=newinst.line, flag::UInt8=newinst.flag) - return NewInstruction(stmt, type, info, line, flag) + return TestNewInstruction(stmt, type, info, line, flag) end @specialize -let src = code_typed1((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info - NewInstruction(newinst; stmt, type, info) +let src = code_typed1((TestNewInstruction,Any,Any,TestCallInfo)) do newinst, stmt, type, info + TestNewInstruction(newinst; stmt, type, info) end @test count(issplatnew, src.code) == 0 @test count(iscall((src,NamedTuple)), src.code) == 0 @@ -1836,26 +1948,16 @@ end # Test that inlining can still use nothrow information from concrete-eval # even if the result itself is too big to be inlined, and nothrow is not # known without concrete-eval -const THE_BIG_TUPLE = ntuple(identity, 1024) +const THE_BIG_TUPLE = ntuple(identity, 1024); function return_the_big_tuple(err::Bool) err && error("BAD") return THE_BIG_TUPLE end -@noinline function return_the_big_tuple_noinline(err::Bool) - err && error("BAD") - return THE_BIG_TUPLE +@test fully_eliminated() do + return_the_big_tuple(false)[1] end -big_tuple_test1() = return_the_big_tuple(false)[1] -big_tuple_test2() = return_the_big_tuple_noinline(false)[1] - -@test fully_eliminated(big_tuple_test2, Tuple{}) -# Currently we don't run these cleanup passes, but let's make sure that -# if we did, inlining would be able to remove this -let ir = Base.code_ircode(big_tuple_test1, Tuple{})[1][1] - ir = Core.Compiler.compact!(ir, true) - ir = 
Core.Compiler.cfg_simplify!(ir) - ir = Core.Compiler.compact!(ir, true) - @test length(ir.stmts) == 1 +@test fully_eliminated() do + @inline return_the_big_tuple(false)[1] end # inlineable but removable call should be eligible for DCE @@ -1926,7 +2028,7 @@ f48397(::Tuple{String,String}) = :ok let src = code_typed1((Union{Bool,Tuple{String,Any}},)) do x f48397(x) end - @test any(iscall((src, f48397)), src.code) + @test any(iscall((src, Core.throw_methoderror)), src.code) # fallback method error) end g48397::Union{Bool,Tuple{String,Any}} = ("48397", 48397) let res = @test_throws MethodError let @@ -2009,7 +2111,7 @@ for run_finalizer_escape_test in (run_finalizer_escape_test1, run_finalizer_esca global finalizer_escape::Int = 0 let src = code_typed1(run_finalizer_escape_test, Tuple{Bool, Bool}) - @test any(x->isexpr(x, :(=)), src.code) + @test any(iscall((src, Core.setglobal!)), src.code) end let @@ -2020,8 +2122,8 @@ end # `compilesig_invokes` inlining option @newinterp NoCompileSigInvokes -Core.Compiler.OptimizationParams(::NoCompileSigInvokes) = - Core.Compiler.OptimizationParams(; compilesig_invokes=false) +Compiler.OptimizationParams(::NoCompileSigInvokes) = + Compiler.OptimizationParams(; compilesig_invokes=false) @noinline no_compile_sig_invokes(@nospecialize x) = (x !== Any && !Base.has_free_typevars(x)) # test the single dispatch candidate case let src = code_typed1((Type,)) do x @@ -2029,7 +2131,7 @@ let src = code_typed1((Type,)) do x end @test count(src.code) do @nospecialize x isinvoke(:no_compile_sig_invokes, x) && - (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any} + (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),Any} end == 1 end let src = code_typed1((Type,); interp=NoCompileSigInvokes()) do x @@ -2037,7 +2139,7 @@ let src = code_typed1((Type,); interp=NoCompileSigInvokes()) do x end @test count(src.code) do @nospecialize x isinvoke(:no_compile_sig_invokes, x) && - (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Type} + (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),Type} end == 1 end # test the union split case @@ -2046,7 +2148,7 @@ let src = code_typed1((Union{DataType,UnionAll},)) do x end @test count(src.code) do @nospecialize x isinvoke(:no_compile_sig_invokes, x) && - (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any} + (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),Any} end == 2 end let src = code_typed1((Union{DataType,UnionAll},); interp=NoCompileSigInvokes()) do x @@ -2054,10 +2156,195 @@ let src = code_typed1((Union{DataType,UnionAll},); interp=NoCompileSigInvokes()) end @test count(src.code) do @nospecialize x isinvoke(:no_compile_sig_invokes, x) && - (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),DataType} + (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),DataType} end == 1 @test count(src.code) do @nospecialize x isinvoke(:no_compile_sig_invokes, x) && - (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),UnionAll} + (x.args[1]::Core.CodeInstance).def.specTypes == Tuple{typeof(no_compile_sig_invokes),UnionAll} end == 1 end + +# https://github.com/JuliaLang/julia/issues/50612 +f50612(x) = UInt32(x) +@test all(!isinvoke(:UInt32),get_code(f50612,Tuple{Char})) + +# move inlineable constant values into statement position during `compact!`-ion +# so that we don't inline 
DCE-eligibile calls +Base.@assume_effects :nothrow function erase_before_inlining(x, y) + z = sin(y) + if x + return "julia" + end + return z +end +@test fully_eliminated((Float64,); retval=5) do y + length(erase_before_inlining(true, y)) +end +@test fully_eliminated((Float64,); retval=(5,5)) do y + z = erase_before_inlining(true, y) + return length(z), length(z) +end + +# continue const-prop' when concrete-eval result is too big +const THE_BIG_TUPLE_2 = ntuple(identity, 1024) +return_the_big_tuple2(a) = (a, THE_BIG_TUPLE_2) +let src = code_typed1() do + return return_the_big_tuple2(42)[2] + end + @test count(isinvoke(:return_the_big_tuple2), src.code) == 0 +end +let src = code_typed1() do + return iterate(("1", '2'), 1) + end + @test count(isinvoke(:iterate), src.code) == 0 +end + +function issue53062(cond) + x = Ref{Int}(0) + if cond + x[] = x + else + return -1 + end +end +@test !Compiler.is_nothrow(Base.infer_effects(issue53062, (Bool,))) +@test issue53062(false) == -1 +@test_throws MethodError issue53062(true) + +struct Issue52644 + tuple::Type{<:Tuple} +end +issue52644(::DataType) = :DataType +issue52644(::UnionAll) = :UnionAll +let ir = Base.code_ircode((Issue52644,); optimize_until="Inlining") do t + issue52644(t.tuple) + end |> only |> first + ir.argtypes[1] = Tuple{} + irfunc = Core.OpaqueClosure(ir) + @test irfunc(Issue52644(Tuple{})) === :DataType + @test irfunc(Issue52644(Tuple{<:Integer})) === :UnionAll +end +issue52644_single(x::DataType) = :DataType +let ir = Base.code_ircode((Issue52644,); optimize_until="Inlining") do t + issue52644_single(t.tuple) + end |> only |> first + ir.argtypes[1] = Tuple{} + irfunc = Core.OpaqueClosure(ir) + @test irfunc(Issue52644(Tuple{})) === :DataType + @test_throws MethodError irfunc(Issue52644(Tuple{<:Integer})) +end + +foo_split(x::Float64) = 1 +foo_split(x::Int) = 2 +bar_inline_error() = foo_split(nothing) +bar_split_error() = foo_split(Core.compilerbarrier(:type,nothing)) + +let src = code_typed1(bar_inline_error, Tuple{}) + # Should inline method errors + @test count(iscall((src, foo_split)), src.code) == 0 + @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 +end +let src = code_typed1(bar_split_error, Tuple{}) + # Should inline method errors + @test count(iscall((src, foo_split)), src.code) == 0 + @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 +end + +# finalizer inlining with EA +mutable struct ForeignBuffer{T} + const ptr::Ptr{T} +end +mutable struct ForeignBufferChecker + @atomic finalized::Bool +end +const foreign_buffer_checker = ForeignBufferChecker(false) +function foreign_alloc(::Type{T}, length) where T + ptr = Libc.malloc(sizeof(T) * length) + ptr = Base.unsafe_convert(Ptr{T}, ptr) + obj = ForeignBuffer{T}(ptr) + return finalizer(obj) do obj + Base.@assume_effects :notaskstate :nothrow + @atomic foreign_buffer_checker.finalized = true + Libc.free(obj.ptr) + end +end +function f_EA_finalizer(N::Int) + workspace = foreign_alloc(Float64, N) + GC.@preserve workspace begin + (;ptr) = workspace + Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr) + end +end +let src = code_typed1(foreign_alloc, (Type{Float64},Int,)) + @test count(iscall((src, Core.finalizer)), src.code) == 1 +end +let src = code_typed1(f_EA_finalizer, (Int,)) + @test count(iscall((src, Core.finalizer)), src.code) == 0 +end +let;Base.Experimental.@force_compile + f_EA_finalizer(42000) + @test foreign_buffer_checker.finalized +end + +# JuliaLang/julia#56422: +# EA-based finalizer inlining should not result in an 
invalid IR in the existence of `PhiNode`s +function issue56422(cnd::Bool, N::Int) + if cnd + workspace = foreign_alloc(Float64, N) + else + workspace = foreign_alloc(Float64, N+1) + end + GC.@preserve workspace begin + (;ptr) = workspace + Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr) + end +end +let src = code_typed1(issue56422, (Bool,Int,)) + @test_broken count(iscall((src, Core.finalizer)), src.code) == 0 +end + +# Test that inlining doesn't unnecessarily move things to statement position +@noinline f_noinline_invoke(x::Union{Symbol,Nothing}=nothing) = Core.donotdelete(x) +g_noinline_invoke(x) = f_noinline_invoke(x) +let src = code_typed1(g_noinline_invoke, (Union{Symbol,Nothing},)) + @test !any(@nospecialize(x)->isa(x,GlobalRef), src.code) +end + +@testset "@outline" begin + @testset "basic" begin + @test @outline(2) == 2 + @test @outline(2 + 2) == 4 + + x = 10 + @test @outline(x + 1) == 11 + @test @outline(x + x) == 20 + + negate(x) = -x + @test @outline(negate(+(1, 2))) == -3 + end + + @testset "throw exception" begin + @test_throws BoundsError((), 1) @outline(throw(BoundsError((), 1))) + a = [] + @test_throws BoundsError(a, 1) @outline(throw(BoundsError(a, 1))) + + @test_throws AssertionError("false") @outline @assert false + @test_throws AssertionError("violated") @outline @assert false "violated" + + x = 10 + @test_throws AssertionError("x == 0") @outline @assert x == 0 + @test_throws AssertionError("x: 10") @outline @assert x == 0 "x: $x" + end + + @testset "in a function" begin + function get_first(tup) + if isempty(tup) + @outline(throw(BoundsError(tup, 1))) + end + return first(tup) + end + @test get_first((1,)) == 1 + @test get_first((1,2)) == 1 + @test_throws BoundsError((), 1) get_first(()) + end +end diff --git a/Compiler/test/interpreter_exec.jl b/Compiler/test/interpreter_exec.jl new file mode 100644 index 0000000000000..b1d450f8f4286 --- /dev/null +++ b/Compiler/test/interpreter_exec.jl @@ -0,0 +1,114 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# tests that interpreter matches codegen +include("setup_Compiler.jl") + +using Test +using Core.IR + +# test that interpreter correctly handles PhiNodes (#29262) +let m = Meta.@lower 1 + 1 + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::CodeInfo + src.code = Any[ + # block 1 + QuoteNode(:a), + QuoteNode(:b), + GlobalRef(@__MODULE__, :test29262), + GotoIfNot(SSAValue(3), 6), + # block 2 + PhiNode(Int32[4], Any[SSAValue(1)]), + PhiNode(Int32[4, 5], Any[SSAValue(2), SSAValue(5)]), + ReturnNode(SSAValue(6)), + ] + nstmts = length(src.code) + src.ssavaluetypes = nstmts + src.ssaflags = fill(zero(UInt32), nstmts) + src.debuginfo = Core.DebugInfo(:none) + Compiler.verify_ir(Compiler.inflate_ir(src)) + global test29262 = true + @test :a === @eval $m + global test29262 = false + @test :b === @eval $m +end + +let m = Meta.@lower 1 + 1 + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::CodeInfo + src.code = Any[ + # block 1 + QuoteNode(:a), + QuoteNode(:b), + QuoteNode(:c), + GlobalRef(@__MODULE__, :test29262), + # block 2 + PhiNode(Int32[4, 16], Any[false, true]), # false, true + PhiNode(Int32[4, 16], Any[SSAValue(1), SSAValue(2)]), # :a, :b + PhiNode(Int32[4, 16], Any[SSAValue(3), SSAValue(6)]), # :c, :a + PhiNode(Int32[16], Any[SSAValue(7)]), # NULL, :c + # block 3 + PhiNode(Int32[], Any[]), # NULL, NULL + PhiNode(Int32[17, 8], Any[true, SSAValue(4)]), # test29262, test29262, [true] + PhiNode(Int32[17], Vector{Any}(undef, 1)), # NULL, NULL + PhiNode(Int32[8], Vector{Any}(undef, 1)), # NULL, NULL + PhiNode(Int32[], Any[]), # NULL, NULL + PhiNode(Int32[17, 8], Any[SSAValue(2), SSAValue(8)]), # NULL, :c, [:b] + PhiNode(Int32[], Any[]), # NULL, NULL + GotoIfNot(SSAValue(5), 5), + # block 4 + GotoIfNot(SSAValue(10), 9), + # block 5 + Expr(:call, GlobalRef(Core, :tuple), SSAValue(6), SSAValue(7), SSAValue(8), SSAValue(14)), + ReturnNode(SSAValue(18)), + ] + nstmts = length(src.code) + src.ssavaluetypes = nstmts + src.ssaflags = fill(zero(UInt32), nstmts) + src.debuginfo = Core.DebugInfo(:none) + m.args[1] = copy(src) + Compiler.verify_ir(Compiler.inflate_ir(src)) + global test29262 = true + @test (:b, :a, :c, :c) === @eval $m + m.args[1] = copy(src) + global test29262 = false + @test (:b, :a, :c, :b) === @eval $m +end + +let m = Meta.@lower 1 + 1 + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::CodeInfo + src.code = Any[ + # block 1 + QuoteNode(:a), + QuoteNode(:b), + GlobalRef(@__MODULE__, :test29262), + # block 2 + EnterNode(12), + # block 3 + UpsilonNode(), + UpsilonNode(), + UpsilonNode(SSAValue(2)), + GotoIfNot(SSAValue(3), 10), + # block 4 + UpsilonNode(SSAValue(1)), + # block 5 + Expr(:throw_undef_if_not, :expected, false), + ReturnNode(), # unreachable + # block 6 + PhiCNode(Any[SSAValue(5), SSAValue(7), SSAValue(9)]), # NULL, :a, :b + PhiCNode(Any[SSAValue(6)]), # NULL + Expr(:pop_exception, SSAValue(4)), + # block 7 + ReturnNode(SSAValue(12)), + ] + nstmts = length(src.code) + src.ssavaluetypes = nstmts + src.ssaflags = fill(zero(UInt32), nstmts) + src.debuginfo = Core.DebugInfo(:none) + Compiler.verify_ir(Compiler.inflate_ir(src)) + global test29262 = true + @test :a === @eval $m + global test29262 = false + @test :b === @eval $m + @test isempty(current_exceptions()) +end diff --git a/Compiler/test/invalidation.jl b/Compiler/test/invalidation.jl new file mode 100644 index 0000000000000..2642c1647a682 --- /dev/null +++ b/Compiler/test/invalidation.jl @@ -0,0 +1,285 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# setup +# ----- + +include("irutils.jl") + +using Test + +struct InvalidationTesterToken end + +struct InvalidationTester <: Compiler.AbstractInterpreter + world::UInt + inf_params::Compiler.InferenceParams + opt_params::Compiler.OptimizationParams + inf_cache::Vector{Compiler.InferenceResult} + function InvalidationTester(; + world::UInt = Base.get_world_counter(), + inf_params::Compiler.InferenceParams = Compiler.InferenceParams(), + opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(), + inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[]) + return new(world, inf_params, opt_params, inf_cache) + end +end + +Compiler.InferenceParams(interp::InvalidationTester) = interp.inf_params +Compiler.OptimizationParams(interp::InvalidationTester) = interp.opt_params +Compiler.get_inference_world(interp::InvalidationTester) = interp.world +Compiler.get_inference_cache(interp::InvalidationTester) = interp.inf_cache +Compiler.cache_owner(::InvalidationTester) = InvalidationTesterToken() + +# basic functionality test +# ------------------------ + +basic_callee(x) = x +basic_caller(x) = basic_callee(x) + +# run inference and check that cache exist +@test Base.return_types((Float64,); interp=InvalidationTester()) do x + basic_caller(x) +end |> only === Float64 + +let mi = Base.method_instance(basic_callee, (Float64,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) +end + +let mi = Base.method_instance(basic_caller, (Float64,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) +end + +# this redefinition below should invalidate the cache +const BASIC_CALLER_WORLD = Base.get_world_counter() +basic_callee(x) = x, x +@test !isdefined(Base.method_instance(basic_callee, (Float64,)), :cache) +let mi = Base.method_instance(basic_caller, (Float64,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == BASIC_CALLER_WORLD +end + +# re-run inference and check the result is updated (and new cache exists) +@test Base.return_types((Float64,); interp=InvalidationTester()) do x + basic_caller(x) +end |> only === Tuple{Float64,Float64} +let mi = Base.method_instance(basic_callee, (Float64,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) +end + +let mi = Base.method_instance(basic_caller, (Float64,)) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world != typemax(UInt) +end + + +# backedge optimization +# --------------------- + +const GLOBAL_BUFFER = IOBuffer() + +# test backedge optimization when the callee's type and effects information are maximized +begin + take!(GLOBAL_BUFFER) + + pr48932_callee(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x)) + pr48932_caller(x) = pr48932_callee(Base.inferencebarrier(x)) + + # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top + let rt = only(Base.return_types(pr48932_callee, (Any,))) + @test rt === Any + effects = Base.infer_effects(pr48932_callee, (Any,)) + @test effects == Compiler.Effects() + end + + # run inference on both `pr48932_caller` and 
`pr48932_callee` + let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x + @inline pr48932_caller(x) + end |> only + @test rt === Any + @test any(iscall((src, pr48932_callee)), src.code) + end + + let mi = only(Base.specializations(Base.only(Base.methods(pr48932_callee)))) + # Base.method_instance(pr48932_callee, (Any,)) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + + # In cache due to Base.return_types(pr48932_callee, (Any,)) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + end + let mi = Base.method_instance(pr48932_caller, (Int,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + end + + @test 42 == pr48932_caller(42) + @test "42" == String(take!(GLOBAL_BUFFER)) + + # test that we didn't add the backedge from `pr48932_callee` to `pr48932_caller`: + # this redefinition below should invalidate the cache of `pr48932_callee` but not that of `pr48932_caller` + pr48932_callee(x) = (print(GLOBAL_BUFFER, x); nothing) + + @test length(Base.methods(pr48932_callee)) == 2 + @test Base.only(Base.methods(pr48932_callee, Tuple{Any})) === first(Base.methods(pr48932_callee)) + @test isempty(Base.specializations(Base.only(Base.methods(pr48932_callee, Tuple{Any})))) + let mi = only(Base.specializations(Base.only(Base.methods(pr48932_caller)))) + # Base.method_instance(pr48932_callee, (Any,)) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === nothing + @test_broken ci.max_world == typemax(UInt) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test_broken ci.max_world == typemax(UInt) + end + + @test isnothing(pr48932_caller(42)) + @test "42" == String(take!(GLOBAL_BUFFER)) +end + +# we can avoid adding backedge even if the callee's return type is not the top +# when the return value is not used within the caller +begin take!(GLOBAL_BUFFER) + pr48932_callee_inferable(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(1)::Int) + pr48932_caller_unuse(x) = (pr48932_callee_inferable(Base.inferencebarrier(x)); nothing) + + # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top + let rt = only(Base.return_types(pr48932_callee_inferable, (Any,))) + @test rt === Int + effects = Base.infer_effects(pr48932_callee_inferable, (Any,)) + @test effects == Compiler.Effects() + end + + # run inference on both `pr48932_caller` and `pr48932_callee`: + # we don't need to add backedge to `pr48932_callee` from `pr48932_caller` + # since the inference result of `pr48932_callee` is maximized and it's not inlined + let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x + @inline pr48932_caller_unuse(x) + end |> only + @test rt === Nothing + @test any(iscall((src, pr48932_callee_inferable)), src.code) + end + + let mi = only(Base.specializations(Base.only(Base.methods(pr48932_callee_inferable)))) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + end + let mi = Base.method_instance(pr48932_caller_unuse, (Int,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + end + + @test 
isnothing(pr48932_caller_unuse(42)) + @test "42" == String(take!(GLOBAL_BUFFER)) + + # test that we didn't add the backedge from `pr48932_callee_inferable` to `pr48932_caller_unuse`: + # this redefinition below should invalidate the cache of `pr48932_callee_inferable` but not that of `pr48932_caller_unuse` + pr48932_callee_inferable(x) = (print(GLOBAL_BUFFER, "foo"); x) + + @test isempty(Base.specializations(Base.only(Base.methods(pr48932_callee_inferable, Tuple{Any})))) + let mi = Base.method_instance(pr48932_caller_unuse, (Int,)) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === nothing + @test_broken ci.max_world == typemax(UInt) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test_broken ci.max_world == typemax(UInt) + end + @test isnothing(pr48932_caller_unuse(42)) + @test "foo" == String(take!(GLOBAL_BUFFER)) +end + +# we need to add backedge when the callee is inlined +begin take!(GLOBAL_BUFFER) + + @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x)) + pr48932_caller_inlined(x) = pr48932_callee_inlined(Base.inferencebarrier(x)) + + # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top + let rt = only(Base.return_types(pr48932_callee_inlined, (Any,))) + @test rt === Any + effects = Base.infer_effects(pr48932_callee_inlined, (Any,)) + @test effects == Compiler.Effects() + end + + # run inference on `pr48932_caller_inlined` and `pr48932_callee_inlined` + let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x + @inline pr48932_caller_inlined(x) + end |> only + @test rt === Any + @test any(isinvoke(:pr48932_callee_inlined), src.code) + end + + let mi = Base.method_instance(pr48932_callee_inlined, (Int,)) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + end + let mi = Base.method_instance(pr48932_caller_inlined, (Int,)) + ci = mi.cache + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world == typemax(UInt) + end + + @test 42 == pr48932_caller_inlined(42) + @test "42" == String(take!(GLOBAL_BUFFER)) + + # test that we added the backedge from `pr48932_callee_inlined` to `pr48932_caller_inlined`: + # this redefinition below should invalidate the cache of `pr48932_callee_inlined` but not that of `pr48932_caller_inlined` + @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); nothing) + + @test isempty(Base.specializations(Base.only(Base.methods(pr48932_callee_inlined, Tuple{Any})))) + let mi = Base.method_instance(pr48932_caller_inlined, (Int,)) + ci = mi.cache + @test isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world != typemax(UInt) + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === InvalidationTesterToken() + @test ci.max_world != typemax(UInt) + end + + @test isnothing(pr48932_caller_inlined(42)) + @test "42" == String(take!(GLOBAL_BUFFER)) +end diff --git a/test/compiler/irpasses.jl b/Compiler/test/irpasses.jl similarity index 55% rename from test/compiler/irpasses.jl rename to Compiler/test/irpasses.jl index a1738b52161bf..27b6d75f86c93 100644 --- a/test/compiler/irpasses.jl +++ b/Compiler/test/irpasses.jl @@ -2,20 +2,15 @@ using Test using Base.Meta -import Core: - CodeInfo, Argument, SSAValue, GotoNode, GotoIfNot, PiNode, 
PhiNode, - QuoteNode, ReturnNode +using Core.IR -include(normpath(@__DIR__, "irutils.jl")) +include("irutils.jl") # domsort # ======= ## Test that domsort doesn't mangle single-argument phis (#29262) -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +let code = Any[ # block 1 Expr(:call, :opaque), GotoIfNot(Core.SSAValue(1), 10), @@ -33,24 +28,16 @@ let m = Meta.@lower 1 + 1 Core.PhiNode(Int32[2, 8], Any[0, Core.SSAValue(7)]), ReturnNode(Core.SSAValue(10)), ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - ir = Core.Compiler.domsort_ssa!(ir, domtree) - Core.Compiler.verify_ir(ir) - phi = ir.stmts.inst[3] + ir = make_ircode(code) + domtree = Compiler.construct_domtree(ir) + ir = Compiler.domsort_ssa!(ir, domtree) + Compiler.verify_ir(ir) + phi = ir.stmts.stmt[3] @test isa(phi, Core.PhiNode) && length(phi.edges) == 1 end # test that we don't stack-overflow in SNCA with large functions. -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - code = Any[] +let code = Any[] N = 2^15 for i in 1:2:N push!(code, Expr(:call, :opaque)) @@ -59,23 +46,16 @@ let m = Meta.@lower 1 + 1 # all goto here push!(code, Expr(:call, :opaque)) push!(code, ReturnNode(nothing)) - src.code = code - - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - ir = Core.Compiler.domsort_ssa!(ir, domtree) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) + domtree = Compiler.construct_domtree(ir) + ir = Compiler.domsort_ssa!(ir, domtree) + Compiler.verify_ir(ir) end # SROA # ==== -import Core.Compiler: widenconst +using .Compiler: widenconst is_load_forwarded(src::CodeInfo) = !any(iscall((src, getfield)), src.code) is_scalar_replaced(src::CodeInfo) = @@ -456,7 +436,7 @@ let src = code_typed1() do @test count(isnew, src.code) == 1 end -# should eliminate allocation whose address isn't taked even if it has uninitialized field(s) +# should eliminate allocation whose address isn't taken even if it has uninitialized field(s) mutable struct BadRef x::String y::String @@ -505,7 +485,7 @@ function isdefined_elim() return arr end let src = code_typed1(isdefined_elim) - @test is_scalar_replaced(src) + @test count(isisdefined, src.code) == 0 end @test isdefined_elim() == Any[] @@ -596,7 +576,6 @@ let # lifting `isa` through Core.ifelse @test count(iscall((src, isa)), src.code) == 0 end - let # lifting `isdefined` through PhiNode src = code_typed1((Bool,Some{Int},)) do c, x y = c ? 
x : nothing @@ -688,7 +667,7 @@ let nt = (a=1, b=2) end # Expr(:new) annotated as PartialStruct -struct FooPartial +struct FooPartialNew x y global f_partial @@ -697,10 +676,7 @@ end @test fully_eliminated(f_partial, Tuple{Float64}) # A SSAValue after the compaction line -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +let code = Any[ # block 1 nothing, # block 2 @@ -719,7 +695,7 @@ let m = Meta.@lower 1 + 1 # block 5 ReturnNode(Core.SSAValue(2)), ] - src.ssavaluetypes = Any[ + ssavaluetypes = Any[ Nothing, Any, Bool, @@ -732,21 +708,14 @@ let m = Meta.@lower 1 + 1 Any, Any ] - nstmts = length(src.code) - src.codelocs = fill(one(Int32), nstmts) - src.ssaflags = fill(one(Int32), nstmts) - src.slotflags = fill(zero(UInt8), 3) - ir = Core.Compiler.inflate_ir(src) - @test Core.Compiler.verify_ir(ir) === nothing - ir = @test_nowarn Core.Compiler.sroa_pass!(ir) - @test Core.Compiler.verify_ir(ir) === nothing + slottypes = Any[Any, Any, Any] + ir = make_ircode(code; ssavaluetypes, slottypes) + ir = @test_nowarn Compiler.sroa_pass!(ir) + @test Compiler.verify_ir(ir) === nothing end # A lifted Core.ifelse with an eliminated branch (#50276) -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +let code = Any[ # block 1 #= %1: =# Core.Argument(2), # block 2 @@ -770,7 +739,7 @@ let m = Meta.@lower 1 + 1 # block 5 #= %11: =# ReturnNode(false), ] - src.ssavaluetypes = Any[ + ssavaluetypes = Any[ Any, Union{Missing, Bool}, Any, @@ -783,14 +752,10 @@ let m = Meta.@lower 1 + 1 Any, Any ] - nstmts = length(src.code) - src.codelocs = fill(one(Int32), nstmts) - src.ssaflags = fill(one(Int32), nstmts) - src.slotflags = fill(zero(UInt8), 3) - ir = Core.Compiler.inflate_ir(src) - @test Core.Compiler.verify_ir(ir) === nothing - ir = @test_nowarn Core.Compiler.sroa_pass!(ir) - @test Core.Compiler.verify_ir(ir) === nothing + slottypes = Any[Any, Any, Any] + ir = make_ircode(code; ssavaluetypes, slottypes) + ir = @test_nowarn Compiler.sroa_pass!(ir) + @test Compiler.verify_ir(ir) === nothing end # Issue #31546 - missing widenconst in SROA @@ -805,51 +770,44 @@ end # Tests for cfg simplification let src = code_typed(gcd, Tuple{Int, Int})[1].first # Test that cfg_simplify doesn't mangle IR on code with loops - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) -end - -let m = Meta.@lower 1 + 1 - # Test that CFG simplify combines redundant basic blocks - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ - Core.Compiler.GotoNode(2), - Core.Compiler.GotoNode(3), - Core.Compiler.GotoNode(4), - Core.Compiler.GotoNode(5), - Core.Compiler.GotoNode(6), - Core.Compiler.GotoNode(7), + ir = Compiler.inflate_ir(src) + Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) +end + +let # Test that CFG simplify combines redundant basic blocks + code = Any[ + Compiler.GotoNode(2), + Compiler.GotoNode(3), + Compiler.GotoNode(4), + Compiler.GotoNode(5), + Compiler.GotoNode(6), + Compiler.GotoNode(7), ReturnNode(2) ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) - ir = Core.Compiler.compact!(ir) - @test length(ir.cfg.blocks) == 1 && Core.Compiler.length(ir.stmts) == 1 + 
ir = make_ircode(code) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) + ir = Compiler.compact!(ir) + @test length(ir.cfg.blocks) == 1 && Compiler.length(ir.stmts) == 1 end # Test cfg_simplify in complicated sequences of dropped and merged bbs -using Core.Compiler: Argument, IRCode, GotoNode, GotoIfNot, ReturnNode, NoCallInfo, BasicBlock, StmtRange, SSAValue -bb_term(ir, bb) = Core.Compiler.getindex(ir, SSAValue(Core.Compiler.last(ir.cfg.blocks[bb].stmts)))[:inst] +using .Compiler: Argument, IRCode, GotoNode, GotoIfNot, ReturnNode, NoCallInfo, BasicBlock, StmtRange, SSAValue +bb_term(ir, bb) = Compiler.getindex(ir, SSAValue(Compiler.last(ir.cfg.blocks[bb].stmts)))[:stmt] function each_stmt_a_bb(stmts, preds, succs) ir = IRCode() - empty!(ir.stmts.inst) - append!(ir.stmts.inst, stmts) - empty!(ir.stmts.type); append!(ir.stmts.type, [Nothing for _ = 1:length(stmts)]) + empty!(ir.stmts.stmt) + append!(ir.stmts.stmt, stmts) + empty!(ir.stmts.type); append!(ir.stmts.type, [Any for _ = 1:length(stmts)]) empty!(ir.stmts.flag); append!(ir.stmts.flag, [0x0 for _ = 1:length(stmts)]) - empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:length(stmts)]) + empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:3length(stmts)]) empty!(ir.stmts.info); append!(ir.stmts.info, [NoCallInfo() for _ = 1:length(stmts)]) empty!(ir.cfg.blocks); append!(ir.cfg.blocks, [BasicBlock(StmtRange(i, i), preds[i], succs[i]) for i = 1:length(stmts)]) - Core.Compiler.verify_ir(ir) + empty!(ir.cfg.index); append!(ir.cfg.index, [i for i = 2:length(stmts)]) + Compiler.verify_ir(ir) return ir end @@ -885,8 +843,8 @@ for gotoifnot in (false, true) preds = Vector{Int}[Int[], [1], [2], [2], [4], [5], [6], [1], [3], [4, 9], [5, 10], gotoifnot ? [6,11] : [6], [7, 11]] succs = Vector{Int}[[2, 8], [3, 4], [9], [5, 10], [6, 11], [7, 12], [13], Int[], [10], [11], gotoifnot ? [12, 13] : [13], Int[], Int[]] ir = each_stmt_a_bb(stmts, preds, succs) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) if gotoifnot let term4 = bb_term(ir, 4), term5 = bb_term(ir, 5) @@ -916,64 +874,48 @@ let stmts = [ preds = Vector{Int}[Int[], [1], [2], [1], [2, 3]] succs = Vector{Int}[[2, 4], [3, 5], [5], Int[], Int[]] ir = each_stmt_a_bb(stmts, preds, succs) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) @test length(ir.cfg.blocks) == 4 terms = map(i->bb_term(ir, i), 1:length(ir.cfg.blocks)) @test Set(term.val for term in terms if isa(term, ReturnNode)) == Set([1,2]) end -let m = Meta.@lower 1 + 1 - # Test that CFG simplify doesn't mess up when chaining past return blocks - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ - Core.Compiler.GotoIfNot(Core.Compiler.Argument(2), 3), - Core.Compiler.GotoNode(4), +let # Test that CFG simplify doesn't mess up when chaining past return blocks + code = Any[ + Compiler.GotoIfNot(Compiler.Argument(2), 3), + Compiler.GotoNode(4), ReturnNode(1), - Core.Compiler.GotoNode(5), - Core.Compiler.GotoIfNot(Core.Compiler.Argument(2), 7), + Compiler.GotoNode(5), + Compiler.GotoIfNot(Compiler.Argument(2), 7), # This fall through block of the previous GotoIfNot # must be moved up along with it, when we merge it # into the goto 4 block. 
ReturnNode(2), ReturnNode(3) ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) @test length(ir.cfg.blocks) == 5 - ret_2 = ir.stmts.inst[ir.cfg.blocks[3].stmts[end]] - @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2 + ret_2 = ir.stmts.stmt[ir.cfg.blocks[3].stmts[end]] + @test isa(ret_2, Compiler.ReturnNode) && ret_2.val == 2 end -let m = Meta.@lower 1 + 1 - # Test that CFG simplify doesn't try to merge every block in a loop into +let # Test that CFG simplify doesn't try to merge every block in a loop into # its predecessor - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ + code = Any[ # Block 1 - Core.Compiler.GotoNode(2), + Compiler.GotoNode(2), # Block 2 - Core.Compiler.GotoNode(3), + Compiler.GotoNode(3), # Block 3 - Core.Compiler.GotoNode(1) + Compiler.GotoNode(1) ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) @test length(ir.cfg.blocks) == 1 end @@ -984,10 +926,10 @@ let ir = Base.code_ircode(; optimize_until="slot2ssa") do end v end |> only |> first - Core.Compiler.verify_ir(ir) + Compiler.verify_ir(ir) nb = length(ir.cfg.blocks) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) na = length(ir.cfg.blocks) @test na < nb end @@ -1092,8 +1034,7 @@ exc39508 = ErrorException("expected") end @test test39508() === exc39508 -let - # `typeassert` elimination after SROA +let # `typeassert` elimination after SROA # NOTE we can remove this optimization once inference is able to reason about memory-effects src = @eval Module() begin mutable struct Foo; x; end @@ -1108,8 +1049,7 @@ let @test count(iscall((src, typeassert)), src.code) == 0 end -let - # Test for https://github.com/JuliaLang/julia/issues/43402 +let # Test for https://github.com/JuliaLang/julia/issues/43402 # Ensure that structs required not used outside of the ccall, # still get listed in the ccall_preserves @@ -1140,15 +1080,17 @@ let @test all(alloc -> alloc in preserves, refs) end -# test `stmt_effect_free` and DCE -# =============================== +# test `flags_for_effects` and DCE +# ================================ -let # effect-freeness computation for array allocation +@testset "effect-freeness computation for array allocation" begin # should eliminate dead allocations - good_dims = @static Int === Int64 ? (1:10) : (1:8) - Ns = @static Int === Int64 ? 
(1:10) : (1:8) - for dim = good_dims, N = Ns + good_dims = [1, 2, 3, 4, 10] + Ns = [1, 2, 3, 4, 10] + Ts = Any[Int, Union{Missing,Nothing}, Nothing, Any] + @testset "$dim, $N" for dim in good_dims, N in Ns + Int64(dim)^N > typemax(Int) && continue dims = ntuple(i->dim, N) @test @eval fully_eliminated() do Array{Int,$N}(undef, $(dims...)) @@ -1158,14 +1100,14 @@ let # effect-freeness computation for array allocation # shouldn't eliminate erroneous dead allocations bad_dims = [-1, typemax(Int)] - for dim in bad_dims, N in 1:10 + @testset "$dim, $N, $T" for dim in bad_dims, N in Ns, T in Ts dims = ntuple(i->dim, N) @test @eval !fully_eliminated() do - Array{Int,$N}(undef, $(dims...)) + Array{$T,$N}(undef, $(dims...)) nothing end - @test_throws "invalid Array" @eval let - Array{Int,$N}(undef, $(dims...)) + @test_throws "invalid " @eval let + Array{$T,$N}(undef, $(dims...)) nothing end end @@ -1194,10 +1136,10 @@ let ci = code_typed1(optimize=false) do gcd(64, 128) end end - ir = Core.Compiler.inflate_ir(ci) - @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 1 - ir = Core.Compiler.compact!(ir, true) - @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 0 + ir = Compiler.inflate_ir(ci) + @test any(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.stmt) + ir = Compiler.compact!(ir, true) + @test !any(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.stmt) end # Test that adce_pass! can drop phi node uses that can be concluded unused @@ -1223,26 +1165,26 @@ function foo_cfg_empty(b) @goto x end @label x - return 1 + return b end let ci = code_typed(foo_cfg_empty, Tuple{Bool}, optimize=true)[1][1] - ir = Core.Compiler.inflate_ir(ci) + ir = Compiler.inflate_ir(ci) @test length(ir.stmts) == 3 @test length(ir.cfg.blocks) == 3 - Core.Compiler.verify_ir(ir) - ir = Core.Compiler.cfg_simplify!(ir) - Core.Compiler.verify_ir(ir) + Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) @test length(ir.cfg.blocks) <= 2 - @test isa(ir.stmts[length(ir.stmts)][:inst], ReturnNode) + @test isa(ir.stmts[length(ir.stmts)][:stmt], ReturnNode) end -@test Core.Compiler.is_effect_free(Base.infer_effects(getfield, (Complex{Int}, Symbol))) -@test Core.Compiler.is_effect_free(Base.infer_effects(getglobal, (Module, Symbol))) +@test Compiler.is_effect_free(Base.infer_effects(getfield, (Complex{Int}, Symbol))) +@test Compiler.is_effect_free(Base.infer_effects(getglobal, (Module, Symbol))) # Test that UseRefIterator gets SROA'd inside of new_to_regular (#44557) # expression and new_to_regular offset are arbitrary here, we just want to see the UseRefIterator erased let e = Expr(:call, Core.GlobalRef(Base, :arrayset), false, Core.SSAValue(4), Core.SSAValue(9), Core.SSAValue(8)) - new_to_reg(expr) = Core.Compiler.new_to_regular(expr, 1) + new_to_reg(expr) = Compiler.new_to_regular(expr, 1) @allocated new_to_reg(e) # warmup call @test (@allocated new_to_reg(e)) == 0 end @@ -1355,3 +1297,736 @@ let src = code_typed1(mut50285, Tuple{Bool, Int, Float64}) @test count(isnew, src.code) == 0 @test count(iscall((src, typeassert)), src.code) == 0 end + +# Test that we can eliminate new{typeof(x)}(x) +struct TParamTypeofTest1{T} + x::T + @eval TParamTypeofTest1(x) = $(Expr(:new, :(TParamTypeofTest1{typeof(x)}), :x)) +end +tparam_typeof_test_elim1(x) = TParamTypeofTest1(x).x +@test fully_eliminated(tparam_typeof_test_elim1, Tuple{Any}) + +struct TParamTypeofTest2{S,T} + x::S + y::T + @eval TParamTypeofTest2(x, y) = $(Expr(:new, 
:(TParamTypeofTest2{typeof(x),typeof(y)}), :x, :y)) +end +tparam_typeof_test_elim2(x, y) = TParamTypeofTest2(x, y).x +@test fully_eliminated(tparam_typeof_test_elim2, Tuple{Any,Any}) + +# Test that sroa doesn't get confused by free type parameters in struct types +struct Wrap1{T} + x::T + @eval @inline (T::Type{Wrap1{X}} where X)(x) = $(Expr(:new, :T, :x)) +end +Wrap1(x) = Wrap1{typeof(x)}(x) + +function wrap1_wrap1_ifelse(b, x, w1) + w2 = Wrap1(Wrap1(x)) + w3 = Wrap1(typeof(w1)(w1.x)) + Core.ifelse(b, w3, w2).x.x +end +function wrap1_wrap1_wrapper(b, x, y) + w1 = Base.inferencebarrier(Wrap1(y))::Wrap1{<:Union{Int, Float64}} + wrap1_wrap1_ifelse(b, x, w1) +end +@test wrap1_wrap1_wrapper(true, 1, 1.0) === 1.0 +@test wrap1_wrap1_wrapper(false, 1, 1.0) === 1 + +# Test unswitching-union optimization within SRO Apass +function sroaunswitchuniontuple(c, x1, x2) + t = c ? (x1,) : (x2,) + return getfield(t, 1) +end +struct SROAUnswitchUnion1{T} + x::T +end +struct SROAUnswitchUnion2{S,T} + x::T + @inline SROAUnswitchUnion2{S}(x::T) where {S,T} = new{S,T}(x) +end +function sroaunswitchunionstruct1(c, x1, x2) + x = c ? SROAUnswitchUnion1(x1) : SROAUnswitchUnion1(x2) + return getfield(x, :x) +end +function sroaunswitchunionstruct2(c, x1, x2) + x = c ? SROAUnswitchUnion2{:a}(x1) : SROAUnswitchUnion2{:a}(x2) + return getfield(x, :x) +end +let src = code_typed1(sroaunswitchuniontuple, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, getfield)), src.code) == 0 +end +let src = code_typed1(sroaunswitchunionstruct1, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, getfield)), src.code) == 0 +end +@test sroaunswitchunionstruct2(true, 1, 1.0) === 1 +@test sroaunswitchunionstruct2(false, 1, 1.0) === 1.0 + +# Test SROA of union into getfield +struct SingleFieldStruct1 + x::Int +end +struct SingleFieldStruct2 + x::Int +end +function foo(b, x) + if b + f = SingleFieldStruct1(x) + else + f = SingleFieldStruct2(x) + end + getfield(f, :x) + 1 +end +@test foo(true, 1) == 2 + +# ifelse folding +@test Compiler.is_removable_if_unused(Base.infer_effects(exp, (Float64,))) +@test !Compiler.is_inlineable(code_typed1(exp, (Float64,))) +@test fully_eliminated(; retval=Core.Argument(2)) do x::Float64 + return Core.ifelse(true, x, exp(x)) +end +@test fully_eliminated(; retval=Core.Argument(2)) do x::Float64 + return ifelse(true, x, exp(x)) # the optimization should be applied to post-inlining IR too +end +@test fully_eliminated(; retval=Core.Argument(2)) do x::Float64 + return ifelse(isa(x, Float64), x, exp(x)) +end +func_coreifelse(c, x) = Core.ifelse(c, x, x) +func_ifelse(c, x) = ifelse(c, x, x) +@test fully_eliminated(func_coreifelse, (Bool,Float64); retval=Core.Argument(3)) +@test !fully_eliminated(func_coreifelse, (Any,Float64)) +@test fully_eliminated(func_ifelse, (Bool,Float64); retval=Core.Argument(3)) +@test !fully_eliminated(func_ifelse, (Any,Float64)) + +# PhiC fixup of compact! 
with cfg modification +@inline function big_dead_throw_catch() + x = 1 + try + x = 2 + if Ref{Bool}(false)[] + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + x = 3 + end + catch + return x + end +end + +function call_big_dead_throw_catch() + if Ref{Bool}(false)[] + return big_dead_throw_catch() + end + return 4 +end + +# Issue #51159 - Unreachable reached in try-catch block +function f_with_early_try_catch_exit() + result = false + for i in 3 + x = try + catch + # This introduces an early Expr(:leave) that we must respect when building + # φᶜ-nodes in slot2ssa. In particular, we have to ignore the `result = x` + # assignment that occurs outside of this try-catch block + continue + end + result = x + end + result +end + +let ir = first(only(Base.code_ircode(f_with_early_try_catch_exit, (); optimize_until="compact"))) + for i = 1:length(ir.stmts) + expr = ir.stmts[i][:stmt] + if isa(expr, PhiCNode) + # The φᶜ should only observe the value of `result` at the try-catch :enter + # (from the `result = false` assignment), since `result = x` assignment is + # dominated by an Expr(:leave). + @test length(expr.values) == 1 + end + end +end + +@test isnothing(f_with_early_try_catch_exit()) + +# Issue #51144 - UndefRefError during compaction +let code = Any[ + # block 1 → 2, 3 + #= %1: =# Expr(:(=), Core.SlotNumber(4), Core.Argument(2)), + #= %2: =# Expr(:call, :(===), Core.SlotNumber(4), nothing), + #= %3: =# GotoIfNot(Core.SSAValue(1), 5), + # block 2 + #= %4: =# ReturnNode(nothing), + # block 3 → 4, 5 + #= %5: =# Expr(:(=), Core.SlotNumber(4), false), + #= %6: =# GotoIfNot(Core.Argument(2), 8), + # block 4 → 5 + #= %7: =# Expr(:(=), Core.SlotNumber(4), true), + # block 5 + #= %8: =# ReturnNode(nothing), # Must not insert a π-node here + ] + slottypes = Any[Any, Union{Bool, Nothing}, Bool, Union{Bool, Nothing}] + src = make_codeinfo(code; slottypes) + + mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()); + mi.specTypes = Tuple{} + mi.def = Module() + + # Simulate the important results from inference + interp = Compiler.NativeInterpreter() + sv = Compiler.OptimizationState(mi, src, interp) + slot_id = 4 + for block_id = 3:5 + # (_4 !== nothing) conditional narrows the type, triggering PiNodes + sv.bb_vartables[block_id][slot_id] = VarState(Bool, #= maybe_undef =# false) + end + + ir = Compiler.convert_to_ircode(src, sv) + ir = Compiler.slot2reg(ir, src, sv) + ir = Compiler.compact!(ir) + + Compiler.verify_ir(ir) +end + +function f_with_merge_to_entry_block() + while true + i = @noinline rand(Int) + if @noinline isodd(i) + return i + end + end +end + +let (ir, _) = only(Base.code_ircode(f_with_merge_to_entry_block)) + Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) +end + +# Test that CFG simplify doesn't leave an un-renamed SSA Value +let # Test that CFG simplify doesn't try to merge every block in a loop into + # its predecessor + code = Any[ + # Block 1 + GotoIfNot(Argument(1), 3), + # Block 2 + GotoNode(5), + # Block 3 + Expr(:call, Base.inferencebarrier, 1), + GotoNode(6), + # Block 4 + Expr(:call, Base.inferencebarrier, 2), # fallthrough + # Block 5 + PhiNode(Int32[4, 5], 
Any[SSAValue(3), SSAValue(5)]), + ReturnNode(1) + ] + ir = make_ircode(code) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 4 +end + +# JET.test_opt(Compiler.cfg_simplify!, (Compiler.IRCode,)) + +# Test support for Core.OptimizedGenerics.KeyValue protocol +function persistent_dict_elim() + a = Base.PersistentDict(:a => 1) + return a[:a] +end + +# Ideally we would be able to fully eliminate this, +# but currently this would require an extra round of constprop +@test_broken fully_eliminated(persistent_dict_elim) +@test code_typed(persistent_dict_elim)[1][1].code[end] == Core.ReturnNode(1) + +function persistent_dict_elim_multiple() + a = Base.PersistentDict(:a => 1) + b = Base.PersistentDict(a, :b => 2) + return b[:a] +end +@test_broken fully_eliminated(persistent_dict_elim_multiple) +let code = code_typed(persistent_dict_elim_multiple)[1][1].code + @test count(x->isexpr(x, :invoke), code) == 0 + @test code[end] == Core.ReturnNode(1) +end + +function persistent_dict_elim_multiple_phi(c::Bool) + if c + a = Base.PersistentDict(:a => 1) + else + a = Base.PersistentDict(:a => 1) + end + b = Base.PersistentDict(a, :b => 2) + return b[:a] +end +@test_broken fully_eliminated(persistent_dict_elim_multiple_phi) +@test code_typed(persistent_dict_elim_multiple_phi)[1][1].code[end] == Core.ReturnNode(1) + +function persistent_dict_elim_multiple_phi2(c::Bool) + z = Base.inferencebarrier(1)::Int + if c + a = Base.PersistentDict(:a => z) + else + a = Base.PersistentDict(:a => z) + end + b = Base.PersistentDict(a, :b => 2) + return b[:a] +end +@test persistent_dict_elim_multiple_phi2(true) == 1 + +# Test CFG simplify with try/catch blocks +let code = Any[ + # Block 1 + GotoIfNot(Argument(1), 5), + # Block 2 + EnterNode(4), + # Block 3 + Expr(:leave, SSAValue(2)), + # Block 4 + GotoNode(5), + # Block 5 + ReturnNode(1) + ] + ir = make_ircode(code) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) <= 5 +end + +# Test CFG simplify with single predecessor phi node +let code = Any[ + # Block 1 + GotoNode(3), + # Block 2 + nothing, + # Block 3 + Expr(:call, Base.inferencebarrier, 1), + GotoNode(5), + # Block 4 + PhiNode(Int32[4], Any[SSAValue(3)]), + ReturnNode(SSAValue(5)) + ] + ir = make_ircode(code) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) <= 2 + ir = Compiler.compact!(ir) + @test length(ir.stmts) <= 3 + @test (ir[SSAValue(length(ir.stmts))][:stmt]::ReturnNode).val !== nothing +end + +let code = Any[ + Expr(:call, Base.inferencebarrier, Argument(1)), # ::Bool + Expr(:call, Core.tuple, 1), # ::Tuple{Int} + Expr(:call, Core.tuple, 1.0), # ::Tuple{Float64} + Expr(:call, Core.ifelse, SSAValue(1), SSAValue(2), SSAValue(3)), # ::Tuple{Int} (e.g. 
from inlining) + Expr(:call, Core.getfield, SSAValue(4), 1), # ::Int + ReturnNode(SSAValue(5)) +] + try + argtypes = Any[Bool] + ssavaluetypes = Any[Bool, Tuple{Int}, Tuple{Float64}, Tuple{Int}, Int, Any] + ir = make_ircode(code; slottypes=argtypes, ssavaluetypes) + Compiler.verify_ir(ir) + Compiler.__set_check_ssa_counts(true) + ir = Compiler.sroa_pass!(ir) + Compiler.verify_ir(ir) + finally + Compiler.__set_check_ssa_counts(false) + end +end + +# Test SROA all_same on NewNode +let code = Any[ + # Block 1 + Expr(:call, tuple, Argument(1)), + GotoIfNot(Argument(4), 5), + # Block 2 + Expr(:call, tuple, Argument(2)), + GotoIfNot(Argument(4), 9), + # Block 3 + PhiNode(Int32[2, 4], Any[SSAValue(1), SSAValue(3)]), + Expr(:call, getfield, SSAValue(5), 1), + Expr(:call, tuple, SSAValue(6), Argument(2)), # ::Tuple{Int, Int} + Expr(:call, tuple, SSAValue(7), Argument(3)), # ::Tuple{Tuple{Int, Int}, Int} + # Block 4 + PhiNode(Int32[4, 8], Any[nothing, SSAValue(8)]), + Expr(:call, Core.Intrinsics.not_int, Argument(4)), + GotoIfNot(SSAValue(10), 13), + # Block 5 + ReturnNode(1), + # Block 6 + PiNode(SSAValue(9), Tuple{Tuple{Int, Int}, Int}), + Expr(:call, getfield, SSAValue(13), 1), + Expr(:call, getfield, SSAValue(14), 1), + ReturnNode(SSAValue(15)) +] + + argtypes = Any[Int, Int, Int, Bool] + ssavaluetypes = Any[Tuple{Int}, Any, Tuple{Int}, Any, Tuple{Int}, Int, Tuple{Int, Int}, Tuple{Tuple{Int, Int}, Int}, + Union{Nothing, Tuple{Tuple{Int, Int}, Int}}, Bool, Any, Any, + Tuple{Tuple{Int, Int}, Int}, + Tuple{Int, Int}, Int, Any] + ir = make_ircode(code; slottypes=argtypes, ssavaluetypes) + Compiler.verify_ir(ir) + ir = Compiler.sroa_pass!(ir) + Compiler.verify_ir(ir) + ir = Compiler.compact!(ir) + Compiler.verify_ir(ir) +end + +# Test correctness of current_scope folding +@eval function scope_folding() + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), 2), + :(return Core.current_scope())), + :(), 1)) +end + +@eval function scope_folding_opt() + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), :(Base.inferencebarrier(2))), + :(return Core.current_scope())), + :(), :(Base.inferencebarrier(1)))) +end + +@test scope_folding() == 1 +@test scope_folding_opt() == 1 +@test_broken fully_eliminated(scope_folding) +@test_broken fully_eliminated(scope_folding_opt) +let ir = first(only(Base.code_ircode(scope_folding, ()))) + @test Compiler.compute_trycatch(ir) isa Compiler.HandlerInfo +end +let ir = first(only(Base.code_ircode(scope_folding_opt, ()))) + @test Compiler.compute_trycatch(ir) isa Compiler.HandlerInfo +end + +# Function that happened to have lots of sroa that +# happened to trigger a bad case in the renamer. We +# just want to check this doesn't crash in inference. 
+function f52610() + slots_dict = IdDict() + for () in Base.inferencebarrier(1) + for x in 1 + if Base.inferencebarrier(true) + slots_dict[x] = 0 + end + end + end + return nothing +end +@test code_typed(f52610)[1][2] === Nothing + +# Issue #52703 +@eval function f52703() + try + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), 2), + :(return Base.inferencebarrier(Core.current_scope)()::Int)), + :(), 1)) + catch + return 1 + end + return 0 +end +@test code_typed(f52703)[1][2] === Int + +# Issue #52858 - compaction gets confused by pending node +let code = Any[ + # Block 1 + GotoIfNot(true, 6), + # Block 2 + Expr(:call, println, 1), + Expr(:call, Base.inferencebarrier, true), + GotoIfNot(SSAValue(3), 6), + # Block 3 + nothing, + # Block 4 + PhiNode(Int32[1, 4, 5], Any[1, 2, 3]), + ReturnNode(SSAValue(6)) +] + ir = make_ircode(code) + Compiler.insert_node!(ir, SSAValue(5), + Compiler.NewInstruction( + Expr(:call, println, 2), Nothing, Int32(1)), + #= attach_after = =# true) + ir = Compiler.compact!(ir, true) + @test Compiler.verify_ir(ir) === nothing + @test count(x->isa(x, GotoIfNot), ir.stmts.stmt) == 1 +end + +# Issue #52857 - Affinity of sroa definedness check +let code = Any[ + Expr(:new, ImmutableRef{Any}), + GotoIfNot(Argument(1), 4), + Expr(:call, GlobalRef(Base, :getfield), SSAValue(1), 1), # Will throw + ReturnNode(1) +] + ir = make_ircode(code; ssavaluetypes = Any[ImmutableRef{Any}, Any, Any, Any], slottypes=Any[Bool], verify=true) + ir = Compiler.sroa_pass!(ir) + @test Compiler.verify_ir(ir) === nothing + @test !any(iscall((ir, getfield)), ir.stmts.stmt) + @test length(ir.cfg.blocks[end].stmts) == 1 +end + +# https://github.com/JuliaLang/julia/issues/47065 +# `Compiler.sort!` should be able to handle a big list +let n = 1000 + ex = :(return 1) + for _ in 1:n + ex = :(rand() < .1 && $(ex)) + end + @eval global function f_1000_blocks() + $ex + return 0 + end +end +@test f_1000_blocks() == 0 + +# https://github.com/JuliaLang/julia/issues/53521 +# Incorrect scope counting in :leave +using Base.ScopedValues +function f53521() + VALUE = ScopedValue(1) + @with VALUE => 2 begin + for i = 1 + @with VALUE => 3 begin + try + foo() + catch + nothing + end + end + end + end +end +let (ir,rt) = only(Base.code_ircode(f53521, ())) + @test rt == Nothing + Compiler.verify_ir(ir) + Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) +end + +Base.@assume_effects :foldable Base.@constprop :aggressive function f53521(x::Int, ::Int) + VALUE = ScopedValue(x) + @with VALUE => 2 begin + for i = 1 + @with VALUE => 3 begin + local v + try + v = sin(VALUE[]) + catch + v = nothing + end + return v + end + end + end +end +let (ir,rt) = only(Base.code_ircode((Int,)) do y + f53521(1, y) + end) + @test rt == Union{Nothing,Float64} +end + +# Test that adce_pass! 
sets Refined on PhiNode values +let code = Any[ + # Basic Block 1 + GotoIfNot(false, 3) + # Basic Block 2 + nothing + # Basic Block 3 + PhiNode(Int32[1, 2], Any[1.0, 1]) + ReturnNode(Core.SSAValue(3)) +] + ir = make_ircode(code; ssavaluetypes=Any[Any, Nothing, Union{Int64, Float64}, Any]) + (ir, made_changes) = Compiler.adce_pass!(ir) + @test made_changes + @test (ir[Core.SSAValue(length(ir.stmts))][:flag] & Compiler.IR_FLAG_REFINED) != 0 +end + +# JuliaLang/julia#52991: statements that may not :terminate should not be deleted +@noinline Base.@assume_effects :effect_free :nothrow function issue52991(n) + local s = 0 + try + while true + yield() + if n - rand(1:10) > 0 + s += 1 + else + break + end + end + catch + end + return s +end +@test !Compiler.is_removable_if_unused(Base.infer_effects(issue52991, (Int,))) +let src = code_typed1((Int,)) do x + issue52991(x) + nothing + end + @test count(isinvoke(:issue52991), src.code) == 1 +end +let t = @async begin + issue52991(11) # this call never terminates + nothing + end + sleep(1) + if istaskdone(t) + ok = false + else + ok = true + schedule(t, InterruptException(); error=true) + end + @test ok +end + +# JuliaLang/julia47664 +@test !fully_eliminated() do + any(isone, Iterators.repeated(0)) +end +@test !fully_eliminated() do + all(iszero, Iterators.repeated(0)) +end + +## Test that cfg_simplify respects implicit `unreachable` terminators +let code = Any[ + # block 1 + GotoIfNot(Core.Argument(2), 4), + # block 2 + Expr(:call, Base.throw, "error"), # an implicit `unreachable` terminator + # block 3 + Expr(:call, :opaque), + # block 4 + ReturnNode(nothing), + ] + ir = make_ircode(code; ssavaluetypes=Any[Any, Union{}, Any, Union{}]) + + # Unfortunately `compute_basic_blocks` does not notice the `throw()` so it gives us + # a slightly imprecise CFG. 
Instead manually construct the CFG we need for this test: + empty!(ir.cfg.blocks) + push!(ir.cfg.blocks, BasicBlock(StmtRange(1,1), [], [2,4])) + push!(ir.cfg.blocks, BasicBlock(StmtRange(2,2), [1], [])) + push!(ir.cfg.blocks, BasicBlock(StmtRange(3,3), [], [])) + push!(ir.cfg.blocks, BasicBlock(StmtRange(4,4), [1], [])) + empty!(ir.cfg.index) + append!(ir.cfg.index, Int[2,3,4]) + ir.stmts.stmt[1] = GotoIfNot(Core.Argument(2), 4) + + Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 3 # should have removed block 3 +end + +let code = Any[ + # block 1 + EnterNode(4, 1), + # block 2 + GotoNode(3), # will be turned into nothing + # block 3 + GotoNode(5), + # block 4 + ReturnNode(), + # block 5 + Expr(:leave, SSAValue(1)), + # block 6 + GotoIfNot(Core.Argument(1), 8), + # block 7 + ReturnNode(1), + # block 8 + ReturnNode(2), + ] + ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Any, Any, Any, Any, Union{}, Union{}]) + @test length(ir.cfg.blocks) == 8 + Compiler.verify_ir(ir) + + # Union typed deletion marker in basic block 2 + Compiler.setindex!(ir, nothing, SSAValue(2)) + + # Test cfg_simplify + Compiler.verify_ir(ir) + ir = Compiler.cfg_simplify!(ir) + Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 6 + gotoifnot = Compiler.last(ir.cfg.blocks[3].stmts) + inst = ir[SSAValue(gotoifnot)] + @test isa(inst[:stmt], GotoIfNot) + # Make sure we didn't accidentally schedule the unreachable block as + # fallthrough + @test isdefined(ir[SSAValue(gotoifnot+1)][:inst]::ReturnNode, :val) +end + +# https://github.com/JuliaLang/julia/issues/54596 +# finalized object's uses have no postdominator +let f = (x)->nothing, mi = Base.method_instance(f, (Base.RefValue{Nothing},)), code = Any[ + # Basic Block 1 + Expr(:new, Base.RefValue{Nothing}, nothing) + Expr(:call, Core.finalizer, f, SSAValue(1), true, mi) + GotoIfNot(false, 6) + # Basic Block 2 + Expr(:call, Base.getfield, SSAValue(1), :x) + ReturnNode(SSAValue(4)) + # Basic Block 3 + Expr(:call, Base.getfield, SSAValue(1), :x) + ReturnNode(SSAValue(6)) +] + ir = make_ircode(code; ssavaluetypes=Any[Base.RefValue{Nothing}, Nothing, Any, Nothing, Any, Nothing, Any]) + inlining = Compiler.InliningState(Compiler.NativeInterpreter()) + Compiler.verify_ir(ir) + ir = Compiler.sroa_pass!(ir, inlining) + Compiler.verify_ir(ir) +end + +let code = Any[ + # block 1 + GotoNode(4), # skip + # block 2 + Expr(:leave, SSAValue(1)), # not domsorted - make sure we move it correctly + # block 3 + ReturnNode(2), + # block 4 + EnterNode(7), + # block 5 + GotoIfNot(Argument(1), 2), + # block 6 + Expr(:leave, SSAValue(1)), + # block 7 + ReturnNode(1), + # block 8 + ReturnNode(nothing), + ] + ir = make_ircode(code; ssavaluetypes=Any[Any, Any, Union{}, Any, Any, Any, Union{}, Union{}]) + @test length(ir.cfg.blocks) == 8 + Compiler.verify_ir(ir) + + # The IR should remain valid after domsorting + # (esp. including the insertion of new BasicBlocks for any fix-ups) + domtree = Compiler.construct_domtree(ir) + ir = Compiler.domsort_ssa!(ir, domtree) + Compiler.verify_ir(ir) +end diff --git a/Compiler/test/irutils.jl b/Compiler/test/irutils.jl new file mode 100644 index 0000000000000..c1616ad4a8fd0 --- /dev/null +++ b/Compiler/test/irutils.jl @@ -0,0 +1,103 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +include("setup_Compiler.jl") + +using Core.IR +using .Compiler: IRCode, IncrementalCompact, singleton_type, VarState +using Base.Meta: isexpr +using InteractiveUtils: gen_call_with_extracted_types_and_kwargs + +argextype(@nospecialize args...) = Compiler.argextype(args..., VarState[]) +code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo +macro code_typed1(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0) +end +get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code +macro get_code(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0) +end + +# check if `x` is a statement with a given `head` +isnew(@nospecialize x) = isexpr(x, :new) +issplatnew(@nospecialize x) = isexpr(x, :splatnew) +isreturn(@nospecialize x) = isa(x, ReturnNode) && isdefined(x, :val) +isisdefined(@nospecialize x) = isexpr(x, :isdefined) + +# check if `x` is a dynamic call of a given function +iscall(y) = @nospecialize(x) -> iscall(y, x) +function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:Union{CodeInfo,IRCode,IncrementalCompact} + return iscall(x) do @nospecialize x + singleton_type(argextype(x, src)) === f + end +end +function iscall(pred::Base.Callable, @nospecialize(x)) + if isexpr(x, :(=)) + x = x.args[2] + end + return isexpr(x, :call) && pred(x.args[1]) +end + +# check if `x` is a statically-resolved call of a function whose name is `sym` +isinvoke(y) = @nospecialize(x) -> isinvoke(y, x) +isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x) +isinvoke(pred::Function, @nospecialize(x)) = isexpr(x, :invoke) && pred((x.args[1]::CodeInstance).def) + +fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) = + fully_eliminated(code_typed1(args...; kwargs...); retval) +fully_eliminated(src::CodeInfo; retval=(@__FILE__)) = fully_eliminated(src.code; retval) +fully_eliminated(ir::IRCode; retval=(@__FILE__)) = fully_eliminated(ir.stmts.stmt; retval) +function fully_eliminated(code::Vector{Any}; retval=(@__FILE__), kwargs...) + length(code) == 1 || return false + retstmt = only(code) + isreturn(retstmt) || return false + retval === (@__FILE__) && return true + retval′ = retstmt.val + if retval′ isa QuoteNode + retval′ = retval′.value + end + return retval′ == retval +end +macro fully_eliminated(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0) +end + +let m = Meta.@lower 1 + 1 + @assert isexpr(m, :thunk) + orig_src = m.args[1]::CodeInfo + global function make_codeinfo(code::Vector{Any}; + ssavaluetypes::Union{Nothing,Vector{Any}}=nothing, + slottypes::Union{Nothing,Vector{Any}}=nothing, + slotnames::Union{Nothing,Vector{Symbol}}=nothing) + src = copy(orig_src) + src.code = code + nstmts = length(src.code) + if ssavaluetypes === nothing + src.ssavaluetypes = nstmts + else + src.ssavaluetypes = ssavaluetypes + end + src.debuginfo = Core.DebugInfo(:none) + src.ssaflags = fill(zero(UInt32), nstmts) + if slottypes !== nothing + src.slottypes = slottypes + src.slotflags = fill(zero(UInt8), length(slottypes)) + end + if slotnames !== nothing + src.slotnames = slotnames + end + return src + end + global function make_ircode(code::Vector{Any}; + slottypes::Union{Nothing,Vector{Any}}=nothing, + verify::Bool=true, + kwargs...) + src = make_codeinfo(code; slottypes, kwargs...) 
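+        # inflate the CodeInfo into an IRCode, passing the slot types explicitly when they were provided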
+        if slottypes !== nothing
+            ir = Compiler.inflate_ir(src, slottypes)
+        else
+            ir = Compiler.inflate_ir(src)
+        end
+        verify && Compiler.verify_ir(ir)
+        return ir
+    end
+end
diff --git a/Compiler/test/newinterp.jl b/Compiler/test/newinterp.jl
new file mode 100644
index 0000000000000..5ebcf332895fa
--- /dev/null
+++ b/Compiler/test/newinterp.jl
@@ -0,0 +1,63 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# TODO set up a version that defines a new interpreter with a persistent cache?
+
+"""
+    @newinterp NewInterpreter [ephemeral_cache::Bool=false]
+
+Defines a new `NewInterpreter <: AbstractInterpreter` whose cache is separated
+from the native code cache, satisfying the minimum interface requirements.
+
+When the `ephemeral_cache=true` option is specified, `NewInterpreter` will hold
+`CodeInstance`s in an ephemeral, non-integrated cache, rather than in the integrated
+`Compiler.InternalCodeCache`.
+Keep in mind that the ephemeral cache lacks support for invalidation and doesn't persist across
+sessions. However, it is an ordinary Julia object of the type `code_cache::IdDict{MethodInstance,CodeInstance}`,
+making it easier to debug and inspect the compiler behavior.
+"""
+macro newinterp(InterpName, ephemeral_cache::Bool=false)
+    cache_token = QuoteNode(gensym(string(InterpName, "CacheToken")))
+    InterpCacheName = esc(Symbol(string(InterpName, "Cache")))
+    InterpName = esc(InterpName)
+    C = Core
+    quote
+        $(ephemeral_cache && quote
+        struct $InterpCacheName
+            dict::IdDict{$C.MethodInstance,$C.CodeInstance}
+        end
+        $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}())
+        end)
+        struct $InterpName <: $Compiler.AbstractInterpreter
+            meta # additional information
+            world::UInt
+            inf_params::$Compiler.InferenceParams
+            opt_params::$Compiler.OptimizationParams
+            inf_cache::Vector{$Compiler.InferenceResult}
+            $(ephemeral_cache && :(code_cache::$InterpCacheName))
+            function $InterpName(meta = nothing;
+                                 world::UInt = Base.get_world_counter(),
+                                 inf_params::$Compiler.InferenceParams = $Compiler.InferenceParams(),
+                                 opt_params::$Compiler.OptimizationParams = $Compiler.OptimizationParams(),
+                                 inf_cache::Vector{$Compiler.InferenceResult} = $Compiler.InferenceResult[],
+                                 $(ephemeral_cache ?
+                                   Expr(:kw, :(code_cache::$InterpCacheName), :($InterpCacheName())) :
+                                   Expr(:kw, :_, :nothing)))
+                return $(ephemeral_cache ?
+ :(new(meta, world, inf_params, opt_params, inf_cache, code_cache)) : + :(new(meta, world, inf_params, opt_params, inf_cache))) + end + end + $Compiler.InferenceParams(interp::$InterpName) = interp.inf_params + $Compiler.OptimizationParams(interp::$InterpName) = interp.opt_params + $Compiler.get_inference_world(interp::$InterpName) = interp.world + $Compiler.get_inference_cache(interp::$InterpName) = interp.inf_cache + $Compiler.cache_owner(::$InterpName) = $cache_token + $(ephemeral_cache && quote + $Compiler.code_cache(interp::$InterpName) = $Compiler.WorldView(interp.code_cache, $Compiler.WorldRange(interp.world)) + $Compiler.get(wvc::$Compiler.WorldView{$InterpCacheName}, mi::$C.MethodInstance, default) = get(wvc.cache.dict, mi, default) + $Compiler.getindex(wvc::$Compiler.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = getindex(wvc.cache.dict, mi) + $Compiler.haskey(wvc::$Compiler.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = haskey(wvc.cache.dict, mi) + $Compiler.setindex!(wvc::$Compiler.WorldView{$InterpCacheName}, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(wvc.cache.dict, ci, mi) + end) + end +end diff --git a/Compiler/test/runtests.jl b/Compiler/test/runtests.jl new file mode 100644 index 0000000000000..6a38fce678ba0 --- /dev/null +++ b/Compiler/test/runtests.jl @@ -0,0 +1,12 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license +using Test, Compiler +using InteractiveUtils: @activate +@activate Compiler + +@testset "Compiler.jl" begin + for file in readlines(joinpath(@__DIR__, "testgroups")) + file == "special_loading" && continue # Only applicable to Base.Compiler + testfile = file * ".jl" + @eval @testset $testfile include($testfile) + end +end diff --git a/Compiler/test/setup_Compiler.jl b/Compiler/test/setup_Compiler.jl new file mode 100644 index 0000000000000..a28a3f918aaf9 --- /dev/null +++ b/Compiler/test/setup_Compiler.jl @@ -0,0 +1,9 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +if !@isdefined(Compiler) + if Base.REFLECTION_COMPILER[] === nothing + using Base.Compiler: Compiler + else + const Compiler = Base.REFLECTION_COMPILER[] + end +end diff --git a/Compiler/test/special_loading.jl b/Compiler/test/special_loading.jl new file mode 100644 index 0000000000000..ba012446dc61f --- /dev/null +++ b/Compiler/test/special_loading.jl @@ -0,0 +1,9 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +mktempdir() do dir + withenv("JULIA_DEPOT_PATH" => dir * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do + cd(joinpath(@__DIR__, "CompilerLoadingTest")) do + @test success(pipeline(`$(Base.julia_cmd()[1]) --startup-file=no --project=. compiler_loading_test.jl`; stdout, stderr)) + end + end +end diff --git a/test/compiler/ssair.jl b/Compiler/test/ssair.jl similarity index 52% rename from test/compiler/ssair.jl rename to Compiler/test/ssair.jl index 43f17d4ad69f2..6100aad673040 100644 --- a/test/compiler/ssair.jl +++ b/Compiler/test/ssair.jl @@ -1,23 +1,12 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -using Base.Meta -using Core.IR -const Compiler = Core.Compiler -using .Compiler: CFG, BasicBlock, NewSSAValue +include("irutils.jl") -include(normpath(@__DIR__, "irutils.jl")) +using Test -make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) +using .Compiler: CFG, BasicBlock, NewSSAValue -function make_ci(code) - ci = (Meta.@lower 1 + 1).args[1] - ci.code = code - nstmts = length(ci.code) - ci.ssavaluetypes = nstmts - ci.codelocs = fill(Int32(1), nstmts) - ci.ssaflags = fill(Int32(0), nstmts) - return ci -end +make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) # TODO: this test is broken #let code = Any[ @@ -38,7 +27,6 @@ end # false, false, false, false # )) # -# NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), Int32(0), Int32(0)) # Compiler.run_passes(ci, 1, [NullLineInfo]) # # XXX: missing @test #end @@ -73,8 +61,8 @@ let cfg = CFG(BasicBlock[ @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 let correct_idoms = Compiler.naive_idoms(cfg.blocks), correct_pidoms = Compiler.naive_idoms(cfg.blocks, true) - @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms - @test Compiler.construct_postdomtree(cfg.blocks).idoms_bb == correct_pidoms + @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms + @test Compiler.construct_postdomtree(cfg).idoms_bb == correct_pidoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -85,25 +73,22 @@ let cfg = CFG(BasicBlock[ c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs)) d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) - @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms - @test Compiler.construct_postdomtree(cfg′.blocks).idoms_bb == correct_pidoms + @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms + @test Compiler.construct_postdomtree(cfg′).idoms_bb == correct_pidoms end end end end -# test >: -let - f(a, b) = a >: b - code_typed(f, Tuple{Any, Any}) - # XXX: missing @test +# test code execution with the default compile-mode +module CompilerExecTest +include("interpreter_exec.jl") end -for compile in ("min", "yes") - cmd = `$(Base.julia_cmd()) --compile=$compile interpreter_exec.jl` - if !success(pipeline(Cmd(cmd, dir=@__DIR__); stdout=stdout, stderr=stderr)) - error("Interpreter test failed, cmd : $cmd") - end +# test code execution with the interpreter mode (compile=min) +module InterpreterExecTest +Base.Experimental.@compiler_options compile=min +include("interpreter_exec.jl") end # PR #32145 @@ -116,8 +101,9 @@ let cfg = CFG(BasicBlock[ make_bb([0, 1, 2] , [5] ), # 0 predecessor should be preserved make_bb([2, 3] , [] ), ], Int[]) - insts = Compiler.InstructionStream([], [], Any[], Int32[], UInt8[]) - ir = Compiler.IRCode(insts, cfg, Core.LineInfoNode[], Any[], Expr[], Compiler.VarState[]) + insts = Compiler.InstructionStream([], [], Compiler.CallInfo[], Int32[], UInt32[]) + di = Compiler.DebugInfoStream(insts.line) + ir = Compiler.IRCode(insts, cfg, di, Any[], Expr[], Compiler.VarState[]) compact = Compiler.IncrementalCompact(ir, true) @test length(compact.cfg_transform.result_bbs) == 4 && 0 in compact.cfg_transform.result_bbs[3].preds end @@ -143,54 +129,55 @@ end @test f32579(0, false) === false # Test for bug caused by renaming blocks improperly, 
related to PR #32145 -let ci = make_ci([ +let code = Any[ # block 1 - Core.Compiler.GotoIfNot(Expr(:boundscheck), 6), + Expr(:boundscheck), + Compiler.GotoIfNot(SSAValue(1), 6), # block 2 - Expr(:call, GlobalRef(Base, :size), Core.Compiler.Argument(3)), - Core.Compiler.ReturnNode(), + Expr(:call, GlobalRef(Base, :size), Compiler.Argument(3)), + Compiler.ReturnNode(), # block 3 Core.PhiNode(), - Core.Compiler.ReturnNode(), + Compiler.ReturnNode(), # block 4 GlobalRef(Main, :something), GlobalRef(Main, :somethingelse), - Expr(:call, Core.SSAValue(6), Core.SSAValue(7)), - Core.Compiler.GotoIfNot(Core.SSAValue(8), 11), + Expr(:call, Core.SSAValue(7), Core.SSAValue(8)), + Compiler.GotoIfNot(Core.SSAValue(9), 12), # block 5 - Core.Compiler.ReturnNode(Core.SSAValue(8)), + Compiler.ReturnNode(Core.SSAValue(9)), # block 6 - Core.Compiler.ReturnNode(Core.SSAValue(8)) - ]) - ir = Core.Compiler.inflate_ir(ci) - ir = Core.Compiler.compact!(ir, true) - @test Core.Compiler.verify_ir(ir) === nothing + Compiler.ReturnNode(Core.SSAValue(9)) + ] + ir = make_ircode(code) + ir = Compiler.compact!(ir, true) + @test Compiler.verify_ir(ir) === nothing end # Test that the verifier doesn't choke on cglobals (which aren't linearized) -let ci = make_ci([ +let code = Any[ Expr(:call, GlobalRef(Main, :cglobal), Expr(:call, Core.tuple, :(:c)), Nothing), - Core.Compiler.ReturnNode() - ]) - ir = Core.Compiler.inflate_ir(ci) - @test Core.Compiler.verify_ir(ir) === nothing + Compiler.ReturnNode() + ] + ir = make_ircode(code) + @test Compiler.verify_ir(ir) === nothing end # Test that GlobalRef in value position is non-canonical -let ci = make_ci([ +let code = Any[ Expr(:call, GlobalRef(Main, :something_not_defined_please)) ReturnNode(SSAValue(1)) - ]) - ir = Core.Compiler.inflate_ir(ci) - ir = Core.Compiler.compact!(ir, true) - @test_throws ErrorException Core.Compiler.verify_ir(ir, false) + ] + ir = make_ircode(code; verify=false) + ir = Compiler.compact!(ir, true) + @test_throws ["IR verification failed.", "Code location: "] Compiler.verify_ir(ir, false) end # Issue #29107 -let ci = make_ci([ +let code = Any[ # Block 1 - Core.Compiler.GotoNode(6), + Compiler.GotoNode(6), # Block 2 # The following phi node gets deleted because it only has one edge, so # the call to `something` is made to use the value of `something2()`, @@ -200,12 +187,12 @@ let ci = make_ci([ Core.PhiNode(Int32[2], Any[Core.SSAValue(4)]), Expr(:call, :something, Core.SSAValue(2)), Expr(:call, :something2), - Core.Compiler.GotoNode(2), + Compiler.GotoNode(2), # Block 3 - Core.Compiler.ReturnNode(1000) - ]) - ir = Core.Compiler.inflate_ir(ci) - ir = Core.Compiler.compact!(ir, true) + Compiler.ReturnNode(1000) + ] + ir = make_ircode(code) + ir = Compiler.compact!(ir, true) # Make sure that if there is a call to `something` (block 2 should be # removed entirely with working DCE), it doesn't use any SSA values that # come after it. 
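# Illustrative sketch of the pattern used throughout this file (the statement list and
# slot types here are made up for the example): build an `IRCode` from a hand-written
# statement list with `make_ircode`, compact and verify it, and optionally execute it
# through an opaque closure.
let code = Any[
        Expr(:call, Core.Intrinsics.add_int, Argument(2), Argument(3)),
        ReturnNode(SSAValue(1)),
    ]
    ir = make_ircode(code; slottypes=Any[Any, Int, Int]) # slot 1 is the function itself
    ir = Compiler.compact!(ir, true)                     # renumber statements, run DCE
    @test Compiler.verify_ir(ir) === nothing
    oc = Core.OpaqueClosure(ir)                          # execute the hand-written IR
    @test oc(20, 22) == 42
end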
@@ -219,26 +206,109 @@ let ci = make_ci([ end end -# Make sure dead blocks that are removed are not still referenced in live phi -# nodes -let ci = make_ci([ +# Make sure dead blocks that are removed are not still referenced in live phi nodes +let code = Any[ # Block 1 - Core.Compiler.GotoNode(3), + Compiler.GotoNode(3), # Block 2 (no predecessors) - Core.Compiler.ReturnNode(3), + Compiler.ReturnNode(3), # Block 3 Core.PhiNode(Int32[1, 2], Any[100, 200]), - Core.Compiler.ReturnNode(Core.SSAValue(3)) - ]) - ir = Core.Compiler.inflate_ir(ci) - ir = Core.Compiler.compact!(ir, true) - @test Core.Compiler.verify_ir(ir) == nothing + Compiler.ReturnNode(Core.SSAValue(3)) + ] + ir = make_ircode(code; verify=false) + ir = Compiler.compact!(ir, true) + @test Compiler.verify_ir(ir) === nothing end # issue #37919 -let ci = code_lowered(()->@isdefined(_not_def_37919_), ())[1] - ir = Core.Compiler.inflate_ir(ci) - @test Core.Compiler.verify_ir(ir) === nothing +let ci = only(code_lowered(()->@isdefined(_not_def_37919_), ())) + ir = Compiler.inflate_ir(ci) + @test Compiler.verify_ir(ir) === nothing +end + +let code = Any[ + # block 1 + GotoIfNot(Argument(2), 4) + # block 2 + Expr(:call, throw, "potential throw") + ReturnNode() # unreachable + # block 3 + ReturnNode(Argument(3)) + ] + ir = make_ircode(code; slottypes=Any[Any,Bool,Int]) + visited = BitSet() + @test !Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int + push!(visited, succ) + return false + end + @test 2 ∈ visited + @test 3 ∈ visited + oc = Core.OpaqueClosure(ir) + @test oc(false, 1) == 1 + @test_throws "potential throw" oc(true, 1) +end + +let code = Any[ + # block 1 + GotoIfNot(Argument(2), 3) + # block 2 + ReturnNode(Argument(3)) + # block 3 + Expr(:call, throw, "potential throw") + ReturnNode() # unreachable + ] + ir = make_ircode(code; slottypes=Any[Any,Bool,Int]) + visited = BitSet() + @test !Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int + push!(visited, succ) + return false + end + @test 2 ∈ visited + @test 3 ∈ visited + oc = Core.OpaqueClosure(ir) + @test oc(true, 1) == 1 + @test_throws "potential throw" oc(false, 1) +end + +let code = Any[ + # block 1 + GotoIfNot(Argument(2), 5) + # block 2 + GotoNode(3) + # block 3 + Expr(:call, throw, "potential throw") + ReturnNode() + # block 4 + Expr(:call, Core.Intrinsics.add_int, Argument(3), Argument(4)) + GotoNode(7) + # block 5 + ReturnNode(SSAValue(5)) + ] + ir = make_ircode(code; slottypes=Any[Any,Bool,Int,Int]) + visited = BitSet() + @test !Compiler.visit_conditional_successors(ir, #=bb=#1) do succ::Int + push!(visited, succ) + return false + end + @test 2 ∈ visited + @test 3 ∈ visited + @test 4 ∈ visited + @test 5 ∈ visited + oc = Core.OpaqueClosure(ir) + @test oc(false, 1, 1) == 2 + @test_throws "potential throw" oc(true, 1, 1) + + let buf = IOBuffer() + oc = Core.OpaqueClosure(ir; slotnames=Symbol[:ocfunc, :x, :y, :z]) + try + oc(true, 1, 1) + catch + Base.show_backtrace(buf, catch_backtrace()) + end + s = String(take!(buf)) + @test occursin("(x::Bool, y::$Int, z::$Int)", s) + end end # Test dynamic update of domtree with edge insertions and deletions in the @@ -266,7 +336,7 @@ let cfg = CFG(BasicBlock[ make_bb([2, 6], []), make_bb([4], [5, 3]), ], Int[]) - domtree = Compiler.construct_domtree(cfg.blocks) + domtree = Compiler.construct_domtree(cfg) @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] @@ -323,6 +393,8 @@ f_if_typecheck() = (if nothing; end; 
unsafe_load(Ptr{Int}(0))) let # https://github.com/JuliaLang/julia/issues/42258 code = """ + using Base: Compiler + function foo() a = @noinline rand(rand(0:10)) if isempty(a) @@ -334,10 +406,10 @@ let # https://github.com/JuliaLang/julia/issues/42258 end code_typed(foo; optimize=true) - code_typed(Core.Compiler.setindex!, (Core.Compiler.UseRef,Core.Compiler.NewSSAValue); optimize=true) + code_typed(Compiler.setindex!, (Compiler.UseRef,Compiler.NewSSAValue); optimize=true) """ cmd = `$(Base.julia_cmd()) -g 2 -e $code` - stderr = Base.BufferStream() + stderr = IOBuffer() @test success(pipeline(Cmd(cmd); stdout, stderr)) @test readchomp(stderr) == "" end @@ -397,18 +469,18 @@ let function _test_userefs(@nospecialize stmt) ex = Expr(:call, :+, Core.SSAValue(3), 1) - urs = Core.Compiler.userefs(stmt)::Core.Compiler.UseRefIterator - it = Core.Compiler.iterate(urs) + urs = Compiler.userefs(stmt)::Compiler.UseRefIterator + it = Compiler.iterate(urs) while it !== nothing - ur = getfield(it, 1)::Core.Compiler.UseRef + ur = getfield(it, 1)::Compiler.UseRef op = getfield(it, 2)::Int - v1 = Core.Compiler.getindex(ur) + v1 = Compiler.getindex(ur) # set to dummy expression and then back to itself to test `_useref_setindex!` - v2 = Core.Compiler.setindex!(ur, ex) + v2 = Compiler.setindex!(ur, ex) test_useref(v2, ex, op) - Core.Compiler.setindex!(ur, v1) - @test Core.Compiler.getindex(ur) === v1 - it = Core.Compiler.iterate(urs, op) + Compiler.setindex!(ur, v1) + @test Compiler.getindex(ur) === v1 + it = Compiler.iterate(urs, op) end end @@ -420,10 +492,10 @@ let # this isn't valid code, we just care about looking at a variety of IR nodes body = Any[ - Expr(:enter, 11), + EnterNode(11), Expr(:call, :+, SSAValue(3), 1), Expr(:throw_undef_if_not, :expected, false), - Expr(:leave, 1), + Expr(:leave, Core.SSAValue(1)), Expr(:(=), SSAValue(1), Expr(:call, :+, SSAValue(3), 1)), UpsilonNode(), UpsilonNode(SSAValue(2)), @@ -454,31 +526,31 @@ let ir = Base.code_ircode((Bool,Any)) do c, x @test length(ir.cfg.blocks) == 4 for i = 1:4 @test any(ir.cfg.blocks[i].stmts) do j - inst = ir.stmts[j][:inst] + inst = ir.stmts[j][:stmt] iscall((ir, println), inst) && inst.args[3] == i end end # domination analysis - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - @test Core.Compiler.dominates(domtree, 1, 2) - @test Core.Compiler.dominates(domtree, 1, 3) - @test Core.Compiler.dominates(domtree, 1, 4) + domtree = Compiler.construct_domtree(ir) + @test Compiler.dominates(domtree, 1, 2) + @test Compiler.dominates(domtree, 1, 3) + @test Compiler.dominates(domtree, 1, 4) for i = 2:4 for j = 1:4 i == j && continue - @test !Core.Compiler.dominates(domtree, i, j) + @test !Compiler.dominates(domtree, i, j) end end # post domination analysis - post_domtree = Core.Compiler.construct_postdomtree(ir.cfg.blocks) - @test Core.Compiler.postdominates(post_domtree, 4, 1) - @test Core.Compiler.postdominates(post_domtree, 4, 2) - @test Core.Compiler.postdominates(post_domtree, 4, 3) + post_domtree = Compiler.construct_postdomtree(ir) + @test Compiler.postdominates(post_domtree, 4, 1) + @test Compiler.postdominates(post_domtree, 4, 2) + @test Compiler.postdominates(post_domtree, 4, 3) for i = 1:3 for j = 1:4 i == j && continue - @test !Core.Compiler.postdominates(post_domtree, i, j) + @test !Compiler.postdominates(post_domtree, i, j) end end end @@ -494,23 +566,23 @@ end # get the addition instruction add_stmt = ir.stmts[1] - @test Meta.isexpr(add_stmt[:inst], :call) && add_stmt[:inst].args[3] == 42 + @test Meta.isexpr(add_stmt[:stmt], 
:call) && add_stmt[:stmt].args[3] == 42 # replace the addition with a slightly different one - inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:inst].args[1], add_stmt[:inst].args[2], 999), Int) - node = Core.Compiler.insert_node!(ir, 1, inst) - Core.Compiler.setindex!(add_stmt, node, :inst) + inst = Compiler.NewInstruction(Expr(:call, add_stmt[:stmt].args[1], add_stmt[:stmt].args[2], 999), Int) + node = Compiler.insert_node!(ir, 1, inst) + Compiler.setindex!(add_stmt, node, :stmt) # perform compaction (not by calling compact! because with DCE the bug doesn't trigger) - compact = Core.Compiler.IncrementalCompact(ir) - state = Core.Compiler.iterate(compact) + compact = Compiler.IncrementalCompact(ir) + state = Compiler.iterate(compact) while state !== nothing - state = Core.Compiler.iterate(compact, state[2]) + state = Compiler.iterate(compact, state[2]) end - ir = Core.Compiler.complete(compact) + ir = Compiler.complete(compact) # test that the inserted node was compacted - @test Core.Compiler.length(ir.new_nodes) == 0 + @test Compiler.length(ir.new_nodes) == 0 # test that we performed copy propagation, but that the undef node was trimmed @test length(ir.stmts) == instructions @@ -518,100 +590,231 @@ end @test show(devnull, ir) === nothing end -@testset "IncrementalCompact statefulness" begin - foo(i) = i == 1 ? 1 : 2 - ir = only(Base.code_ircode(foo, (Int,)))[1] - compact = Core.Compiler.IncrementalCompact(ir) - - # set up first iterator - x = Core.Compiler.iterate(compact) - x = Core.Compiler.iterate(compact, x[2]) - - # set up second iterator - x = Core.Compiler.iterate(compact) - - # consume remainder - while x !== nothing - x = Core.Compiler.iterate(compact, x[2]) - end - - ir = Core.Compiler.complete(compact) - @test Core.Compiler.verify_ir(ir) === nothing -end - # insert_node! operations # ======================= import Core: SSAValue -import Core.Compiler: NewInstruction, insert_node! +import .Compiler: NewInstruction, insert_node! # insert_node! for pending node let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b a^b end |> only |> first - @test length(ir.stmts) == 2 - @test Meta.isexpr(ir.stmts[1][:inst], :invoke) + nstmts = length(ir.stmts) + invoke_idx = findfirst(@nospecialize(stmt)->Meta.isexpr(stmt, :invoke), ir.stmts.stmt) + @test invoke !== nothing - newssa = insert_node!(ir, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true) + invoke_ssa = SSAValue(invoke_idx) + newssa = insert_node!(ir, invoke_ssa, NewInstruction(Expr(:call, println, invoke_ssa), Nothing), #=attach_after=#true) newssa = insert_node!(ir, newssa, NewInstruction(Expr(:call, println, newssa), Nothing), #=attach_after=#true) - ir = Core.Compiler.compact!(ir) - @test length(ir.stmts) == 4 - @test Meta.isexpr(ir.stmts[1][:inst], :invoke) - call1 = ir.stmts[2][:inst] + ir = Compiler.compact!(ir) + @test length(ir.stmts) == nstmts + 2 + @test Meta.isexpr(ir.stmts[invoke_idx][:stmt], :invoke) + call1 = ir.stmts[invoke_idx+1][:stmt] @test iscall((ir,println), call1) - @test call1.args[2] === SSAValue(1) - call2 = ir.stmts[3][:inst] + @test call1.args[2] === invoke_ssa + call2 = ir.stmts[invoke_idx+2][:stmt] @test iscall((ir,println), call2) - @test call2.args[2] === SSAValue(2) + @test call2.args[2] === SSAValue(invoke_idx+1) +end + +# Issue #50379 - insert_node!(::IncrementalCompact, ...) 
at end of basic block +let code = Any[ + # block 1 + #= %1: =# Expr(:boundscheck), + #= %2: =# Compiler.GotoIfNot(SSAValue(1), 4), + # block 2 + #= %3: =# Expr(:call, println, Argument(1)), + # block 3 + #= %4: =# Core.PhiNode(), + #= %5: =# Compiler.ReturnNode(), + ] + ir = make_ircode(code) + + # Insert another call at end of "block 2" + compact = Compiler.IncrementalCompact(ir) + new_inst = NewInstruction(Expr(:call, println, Argument(1)), Nothing) + insert_node!(compact, SSAValue(3), new_inst, #= attach_after =# true) + + # Complete iteration + x = Compiler.iterate(compact) + while x !== nothing + x = Compiler.iterate(compact, x[2]) + end + ir = Compiler.complete(compact) + + @test Compiler.verify_ir(ir) === nothing +end + +# compact constant PiNode +let code = Any[ + PiNode(0.0, Const(0.0)) + ReturnNode(SSAValue(1)) + ] + ir = make_ircode(code) + ir = Compiler.compact!(ir) + @test fully_eliminated(ir) end # insert_node! with new instruction with flag computed let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b a^b end |> only |> first - invoke_idx = findfirst(ir.stmts.inst) do @nospecialize(x) - Meta.isexpr(x, :invoke) - end + invoke_idx = findfirst(@nospecialize(stmt)->Meta.isexpr(stmt, :invoke), ir.stmts.stmt) @test invoke_idx !== nothing - invoke_expr = ir.stmts.inst[invoke_idx] + invoke_expr = ir.stmts.stmt[invoke_idx] + invoke_ssa = SSAValue(invoke_idx) # effect-ful node - let compact = Core.Compiler.IncrementalCompact(Core.Compiler.copy(ir)) - insert_node!(compact, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true) - state = Core.Compiler.iterate(compact) + let compact = Compiler.IncrementalCompact(Compiler.copy(ir)) + insert_node!(compact, invoke_ssa, NewInstruction(Expr(:call, println, invoke_ssa), Nothing), #=attach_after=#true) + state = Compiler.iterate(compact) while state !== nothing - state = Core.Compiler.iterate(compact, state[2]) - end - ir = Core.Compiler.finish(compact) - new_invoke_idx = findfirst(ir.stmts.inst) do @nospecialize(x) - x == invoke_expr + state = Compiler.iterate(compact, state[2]) end + ir = Compiler.finish(compact) + new_invoke_idx = findfirst(@nospecialize(stmt)->stmt==invoke_expr, ir.stmts.stmt) @test new_invoke_idx !== nothing - new_call_idx = findfirst(ir.stmts.inst) do @nospecialize(x) - iscall((ir,println), x) && x.args[2] === SSAValue(invoke_idx) + new_call_idx = findfirst(ir.stmts.stmt) do @nospecialize(stmt) + iscall((ir,println), stmt) && stmt.args[2] === SSAValue(new_invoke_idx) end @test new_call_idx !== nothing @test new_call_idx == new_invoke_idx+1 end # effect-free node - let compact = Core.Compiler.IncrementalCompact(Core.Compiler.copy(ir)) - insert_node!(compact, SSAValue(1), NewInstruction(Expr(:call, GlobalRef(Base, :add_int), SSAValue(1), SSAValue(1)), Int), #=attach_after=#true) - state = Core.Compiler.iterate(compact) + let compact = Compiler.IncrementalCompact(Compiler.copy(ir)) + insert_node!(compact, invoke_ssa, NewInstruction(Expr(:call, GlobalRef(Base, :add_int), invoke_ssa, invoke_ssa), Int), #=attach_after=#true) + state = Compiler.iterate(compact) while state !== nothing - state = Core.Compiler.iterate(compact, state[2]) + state = Compiler.iterate(compact, state[2]) end - ir = Core.Compiler.finish(compact) + ir = Compiler.finish(compact) - ir = Core.Compiler.finish(compact) - new_invoke_idx = findfirst(ir.stmts.inst) do @nospecialize(x) - x == invoke_expr - end + ir = Compiler.finish(compact) + new_invoke_idx = 
findfirst(@nospecialize(stmt)->stmt==invoke_expr, ir.stmts.stmt) @test new_invoke_idx !== nothing - new_call_idx = findfirst(ir.stmts.inst) do @nospecialize(x) - iscall((ir,Base.add_int), x) && x.args[2] === SSAValue(invoke_idx) + new_call_idx = findfirst(ir.stmts.stmt) do @nospecialize(x) + iscall((ir,Base.add_int), x) && x.args[2] === SSAValue(new_invoke_idx) end @test new_call_idx === nothing # should be deleted during the compaction end end + +@testset "GotoIfNot folding" begin + # After IRCode conversion, following the targets of a GotoIfNot should never lead to + # statically unreachable code. + function f_with_maybe_nonbool_cond(a::Int, r::Bool) + a = r ? true : a + if a + # The following conditional can be resolved statically, since `a === true` + # This test checks that it becomes a static `goto` despite its wide slottype. + x = a ? 1 : 2. + else + x = a ? 1 : 2. + end + return x + end + let + # At least some statements should have been found to be statically unreachable and wrapped in Const(...)::Union{} + unopt = code_typed1(f_with_maybe_nonbool_cond, (Int, Bool); optimize=false) + @test any(j -> isa(unopt.code[j], Core.Const) && unopt.ssavaluetypes[j] == Union{}, 1:length(unopt.code)) + + # Any GotoIfNot destinations after IRCode conversion should not be statically unreachable + ircode = first(only(Base.code_ircode(f_with_maybe_nonbool_cond, (Int, Bool); optimize_until="convert"))) + for i = 1:length(ircode.stmts) + expr = ircode.stmts[i][:stmt] + if isa(expr, GotoIfNot) + # If this statement is Core.Const(...)::Union{}, that means this code was not reached + @test !(isa(ircode.stmts[i+1][:stmt], Core.Const) && (unopt.ssavaluetypes[i+1] === Union{})) + @test !(isa(ircode.stmts[expr.dest][:stmt], Core.Const) && (unopt.ssavaluetypes[expr.dest] === Union{})) + end + end + end +end + +# Test that things don't break if one branch of the frontend PhiNode becomes unreachable +const global_error_switch_const1::Bool = false +function gen_unreachable_phinode_edge1(world::UInt, source, args...) + ci = make_codeinfo(Any[ + # block 1 + GlobalRef(@__MODULE__, :global_error_switch_const1), + GotoIfNot(SSAValue(1), 4), + # block 2 + Expr(:call, identity, Argument(3)), + # block 3 + PhiNode(Int32[2, 3], Any[Argument(2), SSAValue(3)]), + ReturnNode(SSAValue(4)) + ]; slottypes=Any[Any,Int,Int]) + ci.slotnames = Symbol[:var"#self#", :x, :y] + ci.nargs = 3 + ci.isva = false + return ci +end +@eval function f_unreachable_phinode_edge1(x, y) + $(Expr(:meta, :generated, gen_unreachable_phinode_edge1)) + $(Expr(:meta, :generated_only)) + #= no body =# +end +@test f_unreachable_phinode_edge1(1, 2) == 1 + +const global_error_switch_const2::Bool = true +function gen_unreachable_phinode_edge2(world::UInt, source, args...) 
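    # Same shape as `gen_unreachable_phinode_edge1`, but the switch is the constant `true`,
    # so the `GotoIfNot` below always falls through to the `identity` call; this time it is
    # the PhiNode edge carrying `Argument(2)` that becomes unreachable, and the function
    # returns `y` (the second argument) instead of `x`.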
+ ci = make_codeinfo(Any[ + # block 1 + GlobalRef(@__MODULE__, :global_error_switch_const2), + GotoIfNot(SSAValue(1), 4), + # block 2 + Expr(:call, identity, Argument(3)), + # block 3 + PhiNode(Int32[2, 3], Any[Argument(2), SSAValue(3)]), + ReturnNode(SSAValue(4)) + ]; slottypes=Any[Any,Int,Int]) + ci.slotnames = Symbol[:var"#self#", :x, :y] + ci.nargs = 3 + ci.isva = false + return ci +end +@eval function f_unreachable_phinode_edge2(x, y) + $(Expr(:meta, :generated, gen_unreachable_phinode_edge2)) + $(Expr(:meta, :generated_only)) + #= no body =# +end +@test f_unreachable_phinode_edge2(1, 2) == 2 + +global global_error_switch::Bool = true +function gen_must_throw_phinode_edge(world::UInt, source, _) + ci = make_codeinfo(Any[ + # block 1 + GlobalRef(@__MODULE__, :global_error_switch), + GotoIfNot(SSAValue(1), 4), + # block 2 + Expr(:call, error, "This error is expected"), + # block 3 + PhiNode(Int32[2, 3], Any[1, 2]), + ReturnNode(SSAValue(4)) + ]; slottypes=Any[Any]) + ci.slotnames = Symbol[:var"#self#"] + ci.nargs = 1 + ci.isva = false + return ci +end +@eval function f_must_throw_phinode_edge() + $(Expr(:meta, :generated, gen_must_throw_phinode_edge)) + $(Expr(:meta, :generated_only)) + #= no body =# +end +let ir = first(only(Base.code_ircode(f_must_throw_phinode_edge))) + @test !any(@nospecialize(x)->isa(x,PhiNode), ir.stmts.stmt) +end +@test_throws ErrorException f_must_throw_phinode_edge() +global global_error_switch = false +@test f_must_throw_phinode_edge() == 1 + +# Test roundtrip of debuginfo compression +let cl = Int32[32, 1, 1, 1000, 240, 230] + str = ccall(:jl_compress_codelocs, Any, (Int32, Any, Int), 378, cl, 2)::String; + cl2 = ccall(:jl_uncompress_codelocs, Any, (Any, Int), str, 2) + @test cl == cl2 +end diff --git a/Compiler/test/tarjan.jl b/Compiler/test/tarjan.jl new file mode 100644 index 0000000000000..aa04bd94a6f6a --- /dev/null +++ b/Compiler/test/tarjan.jl @@ -0,0 +1,169 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test + +include("irutils.jl") + +using .Compiler: CFGReachability, DomTree, CFG, BasicBlock, StmtRange, dominates, + bb_unreachable, kill_edge! 
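# Illustrative sketch (not part of the randomized tests below): a hand-built diamond CFG
#     1 -> {2, 3},  2 -> 4,  3 -> 4
# exercising the same construction and queries the tests rely on.
let bbs = BasicBlock[
        BasicBlock(StmtRange(0, 0), Int[],     Int[2, 3]),
        BasicBlock(StmtRange(0, 0), Int[1],    Int[4]),
        BasicBlock(StmtRange(0, 0), Int[1],    Int[4]),
        BasicBlock(StmtRange(0, 0), Int[2, 3], Int[]),
    ]
    cfg = CFG(bbs, zeros(Int, length(bbs) + 1))
    domtree = Compiler.construct_domtree(cfg)
    @test dominates(domtree, 1, 4)            # the entry block dominates the join block
    reach = CFGReachability(cfg, domtree)
    @test !bb_unreachable(reach, 2)
    # Deleting the only edge into block 2 makes it unreachable:
    kill_edge!(reach, cfg, 1, 2, (from::Int, to::Int) -> nothing, (bb::Int) -> nothing)
    @test bb_unreachable(reach, 2)
end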
+ +function reachable(g::CFG, a::Int, b::Int; domtree=nothing) + visited = BitVector(false for _ = 1:length(g.blocks)) + worklist = Int[a] + while !isempty(worklist) + node = pop!(worklist) + node == b && return true + visited[node] = true + for child in g.blocks[node].succs + if domtree !== nothing && dominates(domtree, child, node) + continue # if provided `domtree`, ignore back-edges + end + + !visited[child] && push!(worklist, child) + end + end + return false +end + +function rand_cfg(V, E) + bbs = [BasicBlock(StmtRange(0,0), Int[], Int[]) for _ = 1:V] + + reachable = BitVector(false for _ = 1:V) + reachable[1] = true + + targets = BitVector(false for _ = 1:V) + + for _ = 1:E + # Pick any source (with at least 1 missing edge) + source, dest = 0, 0 + while true + source = rand(findall(reachable)) + for v = 1:V + targets[v] = !in(v, bbs[source].succs) + end + any(targets) && break + end + + # Pick any new target for source + dest = rand(findall(targets)) + + # Add edge to graph + push!(bbs[source].succs, dest) + push!(bbs[dest].preds, source) + + reachable[dest] = true + end + + return CFG(bbs, zeros(Int, V + 1)) +end + +function get_random_edge(cfg::CFG, V) + has_edge = [length(cfg.blocks[bb].succs) != 0 for bb in 1:V] + source = rand(findall(has_edge)) + target = rand(cfg.blocks[source].succs) + return source, target +end + +# Generate a random CFG with the requested number of vertices and edges, then simulate +# `deletions` edge removals and verify that reachability is maintained correctly. +# +# If `all_checks` is true, verify internal data structures as well with O(E^2) checks. +function test_reachability(V, E; deletions = 2E ÷ 3, all_checks=false) + + function check_reachability(reachability, cfg, domtree, all_checks) + for i = 1:V + # All nodes should be reported as unreachable only if we cannot reach them from BB #1. + @test reachable(cfg, 1, i) == !bb_unreachable(reachability, i) + + # All predecessors of a reachable block should be reachable. + if !bb_unreachable(reachability, i) + for pred in cfg.blocks[i].preds + @test !bb_unreachable(reachability, pred) + end + end + end + + if all_checks # checks for internal data structures - O(E^2) + + # Nodes should be mutually reachable iff they are in the same SCompiler. + scc = reachability.scc + reachable_nodes = BitSet(v for v = 1:V if !bb_unreachable(reachability, v)) + for i ∈ reachable_nodes + for j ∈ reachable_nodes + @test (reachable(cfg, i, j; domtree) && reachable(cfg, j, i; domtree)) == (scc[i] == scc[j]) + end + end + + # Nodes in any non-trivial SCC (ignoring backedges) should be marked irreducible. 
+ irreducible = reachability.irreducible + for i ∈ reachable_nodes + in_nontrivial_scc = any(v != i && scc[v] == scc[i] for v = 1:V) + @test Compiler.getindex(irreducible, i) == in_nontrivial_scc + end + end + end + + cfg = rand_cfg(V, E) + domtree = Compiler.construct_domtree(cfg) + reachability = CFGReachability(cfg, domtree) + check_reachability(reachability, cfg, domtree, all_checks) + + # track the reachable blocks/edges so that we can verify callbacks below + blocks = Set{Int}() + edges = Set{Tuple{Int,Int}}() + for bb in 1:V + !bb_unreachable(reachability, bb) && push!(blocks, bb) + for succ in cfg.blocks[bb].succs + push!(edges, (bb, succ)) + end + end + + killed_edges = Tuple{Int,Int}[] + killed_blocks = Int[] + for k = 1:deletions + length(blocks) == 1 && break # no more reachable blocks + + from, to = get_random_edge(cfg, V) + kill_edge!(reachability, cfg, from, to, + (from::Int, to::Int) -> push!(killed_edges, (from, to)), + (bb::Int) -> push!(killed_blocks, bb), + ) + + # If these nodes are still reachable, to and from edges should have been removed. + @test !reachable(cfg, 1, from) || !in(to, cfg.blocks[from].succs) + @test !reachable(cfg, 1, to) || !in(from, cfg.blocks[to].preds) + + check_reachability(reachability, cfg, domtree, all_checks) + + for bb in 1:V + if bb_unreachable(reachability, bb) && in(bb, blocks) + # If the block changed from reachable -> unreachable, we should have gotten a callback. + @test bb in killed_blocks + delete!(blocks, bb) + end + end + for (from, to) in edges + if !in(from, cfg.blocks[to].preds) && !bb_unreachable(reachability, to) + # If the edge changed from reachable -> unreachable and feeds into a reachable BasicBlock, + # we should have gotten a callback. + @test (from, to) in killed_edges + delete!(edges, (from, to)) + end + end + + empty!(killed_edges) + empty!(killed_blocks) + end +end + +@testset "CFGReachability tests" begin + test_reachability(1, 0; all_checks=true) + + test_reachability(10, 15; all_checks=true) + test_reachability(10, 15; all_checks=true) + test_reachability(10, 15; all_checks=true) + + test_reachability(100, 150; all_checks=false) + test_reachability(100, 150; all_checks=false) + test_reachability(100, 1000; all_checks=false) +end diff --git a/Compiler/test/testgroups b/Compiler/test/testgroups new file mode 100644 index 0000000000000..d17735a52a025 --- /dev/null +++ b/Compiler/test/testgroups @@ -0,0 +1,18 @@ +AbstractInterpreter +EscapeAnalysis +codegen +compact +contextual +datastructures +effects +inference +inline +interpreter_exec +invalidation +irpasses +newinterp +ssair +tarjan +validation +special_loading +abioverride diff --git a/test/compiler/validation.jl b/Compiler/test/validation.jl similarity index 59% rename from test/compiler/validation.jl rename to Compiler/test/validation.jl index 5fd074fee73ae..5328516f63d36 100644 --- a/test/compiler/validation.jl +++ b/Compiler/test/validation.jl @@ -2,6 +2,8 @@ using Test, Core.IR +include("setup_Compiler.jl") + function f22938(a, b, x...) 
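    # Dummy method: its lowered code is retrieved below as `c0` and then deliberately
    # corrupted in each testset to trigger a specific `Compiler.validate_code` error kind.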
nothing nothing @@ -21,17 +23,17 @@ end msig = Tuple{typeof(f22938),Int,Int,Int,Int} world = Base.get_world_counter() match = only(Base._methods_by_ftype(msig, -1, world)) -mi = Core.Compiler.specialize_method(match) -c0 = Core.Compiler.retrieve_code_info(mi, world) +mi = Compiler.specialize_method(match) +c0 = Compiler.retrieve_code_info(mi, world) -@test isempty(Core.Compiler.validate_code(mi, c0)) +@test isempty(Compiler.validate_code(mi, c0)) @testset "INVALID_EXPR_HEAD" begin c = copy(c0) c.code[1] = Expr(:invalid, 1) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 1 - @test errors[1].kind === Core.Compiler.INVALID_EXPR_HEAD + @test errors[1].kind === Compiler.INVALID_EXPR_HEAD end @testset "INVALID_LVALUE" begin @@ -39,9 +41,9 @@ end c.code[1] = Expr(:(=), GotoNode(1), 1) c.code[2] = Expr(:(=), :x, 1) c.code[3] = Expr(:(=), 3, 1) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 3 - @test all(e.kind === Core.Compiler.INVALID_LVALUE for e in errors) + @test all(e.kind === Compiler.INVALID_LVALUE for e in errors) end @testset "INVALID_RVALUE" begin @@ -52,9 +54,9 @@ end for h in (:line, :const, :meta) c.code[i+=1] = Expr(:(=), SlotNumber(2), Expr(h)) end - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 5 - @test count(e.kind === Core.Compiler.INVALID_RVALUE for e in errors) == 5 + @test count(e.kind === Compiler.INVALID_RVALUE for e in errors) == 5 end @testset "INVALID_CALL_ARG" begin @@ -66,74 +68,74 @@ end for h in (:line, :const, :meta) c.code[i+=1] = Expr(:call, GlobalRef(@__MODULE__,:f), Expr(h)) end - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 6 - @test count(e.kind === Core.Compiler.INVALID_CALL_ARG for e in errors) == 6 + @test count(e.kind === Compiler.INVALID_CALL_ARG for e in errors) == 6 end @testset "EMPTY_SLOTNAMES" begin c = copy(c0) empty!(c.slotnames) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 2 - @test any(e.kind === Core.Compiler.EMPTY_SLOTNAMES for e in errors) - @test any(e.kind === Core.Compiler.SLOTFLAGS_MISMATCH for e in errors) + @test any(e.kind === Compiler.EMPTY_SLOTNAMES for e in errors) + @test any(e.kind === Compiler.SLOTFLAGS_MISMATCH for e in errors) end @testset "SLOTFLAGS_MISMATCH" begin c = copy(c0) push!(c.slotflags, 0x00) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 1 - @test errors[1].kind === Core.Compiler.SLOTFLAGS_MISMATCH + @test errors[1].kind === Compiler.SLOTFLAGS_MISMATCH end @testset "SSAVALUETYPES_MISMATCH" begin c = code_typed(f22938, (Int,Int,Int,Int))[1][1] empty!(c.ssavaluetypes) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 1 - @test errors[1].kind === Core.Compiler.SSAVALUETYPES_MISMATCH + @test errors[1].kind === Compiler.SSAVALUETYPES_MISMATCH end @testset "SSAVALUETYPES_MISMATCH_UNINFERRED" begin c = copy(c0) c.ssavaluetypes -= 1 - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 1 - @test errors[1].kind === Core.Compiler.SSAVALUETYPES_MISMATCH_UNINFERRED + @test errors[1].kind === Compiler.SSAVALUETYPES_MISMATCH_UNINFERRED end @testset "SSAFLAGS_MISMATCH" begin c = copy(c0) empty!(c.ssaflags) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test 
length(errors) == 1 - @test errors[1].kind === Core.Compiler.SSAFLAGS_MISMATCH + @test errors[1].kind === Compiler.SSAFLAGS_MISMATCH end @testset "SIGNATURE_NARGS_MISMATCH" begin old_sig = mi.def.sig mi.def.sig = Tuple{1,2} - errors = Core.Compiler.validate_code(mi, nothing) + errors = Compiler.validate_code(mi, nothing) mi.def.sig = old_sig @test length(errors) == 1 - @test errors[1].kind === Core.Compiler.SIGNATURE_NARGS_MISMATCH + @test errors[1].kind === Compiler.SIGNATURE_NARGS_MISMATCH end @testset "NON_TOP_LEVEL_METHOD" begin c = copy(c0) c.code[1] = Expr(:method, :dummy) - errors = Core.Compiler.validate_code(c) + errors = Compiler.validate_code(c) @test length(errors) == 1 - @test errors[1].kind === Core.Compiler.NON_TOP_LEVEL_METHOD + @test errors[1].kind === Compiler.NON_TOP_LEVEL_METHOD end @testset "SLOTNAMES_NARGS_MISMATCH" begin mi.def.nargs += 20 - errors = Core.Compiler.validate_code(mi, c0) + errors = Compiler.validate_code(mi, c0) mi.def.nargs -= 20 @test length(errors) == 2 - @test count(e.kind === Core.Compiler.SLOTNAMES_NARGS_MISMATCH for e in errors) == 1 - @test count(e.kind === Core.Compiler.SIGNATURE_NARGS_MISMATCH for e in errors) == 1 + @test count(e.kind === Compiler.SLOTNAMES_NARGS_MISMATCH for e in errors) == 1 + @test count(e.kind === Compiler.SIGNATURE_NARGS_MISMATCH for e in errors) == 1 end diff --git a/HISTORY.md b/HISTORY.md index 935b203ffaa97..c3ca212453d07 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,466 @@ +Julia v1.11 Release Notes +======================== + +New language features +--------------------- +* `public` is a new keyword. Symbols marked with `public` are considered public + API. Symbols marked with `export` are now also treated as public API. The + difference between `public` and `export` is that `public` names do not become + available when `using` a package/module ([#50105]). +* `ScopedValue` implements dynamic scope with inheritance across tasks ([#50958]). +* The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`, + but allows to add keyword arguments to the function call ([#51501]). +* Support for Unicode 15.1 ([#51799]). +* Three new types around the idea of text with "annotations" (`Pair{Symbol, Any}` + entries, e.g. `:lang => "en"` or `:face => :magenta`). These annotations + are preserved across operations (e.g. string concatenation with `*`) when + possible. + * `AnnotatedString` is a new `AbstractString` type. It wraps an underlying + string and allows for annotations to be attached to regions of the string. + This type is used extensively in the new `StyledStrings` standard library to + hold styling information. + * `AnnotatedChar` is a new `AbstractChar` type. It wraps another char and + holds a list of annotations that apply to it. + * `AnnotatedIOBuffer` is a new `IO` type that mimics an `IOBuffer`, but has + specialised `read`/`write` methods for annotated content. This can be + thought of both as a "string builder" of sorts and also as glue between + annotated and unannotated content. +* `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml` + to be preferentially picked up by the given julia version. i.e. in the same folder, + a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia + version. This makes managing environments for multiple julia versions at the same time + easier ([#43845]). 
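For illustration, a minimal sketch of the `ScopedValue` API mentioned above (the
`verbosity` value and its defaults are invented for the example):
```
using Base.ScopedValues

const verbosity = ScopedValue(1)   # default value used outside any `with` block

with(verbosity => 3) do
    @assert verbosity[] == 3       # the new binding is visible in this dynamic scope
end                                # (including tasks spawned inside it)
@assert verbosity[] == 1           # the outer scope is unaffected
```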
+* `@time` now reports a count of any lock conflicts where a `ReentrantLock` had to wait, plus a new macro + `@lock_conflicts` which returns that count ([#52883]). + +Language changes +---------------- +* During precompilation, the `atexit` hooks now run before saving the output file. This + allows users to safely tear down background state (such as closing Timers and sending + disconnect notifications to heartbeat tasks) and cleanup other resources when the program + wants to begin exiting. +* Code coverage and malloc tracking is no longer generated during the package precompilation stage. + Further, during these modes pkgimage caches are now used for packages that are not being tracked. + This means that coverage testing (the default for `julia-actions/julia-runtest`) will by default use + pkgimage caches for all other packages than the package being tested, likely meaning faster test + execution. ([#52123]) + +* Specifying a path in `JULIA_DEPOT_PATH` now results in the expansion of empty strings to + omit the default user depot ([#51448]). + +Compiler/Runtime improvements +----------------------------- +* Updated GC heuristics to count allocated pages instead of individual objects ([#50144]). +* A new `LazyLibrary` type is exported from `Libdl` for use in building chained lazy library + loads, primarily to be used within JLLs ([#50074]). +* Added support for annotating `Base.@assume_effects` on code blocks ([#52400]). +* The libuv library has been updated from a base of v1.44.2 to v1.48.0 ([#49937]). + +Command-line option changes +--------------------------- + +* The entry point for Julia has been standardized to `Main.main(Base.ARGS)`. This must be explicitly opted into using the `@main` macro +(see the docstring for further details). When opted-in, and julia is invoked to run a script or expression +(i.e. using `julia script.jl` or `julia -e expr`), julia will subsequently run the `Main.main` function automatically. +This is intended to unify script and compilation workflows, where code loading may happen +in the compiler and execution of `Main.main` may happen in the resulting executable. For interactive use, there is no semantic +difference between defining a `main` function and executing the code directly at the end of the script ([50974]). +* The `--compiled-modules` and `--pkgimages` flags can now be set to `existing`, which will + cause Julia to consider loading existing cache files, but not to create new ones ([#50586] + and [#52573]). +* The `--project` argument now accepts `@script` to give a path to a directory with a Project.toml relative to the passed script file. `--project=@script/foo` for the `foo` subdirectory. If no path is given after (i.e. `--project=@script`) then (like `--project=@.`) the directory and its parents are searched for a Project.toml ([#50864] and [#53352]) + +Multi-threading changes +----------------------- + +* `Threads.@threads` now supports the `:greedy` scheduler, intended for non-uniform workloads ([#52096]). +* A new public (but unexported) struct `Base.Lockable{T, L<:AbstractLock}` makes it easy to bundle a resource and its lock together ([#52898]). + +Build system changes +-------------------- + +* There is a new `Makefile` to build Julia and LLVM using the profile-guided and link-time optimizations (PGO and LTO) strategies, see `contrib/pgo-lto/Makefile` ([#45641]). + +New library functions +--------------------- + +* `in!(x, s::AbstractSet)` will return whether `x` is in `s`, and insert `x` in `s` if not. 
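  A small illustration (hypothetical values):
  ```
  s = Set{Int}()
  in!(3, s)   # returns false: 3 was absent and has now been inserted
  in!(3, s)   # returns true: 3 is already present; `s` is unchanged
  ```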
+* The new `Libc.mkfifo` function wraps the `mkfifo` C function on Unix platforms ([#34587]). +* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]). +* `eachrsplit(string, pattern)` iterates split substrings right to left. +* `Sys.username()` can be used to return the current user's username ([#51897]). +* `GC.logging_enabled()` can be used to test whether GC logging has been enabled via `GC.enable_logging` ([#51647]). +* `IdSet` is now exported from Base and considered public ([#53262]). + +New library features +-------------------- + +* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]). +* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]). +* `replace(string, pattern...)` now supports an optional `IO` argument to + write the output to a stream rather than returning a string ([#48625]). +* New methods `allequal(f, itr)` and `allunique(f, itr)` taking a predicate function ([#47679]). +* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]). +* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]). +* New function `Docs.undocumented_names(module)` returns a module's undocumented public names ([#52413]). +* Passing an `IOBuffer` as a stdout argument for `Process` spawn now works as + expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is + no longer required there for correctness to avoid data races ([#52461]). +* After a process exits, `closewrite` will no longer be automatically called on + the stream passed to it. Call `wait` on the process instead to ensure the + content is fully written, then call `closewrite` manually to avoid + data-races. Or use the callback form of `open` to have all that handled + automatically. +* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889]) +* `filter` can now act on a `NamedTuple` ([#50795]). +* `Iterators.cycle(iter, n)` runs over `iter` a fixed number of times, instead of forever ([#47354]) +* `zero(::AbstractArray)` now applies recursively, so `zero([[1,2],[3,4,5]])` now produces the additive identity `[[0,0],[0,0,0]]` rather than erroring ([#38064]). + +Standard library changes +------------------------ + +* It's not possible to define `length` for stateful iterators in a generally consistent manner. The + potential for silently incorrect results for `Stateful` iterators is addressed by deleting the + `length(::Stateful)` method. The last type parameter of `Stateful` is gone, too. Issue: ([#47790]), + PR: ([#51747]). + +#### StyledStrings + +* A new standard library for handling styling in a more comprehensive and structured way ([#49586]). +* The new `Faces` struct serves as a container for text styling information + (think typeface, as well as color and decoration), and comes with a framework + to provide a convenient, extensible (via `addface!`), and customisable (with a + user's `Faces.toml` and `loadfaces!`) approach to + styled content ([#49586]). +* The new `@styled_str` string macro provides a convenient way of creating a + `AnnotatedString` with various faces or other attributes applied ([#49586]). + +#### Package Manager +* It is now possible to specify "sources" for packages in a `[sources]` section in Project.toml. + This can be used to add non-registered normal or test dependencies. 
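A brief illustration of a few of the new-library-feature entries above (values invented
for the example):
```
invmod(3, UInt8)                   # 0xab: 3 * 0xab == 1 (mod 2^8)
allunique(abs, [1, -1, 2])         # false: |1| == |-1|
collect(Iterators.cycle("ab", 2))  # ['a', 'b', 'a', 'b']
```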
+* Pkg now obeys `[compat]` bounds for `julia` and raises an error if the version of the running Julia binary is incompatible with the bounds in `Project.toml`. + Pkg has always obeyed this compat when working with Registry packages. This change affects mostly local packages +* `pkg> add` and `Pkg.add` will now add compat entries for new direct dependencies if the active environment is a + package (has a `name` and `uuid` entry). +* Dependencies can now be directly added as weak deps or extras via the `pkg> add --weak/extra Foo` or + `Pkg.add("Foo", target=:weakdeps/:extras)` forms. + +#### LinearAlgebra +* `cbrt(::AbstractMatrix{<:Real})` is now defined and returns real-valued matrix cube roots of real-valued matrices ([#50661]). +* `eigvals/eigen(A, bunchkaufman(B))` and `eigvals/eigen(A, lu(B))`, which utilize the Bunchkaufman (LDL) and LU decomposition of `B`, + respectively, now efficiently compute the generalized eigenvalues (`eigen`: and eigenvectors) of `A` and `B`. Note: The second + argument is the output of `bunchkaufman` or `lu` ([#50471]). +* There is now a specialized dispatch for `eigvals/eigen(::Hermitian{<:Tridiagonal})` which performs a similarity transformation to create a real symmetrix triagonal matrix, and solve that using the LAPACK routines ([#49546]). +* Structured matrices now retain either the axes of the parent (for `Symmetric`/`Hermitian`/`AbstractTriangular`/`UpperHessenberg`), or that of the principal diagonal (for banded matrices) ([#52480]). +* `bunchkaufman` and `bunchkaufman!` now work for any `AbstractFloat`, `Rational` and their complex variants. `bunchkaufman` now supports `Integer` types, by making an internal conversion to `Rational{BigInt}`. Added new function `inertia` that computes the inertia of the diagonal factor given by the `BunchKaufman` factorization object of a real symmetric or Hermitian matrix. For complex symmetric matrices, `inertia` only computes the number of zero eigenvalues of the diagonal factor ([#51487]). +* Packages that specialize matrix-matrix `mul!` with a method signature of the form `mul!(::AbstractMatrix, ::MyMatrix, ::AbstractMatrix, ::Number, ::Number)` no longer encounter method ambiguities when interacting with `LinearAlgebra`. Previously, ambiguities used to arise when multiplying a `MyMatrix` with a structured matrix type provided by LinearAlgebra, such as `AbstractTriangular`, which used to necessitate additional methods to resolve such ambiguities. Similar sources of ambiguities have also been removed for matrix-vector `mul!` operations ([#52837]). +* `lu` and `issuccess(::LU)` now accept an `allowsingular` keyword argument. When set to `true`, a valid factorization with rank-deficient U factor will be treated as success instead of throwing an error. Such factorizations are now shown by printing the factors together with a "rank-deficient" note rather than printing a "Failed Factorization" message ([#52957]). + +#### Logging + +#### Printf + +#### Profile + +#### Random +* `rand` now supports sampling over `Tuple` types ([#35856], [#50251]). +* `rand` now supports sampling over `Pair` types ([#28705]). +* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]). +* Seedable random number generators from `Random` can now be seeded by a string, e.g. + `seed!(rng, "a random seed")` ([#51527]). + +#### REPL + +* Tab complete hints now show in lighter text while typing in the repl. 
To disable + set `Base.active_repl.options.hint_tab_completes = false` interactively, or in startup.jl: + ``` + if VERSION >= v"1.11.0-0" + atreplinit() do repl + repl.options.hint_tab_completes = false + end + end + ``` ([#51229]). +* Meta-M with an empty prompt now toggles the contextual module between the previous non-Main + contextual module and Main so that switching back and forth is simple. ([#51616], [#52670]) + +#### SuiteSparse + + +#### SparseArrays + +#### Test + +#### Dates + +The undocumented function `adjust` is no longer exported but is now documented + +#### Statistics + +* Statistics is now an upgradeable standard library ([#46501]). + +#### Distributed + +* `pmap` now defaults to using a `CachingPool` ([#33892]). + +#### Unicode + + +#### DelimitedFiles + + +#### InteractiveUtils + +Deprecated or removed +--------------------- + +* `Base.map`, `Iterators.map`, and `foreach` lost their single-argument methods ([#52631]). + + +External dependencies +--------------------- +* `tput` is no longer called to check terminal capabilities, it has been replaced with a pure-Julia terminfo parser ([#50797]). + +Tooling Improvements +-------------------- + +* CI now performs limited automatic typo detection on all PRs. If you merge a PR with a + failing typo CI check, then the reported typos will be automatically ignored in future CI + runs on PRs that edit those same files ([#51704]). + + +[#28705]: https://github.com/JuliaLang/julia/issues/28705 +[#33892]: https://github.com/JuliaLang/julia/issues/33892 +[#34587]: https://github.com/JuliaLang/julia/issues/34587 +[#35856]: https://github.com/JuliaLang/julia/issues/35856 +[#38064]: https://github.com/JuliaLang/julia/issues/38064 +[#43845]: https://github.com/JuliaLang/julia/issues/43845 +[#45641]: https://github.com/JuliaLang/julia/issues/45641 +[#46501]: https://github.com/JuliaLang/julia/issues/46501 +[#47354]: https://github.com/JuliaLang/julia/issues/47354 +[#47679]: https://github.com/JuliaLang/julia/issues/47679 +[#47790]: https://github.com/JuliaLang/julia/issues/47790 +[#48273]: https://github.com/JuliaLang/julia/issues/48273 +[#48625]: https://github.com/JuliaLang/julia/issues/48625 +[#49546]: https://github.com/JuliaLang/julia/issues/49546 +[#49586]: https://github.com/JuliaLang/julia/issues/49586 +[#49937]: https://github.com/JuliaLang/julia/issues/49937 +[#50074]: https://github.com/JuliaLang/julia/issues/50074 +[#50105]: https://github.com/JuliaLang/julia/issues/50105 +[#50144]: https://github.com/JuliaLang/julia/issues/50144 +[#50251]: https://github.com/JuliaLang/julia/issues/50251 +[#50471]: https://github.com/JuliaLang/julia/issues/50471 +[#50586]: https://github.com/JuliaLang/julia/issues/50586 +[#50661]: https://github.com/JuliaLang/julia/issues/50661 +[#50795]: https://github.com/JuliaLang/julia/issues/50795 +[#50797]: https://github.com/JuliaLang/julia/issues/50797 +[#50864]: https://github.com/JuliaLang/julia/issues/50864 +[#50958]: https://github.com/JuliaLang/julia/issues/50958 +[#51229]: https://github.com/JuliaLang/julia/issues/51229 +[#51416]: https://github.com/JuliaLang/julia/issues/51416 +[#51448]: https://github.com/JuliaLang/julia/issues/51448 +[#51487]: https://github.com/JuliaLang/julia/issues/51487 +[#51501]: https://github.com/JuliaLang/julia/issues/51501 +[#51527]: https://github.com/JuliaLang/julia/issues/51527 +[#51616]: https://github.com/JuliaLang/julia/issues/51616 +[#51647]: https://github.com/JuliaLang/julia/issues/51647 +[#51704]: https://github.com/JuliaLang/julia/issues/51704 +[#51747]: 
https://github.com/JuliaLang/julia/issues/51747 +[#51799]: https://github.com/JuliaLang/julia/issues/51799 +[#51897]: https://github.com/JuliaLang/julia/issues/51897 +[#51929]: https://github.com/JuliaLang/julia/issues/51929 +[#52096]: https://github.com/JuliaLang/julia/issues/52096 +[#52123]: https://github.com/JuliaLang/julia/issues/52123 +[#52139]: https://github.com/JuliaLang/julia/issues/52139 +[#52180]: https://github.com/JuliaLang/julia/issues/52180 +[#52400]: https://github.com/JuliaLang/julia/issues/52400 +[#52413]: https://github.com/JuliaLang/julia/issues/52413 +[#52461]: https://github.com/JuliaLang/julia/issues/52461 +[#52480]: https://github.com/JuliaLang/julia/issues/52480 +[#52573]: https://github.com/JuliaLang/julia/issues/52573 +[#52631]: https://github.com/JuliaLang/julia/issues/52631 +[#52670]: https://github.com/JuliaLang/julia/issues/52670 +[#52837]: https://github.com/JuliaLang/julia/issues/52837 +[#52883]: https://github.com/JuliaLang/julia/issues/52883 +[#52889]: https://github.com/JuliaLang/julia/issues/52889 +[#52898]: https://github.com/JuliaLang/julia/issues/52898 +[#52957]: https://github.com/JuliaLang/julia/issues/52957 +[#53262]: https://github.com/JuliaLang/julia/issues/53262 +[#53352]: https://github.com/JuliaLang/julia/issues/53352 + + +Julia v1.10 Release Notes +========================= + +New language features +--------------------- + +* JuliaSyntax.jl is now used as the default parser, providing better diagnostics and faster + parsing. Set environment variable `JULIA_USE_FLISP_PARSER` to `1` to switch back to the old + parser if necessary (and if you find this necessary, please file an issue) ([#46372]). +* `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`) may now be used as + binary operators with arrow precedence ([#45962]). + +Language changes +---------------- + +* When a task forks a child, the parent task's task-local RNG (random number generator) is no longer affected. The seeding of child based on the parent task also takes a more disciplined approach to collision resistance, using a design based on the SplitMix and DotMix splittable RNG schemes ([#49110]). +* A new more-specific rule for methods resolves ambiguities containing Union{} in favor of + the method defined explicitly to handle the Union{} argument. This makes it possible to + define methods to explicitly handle Union{} without the ambiguities that commonly would + result previously. This also lets the runtime optimize certain method lookups in a way + that significantly improves load and inference times for heavily overloaded methods that + dispatch on Types (such as traits and constructors). +* The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). +* The `@simd` macro now has more limited and clearer semantics: it only enables reordering and contraction + of floating-point operations, instead of turning on all "fastmath" optimizations. + If you observe performance regressions due to this change, you can recover previous behavior with `@fastmath @simd`, + if you are OK with all the optimizations enabled by the `@fastmath` macro ([#49405]). +* When a method with keyword arguments is displayed in the stack trace view, the textual + representation of the keyword arguments' type is simplified using the new + `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). + +Compiler/Runtime improvements +----------------------------- + +* Updated GC heuristics to count allocated pages instead of object sizes ([#50144]). 
This should help + some programs that consumed excessive memory before. +* The mark phase of the garbage collector is now multi-threaded ([#48600]). +* [JITLink](https://llvm.org/docs/JITLink.html) is enabled by default on Linux aarch64 when Julia is linked to LLVM 15 or later versions ([#49745]). + This should resolve many segmentation faults previously observed on this platform. +* The precompilation process now uses pidfile locks and orchestrates multiple julia processes to only have one process + spend effort precompiling while the others wait. Previously all would do the work and race to overwrite the cache files. + ([#49052]) + +Command-line option changes +--------------------------- + +* New option `--gcthreads` to set how many threads will be used by the garbage collector ([#48600]). + The default is `N/2` where `N` is the number of worker threads (`--threads`) used by Julia. + +Build system changes +-------------------- + +* SparseArrays and SuiteSparse are no longer included in the default system image, so the core + language no longer contains GPL libraries. However, these libraries are still included + alongside the language in the standard binary distribution ([#44247], [#48979], [#49266]). + +New library functions +--------------------- + +* `tanpi` is now defined. It computes tan(π*x) more accurately than `tan(pi*x)` ([#48575]). +* `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`. + It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot` ([#48899]). +* `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls. +* `Base.isprecompiled(pkg::PkgId)` has been added, to identify whether a package has already been precompiled ([#50218]). + +New library features +-------------------- + +* `binomial(x, k)` now supports non-integer `x` ([#48124]). +* A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]). +* `printstyled` now supports italic output ([#45164]). +* `parent` and `parentindices` support `SubString`s. +* `replace(string, pattern...)` now supports an optional `IO` argument to + write the output to a stream rather than returning a string ([#48625]). +* `startswith` now supports seekable `IO` streams ([#43055]). + +Standard library changes +------------------------ + +* The `initialized=true` keyword assignment for `sortperm!` and `partialsortperm!` + is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]). +* Printing integral `Rational`s will skip the denominator in `Rational`-typed IO context (e.g. in arrays) ([#45396]). + +#### Package Manager + +* `Pkg.precompile` now accepts `timing` as a keyword argument which displays per package timing information for precompilation (e.g. `Pkg.precompile(timing=true)`). + +#### LinearAlgebra + +* `AbstractQ` no longer subtypes `AbstractMatrix`. Moreover, `adjoint(Q::AbstractQ)` + no longer wraps `Q` in an `Adjoint` type, but instead in an `AdjointQ`, that itself + subtypes `AbstractQ`. This change accounts for the fact that typically `AbstractQ` + instances behave like function-based, matrix-backed linear operators, and hence don't + allow for efficient indexing. Also, many `AbstractQ` types can act on vectors/matrices + of different size, acting like a matrix with context-dependent size. 
With this change, + `AbstractQ` has a well-defined API that is described in detail in the + [Julia documentation](https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/#man-linalg-abstractq) + ([#46196]). +* Adjoints and transposes of `Factorization` objects are no longer wrapped in `Adjoint` + and `Transpose` wrappers, respectively. Instead, they are wrapped in + `AdjointFactorization` and `TransposeFactorization` types, which themselves subtype + `Factorization` ([#46874]). +* New functions `hermitianpart` and `hermitianpart!` for extracting the Hermitian + (real symmetric) part of a matrix ([#31836]). +* The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the + parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]). +* `eigen(A, B)` and `eigvals(A, B)`, where one of `A` or `B` is symmetric or Hermitian, + are now fully supported ([#49533]). +* `eigvals/eigen(A, cholesky(B))` now computes the generalized eigenvalues (`eigen`: and eigenvectors) + of `A` and `B` via Cholesky decomposition for positive definite `B`. Note: The second argument is + the output of `cholesky`. + +#### Printf + +* Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]). + +#### REPL + +* When stack traces are printed, the printed depth of types in function signatures will be limited + to avoid overly verbose output ([#49795]). + +#### Test + +* The `@test_broken` macro (or `@test` with `broken=true`) now complains if the test expression returns a + non-boolean value in the same way as a non-broken test ([#47804]). +* When a call to `@test` fails or errors inside a function, a larger stacktrace is now printed such that the location of the test within a `@testset` can be retrieved ([#49451]). + +#### InteractiveUtils + +* `code_native` and `@code_native` now default to intel syntax instead of AT&T. +* `@time_imports` now shows the timing of any module `__init__()`s that are run ([#49529]). + +Deprecated or removed +--------------------- + +* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]). 
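For the `@pure` deprecation noted just above, a minimal migration sketch (the `double` function is illustrative, not taken from the release notes); `Base.@assume_effects :foldable` states the constant-foldability guarantee through the effects system, as the note recommends:

```julia
# Before (now deprecated):
#     Base.@pure double(x::Int) = 2x

# After: declare the same intent via the effects system.
Base.@assume_effects :foldable double(x::Int) = 2x

double(21)  # 42; calls with constant arguments may be folded at compile time
```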
+ + +[#31836]: https://github.com/JuliaLang/julia/issues/31836 +[#40105]: https://github.com/JuliaLang/julia/issues/40105 +[#43055]: https://github.com/JuliaLang/julia/issues/43055 +[#44247]: https://github.com/JuliaLang/julia/issues/44247 +[#45164]: https://github.com/JuliaLang/julia/issues/45164 +[#45396]: https://github.com/JuliaLang/julia/issues/45396 +[#45962]: https://github.com/JuliaLang/julia/issues/45962 +[#46196]: https://github.com/JuliaLang/julia/issues/46196 +[#46372]: https://github.com/JuliaLang/julia/issues/46372 +[#46874]: https://github.com/JuliaLang/julia/issues/46874 +[#47044]: https://github.com/JuliaLang/julia/issues/47044 +[#47804]: https://github.com/JuliaLang/julia/issues/47804 +[#47977]: https://github.com/JuliaLang/julia/issues/47977 +[#47979]: https://github.com/JuliaLang/julia/issues/47979 +[#48124]: https://github.com/JuliaLang/julia/issues/48124 +[#48575]: https://github.com/JuliaLang/julia/issues/48575 +[#48600]: https://github.com/JuliaLang/julia/issues/48600 +[#48625]: https://github.com/JuliaLang/julia/issues/48625 +[#48682]: https://github.com/JuliaLang/julia/issues/48682 +[#48899]: https://github.com/JuliaLang/julia/issues/48899 +[#48979]: https://github.com/JuliaLang/julia/issues/48979 +[#49020]: https://github.com/JuliaLang/julia/issues/49020 +[#49052]: https://github.com/JuliaLang/julia/issues/49052 +[#49110]: https://github.com/JuliaLang/julia/issues/49110 +[#49266]: https://github.com/JuliaLang/julia/issues/49266 +[#49405]: https://github.com/JuliaLang/julia/issues/49405 +[#49451]: https://github.com/JuliaLang/julia/issues/49451 +[#49529]: https://github.com/JuliaLang/julia/issues/49529 +[#49533]: https://github.com/JuliaLang/julia/issues/49533 +[#49745]: https://github.com/JuliaLang/julia/issues/49745 +[#49795]: https://github.com/JuliaLang/julia/issues/49795 +[#49959]: https://github.com/JuliaLang/julia/issues/49959 +[#50144]: https://github.com/JuliaLang/julia/issues/50144 +[#50218]: https://github.com/JuliaLang/julia/issues/50218 + Julia v1.9 Release Notes ======================== @@ -130,8 +593,7 @@ Standard library changes #### REPL -* `Alt-e` now opens the current input in an editor. The content (if modified) will be executed - upon exiting the editor ([#33759]). +* `Alt-e` now opens the current input in an editor ([#33759]). * The contextual module which is active in the REPL can be changed (it is `Main` by default), via the `REPL.activate(::Module)` function or via typing the module in the REPL and pressing the keybinding Alt-m ([#33872]). 
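As a quick illustration of the contextual-module change described in the REPL notes above (the `Scratch` module is made up for this sketch; typing the module name and pressing Alt-m has the same effect as calling `REPL.activate`):

```julia
julia> module Scratch
           greet() = "hello from Scratch"
       end;

julia> using REPL

julia> REPL.activate(Scratch)   # evaluate subsequent input inside Scratch

(Scratch) julia> greet()
"hello from Scratch"

(Scratch) julia> REPL.activate()   # switch back to Main
```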
@@ -210,11 +672,13 @@ Tooling Improvements [#42902]: https://github.com/JuliaLang/julia/issues/42902 [#43270]: https://github.com/JuliaLang/julia/issues/43270 [#43334]: https://github.com/JuliaLang/julia/issues/43334 +[#43536]: https://github.com/JuliaLang/julia/issues/43536 [#44137]: https://github.com/JuliaLang/julia/issues/44137 [#44266]: https://github.com/JuliaLang/julia/issues/44266 [#44358]: https://github.com/JuliaLang/julia/issues/44358 [#44360]: https://github.com/JuliaLang/julia/issues/44360 [#44512]: https://github.com/JuliaLang/julia/issues/44512 +[#44527]: https://github.com/JuliaLang/julia/issues/44527 [#44534]: https://github.com/JuliaLang/julia/issues/44534 [#44571]: https://github.com/JuliaLang/julia/issues/44571 [#44714]: https://github.com/JuliaLang/julia/issues/44714 @@ -244,6 +708,8 @@ Tooling Improvements [#46609]: https://github.com/JuliaLang/julia/issues/46609 [#46862]: https://github.com/JuliaLang/julia/issues/46862 [#46976]: https://github.com/JuliaLang/julia/issues/46976 +[#47117]: https://github.com/JuliaLang/julia/issues/47117 +[#47184]: https://github.com/JuliaLang/julia/issues/47184 [#47367]: https://github.com/JuliaLang/julia/issues/47367 [#47392]: https://github.com/JuliaLang/julia/issues/47392 @@ -538,6 +1004,7 @@ Tooling Improvements [#43919]: https://github.com/JuliaLang/julia/issues/43919 [#44080]: https://github.com/JuliaLang/julia/issues/44080 [#44136]: https://github.com/JuliaLang/julia/issues/44136 +[#45064]: https://github.com/JuliaLang/julia/issues/45064 Julia v1.7 Release Notes ======================== @@ -663,7 +1130,7 @@ Standard library changes * `lpad/rpad` are now defined in terms of `textwidth` ([#39044]). * `Test.@test` now accepts `broken` and `skip` boolean keyword arguments, which mimic `Test.@test_broken` and `Test.@test_skip` behavior, but allows skipping - tests failing only under certain conditions. For example + tests failing only under certain conditions. For example ```julia if T == Float64 @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T))) @@ -1087,7 +1554,7 @@ Standard library changes * The `Pkg.BinaryPlatforms` module has been moved into `Base` as `Base.BinaryPlatforms` and heavily reworked. Applications that want to be compatible with the old API should continue to import `Pkg.BinaryPlatforms`, however new users should use `Base.BinaryPlatforms` directly ([#37320]). -* The `Pkg.Artifacts` module has been imported as a separate standard library. It is still available as +* The `Pkg.Artifacts` module has been imported as a separate standard library. It is still available as `Pkg.Artifacts`, however starting from Julia v1.6+, packages may import simply `Artifacts` without importing all of `Pkg` alongside ([#37320]). @@ -1127,7 +1594,7 @@ Standard library changes * The `AbstractMenu` extension interface of `REPL.TerminalMenus` has been extensively overhauled. The new interface does not rely on global configuration variables, is more consistent in delegating printing of the navigation/selection markers, and provides - improved support for dynamic menus. These changes are compatible with the previous + improved support for dynamic menus. These changes are compatible with the previous (deprecated) interface, so are non-breaking. 
The new API offers several enhancements: @@ -1265,9 +1732,9 @@ Tooling Improvements [#37753]: https://github.com/JuliaLang/julia/issues/37753 [#37829]: https://github.com/JuliaLang/julia/issues/37829 [#37844]: https://github.com/JuliaLang/julia/issues/37844 +[#37928]: https://github.com/JuliaLang/julia/issues/37928 [#37973]: https://github.com/JuliaLang/julia/issues/37973 [#38042]: https://github.com/JuliaLang/julia/issues/38042 -[#38062]: https://github.com/JuliaLang/julia/issues/38062 [#38168]: https://github.com/JuliaLang/julia/issues/38168 [#38449]: https://github.com/JuliaLang/julia/issues/38449 [#38475]: https://github.com/JuliaLang/julia/issues/38475 @@ -1382,8 +1849,8 @@ New library functions `Base.Experimental.show_error_hints` from their `showerror` method ([#35094]). * The `@ccall` macro has been added to Base. It is a near drop-in replacement for `ccall` with more Julia-like syntax. It also wraps the new `foreigncall` API for varargs of different types, though it lacks the capability to specify an LLVM calling convention ([#32748]). * New functions `mergewith` and `mergewith!` supersede `merge` and `merge!` with `combine` - argument. They don't have the restriction for `combine` to be a `Function` and also - provide one-argument method that returns a closure. The old methods of `merge` and + argument. They don't have the restriction for `combine` to be a `Function` and also + provide one-argument method that returns a closure. The old methods of `merge` and `merge!` are still available for backward compatibility ([#34296]). * The new `isdisjoint` function indicates whether two collections are disjoint ([#34427]). * Add function `ismutable` and deprecate `isimmutable` to check whether something is mutable ([#34652]). @@ -1510,6 +1977,7 @@ Tooling Improvements [#25930]: https://github.com/JuliaLang/julia/issues/25930 [#26872]: https://github.com/JuliaLang/julia/issues/26872 [#28789]: https://github.com/JuliaLang/julia/issues/28789 +[#28811]: https://github.com/JuliaLang/julia/issues/28811 [#29240]: https://github.com/JuliaLang/julia/issues/29240 [#29333]: https://github.com/JuliaLang/julia/issues/29333 [#29411]: https://github.com/JuliaLang/julia/issues/29411 @@ -1525,6 +1993,7 @@ Tooling Improvements [#33864]: https://github.com/JuliaLang/julia/issues/33864 [#33886]: https://github.com/JuliaLang/julia/issues/33886 [#33937]: https://github.com/JuliaLang/julia/issues/33937 +[#34126]: https://github.com/JuliaLang/julia/issues/34126 [#34149]: https://github.com/JuliaLang/julia/issues/34149 [#34199]: https://github.com/JuliaLang/julia/issues/34199 [#34200]: https://github.com/JuliaLang/julia/issues/34200 @@ -1551,9 +2020,12 @@ Tooling Improvements [#34896]: https://github.com/JuliaLang/julia/issues/34896 [#34953]: https://github.com/JuliaLang/julia/issues/34953 [#35001]: https://github.com/JuliaLang/julia/issues/35001 +[#35057]: https://github.com/JuliaLang/julia/issues/35057 [#35078]: https://github.com/JuliaLang/julia/issues/35078 +[#35085]: https://github.com/JuliaLang/julia/issues/35085 [#35094]: https://github.com/JuliaLang/julia/issues/35094 [#35108]: https://github.com/JuliaLang/julia/issues/35108 +[#35113]: https://github.com/JuliaLang/julia/issues/35113 [#35124]: https://github.com/JuliaLang/julia/issues/35124 [#35132]: https://github.com/JuliaLang/julia/issues/35132 [#35138]: https://github.com/JuliaLang/julia/issues/35138 @@ -1864,6 +2336,7 @@ Tooling Improvements [#32534]: https://github.com/JuliaLang/julia/issues/32534 [#32600]: 
https://github.com/JuliaLang/julia/issues/32600 [#32628]: https://github.com/JuliaLang/julia/issues/32628 +[#32651]: https://github.com/JuliaLang/julia/issues/32651 [#32653]: https://github.com/JuliaLang/julia/issues/32653 [#32729]: https://github.com/JuliaLang/julia/issues/32729 [#32814]: https://github.com/JuliaLang/julia/issues/32814 @@ -1873,6 +2346,7 @@ Tooling Improvements [#32851]: https://github.com/JuliaLang/julia/issues/32851 [#32872]: https://github.com/JuliaLang/julia/issues/32872 [#32875]: https://github.com/JuliaLang/julia/issues/32875 +[#32918]: https://github.com/JuliaLang/julia/issues/32918 Julia v1.2 Release Notes ======================== @@ -2015,6 +2489,7 @@ External dependencies [#31009]: https://github.com/JuliaLang/julia/issues/31009 [#31125]: https://github.com/JuliaLang/julia/issues/31125 [#31211]: https://github.com/JuliaLang/julia/issues/31211 +[#31223]: https://github.com/JuliaLang/julia/issues/31223 [#31230]: https://github.com/JuliaLang/julia/issues/31230 [#31235]: https://github.com/JuliaLang/julia/issues/31235 [#31310]: https://github.com/JuliaLang/julia/issues/31310 @@ -2263,7 +2738,7 @@ Standard Library Changes * The `Libdl` module's methods `dlopen()` and `dlsym()` have gained a `throw_error` keyword argument, replacing the now-deprecated `dlopen_e()` - and `dlsym_e()` methods. When `throw_error` is `false`, failure to locate + and `dlsym_e()` methods. When `throw_error` is `false`, failure to locate a shared library or symbol will return `nothing` rather than `C_NULL`. ([#28888]) @@ -2525,7 +3000,7 @@ This section lists changes that do not have deprecation warnings. "Code Loading" and "Pkg" for documentation. * `replace(s::AbstractString, pat=>repl)` for function `repl` arguments formerly - passed a substring to `repl` in all cases. It now passes substrings for + passed a substring to `repl` in all cases. It now passes substrings for string patterns `pat`, but a `Char` for character patterns (when `pat` is a `Char`, collection of `Char`, or a character predicate) ([#25815]). @@ -2710,7 +3185,7 @@ This section lists changes that do not have deprecation warnings. * The logging system has been redesigned - `info` and `warn` are deprecated and replaced with the logging macros `@info`, `@warn`, `@debug` and - `@error`. The `logging` function is also deprecated and replaced with + `@error`. The `logging` function is also deprecated and replaced with `AbstractLogger` and the functions from the new standard `Logging` library. ([#24490]) @@ -2866,7 +3341,7 @@ Library improvements For example, `x^-1` is now essentially a synonym for `inv(x)`, and works in a type-stable way even if `typeof(x) != typeof(inv(x))` ([#24240]). - * New `Iterators.reverse(itr)` for reverse-order iteration ([#24187]). Iterator + * New `Iterators.reverse(itr)` for reverse-order iteration ([#24187]). Iterator types `T` can implement `start` etc. for `Iterators.Reverse{T}` to support this. * The functions `nextind` and `prevind` now accept `nchar` argument that indicates @@ -3005,7 +3480,7 @@ Library improvements cartesian indices to linear indices using the normal indexing operation. ([#24715], [#26775]). - * `IdDict{K,V}` replaces `ObjectIdDict`. It has type parameters + * `IdDict{K,V}` replaces `ObjectIdDict`. It has type parameters like other `AbstractDict` subtypes and its constructors mirror the ones of `Dict`. ([#25210]) @@ -3216,8 +3691,8 @@ Deprecated or removed should add offset axis support to the function `f` directly ([#26733]). 
* The functions `ones` and `zeros` used to accept any objects as dimensional arguments, - implicitly converting them to `Int`s. This is now deprecated; only `Integer`s or - `AbstractUnitRange`s are accepted as arguments. Instead, convert the arguments before + implicitly converting them to `Int`s. This is now deprecated; only `Integer`s or + `AbstractUnitRange`s are accepted as arguments. Instead, convert the arguments before calling `ones` or `zeros` ([#26733]). * The variadic `size(A, dim1, dim2, dims...)` method to return a tuple of multiple @@ -3982,6 +4457,7 @@ Command-line option changes [#26932]: https://github.com/JuliaLang/julia/issues/26932 [#26935]: https://github.com/JuliaLang/julia/issues/26935 [#26980]: https://github.com/JuliaLang/julia/issues/26980 +[#26991]: https://github.com/JuliaLang/julia/issues/26991 [#26997]: https://github.com/JuliaLang/julia/issues/26997 [#27067]: https://github.com/JuliaLang/julia/issues/27067 [#27071]: https://github.com/JuliaLang/julia/issues/27071 @@ -4016,6 +4492,7 @@ Command-line option changes [#28155]: https://github.com/JuliaLang/julia/issues/28155 [#28266]: https://github.com/JuliaLang/julia/issues/28266 [#28302]: https://github.com/JuliaLang/julia/issues/28302 +[#28310]: https://github.com/JuliaLang/julia/issues/28310 Julia v0.6.0 Release Notes ========================== @@ -4082,8 +4559,8 @@ Language changes Previously, this syntax parsed as an implicit multiplication ([#18690]). * For every binary operator `⨳`, `a .⨳ b` is now automatically equivalent to - the `broadcast` call `(⨳).(a, b)`. Hence, one no longer defines methods - for `.*` etcetera. This also means that "dot operations" automatically + the `broadcast` call `(⨳).(a, b)`. Hence, one no longer defines methods + for `.*` etcetera. This also means that "dot operations" automatically fuse into a single loop, along with other dot calls `f.(x)` ([#17623]). Similarly for unary operators ([#20249]). @@ -4136,11 +4613,11 @@ This section lists changes that do not have deprecation warnings. or an array as a "scalar" ([#16986]). * `broadcast` now produces a `BitArray` instead of `Array{Bool}` for - functions yielding a boolean result. If you want `Array{Bool}`, use + functions yielding a boolean result. If you want `Array{Bool}`, use `broadcast!` or `.=` ([#17623]). * Broadcast `A[I...] .= X` with entirely scalar indices `I` is deprecated as - its behavior will change in the future. Use `A[I...] = X` instead. + its behavior will change in the future. Use `A[I...] = X` instead. * Operations like `.+` and `.*` on `Range` objects are now generic `broadcast` calls (see [above](#language-changes)) and produce an `Array`. @@ -4186,7 +4663,7 @@ This section lists changes that do not have deprecation warnings. now tab-completes to U+03B5 (greek small letter epsilon) ([#19464]). * `retry` now inputs the keyword arguments `delays` and `check` instead of - `n` and `max_delay`. The previous functionality can be achieved setting + `n` and `max_delay`. The previous functionality can be achieved setting `delays` to `ExponentialBackOff` ([#19331]). * `transpose(::AbstractVector)` now always returns a `RowVector` view of the input (which is a @@ -4227,7 +4704,7 @@ This section lists changes that do not have deprecation warnings. using the values and types of `a` and `step` as given, whereas `range(a, step, len)` will attempt to match inputs `a::FloatNN` and `step::FloatNN` to rationals and construct a `StepRangeLen` - that internally uses twice-precision arithmetic. 
These two + that internally uses twice-precision arithmetic. These two outcomes exhibit differences in both precision and speed. * `A=>B` expressions are now parsed as calls instead of using `=>` as the @@ -4247,7 +4724,7 @@ This section lists changes that do not have deprecation warnings. trigamma, and polygamma special functions have been moved from Base to the [SpecialFunctions.jl package](https://github.com/JuliaMath/SpecialFunctions.jl) - ([#20427]). Note that `airy`, `airyx` and `airyprime` have been deprecated + ([#20427]). Note that `airy`, `airyx` and `airyprime` have been deprecated in favor of more specific functions (`airyai`, `airybi`, `airyaiprime`, `airybiprimex`, `airyaix`, `airybix`, `airyaiprimex`, `airybiprimex`) ([#18050]). @@ -4332,7 +4809,7 @@ Library improvements for more information. * The default color for info messages has been changed from blue to cyan - ([#18442]), and for warning messages from red to yellow ([#18453]). This + ([#18442]), and for warning messages from red to yellow ([#18453]). This can be changed back to the original colors by setting the environment variables `JULIA_INFO_COLOR` to `"blue"` and `JULIA_WARN_COLOR` to `"red"`. @@ -4676,10 +5153,10 @@ New language features * Function return type syntax `function f()::T` has been added ([#1090]). Values returned from a function with such a declaration will be converted to the specified type `T`. - * Many more operators now support `.` prefixes (e.g. `.≤`) ([#17393]). However, + * Many more operators now support `.` prefixes (e.g. `.≤`) ([#17393]). However, users are discouraged from overloading these, since they are mainly parsed in order to implement backwards compatibility with planned automatic - broadcasting of dot operators in Julia 0.6 ([#16285]). Explicitly qualified + broadcasting of dot operators in Julia 0.6 ([#16285]). Explicitly qualified operator names like `Base.≤` should now use `Base.:≤` (prefixed by `@compat` if you need 0.4 compatibility via the `Compat` package). @@ -4812,7 +5289,7 @@ Library improvements * Strings ([#16107]): * The `UTF8String` and `ASCIIString` types have been merged into a single - `String` type ([#16058]). Use `isascii(s)` to check whether + `String` type ([#16058]). Use `isascii(s)` to check whether a string contains only ASCII characters. The `ascii(s)` function now converts `s` to `String`, raising an `ArgumentError` exception if `s` is not pure ASCII. @@ -5120,7 +5597,7 @@ Language tooling improvements talk](https://www.youtube.com/watch?v=e6-hcOHO0tc&list=PLP8iPy9hna6SQPwZUDtAM59-wPzCPyD_S&index=5) on Gallium shows off various features of the debugger. - * The [Juno IDE](http://junolab.org) has matured significantly, and now + * The [Juno IDE](https://junolab.org) has matured significantly, and now also includes support for plotting and debugging. * [Cxx.jl](https://github.com/Keno/Cxx.jl) provides a convenient FFI for @@ -5134,7 +5611,7 @@ New language features * Function call overloading: for arbitrary objects `x` (not of type `Function`), `x(...)` is transformed into `call(x, ...)`, and `call` - can be overloaded as desired. Constructors are now a special case of + can be overloaded as desired. Constructors are now a special case of this mechanism, which allows e.g. constructors for abstract types. `T(...)` falls back to `convert(T, x)`, so all `convert` methods implicitly define a constructor ([#8712], [#2403]). @@ -5162,13 +5639,13 @@ New language features `~/.julia/lib/v0.4` ([#8745]). 
* See manual section on `Module initialization and precompilation` (under `Modules`) for - details and errata. In particular, to be safely precompilable a module may need an + details and errata. In particular, to be safely precompilable a module may need an `__init__` function to separate code that must be executed at runtime rather than precompile - time. Modules that are *not* precompilable should call `__precompile__(false)`. + time. Modules that are *not* precompilable should call `__precompile__(false)`. * The precompiled `.ji` file includes a list of dependencies (modules and files that were imported/included at precompile-time), and the module is automatically recompiled - upon `import` when any of its dependencies have changed. Explicit dependencies + upon `import` when any of its dependencies have changed. Explicit dependencies on other files can be declared with `include_dependency(path)` ([#12458]). * New option `--output-incremental={yes|no}` added to invoke the equivalent of `Base.compilecache` @@ -5372,7 +5849,7 @@ Library improvements * New `vecdot` function, analogous to `vecnorm`, for Euclidean inner products over any iterable container ([#11067]). * `p = plan_fft(x)` and similar functions now return a `Base.DFT.Plan` object, rather - than an anonymous function. Calling it via `p(x)` is deprecated in favor of + than an anonymous function. Calling it via `p(x)` is deprecated in favor of `p * x` or `p \ x` (for the inverse), and it can also be used with `A_mul_B!` to employ pre-allocated output arrays ([#12087]). @@ -5518,7 +5995,7 @@ Library improvements * Other improvements - * You can now tab-complete emoji via their [short names](http://www.emoji-cheat-sheet.com/), using `\:name:` ([#10709]). + * You can now tab-complete emoji via their [short names](https://www.emoji-cheat-sheet.com/), using `\:name:` ([#10709]). * `gc_enable` subsumes `gc_disable`, and also returns the previous GC state. @@ -5862,7 +6339,7 @@ Library improvements * New string type, `UTF16String` ([#4930]), constructed by `utf16(s)` from another string, a `Uint16` array or pointer, or a byte array (possibly prefixed by a byte-order marker to - indicate endian-ness). Its data is internally `NULL`-terminated + indicate endian-ness). Its data is internally `NULL`-terminated for passing to C ([#7016]). * `CharString` is renamed to `UTF32String` ([#4943]), and its data @@ -5897,7 +6374,7 @@ Library improvements * New `vecnorm(itr, p=2)` function that computes the norm of any iterable collection of numbers as if it were a vector of - the same length. This generalizes and replaces `normfro` ([#6057]), + the same length. This generalizes and replaces `normfro` ([#6057]), and `norm` is now type-stable ([#6056]). * New `UniformScaling` matrix type and identity `I` constant ([#5810]). diff --git a/LICENSE.md b/LICENSE.md index d4125f4fba221..da8b6920491cc 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2009-2023: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors +Copyright (c) 2009-2024: Jeff Bezanson, Stefan Karpinski, Viral B. 
Shah, and other contributors: https://github.com/JuliaLang/julia/contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Make.inc b/Make.inc index 96064cb7eac15..26b5ae7752555 100644 --- a/Make.inc +++ b/Make.inc @@ -28,13 +28,7 @@ BOOTSTRAP_DEBUG_LEVEL ?= 0 OPENBLAS_TARGET_ARCH:= OPENBLAS_SYMBOLSUFFIX:= OPENBLAS_LIBNAMESUFFIX:= - -# If OPENBLAS_TARGET_ARCH is set, we default to disabling OPENBLAS_DYNAMIC_ARCH -ifneq ($(OPENBLAS_TARGET_ARCH),) OPENBLAS_DYNAMIC_ARCH:=0 -else -OPENBLAS_DYNAMIC_ARCH:=1 -endif OPENBLAS_USE_THREAD:=1 # Flags for using libraries available on the system instead of building them. @@ -57,7 +51,7 @@ USE_SYSTEM_MPFR:=0 USE_SYSTEM_LIBSUITESPARSE:=0 USE_SYSTEM_LIBUV:=0 USE_SYSTEM_UTF8PROC:=0 -USE_SYSTEM_MBEDTLS:=0 +USE_SYSTEM_OPENSSL:=0 USE_SYSTEM_LIBSSH2:=0 USE_SYSTEM_NGHTTP2:=0 USE_SYSTEM_CURL:=0 @@ -86,12 +80,18 @@ HAVE_SSP := 0 WITH_GC_VERIFY := 0 WITH_GC_DEBUG_ENV := 0 +# Use stock if MMTK_PLAN hasn't been defined +MMTK_PLAN ?= None + # Enable DTrace support WITH_DTRACE := 0 # Enable ITTAPI integration WITH_ITTAPI := 0 +# Enable NVTX integration +WITH_NVTX := 0 + # Enable Tracy support WITH_TRACY := 0 WITH_TRACY_CALLSTACKS := 0 @@ -115,6 +115,51 @@ SPACE:=$(eval) $(eval) export LC_ALL=C export LANG=C +# Respect `FORCE_COLOR` environment variable: . +ifndef FORCE_COLOR +FORCE_COLOR := "" +endif + +# Respect `NO_COLOR` environment variable: . +ifndef NO_COLOR +NO_COLOR := "" +endif + +# When both `FORCE_COLOR` and `NO_COLOR` are defined, the former has precedence. +ifneq ($(FORCE_COLOR), "") +NO_COLOR = "" +endif + +WARNCOLOR:="\033[33;1m" +ENDCOLOR:="\033[0m" + +CCCOLOR:="\033[34m" +LINKCOLOR:="\033[34;1m" +PERLCOLOR:="\033[35m" +FLISPCOLOR:="\033[32m" +JULIACOLOR:="\033[32;1m" +DTRACECOLOR:="\033[32;1m" + +SRCCOLOR:="\033[33m" +BINCOLOR:="\033[37;1m" +JULCOLOR:="\033[34;1m" + +ifneq ($(NO_COLOR), "") +WARNCOLOR:="" +ENDCOLOR:="" + +CCCOLOR:="" +LINKCOLOR:="" +PERLCOLOR:="" +FLISPCOLOR:="" +JULIACOLOR:="" +DTRACECOLOR:="" + +SRCCOLOR:="" +BINCOLOR:="" +JULCOLOR:="" +endif + # We need python for things like BB triplet recognition and relative path computation. # We don't really care about version, generally, so just find something that works: PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo "{python|python3|python2} not found")" @@ -137,7 +182,7 @@ ifeq ($(BUILDROOT),) ifeq ("$(origin O)", "command line") BUILDROOT := $(abspath $O) BUILDDIR := $(abspath $(BUILDROOT)/$(call rel_path,$(JULIAHOME),$(SRCDIR))) - $(info $(shell printf '\033[32;1mBuilding into $(BUILDROOT)\033[0m')) # use printf to expand the escape sequences + $(info $(shell printf '$(JULIACOLOR)Building into $(BUILDROOT)$(ENDCOLOR)')) # use printf to expand the escape sequences else BUILDROOT:=$(JULIAHOME) endif @@ -359,6 +404,10 @@ USE_MLIR := 0 # Options to use RegionVectorizer USE_RV := 0 +# Use `ccache` for speeding up recompilation of the C/C++ part of Julia. +# Requires the `ccache` executable to be in the `PATH` environment variable. 
+USECCACHE := 0 + # Cross-compile #XC_HOST := i686-w64-mingw32 #XC_HOST := x86_64-w64-mingw32 @@ -440,11 +489,15 @@ endif # Set to 1 to enable profiling with perf ifeq ("$(OS)", "Linux") USE_PERF_JITEVENTS ?= 1 +ifeq ($(ARCH),x86_64) USE_INTEL_JITEVENTS ?= 1 -else +else # ARCH x86_64 +USE_INTEL_JITEVENTS ?= 0 +endif # ARCH x86_64 +else # OS Linux USE_PERF_JITEVENTS ?= 0 USE_INTEL_JITEVENTS ?= 0 -endif +endif # OS Linux JULIACODEGEN := LLVM @@ -478,7 +531,7 @@ endif FC := $(CROSS_COMPILE)gfortran -# Note: Supporting only macOS Yosemite and above +# Note: Supporting only macOS Mojave and above ifeq ($(OS), Darwin) APPLE_ARCH := $(shell uname -m) ifneq ($(APPLE_ARCH),arm64) @@ -488,16 +541,16 @@ MACOSX_VERSION_MIN := 11.0 endif endif -JCFLAGS_COMMON := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 +JCFLAGS_COMMON := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -Wformat -Wformat-security JCFLAGS_CLANG := $(JCFLAGS_COMMON) JCFLAGS_GCC := $(JCFLAGS_COMMON) -fno-gnu-unique -# AArch64 needs this flag to generate the .eh_frame used by libunwind +# These flags are needed to generate decent debug info JCPPFLAGS_COMMON := -fasynchronous-unwind-tables -JCPPFLAGS_CLANG := $(JCPPFLAGS_COMMON) -JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) +JCPPFLAGS_CLANG := $(JCPPFLAGS_COMMON) -mllvm -enable-tail-merge=0 +JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) -fno-tree-tail-merge -JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++14 +JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++17 -Wformat -Wformat-security JCXXFLAGS_CLANG := $(JCXXFLAGS_COMMON) -pedantic JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) -fno-gnu-unique @@ -509,6 +562,11 @@ SHIPFLAGS_COMMON := -O3 SHIPFLAGS_CLANG := $(SHIPFLAGS_COMMON) -g SHIPFLAGS_GCC := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions +BOLT_LDFLAGS := + +BOLT_CFLAGS_GCC := +BOLT_CFLAGS_CLANG := + ifeq ($(OS), Darwin) JCPPFLAGS_CLANG += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 endif @@ -525,7 +583,8 @@ JCFLAGS := $(JCFLAGS_GCC) JCPPFLAGS := $(JCPPFLAGS_GCC) JCXXFLAGS := $(JCXXFLAGS_GCC) DEBUGFLAGS := $(DEBUGFLAGS_GCC) -SHIPFLAGS := $(SHIPFLAGS_GCC) +SHIPFLAGS := $(SHIPFLAGS_GCC) $(BOLT_CFLAGS_GCC) +BOLT_CFLAGS := $(BOLT_CFLAGS_GCC) endif ifeq ($(USECLANG),1) @@ -535,7 +594,8 @@ JCFLAGS := $(JCFLAGS_CLANG) JCPPFLAGS := $(JCPPFLAGS_CLANG) JCXXFLAGS := $(JCXXFLAGS_CLANG) DEBUGFLAGS := $(DEBUGFLAGS_CLANG) -SHIPFLAGS := $(SHIPFLAGS_CLANG) +SHIPFLAGS := $(SHIPFLAGS_CLANG) $(BOLT_CFLAGS_CLANG) +BOLT_CFLAGS := $(BOLT_CFLAGS_CLANG) ifeq ($(OS), Darwin) CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) @@ -546,7 +606,17 @@ export MACOSX_DEPLOYMENT_TARGET=$(MACOSX_VERSION_MIN) endif endif -JLDFLAGS := +# Conditional setting of RELRO flag for enhanced security on Linux builds. +# RELRO (Read-Only Relocations) is a security feature that marks certain sections +# of the binary as read-only to prevent exploitation techniques like +# GOT (Global Offset Table) overwriting attacks. +ifeq ($(OS),Linux) + RELRO_FLAG := -Wl,-z,relro +else + RELRO_FLAG := +endif + +JLDFLAGS := $(RELRO_FLAG) ifeq ($(USECCACHE), 1) # Expand CC, CXX and FC here already because we want the original definition and not the ccache version. 
@@ -567,10 +637,10 @@ CXX_BASE := ccache FC_BASE := ccache ifeq ($(USECLANG),1) # ccache and Clang don't do well together -# http://petereisentraut.blogspot.be/2011/05/ccache-and-clang.html +# https://petereisentraut.blogspot.be/2011/05/ccache-and-clang.html CC += -Qunused-arguments CXX += -Qunused-arguments -# http://petereisentraut.blogspot.be/2011/09/ccache-and-clang-part-2.html +# https://petereisentraut.blogspot.be/2011/09/ccache-and-clang-part-2.html export CCACHE_CPP2 := yes endif else #USECCACHE @@ -595,23 +665,26 @@ CPP_STDOUT := $(CPP) -P # file extensions ifeq ($(OS), WINNT) SHLIB_EXT := dll + PATHSEP := ; else ifeq ($(OS), Darwin) SHLIB_EXT := dylib + PATHSEP := : else SHLIB_EXT := so + PATHSEP := : endif ifeq ($(OS),WINNT) define versioned_libname -$$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) +$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) endef else ifeq ($(OS),Darwin) define versioned_libname -$$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) +$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) endef else define versioned_libname -$$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT)) +$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT)) endef endif @@ -663,7 +736,7 @@ JL_MAJOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR) endif endif -ifeq ($(OS), FreeBSD) +ifneq ($(findstring $(OS),FreeBSD OpenBSD),) LOCALBASE ?= /usr/local else LOCALBASE ?= /usr @@ -719,7 +792,7 @@ SANITIZE_LDFLAGS := ifeq ($(SANITIZE_MEMORY),1) SANITIZE_OPTS += -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer SANITIZE_LDFLAGS += $(SANITIZE_OPTS) -ifneq ($(findstring $(OS),Linux FreeBSD),) +ifneq ($(findstring $(OS),Linux FreeBSD OpenBSD),) SANITIZE_LDFLAGS += -Wl,--warn-unresolved-symbols endif # OS Linux or FreeBSD endif # SANITIZE_MEMORY=1 @@ -752,11 +825,42 @@ JCXXFLAGS += -DGC_VERIFY JCFLAGS += -DGC_VERIFY endif +ifneq ($(JL_STACK_SIZE),) +JCXXFLAGS += -DJL_STACK_SIZE=$(JL_STACK_SIZE) +JCFLAGS += -DJL_STACK_SIZE=$(JL_STACK_SIZE) +endif + + ifeq ($(WITH_GC_DEBUG_ENV), 1) JCXXFLAGS += -DGC_DEBUG_ENV JCFLAGS += -DGC_DEBUG_ENV endif +ifneq (${MMTK_PLAN},None) +JCXXFLAGS += -DMMTK_GC +JCFLAGS += -DMMTK_GC +# Do a release build on the binding by default +MMTK_BUILD ?= release +ifeq (${MMTK_PLAN},Immix) +JCXXFLAGS += -DMMTK_PLAN_IMMIX +JCFLAGS += -DMMTK_PLAN_IMMIX +else +$(error "Unsupported MMTk plan: $(MMTK_PLAN)") +endif + +# Location of mmtk-julia binding +# (needed for api/*.h and .so file) +MMTK_JULIA_DIR ?= $(BUILDROOT)/usr/lib/mmtk_julia + +MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk +MMTK_API_INC = ${MMTK_DIR}/api + +MMTK_LIB := -lmmtk_julia +else +MMTK_JULIA_INC := +MMTK_LIB := +endif + ifeq ($(WITH_DTRACE), 1) JCXXFLAGS += -DUSE_DTRACE JCFLAGS += -DUSE_DTRACE @@ -785,6 +889,11 @@ JCXXFLAGS += -DUSE_TIMING_COUNTS JCFLAGS += -DUSE_TIMING_COUNTS endif +ifeq ($(WITH_NVTX), 1) +JCXXFLAGS += -DUSE_NVTX +JCFLAGS += -DUSE_NVTX +endif + # =========================================================================== # Select the cpu architecture to target, or automatically detects the user's compiler @@ -852,6 +961,21 @@ ARCH := $(BUILD_OS) endif endif +# MMTk is only available on x86_64 Linux for now +ifneq (${MMTK_PLAN},None) + +ifeq ($(OS),Linux) +MMTK_LIB_NAME := libmmtk_julia.so +else +$(error "Unsupported OS for MMTk") +endif + +ifneq ($(ARCH),x86_64) +$(error "Unsupported build architecture for MMTk") +endif + +endif + # Detect common pre-SSE2 JULIA_CPU_TARGET values known not to work (#7185) ifeq ($(MARCH),) ifneq ($(findstring $(ARCH),i386 i486 i586 i686),) @@ -897,15 +1021,45 @@ 
else ISX86:=0 endif + +#If nothing is set default to native unless we are cross-compiling +ifeq ($(MARCH)$(MCPU)$(MTUNE)$(JULIA_CPU_TARGET)$(XC_HOST),) +ifeq ($(ARCH),aarch64) +# ARM recommends only setting MCPU for AArch64 +MCPU=native +else ifneq (,$(findstring riscv64,$(ARCH))) +# RISC-V doesn't have a native option +$(error Building for RISC-V requires a specific MARCH to be set)) +else +MARCH=native +MTUNE=native +endif +endif + +# If we are running on x86 or x86_64, set certain options automatically +ifeq (1,$(ISX86)) +OPENBLAS_DYNAMIC_ARCH:=1 +endif + # If we are running on powerpc64le or ppc64le, set certain options automatically ifneq (,$(filter $(ARCH), powerpc64le ppc64le)) JCFLAGS += -fsigned-char +OPENBLAS_DYNAMIC_ARCH:=1 OPENBLAS_TARGET_ARCH:=POWER8 BINARY:=64 # GCC doesn't do -march= on ppc64le MARCH= endif +# Allow Clang to use CRC instructions (only applicable on AArch64) +ifneq (,$(findstring aarch64,$(ARCH))) +ifeq ($(USECLANG),1) +ifeq (,$(MARCH)) +JCFLAGS += -mcrc +endif +endif +endif + # If we are running on powerpc64 or ppc64, fail out dramatically ifneq (,$(filter $(ARCH), powerpc64 ppc64)) $(error Big-endian PPC64 is not supported, to ignore this error, set ARCH=ppc64le) @@ -937,6 +1091,9 @@ endif ifneq (,$(findstring arm,$(ARCH))) DIST_ARCH:=arm endif +ifneq (,$(findstring riscv64,$(ARCH))) +DIST_ARCH:=riscv64 +endif JULIA_BINARYDIST_FILENAME := julia-$(JULIA_COMMIT)-$(DIST_OS)$(DIST_ARCH) endif @@ -944,16 +1101,20 @@ endif # If we are running on ARM, set certain options automatically ifneq (,$(findstring arm,$(ARCH))) JCFLAGS += -fsigned-char -USE_BLAS64:=0 -OPENBLAS_DYNAMIC_ARCH:=0 OPENBLAS_TARGET_ARCH:=ARMV7 +BINARY:=32 endif # If we are running on aarch64 (e.g. ARMv8 or ARM64), set certain options automatically ifneq (,$(findstring aarch64,$(ARCH))) -OPENBLAS_DYNAMIC_ARCH:=0 +OPENBLAS_DYNAMIC_ARCH:=1 OPENBLAS_TARGET_ARCH:=ARMV8 -USE_BLAS64:=1 +BINARY:=64 +endif + +# If we are running on riscv64, set certain options automatically +ifneq (,$(findstring riscv64,$(ARCH))) +OPENBLAS_DYNAMIC_ARCH:=1 BINARY:=64 endif @@ -962,8 +1123,12 @@ ifneq ($(MARCH),) CC += -march=$(MARCH) CXX += -march=$(MARCH) FC += -march=$(MARCH) +# On RISC-V, don't forward the MARCH ISA string to JULIA_CPU_TARGET, +# as it's always incompatible with LLVM's CPU target name parser. +ifeq (,$(findstring riscv64,$(ARCH))) JULIA_CPU_TARGET ?= $(MARCH) endif +endif # Set MCPU-specific flags ifneq ($(MCPU),) @@ -973,6 +1138,14 @@ FC += -mcpu=$(MCPU) JULIA_CPU_TARGET ?= $(MCPU) endif +# Set MTUNE-specific flags +ifneq ($(MTUNE),) +CC += -mtune=$(MTUNE) +CXX += -mtune=$(MTUNE) +FC += -mtune=$(MTUNE) +JULIA_CPU_TARGET ?= $(MTUNE) +endif + ifneq ($(MARCH)$(MCPU),) ifeq ($(OS),Darwin) # on Darwin, the standalone `as` program doesn't know @@ -1029,21 +1202,14 @@ LIBUNWIND:= else ifneq ($(DISABLE_LIBUNWIND), 0) LIBUNWIND:= else -ifeq ($(USE_SYSTEM_LIBUNWIND), 1) -ifneq ($(OS),Darwin) LIBUNWIND:=-lunwind -# Only for linux since we want to use not yet released libunwind features +ifneq ($(findstring $(OS),Darwin OpenBSD),) +JCPPFLAGS+=-DLLVMLIBUNWIND +else ifeq ($(USE_SYSTEM_LIBUNWIND), 1) +# Only for linux and freebsd since we want to use not yet released gnu libunwind features JCFLAGS+=-DSYSTEM_LIBUNWIND JCPPFLAGS+=-DSYSTEM_LIBUNWIND endif -else -ifeq ($(OS),Darwin) -LIBUNWIND:=-lunwind -JCPPFLAGS+=-DLLVMLIBUNWIND -else -LIBUNWIND:=-lunwind -endif -endif endif ifeq ($(origin LLVM_CONFIG), undefined) @@ -1171,7 +1337,7 @@ endif # We need python for things like BB triplet recognition. 
We don't really care # about version, generally, so just find something that works: -PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found)" +PYTHON := $(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found) PYTHON_SYSTEM := $(shell $(PYTHON) -c 'from __future__ import print_function; import platform; print(platform.system())') # If we're running on Cygwin, but using a native-windows Python, we need to use cygpath -w @@ -1229,14 +1395,14 @@ LIBGFORTRAN_VERSION := $(subst libgfortran,,$(filter libgfortran%,$(subst -,$(SP # shipped with CSL. Although we do not depend on any of the symbols, it is entirely # possible that a user might choose to install a library which depends on symbols provided # by a newer libstdc++. Without runtime detection, those libraries would break. -CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.31|GLIBCXX_3\.5\.|GLIBCXX_4\. +CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.34|GLIBCXX_3\.5\.|GLIBCXX_4\. # This is the set of projects that BinaryBuilder dependencies are hooked up for. # Note: we explicitly _do not_ define `CSL` here, since it requires some more # advanced techniques to decide whether it should be installed from a BB source # or not. See `deps/csl.mk` for more detail. -BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT +BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP OPENSSL LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT BOLT define SET_BB_DEFAULT # First, check to see if BB is disabled on a global setting ifeq ($$(USE_BINARYBUILDER),0) @@ -1342,12 +1508,32 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic # make it loaded first to # prevent from linking to outdated system libs. # See #21788 +# TODO: Determine whether the condition here on AArch64 (added in #55089) should actually +# be `ifneq ($(USE_BINARYBUILDER),0)`. We vendor a correctly versioned libgcc_s when using +# BinaryBuilder which we want to link in early as noted above, but it could be the case +# that without BinaryBuilder, regardless of architecture, we need to delay linking libgcc_s +# to avoid getting the system one. 
+ifeq (,$(findstring aarch64,$(ARCH))) OSLIBS += -lgcc_s +endif OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) endif +ifeq ($(OS), OpenBSD) +JLDFLAGS += -Wl,--Bdynamic +ifneq ($(SANITIZE),1) +JLDFLAGS += -Wl,-no-undefined +endif + +JLIBLDFLAGS += -Wl,-Bsymbolic-functions + +OSLIBS += -Wl,--no-as-needed -lpthread -lm -lc++abi -lc +OSLIBS += -Wl,--whole-archive -lcompiler_rt -Wl,--no-whole-archive +OSLIBS += -Wl,--export-dynamic,--as-needed,--version-script=$(BUILDROOT)/src/julia.expmap +endif + ifeq ($(OS), Darwin) SHLIB_EXT := dylib OSLIBS += -framework CoreFoundation @@ -1360,8 +1546,8 @@ endif ifeq ($(OS), WINNT) HAVE_SSP := 1 OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ - $(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic -JLDFLAGS += -Wl,--stack,8388608 + $(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic -lole32 +JLDFLAGS += -Wl,--stack,8388608 --disable-auto-import --disable-runtime-pseudo-reloc ifeq ($(ARCH),i686) JLDFLAGS += -Wl,--large-address-aware endif @@ -1455,7 +1641,7 @@ endif CLANGSA_FLAGS := CLANGSA_CXXFLAGS := ifeq ($(OS), Darwin) # on new XCode, the files are hidden -CLANGSA_FLAGS += -isysroot $(shell xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk + CLANGSA_FLAGS += -isysroot $(shell xcrun --show-sdk-path -sdk macosx) endif ifeq ($(USEGCC),1) # try to help clang find the c++ files for CC by guessing the value for --prefix @@ -1503,7 +1689,7 @@ endef WINE ?= wine ifeq ($(BINARY),32) -HEAPLIM := --heap-size-hint=500M +HEAPLIM := --heap-size-hint=1000M else HEAPLIM := endif @@ -1671,24 +1857,10 @@ ifndef VERBOSE VERBOSE := 0 endif -WARNCOLOR:="\033[33;1m" -ENDCOLOR:="\033[0m" - ifeq ($(VERBOSE), 0) QUIET_MAKE = -s -CCCOLOR:="\033[34m" -LINKCOLOR:="\033[34;1m" -PERLCOLOR:="\033[35m" -FLISPCOLOR:="\033[32m" -JULIACOLOR:="\033[32;1m" -DTRACECOLOR:="\033[32;1m" - -SRCCOLOR:="\033[33m" -BINCOLOR:="\033[37;1m" -JULCOLOR:="\033[34;1m" - GOAL=$(subst ','\'',$(subst $(abspath $(JULIAHOME))/,,$(abspath $@))) PRINT_CC = printf ' %b %b\n' $(CCCOLOR)CC$(ENDCOLOR) $(SRCCOLOR)$(GOAL)$(ENDCOLOR); $(1) @@ -1709,7 +1881,7 @@ PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1) PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1) PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1) -endif +endif # VERBOSE # Makefile debugging trick: # call print-VARIABLE to see the runtime value of any variable diff --git a/Makefile b/Makefile index 5e9b4ccf5460a..20d131ee8524c 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,10 @@ $(foreach link,base $(JULIAHOME)/test,$(eval $(call symlink_target,$(link),$$(bu julia_flisp.boot.inc.phony: julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src julia_flisp.boot.inc.phony +# Build the HTML docs (skipped if already exists, notably in tarballs) +$(BUILDROOT)/doc/_build/html/en/index.html: $(shell find $(BUILDROOT)/base $(BUILDROOT)/doc \( -path $(BUILDROOT)/doc/_build -o -path $(BUILDROOT)/doc/deps -o -name *_constants.jl -o -name *_h.jl -o -name version_git.jl \) -prune -o -type f -print) + @$(MAKE) docs + julia-symlink: julia-cli-$(JULIA_BUILD_MODE) ifeq ($(OS),WINNT) echo '@"%~dp0/'"$$(echo '$(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE))')"'" %*' | tr / '\\' > $(BUILDROOT)/julia.bat @@ -71,6 +75,13 @@ ifndef JULIA_VAGRANT_BUILD endif endif +TOP_LEVEL_PKGS := Compiler + +TOP_LEVEL_PKG_LINK_TARGETS := $(addprefix 
$(build_datarootdir)/julia/,$(TOP_LEVEL_PKGS)) + +# Generate symlinks for top level pkgs in usr/share/julia/ +$(foreach module, $(TOP_LEVEL_PKGS), $(eval $(call symlink_target,$$(JULIAHOME)/$(module),$$(build_datarootdir)/julia,$(module)))) + julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia/test @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/deps @@ -78,12 +89,18 @@ julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia julia-stdlib: | $(DIRS) julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/stdlib -julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl +julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl $(build_datarootdir)/julia/juliac.jl $(build_datarootdir)/julia/juliac-buildscript.jl @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/base julia-libccalltest: julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalltest +julia-libccalllazyfoo: julia-deps + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalllazyfoo + +julia-libccalllazybar: julia-deps julia-libccalllazyfoo + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalllazybar + julia-libllvmcalltest: julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libllvmcalltest @@ -93,23 +110,24 @@ julia-src-release julia-src-debug : julia-src-% : julia-deps julia_flisp.boot.in julia-cli-release julia-cli-debug: julia-cli-% : julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/cli $* -julia-sysimg-ji : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir) +julia-sysimg-ji : $(TOP_LEVEL_PKG_LINK_TARGETS) julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir) @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-ji JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)' -julia-sysimg-bc : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir) +julia-sysimg-bc : $(TOP_LEVEL_PKG_LINK_TARGETS) julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir) @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-bc JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)' julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-sysimg-ji julia-src-% @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-$* -julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest julia-libllvmcalltest julia-base-cache +julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest \ + julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest julia-base-cache stdlibs-cache-release stdlibs-cache-debug : stdlibs-cache-% : julia-% - @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk all-$* + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk $* debug release : % : julia-% stdlibs-cache-% -docs: julia-sysimg-$(JULIA_BUILD_MODE) +docs: julia-sysimg-$(JULIA_BUILD_MODE) stdlibs-cache-$(JULIA_BUILD_MODE) @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no' docs-revise: @@ -152,8 +170,8 @@ release-candidate: release testall @echo 7. Clean out old .tar.gz files living in deps/, "\`git clean -fdx\`" seems to work #"` @echo 8. 
Replace github release tarball with tarballs created from make light-source-dist and make full-source-dist with USE_BINARYBUILDER=0 @echo 9. Check that 'make && make install && make test' succeed with unpacked tarballs even without Internet access. - @echo 10. Follow packaging instructions in doc/build/distributing.md to create binary packages for all platforms - @echo 11. Upload to AWS, update https://julialang.org/downloads and http://status.julialang.org/stable links + @echo 10. Follow packaging instructions in doc/src/devdocs/build/distributing.md to create binary packages for all platforms + @echo 11. Upload to AWS, update https://julialang.org/downloads and https://status.julialang.org/stable links @echo 12. Update checksums on AWS for tarball and packaged binaries @echo 13. Update versions.json. Wait at least 60 minutes before proceeding to step 14. @echo 14. Push to Juliaup (https://github.com/JuliaLang/juliaup/wiki/Adding-a-Julia-version) @@ -170,15 +188,15 @@ $(build_sysconfdir)/julia/startup.jl: $(JULIAHOME)/etc/startup.jl | $(build_sysc @echo Creating usr/etc/julia/startup.jl @cp $< $@ -$(build_datarootdir)/julia/julia-config.jl: $(JULIAHOME)/contrib/julia-config.jl | $(build_datarootdir)/julia +$(build_datarootdir)/julia/%: $(JULIAHOME)/contrib/% | $(build_datarootdir)/julia $(INSTALL_M) $< $(dir $@) $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir) @$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c) julia-base-cache: julia-sysimg-$(JULIA_BUILD_MODE) | $(DIRS) $(build_datarootdir)/julia - @JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ - $(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \ + @JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) JULIA_FALLBACK_REPL=1 WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ + $(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/contrib/write_base_cache.jl) \ $(call cygpath_w,$(build_datarootdir)/julia/base.cache)) # public libraries, that are installed in $(prefix)/lib @@ -189,14 +207,19 @@ JL_TARGETS := julia-debug endif # private libraries, that are installed in $(prefix)/lib/julia -JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest +JL_PRIVATE_LIBS-0 := libccalltest libccalllazyfoo libccalllazybar libllvmcalltest ifeq ($(JULIA_BUILD_MODE),release) JL_PRIVATE_LIBS-0 += libjulia-internal libjulia-codegen else ifeq ($(JULIA_BUILD_MODE),debug) JL_PRIVATE_LIBS-0 += libjulia-internal-debug libjulia-codegen-debug endif +# BSD-3-Clause +JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libcamd libccolamd libcolamd libsuitesparseconfig +# LGPL-2.1+ +JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libbtf libklu libldl ifeq ($(USE_GPL_LIBS), 1) -JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack +# GPL-2.0+ +JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libcholmod librbio libspqr libumfpack endif JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBBLASTRAMPOLINE) += libblastrampoline JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8 @@ -205,7 +228,7 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_GMP) += libgmp libgmpxx JL_PRIVATE_LIBS-$(USE_SYSTEM_MPFR) += libmpfr JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSSH2) += libssh2 JL_PRIVATE_LIBS-$(USE_SYSTEM_NGHTTP2) += libnghttp2 -JL_PRIVATE_LIBS-$(USE_SYSTEM_MBEDTLS) += libmbedtls 
libmbedcrypto libmbedx509 +JL_PRIVATE_LIBS-$(USE_SYSTEM_OPENSSL) += libcrypto libssl JL_PRIVATE_LIBS-$(USE_SYSTEM_CURL) += libcurl JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBGIT2) += libgit2 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUV) += libuv @@ -239,9 +262,9 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread endif ifeq ($(SANITIZE),1) ifeq ($(USECLANG),1) -JL_PRIVATE_LIBS-1 += libclang_rt.asan +JL_PRIVATE_LIBS-0 += libclang_rt.asan-* else -JL_PRIVATE_LIBS-1 += libasan +JL_PRIVATE_LIBS-0 += libasan endif endif @@ -265,7 +288,7 @@ define stringreplace endef -install: $(build_depsbindir)/stringreplace docs +install: $(build_depsbindir)/stringreplace $(BUILDROOT)/doc/_build/html/en/index.html @$(MAKE) $(QUIET_MAKE) $(JULIA_BUILD_MODE) @for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(private_libexecdir); do \ mkdir -p $(DESTDIR)$$subdir; \ @@ -281,16 +304,14 @@ else ifeq ($(JULIA_BUILD_MODE),debug) -$(INSTALL_M) $(build_libdir)/libjulia-debug.dll.a $(DESTDIR)$(libdir)/ -$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/ endif + -$(INSTALL_M) $(wildcard $(build_private_libdir)/*.a) $(DESTDIR)$(private_libdir)/ + -rm -f $(DESTDIR)$(private_libdir)/sys-o.a - # We have a single exception; we want 7z.dll to live in private_libexecdir, not bindir, so that 7z.exe can find it. + # We have a single exception; we want 7z.dll to live in private_libexecdir, + # not bindir, so that 7z.exe can find it. -mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(private_libexecdir)/ -$(INSTALL_M) $(build_bindir)/libopenlibm.dll.a $(DESTDIR)$(libdir)/ -$(INSTALL_M) $(build_libdir)/libssp.dll.a $(DESTDIR)$(libdir)/ - # The rest are compiler dependencies, as an example memcpy is exported by msvcrt - # These are files from mingw32 and required for creating shared libraries like our caches. 
- -$(INSTALL_M) $(build_libdir)/libgcc_s.a $(DESTDIR)$(libdir)/ - -$(INSTALL_M) $(build_libdir)/libgcc.a $(DESTDIR)$(libdir)/ - -$(INSTALL_M) $(build_libdir)/libmsvcrt.a $(DESTDIR)$(libdir)/ else # Copy over .dSYM directories directly for Darwin @@ -368,6 +389,11 @@ endif cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base cp -R -L $(JULIAHOME)/test/* $(DESTDIR)$(datarootdir)/julia/test cp -R -L $(build_datarootdir)/julia/* $(DESTDIR)$(datarootdir)/julia + + # Set .jl sources as read-only to match package directories + find $(DESTDIR)$(datarootdir)/julia/base -type f -name \*.jl -exec chmod 0444 '{}' \; + find $(DESTDIR)$(datarootdir)/julia/test -type f -name \*.jl -exec chmod 0444 '{}' \; + # Copy documentation cp -R -L $(BUILDROOT)/doc/_build/html $(DESTDIR)$(docdir)/ # Remove various files which should not be installed @@ -387,8 +413,12 @@ endif mkdir -p $(DESTDIR)$(datarootdir)/applications/ $(INSTALL_F) $(JULIAHOME)/contrib/julia.desktop $(DESTDIR)$(datarootdir)/applications/ # Install appdata file - mkdir -p $(DESTDIR)$(datarootdir)/appdata/ - $(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/appdata/ + mkdir -p $(DESTDIR)$(datarootdir)/metainfo/ + $(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/metainfo/ + # Install terminal info database +ifneq ($(WITH_TERMINFO),0) + cp -R -L $(build_datarootdir)/julia/terminfo $(DESTDIR)$(datarootdir)/julia/ +endif # Update RPATH entries and JL_SYSTEM_IMAGE_PATH if $(private_libdir_rel) != $(build_private_libdir_rel) ifneq ($(private_libdir_rel),$(build_private_libdir_rel)) @@ -445,7 +475,6 @@ endif ifeq ($(OS), Linux) -$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT) endif - ifneq ($(LOADER_BUILD_DEP_LIBS),$(LOADER_INSTALL_DEP_LIBS)) # Next, overwrite relative path to libjulia-internal in our loader if $$(LOADER_BUILD_DEP_LIBS) != $$(LOADER_INSTALL_DEP_LIBS) ifeq ($(JULIA_BUILD_MODE),release) @@ -525,7 +554,7 @@ app: darwinframework: $(MAKE) -C $(JULIAHOME)/contrib/mac/framework -light-source-dist.tmp: docs +light-source-dist.tmp: $(BUILDROOT)/doc/_build/html/en/index.html ifneq ($(BUILDROOT),$(JULIAHOME)) $(error make light-source-dist does not work in out-of-tree builds) endif @@ -583,6 +612,7 @@ clean: | $(CLEAN_TARGETS) @-$(MAKE) -C $(BUILDROOT)/cli clean @-$(MAKE) -C $(BUILDROOT)/test clean @-$(MAKE) -C $(BUILDROOT)/stdlib clean + @-$(MAKE) -C $(BUILDROOT) -f pkgimage.mk clean -rm -f $(BUILDROOT)/julia -rm -f $(BUILDROOT)/*.tar.gz -rm -f $(build_depsbindir)/stringreplace \ @@ -627,7 +657,7 @@ testall1: check-whitespace $(JULIA_BUILD_MODE) test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE @([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \ - printf '\033[93m HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true + printf '$(WARNCOLOR) HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. 
$(ENDCOLOR)\n') || true @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test $* JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) test-revise-%: .FORCE @@ -647,7 +677,7 @@ win-extras: ifeq ($(USE_SYSTEM_LLVM), 1) LLVM_SIZE := llvm-size$(EXE) else -LLVM_SIZE := $(build_depsbindir)/llvm-size$(EXE) +LLVM_SIZE := PATH=$(build_bindir):$$PATH; $(build_depsbindir)/llvm-size$(EXE) endif build-stats: ifeq ($(USE_BINARYBUILDER_LLVM),1) @@ -656,7 +686,14 @@ endif @printf $(JULCOLOR)' ==> ./julia binary sizes\n'$(ENDCOLOR) $(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_private_libdir)/sys.$(SHLIB_EXT)) \ $(call cygpath_w,$(build_shlibdir)/libjulia.$(SHLIB_EXT)) \ + $(call cygpath_w,$(build_shlibdir)/libjulia-internal.$(SHLIB_EXT)) \ + $(call cygpath_w,$(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT)) \ $(call cygpath_w,$(build_bindir)/julia$(EXE))) +ifeq ($(OS),Darwin) + $(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_shlibdir)/libLLVM.$(SHLIB_EXT))) +else + $(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_shlibdir)/$(LLVM_SHARED_LIB_NAME).$(SHLIB_EXT))) +endif @printf $(JULCOLOR)' ==> ./julia launch speedtest\n'$(ENDCOLOR) @time $(call spawn,$(build_bindir)/julia$(EXE) -e '') @time $(call spawn,$(build_bindir)/julia$(EXE) -e '') diff --git a/NEWS.md b/NEWS.md index e9ae12847ed29..6c378c8186007 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,162 +1,230 @@ -Julia v1.10 Release Notes +Julia v1.12 Release Notes ======================== New language features --------------------- -* JuliaSyntax.jl is now used as the default parser, providing better diagnostics and faster - parsing. Set environment variable `JULIA_USE_NEW_PARSER` to `0` to switch back to the old - parser if necessary (and if you find this necessary, please file an issue) ([#46372]). -* `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`) - may now be used as binary operators with arrow precedence. ([#45962]) +- New option `--trim` creates smaller binaries by removing code that was not proven to be reachable from + the entry points. Entry points can be marked using `Base.Experimental.entrypoint` ([#55047]). +- A new keyword argument `usings::Bool` has been added to `names`. By using this, we can now + find all the names available in module `A` by `names(A; all=true, imported=true, usings=true)`. ([#54609]) +- the `@atomic(...)` macro family supports now the reference assignment syntax, e.g. + `@atomic :monotonic v[3] += 4` modifies `v[3]` atomically with monotonic ordering semantics. ([#54707]) + The supported syntax allows + - atomic fetch (`x = @atomic v[3]`), + - atomic set (`@atomic v[3] = 4`), + - atomic modify (`@atomic v[3] += 2`), + - atomic set once (`@atomiconce v[3] = 2`), + - atomic swap (`x = @atomicswap v[3] = 2`), and + - atomic replace (`x = @atomicreplace v[3] 2=>5`). +- New option `--task-metrics=yes` to enable the collection of per-task timing information, + which can also be enabled/disabled at runtime with `Base.Experimental.task_metrics(::Bool)`. ([#56320]) + The available metrics are: + - actual running time for the task (`Base.Experimental.task_running_time_ns`), and + - wall-time for the task (`Base.Experimental.task_wall_time_ns`). +- Support for Unicode 16 ([#56925]). Language changes ---------------- -* When a task forks a child, the parent task's task-local RNG (random number generator) is no longer affected. The seeding of child based on the parent task also takes a more disciplined approach to collision resistance, using a design based on the SplitMix and DotMix splittable RNG schemes ([#49110]). 
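The `@atomic` array-reference forms listed under "New language features" above can be sketched roughly as follows. Backing `v` with a `Base.AtomicMemory{Int}` is an assumption made purely for illustration; the release note does not say which container type `v` is, only which macro forms are supported.

```julia
# Hypothetical illustration: v as an AtomicMemory (not specified by the release note).
v = Base.AtomicMemory{Int}(undef, 4)
for i in eachindex(v)
    @atomic v[i] = 0              # atomic set
end
@atomic v[3] += 4                 # atomic modify (sequentially consistent by default)
@atomic :monotonic v[3] += 4      # the same modification with monotonic ordering
x   = @atomic v[3]                # atomic fetch; x == 8 at this point
old = @atomicswap v[3] = 2        # atomic swap; returns the previous value (8)
res = @atomicreplace v[3] 2 => 5  # atomic replace (compare-and-swap)
```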
-* A new more-specific rule for methods resolves ambiguities containing Union{} in favor of - the method defined explicitly to handle the Union{} argument. This makes it possible to - define methods to explicitly handle Union{} without the ambiguities that commonly would - result previously. This also lets the runtime optimize certain method lookups in a way - that significantly improves load and inference times for heavily overloaded methods that - dispatch on Types (such as traits and constructors). -* The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). -* The `@simd` macro now has a more limited and clearer semantics, it only enables reordering and contraction - of floating-point operations, instead of turning on all "fastmath" optimizations. - If you observe performance regressions due to this change, you can recover previous behavior with `@fastmath @simd`, - if you are OK with all the optimizations enabled by the `@fastmath` macro. ([#49405]) -* When a method with keyword arguments is displayed in the stack trace view, the textual - representation of the keyword arguments' types is simplified using the new - `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). + - When methods are replaced with exactly equivalent ones, the old method is no + longer deleted implicitly simultaneously, although the new method does take + priority and become more specific than the old method. Thus if the new + method is deleted later, the old method will resume operating. This can be + useful to mocking frameworks (such as in SparseArrays, Pluto, and Mocking, + among others), as they do not need to explicitly restore the old method. + While inference and compilation still must be repeated with this, it also + may pave the way for inference to be able to intelligently re-use the old + results, once the new method is deleted. ([#53415]) + + - Macro expansion will no longer eagerly recurse into `Expr(:toplevel)` + expressions returned from macros. Instead, macro expansion of `:toplevel` + expressions will be delayed until evaluation time. This allows a later + expression within a given `:toplevel` expression to make use of macros + defined earlier in the same `:toplevel` expression. ([#53515]) + + - Trivial infinite loops (like `while true; end`) are no longer undefined + behavior. Infinite loops that actually do things (e.g. have side effects + or sleep) were never and are still not undefined behavior. ([#52999]) + + - It is now an error to mark a symbol as both `public` and `export`ed. + ([#53664]) Compiler/Runtime improvements ----------------------------- -* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]). -* The mark phase of the Garbage Collector is now multi-threaded ([#48600]). -* [JITLink](https://llvm.org/docs/JITLink.html) is enabled by default on Linux aarch64 when Julia is linked to LLVM 15 or later versions ([#49745]). - This should resolve many segmentation faults previously observed on this platform. +- Generated LLVM IR now uses actual pointer types instead of passing pointers as integers. + This affects `llvmcall`: Inline LLVM IR should be updated to use `i8*` or `ptr` instead of + `i32` or `i64`, and remove unneeded `ptrtoint`/`inttoptr` conversions. For compatibility, + IR with integer pointers is still supported, but generates a deprecation warning. ([#53687]) + +- A new exception `FieldError` is now introduced to raise/handle `getfield` exceptions. 
Previously, `getfield` failures were reported with the generic `ErrorException`; since `FieldError` is more specific, code that handles such failures should now catch `FieldError` instead. ([#54504]) Command-line option changes --------------------------- -* New option `--gcthreads` to set how many threads will be used by the Garbage Collector ([#48600]). - The default is set to `N/2` where `N` is the amount of worker threads (`--threads`) used by Julia. +* The `-m/--module` flag can be passed to run the `main` function inside a package with a set of arguments. + This `main` function should be declared using `@main` to indicate that it is an entry point. ([#52103]) +* Enabling or disabling colored text in Julia can now be controlled with the + [`NO_COLOR`](https://no-color.org/) or [`FORCE_COLOR`](https://force-color.org/) environment + variables. These variables are also honored by Julia's build system ([#53742], [#56346]). +* `--project=@temp` starts Julia with a temporary environment. ([#51149]) +* New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took + to compile, in ms. ([#54662]) +* `--trace-compile` now prints recompiled methods in yellow, or with a trailing comment if color is not supported ([#55763]) +* New `--trace-dispatch` option to report methods that are dynamically dispatched ([#55848]). Multi-threading changes ----------------------- +* A new `OncePerProcess{T}` type handles the pattern of code that must run once per process: + it wraps a function that runs exactly once, the first time it is called, and then returns the + same result value of type `T` on every subsequent call. There are also `OncePerThread{T}` and + `OncePerTask{T}` types for similar usage with threads or tasks. ([#55793]) Build system changes -------------------- +* There are new `Makefile`s to build Julia and LLVM using the Binary Optimization and Layout Tool (BOLT), see `contrib/bolt` and `contrib/pgo-lto-bolt` ([#54107]). New library functions --------------------- -* `tanpi` is now defined. It computes tan(πx) more accurately than `tan(pi*x)` ([#48575]). -* `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`. - It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot` ([#48899]). -* `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls. -* `Base.isprecompiled(pkg::PkgId)` to identify whether a package has already been precompiled ([#50218]). + +* `logrange(start, stop; length)` makes a range of constant ratio, instead of constant step ([#39071]) +* The new `isfull(c::Channel)` function can be used to check if `put!(c, some_value)` will block. ([#53159]) +* `waitany(tasks; throw=false)` and `waitall(tasks; failfast=false, throw=false)`, which wait for multiple tasks at once ([#53341]). +* `uuid7()` creates an RFC 9562-compliant UUID with version 7 ([#54834]). +* `insertdims(array; dims)` inserts singleton dimensions into an array; it is the inverse operation of `dropdims`. ([#45793]) +* The new `Fix` type is a generalization of `Fix1/Fix2` for fixing a single argument ([#54653]). New library features -------------------- -* The `initialized=true` keyword assignment for `sortperm!` and `partialsortperm!` - is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]).
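A rough illustration of a few of the entries under "New library functions" above. The `(done, remaining)` pair assumed for `waitany`'s return value, and the module qualifications, are my reading of the entries rather than verbatim API documentation.

```julia
using UUIDs   # uuid7 is provided by the UUIDs stdlib

xs = logrange(1, 1000; length = 4)     # 1.0, 10.0, 100.0, 1000.0: constant ratio, not constant step
A  = insertdims(ones(2, 3); dims = 2)  # size (2, 1, 3); dropdims(A; dims = 2) undoes this
id = uuid7()                           # version-7 (time-ordered) UUID
t1 = Threads.@spawn sum(rand(10^6))
t2 = Threads.@spawn (sleep(0.5); :late)
done, remaining = Base.waitany([t1, t2])  # returns as soon as either task finishes
```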
-* `binomial(x, k)` now supports non-integer `x` ([#48124]). -* A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]). -* `printstyled` now supports italic output ([#45164]). -* `parent` and `parentindices` support `SubString`s + +* `escape_string` takes additional keyword arguments `ascii=true` (to escape all + non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers + for u/U escapes, e.g. for C compatibility) ([#55099]). +* `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in + the uniquing checking ([#53474]) +* `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) +* `Lockable` is now exported ([#54595]) +* `Base.require_one_based_indexing` and `Base.has_offset_axes` are now public ([#56196]) +* New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) +* `isless` (and thus `cmp`, sorting, etc.) is now supported for zero-dimensional `AbstractArray`s ([#55772]) +* `invoke` now supports passing a Method instead of a type signature making this interface somewhat more flexible for certain uncommon use cases ([#56692]). +* `Timer(f, ...)` will now match the stickiness of the parent task when creating timer tasks, which can be overridden + by the new `spawn` kwarg. This avoids the issue where sticky tasks i.e. `@async` make their parent sticky ([#56745]) +* `invoke` now supports passing a CodeInstance instead of a type, which can enable +certain compiler plugin workflows ([#56660]). +* `sort` now supports `NTuple`s ([#54494]) +* `map!(f, A)` now stores the results in `A`, like `map!(f, A, A)`. or `A .= f.(A)` ([#40632]). Standard library changes ------------------------ -* `startswith` now supports seekable `IO` streams ([#43055]) -* printing integral `Rational`s will skip the denominator in `Rational`-typed IO context (e.g. in `Arrays`) ([#45396]) +* `gcdx(0, 0)` now returns `(0, 0, 0)` instead of `(0, 1, 0)` ([#40989]). +* `fd` returns a `RawFD` instead of an `Int` ([#55080]). -#### Package Manager +#### StyledStrings -* `Pkg.precompile` now accepts `timing` as a keyword argument which displays per package timing information for precompilation (e.g. `Pkg.precompile(timing=true)`) +#### JuliaSyntaxHighlighting + +* A new standard library for applying syntax highlighting to Julia code, this + uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function + that creates an `AnnotatedString` with syntax highlighting applied. ([#51810]) + +#### Package Manager #### LinearAlgebra -* `AbstractQ` no longer subtypes to `AbstractMatrix`. Moreover, `adjoint(Q::AbstractQ)` - no longer wraps `Q` in an `Adjoint` type, but instead in an `AdjointQ`, that itself - subtypes `AbstractQ`. This change accounts for the fact that typically `AbstractQ` - instances behave like function-based, matrix-backed linear operators, and hence don't - allow for efficient indexing. Also, many `AbstractQ` types can act on vectors/matrices - of different size, acting like a matrix with context-dependent size. With this change, - `AbstractQ` has a well-defined API that is described in detail in the - [Julia documentation](https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/#man-linalg-abstractq) - ([#46196]). -* Adjoints and transposes of `Factorization` objects are no longer wrapped in `Adjoint` - and `Transpose` wrappers, respectively. 
Instead, they are wrapped in - `AdjointFactorization` and `TranposeFactorization` types, which themselves subtype - `Factorization` ([#46874]). -* New functions `hermitianpart` and `hermitianpart!` for extracting the Hermitian - (real symmetric) part of a matrix ([#31836]). -* The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the - parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]). -* `eigen(A, B)` and `eigvals(A, B)`, where one of `A` or `B` is symmetric or Hermitian, - are now fully supported ([#49533]) -* `eigvals/eigen(A, cholesky(B))` now computes the generalized eigenvalues (`eigen`: and eigenvectors) - of `A` and `B` via Cholesky decomposition for positive definite `B`. Note: The second argument is - the output of `cholesky`. +* `rank` can now take a `QRPivoted` matrix to allow rank estimation via QR factorization ([#54283]). +* Added keyword argument `alg` to `eigen`, `eigen!`, `eigvals` and `eigvals!` for self-adjoint + matrix types (i.e., the type union `RealHermSymComplexHerm`) that allows one to switch + between different eigendecomposition algorithms ([#49355]). +* Added a generic version of the (unblocked) pivoted Cholesky decomposition + (callable via `cholesky[!](A, RowMaximum())`) ([#54619]). +* The number of default BLAS threads now respects process affinity, instead of + using total number of logical threads available on the system ([#55574]). +* A new function `zeroslike` is added that is used to generate the zero elements for matrix-valued banded matrices. + Custom array types may specialize this function to return an appropriate result ([#55252]). +* The matrix multiplication `A * B` calls `matprod_dest(A, B, T::Type)` to generate the destination. + This function is now public ([#55537]). +* The function `haszero(T::Type)` is used to check if a type `T` has a unique zero element defined as `zero(T)`. + This is now public ([#56223]). +* A new function `diagview` is added that returns a view into a specific band of an `AbstractMatrix` ([#56175]). + +#### Logging #### Printf -* Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]). #### Profile +* `Profile.take_heap_snapshot` takes a new keyword argument, `redact_data::Bool`, + that is `true` by default. When set, the contents of Julia objects are not emitted + in the heap snapshot. This currently only applies to strings. ([#55326]) +* `Profile.print()` now colors Base/Core/Package modules similarly to how they are in stacktraces. + Also paths, even if truncated, are now clickable in terminals that support URI links + to take you to the specified `JULIA_EDITOR` for the given file & line number. ([#55335]) #### Random - #### REPL -* When stack traces are printed, the printed depth of types in function signatures will be limited - to avoid overly verbose output ([#49795]). +- Using the new `usings=true` feature of the `names()` function, REPL completions can now + complete names that have been explicitly `using`-ed. ([#54610]) +- REPL completions can now complete input lines like `[import|using] Mod: xxx|` e.g. + complete `using Base.Experimental: @op` to `using Base.Experimental: @opaque`. ([#54719]) +- the REPL will now warn if it detects a name is being accessed from a module which does not define it (nor has a submodule which defines it), + and for which the name is not public in that module. 
For example, `map` is defined in Base, and executing `LinearAlgebra.map` + in the REPL will now issue a warning the first time it occurs. ([#54872]) +- When an object is printed automatically (by being returned in the REPL), its display is now truncated after printing 20 KiB. + This does not affect manual calls to `show`, `print`, and so forth. ([#53959]) +- Backslash completions now print the respective glyph or emoji next to each matching backslash shortcode. ([#54800]) #### SuiteSparse - #### SparseArrays - #### Test - -* The `@test_broken` macro (or `@test` with `broken=true`) now complains if the test expression returns a - non-boolean value in the same way as a non-broken test. ([#47804]) -* When a call to `@test` fails or errors inside a function, a larger stacktrace is now printed such that the location of the test within a `@testset` can be retrieved ([#49451]) +* A failing `DefaultTestSet` now prints the random number generator (RNG) of the failed test to the screen, to help reproduce a stochastic failure that depends only on the state of the RNG. + It is also possible to seed a test set by passing the `rng` keyword argument to `@testset`: + ```julia + using Test, Random + @testset rng=Xoshiro(0x2e026445595ed28e, 0x07bb81ac4c54926d, 0x83d7d70843e8bad6, 0xdbef927d150af80b, 0xdbf91ddf2534f850) begin + @test rand() == 0.559472630416976 + end + ``` #### Dates +#### Statistics #### Distributed - #### Unicode - #### DelimitedFiles - #### InteractiveUtils - * `code_native` and `@code_native` now default to intel syntax instead of AT&T. - * `@time_imports` now shows the timing of any module `__init__()`s that are run ([#49529]) +* New macros `@trace_compile` and `@trace_dispatch` for running an expression with + `--trace-compile=stderr --trace-compile-timing` and `--trace-dispatch=stderr` respectively enabled. + ([#55915]) Deprecated or removed --------------------- - External dependencies --------------------- +- The terminal info database, `terminfo`, is now vendored by default, providing a better + REPL user experience when `terminfo` is not available on the system. Julia can be built + without vendoring the database using the Makefile option `WITH_TERMINFO=0`. ([#55411]) Tooling Improvements -------------------- +- A wall-time profiler is now available for users who need a sampling profiler that captures tasks regardless of their scheduling or running state. This type of profiler enables profiling of I/O-heavy tasks and helps detect areas of heavy contention in the system ([#55889]). diff --git a/README.md b/README.md index a4480ecf482cd..021322336d286 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,8 @@ ## The Julia Language Julia is a high-level, high-performance dynamic language for technical -computing. The main homepage for Julia can be found at -[julialang.org](https://julialang.org/). This is the GitHub +computing. The main homepage for Julia can be found at +[julialang.org](https://julialang.org/). This is the GitHub repository of Julia source code, including instructions for compiling and installing Julia, below. @@ -48,6 +48,7 @@ and installing Julia, below. - **Documentation:** - **Packages:** - **Discussion forum:** +- **Zulip:** - **Slack:** (get an invite from ) - **YouTube:** - **Code coverage:** @@ -56,10 +57,8 @@ New developers may find the notes in [CONTRIBUTING](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md) helpful to start contributing to the Julia codebase.
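Looping back to the InteractiveUtils entry in the release notes above, here is a minimal sketch of how the new tracing macros might be invoked. It assumes `@trace_compile` and `@trace_dispatch` are exported from InteractiveUtils like the existing `@code_*` macros; treat it as illustrative rather than authoritative.

```julia
using InteractiveUtils

# Print to stderr each method compiled while the expression runs, with its
# compile time (as if --trace-compile=stderr --trace-compile-timing were set).
@trace_compile rand(2, 2) * rand(2, 2)

# Print to stderr the methods reached via dynamic dispatch while the
# expression runs (as if --trace-dispatch=stderr were set).
@trace_dispatch map(string, 1:3)
```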
-### External Resources +### Learning Julia -- [**StackOverflow**](https://stackoverflow.com/questions/tagged/julia-lang) -- [**Twitter**](https://twitter.com/JuliaLanguage) - [**Learning resources**](https://julialang.org/learning/) ## Binary Installation @@ -73,10 +72,10 @@ for OS and platform combinations. If everything works correctly, you will see a Julia banner and an interactive prompt into which you can enter expressions for -evaluation. You can read about [getting +evaluation. You can read about [getting started](https://docs.julialang.org/en/v1/manual/getting-started/) in the manual. -**Note**: Although some system package managers provide Julia, such +**Note**: Although some OS package managers provide Julia, such installations are neither maintained nor endorsed by the Julia project. They may be outdated, broken and/or unmaintained. We recommend you use the official Julia binaries instead. @@ -89,11 +88,11 @@ Then, acquire the source code by cloning the git repository: git clone https://github.com/JuliaLang/julia.git -and then use the command prompt to change into the resulting julia directory. By default you will be building the latest unstable version of +and then use the command prompt to change into the resulting julia directory. By default, you will be building the latest unstable version of Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases) of Julia. You can get this version by running: - git checkout v1.9.0 + git checkout v1.11.2 To build the `julia` executable, run `make` from within the julia directory. @@ -116,7 +115,7 @@ started](https://docs.julialang.org/en/v1/manual/getting-started/) in the manual. Detailed build instructions, should they be necessary, -are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/). +are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/build.md). ### Uninstalling Julia @@ -131,14 +130,14 @@ The Julia source code is organized as follows: | Directory | Contents | | - | - | | `base/` | source code for the Base module (part of Julia's standard library) | -| `stdlib/` | source code for other standard library packages | | `cli/` | source for the command line interface/REPL | | `contrib/` | miscellaneous scripts | | `deps/` | external dependencies | | `doc/src/` | source for the user manual | +| `etc/` | contains `startup.jl` | | `src/` | source for Julia language core | +| `stdlib/` | source code for other standard library packages | | `test/` | test suites | -| `usr/` | binaries and shared libraries loaded by Julia's standard libraries | ## Terminal, Editors and IDEs @@ -146,7 +145,7 @@ The Julia REPL is quite powerful. See the section in the manual on [the Julia REPL](https://docs.julialang.org/en/v1/stdlib/REPL/) for more details. -On Windows we highly recommend running Julia in a modern terminal, +On Windows, we highly recommend running Julia in a modern terminal, such as [Windows Terminal from the Microsoft Store](https://aka.ms/terminal). Support for editing Julia is available for many @@ -157,7 +156,7 @@ Support for editing Julia is available for many others. For users who prefer IDEs, we recommend using VS Code with the -[julia-vscode](https://www.julia-vscode.org/) plugin. 
+[julia-vscode](https://www.julia-vscode.org/) plugin.\ For notebook users, [Jupyter](https://jupyter.org/) notebook support is available through the [IJulia](https://github.com/JuliaLang/IJulia.jl) package, and the [Pluto.jl](https://github.com/fonsp/Pluto.jl) package provides Pluto notebooks. diff --git a/THIRDPARTY.md b/THIRDPARTY.md index 51950d9e2c6a1..f3f59ca4ff3f7 100644 --- a/THIRDPARTY.md +++ b/THIRDPARTY.md @@ -1,12 +1,11 @@ The Julia language is licensed under the MIT License (see [LICENSE.md](./LICENSE.md) ). The "language" consists -of the compiler (the contents of src/), most of the standard library (base/), +of the compiler (the contents of `src/`), most of the standard library (`base/` and `stdlib/`), and some utilities (most of the rest of the files in this repository). See below for exceptions. - [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)]. - [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed. - [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC] -- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT] - [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3] - [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF] - [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3] @@ -26,7 +25,8 @@ own licenses: and optionally: -- [ITTAPI](https://github.com/intel/ittapi/blob/master/LICENSES/BSD-3-Clause.txt) [BSD-3] +- [LibTracyClient](https://github.com/wolfpld/tracy/blob/master/LICENSE) [BSD-3] +- [ITTAPI](https://github.com/intel/ittapi/tree/master/LICENSES) [BSD-3 AND GPL2] Julia's `stdlib` uses the following external libraries, which have their own licenses: @@ -36,19 +36,31 @@ Julia's `stdlib` uses the following external libraries, which have their own lic - [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception] - [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative] - [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3] -- [MBEDTLS](https://github.com/ARMmbed/mbedtls/blob/development/LICENSE) [Apache 2.0] +- [OPENSSL](https://www.openssl.org/source/license.html) [Apache 2.0] - [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+] - [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3] - [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3] - [PCRE](https://www.pcre.org/licence.txt) [BSD-3] -- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of LGPL2+ and GPL2+; see individual module licenses] +- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of BSD-3-Clause, LGPL2.1+ and GPL2+; see individual module licenses] + - [`libamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/AMD/Doc/License.txt) [BSD-3-Clause] + - [`libcamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CAMD/Doc/License.txt) [BSD-3-Clause] + - [`libccolamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CCOLAMD/Doc/License.txt) [BSD-3-Clause] + - 
[`libcolamd`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/COLAMD/Doc/License.txt) [BSD-3-Clause] + - [`libsuitesparseconfig`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/SuiteSparse_config/README.txt) [BSD-3-Clause] + - [`libbtf`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/BTF/Doc/License.txt) [LGPL-2.1+] + - [`libklu`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/KLU/Doc/License.txt) [LGPL-2.1+] + - [`libldl`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/LDL/Doc/License.txt) [LGPL-2.1+] + - [`libcholmod`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/CHOLMOD/Doc/License.txt) [LGPL-2.1+ and GPL-2.0+] + - [`librbio`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/RBio/Doc/License.txt) [GPL-2.0+] + - [`libspqr`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/SPQR/Doc/License.txt) [GPL-2.0+] + - [`libumfpack`](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/dev/UMFPACK/Doc/License.txt) [GPL-2.0+] - [LIBBLASTRAMPOLINE](https://github.com/staticfloat/libblastrampoline/blob/main/LICENSE) [MIT] - [NGHTTP2](https://github.com/nghttp2/nghttp2/blob/master/COPYING) [MIT] Julia's build process uses the following external tools: -- [PATCHELF](https://nixos.org/patchelf.html) -- [OBJCONV](https://www.agner.org/optimize/#objconv) +- [PATCHELF](https://github.com/NixOS/patchelf/blob/master/COPYING) [GPL3] +- [OBJCONV](https://www.agner.org/optimize/#objconv) [GPL3] - [LIBWHICH](https://github.com/vtjnash/libwhich/blob/master/LICENSE) [MIT] Julia bundles the following external programs and libraries: diff --git a/VERSION b/VERSION index 86a15e0570c4a..f6c25a3020b69 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.10.0-DEV +1.12.0-DEV diff --git a/base/Base.jl b/base/Base.jl index 1fc20293aa384..20b1636c29a8d 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -1,169 +1,8 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -baremodule Base - -using Core.Intrinsics, Core.IR - -# to start, we're going to use a very simple definition of `include` -# that doesn't require any function (except what we can get from the `Core` top-module) -const _included_files = Array{Tuple{Module,String},1}(Core.undef, 1) -function include(mod::Module, path::String) - ccall(:jl_array_grow_end, Cvoid, (Any, UInt), _included_files, UInt(1)) - Core.arrayset(true, _included_files, (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), arraylen(_included_files)) - Core.println(path) - ccall(:jl_uv_flush, Nothing, (Ptr{Nothing},), Core.io_pointer(Core.stdout)) - Core.include(mod, path) -end -include(path::String) = include(Base, path) - -# from now on, this is now a top-module for resolving syntax -const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main -ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module) - -# The @inline/@noinline macros that can be applied to a function declaration are not available -# until after array.jl, and so we will mark them within a function body instead. -macro inline() Expr(:meta, :inline) end -macro noinline() Expr(:meta, :noinline) end - -# Try to help prevent users from shooting them-selves in the foot -# with ambiguities by defining a few common and critical operations -# (and these don't need the extra convert code) -getproperty(x::Module, f::Symbol) = (@inline; getglobal(x, f)) -getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f)) -setproperty!(x::Type, f::Symbol, v) = error("setfield! 
fields of Types should not be changed") -getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f)) -setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error - -getproperty(x, f::Symbol) = (@inline; getfield(x, f)) -function setproperty!(x, f::Symbol, v) - ty = fieldtype(typeof(x), f) - val = v isa ty ? v : convert(ty, v) - return setfield!(x, f, val) -end - -dotgetproperty(x, f) = getproperty(x, f) - -getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order)) -function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic) - @inline - ty = Core.get_binding_type(x, f) - val = v isa ty ? v : convert(ty, v) - return setglobal!(x, f, val, order) -end -getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order)) -setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed") -getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order)) -setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error - -getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order)) -function setproperty!(x, f::Symbol, v, order::Symbol) - @inline - ty = fieldtype(typeof(x), f) - val = v isa ty ? v : convert(ty, v) - return setfield!(x, f, val, order) -end - -function swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic) - @inline - ty = fieldtype(typeof(x), f) - val = v isa ty ? v : convert(ty, v) - return Core.swapfield!(x, f, val, order) -end -function modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic) - @inline - return Core.modifyfield!(x, f, op, v, order) -end -function replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) - @inline - ty = fieldtype(typeof(x), f) - val = desired isa ty ? desired : convert(ty, desired) - return Core.replacefield!(x, f, expected, val, success_order, fail_order) -end - -convert(::Type{Any}, Core.@nospecialize x) = x -convert(::Type{T}, x::T) where {T} = x -include("coreio.jl") - -eval(x) = Core.eval(Base, x) -eval(m::Module, x) = Core.eval(m, x) - -# init core docsystem -import Core: @doc, @__doc__, WrappedException, @int128_str, @uint128_str, @big_str, @cmd -if isdefined(Core, :Compiler) - import Core.Compiler.CoreDocs - Core.atdoc!(CoreDocs.docm) -end - -include("exports.jl") +const start_base_include = time_ns() -if false - # simple print definitions for debugging. enable these if something - # goes wrong during bootstrap before printing code is available. - # otherwise, they just just eventually get (noisily) overwritten later - global show, print, println - show(io::IO, x) = Core.show(io, x) - print(io::IO, a...) = Core.print(io, a...) - println(io::IO, x...) = Core.println(io, x...) -end - -""" - time_ns() -> UInt64 - -Get the time in nanoseconds. The time corresponding to 0 is undefined, and wraps every 5.8 years. -""" -time_ns() = ccall(:jl_hrtime, UInt64, ()) - -start_base_include = time_ns() - -## Load essential files and libraries -include("essentials.jl") -include("ctypes.jl") -include("gcutils.jl") -include("generator.jl") include("reflection.jl") -include("options.jl") - -# define invoke(f, T, args...; kwargs...), without kwargs wrapping -# to forward to invoke -function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke), f, T, args...) 
- @inline - # prepend kwargs and f to the invoked from the user - T = rewrap_unionall(Tuple{Core.Typeof(kwargs), Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T) - return invoke(Core.kwcall, T, kwargs, f, args...) -end -# invoke does not have its own call cache, but kwcall for invoke does -setfield!(typeof(invoke).name.mt, :max_args, 3, :monotonic) # invoke, f, T, args... - -# define applicable(f, T, args...; kwargs...), without kwargs wrapping -# to forward to applicable -function Core.kwcall(kwargs::NamedTuple, ::typeof(applicable), @nospecialize(args...)) - @inline - return applicable(Core.kwcall, kwargs, args...) -end -function Core._hasmethod(@nospecialize(f), @nospecialize(t)) # this function has a special tfunc (TODO: make this a Builtin instead like applicable) - tt = rewrap_unionall(Tuple{Core.Typeof(f), (unwrap_unionall(t)::DataType).parameters...}, t) - return Core._hasmethod(tt) -end - - -# core operations & types -include("promotion.jl") -include("tuple.jl") -include("expr.jl") -include("pair.jl") -include("traits.jl") -include("range.jl") -include("error.jl") - -# core numeric operations & types -==(x, y) = x === y -include("bool.jl") -include("number.jl") -include("int.jl") -include("operators.jl") -include("pointer.jl") -include("refvalue.jl") -include("cmem.jl") include("refpointer.jl") # now replace the Pair constructor (relevant for NamedTuples) with one that calls our Base.convert @@ -174,55 +13,27 @@ delete_method(which(Pair{Any,Any}, (Any, Any))) end # The REPL stdlib hooks into Base using this Ref -const REPL_MODULE_REF = Ref{Module}() - -include("checked.jl") -using .Checked -function cld end -function fld end - -# Lazy strings -include("strings/lazy.jl") - -# array structures -include("indices.jl") -include("array.jl") -include("abstractarray.jl") -include("subarray.jl") -include("views.jl") -include("baseext.jl") - -include("ntuple.jl") - -include("abstractdict.jl") -include("iddict.jl") -include("idset.jl") - -include("iterators.jl") -using .Iterators: zip, enumerate, only -using .Iterators: Flatten, Filter, product # for generators -using .Iterators: Stateful # compat (was formerly used in reinterpretarray.jl) - -include("namedtuple.jl") - -# For OS specific stuff -# We need to strcat things here, before strings are really defined -function strcat(x::String, y::String) - out = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), Core.sizeof(x) + Core.sizeof(y)) - GC.@preserve x y out begin - out_ptr = unsafe_convert(Ptr{UInt8}, out) - unsafe_copyto!(out_ptr, unsafe_convert(Ptr{UInt8}, x), Core.sizeof(x)) - unsafe_copyto!(out_ptr + Core.sizeof(x), unsafe_convert(Ptr{UInt8}, y), Core.sizeof(y)) +const REPL_MODULE_REF = Ref{Module}(Base) +process_sysimg_args!() + +include(strcat(BUILDROOT, "build_h.jl")) # include($BUILDROOT/base/build_h.jl) +include(strcat(BUILDROOT, "version_git.jl")) # include($BUILDROOT/base/version_git.jl) + +# Initialize DL_LOAD_PATH as early as possible. We are defining things here in +# a slightly more verbose fashion than usual, because we're running so early. +const DL_LOAD_PATH = String[] +let os = ccall(:jl_get_UNAME, Any, ()) + if os === :Darwin || os === :Apple + if Base.DARWIN_FRAMEWORK + push!(DL_LOAD_PATH, "@loader_path/Frameworks") + end + push!(DL_LOAD_PATH, "@loader_path") end - return out end -include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl")) # include($BUILDROOT/base/build_h.jl) -include(strcat((length(Core.ARGS)>=2 ? 
Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl) # numeric operations include("hashing.jl") include("rounding.jl") -using .Rounding include("div.jl") include("float.jl") include("twiceprecision.jl") @@ -245,13 +56,6 @@ include("reduce.jl") ## core structures include("reshapedarray.jl") include("reinterpretarray.jl") -include("bitarray.jl") -include("bitset.jl") - -if !isdefined(Core, :Compiler) - include("docs/core.jl") - Core.atdoc!(CoreDocs.docm) -end include("multimedia.jl") using .Multimedia @@ -260,34 +64,39 @@ using .Multimedia include("some.jl") include("dict.jl") -include("abstractset.jl") include("set.jl") # Strings include("char.jl") +function array_new_memory(mem::Memory{UInt8}, newlen::Int) + # add an optimization to array_new_memory for StringVector + if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) === mem + # TODO: when implemented, this should use a memory growing call + return typeof(mem)(undef, newlen) + else + # If data is in a String, keep it that way. + # When implemented, this could use jl_gc_expand_string(oldstr, newlen) as an optimization + str = _string_n(newlen) + return (@assume_effects :total !:consistent @ccall jl_string_to_genericmemory(str::Any,)::Memory{UInt8}) + end +end include("strings/basic.jl") include("strings/string.jl") include("strings/substring.jl") - -# Initialize DL_LOAD_PATH as early as possible. We are defining things here in -# a slightly more verbose fashion than usual, because we're running so early. -const DL_LOAD_PATH = String[] -let os = ccall(:jl_get_UNAME, Any, ()) - if os === :Darwin || os === :Apple - if Base.DARWIN_FRAMEWORK - push!(DL_LOAD_PATH, "@loader_path/Frameworks") - end - push!(DL_LOAD_PATH, "@loader_path") - end -end +include("strings/cstring.jl") include("osutils.jl") -include("c.jl") # Core I/O include("io.jl") include("iobuffer.jl") +# Concurrency (part 1) +include("linked_list.jl") +include("condition.jl") +include("threads.jl") +include("lock.jl") + # strings & printing include("intfuncs.jl") include("strings/strings.jl") @@ -324,21 +133,25 @@ using .Libc: getpid, gethostname, time, memcpy, memset, memmove, memcmp const libblas_name = "libblastrampoline" * (Sys.iswindows() ? 
"-5" : "") const liblapack_name = libblas_name -# Logging -include("logging.jl") -using .CoreLogging - -# Concurrency -include("linked_list.jl") -include("condition.jl") -include("threads.jl") -include("lock.jl") +# Concurrency (part 2) +# Note that `atomics.jl` here should be deprecated +Core.eval(Threads, :(include("atomics.jl"))) include("channels.jl") include("partr.jl") include("task.jl") include("threads_overloads.jl") include("weakkeydict.jl") +# ScopedValues +include("scopedvalues.jl") + +# metaprogramming +include("meta.jl") + +# Logging +include("logging/logging.jl") +using .CoreLogging + include("env.jl") # functions defined in Random @@ -354,7 +167,7 @@ include("filesystem.jl") using .Filesystem include("cmd.jl") include("process.jl") -include("ttyhascolor.jl") +include("terminfo.jl") include("secretbuffer.jl") # core math functions @@ -388,10 +201,6 @@ include("accumulate.jl") include("permuteddimsarray.jl") using .PermutedDimsArrays -# basic data structures -include("ordering.jl") -using .Order - # Combinatorics include("sort.jl") using .Sort @@ -428,9 +237,6 @@ include("irrationals.jl") include("mathconstants.jl") using .MathConstants: ℯ, π, pi -# metaprogramming -include("meta.jl") - # Stack frames and traces include("stacktraces.jl") using .StackTraces @@ -443,8 +249,10 @@ include("deepcopy.jl") include("download.jl") include("summarysize.jl") include("errorshow.jl") +include("util.jl") include("initdefs.jl") +Filesystem.__postinit__() # worker threads include("threadcall.jl") @@ -454,11 +262,11 @@ include("uuid.jl") include("pkgid.jl") include("toml_parser.jl") include("linking.jl") +include("staticdata.jl") include("loading.jl") # misc useful functions & macros include("timing.jl") -include("util.jl") include("client.jl") include("asyncmap.jl") @@ -471,14 +279,18 @@ include("docs/basedocs.jl") # Documentation -- should always be included last in sysimg. include("docs/Docs.jl") using .Docs -if isdefined(Core, :Compiler) && is_primary_base_module - Docs.loaddocs(Core.Compiler.CoreDocs.DOCS) -end +Docs.loaddocs(CoreDocs.DOCS) +@eval CoreDocs DOCS = DocLinkedList() + +include("precompilation.jl") # finally, now make `include` point to the full version for m in methods(include) delete_method(m) end +for m in methods(IncludeInto(Base)) + delete_method(m) +end # This method is here only to be overwritten during the test suite to test # various sysimg related invalidation scenarios. @@ -486,86 +298,34 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) # These functions are duplicated in client.jl/include(::String) for # nicer stacktraces. 
Modifications here have to be backported there -include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) -include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +@noinline include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) +@noinline include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +(this::IncludeInto)(fname::AbstractString) = include(identity, this.m, fname) +(this::IncludeInto)(mapexpr::Function, fname::AbstractString) = include(mapexpr, this.m, fname) + +# Compatibility with when Compiler was in Core +@eval Core const Compiler = Main.Base.Compiler +@eval Compiler const fl_parse = Core.Main.Base.fl_parse # External libraries vendored into Base Core.println("JuliaSyntax/src/JuliaSyntax.jl") -include(@__MODULE__, "JuliaSyntax/src/JuliaSyntax.jl") +include(@__MODULE__, string(BUILDROOT, "JuliaSyntax/src/JuliaSyntax.jl")) # include($BUILDROOT/base/JuliaSyntax/JuliaSyntax.jl) end_base_include = time_ns() const _sysimage_modules = PkgId[] in_sysimage(pkgid::PkgId) = pkgid in _sysimage_modules -# Precompiles for Revise and other packages -# TODO: move these to contrib/generate_precompile.jl -# The problem is they don't work there -for match = _methods(+, (Int, Int), -1, get_world_counter()) - m = match.method - delete!(push!(Set{Method}(), m), m) - copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt))) - - empty!(Set()) - push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two)) - (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"] - (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two] - (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)] - (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two] - (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int] - Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one] - Dict(Base => [:(1+1)])[Base] - Dict(:one => [1])[:one] - Dict("abc" => Set())["abc"] - pushfirst!([], sum) - get(Base.pkgorigins, Base.PkgId(Base), nothing) - sort!([1,2,3]) - unique!([1,2,3]) - cumsum([1,2,3]) - append!(Int[], BitSet()) - isempty(BitSet()) - delete!(BitSet([1,2]), 3) - deleteat!(Int32[1,2,3], [1,3]) - deleteat!(Any[1,2,3], [1,3]) - Core.svec(1, 2) == Core.svec(3, 4) - any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))]) - - # Code loading uses this - sortperm(mtime.(readdir(".")), rev=true) - # JLLWrappers uses these - Dict{UUID,Set{String}}()[UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}() - get!(Set{String}, Dict{UUID,Set{String}}(), UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")) - eachindex(IndexLinear(), Expr[]) - push!(Expr[], Expr(:return, false)) - vcat(String[], String[]) - k, v = (:hello => nothing) - precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int)) - precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int)) - # Preferences uses these - precompile(get_preferences, (UUID,)) - precompile(record_compiletime_preference, (UUID, String)) - get(Dict{String,Any}(), "missing", nothing) - delete!(Dict{String,Any}(), "missing") - for (k, v) in Dict{String,Any}() - println(k) - end - - break # only actually need to do this once -end - if is_primary_base_module # Profiling helper # triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request # 
Needs to be in Base because Profile is no longer loaded on boot -const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}() -function profile_printing_listener() +function profile_printing_listener(cond::Base.AsyncCondition) profile = nothing try - while true - wait(PROFILE_PRINT_COND[]) - profile = @something(profile, require(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"))) - + while _trywait(cond) + profile = @something(profile, require_stdlib(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))::Module invokelatest(profile.peek_report[]) if Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true println(stderr, "Saving heap snapshot...") @@ -578,10 +338,32 @@ function profile_printing_listener() @error "Profile printing listener crashed" exception=ex,catch_backtrace() end end + nothing +end + +function start_profile_listener() + cond = Base.AsyncCondition() + Base.uv_unref(cond.handle) + t = errormonitor(Threads.@spawn(profile_printing_listener(cond))) + atexit() do + # destroy this callback when exiting + ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL) + # this will prompt any ongoing or pending event to flush also + close(cond) + # error-propagation is not needed, since the errormonitor will handle printing that better + t === current_task() || _wait(t) + end + finalizer(cond) do c + # if something goes south, still make sure we aren't keeping a reference in C to this + ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL) + end + ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle) end function __init__() # Base library init + global _atexit_hooks_finished = false + Filesystem.__postinit__() reinit_stdio() Multimedia.reinit_displays() # since Multimedia.displays uses stdout as fallback # initialize loading @@ -589,34 +371,54 @@ function __init__() init_load_path() init_active_project() append!(empty!(_sysimage_modules), keys(loaded_modules)) + empty!(loaded_precompiles) # If we load a packageimage when building the image this might not be empty + for mod in loaded_modules_order + push!(get!(Vector{Module}, loaded_precompiles, PkgId(mod)), mod) + end if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES") MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"]) end # Profiling helper @static if !Sys.iswindows() # triggering a profile via signals is not implemented on windows - cond = Base.AsyncCondition() - Base.uv_unref(cond.handle) - PROFILE_PRINT_COND[] = cond - ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle) - errormonitor(Threads.@spawn(profile_printing_listener())) + start_profile_listener() end _require_world_age[] = get_world_counter() # Prevent spawned Julia process from getting stuck waiting on Tracy to connect. delete!(ENV, "JULIA_WAIT_FOR_TRACY") - if get_bool_env("JULIA_USE_NEW_PARSER", true) === true + if get_bool_env("JULIA_USE_FLISP_PARSER", false) === false JuliaSyntax.enable_in_core!() end + + CoreLogging.global_logger(CoreLogging.ConsoleLogger()) nothing end # enable threads support @eval PCRE PCRE_COMPILE_LOCK = Threads.SpinLock() +# Record dependency information for files belonging to the Compiler, so that +# we know whether the .ji can just give the Base copy or not. +# TODO: We may want to do this earlier to avoid TOCTOU issues. 
+const _compiler_require_dependencies = Any[] +for i = 1:length(_included_files) + isassigned(_included_files, i) || continue + (mod, file) = _included_files[i] + if mod === Compiler || parentmodule(mod) === Compiler || endswith(file, "/Compiler.jl") + _include_dependency!(_compiler_require_dependencies, true, mod, file, true, false) + end +end +# Make relative to DATAROOTDIR to allow relocation +let basedir = joinpath(Sys.BINDIR, DATAROOTDIR) +for i = 1:length(_compiler_require_dependencies) + tup = _compiler_require_dependencies[i] + _compiler_require_dependencies[i] = (tup[1], relpath(tup[2], basedir), tup[3:end]...) +end +end +@assert length(_compiler_require_dependencies) >= 15 + end # Ensure this file is also tracked @assert !isassigned(_included_files, 1) _included_files[1] = (parentmodule(Base), abspath(@__FILE__)) - -end # baremodule Base diff --git a/base/Base_compiler.jl b/base/Base_compiler.jl new file mode 100644 index 0000000000000..db3ebb0232e38 --- /dev/null +++ b/base/Base_compiler.jl @@ -0,0 +1,309 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +baremodule Base + +using Core.Intrinsics, Core.IR + +# to start, we're going to use a very simple definition of `include` +# that doesn't require any function (except what we can get from the `Core` top-module) +# start this big so that we don't have to resize before we have defined how to grow an array +const _included_files = Array{Tuple{Module,String},1}(Core.undef, 400) +setfield!(_included_files, :size, (1,)) +function include(mod::Module, path::String) + len = getfield(_included_files.size, 1) + memlen = _included_files.ref.mem.length + lenp1 = Core.add_int(len, 1) + if len === memlen # by the time this is true we hopefully will have defined _growend! + _growend!(_included_files, UInt(1)) + else + setfield!(_included_files, :size, (lenp1,)) + end + Core.memoryrefset!(Core.memoryref(_included_files.ref, lenp1), (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), :not_atomic, true) + Core.println(path) + ccall(:jl_uv_flush, Nothing, (Ptr{Nothing},), Core.io_pointer(Core.stdout)) + Core.include(mod, path) +end +include(path::String) = include(Base, path) + +struct IncludeInto <: Function + m::Module +end +(this::IncludeInto)(fname::AbstractString) = include(this.m, fname) + +# from now on, this is now a top-module for resolving syntax +const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main +ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module) + +# The @inline/@noinline macros that can be applied to a function declaration are not available +# until after array.jl, and so we will mark them within a function body instead. +macro inline() Expr(:meta, :inline) end +macro noinline() Expr(:meta, :noinline) end + +macro _boundscheck() Expr(:boundscheck) end + +# Try to help prevent users from shooting them-selves in the foot +# with ambiguities by defining a few common and critical operations +# (and these don't need the extra convert code) +getproperty(x::Module, f::Symbol) = (@inline; getglobal(x, f)) +getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f)) +setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed") +setproperty!(x::Array, f::Symbol, v) = error("setfield! 
fields of Array should not be changed") +getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f)) +setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error + +getproperty(x, f::Symbol) = (@inline; getfield(x, f)) +function setproperty!(x, f::Symbol, v) + ty = fieldtype(typeof(x), f) + val = v isa ty ? v : convert(ty, v) + return setfield!(x, f, val) +end + +typeof(function getproperty end).name.constprop_heuristic = Core.FORCE_CONST_PROP +typeof(function setproperty! end).name.constprop_heuristic = Core.FORCE_CONST_PROP + +dotgetproperty(x, f) = getproperty(x, f) + +getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order)) +function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic) + @inline + ty = Core.get_binding_type(x, f) + val = v isa ty ? v : convert(ty, v) + return setglobal!(x, f, val, order) +end +getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order)) +setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed") +getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order)) +setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error + +getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order)) +function setproperty!(x, f::Symbol, v, order::Symbol) + @inline + ty = fieldtype(typeof(x), f) + val = v isa ty ? v : convert(ty, v) + return setfield!(x, f, val, order) +end + +function swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic) + @inline + ty = fieldtype(typeof(x), f) + val = v isa ty ? v : convert(ty, v) + return Core.swapfield!(x, f, val, order) +end +function modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic) + @inline + return Core.modifyfield!(x, f, op, v, order) +end +function replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + @inline + ty = fieldtype(typeof(x), f) + val = desired isa ty ? desired : convert(ty, desired) + return Core.replacefield!(x, f, expected, val, success_order, fail_order) +end +function setpropertyonce!(x, f::Symbol, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + @inline + ty = fieldtype(typeof(x), f) + val = desired isa ty ? desired : convert(ty, desired) + return Core.setfieldonce!(x, f, val, success_order, fail_order) +end + +function swapproperty!(x::Module, f::Symbol, v, order::Symbol=:not_atomic) + @inline + ty = Core.get_binding_type(x, f) + val = v isa ty ? v : convert(ty, v) + return Core.swapglobal!(x, f, val, order) +end +function modifyproperty!(x::Module, f::Symbol, op, v, order::Symbol=:not_atomic) + @inline + return Core.modifyglobal!(x, f, op, v, order) +end +function replaceproperty!(x::Module, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + @inline + ty = Core.get_binding_type(x, f) + val = desired isa ty ? desired : convert(ty, desired) + return Core.replaceglobal!(x, f, expected, val, success_order, fail_order) +end +function setpropertyonce!(x::Module, f::Symbol, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + @inline + ty = Core.get_binding_type(x, f) + val = desired isa ty ? 
desired : convert(ty, desired) + return Core.setglobalonce!(x, f, val, success_order, fail_order) +end + +convert(::Type{Any}, Core.@nospecialize x) = x +convert(::Type{T}, x::T) where {T} = x +include("coreio.jl") + +import Core: @doc, @__doc__, WrappedException, @int128_str, @uint128_str, @big_str, @cmd + +# core docsystem +include("docs/core.jl") +Core.atdoc!(CoreDocs.docm) + +eval(x) = Core.eval(Base, x) +eval(m::Module, x) = Core.eval(m, x) + +include("exports.jl") +include("public.jl") + +if false + # simple print definitions for debugging. enable these if something + # goes wrong during bootstrap before printing code is available. + # otherwise, they just just eventually get (noisily) overwritten later + global show, print, println + show(io::IO, x) = Core.show(io, x) + print(io::IO, a...) = Core.print(io, a...) + println(io::IO, x...) = Core.println(io, x...) +end + +""" + time_ns() -> UInt64 + +Get the time in nanoseconds relative to some arbitrary time in the past. The primary use is for measuring the elapsed time +between two moments in time. +""" +time_ns() = ccall(:jl_hrtime, UInt64, ()) + +# A warning to be interpolated in the docstring of every dangerous mutating function in Base, see PR #50824 +const _DOCS_ALIASING_WARNING = """ +!!! warning + Behavior can be unexpected when any mutated argument shares memory with any other argument. +""" + +## Load essential files and libraries +include("essentials.jl") +include("ctypes.jl") +include("gcutils.jl") +include("generator.jl") +include("runtime_internals.jl") +include("options.jl") + +# define invoke(f, T, args...; kwargs...), without kwargs wrapping +# to forward to invoke +function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke), f, T, args...) + @inline + # prepend kwargs and f to the invoked from the user + T = rewrap_unionall(Tuple{Core.Typeof(kwargs), Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T) + return invoke(Core.kwcall, T, kwargs, f, args...) +end +# invoke does not have its own call cache, but kwcall for invoke does +setfield!(typeof(invoke).name.mt, :max_args, 3, :monotonic) # invoke, f, T, args... + +# define applicable(f, T, args...; kwargs...), without kwargs wrapping +# to forward to applicable +function Core.kwcall(kwargs::NamedTuple, ::typeof(applicable), @nospecialize(args...)) + @inline + return applicable(Core.kwcall, kwargs, args...) 
+end +function Core._hasmethod(@nospecialize(f), @nospecialize(t)) # this function has a special tfunc (TODO: make this a Builtin instead like applicable) + tt = rewrap_unionall(Tuple{Core.Typeof(f), (unwrap_unionall(t)::DataType).parameters...}, t) + return Core._hasmethod(tt) +end + +# core operations & types +include("promotion.jl") +include("tuple.jl") +include("expr.jl") +include("pair.jl") +include("traits.jl") +include("range.jl") +include("error.jl") + +# core numeric operations & types +==(x, y) = x === y +include("bool.jl") +include("number.jl") +include("int.jl") +include("operators.jl") +include("pointer.jl") +include("refvalue.jl") +include("cmem.jl") + +include("checked.jl") +using .Checked +function cld end +function fld end + +# Lazy strings +include("strings/lazy.jl") + +# array structures +include("indices.jl") +include("genericmemory.jl") +include("array.jl") +include("abstractarray.jl") +include("subarray.jl") +include("views.jl") +include("baseext.jl") + +include("c.jl") +include("ntuple.jl") +include("abstractset.jl") +include("bitarray.jl") +include("bitset.jl") +include("abstractdict.jl") +include("iddict.jl") +include("idset.jl") +include("iterators.jl") +using .Iterators: zip, enumerate, only +using .Iterators: Flatten, Filter, product # for generators +using .Iterators: Stateful # compat (was formerly used in reinterpretarray.jl) +include("namedtuple.jl") + +include("anyall.jl") + +include("ordering.jl") +using .Order + +include("coreir.jl") +include("invalidation.jl") + +# For OS specific stuff +# We need to strcat things here, before strings are really defined +function strcat(x::String, y::String) + out = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), Core.sizeof(x) + Core.sizeof(y)) + GC.@preserve x y out begin + out_ptr = unsafe_convert(Ptr{UInt8}, out) + unsafe_copyto!(out_ptr, unsafe_convert(Ptr{UInt8}, x), Core.sizeof(x)) + unsafe_copyto!(out_ptr + Core.sizeof(x), unsafe_convert(Ptr{UInt8}, y), Core.sizeof(y)) + end + return out +end + +BUILDROOT::String = "" +DATAROOT::String = "" + +baremodule BuildSettings end + +function process_sysimg_args!() + let i = 2 # skip file name + while i <= length(Core.ARGS) + if Core.ARGS[i] == "--buildsettings" + include(BuildSettings, ARGS[i+1]) + elseif Core.ARGS[i] == "--buildroot" + global BUILDROOT = Core.ARGS[i+1] + elseif Core.ARGS[i] == "--dataroot" + global DATAROOT = Core.ARGS[i+1] + else + error(strcat("invalid sysimage argument: ", Core.ARGS[i])) + end + i += 2 + end + end +end +process_sysimg_args!() + +function isready end + +include(strcat(DATAROOT, "julia/Compiler/src/Compiler.jl")) + +const _return_type = Compiler.return_type + +# Enable compiler +Compiler.bootstrap!() + +include("flparse.jl") +Core._setparser!(fl_parse) + +# Further definition of Base will happen in Base.jl if loaded. 
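# --- Editor's note: hedged sketch, not part of the upstream diff. ---
# `strcat` above is a bootstrap-only byte concatenation (string `*` is not
# defined yet at this stage), built directly on `jl_alloc_string` and
# `unsafe_copyto!`; once Base is fully loaded it agrees with `*`:
strcat("abc", "def")  # "abcdef", the same bytes as "abc" * "def"
# `process_sysimg_args!` walks `Core.ARGS` in flag/value pairs, so an
# argument vector such as (illustrative values only)
#     ["sysimg.jl", "--buildroot", "/build", "--dataroot", "/usr/share/"]
# leaves `BUILDROOT == "/build"` and `DATAROOT == "/usr/share/"`, after which
# the compiler is loaded from `strcat(DATAROOT, "julia/Compiler/src/Compiler.jl")`.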
+ +end # baremodule Base diff --git a/base/Enums.jl b/base/Enums.jl index 45a1b66753484..d4094945853ec 100644 --- a/base/Enums.jl +++ b/base/Enums.jl @@ -44,7 +44,7 @@ Base.print(io::IO, x::Enum) = print(io, _symbol(x)) function Base.show(io::IO, x::Enum) sym = _symbol(x) if !(get(io, :compact, false)::Bool) - from = get(io, :module, Base.active_module()) + from = get(io, :module, Main) def = parentmodule(typeof(x)) if from === nothing || !Base.isvisible(sym, def, from) show(io, def) @@ -90,7 +90,7 @@ end # give Enum types scalar behavior in broadcasting Base.broadcastable(x::Enum) = Ref(x) -@noinline enum_argument_error(typename, x) = throw(ArgumentError(string("invalid value for Enum $(typename): $x"))) +@noinline enum_argument_error(typename, x) = throw(ArgumentError(LazyString("invalid value for Enum ", typename, ": ", x))) """ @enum EnumName[::BaseType] value1[=x] value2[=y] @@ -143,7 +143,7 @@ julia> Symbol(apple) """ macro enum(T::Union{Symbol,Expr}, syms...) if isempty(syms) - throw(ArgumentError("no arguments given for Enum $T")) + throw(ArgumentError(LazyString("no arguments given for Enum ", T))) end basetype = Int32 typename = T @@ -151,10 +151,11 @@ macro enum(T::Union{Symbol,Expr}, syms...) typename = T.args[1] basetype = Core.eval(__module__, T.args[2]) if !isa(basetype, DataType) || !(basetype <: Integer) || !isbitstype(basetype) - throw(ArgumentError("invalid base type for Enum $typename, $T=::$basetype; base type must be an integer primitive type")) + throw(ArgumentError( + LazyString("invalid base type for Enum ", typename, ", ", T, "=::", basetype, "; base type must be an integer primitive type"))) end elseif !isa(T, Symbol) - throw(ArgumentError("invalid type expression for enum $T")) + throw(ArgumentError(LazyString("invalid type expression for enum ", T))) end values = Vector{basetype}() seen = Set{Symbol}() @@ -169,32 +170,32 @@ macro enum(T::Union{Symbol,Expr}, syms...) s isa LineNumberNode && continue if isa(s, Symbol) if i == typemin(basetype) && !isempty(values) - throw(ArgumentError("overflow in value \"$s\" of Enum $typename")) + throw(ArgumentError(LazyString("overflow in value \"", s, "\" of Enum ", typename))) end elseif isa(s, Expr) && (s.head === :(=) || s.head === :kw) && length(s.args) == 2 && isa(s.args[1], Symbol) i = Core.eval(__module__, s.args[2]) # allow exprs, e.g. 
uint128"1" if !isa(i, Integer) - throw(ArgumentError("invalid value for Enum $typename, $s; values must be integers")) + throw(ArgumentError(LazyString("invalid value for Enum ", typename, ", ", s, "; values must be integers"))) end i = convert(basetype, i) s = s.args[1] hasexpr = true else - throw(ArgumentError(string("invalid argument for Enum ", typename, ": ", s))) + throw(ArgumentError(LazyString("invalid argument for Enum ", typename, ": ", s))) end s = s::Symbol if !Base.isidentifier(s) - throw(ArgumentError("invalid name for Enum $typename; \"$s\" is not a valid identifier")) + throw(ArgumentError(LazyString("invalid name for Enum ", typename, "; \"", s, "\" is not a valid identifier"))) end if hasexpr && haskey(namemap, i) - throw(ArgumentError("both $s and $(namemap[i]) have value $i in Enum $typename; values must be unique")) + throw(ArgumentError(LazyString("both ", s, " and ", namemap[i], " have value ", i, " in Enum ", typename, "; values must be unique"))) end namemap[i] = s push!(values, i) if s in seen - throw(ArgumentError("name \"$s\" in Enum $typename is not unique")) + throw(ArgumentError(LazyString("name \"", s, "\" in Enum ", typename, " is not unique"))) end push!(seen, s) if length(values) == 1 diff --git a/base/Makefile b/base/Makefile index 493302af78b02..09f79e5b98611 100644 --- a/base/Makefile +++ b/base/Makefile @@ -203,19 +203,25 @@ endif $(build_bindir)/7z$(EXE): [ -e "$(7Z_PATH)" ] && \ rm -f "$@" && \ - ln -svf "$(7Z_PATH)" "$@" + ln -sf "$(7Z_PATH)" "$@" -symlink_lld: $(build_bindir)/lld$(EXE) +symlink_llvm_utils: $(build_depsbindir)/lld$(EXE) $(build_depsbindir)/dsymutil$(EXE) ifneq ($(USE_SYSTEM_LLD),0) -SYMLINK_SYSTEM_LIBRARIES += symlink_lld +SYMLINK_SYSTEM_LIBRARIES += symlink_llvm_utils LLD_PATH := $(shell which lld$(EXE)) +DSYMUTIL_PATH := $(shell which dsymutil$(EXE)) endif -$(build_bindir)/lld$(EXE): +$(build_depsbindir)/lld$(EXE): [ -e "$(LLD_PATH)" ] && \ rm -f "$@" && \ - ln -svf "$(LLD_PATH)" "$@" + ln -sf "$(LLD_PATH)" "$@" + +$(build_depsbindir)/dsymutil$(EXE): + [ -e "$(DSYMUTIL_PATH)" ] && \ + rm -f "$@" && \ + ln -sf "$(DSYMUTIL_PATH)" "$@" # the following excludes: libuv.a, libutf8proc.a @@ -243,12 +249,12 @@ endif ifneq (,$(LIBGFORTRAN_VERSION)) $(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION))) endif -$(eval $(call symlink_system_library,CSL,libquadmath,0)) $(eval $(call symlink_system_library,CSL,libstdc++,6)) -# We allow libssp, libatomic and libgomp to fail as they are not available on all systems +# We allow libssp, libatomic, libgomp and libquadmath to fail as they are not available on all systems $(eval $(call symlink_system_library,CSL,libssp,0,ALLOW_FAILURE)) $(eval $(call symlink_system_library,CSL,libatomic,1,ALLOW_FAILURE)) $(eval $(call symlink_system_library,CSL,libgomp,1,ALLOW_FAILURE)) +$(eval $(call symlink_system_library,CSL,libquadmath,0,ALLOW_FAILURE)) $(eval $(call symlink_system_library,PCRE,libpcre2-8)) $(eval $(call symlink_system_library,DSFMT,libdSFMT)) $(eval $(call symlink_system_library,LIBBLASTRAMPOLINE,libblastrampoline)) @@ -258,9 +264,8 @@ $(eval $(call symlink_system_library,LAPACK,$(LIBLAPACKNAME))) endif $(eval $(call symlink_system_library,GMP,libgmp)) $(eval $(call symlink_system_library,MPFR,libmpfr)) -$(eval $(call symlink_system_library,MBEDTLS,libmbedtls)) -$(eval $(call symlink_system_library,MBEDTLS,libmbedcrypto)) -$(eval $(call symlink_system_library,MBEDTLS,libmbedx509)) +$(eval $(call symlink_system_library,OPENSSL,libcrypto)) +$(eval $(call 
symlink_system_library,OPENSSL,libssl)) $(eval $(call symlink_system_library,LIBSSH2,libssh2)) $(eval $(call symlink_system_library,NGHTTP2,libnghttp2)) $(eval $(call symlink_system_library,CURL,libcurl)) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 1417987847ec4..1ab78a55c93b5 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -95,7 +95,7 @@ julia> axes(A) """ function axes(A) @inline - map(oneto, size(A)) + map(unchecked_oneto, size(A)) end """ @@ -103,17 +103,20 @@ end has_offset_axes(A, B, ...) Return `true` if the indices of `A` start with something other than 1 along any axis. -If multiple arguments are passed, equivalent to `has_offset_axes(A) | has_offset_axes(B) | ...`. +If multiple arguments are passed, equivalent to `has_offset_axes(A) || has_offset_axes(B) || ...`. See also [`require_one_based_indexing`](@ref). """ +has_offset_axes() = false has_offset_axes(A) = _any_tuple(x->Int(first(x))::Int != 1, false, axes(A)...) has_offset_axes(A::AbstractVector) = Int(firstindex(A))::Int != 1 # improve performance of a common case (ranges) -# Use `_any_tuple` to avoid unneeded invoke. -# note: this could call `any` directly if the compiler can infer it -has_offset_axes(As...) = _any_tuple(has_offset_axes, false, As...) has_offset_axes(::Colon) = false has_offset_axes(::Array) = false +# note: this could call `any` directly if the compiler can infer it. We don't use _any_tuple +# here because it stops full elision in some cases (#49332) and we don't need handling of +# `missing` (has_offset_axes(A) always returns a Bool) +has_offset_axes(A, As...) = has_offset_axes(A) || has_offset_axes(As...) + """ require_one_based_indexing(A::AbstractArray) @@ -268,8 +271,8 @@ julia> ndims(A) 3 ``` """ -ndims(::AbstractArray{T,N}) where {T,N} = N -ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N +ndims(::AbstractArray{T,N}) where {T,N} = N::Int +ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N::Int ndims(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements")) """ @@ -446,7 +449,7 @@ julia> firstindex(rand(3,4,5), 2) firstindex(a::AbstractArray) = (@inline; first(eachindex(IndexLinear(), a))) firstindex(a, d) = (@inline; first(axes(a, d))) -first(a::AbstractArray) = a[first(eachindex(a))] +@propagate_inbounds first(a::AbstractArray) = a[first(eachindex(a))] """ first(coll) @@ -499,7 +502,7 @@ Bool[] first(itr, n::Integer) = collect(Iterators.take(itr, n)) # Faster method for vectors function first(v::AbstractVector, n::Integer) - n < 0 && throw(ArgumentError("Number of elements must be nonnegative")) + n < 0 && throw(ArgumentError("Number of elements must be non-negative")) v[range(begin, length=min(n, checked_length(v)))] end @@ -549,7 +552,7 @@ Float64[] last(itr, n::Integer) = reverse!(collect(Iterators.take(Iterators.reverse(itr), n))) # Faster method for arrays function last(v::AbstractVector, n::Integer) - n < 0 && throw(ArgumentError("Number of elements must be nonnegative")) + n < 0 && throw(ArgumentError("Number of elements must be non-negative")) v[range(stop=lastindex(v), length=min(n, checked_length(v)))] end @@ -678,15 +681,12 @@ function checkbounds(::Type{Bool}, A::AbstractArray, I...) 
checkbounds_indices(Bool, axes(A), I) end -# Linear indexing is explicitly allowed when there is only one (non-cartesian) index +# Linear indexing is explicitly allowed when there is only one (non-cartesian) index; +# indices that do not allow linear indexing (e.g., logical arrays, cartesian indices, etc) +# must add specialized methods to implement their restrictions function checkbounds(::Type{Bool}, A::AbstractArray, i) @inline - checkindex(Bool, eachindex(IndexLinear(), A), i) -end -# As a special extension, allow using logical arrays that match the source array exactly -function checkbounds(::Type{Bool}, A::AbstractArray{<:Any,N}, I::AbstractArray{Bool,N}) where N - @inline - axes(A) == axes(I) + return checkindex(Bool, eachindex(IndexLinear(), A), i) end """ @@ -720,18 +720,13 @@ of `IA`. See also [`checkbounds`](@ref). """ -function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple) - @inline - checkindex(Bool, IA[1], I[1])::Bool & checkbounds_indices(Bool, tail(IA), tail(I)) -end -function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple) +function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{Any, Vararg}) @inline - checkindex(Bool, OneTo(1), I[1])::Bool & checkbounds_indices(Bool, (), tail(I)) + return checkindex(Bool, get(inds, 1, OneTo(1)), I[1])::Bool & + checkbounds_indices(Bool, safe_tail(inds), tail(I)) end -checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, IA)) -checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true -throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I))) +checkbounds_indices(::Type{Bool}, inds::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, inds)) # check along a single dimension """ @@ -753,20 +748,19 @@ julia> checkindex(Bool, 1:20, 21) false ``` """ -checkindex(::Type{Bool}, inds::AbstractUnitRange, i) = - throw(ArgumentError("unable to check bounds for indices of type $(typeof(i))")) +checkindex(::Type{Bool}, inds, i) = throw(ArgumentError(LazyString("unable to check bounds for indices of type ", typeof(i)))) checkindex(::Type{Bool}, inds::AbstractUnitRange, i::Real) = (first(inds) <= i) & (i <= last(inds)) checkindex(::Type{Bool}, inds::IdentityUnitRange, i::Real) = checkindex(Bool, inds.indices, i) checkindex(::Type{Bool}, inds::OneTo{T}, i::T) where {T<:BitInteger} = unsigned(i - one(i)) < unsigned(last(inds)) checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Colon) = true checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Slice) = true -function checkindex(::Type{Bool}, inds::AbstractUnitRange, r::AbstractRange) - @_propagate_inbounds_meta - isempty(r) | (checkindex(Bool, inds, first(r)) & checkindex(Bool, inds, last(r))) -end -checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractVector{Bool}) = indx == axes1(I) -checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractArray{Bool}) = false -function checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractArray) +checkindex(::Type{Bool}, inds::AbstractUnitRange, i::AbstractRange) = + isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i))) +# range like indices with cheap `extrema` +checkindex(::Type{Bool}, inds::AbstractUnitRange, i::LinearIndices) = + isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i))) + +function checkindex(::Type{Bool}, inds, I::AbstractArray) @inline b = true for i in I @@ -834,7 +828,6 @@ similar(a::AbstractArray, ::Type{T}, dims::DimOrInd...) 
where {T} = similar(a, # define this method to convert supported axes to Ints, with the expectation that an offset array # package will define a method with dims::Tuple{Union{Integer, UnitRange}, Vararg{Union{Integer, UnitRange}}} similar(a::AbstractArray, ::Type{T}, dims::Tuple{Union{Integer, OneTo}, Vararg{Union{Integer, OneTo}}}) where {T} = similar(a, T, to_shape(dims)) -similar(a::AbstractArray, ::Type{T}, dims::Tuple{Integer, Vararg{Integer}}) where {T} = similar(a, T, to_shape(dims)) # similar creates an Array by default similar(a::AbstractArray, ::Type{T}, dims::Dims{N}) where {T,N} = Array{T,N}(undef, dims) @@ -890,7 +883,7 @@ julia> empty([1.0, 2.0, 3.0], String) String[] ``` """ -empty(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = Vector{U}() +empty(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = similar(a, U, 0) # like empty, but should return a mutable collection, a Vector by default emptymutable(a::AbstractVector{T}, ::Type{U}=T) where {T,U} = Vector{U}() @@ -902,11 +895,18 @@ emptymutable(itr, ::Type{U}) where {U} = Vector{U}() In-place [`copy`](@ref) of `src` into `dst`, discarding any pre-existing elements in `dst`. If `dst` and `src` are of the same type, `dst == src` should hold after -the call. If `dst` and `src` are multidimensional arrays, they must have +the call. If `dst` and `src` are vector types, they must have equal +offset. If `dst` and `src` are multidimensional arrays, they must have equal [`axes`](@ref). +$(_DOCS_ALIASING_WARNING) + See also [`copyto!`](@ref). +!!! note + When operating on vector types, if `dst` and `src` are not of the + same length, `dst` is resized to `length(src)` prior to the `copy`. + !!! compat "Julia 1.1" This method requires at least Julia 1.1. In Julia 1.0 this method is available from the `Future` standard library as `Future.copy!`. @@ -993,7 +993,7 @@ end # this method must be separate from the above since src might not have a length function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n::Integer) n < 0 && throw(ArgumentError(LazyString("tried to copy n=",n, - ", elements, but n should be nonnegative"))) + ", elements, but n should be non-negative"))) n == 0 && return dest dmax = dstart + n - 1 inds = LinearIndices(dest) @@ -1011,14 +1011,19 @@ function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n:: end y = iterate(src, y[2]) end + if y === nothing + throw(ArgumentError(LazyString( + "source has fewer elements than required, ", + "expected at least ",sstart," got ", sstart-1))) + end + val, st = y i = Int(dstart) - while i <= dmax && y !== nothing - val, st = y - @inbounds dest[i] = val - y = iterate(src, st) + @inbounds dest[i] = val + for val in Iterators.take(Iterators.rest(src, st), n-1) i += 1 + @inbounds dest[i] = val end - i <= dmax && throw(BoundsError(dest, i)) + i < dmax && throw(BoundsError(dest, i)) return dest end @@ -1034,6 +1039,8 @@ the other elements are left untouched. See also [`copy!`](@ref Base.copy!), [`copy`](@ref). 
+$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> x = [1., 0., 3., 0., 5.]; @@ -1098,11 +1105,8 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle: end else # Dual-iterator implementation - ret = iterate(iterdest) - @inbounds for a in src - idx, state = ret::NTuple{2,Any} - dest[idx] = a - ret = iterate(iterdest, state) + for (Idest, Isrc) in zip(iterdest, itersrc) + @inbounds dest[Idest] = src[Isrc] end end end @@ -1120,11 +1124,11 @@ function copyto!(dest::AbstractArray, dstart::Integer, src::AbstractArray, sstar end function copyto!(dest::AbstractArray, dstart::Integer, - src::AbstractArray, sstart::Integer, - n::Integer) + src::AbstractArray, sstart::Integer, + n::Integer) n == 0 && return dest n < 0 && throw(ArgumentError(LazyString("tried to copy n=", - n," elements, but n should be nonnegative"))) + n," elements, but n should be non-negative"))) destinds, srcinds = LinearIndices(dest), LinearIndices(src) (checkbounds(Bool, destinds, dstart) && checkbounds(Bool, destinds, dstart+n-1)) || throw(BoundsError(dest, dstart:dstart+n-1)) (checkbounds(Bool, srcinds, sstart) && checkbounds(Bool, srcinds, sstart+n-1)) || throw(BoundsError(src, sstart:sstart+n-1)) @@ -1198,7 +1202,26 @@ function copymutable(a::AbstractArray) end copymutable(itr) = collect(itr) -zero(x::AbstractArray{T}) where {T} = fill!(similar(x, typeof(zero(T))), zero(T)) +zero(x::AbstractArray{T}) where {T<:Number} = fill!(similar(x, typeof(zero(T))), zero(T)) +zero(x::AbstractArray{S}) where {S<:Union{Missing, Number}} = fill!(similar(x, typeof(zero(S))), zero(S)) +zero(x::AbstractArray) = map(zero, x) + +function _one(unit::T, mat::AbstractMatrix) where {T} + (rows, cols) = axes(mat) + (length(rows) == length(cols)) || + throw(DimensionMismatch("multiplicative identity defined only for square matrices")) + zer = zero(unit)::T + require_one_based_indexing(mat) + I = similar(mat, T) + fill!(I, zer) + for i ∈ rows + I[i, i] = unit + end + I +end + +one(x::AbstractMatrix{T}) where {T} = _one(one(T), x) +oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x) ## iteration support for arrays by iterating over `eachindex` in the array ## # Allows fast iteration by default for both IndexLinear and IndexCartesian arrays @@ -1229,10 +1252,10 @@ end # note: the following type definitions don't mean any AbstractArray is convertible to # a data Ref. they just map the array element type to the pointer type for # convenience in cases that work. -pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, x) +pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, cconvert(Ptr{T}, x)) function pointer(x::AbstractArray{T}, i::Integer) where T @inline - unsafe_convert(Ptr{T}, x) + Int(_memory_offset(x, i))::Int + pointer(x) + Int(_memory_offset(x, i))::Int end # The distance from pointer(x) to the element at x[I...] in bytes @@ -1242,6 +1265,10 @@ function _memory_offset(x::AbstractArray, I::Vararg{Any,N}) where {N} return sum(map((i, s, o)->s*(i-o), J, strides(x), Tuple(first(CartesianIndices(x)))))*elsize(x) end +## Special constprop heuristics for getindex/setindex +typename(typeof(function getindex end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC +typename(typeof(function setindex! end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC + ## Approach: # We only define one fallback method on getindex for all argument types. # That dispatches to an (inlined) internal _getindex function, where the goal is @@ -1254,8 +1281,16 @@ end """ getindex(A, inds...) 
-Return a subset of array `A` as specified by `inds`, where each `ind` may be, -for example, an `Int`, an [`AbstractRange`](@ref), or a [`Vector`](@ref). +Return a subset of array `A` as selected by the indices `inds`. + +Each index may be any [supported index type](@ref man-supported-index-types), such +as an [`Integer`](@ref), [`CartesianIndex`](@ref), [range](@ref Base.AbstractRange), or [array](@ref man-multi-dim-arrays) of supported indices. +A [:](@ref Base.Colon) may be used to select all elements along a specific dimension, and a boolean array (e.g. an `Array{Bool}` or a [`BitArray`](@ref)) may be used to filter for elements where the corresponding index is `true`. + +When `inds` selects multiple elements, this function returns a newly +allocated array. To index multiple elements without making a copy, +use [`view`](@ref) instead. + See the manual section on [array indexing](@ref man-array-indexing) for details. # Examples @@ -1278,6 +1313,27 @@ julia> getindex(A, 2:4) 3 2 4 + +julia> getindex(A, 2, 1) +3 + +julia> getindex(A, CartesianIndex(2, 1)) +3 + +julia> getindex(A, :, 2) +2-element Vector{Int64}: + 2 + 4 + +julia> getindex(A, 2, :) +2-element Vector{Int64}: + 3 + 4 + +julia> getindex(A, A .> 2) +2-element Vector{Int64}: + 3 + 4 ``` """ function getindex(A::AbstractArray, I...) @@ -1288,11 +1344,7 @@ end # To avoid invalidations from multidimensional.jl: getindex(A::Array, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...) @propagate_inbounds getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...] -function unsafe_getindex(A::AbstractArray, I...) - @inline - @inbounds r = getindex(A, I...) - r -end +@inline unsafe_getindex(A::AbstractArray, I...) = @inbounds getindex(A, I...) struct CanonicalIndexError <: Exception func::String @@ -1369,6 +1421,8 @@ _unsafe_ind2sub(sz, i) = (@inline; _ind2sub(sz, i)) Store values from array `X` within some subset of `A` as specified by `inds`. The syntax `A[inds...] = X` is equivalent to `(setindex!(A, X, inds...); X)`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = zeros(2,2); @@ -1425,6 +1479,8 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe r end +_unsetindex!(A::AbstractArray, i::Integer) = _unsetindex!(A, to_index(i)) + """ parent(A) @@ -1490,12 +1546,14 @@ much more common case where aliasing does not occur. By default, unaliascopy(A::Array) = copy(A) unaliascopy(A::AbstractArray)::typeof(A) = (@noinline; _unaliascopy(A, copy(A))) _unaliascopy(A::T, C::T) where {T} = C -_unaliascopy(A, C) = throw(ArgumentError(""" - an array of type `$(typename(typeof(A)).wrapper)` shares memory with another argument - and must make a preventative copy of itself in order to maintain consistent semantics, - but `copy(::$(typeof(A)))` returns a new array of type `$(typeof(C))`. 
- To fix, implement: - `Base.unaliascopy(A::$(typename(typeof(A)).wrapper))::typeof(A)`""")) +function _unaliascopy(A, C) + Aw = typename(typeof(A)).wrapper + throw(ArgumentError(LazyString("an array of type `", Aw, "` shares memory with another argument ", + "and must make a preventative copy of itself in order to maintain consistent semantics, ", + "but `copy(::", typeof(A), ")` returns a new array of type `", typeof(C), "`.\n", + """To fix, implement: + `Base.unaliascopy(A::""", Aw, ")::typeof(A)`"))) +end unaliascopy(A) = A """ @@ -1506,7 +1564,7 @@ Perform a conservative test to check if arrays `A` and `B` might share the same By default, this simply checks if either of the arrays reference the same memory regions, as identified by their [`Base.dataids`](@ref). """ -mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !_isdisjoint(dataids(A), dataids(B)) +mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !isempty(A) && !isempty(B) && !_isdisjoint(dataids(A), dataids(B)) mightalias(x, y) = false _isdisjoint(as::Tuple{}, bs::Tuple{}) = true @@ -1530,7 +1588,8 @@ their component parts. A typical definition for an array that wraps a parent is `Base.dataids(C::CustomArray) = dataids(C.parent)`. """ dataids(A::AbstractArray) = (UInt(objectid(A)),) -dataids(A::Array) = (UInt(pointer(A)),) +dataids(A::Memory) = (UInt(A.ptr),) +dataids(A::Array) = dataids(A.ref.mem) dataids(::AbstractRange) = () dataids(x) = () @@ -1584,11 +1643,19 @@ replace_in_print_matrix(A::AbstractVector,i::Integer,j::Integer,s::AbstractStrin eltypeof(x) = typeof(x) eltypeof(x::AbstractArray) = eltype(x) -promote_eltypeof() = Bottom -promote_eltypeof(v1, vs...) = promote_type(eltypeof(v1), promote_eltypeof(vs...)) +promote_eltypeof() = error() +promote_eltypeof(v1) = eltypeof(v1) +promote_eltypeof(v1, v2) = promote_type(eltypeof(v1), eltypeof(v2)) +promote_eltypeof(v1, v2, vs...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, eltypeof(y)), promote_eltypeof(v1, v2), vs...)) +promote_eltypeof(v1::T, vs::T...) where {T} = eltypeof(v1) +promote_eltypeof(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T -promote_eltype() = Bottom -promote_eltype(v1, vs...) = promote_type(eltype(v1), promote_eltype(vs...)) +promote_eltype() = error() +promote_eltype(v1) = eltype(v1) +promote_eltype(v1, v2) = promote_type(eltype(v1), eltype(v2)) +promote_eltype(v1, v2, vs...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, eltype(y)), promote_eltype(v1, v2), vs...)) +promote_eltype(v1::T, vs::T...) where {T} = eltype(T) +promote_eltype(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T #TODO: ERROR CHECK _cat(catdim::Int) = Vector{Any}() @@ -1597,10 +1664,10 @@ typed_vcat(::Type{T}) where {T} = Vector{T}() typed_hcat(::Type{T}) where {T} = Vector{T}() ## cat: special cases -vcat(X::T...) where {T} = T[ X[i] for i=1:length(X) ] -vcat(X::T...) where {T<:Number} = T[ X[i] for i=1:length(X) ] -hcat(X::T...) where {T} = T[ X[j] for i=1:1, j=1:length(X) ] -hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=1:length(X) ] +vcat(X::T...) where {T} = T[ X[i] for i=eachindex(X) ] +vcat(X::T...) where {T<:Number} = T[ X[i] for i=eachindex(X) ] +hcat(X::T...) where {T} = T[ X[j] for i=1:1, j=eachindex(X) ] +hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=eachindex(X) ] vcat(X::Number...) = hvcat_fill!(Vector{promote_typeof(X...)}(undef, length(X)), X) hcat(X::Number...) 
= hvcat_fill!(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X) @@ -1651,7 +1718,7 @@ function _typed_hcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T for j = 1:nargs Aj = A[j] if size(Aj, 1) != nrows - throw(ArgumentError("number of rows of each array must match (got $(map(x->size(x,1), A)))")) + throw(DimensionMismatch("number of rows of each array must match (got $(map(x->size(x,1), A)))")) end dense &= isa(Aj,Array) nd = ndims(Aj) @@ -1686,7 +1753,7 @@ function _typed_vcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T ncols = size(A[1], 2) for j = 2:nargs if size(A[j], 2) != ncols - throw(ArgumentError("number of columns of each array must match (got $(map(x->size(x,2), A)))")) + throw(DimensionMismatch("number of columns of each array must match (got $(map(x->size(x,2), A)))")) end end B = similar(A[1], T, nrows, ncols) @@ -1809,17 +1876,16 @@ function __cat_offset1!(A, shape, catdims, offsets, x) inds = ntuple(length(offsets)) do i (i <= length(catdims) && catdims[i]) ? offsets[i] .+ cat_indices(x, i) : 1:shape[i] end - if x isa AbstractArray - A[inds...] = x - else - fill!(view(A, inds...), x) - end + _copy_or_fill!(A, inds, x) newoffsets = ntuple(length(offsets)) do i (i <= length(catdims) && catdims[i]) ? offsets[i] + cat_size(x, i) : offsets[i] end return newoffsets end +_copy_or_fill!(A, inds, x) = fill!(view(A, inds...), x) +_copy_or_fill!(A, inds, x::AbstractArray) = (A[inds...] = x) + """ vcat(A...) @@ -1856,7 +1922,7 @@ julia> vcat(range(1, 2, length=3)) # collects lazy ranges 2.0 julia> two = ([10, 20, 30]', Float64[4 5 6; 7 8 9]) # row vector and a matrix -([10 20 30], [4.0 5.0 6.0; 7.0 8.0 9.0]) +(adjoint([10, 20, 30]), [4.0 5.0 6.0; 7.0 8.0 9.0]) julia> vcat(two...) 3×3 Matrix{Float64}: @@ -1959,24 +2025,91 @@ The keyword also accepts `Val(dims)`. For multiple dimensions `dims = Val(::Tuple)` was added in Julia 1.8. # Examples + +Concatenate two arrays in different dimensions: +```jldoctest +julia> a = [1 2 3] +1×3 Matrix{Int64}: + 1 2 3 + +julia> b = [4 5 6] +1×3 Matrix{Int64}: + 4 5 6 + +julia> cat(a, b; dims=1) +2×3 Matrix{Int64}: + 1 2 3 + 4 5 6 + +julia> cat(a, b; dims=2) +1×6 Matrix{Int64}: + 1 2 3 4 5 6 + +julia> cat(a, b; dims=(1, 2)) +2×6 Matrix{Int64}: + 1 2 3 0 0 0 + 0 0 0 4 5 6 +``` + +# Extended Help + +Concatenate 3D arrays: +```jldoctest +julia> a = ones(2, 2, 3); + +julia> b = ones(2, 2, 4); + +julia> c = cat(a, b; dims=3); + +julia> size(c) == (2, 2, 7) +true +``` + +Concatenate arrays of different sizes: ```jldoctest julia> cat([1 2; 3 4], [pi, pi], fill(10, 2,3,1); dims=2) # same as hcat 2×6×1 Array{Float64, 3}: [:, :, 1] = 1.0 2.0 3.14159 10.0 10.0 10.0 3.0 4.0 3.14159 10.0 10.0 10.0 +``` +Construct a block diagonal matrix: +``` julia> cat(true, trues(2,2), trues(4)', dims=(1,2)) # block-diagonal 4×7 Matrix{Bool}: 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 +``` +``` julia> cat(1, [2], [3;;]; dims=Val(2)) 1×3 Matrix{Int64}: 1 2 3 ``` + +!!! note + `cat` does not join two strings, you may want to use `*`. + +```jldoctest +julia> a = "aaa"; + +julia> b = "bbb"; + +julia> cat(a, b; dims=1) +2-element Vector{String}: + "aaa" + "bbb" + +julia> cat(a, b; dims=2) +1×2 Matrix{String}: + "aaa" "bbb" + +julia> a * b +"aaabbb" +``` """ @inline cat(A...; dims) = _cat(dims, A...) 
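# --- Editor's note: hedged sketch, not part of the upstream diff. ---
# The reworked `promote_eltypeof`/`promote_eltype` earlier in this hunk fold
# their arguments pairwise via `afoldl` and add fast paths when every argument
# (or every element type) agrees, instead of recursing through a varargs tail.
# Assuming a full Base build (both helpers are internal), the expected results are:
Base.promote_eltypeof(1, 2.0)            # Float64
Base.promote_eltypeof([1, 2], [3, 4])    # Int (same-eltype fast path)
Base.promote_eltype([1, 2], [1.0], 3im)  # ComplexF64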
# `@constprop :aggressive` allows `catdims` to be propagated as constant improving return type inference @@ -1984,16 +2117,14 @@ julia> cat(1, [2], [3;;]; dims=Val(2)) # The specializations for 1 and 2 inputs are important # especially when running with --inline=no, see #11158 -# The specializations for Union{AbstractVecOrMat,Number} are necessary -# to have more specialized methods here than in LinearAlgebra/uniformscaling.jl vcat(A::AbstractArray) = cat(A; dims=Val(1)) vcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(1)) vcat(A::AbstractArray...) = cat(A...; dims=Val(1)) -vcat(A::Union{AbstractVecOrMat,Number}...) = cat(A...; dims=Val(1)) +vcat(A::Union{AbstractArray,Number}...) = cat(A...; dims=Val(1)) hcat(A::AbstractArray) = cat(A; dims=Val(2)) hcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(2)) hcat(A::AbstractArray...) = cat(A...; dims=Val(2)) -hcat(A::Union{AbstractVecOrMat,Number}...) = cat(A...; dims=Val(2)) +hcat(A::Union{AbstractArray,Number}...) = cat(A...; dims=Val(2)) typed_vcat(T::Type, A::AbstractArray) = _cat_t(Val(1), T, A) typed_vcat(T::Type, A::AbstractArray, B::AbstractArray) = _cat_t(Val(1), T, A, B) @@ -2055,51 +2186,11 @@ julia> hvcat((2,2,2), a,b,c,d,e,f) == hvcat(2, a,b,c,d,e,f) true ``` """ -hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractVecOrMat...) = typed_hvcat(promote_eltype(xs...), rows, xs...) -hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractVecOrMat{T}...) where {T} = typed_hvcat(T, rows, xs...) +hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractArray...) = typed_hvcat(promote_eltype(xs...), rows, xs...) +hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractArray{T}...) where {T} = typed_hvcat(T, rows, xs...) -function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as::AbstractVecOrMat...) where T - nbr = length(rows) # number of block rows - - nc = 0 - for i=1:rows[1] - nc += size(as[i],2) - end - - nr = 0 - a = 1 - for i = 1:nbr - nr += size(as[a],1) - a += rows[i] - end - - out = similar(as[1], T, nr, nc) - - a = 1 - r = 1 - for i = 1:nbr - c = 1 - szi = size(as[a],1) - for j = 1:rows[i] - Aj = as[a+j-1] - szj = size(Aj,2) - if size(Aj,1) != szi - throw(ArgumentError("mismatched height in block row $(i) (expected $szi, got $(size(Aj,1)))")) - end - if c-1+szj > nc - throw(ArgumentError("block row $(i) has mismatched number of columns (expected $nc, got $(c-1+szj))")) - end - out[r:r-1+szi, c:c-1+szj] = Aj - c += szj - end - if c != nc+1 - throw(ArgumentError("block row $(i) has mismatched number of columns (expected $nc, got $(c-1))")) - end - r += szi - a += rows[i] - end - out -end +rows_to_dimshape(rows::Tuple{Vararg{Int}}) = all(==(rows[1]), rows) ? (length(rows), rows[1]) : (rows, (sum(rows),)) +typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as::AbstractVecOrMat...) where T = typed_hvncat(T, rows_to_dimshape(rows), true, as...) hvcat(rows::Tuple{Vararg{Int}}) = [] typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}) where {T} = Vector{T}() @@ -2115,7 +2206,7 @@ function hvcat(rows::Tuple{Vararg{Int}}, xs::T...) where T<:Number k = 1 @inbounds for i=1:nr if nc != rows[i] - throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) + throw(DimensionMismatch("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) end for j=1:nc a[i,j] = xs[k] @@ -2144,29 +2235,20 @@ end hvcat(rows::Tuple{Vararg{Int}}, xs::Number...) = typed_hvcat(promote_typeof(xs...), rows, xs...) hvcat(rows::Tuple{Vararg{Int}}, xs...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...) 
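# --- Editor's note: hedged sketch, not part of the upstream diff. ---
# `rows_to_dimshape` (added just above) maps the block-row layout accepted by
# `hvcat` onto the dims/shape argument of `typed_hvncat`, which is why the
# hand-rolled block-copy loop in the removed `typed_hvcat` method is no longer
# needed. Assuming a full Base build (the helper is internal):
Base.rows_to_dimshape((2, 2))     # (2, 2): uniform rows form a regular block grid
Base.rows_to_dimshape((2, 1, 3))  # ((2, 1, 3), (6,)): ragged rows keep the row layout
# so `hvcat((2, 2), a, b, c, d)` now lowers to
# `typed_hvncat(T, (2, 2), true, a, b, c, d)` for the promoted element type `T`.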
# the following method is needed to provide a more specific one compared to LinearAlgebra/uniformscaling.jl -hvcat(rows::Tuple{Vararg{Int}}, xs::Union{AbstractVecOrMat,Number}...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...) +hvcat(rows::Tuple{Vararg{Int}}, xs::Union{AbstractArray,Number}...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...) function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, xs::Number...) where T nr = length(rows) nc = rows[1] for i = 2:nr if nc != rows[i] - throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) + throw(DimensionMismatch("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) end end hvcat_fill!(Matrix{T}(undef, nr, nc), xs) end -function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T - nbr = length(rows) # number of block rows - rs = Vector{Any}(undef, nbr) - a = 1 - for i = 1:nbr - rs[i] = typed_hcat(T, as[a:a-1+rows[i]]...) - a += rows[i] - end - T[rs...;] -end +typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T = typed_hvncat(T, rows_to_dimshape(rows), true, as...) ## N-dimensional concatenation ## @@ -2287,13 +2369,13 @@ _typed_hvncat_0d_only_one() = function _typed_hvncat(::Type{T}, ::Val{N}) where {T, N} N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) return Array{T, N}(undef, ntuple(x -> 0, Val(N))) end function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) A = cat_similar(xs[1], T, (ntuple(x -> 1, Val(N - 1))..., length(xs))) hvncat_fill!(A, false, xs) return A @@ -2305,7 +2387,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N} length(as) > 0 || throw(ArgumentError("must have at least one element")) N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) for a ∈ as ndims(a) <= N || all(x -> size(a, x) == 1, (N + 1):ndims(a)) || return _typed_hvncat(T, (ntuple(x -> 1, Val(N - 1))..., length(as), 1), false, as...) @@ -2319,7 +2401,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N} Ndim += cat_size(as[i], N) nd = max(nd, cat_ndims(as[i])) for d ∈ 1:N - 1 - cat_size(as[1], d) == cat_size(as[i], d) || throw(ArgumentError("mismatched size along axis $d in element $i")) + cat_size(as[1], d) == cat_size(as[i], d) || throw(DimensionMismatch("mismatched size along axis $d in element $i")) end end @@ -2338,7 +2420,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as...) where {T, N} length(as) > 0 || throw(ArgumentError("must have at least one element")) N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) nd = N Ndim = 0 for i ∈ eachindex(as) @@ -2346,7 +2428,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as...) 
where {T, N} nd = max(nd, cat_ndims(as[i])) for d ∈ 1:N-1 cat_size(as[i], d) == 1 || - throw(ArgumentError("all dimensions of element $i other than $N must be of length 1")) + throw(DimensionMismatch("all dimensions of element $i other than $N must be of length 1")) end end @@ -2463,7 +2545,7 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as for dd ∈ 1:N dd == d && continue if cat_size(as[startelementi], dd) != cat_size(as[i], dd) - throw(ArgumentError("incompatible shape in element $i")) + throw(DimensionMismatch("incompatible shape in element $i")) end end end @@ -2500,21 +2582,21 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as elseif currentdims[d] < outdims[d] # dimension in progress break else # exceeded dimension - throw(ArgumentError("argument $i has too many elements along axis $d")) + throw(DimensionMismatch("argument $i has too many elements along axis $d")) end end end elseif currentdims[d1] > outdims[d1] # exceeded dimension - throw(ArgumentError("argument $i has too many elements along axis $d1")) + throw(DimensionMismatch("argument $i has too many elements along axis $d1")) end end outlen = prod(outdims) elementcount == outlen || - throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)")) + throw(DimensionMismatch("mismatched number of elements; expected $(outlen), got $(elementcount)")) # copy into final array - A = cat_similar(as[1], T, outdims) + A = cat_similar(as[1], T, ntuple(i -> outdims[i], N)) # @assert all(==(0), currentdims) outdims .= 0 hvncat_fill!(A, currentdims, outdims, d1, d2, as) @@ -2572,8 +2654,8 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: if d == 1 || i == 1 || wasstartblock currentdims[d] += dsize elseif dsize != cat_size(as[i - 1], ad) - throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \ - expected $(cat_size(as[i - 1], ad)), got $dsize")) + throw(DimensionMismatch("argument $i has a mismatched number of elements along axis $ad; \ + expected $(cat_size(as[i - 1], ad)), got $dsize")) end wasstartblock = blockcounts[d] == 1 # remember for next dimension @@ -2583,15 +2665,15 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: if outdims[d] == -1 outdims[d] = currentdims[d] elseif outdims[d] != currentdims[d] - throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \ - expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize")) + throw(DimensionMismatch("argument $i has a mismatched number of elements along axis $ad; \ + expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize")) end currentdims[d] = 0 blockcounts[d] = 0 shapepos[d] += 1 d > 1 && (blockcounts[d - 1] == 0 || - throw(ArgumentError("shape in level $d is inconsistent; level counts must nest \ - evenly into each other"))) + throw(DimensionMismatch("shape in level $d is inconsistent; level counts must nest \ + evenly into each other"))) end end end @@ -2608,7 +2690,7 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: # @assert all(==(0), blockcounts) # copy into final array - A = cat_similar(as[1], T, outdims) + A = cat_similar(as[1], T, ntuple(i -> outdims[i], nd)) hvncat_fill!(A, currentdims, blockcounts, d1, d2, as) return A end @@ -2881,7 +2963,7 @@ end @inline function _stack_size_check(x, ax1::Tuple) if _iterator_axes(x) != ax1 uax1 = map(UnitRange, ax1) - uaxN = map(UnitRange, axes(x)) + uaxN = 
map(UnitRange, _iterator_axes(x)) throw(DimensionMismatch( LazyString("stack expects uniform slices, got axes(x) == ", uaxN, " while first had ", uax1))) end @@ -2917,6 +2999,15 @@ function cmp(A::AbstractVector, B::AbstractVector) return cmp(length(A), length(B)) end +""" + isless(A::AbstractArray{<:Any,0}, B::AbstractArray{<:Any,0}) + +Return `true` when the only element of `A` is less than the only element of `B`. +""" +function isless(A::AbstractArray{<:Any,0}, B::AbstractArray{<:Any,0}) + isless(only(A), only(B)) +end + """ isless(A::AbstractVector, B::AbstractVector) @@ -3083,9 +3174,8 @@ julia> foreach((x, y) -> println(x, " with ", y), tri, 'a':'z') 7 with c ``` """ -foreach(f) = (f(); nothing) foreach(f, itr) = (for x in itr; f(x); end; nothing) -foreach(f, itrs...) = (for z in zip(itrs...); f(z...); end; nothing) +foreach(f, itr, itrs...) = (for z in zip(itr, itrs...); f(z...); end; nothing) ## map over arrays ## @@ -3162,7 +3252,7 @@ one *without* a colon in the slice. This is `view(A,:,i,:)`, whereas `mapslices(f, A; dims=(1,3))` uses `A[:,i,:]`. The function `f` may mutate values in the slice without affecting `A`. """ -function mapslices(f, A::AbstractArray; dims) +@constprop :aggressive function mapslices(f, A::AbstractArray; dims) isempty(dims) && return map(f, A) for d in dims @@ -3257,10 +3347,6 @@ end concatenate_setindex!(R, v, I...) = (R[I...] .= (v,); R) concatenate_setindex!(R, X::AbstractArray, I...) = (R[I...] = X) -## 0 arguments - -map(f) = f() - ## 1 argument function map!(f::F, dest::AbstractArray, A::AbstractArray) where F @@ -3283,6 +3369,8 @@ mapany(f, itr) = Any[f(x) for x in itr] Transform collection `c` by applying `f` to each element. For multiple collection arguments, apply `f` elementwise, and stop when any of them is exhausted. +The element type of the result is determined in the same manner as in [`collect`](@ref). + See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref). # Examples @@ -3340,6 +3428,8 @@ end Like [`map`](@ref), but stores the result in `destination` rather than a new collection. `destination` must be at least as large as the smallest collection. +$(_DOCS_ALIASING_WARNING) + See also: [`map`](@ref), [`foreach`](@ref), [`zip`](@ref), [`copyto!`](@ref). # Examples @@ -3364,11 +3454,32 @@ julia> map!(+, zeros(Int, 5), 100:999, 1:3) ``` """ function map!(f::F, dest::AbstractArray, As::AbstractArray...) where {F} - isempty(As) && throw(ArgumentError( - """map! requires at least one "source" argument""")) + @assert !isempty(As) # should dispatch to map!(f, A) map_n!(f, dest, As) end +""" + map!(function, array) + +Like [`map`](@ref), but stores the result in the same array. +!!! compat "Julia 1.12" + This method requires Julia 1.12 or later. To support previous versions too, + use the equivalent `map!(function, array, array)`. + +# Examples +```jldoctest +julia> a = [1 2 3; 4 5 6]; + +julia> map!(x -> x^3, a); + +julia> a +2×3 Matrix{$Int}: + 1 8 27 + 64 125 216 +``` +""" +map!(f::F, inout::AbstractArray) where F = map!(f, inout, inout) + """ map(f, A::AbstractArray...) -> N-array @@ -3394,7 +3505,37 @@ julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1)) # iterates until 3rd is 102.0 ``` """ -map(f, iters...) = collect(Generator(f, iters...)) +map(f, it, iters...) = collect(Generator(f, it, iters...)) + +# Generic versions of push! 
for AbstractVector +# These are specialized further for Vector for faster resizing and setindexing +function push!(a::AbstractVector{T}, item) where T + # convert first so we don't grow the array if the assignment won't work + itemT = item isa T ? item : convert(T, item)::T + new_length = length(a) + 1 + resize!(a, new_length) + a[end] = itemT + return a +end + +# specialize and optimize the single argument case +function push!(a::AbstractVector{Any}, @nospecialize x) + new_length = length(a) + 1 + resize!(a, new_length) + a[end] = x + return a +end +function push!(a::AbstractVector{Any}, @nospecialize x...) + @_terminates_locally_meta + na = length(a) + nx = length(x) + resize!(a, na + nx) + e = lastindex(a) - nx + for i = 1:nx + a[e+i] = x[i] + end + return a +end # multi-item push!, pushfirst! (built on top of type-specific 1-item version) # (note: must not cause a dispatch loop when 1-item case is not defined) @@ -3403,6 +3544,9 @@ push!(A, a, b, c...) = push!(push!(A, a, b), c...) pushfirst!(A, a, b) = pushfirst!(pushfirst!(A, b), a) pushfirst!(A, a, b, c...) = pushfirst!(pushfirst!(A, c...), a, b) +# sizehint! does nothing by default +sizehint!(a::AbstractVector, _) = a + ## hashing AbstractArray ## const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5 @@ -3526,9 +3670,9 @@ end ## 1-d circshift ## function circshift!(a::AbstractVector, shift::Integer) n = length(a) - n == 0 && return + n == 0 && return a shift = mod(shift, n) - shift == 0 && return + shift == 0 && return a l = lastindex(a) reverse!(a, firstindex(a), l-shift) reverse!(a, l-shift+1, lastindex(a)) diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl index 70c304d9060c1..54b6d75cee2dc 100644 --- a/base/abstractarraymath.jl +++ b/base/abstractarraymath.jl @@ -93,6 +93,70 @@ function _dropdims(A::AbstractArray, dims::Dims) end _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),)) + +""" + insertdims(A; dims) + +Inverse of [`dropdims`](@ref); return an array with new singleton dimensions +at every dimension in `dims`. + +Repeated dimensions are forbidden and the largest entry in `dims` must be +less than or equal to `ndims(A) + length(dims)`. + +The result shares the same underlying data as `A`, such that the +result is mutable if and only if `A` is mutable, and setting elements of one +alters the values of the other. + +See also: [`dropdims`](@ref), [`reshape`](@ref), [`vec`](@ref). +# Examples +```jldoctest +julia> x = [1 2 3; 4 5 6] +2×3 Matrix{Int64}: + 1 2 3 + 4 5 6 + +julia> insertdims(x, dims=3) +2×3×1 Array{Int64, 3}: +[:, :, 1] = + 1 2 3 + 4 5 6 + +julia> insertdims(x, dims=(1,2,5)) == reshape(x, 1, 1, 2, 3, 1) +true + +julia> dropdims(insertdims(x, dims=(1,2,5)), dims=(1,2,5)) +2×3 Matrix{Int64}: + 1 2 3 + 4 5 6 +``` + +!!! compat "Julia 1.12" + Requires Julia 1.12 or later. +""" +insertdims(A; dims) = _insertdims(A, dims) +function _insertdims(A::AbstractArray{T, N}, dims::NTuple{M, Int}) where {T, N, M} + for i in eachindex(dims) + 1 ≤ dims[i] || throw(ArgumentError("the smallest entry in dims must be ≥ 1.")) + dims[i] ≤ N+M || throw(ArgumentError("the largest entry in dims must not be larger than the dimension of the array and the length of dims added")) + for j = 1:i-1 + dims[j] == dims[i] && throw(ArgumentError("inserted dims must be unique")) + end + end + + # acc is a tuple, where the first entry is the final shape + # the second entry of acc is a counter for the axes of A + inds = Base._foldoneto((acc, i) -> + i ∈ dims + ?
((acc[1]..., Base.OneTo(1)), acc[2]) + : ((acc[1]..., axes(A, acc[2])), acc[2] + 1), + ((), 1), Val(N+M)) + new_shape = inds[1] + return reshape(A, new_shape) +end +_insertdims(A::AbstractArray, dim::Integer) = _insertdims(A, (Int(dim),)) + + + ## Unary operators ## """ @@ -119,6 +183,7 @@ julia> A """ conj!(A::AbstractArray{<:Number}) = (@inbounds broadcast!(conj, A, A); A) conj!(x::AbstractArray{<:Real}) = x +conj!(A::AbstractArray) = (foreach(conj!, A); A) """ conj(A::AbstractArray) @@ -264,10 +329,13 @@ circshift(a::AbstractArray, shiftamt::DimsInteger) = circshift!(similar(a), a, s """ circshift(A, shifts) -Circularly shift, i.e. rotate, the data in an array. The second argument is a tuple or +Circularly shift, i.e. rotate, the data in `A`. The second argument is a tuple or vector giving the amount to shift in each dimension, or an integer to shift only in the first dimension. +The generated code is most efficient when the shift amounts are known at compile-time, i.e., +compile-time constants. + See also: [`circshift!`](@ref), [`circcopy!`](@ref), [`bitrotate`](@ref), [`<<`](@ref). # Examples @@ -316,6 +384,18 @@ julia> circshift(a, -1) 0 1 1 + +julia> x = (1, 2, 3, 4, 5) +(1, 2, 3, 4, 5) + +julia> circshift(x, 4) +(2, 3, 4, 5, 1) + +julia> z = (1, 'a', -7.0, 3) +(1, 'a', -7.0, 3) + +julia> circshift(z, -1) +('a', -7.0, 3, 1) ``` """ function circshift(a::AbstractArray, shiftamt) @@ -353,7 +433,7 @@ julia> repeat([1, 2, 3], 2, 3) ``` """ function repeat(A::AbstractArray, counts...) - return _RepeatInnerOuter.repeat(A, outer=counts) + return repeat(A, outer=counts) end """ @@ -438,6 +518,9 @@ function check(arr, inner, outer) # TODO: Currently one based indexing is demanded for inner !== nothing, # but not for outer !== nothing. Decide for something consistent. Base.require_one_based_indexing(arr) + if !all(n -> n isa Integer, inner) + throw(ArgumentError("repeat requires integer counts, got inner = $inner")) + end if any(<(0), inner) throw(ArgumentError("no inner repetition count may be negative; got $inner")) end @@ -446,6 +529,9 @@ function check(arr, inner, outer) end end if outer !== nothing + if !all(n -> n isa Integer, outer) + throw(ArgumentError("repeat requires integer counts, got outer = $outer")) + end if any(<(0), outer) throw(ArgumentError("no outer repetition count may be negative; got $outer")) end diff --git a/base/abstractdict.jl b/base/abstractdict.jl index 9dba5369a2a66..3be930151d4d4 100644 --- a/base/abstractdict.jl +++ b/base/abstractdict.jl @@ -12,6 +12,8 @@ struct KeyError <: Exception key end +KeyTypeError(K, key) = TypeError(:var"dict key", K, key) + const secret_table_token = :__c782dbf1cf4d6a2e5e3865d7e95634f2e09b5902__ haskey(d::AbstractDict, k) = in(k, keys(d)) @@ -86,8 +88,8 @@ Return an iterator over all keys in a dictionary. When the keys are stored internally in a hash table, as is the case for `Dict`, the order in which they are returned may vary. -But `keys(a)` and `values(a)` both iterate `a` and -return the elements in the same order. +But `keys(a)`, `values(a)` and `pairs(a)` all iterate `a` +and return the elements in the same order. # Examples ```jldoctest @@ -112,8 +114,8 @@ Return an iterator over all values in a collection. When the values are stored internally in a hash table, as is the case for `Dict`, the order in which they are returned may vary. -But `keys(a)` and `values(a)` both iterate `a` and -return the elements in the same order. +But `keys(a)`, `values(a)` and `pairs(a)` all iterate `a` +and return the elements in the same order. 
# Examples ```jldoctest @@ -136,6 +138,10 @@ values(a::AbstractDict) = ValueIterator(a) Return an iterator over `key => value` pairs for any collection that maps a set of keys to a set of values. This includes arrays, where the keys are the array indices. +When the entries are stored internally in a hash table, +as is the case for `Dict`, the order in which they are returned may vary. +But `keys(a)`, `values(a)` and `pairs(a)` all iterate `a` +and return the elements in the same order. # Examples ```jldoctest @@ -218,7 +224,7 @@ Dict{Int64, Int64} with 3 entries: function merge!(d::AbstractDict, others::AbstractDict...) for other in others if haslength(d) && haslength(other) - sizehint!(d, length(d) + length(other)) + sizehint!(d, length(d) + length(other); shrink = false) end for (k,v) in other d[k] = v @@ -299,7 +305,7 @@ julia> keytype(Dict(Int32(1) => "foo")) Int32 ``` """ -keytype(::Type{<:AbstractDict{K,V}}) where {K,V} = K +keytype(::Type{<:AbstractDict{K}}) where {K} = K keytype(a::AbstractDict) = keytype(typeof(a)) """ @@ -313,7 +319,7 @@ julia> valtype(Dict(Int32(1) => "foo")) String ``` """ -valtype(::Type{<:AbstractDict{K,V}}) where {K,V} = V +valtype(::Type{<:AbstractDict{<:Any,V}}) where {V} = V valtype(a::AbstractDict) = valtype(typeof(a)) """ @@ -390,6 +396,10 @@ Dict{String, Float64} with 3 entries: julia> ans == mergewith(+)(a, b) true + +julia> mergewith(-, Dict(), Dict(:a=>1)) # Combining function only used if key is present in both +Dict{Any, Any} with 1 entry: + :a => 1 ``` """ mergewith(combine, d::AbstractDict, others::AbstractDict...) = @@ -414,7 +424,7 @@ end Update `d`, removing elements for which `f` is `false`. The function `f` is passed `key=>value` pairs. -# Example +# Examples ```jldoctest julia> d = Dict(1=>"a", 2=>"b", 3=>"c") Dict{Int64, String} with 3 entries: @@ -577,6 +587,55 @@ _tablesz(x::T) where T <: Integer = x < 16 ? 
T(16) : one(T)<<(top_set_bit(x-one( TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}} +# This error is thrown if `grow_to!` cannot validate the contents of the iterator argument to it, which it does by testing the iteration protocol (isiterable) on it each time it is about to start iteration on it +_throw_dict_kv_error() = throw(ArgumentError("AbstractDict(kv): kv needs to be an iterator of 2-tuples or pairs")) + +function grow_to!(dest::AbstractDict, itr) + applicable(iterate, itr) || _throw_dict_kv_error() + y = iterate(itr) + y === nothing && return dest + kv, st = y + applicable(iterate, kv) || _throw_dict_kv_error() + k = iterate(kv) + k === nothing && _throw_dict_kv_error() + k, kvst = k + v = iterate(kv, kvst) + v === nothing && _throw_dict_kv_error() + v, kvst = v + iterate(kv, kvst) === nothing || _throw_dict_kv_error() + if !(dest isa AbstractDict{typeof(k), typeof(v)}) + dest = empty(dest, typeof(k), typeof(v)) + end + dest[k] = v + return grow_to!(dest, itr, st) +end + +function grow_to!(dest::AbstractDict{K,V}, itr, st) where {K, V} + y = iterate(itr, st) + while y !== nothing + kv, st = y + applicable(iterate, kv) || _throw_dict_kv_error() + kst = iterate(kv) + kst === nothing && _throw_dict_kv_error() + k, kvst = kst + vst = iterate(kv, kvst) + vst === nothing && _throw_dict_kv_error() + v, kvst = vst + iterate(kv, kvst) === nothing || _throw_dict_kv_error() + if isa(k, K) && isa(v, V) + dest[k] = v + else + new = empty(dest, promote_typejoin(K, typeof(k)), promote_typejoin(V, typeof(v))) + merge!(new, dest) + new[k] = v + return grow_to!(new, itr, st) + end + y = iterate(itr, st) + end + return dest +end + + dict_with_eltype(DT_apply, kv, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv) dict_with_eltype(DT_apply, kv::Generator, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv) dict_with_eltype(DT_apply, ::Type{Pair{K,V}}) where {K,V} = DT_apply(K, V)() diff --git a/base/abstractset.jl b/base/abstractset.jl index 5d0d65dad2de6..b38cb2799740b 100644 --- a/base/abstractset.jl +++ b/base/abstractset.jl @@ -65,6 +65,8 @@ const ∪ = union Construct the [`union`](@ref) of passed in sets and overwrite `s` with the result. Maintain order with arrays. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> a = Set([3, 4, 5]); @@ -99,7 +101,7 @@ max_values(::Type{Bool}) = 2 max_values(::Type{Nothing}) = 1 function union!(s::AbstractSet{T}, itr) where T - haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int) + haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false) for x in itr push!(s, x) length(s) == max_values(T) && break @@ -182,6 +184,8 @@ const ∩ = intersect Intersect all passed in sets and overwrite `s` with the result. Maintain order with arrays. + +$(_DOCS_ALIASING_WARNING) """ function intersect!(s::AbstractSet, itrs...) for x in itrs @@ -218,6 +222,8 @@ setdiff(s) = union(s) Remove from set `s` (in-place) each element of each iterable from `itrs`. Maintain order with arrays. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> a = Set([1, 3, 4, 5]); @@ -272,6 +278,8 @@ symdiff(s) = symdiff!(copy(s)) Construct the symmetric difference of the passed in sets, and overwrite `s` with the result. When `s` is an array, the order is maintained. Note that in this case the multiplicity of elements matters. + +$(_DOCS_ALIASING_WARNING) """ function symdiff!(s::AbstractSet, itrs...) 
for x in itrs @@ -338,13 +346,17 @@ function issubset(a, b) end """ - hasfastin(T) + Base.hasfastin(T) Determine whether the computation `x ∈ collection` where `collection::T` can be considered as a "fast" operation (typically constant or logarithmic complexity). The definition `hasfastin(x) = hasfastin(typeof(x))` is provided for convenience so that instances can be passed instead of types. However the form that accepts a type argument should be defined for new types. + +The default for `hasfastin(T)` is `true` for subtypes of +[`AbstractSet`](@ref), [`AbstractDict`](@ref) and [`AbstractRange`](@ref) +and `false` otherwise. """ hasfastin(::Type) = false hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true @@ -352,6 +364,31 @@ hasfastin(x) = hasfastin(typeof(x)) ⊇(a, b) = b ⊆ a +""" + issubset(x) + +Create a function that compares its argument to `x` using [`issubset`](@ref), i.e. +a function equivalent to `y -> issubset(y, x)`. +The returned function is of type `Base.Fix2{typeof(issubset)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +issubset(a) = Fix2(issubset, a) + +""" + ⊇(x) + +Create a function that compares its argument to `x` using [`⊇`](@ref), i.e. +a function equivalent to `y -> y ⊇ x`. +The returned function is of type `Base.Fix2{typeof(⊇)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊇(a) = Fix2(⊇, a) ## strict subset comparison function ⊊ end @@ -381,6 +418,31 @@ false ⊊(a, b) = Set(a) ⊊ Set(b) ⊋(a, b) = b ⊊ a +""" + ⊋(x) + +Create a function that compares its argument to `x` using [`⊋`](@ref), i.e. +a function equivalent to `y -> y ⊋ x`. +The returned function is of type `Base.Fix2{typeof(⊋)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊋(a) = Fix2(⊋, a) +""" + ⊊(x) + +Create a function that compares its argument to `x` using [`⊊`](@ref), i.e. +a function equivalent to `y -> y ⊊ x`. +The returned function is of type `Base.Fix2{typeof(⊊)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊊(a) = Fix2(⊊, a) + function ⊈ end function ⊉ end """ @@ -405,6 +467,32 @@ false ⊈(a, b) = !⊆(a, b) ⊉(a, b) = b ⊈ a +""" + ⊉(x) + +Create a function that compares its argument to `x` using [`⊉`](@ref), i.e. +a function equivalent to `y -> y ⊉ x`. +The returned function is of type `Base.Fix2{typeof(⊉)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊉(a) = Fix2(⊉, a) + +""" + ⊈(x) + +Create a function that compares its argument to `x` using [`⊈`](@ref), i.e. +a function equivalent to `y -> y ⊈ x`. +The returned function is of type `Base.Fix2{typeof(⊈)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊈(a) = Fix2(⊈, a) + ## set equality comparison """ @@ -441,6 +529,19 @@ function issetequal(a, b) return issetequal(Set(a), Set(b)) end +""" + issetequal(x) + +Create a function that compares its argument to `x` using [`issetequal`](@ref), i.e. +a function equivalent to `y -> issetequal(y, x)`. +The returned function is of type `Base.Fix2{typeof(issetequal)}`, which can be +used to implement specialized methods. + +!!! 
compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +issetequal(a) = Fix2(issetequal, a) + ## set disjoint comparison """ isdisjoint(a, b) -> Bool @@ -487,6 +588,19 @@ function isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T end end +""" + isdisjoint(x) + +Create a function that compares its argument to `x` using [`isdisjoint`](@ref), i.e. +a function equivalent to `y -> isdisjoint(y, x)`. +The returned function is of type `Base.Fix2{typeof(isdisjoint)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +isdisjoint(a) = Fix2(isdisjoint, a) + _overlapping_range_isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T = invoke(isdisjoint, Tuple{Any,Any}, a, b) function _overlapping_range_isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T<:Integer diff --git a/base/accumulate.jl b/base/accumulate.jl index eeb9759e125c7..2748a4da481fa 100644 --- a/base/accumulate.jl +++ b/base/accumulate.jl @@ -33,7 +33,7 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector) end function accumulate_pairwise(op, v::AbstractVector{T}) where T - out = similar(v, promote_op(op, T, T)) + out = similar(v, _accumulate_promote_op(op, v)) return accumulate_pairwise!(op, out, v) end @@ -42,6 +42,8 @@ end cumsum!(B, A; dims::Integer) Cumulative sum of `A` along the dimension `dims`, storing the result in `B`. See also [`cumsum`](@ref). + +$(_DOCS_ALIASING_WARNING) """ cumsum!(B::AbstractArray{T}, A; dims::Integer) where {T} = accumulate!(add_sum, B, A, dims=dims) @@ -109,8 +111,8 @@ julia> cumsum(a, dims=2) widening happens and integer overflow results in `Int8[100, -128]`. """ function cumsum(A::AbstractArray{T}; dims::Integer) where T - out = similar(A, promote_op(add_sum, T, T)) - cumsum!(out, A, dims=dims) + out = similar(A, _accumulate_promote_op(add_sum, A)) + return cumsum!(out, A, dims=dims) end """ @@ -150,6 +152,8 @@ cumsum(itr) = accumulate(add_sum, itr) Cumulative product of `A` along the dimension `dims`, storing the result in `B`. See also [`cumprod`](@ref). + +$(_DOCS_ALIASING_WARNING) """ cumprod!(B::AbstractArray{T}, A; dims::Integer) where {T} = accumulate!(mul_prod, B, A, dims=dims) @@ -159,6 +163,8 @@ cumprod!(B::AbstractArray{T}, A; dims::Integer) where {T} = Cumulative product of a vector `x`, storing the result in `y`. See also [`cumprod`](@ref). + +$(_DOCS_ALIASING_WARNING) """ cumprod!(y::AbstractVector, x::AbstractVector) = cumprod!(y, x, dims=1) @@ -274,14 +280,13 @@ function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...) # This branch takes care of the cases not handled by `_accumulate!`. return collect(Iterators.accumulate(op, A; kw...)) end + nt = values(kw) - if isempty(kw) - out = similar(A, promote_op(op, eltype(A), eltype(A))) - elseif keys(nt) === (:init,) - out = similar(A, promote_op(op, typeof(nt.init), eltype(A))) - else + if !(isempty(kw) || keys(nt) === (:init,)) throw(ArgumentError("accumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))")) end + + out = similar(A, _accumulate_promote_op(op, A; kw...)) accumulate!(op, out, A; dims=dims, kw...) end @@ -301,6 +306,8 @@ Cumulative operation `op` on `A` along the dimension `dims`, storing the result Providing `dims` is optional for vectors. If the keyword argument `init` is given, its value is used to instantiate the accumulation. +$(_DOCS_ALIASING_WARNING) + See also [`accumulate`](@ref), [`cumsum!`](@ref), [`cumprod!`](@ref). 
# Examples @@ -434,3 +441,42 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer) end return B end + +# Internal function used to identify the widest possible eltype required for accumulate results +function _accumulate_promote_op(op, v; init=nothing) + # Nested mock functions used to infer the widest necessary eltype + # NOTE: We are just passing this to promote_op for inference and should never be run. + + # Initialization function used to identify initial type of `r` + # NOTE: reduce_first may have a different return type than calling `op` + function f(op, v, init) + val = first(something(iterate(v))) + return isnothing(init) ? Base.reduce_first(op, val) : op(init, val) + end + + # Infer iteration type independent of the initialization type + # If `op` fails then this will return `Union{}` as `k` will be undefined. + # Returning `Union{}` is desirable as it won't break the `promote_type` call in the + # outer scope below + function g(op, v, r) + local k + for val in v + k = op(r, val) + end + return k + end + + # Finally loop again with the two types promoted together + # If the `op` fails and reduce_first was used then then this will still just + # return the initial type, allowing the `op` to error during execution. + function h(op, v, r) + for val in v + r = op(r, val) + end + return r + end + + R = Base.promote_op(f, typeof(op), typeof(v), typeof(init)) + K = Base.promote_op(g, typeof(op), typeof(v), R) + return Base.promote_op(h, typeof(op), typeof(v), Base.promote_type(R, K)) +end diff --git a/base/anyall.jl b/base/anyall.jl new file mode 100644 index 0000000000000..e51515bb3187d --- /dev/null +++ b/base/anyall.jl @@ -0,0 +1,231 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## all & any + +""" + any(itr) -> Bool + +Test whether any elements of a boolean collection are `true`, returning `true` as +soon as the first `true` value in `itr` is encountered (short-circuiting). To +short-circuit on `false`, use [`all`](@ref). + +If the input contains [`missing`](@ref) values, return `missing` if all non-missing +values are `false` (or equivalently, if the input contains no `true` value), following +[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). + +See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), [`||`](@ref). + +# Examples +```jldoctest +julia> a = [true,false,false,true] +4-element Vector{Bool}: + 1 + 0 + 0 + 1 + +julia> any(a) +true + +julia> any((println(i); v) for (i, v) in enumerate(a)) +1 +true + +julia> any([missing, true]) +true + +julia> any([false, missing]) +missing +``` +""" +any(itr) = any(identity, itr) + +""" + all(itr) -> Bool + +Test whether all elements of a boolean collection are `true`, returning `false` as +soon as the first `false` value in `itr` is encountered (short-circuiting). To +short-circuit on `true`, use [`any`](@ref). + +If the input contains [`missing`](@ref) values, return `missing` if all non-missing +values are `true` (or equivalently, if the input contains no `false` value), following +[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). + +See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), [`&&`](@ref), [`allunique`](@ref). 
+ +# Examples +```jldoctest +julia> a = [true,false,false,true] +4-element Vector{Bool}: + 1 + 0 + 0 + 1 + +julia> all(a) +false + +julia> all((println(i); v) for (i, v) in enumerate(a)) +1 +2 +false + +julia> all([missing, false]) +false + +julia> all([true, missing]) +missing +``` +""" +all(itr) = all(identity, itr) + +""" + any(p, itr) -> Bool + +Determine whether predicate `p` returns `true` for any elements of `itr`, returning +`true` as soon as the first item in `itr` for which `p` returns `true` is encountered +(short-circuiting). To short-circuit on `false`, use [`all`](@ref). + +If the input contains [`missing`](@ref) values, return `missing` if all non-missing +values are `false` (or equivalently, if the input contains no `true` value), following +[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). + +# Examples +```jldoctest +julia> any(i->(4<=i<=6), [3,5,7]) +true + +julia> any(i -> (println(i); i > 3), 1:10) +1 +2 +3 +4 +true + +julia> any(i -> i > 0, [1, missing]) +true + +julia> any(i -> i > 0, [-1, missing]) +missing + +julia> any(i -> i > 0, [-1, 0]) +false +``` +""" +any(f, itr) = _any(f, itr, :) + +for ItrT = (Tuple,Any) + # define a generic method and a specialized version for `Tuple`, + # whose method bodies are identical, while giving better effects to the later + @eval function _any(f, itr::$ItrT, ::Colon) + $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing) + anymissing = false + for x in itr + v = f(x) + if ismissing(v) + anymissing = true + else + v && return true + end + end + return anymissing ? missing : false + end +end + +# Specialized versions of any(f, ::Tuple) +# We fall back to the for loop implementation all elements have the same type or +# if the tuple is too large. +function any(f, itr::Tuple) + if itr isa NTuple || length(itr) > 32 + return _any(f, itr, :) + end + _any_tuple(f, false, itr...) +end + +@inline function _any_tuple(f, anymissing, x, rest...) + v = f(x) + if ismissing(v) + anymissing = true + elseif v + return true + end + return _any_tuple(f, anymissing, rest...) +end +@inline _any_tuple(f, anymissing) = anymissing ? missing : false + +""" + all(p, itr) -> Bool + +Determine whether predicate `p` returns `true` for all elements of `itr`, returning +`false` as soon as the first item in `itr` for which `p` returns `false` is encountered +(short-circuiting). To short-circuit on `true`, use [`any`](@ref). + +If the input contains [`missing`](@ref) values, return `missing` if all non-missing +values are `true` (or equivalently, if the input contains no `false` value), following +[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). + +# Examples +```jldoctest +julia> all(i->(4<=i<=6), [4,5,6]) +true + +julia> all(i -> (println(i); i < 3), 1:10) +1 +2 +3 +false + +julia> all(i -> i > 0, [1, missing]) +missing + +julia> all(i -> i > 0, [-1, missing]) +false + +julia> all(i -> i > 0, [1, 2]) +true +``` +""" +all(f, itr) = _all(f, itr, :) + +for ItrT = (Tuple,Any) + # define a generic method and a specialized version for `Tuple`, + # whose method bodies are identical, while giving better effects to the later + @eval function _all(f, itr::$ItrT, ::Colon) + $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing) + anymissing = false + for x in itr + v = f(x) + if ismissing(v) + anymissing = true + else + v || return false + end + end + return anymissing ? missing : true + end +end + +# Specialized versions of all(f, ::Tuple), +# This is similar to any(f, ::Tuple) defined above. 
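# Illustrative note (added by the editor, not part of the patch): as with `any`,
# the recursive `_all_tuple` helper below unrolls a heterogeneous tuple such as
# (1, 2.0, missing) into one specialized call per element, while homogeneous
# `NTuple`s and tuples longer than 32 elements fall back to the generic
# `_all(f, itr, :)` loop defined above.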
+function all(f, itr::Tuple) + if itr isa NTuple || length(itr) > 32 + return _all(f, itr, :) + end + _all_tuple(f, false, itr...) +end + +@inline function _all_tuple(f, anymissing, x, rest...) + v = f(x) + if ismissing(v) + anymissing = true + # this syntax allows throwing a TypeError for non-Bool, for consistency with any + elseif v + nothing + else + return false + end + return _all_tuple(f, anymissing, rest...) +end +@inline _all_tuple(f, anymissing) = anymissing ? missing : true + +all(::Tuple{Missing}) = missing diff --git a/base/array.jl b/base/array.jl index 3a12b38c5bc26..aafcfc182124b 100644 --- a/base/array.jl +++ b/base/array.jl @@ -120,39 +120,37 @@ const DenseVecOrMat{T} = Union{DenseVector{T}, DenseMatrix{T}} ## Basic functions ## -using Core: arraysize, arrayset, const_arrayref - """ @_safeindex This internal macro converts: -- `getindex(xs::Tuple, )` -> `__inbounds_getindex(args...)` -- `setindex!(xs::Vector, args...)` -> `__inbounds_setindex!(xs, args...)` +- `getindex(xs::Tuple, i::Int)` -> `__safe_getindex(xs, i)` +- `setindex!(xs::Vector{T}, x, i::Int)` -> `__safe_setindex!(xs, x, i)` to tell the compiler that indexing operations within the applied expression are always inbounds and do not need to taint `:consistent` and `:nothrow`. """ macro _safeindex(ex) - return esc(_safeindex(__module__, ex)) + return esc(_safeindex(ex)) end -function _safeindex(__module__, ex) +function _safeindex(ex) isa(ex, Expr) || return ex if ex.head === :(=) - lhs = arrayref(true, ex.args, 1) + lhs = ex.args[1] if isa(lhs, Expr) && lhs.head === :ref # xs[i] = x - rhs = arrayref(true, ex.args, 2) - xs = arrayref(true, lhs.args, 1) + rhs = ex.args[2] + xs = lhs.args[1] args = Vector{Any}(undef, length(lhs.args)-1) for i = 2:length(lhs.args) - arrayset(true, args, _safeindex(__module__, arrayref(true, lhs.args, i)), i-1) + args[i-1] = _safeindex(lhs.args[i]) end - return Expr(:call, GlobalRef(__module__, :__inbounds_setindex!), xs, _safeindex(__module__, rhs), args...) + return Expr(:call, GlobalRef(@__MODULE__, :__safe_setindex!), xs, _safeindex(rhs), args...) end elseif ex.head === :ref # xs[i] - return Expr(:call, GlobalRef(__module__, :__inbounds_getindex), ex.args...) + return Expr(:call, GlobalRef(@__MODULE__, :__safe_getindex), ex.args...) end args = Vector{Any}(undef, length(ex.args)) for i = 1:length(ex.args) - arrayset(true, args, _safeindex(__module__, arrayref(true, ex.args, i)), i) + args[i] = _safeindex(ex.args[i]) end return Expr(ex.head, args...) end @@ -187,12 +185,15 @@ function vect(X...) return T[X...] end -size(a::Array, d::Integer) = arraysize(a, d isa Int ? d : convert(Int, d)) -size(a::Vector) = (arraysize(a,1),) -size(a::Matrix) = (arraysize(a,1), arraysize(a,2)) -size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims) +size(a::Array, d::Integer) = size(a, Int(d)::Int) +function size(a::Array, d::Int) + d < 1 && error("arraysize: dimension out of range") + sz = getfield(a, :size) + return d > length(sz) ? 1 : getfield(sz, d, false) # @inbounds +end +size(a::Array) = getfield(a, :size) -asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...) +asize_from(a::Array, n) = n > ndims(a) ? () : (size(a,n), asize_from(a, n+1)...) 
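# Illustrative sketch (editor's note, not part of the patch): the new
# `size(a::Array, d::Int)` method above reports trailing dimensions as 1 and
# rejects non-positive dimensions, matching the previous `arraysize` behavior:
#     A = zeros(2, 3)
#     size(A, 1), size(A, 2), size(A, 5)   # (2, 3, 1)
#     size(A, 0)                           # errors: dimension out of range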
allocatedinline(@nospecialize T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0)) @@ -210,49 +211,18 @@ julia> Base.isbitsunion(Union{Float64, String}) false ``` """ -isbitsunion(u::Union) = allocatedinline(u) -isbitsunion(x) = false +isbitsunion(u::Type) = u isa Union && allocatedinline(u) -function _unsetindex!(A::Array{T}, i::Int) where {T} +function _unsetindex!(A::Array, i::Int) @inline @boundscheck checkbounds(A, i) - t = @_gc_preserve_begin A - p = Ptr{Ptr{Cvoid}}(pointer(A, i)) - if !allocatedinline(T) - Intrinsics.atomic_pointerset(p, C_NULL, :monotonic) - elseif T isa DataType - if !datatype_pointerfree(T) - for j = 1:Core.sizeof(Ptr{Cvoid}):Core.sizeof(T) - Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic) - end - end - end - @_gc_preserve_end t + @inbounds _unsetindex!(memoryref(A.ref, i)) return A end -""" - Base.bitsunionsize(U::Union) -> Int - -For a `Union` of [`isbitstype`](@ref) types, return the size of the largest type; assumes `Base.isbitsunion(U) == true`. - -# Examples -```jldoctest -julia> Base.bitsunionsize(Union{Float64, UInt8}) -8 - -julia> Base.bitsunionsize(Union{Float64, UInt8, Int128}) -16 -``` -""" -function bitsunionsize(u::Union) - isinline, sz, _ = uniontype_layout(u) - @assert isinline - return sz -end - -elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T) +# TODO: deprecate this (aligned_sizeof and/or elsize and/or sizeof(Some{T}) are more correct) +elsize(::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T) function elsize(::Type{Ptr{T}}) where T # this only must return something valid for values which satisfy is_valid_intrinsic_elptr(T), # which includes Any and most concrete datatypes @@ -261,15 +231,25 @@ function elsize(::Type{Ptr{T}}) where T return LLT_ALIGN(Core.sizeof(T), datatype_alignment(T)) end elsize(::Type{Union{}}, slurp...) = 0 -sizeof(a::Array) = Core.sizeof(a) + +sizeof(a::Array) = length(a) * elsize(typeof(a)) # n.b. this ignores bitsunion bytes, as a historical fact function isassigned(a::Array, i::Int...) @inline + @_noub_if_noinbounds_meta @boundscheck checkbounds(Bool, a, i...) || return false - ii = (_sub2ind(size(a), i...) % UInt) - 1 - ccall(:jl_array_isassigned, Cint, (Any, UInt), a, ii) == 1 + ii = _sub2ind(size(a), i...) + return @inbounds isassigned(memoryrefnew(a.ref, ii, false)) +end + +function isassigned(a::Vector, i::Int) # slight compiler simplification for the most common case + @inline + @_noub_if_noinbounds_meta + @boundscheck checkbounds(Bool, a, i) || return false + return @inbounds isassigned(memoryrefnew(a.ref, i, false)) end + ## copy ## """ @@ -289,92 +269,48 @@ function unsafe_copyto!(dest::Ptr{T}, src::Ptr{T}, n) where T return dest end - -function _unsafe_copyto!(dest, doffs, src, soffs, n) - destp = pointer(dest, doffs) - srcp = pointer(src, soffs) - @inbounds if destp < srcp || destp > srcp + n - for i = 1:n - if isassigned(src, soffs + i - 1) - dest[doffs + i - 1] = src[soffs + i - 1] - else - _unsetindex!(dest, doffs + i - 1) - end - end - else - for i = n:-1:1 - if isassigned(src, soffs + i - 1) - dest[doffs + i - 1] = src[soffs + i - 1] - else - _unsetindex!(dest, doffs + i - 1) - end - end - end - return dest -end - """ - unsafe_copyto!(dest::Array, do, src::Array, so, N) + unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) -Copy `N` elements from a source array to a destination, starting at the linear index `so` in the -source and `do` in the destination (1-indexed). 
+Copy `n` elements from a source array to a destination, starting at the linear index `soffs` in the +source and `doffs` in the destination (1-indexed). The `unsafe` prefix on this function indicates that no validation is performed to ensure -that N is inbounds on either array. Incorrect usage may corrupt or segfault your program, in +that n is inbounds on either array. Incorrect usage may corrupt or segfault your program, in the same manner as C. """ -function unsafe_copyto!(dest::Array{T}, doffs, src::Array{T}, soffs, n) where T - t1 = @_gc_preserve_begin dest - t2 = @_gc_preserve_begin src - destp = pointer(dest, doffs) - srcp = pointer(src, soffs) - if !allocatedinline(T) - ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), - dest, destp, src, srcp, n) - elseif isbitstype(T) - memmove(destp, srcp, n * aligned_sizeof(T)) - elseif isbitsunion(T) - memmove(destp, srcp, n * aligned_sizeof(T)) - # copy selector bytes - memmove( - ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), dest) + doffs - 1, - ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), src) + soffs - 1, - n) - else - _unsafe_copyto!(dest, doffs, src, soffs, n) - end - @_gc_preserve_end t2 - @_gc_preserve_end t1 +function unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) + n == 0 && return dest + unsafe_copyto!(memoryref(dest.ref, doffs), memoryref(src.ref, soffs), n) return dest end -unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) = - _unsafe_copyto!(dest, doffs, src, soffs, n) - """ - copyto!(dest, do, src, so, N) + copyto!(dest, doffs, src, soffs, n) -Copy `N` elements from collection `src` starting at the linear index `so`, to array `dest` starting at -the index `do`. Return `dest`. +Copy `n` elements from collection `src` starting at the linear index `soffs`, to array `dest` starting at +the index `doffs`. Return `dest`. """ -function copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) - return _copyto_impl!(dest, doffs, src, soffs, n) -end +copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n) +copyto!(dest::Array, doffs::Integer, src::Memory, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n) +copyto!(dest::Memory, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n) # this is only needed to avoid possible ambiguities with methods added in some packages -function copyto!(dest::Array{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where T - return _copyto_impl!(dest, doffs, src, soffs, n) -end +copyto!(dest::Array{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where {T} = _copyto_impl!(dest, doffs, src, soffs, n) -function _copyto_impl!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) +function _copyto_impl!(dest::Union{Array,Memory}, doffs::Integer, src::Union{Array,Memory}, soffs::Integer, n::Integer) n == 0 && return dest - n > 0 || _throw_argerror("Number of elements to copy must be nonnegative.") + n > 0 || _throw_argerror("Number of elements to copy must be non-negative.") @boundscheck checkbounds(dest, doffs:doffs+n-1) @boundscheck checkbounds(src, soffs:soffs+n-1) - unsafe_copyto!(dest, doffs, src, soffs, n) + @inbounds let dest = memoryref(dest isa Array ? getfield(dest, :ref) : dest, doffs), + src = memoryref(src isa Array ? 
getfield(src, :ref) : src, soffs) + unsafe_copyto!(dest, src, n) + end return dest end + # Outlining this because otherwise a catastrophic inference slowdown # occurs, see discussion in #27874. # It is also mitigated by using a constant string. @@ -388,9 +324,13 @@ copyto!(dest::Array{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, leng # N.B: The generic definition in multidimensional.jl covers, this, this is just here # for bootstrapping purposes. function fill!(dest::Array{T}, x) where T - xT = x isa T ? x : convert(T, x)::T + @inline + x = x isa T ? x : convert(T, x)::T + return _fill!(dest, x) +end +function _fill!(dest::Array{T}, x::T) where T for i in eachindex(dest) - @inbounds dest[i] = xT + @inbounds dest[i] = x end return dest end @@ -406,7 +346,26 @@ See also [`copy!`](@ref Base.copy!), [`copyto!`](@ref), [`deepcopy`](@ref). """ copy -copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a) +@eval function copy(a::Array) + # `copy` only throws when the size exceeds the max allocation size, + # but since we're copying an existing array, we're guaranteed that this will not happen. + @_nothrow_meta + ref = a.ref + newmem = typeof(ref.mem)(undef, length(a)) + @inbounds unsafe_copyto!(memoryref(newmem), ref, length(a)) + return $(Expr(:new, :(typeof(a)), :(memoryref(newmem)), :(a.size))) +end + +# a mutating version of copyto! that results in dst aliasing src afterwards +function _take!(dst::Array{T,N}, src::Array{T,N}) where {T,N} + if getfield(dst, :ref) !== getfield(src, :ref) + setfield!(dst, :ref, getfield(src, :ref)) + end + if getfield(dst, :size) !== getfield(src, :size) + setfield!(dst, :size, getfield(src, :size)) + end + return dst +end ## Constructors ## @@ -469,9 +428,10 @@ end getindex(::Type{Any}) = Vector{Any}() function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer) - t = @_gc_preserve_begin a - p = unsafe_convert(Ptr{Cvoid}, a) - memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a)) + ref = a.ref + t = @_gc_preserve_begin ref + p = unsafe_convert(Ptr{Cvoid}, ref) + memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a) % UInt) @_gc_preserve_end t return a end @@ -580,6 +540,7 @@ function fill end fill(v, dims::DimOrInd...) = fill(v, dims) fill(v, dims::NTuple{N, Union{Integer, OneTo}}) where {N} = fill(v, map(to_dim, dims)) fill(v, dims::NTuple{N, Integer}) where {N} = (a=Array{typeof(v),N}(undef, dims); fill!(a, v); a) +fill(v, dims::NTuple{N, DimOrInd}) where {N} = (a=similar(Array{typeof(v),N}, dims); fill!(a, v); a) fill(v, dims::Tuple{}) = (a=Array{typeof(v),0}(undef, dims); fill!(a, v); a) """ @@ -640,24 +601,14 @@ for (fname, felt) in ((:zeros, :zero), (:ones, :one)) fill!(a, $felt(T)) return a end + function $fname(::Type{T}, dims::NTuple{N, DimOrInd}) where {T,N} + a = similar(Array{T,N}, dims) + fill!(a, $felt(T)) + return a + end end end -function _one(unit::T, x::AbstractMatrix) where T - require_one_based_indexing(x) - m,n = size(x) - m==n || throw(DimensionMismatch("multiplicative identity defined only for square matrices")) - # Matrix{T}(I, m, m) - I = zeros(T, m, m) - for i in 1:m - I[i,i] = unit - end - I -end - -one(x::AbstractMatrix{T}) where {T} = _one(one(T), x) -oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x) - ## Conversions ## convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? 
a : T(a)::T @@ -666,11 +617,9 @@ promote_rule(a::Type{Array{T,n}}, b::Type{Array{S,n}}) where {T,n,S} = el_same(p ## Constructors ## -if nameof(@__MODULE__) === :Base # avoid method overwrite # constructors should make copies Array{T,N}(x::AbstractArray{S,N}) where {T,N,S} = copyto_axcheck!(Array{T,N}(undef, size(x)), x) AbstractArray{T,N}(A::AbstractArray{S,N}) where {T,N,S} = copyto_axcheck!(similar(A,T), A) -end ## copying iterators to containers @@ -725,33 +674,55 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it """ - collect(collection) + collect(iterator) Return an `Array` of all items in a collection or iterator. For dictionaries, returns -`Vector{Pair{KeyType, ValType}}`. If the argument is array-like or is an iterator with the -[`HasShape`](@ref IteratorSize) trait, the result will have the same shape +a `Vector` of `key=>value` [Pair](@ref Pair)s. If the argument is array-like or is an iterator +with the [`HasShape`](@ref IteratorSize) trait, the result will have the same shape and number of dimensions as the argument. -Used by comprehensions to turn a generator into an `Array`. +Used by [comprehensions](@ref man-comprehensions) to turn a [generator expression](@ref man-generators) +into an `Array`. Thus, *on generators*, the square-brackets notation may be used instead of calling `collect`, +see second example. + +The element type of the returned array is based on the types of the values collected. However, if the +iterator is empty then the element type of the returned (empty) array is determined by type inference. # Examples + +Collect items from a `UnitRange{Int64}` collection: + ```jldoctest -julia> collect(1:2:13) -7-element Vector{Int64}: - 1 - 3 - 5 - 7 - 9 - 11 - 13 +julia> collect(1:3) +3-element Vector{Int64}: + 1 + 2 + 3 +``` -julia> [x^2 for x in 1:8 if isodd(x)] -4-element Vector{Int64}: - 1 - 9 - 25 - 49 +Collect items from a generator (same output as `[x^2 for x in 1:3]`): + +```jldoctest +julia> collect(x^2 for x in 1:3) +3-element Vector{Int64}: + 1 + 4 + 9 +``` + +Collecting an empty iterator where the result type depends on type inference: + +```jldoctest +julia> [rand(Bool) ? 1 : missing for _ in []] +Union{Missing, Int64}[] +``` + +When the iterator is non-empty, the result type depends only on values: + +```julia-repl +julia> [rand(Bool) ? 1 : missing for _ in [""]] +1-element Vector{Int64}: + 1 ``` """ collect(itr) = _collect(1:1 #= Array =#, itr, IteratorEltype(itr), IteratorSize(itr)) @@ -797,28 +768,15 @@ end # gets inlined into the caller before recursion detection # gets a chance to see it, so that recursive calls to the caller # don't trigger the inference limiter -if isdefined(Core, :Compiler) - macro default_eltype(itr) - I = esc(itr) - return quote - if $I isa Generator && ($I).f isa Type - T = ($I).f - else - T = Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)}) - end - promote_typejoin_union(T) - end - end -else - macro default_eltype(itr) - I = esc(itr) - return quote - if $I isa Generator && ($I).f isa Type - promote_typejoin_union($I.f) - else - Any - end +macro default_eltype(itr) + I = esc(itr) + return quote + if $I isa Generator && ($I).f isa Type + T = ($I).f + else + T = Base._return_type(_iterator_upper_bound, Tuple{typeof($I)}) end + promote_typejoin_union(T) end end @@ -940,7 +898,7 @@ end ## Iteration ## -iterate(A::Array, i=1) = (@inline; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing) +iterate(A::Array, i=1) = (@inline; (i - 1)%UInt < length(A)%UInt ? 
(@inbounds A[i], i + 1) : nothing) ## Indexing: getindex ## @@ -965,6 +923,12 @@ julia> getindex(A, "a") """ function getindex end +function getindex(A::Array, i1::Int, i2::Int, I::Int...) + @inline + @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking + return @inbounds A[_to_linear_index(A, i1, i2, I...)] +end + # Faster contiguous indexing using copyto! for AbstractUnitRange and Colon function getindex(A::Array, I::AbstractUnitRange{<:Integer}) @inline @@ -1016,27 +980,48 @@ Dict{String, Int64} with 2 entries: """ function setindex! end -@eval setindex!(A::Array{T}, x, i1::Int) where {T} = - arrayset($(Expr(:boundscheck)), A, x isa T ? x : convert(T,x)::T, i1) -@eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} = - (@inline; arrayset($(Expr(:boundscheck)), A, x isa T ? x : convert(T,x)::T, i1, i2, I...)) +function setindex!(A::Array{T}, x, i::Int) where {T} + @_propagate_inbounds_meta + x = x isa T ? x : convert(T, x)::T + return _setindex!(A, x, i) +end +function _setindex!(A::Array{T}, x::T, i::Int) where {T} + @_noub_if_noinbounds_meta + @boundscheck (i - 1)%UInt < length(A)%UInt || throw_boundserror(A, (i,)) + memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false) + return A +end +function setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} + @_propagate_inbounds_meta + x = x isa T ? x : convert(T, x)::T + return _setindex!(A, x, i1, i2, I...) +end +function _setindex!(A::Array{T}, x::T, i1::Int, i2::Int, I::Int...) where {T} + @inline + @_noub_if_noinbounds_meta + @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking + memoryrefset!(memoryrefnew(A.ref, _to_linear_index(A, i1, i2, I...), false), x, :not_atomic, false) + return A +end -__inbounds_setindex!(A::Array{T}, x, i1::Int) where {T} = - arrayset(false, A, convert(T,x)::T, i1) -__inbounds_setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) 
where {T} = - (@inline; arrayset(false, A, convert(T,x)::T, i1, i2, I...)) +__safe_setindex!(A::Vector{Any}, @nospecialize(x), i::Int) = (@inline; @_nothrow_noub_meta; + memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false); return A) +__safe_setindex!(A::Vector{T}, x::T, i::Int) where {T} = (@inline; @_nothrow_noub_meta; + memoryrefset!(memoryrefnew(A.ref, i, false), x, :not_atomic, false); return A) +__safe_setindex!(A::Vector{T}, x, i::Int) where {T} = (@inline; + __safe_setindex!(A, convert(T, x)::T, i)) # This is redundant with the abstract fallbacks but needed and helpful for bootstrap function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int}) @_propagate_inbounds_meta @boundscheck setindex_shape_check(X, length(I)) + @boundscheck checkbounds(A, I) require_one_based_indexing(X) X′ = unalias(A, X) I′ = unalias(A, I) count = 1 for i in I′ - @inbounds x = X′[count] - A[i] = x + @inbounds A[i] = X′[count] count += 1 end return A @@ -1063,24 +1048,205 @@ function setindex!(A::Array{T}, X::Array{T}, c::Colon) where T return A end -# efficiently grow an array +# Pick new memory size for efficiently growing an array +# TODO: This should know about the size of our GC pools +# Specifically we are wasting ~10% of memory for small arrays +# by not picking memory sizes that max out a GC pool +function overallocation(maxsize) + maxsize < 8 && return 8; + # compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 + # for small n, we grow faster than O(n) + # for large n, we grow at O(n/8) + # and as we reach O(memory) for memory>>1MB, + # this means we end by adding about 10% of memory each time + exp2 = sizeof(maxsize) * 8 - Core.Intrinsics.ctlz_int(maxsize) + maxsize += (1 << div(exp2 * 7, 8)) * 4 + div(maxsize, 8) + return maxsize +end + +array_new_memory(mem::Memory, newlen::Int) = typeof(mem)(undef, newlen) # when implemented, this should attempt to first expand mem + +function _growbeg!(a::Vector, delta::Integer) + @_noub_meta + delta = Int(delta) + delta == 0 && return # avoid attempting to index off the end + delta >= 0 || throw(ArgumentError("grow requires delta >= 0")) + ref = a.ref + mem = ref.mem + len = length(a) + offset = memoryrefoffset(ref) + newlen = len + delta + setfield!(a, :size, (newlen,)) + # if offset is far enough advanced to fit data in existing memory without copying + if delta <= offset - 1 + setfield!(a, :ref, @inbounds memoryref(ref, 1 - delta)) + else + @noinline (function() + @_terminates_locally_meta + memlen = length(mem) + if offset + len - 1 > memlen || offset < 1 + throw(ConcurrencyViolationError("Vector has invalid state. Don't modify internal fields incorrectly, or resize without correct locks")) + end + # since we will allocate the array in the middle of the memory we need at least 2*delta extra space + # the +1 is because I didn't want to have an off by 1 error. + newmemlen = max(overallocation(len), len + 2 * delta + 1) + newoffset = div(newmemlen - newlen, 2) + 1 + # If there is extra data after the end of the array we can use that space so long as there is enough + # space at the end that there won't be quadratic behavior with a mix of growth from both ends. + # Specifically, we want to ensure that we will only do this operation once before + # increasing the size of the array, and that we leave enough space at both the beginning and the end. 
+ if newoffset + newlen < memlen + newoffset = div(memlen - newlen, 2) + 1 + newmem = mem + unsafe_copyto!(newmem, newoffset + delta, mem, offset, len) + for j in offset:newoffset+delta-1 + @inbounds _unsetindex!(mem, j) + end + else + newmem = array_new_memory(mem, newmemlen) + unsafe_copyto!(newmem, newoffset + delta, mem, offset, len) + end + if ref !== a.ref + @noinline throw(ConcurrencyViolationError("Vector can not be resized concurrently")) + end + setfield!(a, :ref, @inbounds memoryref(newmem, newoffset)) + end)() + end + return +end -_growbeg!(a::Vector, delta::Integer) = - ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), a, delta) -_growend!(a::Vector, delta::Integer) = - ccall(:jl_array_grow_end, Cvoid, (Any, UInt), a, delta) -_growat!(a::Vector, i::Integer, delta::Integer) = - ccall(:jl_array_grow_at, Cvoid, (Any, Int, UInt), a, i - 1, delta) +function _growend!(a::Vector, delta::Integer) + @_noub_meta + delta = Int(delta) + delta >= 0 || throw(ArgumentError("grow requires delta >= 0")) + ref = a.ref + mem = ref.mem + memlen = length(mem) + len = length(a) + newlen = len + delta + offset = memoryrefoffset(ref) + setfield!(a, :size, (newlen,)) + newmemlen = offset + newlen - 1 + if memlen < newmemlen + @noinline (function() + if offset + len - 1 > memlen || offset < 1 + throw(ConcurrencyViolationError("Vector has invalid state. Don't modify internal fields incorrectly, or resize without correct locks")) + end -# efficiently delete part of an array + if offset - 1 > div(5 * newlen, 4) + # If the offset is far enough that we can copy without resizing + # while maintaining proportional spacing on both ends of the array + # note that this branch prevents infinite growth when doing combinations + # of push! and popfirst! (i.e. when using a Vector as a queue) + newmem = mem + newoffset = div(newlen, 8) + 1 + else + # grow either by our computed overallocation factor + # or exactly the requested size, whichever is larger + # TODO we should possibly increase the offset if the current offset is nonzero. + newmemlen2 = max(overallocation(memlen), newmemlen) + newmem = array_new_memory(mem, newmemlen2) + newoffset = offset + end + newref = @inbounds memoryref(newmem, newoffset) + unsafe_copyto!(newref, ref, len) + if ref !== a.ref + @noinline throw(ConcurrencyViolationError("Vector can not be resized concurrently")) + end + setfield!(a, :ref, newref) + end)() + end + return +end -_deletebeg!(a::Vector, delta::Integer) = - ccall(:jl_array_del_beg, Cvoid, (Any, UInt), a, delta) -_deleteend!(a::Vector, delta::Integer) = - ccall(:jl_array_del_end, Cvoid, (Any, UInt), a, delta) -_deleteat!(a::Vector, i::Integer, delta::Integer) = - ccall(:jl_array_del_at, Cvoid, (Any, Int, UInt), a, i - 1, delta) +function _growat!(a::Vector, i::Integer, delta::Integer) + @_terminates_globally_noub_meta + delta = Int(delta) + i = Int(i) + i == 1 && return _growbeg!(a, delta) + len = length(a) + i == len + 1 && return _growend!(a, delta) + delta >= 0 || throw(ArgumentError("grow requires delta >= 0")) + 1 < i <= len || throw(BoundsError(a, i)) + ref = a.ref + mem = ref.mem + memlen = length(mem) + newlen = len + delta + offset = memoryrefoffset(ref) + setfield!(a, :size, (newlen,)) + newmemlen = offset + newlen - 1 + + # which side would we rather grow into? 
+ prefer_start = i <= div(len, 2) + # if offset is far enough advanced to fit data in beginning of the memory + if prefer_start && delta <= offset - 1 + newref = @inbounds memoryref(mem, offset - delta) + unsafe_copyto!(newref, ref, i) + setfield!(a, :ref, newref) + for j in i:i+delta-1 + @inbounds _unsetindex!(a, j) + end + elseif !prefer_start && memlen >= newmemlen + unsafe_copyto!(mem, offset - 1 + delta + i, mem, offset - 1 + i, len - i + 1) + for j in i:i+delta-1 + @inbounds _unsetindex!(a, j) + end + else + # since we will allocate the array in the middle of the memory we need at least 2*delta extra space + # the +1 is because I didn't want to have an off by 1 error. + newmemlen = max(overallocation(memlen), len+2*delta+1) + newoffset = (newmemlen - newlen) ÷ 2 + 1 + newmem = array_new_memory(mem, newmemlen) + newref = @inbounds memoryref(newmem, newoffset) + unsafe_copyto!(newref, ref, i-1) + unsafe_copyto!(newmem, newoffset + delta + i - 1, mem, offset + i - 1, len - i + 1) + setfield!(a, :ref, newref) + end +end +# efficiently delete part of an array +function _deletebeg!(a::Vector, delta::Integer) + delta = Int(delta) + len = length(a) + 0 <= delta <= len || throw(ArgumentError("_deletebeg! requires delta in 0:length(a)")) + for i in 1:delta + @inbounds _unsetindex!(a, i) + end + newlen = len - delta + if newlen != 0 # if newlen==0 we could accidentally index past the memory + newref = @inbounds memoryref(a.ref, delta + 1) + setfield!(a, :ref, newref) + end + setfield!(a, :size, (newlen,)) + return +end +function _deleteend!(a::Vector, delta::Integer) + delta = Int(delta) + len = length(a) + 0 <= delta <= len || throw(ArgumentError("_deleteend! requires delta in 0:length(a)")) + newlen = len - delta + for i in newlen+1:len + @inbounds _unsetindex!(a, i) + end + setfield!(a, :size, (newlen,)) + return +end +function _deleteat!(a::Vector, i::Integer, delta::Integer) + i = Int(i) + len = length(a) + 0 <= delta || throw(ArgumentError("_deleteat! requires delta >= 0")) + 1 <= i <= len || throw(BoundsError(a, i)) + i + delta <= len + 1 || throw(BoundsError(a, i + delta - 1)) + newa = a + if 2*i + delta <= len + unsafe_copyto!(newa, 1 + delta, a, 1, i - 1) + _deletebeg!(a, delta) + else + unsafe_copyto!(newa, i, a, i + delta, len + 1 - delta - i) + _deleteend!(a, delta) + end + return +end ## Dequeue functionality ## """ @@ -1112,10 +1278,16 @@ See also [`pushfirst!`](@ref). function push! end function push!(a::Vector{T}, item) where T + @inline # convert first so we don't grow the array if the assignment won't work - itemT = item isa T ? item : convert(T, item)::T + # and also to avoid a dynamic dynamic dispatch in the common case that + # `item` is poorly-typed and `a` is well-typed + item = item isa T ? item : convert(T, item)::T + return _push!(a, item) +end +function _push!(a::Vector{T}, item::T) where T _growend!(a, 1) - @_safeindex a[length(a)] = itemT + @_safeindex a[length(a)] = item return a end @@ -1174,30 +1346,24 @@ and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order. """ function append! 
end -function append!(a::Vector, items::AbstractVector) - itemindices = eachindex(items) - n = length(itemindices) +function append!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T + items isa Tuple && (items = map(x -> convert(T, x), items)) + n = length(items) _growend!(a, n) - copyto!(a, length(a)-n+1, items, first(itemindices), n) + copyto!(a, length(a)-n+1, items, firstindex(items), n) return a end append!(a::AbstractVector, iter) = _append!(a, IteratorSize(iter), iter) push!(a::AbstractVector, iter...) = append!(a, iter) - -append!(a::AbstractVector, iter...) = foldl(append!, iter, init=a) +append!(a::AbstractVector, iter...) = (for v in iter; append!(a, v); end; return a) function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter) - @_terminates_locally_meta - n = length(a) + n = Int(length(iter))::Int i = lastindex(a) - resize!(a, n+Int(length(iter))::Int) - for (i, item) in zip(i+1:lastindex(a), iter) - if isa(a, Vector) # give better effects for builtin vectors - @_safeindex a[i] = item - else - a[i] = item - end + sizehint!(a, length(a) + n; shrink=false) + for item in iter + push!(a, item) end a end @@ -1239,32 +1405,31 @@ julia> prepend!([6], [1, 2], [3, 4, 5]) """ function prepend! end -function prepend!(a::Vector, items::AbstractVector) - itemindices = eachindex(items) - n = length(itemindices) +function prepend!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T + items isa Tuple && (items = map(x -> convert(T, x), items)) + n = length(items) _growbeg!(a, n) - if a === items - copyto!(a, 1, items, n+1, n) - else - copyto!(a, 1, items, first(itemindices), n) - end + # in case of aliasing, the _growbeg might have shifted our data, so copy + # just the last n elements instead of all of them from the first + copyto!(a, 1, items, lastindex(items)-n+1, n) return a end -prepend!(a::Vector, iter) = _prepend!(a, IteratorSize(iter), iter) -pushfirst!(a::Vector, iter...) = prepend!(a, iter) - -prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, init=a) +prepend!(a::AbstractVector, iter) = _prepend!(a, IteratorSize(iter), iter) +pushfirst!(a::AbstractVector, iter...) = prepend!(a, iter) +prepend!(a::AbstractVector, iter...) = (for v = reverse(iter); prepend!(a, v); end; return a) function _prepend!(a::Vector, ::Union{HasLength,HasShape}, iter) @_terminates_locally_meta require_one_based_indexing(a) - n = length(iter) - _growbeg!(a, n) - i = 0 + n = Int(length(iter))::Int + sizehint!(a, length(a) + n; first=true, shrink=false) + n = 0 for item in iter - @_safeindex a[i += 1] = item + n += 1 + pushfirst!(a, item) end + reverse!(a, 1, n) a end function _prepend!(a::Vector, ::IteratorSize, iter) @@ -1321,13 +1486,21 @@ function resize!(a::Vector, nl::Integer) end """ - sizehint!(s, n) -> s + sizehint!(s, n; first::Bool=false, shrink::Bool=true) -> s Suggest that collection `s` reserve capacity for at least `n` elements. That is, if you expect that you're going to have to push a lot of values onto `s`, you can avoid the cost of incremental reallocation by doing it once up front; this can improve performance. +If `first` is `true`, then any additional space is reserved before the start of the collection. +This way, subsequent calls to `pushfirst!` (instead of `push!`) may become faster. +Supplying this keyword may result in an error if the collection is not ordered +or if `pushfirst!` is not supported for this collection. 
+ +If `shrink=true` (the default), the collection's capacity may be reduced if its current +capacity is greater than `n`. + See also [`resize!`](@ref). # Notes on the performance model @@ -1342,14 +1515,54 @@ For types that support `sizehint!`, `Base`. 3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation. + +!!! compat "Julia 1.11" + The `shrink` and `first` arguments were added in Julia 1.11. """ function sizehint! end -function sizehint!(a::Vector, sz::Integer) - ccall(:jl_array_sizehint, Cvoid, (Any, UInt), a, sz) +function sizehint!(a::Vector, sz::Integer; first::Bool=false, shrink::Bool=true) + len = length(a) + ref = a.ref + mem = ref.mem + memlen = length(mem) + sz = max(Int(sz), len) + inc = sz - len + if sz <= memlen + # if we don't save at least 1/8th memlen then its not worth it to shrink + if !shrink || memlen - sz <= div(memlen, 8) + return a + end + newmem = array_new_memory(mem, sz) + if first + newref = memoryref(newmem, inc + 1) + else + newref = memoryref(newmem) + end + unsafe_copyto!(newref, ref, len) + setfield!(a, :ref, newref) + elseif first + _growbeg!(a, inc) + newref = getfield(a, :ref) + newref = memoryref(newref, inc + 1) + setfield!(a, :size, (len,)) # undo the size change from _growbeg! + setfield!(a, :ref, newref) # undo the offset change from _growbeg! + else # last + _growend!(a, inc) + setfield!(a, :size, (len,)) # undo the size change from _growend! + end a end +# Fall-back implementation for non-shrinkable collections +# avoid defining this the normal way to avoid avoid infinite recursion +function Core.kwcall(kwargs::NamedTuple{names}, ::typeof(sizehint!), a, sz) where names + get(kwargs, :first, false)::Bool + get(kwargs, :shrink, true)::Bool + isempty(diff_names(names, (:first, :shrink))) || kwerr(kwargs, sizehint!, a, sz) + sizehint!(a, sz) +end + """ pop!(collection) -> item @@ -1433,6 +1646,7 @@ ERROR: BoundsError: attempt to access 3-element Vector{Int64} at index [4] ``` """ function popat!(a::Vector, i::Integer) + @_propagate_inbounds_meta x = a[i] _deleteat!(a, i, 1) x @@ -1468,7 +1682,11 @@ julia> pushfirst!([1, 2, 3, 4], 5, 6) ``` """ function pushfirst!(a::Vector{T}, item) where T + @inline item = item isa T ? item : convert(T, item)::T + return _pushfirst!(a, item) +end +function _pushfirst!(a::Vector{T}, item::T) where T _growbeg!(a, 1) @_safeindex a[1] = item return a @@ -1554,11 +1772,16 @@ julia> insert!(Any[1:6;], 3, "here") ``` """ function insert!(a::Array{T,1}, i::Integer, item) where T + @_propagate_inbounds_meta + item = item isa T ? item : convert(T, item)::T + return _insert!(a, i, item) +end +function _insert!(a::Array{T,1}, i::Integer, item::T) where T + @_noub_meta # Throw convert error before changing the shape of the array - _item = item isa T ? item : convert(T, item)::T _growat!(a, i, 1) - # _growat! already did bound check - @inbounds a[i] = _item + # :noub, because _growat! 
already did bound check + @inbounds a[i] = item return a end @@ -1635,17 +1858,19 @@ struct Nowhere; end push!(::Nowhere, _) = nothing _growend!(::Nowhere, _) = nothing -@inline function _push_deleted!(dltd, a::Vector, ind) - if @inbounds isassigned(a, ind) - push!(dltd, @inbounds a[ind]) +function _push_deleted!(dltd, a::Vector, ind) + @_propagate_inbounds_meta + if isassigned(a, ind) + push!(dltd, a[ind]) else _growend!(dltd, 1) end end -@inline function _copy_item!(a::Vector, p, q) - if @inbounds isassigned(a, q) - @inbounds a[p] = a[q] +function _copy_item!(a::Vector, p, q) + @_propagate_inbounds_meta + if isassigned(a, q) + a[p] = a[q] else _unsetindex!(a, p) end @@ -1657,7 +1882,7 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere()) y === nothing && return a (p, s) = y checkbounds(a, p) - _push_deleted!(dltd, a, p) + @inbounds _push_deleted!(dltd, a, p) q = p+1 while true y = iterate(inds, s) @@ -1671,14 +1896,14 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere()) end end while q < i - _copy_item!(a, p, q) + @inbounds _copy_item!(a, p, q) p += 1; q += 1 end - _push_deleted!(dltd, a, i) + @inbounds _push_deleted!(dltd, a, i) q = i+1 end while q <= n - _copy_item!(a, p, q) + @inbounds _copy_item!(a, p, q) p += 1; q += 1 end _deleteend!(a, n-p+1) @@ -1691,7 +1916,7 @@ function deleteat!(a::Vector, inds::AbstractVector{Bool}) length(inds) == n || throw(BoundsError(a, inds)) p = 1 for (q, i) in enumerate(inds) - _copy_item!(a, p, q) + @inbounds _copy_item!(a, p, q) p += !i end _deleteend!(a, n-p+1) @@ -1757,7 +1982,7 @@ function splice!(a::Vector, i::Integer, ins=_default_splice) if m == 0 _deleteat!(a, i, 1) elseif m == 1 - a[i] = ins[1] + a[i] = only(ins) else _growat!(a, i, m-1) k = 1 @@ -1781,6 +2006,8 @@ place of the removed items; in this case, `indices` must be a `AbstractUnitRange To insert `replacement` before an index `n` without removing any items, use `splice!(collection, n:n-1, replacement)`. +$(_DOCS_ALIASING_WARNING) + !!! compat "Julia 1.5" Prior to Julia 1.5, `indices` must always be a `UnitRange`. 
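An illustrative sketch (editor's addition, not part of the patch) of the insertion idiom described in the `splice!` docstring above, passing the empty range `n:n-1` so that nothing is removed:

```julia
a = [1, 2, 3]
splice!(a, 2:1, [10, 11])  # insert before index 2; returns an empty vector
a == [1, 10, 11, 2, 3]     # true
```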
@@ -1841,10 +2068,12 @@ end # use memcmp for cmp on byte arrays function cmp(a::Array{UInt8,1}, b::Array{UInt8,1}) - ta = @_gc_preserve_begin a - tb = @_gc_preserve_begin b - pa = unsafe_convert(Ptr{Cvoid}, a) - pb = unsafe_convert(Ptr{Cvoid}, b) + aref = a.ref + bref = b.ref + ta = @_gc_preserve_begin aref + tb = @_gc_preserve_begin bref + pa = unsafe_convert(Ptr{Cvoid}, aref) + pb = unsafe_convert(Ptr{Cvoid}, bref) c = memcmp(pa, pb, min(length(a),length(b))) @_gc_preserve_end ta @_gc_preserve_end tb @@ -1855,10 +2084,12 @@ const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where # use memcmp for == on bit integer types function ==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} if size(a) == size(b) - ta = @_gc_preserve_begin a - tb = @_gc_preserve_begin b - pa = unsafe_convert(Ptr{Cvoid}, a) - pb = unsafe_convert(Ptr{Cvoid}, b) + aref = a.ref + bref = b.ref + ta = @_gc_preserve_begin aref + tb = @_gc_preserve_begin bref + pa = unsafe_convert(Ptr{Cvoid}, aref) + pb = unsafe_convert(Ptr{Cvoid}, bref) c = memcmp(pa, pb, sizeof(eltype(Arr)) * length(a)) @_gc_preserve_end ta @_gc_preserve_end tb @@ -1871,11 +2102,13 @@ end function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1} len = length(a) if len == length(b) - ta = @_gc_preserve_begin a - tb = @_gc_preserve_begin b + aref = a.ref + bref = b.ref + ta = @_gc_preserve_begin aref + tb = @_gc_preserve_begin bref T = eltype(Arr) - pa = unsafe_convert(Ptr{T}, a) - pb = unsafe_convert(Ptr{T}, b) + pa = unsafe_convert(Ptr{T}, aref) + pb = unsafe_convert(Ptr{T}, bref) c = memcmp(pa, pb, sizeof(T) * len) @_gc_preserve_end ta @_gc_preserve_end tb @@ -2041,18 +2274,6 @@ function vcat(arrays::Vector{T}...) where T end vcat(A::Vector...) = cat(A...; dims=Val(1)) # more special than SparseArrays's vcat -# disambiguation with LinAlg/special.jl -# Union{Number,Vector,Matrix} is for LinearAlgebra._DenseConcatGroup -# VecOrMat{T} is for LinearAlgebra._TypedDenseConcatGroup -hcat(A::Union{Number,Vector,Matrix}...) = cat(A...; dims=Val(2)) -hcat(A::VecOrMat{T}...) where {T} = typed_hcat(T, A...) -vcat(A::Union{Number,Vector,Matrix}...) = cat(A...; dims=Val(1)) -vcat(A::VecOrMat{T}...) where {T} = typed_vcat(T, A...) -hvcat(rows::Tuple{Vararg{Int}}, xs::Union{Number,Vector,Matrix}...) = - typed_hvcat(promote_eltypeof(xs...), rows, xs...) -hvcat(rows::Tuple{Vararg{Int}}, xs::VecOrMat{T}...) where {T} = - typed_hvcat(T, rows, xs...) - _cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(Returns(1), n-1)..., length(x))) ## find ## @@ -2135,7 +2356,9 @@ findfirst(A::AbstractArray) = findnext(A, first(keys(A))) findnext(predicate::Function, A, i) Find the next index after or including `i` of an element of `A` -for which `predicate` returns `true`, or `nothing` if not found. +for which `predicate` returns `true`, or `nothing` if not found. This works for +Arrays, Strings, and most other collections that support [`getindex`](@ref), +[`keys(A)`](@ref), and [`nextind`](@ref). Indices are of the same type as those returned by [`keys(A)`](@ref) and [`pairs(A)`](@ref). @@ -2153,6 +2376,9 @@ julia> A = [1 4; 2 2]; julia> findnext(isodd, A, CartesianIndex(1, 1)) CartesianIndex(1, 1) + +julia> findnext(isspace, "a b c", 3) +4 ``` """ function findnext(testf::Function, A, start) @@ -2214,20 +2440,29 @@ end findfirst(testf::Function, A::Union{AbstractArray, AbstractString}) = findnext(testf, A, first(keys(A))) -findfirst(p::Union{Fix2{typeof(isequal),Int},Fix2{typeof(==),Int}}, r::OneTo{Int}) = - 1 <= p.x <= r.stop ? 
p.x : nothing +findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::OneTo) where {T<:Integer} = + 1 <= p.x <= r.stop ? convert(keytype(r), p.x) : nothing -findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitRange) where {T<:Integer} = - first(r) <= p.x <= last(r) ? firstindex(r) + Int(p.x - first(r)) : nothing +findfirst(::typeof(iszero), ::OneTo) = nothing +findfirst(::typeof(isone), r::OneTo) = isempty(r) ? nothing : oneunit(keytype(r)) + +function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitRange{<:Integer}) where {T<:Integer} + first(r) <= p.x <= last(r) || return nothing + i1 = first(keys(r)) + return i1 + oftype(i1, p.x - first(r)) +end function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::StepRange{T,S}) where {T,S} isempty(r) && return nothing minimum(r) <= p.x <= maximum(r) || return nothing - d = convert(S, p.x - first(r))::S + d = p.x - first(r) iszero(d % step(r)) || return nothing - return d ÷ step(r) + 1 + return convert(keytype(r), d ÷ step(r) + 1) end +findfirst(::typeof(iszero), r::AbstractRange) = findfirst(==(zero(first(r))), r) +findfirst(::typeof(isone), r::AbstractRange) = findfirst(==(one(first(r))), r) + """ findprev(A, i) @@ -2309,7 +2544,9 @@ findlast(A::AbstractArray) = findprev(A, last(keys(A))) findprev(predicate::Function, A, i) Find the previous index before or including `i` of an element of `A` -for which `predicate` returns `true`, or `nothing` if not found. +for which `predicate` returns `true`, or `nothing` if not found. This works for +Arrays, Strings, and most other collections that support [`getindex`](@ref), +[`keys(A)`](@ref), and [`nextind`](@ref). Indices are of the same type as those returned by [`keys(A)`](@ref) and [`pairs(A)`](@ref). @@ -2335,6 +2572,9 @@ julia> A = [4 6; 1 2] julia> findprev(isodd, A, CartesianIndex(1, 2)) CartesianIndex(2, 1) + +julia> findprev(isspace, "a b c", 3) +2 ``` """ function findprev(testf::Function, A, start) @@ -2393,6 +2633,17 @@ end findlast(testf::Function, A::Union{AbstractArray, AbstractString}) = findprev(testf, A, last(keys(A))) +# for monotonic ranges, there is a unique index corresponding to a value, so findfirst and findlast are identical +function findlast(p::Union{Fix2{typeof(isequal),<:Integer},Fix2{typeof(==),<:Integer},typeof(iszero),typeof(isone)}, + r::AbstractUnitRange{<:Integer}) + findfirst(p, r) +end + +function findlast(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T},typeof(iszero),typeof(isone)}, + r::StepRange{T,S}) where {T,S} + findfirst(p, r) +end + """ findall(f::Function, A) @@ -2437,7 +2688,7 @@ Dict{Symbol, Int64} with 3 entries: :B => -1 :C => 0 -julia> findall(x -> x >= 0, d) +julia> findall(≥(0), d) 2-element Vector{Symbol}: :A :C @@ -2445,9 +2696,8 @@ julia> findall(x -> x >= 0, d) ``` """ function findall(testf::Function, A) - T = eltype(keys(A)) gen = (first(p) for p in pairs(A) if testf(last(p))) - isconcretetype(T) ? collect(T, gen) : collect(gen) + @default_eltype(gen) === Union{} ? 
collect(@default_eltype(keys(A)), gen) : collect(gen) end # Broadcasting is much faster for small testf, and computing @@ -2499,42 +2749,19 @@ function findall(A) end # Allocating result upfront is faster (possible only when collection can be iterated twice) -function _findall(f::Function, A::AbstractArray{Bool}) - n = count(f, A) +function findall(A::AbstractArray{Bool}) + n = count(A) I = Vector{eltype(keys(A))}(undef, n) - isempty(I) && return I - _findall(f, I, A) -end - -function _findall(f::Function, I::Vector, A::AbstractArray{Bool}) cnt = 1 - len = length(I) - for (k, v) in pairs(A) - @inbounds I[cnt] = k - cnt += f(v) - cnt > len && return I - end - # In case of impure f, this line could potentially be hit. In that case, - # we can't assume I is the correct length. - resize!(I, cnt - 1) -end - -function _findall(f::Function, I::Vector, A::AbstractVector{Bool}) - i = firstindex(A) - cnt = 1 - len = length(I) - while cnt ≤ len - @inbounds I[cnt] = i - cnt += f(@inbounds A[i]) - i = nextind(A, i) + for (i,a) in pairs(A) + if a + I[cnt] = i + cnt += 1 + end end - cnt - 1 == len ? I : resize!(I, cnt - 1) + I end -findall(f::Function, A::AbstractArray{Bool}) = _findall(f, A) -findall(f::Fix2{typeof(in)}, A::AbstractArray{Bool}) = _findall(f, A) -findall(A::AbstractArray{Bool}) = _findall(identity, A) - findall(x::Bool) = x ? [1] : Vector{Int}() findall(testf::Function, x::Number) = testf(x) ? [1] : Vector{Int}() findall(p::Fix2{typeof(in)}, x::Number) = x in p.x ? [1] : Vector{Int}() @@ -2795,6 +3022,8 @@ Remove the items at all the indices which are not given by `inds`, and return the modified `a`. Items which are kept are shifted to fill the resulting gaps. +$(_DOCS_ALIASING_WARNING) + `inds` must be an iterator of sorted and unique integer indices. See also [`deleteat!`](@ref). @@ -2895,3 +3124,61 @@ function intersect(v::AbstractVector, r::AbstractRange) return vectorfilter(T, _shrink_filter!(seen), common) end intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r) + +# Here instead of range.jl for bootstrapping because `@propagate_inbounds` depends on Vectors. +@propagate_inbounds function getindex(v::AbstractRange, i::Integer) + if i isa Bool # Not via dispatch to avoid ambiguities + throw(ArgumentError("invalid index: $i of type Bool")) + else + _getindex(v, i) + end +end + +""" + wrap(Array, m::Union{Memory{T}, MemoryRef{T}}, dims) + +Create an array of size `dims` using `m` as the underlying memory. This can be thought of as a safe version +of [`unsafe_wrap`](@ref) utilizing `Memory` or `MemoryRef` instead of raw pointers. +""" +function wrap end + +# validity checking for _wrap calls, separate from allocation of Array so that it can be more likely to inline into the caller +function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N} + mem = ref.mem + mem_len = length(mem) + 1 - memoryrefoffset(ref) + len = Core.checked_dims(dims...) 
+ @boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len) + return ref +end + +@noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch(LazyString( + "Attempted to wrap a MemoryRef of length ", len, " with an Array of size dims=", dims, + " which is invalid because prod(dims) = ", proddims, " > ", len, + " so that the array would have more elements than the underlying memory can store."))) + +@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N} + dims = convert(Dims, dims) + ref = _wrap(m, dims) + $(Expr(:new, :(Array{T, N}), :ref, :dims)) +end + +@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N} + dims = convert(Dims, dims) + ref = _wrap(memoryref(m), dims) + $(Expr(:new, :(Array{T, N}), :ref, :dims)) +end +@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T} + dims = (Int(l),) + ref = _wrap(m, dims) + $(Expr(:new, :(Array{T, 1}), :ref, :dims)) +end +@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T} + dims = (Int(l),) + ref = _wrap(memoryref(m), (l,)) + $(Expr(:new, :(Array{T, 1}), :ref, :dims)) +end +@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}) where {T} + ref = memoryref(m) + dims = (length(m),) + $(Expr(:new, :(Array{T, 1}), :ref, :dims)) +end diff --git a/base/arrayshow.jl b/base/arrayshow.jl index a05a8d4dac51c..623111ef0883d 100644 --- a/base/arrayshow.jl +++ b/base/arrayshow.jl @@ -41,7 +41,7 @@ Accept keyword args `c` for alternate single character marker. """ function replace_with_centered_mark(s::AbstractString;c::AbstractChar = '⋅') N = textwidth(ANSIIterator(s)) - return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2))) + return N == 0 ? string(c) : join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2))) end const undef_ref_alignment = (3,3) @@ -364,13 +364,13 @@ function show(io::IO, ::MIME"text/plain", X::AbstractArray) if isempty(X) && (get(io, :compact, false)::Bool || X isa Vector) return show(io, X) end - # 0) show summary before setting :compact + # 1) show summary before setting :compact summary(io, X) isempty(X) && return print(io, ":") show_circular(io, X) && return - # 1) compute new IOContext + # 2) compute new IOContext if !haskey(io, :compact) && length(axes(X, 2)) > 1 io = IOContext(io, :compact => true) end @@ -385,7 +385,7 @@ function show(io::IO, ::MIME"text/plain", X::AbstractArray) println(io) end - # 2) update typeinfo + # 3) update typeinfo # # it must come after printing the summary, which can exploit :typeinfo itself # (e.g. views) @@ -394,7 +394,7 @@ function show(io::IO, ::MIME"text/plain", X::AbstractArray) # checking for current :typeinfo (this could be changed in the future) io = IOContext(io, :typeinfo => eltype(X)) - # 2) show actual content + # 4) show actual content recur_io = IOContext(io, :SHOWN_SET => X) print_array(recur_io, X) end @@ -545,6 +545,12 @@ typeinfo_eltype(typeinfo::Type{<:AbstractArray{T}}) where {T} = eltype(typeinfo) typeinfo_eltype(typeinfo::Type{<:AbstractDict{K,V}}) where {K,V} = eltype(typeinfo) typeinfo_eltype(typeinfo::Type{<:AbstractSet{T}}) where {T} = eltype(typeinfo) +# This is a fancy way to make de-specialize a call to `typeinfo_implicit(T)` +# which is unfortunately invalidated by Dates +# (https://github.com/JuliaLang/julia/issues/56080) +# +# This makes the call less efficient, but avoids being invalidated by Dates. 
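For orientation, here is a minimal usage sketch of the `wrap`/`Memory` API added in the `base/array.jl` hunk above. It is illustrative only: the buffer size and values are arbitrary, and `wrap` is written qualified as `Base.wrap` on the assumption that it stays internal rather than exported.

```julia
mem = Memory{Int}(undef, 6)        # flat buffer of six Ints
A = Base.wrap(Array, mem, (2, 3))  # 2×3 Array backed by `mem`, no copy made
A[1, 1] = 42
@assert mem[1] == 42               # both views alias the same storage
Base.wrap(Array, mem, (3, 3))      # throws DimensionMismatch: 9 elements requested, only 6 available
```

The `_wrap` helper above is what performs the `prod(dims) <= length(mem)` validation that makes this safer than `unsafe_wrap` on raw pointers.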
+_typeinfo_implicit(@nospecialize(T)) = Base.invoke_in_world(Base.tls_world_age(), typeinfo_implicit, T)::Bool # types that can be parsed back accurately from their un-decorated representations function typeinfo_implicit(@nospecialize(T)) @@ -553,9 +559,9 @@ function typeinfo_implicit(@nospecialize(T)) return true end return isconcretetype(T) && - ((T <: Array && typeinfo_implicit(eltype(T))) || - ((T <: Tuple || T <: Pair) && all(typeinfo_implicit, fieldtypes(T))) || - (T <: AbstractDict && typeinfo_implicit(keytype(T)) && typeinfo_implicit(valtype(T)))) + ((T <: Array && _typeinfo_implicit(eltype(T))) || + ((T <: Tuple || T <: NamedTuple || T <: Pair) && all(_typeinfo_implicit, fieldtypes(T))) || + (T <: AbstractDict && _typeinfo_implicit(keytype(T)) && _typeinfo_implicit(valtype(T)))) end # X not constrained, can be any iterable (cf. show_vector) @@ -573,7 +579,7 @@ function typeinfo_prefix(io::IO, X) if X isa AbstractDict if eltype_X == eltype_ctx sprint(show_type_name, typeof(X).name; context=io), false - elseif !isempty(X) && typeinfo_implicit(keytype(X)) && typeinfo_implicit(valtype(X)) + elseif !isempty(X) && _typeinfo_implicit(keytype(X)) && _typeinfo_implicit(valtype(X)) sprint(show_type_name, typeof(X).name; context=io), true else sprint(print, typeof(X); context=io), false @@ -582,7 +588,7 @@ function typeinfo_prefix(io::IO, X) # Types hard-coded here are those which are created by default for a given syntax if eltype_X == eltype_ctx "", false - elseif !isempty(X) && typeinfo_implicit(eltype_X) + elseif !isempty(X) && _typeinfo_implicit(eltype_X) "", true elseif print_without_params(eltype_X) sprint(show_type_name, unwrap_unionall(eltype_X).name; context=io), false # Print "Array" rather than "Array{T,N}" diff --git a/base/asyncevent.jl b/base/asyncevent.jl index a26945bbb1105..8c708455976e2 100644 --- a/base/asyncevent.jl +++ b/base/asyncevent.jl @@ -118,14 +118,21 @@ end unsafe_convert(::Type{Ptr{Cvoid}}, t::Timer) = t.handle unsafe_convert(::Type{Ptr{Cvoid}}, async::AsyncCondition) = async.handle +# if this returns true, the object has been signaled +# if this returns false, the object is closed function _trywait(t::Union{Timer, AsyncCondition}) set = t.set if set # full barrier now for AsyncCondition t isa Timer || Core.Intrinsics.atomic_fence(:acquire_release) else - t.isopen || return false - t.handle == C_NULL && return false + if !isopen(t) + set = t.set + if !set + close(t) # wait for the close to complete + return false + end + end iolock_begin() set = t.set if !set @@ -133,7 +140,7 @@ function _trywait(t::Union{Timer, AsyncCondition}) lock(t.cond) try set = t.set - if !set && t.isopen && t.handle != C_NULL + if !set && t.handle != C_NULL # wait for set or handle, but not the isopen flag iolock_end() set = wait(t.cond) unlock(t.cond) @@ -147,7 +154,7 @@ function _trywait(t::Union{Timer, AsyncCondition}) end iolock_end() end - @atomic :monotonic t.set = false + @atomic :monotonic t.set = false # if there are multiple waiters, an unspecified number may short-circuit past here return set end @@ -157,13 +164,33 @@ function wait(t::Union{Timer, AsyncCondition}) end -isopen(t::Union{Timer, AsyncCondition}) = t.isopen && t.handle != C_NULL +isopen(t::Union{Timer, AsyncCondition}) = @atomic :acquire t.isopen function close(t::Union{Timer, AsyncCondition}) + t.handle == C_NULL && !t.isopen && return # short-circuit path, :monotonic iolock_begin() - if isopen(t) - @atomic :monotonic t.isopen = false - ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t) + if t.handle != C_NULL + if 
t.isopen + @atomic :release t.isopen = false + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t) + end + # implement _trywait here without the auto-reset function, just waiting for the final close signal + preserve_handle(t) + lock(t.cond) + try + while t.handle != C_NULL + iolock_end() + wait(t.cond) + unlock(t.cond) + iolock_begin() + lock(t.cond) + end + finally + unlock(t.cond) + unpreserve_handle(t) + end + elseif t.isopen + @atomic :release t.isopen = false end iolock_end() nothing @@ -176,8 +203,8 @@ function uvfinalize(t::Union{Timer, AsyncCondition}) if t.handle != C_NULL disassociate_julia_struct(t.handle) # not going to call the usual close hooks anymore if t.isopen - @atomic :monotonic t.isopen = false - ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) + @atomic :release t.isopen = false + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) # this will call Libc.free end @atomic :monotonic t.handle = C_NULL notify(t.cond, false) @@ -192,8 +219,10 @@ end function _uv_hook_close(t::Union{Timer, AsyncCondition}) lock(t.cond) try - @atomic :monotonic t.isopen = false - Libc.free(@atomicswap :monotonic t.handle = C_NULL) + handle = t.handle + @atomic :release t.isopen = false + @atomic :monotonic t.handle = C_NULL + Libc.free(handle) notify(t.cond, false) finally unlock(t.cond) @@ -220,7 +249,10 @@ function uv_timercb(handle::Ptr{Cvoid}) @atomic :monotonic t.set = true if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0 # timer is stopped now - close(t) + if t.isopen + @atomic :release t.isopen = false + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t) + end end notify(t.cond, true) finally @@ -243,7 +275,7 @@ end # timer with repeated callback """ - Timer(callback::Function, delay; interval = 0) + Timer(callback::Function, delay; interval = 0, spawn::Union{Nothing,Bool}=nothing) Create a timer that runs the function `callback` at each timer expiration. @@ -253,6 +285,13 @@ callback is only run once. The function `callback` is called with a single argum itself. Stop a timer by calling `close`. The `callback` may still be run one final time, if the timer has already expired. +If `spawn` is `true`, the created task will be spawned, meaning that it will be allowed +to move thread, which avoids the side-effect of forcing the parent task to get stuck to the thread +it is on. If `spawn` is `nothing` (default), the task will be spawned if the parent task isn't sticky. + +!!! compat "Julia 1.12" + The `spawn` argument was introduced in Julia 1.12. + # Examples Here the first number is printed after a delay of two seconds, then the following numbers are @@ -272,8 +311,9 @@ julia> begin 3 ``` """ -function Timer(cb::Function, timeout::Real; interval::Real=0.0) - timer = Timer(timeout, interval=interval) +function Timer(cb::Function, timeout; spawn::Union{Nothing,Bool}=nothing, kwargs...) + sticky = spawn === nothing ? current_task().sticky : !spawn + timer = Timer(timeout; kwargs...) t = @task begin unpreserve_handle(timer) while _trywait(timer) @@ -287,6 +327,7 @@ function Timer(cb::Function, timeout::Real; interval::Real=0.0) isopen(timer) || return end end + t.sticky = sticky # here we are mimicking parts of _trywait, in coordination with task `t` preserve_handle(timer) @lock timer.cond begin @@ -302,11 +343,24 @@ end """ timedwait(testcb, timeout::Real; pollint::Real=0.1) -Waits until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier. +Wait until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier. 
The test function is polled every `pollint` seconds. The minimum value for `pollint` is 0.001 seconds, that is, 1 millisecond. Return `:ok` or `:timed_out`. + +# Examples +```jldoctest +julia> cb() = (sleep(5); return); + +julia> t = @async cb(); + +julia> timedwait(()->istaskdone(t), 1) +:timed_out + +julia> timedwait(()->istaskdone(t), 6.5) +:ok +``` """ function timedwait(testcb, timeout::Real; pollint::Real=0.1) pollint >= 1e-3 || throw(ArgumentError("pollint must be ≥ 1 millisecond")) diff --git a/base/asyncmap.jl b/base/asyncmap.jl index be16ba1b27610..1914ddc645f31 100644 --- a/base/asyncmap.jl +++ b/base/asyncmap.jl @@ -9,6 +9,8 @@ Uses multiple concurrent tasks to map `f` over a collection (or multiple equal length collections). For multiple collection arguments, `f` is applied elementwise. +The output is guaranteed to be the same order as the elements of the collection(s) `c`. + `ntasks` specifies the number of tasks to run concurrently. Depending on the length of the collections, if `ntasks` is unspecified, up to 100 tasks will be used for concurrent mapping. @@ -26,11 +28,11 @@ The following examples highlight execution in different tasks by returning the `objectid` of the tasks in which the mapping function is executed. First, with `ntasks` undefined, each element is processed in a different task. -``` +```julia-repl julia> tskoid() = objectid(current_task()); julia> asyncmap(x->tskoid(), 1:5) -5-element Array{UInt64,1}: +5-element Vector{UInt64}: 0x6e15e66c75c75853 0x440f8819a1baa682 0x9fb3eeadd0c83985 @@ -42,9 +44,9 @@ julia> length(unique(asyncmap(x->tskoid(), 1:5))) ``` With `ntasks=2` all elements are processed in 2 tasks. -``` +```julia-repl julia> asyncmap(x->tskoid(), 1:5; ntasks=2) -5-element Array{UInt64,1}: +5-element Vector{UInt64}: 0x027ab1680df7ae94 0xa23d2f80cd7cf157 0x027ab1680df7ae94 @@ -58,12 +60,12 @@ julia> length(unique(asyncmap(x->tskoid(), 1:5; ntasks=2))) With `batch_size` defined, the mapping function needs to be changed to accept an array of argument tuples and return an array of results. `map` is used in the modified mapping function to achieve this. -``` +```julia-repl julia> batch_func(input) = map(x->string("args_tuple: ", x, ", element_val: ", x[1], ", task: ", tskoid()), input) batch_func (generic function with 1 method) julia> asyncmap(batch_func, 1:5; ntasks=2, batch_size=2) -5-element Array{String,1}: +5-element Vector{String}: "args_tuple: (1,), element_val: 1, task: 9118321258196414413" "args_tuple: (2,), element_val: 2, task: 4904288162898683522" "args_tuple: (3,), element_val: 3, task: 9118321258196414413" @@ -394,6 +396,8 @@ length(itr::AsyncGenerator) = length(itr.collector.enumerator) Like [`asyncmap`](@ref), but stores output in `results` rather than returning a collection. + +$(_DOCS_ALIASING_WARNING) """ function asyncmap!(f, r, c1, c...; ntasks=0, batch_size=nothing) foreach(identity, AsyncCollector(f, r, c1, c...; ntasks=ntasks, batch_size=batch_size)) diff --git a/base/atomics.jl b/base/atomics.jl index 7312206c19896..e6f3a5654cbf7 100644 --- a/base/atomics.jl +++ b/base/atomics.jl @@ -80,6 +80,13 @@ end Atomic() = Atomic{Int}() +const LOCK_PROFILING = Atomic{Int}(0) +lock_profiling(state::Bool) = state ? 
atomic_add!(LOCK_PROFILING, 1) : atomic_sub!(LOCK_PROFILING, 1) +lock_profiling() = LOCK_PROFILING[] > 0 + +const LOCK_CONFLICT_COUNT = Atomic{Int}(0); +inc_lock_conflict_count() = atomic_add!(LOCK_CONFLICT_COUNT, 1) + """ Threads.atomic_cas!(x::Atomic{T}, cmp::T, newval::T) where T @@ -357,13 +364,13 @@ for typ in atomictypes irt = "$ilt, $ilt*" @eval getindex(x::Atomic{$typ}) = GC.@preserve x llvmcall($""" - %ptr = inttoptr i$WORD_SIZE %0 to $lt* + %ptr = bitcast i8* %0 to $lt* %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ)) ret $lt %rv """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x)) @eval setindex!(x::Atomic{$typ}, v::$typ) = GC.@preserve x llvmcall($""" - %ptr = inttoptr i$WORD_SIZE %0 to $lt* + %ptr = bitcast i8* %0 to $lt* store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ)) ret void """, Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v) @@ -372,7 +379,7 @@ for typ in atomictypes if typ <: Integer @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) = GC.@preserve x llvmcall($""" - %ptr = inttoptr i$WORD_SIZE %0 to $lt* + %ptr = bitcast i8* %0 to $lt* %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire %rv = extractvalue { $lt, i1 } %rs, 0 ret $lt %rv @@ -381,7 +388,7 @@ for typ in atomictypes else @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) = GC.@preserve x llvmcall($""" - %iptr = inttoptr i$WORD_SIZE %0 to $ilt* + %iptr = bitcast i8* %0 to $ilt* %icmp = bitcast $lt %1 to $ilt %inew = bitcast $lt %2 to $ilt %irs = cmpxchg $ilt* %iptr, $ilt %icmp, $ilt %inew acq_rel acquire @@ -404,7 +411,7 @@ for typ in atomictypes if typ <: Integer @eval $fn(x::Atomic{$typ}, v::$typ) = GC.@preserve x llvmcall($""" - %ptr = inttoptr i$WORD_SIZE %0 to $lt* + %ptr = bitcast i8* %0 to $lt* %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel ret $lt %rv """, $typ, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v) @@ -412,7 +419,7 @@ for typ in atomictypes rmwop === :xchg || continue @eval $fn(x::Atomic{$typ}, v::$typ) = GC.@preserve x llvmcall($""" - %iptr = inttoptr i$WORD_SIZE %0 to $ilt* + %iptr = bitcast i8* %0 to $ilt* %ival = bitcast $lt %1 to $ilt %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel %rv = bitcast $ilt %irv to $lt diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl index f96887d554af0..598b618f0d1ed 100644 --- a/base/binaryplatforms.jl +++ b/base/binaryplatforms.jl @@ -170,20 +170,18 @@ end # Allow us to easily serialize Platform objects -function Base.repr(p::Platform; context=nothing) - str = string( - "Platform(", - repr(arch(p)), - ", ", - repr(os(p)), - "; ", - join(("$(k) = $(repr(v))" for (k, v) in tags(p) if k ∉ ("arch", "os")), ", "), - ")", - ) +function Base.show(io::IO, p::Platform) + print(io, "Platform(") + show(io, arch(p)) + print(io, ", ") + show(io, os(p)) + print(io, "; ") + join(io, ("$(k) = $(repr(v))" for (k, v) in tags(p) if k ∉ ("arch", "os")), ", ") + print(io, ")") end # Make showing the platform a bit more palatable -function Base.show(io::IO, p::Platform) +function Base.show(io::IO, ::MIME"text/plain", p::Platform) str = string(platform_name(p), " ", arch(p)) # Add on all the other tags not covered by os/arch: other_tags = sort!(filter!(kv -> kv[1] ∉ ("os", "arch"), collect(tags(p)))) @@ -196,11 +194,11 @@ end function validate_tags(tags::Dict) throw_invalid_key(k) = throw(ArgumentError("Key \"$(k)\" cannot have value \"$(tags[k])\"")) # Validate `arch` - if tags["arch"] ∉ ("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le") + if tags["arch"] ∉ 
("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le", "riscv64") throw_invalid_key("arch") end # Validate `os` - if tags["os"] ∉ ("linux", "macos", "freebsd", "windows") + if tags["os"] ∉ ("linux", "macos", "freebsd", "openbsd", "windows") throw_invalid_key("os") end # Validate `os`/`arch` combination @@ -308,7 +306,7 @@ function compare_version_cap(a::String, b::String, a_requested::Bool, b_requeste return a == b end - # Otherwise, do the comparison between the the single version cap and the single version: + # Otherwise, do the comparison between the single version cap and the single version: if a_requested return b <= a else @@ -377,8 +375,10 @@ function os() return "windows" elseif Sys.isapple() return "macos" - elseif Sys.isbsd() + elseif Sys.isfreebsd() return "freebsd" + elseif Sys.isopenbsd() + return "openbsd" else return "linux" end @@ -424,6 +424,7 @@ const platform_names = Dict( "macos" => "macOS", "windows" => "Windows", "freebsd" => "FreeBSD", + "openbsd" => "OpenBSD", nothing => "Unknown", ) @@ -494,7 +495,7 @@ julia> wordsize(Platform("x86_64", "macos")) wordsize(p::AbstractPlatform) = (arch(p) ∈ ("i686", "armv6l", "armv7l")) ? 32 : 64 """ - triplet(p::AbstractPlatform; exclude_tags::Vector{String}) + triplet(p::AbstractPlatform) Get the target triplet for the given `Platform` object as a `String`. @@ -558,6 +559,8 @@ function os_str(p::AbstractPlatform) else return "-unknown-freebsd" end + elseif os(p) == "openbsd" + return "-unknown-openbsd" else return "-unknown" end @@ -583,7 +586,8 @@ Sys.isapple(p::AbstractPlatform) = os(p) == "macos" Sys.islinux(p::AbstractPlatform) = os(p) == "linux" Sys.iswindows(p::AbstractPlatform) = os(p) == "windows" Sys.isfreebsd(p::AbstractPlatform) = os(p) == "freebsd" -Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "macos") +Sys.isopenbsd(p::AbstractPlatform) = os(p) == "openbsd" +Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "openbsd", "macos") Sys.isunix(p::AbstractPlatform) = Sys.isbsd(p) || Sys.islinux(p) const arch_mapping = Dict( @@ -593,6 +597,7 @@ const arch_mapping = Dict( "armv7l" => "arm(v7l)?", # if we just see `arm-linux-gnueabihf`, we assume it's `armv7l` "armv6l" => "armv6l", "powerpc64le" => "p(ower)?pc64le", + "riscv64" => "(rv64|riscv64)", ) # Keep this in sync with `CPUID.ISAs_by_family` # These are the CPUID side of the microarchitectures targeted by GCC flags in BinaryBuilder.jl @@ -628,12 +633,16 @@ const arch_march_isa_mapping = let ], "powerpc64le" => [ "power8" => get_set("powerpc64le", "power8"), - ] + ], + "riscv64" => [ + "riscv64" => get_set("riscv64", "riscv64"), + ], ) end const os_mapping = Dict( "macos" => "-apple-darwin[\\d\\.]*", "freebsd" => "-(.*-)?freebsd[\\d\\.]*", + "openbsd" => "-(.*-)?openbsd[\\d\\.]*", "windows" => "-w64-mingw32", "linux" => "-(.*-)?linux", ) @@ -663,18 +672,12 @@ const libstdcxx_version_mapping = Dict{String,String}( "libstdcxx" => "-libstdcxx\\d+", ) -""" - parse(::Type{Platform}, triplet::AbstractString) - -Parses a string platform triplet back into a `Platform` object. 
-""" -function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = false) +const triplet_regex = let # Helper function to collapse dictionary of mappings down into a regex of # named capture groups joined by "|" operators c(mapping) = string("(",join(["(?<$k>$v)" for (k, v) in mapping], "|"), ")") - # We're going to build a mondo regex here to parse everything: - triplet_regex = Regex(string( + Regex(string( "^", # First, the core triplet; arch/os/libc/call_abi c(arch_mapping), @@ -689,7 +692,14 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f "(?(?:-[^-]+\\+[^-]+)*)?", "\$", )) +end + +""" + parse(::Type{Platform}, triplet::AbstractString) +Parses a string platform triplet back into a `Platform` object. +""" +function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = false) m = match(triplet_regex, triplet) if m !== nothing # Helper function to find the single named field within the giant regex @@ -746,6 +756,9 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f if os == "freebsd" os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa) end + if os == "openbsd" + os_version = extract_os_version("openbsd", r".*openbsd([\d.]+)"sa) + end tags["os_version"] = os_version return Platform(arch, os, tags; validate_strict) @@ -803,7 +816,7 @@ function parse_dl_name_version(path::String, os::String) # On OSX, libraries look like `libnettle.6.3.dylib` dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"sa else - # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0` + # On Linux and others BSD, libraries look like `libnettle.so.6.3.0` dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"sa end diff --git a/base/bitarray.jl b/base/bitarray.jl index f29b30d0ac8c0..93fa48c56e379 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -404,6 +404,7 @@ falses(dims::DimOrInd...) = falses(dims) falses(dims::NTuple{N, Union{Integer, OneTo}}) where {N} = falses(map(to_dim, dims)) falses(dims::NTuple{N, Integer}) where {N} = fill!(BitArray(undef, dims), false) falses(dims::Tuple{}) = fill!(BitArray(undef, dims), false) +falses(dims::NTuple{N, DimOrInd}) where {N} = fill!(similar(BitArray, dims), false) """ trues(dims) @@ -422,6 +423,7 @@ trues(dims::DimOrInd...) 
= trues(dims) trues(dims::NTuple{N, Union{Integer, OneTo}}) where {N} = trues(map(to_dim, dims)) trues(dims::NTuple{N, Integer}) where {N} = fill!(BitArray(undef, dims), true) trues(dims::Tuple{}) = fill!(BitArray(undef, dims), true) +trues(dims::NTuple{N, DimOrInd}) where {N} = fill!(similar(BitArray, dims), true) function one(x::BitMatrix) m, n = size(x) @@ -462,7 +464,7 @@ copyto!(dest::BitArray, doffs::Integer, src::Union{BitArray,Array}, soffs::Integ _copyto_int!(dest, Int(doffs), src, Int(soffs), Int(n)) function _copyto_int!(dest::BitArray, doffs::Int, src::Union{BitArray,Array}, soffs::Int, n::Int) n == 0 && return dest - n < 0 && throw(ArgumentError("Number of elements to copy must be nonnegative.")) + n < 0 && throw(ArgumentError("Number of elements to copy must be non-negative.")) soffs < 1 && throw(BoundsError(src, soffs)) doffs < 1 && throw(BoundsError(dest, doffs)) soffs+n-1 > length(src) && throw(BoundsError(src, length(src)+1)) @@ -482,7 +484,7 @@ end reshape(B::BitArray, dims::Tuple{Vararg{Int}}) = _bitreshape(B, dims) function _bitreshape(B::BitArray, dims::NTuple{N,Int}) where N prod(dims) == length(B) || - throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(B))")) + throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $(length(B))")) Br = BitArray{N}(undef, ntuple(i->0,Val(N))...) Br.chunks = B.chunks Br.len = prod(dims) @@ -541,10 +543,8 @@ end reinterpret(::Type{Bool}, B::BitArray, dims::NTuple{N,Int}) where {N} = reinterpret(B, dims) reinterpret(B::BitArray, dims::NTuple{N,Int}) where {N} = reshape(B, dims) -if nameof(@__MODULE__) === :Base # avoid method overwrite (::Type{T})(x::T) where {T<:BitArray} = copy(x)::T BitArray(x::BitArray) = copy(x) -end """ BitArray(itr) @@ -807,7 +807,7 @@ prepend!(B::BitVector, items) = prepend!(B, BitArray(items)) prepend!(A::Vector{Bool}, items::BitVector) = prepend!(A, Array(items)) function sizehint!(B::BitVector, sz::Integer) - ccall(:jl_array_sizehint, Cvoid, (Any, UInt), B.chunks, num_bit_chunks(sz)) + sizehint!(B.chunks, num_bit_chunks(sz)) return B end @@ -1791,9 +1791,10 @@ function bit_map!(f::F, dest::BitArray, A::BitArray) where F dest_last = destc[len_Ac] _msk = _msk_end(A) # first zero out the bits mask is going to change - destc[len_Ac] = (dest_last & (~_msk)) # then update bits by `or`ing with a masked RHS - destc[len_Ac] |= f(Ac[len_Ac]) & _msk + # DO NOT SEPARATE ONTO TO LINES. + # Otherwise there will be bugs when Ac aliases destc + destc[len_Ac] = (dest_last & (~_msk)) | f(Ac[len_Ac]) & _msk dest end function bit_map!(f::F, dest::BitArray, A::BitArray, B::BitArray) where F @@ -1812,9 +1813,10 @@ function bit_map!(f::F, dest::BitArray, A::BitArray, B::BitArray) where F dest_last = destc[len_Ac] _msk = _msk_end(min_bitlen) # first zero out the bits mask is going to change - destc[len_Ac] = (dest_last & ~(_msk)) # then update bits by `or`ing with a masked RHS - destc[len_Ac] |= f(Ac[end], Bc[end]) & _msk + # DO NOT SEPARATE ONTO TO LINES. 
+ # Otherwise there will be bugs when Ac or Bc aliases destc + destc[len_Ac] = (dest_last & ~(_msk)) | f(Ac[end], Bc[end]) & _msk dest end diff --git a/base/bitset.jl b/base/bitset.jl index 240be822fa263..78d8fc8769de1 100644 --- a/base/bitset.jl +++ b/base/bitset.jl @@ -15,7 +15,11 @@ mutable struct BitSet <: AbstractSet{Int} # 1st stored Int equals 64*offset offset::Int - BitSet() = new(resize!(Vector{UInt64}(undef, 4), 0), NO_OFFSET) + function BitSet() + a = Vector{UInt64}(undef, 4) # start with some initial space for holding 0:255 without additional allocations later + setfield!(a, :size, (0,)) # aka `empty!(a)` inlined + return new(a, NO_OFFSET) + end end """ @@ -51,7 +55,10 @@ function copy!(dest::BitSet, src::BitSet) dest end -sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s) +function sizehint!(s::BitSet, n::Integer; first::Bool=false, shrink::Bool=true) + sizehint!(s.bits, (n+63) >> 6; first, shrink) + s +end function _bits_getindex(b::Bits, n::Int, offset::Int) ci = _div64(n) - offset + 1 diff --git a/base/bool.jl b/base/bool.jl index d7dcf76caa91b..3a5c36b09ae2c 100644 --- a/base/bool.jl +++ b/base/bool.jl @@ -184,3 +184,5 @@ end div(x::Bool, y::Bool) = y ? x : throw(DivideError()) rem(x::Bool, y::Bool) = y ? false : throw(DivideError()) mod(x::Bool, y::Bool) = rem(x,y) + +Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) diff --git a/base/boot.jl b/base/boot.jl index 78b7daaf47d64..53e439d83ebe2 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -52,8 +52,26 @@ #abstract type AbstractArray{T,N} end #abstract type DenseArray{T,N} <: AbstractArray{T,N} end +#primitive type AddrSpace{Backend::Module} 8 end +#const CPU = bitcast(AddrSpace{Core}, 0x00) + +#struct GenericMemory{kind::Symbol, T, AS::AddrSpace} +# length::Int +# const data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid} +# Union{ # hidden data +# elements :: NTuple{length, T} +# owner :: Any +# } +#end + +#struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace} +# mem::GenericMemory{kind, T, AS} +# data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid} +#end + #mutable struct Array{T,N} <: DenseArray{T,N} -## opaque +# ref::MemoryRef{T} +# size::NTuple{N,Int} #end #mutable struct Module @@ -107,12 +125,13 @@ # file::Union{Symbol,Nothing} #end -#struct LineInfoNode -# module::Module -# method::Any (Union{Symbol, Method, MethodInstance}) -# file::Symbol -# line::Int32 -# inlined_at::Int32 +#struct LegacyLineInfoNode end # only used internally during lowering + +#struct DebugInfo +# def::Any # (Union{Symbol, Method, MethodInstance}) +# linetable::Any # (Union{Nothing,DebugInfo}) +# edges::SimpleVector # Vector{DebugInfo} +# codelocs::String # compressed Vector{UInt8} #end #struct GotoNode @@ -156,15 +175,33 @@ #end #mutable struct Task -# parent::Task +# next::Any +# queue::Any # storage::Any -# state::Symbol # donenotify::Any # result::Any -# exception::Any -# backtrace::Any -# logstate::Any +# scope::Any # code::Any +# @atomic _state::UInt8 +# sticky::UInt8 +# priority::UInt16 +# @atomic _isexception::UInt8 +# pad00::UInt8 +# pad01::UInt8 +# pad02::UInt8 +# rngState0::UInt64 +# rngState1::UInt64 +# rngState2::UInt64 +# rngState3::UInt64 +# rngState4::UInt64 +# const metrics_enabled::Bool +# pad10::UInt8 +# pad11::UInt8 +# pad12::UInt8 +# @atomic first_enqueued_at::UInt64 +# @atomic last_started_running_at::UInt64 +# @atomic running_time_ns::UInt64 +# @atomic finished_at::UInt64 #end export @@ -173,8 +210,8 @@ export Tuple, Type, UnionAll, TypeVar, Union, Nothing, 
Cvoid, AbstractArray, DenseArray, NamedTuple, Pair, # special objects - Function, Method, - Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement, + Function, Method, Module, Symbol, Task, UndefInitializer, undef, WeakRef, VecElement, + Array, Memory, MemoryRef, AtomicMemory, AtomicMemoryRef, GenericMemory, GenericMemoryRef, # numeric types Number, Real, Integer, Bool, Ref, Ptr, AbstractFloat, Float16, Float32, Float64, @@ -187,14 +224,14 @@ export InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError, OverflowError, StackOverflowError, SegmentationFault, UndefRefError, UndefVarError, TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError, - UndefKeywordError, ConcurrencyViolationError, + UndefKeywordError, ConcurrencyViolationError, FieldError, # AST representation Expr, QuoteNode, LineNumberNode, GlobalRef, # object model functions - fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!, + fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!, setfieldonce!, nfields, throw, tuple, ===, isdefined, eval, # access to globals - getglobal, setglobal!, + getglobal, setglobal!, swapglobal!, modifyglobal!, replaceglobal!, setglobalonce!, isdefinedglobal, # ifelse, sizeof # not exported, to avoid conflicting with Base # type reflection <:, typeof, isa, typeassert, @@ -217,6 +254,8 @@ primitive type Float16 <: AbstractFloat 16 end primitive type Float32 <: AbstractFloat 32 end primitive type Float64 <: AbstractFloat 64 end +primitive type BFloat16 <: AbstractFloat 16 end + #primitive type Bool <: Integer 8 end abstract type AbstractChar end primitive type Char <: AbstractChar 32 end @@ -238,22 +277,66 @@ else const UInt = UInt32 end -function iterate end function Typeof end ccall(:jl_toplevel_eval_in, Any, (Any, Any), Core, quote (f::typeof(Typeof))(x) = ($(_expr(:meta,:nospecialize,:x)); isa(x,Type) ? Type{x} : typeof(x)) end) +function iterate end + macro nospecialize(x) _expr(:meta, :nospecialize, x) end +Expr(@nospecialize args...) = _expr(args...) -TypeVar(n::Symbol) = _typevar(n, Union{}, Any) -TypeVar(n::Symbol, @nospecialize(ub)) = _typevar(n, Union{}, ub) -TypeVar(n::Symbol, @nospecialize(lb), @nospecialize(ub)) = _typevar(n, lb, ub) +macro latestworld() Expr(:latestworld) end -UnionAll(v::TypeVar, @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v, t) +_is_internal(__module__) = __module__ === Core +# can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping) +macro _total_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#true, + #=:effect_free=#true, + #=:nothrow=#true, + #=:terminates_globally=#true, + #=:terminates_locally=#false, + #=:notaskstate=#true, + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#true)) +end +# can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping) +macro _foldable_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#true, + #=:effect_free=#true, + #=:nothrow=#false, + #=:terminates_globally=#true, + #=:terminates_locally=#false, + #=:notaskstate=#true, + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#true)) +end + +macro inline() Expr(:meta, :inline) end +macro noinline() Expr(:meta, :noinline) end + +macro _boundscheck() Expr(:boundscheck) end + +# n.b. 
the effects and model of these is refined in inference abstractinterpretation.jl +TypeVar(@nospecialize(n)) = _typevar(n::Symbol, Union{}, Any) +TypeVar(@nospecialize(n), @nospecialize(ub)) = _typevar(n::Symbol, Union{}, ub) +TypeVar(@nospecialize(n), @nospecialize(lb), @nospecialize(ub)) = _typevar(n::Symbol, lb, ub) +UnionAll(@nospecialize(v), @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v::TypeVar, t) + +const Memory{T} = GenericMemory{:not_atomic, T, CPU} +const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU} # simple convert for use by constructors of types in Core # note that there is no actual conversion defined here, @@ -263,6 +346,11 @@ convert(::Type{T}, x::T) where {T} = x cconvert(::Type{T}, x) where {T} = convert(T, x) unsafe_convert(::Type{T}, x::T) where {T} = x +# will be inserted by the frontend for closures +_typeof_captured_variable(@nospecialize t) = (@_total_meta; t isa Type && has_free_typevars(t) ? typeof(t) : Typeof(t)) + +has_free_typevars(@nospecialize t) = (@_total_meta; ccall(:jl_has_free_typevars, Int32, (Any,), t) === Int32(1)) + # dispatch token indicating a kwarg (keyword sorter) call function kwcall end # deprecated internal functions: @@ -275,16 +363,11 @@ kwftype(@nospecialize(t)) = typeof(kwcall) Union{}(a...) = throw(ArgumentError("cannot construct a value of type Union{} for return result")) kwcall(kwargs, ::Type{Union{}}, a...) = Union{}(a...) -Expr(@nospecialize args...) = _expr(args...) - abstract type Exception end struct ErrorException <: Exception msg::AbstractString end -macro inline() Expr(:meta, :inline) end -macro noinline() Expr(:meta, :noinline) end - struct BoundsError <: Exception a::Any i::Any @@ -300,10 +383,16 @@ struct StackOverflowError <: Exception end struct UndefRefError <: Exception end struct UndefVarError <: Exception var::Symbol + scope # a Module or Symbol or other object describing the context where this variable was looked for (e.g. 
Main or :local or :static_parameter) + UndefVarError(var::Symbol) = new(var) + UndefVarError(var::Symbol, @nospecialize scope) = new(var, scope) end struct ConcurrencyViolationError <: Exception msg::AbstractString end +struct MissingCodeError <: Exception + mi::MethodInstance +end struct InterruptException <: Exception end struct DomainError <: Exception val @@ -328,9 +417,8 @@ TypeError(where, @nospecialize(expected::Type), @nospecialize(got)) = TypeError(Symbol(where), "", expected, got) struct InexactError <: Exception func::Symbol - T # Type - val - InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@noinline; new(f, T, val)) + args + InexactError(f::Symbol, @nospecialize(args...)) = (@noinline; new(f, args)) end struct OverflowError <: Exception msg::AbstractString @@ -343,13 +431,15 @@ struct UndefKeywordError <: Exception var::Symbol end +const typemax_UInt = Intrinsics.sext_int(UInt, 0xFF) +const typemax_Int = Core.Intrinsics.udiv_int(Core.Intrinsics.sext_int(Int, 0xFF), 2) + struct MethodError <: Exception f args world::UInt MethodError(@nospecialize(f), @nospecialize(args), world::UInt) = new(f, args, world) end -const typemax_UInt = ccall(:jl_typemax_uint, Any, (Any,), UInt) MethodError(@nospecialize(f), @nospecialize(args)) = MethodError(f, args, typemax_UInt) struct AssertionError <: Exception @@ -357,6 +447,11 @@ struct AssertionError <: Exception end AssertionError() = AssertionError("") +struct FieldError <: Exception + type::DataType + field::Symbol +end + abstract type WrappedException <: Exception end struct LoadError <: WrappedException @@ -370,6 +465,14 @@ struct InitError <: WrappedException error end +struct ABIOverride + abi::Type + def::MethodInstance + ABIOverride(@nospecialize(abi::Type), def::MethodInstance) = new(abi, def) +end + +struct PrecompilableError <: Exception end + String(s::String) = s # no constructor yet const Cvoid = Nothing @@ -378,9 +481,13 @@ Nothing() = nothing # This should always be inlined getptls() = ccall(:jl_get_ptls_states, Ptr{Cvoid}, ()) -include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname) +include(m::Module, fname::String) = (@noinline; ccall(:jl_load_, Any, (Any, Any), m, fname)) +eval(m::Module, @nospecialize(e)) = (@noinline; ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e)) -eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e) +struct EvalInto <: Function + m::Module +end +(this::EvalInto)(@nospecialize(e)) = eval(this.m, e) mutable struct Box contents::Any @@ -414,13 +521,17 @@ eval(Core, quote ReturnNode(@nospecialize val) = $(Expr(:new, :ReturnNode, :val)) ReturnNode() = $(Expr(:new, :ReturnNode)) # unassigned val indicates unreachable GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest)) + EnterNode(dest::Int) = $(Expr(:new, :EnterNode, :dest)) + EnterNode(dest::Int, @nospecialize(scope)) = $(Expr(:new, :EnterNode, :dest, :scope)) LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing)) function LineNumberNode(l::Int, @nospecialize(f)) isa(f, String) && (f = Symbol(f)) return $(Expr(:new, :LineNumberNode, :l, :f)) end - LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = - $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at)) + DebugInfo(def::Union{Method,MethodInstance,Symbol}, linetable::Union{Nothing,DebugInfo}, edges::SimpleVector, codelocs::String) = + $(Expr(:new, :DebugInfo, :def, :linetable, :edges, :codelocs)) + 
DebugInfo(def::Union{Method,MethodInstance,Symbol}) = + $(Expr(:new, :DebugInfo, :def, nothing, Core.svec(), "")) SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n)) PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values)) PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ)) @@ -428,23 +539,33 @@ eval(Core, quote UpsilonNode(@nospecialize(val)) = $(Expr(:new, :UpsilonNode, :val)) UpsilonNode() = $(Expr(:new, :UpsilonNode)) Const(@nospecialize(v)) = $(Expr(:new, :Const, :v)) - # NOTE the main constructor is defined within `Core.Compiler` _PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields)) PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source)) InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) = $(Expr(:new, :InterConditional, :slot, :thentype, :elsetype)) MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers)) end) +const NullDebugInfo = DebugInfo(:none) + +struct LineInfoNode # legacy support for aiding Serializer.deserialize of old IR + mod::Module + method + file::Symbol + line::Int32 + inlined_at::Int32 + LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = new(mod, method, file, line, inlined_at) +end + + function CodeInstance( - mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const), + mi::Union{MethodInstance, ABIOverride}, owner, @nospecialize(rettype), @nospecialize(exctype), @nospecialize(inferred_const), @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt, - ipo_effects::UInt32, effects::UInt32, @nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#), - relocatability::UInt8) + effects::UInt32, @nospecialize(analysis_results), + di::Union{DebugInfo,Nothing}, edges::SimpleVector) return ccall(:jl_new_codeinst, Ref{CodeInstance}, - (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8), - mi, rettype, inferred_const, inferred, const_flags, min_world, max_world, - ipo_effects, effects, argescapes, - relocatability) + (Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any), + mi, owner, rettype, exctype, inferred_const, inferred, const_flags, min_world, max_world, + effects, analysis_results, di, edges) end GlobalRef(m::Module, s::Symbol) = ccall(:jl_module_globalref, Ref{GlobalRef}, (Any, Any), m, s) Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names) @@ -453,52 +574,103 @@ function _Task(@nospecialize(f), reserved_stack::Int, completion_future) return ccall(:jl_new_task, Ref{Task}, (Any, Any, Int), f, completion_future, reserved_stack) end -_is_internal(__module__) = __module__ === Core -# can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping) -macro _foldable_meta() - return _is_internal(__module__) && Expr(:meta, Expr(:purity, - #=:consistent=#true, - #=:effect_free=#true, - #=:nothrow=#false, - #=:terminates_globally=#true, - #=:terminates_locally=#false, - #=:notaskstate=#false, - #=:inaccessiblememonly=#false)) -end - const NTuple{N,T} = Tuple{Vararg{T,N}} ## primitive Array constructors struct UndefInitializer end const undef = 
UndefInitializer() + +# type and dimensionality specified +(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = memorynew(self, m) +(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1)) +# empty vector constructor +(self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0) + +memoryref(mem::GenericMemory) = memoryrefnew(mem) +memoryref(mem::GenericMemory, i::Integer) = memoryrefnew(memoryrefnew(mem), Int(i), @_boundscheck) +memoryref(ref::GenericMemoryRef, i::Integer) = memoryrefnew(ref, Int(i), @_boundscheck) +GenericMemoryRef(mem::GenericMemory) = memoryref(mem) +GenericMemoryRef(mem::GenericMemory, i::Integer) = memoryref(mem, i) +GenericMemoryRef(mem::GenericMemoryRef, i::Integer) = memoryref(mem, i) + +const AtomicMemory{T} = GenericMemory{:atomic, T, CPU} +const AtomicMemoryRef{T} = GenericMemoryRef{:atomic, T, CPU} + +# construction helpers for Array +new_as_memoryref(self::Type{GenericMemoryRef{kind,T,addrspace}}, m::Int) where {T,kind,addrspace} = memoryref(fieldtype(self, :mem)(undef, m)) + +# checked-multiply intrinsic function for dimensions +_checked_mul_dims() = 1, false +_checked_mul_dims(m::Int) = m, Intrinsics.ule_int(typemax_Int, m) # equivalently: (m + 1) < 1 +function _checked_mul_dims(m::Int, n::Int) + b = Intrinsics.checked_smul_int(m, n) + a = getfield(b, 1) + ovflw = getfield(b, 2) + ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, m)) + ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, n)) + return a, ovflw +end +function _checked_mul_dims(m::Int, d::Int...) + @_foldable_meta # the compiler needs to know this loop terminates + a = m + i = 1 + ovflw = false + neg = Intrinsics.ule_int(typemax_Int, m) + zero = false # if m==0 we won't have overflow since we go left to right + while Intrinsics.sle_int(i, nfields(d)) + di = getfield(d, i) + b = Intrinsics.checked_smul_int(a, di) + zero = Intrinsics.or_int(zero, di === 0) + ovflw = Intrinsics.or_int(ovflw, getfield(b, 2)) + neg = Intrinsics.or_int(neg, Intrinsics.ule_int(typemax_Int, di)) + a = getfield(b, 1) + i = Intrinsics.add_int(i, 1) + end + return a, Intrinsics.or_int(neg, Intrinsics.and_int(ovflw, Intrinsics.not_int(zero))) +end + +# convert a set of dims to a length, with overflow checking +checked_dims() = 1 +checked_dims(m::Int) = m # defer this check to Memory constructor instead +function checked_dims(d::Int...) + b = _checked_mul_dims(d...) 
+ getfield(b, 2) && throw(ArgumentError("invalid Array dimensions")) + return getfield(b, 1) +end + # type and dimensionality specified, accepting dims as series of Ints -Array{T,1}(::UndefInitializer, m::Int) where {T} = - ccall(:jl_alloc_array_1d, Array{T,1}, (Any, Int), Array{T,1}, m) -Array{T,2}(::UndefInitializer, m::Int, n::Int) where {T} = - ccall(:jl_alloc_array_2d, Array{T,2}, (Any, Int, Int), Array{T,2}, m, n) -Array{T,3}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = - ccall(:jl_alloc_array_3d, Array{T,3}, (Any, Int, Int, Int), Array{T,3}, m, n, o) -Array{T,N}(::UndefInitializer, d::Vararg{Int,N}) where {T,N} = - ccall(:jl_new_array, Array{T,N}, (Any, Any), Array{T,N}, d) +eval(Core, :(function (self::Type{Array{T,1}})(::UndefInitializer, m::Int) where {T} + mem = fieldtype(fieldtype(self, :ref), :mem)(undef, m) + return $(Expr(:new, :self, :(memoryref(mem)), :((m,)))) +end)) +eval(Core, :(function (self::Type{Array{T,2}})(::UndefInitializer, m::Int, n::Int) where {T} + return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(m, n))), :((m, n)))) +end)) +eval(Core, :(function (self::Type{Array{T,3}})(::UndefInitializer, m::Int, n::Int, o::Int) where {T} + return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(m, n, o))), :((m, n, o)))) +end)) +eval(Core, :(function (self::Type{Array{T, N}})(::UndefInitializer, d::Vararg{Int, N}) where {T, N} + return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(d...))), :d)) +end)) # type and dimensionality specified, accepting dims as tuples of Ints -Array{T,1}(::UndefInitializer, d::NTuple{1,Int}) where {T} = Array{T,1}(undef, getfield(d,1)) -Array{T,2}(::UndefInitializer, d::NTuple{2,Int}) where {T} = Array{T,2}(undef, getfield(d,1), getfield(d,2)) -Array{T,3}(::UndefInitializer, d::NTuple{3,Int}) where {T} = Array{T,3}(undef, getfield(d,1), getfield(d,2), getfield(d,3)) -Array{T,N}(::UndefInitializer, d::NTuple{N,Int}) where {T,N} = ccall(:jl_new_array, Array{T,N}, (Any, Any), Array{T,N}, d) +(self::Type{Array{T,1}})(::UndefInitializer, d::NTuple{1, Int}) where {T} = self(undef, getfield(d, 1)) +(self::Type{Array{T,2}})(::UndefInitializer, d::NTuple{2, Int}) where {T} = self(undef, getfield(d, 1), getfield(d, 2)) +(self::Type{Array{T,3}})(::UndefInitializer, d::NTuple{3, Int}) where {T} = self(undef, getfield(d, 1), getfield(d, 2), getfield(d, 3)) +(self::Type{Array{T,N}})(::UndefInitializer, d::NTuple{N, Int}) where {T, N} = self(undef, d...) 
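As a worked example of the overflow-checked dimension product used by the `Array` constructors above (behaviour read directly off the `checked_dims`/`_checked_mul_dims` definitions in this hunk; `Core.checked_dims` is an internal helper, not a public API):

```julia
Core.checked_dims(3, 4, 5)          # == 60, the total number of elements
Core.checked_dims(typemax(Int), 2)  # throws ArgumentError("invalid Array dimensions")
# With a single dimension, the check is deferred to the Memory constructor itself.
```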
# type but not dimensionality specified -Array{T}(::UndefInitializer, m::Int) where {T} = Array{T,1}(undef, m) -Array{T}(::UndefInitializer, m::Int, n::Int) where {T} = Array{T,2}(undef, m, n) -Array{T}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = Array{T,3}(undef, m, n, o) -Array{T}(::UndefInitializer, d::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, d) +Array{T}(::UndefInitializer, m::Int) where {T} = Array{T, 1}(undef, m) +Array{T}(::UndefInitializer, m::Int, n::Int) where {T} = Array{T, 2}(undef, m, n) +Array{T}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = Array{T, 3}(undef, m, n, o) +Array{T}(::UndefInitializer, d::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, d) # empty vector constructor -Array{T,1}() where {T} = Array{T,1}(undef, 0) +(self::Type{Array{T, 1}})() where {T} = self(undef, 0) -(Array{T,N} where T)(x::AbstractArray{S,N}) where {S,N} = Array{S,N}(x) +(Array{T, N} where T)(x::AbstractArray{S, N}) where {S, N} = Array{S, N}(x) -Array(A::AbstractArray{T,N}) where {T,N} = Array{T,N}(A) -Array{T}(A::AbstractArray{S,N}) where {T,N,S} = Array{T,N}(A) +Array(A::AbstractArray{T, N}) where {T, N} = Array{T, N}(A) +Array{T}(A::AbstractArray{S, N}) where {T, N, S} = Array{T, N}(A) -AbstractArray{T}(A::AbstractArray{S,N}) where {T,S,N} = AbstractArray{T,N}(A) +AbstractArray{T}(A::AbstractArray{S, N}) where {T, S, N} = AbstractArray{T, N}(A) # primitive Symbol constructors @@ -513,9 +685,9 @@ function Symbol(s::String) @noinline return _Symbol(ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), sizeof(s), s) end -function Symbol(a::Array{UInt8,1}) +function Symbol(a::Array{UInt8, 1}) @noinline - return _Symbol(ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), Intrinsics.arraylen(a), a) + return _Symbol(bitcast(Ptr{UInt8}, a.ref.ptr_or_offset), getfield(a.size, 1), a.ref.mem) end Symbol(s::Symbol) = s @@ -524,13 +696,13 @@ module IR export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode, NewvarNode, SSAValue, SlotNumber, Argument, - PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode, - Const, PartialStruct, InterConditional + PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo, + Const, PartialStruct, InterConditional, EnterNode, memoryref -import Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode, +using Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode, NewvarNode, SSAValue, SlotNumber, Argument, - PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode, - Const, PartialStruct, InterConditional + PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo, + Const, PartialStruct, InterConditional, EnterNode, memoryref end # module IR @@ -543,8 +715,17 @@ end macro __doc__(x) return Expr(:escape, Expr(:block, Expr(:meta, :doc), x)) end -atdoc = (source, mod, str, expr) -> Expr(:escape, expr) -atdoc!(λ) = global atdoc = λ + +isbasicdoc(@nospecialize x) = (isa(x, Expr) && x.head === :.) || isa(x, Union{QuoteNode, Symbol}) +iscallexpr(ex::Expr) = (isa(ex, Expr) && ex.head === :where) ? 
iscallexpr(ex.args[1]) : (isa(ex, Expr) && ex.head === :call) +iscallexpr(ex) = false +function ignoredoc(source, mod, str, expr) + (isbasicdoc(expr) || iscallexpr(expr)) && return Expr(:escape, nothing) + Expr(:escape, expr) +end + +global atdoc = ignoredoc +atdoc!(λ) = global atdoc = λ # macros for big integer syntax macro int128_str end @@ -615,6 +796,13 @@ function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospec end end +# If the generator is a subtype of this trait, inference caches the generated unoptimized +# code, sacrificing memory space to improve the performance of subsequent inferences. +# This tradeoff is not appropriate in general cases (e.g., for `GeneratedFunctionStub`s +# generated from the front end), but it can be justified for generators involving complex +# code transformations, such as a Cassette-like system. +abstract type CachedGenerator end + NamedTuple() = NamedTuple{(),Tuple{}}(()) eval(Core, :(NamedTuple{names}(args::Tuple) where {names} = @@ -622,15 +810,13 @@ eval(Core, :(NamedTuple{names}(args::Tuple) where {names} = using .Intrinsics: sle_int, add_int -eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} = - $(Expr(:splatnew, :(NamedTuple{names,T}), :args)))) +eval(Core, :((NT::Type{NamedTuple{names,T}})(args::T) where {names, T <: Tuple} = + $(Expr(:splatnew, :NT, :args)))) # constructors for built-in types import .Intrinsics: eq_int, trunc_int, lshr_int, sub_int, shl_int, bitcast, sext_int, zext_int, and_int -throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@noinline; throw(InexactError(f, T, val))) - function is_top_bit_set(x) @inline eq_int(trunc_int(UInt8, lshr_int(x, sub_int(shl_int(sizeof(x), 3), 1))), trunc_int(UInt8, 1)) @@ -641,9 +827,14 @@ function is_top_bit_set(x::Union{Int8,UInt8}) eq_int(lshr_int(x, 7), trunc_int(typeof(x), 1)) end -function check_top_bit(::Type{To}, x) where {To} +# n.b. This function exists for CUDA to overload to configure error behavior (see #48097) +throw_inexacterror(func::Symbol, to, val) = throw(InexactError(func, to, val)) + +function check_sign_bit(::Type{To}, x) where {To} @inline - is_top_bit_set(x) && throw_inexacterror(:check_top_bit, To, x) + # the top bit is the sign bit of x but "sign bit" sounds better in stacktraces + # n.b. if x is signed, then sizeof(x) === sizeof(To), otherwise sizeof(x) >= sizeof(To) + is_top_bit_set(x) && throw_inexacterror(sizeof(x) === sizeof(To) ? 
:convert : :trunc, To, x) x end @@ -668,11 +859,11 @@ toInt8(x::Int16) = checked_trunc_sint(Int8, x) toInt8(x::Int32) = checked_trunc_sint(Int8, x) toInt8(x::Int64) = checked_trunc_sint(Int8, x) toInt8(x::Int128) = checked_trunc_sint(Int8, x) -toInt8(x::UInt8) = bitcast(Int8, check_top_bit(Int8, x)) -toInt8(x::UInt16) = checked_trunc_sint(Int8, check_top_bit(Int8, x)) -toInt8(x::UInt32) = checked_trunc_sint(Int8, check_top_bit(Int8, x)) -toInt8(x::UInt64) = checked_trunc_sint(Int8, check_top_bit(Int8, x)) -toInt8(x::UInt128) = checked_trunc_sint(Int8, check_top_bit(Int8, x)) +toInt8(x::UInt8) = bitcast(Int8, check_sign_bit(Int8, x)) +toInt8(x::UInt16) = checked_trunc_sint(Int8, check_sign_bit(Int8, x)) +toInt8(x::UInt32) = checked_trunc_sint(Int8, check_sign_bit(Int8, x)) +toInt8(x::UInt64) = checked_trunc_sint(Int8, check_sign_bit(Int8, x)) +toInt8(x::UInt128) = checked_trunc_sint(Int8, check_sign_bit(Int8, x)) toInt8(x::Bool) = and_int(bitcast(Int8, x), Int8(1)) toInt16(x::Int8) = sext_int(Int16, x) toInt16(x::Int16) = x @@ -680,10 +871,10 @@ toInt16(x::Int32) = checked_trunc_sint(Int16, x) toInt16(x::Int64) = checked_trunc_sint(Int16, x) toInt16(x::Int128) = checked_trunc_sint(Int16, x) toInt16(x::UInt8) = zext_int(Int16, x) -toInt16(x::UInt16) = bitcast(Int16, check_top_bit(Int16, x)) -toInt16(x::UInt32) = checked_trunc_sint(Int16, check_top_bit(Int16, x)) -toInt16(x::UInt64) = checked_trunc_sint(Int16, check_top_bit(Int16, x)) -toInt16(x::UInt128) = checked_trunc_sint(Int16, check_top_bit(Int16, x)) +toInt16(x::UInt16) = bitcast(Int16, check_sign_bit(Int16, x)) +toInt16(x::UInt32) = checked_trunc_sint(Int16, check_sign_bit(Int16, x)) +toInt16(x::UInt64) = checked_trunc_sint(Int16, check_sign_bit(Int16, x)) +toInt16(x::UInt128) = checked_trunc_sint(Int16, check_sign_bit(Int16, x)) toInt16(x::Bool) = and_int(zext_int(Int16, x), Int16(1)) toInt32(x::Int8) = sext_int(Int32, x) toInt32(x::Int16) = sext_int(Int32, x) @@ -692,9 +883,9 @@ toInt32(x::Int64) = checked_trunc_sint(Int32, x) toInt32(x::Int128) = checked_trunc_sint(Int32, x) toInt32(x::UInt8) = zext_int(Int32, x) toInt32(x::UInt16) = zext_int(Int32, x) -toInt32(x::UInt32) = bitcast(Int32, check_top_bit(Int32, x)) -toInt32(x::UInt64) = checked_trunc_sint(Int32, check_top_bit(Int32, x)) -toInt32(x::UInt128) = checked_trunc_sint(Int32, check_top_bit(Int32, x)) +toInt32(x::UInt32) = bitcast(Int32, check_sign_bit(Int32, x)) +toInt32(x::UInt64) = checked_trunc_sint(Int32, check_sign_bit(Int32, x)) +toInt32(x::UInt128) = checked_trunc_sint(Int32, check_sign_bit(Int32, x)) toInt32(x::Bool) = and_int(zext_int(Int32, x), Int32(1)) toInt64(x::Int8) = sext_int(Int64, x) toInt64(x::Int16) = sext_int(Int64, x) @@ -704,8 +895,8 @@ toInt64(x::Int128) = checked_trunc_sint(Int64, x) toInt64(x::UInt8) = zext_int(Int64, x) toInt64(x::UInt16) = zext_int(Int64, x) toInt64(x::UInt32) = zext_int(Int64, x) -toInt64(x::UInt64) = bitcast(Int64, check_top_bit(Int64, x)) -toInt64(x::UInt128) = checked_trunc_sint(Int64, check_top_bit(Int64, x)) +toInt64(x::UInt64) = bitcast(Int64, check_sign_bit(Int64, x)) +toInt64(x::UInt128) = checked_trunc_sint(Int64, check_sign_bit(Int64, x)) toInt64(x::Bool) = and_int(zext_int(Int64, x), Int64(1)) toInt128(x::Int8) = sext_int(Int128, x) toInt128(x::Int16) = sext_int(Int128, x) @@ -716,9 +907,9 @@ toInt128(x::UInt8) = zext_int(Int128, x) toInt128(x::UInt16) = zext_int(Int128, x) toInt128(x::UInt32) = zext_int(Int128, x) toInt128(x::UInt64) = zext_int(Int128, x) -toInt128(x::UInt128) = bitcast(Int128, check_top_bit(Int128, 
x)) +toInt128(x::UInt128) = bitcast(Int128, check_sign_bit(Int128, x)) toInt128(x::Bool) = and_int(zext_int(Int128, x), Int128(1)) -toUInt8(x::Int8) = bitcast(UInt8, check_top_bit(UInt8, x)) +toUInt8(x::Int8) = bitcast(UInt8, check_sign_bit(UInt8, x)) toUInt8(x::Int16) = checked_trunc_uint(UInt8, x) toUInt8(x::Int32) = checked_trunc_uint(UInt8, x) toUInt8(x::Int64) = checked_trunc_uint(UInt8, x) @@ -729,8 +920,8 @@ toUInt8(x::UInt32) = checked_trunc_uint(UInt8, x) toUInt8(x::UInt64) = checked_trunc_uint(UInt8, x) toUInt8(x::UInt128) = checked_trunc_uint(UInt8, x) toUInt8(x::Bool) = and_int(bitcast(UInt8, x), UInt8(1)) -toUInt16(x::Int8) = sext_int(UInt16, check_top_bit(UInt16, x)) -toUInt16(x::Int16) = bitcast(UInt16, check_top_bit(UInt16, x)) +toUInt16(x::Int8) = sext_int(UInt16, check_sign_bit(UInt16, x)) +toUInt16(x::Int16) = bitcast(UInt16, check_sign_bit(UInt16, x)) toUInt16(x::Int32) = checked_trunc_uint(UInt16, x) toUInt16(x::Int64) = checked_trunc_uint(UInt16, x) toUInt16(x::Int128) = checked_trunc_uint(UInt16, x) @@ -740,9 +931,9 @@ toUInt16(x::UInt32) = checked_trunc_uint(UInt16, x) toUInt16(x::UInt64) = checked_trunc_uint(UInt16, x) toUInt16(x::UInt128) = checked_trunc_uint(UInt16, x) toUInt16(x::Bool) = and_int(zext_int(UInt16, x), UInt16(1)) -toUInt32(x::Int8) = sext_int(UInt32, check_top_bit(UInt32, x)) -toUInt32(x::Int16) = sext_int(UInt32, check_top_bit(UInt32, x)) -toUInt32(x::Int32) = bitcast(UInt32, check_top_bit(UInt32, x)) +toUInt32(x::Int8) = sext_int(UInt32, check_sign_bit(UInt32, x)) +toUInt32(x::Int16) = sext_int(UInt32, check_sign_bit(UInt32, x)) +toUInt32(x::Int32) = bitcast(UInt32, check_sign_bit(UInt32, x)) toUInt32(x::Int64) = checked_trunc_uint(UInt32, x) toUInt32(x::Int128) = checked_trunc_uint(UInt32, x) toUInt32(x::UInt8) = zext_int(UInt32, x) @@ -751,10 +942,10 @@ toUInt32(x::UInt32) = x toUInt32(x::UInt64) = checked_trunc_uint(UInt32, x) toUInt32(x::UInt128) = checked_trunc_uint(UInt32, x) toUInt32(x::Bool) = and_int(zext_int(UInt32, x), UInt32(1)) -toUInt64(x::Int8) = sext_int(UInt64, check_top_bit(UInt64, x)) -toUInt64(x::Int16) = sext_int(UInt64, check_top_bit(UInt64, x)) -toUInt64(x::Int32) = sext_int(UInt64, check_top_bit(UInt64, x)) -toUInt64(x::Int64) = bitcast(UInt64, check_top_bit(UInt64, x)) +toUInt64(x::Int8) = sext_int(UInt64, check_sign_bit(UInt64, x)) +toUInt64(x::Int16) = sext_int(UInt64, check_sign_bit(UInt64, x)) +toUInt64(x::Int32) = sext_int(UInt64, check_sign_bit(UInt64, x)) +toUInt64(x::Int64) = bitcast(UInt64, check_sign_bit(UInt64, x)) toUInt64(x::Int128) = checked_trunc_uint(UInt64, x) toUInt64(x::UInt8) = zext_int(UInt64, x) toUInt64(x::UInt16) = zext_int(UInt64, x) @@ -762,11 +953,11 @@ toUInt64(x::UInt32) = zext_int(UInt64, x) toUInt64(x::UInt64) = x toUInt64(x::UInt128) = checked_trunc_uint(UInt64, x) toUInt64(x::Bool) = and_int(zext_int(UInt64, x), UInt64(1)) -toUInt128(x::Int8) = sext_int(UInt128, check_top_bit(UInt128, x)) -toUInt128(x::Int16) = sext_int(UInt128, check_top_bit(UInt128, x)) -toUInt128(x::Int32) = sext_int(UInt128, check_top_bit(UInt128, x)) -toUInt128(x::Int64) = sext_int(UInt128, check_top_bit(UInt128, x)) -toUInt128(x::Int128) = bitcast(UInt128, check_top_bit(UInt128, x)) +toUInt128(x::Int8) = sext_int(UInt128, check_sign_bit(UInt128, x)) +toUInt128(x::Int16) = sext_int(UInt128, check_sign_bit(UInt128, x)) +toUInt128(x::Int32) = sext_int(UInt128, check_sign_bit(UInt128, x)) +toUInt128(x::Int64) = sext_int(UInt128, check_sign_bit(UInt128, x)) +toUInt128(x::Int128) = bitcast(UInt128, check_sign_bit(UInt128, 
x)) toUInt128(x::UInt8) = zext_int(UInt128, x) toUInt128(x::UInt16) = zext_int(UInt128, x) toUInt128(x::UInt32) = zext_int(UInt128, x) @@ -795,8 +986,8 @@ if Int === Int32 Int64(x::Ptr) = Int64(UInt32(x)) UInt64(x::Ptr) = UInt64(UInt32(x)) end -Ptr{T}(x::Union{Int,UInt,Ptr}) where {T} = bitcast(Ptr{T}, x) -Ptr{T}() where {T} = Ptr{T}(0) +(PT::Type{Ptr{T}} where T)(x::Union{Int,UInt,Ptr}=0) = bitcast(PT, x) +(AS::Type{AddrSpace{Backend}} where Backend)(x::UInt8) = bitcast(AS, x) Signed(x::UInt8) = Int8(x) Unsigned(x::Int8) = UInt8(x) @@ -815,6 +1006,14 @@ Unsigned(x::Union{Float16, Float32, Float64, Bool}) = UInt(x) Integer(x::Integer) = x Integer(x::Union{Float16, Float32, Float64}) = Int(x) +# During definition of struct type `B`, if an `A.B` expression refers to +# the eventual global name of the struct, then return the partially-initialized +# type object. +# TODO: remove. This is a shim for backwards compatibility. +function struct_name_shim(@nospecialize(x), name::Symbol, mod::Module, @nospecialize(t)) + return x === mod ? t : getfield(x, name) +end + # Binding for the julia parser, called as # # Core._parse(text, filename, lineno, offset, options) @@ -850,8 +1049,52 @@ struct Pair{A, B} end function _hasmethod(@nospecialize(tt)) # this function has a special tfunc - world = ccall(:jl_get_tls_world_age, UInt, ()) + world = ccall(:jl_get_tls_world_age, UInt, ()) # tls_world_age() return Intrinsics.not_int(ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world) === nothing) end +# for backward compat +arrayref(inbounds::Bool, A::Array, i::Int...) = Main.Base.getindex(A, i...) +const_arrayref(inbounds::Bool, A::Array, i::Int...) = Main.Base.getindex(A, i...) +arrayset(inbounds::Bool, A::Array{T}, x::Any, i::Int...) where {T} = Main.Base.setindex!(A, x::T, i...) +arraysize(a::Array) = a.size +arraysize(a::Array, i::Int) = sle_int(i, nfields(a.size)) ? getfield(a.size, i) : 1 +export arrayref, arrayset, arraysize, const_arrayref +const check_top_bit = check_sign_bit + +# For convenience +EnterNode(old::EnterNode, new_dest::Int) = isdefined(old, :scope) ? + EnterNode(new_dest, old.scope) : EnterNode(new_dest) + +# typename(_).constprop_heuristic +const FORCE_CONST_PROP = 0x1 +const ARRAY_INDEX_HEURISTIC = 0x2 +const ITERATE_HEURISTIC = 0x3 +const SAMETYPE_HEURISTIC = 0x4 + +# `typename` has special tfunc support in inference to improve +# the result for `Type{Union{...}}`. It is defined here, so that the Compiler +# can look it up by value. +struct TypeNameError <: Exception + a + TypeNameError(@nospecialize(a)) = new(a) +end + +typename(a) = throw(TypeNameError(a)) +typename(a::DataType) = a.name +function typename(a::Union) + ta = typename(a.a) + tb = typename(a.b) + ta === tb || throw(TypeNameError(a)) + return tb +end +typename(union::UnionAll) = typename(union.body) + +# Special inference support to avoid execess specialization of these methods. +# TODO: Replace this by a generic heuristic. +(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a) +(!==)(@nospecialize(a), @nospecialize(b)) = Intrinsics.not_int(a === b) + +include(Core, "optimized_generics.jl") + ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true) diff --git a/base/broadcast.jl b/base/broadcast.jl index 1e057789509ed..512b397352040 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -196,14 +196,18 @@ const andand = AndAnd() broadcasted(::AndAnd, a, b) = broadcasted((a, b) -> a && b, a, b) function broadcasted(::AndAnd, a, bc::Broadcasted) bcf = flatten(bc) - broadcasted((a, args...) 
-> a && bcf.f(args...), a, bcf.args...) + # Vararg type signature to specialize on args count. This is necessary for performance + # and innexpensive because this should only ever get called with 1+N = length(bc.args) + broadcasted(((a, args::Vararg{Any, N}) where {N}) -> a && bcf.f(args...), a, bcf.args...) end struct OrOr end const oror = OrOr() broadcasted(::OrOr, a, b) = broadcasted((a, b) -> a || b, a, b) function broadcasted(::OrOr, a, bc::Broadcasted) bcf = flatten(bc) - broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...) + # Vararg type signature to specialize on args count. This is necessary for performance + # and innexpensive because this should only ever get called with 1+N = length(bc.args) + broadcasted(((a, args::Vararg{Any, N}) where {N}) -> a || bcf.f(args...), a, bcf.args...) end Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{<:Any,Axes,F,Args}) where {NewStyle,Axes,F,Args} = @@ -222,12 +226,12 @@ end ## Allocating the output container Base.similar(bc::Broadcasted, ::Type{T}) where {T} = similar(bc, T, axes(bc)) Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}, dims) where {N,ElType} = - similar(Array{ElType}, dims) + similar(Array{ElType, length(dims)}, dims) Base.similar(::Broadcasted{DefaultArrayStyle{N}}, ::Type{Bool}, dims) where N = similar(BitArray, dims) # In cases of conflict we fall back on Array Base.similar(::Broadcasted{ArrayConflict}, ::Type{ElType}, dims) where ElType = - similar(Array{ElType}, dims) + similar(Array{ElType, length(dims)}, dims) Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) = similar(BitArray, dims) @@ -246,9 +250,11 @@ BroadcastStyle(::Type{<:Broadcasted{S}}) where {S<:Union{Nothing,Unknown}} = argtype(::Type{BC}) where {BC<:Broadcasted} = fieldtype(BC, :args) argtype(bc::Broadcasted) = argtype(typeof(bc)) -@inline Base.eachindex(bc::Broadcasted) = _eachindex(axes(bc)) -_eachindex(t::Tuple{Any}) = t[1] -_eachindex(t::Tuple) = CartesianIndices(t) +@inline Base.eachindex(bc::Broadcasted) = eachindex(IndexStyle(bc), bc) +@inline Base.eachindex(s::IndexStyle, bc::Broadcasted) = _eachindex(s, axes(bc)) +_eachindex(::IndexCartesian, t::Tuple) = CartesianIndices(t) +_eachindex(s::IndexLinear, t::Tuple) = eachindex(s, LinearIndices(t)) +_eachindex(::IndexLinear, t::Tuple{Any}) = t[1] Base.IndexStyle(bc::Broadcasted) = IndexStyle(typeof(bc)) Base.IndexStyle(::Type{<:Broadcasted{<:Any,<:Tuple{Any}}}) = IndexLinear() @@ -274,19 +280,20 @@ Base.@propagate_inbounds function Base.iterate(bc::Broadcasted, s) end Base.IteratorSize(::Type{T}) where {T<:Broadcasted} = Base.HasShape{ndims(T)}() -Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(fieldtype(BC, :args)) -Base.ndims(::Type{<:Broadcasted{<:AbstractArrayStyle{N},Nothing}}) where {N<:Integer} = N +Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims_broadcasted(BC) +# the `AbstractArrayStyle` type parameter is required to be either equal to `Any` or be an `Int` value +Base.ndims(BC::Type{<:Broadcasted{<:AbstractArrayStyle{Any},Nothing}}) = _maxndims_broadcasted(BC) +Base.ndims(::Type{<:Broadcasted{<:AbstractArrayStyle{N},Nothing}}) where {N} = N::Int -_maxndims(T::Type{<:Tuple}) = reduce(max, (ntuple(n -> _ndims(fieldtype(T, n)), Base._counttuple(T)))) -_maxndims(::Type{<:Tuple{T}}) where {T} = ndims(T) -_maxndims(::Type{<:Tuple{T}}) where {T<:Tuple} = _ndims(T) +function _maxndims_broadcasted(BC::Type{<:Broadcasted}) + _maxndims(fieldtype(BC, :args)) +end +_maxndims(::Type{T}) where {T<:Tuple} = reduce(max, ntuple(n -> 
(F = fieldtype(T, n); F <: Tuple ? 1 : ndims(F)), Base._counttuple(T))) +_maxndims(::Type{<:Tuple{T}}) where {T} = T <: Tuple ? 1 : ndims(T) function _maxndims(::Type{<:Tuple{T, S}}) where {T, S} - return T<:Tuple || S<:Tuple ? max(_ndims(T), _ndims(S)) : max(ndims(T), ndims(S)) + return max(T <: Tuple ? 1 : ndims(T), S <: Tuple ? 1 : ndims(S)) end -_ndims(x) = ndims(x) -_ndims(::Type{<:Tuple}) = 1 - Base.IteratorEltype(::Type{<:Broadcasted}) = Base.EltypeUnknown() ## Instantiation fills in the "missing" fields in Broadcasted. @@ -341,20 +348,17 @@ function flatten(bc::Broadcasted) isflat(bc) && return bc # concatenate the nested arguments into {a, b, c, d} args = cat_nested(bc) - # build a function `makeargs` that takes a "flat" argument list and - # and creates the appropriate input arguments for `f`, e.g., - # makeargs = (w, x, y, z) -> (w, g(x, y), z) - # - # `makeargs` is built recursively and looks a bit like this: - # makeargs(w, x, y, z) = (w, makeargs1(x, y, z)...) - # = (w, g(x, y), makeargs2(z)...) - # = (w, g(x, y), z) - let makeargs = make_makeargs(()->(), bc.args), f = bc.f - newf = @inline function(args::Vararg{Any,N}) where N - f(makeargs(args...)...) - end - return Broadcasted(bc.style, newf, args, bc.axes) - end + # build a tuple of functions `makeargs`. Its elements take + # the whole "flat" argument list and generate the appropriate + # input arguments for the broadcasted function `f`, e.g., + # makeargs[1] = ((w, x, y, z)) -> w + # makeargs[2] = ((w, x, y, z)) -> g(x, y) + # makeargs[3] = ((w, x, y, z)) -> z + makeargs = make_makeargs(bc.args) + f = Base.maybeconstructor(bc.f) + # TODO: consider specializing on args... if performance problems emerge: + newf = (args...) -> (@inline; f(prepare_args(makeargs, args)...)) + return Broadcasted(bc.style, newf, args, bc.axes) end const NestedTuple = Tuple{<:Broadcasted,Vararg{Any}} @@ -363,78 +367,47 @@ _isflat(args::NestedTuple) = false _isflat(args::Tuple) = _isflat(tail(args)) _isflat(args::Tuple{}) = true -cat_nested(t::Broadcasted, rest...) = (cat_nested(t.args...)..., cat_nested(rest...)...) -cat_nested(t::Any, rest...) = (t, cat_nested(rest...)...) -cat_nested() = () +cat_nested(bc::Broadcasted) = cat_nested_args(bc.args) +cat_nested_args(::Tuple{}) = () +cat_nested_args(t::Tuple{Any}) = cat_nested(t[1]) +cat_nested_args(t::Tuple) = (cat_nested(t[1])..., cat_nested_args(tail(t))...) +cat_nested(a) = (a,) """ - make_makeargs(makeargs_tail::Function, t::Tuple) -> Function + make_makeargs(t::Tuple) -> Tuple{Vararg{Function}} Each element of `t` is one (consecutive) node in a broadcast tree. -Ignoring `makeargs_tail` for the moment, the job of `make_makeargs` is -to return a function that takes in flattened argument list and returns a -tuple (each entry corresponding to an entry in `t`, having evaluated -the corresponding element in the broadcast tree). As an additional -complication, the passed in tuple may be longer than the number of leaves -in the subtree described by `t`. The `makeargs_tail` function should -be called on such additional arguments (but not the arguments consumed -by `t`). +The returned `Tuple` are functions which take in the (whole) flattened +list and generate the inputs for the corresponding broadcasted function. """ -@inline make_makeargs(makeargs_tail, t::Tuple{}) = makeargs_tail -@inline function make_makeargs(makeargs_tail, t::Tuple) - makeargs = make_makeargs(makeargs_tail, tail(t)) - (head, tail...)->(head, makeargs(tail...)...) 
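# --- Illustrative sketch (editor's addition, not part of the patch) -----------------
# A standalone restatement of the "tuple of picker functions" scheme described in the
# comments above, assuming a nested call f(w, g(x, y), z) whose leaves have been
# flattened into the list (w, x, y, z). The names f and g are hypothetical stand-ins.
g(x, y) = x + y
f(w, gxy, z) = (w, gxy, z)
# One entry per argument of `f`; each takes the whole flat tuple, mirroring the
# makeargs[1]/makeargs[2]/makeargs[3] example above (cf. the `Pick{N}` helpers below).
makeargs_sketch = (
    args -> args[1],
    args -> g(args[2], args[3]),
    args -> args[4],
)
flat = (10, 1, 2, 30)
@assert f(map(m -> m(flat), makeargs_sketch)...) == (10, 3, 30)
# -------------------------------------------------------------------------------------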
+make_makeargs(args::Tuple) = _make_makeargs(args, 1)[1] + +# We build `makeargs` by traversing the broadcast nodes recursively. +# note: `n` indicates the flattened index of the next unused argument. +@inline function _make_makeargs(args::Tuple, n::Int) + head, n = _make_makeargs1(args[1], n) + rest, n = _make_makeargs(tail(args), n) + (head, rest...), n end -function make_makeargs(makeargs_tail, t::Tuple{<:Broadcasted, Vararg{Any}}) - bc = t[1] - # c.f. the same expression in the function on leaf nodes above. Here - # we recurse into siblings in the broadcast tree. - let makeargs_tail = make_makeargs(makeargs_tail, tail(t)), - # Here we recurse into children. It would be valid to pass in makeargs_tail - # here, and not use it below. However, in that case, our recursion is no - # longer purely structural because we're building up one argument (the closure) - # while destructuing another. - makeargs_head = make_makeargs((args...)->args, bc.args), - f = bc.f - # Create two functions, one that splits of the first length(bc.args) - # elements from the tuple and one that yields the remaining arguments. - # N.B. We can't call headargs on `args...` directly because - # args is flattened (i.e. our children have not been evaluated - # yet). - headargs, tailargs = make_headargs(bc.args), make_tailargs(bc.args) - return @inline function(args::Vararg{Any,N}) where N - args1 = makeargs_head(args...) - a, b = headargs(args1...), makeargs_tail(tailargs(args1...)...) - (f(a...), b...) - end - end +_make_makeargs(::Tuple{}, n::Int) = (), n + +# A help struct to store the flattened index statically +struct Pick{N} <: Function end +(::Pick{N})(@nospecialize(args::Tuple)) where {N} = args[N] + +# For flat nodes, we just consume one argument (n += 1), and return the "Pick" function +@inline _make_makeargs1(_, n::Int) = Pick{n}(), n + 1 +# For nested nodes, we form the `makeargs1` based on the child `makeargs` (n += length(cat_nested(bc))) +@inline function _make_makeargs1(bc::Broadcasted, n::Int) + makeargs, n = _make_makeargs(bc.args, n) + f = Base.maybeconstructor(bc.f) + makeargs1 = (args::Tuple) -> (@inline; f(prepare_args(makeargs, args)...)) + makeargs1, n end -@inline function make_headargs(t::Tuple) - let headargs = make_headargs(tail(t)) - return @inline function(head, tail::Vararg{Any,N}) where N - (head, headargs(tail...)...) - end - end -end -@inline function make_headargs(::Tuple{}) - return @inline function(tail::Vararg{Any,N}) where N - () - end -end - -@inline function make_tailargs(t::Tuple) - let tailargs = make_tailargs(tail(t)) - return @inline function(head, tail::Vararg{Any,N}) where N - tailargs(tail...) - end - end -end -@inline function make_tailargs(::Tuple{}) - return @inline function(tail::Vararg{Any,N}) where N - tail - end -end +@inline prepare_args(makeargs::Tuple, @nospecialize(x::Tuple)) = (makeargs[1](x), prepare_args(tail(makeargs), x)...) +@inline prepare_args(makeargs::Tuple{Any}, @nospecialize(x::Tuple)) = (makeargs[1](x),) +prepare_args(::Tuple{}, ::Tuple) = () ## Broadcasting utilities ## @@ -458,6 +431,10 @@ function combine_styles end combine_styles() = DefaultArrayStyle{0}() combine_styles(c) = result_style(BroadcastStyle(typeof(c))) +function combine_styles(bc::Broadcasted) + bc.style isa Union{Nothing,Unknown} || return bc.style + throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned")) +end combine_styles(c1, c2) = result_style(combine_styles(c1), combine_styles(c2)) @inline combine_styles(c1, c2, cs...) 
= result_style(combine_styles(c1), combine_styles(c2, cs...)) @@ -480,7 +457,9 @@ Base.Broadcast.DefaultArrayStyle{1}() function result_style end result_style(s::BroadcastStyle) = s -result_style(s1::S, s2::S) where S<:BroadcastStyle = S() +function result_style(s1::S, s2::S) where S<:BroadcastStyle + s1 ≡ s2 ? s1 : error("inconsistent broadcast styles, custom rule needed") +end # Test both orders so users typically only have to declare one order result_style(s1, s2) = result_join(s1, s2, BroadcastStyle(s1, s2), BroadcastStyle(s2, s1)) @@ -496,7 +475,8 @@ result_join(::Any, ::Any, s::BroadcastStyle, ::Unknown) = s result_join(::AbstractArrayStyle, ::AbstractArrayStyle, ::Unknown, ::Unknown) = ArrayConflict() # Fallbacks in case users define `rule` for both argument-orders (not recommended) -result_join(::Any, ::Any, ::S, ::S) where S<:BroadcastStyle = S() +result_join(::Any, ::Any, s1::S, s2::S) where S<:BroadcastStyle = result_style(s1, s2) + @noinline function result_join(::S, ::T, ::U, ::V) where {S,T,U,V} error(""" conflicting broadcast rules defined @@ -549,10 +529,10 @@ function _bcs(shape::Tuple, newshape::Tuple) return (_bcs1(shape[1], newshape[1]), _bcs(tail(shape), tail(newshape))...) end # _bcs1 handles the logic for a single dimension -_bcs1(a::Integer, b::Integer) = a == 1 ? b : (b == 1 ? a : (a == b ? a : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $a and $b")))) -_bcs1(a::Integer, b) = a == 1 ? b : (first(b) == 1 && last(b) == a ? b : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $a and $(length(b))"))) +_bcs1(a::Integer, b::Integer) = a == 1 ? b : (b == 1 ? a : (a == b ? a : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size; got a dimension with lengths ", a, " and ", b))))) +_bcs1(a::Integer, b) = a == 1 ? b : (first(b) == 1 && last(b) == a ? b : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size; got a dimension with lengths ", a, " and ", length(b))))) _bcs1(a, b::Integer) = _bcs1(b, a) -_bcs1(a, b) = _bcsm(b, a) ? axistype(b, a) : (_bcsm(a, b) ? axistype(a, b) : throw(DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths $(length(a)) and $(length(b))"))) +_bcs1(a, b) = _bcsm(b, a) ? axistype(b, a) : _bcsm(a, b) ? axistype(a, b) : throw(DimensionMismatch(LazyString("arrays could not be broadcast to a common size: a has axes ", a, " and b has axes ", b))) # _bcsm tests whether the second index is consistent with the first _bcsm(a, b) = a == b || length(b) == 1 _bcsm(a, b::Number) = b == 1 @@ -603,15 +583,15 @@ an `Int`. Any remaining indices in `I` beyond the length of the `keep` tuple are truncated. The `keep` and `default` tuples may be created by `newindexer(argument)`. """ -Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = CartesianIndex(_newindex(axes(arg), I.I)) -Base.@propagate_inbounds newindex(arg, I::Integer) = CartesianIndex(_newindex(axes(arg), (I,))) +Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = to_index(_newindex(axes(arg), I.I)) +Base.@propagate_inbounds newindex(arg, I::Integer) = to_index(_newindex(axes(arg), (I,))) Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...) Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple) = () Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][1], _newindex(tail(ax), ())...) 
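# --- Illustrative sketch (editor's addition, not part of the patch) -----------------
# The per-dimension rule implemented by the `_newindex` methods here, restated
# standalone: when an argument's axis has length 1 it is "broadcast out", so the index
# into that dimension collapses to the axis's first element; otherwise the destination
# index passes through unchanged.
collapse(ax, i) = length(ax) == 1 ? first(ax) : i
A = ones(1, 4)            # hypothetical broadcast argument with a singleton first axis
dest_index = (3, 2)       # index into a hypothetical 3×4 destination
@assert map(collapse, axes(A), dest_index) == (1, 2)
# -------------------------------------------------------------------------------------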
Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = () # If dot-broadcasting were already defined, this would be `ifelse.(keep, I, Idefault)`. -@inline newindex(I::CartesianIndex, keep, Idefault) = CartesianIndex(_newindex(I.I, keep, Idefault)) +@inline newindex(I::CartesianIndex, keep, Idefault) = to_index(_newindex(I.I, keep, Idefault)) @inline newindex(i::Integer, keep::Tuple, idefault) = ifelse(keep[1], i, idefault[1]) @inline newindex(i::Integer, keep::Tuple{}, idefault) = CartesianIndex(()) @inline _newindex(I, keep, Idefault) = @@ -631,22 +611,32 @@ Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = () (Base.length(ind1)::Integer != 1, keep...), (first(ind1), Idefault...) end -@inline function Base.getindex(bc::Broadcasted, I::Union{Integer,CartesianIndex}) +Base.@propagate_inbounds function Base.getindex(bc::Broadcasted, Is::Vararg{Union{Integer,CartesianIndex},N}) where {N} + I = to_index(Base.IteratorsMD.flatten(Is)) + _getindex(IndexStyle(bc), bc, I) +end +@inline function _getindex(::IndexStyle, bc, I) @boundscheck checkbounds(bc, I) @inbounds _broadcast_getindex(bc, I) end -Base.@propagate_inbounds Base.getindex( - bc::Broadcasted, - i1::Union{Integer,CartesianIndex}, - i2::Union{Integer,CartesianIndex}, - I::Union{Integer,CartesianIndex}..., -) = - bc[CartesianIndex((i1, i2, I...))] -Base.@propagate_inbounds Base.getindex(bc::Broadcasted) = bc[CartesianIndex(())] - -@inline Base.checkbounds(bc::Broadcasted, I::Union{Integer,CartesianIndex}) = +Base.@propagate_inbounds function _getindex(s::IndexCartesian, bc, I::Integer) + C = CartesianIndices(axes(bc)) + _getindex(s, bc, C[I]) +end +Base.@propagate_inbounds function _getindex(s::IndexLinear, bc, I::CartesianIndex) + L = LinearIndices(axes(bc)) + _getindex(s, bc, L[I]) +end +to_index(::Tuple{}) = CartesianIndex() +to_index(Is::Tuple{Any}) = Is[1] +to_index(Is::Tuple) = CartesianIndex(Is) + +@inline Base.checkbounds(bc::Broadcasted, I::CartesianIndex) = Base.checkbounds_indices(Bool, axes(bc), (I,)) || Base.throw_boundserror(bc, (I,)) +@inline Base.checkbounds(bc::Broadcasted, I::Integer) = + Base.checkindex(Bool, eachindex(IndexLinear(), bc), I) || Base.throw_boundserror(bc, (I,)) + """ _broadcast_getindex(A, I) @@ -750,8 +740,8 @@ _broadcast_getindex_eltype(A) = eltype(A) # Tuple, Array, etc. eltypes(::Tuple{}) = Tuple{} eltypes(t::Tuple{Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1])) eltypes(t::Tuple{Any,Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2])) -# eltypes(t::Tuple) = (TT = eltypes(tail(t)); TT === Union{} ? Union{} : Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), TT.parameters...)) -eltypes(t::Tuple) = Iterators.TupleOrBottom(ntuple(i -> _broadcast_getindex_eltype(t[i]), Val(length(t)))...) +eltypes(t::Tuple) = (TT = eltypes(tail(t)); TT === Union{} ? Union{} : Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), TT.parameters...)) +# eltypes(t::Tuple) = Iterators.TupleOrBottom(ntuple(i -> _broadcast_getindex_eltype(t[i]), Val(length(t)))...) # Inferred eltype of result of broadcast(f, args...) function combine_eltypes(f, args::Tuple) @@ -782,6 +772,7 @@ The resulting container type is established by the following rules: - All other combinations of arguments default to returning an `Array`, but custom container types can define their own implementation and promotion-like rules to customize the result when they appear as arguments. + - The element type is determined in the same manner as in [`collect`](@ref). 
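Illustrative aside (editor's addition, not part of the original docstring): the last
rule means the element type comes out of the same widening machinery that `collect`
uses, so both of the paths below pick the same element type.

```julia
xs = broadcast(string, 1:3)   # same eltype choice as collect(string(x) for x in 1:3)
@assert eltype(xs) == String == eltype(collect(string(x) for x in 1:3))
```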
A special syntax exists for broadcasting: `f.(args...)` is equivalent to `broadcast(f, args...)`, and nested `f.(g.(args...))` calls are fused into a @@ -1007,26 +998,41 @@ preprocess(dest, x) = extrude(broadcast_unalias(dest, x)) end # Performance optimization: for BitArray outputs, we cache the result -# in a "small" Vector{Bool}, and then copy in chunks into the output +# in a 64-bit register before writing into memory (to bypass LSQ) @inline function copyto!(dest::BitArray, bc::Broadcasted{Nothing}) axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc)) ischunkedbroadcast(dest, bc) && return chunkedcopyto!(dest, bc) - length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc) - tmp = Vector{Bool}(undef, bitcache_size) - destc = dest.chunks - cind = 1 + ndims(dest) == 0 && (dest[] = bc[]; return dest) bc′ = preprocess(dest, bc) - @inbounds for P in Iterators.partition(eachindex(bc′), bitcache_size) - ind = 1 - @simd for I in P - tmp[ind] = bc′[I] - ind += 1 + ax = axes(bc′) + ax1, out = ax[1], CartesianIndices(tail(ax)) + destc, indc = dest.chunks, 0 + bitst, remain = 0, UInt64(0) + for I in out + i = first(ax1) - 1 + if ndims(bc) == 1 || bitst >= 64 - length(ax1) + if ndims(bc) > 1 && bitst != 0 + @inbounds @simd for j = bitst:63 + remain |= UInt64(convert(Bool, bc′[i+=1, I])) << (j & 63) + end + @inbounds destc[indc+=1] = remain + bitst, remain = 0, UInt64(0) + end + while i <= last(ax1) - 64 + z = UInt64(0) + @inbounds @simd for j = 0:63 + z |= UInt64(convert(Bool, bc′[i+=1, I])) << (j & 63) + end + @inbounds destc[indc+=1] = z + end end - @simd for i in ind:bitcache_size - tmp[i] = false + @inbounds @simd for j = i+1:last(ax1) + remain |= UInt64(convert(Bool, bc′[j, I])) << (bitst & 63) + bitst += 1 end - dumpbitcache(destc, cind, tmp) - cind += bitcache_chunks + end + @inbounds if bitst != 0 + destc[indc+=1] = remain end return dest end @@ -1078,7 +1084,7 @@ end @noinline throwdm(axdest, axsrc) = - throw(DimensionMismatch("destination axes $axdest are not compatible with source axes $axsrc")) + throw(DimensionMismatch(LazyString("destination axes ", axdest, " are not compatible with source axes ", axsrc))) function restart_copyto_nonleaf!(newdest, dest, bc, val, I, iter, state, count) # Function barrier that makes the copying to newdest type stable diff --git a/base/c.jl b/base/c.jl index 662986501d59d..c1b34579e0a0b 100644 --- a/base/c.jl +++ b/base/c.jl @@ -2,7 +2,7 @@ # definitions related to C interface -import Core.Intrinsics: cglobal, bitcast +import Core.Intrinsics: cglobal """ cglobal((symbol, library) [, type=Cvoid]) @@ -91,7 +91,7 @@ Equivalent to the native `char` c-type. Cchar # The ccall here is equivalent to Sys.iswindows(), but that's not defined yet -@static if ccall(:jl_get_UNAME, Any, ()) === :NT +if ccall(:jl_get_UNAME, Any, ()) === :NT const Clong = Int32 const Culong = UInt32 const Cwchar_t = UInt16 @@ -122,32 +122,7 @@ Equivalent to the native `wchar_t` c-type ([`Int32`](@ref)). """ Cwchar_t -""" - Cwstring - -A C-style string composed of the native wide character type -[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For -C-style strings composed of the native character -type, see [`Cstring`](@ref). For more information -about string interoperability with C, see the -[manual](@ref man-bits-types). - -""" -Cwstring - -""" - Cstring - -A C-style string composed of the native character type -[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. 
For -C-style strings composed of the native wide character -type, see [`Cwstring`](@ref). For more information -about string interoperability with C, see the -[manual](@ref man-bits-types). -""" -Cstring - -@static if ccall(:jl_get_UNAME, Any, ()) !== :NT +if ccall(:jl_get_UNAME, Any, ()) !== :NT const sizeof_mode_t = ccall(:jl_sizeof_mode_t, Cint, ()) if sizeof_mode_t == 2 const Cmode_t = Int16 @@ -155,292 +130,11 @@ Cstring const Cmode_t = Int32 elseif sizeof_mode_t == 8 const Cmode_t = Int64 + else + error("invalid sizeof mode_t") end end -# construction from pointers -Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p) -Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = bitcast(Cwstring, p) -Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p) -Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = bitcast(Ptr{Cwchar_t}, p) - -convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p) -convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p) -convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p) -convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p) - -""" - pointer(array [, index]) - -Get the native address of an array or string, optionally at a given location `index`. - -This function is "unsafe". Be careful to ensure that a Julia reference to -`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref) -macro should be used to protect the `array` argument from garbage collection -within a given block of code. - -Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity. -""" -function pointer end - -pointer(p::Cstring) = convert(Ptr{Cchar}, p) -pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p) - -# comparisons against pointers (mainly to support `cstr==C_NULL`) -==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y -==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y) - -unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s)) - -# convert strings to String etc. 
to pass as pointers -cconvert(::Type{Cstring}, s::String) = s -cconvert(::Type{Cstring}, s::AbstractString) = - cconvert(Cstring, String(s)::String) - -function cconvert(::Type{Cwstring}, s::AbstractString) - v = transcode(Cwchar_t, String(s)) - !isempty(v) && v[end] == 0 || push!(v, 0) - return v -end - -eltype(::Type{Cstring}) = Cchar -eltype(::Type{Cwstring}) = Cwchar_t - -containsnul(p::Ptr, len) = - C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len) -containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s)) -containsnul(s::AbstractString) = '\0' in s - -function unsafe_convert(::Type{Cstring}, s::Union{String,AbstractVector{UInt8}}) - p = unsafe_convert(Ptr{Cchar}, s) - containsnul(p, sizeof(s)) && - throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) - return Cstring(p) -end - -function unsafe_convert(::Type{Cwstring}, v::Vector{Cwchar_t}) - for i = 1:length(v)-1 - v[i] == 0 && - throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))")) - end - v[end] == 0 || - throw(ArgumentError("C string data must be NUL terminated: $(repr(v))")) - p = unsafe_convert(Ptr{Cwchar_t}, v) - return Cwstring(p) -end - -# symbols are guaranteed not to contain embedded NUL -cconvert(::Type{Cstring}, s::Symbol) = s -unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s)) - -@static if ccall(:jl_get_UNAME, Any, ()) === :NT -""" - Base.cwstring(s) - -Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C -functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit -conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the -same argument. - -This is only available on Windows. -""" -function cwstring(s::AbstractString) - bytes = codeunits(String(s)) - 0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) - return push!(transcode(UInt16, bytes), 0) -end -end - -# transcoding between data in UTF-8 and UTF-16 for Windows APIs, -# and also UTF-32 for APIs using Cwchar_t on other platforms. - -""" - transcode(T, src) - -Convert string data between Unicode encodings. `src` is either a -`String` or a `Vector{UIntXX}` of UTF-XX code units, where -`XX` is 8, 16, or 32. `T` indicates the encoding of the return value: -`String` to return a (UTF-8 encoded) `String` or `UIntXX` -to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref) -can also be used as the integer type, for converting `wchar_t*` strings -used by external C libraries.) - -The `transcode` function succeeds as long as the input data can be -reasonably represented in the target encoding; it always succeeds for -conversions between UTF-XX encodings, even for invalid Unicode data. - -Only conversion to/from UTF-8 is currently supported. 
- -# Examples -```jldoctest -julia> str = "αβγ" -"αβγ" - -julia> transcode(UInt16, str) -3-element Vector{UInt16}: - 0x03b1 - 0x03b2 - 0x03b3 - -julia> transcode(String, transcode(UInt16, str)) -"αβγ" -``` -""" -function transcode end - -transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src -transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src] -transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} = - transcode(T, String(Vector(src))) -transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} = - transcode(T, String(src)) - -function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}}) - buf = IOBuffer() - for c in src - print(buf, Char(c)) - end - take!(buf) -end -transcode(::Type{String}, src::String) = src -transcode(T, src::String) = transcode(T, codeunits(src)) -transcode(::Type{String}, src) = String(transcode(UInt8, src)) - -function transcode(::Type{UInt16}, src::AbstractVector{UInt8}) - require_one_based_indexing(src) - dst = UInt16[] - i, n = 1, length(src) - n > 0 || return dst - sizehint!(dst, 2n) - a = src[1] - while true - if i < n && -64 <= a % Int8 <= -12 # multi-byte character - b = src[i += 1] - if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b - # invalid UTF-8 (non-continuation or too-high code point) - push!(dst, a) - a = b; continue - elseif a < 0xe0 # 2-byte UTF-8 - push!(dst, xor(0x3080, UInt16(a) << 6, b)) - elseif i < n # 3/4-byte character - c = src[i += 1] - if -64 <= (c % Int8) # invalid UTF-8 (non-continuation) - push!(dst, a, b) - a = c; continue - elseif a < 0xf0 # 3-byte UTF-8 - push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c)) - elseif i < n - d = src[i += 1] - if -64 <= (d % Int8) # invalid UTF-8 (non-continuation) - push!(dst, a, b, c) - a = d; continue - elseif a == 0xf0 && b < 0x90 # overlong encoding - push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d)) - else # 4-byte UTF-8 - push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4), - xor(0xdc80, UInt16(c & 0xf) << 6, d)) - end - else # too short - push!(dst, a, b, c) - break - end - else # too short - push!(dst, a, b) - break - end - else # ASCII or invalid UTF-8 (continuation byte or too-high code point) - push!(dst, a) - end - i < n || break - a = src[i += 1] - end - return dst -end - -function transcode(::Type{UInt8}, src::AbstractVector{UInt16}) - require_one_based_indexing(src) - n = length(src) - n == 0 && return UInt8[] - - # Precompute m = sizeof(dst). This involves annoying duplication - # of the loop over the src array. However, this is not just an - # optimization: it is problematic for security reasons to grow - # dst dynamically, because Base.winprompt uses this function to - # convert passwords to UTF-8 and we don't want to make unintentional - # copies of the password data. 
- a = src[1] - i, m = 1, 0 - while true - if a < 0x80 - m += 1 - elseif a < 0x800 # 2-byte UTF-8 - m += 2 - elseif a & 0xfc00 == 0xd800 && i < length(src) - b = src[i += 1] - if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8 - m += 4 - else - m += 3 - a = b; continue - end - else - # 1-unit high UTF-16 or unpaired high surrogate - # either way, encode as 3-byte UTF-8 code point - m += 3 - end - i < n || break - a = src[i += 1] - end - - dst = StringVector(m) - a = src[1] - i, j = 1, 0 - while true - if a < 0x80 # ASCII - dst[j += 1] = a % UInt8 - elseif a < 0x800 # 2-byte UTF-8 - dst[j += 1] = 0xc0 | ((a >> 6) % UInt8) - dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) - elseif a & 0xfc00 == 0xd800 && i < n - b = src[i += 1] - if (b & 0xfc00) == 0xdc00 - # 2-unit UTF-16 sequence => 4-byte UTF-8 - a += 0x2840 - dst[j += 1] = 0xf0 | ((a >> 8) % UInt8) - dst[j += 1] = 0x80 | ((a % UInt8) >> 2) - dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8) - dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f) - else - dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) - dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) - dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) - a = b; continue - end - else - # 1-unit high UTF-16 or unpaired high surrogate - # either way, encode as 3-byte UTF-8 code point - dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) - dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) - dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) - end - i < n || break - a = src[i += 1] - end - return dst -end - -function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}} - transcode(String, unsafe_wrap(Array, p, length; own=false)) -end -function unsafe_string(cw::Cwstring) - p = convert(Ptr{Cwchar_t}, cw) - n = 1 - while unsafe_load(p, n) != 0 - n += 1 - end - return unsafe_string(p, n - 1) -end - # deferring (or un-deferring) ctrl-c handler for external C code that # is not interrupt safe (see also issue #2622). The sigatomic_begin/end # functions should always be called in matched pairs, ideally via: @@ -540,7 +234,7 @@ function expand_ccallable(rt, def) end end return quote - $(esc(def)) + @__doc__ $(esc(def)) _ccallable($(esc(rt)), $(Expr(:curly, :Tuple, esc(f), map(esc, at)...))) end end @@ -576,21 +270,21 @@ The above input outputs this: """ function ccall_macro_parse(expr::Expr) # setup and check for errors - if !Meta.isexpr(expr, :(::)) + if !isexpr(expr, :(::)) throw(ArgumentError("@ccall needs a function signature with a return type")) end rettype = expr.args[2] call = expr.args[1] - if !Meta.isexpr(call, :call) + if !isexpr(call, :call) throw(ArgumentError("@ccall has to take a function call")) end # get the function symbols func = let f = call.args[1] - if Meta.isexpr(f, :.) + if isexpr(f, :.) :(($(f.args[2]), $(f.args[1]))) - elseif Meta.isexpr(f, :$) + elseif isexpr(f, :$) f elseif f isa Symbol QuoteNode(f) @@ -603,7 +297,7 @@ function ccall_macro_parse(expr::Expr) varargs = nothing argstart = 2 callargs = call.args - if length(callargs) >= 2 && Meta.isexpr(callargs[2], :parameters) + if length(callargs) >= 2 && isexpr(callargs[2], :parameters) argstart = 3 varargs = callargs[2].args end @@ -613,7 +307,7 @@ function ccall_macro_parse(expr::Expr) types = [] function pusharg!(arg) - if !Meta.isexpr(arg, :(::)) + if !isexpr(arg, :(::)) throw(ArgumentError("args in @ccall need type annotations. 
'$arg' doesn't have one.")) end push!(args, arg.args[1]) @@ -643,14 +337,14 @@ function ccall_macro_lower(convention, func, rettype, types, args, nreq) statements = [] # if interpolation was used, ensure the value is a function pointer at runtime. - if Meta.isexpr(func, :$) + if isexpr(func, :$) push!(statements, Expr(:(=), :func, esc(func.args[1]))) name = QuoteNode(func.args[1]) func = :func check = quote if !isa(func, Ptr{Cvoid}) name = $name - throw(ArgumentError("interpolated function `$name` was not a Ptr{Cvoid}, but $(typeof(func))")) + throw(ArgumentError(LazyString("interpolated function `", name, "` was not a Ptr{Cvoid}, but ", typeof(func)))) end end push!(statements, check) @@ -715,6 +409,6 @@ macro ccall(expr) return ccall_macro_lower(:ccall, ccall_macro_parse(expr)...) end -macro ccall_effects(effects::UInt8, expr) +macro ccall_effects(effects::UInt16, expr) return ccall_macro_lower((:ccall, effects), ccall_macro_parse(expr)...) end diff --git a/base/cartesian.jl b/base/cartesian.jl index 5f96a2061880f..ca0fc0aac0cfc 100644 --- a/base/cartesian.jl +++ b/base/cartesian.jl @@ -2,7 +2,7 @@ module Cartesian -export @nloops, @nref, @ncall, @nexprs, @nextract, @nall, @nany, @ntuple, @nif +export @nloops, @nref, @ncall, @ncallkw, @nexprs, @nextract, @nall, @nany, @ntuple, @nif ### Cartesian-specific macros @@ -104,10 +104,38 @@ while `@ncall 2 func a b i->c[i]` yields macro ncall(N::Int, f, args...) pre = args[1:end-1] ex = args[end] - vars = Any[ inlineanonymous(ex,i) for i = 1:N ] + vars = (inlineanonymous(ex, i) for i = 1:N) Expr(:escape, Expr(:call, f, pre..., vars...)) end +""" + @ncallkw N f kw sym... + +Generate a function call expression with keyword arguments `kw...`. As +in the case of [`@ncall`](@ref), `sym` represents any number of function arguments, the +last of which may be an anonymous-function expression and is expanded into `N` arguments. + +# Examples +```jldoctest +julia> using Base.Cartesian + +julia> f(x...; a, b = 1, c = 2, d = 3) = +(x..., a, b, c, d); + +julia> x_1, x_2 = (-1, -2); b = 0; kw = (c = 0, d = 0); + +julia> @ncallkw 2 f (; a = 0, b, kw...) x +-3 + +``` +""" +macro ncallkw(N::Int, f, kw, args...) + pre = args[1:end-1] + ex = args[end] + vars = (inlineanonymous(ex, i) for i = 1:N) + param = Expr(:parameters, Expr(:(...), kw)) + Expr(:escape, Expr(:call, f, param, pre..., vars...)) +end + """ @nexprs N expr @@ -374,6 +402,8 @@ function exprresolve_conditional(ex::Expr) return true, exprresolve_cond_dict[callee](ex.args[2], ex.args[3]) end end + elseif Meta.isexpr(ex, :block, 2) && ex.args[1] isa LineNumberNode + return exprresolve_conditional(ex.args[2]) end false, false end @@ -402,10 +432,16 @@ function exprresolve(ex::Expr) return ex.args[1][ex.args[2:end]...] end # Resolve conditionals - if ex.head === :if + if ex.head === :if || ex.head === :elseif can_eval, tf = exprresolve_conditional(ex.args[1]) if can_eval - ex = tf ? 
ex.args[2] : ex.args[3] + if tf + return ex.args[2] + elseif length(ex.args) == 3 + return ex.args[3] + else + return nothing + end end end ex diff --git a/base/channels.jl b/base/channels.jl index 1b5b427f92671..527c22b3d45fd 100644 --- a/base/channels.jl +++ b/base/channels.jl @@ -59,7 +59,7 @@ Channel(sz=0) = Channel{Any}(sz) # special constructors """ - Channel{T=Any}(func::Function, size=0; taskref=nothing, spawn=false) + Channel{T=Any}(func::Function, size=0; taskref=nothing, spawn=false, threadpool=nothing) Create a new task from `func`, bind it to a new channel of type `T` and size `size`, and schedule the task, all in a single call. @@ -70,9 +70,14 @@ The channel is automatically closed when the task terminates. If you need a reference to the created task, pass a `Ref{Task}` object via the keyword argument `taskref`. -If `spawn = true`, the Task created for `func` may be scheduled on another thread +If `spawn=true`, the `Task` created for `func` may be scheduled on another thread in parallel, equivalent to creating a task via [`Threads.@spawn`](@ref). +If `spawn=true` and the `threadpool` argument is not set, it defaults to `:default`. + +If the `threadpool` argument is set (to `:default` or `:interactive`), this implies +that `spawn=true` and the new Task is spawned to the specified threadpool. + Return a `Channel`. # Examples @@ -117,6 +122,9 @@ true In earlier versions of Julia, Channel used keyword arguments to set `size` and `T`, but those constructors are deprecated. +!!! compat "Julia 1.9" + The `threadpool=` argument was added in Julia 1.9. + ```jldoctest julia> chnl = Channel{Char}(1, spawn=true) do ch for c in "hello world" @@ -129,12 +137,18 @@ julia> String(collect(chnl)) "hello world" ``` """ -function Channel{T}(func::Function, size=0; taskref=nothing, spawn=false) where T +function Channel{T}(func::Function, size=0; taskref=nothing, spawn=false, threadpool=nothing) where T chnl = Channel{T}(size) task = Task(() -> func(chnl)) + if threadpool === nothing + threadpool = :default + else + spawn = true + end task.sticky = !spawn bind(chnl, task) if spawn + Threads._spawn_set_thrpool(task, threadpool) schedule(task) # start it on (potentially) another thread else yield(task) # immediately start it, yielding the current thread @@ -149,17 +163,17 @@ Channel(func::Function, args...; kwargs...) = Channel{Any}(func, args...; kwargs # of course not deprecated.) # We use `nothing` default values to check which arguments were set in order to throw the # deprecation warning if users try to use `spawn=` with `ctype=` or `csize=`. -function Channel(func::Function; ctype=nothing, csize=nothing, taskref=nothing, spawn=nothing) +function Channel(func::Function; ctype=nothing, csize=nothing, taskref=nothing, spawn=nothing, threadpool=nothing) # The spawn= keyword argument was added in Julia v1.3, and cannot be used with the # deprecated keyword arguments `ctype=` or `csize=`. - if (ctype !== nothing || csize !== nothing) && spawn !== nothing - throw(ArgumentError("Cannot set `spawn=` in the deprecated constructor `Channel(f; ctype=Any, csize=0)`. Please use `Channel{T=Any}(f, size=0; taskref=nothing, spawn=false)` instead!")) + if (ctype !== nothing || csize !== nothing) && (spawn !== nothing || threadpool !== nothing) + throw(ArgumentError("Cannot set `spawn=` or `threadpool=` in the deprecated constructor `Channel(f; ctype=Any, csize=0)`. 
Please use `Channel{T=Any}(f, size=0; taskref=nothing, spawn=false, threadpool=nothing)` instead!")) end # Set the actual default values for the arguments. ctype === nothing && (ctype = Any) csize === nothing && (csize = 0) spawn === nothing && (spawn = false) - return Channel{ctype}(func, csize; taskref=taskref, spawn=spawn) + return Channel{ctype}(func, csize; taskref=taskref, spawn=spawn, threadpool=threadpool) end closed_exception() = InvalidStateException("Channel is closed.", :closed) @@ -197,7 +211,68 @@ function close(c::Channel, @nospecialize(excp::Exception)) end nothing end -isopen(c::Channel) = ((@atomic :monotonic c.state) === :open) + +""" + isopen(c::Channel) +Determines whether a [`Channel`](@ref) is open for new [`put!`](@ref) operations. +Notice that a `Channel`` can be closed and still have +buffered elements which can be consumed with [`take!`](@ref). + +# Examples + +Buffered channel with task: +```jldoctest +julia> c = Channel(ch -> put!(ch, 1), 1); + +julia> isopen(c) # The channel is closed to new `put!`s +false + +julia> isready(c) # The channel is closed but still contains elements +true + +julia> take!(c) +1 + +julia> isready(c) +false +``` + +Unbuffered channel: +```jldoctest +julia> c = Channel{Int}(); + +julia> isopen(c) +true + +julia> close(c) + +julia> isopen(c) +false +``` +""" +function isopen(c::Channel) + # Use acquire here to pair with release store in `close`, so that subsequent `isready` calls + # are forced to see `isready == true` if they see `isopen == false`. This means users must + # call `isopen` before `isready` if you are using the race-y APIs (or call `iterate`, which + # does this right for you). + return ((@atomic :acquire c.state) === :open) +end + +""" + empty!(c::Channel) + +Empty a Channel `c` by calling `empty!` on the internal buffer. +Return the empty channel. +""" +function Base.empty!(c::Channel) + @lock c begin + ndrop = length(c.data) + empty!(c.data) + _increment_n_avail(c, -ndrop) + notify(c.cond_put) + end + return c +end """ bind(chnl::Channel, task::Task) @@ -484,7 +559,7 @@ end Determines whether a [`Channel`](@ref) has a value stored in it. Returns immediately, does not block. -For unbuffered channels returns `true` if there are tasks waiting on a [`put!`](@ref). +For unbuffered channels, return `true` if there are tasks waiting on a [`put!`](@ref). # Examples @@ -524,6 +599,47 @@ function n_avail(c::Channel) @atomic :monotonic c.n_avail_items end +""" + isfull(c::Channel) + +Determines if a [`Channel`](@ref) is full, in the sense +that calling `put!(c, some_value)` would have blocked. +Returns immediately, does not block. + +Note that it may frequently be the case that `put!` will +not block after this returns `true`. Users must take +precautions not to accidentally create live-lock bugs +in their code by calling this method, as these are +generally harder to debug than deadlocks. It is also +possible that `put!` will block after this call +returns `false`, if there are multiple producer +tasks calling `put!` in parallel. 
+ +# Examples + +Buffered channel: +```jldoctest +julia> c = Channel(1); # capacity = 1 + +julia> isfull(c) +false + +julia> put!(c, 1); + +julia> isfull(c) +true +``` + +Unbuffered channel: +```jldoctest +julia> c = Channel(); # capacity = 0 + +julia> isfull(c) # unbuffered channel is always full +true +``` +""" +isfull(c::Channel) = n_avail(c) ≥ c.sz_max + lock(c::Channel) = lock(c.cond_take) lock(f, c::Channel) = lock(f, c.cond_take) unlock(c::Channel) = unlock(c.cond_take) @@ -600,6 +716,15 @@ function iterate(c::Channel, state=nothing) end end else + # If the channel was closed with an exception, it needs to be thrown + if (@atomic :acquire c.state) === :closed + e = c.excp + if isa(e, InvalidStateException) && e.state === :closed + nothing + else + throw(e) + end + end return nothing end end diff --git a/base/char.jl b/base/char.jl index 08d661c41de56..2e8410f6903e2 100644 --- a/base/char.jl +++ b/base/char.jl @@ -62,7 +62,14 @@ to an output stream, or `ncodeunits(string(c))` but computed efficiently. This method requires at least Julia 1.1. In Julia 1.0 consider using `ncodeunits(string(c))`. """ -ncodeunits(c::Char) = write(devnull, c) # this is surprisingly efficient +function ncodeunits(c::Char) + u = reinterpret(UInt32, c) + # We care about how many trailing bytes are all zero + # subtract that from the total number of bytes + n_nonzero_bytes = sizeof(UInt32) - div(trailing_zeros(u), 0x8) + # Take care of '\0', which has an all-zero bitpattern + n_nonzero_bytes + iszero(u) +end """ codepoint(c::AbstractChar) -> Integer @@ -216,6 +223,7 @@ hash(x::Char, h::UInt) = hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h)) first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8 +first_utf8_byte(c::AbstractChar) = first_utf8_byte(Char(c)::Char) # fallbacks: isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y)) diff --git a/base/checked.jl b/base/checked.jl index d5b4112397e84..b374d34830280 100644 --- a/base/checked.jl +++ b/base/checked.jl @@ -13,7 +13,7 @@ return both the unchecked results and a boolean value denoting the presence of a module Checked export checked_neg, checked_abs, checked_add, checked_sub, checked_mul, - checked_div, checked_rem, checked_fld, checked_mod, checked_cld, + checked_div, checked_rem, checked_fld, checked_mod, checked_cld, checked_pow, checked_length, add_with_overflow, sub_with_overflow, mul_with_overflow import Core.Intrinsics: @@ -358,6 +358,19 @@ The overflow protection may impose a perceptible performance penalty. """ checked_cld(x::T, y::T) where {T<:Integer} = cld(x, y) # Base.cld already checks +""" + Base.checked_pow(x, y) + +Calculates `^(x,y)`, checking for overflow errors where applicable. + +The overflow protection may impose a perceptible performance penalty. +""" +checked_pow(x::Integer, y::Integer) = checked_power_by_squaring(x, y) + +checked_power_by_squaring(x_, p::Integer) = Base.power_by_squaring(x_, p; mul = checked_mul) +# For Booleans, the default implementation covers all cases. 
+checked_power_by_squaring(x::Bool, p::Integer) = Base.power_by_squaring(x, p) + """ Base.checked_length(r) diff --git a/base/client.jl b/base/client.jl index 6e30c9991e45e..e95d518d3e501 100644 --- a/base/client.jl +++ b/base/client.jl @@ -4,6 +4,7 @@ ## and REPL have_color = nothing +have_truecolor = nothing const default_color_warn = :yellow const default_color_error = :light_red const default_color_info = :cyan @@ -40,7 +41,6 @@ function repl_cmd(cmd, out) if isempty(cmd.exec) throw(ArgumentError("no cmd to execute")) elseif cmd.exec[1] == "cd" - new_oldpwd = pwd() if length(cmd.exec) > 2 throw(ArgumentError("cd method only takes one argument")) elseif length(cmd.exec) == 2 @@ -51,11 +51,17 @@ function repl_cmd(cmd, out) end dir = ENV["OLDPWD"] end - cd(dir) else - cd() + dir = homedir() end - ENV["OLDPWD"] = new_oldpwd + try + ENV["OLDPWD"] = pwd() + catch ex + ex isa IOError || rethrow() + # if current dir has been deleted, then pwd() will throw an IOError: pwd(): no such file or directory (ENOENT) + delete!(ENV, "OLDPWD") + end + cd(dir) println(out, pwd()) else @static if !Sys.iswindows() @@ -94,7 +100,7 @@ function scrub_repl_backtrace(bt) if bt !== nothing && !(bt isa Vector{Any}) # ignore our sentinel value types bt = bt isa Vector{StackFrame} ? copy(bt) : stacktrace(bt) # remove REPL-related frames from interactive printing - eval_ind = findlast(frame -> !frame.from_c && frame.func === :eval, bt) + eval_ind = findlast(frame -> !frame.from_c && startswith(String(frame.func), "__repl_entry"), bt) eval_ind === nothing || deleteat!(bt, eval_ind:length(bt)) end return bt @@ -103,8 +109,8 @@ scrub_repl_backtrace(stack::ExceptionStack) = ExceptionStack(Any[(;x.exception, backtrace = scrub_repl_backtrace(x.backtrace)) for x in stack]) istrivialerror(stack::ExceptionStack) = - length(stack) == 1 && length(stack[1].backtrace) ≤ 1 - # frame 1 = top level; assumes already went through scrub_repl_backtrace + length(stack) == 1 && length(stack[1].backtrace) ≤ 1 && !isa(stack[1].exception, MethodError) + # frame 1 = top level; assumes already went through scrub_repl_backtrace; MethodError see #50803 function display_error(io::IO, stack::ExceptionStack) printstyled(io, "ERROR: "; bold=true, color=Base.error_color()) @@ -226,11 +232,8 @@ end incomplete_tag(exc::Meta.ParseError) = incomplete_tag(exc.detail) function exec_options(opts) - quiet = (opts.quiet != 0) startup = (opts.startupfile != 2) - history_file = (opts.historyfile != 0) - color_set = (opts.color != 0) # --color!=auto - global have_color = color_set ? (opts.color == 1) : nothing # --color=on + global have_color = colored_text(opts) global is_interactive = (opts.isinteractive != 0) # pre-process command line argument list @@ -238,21 +241,18 @@ function exec_options(opts) repl = !arg_is_program cmds = unsafe_load_commands(opts.commands) for (cmd, arg) in cmds - if cmd == 'e' + if cmd_suppresses_program(cmd) arg_is_program = false repl = false - elseif cmd == 'E' - arg_is_program = false - repl = false - elseif cmd == 'L' + elseif cmd == 'L' || cmd == 'm' # nothing elseif cmd == 'B' # --bug-report # If we're doing a bug report, don't load anything else. We will # spawn a child in which to execute these options. 
let InteractiveUtils = load_InteractiveUtils() - InteractiveUtils.report_bug(arg) + invokelatest(InteractiveUtils.report_bug, arg) end - return nothing + return false else @warn "Unexpected command -$cmd'$arg'" end @@ -265,8 +265,8 @@ function exec_options(opts) distributed_mode = (opts.worker == 1) || (opts.nprocs > 0) || (opts.machine_file != C_NULL) if distributed_mode let Distributed = require(PkgId(UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) - Core.eval(Main, :(const Distributed = $Distributed)) - Core.eval(Main, :(using .Distributed)) + Core.eval(MainInclude, :(const Distributed = $Distributed)) + Core.eval(Main, :(using Base.MainInclude.Distributed)) end invokelatest(Main.Distributed.process_opts, opts) @@ -275,6 +275,10 @@ function exec_options(opts) interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY) is_interactive::Bool |= interactiveinput + # load terminfo in for styled printing + term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb") + global current_terminfo = load_terminfo(term_env) + # load ~/.julia/config/startup.jl file if startup try @@ -292,6 +296,13 @@ function exec_options(opts) elseif cmd == 'E' invokelatest(show, Core.eval(Main, parse_input_line(arg))) println() + elseif cmd == 'm' + entrypoint = push!(split(arg, "."), "main") + Base.eval(Main, Expr(:import, Expr(:., Symbol.(entrypoint)...))) + if !should_use_main_entrypoint() + error("`main` in `$arg` not declared as entry point (use `@main` to do so)") + end + return false elseif cmd == 'L' # load file immediately on all processors if !distributed_mode @@ -324,15 +335,8 @@ function exec_options(opts) end end end - if repl || is_interactive::Bool - if interactiveinput - banner = (opts.banner != 0) # --banner!=no - else - banner = (opts.banner == 1) # --banner=yes - end - run_main_repl(interactiveinput, quiet, banner, history_file, color_set) - end - nothing + + return repl end function _global_julia_startup_file() @@ -340,11 +344,13 @@ function _global_julia_startup_file() # If it is not found, then continue on to the relative path based on Sys.BINDIR BINDIR = Sys.BINDIR SYSCONFDIR = Base.SYSCONFDIR + p1 = nothing if !isempty(SYSCONFDIR) p1 = abspath(BINDIR, SYSCONFDIR, "julia", "startup.jl") isfile(p1) && return p1 end p2 = abspath(BINDIR, "..", "etc", "julia", "startup.jl") + p1 == p2 && return nothing # don't check the same path twice isfile(p2) && return p2 return nothing end @@ -391,86 +397,68 @@ _atreplinit(repl) = invokelatest(__atreplinit, repl) function load_InteractiveUtils(mod::Module=Main) # load interactive-only libraries - if !isdefined(mod, :InteractiveUtils) + if !isdefined(MainInclude, :InteractiveUtils) try - let InteractiveUtils = require(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils")) - Core.eval(mod, :(const InteractiveUtils = $InteractiveUtils)) - Core.eval(mod, :(using .InteractiveUtils)) - return InteractiveUtils + # TODO: we have to use require_stdlib here because it is a dependency of REPL, but we would sort of prefer not to + let InteractiveUtils = require_stdlib(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils")) + Core.eval(MainInclude, :(const InteractiveUtils = $InteractiveUtils)) end catch ex @warn "Failed to import InteractiveUtils into module $mod" exception=(ex, catch_backtrace()) + return nothing end - return nothing end - return getfield(mod, :InteractiveUtils) + Core.eval(mod, :(using Base.MainInclude.InteractiveUtils)) + return MainInclude.InteractiveUtils end -global active_repl 
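# --- Illustrative sketch (editor's addition, not part of the patch) -----------------
# How the `-m` handling in exec_options above turns a dotted module path into the
# `import` expression that is evaluated before checking for a declared `@main` entry
# point. The argument value "MyApp.CLI" is hypothetical.
arg = "MyApp.CLI"
entrypoint = push!(split(arg, "."), "main")
@assert Expr(:import, Expr(:., Symbol.(entrypoint)...)) == :(import MyApp.CLI.main)
# -------------------------------------------------------------------------------------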
+function load_REPL() + # load interactive-only libraries + try + return Base.require_stdlib(PkgId(UUID(0x3fa0cd96_eef1_5676_8a61_b3b8758bbffb), "REPL")) + catch ex + @warn "Failed to import REPL" exception=(ex, catch_backtrace()) + end + return nothing +end -# run the requested sort of evaluation loop on stdio -function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool) - load_InteractiveUtils() - - if interactive && isassigned(REPL_MODULE_REF) - invokelatest(REPL_MODULE_REF[]) do REPL - term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb") - term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr) - banner && Base.banner(term) - if term.term_type == "dumb" - repl = REPL.BasicREPL(term) - quiet || @warn "Terminal not fully functional" +global active_repl::Any +global active_repl_backend = nothing + +function run_fallback_repl(interactive::Bool) + let input = stdin + if isa(input, File) || isa(input, IOStream) + # for files, we can slurp in the whole thing at once + ex = parse_input_line(read(input, String)) + if Meta.isexpr(ex, :toplevel) + # if we get back a list of statements, eval them sequentially + # as if we had parsed them sequentially + for stmt in ex.args + eval_user_input(stderr, stmt, true) + end + body = ex.args else - repl = REPL.LineEditREPL(term, get(stdout, :color, false), true) - repl.history_file = history_file + eval_user_input(stderr, ex, true) end - global active_repl = repl - # Make sure any displays pushed in .julia/config/startup.jl ends up above the - # REPLDisplay - pushdisplay(REPL.REPLDisplay(repl)) - _atreplinit(repl) - REPL.run_repl(repl, backend->(global active_repl_backend = backend)) - end - else - # otherwise provide a simple fallback - if interactive && !quiet - @warn "REPL provider not available: using basic fallback" - end - banner && Base.banner() - let input = stdin - if isa(input, File) || isa(input, IOStream) - # for files, we can slurp in the whole thing at once - ex = parse_input_line(read(input, String)) - if Meta.isexpr(ex, :toplevel) - # if we get back a list of statements, eval them sequentially - # as if we had parsed them sequentially - for stmt in ex.args - eval_user_input(stderr, stmt, true) - end - body = ex.args - else - eval_user_input(stderr, ex, true) + else + while !eof(input) + if interactive + print("julia> ") + flush(stdout) end - else - while isopen(input) || !eof(input) - if interactive - print("julia> ") - flush(stdout) - end - try - line = "" - ex = nothing - while !eof(input) - line *= readline(input, keep=true) - ex = parse_input_line(line) - if !(isa(ex, Expr) && ex.head === :incomplete) - break - end + try + line = "" + ex = nothing + while !eof(input) + line *= readline(input, keep=true) + ex = parse_input_line(line) + if !(isa(ex, Expr) && ex.head === :incomplete) + break end - eval_user_input(stderr, ex, true) - catch err - isa(err, InterruptException) ? print("\n\n") : rethrow() end + eval_user_input(stderr, ex, true) + catch err + isa(err, InterruptException) ? print("\n\n") : rethrow() end end end @@ -478,17 +466,61 @@ function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_fil nothing end -# MainInclude exists to hide Main.include and eval from `names(Main)`. +function run_std_repl(REPL::Module, quiet::Bool, banner::Symbol, history_file::Bool) + term_env = get(ENV, "TERM", @static Sys.iswindows() ? 
"" : "dumb") + term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr) + banner == :no || REPL.banner(term, short=banner==:short) + if term.term_type == "dumb" + repl = REPL.BasicREPL(term) + quiet || @warn "Terminal not fully functional" + else + repl = REPL.LineEditREPL(term, get(stdout, :color, false), true) + repl.history_file = history_file + end + # Make sure any displays pushed in .julia/config/startup.jl ends up above the + # REPLDisplay + d = REPL.REPLDisplay(repl) + last_active_repl = @isdefined(active_repl) ? active_repl : nothing + last_active_repl_backend = active_repl_backend + global active_repl = repl + pushdisplay(d) + try + global active_repl = repl + _atreplinit(repl) + REPL.run_repl(repl, backend->(global active_repl_backend = backend)) + finally + popdisplay(d) + active_repl = last_active_repl + active_repl_backend = last_active_repl_backend + end + nothing +end + +# run the requested sort of evaluation loop on stdio +function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool) + fallback_repl = parse(Bool, get(ENV, "JULIA_FALLBACK_REPL", "false")) + if !fallback_repl && interactive + load_InteractiveUtils() + REPL = REPL_MODULE_REF[] + if REPL === Base + load_REPL() + end + end + REPL = REPL_MODULE_REF[] + if !fallback_repl && interactive && REPL !== Base + invokelatest(run_std_repl, REPL, quiet, banner, history_file) + else + if !fallback_repl && interactive && !quiet + @warn "REPL provider not available: using basic fallback" LOAD_PATH=join(Base.LOAD_PATH, Sys.iswindows() ? ';' : ':') + end + run_fallback_repl(interactive) + end + nothing +end + +# MainInclude exists to weakly add certain identifiers to Main baremodule MainInclude using ..Base -# These definitions calls Base._include rather than Base.include to get -# one-frame stacktraces for the common case of using include(fname) in Main. -include(mapexpr::Function, fname::AbstractString) = Base._include(mapexpr, Main, fname) -function include(fname::AbstractString) - isa(fname, String) || (fname = Base.convert(String, fname)::String) - Base._include(identity, Main, fname) -end -eval(x) = Core.eval(Main, x) """ ans @@ -507,54 +539,124 @@ global err = nothing # weakly exposes ans and err variables to Main export ans, err - end -""" - eval(expr) - -Evaluate an expression in the global scope of the containing module. -Every `Module` (except those defined with `baremodule`) has its own 1-argument -definition of `eval`, which evaluates expressions in that module. -""" -MainInclude.eval - -""" - include([mapexpr::Function,] path::AbstractString) - -Evaluate the contents of the input source file in the global scope of the containing module. -Every module (except those defined with `baremodule`) has its own -definition of `include`, which evaluates the file in that module. -Returns the result of the last evaluated expression of the input file. During including, -a task-local include path is set to the directory containing the file. Nested calls to -`include` will search relative to that path. This function is typically used to load source -interactively, or to combine files in packages that are broken into multiple source files. -The argument `path` is normalized using [`normpath`](@ref) which will resolve -relative path tokens such as `..` and convert `/` to the appropriate path separator. 
- -The optional first argument `mapexpr` can be used to transform the included code before -it is evaluated: for each parsed expression `expr` in `path`, the `include` function -actually evaluates `mapexpr(expr)`. If it is omitted, `mapexpr` defaults to [`identity`](@ref). - -Use [`Base.include`](@ref) to evaluate a file into another module. - -!!! compat "Julia 1.5" - Julia 1.5 is required for passing the `mapexpr` argument. -""" -MainInclude.include +function should_use_main_entrypoint() + isdefined(Main, :main) || return false + M_binding_owner = Base.binding_module(Main, :main) + (isdefined(M_binding_owner, Symbol("#__main_is_entrypoint__#")) && M_binding_owner.var"#__main_is_entrypoint__#") || return false + return true +end function _start() empty!(ARGS) append!(ARGS, Core.ARGS) # clear any postoutput hooks that were saved in the sysimage empty!(Base.postoutput_hooks) + local ret = 0 try - exec_options(JLOptions()) + repl_was_requested = exec_options(JLOptions()) + if should_use_main_entrypoint() && !is_interactive + if Base.generating_output() + precompile(Main.main, (typeof(ARGS),)) + else + ret = invokelatest(Main.main, ARGS) + end + elseif (repl_was_requested || is_interactive) + # Run the Base `main`, which will either load the REPL stdlib + # or run the fallback REPL + ret = repl_main(ARGS) + end + ret === nothing && (ret = 0) + ret = Cint(ret) catch + ret = Cint(1) invokelatest(display_error, scrub_repl_backtrace(current_exceptions())) - exit(1) end if is_interactive && get(stdout, :color, false) print(color_normal) end + return ret +end + +function repl_main(_) + opts = Base.JLOptions() + interactiveinput = isa(stdin, Base.TTY) + b = opts.banner + auto = b == -1 + banner = b == 0 || (auto && !interactiveinput) ? :no : + b == 1 || (auto && interactiveinput) ? :yes : + :short # b == 2 + + quiet = (opts.quiet != 0) + history_file = (opts.historyfile != 0) + return run_main_repl(interactiveinput, quiet, banner, history_file) +end + +""" + @main + +This macro is used to mark that the binding `main` in the current module is considered an +entrypoint. The precise semantics of the entrypoint depend on the CLI driver. + +In the `julia` driver, if `Main.main` is marked as an entrypoint, it will be automatically called upon +the completion of script execution. + +The `@main` macro may be used standalone or as part of the function definition, though in the latter +case, parentheses are required. In particular, the following are equivalent: + +``` +function (@main)(args) + println("Hello World") +end +``` + +``` +function main(args) +end +@main +``` + +## Detailed semantics + +The entrypoint semantics attach to the owner of the binding owner. In particular, if a marked entrypoint is +imported into `Main`, it will be treated as an entrypoint in `Main`: + +``` +module MyApp + export main + (@main)(args) = println("Hello World") +end +using .MyApp +# `julia` Will execute MyApp.main at the conclusion of script execution +``` + +Note that in particular, the semantics do not attach to the method +or the name: +``` +module MyApp + (@main)(args) = println("Hello World") +end +const main = MyApp.main +# `julia` Will *NOT* execute MyApp.main unless there is a separate `@main` annotation in `Main` +``` + +!!! compat "Julia 1.11" + This macro is new in Julia 1.11. At present, the precise semantics of `@main` are still subject to change. +""" +macro main(args...) 
+ if !isempty(args) + error("`@main` is expected to be used as `(@main)` without macro arguments.") + end + if isdefined(__module__, :main) + if Base.binding_module(__module__, :main) !== __module__ + error("Symbol `main` is already a resolved import in module $(__module__). `@main` must be used in the defining module.") + end + end + Core.eval(__module__, quote + # Force the binding to resolve to this module + global main + global var"#__main_is_entrypoint__#"::Bool = true + end) + esc(:main) end diff --git a/base/cmd.jl b/base/cmd.jl index 475a62a82d4d7..b46c8293cdf3c 100644 --- a/base/cmd.jl +++ b/base/cmd.jl @@ -3,9 +3,10 @@ abstract type AbstractCmd end # libuv process option flags -const UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS = UInt8(1 << 2) -const UV_PROCESS_DETACHED = UInt8(1 << 3) -const UV_PROCESS_WINDOWS_HIDE = UInt8(1 << 4) +const UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS = UInt32(1 << 2) +const UV_PROCESS_DETACHED = UInt32(1 << 3) +const UV_PROCESS_WINDOWS_HIDE = UInt32(1 << 4) +const UV_PROCESS_WINDOWS_DISABLE_EXACT_NAME = UInt32(1 << 7) struct Cmd <: AbstractCmd exec::Vector{String} @@ -14,7 +15,7 @@ struct Cmd <: AbstractCmd env::Union{Vector{String},Nothing} dir::String cpus::Union{Nothing,Vector{UInt16}} - Cmd(exec::Vector{String}) = + Cmd(exec::Vector{<:AbstractString}) = new(exec, false, 0x00, nothing, "", nothing) Cmd(cmd::Cmd, ignorestatus, flags, env, dir, cpus = nothing) = new(cmd.exec, ignorestatus, flags, env, @@ -481,10 +482,16 @@ function cmd_gen(parsed) end end +@assume_effects :foldable !:consistent function cmd_gen( + parsed::Tuple{Vararg{Tuple{Vararg{Union{String, SubString{String}}}}}} +) + return @invoke cmd_gen(parsed::Any) +end + """ @cmd str -Similar to `cmd`, generate a `Cmd` from the `str` string which represents the shell command(s) to be executed. +Similar to ``` `str` ```, generate a `Cmd` from the `str` string which represents the shell command(s) to be executed. The [`Cmd`](@ref) object can be run as a process and can outlive the spawning julia process (see `Cmd` for more). # Examples diff --git a/base/cmem.jl b/base/cmem.jl index 8b0b99b3a6ebd..dd4cbc30585f2 100644 --- a/base/cmem.jl +++ b/base/cmem.jl @@ -10,6 +10,7 @@ Call `memcpy` from the C standard library. """ function memcpy(dst::Ptr, src::Ptr, n::Integer) + @_terminates_globally_meta ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) end @@ -23,6 +24,7 @@ Call `memmove` from the C standard library. """ function memmove(dst::Ptr, src::Ptr, n::Integer) + @_terminates_globally_meta ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) end @@ -36,6 +38,7 @@ Call `memset` from the C standard library. """ function memset(p::Ptr, val, n::Integer) + @_terminates_globally_meta ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), p, val, n) end @@ -49,5 +52,6 @@ Call `memcmp` from the C standard library. 
""" function memcmp(a::Ptr, b::Ptr, n::Integer) + @_terminates_globally_meta ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, n % Csize_t) % Int end diff --git a/base/combinatorics.jl b/base/combinatorics.jl index d09a5b6c0ce83..3672a19e19998 100644 --- a/base/combinatorics.jl +++ b/base/combinatorics.jl @@ -2,23 +2,30 @@ # Factorials -const _fact_table64 = Vector{Int64}(undef, 20) -_fact_table64[1] = 1 -for n in 2:20 - _fact_table64[n] = _fact_table64[n-1] * n +const _fact_table64 = let _fact_table64 = Vector{Int64}(undef, 20) + _fact_table64[1] = 1 + for n in 2:20 + _fact_table64[n] = _fact_table64[n-1] * n + end + Tuple(_fact_table64) end -const _fact_table128 = Vector{UInt128}(undef, 34) -_fact_table128[1] = 1 -for n in 2:34 - _fact_table128[n] = _fact_table128[n-1] * n +const _fact_table128 = let _fact_table128 = Vector{UInt128}(undef, 34) + _fact_table128[1] = 1 + for n in 2:34 + _fact_table128[n] = _fact_table128[n-1] * n + end + Tuple(_fact_table128) end -function factorial_lookup(n::Integer, table, lim) - n < 0 && throw(DomainError(n, "`n` must not be negative.")) - n > lim && throw(OverflowError(string(n, " is too large to look up in the table; consider using `factorial(big(", n, "))` instead"))) - n == 0 && return one(n) - @inbounds f = table[n] +function factorial_lookup( + n::Union{Checked.SignedInt,Checked.UnsignedInt}, + table::Union{NTuple{20,Int64},NTuple{34,UInt128}}, lim::Int) + idx = Int(n) + idx < 0 && throw(DomainError(n, "`n` must not be negative.")) + idx > lim && throw(OverflowError(lazy"$n is too large to look up in the table; consider using `factorial(big($n))` instead")) + idx == 0 && return one(n) + f = getfield(table, idx) return oftype(n, f) end @@ -136,27 +143,43 @@ function permutecols!!(a::AbstractMatrix, p::AbstractVector{<:Integer}) a end -function permute!!(a, p::AbstractVector{<:Integer}) +# Row and column permutations for AbstractMatrix +permutecols!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _permute!(a, p, Base.swapcols!) +permuterows!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _permute!(a, p, Base.swaprows!) +@inline function _permute!(a::AbstractMatrix, p::AbstractVector{<:Integer}, swapfun!::F) where {F} require_one_based_indexing(a, p) - count = 0 - start = 0 - while count < length(a) - ptr = start = findnext(!iszero, p, start+1)::Int - temp = a[start] - next = p[start] - count += 1 - while next != start - a[ptr] = a[next] - p[ptr] = 0 - ptr = next - next = p[next] - count += 1 + p .= .-p + for i in 1:length(p) + p[i] > 0 && continue + j = i + in = p[j] = -p[j] + while p[in] < 0 + swapfun!(a, in, j) + j = in + in = p[in] = -p[in] end - a[ptr] = temp - p[ptr] = 0 end a end +invpermutecols!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _invpermute!(a, p, Base.swapcols!) +invpermuterows!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _invpermute!(a, p, Base.swaprows!) +@inline function _invpermute!(a::AbstractMatrix, p::AbstractVector{<:Integer}, swapfun!::F) where {F} + require_one_based_indexing(a, p) + p .= .-p + for i in 1:length(p) + p[i] > 0 && continue + j = p[i] = -p[i] + while j != i + swapfun!(a, j, i) + j = p[j] = -p[j] + end + end + a +end """ permute!(v, p) @@ -169,6 +192,8 @@ it is even faster to write into a pre-allocated output array with `u .= @view v[ (Even though `permute!` overwrites `v` in-place, it internally requires some allocation to keep track of which elements have been moved.) +$(_DOCS_ALIASING_WARNING) + See also [`invpermute!`](@ref). 
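The new `_permute!`/`_invpermute!` helpers above avoid extra allocation by negating entries of `p` to mark unvisited positions and then walking each permutation cycle with the supplied swap function (`Base.swapcols!` or `Base.swaprows!` for matrices). A sketch of the same marking trick applied to a plain vector, equivalent to `permute!(v, p)`, is shown below; `cycle_permute!` is a hypothetical name, not the Base implementation.

```julia
# Permute `v` in place so that v[i] becomes the old v[p[i]], following cycles
# and using the sign of p[k] as a "visited" mark, as `_permute!` above does.
# `p` must be a valid permutation; it is restored to positive values on return.
function cycle_permute!(v::AbstractVector, p::AbstractVector{<:Integer})
    Base.require_one_based_indexing(v, p)
    p .= .-p                     # mark every position as not yet visited
    for i in eachindex(p)
        p[i] > 0 && continue     # already handled as part of an earlier cycle
        j = i
        k = p[j] = -p[j]         # unmark p[j] and read the cycle's next index
        while p[k] < 0
            v[k], v[j] = v[j], v[k]
            j = k
            k = p[k] = -p[k]
        end
    end
    return v
end
```

Since `p` doubles as the visited-marker scratch space in this sketch, `v` and `p` must not alias each other.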
# Examples @@ -189,30 +214,6 @@ julia> A """ permute!(v, p::AbstractVector) = (v .= v[p]) -function invpermute!!(a, p::AbstractVector{<:Integer}) - require_one_based_indexing(a, p) - count = 0 - start = 0 - while count < length(a) - start = findnext(!iszero, p, start+1)::Int - temp = a[start] - next = p[start] - count += 1 - while next != start - temp_next = a[next] - a[next] = temp - temp = temp_next - ptr = p[next] - p[next] = 0 - next = ptr - count += 1 - end - a[next] = temp - p[next] = 0 - end - a -end - """ invpermute!(v, p) @@ -222,6 +223,8 @@ Note that if you have a pre-allocated output array (e.g. `u = similar(v)`), it is quicker to instead employ `u[p] = v`. (`invpermute!` internally allocates a copy of the data.) +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1, 1, 3, 4]; @@ -283,7 +286,7 @@ julia> B[invperm(v)] """ function invperm(a::AbstractVector) require_one_based_indexing(a) - b = zero(a) # similar vector of zeros + b = fill!(similar(a), zero(eltype(a))) # mutable vector of zeros n = length(a) @inbounds for (i, j) in enumerate(a) ((1 <= j <= n) && b[j] == 0) || diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl deleted file mode 100644 index 0cf3e6c00a1b7..0000000000000 --- a/base/compiler/abstractinterpretation.jl +++ /dev/null @@ -1,3162 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# See if the inference result of the current statement's result value might affect -# the final answer for the method (aside from optimization potential and exceptions). -# To do that, we need to check both for slot assignment and SSA usage. -call_result_unused(sv::InferenceState, currpc::Int) = - isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc]) -call_result_unused(si::StmtInfo) = !si.used - -function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), - arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype), - sv::AbsIntState, max_methods::Int) - ⊑ₚ = ⊑(ipo_lattice(interp)) - if !should_infer_this_call(interp, sv) - add_remark!(interp, sv, "Skipped call in throw block") - # At this point we are guaranteed to end up throwing on this path, - # which is all that's required for :consistent-cy. Of course, we don't - # know anything else about this statement. 
- effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed=!isoverlayed(method_table(interp))) - return CallMeta(Any, effects, NoCallInfo()) - end - - argtypes = arginfo.argtypes - matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp), - InferenceParams(interp).max_union_splitting, max_methods) - if isa(matches, FailedMethodMatch) - add_remark!(interp, sv, matches.reason) - return CallMeta(Any, Effects(), NoCallInfo()) - end - - (; valid_worlds, applicable, info) = matches - update_valid_age!(sv, valid_worlds) - napplicable = length(applicable) - rettype = Bottom - edges = MethodInstance[] - conditionals = nothing # keeps refinement information of call argument types when the return type is boolean - seen = 0 # number of signatures actually inferred - any_const_result = false - const_results = Union{Nothing,ConstResult}[] - multiple_matches = napplicable > 1 - fargs = arginfo.fargs - all_effects = EFFECTS_TOTAL - if !matches.nonoverlayed - # currently we don't have a good way to execute the overlayed method definition, - # so we should give up concrete eval when any of the matched methods is overlayed - f = nothing - all_effects = Effects(all_effects; nonoverlayed=false) - end - - 𝕃ₚ = ipo_lattice(interp) - for i in 1:napplicable - match = applicable[i]::MethodMatch - method = match.method - sig = match.spec_types - if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) - # only infer concrete call sites in top-level expressions - add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression") - break - end - this_rt = Bottom - splitunions = false - # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now - # sigtuple = unwrap_unionall(sig)::DataType - # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting - if splitunions - splitsigs = switchtupleunion(sig) - for sig_n in splitsigs - result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv) - (; rt, edge, effects) = result - this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i] - this_arginfo = ArgInfo(fargs, this_argtypes) - const_call_result = abstract_call_method_with_const_args(interp, - result, f, this_arginfo, si, match, sv) - const_result = nothing - if const_call_result !== nothing - if const_call_result.rt ⊑ₚ rt - rt = const_call_result.rt - (; effects, const_result, edge) = const_call_result - else - add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") - end - end - all_effects = merge_effects(all_effects, effects) - push!(const_results, const_result) - any_const_result |= const_result !== nothing - edge === nothing || push!(edges, edge) - this_rt = tmerge(this_rt, rt) - if bail_out_call(interp, this_rt, sv) - break - end - end - this_conditional = ignorelimited(this_rt) - this_rt = widenwrappedconditional(this_rt) - else - result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv) - (; rt, edge, effects) = result - this_conditional = ignorelimited(rt) - this_rt = widenwrappedconditional(rt) - # try constant propagation with argtypes for this match - # this is in preparation for inlining, or improving the return result - this_argtypes = isa(matches, MethodMatches) ? 
argtypes : matches.applicable_argtypes[i] - this_arginfo = ArgInfo(fargs, this_argtypes) - const_call_result = abstract_call_method_with_const_args(interp, - result, f, this_arginfo, si, match, sv) - const_result = nothing - if const_call_result !== nothing - this_const_conditional = ignorelimited(const_call_result.rt) - this_const_rt = widenwrappedconditional(const_call_result.rt) - # return type of const-prop' inference can be wider than that of non const-prop' inference - # e.g. in cases when there are cycles but cached result is still accurate - if this_const_rt ⊑ₚ this_rt - this_conditional = this_const_conditional - this_rt = this_const_rt - (; effects, const_result, edge) = const_call_result - else - add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") - end - end - all_effects = merge_effects(all_effects, effects) - push!(const_results, const_result) - any_const_result |= const_result !== nothing - edge === nothing || push!(edges, edge) - end - @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context" - seen += 1 - rettype = tmerge(𝕃ₚ, rettype, this_rt) - if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing - if conditionals === nothing - conditionals = Any[Bottom for _ in 1:length(argtypes)], - Any[Bottom for _ in 1:length(argtypes)] - end - for i = 1:length(argtypes) - cnd = conditional_argtype(this_conditional, sig, argtypes, i) - conditionals[1][i] = tmerge(conditionals[1][i], cnd.thentype) - conditionals[2][i] = tmerge(conditionals[2][i], cnd.elsetype) - end - end - if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) - add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.") - break - end - end - - if any_const_result && seen == napplicable - @assert napplicable == nmatches(info) == length(const_results) - info = ConstCallInfo(info, const_results) - end - - if seen ≠ napplicable - # there is unanalyzed candidate, widen type and effects to the top - rettype = Any - all_effects = Effects() - elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) : - (!all(matches.fullmatches) || any_ambig(matches)) - # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. - all_effects = Effects(all_effects; nothrow=false) - end - - rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals) - - # Also considering inferring the compilation signature for this method, so - # it is available to the compiler in case it ends up needing it. 
- if (isa(sv, InferenceState) && infer_compilation_signature(interp) && - (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom && - !is_removable_if_unused(all_effects)) - match = applicable[1]::MethodMatch - method = match.method - sig = match.spec_types - mi = specialize_method(match; preexisting=true) - if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) - csig = get_compileable_sig(method, sig, match.sparams) - if csig !== nothing && csig !== sig - abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv) - end - end - end - - if call_result_unused(si) && !(rettype === Bottom) - add_remark!(interp, sv, "Call result type was widened because the return value is unused") - # We're mainly only here because the optimizer might want this code, - # but we ourselves locally don't typically care about it locally - # (beyond checking if it always throws). - # So avoid adding an edge, since we don't want to bother attempting - # to improve our result even if it does change (to always throw), - # and avoid keeping track of a more complex result type. - rettype = Any - end - add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv) - if isa(sv, InferenceState) - # TODO (#48913) implement a proper recursion handling for irinterp: - # This works just because currently the `:terminate` condition guarantees that - # irinterp doesn't fail into unresolved cycles, but it's not a good solution. - # We should revisit this once we have a better story for handling cycles in irinterp. - if !isempty(sv.pclimitations) # remove self, if present - delete!(sv.pclimitations, sv) - for caller in callers_in_cycle(sv) - delete!(sv.pclimitations, caller) - end - end - end - return CallMeta(rettype, all_effects, info) -end - -struct FailedMethodMatch - reason::String -end - -struct MethodMatches - applicable::Vector{Any} - info::MethodMatchInfo - valid_worlds::WorldRange - mt::MethodTable - fullmatch::Bool - nonoverlayed::Bool -end -any_ambig(info::MethodMatchInfo) = info.results.ambig -any_ambig(m::MethodMatches) = any_ambig(m.info) - -struct UnionSplitMethodMatches - applicable::Vector{Any} - applicable_argtypes::Vector{Vector{Any}} - info::UnionSplitInfo - valid_worlds::WorldRange - mts::Vector{MethodTable} - fullmatches::Vector{Bool} - nonoverlayed::Bool -end -any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches) - -function find_matching_methods(𝕃::AbstractLattice, - argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView, - max_union_splitting::Int, max_methods::Int) - # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type - if 1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting - split_argtypes = switchtupleunion(𝕃, argtypes) - infos = MethodMatchInfo[] - applicable = Any[] - applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match - valid_worlds = WorldRange() - mts = MethodTable[] - fullmatches = Bool[] - nonoverlayed = true - for i in 1:length(split_argtypes) - arg_n = split_argtypes[i]::Vector{Any} - sig_n = argtypes_to_type(arg_n) - mt = ccall(:jl_method_table_for, Any, (Any,), sig_n) - mt === nothing && return FailedMethodMatch("Could not identify method table for call") - mt = mt::MethodTable - result = findall(sig_n, method_table; limit = max_methods) - if result === nothing - return FailedMethodMatch("For one of the union split cases, too many methods matched") - end - (; matches, 
overlayed) = result - nonoverlayed &= !overlayed - push!(infos, MethodMatchInfo(matches)) - for m in matches - push!(applicable, m) - push!(applicable_argtypes, arg_n) - end - valid_worlds = intersect(valid_worlds, matches.valid_worlds) - thisfullmatch = any(match::MethodMatch->match.fully_covers, matches) - found = false - for (i, mt′) in enumerate(mts) - if mt′ === mt - fullmatches[i] &= thisfullmatch - found = true - break - end - end - if !found - push!(mts, mt) - push!(fullmatches, thisfullmatch) - end - end - return UnionSplitMethodMatches(applicable, - applicable_argtypes, - UnionSplitInfo(infos), - valid_worlds, - mts, - fullmatches, - nonoverlayed) - else - mt = ccall(:jl_method_table_for, Any, (Any,), atype) - if mt === nothing - return FailedMethodMatch("Could not identify method table for call") - end - mt = mt::MethodTable - result = findall(atype, method_table; limit = max_methods) - if result === nothing - # this means too many methods matched - # (assume this will always be true, so we don't compute / update valid age in this case) - return FailedMethodMatch("Too many methods matched") - end - (; matches, overlayed) = result - fullmatch = any(match::MethodMatch->match.fully_covers, matches) - return MethodMatches(matches.matches, - MethodMatchInfo(matches), - matches.valid_worlds, - mt, - fullmatch, - !overlayed) - end -end - -""" - from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState, - arginfo::ArgInfo, maybecondinfo) -> newrt - -Converts inter-procedural return type `rt` into a local lattice element `newrt`, -that is appropriate in the context of current local analysis frame `sv`, especially: -- unwraps `rt::LimitedAccuracy` and collects its limitations into the current frame `sv` -- converts boolean `rt` to new boolean `newrt` in a way `newrt` can propagate extra conditional - refinement information, e.g. translating `rt::InterConditional` into `newrt::Conditional` - that holds a type constraint information about a variable in `sv` - -This function _should_ be used wherever we propagate results returned from -`abstract_call_method` or `abstract_call_method_with_const_args`. - -When `maybecondinfo !== nothing`, this function also tries extra conditional argument type refinement. -In such cases `maybecondinfo` should be either of: -- `maybecondinfo::Tuple{Vector{Any},Vector{Any}}`: precomputed argument type refinement information -- method call signature tuple type -When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by -`tmerge`ing argument signature type of each method call. 
-""" -function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState, - arginfo::ArgInfo, @nospecialize(maybecondinfo)) - rt = collect_limitations!(rt, sv) - if isa(rt, InterMustAlias) - rt = from_intermustalias(rt, arginfo) - elseif is_lattice_bool(ipo_lattice(interp), rt) - if maybecondinfo === nothing - rt = widenconditional(rt) - else - rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo) - end - end - @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context" - return rt -end - -function collect_limitations!(@nospecialize(typ), sv::InferenceState) - if isa(typ, LimitedAccuracy) - union!(sv.pclimitations, typ.causes) - return typ.typ - end - return typ -end - -function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo) - fargs = arginfo.fargs - if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs) - arg = fargs[rt.slot] - if isa(arg, SlotNumber) - argtyp = widenslotwrapper(arginfo.argtypes[rt.slot]) - if rt.vartyp ⊑ argtyp - return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp) - else - # TODO optimize this case? - end - end - end - return widenmustalias(rt) -end - -function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, - arginfo::ArgInfo, @nospecialize(maybecondinfo)) - has_conditional(𝕃ᵢ, sv) || return widenconditional(rt) - (; fargs, argtypes) = arginfo - fargs === nothing && return widenconditional(rt) - slot = 0 - alias = nothing - thentype = elsetype = Any - condval = maybe_extract_const_bool(rt) - for i in 1:length(fargs) - # find the first argument which supports refinement, - # and intersect all equivalent arguments with it - argtyp = argtypes[i] - if alias === nothing - arg = ssa_def_slot(fargs[i], sv) - if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type - old = argtyp - id = slot_id(arg) - elseif argtyp isa MustAlias - old = argtyp.fldtyp - id = argtyp.slot - else - continue # unlikely to refine - end - elseif argtyp isa MustAlias && issubalias(argtyp, alias) - arg = nothing - old = alias.fldtyp - id = alias.slot - else - continue - end - if slot == 0 || id == slot - if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}}) - # if we have already computed argument refinement information, apply that now to get the result - new_thentype = maybecondinfo[1][i] - new_elsetype = maybecondinfo[2][i] - else - # otherwise compute it on the fly - cnd = conditional_argtype(rt, maybecondinfo, argtypes, i) - new_thentype = cnd.thentype - new_elsetype = cnd.elsetype - end - if condval === false - thentype = Bottom - elseif ⊑(𝕃ᵢ, new_thentype, thentype) - thentype = new_thentype - else - thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype)) - end - if condval === true - elsetype = Bottom - elseif ⊑(𝕃ᵢ, new_elsetype, elsetype) - elsetype = new_elsetype - else - elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype)) - end - if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, thentype, old) - slot = id - if !(arg isa SlotNumber) && argtyp isa MustAlias - alias = argtyp - end - elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old) - slot = id - if !(arg isa SlotNumber) && argtyp isa MustAlias - alias = argtyp - end - else # reset: no new useful information for this slot - slot = 0 - alias = nothing - thentype = elsetype = Any - end - end - end - if thentype === Bottom && elsetype === Bottom - return Bottom # accidentally proved this call to be dead / throw ! 
- elseif slot > 0 - if alias !== nothing - return form_mustalias_conditional(alias, thentype, elsetype) - end - return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot - end - return widenconditional(rt) -end - -function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int) - if isa(rt, InterConditional) && rt.slot == i - return rt - else - thentype = elsetype = tmeet(widenslotwrapper(argtypes[i]), fieldtype(sig, i)) - condval = maybe_extract_const_bool(rt) - condval === true && (elsetype = Bottom) - condval === false && (thentype = Bottom) - return InterConditional(i, thentype, elsetype) - end -end - -function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), all_effects::Effects, - edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype), - sv::AbsIntState) - # don't bother to add backedges when both type and effects information are already - # maximized to the top since a new method couldn't refine or widen them anyway - if rettype === Any - # ignore the `:nonoverlayed` property if `interp` doesn't use overlayed method table - # since it will never be tainted anyway - if !isoverlayed(method_table(interp)) - all_effects = Effects(all_effects; nonoverlayed=false) - end - if (# ignore the `:noinbounds` property if `:consistent`-cy is tainted already - (sv isa InferenceState && sv.ipo_effects.consistent === ALWAYS_FALSE) || - all_effects.consistent === ALWAYS_FALSE || - # or this `:noinbounds` doesn't taint it - !stmt_taints_inbounds_consistency(sv)) - all_effects = Effects(all_effects; noinbounds=false) - end - all_effects === Effects() && return nothing - end - for edge in edges - add_backedge!(sv, edge) - end - # also need an edge to the method table in case something gets - # added that did not intersect with any existing method - if isa(matches, MethodMatches) - matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype) - else - for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts) - thisfullmatch || add_mt_backedge!(sv, mt, atype) - end - end - return nothing -end - -const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result." -const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence." -const RECURSION_MSG_HARDLIMIT = "Bounded recursion detected under hardlimit. Call was widened to force convergence." - -function abstract_call_method(interp::AbstractInterpreter, - method::Method, @nospecialize(sig), sparams::SimpleVector, - hardlimit::Bool, si::StmtInfo, sv::AbsIntState) - if method.name === :depwarn && isdefined(Main, :Base) && method.module === Main.Base - add_remark!(interp, sv, "Refusing to infer into `depwarn`") - return MethodCallResult(Any, false, false, nothing, Effects()) - end - sigtuple = unwrap_unionall(sig) - sigtuple isa DataType || return MethodCallResult(Any, false, false, nothing, Effects()) - - if is_nospecializeinfer(method) - sig = get_nospecializeinfer_sig(method, sig, sparams) - end - - # Limit argument type tuple growth of functions: - # look through the parents list to see if there's a call to the same method - # and from the same method. - # Returns the topmost occurrence of that repeated edge. 
- edgecycle = edgelimited = false - topmost = nothing - - for sv′ in AbsIntStackUnwind(sv) - infmi = frame_instance(sv′) - if method === infmi.def - if infmi.specTypes::Type == sig::Type - # avoid widening when detecting self-recursion - # TODO: merge call cycle and return right away - if call_result_unused(si) - add_remark!(interp, sv, RECURSION_UNUSED_MSG) - # since we don't use the result (typically), - # we have a self-cycle in the call-graph, but not in the inference graph (typically): - # break this edge now (before we record it) by returning early - # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) - return MethodCallResult(Any, true, true, nothing, Effects()) - end - topmost = nothing - edgecycle = true - break - end - topmost === nothing || continue - if edge_matches_sv(interp, sv′, method, sig, sparams, hardlimit, sv) - topmost = sv′ - edgecycle = true - end - end - end - washardlimit = hardlimit - - if topmost !== nothing - msig = unwrap_unionall(method.sig)::DataType - spec_len = length(msig.parameters) + 1 - ls = length(sigtuple.parameters) - mi = frame_instance(sv) - - if method === mi.def - # Under direct self-recursion, permit much greater use of reducers. - # here we assume that complexity(specTypes) :>= complexity(sig) - comparison = mi.specTypes - l_comparison = length((unwrap_unionall(comparison)::DataType).parameters) - spec_len = max(spec_len, l_comparison) - else - comparison = method.sig - end - - if isdefined(method, :recursion_relation) - # We don't require the recursion_relation to be transitive, so - # apply a hard limit - hardlimit = true - end - - # see if the type is actually too big (relative to the caller), and limit it if required - newsig = limit_type_size(sig, comparison, hardlimit ? comparison : mi.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len) - - if newsig !== sig - # continue inference, but note that we've limited parameter complexity - # on this call (to ensure convergence), so that we don't cache this result - if call_result_unused(si) - add_remark!(interp, sv, RECURSION_UNUSED_MSG) - # if we don't (typically) actually care about this result, - # don't bother trying to examine some complex abstract signature - # since it's very unlikely that we'll try to inline this, - # or want make an invoke edge to its calling convention return type. - # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) - return MethodCallResult(Any, true, true, nothing, Effects()) - end - add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG) - # TODO (#48913) implement a proper recursion handling for irinterp: - # This works just because currently the `:terminate` condition guarantees that - # irinterp doesn't fail into unresolved cycles, but it's not a good solution. - # We should revisit this once we have a better story for handling cycles in irinterp. - if isa(topmost, InferenceState) - parentframe = frame_parent(topmost) - if isa(sv, InferenceState) && isa(parentframe, InferenceState) - poison_callstack!(sv, parentframe === nothing ? 
topmost : parentframe) - end - end - sig = newsig - sparams = svec() - edgelimited = true - end - end - - # if sig changed, may need to recompute the sparams environment - if isa(method.sig, UnionAll) && isempty(sparams) - recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), sig, method.sig)::SimpleVector - #@assert recomputed[1] !== Bottom - # We must not use `sig` here, since that may re-introduce structural complexity that - # our limiting heuristic sought to eliminate. The alternative would be to not increment depth over covariant contexts, - # but we prefer to permit inference of tuple-destructuring, so we don't do that right now - # For example, with a signature such as `Tuple{T, Ref{T}} where {T <: S}` - # we might want to limit this to `Tuple{S, Ref}`, while type-intersection can instead give us back the original type - # (which moves `S` back up to a lower comparison depth) - # Optionally, we could try to drive this to a fixed point, but I think this is getting too complex, - # and this would only cause more questions and more problems - # (the following is only an example, most of the statements are probable in the wrong order): - # newsig = sig - # seen = IdSet() - # while !(newsig in seen) - # push!(seen, newsig) - # lsig = length((unwrap_unionall(sig)::DataType).parameters) - # newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, lsig) - # recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector - # newsig = recomputed[2] - # end - # sig = ? - sparams = recomputed[2]::SimpleVector - end - - (; rt, edge, effects) = typeinf_edge(interp, method, sig, sparams, sv) - - if edge === nothing - edgecycle = edgelimited = true - end - - # we look for the termination effect override here as well, since the :terminates effect - # may have been tainted due to recursion at this point even if it's overridden - if is_effect_overridden(sv, :terminates_globally) - # this frame is known to terminate - effects = Effects(effects, terminates=true) - elseif is_effect_overridden(method, :terminates_globally) - # this edge is known to terminate - effects = Effects(effects; terminates=true) - elseif edgecycle - # Some sort of recursion was detected. - if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv) - # no `MethodInstance` cycles -- don't taint :terminate - else - # we cannot guarantee that the call will terminate - effects = Effects(effects; terminates=false) - end - end - - return MethodCallResult(rt, edgecycle, edgelimited, edge, effects) -end - -function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState, - method::Method, @nospecialize(sig), sparams::SimpleVector, - hardlimit::Bool, sv::AbsIntState) - # The `method_for_inference_heuristics` will expand the given method's generator if - # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists. - # The other `CodeInfo`s we inspect will already have this field inflated, so we just - # access it directly instead (to avoid regeneration). 
- world = get_world_counter(interp) - callee_method2 = method_for_inference_heuristics(method, sig, sparams, world) # Union{Method, Nothing} - - inf_method2 = method_for_inference_limit_heuristics(frame) # limit only if user token match - inf_method2 isa Method || (inf_method2 = nothing) - if callee_method2 !== inf_method2 - return false - end - if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit - # if this is a soft limit, - # also inspect the parent of this edge, - # to see if they are the same Method as sv - # in which case we'll need to ensure it is convergent - # otherwise, we don't - - # check in the cycle list first - # all items in here are mutual parents of all others - if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame)) - let parent = frame_parent(frame) - parent !== nothing || return false - (is_cached(parent) || frame_parent(parent) !== nothing) || return false - matches_sv(parent, sv) || return false - end - end - - # If the method defines a recursion relation, give it a chance - # to tell us that this recursion is actually ok. - if isdefined(method, :recursion_relation) - if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame_instance(frame).specTypes]) - return false - end - end - end - return true -end - -# This function is used for computing alternate limit heuristics -function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt) - if isdefined(method, :generator) && !(method.generator isa Core.GeneratedFunctionStub) && may_invoke_generator(method, sig, sparams) - method_instance = specialize_method(method, sig, sparams) - if isa(method_instance, MethodInstance) - cinfo = get_staged(method_instance, world) - if isa(cinfo, CodeInfo) - method2 = cinfo.method_for_inference_limit_heuristics - if method2 isa Method - return method2 - end - end - end - end - return nothing -end - -function matches_sv(parent::AbsIntState, sv::AbsIntState) - sv_method2 = method_for_inference_limit_heuristics(sv) # limit only if user token match - sv_method2 isa Method || (sv_method2 = nothing) - parent_method2 = method_for_inference_limit_heuristics(parent) # limit only if user token match - parent_method2 isa Method || (parent_method2 = nothing) - return frame_instance(parent).def === frame_instance(sv).def && sv_method2 === parent_method2 -end - -function is_edge_recursed(edge::MethodInstance, caller::AbsIntState) - return any(AbsIntStackUnwind(caller)) do sv::AbsIntState - return edge === frame_instance(sv) - end -end - -function is_method_recursed(method::Method, caller::AbsIntState) - return any(AbsIntStackUnwind(caller)) do sv::AbsIntState - return method === frame_instance(sv).def - end -end - -function is_constprop_edge_recursed(edge::MethodInstance, caller::AbsIntState) - return any(AbsIntStackUnwind(caller)) do sv::AbsIntState - return edge === frame_instance(sv) && is_constproped(sv) - end -end - -function is_constprop_method_recursed(method::Method, caller::AbsIntState) - return any(AbsIntStackUnwind(caller)) do sv::AbsIntState - return method === frame_instance(sv).def && is_constproped(sv) - end -end - -# keeps result and context information of abstract_method_call, which will later be used for -# backedge computation, and concrete evaluation or constant-propagation -struct MethodCallResult - rt - edgecycle::Bool - edgelimited::Bool - edge::Union{Nothing,MethodInstance} - effects::Effects - function MethodCallResult(@nospecialize(rt), - edgecycle::Bool, - 
edgelimited::Bool, - edge::Union{Nothing,MethodInstance}, - effects::Effects) - return new(rt, edgecycle, edgelimited, edge, effects) - end -end - -struct InvokeCall - types # ::Type - lookupsig # ::Type - InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig) -end - -struct ConstCallResults - rt::Any - const_result::ConstResult - effects::Effects - edge::MethodInstance - ConstCallResults(@nospecialize(rt), - const_result::ConstResult, - effects::Effects, - edge::MethodInstance) = - new(rt, const_result, effects, edge) -end - -function abstract_call_method_with_const_args(interp::AbstractInterpreter, - result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, - match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) - if !const_prop_enabled(interp, sv, match) - return nothing - end - if bail_out_const_call(interp, result, si) - add_remark!(interp, sv, "[constprop] No more information to be gained") - return nothing - end - eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv) - if eligibility === :concrete_eval - return concrete_eval_call(interp, f, result, arginfo, sv, invokecall) - end - mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) - mi === nothing && return nothing - if is_constprop_recursed(result, mi, sv) - add_remark!(interp, sv, "[constprop] Edge cycle encountered") - return nothing - end - # try semi-concrete evaluation - if eligibility === :semi_concrete_eval - res = semi_concrete_eval_call(interp, mi, result, arginfo, sv) - if res !== nothing - return res - end - end - # try constant prop' - return const_prop_call(interp, mi, result, arginfo, sv) -end - -function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch) - if !InferenceParams(interp).ipo_constant_propagation - add_remark!(interp, sv, "[constprop] Disabled by parameter") - return false - end - if is_no_constprop(match.method) - add_remark!(interp, sv, "[constprop] Disabled by method parameter") - return false - end - return true -end - -function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo) - if is_removable_if_unused(result.effects) - if isa(result.rt, Const) || call_result_unused(si) - return true - end - end - return false -end - -function concrete_eval_eligible(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) - (;effects) = result - if inbounds_option() === :off - if !is_nothrow(effects) - # Disable concrete evaluation in `--check-bounds=no` mode, - # unless it is known to not throw. - return :none - end - end - if !effects.noinbounds && stmt_taints_inbounds_consistency(sv) - # If the current statement is @inbounds or we propagate inbounds, the call's consistency - # is tainted and not consteval eligible. 
- add_remark!(interp, sv, "[constprop] Concrete evel disabled for inbounds") - return :none - end - if isoverlayed(method_table(interp)) && !is_nonoverlayed(effects) - # disable concrete-evaluation if this function call is tainted by some overlayed - # method since currently there is no direct way to execute overlayed methods - add_remark!(interp, sv, "[constprop] Concrete evel disabled for overlayed methods") - return :none - end - if result.edge !== nothing && is_foldable(effects) - if f !== nothing && is_all_const_arg(arginfo, #=start=#2) - return :concrete_eval - elseif !any_conditional(arginfo) - return :semi_concrete_eval - end - end - return :none -end - -is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int) -function is_all_const_arg(argtypes::Vector{Any}, start::Int) - for i = start:length(argtypes) - a = widenslotwrapper(argtypes[i]) - isa(a, Const) || isconstType(a) || issingletontype(a) || return false - end - return true -end - -any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) -any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) - -collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start) -function collect_const_args(argtypes::Vector{Any}, start::Int) - return Any[ let a = widenslotwrapper(argtypes[i]) - isa(a, Const) ? a.val : - isconstType(a) ? (a::DataType).parameters[1] : - (a::DataType).instance - end for i = start:length(argtypes) ] -end - -function concrete_eval_call(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, - sv::AbsIntState, invokecall::Union{InvokeCall,Nothing}) - args = collect_const_args(arginfo, #=start=#2) - if invokecall !== nothing - # this call should be `invoke`d, rewrite `args` back now - pushfirst!(args, f, invokecall.types) - f = invoke - end - world = get_world_counter(interp) - edge = result.edge::MethodInstance - value = try - Core._call_in_world_total(world, f, args...) - catch - # The evaluation threw. 
By :consistent-cy, we're guaranteed this would have happened at runtime - return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge) - end - return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge) -end - -# check if there is a cycle and duplicated inference of `mi` -function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv::AbsIntState) - result.edgecycle || return false - if result.edgelimited - return is_constprop_method_recursed(mi.def::Method, sv) - else - # if the type complexity limiting didn't decide to limit the call signature (as - # indicated by `result.edgelimited === false`), we can relax the cycle detection - # by comparing `MethodInstance`s and allow inference to propagate different - # constant elements if the recursion is finite over the lattice - return is_constprop_edge_recursed(mi, sv) - end -end - -# if there's a possibility we could get a better result with these constant arguments -# (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise -function maybe_get_const_prop_profitable(interp::AbstractInterpreter, - result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, - match::MethodMatch, sv::AbsIntState) - method = match.method - force = force_const_prop(interp, f, method) - force || const_prop_entry_heuristic(interp, result, si, sv) || return nothing - nargs::Int = method.nargs - method.isva && (nargs -= 1) - length(arginfo.argtypes) < nargs && return nothing - if !const_prop_argument_heuristic(interp, arginfo, sv) - add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics") - return nothing - end - all_overridden = is_all_overridden(interp, arginfo, sv) - if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, sv) - add_remark!(interp, sv, "[constprop] Disabled by function heuristic") - return nothing - end - force |= all_overridden - mi = specialize_method(match; preexisting=!force) - if mi === nothing - add_remark!(interp, sv, "[constprop] Failed to specialize") - return nothing - end - mi = mi::MethodInstance - if !force && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) - add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic") - return nothing - end - return mi -end - -function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo, sv::AbsIntState) - if call_result_unused(si) && result.edgecycle - add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)") - return false - end - # check if this return type is improvable (i.e. 
whether it's possible that with more - # information, we might get a more precise type) - rt = result.rt - if isa(rt, Type) - # could always be improved to `Const`, `PartialStruct` or just a more precise type, - # unless we're already at `Bottom` - if rt === Bottom - add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (erroneous result)") - return false - else - return true - end - elseif isa(rt, PartialStruct) || isa(rt, InterConditional) || isa(rt, InterMustAlias) - # could be improved to `Const` or a more precise wrapper - return true - elseif isa(rt, LimitedAccuracy) - # optimizations like inlining are disabled for limited frames, - # thus there won't be much benefit in constant-prop' here - add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (limited accuracy)") - return false - else - if isa(rt, Const) - if !is_nothrow(result.effects) - # Could still be improved to Bottom (or at least could see the effects improved) - return true - end - end - add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable result)") - return false - end -end - -# determines heuristically whether if constant propagation can be worthwhile -# by checking if any of given `argtypes` is "interesting" enough to be propagated -function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState) - 𝕃ᵢ = typeinf_lattice(interp) - argtypes = arginfo.argtypes - for i in 1:length(argtypes) - a = argtypes[i] - if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && arginfo.fargs !== nothing - is_const_prop_profitable_conditional(a, arginfo.fargs, sv) && return true - else - a = widenslotwrapper(a) - has_nontrivial_extended_info(𝕃ᵢ, a) && is_const_prop_profitable_arg(𝕃ᵢ, a) && return true - end - end - return false -end - -function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState) - slotid = find_constrained_arg(cnd, fargs, sv) - if slotid !== nothing - return true - end - # as a minor optimization, we just check the result is a constant or not, - # since both `has_nontrivial_extended_info`/`is_const_prop_profitable_arg` return `true` - # for `Const(::Bool)` - return isa(widenconditional(cnd), Const) -end - -function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState) - slot = cnd.slot - for i in 1:length(fargs) - arg = ssa_def_slot(fargs[i], sv) - if isa(arg, SlotNumber) && slot_id(arg) == slot - return i - end - end - return nothing -end - -# checks if all argtypes has additional information other than what `Type` can provide -function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::AbsIntState) - 𝕃ᵢ = typeinf_lattice(interp) - for i in 1:length(argtypes) - a = argtypes[i] - if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && fargs !== nothing - is_const_prop_profitable_conditional(a, fargs, sv) || return false - else - is_forwardable_argtype(𝕃ᵢ, widenslotwrapper(a)) || return false - end - end - return true -end - -function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method) - return is_aggressive_constprop(method) || - InferenceParams(interp).aggressive_constant_propagation || - istopfunction(f, :getproperty) || - istopfunction(f, :setproperty!) 
-end - -function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f), - arginfo::ArgInfo, nargs::Int, all_overridden::Bool, sv::AbsIntState) - argtypes = arginfo.argtypes - if nargs > 1 - 𝕃ᵢ = typeinf_lattice(interp) - if istopfunction(f, :getindex) || istopfunction(f, :setindex!) - arrty = argtypes[2] - # don't propagate constant index into indexing of non-constant array - if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty) - # For static arrays, allow the constprop if we could possibly - # deduce nothrow as a result. - still_nothrow = isa(sv, InferenceState) ? is_nothrow(sv.ipo_effects) : false - if !still_nothrow || ismutabletype(arrty) - return false - end - elseif ⊑(𝕃ᵢ, arrty, Array) - return false - end - elseif istopfunction(f, :iterate) - itrty = argtypes[2] - if ⊑(𝕃ᵢ, itrty, Array) - return false - end - end - end - if !all_overridden && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) || - istopfunction(f, :(==)) || istopfunction(f, :!=) || - istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) || - istopfunction(f, :<<) || istopfunction(f, :>>)) - # it is almost useless to inline the op when all the same type, - # but highly worthwhile to inline promote of a constant - length(argtypes) > 2 || return false - t1 = widenconst(argtypes[2]) - for i in 3:length(argtypes) - at = argtypes[i] - ty = isvarargtype(at) ? unwraptv(at) : widenconst(at) - if ty !== t1 - return true - end - end - return false - end - return true -end - -# This is a heuristic to avoid trying to const prop through complicated functions -# where we would spend a lot of time, but are probably unlikely to get an improved -# result anyway. -function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, - mi::MethodInstance, arginfo::ArgInfo, sv::AbsIntState) - method = mi.def::Method - if method.is_for_opaque_closure - # Not inlining an opaque closure can be very expensive, so be generous - # with the const-prop-ability. It is quite possible that we can't infer - # anything at all without const-propping, so the inlining check below - # isn't particularly helpful here. - return true - end - # now check if the source of this method instance is inlineable, since the extended type - # information we have here would be discarded if it is not inlined into a callee context - # (modulo the inferred return type that can be potentially refined) - if is_declared_inline(method) - # this method is declared as `@inline` and will be inlined - return true - end - flag = get_curr_ssaflag(sv) - if is_stmt_inline(flag) - # force constant propagation for a call that is going to be inlined - # since the inliner will try to find this constant result - # if these constant arguments arrive there - return true - elseif is_stmt_noinline(flag) - # this call won't be inlined, thus this constant-prop' will most likely be unfruitful - return false - else - # Peek at the inferred result for the method to determine if the optimizer - # was able to cut it down to something simple (inlineable in particular). - # If so, there will be a good chance we might be able to const prop - # all the way through and learn something new. 
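# [Editor's note - illustrative sketch, not part of the original diff] A minimal
# example of the payoff this heuristic is gating, using only standard reflection
# (REPL output shown as a comment and may vary by Julia version):
#
#     branchy(b::Bool) = b ? 1 : 2.0     # infers as Union{Int, Float64} in general
#     caller() = branchy(true)
#
#     # julia> Base.return_types(caller, Tuple{})
#     # 1-element Vector{Any}:
#     #  Int64
#
# Re-inferring `branchy` with the constant `true` is what narrows `caller`'s
# return type from Union{Int, Float64} down to Int64.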
- code = get(code_cache(interp), mi, nothing) - if isa(code, CodeInstance) - inferred = @atomic :monotonic code.inferred - # TODO propagate a specific `CallInfo` that conveys information about this call - if inlining_policy(interp, inferred, NoCallInfo(), IR_FLAG_NULL, mi, arginfo.argtypes) !== nothing - return true - end - end - end - return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful -end - -function semi_concrete_eval_call(interp::AbstractInterpreter, - mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) - world = frame_world(sv) - mi_cache = WorldView(code_cache(interp), world) - code = get(mi_cache, mi, nothing) - if code !== nothing - irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world) - if irsv !== nothing - irsv.parent = sv - rt, nothrow = ir_abstract_constant_propagation(interp, irsv) - @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp" - if !(isa(rt, Type) && hasintersect(rt, Bool)) - ir = irsv.ir - # TODO (#48913) enable double inlining pass when there are any calls - # that are newly resovled by irinterp - # state = InliningState(interp) - # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) - new_effects = Effects(result.effects; nothrow) - return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi) - end - end - end - return nothing -end - -function const_prop_call(interp::AbstractInterpreter, - mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) - inf_cache = get_inference_cache(interp) - 𝕃ᵢ = typeinf_lattice(interp) - inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache) - if inf_result === nothing - # fresh constant prop' - argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) - inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp)) - if !any(inf_result.overridden_by_const) - add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") - return nothing - end - frame = InferenceState(inf_result, #=cache=#:local, interp) - if frame === nothing - add_remark!(interp, sv, "[constprop] Could not retrieve the source") - return nothing # this is probably a bad generated function (unsound), but just ignore it - end - frame.parent = sv - if !typeinf(interp, frame) - add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") - return nothing - end - @assert inf_result.result !== nothing - else - # found the cache for this constant prop' - if inf_result.result === nothing - add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") - return nothing - end - end - return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi) -end - -# TODO implement MustAlias forwarding - -struct ConditionalArgtypes <: ForwardableArgtypes - arginfo::ArgInfo - sv::InferenceState -end - -""" - matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, - conditional_argtypes::ConditionalArgtypes) - -The implementation is able to forward `Conditional` of `conditional_argtypes`, -as well as the other general extended lattice inforamtion. 
-""" -function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, - conditional_argtypes::ConditionalArgtypes) - (; arginfo, sv) = conditional_argtypes - (; fargs, argtypes) = arginfo - given_argtypes = Vector{Any}(undef, length(argtypes)) - def = linfo.def::Method - nargs = Int(def.nargs) - cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) - local condargs = nothing - for i in 1:length(argtypes) - argtype = argtypes[i] - # forward `Conditional` if it conveys a constraint on any other argument - if isa(argtype, Conditional) && fargs !== nothing - cnd = argtype - slotid = find_constrained_arg(cnd, fargs, sv) - if slotid !== nothing - # using union-split signature, we may be able to narrow down `Conditional` - sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) - thentype = tmeet(cnd.thentype, sigt) - elsetype = tmeet(cnd.elsetype, sigt) - if thentype === Bottom && elsetype === Bottom - # we accidentally proved this method match is impossible - # TODO bail out here immediately rather than just propagating Bottom ? - given_argtypes[i] = Bottom - else - if condargs === nothing - condargs = Tuple{Int,Int}[] - end - push!(condargs, (slotid, i)) - given_argtypes[i] = Conditional(slotid, thentype, elsetype) - end - continue - end - end - given_argtypes[i] = widenslotwrapper(argtype) - end - if condargs !== nothing - given_argtypes = let condargs=condargs - va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int - # invalidate `Conditional` imposed on varargs - for (slotid, i) in condargs - if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise - isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) - end - end - end - end - else - given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) - end - return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) -end - -# This is only for use with `Conditional`. -# In general, usage of this is wrong. -function ssa_def_slot(@nospecialize(arg), sv::InferenceState) - code = sv.src.code - init = sv.currpc - while isa(arg, SSAValue) - init = arg.id - arg = code[init] - end - if arg isa SlotNumber - # found this kind of pattern: - # %init = SlotNumber(x) - # [...] - # goto if not isa(%init, T) - # now conservatively make sure there isn't potentially another conflicting assignment - # to the same slot between the def and usage - # we can assume the IR is sorted, since the front-end only creates SSA values in order - for i = init:(sv.currpc-1) - e = code[i] - if isexpr(e, :(=)) && e.args[1] === arg - return nothing - end - end - else - # there might still be the following kind of pattern (see #45499): - # %init = ... - # [...] - # SlotNumber(x) = %init - # [...] 
- # goto if not isa(%init, T) - # let's check if there is a slot assigned to the def SSA value but also there isn't - # any potentially conflicting assignment to the same slot - arg = nothing - def = SSAValue(init) - for i = (init+1):(sv.currpc-1) - e = code[i] - if isexpr(e, :(=)) - lhs = e.args[1] - if isa(lhs, SlotNumber) - lhs === arg && return nothing - rhs = e.args[2] - if rhs === def - arg = lhs - end - end - end - end - end - return arg -end - -struct AbstractIterationResult - cti::Vector{Any} - info::MaybeAbstractIterationInfo - ai_effects::Effects -end -AbstractIterationResult(cti::Vector{Any}, info::MaybeAbstractIterationInfo) = - AbstractIterationResult(cti, info, EFFECTS_TOTAL) - -# `typ` is the inferred type for expression `arg`. -# if the expression constructs a container (e.g. `svec(x,y,z)`), -# refine its type to an array of element types. -# Union of Tuples of the same length is converted to Tuple of Unions. -# returns an array of types -function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ), - sv::AbsIntState) - if isa(typ, PartialStruct) - widet = typ.typ - if isa(widet, DataType) - if widet.name === Tuple.name - return AbstractIterationResult(typ.fields, nothing) - elseif widet.name === _NAMEDTUPLE_NAME - return AbstractIterationResult(typ.fields, nothing) - end - end - end - - if isa(typ, Const) - val = typ.val - if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple) - return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here! - end - end - - tti0 = widenconst(typ) - tti = unwrap_unionall(tti0) - if isa(tti, DataType) && tti.name === _NAMEDTUPLE_NAME - # A NamedTuple iteration is the same as the iteration of its Tuple parameter: - # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type - tti = unwraptv(tti.parameters[2]) - tti0 = rewrap_unionall(tti, tti0) - end - if isa(tti, Union) - utis = uniontypes(tti) - if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis) - return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) - end - ltp = length((utis[1]::DataType).parameters) - for t in utis - if length((t::DataType).parameters) != ltp - return AbstractIterationResult(Any[Vararg{Any}], nothing) - end - end - result = Any[ Union{} for _ in 1:ltp ] - for t in utis - tps = (t::DataType).parameters - _all(valid_as_lattice, tps) || continue - for j in 1:ltp - result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0)) - end - end - return AbstractIterationResult(result, nothing) - elseif tti0 <: Tuple - if isa(tti0, DataType) - return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing) - elseif !isa(tti, DataType) - return AbstractIterationResult(Any[Vararg{Any}], nothing) - else - len = length(tti.parameters) - last = tti.parameters[len] - va = isvarargtype(last) - elts = Any[ fieldtype(tti0, i) for i = 1:len ] - if va - if elts[len] === Union{} - pop!(elts) - else - elts[len] = Vararg{elts[len]} - end - end - return AbstractIterationResult(elts, nothing) - end - elseif tti0 === SimpleVector - return AbstractIterationResult(Any[Vararg{Any}], nothing) - elseif tti0 === Any - return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) - elseif tti0 <: Array - if eltype(tti0) === Union{} - return AbstractIterationResult(Any[], nothing) - end - return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing) - else - return abstract_iteration(interp, itft, typ, sv) - 
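# [Editor's note - illustrative sketch, not part of the original diff] The
# `abstract_iteration` fallback taken just above models the standard iteration
# protocol; the loop below is what inference simulates symbolically on lattice
# elements instead of runtime values:
#
#     y = iterate(itr)                 # -> nothing, or a (value, state) tuple
#     while y !== nothing
#         value, state = y
#         # ... use value ...
#         y = iterate(itr, state)
#     end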
end -end - -# simulate iteration protocol on container type up to fixpoint -function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::AbsIntState) - if isa(itft, Const) - iteratef = itft.val - else - return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) - end - @assert !isvarargtype(itertype) - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv) - stateordonet = call.rt - info = call.info - # Return Bottom if this is not an iterator. - # WARNING: Changes to the iteration protocol must be reflected here, - # this is not just an optimization. - # TODO: this doesn't realize that Array, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol - stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, call.effects, info)], true)) - valtype = statetype = Bottom - ret = Any[] - calls = CallMeta[call] - stateordonet_widened = widenconst(stateordonet) - 𝕃ᵢ = typeinf_lattice(interp) - - # Try to unroll the iteration up to max_tuple_splat, which covers any finite - # length iterators, or interesting prefix - while true - if stateordonet_widened === Nothing - return AbstractIterationResult(ret, AbstractIterationInfo(calls, true)) - end - if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat - break - end - if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2 - break - end - nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2)) - # If there's no new information in this statetype, don't bother continuing, - # the iterator won't be finite. - if ⊑(𝕃ᵢ, nstatetype, statetype) - return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS) - end - valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1)) - push!(ret, valtype) - statetype = nstatetype - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv) - stateordonet = call.rt - stateordonet_widened = widenconst(stateordonet) - push!(calls, call) - end - # From here on, we start asking for results on the widened types, rather than - # the precise (potentially const) state type - # statetype and valtype are reinitialized in the first iteration below from the - # (widened) stateordonet, which has not yet been fully analyzed in the loop above - valtype = statetype = Bottom - may_have_terminated = Nothing <: stateordonet_widened - while valtype !== Any - nounion = typeintersect(stateordonet_widened, Tuple{Any,Any}) - if nounion !== Union{} && !isa(nounion, DataType) - # nounion is of a type we cannot handle - valtype = Any - break - end - if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype) - # reached a fixpoint or iterator failed/gave invalid answer - if !hasintersect(stateordonet_widened, Nothing) - # ... but cannot terminate - if !may_have_terminated - # ... 
and cannot have terminated prior to this loop - return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects()) - else - # iterator may have terminated prior to this loop, but not during it - valtype = Bottom - end - end - break - end - valtype = tmerge(valtype, nounion.parameters[1]) - statetype = tmerge(statetype, nounion.parameters[2]) - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv) - push!(calls, call) - stateordonet = call.rt - stateordonet_widened = widenconst(stateordonet) - end - if valtype !== Union{} - push!(ret, Vararg{valtype}) - end - return AbstractIterationResult(ret, AbstractIterationInfo(calls, false)) -end - -# do apply(af, fargs...), where af is a function value -function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, - sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv)) - itft = argtype_by_index(argtypes, 2) - aft = argtype_by_index(argtypes, 3) - (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - aargtypes = argtype_tail(argtypes, 4) - aftw = widenconst(aft) - if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw)) - if !isconcretetype(aftw) || (aftw <: Builtin) - add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type") - # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting - # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin - return CallMeta(Any, Effects(), NoCallInfo()) - end - end - res = Union{} - nargs = length(aargtypes) - splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum - ctypes = [Any[aft]] - infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]] - effects = EFFECTS_TOTAL - for i = 1:nargs - ctypes´ = Vector{Any}[] - infos′ = Vector{MaybeAbstractIterationInfo}[] - for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]]) - if !isvarargtype(ti) - (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv) - else - (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv) - # We can't represent a repeating sequence of the same types, - # so tmerge everything together to get one type that represents - # everything. 
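# [Editor's note - illustrative sketch, not part of the original diff] This is
# the code path behind splatting: a call like `f(xs...)` lowers to
# `Core._apply_iterate(Base.iterate, f, xs)`, which can be checked with
# `Meta.@lower f(xs...)` (output abbreviated). If the element types seen for a
# vararg were, say, Int and Float64, the merge performed just below collapses
# them into a single `Vararg{Union{Float64, Int}}` entry rather than tracking an
# unbounded per-element sequence.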
- argt = cti[end] - if isvarargtype(argt) - argt = unwrapva(argt) - end - for i in 1:(length(cti)-1) - argt = tmerge(argt, cti[i]) - end - cti = Any[Vararg{argt}] - end - effects = merge_effects(effects, ai_effects) - if info !== nothing - for call in info.each - effects = merge_effects(effects, call.effects) - end - end - if any(@nospecialize(t) -> t === Bottom, cti) - continue - end - for j = 1:length(ctypes) - ct = ctypes[j]::Vector{Any} - if isvarargtype(ct[end]) - # This is vararg, we're not gonna be able to do any inlining, - # drop the info - info = nothing - tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti) - push!(ctypes´, push!(ct[1:(end - 1)], tail)) - else - push!(ctypes´, append!(ct[:], cti)) - end - push!(infos′, push!(copy(infos[j]), info)) - end - end - ctypes = ctypes´ - infos = infos′ - end - retinfos = ApplyCallInfo[] - retinfo = UnionSplitApplyCallInfo(retinfos) - napplicable = length(ctypes) - seen = 0 - for i = 1:napplicable - ct = ctypes[i] - arginfo = infos[i] - lct = length(ct) - # truncate argument list at the first Vararg - for i = 1:lct-1 - cti = ct[i] - if isvarargtype(cti) - ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct]) - resize!(ct, i) - break - end - end - call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods) - seen += 1 - push!(retinfos, ApplyCallInfo(call.info, arginfo)) - res = tmerge(typeinf_lattice(interp), res, call.rt) - effects = merge_effects(effects, call.effects) - if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv) - add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.") - break - end - end - if seen ≠ napplicable - # there is unanalyzed candidate, widen type and effects to the top - res = Any - effects = Effects() - retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing - end - # TODO: Add a special info type to capture all the iteration info. - # For now, only propagate info if we don't also union-split the iteration - return CallMeta(res, effects, retinfo) -end - -function argtype_by_index(argtypes::Vector{Any}, i::Int) - n = length(argtypes) - na = argtypes[n] - if isvarargtype(na) - return i >= n ? unwrapva(na) : argtypes[i] - else - return i > n ? 
Bottom : argtypes[i] - end -end - -function argtype_tail(argtypes::Vector{Any}, i::Int) - n = length(argtypes) - if isvarargtype(argtypes[n]) && i > n - i = n - end - return argtypes[i:n] -end - -struct ConditionalTypes - thentype - elsetype - ConditionalTypes(thentype, elsetype) = (@nospecialize; new(thentype, elsetype)) -end - -@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int, - @nospecialize(rt)) - if isa(rt, Const) - xt = widenslotwrapper(xt) - if rt.val === false - return ConditionalTypes(Bottom, xt) - elseif rt.val === true - return ConditionalTypes(xt, Bottom) - end - end - return isa_condition(xt, ty, max_union_splitting) -end -@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int) - tty_ub, isexact_tty = instanceof_tfunc(ty) - tty = widenconst(xt) - if isexact_tty && !isa(tty_ub, TypeVar) - tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info - if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub) - thentype = typeintersect(tty, tty_ub) - if iskindtype(tty_ub) && thentype !== Bottom - # `typeintersect` may be unable narrow down `Type`-type - thentype = tty_ub - end - valid_as_lattice(thentype) || (thentype = Bottom) - elsetype = typesubtract(tty, tty_lb, max_union_splitting) - return ConditionalTypes(thentype, elsetype) - end - end - return nothing -end - -@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int, - @nospecialize(rt)) - thentype = c - elsetype = widenslotwrapper(xt) - if rt === Const(false) - thentype = Bottom - elseif rt === Const(true) - elsetype = Bottom - elseif elsetype isa Type && isdefined(typeof(c.val), :instance) # can only widen a if it is a singleton - elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting) - end - return ConditionalTypes(thentype, elsetype) -end -@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int) - thentype = c - elsetype = widenslotwrapper(xt) - if elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton - elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting) - end - return ConditionalTypes(thentype, elsetype) -end - -function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo, - sv::AbsIntState) - @nospecialize f - la = length(argtypes) - 𝕃ᵢ = typeinf_lattice(interp) - ⊑ᵢ = ⊑(𝕃ᵢ) - if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4 - cnd = argtypes[2] - if isa(cnd, Conditional) - newcnd = widenconditional(cnd) - tx = argtypes[3] - ty = argtypes[4] - if isa(newcnd, Const) - # if `cnd` is constant, we should just respect its constantness to keep inference accuracy - return newcnd.val::Bool ? tx : ty - else - # try to simulate this as a real conditional (`cnd ? x : y`), so that the penalty for using `ifelse` instead isn't too high - a = ssa_def_slot(fargs[3], sv) - b = ssa_def_slot(fargs[4], sv) - if isa(a, SlotNumber) && cnd.slot == slot_id(a) - tx = (cnd.thentype ⊑ᵢ tx ? cnd.thentype : tmeet(𝕃ᵢ, tx, widenconst(cnd.thentype))) - end - if isa(b, SlotNumber) && cnd.slot == slot_id(b) - ty = (cnd.elsetype ⊑ᵢ ty ? 
cnd.elsetype : tmeet(𝕃ᵢ, ty, widenconst(cnd.elsetype))) - end - return tmerge(𝕃ᵢ, tx, ty) - end - end - end - rt = builtin_tfunction(interp, f, argtypes[2:end], sv) - if has_mustalias(𝕃ᵢ) && f === getfield && isa(fargs, Vector{Any}) && la ≥ 3 - a3 = argtypes[3] - if isa(a3, Const) - if rt !== Bottom && !isalreadyconst(rt) - var = fargs[2] - if isa(var, SlotNumber) - vartyp = widenslotwrapper(argtypes[2]) - fldidx = maybe_const_fldidx(vartyp, a3.val) - if fldidx !== nothing - # wrap this aliasable field into `MustAlias` for possible constraint propagations - return MustAlias(var, vartyp, fldidx, rt) - end - end - end - end - elseif has_conditional(𝕃ᵢ, sv) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any}) - # perform very limited back-propagation of type information for `is` and `isa` - if f === isa - # try splitting value argument, based on types - a = ssa_def_slot(fargs[2], sv) - a2 = argtypes[2] - a3 = argtypes[3] - if isa(a, SlotNumber) - cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting, rt) - if cndt !== nothing - return Conditional(a, cndt.thentype, cndt.elsetype) - end - end - if isa(a2, MustAlias) - if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) - cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting) - if cndt !== nothing - return form_mustalias_conditional(a2, cndt.thentype, cndt.elsetype) - end - end - end - # try splitting type argument, based on value - if isdispatchelem(widenconst(a2)) && a3 isa Union && !has_free_typevars(a3) && !isa(rt, Const) - b = ssa_def_slot(fargs[3], sv) - if isa(b, SlotNumber) - # !(x isa T) implies !(Type{a2} <: T) - # TODO: complete splitting, based on which portions of the Union a3 for which isa_tfunc returns Const(true) or Const(false) instead of Bool - elsetype = typesubtract(a3, Type{widenconst(a2)}, InferenceParams(interp).max_union_splitting) - return Conditional(b, a3, elsetype) - end - end - elseif f === (===) - a = ssa_def_slot(fargs[2], sv) - b = ssa_def_slot(fargs[3], sv) - aty = argtypes[2] - bty = argtypes[3] - # if doing a comparison to a singleton, consider returning a `Conditional` instead - if isa(aty, Const) - if isa(b, SlotNumber) - cndt = egal_condition(aty, bty, InferenceParams(interp).max_union_splitting, rt) - return Conditional(b, cndt.thentype, cndt.elsetype) - elseif isa(bty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) - cndt = egal_condition(aty, bty.fldtyp, InferenceParams(interp).max_union_splitting) - return form_mustalias_conditional(bty, cndt.thentype, cndt.elsetype) - end - elseif isa(bty, Const) - if isa(a, SlotNumber) - cndt = egal_condition(bty, aty, InferenceParams(interp).max_union_splitting, rt) - return Conditional(a, cndt.thentype, cndt.elsetype) - elseif isa(aty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) - cndt = egal_condition(bty, aty.fldtyp, InferenceParams(interp).max_union_splitting) - return form_mustalias_conditional(aty, cndt.thentype, cndt.elsetype) - end - end - # TODO enable multiple constraints propagation here, there are two possible improvements: - # 1. propagate constraints for both lhs and rhs - # 2. 
we can propagate both constraints on aliased fields and slots - # As for 2, for now, we prioritize constraints on aliased fields, since currently - # different slots that represent the same object can't share same field constraint, - # and thus binding `MustAlias` to the other slot is less likely useful - if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) - if isa(bty, MustAlias) - thentype = widenslotwrapper(aty) - elsetype = bty.fldtyp - if thentype ⊏ elsetype - return form_mustalias_conditional(bty, thentype, elsetype) - end - elseif isa(aty, MustAlias) - thentype = widenslotwrapper(bty) - elsetype = aty.fldtyp - if thentype ⊏ elsetype - return form_mustalias_conditional(aty, thentype, elsetype) - end - end - end - # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge - if isa(b, SlotNumber) - thentype = rt === Const(false) ? Bottom : widenslotwrapper(bty) - elsetype = rt === Const(true) ? Bottom : widenslotwrapper(bty) - return Conditional(b, thentype, elsetype) - elseif isa(a, SlotNumber) - thentype = rt === Const(false) ? Bottom : widenslotwrapper(aty) - elsetype = rt === Const(true) ? Bottom : widenslotwrapper(aty) - return Conditional(a, thentype, elsetype) - end - elseif f === Core.Compiler.not_int - aty = argtypes[2] - if isa(aty, Conditional) - thentype = rt === Const(false) ? Bottom : aty.elsetype - elsetype = rt === Const(true) ? Bottom : aty.thentype - return Conditional(aty.slot, thentype, elsetype) - end - elseif f === isdefined - uty = argtypes[2] - a = ssa_def_slot(fargs[2], sv) - if isa(uty, Union) && isa(a, SlotNumber) - fld = argtypes[3] - thentype = Bottom - elsetype = Bottom - for ty in uniontypes(uty) - cnd = isdefined_tfunc(𝕃ᵢ, ty, fld) - if isa(cnd, Const) - if cnd.val::Bool - thentype = tmerge(thentype, ty) - else - elsetype = tmerge(elsetype, ty) - end - else - thentype = tmerge(thentype, ty) - elsetype = tmerge(elsetype, ty) - end - end - return Conditional(a, thentype, elsetype) - end - end - end - @assert !isa(rt, TypeVar) "unhandled TypeVar" - return rt -end - -function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any}) - if length(argtypes) == 3 - canconst = true - a2 = argtypes[2] - a3 = argtypes[3] - ⊑ᵢ = ⊑(typeinf_lattice(interp)) - nothrow = a2 ⊑ᵢ TypeVar && (a3 ⊑ᵢ Type || a3 ⊑ᵢ TypeVar) - if isa(a3, Const) - body = a3.val - elseif isType(a3) - body = a3.parameters[1] - canconst = false - else - return CallMeta(Any, Effects(EFFECTS_TOTAL; nothrow), NoCallInfo()) - end - if !(isa(body, Type) || isa(body, TypeVar)) - return CallMeta(Any, EFFECTS_THROWS, NoCallInfo()) - end - if has_free_typevars(body) - if isa(a2, Const) - tv = a2.val - elseif isa(a2, PartialTypeVar) - tv = a2.tv - canconst = false - else - return CallMeta(Any, EFFECTS_THROWS, NoCallInfo()) - end - isa(tv, TypeVar) || return CallMeta(Any, EFFECTS_THROWS, NoCallInfo()) - body = UnionAll(tv, body) - end - ret = canconst ? 
Const(body) : Type{body} - return CallMeta(ret, Effects(EFFECTS_TOTAL; nothrow), NoCallInfo()) - end - return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) -end - -function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState) - ft′ = argtype_by_index(argtypes, 2) - ft = widenconst(ft′) - ft === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3)) - isexact || return CallMeta(Any, Effects(), NoCallInfo()) - unwrapped = unwrap_unionall(types) - if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name - return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - end - argtype = argtypes_to_type(argtype_tail(argtypes, 4)) - nargtype = typeintersect(types, argtype) - nargtype === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - nargtype isa DataType || return CallMeta(Any, Effects(), NoCallInfo()) # other cases are not implemented below - isdispatchelem(ft) || return CallMeta(Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below - ft = ft::DataType - lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type - nargtype = Tuple{ft, nargtype.parameters...} - argtype = Tuple{ft, argtype.parameters...} - match, valid_worlds, overlayed = findsup(lookupsig, method_table(interp)) - match === nothing && return CallMeta(Any, Effects(), NoCallInfo()) - update_valid_age!(sv, valid_worlds) - method = match.method - tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector - ti = tienv[1]; env = tienv[2]::SimpleVector - result = abstract_call_method(interp, method, ti, env, false, si, sv) - (; rt, edge, effects) = result - match = MethodMatch(ti, env, method, argtype <: method.sig) - res = nothing - sig = match.spec_types - argtypes′ = invoke_rewrite(argtypes) - fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs) - arginfo = ArgInfo(fargs′, argtypes′) - # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons - # for i in 1:length(argtypes′) - # t, a = ti.parameters[i], argtypes′[i] - # argtypes′[i] = t ⊑ a ? t : a - # end - 𝕃ₚ = ipo_lattice(interp) - f = overlayed ? 
nothing : singleton_type(ft′) - invokecall = InvokeCall(types, lookupsig) - const_call_result = abstract_call_method_with_const_args(interp, - result, f, arginfo, si, match, sv, invokecall) - const_result = nothing - if const_call_result !== nothing - if ⊑(𝕃ₚ, const_call_result.rt, rt) - (; rt, effects, const_result, edge) = const_call_result - end - end - rt = from_interprocedural!(interp, rt, sv, arginfo, sig) - effects = Effects(effects; nonoverlayed=!overlayed) - info = InvokeCallInfo(match, const_result) - edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge) - return CallMeta(rt, effects, info) -end - -function invoke_rewrite(xs::Vector{Any}) - x0 = xs[2] - newxs = xs[3:end] - newxs[1] = x0 - return newxs -end - -function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState) - if length(argtypes) == 3 - finalizer_argvec = Any[argtypes[2], argtypes[3]] - call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1) - return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects)) - end - return CallMeta(Nothing, Effects(), NoCallInfo()) -end - -# call where the function is known exactly -function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), - arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, - max_methods::Int = get_max_methods(interp, f, sv)) - (; fargs, argtypes) = arginfo - la = length(argtypes) - - 𝕃ᵢ = typeinf_lattice(interp) - if isa(f, Builtin) - if f === _apply_iterate - return abstract_apply(interp, argtypes, si, sv, max_methods) - elseif f === invoke - return abstract_invoke(interp, arginfo, si, sv) - elseif f === modifyfield! - return abstract_modifyfield!(interp, argtypes, si, sv) - elseif f === Core.finalizer - return abstract_finalizer(interp, argtypes, sv) - elseif f === applicable - return abstract_applicable(interp, argtypes, sv, max_methods) - end - rt = abstract_call_builtin(interp, f, arginfo, sv) - effects = builtin_effects(𝕃ᵢ, f, arginfo, rt) - if f === getfield && (fargs !== nothing && isexpr(fargs[end], :boundscheck)) && !is_nothrow(effects) && isa(sv, InferenceState) - # As a special case, we delayed tainting `noinbounds` for getfield calls in case we can prove - # in-boundedness indepedently. Here we need to put that back in other cases. - # N.B.: This isn't about the effects of the call itself, but a delayed contribution of the :boundscheck - # statement, so we need to merge this directly into sv, rather than modifying thte effects. - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; noinbounds=false, - consistent = (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 ? ALWAYS_FALSE : ALWAYS_TRUE)) - end - return CallMeta(rt, effects, NoCallInfo()) - elseif isa(f, Core.OpaqueClosure) - # calling an OpaqueClosure about which we have no information returns no information - return CallMeta(typeof(f).parameters[2], Effects(), NoCallInfo()) - elseif f === TypeVar - # Manually look through the definition of TypeVar to - # make sure to be able to get `PartialTypeVar`s out. 
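# [Editor's note - illustrative sketch, not part of the original diff] The arity
# handling below mirrors the user-facing `TypeVar` constructors (standard Julia
# semantics, shown for orientation):
#
#     TypeVar(:T)                    # lb = Union{}, ub = Any
#     TypeVar(:T, Integer)           # lb = Union{}, ub = Integer
#     TypeVar(:T, Signed, Integer)   # lb = Signed,  ub = Integer
#
# plus the function value itself as the first call argument, which is why `la`
# is required to be between 2 and 4 here.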
- (la < 2 || la > 4) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - n = argtypes[2] - ub_var = Const(Any) - lb_var = Const(Union{}) - if la == 4 - ub_var = argtypes[4] - lb_var = argtypes[3] - elseif la == 3 - ub_var = argtypes[3] - end - pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var) - effects = builtin_effects(𝕃ᵢ, Core._typevar, ArgInfo(nothing, - Any[Const(Core._typevar), n, lb_var, ub_var]), pT) - return CallMeta(pT, effects, NoCallInfo()) - elseif f === UnionAll - return abstract_call_unionall(interp, argtypes) - elseif f === Tuple && la == 2 - aty = argtypes[2] - ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty) - if !isconcretetype(ty) - return CallMeta(Tuple, EFFECTS_UNKNOWN, NoCallInfo()) - end - elseif is_return_type(f) - return return_type_tfunc(interp, argtypes, si, sv) - elseif la == 2 && istopfunction(f, :!) - # handle Conditional propagation through !Bool - aty = argtypes[2] - if isa(aty, Conditional) - call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), si, Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)` - return CallMeta(Conditional(aty.slot, aty.elsetype, aty.thentype), call.effects, call.info) - end - elseif la == 3 && istopfunction(f, :!==) - # mark !== as exactly a negated call to === - rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt - if isa(rty, Conditional) - return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), EFFECTS_TOTAL, NoCallInfo()) # swap if-else - elseif isa(rty, Const) - return CallMeta(Const(rty.val === false), EFFECTS_TOTAL, MethodResultPure()) - end - return CallMeta(rty, EFFECTS_TOTAL, NoCallInfo()) - elseif la == 3 && istopfunction(f, :(>:)) - # mark issupertype as a exact alias for issubtype - # swap T1 and T2 arguments and call <: - if fargs !== nothing && length(fargs) == 3 - fargs = Any[<:, fargs[3], fargs[2]] - else - fargs = nothing - end - argtypes = Any[typeof(<:), argtypes[3], argtypes[2]] - return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods) - elseif la == 2 && istopfunction(f, :typename) - return CallMeta(typename_static(argtypes[2]), EFFECTS_TOTAL, MethodResultPure()) - elseif f === Core._hasmethod - return _hasmethod_tfunc(interp, argtypes, sv) - end - atype = argtypes_to_type(argtypes) - return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods) -end - -function abstract_call_opaque_closure(interp::AbstractInterpreter, - closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::InferenceState, check::Bool=true) - sig = argtypes_to_type(arginfo.argtypes) - result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, si, sv) - (; rt, edge, effects) = result - tt = closure.typ - sigT = (unwrap_unionall(tt)::DataType).parameters[1] - match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt)) - 𝕃ₚ = ipo_lattice(interp) - ⊑ₚ = ⊑(𝕃ₚ) - const_result = nothing - if !result.edgecycle - const_call_result = abstract_call_method_with_const_args(interp, result, - nothing, arginfo, si, match, sv) - if const_call_result !== nothing - if const_call_result.rt ⊑ₚ rt - (; rt, effects, const_result, edge) = const_call_result - end - end - end - if check # analyze implicit type asserts on argument and return type - ftt = closure.typ - (aty, rty) = (unwrap_unionall(ftt)::DataType).parameters - rty = rewrap_unionall(rty isa TypeVar ? 
rty.lb : rty, ftt) - if !(rt ⊑ₚ rty && tuple_tfunc(𝕃ₚ, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt)) - effects = Effects(effects; nothrow=false) - end - end - rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) - info = OpaqueClosureCallInfo(match, const_result) - edge !== nothing && add_backedge!(sv, edge) - return CallMeta(rt, effects, info) -end - -function most_general_argtypes(closure::PartialOpaque) - ret = Any[] - cc = widenconst(closure) - argt = (unwrap_unionall(cc)::DataType).parameters[1] - if !isa(argt, DataType) || argt.name !== typename(Tuple) - argt = Tuple - end - return Any[argt.parameters...] -end - -function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft), - arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, - max_methods::Int) - if isa(ft, PartialOpaque) - newargtypes = copy(arginfo.argtypes) - newargtypes[1] = ft.env - return abstract_call_opaque_closure(interp, - ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) - end - wft = widenconst(ft) - if hasintersect(wft, Builtin) - add_remark!(interp, sv, "Could not identify method table for call") - return CallMeta(Any, Effects(), NoCallInfo()) - elseif hasintersect(wft, Core.OpaqueClosure) - uft = unwrap_unionall(wft) - if isa(uft, DataType) - return CallMeta(rewrap_unionall(uft.parameters[2], wft), Effects(), NoCallInfo()) - end - return CallMeta(Any, Effects(), NoCallInfo()) - end - # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic - atype = argtypes_to_type(arginfo.argtypes) - return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods) -end - -# call where the function is any lattice element -function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, - sv::AbsIntState, max_methods::Int=typemin(Int)) - ft = widenslotwrapper(arginfo.argtypes[1]) - f = singleton_type(ft) - if f === nothing - max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods - return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods) - end - max_methods = max_methods == typemin(Int) ? get_max_methods(interp, f, sv) : max_methods - return abstract_call_known(interp, f, arginfo, si, sv, max_methods) -end - -function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool) - isref = false - if unwrapva(T) === Bottom - return Bottom - elseif isa(T, Type) - if isa(T, DataType) && (T::DataType).name === Ref.body.name - isref = true - T = T.parameters[1] - if isreturn && T === Any - return Bottom # a return type of Ref{Any} is invalid - end - end - else - return Any - end - if isa(linfo.def, Method) - spsig = linfo.def.sig - if isa(spsig, UnionAll) - if !isempty(linfo.sparam_vals) - sparam_vals = Any[isvarargtype(v) ? 
TypeVar(:N, Union{}, Any) : - v for v in linfo.sparam_vals] - T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals) - isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any - for v in sparam_vals - if isa(v, TypeVar) - T = UnionAll(v, T) - end - end - else - T = rewrap_unionall(T, spsig) - end - end - end - return unwraptv(T) -end - -function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - f = abstract_eval_value(interp, e.args[2], vtypes, sv) - # rt = sp_type_rewrap(e.args[3], sv.linfo, true) - atv = e.args[4]::SimpleVector - at = Vector{Any}(undef, length(atv) + 1) - at[1] = f - for i = 1:length(atv) - at[i + 1] = sp_type_rewrap(at[i], frame_instance(sv), false) - at[i + 1] === Bottom && return - end - # this may be the wrong world for the call, - # but some of the result is likely to be valid anyways - # and that may help generate better codegen - abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv) - nothing -end - -function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - rt = Any - head = e.head - if head === :static_parameter - n = e.args[1]::Int - nothrow = false - if 1 <= n <= length(sv.sptypes) - sp = sv.sptypes[n] - rt = sp.typ - nothrow = !sp.undef - end - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow)) - return rt - elseif head === :boundscheck - if isa(sv, InferenceState) - stmt = sv.src.code[sv.currpc] - if isexpr(stmt, :call) - f = abstract_eval_value(interp, stmt.args[1], vtypes, sv) - if f isa Const && f.val === getfield - # boundscheck of `getfield` call is analyzed by tfunc potentially without - # tainting :inbounds or :consistent when it's known to be nothrow - @goto delay_effects_analysis - end - end - # If there is no particular `@inbounds` for this function, then we only taint `:noinbounds`, - # which will subsequently taint `:consistent`-cy if this function is called from another - # function that uses `@inbounds`. However, if this `:boundscheck` is itself within an - # `@inbounds` region, its value depends on `--check-bounds`, so we need to taint - # `:consistent`-cy here also. - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; noinbounds=false, - consistent = (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 ? 
ALWAYS_FALSE : ALWAYS_TRUE)) - end - @label delay_effects_analysis - rt = Bool - elseif head === :inbounds - @assert false && "Expected this to have been moved into flags" - elseif head === :the_exception - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)) - end - return rt -end - -function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - if isa(e, QuoteNode) - return Const(e.value) - elseif isa(e, SSAValue) - return abstract_eval_ssavalue(e, sv) - elseif isa(e, SlotNumber) - if vtypes !== nothing - vtyp = vtypes[slot_id(e)] - if vtyp.undef - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false)) - end - return vtyp.typ - end - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false)) - return Any - elseif isa(e, Argument) - if vtypes !== nothing - return vtypes[slot_id(e)].typ - else - @assert isa(sv, IRInterpretationState) - return sv.ir.argtypes[e.n] # TODO frame_argtypes(sv)[e.n] and remove the assertion - end - elseif isa(e, GlobalRef) - return abstract_eval_globalref(interp, e, sv) - end - - return Const(e) -end - -function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - if isa(e, Expr) - return abstract_eval_value_expr(interp, e, vtypes, sv) - else - typ = abstract_eval_special_value(interp, e, vtypes, sv) - return collect_limitations!(typ, sv) - end -end - -function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - n = length(ea) - argtypes = Vector{Any}(undef, n) - @inbounds for i = 1:n - ai = abstract_eval_value(interp, ea[i], vtypes, sv) - if ai === Bottom - return nothing - end - argtypes[i] = ai - end - return argtypes -end - -struct RTEffects - rt - effects::Effects - RTEffects(@nospecialize(rt), effects::Effects) = new(rt, effects) -end - -function mark_curr_effect_flags!(sv::AbsIntState, effects::Effects) - if isa(sv, InferenceState) - if is_effect_free(effects) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - else - sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - end - if is_nothrow(effects) - add_curr_ssaflag!(sv, IR_FLAG_NOTHROW) - else - sub_curr_ssaflag!(sv, IR_FLAG_NOTHROW) - end - if is_consistent(effects) - add_curr_ssaflag!(sv, IR_FLAG_CONSISTENT) - else - sub_curr_ssaflag!(sv, IR_FLAG_CONSISTENT) - end - end -end - -function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState) - si = StmtInfo(!call_result_unused(sv, sv.currpc)) - (; rt, effects, info) = abstract_call(interp, arginfo, si, sv) - sv.stmt_info[sv.currpc] = info - # mark this call statement as DCE-elgible - # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret? 
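# [Editor's note - illustrative sketch, not part of the original diff] The flags
# recorded here are what later allows the optimizer to delete an unused call. A
# rough way to inspect the inferred effect bits from the REPL (assuming the
# reflection entry point `Base.infer_effects`, available in recent Julia
# versions; the exact printed form may differ):
#
#     pure_add(x::Int, y::Int) = x + y
#     # julia> Base.infer_effects(pure_add, (Int, Int))
#     # (+c,+e,+n,+t, ...)   # consistent, effect-free, nothrow, terminates, ...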
- mark_curr_effect_flags!(sv, effects) - return RTEffects(rt, effects) -end - -function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, - sv::AbsIntState) - ea = e.args - argtypes = collect_argtypes(interp, ea, vtypes, sv) - if argtypes === nothing - return RTEffects(Bottom, Effects()) - end - arginfo = ArgInfo(ea, argtypes) - return abstract_call(interp, arginfo, sv) -end - -function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, - sv::AbsIntState) - effects = Effects() - ehead = e.head - 𝕃ᵢ = typeinf_lattice(interp) - ⊑ᵢ = ⊑(𝕃ᵢ) - if ehead === :call - (; rt, effects) = abstract_eval_call(interp, e, vtypes, sv) - t = rt - elseif ehead === :new - t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv)) - ut = unwrap_unionall(t) - consistent = ALWAYS_FALSE - nothrow = false - if isa(ut, DataType) && !isabstracttype(ut) - ismutable = ismutabletype(ut) - fcount = datatype_fieldcount(ut) - nargs = length(e.args) - 1 - if (fcount === nothing || (fcount > nargs && (let t = t - any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount) - end))) - # allocation with undefined field leads to undefined behavior and should taint `:consistent`-cy - consistent = ALWAYS_FALSE - elseif ismutable - # mutable object isn't `:consistent`, but we can still give the return - # type information a chance to refine this `:consistent`-cy later - consistent = CONSISTENT_IF_NOTRETURNED - else - consistent = ALWAYS_TRUE - end - if isconcretedispatch(t) - nothrow = true - @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end - ats = Vector{Any}(undef, nargs) - local anyrefine = false - local allconst = true - for i = 1:nargs - at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], vtypes, sv)) - ft = fieldtype(t, i) - nothrow && (nothrow = at ⊑ᵢ ft) - at = tmeet(𝕃ᵢ, at, ft) - at === Bottom && @goto always_throw - if ismutable && !isconst(t, i) - ats[i] = ft # can't constrain this field (as it may be modified later) - continue - end - allconst &= isa(at, Const) - if !anyrefine - anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information - ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type - end - ats[i] = at - end - # For now, don't allow: - # - Const/PartialStruct of mutables (but still allow PartialStruct of mutables - # with `const` fields if anything refined) - # - partially initialized Const/PartialStruct - if fcount == nargs - if consistent === ALWAYS_TRUE && allconst - argvals = Vector{Any}(undef, nargs) - for j in 1:nargs - argvals[j] = (ats[j]::Const).val - end - t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs)) - elseif anyrefine - t = PartialStruct(t, ats) - end - end - else - t = refine_partial_type(t) - end - end - effects = Effects(EFFECTS_TOTAL; consistent, nothrow) - elseif ehead === :splatnew - t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv)) - nothrow = false # TODO: More precision - if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t) - at = abstract_eval_value(interp, e.args[2], vtypes, sv) - n = fieldcount(t) - if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) && - (let t = t, at = at - all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n) - end)) - nothrow = isexact - t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, 
at.val)) - elseif (isa(at, PartialStruct) && at ⊑ᵢ Tuple && n > 0 && n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) && - (let t = t, at = at, ⊑ᵢ = ⊑ᵢ - all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n) - end)) - nothrow = isexact - t = PartialStruct(t, at.fields::Vector{Any}) - end - else - t = refine_partial_type(t) - end - consistent = !ismutabletype(t) ? ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED - effects = Effects(EFFECTS_TOTAL; consistent, nothrow) - elseif ehead === :new_opaque_closure - t = Union{} - effects = Effects() # TODO - merge_effects!(interp, sv, effects) - if length(e.args) >= 4 - ea = e.args - argtypes = collect_argtypes(interp, ea, vtypes, sv) - if argtypes === nothing - t = Bottom - else - mi = frame_instance(sv) - t = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3], - argtypes[4], argtypes[5:end], mi) - if isa(t, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc) - # Infer this now so that the specialization is available to - # optimization. - argtypes = most_general_argtypes(t) - pushfirst!(argtypes, t.env) - callinfo = abstract_call_opaque_closure(interp, t, - ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false) - sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo) - end - end - end - elseif ehead === :foreigncall - (; rt, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv) - t = rt - mark_curr_effect_flags!(sv, effects) - elseif ehead === :cfunction - effects = EFFECTS_UNKNOWN - t = e.args[1] - isa(t, Type) || (t = Any) - abstract_eval_cfunction(interp, e, vtypes, sv) - elseif ehead === :method - t = (length(e.args) == 1) ? Any : Nothing - effects = EFFECTS_UNKNOWN - elseif ehead === :copyast - effects = EFFECTS_UNKNOWN - t = abstract_eval_value(interp, e.args[1], vtypes, sv) - if t isa Const && t.val isa Expr - # `copyast` makes copies of Exprs - t = Expr - end - elseif ehead === :invoke || ehead === :invoke_modify - error("type inference data-flow error: tried to double infer a function") - elseif ehead === :isdefined - sym = e.args[1] - t = Bool - effects = EFFECTS_TOTAL - if isa(sym, SlotNumber) && vtypes !== nothing - vtyp = vtypes[slot_id(sym)] - if vtyp.typ === Bottom - t = Const(false) # never assigned previously - elseif !vtyp.undef - t = Const(true) # definitely assigned previously - end - elseif isa(sym, Symbol) - if isdefined(frame_module(sv), sym) - t = Const(true) - elseif InferenceParams(interp).assume_bindings_static - t = Const(false) - end - elseif isa(sym, GlobalRef) - if isdefined(sym.mod, sym.name) - t = Const(true) - elseif InferenceParams(interp).assume_bindings_static - t = Const(false) - end - elseif isexpr(sym, :static_parameter) - n = sym.args[1]::Int - if 1 <= n <= length(sv.sptypes) - sp = sv.sptypes[n] - if !sp.undef - t = Const(true) - elseif sp.typ === Bottom - t = Const(false) - end - end - end - elseif false - @label always_throw - t = Bottom - effects = EFFECTS_THROWS - else - t = abstract_eval_value_expr(interp, e, vtypes, sv) - effects = EFFECTS_TOTAL - end - return RTEffects(t, effects) -end - -# refine the result of instantiation of partially-known type `t` if some invariant can be assumed -function refine_partial_type(@nospecialize t) - t′ = unwrap_unionall(t) - if isa(t′, DataType) && t′.name === _NAMEDTUPLE_NAME && length(t′.parameters) == 2 && - (t′.parameters[1] === () || t′.parameters[2] === Tuple{}) - # if the first/second parameter of `NamedTuple` is known to be empty, - # the second/first argument should also be 
empty tuple type, - # so refine it here - return Const(NamedTuple()) - end - return t -end - -function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - abstract_eval_value(interp, e.args[1], vtypes, sv) - mi = frame_instance(sv) - t = sp_type_rewrap(e.args[2], mi, true) - for i = 3:length(e.args) - if abstract_eval_value(interp, e.args[i], vtypes, sv) === Bottom - return RTEffects(Bottom, EFFECTS_THROWS) - end - end - effects = foreigncall_effects(e) do @nospecialize x - abstract_eval_value(interp, x, vtypes, sv) - end - cconv = e.args[5] - if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt8})) - override = decode_effects_override(v[2]) - effects = Effects( - override.consistent ? ALWAYS_TRUE : effects.consistent, - override.effect_free ? ALWAYS_TRUE : effects.effect_free, - override.nothrow ? true : effects.nothrow, - override.terminates_globally ? true : effects.terminates, - override.notaskstate ? true : effects.notaskstate, - override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly, - effects.nonoverlayed, - effects.noinbounds) - end - return RTEffects(t, effects) -end - -function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - rt = Union{} - for i in 1:length(phi.values) - isassigned(phi.values, i) || continue - val = phi.values[i] - rt = tmerge(typeinf_lattice(interp), rt, abstract_eval_special_value(interp, val, vtypes, sv)) - end - return rt -end - -function stmt_taints_inbounds_consistency(sv::AbsIntState) - propagate_inbounds(sv) && return true - return (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 -end - -function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) - if !isa(e, Expr) - if isa(e, PhiNode) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) - return abstract_eval_phi(interp, e, vtypes, sv) - end - return abstract_eval_special_value(interp, e, vtypes, sv) - end - (; rt, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv) - if !effects.noinbounds - if !propagate_inbounds(sv) - # The callee read our inbounds flag, but unless we propagate inbounds, - # we ourselves don't read our parent's inbounds. 
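# [Editor's note - illustrative sketch, not part of the original diff] Sketch of
# the situation handled here: a bounds check elided by a caller's `@inbounds`
# makes the callee's behavior depend on its caller (and on `--check-bounds`),
# which is why `:consistent`-cy gets tainted below:
#
#     unsafe_get(a::Vector{Int}, i::Int) = @inbounds a[i]
#     # In-bounds calls behave normally; an out-of-bounds call such as
#     # `unsafe_get([1, 2, 3], 10)` is undefined behavior instead of a reliable
#     # BoundsError, so it must not be modeled as :nothrow/:consistent in general.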
- effects = Effects(effects; noinbounds=true) - end - if (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 - effects = Effects(effects; consistent=ALWAYS_FALSE) - end - end - merge_effects!(interp, sv, effects) - e = e::Expr - @assert !isa(rt, TypeVar) "unhandled TypeVar" - rt = maybe_singleton_const(rt) - if !isempty(sv.pclimitations) - if rt isa Const || rt === Union{} - empty!(sv.pclimitations) - else - rt = LimitedAccuracy(rt, sv.pclimitations) - sv.pclimitations = IdSet{InferenceState}() - end - end - return rt -end - -function isdefined_globalref(g::GlobalRef) - return ccall(:jl_globalref_boundp, Cint, (Any,), g) != 0 -end - -function abstract_eval_globalref(g::GlobalRef) - if isdefined_globalref(g) && isconst(g) - return Const(ccall(:jl_get_globalref_value, Any, (Any,), g)) - end - ty = ccall(:jl_get_binding_type, Any, (Any, Any), g.mod, g.name) - ty === nothing && return Any - return ty -end -abstract_eval_global(M::Module, s::Symbol) = abstract_eval_globalref(GlobalRef(M, s)) - -function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState) - rt = abstract_eval_globalref(g) - consistent = inaccessiblememonly = ALWAYS_FALSE - nothrow = false - if isa(rt, Const) - consistent = ALWAYS_TRUE - nothrow = true - if is_mutation_free_argtype(rt) - inaccessiblememonly = ALWAYS_TRUE - end - elseif isdefined_globalref(g) - nothrow = true - elseif InferenceParams(interp).assume_bindings_static - consistent = inaccessiblememonly = ALWAYS_TRUE - rt = Union{} - end - merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)) - return rt -end - -function handle_global_assignment!(interp::AbstractInterpreter, frame::InferenceState, lhs::GlobalRef, @nospecialize(newty)) - effect_free = ALWAYS_FALSE - nothrow = global_assignment_nothrow(lhs.mod, lhs.name, newty) - inaccessiblememonly = ALWAYS_FALSE - merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; effect_free, nothrow, inaccessiblememonly)) - return nothing -end - -abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.ssavaluetypes) - -function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any}) - typ = ssavaluetypes[s.id] - if typ === NOT_FOUND - return Bottom - end - return typ -end - -struct BestguessInfo{Interp<:AbstractInterpreter} - interp::Interp - bestguess - nargs::Int - slottypes::Vector{Any} - changes::VarTable - function BestguessInfo(interp::Interp, @nospecialize(bestguess), nargs::Int, - slottypes::Vector{Any}, changes::VarTable) where Interp<:AbstractInterpreter - new{Interp}(interp, bestguess, nargs, slottypes, changes) - end -end - -@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo) - return widenreturn(typeinf_lattice(info.interp), rt, info) -end - -@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) - return widenreturn(widenlattice(𝕃ᵢ), rt, info) -end -@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) - return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info) -end - -@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) - if isa(rt, MustAlias) - if 1 ≤ rt.slot ≤ info.nargs - rt = InterMustAlias(rt) - else - rt = widenmustalias(rt) - end - end - isa(rt, InterMustAlias) && return rt - return widenreturn(widenlattice(𝕃ᵢ), rt, info) -end - -@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), 
info::BestguessInfo) - ⊑ᵢ = ⊑(𝕃ᵢ) - if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool - # give up inter-procedural constraint back-propagation - # when tmerge would widen the result anyways (as an optimization) - rt = widenconditional(rt) - else - if isa(rt, Conditional) - id = rt.slot - if 1 ≤ id ≤ info.nargs - old_id_type = widenconditional(info.slottypes[id]) # same as `(states[1]::VarTable)[id].typ` - if (!(rt.thentype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.thentype) && - (!(rt.elsetype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.elsetype) - # discard this `Conditional` since it imposes - # no new constraint on the argument type - # (the caller will recreate it if needed) - rt = widenconditional(rt) - end - else - # discard this `Conditional` imposed on non-call arguments, - # since it's not interesting in inter-procedural context; - # we may give constraints on other call argument - rt = widenconditional(rt) - end - end - if isa(rt, Conditional) - rt = InterConditional(rt.slot, rt.thentype, rt.elsetype) - elseif is_lattice_bool(𝕃ᵢ, rt) - rt = bool_rt_to_conditional(rt, info) - end - end - if isa(rt, Conditional) - rt = InterConditional(rt) - end - isa(rt, InterConditional) && return rt - return widenreturn(widenlattice(𝕃ᵢ), rt, info) -end -@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) - bestguess = info.bestguess - if isa(bestguess, InterConditional) - # if the bestguess so far is already `Conditional`, try to convert - # this `rt` into `Conditional` on the slot to avoid overapproximation - # due to conflict of different slots - rt = bool_rt_to_conditional(rt, bestguess.slot, info) - else - # pick up the first "interesting" slot, convert `rt` to its `Conditional` - # TODO: ideally we want `Conditional` and `InterConditional` to convey - # constraints on multiple slots - for slot_id = 1:info.nargs - rt = bool_rt_to_conditional(rt, slot_id, info) - rt isa InterConditional && break - end - end - return rt -end -@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo) - ⊑ᵢ = ⊑(typeinf_lattice(info.interp)) - old = info.slottypes[slot_id] - new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional - if new ⊑ᵢ old && !(old ⊑ᵢ new) - if isa(rt, Const) - val = rt.val - if val === true - return InterConditional(slot_id, new, Bottom) - elseif val === false - return InterConditional(slot_id, Bottom, new) - end - elseif rt === Bool - return InterConditional(slot_id, new, new) - end - end - return rt -end - -@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) - return widenreturn_partials(𝕃ᵢ, rt, info) -end -@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) - return widenreturn_partials(𝕃ᵢ, rt, info) -end -@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) - if isa(rt, PartialStruct) - fields = copy(rt.fields) - local anyrefine = false - 𝕃 = typeinf_lattice(info.interp) - for i in 1:length(fields) - a = fields[i] - a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info) - if !anyrefine - # TODO: consider adding && const_prop_profitable(a) here? 
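As a hedged, user-level illustration of the `Conditional` to `InterConditional` conversion above (not part of the diff): a predicate such as `isnothing` has its return widened to an `InterConditional` on its first argument, which is what lets a caller refine that argument across the call:

    # `isnothing(x)` infers as InterConditional on the slot of `x` (thentype Nothing,
    # elsetype Int), so the false branch sees `x::Int` and `g` infers as `Int`.
    g(x::Union{Int,Nothing}) = isnothing(x) ? 0 : x + 1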
- anyrefine = has_extended_info(a) || - ⊏(𝕃, a, fieldtype(rt.typ, i)) - end - fields[i] = a - end - anyrefine && return PartialStruct(rt.typ, fields) - end - if isa(rt, PartialOpaque) - return rt # XXX: this case was missed in #39512 - end - return widenreturn(widenlattice(𝕃ᵢ), rt, info) -end - -@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) - return widenreturn_consts(rt) -end -@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) - return widenreturn_consts(rt) -end -@nospecializeinfer function widenreturn_consts(@nospecialize(rt)) - isa(rt, Const) && return rt - return widenconst(rt) -end - -@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) - return widenconst(rt) -end -@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) - return widenconst(rt) -end - -function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceState, from::Int, to::Int) - if from > to - if is_effect_overridden(frame, :terminates_locally) - # this backedge is known to terminate - else - merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; terminates=false)) - end - end - return nothing -end - -struct BasicStmtChange - changes::Union{Nothing,StateUpdate} - type::Any # ::Union{Type, Nothing} - `nothing` if this statement may not be used as an SSA Value - # TODO effects::Effects - BasicStmtChange(changes::Union{Nothing,StateUpdate}, @nospecialize type) = new(changes, type) -end - -@inline function abstract_eval_basic_statement(interp::AbstractInterpreter, - @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState) - if isa(stmt, NewvarNode) - changes = StateUpdate(stmt.slot, VarState(Bottom, true), pc_vartable, false) - return BasicStmtChange(changes, nothing) - elseif !isa(stmt, Expr) - t = abstract_eval_statement(interp, stmt, pc_vartable, frame) - return BasicStmtChange(nothing, t) - end - changes = nothing - stmt = stmt::Expr - hd = stmt.head - if hd === :(=) - t = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame) - if t === Bottom - return BasicStmtChange(nothing, Bottom) - end - lhs = stmt.args[1] - if isa(lhs, SlotNumber) - changes = StateUpdate(lhs, VarState(t, false), pc_vartable, false) - elseif isa(lhs, GlobalRef) - handle_global_assignment!(interp, frame, lhs, t) - elseif !isa(lhs, SSAValue) - merge_effects!(interp, frame, EFFECTS_UNKNOWN) - end - return BasicStmtChange(changes, t) - elseif hd === :method - fname = stmt.args[1] - if isa(fname, SlotNumber) - changes = StateUpdate(fname, VarState(Any, false), pc_vartable, false) - end - return BasicStmtChange(changes, nothing) - elseif (hd === :code_coverage_effect || ( - hd !== :boundscheck && # :boundscheck can be narrowed to Bool - is_meta_expr(stmt))) - return BasicStmtChange(nothing, Nothing) - else - t = abstract_eval_statement(interp, stmt, pc_vartable, frame) - return BasicStmtChange(nothing, t) - end -end - -function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable) - bbtable = frame.bb_vartables[bb] - if bbtable === nothing - # if a basic block hasn't been analyzed yet, - # we can update its state a bit more aggressively - frame.bb_vartables[bb] = copy(vartable) - return true - else - return stupdate!(𝕃ᵢ, bbtable, vartable) - end -end - -function init_vartable!(vartable::VarTable, frame::InferenceState) - nargtypes = length(frame.result.argtypes) - for i = 1:length(vartable) - 
vartable[i] = VarState(Bottom, i > nargtypes) - end - return vartable -end - -# make as much progress on `frame` as possible (without handling cycles) -function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) - @assert !is_inferred(frame) - frame.dont_work_on_me = true # mark that this function is currently on the stack - W = frame.ip - nargs = narguments(frame, #=include_va=#false) - slottypes = frame.slottypes - ssavaluetypes = frame.ssavaluetypes - bbs = frame.cfg.blocks - nbbs = length(bbs) - 𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp) - - currbb = frame.currbb - if currbb != 1 - currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block - end - - states = frame.bb_vartables - currstate = copy(states[currbb]::VarTable) - while currbb <= nbbs - delete!(W, currbb) - bbstart = first(bbs[currbb].stmts) - bbend = last(bbs[currbb].stmts) - - for currpc in bbstart:bbend - frame.currpc = currpc - empty_backedges!(frame, currpc) - stmt = frame.src.code[currpc] - # If we're at the end of the basic block ... - if currpc == bbend - # Handle control flow - if isa(stmt, GotoNode) - succs = bbs[currbb].succs - @assert length(succs) == 1 - nextbb = succs[1] - ssavaluetypes[currpc] = Any - handle_control_backedge!(interp, frame, currpc, stmt.label) - @goto branch - elseif isa(stmt, GotoIfNot) - condx = stmt.cond - condt = abstract_eval_value(interp, condx, currstate, frame) - if condt === Bottom - ssavaluetypes[currpc] = Bottom - empty!(frame.pclimitations) - @goto find_next_bb - end - orig_condt = condt - if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condx, SlotNumber) - # if this non-`Conditional` object is a slot, we form and propagate - # the conditional constraint on it - condt = Conditional(condx, Const(true), Const(false)) - end - condval = maybe_extract_const_bool(condt) - if !isempty(frame.pclimitations) - # we can't model the possible effect of control - # dependencies on the return - # directly to all the return values (unless we error first) - condval isa Bool || union!(frame.limitations, frame.pclimitations) - empty!(frame.pclimitations) - end - ssavaluetypes[currpc] = Any - if condval === true - @goto fallthrough - else - succs = bbs[currbb].succs - if length(succs) == 1 - @assert condval === false || (stmt.dest === currpc + 1) - nextbb = succs[1] - @goto branch - end - @assert length(succs) == 2 - truebb = currbb + 1 - falsebb = succs[1] == truebb ? succs[2] : succs[1] - if condval === false - nextbb = falsebb - handle_control_backedge!(interp, frame, currpc, stmt.dest) - @goto branch - else - if !⊑(𝕃ᵢ, orig_condt, Bool) - merge_effects!(interp, frame, EFFECTS_THROWS) - if !hasintersect(widenconst(orig_condt), Bool) - ssavaluetypes[currpc] = Bottom - @goto find_next_bb - end - end - - # We continue with the true branch, but process the false - # branch here. 
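As a hedged illustration of the branch splitting performed here (example code only, not from the diff): each successor edge of the `GotoIfNot` is analyzed under the corresponding refinement of the tested slot:

    function f(x::Union{Int,Nothing})
        if x isa Int
            return x + 1    # the true edge re-types the slot with `condt.thentype`, i.e. `x::Int`
        else
            return 0        # the false edge uses `condt.elsetype`, i.e. `x::Nothing`
        end
    end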
- if isa(condt, Conditional) - else_change = conditional_change(𝕃ᵢ, currstate, condt.elsetype, condt.slot) - if else_change !== nothing - false_vartable = stoverwrite1!(copy(currstate), else_change) - else - false_vartable = currstate - end - changed = update_bbstate!(𝕃ᵢ, frame, falsebb, false_vartable) - then_change = conditional_change(𝕃ᵢ, currstate, condt.thentype, condt.slot) - if then_change !== nothing - stoverwrite1!(currstate, then_change) - end - else - changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate) - end - if changed - handle_control_backedge!(interp, frame, currpc, stmt.dest) - push!(W, falsebb) - end - @goto fallthrough - end - end - elseif isa(stmt, ReturnNode) - bestguess = frame.bestguess - rt = abstract_eval_value(interp, stmt.val, currstate, frame) - rt = widenreturn(rt, BestguessInfo(interp, bestguess, nargs, slottypes, currstate)) - # narrow representation of bestguess slightly to prepare for tmerge with rt - if rt isa InterConditional && bestguess isa Const - let slot_id = rt.slot - old_id_type = slottypes[slot_id] - if bestguess.val === true && rt.elsetype !== Bottom - bestguess = InterConditional(slot_id, old_id_type, Bottom) - elseif bestguess.val === false && rt.thentype !== Bottom - bestguess = InterConditional(slot_id, Bottom, old_id_type) - end - end - end - # copy limitations to return value - if !isempty(frame.pclimitations) - union!(frame.limitations, frame.pclimitations) - empty!(frame.pclimitations) - end - if !isempty(frame.limitations) - rt = LimitedAccuracy(rt, copy(frame.limitations)) - end - if !⊑(𝕃ₚ, rt, bestguess) - # new (wider) return type for frame - bestguess = tmerge(𝕃ₚ, bestguess, rt) - # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end - frame.bestguess = bestguess - for (caller, caller_pc) in frame.cycle_backedges - if !(caller.ssavaluetypes[caller_pc] === Any) - # no reason to revisit if that call-site doesn't affect the final result - push!(caller.ip, block_for_inst(caller.cfg, caller_pc)) - end - end - end - ssavaluetypes[frame.currpc] = Any - @goto find_next_bb - elseif isexpr(stmt, :enter) - # Propagate entry info to exception handler - l = stmt.args[1]::Int - catchbb = block_for_inst(frame.cfg, l) - if update_bbstate!(𝕃ᵢ, frame, catchbb, currstate) - push!(W, catchbb) - end - ssavaluetypes[currpc] = Any - @goto fallthrough - end - # Fall through terminator - treat as regular stmt - end - # Process non control-flow statements - (; changes, type) = abstract_eval_basic_statement(interp, - stmt, currstate, frame) - if type === Bottom - ssavaluetypes[currpc] = Bottom - @goto find_next_bb - end - if changes !== nothing - stoverwrite1!(currstate, changes) - let cur_hand = frame.handler_at[currpc], l, enter - while cur_hand != 0 - enter = frame.src.code[cur_hand]::Expr - l = enter.args[1]::Int - exceptbb = block_for_inst(frame.cfg, l) - # propagate new type info to exception handler - # the handling for Expr(:enter) propagates all changes from before the try/catch - # so this only needs to propagate any changes - if stupdate1!(𝕃ᵢ, states[exceptbb]::VarTable, changes) - push!(W, exceptbb) - end - cur_hand = frame.handler_at[cur_hand] - end - end - end - if type === nothing - ssavaluetypes[currpc] = Any - continue - end - if !isempty(frame.ssavalue_uses[currpc]) - record_ssa_assign!(𝕃ᵢ, currpc, type, frame) - else - ssavaluetypes[currpc] = type - end - end # for currpc in bbstart:bbend - - # Case 1: Fallthrough termination - begin @label fallthrough - nextbb = currbb + 1 - 
end - - # Case 2: Directly branch to a different BB - begin @label branch - if update_bbstate!(𝕃ᵢ, frame, nextbb, currstate) - push!(W, nextbb) - end - end - - # Case 3: Control flow ended along the current path (converged, return or throw) - begin @label find_next_bb - currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block - currbb == -1 && break # the working set is empty - currbb > nbbs && break - - nexttable = states[currbb] - if nexttable === nothing - init_vartable!(currstate, frame) - else - stoverwrite!(currstate, nexttable) - end - end - end # while currbb <= nbbs - - frame.dont_work_on_me = false - nothing -end - -function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecialize(typ), slot::Int) - vtype = state[slot] - oldtyp = vtype.typ - if iskindtype(typ) - # this code path corresponds to the special handling for `isa(x, iskindtype)` check - # implemented within `abstract_call_builtin` - elseif ⊑(𝕃ᵢ, ignorelimited(typ), ignorelimited(oldtyp)) - # approximate test for `typ ∩ oldtyp` being better than `oldtyp` - # since we probably formed these types with `typesubstract`, - # the comparison is likely simple - else - return nothing - end - if oldtyp isa LimitedAccuracy - # typ is better unlimited, but we may still need to compute the tmeet with the limit - # "causes" since we ignored those in the comparison - typ = tmerge(𝕃ᵢ, typ, LimitedAccuracy(Bottom, oldtyp.causes)) - end - return StateUpdate(SlotNumber(slot), VarState(typ, vtype.undef), state, true) -end - -# make as much progress on `frame` as possible (by handling cycles) -function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState) - typeinf_local(interp, frame) - - # If the current frame is part of a cycle, solve the cycle before finishing - no_active_ips_in_callers = false - while !no_active_ips_in_callers - no_active_ips_in_callers = true - for caller in frame.callers_in_cycle - caller.dont_work_on_me && return false # cycle is above us on the stack - if !isempty(caller.ip) - # Note that `typeinf_local(interp, caller)` can potentially modify the other frames - # `frame.callers_in_cycle`, which is why making incremental progress requires the - # outer while loop. - typeinf_local(interp, caller) - no_active_ips_in_callers = false - end - update_valid_age!(caller, frame.valid_worlds) - end - end - return true -end diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl deleted file mode 100644 index 1f62d21c9d2d9..0000000000000 --- a/base/compiler/bootstrap.jl +++ /dev/null @@ -1,51 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -# make sure that typeinf is executed before turning on typeinf_ext -# this ensures that typeinf_ext doesn't recurse before it can add the item to the workq -# especially try to make sure any recursive and leaf functions have concrete signatures, -# since we won't be able to specialize & infer them at runtime - -time() = ccall(:jl_clock_now, Float64, ()) - -let interp = NativeInterpreter() - - analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, Bool, typeof(null_escape_cache)} - fs = Any[ - # we first create caches for the optimizer, because they contain many loop constructions - # and they're better to not run in interpreter even during bootstrapping - #=analyze_escapes_tt,=# run_passes, - # then we create caches for inference entries - typeinf_ext, typeinf, typeinf_edge, - ] - # tfuncs can't be inferred from the inference entries above, so here we infer them manually - for x in T_FFUNC_VAL - push!(fs, x[3]) - end - for i = 1:length(T_IFUNC) - if isassigned(T_IFUNC, i) - x = T_IFUNC[i] - push!(fs, x[3]) - else - println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i))) - end - end - starttime = time() - for f in fs - if isa(f, DataType) && f.name === typename(Tuple) - tt = f - else - tt = Tuple{typeof(f), Vararg{Any}} - end - for m in _methods_by_ftype(tt, 10, get_world_counter())::Vector - # remove any TypeVars from the intersection - m = m::MethodMatch - typ = Any[m.spec_types.parameters...] - for i = 1:length(typ) - typ[i] = unwraptv(typ[i]) - end - typeinf_type(interp, m.method, Tuple{typ...}, m.sparams) - end - end - endtime = time() - println("Core.Compiler ──── ", sub_float(endtime,starttime), " seconds") -end diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl deleted file mode 100644 index 04b0791d9a79e..0000000000000 --- a/base/compiler/compiler.jl +++ /dev/null @@ -1,177 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -getfield(Core, :eval)(Core, :(baremodule Compiler - -using Core.Intrinsics, Core.IR - -import Core: print, println, show, write, unsafe_write, stdout, stderr, - _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction, - MethodInstance, CodeInstance, MethodTable, MethodMatch, PartialOpaque, - TypeofVararg - -const getproperty = Core.getfield -const setproperty! = Core.setfield! -const swapproperty! = Core.swapfield! -const modifyproperty! = Core.modifyfield! -const replaceproperty! = Core.replacefield! - -ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false) - -eval(x) = Core.eval(Compiler, x) -eval(m, x) = Core.eval(m, x) - -include(x) = Core.include(Compiler, x) -include(mod, x) = Core.include(mod, x) - -# The @inline/@noinline macros that can be applied to a function declaration are not available -# until after array.jl, and so we will mark them within a function body instead. 
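As a brief, hedged illustration of the body-position form this comment refers to (the helper name is made up), the macros defined on the following lines are used like

    half(x::Int) = (@inline; x >> 1)    # marks the whole method for inlining from inside its body

instead of the usual declaration-prefix form `@inline half(x::Int) = x >> 1`.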
-macro inline() Expr(:meta, :inline) end -macro noinline() Expr(:meta, :noinline) end - -convert(::Type{Any}, Core.@nospecialize x) = x -convert(::Type{T}, x::T) where {T} = x - -# mostly used by compiler/methodtable.jl, but also by reflection.jl -abstract type MethodTableView end -abstract type AbstractInterpreter end - -# essential files and libraries -include("essentials.jl") -include("ctypes.jl") -include("generator.jl") -include("reflection.jl") -include("options.jl") - -ntuple(f, ::Val{0}) = () -ntuple(f, ::Val{1}) = (@inline; (f(1),)) -ntuple(f, ::Val{2}) = (@inline; (f(1), f(2))) -ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3))) -ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int) -ntuple(f, n) = (Any[f(i) for i = 1:n]...,) - -# core operations & types -function return_type end # promotion.jl expects this to exist -is_return_type(Core.@nospecialize(f)) = f === return_type -include("promotion.jl") -include("tuple.jl") -include("pair.jl") -include("traits.jl") -include("range.jl") -include("expr.jl") -include("error.jl") - -# core numeric operations & types -==(x::T, y::T) where {T} = x === y -include("bool.jl") -include("number.jl") -include("int.jl") -include("operators.jl") -include("pointer.jl") -include("refvalue.jl") - -# the same constructor as defined in float.jl, but with a different name to avoid redefinition -_Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) -# fld(x,y) == div(x,y) - ((x>=0) != (y>=0) && rem(x,y) != 0 ? 1 : 0) -fld(x::T, y::T) where {T<:Unsigned} = div(x, y) -function fld(x::T, y::T) where T<:Integer - d = div(x, y) - return d - (signbit(x ⊻ y) & (d * y != x)) -end -# cld(x,y) = div(x,y) + ((x>0) == (y>0) && rem(x,y) != 0 ? 1 : 0) -function cld(x::T, y::T) where T<:Unsigned - d = div(x, y) - return d + (d * y != x) -end -function cld(x::T, y::T) where T<:Integer - d = div(x, y) - return d + (((x > 0) == (y > 0)) & (d * y != x)) -end - - -# checked arithmetic -const checked_add = + -const checked_sub = - -const SignedInt = Union{Int8,Int16,Int32,Int64,Int128} -const UnsignedInt = Union{UInt8,UInt16,UInt32,UInt64,UInt128} -sub_with_overflow(x::T, y::T) where {T<:SignedInt} = checked_ssub_int(x, y) -sub_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_usub_int(x, y) -sub_with_overflow(x::Bool, y::Bool) = (x-y, false) -add_with_overflow(x::T, y::T) where {T<:SignedInt} = checked_sadd_int(x, y) -add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y) -add_with_overflow(x::Bool, y::Bool) = (x+y, false) - -include("cmem.jl") -include("strings/lazy.jl") - -# core array operations -include("indices.jl") -include("array.jl") -include("abstractarray.jl") - -# core structures -include("bitarray.jl") -include("bitset.jl") -include("abstractdict.jl") -include("iddict.jl") -include("idset.jl") -include("abstractset.jl") -include("iterators.jl") -using .Iterators: zip, enumerate -using .Iterators: Flatten, Filter, product # for generators -include("namedtuple.jl") - -# core docsystem -include("docs/core.jl") -import Core.Compiler.CoreDocs -Core.atdoc!(CoreDocs.docm) - -# sorting -include("ordering.jl") -using .Order -include("compiler/sort.jl") - -# We don't include some.jl, but this definition is still useful. -something(x::Nothing, y...) = something(y...) -something(x::Any, y...) 
= x - -############ -# compiler # -############ - -include("compiler/cicache.jl") -include("compiler/methodtable.jl") -include("compiler/effects.jl") -include("compiler/types.jl") -include("compiler/utilities.jl") -include("compiler/validation.jl") - -function argextype end # imported by EscapeAnalysis -function stmt_effect_free end # imported by EscapeAnalysis -function alloc_array_ndims end # imported by EscapeAnalysis -function try_compute_field end # imported by EscapeAnalysis -include("compiler/ssair/basicblock.jl") -include("compiler/ssair/domtree.jl") -include("compiler/ssair/ir.jl") - -include("compiler/abstractlattice.jl") -include("compiler/inferenceresult.jl") -include("compiler/inferencestate.jl") - -include("compiler/typeutils.jl") -include("compiler/typelimits.jl") -include("compiler/typelattice.jl") -include("compiler/tfuncs.jl") -include("compiler/stmtinfo.jl") - -include("compiler/abstractinterpretation.jl") -include("compiler/typeinfer.jl") -include("compiler/optimize.jl") - -include("compiler/bootstrap.jl") -ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) - -include("compiler/parsing.jl") -Core._setparser!(fl_parse) - -end # baremodule Compiler -)) diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl deleted file mode 100644 index 7d09769e5b31b..0000000000000 --- a/base/compiler/effects.jl +++ /dev/null @@ -1,265 +0,0 @@ -""" - effects::Effects - -Represents computational effects of a method call. - -The effects are a composition of different effect bits that represent some program property -of the method being analyzed. They are represented as `Bool` or `UInt8` bits with the -following meanings: -- `effects.consistent::UInt8`: - * `ALWAYS_TRUE`: this method is guaranteed to return or terminate consistently. - * `ALWAYS_FALSE`: this method may be not return or terminate consistently, and there is - no need for further analysis with respect to this effect property as this conclusion - will not be refined anyway. - * `CONSISTENT_IF_NOTRETURNED`: the `:consistent`-cy of this method can later be refined to - `ALWAYS_TRUE` in a case when the return value of this method never involves newly - allocated mutable objects. - * `CONSISTENT_IF_INACCESSIBLEMEMONLY`: the `:consistent`-cy of this method can later be - refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven. -- `effect_free::UInt8`: - * `ALWAYS_TRUE`: this method is free from externally semantically visible side effects. - * `ALWAYS_FALSE`: this method may not be free from externally semantically visible side effects, and there is - no need for further analysis with respect to this effect property as this conclusion - will not be refined anyway. - * `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`: the `:effect-free`-ness of this method can later be - refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven. -- `nothrow::Bool`: this method is guaranteed to not throw an exception. -- `terminates::Bool`: this method is guaranteed to terminate. -- `notaskstate::Bool`: this method does not access any state bound to the current - task and may thus be moved to a different task without changing observable - behavior. Note that this currently implies that `noyield` as well, since - yielding modifies the state of the current task, though this may be split - in the future. -- `inaccessiblememonly::UInt8`: - * `ALWAYS_TRUE`: this method does not access or modify externally accessible mutable memory. - This state corresponds to LLVM's `inaccessiblememonly` function attribute. 
- * `ALWAYS_FALSE`: this method may access or modify externally accessible mutable memory. - * `INACCESSIBLEMEM_OR_ARGMEMONLY`: this method does not access or modify externally accessible mutable memory, - except that it may access or modify mutable memory pointed to by its call arguments. - This may later be refined to `ALWAYS_TRUE` in a case when call arguments are known to be immutable. - This state corresponds to LLVM's `inaccessiblemem_or_argmemonly` function attribute. -- `nonoverlayed::Bool`: indicates that any methods that may be called within this method - are not defined in an [overlayed method table](@ref OverlayMethodTable). -- `noinbounds::Bool`: If set, indicates that this method does not read the parent's `:inbounds` - state. In particular, it does not have any reached `:boundscheck` exprs, not propagates inbounds - to any children that do. - -Note that the representations above are just internal implementation details and thus likely -to change in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation -on the definitions of these properties. - -Along the abstract interpretation, `Effects` at each statement are analyzed locally and they -are merged into the single global `Effects` that represents the entire effects of the -analyzed method (see the implementation of `merge_effects!`). Each effect property is -initialized with `ALWAYS_TRUE`/`true` and then transitioned towards `ALWAYS_FALSE`/`false`. -Note that within the current flow-insensitive analysis design, effects detected by local -analysis on each statement usually taint the global conclusion conservatively. - -## Key for `show` output of Effects: - -The output represents the state of different effect properties in the following order: - -1. `consistent` (`c`): - - `+c` (green): `ALWAYS_TRUE` - - `-c` (red): `ALWAYS_FALSE` - - `?c` (yellow): `CONSISTENT_IF_NOTRETURNED` and/or `CONSISTENT_IF_INACCESSIBLEMEMONLY` -2. `effect_free` (`e`): - - `+e` (green): `ALWAYS_TRUE` - - `-e` (red): `ALWAYS_FALSE` - - `?e` (yellow): `EFFECT_FREE_IF_INACCESSIBLEMEMONLY` -3. `nothrow` (`n`): - - `+n` (green): `true` - - `-n` (red): `false` -4. `terminates` (`t`): - - `+t` (green): `true` - - `-t` (red): `false` -5. `notaskstate` (`s`): - - `+s` (green): `true` - - `-s` (red): `false` -6. `inaccessiblememonly` (`m`): - - `+m` (green): `ALWAYS_TRUE` - - `-m` (red): `ALWAYS_FALSE` - - `?m` (yellow): `INACCESSIBLEMEM_OR_ARGMEMONLY` -7. `noinbounds` (`i`): - - `+i` (green): `true` - - `-i` (red): `false` - -Additionally, if the `nonoverlayed` property is false, a red prime symbol (′) is displayed after the tuple. 
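For context (not part of the diff), this key is what effect-analysis entry points print; a hedged REPL sketch, whose exact output varies across Julia versions:

    julia> Base.infer_effects(identity, (Int,))
    (+c,+e,+n,+t,+s,+m,+i)    # every property proven for `identity`; the trailing `+i`
                              # (noinbounds) only appears on versions that track that bit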
-""" -struct Effects - consistent::UInt8 - effect_free::UInt8 - nothrow::Bool - terminates::Bool - notaskstate::Bool - inaccessiblememonly::UInt8 - nonoverlayed::Bool - noinbounds::Bool - function Effects( - consistent::UInt8, - effect_free::UInt8, - nothrow::Bool, - terminates::Bool, - notaskstate::Bool, - inaccessiblememonly::UInt8, - nonoverlayed::Bool, - noinbounds::Bool) - return new( - consistent, - effect_free, - nothrow, - terminates, - notaskstate, - inaccessiblememonly, - nonoverlayed, - noinbounds) - end -end - -const ALWAYS_TRUE = 0x00 -const ALWAYS_FALSE = 0x01 - -# :consistent-cy bits -const CONSISTENT_IF_NOTRETURNED = 0x01 << 1 -const CONSISTENT_IF_INACCESSIBLEMEMONLY = 0x01 << 2 - -# :effect_free-ness bits -const EFFECT_FREE_IF_INACCESSIBLEMEMONLY = 0x01 << 1 - -# :inaccessiblememonly bits -const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1 - -const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, true, true, true, ALWAYS_TRUE, true, true) -const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, false, true, true, ALWAYS_TRUE, true, true) -const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true, true) # unknown mostly, but it's not overlayed and noinbounds at least (e.g. it's not a call) -const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, false, false) # unknown really - -function Effects(e::Effects = _EFFECTS_UNKNOWN; - consistent::UInt8 = e.consistent, - effect_free::UInt8 = e.effect_free, - nothrow::Bool = e.nothrow, - terminates::Bool = e.terminates, - notaskstate::Bool = e.notaskstate, - inaccessiblememonly::UInt8 = e.inaccessiblememonly, - nonoverlayed::Bool = e.nonoverlayed, - noinbounds::Bool = e.noinbounds) - return Effects( - consistent, - effect_free, - nothrow, - terminates, - notaskstate, - inaccessiblememonly, - nonoverlayed, - noinbounds) -end - -function merge_effects(old::Effects, new::Effects) - return Effects( - merge_effectbits(old.consistent, new.consistent), - merge_effectbits(old.effect_free, new.effect_free), - merge_effectbits(old.nothrow, new.nothrow), - merge_effectbits(old.terminates, new.terminates), - merge_effectbits(old.notaskstate, new.notaskstate), - merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly), - merge_effectbits(old.nonoverlayed, new.nonoverlayed), - merge_effectbits(old.noinbounds, new.noinbounds)) -end - -function merge_effectbits(old::UInt8, new::UInt8) - if old === ALWAYS_FALSE || new === ALWAYS_FALSE - return ALWAYS_FALSE - end - return old | new -end -merge_effectbits(old::Bool, new::Bool) = old & new - -is_consistent(effects::Effects) = effects.consistent === ALWAYS_TRUE -is_effect_free(effects::Effects) = effects.effect_free === ALWAYS_TRUE -is_nothrow(effects::Effects) = effects.nothrow -is_terminates(effects::Effects) = effects.terminates -is_notaskstate(effects::Effects) = effects.notaskstate -is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAYS_TRUE -is_nonoverlayed(effects::Effects) = effects.nonoverlayed - -# implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here -is_foldable(effects::Effects) = - is_consistent(effects) && - is_effect_free(effects) && - is_terminates(effects) - -is_foldable_nothrow(effects::Effects) = - is_foldable(effects) && - is_nothrow(effects) - -is_removable_if_unused(effects::Effects) = - is_effect_free(effects) && - is_terminates(effects) && - is_nothrow(effects) - -is_finalizer_inlineable(effects::Effects) = - is_nothrow(effects) && 
- is_notaskstate(effects) - -is_consistent_if_notreturned(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_NOTRETURNED) -is_consistent_if_inaccessiblememonly(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) - -is_effect_free_if_inaccessiblememonly(effects::Effects) = !iszero(effects.effect_free & EFFECT_FREE_IF_INACCESSIBLEMEMONLY) - -is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly === INACCESSIBLEMEM_OR_ARGMEMONLY - -function encode_effects(e::Effects) - return ((e.consistent % UInt32) << 0) | - ((e.effect_free % UInt32) << 3) | - ((e.nothrow % UInt32) << 5) | - ((e.terminates % UInt32) << 6) | - ((e.notaskstate % UInt32) << 7) | - ((e.inaccessiblememonly % UInt32) << 8) | - ((e.nonoverlayed % UInt32) << 10)| - ((e.noinbounds % UInt32) << 11) -end - -function decode_effects(e::UInt32) - return Effects( - UInt8((e >> 0) & 0x07), - UInt8((e >> 3) & 0x03), - _Bool((e >> 5) & 0x01), - _Bool((e >> 6) & 0x01), - _Bool((e >> 7) & 0x01), - UInt8((e >> 8) & 0x03), - _Bool((e >> 10) & 0x01), - _Bool((e >> 11) & 0x01)) -end - -struct EffectsOverride - consistent::Bool - effect_free::Bool - nothrow::Bool - terminates_globally::Bool - terminates_locally::Bool - notaskstate::Bool - inaccessiblememonly::Bool -end - -function encode_effects_override(eo::EffectsOverride) - e = 0x00 - eo.consistent && (e |= (0x01 << 0)) - eo.effect_free && (e |= (0x01 << 1)) - eo.nothrow && (e |= (0x01 << 2)) - eo.terminates_globally && (e |= (0x01 << 3)) - eo.terminates_locally && (e |= (0x01 << 4)) - eo.notaskstate && (e |= (0x01 << 5)) - eo.inaccessiblememonly && (e |= (0x01 << 6)) - return e -end - -function decode_effects_override(e::UInt8) - return EffectsOverride( - (e & (0x01 << 0)) != 0x00, - (e & (0x01 << 1)) != 0x00, - (e & (0x01 << 2)) != 0x00, - (e & (0x01 << 3)) != 0x00, - (e & (0x01 << 4)) != 0x00, - (e & (0x01 << 5)) != 0x00, - (e & (0x01 << 6)) != 0x00) -end diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl deleted file mode 100644 index 3a96b21d7c40a..0000000000000 --- a/base/compiler/inferenceresult.jl +++ /dev/null @@ -1,233 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance) -> - (cache_argtypes::Vector{Any}, overridden_by_const::BitVector) - -Returns argument types `cache_argtypes::Vector{Any}` for `linfo` that are in the native -Julia type domain. `overridden_by_const::BitVector` is all `false` meaning that -there is no additional extended lattice information there. - - matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ForwardableArgtypes) -> - (cache_argtypes::Vector{Any}, overridden_by_const::BitVector) - -Returns cache-correct extended lattice argument types `cache_argtypes::Vector{Any}` -for `linfo` given some `argtypes` accompanied by `overridden_by_const::BitVector` -that marks which argument contains additional extended lattice information. - -In theory, there could be a `cache` containing a matching `InferenceResult` -for the provided `linfo` and `given_argtypes`. The purpose of this function is -to return a valid value for `cache_lookup(𝕃, linfo, argtypes, cache).argtypes`, -so that we can construct cache-correct `InferenceResult`s in the first place. -""" -function matching_cache_argtypes end - -function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance) - mthd = isa(linfo.def, Method) ? 
linfo.def::Method : nothing - cache_argtypes = most_general_argtypes(mthd, linfo.specTypes) - return cache_argtypes, falses(length(cache_argtypes)) -end - -struct SimpleArgtypes <: ForwardableArgtypes - argtypes::Vector{Any} -end - -""" - matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::SimpleArgtypes) - -The implementation for `argtypes` with general extended lattice information. -This is supposed to be used for debugging and testing or external `AbstractInterpreter` -usages and in general `matching_cache_argtypes(::MethodInstance, ::ConditionalArgtypes)` -is more preferred it can forward `Conditional` information. -""" -function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, simple_argtypes::SimpleArgtypes) - (; argtypes) = simple_argtypes - given_argtypes = Vector{Any}(undef, length(argtypes)) - for i = 1:length(argtypes) - given_argtypes[i] = widenslotwrapper(argtypes[i]) - end - given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) - return pick_const_args(𝕃, linfo, given_argtypes) -end - -function pick_const_args(𝕃::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any}) - cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) - return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) -end - -function pick_const_args!(𝕃::AbstractLattice, cache_argtypes::Vector{Any}, overridden_by_const::BitVector, given_argtypes::Vector{Any}) - for i = 1:length(given_argtypes) - given_argtype = given_argtypes[i] - cache_argtype = cache_argtypes[i] - if !is_argtype_match(𝕃, given_argtype, cache_argtype, false) - # prefer the argtype we were given over the one computed from `linfo` - cache_argtypes[i] = given_argtype - overridden_by_const[i] = true - end - end - return cache_argtypes, overridden_by_const -end - -function is_argtype_match(𝕃::AbstractLattice, - @nospecialize(given_argtype), - @nospecialize(cache_argtype), - overridden_by_const::Bool) - if is_forwardable_argtype(𝕃, given_argtype) - return is_lattice_equal(𝕃, given_argtype, cache_argtype) - end - return !overridden_by_const -end - -va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) = - va_process_argtypes(Returns(nothing), 𝕃, given_argtypes, mi) -function va_process_argtypes(@nospecialize(va_handler!), 𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) - def = mi.def - isva = isa(def, Method) ? def.isva : false - nargs = isa(def, Method) ? Int(def.nargs) : length(mi.specTypes.parameters) - if isva || isvarargtype(given_argtypes[end]) - isva_given_argtypes = Vector{Any}(undef, nargs) - for i = 1:(nargs-isva) - isva_given_argtypes[i] = argtype_by_index(given_argtypes, i) - end - if isva - if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end]) - last = length(given_argtypes) - else - last = nargs - end - isva_given_argtypes[nargs] = tuple_tfunc(𝕃, given_argtypes[last:end]) - va_handler!(isva_given_argtypes, last) - end - return isva_given_argtypes - end - @assert length(given_argtypes) == nargs "invalid `given_argtypes` for `mi`" - return given_argtypes -end - -function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(specTypes), - withfirst::Bool = true) - toplevel = method === nothing - isva = !toplevel && method.isva - linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...] - nargs::Int = toplevel ? 
0 : method.nargs - # For opaque closure, the closure environment is processed elsewhere - withfirst || (nargs -= 1) - cache_argtypes = Vector{Any}(undef, nargs) - # First, if we're dealing with a varargs method, then we set the last element of `args` - # to the appropriate `Tuple` type or `PartialStruct` instance. - if !toplevel && isva - if specTypes::Type == Tuple - linfo_argtypes = Any[Any for i = 1:nargs] - if nargs > 1 - linfo_argtypes[end] = Tuple - end - vargtype = Tuple - else - linfo_argtypes_length = length(linfo_argtypes) - if nargs > linfo_argtypes_length - va = linfo_argtypes[linfo_argtypes_length] - if isvarargtype(va) - new_va = rewrap_unionall(unconstrain_vararg_length(va), specTypes) - vargtype = Tuple{new_va} - else - vargtype = Tuple{} - end - else - vargtype_elements = Any[] - for i in nargs:linfo_argtypes_length - p = linfo_argtypes[i] - p = unwraptv(isvarargtype(p) ? unconstrain_vararg_length(p) : p) - push!(vargtype_elements, elim_free_typevars(rewrap_unionall(p, specTypes))) - end - for i in 1:length(vargtype_elements) - atyp = vargtype_elements[i] - if issingletontype(atyp) - # replace singleton types with their equivalent Const object - vargtype_elements[i] = Const(atyp.instance) - elseif isconstType(atyp) - vargtype_elements[i] = Const(atyp.parameters[1]) - end - end - vargtype = tuple_tfunc(fallback_lattice, vargtype_elements) - end - end - cache_argtypes[nargs] = vargtype - nargs -= 1 - end - # Now, we propagate type info from `linfo_argtypes` into `cache_argtypes`, improving some - # type info as we go (where possible). Note that if we're dealing with a varargs method, - # we already handled the last element of `cache_argtypes` (and decremented `nargs` so that - # we don't overwrite the result of that work here). - linfo_argtypes_length = length(linfo_argtypes) - if linfo_argtypes_length > 0 - n = linfo_argtypes_length > nargs ? nargs : linfo_argtypes_length - tail_index = n - local lastatype - for i = 1:n - atyp = linfo_argtypes[i] - if i == n && isvarargtype(atyp) - atyp = unwrapva(atyp) - tail_index -= 1 - end - atyp = unwraptv(atyp) - if issingletontype(atyp) - # replace singleton types with their equivalent Const object - atyp = Const(atyp.instance) - elseif isconstType(atyp) - atyp = Const(atyp.parameters[1]) - else - atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes)) - end - i == n && (lastatype = atyp) - cache_argtypes[i] = atyp - end - for i = (tail_index + 1):nargs - cache_argtypes[i] = lastatype - end - else - @assert nargs == 0 "invalid specialization of method" # wrong number of arguments - end - cache_argtypes -end - -# eliminate free `TypeVar`s in order to make the life much easier down the road: -# at runtime only `Type{...}::DataType` can contain invalid type parameters, and other -# malformed types here are user-constructed type arguments given at an inference entry -# so this function will replace only the malformed `Type{...}::DataType` with `Type` -# and simply replace other possibilities with `Any` -function elim_free_typevars(@nospecialize t) - if has_free_typevars(t) - return isType(t) ? 
Type : Any - else - return t - end -end - -function cache_lookup(lattice::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult}) - method = linfo.def::Method - nargs::Int = method.nargs - method.isva && (nargs -= 1) - length(given_argtypes) >= nargs || return nothing - for cached_result in cache - cached_result.linfo === linfo || continue - cache_match = true - cache_argtypes = cached_result.argtypes - cache_overridden_by_const = cached_result.overridden_by_const - for i in 1:nargs - if !is_argtype_match(lattice, widenmustalias(given_argtypes[i]), - cache_argtypes[i], - cache_overridden_by_const[i]) - cache_match = false - break - end - end - if method.isva && cache_match - cache_match = is_argtype_match(lattice, tuple_tfunc(lattice, given_argtypes[(nargs + 1):end]), - cache_argtypes[end], - cache_overridden_by_const[end]) - end - cache_match || continue - return cached_result - end - return nothing -end diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl deleted file mode 100644 index c4608dd5781e1..0000000000000 --- a/base/compiler/inferencestate.jl +++ /dev/null @@ -1,894 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# data structures -# =============== - -mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int} - elems::BitSet - min::Int - # Stores whether min is exact or a lower bound - # If exact, it is not set in elems - min_exact::Bool - max::Int -end - -function BitSetBoundedMinPrioritySet(max::Int) - bs = BitSet() - bs.offset = 0 - BitSetBoundedMinPrioritySet(bs, max+1, true, max) -end - -@noinline function _advance_bsbmp!(bsbmp::BitSetBoundedMinPrioritySet) - @assert !bsbmp.min_exact - bsbmp.min = _bits_findnext(bsbmp.elems.bits, bsbmp.min)::Int - bsbmp.min < 0 && (bsbmp.min = bsbmp.max + 1) - bsbmp.min_exact = true - delete!(bsbmp.elems, bsbmp.min) - return nothing -end - -function isempty(bsbmp::BitSetBoundedMinPrioritySet) - if bsbmp.min > bsbmp.max - return true - end - bsbmp.min_exact && return false - _advance_bsbmp!(bsbmp) - return bsbmp.min > bsbmp.max -end - -function popfirst!(bsbmp::BitSetBoundedMinPrioritySet) - bsbmp.min_exact || _advance_bsbmp!(bsbmp) - m = bsbmp.min - m > bsbmp.max && throw(ArgumentError("BitSetBoundedMinPrioritySet must be non-empty")) - bsbmp.min = m+1 - bsbmp.min_exact = false - return m -end - -function push!(bsbmp::BitSetBoundedMinPrioritySet, idx::Int) - if idx <= bsbmp.min - if bsbmp.min_exact && bsbmp.min < bsbmp.max && idx != bsbmp.min - push!(bsbmp.elems, bsbmp.min) - end - bsbmp.min = idx - bsbmp.min_exact = true - return nothing - end - push!(bsbmp.elems, idx) - return nothing -end - -function in(idx::Int, bsbmp::BitSetBoundedMinPrioritySet) - if bsbmp.min_exact && idx == bsbmp.min - return true - end - return idx in bsbmp.elems -end - -function append!(bsbmp::BitSetBoundedMinPrioritySet, itr) - for val in itr - push!(bsbmp, val) - end -end - -mutable struct TwoPhaseVectorView <: AbstractVector{Int} - const data::Vector{Int} - count::Int - const range::UnitRange{Int} -end -size(tpvv::TwoPhaseVectorView) = (tpvv.count,) -function getindex(tpvv::TwoPhaseVectorView, i::Int) - checkbounds(tpvv, i) - @inbounds tpvv.data[first(tpvv.range) + i - 1] -end -function push!(tpvv::TwoPhaseVectorView, v::Int) - tpvv.count += 1 - tpvv.data[first(tpvv.range) + tpvv.count - 1] = v - return nothing -end - -""" - mutable struct TwoPhaseDefUseMap - -This struct is intended as a memory- and GC-pressure-efficient mechanism -for 
incrementally computing def-use maps. The idea is that the def-use map -is constructed into two passes over the IR. In the first, we simply count the -the number of uses, computing the number of uses for each def as well as the -total number of uses. In the second pass, we actually fill in the def-use -information. - -The idea is that either of these two phases can be combined with other useful -work that needs to scan the instruction stream anyway, while avoiding the -significant allocation pressure of e.g. allocating an array for every SSA value -or attempting to dynamically move things around as new uses are discovered. - -The def-use map is presented as a vector of vectors. For every def, indexing -into the map will return a vector of uses. -""" -mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView} - ssa_uses::Vector{Int} - data::Vector{Int} - complete::Bool -end - -function complete!(tpdum::TwoPhaseDefUseMap) - cumsum = 0 - for i = 1:length(tpdum.ssa_uses) - this_val = cumsum + 1 - cumsum += tpdum.ssa_uses[i] - tpdum.ssa_uses[i] = this_val - end - resize!(tpdum.data, cumsum) - fill!(tpdum.data, 0) - tpdum.complete = true -end - -function TwoPhaseDefUseMap(nssas::Int) - ssa_uses = zeros(Int, nssas) - data = Int[] - complete = false - return TwoPhaseDefUseMap(ssa_uses, data, complete) -end - -function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue) - @assert !tpdum.complete - tpdum.ssa_uses[arg.id] += 1 -end - -function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int) - if !tpdum.complete - tpdum.ssa_uses[def] -= 1 - else - range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1)) - # TODO: Sorted - useidx = findfirst(idx->tpdum.data[idx] == use, range) - @assert useidx !== nothing - idx = range[useidx] - while idx < lastindex(range) - ndata = tpdum.data[idx+1] - ndata == 0 && break - tpdum.data[idx] = ndata - idx += 1 - end - tpdum.data[idx] = 0 - end -end -kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) = - kill_def_use!(tpdum, def.id, use) - -function getindex(tpdum::TwoPhaseDefUseMap, idx::Int) - @assert tpdum.complete - range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1)) - # TODO: Make logarithmic - nelems = 0 - for i in range - tpdum.data[i] == 0 && break - nelems += 1 - end - return TwoPhaseVectorView(tpdum.data, nelems, range) -end - -mutable struct LazyGenericDomtree{IsPostDom} - ir::IRCode - domtree::GenericDomTree{IsPostDom} - LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir) -end -function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom} - isdefined(x, :domtree) && return x.domtree - return @timeit "domtree 2" x.domtree = IsPostDom ? - construct_postdomtree(x.ir.cfg.blocks) : - construct_domtree(x.ir.cfg.blocks) -end - -const LazyDomtree = LazyGenericDomtree{false} -const LazyPostDomtree = LazyGenericDomtree{true} - -# InferenceState -# ============== - -""" - const VarTable = Vector{VarState} - -The extended lattice that maps local variables to inferred type represented as `AbstractLattice`. -Each index corresponds to the `id` of `SlotNumber` which identifies each local variable. -Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement -to enable flow-sensitive analysis. 
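A hedged sketch of the two-phase protocol described above (`nssavalues` and the standalone `defuse_pairs` loops are illustrative; in the real pipeline both passes are fused with other scans over the IR):

    tpdum = TwoPhaseDefUseMap(nssavalues)
    for (def, use) in defuse_pairs          # phase 1: count uses per SSA def
        count!(tpdum, def)                  # `def::SSAValue`, `use::Int`
    end
    complete!(tpdum)                        # size and zero the flat backing array
    for (def, use) in defuse_pairs          # phase 2: record the actual uses
        push!(tpdum[def.id], use)
    end
    tpdum[1]                                # view of all statement indices that use SSAValue(1)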
-""" -const VarTable = Vector{VarState} - -mutable struct InferenceState - #= information about this method instance =# - linfo::MethodInstance - world::UInt - mod::Module - sptypes::Vector{VarState} - slottypes::Vector{Any} - src::CodeInfo - cfg::CFG - method_info::MethodInfo - - #= intermediate states for local abstract interpretation =# - currbb::Int - currpc::Int - ip::BitSet#=TODO BoundedMinPrioritySet=# # current active instruction pointers - handler_at::Vector{Int} # current exception handler info - ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info - # TODO: Could keep this sparsely by doing structural liveness analysis ahead of time. - bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet - ssavaluetypes::Vector{Any} - stmt_edges::Vector{Union{Nothing,Vector{Any}}} - stmt_info::Vector{CallInfo} - - #= intermediate states for interprocedural abstract interpretation =# - pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue - limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return - cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller - callers_in_cycle::Vector{InferenceState} - dont_work_on_me::Bool - parent # ::Union{Nothing,AbsIntState} - - #= results =# - result::InferenceResult # remember where to put the result - valid_worlds::WorldRange - bestguess #::Type - ipo_effects::Effects - - #= flags =# - # Whether to restrict inference of abstract call sites to avoid excessive work - # Set by default for toplevel frame. - restrict_abstract_call_sites::Bool - cached::Bool # TODO move this to InferenceResult? - insert_coverage::Bool - - # The interpreter that created this inference state. Not looked at by - # NativeInterpreter. But other interpreters may use this to detect cycles - interp::AbstractInterpreter - - # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results - function InferenceState(result::InferenceResult, src::CodeInfo, cache::Symbol, - interp::AbstractInterpreter) - linfo = result.linfo - world = get_world_counter(interp) - def = linfo.def - mod = isa(def, Method) ? def.module : def - sptypes = sptypes_from_meth_instance(linfo) - code = src.code::Vector{Any} - cfg = compute_basic_blocks(code) - method_info = MethodInfo(src) - - currbb = currpc = 1 - ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1) - handler_at = compute_trycatch(code, BitSet()) - nssavalues = src.ssavaluetypes::Int - ssavalue_uses = find_ssavalue_uses(code, nssavalues) - nstmts = length(code) - stmt_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:nstmts ] - stmt_info = CallInfo[ NoCallInfo() for i = 1:nstmts ] - - nslots = length(src.slotflags) - slottypes = Vector{Any}(undef, nslots) - bb_vartables = Union{Nothing,VarTable}[ nothing for i = 1:length(cfg.blocks) ] - bb_vartable1 = bb_vartables[1] = VarTable(undef, nslots) - argtypes = result.argtypes - nargtypes = length(argtypes) - for i = 1:nslots - argtyp = (i > nargtypes) ? 
Bottom : argtypes[i] - slottypes[i] = argtyp - bb_vartable1[i] = VarState(argtyp, i > nargtypes) - end - src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ] - - pclimitations = IdSet{InferenceState}() - limitations = IdSet{InferenceState}() - cycle_backedges = Vector{Tuple{InferenceState,Int}}() - callers_in_cycle = Vector{InferenceState}() - dont_work_on_me = false - parent = nothing - - valid_worlds = WorldRange(src.min_world, src.max_world == typemax(UInt) ? get_world_counter() : src.max_world) - bestguess = Bottom - ipo_effects = EFFECTS_TOTAL - - insert_coverage = should_insert_coverage(mod, src) - if insert_coverage - ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE) - end - - restrict_abstract_call_sites = isa(linfo.def, Module) - @assert cache === :no || cache === :local || cache === :global - cached = cache === :global - - # some more setups - InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at) - cache !== :no && push!(get_inference_cache(interp), result) - - return new( - linfo, world, mod, sptypes, slottypes, src, cfg, method_info, - currbb, currpc, ip, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info, - pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent, - result, valid_worlds, bestguess, ipo_effects, - restrict_abstract_call_sites, cached, insert_coverage, - interp) - end -end - -is_inferred(sv::InferenceState) = is_inferred(sv.result) -is_inferred(result::InferenceResult) = result.result !== nothing - -was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND - -function compute_trycatch(code::Vector{Any}, ip::BitSet) - # The goal initially is to record the frame like this for the state at exit: - # 1: (enter 3) # == 0 - # 3: (expr) # == 1 - # 3: (leave 1) # == 1 - # 4: (expr) # == 0 - # then we can find all trys by walking backwards from :enter statements, - # and all catches by looking at the statement after the :enter - n = length(code) - empty!(ip) - ip.offset = 0 # for _bits_findnext - push!(ip, n + 1) - handler_at = fill(0, n) - - # start from all :enter statements and record the location of the try - for pc = 1:n - stmt = code[pc] - if isexpr(stmt, :enter) - l = stmt.args[1]::Int - handler_at[pc + 1] = pc - push!(ip, pc + 1) - handler_at[l] = pc - push!(ip, l) - end - end - - # now forward those marks to all :leave statements - pc´´ = 0 - while true - # make progress on the active ip set - pc = _bits_findnext(ip.bits, pc´´)::Int - pc > n && break - while true # inner loop optimizes the common case where it can run straight from pc to pc + 1 - pc´ = pc + 1 # next program-counter (after executing instruction) - if pc == pc´´ - pc´´ = pc´ - end - delete!(ip, pc) - cur_hand = handler_at[pc] - @assert cur_hand != 0 "unbalanced try/catch" - stmt = code[pc] - if isa(stmt, GotoNode) - pc´ = stmt.label - elseif isa(stmt, GotoIfNot) - l = stmt.dest::Int - if handler_at[l] != cur_hand - @assert handler_at[l] == 0 "unbalanced try/catch" - handler_at[l] = cur_hand - if l < pc´´ - pc´´ = l - end - push!(ip, l) - end - elseif isa(stmt, ReturnNode) - @assert !isdefined(stmt, :val) "unbalanced try/catch" - break - elseif isa(stmt, Expr) - head = stmt.head - if head === :enter - cur_hand = pc - elseif head === :leave - l = stmt.args[1]::Int - for i = 1:l - cur_hand = handler_at[cur_hand] - end - cur_hand == 0 && break - end - end - - pc´ > n && break # can't proceed with the fast-path fall-through - if handler_at[pc´] != cur_hand - 
@assert handler_at[pc´] == 0 "unbalanced try/catch" - handler_at[pc´] = cur_hand - elseif !in(pc´, ip) - break # already visited - end - pc = pc´ - end - end - - @assert first(ip) == n + 1 - return handler_at -end - -# check if coverage mode is enabled -function should_insert_coverage(mod::Module, src::CodeInfo) - coverage_enabled(mod) && return true - JLOptions().code_coverage == 3 || return false - # path-specific coverage mode: if any line falls in a tracked file enable coverage for all - linetable = src.linetable - if isa(linetable, Vector{Any}) - for line in linetable - line = line::LineInfoNode - if is_file_tracked(line.file) - return true - end - end - elseif isa(linetable, Vector{LineInfoNode}) - for line in linetable - if is_file_tracked(line.file) - return true - end - end - end - return false -end - -function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter) - # prepare an InferenceState object for inferring lambda - world = get_world_counter(interp) - src = retrieve_code_info(result.linfo, world) - src === nothing && return nothing - validate_code_in_debug_mode(result.linfo, src, "lowered") - return InferenceState(result, src, cache, interp) -end - -""" - constrains_param(var::TypeVar, sig, covariant::Bool, type_constrains::Bool) - -Check if `var` will be constrained to have a definite value -in any concrete leaftype subtype of `sig`. - -It is used as a helper to determine whether type intersection is guaranteed to be able to -find a value for a particular type parameter. -A necessary condition for type intersection to not assign a parameter is that it only -appears in a `Union[All]` and during subtyping some other union component (that does not -constrain the type parameter) is selected. - -The `type_constrains` flag determines whether Type{T} is considered to be constraining -`T`. This is not true in general, because of the existence of types with free type -parameters, however, some callers would like to ignore this corner case. 
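Two hedged, illustrative calls, evaluated conceptually against the definition that follows (they assume a Julia version where this file is still present under `Core.Compiler`):

    tv = TypeVar(:T)
    Core.Compiler.constrains_param(tv, Vector{tv}, true)              # true:  T appears as a type
                                                                      #        parameter, so any concrete
                                                                      #        subtype pins it
    Core.Compiler.constrains_param(tv, Union{Int, Vector{tv}}, true)  # false: subtyping may select the
                                                                      #        Int branch, leaving T
                                                                      #        unconstrained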
-""" -function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool, type_constrains::Bool=false) - typ === var && return true - while typ isa UnionAll - covariant && constrains_param(var, typ.var.ub, covariant, type_constrains) && return true - # typ.var.lb doesn't constrain var - typ = typ.body - end - if typ isa Union - # for unions, verify that both options would constrain var - ba = constrains_param(var, typ.a, covariant, type_constrains) - bb = constrains_param(var, typ.b, covariant, type_constrains) - (ba && bb) && return true - elseif typ isa DataType - # return true if any param constrains var - fc = length(typ.parameters) - if fc > 0 - if typ.name === Tuple.name - # vararg tuple needs special handling - for i in 1:(fc - 1) - p = typ.parameters[i] - constrains_param(var, p, covariant, type_constrains) && return true - end - lastp = typ.parameters[fc] - vararg = unwrap_unionall(lastp) - if vararg isa Core.TypeofVararg && isdefined(vararg, :N) - constrains_param(var, vararg.N, covariant, type_constrains) && return true - # T = vararg.parameters[1] doesn't constrain var - else - constrains_param(var, lastp, covariant, type_constrains) && return true - end - else - if typ.name === typename(Type) && typ.parameters[1] === var && var.ub === Any - # Types with free type parameters are <: Type cause the typevar - # to be unconstrained because Type{T} with free typevars is illegal - return type_constrains - end - for i in 1:fc - p = typ.parameters[i] - constrains_param(var, p, false, type_constrains) && return true - end - end - end - end - return false -end - -const EMPTY_SPTYPES = VarState[] - -function sptypes_from_meth_instance(linfo::MethodInstance) - def = linfo.def - isa(def, Method) || return EMPTY_SPTYPES # toplevel - sig = def.sig - if isempty(linfo.sparam_vals) - isa(sig, UnionAll) || return EMPTY_SPTYPES - # linfo is unspecialized - spvals = Any[] - sig′ = sig - while isa(sig′, UnionAll) - push!(spvals, sig′.var) - sig′ = sig′.body - end - else - spvals = linfo.sparam_vals - end - nvals = length(spvals) - sptypes = Vector{VarState}(undef, nvals) - for i = 1:nvals - v = spvals[i] - if v isa TypeVar - temp = sig - for j = 1:i-1 - temp = temp.body - end - vᵢ = (temp::UnionAll).var - sigtypes = (unwrap_unionall(temp)::DataType).parameters - for j = 1:length(sigtypes) - sⱼ = sigtypes[j] - if isType(sⱼ) && sⱼ.parameters[1] === vᵢ - # if this parameter came from `arg::Type{T}`, - # then `arg` is more precise than `Type{T} where lb<:T<:ub` - ty = fieldtype(linfo.specTypes, j) - @goto ty_computed - end - end - ub = unwraptv_ub(v) - if has_free_typevars(ub) - ub = Any - end - lb = unwraptv_lb(v) - if has_free_typevars(lb) - lb = Bottom - end - if Any === ub && lb === Bottom - ty = Any - else - tv = TypeVar(v.name, lb, ub) - ty = UnionAll(tv, Type{tv}) - end - @label ty_computed - undef = !(let sig=sig - # if the specialized signature `linfo.specTypes` doesn't contain any free - # type variables, we can use it for a more accurate analysis of whether `v` - # is constrained or not, otherwise we should use `def.sig` which always - # doesn't contain any free type variables - if !has_free_typevars(linfo.specTypes) - sig = linfo.specTypes - end - @assert !has_free_typevars(sig) - constrains_param(v, sig, #=covariant=#true) - end) - elseif isvarargtype(v) - ty = Int - undef = false - else - ty = Const(v) - undef = false - end - sptypes[i] = VarState(ty, undef) - end - return sptypes -end - -_topmod(sv::InferenceState) = _topmod(frame_module(sv)) - -function 
record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize(new), frame::InferenceState) - ssavaluetypes = frame.ssavaluetypes - old = ssavaluetypes[ssa_id] - if old === NOT_FOUND || !⊑(𝕃ᵢ, new, old) - # typically, we expect that old ⊑ new (that output information only - # gets less precise with worse input information), but to actually - # guarantee convergence we need to use tmerge here to ensure that is true - ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(𝕃ᵢ, old, new) - W = frame.ip - for r in frame.ssavalue_uses[ssa_id] - if was_reached(frame, r) - usebb = block_for_inst(frame.cfg, r) - # We're guaranteed to visit the statement if it's in the current - # basic block, since SSA values can only ever appear after their - # def. - if usebb != frame.currbb - push!(W, usebb) - end - end - end - end - return nothing -end - -function add_cycle_backedge!(caller::InferenceState, frame::InferenceState, currpc::Int) - update_valid_age!(caller, frame.valid_worlds) - backedge = (caller, currpc) - contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge) - add_backedge!(caller, frame.linfo) - return frame -end - -function get_stmt_edges!(caller::InferenceState, currpc::Int=caller.currpc) - stmt_edges = caller.stmt_edges - edges = stmt_edges[currpc] - if edges === nothing - edges = stmt_edges[currpc] = [] - end - return edges -end - -function empty_backedges!(frame::InferenceState, currpc::Int=frame.currpc) - edges = frame.stmt_edges[currpc] - edges === nothing || empty!(edges) - return nothing -end - -function print_callstack(sv::InferenceState) - while sv !== nothing - print(sv.linfo) - !sv.cached && print(" [uncached]") - println() - for cycle in sv.callers_in_cycle - print(' ', cycle.linfo) - println() - end - sv = sv.parent - end -end - -function narguments(sv::InferenceState, include_va::Bool=true) - def = sv.linfo.def - nargs = length(sv.result.argtypes) - if !include_va - nargs -= isa(def, Method) && def.isva - end - return nargs -end - -# IRInterpretationState -# ===================== - -# TODO add `result::InferenceResult` and put the irinterp result into the inference cache? -mutable struct IRInterpretationState - const method_info::MethodInfo - const ir::IRCode - const mi::MethodInstance - const world::UInt - curridx::Int - const argtypes_refined::Vector{Bool} - const sptypes::Vector{VarState} - const tpdum::TwoPhaseDefUseMap - const ssa_refined::BitSet - const lazydomtree::LazyDomtree - valid_worlds::WorldRange - const edges::Vector{Any} - parent # ::Union{Nothing,AbsIntState} - - function IRInterpretationState(interp::AbstractInterpreter, - method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any}, - world::UInt, min_world::UInt, max_world::UInt) - curridx = 1 - given_argtypes = Vector{Any}(undef, length(argtypes)) - for i = 1:length(given_argtypes) - given_argtypes[i] = widenslotwrapper(argtypes[i]) - end - given_argtypes = va_process_argtypes(optimizer_lattice(interp), given_argtypes, mi) - argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i]) - for i = 1:length(given_argtypes)] - empty!(ir.argtypes) - append!(ir.argtypes, given_argtypes) - tpdum = TwoPhaseDefUseMap(length(ir.stmts)) - ssa_refined = BitSet() - lazydomtree = LazyDomtree(ir) - valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? 
get_world_counter() : max_world) - edges = Any[] - parent = nothing - return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum, - ssa_refined, lazydomtree, valid_worlds, edges, parent) - end -end - -function IRInterpretationState(interp::AbstractInterpreter, - code::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt) - @assert code.def === mi - src = @atomic :monotonic code.inferred - if isa(src, String) - src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo - else - isa(src, CodeInfo) || return nothing - end - method_info = MethodInfo(src) - ir = inflate_ir(src, mi) - return IRInterpretationState(interp, method_info, ir, mi, argtypes, world, - src.min_world, src.max_world) -end - -# AbsIntState -# =========== - -const AbsIntState = Union{InferenceState,IRInterpretationState} - -frame_instance(sv::InferenceState) = sv.linfo -frame_instance(sv::IRInterpretationState) = sv.mi - -function frame_module(sv::AbsIntState) - mi = frame_instance(sv) - def = mi.def - isa(def, Module) && return def - return def.module -end - -frame_parent(sv::InferenceState) = sv.parent::Union{Nothing,AbsIntState} -frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState} - -is_constproped(sv::InferenceState) = any(sv.result.overridden_by_const) -is_constproped(::IRInterpretationState) = true - -is_cached(sv::InferenceState) = sv.cached -is_cached(::IRInterpretationState) = false - -method_info(sv::InferenceState) = sv.method_info -method_info(sv::IRInterpretationState) = sv.method_info - -propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds -method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_for_inference_limit_heuristics - -frame_world(sv::InferenceState) = sv.world -frame_world(sv::IRInterpretationState) = sv.world - -callers_in_cycle(sv::InferenceState) = sv.callers_in_cycle -callers_in_cycle(sv::IRInterpretationState) = () - -is_effect_overridden(sv::AbsIntState, effect::Symbol) = is_effect_overridden(frame_instance(sv), effect) -function is_effect_overridden(linfo::MethodInstance, effect::Symbol) - def = linfo.def - return isa(def, Method) && is_effect_overridden(def, effect) -end -is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect) -is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect) - -has_conditional(𝕃::AbstractLattice, ::InferenceState) = has_conditional(𝕃) -has_conditional(::AbstractLattice, ::IRInterpretationState) = false - -# work towards converging the valid age range for sv -function update_valid_age!(sv::AbsIntState, valid_worlds::WorldRange) - valid_worlds = sv.valid_worlds = intersect(valid_worlds, sv.valid_worlds) - @assert sv.world in valid_worlds "invalid age range update" - return valid_worlds -end - -""" - AbsIntStackUnwind(sv::AbsIntState) - -Iterate through all callers of the given `AbsIntState` in the abstract interpretation stack -(including the given `AbsIntState` itself), visiting children before their parents (i.e. -ascending the tree from the given `AbsIntState`). -Note that cycles may be visited in any order. 
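To make the visiting order concrete, here is a self-contained toy sketch of the same walk; `ToyFrame` and `unwind` are hypothetical stand-ins for `InferenceState` and the `iterate` methods below, not part of this code:

```julia
struct ToyFrame
    name::Symbol
    callers_in_cycle::Vector{ToyFrame}
    parent::Union{ToyFrame,Nothing}
end

function unwind(frame::ToyFrame)
    order = Symbol[]
    sv, cyclei = frame, 0
    while true
        push!(order, sv.name)
        if cyclei < length(sv.callers_in_cycle)
            cyclei += 1                    # exhaust the current cycle first
            next = sv.callers_in_cycle[cyclei]
        else
            cyclei = 0                     # then ascend to the parent frame
            next = sv.parent
        end
        next === nothing && return order
        sv = next
    end
end

root = ToyFrame(:root, ToyFrame[], nothing)
mid  = ToyFrame(:mid, ToyFrame[], root)
leaf = ToyFrame(:leaf, [ToyFrame(:peer, ToyFrame[], mid)], mid)
unwind(leaf)   # [:leaf, :peer, :mid, :root], children before parents
```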
-""" -struct AbsIntStackUnwind - sv::AbsIntState -end -iterate(unw::AbsIntStackUnwind) = (unw.sv, (unw.sv, 0)) -function iterate(unw::AbsIntStackUnwind, (sv, cyclei)::Tuple{AbsIntState, Int}) - # iterate through the cycle before walking to the parent - if cyclei < length(callers_in_cycle(sv)) - cyclei += 1 - parent = callers_in_cycle(sv)[cyclei] - else - cyclei = 0 - parent = frame_parent(sv) - end - parent === nothing && return nothing - return (parent, (parent, cyclei)) -end - -# temporarily accumulate our edges to later add as backedges in the callee -function add_backedge!(caller::InferenceState, mi::MethodInstance) - isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance - return push!(get_stmt_edges!(caller), mi) -end -function add_backedge!(irsv::IRInterpretationState, mi::MethodInstance) - return push!(irsv.edges, mi) -end - -function add_invoke_backedge!(caller::InferenceState, @nospecialize(invokesig::Type), mi::MethodInstance) - isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance - return push!(get_stmt_edges!(caller), invokesig, mi) -end -function add_invoke_backedge!(irsv::IRInterpretationState, @nospecialize(invokesig::Type), mi::MethodInstance) - return push!(irsv.edges, invokesig, mi) -end - -# used to temporarily accumulate our no method errors to later add as backedges in the callee method table -function add_mt_backedge!(caller::InferenceState, mt::MethodTable, @nospecialize(typ)) - isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance - return push!(get_stmt_edges!(caller), mt, typ) -end -function add_mt_backedge!(irsv::IRInterpretationState, mt::MethodTable, @nospecialize(typ)) - return push!(irsv.edges, mt, typ) -end - -get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc] -get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag] - -add_curr_ssaflag!(sv::InferenceState, flag::UInt8) = sv.src.ssaflags[sv.currpc] |= flag -add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt8) = sv.ir.stmts[sv.curridx][:flag] |= flag - -sub_curr_ssaflag!(sv::InferenceState, flag::UInt8) = sv.src.ssaflags[sv.currpc] &= ~flag -sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt8) = sv.ir.stmts[sv.curridx][:flag] &= ~flag - -merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) = - caller.ipo_effects = merge_effects(caller.ipo_effects, effects) -merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return - -struct InferenceLoopState - sig - rt - effects::Effects - function InferenceLoopState(@nospecialize(sig), @nospecialize(rt), effects::Effects) - new(sig, rt, effects) - end -end - -bail_out_toplevel_call(::AbstractInterpreter, state::InferenceLoopState, sv::InferenceState) = - sv.restrict_abstract_call_sites && !isdispatchtuple(state.sig) -bail_out_toplevel_call(::AbstractInterpreter, ::InferenceLoopState, ::IRInterpretationState) = false - -bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) = - state.rt === Any && !is_foldable(state.effects) -bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = - state.rt === Any && !is_foldable(state.effects) - -bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) = - state.rt === Any -bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = - state.rt === Any - -function 
should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState) - if InferenceParams(interp).unoptimize_throw_blocks - # Disable inference of calls in throw blocks, since we're unlikely to - # need their types. There is one exception however: If up until now, the - # function has not seen any side effects, we would like to make sure there - # aren't any in the throw block either to enable other optimizations. - if is_stmt_throw_block(get_curr_ssaflag(sv)) - should_infer_for_effects(sv) || return false - end - end - return true -end -function should_infer_for_effects(sv::InferenceState) - effects = sv.ipo_effects - return is_terminates(effects) && is_effect_free(effects) -end -should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true - -add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return -add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return - -function get_max_methods(interp::AbstractInterpreter, @nospecialize(f), sv::AbsIntState) - fmax = get_max_methods_for_func(f) - fmax !== nothing && return fmax - return get_max_methods(interp, sv) -end -function get_max_methods(interp::AbstractInterpreter, @nospecialize(f)) - fmax = get_max_methods_for_func(f) - fmax !== nothing && return fmax - return get_max_methods(interp) -end -function get_max_methods(interp::AbstractInterpreter, sv::AbsIntState) - mmax = get_max_methods_for_module(sv) - mmax !== nothing && return mmax - return get_max_methods(interp) -end -get_max_methods(interp::AbstractInterpreter) = InferenceParams(interp).max_methods - -function get_max_methods_for_func(@nospecialize(f)) - if f !== nothing - fmm = typeof(f).name.max_methods - fmm !== UInt8(0) && return Int(fmm) - end - return nothing -end -get_max_methods_for_module(sv::AbsIntState) = get_max_methods_for_module(frame_module(sv)) -function get_max_methods_for_module(mod::Module) - max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int - max_methods < 0 && return nothing - return max_methods -end diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl deleted file mode 100644 index 3a8de06811cc2..0000000000000 --- a/base/compiler/optimize.jl +++ /dev/null @@ -1,859 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -############# -# constants # -############# - -# The slot has uses that are not statically dominated by any assignment -# This is implied by `SLOT_USEDUNDEF`. -# If this is not set, all the uses are (statically) dominated by the defs. -# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA. -const SLOT_STATICUNDEF = 1 # slot might be used before it is defined (structurally) -const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once -const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError -# const SLOT_CALLED = 64 - -# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c - -const IR_FLAG_NULL = 0x00 -# This statement is marked as @inbounds by user. -# Ff replaced by inlining, any contained boundschecks may be removed. -const IR_FLAG_INBOUNDS = 0x01 << 0 -# This statement is marked as @inline by user -const IR_FLAG_INLINE = 0x01 << 1 -# This statement is marked as @noinline by user -const IR_FLAG_NOINLINE = 0x01 << 2 -const IR_FLAG_THROW_BLOCK = 0x01 << 3 -# This statement may be removed if its result is unused. In particular, -# it must be both :effect_free and :nothrow. -# TODO: Separate these out. 
-const IR_FLAG_EFFECT_FREE = 0x01 << 4 -# This statement was proven not to throw -const IR_FLAG_NOTHROW = 0x01 << 5 -# This is :consistent -const IR_FLAG_CONSISTENT = 0x01 << 6 -# An optimization pass has updated this statement in a way that may -# have exposed information that inference did not see. Re-running -# inference on this statement may be profitable. -const IR_FLAG_REFINED = 0x01 << 7 - -const TOP_TUPLE = GlobalRef(Core, :tuple) - -# This corresponds to the type of `CodeInfo`'s `inlining_cost` field -const InlineCostType = UInt16 -const MAX_INLINE_COST = typemax(InlineCostType) -const MIN_INLINE_COST = InlineCostType(10) -const MaybeCompressed = Union{CodeInfo, String} - -is_inlineable(@nospecialize src::MaybeCompressed) = - ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST -set_inlineable!(src::CodeInfo, val::Bool) = - src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST) - -function inline_cost_clamp(x::Int)::InlineCostType - x > MAX_INLINE_COST && return MAX_INLINE_COST - x < MIN_INLINE_COST && return MIN_INLINE_COST - return convert(InlineCostType, x) -end - -is_declared_inline(@nospecialize src::MaybeCompressed) = - ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1 - -is_declared_noinline(@nospecialize src::MaybeCompressed) = - ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2 - -##################### -# OptimizationState # -##################### - -is_source_inferred(@nospecialize src::MaybeCompressed) = - ccall(:jl_ir_flag_inferred, Bool, (Any,), src) - -function inlining_policy(interp::AbstractInterpreter, - @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt8, mi::MethodInstance, - argtypes::Vector{Any}) - if isa(src, MaybeCompressed) - is_source_inferred(src) || return nothing - src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src) - return src_inlineable ? src : nothing - elseif src === nothing && is_stmt_inline(stmt_flag) - # if this statement is forced to be inlined, make an additional effort to find the - # inferred source in the local cache - # we still won't find a source for recursive call because the "single-level" inlining - # seems to be more trouble and complex than it's worth - inf_result = cache_lookup(optimizer_lattice(interp), mi, argtypes, get_inference_cache(interp)) - inf_result === nothing && return nothing - src = inf_result.src - if isa(src, CodeInfo) - src_inferred = is_source_inferred(src) - return src_inferred ? src : nothing - else - return nothing - end - elseif isa(src, IRCode) - return src - elseif isa(src, SemiConcreteResult) - if is_declared_noinline(mi.def::Method) - # For `NativeInterpreter`, `SemiConcreteResult` may be produced for - # a `@noinline`-declared method when it's marked as `@constprop :aggressive`. - # Suppress the inlining here. 
- return nothing - end - return src - end - return nothing -end - -struct InliningState{Interp<:AbstractInterpreter} - edges::Vector{Any} - world::UInt - interp::Interp -end -function InliningState(sv::InferenceState, interp::AbstractInterpreter) - edges = sv.stmt_edges[1]::Vector{Any} - return InliningState(edges, sv.world, interp) -end -function InliningState(interp::AbstractInterpreter) - return InliningState(Any[], get_world_counter(interp), interp) -end - -# get `code_cache(::AbstractInterpreter)` from `state::InliningState` -code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.world) - -include("compiler/ssair/driver.jl") - -mutable struct OptimizationState{Interp<:AbstractInterpreter} - linfo::MethodInstance - src::CodeInfo - ir::Union{Nothing, IRCode} - stmt_info::Vector{CallInfo} - mod::Module - sptypes::Vector{VarState} - slottypes::Vector{Any} - inlining::InliningState{Interp} - cfg::Union{Nothing,CFG} - insert_coverage::Bool -end -function OptimizationState(sv::InferenceState, interp::AbstractInterpreter, - recompute_cfg::Bool=true) - inlining = InliningState(sv, interp) - cfg = recompute_cfg ? nothing : sv.cfg - return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod, - sv.sptypes, sv.slottypes, inlining, cfg, sv.insert_coverage) -end -function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::AbstractInterpreter) - # prepare src for running optimization passes if it isn't already - nssavalues = src.ssavaluetypes - if nssavalues isa Int - src.ssavaluetypes = Any[ Any for i = 1:nssavalues ] - else - nssavalues = length(src.ssavaluetypes::Vector{Any}) - end - sptypes = sptypes_from_meth_instance(linfo) - nslots = length(src.slotflags) - slottypes = src.slottypes - if slottypes === nothing - slottypes = Any[ Any for i = 1:nslots ] - end - stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ] - # cache some useful state computations - def = linfo.def - mod = isa(def, Method) ? def.module : def - # Allow using the global MI cache, but don't track edges. 
- # This method is mostly used for unit testing the optimizer - inlining = InliningState(interp) - return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, nothing, false) -end -function OptimizationState(linfo::MethodInstance, interp::AbstractInterpreter) - world = get_world_counter(interp) - src = retrieve_code_info(linfo, world) - src === nothing && return nothing - return OptimizationState(linfo, src, interp) -end - -function ir_to_codeinf!(opt::OptimizationState) - (; linfo, src) = opt - src = ir_to_codeinf!(src, opt.ir::IRCode) - opt.ir = nothing - validate_code_in_debug_mode(linfo, src, "optimized") - return src -end - -function ir_to_codeinf!(src::CodeInfo, ir::IRCode) - replace_code_newstyle!(src, ir) - widen_all_consts!(src) - src.inferred = true - return src -end - -# widen all Const elements in type annotations -function widen_all_consts!(src::CodeInfo) - ssavaluetypes = src.ssavaluetypes::Vector{Any} - for i = 1:length(ssavaluetypes) - ssavaluetypes[i] = widenconst(ssavaluetypes[i]) - end - - for i = 1:length(src.code) - x = src.code[i] - if isa(x, PiNode) - src.code[i] = PiNode(x.val, widenconst(x.typ)) - end - end - - src.rettype = widenconst(src.rettype) - - return src -end - -######### -# logic # -######### - -_topmod(sv::OptimizationState) = _topmod(sv.mod) - -is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE ≠ 0 -is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE ≠ 0 -is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0 - -""" - stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) -> - (consistent::Bool, effect_free_and_nothrow::Bool, nothrow::Bool) - -Returns a tuple of `(:consistent, :effect_free_and_nothrow, :nothrow)` flags for a given statement. -""" -function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact}) - # TODO: We're duplicating analysis from inference here. - isa(stmt, PiNode) && return (true, true, true) - isa(stmt, PhiNode) && return (true, true, true) - isa(stmt, ReturnNode) && return (true, false, true) - isa(stmt, GotoNode) && return (true, false, true) - isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool)) - if isa(stmt, GlobalRef) - nothrow = isdefined(stmt.mod, stmt.name) - consistent = nothrow && isconst(stmt.mod, stmt.name) - return (consistent, nothrow, nothrow) - elseif isa(stmt, Expr) - (; head, args) = stmt - if head === :static_parameter - # if we aren't certain enough about the type, it might be an UndefVarError at runtime - sptypes = isa(src, IRCode) ? 
src.sptypes : src.ir.sptypes - nothrow = !sptypes[args[1]::Int].undef - return (true, nothrow, nothrow) - end - if head === :call - f = argextype(args[1], src) - f = singleton_type(f) - f === nothing && return (false, false, false) - if f === UnionAll - # TODO: This is a weird special case - should be determined in inference - argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)] - nothrow = _builtin_nothrow(𝕃ₒ, f, argtypes, rt) - return (true, nothrow, nothrow) - end - if f === Intrinsics.cglobal - # TODO: these are not yet linearized - return (false, false, false) - end - isa(f, Builtin) || return (false, false, false) - # Needs to be handled in inlining to look at the callee effects - f === Core._apply_iterate && return (false, false, false) - argtypes = Any[argextype(args[arg], src) for arg in 1:length(args)] - effects = builtin_effects(𝕃ₒ, f, ArgInfo(args, argtypes), rt) - consistent = is_consistent(effects) - effect_free = is_effect_free(effects) - nothrow = is_nothrow(effects) - return (consistent, effect_free & nothrow, nothrow) - elseif head === :new - atyp = argextype(args[1], src) - # `Expr(:new)` of unknown type could raise arbitrary TypeError. - typ, isexact = instanceof_tfunc(atyp) - if !isexact - atyp = unwrap_unionall(widenconst(atyp)) - if isType(atyp) && isTypeDataType(atyp.parameters[1]) - typ = atyp.parameters[1] - else - return (false, false, false) - end - isabstracttype(typ) && return (false, false, false) - else - isconcretedispatch(typ) || return (false, false, false) - end - typ = typ::DataType - fcount = datatype_fieldcount(typ) - fcount === nothing && return (false, false, false) - fcount >= length(args) - 1 || return (false, false, false) - for fld_idx in 1:(length(args) - 1) - eT = argextype(args[fld_idx + 1], src) - fT = fieldtype(typ, fld_idx) - # Currently, we cannot represent any type equality constraints - # in the lattice, so if we see any type of type parameter, - # there is very little we can say about it - if !isexact && has_free_typevars(fT) - return (false, false, false) - end - ⊑(𝕃ₒ, eT, fT) || return (false, false, false) - end - return (false, true, true) - elseif head === :foreigncall - effects = foreigncall_effects(stmt) do @nospecialize x - argextype(x, src) - end - consistent = is_consistent(effects) - effect_free = is_effect_free(effects) - nothrow = is_nothrow(effects) - return (consistent, effect_free & nothrow, nothrow) - elseif head === :new_opaque_closure - length(args) < 4 && return (false, false, false) - typ = argextype(args[1], src) - typ, isexact = instanceof_tfunc(typ) - isexact || return (false, false, false) - ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false) - rt_lb = argextype(args[2], src) - rt_ub = argextype(args[3], src) - source = argextype(args[4], src) - if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method)) - return (false, false, false) - end - return (false, true, true) - elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck - return (true, true, true) - else - # e.g. :loopinfo - return (false, false, false) - end - end - isa(stmt, UnoptSlot) && error("unexpected IR elements") - return (true, true, true) -end - -""" - argextype(x, src::Union{IRCode,IncrementalCompact}) -> t - argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t - -Return the type of value `x` in the context of inferred source `src`. -Note that `t` might be an extended lattice element. -Use `widenconst(t)` to get the native Julia type of `x`. 
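A hedged example of the `IRCode` form, using the internal `Base.code_ircode` helper available on recent Julia versions (the exact lattice elements returned can differ between versions):

```julia
ir, rt = only(Base.code_ircode(muladd, (Float64, Float64, Float64)))
# Arguments count the function itself as argument 1:
Core.Compiler.argextype(Core.Compiler.Argument(2), ir)   # Float64
# Plain literals come back wrapped as `Const`:
Core.Compiler.argextype(1.5, ir)                         # Core.Compiler.Const(1.5)
# SSA values resolve to the type inferred for the corresponding statement:
Core.Compiler.argextype(Core.Compiler.SSAValue(1), ir)
```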
-""" -argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) = - argextype(x, ir, sptypes, ir.argtypes) -function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes) - isa(x, AnySSAValue) && return types(compact)[x] - return argextype(x, compact, sptypes, compact.ir.argtypes) -end -argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState}) = argextype(x, src, sptypes, src.slottypes::Vector{Any}) -function argextype( - @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo}, - sptypes::Vector{VarState}, slottypes::Vector{Any}) - if isa(x, Expr) - if x.head === :static_parameter - return sptypes[x.args[1]::Int].typ - elseif x.head === :boundscheck - return Bool - elseif x.head === :copyast - return argextype(x.args[1], src, sptypes, slottypes) - end - Core.println("argextype called on Expr with head ", x.head, - " which is not valid for IR in argument-position.") - @assert false - elseif isa(x, SlotNumber) - return slottypes[x.id] - elseif isa(x, TypedSlot) - return x.typ - elseif isa(x, SSAValue) - return abstract_eval_ssavalue(x, src) - elseif isa(x, Argument) - return slottypes[x.n] - elseif isa(x, QuoteNode) - return Const(x.value) - elseif isa(x, GlobalRef) - return abstract_eval_globalref(x) - elseif isa(x, PhiNode) - return Any - elseif isa(x, PiNode) - return x.typ - else - return Const(x) - end -end -abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any}) -abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s] - -""" - finish(interp::AbstractInterpreter, opt::OptimizationState, - ir::IRCode, caller::InferenceResult) - -Post-process information derived by Julia-level optimizations for later use. -In particular, this function determines the inlineability of the optimized code. 
-""" -function finish(interp::AbstractInterpreter, opt::OptimizationState, - ir::IRCode, caller::InferenceResult) - (; src, linfo) = opt - (; def, specTypes) = linfo - - force_noinline = is_declared_noinline(src) - - # compute inlining and other related optimizations - result = caller.result - @assert !(result isa LimitedAccuracy) - result = widenslotwrapper(result) - - opt.ir = ir - - # determine and cache inlineability - union_penalties = false - if !force_noinline - sig = unwrap_unionall(specTypes) - if isa(sig, DataType) && sig.name === Tuple.name - for P in sig.parameters - P = unwrap_unionall(P) - if isa(P, Union) - union_penalties = true - break - end - end - else - force_noinline = true - end - if !is_declared_inline(src) && result === Bottom - force_noinline = true - end - end - if force_noinline - set_inlineable!(src, false) - elseif isa(def, Method) - if is_declared_inline(src) && isdispatchtuple(specTypes) - # obey @inline declaration if a dispatch barrier would not help - set_inlineable!(src, true) - else - # compute the cost (size) of inlining this code - params = OptimizationParams(interp) - cost_threshold = default = params.inline_cost_threshold - if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result)) - cost_threshold += params.inline_tupleret_bonus - end - # if the method is declared as `@inline`, increase the cost threshold 20x - if is_declared_inline(src) - cost_threshold += 19*default - end - # a few functions get special treatment - if def.module === _topmod(def.module) - name = def.name - if name === :iterate || name === :unsafe_convert || name === :cconvert - cost_threshold += 4*default - end - end - src.inlining_cost = inline_cost(ir, params, union_penalties, cost_threshold) - end - end - return nothing -end - -# run the optimization work -function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult) - @timeit "optimizer" ir = run_passes(opt.src, opt, caller) - return finish(interp, opt, ir, caller) -end - -using .EscapeAnalysis -import .EscapeAnalysis: EscapeState, ArgEscapeCache, is_ipo_profitable - -""" - cache_escapes!(caller::InferenceResult, estate::EscapeState) - -Transforms escape information of call arguments of `caller`, -and then caches it into a global cache for later interprocedural propagation. -""" -cache_escapes!(caller::InferenceResult, estate::EscapeState) = - caller.argescapes = ArgEscapeCache(estate) - -function ipo_escape_cache(mi_cache::MICache) where MICache - return function (linfo::Union{InferenceResult,MethodInstance}) - if isa(linfo, InferenceResult) - argescapes = linfo.argescapes - else - codeinst = get(mi_cache, linfo, nothing) - isa(codeinst, CodeInstance) || return nothing - argescapes = codeinst.argescapes - end - return argescapes !== nothing ? 
argescapes::ArgEscapeCache : nothing - end -end -null_escape_cache(linfo::Union{InferenceResult,MethodInstance}) = nothing - -macro pass(name, expr) - optimize_until = esc(:optimize_until) - stage = esc(:__stage__) - macrocall = :(@timeit $(esc(name)) $(esc(expr))) - macrocall.args[2] = __source__ # `@timeit` may want to use it - quote - $macrocall - matchpass($optimize_until, ($stage += 1), $(esc(name))) && $(esc(:(@goto __done__))) - end -end - -matchpass(optimize_until::Int, stage, _) = optimize_until == stage -matchpass(optimize_until::String, _, name) = optimize_until == name -matchpass(::Nothing, _, _) = false - -function run_passes( - ci::CodeInfo, - sv::OptimizationState, - caller::InferenceResult, - optimize_until = nothing, # run all passes by default -) - __stage__ = 0 # used by @pass - # NOTE: The pass name MUST be unique for `optimize_until::AbstractString` to work - @pass "convert" ir = convert_to_ircode(ci, sv) - @pass "slot2reg" ir = slot2reg(ir, ci, sv) - # TODO: Domsorting can produce an updated domtree - no need to recompute here - @pass "compact 1" ir = compact!(ir) - @pass "Inlining" ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds) - # @timeit "verify 2" verify_ir(ir) - @pass "compact 2" ir = compact!(ir) - @pass "SROA" ir = sroa_pass!(ir, sv.inlining) - @pass "ADCE" ir = adce_pass!(ir, sv.inlining) - @pass "compact 3" ir = compact!(ir) - if JLOptions().debug_level == 2 - @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) - end - @label __done__ # used by @pass - return ir -end - -function convert_to_ircode(ci::CodeInfo, sv::OptimizationState) - linetable = ci.linetable - if !isa(linetable, Vector{LineInfoNode}) - linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode} - end - - # Go through and add an unreachable node after every - # Union{} call. Then reindex labels. - code = copy_exprargs(ci.code) - stmtinfo = sv.stmt_info - codelocs = ci.codelocs - ssavaluetypes = ci.ssavaluetypes::Vector{Any} - ssaflags = ci.ssaflags - meta = Expr[] - idx = 1 - oldidx = 1 - nstmts = length(code) - ssachangemap = labelchangemap = nothing - prevloc = zero(eltype(ci.codelocs)) - while idx <= length(code) - codeloc = codelocs[idx] - if sv.insert_coverage && codeloc != prevloc && codeloc != 0 - # insert a side-effect instruction before the current instruction in the same basic block - insert!(code, idx, Expr(:code_coverage_effect)) - insert!(codelocs, idx, codeloc) - insert!(ssavaluetypes, idx, Nothing) - insert!(stmtinfo, idx, NoCallInfo()) - insert!(ssaflags, idx, IR_FLAG_NULL) - if ssachangemap === nothing - ssachangemap = fill(0, nstmts) - end - if labelchangemap === nothing - labelchangemap = fill(0, nstmts) - end - ssachangemap[oldidx] += 1 - if oldidx < length(labelchangemap) - labelchangemap[oldidx + 1] += 1 - end - idx += 1 - prevloc = codeloc - end - if code[idx] isa Expr && ssavaluetypes[idx] === Union{} - if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val)) - # insert unreachable in the same basic block after the current instruction (splitting it) - insert!(code, idx + 1, ReturnNode()) - insert!(codelocs, idx + 1, codelocs[idx]) - insert!(ssavaluetypes, idx + 1, Union{}) - insert!(stmtinfo, idx + 1, NoCallInfo()) - insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW) - if ssachangemap === nothing - ssachangemap = fill(0, nstmts) - end - if labelchangemap === nothing - labelchangemap = sv.insert_coverage ? 
fill(0, nstmts) : ssachangemap - end - if oldidx < length(ssachangemap) - ssachangemap[oldidx + 1] += 1 - sv.insert_coverage && (labelchangemap[oldidx + 1] += 1) - end - idx += 1 - end - end - idx += 1 - oldidx += 1 - end - - cfg = sv.cfg - if ssachangemap !== nothing && labelchangemap !== nothing - renumber_ir_elements!(code, ssachangemap, labelchangemap) - cfg = nothing # recompute CFG - end - - for i = 1:length(code) - code[i] = process_meta!(meta, code[i]) - end - strip_trailing_junk!(ci, code, stmtinfo) - types = Any[] - stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags) - if cfg === nothing - cfg = compute_basic_blocks(code) - end - # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the - # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form - # and eliminates slots (see below) - argtypes = sv.slottypes - return IRCode(stmts, cfg, linetable, argtypes, meta, sv.sptypes) -end - -function process_meta!(meta::Vector{Expr}, @nospecialize stmt) - if isexpr(stmt, :meta) && length(stmt.args) ≥ 1 - push!(meta, stmt) - return nothing - end - return stmt -end - -function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState) - # need `ci` for the slot metadata, IR for the code - svdef = sv.linfo.def - nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0 - @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks) - defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) - 𝕃ₒ = optimizer_lattice(sv.inlining.interp) - @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes, 𝕃ₒ) # consumes `ir` - # NOTE now we have converted `ir` to the SSA form and eliminated slots - # let's resize `argtypes` now and remove unnecessary types for the eliminated slots - resize!(ir.argtypes, nargs) - return ir -end - -## Computing the cost of a function body - -# saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below -plus_saturate(x::Int, y::Int) = max(x, y, x+y) - -# known return type -isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T)) - -function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, - union_penalties::Bool, params::OptimizationParams, error_path::Bool = false) - head = ex.head - if is_meta_expr_head(head) - return 0 - elseif head === :call - farg = ex.args[1] - ftyp = argextype(farg, src, sptypes) - if ftyp === IntrinsicFunction && farg isa SSAValue - # if this comes from code that was already inlined into another function, - # Consts have been widened. try to recover in simple cases. - farg = isa(src, CodeInfo) ? 
src.code[farg.id] : src.stmts[farg.id][:inst] - if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter) - ftyp = argextype(farg, src, sptypes) - end - end - f = singleton_type(ftyp) - if isa(f, IntrinsicFunction) - iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 - if !isassigned(T_IFUNC_COST, iidx) - # unknown/unhandled intrinsic - return params.inline_nonleaf_penalty - end - return T_IFUNC_COST[iidx] - end - if isa(f, Builtin) && f !== invoke - # The efficiency of operations like a[i] and s.b - # depend strongly on whether the result can be - # inferred, so check the type of ex - if f === Core.getfield || f === Core.tuple || f === Core.getglobal - # we might like to penalize non-inferrability, but - # tuple iteration/destructuring makes that impossible - # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty) - return 0 - elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3 - atyp = argextype(ex.args[3], src, sptypes) - return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty - elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes))) - return 1 - elseif f === Core.isa - # If we're in a union context, we penalize type computations - # on union types. In such cases, it is usually better to perform - # union splitting on the outside. - if union_penalties && isa(argextype(ex.args[2], src, sptypes), Union) - return params.inline_nonleaf_penalty - end - end - fidx = find_tfunc(f) - if fidx === nothing - # unknown/unhandled builtin - # Use the generic cost of a direct function call - return 20 - end - return T_FFUNC_COST[fidx] - end - extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) - if extyp === Union{} - return 0 - end - return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty - elseif head === :foreigncall || head === :invoke || head === :invoke_modify - # Calls whose "return type" is Union{} do not actually return: - # they are errors. Since these are not part of the typical - # run-time of the function, we omit them from - # consideration. This way, non-inlined error branches do not - # prevent inlining. - extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) - return extyp === Union{} ? 0 : 20 - elseif head === :(=) - if ex.args[1] isa GlobalRef - cost = 20 - else - cost = 0 - end - a = ex.args[2] - if a isa Expr - cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, union_penalties, params, error_path)) - end - return cost - elseif head === :copyast - return 100 - elseif head === :enter - # try/catch is a couple function calls, - # but don't inline functions with try/catch - # since these aren't usually performance-sensitive functions, - # and llvm is more likely to miscompile them when these functions get large - return typemax(Int) - end - return 0 -end - -function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, - union_penalties::Bool, params::OptimizationParams) - thiscost = 0 - dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt - if stmt isa Expr - thiscost = statement_cost(stmt, line, src, sptypes, union_penalties, params, - is_stmt_throw_block(isa(src, IRCode) ? 
src.stmts.flag[line] : src.ssaflags[line]))::Int - elseif stmt isa GotoNode - # loops are generally always expensive - # but assume that forward jumps are already counted for from - # summing the cost of the not-taken branch - thiscost = dst(stmt.label) < line ? 40 : 0 - elseif stmt isa GotoIfNot - thiscost = dst(stmt.dest) < line ? 40 : 0 - end - return thiscost -end - -function inline_cost(ir::IRCode, params::OptimizationParams, union_penalties::Bool=false, - cost_threshold::Integer=params.inline_cost_threshold)::InlineCostType - bodycost::Int = 0 - for line = 1:length(ir.stmts) - stmt = ir.stmts[line][:inst] - thiscost = statement_or_branch_cost(stmt, line, ir, ir.sptypes, union_penalties, params) - bodycost = plus_saturate(bodycost, thiscost) - bodycost > cost_threshold && return MAX_INLINE_COST - end - return inline_cost_clamp(bodycost) -end - -function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, unionpenalties::Bool, params::OptimizationParams) - maxcost = 0 - for line = 1:length(body) - stmt = body[line] - thiscost = statement_or_branch_cost(stmt, line, src, sptypes, - unionpenalties, params) - cost[line] = thiscost - if thiscost > maxcost - maxcost = thiscost - end - end - return maxcost -end - -function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}) - return renumber_ir_elements!(body, ssachangemap, ssachangemap) -end - -function cumsum_ssamap!(ssachangemap::Vector{Int}) - any_change = false - rel_change = 0 - for i = 1:length(ssachangemap) - val = ssachangemap[i] - any_change |= val ≠ 0 - rel_change += val - if val == -1 - # Keep a marker that this statement was deleted - ssachangemap[i] = typemin(Int) - else - ssachangemap[i] = rel_change - end - end - return any_change -end - -function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int}) - any_change = cumsum_ssamap!(labelchangemap) - if ssachangemap !== labelchangemap - any_change |= cumsum_ssamap!(ssachangemap) - end - any_change || return - for i = 1:length(body) - el = body[i] - if isa(el, GotoNode) - body[i] = GotoNode(el.label + labelchangemap[el.label]) - elseif isa(el, GotoIfNot) - cond = el.cond - if isa(cond, SSAValue) - cond = SSAValue(cond.id + ssachangemap[cond.id]) - end - was_deleted = labelchangemap[el.dest] == typemin(Int) - body[i] = was_deleted ? 
cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest]) - elseif isa(el, ReturnNode) - if isdefined(el, :val) - val = el.val - if isa(val, SSAValue) - body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id])) - end - end - elseif isa(el, SSAValue) - body[i] = SSAValue(el.id + ssachangemap[el.id]) - elseif isa(el, PhiNode) - i = 1 - edges = el.edges - values = el.values - while i <= length(edges) - was_deleted = ssachangemap[edges[i]] == typemin(Int) - if was_deleted - deleteat!(edges, i) - deleteat!(values, i) - else - edges[i] += ssachangemap[edges[i]] - val = values[i] - if isa(val, SSAValue) - values[i] = SSAValue(val.id + ssachangemap[val.id]) - end - i += 1 - end - end - elseif isa(el, Expr) - if el.head === :(=) && el.args[2] isa Expr - el = el.args[2]::Expr - end - if el.head === :enter - tgt = el.args[1]::Int - el.args[1] = tgt + labelchangemap[tgt] - elseif !is_meta_expr_head(el.head) - args = el.args - for i = 1:length(args) - el = args[i] - if isa(el, SSAValue) - args[i] = SSAValue(el.id + ssachangemap[el.id]) - end - end - end - end - end -end diff --git a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl deleted file mode 100644 index 26b0e5b404641..0000000000000 --- a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl +++ /dev/null @@ -1,159 +0,0 @@ -# TODO this file contains many duplications with the inlining analysis code, factor them out - -import Core.Compiler: - MethodInstance, InferenceResult, Signature, ConstPropResult, ConcreteResult, - SemiConcreteResult, CallInfo, NoCallInfo, MethodResultPure, MethodMatchInfo, - UnionSplitInfo, ConstCallInfo, InvokeCallInfo, - call_sig, argtypes_to_type, is_builtin, is_return_type, istopfunction, - validate_sparams, specialize_method, invoke_rewrite - -const Linfo = Union{MethodInstance,InferenceResult} -struct EACallInfo - linfos::Vector{Linfo} - nothrow::Bool -end - -function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info::CallInfo)) - # TODO: if effect free, return true - sig = call_sig(ir, stmt) - if sig === nothing - return missing - end - # TODO handle _apply_iterate - if is_builtin(𝕃ₒ, sig) && sig.f !== invoke - return false - end - # handling corresponding to late_inline_special_case! - (; f, argtypes) = sig - if length(argtypes) == 3 && istopfunction(f, :!==) - return true - elseif length(argtypes) == 3 && istopfunction(f, :(>:)) - return true - elseif f === TypeVar && 2 ≤ length(argtypes) ≤ 4 && (argtypes[2] ⊑ Symbol) - return true - elseif f === UnionAll && length(argtypes) == 3 && (argtypes[2] ⊑ TypeVar) - return true - elseif is_return_type(f) - return true - end - if info isa MethodResultPure - return true - elseif info === NoCallInfo - return missing - end - # TODO handle OpaqueClosureCallInfo - if sig.f === invoke - isa(info, InvokeCallInfo) || return missing - return analyze_invoke_call(sig, info) - elseif isa(info, ConstCallInfo) - return analyze_const_call(sig, info) - elseif isa(info, MethodMatchInfo) - infos = MethodMatchInfo[info] - elseif isa(info, UnionSplitInfo) - infos = info.matches - else # isa(info, ReturnTypeCallInfo), etc. 
- return missing - end - return analyze_call(sig, infos) -end - -function analyze_invoke_call(sig::Signature, info::InvokeCallInfo) - match = info.match - if !match.fully_covers - # TODO: We could union split out the signature check and continue on - return missing - end - result = info.result - if isa(result, ConstPropResult) - return EACallInfo(Linfo[result.result], true) - elseif isa(result, ConcreteResult) - return EACallInfo(Linfo[result.mi], true) - elseif isa(result, SemiConcreteResult) - return EACallInfo(Linfo[result.mi], true) - else - argtypes = invoke_rewrite(sig.argtypes) - mi = analyze_match(match, length(argtypes)) - mi === nothing && return missing - return EACallInfo(Linfo[mi], true) - end -end - -function analyze_const_call(sig::Signature, cinfo::ConstCallInfo) - linfos = Linfo[] - (; call, results) = cinfo - infos = isa(call, MethodMatchInfo) ? MethodMatchInfo[call] : call.matches - local nothrow = true # required to account for potential escape via MethodError - local j = 0 - for i in 1:length(infos) - meth = infos[i].results - nothrow &= !meth.ambig - nmatch = Core.Compiler.length(meth) - if nmatch == 0 # No applicable methods - # mark this call may potentially throw, and the try next union split - nothrow = false - continue - end - for i = 1:nmatch - j += 1 - result = results[j] - match = Core.Compiler.getindex(meth, i) - if result === nothing - mi = analyze_match(match, length(sig.argtypes)) - mi === nothing && return missing - push!(linfos, mi) - elseif isa(result, ConcreteResult) - # TODO we may want to feedback information that this call always throws if !isdefined(result, :result) - push!(linfos, result.mi) - elseif isa(result, SemiConcreteResult) - push!(linfos, result.mi) - elseif isa(result, ConstPropResult) - push!(linfos, result.result) - end - nothrow &= match.fully_covers - end - end - return EACallInfo(linfos, nothrow) -end - -function analyze_call(sig::Signature, infos::Vector{MethodMatchInfo}) - linfos = Linfo[] - local nothrow = true # required to account for potential escape via MethodError - for i in 1:length(infos) - meth = infos[i].results - nothrow &= !meth.ambig - nmatch = Core.Compiler.length(meth) - if nmatch == 0 # No applicable methods - # mark this call may potentially throw, and the try next union split - nothrow = false - continue - end - for i = 1:nmatch - match = Core.Compiler.getindex(meth, i) - mi = analyze_match(match, length(sig.argtypes)) - mi === nothing && return missing - push!(linfos, mi) - nothrow &= match.fully_covers - end - end - return EACallInfo(linfos, nothrow) -end - -function analyze_match(match::MethodMatch, npassedargs::Int) - method = match.method - na = Int(method.nargs) - if na != npassedargs && !(na > 0 && method.isva) - # we have a method match only because an earlier - # inference step shortened our call args list, even - # though we have too many arguments to actually - # call this function - return nothing - end - - # Bail out if any static parameters are left as TypeVar - # COMBAK is this needed for escape analysis? - validate_sparams(match.sparams) || return nothing - - # See if there exists a specialization for this method signature - mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance} - return mi -end diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl deleted file mode 100644 index 1946a76714e57..0000000000000 --- a/base/compiler/ssair/driver.jl +++ /dev/null @@ -1,24 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -if false - import Base: Base, @show -else - macro show(ex...) - blk = Expr(:block) - for s in ex - push!(blk.args, :(println(stdout, $(QuoteNode(s)), " = ", - begin local value = $(esc(s)) end))) - end - isempty(ex) || push!(blk.args, :value) - blk - end -end - -include("compiler/ssair/heap.jl") -include("compiler/ssair/slot2ssa.jl") -include("compiler/ssair/inlining.jl") -include("compiler/ssair/verify.jl") -include("compiler/ssair/legacy.jl") -include("compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl") -include("compiler/ssair/passes.jl") -include("compiler/ssair/irinterp.jl") diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl deleted file mode 100644 index f4c04ea4e1380..0000000000000 --- a/base/compiler/ssair/irinterp.jl +++ /dev/null @@ -1,388 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -function collect_limitations!(@nospecialize(typ), ::IRInterpretationState) - @assert !isa(typ, LimitedAccuracy) "irinterp is unable to handle heavy recursion" - return typ -end - -function concrete_eval_invoke(interp::AbstractInterpreter, - inst::Expr, mi::MethodInstance, irsv::IRInterpretationState) - world = frame_world(irsv) - mi_cache = WorldView(code_cache(interp), world) - code = get(mi_cache, mi, nothing) - code === nothing && return Pair{Any,Bool}(nothing, false) - argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv) - argtypes === nothing && return Pair{Any,Bool}(Bottom, false) - effects = decode_effects(code.ipo_purity_bits) - if is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1) - args = collect_const_args(argtypes, #=start=#1) - value = let world = get_world_counter(interp) - try - Core._call_in_world_total(world, args...) - catch - return Pair{Any,Bool}(Bottom, false) - end - end - return Pair{Any,Bool}(Const(value), true) - else - if is_constprop_edge_recursed(mi, irsv) - return Pair{Any,Bool}(nothing, is_nothrow(effects)) - end - newirsv = IRInterpretationState(interp, code, mi, argtypes, world) - if newirsv !== nothing - newirsv.parent = irsv - return ir_abstract_constant_propagation(interp, newirsv) - end - return Pair{Any,Bool}(nothing, is_nothrow(effects)) - end -end - -abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir) - -function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState) - return abstract_eval_phi(interp, phi, nothing, irsv) -end - -function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState) - si = StmtInfo(true) # TODO better job here? 
- (; rt, effects, info) = abstract_call(interp, arginfo, si, irsv) - irsv.ir.stmts[irsv.curridx][:info] = info - return RTEffects(rt, effects) -end - -function update_phi!(irsv::IRInterpretationState, from::Int, to::Int) - ir = irsv.ir - if length(ir.cfg.blocks[to].preds) == 0 - # Kill the entire block - for bidx = ir.cfg.blocks[to].stmts - ir.stmts[bidx][:inst] = nothing - ir.stmts[bidx][:type] = Bottom - ir.stmts[bidx][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - end - return - end - for sidx = ir.cfg.blocks[to].stmts - sinst = ir.stmts[sidx][:inst] - isa(sinst, Nothing) && continue # allowed between `PhiNode`s - isa(sinst, PhiNode) || break - for (eidx, edge) in enumerate(sinst.edges) - if edge == from - deleteat!(sinst.edges, eidx) - deleteat!(sinst.values, eidx) - push!(irsv.ssa_refined, sidx) - break - end - end - end -end -update_phi!(irsv::IRInterpretationState) = (from::Int, to::Int)->update_phi!(irsv, from, to) - -function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::Int=block_for_inst(irsv.ir, term_idx)) - ir = irsv.ir - inst = ir[SSAValue(term_idx)][:inst] - if isa(inst, GotoIfNot) - kill_edge!(ir, bb, inst.dest, update_phi!(irsv)) - kill_edge!(ir, bb, bb+1, update_phi!(irsv)) - elseif isa(inst, GotoNode) - kill_edge!(ir, bb, inst.label, update_phi!(irsv)) - elseif isa(inst, ReturnNode) - # Nothing to do - else - @assert !isexpr(inst, :enter) - kill_edge!(ir, bb, bb+1, update_phi!(irsv)) - end -end - -function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union{Int,Nothing}, - @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState) - ir = irsv.ir - if isa(inst, GotoIfNot) - cond = inst.cond - condval = maybe_extract_const_bool(argextype(cond, ir)) - if condval isa Bool - if isa(cond, SSAValue) - kill_def_use!(irsv.tpdum, cond, idx) - end - if bb === nothing - bb = block_for_inst(ir, idx) - end - ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW - if condval - ir.stmts[idx][:inst] = nothing - ir.stmts[idx][:type] = Any - kill_edge!(ir, bb, inst.dest, update_phi!(irsv)) - else - ir.stmts[idx][:inst] = GotoNode(inst.dest) - kill_edge!(ir, bb, bb+1, update_phi!(irsv)) - end - return true - end - return false - end - rt = nothing - if isa(inst, Expr) - head = inst.head - if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined - (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, irsv) - ir.stmts[idx][:flag] |= flags_for_effects(effects) - elseif head === :invoke - rt, nothrow = concrete_eval_invoke(interp, inst, inst.args[1]::MethodInstance, irsv) - if nothrow - ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW - end - elseif head === :throw_undef_if_not - condval = maybe_extract_const_bool(argextype(inst.args[2], ir)) - condval isa Bool || return false - if condval - ir.stmts[idx][:inst] = nothing - # We simplified the IR, but we did not update the type - return false - end - rt = Union{} - elseif head === :gc_preserve_begin || - head === :gc_preserve_end - return false - else - error("reprocess_instruction!: unhandled expression found") - end - elseif isa(inst, PhiNode) - rt = abstract_eval_phi_stmt(interp, inst, idx, irsv) - elseif isa(inst, ReturnNode) - # Handled at the very end - return false - elseif isa(inst, PiNode) - rt = tmeet(typeinf_lattice(interp), argextype(inst.val, ir), widenconst(inst.typ)) - elseif inst === nothing - return false - elseif isa(inst, GlobalRef) - # GlobalRef is not refinable - else - rt = argextype(inst, 
irsv.ir) - end - if rt !== nothing - if isa(rt, Const) - ir.stmts[idx][:type] = rt - if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - ir.stmts[idx][:inst] = quoted(rt.val) - end - return true - elseif !⊑(typeinf_lattice(interp), typ, rt) - ir.stmts[idx][:type] = rt - return true - end - end - return false -end - -# Process the terminator and add the successor to `bb_ip`. Returns whether a backedge was seen. -function process_terminator!(ir::IRCode, @nospecialize(inst), idx::Int, bb::Int, - all_rets::Vector{Int}, bb_ip::BitSetBoundedMinPrioritySet) - if isa(inst, ReturnNode) - if isdefined(inst, :val) - push!(all_rets, idx) - end - return false - elseif isa(inst, GotoNode) - backedge = inst.label <= bb - backedge || push!(bb_ip, inst.label) - return backedge - elseif isa(inst, GotoIfNot) - backedge = inst.dest <= bb - backedge || push!(bb_ip, inst.dest) - push!(bb_ip, bb+1) - return backedge - elseif isexpr(inst, :enter) - dest = inst.args[1]::Int - @assert dest > bb - push!(bb_ip, dest) - push!(bb_ip, bb+1) - return false - else - push!(bb_ip, bb+1) - return false - end -end - -function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; - externally_refined::Union{Nothing,BitSet} = nothing) - interp = switch_to_irinterp(interp) - - (; ir, tpdum, ssa_refined) = irsv - - bbs = ir.cfg.blocks - bb_ip = BitSetBoundedMinPrioritySet(length(bbs)) - push!(bb_ip, 1) - all_rets = Int[] - - # Fast path: Scan both use counts and refinement in one single pass of - # of the instructions. In the absence of backedges, this will - # converge. - while !isempty(bb_ip) - bb = popfirst!(bb_ip) - stmts = bbs[bb].stmts - lstmt = last(stmts) - for idx = stmts - irsv.curridx = idx - inst = ir.stmts[idx][:inst] - typ = ir.stmts[idx][:type] - flag = ir.stmts[idx][:flag] - any_refined = false - if (flag & IR_FLAG_REFINED) != 0 - any_refined = true - ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED - end - for ur in userefs(inst) - val = ur[] - if isa(val, Argument) - any_refined |= irsv.argtypes_refined[val.n] - elseif isa(val, SSAValue) - any_refined |= val.id in ssa_refined - count!(tpdum, val) - end - end - if isa(inst, PhiNode) && idx in ssa_refined - any_refined = true - delete!(ssa_refined, idx) - end - is_terminator_or_phi = isa(inst, PhiNode) || isa(inst, GotoNode) || isa(inst, GotoIfNot) || isa(inst, ReturnNode) || isexpr(inst, :enter) - if typ === Bottom && (idx != lstmt || !is_terminator_or_phi) - continue - end - if (any_refined && reprocess_instruction!(interp, - idx, bb, inst, typ, irsv)) || - (externally_refined !== nothing && idx in externally_refined) - push!(ssa_refined, idx) - inst = ir.stmts[idx][:inst] - typ = ir.stmts[idx][:type] - end - if typ === Bottom && !is_terminator_or_phi - kill_terminator_edges!(irsv, lstmt, bb) - if idx != lstmt - for idx2 in (idx+1:lstmt-1) - ir[SSAValue(idx2)] = nothing - end - ir[SSAValue(lstmt)][:inst] = ReturnNode() - end - break - end - if idx == lstmt - process_terminator!(ir, inst, idx, bb, all_rets, bb_ip) && @goto residual_scan - end - end - end - @goto compute_rt - - # Slow path - begin @label residual_scan - stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts)) - - # Slow Path Phase 1.A: Complete use scanning - while !isempty(bb_ip) - bb = popfirst!(bb_ip) - stmts = bbs[bb].stmts - lstmt = last(stmts) - for idx = stmts - irsv.curridx = idx - inst = ir.stmts[idx][:inst] - flag = ir.stmts[idx][:flag] - if (flag & IR_FLAG_REFINED) 
!= 0 - ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED - push!(stmt_ip, idx) - end - for ur in userefs(inst) - val = ur[] - if isa(val, Argument) - if irsv.argtypes_refined[val.n] - push!(stmt_ip, idx) - end - elseif isa(val, SSAValue) - count!(tpdum, val) - end - end - idx == lstmt && process_terminator!(ir, inst, idx, bb, all_rets, bb_ip) - end - end - - # Slow Path Phase 1.B: Assemble def-use map - complete!(tpdum) - push!(bb_ip, 1) - while !isempty(bb_ip) - bb = popfirst!(bb_ip) - stmts = bbs[bb].stmts - lstmt = last(stmts) - for idx = stmts - irsv.curridx = idx - inst = ir.stmts[idx][:inst] - for ur in userefs(inst) - val = ur[] - if isa(val, SSAValue) - push!(tpdum[val.id], idx) - end - end - idx == lstmt && process_terminator!(ir, inst, idx, bb, all_rets, bb_ip) - end - end - - # Slow Path Phase 2: Use def-use map to converge cycles. - # TODO: It would be possible to return to the fast path after converging - # each cycle, but that's somewhat complicated. - for val in ssa_refined - append!(stmt_ip, tpdum[val]) - end - while !isempty(stmt_ip) - idx = popfirst!(stmt_ip) - irsv.curridx = idx - inst = ir.stmts[idx][:inst] - typ = ir.stmts[idx][:type] - if reprocess_instruction!(interp, - idx, nothing, inst, typ, irsv) - append!(stmt_ip, tpdum[idx]) - end - end - end - - begin @label compute_rt - ultimate_rt = Bottom - for idx in all_rets - bb = block_for_inst(ir.cfg, idx) - if bb != 1 && length(ir.cfg.blocks[bb].preds) == 0 - # Could have discovered this block is dead after the initial scan - continue - end - inst = ir.stmts[idx][:inst]::ReturnNode - rt = argextype(inst.val, ir) - ultimate_rt = tmerge(typeinf_lattice(interp), ultimate_rt, rt) - end - end - - nothrow = true - for idx = 1:length(ir.stmts) - if (ir.stmts[idx][:flag] & IR_FLAG_NOTHROW) == 0 - nothrow = false - break - end - end - - if last(irsv.valid_worlds) >= get_world_counter() - # if we aren't cached, we don't need this edge - # but our caller might, so let's just make it anyways - store_backedges(frame_instance(irsv), irsv.edges) - end - - return Pair{Any,Bool}(maybe_singleton_const(ultimate_rt), nothrow) -end - -function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState) - if __measure_typeinf__[] - inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes)) - Timings.enter_new_timer(inf_frame) - ret = _ir_abstract_constant_propagation(interp, irsv) - append!(inf_frame.slottypes, irsv.ir.argtypes) - Timings.exit_current_timer(inf_frame) - return ret - else - return _ir_abstract_constant_propagation(interp, irsv) - end -end -ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) = - _ir_abstract_constant_propagation(interp, irsv) diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl deleted file mode 100644 index 9f55d56181838..0000000000000 --- a/base/compiler/stmtinfo.jl +++ /dev/null @@ -1,225 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -@nospecialize - -""" - call::CallMeta - -A simple struct that captures both the return type (`call.rt`) -and any additional information (`call.info`) for a given generic call. -""" -struct CallMeta - rt::Any - effects::Effects - info::CallInfo -end - -struct NoCallInfo <: CallInfo end - -""" - info::MethodMatchInfo <: CallInfo - -Captures the result of a `:jl_matching_methods` lookup for the given call (`info.results`). 
-This info may then be used by the optimizer to inline the matches, without having -to re-consult the method table. This info is illegal on any statement that is -not a call to a generic function. -""" -struct MethodMatchInfo <: CallInfo - results::MethodLookupResult -end -nsplit_impl(info::MethodMatchInfo) = 1 -getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results) -getresult_impl(::MethodMatchInfo, ::Int) = nothing - -""" - info::UnionSplitInfo <: CallInfo - -If inference decides to partition the method search space by splitting unions, -it will issue a method lookup query for each such partition. This info indicates -that such partitioning happened and wraps the corresponding `MethodMatchInfo` for -each partition (`info.matches::Vector{MethodMatchInfo}`). -This info is illegal on any statement that is not a call to a generic function. -""" -struct UnionSplitInfo <: CallInfo - matches::Vector{MethodMatchInfo} -end - -nmatches(info::MethodMatchInfo) = length(info.results) -function nmatches(info::UnionSplitInfo) - n = 0 - for mminfo in info.matches - n += nmatches(mminfo) - end - return n -end -nsplit_impl(info::UnionSplitInfo) = length(info.matches) -getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1) -getresult_impl(::UnionSplitInfo, ::Int) = nothing - -abstract type ConstResult end - -struct ConstPropResult <: ConstResult - result::InferenceResult -end - -struct ConcreteResult <: ConstResult - mi::MethodInstance - effects::Effects - result - ConcreteResult(mi::MethodInstance, effects::Effects) = new(mi, effects) - ConcreteResult(mi::MethodInstance, effects::Effects, @nospecialize val) = new(mi, effects, val) -end - -struct SemiConcreteResult <: ConstResult - mi::MethodInstance - ir::IRCode - effects::Effects -end - -""" - info::ConstCallInfo <: CallInfo - -The precision of this call was improved using constant information. -In addition to the original call information `info.call`, this info also keeps the results -of constant inference `info.results::Vector{Union{Nothing,ConstResult}}`. -""" -struct ConstCallInfo <: CallInfo - call::Union{MethodMatchInfo,UnionSplitInfo} - results::Vector{Union{Nothing,ConstResult}} -end -nsplit_impl(info::ConstCallInfo) = nsplit(info.call) -getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx) -getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx] - -""" - info::MethodResultPure <: CallInfo - -This struct represents a method result constant was proven to be -effect-free, including being no-throw (typically because the value was computed -by calling an `@pure` function). -""" -struct MethodResultPure <: CallInfo - info::CallInfo -end -let instance = MethodResultPure(NoCallInfo()) - global MethodResultPure - MethodResultPure() = instance -end - -""" - ainfo::AbstractIterationInfo - -Captures all the information for abstract iteration analysis of a single value. -Each (abstract) call to `iterate`, corresponds to one entry in `ainfo.each::Vector{CallMeta}`. -""" -struct AbstractIterationInfo - each::Vector{CallMeta} - complete::Bool -end - -const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo} - -""" - info::ApplyCallInfo <: CallInfo - -This info applies to any call of `_apply_iterate(...)` and captures both the -info of the actual call being applied and the info for any implicit call -to the `iterate` function. 
Note that it is possible for the call itself -to be yet another `_apply_iterate`, in which case the `info.call` field will -be another `ApplyCallInfo`. This info is illegal on any statement that is -not an `_apply_iterate` call. -""" -struct ApplyCallInfo <: CallInfo - # The info for the call itself - call::Any - # AbstractIterationInfo for each argument, if applicable - arginfo::Vector{MaybeAbstractIterationInfo} -end - -""" - info::UnionSplitApplyCallInfo <: CallInfo - -Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`. -This info is illegal on any statement that is not an `_apply_iterate` call. -""" -struct UnionSplitApplyCallInfo <: CallInfo - infos::Vector{ApplyCallInfo} -end - -""" - info::InvokeCallInfo - -Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMatch` of -the method that has been processed. -Optionally keeps `info.result::InferenceResult` that keeps constant information. -""" -struct InvokeCallInfo <: CallInfo - match::MethodMatch - result::Union{Nothing,ConstResult} -end - -""" - info::OpaqueClosureCallInfo - -Represents a resolved call of opaque closure, carrying the `info.match::MethodMatch` of -the method that has been processed. -Optionally keeps `info.result::InferenceResult` that keeps constant information. -""" -struct OpaqueClosureCallInfo <: CallInfo - match::MethodMatch - result::Union{Nothing,ConstResult} -end - -""" - info::OpaqueClosureCreateInfo <: CallInfo - -This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta` -carrying out inference result of an unreal, partially specialized call (i.e. specialized on -the closure environment, but not on the argument types of the opaque closure) in order to -allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it. -""" -struct OpaqueClosureCreateInfo <: CallInfo - unspec::CallMeta - function OpaqueClosureCreateInfo(unspec::CallMeta) - @assert isa(unspec.info, OpaqueClosureCallInfo) - return new(unspec) - end -end - -# Stmt infos that are used by external consumers, but not by optimization. -# These are not produced by default and must be explicitly opted into by -# the AbstractInterpreter. - -""" - info::ReturnTypeCallInfo <: CallInfo - -Represents a resolved call of `Core.Compiler.return_type`. -`info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call -was supposed to analyze. -""" -struct ReturnTypeCallInfo <: CallInfo - info::CallInfo -end - -""" - info::FinalizerInfo <: CallInfo - -Represents the information of a potential (later) call to the finalizer on the given -object type. -""" -struct FinalizerInfo <: CallInfo - info::CallInfo # the callinfo for the finalizer call - effects::Effects # the effects for the finalizer call -end - -""" - info::ModifyFieldInfo <: CallInfo - -Represents a resolved all of `modifyfield!(obj, name, op, x, [order])`. -`info.info` wraps the call information of `op(getfield(obj, name), x)`. -""" -struct ModifyFieldInfo <: CallInfo - info::CallInfo # the callinfo for the `op(getfield(obj, name), x)` call -end - -@specialize diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl deleted file mode 100644 index 77e1fd02de8d0..0000000000000 --- a/base/compiler/typeinfer.jl +++ /dev/null @@ -1,1139 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -# Tracking of newly-inferred CodeInstances during precompilation -const track_newly_inferred = RefValue{Bool}(false) -const newly_inferred = CodeInstance[] - -# build (and start inferring) the inference frame for the top-level MethodInstance -function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol) - frame = InferenceState(result, cache, interp) - frame === nothing && return false - cache === :global && lock_mi_inference(interp, result.linfo) - return typeinf(interp, frame) -end - -""" -The module `Core.Compiler.Timings` provides a simple implementation of nested timers that -can be used to measure the exclusive time spent inferring each method instance that is -recursively inferred during type inference. - -This is meant to be internal to the compiler, and makes some specific assumptions about -being used for this purpose alone. -""" -module Timings - -using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline, - @inbounds, copy, backtrace - -# What we record for any given frame we infer during type inference. -struct InferenceFrameInfo - mi::Core.MethodInstance - world::UInt64 - sptypes::Vector{Core.Compiler.VarState} - slottypes::Vector{Any} - nargs::Int -end - -function _typeinf_identifier(frame::Core.Compiler.InferenceState) - mi_info = InferenceFrameInfo( - frame.linfo, - frame.world, - copy(frame.sptypes), - copy(frame.slottypes), - length(frame.result.argtypes), - ) - return mi_info -end - -_typeinf_identifier(frame::InferenceFrameInfo) = frame - -""" - Core.Compiler.Timing(mi_info, start_time, ...) - -Internal type containing the timing result for running type inference on a single -MethodInstance. -""" -struct Timing - mi_info::InferenceFrameInfo - start_time::UInt64 - cur_start_time::UInt64 - time::UInt64 - children::Core.Array{Timing,1} - bt # backtrace collected upon initial entry to typeinf -end -Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing) -Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[]) - -_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base not yet available. - -# We keep a stack of the Timings for each of the MethodInstances currently being timed. -# Since type inference currently operates via a depth-first search (during abstract -# evaluation), this vector operates like a call stack. The last node in _timings is the -# node currently being inferred, and its parent is directly before it, etc. -# Each Timing also contains its own vector for all of its children, so that the tree -# call structure through type inference is recorded. (It's recorded as a tree, not a graph, -# because we create a new node for duplicates.) -const _timings = Timing[] -# ROOT() is an empty function used as the top-level Timing node to measure all time spent -# *not* in type inference during a given recording trace. It is used as a "dummy" node. -function ROOT() end -const ROOTmi = Core.Compiler.specialize_method( - first(Core.Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec()) -""" - Core.Compiler.reset_timings() - -Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and -start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference. -""" -function reset_timings() - empty!(_timings) - push!(_timings, Timing( - # The MethodInstance for ROOT(), and default empty values for other fields. 
- InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1), - _time_ns())) - return nothing -end -reset_timings() - -# (This is split into a function so that it can be called both in this module, at the top -# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started -# the operation and called `reset_timings()`.) -# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between -# timer manipulations and the tasks we're timing. -@inline function close_current_timer() - stop_time = _time_ns() - parent_timer = _timings[end] - accum_time = stop_time - parent_timer.cur_start_time - - # Add in accum_time ("modify" the immutable struct) - @inbounds begin - _timings[end] = Timing( - parent_timer.mi_info, - parent_timer.start_time, - parent_timer.cur_start_time, - parent_timer.time + accum_time, - parent_timer.children, - parent_timer.bt, - ) - end - return nothing -end - -@inline function enter_new_timer(frame) - # Very first thing, stop the active timer: get the current time and add in the - # time since it was last started to its aggregate exclusive time. - close_current_timer() - - mi_info = _typeinf_identifier(frame) - - # Start the new timer right before returning - push!(_timings, Timing(mi_info, UInt64(0))) - len = length(_timings) - new_timer = @inbounds _timings[len] - # Set the current time _after_ appending the node, to try to exclude the - # overhead from measurement. - start = _time_ns() - - @inbounds begin - _timings[len] = Timing( - new_timer.mi_info, - start, - start, - new_timer.time, - new_timer.children, - ) - end - - return nothing -end - -# _expected_frame_ is not needed within this function; it is used in the `@assert`, to -# assert that indeed we are always returning to a parent after finishing all of its -# children (that is, asserting that inference proceeds via depth-first-search). -@inline function exit_current_timer(_expected_frame_) - # Finish the new timer - stop_time = _time_ns() - - expected_mi_info = _typeinf_identifier(_expected_frame_) - - # Grab the new timer again because it might have been modified in _timings - # (since it's an immutable struct) - # And remove it from the current timings stack - new_timer = pop!(_timings) - Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi - - # Prepare to unwind one level of the stack and record in the parent - parent_timer = _timings[end] - - accum_time = stop_time - new_timer.cur_start_time - # Add in accum_time ("modify" the immutable struct) - new_timer = Timing( - new_timer.mi_info, - new_timer.start_time, - new_timer.cur_start_time, - new_timer.time + accum_time, - new_timer.children, - parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing, - ) - # Record the final timing with the original parent timer - push!(parent_timer.children, new_timer) - - # And finally restart the parent timer: - len = length(_timings) - @inbounds begin - _timings[len] = Timing( - parent_timer.mi_info, - parent_timer.start_time, - _time_ns(), - parent_timer.time, - parent_timer.children, - parent_timer.bt, - ) - end - - return nothing -end - -end # module Timings - -""" - Core.Compiler.__set_measure_typeinf(onoff::Bool) - -If set to `true`, record per-method-instance timings within type inference in the Compiler. 
-""" -__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff -const __measure_typeinf__ = fill(false) - -# Wrapper around `_typeinf` that optionally records the exclusive time for -# each inference performed by `NativeInterpreter`. -function typeinf(interp::NativeInterpreter, frame::InferenceState) - if __measure_typeinf__[] - Timings.enter_new_timer(frame) - v = _typeinf(interp, frame) - Timings.exit_current_timer(frame) - return v - else - return _typeinf(interp, frame) - end -end -typeinf(interp::AbstractInterpreter, frame::InferenceState) = _typeinf(interp, frame) - -function finish!(interp::AbstractInterpreter, caller::InferenceResult) - # If we didn't transform the src for caching, we may have to transform - # it anyway for users like typeinf_ext. Do that here. - opt = caller.src - if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true` - if opt.ir !== nothing - if caller.must_be_codeinf - caller.src = ir_to_codeinf!(opt) - elseif is_inlineable(opt.src) - # TODO: If the CFG is too big, inlining becomes more expensive and if we're going to - # use this IR over and over, it's worth simplifying it. Round trips through - # CodeInstance do this implicitly, since they recompute the CFG, so try to - # match that behavior here. - # ir = cfg_simplify!(opt.ir) - caller.src = opt.ir - else - # Not cached and not inlineable - drop the ir - caller.src = nothing - end - end - end - return caller.src -end - -function _typeinf(interp::AbstractInterpreter, frame::InferenceState) - interp = switch_from_irinterp(interp) - typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle - # with no active ip's, frame is done - frames = frame.callers_in_cycle - isempty(frames) && push!(frames, frame) - valid_worlds = WorldRange() - for caller in frames - @assert !(caller.dont_work_on_me) - caller.dont_work_on_me = true - # might might not fully intersect these earlier, so do that now - valid_worlds = intersect(caller.valid_worlds, valid_worlds) - end - for caller in frames - caller.valid_worlds = valid_worlds - finish(caller, interp) - end - # collect results for the new expanded frame - results = Tuple{InferenceResult, Vector{Any}, Bool}[ - ( frames[i].result, - frames[i].stmt_edges[1]::Vector{Any}, - frames[i].cached ) - for i in 1:length(frames) ] - empty!(frames) - for (caller, _, _) in results - opt = caller.src - if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true` - optimize(interp, opt, caller) - end - end - for (caller, edges, cached) in results - valid_worlds = caller.valid_worlds - if last(valid_worlds) >= get_world_counter() - # if we aren't cached, we don't need this edge - # but our caller might, so let's just make it anyways - store_backedges(caller, edges) - end - if cached - cache_result!(interp, caller) - end - finish!(interp, caller) - end - return true -end - -function CodeInstance(interp::AbstractInterpreter, result::InferenceResult, - @nospecialize(inferred_result), valid_worlds::WorldRange) - local const_flags::Int32 - result_type = result.result - @assert !(result_type === nothing || result_type isa LimitedAccuracy) - - if isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val) - # use constant calling convention - rettype_const = result_type.val - const_flags = 0x3 - if may_discard_trees(interp) - inferred_result = nothing - end - else - if isa(result_type, Const) - rettype_const = result_type.val - const_flags = 0x2 - elseif 
isa(result_type, PartialOpaque) - rettype_const = result_type - const_flags = 0x2 - elseif isconstType(result_type) - rettype_const = result_type.parameters[1] - const_flags = 0x2 - elseif isa(result_type, PartialStruct) - rettype_const = result_type.fields - const_flags = 0x2 - elseif isa(result_type, InterConditional) - rettype_const = result_type - const_flags = 0x2 - elseif isa(result_type, InterMustAlias) - rettype_const = result_type - const_flags = 0x2 - else - rettype_const = nothing - const_flags = 0x00 - end - end - relocatability = 0x0 - if isa(inferred_result, String) - t = @_gc_preserve_begin inferred_result - relocatability = unsafe_load(unsafe_convert(Ptr{UInt8}, inferred_result), Core.sizeof(inferred_result)) - @_gc_preserve_end t - elseif inferred_result === nothing - relocatability = 0x1 - end - # relocatability = isa(inferred_result, String) ? inferred_result[end] : UInt8(0) - return CodeInstance(result.linfo, - widenconst(result_type), rettype_const, inferred_result, - const_flags, first(valid_worlds), last(valid_worlds), - # TODO: Actually do something with non-IPO effects - encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes, - relocatability) -end - -function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInstance, ci::CodeInfo) - def = linfo.def - toplevel = !isa(def, Method) - if toplevel - return ci - end - if may_discard_trees(interp) - cache_the_tree = ci.inferred && (is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, linfo.sparam_vals, def)) - else - cache_the_tree = true - end - if cache_the_tree - if may_compress(interp) - nslots = length(ci.slotflags) - resize!(ci.slottypes::Vector{Any}, nslots) - resize!(ci.slotnames, nslots) - return ccall(:jl_compress_ir, String, (Any, Any), def, ci) - else - return ci - end - else - return nothing - end -end - -function transform_result_for_cache(interp::AbstractInterpreter, - linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult) - inferred_result = result.src - if inferred_result isa OptimizationState{typeof(interp)} - # TODO respect must_be_codeinf setting here? 
- result.src = inferred_result = ir_to_codeinf!(inferred_result) - end - if inferred_result isa CodeInfo - inferred_result.min_world = first(valid_worlds) - inferred_result.max_world = last(valid_worlds) - inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result) - end - # The global cache can only handle objects that codegen understands - if !isa(inferred_result, MaybeCompressed) - inferred_result = nothing - end - return inferred_result -end - -function cache_result!(interp::AbstractInterpreter, result::InferenceResult) - valid_worlds = result.valid_worlds - if last(valid_worlds) == get_world_counter() - # if we've successfully recorded all of the backedges in the global reverse-cache, - # we can now widen our applicability in the global cache too - valid_worlds = WorldRange(first(valid_worlds), typemax(UInt)) - end - # check if the existing linfo metadata is also sufficient to describe the current inference result - # to decide if it is worth caching this - linfo = result.linfo - already_inferred = already_inferred_quick_test(interp, linfo) - if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo) - already_inferred = true - end - - # TODO: also don't store inferred code if we've previously decided to interpret this function - if !already_inferred - inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result) - code_cache(interp)[linfo] = ci = CodeInstance(interp, result, inferred_result, valid_worlds) - if track_newly_inferred[] - m = linfo.def - if isa(m, Method) && m.module != Core - ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci) - end - end - end - unlock_mi_inference(interp, linfo) - nothing -end - -function cycle_fix_limited(@nospecialize(typ), sv::InferenceState) - if typ isa LimitedAccuracy - if sv.parent === nothing - # when part of a cycle, we might have unintentionally introduced a limit marker - @assert !isempty(sv.callers_in_cycle) - return typ.typ - end - causes = copy(typ.causes) - delete!(causes, sv) - for caller in sv.callers_in_cycle - delete!(causes, caller) - end - if isempty(causes) - return typ.typ - end - if length(causes) != length(typ.causes) - return LimitedAccuracy(typ.typ, causes) - end - end - return typ -end - -function adjust_effects(sv::InferenceState) - ipo_effects = sv.ipo_effects - - # refine :consistent-cy effect using the return type information - # TODO this adjustment tries to compromise imprecise :consistent-cy information, - # that is currently modeled in a flow-insensitive way: ideally we want to model it - # with a proper dataflow analysis instead - rt = sv.bestguess - if ipo_effects.noinbounds && rt === Bottom - # always throwing an error counts or never returning both count as consistent - ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) - end - if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int - return is_mutation_free_argtype(sv.slottypes[i]) - end - ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE) - end - if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt) - # in a case when the :consistent-cy here is only tainted by mutable allocations - # (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return - # type guarantees that the allocations are never returned - consistent = ipo_effects.consistent & ~CONSISTENT_IF_NOTRETURNED - ipo_effects = Effects(ipo_effects; consistent) - end - if is_consistent_if_inaccessiblememonly(ipo_effects) - if 
is_inaccessiblememonly(ipo_effects) - consistent = ipo_effects.consistent & ~CONSISTENT_IF_INACCESSIBLEMEMONLY - ipo_effects = Effects(ipo_effects; consistent) - elseif is_inaccessiblemem_or_argmemonly(ipo_effects) - else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this - ipo_effects = Effects(ipo_effects; consistent=ALWAYS_FALSE) - end - end - if is_effect_free_if_inaccessiblememonly(ipo_effects) - if is_inaccessiblememonly(ipo_effects) - effect_free = ipo_effects.effect_free & ~EFFECT_FREE_IF_INACCESSIBLEMEMONLY - ipo_effects = Effects(ipo_effects; effect_free) - elseif is_inaccessiblemem_or_argmemonly(ipo_effects) - else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this - ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_FALSE) - end - end - - # override the analyzed effects using manually annotated effect settings - def = sv.linfo.def - if isa(def, Method) - override = decode_effects_override(def.purity) - if is_effect_overridden(override, :consistent) - ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) - end - if is_effect_overridden(override, :effect_free) - ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE) - end - if is_effect_overridden(override, :nothrow) - ipo_effects = Effects(ipo_effects; nothrow=true) - end - if is_effect_overridden(override, :terminates_globally) - ipo_effects = Effects(ipo_effects; terminates=true) - end - if is_effect_overridden(override, :notaskstate) - ipo_effects = Effects(ipo_effects; notaskstate=true) - end - if is_effect_overridden(override, :inaccessiblememonly) - ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE) - end - end - - return ipo_effects -end - -# inference completed on `me` -# update the MethodInstance -function finish(me::InferenceState, interp::AbstractInterpreter) - # prepare to run optimization passes on fulltree - s_edges = me.stmt_edges[1] - if s_edges === nothing - s_edges = me.stmt_edges[1] = [] - end - for edges in me.stmt_edges - edges === nothing && continue - edges === s_edges && continue - append!(s_edges, edges) - empty!(edges) - end - if me.src.edges !== nothing - append!(s_edges, me.src.edges::Vector) - me.src.edges = nothing - end - # inspect whether our inference had a limited result accuracy, - # else it may be suitable to cache - bestguess = me.bestguess = cycle_fix_limited(me.bestguess, me) - limited_ret = bestguess isa LimitedAccuracy - limited_src = false - if !limited_ret - gt = me.ssavaluetypes - for j = 1:length(gt) - gt[j] = gtj = cycle_fix_limited(gt[j], me) - if gtj isa LimitedAccuracy && me.parent !== nothing - limited_src = true - break - end - end - end - if limited_ret - # a parent may be cached still, but not this intermediate work: - # we can throw everything else away now - me.result.src = nothing - me.cached = false - set_inlineable!(me.src, false) - unlock_mi_inference(interp, me.linfo) - elseif limited_src - # a type result will be cached still, but not this intermediate work: - # we can throw everything else away now - me.result.src = nothing - set_inlineable!(me.src, false) - else - # annotate fulltree with type information, - # either because we are the outermost code, or we might use this later - doopt = (me.cached || me.parent !== nothing) - recompute_cfg = type_annotate!(interp, me, doopt) - if doopt && may_optimize(interp) - me.result.src = OptimizationState(me, interp, recompute_cfg) - else - me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. 
for reflection) - end - end - me.result.valid_worlds = me.valid_worlds - me.result.result = bestguess - me.ipo_effects = me.result.ipo_effects = adjust_effects(me) - validate_code_in_debug_mode(me.linfo, me.src, "inferred") - nothing -end - -# record the backedges -function store_backedges(caller::InferenceResult, edges::Vector{Any}) - isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance - return store_backedges(caller.linfo, edges) -end - -function store_backedges(caller::MethodInstance, edges::Vector{Any}) - for itr in BackedgeIterator(edges) - callee = itr.caller - if isa(callee, MethodInstance) - ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller) - else - typeassert(callee, MethodTable) - ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller) - end - end - return nothing -end - -function record_slot_assign!(sv::InferenceState) - # look at all assignments to slots - # and union the set of types stored there - # to compute a lower bound on the storage required - body = sv.src.code::Vector{Any} - slottypes = sv.slottypes::Vector{Any} - ssavaluetypes = sv.ssavaluetypes - for i = 1:length(body) - expr = body[i] - # find all reachable assignments to locals - if was_reached(sv, i) && isexpr(expr, :(=)) - lhs = expr.args[1] - if isa(lhs, SlotNumber) - typ = ssavaluetypes[i] - @assert typ !== NOT_FOUND "active slot in unreached region" - vt = widenconst(typ) - if vt !== Bottom - id = slot_id(lhs) - otherTy = slottypes[id] - if otherTy === Bottom - slottypes[id] = vt - elseif otherTy === Any - slottypes[id] = Any - else - slottypes[id] = tmerge(otherTy, vt) - end - end - end - end - end - sv.src.slottypes = slottypes - return nothing -end - -function record_bestguess!(sv::InferenceState) - bestguess = sv.bestguess - @assert !(bestguess isa LimitedAccuracy) - sv.src.rettype = bestguess - return nothing -end - -function annotate_slot_load!(interp::AbstractInterpreter, undefs::Vector{Bool}, idx::Int, sv::InferenceState, @nospecialize x) - if isa(x, SlotNumber) - id = slot_id(x) - pc = find_dominating_assignment(id, idx, sv) - if pc === nothing - block = block_for_inst(sv.cfg, idx) - state = sv.bb_vartables[block]::VarTable - vt = state[id] - undefs[id] |= vt.undef - typ = widenslotwrapper(ignorelimited(vt.typ)) - else - typ = sv.ssavaluetypes[pc] - @assert typ !== NOT_FOUND "active slot in unreached region" - end - # add type annotations where needed - if !⊑(typeinf_lattice(interp), sv.slottypes[id], typ) - return TypedSlot(id, typ) - end - return x - elseif isa(x, Expr) - head = x.head - i0 = 1 - if is_meta_expr_head(head) || head === :const - return x - end - if head === :(=) || head === :method - i0 = 2 - end - for i = i0:length(x.args) - x.args[i] = annotate_slot_load!(interp, undefs, idx, sv, x.args[i]) - end - return x - elseif isa(x, ReturnNode) && isdefined(x, :val) - return ReturnNode(annotate_slot_load!(interp, undefs, idx, sv, x.val)) - elseif isa(x, GotoIfNot) - return GotoIfNot(annotate_slot_load!(interp, undefs, idx, sv, x.cond), x.dest) - end - return x -end - -# find the dominating assignment to the slot `id` in the block containing statement `idx`, -# returns `nothing` otherwise -function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState) - block = block_for_inst(sv.cfg, idx) - for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignment is dominating this block - pc < idx || continue # N.B. 
needs pc ≠ idx as `id` can be assigned at `idx` - stmt = sv.src.code[pc] - isexpr(stmt, :(=)) || continue - lhs = stmt.args[1] - isa(lhs, SlotNumber) || continue - slot_id(lhs) == id || continue - return pc - end - return nothing -end - -# annotate types of all symbols in AST, preparing for optimization -function type_annotate!(interp::AbstractInterpreter, sv::InferenceState, run_optimizer::Bool) - # widen `Conditional`s from `slottypes` - slottypes = sv.slottypes - for i = 1:length(slottypes) - slottypes[i] = widenconditional(slottypes[i]) - end - - # compute the required type for each slot - # to hold all of the items assigned into it - record_slot_assign!(sv) - - record_bestguess!(sv) - - # annotate variables load types - # remove dead code optimization - # and compute which variables may be used undef - stmt_info = sv.stmt_info - src = sv.src - body = src.code - nexpr = length(body) - codelocs = src.codelocs - ssavaluetypes = sv.ssavaluetypes - ssaflags = src.ssaflags - slotflags = src.slotflags - nslots = length(slotflags) - undefs = fill(false, nslots) - any_unreachable = false - - # this statement traversal does five things: - # 1. introduce temporary `TypedSlot`s that are supposed to be replaced with π-nodes later - # 2. mark used-undef slots (required by the `slot2reg` conversion) - # 3. mark unreached statements for a bulk code deletion (see issue #7836) - # 4. widen slot wrappers (`Conditional` and `MustAlias`) and remove `NOT_FOUND` from `ssavaluetypes` - # NOTE because of this, `was_reached` will no longer be available after this point - # 5. eliminate GotoIfNot if either branch target is unreachable - changemap = nothing # initialized if there is any dead region - for i = 1:nexpr - expr = body[i] - if was_reached(sv, i) - if run_optimizer - if isa(expr, GotoIfNot) && widenconst(argextype(expr.cond, src, sv.sptypes)) === Bool - # 5: replace this live GotoIfNot with: - # - GotoNode if the fallthrough target is unreachable - # - no-op if the branch target is unreachable - if !was_reached(sv, i+1) - expr = GotoNode(expr.dest) - elseif !was_reached(sv, expr.dest) - expr = nothing - end - end - end - body[i] = annotate_slot_load!(interp, undefs, i, sv, expr) # 1&2 - ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 4 - else # i.e. 
any runtime execution will never reach this statement - any_unreachable = true - if is_meta_expr(expr) # keep any lexically scoped expressions - ssavaluetypes[i] = Any # 4 - else - ssavaluetypes[i] = Bottom # 4 - body[i] = Const(expr) # annotate that this statement actually is dead - end - end - end - - # finish marking used-undef variables - for j = 1:nslots - if undefs[j] - slotflags[j] |= SLOT_USEDUNDEF | SLOT_STATICUNDEF - end - end - - return any_unreachable -end - -# at the end, all items in b's cycle -# will now be added to a's cycle -function union_caller_cycle!(a::InferenceState, b::InferenceState) - callers_in_cycle = b.callers_in_cycle - b.parent = a.parent - b.callers_in_cycle = a.callers_in_cycle - contains_is(a.callers_in_cycle, b) || push!(a.callers_in_cycle, b) - if callers_in_cycle !== a.callers_in_cycle - for caller in callers_in_cycle - if caller !== b - caller.parent = a.parent - caller.callers_in_cycle = a.callers_in_cycle - push!(a.callers_in_cycle, caller) - end - end - end - return -end - -function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, ancestor::InferenceState, child::InferenceState) - # add backedge of parent <- child - # then add all backedges of parent <- parent.parent - # and merge all of the callers into ancestor.callers_in_cycle - # and ensure that walking the parent list will get the same result (DAG) from everywhere - while true - add_cycle_backedge!(parent, child, parent.currpc) - union_caller_cycle!(ancestor, child) - child = parent - child === ancestor && break - parent = frame_parent(child) - while !isa(parent, InferenceState) - # XXX we may miss some edges here? - parent = frame_parent(parent::IRInterpretationState) - end - parent = parent::InferenceState - end -end - -function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState) - return mi === frame_instance(frame) -end - -function poison_callstack!(infstate::InferenceState, topmost::InferenceState) - push!(infstate.pclimitations, topmost) - nothing -end - -# Walk through `mi`'s upstream call chain, starting at `parent`. If a parent -# frame matching `mi` is encountered, then there is a cycle in the call graph -# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle, -# we "resolve" it by merging the call chain, which entails unioning each intermediary -# frame's `callers_in_cycle` field and adding the appropriate backedges. Finally, -# we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is -# returned instead. -function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState) - # TODO (#48913) implement a proper recursion handling for irinterp: - # This works just because currently the `:terminate` condition guarantees that - # irinterp doesn't fail into unresolved cycles, but it's not a good solution. - # We should revisit this once we have a better story for handling cycles in irinterp. 
- isa(parent, InferenceState) || return false - frame = parent - uncached = false - while isa(frame, InferenceState) - uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle - if is_same_frame(interp, mi, frame) - if uncached - # our attempt to speculate into a constant call lead to an undesired self-cycle - # that cannot be converged: poison our call-stack (up to the discovered duplicate frame) - # with the limited flag and abort (set return type to Any) now - poison_callstack!(parent, frame) - return true - end - merge_call_chain!(interp, parent, frame, frame) - return frame - end - for caller in callers_in_cycle(frame) - if is_same_frame(interp, mi, caller) - if uncached - poison_callstack!(parent, frame) - return true - end - merge_call_chain!(interp, parent, frame, caller) - return caller - end - end - frame = frame_parent(frame) - end - return false -end - -generating_sysimg() = ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0 - -ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits) - -struct EdgeCallResult - rt #::Type - edge::Union{Nothing,MethodInstance} - effects::Effects - function EdgeCallResult(@nospecialize(rt), - edge::Union{Nothing,MethodInstance}, - effects::Effects) - return new(rt, edge, effects) - end -end - -# compute (and cache) an inferred AST and return the current best estimate of the result type -function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState) - mi = specialize_method(method, atype, sparams)::MethodInstance - code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance # return existing rettype if the code is already inferred - inferred = @atomic :monotonic code.inferred - if inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller)) - # we already inferred this edge before and decided to discard the inferred code, - # nevertheless we re-infer it here again and keep it around in the local cache - # since the inliner will request to use it later - cache = :local - else - effects = ipo_effects(code) - update_valid_age!(caller, WorldRange(min_world(code), max_world(code))) - rettype = code.rettype - if isdefined(code, :rettype_const) - rettype_const = code.rettype_const - # the second subtyping/egal conditions are necessary to distinguish usual cases - # from rare cases when `Const` wrapped those extended lattice type objects - if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype) - rettype = PartialStruct(rettype, rettype_const) - elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure - rettype = rettype_const - elseif isa(rettype_const, InterConditional) && rettype !== InterConditional - rettype = rettype_const - elseif isa(rettype_const, InterMustAlias) && rettype !== InterMustAlias - rettype = rettype_const - else - rettype = Const(rettype_const) - end - end - return EdgeCallResult(rettype, mi, effects) - end - else - cache = :global # cache edge targets by default - end - if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg() - add_remark!(interp, caller, "Inference is disabled for the target module") - return EdgeCallResult(Any, nothing, Effects()) - end - if !is_cached(caller) && frame_parent(caller) === nothing - # this caller exists to return to the user - # (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge) - frame = false - else - frame = resolve_call_cycle!(interp, mi, caller) 
- end - if frame === false - # completely new - lock_mi_inference(interp, mi) - result = InferenceResult(mi, typeinf_lattice(interp)) - frame = InferenceState(result, cache, interp) # always use the cache for edge targets - if frame === nothing - add_remark!(interp, caller, "Failed to retrieve source") - # can't get the source for this, so we know nothing - unlock_mi_inference(interp, mi) - return EdgeCallResult(Any, nothing, Effects()) - end - if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution - frame.parent = caller - end - typeinf(interp, frame) - update_valid_age!(caller, frame.valid_worlds) - edge = is_inferred(frame) ? mi : nothing - return EdgeCallResult(frame.bestguess, edge, frame.ipo_effects) # effects are adjusted already within `finish` - elseif frame === true - # unresolvable cycle - return EdgeCallResult(Any, nothing, Effects()) - end - # return the current knowledge about this cycle - frame = frame::InferenceState - update_valid_age!(caller, frame.valid_worlds) - return EdgeCallResult(frame.bestguess, nothing, adjust_effects(frame)) -end - -#### entry points for inferring a MethodInstance given a type signature #### - -# compute an inferred AST and return type -function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool) - frame = typeinf_frame(interp, method, atype, sparams, run_optimizer) - frame === nothing && return nothing, Any - is_inferred(frame) || return nothing, Any - code = frame.src - rt = widenconst(ignorelimited(frame.result.result)) - return code, rt -end - -""" - typeinf_ircode( - interp::AbstractInterpreter, - method::Method, - atype, - sparams::SimpleVector, - optimize_until::Union{Integer,AbstractString,Nothing}, - ) -> (ir::Union{IRCode,Nothing}, returntype::Type) - -Infer a `method` and return an `IRCode` with inferred `returntype` on success. -""" -function typeinf_ircode( - interp::AbstractInterpreter, - method::Method, - @nospecialize(atype), - sparams::SimpleVector, - optimize_until::Union{Integer,AbstractString,Nothing}, -) - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - frame = typeinf_frame(interp, method, atype, sparams, false) - if frame === nothing - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return nothing, Any - end - (; result) = frame - opt = OptimizationState(frame, interp) - ir = run_passes(opt.src, opt, result, optimize_until) - rt = widenconst(ignorelimited(result.result)) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return ir, rt -end - -# compute an inferred frame -function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool) - mi = specialize_method(method, atype, sparams)::MethodInstance - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - result = InferenceResult(mi, typeinf_lattice(interp)) - frame = InferenceState(result, run_optimizer ? 
:global : :no, interp) - frame === nothing && return nothing - typeinf(interp, frame) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return frame -end - -# compute (and cache) an inferred AST and return type -function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance) - method = mi.def::Method - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance - # see if this code already exists in the cache - inf = @atomic :monotonic code.inferred - if use_const_api(code) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) - rettype_const = code.rettype_const - tree.code = Any[ ReturnNode(quoted(rettype_const)) ] - nargs = Int(method.nargs) - tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms) - tree.slotflags = fill(IR_FLAG_NULL, nargs) - tree.ssavaluetypes = 1 - tree.codelocs = Int32[1] - tree.linetable = LineInfoNode[LineInfoNode(method.module, mi, method.file, method.line, Int32(0))] - tree.ssaflags = UInt8[0] - set_inlineable!(tree, true) - tree.parent = mi - tree.rettype = Core.Typeof(rettype_const) - tree.min_world = code.min_world - tree.max_world = code.max_world - tree.inferred = true - return tree - elseif isa(inf, CodeInfo) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - if !(inf.min_world == code.min_world && - inf.max_world == code.max_world && - inf.rettype === code.rettype) - inf = copy(inf) - inf.min_world = code.min_world - inf.max_world = code.max_world - inf.rettype = code.rettype - end - return inf - elseif isa(inf, String) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - inf = _uncompressed_ir(code, inf) - return inf - end - end - if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg() - return retrieve_code_info(mi, get_world_counter(interp)) - end - lock_mi_inference(interp, mi) - result = InferenceResult(mi, typeinf_lattice(interp)) - frame = InferenceState(result, #=cache=#:global, interp) - frame === nothing && return nothing - typeinf(interp, frame) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - frame.src.inferred || return nothing - return frame.src -end - -# compute (and cache) an inferred AST and return the inferred return type -function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector) - if contains_is(unwrap_unionall(atype).parameters, Union{}) - return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap - end - mi = specialize_method(method, atype, sparams)::MethodInstance - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance - # see if this rettype already exists in the cache - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return code.rettype - end - result = InferenceResult(mi, typeinf_lattice(interp)) - typeinf(interp, result, :global) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - is_inferred(result) || return nothing - return widenconst(ignorelimited(result.result)) -end - -# This is a bridge for the C code calling `jl_typeinf_func()` -typeinf_ext_toplevel(mi::MethodInstance, world::UInt) = typeinf_ext_toplevel(NativeInterpreter(world), mi) -function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance) - if isa(linfo.def, Method) - # method lambda - infer 
this specialization via the method cache - src = typeinf_ext(interp, linfo) - else - src = linfo.uninferred::CodeInfo - if !src.inferred - # toplevel lambda - infer directly - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - if !src.inferred - result = InferenceResult(linfo, typeinf_lattice(interp)) - frame = InferenceState(result, src, #=cache=#:global, interp) - typeinf(interp, frame) - @assert is_inferred(frame) # TODO: deal with this better - src = frame.src - end - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - end - end - return src -end - -function return_type(@nospecialize(f), t::DataType) # this method has a special tfunc - world = ccall(:jl_get_tls_world_age, UInt, ()) - args = Any[_return_type, NativeInterpreter(world), Tuple{Core.Typeof(f), t.parameters...}] - return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), args, length(args)) -end - -function return_type(@nospecialize(f), t::DataType, world::UInt) - return return_type(Tuple{Core.Typeof(f), t.parameters...}, world) -end - -function return_type(t::DataType) - world = ccall(:jl_get_tls_world_age, UInt, ()) - return return_type(t, world) -end - -function return_type(t::DataType, world::UInt) - args = Any[_return_type, NativeInterpreter(world), t] - return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), args, length(args)) -end - -function _return_type(interp::AbstractInterpreter, t::DataType) - rt = Union{} - f = singleton_type(t.parameters[1]) - if isa(f, Builtin) - args = Any[t.parameters...] - popfirst!(args) - rt = builtin_tfunction(interp, f, args, nothing) - rt = widenconst(rt) - else - for match in _methods_by_ftype(t, -1, get_world_counter(interp))::Vector - match = match::MethodMatch - ty = typeinf_type(interp, match.method, match.spec_types, match.sparams) - ty === nothing && return Any - rt = tmerge(rt, ty) - rt === Any && break - end - end - return rt -end diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl deleted file mode 100644 index f3c5694535ce6..0000000000000 --- a/base/compiler/utilities.jl +++ /dev/null @@ -1,521 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -########### -# generic # -########### - -if !@isdefined(var"@timeit") - # This is designed to allow inserting timers when loading a second copy - # of inference for performing performance experiments. - macro timeit(args...) 
- esc(args[end]) - end -end - -# avoid cycle due to over-specializing `any` when used by inference -function _any(@nospecialize(f), a) - for x in a - f(x) && return true - end - return false -end -any(@nospecialize(f), itr) = _any(f, itr) -any(itr) = _any(identity, itr) - -function _all(@nospecialize(f), a) - for x in a - f(x) || return false - end - return true -end -all(@nospecialize(f), itr) = _all(f, itr) -all(itr) = _all(identity, itr) - -function contains_is(itr, @nospecialize(x)) - for y in itr - if y === x - return true - end - end - return false -end - -anymap(f::Function, a::Array{Any,1}) = Any[ f(a[i]) for i in 1:length(a) ] - -########### -# scoping # -########### - -_topmod(m::Module) = ccall(:jl_base_relative_to, Any, (Any,), m)::Module - -function istopfunction(@nospecialize(f), name::Symbol) - tn = typeof(f).name - if tn.mt.name === name - top = _topmod(tn.module) - return isdefined(top, name) && isconst(top, name) && f === getglobal(top, name) - end - return false -end - -####### -# AST # -####### - -# Meta expression head, these generally can't be deleted even when they are -# in a dead branch but can be ignored when analyzing uses/liveness. -is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo -is_meta_expr(@nospecialize x) = isa(x, Expr) && is_meta_expr_head(x.head) - -sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 - -function is_self_quoting(@nospecialize(x)) - return isa(x,Number) || isa(x,AbstractString) || isa(x,Tuple) || isa(x,Type) || - isa(x,Char) || x === nothing || isa(x,Function) -end - -function quoted(@nospecialize(x)) - return is_self_quoting(x) ? x : QuoteNode(x) -end - -############ -# inlining # -############ - -const MAX_INLINE_CONST_SIZE = 256 - -function count_const_size(@nospecialize(x), count_self::Bool = true) - (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0 - ismutable(x) && return MAX_INLINE_CONST_SIZE + 1 - isbits(x) && return Core.sizeof(x) - dt = typeof(x) - sz = count_self ? 
sizeof(dt) : 0 - sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1 - dtfd = DataTypeFieldDesc(dt) - for i = 1:nfields(x) - isdefined(x, i) || continue - f = getfield(x, i) - if !dtfd[i].isptr && datatype_pointerfree(typeof(f)) - continue - end - sz += count_const_size(f, dtfd[i].isptr) - sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1 - end - return sz -end - -function is_inlineable_constant(@nospecialize(x)) - return count_const_size(x) <= MAX_INLINE_CONST_SIZE -end - -is_nospecialized(method::Method) = method.nospecialize ≠ 0 - -is_nospecializeinfer(method::Method) = method.nospecializeinfer && is_nospecialized(method) - -########################### -# MethodInstance/CodeInfo # -########################### - -invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li) -use_const_api(li::CodeInstance) = invoke_api(li) == 2 - -function get_staged(mi::MethodInstance, world::UInt) - may_invoke_generator(mi) || return nothing - try - # user code might throw errors – ignore them - ci = ccall(:jl_code_for_staged, Any, (Any, UInt), mi, world)::CodeInfo - return ci - catch - return nothing - end -end - -function retrieve_code_info(linfo::MethodInstance, world::UInt) - m = linfo.def::Method - c = nothing - if isdefined(m, :generator) - # user code might throw errors – ignore them - c = get_staged(linfo, world) - end - if c === nothing && isdefined(m, :source) - src = m.source - if src === nothing - # can happen in images built with --strip-ir - return nothing - elseif isa(src, String) - c = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, src) - else - c = copy(src::CodeInfo) - end - end - if c isa CodeInfo - c.parent = linfo - return c - end - return nothing -end - -function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) - isa(atype, DataType) || return nothing - mt = ccall(:jl_method_get_table, Any, (Any,), method) - mt === nothing && return nothing - return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), - mt, atype, sparams, method, #=int return_if_compileable=#1) -end - -function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) - isa(atype, DataType) || return method.sig - mt = ccall(:jl_method_table_for, Any, (Any,), atype) - mt === nothing && return method.sig - return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), - mt, atype, sparams, method, #=int return_if_compileable=#0) -end - -isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) = - !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any, Any), atype, sparams, method)) - -# eliminate UnionAll vars that might be degenerate due to having identical bounds, -# or a concrete upper bound and appearing covariantly. -function subst_trivial_bounds(@nospecialize(atype)) - if !isa(atype, UnionAll) - return atype - end - v = atype.var - if isconcretetype(v.ub) || v.lb === v.ub - subst = try - atype{v.ub} - catch - # Note in rare cases a var bound might not be valid to substitute. - nothing - end - if subst !== nothing - return subst_trivial_bounds(subst) - end - end - return UnionAll(v, subst_trivial_bounds(atype.body)) -end - -has_typevar(@nospecialize(t), v::TypeVar) = ccall(:jl_has_typevar, Cint, (Any, Any), t, v) != 0 - -# If removing trivial vars from atype results in an equivalent type, use that -# instead. 
Otherwise we can get a case like issue #38888, where a signature like -# f(x::S) where S<:Int -# gets cached and matches a concrete dispatch case. -function normalize_typevars(method::Method, @nospecialize(atype), sparams::SimpleVector) - at2 = subst_trivial_bounds(atype) - if at2 !== atype && at2 == atype - atype = at2 - sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), at2, method.sig)::SimpleVector - sparams = sp_[2]::SimpleVector - end - return Pair{Any,SimpleVector}(atype, sparams) -end - -# get a handle to the unique specialization object representing a particular instantiation of a call -@inline function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) - if isa(atype, UnionAll) - atype, sparams = normalize_typevars(method, atype, sparams) - end - if is_nospecializeinfer(method) - atype = get_nospecializeinfer_sig(method, atype, sparams) - end - if preexisting - # check cached specializations - # for an existing result stored there - return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atype)::Union{Nothing,MethodInstance} - end - return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atype, sparams) -end - -function specialize_method(match::MethodMatch; kwargs...) - return specialize_method(match.method, match.spec_types, match.sparams; kwargs...) -end - -""" - is_declared_inline(method::Method) -> Bool - -Check if `method` is declared as `@inline`. -""" -is_declared_inline(method::Method) = _is_declared_inline(method, true) - -""" - is_declared_noinline(method::Method) -> Bool - -Check if `method` is declared as `@noinline`. -""" -is_declared_noinline(method::Method) = _is_declared_inline(method, false) - -function _is_declared_inline(method::Method, inline::Bool) - isdefined(method, :source) || return false - src = method.source - isa(src, MaybeCompressed) || return false - return (inline ? is_declared_inline : is_declared_noinline)(src) -end - -""" - is_aggressive_constprop(method::Union{Method,CodeInfo}) -> Bool - -Check if `method` is declared as `Base.@constprop :aggressive`. -""" -is_aggressive_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x01 - -""" - is_no_constprop(method::Union{Method,CodeInfo}) -> Bool - -Check if `method` is declared as `Base.@constprop :none`. -""" -is_no_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x02 - -############# -# backedges # -############# - -""" - BackedgeIterator(backedges::Vector{Any}) - -Return an iterator over a list of backedges. 
Iteration returns `(sig, caller)` elements, -which will be one of the following: - -- `BackedgePair(nothing, caller::MethodInstance)`: a call made by ordinary inferable dispatch -- `BackedgePair(invokesig::Type, caller::MethodInstance)`: a call made by `invoke(f, invokesig, args...)` -- `BackedgePair(specsig::Type, mt::MethodTable)`: an abstract call - -# Examples - -```julia -julia> callme(x) = x+1 -callme (generic function with 1 method) - -julia> callyou(x) = callme(x) -callyou (generic function with 1 method) - -julia> callyou(2.0) -3.0 - -julia> mi = which(callme, (Any,)).specializations -MethodInstance for callme(::Float64) - -julia> @eval Core.Compiler for (; sig, caller) in BackedgeIterator(Main.mi.backedges) - println(sig) - println(caller) - end -nothing -callyou(Float64) from callyou(Any) -``` -""" -struct BackedgeIterator - backedges::Vector{Any} -end - -const empty_backedge_iter = BackedgeIterator(Any[]) - -struct BackedgePair - sig # ::Union{Nothing,Type} - caller::Union{MethodInstance,MethodTable} - BackedgePair(@nospecialize(sig), caller::Union{MethodInstance,MethodTable}) = new(sig, caller) -end - -function iterate(iter::BackedgeIterator, i::Int=1) - backedges = iter.backedges - i > length(backedges) && return nothing - item = backedges[i] - isa(item, MethodInstance) && return BackedgePair(nothing, item), i+1 # regular dispatch - isa(item, MethodTable) && return BackedgePair(backedges[i+1], item), i+2 # abstract dispatch - return BackedgePair(item, backedges[i+1]::MethodInstance), i+2 # `invoke` calls -end - -######### -# types # -######### - -@nospecializeinfer function singleton_type(@nospecialize(ft)) - ft = widenslotwrapper(ft) - if isa(ft, Const) - return ft.val - elseif isconstType(ft) - return ft.parameters[1] - elseif issingletontype(ft) - return ft.instance - end - return nothing -end - -@nospecializeinfer function maybe_singleton_const(@nospecialize(t)) - if isa(t, DataType) - if issingletontype(t) - return Const(t.instance) - elseif isconstType(t) - return Const(t.parameters[1]) - end - end - return t -end - -################### -# SSAValues/Slots # -################### - -function ssamap(f, @nospecialize(stmt)) - urs = userefs(stmt) - for op in urs - val = op[] - if isa(val, SSAValue) - op[] = f(val) - end - end - return urs[] -end - -function foreachssa(@specialize(f), @nospecialize(stmt)) - urs = userefs(stmt) - for op in urs - val = op[] - if isa(val, SSAValue) - f(val) - end - end -end - -function foreach_anyssa(@specialize(f), @nospecialize(stmt)) - urs = userefs(stmt) - for op in urs - val = op[] - if isa(val, AnySSAValue) - f(val) - end - end -end - -function find_ssavalue_uses(body::Vector{Any}, nvals::Int) - uses = BitSet[ BitSet() for i = 1:nvals ] - for line in 1:length(body) - e = body[line] - if isa(e, ReturnNode) - e = e.val - elseif isa(e, GotoIfNot) - e = e.cond - end - if isa(e, SSAValue) - push!(uses[e.id], line) - elseif isa(e, Expr) - find_ssavalue_uses(e, uses, line) - elseif isa(e, PhiNode) - find_ssavalue_uses(e, uses, line) - end - end - return uses -end - -function find_ssavalue_uses(e::Expr, uses::Vector{BitSet}, line::Int) - head = e.head - is_meta_expr_head(head) && return - skiparg = (head === :(=)) - for a in e.args - if skiparg - skiparg = false - elseif isa(a, SSAValue) - push!(uses[a.id], line) - elseif isa(a, Expr) - find_ssavalue_uses(a, uses, line) - end - end -end - -function find_ssavalue_uses(e::PhiNode, uses::Vector{BitSet}, line::Int) - for val in e.values - if isa(val, SSAValue) - push!(uses[val.id], line) - end - end 
-end - -function is_throw_call(e::Expr) - if e.head === :call - f = e.args[1] - if isa(f, GlobalRef) - ff = abstract_eval_globalref(f) - if isa(ff, Const) && ff.val === Core.throw - return true - end - end - end - return false -end - -function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int}) - for stmt in find_throw_blocks(src.code, handler_at) - src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK - end - return nothing -end - -function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int}) - stmts = BitSet() - n = length(code) - for i in n:-1:1 - s = code[i] - if isa(s, Expr) - if s.head === :gotoifnot - if i+1 in stmts && s.args[2]::Int in stmts - push!(stmts, i) - end - elseif s.head === :return - # see `ReturnNode` handling - elseif is_throw_call(s) - if handler_at[i] == 0 - push!(stmts, i) - end - elseif i+1 in stmts - push!(stmts, i) - end - elseif isa(s, ReturnNode) - # NOTE: it potentially makes sense to treat unreachable nodes - # (where !isdefined(s, :val)) as `throw` points, but that can cause - # worse codegen around the call site (issue #37558) - elseif isa(s, GotoNode) - if s.label in stmts - push!(stmts, i) - end - elseif isa(s, GotoIfNot) - if i+1 in stmts && s.dest in stmts - push!(stmts, i) - end - elseif i+1 in stmts - push!(stmts, i) - end - end - return stmts -end - -# using a function to ensure we can infer this -@inline function slot_id(s) - isa(s, SlotNumber) && return s.id - isa(s, Argument) && return s.n - return (s::TypedSlot).id -end - -########### -# options # -########### - -is_root_module(m::Module) = false - -inlining_enabled() = (JLOptions().can_inline == 1) -function coverage_enabled(m::Module) - ccall(:jl_generating_output, Cint, ()) == 0 || return false # don't alter caches - cov = JLOptions().code_coverage - if cov == 1 # user - m = moduleroot(m) - m === Core && return false - isdefined(Main, :Base) && m === Main.Base && return false - return true - elseif cov == 2 # all - return true - end - return false -end -function inbounds_option() - opt_check_bounds = JLOptions().check_bounds - opt_check_bounds == 0 && return :default - opt_check_bounds == 1 && return :on - return :off -end diff --git a/base/complex.jl b/base/complex.jl index 97b47eac91a5a..5d9f9df6f2b78 100644 --- a/base/complex.jl +++ b/base/complex.jl @@ -178,7 +178,7 @@ complex(x::Real, y::Real) = Complex(x, y) complex(T::Type) Return an appropriate type which can represent a value of type `T` as a complex number. -Equivalent to `typeof(complex(zero(T)))`. +Equivalent to `typeof(complex(zero(T)))` if `T` does not contain `Missing`. # Examples ```jldoctest @@ -187,6 +187,9 @@ Complex{Int64} julia> complex(Int) Complex{Int64} + +julia> complex(Union{Int, Missing}) +Union{Missing, Complex{Int64}} ``` """ complex(::Type{T}) where {T<:Real} = Complex{T} @@ -339,7 +342,7 @@ end *(x::Real, z::Complex) = Complex(x * real(z), x * imag(z)) *(z::Complex, x::Real) = Complex(x * real(z), x * imag(z)) -muladd(x::Real, z::Complex, y::Number) = muladd(z, x, y) +muladd(x::Real, z::Complex, y::Union{Real,Complex}) = muladd(z, x, y) muladd(z::Complex, x::Real, y::Real) = Complex(muladd(real(z),x,y), imag(z)*x) muladd(z::Complex, x::Real, w::Complex) = Complex(muladd(real(z),x,real(w)), muladd(imag(z),x,imag(w))) @@ -567,7 +570,7 @@ end """ cis(x) -More efficient method for `exp(im*x)` by using Euler's formula: ``cos(x) + i sin(x) = \\exp(i x)``. +More efficient method for `exp(im*x)` by using Euler's formula: ``\\cos(x) + i \\sin(x) = \\exp(i x)``. 
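A minimal sketch (illustration only, not part of the patch) of the Euler identity the updated `cis` docstring refers to; it uses only exported Base functions:

```julia
# cis(x) computes exp(im*x) via Euler's formula, cos(x) + i*sin(x).
x = 0.7
cis(x) ≈ exp(im * x)            # true
cis(x) ≈ cos(x) + im * sin(x)   # true
```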
See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref), [`angle`](@ref). @@ -622,7 +625,10 @@ end Compute the phase angle in radians of a complex number `z`. -See also: [`atan`](@ref), [`cis`](@ref). +Returns a number `-pi ≤ angle(z) ≤ pi`, and is thus discontinuous +along the negative real axis. + +See also: [`atan`](@ref), [`cis`](@ref), [`rad2deg`](@ref). # Examples ```jldoctest @@ -632,8 +638,11 @@ julia> rad2deg(angle(1 + im)) julia> rad2deg(angle(1 - im)) -45.0 -julia> rad2deg(angle(-1 - im)) --135.0 +julia> rad2deg(angle(-1 + 1e-20im)) +180.0 + +julia> rad2deg(angle(-1 - 1e-20im)) +-180.0 ``` """ angle(z::Complex) = atan(imag(z), real(z)) @@ -750,7 +759,7 @@ function log1p(z::Complex{T}) where T # allegedly due to Kahan, only modified to handle real(u) <= 0 # differently to avoid inaccuracy near z==-2 and for correct branch cut u = one(float(T)) + z - u == 1 ? convert(typeof(u), z) : real(u) <= 0 ? log(u) : log(u)*z/(u-1) + u == 1 ? convert(typeof(u), z) : real(u) <= 0 ? log(u) : log(u)*(z/(u-1)) elseif isnan(zr) Complex(zr, zr) elseif isfinite(zi) @@ -1028,24 +1037,22 @@ end function atanh(z::Complex{T}) where T z = float(z) Tf = float(T) - Ω = prevfloat(typemax(Tf)) - θ = sqrt(Ω)/4 - ρ = 1/θ x, y = reim(z) ax = abs(x) ay = abs(y) + θ = sqrt(floatmax(Tf))/4 if ax > θ || ay > θ #Prevent overflow if isnan(y) if isinf(x) return Complex(copysign(zero(x),x), y) else - return Complex(real(1/z), y) + return Complex(real(inv(z)), y) end end if isinf(y) return Complex(copysign(zero(x),x), copysign(oftype(y,pi)/2, y)) end - return Complex(real(1/z), copysign(oftype(y,pi)/2, y)) + return Complex(real(inv(z)), copysign(oftype(y,pi)/2, y)) end β = copysign(one(Tf), x) z *= β @@ -1055,16 +1062,15 @@ function atanh(z::Complex{T}) where T ξ = oftype(x, Inf) η = y else - ym = ay+ρ - ξ = log(sqrt(sqrt(4+y*y))/sqrt(ym)) - η = copysign(oftype(y,pi)/2 + atan(ym/2), y)/2 + ξ = log(sqrt(sqrt(muladd(y, y, 4)))/sqrt(ay)) + η = copysign(oftype(y,pi)/2 + atan(ay/2), y)/2 end else #Normal case - ysq = (ay+ρ)^2 + ysq = ay^2 if x == 0 ξ = x else - ξ = log1p(4x/((1-x)^2 + ysq))/4 + ξ = log1p(4x/(muladd(1-x, 1-x, ysq)))/4 end η = angle(Complex((1-x)*(1+x)-ysq, 2y))/2 end @@ -1088,7 +1094,7 @@ second is used for rounding the imaginary components. which rounds to the nearest integer, with ties (fractional values of 0.5) being rounded to the nearest even integer. -# Example +# Examples ```jldoctest julia> round(3.14 + 4.5im) 3.0 + 4.0im diff --git a/base/condition.jl b/base/condition.jl index 20481c98ee805..fd771c9be346a 100644 --- a/base/condition.jl +++ b/base/condition.jl @@ -69,6 +69,8 @@ struct GenericCondition{L<:AbstractLock} GenericCondition(l::AbstractLock) = new{typeof(l)}(IntrusiveLinkedList{Task}(), l) end +show(io::IO, c::GenericCondition) = print(io, GenericCondition, "(", c.lock, ")") + assert_havelock(c::GenericCondition) = assert_havelock(c.lock) lock(c::GenericCondition) = lock(c.lock) unlock(c::GenericCondition) = unlock(c.lock) @@ -103,17 +105,16 @@ end """ wait([x]) -Block the current task until some event occurs, depending on the type of the argument: +Block the current task until some event occurs. * [`Channel`](@ref): Wait for a value to be appended to the channel. * [`Condition`](@ref): Wait for [`notify`](@ref) on a condition and return the `val` - parameter passed to `notify`. Waiting on a condition additionally allows passing - `first=true` which results in the waiter being put _first_ in line to wake up on `notify` - instead of the usual first-in-first-out behavior. 
+ parameter passed to `notify`. See the `Condition`-specific docstring of `wait` for + the exact behavior. * `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process can be used to determine success or failure. -* [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, a - `TaskFailedException` (which wraps the failed task) is thrown. +* [`Task`](@ref): Wait for a `Task` to finish. See the `Task`-specific docstring of `wait` for + the exact behavior. * [`RawFD`](@ref): Wait for changes on a file descriptor (see the `FileWatching` package). If no argument is passed, the task blocks for an undefined period. A task can only be @@ -122,6 +123,16 @@ restarted by an explicit call to [`schedule`](@ref) or [`yieldto`](@ref). Often `wait` is called within a `while` loop to ensure a waited-for condition is met before proceeding. """ +function wait end + +""" + wait(c::GenericCondition; first::Bool=false) + +Wait for [`notify`](@ref) on `c` and return the `val` parameter passed to `notify`. + +If the keyword `first` is set to `true`, the waiter will be put _first_ +in line to wake up on `notify`. Otherwise, `wait` has first-in-first-out (FIFO) behavior. +""" function wait(c::GenericCondition; first::Bool=false) ct = current_task() _wait2(c, ct, first) @@ -129,7 +140,7 @@ function wait(c::GenericCondition; first::Bool=false) try return wait() catch - ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) rethrow() finally relockall(c.lock, token) @@ -175,8 +186,9 @@ isempty(c::GenericCondition) = isempty(c.waitq) Create an edge-triggered event source that tasks can wait for. Tasks that call [`wait`](@ref) on a `Condition` are suspended and queued. Tasks are woken up when [`notify`](@ref) is later called on -the `Condition`. Edge triggering means that only tasks waiting at the time [`notify`](@ref) is -called can be woken up. For level-triggered notifications, you must keep extra state to keep +the `Condition`. Waiting on a condition can return a value or raise an error if the optional arguments +of [`notify`](@ref) are used. Edge triggering means that only tasks waiting at the time [`notify`](@ref) +is called can be woken up. For level-triggered notifications, you must keep extra state to keep track of whether a notification has happened. The [`Channel`](@ref) and [`Threads.Event`](@ref) types do this, and can be used for level-triggered events. @@ -184,6 +196,8 @@ This object is NOT thread-safe. See [`Threads.Condition`](@ref) for a thread-saf """ const Condition = GenericCondition{AlwaysLockedST} +show(io::IO, ::Condition) = print(io, Condition, "()") + lock(c::GenericCondition{AlwaysLockedST}) = throw(ArgumentError("`Condition` is not thread-safe. Please use `Threads.Condition` instead for multi-threaded code.")) unlock(c::GenericCondition{AlwaysLockedST}) = diff --git a/base/coreio.jl b/base/coreio.jl index 3e508c64a0a64..b5c543a25d5ad 100644 --- a/base/coreio.jl +++ b/base/coreio.jl @@ -1,8 +1,13 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +print(x) = print(stdout, x) +print(x1, x2) = print(stdout, x1, x2) +println(x) = print(stdout, x, "\n") +println(x1, x2) = print(stdout, x1, x2, "\n") + print(xs...) = print(stdout, xs...) -println(xs...) = println(stdout, xs...) -println(io::IO) = print(io, '\n') +println(xs...) 
= print(stdout, xs..., "\n") # fewer allocations than `println(stdout, xs...)` +println(io::IO) = print(io, "\n") function show end function repr end @@ -11,6 +16,7 @@ struct DevNull <: IO end const devnull = DevNull() write(::DevNull, ::UInt8) = 1 unsafe_write(::DevNull, ::Ptr{UInt8}, n::UInt)::Int = n +closewrite(::DevNull) = nothing close(::DevNull) = nothing wait_close(::DevNull) = wait() bytesavailable(io::DevNull) = 0 diff --git a/base/coreir.jl b/base/coreir.jl new file mode 100644 index 0000000000000..5199dfd35f028 --- /dev/null +++ b/base/coreir.jl @@ -0,0 +1,53 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +Core.PhiNode() = Core.PhiNode(Int32[], Any[]) + +""" + struct Const + val + end + +The type representing a constant value. +""" +Core.Const + +""" + struct PartialStruct + typ + fields::Vector{Any} # elements are other type lattice members + end + +This extended lattice element is introduced when we have information about an object's +fields beyond what can be obtained from the object type. E.g. it represents a tuple where +some elements are known to be constants or a struct whose `Any`-typed field is initialized +with `Int` values. + +- `typ` indicates the type of the object +- `fields` holds the lattice elements corresponding to each field of the object + +If `typ` is a struct, `fields` represents the fields of the struct that are guaranteed to be +initialized. For instance, if the length of `fields` of `PartialStruct` representing a +struct with 4 fields is 3, the 4th field may not be initialized. If the length is 4, all +fields are guaranteed to be initialized. + +If `typ` is a tuple, the last element of `fields` may be `Vararg`. In this case, it is +guaranteed that the number of elements in the tuple is at least `length(fields)-1`, but the +exact number of elements is unknown. +""" +Core.PartialStruct + +""" + struct InterConditional + slot::Int + thentype + elsetype + end + +Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments. +This is separate from `Conditional` to catch logic errors: the lattice element name is `InterConditional` +while processing a call, then `Conditional` everywhere else. +""" +Core.InterConditional + +InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) = + InterConditional(slot_id(var), thentype, elsetype) diff --git a/base/cpuid.jl b/base/cpuid.jl index 48930d8064ba9..0370bd33b83e5 100644 --- a/base/cpuid.jl +++ b/base/cpuid.jl @@ -21,7 +21,7 @@ Base.:<=(a::ISA, b::ISA) = a.features <= b.features Base.:<(a::ISA, b::ISA) = a.features < b.features Base.isless(a::ISA, b::ISA) = a < b -include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "features_h.jl")) # include($BUILDROOT/base/features_h.jl) +include(string(Base.BUILDROOT, "features_h.jl")) # include($BUILDROOT/base/features_h.jl) # Keep in sync with `arch_march_isa_mapping`. 
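The 1- and 2-argument `print`/`println` methods added in the coreio.jl hunk above exist so that the most common call shapes skip the vararg fallback; a rough sketch of which definition each call reaches, assuming the methods exactly as written in that hunk:

```julia
println("done")            # hits println(x)       = print(stdout, x, "\n")
println("answer: ", 42)    # hits println(x1, x2)  = print(stdout, x1, x2, "\n")
println("a", "b", "c")     # falls back to the vararg println(xs...) method
```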
const ISAs_by_family = Dict( @@ -61,10 +61,17 @@ const ISAs_by_family = Dict( "a64fx" => ISA(Set((JL_AArch64_v8_2a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_sha2, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fullfp16, JL_AArch64_sve))), "apple_m1" => ISA(Set((JL_AArch64_v8_5a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2, JL_AArch64_sha3, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fp16fml, JL_AArch64_fullfp16, JL_AArch64_dotprod, JL_AArch64_rcpc, JL_AArch64_altnzcv))), ], + "riscv64" => [ + "riscv64" => ISA(Set{UInt32}()), + ], "powerpc64le" => [ # We have no way to test powerpc64le features yet, so we're only going to declare the lowest ISA: "power8" => ISA(Set{UInt32}()), - ] + ], + "riscv64" => [ + # We have no way to test riscv64 features yet, so we're only going to declare the lowest ISA: + "riscv64" => ISA(Set{UInt32}()), + ], ) # Test a CPU feature exists on the currently-running host diff --git a/base/ctypes.jl b/base/ctypes.jl index 26640ed82bef5..45f01b684902f 100644 --- a/base/ctypes.jl +++ b/base/ctypes.jl @@ -113,3 +113,7 @@ const Cfloat = Float32 Equivalent to the native `double` c-type ([`Float64`](@ref)). """ const Cdouble = Float64 + + +# we have no `Float16` alias, because C does not define a standard fp16 type. Julia follows +# the _Float16 C ABI; if that becomes standard, we can add an appropriate alias here. diff --git a/base/deepcopy.jl b/base/deepcopy.jl index eae8974326d06..f60ce2043dd5a 100644 --- a/base/deepcopy.jl +++ b/base/deepcopy.jl @@ -9,8 +9,11 @@ deepcopy(x) Create a deep copy of `x`: everything is copied recursively, resulting in a fully -independent object. For example, deep-copying an array produces a new array whose elements -are deep copies of the original elements. Calling `deepcopy` on an object should generally +independent object. For example, deep-copying an array creates deep copies of all +the objects it contains and produces a new array with the consistent relationship +structure (e.g., if the first two elements are the same object in the original array, +the first two elements of the new array will also be the same `deepcopy`ed object). +Calling `deepcopy` on an object should generally have the same effect as serializing and then deserializing it. While it isn't normally necessary, user-defined types can override the default `deepcopy` @@ -34,7 +37,7 @@ deepcopy_internal(x::Module, stackdict::IdDict) = error("deepcopy of Modules not function deepcopy_internal(x::SimpleVector, stackdict::IdDict) if haskey(stackdict, x) - return stackdict[x] + return stackdict[x]::typeof(x) end y = Core.svec(Any[deepcopy_internal(x[i], stackdict) for i = 1:length(x)]...) 
stackdict[x] = y @@ -43,7 +46,7 @@ end function deepcopy_internal(x::String, stackdict::IdDict) if haskey(stackdict, x) - return stackdict[x] + return stackdict[x]::typeof(x) end y = GC.@preserve x unsafe_string(pointer(x), sizeof(x)) stackdict[x] = y @@ -55,14 +58,16 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict) nf = nfields(x) if ismutable(x) if haskey(stackdict, x) - return stackdict[x] + return stackdict[x]::typeof(x) end y = ccall(:jl_new_struct_uninit, Any, (Any,), T) stackdict[x] = y for i in 1:nf if isdefined(x, i) xi = getfield(x, i) - xi = deepcopy_internal(xi, stackdict)::typeof(xi) + if !isbits(xi) + xi = deepcopy_internal(xi, stackdict)::typeof(xi) + end ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), y, i-1, xi) end end @@ -73,7 +78,9 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict) for i in 1:nf if isdefined(x, i) xi = getfield(x, i) - xi = deepcopy_internal(xi, stackdict)::typeof(xi) + if !isbits(xi) + xi = deepcopy_internal(xi, stackdict)::typeof(xi) + end flds[i] = xi else nf = i - 1 # rest of tail must be undefined values @@ -85,30 +92,54 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict) return y::T end -function deepcopy_internal(x::Array, stackdict::IdDict) +function deepcopy_internal(x::Memory, stackdict::IdDict) if haskey(stackdict, x) return stackdict[x]::typeof(x) end - _deepcopy_array_t(x, eltype(x), stackdict) + _deepcopy_memory_t(x, eltype(x), stackdict) end -function _deepcopy_array_t(@nospecialize(x::Array), T, stackdict::IdDict) +function _deepcopy_memory_t(@nospecialize(x::Memory), T, stackdict::IdDict) if isbitstype(T) return (stackdict[x]=copy(x)) end - dest = similar(x) + dest = typeof(x)(undef, length(x)) stackdict[x] = dest + xr = memoryref(x) + dr = memoryref(dest) for i = 1:length(x) - if ccall(:jl_array_isassigned, Cint, (Any, Csize_t), x, i-1) != 0 - xi = ccall(:jl_arrayref, Any, (Any, Csize_t), x, i-1) + xi = Core.memoryrefnew(xr, i, false) + if Core.memoryref_isassigned(xi, :not_atomic, false) + xi = Core.memoryrefget(xi, :not_atomic, false) if !isbits(xi) xi = deepcopy_internal(xi, stackdict)::typeof(xi) end - ccall(:jl_arrayset, Cvoid, (Any, Any, Csize_t), dest, xi, i-1) + di = Core.memoryrefnew(dr, i, false) + Core.memoryrefset!(di, xi, :not_atomic, false) end end return dest end +function deepcopy_internal(x::Array{T, N}, stackdict::IdDict) where {T, N} + if haskey(stackdict, x) + return stackdict[x]::typeof(x) + end + y = stackdict[x] = Array{T, N}(undef, ntuple(Returns(0), Val{N}())) + setfield!(y, :ref, deepcopy_internal(x.ref, stackdict)) + setfield!(y, :size, x.size) + y +end +function deepcopy_internal(x::GenericMemoryRef, stackdict::IdDict) + if haskey(stackdict, x) + return stackdict[x]::typeof(x) + end + mem = getfield(x, :mem) + dest = memoryref(deepcopy_internal(mem, stackdict)::typeof(mem)) + i = memoryrefoffset(x) + i == 1 || (dest = Core.memoryrefnew(dest, i, true)) + return dest +end + function deepcopy_internal(x::Union{Dict,IdDict}, stackdict::IdDict) if haskey(stackdict, x) @@ -129,7 +160,7 @@ end function deepcopy_internal(x::AbstractLock, stackdict::IdDict) if haskey(stackdict, x) - return stackdict[x] + return stackdict[x]::typeof(x) end y = typeof(x)() stackdict[x] = y @@ -138,7 +169,7 @@ end function deepcopy_internal(x::GenericCondition, stackdict::IdDict) if haskey(stackdict, x) - return stackdict[x] + return stackdict[x]::typeof(x) end y = typeof(x)(deepcopy_internal(x.lock, stackdict)) stackdict[x] = y diff --git a/base/deprecated.jl b/base/deprecated.jl 
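The revised `deepcopy` docstring above stresses that sharing between elements is reproduced in the copy; a small sketch (not part of the patch) of that aliasing behavior:

```julia
a = [1, 2]
v = [a, a]          # both elements are the same array object
w = deepcopy(v)
w[1] === w[2]       # true:  the sharing structure is preserved in the copy
w[1] === a          # false: the copy is independent of the original
```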
index 1b661716cc2d9..cffff05d954d1 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1,5 +1,115 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# Internal changes mechanism. +# Instructions for Julia Core Developers: +# 1. When making a breaking change that is known to be depended upon by an +# important and closely coupled package, decide on a unique `change_name` +# for your PR and add it to the list below. In general, it is better to +# err on the side of caution and assign a `change_name` even if it is not +# clear that it is required. `change_name`s may also be assigned after the +# fact in a separate PR. (Note that this may cause packages to misbehave +# on versions in between the change and the assignment of the `change_name`, +# but this is often still better than the alternative of misbehaving on unknown +# versions). + +# Instructions for Release Managers: +# 1. Upon tagging any release, clear the list of internal changes. +# 2. Upon tagging an -alpha version +# a. On master, set __next_removal_version to v"1.(x+1)-alpha" +# b. On the release branch, set __next_removal_version to v"1.x" (no -alpha) +# 3. Upon tagging a release candidate, clear the list of internal changes and +# set __next_removal_version to `nothing`. +const __next_removal_version = v"1.12-alpha" +const __internal_changes_list = ( + :invertedlinetables, + :codeinforefactor, + :miuninferredrm, + :codeinfonargs, # #54341 + :ocnopartial, + :printcodeinfocalls, + # Add new change names above this line +) + +if !isempty(__internal_changes_list) + if VERSION == __next_removal_version + error("You have tagged a new release without clearing the internal changes list.") + end +elseif __next_removal_version === nothing + error("You have tagged a new release candidate without clearing the internal changes list.") + end +end + +""" + __has_internal_change(version_or::VersionNumber, change_name::Symbol) + +Some Julia packages have known dependencies on Julia internals (e.g. for introspection of +internal julia datastructures). To ease the co-development of such packages with julia, +a `change_name` is assigned on a best-effort basis or when explicitly requested. +This `change_name` can be used to probe whether or not the particular pre-release build of julia has +a particular change. In particular this function tests whether a change scheduled for `version_or` +is present in our current julia build, either because our current version +is greater than `version_or` or because we're running a pre-release build that +includes the change. + +Using this mechanism is a superior alternative to commit-number based `VERSION` +comparisons, which can be brittle during pre-release stages when there are multiple +actively developed branches. + +The list of changes is cleared twice during the release process: +1. With the release of the first alpha +2. For the first release candidate + +No new `change_name`s will be added during release candidates or bugfix releases +(so in particular on any released version, the list of changes will be empty and +`__has_internal_change` will always be equivalent to a version comparison). + +# Example + +Julia version `v"1.12.0-DEV.173"` changed the internal representation of line number debug info. +Several debugging packages have custom code to display this information and need to be changed +accordingly.
In previous practice, this would often be accomplished with something like the following +``` +@static if VERSION > v"1.12.0-DEV.173" + # Code to handle new format +else + # Code to handle old format +end +``` + +However, because such checks cannot be introduced until a VERSION number is assigned +(which also automatically pushes out the change to all nightly users), there was a built-in period +of breakage. With `__has_internal_change`, this can instead be written as: + +``` +@static if __has_internal_change(v"1.12-alpha", :invertedlinenames) + # Code to handle new format +else + # Code to handle old format +end +``` + +To find out the correct version to use as the first argument, you may use +`Base.__next_removal_version`, which is set to the next version number in which +the list of changes will be cleared. + +The primary advantage of this approach is that it allows a new version of the +package to be tagged and released *in advance* of the break on the nightly +build, thus ensuring continuity of package operation for nightly users. + +!!! warning + + This functionality is intended to help package developers who make use of + internal julia functionality. Doing so is explicitly discouraged unless absolutely + required and comes with the explicit understanding that the package will break. + In particular, this is not a generic feature-testing mechanism, but only a + simple, courtesy coordination mechanism for changes that are known (or found) to + be breaking a package depending on julia internals. +""" +function __has_internal_change(version_or::VersionNumber, change_name::Symbol) + VERSION > version_or && return true + change_name in __internal_changes_list +end +export __has_internal_change + # Deprecated functions and objects # # Please add new deprecations at the bottom of the file. @@ -10,9 +120,7 @@ # and of exporting the function. # # For more complex cases, move the body of the deprecated method in this file, -# and call depwarn() directly from inside it. The symbol depwarn() expects is -# the name of the function, which is used to ensure that the deprecation warning -# is only printed the first time for each call place. +# and call depwarn() directly from inside it. """ @deprecate old new [export_old=true] @@ -22,6 +130,8 @@ with the specified signature in the process. To prevent `old` from being exported, set `export_old` to `false`. +See also [`Base.depwarn()`](@ref). + !!! compat "Julia 1.5" As of Julia 1.5, functions defined by `@deprecate` do not print warning when `julia` is run without the `--depwarn=yes` flag set, as the default value of `--depwarn` option @@ -29,11 +139,11 @@ To prevent `old` from being exported, set `export_old` to `false`. # Examples ```jldoctest -julia> @deprecate old(x) new(x) -old (generic function with 1 method) +julia> @deprecate old_export(x) new(x) +old_export (generic function with 1 method) -julia> @deprecate old(x) new(x) false -old (generic function with 1 method) +julia> @deprecate old_public(x) new(x) false +old_public (generic function with 1 method) ``` Calls to `@deprecate` without explicit type-annotations will define @@ -118,7 +228,35 @@ macro deprecate(old, new, export_old=true) end end -function depwarn(msg, funcsym; force::Bool=false) +""" + Base.depwarn(msg::String, funcsym::Symbol; force=false) + +Print `msg` as a deprecation warning. The symbol `funcsym` should be the name +of the calling function, which is used to ensure that the deprecation warning is +only printed the first time for each call place.
Set `force=true` to force the +warning to always be shown, even if Julia was started with `--depwarn=no` (the +default). + +See also [`@deprecate`](@ref). + +# Examples +```julia +function deprecated_func() + Base.depwarn("Don't use `deprecated_func()`!", :deprecated_func) + + 1 + 1 +end +``` +""" +@nospecializeinfer function depwarn(msg, funcsym; force::Bool=false) + @nospecialize + # N.B. With this use of `@invokelatest`, we're preventing the addition of backedges from + # callees, such as `convert`, to this user-facing method. This approach is designed to + # enhance the resilience of packages that utilize `depwarn` against invalidation. + return @invokelatest _depwarn(msg, funcsym, force) +end +@nospecializeinfer function _depwarn(msg, funcsym, force::Bool) + @nospecialize opts = JLOptions() if opts.depwarn == 2 throw(ErrorException(msg)) @@ -271,14 +409,10 @@ getindex(match::Core.MethodMatch, field::Int) = # these were internal functions, but some packages seem to be relying on them tuple_type_head(T::Type) = fieldtype(T, 1) tuple_type_cons(::Type, ::Type{Union{}}) = Union{} -function tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S - @_foldable_meta +@assume_effects :foldable tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S = Tuple{S, T.parameters...} -end -function parameter_upper_bound(t::UnionAll, idx) - @_foldable_meta - return rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t) -end +@assume_effects :foldable parameter_upper_bound(t::UnionAll, idx) = + rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t) # these were internal functions, but some packages seem to be relying on them @deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes) false @@ -299,7 +433,8 @@ const All16{T,N} = Tuple{T,T,T,T,T,T,T,T, # the plan is to eventually overload getproperty to access entries of the dict @noinline function getproperty(x::Pairs, s::Symbol) - depwarn("use values(kwargs) and keys(kwargs) instead of kwargs.data and kwargs.itr", :getproperty, force=true) + s == :data && depwarn("use values(kwargs) instead of kwargs.data", :getproperty, force=true) + s == :itr && depwarn("use keys(kwargs) instead of kwargs.itr", :getproperty, force=true) return getfield(x, s) end @@ -384,3 +519,16 @@ macro pure(ex) end # END 1.10 deprecations + +# BEGIN 1.11 deprecations + +# these were never a part of the public API and so they can be removed without deprecation +# in a minor release but we're being nice and trying to avoid transient breakage. +@deprecate permute!!(a, p::AbstractVector{<:Integer}) permute!(a, p) false +@deprecate invpermute!!(a, p::AbstractVector{<:Integer}) invpermute!(a, p) false + +# END 1.11 deprecations + +# BEGIN 1.12 deprecations + +# END 1.12 deprecations diff --git a/base/dict.jl b/base/dict.jl index 8a78c1fa8da45..4a63ed364b64d 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -53,12 +53,20 @@ Dict{String, Int64} with 2 entries: "B" => 2 "A" => 1 ``` + +!!! warning + + Keys are allowed to be mutable, but if you do mutate stored + keys, the hash table may become internally inconsistent, in which case + the `Dict` will not work properly. [`IdDict`](@ref) can be an + alternative if you need to mutate keys. 
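The warning added to the `Dict` docstring above describes what goes wrong when a stored key is mutated; the sketch below (not part of the patch) shows the typical symptom:

```julia
k = [1, 2]
d = Dict(k => "value")
push!(k, 3)         # mutating the stored key changes its hash
haskey(d, k)        # almost certainly false: the entry is filed under the old hash
haskey(d, [1, 2])   # also false: the stored key is no longer isequal([1, 2])
# The entry is effectively unreachable; an IdDict keyed by object identity avoids this.
```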
+ """ mutable struct Dict{K,V} <: AbstractDict{K,V} # Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits] - slots::Vector{UInt8} - keys::Array{K,1} - vals::Array{V,1} + slots::Memory{UInt8} + keys::Memory{K} + vals::Memory{V} ndel::Int count::Int age::UInt @@ -66,14 +74,16 @@ mutable struct Dict{K,V} <: AbstractDict{K,V} maxprobe::Int function Dict{K,V}() where V where K - n = 16 - new(zeros(UInt8,n), Vector{K}(undef, n), Vector{V}(undef, n), 0, 0, 0, n, 0) + n = 0 + slots = Memory{UInt8}(undef,n) + fill!(slots, 0x0) + new(slots, Memory{K}(undef, n), Memory{V}(undef, n), 0, 0, 0, max(1, n), 0) end function Dict{K,V}(d::Dict{K,V}) where V where K new(copy(d.slots), copy(d.keys), copy(d.vals), d.ndel, d.count, d.age, d.idxfloor, d.maxprobe) end - function Dict{K, V}(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) where {K, V} + function Dict{K, V}(slots::Memory{UInt8}, keys::Memory{K}, vals::Memory{V}, ndel::Int, count::Int, age::UInt, idxfloor::Int, maxprobe::Int) where {K, V} new(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) end end @@ -104,45 +114,7 @@ const AnyDict = Dict{Any,Any} Dict(ps::Pair{K,V}...) where {K,V} = Dict{K,V}(ps) Dict(ps::Pair...) = Dict(ps) -function Dict(kv) - try - dict_with_eltype((K, V) -> Dict{K, V}, kv, eltype(kv)) - catch - if !isiterable(typeof(kv)) || !all(x->isa(x,Union{Tuple,Pair}),kv) - throw(ArgumentError("Dict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow() - end - end -end - -function grow_to!(dest::AbstractDict{K, V}, itr) where V where K - y = iterate(itr) - y === nothing && return dest - ((k,v), st) = y - dest2 = empty(dest, typeof(k), typeof(v)) - dest2[k] = v - grow_to!(dest2, itr, st) -end - -# this is a special case due to (1) allowing both Pairs and Tuples as elements, -# and (2) Pair being invariant. a bit annoying. -function grow_to!(dest::AbstractDict{K,V}, itr, st) where V where K - y = iterate(itr, st) - while y !== nothing - (k,v), st = y - if isa(k,K) && isa(v,V) - dest[k] = v - else - new = empty(dest, promote_typejoin(K,typeof(k)), promote_typejoin(V,typeof(v))) - merge!(new, dest) - new[k] = v - return grow_to!(new, itr, st) - end - y = iterate(itr, st) - end - return dest -end +Dict(kv) = dict_with_eltype((K, V) -> Dict{K, V}, kv, eltype(kv)) empty(a::AbstractDict, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}() @@ -171,17 +143,20 @@ end h.age += 1 h.idxfloor = 1 if h.count == 0 - resize!(h.slots, newsz) + # TODO: tryresize + h.slots = Memory{UInt8}(undef, newsz) fill!(h.slots, 0x0) - resize!(h.keys, newsz) - resize!(h.vals, newsz) + h.keys = Memory{K}(undef, newsz) + h.vals = Memory{V}(undef, newsz) h.ndel = 0 + h.maxprobe = 0 return h end - slots = zeros(UInt8,newsz) - keys = Vector{K}(undef, newsz) - vals = Vector{V}(undef, newsz) + slots = Memory{UInt8}(undef, newsz) + fill!(slots, 0x0) + keys = Memory{K}(undef, newsz) + vals = Memory{V}(undef, newsz) age0 = h.age count = 0 maxprobe = 0 @@ -215,13 +190,13 @@ end return h end -function sizehint!(d::Dict{T}, newsz) where T +function sizehint!(d::Dict{T}, newsz; shrink::Bool=true) where T oldsz = length(d.slots) # limit new element count to max_values of the key type newsz = min(max(newsz, length(d)), max_values(T)::Int) # need at least 1.5n space to hold n elements newsz = _tablesz(cld(3 * newsz, 2)) - return newsz == oldsz ? d : rehash!(d, newsz) + return (shrink ? newsz == oldsz : newsz <= oldsz) ? 
d : rehash!(d, newsz) end """ @@ -245,19 +220,20 @@ Dict{String, Int64}() function empty!(h::Dict{K,V}) where V where K fill!(h.slots, 0x0) sz = length(h.slots) - empty!(h.keys) - empty!(h.vals) - resize!(h.keys, sz) - resize!(h.vals, sz) + for i in 1:sz + _unsetindex!(h.keys, i) + _unsetindex!(h.vals, i) + end h.ndel = 0 h.count = 0 + h.maxprobe = 0 h.age += 1 - h.idxfloor = sz + h.idxfloor = max(1, sz) return h end # get the index where a key is stored, or -1 if not present -@assume_effects :terminates_locally function ht_keyindex(h::Dict{K,V}, key) where V where K +function ht_keyindex(h::Dict{K,V}, key) where V where K isempty(h) && return -1 sz = length(h.keys) iter = 0 @@ -266,9 +242,9 @@ end index, sh = hashindex(key, sz) keys = h.keys - @inbounds while true + @assume_effects :terminates_locally :noub @inbounds while true isslotempty(h,index) && return -1 - if h.slots[index] == sh + if sh == h.slots[index] k = keys[index] if (key === k || isequal(key, k)) return index @@ -288,6 +264,11 @@ end # This version is for use by setindex! and get! function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K sz = length(h.keys) + if sz == 0 # if Dict was empty resize and then return location to insert + rehash!(h, 4) + index, sh = hashindex(key, length(h.keys)) + return -index, sh + end iter = 0 maxprobe = h.maxprobe index, sh = hashindex(key, sz) @@ -353,7 +334,7 @@ ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1] # Rehash now if necessary if (h.count + h.ndel)*3 > sz*2 # > 2/3 full (including tombstones) - rehash!(h, h.count > 64000 ? h.count*2 : h.count*4) + rehash!(h, h.count > 64000 ? h.count*2 : max(h.count*4, 4)) end nothing end @@ -364,7 +345,7 @@ function setindex!(h::Dict{K,V}, v0, key0) where V where K else key = convert(K, key0)::K if !(isequal(key, key0)::Bool) - throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K")) + throw(KeyTypeError(K, key0)) end end setindex!(h, v0, key) @@ -462,7 +443,7 @@ function get!(default::Callable, h::Dict{K,V}, key0) where V where K else key = convert(K, key0)::K if !isequal(key, key0) - throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K")) + throw(KeyTypeError(K, key0)) end end return get!(default, h, key) @@ -493,7 +474,7 @@ end function getindex(h::Dict{K,V}, key) where V where K index = ht_keyindex(h, key) - @inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V + return index < 0 ? 
throw(KeyError(key)) : @assume_effects :noub @inbounds h.vals[index]::V end """ @@ -724,6 +705,8 @@ end isempty(t::Dict) = (t.count == 0) length(t::Dict) = t.count +@propagate_inbounds Iterators.only(t::Dict) = Iterators._only(t, first) + @propagate_inbounds function Base.iterate(v::T, i::Int = v.dict.idxfloor) where T <: Union{KeySet{<:Any, <:Dict}, ValueIterator{<:Dict}} i == 0 && return nothing i = skip_deleted(v.dict, i) @@ -761,7 +744,7 @@ function map!(f, iter::ValueIterator{<:Dict}) end function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V} - haslength(d2) && sizehint!(d1, length(d1) + length(d2)) + haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink=false) for (k, v) in d2 i, sh = ht_keyindex2_shorthash!(d1, k) if i > 0 @@ -770,7 +753,7 @@ function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V} if !(k isa K) k1 = convert(K, k)::K if !isequal(k, k1) - throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K")) + throw(KeyTypeError(K, k)) end k = k1 end @@ -869,3 +852,180 @@ empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}() _similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V} = empty(c, K, V) _similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T} = throw(ArgumentError("for AbstractDicts, similar requires an element type of Pair;\n if calling map, consider a comprehension instead")) + + +include("hamt.jl") +using .HashArrayMappedTries +using Core.OptimizedGenerics: KeyValue +const HAMT = HashArrayMappedTries + +struct PersistentDict{K,V} <: AbstractDict{K,V} + trie::HAMT.HAMT{K,V} + # Serves as a marker for an empty initialization + @noinline function KeyValue.set(::Type{PersistentDict{K, V}}) where {K, V} + new{K, V}(HAMT.HAMT{K,V}()) + end + @noinline function KeyValue.set(::Type{PersistentDict{K, V}}, ::Nothing, key, val) where {K, V} + new{K, V}(HAMT.HAMT{K, V}(key => val)) + end + @noinline Base.@assume_effects :effect_free :terminates_globally KeyValue.set( + dict::PersistentDict{K, V}, key, val) where {K, V} = @inline _keyvalueset(dict, key, val) + @noinline Base.@assume_effects :nothrow :effect_free :terminates_globally KeyValue.set( + dict::PersistentDict{K, V}, key::K, val::V) where {K, V} = @inline _keyvalueset(dict, key, val) + global function _keyvalueset(dict::PersistentDict{K, V}, key, val) where {K, V} + trie = dict.trie + h = HAMT.HashState(key) + found, present, trie, i, bi, top, hs = HAMT.path(trie, key, h, #=persistent=#true) + HAMT.insert!(found, present, trie, i, bi, hs, val) + return new{K, V}(top) + end + @noinline Base.@assume_effects :effect_free :terminates_globally KeyValue.set( + dict::PersistentDict{K, V}, key) where {K, V} = @inline _keyvalueset(dict, key) + @noinline Base.@assume_effects :nothrow :effect_free :terminates_globally KeyValue.set( + dict::PersistentDict{K, V}, key::K) where {K, V} = @inline _keyvalueset(dict, key) + global function _keyvalueset(dict::PersistentDict{K, V}, key) where {K, V} + trie = dict.trie + h = HAMT.HashState(key) + found, present, trie, i, bi, top, _ = HAMT.path(trie, key, h, #=persistent=#true) + if found && present + deleteat!(trie.data, i) + HAMT.unset!(trie, bi) + end + return new{K, V}(top) + end +end + +""" + PersistentDict + +`PersistentDict` is a dictionary implemented as an hash array mapped trie, +which is optimal for situations where you need persistence, each operation +returns a new dictionary separate from the previous one, but the underlying +implementation is space-efficient 
and may share storage across multiple +separate dictionaries. + +!!! note + It behaves like an IdDict. + +```julia +PersistentDict(KV::Pair) +``` + +# Examples + +```jldoctest +julia> dict = Base.PersistentDict(:a=>1) +Base.PersistentDict{Symbol, Int64} with 1 entry: + :a => 1 + +julia> dict2 = Base.delete(dict, :a) +Base.PersistentDict{Symbol, Int64}() + +julia> dict3 = Base.PersistentDict(dict, :a=>2) +Base.PersistentDict{Symbol, Int64} with 1 entry: + :a => 2 +``` +""" +PersistentDict + +PersistentDict{K,V}() where {K, V} = KeyValue.set(PersistentDict{K,V}) +function PersistentDict{K,V}(KV::Pair) where {K,V} + KeyValue.set( + PersistentDict{K, V}, + nothing, + KV...) +end +function PersistentDict(KV::Pair{K,V}) where {K,V} + KeyValue.set( + PersistentDict{K, V}, + nothing, + KV...) +end +PersistentDict(dict::PersistentDict, pair::Pair) = PersistentDict(dict, pair...) +PersistentDict{K,V}(dict::PersistentDict{K,V}, pair::Pair) where {K,V} = PersistentDict(dict, pair...) + + +function PersistentDict(dict::PersistentDict{K,V}, key, val) where {K,V} + key = convert(K, key) + val = convert(V, val) + return KeyValue.set(dict, key, val) +end + +function PersistentDict{K,V}(KV::Pair, rest::Pair...) where {K,V} + dict = PersistentDict{K,V}(KV) + for (key, value) in rest + dict = PersistentDict(dict, key, value) + end + return dict +end + +function PersistentDict(kv::Pair, rest::Pair...) + dict = PersistentDict(kv) + for (key, value) in rest + dict = PersistentDict(dict, key, value) + end + return dict +end + +eltype(::PersistentDict{K,V}) where {K,V} = Pair{K,V} + +function in(key_val::Pair{K,V}, dict::PersistentDict{K,V}, valcmp=(==)) where {K,V} + key, val = key_val + found = KeyValue.get(dict, key) + found === nothing && return false + return valcmp(val, only(found)) +end + +function haskey(dict::PersistentDict{K}, key::K) where K + return KeyValue.get(dict, key) !== nothing +end + +function getindex(dict::PersistentDict{K,V}, key::K) where {K,V} + found = KeyValue.get(dict, key) + found === nothing && throw(KeyError(key)) + return only(found) +end + +function get(dict::PersistentDict{K,V}, key::K, default) where {K,V} + found = KeyValue.get(dict, key) + found === nothing && return default + return only(found) +end + +@noinline function KeyValue.get(dict::PersistentDict{K, V}, key) where {K, V} + trie = dict.trie + if HAMT.islevel_empty(trie) + return nothing + end + h = HAMT.HashState(key) + found, present, trie, i, _, _, _ = HAMT.path(trie, key, h) + if found && present + leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V} + return (leaf.val,) + end + return nothing +end + +@noinline function KeyValue.get(default, dict::PersistentDict, key) + found = KeyValue.get(dict, key) + found === nothing && return default() + return only(found) +end + +function get(default::Callable, dict::PersistentDict{K,V}, key::K) where {K,V} + found = KeyValue.get(dict, key) + found === nothing && return default() + return only(found) +end + +function delete(dict::PersistentDict{K}, key::K) where K + return KeyValue.set(dict, key) +end + +iterate(dict::PersistentDict, state=nothing) = HAMT.iterate(dict.trie, state) + +length(dict::PersistentDict) = HAMT.length(dict.trie) +isempty(dict::PersistentDict) = HAMT.isempty(dict.trie) +empty(::PersistentDict, ::Type{K}, ::Type{V}) where {K, V} = PersistentDict{K, V}() + +@propagate_inbounds Iterators.only(dict::PersistentDict) = Iterators._only(dict, first) diff --git a/base/div.jl b/base/div.jl index 9c2187e662ee9..3fec8d2f5cdf3 100644 --- a/base/div.jl +++ b/base/div.jl @@ 
-22,6 +22,8 @@ See also [`fld`](@ref) and [`cld`](@ref), which are special cases of this functi # Examples: ```jldoctest +julia> div(4, 3, RoundToZero) # Matches div(4, 3) +1 julia> div(4, 3, RoundDown) # Matches fld(4, 3) 1 julia> div(4, 3, RoundUp) # Matches cld(4, 3) @@ -41,6 +43,21 @@ julia> div(4, 3, RoundFromZero) julia> div(-4, 3, RoundFromZero) -2 ``` +Because `div(x, y)` implements strictly correct truncated rounding based on the true +value of floating-point numbers, unintuitive situations can arise. For example: +```jldoctest +julia> div(6.0, 0.1) +59.0 +julia> 6.0 / 0.1 +60.0 +julia> 6.0 / big(0.1) +59.99999999999999666933092612453056361837965690217069245739573412231113406246995 +``` +What is happening here is that the true value of the floating-point number written +as `0.1` is slightly larger than the numerical value 1/10 while `6.0` represents +the number 6 precisely. Therefore the true value of `6.0 / 0.1` is slightly less +than 60. When doing division, this is rounded to precisely `60.0`, but +`div(6.0, 0.1, RoundToZero)` always truncates the true value, so the result is `59.0`. """ div(x, y, r::RoundingMode) diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl index e0733280e7c7d..61c0cf71e70c2 100644 --- a/base/docs/Docs.jl +++ b/base/docs/Docs.jl @@ -3,7 +3,7 @@ """ Docs -The `Docs` module provides the `@doc` macro which can be used to set and retrieve +The `Docs` module provides the [`@doc`](@ref) macro which can be used to set and retrieve documentation metadata for Julia objects. Please see the manual section on [documentation](@ref man-documentation) for more @@ -19,8 +19,9 @@ module Docs Functions, methods and types can be documented by placing a string before the definition: \"\"\" - # The Foo Function - `foo(x)`: Foo the living hell out of `x`. + foo(x) + + Return a fooified version of `x`. \"\"\" foo(x) = ... @@ -33,8 +34,8 @@ The macro has special parsing so that the documented object may occur on the nex By default, documentation is written as Markdown, but any object can be used as the first argument. -## Documenting objects after they are defined -You can document an object after its definition by +## Documenting objects separately from their definitions +You can document an object before or after its definition with @doc "foo" function_to_doc @doc "bar" TypeToDoc @@ -60,12 +61,12 @@ function. include("bindings.jl") -import .Base.Meta: quot, isexpr +import .Base.Meta: quot, isexpr, unblock, unescape, uncurly import .Base: Callable, with_output_color using .Base: RefValue, mapany import ..CoreDocs: lazy_iterpolate -export doc +export doc, hasdoc, undocumented_names # Basic API / Storage @@ -194,7 +195,7 @@ docexpr(__source__, __module__, args...) = Expr(:call, docstr, args...) Stores a collection of docstrings for related objects, ie. a `Function`/`DataType` and associated `Method` objects. -Each documented object in a `MultiDoc` is referred to by it's signature which is represented +Each documented object in a `MultiDoc` is referred to by its signature which is represented by a `Union` of `Tuple` types. For example, the following `Method` definition f(x, y) = ... @@ -243,7 +244,7 @@ function doc!(__module__::Module, b::Binding, str::DocStr, @nospecialize sig = U @warn "Replacing docs for `$b :: $sig` in module `$(__module__)`" else # The ordering of docstrings for each Binding is defined by the order in which they - # are initially added. Replacing a specific docstring does not change it's ordering. + # are initially added. 
Replacing a specific docstring does not change its ordering. push!(m.order, sig) end m.docs[sig] = str @@ -285,29 +286,6 @@ catdoc(xs...) = vcat(xs...) const keywords = Dict{Symbol, DocStr}() -function unblock(@nospecialize ex) - while isexpr(ex, :var"hygienic-scope") - isexpr(ex.args[1], :escape) || break - ex = ex.args[1].args[1] - end - isexpr(ex, :block) || return ex - exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args) - length(exs) == 1 || return ex - return unblock(exs[1]) -end - -# peek through ex to figure out what kind of expression it may eventually act like -# but ignoring scopes and line numbers -function unescape(@nospecialize ex) - ex = unblock(ex) - while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope") - ex = unblock(ex.args[1]) - end - return ex -end - -uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex - namify(@nospecialize x) = astname(x, isexpr(x, :macro))::Union{Symbol,Expr,GlobalRef} function astname(x::Expr, ismacro::Bool) @@ -469,6 +447,52 @@ more than one expression is marked then the same docstring is applied to each ex end `@__doc__` has no effect when a macro that uses it is not documented. + +!!! compat "Julia 1.12" + + This section documents a very subtle corner case that is only relevant to + macros which themselves both define other macros and then attempt to use them + within the same expansion. Such macros were impossible to write prior to + Julia 1.12 and are still quite rare. If you are not writing such a macro, + you may ignore this note. + + In versions prior to Julia 1.12, macroexpansion would recursively expand through + `Expr(:toplevel)` blocks. This behavior was changed in Julia 1.12 to allow + macros to recursively define other macros and use them in the same returned + expression. However, to preserve backwards compatibility with existing uses of + `@__doc__`, the doc system will still expand through `Expr(:toplevel)` blocks + when looking for `@__doc__` markers. As a result, macro-defining-macros will + have an observable behavior difference when annotated with a docstring: + + ```julia + julia> macro macroception() + Expr(:toplevel, :(macro foo() 1 end), :(@foo)) + end + + julia> @macroception + 1 + + julia> "Docstring" @macroception + ERROR: LoadError: UndefVarError: `@foo` not defined in `Main` + ``` + + The supported workaround is to manually expand the `@__doc__` macro in the + defining macro, which the docsystem will recognize and suppress the recursive + expansion: + + ```julia + julia> macro macroception() + Expr(:toplevel, + macroexpand(__module__, :(@__doc__ macro foo() 1 end); recursive=false), + :(@foo)) + end + + julia> @macroception + 1 + + julia> "Docstring" @macroception + 1 + ``` """ :(Core.@__doc__) @@ -476,17 +500,23 @@ function __doc__!(source, mod, meta, def, define::Bool) @nospecialize source mod meta def # Two cases must be handled here to avoid redefining all definitions contained in `def`: if define - # `def` has not been defined yet (this is the common case, i.e. when not generating - # the Base image). We just need to convert each `@__doc__` marker to an `@doc`. - finddoc(def) do each + function replace_meta_doc(each) each.head = :macrocall each.args = Any[Symbol("@doc"), source, mod, nothing, meta, each.args[end], define] end + + # `def` has not been defined yet (this is the common case, i.e. when not generating + # the Base image). We just need to convert each `@__doc__` marker to an `@doc`. 
+ found = finddoc(replace_meta_doc, mod, def; expand_toplevel = false) + + if !found + found = finddoc(replace_meta_doc, mod, def; expand_toplevel = true) + end else # `def` has already been defined during Base image gen so we just need to find and # document any subexpressions marked with `@__doc__`. docs = [] - found = finddoc(def) do each + found = finddoc(mod, def; expand_toplevel = true) do each push!(docs, :(@doc($source, $mod, $meta, $(each.args[end]), $define))) end # If any subexpressions have been documented then replace the entire expression with @@ -495,25 +525,30 @@ function __doc__!(source, mod, meta, def, define::Bool) def.head = :toplevel def.args = docs end - found end + return found end # Walk expression tree `def` and call `λ` when any `@__doc__` markers are found. Returns # `true` to signify that at least one `@__doc__` has been found, and `false` otherwise. -function finddoc(λ, def::Expr) +function finddoc(λ, mod::Module, def::Expr; expand_toplevel::Bool=false) if isexpr(def, :block, 2) && isexpr(def.args[1], :meta, 1) && (def.args[1]::Expr).args[1] === :doc # Found the macroexpansion of an `@__doc__` expression. λ(def) true else + if expand_toplevel && isexpr(def, :toplevel) + for i = 1:length(def.args) + def.args[i] = macroexpand(mod, def.args[i]) + end + end found = false for each in def.args - found |= finddoc(λ, each) + found |= finddoc(λ, mod, each; expand_toplevel) end found end end -finddoc(λ, @nospecialize def) = false +finddoc(λ, mod::Module, @nospecialize def; expand_toplevel::Bool=false) = false # Predicates and helpers for `docm` expression selection: @@ -528,14 +563,62 @@ isquotedmacrocall(@nospecialize x) = isbasicdoc(@nospecialize x) = isexpr(x, :.) || isa(x, Union{QuoteNode, Symbol}) is_signature(@nospecialize x) = isexpr(x, :call) || (isexpr(x, :(::), 2) && isexpr(x.args[1], :call)) || isexpr(x, :where) +function _doc(binding::Binding, sig::Type = Union{}) + if defined(binding) + result = getdoc(resolve(binding), sig) + result === nothing || return result + end + # Lookup first match for `binding` and `sig` in all modules of the docsystem. + for mod in modules + dict = meta(mod; autoinit=false) + isnothing(dict) && continue + if haskey(dict, binding) + multidoc = dict[binding] + for msig in multidoc.order + sig <: msig && return multidoc.docs[msig] + end + # if no matching signatures, return first + if !isempty(multidoc.docs) + return first(values(multidoc.docs)) + end + end + end + return nothing +end + +# Some additional convenience `doc` methods that take objects rather than `Binding`s. +_doc(obj::UnionAll) = _doc(Base.unwrap_unionall(obj)) +_doc(object, sig::Type = Union{}) = _doc(aliasof(object, typeof(object)), sig) +_doc(object, sig...) = _doc(object, Tuple{sig...}) + +function simple_lookup_doc(ex) + if isa(ex, Expr) && ex.head !== :(.) && Base.isoperator(ex.head) + # handle syntactic operators, e.g. +=, ::, .= + ex = ex.head + end + if haskey(keywords, ex) + return keywords[ex] + elseif !isa(ex, Expr) && !isa(ex, Symbol) + return :($(_doc)($(typeof)($(esc(ex))))) + end + binding = esc(bindingexpr(namify(ex))) + if isexpr(ex, :call) || isexpr(ex, :macrocall) || isexpr(ex, :where) + sig = esc(signature(ex)) + :($(_doc)($binding, $sig)) + else + :($(_doc)($binding)) + end +end + function docm(source::LineNumberNode, mod::Module, ex) @nospecialize ex if isexpr(ex, :->) && length(ex.args) > 1 return docm(source, mod, ex.args...) 
- elseif isassigned(Base.REPL_MODULE_REF) + elseif (REPL = Base.REPL_MODULE_REF[]) !== Base # TODO: this is a shim to continue to allow `@doc` for looking up docstrings - REPL = Base.REPL_MODULE_REF[] - return REPL.lookup_doc(ex) + return invokelatest(REPL.lookup_doc, ex) + else + return simple_lookup_doc(ex) end return nothing end @@ -551,8 +634,37 @@ iscallexpr(ex) = false function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true) @nospecialize meta ex # Some documented expressions may be decorated with macro calls which obscure the actual - # expression. Expand the macro calls and remove extra blocks. - x = unblock(macroexpand(mod, ex)) + # expression. Expand the macro calls. + x = macroexpand(mod, ex) + return _docm(source, mod, meta, x, define) +end + +function _docm(source::LineNumberNode, mod::Module, meta, x, define::Bool = true) + if isexpr(x, :var"hygienic-scope") + x.args[1] = _docm(source, mod, meta, x.args[1]) + return x + elseif isexpr(x, :escape) + x.args[1] = _docm(source, mod, meta, x.args[1]) + return x + elseif isexpr(x, :block) + docarg = 0 + for i = 1:length(x.args) + isa(x.args[i], LineNumberNode) && continue + if docarg == 0 + docarg = i + continue + end + # More than one documentable expression in the block, treat it as a whole + # expression, which will fall through and look for (Expr(:meta, doc)) + docarg = 0 + break + end + if docarg != 0 + x.args[docarg] = _docm(source, mod, meta, x.args[docarg], define) + return x + end + end + # Don't try to redefine expressions. This is only needed for `Base` img gen since # otherwise calling `loaddocs` would redefine all documented functions and types. def = define ? x : nothing @@ -617,7 +729,7 @@ function docm(source::LineNumberNode, mod::Module, meta, ex, define::Bool = true # All other expressions are undocumentable and should be handled on a case-by-case basis # with `@__doc__`. Unbound string literals are also undocumentable since they cannot be # retrieved from the module's metadata `IdDict` without a reference to the string. - docerror(ex) + docerror(x) return doc end @@ -638,21 +750,85 @@ include("utils.jl") # Swap out the bootstrap macro with the real one. Core.atdoc!(docm) -function loaddocs(docs::Vector{Core.SimpleVector}) - for (mod, ex, str, file, line) in docs +function loaddocs(docs::Base.CoreDocs.DocLinkedList) + while isdefined(docs, :doc) + (mod, ex, str, file, line) = docs.doc data = Dict{Symbol,Any}(:path => string(file), :linenumber => line) doc = docstr(str, data) lno = LineNumberNode(line, file) docstring = docm(lno, mod, doc, ex, false) # expand the real @doc macro now Core.eval(mod, Expr(:var"hygienic-scope", docstring, Docs, lno)) + docs = docs.next end - empty!(docs) nothing end +# FIXME: formatdoc, parsedoc, apropos, and doc are defined here (but only doc is exported) +# for historical reasons (#25738), but are *implemented* in REPL/src/docview.jl, while +# apropos is *exported* by InteractiveUtils and doc is exported by Docs. Seems +# like a more sensible refactoring should be possible. + function formatdoc end function parsedoc end + +""" + apropos([io::IO=stdout], pattern::Union{AbstractString,Regex}) + +Search available docstrings for entries containing `pattern`. + +When `pattern` is a string, case is ignored. Results are printed to `io`. 
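For instance, a minimal sketch of direct calls (the search patterns below are arbitrary, and the matches depend entirely on which docstrings are loaded, so output is elided):

```julia-repl
julia> apropos("bessel")            # case-insensitive substring search, prints matches to stdout
[...]

julia> apropos(r"ccall|cfunction")  # a Regex can be used for a more precise search
[...]
```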
+ +`apropos` can be called from the help mode in the REPL by wrapping the query in double quotes: +``` +help?> "pattern" +``` +""" function apropos end + +""" + Docs.doc(binding, sig) + +Return all documentation that matches both `binding` and `sig`. + +If `getdoc` returns a non-`nothing` result on the value of the binding, then a +dynamic docstring is returned instead of one based on the binding itself. +""" function doc end +""" + Docs.hasdoc(mod::Module, sym::Symbol)::Bool + +Return `true` if `sym` in `mod` has a docstring and `false` otherwise. +""" +hasdoc(mod::Module, sym::Symbol) = hasdoc(Docs.Binding(mod, sym)) +function hasdoc(binding::Docs.Binding, sig::Type = Union{}) + # this function is based on the Base.Docs.doc method implemented + # in REPL/src/docview.jl. TODO: refactor and unify these methods. + defined(binding) && !isnothing(getdoc(resolve(binding), sig)) && return true + for mod in modules + dict = meta(mod; autoinit=false) + !isnothing(dict) && haskey(dict, binding) && return true + end + alias = aliasof(binding) + return alias == binding ? false : hasdoc(alias, sig) +end + + +""" + undocumented_names(mod::Module; private=false) + +Return a sorted vector of undocumented symbols in `module` (that is, lacking docstrings). +`private=false` (the default) returns only identifiers declared with `public` and/or +`export`, whereas `private=true` returns all symbols in the module (excluding +compiler-generated hidden symbols starting with `#`). + +See also: [`names`](@ref), [`Docs.hasdoc`](@ref), [`Base.ispublic`](@ref). +""" +function undocumented_names(mod::Module; private::Bool=false) + filter!(names(mod; all=true)) do sym + !hasdoc(mod, sym) && !startswith(string(sym), '#') && + (private || Base.ispublic(mod, sym)) + end +end + end diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index fd8c35a5fdf76..88ed34de02b64 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -1,4 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# module BaseDocs @@ -36,6 +37,14 @@ kw"help", kw"Julia", kw"julia", kw"" available for direct use. Names can also be used via dot syntax (e.g. `Foo.foo` to access the name `foo`), whether they are `export`ed or not. See the [manual section about modules](@ref modules) for details. + +!!! note + When two or more packages/modules export a name and that name does not refer to the + same thing in each of the packages, and the packages are loaded via `using` without + an explicit list of names, it is an error to reference that name without qualification. + It is thus recommended that code intended to be forward-compatible with future versions + of its dependencies and of Julia, e.g., code in released packages, list the names it + uses from each loaded package, e.g., `using Foo: Foo, f` rather than `using Foo`. """ kw"using" @@ -52,13 +61,30 @@ kw"import" """ export -`export` is used within modules to tell Julia which functions should be +`export` is used within modules to tell Julia which names should be made available to the user. For example: `export foo` makes the name `foo` available when [`using`](@ref) the module. See the [manual section about modules](@ref modules) for details. """ kw"export" +""" + public + +`public` is used within modules to tell Julia which names are part of the +public API of the module. For example: `public foo` indicates that the name +`foo` is public, without making it available when [`using`](@ref) the module. 
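A rough sketch of the difference between `export` and `public` (the module `Example` and its functions are made up for illustration; error output abbreviated):

```julia-repl
julia> module Example
           export f
           public g
           f() = 1
           g() = 2
       end;

julia> using .Example

julia> f()          # exported, so usable without qualification
1

julia> g()          # public but not exported, so it must be qualified
ERROR: UndefVarError: `g` not defined in `Main`
[...]

julia> Example.g()
2
```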
+ +As [`export`](@ref) already indicates that a name is public, it is +unnecessary and an error to declare a name both as `public` and as `export`ed. +See the [manual section about modules](@ref modules) for details. + +!!! compat "Julia 1.11" + The public keyword was added in Julia 1.11. Prior to this the notion + of publicness was less explicit. +""" +kw"public" + """ as @@ -101,7 +127,7 @@ kw"abstract type", kw"abstract" `module` declares a [`Module`](@ref), which is a separate global variable workspace. Within a module, you can control which names from other modules are visible (via importing), and -specify which of your names are intended to be public (via exporting). +specify which of your names are intended to be public (via `export` and `public`). Modules allow you to create top-level definitions without worrying about name conflicts when your code is used together with somebody else’s. See the [manual section about modules](@ref modules) for more details. @@ -135,6 +161,8 @@ runtime initialization functions of external C libraries and initializing global that involve pointers returned by external libraries. See the [manual section about modules](@ref modules) for more details. +See also: [`OncePerProcess`](@ref). + # Examples ```julia const foo_data_ptr = Ref{Ptr{Cvoid}}(0) @@ -151,7 +179,7 @@ kw"__init__" baremodule `baremodule` declares a module that does not contain `using Base` or local definitions of -[`eval`](@ref Base.MainInclude.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words, +[`eval`](@ref Main.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words, ```julia module Mod @@ -203,7 +231,7 @@ kw"primitive type" A macro maps a sequence of argument expressions to a returned expression, and the resulting expression is substituted directly into the program at the point where the macro is invoked. -Macros are a way to run generated code without calling [`eval`](@ref Base.MainInclude.eval), +Macros are a way to run generated code without calling [`eval`](@ref Main.eval), since the generated code instead simply becomes part of the surrounding program. Macro arguments may include expressions, literal values, and symbols. Macros can be defined for variable number of arguments (varargs), but do not accept keyword arguments. @@ -378,11 +406,11 @@ Assigning `a` to `b` does not create a copy of `b`; instead use [`copy`](@ref) o ```jldoctest julia> b = [1]; a = b; b[1] = 2; a -1-element Array{Int64, 1}: +1-element Vector{Int64}: 2 julia> b = [1]; a = copy(b); b[1] = 2; a -1-element Array{Int64, 1}: +1-element Vector{Int64}: 1 ``` @@ -392,7 +420,7 @@ julia> function f!(x); x[:] .+= 1; end f! (generic function with 1 method) julia> a = [1]; f!(a); a -1-element Array{Int64, 1}: +1-element Vector{Int64}: 2 ``` @@ -411,7 +439,7 @@ julia> a, b Assignment can operate on multiple variables in series, and will return the value of the right-hand-most expression: ```jldoctest julia> a = [1]; b = [2]; c = [3]; a = b = c -1-element Array{Int64, 1}: +1-element Vector{Int64}: 3 julia> b[1] = 2; a, b, c @@ -421,11 +449,11 @@ julia> b[1] = 2; a, b, c Assignment at out-of-bounds indices does not grow a collection. If the collection is a [`Vector`](@ref) it can instead be grown with [`push!`](@ref) or [`append!`](@ref). ```jldoctest julia> a = [1, 1]; a[3] = 2 -ERROR: BoundsError: attempt to access 2-element Array{Int64, 1} at index [3] +ERROR: BoundsError: attempt to access 2-element Vector{Int64} at index [3] [...] 
julia> push!(a, 2, 3) -4-element Array{Int64, 1}: +4-element Vector{Int64}: 1 1 2 @@ -439,7 +467,7 @@ ERROR: DimensionMismatch: tried to assign 0 elements to 1 destinations [...] julia> filter!(x -> x > 1, a) # in-place & thus more efficient than a = a[a .> 1] -2-element Array{Int64, 1}: +2-element Vector{Int64}: 2 3 @@ -462,14 +490,14 @@ assignment expression is converted into a single loop. julia> A = zeros(4, 4); B = [1, 2, 3, 4]; julia> A .= B -4×4 Array{Float64, 2}: +4×4 Matrix{Float64}: 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0 3.0 3.0 3.0 3.0 4.0 4.0 4.0 4.0 julia> A -4×4 Array{Float64, 2}: +4×4 Matrix{Float64}: 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0 3.0 3.0 3.0 3.0 @@ -645,8 +673,11 @@ kw"{", kw"{}", kw"}" """ [] -Square braces are used for [indexing](@ref man-array-indexing), [indexed assignment](@ref man-indexed-assignment), -[array literals](@ref man-array-literals), and [array comprehensions](@ref man-comprehensions). +Square brackets are used for [indexing](@ref man-array-indexing) ([`getindex`](@ref)), +[indexed assignment](@ref man-indexed-assignment) ([`setindex!`](@ref)), +[array literals](@ref man-array-literals) ([`Base.vect`](@ref)), +[array concatenation](@ref man-array-concatenation) ([`vcat`](@ref), [`hcat`](@ref), [`hvcat`](@ref), [`hvncat`](@ref)), +and [array comprehensions](@ref man-comprehensions) ([`collect`](@ref)). """ kw"[", kw"[]", kw"]" @@ -916,11 +947,14 @@ expression, rather than the side effects that evaluating `b` or `c` may have. See the manual section on [control flow](@ref man-conditional-evaluation) for more details. # Examples -``` +```jldoctest julia> x = 1; y = 2; -julia> x > y ? println("x is larger") : println("y is larger") -y is larger +julia> x > y ? println("x is larger") : println("x is not larger") +x is not larger + +julia> x > y ? "x is larger" : x == y ? "x and y are equal" : "y is larger" +"y is larger" ``` """ kw"?", kw"?:" @@ -984,12 +1018,12 @@ collection or the last index of a dimension of an array. # Examples ```jldoctest julia> A = [1 2; 3 4] -2×2 Array{Int64, 2}: +2×2 Matrix{Int64}: 1 2 3 4 julia> A[end, :] -2-element Array{Int64, 1}: +2-element Vector{Int64}: 3 4 ``` @@ -1031,13 +1065,42 @@ exception object to the given variable within the `catch` block. The power of the `try`/`catch` construct lies in the ability to unwind a deeply nested computation immediately to a much higher level in the stack of calling functions. + +A `try/catch` block can also have an `else` clause that executes only if no exception occurred: +```julia +try + a_dangerous_operation() +catch + @warn "The operation failed." +else + @info "The operation succeeded." +end +``` + +A `try` or `try`/`catch` block can also have a [`finally`](@ref) clause that executes +at the end, regardless of whether an exception occurred. For example, this can be +used to guarantee that an opened file is closed: +```julia +f = open("file") +try + operate_on_file(f) +catch + @warn "An error occurred!" +finally + close(f) +end +``` +(`finally` can also be used without a `catch` block.) + +!!! compat "Julia 1.8" + Else clauses require at least Julia 1.8. """ kw"try", kw"catch" """ finally -Run some code when a given block of code exits, regardless +Run some code when a given `try` block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed: @@ -1242,6 +1305,12 @@ kw";" Short-circuiting boolean AND. +This is equivalent to `x ? y : false`: it returns `false` if `x` is `false` and the result of evaluating `y` if `x` is `true`. 
+Note that if `y` is an expression, it is only evaluated when `x` is `true`, which is called "short-circuiting" behavior. + +Also, `y` does not need to have a boolean value. This means that `(condition) && (statement)` can be used as shorthand for +`if condition; statement; end` for an arbitrary `statement`. + See also [`&`](@ref), the ternary operator `? :`, and the manual section on [control flow](@ref man-conditional-evaluation). # Examples @@ -1253,6 +1322,9 @@ true julia> x < 0 && error("expected positive x") false + +julia> x > 0 && "not a boolean" +"not a boolean" ``` """ kw"&&" @@ -1262,6 +1334,12 @@ kw"&&" Short-circuiting boolean OR. +This is equivalent to `x ? true : y`: it returns `true` if `x` is `true` and the result of evaluating `y` if `x` is `false`. +Note that if `y` is an expression, it is only evaluated when `x` is `false`, which is called "short-circuiting" behavior. + +Also, `y` does not need to have a boolean value. This means that `(condition) || (statement)` can be used as shorthand for +`if !(condition); statement; end` for an arbitrary `statement`. + See also: [`|`](@ref), [`xor`](@ref), [`&&`](@ref). # Examples @@ -1271,6 +1349,9 @@ true julia> false || true || println("neither is true!") true + +julia> pi < 3 || "not a boolean" +"not a boolean" ``` """ kw"||" @@ -1315,7 +1396,11 @@ a tuple of types. All types, as well as the LLVM code, should be specified as li not as variables or expressions (it may be necessary to use `@eval` to generate these literals). -See `test/llvmcall.jl` for usage examples. +[Opaque pointers](https://llvm.org/docs/OpaquePointers.html) (written as `ptr`) are not allowed in the LLVM code. + +See +[`test/llvmcall.jl`](https://github.com/JuliaLang/julia/blob/v$VERSION/test/llvmcall.jl) +for usage examples. """ Core.Intrinsics.llvmcall @@ -1335,17 +1420,21 @@ Usually `begin` will not be necessary, since keywords such as [`function`](@ref) implicitly begin blocks of code. See also [`;`](@ref). `begin` may also be used when indexing to represent the first index of a -collection or the first index of a dimension of an array. +collection or the first index of a dimension of an array. For example, +`a[begin]` is the first element of an array `a`. + +!!! compat "Julia 1.4" + Use of `begin` as an index requires Julia 1.4 or later. # Examples ```jldoctest julia> A = [1 2; 3 4] -2×2 Array{Int64,2}: +2×2 Matrix{Int64}: 1 2 3 4 julia> A[begin, :] -2-element Array{Int64,1}: +2-element Vector{Int64}: 1 2 ``` @@ -1396,8 +1485,20 @@ kw"struct" mutable struct `mutable struct` is similar to [`struct`](@ref), but additionally allows the -fields of the type to be set after construction. See the manual section on -[Composite Types](@ref) for more information. +fields of the type to be set after construction. + +Individual fields of a mutable struct can be marked as `const` to make them immutable: + +```julia +mutable struct Baz + a::Int + const b::Float64 +end +``` +!!! compat "Julia 1.8" + The `const` keyword for fields of mutable structs requires at least Julia 1.8. + +See the manual section on [Composite Types](@ref) for more information. """ kw"mutable struct" @@ -1414,7 +1515,7 @@ kw"new" """ where -The `where` keyword creates a type that is an iterated union of other types, over all +The `where` keyword creates a [`UnionAll`](@ref) type, which may be thought of as an iterated union of other types, over all values of some variable. For example `Vector{T} where T<:Real` includes all [`Vector`](@ref)s where the element type is some kind of `Real` number.
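A small sketch of how such `where` types behave under subtyping (standard behavior, not specific to this change):

```julia-repl
julia> Vector{Int} <: (Vector{T} where T<:Real)
true

julia> Vector{String} <: (Vector{T} where T<:Real)
false

julia> (Vector{T} where T<:Real) isa UnionAll
true
```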
@@ -1499,6 +1600,8 @@ Nothing The singleton instance of type [`Nothing`](@ref), used by convention when there is no value to return (as in a C `void` function) or when a variable or field holds no value. +A return value of `nothing` is not displayed by the REPL and similar interactive environments. + See also: [`isnothing`](@ref), [`something`](@ref), [`missing`](@ref). """ nothing @@ -1601,6 +1704,34 @@ julia> ex.msg """ ErrorException +""" + FieldError(type::DataType, field::Symbol) + +An operation tried to access invalid `field` on an object of `type`. + +!!! compat "Julia 1.12" + Prior to Julia 1.12, invalid field access threw an [`ErrorException`](@ref) + +See [`getfield`](@ref) + +# Examples +```jldoctest +julia> struct AB + a::Float32 + b::Float64 + end + +julia> ab = AB(1, 3) +AB(1.0f0, 3.0) + +julia> ab.c # field `c` doesn't exist +ERROR: FieldError: type AB has no field `c`, available fields: `a`, `b` +Stacktrace: +[...] +``` +""" +FieldError + """ WrappedException(msg) @@ -1738,12 +1869,22 @@ Stacktrace: DomainError """ - Task(func) + Task(func[, reserved_stack::Int]) Create a `Task` (i.e. coroutine) to execute the given function `func` (which must be callable with no arguments). The task exits when this function returns. The task will run in the "world age" from the parent at construction when [`schedule`](@ref)d. +The optional `reserved_stack` argument specifies the size of the stack available +for this task, in bytes. The default, `0`, uses the system-dependent stack size default. + +!!! warning + By default tasks will have the sticky bit set to true `t.sticky`. This models the + historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread + they are first scheduled on, and when scheduled will make the task that they were scheduled + from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky + bit manually to `false`. + # Examples ```jldoctest julia> a() = sum(i for i in 1:1000); @@ -1794,14 +1935,14 @@ In these examples, `a` is a [`Rational`](@ref), which has two fields. nfields """ - UndefVarError(var::Symbol) + UndefVarError(var::Symbol, [scope]) A symbol in the current scope is not defined. # Examples ```jldoctest julia> a -ERROR: UndefVarError: `a` not defined +ERROR: UndefVarError: `a` not defined in `Main` julia> a = 1; @@ -1889,21 +2030,49 @@ applicable """ invoke(f, argtypes::Type, args...; kwargs...) + invoke(f, argtypes::Method, args...; kwargs...) + invoke(f, argtypes::CodeInstance, args...; kwargs...) Invoke a method for the given generic function `f` matching the specified types `argtypes` on the specified arguments `args` and passing the keyword arguments `kwargs`. The arguments `args` must conform with the specified types in `argtypes`, i.e. conversion is not automatically performed. This method allows invoking a method other than the most specific matching method, which is useful when the behavior of a more general definition is explicitly needed (often as part of the -implementation of a more specific method of the same function). +implementation of a more specific method of the same function). However, because this means +the runtime must do more work, `invoke` is generally also slower--sometimes significantly +so--than doing normal dispatch with a regular call. -Be careful when using `invoke` for functions that you don't write. What definition is used +Be careful when using `invoke` for functions that you don't write. 
What definition is used for given `argtypes` is an implementation detail unless the function is explicitly states that calling with certain `argtypes` is a part of public API. For example, the change between `f1` and `f2` in the example below is usually considered compatible because the change is invisible by the caller with a normal (non-`invoke`) call. However, the change is visible if you use `invoke`. +# Passing a `Method` instead of a signature +The `argtypes` argument may be a `Method`, in which case the ordinary method table lookup is +bypassed entirely and the given method is invoked directly. Needing this feature is uncommon. +Note in particular that the specified `Method` may be entirely unreachable from ordinary dispatch +(or ordinary invoke), e.g. because it was replaced or fully covered by more specific methods. +If the method is part of the ordinary method table, this call behaves similar +to `invoke(f, method.sig, args...)`. + +!!! compat "Julia 1.12" + Passing a `Method` requires Julia 1.12. + +# Passing a `CodeInstance` instead of a signature +The `argtypes` argument may be a `CodeInstance`, bypassing both method lookup and specialization. +The semantics of this invocation are similar to a function pointer call of the `CodeInstance`'s +`invoke` pointer. It is an error to invoke a `CodeInstance` with arguments that do not match its +parent `MethodInstance` or from a world age not included in the `min_world`/`max_world` range. +It is undefined behavior to invoke a `CodeInstance` whose behavior does not match the constraints +specified in its fields. For some code instances with `owner !== nothing` (i.e. those generated +by external compilers), it may be an error to invoke them after passing through precompilation. +This is an advanced interface intended for use with external compiler plugins. + +!!! compat "Julia 1.12" + Passing a `CodeInstance` requires Julia 1.12. + # Examples ```jldoctest julia> f(x::Real) = x^2; @@ -2003,7 +2172,21 @@ AbstractFloat """ Integer <: Real -Abstract supertype for all integers. +Abstract supertype for all integers (e.g. [`Signed`](@ref), [`Unsigned`](@ref), and [`Bool`](@ref)). + +See also [`isinteger`](@ref), [`trunc`](@ref), [`div`](@ref). + +# Examples +``` +julia> 42 isa Integer +true + +julia> 1.0 isa Integer +false + +julia> isinteger(1.0) +true +``` """ Integer @@ -2018,6 +2201,21 @@ Signed Unsigned <: Integer Abstract supertype for all unsigned integers. + +Built-in unsigned integers are printed in hexadecimal, with prefix `0x`, +and can be entered in the same way. + +# Examples +``` +julia> typemax(UInt8) +0xff + +julia> Int(0x00d) +13 + +julia> unsigned(true) +0x0000000000000001 +``` """ Unsigned @@ -2028,57 +2226,147 @@ Boolean type, containing the values `true` and `false`. `Bool` is a kind of number: `false` is numerically equal to `0` and `true` is numerically equal to `1`. -Moreover, `false` acts as a multiplicative "strong zero": +Moreover, `false` acts as a multiplicative "strong zero" +against [`NaN`](@ref) and [`Inf`](@ref): ```jldoctest -julia> false == 0 +julia> [true, false] == [1, 0] true -julia> true == 1 -true +julia> 42.0 + true +43.0 -julia> 0 * NaN -NaN +julia> 0 .* (NaN, Inf, -Inf) +(NaN, NaN, NaN) -julia> false * NaN -0.0 +julia> false .* (NaN, Inf, -Inf) +(0.0, 0.0, -0.0) ``` -See also: [`digits`](@ref), [`iszero`](@ref), [`NaN`](@ref). +Branches via [`if`](@ref) and other conditionals only accept `Bool`. +There are no "truthy" values in Julia. 
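For example (a brief sketch; the exact error text can differ between Julia versions):

```julia-repl
julia> if 1                      # an Int is not accepted as a condition
           println("this does not run")
       end
ERROR: TypeError: non-boolean (Int64) used in boolean context
```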
+ +Comparisons typically return `Bool`, and broadcasted comparisons may +return [`BitArray`](@ref) instead of an `Array{Bool}`. + +```jldoctest +julia> [1 2 3 4 5] .< pi +1×5 BitMatrix: + 1 1 1 0 0 + +julia> map(>(pi), [1 2 3 4 5]) +1×5 Matrix{Bool}: + 0 0 0 1 1 +``` + +See also [`trues`](@ref), [`falses`](@ref), [`ifelse`](@ref). """ Bool -for (bit, sign, exp, frac) in ((16, 1, 5, 10), (32, 1, 8, 23), (64, 1, 11, 52)) - @eval begin - """ - Float$($bit) <: AbstractFloat +""" + Float64 <: AbstractFloat <: Real - $($bit)-bit floating point number type (IEEE 754 standard). +64-bit floating point number type (IEEE 754 standard). +Binary format is 1 sign, 11 exponent, 52 fraction bits. +See [`bitstring`](@ref), [`signbit`](@ref), [`exponent`](@ref), [`frexp`](@ref), +and [`significand`](@ref) to access various bits. - Binary format: $($sign) sign, $($exp) exponent, $($frac) fraction bits. - """ - $(Symbol("Float", bit)) - end -end +This is the default for floating point literals, `1.0 isa Float64`, +and for many operations such as `1/2, 2pi, log(2), range(0,90,length=4)`. +Unlike integers, this default does not change with `Sys.WORD_SIZE`. + +The exponent for scientific notation can be entered as `e` or `E`, +thus `2e3 === 2.0E3 === 2.0 * 10^3`. Doing so is strongly preferred over +`10^n` because integers overflow, thus `2.0 * 10^19 < 0` but `2e19 > 0`. + +See also [`Inf`](@ref), [`NaN`](@ref), [`floatmax`](@ref), [`Float32`](@ref), [`Complex`](@ref). +""" +Float64 + +""" + Float32 <: AbstractFloat <: Real + +32-bit floating point number type (IEEE 754 standard). +Binary format is 1 sign, 8 exponent, 23 fraction bits. + +The exponent for scientific notation should be entered as lower-case `f`, +thus `2f3 === 2.0f0 * 10^3 === Float32(2_000)`. +For array literals and comprehensions, the element type can be specified before +the square brackets: `Float32[1,4,9] == Float32[i^2 for i in 1:3]`. + +See also [`Inf32`](@ref), [`NaN32`](@ref), [`Float16`](@ref), [`exponent`](@ref), [`frexp`](@ref). +""" +Float32 + +""" + Float16 <: AbstractFloat <: Real + +16-bit floating point number type (IEEE 754 standard). +Binary format is 1 sign, 5 exponent, 10 fraction bits. +""" +Float16 for bit in (8, 16, 32, 64, 128) + type = Symbol(:Int, bit) + srange = bit > 31 ? "" : "Represents numbers `n ∈ " * repr(eval(:(typemin($type):typemax($type)))) * "`.\n" + unshow = repr(eval(Symbol(:UInt, bit))(bit-1)) + @eval begin """ - Int$($bit) <: Signed + Int$($bit) <: Signed <: Integer $($bit)-bit signed integer type. + + $($(srange))Note that such integers overflow without warning, + thus `typemax($($type)) + $($type)(1) < 0`. + + See also [`Int`](@ref $Int), [`widen`](@ref), [`BigInt`](@ref). """ $(Symbol("Int", bit)) """ - UInt$($bit) <: Unsigned + UInt$($bit) <: Unsigned <: Integer $($bit)-bit unsigned integer type. + + Printed in hexadecimal, thus $($(unshow)) == $($(bit-1)). """ $(Symbol("UInt", bit)) end end +""" + Int + +Sys.WORD_SIZE-bit signed integer type, `Int <: Signed <: Integer <: Real`. + +This is the default type of most integer literals and is an alias for either `Int32` +or `Int64`, depending on `Sys.WORD_SIZE`. It is the type returned by functions such as +[`length`](@ref), and the standard type for indexing arrays. + +Note that integers overflow without warning, thus `typemax(Int) + 1 < 0` and `10^19 < 0`. +Overflow can be avoided by using [`BigInt`](@ref). +Very large integer literals will use a wider type, for instance `10_000_000_000_000_000_000 isa Int128`. 
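A short sketch of the overflow and widening behavior described above, assuming a 64-bit `Int`:

```julia-repl
julia> typemax(Int64) + 1              # silently wraps around
-9223372036854775808

julia> big(typemax(Int64)) + 1         # BigInt arithmetic does not overflow
9223372036854775808

julia> 10_000_000_000_000_000_000 isa Int128   # too large for Int64, so the literal widens
true
```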
+ +Integer division is [`div`](@ref) alias `÷`, +whereas [`/`](@ref) acting on integers returns [`Float64`](@ref). + +See also [`$(Symbol("Int", Sys.WORD_SIZE))`](@ref), [`widen`](@ref), [`typemax`](@ref), [`bitstring`](@ref). +""" +Int + +""" + UInt + +Sys.WORD_SIZE-bit unsigned integer type, `UInt <: Unsigned <: Integer`. + +Like [`Int`](@ref Int), the alias `UInt` may point to either `UInt32` or `UInt64`, +according to the value of `Sys.WORD_SIZE` on a given computer. + +Printed and parsed in hexadecimal: `UInt(15) === $(repr(UInt(15)))`. +""" +UInt + """ Symbol @@ -2211,11 +2499,14 @@ setfield! swapfield!(value, name::Symbol, x, [order::Symbol]) swapfield!(value, i::Int, x, [order::Symbol]) -These atomically perform the operations to simultaneously get and set a field: +Atomically perform the operations to simultaneously get and set a field: y = getfield(value, name) setfield!(value, name, x) return y + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. """ swapfield! @@ -2223,7 +2514,7 @@ swapfield! modifyfield!(value, name::Symbol, op, x, [order::Symbol]) -> Pair modifyfield!(value, i::Int, op, x, [order::Symbol]) -> Pair -These atomically perform the operations to get and set a field after applying +Atomically perform the operations to get and set a field after applying the function `op`. y = getfield(value, name) @@ -2233,6 +2524,9 @@ the function `op`. If supported by the hardware (for example, atomic increment), this may be optimized to the appropriate hardware instruction, otherwise it'll use a loop. + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. """ modifyfield! @@ -2242,7 +2536,7 @@ modifyfield! replacefield!(value, i::Int, expected, desired, [success_order::Symbol, [fail_order::Symbol=success_order]) -> (; old, success::Bool) -These atomically perform the operations to get and conditionally set a field to +Atomically perform the operations to get and conditionally set a field to a given value. y = getfield(value, name, fail_order) @@ -2254,9 +2548,30 @@ a given value. If supported by the hardware, this may be optimized to the appropriate hardware instruction, otherwise it'll use a loop. + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. """ replacefield! +""" + setfieldonce!(value, name::Union{Int,Symbol}, desired, + [success_order::Symbol, [fail_order::Symbol=success_order]) -> success::Bool + +Atomically perform the operations to set a field to +a given value, only if it was previously not set. + + ok = !isdefined(value, name, fail_order) + if ok + setfield!(value, name, desired, success_order) + end + return ok + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. +""" +setfieldonce! + """ getglobal(module::Module, name::Symbol, [order::Symbol=:monotonic]) @@ -2297,6 +2612,7 @@ julia> getglobal(M, :a) """ getglobal + """ setglobal!(module::Module, name::Symbol, x, [order::Symbol=:monotonic]) @@ -2318,11 +2634,17 @@ cases. See also [`setproperty!`](@ref Base.setproperty!) and [`getglobal`](@ref) # Examples -```jldoctest -julia> module M end; +```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*\\n.*)*" +julia> module M; global a; end; julia> M.a # same as `getglobal(M, :a)` -ERROR: UndefVarError: `a` not defined +ERROR: UndefVarError: `a` not defined in `M` +Suggestion: add an appropriate import or assignment. This global was declared but not assigned. 
+Stacktrace: + [1] getproperty(x::Module, f::Symbol) + @ Base ./Base_compiler.jl:40 + [2] top-level scope + @ none:1 julia> setglobal!(M, :a, 1) 1 @@ -2333,6 +2655,69 @@ julia> M.a """ setglobal! +""" + Core.get_binding_type(module::Module, name::Symbol) + +Retrieve the declared type of the binding `name` from the module `module`. + +!!! compat "Julia 1.9" + This function requires Julia 1.9 or later. +""" +Core.get_binding_type + +""" + swapglobal!(module::Module, name::Symbol, x, [order::Symbol=:monotonic]) + +Atomically perform the operations to simultaneously get and set a global. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`swapproperty!`](@ref Base.swapproperty!) and [`setglobal!`](@ref). +""" +swapglobal! + +""" + modifyglobal!(module::Module, name::Symbol, op, x, [order::Symbol=:monotonic]) -> Pair + +Atomically perform the operations to get and set a global after applying +the function `op`. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`modifyproperty!`](@ref Base.modifyproperty!) and [`setglobal!`](@ref). +""" +modifyglobal! + +""" + replaceglobal!(module::Module, name::Symbol, expected, desired, + [success_order::Symbol, [fail_order::Symbol=success_order]) -> (; old, success::Bool) + +Atomically perform the operations to get and conditionally set a global to +a given value. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`replaceproperty!`](@ref Base.replaceproperty!) and [`setglobal!`](@ref). +""" +replaceglobal! + +""" + setglobalonce!(module::Module, name::Symbol, value, + [success_order::Symbol, [fail_order::Symbol=success_order]) -> success::Bool + +Atomically perform the operations to set a global to +a given value, only if it was previously not set. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`setpropertyonce!`](@ref Base.setpropertyonce!) and [`setglobal!`](@ref). +""" +setglobalonce! + """ typeof(x) @@ -2369,6 +2754,9 @@ compatible with the stores to that location. Otherwise, if not declared as To test whether an array element is defined, use [`isassigned`](@ref) instead. +The global variable variant is supported for compatibility with older julia +releases. For new code, prefer [`isdefinedglobal`](@ref). + See also [`@isdefined`](@ref). # Examples @@ -2397,6 +2785,73 @@ false isdefined +""" + isdefinedglobal(m::Module, s::Symbol, [allow_import::Bool=true, [order::Symbol=:unordered]]) + +Tests whether a global variable `s` is defined in module `m` (in the current world age). +A variable is considered defined if and only if a value may be read from this global variable +and an access will not throw. This includes both constants and global variables that have +a value set. + +If `allow_import` is `false`, the global variable must be defined inside `m` +and may not be imported from another module. + +See also [`@isdefined`](@ref). + +# Examples +```jldoctest +julia> isdefinedglobal(Base, :sum) +true + +julia> isdefinedglobal(Base, :NonExistentMethod) +false + +julia> isdefinedglobal(Base, :sum, false) +true + +julia> isdefinedglobal(Main, :sum, false) +false +``` +""" +isdefinedglobal + +""" + Memory{T}(undef, n) + +Construct an uninitialized [`Memory{T}`](@ref) of length `n`. All Memory +objects of length 0 might alias, since there is no reachable mutable content +from them. 
+ +# Examples +```julia-repl +julia> Memory{Float64}(undef, 3) +3-element Memory{Float64}: + 6.90966e-310 + 6.90966e-310 + 6.90966e-310 +``` +""" +Memory{T}(::UndefInitializer, n) + +""" + memoryref(::GenericMemory) + +Construct a `GenericMemoryRef` from a memory object. This does not fail, but the +resulting memory will point out-of-bounds if and only if the memory is empty. +""" +memoryref(::GenericMemory) + +""" + memoryref(::GenericMemory, index::Integer) + memoryref(::GenericMemoryRef, index::Integer) + +Construct a `GenericMemoryRef` from a memory object and an offset index (1-based) which +can also be negative. This always returns an inbounds object, and will throw an +error if that is not possible (because the index would result in a shift +out-of-bounds of the underlying memory). +""" +memoryref(::Union{GenericMemory,GenericMemoryRef}, ::Integer) + """ Vector{T}(undef, n) @@ -2405,7 +2860,7 @@ Construct an uninitialized [`Vector{T}`](@ref) of length `n`. # Examples ```julia-repl julia> Vector{Float64}(undef, 3) -3-element Array{Float64, 1}: +3-element Vector{Float64}: 6.90966e-310 6.90966e-310 6.90966e-310 @@ -2455,7 +2910,7 @@ Construct an uninitialized [`Matrix{T}`](@ref) of size `m`×`n`. # Examples ```julia-repl julia> Matrix{Float64}(undef, 2, 3) -2×3 Array{Float64, 2}: +2×3 Matrix{Float64}: 2.36365e-314 2.28473e-314 5.0e-324 2.26704e-314 2.26711e-314 NaN @@ -2593,7 +3048,7 @@ an alias for `UndefInitializer()`. # Examples ```julia-repl julia> Array{Float64, 1}(UndefInitializer(), 3) -3-element Array{Float64, 1}: +3-element Vector{Float64}: 2.2752528595e-314 2.202942107e-314 2.275252907e-314 @@ -2631,7 +3086,13 @@ Ptr{T}() """ +(x, y...) -Addition operator. `x+y+z+...` calls this function with all arguments, i.e. `+(x, y, z, ...)`. +Addition operator. + +Infix `x+y+z+...` calls this function with all arguments, i.e. `+(x, y, z, ...)`, +which by default then calls `(x+y) + z + ...` starting from the left. + +Note that overflow is possible for most integer types, including the +default `Int`, when adding large numbers. # Examples ```jldoctest @@ -2640,6 +3101,14 @@ julia> 1 + 20 + 4 julia> +(1, 20, 4) 25 + +julia> [1,2] + [3,4] +2-element Vector{Int64}: + 4 + 6 + +julia> typemax(Int) + 1 < 0 +true ``` """ (+)(x, y...) @@ -2663,6 +3132,12 @@ julia> -[1 2; 3 4] 2×2 Matrix{Int64}: -1 -2 -3 -4 + +julia> -(true) # promotes to Int +-1 + +julia> -(0x003) +0xfffd ``` """ -(x) @@ -2686,7 +3161,18 @@ julia> -(2, 4.5) """ *(x, y...) -Multiplication operator. `x*y*z*...` calls this function with all arguments, i.e. `*(x, y, z, ...)`. +Multiplication operator. + +Infix `x*y*z*...` calls this function with all arguments, i.e. `*(x, y, z, ...)`, +which by default then calls `(x*y) * z * ...` starting from the left. + +Juxtaposition such as `2pi` also calls `*(2, pi)`. Note that this operation +has higher precedence than a literal `*`. Note also that juxtaposition "0x..." +(integer zero times a variable whose name starts with `x`) is forbidden as +it clashes with unsigned integer literals: `0x01 isa UInt8`. + +Note that overflow is possible for most integer types, including the default `Int`, +when multiplying large numbers. # Examples ```jldoctest @@ -2695,6 +3181,17 @@ julia> 2 * 7 * 8 julia> *(2, 7, 8) 112 + +julia> [2 0; 0 3] * [1, 10] # matrix * vector +2-element Vector{Int64}: + 2 + 30 + +julia> 1/2pi, 1/2*pi # juxtaposition has higher precedence +(0.15915494309189535, 1.5707963267948966) + +julia> x = [1, 2]; x'x # adjoint vector * vector +5 ``` """ (*)(x, y...) 
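Returning to the `memoryref` methods documented above, which currently ship without examples: a minimal sketch based only on those docstrings (on some builds `memoryref` may need to be qualified as `Base.memoryref`):

```julia-repl
julia> mem = Memory{Int}(undef, 4);

julia> ref = memoryref(mem, 3);    # 1-based offset into `mem`

julia> Core.memoryrefoffset(ref)
3
```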
@@ -2702,8 +3199,10 @@ julia> *(2, 7, 8) """ /(x, y) -Right division operator: multiplication of `x` by the inverse of `y` on the right. Gives -floating-point results for integer arguments. +Right division operator: multiplication of `x` by the inverse of `y` on the right. + +Gives floating-point results for integer arguments. +See [`÷`](@ref div) for integer division, or [`//`](@ref) for [`Rational`](@ref) results. # Examples ```jldoctest @@ -2782,14 +3281,27 @@ Any """ Union{} -`Union{}`, the empty [`Union`](@ref) of types, is the type that has no values. That is, it has the defining -property `isa(x, Union{}) == false` for any `x`. `Base.Bottom` is defined as its alias and the type of `Union{}` -is `Core.TypeofBottom`. +`Union{}`, the empty [`Union`](@ref) of types, is the *bottom* type of the type system. That is, for each +`T::Type`, `Union{} <: T`. Also see the subtyping operator's documentation: [`<:`](@ref). + +As such, `Union{}` is also an *empty*/*uninhabited* type, meaning that it has no values. That is, for each `x`, +`isa(x, Union{}) == false`. + +`Base.Bottom` is defined as its alias and the type of `Union{}` is `Core.TypeofBottom`. # Examples ```jldoctest julia> isa(nothing, Union{}) false + +julia> Union{} <: Int +true + +julia> typeof(Union{}) === Core.TypeofBottom +true + +julia> isa(Union{}, Union) +false ``` """ kw"Union{}", Base.Bottom @@ -2797,23 +3309,33 @@ kw"Union{}", Base.Bottom """ Union{Types...} -A type union is an abstract type which includes all instances of any of its argument types. The empty -union [`Union{}`](@ref) is the bottom type of Julia. +A `Union` type is an abstract type which includes all instances of any of its argument types. +This means that `T <: Union{T,S}` and `S <: Union{T,S}`. + +Like other abstract types, it cannot be instantiated, even if all of its arguments are non +abstract. # Examples ```jldoctest julia> IntOrString = Union{Int,AbstractString} Union{Int64, AbstractString} -julia> 1 isa IntOrString +julia> 1 isa IntOrString # instance of Int is included in the union true -julia> "Hello!" isa IntOrString +julia> "Hello!" isa IntOrString # String is also included true -julia> 1.0 isa IntOrString +julia> 1.0 isa IntOrString # Float64 is not included because it is neither Int nor AbstractString false ``` + +# Extended Help + +Unlike most other parametric types, unions are covariant in their parameters. For example, +`Union{Real, String}` is a subtype of `Union{Number, AbstractString}`. + +The empty union [`Union{}`](@ref) is the bottom type of Julia. """ Union @@ -2822,7 +3344,7 @@ Union UnionAll A union of types over all values of a type parameter. `UnionAll` is used to describe parametric types -where the values of some parameters are not known. +where the values of some parameters are not known. See the manual section on [UnionAll Types](@ref). # Examples ```jldoctest @@ -3090,14 +3612,30 @@ Base.modifyproperty! replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) Perform a compare-and-swap operation on `x.f` from `expected` to `desired`, per -egal. The syntax `@atomic_replace! x.f expected => desired` can be used instead +egal. The syntax `@atomicreplace x.f expected => desired` can be used instead of the function call form. See also [`replacefield!`](@ref Core.replacefield!) -and [`setproperty!`](@ref Base.setproperty!). +[`setproperty!`](@ref Base.setproperty!), +[`setpropertyonce!`](@ref Base.setpropertyonce!). """ Base.replaceproperty! 
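A quick sketch of `replaceproperty!` through its macro form; the `Flag` type below is invented for illustration and needs an `@atomic` field (Julia 1.7+):

```julia-repl
julia> mutable struct Flag
           @atomic value::Int
       end

julia> f = Flag(0);

julia> @atomicreplace f.value 0 => 1
(old = 0, success = true)

julia> @atomicreplace f.value 0 => 2   # the expected value no longer matches
(old = 1, success = false)
```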
+""" + setpropertyonce!(x, f::Symbol, value, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + +Perform a compare-and-swap operation on `x.f` to set it to `value` if previously unset. +The syntax `@atomiconce x.f = value` can be used instead of the function call form. + +See also [`setfieldonce!`](@ref Core.replacefield!), +[`setproperty!`](@ref Base.setproperty!), +[`replaceproperty!`](@ref Base.replaceproperty!). + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. +""" +Base.setpropertyonce! + """ StridedArray{T, N} @@ -3241,7 +3779,7 @@ kw"atomic" This function prevents dead-code elimination (DCE) of itself and any arguments passed to it, but is otherwise the lightest barrier possible. In particular, -it is not a GC safepoint, does model an observable heap effect, does not expand +it is not a GC safepoint, does not model an observable heap effect, does not expand to any code itself and may be re-ordered with respect to other side effects (though the total number of executions may not change). @@ -3259,6 +3797,17 @@ unused and delete the entire benchmark code). `donotdelete(1+1)`, no add instruction needs to be executed at runtime and the code is semantically equivalent to `donotdelete(2).` +!!! note + This intrinsic does not affect the semantics of code that is dead because it is + *unreachable*. For example, the body of the function `f(x) = false && donotdelete(x)` + may be deleted in its entirety. The semantics of this intrinsic only guarantee that + *if* the intrinsic is semantically executed, then there is some program state at + which the value of the arguments of this intrinsic were available (in a register, + in memory, etc.). + +!!! compat "Julia 1.8" + This method was added in Julia 1.8. + # Examples ```julia @@ -3277,11 +3826,9 @@ Base.donotdelete """ Base.compilerbarrier(setting::Symbol, val) -This function puts a barrier at a specified compilation phase. -It is supposed to only influence the compilation behavior according to `setting`, -and its runtime semantics is just to return the second argument `val` (except that -this function will perform additional checks on `setting` in a case when `setting` -isn't known precisely at compile-time.) +This function acts a compiler barrier at a specified compilation phase. +The dynamic semantics of this intrinsic are to return the `val` argument, unmodified. +However, depending on the `setting`, the compiler is prevented from assuming this behavior. Currently either of the following `setting`s is allowed: - Barriers on abstract interpretation: @@ -3294,9 +3841,9 @@ Currently either of the following `setting`s is allowed: - Any barriers on optimization aren't implemented yet !!! note - This function is supposed to be used _with `setting` known precisely at compile-time_. - Note that in a case when the `setting` isn't known precisely at compile-time, the compiler - currently will put the most strongest barrier(s) rather than emitting a compile-time warning. + This function is expected to be used with `setting` known precisely at compile-time. + If the `setting` is not known precisely at compile-time, the compiler will emit the + strongest barrier(s). No compile-time warning is issued. # Examples @@ -3351,4 +3898,20 @@ The current differences are: """ Core.finalizer +""" + ConcurrencyViolationError(msg) <: Exception + +An error thrown when a detectable violation of concurrent semantics has occurred. 
+ +A non-exhaustive list of examples of when this is used includes: + + * Throwing when a deadlock has been detected (e.g. `wait(current_task())`) + * Known-unsafe behavior is attempted (e.g. `yield(current_task())`) + * A known non-threadsafe datastructure is attempted to be modified from multiple concurrent tasks + * A lock is being unlocked that wasn't locked by this task +""" +ConcurrencyViolationError + +Base.include(BaseDocs, "intrinsicsdocs.jl") + end diff --git a/base/docs/core.jl b/base/docs/core.jl index 718e49917632f..93265416099f9 --- a/base/docs/core.jl +++ b/base/docs/core.jl @@ -2,15 +2,21 @@ module CoreDocs -import ..esc, ..push!, ..getindex, ..unsafe_load, ..Csize_t, ..@nospecialize +import Core: @nospecialize, SimpleVector -@nospecialize # don't specialize on any arguments of the methods declared herein +struct DocLinkedList + doc::SimpleVector + next::DocLinkedList + DocLinkedList() = new() + DocLinkedList(doc::SimpleVector, next::DocLinkedList) = new(doc, next) +end +global DOCS = DocLinkedList() function doc!(source::LineNumberNode, mod::Module, str, ex) - push!(DOCS, Core.svec(mod, ex, str, source.file, source.line)) + global DOCS + DOCS = DocLinkedList(Core.svec(mod, ex, str, source.file, source.line), DOCS) nothing end -const DOCS = Array{Core.SimpleVector,1}() isexpr(x, h::Symbol) = isa(x, Expr) && x.head === h @@ -25,9 +31,9 @@ function docm(source::LineNumberNode, mod::Module, str, x) else out = Expr(:block, x, out) end - return esc(out) + return Expr(:escape, out) end docm(source::LineNumberNode, mod::Module, x) = - isexpr(x, :->) ? docm(source, mod, x.args[1], x.args[2].args[2]) : error("invalid '@doc'.") + (isa(x, Expr) && x.head === :->) ? docm(source, mod, x.args[1], x.args[2].args[2]) : error("invalid '@doc'.") end diff --git a/base/docs/intrinsicsdocs.jl b/base/docs/intrinsicsdocs.jl new file mode 100644 index 0000000000000..c9538ea74ab26 --- /dev/null +++ b/base/docs/intrinsicsdocs.jl @@ -0,0 +1,180 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + Core.IR + +The `Core.IR` module exports the IR object model. +""" +Core.IR + +""" + Core.IntrinsicFunction <: Core.Builtin <: Function + +The `Core.IntrinsicFunction`s define some of the basic primitives that determine the +abilities and behaviors of a Julia program. +""" +Core.IntrinsicFunction + +""" + Core.Intrinsics + +The `Core.Intrinsics` module holds the `Core.IntrinsicFunction` objects. +""" +Core.Intrinsics + +""" + Core.memoryrefnew(::GenericMemory) + Core.memoryrefnew(::GenericMemoryRef, index::Int, [boundscheck::Bool]) + +Return a `GenericMemoryRef` for a `GenericMemory`. See [`memoryref`](@ref). + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. +""" +Core.memoryrefnew + +""" + Core.memoryrefoffset(::GenericMemoryRef) + +Return the offset index that was used to construct the `MemoryRef`. See [`memoryref`](@ref). + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. +""" +Core.memoryrefoffset + +""" + Core.memoryrefget(::GenericMemoryRef, ordering::Symbol, boundscheck::Bool) + +Return the value stored at the `MemoryRef`, throwing a `BoundsError` if the `Memory` is empty. See `ref[]`. +The memory ordering specified must be compatible with the `isatomic` parameter. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later.
+""" +Core.memoryrefget + +""" + Core.memoryrefset!(::GenericMemoryRef, value, ordering::Symbol, boundscheck::Bool) + +Store the value to the `MemoryRef`, throwing a `BoundsError` if the `Memory` is empty. See `ref[] = value`. +The memory ordering specified must be compatible with the `isatomic` parameter. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. +""" +Core.memoryrefset! + +""" + Core.memoryref_isassigned(::GenericMemoryRef, ordering::Symbol, boundscheck::Bool) + +Return whether there is a value stored at the `MemoryRef`, returning false if the `Memory` +is empty. See [`isassigned(::Base.RefValue)`](@ref), [`Core.memoryrefget`](@ref). +The memory ordering specified must be compatible with the `isatomic` parameter. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. +""" +Core.memoryref_isassigned + +""" + Core.memoryrefswap!(::GenericMemoryRef, value, ordering::Symbol, boundscheck::Bool) + +Atomically perform the operations to simultaneously get and set a `MemoryRef` value. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`swapproperty!`](@ref Base.swapproperty!) and [`Core.memoryrefset!`](@ref). +""" +Core.memoryrefswap! + +""" + Core.memoryrefmodify!(::GenericMemoryRef, op, value, ordering::Symbol, boundscheck::Bool) -> Pair + +Atomically perform the operations to get and set a `MemoryRef` value after applying +the function `op`. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`modifyproperty!`](@ref Base.modifyproperty!) and [`Core.memoryrefset!`](@ref). +""" +Core.memoryrefmodify! + +""" + Core.memoryrefreplace!(::GenericMemoryRef, expected, desired, + success_order::Symbol, fail_order::Symbol=success_order, boundscheck::Bool) -> (; old, success::Bool) + +Atomically perform the operations to get and conditionally set a `MemoryRef` value. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`replaceproperty!`](@ref Base.replaceproperty!) and [`Core.memoryrefset!`](@ref). +""" +Core.memoryrefreplace! + +""" + Core.memoryrefsetonce!(::GenericMemoryRef, value, + success_order::Symbol, fail_order::Symbol=success_order, boundscheck::Bool) -> success::Bool + +Atomically perform the operations to set a `MemoryRef` to +a given value, only if it was previously not set. + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +See also [`setpropertyonce!`](@ref Base.replaceproperty!) and [`Core.memoryrefset!`](@ref). +""" +Core.memoryrefsetonce! + +""" + Core.Intrinsics.atomic_pointerref(pointer::Ptr{T}, order::Symbol) --> T + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. + +See [`unsafe_load`](@ref Base.unsafe_load). +""" +Core.Intrinsics.atomic_pointerref + +""" + Core.Intrinsics.atomic_pointerset(pointer::Ptr{T}, new::T, order::Symbol) --> pointer + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. + +See [`unsafe_store!`](@ref Base.unsafe_store!). +""" +Core.Intrinsics.atomic_pointerset + +""" + Core.Intrinsics.atomic_pointerswap(pointer::Ptr{T}, new::T, order::Symbol) --> old + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. + +See [`unsafe_swap!`](@ref Base.unsafe_swap!). +""" +Core.Intrinsics.atomic_pointerswap + +""" + Core.Intrinsics.atomic_pointermodify(pointer::Ptr{T}, function::(old::T,arg::S)->T, arg::S, order::Symbol) --> old + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. 
+ +See [`unsafe_modify!`](@ref Base.unsafe_modify!). +""" +Core.Intrinsics.atomic_pointermodify + +""" + Core.Intrinsics.atomic_pointerreplace(pointer::Ptr{T}, expected::Any, new::T, success_order::Symbol, failure_order::Symbol) --> (old, cmp) + +!!! compat "Julia 1.7" + This function requires Julia 1.7 or later. + +See [`unsafe_replace!`](@ref Base.unsafe_replace!). +""" +Core.Intrinsics.atomic_pointerreplace diff --git a/base/docs/utils.jl b/base/docs/utils.jl index 928dfde01ccf0..1ed576c7362ff 100644 --- a/base/docs/utils.jl +++ b/base/docs/utils.jl @@ -23,7 +23,7 @@ You can also use a stream for large amounts of data: `HTML` is currently exported to maintain backwards compatibility, but this export is deprecated. It is recommended to use - this type as `Docs.HTML` or to explicitly + this type as [`Docs.HTML`](@ref) or to explicitly import it from `Docs`. """ mutable struct HTML{T} @@ -81,7 +81,7 @@ You can also use a stream for large amounts of data: `Text` is currently exported to maintain backwards compatibility, but this export is deprecated. It is recommended to use - this type as `Docs.Text` or to explicitly + this type as [`Docs.Text`](@ref) or to explicitly import it from `Docs`. """ mutable struct Text{T} diff --git a/base/env.jl b/base/env.jl index a4a55d9dad013..6248f1933d1ec 100644 --- a/base/env.jl +++ b/base/env.jl @@ -3,12 +3,29 @@ if Sys.iswindows() const ERROR_ENVVAR_NOT_FOUND = UInt32(203) + const env_dict = Lockable(Dict{String, Vector{Cwchar_t}}()) + + function memoized_env_lookup(str::AbstractString) + # Windows environment variables have a different format from Linux / MacOS, and previously + # incurred allocations because we had to convert a String to a Vector{Cwchar_t} each time + # an environment variable was looked up. This function memoizes that lookup process, storing + # the String => Vector{Cwchar_t} pairs in env_dict + @lock env_dict begin + var = get(env_dict[], str, nothing) + if isnothing(var) + var = cwstring(str) + env_dict[][str] = var + end + return var + end + end + _getenvlen(var::Vector{UInt16}) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,C_NULL,0) _hasenv(s::Vector{UInt16}) = _getenvlen(s) != 0 || Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND - _hasenv(s::AbstractString) = _hasenv(cwstring(s)) + _hasenv(s::AbstractString) = _hasenv(memoized_env_lookup(s)) function access_env(onError::Function, str::AbstractString) - var = cwstring(str) + var = memoized_env_lookup(str) len = _getenvlen(var) if len == 0 return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? 
"" : onError(str) @@ -21,7 +38,7 @@ if Sys.iswindows() end function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true) - var = cwstring(svar) + var = memoized_env_lookup(svar) val = cwstring(sval) if overwrite || !_hasenv(var) ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,val) @@ -30,7 +47,7 @@ if Sys.iswindows() end function _unsetenv(svar::AbstractString) - var = cwstring(svar) + var = memoized_env_lookup(svar) ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL) windowserror(:setenv, ret == 0 && Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND) end @@ -111,25 +128,34 @@ const get_bool_env_falsy = ( "0") """ - Base.get_bool_env(name::String, default::Bool)::Union{Bool,Nothing} + Base.get_bool_env(name::String, default::Bool; throw=false)::Union{Bool,Nothing} + Base.get_bool_env(f_default::Callable, name::String; throw=false)::Union{Bool,Nothing} -Evaluate whether the value of environnment variable `name` is a truthy or falsy string, -and return `nothing` if it is not recognized as either. If the variable is not set, or is set to "", -return `default`. +Evaluate whether the value of environment variable `name` is a truthy or falsy string, +and return `nothing` (or throw if `throw=true`) if it is not recognized as either. If +the variable is not set, or is set to "", return `default` or the result of executing `f_default()`. Recognized values are the following, and their Capitalized and UPPERCASE forms: truthy: "t", "true", "y", "yes", "1" falsy: "f", "false", "n", "no", "0" """ -function get_bool_env(name::String, default::Bool) - haskey(ENV, name) || return default - val = ENV[name] - if isempty(val) - return default - elseif val in get_bool_env_truthy +get_bool_env(name::String, default::Bool; kwargs...) = get_bool_env(Returns(default), name; kwargs...) +function get_bool_env(f_default::Callable, name::String; kwargs...) + if haskey(ENV, name) + val = ENV[name] + if !isempty(val) + return parse_bool_env(name, val; kwargs...) + end + end + return f_default() +end +function parse_bool_env(name::String, val::String = ENV[name]; throw::Bool=false) + if val in get_bool_env_truthy return true elseif val in get_bool_env_falsy return false + elseif throw + Base.throw(ArgumentError("Value for environment variable `$name` could not be parsed as Boolean: $(repr(val))")) else return nothing end @@ -138,6 +164,10 @@ end getindex(::EnvDict, k::AbstractString) = access_env(k->throw(KeyError(k)), k) get(::EnvDict, k::AbstractString, def) = access_env(Returns(def), k) get(f::Callable, ::EnvDict, k::AbstractString) = access_env(k->f(), k) +function get!(default::Callable, ::EnvDict, k::AbstractString) + haskey(ENV, k) && return ENV[k] + ENV[k] = default() +end in(k::AbstractString, ::KeySet{String, EnvDict}) = _hasenv(k) pop!(::EnvDict, k::AbstractString) = (v = ENV[k]; _unsetenv(k); v) pop!(::EnvDict, k::AbstractString, def) = haskey(ENV,k) ? pop!(ENV,k) : def diff --git a/base/error.jl b/base/error.jl index 4e9be0e172d61..6e0cdeea09fd3 100644 --- a/base/error.jl +++ b/base/error.jl @@ -27,17 +27,27 @@ throw ## native julia error handling ## +# This is `Experimental.@max_methods 2 function error end`, which is not available at this point in bootstrap. +# NOTE It is important to always be able to infer the return type of `error` as `Union{}`, +# but there's a hitch when a package globally sets `@max_methods 1` and it causes inference +# for `error(::Any)` to fail (JuliaLang/julia#54029). 
+# This definition site `@max_methods 2` setting overrides any global `@max_methods 1` settings +# on package side, guaranteeing that return type inference on `error` is successful always. +function error end +typeof(error).name.max_methods = UInt8(2) + """ error(message::AbstractString) Raise an `ErrorException` with the given message. """ error(s::AbstractString) = throw(ErrorException(s)) +error() = throw(ErrorException("")) """ error(msg...) -Raise an `ErrorException` with the given message. +Raise an `ErrorException` with a message constructed by `string(msg...)`. """ function error(s::Vararg{Any,N}) where {N} @noinline @@ -62,7 +72,7 @@ rethrow() = ccall(:jl_rethrow, Bottom, ()) rethrow(@nospecialize(e)) = ccall(:jl_rethrow_other, Bottom, (Any,), e) struct InterpreterIP - code::Union{CodeInfo,Core.MethodInstance,Nothing} + code::Union{CodeInfo,Core.MethodInstance,Core.CodeInstance,Nothing} stmt::Csize_t mod::Union{Module,Nothing} end @@ -87,7 +97,7 @@ function _reformat_bt(bt::Array{Ptr{Cvoid},1}, bt2::Array{Any,1}) tag = (entry_metadata >> 6) & 0xf header = entry_metadata >> 10 if tag == 1 # JL_BT_INTERP_FRAME_TAG - code = bt2[j]::Union{CodeInfo,Core.MethodInstance,Nothing} + code = bt2[j]::Union{CodeInfo,Core.MethodInstance,Core.CodeInstance,Nothing} mod = njlvalues == 2 ? bt2[j+1]::Union{Module,Nothing} : nothing push!(ret, InterpreterIP(code, header, mod)) else @@ -162,7 +172,7 @@ end ## keyword arg lowering generates calls to this ## function kwerr(kw, args::Vararg{Any,N}) where {N} @noinline - throw(MethodError(Core.kwcall, (kw, args...))) + throw(MethodError(Core.kwcall, (kw, args...), tls_world_age())) end ## system error handling ## @@ -197,15 +207,17 @@ windowserror(p, code::UInt32=Libc.GetLastError(); extrainfo=nothing) = throw(Mai """ @assert cond [text] -Throw an [`AssertionError`](@ref) if `cond` is `false`. Preferred syntax for writing assertions. -Message `text` is optionally displayed upon assertion failure. +Throw an [`AssertionError`](@ref) if `cond` is `false`. This is the preferred syntax for +writing assertions, which are conditions that are assumed to be true, but that the user +might decide to check anyways, as an aid to debugging if they fail. +The optional message `text` is displayed upon assertion failure. !!! warning - An assert might be disabled at various optimization levels. + An assert might be disabled at some optimization levels. Assert should therefore only be used as a debugging tool - and not used for authentication verification (e.g., verifying passwords), - nor should side effects needed for the function to work correctly - be used inside of asserts. + and not used for authentication verification (e.g., verifying passwords or checking array bounds). + The code must not rely on the side effects of running `cond` for the correct behavior + of a function. # Examples ```jldoctest @@ -221,20 +233,22 @@ macro assert(ex, msgs...) msg = msg # pass-through elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) # message is an expression needing evaluating - msg = :(Main.Base.string($(esc(msg)))) + # N.B. To reduce the risk of invalidation caused by the complex callstack involved + # with `string`, use `inferencebarrier` here to hide this `string` from the compiler. 
+ msg = :($Main.Base.inferencebarrier($Main.Base.string)($(msg))) elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg) msg = Main.Base.string(msg) else # string() might not be defined during bootstrap - msg = quote - msg = $(Expr(:quote,msg)) - isdefined(Main, :Base) ? Main.Base.string(msg) : - (Core.println(msg); "Error during bootstrap. See stdout.") - end + msg = :($Main.Base.inferencebarrier($_assert_tostring)($(Expr(:quote,msg)))) end - return :($(esc(ex)) ? $(nothing) : throw(AssertionError($msg))) + return esc(:($(ex) ? $(nothing) : $Base.@outline($throw($AssertionError($msg))))) end +# this may be overridden in contexts where `string(::Expr)` doesn't work +_assert_tostring(msg) = isdefined(Main, :Base) ? Main.Base.string(msg) : + (Core.println(msg); "Error during bootstrap. See stdout.") + struct ExponentialBackOff n::Int first_delay::Float64 diff --git a/base/errorshow.jl b/base/errorshow.jl index 81f4c9c2ee9e0..d4b9b3666fbb7 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -43,6 +43,15 @@ function showerror(io::IO, ex::Meta.ParseError) end end +function showerror(io::IO, ex::Core.TypeNameError) + print(io, "TypeNameError: ") + if isa(ex.a, Union) + print(io, "typename does not apply to unions whose components have different typenames") + else + print(io, "typename does not apply to this type") + end +end + function showerror(io::IO, ex::BoundsError) print(io, "BoundsError") if isdefined(ex, :a) @@ -70,6 +79,8 @@ function showerror(io::IO, ex::TypeError) print(io, "TypeError: ") if ex.expected === Bool print(io, "non-boolean (", typeof(ex.got), ") used in boolean context") + elseif ex.func === :var"dict key" + print(io, "$(limitrepr(ex.got)) is not a valid key for type $(ex.expected)") else if isvarargtype(ex.got) targs = (ex.got,) @@ -80,7 +91,7 @@ function showerror(io::IO, ex::TypeError) end if ex.context == "" ctx = "in $(ex.func)" - elseif ex.func === Symbol("keyword argument") + elseif ex.func === :var"keyword argument" ctx = "in keyword argument $(ex.context)" else ctx = "in $(ex.func), in $(ex.context)" @@ -147,13 +158,7 @@ showerror(io::IO, ::DivideError) = print(io, "DivideError: integer division erro showerror(io::IO, ::StackOverflowError) = print(io, "StackOverflowError:") showerror(io::IO, ::UndefRefError) = print(io, "UndefRefError: access to undefined reference") showerror(io::IO, ::EOFError) = print(io, "EOFError: read end of file") -function showerror(io::IO, ex::ErrorException) - print(io, ex.msg) - if ex.msg == "type String has no field data" - println(io) - print(io, "Use `codeunits(str)` instead.") - end -end +showerror(io::IO, ex::ErrorException) = print(io, ex.msg) showerror(io::IO, ex::KeyError) = (print(io, "KeyError: key "); show(io, ex.key); print(io, " not found")) @@ -168,13 +173,31 @@ showerror(io::IO, ex::UndefKeywordError) = function showerror(io::IO, ex::UndefVarError) print(io, "UndefVarError: `$(ex.var)` not defined") + if isdefined(ex, :scope) + scope = ex.scope + if scope isa Module + print(io, " in `$scope`") + elseif scope === :static_parameter + print(io, " in static parameter matching") + else + print(io, " in $scope scope") + end + end Experimental.show_error_hints(io, ex) end function showerror(io::IO, ex::InexactError) print(io, "InexactError: ", ex.func, '(') - nameof(ex.T) === ex.func || print(io, ex.T, ", ") - print(io, ex.val, ')') + T = first(ex.args) + nameof(T) === ex.func || print(io, T, ", ") + # `join` calls `string` on its arguments, which shadows the size of e.g. 
Inf16 + # as `string(Inf16) == "Inf"` instead of "Inf16". Thus we cannot use `join` here. + for arg in ex.args[2:end-1] + show(io, arg) + print(io, ", ") + end + show(io, ex.args[end]) + print(io, ")") Experimental.show_error_hints(io, ex) end @@ -182,7 +205,7 @@ function showerror(io::IO, ex::CanonicalIndexError) print(io, "CanonicalIndexError: ", ex.func, " not defined for ", ex.type) end -typesof(@nospecialize args...) = Tuple{Any[ Core.Typeof(args[i]) for i in 1:length(args) ]...} +typesof(@nospecialize args...) = Tuple{Any[Core.Typeof(arg) for arg in args]...} function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b::DataType), color::Symbol) if a.name === b.name @@ -230,35 +253,41 @@ function show_convert_error(io::IO, ex::MethodError, arg_types_param) end function showerror(io::IO, ex::MethodError) + @nospecialize io # ex.args is a tuple type if it was thrown from `invoke` and is # a tuple of the arguments otherwise. - is_arg_types = isa(ex.args, DataType) - arg_types = (is_arg_types ? ex.args : typesof(ex.args...))::DataType + is_arg_types = !isa(ex.args, Tuple) + arg_types = is_arg_types ? ex.args : typesof(ex.args...) + arg_types_param::SimpleVector = (unwrap_unionall(arg_types)::DataType).parameters + san_arg_types_param = Any[rewrap_unionall(a, arg_types) for a in arg_types_param] f = ex.f meth = methods_including_ambiguous(f, arg_types) if isa(meth, MethodList) && length(meth) > 1 return showerror_ambiguous(io, meth, f, arg_types) end - arg_types_param::SimpleVector = arg_types.parameters - show_candidates = true print(io, "MethodError: ") ft = typeof(f) f_is_function = false - kwargs = () - if f === Core.kwcall && !is_arg_types - f = (ex.args::Tuple)[2] - ft = typeof(f) + kwargs = [] + if f === Core.kwcall && length(arg_types_param) >= 2 && arg_types_param[1] <: NamedTuple && !is_arg_types + # if this is a kwcall, reformat it as a call with kwargs + # TODO: handle !is_arg_types here (aka invoke with kwargs), which needs a value for `f` + local kwt + let args = ex.args::Tuple + f = args[2] + ft = typeof(f) + kwt = typeof(args[1]) + ex = MethodError(f, args[3:end], ex.world) + end arg_types_param = arg_types_param[3:end] - kwargs = pairs(ex.args[1]) - ex = MethodError(f, ex.args[3:end::Int], ex.world) + san_arg_types_param = san_arg_types_param[3:end] + keys = kwt.parameters[1]::Tuple + kwargs = Any[(keys[i], fieldtype(kwt, i)) for i in eachindex(keys)] + arg_types = rewrap_unionall(Tuple{arg_types_param...}, arg_types) end - name = ft.name.mt.name if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types f_is_function = true show_convert_error(io, ex, arg_types_param) - elseif f === mapreduce_empty || f === reduce_empty - print(io, "reducing over an empty collection is not allowed; consider supplying `init` to the reducer") - show_candidates = false elseif isempty(methods(f)) && isa(f, DataType) && isabstracttype(f) print(io, "no constructors have been defined for ", f) elseif isempty(methods(f)) && !isa(f, Function) && !isa(f, Type) @@ -267,31 +296,28 @@ function showerror(io::IO, ex::MethodError) if ft <: Function && isempty(ft.parameters) && _isself(ft) f_is_function = true end - print(io, "no method matching ") - show_signature_function(io, isa(f, Type) ? 
Type{f} : typeof(f)) - print(io, "(") - for (i, typ) in enumerate(arg_types_param) - print(io, "::", typ) - i == length(arg_types_param) || print(io, ", ") - end - if !isempty(kwargs) - print(io, "; ") - for (i, (k, v)) in enumerate(kwargs) - print(io, k, "::", typeof(v)) - i == length(kwargs)::Int || print(io, ", ") - end + if is_arg_types + print(io, "no method matching invoke ") + else + print(io, "no method matching ") end - print(io, ")") + buf = IOBuffer() + iob = IOContext(buf, io) # for type abbreviation as in #49795; some, like `convert(T, x)`, should not abbreviate + show_signature_function(iob, Core.Typeof(f)) + show_tuple_as_call(iob, :function, arg_types; hasfirst=false, kwargs = isempty(kwargs) ? nothing : kwargs) + str = String(take!(buf)) + str = type_limited_string_from_context(io, str) + print(io, str) end # catch the two common cases of element-wise addition and subtraction - if (f === Base.:+ || f === Base.:-) && length(arg_types_param) == 2 + if (f === Base.:+ || f === Base.:-) && length(san_arg_types_param) == 2 # we need one array of numbers and one number, in any order - if any(x -> x <: AbstractArray{<:Number}, arg_types_param) && - any(x -> x <: Number, arg_types_param) + if any(x -> x <: AbstractArray{<:Number}, san_arg_types_param) && + any(x -> x <: Number, san_arg_types_param) nounf = f === Base.:+ ? "addition" : "subtraction" varnames = ("scalar", "array") - first, second = arg_types_param[1] <: Number ? varnames : reverse(varnames) + first, second = san_arg_types_param[1] <: Number ? varnames : reverse(varnames) fstring = f === Base.:+ ? "+" : "-" # avoid depending on show_default for functions (invalidation) print(io, "\nFor element-wise $nounf, use broadcasting with dot syntax: $first .$fstring $second") end @@ -300,17 +326,31 @@ function showerror(io::IO, ex::MethodError) print(io, "\nUse square brackets [] for indexing an Array.") end # Check for local functions that shadow methods in Base - if f_is_function && isdefined(Base, name) - basef = getfield(Base, name) - if basef !== ex.f && hasmethod(basef, arg_types) - print(io, "\nYou may have intended to import ") - show_unquoted(io, Expr(:., :Base, QuoteNode(name))) + let name = ft.name.mt.name + if f_is_function && isdefined(Base, name) + basef = getfield(Base, name) + if basef !== f && hasmethod(basef, arg_types) + print(io, "\nYou may have intended to import ") + show_unquoted(io, Expr(:., :Base, QuoteNode(name))) + end end end - if (ex.world != typemax(UInt) && hasmethod(ex.f, arg_types) && - !hasmethod(ex.f, arg_types, world = ex.world)) + if ex.world == typemax(UInt) || hasmethod(f, arg_types, world=ex.world) + if !isempty(kwargs) + print(io, "\nThis method does not support all of the given keyword arguments (and may not support any).") + end + if ex.world == typemax(UInt) || isempty(kwargs) + print(io, "\nThis error has been manually thrown, explicitly, so the method may exist but be intentionally marked as unimplemented.") + end + elseif hasmethod(f, arg_types) && !hasmethod(f, arg_types, world=ex.world) curworld = get_world_counter() print(io, "\nThe applicable method may be too new: running in world age $(ex.world), while current world is $(curworld).") + elseif f isa Function + print(io, "\nThe function `$f` exists, but no method is defined for this combination of argument types.") + elseif f isa Type + print(io, "\nThe type `$f` exists, but no method is defined for this combination of argument types when trying to construct it.") + else + print(io, "\nThe object of type `$(typeof(f))` exists, 
but no method is defined for this combination of argument types when trying to treat it as a callable object.") end if !is_arg_types # Check for row vectors used where a column vector is intended. @@ -327,27 +367,30 @@ function showerror(io::IO, ex::MethodError) "\nYou can convert to a column vector with the vec() function.") end end - Experimental.show_error_hints(io, ex, arg_types_param, kwargs) - show_candidates && try + Experimental.show_error_hints(io, ex, san_arg_types_param, kwargs) + try show_method_candidates(io, ex, kwargs) catch ex @error "Error showing method candidates, aborted" exception=ex,catch_backtrace() end + nothing +end + +function showerror(io::IO, exc::FieldError) + @nospecialize + print(io, "FieldError: type $(exc.type |> nameof) has no field `$(exc.field)`") + Base.Experimental.show_error_hints(io, exc) end striptype(::Type{T}) where {T} = T striptype(::Any) = nothing -function showerror_ambiguous(io::IO, meths, f, args) +function showerror_ambiguous(io::IO, meths, f, args::Type) + @nospecialize f args print(io, "MethodError: ") show_signature_function(io, isa(f, Type) ? Type{f} : typeof(f)) - print(io, "(") - p = args.parameters - for (i,a) in enumerate(p) - print(io, "::", a) - i < length(p) && print(io, ", ") - end - println(io, ") is ambiguous.\n\nCandidates:") + show_tuple_as_call(io, :var"", args, hasfirst=false) + println(io, " is ambiguous.\n\nCandidates:") sigfix = Any for m in meths print(io, " ") @@ -359,7 +402,7 @@ function showerror_ambiguous(io::IO, meths, f, args) let sigfix=sigfix if all(m->morespecific(sigfix, m.sig), meths) print(io, "\nPossible fix, define\n ") - Base.show_tuple_as_call(io, :function, sigfix) + show_tuple_as_call(io, :function, sigfix) else print(io, "To resolve the ambiguity, try making one of the methods more specific, or ") print(io, "adding a new method more specific than any of the existing applicable methods.") @@ -372,7 +415,7 @@ end #Show an error by directly calling jl_printf. #Useful in Base submodule __init__ functions where stderr isn't defined yet. -function showerror_nostdio(err, msg::AbstractString) +function showerror_nostdio(@nospecialize(err), msg::AbstractString) stderr_stream = ccall(:jl_stderr_stream, Ptr{Cvoid}, ()) ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, msg) ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, ":\n") @@ -384,15 +427,18 @@ stacktrace_expand_basepaths()::Bool = Base.get_bool_env("JULIA_STACKTRACE_EXPAND stacktrace_contract_userdir()::Bool = Base.get_bool_env("JULIA_STACKTRACE_CONTRACT_HOMEDIR", true) === true stacktrace_linebreaks()::Bool = Base.get_bool_env("JULIA_STACKTRACE_LINEBREAKS", false) === true -function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()) - is_arg_types = isa(ex.args, DataType) +function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) + @nospecialize io + is_arg_types = !isa(ex.args, Tuple) arg_types = is_arg_types ? ex.args : typesof(ex.args...) - arg_types_param = Any[arg_types.parameters...] + arg_types_param = Any[(unwrap_unionall(arg_types)::DataType).parameters...] + arg_types_param = Any[rewrap_unionall(a, arg_types) for a in arg_types_param] # Displays the closest candidates of the given function by looping over the # functions methods and counting the number of matching arguments. f = ex.f ft = typeof(f) - lines = [] + lines = String[] + line_score = Int[] # These functions are special cased to only show if first argument is matched. special = f === convert || f === getindex || f === setindex! 
funcs = Tuple{Any,Vector{Any}}[(f, arg_types_param)] @@ -402,7 +448,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() # pool MethodErrors for these two functions. if f === convert && !isempty(arg_types_param) at1 = arg_types_param[1] - if isType(at1) && !Core.Compiler.has_free_typevars(at1) + if isType(at1) && !has_free_typevars(at1) push!(funcs, (at1.parameters[1], arg_types_param[2:end])) end end @@ -483,85 +529,82 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() end end - if right_matches > 0 || length(arg_types_param) < 2 - if length(t_i) < length(sig) - # If the methods args is longer than input then the method - # arguments is printed as not a match - for (k, sigtype) in enumerate(sig[length(t_i)+1:end]) - sigtype = isvarargtype(sigtype) ? unwrap_unionall(sigtype) : sigtype - if Base.isvarargtype(sigtype) - sigstr = (unwrapva(sigtype::Core.TypeofVararg), "...") - else - sigstr = (sigtype,) - end - if !((min(length(t_i), length(sig)) == 0) && k==1) - print(iob, ", ") - end - if k == 1 && Base.isvarargtype(sigtype) - # There wasn't actually a mismatch - the method match failed for - # some other reason, e.g. world age. Just print the sigstr. - print(iob, sigstr...) - elseif get(io, :color, false)::Bool - let sigstr=sigstr - Base.with_output_color(Base.error_color(), iob) do iob - print(iob, "::", sigstr...) - end - end - else - print(iob, "!Matched::", sigstr...) - end + if length(t_i) < length(sig) + # If the methods args is longer than input then the method + # arguments is printed as not a match + for (k, sigtype) in enumerate(sig[length(t_i)+1:end]) + sigtype = isvarargtype(sigtype) ? unwrap_unionall(sigtype) : sigtype + if Base.isvarargtype(sigtype) + sigstr = (unwrapva(sigtype::Core.TypeofVararg), "...") + else + sigstr = (sigtype,) end - end - kwords = kwarg_decl(method) - if !isempty(kwords) - print(iob, "; ") - join(iob, kwords, ", ") - end - print(iob, ")") - show_method_params(iob0, tv) - file, line = updated_methodloc(method) - if file === nothing - file = string(method.file) - end - stacktrace_contract_userdir() && (file = contractuser(file)) - - if !isempty(kwargs)::Bool - unexpected = Symbol[] - if isempty(kwords) || !(any(endswith(string(kword), "...") for kword in kwords)) - for (k, v) in kwargs - if !(k::Symbol in kwords) - push!(unexpected, k::Symbol) + if !((min(length(t_i), length(sig)) == 0) && k==1) + print(iob, ", ") + end + if k == 1 && Base.isvarargtype(sigtype) + # There wasn't actually a mismatch - the method match failed for + # some other reason, e.g. world age. Just print the sigstr. + print(iob, sigstr...) + elseif get(io, :color, false)::Bool + let sigstr=sigstr + Base.with_output_color(Base.error_color(), iob) do iob + print(iob, "::", sigstr...) end end + else + print(iob, "!Matched::", sigstr...) end - if !isempty(unexpected) - Base.with_output_color(Base.error_color(), iob) do iob - plur = length(unexpected) > 1 ? 
"s" : "" - print(iob, " got unsupported keyword argument$plur \"", join(unexpected, "\", \""), "\"") + end + end + kwords = kwarg_decl(method) + if !isempty(kwords) + print(iob, "; ") + join(iob, kwords, ", ") + end + print(iob, ")") + show_method_params(iob0, tv) + file, line = updated_methodloc(method) + if file === nothing + file = string(method.file) + end + stacktrace_contract_userdir() && (file = contractuser(file)) + + if !isempty(kwargs)::Bool + unexpected = Symbol[] + if isempty(kwords) || !(any(endswith(string(kword), "...") for kword in kwords)) + for (k, v) in kwargs + if !(k::Symbol in kwords) + push!(unexpected, k::Symbol) end end end - if ex.world < reinterpret(UInt, method.primary_world) - print(iob, " (method too new to be called from this world context.)") - elseif ex.world > reinterpret(UInt, method.deleted_world) - print(iob, " (method deleted before this world age.)") + if !isempty(unexpected) + Base.with_output_color(Base.error_color(), iob) do iob + plur = length(unexpected) > 1 ? "s" : "" + print(iob, " got unsupported keyword argument$plur \"", join(unexpected, "\", \""), "\"") + end end - println(iob) - - m = parentmodule_before_main(method) - modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) - print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3) - - # TODO: indicate if it's in the wrong world - push!(lines, (buf, right_matches)) end + if ex.world < reinterpret(UInt, method.primary_world) + print(iob, " (method too new to be called from this world context.)") + elseif ex.world > reinterpret(UInt, method.deleted_world) + print(iob, " (method deleted before this world age.)") + end + println(iob) + + m = parentmodule_before_main(method) + modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) + print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3) + push!(lines, String(take!(buf))) + push!(line_score, -(right_matches * 2 + (length(arg_types_param) < 2 ? 1 : 0))) end end if !isempty(lines) # Display up to three closest candidates Base.with_output_color(:normal, io) do io print(io, "\n\nClosest candidates are:") - sort!(lines, by = x -> -x[2]) + permute!(lines, sortperm(line_score)) i = 0 for line in lines println(io) @@ -570,7 +613,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() break end i += 1 - print(io, String(take!(line[1]))) + print(io, line) end println(io) # extra newline for spacing to stacktrace end @@ -660,7 +703,7 @@ function show_reduced_backtrace(io::IO, t::Vector) push!(repeated_cycle, (0,0,0)) # repeated_cycle is never empty frame_counter = 1 - for i in 1:length(displayed_stackframes) + for i in eachindex(displayed_stackframes) (frame, n) = displayed_stackframes[i] print_stackframe(io, frame_counter, frame, n, ndigits_max, STACKTRACE_FIXEDCOLORS, STACKTRACE_MODULECOLORS) @@ -675,7 +718,7 @@ function show_reduced_backtrace(io::IO, t::Vector) repetitions = repeated_cycle[1][3] popfirst!(repeated_cycle) printstyled(io, - "--- the last ", cycle_length, " lines are repeated ", + "--- the above ", cycle_length, " lines are repeated ", repetitions, " more time", repetitions>1 ? "s" : "", " ---", color = :light_black) if i < length(displayed_stackframes) println(io) @@ -716,9 +759,6 @@ parentmodule_before_main(x) = parentmodule_before_main(parentmodule(x)) # Print a stack frame where the module color is set manually with `modulecolor`. 
function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulecolor) file, line = string(frame.file), frame.line - file = fixup_stdlib_path(file) - stacktrace_expand_basepaths() && (file = something(find_source_file(file), file)) - stacktrace_contract_userdir() && (file = contractuser(file)) # Used by the REPL to make it possible to open # the location of a stackframe/method in the editor. @@ -737,7 +777,7 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulec StackTraces.show_spec_linfo(IOContext(io, :backtrace=>true), frame) if n > 1 - printstyled(io, " (repeats $n times)"; color=:light_black) + printstyled(io, " (repeats $n times)"; color=Base.warn_color(), bold=true) end println(io) @@ -758,6 +798,7 @@ function print_module_path_file(io, modul, file, line; modulecolor = :light_blac end # filepath + file = fixup_stdlib_path(file) stacktrace_expand_basepaths() && (file = something(find_source_file(file), file)) stacktrace_contract_userdir() && (file = contractuser(file)) print(io, " ") @@ -772,12 +813,9 @@ function show_backtrace(io::IO, t::Vector) if haskey(io, :last_shown_line_infos) empty!(io[:last_shown_line_infos]) end - # this will be set to true if types in the stacktrace are truncated - limitflag = Ref(false) - io = IOContext(io, :stacktrace_types_limited => limitflag) # t is a pre-processed backtrace (ref #12856) - if t isa Vector{Any} + if t isa Vector{Any} && (length(t) == 0 || t[1] isa Tuple{StackFrame,Int}) filtered = t else filtered = process_backtrace(t) @@ -786,7 +824,7 @@ function show_backtrace(io::IO, t::Vector) if length(filtered) == 1 && StackTraces.is_top_level_frame(filtered[1][1]) f = filtered[1][1]::StackFrame - if f.line == 0 && f.file === Symbol("") + if f.line == 0 && f.file === :var"" # don't show a single top-level frame with no location info return end @@ -800,9 +838,6 @@ function show_backtrace(io::IO, t::Vector) # process_backtrace returns a Vector{Tuple{Frame, Int}} show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) end - if limitflag[] - print(io, "\nSome type information was truncated. 
Use `show(err)` to see complete types.") - end nothing end @@ -815,7 +850,10 @@ function _simplify_include_frames(trace) for i in length(trace):-1:1 frame::StackFrame, _ = trace[i] mod = parentmodule(frame) - if first_ignored === nothing + if mod === Base && frame.func === :IncludeInto || + mod === Core && frame.func === :EvalInto + kept_frames[i] = false + elseif first_ignored === nothing if mod === Base && frame.func === :_include # Hide include() machinery by default first_ignored = i @@ -845,7 +883,7 @@ end function _collapse_repeated_frames(trace) kept_frames = trues(length(trace)) last_frame = nothing - for i in 1:length(trace) + for i in eachindex(trace) frame::StackFrame, _ = trace[i] if last_frame !== nothing && frame.file == last_frame.file && frame.line == last_frame.line #= @@ -877,9 +915,9 @@ function _collapse_repeated_frames(trace) [3] g(x::Int64) <-- useless @ Main ./REPL[1]:1 =# - if frame.linfo isa MethodInstance && last_frame.linfo isa MethodInstance && - frame.linfo.def isa Method && last_frame.linfo.def isa Method - m, last_m = frame.linfo.def::Method, last_frame.linfo.def::Method + m, last_m = StackTraces.frame_method_or_module(frame), + StackTraces.frame_method_or_module(last_frame) + if m isa Method && last_m isa Method params, last_params = Base.unwrap_unionall(m.sig).parameters, Base.unwrap_unionall(last_m.sig).parameters if last_m.nkw != 0 pos_sig_params = last_params[(last_m.nkw+2):end] @@ -890,7 +928,7 @@ function _collapse_repeated_frames(trace) end if length(last_params) > length(params) issame = true - for i = 1:length(params) + for i = eachindex(params) issame &= params[i] == last_params[i] end if issame @@ -906,7 +944,6 @@ function _collapse_repeated_frames(trace) return trace[kept_frames] end - function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true) n = 0 last_frame = StackTraces.UNKNOWN @@ -927,9 +964,8 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true) if (lkup.from_c && skipC) continue end - code = lkup.linfo - if code isa MethodInstance - def = code.def + if lkup.linfo isa Union{MethodInstance, CodeInstance} + def = StackTraces.frame_method_or_module(lkup) if def isa Method && def.name !== :kwcall && def.sig <: Tuple{typeof(Core.kwcall),NamedTuple,Any,Vararg} # hide kwcall() methods, which are probably internal keyword sorter methods # (we print the internal method instead, after demangling @@ -1003,11 +1039,36 @@ end Experimental.register_error_hint(noncallable_number_hint_handler, MethodError) +# handler for displaying a hint in case the user tries to call setindex! on +# something that doesn't support it: +# - a number (probably attempting to use wrong indexing) +# eg: a = [1 2; 3 4]; a[1][2] = 5 +# - a type (probably tried to initialize without parentheses) +# eg: d = Dict; d["key"] = 2 +function nonsetable_type_hint_handler(io, ex, arg_types, kwargs) + @nospecialize + if ex.f == setindex! + T = arg_types[1] + if T <: Number + print(io, "\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ") + printstyled(io, "a[1, 2]", color=:cyan) + print(io, " rather than a[1][2]") + elseif isType(T) + Tx = T.parameters[1] + print(io, "\nYou attempted to index the type $Tx, rather than an instance of the type. 
Make sure you create the type using its constructor: ") + printstyled(io, "d = $Tx([...])", color=:cyan) + print(io, " rather than d = $Tx") + end + end +end + +Experimental.register_error_hint(nonsetable_type_hint_handler, MethodError) + # Display a hint in case the user tries to use the + operator on strings # (probably attempting concatenation) function string_concatenation_hint_handler(io, ex, arg_types, kwargs) @nospecialize - if (ex.f === +) && all(i -> i <: AbstractString, arg_types) + if (ex.f === +) && !isempty(arg_types) && all(i -> i <: AbstractString, arg_types) print(io, "\nString concatenation is performed with ") printstyled(io, "*", color=:cyan) print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).") @@ -1016,6 +1077,70 @@ end Experimental.register_error_hint(string_concatenation_hint_handler, MethodError) +# Display a hint in case the user tries to use the min or max function on an iterable +# or tries to use something like `collect` on an iterator without defining either IteratorSize or length +function methods_on_iterable(io, ex, arg_types, kwargs) + @nospecialize + f = ex.f + if (f === max || f === min) && length(arg_types) == 1 && Base.isiterable(only(arg_types)) + f_correct = f === max ? "maximum" : "minimum" + print(io, "\nFinding the $f_correct of an iterable is performed with `$f_correct`.") + end + if (f === Base.length || f === Base.size) && length(arg_types) >= 1 + arg_type_tuple = Tuple{arg_types...} + if hasmethod(iterate, arg_type_tuple) + iterkind = IteratorSize(arg_types[1]) + if iterkind isa HasLength + print(io, "\nYou may need to implement the `length` method or define `IteratorSize` for this type to be `SizeUnknown`.") + elseif iterkind isa HasShape + print(io, "\nYou may need to implement the `length` and `size` methods for `IteratorSize` `HasShape`.") + end + end + end + nothing +end + +Experimental.register_error_hint(methods_on_iterable, MethodError) + +# Display a hint in case the user tries to access non-member fields of container type datastructures +function fielderror_dict_hint_handler(io, exc) + @nospecialize + field = exc.field + type = exc.type + if type <: AbstractDict + print(io, "\nDid you mean to access dict values using key: `:$field` ? 
Consider using indexing syntax ") + printstyled(io, "dict[:$(field)]", color=:cyan) + println(io) + end +end + +Experimental.register_error_hint(fielderror_dict_hint_handler, FieldError) + +function fielderror_listfields_hint_handler(io, exc) + fields = fieldnames(exc.type) + if isempty(fields) + print(io, "; $(nameof(exc.type)) has no fields at all.") + else + print(io, ", available fields: $(join(map(k -> "`$k`", fields), ", "))") + end + props = _propertynames_bytype(exc.type) + isnothing(props) && return + props = setdiff(props, fields) + isempty(props) && return + print(io, "\nAvailable properties: $(join(map(k -> "`$k`", props), ", "))") +end + +function _propertynames_bytype(T::Type) + which(propertynames, (T,)) === which(propertynames, (Any,)) && return nothing + inferred_names = promote_op(Val∘propertynames, T) + inferred_names isa DataType && inferred_names <: Val || return nothing + inferred_names = inferred_names.parameters[1] + inferred_names isa NTuple{<:Any, Symbol} || return nothing + return Symbol[inferred_names[i] for i in 1:length(inferred_names)] +end + +Experimental.register_error_hint(fielderror_listfields_hint_handler, FieldError) + # ExceptionStack implementation size(s::ExceptionStack) = size(s.stack) getindex(s::ExceptionStack, i::Int) = s.stack[i] diff --git a/base/essentials.jl b/base/essentials.jl index 97f32483a6b14..fa5cf79192f56 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -1,17 +1,20 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -import Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, arrayref +using Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, memoryrefnew, memoryrefget, memoryrefset! const Callable = Union{Function,Type} const Bottom = Union{} # Define minimal array interface here to help code used in macros: -length(a::Array) = arraylen(a) +length(a::Array{T, 0}) where {T} = 1 +length(a::Array{T, 1}) where {T} = getfield(a, :size)[1] +length(a::Array{T, 2}) where {T} = (sz = getfield(a, :size); sz[1] * sz[2]) +# other sizes are handled by generic prod definition for AbstractArray +length(a::GenericMemory) = getfield(a, :length) +throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I))) -# This is more complicated than it needs to be in order to get Win64 through bootstrap -eval(:(getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1))) -eval(:(getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@inline; arrayref($(Expr(:boundscheck)), A, i1, i2, I...)))) +# multidimensional getindex will be defined later on ==(a::GlobalRef, b::GlobalRef) = a.mod === b.mod && a.name === b.name @@ -88,9 +91,9 @@ f(y) = [x for x in y] standard ones) on type-inference. Use [`Base.@nospecializeinfer`](@ref) together with `@nospecialize` to additionally suppress inference. -# Example +# Examples -```julia +```julia-repl julia> f(A::AbstractArray) = g(A) f (generic function with 1 method) @@ -178,25 +181,11 @@ macro isdefined(s::Symbol) return Expr(:escape, Expr(:isdefined, s)) end -""" - nameof(m::Module) -> Symbol - -Get the name of a `Module` as a [`Symbol`](@ref). 
- -# Examples -```jldoctest -julia> nameof(Base.Broadcast) -:Broadcast -``` -""" -nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m) +_nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m) function _is_internal(__module__) - if ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module === Core.Compiler || - nameof(__module__) === :Base - return true - end - return false + return _nameof(__module__) === :Base || + _nameof(ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module) === :Compiler end # can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping) @@ -208,7 +197,11 @@ macro _total_meta() #=:terminates_globally=#true, #=:terminates_locally=#false, #=:notaskstate=#true, - #=:inaccessiblememonly=#true)) + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#true)) end # can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping) macro _foldable_meta() @@ -218,8 +211,102 @@ macro _foldable_meta() #=:nothrow=#false, #=:terminates_globally=#true, #=:terminates_locally=#false, + #=:notaskstate=#true, + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#true)) +end +# can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping) +macro _terminates_locally_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#false, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) +end +# can be used in place of `@assume_effects :terminates_globally` (supposed to be used for bootstrapping) +macro _terminates_globally_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#true, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) +end +# can be used in place of `@assume_effects :terminates_globally :notaskstate` (supposed to be used for bootstrapping) +macro _terminates_globally_notaskstate_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#true, + #=:terminates_locally=#true, + #=:notaskstate=#true, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) +end +# can be used in place of `@assume_effects :terminates_globally :noub` (supposed to be used for bootstrapping) +macro _terminates_globally_noub_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#true, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) +end +# can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping) +macro _effect_free_terminates_locally_meta() + return 
_is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#true, + #=:nothrow=#false, + #=:terminates_globally=#false, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) +end +# can be used in place of `@assume_effects :nothrow :noub` (supposed to be used for bootstrapping) +macro _nothrow_noub_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#true, + #=:terminates_globally=#false, + #=:terminates_locally=#false, #=:notaskstate=#false, - #=:inaccessiblememonly=#true)) + #=:inaccessiblememonly=#false, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) end # can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping) macro _nothrow_meta() @@ -230,37 +317,89 @@ macro _nothrow_meta() #=:terminates_globally=#false, #=:terminates_locally=#false, #=:notaskstate=#false, - #=:inaccessiblememonly=#false)) + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) end -# can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping) -macro _terminates_locally_meta() +# can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping) +macro _noub_meta() return _is_internal(__module__) && Expr(:meta, Expr(:purity, #=:consistent=#false, #=:effect_free=#false, #=:nothrow=#false, #=:terminates_globally=#false, - #=:terminates_locally=#true, + #=:terminates_locally=#false, #=:notaskstate=#false, - #=:inaccessiblememonly=#false)) + #=:inaccessiblememonly=#false, + #=:noub=#true, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) end -# can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping) -macro _effect_free_terminates_locally_meta() +# can be used in place of `@assume_effects :notaskstate` (supposed to be used for bootstrapping) +macro _notaskstate_meta() return _is_internal(__module__) && Expr(:meta, Expr(:purity, #=:consistent=#false, - #=:effect_free=#true, + #=:effect_free=#false, #=:nothrow=#false, #=:terminates_globally=#false, - #=:terminates_locally=#true, + #=:terminates_locally=#false, + #=:notaskstate=#true, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false, + #=:consistent_overlay=#false, + #=:nortcall=#false)) +end +# can be used in place of `@assume_effects :noub_if_noinbounds` (supposed to be used for bootstrapping) +macro _noub_if_noinbounds_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#false, + #=:terminates_locally=#false, #=:notaskstate=#false, - #=:inaccessiblememonly=#false)) + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#true, + #=:consistent_overlay=#false, + #=:nortcall=#false)) end # another version of inlining that propagates an inbounds context macro _propagate_inbounds_meta() return Expr(:meta, :inline, :propagate_inbounds) end +macro _nospecializeinfer_meta() + return Expr(:meta, :nospecializeinfer) +end -function iterate end +default_access_order(a::GenericMemory{:not_atomic}) = :not_atomic 
+default_access_order(a::GenericMemory{:atomic}) = :monotonic +default_access_order(a::GenericMemoryRef{:not_atomic}) = :not_atomic +default_access_order(a::GenericMemoryRef{:atomic}) = :monotonic + +getindex(A::GenericMemory, i::Int) = (@_noub_if_noinbounds_meta; + memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false)) +getindex(A::GenericMemoryRef) = memoryrefget(A, default_access_order(A), @_boundscheck) + +""" + nameof(m::Module) -> Symbol + +Get the name of a `Module` as a [`Symbol`](@ref). + +# Examples +```jldoctest +julia> nameof(Base.Broadcast) +:Broadcast +``` +""" +nameof(m::Module) = (@_total_meta; ccall(:jl_module_name, Ref{Symbol}, (Any,), m)) + +typeof(function iterate end).name.constprop_heuristic = Core.ITERATE_HEURISTIC """ convert(T, x) @@ -282,8 +421,9 @@ Stacktrace: [...] ``` -If `T` is a [`AbstractFloat`](@ref) type, -then it will return the closest value to `x` representable by `T`. +If `T` is a [`AbstractFloat`](@ref) type, then it will return the +closest value to `x` representable by `T`. Inf is treated as one +ulp greater than `floatmax(T)` for purposes of determining nearest. ```jldoctest julia> x = 1/3 @@ -327,10 +467,18 @@ Evaluate an expression with values interpolated into it using `eval`. If two arguments are provided, the first is the module to evaluate in. """ macro eval(ex) - return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), __module__, Expr(:quote, ex))) + return Expr(:let, Expr(:(=), :eval_local_result, + Expr(:escape, Expr(:call, GlobalRef(Core, :eval), __module__, Expr(:quote, ex)))), + Expr(:block, + Expr(:var"latestworld-if-toplevel"), + :eval_local_result)) end macro eval(mod, ex) - return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), mod, Expr(:quote, ex))) + return Expr(:let, Expr(:(=), :eval_local_result, + Expr(:escape, Expr(:call, GlobalRef(Core, :eval), mod, Expr(:quote, ex)))), + Expr(:block, + Expr(:var"latestworld-if-toplevel"), + :eval_local_result)) end # use `@eval` here to directly form `:new` expressions avoid implicit `convert`s @@ -346,7 +494,7 @@ end pairs(::Type{NamedTuple}) = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}} where {V, N, names, T<:NTuple{N, Any}} """ - Iterators.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)} + Base.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)} Transforms an indexable container into a Dictionary-view of the same data. Modifying the key-space of the underlying data may invalidate this object. @@ -375,6 +523,7 @@ tail(x::Tuple) = argtail(x...) 
tail(::Tuple{}) = throw(ArgumentError("Cannot call tail on an empty tuple.")) function unwrap_unionall(@nospecialize(a)) + @_foldable_meta while isa(a,UnionAll) a = a.body end @@ -382,6 +531,7 @@ function unwrap_unionall(@nospecialize(a)) end function rewrap_unionall(@nospecialize(t), @nospecialize(u)) + @_foldable_meta if !isa(u, UnionAll) return t end @@ -389,6 +539,7 @@ function rewrap_unionall(@nospecialize(t), @nospecialize(u)) end function rewrap_unionall(t::Core.TypeofVararg, @nospecialize(u)) + @_foldable_meta isdefined(t, :T) || return t if !isa(u, UnionAll) return t @@ -411,11 +562,17 @@ function rename_unionall(@nospecialize(u)) return UnionAll(nv, body{nv}) end +# remove concrete constraint on diagonal TypeVar if it comes from troot +function widen_diagonal(@nospecialize(t), troot::UnionAll) + body = ccall(:jl_widen_diagonal, Any, (Any, Any), t, troot) +end + function isvarargtype(@nospecialize(t)) return isa(t, Core.TypeofVararg) end function isvatuple(@nospecialize(t)) + @_foldable_meta t = unwrap_unionall(t) if isa(t, DataType) n = length(t.parameters) @@ -436,15 +593,7 @@ function unconstrain_vararg_length(va::Core.TypeofVararg) return Vararg{unwrapva(va)} end -typename(a) = error("typename does not apply to this type") -typename(a::DataType) = a.name -function typename(a::Union) - ta = typename(a.a) - tb = typename(a.b) - ta === tb || error("typename does not apply to unions whose components have different typenames") - return tb -end -typename(union::UnionAll) = typename(union.body) +import Core: typename _tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x)))) @@ -543,21 +692,46 @@ unsafe_convert(::Type{T}, x::T) where {T<:Ptr} = x # to resolve ambiguity with unsafe_convert(::Type{P}, x::Ptr) where {P<:Ptr} = convert(P, x) """ - reinterpret(type, x) + reinterpret(::Type{Out}, x::In) -Change the type-interpretation of the binary data in the primitive value `x` -to that of the primitive type `type`. -The size of `type` has to be the same as that of the type of `x`. +Change the type-interpretation of the binary data in the isbits value `x` +to that of the isbits type `Out`. +The size (ignoring padding) of `Out` has to be the same as that of the type of `x`. For example, `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a -[`Float32`](@ref). +[`Float32`](@ref). Note that `reinterpret(In, reinterpret(Out, x)) === x` -# Examples ```jldoctest julia> reinterpret(Float32, UInt32(7)) 1.0f-44 + +julia> reinterpret(NTuple{2, UInt8}, 0x1234) +(0x34, 0x12) + +julia> reinterpret(UInt16, (0x34, 0x12)) +0x1234 + +julia> reinterpret(Tuple{UInt16, UInt8}, (0x01, 0x0203)) +(0x0301, 0x02) ``` + +!!! note + + The treatment of padding differs from reinterpret(::DataType, ::AbstractArray). + +!!! warning + + Use caution if some combinations of bits in `Out` are not considered valid and would + otherwise be prevented by the type's constructors and methods. Unexpected behavior + may result without additional validation. + """ -reinterpret(::Type{T}, x) where {T} = bitcast(T, x) +function reinterpret(::Type{Out}, x) where {Out} + if isprimitivetype(Out) && isprimitivetype(typeof(x)) + return bitcast(Out, x) + end + # only available when Base is fully loaded. 
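+    # (Illustrative note: non-primitive isbits cases from the docstring above, such as
+    # `reinterpret(NTuple{2, UInt8}, 0x1234)`, cannot be `bitcast` and take this path.)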
+ return _reinterpret(Out, x) +end """ sizeof(T::DataType) @@ -621,9 +795,6 @@ julia> ifelse(1 > 2, 1, 2) """ ifelse(condition::Bool, x, y) = Core.ifelse(condition, x, y) -# simple Array{Any} operations needed for bootstrap -@eval setindex!(A::Array{Any}, @nospecialize(x), i::Int) = arrayset($(Expr(:boundscheck)), A, x, i) - """ esc(e) @@ -703,11 +874,11 @@ end Using `@inbounds` may return incorrect results/crashes/corruption for out-of-bounds indices. The user is responsible for checking it manually. - Only use `@inbounds` when it is certain from the information locally available - that all accesses are in bounds. In particular, using `1:length(A)` instead of - `eachindex(A)` in a function like the one above is _not_ safely inbounds because - the first index of `A` may not be `1` for all user defined types that subtype - `AbstractArray`. + Only use `@inbounds` when you are certain that all accesses are in bounds (as + undefined behavior, e.g. crashes, might occur if this assertion is violated). For + example, using `1:length(A)` instead of `eachindex(A)` in a function like + the one above is _not_ safely inbounds because the first index of `A` may not + be `1` for all user defined types that subtype `AbstractArray`. """ macro inbounds(blk) return Expr(:block, @@ -739,9 +910,26 @@ macro goto(name::Symbol) return esc(Expr(:symbolicgoto, name)) end +# linear indexing +function getindex(A::Array, i::Int) + @_noub_if_noinbounds_meta + @boundscheck ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A))) || throw_boundserror(A, (i,)) + memoryrefget(memoryrefnew(getfield(A, :ref), i, false), :not_atomic, false) +end +# simple Array{Any} operations needed for bootstrap +function setindex!(A::Array{Any}, @nospecialize(x), i::Int) + @_noub_if_noinbounds_meta + @boundscheck ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A))) || throw_boundserror(A, (i,)) + memoryrefset!(memoryrefnew(getfield(A, :ref), i, false), x, :not_atomic, false) + return A +end +setindex!(A::Memory{Any}, @nospecialize(x), i::Int) = (memoryrefset!(memoryrefnew(memoryrefnew(A), i, @_boundscheck), x, :not_atomic, @_boundscheck); A) +setindex!(A::MemoryRef{T}, x) where {T} = (memoryrefset!(A, convert(T, x), :not_atomic, @_boundscheck); A) +setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomic, @_boundscheck); A) + # SimpleVector -@eval getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref($(Expr(:boundscheck)), v, i)) +getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i)) function length(v::SimpleVector) @_total_meta t = @_gc_preserve_begin v @@ -862,6 +1050,7 @@ call obsolete versions of a function `f`. Prior to Julia 1.9, this function was not exported, and was called as `Base.invokelatest`. """ function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...) + @inline kwargs = merge(NamedTuple(), kwargs) if isempty(kwargs) return Core._call_latest(f, args...) @@ -896,6 +1085,7 @@ of [`invokelatest`](@ref). world age refers to system state unrelated to the main Julia session. """ function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; kwargs...) + @inline kwargs = Base.merge(NamedTuple(), kwargs) if isempty(kwargs) return Core._call_in_world(world, f, args...) @@ -903,6 +1093,12 @@ function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; k return Core._call_in_world(world, Core.kwcall, kwargs, f, args...) 
end +""" + inferencebarrier(x) + +A shorthand for `compilerbarrier(:type, x)` causes the type of this statement to be inferred as `Any`. +See [`Base.compilerbarrier`](@ref) for more info. +""" inferencebarrier(@nospecialize(x)) = compilerbarrier(:type, x) """ @@ -913,9 +1109,10 @@ Determine whether a collection is empty (has no elements). !!! warning `isempty(itr)` may consume the next element of a stateful iterator `itr` - unless an appropriate `Base.isdone(itr)` or `isempty` method is defined. - Use of `isempty` should therefore be avoided when writing generic - code which should support any iterator type. + unless an appropriate [`Base.isdone(itr)`](@ref) method is defined. + Stateful iterators *should* implement `isdone`, but you may want to avoid + using `isempty` when writing generic code which should support any iterator + type. # Examples ```jldoctest @@ -1024,17 +1221,21 @@ end # Iteration """ - isdone(itr, state...) -> Union{Bool, Missing} + isdone(itr, [state]) -> Union{Bool, Missing} This function provides a fast-path hint for iterator completion. -This is useful for mutable iterators that want to avoid having elements -consumed, if they are not going to be exposed to the user (e.g. to check -for done-ness in `isempty` or `zip`). Mutable iterators that want to -opt into this feature should define an isdone method that returns -true/false depending on whether the iterator is done or not. Stateless -iterators need not implement this function. If the result is `missing`, -callers may go ahead and compute `iterate(x, state...) === nothing` to -compute a definite answer. +This is useful for stateful iterators that want to avoid having elements +consumed if they are not going to be exposed to the user (e.g. when checking +for done-ness in `isempty` or `zip`). + +Stateful iterators that want to opt into this feature should define an `isdone` +method that returns true/false depending on whether the iterator is done or +not. Stateless iterators need not implement this function. + +If the result is `missing`, callers may go ahead and compute +`iterate(x, state) === nothing` to compute a definite answer. + +See also [`iterate`](@ref), [`isempty`](@ref) """ isdone(itr, state...) = missing @@ -1056,3 +1257,63 @@ that is whether it has an `iterate` method or not. function isiterable(T)::Bool return hasmethod(iterate, Tuple{T}) end + +""" + @world(sym, world) + +Resolve the binding `sym` in world `world`. See [`invoke_in_world`](@ref) for running +arbitrary code in fixed worlds. `world` may be `UnitRange`, in which case the macro +will error unless the binding is valid and has the same value across the entire world +range. + +The `@world` macro is primarily used in the printing of bindings that are no longer +available in the current world. + +## Example +```julia-repl +julia> struct Foo; a::Int; end +Foo + +julia> fold = Foo(1) + +julia> Int(Base.get_world_counter()) +26866 + +julia> struct Foo; a::Int; b::Int end +Foo + +julia> fold +@world(Foo, 26866)(1) +``` + +!!! compat "Julia 1.12" + This functionality requires at least Julia 1.12. +""" +macro world(sym, world) + if isa(sym, Symbol) + return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(GlobalRef(__module__, sym))))) + elseif isa(sym, GlobalRef) + return :($(_resolve_in_world)($(esc(world)), $(QuoteNode(sym)))) + elseif isa(sym, Expr) && sym.head === :(.) 
&& + length(sym.args) == 2 && isa(sym.args[2], QuoteNode) && isa(sym.args[2].value, Symbol) + return :($(_resolve_in_world)($(esc(world)), $(GlobalRef)($(esc(sym.args[1])), $(sym.args[2])))) + else + error("`@world` requires a symbol or GlobalRef") + end +end + +_resolve_in_world(world::Integer, gr::GlobalRef) = + invoke_in_world(UInt(world), Core.getglobal, gr.mod, gr.name) + +# Special constprop heuristics for various binary opes +typename(typeof(function + end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function - end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function * end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function == end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function != end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function <= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function >= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function < end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function > end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function << end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC +typename(typeof(function >> end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC diff --git a/base/experimental.jl b/base/experimental.jl index cc8d368023b49..17871b4f346d6 100644 --- a/base/experimental.jl +++ b/base/experimental.jl @@ -9,7 +9,7 @@ """ module Experimental -using Base: Threads, sync_varname +using Base: Threads, sync_varname, is_function_def, @propagate_inbounds using Base.Meta """ @@ -28,10 +28,7 @@ end Base.IndexStyle(::Type{<:Const}) = IndexLinear() Base.size(C::Const) = size(C.a) Base.axes(C::Const) = axes(C.a) -@eval Base.getindex(A::Const, i1::Int) = - (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)) -@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) = - (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...)) +@propagate_inbounds Base.getindex(A::Const, i1::Int, I::Int...) = A.a[i1, I...] """ @aliasscope expr @@ -86,11 +83,16 @@ end """ Experimental.@sync -Wait until all lexically-enclosed uses of `@async`, `@spawn`, `@spawnat` and `@distributed` +Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn), +`Distributed.@spawnat` and `Distributed.@distributed` are complete, or at least one of them has errored. The first exception is immediately rethrown. It is the responsibility of the user to cancel any still-running operations during error handling. +!!! Note + This is different to [`@sync`](@ref) in that errors from wrapped tasks are thrown immediately, + potentially before all tasks have returned. + !!! Note This interface is experimental and subject to change or removal without notice. """ @@ -141,7 +143,7 @@ code to resort to runtime dispatch instead. Supported values are `1`, `2`, `3`, `4`, and `default` (currently equivalent to `3`). """ macro max_methods(n::Int) - 0 < n < 5 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.") + 1 <= n <= 4 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.") return Expr(:meta, :max_methods, n) end @@ -154,13 +156,13 @@ for max_methods. This setting is global for the entire generic function (or more the MethodTable). 
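+
+For example (an illustrative sketch; `f` is just a placeholder generic function):
+```julia
+Base.Experimental.@max_methods 1 function f end
+f(x::Int) = 1
+f(x::Float64) = 2.0
+```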
""" macro max_methods(n::Int, fdef::Expr) - 0 < n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.") + 1 <= n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.") (fdef.head === :function && length(fdef.args) == 1) || error("Second argument must be a function forward declaration") return :(typeof($(esc(fdef))).name.max_methods = $(UInt8(n))) end """ - Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no} max_methods={default,1,2,3,...} + Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no} max_methods={default,1,2,3,4} Set compiler options for code in the enclosing module. Options correspond directly to command-line options with the same name, where applicable. The following options @@ -193,7 +195,7 @@ macro compiler_options(args...) elseif ex.args[1] === :max_methods a = ex.args[2] a = a === :default ? 3 : - a isa Int ? ((0 < a < 5) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) : + a isa Int ? ((1 <= a <= 4) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) : error("invalid argument to \"max_methods\" option") push!(opts.args, Expr(:meta, :max_methods, a)) else @@ -252,7 +254,7 @@ When issuing a hint, the output should typically start with `\\n`. If you define custom exception types, your `showerror` method can support hints by calling [`Experimental.show_error_hints`](@ref). -# Example +# Examples ``` julia> module Hinter @@ -278,6 +280,7 @@ Then if you call `Hinter.only_int` on something that isn't an `Int` (thereby tri ``` julia> Hinter.only_int(1.0) ERROR: MethodError: no method matching only_int(::Float64) +The function `only_int` exists, but no method is defined for this combination of argument types. Did you mean to call `any_number`? Closest candidates are: ... @@ -315,10 +318,10 @@ function show_error_hints(io, ex, args...) isnothing(hinters) && return for handler in hinters try - Base.invokelatest(handler, io, ex, args...) - catch err + @invokelatest handler(io, ex, args...) + catch tn = typeof(handler).name - @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error" + @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error" exception=current_exceptions() end end end @@ -327,27 +330,113 @@ end include("opaque_closure.jl") """ - Experimental.@overlay mt [function def] + Base.Experimental.@overlay mt def Define a method and add it to the method table `mt` instead of to the global method table. This can be used to implement a method override mechanism. Regular compilation will not consider these methods, and you should customize the compilation flow to look in these method tables (e.g., using [`Core.Compiler.OverlayMethodTable`](@ref)). +!!! note + Please be aware that when defining overlay methods using `@overlay`, it is not necessary + to have an original method that corresponds exactly in terms of how the method dispatches. + This means that the method overlay mechanism enabled by `@overlay` is not implemented by + replacing the methods themselves, but through an additional and prioritized method + lookup during the method dispatch. 
+ + Considering this, it is important to understand that in compilations using an overlay + method table like the following, the method dispatched by `callx(x)` is not the regular + method `callx(::Float64)`, but the overlay method `callx(x::Real)`: + ```julia + callx(::Real) = :real + @overlay SOME_OVERLAY_MT callx(::Real) = :overlay_real + callx(::Float64) = :float64 + + # some overlay callsite + let x::Float64 + callx(x) #> :overlay_real + end + ``` """ macro overlay(mt, def) - def = macroexpand(__module__, def) # to expand @inline, @generated, etc - if !isexpr(def, [:function, :(=)]) - error("@overlay requires a function Expr") - end - if isexpr(def.args[1], :call) - def.args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1]) - elseif isexpr(def.args[1], :where) - def.args[1].args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1].args[1]) + inner = Base.unwrap_macrocalls(def) + is_function_def(inner) || error("@overlay requires a function definition") + overlay_def!(mt, inner) + return esc(def) +end + +""" + Base.Experimental.@consistent_overlay mt def + +This macro operates almost identically to [`Base.Experimental.@overlay`](@ref), defining a +new overlay method. The key difference with this macro is that it informs the compiler that +the invocation of the overlay method it defines is `:consistent` with a regular, +non-overlayed method call. + +More formally, when evaluating a generic function call ``f(x)`` at a specific world age +``i``, if a regular method call ``fᵢ(x)`` is redirected to an overlay method call ``fᵢ′(x)`` +defined by this macro, ``fᵢ(x)`` and ``fᵢ′(x)`` are considered `:consistent` if the following +conditions are met: +- If ``fᵢ(x)`` returns a value ``y``, then ``fᵢ′(x)`` also returns some value ``yᵢ``, and ``y ≡ yᵢ`` holds. +- If ``fᵢ(x)`` throws an exception, then ``fᵢ′(x)`` also throws some exception. + +For a detailed definition of `:consistent`-cy, consult the corresponding section in +[`Base.@assume_effects`](@ref). + +!!! note + Note that the requirements for `:consistent`-cy include not only that the return values + are egal, but also that the manner of termination is the same. However, it's important + to aware that when they throw exceptions, the exceptions themselves don't necessarily + have to be egal. In other words, if ``fᵢ(x)`` throws an exception, ``fᵢ′(x)`` is + required to also throw one, but the exact exceptions may differ. + +!!! note + Please note that the `:consistent`-cy requirement applies not to method itself but to + _method invocation_. This means that for the use of `@consistent_overlay`, it is + necessary for method invocations with the native regular compilation and those with + a compilation with overlay method table to be `:consistent`. + + For example, it is important to understand that, `@consistent_overlay` can be used like + the following: + ```julia + callsin(x::Real) = x < 0 ? error(x) : sin(x) + @consistent_overlay SOME_OVERLAY_MT callsin(x::Float64) = + x < 0 ? error_somehow(x) : sin(x) + ``` + However, be aware that this `@consistent_overlay` will immediately become invalid if a + new method for `callsin` is defined subsequently, such as: + ```julia + callsin(x::Float64) = cos(x) + ``` + + This specifically implies that the use of `@consistent_overlay` should be restricted as + much as possible to cases where a regular method with a concrete signature is replaced + by an overlay method with the same concrete signature. 
+ + This constraint is closely related to the note in [`Base.Experimental.@overlay`](@ref); + you are advised to consult that as well. +""" +macro consistent_overlay(mt, def) + inner = Base.unwrap_macrocalls(def) + is_function_def(inner) || error("@consistent_overlay requires a function definition") + overlay_def!(mt, inner) + override = Base.EffectsOverride(; consistent_overlay=true) + Base.pushmeta!(def::Expr, Base.form_purity_expr(override)) + return esc(def) +end + +function overlay_def!(mt, @nospecialize ex) + arg1 = ex.args[1] + if isexpr(arg1, :call) + arg1.args[1] = Expr(:overlay, mt, arg1.args[1]) + elseif isexpr(arg1, :(::)) + overlay_def!(mt, arg1) + elseif isexpr(arg1, :where) + overlay_def!(mt, arg1) else - error("@overlay requires a function Expr") + error("@overlay requires a function definition") end - esc(def) + return ex end let new_mt(name::Symbol, mod::Module) = begin @@ -360,12 +449,132 @@ let new_mt(name::Symbol, mod::Module) = begin end """ - Experimental.@MethodTable(name) + Base.Experimental.@MethodTable name Create a new MethodTable in the current module, bound to `name`. This method table can be -used with the [`Experimental.@overlay`](@ref) macro to define methods for a function without -adding them to the global method table. +used with the [`Base.Experimental.@overlay`](@ref) macro to define methods for a function +without adding them to the global method table. """ :@MethodTable +""" + Base.Experimental.make_io_thread() + +Create a new thread that will run the Julia IO loop. This can potentially reduce the latency of some +IO operations as they no longer depend on the main thread to run it. This does mean that code that uses +this as implicit synchronization needs to be checked for correctness. +""" +function make_io_thread() + tid = UInt[0] + threadwork = @cfunction function(arg::Ptr{Cvoid}) + current_task().donenotify = Base.ThreadSynchronizer() #TODO: Should this happen by default in adopt thread? + Base.errormonitor(current_task()) # this may not go particularly well if the IO loop is dead, but try anyways + @ccall jl_set_io_loop_tid((Threads.threadid() - 1)::Int16)::Cvoid + wait() # spin uv_run as long as needed + nothing + end Cvoid (Ptr{Cvoid},) + err = @ccall uv_thread_create(tid::Ptr{UInt}, threadwork::Ptr{Cvoid}, C_NULL::Ptr{Cvoid})::Cint + err == 0 || Base.uv_error("uv_thread_create", err) + @ccall uv_thread_detach(tid::Ptr{UInt})::Cint + err == 0 || Base.uv_error("uv_thread_detach", err) + # n.b. this does not wait for the thread to start or to take ownership of the event loop +end + +""" + Base.Experimental.entrypoint(f, argtypes::Tuple) + +Mark a method for inclusion when the `--trim` option is specified. +""" +function entrypoint(@nospecialize(f), @nospecialize(argtypes::Tuple)) + entrypoint(Tuple{Core.Typeof(f), argtypes...}) +end + +function entrypoint(@nospecialize(argt::Type)) + ccall(:jl_add_entrypoint, Int32, (Any,), argt) + nothing +end + +""" + Base.Experimental.disable_new_worlds() + +Mark that no new worlds (methods additions, deletions, etc) are permitted to be created at +any future time, allowing for lower latencies for some operations and slightly lower memory +usage, by eliminating the tracking of those possible invalidation. +""" +disable_new_worlds() = ccall(:jl_disable_new_worlds, Cvoid, ()) + +### Task metrics + +""" + Base.Experimental.task_metrics(::Bool) + +Enable or disable the collection of per-task metrics. 
+A `Task` created when `Base.Experimental.task_metrics(true)` is in effect will have +[`Base.Experimental.task_running_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref) +timing information available. + +!!! note + Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option. +""" +function task_metrics(b::Bool) + if b + ccall(:jl_task_metrics_enable, Cvoid, ()) + else + ccall(:jl_task_metrics_disable, Cvoid, ()) + end + return nothing end + +""" + Base.Experimental.task_running_time_ns(t::Task) -> Union{UInt64, Nothing} + +Return the total nanoseconds that the task `t` has spent running. +This metric is only updated when `t` yields or completes unless `t` is the current task, in +which it will be updated continuously. +See also [`Base.Experimental.task_wall_time_ns`](@ref). + +Returns `nothing` if task timings are not enabled. +See [`Base.Experimental.task_metrics`](@ref). + +!!! note "This metric is from the Julia scheduler" + A task may be running on an OS thread that is descheduled by the OS + scheduler, this time still counts towards the metric. + +!!! compat "Julia 1.12" + This method was added in Julia 1.12. +""" +function task_running_time_ns(t::Task=current_task()) + t.metrics_enabled || return nothing + if t == current_task() + # These metrics fields can't update while we're running. + # But since we're running we need to include the time since we last started running! + return t.running_time_ns + (time_ns() - t.last_started_running_at) + else + return t.running_time_ns + end +end + +""" + Base.Experimental.task_wall_time_ns(t::Task) -> Union{UInt64, Nothing} + +Return the total nanoseconds that the task `t` was runnable. +This is the time since the task first entered the run queue until the time at which it +completed, or until the current time if the task has not yet completed. +See also [`Base.Experimental.task_running_time_ns`](@ref). + +Returns `nothing` if task timings are not enabled. +See [`Base.Experimental.task_metrics`](@ref). + +!!! compat "Julia 1.12" + This method was added in Julia 1.12. 
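As a hedged usage sketch tying the three task-metrics functions together (the workload is arbitrary; timings are `nothing` for tasks created while collection is disabled):

```julia
Base.Experimental.task_metrics(true)       # enable collection for tasks created from now on
t = Threads.@spawn sum(rand(10^6))
wait(t)
Base.Experimental.task_running_time_ns(t)  # ns the task actually spent running
Base.Experimental.task_wall_time_ns(t)     # ns from first enqueue until completion
Base.Experimental.task_metrics(false)
```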
+""" +function task_wall_time_ns(t::Task=current_task()) + t.metrics_enabled || return nothing + start_at = t.first_enqueued_at + start_at == 0 && return UInt64(0) + end_at = t.finished_at + end_at == 0 && return time_ns() - start_at + return end_at - start_at +end + +end # module diff --git a/base/exports.jl b/base/exports.jl index 10f43825e12df..547cc802c28eb 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -58,6 +58,7 @@ export IOBuffer, IOStream, LinRange, + Lockable, Irrational, LazyString, Matrix, @@ -65,9 +66,13 @@ export Missing, NTuple, IdDict, + IdSet, OrdinalRange, Pair, PartialQuickSort, + OncePerProcess, + OncePerTask, + OncePerThread, PermutedDimsArray, QuickSort, Rational, @@ -405,12 +410,14 @@ export indexin, argmax, argmin, + insertdims, invperm, invpermute!, isassigned, isperm, issorted, last, + logrange, mapslices, max, maximum!, @@ -532,6 +539,7 @@ export getkey, haskey, in, + in!, intersect!, intersect, isdisjoint, @@ -546,6 +554,7 @@ export mapfoldl, mapfoldr, mapreduce, + memoryref, merge!, mergewith!, merge, @@ -590,9 +599,11 @@ export codepoint, codeunit, codeunits, + ctruncate, digits, digits!, eachsplit, + eachrsplit, escape_string, hex2bytes, hex2bytes!, @@ -613,6 +624,7 @@ export join, lpad, lstrip, + ltruncate, ncodeunits, ndigits, nextind, @@ -625,6 +637,7 @@ export rpad, rsplit, rstrip, + rtruncate, split, string, strip, @@ -701,6 +714,8 @@ export yield, yieldto, wait, + waitany, + waitall, timedwait, asyncmap, asyncmap!, @@ -709,6 +724,7 @@ export # channels take!, put!, + isfull, isready, fetch, bind, @@ -748,6 +764,7 @@ export swapproperty!, modifyproperty!, replaceproperty!, + setpropertyonce!, fieldoffset, fieldname, fieldnames, @@ -857,6 +874,8 @@ export readline, readlines, readuntil, + copyuntil, + copyline, redirect_stdio, redirect_stderr, redirect_stdin, @@ -922,6 +941,7 @@ export isblockdev, ischardev, isdir, + isexecutable, isfifo, isfile, islink, @@ -1026,6 +1046,7 @@ export @elapsed, @allocated, @allocations, + @lock_conflicts, # tasks @sync, @@ -1046,6 +1067,7 @@ export @simd, @inline, @noinline, + @outline, @nospecialize, @specialize, @polly, @@ -1054,10 +1076,13 @@ export @atomic, @atomicswap, @atomicreplace, + @atomiconce, @__dot__, @enum, @label, @goto, @view, @views, - @static + @static, + + @main diff --git a/base/expr.jl b/base/expr.jl index e007306063db1..e202ed9b69c31 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -39,33 +39,39 @@ isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head isexpr(@nospecialize(ex), head::Symbol, n::Int) = isa(ex, Expr) && ex.head === head && length(ex.args) == n copy(e::Expr) = exprarray(e.head, copy_exprargs(e.args)) +function copy(x::PhiNode) + values = x.values + nvalues = length(values) + new_values = Vector{Any}(undef, nvalues) + @inbounds for i = 1:nvalues + isassigned(values, i) || continue + new_values[i] = copy_exprs(values[i]) + end + return PhiNode(copy(x.edges), new_values) +end +function copy(x::PhiCNode) + values = x.values + nvalues = length(values) + new_values = Vector{Any}(undef, nvalues) + @inbounds for i = 1:nvalues + isassigned(values, i) || continue + new_values[i] = copy_exprs(values[i]) + end + return PhiCNode(new_values) +end # copy parts of an AST that the compiler mutates function copy_exprs(@nospecialize(x)) if isa(x, Expr) return copy(x) elseif isa(x, PhiNode) - values = x.values - nvalues = length(values) - new_values = Vector{Any}(undef, nvalues) - @inbounds for i = 1:nvalues - isassigned(values, i) || continue - new_values[i] = 
copy_exprs(values[i]) - end - return PhiNode(copy(x.edges), new_values) + return copy(x) elseif isa(x, PhiCNode) - values = x.values - nvalues = length(values) - new_values = Vector{Any}(undef, nvalues) - @inbounds for i = 1:nvalues - isassigned(values, i) || continue - new_values[i] = copy_exprs(values[i]) - end - return PhiCNode(new_values) + return copy(x) end return x end -copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in 1:length(x)] +copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in eachindex(x)] @eval exprarray(head::Symbol, arg::Array{Any,1}) = $(Expr(:new, :Expr, :head, :arg)) @@ -76,12 +82,10 @@ function copy(c::CodeInfo) cnew.slotnames = copy(cnew.slotnames) cnew.slotflags = copy(cnew.slotflags) if cnew.slottypes !== nothing - cnew.slottypes = copy(cnew.slottypes) + cnew.slottypes = copy(cnew.slottypes::Vector{Any}) end - cnew.codelocs = copy(cnew.codelocs) - cnew.linetable = copy(cnew.linetable::Union{Vector{Any},Vector{Core.LineInfoNode}}) cnew.ssaflags = copy(cnew.ssaflags) - cnew.edges = cnew.edges === nothing ? nothing : copy(cnew.edges::Vector) + cnew.edges = cnew.edges === nothing || cnew.edges isa Core.SimpleVector ? cnew.edges : copy(cnew.edges::Vector) ssavaluetypes = cnew.ssavaluetypes ssavaluetypes isa Vector{Any} && (cnew.ssavaluetypes = copy(ssavaluetypes)) return cnew @@ -126,9 +130,10 @@ function macroexpand(m::Module, @nospecialize(x); recursive=true) end """ - @macroexpand + @macroexpand [mod,] ex Return equivalent expression with all macros removed (expanded). +If two arguments are provided, the first is the module to evaluate in. There are differences between `@macroexpand` and [`macroexpand`](@ref). @@ -163,20 +168,28 @@ julia> M.f() ``` With `@macroexpand` the expression expands where `@macroexpand` appears in the code (module `M` in the example). With `macroexpand` the expression expands in the module given as the first argument. + +!!! compat "Julia 1.11" + The two-argument form requires at least Julia 1.11. """ macro macroexpand(code) return :(macroexpand($__module__, $(QuoteNode(code)), recursive=true)) end - +macro macroexpand(mod, code) + return :(macroexpand($(esc(mod)), $(QuoteNode(code)), recursive=true)) +end """ - @macroexpand1 + @macroexpand1 [mod,] ex Non recursive version of [`@macroexpand`](@ref). """ macro macroexpand1(code) return :(macroexpand($__module__, $(QuoteNode(code)), recursive=false)) end +macro macroexpand1(mod, code) + return :(macroexpand($(esc(mod)), $(QuoteNode(code)), recursive=false)) +end ## misc syntax ## @@ -343,39 +356,82 @@ macro noinline(x) end """ - @constprop setting [ex] + @outline expr + +Outline an expression into its own function, and call that function. + +This macro introduces a "function barrier", which can be helpful in some code optimization +scenarios. The expr is extracted into an outlined function, which is marked `@noinline`. + +Outlining an expr can be used to make a function smaller, e.g. by outlining an unlikely +branch, which could help with runtime performance by improving instruction cache locality, +and could help with compilation performance since 2 smaller functions can sometimes compile +faster than one larger function. Finally, outlining can be useful for type-stability, by +outlining a type unstable block within a hot loop, where the outlined function could be type +stable. 
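For instance, a hedged sketch of the type-stability case (the function and container names below are illustrative):

```julia
function total(rows)
    acc = 0.0
    for r in rows
        if r isa AbstractString
            # Rare, type-unstable branch: outlining it keeps the hot loop itself type-stable.
            acc += @outline parse(Float64, r)
        else
            acc += r
        end
    end
    return acc
end
```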
+ +A common use case is to @outline the code that throws exceptions, since this should be a +rare case, but it can introduce a lot of complexity to the generated code, which can +sometimes harm the compiler's ability to optimize. + +# Examples +```julia +function getindex(container, index) + if index < 1 || index > length(container) + # Outline this throw, since constructing a BoundsError requires boxing + # the arguments, which produces a lot of code. + @outline throw(BoundsError(container, index)) + end + return container.data[index] +end +``` +""" +macro outline(expr) + vars = esc.(_free_vars(expr)) + quote + @noinline outline($(vars...)) = $(esc(expr)) + + outline($(vars...)) + end +end +_free_vars(s::Symbol) = [s] +_free_vars(_) = [] +_free_vars(e::Expr) = isempty(e.args) ? [] : unique!(mapreduce(_free_vars, vcat, e.args)) + +""" + Base.@constprop setting [ex] Control the mode of interprocedural constant propagation for the annotated function. Two `setting`s are supported: -- `@constprop :aggressive [ex]`: apply constant propagation aggressively. +- `Base.@constprop :aggressive [ex]`: apply constant propagation aggressively. For a method where the return type depends on the value of the arguments, this can yield improved inference results at the cost of additional compile time. -- `@constprop :none [ex]`: disable constant propagation. This can reduce compile +- `Base.@constprop :none [ex]`: disable constant propagation. This can reduce compile times for functions that Julia might otherwise deem worthy of constant-propagation. Common cases are for functions with `Bool`- or `Symbol`-valued arguments or keyword arguments. -`@constprop` can be applied immediately before a function definition or within a function body. +`Base.@constprop` can be applied immediately before a function definition or within a function body. ```julia # annotate long-form definition -@constprop :aggressive function longdef(x) - ... +Base.@constprop :aggressive function longdef(x) + ... end # annotate short-form definition -@constprop :aggressive shortdef(x) = ... +Base.@constprop :aggressive shortdef(x) = ... # annotate anonymous function that a `do` block creates f() do - @constprop :aggressive + Base.@constprop :aggressive ... end ``` !!! compat "Julia 1.10" - The usage within a function body requires at least Julia 1.10. + The usage within a function body requires at least Julia 1.10. """ macro constprop(setting, ex) sym = constprop_setting(setting) @@ -388,71 +444,93 @@ macro constprop(setting) end function constprop_setting(@nospecialize setting) + s = setting isa(setting, QuoteNode) && (setting = setting.value) if setting === :aggressive return :aggressive_constprop elseif setting === :none return :no_constprop end - throw(ArgumentError(LazyString("@constprop "), setting, "not supported")) + throw(ArgumentError(LazyString("`Base.@constprop ", s, "` not supported"))) end """ - @assume_effects setting... [ex] + Base.@assume_effects setting... [ex] -Override the compiler's effect modeling for the given method or foreign call. -`@assume_effects` can be applied immediately before a function definition or within a function body. -It can also be applied immediately before a `@ccall` expression. - -!!! compat "Julia 1.8" - Using `Base.@assume_effects` requires Julia version 1.8. +Override the compiler's effect modeling. +This macro can be used in several contexts: +1. Immediately before a method definition, to override the entire effect modeling of the applied method. +2. 
Within a function body without any arguments, to override the entire effect modeling of the enclosing method. +3. Applied to a code block, to override the local effect modeling of the applied code block. # Examples ```jldoctest -julia> Base.@assume_effects :terminates_locally function pow(x) - # this :terminates_locally allows `pow` to be constant-folded +julia> Base.@assume_effects :terminates_locally function fact(x) + # usage 1: + # this :terminates_locally allows `fact` to be constant-folded res = 1 - 1 < x < 20 || error("bad pow") + 0 ≤ x < 20 || error("bad fact") while x > 1 res *= x x -= 1 end return res end -pow (generic function with 1 method) +fact (generic function with 1 method) julia> code_typed() do - pow(12) - end -1-element Vector{Any}: - CodeInfo( + fact(12) + end |> only +CodeInfo( 1 ─ return 479001600 ) => Int64 julia> code_typed() do map((2,3,4)) do x + # usage 2: # this :terminates_locally allows this anonymous function to be constant-folded Base.@assume_effects :terminates_locally res = 1 - 1 < x < 20 || error("bad pow") + 0 ≤ x < 20 || error("bad fact") while x > 1 res *= x x -= 1 end return res end - end -1-element Vector{Any}: - CodeInfo( + end |> only +CodeInfo( 1 ─ return (2, 6, 24) ) => Tuple{Int64, Int64, Int64} -julia> Base.@assume_effects :total !:nothrow @ccall jl_type_intersection(Vector{Int}::Any, Vector{<:Integer}::Any)::Any -Vector{Int64} (alias for Array{Int64, 1}) +julia> code_typed() do + map((2,3,4)) do x + res = 1 + 0 ≤ x < 20 || error("bad fact") + # usage 3: + # with this :terminates_locally annotation the compiler skips tainting + # `:terminates` effect within this `while` block, allowing the parent + # anonymous function to be constant-folded + Base.@assume_effects :terminates_locally while x > 1 + res *= x + x -= 1 + end + return res + end + end |> only +CodeInfo( +1 ─ return (2, 6, 24) +) => Tuple{Int64, Int64, Int64} ``` +!!! compat "Julia 1.8" + Using `Base.@assume_effects` requires Julia version 1.8. + !!! compat "Julia 1.10" - The usage within a function body requires at least Julia 1.10. + The usage within a function body requires at least Julia 1.10. + +!!! compat "Julia 1.11" + The code block annotation requires at least Julia 1.11. !!! warning Improper use of this macro causes undefined behavior (including crashes, @@ -475,6 +553,9 @@ The following `setting`s are supported. - `:terminates_locally` - `:notaskstate` - `:inaccessiblememonly` +- `:noub` +- `:noub_if_noinbounds` +- `:nortcall` - `:foldable` - `:removable` - `:total` @@ -495,7 +576,7 @@ The `:consistent` setting asserts that for egal (`===`) inputs: !!! note The `:consistent`-cy assertion is made world-age wise. More formally, write - ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then we require: + ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then this setting requires: ```math ∀ i, x, y: x ≡ y → fᵢ(x) ≡ fᵢ(y) ``` @@ -512,13 +593,6 @@ The `:consistent` setting asserts that for egal (`===`) inputs: even for the same world age (e.g. because one ran in the interpreter, while the other was optimized). -!!! note - The `:consistent`-cy assertion currrently includes the assertion that the function - will not execute any undefined behavior (for any input). Note that undefined behavior - may technically cause the function to violate other effect assertions (such as - `:nothrow` or `:effect_free`) as well, but we do not model this, and all effects - except `:consistent` assume the absence of undefined behavior. - !!! 
note If `:consistent` functions terminate by throwing an exception, that exception itself is not required to meet the egality requirement specified above. @@ -556,7 +630,7 @@ were not executed. --- ## `:nothrow` -The `:nothrow` settings asserts that this method does not terminate abnormally +The `:nothrow` settings asserts that this method does not throw an exception (i.e. will either always return a value or never return). !!! note @@ -565,7 +639,11 @@ The `:nothrow` settings asserts that this method does not terminate abnormally method itself. !!! note - `MethodErrors` and similar exceptions count as abnormal termination. + If the execution of a method may raise `MethodError`s and similar exceptions, then + the method is not considered as `:nothrow`. + However, note that environment-dependent errors like `StackOverflowError` or `InterruptException` + are not modeled by this effect and thus a method that may result in `StackOverflowError` + does not necessarily need to be `!:nothrow` (although it should usually be `!:terminates` too). --- ## `:terminates_globally` @@ -578,7 +656,7 @@ The `:terminates_globally` settings asserts that this method will eventually ter !!! note The compiler will consider this a strong indication that the method will - terminate relatively *quickly* and may (if otherwise legal), call this + terminate relatively *quickly* and may (if otherwise legal) call this method at compile time. I.e. it is a bad idea to annotate this setting on a method that *technically*, but not *practically*, terminates. @@ -638,6 +716,28 @@ global state or mutable memory pointed to by its arguments. !!! note This `:inaccessiblememonly` assertion covers any other methods called by the annotated method. +--- +## `:noub` + +The `:noub` setting asserts that the method will not execute any undefined behavior +(for any input). Note that undefined behavior may technically cause the method to violate +any other effect assertions (such as `:consistent` or `:effect_free`) as well, but we do +not model this, and they assume the absence of undefined behavior. + +--- +## `:nortcall` + +The `:nortcall` setting asserts that the method does not call `Core.Compiler.return_type`, +and that any other methods this method might call also do not call `Core.Compiler.return_type`. + +!!! note + To be precise, this assertion can be used when a call to `Core.Compiler.return_type` is + not made at runtime; that is, when the result of `Core.Compiler.return_type` is known + exactly at compile time and the call is eliminated by the optimizer. However, since + whether the result of `Core.Compiler.return_type` is folded at compile time depends + heavily on the compiler's implementation, it is generally risky to assert this if + the method in question uses `Core.Compiler.return_type` in any form. + --- ## `:foldable` @@ -647,6 +747,8 @@ currently equivalent to the following `setting`s: - `:consistent` - `:effect_free` - `:terminates_globally` +- `:noub` +- `:nortcall` !!! note This list in particular does not include `:nothrow`. The compiler will still @@ -656,7 +758,7 @@ currently equivalent to the following `setting`s: !!! note An explicit `@inbounds` annotation inside the function will also disable - constant folding and not be overriden by `:foldable`. + constant folding and not be overridden by `:foldable`. --- ## `:removable` @@ -679,6 +781,8 @@ the following other `setting`s: - `:terminates_globally` - `:notaskstate` - `:inaccessiblememonly` +- `:noub` +- `:nortcall` !!! 
warning `:total` is a very strong assertion and will likely gain additional semantics @@ -698,68 +802,159 @@ the call is generally total, it may however throw. """ macro assume_effects(args...) lastex = args[end] - inner = unwrap_macrocalls(lastex) - if is_function_def(inner) - ex = lastex - idx = length(args)-1 + override = compute_assumed_settings(args[begin:end-1]) + if is_function_def(unwrap_macrocalls(lastex)) + return esc(pushmeta!(lastex::Expr, form_purity_expr(override))) elseif isexpr(lastex, :macrocall) && lastex.args[1] === Symbol("@ccall") - ex = lastex - idx = length(args)-1 - else # anonymous function case - ex = nothing - idx = length(args) + lastex.args[1] = GlobalRef(Base, Symbol("@ccall_effects")) + insert!(lastex.args, 3, encode_effects_override(override)) + return esc(lastex) end - (consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly) = - (false, false, false, false, false, false, false, false) - for org_setting in args[1:idx] - (setting, val) = compute_assumed_setting(org_setting) - if setting === :consistent - consistent = val - elseif setting === :effect_free - effect_free = val - elseif setting === :nothrow - nothrow = val - elseif setting === :terminates_globally - terminates_globally = val - elseif setting === :terminates_locally - terminates_locally = val - elseif setting === :notaskstate - notaskstate = val - elseif setting === :inaccessiblememonly - inaccessiblememonly = val - elseif setting === :foldable - consistent = effect_free = terminates_globally = val - elseif setting === :removable - effect_free = nothrow = terminates_globally = val - elseif setting === :total - consistent = effect_free = nothrow = terminates_globally = notaskstate = inaccessiblememonly = val - else - throw(ArgumentError("@assume_effects $org_setting not supported")) - end + override′ = compute_assumed_setting(override, lastex) + if override′ !== nothing + # anonymous function case + return Expr(:meta, form_purity_expr(override′)) + else + # call site annotation case + return Expr(:block, + form_purity_expr(override), + Expr(:local, Expr(:(=), :val, esc(lastex))), + Expr(:purity), # region end token + :val) end - if is_function_def(inner) - return esc(pushmeta!(ex, :purity, - consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly)) - elseif isexpr(ex, :macrocall) && ex.args[1] === Symbol("@ccall") - ex.args[1] = GlobalRef(Base, Symbol("@ccall_effects")) - insert!(ex.args, 3, Core.Compiler.encode_effects_override(Core.Compiler.EffectsOverride( - consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly, - ))) - return esc(ex) - else # anonymous function case - return Expr(:meta, Expr(:purity, - consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly)) +end + +function compute_assumed_settings(settings) + override = EffectsOverride() + for setting in settings + override = compute_assumed_setting(override, setting) + override === nothing && + throw(ArgumentError("`@assume_effects $setting` not supported")) end + return override end -function compute_assumed_setting(@nospecialize(setting), val::Bool=true) +struct EffectsOverride + consistent::Bool + effect_free::Bool + nothrow::Bool + terminates_globally::Bool + terminates_locally::Bool + notaskstate::Bool + inaccessiblememonly::Bool + noub::Bool + noub_if_noinbounds::Bool + consistent_overlay::Bool + nortcall::Bool +end + +function 
EffectsOverride( + override::EffectsOverride = + EffectsOverride(false, false, false, false, false, false, false, false, false, false, false); + consistent::Bool = override.consistent, + effect_free::Bool = override.effect_free, + nothrow::Bool = override.nothrow, + terminates_globally::Bool = override.terminates_globally, + terminates_locally::Bool = override.terminates_locally, + notaskstate::Bool = override.notaskstate, + inaccessiblememonly::Bool = override.inaccessiblememonly, + noub::Bool = override.noub, + noub_if_noinbounds::Bool = override.noub_if_noinbounds, + consistent_overlay::Bool = override.consistent_overlay, + nortcall::Bool = override.nortcall) + return EffectsOverride( + consistent, + effect_free, + nothrow, + terminates_globally, + terminates_locally, + notaskstate, + inaccessiblememonly, + noub, + noub_if_noinbounds, + consistent_overlay, + nortcall) +end + +const NUM_EFFECTS_OVERRIDES = 11 # sync with julia.h + +function compute_assumed_setting(override::EffectsOverride, @nospecialize(setting), val::Bool=true) if isexpr(setting, :call) && setting.args[1] === :(!) - return compute_assumed_setting(setting.args[2], !val) + return compute_assumed_setting(override, setting.args[2], !val) elseif isa(setting, QuoteNode) - return compute_assumed_setting(setting.value, val) - else - return (setting, val) + return compute_assumed_setting(override, setting.value, val) end + if setting === :consistent + return EffectsOverride(override; consistent = val) + elseif setting === :effect_free + return EffectsOverride(override; effect_free = val) + elseif setting === :nothrow + return EffectsOverride(override; nothrow = val) + elseif setting === :terminates_globally + return EffectsOverride(override; terminates_globally = val) + elseif setting === :terminates_locally + return EffectsOverride(override; terminates_locally = val) + elseif setting === :notaskstate + return EffectsOverride(override; notaskstate = val) + elseif setting === :inaccessiblememonly + return EffectsOverride(override; inaccessiblememonly = val) + elseif setting === :noub + return EffectsOverride(override; noub = val) + elseif setting === :noub_if_noinbounds + return EffectsOverride(override; noub_if_noinbounds = val) + elseif setting === :foldable + consistent = effect_free = terminates_globally = noub = nortcall = val + return EffectsOverride(override; consistent, effect_free, terminates_globally, noub, nortcall) + elseif setting === :removable + effect_free = nothrow = terminates_globally = val + return EffectsOverride(override; effect_free, nothrow, terminates_globally) + elseif setting === :total + consistent = effect_free = nothrow = terminates_globally = notaskstate = + inaccessiblememonly = noub = nortcall = val + return EffectsOverride(override; + consistent, effect_free, nothrow, terminates_globally, notaskstate, + inaccessiblememonly, noub, nortcall) + end + return nothing +end + +function encode_effects_override(eo::EffectsOverride) + e = 0x0000 + eo.consistent && (e |= (0x0001 << 0)) + eo.effect_free && (e |= (0x0001 << 1)) + eo.nothrow && (e |= (0x0001 << 2)) + eo.terminates_globally && (e |= (0x0001 << 3)) + eo.terminates_locally && (e |= (0x0001 << 4)) + eo.notaskstate && (e |= (0x0001 << 5)) + eo.inaccessiblememonly && (e |= (0x0001 << 6)) + eo.noub && (e |= (0x0001 << 7)) + eo.noub_if_noinbounds && (e |= (0x0001 << 8)) + eo.consistent_overlay && (e |= (0x0001 << 9)) + eo.nortcall && (e |= (0x0001 << 10)) + return e +end + +function decode_effects_override(e::UInt16) + return EffectsOverride( + 
!iszero(e & (0x0001 << 0)), + !iszero(e & (0x0001 << 1)), + !iszero(e & (0x0001 << 2)), + !iszero(e & (0x0001 << 3)), + !iszero(e & (0x0001 << 4)), + !iszero(e & (0x0001 << 5)), + !iszero(e & (0x0001 << 6)), + !iszero(e & (0x0001 << 7)), + !iszero(e & (0x0001 << 8)), + !iszero(e & (0x0001 << 9)), + !iszero(e & (0x0001 << 10))) +end + +function form_purity_expr(override::EffectsOverride) + ex = Expr(:purity) + for i = 1:NUM_EFFECTS_OVERRIDES + push!(ex.args, getfield(override, i)) + end + return ex end """ @@ -772,7 +967,7 @@ end Tells the compiler to infer `f` using the declared types of `@nospecialize`d arguments. This can be used to limit the number of compiler-generated specializations during inference. -# Example +# Examples ```julia julia> f(A::AbstractArray) = g(A) @@ -795,6 +990,9 @@ while it can not infer the concrete return type of it. Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`, indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on specialized code generation. + +!!! compat "Julia 1.10" + Using `Base.@nospecializeinfer` requires Julia version 1.10. """ macro nospecializeinfer(ex) esc(isa(ex, Expr) ? pushmeta!(ex, :nospecializeinfer) : ex) @@ -827,29 +1025,23 @@ end unwrap_macrocalls(@nospecialize(x)) = x function unwrap_macrocalls(ex::Expr) inner = ex - while inner.head === :macrocall - inner = inner.args[end]::Expr + while isexpr(inner, :macrocall) + inner = inner.args[end] end return inner end -function pushmeta!(ex::Expr, sym::Symbol, args::Any...) - if isempty(args) - tag = sym - else - tag = Expr(sym, args...)::Expr - end - +function pushmeta!(ex::Expr, tag::Union{Symbol,Expr}) inner = unwrap_macrocalls(ex) - idx, exargs = findmeta(inner) if idx != 0 - push!(exargs[idx].args, tag) + metastmt = exargs[idx]::Expr + push!(metastmt.args, tag) else body = inner.args[2]::Expr pushfirst!(body.args, Expr(:meta, tag)) end - ex + return ex end popmeta!(body, sym) = _getmeta(body, sym, true) @@ -945,26 +1137,29 @@ function findmeta_block(exargs, argsmatch=args->true) return 0, [] end -remove_linenums!(ex) = ex -function remove_linenums!(ex::Expr) - if ex.head === :block || ex.head === :quote - # remove line number expressions from metadata (not argument literal or inert) position - filter!(ex.args) do x - isa(x, Expr) && x.head === :line && return false - isa(x, LineNumberNode) && return false - return true +""" + Base.remove_linenums!(ex) + +Remove all line-number metadata from expression-like object `ex`. 
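A small usage sketch:

```julia
ex = quote
    x + 1
end
Base.remove_linenums!(ex)   # strips the LineNumberNode entries from ex.args in place
```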
+""" +function remove_linenums!(@nospecialize ex) + if ex isa Expr + if ex.head === :block || ex.head === :quote + # remove line number expressions from metadata (not argument literal or inert) position + filter!(ex.args) do x + isa(x, Expr) && x.head === :line && return false + isa(x, LineNumberNode) && return false + return true + end end - end - for subex in ex.args - subex isa Expr && remove_linenums!(subex) + for subex in ex.args + subex isa Expr && remove_linenums!(subex) + end + elseif ex isa CodeInfo + ex.debuginfo = Core.DebugInfo(ex.debuginfo.def) # TODO: filter partially, but keep edges end return ex end -function remove_linenums!(src::CodeInfo) - src.codelocs .= 0 - length(src.linetable) > 1 && resize!(src.linetable, 1) - return src -end replace_linenums!(ex, ln::LineNumberNode) = ex function replace_linenums!(ex::Expr, ln::LineNumberNode) @@ -1023,7 +1218,6 @@ macro generated(f) if isa(f, Expr) && (f.head === :function || is_short_function_def(f)) body = f.args[2] lno = body.args[1] - tmp = gensym("tmp") return Expr(:escape, Expr(f.head, f.args[1], Expr(:block, @@ -1050,13 +1244,22 @@ If no `order` is specified it defaults to :sequentially_consistent. @atomic a.b.x += addend @atomic :release a.b.x = new @atomic :acquire_release a.b.x += addend + @atomic m[idx] = new + @atomic m[idx] += addend + @atomic :release m[idx] = new + @atomic :acquire_release m[idx] += addend Perform the store operation expressed on the right atomically and return the new value. -With `=`, this operation translates to a `setproperty!(a.b, :x, new)` call. -With any operator also, this operation translates to a `modifyproperty!(a.b, -:x, +, addend)[2]` call. +With assignment (`=`), this operation translates to a `setproperty!(a.b, :x, new)` +or, in case of reference, to a `setindex_atomic!(m, order, new, idx)` call, +with `order` defaulting to `:sequentially_consistent`. + +With any modifying operator this operation translates to a +`modifyproperty!(a.b, :x, op, addend)[2]` or, in case of reference, to a +`modifyindex_atomic!(m, order, op, addend, idx...)[2]` call, +with `order` defaulting to `:sequentially_consistent`. @atomic a.b.x max arg2 @atomic a.b.x + arg2 @@ -1064,12 +1267,20 @@ With any operator also, this operation translates to a `modifyproperty!(a.b, @atomic :acquire_release max(a.b.x, arg2) @atomic :acquire_release a.b.x + arg2 @atomic :acquire_release a.b.x max arg2 + @atomic m[idx] max arg2 + @atomic m[idx] + arg2 + @atomic max(m[idx], arg2) + @atomic :acquire_release max(m[idx], arg2) + @atomic :acquire_release m[idx] + arg2 + @atomic :acquire_release m[idx] max arg2 Perform the binary operation expressed on the right atomically. Store the -result into the field in the first argument and return the values `(old, new)`. - -This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call. +result into the field or the reference in the first argument, and return the values +`(old, new)`. +This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` or, +in case of reference to a `modifyindex_atomic!(m, order, func, arg2, idx)` call, +with `order` defaulting to `:sequentially_consistent`. See [Per-field atomics](@ref man-atomics) section in the manual for more details. 
@@ -1102,8 +1313,36 @@ julia> @atomic a.x max 5 # again change field x of a to the max value, with sequ 10 => 10 ``` +```jldoctest +julia> mem = AtomicMemory{Int}(undef, 2); + +julia> @atomic mem[1] = 2 # set mem[1] to value 2 with sequential consistency +2 + +julia> @atomic :monotonic mem[1] # fetch the first value of mem, with monotonic consistency +2 + +julia> @atomic mem[1] += 1 # increment the first value of mem, with sequential consistency +3 + +julia> @atomic mem[1] + 1 # increment the first value of mem, with sequential consistency +3 => 4 + +julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency +4 + +julia> @atomic max(mem[1], 10) # change the first value of mem to the max value, with sequential consistency +4 => 10 + +julia> @atomic mem[1] max 5 # again change the first value of mem to the max value, with sequential consistency +10 => 10 +``` + !!! compat "Julia 1.7" - This functionality requires at least Julia 1.7. + Atomic fields functionality requires at least Julia 1.7. + +!!! compat "Julia 1.12" + Atomic reference functionality requires at least Julia 1.12. """ macro atomic(ex) if !isa(ex, Symbol) && !is_expr(ex, :(::)) @@ -1130,11 +1369,17 @@ function make_atomic(order, ex) return :(getproperty($l, $r, $order)) elseif isexpr(ex, :call, 3) return make_atomic(order, ex.args[2], ex.args[1], ex.args[3]) + elseif isexpr(ex, :ref) + x, idcs = esc(ex.args[1]), map(esc, ex.args[2:end]) + return :(getindex_atomic($x, $order, $(idcs...))) elseif ex.head === :(=) l, r = ex.args[1], esc(ex.args[2]) if is_expr(l, :., 2) ll, lr = esc(l.args[1]), esc(l.args[2]) return :(setproperty!($ll, $lr, $r, $order)) + elseif is_expr(l, :ref) + x, idcs = esc(l.args[1]), map(esc, l.args[2:end]) + return :(setindex_atomic!($x, $order, $r, $(idcs...))) end end if length(ex.args) == 2 @@ -1157,19 +1402,29 @@ function make_atomic(order, ex) end function make_atomic(order, a1, op, a2) @nospecialize - is_expr(a1, :., 2) || error("@atomic modify expression missing field access") - a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2) - return :(modifyproperty!($a1l, $a1r, $op, $a2, $order)) + if is_expr(a1, :., 2) + a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2) + return :(modifyproperty!($a1l, $a1r, $op, $a2, $order)) + elseif is_expr(a1, :ref) + x, idcs, op, a2 = esc(a1.args[1]), map(esc, a1.args[2:end]), esc(op), esc(a2) + return :(modifyindex_atomic!($x, $order, $op, $a2, $(idcs...))) + end + error("@atomic modify expression missing field access or indexing") end """ @atomicswap a.b.x = new @atomicswap :sequentially_consistent a.b.x = new + @atomicswap m[idx] = new + @atomicswap :sequentially_consistent m[idx] = new -Stores `new` into `a.b.x` and returns the old value of `a.b.x`. +Stores `new` into `a.b.x` (`m[idx]` in case of reference) and returns the old +value of `a.b.x` (the old value stored at `m[idx]`, respectively). -This operation translates to a `swapproperty!(a.b, :x, new)` call. +This operation translates to a `swapproperty!(a.b, :x, new)` or, +in case of reference, `swapindex_atomic!(mem, order, new, idx)` call, +with `order` defaulting to `:sequentially_consistent`. See [Per-field atomics](@ref man-atomics) section in the manual for more details. 
@@ -1187,8 +1442,23 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency 4 ``` +```jldoctest +julia> mem = AtomicMemory{Int}(undef, 2); + +julia> @atomic mem[1] = 1; + +julia> @atomicswap mem[1] = 4 # replace the first value of `mem` with 4, with sequential consistency +1 + +julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency +4 +``` + !!! compat "Julia 1.7" - This functionality requires at least Julia 1.7. + Atomic fields functionality requires at least Julia 1.7. + +!!! compat "Julia 1.12" + Atomic reference functionality requires at least Julia 1.12. """ macro atomicswap(order, ex) order isa QuoteNode || (order = esc(order)) @@ -1201,9 +1471,14 @@ function make_atomicswap(order, ex) @nospecialize is_expr(ex, :(=), 2) || error("@atomicswap expression missing assignment") l, val = ex.args[1], esc(ex.args[2]) - is_expr(l, :., 2) || error("@atomicswap expression missing field access") - ll, lr = esc(l.args[1]), esc(l.args[2]) - return :(swapproperty!($ll, $lr, $val, $order)) + if is_expr(l, :., 2) + ll, lr = esc(l.args[1]), esc(l.args[2]) + return :(swapproperty!($ll, $lr, $val, $order)) + elseif is_expr(l, :ref) + x, idcs = esc(l.args[1]), map(esc, l.args[2:end]) + return :(swapindex_atomic!($x, $order, $val, $(idcs...))) + end + error("@atomicswap expression missing field access or indexing") end @@ -1211,12 +1486,18 @@ end @atomicreplace a.b.x expected => desired @atomicreplace :sequentially_consistent a.b.x expected => desired @atomicreplace :sequentially_consistent :monotonic a.b.x expected => desired + @atomicreplace m[idx] expected => desired + @atomicreplace :sequentially_consistent m[idx] expected => desired + @atomicreplace :sequentially_consistent :monotonic m[idx] expected => desired Perform the conditional replacement expressed by the pair atomically, returning the values `(old, success::Bool)`. Where `success` indicates whether the replacement was completed. -This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call. +This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` or, +in case of reference, to a +`replaceindex_atomic!(mem, success_order, fail_order, expected, desired, idx)` call, +with both orders defaulting to `:sequentially_consistent`. See [Per-field atomics](@ref man-atomics) section in the manual for more details. 
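As a hedged sketch of the calls this expands to (the `Flag` type is illustrative; the reference form assumes the Julia 1.12 `AtomicMemory` used in the examples below):

```julia
mutable struct Flag
    @atomic state::Int
end

f = Flag(1)
@atomicreplace f.state 1 => 2
# roughly replaceproperty!(f, :state, 1, 2, :sequentially_consistent, :sequentially_consistent)

m = AtomicMemory{Int}(undef, 1)
@atomic m[1] = 1
@atomicreplace m[1] 1 => 2
# roughly replaceindex_atomic!(m, :sequentially_consistent, :sequentially_consistent, 1, 2, 1)
```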
@@ -1233,7 +1514,7 @@ julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with julia> @atomic a.x # fetch field x of a, with sequential consistency 2 -julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency +julia> @atomicreplace a.x 1 => 3 # replace field x of a with 2 if it was 1, with sequential consistency (old = 2, success = false) julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency @@ -1245,8 +1526,34 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency 0 ``` +```jldoctest +julia> mem = AtomicMemory{Int}(undef, 2); + +julia> @atomic mem[1] = 1; + +julia> @atomicreplace mem[1] 1 => 2 # replace the first value of mem with 2 if it was 1, with sequential consistency +(old = 1, success = true) + +julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency +2 + +julia> @atomicreplace mem[1] 1 => 3 # replace field x of a with 2 if it was 1, with sequential consistency +(old = 2, success = false) + +julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency + +julia> @atomicreplace mem[1] xchg +(old = 2, success = true) + +julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency +0 +``` + !!! compat "Julia 1.7" - This functionality requires at least Julia 1.7. + Atomic fields functionality requires at least Julia 1.7. + +!!! compat "Julia 1.12" + Atomic reference functionality requires at least Julia 1.12. """ macro atomicreplace(success_order, fail_order, ex, old_new) fail_order isa QuoteNode || (fail_order = esc(fail_order)) @@ -1262,13 +1569,131 @@ macro atomicreplace(ex, old_new) end function make_atomicreplace(success_order, fail_order, ex, old_new) @nospecialize - is_expr(ex, :., 2) || error("@atomicreplace expression missing field access") - ll, lr = esc(ex.args[1]), esc(ex.args[2]) - if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>) - exp, rep = esc(old_new.args[2]), esc(old_new.args[3]) - return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order)) - else - old_new = esc(old_new) - return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order)) + if is_expr(ex, :., 2) + ll, lr = esc(ex.args[1]), esc(ex.args[2]) + if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>) + exp, rep = esc(old_new.args[2]), esc(old_new.args[3]) + return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order)) + else + old_new = esc(old_new) + return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order)) + end + elseif is_expr(ex, :ref) + x, idcs = esc(ex.args[1]), map(esc, ex.args[2:end]) + if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>) + exp, rep = esc(old_new.args[2]), esc(old_new.args[3]) + return :(replaceindex_atomic!($x, $success_order, $fail_order, $exp, $rep, $(idcs...))) + else + old_new = esc(old_new) + return :(replaceindex_atomic!($x, $success_order, $fail_order, $old_new::Pair..., $(idcs...))) + end + end + error("@atomicreplace expression missing field access or indexing") +end + +""" + @atomiconce a.b.x = value + @atomiconce :sequentially_consistent a.b.x = value + @atomiconce :sequentially_consistent :monotonic a.b.x = value + @atomiconce m[idx] = value + @atomiconce :sequentially_consistent m[idx] = value + @atomiconce :sequentially_consistent :monotonic m[idx] = value + +Perform the conditional assignment of value atomically if it was previously +unset. 
Returned value `success::Bool` indicates whether the assignment was completed. + +This operation translates to a `setpropertyonce!(a.b, :x, value)` or, +in case of reference, to a `setindexonce_atomic!(m, success_order, fail_order, value, idx)` call, +with both orders defaulting to `:sequentially_consistent`. + +See [Per-field atomics](@ref man-atomics) section in the manual for more details. + +# Examples +```jldoctest +julia> mutable struct AtomicOnce + @atomic x + AtomicOnce() = new() + end + +julia> a = AtomicOnce() +AtomicOnce(#undef) + +julia> @atomiconce a.x = 1 # set field x of a to 1, if unset, with sequential consistency +true + +julia> @atomic a.x # fetch field x of a, with sequential consistency +1 + +julia> @atomiconce :monotonic a.x = 2 # set field x of a to 1, if unset, with monotonic consistence +false +``` + +```jldoctest +julia> mem = AtomicMemory{Vector{Int}}(undef, 1); + +julia> isassigned(mem, 1) +false + +julia> @atomiconce mem[1] = [1] # set the first value of mem to [1], if unset, with sequential consistency +true + +julia> isassigned(mem, 1) +true + +julia> @atomic mem[1] # fetch the first value of mem, with sequential consistency +1-element Vector{Int64}: + 1 + +julia> @atomiconce :monotonic mem[1] = [2] # set the first value of mem to [2], if unset, with monotonic +false + +julia> @atomic mem[1] +1-element Vector{Int64}: + 1 +``` + +!!! compat "Julia 1.11" + Atomic fields functionality requires at least Julia 1.11. + +!!! compat "Julia 1.12" + Atomic reference functionality requires at least Julia 1.12. +""" +macro atomiconce(success_order, fail_order, ex) + fail_order isa QuoteNode || (fail_order = esc(fail_order)) + success_order isa QuoteNode || (success_order = esc(success_order)) + return make_atomiconce(success_order, fail_order, ex) +end +macro atomiconce(order, ex) + order isa QuoteNode || (order = esc(order)) + return make_atomiconce(order, order, ex) +end +macro atomiconce(ex) + return make_atomiconce(QuoteNode(:sequentially_consistent), QuoteNode(:sequentially_consistent), ex) +end +function make_atomiconce(success_order, fail_order, ex) + @nospecialize + is_expr(ex, :(=), 2) || error("@atomiconce expression missing assignment") + l, val = ex.args[1], esc(ex.args[2]) + if is_expr(l, :., 2) + ll, lr = esc(l.args[1]), esc(l.args[2]) + return :(setpropertyonce!($ll, $lr, $val, $success_order, $fail_order)) + elseif is_expr(l, :ref) + x, idcs = esc(l.args[1]), map(esc, l.args[2:end]) + return :(setindexonce_atomic!($x, $success_order, $fail_order, $val, $(idcs...))) end + error("@atomiconce expression missing field access or indexing") +end + +# Meta expression head, these generally can't be deleted even when they are +# in a dead branch but can be ignored when analyzing uses/liveness. +is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo +is_meta_expr(@nospecialize x) = isa(x, Expr) && is_meta_expr_head(x.head) + +function is_self_quoting(@nospecialize(x)) + return isa(x,Number) || isa(x,AbstractString) || isa(x,Tuple) || isa(x,Type) || + isa(x,Char) || x === nothing || isa(x,Function) +end + +function quoted(@nospecialize(x)) + return is_self_quoting(x) ? x : QuoteNode(x) end diff --git a/base/fastmath.jl b/base/fastmath.jl index 44440ebad2050..f2f60519b99ac 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -6,7 +6,7 @@ # strict IEEE semantics. # This allows the following transformations. 
For more information see -# http://llvm.org/docs/LangRef.html#fast-math-flags: +# https://llvm.org/docs/LangRef.html#fast-math-flags: # nnan: No NaNs - Allow optimizations to assume the arguments and # result are not NaN. Such optimizations are required to retain # defined behavior over NaNs, but the value of the result is @@ -28,8 +28,9 @@ module FastMath export @fastmath import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast, - add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, + add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, min_float_fast, max_float_fast, eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast +import Base: afoldl const fast_op = Dict(# basic arithmetic @@ -101,9 +102,12 @@ const rewrite_op = function make_fastmath(expr::Expr) if expr.head === :quote return expr - elseif expr.head === :call && expr.args[1] === :^ && expr.args[3] isa Integer - # mimic Julia's literal_pow lowering of literal integer powers - return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(expr.args[2]), Val{expr.args[3]}()) + elseif expr.head === :call && expr.args[1] === :^ + ea = expr.args + if length(ea) >= 3 && isa(ea[3], Int) + # mimic Julia's literal_pow lowering of literal integer powers + return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(ea[2]), Val(ea[3])) + end end op = get(rewrite_op, expr.head, :nothing) if op !== :nothing @@ -136,7 +140,7 @@ may violate strict IEEE semantics. This allows the fastest possible operation, but results are undefined -- be careful when doing this, as it may change numerical results. -This sets the [LLVM Fast-Math flags](http://llvm.org/docs/LangRef.html#fast-math-flags), +This sets the [LLVM Fast-Math flags](https://llvm.org/docs/LangRef.html#fast-math-flags), and corresponds to the `-ffast-math` option in clang. See [the notes on performance annotations](@ref man-performance-annotations) for more details. @@ -164,11 +168,9 @@ add_fast(x::T, y::T) where {T<:FloatTypes} = add_float_fast(x, y) sub_fast(x::T, y::T) where {T<:FloatTypes} = sub_float_fast(x, y) mul_fast(x::T, y::T) where {T<:FloatTypes} = mul_float_fast(x, y) div_fast(x::T, y::T) where {T<:FloatTypes} = div_float_fast(x, y) - -add_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} = - add_fast(add_fast(x, y), zs...) -mul_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} = - mul_fast(mul_fast(x, y), zs...) +max_fast(x::T, y::T) where {T<:FloatTypes} = max_float_fast(x, y) +min_fast(x::T, y::T) where {T<:FloatTypes} = min_float_fast(x, y) +minmax_fast(x::T, y::T) where {T<:FloatTypes} = (min_fast(x, y), max_fast(x, y)) @fastmath begin cmp_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(x==y, 0, ifelse(x x, y, x) - min_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, x, y) - minmax_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, (x,y), (y,x)) - - max_fast(x::T, y::T, z::T...) where {T<:FloatTypes} = max_fast(max_fast(x, y), z...) - min_fast(x::T, y::T, z::T...) where {T<:FloatTypes} = min_fast(min_fast(x, y), z...) 
end # fall-back implementations and type promotion @@ -257,7 +251,7 @@ for op in (:abs, :abs2, :conj, :inv, :sign) end end -for op in (:+, :-, :*, :/, :(==), :!=, :<, :<=, :cmp, :rem, :min, :max, :minmax) +for op in (:-, :/, :(==), :!=, :<, :<=, :cmp, :rem, :minmax) op_fast = fast_op[op] @eval begin # fall-back implementation for non-numeric types @@ -270,6 +264,31 @@ for op in (:+, :-, :*, :/, :(==), :!=, :<, :<=, :cmp, :rem, :min, :max, :minmax) end end +for op in (:+, :*, :min, :max) + op_fast = fast_op[op] + @eval begin + $op_fast(x) = $op(x) + # fall-back implementation for non-numeric types + $op_fast(x, y) = $op(x, y) + # type promotion + $op_fast(x::Number, y::Number) = + $op_fast(promote(x,y)...) + # fall-back implementation that applies after promotion + $op_fast(x::T,y::T) where {T<:Number} = $op(x,y) + # note: these definitions must not cause a dispatch loop when +(a,b) is + # not defined, and must only try to call 2-argument definitions, so + # that defining +(a,b) is sufficient for full functionality. + ($op_fast)(a, b, c, xs...) = (@inline; afoldl($op_fast, ($op_fast)(($op_fast)(a,b),c), xs...)) + # a further concern is that it's easy for a type like (Int,Int...) + # to match many definitions, so we need to keep the number of + # definitions down to avoid losing type information. + # type promotion + $op_fast(a::Number, b::Number, c::Number, xs::Number...) = + $op_fast(promote(a,b,c,xs...)...) + # fall-back implementation that applies after promotion + $op_fast(a::T, b::T, c::T, xs::T...) where {T<:Number} = (@inline; afoldl($op_fast, ($op_fast)(($op_fast)(a,b),c), xs...)) + end +end # Math functions exp2_fast(x::Union{Float32,Float64}) = Base.Math.exp2_fast(x) @@ -278,8 +297,12 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x) # builtins -pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32.i32", llvmcall, Float32, (Float32, Int32), x, y) -pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y) +function pow_fast(x::Float64, y::Integer) + z = y % Int32 + z == y ? 
pow_fast(x, z) : x^y +end +pow_fast(x::Float32, y::Integer) = x^y +pow_fast(x::Float64, y::Int32) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y) pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v) @@ -309,7 +332,7 @@ end Complex{T}(c, s) end - # See + # See pow_fast(x::T, y::T) where {T<:ComplexTypes} = exp(y*log(x)) pow_fast(x::T, y::Complex{T}) where {T<:FloatTypes} = exp(y*log(x)) pow_fast(x::Complex{T}, y::T) where {T<:FloatTypes} = exp(y*log(x)) @@ -364,6 +387,10 @@ for f in (:^, :atan, :hypot, :log) # fall-back implementation that applies after promotion $f_fast(x::T, y::T) where {T<:Number} = $f(x, y) end + # Issue 53886 - avoid promotion of Int128 etc to be consistent with non-fastmath + if f === :^ + @eval $f_fast(x::Number, y::Integer) = $f(x, y) + end end # Reductions diff --git a/base/file.jl b/base/file.jl index 866e82b6e39c2..66e8114aba4ba 100644 --- a/base/file.jl +++ b/base/file.jl @@ -128,7 +128,7 @@ julia> pwd() "/home/JuliaUser" julia> cd(readdir, "/home/JuliaUser/Projects/julia") -34-element Array{String,1}: +34-element Vector{String}: ".circleci" ".freebsdci.sh" ".git" @@ -211,17 +211,17 @@ julia> mkpath("my/test/dir") # creates three directories "my/test/dir" julia> readdir() -1-element Array{String,1}: +1-element Vector{String}: "my" julia> cd("my") julia> readdir() -1-element Array{String,1}: +1-element Vector{String}: "test" julia> readdir("test") -1-element Array{String,1}: +1-element Vector{String}: "dir" julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directories @@ -230,15 +230,19 @@ julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directo julia> isdir("intermediate_dir/actually_a_directory.txt") true +julia> mkpath("my/test/dir/") # returns the original `path` +"my/test/dir/" ``` """ function mkpath(path::AbstractString; mode::Integer = 0o777) - isdirpath(path) && (path = dirname(path)) - dir = dirname(path) - (path == dir || isdir(path)) && return path - mkpath(dir, mode = checkmode(mode)) + parent = dirname(path) + # stop recursion for `""`, `"/"`, or existing dir + (path == parent || isdir(path)) && return path + mkpath(parent, mode = checkmode(mode)) try - mkdir(path, mode = mode) + # The `isdir` check could be omitted, then `mkdir` will throw an error in cases like `x/`. + # Although the error will not be rethrown, we avoid it in advance for performance reasons. + isdir(path) || mkdir(path, mode = mode) catch err # If there is a problem with making the directory, but the directory # does in fact exist, then ignore the error. Else re-throw it. @@ -246,9 +250,13 @@ function mkpath(path::AbstractString; mode::Integer = 0o777) rethrow() end end - path + return path end +# Files that were requested to be deleted but can't be by the current process +# i.e. loaded DLLs on Windows +delayed_delete_dir() = joinpath(tempdir(), "julia_delayed_deletes") + """ rm(path::AbstractString; force::Bool=false, recursive::Bool=false) @@ -270,20 +278,26 @@ Stacktrace: [...] 
``` """ -function rm(path::AbstractString; force::Bool=false, recursive::Bool=false) +function rm(path::AbstractString; force::Bool=false, recursive::Bool=false, allow_delayed_delete::Bool=true) + # allow_delayed_delete is used by Pkg.gc() but is otherwise not part of the public API if islink(path) || !isdir(path) try - @static if Sys.iswindows() - # is writable on windows actually means "is deletable" - st = lstat(path) - if ispath(st) && (filemode(st) & 0o222) == 0 - chmod(path, 0o777) - end - end unlink(path) catch err - if force && isa(err, IOError) && err.code==Base.UV_ENOENT - return + if isa(err, IOError) + force && err.code==Base.UV_ENOENT && return + @static if Sys.iswindows() + if allow_delayed_delete && err.code==Base.UV_EACCES && endswith(path, ".dll") + # Loaded DLLs cannot be deleted on Windows, even with posix delete mode + # but they can be moved. So move out to allow the dir to be deleted. + # Pkg.gc() cleans up this dir when possible + dir = mkpath(delayed_delete_dir()) + temp_path = tempname(dir, cleanup = false, suffix = string("_", basename(path))) + @debug "Could not delete DLL most likely because it is loaded, moving to tempdir" path temp_path + mv(path, temp_path) + return + end + end end rethrow() end @@ -291,19 +305,23 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false) if recursive try for p in readdir(path) - rm(joinpath(path, p), force=force, recursive=true) + try + rm(joinpath(path, p), force=force, recursive=true) + catch err + (isa(err, IOError) && err.code==Base.UV_EACCES) || rethrow() + end end catch err - if !(isa(err, IOError) && err.code==Base.UV_EACCES) - rethrow(err) - end + (isa(err, IOError) && err.code==Base.UV_EACCES) || rethrow() end end req = Libc.malloc(_sizeof_uv_fs) try ret = ccall(:uv_fs_rmdir, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), C_NULL, req, path, C_NULL) uv_fs_req_cleanup(req) - ret < 0 && uv_error("rm($(repr(path)))", ret) + if ret < 0 && !(force && ret == Base.UV_ENOENT) + uv_error("rm($(repr(path)))", ret) + end nothing finally Libc.free(req) @@ -367,7 +385,7 @@ of the file or directory `src` refers to. Return `dst`. !!! note - The `cp` function is different from the `cp` command. The `cp` function always operates on + The `cp` function is different from the `cp` Unix command. The `cp` function always operates on the assumption that `dst` is a file, while the command does different things depending on whether `dst` is a directory or a file. Using `force=true` when `dst` is a directory will result in loss of all the contents present @@ -420,13 +438,73 @@ julia> mv("hello.txt", "goodbye.txt", force=true) julia> rm("goodbye.txt"); ``` + +!!! note + The `mv` function is different from the `mv` Unix command. The `mv` function by + default will error if `dst` exists, while the command will delete + an existing `dst` file by default. + Also the `mv` function always operates on + the assumption that `dst` is a file, while the command does different things depending + on whether `dst` is a directory or a file. + Using `force=true` when `dst` is a directory will result in loss of all the contents present + in the `dst` directory, and `dst` will become a file that has the contents of `src` instead. 
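The difference is easiest to see when `dst` already exists. The snippet below is an
illustrative sketch rather than a verified doctest:

```julia
write("a.txt", "A"); write("b.txt", "B")

# Without `force`, moving onto an existing path throws an ArgumentError:
try
    mv("a.txt", "b.txt")
catch err
    err isa ArgumentError || rethrow()
end

# With `force=true`, "b.txt" is replaced by the contents of "a.txt":
mv("a.txt", "b.txt", force=true)
read("b.txt", String)   # "A"
```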
""" function mv(src::AbstractString, dst::AbstractString; force::Bool=false) - checkfor_mv_cp_cptree(src, dst, "moving"; force=force) - rename(src, dst) + if force + _mv_replace(src, dst) + else + _mv_noreplace(src, dst) + end +end + +function _mv_replace(src::AbstractString, dst::AbstractString) + # This check is copied from checkfor_mv_cp_cptree + if ispath(dst) && Base.samefile(src, dst) + abs_src = islink(src) ? abspath(readlink(src)) : abspath(src) + abs_dst = islink(dst) ? abspath(readlink(dst)) : abspath(dst) + throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir. ", + "This is not supported.\n ", + "`src` refers to: $(abs_src)\n ", + "`dst` refers to: $(abs_dst)\n"))) + end + # First try to do a regular rename, because this might avoid a situation + # where dst is deleted or truncated. + try + rename(src, dst) + catch err + err isa IOError || rethrow() + err.code==Base.UV_ENOENT && rethrow() + # on rename error try to delete dst if it exists and isn't the same as src + checkfor_mv_cp_cptree(src, dst, "moving"; force=true) + try + rename(src, dst) + catch err + err isa IOError || rethrow() + # on second error, default to force cp && rm + cp(src, dst; force=true, follow_symlinks=false) + rm(src; recursive=true) + end + end + dst +end + +function _mv_noreplace(src::AbstractString, dst::AbstractString) + # Error if dst exists. + # This check currently has TOCTTOU issues. + checkfor_mv_cp_cptree(src, dst, "moving"; force=false) + try + rename(src, dst) + catch err + err isa IOError || rethrow() + err.code==Base.UV_ENOENT && rethrow() + # on error, default to cp && rm + cp(src, dst; force=false, follow_symlinks=false) + rm(src; recursive=true) + end dst end + """ touch(path::AbstractString) touch(fd::File) @@ -478,13 +556,26 @@ function tempdir() rc = ccall(:uv_os_tmpdir, Cint, (Ptr{UInt8}, Ptr{Csize_t}), buf, sz) if rc == 0 resize!(buf, sz[]) - return String(buf) + break elseif rc == Base.UV_ENOBUFS resize!(buf, sz[] - 1) # space for null-terminator implied by StringVector else uv_error("tempdir()", rc) end end + tempdir = String(buf) + try + s = stat(tempdir) + if !ispath(s) + @warn "tempdir path does not exist" tempdir + elseif !isdir(s) + @warn "tempdir path is not a directory" tempdir + end + catch ex + ex isa IOError || ex isa SystemError || rethrow() + @warn "accessing tempdir path failed" _exception=ex + end + return tempdir end """ @@ -502,13 +593,19 @@ function prepare_for_deletion(path::AbstractString) return end - try chmod(path, filemode(path) | 0o333) - catch; end + try + chmod(path, filemode(path) | 0o333) + catch ex + ex isa IOError || ex isa SystemError || rethrow() + end for (root, dirs, files) in walkdir(path; onerror=x->()) for dir in dirs dpath = joinpath(root, dir) - try chmod(dpath, filemode(dpath) | 0o333) - catch; end + try + chmod(dpath, filemode(dpath) | 0o333) + catch ex + ex isa IOError || ex isa SystemError || rethrow() + end end end end @@ -519,37 +616,70 @@ const TEMP_CLEANUP = Dict{String,Bool}() const TEMP_CLEANUP_LOCK = ReentrantLock() function temp_cleanup_later(path::AbstractString; asap::Bool=false) - lock(TEMP_CLEANUP_LOCK) + @lock TEMP_CLEANUP_LOCK begin # each path should only be inserted here once, but if there # is a collision, let !asap win over asap: if any user might # still be using the path, don't delete it until process exit TEMP_CLEANUP[path] = get(TEMP_CLEANUP, path, true) & asap if length(TEMP_CLEANUP) > TEMP_CLEANUP_MAX[] - temp_cleanup_purge() + temp_cleanup_purge_prelocked(false) TEMP_CLEANUP_MAX[] = 
max(TEMP_CLEANUP_MIN[], 2*length(TEMP_CLEANUP)) end - unlock(TEMP_CLEANUP_LOCK) - return nothing + end + nothing +end + +function temp_cleanup_forget(path::AbstractString) + @lock TEMP_CLEANUP_LOCK delete!(TEMP_CLEANUP, path) + nothing end -function temp_cleanup_purge(; force::Bool=false) - need_gc = Sys.iswindows() - for (path, asap) in TEMP_CLEANUP +function temp_cleanup_purge_prelocked(force::Bool) + filter!(TEMP_CLEANUP) do (path, asap) try - if (force || asap) && ispath(path) - need_gc && GC.gc(true) - need_gc = false + ispath(path) || return false + if force || asap prepare_for_deletion(path) rm(path, recursive=true, force=true) end - !ispath(path) && delete!(TEMP_CLEANUP, path) + return ispath(path) catch ex @warn """ Failed to clean up temporary path $(repr(path)) $ex """ _group=:file + ex isa InterruptException && rethrow() + return true + end + end + nothing +end + +function temp_cleanup_purge_all() + may_need_gc = false + @lock TEMP_CLEANUP_LOCK filter!(TEMP_CLEANUP) do (path, asap) + try + ispath(path) || return false + may_need_gc = true + return true + catch ex + ex isa InterruptException && rethrow() + return true end end + if may_need_gc + # this is only usually required on Sys.iswindows(), but may as well do it everywhere + GC.gc(true) + end + @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(true) + nothing +end + +# deprecated internal function used by some packages +temp_cleanup_purge(; force=false) = force ? temp_cleanup_purge_all() : @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(false) + +function __postinit__() + Base.atexit(temp_cleanup_purge_all) end const temp_prefix = "jl_" @@ -566,13 +696,13 @@ end # Obtain a temporary filename. -function tempname(parent::AbstractString=tempdir(); max_tries::Int = 100, cleanup::Bool=true) +function tempname(parent::AbstractString=tempdir(); max_tries::Int = 100, cleanup::Bool=true, suffix::AbstractString="") isdir(parent) || throw(ArgumentError("$(repr(parent)) is not a directory")) prefix = joinpath(parent, temp_prefix) filename = nothing for i in 1:max_tries - filename = string(prefix, _rand_filename()) + filename = string(prefix, _rand_filename(), suffix) if ispath(filename) filename = nothing else @@ -628,7 +758,7 @@ end # os-test """ - tempname(parent=tempdir(); cleanup=true) -> String + tempname(parent=tempdir(); cleanup=true, suffix="") -> String Generate a temporary file path. This function only returns a path; no file is created. The path is likely to be unique, but this cannot be guaranteed due to @@ -639,18 +769,22 @@ existing at the time of the call to `tempname`. When called with no arguments, the temporary name will be an absolute path to a temporary name in the system temporary directory as given by `tempdir()`. If a `parent` directory argument is given, the temporary path will be in that -directory instead. +directory instead. If a suffix is given the tempname will end with that suffix +and be tested for uniqueness with that suffix. The `cleanup` option controls whether the process attempts to delete the returned path automatically when the process exits. Note that the `tempname` function does not create any file or directory at the returned location, so there is nothing to cleanup unless you create a file or directory there. If -you do and `clean` is `true` it will be deleted upon process termination. +you do and `cleanup` is `true` it will be deleted upon process termination. !!! compat "Julia 1.4" The `parent` and `cleanup` arguments were added in 1.4. 
Prior to Julia 1.4 the path `tempname` would never be cleaned up at process termination. +!!! compat "Julia 1.12" + The `suffix` keyword argument was added in Julia 1.12. + !!! warning This can lead to security holes if another process obtains the same @@ -731,10 +865,11 @@ temporary file upon completion. See also: [`mktempdir`](@ref). """ function mktemp(fn::Function, parent::AbstractString=tempdir()) - (tmp_path, tmp_io) = mktemp(parent, cleanup=false) + (tmp_path, tmp_io) = mktemp(parent) try fn(tmp_path, tmp_io) finally + temp_cleanup_forget(tmp_path) try close(tmp_io) ispath(tmp_path) && rm(tmp_path) @@ -750,7 +885,7 @@ end mktempdir(f::Function, parent=tempdir(); prefix=$(repr(temp_prefix))) Apply the function `f` to the result of [`mktempdir(parent; prefix)`](@ref) and remove the -temporary directory all of its contents upon completion. +temporary directory and all of its contents upon completion. See also: [`mktemp`](@ref), [`mkdir`](@ref). @@ -759,10 +894,11 @@ See also: [`mktemp`](@ref), [`mkdir`](@ref). """ function mktempdir(fn::Function, parent::AbstractString=tempdir(); prefix::AbstractString=temp_prefix) - tmpdir = mktempdir(parent; prefix=prefix, cleanup=false) + tmpdir = mktempdir(parent; prefix=prefix) try fn(tmpdir) finally + temp_cleanup_forget(tmpdir) try if ispath(tmpdir) prepare_for_deletion(tmpdir) @@ -807,7 +943,7 @@ See also: [`walkdir`](@ref). julia> cd("/home/JuliaUser/dev/julia") julia> readdir() -30-element Array{String,1}: +30-element Vector{String}: ".appveyor.yml" ".git" ".gitattributes" @@ -817,7 +953,7 @@ julia> readdir() "usr-staging" julia> readdir(join=true) -30-element Array{String,1}: +30-element Vector{String}: "/home/JuliaUser/dev/julia/.appveyor.yml" "/home/JuliaUser/dev/julia/.git" "/home/JuliaUser/dev/julia/.gitattributes" @@ -827,7 +963,7 @@ julia> readdir(join=true) "/home/JuliaUser/dev/julia/usr-staging" julia> readdir("base") -145-element Array{String,1}: +145-element Vector{String}: ".gitignore" "Base.jl" "Enums.jl" @@ -837,7 +973,7 @@ julia> readdir("base") "weakkeydict.jl" julia> readdir("base", join=true) -145-element Array{String,1}: +145-element Vector{String}: "base/.gitignore" "base/Base.jl" "base/Enums.jl" @@ -847,7 +983,7 @@ julia> readdir("base", join=true) "base/weakkeydict.jl" julia> readdir(abspath("base"), join=true) -145-element Array{String,1}: +145-element Vector{String}: "/home/JuliaUser/dev/julia/base/.gitignore" "/home/JuliaUser/dev/julia/base/Base.jl" "/home/JuliaUser/dev/julia/base/Enums.jl" @@ -857,7 +993,79 @@ julia> readdir(abspath("base"), join=true) "/home/JuliaUser/dev/julia/base/weakkeydict.jl" ``` """ -function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true) +readdir(; join::Bool=false, kwargs...) = readdir(join ? pwd() : "."; join, kwargs...)::Vector{String} +readdir(dir::AbstractString; kwargs...) = _readdir(dir; return_objects=false, kwargs...)::Vector{String} + +# this might be better as an Enum but they're not available here +# UV_DIRENT_T +const UV_DIRENT_UNKNOWN = Cint(0) +const UV_DIRENT_FILE = Cint(1) +const UV_DIRENT_DIR = Cint(2) +const UV_DIRENT_LINK = Cint(3) +const UV_DIRENT_FIFO = Cint(4) +const UV_DIRENT_SOCKET = Cint(5) +const UV_DIRENT_CHAR = Cint(6) +const UV_DIRENT_BLOCK = Cint(7) + +""" + DirEntry + +A type representing a filesystem entry that contains the name of the entry, the directory, and +the raw type of the entry. The full path of the entry can be obtained lazily by accessing the +`path` field. 
The type of the entry can be checked by calling [`isfile`](@ref), [`isdir`](@ref),
+[`islink`](@ref), [`isfifo`](@ref), [`issocket`](@ref), [`ischardev`](@ref), and [`isblockdev`](@ref).
+"""
+struct DirEntry
+ dir::String
+ name::String
+ rawtype::Cint
+end
+function Base.getproperty(obj::DirEntry, p::Symbol)
+ if p === :path
+ return joinpath(obj.dir, obj.name)
+ else
+ return getfield(obj, p)
+ end
+end
+Base.propertynames(::DirEntry) = (:dir, :name, :path, :rawtype)
+Base.isless(a::DirEntry, b::DirEntry) = a.dir == b.dir ? isless(a.name, b.name) : isless(a.dir, b.dir)
+Base.hash(o::DirEntry, h::UInt) = hash(o.dir, hash(o.name, hash(o.rawtype, h)))
+Base.:(==)(a::DirEntry, b::DirEntry) = a.name == b.name && a.dir == b.dir && a.rawtype == b.rawtype
+joinpath(obj::DirEntry, args...) = joinpath(obj.path, args...)
+isunknown(obj::DirEntry) = obj.rawtype == UV_DIRENT_UNKNOWN
+islink(obj::DirEntry) = isunknown(obj) ? islink(obj.path) : obj.rawtype == UV_DIRENT_LINK
+isfile(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isfile(obj.path) : obj.rawtype == UV_DIRENT_FILE
+isdir(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isdir(obj.path) : obj.rawtype == UV_DIRENT_DIR
+isfifo(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isfifo(obj.path) : obj.rawtype == UV_DIRENT_FIFO
+issocket(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? issocket(obj.path) : obj.rawtype == UV_DIRENT_SOCKET
+ischardev(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? ischardev(obj.path) : obj.rawtype == UV_DIRENT_CHAR
+isblockdev(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isblockdev(obj.path) : obj.rawtype == UV_DIRENT_BLOCK
+realpath(obj::DirEntry) = realpath(obj.path)
+
+"""
+ _readdirx(dir::AbstractString=pwd(); sort::Bool = true) -> Vector{DirEntry}
+
+Return a vector of [`DirEntry`](@ref) objects representing the contents of the directory `dir`,
+or the current working directory if not given. If `sort` is true, the returned vector is
+sorted by name.
+
+Unlike [`readdir`](@ref), `_readdirx` returns [`DirEntry`](@ref) objects, which contain the name of the
+file, the directory it is in, and the type of the file, which is determined during the
+directory scan. This means that calls to [`isfile`](@ref), [`isdir`](@ref), [`islink`](@ref), [`isfifo`](@ref),
+[`issocket`](@ref), [`ischardev`](@ref), and [`isblockdev`](@ref) can be made on the
+returned objects without further stat calls. However, for some filesystems, the type of the file
+cannot be determined without a stat call. In these cases the `rawtype` field of the [`DirEntry`](@ref)
+object will be 0 (`UV_DIRENT_UNKNOWN`) and [`isfile`](@ref) etc. will fall back to a `stat` call.
+
+```julia
+for obj in _readdirx()
+ isfile(obj) && println("\$(obj.name) is a file with path \$(obj.path)")
+end
+```
+"""
+_readdirx(dir::AbstractString=pwd(); sort::Bool=true) = _readdir(dir; return_objects=true, sort)::Vector{DirEntry}
+
+function _readdir(dir::AbstractString; return_objects::Bool=false, join::Bool=false, sort::Bool=true) # Allocate space for uv_fs_t struct req = Libc.malloc(_sizeof_uv_fs) try
@@ -867,11 +1075,16 @@ function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true) err < 0 && uv_error("readdir($(repr(dir)))", err) # iterate the listing into entries
- entries = String[]
+ entries = return_objects ? DirEntry[] : String[]
 ent = Ref{uv_dirent_t}() while Base.UV_EOF != ccall(:uv_fs_scandir_next, Cint, (Ptr{Cvoid}, Ptr{uv_dirent_t}), req, ent) name = unsafe_string(ent[].name)
- push!(entries, join ?
joinpath(dir, name) : name) + if return_objects + rawtype = ent[].typ + push!(entries, DirEntry(dir, name, rawtype)) + else + push!(entries, join ? joinpath(dir, name) : name) + end end # Clean up the request string @@ -885,31 +1098,38 @@ function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true) Libc.free(req) end end -readdir(; join::Bool=false, sort::Bool=true) = - readdir(join ? pwd() : ".", join=join, sort=sort) """ - walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw) + walkdir(dir = pwd(); topdown=true, follow_symlinks=false, onerror=throw) Return an iterator that walks the directory tree of a directory. -The iterator returns a tuple containing `(rootpath, dirs, files)`. + +The iterator returns a tuple containing `(path, dirs, files)`. +Each iteration `path` will change to the next directory in the tree; +then `dirs` and `files` will be vectors containing the directories and files +in the current `path` directory. The directory tree can be traversed top-down or bottom-up. If `walkdir` or `stat` encounters a `IOError` it will rethrow the error by default. A custom error handling function can be provided through `onerror` keyword argument. `onerror` is called with a `IOError` as argument. +The returned iterator is stateful so when accessed repeatedly each access will +resume where the last left off, like [`Iterators.Stateful`](@ref). See also: [`readdir`](@ref). +!!! compat "Julia 1.12" + `pwd()` as the default directory was added in Julia 1.12. + # Examples ```julia -for (root, dirs, files) in walkdir(".") - println("Directories in \$root") +for (path, dirs, files) in walkdir(".") + println("Directories in \$path") for dir in dirs - println(joinpath(root, dir)) # path to directories + println(joinpath(path, dir)) # path to directories end - println("Files in \$root") + println("Files in \$path") for file in files - println(joinpath(root, file)) # path to files + println(joinpath(path, file)) # path to files end end ``` @@ -919,18 +1139,18 @@ julia> mkpath("my/test/dir"); julia> itr = walkdir("my"); -julia> (root, dirs, files) = first(itr) +julia> (path, dirs, files) = first(itr) ("my", ["test"], String[]) -julia> (root, dirs, files) = first(itr) +julia> (path, dirs, files) = first(itr) ("my/test", ["dir"], String[]) -julia> (root, dirs, files) = first(itr) +julia> (path, dirs, files) = first(itr) ("my/test/dir", String[], String[]) ``` """ -function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw) - function _walkdir(chnl, root) +function walkdir(path = pwd(); topdown=true, follow_symlinks=false, onerror=throw) + function _walkdir(chnl, path) tryf(f, p) = try f(p) catch err @@ -942,33 +1162,31 @@ function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw) end return end - content = tryf(readdir, root) - content === nothing && return - dirs = Vector{eltype(content)}() - files = Vector{eltype(content)}() - for name in content - path = joinpath(root, name) - + entries = tryf(_readdirx, path) + entries === nothing && return + dirs = Vector{String}() + files = Vector{String}() + for entry in entries # If we're not following symlinks, then treat all symlinks as files - if (!follow_symlinks && something(tryf(islink, path), true)) || !something(tryf(isdir, path), false) - push!(files, name) + if (!follow_symlinks && something(tryf(islink, entry), true)) || !something(tryf(isdir, entry), false) + push!(files, entry.name) else - push!(dirs, name) + push!(dirs, entry.name) end end if topdown - push!(chnl, (root, dirs, files)) + 
push!(chnl, (path, dirs, files)) end for dir in dirs - _walkdir(chnl, joinpath(root, dir)) + _walkdir(chnl, joinpath(path, dir)) end if !topdown - push!(chnl, (root, dirs, files)) + push!(chnl, (path, dirs, files)) end nothing end - return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, root)) + return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, path)) end function unlink(p::AbstractString) @@ -977,15 +1195,38 @@ function unlink(p::AbstractString) nothing end -# For move command -function rename(src::AbstractString, dst::AbstractString; force::Bool=false) - err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), src, dst) - # on error, default to cp && rm +""" + Base.rename(oldpath::AbstractString, newpath::AbstractString) + +Change the name of a file or directory from `oldpath` to `newpath`. +If `newpath` is an existing file or empty directory it may be replaced. +Equivalent to [rename(2)](https://man7.org/linux/man-pages/man2/rename.2.html) on Unix. +If a path contains a "\\0" throw an `ArgumentError`. +On other failures throw an `IOError`. +Return `newpath`. + +This is a lower level filesystem operation used to implement [`mv`](@ref). + +OS-specific restrictions may apply when `oldpath` and `newpath` are in different directories. + +Currently there are a few differences in behavior on Windows which may be resolved in a future release. +Specifically, currently on Windows: +1. `rename` will fail if `oldpath` or `newpath` are opened files. +2. `rename` will fail if `newpath` is an existing directory. +3. `rename` may work if `newpath` is a file and `oldpath` is a directory. +4. `rename` may remove `oldpath` if it is a hardlink to `newpath`. + +See also: [`mv`](@ref). + +!!! compat "Julia 1.12" + This method was made public in Julia 1.12. +""" +function rename(oldpath::AbstractString, newpath::AbstractString) + err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), oldpath, newpath) if err < 0 - cp(src, dst; force=force, follow_symlinks=false) - rm(src; recursive=true) + uv_error("rename($(repr(oldpath)), $(repr(newpath)))", err) end - nothing + newpath end function sendfile(src::AbstractString, dst::AbstractString) diff --git a/base/filesystem.jl b/base/filesystem.jl index 63fe4281f6e59..bc1f4942877e8 100644 --- a/base/filesystem.jl +++ b/base/filesystem.jl @@ -4,6 +4,45 @@ module Filesystem +""" + JL_O_APPEND + JL_O_ASYNC + JL_O_CLOEXEC + JL_O_CREAT + JL_O_DIRECT + JL_O_DIRECTORY + JL_O_DSYNC + JL_O_EXCL + JL_O_FSYNC + JL_O_LARGEFILE + JL_O_NDELAY + JL_O_NOATIME + JL_O_NOCTTY + JL_O_NOFOLLOW + JL_O_NONBLOCK + JL_O_PATH + JL_O_RANDOM + JL_O_RDONLY + JL_O_RDWR + JL_O_RSYNC + JL_O_SEQUENTIAL + JL_O_SHORT_LIVED + JL_O_SYNC + JL_O_TEMPORARY + JL_O_TMPFILE + JL_O_TRUNC + JL_O_WRONLY + +Enum constant for the `open` syscall, where `JL_O_*` corresponds to the `O_*` constant. +See [the libuv docs](https://docs.libuv.org/en/v1.x/fs.html#file-open-constants) for more details. 
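For example, the flags can be combined with `|` and passed to the file-level `open` wrapper in
this module. The snippet below is an illustrative sketch rather than a doctest; it assumes the
non-exported `Base.Filesystem.open(path, flags, mode)` method:

```julia
using Base.Filesystem: JL_O_CREAT, JL_O_WRONLY, JL_O_TRUNC

# OR the flags together, as with the C open(2) call; the third argument is the file mode.
# Assumes the internal Base.Filesystem.open(path, flags, mode) method.
f = Base.Filesystem.open("data.bin", JL_O_CREAT | JL_O_WRONLY | JL_O_TRUNC, 0o644)
write(f, UInt8[1, 2, 3])
close(f)
```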
+""" +(:JL_O_APPEND, :JL_O_ASYNC, :JL_O_CLOEXEC, :JL_O_CREAT, :JL_O_DIRECT, + :JL_O_DIRECTORY, :JL_O_DSYNC, :JL_O_EXCL, :JL_O_FSYNC, :JL_O_LARGEFILE, + :JL_O_NOATIME, :JL_O_NOCTTY, :JL_O_NDELAY, :JL_O_NOFOLLOW, :JL_O_NONBLOCK, + :JL_O_PATH, :JL_O_RANDOM, :JL_O_RDONLY, :JL_O_RDWR, :JL_O_RSYNC, + :JL_O_SEQUENTIAL, :JL_O_SHORT_LIVED, :JL_O_SYNC, :JL_O_TEMPORARY, + :JL_O_TMPFILE, :JL_O_TRUNC, :JL_O_WRONLY) + const S_IFDIR = 0o040000 # directory const S_IFCHR = 0o020000 # character device const S_IFBLK = 0o060000 # block device @@ -31,6 +70,36 @@ const S_IWOTH = 0o0002 # write by other const S_IXOTH = 0o0001 # execute by other const S_IRWXO = 0o0007 # mask for other permissions +""" + S_IRUSR + S_IWUSR + S_IXUSR + S_IRGRP + S_IWGRP + S_IXGRP + S_IROTH + S_IWOTH + S_IXOTH + +Constants for file access permission bits. +The general structure is `S_I[permission][class]` +where `permission` is `R` for read, `W` for write, and `X` for execute, +and `class` is `USR` for user/owner, `GRP` for group, and `OTH` for other. +""" +(:S_IRUSR, :S_IWUSR, :S_IXUSR, :S_IRGRP, :S_IWGRP, :S_IXGRP, :S_IROTH, :S_IWOTH, :S_IXOTH) + +""" + S_IRWXU + S_IRWXG + S_IRWXO + +Constants for file access permission masks, i.e. the combination of read, write, +and execute permissions for a class. +The general structure is `S_IRWX[class]` +where `class` is `U` for user/owner, `G` for group, and `O` for other. +""" +(:S_IRWXU, :S_IRWXG, :S_IRWXO) + export File, StatStruct, # open, @@ -48,7 +117,6 @@ export File, JL_O_SEQUENTIAL, JL_O_RANDOM, JL_O_NOCTTY, - JL_O_NOCTTY, JL_O_NONBLOCK, JL_O_NDELAY, JL_O_SYNC, @@ -72,7 +140,8 @@ import .Base: IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen, bytesavailable, position, read, read!, readavailable, seek, seekend, show, skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error, - setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize + setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize, + isexecutable, isreadable, iswritable, MutableDenseArrayType import .Base.RefValue @@ -90,7 +159,7 @@ uv_fs_req_cleanup(req) = ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req) include("path.jl") include("stat.jl") include("file.jl") -include(string(length(Core.ARGS) >= 2 ? 
Core.ARGS[2] : "", "file_constants.jl")) # include($BUILDROOT/base/file_constants.jl) +include(string(Base.BUILDROOT, "file_constants.jl")) # include($BUILDROOT/base/file_constants.jl) ## Operations with File (fd) objects ## @@ -143,6 +212,8 @@ function close(f::File) nothing end +closewrite(f::File) = nothing + # sendfile is the most efficient way to copy from a file descriptor function sendfile(dst::File, src::File, src_offset::Int64, bytes::Int) check_open(dst) @@ -193,18 +264,23 @@ end function read(f::File, ::Type{UInt8}) check_open(f) - ret = ccall(:jl_fs_read_byte, Int32, (OS_HANDLE,), f.handle) + p = Ref{UInt8}() + ret = ccall(:jl_fs_read, Int32, (OS_HANDLE, Ptr{Cvoid}, Csize_t), + f.handle, p, 1) uv_error("read", ret) - return ret % UInt8 + @assert ret <= sizeof(p) == 1 + ret < 1 && throw(EOFError()) + return p[] % UInt8 end function read(f::File, ::Type{Char}) b0 = read(f, UInt8) - l = 8 * (4 - leading_ones(b0)) + l = 0x08 * (0x04 - UInt8(leading_ones(b0))) c = UInt32(b0) << 24 - if l < 24 + if l ≤ 0x10 s = 16 while s ≥ l && !eof(f) + # this works around lack of peek(::File) p = position(f) b = read(f, UInt8) if b & 0xc0 != 0x80 @@ -233,7 +309,7 @@ bytesavailable(f::File) = max(0, filesize(f) - position(f)) # position can be > eof(f::File) = bytesavailable(f) == 0 -function readbytes!(f::File, b::Array{UInt8}, nb=length(b)) +function readbytes!(f::File, b::MutableDenseArrayType{UInt8}, nb=length(b)) nr = min(nb, bytesavailable(f)) if length(b) < nr resize!(b, nr) @@ -290,5 +366,85 @@ function touch(f::File) f end +""" + isexecutable(path::String) + +Return `true` if the given `path` has executable permissions. + +!!! note + This permission may change before the user executes `path`, + so it is recommended to execute the file and handle the error if that fails, + rather than calling `isexecutable` first. + +!!! note + Prior to Julia 1.6, this did not correctly interrogate filesystem + ACLs on Windows, therefore it would return `true` for any + file. From Julia 1.6 on, it correctly determines whether the + file is marked as executable or not. + +See also [`ispath`](@ref), [`isreadable`](@ref), [`iswritable`](@ref). +""" +function isexecutable(path::String) + # We use `access()` and `X_OK` to determine if a given path is + # executable by the current user. `X_OK` comes from `unistd.h`. + X_OK = 0x01 + return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, X_OK) == 0 +end +isexecutable(path::AbstractString) = isexecutable(String(path)) + +""" + isreadable(path::String) + +Return `true` if the access permissions for the given `path` permitted reading by the current user. + +!!! note + This permission may change before the user calls `open`, + so it is recommended to just call `open` alone and handle the error if that fails, + rather than calling `isreadable` first. + +!!! note + Currently this function does not correctly interrogate filesystem + ACLs on Windows, therefore it can return wrong results. + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. + +See also [`ispath`](@ref), [`isexecutable`](@ref), [`iswritable`](@ref). +""" +function isreadable(path::String) + # We use `access()` and `R_OK` to determine if a given path is + # readable by the current user. `R_OK` comes from `unistd.h`. 
+ R_OK = 0x04 + return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, R_OK) == 0 +end +isreadable(path::AbstractString) = isreadable(String(path)) + +""" + iswritable(path::String) + +Return `true` if the access permissions for the given `path` permitted writing by the current user. + +!!! note + This permission may change before the user calls `open`, + so it is recommended to just call `open` alone and handle the error if that fails, + rather than calling `iswritable` first. + +!!! note + Currently this function does not correctly interrogate filesystem + ACLs on Windows, therefore it can return wrong results. + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. + +See also [`ispath`](@ref), [`isexecutable`](@ref), [`isreadable`](@ref). +""" +function iswritable(path::String) + # We use `access()` and `W_OK` to determine if a given path is + # writeable by the current user. `W_OK` comes from `unistd.h`. + W_OK = 0x02 + return ccall(:jl_fs_access, Cint, (Cstring, Cint), path, W_OK) == 0 +end +iswritable(path::AbstractString) = iswritable(String(path)) + end diff --git a/base/float.jl b/base/float.jl index d5280ef74fbce..faded5cd5978c 100644 --- a/base/float.jl +++ b/base/float.jl @@ -14,6 +14,8 @@ const Inf16 = bitcast(Float16, 0x7c00) NaN16 A not-a-number value of type [`Float16`](@ref). + +See also: [`NaN`](@ref). """ const NaN16 = bitcast(Float16, 0x7e00) """ @@ -26,6 +28,8 @@ const Inf32 = bitcast(Float32, 0x7f800000) NaN32 A not-a-number value of type [`Float32`](@ref). + +See also: [`NaN`](@ref). """ const NaN32 = bitcast(Float32, 0x7fc00000) const Inf64 = bitcast(Float64, 0x7ff0000000000000) @@ -69,9 +73,23 @@ NaN julia> Inf - Inf NaN -julia> NaN == NaN, isequal(NaN, NaN), NaN === NaN +julia> NaN == NaN, isequal(NaN, NaN), isnan(NaN) (false, true, true) ``` + +!!! note + Always use [`isnan`](@ref) or [`isequal`](@ref) for checking for `NaN`. + Using `x === NaN` may give unexpected results: + ```julia-repl + julia> reinterpret(UInt32, NaN32) + 0x7fc00000 + + julia> NaN32p1 = reinterpret(Float32, 0x7fc00001) + NaN32 + + julia> NaN32p1 === NaN32, isequal(NaN32p1, NaN32), isnan(NaN32p1) + (false, true, true) + ``` """ NaN, NaN64 @@ -104,13 +122,16 @@ significand_mask(::Type{Float16}) = 0x03ff mantissa(x::T) where {T} = reinterpret(Unsigned, x) & significand_mask(T) for T in (Float16, Float32, Float64) - @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) - @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) - @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T))) + sb = trailing_ones(significand_mask(T)) + em = exponent_mask(T) + eb = Int(exponent_one(T) >> sb) + @eval significand_bits(::Type{$T}) = $(sb) + @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - sb - 1) + @eval exponent_bias(::Type{$T}) = $(eb) # maximum float exponent - @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) - 1) + @eval exponent_max(::Type{$T}) = $(Int(em >> sb) - eb - 1) # maximum float exponent without bias - @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T))) + @eval exponent_raw_max(::Type{$T}) = $(Int(em >> sb)) end """ @@ -137,6 +158,68 @@ i.e. the maximum integer value representable by [`exponent_bits(T)`](@ref) bits. """ function exponent_raw_max end +""" +IEEE 754 definition of the minimum exponent. 
+""" +ieee754_exponent_min(::Type{T}) where {T<:IEEEFloat} = Int(1 - exponent_max(T))::Int + +exponent_min(::Type{Float16}) = ieee754_exponent_min(Float16) +exponent_min(::Type{Float32}) = ieee754_exponent_min(Float32) +exponent_min(::Type{Float64}) = ieee754_exponent_min(Float64) + +function ieee754_representation( + ::Type{F}, sign_bit::Bool, exponent_field::Integer, significand_field::Integer +) where {F<:IEEEFloat} + T = uinttype(F) + ret::T = sign_bit + ret <<= exponent_bits(F) + ret |= exponent_field + ret <<= significand_bits(F) + ret |= significand_field +end + +# ±floatmax(T) +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:omega} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, exponent_raw_max(F) - 1, significand_mask(F)) +end + +# NaN or an infinity +function ieee754_representation( + ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:nan} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, exponent_raw_max(F), significand_field) +end + +# NaN with default payload +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:nan} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, one(uinttype(F)) << (significand_bits(F) - 1), Val(:nan)) +end + +# Infinity +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:inf} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, false, Val(:nan)) +end + +# Subnormal or zero +function ieee754_representation( + ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:subnormal} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, false, significand_field) +end + +# Zero +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:zero} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, false, Val(:subnormal)) +end + """ uabs(x::Integer) @@ -168,8 +251,6 @@ for t1 in (Float16, Float32, Float64) end end -Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) - promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64 promote_rule(::Type{Float64}, ::Type{Int128}) = Float64 promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32 @@ -374,26 +455,28 @@ unsafe_trunc(::Type{UInt128}, x::Float16) = unsafe_trunc(UInt128, Float32(x)) unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x)) # matches convert methods -# also determines floor, ceil, round -trunc(::Type{Signed}, x::IEEEFloat) = trunc(Int,x) -trunc(::Type{Unsigned}, x::IEEEFloat) = trunc(UInt,x) -trunc(::Type{Integer}, x::IEEEFloat) = trunc(Int,x) - -# fallbacks -floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDown)) -ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundUp)) -round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest)) - -# Bool -trunc(::Type{Bool}, x::AbstractFloat) = (-1 < x < 2) ? 1 <= x : throw(InexactError(:trunc, Bool, x)) -floor(::Type{Bool}, x::AbstractFloat) = (0 <= x < 2) ? 1 <= x : throw(InexactError(:floor, Bool, x)) -ceil(::Type{Bool}, x::AbstractFloat) = (-1 < x <= 1) ? 0 < x : throw(InexactError(:ceil, Bool, x)) -round(::Type{Bool}, x::AbstractFloat) = (-0.5 <= x < 1.5) ? 
0.5 < x : throw(InexactError(:round, Bool, x)) - -round(x::IEEEFloat, r::RoundingMode{:ToZero}) = trunc_llvm(x) -round(x::IEEEFloat, r::RoundingMode{:Down}) = floor_llvm(x) -round(x::IEEEFloat, r::RoundingMode{:Up}) = ceil_llvm(x) -round(x::IEEEFloat, r::RoundingMode{:Nearest}) = rint_llvm(x) +# also determines trunc, floor, ceil +round(::Type{Signed}, x::IEEEFloat, r::RoundingMode) = round(Int, x, r) +round(::Type{Unsigned}, x::IEEEFloat, r::RoundingMode) = round(UInt, x, r) +round(::Type{Integer}, x::IEEEFloat, r::RoundingMode) = round(Int, x, r) + +round(x::IEEEFloat, ::RoundingMode{:ToZero}) = trunc_llvm(x) +round(x::IEEEFloat, ::RoundingMode{:Down}) = floor_llvm(x) +round(x::IEEEFloat, ::RoundingMode{:Up}) = ceil_llvm(x) +round(x::IEEEFloat, ::RoundingMode{:Nearest}) = rint_llvm(x) + +rounds_up(x, ::RoundingMode{:Down}) = false +rounds_up(x, ::RoundingMode{:Up}) = true +rounds_up(x, ::RoundingMode{:ToZero}) = signbit(x) +rounds_up(x, ::RoundingMode{:FromZero}) = !signbit(x) +function _round_convert(::Type{T}, x_integer, x, r::Union{RoundingMode{:ToZero}, RoundingMode{:FromZero}, RoundingMode{:Up}, RoundingMode{:Down}}) where {T<:AbstractFloat} + x_t = convert(T, x_integer) + if rounds_up(x, r) + x_t < x ? nextfloat(x_t) : x_t + else + x_t > x ? prevfloat(x_t) : x_t + end +end ## floating point promotions ## promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 @@ -688,22 +771,24 @@ function hash(x::Real, h::UInt) den_z = trailing_zeros(den) den >>= den_z pow += num_z - den_z - - # handle values representable as Int64, UInt64, Float64 + # If the real can be represented as an Int64, UInt64, or Float64, hash as those types. + # To be an Integer the denominator must be 1 and the power must be non-negative. if den == 1 + # left = ceil(log2(num*2^pow)) left = top_set_bit(abs(num)) + pow - right = pow + den_z - if -1074 <= right - if 0 <= right + # 2^-1074 is the minimum Float64 so if the power is smaller, not a Float64 + if -1074 <= pow + if 0 <= pow # if pow is non-negative, it is an integer left <= 63 && return hash(Int64(num) << Int(pow), h) left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h) end # typemin(Int64) handled by Float64 case - left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow), h) + # 2^1024 is the maximum Float64 so if the power is greater, not a Float64 + # Float64s only have 53 mantisa bits (including implicit bit) + left <= 1024 && left - pow <= 53 && return hash(ldexp(Float64(num), pow), h) end else h = hash_integer(den, h) end - # handle generic rational values h = hash_integer(pow, h) h = hash_integer(num, h) @@ -783,12 +868,12 @@ number of significand digits in that base. """ function precision end -_precision(::Type{Float16}) = 11 -_precision(::Type{Float32}) = 24 -_precision(::Type{Float64}) = 53 -function _precision(x, base::Integer=2) +_precision_with_base_2(::Type{Float16}) = 11 +_precision_with_base_2(::Type{Float32}) = 24 +_precision_with_base_2(::Type{Float64}) = 53 +function _precision(x, base::Integer) base > 1 || throw(DomainError(base, "`base` cannot be less than 2.")) - p = _precision(x) + p = _precision_with_base_2(x) return base == 2 ? Int(p) : floor(Int, p / log2(base)) end precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat} = _precision(T, base) @@ -842,8 +927,8 @@ end """ nextfloat(x::AbstractFloat) -Return the smallest floating point number `y` of the same type as `x` such `x < y`. If no -such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`. 
+Return the smallest floating point number `y` of the same type as `x` such that `x < y`. +If no such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`. See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref). """ @@ -860,8 +945,8 @@ prevfloat(x::AbstractFloat, d::Integer) = nextfloat(x, -d) """ prevfloat(x::AbstractFloat) -Return the largest floating point number `y` of the same type as `x` such `y < x`. If no -such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`. +Return the largest floating point number `y` of the same type as `x` such that `y < x`. +If no such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`. """ prevfloat(x::AbstractFloat) = nextfloat(x,-1) @@ -872,15 +957,18 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or # rounded to `Inf` (e.g. when `Ti==UInt128 && Tf==Float32`). @eval begin - function trunc(::Type{$Ti},x::$Tf) + function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero}) if $(Tf(typemin(Ti))-one(Tf)) < x < $(Tf(typemax(Ti))+one(Tf)) return unsafe_trunc($Ti,x) else - throw(InexactError(:trunc, $Ti, x)) + throw(InexactError(:round, $Ti, x, RoundToZero)) end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && isinteger(x) + # When typemax(Ti) is not representable by Tf but typemax(Ti) + 1 is, + # then < Tf(typemax(Ti) + 1) is stricter than <= Tf(typemax(Ti)). Using + # the former causes us to throw on UInt64(Float64(typemax(UInt64))+1) + if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))+one(Tf))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) @@ -893,11 +981,11 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for # these types, but not for `Float16` or larger integer types. @eval begin - function trunc(::Type{$Ti},x::$Tf) + function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero}) if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))) return unsafe_trunc($Ti,x) else - throw(InexactError(:trunc, $Ti, x)) + throw(InexactError(:round, $Ti, x, RoundToZero)) end end function (::Type{$Ti})(x::$Tf) @@ -958,13 +1046,24 @@ isodd(x::AbstractFloat) = isinteger(x) && abs(x) ≤ maxintfloat(x) && isodd(Int floatmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff)) floatmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff)) - eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN) eps(::Type{Float16}) = $(bitcast(Float16, 0x1400)) eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000)) eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000)) eps() = eps(Float64) end +eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN) + +function eps(x::T) where T<:IEEEFloat + # For isfinite(x), toggling the LSB will produce either prevfloat(x) or + # nextfloat(x) but will never change the sign or exponent. + # For !isfinite(x), this will map Inf to NaN and NaN to NaN or Inf. + y = reinterpret(T, reinterpret(Unsigned, x) ⊻ true) + # The absolute difference between these values is eps(x). This is true even + # for Inf/NaN values. 
+ return abs(x - y) +end + """ floatmin(T = Float64) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 9b8ca4b04ee28..2c26f7cff1133 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -42,87 +42,9 @@ it is the minimum of `maxintfloat(T)` and [`typemax(S)`](@ref). maxintfloat(::Type{S}, ::Type{T}) where {S<:AbstractFloat, T<:Integer} = min(maxintfloat(S), S(typemax(T))) maxintfloat() = maxintfloat(Float64) -isinteger(x::AbstractFloat) = (x - trunc(x) == 0) +isinteger(x::AbstractFloat) = iszero(x - trunc(x)) # note: x == trunc(x) would be incorrect for x=Inf -""" - round([T,] x, [r::RoundingMode]) - round(x, [r::RoundingMode]; digits::Integer=0, base = 10) - round(x, [r::RoundingMode]; sigdigits::Integer, base = 10) - -Rounds the number `x`. - -Without keyword arguments, `x` is rounded to an integer value, returning a value of type -`T`, or of the same type of `x` if no `T` is provided. An [`InexactError`](@ref) will be -thrown if the value is not representable by `T`, similar to [`convert`](@ref). - -If the `digits` keyword argument is provided, it rounds to the specified number of digits -after the decimal place (or before if negative), in base `base`. - -If the `sigdigits` keyword argument is provided, it rounds to the specified number of -significant digits, in base `base`. - -The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the default is -[`RoundNearest`](@ref), which rounds to the nearest integer, with ties (fractional values -of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect -results if the global rounding mode is changed (see [`rounding`](@ref)). - -# Examples -```jldoctest -julia> round(1.7) -2.0 - -julia> round(Int, 1.7) -2 - -julia> round(1.5) -2.0 - -julia> round(2.5) -2.0 - -julia> round(pi; digits=2) -3.14 - -julia> round(pi; digits=3, base=2) -3.125 - -julia> round(123.456; sigdigits=2) -120.0 - -julia> round(357.913; sigdigits=4, base=2) -352.0 -``` - -!!! note - Rounding to specified digits in bases other than 2 can be inexact when - operating on binary floating point numbers. For example, the [`Float64`](@ref) - value represented by `1.15` is actually *less* than 1.15, yet will be - rounded to 1.2. For example: - - ```jldoctest - julia> x = 1.15 - 1.15 - - julia> big(1.15) - 1.149999999999999911182158029987476766109466552734375 - - julia> x < 115//100 - true - - julia> round(x, digits=1) - 1.2 - ``` - -# Extensions - -To extend `round` to new numeric types, it is typically sufficient to define `Base.round(x::NewType, r::RoundingMode)`. -""" -round(T::Type, x) - -function round(::Type{T}, x::AbstractFloat, r::RoundingMode) where {T<:Integer} - r != RoundToZero && (x = round(x,r)) - trunc(T, x) -end +# See rounding.jl for docstring. # NOTE: this relies on the current keyword dispatch behaviour (#9498). function round(x::Real, r::RoundingMode=RoundNearest; @@ -150,12 +72,6 @@ function round(x::Real, r::RoundingMode=RoundNearest; end end -trunc(x::Real; kwargs...) = round(x, RoundToZero; kwargs...) -floor(x::Real; kwargs...) = round(x, RoundDown; kwargs...) -ceil(x::Real; kwargs...) = round(x, RoundUp; kwargs...) 
- -round(x::Integer, r::RoundingMode) = x - # round x to multiples of 1/invstep function _round_invstep(x, invstep, r::RoundingMode) y = round(x * invstep, r) / invstep @@ -304,7 +220,22 @@ true function isapprox(x::Number, y::Number; atol::Real=0, rtol::Real=rtoldefault(x,y,atol), nans::Bool=false, norm::Function=abs) - x == y || (isfinite(x) && isfinite(y) && norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))) || (nans && isnan(x) && isnan(y)) + x′, y′ = promote(x, y) # to avoid integer overflow + x == y || + (isfinite(x) && isfinite(y) && norm(x-y) <= max(atol, rtol*max(norm(x′), norm(y′)))) || + (nans && isnan(x) && isnan(y)) +end + +function isapprox(x::Integer, y::Integer; + atol::Real=0, rtol::Real=rtoldefault(x,y,atol), + nans::Bool=false, norm::Function=abs) + if norm === abs && atol < 1 && rtol == 0 + return x == y + else + # We need to take the difference `max` - `min` when comparing unsigned integers. + _x, _y = x < y ? (x, y) : (y, x) + return norm(_y - _x) <= max(atol, rtol*max(norm(_x), norm(_y))) + end end """ diff --git a/base/compiler/parsing.jl b/base/flparse.jl similarity index 100% rename from base/compiler/parsing.jl rename to base/flparse.jl diff --git a/base/gcutils.jl b/base/gcutils.jl index fed30befd7d5c..84a184537ffc0 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -38,7 +38,7 @@ WeakRef # Used by `Base.finalizer` to validate mutability of an object being finalized. function _check_mutable(@nospecialize(o)) @noinline if !ismutable(o) - error("objects of type ", typeof(o), " cannot be finalized") + error("objects of type ", typeof(o), " cannot be finalized because they are not mutable") end end @@ -70,7 +70,6 @@ end A finalizer may be registered at object construction. In the following example note that we implicitly rely on the finalizer returning the newly created mutable struct `x`. -# Example ```julia mutable struct MyMutableStruct bar @@ -110,6 +109,8 @@ Module with garbage collection utilities. """ module GC +public gc, enable, @preserve, safepoint, enable_logging, logging_enabled + # mirrored from julia.h const GC_AUTO = 0 const GC_FULL = 1 @@ -119,9 +120,12 @@ const GC_INCREMENTAL = 2 GC.gc([full=true]) Perform garbage collection. The argument `full` determines the kind of -collection: A full collection (default) sweeps all objects, which makes the -next GC scan much slower, while an incremental collection may only sweep -so-called young objects. +collection: a full collection (default) traverses all live objects (i.e. full mark) +and should reclaim memory from all unreachable objects. An incremental collection only +reclaims memory from young objects which are not reachable. + +The GC may decide to perform a full collection even if an incremental collection was +requested. !!! warning Excessive use will likely lead to poor performance. @@ -259,4 +263,13 @@ function enable_logging(on::Bool=true) ccall(:jl_enable_gc_logging, Cvoid, (Cint,), on) end +""" + GC.logging_enabled() + +Return whether GC logging has been enabled via [`GC.enable_logging`](@ref). +""" +function logging_enabled() + ccall(:jl_is_gc_logging_enabled, Cint, ()) != 0 +end + end # module GC diff --git a/base/generator.jl b/base/generator.jl index aa4b7f67cba95..1f981de8dc788 100644 --- a/base/generator.jl +++ b/base/generator.jl @@ -5,25 +5,26 @@ Given a function `f` and an iterator `iter`, construct an iterator that yields the values of `f` applied to the elements of `iter`. -The syntax for constructing an instance of this type is `f(x) for x in iter [if cond(x)::Bool] `. 
-The `[if cond(x)::Bool]` expression is optional and acts as a "guard", effectively
-filtering out values where the condition is false.
+The syntax `f(x) for x in iter` constructs an instance of this
+type.
 ```jldoctest
-julia> g = (abs2(x) for x in 1:5 if x != 3);
+julia> g = (abs2(x) for x in 1:5);
 julia> for x in g println(x) end 1 4
+9
 16 25 julia> collect(g)
-4-element Vector{Int64}:
+5-element Vector{Int64}:
 1 4
+ 9
 16 25 ```
diff --git a/base/genericmemory.jl b/base/genericmemory.jl
new file mode 100644
index 0000000000000..2a33336c0aad6
--- /dev/null
+++ b/base/genericmemory.jl
@@ -0,0 +1,414 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## genericmemory.jl: Managed Memory
+
+"""
+ GenericMemory{kind::Symbol, T, addrspace=Core.CPU} <: DenseVector{T}
+
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+
+`kind` can currently be either `:not_atomic` or `:atomic`. For details on what `:atomic` implies, see [`AtomicMemory`](@ref).
+
+`addrspace` can currently only be set to `Core.CPU`. It is designed to permit extension by other systems such as GPUs, which might define values such as:
+```julia
+module CUDA
+const Generic = bitcast(Core.AddrSpace{CUDA}, 0)
+const Global = bitcast(Core.AddrSpace{CUDA}, 1)
+end
+```
+The exact semantics of these other addrspaces are defined by the specific backend; attempting to access them on the CPU is an error.
+
+!!! compat "Julia 1.11"
+ This type requires Julia 1.11 or later.
+"""
+GenericMemory
+
+"""
+ Memory{T} == GenericMemory{:not_atomic, T, Core.CPU}
+
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+
+!!! compat "Julia 1.11"
+ This type requires Julia 1.11 or later.
+"""
+Memory
+
+"""
+ AtomicMemory{T} == GenericMemory{:atomic, T, Core.CPU}
+
+Fixed-size [`DenseVector{T}`](@ref DenseVector).
+Fetching any of its individual elements is performed atomically
+(with `:monotonic` ordering by default).
+
+!!! warning
+ Access to `AtomicMemory` must be done either through the [`@atomic`](@ref)
+ macro or through the lower-level interface functions: `Base.getindex_atomic`,
+ `Base.setindex_atomic!`, `Base.setindexonce_atomic!`,
+ `Base.swapindex_atomic!`, `Base.modifyindex_atomic!`, and `Base.replaceindex_atomic!`.
+
+For details, see [Atomic Operations](@ref man-atomic-operations) as well as macros
+[`@atomic`](@ref), [`@atomiconce`](@ref), [`@atomicswap`](@ref), and [`@atomicreplace`](@ref).
+
+!!! compat "Julia 1.11"
+ This type requires Julia 1.11 or later.
+
+!!! compat "Julia 1.12"
+ The lower-level interface functions and the `@atomic` macro require Julia 1.12 or later.
+"""
+AtomicMemory
+
+## Basic functions ##
+
+using Core: memoryrefoffset, memoryref_isassigned # import more functions which were not essential
+
+size(a::GenericMemory, d::Int) =
+ d < 1 ? error("dimension out of range") :
+ d == 1 ?
length(a) : + 1 +size(a::GenericMemory, d::Integer) = size(a, convert(Int, d)) +size(a::GenericMemory) = (length(a),) + +IndexStyle(::Type{<:GenericMemory}) = IndexLinear() + +parent(ref::GenericMemoryRef) = ref.mem + +pointer(mem::GenericMemoryRef) = unsafe_convert(Ptr{Cvoid}, mem) # no bounds check, even for empty array + +_unsetindex!(A::Memory, i::Int) = (@_propagate_inbounds_meta; _unsetindex!(memoryref(A, i)); A) +function _unsetindex!(A::MemoryRef{T}) where T + @_terminates_locally_meta + @_propagate_inbounds_meta + @inline + @boundscheck memoryref(A, 1) + mem = A.mem + MemT = typeof(mem) + arrayelem = datatype_arrayelem(MemT) + elsz = datatype_layoutsize(MemT) + isbits = 0; isboxed = 1; isunion = 2 + arrayelem == isbits && datatype_pointerfree(T::DataType) && return A + t = @_gc_preserve_begin mem + p = Ptr{Ptr{Cvoid}}(@inbounds pointer(A)) + if arrayelem == isboxed + Intrinsics.atomic_pointerset(p, C_NULL, :monotonic) + elseif arrayelem != isunion + for j = 1:Core.sizeof(Ptr{Cvoid}):elsz + # XXX: this violates memory ordering, since it writes more than one C_NULL to each + Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic) + end + end + @_gc_preserve_end t + return A +end + +elsize(@nospecialize _::Type{A}) where {T,A<:GenericMemory{<:Any,T}} = aligned_sizeof(T) # XXX: probably supposed to be the stride? +sizeof(a::GenericMemory) = Core.sizeof(a) + +# multi arg case will be overwritten later. This is needed for bootstrapping +function isassigned(a::GenericMemory, i::Int) + @inline + @boundscheck (i - 1)%UInt < length(a)%UInt || return false + return @inbounds memoryref_isassigned(memoryref(a, i), default_access_order(a), false) +end + +isassigned(a::GenericMemoryRef) = memoryref_isassigned(a, default_access_order(a), @_boundscheck) + +## copy ## +function unsafe_copyto!(dest::MemoryRef{T}, src::MemoryRef{T}, n) where {T} + @_terminates_globally_notaskstate_meta + n == 0 && return dest + @boundscheck memoryref(dest, n), memoryref(src, n) + if isbitstype(T) + tdest = @_gc_preserve_begin dest + tsrc = @_gc_preserve_begin src + pdest = unsafe_convert(Ptr{Cvoid}, dest) + psrc = unsafe_convert(Ptr{Cvoid}, src) + memmove(pdest, psrc, aligned_sizeof(T) * n) + @_gc_preserve_end tdest + @_gc_preserve_end tsrc + else + ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n)) + end + return dest +end + +function unsafe_copyto!(dest::GenericMemoryRef, src::GenericMemoryRef, n) + n == 0 && return dest + @boundscheck memoryref(dest, n), memoryref(src, n) + unsafe_copyto!(dest.mem, memoryrefoffset(dest), src.mem, memoryrefoffset(src), n) + return dest +end + +function unsafe_copyto!(dest::Memory{T}, doffs, src::Memory{T}, soffs, n) where{T} + n == 0 && return dest + unsafe_copyto!(memoryref(dest, doffs), memoryref(src, soffs), n) + return dest +end + +#fallback method when types don't match +function unsafe_copyto!(dest::Memory, doffs, src::Memory, soffs, n) + @_terminates_locally_meta + n == 0 && return dest + # use pointer math to determine if they are deemed to alias + destp = pointer(dest, doffs) + srcp = pointer(src, soffs) + endp = pointer(src, soffs + n - 1) + @inbounds if destp < srcp || destp > endp + for i = 1:n + if isassigned(src, soffs + i - 1) + dest[doffs + i - 1] = src[soffs + i - 1] + else + _unsetindex!(dest, doffs + i - 1) + end + end + else + for i = n:-1:1 + if isassigned(src, soffs + i - 1) + dest[doffs + i - 1] = src[soffs + i - 1] + else + _unsetindex!(dest, doffs + i - 1) + 
end + end + end + return dest +end + +function copy(a::T) where {T<:Memory} + # `copy` only throws when the size exceeds the max allocation size, + # but since we're copying an existing array, we're guaranteed that this will not happen. + @_nothrow_meta + newmem = T(undef, length(a)) + @inbounds unsafe_copyto!(newmem, 1, a, 1, length(a)) +end + +copyto!(dest::Memory, src::Memory) = copyto!(dest, 1, src, 1, length(src)) +function copyto!(dest::Memory, doffs::Integer, src::Memory, soffs::Integer, n::Integer) + n < 0 && _throw_argerror("Number of elements to copy must be non-negative.") + unsafe_copyto!(dest, doffs, src, soffs, n) + return dest +end + + +## Constructors ## + +similar(a::GenericMemory) = + typeof(a)(undef, length(a)) +similar(a::GenericMemory{kind,<:Any,AS}, T::Type) where {kind,AS} = + GenericMemory{kind,T,AS}(undef, length(a)) +similar(a::GenericMemory, m::Int) = + typeof(a)(undef, m) +similar(a::GenericMemory{kind,<:Any,AS}, T::Type, dims::Dims{1}) where {kind,AS} = + GenericMemory{kind,T,AS}(undef, dims[1]) +similar(a::GenericMemory, dims::Dims{1}) = + typeof(a)(undef, dims[1]) + +function fill!(a::Union{Memory{UInt8}, Memory{Int8}}, x::Integer) + t = @_gc_preserve_begin a + p = unsafe_convert(Ptr{Cvoid}, a) + T = eltype(a) + memset(p, x isa T ? x : convert(T, x), length(a) % UInt) + @_gc_preserve_end t + return a +end + +## Conversions ## + +convert(::Type{T}, a::AbstractArray) where {T<:Memory} = a isa T ? a : T(a)::T + +promote_rule(a::Type{Memory{T}}, b::Type{Memory{S}}) where {T,S} = el_same(promote_type(T,S), a, b) + +## Constructors ## + +# constructors should make copies +Memory{T}(x::AbstractArray{S,1}) where {T,S} = copyto_axcheck!(Memory{T}(undef, size(x)), x) + +## copying iterators to containers + +## Iteration ## + +iterate(A::Memory, i=1) = (@inline; (i - 1)%UInt < length(A)%UInt ? (@inbounds A[i], i + 1) : nothing) + +## Indexing: getindex ## + +# Faster contiguous indexing using copyto! for AbstractUnitRange and Colon +function getindex(A::Memory, I::AbstractUnitRange{<:Integer}) + @inline + @boundscheck checkbounds(A, I) + lI = length(I) + X = similar(A, axes(I)) + if lI > 0 + copyto!(X, firstindex(X), A, first(I), lI) + end + return X +end + +# getindex for carrying out logical indexing for AbstractUnitRange{Bool} as Bool <: Integer +getindex(a::Memory, r::AbstractUnitRange{Bool}) = getindex(a, to_index(r)) + +getindex(A::Memory, c::Colon) = copy(A) + +## Indexing: setindex! ## + +function _setindex!(A::Memory{T}, x::T, i1::Int) where {T} + ref = memoryrefnew(memoryref(A), i1, @_boundscheck) + memoryrefset!(ref, x, :not_atomic, @_boundscheck) + return A +end + +function setindex!(A::Memory{T}, x, i1::Int) where {T} + @_propagate_inbounds_meta + val = x isa T ? x : convert(T,x)::T + return _setindex!(A, val, i1) +end + +function setindex!(A::Memory{T}, x, i1::Int, i2::Int, I::Int...) where {T} + @inline + @boundscheck (i2 == 1 && all(==(1), I)) || throw_boundserror(A, (i1, i2, I...)) + setindex!(A, x, i1) +end + +# Faster contiguous setindex! with copyto! 
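The new `base/genericmemory.jl` file above gives `Memory{T}` its basic `copy`, `fill!`, iteration and indexing methods; before the contiguous `setindex!` definitions that follow, here is a minimal usage sketch (illustrative, not part of the patch, assuming a Julia build that provides `Memory`, i.e. 1.11+):

```julia
m = Memory{Int}(undef, 4)                  # fixed-size, uninitialized storage
m[1] = 10; m[2] = 20; m[3] = 30; m[4] = 40
m2 = copy(m)                               # same type and length, elements copied
m[1:2]                                     # contiguous getindex goes through copyto!
b = fill!(Memory{UInt8}(undef, 3), 0x7f)   # byte memories take the memset fast path
size(m), length(b)                         # ((4,), 3)
```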
+function setindex!(A::Memory{T}, X::Memory{T}, I::AbstractUnitRange{Int}) where T + @inline + @boundscheck checkbounds(A, I) + lI = length(I) + @boundscheck setindex_shape_check(X, lI) + if lI > 0 + unsafe_copyto!(A, first(I), X, 1, lI) + end + return A +end +function setindex!(A::Memory{T}, X::Memory{T}, c::Colon) where T + @inline + lI = length(A) + @boundscheck setindex_shape_check(X, lI) + if lI > 0 + unsafe_copyto!(A, 1, X, 1, lI) + end + return A +end + +# use memcmp for cmp on byte arrays +function cmp(a::Memory{UInt8}, b::Memory{UInt8}) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, min(length(a),length(b))) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b)) +end + +const BitIntegerMemory{N} = Union{map(T->Memory{T}, BitInteger_types)...} +# use memcmp for == on bit integer types +function ==(a::M, b::M) where {M <: BitIntegerMemory} + if length(a) == length(b) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, sizeof(eltype(M)) * length(a)) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end +end + +function findall(pred::Fix2{typeof(in),<:Union{Memory{<:Real},Real}}, x::Memory{<:Real}) + if issorted(x, Sort.Forward) && issorted(pred.x, Sort.Forward) + return _sortedfindin(x, pred.x) + else + return _findin(x, pred.x) + end +end + +# Copying subregions +function indcopy(sz::Dims, I::GenericMemory) + n = length(I) + s = sz[n] + for i = n+1:length(sz) + s *= sz[i] + end + dst = eltype(I)[_findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n] + src = eltype(I)[I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n] + dst, src +end + +# get, set(once), modify, swap and replace at index, atomically +function getindex_atomic(mem::GenericMemory, order::Symbol, i::Int) + @_propagate_inbounds_meta + memref = memoryref(mem, i) + return memoryrefget(memref, order, @_boundscheck) +end + +function setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int) + @_propagate_inbounds_meta + T = eltype(mem) + memref = memoryref(mem, i) + return memoryrefset!( + memref, + val isa T ? val : convert(T, val)::T, + order, + @_boundscheck + ) +end + +function setindexonce_atomic!( + mem::GenericMemory, + success_order::Symbol, + fail_order::Symbol, + val, + i::Int, +) + @_propagate_inbounds_meta + T = eltype(mem) + memref = memoryref(mem, i) + return Core.memoryrefsetonce!( + memref, + val isa T ? val : convert(T, val)::T, + success_order, + fail_order, + @_boundscheck + ) +end + +function modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int) + @_propagate_inbounds_meta + memref = memoryref(mem, i) + return Core.memoryrefmodify!(memref, op, val, order, @_boundscheck) +end + +function swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int) + @_propagate_inbounds_meta + T = eltype(mem) + memref = memoryref(mem, i) + return Core.memoryrefswap!( + memref, + val isa T ? val : convert(T, val)::T, + order, + @_boundscheck + ) +end + +function replaceindex_atomic!( + mem::GenericMemory, + success_order::Symbol, + fail_order::Symbol, + expected, + desired, + i::Int, +) + @_propagate_inbounds_meta + T = eltype(mem) + memref = memoryref(mem, i) + return Core.memoryrefreplace!( + memref, + expected, + desired isa T ? 
desired : convert(T, desired)::T, + success_order, + fail_order, + @_boundscheck, + ) +end diff --git a/base/gmp.jl b/base/gmp.jl index 8a1451be7a590..4d2b4b66ac41b 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -10,7 +10,8 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal, bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb, widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit, - sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit + sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit, + clamp, unsafe_takestring if Clong == Int32 const ClongMax = Union{Int8, Int16, Int32} @@ -29,10 +30,13 @@ else const libgmp = "libgmp.so.10" end -version() = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar})))) +_version() = unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar}))) +version() = VersionNumber(_version()) +major_version() = _version()[1] bits_per_limb() = Int(unsafe_load(cglobal((:__gmp_bits_per_limb, libgmp), Cint))) const VERSION = version() +const MAJOR_VERSION = major_version() const BITS_PER_LIMB = bits_per_limb() # GMP's mp_limb_t is by default a typedef of `unsigned long`, but can also be configured to be either @@ -101,7 +105,7 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false) function __init__() try - if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB + if major_version() != MAJOR_VERSION || bits_per_limb() != BITS_PER_LIMB msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb())) does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB). Please rebuild Julia.""" @@ -170,8 +174,8 @@ end invert!(x::BigInt, a::BigInt, b::BigInt) = ccall((:__gmpz_invert, libgmp), Cint, (mpz_t, mpz_t, mpz_t), x, a, b) -invert(a::BigInt, b::BigInt) = invert!(BigInt(), a, b) invert!(x::BigInt, b::BigInt) = invert!(x, x, b) +invert(a::BigInt, b::BigInt) = (ret=BigInt(); invert!(ret, a, b); ret) for op in (:add_ui, :sub_ui, :mul_ui, :mul_2exp, :fdiv_q_2exp, :pow_ui, :bin_ui) op! = Symbol(op, :!) 
@@ -260,8 +264,6 @@ end limbs_write!(x::BigInt, a) = ccall((:__gmpz_limbs_write, libgmp), Ptr{Limb}, (mpz_t, Clong), x, a) limbs_finish!(x::BigInt, a) = ccall((:__gmpz_limbs_finish, libgmp), Cvoid, (mpz_t, Clong), x, a) -import!(x::BigInt, a, b, c, d, e, f) = ccall((:__gmpz_import, libgmp), Cvoid, - (mpz_t, Csize_t, Cint, Csize_t, Cint, Csize_t, Ptr{Cvoid}), x, a, b, c, d, e, f) setbit!(x, a) = (ccall((:__gmpz_setbit, libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) tstbit(a::BigInt, b) = ccall((:__gmpz_tstbit, libgmp), Cint, (mpz_t, bitcnt_t), a, b) % Bool @@ -320,11 +322,6 @@ function BigInt(x::Float64) unsafe_trunc(BigInt,x) end -function trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64}) - isfinite(x) || throw(InexactError(:trunc, BigInt, x)) - unsafe_trunc(BigInt,x) -end - BigInt(x::Float16) = BigInt(Float64(x)) BigInt(x::Float32) = BigInt(Float64(x)) @@ -363,6 +360,8 @@ end rem(x::Integer, ::Type{BigInt}) = BigInt(x) +clamp(x, ::Type{BigInt}) = convert(BigInt, x) + isodd(x::BigInt) = MPZ.tstbit(x, 0) iseven(x::BigInt) = !isodd(x) @@ -611,8 +610,8 @@ function top_set_bit(x::BigInt) x.size * sizeof(Limb) << 3 - leading_zeros(GC.@preserve x unsafe_load(x.d, x.size)) end -divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y) -divrem(x::BigInt, y::Integer) = MPZ.tdiv_qr(x, big(y)) +divrem(x::BigInt, y::BigInt, ::typeof(RoundToZero) = RoundToZero) = MPZ.tdiv_qr(x, y) +divrem(x::BigInt, y::Integer, ::typeof(RoundToZero) = RoundToZero) = MPZ.tdiv_qr(x, BigInt(y)) cmp(x::BigInt, y::BigInt) = sign(MPZ.cmp(x, y)) cmp(x::BigInt, y::ClongMax) = sign(MPZ.cmp_si(x, y)) @@ -628,11 +627,11 @@ isqrt(x::BigInt) = MPZ.sqrt(x) ^(x::BigInt, y::Culong) = MPZ.pow_ui(x, y) function bigint_pow(x::BigInt, y::Integer) + x == 1 && return x + x == -1 && return isodd(y) ? x : -x if y<0; throw(DomainError(y, "`y` cannot be negative.")); end @noinline throw1(y) = throw(OverflowError("exponent $y is too large and computation will overflow")) - if x== 1; return x; end - if x==-1; return isodd(y) ? x : -x; end if y>typemax(Culong) x==0 && return x @@ -663,11 +662,6 @@ end powermod(x::Integer, p::Integer, m::BigInt) = powermod(big(x), big(p), m) function gcdx(a::BigInt, b::BigInt) - if iszero(b) # shortcut this to ensure consistent results with gcdx(a,b) - return a < 0 ? (-a,-ONE,b) : (a,one(BigInt),b) - # we don't return the globals ONE and ZERO in case the user wants to - # mutate the result - end g, s, t = MPZ.gcdext(a, b) if t == 0 # work around a difference in some versions of GMP @@ -705,7 +699,7 @@ function prod(arr::AbstractArray{BigInt}) foldl(MPZ.mul!, arr; init) end -factorial(x::BigInt) = isneg(x) ? BigInt(0) : MPZ.fac_ui(x) +factorial(n::BigInt) = !isneg(n) ? 
MPZ.fac_ui(n) : throw(DomainError(n, "`n` must not be negative.")) function binomial(n::BigInt, k::Integer) k < 0 && return BigInt(0) @@ -759,13 +753,13 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) iszero(n) && pad < 1 && return "" nd1 = ndigits(n, base=base) nd = max(nd1, pad) - sv = Base.StringVector(nd + isneg(n)) + sv = Base.StringMemory(nd + isneg(n)) GC.@preserve sv MPZ.get_str!(pointer(sv) + nd - nd1, base, n) @inbounds for i = (1:nd-nd1) .+ isneg(n) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - String(sv) + unsafe_takestring(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} @@ -839,7 +833,12 @@ Base.add_with_overflow(a::BigInt, b::BigInt) = a + b, false Base.sub_with_overflow(a::BigInt, b::BigInt) = a - b, false Base.mul_with_overflow(a::BigInt, b::BigInt) = a * b, false -Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x) +# checked_pow doesn't follow the same promotion rules as the others, above. +Base.checked_pow(x::BigInt, p::Integer) = x^p +Base.checked_pow(x::Integer, p::BigInt) = x^p +Base.checked_pow(x::BigInt, p::BigInt) = x^p + +Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x)::BigInt ## streamlined hashing for BigInt, by avoiding allocation from shifts ## diff --git a/base/hamt.jl b/base/hamt.jl new file mode 100644 index 0000000000000..e3e4b4bd03ba9 --- /dev/null +++ b/base/hamt.jl @@ -0,0 +1,277 @@ +module HashArrayMappedTries + +export HAMT + +## +# Implements "Ideal Hash Trees" Phil Bagwell 2000 +# +# Notable divergence is that we forgo a resizable root table. +# Root tables improve lookup performance for large sizes, but +# limit space efficiency if the HAMT is used for a persistent +# dictionary, since each persistent operation would duplicate +# the root table. +# +# We do not handle perfect hash-collision. We would need to +# add an additional node type for Collisions. Perfect hash +# collisions should not occur in practice since we perform +# rehashing after using 55 bits (MAX_SHIFT) of the original hash. +# +# Use https://github.com/vchuravy/HashArrayMappedTries.jl if +# you want to use this implementation in a package. +# +# A HAMT is formed by tree of levels, where at each level +# we use a portion of the bits of the hash for indexing +# +# We use a branching width (ENTRY_COUNT) of 32, giving us +# 5bits of indexing per level +# 0000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000 +# L11 L10 L9 L8 L7 L6 L5 L4 L3 L2 L1 L0 +# +# At each level we use a 32bit bitmap to store which elements are occupied. +# Since our storage is "sparse" we need to map from index in [0,31] to +# the actual storage index. We mask the bitmap with (1 << i) - 1 and count +# the ones in the result. The number of set ones (+1) gives us the index +# into the storage array. +# +# HAMT can be both persistent and non-persistent. +# The `path` function searches for a matching entries, and for persistency +# optionally copies the path so that it can be safely mutated. + +# TODO: +# When `trie.data` becomes empty we could remove it from it's parent, +# but we only know so fairly late. Maybe have a compact function? 
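The design comment above describes the sparse-index computation: mask the 32-bit occupancy bitmap with `(1 << i) - 1` and count the remaining set bits. A worked sketch (illustrative, not part of the patch):

```julia
bitmap = UInt32(0b10110)                # logical slots 1, 2 and 4 are occupied
bi     = 4                              # slot we want, in the range 0:31
mask   = (UInt32(1) << bi) - UInt32(1)  # keeps only the bits below slot 4
count_ones(bitmap & mask) + 1           # 3: slot 4 lives at data[3]
```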
+ +const ENTRY_COUNT = UInt(32) +const BITMAP = UInt32 +const NBITS = sizeof(UInt) * 8 +# @assert ispow2(ENTRY_COUNT) +const BITS_PER_LEVEL = trailing_zeros(ENTRY_COUNT) +const LEVEL_MASK = (UInt(1) << BITS_PER_LEVEL) - UInt(1) +const MAX_SHIFT = (NBITS ÷ BITS_PER_LEVEL - 1) * BITS_PER_LEVEL + +mutable struct Leaf{K, V} + const key::K + const val::V +end + +""" + HAMT{K,V} + +A HashArrayMappedTrie that optionally supports persistence. +""" +mutable struct HAMT{K, V} + const data::Vector{Union{Leaf{K, V}, HAMT{K, V}}} + bitmap::BITMAP + HAMT{K,V}(data, bitmap) where {K,V} = new{K,V}(data, bitmap) + HAMT{K, V}() where {K, V} = new{K,V}(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 0), zero(BITMAP)) +end + +Base.@assume_effects :nothrow :effect_free function init_hamt(K, V, k, v) + # For a single element we can't have a 'hash-collision + trie = HAMT{K,V}(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 1), zero(BITMAP)) + trie.data[1] = Leaf{K,V}(k,v) + return trie +end + +Base.@assume_effects :effect_free function HAMT{K,V}((k,v)::Pair{K,V}) where {K, V} + trie = init_hamt(K, V, k, v) + bi = BitmapIndex(HashState(k)) + set!(trie, bi) + return trie +end +HAMT{K,V}(kv::Pair) where {K, V} = HAMT{K,V}(convert(Pair{K,V}, kv)) + +HAMT(pair::Pair{K,V}) where {K, V} = HAMT{K,V}(pair) + +# TODO: Parameterize by hash function +struct HashState{K} + key::K + hash::UInt + depth::Int + shift::Int +end +HashState(key) = HashState(key, objectid(key), 0, 0) +# Reconstruct +Base.@assume_effects :terminates_locally function HashState(other::HashState, key) + h = HashState(key) + while h.depth !== other.depth + h = next(h) + end + return h +end + +function next(h::HashState) + depth = h.depth + 1 + shift = h.shift + BITS_PER_LEVEL + # Assert disabled for effect precision + # @assert h.shift <= MAX_SHIFT + if shift > MAX_SHIFT + # Note we use `UInt(depth ÷ BITS_PER_LEVEL)` to seed the hash function + # the hash docs, do we need to hash `UInt(depth ÷ BITS_PER_LEVEL)` first? + h_hash = hash(objectid(h.key), UInt(depth ÷ BITS_PER_LEVEL)) + shift = 0 + else + h_hash = h.hash + end + return HashState(h.key, h_hash, depth, shift) +end + +struct BitmapIndex + x::UInt +end +BitmapIndex(h::HashState) = BitmapIndex((h.hash >> h.shift) & LEVEL_MASK) + +Base.:(<<)(v, bi::BitmapIndex) = v << bi.x +Base.:(>>)(v, bi::BitmapIndex) = v >> bi.x + +isset(trie::HAMT, bi::BitmapIndex) = isodd(trie.bitmap >> bi) +function set!(trie::HAMT, bi::BitmapIndex) + trie.bitmap |= (UInt32(1) << bi) + # Invariant: count_ones(trie.bitmap) == Base.length(trie.data) +end + +function unset!(trie::HAMT, bi::BitmapIndex) + trie.bitmap &= ~(UInt32(1) << bi) + # Invariant: count_ones(trie.bitmap) == Base.length(trie.data) +end + +function entry_index(trie::HAMT, bi::BitmapIndex) + mask = (UInt32(1) << bi.x) - UInt32(1) + count_ones(trie.bitmap & mask) + 1 +end + +islevel_empty(trie::HAMT) = trie.bitmap == 0 +islevel_empty(::Leaf) = false + +""" + path(trie, h, copyf)::(found, present, trie, i, top, level) + +Internal function that walks a HAMT and finds the slot for hash. +Returns if a value is `present` and a value is `found`. + +It returns the `trie` and the index `i` into `trie.data`, as well +as the current `level`. + +If a copy function is provided `copyf` use the return `top` for the +new persistent tree. 
+""" +@inline @Base.assume_effects :noub :terminates_locally function path(trie::HAMT{K,V}, key, h::HashState, copy=false) where {K, V} + if copy + trie = top = HAMT{K,V}(Base.copy(trie.data), trie.bitmap) + else + trie = top = trie + end + while true + bi = BitmapIndex(h) + i = entry_index(trie, bi) + if isset(trie, bi) + next = @inbounds trie.data[i] + if next isa Leaf{K,V} + # Check if key match if not we will need to grow. + found = next.key === h.key + return found, true, trie, i, bi, top, h + end + if copy + next = HAMT{K,V}(Base.copy(next.data), next.bitmap) + # :noub because entry_index is guaranteed to be inbounds for trie.data + @inbounds trie.data[i] = next + end + trie = next::HAMT{K,V} + else + # found empty slot + return true, false, trie, i, bi, top, h + end + h = HashArrayMappedTries.next(h) + end +end + +""" +Internal function that given an obtained path, either set the value +or grows the HAMT by inserting a new trie instead. +""" +@inline @Base.assume_effects :terminates_locally function insert!(found, present, trie::HAMT{K,V}, i, bi, h, val) where {K,V} + if found # we found a slot, just set it to the new leaf + # replace or insert + if present # replace + @inbounds trie.data[i] = Leaf{K, V}(h.key, val) + else + Base.insert!(trie.data, i, Leaf{K, V}(h.key, val)) + end + set!(trie, bi) + else + @assert present + # collision -> grow + leaf = @inbounds trie.data[i]::Leaf{K,V} + leaf_h = HashState(h, leaf.key) + if leaf_h.hash == h.hash + error("Perfect hash collision") + end + while true + new_trie = HAMT{K, V}() + if present + @inbounds trie.data[i] = new_trie + else + i = entry_index(trie, bi) + Base.insert!(trie.data, i, new_trie) + end + set!(trie, bi) + + h = next(h) + leaf_h = next(leaf_h) + bi_new = BitmapIndex(h) + bi_old = BitmapIndex(leaf_h) + if bi_new == bi_old # collision in new trie -> retry + trie = new_trie + bi = bi_new + present = false + continue + end + i_new = entry_index(new_trie, bi_new) + Base.insert!(new_trie.data, i_new, Leaf{K, V}(h.key, val)) + set!(new_trie, bi_new) + + i_old = entry_index(new_trie, bi_old) + Base.insert!(new_trie.data, i_old, leaf) + set!(new_trie, bi_old) + + break + end + end +end + +length(::Leaf) = 1 +length(trie::HAMT) = sum((length(trie.data[i]) for i in eachindex(trie.data)), init=0) + +isempty(::Leaf) = false +function isempty(trie::HAMT) + if islevel_empty(trie) + return true + end + return all(isempty(trie.data[i]) for i in eachindex(trie.data)) +end + +# DFS +function iterate(trie::HAMT, state=nothing) + if state === nothing + state = (;parent=nothing, trie, i=1) + end + while state !== nothing + i = state.i + if i > Base.length(state.trie.data) + state = state.parent + continue + end + trie = state.trie.data[i] + state = (;parent=state.parent, trie=state.trie, i=i+1) + if trie isa Leaf + return (trie.key => trie.val, state) + else + # we found a new level + state = (;parent=state, trie, i=1) + continue + end + end + return nothing +end + +end # module HashArrayMapTries diff --git a/base/hashing.jl b/base/hashing.jl index 5dbae09123bd6..d4a6217de6edb 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -11,9 +11,7 @@ optional second argument `h` is another hash code to be mixed with the result. New types should implement the 2-argument form, typically by calling the 2-argument `hash` method recursively in order to mix hashes of the contents with each other (and with `h`). 
Typically, any type that implements `hash` should also implement its own [`==`](@ref) (hence -[`isequal`](@ref)) to guarantee the property mentioned above. Types supporting subtraction -(operator `-`) should also implement [`widen`](@ref), which is required to hash -values inside heterogeneous arrays. +[`isequal`](@ref)) to guarantee the property mentioned above. The hash value may change when a new Julia process is started. @@ -29,7 +27,9 @@ See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref). """ hash(x::Any) = hash(x, zero(UInt)) hash(w::WeakRef, h::UInt) = hash(w.value, h) -hash(T::Type, h::UInt) = hash_uint(3h - ccall(:jl_type_hash, UInt, (Any,), T)) + +# Types can't be deleted, so marking as total allows the compiler to look up the hash +hash(T::Type, h::UInt) = hash_uint(3h - @assume_effects :total ccall(:jl_type_hash, UInt, (Any,), T)) ## hashing general objects ## diff --git a/base/iddict.jl b/base/iddict.jl index 01ff213305d7b..f1632e93427a8 100644 --- a/base/iddict.jl +++ b/base/iddict.jl @@ -4,11 +4,12 @@ IdDict([itr]) `IdDict{K,V}()` constructs a hash table using [`objectid`](@ref) as hash and -`===` as equality with keys of type `K` and values of type `V`. +`===` as equality with keys of type `K` and values of type `V`. See [`Dict`](@ref) +for further help and [`IdSet`](@ref) for the set version of this. -See [`Dict`](@ref) for further help. In the example below, The `Dict` -keys are all `isequal` and therefore get hashed the same, so they get overwritten. -The `IdDict` hashes by object-id, and thus preserves the 3 different keys. +In the example below, the `Dict` keys are all `isequal` and therefore get hashed +the same, so they get overwritten. The `IdDict` hashes by object-id, and thus +preserves the 3 different keys. # Examples ```julia-repl @@ -24,10 +25,10 @@ IdDict{Any, String} with 3 entries: ``` """ mutable struct IdDict{K,V} <: AbstractDict{K,V} - ht::Vector{Any} + ht::Memory{Any} count::Int ndel::Int - IdDict{K,V}() where {K, V} = new{K,V}(Vector{Any}(undef, 32), 0, 0) + IdDict{K,V}() where {K, V} = new{K,V}(Memory{Any}(undef, 32), 0, 0) function IdDict{K,V}(itr) where {K, V} d = IdDict{K,V}() @@ -53,23 +54,12 @@ IdDict(ps::Pair{K}...) where {K} = IdDict{K,Any}(ps) IdDict(ps::(Pair{K,V} where K)...) where {V} = IdDict{Any,V}(ps) IdDict(ps::Pair...) 
= IdDict{Any,Any}(ps) -function IdDict(kv) - try - dict_with_eltype((K, V) -> IdDict{K, V}, kv, eltype(kv)) - catch - if !applicable(iterate, kv) || !all(x->isa(x,Union{Tuple,Pair}),kv) - throw(ArgumentError( - "IdDict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow() - end - end -end +IdDict(kv) = dict_with_eltype((K, V) -> IdDict{K, V}, kv, eltype(kv)) empty(d::IdDict, ::Type{K}, ::Type{V}) where {K, V} = IdDict{K,V}() function rehash!(d::IdDict, newsz = length(d.ht)%UInt) - d.ht = ccall(:jl_idtable_rehash, Vector{Any}, (Any, Csize_t), d.ht, newsz) + d.ht = ccall(:jl_idtable_rehash, Memory{Any}, (Any, Csize_t), d.ht, newsz) d end @@ -84,7 +74,7 @@ function sizehint!(d::IdDict, newsz) end function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where {K, V} - !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K")) + !isa(key, K) && throw(KeyTypeError(K, key)) if !(val isa V) # avoid a dynamic call val = convert(V, val)::V end @@ -93,7 +83,7 @@ function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where d.ndel = 0 end inserted = RefValue{Cint}(0) - d.ht = ccall(:jl_eqtable_put, Array{Any,1}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted) + d.ht = ccall(:jl_eqtable_put, Memory{Any}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted) d.count += inserted[] return d end @@ -133,10 +123,10 @@ function delete!(d::IdDict{K}, @nospecialize(key)) where K end function empty!(d::IdDict) - resize!(d.ht, 32) + d.ht = Memory{Any}(undef, 32) ht = d.ht t = @_gc_preserve_begin ht - memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht)) + memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht) % UInt) @_gc_preserve_end t d.ndel = 0 d.count = 0 diff --git a/base/idset.jl b/base/idset.jl index 0a4d4275b4231..c46d49968ff73 100644 --- a/base/idset.jl +++ b/base/idset.jl @@ -1,13 +1,36 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Like Set, but using IdDict -mutable struct IdSet{T} <: AbstractSet{T} - dict::IdDict{T,Nothing} +""" + IdSet{T}([itr]) + IdSet() - IdSet{T}() where {T} = new(IdDict{T,Nothing}()) - IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.dict)) -end +IdSet{T}() constructs a set (see [`Set`](@ref)) using +`===` as equality with values of type `T`. + +In the example below, the values are all `isequal` so they get overwritten in the ordinary `Set`. +The `IdSet` compares by `===` and so preserves the 3 different values. + +# Examples +```jldoctest; filter = r"\\n\\s*(1|1\\.0|true)" +julia> Set(Any[true, 1, 1.0]) +Set{Any} with 1 element: + 1.0 +julia> IdSet{Any}(Any[true, 1, 1.0]) +IdSet{Any} with 3 elements: + 1.0 + 1 + true +``` +""" +mutable struct IdSet{K} <: AbstractSet{K} + list::Memory{Any} + idxs::Union{Memory{UInt8}, Memory{UInt16}, Memory{UInt32}} + count::Int + max::Int # n.b. 
always <= length(list) + IdSet{T}() where {T} = new(Memory{Any}(undef, 0), Memory{UInt8}(undef, 0), 0, 0) + IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.list), copy(s.idxs), s.count, s.max) +end IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr) IdSet() = IdSet{Any}() @@ -15,22 +38,77 @@ copymutable(s::IdSet) = typeof(s)(s) emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}() copy(s::IdSet) = typeof(s)(s) -isempty(s::IdSet) = isempty(s.dict) -length(s::IdSet) = length(s.dict) -in(@nospecialize(x), s::IdSet) = haskey(s.dict, x) -push!(s::IdSet, @nospecialize(x)) = (s.dict[x] = nothing; s) -pop!(s::IdSet, @nospecialize(x)) = (pop!(s.dict, x); x) -pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = (x in s ? pop!(s, x) : default) -delete!(s::IdSet, @nospecialize(x)) = (delete!(s.dict, x); s) +haskey(s::IdSet, @nospecialize(key)) = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, key) != -1 +isempty(s::IdSet) = s.count == 0 +length(s::IdSet) = s.count +in(@nospecialize(x), s::IdSet) = haskey(s, x) +function push!(s::IdSet, @nospecialize(x)) + idx = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, x) + if idx >= 0 + s.list[idx + 1] = x + else + if s.max < length(s.list) + idx = s.max + @assert !isassigned(s.list, idx + 1) + s.list[idx + 1] = x + s.max = idx + 1 + else + newidx = RefValue{Int}(0) + setfield!(s, :list, ccall(:jl_idset_put_key, Any, (Any, Any, Ptr{Int}), s.list, x, newidx)) + idx = newidx[] + s.max = idx < 0 ? -idx : idx + 1 + end + @assert s.list[s.max] === x + setfield!(s, :idxs, ccall(:jl_idset_put_idx, Any, (Any, Any, Int), s.list, s.idxs, idx)) + s.count += 1 + end + s +end +function _pop!(s::IdSet, @nospecialize(x)) + removed = ccall(:jl_idset_pop, Int, (Any, Any, Any), s.list, s.idxs, x) + if removed != -1 + s.count -= 1 + while s.max > 0 && !isassigned(s.list, s.max) + s.max -= 1 + end + end + removed +end +pop!(s::IdSet, @nospecialize(x)) = _pop!(s, x) == -1 ? throw(KeyError(x)) : x +pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = _pop!(s, x) == -1 ? default : x +delete!(s::IdSet, @nospecialize(x)) = (_pop!(s, x); s) + +function sizehint!(s::IdSet, newsz) + # TODO: grow/compact list and perform rehash, if profitable? + # TODO: shrink? + # s.list = resize(s.list, newsz) + # newsz = _tablesz(newsz) + # oldsz = length(s.idxs) + # #grow at least 25% + # if newsz < (oldsz*5)>>2 + # return s + # end + # rehash!(s, newsz) + nothing +end -sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s) -empty!(s::IdSet) = (empty!(s.dict); s) +function empty!(s::IdSet) + fill!(s.idxs, 0x00) + list = s.list + for i = 1:s.max + _unsetindex!(list, i) + end + s.count = 0 + s.max = 0 + s +end filter!(f, d::IdSet) = unsafe_filter!(f, d) -function iterate(s::IdSet, state...) - y = iterate(s.dict, state...) 
- y === nothing && return nothing - ((k, _), i) = y - return (k, i) +function iterate(s::IdSet{S}, state=0) where {S} + while true + state += 1 + state > s.max && return nothing + isassigned(s.list, state) && return s.list[state]::S, state + end end diff --git a/base/indices.jl b/base/indices.jl index 15a2a2f3c0ac7..45f3495e51191 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -106,26 +106,49 @@ IndexStyle(::IndexStyle, ::IndexStyle) = IndexCartesian() promote_shape(::Tuple{}, ::Tuple{}) = () -function promote_shape(a::Tuple{Int,}, b::Tuple{Int,}) - if a[1] != b[1] - throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b")) +# Consistent error message for promote_shape mismatch, hiding type details like +# OneTo. When b ≡ nothing, it is omitted; i can be supplied for an index. +function throw_promote_shape_mismatch(a::Tuple, b::Union{Nothing,Tuple}, i = nothing) + if a isa Tuple{Vararg{Base.OneTo}} && (b === nothing || b isa Tuple{Vararg{Base.OneTo}}) + a = map(lastindex, a)::Dims + b === nothing || (b = map(lastindex, b)::Dims) + end + _has_axes = !(a isa Dims && (b === nothing || b isa Dims)) + if _has_axes + _normalize(d) = map(x -> firstindex(x):lastindex(x), d) + a = _normalize(a) + b === nothing || (b = _normalize(b)) + _things = "axes " + else + _things = "size " + end + msg = IOBuffer() + print(msg, "a has ", _things) + print(msg, a) + if b ≢ nothing + print(msg, ", b has ", _things) + print(msg, b) + end + if i ≢ nothing + print(msg, ", mismatch at dim ", i) end + throw(DimensionMismatch(String(take!(msg)))) +end + +function promote_shape(a::Tuple{Int,}, b::Tuple{Int,}) + a[1] != b[1] && throw_promote_shape_mismatch(a, b) return a end function promote_shape(a::Tuple{Int,Int}, b::Tuple{Int,}) - if a[1] != b[1] || a[2] != 1 - throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b")) - end + (a[1] != b[1] || a[2] != 1) && throw_promote_shape_mismatch(a, b) return a end promote_shape(a::Tuple{Int,}, b::Tuple{Int,Int}) = promote_shape(b, a) function promote_shape(a::Tuple{Int, Int}, b::Tuple{Int, Int}) - if a[1] != b[1] || a[2] != b[2] - throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b")) - end + (a[1] != b[1] || a[2] != b[2]) && throw_promote_shape_mismatch(a, b) return a end @@ -153,14 +176,10 @@ function promote_shape(a::Dims, b::Dims) return promote_shape(b, a) end for i=1:length(b) - if a[i] != b[i] - throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b, mismatch at $i")) - end + a[i] != b[i] && throw_promote_shape_mismatch(a, b, i) end for i=length(b)+1:length(a) - if a[i] != 1 - throw(DimensionMismatch("dimensions must match: a has dims $a, must have singleton at dim $i")) - end + a[i] != 1 && throw_promote_shape_mismatch(a, nothing, i) end return a end @@ -174,14 +193,10 @@ function promote_shape(a::Indices, b::Indices) return promote_shape(b, a) end for i=1:length(b) - if a[i] != b[i] - throw(DimensionMismatch("dimensions must match: a has dims $a, b has dims $b, mismatch at $i")) - end + a[i] != b[i] && throw_promote_shape_mismatch(a, b, i) end for i=length(b)+1:length(a) - if a[i] != 1:1 - throw(DimensionMismatch("dimensions must match: a has dims $a, must have singleton at dim $i")) - end + a[i] != 1:1 && throw_promote_shape_mismatch(a, nothing, i) end return a end @@ -295,9 +310,9 @@ to_index(I::AbstractArray{Bool}) = LogicalIndex(I) to_index(I::AbstractArray) = I to_index(I::AbstractArray{Union{}}) = I to_index(I::AbstractArray{<:Union{AbstractArray, Colon}}) = - 
throw(ArgumentError("invalid index: $(limitrepr(I)) of type $(typeof(I))")) + throw(ArgumentError(LazyString("invalid index: ", limitrepr(I), " of type ", typeof(I)))) to_index(::Colon) = throw(ArgumentError("colons must be converted by to_indices(...)")) -to_index(i) = throw(ArgumentError("invalid index: $(limitrepr(i)) of type $(typeof(i))")) +to_index(i) = throw(ArgumentError(LazyString("invalid index: ", limitrepr(i), " of type ", typeof(i)))) # The general to_indices is mostly defined in multidimensional.jl, but this # definition is required for bootstrap: @@ -349,15 +364,8 @@ to_indices(A, I::Tuple{}) = () to_indices(A, I::Tuple{Vararg{Int}}) = I to_indices(A, I::Tuple{Vararg{Integer}}) = (@inline; to_indices(A, (), I)) to_indices(A, inds, ::Tuple{}) = () -function to_indices(A, inds, I::Tuple{Any, Vararg{Any}}) - @inline - head = _to_indices1(A, inds, I[1]) - rest = to_indices(A, _cutdim(inds, I[1]), tail(I)) - (head..., rest...) -end - -_to_indices1(A, inds, I1) = (to_index(A, I1),) -_cutdim(inds, I1) = safe_tail(inds) +to_indices(A, inds, I::Tuple{Any, Vararg}) = + (@inline; (to_index(A, I[1]), to_indices(A, safe_tail(inds), tail(I))...)) """ Slice(indices) @@ -415,15 +423,57 @@ first(S::IdentityUnitRange) = first(S.indices) last(S::IdentityUnitRange) = last(S.indices) size(S::IdentityUnitRange) = (length(S.indices),) length(S::IdentityUnitRange) = length(S.indices) -getindex(S::IdentityUnitRange, i::Int) = (@inline; @boundscheck checkbounds(S, i); i) -getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i) -getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i) +unsafe_length(S::IdentityUnitRange) = unsafe_length(S.indices) +getindex(S::IdentityUnitRange, i::Integer) = (@inline; @boundscheck checkbounds(S, i); convert(eltype(S), i)) +getindex(S::IdentityUnitRange, i::Bool) = throw(ArgumentError("invalid index: $i of type Bool")) +function getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) + @inline + @boundscheck checkbounds(S, i) + return convert(AbstractUnitRange{eltype(S)}, i) +end +function getindex(S::IdentityUnitRange, i::AbstractUnitRange{Bool}) + @inline + @boundscheck checkbounds(S, i) + range(first(i) ? first(S) : last(S), length = last(i)) +end +function getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) + @inline + @boundscheck checkbounds(S, i) + return convert(AbstractRange{eltype(S)}, i) +end +function getindex(S::IdentityUnitRange, i::StepRange{Bool}) + @inline + @boundscheck checkbounds(S, i) + range(first(i) ? first(S) : last(S), length = last(i), step = Int(step(i))) +end +# Indexing with offset ranges should preserve the axes of the indices +# however, this is only really possible in general with OffsetArrays. +# In some cases, though, we may obtain correct results using Base ranges +# the following methods are added to allow OffsetArrays to dispatch on the first argument without ambiguities +function getindex(S::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}, + i::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) + @inline + @boundscheck checkbounds(S, i) + return i +end +function getindex(S::Slice{<:AbstractUnitRange{<:Integer}}, + i::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) + @inline + @boundscheck checkbounds(S, i) + return i +end show(io::IO, r::IdentityUnitRange) = print(io, "Base.IdentityUnitRange(", r.indices, ")") iterate(S::IdentityUnitRange, s...) = iterate(S.indices, s...) 
# For OneTo, the values and indices of the values are identical, so this may be defined in Base. # In general such an indexing operation would produce offset ranges -getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@inline; @boundscheck checkbounds(S, I); I) +# This should also ideally return an AbstractUnitRange{eltype(S)}, but currently +# we're restricted to eltype(::IdentityUnitRange) == Int by definition +function getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) + @inline + @boundscheck checkbounds(S, I) + return I +end """ LinearIndices(A::AbstractArray) @@ -456,7 +506,7 @@ julia> extrema(b) Return a `LinearIndices` array with the specified shape or [`axes`](@ref). -# Example +# Examples The main purpose of this constructor is intuitive conversion from cartesian to linear indexing: @@ -485,7 +535,7 @@ LinearIndices(inds::NTuple{N,Union{<:Integer,AbstractUnitRange{<:Integer}}}) whe LinearIndices(map(_convert2ind, inds)) LinearIndices(A::Union{AbstractArray,SimpleVector}) = LinearIndices(axes(A)) -_convert2ind(i::Integer) = Base.OneTo(i) +_convert2ind(i::Integer) = oneto(i) _convert2ind(ind::AbstractUnitRange) = first(ind):last(ind) function indices_promote_type(::Type{Tuple{R1,Vararg{R1,N}}}, ::Type{Tuple{R2,Vararg{R2,N}}}) where {R1,R2,N} @@ -515,6 +565,7 @@ function getindex(iter::LinearIndices, i::AbstractRange{<:Integer}) @boundscheck checkbounds(iter, i) @inbounds isa(iter, LinearIndices{1}) ? iter.indices[1][i] : (first(iter):last(iter))[i] end +copy(iter::LinearIndices) = iter # More efficient iteration — predominantly for non-vector LinearIndices # but one-dimensional LinearIndices must be special-cased to support OffsetArrays iterate(iter::LinearIndices{1}, s...) = iterate(axes1(iter.indices[1]), s...) @@ -525,3 +576,7 @@ first(iter::LinearIndices) = 1 first(iter::LinearIndices{1}) = (@inline; first(axes1(iter.indices[1]))) last(iter::LinearIndices) = (@inline; length(iter)) last(iter::LinearIndices{1}) = (@inline; last(axes1(iter.indices[1]))) + +function show(io::IO, iter::LinearIndices) + print(io, "LinearIndices(", iter.indices, ")") +end diff --git a/base/initdefs.jl b/base/initdefs.jl index ed0aa3856f339..f7693813239c6 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -9,7 +9,7 @@ A string containing the script name passed to Julia from the command line. Note script name remains unchanged from within included files. Alternatively see [`@__FILE__`](@ref). """ -global PROGRAM_FILE = "" +global PROGRAM_FILE::String = "" """ ARGS @@ -25,19 +25,19 @@ Stop the program with an exit code. The default exit code is zero, indicating th program completed successfully. In an interactive session, `exit()` can be called with the keyboard shortcut `^D`. """ -exit(n) = ccall(:jl_exit, Cvoid, (Int32,), n) +exit(n) = ccall(:jl_exit, Union{}, (Int32,), n) exit() = exit(0) const roottask = current_task() -is_interactive = false +is_interactive::Bool = false """ isinteractive() -> Bool Determine whether Julia is running an interactive session. """ -isinteractive() = (is_interactive::Bool) +isinteractive() = is_interactive ## package depots (registries, packages, environments) ## @@ -73,22 +73,28 @@ environment variable if set. Each entry in `DEPOT_PATH` is a path to a directory which contains subdirectories used by Julia for various purposes. Here is an overview of some of the subdirectories that may exist in a depot: +* `artifacts`: Contains content that packages use for which Pkg manages the installation of. 
* `clones`: Contains full clones of package repos. Maintained by `Pkg.jl` and used as a cache. +* `config`: Contains julia-level configuration such as a `startup.jl`. * `compiled`: Contains precompiled `*.ji` files for packages. Maintained by Julia. * `dev`: Default directory for `Pkg.develop`. Maintained by `Pkg.jl` and the user. * `environments`: Default package environments. For instance the global environment for a specific julia version. Maintained by `Pkg.jl`. -* `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and `Julia`. +* `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and Julia. * `packages`: Contains packages, some of which were explicitly installed and some which are implicit dependencies. Maintained by `Pkg.jl`. * `registries`: Contains package registries. By default only `General`. Maintained by `Pkg.jl`. +* `scratchspaces`: Contains content that a package itself installs via the [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) package. `Pkg.gc()` will delete content that is known to be unused. + +!!! note + Packages that want to store content should use the `scratchspaces` subdirectory via + [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) instead of creating new + subdirectories in the depot root. See also [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and [Code Loading](@ref code-loading). """ const DEPOT_PATH = String[] -function append_default_depot_path!(DEPOT_PATH) - path = joinpath(homedir(), ".julia") - path in DEPOT_PATH || push!(DEPOT_PATH, path) +function append_bundled_depot_path!(DEPOT_PATH) path = abspath(Sys.BINDIR, "..", "local", "share", "julia") path in DEPOT_PATH || push!(DEPOT_PATH, path) path = abspath(Sys.BINDIR, "..", "share", "julia") @@ -100,17 +106,34 @@ function init_depot_path() empty!(DEPOT_PATH) if haskey(ENV, "JULIA_DEPOT_PATH") str = ENV["JULIA_DEPOT_PATH"] + + # explicitly setting JULIA_DEPOT_PATH to the empty string means using no depot isempty(str) && return - for path in eachsplit(str, Sys.iswindows() ? ';' : ':') + + # otherwise, populate the depot path with the entries in JULIA_DEPOT_PATH, + # expanding empty strings to the bundled depot + pushfirst_default = true + for (i, path) in enumerate(eachsplit(str, Sys.iswindows() ? ';' : ':')) if isempty(path) - append_default_depot_path!(DEPOT_PATH) + append_bundled_depot_path!(DEPOT_PATH) else path = expanduser(path) path in DEPOT_PATH || push!(DEPOT_PATH, path) + if i == 1 + # if a first entry is given, don't add the default depot at the start + pushfirst_default = false + end end end + + # backwards compatibility: if JULIA_DEPOT_PATH only contains empty entries + # (e.g., JULIA_DEPOT_PATH=':'), make sure to use the default depot + if pushfirst_default + pushfirst!(DEPOT_PATH, joinpath(homedir(), ".julia")) + end else - append_default_depot_path!(DEPOT_PATH) + push!(DEPOT_PATH, joinpath(homedir(), ".julia")) + append_bundled_depot_path!(DEPOT_PATH) end nothing end @@ -225,8 +248,14 @@ function init_load_path() if haskey(ENV, "JULIA_LOAD_PATH") paths = parse_load_path(ENV["JULIA_LOAD_PATH"]) else - paths = filter!(env -> env !== nothing, - String[env == "@." ? current_project() : env for env in DEFAULT_LOAD_PATH]) + paths = String[] + for env in DEFAULT_LOAD_PATH + if env == "@." 
+ env = current_project() + env === nothing && continue + end + push!(paths, env) + end end append!(empty!(LOAD_PATH), paths) end @@ -243,15 +272,35 @@ function init_active_project() end ## load path expansion: turn LOAD_PATH entries into concrete paths ## +cmd_suppresses_program(cmd) = cmd in ('e', 'E') function load_path_expand(env::AbstractString)::Union{String, Nothing} # named environment? if startswith(env, '@') - # `@` in JULIA_LOAD_PATH is expanded early (at startup time) - # if you put a `@` in LOAD_PATH manually, it's expanded late + # `@.` in JULIA_LOAD_PATH is expanded early (at startup time) + # if you put a `@.` in LOAD_PATH manually, it's expanded late env == "@" && return active_project(false) env == "@." && return current_project() + env == "@temp" && return mktempdir() env == "@stdlib" && return Sys.STDLIB + if startswith(env, "@script") + if @isdefined(PROGRAM_FILE) + dir = dirname(PROGRAM_FILE) + else + cmds = unsafe_load_commands(JLOptions().commands) + if any(cmd::Pair{Char, String}->cmd_suppresses_program(first(cmd)), cmds) + # Usage error. The user did not pass a script. + return nothing + end + dir = dirname(ARGS[1]) + end + if env == "@script" # complete match, not startswith, so search upwards + return current_project(dir) + else + # starts with, so assume relative path is after + return abspath(replace(env, "@script" => dir)) + end + end env = replace(env, '#' => VERSION.major, count=1) env = replace(env, '#' => VERSION.minor, count=1) env = replace(env, '#' => VERSION.patch, count=1) @@ -354,9 +403,7 @@ end ## atexit: register exit hooks ## -const atexit_hooks = Callable[ - () -> Filesystem.temp_cleanup_purge(force=true) -] +const atexit_hooks = Callable[] const _atexit_hooks_lock = ReentrantLock() global _atexit_hooks_finished::Bool = false @@ -394,13 +441,18 @@ function atexit(f::Function) end function _atexit(exitcode::Cint) + # this current task shouldn't be scheduled anywhere, but if it was (because + # this exit came from a signal for example), then try to clear that state + # to minimize scheduler issues later + ct = current_task() + q = ct.queue; q === nothing || list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) # Don't hold the lock around the iteration, just in case any other thread executing in # parallel tries to register a new atexit hook while this is running. We don't want to # block that thread from proceeding, and we can allow it to register its hook which we # will immediately run here. while true local f - Base.@lock _atexit_hooks_lock begin + @lock _atexit_hooks_lock begin # If this is the last iteration, atomically disable atexit hooks to prevent # someone from registering a hook that will never be run. 
# (We do this inside the loop, so that it is atomic: no one can have registered @@ -421,7 +473,7 @@ function _atexit(exitcode::Cint) end catch ex showerror(stderr, ex) - Base.show_backtrace(stderr, catch_backtrace()) + show_backtrace(stderr, catch_backtrace()) println(stderr) end end @@ -441,7 +493,7 @@ function _postoutput() f() catch ex showerror(stderr, ex) - Base.show_backtrace(stderr, catch_backtrace()) + show_backtrace(stderr, catch_backtrace()) println(stderr) end end @@ -449,7 +501,7 @@ end ## hook for disabling threaded libraries ## -library_threading_enabled = true +library_threading_enabled::Bool = true const disable_library_threading_hooks = [] function at_disable_library_threading(f) diff --git a/base/int.jl b/base/int.jl index 4b2f542bba788..8a80f90f7e2c1 100644 --- a/base/int.jl +++ b/base/int.jl @@ -587,37 +587,32 @@ julia> bitstring(bitrotate(0b01110010, 8)) bitrotate(x::T, k::Integer) where {T <: BitInteger} = (x << ((sizeof(T) << 3 - 1) & k)) | (x >>> ((sizeof(T) << 3 - 1) & -k)) -# @doc isn't available when running in Core at this point. -# Tuple syntax for documentation two function signatures at the same time -# doesn't work either at this point. -if nameof(@__MODULE__) === :Base - for fname in (:mod, :rem) - @eval @doc """ - rem(x::Integer, T::Type{<:Integer}) -> T - mod(x::Integer, T::Type{<:Integer}) -> T - %(x::Integer, T::Type{<:Integer}) -> T - - Find `y::T` such that `x` ≡ `y` (mod n), where n is the number of integers representable - in `T`, and `y` is an integer in `[typemin(T),typemax(T)]`. - If `T` can represent any integer (e.g. `T == BigInt`), then this operation corresponds to - a conversion to `T`. - - # Examples - ```jldoctest - julia> x = 129 % Int8 - -127 - - julia> typeof(x) - Int8 - - julia> x = 129 % BigInt - 129 - - julia> typeof(x) - BigInt - ``` - """ $fname(x::Integer, T::Type{<:Integer}) - end +for fname in (:mod, :rem) + @eval @doc """ + rem(x::Integer, T::Type{<:Integer}) -> T + mod(x::Integer, T::Type{<:Integer}) -> T + %(x::Integer, T::Type{<:Integer}) -> T + + Find `y::T` such that `x` ≡ `y` (mod n), where n is the number of integers representable + in `T`, and `y` is an integer in `[typemin(T),typemax(T)]`. + If `T` can represent any integer (e.g. `T == BigInt`), then this operation corresponds to + a conversion to `T`. + + # Examples + ```jldoctest + julia> x = 129 % Int8 + -127 + + julia> typeof(x) + Int8 + + julia> x = 129 % BigInt + 129 + + julia> typeof(x) + BigInt + ``` + """ $fname(x::Integer, T::Type{<:Integer}) end rem(x::T, ::Type{T}) where {T<:Integer} = x @@ -629,70 +624,6 @@ mod(x::Integer, ::Type{T}) where {T<:Integer} = rem(x, T) unsafe_trunc(::Type{T}, x::Integer) where {T<:Integer} = rem(x, T) -""" - trunc([T,] x) - trunc(x; digits::Integer= [, base = 10]) - trunc(x; sigdigits::Integer= [, base = 10]) - -`trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value -is less than or equal to the absolute value of `x`. - -`trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is -not representable. - -Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). - -See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref). 
- -# Examples -```jldoctest -julia> trunc(2.22) -2.0 - -julia> trunc(-2.22, digits=1) --2.2 - -julia> trunc(Int, -2.22) --2 -``` -""" -function trunc end - -""" - floor([T,] x) - floor(x; digits::Integer= [, base = 10]) - floor(x; sigdigits::Integer= [, base = 10]) - -`floor(x)` returns the nearest integral value of the same type as `x` that is less than or -equal to `x`. - -`floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is -not representable. - -Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). -""" -function floor end - -""" - ceil([T,] x) - ceil(x; digits::Integer= [, base = 10]) - ceil(x; sigdigits::Integer= [, base = 10]) - -`ceil(x)` returns the nearest integral value of the same type as `x` that is greater than or -equal to `x`. - -`ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is not -representable. - -Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). -""" -function ceil end - -round(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) -trunc(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) -floor(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) - ceil(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) - ## integer construction ## """ @@ -754,6 +685,15 @@ julia> big"_" ERROR: ArgumentError: invalid number format _ for BigInt or BigFloat [...] ``` + +!!! warning + Using `@big_str` for constructing [`BigFloat`](@ref) values may not result + in the behavior that might be naively expected: as a macro, `@big_str` + obeys the global precision ([`setprecision`](@ref)) and rounding mode + ([`setrounding`](@ref)) settings as they are at *load time*. Thus, a + function like `() -> precision(big"0.3")` returns a constant whose value + depends on the value of the precision at the point when the function is + defined, **not** at the precision at the time when the function is called. 
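To make the warning concrete, a sketch (illustrative, not part of the patch) contrasting the macro with parsing a string at call time:

```julia
f() = big"0.1"           # BigFloat baked in when f is defined (load time)
g() = BigFloat("0.1")    # BigFloat parsed on every call, at the current precision

setprecision(BigFloat, 512) do
    (precision(f()), precision(g()))   # (256, 512) if the default 256-bit precision was in effect at load time
end
```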
""" macro big_str(s) message = "invalid number format $s for BigInt or BigFloat" @@ -898,166 +838,14 @@ widemul(x::Bool,y::Number) = x * y widemul(x::Number,y::Bool) = x * y -## wide multiplication, Int128 multiply and divide ## - -if Core.sizeof(Int) == 4 - function widemul(u::Int64, v::Int64) - local u0::UInt64, v0::UInt64, w0::UInt64 - local u1::Int64, v1::Int64, w1::UInt64, w2::Int64, t::UInt64 - - u0 = u & 0xffffffff; u1 = u >> 32 - v0 = v & 0xffffffff; v1 = v >> 32 - w0 = u0 * v0 - t = reinterpret(UInt64, u1) * v0 + (w0 >>> 32) - w2 = reinterpret(Int64, t) >> 32 - w1 = u0 * reinterpret(UInt64, v1) + (t & 0xffffffff) - hi = u1 * v1 + w2 + (reinterpret(Int64, w1) >> 32) - lo = w0 & 0xffffffff + (w1 << 32) - return Int128(hi) << 64 + Int128(lo) - end - - function widemul(u::UInt64, v::UInt64) - local u0::UInt64, v0::UInt64, w0::UInt64 - local u1::UInt64, v1::UInt64, w1::UInt64, w2::UInt64, t::UInt64 - - u0 = u & 0xffffffff; u1 = u >>> 32 - v0 = v & 0xffffffff; v1 = v >>> 32 - w0 = u0 * v0 - t = u1 * v0 + (w0 >>> 32) - w2 = t >>> 32 - w1 = u0 * v1 + (t & 0xffffffff) - hi = u1 * v1 + w2 + (w1 >>> 32) - lo = w0 & 0xffffffff + (w1 << 32) - return UInt128(hi) << 64 + UInt128(lo) - end - - function *(u::Int128, v::Int128) - u0 = u % UInt64; u1 = Int64(u >> 64) - v0 = v % UInt64; v1 = Int64(v >> 64) - lolo = widemul(u0, v0) - lohi = widemul(reinterpret(Int64, u0), v1) - hilo = widemul(u1, reinterpret(Int64, v0)) - t = reinterpret(UInt128, hilo) + (lolo >>> 64) - w1 = reinterpret(UInt128, lohi) + (t & 0xffffffffffffffff) - return Int128(lolo & 0xffffffffffffffff) + reinterpret(Int128, w1) << 64 - end - - function *(u::UInt128, v::UInt128) - u0 = u % UInt64; u1 = UInt64(u>>>64) - v0 = v % UInt64; v1 = UInt64(v>>>64) - lolo = widemul(u0, v0) - lohi = widemul(u0, v1) - hilo = widemul(u1, v0) - t = hilo + (lolo >>> 64) - w1 = lohi + (t & 0xffffffffffffffff) - return (lolo & 0xffffffffffffffff) + UInt128(w1) << 64 - end - - function _setbit(x::UInt128, i) - # faster version of `return x | (UInt128(1) << i)` - j = i >> 5 - y = UInt128(one(UInt32) << (i & 0x1f)) - if j == 0 - return x | y - elseif j == 1 - return x | (y << 32) - elseif j == 2 - return x | (y << 64) - elseif j == 3 - return x | (y << 96) - end - return x - end +# Int128 multiply and divide +*(x::T, y::T) where {T<:Union{Int128,UInt128}} = mul_int(x, y) - function divrem(x::UInt128, y::UInt128) - iszero(y) && throw(DivideError()) - if (x >> 64) % UInt64 == 0 - if (y >> 64) % UInt64 == 0 - # fast path: upper 64 bits are zero, so we can fallback to UInt64 division - q64, x64 = divrem(x % UInt64, y % UInt64) - return UInt128(q64), UInt128(x64) - else - # this implies y>x, so - return zero(UInt128), x - end - end - n = leading_zeros(y) - leading_zeros(x) - q = zero(UInt128) - ys = y << n - while n >= 0 - # ys == y * 2^n - if ys <= x - x -= ys - q = _setbit(q, n) - if (x >> 64) % UInt64 == 0 - # exit early, similar to above fast path - if (y >> 64) % UInt64 == 0 - q64, x64 = divrem(x % UInt64, y % UInt64) - q |= q64 - x = UInt128(x64) - end - return q, x - end - end - ys >>>= 1 - n -= 1 - end - return q, x - end +div(x::Int128, y::Int128) = checked_sdiv_int(x, y) +div(x::UInt128, y::UInt128) = checked_udiv_int(x, y) - function div(x::Int128, y::Int128) - (x == typemin(Int128)) & (y == -1) && throw(DivideError()) - return Int128(div(BigInt(x), BigInt(y)))::Int128 - end - div(x::UInt128, y::UInt128) = divrem(x, y)[1] - - function rem(x::Int128, y::Int128) - return Int128(rem(BigInt(x), BigInt(y)))::Int128 - end - - function rem(x::UInt128, 
y::UInt128) - iszero(y) && throw(DivideError()) - if (x >> 64) % UInt64 == 0 - if (y >> 64) % UInt64 == 0 - # fast path: upper 64 bits are zero, so we can fallback to UInt64 division - return UInt128(rem(x % UInt64, y % UInt64)) - else - # this implies y>x, so - return x - end - end - n = leading_zeros(y) - leading_zeros(x) - ys = y << n - while n >= 0 - # ys == y * 2^n - if ys <= x - x -= ys - if (x >> 64) % UInt64 == 0 - # exit early, similar to above fast path - if (y >> 64) % UInt64 == 0 - x = UInt128(rem(x % UInt64, y % UInt64)) - end - return x - end - end - ys >>>= 1 - n -= 1 - end - return x - end - - function mod(x::Int128, y::Int128) - return Int128(mod(BigInt(x), BigInt(y)))::Int128 - end -else - *(x::T, y::T) where {T<:Union{Int128,UInt128}} = mul_int(x, y) - - div(x::Int128, y::Int128) = checked_sdiv_int(x, y) - div(x::UInt128, y::UInt128) = checked_udiv_int(x, y) - - rem(x::Int128, y::Int128) = checked_srem_int(x, y) - rem(x::UInt128, y::UInt128) = checked_urem_int(x, y) -end +rem(x::Int128, y::Int128) = checked_srem_int(x, y) +rem(x::UInt128, y::UInt128) = checked_urem_int(x, y) # issue #15489: since integer ops are unchecked, they shouldn't check promotion for op in (:+, :-, :*, :&, :|, :xor) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 1b007700f4331..dc81f2bd3e489 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -97,6 +97,9 @@ end Least common (positive) multiple (or zero if any argument is zero). The arguments may be integer and rational numbers. +``a`` is a multiple of ``b`` if there exists an integer ``m`` such +that ``a=mb``. + !!! compat "Julia 1.4" Rational arguments require Julia 1.4 or later. @@ -150,7 +153,16 @@ gcd(a::T, b::T) where T<:Real = throw(MethodError(gcd, (a,b))) lcm(a::T, b::T) where T<:Real = throw(MethodError(lcm, (a,b))) gcd(abc::AbstractArray{<:Real}) = reduce(gcd, abc; init=zero(eltype(abc))) -lcm(abc::AbstractArray{<:Real}) = reduce(lcm, abc; init=one(eltype(abc))) +function lcm(abc::AbstractArray{<:Real}) + # Using reduce with init=one(eltype(abc)) is buggy for Rationals. + l = length(abc) + if l == 0 + eltype(abc) <: Integer && return one(eltype(abc)) + throw(ArgumentError("lcm has no identity for $(eltype(abc))")) + end + l == 1 && return abs(only(abc)) + return reduce(lcm, abc) +end function gcd(abc::AbstractArray{<:Integer}) a = zero(eltype(abc)) @@ -165,17 +177,24 @@ end # return (gcd(a, b), x, y) such that ax+by == gcd(a, b) """ - gcdx(a, b) + gcdx(a, b...) Computes the greatest common (positive) divisor of `a` and `b` and their Bézout coefficients, i.e. the integer coefficients `u` and `v` that satisfy -``ua+vb = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``. +``u*a + v*b = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``. + +For more arguments than two, i.e., `gcdx(a, b, c, ...)` the Bézout coefficients are computed +recursively, returning a solution `(d, u, v, w, ...)` to +``u*a + v*b + w*c + ... = d = gcd(a, b, c, ...)``. The arguments may be integer and rational numbers. !!! compat "Julia 1.4" Rational arguments require Julia 1.4 or later. +!!! compat "Julia 1.12" + More or fewer arguments than two require Julia 1.12 or later. + # Examples ```jldoctest julia> gcdx(12, 42) @@ -183,6 +202,9 @@ julia> gcdx(12, 42) julia> gcdx(240, 46) (2, -9, 47) + +julia> gcdx(15, 12, 20) +(1, 7, -7, -1) ``` !!! 
note @@ -198,6 +220,7 @@ julia> gcdx(240, 46) """ Base.@assume_effects :terminates_locally function gcdx(a::Integer, b::Integer) T = promote_type(typeof(a), typeof(b)) + a == b == 0 && return (zero(T), zero(T), zero(T)) # a0, b0 = a, b s0, s1 = oneunit(T), zero(T) t0, t1 = s1, s0 @@ -214,11 +237,23 @@ Base.@assume_effects :terminates_locally function gcdx(a::Integer, b::Integer) end gcdx(a::Real, b::Real) = gcdx(promote(a,b)...) gcdx(a::T, b::T) where T<:Real = throw(MethodError(gcdx, (a,b))) +gcdx(a::Real) = (gcd(a), signbit(a) ? -one(a) : one(a)) +function gcdx(a::Real, b::Real, cs::Real...) + # a solution to the 3-arg `gcdx(a,b,c)` problem, `u*a + v*b + w*c = gcd(a,b,c)`, can be + # obtained from the 2-arg problem in three steps: + # 1. `gcdx(a,b)`: solve `i*a + j*b = d′ = gcd(a,b)` for `(i,j)` + # 2. `gcdx(d′,c)`: solve `x*gcd(a,b) + yc = gcd(gcd(a,b),c) = gcd(a,b,c)` for `(x,y)` + # 3. return `d = gcd(a,b,c)`, `u = i*x`, `v = j*x`, and `w = y` + # the N-arg solution proceeds similarly by recursion + d, i, j = gcdx(a, b) + d′, x, ys... = gcdx(d, cs...) + return d′, i*x, j*x, ys... +end # multiplicative inverse of n mod m, error if none """ - invmod(n, m) + invmod(n::Integer, m::Integer) Take the inverse of `n` modulo `m`: `y` such that ``n y = 1 \\pmod m``, and ``div(y,m) = 0``. This will throw an error if ``m = 0``, or if @@ -257,29 +292,73 @@ function invmod(n::Integer, m::Integer) return mod(x, m) end +""" + invmod(n::Integer, T) where {T <: Base.BitInteger} + invmod(n::T) where {T <: Base.BitInteger} + +Compute the modular inverse of `n` in the integer ring of type `T`, i.e. modulo +`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words, these +methods satisfy the following identities: +``` +n * invmod(n) == 1 +(n * invmod(n, T)) % T == 1 +(n % T) * invmod(n, T) == 1 +``` +Note that `*` here is modular multiplication in the integer ring, `T`. This will +throw an error if `n` is even, because then it is not relatively prime with `2^N` +and thus has no such inverse. + +Specifying the modulus implied by an integer type as an explicit value is often +inconvenient since the modulus is by definition too big to be represented by the +type. + +The modular inverse is computed much more efficiently than the general case +using the algorithm described in https://arxiv.org/pdf/2204.04342.pdf. + +!!! compat "Julia 1.11" + The `invmod(n)` and `invmod(n, T)` methods require Julia 1.11 or later. 
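A quick illustration of the identities stated above (not part of the patch; assumes a Julia build that has these 1.11 methods):

```julia
n = Int32(12345)                 # any odd value of a bits-integer type
x = invmod(n)                    # inverse in the Int32 ring, i.e. modulo 2^32
n * x == one(Int32)              # true: Int32 `*` wraps around, so the product is exactly 1
(7 % Int16) * invmod(7, Int16) == one(Int16)  # true: modulus given as a type
invmod(7, 65536)                 # the general two-argument method needs the modulus written out
```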
+""" +invmod(n::Integer, ::Type{T}) where {T<:BitInteger} = invmod(n % T) + +function invmod(n::T) where {T<:BitInteger} + isodd(n) || throw(DomainError(n, "Argument must be odd.")) + x = (3*n ⊻ 2) % T + y = (1 - n*x) % T + for _ = 1:trailing_zeros(2*sizeof(T)) + x *= y + true + y *= y + end + return x +end + # ^ for any x supporting * -to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x) +function to_power_type(x::Number) + T = promote_type(typeof(x), typeof(one(x)), typeof(x*x)) + convert(T, x) +end +to_power_type(x) = oftype(x*x, x) @noinline throw_domerr_powbysq(::Any, p) = throw(DomainError(p, LazyString( "Cannot raise an integer x to a negative power ", p, ".", "\nConvert input to float."))) @noinline throw_domerr_powbysq(::Integer, p) = throw(DomainError(p, LazyString( "Cannot raise an integer x to a negative power ", p, ".", "\nMake x or ", p, " a float by adding a zero decimal ", - "(e.g., 2.0^", p, " or 2^", float(p), " instead of 2^", p, ")", + "(e.g., 2.0^", p, " or 2^", float(p), " instead of 2^", p, ") ", "or write 1/x^", -p, ", float(x)^", p, ", x^float(", p, ") or (x//1)^", p, "."))) @noinline throw_domerr_powbysq(::AbstractMatrix, p) = throw(DomainError(p, LazyString( "Cannot raise an integer matrix x to a negative power ", p, ".", "\nMake x a float matrix by adding a zero decimal ", - "(e.g., [2.0 1.0;1.0 0.0]^", p, " instead of [2 1;1 0]^", p, ")", + "(e.g., [2.0 1.0;1.0 0.0]^", p, " instead of [2 1;1 0]^", p, ") ", "or write float(x)^", p, " or Rational.(x)^", p, "."))) -@assume_effects :terminates_locally function power_by_squaring(x_, p::Integer) +# The * keyword supports `*=checked_mul` for `checked_pow` +@assume_effects :terminates_locally function power_by_squaring(x_, p::Integer; mul=*) x = to_power_type(x_) if p == 1 return copy(x) elseif p == 0 return one(x) elseif p == 2 - return x*x + return mul(x, x) elseif p < 0 isone(x) && return copy(x) isone(-x) && return iseven(p) ? one(x) : copy(x) @@ -288,16 +367,16 @@ to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x) t = trailing_zeros(p) + 1 p >>= t while (t -= 1) > 0 - x *= x + x = mul(x, x) end y = x while p > 0 t = trailing_zeros(p) + 1 p >>= t while (t -= 1) >= 0 - x *= x + x = mul(x, x) end - y *= x + y = mul(y, x) end return y end @@ -321,7 +400,7 @@ end # Restrict inlining to hardware-supported arithmetic types, which # are fast enough to benefit from inlining. 
-const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float32,Float64} +const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64} const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}} # Inline x^2 and x^3 for Val @@ -558,7 +637,7 @@ function bit_ndigits0z(x::Base.BitUnsigned64) end function bit_ndigits0z(x::UInt128) n = 0 - while x > 0x8ac7230489e80000 + while x > 0x8ac7230489e80000 # 10e18 x = div(x,0x8ac7230489e80000) n += 19 end @@ -704,7 +783,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba function bin(x::Unsigned, pad::Int, neg::Bool) m = top_set_bit(x) n = neg + max(pad, m) - a = StringVector(n) + a = StringMemory(n) # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes # @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1) # end @@ -724,50 +803,98 @@ function bin(x::Unsigned, pad::Int, neg::Bool) x >>= 0x1 i -= 1 end - if neg; @inbounds a[1]=0x2d; end - String(a) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') + unsafe_takestring(a) end function oct(x::Unsigned, pad::Int, neg::Bool) m = div(top_set_bit(x) + 2, 3) n = neg + max(pad, m) - a = StringVector(n) + a = StringMemory(n) i = n while i > neg @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x7) x >>= 0x3 i -= 1 end - if neg; @inbounds a[1]=0x2d; end - String(a) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') + unsafe_takestring(a) end # 2-digit decimal characters ("00":"99") -const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +const _dec_d100 = UInt16[ +# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +# 0 0, 0 1, 0 2, 0 3, and so on in little-endian + 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, + 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, + 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, + 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, + 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, + 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, + 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, + 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, + 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, + 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 +] -function dec(x::Unsigned, pad::Int, neg::Bool) - n = neg + ndigits(x, pad=pad) - a = StringVector(n) - i = n - @inbounds while i >= 2 - d, r = divrem(x, 0x64) - d100 = _dec_d100[(r % Int)::Int + 1] - a[i-1] = d100 % UInt8 - a[i] = (d100 >> 0x8) % UInt8 - x = oftype(x, d) +function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int) + i = olength + while i >= 2 + d, c = divrem(digits, 0x64) + digits = oftype(digits, d) + @inbounds d100 = _dec_d100[(c % Int) + 1] + @inbounds buf[pos + i - 2] = d100 % UInt8 + @inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8 i -= 2 end - if i > neg - @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + if i == 1 + @inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8 + i -= 1 + end + return pos + olength +end + +function append_nine_digits(digits::Unsigned, buf, pos::Int) + if digits == 0 + for _ = 1:9 + @inbounds buf[pos] = UInt8('0') + pos += 1 + end + return pos + end + return @inline append_c_digits(9, digits, buf, pos) # 
force loop-unrolling on the length +end + +function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int) + i = olength + # n.b. olength may be larger than required to print all of `digits` (and will be padded + # with zeros), but the printed number will be undefined if it is smaller, and may include + # bits of both the high and low bytes. + maxpow10 = 0x3b9aca00 # 10e9 as UInt32 + while i > 9 && digits > typemax(UInt) + # do everything in cheap math chunks, using the processor's native math size + d, c = divrem(digits, maxpow10) + digits = oftype(digits, d) + append_nine_digits(c % UInt32, buf, pos + i - 9) + i -= 9 end - if neg; @inbounds a[1]=0x2d; end - String(a) + append_c_digits(i, digits % UInt, buf, pos) + return pos + olength +end + + +function dec(x::Unsigned, pad::Int, neg::Bool) + n = neg + ndigits(x, pad=pad) + a = StringMemory(n) + append_c_digits_fast(n, x, a, 1) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') + unsafe_takestring(a) end function hex(x::Unsigned, pad::Int, neg::Bool) m = 2 * sizeof(x) - (leading_zeros(x) >> 2) n = neg + max(pad, m) - a = StringVector(n) + a = StringMemory(n) i = n while i >= 2 b = (x % UInt8)::UInt8 @@ -781,8 +908,8 @@ function hex(x::Unsigned, pad::Int, neg::Bool) d = (x % UInt8)::UInt8 & 0xf @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end - if neg; @inbounds a[1]=0x2d; end - String(a) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') + unsafe_takestring(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -794,7 +921,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) b = (base % Int)::Int digits = abs(b) <= 36 ? base36digits : base62digits n = neg + ndigits(x, base=b, pad=pad) - a = StringVector(n) + a = StringMemory(n) i = n @inbounds while i > neg if b > 0 @@ -806,8 +933,8 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) end i -= 1 end - if neg; @inbounds a[1]=0x2d; end - String(a) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') + unsafe_takestring(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -854,7 +981,8 @@ string(b::Bool) = b ? "true" : "false" """ bitstring(n) -A string giving the literal bit representation of a primitive type. +A string giving the literal bit representation of a primitive type +(in bigendian order, i.e. most-significant bit first). See also [`count_ones`](@ref), [`count_zeros`](@ref), [`digits`](@ref). @@ -868,9 +996,9 @@ julia> bitstring(2.2) ``` """ function bitstring(x::T) where {T} - isprimitivetype(T) || throw(ArgumentError("$T not a primitive type")) + isprimitivetype(T) || throw(ArgumentError(LazyString(T, " not a primitive type"))) sz = sizeof(T) * 8 - str = StringVector(sz) + str = StringMemory(sz) i = sz @inbounds while i >= 4 b = UInt32(sizeof(T) == 1 ? bitcast(UInt8, x) : trunc_int(UInt8, x)) @@ -882,7 +1010,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return String(str) + return unsafe_takestring(str) end """ @@ -890,7 +1018,7 @@ end Return an array with element type `T` (default `Int`) of the digits of `n` in the given base, optionally padded with zeros to a specified size. More significant digits are at -higher indices, such that `n == sum(digits[k]*base^(k-1) for k=1:length(digits))`. +higher indices, such that `n == sum(digits[k]*base^(k-1) for k in 1:length(digits))`. See also [`ndigits`](@ref), [`digits!`](@ref), and for base 2 also [`bitstring`](@ref), [`count_ones`](@ref). 
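The `_dec_d100` table and `append_c_digits` above peel off two decimal digits per `divrem`, looking both characters up at once. A standalone sketch of the same trick (illustrative, allocation-heavy version; the real code writes back-to-front into a preallocated buffer):

```julia
const D100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]

function dec_str(x::Unsigned)
    buf = UInt8[]
    while x >= 0x64                       # peel off the last two digits at a time
        x, r = divrem(x, 0x64)
        d = D100[Int(r) + 1]
        pushfirst!(buf, d % UInt8, (d >> 0x8) % UInt8)   # tens character, then ones character
    end
    if x >= 0x0a                          # one or two leading digits remain
        d = D100[Int(x) + 1]
        pushfirst!(buf, d % UInt8, (d >> 0x8) % UInt8)
    else
        pushfirst!(buf, 0x30 + x % UInt8)
    end
    return String(buf)
end

dec_str(UInt64(987654321))  # "987654321"
```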
@@ -968,7 +1096,7 @@ julia> digits!([2, 2, 2, 2, 2, 2], 10, base = 2) function digits!(a::AbstractVector{T}, n::Integer; base::Integer = 10) where T<:Integer 2 <= abs(base) || throw(DomainError(base, "base must be ≥ 2 or ≤ -2")) hastypemax(T) && abs(base) - 1 > typemax(T) && - throw(ArgumentError("type $T too small for base $base")) + throw(ArgumentError(LazyString("type ", T, " too small for base ", base))) isempty(a) && return a if base > 0 @@ -1047,7 +1175,7 @@ julia> factorial(big(21)) * [Factorial](https://en.wikipedia.org/wiki/Factorial) on Wikipedia. """ function factorial(n::Integer) - n < 0 && throw(DomainError(n, "`n` must be nonnegative.")) + n < 0 && throw(DomainError(n, "`n` must be non-negative.")) f::typeof(n*n) = 1 for i::typeof(n*n) = 2:n f *= i @@ -1089,6 +1217,8 @@ julia> binomial(-5, 3) # External links * [Binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient) on Wikipedia. """ +binomial(n::Integer, k::Integer) = binomial(promote(n, k)...) + Base.@assume_effects :terminates_locally function binomial(n::T, k::T) where T<:Integer n0, k0 = n, k k < 0 && return zero(T) @@ -1148,3 +1278,102 @@ function binomial(x::Number, k::Integer) # and instead divide each term by i, to avoid spurious overflow. return prod(i -> (x-(i-1))/i, OneTo(k), init=oneunit(x)/one(k)) end + +""" + clamp(x, lo, hi) + +Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments +are promoted to a common type. + +See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref). + +!!! compat "Julia 1.3" + `missing` as the first argument requires at least Julia 1.3. + +# Examples +```jldoctest +julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0) +3-element Vector{BigFloat}: + 3.141592653589793238462643383279502884197169399375105820974944592307816406286198 + 2.0 + 9.0 + +julia> clamp.([11, 8, 5], 10, 6) # an example where lo > hi +3-element Vector{Int64}: + 6 + 6 + 10 +``` +""" +function clamp(x::X, lo::L, hi::H) where {X,L,H} + T = promote_type(X, L, H) + return (x > hi) ? convert(T, hi) : (x < lo) ? convert(T, lo) : convert(T, x) +end + +""" + clamp(x, T)::T + +Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`. + +See also [`trunc`](@ref). + +# Examples +```jldoctest +julia> clamp(200, Int8) +127 + +julia> clamp(-200, Int8) +-128 + +julia> trunc(Int, 4pi^2) +39 +``` +""" +function clamp(x, ::Type{T}) where {T<:Integer} + # delegating to clamp(x, typemin(T), typemax(T)) would promote types + # this way, we avoid unnecessary conversions + # think of, e.g., clamp(big(2) ^ 200, Int16) + lo = typemin(T) + hi = typemax(T) + return (x > hi) ? hi : (x < lo) ? lo : convert(T, x) +end + + +""" + clamp!(array::AbstractArray, lo, hi) + +Restrict values in `array` to the specified range, in-place. +See also [`clamp`](@ref). + +!!! compat "Julia 1.3" + `missing` entries in `array` require at least Julia 1.3. + +# Examples +```jldoctest +julia> row = collect(-4:4)'; + +julia> clamp!(row, 0, Inf) +1×9 adjoint(::Vector{Int64}) with eltype Int64: + 0 0 0 0 0 1 2 3 4 + +julia> clamp.((-4:4)', 0, Inf) +1×9 Matrix{Float64}: + 0.0 0.0 0.0 0.0 0.0 1.0 2.0 3.0 4.0 +``` +""" +function clamp!(x::AbstractArray, lo, hi) + @inbounds for i in eachindex(x) + x[i] = clamp(x[i], lo, hi) + end + x +end + +""" + clamp(x::Integer, r::AbstractUnitRange) + +Clamp `x` to lie within range `r`. + +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. 
+""" +clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r)) diff --git a/base/invalidation.jl b/base/invalidation.jl new file mode 100644 index 0000000000000..5abb0b74ad884 --- /dev/null +++ b/base/invalidation.jl @@ -0,0 +1,130 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +struct GlobalRefIterator + mod::Module +end +IteratorSize(::Type{GlobalRefIterator}) = SizeUnknown() +globalrefs(mod::Module) = GlobalRefIterator(mod) + +function iterate(gri::GlobalRefIterator, i = 1) + m = gri.mod + table = ccall(:jl_module_get_bindings, Ref{SimpleVector}, (Any,), m) + i > length(table) && return nothing + b = table[i] + b === nothing && return iterate(gri, i+1) + return ((b::Core.Binding).globalref, i+1) +end + +const TYPE_TYPE_MT = Type.body.name.mt +const NONFUNCTION_MT = Core.MethodTable.name.mt +function foreach_module_mtable(visit, m::Module, world::UInt) + for gb in globalrefs(m) + binding = gb.binding + bpart = lookup_binding_partition(world, binding) + if is_defined_const_binding(binding_kind(bpart)) + v = partition_restriction(bpart) + uw = unwrap_unionall(v) + name = gb.name + if isa(uw, DataType) + tn = uw.name + if tn.module === m && tn.name === name && tn.wrapper === v && isdefined(tn, :mt) + # this is the original/primary binding for the type (name/wrapper) + mt = tn.mt + if mt !== nothing && mt !== TYPE_TYPE_MT && mt !== NONFUNCTION_MT + @assert mt.module === m + visit(mt) || return false + end + end + elseif isa(v, Module) && v !== m && parentmodule(v) === m && _nameof(v) === name + # this is the original/primary binding for the submodule + foreach_module_mtable(visit, v, world) || return false + elseif isa(v, Core.MethodTable) && v.module === m && v.name === name + # this is probably an external method table here, so let's + # assume so as there is no way to precisely distinguish them + visit(v) || return false + end + end + end + return true +end + +function foreach_reachable_mtable(visit, world::UInt) + visit(TYPE_TYPE_MT) || return + visit(NONFUNCTION_MT) || return + for mod in loaded_modules_array() + foreach_module_mtable(visit, mod, world) + end +end + +function should_invalidate_code_for_globalref(gr::GlobalRef, src::CodeInfo) + found_any = false + labelchangemap = nothing + stmts = src.code + isgr(g::GlobalRef) = gr.mod == g.mod && gr.name === g.name + isgr(g) = false + for i = 1:length(stmts) + stmt = stmts[i] + if isgr(stmt) + found_any = true + continue + end + for ur in Compiler.userefs(stmt) + arg = ur[] + # If any of the GlobalRefs in this stmt match the one that + # we are about, we need to move out all GlobalRefs to preserve + # effect order, in case we later invalidate a different GR + if isa(arg, GlobalRef) + if isgr(arg) + @assert !isa(stmt, PhiNode) + found_any = true + break + end + end + end + end + return found_any +end + +function scan_edge_list(ci::Core.CodeInstance, bpart::Core.BindingPartition) + isdefined(ci, :edges) || return false + edges = ci.edges + i = 1 + while i <= length(edges) + if isassigned(edges, i) && edges[i] === bpart + return true + end + i += 1 + end + return false +end + +function invalidate_code_for_globalref!(gr::GlobalRef, invalidated_bpart::Core.BindingPartition, new_max_world::UInt) + try + valid_in_valuepos = false + foreach_reachable_mtable(new_max_world) do mt::Core.MethodTable + for method in MethodList(mt) + if isdefined(method, :source) + src = _uncompressed_ir(method) + old_stmts = src.code + invalidate_all = should_invalidate_code_for_globalref(gr, src) + for 
mi in specializations(method) + isdefined(mi, :cache) || continue + ci = mi.cache + while true + if ci.max_world > new_max_world && (invalidate_all || scan_edge_list(ci, invalidated_bpart)) + ccall(:jl_invalidate_code_instance, Cvoid, (Any, UInt), ci, new_max_world) + end + isdefined(ci, :next) || break + ci = ci.next + end + end + end + end + return true + end + catch err + bt = catch_backtrace() + invokelatest(Base.println, "Internal Error during invalidation:") + invokelatest(Base.display_error, err, bt) + end +end diff --git a/base/io.jl b/base/io.jl index 60a24831587cb..46aec6ca393b7 100644 --- a/base/io.jl +++ b/base/io.jl @@ -25,6 +25,14 @@ end lock(::IO) = nothing unlock(::IO) = nothing + +""" + reseteof(io) + +Clear the EOF flag from IO so that further reads (and possibly writes) are +again allowed. Note that it may immediately get re-set, if the underlying +stream object is at EOF and cannot be resumed. +""" reseteof(x::IO) = nothing const SZ_UNBUFFERED_IO = 65536 @@ -68,6 +76,10 @@ Shutdown the write half of a full-duplex I/O stream. Performs a [`flush`](@ref) first. Notify the other end that no more data will be written to the underlying file. This is not supported by all IO types. +If implemented, `closewrite` causes subsequent `read` or `eof` calls that would +block to instead throw EOF or return true, respectively. If the stream is +already closed, this is idempotent. + # Examples ```jldoctest julia> io = Base.BufferStream(); # this never blocks, so we can read and write on the same Task @@ -119,6 +131,8 @@ data has already been buffered. The result is a `Vector{UInt8}`. """ function readavailable end +function isexecutable end + """ isreadable(io) -> Bool @@ -233,7 +247,7 @@ The endianness of the written value depends on the endianness of the host system Convert to/from a fixed endianness when writing/reading (e.g. using [`htol`](@ref) and [`ltoh`](@ref)) to get results that are consistent across platforms. -You can write multiple values with the same `write` call. i.e. the following are equivalent: +You can write multiple values with the same `write` call, i.e. the following are equivalent: write(io, x, y...) write(io, x) + write(io, y...) @@ -402,7 +416,14 @@ end """ AbstractPipe -`AbstractPipe` is the abstract supertype for IO pipes that provide for communication between processes. +`AbstractPipe` is an abstract supertype that exists for the convenience of creating +pass-through wrappers for other IO objects, so that you only need to implement the +additional methods relevant to your type. A subtype only needs to implement one or both of +these methods: + + struct P <: AbstractPipe; ...; end + pipe_reader(io::P) = io.out + pipe_writer(io::P) = io.in If `pipe isa AbstractPipe`, it must obey the following interface: @@ -440,10 +461,10 @@ for f in ( end read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb) -readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::UInt8; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) 
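A minimal `AbstractPipe` wrapper in the style the docstring above describes (illustrative type and field names, not part of the patch): only the two accessors are defined, and the forwarding methods such as the `copyuntil` definitions above do the rest.

```julia
struct LoopbackPipe <: Base.AbstractPipe
    buf::IO
end
Base.pipe_reader(p::LoopbackPipe) = p.buf
Base.pipe_writer(p::LoopbackPipe) = p.buf

p = LoopbackPipe(PipeBuffer())   # PipeBuffer: reads consume, writes append
println(p, "hello")              # forwarded through pipe_writer
readline(p)                      # "hello", forwarded through pipe_reader
```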
+copyuntil(out::IO, io::AbstractPipe, arg::AbstractChar; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractString; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractVector; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out) readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n) peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T @@ -499,11 +520,15 @@ read!(filename::AbstractString, a) = open(io->read!(io, a), convert(String, file readuntil(stream::IO, delim; keep::Bool = false) readuntil(filename::AbstractString, delim; keep::Bool = false) -Read a string from an I/O stream or a file, up to the given delimiter. +Read a string from an I/O `stream` or a file, up to the given delimiter. The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. Keyword argument `keep` controls whether the delimiter is included in the result. The text is assumed to be encoded in UTF-8. +Return a `String` if `delim` is an `AbstractChar` or a string +or otherwise return a `Vector{typeof(delim)}`. See also [`copyuntil`](@ref) +to instead write in-place to another stream (which can be a preallocated [`IOBuffer`](@ref)). + # Examples ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); @@ -517,7 +542,40 @@ julia> readuntil("my_file.txt", '.', keep = true) julia> rm("my_file.txt") ``` """ -readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), convert(String, filename)::String) +readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) +readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))) +readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep) + + +""" + copyuntil(out::IO, stream::IO, delim; keep::Bool = false) + copyuntil(out::IO, filename::AbstractString, delim; keep::Bool = false) + +Copy a string from an I/O `stream` or a file, up to the given delimiter, to +the `out` stream, returning `out`. +The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. +Keyword argument `keep` controls whether the delimiter is included in the result. +The text is assumed to be encoded in UTF-8. + +Similar to [`readuntil`](@ref), which returns a `String`; in contrast, +`copyuntil` writes directly to `out`, without allocating a string. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) + +# Examples +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); + +julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", 'L'))) +"Julia" + +julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true))) +"JuliaLang is a GitHub organization." + +julia> rm("my_file.txt") +``` +""" +copyuntil(out::IO, filename::AbstractString, delim; kw...) 
= open(io->copyuntil(out, io, delim; kw...), convert(String, filename)::String) """ readline(io::IO=stdin; keep::Bool=false) @@ -530,6 +588,11 @@ false (as it is by default), these trailing newline characters are removed from line before it is returned. When `keep` is true, they are returned as part of the line. +Return a `String`. See also [`copyline`](@ref) to instead write in-place +to another stream (which can be a preallocated [`IOBuffer`](@ref)). + +See also [`readuntil`](@ref) for reading until more general delimiters. + # Examples ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); @@ -551,21 +614,63 @@ Logan "Logan" ``` """ -function readline(filename::AbstractString; keep::Bool=false) - open(filename) do f - readline(f, keep=keep) - end -end +readline(filename::AbstractString; keep::Bool=false) = + open(io -> readline(io; keep), filename) +readline(s::IO=stdin; keep::Bool=false) = + String(_unsafe_take!(copyline(IOBuffer(sizehint=16), s; keep))) + +""" + copyline(out::IO, io::IO=stdin; keep::Bool=false) + copyline(out::IO, filename::AbstractString; keep::Bool=false) + +Copy a single line of text from an I/O `stream` or a file to the `out` stream, +returning `out`. + +When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the +input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `keep` is +false (as it is by default), these trailing newline characters are removed from the +line before it is returned. When `keep` is true, they are returned as part of the +line. + +Similar to [`readline`](@ref), which returns a `String`; in contrast, +`copyline` writes directly to `out`, without allocating a string. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) -function readline(s::IO=stdin; keep::Bool=false) - line = readuntil(s, 0x0a, keep=true)::Vector{UInt8} - i = length(line) - if keep || i == 0 || line[i] != 0x0a - return String(line) - elseif i < 2 || line[i-1] != 0x0d - return String(resize!(line,i-1)) +See also [`copyuntil`](@ref) for reading until more general delimiters. + +# Examples +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); + +julia> String(take!(copyline(IOBuffer(), "my_file.txt"))) +"JuliaLang is a GitHub organization." 
+ +julia> String(take!(copyline(IOBuffer(), "my_file.txt", keep=true))) +"JuliaLang is a GitHub organization.\\n" + +julia> rm("my_file.txt") +``` +""" +copyline(out::IO, filename::AbstractString; keep::Bool=false) = + open(io -> copyline(out, io; keep), filename) + +# fallback to optimized methods for IOBuffer in iobuffer.jl +function copyline(out::IO, s::IO; keep::Bool=false) + if keep + return copyuntil(out, s, 0x0a, keep=true) else - return String(resize!(line,i-2)) + # more complicated to deal with CRLF logic + while !eof(s) + b = read(s, UInt8) + b == 0x0a && break + if b == 0x0d && !eof(s) + b = read(s, UInt8) + b == 0x0a && break + write(out, 0x0d) + end + write(out, b) + end + return out end end @@ -696,10 +801,17 @@ end @noinline unsafe_write(s::IO, p::Ref{T}, n::Integer) where {T} = unsafe_write(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller) unsafe_write(s::IO, p::Ptr, n::Integer) = unsafe_write(s, convert(Ptr{UInt8}, p), convert(UInt, n)) -write(s::IO, x::Ref{T}) where {T} = unsafe_write(s, x, Core.sizeof(T)) +function write(s::IO, x::Ref{T}) where {T} + x isa Ptr && error("write cannot copy from a Ptr") + if isbitstype(T) + Int(unsafe_write(s, x, Core.sizeof(T))) + else + write(s, x[]) + end +end write(s::IO, x::Int8) = write(s, reinterpret(UInt8, x)) function write(s::IO, x::Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128,Float16,Float32,Float64}) - return write(s, Ref(x)) + return unsafe_write(s, Ref(x), Core.sizeof(x)) end write(s::IO, x::Bool) = write(s, UInt8(x)) @@ -710,48 +822,52 @@ function write(s::IO, A::AbstractArray) error("`write` is not supported on non-isbits arrays") end nb = 0 + r = Ref{eltype(A)}() for a in A - nb += write(s, a) + r[] = a + nb += @noinline unsafe_write(s, r, Core.sizeof(r)) # r must be heap-allocated end return nb end -function write(s::IO, a::Array) - if isbitstype(eltype(a)) - return GC.@preserve a unsafe_write(s, pointer(a), sizeof(a)) - else +function write(s::IO, A::StridedArray) + if !isbitstype(eltype(A)) error("`write` is not supported on non-isbits arrays") end -end - -function write(s::IO, a::SubArray{T,N,<:Array}) where {T,N} - if !isbitstype(T) || !isa(a, StridedArray) - return invoke(write, Tuple{IO, AbstractArray}, s, a) + _checkcontiguous(Bool, A) && + return GC.@preserve A unsafe_write(s, pointer(A), elsize(A) * length(A)) + sz::Dims = size(A) + st::Dims = strides(A) + msz, mst, n = merge_adjacent_dim(sz, st) + mst == 1 || return invoke(write, Tuple{IO, AbstractArray}, s, A) + n == ndims(A) && + return GC.@preserve A unsafe_write(s, pointer(A), elsize(A) * length(A)) + sz′, st′ = tail(sz), tail(st) + while n > 1 + sz′ = (tail(sz′)..., 1) + st′ = (tail(st′)..., 0) + n -= 1 end - elsz = elsize(a) - colsz = size(a,1) * elsz - GC.@preserve a if stride(a,1) != 1 - for idxs in CartesianIndices(size(a)) - unsafe_write(s, pointer(a, idxs), elsz) - end - return elsz * length(a) - elseif N <= 1 - return unsafe_write(s, pointer(a, 1), colsz) - else - for colstart in CartesianIndices((1, size(a)[2:end]...)) - unsafe_write(s, pointer(a, colstart), colsz) + GC.@preserve A begin + nb = 0 + iter = CartesianIndices(sz′) + for I in iter + p = pointer(A) + for i in 1:length(sz′) + p += elsize(A) * st′[i] * (I[i] - 1) + end + nb += unsafe_write(s, p, elsize(A) * msz) end - return colsz * trailingsize(a,2) + return nb end end function write(io::IO, c::Char) u = bswap(reinterpret(UInt32, c)) - n = 1 + n = 0 while true - write(io, u % UInt8) + n += write(io, u % UInt8) (u >>= 8) 
== 0 && return n - n += 1 end end # write(io, ::AbstractChar) is not defined: implementations @@ -772,37 +888,81 @@ end @noinline unsafe_read(s::IO, p::Ref{T}, n::Integer) where {T} = unsafe_read(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller) unsafe_read(s::IO, p::Ptr, n::Integer) = unsafe_read(s, convert(Ptr{UInt8}, p), convert(UInt, n)) -read!(s::IO, x::Ref{T}) where {T} = (unsafe_read(s, x, Core.sizeof(T)); x) +function read!(s::IO, x::Ref{T}) where {T} + x isa Ptr && error("read! cannot copy into a Ptr") + if isbitstype(T) + unsafe_read(s, x, Core.sizeof(T)) + else + x[] = read(s, T) + end + return x +end read(s::IO, ::Type{Int8}) = reinterpret(Int8, read(s, UInt8)) function read(s::IO, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) - return read!(s, Ref{T}(0))[]::T + r = Ref{T}(0) + unsafe_read(s, r, Core.sizeof(T)) + return r[] end read(s::IO, ::Type{Bool}) = (read(s, UInt8) != 0) read(s::IO, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(s, UInt)) -function read!(s::IO, a::Array{UInt8}) - GC.@preserve a unsafe_read(s, pointer(a), sizeof(a)) - return a +function read!(s::IO, A::AbstractArray{T}) where {T} + if isbitstype(T) && _checkcontiguous(Bool, A) + GC.@preserve A unsafe_read(s, pointer(A), elsize(A) * length(A)) + else + if isbitstype(T) + r = Ref{T}() + for i in eachindex(A) + @noinline unsafe_read(s, r, Core.sizeof(r)) # r must be heap-allocated + A[i] = r[] + end + else + for i in eachindex(A) + A[i] = read(s, T) + end + end + end + return A end -function read!(s::IO, a::AbstractArray{T}) where T - if isbitstype(T) && (a isa Array || a isa FastContiguousSubArray{T,<:Any,<:Array{T}}) - GC.@preserve a unsafe_read(s, pointer(a), sizeof(a)) +function read!(s::IO, A::StridedArray{T}) where {T} + if !isbitstype(T) || _checkcontiguous(Bool, A) + return invoke(read!, Tuple{IO, AbstractArray}, s, A) + end + sz::Dims = size(A) + st::Dims = strides(A) + msz, mst, n = merge_adjacent_dim(sz, st) + mst == 1 || return invoke(read!, Tuple{IO, AbstractArray}, s, A) + if n == ndims(A) + GC.@preserve A unsafe_read(s, pointer(A), elsize(A) * length(A)) else - for i in eachindex(a) - a[i] = read(s, T) + sz′, st′ = tail(sz), tail(st) + while n > 1 + sz′ = (tail(sz′)..., 1) + st′ = (tail(st′)..., 0) + n -= 1 + end + GC.@preserve A begin + iter = CartesianIndices(sz′) + for I in iter + p = pointer(A) + for i in 1:length(sz′) + p += elsize(A) * st′[i] * (I[i] - 1) + end + unsafe_read(s, p, elsize(A) * msz) + end end end - return a + return A end function read(io::IO, ::Type{Char}) b0 = read(io, UInt8)::UInt8 - l = 8(4-leading_ones(b0)) + l = 0x08 * (0x04 - UInt8(leading_ones(b0))) c = UInt32(b0) << 24 - if l < 24 + if l ≤ 0x10 s = 16 while s ≥ l && !eof(io)::Bool peek(io) & 0xc0 == 0x80 || break @@ -816,15 +976,10 @@ end # read(io, T) is not defined for other AbstractChar: implementations # must provide their own encoding-specific method. 
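A small round trip exercising the strided `write`/`read!` paths above (not part of the patch): a non-contiguous view is written out and read back into a dense array of the same shape.

```julia
A = collect(reshape(1.0:16.0, 4, 4))
io = IOBuffer()
write(io, view(A, 1:2:3, :))   # 2×4 strided view, written in column-major order
seekstart(io)
B = zeros(2, 4)
read!(io, B)                   # fill B from the raw bytes just written
B == A[1:2:3, :]               # true
```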
-# readuntil_string is useful below since it has -# an optimized method for s::IOStream -readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))::String - -function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) +function copyuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false) if delim ≤ '\x7f' - return readuntil_string(s, delim % UInt8, keep) + return copyuntil(out, s, delim % UInt8; keep) end - out = IOBuffer() for c in readeach(s, Char) if c == delim keep && write(out, c) @@ -832,17 +987,22 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) end write(out, c) end - return String(take!(out)) + return out end -function readuntil(s::IO, delim::T; keep::Bool=false) where T - out = (T === UInt8 ? StringVector(0) : Vector{T}()) +# note: optimized methods of copyuntil for IOStreams and delim::UInt8 in iostream.jl +# and for IOBuffer with delim::UInt8 in iobuffer.jl +copyuntil(out::IO, s::IO, delim; keep::Bool=false) = _copyuntil(out, s, delim, keep) + +# supports out::Union{IO, AbstractVector} for use with both copyuntil & readuntil +function _copyuntil(out, s::IO, delim::T, keep::Bool) where T + output! = isa(out, IO) ? write : push! for c in readeach(s, T) if c == delim - keep && push!(out, c) + keep && output!(out, c) break end - push!(out, c) + output!(out, c) end return out end @@ -933,27 +1093,29 @@ function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) w return false end -function readuntil(io::IO, target::AbstractString; keep::Bool=false) +function copyuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false) # small-string target optimizations x = Iterators.peel(target) - isnothing(x) && return "" + isnothing(x) && return out c, rest = x if isempty(rest) && c <= '\x7f' - return readuntil_string(io, c % UInt8, keep) + return copyuntil(out, io, c % UInt8; keep) end # convert String to a utf8-byte-iterator if !(target isa String) && !(target isa SubString{String}) target = String(target) end target = codeunits(target)::AbstractVector - return String(readuntil(io, target, keep=keep)) + return copyuntil(out, io, target, keep=keep) end function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T - out = (T === UInt8 ? StringVector(0) : Vector{T}()) + out = (T === UInt8 ? resize!(StringVector(16), 0) : Vector{T}()) readuntil_vector!(io, target, keep, out) return out end +copyuntil(out::IO, io::IO, target::AbstractVector; keep::Bool=false) = + (readuntil_vector!(io, target, keep, out); out) """ readchomp(x) @@ -1128,7 +1290,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) empty!(chunks) # will cause next iteration to terminate seekend(r.itr.stream) # reposition to end of stream for isdone - s = String(take!(buf)) + s = String(_unsafe_take!(buf)) else # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline] if ichunk == jchunk # common case: current and previous newline in same chunk @@ -1145,7 +1307,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) end write(buf, view(chunks[jchunk], 1:jnewline)) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) - s = String(take!(buf)) + s = String(_unsafe_take!(buf)) # overwrite obsolete chunks (ichunk+1:jchunk) i = jchunk @@ -1263,7 +1425,7 @@ previously marked position. Throw an error if the stream is not marked. See also [`mark`](@ref), [`unmark`](@ref), [`ismarked`](@ref). 
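A short usage sketch for the mark/reset machinery referenced above, on an in-memory stream:

```julia
io = IOBuffer("abcdef")
mark(io)              # remember the current position
read(io, 3)           # consume three bytes
reset(io)             # rewind to the marked position and clear the mark
String(read(io))      # "abcdef"
```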
""" function reset(io::T) where T<:IO - ismarked(io) || throw(ArgumentError("$T not marked")) + ismarked(io) || throw(ArgumentError(LazyString(T, " not marked"))) m = io.mark seek(io, m) io.mark = -1 # must be after seek, or seek may fail diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 6c95285f232f2..7e309b9ad586c 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -4,32 +4,45 @@ # Stateful string mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO - data::T # T should support: getindex, setindex!, length, copyto!, and resize! + data::T # T should support: getindex, setindex!, length, copyto!, similar, and (optionally) resize! reinit::Bool # if true, data needs to be re-allocated (after take!) readable::Bool writable::Bool seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data append::Bool # add data at end instead of at pointer - size::Int # end pointer (and write pointer if append == true) + size::Int # end pointer (and write pointer if append == true) + offset maxsize::Int # fixed array size (typically pre-allocated) - ptr::Int # read (and maybe write) pointer + ptr::Int # read (and maybe write) pointer + offset + offset::Int # offset of ptr and size from actual start of data and actual size mark::Int # reset mark location for ptr (or <0 for no mark) function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, maxsize::Integer) where T<:AbstractVector{UInt8} require_one_based_indexing(data) - new(data,false,readable,writable,seekable,append,length(data),maxsize,1,-1) + return new(data, false, readable, writable, seekable, append, length(data), maxsize, 1, 0, -1) end end -const IOBuffer = GenericIOBuffer{Vector{UInt8}} + +const IOBuffer = GenericIOBuffer{Memory{UInt8}} function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, maxsize::Integer) where T<:AbstractVector{UInt8} GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize) end +function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, seekable::Bool, append::Bool, + maxsize::Integer) + ref = data.ref + buf = GenericIOBuffer(ref.mem, readable, writable, seekable, append, maxsize) + offset = memoryrefoffset(ref) - 1 + buf.ptr += offset + buf.size = length(data) + offset + buf.offset = offset + return buf +end # allocate Vector{UInt8}s for IOBuffer storage that can efficiently become Strings -StringVector(n::Integer) = unsafe_wrap(Vector{UInt8}, _string_n(n)) +StringMemory(n::Integer) = unsafe_wrap(Memory{UInt8}, _string_n(n)) +StringVector(n::Integer) = wrap(Array, StringMemory(n)) # IOBuffers behave like Files. They are typically readable and writable. They are seekable. (They can be appendable). @@ -46,6 +59,15 @@ It may take optional keyword arguments: When `data` is not given, the buffer will be both readable and writable by default. +!!! warning "Passing `data` as scratch space to `IOBuffer` with `write=true` may give unexpected behavior" + Once `write` is called on an `IOBuffer`, it is best to consider any + previous references to `data` invalidated; in effect `IOBuffer` "owns" + this data until a call to `take!`. Any indirect mutations to `data` + could lead to undefined behavior by breaking the abstractions expected + by `IOBuffer`. If `write=true` the IOBuffer may store data at any + offset leaving behind arbitrary values at other offsets. 
If `maxsize > length(data)`, + the IOBuffer might re-allocate the data entirely, which + may or may not be visible in any outstanding bindings to `array`. # Examples ```jldoctest julia> io = IOBuffer(); @@ -98,7 +120,7 @@ function IOBuffer( flags = open_flags(read=read, write=write, append=append, truncate=truncate) buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, Int(maxsize)) if flags.truncate - buf.size = 0 + buf.size = buf.offset end return buf end @@ -113,7 +135,7 @@ function IOBuffer(; size = sizehint !== nothing ? Int(sizehint) : maxsize != typemax(Int) ? Int(maxsize) : 32 flags = open_flags(read=read, write=write, append=append, truncate=truncate) buf = IOBuffer( - StringVector(size), + StringMemory(size), read=flags.read, write=flags.write, append=flags.append, @@ -123,10 +145,11 @@ function IOBuffer(; return buf end -# PipeBuffers behave like Unix Pipes. They are typically readable and writable, they act appendable, and are not seekable. +# PipeBuffers behave somewhat more like Unix Pipes (than Files). They are typically readable and writable, they act appendable, and are not seekable. +# However, they do not support stream notification, so for that there is the BufferStream wrapper around this. """ - PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Integer = typemax(Int)) + PipeBuffer(data::AbstractVector{UInt8}=UInt8[]; maxsize::Integer = typemax(Int)) An [`IOBuffer`](@ref) that allows reading and performs writes by appending. Seeking and truncating are not supported. @@ -134,12 +157,12 @@ See [`IOBuffer`](@ref) for the available constructors. If `data` is given, creates a `PipeBuffer` to operate on a data vector, optionally specifying a size beyond which the underlying `Array` may not be grown. """ -PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) = - GenericIOBuffer(data,true,true,false,true,maxsize) -PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringVector(maxsize), maxsize = maxsize); x.size=0; x) +PipeBuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) = + GenericIOBuffer(data, true, true, false, true, maxsize) +PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringMemory(maxsize), maxsize = maxsize); x.size = 0; x) _similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len) -_similar_data(b::IOBuffer, len::Int) = StringVector(len) +_similar_data(b::IOBuffer, len::Int) = StringMemory(len) function copy(b::GenericIOBuffer) ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ? @@ -147,6 +170,8 @@ function copy(b::GenericIOBuffer) b.readable, b.writable, b.seekable, b.append, b.maxsize) ret.size = b.size ret.ptr = b.ptr + ret.mark = b.mark + ret.offset = b.offset return ret end @@ -155,9 +180,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ", "writable=", b.writable, ", ", "seekable=", b.seekable, ", ", "append=", b.append, ", ", - "size=", b.size, ", ", + "size=", b.size - b.offset, ", ", "maxsize=", b.maxsize == typemax(Int) ? 
"Inf" : b.maxsize, ", ", - "ptr=", b.ptr, ", ", + "ptr=", b.ptr - b.offset, ", ", "mark=", b.mark, ")") @noinline function _throw_not_readable() @@ -169,7 +194,7 @@ function unsafe_read(from::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt) from.readable || _throw_not_readable() avail = bytesavailable(from) adv = min(avail, nb) - GC.@preserve from unsafe_copyto!(p, pointer(from.data, from.ptr), adv) + unsafe_read!(p, from.data, from.ptr, adv) from.ptr += adv if nb > avail throw(EOFError()) @@ -177,6 +202,25 @@ function unsafe_read(from::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt) nothing end +function unsafe_read!(dest::Ptr{UInt8}, src::AbstractVector{UInt8}, so::Integer, nbytes::UInt) + for i in 1:nbytes + unsafe_store!(dest, @inbounds(src[so+i-1]), i) + end +end + +# Note: Currently, CodeUnits <: DenseVector, which makes this union redundant w.r.t +# DenseArrayType{UInt8}, but this is a bug, and may be removed in future versions +# of Julia. See #54002 +const DenseBytes = Union{ + <:DenseArrayType{UInt8}, + CodeUnits{UInt8, <:Union{String, SubString{String}}}, +} + +function unsafe_read!(dest::Ptr{UInt8}, src::DenseBytes, so::Integer, nbytes::UInt) + GC.@preserve src unsafe_copyto!(dest, pointer(src, so), nbytes) + nothing +end + function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) from.readable || _throw_not_readable() avail = bytesavailable(from) @@ -203,7 +247,7 @@ function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T if offs+nel-1 > length(a) || offs < 1 || nel < 0 throw(BoundsError()) end - if isbitstype(T) && isa(a,Array) + if isa(a, MutableDenseArrayType{UInt8}) nb = UInt(nel * sizeof(T)) GC.@preserve a unsafe_read(from, pointer(a, offs), nb) else @@ -239,29 +283,37 @@ read(from::GenericIOBuffer, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(fro isreadable(io::GenericIOBuffer) = io.readable iswritable(io::GenericIOBuffer) = io.writable -# TODO: GenericIOBuffer is not iterable, so doesn't really have a length. -# This should maybe be sizeof() instead. -#length(io::GenericIOBuffer) = (io.seekable ? io.size : bytesavailable(io)) +filesize(io::GenericIOBuffer) = (io.seekable ? 
io.size - io.offset : bytesavailable(io)) bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1 -position(io::GenericIOBuffer) = io.ptr-1 +position(io::GenericIOBuffer) = io.ptr - io.offset - 1 function skip(io::GenericIOBuffer, n::Integer) - seekto = io.ptr + n - n < 0 && return seek(io, seekto-1) # Does error checking - io.ptr = min(seekto, io.size+1) - return io + skip(io, clamp(n, Int)) +end +function skip(io::GenericIOBuffer, n::Int) + if signbit(n) + seekto = clamp(widen(position(io)) + widen(n), Int) + seek(io, seekto) # Does error checking + else + n_max = io.size + 1 - io.ptr + io.ptr += min(n, n_max) + io + end end function seek(io::GenericIOBuffer, n::Integer) + seek(io, clamp(n, Int)) +end +function seek(io::GenericIOBuffer, n::Int) if !io.seekable ismarked(io) || throw(ArgumentError("seek failed, IOBuffer is not seekable and is not marked")) n == io.mark || throw(ArgumentError("seek failed, IOBuffer is not seekable and n != mark")) end # TODO: REPL.jl relies on the fact that this does not throw (by seeking past the beginning or end # of an GenericIOBuffer), so that would need to be fixed in order to throw an error here - #(n < 0 || n > io.size) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries.")) - #io.ptr = n+1 - io.ptr = max(min(n+1, io.size+1), 1) + #(n < 0 || n > io.size - io.offset) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries.")) + #io.ptr = n + io.offset + 1 + io.ptr = clamp(n, 0, io.size - io.offset) + io.offset + 1 return io end @@ -270,32 +322,66 @@ function seekend(io::GenericIOBuffer) return io end +# choose a resize strategy based on whether `resize!` is defined: +# for a Vector, we use `resize!`, but for most other types, +# this calls `similar`+copy +function _resize!(io::GenericIOBuffer, sz::Int) + a = io.data + offset = io.offset + if applicable(resize!, a, sz) + if offset != 0 + size = io.size + size > offset && copyto!(a, 1, a, offset + 1, min(sz, size - offset)) + io.ptr -= offset + io.size -= offset + io.offset = 0 + end + resize!(a, sz) + else + size = io.size + if size >= sz && sz != 0 + b = a + else + b = _similar_data(io, sz == 0 ? 
0 : max(overallocation(size - io.offset), sz)) + end + size > offset && copyto!(b, 1, a, offset + 1, min(sz, size - offset)) + io.data = b + io.ptr -= offset + io.size -= offset + io.offset = 0 + end + return io +end + function truncate(io::GenericIOBuffer, n::Integer) io.writable || throw(ArgumentError("truncate failed, IOBuffer is not writeable")) io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable")) n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n")) n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)")) + n = Int(n) if io.reinit io.data = _similar_data(io, n) io.reinit = false - elseif n > length(io.data) - resize!(io.data, n) + elseif n > length(io.data) + io.offset + _resize!(io, n) end + ismarked(io) && io.mark > n && unmark(io) + n += io.offset io.data[io.size+1:n] .= 0 io.size = n io.ptr = min(io.ptr, n+1) - ismarked(io) && io.mark > n && unmark(io) return io end function compact(io::GenericIOBuffer) io.writable || throw(ArgumentError("compact failed, IOBuffer is not writeable")) io.seekable && throw(ArgumentError("compact failed, IOBuffer is seekable")) + io.reinit && return local ptr::Int, bytes_to_move::Int - if ismarked(io) && io.mark < io.ptr - if io.mark == 0 return end - ptr = io.mark - bytes_to_move = bytesavailable(io) + (io.ptr-io.mark) + if ismarked(io) && io.mark < position(io) + io.mark == 0 && return + ptr = io.mark + io.offset + bytes_to_move = bytesavailable(io) + (io.ptr - ptr) else ptr = io.ptr bytes_to_move = bytesavailable(io) @@ -303,19 +389,24 @@ function compact(io::GenericIOBuffer) copyto!(io.data, 1, io.data, ptr, bytes_to_move) io.size -= ptr - 1 io.ptr -= ptr - 1 - io.mark -= ptr - 1 - return io + io.offset = 0 + return end @noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt) io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable")) + if io.reinit + io.data = _similar_data(io, nshort % Int) + io.reinit = false + end if !io.seekable - if !ismarked(io) && io.ptr > 1 && io.size <= io.ptr - 1 + if !ismarked(io) && io.ptr > io.offset+1 && io.size <= io.ptr - 1 io.ptr = 1 io.size = 0 + io.offset = 0 else - datastart = ismarked(io) ? io.mark : io.ptr - if (io.size+nshort > io.maxsize) || + datastart = (ismarked(io) ? io.mark : io.ptr - io.offset) + if (io.size-io.offset+nshort > io.maxsize) || (datastart > 4096 && datastart > io.size - io.ptr) || (datastart > 262144) # apply somewhat arbitrary heuristics to decide when to destroy @@ -329,27 +420,21 @@ end @inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort)) @inline function ensureroom(io::GenericIOBuffer, nshort::UInt) - if !io.writable || (!io.seekable && io.ptr > 1) + if !io.writable || (!io.seekable && io.ptr > io.offset+1) || io.reinit ensureroom_slowpath(io, nshort) end - n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize) - if io.reinit - io.data = _similar_data(io, n) - io.reinit = false - else - l = length(io.data) - if n > l - _growend!(io.data, (n - l) % UInt) - end + n = min((nshort % Int) + (io.append ? 
io.size : io.ptr-1) - io.offset, io.maxsize) + l = length(io.data) + io.offset + if n > l + _resize!(io, Int(n)) end return io end -eof(io::GenericIOBuffer) = (io.ptr-1 == io.size) +eof(io::GenericIOBuffer) = (io.ptr - 1 >= io.size) function closewrite(io::GenericIOBuffer) io.writable = false - # OR throw(_UVError("closewrite", UV_ENOTSOCK)) nothing end @@ -358,11 +443,12 @@ end io.writable = false io.seekable = false io.size = 0 + io.offset = 0 io.maxsize = 0 io.ptr = 1 io.mark = -1 - if io.writable - resize!(io.data, 0) + if io.writable && !io.reinit + io.data = _resize!(io, 0) end nothing end @@ -388,45 +474,45 @@ julia> String(take!(io)) function take!(io::GenericIOBuffer) ismarked(io) && unmark(io) if io.seekable - nbytes = io.size - data = copyto!(StringVector(nbytes), 1, io.data, 1, nbytes) + nbytes = io.size - io.offset + data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) else nbytes = bytesavailable(io) - data = read!(io,StringVector(nbytes)) + data = read!(io, StringVector(nbytes)) end if io.writable io.ptr = 1 io.size = 0 + io.offset = 0 end return data end function take!(io::IOBuffer) ismarked(io) && unmark(io) if io.seekable - if io.writable - if io.reinit - data = StringVector(0) - else - data = resize!(io.data, io.size) - io.reinit = true - end + nbytes = filesize(io) + if nbytes == 0 || io.reinit + data = StringVector(0) + elseif io.writable + data = wrap(Array, memoryref(io.data, io.offset + 1), nbytes) else - data = copyto!(StringVector(io.size), 1, io.data, 1, io.size) + data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) end else nbytes = bytesavailable(io) - if io.writable - data = io.data - io.reinit = true - _deletebeg!(data, io.ptr-1) - resize!(data, nbytes) + if nbytes == 0 + data = StringVector(0) + elseif io.writable + data = wrap(Array, memoryref(io.data, io.ptr), nbytes) else - data = read!(io, StringVector(nbytes)) + data = read!(io, data) end end if io.writable + io.reinit = true io.ptr = 1 io.size = 0 + io.offset = 0 end return data end @@ -440,17 +526,23 @@ state. This should only be used internally for performance-critical `String` routines that immediately discard `io` afterwards, and it *assumes* that `io` is writable and seekable. -It saves no allocations compared to `take!`, it just omits some checks. +It might save an allocation compared to `take!` (if the compiler elides the +Array allocation), as well as omits some checks. """ -_unsafe_take!(io::IOBuffer) = resize!(io.data, io.size) +_unsafe_take!(io::IOBuffer) = + wrap(Array, io.size == io.offset ? 
+ memoryref(Memory{UInt8}()) : + memoryref(io.data, io.offset + 1), + io.size - io.offset) function write(to::IO, from::GenericIOBuffer) + written::Int = bytesavailable(from) if to === from from.ptr = from.size + 1 - return 0 + else + written = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(written)) + from.ptr += written end - written::Int = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(bytesavailable(from))) - from.ptr += written return written end @@ -488,8 +580,8 @@ end return sizeof(UInt8) end -readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb)) -function readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb::Int) +readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb)) +function readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb::Int) nr = min(nb, bytesavailable(io)) if length(b) < nr resize!(b, nr) @@ -497,13 +589,13 @@ function readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb::Int) read_sub(io, b, 1, nr) return nr end -read(io::GenericIOBuffer) = read!(io,StringVector(bytesavailable(io))) +read(io::GenericIOBuffer) = read!(io, StringVector(bytesavailable(io))) readavailable(io::GenericIOBuffer) = read(io) -read(io::GenericIOBuffer, nb::Integer) = read!(io,StringVector(min(nb, bytesavailable(io)))) +read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io)))) function occursin(delim::UInt8, buf::IOBuffer) p = pointer(buf.data, buf.ptr) - q = GC.@preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim,bytesavailable(buf)) + q = GC.@preserve buf ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, delim, bytesavailable(buf)) return q != C_NULL end @@ -516,33 +608,53 @@ function occursin(delim::UInt8, buf::GenericIOBuffer) return false end -function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false) - lb = 70 - A = StringVector(lb) - nread = 0 - nout = 0 - data = io.data - for i = io.ptr : io.size - @inbounds b = data[i] - nread += 1 - if keep || b != delim - nout += 1 - if nout > lb - lb = nout*2 - resize!(A, lb) - end - @inbounds A[nout] = b - end - if b == delim - break - end +function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) + data = view(io.data, io.ptr:io.size) + # note: findfirst + copyto! is much faster than a single loop + # except for nout ≲ 20. A single loop is 2x faster for nout=5. + nout = nread = something(findfirst(==(delim), data), length(data)) + if !keep && nout > 0 && data[nout] == delim + nout -= 1 end + write(out, view(io.data, io.ptr:io.ptr+nout-1)) io.ptr += nread - if lb != nout - resize!(A, nout) + return out +end + +function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false) + copyuntil(out, s, 0x0a, keep=true) + line = out.data + i = out.size # XXX: this is only correct for appended data. if the data was inserted, only ptr should change + if keep || i == out.offset || line[i] != 0x0a + return out + elseif i < 2 || line[i-1] != 0x0d + i -= 1 + else + i -= 2 end - A + out.size = i + if !out.append + out.ptr = i+1 + end + return out +end + +function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) + data = view(io.data, io.ptr:io.size) + # note: findfirst + copyto! is much faster than a single loop + # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
+ nout = nread = something(findfirst(==(0x0a), data), length(data)) + if !keep && nout > 0 && data[nout] == 0x0a + nout -= 1 + nout > 0 && data[nout] == 0x0d && (nout -= 1) + end + write(out, view(io.data, io.ptr:io.ptr+nout-1)) + io.ptr += nread + return out end +copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) +copyline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) + # copy-free crc32c of IOBuffer: function _crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000) diff --git a/base/iostream.jl b/base/iostream.jl index 23dfb53256e82..d91330960d59a 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -47,12 +47,31 @@ macro _lock_ios(s, expr) end """ - fd(stream) + fd(x) -> RawFD -Return the file descriptor backing the stream or file. Note that this function only applies -to synchronous `File`'s and `IOStream`'s not to any of the asynchronous streams. +Return the file descriptor backing the stream, file, or socket. + +`RawFD` objects can be passed directly to other languages via the `ccall` interface. + +!!! compat "Julia 1.12" + Prior to 1.12, this function returned an `Int` instead of a `RawFD`. You may use + `RawFD(fd(x))` to produce a `RawFD` in all Julia versions. + +!!! compat "Julia 1.12" + Getting the file descriptor of sockets is supported as of Julia 1.12. + +!!! warning + Duplicate the returned file descriptor with [`Libc.dup()`](@ref) before + passing it to another system that will take ownership of it (e.g. a C + library). Otherwise both the Julia object `x` and the other system may try + to close the file descriptor, which will cause errors. + +!!! warning + The file descriptors for sockets are asynchronous (i.e. `O_NONBLOCK` on + POSIX and `OVERLAPPED` on Windows), so they may behave differently than regular + file descriptors.
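As a concrete illustration of the ownership caveat above, a minimal sketch of handing a descriptor to C code, assuming a POSIX system; `c_consume_fd` and `libexample` are hypothetical stand-ins for a C routine and library that take ownership of the descriptor they receive:

```julia
mktemp() do path, io
    raw = fd(io)               # RawFD backing the temporary file's IOStream
    dup_fd = Libc.dup(raw)     # duplicate first, so ownership can be handed off safely
    # `c_consume_fd` / "libexample" are hypothetical; they model a C API that
    # closes the descriptor it is given. Passing the duplicate keeps `io`'s own
    # descriptor valid until the `do` block closes it.
    ccall((:c_consume_fd, "libexample"), Cint, (RawFD,), dup_fd)
end
```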
""" -fd(s::IOStream) = Int(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios)) +fd(s::IOStream) = RawFD(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios)) stat(s::IOStream) = stat(fd(s)) @@ -63,6 +82,8 @@ function close(s::IOStream) systemerror("close", bad) end +closewrite(s::IOStream) = nothing + function flush(s::IOStream) sigatomic_begin() bad = @_lock_ios s ccall(:ios_flush, Cint, (Ptr{Cvoid},), s.ios) != 0 @@ -222,8 +243,8 @@ end function filesize(s::IOStream) sz = @_lock_ios s ccall(:ios_filesize, Int64, (Ptr{Cvoid},), s.ios) if sz == -1 - err = Libc.errno() - throw(IOError(string("filesize: ", Libc.strerror(err), " for ", s.name), err)) + # if `s` is not seekable `ios_filesize` can fail, so fall back to slower stat method + sz = filesize(stat(s)) end return sz end @@ -290,12 +311,15 @@ function open(fname::String; lock = true, if !lock s._dolock = false end - systemerror("opening file $(repr(fname))", - ccall(:ios_file, Ptr{Cvoid}, - (Ptr{UInt8}, Cstring, Cint, Cint, Cint, Cint), - s.ios, fname, flags.read, flags.write, flags.create, flags.truncate) == C_NULL) + if ccall(:ios_file, Ptr{Cvoid}, + (Ptr{UInt8}, Cstring, Cint, Cint, Cint, Cint), + s.ios, fname, flags.read, flags.write, flags.create, flags.truncate) == C_NULL + systemerror("opening file $(repr(fname))") + end if flags.append - systemerror("seeking to end of file $fname", ccall(:ios_seek_end, Int64, (Ptr{Cvoid},), s.ios) != 0) + if ccall(:ios_seek_end, Int64, (Ptr{Cvoid},), s.ios) != 0 + systemerror("seeking to end of file $fname") + end end return s end @@ -443,14 +467,45 @@ end function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep) end +readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = + isascii(delim) ? readuntil_string(s, delim % UInt8, keep) : + String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep))) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) end -function readbytes_all!(s::IOStream, - b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}}, - nb::Integer) +function copyuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) + ensureroom(out, 1) # make sure we can read at least 1 byte, for iszero(n) check below + while true + d = out.data + len = length(d) + ptr = (out.append ? 
out.size+1 : out.ptr) + GC.@preserve d @_lock_ios s n= + Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), + s.ios, delim, pointer(d, ptr), (len - ptr + 1) % Csize_t)) + iszero(n) && break + ptr += n + found = (d[ptr - 1] == delim) + found && !keep && (ptr -= 1) + out.size = max(out.size, ptr - 1) + out.append || (out.ptr = ptr) + found && break + (eof(s) || len == out.maxsize) && break + len = min(2len + 64, out.maxsize) + ensureroom(out, len) + @assert length(out.data) >= len + end + return out +end + +function copyuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false) + @_lock_ios out @_lock_ios s ccall(:ios_copyuntil, Csize_t, + (Ptr{Cvoid}, Ptr{Cvoid}, UInt8, Cint), out.ios, s.ios, delim, keep) + return out +end + +function readbytes_all!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb::Integer) olb = lb = length(b) nr = 0 let l = s._dolock, slock = s.lock @@ -478,9 +533,7 @@ function readbytes_all!(s::IOStream, return nr end -function readbytes_some!(s::IOStream, - b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}}, - nb::Integer) +function readbytes_some!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb::Integer) olb = length(b) if nb > olb resize!(b, nb) @@ -509,10 +562,7 @@ requested bytes, until an error or end-of-file occurs. If `all` is `false`, at m `read` call is performed, and the amount of data returned is device-dependent. Note that not all stream types support the `all` option. """ -function readbytes!(s::IOStream, - b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}}, - nb=length(b); - all::Bool=true) +function readbytes!(s::IOStream, b::MutableDenseArrayType{UInt8}, nb=length(b); all::Bool=true) return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb) end diff --git a/base/irrationals.jl b/base/irrationals.jl index 6513e3269a4d7..76222997865c0 100644 --- a/base/irrationals.jl +++ b/base/irrationals.jl @@ -28,6 +28,9 @@ See also [`AbstractIrrational`](@ref). 
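The irrationals hunk that follows adds `typemin`/`typemax` methods for `Irrational` singletons and reworks `promote_rule` so that promotion with wrapper number types cannot recurse forever. A minimal sketch of the behavior those definitions are expected to give, assuming the methods added below are in effect:

```julia
# Irrational{sym} is a singleton type, so its extrema are the value itself:
typemin(typeof(π)) === π          # true
typemax(typeof(π)) === π          # true

# Promotion bottoms out at Float64 instead of recursing:
promote_type(typeof(π), Int)           # Float64
promote_type(typeof(π), Complex{Int})  # ComplexF64
```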
""" struct Irrational{sym} <: AbstractIrrational end +typemin(::Type{T}) where {T<:Irrational} = T() +typemax(::Type{T}) where {T<:Irrational} = T() + show(io::IO, x::Irrational{sym}) where {sym} = print(io, sym) function show(io::IO, ::MIME"text/plain", x::Irrational{sym}) where {sym} @@ -42,14 +45,22 @@ promote_rule(::Type{<:AbstractIrrational}, ::Type{Float16}) = Float16 promote_rule(::Type{<:AbstractIrrational}, ::Type{Float32}) = Float32 promote_rule(::Type{<:AbstractIrrational}, ::Type{<:AbstractIrrational}) = Float64 promote_rule(::Type{<:AbstractIrrational}, ::Type{T}) where {T<:Real} = promote_type(Float64, T) -promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} = promote_type(promote_type(S, real(T)), T) + +function promote_rule(::Type{S}, ::Type{T}) where {S<:AbstractIrrational,T<:Number} + U = promote_type(S, real(T)) + if S <: U + # prevent infinite recursion + promote_type(Float64, T) + else + promote_type(U, T) + end +end AbstractFloat(x::AbstractIrrational) = Float64(x)::Float64 Float16(x::AbstractIrrational) = Float16(Float32(x)::Float32) Complex{T}(x::AbstractIrrational) where {T<:Real} = Complex{T}(T(x)) -# XXX this may change `DEFAULT_PRECISION`, thus not effect free -@assume_effects :total function Rational{T}(x::AbstractIrrational) where T<:Integer +function _irrational_to_rational(::Type{T}, x::AbstractIrrational) where T<:Integer o = precision(BigFloat) p = 256 while true @@ -63,13 +74,16 @@ Complex{T}(x::AbstractIrrational) where {T<:Real} = Complex{T}(T(x)) p += 32 end end -Rational{BigInt}(x::AbstractIrrational) = throw(ArgumentError("Cannot convert an AbstractIrrational to a Rational{BigInt}: use rationalize(BigInt, x) instead")) +Rational{T}(x::AbstractIrrational) where {T<:Integer} = _irrational_to_rational(T, x) +_throw_argument_error_irrational_to_rational_bigint() = throw(ArgumentError("Cannot convert an AbstractIrrational to a Rational{BigInt}: use rationalize(BigInt, x) instead")) +Rational{BigInt}(::AbstractIrrational) = _throw_argument_error_irrational_to_rational_bigint() -@assume_effects :total function (t::Type{T})(x::AbstractIrrational, r::RoundingMode) where T<:Union{Float32,Float64} +function _irrational_to_float(::Type{T}, x::AbstractIrrational, r::RoundingMode) where T<:Union{Float32,Float64} setprecision(BigFloat, 256) do T(BigFloat(x)::BigFloat, r) end end +(::Type{T})(x::AbstractIrrational, r::RoundingMode) where {T<:Union{Float32,Float64}} = _irrational_to_float(T, x, r) float(::Type{<:AbstractIrrational}) = Float64 @@ -107,14 +121,18 @@ end <=(x::AbstractFloat, y::AbstractIrrational) = x < y # Irrational vs Rational -@assume_effects :total function rationalize(::Type{T}, x::AbstractIrrational; tol::Real=0) where T +function _rationalize_irrational(::Type{T}, x::AbstractIrrational, tol::Real) where {T<:Integer} return rationalize(T, big(x), tol=tol) end -@assume_effects :total function lessrational(rx::Rational{<:Integer}, x::AbstractIrrational) - # an @assume_effects :total version of `<` for determining if the rationalization of - # an irrational number required rounding up or down +function rationalize(::Type{T}, x::AbstractIrrational; tol::Real=0) where {T<:Integer} + return _rationalize_irrational(T, x, tol) +end +function _lessrational(rx::Rational, x::AbstractIrrational) return rx < big(x) end +function lessrational(rx::Rational, x::AbstractIrrational) + return _lessrational(rx, x) +end function <(x::AbstractIrrational, y::Rational{T}) where T T <: Unsigned && x < 0.0 && return true rx = rationalize(T, x) @@ 
-213,7 +231,7 @@ function irrational(sym, val, def) esym = esc(sym) qsym = esc(Expr(:quote, sym)) bigconvert = isa(def,Symbol) ? quote - function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=MPFR.ROUNDING_MODE[]; precision=precision(BigFloat)) + function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=Rounding.rounding_raw(BigFloat); precision=precision(BigFloat)) c = BigFloat(;precision=precision) ccall(($(string("mpfr_const_", def)), :libmpfr), Cint, (Ref{BigFloat}, MPFR.MPFRRoundingMode), c, r) diff --git a/base/iterators.jl b/base/iterators.jl index 11e94d3384de8..c6278e6284d70 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -11,35 +11,28 @@ const Base = parentmodule(@__MODULE__) using .Base: @inline, Pair, Pairs, AbstractDict, IndexLinear, IndexStyle, AbstractVector, Vector, SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo, - @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, + @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, IdDict, AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom, - (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, + (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, =>, missing, any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex, - tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape + tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape, LazyString, + afoldl using Core: @doc -if Base !== Core.Compiler using .Base: cld, fld, SubArray, view, resize!, IndexCartesian using .Base.Checked: checked_mul -else - # Checked.checked_mul is not available during bootstrapping: - const checked_mul = * -end import .Base: first, last, isempty, length, size, axes, ndims, - eltype, IteratorSize, IteratorEltype, + eltype, IteratorSize, IteratorEltype, promote_typejoin, haskey, keys, values, pairs, getindex, setindex!, get, iterate, popfirst!, isdone, peek, intersect -export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap - -if Base !== Core.Compiler -export partition -end +export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap, partition +public accumulate, filter, map, peel, reverse, Stateful """ Iterators.map(f, iterators...) @@ -59,10 +52,7 @@ julia> collect(Iterators.map(x -> x^2, 1:3)) 9 ``` """ -map(f, args...) = Base.Generator(f, args...) - -tail_if_any(::Tuple{}) = () -tail_if_any(x::Tuple) = tail(x) +map(f, arg, args...) = Base.Generator(f, arg, args...) _min_length(a, b, ::IsInfinite, ::IsInfinite) = min(length(a),length(b)) # inherit behaviour, error _min_length(a, b, A, ::IsInfinite) = length(a) @@ -281,10 +271,8 @@ pairs(v::Core.SimpleVector) = Pairs(v, LinearIndices(v)) pairs(A::AbstractVector) = pairs(IndexLinear(), A) # pairs(v::Pairs) = v # listed for reference, but already defined from being an AbstractDict -if Base !== Core.Compiler pairs(::IndexCartesian, A::AbstractArray) = Pairs(A, Base.CartesianIndices(axes(A))) pairs(A::AbstractArray) = pairs(IndexCartesian(), A) -end length(v::Pairs) = length(getfield(v, :itr)) axes(v::Pairs) = axes(getfield(v, :itr)) @@ -387,6 +375,22 @@ function _zip_min_length(is) end end _zip_min_length(is::Tuple{}) = nothing + +# For a collection of iterators `is`, returns a tuple (b, n), where +# `b` is true when every component of `is` has a statically-known finite +# length and all such lengths are equal. 
Otherwise, `b` is false. +# `n` is an implementation detail, and will be the `length` of the first +# iterator if it is statically-known and finite. Otherwise, `n` is `nothing`. +function _zip_lengths_finite_equal(is) + i = is[1] + if IteratorSize(i) isa Union{IsInfinite, SizeUnknown} + return (false, nothing) + else + b, n = _zip_lengths_finite_equal(tail(is)) + return (b && (n === nothing || n == length(i)), length(i)) + end +end +_zip_lengths_finite_equal(is::Tuple{}) = (true, nothing) size(z::Zip) = _promote_tuple_shape(Base.map(size, z.is)...) axes(z::Zip) = _promote_tuple_shape(Base.map(axes, z.is)...) _promote_tuple_shape((a,)::Tuple{OneTo}, (b,)::Tuple{OneTo}) = (intersect(a, b),) @@ -468,8 +472,13 @@ zip_iteratoreltype() = HasEltype() zip_iteratoreltype(a) = a zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail...)) -reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is))) +function reverse(z::Zip) + if !first(_zip_lengths_finite_equal(z.is)) + throw(ArgumentError("Cannot reverse zipped iterators of unknown, infinite, or unequal lengths")) + end + Zip(Base.map(reverse, z.is)) +end # filter @@ -491,6 +500,15 @@ invocation of `filter`. Calls to `flt` will be made when iterating over the returned iterable object. These calls are not cached and repeated calls will be made when reiterating. +!!! warning + Subsequent *lazy* transformations on the iterator returned from `filter`, such + as those performed by `Iterators.reverse` or `cycle`, will also delay calls to `flt` + until collecting or iterating over the returned iterable object. If the filter + predicate is nondeterministic or its return values depend on the order of iteration + over the elements of `itr`, composition with lazy transformations may result in + surprising behavior. If this is undesirable, either ensure that `flt` is a pure + function or collect intermediate `filter` iterators before further transformations. + See [`Base.filter`](@ref) for an eager implementation of filtering for arrays. # Examples @@ -705,7 +723,7 @@ struct Take{I} xs::I n::Int function Take(xs::I, n::Integer) where {I} - n < 0 && throw(ArgumentError("Take length must be nonnegative")) + n < 0 && throw(ArgumentError("Take length must be non-negative")) return new{I}(xs, n) end end @@ -764,7 +782,7 @@ struct Drop{I} xs::I n::Int function Drop(xs::I, n::Integer) where {I} - n < 0 && throw(ArgumentError("Drop length must be nonnegative")) + n < 0 && throw(ArgumentError("Drop length must be non-negative")) return new{I}(xs, n) end end @@ -923,12 +941,17 @@ struct Cycle{I} end """ - cycle(iter) + cycle(iter[, n::Int]) An iterator that cycles through `iter` forever. -If `iter` is empty, so is `cycle(iter)`. +If `n` is specified, then it cycles through `iter` that many times. +When `iter` is empty, so are `cycle(iter)` and `cycle(iter, n)`. + +`Iterators.cycle(iter, n)` is the lazy equivalent of [`Base.repeat`](@ref)`(vector, n)`, +while [`Iterators.repeated`](@ref)`(iter, n)` is the lazy [`Base.fill`](@ref)`(item, n)`. -See also: [`Iterators.repeated`](@ref), [`Base.repeat`](@ref). +!!! compat "Julia 1.11" + The method `cycle(iter, n)` was added in Julia 1.11. 
# Examples ```jldoctest @@ -937,13 +960,23 @@ julia> for (i, v) in enumerate(Iterators.cycle("hello")) i > 10 && break end hellohelloh + +julia> foreach(print, Iterators.cycle(['j', 'u', 'l', 'i', 'a'], 3)) +juliajuliajulia + +julia> repeat([1,2,3], 4) == collect(Iterators.cycle([1,2,3], 4)) +true + +julia> fill([1,2,3], 4) == collect(Iterators.repeated([1,2,3], 4)) +true ``` """ cycle(xs) = Cycle(xs) +cycle(xs, n::Integer) = flatten(repeated(xs, n)) eltype(::Type{Cycle{I}}) where {I} = eltype(I) IteratorEltype(::Type{Cycle{I}}) where {I} = IteratorEltype(I) -IteratorSize(::Type{Cycle{I}}) where {I} = IsInfinite() +IteratorSize(::Type{Cycle{I}}) where {I} = IsInfinite() # XXX: this is false if iterator ever becomes empty iterate(it::Cycle) = iterate(it.xs) isdone(it::Cycle) = isdone(it.xs) @@ -970,7 +1003,7 @@ repeated(x) = Repeated(x) An iterator that generates the value `x` forever. If `n` is specified, generates `x` that many times (equivalent to `take(repeated(x), n)`). -See also: [`Iterators.cycle`](@ref), [`Base.repeat`](@ref). +See also [`fill`](@ref Base.fill), and compare [`Iterators.cycle`](@ref). # Examples ```jldoctest @@ -982,6 +1015,12 @@ julia> collect(a) [1 2] [1 2] [1 2] + +julia> ans == fill([1 2], 4) +true + +julia> Iterators.cycle([1 2], 4) |> collect |> println +[1, 2, 1, 2, 1, 2, 1, 2] ``` """ repeated(x, n::Integer) = take(repeated(x), Int(n)) @@ -1048,7 +1087,7 @@ _prod_size(t::Tuple) = (_prod_size1(t[1], IteratorSize(t[1]))..., _prod_size(tai _prod_size1(a, ::HasShape) = size(a) _prod_size1(a, ::HasLength) = (length(a),) _prod_size1(a, A) = - throw(ArgumentError("Cannot compute size for object of type $(typeof(a))")) + throw(ArgumentError(LazyString("Cannot compute size for object of type ", typeof(a)))) axes(P::ProductIterator) = _prod_indices(P.iterators) _prod_indices(::Tuple{}) = () @@ -1056,7 +1095,7 @@ _prod_indices(t::Tuple) = (_prod_axes1(t[1], IteratorSize(t[1]))..., _prod_indic _prod_axes1(a, ::HasShape) = axes(a) _prod_axes1(a, ::HasLength) = (OneTo(length(a)),) _prod_axes1(a, A) = - throw(ArgumentError("Cannot compute indices for object of type $(typeof(a))")) + throw(ArgumentError(LazyString("Cannot compute indices for object of type ", typeof(a)))) ndims(p::ProductIterator) = length(axes(p)) length(P::ProductIterator) = reduce(checked_mul, size(P); init=1) @@ -1164,7 +1203,13 @@ julia> [(x,y) for x in 0:1 for y in 'a':'c'] # collects generators involving It flatten(itr) = Flatten(itr) eltype(::Type{Flatten{I}}) where {I} = eltype(eltype(I)) -eltype(::Type{Flatten{Tuple{}}}) = eltype(Tuple{}) + +# For tuples, we statically know the element type of each index, so we can compute +# this at compile time. +function eltype(::Type{Flatten{I}}) where {I<:Union{Tuple,NamedTuple}} + afoldl((T, i) -> promote_typejoin(T, eltype(i)), Union{}, fieldtypes(I)...) +end + IteratorEltype(::Type{Flatten{I}}) where {I} = _flatteneltype(I, IteratorEltype(I)) IteratorEltype(::Type{Flatten{Tuple{}}}) = IteratorEltype(Tuple{}) _flatteneltype(I, ::HasEltype) = IteratorEltype(eltype(I)) @@ -1252,7 +1297,6 @@ true """ flatmap(f, c...) = flatten(map(f, c...)) -if Base !== Core.Compiler # views are not defined @doc """ partition(collection, n) @@ -1392,43 +1436,30 @@ julia> sum(a) # Sum the remaining elements 7 ``` """ -mutable struct Stateful{T, VS, N<:Integer} +mutable struct Stateful{T, VS} itr::T # A bit awkward right now, but adapted to the new iteration protocol nextvalstate::Union{VS, Nothing} - - # Number of remaining elements, if itr is HasLength or HasShape. 
- # if not, store -1 - number_of_consumed_elements. - # This allows us to defer calculating length until asked for. - # See PR #45924 - remaining::N @inline function Stateful{<:Any, Any}(itr::T) where {T} - itl = iterlength(itr) - new{T, Any, typeof(itl)}(itr, iterate(itr), itl) + return new{T, Any}(itr, iterate(itr)) end @inline function Stateful(itr::T) where {T} VS = approx_iter_type(T) - itl = iterlength(itr) - return new{T, VS, typeof(itl)}(itr, iterate(itr)::VS, itl) + return new{T, VS}(itr, iterate(itr)::VS) end end -function iterlength(it)::Signed - if IteratorSize(it) isa Union{HasShape, HasLength} - return length(it) - else - -1 - end +function reset!(s::Stateful) + setfield!(s, :nextvalstate, iterate(s.itr)) # bypass convert call of setproperty! + return s end - -function reset!(s::Stateful{T,VS}, itr::T=s.itr) where {T,VS} +function reset!(s::Stateful{T}, itr::T) where {T} s.itr = itr - itl = iterlength(itr) - setfield!(s, :nextvalstate, iterate(itr)) - s.remaining = itl - s + reset!(s) + return s end + # Try to find an appropriate type for the (value, state tuple), # by doing a recursive unrolling of the iteration protocol up to # fixpoint. @@ -1450,7 +1481,6 @@ end Stateful(x::Stateful) = x convert(::Type{Stateful}, itr) = Stateful(itr) - @inline isdone(s::Stateful, st=nothing) = s.nextvalstate === nothing @inline function popfirst!(s::Stateful) @@ -1460,8 +1490,6 @@ convert(::Type{Stateful}, itr) = Stateful(itr) else val, state = vs Core.setfield!(s, :nextvalstate, iterate(s.itr, state)) - rem = s.remaining - s.remaining = rem - typeof(rem)(1) return val end end @@ -1471,22 +1499,10 @@ end return ns !== nothing ? ns[1] : sentinel end @inline iterate(s::Stateful, state=nothing) = s.nextvalstate === nothing ? nothing : (popfirst!(s), nothing) -IteratorSize(::Type{<:Stateful{T}}) where {T} = IteratorSize(T) isa HasShape ? HasLength() : IteratorSize(T) +IteratorSize(::Type{<:Stateful{T}}) where {T} = IteratorSize(T) isa IsInfinite ? IsInfinite() : SizeUnknown() eltype(::Type{<:Stateful{T}}) where {T} = eltype(T) IteratorEltype(::Type{<:Stateful{T}}) where {T} = IteratorEltype(T) -function length(s::Stateful) - rem = s.remaining - # If rem is actually remaining length, return it. - # else, rem is number of consumed elements. - if rem >= 0 - rem - else - length(s.itr) - (typeof(rem)(1) - rem) - end -end -end # if statement several hundred lines above - """ only(x) @@ -1517,7 +1533,9 @@ Stacktrace: [...] 
``` """ -@propagate_inbounds function only(x) +@propagate_inbounds only(x) = _only(x, iterate) + +@propagate_inbounds function _only(x, ::typeof(iterate)) i = iterate(x) @boundscheck if i === nothing throw(ArgumentError("Collection is empty, must contain exactly 1 element")) @@ -1529,18 +1547,58 @@ Stacktrace: return ret end -# Collections of known size -only(x::Ref) = x[] -only(x::Number) = x -only(x::Char) = x +@inline function _only(x, ::typeof(first)) + @boundscheck if length(x) != 1 + throw(ArgumentError("Collection must contain exactly 1 element")) + end + @inbounds first(x) +end + +@propagate_inbounds only(x::IdDict) = _only(x, first) + +# Specific error messages for tuples and named tuples only(x::Tuple{Any}) = x[1] only(x::Tuple) = throw( ArgumentError("Tuple contains $(length(x)) elements, must contain exactly 1 element") ) -only(a::AbstractArray{<:Any, 0}) = @inbounds return a[] only(x::NamedTuple{<:Any, <:Tuple{Any}}) = first(x) only(x::NamedTuple) = throw( ArgumentError("NamedTuple contains $(length(x)) elements, must contain exactly 1 element") ) +""" + IterableStatePairs(x) + +This internal type is returned by [`pairs`](@ref), when the key is the same as +the state of `iterate`. This allows the iterator to determine the key => value +pairs by only calling iterate on the values. + +""" +struct IterableStatePairs{T} + x::T +end + +IteratorSize(::Type{<:IterableStatePairs{T}}) where T = IteratorSize(T) +length(x::IterableStatePairs) = length(x.x) +Base.eltype(::Type{IterableStatePairs{T}}) where T = Pair{<:Any, eltype(T)} + +function iterate(x::IterableStatePairs, state=first(keys(x.x))) + it = iterate(x.x, state) + it === nothing && return nothing + (state => first(it), last(it)) +end + +reverse(x::IterableStatePairs) = IterableStatePairs(Iterators.reverse(x.x)) +reverse(x::IterableStatePairs{<:Iterators.Reverse}) = IterableStatePairs(x.x.itr) + +function iterate(x::IterableStatePairs{<:Iterators.Reverse}, state=last(keys(x.x.itr))) + it = iterate(x.x, state) + it === nothing && return nothing + (state => first(it), last(it)) +end + +# According to the docs of iterate(::AbstractString), the iteration state must +# be the same as the keys, so this is a valid optimization (see #51631) +pairs(s::AbstractString) = IterableStatePairs(s) + end diff --git a/base/libc.jl b/base/libc.jl index 99e8dce6b87e5..7364f6e6677fe 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -6,18 +6,18 @@ Interface to libc, the C standard library. """ Libc import Base: transcode, windowserror, show -# these need to be defined seperately for bootstrapping but belong to Libc +# these need to be defined separately for bootstrapping but belong to Libc import Base: memcpy, memmove, memset, memcmp import Core.Intrinsics: bitcast export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, memcpy, memmove, memset, calloc, realloc, errno, strerror, flush_cstdio, systemsleep, time, - transcode + transcode, mkfifo if Sys.iswindows() export GetLastError, FormatMessage end -include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "errno_h.jl")) # include($BUILDROOT/base/errno_h.jl) +include(string(Base.BUILDROOT, "errno_h.jl")) # include($BUILDROOT/base/errno_h.jl) ## RawFD ## @@ -36,15 +36,27 @@ RawFD(fd::Integer) = bitcast(RawFD, Cint(fd)) RawFD(fd::RawFD) = fd Base.cconvert(::Type{Cint}, fd::RawFD) = bitcast(Cint, fd) +""" + dup(src::RawFD[, target::RawFD])::RawFD + +Duplicate the file descriptor `src` so that the duplicate refers to the same OS +resource (e.g. a file or socket). 
A `target` file descriptor may be optionally +be passed to use for the new duplicate. +""" dup(x::RawFD) = ccall((@static Sys.iswindows() ? :_dup : :dup), RawFD, (RawFD,), x) dup(src::RawFD, target::RawFD) = systemerror("dup", -1 == ccall((@static Sys.iswindows() ? :_dup2 : :dup2), Int32, (RawFD, RawFD), src, target)) -show(io::IO, fd::RawFD) = print(io, "RawFD(", bitcast(UInt32, fd), ')') # avoids invalidation via show_default +show(io::IO, fd::RawFD) = print(io, "RawFD(", bitcast(Int32, fd), ')') # avoids invalidation via show_default # Wrapper for an OS file descriptor (for Windows) if Sys.iswindows() + @doc """ + WindowsRawSocket + + Primitive type which wraps the native Windows file `HANDLE`. + """ primitive type WindowsRawSocket sizeof(Ptr) * 8 end # On Windows file descriptors are HANDLE's and 64-bit on 64-bit Windows WindowsRawSocket(handle::Ptr{Cvoid}) = bitcast(WindowsRawSocket, handle) WindowsRawSocket(handle::WindowsRawSocket) = handle @@ -75,6 +87,34 @@ end ## FILE (not auto-finalized) ## +""" + FILE(::Ptr) + FILE(::IO) + +A libc `FILE*`, representing an opened file. + +It can be passed as a `Ptr{FILE}` argument to [`ccall`](@ref) and also supports +[`seek`](@ref), [`position`](@ref) and [`close`](@ref). + +A `FILE` can be constructed from an ordinary `IO` object, provided it is an open file. It +must be closed afterward. + +# Examples +```jldoctest +julia> using Base.Libc + +julia> mktemp() do _, io + # write to the temporary file using `puts(char*, FILE*)` from libc + file = FILE(io) + ccall(:fputs, Cint, (Cstring, Ptr{FILE}), "hello world", file) + close(file) + # read the file again + seek(io, 0) + read(io, String) + end +"hello world" +``` +""" struct FILE ptr::Ptr{Cvoid} end @@ -409,6 +449,33 @@ function srand(seed::Integer=_make_uint64_seed()) ccall(:jl_srand, Cvoid, (UInt64,), seed % UInt64) end +""" + mkfifo(path::AbstractString, [mode::Integer]) -> path + +Make a FIFO special file (a named pipe) at `path`. Return `path` as-is on success. + +`mkfifo` is supported only in Unix platforms. + +!!! compat "Julia 1.11" + `mkfifo` requires at least Julia 1.11. +""" +function mkfifo( + path::AbstractString, + mode::Integer = Base.S_IRUSR | Base.S_IWUSR | Base.S_IRGRP | Base.S_IWGRP | + Base.S_IROTH | Base.S_IWOTH, +) + @static if Sys.isunix() + # Default `mode` is compatible with `mkfifo` CLI in coreutils. + ret = ccall(:mkfifo, Cint, (Cstring, Base.Cmode_t), path, mode) + systemerror("mkfifo", ret == -1) + return path + else + # Using normal `error` because `systemerror("mkfifo", ENOTSUP)` does not + # seem to work on Windows. + error("mkfifo: Operation not supported") + end +end + struct Cpasswd username::Cstring uid::Culong @@ -438,6 +505,26 @@ struct Group mem::Vector{String} end +# Gets password-file entry for default user, or a subset thereof +# (e.g., uid and guid are set to -1 on Windows) +function getpw() + ref_pd = Ref(Cpasswd()) + ret = ccall(:uv_os_get_passwd, Cint, (Ref{Cpasswd},), ref_pd) + Base.uv_error("getpw", ret) + + pd = ref_pd[] + pd = Passwd( + pd.username == C_NULL ? "" : unsafe_string(pd.username), + pd.uid, + pd.gid, + pd.shell == C_NULL ? "" : unsafe_string(pd.shell), + pd.homedir == C_NULL ? "" : unsafe_string(pd.homedir), + pd.gecos == C_NULL ? 
"" : unsafe_string(pd.gecos), + ) + ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd) + return pd +end + function getpwuid(uid::Unsigned, throw_error::Bool=true) ref_pd = Ref(Cpasswd()) ret = ccall(:uv_os_get_passwd2, Cint, (Ref{Cpasswd}, Culong), ref_pd, uid) @@ -457,6 +544,7 @@ function getpwuid(uid::Unsigned, throw_error::Bool=true) ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd) return pd end + function getgrgid(gid::Unsigned, throw_error::Bool=true) ref_gp = Ref(Cgroup()) ret = ccall(:uv_os_get_group, Cint, (Ref{Cgroup}, Culong), ref_gp, gid) @@ -487,6 +575,5 @@ geteuid() = ccall(:jl_geteuid, Culong, ()) # Include dlopen()/dlpath() code include("libdl.jl") -using .Libdl end # module diff --git a/base/libdl.jl b/base/libdl.jl index fdf6103d1800b..199d847572ca4 100644 --- a/base/libdl.jl +++ b/base/libdl.jl @@ -9,7 +9,7 @@ import Base.DL_LOAD_PATH export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e, - dlpath, find_library, dlext, dllist + dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, BundledLazyLibraryPath """ DL_LOAD_PATH @@ -45,6 +45,9 @@ applicable. """ (RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW) +# The default flags for `dlopen()` +const default_rtld_flags = RTLD_LAZY | RTLD_DEEPBIND + """ dlsym(handle, sym; throw_error::Bool = true) @@ -72,8 +75,8 @@ end Look up a symbol from a shared library handle, silently return `C_NULL` on lookup failure. This method is now deprecated in favor of `dlsym(handle, sym; throw_error=false)`. """ -function dlsym_e(hnd::Ptr, s::Union{Symbol,AbstractString}) - return something(dlsym(hnd, s; throw_error=false), C_NULL) +function dlsym_e(args...) + return something(dlsym(args...; throw_error=false), C_NULL) end """ @@ -110,12 +113,12 @@ If the library cannot be found, this method throws an error, unless the keyword """ function dlopen end -dlopen(s::Symbol, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND; kwargs...) = +dlopen(s::Symbol, flags::Integer = default_rtld_flags; kwargs...) = dlopen(string(s), flags; kwargs...) -function dlopen(s::AbstractString, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND; throw_error::Bool = true) +function dlopen(s::AbstractString, flags::Integer = default_rtld_flags; throw_error::Bool = true) ret = ccall(:jl_load_dynamic_library, Ptr{Cvoid}, (Cstring,UInt32,Cint), s, flags, Cint(throw_error)) - if ret == C_NULL + if !throw_error && ret == C_NULL return nothing end return ret @@ -127,7 +130,7 @@ end Wrapper for usage with `do` blocks to automatically close the dynamic library once control flow leaves the `do` block scope. -# Example +# Examples ```julia vendor = dlopen("libblas") do lib if Libdl.dlsym(lib, :openblas_set_num_threads; throw_error=false) !== nothing @@ -138,10 +141,10 @@ vendor = dlopen("libblas") do lib end ``` """ -function dlopen(f::Function, args...; kwargs...) +function dlopen(f::Function, name, args...; kwargs...) hdl = nothing try - hdl = dlopen(args...; kwargs...) + hdl = dlopen(name, args...; kwargs...) f(hdl) finally dlclose(hdl) @@ -231,7 +234,7 @@ end Get the full path of the library `libname`. -# Example +# Examples ```julia-repl julia> dlpath("libjulia") ``` @@ -314,4 +317,135 @@ function dllist() return dynamic_libraries end + +""" + LazyLibraryPath + +Helper type for lazily constructed library paths for use with `LazyLibrary`. +Arguments are passed to `joinpath()`. 
Arguments must be able to have +`string()` called on them. + +``` +libfoo = LazyLibrary(LazyLibraryPath(prefix, "lib/libfoo.so.1.2.3")) +``` +""" +struct LazyLibraryPath + pieces::Vector + LazyLibraryPath(pieces::Vector) = new(pieces) +end +LazyLibraryPath(args...) = LazyLibraryPath(collect(args)) +Base.string(llp::LazyLibraryPath) = joinpath(string.(llp.pieces)...)::String +Base.cconvert(::Type{Cstring}, llp::LazyLibraryPath) = Base.cconvert(Cstring, string(llp)) +# Define `print` so that we can wrap this in a `LazyString` +Base.print(io::IO, llp::LazyLibraryPath) = print(io, string(llp)) + +# Helper to get `Sys.BINDIR` at runtime +struct SysBindirGetter; end +Base.string(::SysBindirGetter) = dirname(Sys.BINDIR) + +""" + BundledLazyLibraryPath + +Helper type for lazily constructed library paths that are stored within the +bundled Julia distribution, primarily for use by Base modules. + +``` +libfoo = LazyLibrary(BundledLazyLibraryPath("lib/libfoo.so.1.2.3")) +``` +""" +BundledLazyLibraryPath(subpath) = LazyLibraryPath(SysBindirGetter(), subpath) + + +""" + LazyLibrary(name, flags = , + dependencies = LazyLibrary[], on_load_callback = nothing) + +Represents a lazily-loaded library that opens itself and its dependencies on first usage +in a `dlopen()`, `dlsym()`, or `ccall()` usage. While this structure contains the +ability to run arbitrary code on first load via `on_load_callback`, we caution that this +should be used sparingly, as it is not expected that `ccall()` should result in large +amounts of Julia code being run. You may call `ccall()` from within the +`on_load_callback` but only for the current library and its dependencies, and user should +not call `wait()` on any tasks within the on load callback. +""" +mutable struct LazyLibrary + # Name and flags to open with + const path + const flags::UInt32 + + # Dependencies that must be loaded before we can load + dependencies::Vector{LazyLibrary} + + # Function that get called once upon initial load + on_load_callback + const lock::Base.ReentrantLock + + # Pointer that we eventually fill out upon first `dlopen()` + @atomic handle::Ptr{Cvoid} + function LazyLibrary(path; flags = default_rtld_flags, dependencies = LazyLibrary[], + on_load_callback = nothing) + return new( + path, + UInt32(flags), + collect(dependencies), + on_load_callback, + Base.ReentrantLock(), + C_NULL, + ) + end +end + +# We support adding dependencies only because of very special situations +# such as LBT needing to have OpenBLAS_jll added as a dependency dynamically. +function add_dependency!(ll::LazyLibrary, dep::LazyLibrary) + @lock ll.lock begin + push!(ll.dependencies, dep) + end +end + +# Register `jl_libdl_dlopen_func` so that `ccall()` lowering knows +# how to call `dlopen()`, during bootstrap. +# See `post_image_load_hooks` for non-bootstrapping. +Base.unsafe_store!(cglobal(:jl_libdl_dlopen_func, Any), dlopen) + +function dlopen(ll::LazyLibrary, flags::Integer = ll.flags; kwargs...) + handle = @atomic :acquire ll.handle + if handle == C_NULL + @lock ll.lock begin + # Check to see if another thread has already run this + if ll.handle == C_NULL + # Ensure that all dependencies are loaded + for dep in ll.dependencies + dlopen(dep; kwargs...) + end + + # Load our library + handle = dlopen(string(ll.path), flags; kwargs...) + @atomic :release ll.handle = handle + + # Only the thread that loaded the library calls the `on_load_callback()`. 
+ if ll.on_load_callback !== nothing + ll.on_load_callback() + end + end + end + else + # Invoke our on load callback, if it exists + if ll.on_load_callback !== nothing + # This empty lock protects against the case where we have updated + # `ll.handle` in the branch above, but not exited the lock. We want + # a second thread that comes in at just the wrong time to have to wait + # for that lock to be released (and thus for the on_load_callback to + # have finished), hence the empty lock here. But we want the + # on_load_callback thread to bypass this, which will be happen thanks + # to the fact that we're using a reentrant lock here. + @lock ll.lock begin end + end + end + + return handle +end +dlopen(x::Any) = throw(TypeError(:dlopen, "", Union{Symbol,String,LazyLibrary}, x)) +dlsym(ll::LazyLibrary, args...; kwargs...) = dlsym(dlopen(ll), args...; kwargs...) +dlpath(ll::LazyLibrary) = dlpath(dlopen(ll)) end # module Libdl diff --git a/base/libuv.jl b/base/libuv.jl index 24a04f5bcad78..306854e9f4436 100644 --- a/base/libuv.jl +++ b/base/libuv.jl @@ -2,7 +2,7 @@ # Core definitions for interacting with the libuv library from Julia -include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "uv_constants.jl")) # include($BUILDROOT/base/uv_constants.jl) +include(string(Base.BUILDROOT, "uv_constants.jl")) # include($BUILDROOT/base/uv_constants.jl) # convert UV handle data to julia object, checking for null function uv_sizeof_handle(handle) @@ -26,10 +26,10 @@ for r in uv_req_types @eval const $(Symbol("_sizeof_", lowercase(string(r)))) = uv_sizeof_req($r) end -uv_handle_data(handle) = ccall(:jl_uv_handle_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) -uv_req_data(handle) = ccall(:jl_uv_req_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) -uv_req_set_data(req, data) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data) -uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data) +uv_handle_data(handle) = ccall(:uv_handle_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) +uv_req_data(handle) = ccall(:uv_req_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) +uv_req_set_data(req, data) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data) +uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:uv_handle_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data) macro handle_as(hand, typ) return quote @@ -82,7 +82,13 @@ struct IOError <: Exception IOError(msg::AbstractString, code::Integer) = new(msg, code) end -showerror(io::IO, e::IOError) = print(io, "IOError: ", e.msg) +function showerror(io::IO, e::IOError) + print(io, "IOError: ", e.msg) + if e.code == UV_ENOENT && '~' in e.msg + print(io, "\nMany shells expand '~' to the home directory in unquoted strings. To replicate this behavior, call", + " `expanduser` to expand the '~' character to the user’s home directory.") + end +end function _UVError(pfx::AbstractString, code::Integer) code = Int32(code) @@ -103,8 +109,17 @@ uv_error(prefix::AbstractString, c::Integer) = c < 0 ? 
throw(_UVError(prefix, c) eventloop() = ccall(:jl_global_event_loop, Ptr{Cvoid}, ()) -uv_unref(h::Ptr{Cvoid}) = ccall(:uv_unref, Cvoid, (Ptr{Cvoid},), h) -uv_ref(h::Ptr{Cvoid}) = ccall(:uv_ref, Cvoid, (Ptr{Cvoid},), h) +function uv_unref(h::Ptr{Cvoid}) + iolock_begin() + ccall(:uv_unref, Cvoid, (Ptr{Cvoid},), h) + iolock_end() +end + +function uv_ref(h::Ptr{Cvoid}) + iolock_begin() + ccall(:uv_ref, Cvoid, (Ptr{Cvoid},), h) + iolock_end() +end function process_events() return ccall(:jl_process_events, Int32, ()) @@ -118,18 +133,21 @@ function uv_return_spawn end function uv_asynccb end function uv_timercb end -function reinit_stdio() - global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ())) - global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ())) - global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ())) +reinit_stdio() = _reinit_stdio() +# we need this so it can be called by codegen to print errors, even after +# reinit_stdio has been redefined by the juliac build script. +function _reinit_stdio() + global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))::IO + global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))::IO + global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))::IO opts = JLOptions() - if opts.color != 0 - have_color = (opts.color == 1) + color = colored_text(opts) + if !isnothing(color) if !isa(stdout, TTY) - global stdout = IOContext(stdout, :color => have_color) + global stdout = IOContext(stdout, :color => color::Bool) end if !isa(stderr, TTY) - global stderr = IOContext(stderr, :color => have_color) + global stderr = IOContext(stderr, :color => color::Bool) end end nothing diff --git a/base/linking.jl b/base/linking.jl index fd21ce74c9268..953d80c82cc42 100644 --- a/base/linking.jl +++ b/base/linking.jl @@ -79,7 +79,7 @@ end const VERBOSE = Ref{Bool}(false) function __init__() - VERBOSE[] = Base.get_bool_env("JULIA_VERBOSE_LINKING", false) + VERBOSE[] = something(Base.get_bool_env("JULIA_VERBOSE_LINKING", false), false) __init_lld_path() __init_dsymutil_path() @@ -110,7 +110,7 @@ function ld() # LLD supports mingw style linking flavor = "gnu" m = Sys.ARCH == :x86_64 ? "i386pep" : "i386pe" - default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition` + default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition --disable-auto-import --disable-runtime-pseudo-reloc` elseif Sys.isapple() flavor = "darwin" arch = Sys.ARCH == :aarch64 ? :arm64 : Sys.ARCH @@ -150,16 +150,16 @@ else end function link_image_cmd(path, out) - LIBDIR = "-L$(libdir())" PRIVATE_LIBDIR = "-L$(private_libdir())" SHLIBDIR = "-L$(shlibdir())" - LIBS = is_debug() ? ("-ljulia-debug", "-ljulia-internal-debug") : ("-ljulia", "-ljulia-internal") + LIBS = is_debug() ? ("-ljulia-debug", "-ljulia-internal-debug") : + ("-ljulia", "-ljulia-internal") @static if Sys.iswindows() LIBS = (LIBS..., "-lopenlibm", "-lssp", "-lgcc_s", "-lgcc", "-lmsvcrt") end V = VERBOSE[] ? 
"--verbose" : "" - `$(ld()) $V $SHARED -o $out $WHOLE_ARCHIVE $path $NO_WHOLE_ARCHIVE $LIBDIR $PRIVATE_LIBDIR $SHLIBDIR $LIBS` + `$(ld()) $V $SHARED -o $out $WHOLE_ARCHIVE $path $NO_WHOLE_ARCHIVE $PRIVATE_LIBDIR $SHLIBDIR $LIBS` end function link_image(path, out, internal_stderr::IO=stderr, internal_stdout::IO=stdout) diff --git a/base/loading.jl b/base/loading.jl index 1ea4412ecc68f..4193aae13b96a 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -256,19 +256,22 @@ struct LoadingCache env_project_file::Dict{String, Union{Bool, String}} project_file_manifest_path::Dict{String, Union{Nothing, String}} require_parsed::Set{String} - identified_where::Dict{Tuple{PkgId, String}, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}} - identified::Dict{String, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}} - located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{Tuple{Union{String, Nothing}, Union{String, Nothing}}, Nothing}} + identified_where::Dict{Tuple{PkgId, String}, Union{Nothing, Tuple{PkgId, String}}} + identified::Dict{String, Union{Nothing, Tuple{PkgId, String}}} + located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{Tuple{String, String}, Nothing}} end const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing) LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set(), Dict(), Dict(), Dict()) -struct TOMLCache - p::TOML.Parser +struct TOMLCache{Dates} + p::TOML.Parser{Dates} d::Dict{String, CachedTOMLDict} end -const TOML_CACHE = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}()) +TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}()) +TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d)) + +const TOML_CACHE = TOMLCache(TOML.Parser{nothing}()) parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, require_lock) function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock) @@ -298,30 +301,45 @@ end ## package identification: determine unique identity of package to be loaded ## # Used by Pkg but not used in loading itself -function find_package(arg) +function find_package(arg) # ::Union{Nothing,String} pkgenv = identify_package_env(arg) pkgenv === nothing && return nothing pkg, env = pkgenv return locate_package(pkg, env) end +# is there a better/faster ground truth? +function is_stdlib(pkgid::PkgId) + pkgid.name in readdir(Sys.STDLIB) || return false + stdlib_root = joinpath(Sys.STDLIB, pkgid.name) + project_file = locate_project_file(stdlib_root) + if project_file isa String + d = parsed_toml(project_file) + uuid = get(d, "uuid", nothing) + if uuid !== nothing + return UUID(uuid) == pkgid.uuid + end + end + return false +end + """ Base.identify_package_env(name::String)::Union{Tuple{PkgId, String}, Nothing} - Base.identify_package_env(where::Union{Module,PkgId}, name::String)::Union{Tuple{PkgId, String} Nothing} + Base.identify_package_env(where::Union{Module,PkgId}, name::String)::Union{Tuple{PkgId, Union{String, Nothing}}, Nothing} Same as [`Base.identify_package`](@ref) except that the path to the environment where the package is identified -is also returned. +is also returned, except when the identity is not identified. 
""" identify_package_env(where::Module, name::String) = identify_package_env(PkgId(where), name) function identify_package_env(where::PkgId, name::String) cache = LOADING_CACHE[] if cache !== nothing - pkg_env = get(cache.identified_where, (where, name), nothing) - pkg_env === nothing || return pkg_env + pkg_env = get(cache.identified_where, (where, name), missing) + pkg_env === missing || return pkg_env end pkg_env = nothing if where.name === name - pkg_env = where, nothing + return (where, nothing) elseif where.uuid === nothing pkg_env = identify_package_env(name) # ignore `where` else @@ -333,6 +351,12 @@ function identify_package_env(where::PkgId, name::String) end break # found in implicit environment--return "not found" end + if pkg_env === nothing && is_stdlib(where) + # if not found it could be that manifests are from a different julia version/commit + # where stdlib dependencies have changed, so look up deps based on the stdlib Project.toml + # as a fallback + pkg_env = identify_stdlib_project_dep(where, name) + end end if cache !== nothing cache.identified_where[(where, name)] = pkg_env @@ -342,8 +366,8 @@ end function identify_package_env(name::String) cache = LOADING_CACHE[] if cache !== nothing - pkg_env = get(cache.identified, name, nothing) - pkg_env === nothing || return pkg_env + pkg_env = get(cache.identified, name, missing) + pkg_env === missing || return pkg_env end pkg_env = nothing for env in load_path() @@ -359,6 +383,22 @@ function identify_package_env(name::String) return pkg_env end +function identify_stdlib_project_dep(stdlib::PkgId, depname::String) + @debug """ + Stdlib $(repr("text/plain", stdlib)) is trying to load `$depname` + which is not listed as a dep in the load path manifests, so resorting to search + in the stdlib Project.tomls for true deps""" + stdlib_projfile = locate_project_file(joinpath(Sys.STDLIB, stdlib.name)) + stdlib_projfile === nothing && return nothing + found = explicit_project_deps_get(stdlib_projfile, depname) + if found !== nothing + @debug "$(repr("text/plain", stdlib)) indeed depends on $depname in project $stdlib_projfile" + pkgid = PkgId(found, depname) + return pkgid, stdlib_projfile + end + return nothing +end + _nothing_or_first(x) = x === nothing ? nothing : first(x) """ @@ -371,7 +411,7 @@ its `PkgId`, or `nothing` if it cannot be found. If only the `name` argument is provided, it searches each environment in the stack and its named direct dependencies. 
-There `where` argument provides the context from where to search for the +The `where` argument provides the context from where to search for the package: in this case it first checks if the name matches the context itself, otherwise it searches all recursive dependencies (from the resolved manifest of each environment) until it locates the context `where`, and from there @@ -390,17 +430,16 @@ identify_package(where::Module, name::String) = _nothing_or_first(identify_packa identify_package(where::PkgId, name::String) = _nothing_or_first(identify_package_env(where, name)) identify_package(name::String) = _nothing_or_first(identify_package_env(name)) -function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing) +function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,Tuple{String,String}} cache = LOADING_CACHE[] if cache !== nothing - pathenv = get(cache.located, (pkg, stopenv), nothing) - pathenv === nothing || return pathenv + pathenv = get(cache.located, (pkg, stopenv), missing) + pathenv === missing || return pathenv end path = nothing env′ = nothing if pkg.uuid === nothing for env in load_path() - env′ = env # look for the toplevel pkg `pkg.name` in this entry found = project_deps_get(env, pkg.name) if found !== nothing @@ -410,6 +449,7 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing) # return the path the entry point for the code, if it could be found # otherwise, signal failure path = implicit_manifest_uuid_path(env, pkg) + env′ = env @goto done end end @@ -419,7 +459,6 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing) end else for env in load_path() - env′ = env path = manifest_uuid_path(env, pkg) # missing is used as a sentinel to stop looking further down in envs if path === missing @@ -427,7 +466,7 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing) @goto done end if path !== nothing - path = entry_path(path, pkg.name) + env′ = env @goto done end if !(loading_extension || precompiling_extension) @@ -438,15 +477,20 @@ function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing) # e.g. if they have been explicitly added to the project/manifest mbypath = manifest_uuid_path(Sys.STDLIB, pkg) if mbypath isa String - path = entry_path(mbypath, pkg.name) + path = mbypath + env′ = Sys.STDLIB @goto done end end @label done + if path !== nothing && !isfile_casesensitive(path) + path = nothing + end if cache !== nothing - cache.located[(pkg, stopenv)] = path, env′ + cache.located[(pkg, stopenv)] = path === nothing ? nothing : (path, something(env′)) end - return path, env′ + path === nothing && return nothing + return path, something(env′) end """ @@ -475,11 +519,12 @@ or `nothing` if `m` was not imported from a package. Use [`dirname`](@ref) to get the directory part and [`basename`](@ref) to get the file name part of the path. + +See also [`pkgdir`](@ref). """ function pathof(m::Module) @lock require_lock begin - pkgid = get(module_keys, m, nothing) - pkgid === nothing && return nothing + pkgid = PkgId(m) origin = get(pkgorigins, pkgid, nothing) origin === nothing && return nothing path = origin.path @@ -499,6 +544,8 @@ package root. To get the root directory of the package that implements the current module the form `pkgdir(@__MODULE__)` can be used. +If an extension module is given, the root of the parent package is returned. 
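For the extension case, a hedged sketch using the same placeholder package `Foo` as the examples below, plus a hypothetical extension module `FooExt`:

```julia
# Hypothetical layout: FooExt lives at /path/to/Foo.jl/ext/FooExt.jl
# (or /path/to/Foo.jl/ext/FooExt/FooExt.jl). pkgdir resolves the extension
# to the parent package's root rather than the ext directory:
pkgdir(FooExt)          # "/path/to/Foo.jl"
pkgdir(FooExt, "ext")   # "/path/to/Foo.jl/ext"
```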
+ ```julia-repl julia> pkgdir(Foo) "/path/to/Foo.jl" @@ -507,6 +554,8 @@ julia> pkgdir(Foo, "src", "file.jl") "/path/to/Foo.jl/src/file.jl" ``` +See also [`pathof`](@ref). + !!! compat "Julia 1.7" The optional argument `paths` requires at least Julia 1.7. """ @@ -514,7 +563,19 @@ function pkgdir(m::Module, paths::String...) rootmodule = moduleroot(m) path = pathof(rootmodule) path === nothing && return nothing - return joinpath(dirname(dirname(path)), paths...) + original = path + path, base = splitdir(dirname(path)) + if base == "src" + # package source in `../src/Foo.jl` + elseif base == "ext" + # extension source in `../ext/FooExt.jl` + elseif basename(path) == "ext" + # extension source in `../ext/FooExt/FooExt.jl` + path = dirname(path) + else + error("Unexpected path structure for module source: $original") + end + return joinpath(path, paths...) end function get_pkgversion_from_path(path) @@ -562,7 +623,12 @@ end ## generic project & manifest API ## const project_names = ("JuliaProject.toml", "Project.toml") -const manifest_names = ("JuliaManifest.toml", "Manifest.toml") +const manifest_names = ( + "JuliaManifest-v$(VERSION.major).$(VERSION.minor).toml", + "Manifest-v$(VERSION.major).$(VERSION.minor).toml", + "JuliaManifest.toml", + "Manifest.toml", +) const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml") function locate_project_file(env::String) @@ -600,6 +666,23 @@ function env_project_file(env::String)::Union{Bool,String} end end +function base_project(project_file) + base_dir = abspath(joinpath(dirname(project_file), "..")) + base_project_file = env_project_file(base_dir) + base_project_file isa String || return nothing + d = parsed_toml(base_project_file) + workspace = get(d, "workspace", nothing)::Union{Dict{String, Any}, Nothing} + if workspace === nothing + return nothing + end + projects = get(workspace, "projects", nothing)::Union{Vector{String}, Nothing, String} + projects === nothing && return nothing + if projects isa Vector && basename(dirname(project_file)) in projects + return base_project_file + end + return nothing +end + function project_deps_get(env::String, name::String)::Union{Nothing,PkgId} project_file = env_project_file(env) if project_file isa String @@ -611,21 +694,27 @@ function project_deps_get(env::String, name::String)::Union{Nothing,PkgId} return nothing end +function package_get(project_file, where::PkgId, name::String) + proj = project_file_name_uuid(project_file, where.name) + if proj == where + # if `where` matches the project, use [deps] section as manifest, and stop searching + pkg_uuid = explicit_project_deps_get(project_file, name) + return PkgId(pkg_uuid, name) + end + return nothing +end + function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothing,PkgId} uuid = where.uuid @assert uuid !== nothing project_file = env_project_file(env) if project_file isa String - # first check if `where` names the Project itself - proj = project_file_name_uuid(project_file, where.name) - if proj == where - # if `where` matches the project, use [deps] section as manifest, and stop searching - pkg_uuid = explicit_project_deps_get(project_file, name) - return PkgId(pkg_uuid, name) - end + pkg = package_get(project_file, where, name) + pkg === nothing || return pkg d = parsed_toml(project_file) exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} if exts !== nothing + proj = project_file_name_uuid(project_file, where.name) # Check if `where` is an extension of the project if where.name in 
keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name) # Extensions can load weak deps... @@ -656,15 +745,29 @@ function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missi proj = project_file_name_uuid(project_file, pkg.name) if proj == pkg # if `pkg` matches the project, return the project itself - return project_file_path(project_file) + return project_file_path(project_file, pkg.name) end - mby_ext = project_file_ext_path(project_file, pkg.name) + mby_ext = project_file_ext_path(project_file, pkg) mby_ext === nothing || return mby_ext # look for manifest file and `where` stanza return explicit_manifest_uuid_path(project_file, pkg) elseif project_file # if env names a directory, search it - return implicit_manifest_uuid_path(env, pkg) + proj = implicit_manifest_uuid_path(env, pkg) + proj === nothing || return proj + # if not found + triggers = get(EXT_PRIMED, pkg, nothing) + if triggers !== nothing + parentid = triggers[1] + _, parent_project_file = entry_point_and_project_file(env, parentid.name) + if parent_project_file !== nothing + parentproj = project_file_name_uuid(parent_project_file, parentid.name) + if parentproj == parentid + mby_ext = project_file_ext_path(parent_project_file, pkg) + mby_ext === nothing || return mby_ext + end + end + end end return nothing end @@ -676,13 +779,13 @@ function find_ext_path(project_path::String, extname::String) return joinpath(project_path, "ext", extname * ".jl") end -function project_file_ext_path(project_file::String, name::String) +function project_file_ext_path(project_file::String, ext::PkgId) d = parsed_toml(project_file) - p = project_file_path(project_file) + p = dirname(project_file) exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} if exts !== nothing - if name in keys(exts) - return find_ext_path(p, name) + if ext.name in keys(exts) && ext.uuid == uuid5(UUID(d["uuid"]::String), ext.name) + return find_ext_path(p, ext.name) end end return nothing @@ -697,9 +800,22 @@ function project_file_name_uuid(project_file::String, name::String)::PkgId return PkgId(uuid, name) end -function project_file_path(project_file::String) +function project_file_path(project_file::String, name::String) d = parsed_toml(project_file) - joinpath(dirname(project_file), get(d, "path", "")::String) + entryfile = get(d, "path", nothing)::Union{String, Nothing} + # "path" entry in project file is soft deprecated + if entryfile === nothing + entryfile = get(d, "entryfile", nothing)::Union{String, Nothing} + end + return entry_path(dirname(project_file), name, entryfile) +end + +function workspace_manifest(project_file) + base = base_project(project_file) + if base !== nothing + return project_file_manifest_path(base) + end + return nothing end # find project file's corresponding manifest file @@ -712,6 +828,10 @@ function project_file_manifest_path(project_file::String)::Union{Nothing,String} end dir = abspath(dirname(project_file)) d = parsed_toml(project_file) + base_manifest = workspace_manifest(project_file) + if base_manifest !== nothing + return base_manifest + end explicit_manifest = get(d, "manifest", nothing)::Union{String, Nothing} manifest_path = nothing if explicit_manifest !== nothing @@ -752,33 +872,47 @@ end # given a project directory (implicit env from LOAD_PATH) and a name, # find an entry point for `name`, and see if it has an associated project file function entry_point_and_project_file(dir::String, name::String)::Union{Tuple{Nothing,Nothing},Tuple{String,Nothing},Tuple{String,String}} - path = 
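Several of the hunks above rely on an extension's `PkgId` UUID being derived deterministically from the parent package's UUID and the extension name. Base uses an internal `uuid5` helper for this; the UUIDs stdlib provides a similar RFC 4122 version-5 generator, sketched here (the parent UUID is Example.jl's, the extension name is made up):

```julia
using UUIDs

parent_uuid = UUID("7876af07-990d-54b4-ab0e-23690620f79a")   # Example.jl
ext_uuid = uuid5(parent_uuid, "ExampleFooExt")               # deterministic extension UUID
```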
normpath(joinpath(dir, "$name.jl")) - isfile_casesensitive(path) && return path, nothing - dir = joinpath(dir, name) - path, project_file = entry_point_and_project_file_inside(dir, name) + dir_name = joinpath(dir, name) + path, project_file = entry_point_and_project_file_inside(dir_name, name) path === nothing || return path, project_file - dir = dir * ".jl" - path, project_file = entry_point_and_project_file_inside(dir, name) + dir_jl = dir_name * ".jl" + path, project_file = entry_point_and_project_file_inside(dir_jl, name) path === nothing || return path, project_file + # check for less likely case with a bare file and no src directory last to minimize stat calls + path = normpath(joinpath(dir, "$name.jl")) + isfile_casesensitive(path) && return path, nothing + return nothing, nothing +end + +# Find the project file for the extension `ext` in the implicit env `dir`` +function implicit_env_project_file_extension(dir::String, ext::PkgId) + for pkg in readdir(dir; join=true) + project_file = env_project_file(pkg) + project_file isa String || continue + path = project_file_ext_path(project_file, ext) + if path !== nothing + return path, project_file + end + end return nothing, nothing end -# given a path and a name, return the entry point -function entry_path(path::String, name::String)::Union{Nothing,String} +# given a path, name, and possibly an entryfile, return the entry point +function entry_path(path::String, name::String, entryfile::Union{Nothing,String})::String isfile_casesensitive(path) && return normpath(path) - path = normpath(joinpath(path, "src", "$name.jl")) - isfile_casesensitive(path) && return path - return nothing # source not found + entrypoint = entryfile === nothing ? joinpath("src", "$name.jl") : entryfile + return normpath(joinpath(path, entrypoint)) end ## explicit project & manifest API ## # find project file root or deps `name => uuid` mapping +# `ext` is the name of the extension if `name` is loaded from one # return `nothing` if `name` is not found -function explicit_project_deps_get(project_file::String, name::String)::Union{Nothing,UUID} +function explicit_project_deps_get(project_file::String, name::String, ext::Union{String,Nothing}=nothing)::Union{Nothing,UUID} d = parsed_toml(project_file) - root_uuid = dummy_uuid(project_file) if get(d, "name", nothing)::Union{String, Nothing} === name + root_uuid = dummy_uuid(project_file) uuid = get(d, "uuid", nothing)::Union{String, Nothing} return uuid === nothing ? 
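The reworked `entry_path` above always composes an entry point instead of returning `nothing`: an explicit `entryfile` (from the project or manifest) wins, otherwise the conventional `src/<Name>.jl` is used. A simplified sketch that skips the `isfile_casesensitive` short-circuit (hypothetical helper name):

```julia
function resolve_entry(pkgroot::String, name::String, entryfile::Union{Nothing,String}=nothing)
    rel = entryfile === nothing ? joinpath("src", "$name.jl") : entryfile
    return normpath(joinpath(pkgroot, rel))
end

resolve_entry("/depot/packages/Foo/abcd", "Foo")                  # ".../src/Foo.jl"
resolve_entry("/depot/packages/Foo/abcd", "Foo", "lib/Foo.jl")    # ".../lib/Foo.jl"
```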
root_uuid : UUID(uuid) end @@ -787,6 +921,19 @@ function explicit_project_deps_get(project_file::String, name::String)::Union{No uuid = get(deps, name, nothing)::Union{String, Nothing} uuid === nothing || return UUID(uuid) end + if ext !== nothing + extensions = get(d, "extensions", nothing) + extensions === nothing && return nothing + ext_data = get(extensions, ext, nothing) + ext_data === nothing && return nothing + if (ext_data isa String && name == ext_data) || (ext_data isa Vector{String} && name in ext_data) + weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} + weakdeps === nothing && return nothing + wuuid = get(weakdeps, name, nothing)::Union{String, Nothing} + wuuid === nothing && return nothing + return UUID(wuuid) + end + end return nothing end @@ -827,14 +974,14 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St entry = entry::Dict{String, Any} uuid = get(entry, "uuid", nothing)::Union{String, Nothing} uuid === nothing && continue + # deps is either a list of names (deps = ["DepA", "DepB"]) or + # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} + deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if UUID(uuid) === where.uuid found_where = true - # deps is either a list of names (deps = ["DepA", "DepB"]) or - # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} - deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if deps isa Vector{String} found_name = name in deps - break + found_name && @goto done elseif deps isa Dict{String, Any} deps = deps::Dict{String, Any} for (dep, uuid) in deps @@ -853,23 +1000,25 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St return PkgId(UUID(uuid), name) end exts = extensions[where.name]::Union{String, Vector{String}} + weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts) - weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} - if weakdeps !== nothing - if weakdeps isa Vector{String} - found_name = name in weakdeps - break - elseif weakdeps isa Dict{String, Any} - weakdeps = weakdeps::Dict{String, Any} - for (dep, uuid) in weakdeps - uuid::String - if dep === name - return PkgId(UUID(uuid), name) + for deps′ in [weakdeps, deps] + if deps′ !== nothing + if deps′ isa Vector{String} + found_name = name in deps′ + found_name && @goto done + elseif deps′ isa Dict{String, Any} + deps′ = deps′::Dict{String, Any} + for (dep, uuid) in deps′ + uuid::String + if dep === name + return PkgId(UUID(uuid), name) + end + end end end end end - end # `name` is not an ext, do standard lookup as if this was the parent return identify_package(PkgId(UUID(uuid), dep_name), name) end @@ -877,6 +1026,7 @@ function explicit_manifest_deps_get(project_file::String, where::PkgId, name::St end end end + @label done found_where || return nothing found_name || return PkgId(name) # Only reach here if deps was not a dict which mean we have a unique name for the dep @@ -928,19 +1078,26 @@ end function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::Dict{String,Any}) path = get(entry, "path", nothing)::Union{Nothing, String} + entryfile = get(entry, "entryfile", nothing)::Union{Nothing, String} if path !== nothing - path = normpath(abspath(dirname(manifest_file), path)) + path = 
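The new `ext` argument to `explicit_project_deps_get` above lets an extension resolve its triggers through the parent's `[weakdeps]` table. A small example of the project layout it reads (names and UUIDs are made up):

```julia
using TOML

project = TOML.parse("""
name = "Foo"
uuid = "11111111-1111-1111-1111-111111111111"

[weakdeps]
Bar = "22222222-2222-2222-2222-222222222222"

[extensions]
FooBarExt = "Bar"
""")

# For ext = "FooBarExt" and name = "Bar", the lookup above ends in [weakdeps]:
project["weakdeps"]["Bar"]      # => "22222222-2222-2222-2222-222222222222"
```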
entry_path(normpath(abspath(dirname(manifest_file), path)), pkg.name, entryfile) return path end hash = get(entry, "git-tree-sha1", nothing)::Union{Nothing, String} - hash === nothing && return nothing + if hash === nothing + mbypath = manifest_uuid_path(Sys.STDLIB, pkg) + if mbypath isa String && isfile(mbypath) + return mbypath + end + return nothing + end hash = SHA1(hash) # Keep the 4 since it used to be the default uuid = pkg.uuid::UUID # checked within `explicit_manifest_uuid_path` for slug in (version_slug(uuid, hash), version_slug(uuid, hash, 4)) for depot in DEPOT_PATH path = joinpath(depot, "packages", pkg.name, slug) - ispath(path) && return abspath(path) + ispath(path) && return entry_path(abspath(path), pkg.name, entryfile) end end # no depot contains the package, return missing to stop looking @@ -968,11 +1125,28 @@ end function implicit_manifest_deps_get(dir::String, where::PkgId, name::String)::Union{Nothing,PkgId} @assert where.uuid !== nothing project_file = entry_point_and_project_file(dir, where.name)[2] - project_file === nothing && return nothing # a project file is mandatory for a package with a uuid + if project_file === nothing + # `where` could be an extension + project_file = implicit_env_project_file_extension(dir, where)[2] + project_file === nothing && return nothing + end proj = project_file_name_uuid(project_file, where.name) - proj == where || return nothing # verify that this is the correct project file + ext = nothing + if proj !== where + # `where` could be an extension in `proj` + d = parsed_toml(project_file) + exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} + if exts !== nothing && where.name in keys(exts) + if where.uuid !== uuid5(proj.uuid, where.name) + return nothing + end + ext = where.name + else + return nothing + end + end # this is the correct project, so stop searching here - pkg_uuid = explicit_project_deps_get(project_file, name) + pkg_uuid = explicit_project_deps_get(project_file, name, ext) return PkgId(pkg_uuid, name) end @@ -996,16 +1170,20 @@ function find_source_file(path::AbstractString) return isfile(base_path) ? normpath(base_path) : nothing end -cache_file_entry(pkg::PkgId) = joinpath( - "compiled", - "v$(VERSION.major).$(VERSION.minor)", - pkg.uuid === nothing ? "" : pkg.name), - pkg.uuid === nothing ? pkg.name : package_slug(pkg.uuid) +function cache_file_entry(pkg::PkgId) + uuid = pkg.uuid + return joinpath( + "compiled", + "v$(VERSION.major).$(VERSION.minor)", + uuid === nothing ? "" : pkg.name), + uuid === nothing ? pkg.name : package_slug(uuid) +end -function find_all_in_cache_path(pkg::PkgId) +function find_all_in_cache_path(pkg::PkgId, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH) paths = String[] entrypath, entryfile = cache_file_entry(pkg) - for path in joinpath.(DEPOT_PATH, entrypath) + for path in DEPOT_PATH + path = joinpath(path, entrypath) isdir(path) || continue for file in readdir(path, sort = false) # no sort given we sort later if !((pkg.uuid === nothing && file == entryfile * ".ji") || @@ -1018,89 +1196,205 @@ function find_all_in_cache_path(pkg::PkgId) end end if length(paths) > 1 - # allocating the sort vector is less expensive than using sort!(.. 
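`cache_file_entry` above determines where precompile caches are looked up: each depot has a `compiled/v<major>.<minor>/<PkgName>/` directory holding `<slug>.ji` files (plus matching pkgimage shared libraries when enabled). A sketch of that layout for a package with a UUID (hypothetical helper name):

```julia
cache_dir(depot::String, pkgname::String, v::VersionNumber=VERSION) =
    joinpath(depot, "compiled", "v$(v.major).$(v.minor)", pkgname)

cache_dir(first(DEPOT_PATH), "Example")   # e.g. "~/.julia/compiled/v1.11/Example"
```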
by=mtime), which would - # call the relatively slow mtime multiple times per path - p = sortperm(mtime.(paths), rev = true) + function sort_by(path) + # when using pkgimages, consider those cache files first + pkgimage = if JLOptions().use_pkgimages != 0 + io = open(path, "r") + try + if iszero(isvalid_cache_header(io)) + false + else + _, _, _, _, _, _, _, flags = parse_cache_header(io, path) + CacheFlags(flags).use_pkgimages + end + finally + close(io) + end + else + false + end + (; pkgimage, mtime=mtime(path)) + end + function sort_lt(a, b) + if a.pkgimage != b.pkgimage + return a.pkgimage < b.pkgimage + end + return a.mtime < b.mtime + end + + # allocating the sort vector is less expensive than using sort!(.. by=sort_by), + # which would call the relatively slow mtime multiple times per path + p = sortperm(sort_by.(paths), lt=sort_lt, rev=true) return paths[p] else return paths end end -ocachefile_from_cachefile(cachefile) = string(chopsuffix(cachefile, ".ji"), ".", Base.Libc.dlext) -cachefile_from_ocachefile(cachefile) = string(chopsuffix(cachefile, ".$(Base.Libc.dlext)"), ".ji") +ocachefile_from_cachefile(cachefile) = string(chopsuffix(cachefile, ".ji"), ".", Libc.Libdl.dlext) +cachefile_from_ocachefile(cachefile) = string(chopsuffix(cachefile, ".$(Libc.Libdl.dlext)"), ".ji") # use an Int counter so that nested @time_imports calls all remain open const TIMING_IMPORTS = Threads.Atomic{Int}(0) +# loads a precompile cache file, ignoring stale_cachefile tests +# assuming all depmods are already loaded and everything is valid # these return either the array of modules loaded from the path / content given # or an Exception that describes why it couldn't be loaded # and it reconnects the Base.Docs.META -function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}) +function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing; register::Bool=true) + if isnothing(ignore_native) + if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0 + ignore_native = false + else + io = open(path, "r") + try + iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.") + _, (includes, _, _), _, _, _, _, _, _ = parse_cache_header(io, path) + ignore_native = pkg_tracked(includes) + finally + close(io) + end + end + end assert_havelock(require_lock) timing_imports = TIMING_IMPORTS[] > 0 try - if timing_imports - t_before = time_ns() - cumulative_compile_timing(true) - t_comp_before = cumulative_compile_time_ns() - end + if timing_imports + t_before = time_ns() + cumulative_compile_timing(true) + t_comp_before = cumulative_compile_time_ns() + end - if ocachepath !== nothing - @debug "Loading object cache file $ocachepath for $pkg" - sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring), ocachepath, depmods, false, pkg.name) - else - @debug "Loading cache file $path for $pkg" - sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), path, depmods, false, pkg.name) - end - if isa(sv, Exception) - return sv - end + for i in eachindex(depmods) + dep = depmods[i] + dep isa Module && continue + _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128} + dep = something(maybe_loaded_precompile(depkey, depbuild_id)) + @assert PkgId(dep) == depkey && module_build_id(dep) === depbuild_id + depmods[i] = dep + end + + unlock(require_lock) # temporarily _unlock_ during these 
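The new sorting in `find_all_in_cache_path` above prefers cache files built as pkgimages and breaks ties by newest modification time. A standalone illustration of that ordering:

```julia
candidates = [
    (pkgimage = false, mtime = 300.0),
    (pkgimage = true,  mtime = 100.0),
    (pkgimage = true,  mtime = 200.0),
]
lt(a, b) = a.pkgimage != b.pkgimage ? a.pkgimage < b.pkgimage : a.mtime < b.mtime
candidates[sortperm(candidates; lt, rev = true)]
# => pkgimage with mtime 200, then pkgimage with mtime 100, then the non-pkgimage entry
```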
operations + sv = try + if ocachepath !== nothing + @debug "Loading object cache file $ocachepath for $(repr("text/plain", pkg))" + ccall(:jl_restore_package_image_from_file, Ref{SimpleVector}, (Cstring, Any, Cint, Cstring, Cint), + ocachepath, depmods, #=completeinfo=#false, pkg.name, ignore_native) + else + @debug "Loading cache file $path for $(repr("text/plain", pkg))" + ccall(:jl_restore_incremental, Ref{SimpleVector}, (Cstring, Any, Cint, Cstring), + path, depmods, #=completeinfo=#false, pkg.name) + end + finally + lock(require_lock) + end - restored = register_restored_modules(sv, pkg, path) + edges = sv[3]::Vector{Any} + ext_edges = sv[4]::Union{Nothing,Vector{Any}} + StaticData.insert_backedges(edges, ext_edges) - for M in restored - M = M::Module - if parentmodule(M) === M && PkgId(M) == pkg - if timing_imports - elapsed = round((time_ns() - t_before) / 1e6, digits = 1) - comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before - print(lpad(elapsed, 9), " ms ") - parentid = get(EXT_PRIMED, pkg, nothing) - if parentid !== nothing - print(parentid.name, " → ") - end - print(pkg.name) - if comp_time > 0 - printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color()) - end - if recomp_time > 0 - perc = Float64(100 * recomp_time / comp_time) - printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color()) + restored = register_restored_modules(sv, pkg, path) + + for M in restored + M = M::Module + if is_root_module(M) && PkgId(M) == pkg + register && register_root_module(M) + if timing_imports + elapsed_time = time_ns() - t_before + comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before + print_time_imports_report(M, elapsed_time, comp_time, recomp_time) end - println() + return M end - return M end - end - return ErrorException("Required dependency $pkg failed to load from a cache file.") + return ErrorException("Required dependency $(repr("text/plain", pkg)) failed to load from a cache file.") finally timing_imports && cumulative_compile_timing(false) end end +# printing functions for @time_imports +# note that the time inputs are UInt64 on all platforms. Give default values here so that we don't have +# confusing UInt64 types in generate_precompile.jl +function print_time_imports_report( + mod::Module, + elapsed_time::UInt64=UInt64(1), + comp_time::UInt64=UInt64(1), + recomp_time::UInt64=UInt64(1) + ) + print(lpad(round(elapsed_time / 1e6, digits=1), 9), " ms ") + ext_parent = extension_parent_name(mod) + if ext_parent !== nothing + print(ext_parent::String, " → ") + end + print(string(mod)) + if comp_time > 0 + perc = Ryu.writefixed(Float64(100 * comp_time / (elapsed_time)), 2) + printstyled(" $perc% compilation time", color = Base.info_color()) + end + if recomp_time > 0 + perc = Float64(100 * recomp_time / comp_time) + perc_show = perc < 1 ? "<1" : Ryu.writefixed(perc, 0) + printstyled(" ($perc_show% recompilation)", color = Base.warn_color()) + end + println() +end +function print_time_imports_report_init( + mod::Module, i::Int=1, + elapsed_time::UInt64=UInt64(1), + comp_time::UInt64=UInt64(1), + recomp_time::UInt64=UInt64(1) + ) + connector = i > 1 ? 
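The report printers above back the `@time_imports` macro mentioned in the `TIMING_IMPORTS` comment. Usage sketch on a recent Julia (the package name is a placeholder):

```julia
Base.@time_imports using Example
# prints one line per loaded module with compilation/recompilation percentages,
# and a "Parent → Ext" prefix for package extensions
```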
"├" : "┌" + printstyled(" $connector ", color = :light_black) + print("$(round(elapsed_time / 1e6, digits=1)) ms $mod.__init__() ") + if comp_time > 0 + perc = Ryu.writefixed(Float64(100 * (comp_time) / elapsed_time), 2) + printstyled("$perc% compilation time", color = Base.info_color()) + end + if recomp_time > 0 + perc = Float64(100 * recomp_time / comp_time) + printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color()) + end + println() +end + +# if M is an extension, return the string name of the parent. Otherwise return nothing +function extension_parent_name(M::Module) + rootmodule = moduleroot(M) + src_path = pathof(rootmodule) + src_path === nothing && return nothing + pkgdir_parts = splitpath(src_path) + ext_pos = findlast(==("ext"), pkgdir_parts) + if ext_pos !== nothing && ext_pos >= length(pkgdir_parts) - 2 + parent_package_root = joinpath(pkgdir_parts[1:ext_pos-1]...) + parent_package_project_file = locate_project_file(parent_package_root) + if parent_package_project_file isa String + d = parsed_toml(parent_package_project_file) + name = get(d, "name", nothing) + if name !== nothing + return name + end + end + end + return nothing +end + function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String) # This function is also used by PkgCacheInspector.jl + assert_havelock(require_lock) restored = sv[1]::Vector{Any} for M in restored M = M::Module if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing push!(Base.Docs.modules, M) end - if parentmodule(M) === M - register_root_module(M) + if is_root_module(M) + push!(loaded_modules_order, M) + push!(get!(Vector{Module}, loaded_precompiles, pkg), M) end end @@ -1126,35 +1420,23 @@ function run_module_init(mod::Module, i::Int=1) # `i` informs ordering for the `@time_imports` report formatting if TIMING_IMPORTS[] == 0 ccall(:jl_init_restored_module, Cvoid, (Any,), mod) - else - if isdefined(mod, :__init__) - connector = i > 1 ? "├" : "┌" - printstyled(" $connector ", color = :light_black) - - elapsedtime = time_ns() - cumulative_compile_timing(true) - compile_elapsedtimes = cumulative_compile_time_ns() + elseif isdefined(mod, :__init__) + elapsed_time = time_ns() + cumulative_compile_timing(true) + compile_elapsedtimes = cumulative_compile_time_ns() - ccall(:jl_init_restored_module, Cvoid, (Any,), mod) + ccall(:jl_init_restored_module, Cvoid, (Any,), mod) - elapsedtime = (time_ns() - elapsedtime) / 1e6 - cumulative_compile_timing(false); - comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6 + elapsed_time = time_ns() - elapsed_time + cumulative_compile_timing(false); + comp_time, recomp_time = cumulative_compile_time_ns() .- compile_elapsedtimes - print(round(elapsedtime, digits=1), " ms $mod.__init__() ") - if comp_time > 0 - printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color()) - end - if recomp_time > 0 - perc = Float64(100 * recomp_time / comp_time) - printstyled(" (", perc < 1 ? 
"<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color()) - end - println() - end + print_time_imports_report_init(mod, i, elapsed_time, comp_time, recomp_time) end end function run_package_callbacks(modkey::PkgId) + @assert modkey != precompilation_target run_extension_callbacks(modkey) assert_havelock(require_lock) unlock(require_lock) @@ -1180,10 +1462,11 @@ end mutable struct ExtensionId const id::PkgId const parentid::PkgId # just need the name, for printing + const n_total_triggers::Int ntriggers::Int # how many more packages must be defined until this is loaded end -const EXT_PRIMED = Dict{PkgId, PkgId}() # Extension -> Parent +const EXT_PRIMED = Dict{PkgId,Vector{PkgId}}() # Extension -> Parent + Triggers (parent is always first) const EXT_DORMITORY = Dict{PkgId,Vector{ExtensionId}}() # Trigger -> Extensions that can be triggered by it const EXT_DORMITORY_FAILED = ExtensionId[] @@ -1192,29 +1475,34 @@ function insert_extension_triggers(pkg::PkgId) path_env_loc = locate_package_env(pkg) path_env_loc === nothing && return path, env_loc = path_env_loc - if path === nothing || env_loc === nothing - return - end insert_extension_triggers(env_loc, pkg) end function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missing} project_file = env_project_file(env) - if project_file isa String + if project_file isa String || project_file + implicit_project_file = project_file + if !(implicit_project_file isa String) + # if env names a directory, search it for an implicit project file (for stdlibs) + path, implicit_project_file = entry_point_and_project_file(env, pkg.name) + if !(implicit_project_file isa String) + return nothing + end + end # Look in project for extensions to insert - proj_pkg = project_file_name_uuid(project_file, pkg.name) + proj_pkg = project_file_name_uuid(implicit_project_file, pkg.name) if pkg == proj_pkg - d_proj = parsed_toml(project_file) - weakdeps = get(d_proj, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}} + d_proj = parsed_toml(implicit_project_file) extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}} extensions === nothing && return - weakdeps === nothing && return - if weakdeps isa Dict{String, Any} - return _insert_extension_triggers(pkg, extensions, weakdeps) - end + weakdeps = get(Dict{String, Any}, d_proj, "weakdeps")::Dict{String,Any} + deps = get(Dict{String, Any}, d_proj, "deps")::Dict{String,Any} + total_deps = merge(weakdeps, deps) + return _insert_extension_triggers(pkg, extensions, total_deps) end # Now look in manifest + project_file isa String || return nothing manifest_file = project_file_manifest_path(project_file) manifest_file === nothing && return d = get_deps(parsed_toml(manifest_file)) @@ -1225,27 +1513,35 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi uuid = get(entry, "uuid", nothing)::Union{String, Nothing} uuid === nothing && continue if UUID(uuid) == pkg.uuid - weakdeps = get(entry, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}} extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}} extensions === nothing && return - weakdeps === nothing && return - if weakdeps isa Dict{String, Any} - return _insert_extension_triggers(pkg, extensions, weakdeps) + weakdeps = get(Dict{String, Any}, entry, "weakdeps")::Union{Vector{String}, Dict{String,Any}} + deps = get(Dict{String, Any}, entry, "deps")::Union{Vector{String}, Dict{String,Any}} + + function 
expand_deps_list(deps′::Vector{String}) + deps′_expanded = Dict{String, Any}() + for (dep_name, entries) in d + dep_name in deps′ || continue + entries::Vector{Any} + if length(entries) != 1 + error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))") + end + entry = first(entries)::Dict{String, Any} + uuid = entry["uuid"]::String + deps′_expanded[dep_name] = uuid + end + return deps′_expanded end - d_weakdeps = Dict{String, Any}() - for (dep_name, entries) in d - dep_name in weakdeps || continue - entries::Vector{Any} - if length(entries) != 1 - error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))") - end - entry = first(entries)::Dict{String, Any} - uuid = entry["uuid"]::String - d_weakdeps[dep_name] = uuid + if weakdeps isa Vector{String} + weakdeps = expand_deps_list(weakdeps) end - @assert length(d_weakdeps) == length(weakdeps) - return _insert_extension_triggers(pkg, extensions, d_weakdeps) + if deps isa Vector{String} + deps = expand_deps_list(deps) + end + + total_deps = merge(weakdeps, deps) + return _insert_extension_triggers(pkg, extensions, total_deps) end end end @@ -1253,23 +1549,24 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi return nothing end -function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, weakdeps::Dict{String, Any}) +function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, totaldeps::Dict{String, Any}) for (ext, triggers) in extensions triggers = triggers::Union{String, Vector{String}} triggers isa String && (triggers = [triggers]) - id = PkgId(uuid5(parent.uuid, ext), ext) - if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id) + id = PkgId(uuid5(parent.uuid::UUID, ext), ext) + if haskey(EXT_PRIMED, id) || haskey(Base.loaded_modules, id) continue # extension is already primed or loaded, don't add it again end - EXT_PRIMED[id] = parent - gid = ExtensionId(id, parent, 1 + length(triggers)) + EXT_PRIMED[id] = trigger_ids = PkgId[parent] + gid = ExtensionId(id, parent, 1 + length(triggers), 1 + length(triggers)) trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, parent) push!(trigger1, gid) for trigger in triggers # TODO: Better error message if this lookup fails? 
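`ExtensionId` above now records both the remaining and the total trigger count, and an extension becomes loadable only once every trigger (the parent plus each listed dependency) has been loaded. A minimal sketch of that countdown (hypothetical stand-in type):

```julia
mutable struct PendingExt
    name::String
    ntriggers::Int          # decremented as each trigger package finishes loading
end

trigger_loaded!(ext::PendingExt) = (ext.ntriggers -= 1) == 0

ext = PendingExt("FooBarExt", 2)   # parent + one weak dependency
trigger_loaded!(ext)               # false: still waiting on one trigger
trigger_loaded!(ext)               # true: all triggers present, ready to load
```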
- uuid_trigger = UUID(weakdeps[trigger]::String) + uuid_trigger = UUID(totaldeps[trigger]::String) trigger_id = PkgId(uuid_trigger, trigger) - if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id) + push!(trigger_ids, trigger_id) + if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id) || (trigger_id == precompilation_target) trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id) push!(trigger1, gid) else @@ -1280,7 +1577,9 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any} end loading_extension::Bool = false +loadable_extensions::Union{Nothing,Vector{PkgId}} = nothing precompiling_extension::Bool = false +precompilation_target::Union{Nothing,PkgId} = nothing function run_extension_callbacks(extid::ExtensionId) assert_havelock(require_lock) succeeded = try @@ -1291,9 +1590,14 @@ function run_extension_callbacks(extid::ExtensionId) true catch # Try to continue loading if loading an extension errors - errs = current_exceptions() - @error "Error during loading of extension $(extid.id.name) of $(extid.parentid.name), \ + if JLOptions().incremental != 0 + # during incremental precompilation, this should be fail-fast + rethrow() + else + errs = current_exceptions() + @error "Error during loading of extension $(extid.id.name) of $(extid.parentid.name), \ use `Base.retry_load_extensions()` to retry." exception=errs + end false finally global loading_extension = false @@ -1306,25 +1610,22 @@ function run_extension_callbacks(pkgid::PkgId) # take ownership of extids that depend on this pkgid extids = pop!(EXT_DORMITORY, pkgid, nothing) extids === nothing && return + extids_to_load = Vector{ExtensionId}() for extid in extids - if extid.ntriggers > 0 - # indicate pkgid is loaded - extid.ntriggers -= 1 - end - if extid.ntriggers < 0 - # indicate pkgid is loaded - extid.ntriggers += 1 - succeeded = false - else - succeeded = true - end - if extid.ntriggers == 0 - # actually load extid, now that all dependencies are met, - # and record the result - succeeded = succeeded && run_extension_callbacks(extid) - succeeded || push!(EXT_DORMITORY_FAILED, extid) + @assert extid.ntriggers > 0 + extid.ntriggers -= 1 + if extid.ntriggers == 0 && (loadable_extensions === nothing || extid.id in loadable_extensions) + push!(extids_to_load, extid) end end + # Load extensions with the fewest triggers first + sort!(extids_to_load, by=extid->extid.n_total_triggers) + for extid in extids_to_load + # actually load extid, now that all dependencies are met, + succeeded = run_extension_callbacks(extid) + succeeded || push!(EXT_DORMITORY_FAILED, extid) + end + return end @@ -1359,49 +1660,190 @@ get_extension(parent::Module, ext::Symbol) = get_extension(PkgId(parent), ext) function get_extension(parentid::PkgId, ext::Symbol) parentid.uuid === nothing && return nothing extid = PkgId(uuid5(parentid.uuid, string(ext)), string(ext)) - return get(loaded_modules, extid, nothing) + return maybe_root_module(extid) end # End extensions -# should sync with the types of arguments of `stale_cachefile` -const StaleCacheKey = Tuple{Base.PkgId, UInt128, String, String} -""" - Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false) +struct CacheFlags + # OOICCDDP - see jl_cache_flags + use_pkgimages::Bool + debug_level::Int + check_bounds::Int + inline::Bool + opt_level::Int +end +function CacheFlags(f::UInt8) + use_pkgimages = Bool(f & 1) + debug_level = Int((f >> 1) & 3) + check_bounds = Int((f >> 3) & 3) + inline = Bool((f >> 5) & 1) + opt_level = 
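Two entry points referenced in this hunk are reachable from user code. A hedged usage sketch (`Foo` and `FooBarExt` are made-up names, and `Foo` is assumed to be an already-loaded package; `get_extension` returns `nothing` until all of the extension's triggers are loaded):

```julia
ext = Base.get_extension(Foo, :FooBarExt)
if ext === nothing
    Base.retry_load_extensions()   # retry extensions that previously failed to load
end
```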
Int((f >> 6) & 3) # define OPT_LEVEL in statiddata_utils + CacheFlags(use_pkgimages, debug_level, check_bounds, inline, opt_level) +end +CacheFlags(f::Int) = CacheFlags(UInt8(f)) +function CacheFlags(cf::CacheFlags=CacheFlags(ccall(:jl_cache_flags, UInt8, ())); + use_pkgimages::Union{Nothing,Bool}=nothing, + debug_level::Union{Nothing,Int}=nothing, + check_bounds::Union{Nothing,Int}=nothing, + inline::Union{Nothing,Bool}=nothing, + opt_level::Union{Nothing,Int}=nothing + ) + return CacheFlags( + use_pkgimages === nothing ? cf.use_pkgimages : use_pkgimages, + debug_level === nothing ? cf.debug_level : debug_level, + check_bounds === nothing ? cf.check_bounds : check_bounds, + inline === nothing ? cf.inline : inline, + opt_level === nothing ? cf.opt_level : opt_level + ) +end +# reflecting jloptions.c defaults +const DefaultCacheFlags = CacheFlags(use_pkgimages=true, debug_level=isdebugbuild() ? 2 : 1, check_bounds=0, inline=true, opt_level=2) -Returns whether a given PkgId within the active project is precompiled. +function _cacheflag_to_uint8(cf::CacheFlags)::UInt8 + f = UInt8(0) + f |= cf.use_pkgimages << 0 + f |= cf.debug_level << 1 + f |= cf.check_bounds << 3 + f |= cf.inline << 5 + f |= cf.opt_level << 6 + return f +end -By default this check observes the same approach that code loading takes -with respect to when different versions of dependencies are currently loaded -to that which is expected. To ignore loaded modules and answer as if in a -fresh julia session specify `ignore_loaded=true`. +function translate_cache_flags(cacheflags::CacheFlags, defaultflags::CacheFlags) + opts = String[] + cacheflags.use_pkgimages != defaultflags.use_pkgimages && push!(opts, cacheflags.use_pkgimages ? "--pkgimages=yes" : "--pkgimages=no") + cacheflags.debug_level != defaultflags.debug_level && push!(opts, "-g$(cacheflags.debug_level)") + cacheflags.check_bounds != defaultflags.check_bounds && push!(opts, ("--check-bounds=auto", "--check-bounds=yes", "--check-bounds=no")[cacheflags.check_bounds + 1]) + cacheflags.inline != defaultflags.inline && push!(opts, cacheflags.inline ? "--inline=yes" : "--inline=no") + cacheflags.opt_level != defaultflags.opt_level && push!(opts, "-O$(cacheflags.opt_level)") + return opts +end -!!! compat "Julia 1.10" - This function requires at least Julia 1.10. 
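`CacheFlags` above packs the option set into a single byte (the "OOICCDDP" layout): bit 0 `use_pkgimages`, bits 1-2 `debug_level`, bits 3-4 `check_bounds`, bit 5 `inline`, bits 6-7 `opt_level`. A standalone round-trip sketch of that packing:

```julia
pack(use_pkgimages, debug_level, check_bounds, inline, opt_level) =
    UInt8(use_pkgimages) | UInt8(debug_level) << 1 | UInt8(check_bounds) << 3 |
    UInt8(inline) << 5 | UInt8(opt_level) << 6

unpack(f::UInt8) = (
    use_pkgimages = Bool(f & 0x01),
    debug_level   = Int((f >> 1) & 3),
    check_bounds  = Int((f >> 3) & 3),
    inline        = Bool((f >> 5) & 0x01),
    opt_level     = Int((f >> 6) & 3),
)

unpack(pack(true, 1, 0, true, 2))   # round-trips the non-debug default flags
```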
-""" -function isprecompiled(pkg::PkgId; +function show(io::IO, cf::CacheFlags) + print(io, "CacheFlags(") + print(io, "; use_pkgimages=") + print(io, cf.use_pkgimages) + print(io, ", debug_level=") + print(io, cf.debug_level) + print(io, ", check_bounds=") + print(io, cf.check_bounds) + print(io, ", inline=") + print(io, cf.inline) + print(io, ", opt_level=") + print(io, cf.opt_level) + print(io, ")") +end + +struct ImageTarget + name::String + flags::Int32 + ext_features::String + features_en::Vector{UInt8} + features_dis::Vector{UInt8} +end + +function parse_image_target(io::IO) + flags = read(io, Int32) + nfeature = read(io, Int32) + feature_en = read(io, 4*nfeature) + feature_dis = read(io, 4*nfeature) + name_len = read(io, Int32) + name = String(read(io, name_len)) + ext_features_len = read(io, Int32) + ext_features = String(read(io, ext_features_len)) + ImageTarget(name, flags, ext_features, feature_en, feature_dis) +end + +function parse_image_targets(targets::Vector{UInt8}) + io = IOBuffer(targets) + ntargets = read(io, Int32) + targets = Vector{ImageTarget}(undef, ntargets) + for i in 1:ntargets + targets[i] = parse_image_target(io) + end + return targets +end + +function current_image_targets() + targets = @ccall jl_reflect_clone_targets()::Vector{UInt8} + return parse_image_targets(targets) +end + +struct FeatureName + name::Cstring + bit::UInt32 # bit index into a `uint32_t` array; + llvmver::UInt32 # 0 if it is available on the oldest LLVM version we support +end + +function feature_names() + fnames = Ref{Ptr{FeatureName}}() + nf = Ref{Csize_t}() + @ccall jl_reflect_feature_names(fnames::Ptr{Ptr{FeatureName}}, nf::Ptr{Csize_t})::Cvoid + if fnames[] == C_NULL + @assert nf[] == 0 + return Vector{FeatureName}(undef, 0) + end + Base.unsafe_wrap(Array, fnames[], nf[], own=false) +end + +function test_feature(features::Vector{UInt8}, feat::FeatureName) + bitidx = feat.bit + u8idx = div(bitidx, 8) + 1 + bit = bitidx % 8 + return (features[u8idx] & (1 << bit)) != 0 +end + +function show(io::IO, it::ImageTarget) + print(io, it.name) + if !isempty(it.ext_features) + print(io, ",", it.ext_features) + end + print(io, "; flags=", it.flags) + print(io, "; features_en=(") + first = true + for feat in feature_names() + if test_feature(it.features_en, feat) + name = Base.unsafe_string(feat.name) + if first + first = false + print(io, name) + else + print(io, ", ", name) + end + end + end + print(io, ")") + # Is feature_dis useful? 
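`test_feature` above reads one bit out of a packed little-endian feature byte vector. The same indexing in isolation:

```julia
# bit `b` lives in byte div(b, 8) + 1, at position b % 8 within that byte
has_bit(features::Vector{UInt8}, b::Integer) =
    (features[div(b, 8) + 1] & (UInt8(1) << (b % 8))) != 0

has_bit(UInt8[0b0000_0100], 2)    # true
has_bit(UInt8[0x00, 0x01], 8)     # true: first bit of the second byte
```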
+end + +# should sync with the types of arguments of `stale_cachefile` +const StaleCacheKey = Tuple{PkgId, UInt128, String, String} + +function compilecache_path(pkg::PkgId; ignore_loaded::Bool=false, stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(), - cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg), - sourcepath::Union{String,Nothing}=Base.locate_package(pkg) - ) - isnothing(sourcepath) && error("Cannot locate source for $(repr(pkg))") + cachepath_cache::Dict{PkgId, Vector{String}}=Dict{PkgId, Vector{String}}(), + cachepaths::Vector{String}=get!(() -> find_all_in_cache_path(pkg), cachepath_cache, pkg), + sourcepath::Union{String,Nothing}=Base.locate_package(pkg), + flags::CacheFlags=CacheFlags()) + path = nothing + isnothing(sourcepath) && error("Cannot locate source for $(repr("text/plain", pkg))") for path_to_try in cachepaths - staledeps = stale_cachefile(sourcepath, path_to_try, ignore_loaded = true) + staledeps = stale_cachefile(sourcepath, path_to_try; ignore_loaded, requested_flags=flags) if staledeps === true continue end - staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + staledeps, _, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128} # finish checking staledeps module graph - for i in 1:length(staledeps) - dep = staledeps[i] + for dep in staledeps dep isa Module && continue modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} - modpaths = find_all_in_cache_path(modkey) + modpaths = get!(() -> find_all_in_cache_path(modkey), cachepath_cache, modkey) for modpath_to_try in modpaths::Vector{String} stale_cache_key = (modkey, modbuild_id, modpath, modpath_to_try)::StaleCacheKey - if get!(() -> stale_cachefile(stale_cache_key...; ignore_loaded) === true, + if get!(() -> stale_cachefile(stale_cache_key...; ignore_loaded, requested_flags=flags) === true, stale_cache, stale_cache_key) continue end @@ -1417,86 +1859,131 @@ function isprecompiled(pkg::PkgId; # file might be read-only and then we fail to update timestamp, which is fine ex isa IOError || rethrow() end - return true + path = path_to_try + break @label check_next_path end - return false + return path +end + +""" + Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false) + +Returns whether a given PkgId within the active project is precompiled. + +By default this check observes the same approach that code loading takes +with respect to when different versions of dependencies are currently loaded +to that which is expected. To ignore loaded modules and answer as if in a +fresh julia session specify `ignore_loaded=true`. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. +""" +function isprecompiled(pkg::PkgId; + ignore_loaded::Bool=false, + stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(), + cachepath_cache::Dict{PkgId, Vector{String}}=Dict{PkgId, Vector{String}}(), + cachepaths::Vector{String}=get!(() -> find_all_in_cache_path(pkg), cachepath_cache, pkg), + sourcepath::Union{String,Nothing}=Base.locate_package(pkg), + flags::CacheFlags=CacheFlags()) + path = compilecache_path(pkg; ignore_loaded, stale_cache, cachepath_cache, cachepaths, sourcepath, flags) + return !isnothing(path) +end + +""" + Base.isrelocatable(pkg::PkgId) + +Returns whether a given PkgId within the active project is precompiled and the +associated cache is relocatable. + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. 
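`isprecompiled` above now delegates to `compilecache_path` and simply reports whether a usable cache file was found. Hedged usage sketch ("Example" stands in for any package in the active environment):

```julia
pkg = Base.identify_package("Example")
pkg === nothing || Base.isprecompiled(pkg)                         # honors currently loaded deps
pkg === nothing || Base.isprecompiled(pkg; ignore_loaded = true)   # as if in a fresh session
```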
+""" +function isrelocatable(pkg::PkgId) + path = compilecache_path(pkg) + isnothing(path) && return false + io = open(path, "r") + try + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) + _, (includes, includes_srcfiles, _), _... = _parse_cache_header(io, path) + for inc in includes + !startswith(inc.filename, "@depot") && return false + if inc ∉ includes_srcfiles + # its an include_dependency + track_content = inc.mtime == -1.0 + track_content || return false + end + end + finally + close(io) + end + return true end -# loads a precompile cache file, after checking stale_cachefile tests +# search for a precompile cache file to load, after some various checks function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) assert_havelock(require_lock) - loaded = nothing - if root_module_exists(modkey) - loaded = root_module(modkey) - else - loaded = start_loading(modkey) - if loaded === nothing - try - modpath = locate_package(modkey) - modpath === nothing && return nothing - set_pkgorigin_version_path(modkey, String(modpath)) - loaded = _require_search_from_serialized(modkey, String(modpath), build_id) - finally - end_loading(modkey, loaded) - end - if loaded isa Module - insert_extension_triggers(modkey) - run_package_callbacks(modkey) - end + loaded = start_loading(modkey, build_id, false) + if loaded === nothing + try + modpath = locate_package(modkey) + isnothing(modpath) && error("Cannot locate source for $(repr("text/plain", modkey))") + modpath = String(modpath)::String + set_pkgorigin_version_path(modkey, modpath) + loaded = _require_search_from_serialized(modkey, modpath, build_id, true) + finally + end_loading(modkey, loaded) + end + if loaded isa Module + insert_extension_triggers(modkey) + run_package_callbacks(modkey) end end - if !(loaded isa Module) || PkgId(loaded) != modkey - return ErrorException("Required dependency $modkey failed to load from a cache file.") + if loaded isa Module && PkgId(loaded) == modkey && module_build_id(loaded) === build_id + return loaded end - return loaded + return ErrorException("Required dependency $modkey failed to load from a cache file.") end -# loads a precompile cache file, ignoring stale_cachefile tests -# assuming all depmods are already loaded and everything is valid -function _tryrequire_from_serialized(modkey::PkgId, path::String, ocachepath::Union{Nothing, String}, sourcepath::String, depmods::Vector{Any}) - assert_havelock(require_lock) - loaded = nothing - if root_module_exists(modkey) - loaded = root_module(modkey) - else - loaded = start_loading(modkey) - if loaded === nothing - try - for i in 1:length(depmods) - dep = depmods[i] - dep isa Module && continue - _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128} - @assert root_module_exists(depkey) - dep = root_module(depkey) - depmods[i] = dep +# returns whether the package is tracked in coverage or malloc tracking based on +# JLOptions and includes +function pkg_tracked(includes) + if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0 + return false + elseif JLOptions().code_coverage == 1 || JLOptions().malloc_log == 1 # user + # Just say true. 
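`isrelocatable` above additionally requires every include recorded in the cache to be depot-relative (`@depot`-prefixed) and content-tracked. Usage sketch on Julia 1.11+ (same placeholder package name):

```julia
pkg = Base.identify_package("Example")
pkg === nothing ? false : Base.isrelocatable(pkg)
```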
Pkgimages aren't in Base + return true + elseif JLOptions().code_coverage == 2 || JLOptions().malloc_log == 2 # all + return true + elseif JLOptions().code_coverage == 3 || JLOptions().malloc_log == 3 # tracked path + if JLOptions().tracked_path == C_NULL + return false + else + tracked_path = unsafe_string(JLOptions().tracked_path) + if isempty(tracked_path) + return false + else + return any(includes) do inc + startswith(inc.filename, tracked_path) end - set_pkgorigin_version_path(modkey, sourcepath) - loaded = _include_from_serialized(modkey, path, ocachepath, depmods) - finally - end_loading(modkey, loaded) - end - if loaded isa Module - insert_extension_triggers(modkey) - run_package_callbacks(modkey) end end end - if !(loaded isa Module) || PkgId(loaded) != modkey - return ErrorException("Required dependency $modkey failed to load from a cache file.") - end - return loaded end # loads a precompile cache file, ignoring stale_cachefile tests -# load the best available (non-stale) version of all dependent modules first +# load all dependent modules first function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}) assert_havelock(require_lock) local depmodnames io = open(path, "r") + ignore_native = false try - iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") - _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io) + iszero(isvalid_cache_header(io)) && return ArgumentError("Incompatible header in cache file $path.") + _, (includes, _, _), depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) + + ignore_native = pkg_tracked(includes) + pkgimage = !isempty(clone_targets) if pkgimage ocachepath !== nothing || return ArgumentError("Expected ocachepath to be provided") @@ -1522,121 +2009,228 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union depmods[i] = dep end # then load the file - return _include_from_serialized(pkg, path, ocachepath, depmods) + loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native; register = true) + return loaded end -# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it +# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it or it was stale # returns the set of modules restored if the cache load succeeded -@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128) +@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128, stalecheck::Bool; reasons=nothing, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH) assert_havelock(require_lock) - paths = find_all_in_cache_path(pkg) - for path_to_try in paths::Vector{String} - staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try) - if staledeps === true - continue - end - staledeps, ocachefile = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} - # finish checking staledeps module graph - for i in 1:length(staledeps) - dep = staledeps[i] - dep isa Module && continue - modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} - modpaths = find_all_in_cache_path(modkey) - for modpath_to_try in modpaths - modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try) - if modstaledeps === true - continue + paths = find_all_in_cache_path(pkg, DEPOT_PATH) + newdeps = PkgId[] + try_build_ids = UInt128[build_id] + if build_id == UInt128(0) + let 
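`pkg_tracked` above keys off the coverage and allocation-tracking modes selected on the command line, which is what decides whether the native code in a pkgimage must be ignored. A sketch of how those modes are selected and inspected (`JLOptions` is internal and its layout may change):

```julia
# selected via: julia --code-coverage={none|user|all|@<path>}
# (allocation tracking via --track-allocation is analogous)
opts = Base.JLOptions()
opts.code_coverage, opts.malloc_log    # 0 = none, 1 = user, 2 = all, 3 = tracked path
```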
loaded = get(loaded_precompiles, pkg, nothing) + if loaded !== nothing + for mod in loaded # try these in reverse original load order to see if one is already valid + pushfirst!(try_build_ids, module_build_id(mod)) end - modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}} - staledeps[i] = (modpath, modkey, modpath_to_try, modstaledeps, modocachepath) - @goto check_next_dep end - @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache." - @goto check_next_path - @label check_next_dep end - try - touch(path_to_try) # update timestamp of precompilation file - catch ex # file might be read-only and then we fail to update timestamp, which is fine - ex isa IOError || rethrow() - end - # finish loading module graph into staledeps - for i in 1:length(staledeps) - dep = staledeps[i] - dep isa Module && continue - modpath, modkey, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, String, Vector{Any}, Union{Nothing, String}} - dep = _tryrequire_from_serialized(modkey, modcachepath, modocachepath, modpath, modstaledeps) - if !isa(dep, Module) - @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep - @goto check_next_path + end + for build_id in try_build_ids + for path_to_try in paths::Vector{String} + staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try; reasons, stalecheck) + if staledeps === true + continue + end + staledeps, ocachefile, newbuild_id = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128} + startedloading = length(staledeps) + 1 + try # any exit from here (goto, break, continue, return) will end_loading + # finish checking staledeps module graph, while acquiring all start_loading locks + # so that concurrent require calls won't make any different decisions that might conflict with the decisions here + # note that start_loading will drop the loading lock if necessary + let i = 0 + # start_loading here has a deadlock problem if we try to load `A,B,C` and `B,A,D` at the same time: + # it will claim A,B have a cycle, but really they just have an ambiguous order and need to be batch-acquired rather than singly + # solve that by making sure we can start_loading everything before allocating each of those and doing all the stale checks + while i < length(staledeps) + i += 1 + dep = staledeps[i] + dep isa Module && continue + _, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} + dep = canstart_loading(modkey, modbuild_id, stalecheck) + if dep isa Module + if PkgId(dep) == modkey && module_build_id(dep) === modbuild_id + staledeps[i] = dep + continue + else + @debug "Rejecting cache file $path_to_try because module $modkey got loaded at a different version than expected." 
+ @goto check_next_path + end + continue + elseif dep === nothing + continue + end + wait(dep) # releases require_lock, so requires restarting this loop + i = 0 + end + end + for i in reverse(eachindex(staledeps)) + dep = staledeps[i] + dep isa Module && continue + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} + # inline a call to start_loading here + @assert canstart_loading(modkey, modbuild_id, stalecheck) === nothing + package_locks[modkey] = (current_task(), Threads.Condition(require_lock), modbuild_id) + startedloading = i + modpaths = find_all_in_cache_path(modkey, DEPOT_PATH) + for modpath_to_try in modpaths + modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try; stalecheck) + if modstaledeps === true + continue + end + modstaledeps, modocachepath, _ = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128} + staledeps[i] = (modpath, modkey, modbuild_id, modpath_to_try, modstaledeps, modocachepath) + @goto check_next_dep + end + @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache." + @goto check_next_path + @label check_next_dep + end + M = maybe_loaded_precompile(pkg, newbuild_id) + if isa(M, Module) + stalecheck && register_root_module(M) + return M + end + if stalecheck + try + touch(path_to_try) # update timestamp of precompilation file + catch ex # file might be read-only and then we fail to update timestamp, which is fine + ex isa IOError || rethrow() + end + end + # finish loading module graph into staledeps + # n.b. this runs __init__ methods too early, so it is very unwise to have those, as they may see inconsistent loading state, causing them to fail unpredictably here + for i in eachindex(staledeps) + dep = staledeps[i] + dep isa Module && continue + modpath, modkey, modbuild_id, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}} + set_pkgorigin_version_path(modkey, modpath) + dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps; register = stalecheck) + if !isa(dep, Module) + @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." 
exception=dep + @goto check_next_path + else + startedloading = i + 1 + end_loading(modkey, dep) + staledeps[i] = dep + push!(newdeps, modkey) + end + end + restored = maybe_loaded_precompile(pkg, newbuild_id) + if !isa(restored, Module) + restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps; register = stalecheck) + end + isa(restored, Module) && return restored + @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored + @label check_next_path + finally + # cancel all start_loading locks that were taken but not fulfilled before failing + for i in startedloading:length(staledeps) + dep = staledeps[i] + dep isa Module && continue + if dep isa Tuple{String, PkgId, UInt128} + _, modkey, _ = dep + else + _, modkey, _ = dep::Tuple{String, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}} + end + end_loading(modkey, nothing) + end + for modkey in newdeps + insert_extension_triggers(modkey) + stalecheck && run_package_callbacks(modkey) + end end - staledeps[i] = dep end - restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps) - isa(restored, Module) && return restored - @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored - continue - @label check_next_path end return nothing end # to synchronize multiple tasks trying to import/using something -const package_locks = Dict{PkgId,Pair{Task,Threads.Condition}}() +const package_locks = Dict{PkgId,Tuple{Task,Threads.Condition,UInt128}}() debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but more complete algorithm that can handle simultaneous tasks. # This only triggers if you have multiple tasks trying to load the same package at the same time, - # so it is unlikely to make a difference normally. -function start_loading(modkey::PkgId) - # handle recursive calls to require + # so it is unlikely to make a performance difference normally. + +function canstart_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool) assert_havelock(require_lock) + require_lock.reentrancy_cnt == 1 || throw(ConcurrencyViolationError("recursive call to start_loading")) loading = get(package_locks, modkey, nothing) - if loading !== nothing - # load already in progress for this module on the task - task, cond = loading - deps = String[modkey.name] - pkgid = modkey - assert_havelock(cond.lock) - if debug_loading_deadlocks && current_task() !== task - waiters = Dict{Task,Pair{Task,PkgId}}() # invert to track waiting tasks => loading tasks - for each in package_locks - cond2 = each[2][2] - assert_havelock(cond2.lock) - for waiting in cond2.waitq - push!(waiters, waiting => (each[2][1] => each[1])) - end - end - while true - running = get(waiters, task, nothing) - running === nothing && break - task, pkgid = running - push!(deps, pkgid.name) - task === current_task() && break - end - end - if current_task() === task - others = String[modkey.name] # repeat this to emphasize the cycle here - for each in package_locks # list the rest of the packages being loaded too - if each[2][1] === task - other = each[1].name - other == modkey.name || other == pkgid.name || push!(others, other) - end + if loading === nothing + loaded = stalecheck ? 
maybe_root_module(modkey) : nothing + loaded isa Module && return loaded + if build_id != UInt128(0) + loaded = maybe_loaded_precompile(modkey, build_id) + loaded isa Module && return loaded + end + return nothing + end + if !stalecheck && build_id != UInt128(0) && loading[3] != build_id + # don't block using an existing specific loaded module on needing a different concurrently loaded one + loaded = maybe_loaded_precompile(modkey, build_id) + loaded isa Module && return loaded + end + # load already in progress for this module on the task + task, cond = loading + deps = String[modkey.name] + pkgid = modkey + assert_havelock(cond.lock) + if debug_loading_deadlocks && current_task() !== task + waiters = Dict{Task,Pair{Task,PkgId}}() # invert to track waiting tasks => loading tasks + for each in package_locks + cond2 = each[2][2] + assert_havelock(cond2.lock) + for waiting in cond2.waitq + push!(waiters, waiting => (each[2][1] => each[1])) end - msg = sprint(deps, others) do io, deps, others - print(io, "deadlock detected in loading ") - join(io, deps, " -> ") - print(io, " -> ") - join(io, others, " && ") + end + while true + running = get(waiters, task, nothing) + running === nothing && break + task, pkgid = running + push!(deps, pkgid.name) + task === current_task() && break + end + end + if current_task() === task + others = String[modkey.name] # repeat this to emphasize the cycle here + for each in package_locks # list the rest of the packages being loaded too + if each[2][1] === task + other = each[1].name + other == modkey.name || other == pkgid.name || push!(others, other) end - throw(ConcurrencyViolationError(msg)) end - return wait(cond) + msg = sprint(deps, others) do io, deps, others + print(io, "deadlock detected in loading ") + join(io, deps, " -> ") + print(io, " -> ") + join(io, others, " && ") + end + throw(ConcurrencyViolationError(msg)) + end + return cond +end + +function start_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool) + # handle recursive and concurrent calls to require + while true + loaded = canstart_loading(modkey, build_id, stalecheck) + if loaded === nothing + package_locks[modkey] = (current_task(), Threads.Condition(require_lock), build_id) + return nothing + elseif loaded isa Module + return loaded + end + loaded = wait(loaded) + loaded isa Module && return loaded end - package_locks[modkey] = current_task() => Threads.Condition(require_lock) - return end function end_loading(modkey::PkgId, @nospecialize loaded) + assert_havelock(require_lock) loading = pop!(package_locks, modkey) notify(loading[2], loaded, all=true) nothing @@ -1652,43 +2246,69 @@ const package_callbacks = Any[] const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: -const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled +const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", because they are explicitly loaded, and the process should try to avoid invalidating them +const _require_dependencies = Any[] # a list of (mod, abspath, fsize, hash, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies -function 
_include_dependency(mod::Module, _path::AbstractString) + +function _include_dependency(mod::Module, _path::AbstractString; track_content::Bool=true, + path_may_be_dir::Bool=false) + _include_dependency!(_require_dependencies, _track_dependencies[], mod, _path, track_content, path_may_be_dir) +end + +function _include_dependency!(dep_list::Vector{Any}, track_dependencies::Bool, + mod::Module, _path::AbstractString, + track_content::Bool, path_may_be_dir::Bool) prev = source_path(nothing) if prev === nothing path = abspath(_path) else path = normpath(joinpath(dirname(prev), _path)) end - if _track_dependencies[] + if !track_dependencies[] + if !path_may_be_dir && !isfile(path) + throw(SystemError("opening file $(repr(path))", Libc.ENOENT)) + elseif path_may_be_dir && !Filesystem.isreadable(path) + throw(SystemError("opening file or folder $(repr(path))", Libc.ENOENT)) + end + else @lock require_lock begin - push!(_require_dependencies, (mod, path, mtime(path))) + if track_content + hash = isdir(path) ? _crc32c(join(readdir(path))) : open(_crc32c, path, "r") + # use mtime=-1.0 here so that fsize==0 && mtime==0.0 corresponds to a missing include_dependency + push!(dep_list, (mod, path, filesize(path), hash, -1.0)) + else + push!(dep_list, (mod, path, UInt64(0), UInt32(0), mtime(path))) + end end end return path, prev end """ - include_dependency(path::AbstractString) + include_dependency(path::AbstractString; track_content::Bool=true) In a module, declare that the file, directory, or symbolic link specified by `path` -(relative or absolute) is a dependency for precompilation; that is, the module will need -to be recompiled if the modification time of `path` changes. +(relative or absolute) is a dependency for precompilation; that is, if `track_content=true` +the module will need to be recompiled if the content of `path` changes +(if `path` is a directory the content equals `join(readdir(path))`). +If `track_content=false` recompilation is triggered when the modification time `mtime` of `path` changes. This is only needed if your module depends on a path that is not used via [`include`](@ref). It has no effect outside of compilation. + +!!! compat "Julia 1.11" + Keyword argument `track_content` requires at least Julia 1.11. + An error is now thrown if `path` is not readable. """ -function include_dependency(path::AbstractString) - _include_dependency(Main, path) +function include_dependency(path::AbstractString; track_content::Bool=true) + _include_dependency(Main, path, track_content=track_content, path_may_be_dir=true) return nothing end # we throw PrecompilableError when a module doesn't want to be precompiled -struct PrecompilableError <: Exception end +import Core: PrecompilableError function show(io::IO, ex::PrecompilableError) - print(io, "Declaring __precompile__(false) is not allowed in files that are being precompiled.") + print(io, "Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.") end precompilableerror(ex::PrecompilableError) = true precompilableerror(ex::WrappedException) = precompilableerror(ex.error) @@ -1703,13 +2323,14 @@ If a module or file is *not* safely precompilable, it should call `__precompile_ order to throw an error if Julia attempts to precompile it. 
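# A hedged usage sketch of the `include_dependency` keyword documented above
# (Julia 1.11+). The package layout and file names are illustrative assumptions
# and presume those paths exist in the package source tree.
module MyDataPkg
    # default: recompile when the *content* of the data file changes
    include_dependency(joinpath(@__DIR__, "..", "data", "table.csv"))
    # directories hash `join(readdir(dir))`; opt back into mtime-only tracking if desired
    include_dependency(joinpath(@__DIR__, "..", "templates"); track_content=false)
    const DATA_PATH = joinpath(@__DIR__, "..", "data", "table.csv")
end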
""" @noinline function __precompile__(isprecompilable::Bool=true) - if !isprecompilable && ccall(:jl_generating_output, Cint, ()) != 0 + if !isprecompilable && generating_output() throw(PrecompilableError()) end nothing end # require always works in Main scope and loads files from node 1 +# XXX: (this is deprecated, but still used by Distributed) const toplevel_load = Ref(true) const _require_world_age = Ref{UInt}(typemax(UInt)) @@ -1736,14 +2357,18 @@ For more details regarding code loading, see the manual sections on [modules](@r [parallel computing](@ref code-availability). """ function require(into::Module, mod::Symbol) - if _require_world_age[] != typemax(UInt) - Base.invoke_in_world(_require_world_age[], __require, into, mod) - else - @invokelatest __require(into, mod) + world = _require_world_age[] + if world == typemax(UInt) + world = get_world_counter() end + return invoke_in_world(world, __require, into, mod) end function __require(into::Module, mod::Symbol) + if into === __toplevel__ && generating_output(#=incremental=#true) + error("`using/import $mod` outside of a Module detected. Importing a package outside of a module \ + is not allowed during package precompilation.") + end @lock require_lock begin LOADING_CACHE[] = LoadingCache() try @@ -1762,14 +2387,19 @@ function __require(into::Module, mod::Symbol) end end hint_message = hint ? ", maybe you meant `import/using $(dots)$(mod)`" : "" - start_sentence = hint ? "Otherwise, run" : "Run" - throw(ArgumentError(""" - Package $mod not found in current path$hint_message. - - $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.""")) + install_message = if mod != :Pkg + start_sentence = hint ? "Otherwise, run" : "Run" + "\n- $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package." + else # for some reason Pkg itself isn't availability so do not tell them to use Pkg to install it. + "" + end + + throw(ArgumentError("Package $mod not found in current path$hint_message.$install_message")) else + manifest_warnings = collect_manifest_warnings() throw(ArgumentError(""" Package $(where.name) does not have $mod in its dependencies: - - You may have a partially installed environment. Try `Pkg.instantiate()` + $manifest_warnings- You may have a partially installed environment. Try `Pkg.instantiate()` to ensure all packages in the environment are installed. 
- Or, if you have $(where.name) checked out for development and have added $mod as a dependency but haven't updated your primary @@ -1779,7 +2409,8 @@ function __require(into::Module, mod::Symbol) end uuidkey, env = uuidkey_env if _track_dependencies[] - push!(_require_dependencies, (into, binpack(uuidkey), 0.0)) + path = binpack(uuidkey) + push!(_require_dependencies, (into, path, UInt64(0), UInt32(0), 0.0)) end return _require_prelocked(uuidkey, env) finally @@ -1788,36 +2419,85 @@ function __require(into::Module, mod::Symbol) end end -require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey) - -const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL") +function find_unsuitable_manifests_versions() + unsuitable_manifests = String[] + dev_manifests = String[] + for env in load_path() + project_file = env_project_file(env) + project_file isa String || continue # no project file + manifest_file = project_file_manifest_path(project_file) + manifest_file isa String || continue # no manifest file + m = parsed_toml(manifest_file) + man_julia_version = get(m, "julia_version", nothing) + man_julia_version isa String || @goto mark + man_julia_version = VersionNumber(man_julia_version) + thispatch(man_julia_version) != thispatch(VERSION) && @goto mark + isempty(man_julia_version.prerelease) != isempty(VERSION.prerelease) && @goto mark + isempty(man_julia_version.prerelease) && continue + man_julia_version.prerelease[1] != VERSION.prerelease[1] && @goto mark + if VERSION.prerelease[1] == "DEV" + # manifests don't store the 2nd part of prerelease, so cannot check further + # so treat them specially in the warning + push!(dev_manifests, manifest_file) + end + continue + @label mark + push!(unsuitable_manifests, string(manifest_file, " (v", man_julia_version, ")")) + end + return unsuitable_manifests, dev_manifests +end -function _require_prelocked(uuidkey::PkgId, env=nothing) - if _require_world_age[] != typemax(UInt) - Base.invoke_in_world(_require_world_age[], __require_prelocked, uuidkey, env) - else - @invokelatest __require_prelocked(uuidkey, env) +function collect_manifest_warnings() + unsuitable_manifests, dev_manifests = find_unsuitable_manifests_versions() + msg = "" + if !isempty(unsuitable_manifests) + msg *= """ + - Note that the following manifests in the load path were resolved with a different + julia version, which may be the cause of the error. Try to re-resolve them in the + current version, or consider deleting them if that fails: + $(join(unsuitable_manifests, "\n ")) + """ end + if !isempty(dev_manifests) + msg *= """ + - Note that the following manifests in the load path were resolved with a potentially + different DEV version of the current version, which may be the cause of the error. 
+ Try to re-resolve them in the current version, or consider deleting them if that fails: + $(join(dev_manifests, "\n ")) + """ + end + return msg end -function __require_prelocked(uuidkey::PkgId, env=nothing) +function require(uuidkey::PkgId) + world = _require_world_age[] + if world == typemax(UInt) + world = get_world_counter() + end + return invoke_in_world(world, __require, uuidkey) +end +__require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey) +function _require_prelocked(uuidkey::PkgId, env=nothing) assert_havelock(require_lock) - if !root_module_exists(uuidkey) - newm = _require(uuidkey, env) - if newm === nothing - error("package `$(uuidkey.name)` did not define the expected \ - module `$(uuidkey.name)`, check for typos in package module name") + m = start_loading(uuidkey, UInt128(0), true) + if m === nothing + last = toplevel_load[] + try + toplevel_load[] = false + m = __require_prelocked(uuidkey, env) + if m === nothing + error("package `$(uuidkey.name)` did not define the expected \ + module `$(uuidkey.name)`, check for typos in package module name") + end + finally + toplevel_load[] = last + end_loading(uuidkey, m) end insert_extension_triggers(uuidkey) # After successfully loading, notify downstream consumers run_package_callbacks(uuidkey) - if uuidkey == REPL_PKGID - REPL_MODULE_REF[] = newm - end - else - newm = root_module(uuidkey) end - return newm + return m end mutable struct PkgOrigin @@ -1828,12 +2508,26 @@ end PkgOrigin() = PkgOrigin(nothing, nothing, nothing) const pkgorigins = Dict{PkgId,PkgOrigin}() -const loaded_modules = Dict{PkgId,Module}() +const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded +const loaded_precompiles = Dict{PkgId,Vector{Module}}() # extended (complete) list of modules, available to be loaded const loaded_modules_order = Vector{Module}() -const module_keys = IdDict{Module,PkgId}() # the reverse -is_root_module(m::Module) = @lock require_lock haskey(module_keys, m) -root_module_key(m::Module) = @lock require_lock module_keys[m] +root_module_key(m::Module) = PkgId(m) + +function maybe_loaded_precompile(key::PkgId, buildid::UInt128) + @lock require_lock begin + mods = get(loaded_precompiles, key, nothing) + mods === nothing && return + for mod in mods + module_build_id(mod) == buildid && return mod + end + end +end + +function module_build_id(m::Module) + hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) + return (UInt128(hi) << 64) | lo +end @constprop :none function register_root_module(m::Module) # n.b. 
This is called from C after creating a new module in `Base.__toplevel__`, @@ -1843,16 +2537,15 @@ root_module_key(m::Module) = @lock require_lock module_keys[m] if haskey(loaded_modules, key) oldm = loaded_modules[key] if oldm !== m - if (0 != ccall(:jl_generating_output, Cint, ())) && (JLOptions().incremental != 0) + if generating_output(#=incremental=#true) error("Replacing module `$(key.name)`") else @warn "Replacing module `$(key.name)`" end end end - push!(loaded_modules_order, m) + maybe_loaded_precompile(key, module_build_id(m)) === nothing && push!(loaded_modules_order, m) loaded_modules[key] = m - module_keys[m] = key end nothing end @@ -1869,161 +2562,172 @@ using Base end # get a top-level Module from the given key +# this is similar to `require`, but worse in almost every possible way root_module(key::PkgId) = @lock require_lock loaded_modules[key] function root_module(where::Module, name::Symbol) key = identify_package(where, String(name)) key isa PkgId || throw(KeyError(name)) return root_module(key) end +root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key) maybe_root_module(key::PkgId) = @lock require_lock get(loaded_modules, key, nothing) -root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key) loaded_modules_array() = @lock require_lock copy(loaded_modules_order) +# after unreference_module, a subsequent require call will try to load a new copy of it, if stale +# reload(m) = (unreference_module(m); require(m)) function unreference_module(key::PkgId) + @lock require_lock begin if haskey(loaded_modules, key) m = pop!(loaded_modules, key) # need to ensure all modules are GC rooted; will still be referenced - # in module_keys + # in loaded_modules_order + end end end # whoever takes the package_locks[pkg] must call this function immediately -function set_pkgorigin_version_path(pkg::PkgId, path::Union{String,Nothing}) +function set_pkgorigin_version_path(pkg::PkgId, path::String) assert_havelock(require_lock) pkgorigin = get!(PkgOrigin, pkgorigins, pkg) - if path !== nothing - # Pkg needs access to the version of packages in the sysimage. - if Core.Compiler.generating_sysimg() - pkgorigin.version = get_pkgversion_from_path(joinpath(dirname(path), "..")) - end + # Pkg needs access to the version of packages in the sysimage. + if generating_output(#=incremental=#false) + pkgorigin.version = get_pkgversion_from_path(joinpath(dirname(path), "..")) end pkgorigin.path = path nothing end -# A hook to allow code load to use Pkg.precompile +# Unused const PKG_PRECOMPILE_HOOK = Ref{Function}() +disable_parallel_precompile::Bool = false # Returns `nothing` or the new(ish) module -function _require(pkg::PkgId, env=nothing) +function __require_prelocked(pkg::PkgId, env) assert_havelock(require_lock) - loaded = start_loading(pkg) - loaded === nothing || return loaded - last = toplevel_load[] - try - toplevel_load[] = false - # perform the search operation to select the module file require intends to load - path = locate_package(pkg, env) - if path === nothing - throw(ArgumentError(""" - Package $pkg is required but does not seem to be installed: - - Run `Pkg.instantiate()` to install all recorded dependencies. 
- """)) - end - set_pkgorigin_version_path(pkg, path) - - pkg_precompile_attempted = false # being safe to avoid getting stuck in a Pkg.precompile loop - - # attempt to load the module file via the precompile cache locations - if JLOptions().use_compiled_modules != 0 - @label load_from_cache - m = _require_search_from_serialized(pkg, path, UInt128(0)) - if m isa Module - return m - end - end - - # if the module being required was supposed to have a particular version - # but it was not handled by the precompile loader, complain - for (concrete_pkg, concrete_build_id) in _concrete_dependencies - if pkg == concrete_pkg - @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache. - This may mean $pkg does not support precompilation but is imported by a module that does.""" - if JLOptions().incremental != 0 - # during incremental precompilation, this should be fail-fast - throw(PrecompilableError()) - end + # perform the search operation to select the module file require intends to load + path = locate_package(pkg, env) + if path === nothing + throw(ArgumentError(""" + Package $(repr("text/plain", pkg)) is required but does not seem to be installed: + - Run `Pkg.instantiate()` to install all recorded dependencies. + """)) + end + set_pkgorigin_version_path(pkg, path) + + parallel_precompile_attempted = false # being safe to avoid getting stuck in a precompilepkgs loop + reasons = Dict{String,Int}() + # attempt to load the module file via the precompile cache locations + if JLOptions().use_compiled_modules != 0 + @label load_from_cache + loaded = _require_search_from_serialized(pkg, path, UInt128(0), true; reasons) + if loaded isa Module + return loaded + end + end + + if JLOptions().use_compiled_modules == 3 + error("Precompiled image $pkg not available with flags $(CacheFlags())") + end + + # if the module being required was supposed to have a particular version + # but it was not handled by the precompile loader, complain + for (concrete_pkg, concrete_build_id) in _concrete_dependencies + if pkg == concrete_pkg + @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache. 
+ This may mean $(repr("text/plain", pkg)) does not support precompilation but is imported by a module that does.""" + if JLOptions().incremental != 0 + # during incremental precompilation, this should be fail-fast + throw(PrecompilableError()) end end + end - if JLOptions().use_compiled_modules != 0 - if (0 == ccall(:jl_generating_output, Cint, ())) || (JLOptions().incremental != 0) - if !pkg_precompile_attempted && isinteractive() && isassigned(PKG_PRECOMPILE_HOOK) - pkg_precompile_attempted = true - unlock(require_lock) - try - PKG_PRECOMPILE_HOOK[](pkg.name, _from_loading = true) - finally - lock(require_lock) - end - @goto load_from_cache - end - # spawn off a new incremental pre-compile task for recursive `require` calls - cachefile_or_module = maybe_cachefile_lock(pkg, path) do - # double-check now that we have lock - m = _require_search_from_serialized(pkg, path, UInt128(0)) - m isa Module && return m - compilecache(pkg, path) + if JLOptions().use_compiled_modules == 1 + if !generating_output(#=incremental=#false) + project = active_project() + if !generating_output() && !parallel_precompile_attempted && !disable_parallel_precompile && @isdefined(Precompilation) && project !== nothing && + isfile(project) && project_file_manifest_path(project) !== nothing + parallel_precompile_attempted = true + unlock(require_lock) + try + Precompilation.precompilepkgs([pkg.name]; _from_loading=true, ignore_loaded=false) + finally + lock(require_lock) end - cachefile_or_module isa Module && return cachefile_or_module::Module - cachefile = cachefile_or_module - if isnothing(cachefile) # maybe_cachefile_lock returns nothing if it had to wait for another process - @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search - elseif isa(cachefile, Exception) - if precompilableerror(cachefile) - verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug - @logmsg verbosity "Skipping precompilation since __precompile__(false). Importing $pkg." - else - @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m + @goto load_from_cache + end + # spawn off a new incremental pre-compile task for recursive `require` calls + loaded = maybe_cachefile_lock(pkg, path) do + # double-check the search now that we have lock + m = _require_search_from_serialized(pkg, path, UInt128(0), true) + m isa Module && return m + triggers = get(EXT_PRIMED, pkg, nothing) + loadable_exts = nothing + if triggers !== nothing # extension + loadable_exts = PkgId[] + for (ext′, triggers′) in EXT_PRIMED + if triggers′ ⊊ triggers + push!(loadable_exts, ext′) + end end - # fall-through to loading the file locally if not incremental + end + return compilecache(pkg, path; reasons, loadable_exts) + end + loaded isa Module && return loaded + if isnothing(loaded) # maybe_cachefile_lock returns nothing if it had to wait for another process + @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search + elseif isa(loaded, Exception) + if precompilableerror(loaded) + verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug + @logmsg verbosity "Skipping precompilation due to precompilable error. Importing $(repr("text/plain", pkg))." 
exception=loaded else - cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}} - m = _tryrequire_from_serialized(pkg, cachefile, ocachefile) - if !isa(m, Module) - @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m - else - return m - end + @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=loaded end - if JLOptions().incremental != 0 - # during incremental precompilation, this should be fail-fast - throw(PrecompilableError()) + # fall-through to loading the file locally if not incremental + else + cachefile, ocachefile = loaded::Tuple{String, Union{Nothing, String}} + loaded = _tryrequire_from_serialized(pkg, cachefile, ocachefile) + if !isa(loaded, Module) + @warn "The call to compilecache failed to create a usable precompiled cache file for $(repr("text/plain", pkg))" exception=loaded + else + return loaded end end + if JLOptions().incremental != 0 + # during incremental precompilation, this should be fail-fast + throw(PrecompilableError()) + end end + end - # just load the file normally via include - # for unknown dependencies - uuid = pkg.uuid - uuid = (uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid)) - old_uuid = ccall(:jl_module_uuid, NTuple{2, UInt64}, (Any,), __toplevel__) + # just load the file normally via include + # for unknown dependencies + uuid = pkg.uuid + uuid = (uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid)) + old_uuid = ccall(:jl_module_uuid, NTuple{2, UInt64}, (Any,), __toplevel__) + if uuid !== old_uuid + ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid) + end + unlock(require_lock) + try + include(__toplevel__, path) + loaded = maybe_root_module(pkg) + finally + lock(require_lock) if uuid !== old_uuid - ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid) - end - unlock(require_lock) - try - include(__toplevel__, path) - loaded = get(loaded_modules, pkg, nothing) - finally - lock(require_lock) - if uuid !== old_uuid - ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid) - end + ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid) end - finally - toplevel_load[] = last - end_loading(pkg, loaded) end return loaded end -# Only used from test/precompile.jl -function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}) +# load a serialized file directly, including dependencies (without checking staleness except for immediate conflicts) +# this does not call start_loading / end_loading, so can lead to some odd behaviors +function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}, sourcepath::String) @lock require_lock begin - set_pkgorigin_version_path(uuidkey, nothing) + set_pkgorigin_version_path(uuidkey, sourcepath) newm = _tryrequire_from_serialized(uuidkey, path, ocachepath) newm isa Module || throw(newm) insert_extension_triggers(uuidkey) @@ -2033,7 +2737,82 @@ function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Unio end end - +# load a serialized file directly from append_bundled_depot_path for uuidkey without stalechecks +""" + require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing) + +!!! warning "May load duplicate copies of stdlib packages." 
+ + This requires that all stdlib packages loaded are compatible with having concurrent + copies of themselves loaded into memory. It also places additional restrictions on + the kinds of type-piracy that are allowed in stdlibs, since type-piracy can cause the + dispatch table to become visibly "torn" across multiple different packages. + + The specific requirements are: + + The import side (caller of `require_stdlib`) must not leak any stdlib types, esp. + to any context that may have a conflicting copy of the stdlib(s) (or vice-versa). + - e.g., if an output is forwarded to user code, it must contain only Base types. + - e.g., if an output contains types from the stdlib, it must be consumed "internally" + before reaching user code. + + The imported code (loaded stdlibs) must be very careful about type piracy: + - It must not access any global state that may differ between stdlib copies in + type-pirated methods. + - It must not return any stdlib types from any type-pirated public methods (since + a loaded duplicate would overwrite the Base method again, returning different + types that don't correspond to the user-accessible copy of the stdlib). + - It must not pass / discriminate stdlib types in type-pirated methods, except + indirectly via methods defined in Base and implemented (w/o type-piracy) in + all copies of the stdlib over their respective types. + + The idea behind the above restrictions is that any type-pirated methods in the stdlib + must return a result that is simultaneously correct for all of the stdlib's loaded + copies, including accounting for global state differences and split type identities. + + Furthermore, any imported code must not leak any stdlib types to globals and containers + (e.g. Vectors and mutable structs) in upstream Modules, since this will also lead to + type-confusion when the type is later pulled out in user / stdlib code. + + For examples of issues like the above, see: + [1] https://github.com/JuliaLang/Pkg.jl/issues/4017#issuecomment-2377589989 + [2] https://github.com/JuliaLang/StyledStrings.jl/issues/91#issuecomment-2379602914 +""" +function require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing) + if generating_output(#=incremental=#true) + # Otherwise this would lead to awkward dependency issues by loading a package that isn't in the Project/Manifest + error("This interactive function requires a stdlib to be loaded, and package code should instead use it directly from that stdlib.") + end + @lock require_lock begin + # the PkgId of the ext, or package if not an ext + this_uuidkey = ext isa String ? 
PkgId(uuid5(package_uuidkey.uuid, ext), ext) : package_uuidkey + env = Sys.STDLIB + newm = start_loading(this_uuidkey, UInt128(0), true) + newm === nothing || return newm + try + # first since this is a stdlib, try to look there directly first + if ext === nothing + sourcepath = normpath(env, this_uuidkey.name, "src", this_uuidkey.name * ".jl") + else + sourcepath = find_ext_path(normpath(joinpath(env, package_uuidkey.name)), ext) + end + depot_path = append_bundled_depot_path!(empty(DEPOT_PATH)) + set_pkgorigin_version_path(this_uuidkey, sourcepath) + newm = _require_search_from_serialized(this_uuidkey, sourcepath, UInt128(0), false; DEPOT_PATH=depot_path) + finally + end_loading(this_uuidkey, newm) + end + if newm isa Module + # After successfully loading, notify downstream consumers + insert_extension_triggers(env, this_uuidkey) + run_package_callbacks(this_uuidkey) + else + # if the user deleted their bundled depot, next try to load it completely normally + newm = _require_prelocked(this_uuidkey) + end + return newm + end +end # relative-path load @@ -2145,7 +2924,7 @@ and return the value of the last expression. The optional `args` argument can be used to set the input arguments of the script (i.e. the global `ARGS` variable). Note that definitions (e.g. methods, globals) are evaluated in the anonymous module and do not affect the current module. -# Example +# Examples ```jldoctest julia> write("testfile.jl", \"\"\" @@ -2163,12 +2942,12 @@ julia> rm("testfile.jl") ``` """ function evalfile(path::AbstractString, args::Vector{String}=String[]) - return Core.eval(Module(:__anon__), + m = Module(:__anon__) + return Core.eval(m, Expr(:toplevel, :(const ARGS = $args), - :(eval(x) = $(Expr(:core, :eval))(__anon__, x)), - :(include(x) = $(Expr(:top, :include))(__anon__, x)), - :(include(mapexpr::Function, x) = $(Expr(:top, :include))(mapexpr, __anon__, x)), + :(const include = $(Base.IncludeInto(m))), + :(const eval = $(Core.EvalInto(m))), :(include($path)))) end evalfile(path::AbstractString, args::Vector) = evalfile(path, String[args...]) @@ -2192,9 +2971,13 @@ function load_path_setup_code(load_path::Bool=true) return code end +# Const global for GC root +const newly_inferred = CodeInstance[] + # this is called in the external process that generates precompiled package files function include_package_for_output(pkg::PkgId, input::String, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String}, concrete_deps::typeof(_concrete_dependencies), source::Union{Nothing,String}) + append!(empty!(Base.DEPOT_PATH), depot_path) append!(empty!(Base.DL_LOAD_PATH), dl_load_path) append!(empty!(Base.LOAD_PATH), load_path) @@ -2210,8 +2993,7 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto task_local_storage()[:SOURCE_PATH] = source end - ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) - Core.Compiler.track_newly_inferred.x = true + ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred) try Base.include(Base.__toplevel__, input) catch ex @@ -2219,52 +3001,86 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto @debug "Aborting `create_expr_cache'" exception=(ErrorException("Declaration of __precompile__(false) not allowed"), catch_backtrace()) exit(125) # we define status = 125 means PrecompileableError finally - Core.Compiler.track_newly_inferred.x = false + ccall(:jl_set_newly_inferred, Cvoid, (Any,), nothing) end + # check that the package defined the expected module so we can 
give a nice error message if not + Base.check_package_module_loaded(pkg) + + # Re-populate the runtime's newly-inferred array, which will be included + # in the output. We removed it above to avoid including any code we may + # have compiled for error handling and validation. + ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred) end +function check_package_module_loaded(pkg::PkgId) + if !haskey(Base.loaded_modules, pkg) + # match compilecache error type for non-125 errors + error("$(repr("text/plain", pkg)) did not define the expected module `$(pkg.name)`, \ + check for typos in package module name") + end + return nothing +end + +# protects against PkgId and UUID being imported and losing Base prefix +_pkg_str(_pkg::PkgId) = (_pkg.uuid === nothing) ? "Base.PkgId($(repr(_pkg.name)))" : "Base.PkgId(Base.UUID(\"$(_pkg.uuid)\"), $(repr(_pkg.name)))" +_pkg_str(_pkg::Vector) = sprint(show, eltype(_pkg); context = :module=>nothing) * "[" * join(map(_pkg_str, _pkg), ",") * "]" +_pkg_str(_pkg::Pair{PkgId}) = _pkg_str(_pkg.first) * " => " * repr(_pkg.second) +_pkg_str(_pkg::Nothing) = "nothing" + const PRECOMPILE_TRACE_COMPILE = Ref{String}() function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String}, - concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout) + concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(), + internal_stderr::IO = stderr, internal_stdout::IO = stdout, loadable_exts::Union{Vector{PkgId},Nothing}=nothing) @nospecialize internal_stderr internal_stdout rm(output, force=true) # Remove file if it exists output_o === nothing || rm(output_o, force=true) - depot_path = map(abspath, DEPOT_PATH) - dl_load_path = map(abspath, DL_LOAD_PATH) - load_path = map(abspath, Base.load_path()) + depot_path = String[abspath(x) for x in DEPOT_PATH] + dl_load_path = String[abspath(x) for x in DL_LOAD_PATH] + load_path = String[abspath(x) for x in Base.load_path()] + # if pkg is a stdlib, append its parent Project.toml to the load path + triggers = get(EXT_PRIMED, pkg, nothing) + if triggers !== nothing + parentid = triggers[1] + for env in load_path + project_file = env_project_file(env) + if project_file === true + _, parent_project_file = entry_point_and_project_file(env, parentid.name) + if parent_project_file !== nothing + parentproj = project_file_name_uuid(parent_project_file, parentid.name) + if parentproj == parentid + push!(load_path, parent_project_file) + end + end + end + end + end path_sep = Sys.iswindows() ? 
';' : ':' any(path -> path_sep in path, load_path) && error("LOAD_PATH entries cannot contain $(repr(path_sep))") - deps_strs = String[] - function pkg_str(_pkg::PkgId) - if _pkg.uuid === nothing - "Base.PkgId($(repr(_pkg.name)))" - else - "Base.PkgId(Base.UUID(\"$(_pkg.uuid)\"), $(repr(_pkg.name)))" - end + if output_o === nothing + # remove options that make no difference given the other cache options + cacheflags = CacheFlags(cacheflags, opt_level=0) end - for (pkg, build_id) in concrete_deps - push!(deps_strs, "$(pkg_str(pkg)) => $(repr(build_id))") - end - + opts = translate_cache_flags(cacheflags, CacheFlags()) # julia_cmd is generated for the running system, and must be fixed if running for precompile instead if output_o !== nothing + @debug "Generating object cache file for $(repr("text/plain", pkg))" cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) - opt_level = Base.JLOptions().opt_level - opts = `-O$(opt_level) --output-o $(output_o) --output-ji $(output) --output-incremental=yes` + push!(opts, "--output-o", output_o) else + @debug "Generating cache file for $(repr("text/plain", pkg))" cpu_target = nothing - opts = `-O0 --output-ji $(output) --output-incremental=yes` end + push!(opts, "--output-ji", output) + isassigned(PRECOMPILE_TRACE_COMPILE) && push!(opts, "--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])") - deps_eltype = sprint(show, eltype(concrete_deps); context = :module=>nothing) - deps = deps_eltype * "[" * join(deps_strs, ",") * "]" - trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : `` - io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) $(opts) - --startup-file=no --history-file=no --warn-overwrite=yes - --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no") - $trace - -`, + io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) + $(flags) + $(opts) + --output-incremental=yes + --startup-file=no --history-file=no --warn-overwrite=yes + $(have_color === nothing ? "--color=auto" : have_color ? "--color=yes" : "--color=no") + -`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1), stderr = internal_stderr, stdout = internal_stdout), @@ -2272,20 +3088,35 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o:: # write data over stdin to avoid the (unlikely) case of exceeding max command line size write(io.in, """ empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated + Base.track_nested_precomp($(_pkg_str(vcat(Base.precompilation_stack, pkg)))) + Base.loadable_extensions = $(_pkg_str(loadable_exts)) Base.precompiling_extension = $(loading_extension) - Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), - $(repr(load_path)), $deps, $(repr(source_path(nothing)))) + Base.precompilation_target = $(_pkg_str(pkg)) + Base.include_package_for_output($(_pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), + $(repr(load_path)), $(_pkg_str(concrete_deps)), $(repr(source_path(nothing)))) """) close(io.in) return io end +const precompilation_stack = Vector{PkgId}() +# Helpful for debugging when precompilation is unexpectedly nested. +# Enable with `JULIA_DEBUG=nested_precomp`. Note that it expected to be nested in classical code-load precompilation +# TODO: Add detection if extension precompilation is nested and error / return early? 
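# A hedged usage sketch for the `nested_precomp` debug group described above;
# the package name is an illustrative assumption. Shell form:
#   JULIA_DEBUG=nested_precomp julia --project -e 'Base.Precompilation.precompilepkgs(["MyDataPkg"])'
ENV["JULIA_DEBUG"] = "nested_precomp"
Base.Precompilation.precompilepkgs(["MyDataPkg"])  # nested precompiles now log e.g. "Parent > Child"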
+function track_nested_precomp(pkgs::Vector{PkgId}) + append!(precompilation_stack, pkgs) + if length(precompilation_stack) > 1 + list() = join(map(p->p.name, precompilation_stack), " > ") + @debug "Nested precompilation: $(list())" _group=:nested_precomp + end +end + function compilecache_dir(pkg::PkgId) entrypath, entryfile = cache_file_entry(pkg) return joinpath(DEPOT_PATH[1], entrypath) end -function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=something(Base.active_project(), ""))::String +function compilecache_path(pkg::PkgId, prefs_hash::UInt64; flags::CacheFlags=CacheFlags(), project::String=something(Base.active_project(), ""))::String entrypath, entryfile = cache_file_entry(pkg) cachepath = joinpath(DEPOT_PATH[1], entrypath) isdir(cachepath) || mkpath(cachepath) @@ -2295,7 +3126,7 @@ function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=somet crc = _crc32c(project) crc = _crc32c(unsafe_string(JLOptions().image_file), crc) crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc) - crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc) + crc = _crc32c(_cacheflag_to_uint8(flags), crc) cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) if cpu_target === nothing @@ -2317,39 +3148,42 @@ This can be used to reduce package load times. Cache files are stored in `DEPOT_PATH[1]/compiled`. See [Module initialization and precompilation](@ref) for important notes. """ -function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout) +function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), loadable_exts::Union{Vector{PkgId},Nothing}=nothing) @nospecialize internal_stderr internal_stdout path = locate_package(pkg) - path === nothing && throw(ArgumentError("$pkg not found during precompilation")) - return compilecache(pkg, path, internal_stderr, internal_stdout) + path === nothing && throw(ArgumentError("$(repr("text/plain", pkg)) not found during precompilation")) + return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons, loadable_exts) end const MAX_NUM_PRECOMPILE_FILES = Ref(10) function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout, - keep_loaded_modules::Bool = true) + keep_loaded_modules::Bool = true; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(), + reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), loadable_exts::Union{Vector{PkgId},Nothing}=nothing) @nospecialize internal_stderr internal_stdout # decide where to put the resulting cache file cachepath = compilecache_dir(pkg) # build up the list of modules that we want the precompile process to preserve - concrete_deps = copy(_concrete_dependencies) if keep_loaded_modules - for mod in loaded_modules_array() - if !(mod === Main || mod === Core || mod === Base) - push!(concrete_deps, PkgId(mod) => module_build_id(mod)) + concrete_deps = copy(_concrete_dependencies) + for (pkgreq, modreq) in loaded_modules + if !(pkgreq === Main || pkgreq === Core || pkgreq === Base) + push!(concrete_deps, pkgreq => module_build_id(modreq)) end end + else + concrete_deps = empty(_concrete_dependencies) end # run the expression and cache the result verbosity = isinteractive() ? 
CoreLogging.Info : CoreLogging.Debug - @logmsg verbosity "Precompiling $pkg" + @logmsg verbosity "Precompiling $(repr("text/plain", pkg)) $(list_reasons(reasons))" # create a temporary file in `cachepath` directory, write the cache in it, # write the checksum, _and then_ atomically move the file to `cachefile`. mkpath(cachepath) - cache_objects = JLOptions().use_pkgimages != 0 + cache_objects = JLOptions().use_pkgimages == 1 tmppath, tmpio = mktemp(cachepath) if cache_objects @@ -2365,7 +3199,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in close(tmpio_o) close(tmpio_so) end - p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, internal_stderr, internal_stdout) + p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, cacheflags, internal_stderr, internal_stdout, loadable_exts) if success(p) if cache_objects @@ -2376,7 +3210,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # Read preferences hash back from .ji file (we can't precompute because # we don't actually know what the list of compile-time preferences are without compiling) prefs_hash = preferences_hash(tmppath) - cachefile = compilecache_path(pkg, prefs_hash) + cachefile = compilecache_path(pkg, prefs_hash; flags=cacheflags) ocachefile = cache_objects ? ocachefile_from_cachefile(cachefile) : nothing # append checksum for so to the end of the .ji file: @@ -2388,7 +3222,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # append extra crc to the end of the .ji file: open(tmppath, "r+") do f if iszero(isvalid_cache_header(f)) - error("Invalid header for $pkg in new cache file $(repr(tmppath)).") + error("Incompatible header for $(repr("text/plain", pkg)) in new cache file $(repr(tmppath)).") end seekend(f) write(f, crc_so) @@ -2398,12 +3232,6 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # inherit permission from the source file (and make them writable) chmod(tmppath, filemode(path) & 0o777 | 0o200) - if cache_objects - # Ensure that the user can execute the `.so` we're generating - # Note that on windows, `filemode(path)` typically returns `0o666`, so this - # addition of the execute bit for the user is doubly needed. - chmod(tmppath_so, filemode(path) & 0o777 | 0o333) - end # prune the directory with cache files if pkg.uuid !== nothing @@ -2426,33 +3254,19 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in end if cache_objects - try - rename(tmppath_so, ocachefile::String; force=true) - catch e - e isa IOError || rethrow() - isfile(ocachefile::String) || rethrow() - # Windows prevents renaming a file that is in use so if there is a Julia session started - # with a package image loaded, we cannot rename that file. - # The code belows append a `_i` to the name of the cache file where `i` is the smallest number such that - # that cache file does not exist. 
- ocachename, ocacheext = splitext(ocachefile::String) - old_cachefiles = Set(readdir(cachepath)) - num = 1 - while true - ocachefile = ocachename * "_$num" * ocacheext - in(basename(ocachefile), old_cachefiles) || break - num += 1 - end - # TODO: Risk for a race here if some other process grabs this name before us - cachefile = cachefile_from_ocachefile(ocachefile) - rename(tmppath_so, ocachefile::String; force=true) + ocachefile_new = rename_unique_ocachefile(tmppath_so, ocachefile) + if ocachefile_new != ocachefile + cachefile = cachefile_from_ocachefile(ocachefile_new) + ocachefile = ocachefile_new end @static if Sys.isapple() run(`$(Linking.dsymutil()) $ocachefile`, Base.DevNull(), Base.DevNull(), Base.DevNull()) end end # this is atomic according to POSIX (not Win32): - rename(tmppath, cachefile; force=true) + # but force=true means it will fall back to non atomic + # move if the initial rename fails. + mv(tmppath, cachefile; force=true) return cachefile, ocachefile end finally @@ -2465,13 +3279,37 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in if p.exitcode == 125 return PrecompilableError() else - error("Failed to precompile $pkg to $(repr(tmppath)).") + error("Failed to precompile $(repr("text/plain", pkg)) to $(repr(tmppath)).") end end -function module_build_id(m::Module) - hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) - return (UInt128(hi) << 64) | lo +function rename_unique_ocachefile(tmppath_so::String, ocachefile_orig::String, ocachefile::String = ocachefile_orig, num = 0) + try + mv(tmppath_so, ocachefile; force=true) + catch e + e isa IOError || rethrow() + # If `rm` was called on a dir containing a loaded DLL, we moved it to temp for cleanup + # on restart. However the old path cannot be used (UV_EACCES) while the DLL is loaded + if !isfile(ocachefile) && e.code != Base.UV_EACCES + rethrow() + end + # Windows prevents renaming a file that is in use so if there is a Julia session started + # with a package image loaded, we cannot rename that file. + # The code belows append a `_i` to the name of the cache file where `i` is the smallest number such that + # that cache file does not exist. 
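# A hedged usage sketch of the `Base.compilecache` entry point that drives the
# machinery above, assuming a package named "Example" resolves in the active
# project (the name is illustrative):
#
#     pkg = Base.identify_package("Example")
#     pkg === nothing && error("Example is not in the current environment")
#     result = Base.compilecache(pkg)  # (ji_path, ocache_path_or_nothing) on success,
#                                      # or a PrecompilableError if precompilation was refused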
+ ocachename, ocacheext = splitext(ocachefile_orig) + ocachefile_unique = ocachename * "_$num" * ocacheext + ocachefile = rename_unique_ocachefile(tmppath_so, ocachefile_orig, ocachefile_unique, num + 1) + end + return ocachefile +end + +function object_build_id(obj) + mod = ccall(:jl_object_top_module, Any, (Any,), obj) + if mod === nothing + return nothing + end + return module_build_id(mod::Module) end function isvalid_cache_header(f::IOStream) @@ -2494,25 +3332,72 @@ function isvalid_pkgimage_crc(f::IOStream, ocachefile::String) expected_crc_so == crc_so end -struct CacheHeaderIncludes - id::PkgId +mutable struct CacheHeaderIncludes + const id::PkgId filename::String - mtime::Float64 - modpath::Vector{String} # seemingly not needed in Base, but used by Revise + const fsize::UInt64 + const hash::UInt32 + const mtime::Float64 + const modpath::Vector{String} # seemingly not needed in Base, but used by Revise end -function parse_cache_header(f::IO) - flags = read(f, UInt8) - modules = Vector{Pair{PkgId, UInt64}}() +function CacheHeaderIncludes(dep_tuple::Tuple{Module, String, Int64, UInt32, Float64}) + return CacheHeaderIncludes(PkgId(dep_tuple[1]), dep_tuple[2:end]..., String[]) +end + +function replace_depot_path(path::AbstractString, depots::Vector{String}=normalize_depots_for_relocation()) + for depot in depots + if startswith(path, string(depot, Filesystem.pathsep())) || path == depot + path = replace(path, depot => "@depot"; count=1) + break + end + end + return path +end + +function normalize_depots_for_relocation() + depots = String[] + sizehint!(depots, length(DEPOT_PATH)) + for d in DEPOT_PATH + isdir(d) || continue + if isdirpath(d) + d = dirname(d) + end + push!(depots, abspath(d)) + end + return depots +end + +function restore_depot_path(path::AbstractString, depot::AbstractString) + replace(path, r"^@depot" => depot; count=1) +end + +function resolve_depot(inc::AbstractString) + startswith(inc, string("@depot", Filesystem.pathsep())) || return :not_relocatable + for depot in DEPOT_PATH + ispath(restore_depot_path(inc, depot)) && return depot + end + return :no_depot_found +end + +function read_module_list(f::IO, has_buildid_hi::Bool) + modules = Vector{Pair{PkgId, UInt128}}() while true n = read(f, Int32) n == 0 && break sym = String(read(f, n)) # module name uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID - build_id = read(f, UInt64) # build UUID (mostly just a timestamp) + build_id_hi = UInt128(has_buildid_hi ? 
read(f, UInt64) : UInt64(0)) << 64 + build_id = (build_id_hi | read(f, UInt64)) # build id (checksum + time - not a UUID) push!(modules, PkgId(uuid, sym) => build_id) end - totbytes = read(f, Int64) # total bytes for file dependencies + preferences + return modules +end + +function _parse_cache_header(f::IO, cachefile::AbstractString) + flags = read(f, UInt8) + modules = read_module_list(f, false) + totbytes = Int64(read(f, UInt64)) # total bytes for file dependencies + preferences # read the list of requirements # and split the list into include and requires statements includes = CacheHeaderIncludes[] @@ -2525,6 +3410,10 @@ function parse_cache_header(f::IO) end depname = String(read(f, n2)) totbytes -= n2 + fsize = read(f, UInt64) + totbytes -= 8 + hash = read(f, UInt32) + totbytes -= 4 mtime = read(f, Float64) totbytes -= 8 n1 = read(f, Int32) @@ -2547,7 +3436,7 @@ function parse_cache_header(f::IO) if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) else - push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath)) + push!(includes, CacheHeaderIncludes(modkey, depname, fsize, hash, mtime, modpath)) end end prefs = String[] @@ -2566,82 +3455,155 @@ function parse_cache_header(f::IO) totbytes -= 8 @assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))" # read the list of modules that are required to be present during loading - required_modules = Vector{Pair{PkgId, UInt128}}() - while true - n = read(f, Int32) - n == 0 && break - sym = String(read(f, n)) # module name - uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID - build_id = UInt128(read(f, UInt64)) << 64 - build_id |= read(f, UInt64) - push!(required_modules, PkgId(uuid, sym) => build_id) - end + required_modules = read_module_list(f, true) l = read(f, Int32) clone_targets = read(f, l) - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags + srcfiles = srctext_files(f, srctextpos, includes) + + return modules, (includes, srcfiles, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags +end + +function parse_cache_header(f::IO, cachefile::AbstractString) + modules, (includes, srcfiles, requires), required_modules, + srctextpos, prefs, prefs_hash, clone_targets, flags = _parse_cache_header(f, cachefile) + + includes_srcfiles = CacheHeaderIncludes[] + includes_depfiles = CacheHeaderIncludes[] + for inc in includes + if inc.filename ∈ srcfiles + push!(includes_srcfiles, inc) + else + push!(includes_depfiles, inc) + end + end + + + # The @depot resolution logic for include() files: + # 1. If the cache is not relocatable because of an absolute path, + # we ignore that path for the depot search. + # Recompilation will be triggered by stale_cachefile() if that absolute path does not exist. + # 2. If we can't find a depot for a relocatable path, + # we still replace it with the depot we found from other files. + # Recompilation will be triggered by stale_cachefile() because the resolved path does not exist. + # 3. We require that relocatable paths all resolve to the same depot. + # 4. We explicitly check that all relocatable paths resolve to the same depot. This has two reasons: + # - We want to scan all source files in order to provide logs for 1. and 2. above. + # - It is possible that a depot might be missing source files. + # Assume that we have two depots on DEPOT_PATH, depot_complete and depot_incomplete. 
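# A hedged round-trip sketch of the relocation helpers `replace_depot_path` /
# `restore_depot_path` defined above, assuming a Unix-style depot path
# (all values illustrative):
#
#     depot = "/home/user/.julia"
#     p     = depot * "/packages/Example/ab12C/src/Example.jl"
#     rel   = Base.replace_depot_path(p, [depot])  # "@depot/packages/Example/ab12C/src/Example.jl"
#     Base.restore_depot_path(rel, depot) == p     # true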
+ # If DEPOT_PATH=["depot_complete","depot_incomplete"] then no recompilation shall happen, + # because depot_complete will be picked. + # If DEPOT_PATH=["depot_incomplete","depot_complete"] we trigger recompilation and + # hopefully a meaningful error about missing files is thrown. + # If we were to just select the first depot we find, then whether recompilation happens would + # depend on whether the first relocatable file resolves to depot_complete or depot_incomplete. + srcdepot = nothing + any_not_relocatable = false + any_no_depot_found = false + multiple_depots_found = false + for src in srcfiles + depot = resolve_depot(src) + if depot === :not_relocatable + any_not_relocatable = true + elseif depot === :no_depot_found + any_no_depot_found = true + elseif isnothing(srcdepot) + srcdepot = depot + elseif depot != srcdepot + multiple_depots_found = true + end + end + if any_no_depot_found + @debug("Unable to resolve @depot tag for at least one include() file from cache file $cachefile", srcfiles, _group=:relocatable) + end + if any_not_relocatable + @debug("At least one include() file from $cachefile is not relocatable", srcfiles, _group=:relocatable) + end + if multiple_depots_found + @debug("Some include() files from $cachefile are distributed over multiple depots", srcfiles, _group=:relocatable) + elseif !isnothing(srcdepot) + for inc in includes_srcfiles + inc.filename = restore_depot_path(inc.filename, srcdepot) + end + end + + # unlike include() files, we allow each relocatable include_dependency() file to resolve + # to a separate depot, #52161 + for inc in includes_depfiles + depot = resolve_depot(inc.filename) + if depot === :no_depot_found + @debug("Unable to resolve @depot tag for include_dependency() file $(inc.filename) from cache file $cachefile", _group=:relocatable) + elseif depot === :not_relocatable + @debug("include_dependency() file $(inc.filename) from $cachefile is not relocatable", _group=:relocatable) + else + inc.filename = restore_depot_path(inc.filename, depot) + end + end + + return modules, (includes, includes_srcfiles, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags end -function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) +function parse_cache_header(cachefile::String) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - ret = parse_cache_header(io) - srcfiles_only || return ret - _, (includes, _), _, srctextpos, _... = ret - srcfiles = srctext_files(io, srctextpos) - delidx = Int[] - for (i, chi) in enumerate(includes) - chi.filename ∈ srcfiles || push!(delidx, i) - end - deleteat!(includes, delidx) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) + ret = parse_cache_header(io, cachefile) return ret finally close(io) end end -preferences_hash(f::IO) = parse_cache_header(f)[6] +preferences_hash(f::IO, cachefile::AbstractString) = parse_cache_header(f, cachefile)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try if iszero(isvalid_cache_header(io)) - throw(ArgumentError("Invalid header in cache file $cachefile.")) + throw(ArgumentError("Incompatible header in cache file $cachefile.")) end - return preferences_hash(io) + return preferences_hash(io, cachefile) finally close(io) end end -function cache_dependencies(f::IO) - _, (includes, _), modules, _... 
= parse_cache_header(f) - return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime +function cache_dependencies(f::IO, cachefile::AbstractString) + _, (includes, _, _), modules, _... = parse_cache_header(f, cachefile) + return modules, map(chi -> chi.filename, includes) # return just filename end function cache_dependencies(cachefile::String) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return cache_dependencies(io) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) + return cache_dependencies(io, cachefile) finally close(io) end end -function read_dependency_src(io::IO, filename::AbstractString) - srctextpos = parse_cache_header(io)[4] +function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) + _, (includes, _, _), _, srctextpos, _, _, _, _ = parse_cache_header(io, cachefile) srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) - return _read_dependency_src(io, filename) + return _read_dependency_src(io, filename, includes) end -function _read_dependency_src(io::IO, filename::AbstractString) +function _read_dependency_src(io::IO, filename::AbstractString, includes::Vector{CacheHeaderIncludes}=CacheHeaderIncludes[]) while !eof(io) filenamelen = read(io, Int32) filenamelen == 0 && break - fn = String(read(io, filenamelen)) + depotfn = String(read(io, filenamelen)) len = read(io, UInt64) + fn = if !startswith(depotfn, string("@depot", Filesystem.pathsep())) + depotfn + else + basefn = restore_depot_path(depotfn, "") + idx = findfirst(includes) do inc + endswith(inc.filename, basefn) + end + isnothing(idx) ? 
depotfn : includes[idx].filename + end if fn == filename return String(read(io, len)) end @@ -2653,23 +3615,23 @@ end function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try - iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return read_dependency_src(io, filename) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Incompatible header in cache file $cachefile.")) + return read_dependency_src(io, cachefile, filename) finally close(io) end end -function srctext_files(f::IO, srctextpos::Int64) +function srctext_files(f::IO, srctextpos::Int64, includes::Vector{CacheHeaderIncludes}) files = Set{String}() srctextpos == 0 && return files seek(f, srctextpos) while !eof(f) filenamelen = read(f, Int32) filenamelen == 0 && break - fn = String(read(f, filenamelen)) + filename = String(read(f, filenamelen)) len = read(f, UInt64) - push!(files, fn) + push!(files, filename) seek(f, position(f) + len) end return files @@ -2788,9 +3750,27 @@ function recursive_prefs_merge(base::Dict{String, Any}, overrides::Dict{String, return new_base end +function get_projects_workspace_to_root(project_file) + projects = String[project_file] + while true + project_file = base_project(project_file) + if project_file === nothing + return projects + end + push!(projects, project_file) + end +end + function get_preferences(uuid::Union{UUID,Nothing} = nothing) merged_prefs = Dict{String,Any}() - for env in reverse(load_path()) + loadpath = load_path() + projects_to_merge_prefs = String[] + append!(projects_to_merge_prefs, Iterators.drop(loadpath, 1)) + if length(loadpath) >= 1 + prepend!(projects_to_merge_prefs, get_projects_workspace_to_root(first(loadpath))) + end + + for env in reverse(projects_to_merge_prefs) project_toml = env_project_file(env) if !isa(project_toml, String) continue @@ -2841,72 +3821,47 @@ get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uu get_compiletime_preferences(::Nothing) = String[] function check_clone_targets(clone_targets) - try - ccall(:jl_check_pkgimage_clones, Cvoid, (Ptr{Cchar},), clone_targets) - return true - catch - return false - end -end - -struct CacheFlags - # OOICCDDP - see jl_cache_flags - use_pkgimages::Bool - debug_level::Int - check_bounds::Int - inline::Bool - opt_level::Int - - function CacheFlags(f::UInt8) - use_pkgimages = Bool(f & 1) - debug_level = Int((f >> 1) & 3) - check_bounds = Int((f >> 3) & 3) - inline = Bool((f >> 5) & 1) - opt_level = Int((f >> 6) & 3) # define OPT_LEVEL in statiddata_utils - new(use_pkgimages, debug_level, check_bounds, inline, opt_level) + rejection_reason = ccall(:jl_check_pkgimage_clones, Any, (Ptr{Cchar},), clone_targets) + if rejection_reason !== nothing + return rejection_reason end end -CacheFlags(f::Int) = CacheFlags(UInt8(f)) -CacheFlags() = CacheFlags(ccall(:jl_cache_flags, UInt8, ())) - -function show(io::IO, cf::CacheFlags) - print(io, "use_pkgimages = ", cf.use_pkgimages) - print(io, ", debug_level = ", cf.debug_level) - print(io, ", check_bounds = ", cf.check_bounds) - print(io, ", inline = ", cf.inline) - print(io, ", opt_level = ", cf.opt_level) -end # Set by FileWatching.__init__() -global mkpidlock_hook -global trymkpidlock_hook -global parse_pidfile_hook +global mkpidlock_hook::Any +global trymkpidlock_hook::Any +global parse_pidfile_hook::Any # The preferences hash is only known after precompilation so just assume no preferences. 
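# A hedged sketch of inspecting the merged preferences that the preferences
# hash is derived from; the UUID and preference key are illustrative placeholders.
demo_uuid = Base.UUID("00000000-0000-0000-0000-000000000000")
prefs = Base.get_preferences(demo_uuid)   # Dict{String,Any}, merged across the load path
get(prefs, "some_preference", nothing)    # value from (Local)Preferences, if any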
# Also ignore the active project, which means that if all other conditions are equal, # the same package cannot be precompiled from different projects and/or different preferences at the same time. -compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile" +compilecache_pidfile_path(pkg::PkgId; flags::CacheFlags=CacheFlags()) = compilecache_path(pkg, UInt64(0); project="", flags) * ".pidfile" + +const compilecache_pidlock_stale_age = 10 # Allows processes to wait if another process is precompiling a given source already. -# The lock file is deleted and precompilation will proceed after `stale_age` seconds if -# - the locking process no longer exists -# - the lock is held by another host, since processes cannot be checked remotely -# or after `stale_age * 25` seconds if the process does still exist. -function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300) +# The lock file mtime will be updated when held at most every `stale_age/2` seconds, with expected +# variance of 10 seconds or more being infrequent but not unusual. +# After `stale_age` seconds beyond the mtime of the lock file, the lock file is deleted and +# precompilation will proceed if the locking process no longer exists or after `stale_age * 5` +# seconds if the process does still exist. +# If the lock is held by another host, it will conservatively wait `stale_age * 5` +# seconds since processes cannot be checked remotely +function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=compilecache_pidlock_stale_age) if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook) pidfile = compilecache_pidfile_path(pkg) - cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age) + cachefile = @invokelatest trymkpidlock_hook(f, pidfile; stale_age) if cachefile === false - pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile) + pid, hostname, age = @invokelatest parse_pidfile_hook(pidfile) verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug if isempty(hostname) || hostname == gethostname() - @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg" + @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $(repr("text/plain", pkg)). Pidfile: $pidfile" else - @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg" + @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $(repr("text/plain", pkg)). 
Pidfile: $pidfile" end # wait until the lock is available, but don't actually acquire it # returning nothing indicates a process waited for another - return invokelatest(mkpidlock_hook, Returns(nothing), pidfile; stale_age) + return @invokelatest mkpidlock_hook(Returns(nothing), pidfile; stale_age) end return cachefile else @@ -2915,29 +3870,100 @@ function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300) end end +function record_reason(reasons::Dict{String,Int}, reason::String) + reasons[reason] = get(reasons, reason, 0) + 1 +end +record_reason(::Nothing, ::String) = nothing +function list_reasons(reasons::Dict{String,Int}) + isempty(reasons) && return "" + return "(cache misses: $(join(("$k ($v)" for (k,v) in reasons), ", ")))" +end +list_reasons(::Nothing) = "" + +function any_includes_stale(includes::Vector{CacheHeaderIncludes}, cachefile::String, reasons::Union{Dict{String,Int},Nothing}=nothing) + for chi in includes + f, fsize_req, hash_req, ftime_req = chi.filename, chi.fsize, chi.hash, chi.mtime + if startswith(f, string("@depot", Filesystem.pathsep())) + @debug("Rejecting stale cache file $cachefile because its depot could not be resolved") + record_reason(reasons, "nonresolveable depot") + return true + end + if !ispath(f) + _f = fixup_stdlib_path(f) + if _f != f && isfile(_f) && startswith(_f, Sys.STDLIB) + continue + end + @debug "Rejecting stale cache file $cachefile because file $f does not exist" + record_reason(reasons, "missing sourcefile") + return true + end + if ftime_req >= 0.0 + # this is an include_dependency for which we only recorded the mtime + ftime = mtime(f) + is_stale = ( ftime != ftime_req ) && + ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes + ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching + ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds + ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. + !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond + if is_stale + @debug "Rejecting stale cache file $cachefile because mtime of include_dependency $f has changed (mtime $ftime, before $ftime_req)" + record_reason(reasons, "include_dependency mtime change") + return true + end + else + fstat = stat(f) + fsize = filesize(fstat) + if fsize != fsize_req + @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)" + record_reason(reasons, "include_dependency fsize change") + return true + end + hash = isdir(fstat) ? 
_crc32c(join(readdir(f))) : open(_crc32c, f, "r") + if hash != hash_req + @debug "Rejecting stale cache file $cachefile because hash of $f has changed (hash $hash, before $hash_req)" + record_reason(reasons, "include_dependency fhash change") + return true + end + end + end + return false +end + # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check -@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) - return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded) -end -@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false) - io = open(cachefile, "r") +@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false, requested_flags::CacheFlags=CacheFlags(), reasons=nothing) + return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded, requested_flags, reasons) +end +@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; + ignore_loaded::Bool=false, requested_flags::CacheFlags=CacheFlags(), + reasons::Union{Dict{String,Int},Nothing}=nothing, stalecheck::Bool=true) + # n.b.: this function does nearly all of the file validation, not just those checks related to stale, so the name is potentially unclear + io = try + open(cachefile, "r") + catch ex + ex isa IOError || ex isa SystemError || rethrow() + @debug "Rejecting cache file $cachefile for $modkey because it could not be opened" isfile(cachefile) + return true + end try checksum = isvalid_cache_header(io) if iszero(checksum) - @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" - return true # invalid cache file + @debug "Rejecting cache file $cachefile due to it containing an incompatible cache header" + record_reason(reasons, "incompatible header") + return true # incompatible cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io) + modules, (includes, _, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, actual_flags = parse_cache_header(io, cachefile) if isempty(modules) return true # ignore empty file end - if ccall(:jl_match_cache_flags, UInt8, (UInt8,), flags) == 0 + if @ccall(jl_match_cache_flags(_cacheflag_to_uint8(requested_flags)::UInt8, actual_flags::UInt8)::UInt8) == 0 @debug """ Rejecting cache file $cachefile for $modkey since the flags are mismatched - current session: $(CacheFlags()) - cache file: $(CacheFlags(flags)) + requested flags: $(requested_flags) [$(_cacheflag_to_uint8(requested_flags))] + cache file: $(CacheFlags(actual_flags)) [$actual_flags] """ + record_reason(reasons, "mismatched flags") return true end pkgimage = !isempty(clone_targets) @@ -2946,14 +3972,21 @@ end if JLOptions().use_pkgimages == 0 # presence of clone_targets means native code cache @debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage" + record_reason(reasons, "requires pkgimages") return true end - if !check_clone_targets(clone_targets) - @debug "Rejecting cache file $cachefile for $modkey since pkgimage can't be loaded on this target" + rejection_reasons = check_clone_targets(clone_targets) + if !isnothing(rejection_reasons) + @debug("Rejecting cache file $cachefile for $modkey:", + 
Reasons=rejection_reasons, + var"Image Targets"=parse_image_targets(clone_targets), + var"Current Targets"=current_image_targets()) + record_reason(reasons, "target mismatch") return true end if !isfile(ocachefile) @debug "Rejecting cache file $cachefile for $modkey since pkgimage $ocachefile was not found" + record_reason(reasons, "missing ocachefile") return true end else @@ -2962,12 +3995,15 @@ end id = first(modules) if id.first != modkey && modkey != PkgId("") @debug "Rejecting cache file $cachefile for $modkey since it is for $id instead" + record_reason(reasons, "for different pkgid") return true end + id_build = id.second + id_build = (UInt128(checksum) << 64) | (id_build % UInt64) if build_id != UInt128(0) - id_build = (UInt128(checksum) << 64) | id.second if id_build != build_id - @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))" + @debug "Ignoring cache file $cachefile for $modkey ($(UUID(id_build))) since it does not provide desired build_id ($((UUID(build_id))))" + record_reason(reasons, "for different buildid") return true end end @@ -2979,93 +4015,96 @@ end depmods = Vector{Any}(undef, ndeps) for i in 1:ndeps req_key, req_build_id = required_modules[i] - # Module is already loaded - if root_module_exists(req_key) - M = root_module(req_key) + # Check if module is already loaded + M = stalecheck ? nothing : maybe_loaded_precompile(req_key, req_build_id) + if M !== nothing + @assert PkgId(M) == req_key && module_build_id(M) === req_build_id + depmods[i] = M + continue + end + M = maybe_root_module(req_key) + if M isa Module if PkgId(M) == req_key && module_build_id(M) === req_build_id depmods[i] = M - elseif ignore_loaded + continue + elseif M == Core + @debug "Rejecting cache file $cachefile because it was made with a different julia version" + record_reason(reasons, "wrong julia version") + return true # Won't be able to fulfill dependency + elseif ignore_loaded || !stalecheck # Used by Pkg.precompile given that there it's ok to precompile different versions of loaded packages - @goto locate_branch else @debug "Rejecting cache file $cachefile because module $req_key is already loaded and incompatible." + record_reason(reasons, "wrong dep version loaded") return true # Won't be able to fulfill dependency end - else - @label locate_branch - path = locate_package(req_key) - if path === nothing - @debug "Rejecting cache file $cachefile because dependency $req_key not found." - return true # Won't be able to fulfill dependency - end - depmods[i] = (path, req_key, req_build_id) end + path = locate_package(req_key) # TODO: add env and/or skip this when stalecheck is false + if path === nothing + @debug "Rejecting cache file $cachefile because dependency $req_key not found." 
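For context on the cache-miss bookkeeping threaded through these checks: `record_reason` just counts rejection reasons and `list_reasons` formats them into a short summary string. A minimal sketch using local stand-ins for the unexported `Base` helpers defined earlier in this hunk (dictionary order in the output may vary):

```julia
# Local stand-ins mirroring the helpers introduced above (they live, unexported, in Base):
record_reason(reasons::Dict{String,Int}, reason::String) =
    (reasons[reason] = get(reasons, reason, 0) + 1)
list_reasons(reasons::Dict{String,Int}) =
    isempty(reasons) ? "" : "(cache misses: $(join(("$k ($v)" for (k, v) in reasons), ", ")))"

reasons = Dict{String,Int}()
record_reason(reasons, "missing sourcefile")
record_reason(reasons, "missing sourcefile")
record_reason(reasons, "mismatched flags")
list_reasons(reasons)   # e.g. "(cache misses: missing sourcefile (2), mismatched flags (1))"
```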
+ record_reason(reasons, "dep missing source") + return true # Won't be able to fulfill dependency + end + depmods[i] = (path, req_key, req_build_id) end # check if this file is going to provide one of our concrete dependencies # or if it provides a version that conflicts with our concrete dependencies # or neither - skip_timecheck = false - for (req_key, req_build_id) in _concrete_dependencies - build_id = get(modules, req_key, UInt64(0)) - if build_id !== UInt64(0) - build_id |= UInt128(checksum) << 64 - if build_id === req_build_id - skip_timecheck = true - break + if stalecheck + for (req_key, req_build_id) in _concrete_dependencies + build_id = get(modules, req_key, UInt64(0)) + if build_id !== UInt64(0) + build_id |= UInt128(checksum) << 64 + if build_id === req_build_id + stalecheck = false + break + end + @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" + record_reason(reasons, "wrong dep buildid") + return true # cachefile doesn't provide the required version of the dependency end - @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" - return true # cachefile doesn't provide the required version of the dependency end end - # now check if this file is fresh relative to its source files - if !skip_timecheck - if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath) - @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath" - return true # cache file was compiled from a different path + # now check if this file's content hash has changed relative to its source files + if stalecheck + if !samefile(includes[1].filename, modpath) + # In certain cases the path rewritten by `fixup_stdlib_path` may + # point to an unreadable directory, make sure we can `stat` the + # file before comparing it with `modpath`. 
+ stdlib_path = fixup_stdlib_path(includes[1].filename) + if !(isreadable(stdlib_path) && samefile(stdlib_path, modpath)) + !samefile(fixup_stdlib_path(includes[1].filename), modpath) + @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath" + record_reason(reasons, "wrong source") + return true # cache file was compiled from a different path + end end for (modkey, req_modkey) in requires # verify that `require(modkey, name(req_modkey))` ==> `req_modkey` - if identify_package(modkey, req_modkey.name) != req_modkey - @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed" + pkg = identify_package(modkey, req_modkey.name) + if pkg != req_modkey + @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed, expected $modkey => $(repr("text/plain", pkg))" + record_reason(reasons, "dep uuid changed") return true end end - for chi in includes - f, ftime_req = chi.filename, chi.mtime - if !ispath(f) - _f = fixup_stdlib_path(f) - if isfile(_f) && startswith(_f, Sys.STDLIB) - # mtime is changed by extraction - @debug "Skipping mtime check for file $f used by $cachefile, since it is a stdlib" - continue - end - @debug "Rejecting stale cache file $cachefile because file $f does not exist" - return true - end - ftime = mtime(f) - is_stale = ( ftime != ftime_req ) && - ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes - ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching - ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds - ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. - !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond - if is_stale - @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed" - return true - end + if any_includes_stale(includes, cachefile, reasons) + return true end end if !isvalid_file_crc(io) @debug "Rejecting cache file $cachefile because it has an invalid checksum" + record_reason(reasons, "invalid checksum") return true end if pkgimage if !isvalid_pkgimage_crc(io, ocachefile::String) @debug "Rejecting cache file $cachefile because $ocachefile has an invalid checksum" + record_reason(reasons, "ocachefile invalid checksum") return true end end @@ -3073,10 +4112,11 @@ end curr_prefs_hash = get_preferences_hash(id.uuid, prefs) if prefs_hash != curr_prefs_hash @debug "Rejecting cache file $cachefile because preferences hash does not match 0x$(string(prefs_hash, base=16)) != 0x$(string(curr_prefs_hash, base=16))" + record_reason(reasons, "preferences hash mismatch") return true end - return depmods, ocachefile # fresh cachefile + return depmods, ocachefile, id_build # fresh cachefile finally close(io) end @@ -3098,9 +4138,32 @@ end """ @__DIR__ -> String -Expand to a string with the absolute path to the directory of the file -containing the macrocall. -Return the current working directory if run from a REPL or if evaluated by `julia -e `. +Macro to obtain the absolute path of the current directory as a string. + +If in a script, returns the directory of the script containing the `@__DIR__` macrocall. If run from a +REPL or if evaluated by `julia -e `, returns the current working directory. 
+ +# Examples + +The example illustrates the difference in the behaviors of `@__DIR__` and `pwd()`, by creating +a simple script in a different directory than the current working one and executing both commands: + +```julia-repl +julia> cd("/home/JuliaUser") # working directory + +julia> # create script at /home/JuliaUser/Projects + open("/home/JuliaUser/Projects/test.jl","w") do io + print(io, \"\"\" + println("@__DIR__ = ", @__DIR__) + println("pwd() = ", pwd()) + \"\"\") + end + +julia> # outputs script directory and current working directory + include("/home/JuliaUser/Projects/test.jl") +@__DIR__ = /home/JuliaUser/Projects +pwd() = /home/JuliaUser +``` """ macro __DIR__() __source__.file === nothing && return nothing @@ -3108,6 +4171,17 @@ macro __DIR__() return isempty(_dirname) ? pwd() : abspath(_dirname) end +function prepare_compiler_stub_image!() + ccall(:jl_add_to_module_init_list, Cvoid, (Any,), Compiler) + register_root_module(Compiler) + filter!(mod->mod !== Compiler, loaded_modules_order) +end + +function expand_compiler_path(tup) + (tup[1], joinpath(Sys.BINDIR, DATAROOTDIR, tup[2]), tup[3:end]...) +end +compiler_chi(tup::Tuple) = CacheHeaderIncludes(expand_compiler_path(tup)) + """ precompile(f, argtypes::Tuple{Vararg{Any}}) @@ -3127,8 +4201,8 @@ function precompile(@nospecialize(argt::Type)) end # Variants that work for `invoke`d calls for which the signature may not be sufficient -precompile(mi::Core.MethodInstance, world::UInt=get_world_counter()) = - (ccall(:jl_compile_method_instance, Cvoid, (Any, Any, UInt), mi, C_NULL, world); return true) +precompile(mi::MethodInstance, world::UInt=get_world_counter()) = + (ccall(:jl_compile_method_instance, Cvoid, (Any, Ptr{Cvoid}, UInt), mi, C_NULL, world); return true) """ precompile(f, argtypes::Tuple{Vararg{Any}}, m::Method) @@ -3143,11 +4217,11 @@ end function precompile(@nospecialize(argt::Type), m::Method) atype, sparams = ccall(:jl_type_intersection_with_env, Any, (Any, Any), argt, m.sig)::SimpleVector - mi = Core.Compiler.specialize_method(m, atype, sparams) + mi = Base.Compiler.specialize_method(m, atype, sparams) return precompile(mi) end -precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) -precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) -precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), IO, IO)) -precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), IO, IO)) +precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) || @assert false +precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) || @assert false +precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false +precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), Cmd, CacheFlags, IO, IO)) || @assert false diff --git a/base/lock.jl b/base/lock.jl index 1663a765111bb..59e554c01c24a 100644 --- a/base/lock.jl +++ b/base/lock.jl @@ -2,6 +2,21 @@ const ThreadSynchronizer = GenericCondition{Threads.SpinLock} +""" + current_task() + +Get the currently running [`Task`](@ref). 
+""" +current_task() = ccall(:jl_get_current_task, Ref{Task}, ()) + +# This bit is set in the `havelock` of a `ReentrantLock` when that lock is locked by some task. +const LOCKED_BIT = 0b01 +# This bit is set in the `havelock` of a `ReentrantLock` just before parking a task. A task is being +# parked if it wants to lock the lock, but it is currently being held by some other task. +const PARKED_BIT = 0b10 + +const MAX_SPIN_ITERS = 40 + # Advisory reentrant lock """ ReentrantLock() @@ -10,8 +25,8 @@ Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can acquire the lock as many times as required (this is what the "Reentrant" part of the name means). Each [`lock`](@ref) must be matched with an [`unlock`](@ref). -Calling 'lock' will also inhibit running of finalizers on that thread until the -corresponding 'unlock'. Use of the standard lock pattern illustrated below +Calling `lock` will also inhibit running of finalizers on that thread until the +corresponding `unlock`. Use of the standard lock pattern illustrated below should naturally be supported, but beware of inverting the try/lock order or missing the try block entirely (e.g. attempting to return with the lock still held): @@ -36,7 +51,28 @@ mutable struct ReentrantLock <: AbstractLock # offset32 = 20, offset64 = 24 reentrancy_cnt::UInt32 # offset32 = 24, offset64 = 28 - @atomic havelock::UInt8 # 0x0 = none, 0x1 = lock, 0x2 = conflict + # + # This atomic integer holds the current state of the lock instance. Only the two lowest bits + # are used. See `LOCKED_BIT` and `PARKED_BIT` for the bitmask for these bits. + # + # # State table: + # + # PARKED_BIT | LOCKED_BIT | Description + # 0 | 0 | The lock is not locked, nor is anyone waiting for it. + # -----------+------------+------------------------------------------------------------------ + # 0 | 1 | The lock is locked by exactly one task. No other task is + # | | waiting for it. + # -----------+------------+------------------------------------------------------------------ + # 1 | 0 | The lock is not locked. One or more tasks are parked. + # -----------+------------+------------------------------------------------------------------ + # 1 | 1 | The lock is locked by exactly one task. One or more tasks are + # | | parked waiting for the lock to become available. + # | | In this state, PARKED_BIT is only ever cleared when the cond_wait lock + # | | is held (i.e. on unlock). This ensures that + # | | we never end up in a situation where there are parked tasks but + # | | PARKED_BIT is not set (which would result in those tasks + # | | potentially never getting woken up). + @atomic havelock::UInt8 # offset32 = 28, offset64 = 32 cond_wait::ThreadSynchronizer # 2 words # offset32 = 36, offset64 = 48 @@ -51,6 +87,20 @@ end assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by) +show(io::IO, ::ReentrantLock) = print(io, ReentrantLock, "()") + +function show(io::IO, ::MIME"text/plain", l::ReentrantLock) + show(io, l) + if !(get(io, :compact, false)::Bool) + locked_by = l.locked_by + if locked_by isa Task + print(io, " (locked by ", locked_by === current_task() ? "current " : "", locked_by, ")") + else + print(io, " (unlocked)") + end + end +end + """ islocked(lock) -> Status (Boolean) @@ -91,7 +141,7 @@ function islocked end # `ReentrantLock`. 
function islocked(rl::ReentrantLock) - return (@atomic :monotonic rl.havelock) != 0 + return (@atomic :monotonic rl.havelock) & LOCKED_BIT != 0 end """ @@ -115,7 +165,6 @@ function trylock end @inline function trylock(rl::ReentrantLock) ct = current_task() if rl.locked_by === ct - #@assert rl.havelock !== 0x00 rl.reentrancy_cnt += 0x0000_0001 return true end @@ -123,9 +172,8 @@ function trylock end end @noinline function _trylock(rl::ReentrantLock, ct::Task) GC.disable_finalizers() - if (@atomicreplace :acquire rl.havelock 0x00 => 0x01).success - #@assert rl.locked_by === nothing - #@assert rl.reentrancy_cnt === 0 + state = (@atomic :monotonic rl.havelock) & PARKED_BIT + if (@atomicreplace :acquire rl.havelock state => (state | LOCKED_BIT)).success rl.reentrancy_cnt = 0x0000_0001 @atomic :release rl.locked_by = ct return true @@ -145,24 +193,71 @@ Each `lock` must be matched by an [`unlock`](@ref). """ @inline function lock(rl::ReentrantLock) trylock(rl) || (@noinline function slowlock(rl::ReentrantLock) + Threads.lock_profiling() && Threads.inc_lock_conflict_count() c = rl.cond_wait - lock(c.lock) - try - while true - if (@atomicreplace rl.havelock 0x01 => 0x02).old == 0x00 # :sequentially_consistent ? # now either 0x00 or 0x02 - # it was unlocked, so try to lock it ourself - _trylock(rl, current_task()) && break - else # it was locked, so now wait for the release to notify us - wait(c) + ct = current_task() + iteration = 1 + while true + state = @atomic :monotonic rl.havelock + # Grab the lock if it isn't locked, even if there is a queue on it + if state & LOCKED_BIT == 0 + GC.disable_finalizers() + result = (@atomicreplace :acquire :monotonic rl.havelock state => (state | LOCKED_BIT)) + if result.success + rl.reentrancy_cnt = 0x0000_0001 + @atomic :release rl.locked_by = ct + return end + GC.enable_finalizers() + continue end - finally - unlock(c.lock) + + if state & PARKED_BIT == 0 + # If there is no queue, try spinning a few times + if iteration <= MAX_SPIN_ITERS + Base.yield() + iteration += 1 + continue + end + + # If still not locked, try setting the parked bit + @atomicreplace :monotonic :monotonic rl.havelock state => (state | PARKED_BIT) + end + + # lock the `cond_wait` + lock(c.lock) + + # Last check before we wait to make sure `unlock` did not win the race + # to the `cond_wait` lock and cleared the parked bit + state = @atomic :acquire rl.havelock + if state != LOCKED_BIT | PARKED_BIT + unlock(c.lock) + continue + end + + # It was locked, so now wait for the unlock to notify us + wait_no_relock(c) + + # Loop back and try locking again + iteration = 1 end end)(rl) return end +function wait_no_relock(c::GenericCondition) + ct = current_task() + _wait2(c, ct) + token = unlockall(c.lock) + try + return wait() + catch + ct.queue === nothing || list_deletefirst!(ct.queue, ct) + rethrow() + end +end + + """ unlock(lock) @@ -179,18 +274,27 @@ internal counter and return immediately. 
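A short usage sketch of the reentrancy counter described here: re-acquiring a `ReentrantLock` the current task already holds succeeds immediately, and every acquisition must be paired with an `unlock`:

```julia
l = ReentrantLock()
lock(l)
trylock(l)     # true: the same task re-acquires, bumping the internal counter
islocked(l)    # true
unlock(l)      # counter 2 -> 1, still held
unlock(l)      # counter 1 -> 0, released (parked waiters, if any, are notified)
islocked(l)    # false
```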
rl.reentrancy_cnt = n if n == 0x0000_00000 @atomic :monotonic rl.locked_by = nothing - if (@atomicswap :release rl.havelock = 0x00) == 0x02 + result = (@atomicreplace :release :monotonic rl.havelock LOCKED_BIT => 0x00) + if result.success + return true + else (@noinline function notifywaiters(rl) cond_wait = rl.cond_wait lock(cond_wait) - try - notify(cond_wait) - finally - unlock(cond_wait) + + notify(cond_wait, all=false) + if !isempty(cond_wait.waitq) + @atomic :release rl.havelock = PARKED_BIT + else + # We may have won the race to the `cond_wait` lock as a task was about to park + # but we unlock anyway as any parking task will retry + @atomic :release rl.havelock = 0x00 end + + unlock(cond_wait) end)(rl) + return true end - return true end return false end)(rl) && GC.enable_finalizers() @@ -220,6 +324,8 @@ available. When this function returns, the `lock` has been released, so the caller should not attempt to `unlock` it. +See also: [`@lock`](@ref). + !!! compat "Julia 1.7" Using a [`Channel`](@ref) as the second argument requires Julia 1.7 or later. """ @@ -258,6 +364,9 @@ end ``` This is similar to using [`lock`](@ref) with a `do` block, but avoids creating a closure and thus can improve the performance. + +!!! compat + `@lock` was added in Julia 1.3, and exported in Julia 1.10. """ macro lock(l, expr) quote @@ -288,6 +397,63 @@ macro lock_nofail(l, expr) end end +""" + Lockable(value, lock = ReentrantLock()) + +Creates a `Lockable` object that wraps `value` and +associates it with the provided `lock`. This object +supports [`@lock`](@ref), [`lock`](@ref), [`trylock`](@ref), +[`unlock`](@ref). To access the value, index the lockable object while +holding the lock. + +!!! compat "Julia 1.11" + Requires at least Julia 1.11. + +## Example + +```jldoctest +julia> locked_list = Base.Lockable(Int[]); + +julia> @lock(locked_list, push!(locked_list[], 1)) # must hold the lock to access the value +1-element Vector{Int64}: + 1 + +julia> lock(summary, locked_list) +"1-element Vector{Int64}" +``` +""" +struct Lockable{T, L <: AbstractLock} + value::T + lock::L +end + +Lockable(value) = Lockable(value, ReentrantLock()) +getindex(l::Lockable) = (assert_havelock(l.lock); l.value) + +""" + lock(f::Function, l::Lockable) + +Acquire the lock associated with `l`, execute `f` with the lock held, +and release the lock when `f` returns. `f` will receive one positional +argument: the value wrapped by `l`. If the lock is already locked by a +different task/thread, wait for it to become available. +When this function returns, the `lock` has been released, so the caller should +not attempt to `unlock` it. + +!!! compat "Julia 1.11" + Requires at least Julia 1.11. +""" +function lock(f, l::Lockable) + lock(l.lock) do + f(l.value) + end +end + +# implement the rest of the Lock interface on Lockable +lock(l::Lockable) = lock(l.lock) +trylock(l::Lockable) = trylock(l.lock) +unlock(l::Lockable) = unlock(l.lock) + @eval Threads begin """ Threads.Condition([lock]) @@ -435,8 +601,8 @@ This provides an acquire & release memory ordering on notify/wait. The `autoreset` functionality and memory ordering guarantee requires at least Julia 1.8. 
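A brief usage sketch for the `Event` documented above, with `autoreset = true` so each `notify` lets a single `wait` through before the flag resets (the `@async` task is only for illustration):

```julia
e = Base.Event(true)            # autoreset event
t = @async begin
    wait(e)                     # blocks until the event is set
    "woken"
end
notify(e)                       # set the event; the waiter consumes (resets) it
fetch(t)                        # "woken"
```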
""" mutable struct Event - notify::Threads.Condition - autoreset::Bool + const notify::Threads.Condition + const autoreset::Bool @atomic set::Bool Event(autoreset::Bool=false) = new(Threads.Condition(), autoreset, false) end @@ -493,3 +659,278 @@ end import .Base: Event export Event end + +const PerStateInitial = 0x00 +const PerStateHasrun = 0x01 +const PerStateErrored = 0x02 +const PerStateConcurrent = 0x03 + +""" + OncePerProcess{T}(init::Function)() -> T + +Calling a `OncePerProcess` object returns a value of type `T` by running the +function `initializer` exactly once per process. All concurrent and future +calls in the same process will return exactly the same value. This is useful in +code that will be precompiled, as it allows setting up caches or other state +which won't get serialized. + +## Example + +```jldoctest +julia> const global_state = Base.OncePerProcess{Vector{UInt32}}() do + println("Making lazy global value...done.") + return [Libc.rand()] + end; + +julia> (procstate = global_state()) |> typeof +Making lazy global value...done. +Vector{UInt32} (alias for Array{UInt32, 1}) + +julia> procstate === global_state() +true + +julia> procstate === fetch(@async global_state()) +true +``` +""" +mutable struct OncePerProcess{T, F} + value::Union{Nothing,T} + @atomic state::UInt8 # 0=initial, 1=hasrun, 2=error + @atomic allow_compile_time::Bool + const initializer::F + const lock::ReentrantLock + + function OncePerProcess{T,F}(initializer::F) where {T, F} + once = new{T,F}(nothing, PerStateInitial, true, initializer, ReentrantLock()) + ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any), + once, :value, nothing) + ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any), + once, :state, PerStateInitial) + return once + end +end +OncePerProcess{T}(initializer::F) where {T, F} = OncePerProcess{T, F}(initializer) +OncePerProcess(initializer) = OncePerProcess{Base.promote_op(initializer), typeof(initializer)}(initializer) +@inline function (once::OncePerProcess{T})() where T + state = (@atomic :acquire once.state) + if state != PerStateHasrun + (@noinline function init_perprocesss(once, state) + state == PerStateErrored && error("OncePerProcess initializer failed previously") + once.allow_compile_time || __precompile__(false) + lock(once.lock) + try + state = @atomic :monotonic once.state + if state == PerStateInitial + once.value = once.initializer() + elseif state == PerStateErrored + error("OncePerProcess initializer failed previously") + elseif state != PerStateHasrun + error("invalid state for OncePerProcess") + end + catch + state == PerStateErrored || @atomic :release once.state = PerStateErrored + unlock(once.lock) + rethrow() + end + state == PerStateHasrun || @atomic :release once.state = PerStateHasrun + unlock(once.lock) + nothing + end)(once, state) + end + return once.value::T +end + +function copyto_monotonic!(dest::AtomicMemory, src) + i = 1 + for j in eachindex(src) + if isassigned(src, j) + @atomic :monotonic dest[i] = src[j] + #else + # _unsetindex_atomic!(dest, i, src[j], :monotonic) + end + i += 1 + end + dest +end + +function fill_monotonic!(dest::AtomicMemory, x) + for i = 1:length(dest) + @atomic :monotonic dest[i] = x + end + dest +end + + +# share a lock/condition, since we just need it briefly, so some contention is okay +const PerThreadLock = ThreadSynchronizer() +""" + OncePerThread{T}(init::Function)() -> T + +Calling a `OncePerThread` object returns a value of type `T` by running the function +`initializer` exactly once per thread. 
All future calls in the same thread, and +concurrent or future calls with the same thread id, will return exactly the +same value. The object can also be indexed by the threadid for any existing +thread, to get (or initialize *on this thread*) the value stored for that +thread. Incorrect usage can lead to data-races or memory corruption so use only +if that behavior is correct within your library's threading-safety design. + +!!! warning + It is not necessarily true that a Task only runs on one thread, therefore the value + returned here may alias other values or change in the middle of your program. This function + may get deprecated in the future. If initializer yields, the thread running the current + task after the call might not be the same as the one at the start of the call. + +See also: [`OncePerTask`](@ref). + +## Example + +```jldoctest +julia> const thread_state = Base.OncePerThread{Vector{UInt32}}() do + println("Making lazy thread value...done.") + return [Libc.rand()] + end; + +julia> (threadvec = thread_state()) |> typeof +Making lazy thread value...done. +Vector{UInt32} (alias for Array{UInt32, 1}) + +julia> threadvec === fetch(@async thread_state()) +true + +julia> threadvec === thread_state[Threads.threadid()] +true +``` +""" +mutable struct OncePerThread{T, F} + @atomic xs::AtomicMemory{T} # values + @atomic ss::AtomicMemory{UInt8} # states: 0=initial, 1=hasrun, 2=error, 3==concurrent + const initializer::F + + function OncePerThread{T,F}(initializer::F) where {T, F} + xs, ss = AtomicMemory{T}(), AtomicMemory{UInt8}() + once = new{T,F}(xs, ss, initializer) + ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any), + once, :xs, xs) + ccall(:jl_set_precompile_field_replace, Cvoid, (Any, Any, Any), + once, :ss, ss) + return once + end +end +OncePerThread{T}(initializer::F) where {T, F} = OncePerThread{T,F}(initializer) +OncePerThread(initializer) = OncePerThread{Base.promote_op(initializer), typeof(initializer)}(initializer) +@inline (once::OncePerThread)() = once[Threads.threadid()] +@inline function getindex(once::OncePerThread, tid::Integer) + tid = Int(tid) + ss = @atomic :acquire once.ss + xs = @atomic :monotonic once.xs + # n.b. 
length(xs) >= length(ss) + if tid <= 0 || tid > length(ss) || (@atomic :acquire ss[tid]) != PerStateHasrun + (@noinline function init_perthread(once, tid) + local ss = @atomic :acquire once.ss + local xs = @atomic :monotonic once.xs + local len = length(ss) + # slow path to allocate it + nt = Threads.maxthreadid() + 0 < tid <= nt || throw(ArgumentError("thread id outside of allocated range")) + if tid <= length(ss) && (@atomic :acquire ss[tid]) == PerStateErrored + error("OncePerThread initializer failed previously") + end + newxs = xs + newss = ss + if tid > len + # attempt to do all allocations outside of PerThreadLock for better scaling + @assert length(xs) >= length(ss) "logical constraint violation" + newxs = typeof(xs)(undef, len + nt) + newss = typeof(ss)(undef, len + nt) + end + # uses state and locks to ensure this runs exactly once per tid argument + lock(PerThreadLock) + try + ss = @atomic :monotonic once.ss + xs = @atomic :monotonic once.xs + if tid > length(ss) + @assert len <= length(ss) <= length(newss) "logical constraint violation" + fill_monotonic!(newss, PerStateInitial) + xs = copyto_monotonic!(newxs, xs) + ss = copyto_monotonic!(newss, ss) + @atomic :release once.xs = xs + @atomic :release once.ss = ss + end + state = @atomic :monotonic ss[tid] + while state == PerStateConcurrent + # lost race, wait for notification this is done running elsewhere + wait(PerThreadLock) # wait for initializer to finish without releasing this thread + ss = @atomic :monotonic once.ss + state = @atomic :monotonic ss[tid] + end + if state == PerStateInitial + # won the race, drop lock in exchange for state, and run user initializer + @atomic :monotonic ss[tid] = PerStateConcurrent + result = try + unlock(PerThreadLock) + once.initializer() + catch + lock(PerThreadLock) + ss = @atomic :monotonic once.ss + @atomic :release ss[tid] = PerStateErrored + notify(PerThreadLock) + rethrow() + end + # store result and notify waiters + lock(PerThreadLock) + xs = @atomic :monotonic once.xs + @atomic :release xs[tid] = result + ss = @atomic :monotonic once.ss + @atomic :release ss[tid] = PerStateHasrun + notify(PerThreadLock) + elseif state == PerStateErrored + error("OncePerThread initializer failed previously") + elseif state != PerStateHasrun + error("invalid state for OncePerThread") + end + finally + unlock(PerThreadLock) + end + nothing + end)(once, tid) + xs = @atomic :monotonic once.xs + end + return xs[tid] +end + +""" + OncePerTask{T}(init::Function)() -> T + +Calling a `OncePerTask` object returns a value of type `T` by running the function `initializer` +exactly once per Task. All future calls in the same Task will return exactly the same value. + +See also: [`task_local_storage`](@ref). + +## Example + +```jldoctest +julia> const task_state = Base.OncePerTask{Vector{UInt32}}() do + println("Making lazy task value...done.") + return [Libc.rand()] + end; + +julia> (taskvec = task_state()) |> typeof +Making lazy task value...done. +Vector{UInt32} (alias for Array{UInt32, 1}) + +julia> taskvec === task_state() +true + +julia> taskvec === fetch(@async task_state()) +Making lazy task value...done. 
+false +``` +""" +mutable struct OncePerTask{T, F} + const initializer::F + + OncePerTask{T}(initializer::F) where {T, F} = new{T,F}(initializer) + OncePerTask{T,F}(initializer::F) where {T, F} = new{T,F}(initializer) + OncePerTask(initializer) = new{Base.promote_op(initializer), typeof(initializer)}(initializer) +end +@inline (once::OncePerTask)() = get!(once.initializer, task_local_storage(), once) diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/base/logging/ConsoleLogger.jl similarity index 90% rename from stdlib/Logging/src/ConsoleLogger.jl rename to base/logging/ConsoleLogger.jl index 747f8a2b22966..818b2272b773c 100644 --- a/stdlib/Logging/src/ConsoleLogger.jl +++ b/base/logging/ConsoleLogger.jl @@ -115,13 +115,22 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module end # Generate a text representation of the message and all key value pairs, - # split into lines. - msglines = [(indent=0, msg=l) for l in split(chomp(convert(String, string(message))::String), '\n')] + # split into lines. This is specialised to improve type inference, + # and reduce the risk of resulting method invalidations. + message = string(message) + msglines = if Base._isannotated(message) && !isempty(Base.annotations(message)) + message = Base.AnnotatedString(String(message), Base.annotations(message)) + @NamedTuple{indent::Int, msg::Union{SubString{Base.AnnotatedString{String}}, SubString{String}}}[ + (indent=0, msg=l) for l in split(chomp(message), '\n')] + else + [(indent=0, msg=l) for l in split( + chomp(convert(String, message)::String), '\n')] + end stream::IO = logger.stream if !(isopen(stream)::Bool) stream = stderr end - dsize = displaysize(stream)::Tuple{Int,Int} + dsize = Base.displaysize_(stream)::Tuple{Int,Int} nkwargs = length(kwargs)::Int if nkwargs > hasmaxlog valbuf = IOBuffer() diff --git a/base/logging.jl b/base/logging/logging.jl similarity index 95% rename from base/logging.jl rename to base/logging/logging.jl index c42af08d8f4ae..5cf3882a300ec 100644 --- a/base/logging.jl +++ b/base/logging/logging.jl @@ -3,6 +3,7 @@ module CoreLogging import Base: isless, +, -, convert, show +import Base.ScopedValues: ScopedValue, with, @with export AbstractLogger, @@ -59,7 +60,7 @@ function min_enabled_level end catch_exceptions(logger) Return `true` if the logger should catch exceptions which happen during log -record construction. By default, messages are caught +record construction. By default, messages are caught. By default all exceptions are caught to prevent log message generation from crashing the program. This lets users confidently toggle little-used @@ -132,6 +133,11 @@ isless(a::LogLevel, b::LogLevel) = isless(a.level, b.level) -(level::LogLevel, inc::Integer) = LogLevel(level.level-inc) convert(::Type{LogLevel}, level::Integer) = LogLevel(level) +""" + BelowMinLevel + +Alias for [`LogLevel(-1_000_001)`](@ref LogLevel). +""" const BelowMinLevel = LogLevel(-1000001) """ Debug @@ -157,8 +163,16 @@ const Warn = LogLevel( 1000) Alias for [`LogLevel(2000)`](@ref LogLevel). """ const Error = LogLevel( 2000) +""" + AboveMaxLevel + +Alias for [`LogLevel(1_000_001)`](@ref LogLevel). +""" const AboveMaxLevel = LogLevel( 1000001) +# Global log limiting mechanism for super fast but inflexible global log limiting. 
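The constant defined immediately below is the fast global cutoff behind these log levels. For reference, a short sketch of the level ordering and of raising that cutoff via the Logging stdlib's `disable_logging` (API assumed from the standard Logging documentation):

```julia
using Logging

Logging.Debug < Logging.Info < Logging.Warn < Logging.Error   # true

# Raise the global minimum level so Debug/Info records are dropped
# before any logger is even consulted:
Logging.disable_logging(Logging.Info)
@debug "dropped early"     # filtered by the global cutoff
@warn  "still emitted"
```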
+const _min_enabled_level = Ref{LogLevel}(Debug) + function show(io::IO, level::LogLevel) if level == BelowMinLevel print(io, "BelowMinLevel") elseif level == Debug print(io, "Debug") @@ -319,6 +333,15 @@ function issimplekw(@nospecialize val) return false end +# helper function to get the current logger, if enabled for the specified message type +@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module) + logstate = @inline current_logstate() + if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module) + return logstate.logger + end + return nothing +end + # Generate code for logging macros function logmsg_code(_module, file, line, level, message, exs...) @nospecialize @@ -335,12 +358,12 @@ function logmsg_code(_module, file, line, level, message, exs...) checkerrors = nothing for kwarg in reverse(log_data.kwargs) if isa(kwarg.args[2].args[1], Symbol) - checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1]))) + checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1]), QuoteNode(:local))) end end if isa(message, Symbol) message = esc(message) - checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1]))) + checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1]), QuoteNode(:local))) end logrecord = quote let err = $checkerrors @@ -370,23 +393,23 @@ function logmsg_code(_module, file, line, level, message, exs...) let level = $level # simplify std_level code emitted, if we know it is one of our global constants - std_level = $(level isa Symbol ? :level : :(level isa LogLevel ? level : convert(LogLevel, level)::LogLevel)) - if std_level >= _min_enabled_level[] + std_level = $(level isa Symbol ? :level : :(level isa $LogLevel ? level : convert($LogLevel, level)::$LogLevel)) + if std_level >= $(_min_enabled_level)[] group = $(log_data._group) _module = $(log_data._module) - logger = current_logger_for_env(std_level, group, _module) + logger = $(current_logger_for_env)(std_level, group, _module) if !(logger === nothing) id = $(log_data._id) # Second chance at an early bail-out (before computing the message), # based on arbitrary logger-specific logic. - if invokelatest(shouldlog, logger, level, _module, group, id) + if invokelatest($shouldlog, logger, level, _module, group, id) file = $(log_data._file) if file isa String file = Base.fixup_stdlib_path(file) end line = $(log_data._line) local msg, kwargs - $(logrecord) && invokelatest(handle_message, + $(logrecord) && invokelatest($handle_message, logger, level, msg, _module, group, id, file, line; kwargs...) end @@ -481,9 +504,6 @@ function logmsg_shim(level, message, _module, group, id, file, line, kwargs) nothing end -# Global log limiting mechanism for super fast but inflexible global log limiting. -const _min_enabled_level = Ref{LogLevel}(Debug) - # LogState - a cache of data extracted from the logger, plus the logger itself. struct LogState min_enabled_level::LogLevel @@ -492,31 +512,14 @@ end LogState(logger) = LogState(LogLevel(_invoked_min_enabled_level(logger)), logger) -function current_logstate() - logstate = current_task().logstate - return (logstate !== nothing ? 
logstate : _global_logstate)::LogState -end +const CURRENT_LOGSTATE = ScopedValue{LogState}() -# helper function to get the current logger, if enabled for the specified message type -@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module) - logstate = current_logstate() - if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module) - return logstate.logger - end - return nothing +function current_logstate() + maybe = @inline Base.ScopedValues.get(CURRENT_LOGSTATE) + return something(maybe, _global_logstate)::LogState end -function with_logstate(f::Function, logstate) - @nospecialize - t = current_task() - old = t.logstate - try - t.logstate = logstate - f() - finally - t.logstate = old - end -end +with_logstate(f::Function, logstate) = @with(CURRENT_LOGSTATE => logstate, f()) #------------------------------------------------------------------------------- # Control of the current logger and early log filtering @@ -587,6 +590,8 @@ end end +global _global_logstate::LogState + """ global_logger() @@ -610,7 +615,7 @@ end Execute `function`, directing all log messages to `logger`. -# Example +# Examples ```julia function test(x) @@ -695,4 +700,6 @@ end _global_logstate = LogState(SimpleLogger()) +include("logging/ConsoleLogger.jl") + end # CoreLogging diff --git a/base/math.jl b/base/math.jl index 71bd4949498b5..650fc6bc0cef0 100644 --- a/base/math.jl +++ b/base/math.jl @@ -23,9 +23,9 @@ import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin, using .Base: sign_mask, exponent_mask, exponent_one, exponent_half, uinttype, significand_mask, significand_bits, exponent_bits, exponent_bias, - exponent_max, exponent_raw_max + exponent_max, exponent_raw_max, clamp, clamp! -using Core.Intrinsics: sqrt_llvm +using Core.Intrinsics: sqrt_llvm, min_float, max_float using .Base: IEEEFloat @@ -69,98 +69,6 @@ end return Txy, T(xy-Txy) end -""" - clamp(x, lo, hi) - -Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments -are promoted to a common type. - -See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref). - -!!! compat "Julia 1.3" - `missing` as the first argument requires at least Julia 1.3. - -# Examples -```jldoctest -julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0) -3-element Vector{BigFloat}: - 3.141592653589793238462643383279502884197169399375105820974944592307816406286198 - 2.0 - 9.0 - -julia> clamp.([11, 8, 5], 10, 6) # an example where lo > hi -3-element Vector{Int64}: - 6 - 6 - 10 -``` -""" -clamp(x::X, lo::L, hi::H) where {X,L,H} = - ifelse(x > hi, convert(promote_type(X,L,H), hi), - ifelse(x < lo, - convert(promote_type(X,L,H), lo), - convert(promote_type(X,L,H), x))) - -""" - clamp(x, T)::T - -Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`. - -See also [`trunc`](@ref). - -# Examples -```jldoctest -julia> clamp(200, Int8) -127 - -julia> clamp(-200, Int8) --128 - -julia> trunc(Int, 4pi^2) -39 -``` -""" -clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T - - -""" - clamp!(array::AbstractArray, lo, hi) - -Restrict values in `array` to the specified range, in-place. -See also [`clamp`](@ref). - -!!! compat "Julia 1.3" - `missing` entries in `array` require at least Julia 1.3. 
- -# Examples -```jldoctest -julia> row = collect(-4:4)'; - -julia> clamp!(row, 0, Inf) -1×9 adjoint(::Vector{Int64}) with eltype Int64: - 0 0 0 0 0 1 2 3 4 - -julia> clamp.((-4:4)', 0, Inf) -1×9 Matrix{Float64}: - 0.0 0.0 0.0 0.0 0.0 1.0 2.0 3.0 4.0 -``` -""" -function clamp!(x::AbstractArray, lo, hi) - @inbounds for i in eachindex(x) - x[i] = clamp(x[i], lo, hi) - end - x -end - -""" - clamp(x::Integer, r::AbstractUnitRange) - -Clamp `x` to lie within range `r`. - -!!! compat "Julia 1.6" - This method requires at least Julia 1.6. -""" -clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r)) """ evalpoly(x, p) @@ -177,7 +85,7 @@ a Goertzel-like [^DK62] algorithm if `x` is complex. !!! compat "Julia 1.4" This function requires Julia 1.4 or later. -# Example +# Examples ```jldoctest julia> evalpoly(2, (1, 2, 3)) 17 @@ -304,14 +212,19 @@ end # polynomial evaluation using compensated summation. # much more accurate, especially when lo can be combined with other rounding errors -Base.@assume_effects :terminates_locally @inline function exthorner(x, p::Tuple) - hi, lo = p[end], zero(x) - for i in length(p)-1:-1:1 - pi = getfield(p, i) # needed to prove consistency - prod, err = two_mul(hi,x) - hi = pi+prod - lo = fma(lo, x, prod - (hi - pi) + err) - end +@inline function exthorner(x::T, p::Tuple{T,T,T}) where T<:Union{Float32,Float64} + hi, lo = p[lastindex(p)], zero(x) + hi, lo = _exthorner(2, x, p, hi, lo) + hi, lo = _exthorner(1, x, p, hi, lo) + return hi, lo +end + +@inline function _exthorner(i::Int, x::T, p::Tuple{T,T,T}, hi::T, lo::T) where T<:Union{Float32,Float64} + i == 2 || i == 1 || error("unexpected index") + pi = p[i] + prod, err = two_mul(hi,x) + hi = pi+prod + lo = fma(lo, x, prod - (hi - pi) + err) return hi, lo end @@ -354,7 +267,7 @@ log(b::T, x::T) where {T<:Number} = log(x)/log(b) """ log(b,x) -Compute the base `b` logarithm of `x`. Throws [`DomainError`](@ref) for negative +Compute the base `b` logarithm of `x`. Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments. # Examples @@ -403,6 +316,8 @@ const libm = Base.libm_name sinh(x) Compute hyperbolic sine of `x`. + +See also [`sin`](@ref). """ sinh(x::Number) @@ -410,6 +325,8 @@ sinh(x::Number) cosh(x) Compute hyperbolic cosine of `x`. + +See also [`cos`](@ref). """ cosh(x::Number) @@ -448,7 +365,7 @@ tanh(x::Number) Compute the inverse tangent of `y` or `y/x`, respectively. -For one argument, this is the angle in radians between the positive *x*-axis and the point +For one real argument, this is the angle in radians between the positive *x*-axis and the point (1, *y*), returning a value in the interval ``[-\\pi/2, \\pi/2]``. For two arguments, this is the angle in radians between the positive *x*-axis and the @@ -488,10 +405,12 @@ asinh(x::Number) # functions that return NaN on non-NaN argument for domain error """ - sin(x) + sin(x::T) where {T <: Number} -> float(T) Compute sine of `x`, where `x` is in radians. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + See also [`sind`](@ref), [`sinpi`](@ref), [`sincos`](@ref), [`cis`](@ref), [`asin`](@ref). # Examples @@ -519,26 +438,34 @@ julia> round(exp(im*pi/6), digits=3) sin(x::Number) """ - cos(x) + cos(x::T) where {T <: Number} -> float(T) Compute cosine of `x`, where `x` is in radians. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + See also [`cosd`](@ref), [`cospi`](@ref), [`sincos`](@ref), [`cis`](@ref). 
""" cos(x::Number) """ - tan(x) + tan(x::T) where {T <: Number} -> float(T) Compute tangent of `x`, where `x` is in radians. + +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + +See also [`tanh`](@ref). """ tan(x::Number) """ - asin(x) + asin(x::T) where {T <: Number} -> float(T) Compute the inverse sine of `x`, where the output is in radians. +Return a `T(NaN)` if `isnan(x)`. + See also [`asind`](@ref) for output in degrees. # Examples @@ -553,9 +480,11 @@ julia> asind.((0, 1/2, 1)) asin(x::Number) """ - acos(x) + acos(x::T) where {T <: Number} -> float(T) Compute the inverse cosine of `x`, where the output is in radians + +Return a `T(NaN)` if `isnan(x)`. """ acos(x::Number) @@ -576,8 +505,14 @@ atanh(x::Number) """ log(x) -Compute the natural logarithm of `x`. Throws [`DomainError`](@ref) for negative -[`Real`](@ref) arguments. Use complex negative arguments to obtain complex results. +Compute the natural logarithm of `x`. + +Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments. +Use [`Complex`](@ref) arguments to obtain [`Complex`](@ref) results. + +!!! note "Branch cut" + `log` has a branch cut along the negative real axis; `-0.0im` is taken + to be below the axis. See also [`ℯ`](@ref), [`log1p`](@ref), [`log2`](@ref), [`log10`](@ref). @@ -593,6 +528,12 @@ Stacktrace: [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31 [...] +julia> log(-3 + 0im) +1.0986122886681098 + 3.141592653589793im + +julia> log(-3 - 0.0im) +1.0986122886681098 - 3.141592653589793im + julia> log.(exp.(-1:1)) 3-element Vector{Float64}: -1.0 @@ -605,7 +546,7 @@ log(x::Number) """ log2(x) -Compute the logarithm of `x` to base 2. Throws [`DomainError`](@ref) for negative +Compute the logarithm of `x` to base 2. Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments. See also: [`exp2`](@ref), [`ldexp`](@ref), [`ispow2`](@ref). @@ -638,7 +579,7 @@ log2(x) log10(x) Compute the logarithm of `x` to base 10. -Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments. +Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments. # Examples ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*" @@ -661,7 +602,7 @@ log10(x) """ log1p(x) -Accurate natural logarithm of `1+x`. Throws [`DomainError`](@ref) for [`Real`](@ref) +Accurate natural logarithm of `1+x`. Throw a [`DomainError`](@ref) for [`Real`](@ref) arguments less than -1. # Examples @@ -690,8 +631,16 @@ end """ sqrt(x) -Return ``\\sqrt{x}``. Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments. -Use complex negative arguments instead. The prefix operator `√` is equivalent to `sqrt`. +Return ``\\sqrt{x}``. + +Throw a [`DomainError`](@ref) for negative [`Real`](@ref) arguments. +Use [`Complex`](@ref) negative arguments instead to obtain a [`Complex`](@ref) result. + +The prefix operator `√` is equivalent to `sqrt`. + +!!! note "Branch cut" + `sqrt` has a branch cut along the negative real axis; `-0.0im` is taken + to be below the axis. See also: [`hypot`](@ref). @@ -710,6 +659,9 @@ Stacktrace: julia> sqrt(big(complex(-81))) 0.0 + 9.0im +julia> sqrt(-81 - 0.0im) # -0.0im is below the branch cut +0.0 - 9.0im + julia> .√(1:4) 4-element Vector{Float64}: 1.0 @@ -773,8 +725,8 @@ true ``` """ hypot(x::Number) = abs(float(x)) -hypot(x::Number, y::Number) = _hypot(promote(float(x), y)...) -hypot(x::Number, y::Number, xs::Number...) = _hypot(promote(float(x), y, xs...)) +hypot(x::Number, y::Number) = _hypot(float.(promote(x, y))...) 
+hypot(x::Number, y::Number, xs::Number...) = _hypot(float.(promote(x, y, xs...))) function _hypot(x, y) # preserves unit axu = abs(x) @@ -879,47 +831,12 @@ min(x::T, y::T) where {T<:AbstractFloat} = isnan(x) || ~isnan(y) && _isless(x, y max(x::T, y::T) where {T<:AbstractFloat} = isnan(x) || ~isnan(y) && _isless(y, x) ? x : y minmax(x::T, y::T) where {T<:AbstractFloat} = min(x, y), max(x, y) -_isless(x::Float16, y::Float16) = signbit(widen(x) - widen(y)) - -const has_native_fminmax = Sys.ARCH === :aarch64 -@static if has_native_fminmax - @eval begin - Base.@assume_effects :total @inline llvm_min(x::Float64, y::Float64) = ccall("llvm.minimum.f64", llvmcall, Float64, (Float64, Float64), x, y) - Base.@assume_effects :total @inline llvm_min(x::Float32, y::Float32) = ccall("llvm.minimum.f32", llvmcall, Float32, (Float32, Float32), x, y) - Base.@assume_effects :total @inline llvm_max(x::Float64, y::Float64) = ccall("llvm.maximum.f64", llvmcall, Float64, (Float64, Float64), x, y) - Base.@assume_effects :total @inline llvm_max(x::Float32, y::Float32) = ccall("llvm.maximum.f32", llvmcall, Float32, (Float32, Float32), x, y) - end -end - -function min(x::T, y::T) where {T<:Union{Float32,Float64}} - @static if has_native_fminmax - return llvm_min(x,y) - end - diff = x - y - argmin = ifelse(signbit(diff), x, y) - anynan = isnan(x)|isnan(y) - return ifelse(anynan, diff, argmin) +function min(x::T, y::T) where {T<:IEEEFloat} + return min_float(x, y) end -function max(x::T, y::T) where {T<:Union{Float32,Float64}} - @static if has_native_fminmax - return llvm_max(x,y) - end - diff = x - y - argmax = ifelse(signbit(diff), y, x) - anynan = isnan(x)|isnan(y) - return ifelse(anynan, diff, argmax) -end - -function minmax(x::T, y::T) where {T<:Union{Float32,Float64}} - @static if has_native_fminmax - return llvm_min(x, y), llvm_max(x, y) - end - diff = x - y - sdiff = signbit(diff) - min, max = ifelse(sdiff, x, y), ifelse(sdiff, y, x) - anynan = isnan(x)|isnan(y) - return ifelse(anynan, diff, min), ifelse(anynan, diff, max) +function max(x::T, y::T) where {T<:IEEEFloat} + return max_float(x, y) end """ @@ -927,9 +844,11 @@ end Compute ``x \\times 2^n``. +See also [`frexp`](@ref), [`exponent`](@ref). + # Examples ```jldoctest -julia> ldexp(5., 2) +julia> ldexp(5.0, 2) 20.0 ``` """ @@ -979,27 +898,36 @@ end ldexp(x::Float16, q::Integer) = Float16(ldexp(Float32(x), q)) """ - exponent(x) -> Int + exponent(x::Real) -> Int -Returns the largest integer `y` such that `2^y ≤ abs(x)`. +Return the largest integer `y` such that `2^y ≤ abs(x)`. For a normalized floating-point number `x`, this corresponds to the exponent of `x`. +Throws a `DomainError` when `x` is zero, infinite, or [`NaN`](@ref). +For any other non-subnormal floating-point number `x`, this corresponds to the exponent bits of `x`. + +See also [`signbit`](@ref), [`significand`](@ref), [`frexp`](@ref), [`issubnormal`](@ref), [`log2`](@ref), [`ldexp`](@ref). # Examples ```jldoctest julia> exponent(8) 3 -julia> exponent(64//1) -6 - julia> exponent(6.5) 2 -julia> exponent(16.0) -4 +julia> exponent(-1//4) +-2 julia> exponent(3.142e-4) -12 + +julia> exponent(floatmin(Float32)), exponent(nextfloat(0.0f0)) +(-126, -149) + +julia> exponent(0.0) +ERROR: DomainError with 0.0: +Cannot be ±0.0. +[...] ``` """ function exponent(x::T) where T<:IEEEFloat @@ -1039,6 +967,8 @@ a non-zero finite number, then the result will be a number of the same type and sign as `x`, and whose absolute value is on the interval ``[1,2)``. Otherwise `x` is returned. 
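As an aside on the floating-point `min`/`max` definitions a little earlier in this hunk: they follow IEEE-754 minimum/maximum semantics, so NaN propagates and `-0.0` orders below `+0.0`. A quick sketch:

```julia
min(1.0, NaN)      # NaN  (NaN propagates)
max(NaN, 2.0)      # NaN
min(-0.0, 0.0)     # -0.0 (negative zero sorts below positive zero)
max(-0.0, 0.0)     # 0.0
minmax(3.0, 1.0)   # (1.0, 3.0)
```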
+See also [`frexp`](@ref), [`exponent`](@ref). + # Examples ```jldoctest julia> significand(15.2) @@ -1073,10 +1003,19 @@ end Return `(x,exp)` such that `x` has a magnitude in the interval ``[1/2, 1)`` or 0, and `val` is equal to ``x \\times 2^{exp}``. + +See also [`significand`](@ref), [`exponent`](@ref), [`ldexp`](@ref). + # Examples ```jldoctest -julia> frexp(12.8) -(0.8, 4) +julia> frexp(6.0) +(0.75, 3) + +julia> significand(6.0), exponent(6.0) # interval [1, 2) instead +(1.5, 2) + +julia> frexp(0.0), frexp(NaN), frexp(-Inf) # exponent would give an error +((0.0, 0), (NaN, 0), (-Inf, 0)) ``` """ function frexp(x::T) where T<:IEEEFloat @@ -1189,6 +1128,10 @@ function modf(x::T) where T<:IEEEFloat return (rx, ix) end +@inline function use_power_by_squaring(n::Integer) + -2^12 <= n <= 3 * 2^13 +end + # @constprop aggressive to help the compiler see the switch between the integer and float # variants for callers with constant `y` @constprop :aggressive function ^(x::Float64, y::Float64) @@ -1201,24 +1144,33 @@ end y = sign(y)*0x1.8p62 end yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result - y == yint && return @noinline x^yint - 2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x==0 - x<0 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer - !isfinite(x) && return x*(y>0 || isnan(x)) # x is inf or NaN + yisint = y == yint + if yisint + yint == 0 && return 1.0 + use_power_by_squaring(yint) && return @noinline pow_body(x, yint) + end + 2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x === +0.0 or -0.0 (Inf * false === 0.0) + s = 1 + if x < 0 + !yisint && throw_exp_domainerror(x) # y isn't an integer + s = ifelse(isodd(yint), -1, 1) + end + !isfinite(x) && return copysign(x,s)*(y>0 || isnan(x)) # x is inf or NaN + return copysign(pow_body(abs(x), y), s) +end + +@assume_effects :foldable @noinline function pow_body(x::Float64, y::Float64) + xu = reinterpret(UInt64, x) if xu < (UInt64(1)<<52) # x is subnormal xu = reinterpret(UInt64, x * 0x1p52) # normalize x xu &= ~sign_mask(Float64) xu -= UInt64(52) << 52 # mess with the exponent end - return pow_body(xu, y) -end - -@inline function pow_body(xu::UInt64, y::Float64) - logxhi,logxlo = Base.Math._log_ext(xu) + logxhi,logxlo = _log_ext(xu) xyhi, xylo = two_mul(logxhi,y) xylo = muladd(logxlo, y, xylo) hi = xyhi+xylo - return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ)) + return @inline Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ)) end @constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32} @@ -1242,12 +1194,27 @@ end return T(exp2(log2(abs(widen(x))) * y)) end -# compensated power by squaring @constprop :aggressive @inline function ^(x::Float64, n::Integer) + n = clamp(n, Int64) n == 0 && return one(x) - return pow_body(x, n) + if use_power_by_squaring(n) + return pow_body(x, n) + else + s = ifelse(x < 0 && isodd(n), -1.0, 1.0) + x = abs(x) + y = float(n) + if y == n + return copysign(pow_body(x, y), s) + else + n2 = n % 1024 + y = float(n - n2) + return pow_body(x, y) * copysign(pow_body(x, n2), s) + end + end end +# compensated power by squaring +# this method is only reliable for -2^20 < n < 2^20 (cf. 
#53881 #53886) @assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer) y = 1.0 xnlo = ynlo = 0.0 @@ -1274,20 +1241,18 @@ end return ifelse(isfinite(x) & isfinite(err), muladd(x, y, err), x*y) end -function ^(x::Float32, n::Integer) +function ^(x::Union{Float16,Float32}, n::Integer) n == -2 && return (i=inv(x); i*i) n == 3 && return x*x*x #keep compatibility with literal_pow - n < 0 && return Float32(Base.power_by_squaring(inv(Float64(x)),-n)) - Float32(Base.power_by_squaring(Float64(x),n)) + n < 0 && return oftype(x, Base.power_by_squaring(inv(widen(x)),-n)) + oftype(x, Base.power_by_squaring(widen(x),n)) end -@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ y) -@inline literal_pow(::typeof(^), x::Float16, ::Val{p}) where {p} = Float16(literal_pow(^,Float32(x),Val(p))) ## rem2pi-related calculations ## function add22condh(xh::Float64, xl::Float64, yh::Float64, yl::Float64) # This algorithm, due to Dekker, computes the sum of two - # double-double numbers and returns the high double. References: + # double-double numbers and return the high double. References: # [1] http://www.digizeitschriften.de/en/dms/img/?PID=GDZPPN001170007 # [2] https://doi.org/10.1007/BF01397083 r = xh+yh @@ -1463,9 +1428,11 @@ end rem2pi(x::Float32, r::RoundingMode) = Float32(rem2pi(Float64(x), r)) rem2pi(x::Float16, r::RoundingMode) = Float16(rem2pi(Float64(x), r)) rem2pi(x::Int32, r::RoundingMode) = rem2pi(Float64(x), r) -function rem2pi(x::Int64, r::RoundingMode) - fx = Float64(x) - fx == x || throw(ArgumentError("Int64 argument to rem2pi is too large: $x")) + +# general fallback +function rem2pi(x::Integer, r::RoundingMode) + fx = float(x) + fx == x || throw(ArgumentError(LazyString(typeof(x), " argument to rem2pi is too large: ", x))) rem2pi(fx, r) end @@ -1571,7 +1538,7 @@ sincos(a::Float16) = Float16.(sincos(Float32(a))) for f in (:sin, :cos, :tan, :asin, :atan, :acos, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh, :exp, :exp2, :exp10, :expm1, :log, :log2, :log10, :log1p, - :exponent, :sqrt, :cbrt) + :exponent, :sqrt, :cbrt, :sinpi, :cospi, :sincospi, :tanpi) @eval function ($f)(x::Real) xf = float(x) x === xf && throw(MethodError($f, (x,))) @@ -1588,7 +1555,6 @@ end exp2(x::AbstractFloat) = 2^x exp10(x::AbstractFloat) = 10^x -clamp(::Missing, lo, hi) = missing fourthroot(::Missing) = missing end # module diff --git a/base/mathconstants.jl b/base/mathconstants.jl index 4bb8c409acf00..d26f5115b5ccb 100644 --- a/base/mathconstants.jl +++ b/base/mathconstants.jl @@ -16,6 +16,26 @@ Base.@irrational γ euler Base.@irrational φ (1+sqrt(big(5)))/2 Base.@irrational catalan catalan +const _KnownIrrational = Union{ + typeof(π), typeof(ℯ), typeof(γ), typeof(φ), typeof(catalan) +} + +function Rational{BigInt}(::_KnownIrrational) + Base._throw_argument_error_irrational_to_rational_bigint() +end +Base.@assume_effects :foldable function Rational{T}(x::_KnownIrrational) where {T<:Integer} + Base._irrational_to_rational(T, x) +end +Base.@assume_effects :foldable function (::Type{T})(x::_KnownIrrational, r::RoundingMode) where {T<:Union{Float32,Float64}} + Base._irrational_to_float(T, x, r) +end +Base.@assume_effects :foldable function Base.rationalize(::Type{T}, x::_KnownIrrational; tol::Real=0) where {T<:Integer} + Base._rationalize_irrational(T, x, tol) +end +Base.@assume_effects :foldable function Base.lessrational(rx::Rational, x::_KnownIrrational) + Base._lessrational(rx, x) +end + # aliases """ π diff --git a/base/meta.jl b/base/meta.jl index 31fef1b9697e3..36875b8e2c625 100644 --- 
a/base/meta.jl +++ b/base/meta.jl @@ -5,8 +5,6 @@ Convenience functions for metaprogramming. """ module Meta -using ..CoreLogging - export quot, isexpr, isidentifier, @@ -18,6 +16,8 @@ export quot, show_sexpr, @dump +public parse + using Base: isidentifier, isoperator, isunaryoperator, isbinaryoperator, ispostfixoperator import Base: isexpr @@ -364,11 +364,29 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, x.edges .+= slot_offset return x end + if isa(x, Core.UpsilonNode) + if !isdefined(x, :val) + return x + end + return Core.UpsilonNode( + _partially_inline!(x.val, slot_replacements, type_signature, static_param_values, + slot_offset, statement_offset, boundscheck), + ) + end + if isa(x, Core.PhiCNode) + _partially_inline!(x.values, slot_replacements, type_signature, static_param_values, + slot_offset, statement_offset, boundscheck) + end if isa(x, Core.ReturnNode) + # Unreachable doesn't have val defined + if !isdefined(x, :val) + return x + else return Core.ReturnNode( _partially_inline!(x.val, slot_replacements, type_signature, static_param_values, slot_offset, statement_offset, boundscheck), ) + end end if isa(x, Core.GotoIfNot) return Core.GotoIfNot( @@ -377,6 +395,12 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, x.dest + statement_offset, ) end + if isa(x, Core.EnterNode) + if x.catch_dest == 0 + return x + end + return Core.EnterNode(x, x.catch_dest + statement_offset) + end if isa(x, Expr) head = x.head if head === :static_parameter @@ -424,8 +448,6 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, static_param_values, slot_offset, statement_offset, boundscheck) x.args[2] += statement_offset - elseif head === :enter - x.args[1] += statement_offset elseif head === :isdefined arg = x.args[1] # inlining a QuoteNode or literal into `Expr(:isdefined, x)` is invalid, replace with true @@ -450,7 +472,7 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, @assert isa(arg, Union{GlobalRef, Symbol}) return x end - elseif !Core.Compiler.is_meta_expr_head(head) + elseif !Base.is_meta_expr_head(head) partially_inline!(x.args, slot_replacements, type_signature, static_param_values, slot_offset, statement_offset, boundscheck) end @@ -460,4 +482,45 @@ end _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals) +""" + Meta.unblock(expr) + +Peel away redundant block expressions. + +Specifically, the following expressions are stripped by this function: +- `:block` expressions with a single non-line-number argument. +- Pairs of `:var"hygienic-scope"` / `:escape` expressions. +""" +function unblock(@nospecialize ex) + while isexpr(ex, :var"hygienic-scope") + isexpr(ex.args[1], :escape) || break + ex = ex.args[1].args[1] + end + isexpr(ex, :block) || return ex + exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args) + length(exs) == 1 || return ex + return unblock(exs[1]) +end + +""" + Meta.unescape(expr) + +Peel away `:escape` expressions and redundant block expressions (see +[`unblock`](@ref)). +""" +function unescape(@nospecialize ex) + ex = unblock(ex) + while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope") + ex = unblock(ex.args[1]) + end + return ex +end + +""" + Meta.uncurly(expr) + +Turn `T{P...}` into just `T`. +""" +uncurly(@nospecialize ex) = isexpr(ex, :curly) ? 
ex.args[1] : ex + end # module diff --git a/base/methodshow.jl b/base/methodshow.jl index 0eb99dc88303f..a2158cb9180e4 100644 --- a/base/methodshow.jl +++ b/base/methodshow.jl @@ -286,6 +286,29 @@ function show_method_list_header(io::IO, ms::MethodList, namefmt::Function) !iszero(n) && print(io, ":") end +# Determine the `modulecolor` value to pass to `show_method` +function _modulecolor(method::Method) + mmt = get_methodtable(method) + if mmt === nothing || mmt.module === parentmodule(method) + return nothing + end + # `mmt` is only particularly relevant for external method tables. Since the primary + # method table is shared, we now need to distinguish "primary" methods by trying to + # check if there is a primary `DataType` to identify it with. c.f. how `jl_method_def` + # would derive this same information (for the name). + ft = argument_datatype((unwrap_unionall(method.sig)::DataType).parameters[1]) + # `ft` should be the type associated with the first argument in the method signature. + # If it's `Type`, try to unwrap it again. + if isType(ft) + ft = argument_datatype(ft.parameters[1]) + end + if ft === nothing || parentmodule(method) === parentmodule(ft) !== Core + return nothing + end + m = parentmodule_before_main(method) + return get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) +end + function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=true) mt = ms.mt name = mt.name @@ -300,12 +323,6 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru last_shown_line_infos = get(io, :last_shown_line_infos, nothing) last_shown_line_infos === nothing || empty!(last_shown_line_infos) - modul = if mt === _TYPE_NAME.mt && length(ms) > 0 # type constructor - which(ms.ms[1].module, ms.ms[1].name) - else - mt.module - end - digit_align_width = length(string(max > 0 ? 
max : length(ms))) for meth in ms @@ -315,13 +332,7 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru print(io, " ", lpad("[$n]", digit_align_width + 2), " ") - modulecolor = if parentmodule(meth) == modul - nothing - else - m = parentmodule_before_main(meth) - get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) - end - show_method(io, meth; modulecolor) + show_method(io, meth; modulecolor=_modulecolor(meth)) file, line = updated_methodloc(meth) if last_shown_line_infos !== nothing @@ -367,7 +378,6 @@ function url(m::Method) line = m.line line <= 0 || occursin(r"In\[[0-9]+\]"a, file) && return "" Sys.iswindows() && (file = replace(file, '\\' => '/')) - libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2") if inbase(M) if isempty(Base.GIT_VERSION_INFO.commit) # this url will only work if we're on a tagged release @@ -375,8 +385,10 @@ function url(m::Method) else return "https://github.com/JuliaLang/julia/tree/$(Base.GIT_VERSION_INFO.commit)/base/$file#L$line" end - elseif root_module_exists(libgit2_id) - LibGit2 = root_module(libgit2_id) + end + libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2") + LibGit2 = maybe_root_module(libgit2_id) + if LibGit2 isa Module try d = dirname(file) return LibGit2.with(LibGit2.GitRepoExt(d)) do repo @@ -393,11 +405,10 @@ function url(m::Method) end end catch - return fileurl(file) + # oops, this was a bad idea end - else - return fileurl(file) end + return fileurl(file) end function show(io::IO, ::MIME"text/html", m::Method) diff --git a/base/missing.jl b/base/missing.jl index f6f5fe507260b..6a8c09dc02aff 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -36,7 +36,7 @@ Any !!! compat "Julia 1.3" This function is exported as of Julia 1.3. 
""" -nonmissingtype(::Type{T}) where {T} = typesplit(T, Missing) +nonmissingtype(@nospecialize(T::Type)) = typesplit(T, Missing) function nonmissingtype_checked(T::Type) R = nonmissingtype(T) @@ -135,6 +135,7 @@ min(::Any, ::Missing) = missing max(::Missing, ::Missing) = missing max(::Missing, ::Any) = missing max(::Any, ::Missing) = missing +clamp(::Missing, lo, hi) = missing missing_conversion_msg(@nospecialize T) = LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead") @@ -146,21 +147,10 @@ round(::Type{T}, ::Missing, ::RoundingMode=RoundNearest) where {T} = throw(MissingException(missing_conversion_msg(T))) round(::Type{T}, x::Any, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) # to fix ambiguities +round(::Type{T}, x::Real, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T>:Missing,Tr} = round(nonmissingtype_checked(T), x, r) round(::Type{T}, x::Rational{Bool}, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) -# Handle ceil, floor, and trunc separately as they have no RoundingMode argument -for f in (:(ceil), :(floor), :(trunc)) - @eval begin - ($f)(::Missing; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing - ($f)(::Type{>:Missing}, ::Missing) = missing - ($f)(::Type{T}, ::Missing) where {T} = throw(MissingException(missing_conversion_msg(T))) - ($f)(::Type{T}, x::Any) where {T>:Missing} = $f(nonmissingtype_checked(T), x) - # to fix ambiguities - ($f)(::Type{T}, x::Rational) where {T>:Missing} = $f(nonmissingtype_checked(T), x) - end -end - # to avoid ambiguity warnings (^)(::Missing, ::Integer) = missing @@ -252,7 +242,7 @@ function iterate(itr::SkipMissing, state...) y = iterate(itr.x, state...) y === nothing && return nothing item, state = y - while item === missing + while ismissing(item) y = iterate(itr.x, state) y === nothing && return nothing item, state = y @@ -262,12 +252,12 @@ end IndexStyle(::Type{<:SkipMissing{T}}) where {T} = IndexStyle(T) eachindex(itr::SkipMissing) = - Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, eachindex(itr.x)) + Iterators.filter(i -> !ismissing(@inbounds(itr.x[i])), eachindex(itr.x)) keys(itr::SkipMissing) = - Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x)) + Iterators.filter(i -> !ismissing(@inbounds(itr.x[i])), keys(itr.x)) @propagate_inbounds function getindex(itr::SkipMissing, I...) v = itr.x[I...] 
- v === missing && throw(MissingException(LazyString("the value at index ", I, " is missing"))) + ismissing(v) && throw(MissingException(LazyString("the value at index ", I, " is missing"))) v end @@ -291,18 +281,18 @@ function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray}) ilast = last(inds) for outer i in i:ilast @inbounds ai = A[i] - ai !== missing && break + !ismissing(ai) && break end - ai === missing && return mapreduce_empty(f, op, eltype(itr)) + ismissing(ai) && return mapreduce_empty(f, op, eltype(itr)) a1::eltype(itr) = ai i == typemax(typeof(i)) && return mapreduce_first(f, op, a1) i += 1 ai = missing for outer i in i:ilast @inbounds ai = A[i] - ai !== missing && break + !ismissing(ai) && break end - ai === missing && return mapreduce_first(f, op, a1) + ismissing(ai) && return mapreduce_first(f, op, a1) # We know A contains at least two non-missing entries: the result cannot be nothing something(mapreduce_impl(f, op, itr, first(inds), last(inds))) end @@ -320,7 +310,7 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = return nothing elseif ifirst == ilast @inbounds a1 = A[ifirst] - if a1 === missing + if ismissing(a1) return nothing else return Some(mapreduce_first(f, op, a1)) @@ -331,25 +321,25 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) = i = ifirst for outer i in i:ilast @inbounds ai = A[i] - ai !== missing && break + !ismissing(ai) && break end - ai === missing && return nothing + ismissing(ai) && return nothing a1 = ai::eltype(itr) i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1)) i += 1 ai = missing for outer i in i:ilast @inbounds ai = A[i] - ai !== missing && break + !ismissing(ai) && break end - ai === missing && return Some(mapreduce_first(f, op, a1)) + ismissing(ai) && return Some(mapreduce_first(f, op, a1)) a2 = ai::eltype(itr) i == typemax(typeof(i)) && return Some(op(f(a1), f(a2))) i += 1 v = op(f(a1), f(a2)) @simd for i = i:ilast @inbounds ai = A[i] - if ai !== missing + if !ismissing(ai) v = op(v, f(ai)) end end @@ -395,7 +385,7 @@ julia> filter(isodd, skipmissing(x)) function filter(f, itr::SkipMissing{<:AbstractArray}) y = similar(itr.x, eltype(itr), 0) for xi in itr.x - if xi !== missing && f(xi) + if !ismissing(xi) && f(xi) push!(y, xi) end end @@ -461,7 +451,7 @@ ERROR: `b` is still missing macro coalesce(args...) expr = :(missing) for arg in reverse(args) - expr = :((val = $arg) !== missing ? val : $expr) + expr = :(!ismissing((val = $(esc(arg));)) ? 
val : $expr) end - return esc(:(let val; $expr; end)) + return :(let val; $expr; end) end diff --git a/base/mpfr.jl b/base/mpfr.jl index 2e03018f7669f..1e39f52b9d1a3 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -16,12 +16,14 @@ import cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, lerpi, cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding, setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero, - isone, big, _string_n, decompose, minmax, - sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand - + isone, big, _string_n, decompose, minmax, _precision_with_base_2, + sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand, + uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask using .Base.Libc -import ..Rounding: rounding_raw, setrounding_raw +import ..Rounding: Rounding, + rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero, + tie_breaker_is_to_even, correct_rounding_requires_increment import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp @@ -35,7 +37,6 @@ else const libmpfr = "libmpfr.so.6" end - version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ()))) patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ') @@ -89,65 +90,166 @@ function convert(::Type{RoundingMode}, r::MPFRRoundingMode) end end +rounds_to_nearest(m::MPFRRoundingMode) = m == MPFRRoundNearest +function rounds_away_from_zero(m::MPFRRoundingMode, sign_bit::Bool) + if m == MPFRRoundToZero + false + elseif m == MPFRRoundUp + !sign_bit + elseif m == MPFRRoundDown + sign_bit + else + # Assuming `m == MPFRRoundFromZero` + true + end +end +tie_breaker_is_to_even(::MPFRRoundingMode) = true + const ROUNDING_MODE = Ref{MPFRRoundingMode}(MPFRRoundNearest) +const CURRENT_ROUNDING_MODE = Base.ScopedValues.ScopedValue{MPFRRoundingMode}() const DEFAULT_PRECISION = Ref{Clong}(256) - +const CURRENT_PRECISION = Base.ScopedValues.ScopedValue{Clong}() # Basic type and initialization definitions -""" - BigFloat <: AbstractFloat +# Warning: the constants are MPFR implementation details from +# `src/mpfr-impl.h`, search for `MPFR_EXP_ZERO`. +const mpfr_special_exponent_zero = typemin(Clong) + true +const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true +const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true -Arbitrary precision floating point number type. -""" -mutable struct BigFloat <: AbstractFloat +struct BigFloatLayout prec::Clong sign::Cint exp::Clong d::Ptr{Limb} - # _d::Buffer{Limb} # Julia gc handle for memory @ d - _d::String # Julia gc handle for memory @ d (optimized) + # possible padding + p::Limb # Tuple{Vararg{Limb}} +end +const offset_prec = fieldoffset(BigFloatLayout, 1) % Int +const offset_sign = fieldoffset(BigFloatLayout, 2) % Int +const offset_exp = fieldoffset(BigFloatLayout, 3) % Int +const offset_d = fieldoffset(BigFloatLayout, 4) % Int +const offset_p_limbs = ((fieldoffset(BigFloatLayout, 5) % Int + sizeof(Limb) - 1) ÷ sizeof(Limb)) +const offset_p = offset_p_limbs * sizeof(Limb) + +""" + BigFloat <: AbstractFloat + +Arbitrary precision floating point number type. +""" +struct BigFloat <: AbstractFloat + d::Memory{Limb} # Not recommended for general use: # used internally by, e.g. 
deepcopy - global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String) - # ccall-based version, inlined below - #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d) - #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr - #NAN_KIND = Cint(0) - #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d) - #return z - return new(prec, sign, exp, pointer(d), d) + global function _BigFloat(d::Memory{Limb}) + Base.unsafe_convert(Ref{BigFloat}, BigFloatData(d)) # force early initialization of pointer field of z.d + return new(d) end - function BigFloat(; precision::Integer=DEFAULT_PRECISION[]) + function BigFloat(; precision::Integer=_precision_with_base_2(BigFloat)) precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1.")) nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision) - nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this - #d = Vector{Limb}(undef, nb) - d = _string_n(nb * Core.sizeof(Limb)) - EXP_NAN = Clong(1) - Clong(typemax(Culong) >> 1) - return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN + nl = (nb + offset_p + sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this + d = Memory{Limb}(undef, nl % Int) + # ccall-based version, inlined below + #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), BigFloatData(d), prec) # currently seems to be a no-op in mpfr + #NAN_KIND = Cint(0) + #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, BigFloatData(d)) + p = Base.unsafe_convert(Ptr{Limb}, d) + GC.@preserve d begin # initialize to +NAN + unsafe_store!(Ptr{Clong}(p) + offset_prec, Clong(precision)) + unsafe_store!(Ptr{Cint}(p) + offset_sign, one(Cint)) + unsafe_store!(Ptr{Clong}(p) + offset_exp, mpfr_special_exponent_nan) + unsafe_store!(Ptr{Ptr{Limb}}(p) + offset_d, p + offset_p) + end + return new(d) + end +end + +""" +Segment of raw words of bits interpreted as a big integer. Less +significant words come first. Each word is in machine-native bit-order. 
+""" +struct BigFloatData{Limb} + d::Memory{Limb} +end + +# BigFloat interface +@inline function Base.getproperty(x::BigFloat, s::Symbol) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + if s === :prec + return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_prec) + elseif s === :sign + return GC.@preserve d unsafe_load(Ptr{Cint}(p) + offset_sign) + elseif s === :exp + return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_exp) + elseif s === :d + return BigFloatData(d) + else + return throw(FieldError(typeof(x), s)) + end +end + +# While BigFloat (like all Numbers) is considered immutable, for practical reasons +# of writing the algorithms on it we allow mutating sign, exp, and the contents of d +@inline function Base.setproperty!(x::BigFloat, s::Symbol, v) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + if s === :sign + return GC.@preserve d unsafe_store!(Ptr{Cint}(p) + offset_sign, v) + elseif s === :exp + return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_exp, v) + #elseif s === :d || s === :prec # not mutable + else + return throw(FieldError(x, s)) + end +end + +# Ref interface: make sure the conversion to C is done properly +Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = error("not compatible with mpfr") +Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = error("not compatible with mpfr") +Base.cconvert(::Type{Ref{BigFloat}}, x::BigFloat) = x.d # BigFloatData is the Ref type for BigFloat +function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::BigFloatData) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + dptrptr = Ptr{Ptr{Limb}}(p) + offset_d + dptr = p + offset_p + GC.@preserve d if unsafe_load(dptrptr, :monotonic) != dptr # make sure this pointer value was recomputed after any deserialization or copying + unsafe_store!(dptrptr, dptr, :monotonic) # :monotonic ensure that TSAN knows that this isn't a data race end + return Ptr{BigFloat}(p) end +Base.unsafe_convert(::Type{Ptr{Limb}}, fd::BigFloatData) = Base.unsafe_convert(Ptr{Limb}, getfield(fd, :d)) + offset_p +function Base.setindex!(fd::BigFloatData, v, i) + d = getfield(fd, :d) + @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i)) + @inbounds d[i + offset_p_limbs] = v + return fd +end +function Base.getindex(fd::BigFloatData, i) + d = getfield(fd, :d) + @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i)) + @inbounds d[i + offset_p_limbs] +end +Base.length(fd::BigFloatData) = length(getfield(fd, :d)) - offset_p_limbs +Base.copyto!(fd::BigFloatData, limbs) = copyto!(getfield(fd, :d), offset_p_limbs + 1, limbs) # for Random -rounding_raw(::Type{BigFloat}) = ROUNDING_MODE[] +include("rawbigfloats.jl") + +rounding_raw(::Type{BigFloat}) = something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[]) setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r +function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode) + Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f()) +end rounding(::Type{BigFloat}) = convert(RoundingMode, rounding_raw(BigFloat)) setrounding(::Type{BigFloat}, r::RoundingMode) = setrounding_raw(BigFloat, convert(MPFRRoundingMode, r)) +setrounding(f::Function, ::Type{BigFloat}, r::RoundingMode) = + setrounding_raw(f, BigFloat, convert(MPFRRoundingMode, r)) -# overload the definition of unsafe_convert to ensure that `x.d` is assigned -# it may have been dropped in the event that the BigFloat was serialized 
-Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = x -@inline function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) - x = x[] - if x.d == C_NULL - x.d = pointer(x._d) - end - return convert(Ptr{BigFloat}, Base.pointer_from_objref(x)) -end - """ BigFloat(x::Union{Real, AbstractString} [, rounding::RoundingMode=rounding(BigFloat)]; [precision::Integer=precision(BigFloat)]) @@ -192,8 +294,8 @@ BigFloat(x, r::RoundingMode) widen(::Type{Float64}) = BigFloat widen(::Type{BigFloat}) = BigFloat -function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) - if precision == _precision(x) +function BigFloat(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) + if precision == _precision_with_base_2(x) return x else z = BigFloat(;precision=precision) @@ -204,7 +306,7 @@ function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::I end function _duplicate(x::BigFloat) - z = BigFloat(;precision=_precision(x)) + z = BigFloat(;precision=_precision_with_base_2(x)) ccall((:mpfr_set, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Int32), z, x, 0) return z end @@ -212,7 +314,7 @@ end # convert to BigFloat for (fJ, fC) in ((:si,:Clong), (:ui,:Culong)) @eval begin - function BigFloat(x::($fC), r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) + function BigFloat(x::($fC), r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) z = BigFloat(;precision=precision) ccall(($(string(:mpfr_set_,fJ)), libmpfr), Int32, (Ref{BigFloat}, $fC, MPFRRoundingMode), z, x, r) return z @@ -220,7 +322,7 @@ for (fJ, fC) in ((:si,:Clong), (:ui,:Culong)) end end -function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) +function BigFloat(x::Float64, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) z = BigFloat(;precision) # punt on the hard case where we might have to deal with rounding # we could use this path in all cases, but mpfr_set_d has a lot of overhead. @@ -234,11 +336,11 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::In z.sign = 1-2*signbit(x) if iszero(x) || !isfinite(x) if isinf(x) - z.exp = Clong(2) - typemax(Clong) + z.exp = mpfr_special_exponent_inf elseif isnan(x) - z.exp = Clong(1) - typemax(Clong) + z.exp = mpfr_special_exponent_nan else - z.exp = - typemax(Clong) + z.exp = mpfr_special_exponent_zero end return z end @@ -248,41 +350,42 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::In nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb)) # Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower. 
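# Editorial aside, continuing at this point in `BigFloat(x::Float64, ...)`:
# the `nlimbs` expression above is just a rounded-up division, e.g. for the
# default 256-bit precision with 64-bit limbs it comes out to 4. (Illustration
# only; as the comment above notes, `Limb` is UInt32 on Windows.)
let precision = 256, limb_bits = 8 * Core.sizeof(UInt64)
    @assert (precision + limb_bits - 1) ÷ limb_bits == cld(precision, limb_bits) == 4
end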
+ zd = z.d if Limb === UInt64 for i in 1:nlimbs-1 - unsafe_store!(z.d, 0x0, i) + @inbounds setindex!(zd, 0x0, i) end - unsafe_store!(z.d, val, nlimbs) + @inbounds setindex!(zd, val, nlimbs) else for i in 1:nlimbs-2 - unsafe_store!(z.d, 0x0, i) + @inbounds setindex!(zd, 0x0, i) end - unsafe_store!(z.d, val % UInt32, nlimbs-1) - unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs) + @inbounds setindex!(zd, val % UInt32, nlimbs-1) + @inbounds setindex!(zd, (val >> 32) % UInt32, nlimbs) end z end -function BigFloat(x::BigInt, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) +function BigFloat(x::BigInt, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) z = BigFloat(;precision=precision) ccall((:mpfr_set_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, r) return z end -BigFloat(x::Integer; precision::Integer=DEFAULT_PRECISION[]) = - BigFloat(BigInt(x)::BigInt, ROUNDING_MODE[]; precision=precision) -BigFloat(x::Integer, r::MPFRRoundingMode; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::Integer; precision::Integer=_precision_with_base_2(BigFloat)) = + BigFloat(BigInt(x)::BigInt, rounding_raw(BigFloat); precision=precision) +BigFloat(x::Integer, r::MPFRRoundingMode; precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(BigInt(x)::BigInt, r; precision=precision) -BigFloat(x::Union{Bool,Int8,Int16,Int32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::Union{Bool,Int8,Int16,Int32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(convert(Clong, x), r; precision=precision) -BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(convert(Culong, x), r; precision=precision) -BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(Float64(x), r; precision=precision) -function BigFloat(x::Rational, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) +function BigFloat(x::Rational, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) setprecision(BigFloat, precision) do setrounding_raw(BigFloat, r) do BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat @@ -290,14 +393,14 @@ function BigFloat(x::Rational, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::I end end -function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=DEFAULT_PRECISION[], rounding::MPFRRoundingMode=ROUNDING_MODE[]) +function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=_precision_with_base_2(BigFloat), rounding::MPFRRoundingMode=rounding_raw(BigFloat)) !isempty(s) && isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base = base) z = BigFloat(precision=precision) err = ccall((:mpfr_set_str, libmpfr), Int32, (Ref{BigFloat}, Cstring, Int32, MPFRRoundingMode), z, s, base, rounding) err == 0 ? 
z : nothing end -BigFloat(x::AbstractString, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::AbstractString, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) = parse(BigFloat, x; precision=precision, rounding=r) Rational(x::BigFloat) = convert(Rational{BigInt}, x) @@ -305,9 +408,9 @@ AbstractFloat(x::BigInt) = BigFloat(x) float(::Type{BigInt}) = BigFloat -BigFloat(x::Real, r::RoundingMode; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::Real, r::RoundingMode; precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(x, convert(MPFRRoundingMode, r); precision=precision)::BigFloat -BigFloat(x::AbstractString, r::RoundingMode; precision::Integer=DEFAULT_PRECISION[]) = +BigFloat(x::AbstractString, r::RoundingMode; precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(x, convert(MPFRRoundingMode, r); precision=precision) ## BigFloat -> Integer @@ -352,18 +455,15 @@ round(::Type{T}, x::BigFloat, r::RoundingMode) where T<:Union{Signed, Unsigned} invoke(round, Tuple{Type{<:Union{Signed, Unsigned}}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, T, x, r) round(::Type{BigInt}, x::BigFloat, r::RoundingMode) = invoke(round, Tuple{Type{BigInt}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, BigInt, x, r) -round(::Type{<:Integer}, x::BigFloat, r::RoundingMode) = throw(MethodError(round, (Integer, x, r))) unsafe_trunc(::Type{T}, x::BigFloat) where {T<:Integer} = unsafe_trunc(T, _unchecked_cast(T, x, RoundToZero)) unsafe_trunc(::Type{BigInt}, x::BigFloat) = _unchecked_cast(BigInt, x, RoundToZero) -# TODO: Ideally the base fallbacks for these would already exist -for (f, rnd) in zip((:trunc, :floor, :ceil, :round), - (RoundToZero, RoundDown, RoundUp, :(ROUNDING_MODE[]))) - @eval $f(::Type{T}, x::BigFloat) where T<:Union{Unsigned, Signed, BigInt} = round(T, x, $rnd) - @eval $f(::Type{Integer}, x::BigFloat) = $f(BigInt, x) -end +round(::Type{T}, x::BigFloat) where T<:Integer = round(T, x, rounding_raw(BigFloat)) +# these two methods are split to increase their precedence in disambiguation: +round(::Type{Integer}, x::BigFloat, r::RoundingMode) = round(BigInt, x, r) +round(::Type{Integer}, x::BigFloat, r::MPFRRoundingMode) = round(BigInt, x, r) function Bool(x::BigFloat) iszero(x) && return false @@ -380,35 +480,69 @@ function (::Type{T})(x::BigFloat) where T<:Integer trunc(T,x) end -## BigFloat -> AbstractFloat -_cpynansgn(x::AbstractFloat, y::BigFloat) = isnan(x) && signbit(x) != signbit(y) ? 
-x : x - -Float64(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = - _cpynansgn(ccall((:mpfr_get_d,libmpfr), Float64, (Ref{BigFloat}, MPFRRoundingMode), x, r), x) -Float64(x::BigFloat, r::RoundingMode) = Float64(x, convert(MPFRRoundingMode, r)) - -Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = - _cpynansgn(ccall((:mpfr_get_flt,libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x) -Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r)) - -function Float16(x::BigFloat) :: Float16 - res = Float32(x) - resi = reinterpret(UInt32, res) - if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal - #shift so that the mantissa lines up where it would for normal Float16 - shift = 113-((resi & 0x7f800000)>>23) - if shift<23 - resi |= 0x0080_0000 # set implicit bit - resi >>= shift +function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat} + sb = signbit(x) + is_zero = iszero(x) + is_inf = isinf(x) + is_nan = isnan(x) + is_regular = !is_zero & !is_inf & !is_nan + ieee_exp = Int(x.exp) - 1 + ieee_precision = precision(T) + ieee_exp_max = exponent_max(T) + ieee_exp_min = exponent_min(T) + exp_diff = ieee_exp - ieee_exp_min + is_normal = 0 ≤ exp_diff + (rm_is_to_zero, rm_is_from_zero) = if rounds_to_nearest(rm) + (false, false) + else + let from = rounds_away_from_zero(rm, sb) + (!from, from) end - end - if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values - # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer - res = nextfloat(res, cmp(x, res)) - end - return res + end::NTuple{2,Bool} + exp_is_huge_p = ieee_exp_max < ieee_exp + exp_is_huge_n = signbit(exp_diff + ieee_precision) + rounds_to_inf = is_regular & exp_is_huge_p & !rm_is_to_zero + rounds_to_zero = is_regular & exp_is_huge_n & !rm_is_from_zero + U = uinttype(T) + + ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero + if !exp_is_huge_p + # significand + v = x.d::BigFloatData + len = max(ieee_precision + min(exp_diff, 0), 0)::Int + signif = truncated(U, v, len) & significand_mask(T) + + # round up if necessary + rh = BigFloatDataRoundingIncrementHelper(v, len) + incr = correct_rounding_requires_increment(rh, rm, sb) + + # exponent + exp_field = max(exp_diff, 0) + is_normal + + ieee754_representation(T, sb, exp_field, signif) + incr + else + ieee754_representation(T, sb, Val(:omega)) + end + else + if is_zero | rounds_to_zero + ieee754_representation(T, sb, Val(:zero)) + elseif is_inf | rounds_to_inf + ieee754_representation(T, sb, Val(:inf)) + else + ieee754_representation(T, sb, Val(:nan)) + end + end::U + + reinterpret(T, ret_u) end +Float16(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat)) = to_ieee754(Float16, x, r) +Float32(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat)) = to_ieee754(Float32, x, r) +Float64(x::BigFloat, r::MPFRRoundingMode=rounding_raw(BigFloat)) = to_ieee754(Float64, x, r) +Float16(x::BigFloat, r::RoundingMode) = to_ieee754(Float16, x, r) +Float32(x::BigFloat, r::RoundingMode) = to_ieee754(Float32, x, r) +Float64(x::BigFloat, r::RoundingMode) = to_ieee754(Float64, x, r) + promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat promote_rule(::Type{BigFloat}, ::Type{<:AbstractFloat}) = BigFloat @@ -431,14 +565,14 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # BigFloat function ($fJ)(x::BigFloat, y::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, 
Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end # Unsigned Integer function ($fJ)(x::BigFloat, c::CulongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end ($fJ)(c::CulongMax, x::BigFloat) = ($fJ)(x,c) @@ -446,7 +580,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # Signed Integer function ($fJ)(x::BigFloat, c::ClongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end ($fJ)(c::ClongMax, x::BigFloat) = ($fJ)(x,c) @@ -454,7 +588,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # Float32/Float64 function ($fJ)(x::BigFloat, c::CdoubleMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end ($fJ)(c::CdoubleMax, x::BigFloat) = ($fJ)(x,c) @@ -462,7 +596,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # BigInt function ($fJ)(x::BigFloat, c::BigInt) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end ($fJ)(c::BigInt, x::BigFloat) = ($fJ)(x,c) @@ -474,50 +608,50 @@ for (fJ, fC) in ((:-,:sub), (:/,:div)) # BigFloat function ($fJ)(x::BigFloat, y::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end # Unsigned Int function ($fJ)(x::BigFloat, c::CulongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end function ($fJ)(c::CulongMax, x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,:ui_,fC)), libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,:ui_,fC)), libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat)) return z end # Signed Integer function ($fJ)(x::BigFloat, c::ClongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end 
function ($fJ)(c::ClongMax, x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,:si_,fC)), libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,:si_,fC)), libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat)) return z end # Float32/Float64 function ($fJ)(x::BigFloat, c::CdoubleMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end function ($fJ)(c::CdoubleMax, x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,:d_,fC)), libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,:d_,fC)), libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat)) return z end # BigInt function ($fJ)(x::BigFloat, c::BigInt) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, rounding_raw(BigFloat)) return z end # no :mpfr_z_div function @@ -526,7 +660,7 @@ end function -(c::BigInt, x::BigFloat) z = BigFloat() - ccall((:mpfr_z_sub, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall((:mpfr_z_sub, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, c, x, rounding_raw(BigFloat)) return z end @@ -534,7 +668,7 @@ inv(x::BigFloat) = one(Clong) / x # faster than fallback one(x)/x function fma(x::BigFloat, y::BigFloat, z::BigFloat) r = BigFloat() - ccall(("mpfr_fma",libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), r, x, y, z, ROUNDING_MODE[]) + ccall(("mpfr_fma",libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), r, x, y, z, rounding_raw(BigFloat)) return r end @@ -605,23 +739,23 @@ for (fJ, fC, fI) in ((:+, :add, 0), (:*, :mul, 1)) @eval begin function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, rounding_raw(BigFloat)) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, rounding_raw(BigFloat)) return z end function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat, d::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), 
Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, rounding_raw(BigFloat)) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, rounding_raw(BigFloat)) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, rounding_raw(BigFloat)) return z end function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat, d::BigFloat, e::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, e, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, rounding_raw(BigFloat)) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, rounding_raw(BigFloat)) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, rounding_raw(BigFloat)) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, e, rounding_raw(BigFloat)) return z end end @@ -629,14 +763,14 @@ end function -(x::BigFloat) z = BigFloat() - ccall((:mpfr_neg, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall((:mpfr_neg, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat)) return z end function sqrt(x::BigFloat) isnan(x) && return x z = BigFloat() - ccall((:mpfr_sqrt, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall((:mpfr_sqrt, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat)) isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) return z end @@ -645,25 +779,25 @@ sqrt(x::BigInt) = sqrt(BigFloat(x)) function ^(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_pow, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end function ^(x::BigFloat, y::CulongMax) z = BigFloat() - ccall((:mpfr_pow_ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow_ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end function ^(x::BigFloat, y::ClongMax) z = BigFloat() - ccall((:mpfr_pow_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end function ^(x::BigFloat, y::BigInt) z = BigFloat() - ccall((:mpfr_pow_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + 
ccall((:mpfr_pow_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end @@ -673,7 +807,7 @@ end for f in (:exp, :exp2, :exp10, :expm1, :cosh, :sinh, :tanh, :sech, :csch, :coth, :cbrt) @eval function $f(x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat)) return z end end @@ -681,7 +815,7 @@ end function sincos_fast(v::BigFloat) s = BigFloat() c = BigFloat() - ccall((:mpfr_sin_cos, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), s, c, v, ROUNDING_MODE[]) + ccall((:mpfr_sin_cos, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), s, c, v, rounding_raw(BigFloat)) return (s, c) end sincos(v::BigFloat) = sincos_fast(v) @@ -689,18 +823,18 @@ sincos(v::BigFloat) = sincos_fast(v) # return log(2) function big_ln2() c = BigFloat() - ccall((:mpfr_const_log2, libmpfr), Cint, (Ref{BigFloat}, MPFRRoundingMode), c, MPFR.ROUNDING_MODE[]) + ccall((:mpfr_const_log2, libmpfr), Cint, (Ref{BigFloat}, MPFRRoundingMode), c, MPFR.rounding_raw(BigFloat)) return c end function ldexp(x::BigFloat, n::Clong) z = BigFloat() - ccall((:mpfr_mul_2si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[]) + ccall((:mpfr_mul_2si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, n, rounding_raw(BigFloat)) return z end function ldexp(x::BigFloat, n::Culong) z = BigFloat() - ccall((:mpfr_mul_2ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[]) + ccall((:mpfr_mul_2ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, n, rounding_raw(BigFloat)) return z end ldexp(x::BigFloat, n::ClongMax) = ldexp(x, convert(Clong, n)) @@ -713,13 +847,13 @@ function factorial(x::BigFloat) end ui = convert(Culong, x) z = BigFloat() - ccall((:mpfr_fac_ui, libmpfr), Int32, (Ref{BigFloat}, Culong, MPFRRoundingMode), z, ui, ROUNDING_MODE[]) + ccall((:mpfr_fac_ui, libmpfr), Int32, (Ref{BigFloat}, Culong, MPFRRoundingMode), z, ui, rounding_raw(BigFloat)) return z end function hypot(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_hypot, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_hypot, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end @@ -731,7 +865,7 @@ for f in (:log, :log2, :log10) "with a complex argument. Try ", $f, "(complex(x))."))) end z = BigFloat() - ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat)) return z end end @@ -743,7 +877,7 @@ function log1p(x::BigFloat) "with a complex argument. 
Try log1p(complex(x))."))) end z = BigFloat() - ccall((:mpfr_log1p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall((:mpfr_log1p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat)) return z end @@ -767,19 +901,19 @@ end function modf(x::BigFloat) zint = BigFloat() zfloat = BigFloat() - ccall((:mpfr_modf, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), zint, zfloat, x, ROUNDING_MODE[]) + ccall((:mpfr_modf, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), zint, zfloat, x, rounding_raw(BigFloat)) return (zfloat, zint) end function rem(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_fmod, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_fmod, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end function rem(x::BigFloat, y::BigFloat, ::RoundingMode{:Nearest}) z = BigFloat() - ccall((:mpfr_remainder, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_remainder, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end @@ -790,7 +924,7 @@ function sum(arr::AbstractArray{BigFloat}) z = BigFloat(0) for i in arr ccall((:mpfr_add, libmpfr), Int32, - (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, i, ROUNDING_MODE[]) + (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, i, rounding_raw(BigFloat)) end return z end @@ -801,7 +935,7 @@ for f in (:sin, :cos, :tan, :sec, :csc, :acos, :asin, :atan, :acosh, :asinh, :at function ($f)(x::BigFloat) isnan(x) && return x z = BigFloat() - ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, rounding_raw(BigFloat)) isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) return z end @@ -811,7 +945,7 @@ sincospi(x::BigFloat) = (sinpi(x), cospi(x)) function atan(y::BigFloat, x::BigFloat) z = BigFloat() - ccall((:mpfr_atan2, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, y, x, ROUNDING_MODE[]) + ccall((:mpfr_atan2, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, y, x, rounding_raw(BigFloat)) return z end @@ -821,14 +955,14 @@ for f in (:sin, :cos, :tan) function ($(Symbol(f,:d)))(x::BigFloat) isnan(x) && return x z = BigFloat() - ccall(($(string(:mpfr_,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, rounding_raw(BigFloat)) isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) return z end function ($(Symbol(:a,f,:d)))(x::BigFloat) isnan(x) && return x z = BigFloat() - ccall(($(string(:mpfr_a,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, ROUNDING_MODE[]) + ccall(($(string(:mpfr_a,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, rounding_raw(BigFloat)) isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) return z end @@ -836,7 
+970,7 @@ for f in (:sin, :cos, :tan) end function atand(y::BigFloat, x::BigFloat) z = BigFloat() - ccall((:mpfr_atan2u, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, y, x, 360, ROUNDING_MODE[]) + ccall((:mpfr_atan2u, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, y, x, 360, rounding_raw(BigFloat)) return z end @@ -885,19 +1019,27 @@ cmp(x::CdoubleMax, y::BigFloat) = -cmp(y,x) <=(x::BigFloat, y::CdoubleMax) = !isnan(x) && !isnan(y) && cmp(x,y) <= 0 <=(x::CdoubleMax, y::BigFloat) = !isnan(x) && !isnan(y) && cmp(y,x) >= 0 -signbit(x::BigFloat) = ccall((:mpfr_signbit, libmpfr), Int32, (Ref{BigFloat},), x) != 0 +# Note: this inlines the implementation of `mpfr_signbit` to avoid a +# `ccall`. +signbit(x::BigFloat) = signbit(x.sign) + function sign(x::BigFloat) c = cmp(x, 0) (c == 0 || isnan(x)) && return x return c < 0 ? -one(x) : one(x) end -function _precision(x::BigFloat) # precision of an object of type BigFloat +function _precision_with_base_2(x::BigFloat) # precision of an object of type BigFloat return ccall((:mpfr_get_prec, libmpfr), Clong, (Ref{BigFloat},), x) end precision(x::BigFloat; base::Integer=2) = _precision(x, base) -_precision(::Type{BigFloat}) = Int(DEFAULT_PRECISION[]) # default precision of the type BigFloat itself + +_convert_precision_from_base(precision::Integer, base::Integer) = + base == 2 ? precision : ceil(Int, precision * log2(base)) + +_precision_with_base_2(::Type{BigFloat}) = + Int(something(Base.ScopedValues.get(CURRENT_PRECISION), DEFAULT_PRECISION[])) # default precision of the type BigFloat itself """ setprecision([T=BigFloat,] precision::Int; base=2) @@ -918,7 +1060,7 @@ at least `precision` digits in the given `base`. function setprecision(::Type{BigFloat}, precision::Integer; base::Integer=2) base > 1 || throw(DomainError(base, "`base` cannot be less than 2.")) precision > 0 || throw(DomainError(precision, "`precision` cannot be less than 1.")) - DEFAULT_PRECISION[] = base == 2 ? 
precision : ceil(Int, precision * log2(base)) + DEFAULT_PRECISION[] = _convert_precision_from_base(precision, base) return precision end @@ -929,7 +1071,7 @@ maxintfloat(::Type{BigFloat}) = BigFloat(2)^precision(BigFloat) function copysign(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_copysign, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_copysign, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, rounding_raw(BigFloat)) return z end @@ -944,16 +1086,16 @@ end function frexp(x::BigFloat) z = BigFloat() c = Ref{Clong}() - ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[]) + ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, rounding_raw(BigFloat)) return (z, c[]) end function significand(x::BigFloat) z = BigFloat() c = Ref{Clong}() - ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[]) + ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, rounding_raw(BigFloat)) # Double the significand to make it work as Base.significand - ccall((:mpfr_mul_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, z, 2, ROUNDING_MODE[]) + ccall((:mpfr_mul_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, z, 2, rounding_raw(BigFloat)) return z end @@ -976,16 +1118,16 @@ for (f,R) in ((:roundeven, :Nearest), end function isinf(x::BigFloat) - return ccall((:mpfr_inf_p, libmpfr), Int32, (Ref{BigFloat},), x) != 0 + return x.exp == mpfr_special_exponent_inf end function isnan(x::BigFloat) - return ccall((:mpfr_nan_p, libmpfr), Int32, (Ref{BigFloat},), x) != 0 + return x.exp == mpfr_special_exponent_nan end isfinite(x::BigFloat) = !isinf(x) && !isnan(x) -iszero(x::BigFloat) = x == Clong(0) +iszero(x::BigFloat) = x.exp == mpfr_special_exponent_zero isone(x::BigFloat) = x == Clong(1) @eval typemax(::Type{BigFloat}) = $(BigFloat(Inf)) @@ -1034,14 +1176,8 @@ Note: `nextfloat()`, `prevfloat()` do not use the precision mentioned by !!! compat "Julia 1.8" The `base` keyword requires at least Julia 1.8. """ -function setprecision(f::Function, ::Type{T}, prec::Integer; kws...) where T - old_prec = precision(T) - setprecision(T, prec; kws...) 
- try - return f() - finally - setprecision(T, old_prec) - end +function setprecision(f::Function, ::Type{BigFloat}, prec::Integer; base::Integer=2) + Base.ScopedValues.@with(CURRENT_PRECISION => _convert_precision_from_base(prec, base), f()) end setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigFloat, prec; base) @@ -1125,13 +1261,11 @@ set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong, function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict) get!(stackdict, x) do - # d = copy(x._d) - d = x._d - d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String - y = _BigFloat(x.prec, x.sign, x.exp, d′) + d′ = copy(getfield(x, :d)) + y = _BigFloat(d′) #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary return y - end + end::BigFloat end function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} @@ -1142,7 +1276,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} s.size = cld(x.prec, 8*sizeof(Limb)) # limbs b = s.size * sizeof(Limb) # bytes ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits - memcpy(s.d, x.d, b) + xd = x.d + GC.@preserve xd memcpy(s.d, Base.unsafe_convert(Ptr{Limb}, xd), b) s, x.exp - 8b, x.sign end @@ -1154,7 +1289,8 @@ end # flags clear_flags() = ccall((:mpfr_clear_flags, libmpfr), Cvoid, ()) had_underflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0 -had_overflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0 +had_overflow() = ccall((:mpfr_overflow_p, libmpfr), Cint, ()) != 0 +had_divbyzero() = ccall((:mpfr_divby0_p, libmpfr), Cint, ()) != 0 had_nan() = ccall((:mpfr_nanflag_p, libmpfr), Cint, ()) != 0 had_inexact_exception() = ccall((:mpfr_inexflag_p, libmpfr), Cint, ()) != 0 had_range_exception() = ccall((:mpfr_erangeflag_p, libmpfr), Cint, ()) != 0 diff --git a/base/multidimensional.jl b/base/multidimensional.jl index ba4e6eb12695a..ba08f0679590b 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -4,12 +4,12 @@ module IteratorsMD import .Base: eltype, length, size, first, last, in, getindex, setindex!, min, max, zero, oneunit, isless, eachindex, - convert, show, iterate, promote_rule + convert, show, iterate, promote_rule, to_indices, copy import .Base: +, -, *, (:) import .Base: simd_outer_range, simd_inner_length, simd_index, setindex - import .Base: to_indices, to_index, _to_indices1, _cutdim - using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail, + using .Base: to_index, fill_to_length, tail, safe_tail + using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, ReshapedArray, ReshapedArrayLF, OneTo, Fix1 using .Base.Iterators: Reverse, PartitionIterator using .Base: @propagate_inbounds @@ -83,6 +83,7 @@ module IteratorsMD CartesianIndex{N}(index::Integer...) where {N} = CartesianIndex{N}(index) CartesianIndex{N}() where {N} = CartesianIndex{N}(()) # Un-nest passed CartesianIndexes + CartesianIndex{N}(index::CartesianIndex{N}) where {N} = index CartesianIndex(index::Union{Integer, CartesianIndex}...) 
= CartesianIndex(flatten(index)) flatten(::Tuple{}) = () flatten(I::Tuple{Any}) = Tuple(I[1]) @@ -114,6 +115,7 @@ module IteratorsMD oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(1), Val(N))) # arithmetic, min/max + @inline (+)(index::CartesianIndex) = index @inline (-)(index::CartesianIndex{N}) where {N} = CartesianIndex{N}(map(-, index.I)) @inline (+)(index1::CartesianIndex{N}, index2::CartesianIndex{N}) where {N} = @@ -166,6 +168,19 @@ module IteratorsMD Base.iterate(::CartesianIndex) = error("iteration is deliberately unsupported for CartesianIndex. Use `I` rather than `I...`, or use `Tuple(I)...`") + # ranges are deliberately disabled to prevent ambiguities with the colon constructor + Base.range_start_step_length(start::CartesianIndex, step::CartesianIndex, len::Integer) = + error("range with a specified length is deliberately unsupported for CartesianIndex arguments."* + " Use StepRangeLen($start, $step, $len) to construct this range") + + # show is special-cased to avoid the start:stop:step display, + # which constructs a CartesianIndices + # See #50784 + function show(io::IO, r::StepRangeLen{<:CartesianIndex}) + print(io, "StepRangeLen(", first(r), ", ", + step(r), ", ", length(r), ")") + end + # Iteration const OrdinalRangeInt = OrdinalRange{Int, Int} """ @@ -267,7 +282,7 @@ module IteratorsMD CartesianIndices(A::AbstractArray) = CartesianIndices(axes(A)) _convert2ind(sz::Bool) = Base.OneTo(Int8(sz)) - _convert2ind(sz::Integer) = Base.OneTo(sz) + _convert2ind(sz::Integer) = Base.oneto(sz) _convert2ind(sz::AbstractUnitRange) = first(sz):last(sz) _convert2ind(sz::OrdinalRange) = first(sz):step(sz):last(sz) @@ -351,7 +366,7 @@ module IteratorsMD end # getindex for a 0D CartesianIndices is necessary for disambiguation - @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R} + @inline function Base.getindex(iter::CartesianIndices{0,R}) where {R} CartesianIndex() end @inline function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R} @@ -424,29 +439,19 @@ module IteratorsMD @inline function __inc(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt}) rng = indices[1] I = state[1] + step(rng) - valid = __is_valid_range(I, rng) && state[1] != last(rng) - return valid, (I, ) + valid = state[1] != last(rng) + return valid, (I,) end @inline function __inc(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}}) rng = indices[1] I = state[1] + step(rng) - if __is_valid_range(I, rng) && state[1] != last(rng) + if state[1] != last(rng) return true, (I, tail(state)...) end valid, I = __inc(tail(state), tail(indices)) return valid, (first(rng), I...) end - @inline __is_valid_range(I, rng::AbstractUnitRange) = I in rng - @inline function __is_valid_range(I, rng::OrdinalRange) - if step(rng) > 0 - lo, hi = first(rng), last(rng) - else - lo, hi = last(rng), first(rng) - end - lo <= I <= hi - end - # 0-d cartesian ranges are special-cased to iterate once and only once iterate(iter::CartesianIndices{0}, done=false) = done ? 
nothing : (CartesianIndex(), true) @@ -461,15 +466,19 @@ module IteratorsMD last(iter::CartesianIndices) = CartesianIndex(map(last, iter.indices)) # When used as indices themselves, CartesianIndices can simply become its tuple of ranges - _to_indices1(A, inds, I1::CartesianIndices) = map(Fix1(to_index, A), I1.indices) - _cutdim(inds::Tuple, I1::CartesianIndices) = split(inds, Val(ndims(I1)))[2] - + @inline function to_indices(A, inds, I::Tuple{CartesianIndices{N}, Vararg}) where N + _, indstail = split(inds, Val(N)) + (map(Fix1(to_index, A), I[1].indices)..., to_indices(A, indstail, tail(I))...) + end # but preserve CartesianIndices{0} as they consume a dimension. - _to_indices1(A, inds, I1::CartesianIndices{0}) = (I1,) + @inline to_indices(A, inds, I::Tuple{CartesianIndices{0}, Vararg}) = + (first(I), to_indices(A, inds, tail(I))...) @inline in(i::CartesianIndex, r::CartesianIndices) = false @inline in(i::CartesianIndex{N}, r::CartesianIndices{N}) where {N} = all(map(in, i.I, r.indices)) + copy(iter::CartesianIndices) = iter + simd_outer_range(iter::CartesianIndices{0}) = iter function simd_outer_range(iter::CartesianIndices) CartesianIndices(tail(iter.indices)) @@ -556,13 +565,13 @@ module IteratorsMD @inline function __dec(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt}) rng = indices[1] I = state[1] - step(rng) - valid = __is_valid_range(I, rng) && state[1] != first(rng) + valid = state[1] != first(rng) return valid, (I,) end @inline function __dec(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}}) rng = indices[1] I = state[1] - step(rng) - if __is_valid_range(I, rng) && state[1] != first(rng) + if state[1] != first(rng) return true, (I, tail(state)...) end valid, I = __dec(tail(state), tail(indices)) @@ -583,7 +592,7 @@ module IteratorsMD else # Given the fact that StepRange 1:2:4 === 1:2:3, we lost the original size information # and thus cannot calculate the correct linear indices when the steps are not 1. 
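A minimal sketch (illustration only, not part of the patch) of the index-expansion behaviour the `to_indices` hunks above preserve: a `CartesianIndex{N}` or `CartesianIndices{N}` consumes `N` dimensions of the indexed array.

```julia
# Sketch only: CartesianIndex/CartesianIndices expand to per-dimension indices.
A = reshape(collect(1:24), 2, 3, 4)

I = CartesianIndex(2, 1)            # consumes two dimensions
@assert A[I, 3] == A[2, 1, 3]

R = CartesianIndices((1:2, 2:3))    # consumes two dimensions as ranges
@assert A[R, 4] == A[1:2, 2:3, 4]

# `to_indices` performs the expansion used by the indexing methods above.
@assert Base.to_indices(A, (I, 3)) == (2, 1, 3)
```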
- throw(ArgumentError("LinearIndices for $(typeof(inds)) with non-1 step size is not yet supported.")) + throw(ArgumentError(LazyString("LinearIndices for ", typeof(inds), " with non-1 step size is not yet supported."))) end end @@ -606,6 +615,8 @@ module IteratorsMD # array operations Base.intersect(a::CartesianIndices{N}, b::CartesianIndices{N}) where N = CartesianIndices(intersect.(a.indices, b.indices)) + Base.issubset(a::CartesianIndices{N}, b::CartesianIndices{N}) where N = + isempty(a) || all(map(issubset, a.indices, b.indices)) # Views of reshaped CartesianIndices are used for partitions — ensure these are fast const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,<:Tuple{AbstractUnitRange{Int}},false} @@ -676,19 +687,53 @@ end # IteratorsMD using .IteratorsMD +# from genericmemory.jl: +## generate vararg methods for atomic indexing +for ex in ( + :(getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)), + :(setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)), + :(setindexonce_atomic!(mem::GenericMemory, success_order::Symbol, fail_order::Symbol, val, i::Int)), + :(modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)), + :(swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)), + :(replaceindex_atomic!(mem::GenericMemory, success_order::Symbol, fail_order::Symbol, expected, desired, i::Int,)), +) + fn = ex.args[1] + args = ex.args[2:end-1] + + @eval begin + function $fn($(args...), i::Union{Integer,CartesianIndex}...) + return $fn($(args...), CartesianIndex(to_indices($(args[1]), i))) + end + + function $fn($(args...), i::CartesianIndex) + return $fn($(args...), Tuple(i)...) + end + + function $fn($(args...), i::Integer...) + idcs = to_indices($(args[1]), i) + S = IndexStyle($(args[1])) + if isa(S, IndexLinear) + return $fn($(args...), _to_linear_index($(args[1]), idcs...)) + else + return $fn($(args...), _to_subscript_indices($(args[1]), idcs...)) + end + end + end +end + ## Bounds-checking with CartesianIndex # Disallow linear indexing with CartesianIndex -function checkbounds(::Type{Bool}, A::AbstractArray, i::Union{CartesianIndex, AbstractArray{<:CartesianIndex}}) - @inline +@inline checkbounds(::Type{Bool}, A::AbstractArray, i::CartesianIndex) = checkbounds_indices(Bool, axes(A), (i,)) +# Here we try to consume N of the indices (if there are that many available) +@inline function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{CartesianIndex,Vararg}) + inds1, rest = IteratorsMD.split(inds, Val(length(I[1]))) + checkindex(Bool, inds1, I[1]) & checkbounds_indices(Bool, rest, tail(I)) end - -@inline checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple{CartesianIndex,Vararg{Any}}) = - checkbounds_indices(Bool, (), (I[1].I..., tail(I)...)) -@inline checkbounds_indices(::Type{Bool}, IA::Tuple{Any}, I::Tuple{CartesianIndex,Vararg{Any}}) = - checkbounds_indices(Bool, IA, (I[1].I..., tail(I)...)) -@inline checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple{CartesianIndex,Vararg{Any}}) = - checkbounds_indices(Bool, IA, (I[1].I..., tail(I)...)) +@inline checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndex) = + checkbounds_indices(Bool, inds, I.I) +@inline checkindex(::Type{Bool}, inds::Tuple, i::AbstractRange{<:CartesianIndex}) = + isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i))) # Indexing into Array with mixtures of Integers and CartesianIndices is # extremely performance-sensitive. 
While the abstract fallbacks support this, @@ -698,45 +743,17 @@ end @propagate_inbounds setindex!(A::Array, v, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...) = (A[to_indices(A, (i1, I...))...] = v; A) -# Support indexing with an array of CartesianIndex{N}s +## Bounds-checking with arrays of CartesianIndex{N} +# Disallow linear indexing with an array of CartesianIndex{N} +@inline checkbounds(::Type{Bool}, A::AbstractArray, i::AbstractArray{CartesianIndex{N}}) where {N} = + checkbounds_indices(Bool, axes(A), (i,)) # Here we try to consume N of the indices (if there are that many available) -# The first two simply handle ambiguities -@inline function checkbounds_indices(::Type{Bool}, ::Tuple{}, - I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}}) where N - checkindex(Bool, (), I[1]) & checkbounds_indices(Bool, (), tail(I)) -end -@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{Any}, - I::Tuple{AbstractArray{CartesianIndex{0}},Vararg{Any}}) - checkbounds_indices(Bool, IA, tail(I)) +@inline function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{AbstractArray{CartesianIndex{N}},Vararg}) where N + inds1, rest = IteratorsMD.split(inds, Val(N)) + checkindex(Bool, inds1, I[1]) & checkbounds_indices(Bool, rest, tail(I)) end -@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{Any}, - I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}}) where N - checkindex(Bool, IA, I[1]) & checkbounds_indices(Bool, (), tail(I)) -end -@inline function checkbounds_indices(::Type{Bool}, IA::Tuple, - I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}}) where N - IA1, IArest = IteratorsMD.split(IA, Val(N)) - checkindex(Bool, IA1, I[1]) & checkbounds_indices(Bool, IArest, tail(I)) -end - - -@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{}, - I::Tuple{AbstractArray{Bool,N},Vararg{Any}}) where N - return checkbounds_indices(Bool, IA, (LogicalIndex(I[1]), tail(I)...)) -end -@inline function checkbounds_indices(::Type{Bool}, IA::Tuple, - I::Tuple{AbstractArray{Bool,N},Vararg{Any}}) where N - return checkbounds_indices(Bool, IA, (LogicalIndex(I[1]), tail(I)...)) -end - -function checkindex(::Type{Bool}, inds::Tuple, I::AbstractArray{<:CartesianIndex}) - b = true - for i in I - b &= checkbounds_indices(Bool, inds, (i,)) - end - b -end -checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) = all(checkindex.(Bool, inds, I.indices)) +@inline checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) = + checkbounds_indices(Bool, inds, I.indices) # combined count of all indices, including CartesianIndex and # AbstractArray{CartesianIndex} @@ -810,11 +827,11 @@ end n = s[1] n > length(L) && return nothing #unroll once to help inference, cf issue #29418 - idx, i = iterate(tail(s)...) + idx, i = iterate(tail(s)...)::Tuple{Any,Any} s = (n+1, s[2], i) L.mask[idx] && return (idx, s) while true - idx, i = iterate(tail(s)...) 
+ idx, i = iterate(tail(s)...)::Tuple{Any,Any} s = (n+1, s[2], i) L.mask[idx] && return (idx, s) end @@ -844,11 +861,29 @@ end return eltype(L)(i1, irest...), (i1 - tz, Bi, irest, c) end -@inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex{<:Any,<:AbstractArray{Bool,1}}) = - eachindex(IndexLinear(), A) == eachindex(IndexLinear(), I.mask) -@inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex) = axes(A) == axes(I.mask) -@inline checkindex(::Type{Bool}, indx::AbstractUnitRange, I::LogicalIndex) = (indx,) == axes(I.mask) -checkindex(::Type{Bool}, inds::Tuple, I::LogicalIndex) = checkbounds_indices(Bool, inds, axes(I.mask)) +## Boundscheck for Logicalindex +# LogicalIndex: map all calls to mask +checkbounds(::Type{Bool}, A::AbstractArray, i::LogicalIndex) = checkbounds(Bool, A, i.mask) +# `checkbounds_indices` has been handled via `I::AbstractArray` fallback +checkindex(::Type{Bool}, inds::AbstractUnitRange, i::LogicalIndex) = checkindex(Bool, inds, i.mask) +checkindex(::Type{Bool}, inds::Tuple, i::LogicalIndex) = checkindex(Bool, inds, i.mask) + +## Boundscheck for AbstractArray{Bool} +# Disallow linear indexing with AbstractArray{Bool} +checkbounds(::Type{Bool}, A::AbstractArray, i::AbstractArray{Bool}) = + checkbounds_indices(Bool, axes(A), (i,)) +# But allow linear indexing with AbstractVector{Bool} +checkbounds(::Type{Bool}, A::AbstractArray, i::AbstractVector{Bool}) = + checkindex(Bool, eachindex(IndexLinear(), A), i) +@inline function checkbounds_indices(::Type{Bool}, inds::Tuple, I::Tuple{AbstractArray{Bool},Vararg}) + inds1, rest = IteratorsMD.split(inds, Val(ndims(I[1]))) + checkindex(Bool, inds1, I[1]) & checkbounds_indices(Bool, rest, tail(I)) +end +checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractVector{Bool}) = axes1(I) == inds +checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractRange{Bool}) = axes1(I) == inds +checkindex(::Type{Bool}, inds::Tuple, I::AbstractArray{Bool}) = _check_boolean_axes(inds, axes(I)) +_check_boolean_axes(inds::Tuple, axes::Tuple) = (inds[1] == axes[1]) & _check_boolean_axes(tail(inds), tail(axes)) +_check_boolean_axes(::Tuple{}, axes::Tuple) = all(==(OneTo(1)), axes) ensure_indexable(I::Tuple{}) = () @inline ensure_indexable(I::Tuple{Any, Vararg{Any}}) = (I[1], ensure_indexable(tail(I))...) @@ -859,24 +894,54 @@ ensure_indexable(I::Tuple{}) = () @inline to_indices(A, I::Tuple{Vararg{Union{Integer, CartesianIndex}}}) = to_indices(A, (), I) # But some index types require more context spanning multiple indices # CartesianIndex is unfolded outside the inner to_indices for better inference -_to_indices1(A, inds, I1::CartesianIndex) = map(Fix1(to_index, A), I1.I) -_cutdim(inds, I1::CartesianIndex) = IteratorsMD.split(inds, Val(length(I1)))[2] +@inline function to_indices(A, inds, I::Tuple{CartesianIndex{N}, Vararg}) where N + _, indstail = IteratorsMD.split(inds, Val(N)) + (map(Fix1(to_index, A), I[1].I)..., to_indices(A, indstail, tail(I))...) +end # For arrays of CartesianIndex, we just skip the appropriate number of inds -_cutdim(inds, I1::AbstractArray{CartesianIndex{N}}) where {N} = IteratorsMD.split(inds, Val(N))[2] +@inline function to_indices(A, inds, I::Tuple{AbstractArray{CartesianIndex{N}}, Vararg}) where N + _, indstail = IteratorsMD.split(inds, Val(N)) + (to_index(A, I[1]), to_indices(A, indstail, tail(I))...) 
+end # And boolean arrays behave similarly; they also skip their number of dimensions -_cutdim(inds::Tuple, I1::AbstractArray{Bool}) = IteratorsMD.split(inds, Val(ndims(I1)))[2] -# As an optimization, we allow trailing Array{Bool} and BitArray to be linear over trailing dimensions -@inline to_indices(A, inds, I::Tuple{Union{Array{Bool,N}, BitArray{N}}}) where {N} = - (_maybe_linear_logical_index(IndexStyle(A), A, I[1]),) +@inline function to_indices(A, inds, I::Tuple{AbstractArray{Bool, N}, Vararg}) where N + _, indstail = IteratorsMD.split(inds, Val(N)) + (to_index(A, I[1]), to_indices(A, indstail, tail(I))...) +end +# As an optimization, we allow the only `AbstractArray{Bool}` to be linear-iterated +@inline to_indices(A, I::Tuple{AbstractArray{Bool}}) = (_maybe_linear_logical_index(IndexStyle(A), A, I[1]),) _maybe_linear_logical_index(::IndexStyle, A, i) = to_index(A, i) _maybe_linear_logical_index(::IndexLinear, A, i) = LogicalIndex{Int}(i) # Colons get converted to slices by `uncolon` -_to_indices1(A, inds, I1::Colon) = (uncolon(inds),) +@inline to_indices(A, inds, I::Tuple{Colon, Vararg}) = + (uncolon(inds), to_indices(A, Base.safe_tail(inds), tail(I))...) uncolon(::Tuple{}) = Slice(OneTo(1)) uncolon(inds::Tuple) = Slice(inds[1]) +""" + _prechecked_iterate(iter[, state]) + +Internal function used to eliminate the dead branch in `iterate`. +Fallback to `iterate` by default, but optimized for indices type in `Base`. +""" +@propagate_inbounds _prechecked_iterate(iter) = iterate(iter) +@propagate_inbounds _prechecked_iterate(iter, state) = iterate(iter, state) + +_prechecked_iterate(iter::AbstractUnitRange, i = first(iter)) = i, convert(eltype(iter), i + step(iter)) +_prechecked_iterate(iter::LinearIndices, i = first(iter)) = i, i + 1 +_prechecked_iterate(iter::CartesianIndices) = first(iter), first(iter) +function _prechecked_iterate(iter::CartesianIndices, i) + i′ = IteratorsMD.inc(i.I, iter.indices) + return i′, i′ +end +_prechecked_iterate(iter::SCartesianIndices2) = first(iter), first(iter) +function _prechecked_iterate(iter::SCartesianIndices2{K}, (;i, j)) where {K} + I = i < K ? SCartesianIndex2{K}(i + 1, j) : SCartesianIndex2{K}(1, j + 1) + return I, I +end + ### From abstractarray.jl: Internal multidimensional indexing definitions ### getindex(x::Union{Number,AbstractChar}, ::CartesianIndex{0}) = x getindex(t::Tuple, i::CartesianIndex{1}) = getindex(t, i.I[1]) @@ -908,14 +973,11 @@ function _generate_unsafe_getindex!_body(N::Int) quote @inline D = eachindex(dest) - Dy = iterate(D) + Dy = _prechecked_iterate(D) @inbounds @nloops $N j d->I[d] begin - # This condition is never hit, but at the moment - # the optimizer is not clever enough to split the union without it - Dy === nothing && return dest - (idx, state) = Dy + (idx, state) = Dy::NTuple{2,Any} dest[idx] = @ncall $N getindex src j - Dy = iterate(D, state) + Dy = _prechecked_iterate(D, state) end return dest end @@ -951,14 +1013,12 @@ function _generate_unsafe_setindex!_body(N::Int) @nexprs $N d->(I_d = unalias(A, I[d])) idxlens = @ncall $N index_lengths I @ncall $N setindex_shape_check x′ (d->idxlens[d]) - Xy = iterate(x′) + X = eachindex(x′) + Xy = _prechecked_iterate(X) @inbounds @nloops $N i d->I_d begin - # This is never reached, but serves as an assumption for - # the optimizer that it does not need to emit error paths - Xy === nothing && break - (val, state) = Xy - @ncall $N setindex! A val i - Xy = iterate(x′, state) + (idx, state) = Xy::NTuple{2,Any} + @ncall $N setindex! 
A x′[idx] i + Xy = _prechecked_iterate(X, state) end A end @@ -1025,25 +1085,34 @@ end ### from abstractarray.jl -# In the common case where we have two views into the same parent, aliasing checks -# are _much_ easier and more important to get right -function mightalias(A::SubArray{T,<:Any,P}, B::SubArray{T,<:Any,P}) where {T,P} - if !_parentsmatch(A.parent, B.parent) - # We cannot do any better than the usual dataids check - return !_isdisjoint(dataids(A), dataids(B)) - end - # Now we know that A.parent === B.parent. This means that the indices of A - # and B are the same length and indexing into the same dimensions. We can - # just walk through them and check for overlaps: O(ndims(A)). We must finally - # ensure that the indices don't alias with either parent - return _indicesmightoverlap(A.indices, B.indices) || - !_isdisjoint(dataids(A.parent), _splatmap(dataids, B.indices)) || - !_isdisjoint(dataids(B.parent), _splatmap(dataids, A.indices)) +function mightalias(A::SubArray, B::SubArray) + # There are three ways that SubArrays might _problematically_ alias one another: + # 1. The parents are the same we can conservatively check if the indices might overlap OR + # 2. The parents alias eachother in a more complicated manner (and we can't trace indices) OR + # 3. One's parent is used in the other's indices + # Note that it's ok for just the indices to alias each other as those should not be mutated, + # so we can always do better than the default !_isdisjoint(dataids(A), dataids(B)) + if isbits(A.parent) || isbits(B.parent) + return false # Quick out for immutables + elseif _parentsmatch(A.parent, B.parent) + # Each SubArray unaliases its own parent from its own indices upon construction, so if + # the two parents are the same, then by construction one cannot alias the other's indices + # and therefore this is the only test we need to perform: + return _indicesmightoverlap(A.indices, B.indices) + else + A_parent_ids = dataids(A.parent) + B_parent_ids = dataids(B.parent) + return !_isdisjoint(A_parent_ids, B_parent_ids) || + !_isdisjoint(A_parent_ids, _splatmap(dataids, B.indices)) || + !_isdisjoint(B_parent_ids, _splatmap(dataids, A.indices)) + end end +# Test if two arrays are backed by exactly the same memory in exactly the same order _parentsmatch(A::AbstractArray, B::AbstractArray) = A === B -# Two reshape(::Array)s of the same size aren't `===` because they have different headers -_parentsmatch(A::Array, B::Array) = pointer(A) == pointer(B) && size(A) == size(B) +_parentsmatch(A::DenseArray, B::DenseArray) = elsize(A) == elsize(B) && pointer(A) == pointer(B) && size(A) == size(B) +_parentsmatch(A::StridedArray, B::StridedArray) = elsize(A) == elsize(B) && pointer(A) == pointer(B) && strides(A) == strides(B) +# Given two SubArrays with the same parent, check if the indices might overlap (returning true if unsure) _indicesmightoverlap(A::Tuple{}, B::Tuple{}) = true _indicesmightoverlap(A::Tuple{}, B::Tuple) = error("malformed subarray") _indicesmightoverlap(A::Tuple, B::Tuple{}) = error("malformed subarray") @@ -1179,8 +1248,7 @@ circshift!(dest::AbstractArray, src, ::Tuple{}) = copyto!(dest, src) Circularly shift, i.e. rotate, the data in `src`, storing the result in `dest`. `shifts` specifies the amount to shift in each dimension. -The `dest` array must be distinct from the `src` array (they cannot -alias each other). +$(_DOCS_ALIASING_WARNING) See also [`circshift`](@ref). """ @@ -1238,21 +1306,23 @@ their indices; any offset results in a (circular) wraparound. 
If the arrays have overlapping indices, then on the domain of the overlap `dest` agrees with `src`. +$(_DOCS_ALIASING_WARNING) + See also: [`circshift`](@ref). # Examples ```julia-repl julia> src = reshape(Vector(1:16), (4,4)) -4×4 Array{Int64,2}: +4×4 Matrix{Int64}: 1 5 9 13 2 6 10 14 3 7 11 15 4 8 12 16 -julia> dest = OffsetArray{Int}(undef, (0:3,2:5)) +julia> dest = OffsetArray{Int}(undef, (0:3,2:5)); julia> circcopy!(dest, src) -OffsetArrays.OffsetArray{Int64,2,Array{Int64,2}} with indices 0:3×2:5: +4×4 OffsetArray(::Matrix{Int64}, 0:3, 2:5) with eltype Int64 with indices 0:3×2:5: 8 12 16 4 5 9 13 1 6 10 14 2 @@ -1562,19 +1632,23 @@ end end end -isassigned(a::AbstractArray, i::CartesianIndex) = isassigned(a, Tuple(i)...) -function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...) - isa(i, Tuple{Vararg{Int}}) || return isassigned(A, CartesianIndex(i...)) - @boundscheck checkbounds(Bool, A, i...) || return false +@propagate_inbounds isassigned(A::AbstractArray, i::CartesianIndex) = isassigned(A, Tuple(i)...) +@propagate_inbounds function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...) + return isassigned(A, CartesianIndex(to_indices(A, i))) +end +@inline function isassigned(A::AbstractArray, i::Integer...) + # convert to valid indices, checking for Bool + inds = to_indices(A, i) + @boundscheck checkbounds(Bool, A, inds...) || return false S = IndexStyle(A) - ninds = length(i) + ninds = length(inds) if (isa(S, IndexLinear) && ninds != 1) - return @inbounds isassigned(A, _to_linear_index(A, i...)) + return @inbounds isassigned(A, _to_linear_index(A, inds...)) elseif (!isa(S, IndexLinear) && ninds != ndims(A)) - return @inbounds isassigned(A, _to_subscript_indices(A, i...)...) + return @inbounds isassigned(A, _to_subscript_indices(A, inds...)...) else try - A[i...] + A[inds...] true catch e if isa(e, BoundsError) || isa(e, UndefRefError) @@ -1599,12 +1673,11 @@ function permutedims(B::StridedArray, perm) permutedims!(P, B, perm) end -function checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N} - indsB = axes(B) - length(perm) == N || throw(ArgumentError("expected permutation of size $N, but length(perm)=$(length(perm))")) +checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N} = checkdims_perm(axes(P), axes(B), perm) +function checkdims_perm(indsP::NTuple{N, AbstractUnitRange}, indsB::NTuple{N, AbstractUnitRange}, perm) where {N} + length(perm) == N || throw(ArgumentError(LazyString("expected permutation of size ", N, ", but length(perm)=", length(perm)))) isperm(perm) || throw(ArgumentError("input is not a permutation")) - indsP = axes(P) - for i = 1:length(perm) + for i in eachindex(perm) indsP[i] == indsB[perm[i]] || throw(DimensionMismatch("destination tensor of incorrect size")) end nothing @@ -1613,21 +1686,21 @@ end for (V, PT, BT) in Any[((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArray)] @eval @generated function permutedims!(P::$PT{$(V...)}, B::$BT{$(V...)}, perm) where $(V...) quote - checkdims_perm(P, B, perm) + checkdims_perm(axes(P), axes(B), perm) #calculates all the strides native_strides = size_to_strides(1, size(B)...) 
- strides_1 = 0 - @nexprs $N d->(strides_{d+1} = native_strides[perm[d]]) + strides = @ntuple $N d->native_strides[perm[d]] + strides::NTuple{$N,Integer} #Creates offset, because indexing starts at 1 - offset = 1 - sum(@ntuple $N d->strides_{d+1}) + offset = 1 - reduce(+, strides, init = 0) sumc = 0 ind = 1 @nloops($N, i, P, - d->(sumc += i_d*strides_{d+1}), # PRE - d->(sumc -= i_d*strides_{d+1}), # POST + d->(sumc += i_d*strides[d]), # PRE + d->(sumc -= i_d*strides[d]), # POST begin # BODY @inbounds P[ind] = B[sumc+offset] ind += 1 @@ -1843,7 +1916,7 @@ but the result order will be row-major instead. # Higher dimensional examples ``` -julia> A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2)) +julia> A = [4 3; 2 1 ;;; 'A' 'B'; 'C' 'D'] 2×2×2 Array{Any, 3}: [:, :, 1] = 4 3 @@ -1892,39 +1965,25 @@ julia> sortslices(reshape([5; 4; 3; 2; 1], (1,1,5)), dims=3, by=x->x[1,1]) ``` """ function sortslices(A::AbstractArray; dims::Union{Integer, Tuple{Vararg{Integer}}}, kws...) - _sortslices(A, Val{dims}(); kws...) -end + if A isa Matrix && dims isa Integer && dims == 1 + # TODO: remove once the generic version becomes as fast or faster + perm = sortperm(eachslice(A; dims); kws...) + return A[perm, :] + end -# Works around inference's lack of ability to recognize partial constness -struct DimSelector{dims, T} - A::T + B = similar(A) + _sortslices!(B, A, Val{dims}(); kws...) + B end -DimSelector{dims}(x::T) where {dims, T} = DimSelector{dims, T}(x) -(ds::DimSelector{dims, T})(i) where {dims, T} = i in dims ? axes(ds.A, i) : (:,) -_negdims(n, dims) = filter(i->!(i in dims), 1:n) +function _sortslices!(B, A, ::Val{dims}; kws...) where dims + ves = vec(eachslice(A; dims)) + perm = sortperm(ves; kws...) + bes = eachslice(B; dims) -function compute_itspace(A, ::Val{dims}) where {dims} - negdims = _negdims(ndims(A), dims) - axs = Iterators.product(ntuple(DimSelector{dims}(A), ndims(A))...) - vec(permutedims(collect(axs), (dims..., negdims...))) -end - -function _sortslices(A::AbstractArray, d::Val{dims}; kws...) where dims - itspace = compute_itspace(A, d) - vecs = map(its->view(A, its...), itspace) - p = sortperm(vecs; kws...) - if ndims(A) == 2 && isa(dims, Integer) && isa(A, Array) - # At the moment, the performance of the generic version is subpar - # (about 5x slower). Hardcode a fast-path until we're able to - # optimize this. - return dims == 1 ? A[p, :] : A[:, p] - else - B = similar(A) - for (x, its) in zip(p, itspace) - B[its...] = vecs[x] - end - B + # TODO for further optimization: traverse in memory order + for (slice, i) in zip(eachslice(B; dims), perm) + slice .= ves[i] end end diff --git a/base/multinverses.jl b/base/multinverses.jl index 21d8e53d2ff83..70033de12fcd8 100644 --- a/base/multinverses.jl +++ b/base/multinverses.jl @@ -28,7 +28,7 @@ abstract type MultiplicativeInverse{T} <: Number end # Division of Int32 by 3: # floor((2^32+2)/3 * n/2^32) = floor(n/3 + 2n/(3*2^32)) # The correction term, 2n/(3*2^32), is strictly less than 1/3 for any -# nonnegative n::Int32, so this divides any nonnegative Int32 by 3. +# non-negative n::Int32, so this divides any non-negative Int32 by 3. # (When n < 0, we add 1, and one can show that this computes # ceil(n/d) = -floor(abs(n)/d).) 
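A small worked check of the identity quoted in the comment above, floor((2^32+2)/3 * n/2^32) = floor(n/3 + 2n/(3*2^32)). This is an illustration only; the names `MAGIC3` and `div3` are made up here and are not part of the patch.

```julia
# Illustration only: the Int32-by-3 magic-number division trick.
const MAGIC3 = div(UInt64(2)^32 + 2, 3)              # (2^32 + 2) ÷ 3 == 0x55555556

div3(n::Int32) = Int32((MAGIC3 * UInt64(n)) >> 32)   # floor(n/3 + 2n/(3*2^32)), n >= 0

# The correction term 2n/(3*2^32) is < 1/3 for every non-negative Int32 `n`,
# so the floor above always lands exactly on n ÷ 3.
@assert all(div3(n) == n ÷ Int32(3) for n in Int32(0):Int32(100_000))
```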
# diff --git a/base/namedtuple.jl b/base/namedtuple.jl index e489508bc55ea..991c4d35da52f 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -110,26 +110,24 @@ julia> (; t.x) """ Core.NamedTuple -if nameof(@__MODULE__) === :Base - -@eval function NamedTuple{names,T}(args::Tuple) where {names, T <: Tuple} +@eval function (NT::Type{NamedTuple{names,T}})(args::Tuple) where {names, T <: Tuple} if length(args) != length(names::Tuple) throw(ArgumentError("Wrong number of arguments to named tuple constructor.")) end # Note T(args) might not return something of type T; e.g. # Tuple{Type{Float64}}((Float64,)) returns a Tuple{DataType} - $(Expr(:splatnew, :(NamedTuple{names,T}), :(T(args)))) + $(Expr(:splatnew, :NT, :(T(args)))) end -function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple} +function (NT::Type{NamedTuple{names, T}})(nt::NamedTuple) where {names, T <: Tuple} if @generated - Expr(:new, :(NamedTuple{names, T}), - Any[ :(let Tn = fieldtype(T, $n), + Expr(:new, :NT, + Any[ :(let Tn = fieldtype(NT, $n), ntn = getfield(nt, $(QuoteNode(names[n]))) ntn isa Tn ? ntn : convert(Tn, ntn) end) for n in 1:length(names) ]...) else - NamedTuple{names, T}(map(Fix1(getfield, nt), names)) + NT(map(Fix1(getfield, nt), names)) end end @@ -145,16 +143,11 @@ function NamedTuple{names}(nt::NamedTuple) where {names} end end -NamedTuple{names, T}(itr) where {names, T <: Tuple} = NamedTuple{names, T}(T(itr)) -NamedTuple{names}(itr) where {names} = NamedTuple{names}(Tuple(itr)) +(NT::Type{NamedTuple{names, T}})(itr) where {names, T <: Tuple} = NT(T(itr)) +(NT::Type{NamedTuple{names}})(itr) where {names} = NT(Tuple(itr)) NamedTuple(itr) = (; itr...) -# avoids invalidating Union{}(...) -NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTuple{names, Union{}}, (itr,))) - -end # if Base - # Like NamedTuple{names, T} as a constructor, but omits the additional # `convert` call, when the types are known to match the fields @eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple) @@ -182,25 +175,24 @@ nextind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)+1 convert(::Type{NT}, nt::NT) where {names, NT<:NamedTuple{names}} = nt convert(::Type{NT}, nt::NT) where {names, T<:Tuple, NT<:NamedTuple{names,T}} = nt +convert(::Type{NT}, t::Tuple) where {NT<:NamedTuple} = (@inline NT(t))::NT -function convert(::Type{NT}, nt::NamedTuple{names}) where {names, T<:Tuple, NT<:NamedTuple{names,T}} - if !@isdefined T - # converting abstract NT to an abstract Tuple type, to a concrete NT1, is not straightforward, so this could just be an error, but we define it anyways - # _tuple_error(NT, nt) - T1 = Tuple{ntuple(i -> fieldtype(NT, i), Val(length(names)))...} - NT1 = NamedTuple{names, T1} - else - T1 = T - NT1 = NT - end - return NT1(T1(nt))::NT1::NT +function convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names}) where {names,T<:Tuple} + NT = NamedTuple{names,T} + (@inline NT(nt))::NT end -if nameof(@__MODULE__) === :Base - Tuple(nt::NamedTuple) = (nt...,) - (::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? 
t : convert(T, t)::T) +function convert(::Type{NT}, nt::NamedTuple{names}) where {names, NT<:NamedTuple{names}} + # converting abstract NT to an abstract Tuple type, to a concrete NT1, is not straightforward, so this could just be an error, but we define it anyways + # _tuple_error(NT, nt) + T1 = Tuple{ntuple(i -> fieldtype(NT, i), Val(length(names)))...} + NT1 = NamedTuple{names, T1} + return NT1(T1(nt))::NT1::NT end +Tuple(nt::NamedTuple) = (nt...,) +(::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T) + function show(io::IO, t::NamedTuple) n = nfields(t) for i = 1:n @@ -269,8 +261,11 @@ function map(f, nt::NamedTuple{names}, nts::NamedTuple...) where names NamedTuple{names}(map(f, map(Tuple, (nt, nts...))...)) end -@assume_effects :total function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) - @nospecialize an bn +filter(f, xs::NamedTuple) = xs[filter(k -> f(xs[k]), keys(xs))] + +function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_total_meta names = Symbol[an...] for n in bn if !sym_in(n, an) @@ -280,18 +275,21 @@ end (names...,) end -@assume_effects :total function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple}) - @nospecialize names a b +function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple}) + @nospecialize + @_total_meta bn = _nt_names(b) return Tuple{Any[ fieldtype(sym_in(names[n], bn) ? b : a, names[n]) for n in 1:length(names) ]...} end -@assume_effects :foldable function merge_fallback(@nospecialize(a::NamedTuple), @nospecialize(b::NamedTuple), - @nospecialize(an::Tuple{Vararg{Symbol}}), @nospecialize(bn::Tuple{Vararg{Symbol}})) +function merge_fallback(a::NamedTuple, b::NamedTuple, + an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_foldable_meta names = merge_names(an, bn) types = merge_types(names, typeof(a), typeof(b)) n = length(names) - A = Vector{Any}(undef, n) + A = Memory{Any}(undef, n) for i=1:n n = names[i] A[i] = getfield(sym_in(n, bn) ? b : a, n) @@ -299,6 +297,10 @@ end _new_NamedTuple(NamedTuple{names, types}, (A...,)) end +# This is `Experimental.@max_methods 4 function merge end`, which is not +# defined at this point in bootstrap. +typeof(function merge end).name.max_methods = UInt8(4) + """ merge(a::NamedTuple, bs::NamedTuple...) 
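A brief sketch (not part of the patch, assuming a Julia that carries the methods shown in the hunks above) of the `NamedTuple` behaviour the constructor, `convert`, `filter`, and merge changes rely on: the constructor converts field-wise, and merging keeps the right-most value on name clashes.

```julia
# Sketch only: field-wise convert in the constructor, plus filter/merge/structdiff.
nt = NamedTuple{(:a, :b), Tuple{Int, Float64}}((1, 2))
@assert nt === (a = 1, b = 2.0)                      # 2 converted to Float64

@assert filter(isodd, (a = 1, b = 2, c = 3)) === (a = 1, c = 3)

@assert merge((a = 1, b = 2), (b = 3, c = 4)) === (a = 1, b = 3, c = 4)
@assert Base.structdiff((a = 1, b = 2, c = 3), (a = 0,)) === (b = 2, c = 3)
```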
@@ -384,8 +386,9 @@ tail(t::NamedTuple{names}) where names = NamedTuple{tail(names::Tuple)}(t) front(t::NamedTuple{names}) where names = NamedTuple{front(names::Tuple)}(t) reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt))) -@assume_effects :total function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) - @nospecialize an bn +function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_total_meta names = Symbol[] for n in an if !sym_in(n, bn) @@ -395,16 +398,20 @@ reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt))) (names...,) end -@assume_effects :foldable function diff_types(@nospecialize(a::NamedTuple), @nospecialize(names::Tuple{Vararg{Symbol}})) +function diff_types(a::NamedTuple, names::Tuple{Vararg{Symbol}}) + @nospecialize + @_foldable_meta return Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...} end -@assume_effects :foldable function diff_fallback(@nospecialize(a::NamedTuple), @nospecialize(an::Tuple{Vararg{Symbol}}), @nospecialize(bn::Tuple{Vararg{Symbol}})) +function diff_fallback(a::NamedTuple, an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_foldable_meta names = diff_names(an, bn) isempty(names) && return (;) types = diff_types(a, names) n = length(names) - A = Vector{Any}(undef, n) + A = Memory{Any}(undef, n) for i=1:n n = names[i] A[i] = getfield(a, n) @@ -412,6 +419,24 @@ end _new_NamedTuple(NamedTuple{names, types}, (A...,)) end +""" + delete(a::NamedTuple, field::Symbol) + +Construct a new named tuple from `a` by removing the named field. + +```jldoctest +julia> Base.delete((a=1, b=2, c=3), :a) +(b = 2, c = 3) + +julia> Base.delete((a=1, b=2, c=3), :b) +(a = 1, c = 3) +``` +""" +@constprop :aggressive function delete(a::NamedTuple{an}, field::Symbol) where {an} + names = diff_names(an, (field,)) + NamedTuple{names}(a) +end + """ structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}}) @@ -512,6 +537,7 @@ Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}} julia> sum("julia"; init=1) ERROR: MethodError: no method matching +(::Char, ::Char) +The function `+` exists, but no method is defined for this combination of argument types. Closest candidates are: +(::Any, ::Any, ::Any, ::Any...) diff --git a/base/ntuple.jl b/base/ntuple.jl index 7391b86154ac4..185c42601280f 100644 --- a/base/ntuple.jl +++ b/base/ntuple.jl @@ -3,7 +3,7 @@ # `ntuple`, for constructing tuples of a given length """ - ntuple(f::Function, n::Integer) + ntuple(f, n::Integer) Create a tuple of length `n`, computing each element as `f(i)`, where `i` is the index of the element. @@ -14,7 +14,7 @@ julia> ntuple(i -> 2*i, 4) (2, 4, 6, 8) ``` """ -@inline function ntuple(f::F, n::Integer) where F +@inline function ntuple(f::F, n::Int) where F # marked inline since this benefits from constant propagation of `n` t = n == 0 ? () : n == 1 ? 
(f(1),) : @@ -30,8 +30,10 @@ julia> ntuple(i -> 2*i, 4) _ntuple(f, n) return t end +ntuple(f::F, n::Integer) where F = ntuple(f, convert(Int, n)::Int) -function _ntuple(f::F, n) where F +# `n` should always be an Int (#55790) +function _ntuple(f::F, n::Int) where F @noinline (n >= 0) || throw(ArgumentError(LazyString("tuple length should be ≥ 0, got ", n))) ([f(i) for i = 1:n]...,) @@ -72,9 +74,10 @@ julia> ntuple(i -> 2*i, Val(4)) if @generated :(@ntuple $N i -> f(i)) else - Tuple(f(i) for i = 1:N) + Tuple(f(i) for i = 1:(N::Int)) end end +typeof(function ntuple end).name.max_methods = UInt8(5) @inline function fill_to_length(t::Tuple, val, ::Val{_N}) where {_N} M = length(t) @@ -88,3 +91,11 @@ end (t..., fill(val, N-M)...) end end + + +# Specialized extensions for NTuple +function reverse(t::NTuple{N}) where N + ntuple(Val{N}()) do i + t[end+1-i] + end +end diff --git a/base/number.jl b/base/number.jl index 923fc907d4038..72df50a9c3134 100644 --- a/base/number.jl +++ b/base/number.jl @@ -287,7 +287,12 @@ map(f, x::Number, ys::Number...) = f(x, ys...) zero(x) zero(::Type) -Get the additive identity element for the type of `x` (`x` can also specify the type itself). +Get the additive identity element for `x`. If the additive identity can be deduced +from the type alone, then a type may be given as an argument to `zero`. + +For example, `zero(Int)` will work because the additive identity is the same for all +instances of `Int`, but `zero(Vector{Int})` is not defined because vectors of different +lengths have different additive identities. See also [`iszero`](@ref), [`one`](@ref), [`oneunit`](@ref), [`oftype`](@ref). @@ -311,12 +316,15 @@ zero(::Type{Union{}}, slurp...) = Union{}(0) """ one(x) - one(T::type) + one(T::Type) Return a multiplicative identity for `x`: a value such that -`one(x)*x == x*one(x) == x`. Alternatively `one(T)` can -take a type `T`, in which case `one` returns a multiplicative -identity for any `x` of type `T`. +`one(x)*x == x*one(x) == x`. If the multiplicative identity can +be deduced from the type alone, then a type may be given as +an argument to `one` (e.g. `one(Int)` will work because the +multiplicative identity is the same for all instances of `Int`, +but `one(Matrix{Int})` is not defined because matrices of +different shapes have different multiplicative identities.) If possible, `one(x)` returns a value of the same type as `x`, and `one(T)` returns a value of type `T`. However, this may @@ -354,9 +362,10 @@ one(::Type{Union{}}, slurp...) = Union{}(1) oneunit(x::T) oneunit(T::Type) -Return `T(one(x))`, where `T` is either the type of the argument or -(if a type is passed) the argument. This differs from [`one`](@ref) for -dimensionful quantities: `one` is dimensionless (a multiplicative identity) +Return `T(one(x))`, where `T` is either the type of the argument, or +the argument itself in cases where the `oneunit` can be deduced from +the type alone. This differs from [`one`](@ref) for dimensionful +quantities: `one` is dimensionless (a multiplicative identity) while `oneunit` is dimensionful (of the same type as `x`, or of type `T`). # Examples diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl index bb0ae8935b06c..5e38c8523f4a8 100644 --- a/base/opaque_closure.jl +++ b/base/opaque_closure.jl @@ -18,69 +18,30 @@ the argument type may be fixed length even if the function is variadic. This interface is experimental and subject to change or removal without notice. 
""" macro opaque(ex) - esc(Expr(:opaque_closure, ex)) + esc(Expr(:opaque_closure, nothing, nothing, nothing, #= allow_partial =# true, ex)) end macro opaque(ty, ex) - esc(Expr(:opaque_closure, ty, ex)) -end - -# OpaqueClosure construction from pre-inferred CodeInfo/IRCode -using Core.Compiler: IRCode -using Core: CodeInfo - -function compute_ir_rettype(ir::IRCode) - rt = Union{} - for i = 1:length(ir.stmts) - stmt = ir.stmts[i][:inst] - if isa(stmt, Core.Compiler.ReturnNode) && isdefined(stmt, :val) - rt = Core.Compiler.tmerge(Core.Compiler.argextype(stmt.val, ir), rt) + if Base.isexpr(ty, :->) + (AT, body) = ty.args + filter!((n)->!isa(n, Core.LineNumberNode), body.args) + if !Base.isexpr(body, :block) || length(body.args) != 1 + error("Opaque closure type must be specified in the form Tuple{T,U...}->RT") end + RT = only(body.args) + else + error("Opaque closure type must be specified in the form Tuple{T,U...}->RT") end - return Core.Compiler.widenconst(rt) + AT = (AT !== :_) ? AT : nothing + RT = (RT !== :_) ? RT : nothing + return esc(Expr(:opaque_closure, AT, RT, RT, #= allow_partial =# true, ex)) end -function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool) - argtypes = Vector{Any}(undef, nargs) - for i = 1:nargs - argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1]) - end - if isva - lastarg = pop!(argtypes) - if lastarg <: Tuple - append!(argtypes, lastarg.parameters) - else - push!(argtypes, Vararg{Any}) - end - end - return Tuple{argtypes...} -end - -function Core.OpaqueClosure(ir::IRCode, @nospecialize env...; - isva::Bool = false, - do_compile::Bool = true) - # NOTE: we need ir.argtypes[1] == typeof(env) - ir = Core.Compiler.copy(ir) - nargs = length(ir.argtypes)-1 - sig = compute_oc_signature(ir, nargs, isva) - rt = compute_ir_rettype(ir) - src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) - src.slotnames = fill(:none, nargs+1) - src.slotflags = fill(zero(UInt8), length(ir.argtypes)) - src.slottypes = copy(ir.argtypes) - src.rettype = rt - src = Core.Compiler.ir_to_codeinf!(src, ir) - return generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; do_compile) -end +# OpaqueClosure construction from pre-inferred CodeInfo/IRCode +using Core: CodeInfo, SSAValue -function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...) - src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo")) - mi = src.parent::Core.MethodInstance - sig = Base.tuple_type_tail(mi.specTypes) - method = mi.def::Method - nargs = method.nargs-1 - isva = method.isva - return generate_opaque_closure(sig, Union{}, src.rettype, src, nargs, isva, env...) +function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...; rettype, sig, nargs, isva=false, kwargs...) + return generate_opaque_closure(sig, Union{}, rettype, src, nargs, isva, env...; kwargs...) 
end function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nospecialize(rt_ub), @@ -88,7 +49,8 @@ function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nosp mod::Module=@__MODULE__, lineno::Int=0, file::Union{Nothing,Symbol}=nothing, - do_compile::Bool=true) - return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint), - sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile) + do_compile::Bool=true, + isinferred::Bool=true) + return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint, Cint), + sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile, isinferred) end diff --git a/base/operators.jl b/base/operators.jl index 3f51be737ca5c..d01902e302359 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -3,10 +3,25 @@ ## types ## """ - <:(T1, T2) + <:(T1, T2)::Bool -Subtype operator: returns `true` if and only if all values of type `T1` are -also of type `T2`. +Subtyping relation, defined between two types. In Julia, a type `S` is said to be a +*subtype* of a type `T` if and only if we have `S <: T`. + +For any type `L` and any type `R`, `L <: R` implies that any value `v` of type `L` +is also of type `R`. I.e., `(L <: R) && (v isa L)` implies `v isa R`. + +The subtyping relation is a *partial order*. I.e., `<:` is: + +* *reflexive*: for any type `T`, `T <: T` holds + +* *antisymmetric*: for any type `A` and any type `B`, `(A <: B) && (B <: A)` + implies `A == B` + +* *transitive*: for any type `A`, any type `B` and any type `C`; + `(A <: B) && (B <: C)` implies `A <: C` + +See also info on [Types](@ref man-types), [`Union{}`](@ref), [`Any`](@ref), [`isa`](@ref). # Examples ```jldoctest @@ -16,28 +31,56 @@ true julia> Vector{Int} <: AbstractArray true -julia> Matrix{Float64} <: Matrix{AbstractFloat} +julia> Matrix{Float64} <: Matrix{AbstractFloat} # `Matrix` is invariant false + +julia> Tuple{Float64} <: Tuple{AbstractFloat} # `Tuple` is covariant +true + +julia> Union{} <: Int # The bottom type, `Union{}`, subtypes each type. +true + +julia> Union{} <: Float32 <: AbstractFloat <: Real <: Number <: Any # Operator chaining +true ``` + +The `<:` keyword also has several syntactic uses which represent the same subtyping relation, +but which do not execute the operator or return a Bool: + +* To specify the lower bound and the upper bound on a parameter of a + [`UnionAll`](@ref) type in a [`where`](@ref) statement. + +* To specify the lower bound and the upper bound on a (static) parameter of a + method, see [Parametric Methods](@ref). + +* To define a subtyping relation while declaring a new type, see [`struct`](@ref) + and [`abstract type`](@ref). """ (<:) +import Core: >: + """ >:(T1, T2) Supertype operator, equivalent to `T2 <: T1`. """ -(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a) +>: """ - supertype(T::DataType) + supertype(T::Union{DataType, UnionAll}) -Return the supertype of DataType `T`. +Return the direct supertype of type `T`. +`T` can be a [`DataType`](@ref) or a [`UnionAll`](@ref) type. Does not support +type [`Union`](@ref)s. Also see info on [Types](@ref man-types). # Examples ```jldoctest julia> supertype(Int32) Signed + +julia> supertype(Vector) +DenseVector (alias for DenseArray{T, 1} where T) ``` """ supertype(T::DataType) = (@_total_meta; T.super) @@ -52,8 +95,9 @@ Generic equality operator. Falls back to [`===`](@ref). 
Should be implemented for all types with a notion of equality, based on the abstract value that an instance represents. For example, all numeric types are compared by numeric value, ignoring type. Strings are compared as sequences of characters, ignoring encoding. -For collections, `==` is generally called recursively on all contents, -though other properties (like the shape for arrays) may also be taken into account. +Collections of the same type generally compare their key sets, and if those are `==`, then compare the values +for each of those keys, returning true if all such pairs are `==`. +Other properties are typically not taken into account (such as the exact type). This operator follows IEEE semantics for floating-point numbers: `0.0 == -0.0` and `NaN != NaN`. @@ -61,17 +105,18 @@ This operator follows IEEE semantics for floating-point numbers: `0.0 == -0.0` a The result is of type `Bool`, except when one of the operands is [`missing`](@ref), in which case `missing` is returned ([three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic)). -For collections, `missing` is returned if at least one of the operands contains -a `missing` value and all non-missing values are equal. +Collections generally implement three-valued logic akin to [`all`](@ref), returning +missing if any operands contain missing values and all other pairs are equal. Use [`isequal`](@ref) or [`===`](@ref) to always get a `Bool` result. # Implementation New numeric types should implement this function for two arguments of the new type, and handle comparison to other types via promotion rules where possible. -[`isequal`](@ref) falls back to `==`, so new methods of `==` will be used by the -[`Dict`](@ref) type to compare keys. If your type will be used as a dictionary key, it -should therefore also implement [`hash`](@ref). +Equality and hashing are intimately related; two values that are considered [`isequal`](@ref) **must** +have the same [`hash`](@ref) and by default `isequal` falls back to `==`. If a type customizes the behavior of `==` and/or [`isequal`](@ref), +then [`hash`](@ref) must be similarly implemented to ensure `isequal` and `hash` agree. `Set`s, `Dict`s, and many other internal +implementations assume that this invariant holds. If some type defines `==`, [`isequal`](@ref), and [`isless`](@ref) then it should also implement [`<`](@ref) to ensure consistency of comparisons. @@ -143,7 +188,7 @@ isequal(x::AbstractFloat, y::Real ) = (isnan(x) & isnan(y)) | signequal( isless(x, y) Test whether `x` is less than `y`, according to a fixed total order (defined together with -[`isequal`](@ref)). `isless` is not defined on all pairs of values `(x, y)`. However, if it +[`isequal`](@ref)). `isless` is not defined for pairs `(x, y)` of all types. However, if it is defined, it is expected to satisfy the following: - If `isless(x, y)` is defined, then so is `isless(y, x)` and `isequal(x, y)`, and exactly one of those three yields `true`. @@ -154,13 +199,13 @@ Values that are normally unordered, such as `NaN`, are ordered after regular values. [`missing`](@ref) values are ordered last. -This is the default comparison used by [`sort`](@ref). +This is the default comparison used by [`sort!`](@ref). # Implementation Non-numeric types with a total order should implement this function. Numeric types only need to implement it if they have special values such as `NaN`. Types with a partial order should implement [`<`](@ref). 
-See the documentation on [Alternate orderings](@ref) for how to define alternate +See the documentation on [Alternate Orderings](@ref) for how to define alternate ordering methods that can be used in sorting and related functions. # Examples @@ -303,6 +348,7 @@ true === const ≡ = === +import Core: !== """ !==(x, y) ≢(x,y) @@ -320,7 +366,8 @@ julia> a ≢ a false ``` """ -!==(@nospecialize(x), @nospecialize(y)) = !(x === y) +!== + const ≢ = !== """ @@ -335,6 +382,8 @@ New types with a canonical partial order should implement this function for two arguments of the new type. Types with a canonical total order should implement [`isless`](@ref) instead. +See also [`isunordered`](@ref). + # Examples ```jldoctest julia> 'a' < 'b' @@ -462,13 +511,17 @@ cmp(x::Integer, y::Integer) = ifelse(isless(x, y), -1, ifelse(isless(y, x), 1, 0 """ max(x, y, ...) -Return the maximum of the arguments (with respect to [`isless`](@ref)). See also the [`maximum`](@ref) function -to take the maximum element from a collection. +Return the maximum of the arguments, with respect to [`isless`](@ref). +If any of the arguments is [`missing`](@ref), return `missing`. +See also the [`maximum`](@ref) function to take the maximum element from a collection. # Examples ```jldoctest julia> max(2, 5, 1) 5 + +julia> max(5, missing, 6) +missing ``` """ max(x, y) = ifelse(isless(y, x), x, y) @@ -476,13 +529,17 @@ max(x, y) = ifelse(isless(y, x), x, y) """ min(x, y, ...) -Return the minimum of the arguments (with respect to [`isless`](@ref)). See also the [`minimum`](@ref) function -to take the minimum element from a collection. +Return the minimum of the arguments, with respect to [`isless`](@ref). +If any of the arguments is [`missing`](@ref), return `missing`. +See also the [`minimum`](@ref) function to take the minimum element from a collection. # Examples ```jldoctest julia> min(2, 5, 1) 1 + +julia> min(4, missing, 6) +missing ``` """ min(x,y) = ifelse(isless(y, x), y, x) @@ -1097,40 +1154,55 @@ julia> filter(!isletter, str) !(f::ComposedFunction{typeof(!)}) = f.inner #allows !!f === f """ - Fix1(f, x) + Fix{N}(f, x) -A type representing a partially-applied version of the two-argument function -`f`, with the first argument fixed to the value "x". In other words, -`Fix1(f, x)` behaves similarly to `y->f(x, y)`. +A type representing a partially-applied version of a function `f`, with the argument +`x` fixed at position `N::Int`. In other words, `Fix{3}(f, x)` behaves similarly to +`(y1, y2, y3...; kws...) -> f(y1, y2, x, y3...; kws...)`. -See also [`Fix2`](@ref Base.Fix2). +!!! compat "Julia 1.12" + This general functionality requires at least Julia 1.12, while `Fix1` and `Fix2` + are available earlier. + +!!! note + When nesting multiple `Fix`, note that the `N` in `Fix{N}` is _relative_ to the current + available arguments, rather than an absolute ordering on the target function. For example, + `Fix{1}(Fix{2}(f, 4), 4)` fixes the first and second arg, while `Fix{2}(Fix{1}(f, 4), 4)` + fixes the first and third arg. 
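A small sketch of the behavior described in that note, assuming the generalized `Fix{N}` introduced here is available (Julia 1.12+ per the compat note); `g` is a throwaway function used only for illustration:

```julia
# `Fix{2}(f, x)` fixes the second argument: behaves like (a, c...) -> f(a, x, c...)
sub1 = Base.Fix{2}(-, 1)
@assert sub1(10) == 9          # same as 10 - 1

# Nesting: `N` is relative to the arguments still remaining.
g(a, b, c) = (a, b, c)
@assert Base.Fix{1}(Base.Fix{2}(g, :y), :x)(:z) == (:x, :y, :z)  # fixes 1st and 2nd arg
@assert Base.Fix{2}(Base.Fix{1}(g, :x), :z)(:y) == (:x, :y, :z)  # fixes 1st and 3rd arg
```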
""" -struct Fix1{F,T} <: Function +struct Fix{N,F,T} <: Function f::F x::T - Fix1(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x) - Fix1(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x) + function Fix{N}(f::F, x) where {N,F} + if !(N isa Int) + throw(ArgumentError(LazyString("expected type parameter in `Fix` to be `Int`, but got `", N, "::", typeof(N), "`"))) + elseif N < 1 + throw(ArgumentError(LazyString("expected `N` in `Fix{N}` to be integer greater than 0, but got ", N))) + end + new{N,_stable_typeof(f),_stable_typeof(x)}(f, x) + end end -(f::Fix1)(y) = f.f(f.x, y) +function (f::Fix{N})(args::Vararg{Any,M}; kws...) where {N,M} + M < N-1 && throw(ArgumentError(LazyString("expected at least ", N-1, " arguments to `Fix{", N, "}`, but got ", M))) + return f.f(args[begin:begin+(N-2)]..., f.x, args[begin+(N-1):end]...; kws...) +end -""" - Fix2(f, x) +# Special cases for improved constant propagation +(f::Fix{1})(arg; kws...) = f.f(f.x, arg; kws...) +(f::Fix{2})(arg; kws...) = f.f(arg, f.x; kws...) -A type representing a partially-applied version of the two-argument function -`f`, with the second argument fixed to the value "x". In other words, -`Fix2(f, x)` behaves similarly to `y->f(y, x)`. """ -struct Fix2{F,T} <: Function - f::F - x::T +Alias for `Fix{1}`. See [`Fix`](@ref Base.Fix). +""" +const Fix1{F,T} = Fix{1,F,T} - Fix2(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x) - Fix2(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x) -end +""" +Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix). +""" +const Fix2{F,T} = Fix{2,F,T} -(f::Fix2)(y) = f.f(y, f.x) """ isequal(x) @@ -1231,7 +1303,7 @@ it into the original function. This is useful as an adaptor to pass a multi-argument function in a context that expects a single argument, but passes a tuple as that single argument. -# Example usage: +# Examples ```jldoctest julia> map(splat(+), zip(1:3,4:6)) 3-element Vector{Int64}: @@ -1267,8 +1339,7 @@ struct Splat{F} <: Function Splat(f) = new{Core.Typeof(f)}(f) end (s::Splat)(args) = s.f(args...) -print(io::IO, s::Splat) = print(io, "splat(", s.f, ')') -show(io::IO, s::Splat) = print(io, s) +show(io::IO, s::Splat) = (print(io, "splat("); show(io, s.f); print(io, ")")) ## in and related operators @@ -1285,7 +1356,7 @@ used to implement specialized methods. """ in(x) = Fix2(in, x) -function in(x, itr) +function in(x, itr::Any) anymissing = false for y in itr v = (y == x) @@ -1298,6 +1369,30 @@ function in(x, itr) return anymissing ? missing : false end +# Specialized variant of in for Tuple, which can generate typed comparisons for each element +# of the tuple, skipping values that are statically known to be != at compile time. +in(x, itr::Tuple) = _in_tuple(x, itr, false) +# This recursive function will be unrolled at compiletime, and will not generate separate +# llvm-compiled specializations for each step of the recursion. +function _in_tuple(x, @nospecialize(itr::Tuple), anymissing::Bool) + @inline + # Base case + if isempty(itr) + return anymissing ? missing : false + end + # Recursive case + v = (itr[1] == x) + if ismissing(v) + anymissing = true + elseif v + return true + end + return _in_tuple(x, tail(itr), anymissing) +end + +# fallback to the loop implementation after some number of arguments to avoid inference blowup +in(x, itr::Any32) = invoke(in, Tuple{Any,Any}, x, itr) + const ∈ = in ∉(x, itr) = !∈(x, itr) ∉(itr) = Fix2(∉, itr) @@ -1330,11 +1425,15 @@ a function equivalent to `y -> item in y`. 
Determine whether an item is in the given collection, in the sense that it is [`==`](@ref) to one of the values generated by iterating over the collection. +Can equivalently be used with infix syntax: + + item in collection + item ∈ collection + Return a `Bool` value, except if `item` is [`missing`](@ref) or `collection` contains `missing` but not `item`, in which case `missing` is returned ([three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic), matching the behavior of [`any`](@ref) and [`==`](@ref)). - Some collections follow a slightly different definition. For example, [`Set`](@ref)s check whether the item [`isequal`](@ref) to one of the elements; [`Dict`](@ref)s look for `key=>value` pairs, and the `key` is compared using @@ -1345,14 +1444,14 @@ or `k in keys(dict)`. For the collections mentioned above, the result is always a `Bool`. When broadcasting with `in.(items, collection)` or `items .∈ collection`, both -`item` and `collection` are broadcasted over, which is often not what is intended. +`items` and `collection` are broadcasted over, which is often not what is intended. For example, if both arguments are vectors (and the dimensions match), the result is a vector indicating whether each value in collection `items` is `in` the value at the corresponding position in `collection`. To get a vector indicating whether each value in `items` is in `collection`, wrap `collection` in a tuple or a `Ref` like this: `in.(items, Ref(collection))` or `items .∈ Ref(collection)`. -See also: [`∉`](@ref). +See also: [`∉`](@ref), [`insorted`](@ref), [`contains`](@ref), [`occursin`](@ref), [`issubset`](@ref). # Examples ```jldoctest @@ -1390,8 +1489,6 @@ julia> [1, 2] .∈ ([2, 3],) 0 1 ``` - -See also: [`insorted`](@ref), [`contains`](@ref), [`occursin`](@ref), [`issubset`](@ref). """ in @@ -1401,7 +1498,7 @@ in Negation of `∈` and `∋`, i.e. checks that `item` is not in `collection`. -When broadcasting with `items .∉ collection`, both `item` and `collection` are +When broadcasting with `items .∉ collection`, both `items` and `collection` are broadcasted over, which is often not what is intended. For example, if both arguments are vectors (and the dimensions match), the result is a vector indicating whether each value in collection `items` is not in the value at the corresponding position diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl new file mode 100644 index 0000000000000..6b1d146b6172b --- /dev/null +++ b/base/optimized_generics.jl @@ -0,0 +1,84 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module OptimizedGenerics + +# This file defines interfaces that are recognized and optimized by the compiler +# They are intended to be used by data structure implementations that wish to +# opt into some level of compiler optimizations. These interfaces are +# EXPERIMENTAL and currently intended for use by Base only. They are subject +# to change or removal without notice. It is undefined behavior to add methods +# to these generics that do not conform to the specified interface. +# +# The intended way to use these generics is that data structures will provide +# appropriate implementations for a generic. In the absence of compiler +# optimizations, these behave like regular methods. However, the compiler is +# semantically allowed to perform certain structural optimizations on +# appropriate combinations of these intrinsics without proving correctness. + +# Compiler-recognized generics for immutable key-value stores (dicts, etc.) 
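Referring back to the `in` documentation above, a standalone sketch of the broadcasting pitfall and the three-valued logic (not part of this file):

```julia
items = [1, 2]
collection = [2, 3]

# Both arguments broadcast elementwise: `1 in 2`, then `2 in 3` — rarely what you want.
@assert in.(items, collection) == [false, false]

# Wrap the collection in `Ref` (or a tuple) to test each item against the whole collection.
@assert in.(items, Ref(collection)) == [false, true]

# Three-valued logic: a missing element makes an unmatched query return `missing`.
@assert ismissing(1 in [2, missing])
@assert (2 in [2, missing]) === true
```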
+""" + module KeyValue + +Implements a key-value like interface where the compiler has liberty to perform +the following transformations. The core optimization semantically allowed for +the compiler is: + + get(set(x, key, val), key) -> (val,) + +where the compiler will recursively look through `x`. Keys are compared by +egality. + +Implementations must observe the following constraints: + +1. It is undefined behavior for `get` not to return the exact (by egality) val + stored for a given `key`. +""" +module KeyValue + """ + set(collection, [key [, val]]) + set(T, collection, key, val) + + Set the `key` in `collection` to `val`. If `val` is omitted, deletes the + value from the collection. If `key` is omitted as well, deletes all elements + of the collection. + """ + function set end + + """ + get(collection, key) + + Retrieve the value corresponding to `key` in `collection` as a single + element tuple or `nothing` if no value corresponding to the key was found. + `key`s are compared by egal. + """ + function get end +end + +# Compiler-recognized intrinsics for compiler plugins +""" + module CompilerPlugins + +Implements a pair of functions `typeinf`/`typeinf_edge`. When the optimizer sees +a call to `typeinf`, it has license to instead call `typeinf_edge`, supplying the +current inference stack in `parent_frame` (but otherwise supplying the arguments +to `typeinf`). `typeinf_edge` will return the `CodeInstance` that `typeinf` would +have returned at runtime. The optimizer may perform a non-IPO replacement of +the call to `typeinf` by the result of `typeinf_edge`. In addition, the IPO-safe +fields of the `CodeInstance` may be propagated in IPO mode. +""" +module CompilerPlugins + """ + typeinf(owner, mi, source_mode)::CodeInstance + + Return a `CodeInstance` for the given `mi` whose valid results include at + the least current tls world and satisfies the requirements of `source_mode`. + """ + function typeinf end + + """ + typeinf_edge(owner, mi, parent_frame, world, abi_mode)::CodeInstance + """ + function typeinf_edge end +end + +end diff --git a/base/options.jl b/base/options.jl index a94936391fa8d..7e7808bd5c047 100644 --- a/base/options.jl +++ b/base/options.jl @@ -34,10 +34,12 @@ struct JLOptions can_inline::Int8 polly::Int8 trace_compile::Ptr{UInt8} + trace_dispatch::Ptr{UInt8} fast_math::Int8 worker::Int8 cookie::Ptr{UInt8} handle_signals::Int8 + use_experimental_features::Int8 use_sysimage_native_code::Int8 use_compiled_modules::Int8 use_pkgimages::Int8 @@ -57,6 +59,9 @@ struct JLOptions strip_ir::Int8 permalloc_pkgimg::Int8 heap_size_hint::UInt64 + trace_compile_timing::Int8 + trim::Int8 + task_metrics::Int8 end # This runs early in the sysimage != is not defined yet @@ -67,6 +72,18 @@ end JLOptions() = unsafe_load(cglobal(:jl_options, JLOptions)) +function colored_text(opts::JLOptions) + return if opts.color != 0 + opts.color == 1 + elseif !isempty(get(ENV, "FORCE_COLOR", "")) + true + elseif !isempty(get(ENV, "NO_COLOR", "")) + false + else + nothing + end +end + function show(io::IO, opt::JLOptions) print(io, "JLOptions(") fields = fieldnames(JLOptions) diff --git a/base/ordering.jl b/base/ordering.jl index d0c9cb99f9c72..585824bbeadfe 100644 --- a/base/ordering.jl +++ b/base/ordering.jl @@ -21,7 +21,8 @@ export # not exported by Base """ Base.Order.Ordering -Abstract type which represents a total order on some set of elements. +Abstract type which represents a strict weak order on some set of elements. See +[`sort!`](@ref) for more. 
Use [`Base.Order.lt`](@ref) to compare two elements according to the ordering. """ @@ -87,8 +88,8 @@ By(by) = By(by, Forward) """ Lt(lt) -`Ordering` which calls `lt(a, b)` to compare elements. `lt` should -obey the same rules as implementations of [`isless`](@ref). +`Ordering` that calls `lt(a, b)` to compare elements. `lt` must +obey the same rules as the `lt` parameter of [`sort!`](@ref). """ struct Lt{T} <: Ordering lt::T @@ -110,7 +111,7 @@ ReverseOrdering(by::By) = By(by.by, ReverseOrdering(by.order)) ReverseOrdering(perm::Perm) = Perm(ReverseOrdering(perm.order), perm.data) """ - lt(o::Ordering, a, b) + lt(o::Ordering, a, b) -> Bool Test whether `a` is less than `b` according to the ordering `o`. """ @@ -125,18 +126,15 @@ lt(o::Lt, a, b) = o.lt(a,b) (lt(p.order, da, db)::Bool) | (!(lt(p.order, db, da)::Bool) & (a < b)) end -_ord(lt::typeof(isless), by::typeof(identity), order::Ordering) = order -_ord(lt::typeof(isless), by, order::Ordering) = By(by, order) - -function _ord(lt, by, order::Ordering) - if order === Forward - return Lt((x, y) -> lt(by(x), by(y))) - elseif order === Reverse - return Lt((x, y) -> lt(by(y), by(x))) - else - error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)") - end -end + +_ord(lt::typeof(isless), by, order::Ordering) = _by(by, order) +_ord(lt::typeof(isless), by, order::ForwardOrdering) = _by(by, order) # disambiguation +_ord(lt::typeof(isless), by, order::ReverseOrdering{ForwardOrdering}) = _by(by, order) # disambiguation +_ord(lt, by, order::ForwardOrdering) = _by(by, Lt(lt)) +_ord(lt, by, order::ReverseOrdering{ForwardOrdering}) = reverse(_by(by, Lt(lt))) +_ord(lt, by, order::Ordering) = error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)") +_by(by, order::Ordering) = By(by, order) +_by(::typeof(identity), order::Ordering) = order """ ord(lt, by, rev::Union{Bool, Nothing}, order::Ordering=Forward) @@ -146,8 +144,8 @@ Construct an [`Ordering`](@ref) object from the same arguments used by Elements are first transformed by the function `by` (which may be [`identity`](@ref)) and are then compared according to either the function `lt` or an existing ordering `order`. `lt` should be [`isless`](@ref) or a function -which obeys similar rules. Finally, the resulting order is reversed if -`rev=true`. +that obeys the same rules as the `lt` parameter of [`sort!`](@ref). Finally, +the resulting order is reversed if `rev=true`. Passing an `lt` other than `isless` along with an `order` other than [`Base.Order.Forward`](@ref) or [`Base.Order.Reverse`](@ref) is not permitted, diff --git a/base/osutils.jl b/base/osutils.jl index 95d0562540e5a..5daf58f5b8f4f 100644 --- a/base/osutils.jl +++ b/base/osutils.jl @@ -3,13 +3,23 @@ """ @static -Partially evaluate an expression at parse time. +Partially evaluate an expression at macro expansion time. -For example, `@static Sys.iswindows() ? foo : bar` will evaluate `Sys.iswindows()` and insert -either `foo` or `bar` into the expression. -This is useful in cases where a construct would be invalid on other platforms, -such as a `ccall` to a non-existent function. -`@static if Sys.isapple() foo end` and `@static foo <&&,||> bar` are also valid syntax. +This is useful in cases where a construct would be invalid in some cases, such as a `ccall` +to an os-dependent function, or macros defined in packages that are not imported. + +`@static` requires a conditional. 
The conditional can be in an `if` statement, a ternary +operator, or `&&`\`||`. The conditional is evaluated by recursively expanding macros, +lowering and executing the resulting expressions. Then, the matching branch (if any) is +returned. All the other branches of the conditional are deleted before they are +macro-expanded (and lowered or executed). + +# Example + +Suppose we want to parse an expression `expr` that is valid only on macOS. We could solve +this problem using `@static` with `@static if Sys.isapple() expr end`. In case we had +`expr_apple` for macOS and `expr_others` for the other operating systems, the solution with +`@static` would be `@static Sys.isapple() ? expr_apple : expr_others`. """ macro static(ex) if isa(ex, Expr) diff --git a/base/parse.jl b/base/parse.jl index f6a93e56369b7..2530e5a46146a 100644 --- a/base/parse.jl +++ b/base/parse.jl @@ -251,8 +251,9 @@ function tryparse(::Type{T}, s::AbstractString; base::Union{Nothing,Integer} = n end function parse(::Type{T}, s::AbstractString; base::Union{Nothing,Integer} = nothing) where {T<:Integer} - convert(T, tryparse_internal(T, s, firstindex(s), lastindex(s), - base===nothing ? 0 : check_valid_base(base), true)) + v = tryparse_internal(T, s, firstindex(s), lastindex(s), base===nothing ? 0 : check_valid_base(base), true) + v === nothing && error("should not happoen") + convert(T, v) end tryparse(::Type{Union{}}, slurp...; kwargs...) = error("cannot parse a value as Union{}") @@ -321,14 +322,14 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} if i₊ == i # leading ± sign i₊ = something(findnext(in(('+','-')), s, i₊+1), 0) end - if i₊ != 0 && s[i₊-1] in ('e','E') # exponent sign + if i₊ != 0 && s[prevind(s, i₊)] in ('e','E') # exponent sign i₊ = something(findnext(in(('+','-')), s, i₊+1), 0) end # find trailing im/i/j iᵢ = something(findprev(in(('m','i','j')), s, e), 0) if iᵢ > 0 && s[iᵢ] == 'm' # im - iᵢ -= 1 + iᵢ = prevind(s, iᵢ) if s[iᵢ] != 'i' raise && throw(ArgumentError("expected trailing \"im\", found only \"m\"")) return nothing @@ -337,7 +338,7 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} if i₊ == 0 # purely real or imaginary value if iᵢ > i && !(iᵢ == i+1 && s[i] in ('+','-')) # purely imaginary (not "±inf") - x = tryparse_internal(T, s, i, iᵢ-1, raise) + x = tryparse_internal(T, s, i, prevind(s, iᵢ), raise) x === nothing && return nothing return Complex{T}(zero(x),x) else # purely real @@ -353,11 +354,11 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} end # parse real part - re = tryparse_internal(T, s, i, i₊-1, raise) + re = tryparse_internal(T, s, i, prevind(s, i₊), raise) re === nothing && return nothing # parse imaginary part - im = tryparse_internal(T, s, i₊+1, iᵢ-1, raise) + im = tryparse_internal(T, s, i₊+1, prevind(s, iᵢ), raise) im === nothing && return nothing return Complex{T}(re, s[i₊]=='-' ? -im : im) @@ -385,7 +386,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, raise::Bool; kwargs...) 
return result end @noinline _parse_failure(T, s::AbstractString, startpos = firstindex(s), endpos = lastindex(s)) = - throw(ArgumentError("cannot parse $(repr(s[startpos:endpos])) as $T")) + throw(ArgumentError(LazyString("cannot parse ", repr(s[startpos:endpos]), " as ", T))) tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, raise::Bool) where T<:Integer = tryparse_internal(T, s, startpos, endpos, 10, raise) diff --git a/base/partr.jl b/base/partr.jl index a02272ceab202..6053a584af5ba 100644 --- a/base/partr.jl +++ b/base/partr.jl @@ -18,17 +18,64 @@ end const heap_d = UInt32(8) const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)] const heaps_lock = [SpinLock(), SpinLock()] -const cong_unbias = [typemax(UInt32), typemax(UInt32)] -cong(max::UInt32, unbias::UInt32) = - ccall(:jl_rand_ptls, UInt32, (UInt32, UInt32), max, unbias) + UInt32(1) +""" + cong(max::UInt32) -function unbias_cong(max::UInt32) - return typemax(UInt32) - ((typemax(UInt32) % max) + UInt32(1)) +Return a random UInt32 in the range `1:max` except if max is 0, in that case return 0. +""" +cong(max::UInt32) = iszero(max) ? UInt32(0) : rand_ptls(max) + UInt32(1) #TODO: make sure users don't use 0 and remove this check + +get_ptls_rng() = ccall(:jl_get_ptls_rng, UInt64, ()) + +set_ptls_rng(seed::UInt64) = ccall(:jl_set_ptls_rng, Cvoid, (UInt64,), seed) + +""" + rand_ptls(max::UInt32) + +Return a random UInt32 in the range `0:max-1` using the thread-local RNG +state. Max must be greater than 0. +""" +Base.@assume_effects :removable :inaccessiblememonly :notaskstate function rand_ptls(max::UInt32) + rngseed = get_ptls_rng() + val, seed = rand_uniform_max_int32(max, rngseed) + set_ptls_rng(seed) + return val % UInt32 +end + +# This implementation is based on OpenSSLs implementation of rand_uniform +# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99 +# Comments are vendored from their implementation as well. +# For the original developer check the PR to swift https://github.com/apple/swift/pull/39143. + +# Essentially it boils down to incrementally generating a fixed point +# number on the interval [0, 1) and multiplying this number by the upper +# range limit. Once it is certain what the fractional part contributes to +# the integral part of the product, the algorithm has produced a definitive +# result. +""" + rand_uniform_max_int32(max::UInt32, seed::UInt64) + +Return a random UInt32 in the range `0:max-1` using the given seed. +Max must be greater than 0. +""" +Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64) + if max == UInt32(1) + return UInt32(0), seed + end + # We are generating a fixed point number on the interval [0, 1). + # Multiplying this by the range gives us a number on [0, upper). 
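A standalone sketch of the same trick, separate from the function being added here (`bounded_sample` is a hypothetical name): multiplying a 32-bit sample by `max` and keeping the high 32 bits of the 64-bit product yields a value in `0:max-1`.

```julia
# Illustration of the bounded-sample trick (not the Base implementation).
function bounded_sample(max::UInt32, seed::UInt64)
    seed = UInt64(69069) * seed + UInt64(362437)   # LCG step (same constants as above)
    prod = UInt64(max) * (seed % UInt32)           # 64-bit product
    return (prod >> 32) % UInt32, seed             # high word lies in 0:max-1
end

val, _ = bounded_sample(UInt32(10), UInt64(42))
@assert 0 <= val < 10
```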
+ # The high word of the multiplication result represents the integral part + # This is not completely unbiased as it's missing the fractional part of the original implementation but it's good enough for our purposes + seed = UInt64(69069) * seed + UInt64(362437) + prod = (UInt64(max)) * (seed % UInt32) # 64 bit product + i = prod >> 32 % UInt32 # integral part + return i % UInt32, seed end + function multiq_sift_up(heap::taskheap, idx::Int32) while idx > Int32(1) parent = (idx - Int32(2)) ÷ heap_d + Int32(1) @@ -86,7 +133,6 @@ function multiq_size(tpid::Int8) newheaps[i] = taskheap() end heaps[tp] = newheaps - cong_unbias[tp] = unbias_cong(heap_p) end return heap_p @@ -95,15 +141,16 @@ end function multiq_insert(task::Task, priority::UInt16) tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), task) + @assert tpid > -1 heap_p = multiq_size(tpid) tp = tpid + 1 task.priority = priority - rn = cong(heap_p, cong_unbias[tp]) + rn = cong(heap_p) tpheaps = heaps[tp] while !trylock(tpheaps[rn].lock) - rn = cong(heap_p, cong_unbias[tp]) + rn = cong(heap_p) end heap = tpheaps[rn] @@ -131,6 +178,9 @@ function multiq_deletemin() tid = Threads.threadid() tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1 + if tp == 0 # Foreign thread + return nothing + end tpheaps = heaps[tp] @label retry @@ -140,8 +190,8 @@ function multiq_deletemin() if i == heap_p return nothing end - rn1 = cong(heap_p, cong_unbias[tp]) - rn2 = cong(heap_p, cong_unbias[tp]) + rn1 = cong(heap_p) + rn2 = cong(heap_p) prio1 = tpheaps[rn1].priority prio2 = tpheaps[rn2].priority if prio1 > prio2 @@ -182,6 +232,9 @@ end function multiq_check_empty() tid = Threads.threadid() tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1 + if tp == 0 # Foreign thread + return true + end for i = UInt32(1):length(heaps[tp]) if heaps[tp][i].ntasks != 0 return false diff --git a/base/path.jl b/base/path.jl index c439a2800acce..69c8d22c63c54 100644 --- a/base/path.jl +++ b/base/path.jl @@ -34,8 +34,25 @@ elseif Sys.iswindows() const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"sa const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"sa + const splitdrive_re = let + # Slash in either direction. + S = raw"[\\/]" + # Not a slash in either direction. + N = raw"[^\\/]" + # Drive letter, e.g. `C:` + drive = "$(N)+:" + # UNC path, e.g. `\\server\share` + unc = "$(S)$(S)$(N)+$(S)$(N)+" + # Long drive letter, e.g. `\\?\C:` + long_drive = "$(S)$(S)\\?$(S)$(drive)" + # Long UNC path, e.g. `\\?\UNC\server\share` + long_unc = "$(S)$(S)\\?$(S)UNC$(S)$(N)+$(S)$(N)+" + # Need to match the long patterns first so they get priority. + Regex("^($long_unc|$long_drive|$unc|$drive|)(.*)\$", "sa") + end + function splitdrive(path::String) - m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"sa, path)::AbstractMatch + m = match(splitdrive_re, path)::AbstractMatch String(something(m.captures[1])), String(something(m.captures[2])) end else @@ -60,6 +77,8 @@ Return the current user's home directory. `homedir` determines the home directory via `libuv`'s `uv_os_homedir`. For details (for example on how to specify the home directory via environment variables), see the [`uv_os_homedir` documentation](http://docs.libuv.org/en/v1.x/misc.html#c.uv_os_homedir). + +See also [`Sys.username`](@ref). """ function homedir() buf = Base.StringVector(AVG_PATH - 1) # space for null-terminator implied by StringVector @@ -416,11 +435,11 @@ normpath(a::AbstractString, b::AbstractString...) 
= normpath(joinpath(a,b...)) Convert a path to an absolute path by adding the current directory if necessary. Also normalizes the path as in [`normpath`](@ref). -# Example +# Examples If you are in a directory called `JuliaExample` and the data you are using is two levels up relative to the `JuliaExample` directory, you could write: -abspath("../../data") + abspath("../../data") Which gives a path like `"/home/JuliaUser/data/"`. @@ -594,3 +613,56 @@ relpath(path::AbstractString, startpath::AbstractString) = for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath) @eval $f(path::AbstractString) = $f(String(path)) end + +# RFC3986 Section 2.1 +percent_escape(s) = '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%') +# RFC3986 Section 2.3 +encode_uri_component(s) = replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape) + +""" + uripath(path::AbstractString) + +Encode `path` as a URI as per [RFC8089: The "file" URI +Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource +Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and +the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/). + +## Examples + +```julia-repl +julia> uripath("/home/user/example file.jl") # On a unix machine +"file:///home/user/example%20file.jl" + +juila> uripath("C:\\Users\\user\\example file.jl") # On a windows machine +"file:///C:/Users/user/example%20file.jl" +``` +""" +function uripath end + +@static if Sys.iswindows() + function uripath(path::String) + path = abspath(path) + if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3 + unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/') + string("file://", encode_uri_component(unixpath)) # RFC8089 Section 2 + else + drive, localpath = splitdrive(path) # Assuming that non-UNC absolute paths on Windows always have a drive component + unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/') + encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|') # RFC8089 Appendices D.2, E.2.1, and E.2.2 + string("file:///", encdrive, '/', encode_uri_component(unixpath)) # RFC8089 Section 2 + end + end +else + function uripath(path::String) + localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/') + host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil + distro = get(ENV, "WSL_DISTRO_NAME", "") # See + "wsl\$/$distro" # See and + else + gethostname() # Freedesktop File URI Spec, Hostnames section + end + string("file://", encode_uri_component(host), '/', encode_uri_component(localpath)) # RFC8089 Section 2 + end +end + +uripath(path::AbstractString) = uripath(String(path)) diff --git a/base/pcre.jl b/base/pcre.jl index 7597c1217ca9e..e4567fe03e8f8 100644 --- a/base/pcre.jl +++ b/base/pcre.jl @@ -7,7 +7,7 @@ module PCRE import ..RefValue # include($BUILDROOT/base/pcre_h.jl) -include(string(length(Core.ARGS) >= 2 ? 
Core.ARGS[2] : "", "pcre_h.jl")) +include(string(Base.BUILDROOT, "pcre_h.jl")) const PCRE_LIB = "libpcre2-8" @@ -24,19 +24,19 @@ function create_match_context() return ctx end -THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL] +global THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL] -PCRE_COMPILE_LOCK = nothing +global PCRE_COMPILE_LOCK::Threads.SpinLock _tid() = Int(ccall(:jl_threadid, Int16, ())) + 1 -_mth() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire)) +_mth() = Threads.maxthreadid() function get_local_match_context() tid = _tid() ctxs = THREAD_MATCH_CONTEXTS if length(ctxs) < tid # slow path to allocate it - l = PCRE_COMPILE_LOCK::Threads.SpinLock + l = PCRE_COMPILE_LOCK lock(l) try ctxs = THREAD_MATCH_CONTEXTS @@ -196,10 +196,12 @@ function err_message(errno::Integer) return GC.@preserve buffer unsafe_string(pointer(buffer)) end -function exec(re, subject, offset, options, match_data) - if !(subject isa Union{String,SubString{String}}) - subject = String(subject) - end +exec(re, subject::Union{String,SubString{String}}, offset, options, match_data) = + _exec(re, subject, offset, options, match_data) +exec(re, subject, offset, options, match_data) = + _exec(re, String(subject), offset, options, match_data) + +function _exec(re, subject, offset, options, match_data) rc = ccall((:pcre2_match_8, PCRE_LIB), Cint, (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}), re, subject, ncodeunits(subject), offset, options, match_data, get_local_match_context()) diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl index 41c3636b40216..cf9748168aac2 100644 --- a/base/permuteddimsarray.jl +++ b/base/permuteddimsarray.jl @@ -12,7 +12,7 @@ struct PermutedDimsArray{T,N,perm,iperm,AA<:AbstractArray} <: AbstractArray{T,N} function PermutedDimsArray{T,N,perm,iperm,AA}(data::AA) where {T,N,perm,iperm,AA<:AbstractArray} (isa(perm, NTuple{N,Int}) && isa(iperm, NTuple{N,Int})) || error("perm and iperm must both be NTuple{$N,Int}") isperm(perm) || throw(ArgumentError(string(perm, " is not a valid permutation of dimensions 1:", N))) - all(map(d->iperm[perm[d]]==d, 1:N)) || throw(ArgumentError(string(perm, " and ", iperm, " must be inverses"))) + all(d->iperm[perm[d]]==d, 1:N) || throw(ArgumentError(string(perm, " and ", iperm, " must be inverses"))) new(data) end end @@ -39,7 +39,7 @@ julia> B[3,1,2] == A[1,2,3] true ``` """ -function PermutedDimsArray(data::AbstractArray{T,N}, perm) where {T,N} +Base.@constprop :aggressive function PermutedDimsArray(data::AbstractArray{T,N}, perm) where {T,N} length(perm) == N || throw(ArgumentError(string(perm, " is not a valid permutation of dimensions 1:", N))) iperm = invperm(perm) PermutedDimsArray{T,N,(perm...,),(iperm...,),typeof(data)}(data) @@ -49,10 +49,8 @@ Base.parent(A::PermutedDimsArray) = A.parent Base.size(A::PermutedDimsArray{T,N,perm}) where {T,N,perm} = genperm(size(parent(A)), perm) Base.axes(A::PermutedDimsArray{T,N,perm}) where {T,N,perm} = genperm(axes(parent(A)), perm) Base.has_offset_axes(A::PermutedDimsArray) = Base.has_offset_axes(A.parent) - Base.similar(A::PermutedDimsArray, T::Type, dims::Base.Dims) = similar(parent(A), T, dims) - -Base.unsafe_convert(::Type{Ptr{T}}, A::PermutedDimsArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, parent(A)) +Base.cconvert(::Type{Ptr{T}}, A::PermutedDimsArray{T}) where {T} = Base.cconvert(Ptr{T}, parent(A)) # It's OK to return a pointer to the first element, and indeed quite # useful for wrapping C routines that require a different 
storage @@ -89,13 +87,68 @@ end """ permutedims(A::AbstractArray, perm) + permutedims(A::AbstractMatrix) -Permute the dimensions of array `A`. `perm` is a vector or a tuple of length `ndims(A)` +Permute the dimensions (axes) of array `A`. `perm` is a tuple or vector of `ndims(A)` integers specifying the permutation. +If `A` is a 2d array ([`AbstractMatrix`](@ref)), then +`perm` defaults to `(2,1)`, swapping the two axes of `A` (the rows and columns +of the matrix). This differs from [`transpose`](@ref) in that the +operation is not recursive, which is especially useful for arrays of non-numeric values +(where the recursive `transpose` would throw an error) and/or 2d arrays that do not represent +linear operators. + +For 1d arrays, see [`permutedims(v::AbstractVector)`](@ref), which returns a 1-row “matrix”. + See also [`permutedims!`](@ref), [`PermutedDimsArray`](@ref), [`transpose`](@ref), [`invperm`](@ref). # Examples + +## 2d arrays: +Unlike `transpose`, `permutedims` can be used to swap rows and columns of 2d arrays of +arbitrary non-numeric elements, such as strings: +```jldoctest +julia> A = ["a" "b" "c" + "d" "e" "f"] +2×3 Matrix{String}: + "a" "b" "c" + "d" "e" "f" + +julia> permutedims(A) +3×2 Matrix{String}: + "a" "d" + "b" "e" + "c" "f" +``` +And `permutedims` produces results that differ from `transpose` +for matrices whose elements are themselves numeric matrices: +```jldoctest; setup = :(using LinearAlgebra) +julia> a = [1 2; 3 4]; + +julia> b = [5 6; 7 8]; + +julia> c = [9 10; 11 12]; + +julia> d = [13 14; 15 16]; + +julia> X = [[a] [b]; [c] [d]] +2×2 Matrix{Matrix{Int64}}: + [1 2; 3 4] [5 6; 7 8] + [9 10; 11 12] [13 14; 15 16] + +julia> permutedims(X) +2×2 Matrix{Matrix{Int64}}: + [1 2; 3 4] [9 10; 11 12] + [5 6; 7 8] [13 14; 15 16] + +julia> transpose(X) +2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}: + [1 3; 2 4] [9 11; 10 12] + [5 7; 6 8] [13 15; 14 16] +``` + +## Multi-dimensional arrays ```jldoctest julia> A = reshape(Vector(1:8), (2,2,2)) 2×2×2 Array{Int64, 3}: @@ -145,54 +198,62 @@ function permutedims(A::AbstractArray, perm) permutedims!(dest, A, perm) end -""" - permutedims(m::AbstractMatrix) - -Permute the dimensions of the matrix `m`, by flipping the elements across the diagonal of -the matrix. Differs from `LinearAlgebra`'s [`transpose`](@ref) in that the -operation is not recursive. - -# Examples -```jldoctest; setup = :(using LinearAlgebra) -julia> a = [1 2; 3 4]; - -julia> b = [5 6; 7 8]; - -julia> c = [9 10; 11 12]; - -julia> d = [13 14; 15 16]; - -julia> X = [[a] [b]; [c] [d]] -2×2 Matrix{Matrix{Int64}}: - [1 2; 3 4] [5 6; 7 8] - [9 10; 11 12] [13 14; 15 16] - -julia> permutedims(X) -2×2 Matrix{Matrix{Int64}}: - [1 2; 3 4] [9 10; 11 12] - [5 6; 7 8] [13 14; 15 16] - -julia> transpose(X) -2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}: - [1 3; 2 4] [9 11; 10 12] - [5 7; 6 8] [13 15; 14 16] -``` -""" permutedims(A::AbstractMatrix) = permutedims(A, (2,1)) """ permutedims(v::AbstractVector) Reshape vector `v` into a `1 × length(v)` row matrix. -Differs from `LinearAlgebra`'s [`transpose`](@ref) in that -the operation is not recursive. +Differs from [`transpose`](@ref) in that +the operation is not recursive, which is especially useful for arrays of non-numeric values +(where the recursive `transpose` might throw an error). 
# Examples +Unlike `transpose`, `permutedims` can be used on vectors of +arbitrary non-numeric elements, such as strings: +```jldoctest +julia> permutedims(["a", "b", "c"]) +1×3 Matrix{String}: + "a" "b" "c" +``` +For vectors of numbers, `permutedims(v)` works much like `transpose(v)` +except that the return type differs (it uses [`reshape`](@ref) +rather than a `LinearAlgebra.Transpose` view, though both +share memory with the original array `v`): ```jldoctest; setup = :(using LinearAlgebra) -julia> permutedims([1, 2, 3, 4]) +julia> v = [1, 2, 3, 4] +4-element Vector{Int64}: + 1 + 2 + 3 + 4 + +julia> p = permutedims(v) 1×4 Matrix{Int64}: 1 2 3 4 +julia> r = transpose(v) +1×4 transpose(::Vector{Int64}) with eltype Int64: + 1 2 3 4 + +julia> p == r +true + +julia> typeof(r) +Transpose{Int64, Vector{Int64}} + +julia> p[1] = 5; r[2] = 6; # mutating p or r also changes v + +julia> v # shares memory with both p and r +4-element Vector{Int64}: + 5 + 6 + 3 + 4 +``` +However, `permutedims` produces results that differ from `transpose` +for vectors whose elements are themselves numeric matrices: +```jldoctest; setup = :(using LinearAlgebra) julia> V = [[[1 2; 3 4]]; [[5 6; 7 8]]] 2-element Vector{Matrix{Int64}}: [1 2; 3 4] @@ -221,7 +282,7 @@ regions. See also [`permutedims`](@ref). """ function permutedims!(dest, src::AbstractArray, perm) - Base.checkdims_perm(dest, src, perm) + Base.checkdims_perm(axes(dest), axes(src), perm) P = PermutedDimsArray(dest, invperm(perm)) _copy!(P, src) return dest diff --git a/base/pkgid.jl b/base/pkgid.jl index 20d9de559b334..8c776d79a69cb 100644 --- a/base/pkgid.jl +++ b/base/pkgid.jl @@ -23,7 +23,7 @@ function hash(pkg::PkgId, h::UInt) return h end -show(io::IO, pkg::PkgId) = +show(io::IO, ::MIME"text/plain", pkg::PkgId) = print(io, pkg.name, " [", pkg.uuid === nothing ? "top-level" : pkg.uuid, "]") function binpack(pkg::PkgId) @@ -37,7 +37,8 @@ end function binunpack(s::String) io = IOBuffer(s) - @assert read(io, UInt8) === 0x00 + z = read(io, UInt8) + @assert z === 0x00 uuid = read(io, UInt128) name = read(io, String) return PkgId(UUID(uuid), name) diff --git a/base/pointer.jl b/base/pointer.jl index a47f1e38edb9b..de2f413d8f881 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -54,17 +54,40 @@ See also [`cconvert`](@ref) """ function unsafe_convert end +# convert strings to String etc. to pass as pointers +cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = String(s) +cconvert(::Type{Ptr{Int8}}, s::AbstractString) = String(s) unsafe_convert(::Type{Ptr{UInt8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{UInt8}, (Any,), x) unsafe_convert(::Type{Ptr{Int8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{Int8}, (Any,), x) unsafe_convert(::Type{Ptr{UInt8}}, s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s) unsafe_convert(::Type{Ptr{Int8}}, s::String) = ccall(:jl_string_ptr, Ptr{Int8}, (Any,), s) -# convert strings to String etc. 
to pass as pointers -cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = String(s) -cconvert(::Type{Ptr{Int8}}, s::AbstractString) = String(s) -unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = ccall(:jl_array_ptr, Ptr{T}, (Any,), a) +cconvert(::Type{<:Ptr}, a::Array) = getfield(a, :ref) unsafe_convert(::Type{Ptr{S}}, a::AbstractArray{T}) where {S,T} = convert(Ptr{S}, unsafe_convert(Ptr{T}, a)) +unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = unsafe_convert(Ptr{T}, a.ref) unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))") +# TODO: add this deprecation to give a better error: +# cconvert(::Type{<:Ptr}, a::AbstractArray) = error("conversion to pointer not defined for $(typeof(a))") +# unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("missing call to cconvert for call to unsafe_convert for AbstractArray") + +cconvert(::Type{<:Ptr}, a::GenericMemory) = a +unsafe_convert(::Type{Ptr{Cvoid}}, a::GenericMemory{T}) where {T} = getfield(a, :ptr) +unsafe_convert(::Type{Ptr{T}}, a::GenericMemory) where {T} = convert(Ptr{T}, getfield(a, :ptr)) + +function unsafe_convert(::Type{Ptr{Cvoid}}, a::GenericMemoryRef{<:Any,T,Core.CPU}) where {T} + mem = getfield(a, :mem) + offset = getfield(a, :ptr_or_offset) + MemT = typeof(mem) + arrayelem = datatype_arrayelem(MemT) + elsz = datatype_layoutsize(MemT) + isboxed = 1; isunion = 2 + if arrayelem == isunion || elsz == 0 + offset = UInt(offset) * elsz + offset += unsafe_convert(Ptr{Cvoid}, mem) + end + return offset +end +unsafe_convert(::Type{Ptr{T}}, a::GenericMemoryRef) where {T} = convert(Ptr{T}, unsafe_convert(Ptr{Cvoid}, a)) # unsafe pointer to array conversions """ @@ -92,10 +115,21 @@ function unsafe_wrap(::Union{Type{Array},Type{Array{T}},Type{Array{T,1}}}, ccall(:jl_ptr_to_array_1d, Array{T,1}, (Any, Ptr{Cvoid}, Csize_t, Cint), Array{T,1}, p, d, own) end -unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, +function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}}, + p::Ptr{T}, dims::Tuple{Int}; own::Bool = false) where {kind,T} + ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}}, + (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, dims[1], own) +end +function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}}, + p::Ptr{T}, d::Integer; own::Bool = false) where {kind,T} + ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}}, + (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, d, own) +end +unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}},Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}} where {kind}, p::Ptr{T}, dims::NTuple{N,<:Integer}; own::Bool = false) where {T,N} = unsafe_wrap(Atype, p, convert(Tuple{Vararg{Int}}, dims), own = own) + """ unsafe_load(p::Ptr{T}, i::Integer=1) unsafe_load(p::Ptr{T}, order::Symbol) @@ -135,7 +169,7 @@ The `unsafe` prefix on this function indicates that no validation is performed o pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program. Unlike C, storing memory region allocated as -different type may be valid provided that that the types are compatible. +different type may be valid provided that the types are compatible. 
!!! compat "Julia 1.10" The `order` argument is available as of Julia 1.10. @@ -279,8 +313,8 @@ isless(x::Ptr{T}, y::Ptr{T}) where {T} = x < y <(x::Ptr, y::Ptr) = UInt(x) < UInt(y) -(x::Ptr, y::Ptr) = UInt(x) - UInt(y) -+(x::Ptr, y::Integer) = oftype(x, add_ptr(UInt(x), (y % UInt) % UInt)) --(x::Ptr, y::Integer) = oftype(x, sub_ptr(UInt(x), (y % UInt) % UInt)) ++(x::Ptr, y::Integer) = add_ptr(x, (y % UInt) % UInt) +-(x::Ptr, y::Integer) = sub_ptr(x, (y % UInt) % UInt) +(x::Integer, y::Ptr) = y + x unsigned(x::Ptr) = UInt(x) diff --git a/base/precompilation.jl b/base/precompilation.jl new file mode 100644 index 0000000000000..820cf260df71f --- /dev/null +++ b/base/precompilation.jl @@ -0,0 +1,1182 @@ +module Precompilation + +using Base: PkgId, UUID, SHA1, parsed_toml, project_file_name_uuid, project_names, + project_file_manifest_path, get_deps, preferences_names, isaccessibledir, isfile_casesensitive, + base_project, isdefined + +# This is currently only used for pkgprecompile but the plan is to use this in code loading in the future +# see the `kc/codeloading2.0` branch +struct ExplicitEnv + path::String + project_deps::Dict{String, UUID} # [deps] in Project.toml + project_weakdeps::Dict{String, UUID} # [weakdeps] in Project.toml + project_extras::Dict{String, UUID} # [extras] in Project.toml + project_extensions::Dict{String, Vector{UUID}} # [exts] in Project.toml + deps::Dict{UUID, Vector{UUID}} # all dependencies in Manifest.toml + weakdeps::Dict{UUID, Vector{UUID}} # all weak dependencies in Manifest.toml + extensions::Dict{UUID, Dict{String, Vector{UUID}}} + # Lookup name for a UUID + names::Dict{UUID, String} + lookup_strategy::Dict{UUID, Union{ + SHA1, # `git-tree-sha1` entry + String, # `path` entry + Nothing, # stdlib (no `path` nor `git-tree-sha1`) + Missing}} # not present in the manifest + #prefs::Union{Nothing, Dict{String, Any}} + #local_prefs::Union{Nothing, Dict{String, Any}} +end + +function ExplicitEnv(envpath::String=Base.active_project()) + if !isfile(envpath) + error("expected a project file at $(repr(envpath))") + end + envpath = abspath(envpath) + project_d = parsed_toml(envpath) + + # TODO: Perhaps verify that two packages with the same UUID do not have different names? + names = Dict{UUID, String}() + project_uuid_to_name = Dict{String, UUID}() + + project_deps = Dict{String, UUID}() + project_weakdeps = Dict{String, UUID}() + project_extras = Dict{String, UUID}() + + # Collect all direct dependencies of the project + for key in ["deps", "weakdeps", "extras"] + for (name, _uuid) in get(Dict{String, Any}, project_d, key)::Dict{String, Any} + v = key == "deps" ? project_deps : + key == "weakdeps" ? project_weakdeps : + key == "extras" ? project_extras : + error() + uuid = UUID(_uuid::String) + v[name] = uuid + names[UUID(uuid)] = name + project_uuid_to_name[name] = UUID(uuid) + end + end + + # A package in both deps and weakdeps is in fact only a weakdep + for (name, _) in project_weakdeps + delete!(project_deps, name) + end + + # This project might be a package, in that case, that is also a "dependency" + # of the project. + proj_name = get(project_d, "name", nothing)::Union{String, Nothing} + _proj_uuid = get(project_d, "uuid", nothing)::Union{String, Nothing} + proj_uuid = _proj_uuid === nothing ? nothing : UUID(_proj_uuid) + + project_is_package = proj_name !== nothing && proj_uuid !== nothing + if project_is_package + # TODO: Error on missing uuid? 
+ project_deps[proj_name] = UUID(proj_uuid) + names[UUID(proj_uuid)] = proj_name + end + + project_extensions = Dict{String, Vector{UUID}}() + # Collect all extensions of the project + for (name, triggers) in get(Dict{String, Any}, project_d, "extensions")::Dict{String, Any} + if triggers isa String + triggers = [triggers] + else + triggers = triggers::Vector{String} + end + uuids = UUID[] + for trigger in triggers + uuid = get(project_uuid_to_name, trigger, nothing) + if uuid === nothing + error("Trigger $trigger for extension $name not found in project") + end + push!(uuids, uuid) + end + project_extensions[name] = uuids + end + + manifest = project_file_manifest_path(envpath) + manifest_d = manifest === nothing ? Dict{String, Any}() : parsed_toml(manifest) + + # Dependencies in a manifest can either be stored compressed (when name is unique among all packages) + # in which case it is a `Vector{String}` or expanded where it is a `name => uuid` mapping. + deps = Dict{UUID, Union{Vector{String}, Vector{UUID}}}() + weakdeps = Dict{UUID, Union{Vector{String}, Vector{UUID}}}() + extensions = Dict{UUID, Dict{String, Vector{String}}}() + name_to_uuid = Dict{String, UUID}() + lookup_strategy = Dict{UUID, Union{SHA1, String, Nothing, Missing}}() + + sizehint!(deps, length(manifest_d)) + sizehint!(weakdeps, length(manifest_d)) + sizehint!(extensions, length(manifest_d)) + sizehint!(name_to_uuid, length(manifest_d)) + sizehint!(lookup_strategy, length(manifest_d)) + + for (name, pkg_infos) in get_deps(manifest_d) + for pkg_info in pkg_infos::Vector{Any} + pkg_info = pkg_info::Dict{String, Any} + m_uuid = UUID(pkg_info["uuid"]::String) + + # If we have multiple packages with the same name we will overwrite things here + # but that is fine since we will only use the information in here for packages + # with unique names + names[m_uuid] = name + name_to_uuid[name] = m_uuid + + for key in ["deps", "weakdeps"] + deps_pkg = get(Vector{String}, pkg_info, key)::Union{Vector{String}, Dict{String, Any}} + d = key == "deps" ? deps : + key == "weakdeps" ? 
weakdeps : + error() + + # Compressed format with unique names: + if deps_pkg isa Vector{String} + d[m_uuid] = deps_pkg + # Expanded format: + else + uuids = UUID[] + for (name_dep, _dep_uuid) in deps_pkg + dep_uuid = UUID(_dep_uuid::String) + push!(uuids, dep_uuid) + names[dep_uuid] = name_dep + end + d[m_uuid] = uuids + end + end + + # Extensions + deps_pkg = get(Dict{String, Any}, pkg_info, "extensions")::Dict{String, Any} + for (ext, triggers) in deps_pkg + if triggers isa String + triggers = [triggers] + else + triggers = triggers::Vector{String} + end + deps_pkg[ext] = triggers + end + extensions[m_uuid] = deps_pkg + + # Determine strategy to find package + lookup_strat = begin + if (path = get(pkg_info, "path", nothing)::Union{String, Nothing}) !== nothing + path + elseif (git_tree_sha_str = get(pkg_info, "git-tree-sha1", nothing)::Union{String, Nothing}) !== nothing + SHA1(git_tree_sha_str) + else + nothing + end + end + lookup_strategy[m_uuid] = lookup_strat + end + end + + # No matter if the deps were stored compressed or not in the manifest, + # we internally store them expanded + deps_expanded = Dict{UUID, Vector{UUID}}() + weakdeps_expanded = Dict{UUID, Vector{UUID}}() + extensions_expanded = Dict{UUID, Dict{String, Vector{UUID}}}() + sizehint!(deps_expanded, length(deps)) + sizehint!(weakdeps_expanded, length(deps)) + sizehint!(extensions_expanded, length(deps)) + + if proj_name !== nothing && proj_uuid !== nothing + deps_expanded[proj_uuid] = filter!(!=(proj_uuid), collect(values(project_deps))) + extensions_expanded[proj_uuid] = project_extensions + path = get(project_d, "path", nothing)::Union{String, Nothing} + entry_point = path !== nothing ? path : dirname(envpath) + lookup_strategy[proj_uuid] = entry_point + end + + for key in ["deps", "weakdeps"] + d = key == "deps" ? deps : + key == "weakdeps" ? weakdeps : + error() + d_expanded = key == "deps" ? deps_expanded : + key == "weakdeps" ? 
weakdeps_expanded : + error() + for (pkg, deps) in d + # dependencies was already expanded so use it directly: + if deps isa Vector{UUID} + d_expanded[pkg] = deps + for dep in deps + name_to_uuid[names[dep]] = dep + end + # find the (unique) UUID associated with the name + else + deps_pkg = UUID[] + sizehint!(deps_pkg, length(deps)) + for dep in deps + push!(deps_pkg, name_to_uuid[dep]) + end + d_expanded[pkg] = deps_pkg + end + end + end + + for (pkg, exts) in extensions + exts_expanded = Dict{String, Vector{UUID}}() + for (ext, triggers) in exts + triggers_expanded = UUID[] + sizehint!(triggers_expanded, length(triggers)) + for trigger in triggers + push!(triggers_expanded, name_to_uuid[trigger]) + end + exts_expanded[ext] = triggers_expanded + end + extensions_expanded[pkg] = exts_expanded + end + + # Everything that does not yet have a lookup_strategy is missing from the manifest + for (_, uuid) in project_deps + get!(lookup_strategy, uuid, missing) + end + + #= + # Preferences: + prefs = get(project_d, "preferences", nothing) + + # `(Julia)LocalPreferences.toml` + project_dir = dirname(envpath) + local_prefs = nothing + for name in preferences_names + toml_path = joinpath(project_dir, name) + if isfile(toml_path) + local_prefs = parsed_toml(toml_path) + break + end + end + =# + + return ExplicitEnv(envpath, project_deps, project_weakdeps, project_extras, + project_extensions, deps_expanded, weakdeps_expanded, extensions_expanded, + names, lookup_strategy, #=prefs, local_prefs=#) +end + +## PROGRESS BAR + +# using Printf +Base.@kwdef mutable struct MiniProgressBar + max::Int = 1.0 + header::String = "" + color::Symbol = :nothing + width::Int = 40 + current::Int = 0.0 + prev::Int = 0.0 + has_shown::Bool = false + time_shown::Float64 = 0.0 + percentage::Bool = true + always_reprint::Bool = false + indent::Int = 4 +end + +const PROGRESS_BAR_TIME_GRANULARITY = Ref(1 / 30.0) # 30 fps +const PROGRESS_BAR_PERCENTAGE_GRANULARITY = Ref(0.1) + +function start_progress(io::IO, _::MiniProgressBar) + ansi_disablecursor = "\e[?25l" + print(io, ansi_disablecursor) +end + +function show_progress(io::IO, p::MiniProgressBar; termwidth=nothing, carriagereturn=true) + if p.max == 0 + perc = 0.0 + prev_perc = 0.0 + else + perc = p.current / p.max * 100 + prev_perc = p.prev / p.max * 100 + end + # Bail early if we are not updating the progress bar, + # Saves printing to the terminal + if !p.always_reprint && p.has_shown && !((perc - prev_perc) > PROGRESS_BAR_PERCENTAGE_GRANULARITY[]) + return + end + t = time() + if !p.always_reprint && p.has_shown && (t - p.time_shown) < PROGRESS_BAR_TIME_GRANULARITY[] + return + end + p.time_shown = t + p.prev = p.current + p.has_shown = true + + progress_text = if false # p.percentage + # @sprintf "%2.1f %%" perc + else + string(p.current, "/", p.max) + end + termwidth = @something termwidth displaysize(io)[2] + max_progress_width = max(0, min(termwidth - textwidth(p.header) - textwidth(progress_text) - 10 , p.width)) + n_filled = floor(Int, max_progress_width * perc / 100) + partial_filled = (max_progress_width * perc / 100) - n_filled + n_left = max_progress_width - n_filled + headers = split(p.header, ' ') + to_print = sprint(; context=io) do io + print(io, " "^p.indent) + printstyled(io, headers[1], " "; color=:green, bold=true) + printstyled(io, join(headers[2:end], ' ')) + print(io, " ") + printstyled(io, "━"^n_filled; color=p.color) + if n_left > 0 + if partial_filled > 0.5 + printstyled(io, "╸"; color=p.color) # More filled, use ╸ + else + printstyled(io, "╺"; 
color=:light_black) # Less filled, use ╺ + end + printstyled(io, "━"^(n_left-1); color=:light_black) + end + printstyled(io, " "; color=:light_black) + print(io, progress_text) + carriagereturn && print(io, "\r") + end + # Print everything in one call + print(io, to_print) +end + +function end_progress(io, p::MiniProgressBar) + ansi_enablecursor = "\e[?25h" + ansi_clearline = "\e[2K" + print(io, ansi_enablecursor * ansi_clearline) +end + +function print_progress_bottom(io::IO) + ansi_clearline = "\e[2K" + ansi_movecol1 = "\e[1G" + ansi_moveup(n::Int) = string("\e[", n, "A") + print(io, "\e[S" * ansi_moveup(1) * ansi_clearline * ansi_movecol1) +end + + +############ +struct PkgPrecompileError <: Exception + msg::String +end +Base.showerror(io::IO, err::PkgPrecompileError) = print(io, err.msg) +Base.showerror(io::IO, err::PkgPrecompileError, bt; kw...) = Base.showerror(io, err) # hide stacktrace + +# This needs a show method to make `julia> err` show nicely +Base.show(io::IO, err::PkgPrecompileError) = print(io, "PkgPrecompileError: ", err.msg) + +import Base: StaleCacheKey + +can_fancyprint(io::IO) = io isa Base.TTY && (get(ENV, "CI", nothing) != "true") + +function printpkgstyle(io, header, msg; color=:green) + printstyled(io, header; color, bold=true) + println(io, " ", msg) +end + +const Config = Pair{Cmd, Base.CacheFlags} +const PkgConfig = Tuple{PkgId,Config} + +# name or parent → ext +function full_name(ext_to_parent::Dict{PkgId, PkgId}, pkg::PkgId) + if haskey(ext_to_parent, pkg) + return string(ext_to_parent[pkg].name, " → ", pkg.name) + else + return pkg.name + end +end + +function excluded_circular_deps_explanation(io::IOContext{IO}, ext_to_parent::Dict{PkgId, PkgId}, circular_deps, cycles) + outer_deps = copy(circular_deps) + cycles_names = "" + for cycle in cycles + filter!(!in(cycle), outer_deps) + cycle_str = "" + for (i, pkg) in enumerate(cycle) + j = max(0, i - 1) + if length(cycle) == 1 + line = " ─ " + elseif i == 1 + line = " ┌ " + elseif i < length(cycle) + line = " │ " * " " ^j + else + line = " └" * "─" ^j * " " + end + hascolor = get(io, :color, false)::Bool + line = _color_string(line, :light_black, hascolor) * full_name(ext_to_parent, pkg) * "\n" + cycle_str *= line + end + cycles_names *= cycle_str + end + plural1 = length(cycles) > 1 ? "these cycles" : "this cycle" + plural2 = length(cycles) > 1 ? "cycles" : "cycle" + msg = """Circular dependency detected. + Precompilation will be skipped for dependencies in $plural1: + $cycles_names""" + if !isempty(outer_deps) + msg *= "Precompilation will also be skipped for the following, which depend on the above $plural2:\n" + msg *= join((" " * full_name(ext_to_parent, pkg) for pkg in outer_deps), "\n") + end + return msg +end + +function precompilepkgs(pkgs::Vector{String}=String[]; + internal_call::Bool=false, + strict::Bool = false, + warn_loaded::Bool = true, + timing::Bool = false, + _from_loading::Bool=false, + configs::Union{Config,Vector{Config}}=(``=>Base.CacheFlags()), + io::IO=stderr, + # asking for timing disables fancy mode, as timing is shown in non-fancy mode + fancyprint::Bool = can_fancyprint(io) && !timing, + manifest::Bool=false, + ignore_loaded::Bool=true) + # monomorphize this to avoid latency problems + _precompilepkgs(pkgs, internal_call, strict, warn_loaded, timing, _from_loading, + configs isa Vector{Config} ? 
configs : [configs], + IOContext{IO}(io), fancyprint, manifest, ignore_loaded) +end + +function _precompilepkgs(pkgs::Vector{String}, + internal_call::Bool, + strict::Bool, + warn_loaded::Bool, + timing::Bool, + _from_loading::Bool, + configs::Vector{Config}, + io::IOContext{IO}, + fancyprint::Bool, + manifest::Bool, + ignore_loaded::Bool) + requested_pkgs = copy(pkgs) # for understanding user intent + + time_start = time_ns() + + env = ExplicitEnv() + + # Windows sometimes hits a ReadOnlyMemoryError, so we halve the default number of tasks. Issue #2323 + # TODO: Investigate why this happens in windows and restore the full task limit + default_num_tasks = Sys.iswindows() ? div(Sys.CPU_THREADS::Int, 2) + 1 : Sys.CPU_THREADS::Int + 1 + default_num_tasks = min(default_num_tasks, 16) # limit for better stability on shared resource systems + + num_tasks = parse(Int, get(ENV, "JULIA_NUM_PRECOMPILE_TASKS", string(default_num_tasks))) + parallel_limiter = Base.Semaphore(num_tasks) + + if _from_loading && !Sys.isinteractive() && Base.get_bool_env("JULIA_TESTS", false) + # suppress passive loading printing in julia test suite. `JULIA_TESTS` is set in Base.runtests + io = IOContext{IO}(devnull) + end + + nconfigs = length(configs) + hascolor = get(io, :color, false)::Bool + color_string(cstr::String, col::Union{Int64, Symbol}) = _color_string(cstr, col, hascolor) + + stale_cache = Dict{StaleCacheKey, Bool}() + cachepath_cache = Dict{PkgId, Vector{String}}() + + # a map from packages/extensions to their direct deps + direct_deps = Dict{Base.PkgId, Vector{Base.PkgId}}() + # a map from parent → extension, including all extensions that are loadable + # in the current environment (i.e. their triggers are present) + parent_to_exts = Dict{Base.PkgId, Vector{Base.PkgId}}() + # inverse map of `parent_to_ext` above (ext → parent) + ext_to_parent = Dict{Base.PkgId, Base.PkgId}() + + function describe_pkg(pkg::PkgId, is_project_dep::Bool, flags::Cmd, cacheflags::Base.CacheFlags) + name = full_name(ext_to_parent, pkg) + name = is_project_dep ? 
name : color_string(name, :light_black) + if nconfigs > 1 && !isempty(flags) + config_str = join(flags, " ") + name *= color_string(" `$config_str`", :light_black) + end + if nconfigs > 1 + config_str = join(Base.translate_cache_flags(cacheflags, Base.DefaultCacheFlags), " ") + name *= color_string(" $config_str", :light_black) + end + return name + end + + triggers = Dict{Base.PkgId,Vector{Base.PkgId}}() + for (dep, deps) in env.deps + pkg = Base.PkgId(dep, env.names[dep]) + Base.in_sysimage(pkg) && continue + deps = [Base.PkgId(x, env.names[x]) for x in deps] + direct_deps[pkg] = filter!(!Base.in_sysimage, deps) + for (ext_name, trigger_uuids) in env.extensions[dep] + ext_uuid = Base.uuid5(pkg.uuid, ext_name) + ext = Base.PkgId(ext_uuid, ext_name) + triggers[ext] = Base.PkgId[pkg] # depends on parent package + all_triggers_available = true + for trigger_uuid in trigger_uuids + trigger_name = env.names[trigger_uuid] + if trigger_uuid in keys(env.deps) + push!(triggers[ext], Base.PkgId(trigger_uuid, trigger_name)) + else + all_triggers_available = false + break + end + end + all_triggers_available || continue + ext_to_parent[ext] = pkg + direct_deps[ext] = filter(!Base.in_sysimage, triggers[ext]) + + if !haskey(parent_to_exts, pkg) + parent_to_exts[pkg] = Base.PkgId[ext] + else + push!(parent_to_exts[pkg], ext) + end + end + end + + project_deps = [ + Base.PkgId(uuid, name) + for (name, uuid) in env.project_deps if !Base.in_sysimage(Base.PkgId(uuid, name)) + ] + + # consider exts of project deps to be project deps so that errors are reported + append!(project_deps, keys(filter(d->last(d).name in keys(env.project_deps), ext_to_parent))) + + @debug "precompile: deps collected" + + # An extension effectively depends on another extension if it has a strict superset of its triggers + for ext_a in keys(ext_to_parent) + for ext_b in keys(ext_to_parent) + if triggers[ext_a] ⊋ triggers[ext_b] + push!(direct_deps[ext_a], ext_b) + end + end + end + + # A package depends on an extension if it (indirectly) depends on all extension triggers + function expand_indirect_dependencies(direct_deps) + function visit!(visited, node, all_deps) + if node in visited + return + end + push!(visited, node) + for dep in get(Set{Base.PkgId}, direct_deps, node) + if !(dep in all_deps) + push!(all_deps, dep) + visit!(visited, dep, all_deps) + end + end + end + + indirect_deps = Dict{Base.PkgId, Set{Base.PkgId}}() + for package in keys(direct_deps) + # Initialize a set to keep track of all dependencies for 'package' + all_deps = Set{Base.PkgId}() + visited = Set{Base.PkgId}() + visit!(visited, package, all_deps) + # Update direct_deps with the complete set of dependencies for 'package' + indirect_deps[package] = all_deps + end + return indirect_deps + end + + # this loop must be run after the full direct_deps map has been populated + indirect_deps = expand_indirect_dependencies(direct_deps) + for ext in keys(ext_to_parent) + ext_loadable_in_pkg = Dict{Base.PkgId,Bool}() + for pkg in keys(direct_deps) + is_trigger = in(pkg, direct_deps[ext]) + is_extension = in(pkg, keys(ext_to_parent)) + has_triggers = issubset(direct_deps[ext], indirect_deps[pkg]) + ext_loadable_in_pkg[pkg] = !is_extension && has_triggers && !is_trigger + end + for (pkg, ext_loadable) in ext_loadable_in_pkg + if ext_loadable && !any((dep)->ext_loadable_in_pkg[dep], direct_deps[pkg]) + # add an edge if the extension is loadable by pkg, and was not loadable in any + # of the pkg's dependencies + push!(direct_deps[pkg], ext) + end + end + end + @debug 
"precompile: extensions collected" + + # return early if no deps + if isempty(direct_deps) + if isempty(pkgs) + return + elseif _from_loading + # if called from loading precompilation it may be a package from another environment stack so + # don't error and allow serial precompilation to try + # TODO: actually handle packages from other envs in the stack + return + else + error("No direct dependencies outside of the sysimage found matching $(pkgs)") + end + end + + # initialize signalling + started = Dict{PkgConfig,Bool}() + was_processed = Dict{PkgConfig,Base.Event}() + was_recompiled = Dict{PkgConfig,Bool}() + for config in configs + for pkgid in keys(direct_deps) + pkg_config = (pkgid, config) + started[pkg_config] = false + was_processed[pkg_config] = Base.Event() + was_recompiled[pkg_config] = false + end + end + @debug "precompile: signalling initialized" + + # find and guard against circular deps + cycles = Vector{Base.PkgId}[] + # For every scanned package, true if pkg found to be in a cycle + # or depends on packages in a cycle and false otherwise. + could_be_cycle = Dict{Base.PkgId, Bool}() + # temporary stack for the SCC-like algorithm below + stack = Base.PkgId[] + function scan_pkg!(pkg, dmap) + if haskey(could_be_cycle, pkg) + return could_be_cycle[pkg] + else + return scan_deps!(pkg, dmap) + end + end + function scan_deps!(pkg, dmap) + push!(stack, pkg) + cycle = nothing + for dep in dmap[pkg] + if dep in stack + # Created fresh cycle + cycle′ = stack[findlast(==(dep), stack):end] + if cycle === nothing || length(cycle′) < length(cycle) + cycle = cycle′ # try to report smallest cycle possible + end + elseif scan_pkg!(dep, dmap) + # Reaches an existing cycle + could_be_cycle[pkg] = true + pop!(stack) + return true + end + end + pop!(stack) + if cycle !== nothing + push!(cycles, cycle) + could_be_cycle[pkg] = true + return true + end + could_be_cycle[pkg] = false + return false + end + # set of packages that depend on a cycle (either because they are + # a part of a cycle themselves or because they transitively depend + # on a package in some cycle) + circular_deps = Base.PkgId[] + for pkg in keys(direct_deps) + @assert isempty(stack) + if scan_pkg!(pkg, direct_deps) + push!(circular_deps, pkg) + for pkg_config in keys(was_processed) + # notify all to allow skipping + pkg_config[1] == pkg && notify(was_processed[pkg_config]) + end + end + end + if !isempty(circular_deps) + @warn excluded_circular_deps_explanation(io, ext_to_parent, circular_deps, cycles) + end + @debug "precompile: circular dep check done" + + if !manifest + if isempty(pkgs) + pkgs = [pkg.name for pkg in project_deps] + end + # restrict to dependencies of given packages + function collect_all_deps(direct_deps, dep, alldeps=Set{Base.PkgId}()) + for _dep in direct_deps[dep] + if !(_dep in alldeps) + push!(alldeps, _dep) + collect_all_deps(direct_deps, _dep, alldeps) + end + end + return alldeps + end + keep = Set{Base.PkgId}() + for dep in direct_deps + dep_pkgid = first(dep) + if dep_pkgid.name in pkgs + push!(keep, dep_pkgid) + collect_all_deps(direct_deps, dep_pkgid, keep) + end + end + for ext in keys(ext_to_parent) + if issubset(collect_all_deps(direct_deps, ext), keep) # if all extension deps are kept + push!(keep, ext) + end + end + filter!(d->in(first(d), keep), direct_deps) + if isempty(direct_deps) + if _from_loading + # if called from loading precompilation it may be a package from another environment stack so + # don't error and allow serial precompilation to try + # TODO: actually handle packages 
from other envs in the stack + return + else + return + end + end + end + + target = nothing + if nconfigs == 1 + if !isempty(only(configs)[1]) + target = "for configuration $(join(only(configs)[1], " "))" + end + else + target = "for $nconfigs compilation configurations..." + end + @debug "precompile: packages filtered" + + pkg_queue = PkgConfig[] + failed_deps = Dict{PkgConfig, String}() + precomperr_deps = PkgConfig[] # packages that may succeed after a restart (i.e. loaded packages with no cache file) + + print_lock = io.io isa Base.LibuvStream ? io.io.lock::ReentrantLock : ReentrantLock() + first_started = Base.Event() + printloop_should_exit::Bool = !fancyprint # exit print loop immediately if not fancy printing + interrupted_or_done = Base.Event() + + ansi_moveup(n::Int) = string("\e[", n, "A") + ansi_movecol1 = "\e[1G" + ansi_cleartoend = "\e[0J" + ansi_cleartoendofline = "\e[0K" + ansi_enablecursor = "\e[?25h" + ansi_disablecursor = "\e[?25l" + n_done::Int = 0 + n_already_precomp::Int = 0 + n_loaded::Int = 0 + interrupted = false + + function handle_interrupt(err, in_printloop = false) + notify(interrupted_or_done) + in_printloop || wait(t_print) # wait to let the print loop cease first + if err isa InterruptException + lock(print_lock) do + println(io, " Interrupted: Exiting precompilation...", ansi_cleartoendofline) + end + interrupted = true + return true + else + return false + end + end + std_outputs = Dict{PkgConfig,IOBuffer}() + taskwaiting = Set{PkgConfig}() + pkgspidlocked = Dict{PkgConfig,String}() + pkg_liveprinted = nothing + + function monitor_std(pkg_config, pipe; single_requested_pkg=false) + pkg, config = pkg_config + try + liveprinting = false + while !eof(pipe) + str = readline(pipe, keep=true) + if single_requested_pkg && (liveprinting || !isempty(str)) + lock(print_lock) do + if !liveprinting + printpkgstyle(io, :Info, "Given $(pkg.name) was explicitly requested, output will be shown live $ansi_cleartoendofline", + color = Base.info_color()) + liveprinting = true + pkg_liveprinted = pkg + end + print(io, ansi_cleartoendofline, str) + end + end + write(get!(IOBuffer, std_outputs, pkg_config), str) + if !in(pkg_config, taskwaiting) && occursin("waiting for IO to finish", str) + !fancyprint && lock(print_lock) do + println(io, pkg.name, color_string(" Waiting for background task / IO / timer.", Base.warn_color())) + end + push!(taskwaiting, pkg_config) + end + if !fancyprint && in(pkg_config, taskwaiting) + lock(print_lock) do + print(io, str) + end + end + end + catch err + err isa InterruptException || rethrow() + end + end + + ## fancy print loop + t_print = @async begin + try + wait(first_started) + (isempty(pkg_queue) || interrupted_or_done.set) && return + lock(print_lock) do + if target !== nothing + printpkgstyle(io, :Precompiling, target) + end + if fancyprint + print(io, ansi_disablecursor) + end + end + t = Timer(0; interval=1/10) + anim_chars = ["◐","◓","◑","◒"] + i = 1 + last_length = 0 + bar = MiniProgressBar(; indent=0, header = "Precompiling packages ", color = :green, percentage=false, always_reprint=true) + n_total = length(direct_deps) * length(configs) + bar.max = n_total - n_already_precomp + final_loop = false + n_print_rows = 0 + while !printloop_should_exit + lock(print_lock) do + term_size = displaysize(io) + num_deps_show = max(term_size[1] - 3, 2) # show at least 2 deps + pkg_queue_show = if !interrupted_or_done.set && length(pkg_queue) > num_deps_show + last(pkg_queue, num_deps_show) + else + pkg_queue + end + str_ = sprint() do iostr + 
if i > 1 + print(iostr, ansi_cleartoend) + end + bar.current = n_done - n_already_precomp + bar.max = n_total - n_already_precomp + # when sizing to the terminal width subtract a little to give some tolerance to resizing the + # window between print cycles + termwidth = displaysize(io)[2] - 4 + if !final_loop + str = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) + print(iostr, Base._truncate_at_width_or_chars(true, str, termwidth), "\n") + end + for pkg_config in pkg_queue_show + dep, config = pkg_config + loaded = warn_loaded && haskey(Base.loaded_modules, dep) + flags, cacheflags = config + name = describe_pkg(dep, dep in project_deps, flags, cacheflags) + line = if pkg_config in precomperr_deps + string(color_string(" ? ", Base.warn_color()), name) + elseif haskey(failed_deps, pkg_config) + string(color_string(" ✗ ", Base.error_color()), name) + elseif was_recompiled[pkg_config] + !loaded && interrupted_or_done.set && continue + loaded || @async begin # keep successful deps visible for short period + sleep(1); + filter!(!isequal(pkg_config), pkg_queue) + end + string(color_string(" ✓ ", loaded ? Base.warn_color() : :green), name) + elseif started[pkg_config] + # Offset each spinner animation using the first character in the package name as the seed. + # If not offset, on larger terminal fonts it looks odd that they all sync-up + anim_char = anim_chars[(i + Int(dep.name[1])) % length(anim_chars) + 1] + anim_char_colored = dep in project_deps ? anim_char : color_string(anim_char, :light_black) + waiting = if haskey(pkgspidlocked, pkg_config) + who_has_lock = pkgspidlocked[pkg_config] + color_string(" Being precompiled by $(who_has_lock)", Base.info_color()) + elseif pkg_config in taskwaiting + color_string(" Waiting for background task / IO / timer. 
Interrupt to inspect", Base.warn_color()) + else + "" + end + string(" ", anim_char_colored, " ", name, waiting) + else + string(" ", name) + end + println(iostr, Base._truncate_at_width_or_chars(true, line, termwidth)) + end + end + last_length = length(pkg_queue_show) + n_print_rows = count("\n", str_) + print(io, str_) + printloop_should_exit = interrupted_or_done.set && final_loop + final_loop = interrupted_or_done.set # ensures one more loop to tidy last task after finish + i += 1 + printloop_should_exit || print(io, ansi_moveup(n_print_rows), ansi_movecol1) + end + wait(t) + end + catch err + handle_interrupt(err, true) || rethrow() + finally + fancyprint && print(io, ansi_enablecursor) + end + end + tasks = Task[] + if !_from_loading + Base.LOADING_CACHE[] = Base.LoadingCache() + end + @debug "precompile: starting precompilation loop" direct_deps project_deps + ## precompilation loop + + for (pkg, deps) in direct_deps + cachepaths = get!(() -> Base.find_all_in_cache_path(pkg), cachepath_cache, pkg) + sourcepath = Base.locate_package(pkg) + single_requested_pkg = length(requested_pkgs) == 1 && only(requested_pkgs) == pkg.name + for config in configs + pkg_config = (pkg, config) + if sourcepath === nothing + failed_deps[pkg_config] = "Error: Missing source file for $(pkg)" + notify(was_processed[pkg_config]) + continue + end + # Heuristic for when precompilation is disabled + if occursin(r"\b__precompile__\(\s*false\s*\)", read(sourcepath, String)) + notify(was_processed[pkg_config]) + continue + end + flags, cacheflags = config + task = @async begin + try + loaded = haskey(Base.loaded_modules, pkg) + for dep in deps # wait for deps to finish + wait(was_processed[(dep,config)]) + end + circular = pkg in circular_deps + is_stale = !Base.isprecompiled(pkg; ignore_loaded, stale_cache, cachepath_cache, cachepaths, sourcepath, flags=cacheflags) + if !circular && is_stale + Base.acquire(parallel_limiter) + is_project_dep = pkg in project_deps + + # std monitoring + std_pipe = Base.link_pipe!(Pipe(); reader_supports_async=true, writer_supports_async=true) + t_monitor = @async monitor_std(pkg_config, std_pipe; single_requested_pkg) + + name = describe_pkg(pkg, is_project_dep, flags, cacheflags) + lock(print_lock) do + if !fancyprint && isempty(pkg_queue) + printpkgstyle(io, :Precompiling, something(target, "packages...")) + end + end + push!(pkg_queue, pkg_config) + started[pkg_config] = true + fancyprint && notify(first_started) + if interrupted_or_done.set + notify(was_processed[pkg_config]) + Base.release(parallel_limiter) + return + end + try + # allows processes to wait if another process is precompiling a given package to + # a functionally identical package cache (except for preferences, which may differ) + t = @elapsed ret = precompile_pkgs_maybe_cachefile_lock(io, print_lock, fancyprint, pkg_config, pkgspidlocked, hascolor, parallel_limiter, ignore_loaded) do + Base.with_logger(Base.NullLogger()) do + # whether to respect already loaded dependency versions + keep_loaded_modules = !ignore_loaded + # for extensions, any extension in our direct dependencies is one we have a right to load + # for packages, we may load any extension (all possible triggers are accounted for above) + loadable_exts = haskey(ext_to_parent, pkg) ? 
filter((dep)->haskey(ext_to_parent, dep), direct_deps[pkg]) : nothing + Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, keep_loaded_modules; + flags, cacheflags, loadable_exts) + end + end + if ret isa Base.PrecompilableError + push!(precomperr_deps, pkg_config) + !fancyprint && lock(print_lock) do + println(io, _timing_string(t), color_string(" ? ", Base.warn_color()), name) + end + else + !fancyprint && lock(print_lock) do + println(io, _timing_string(t), color_string(" ✓ ", loaded ? Base.warn_color() : :green), name) + end + was_recompiled[pkg_config] = true + end + loaded && (n_loaded += 1) + catch err + # @show err + close(std_pipe.in) # close pipe to end the std output monitor + wait(t_monitor) + if err isa ErrorException || (err isa ArgumentError && startswith(err.msg, "Invalid header in cache file")) + errmsg = String(take!(get(IOBuffer, std_outputs, pkg_config))) + delete!(std_outputs, pkg_config) # so it's not shown as warnings, given error report + failed_deps[pkg_config] = (strict || is_project_dep) ? string(sprint(showerror, err), "\n", strip(errmsg)) : "" + !fancyprint && lock(print_lock) do + println(io, " "^9, color_string(" ✗ ", Base.error_color()), name) + end + else + rethrow() + end + finally + isopen(std_pipe.in) && close(std_pipe.in) # close pipe to end the std output monitor + wait(t_monitor) + Base.release(parallel_limiter) + end + else + is_stale || (n_already_precomp += 1) + end + n_done += 1 + notify(was_processed[pkg_config]) + catch err_outer + # For debugging: + # println("Task failed $err_outer") + # Base.display_error(ErrorException(""), Base.catch_backtrace())# logging doesn't show here + handle_interrupt(err_outer) || rethrow() + notify(was_processed[pkg_config]) + finally + filter!(!istaskdone, tasks) + length(tasks) == 1 && notify(interrupted_or_done) + end + end + Base.errormonitor(task) # interrupts are handled separately so ok to watch for other errors like this + push!(tasks, task) + end + end + isempty(tasks) && notify(interrupted_or_done) + try + wait(interrupted_or_done) + catch err + handle_interrupt(err) || rethrow() + finally + Base.LOADING_CACHE[] = nothing + end + notify(first_started) # in cases of no-op or !fancyprint + fancyprint && wait(t_print) + quick_exit = !all(istaskdone, tasks) || interrupted # if some not finished internal error is likely + seconds_elapsed = round(Int, (time_ns() - time_start) / 1e9) + ndeps = count(values(was_recompiled)) + if ndeps > 0 || !isempty(failed_deps) || (quick_exit && !isempty(std_outputs)) + str = sprint(context=io) do iostr + if !quick_exit + if fancyprint # replace the progress bar + what = isempty(requested_pkgs) ? "packages finished." : "$(join(requested_pkgs, ", ", " and ")) finished." + printpkgstyle(iostr, :Precompiling, what) + end + plural = length(configs) > 1 ? "dependency configurations" : ndeps == 1 ? "dependency" : "dependencies" + print(iostr, " $(ndeps) $(plural) successfully precompiled in $(seconds_elapsed) seconds") + if n_already_precomp > 0 || !isempty(circular_deps) + n_already_precomp > 0 && (print(iostr, ". $n_already_precomp already precompiled")) + !isempty(circular_deps) && (print(iostr, ". $(length(circular_deps)) skipped due to circular dependency")) + print(iostr, ".") + end + if n_loaded > 0 + plural1 = length(configs) > 1 ? "dependency configurations" : n_loaded == 1 ? "dependency" : "dependencies" + plural2 = n_loaded == 1 ? "a different version is" : "different versions are" + plural3 = n_loaded == 1 ? "" : "s" + plural4 = n_loaded == 1 ? 
"this package" : "these packages" + print(iostr, "\n ", + color_string(string(n_loaded), Base.warn_color()), + " $(plural1) precompiled but ", + color_string("$(plural2) currently loaded", Base.warn_color()), + ". Restart julia to access the new version$(plural3). \ + Otherwise, loading dependents of $(plural4) may trigger further precompilation to work with the unexpected version$(plural3)." + ) + end + if !isempty(precomperr_deps) + pluralpc = length(configs) > 1 ? "dependency configurations" : precomperr_deps == 1 ? "dependency" : "dependencies" + print(iostr, "\n ", + color_string(string(length(precomperr_deps)), Base.warn_color()), + " $(pluralpc) failed but may be precompilable after restarting julia" + ) + end + end + # show any stderr output, even if Pkg.precompile has been interrupted (quick_exit=true), given user may be + # interrupting a hanging precompile job with stderr output. julia#48371 + let std_outputs = Tuple{PkgConfig,SubString{String}}[(pkg_config, strip(String(take!(io)))) for (pkg_config,io) in std_outputs] + filter!(kv -> !isempty(last(kv)), std_outputs) + if !isempty(std_outputs) + plural1 = length(std_outputs) == 1 ? "y" : "ies" + plural2 = length(std_outputs) == 1 ? "" : "s" + print(iostr, "\n ", color_string("$(length(std_outputs))", Base.warn_color()), " dependenc$(plural1) had output during precompilation:") + for (pkg_config, err) in std_outputs + pkg, config = pkg_config + err = if pkg == pkg_liveprinted + "[Output was shown above]" + else + join(split(err, "\n"), color_string("\n│ ", Base.warn_color())) + end + name = full_name(ext_to_parent, pkg) + print(iostr, color_string("\n┌ ", Base.warn_color()), name, color_string("\n│ ", Base.warn_color()), err, color_string("\n└ ", Base.warn_color())) + end + end + end + end + let str=str + lock(print_lock) do + println(io, str) + end + end + quick_exit && return + err_str = IOBuffer() + n_direct_errs = 0 + for (pkg_config, err) in failed_deps + dep, config = pkg_config + if strict || (dep in project_deps) + print(err_str, "\n", dep.name, " ") + for cfg in config[1] + print(err_str, cfg, " ") + end + print(err_str, "\n\n", err) + n_direct_errs > 0 && write(err_str, "\n") + n_direct_errs += 1 + end + end + if position(err_str) > 0 + skip(err_str, -1) + truncate(err_str, position(err_str)) + pluralde = n_direct_errs == 1 ? "y" : "ies" + direct = strict ? "" : "direct " + err_msg = "The following $n_direct_errs $(direct)dependenc$(pluralde) failed to precompile:\n$(String(take!(err_str)))" + if internal_call # aka. auto-precompilation + if isinteractive() + plural1 = length(failed_deps) == 1 ? "y" : "ies" + println(io, " ", color_string("$(length(failed_deps))", Base.error_color()), " dependenc$(plural1) errored.") + println(io, " For a report of the errors see `julia> err`. 
To retry use `pkg> precompile`") + setglobal!(Base.MainInclude, :err, PkgPrecompileError(err_msg)) + else + # auto-precompilation shouldn't throw but if the user can't easily access the + # error messages, just show them + print(io, "\n", err_msg) + end + else + println(io) + throw(PkgPrecompileError(err_msg)) + end + end + end + nothing +end + +_timing_string(t) = string(lpad(round(t * 1e3, digits = 1), 9), " ms") + +function _color_string(cstr::String, col::Union{Int64, Symbol}, hascolor) + if hascolor + enable_ansi = get(Base.text_colors, col, Base.text_colors[:default]) + disable_ansi = get(Base.disable_text_style, col, Base.text_colors[:default]) + return string(enable_ansi, cstr, disable_ansi) + else + return cstr + end +end + +# Can be merged with `maybe_cachefile_lock` in loading? +function precompile_pkgs_maybe_cachefile_lock(f, io::IO, print_lock::ReentrantLock, fancyprint::Bool, pkg_config, pkgspidlocked, hascolor, parallel_limiter::Base.Semaphore, ignore_loaded::Bool) + if !(isdefined(Base, :mkpidlock_hook) && isdefined(Base, :trymkpidlock_hook) && Base.isdefined(Base, :parse_pidfile_hook)) + return f() + end + pkg, config = pkg_config + flags, cacheflags = config + stale_age = Base.compilecache_pidlock_stale_age + pidfile = Base.compilecache_pidfile_path(pkg, flags=cacheflags) + cachefile = @invokelatest Base.trymkpidlock_hook(f, pidfile; stale_age) + if cachefile === false + pid, hostname, age = @invokelatest Base.parse_pidfile_hook(pidfile) + pkgspidlocked[pkg_config] = if isempty(hostname) || hostname == gethostname() + if pid == getpid() + "an async task in this process (pidfile: $pidfile)" + else + "another process (pid: $pid, pidfile: $pidfile)" + end + else + "another machine (hostname: $hostname, pid: $pid, pidfile: $pidfile)" + end + !fancyprint && lock(print_lock) do + println(io, " ", pkg.name, _color_string(" Being precompiled by $(pkgspidlocked[pkg_config])", Base.info_color(), hascolor)) + end + Base.release(parallel_limiter) # release so other work can be done while waiting + try + # wait until the lock is available + @invokelatest Base.mkpidlock_hook(() -> begin + # double-check in case the other process crashed or the lock expired + if Base.isprecompiled(pkg; ignore_loaded, flags=cacheflags) # don't use caches for this as the env state will have changed + return nothing # returning nothing indicates a process waited for another + else + delete!(pkgspidlocked, pkg_config) + Base.acquire(f, parallel_limiter) # precompile + end + end, + pidfile; stale_age) + finally + Base.acquire(parallel_limiter) # re-acquire so the outer release is balanced + end + end + return cachefile +end + +end diff --git a/base/process.jl b/base/process.jl index ed51a30ae3ced..fbc4acfd83e80 100644 --- a/base/process.jl +++ b/base/process.jl @@ -6,11 +6,12 @@ mutable struct Process <: AbstractPipe in::IO out::IO err::IO + syncd::Vector{Task} exitcode::Int64 termsignal::Int32 exitnotify::ThreadSynchronizer - function Process(cmd::Cmd, handle::Ptr{Cvoid}) - this = new(cmd, handle, devnull, devnull, devnull, + function Process(cmd::Cmd, handle::Ptr{Cvoid}, syncd::Vector{Task}) + this = new(cmd, handle, devnull, devnull, devnull, syncd, typemin(fieldtype(Process, :exitcode)), typemin(fieldtype(Process, :termsignal)), ThreadSynchronizer()) @@ -35,6 +36,15 @@ end pipe_reader(p::ProcessChain) = p.out pipe_writer(p::ProcessChain) = p.in +# a lightweight pair of a child OS_HANDLE and associated Task that will +# complete only after all content has been read from it for synchronizing +# state 
without the kernel to aide +struct SyncCloseFD + fd + t::Task +end +rawhandle(io::SyncCloseFD) = rawhandle(io.fd) + # release ownership of the libuv handle function uvfinalize(proc::Process) if proc.handle != C_NULL @@ -74,8 +84,8 @@ function _uv_hook_close(proc::Process) nothing end -const SpawnIO = Union{IO, RawFD, OS_HANDLE} -const SpawnIOs = Vector{SpawnIO} # convenience name for readability +const SpawnIO = Union{IO, RawFD, OS_HANDLE, SyncCloseFD} # internal copy of Redirectable, removing FileRedirect and adding SyncCloseFD +const SpawnIOs = Memory{SpawnIO} # convenience name for readability (used for dispatch also to clearly distinguish from Vector{Redirectable}) function as_cpumask(cpus::Vector{UInt16}) n = max(Int(maximum(cpus)), Int(ccall(:uv_cpumask_size, Cint, ()))) @@ -100,9 +110,11 @@ end error("invalid spawn handle $h from $io") end for io in stdio] + syncd = Task[io.t for io in stdio if io isa SyncCloseFD] handle = Libc.malloc(_sizeof_uv_process) disassociate_julia_struct(handle) (; exec, flags, env, dir) = cmd + flags ⊻= UV_PROCESS_WINDOWS_DISABLE_EXACT_NAME # libuv inverts the default for this, so flip this bit now iolock_begin() err = ccall(:jl_spawn, Int32, (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid}, @@ -117,7 +129,7 @@ end cpumask === nothing ? 0 : length(cpumask), @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32))) if err == 0 - pp = Process(cmd, handle) + pp = Process(cmd, handle, syncd) associate_julia_struct(handle, pp) else ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually @@ -130,23 +142,24 @@ end return pp end -_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIO[]) +_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIOs()) -# optimization: we can spawn `Cmd` directly without allocating the ProcessChain -function _spawn(cmd::Cmd, stdios::SpawnIOs) - isempty(cmd.exec) && throw(ArgumentError("cannot spawn empty command")) +function _spawn(cmd::AbstractCmd, stdios::Vector{Redirectable}) pp = setup_stdios(stdios) do stdios - return _spawn_primitive(cmd.exec[1], cmd, stdios) + return _spawn(cmd, stdios) end return pp end +# optimization: we can spawn `Cmd` directly without allocating the ProcessChain +function _spawn(cmd::Cmd, stdios::SpawnIOs) + isempty(cmd.exec) && throw(ArgumentError("cannot spawn empty command")) + return _spawn_primitive(cmd.exec[1], cmd, stdios) +end + # assume that having a ProcessChain means that the stdio are setup function _spawn(cmds::AbstractCmd, stdios::SpawnIOs) - pp = setup_stdios(stdios) do stdios - return _spawn(cmds, stdios, ProcessChain()) - end - return pp + return _spawn(cmds, stdios, ProcessChain()) end # helper function for making a copy of a SpawnIOs, with replacement @@ -212,7 +225,7 @@ end # open the child end of each element of `stdios`, and initialize the parent end -function setup_stdios(f, stdios::SpawnIOs) +function setup_stdios(f, stdios::Vector{Redirectable}) nstdio = length(stdios) open_io = SpawnIOs(undef, nstdio) close_io = falses(nstdio) @@ -295,25 +308,26 @@ function setup_stdio(stdio::IO, child_readable::Bool) child = child_readable ? rd : wr try let in = (child_readable ? parent : stdio), - out = (child_readable ? stdio : parent) - @async try + out = (child_readable ? 
stdio : parent), + t = @async try write(in, out) catch ex @warn "Process I/O error" exception=(ex, catch_backtrace()) + rethrow() finally close(parent) - child_readable || closewrite(stdio) end + return (SyncCloseFD(child, t), true) end catch close_pipe_sync(child) rethrow() end - return (child, true) end -close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio) close_stdio(stdio) = close(stdio) +close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio) +close_stdio(stdio::SyncCloseFD) = close_stdio(stdio.fd) # INTERNAL # pad out stdio to have at least three elements, @@ -325,19 +339,19 @@ close_stdio(stdio) = close(stdio) # - An Filesystem.File or IOStream object to redirect the output to # - A FileRedirect, containing a string specifying a filename to be opened for the child -spawn_opts_swallow(stdios::StdIOSet) = SpawnIO[stdios...] -spawn_opts_inherit(stdios::StdIOSet) = SpawnIO[stdios...] +spawn_opts_swallow(stdios::StdIOSet) = Redirectable[stdios...] +spawn_opts_inherit(stdios::StdIOSet) = Redirectable[stdios...] spawn_opts_swallow(in::Redirectable=devnull, out::Redirectable=devnull, err::Redirectable=devnull) = - SpawnIO[in, out, err] + Redirectable[in, out, err] # pass original descriptors to child processes by default, because we might # have already exhausted and closed the libuv object for our standard streams. # ref issue #8529 spawn_opts_inherit(in::Redirectable=RawFD(0), out::Redirectable=RawFD(1), err::Redirectable=RawFD(2)) = - SpawnIO[in, out, err] + Redirectable[in, out, err] function eachline(cmd::AbstractCmd; keep::Bool=false) out = PipeEndpoint() - processes = _spawn(cmd, SpawnIO[devnull, out, stderr]) + processes = _spawn(cmd, Redirectable[devnull, out, stderr]) # if the user consumes all the data, also check process exit status for success ondone = () -> (success(processes) || pipeline_error(processes); nothing) return EachLine(out, keep=keep, ondone=ondone)::EachLine @@ -385,20 +399,20 @@ function open(cmds::AbstractCmd, stdio::Redirectable=devnull; write::Bool=false, stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in read-write mode")) in = PipeEndpoint() out = PipeEndpoint() - processes = _spawn(cmds, SpawnIO[in, out, stderr]) + processes = _spawn(cmds, Redirectable[in, out, stderr]) processes.in = in processes.out = out elseif read out = PipeEndpoint() - processes = _spawn(cmds, SpawnIO[stdio, out, stderr]) + processes = _spawn(cmds, Redirectable[stdio, out, stderr]) processes.out = out elseif write in = PipeEndpoint() - processes = _spawn(cmds, SpawnIO[in, stdio, stderr]) + processes = _spawn(cmds, Redirectable[in, stdio, stderr]) processes.in = in else stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in no-access mode")) - processes = _spawn(cmds, SpawnIO[devnull, devnull, stderr]) + processes = _spawn(cmds, Redirectable[devnull, devnull, stderr]) end return processes end @@ -415,12 +429,18 @@ function open(f::Function, cmds::AbstractCmd, args...; kwargs...) P = open(cmds, args...; kwargs...) 
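For orientation, a minimal usage sketch of the public `open(cmd; read, write)` path that the rewired `_spawn`/`Redirectable` plumbing above serves. It is an editorial example, not part of the diff, and it assumes a POSIX `cat` executable is available on `PATH`:

```julia
# Illustrative only: exercises the read/write `open` path shown above.
p = open(`cat`; read = true, write = true)  # wires PipeEndpoints to p.in / p.out
write(p, "hello\n")
close(p.in)                                 # send EOF to the child
@assert readline(p) == "hello"
wait(p)                                     # with syncd=true this also waits for the sync'd I/O tasks
@assert success(p)
```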
function waitkill(P::Union{Process,ProcessChain}) close(P) - # 0.1 seconds after we hope it dies (from closing stdio), - # we kill the process with SIGTERM (15) - local t = Timer(0.1) do t + # shortly after we hope it starts cleanup and dies (from closing + # stdio), we kill the process with SIGTERM (15) so that we can proceed + # with throwing the error and hope it will exit soon from that + local t = Timer(2) do t process_running(P) && kill(P) end - wait(P) + # pass false to indicate that we do not care about data-races on the + # Julia stdio objects after this point, since we already know this is + # an error path and the state of them is fairly unpredictable anyways + # in that case. Since we closed P some of those should come crumbling + # down already, and we don't want to throw that error here either. + wait(P, false) close(t) end ret = try @@ -430,10 +450,23 @@ function open(f::Function, cmds::AbstractCmd, args...; kwargs...) rethrow() end close(P.in) + closestdio = @async begin + # wait for P to complete (including sync'd), then mark the output streams for EOF (if applicable to that stream type) + wait(P) + err = P.err + applicable(closewrite, err) && closewrite(err) + out = P.out + applicable(closewrite, out) && closewrite(out) + nothing + end + # now verify that the output stream is at EOF, and the user didn't fail to consume it successfully + # (we do not currently verify the user dealt with the stderr stream) if !(eof(P.out)::Bool) waitkill(P) throw(_UVError("open(do)", UV_EPIPE)) end + # make sure to closestdio is completely done to avoid data-races later + wait(closestdio) success(P) || pipeline_error(P) return ret end @@ -650,26 +683,31 @@ function process_status(s::Process) error("process status error") end -function wait(x::Process) - process_exited(x) && return - iolock_begin() +function wait(x::Process, syncd::Bool=true) if !process_exited(x) - preserve_handle(x) - lock(x.exitnotify) - iolock_end() - try - wait(x.exitnotify) - finally - unlock(x.exitnotify) - unpreserve_handle(x) + iolock_begin() + if !process_exited(x) + preserve_handle(x) + lock(x.exitnotify) + iolock_end() + try + wait(x.exitnotify) + finally + unlock(x.exitnotify) + unpreserve_handle(x) + end + else + iolock_end() end - else - iolock_end() + end + # and make sure all sync'd Tasks are complete too + syncd && for t in x.syncd + wait(t) end nothing end -wait(x::ProcessChain) = foreach(wait, x.processes) +wait(x::ProcessChain, syncd::Bool=true) = foreach(p -> wait(p, syncd), x.processes) show(io::IO, p::Process) = print(io, "Process(", p.cmd, ", ", process_status(p), ")") diff --git a/base/promotion.jl b/base/promotion.jl index 6e32bd7a42efa..72257f8ba5a3d 100644 --- a/base/promotion.jl +++ b/base/promotion.jl @@ -18,10 +18,12 @@ Number ``` """ typejoin() = Bottom -typejoin(@nospecialize(t)) = t -typejoin(@nospecialize(t), ts...) = (@_foldable_meta; typejoin(t, typejoin(ts...))) +typejoin(@nospecialize(t)) = (@_nospecializeinfer_meta; t) +typejoin(@nospecialize(t), @nospecialize(s), @nospecialize(u)) = (@_foldable_meta; @_nospecializeinfer_meta; typejoin(typejoin(t, s), u)) +typejoin(@nospecialize(t), @nospecialize(s), @nospecialize(u), ts...) 
= (@_foldable_meta; @_nospecializeinfer_meta; afoldl(typejoin, typejoin(t, s, u), ts...)) function typejoin(@nospecialize(a), @nospecialize(b)) @_foldable_meta + @_nospecializeinfer_meta if isa(a, TypeVar) return typejoin(a.ub, b) elseif isa(b, TypeVar) @@ -90,9 +92,9 @@ function typejoin(@nospecialize(a), @nospecialize(b)) elseif b <: Tuple return Any end - while b !== Any + while !(b === Any) if a <: b.name.wrapper - while a.name !== b.name + while !(a.name === b.name) a = supertype(a)::DataType end if a.name === Type.body.name @@ -119,7 +121,7 @@ function typejoin(@nospecialize(a), @nospecialize(b)) aprimary = aprimary::UnionAll # pushfirst!(vars, aprimary.var) _growbeg!(vars, 1) - arrayset(false, vars, aprimary.var, 1) + vars[1] = aprimary.var aprimary = aprimary.body end end @@ -139,6 +141,7 @@ end # (Core.Compiler.isnotbrokensubtype), use only simple types for `b` function typesplit(@nospecialize(a), @nospecialize(b)) @_foldable_meta + @_nospecializeinfer_meta if a <: b return Bottom end @@ -196,16 +199,15 @@ end function typejoin_union_tuple(T::DataType) @_foldable_meta - u = Base.unwrap_unionall(T) - p = (u::DataType).parameters - lr = length(p)::Int + p = T.parameters + lr = length(p) if lr == 0 return Tuple{} end c = Vector{Any}(undef, lr) for i = 1:lr pi = p[i] - U = Core.Compiler.unwrapva(pi) + U = unwrapva(pi) if U === Union{} ci = Union{} elseif U isa Union @@ -215,7 +217,7 @@ function typejoin_union_tuple(T::DataType) else ci = promote_typejoin_union(U) end - if i == lr && Core.Compiler.isvarargtype(pi) + if i == lr && isvarargtype(pi) c[i] = isdefined(pi, :N) ? Vararg{ci, pi.N} : Vararg{ci} else c[i] = ci @@ -239,7 +241,8 @@ function full_va_len(p::Core.SimpleVector) end # reduce typejoin over A[i:end] -function tailjoin(A, i) +function tailjoin(A::SimpleVector, i::Int) + @_foldable_meta if i > length(A) return unwrapva(A[end]) end @@ -296,7 +299,8 @@ function promote_type end promote_type() = Bottom promote_type(T) = T -promote_type(T, S, U, V...) = (@inline; promote_type(T, promote_type(S, U, V...))) +promote_type(T, S, U) = (@inline; promote_type(promote_type(T, S), U)) +promote_type(T, S, U, V...) = (@inline; afoldl(promote_type, promote_type(T, S, U), V...)) promote_type(::Type{Bottom}, ::Type{Bottom}) = Bottom promote_type(::Type{T}, ::Type{T}) where {T} = T @@ -370,7 +374,9 @@ function _promote(x::T, y::S) where {T,S} return (convert(R, x), convert(R, y)) end promote_typeof(x) = typeof(x) -promote_typeof(x, xs...) = (@inline; promote_type(typeof(x), promote_typeof(xs...))) +promote_typeof(x, y) = (@inline; promote_type(typeof(x), typeof(y))) +promote_typeof(x, y, z) = (@inline; promote_type(typeof(x), typeof(y), typeof(z))) +promote_typeof(x, y, z, a...) = (@inline; afoldl(((::Type{T}, y) where {T}) -> promote_type(T, typeof(y)), promote_typeof(x, y, z), a...)) function _promote(x, y, z) @inline R = promote_typeof(x, y, z) @@ -427,7 +433,11 @@ end """ ^(x, y) -Exponentiation operator. If `x` is a matrix, computes matrix exponentiation. +Exponentiation operator. + +If `x` and `y` are integers, the result may overflow. +To enter numbers in scientific notation, use [`Float64`](@ref) literals +such as `1.2e3` rather than `1.2 * 10^3`. If `y` is an `Int` literal (e.g. `2` in `x^2` or `-3` in `x^-3`), the Julia code `x^y` is transformed by the compiler to `Base.literal_pow(^, x, Val(y))`, to @@ -437,20 +447,31 @@ where usually `^ == Base.^` unless `^` has been defined in the calling namespace.) 
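As a hedged illustration of the `Base.literal_pow` lowering described above (an editorial sketch, not part of the docstring), a hypothetical `PowTracer` type can intercept literal exponents:

```julia
# `PowTracer` is hypothetical and used only to illustrate the lowering.
struct PowTracer end

# `PowTracer()^3` is lowered to `Base.literal_pow(^, PowTracer(), Val(3))`,
# so this method sees the exponent as a compile-time value:
Base.literal_pow(::typeof(^), ::PowTracer, ::Val{p}) where {p} = "literal power $p"

@assert PowTracer()^3 == "literal power 3"
n = 3
# PowTracer()^n  # would throw a MethodError: no `^(::PowTracer, ::Int)` is defined
```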
If `y` is a negative integer literal, then `Base.literal_pow` transforms the operation to `inv(x)^-y` by default, where `-y` is positive. +See also [`exp2`](@ref), [`<<`](@ref). + # Examples ```jldoctest julia> 3^5 243 -julia> A = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 +julia> 3^-1 # uses Base.literal_pow +0.3333333333333333 + +julia> p = -1; + +julia> 3^p +ERROR: DomainError with -1: +Cannot raise an integer x to a negative power -1. +[...] + +julia> 3.0^p +0.3333333333333333 + +julia> 10^19 > 0 # integer overflow +false -julia> A^3 -2×2 Matrix{Int64}: - 37 54 - 81 118 +julia> big(10)^19 == 1e19 +true ``` """ ^(x::Number, y::Number) = ^(promote(x,y)...) @@ -472,12 +493,6 @@ max(x::Real, y::Real) = max(promote(x,y)...) min(x::Real, y::Real) = min(promote(x,y)...) minmax(x::Real, y::Real) = minmax(promote(x, y)...) -if isdefined(Core, :Compiler) - const _return_type = Core.Compiler.return_type -else - _return_type(@nospecialize(f), @nospecialize(t)) = Any -end - function TupleOrBottom(tt...) any(p -> p === Union{}, tt) && return Union{} return Tuple{tt...} @@ -489,10 +504,106 @@ end Guess what an appropriate container eltype would be for storing results of `f(::argtypes...)`. The guess is in part based on type inference, so can change any time. +Accordingly, return a type `R` such that `f(args...) isa R` where `args isa T`. + !!! warning Due to its fragility, use of `promote_op` should be avoided. It is preferable to base the container eltype on the type of the actual elements. Only in the absence of any elements (for an empty result container), it may be unavoidable to call `promote_op`. + +The type `R` obtained from `promote_op` is merely an upper bound. There may exist a stricter +type `S` such that `f(args...) isa S` for every `args isa T` with `S <: R` and `S != R`. +Furthermore, the exact type `R` obtained from `promote_op` depends on various factors +including but not limited to the exact Julia version used, packages loaded, and command line +options. As such, when used in publicly registered packages, **it is the package authors' +responsibility to ensure that the API guarantees provided by the package do not depend on +the exact type `R` obtained from `promote_op`.** + +Additionally, the result may return overly exact types, such as `DataType`, `Type`, or +`Union{...}`, while the desired inputs or outputs may be different from those. The internal +`promote_typejoin_union` function may be helpful to improve the result in some of these +cases. + +# Extended help + +## Examples + +The following function is an invalid use-case of `promote_op`. + +```julia +\""" + invalid_usecase1(f, xs::AbstractArray) -> ys::Array + +Return an array `ys` such that `vec(ys)` is `isequal`-equivalent to + + [f(xs[1]), f(xs[2]), ..., f(xs[end])] +\""" +function invalid_usecase1(f, xs) + R = promote_op(f, eltype(xs)) + ys = similar(xs, R) + for i in eachindex(xs, ys) + ys[i] = f(xs[i]) + end + return ys +end +``` + +This is because the value obtained through `eltype(invalid_usecase1(f, xs))` depends on +exactly what `promote_op` returns. It may be improved by re-computing the element type +before returning the result. 
+ +```julia +function valid_usecase1(f, xs) + R = promote_typejoin_union(promote_op(f, eltype(xs))) + ys = similar(xs, R) + S = Union{} + for i in eachindex(xs, ys) + ys[i] = f(xs[i]) + S = promote_type(S, typeof(ys[i])) + end + if S != R + zs = similar(xs, S) + copyto!(zs, ys) + return zs + end + return ys +end +``` + +Note that using [`isconcretetype`](@ref) on the result is not enough to safely use +`promote_op`. The following function is another invalid use-case of `promote_op`. + +```julia +function invalid_usecase2(f, xs) + R = promote_op(f, eltype(xs)) + if isconcretetype(R) + ys = similar(xs, R) + else + ys = similar(xs, Any) + end + for i in eachindex(xs, ys) + ys[i] = f(xs[i]) + end + return ys +end +``` + +This is because whether or not the caller gets `Any` element type depends on if `promote_op` +can infer a concrete return type of the given function. A fix similar to `valid_usecase1` +can be used. + +*Technically*, another possible fix for `invalid_usecase1` and `invalid_usecase2` is to +loosen the API guarantee: + +> another_valid_usecase1(f, xs::AbstractArray) -> ys::Array +> +> Return an array `ys` such that every element in `xs` with the same index +> is mapped with `f`. +> +> The element type of `ys` is _undefined_. It must not be used with generic +> functions whose behavior depend on the element type of `ys`. + +However, it is discouraged to define such unconventional API guarantees. """ function promote_op(f, S::Type...) argT = TupleOrBottom(S...) diff --git a/base/public.jl b/base/public.jl new file mode 100644 index 0000000000000..8777a454c920a --- /dev/null +++ b/base/public.jl @@ -0,0 +1,118 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +public +# Modules + Checked, + Filesystem, + Order, + ScopedValues, + Sort, + +# Types + AbstractLock, + AbstractPipe, + AsyncCondition, + CodeUnits, + Event, + Fix, + Fix1, + Fix2, + Generator, + ImmutableDict, + OneTo, + LogRange, + UUID, + +# Semaphores + Semaphore, + acquire, + release, + +# arrays + has_offset_axes, + require_one_based_indexing, + +# collections + IteratorEltype, + IteratorSize, + to_index, + vect, + isdone, + front, + rest, + split_rest, + tail, + checked_length, + elsize, + +# Loading + DL_LOAD_PATH, + load_path, + active_project, + +# Reflection and introspection + isambiguous, + isexpr, + isidentifier, + issingletontype, + identify_package, + locate_package, + moduleroot, + jit_total_bytes, + summarysize, + isexported, + ispublic, + remove_linenums!, + +# Operators + operator_associativity, + operator_precedence, + isbinaryoperator, + isoperator, + isunaryoperator, + +# C interface + cconvert, + unsafe_convert, + +# Error handling + exit_on_sigint, + windowserror, + +# Macros + @assume_effects, + @constprop, + @locals, + @propagate_inbounds, + +# External processes + shell_escape, + shell_split, + shell_escape_posixly, + shell_escape_csh, + shell_escape_wincmd, + escape_microsoft_c_args, + +# Strings + escape_raw_string, + +# IO + # types + BufferStream, + IOServer, + OS_HANDLE, + PipeEndpoint, + TTY, + # functions + reseteof, + link_pipe!, + dup, + +# filesystem operations + rename, + +# misc + notnothing, + runtests, + text_colors, + depwarn diff --git a/base/range.jl b/base/range.jl index 6b701d31b0358..39428ab741955 100644 --- a/base/range.jl +++ b/base/range.jl @@ -32,7 +32,7 @@ _colon(::Any, ::Any, start::T, step, stop::T) where {T} = (:)(start, [step], stop) Range operator. 
`a:b` constructs a range from `a` to `b` with a step size -equal to 1, which produces: +equal to +1, which produces: * a [`UnitRange`](@ref) when `a` and `b` are integers, or * a [`StepRange`](@ref) when `a` and `b` are characters, or @@ -41,6 +41,9 @@ equal to 1, which produces: `a:s:b` is similar but uses a step size of `s` (a [`StepRange`](@ref) or [`StepRangeLen`](@ref)). See also [`range`](@ref) for more control. +To create a descending range, use `reverse(a:b)` or a negative step size, e.g. `b:-1:a`. +Otherwise, when `b < a`, an empty range will be constructed and normalized to `a:a-1`. + The operator `:` is also used in indexing to select whole dimensions, e.g. in `A[:, 1]`. `:` is also used to [`quote`](@ref) code, e.g. `:(x + y) isa Expr` and `:x isa Symbol`. @@ -66,10 +69,15 @@ Mathematically a range is uniquely determined by any three of `start`, `step`, ` Valid invocations of range are: * Call `range` with any three of `start`, `step`, `stop`, `length`. * Call `range` with two of `start`, `stop`, `length`. In this case `step` will be assumed - to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned. -* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be one. + to be positive one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned. +* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be positive one. + +To construct a descending range, specify a negative step size, e.g. `range(5, 1; step = -1)` => [5,4,3,2,1]. Otherwise, +a `stop` value less than the `start` value, with the default `step` of `+1`, constructs an empty range. Empty ranges +are normalized such that the `stop` is one less than the `start`, e.g. `range(5, 1) == 5:4`. See Extended Help for additional details on the returned type. +See also [`logrange`](@ref) for logarithmically spaced points. # Examples ```jldoctest @@ -252,10 +260,13 @@ end ## 1-dimensional ranges ## """ - AbstractRange{T} + AbstractRange{T} <: AbstractVector{T} -Supertype for ranges with elements of type `T`. -[`UnitRange`](@ref) and other types are subtypes of this. +Supertype for linear ranges with elements of type `T`. +[`UnitRange`](@ref), [`LinRange`](@ref) and other types are subtypes of this. + +All subtypes must define [`step`](@ref). +Thus [`LogRange`](@ref Base.LogRange) is not a subtype of `AbstractRange`. """ abstract type AbstractRange{T} <: AbstractArray{T,1} end @@ -347,7 +358,7 @@ function steprange_last(start, step, stop)::typeof(stop) # (to simplify handling both signed and unsigned T and checking for signed overflow): absdiff, absstep = stop > start ? (stop - start, step) : (start - stop, -step) - # Compute remainder as a nonnegative number: + # Compute remainder as a non-negative number: if absdiff isa Signed && absdiff < zero(absdiff) # unlikely, but handle the signed overflow case with unsigned rem overflow_case(absdiff, absstep) = (@noinline; convert(typeof(absdiff), unsigned(absdiff) % absstep)) @@ -372,6 +383,7 @@ function steprange_last_empty(start::Integer, step, stop)::typeof(stop) end return last end +steprange_last_empty(start::Bool, step, stop) = start ⊻ (step > zero(step)) # isnegative(step) ? start : !start # For types where x+oneunit(x) may not be well-defined use the user-given value for stop steprange_last_empty(start, step, stop) = stop @@ -447,7 +459,7 @@ distinction that the lower limit is guaranteed (by the type system) to be 1. 
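A few illustrative checks of the range behavior documented above (empty-range normalization, descending ranges, and the `Base.OneTo` lower-bound guarantee); these are editorial examples, not part of the diff:

```julia
@assert (5:1) == 5:4 && isempty(5:1)          # empty range, normalized to a:a-1
@assert collect(5:-1:1) == [5, 4, 3, 2, 1]    # descending range via a negative step
@assert range(1, 10, length = 4) == 1.0:3.0:10.0
@assert first(Base.OneTo(3)) === 1            # lower limit fixed at 1 by the type
```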
""" struct OneTo{T<:Integer} <: AbstractUnitRange{T} - stop::T + stop::T # invariant: stop >= zero(stop) function OneTo{T}(stop) where {T<:Integer} throwbool(r) = (@noinline; throw(ArgumentError("invalid index: $r of type Bool"))) T === Bool && throwbool(stop) @@ -463,6 +475,8 @@ struct OneTo{T<:Integer} <: AbstractUnitRange{T} T === Bool && throwbool(r) return new(max(zero(T), last(r))) end + + global unchecked_oneto(stop::Integer) = new{typeof(stop)}(stop) end OneTo(stop::T) where {T<:Integer} = OneTo{T}(stop) OneTo(r::AbstractRange{T}) where {T<:Integer} = OneTo{T}(r) @@ -547,6 +561,8 @@ julia> collect(LinRange(-0.1, 0.3, 5)) 0.19999999999999998 0.3 ``` + +See also [`Logrange`](@ref Base.LogRange) for logarithmically spaced points. """ struct LinRange{T,L<:Integer} <: AbstractRange{T} start::T @@ -595,7 +611,7 @@ function show(io::IO, r::LinRange{T}) where {T} print(io, "LinRange{") show(io, T) print(io, "}(") - ioc = IOContext(io, :typeinto=>T) + ioc = IOContext(io, :typeinfo=>T) show(ioc, first(r)) print(io, ", ") show(ioc, last(r)) @@ -617,7 +633,7 @@ parameters `pre` and `post` characters for each printed row, `sep` separator string between printed elements, `hdots` string for the horizontal ellipsis. """ -function print_range(io::IO, r::AbstractRange, +function print_range(io::IO, r::AbstractArray, pre::AbstractString = " ", sep::AbstractString = ", ", post::AbstractString = "", @@ -695,6 +711,7 @@ julia> step(range(2.5, stop=10.9, length=85)) """ step(r::StepRange) = r.step step(r::AbstractUnitRange{T}) where {T} = oneunit(T) - zero(T) +step(r::AbstractUnitRange{Bool}) = true step(r::StepRangeLen) = r.step step(r::StepRangeLen{T}) where {T<:AbstractFloat} = T(r.step) step(r::LinRange) = (last(r)-first(r))/r.lendiv @@ -703,8 +720,6 @@ step(r::LinRange) = (last(r)-first(r))/r.lendiv step_hp(r::StepRangeLen) = r.step step_hp(r::AbstractRange) = step(r) -axes(r::AbstractRange) = (oneto(length(r)),) - # Needed to ensure `has_offset_axes` can constant-fold. 
has_offset_axes(::StepRange) = false @@ -836,6 +851,11 @@ first(r::OneTo{T}) where {T} = oneunit(T) first(r::StepRangeLen) = unsafe_getindex(r, 1) first(r::LinRange) = r.start +function first(r::OneTo, n::Integer) + n < 0 && throw(ArgumentError("Number of elements must be non-negative")) + OneTo(oftype(r.stop, min(r.stop, n))) +end + last(r::OrdinalRange{T}) where {T} = convert(T, r.stop) # via steprange_last last(r::StepRangeLen) = unsafe_getindex(r, length(r)) last(r::LinRange) = r.stop @@ -905,13 +925,20 @@ end ## indexing -isassigned(r::AbstractRange, i::Int) = firstindex(r) <= i <= lastindex(r) +function isassigned(r::AbstractRange, i::Integer) + i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) + firstindex(r) <= i <= lastindex(r) +end + +# `_getindex` is like `getindex` but does not check if `i isa Bool` +function _getindex(v::AbstractRange, i::Integer) + @boundscheck checkbounds(v, i) + unsafe_getindex(v, i) +end _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start -function getindex(v::UnitRange{T}, i::Integer) where T - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) +function _getindex(v::UnitRange{T}, i::Integer) where T val = convert(T, v.start + (i - oneunit(i))) @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i) val @@ -920,64 +947,38 @@ end const OverflowSafe = Union{Bool,Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128} -function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe} - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) +function _getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe} val = v.start + (i - oneunit(i)) @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i) val % T end -function getindex(v::OneTo{T}, i::Integer) where T - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - @boundscheck ((i > 0) & (i <= v.stop)) || throw_boundserror(v, i) - convert(T, i) -end - -function getindex(v::AbstractRange{T}, i::Integer) where T - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - ret = convert(T, first(v) + (i - oneunit(i))*step_hp(v)) - ok = ifelse(step(v) > zero(step(v)), - (ret <= last(v)) & (ret >= first(v)), - (ret <= first(v)) & (ret >= last(v))) - @boundscheck ((i > 0) & ok) || throw_boundserror(v, i) - ret -end - -function getindex(r::Union{StepRangeLen,LinRange}, i::Integer) - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - @boundscheck checkbounds(r, i) - unsafe_getindex(r, i) +let BitInteger64 = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64} # for bootstrapping + global function checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64) + res = widemul(step(v), i-oneunit(i)) + first(v) + (0 < i) & ifelse(0 < step(v), res <= last(v), res >= last(v)) + end end -# This is separate to make it useful even when running with --check-bounds=yes +# unsafe_getindex is separate to make it useful even when running with --check-bounds=yes +# it assumes the index is inbounds but does not segfault even if the index is out of bounds. +# it does not check if the index isa bool. 
+unsafe_getindex(v::OneTo{T}, i::Integer) where T = convert(T, i) +unsafe_getindex(v::AbstractRange{T}, i::Integer) where T = convert(T, first(v) + (i - oneunit(i))*step_hp(v)) function unsafe_getindex(r::StepRangeLen{T}, i::Integer) where T - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset + u = oftype(r.offset, i) - r.offset T(r.ref + u*r.step) end - -function _getindex_hiprec(r::StepRangeLen, i::Integer) # without rounding by T - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset - r.ref + u*r.step -end - -function unsafe_getindex(r::LinRange, i::Integer) - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - lerpi(i-oneunit(i), r.lendiv, r.start, r.stop) -end +unsafe_getindex(r::LinRange, i::Integer) = lerpi(i-oneunit(i), r.lendiv, r.start, r.stop) function lerpi(j::Integer, d::Integer, a::T, b::T) where T - @inline t = j/d # ∈ [0,1] # compute approximately fma(t, b, -fma(t, a, a)) return T((1-t)*a + t*b) end +# non-scalar indexing + getindex(r::AbstractRange, ::Colon) = copy(r) function getindex(r::AbstractUnitRange, s::AbstractUnitRange{T}) where {T<:Integer} @@ -1006,13 +1007,14 @@ function getindex(r::AbstractUnitRange, s::StepRange{T}) where {T<:Integer} @boundscheck checkbounds(r, s) if T === Bool - return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=last(s)) + len = Int(last(s)) + return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=len) else f = first(r) start = oftype(f, f + s.start - firstindex(r)) st = step(s) len = length(s) - stop = oftype(f, start + (len - oneunit(len)) * st) + stop = oftype(f, start + (len - oneunit(len)) * (iszero(len) ? copysign(oneunit(st), st) : st)) return range(start, stop; step=st) end end @@ -1022,26 +1024,22 @@ function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer} @boundscheck checkbounds(r, s) if T === Bool - if length(s) == 0 - start, len = first(r), 0 - elseif length(s) == 1 - if first(s) - start, len = first(r), 1 - else - start, len = first(r), 0 - end - else # length(s) == 2 - start, len = last(r), 1 - end - return range(start, step=step(r); length=len) + # treat as a zero, one, or two-element vector, where at most one element is true + # staying inbounds on the original range (preserving either start or + # stop as either stop or start, depending on the length) + st = step(s) + nonempty = st > zero(st) ? last(s) : first(s) + # n.b. isempty(r) implies isempty(r) which means !nonempty and !first(s) + range((first(s) ⊻ nonempty) ⊻ isempty(r) ? last(r) : first(r), step=step(r), length=Int(nonempty)) else f = r.start fs = first(s) st = r.step - start = oftype(f, f + (fs - oneunit(fs)) * st) - st = st * step(s) + start = oftype(f, f + (fs - firstindex(r)) * st) + st *= step(s) len = length(s) - stop = oftype(f, start + (len - oneunit(len)) * st) + # mimic steprange_last_empty here, to try to avoid overflow + stop = oftype(f, start + (len - oneunit(len)) * (iszero(len) ? 
copysign(oneunit(st), st) : st)) return range(start, stop; step=st) end end @@ -1076,6 +1074,11 @@ function getindex(r::StepRangeLen{T}, s::OrdinalRange{S}) where {T, S<:Integer} end end +function _getindex_hiprec(r::StepRangeLen, i::Integer) # without rounding by T + u = oftype(r.offset, i) - r.offset + r.ref + u*r.step +end + function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer} @inline @boundscheck checkbounds(r, s) @@ -1297,6 +1300,9 @@ promote_rule(a::Type{OneTo{T1}}, b::Type{OneTo{T2}}) where {T1,T2} = OneTo{T}(r::OneTo{T}) where {T<:Integer} = r OneTo{T}(r::OneTo) where {T<:Integer} = OneTo{T}(r.stop) +promote_rule(a::Type{OneTo{T1}}, ::Type{UR}) where {T1,UR<:AbstractUnitRange} = + promote_rule(UnitRange{T1}, UR) + promote_rule(a::Type{UnitRange{T1}}, ::Type{UR}) where {T1,UR<:AbstractUnitRange} = promote_rule(a, UnitRange{eltype(UR)}) UnitRange{T}(r::AbstractUnitRange) where {T<:Real} = UnitRange{T}(first(r), last(r)) @@ -1373,8 +1379,21 @@ function vcat(rs::AbstractRange{T}...) where T return a end -Array{T,1}(r::AbstractRange{T}) where {T} = vcat(r) -collect(r::AbstractRange) = vcat(r) +# This method differs from that for AbstractArrays as it +# use iteration instead of indexing. This works even if certain +# non-standard ranges don't support indexing. +# See https://github.com/JuliaLang/julia/pull/27302 +# Similarly, collect(r::AbstractRange) uses iteration +function Array{T,1}(r::AbstractRange{T}) where {T} + a = Vector{T}(undef, length(r)) + i = 1 + for x in r + @inbounds a[i] = x + i += 1 + end + return a +end +collect(r::AbstractRange) = Array(r) _reverse(r::OrdinalRange, ::Colon) = (:)(last(r), negate(step(r)), first(r)) function _reverse(r::StepRangeLen, ::Colon) @@ -1396,8 +1415,8 @@ sort!(r::AbstractUnitRange) = r sort(r::AbstractRange) = issorted(r) ? r : reverse(r) -sortperm(r::AbstractUnitRange) = 1:length(r) -sortperm(r::AbstractRange) = issorted(r) ? (1:1:length(r)) : (length(r):-1:1) +sortperm(r::AbstractUnitRange) = eachindex(r) +sortperm(r::AbstractRange) = issorted(r) ? (firstindex(r):1:lastindex(r)) : (lastindex(r):-1:firstindex(r)) function sum(r::AbstractRange{<:Real}) l = length(r) @@ -1472,7 +1491,7 @@ end """ mod(x::Integer, r::AbstractUnitRange) -Find `y` in the range `r` such that ``x ≡ y (mod n)``, where `n = length(r)`, +Find `y` in the range `r` such that `x` ≡ `y` (mod `n`), where `n = length(r)`, i.e. `y = mod(x - first(r), n) + first(r)`. See also [`mod1`](@ref). @@ -1491,3 +1510,190 @@ julia> mod(3, 0:2) # mod(3, 3) """ mod(i::Integer, r::OneTo) = mod1(i, last(r)) mod(i::Integer, r::AbstractUnitRange{<:Integer}) = mod(i-first(r), length(r)) + first(r) + + +""" + logrange(start, stop, length) + logrange(start, stop; length) + +Construct a specialized array whose elements are spaced logarithmically +between the given endpoints. That is, the ratio of successive elements is +a constant, calculated from the length. + +This is similar to `geomspace` in Python. Unlike `PowerRange` in Mathematica, +you specify the number of elements not the ratio. +Unlike `logspace` in Python and Matlab, the `start` and `stop` arguments are +always the first and last elements of the result, not powers applied to some base. 
+ +# Examples +```jldoctest +julia> logrange(10, 4000, length=3) +3-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}: + 10.0, 200.0, 4000.0 + +julia> ans[2] ≈ sqrt(10 * 4000) # middle element is the geometric mean +true + +julia> range(10, 40, length=3)[2] ≈ (10 + 40)/2 # arithmetic mean +true + +julia> logrange(1f0, 32f0, 11) +11-element Base.LogRange{Float32, Float64}: + 1.0, 1.41421, 2.0, 2.82843, 4.0, 5.65685, 8.0, 11.3137, 16.0, 22.6274, 32.0 + +julia> logrange(1, 1000, length=4) ≈ 10 .^ (0:3) +true +``` + +See the [`LogRange`](@ref Base.LogRange) type for further details. + +See also [`range`](@ref) for linearly spaced points. + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. +""" +logrange(start::Real, stop::Real, length::Integer) = LogRange(start, stop, Int(length)) +logrange(start::Real, stop::Real; length::Integer) = logrange(start, stop, length) + + +""" + LogRange{T}(start, stop, len) <: AbstractVector{T} + +A range whose elements are spaced logarithmically between `start` and `stop`, +with spacing controlled by `len`. Returned by [`logrange`](@ref). + +Like [`LinRange`](@ref), the first and last elements will be exactly those +provided, but intermediate values may have small floating-point errors. +These are calculated using the logs of the endpoints, which are +stored on construction, often in higher precision than `T`. + +# Examples +```jldoctest +julia> logrange(1, 4, length=5) +5-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}: + 1.0, 1.41421, 2.0, 2.82843, 4.0 + +julia> Base.LogRange{Float16}(1, 4, 5) +5-element Base.LogRange{Float16, Float64}: + 1.0, 1.414, 2.0, 2.828, 4.0 + +julia> logrange(1e-310, 1e-300, 11)[1:2:end] +6-element Vector{Float64}: + 1.0e-310 + 9.999999999999974e-309 + 9.999999999999981e-307 + 9.999999999999988e-305 + 9.999999999999994e-303 + 1.0e-300 + +julia> prevfloat(1e-308, 5) == ans[2] +true +``` + +Note that integer eltype `T` is not allowed. +Use for instance `round.(Int, xs)`, or explicit powers of some integer base: + +```jldoctest +julia> xs = logrange(1, 512, 4) +4-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}: + 1.0, 8.0, 64.0, 512.0 + +julia> 2 .^ (0:3:9) |> println +[1, 8, 64, 512] +``` + +!!! compat "Julia 1.11" + This type requires at least Julia 1.11. 
+""" +struct LogRange{T<:Real,X} <: AbstractArray{T,1} + start::T + stop::T + len::Int + extra::Tuple{X,X} + function LogRange{T}(start::T, stop::T, len::Int) where {T<:Real} + if T <: Integer + # LogRange{Int}(1, 512, 4) produces InexactError: Int64(7.999999999999998) + throw(ArgumentError("LogRange{T} does not support integer types")) + end + if iszero(start) || iszero(stop) + throw(DomainError((start, stop), + "LogRange cannot start or stop at zero")) + elseif start < 0 || stop < 0 + # log would throw, but _log_twice64_unchecked does not + throw(DomainError((start, stop), + "LogRange does not accept negative numbers")) + elseif !isfinite(start) || !isfinite(stop) + throw(DomainError((start, stop), + "LogRange is only defined for finite start & stop")) + elseif len < 0 + throw(ArgumentError(LazyString( + "LogRange(", start, ", ", stop, ", ", len, "): can't have negative length"))) + elseif len == 1 && start != stop + throw(ArgumentError(LazyString( + "LogRange(", start, ", ", stop, ", ", len, "): endpoints differ, while length is 1"))) + end + ex = _logrange_extra(start, stop, len) + new{T,typeof(ex[1])}(start, stop, len, ex) + end +end + +function LogRange{T}(start::Real, stop::Real, len::Integer) where {T} + LogRange{T}(convert(T, start), convert(T, stop), convert(Int, len)) +end +function LogRange(start::Real, stop::Real, len::Integer) + T = float(promote_type(typeof(start), typeof(stop))) + LogRange{T}(convert(T, start), convert(T, stop), convert(Int, len)) +end + +size(r::LogRange) = (r.len,) +length(r::LogRange) = r.len + +first(r::LogRange) = r.start +last(r::LogRange) = r.stop + +function _logrange_extra(a::Real, b::Real, len::Int) + loga = log(1.0 * a) # widen to at least Float64 + logb = log(1.0 * b) + (loga/(len-1), logb/(len-1)) +end +function _logrange_extra(a::Float64, b::Float64, len::Int) + loga = _log_twice64_unchecked(a) + logb = _log_twice64_unchecked(b) + # The reason not to do linear interpolation on log(a)..log(b) in `getindex` is + # that division of TwicePrecision is quite slow, so do it once on construction: + (loga/(len-1), logb/(len-1)) +end + +function getindex(r::LogRange{T}, i::Int) where {T} + @inline + @boundscheck checkbounds(r, i) + i == 1 && return r.start + i == r.len && return r.stop + # Main path uses Math.exp_impl for TwicePrecision, but is not perfectly + # accurate, hence the special cases for endpoints above. + logx = (r.len-i) * r.extra[1] + (i-1) * r.extra[2] + x = _exp_allowing_twice64(logx) + return T(x) +end + +function show(io::IO, r::LogRange{T}) where {T} + print(io, "LogRange{", T, "}(") + ioc = IOContext(io, :typeinfo => T) + show(ioc, first(r)) + print(io, ", ") + show(ioc, last(r)) + print(io, ", ") + show(io, length(r)) + print(io, ')') +end + +# Implementation detail of @world +# The rest of this is defined in essentials.jl, but UnitRange is not available +function _resolve_in_world(worlds::UnitRange, gr::GlobalRef) + # Validate that this binding's reference covers the entire world range + bpart = lookup_binding_partition(UInt(first(worlds)), gr) + if bpart.max_world < last(worlds) + error("Binding does not cover the full world range") + end + _resolve_in_world(UInt(last(worlds)), gr) +end diff --git a/base/rational.jl b/base/rational.jl index baca2397c42ff..69d39770b2095 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -17,31 +17,47 @@ end unsafe_rational(num::T, den::T) where {T<:Integer} = unsafe_rational(T, num, den) unsafe_rational(num::Integer, den::Integer) = unsafe_rational(promote(num, den)...) 
-@noinline __throw_rational_argerror_typemin(T) = throw(ArgumentError("invalid rational: denominator can't be typemin($T)")) function checked_den(::Type{T}, num::T, den::T) where T<:Integer if signbit(den) - den = -den - signbit(den) && __throw_rational_argerror_typemin(typeof(den)) - num = -num + den = checked_neg(den) + num = checked_neg(num) end return unsafe_rational(T, num, den) end checked_den(num::T, den::T) where T<:Integer = checked_den(T, num, den) checked_den(num::Integer, den::Integer) = checked_den(promote(num, den)...) -@noinline __throw_rational_argerror_zero(T) = throw(ArgumentError("invalid rational: zero($T)//zero($T)")) +@noinline __throw_rational_argerror_zero(T) = throw(ArgumentError(LazyString("invalid rational: zero(", T, ")//zero(", T, ")"))) function Rational{T}(num::Integer, den::Integer) where T<:Integer iszero(den) && iszero(num) && __throw_rational_argerror_zero(T) - num, den = divgcd(num, den) - return checked_den(T, T(num), T(den)) + if T <: Union{Unsigned, Bool} + # Throw InexactError if the result is negative. + if !iszero(num) && (signbit(den) ⊻ signbit(num)) + throw(InexactError(:Rational, Rational{T}, num, den)) + end + unum = uabs(num) + uden = uabs(den) + r_unum, r_uden = divgcd(unum, uden) + return unsafe_rational(T, promote(T(r_unum), T(r_uden))...) + else + r_num, r_den = divgcd(num, den) + return checked_den(T, promote(T(r_num), T(r_den))...) + end end Rational(n::T, d::T) where {T<:Integer} = Rational{T}(n, d) Rational(n::Integer, d::Integer) = Rational(promote(n, d)...) Rational(n::Integer) = unsafe_rational(n, one(n)) -function divgcd(x::Integer,y::Integer) - g = gcd(x,y) +""" + divgcd(x::Integer, y::Integer) + +Returns `(x÷gcd(x,y), y÷gcd(x,y))`. + +See also [`div`](@ref), [`gcd`](@ref). +""" +function divgcd(x::TX, y::TY)::Tuple{TX, TY} where {TX<:Integer, TY<:Integer} + g = gcd(uabs(x), uabs(y)) div(x,g), div(y,g) end @@ -49,6 +65,12 @@ end //(num, den) Divide two integers or rational numbers, giving a [`Rational`](@ref) result. +More generally, `//` can be used for exact rational division of other numeric types +with integer or rational components, such as complex numbers with integer components. + +Note that floating-point ([`AbstractFloat`](@ref)) arguments are not permitted by `//` +(even if the values are rational). +The arguments must be subtypes of [`Integer`](@ref), `Rational`, or composites thereof. # Examples ```jldoctest @@ -57,6 +79,13 @@ julia> 3 // 5 julia> (3 // 5) // (2 // 1) 3//10 + +julia> (1+2im) // (3+4im) +11//25 + 2//25*im + +julia> 1.0 // 2 +ERROR: MethodError: no method matching //(::Float64, ::Int64) +[...] ``` """ //(n::Integer, d::Integer) = Rational(n,d) @@ -84,7 +113,7 @@ end function show(io::IO, x::Rational) show(io, numerator(x)) - if isone(denominator(x)) && get(io, :typeinfo, Any) <: Rational + if isone(denominator(x)) && nonnothing_nonmissing_typeinfo(io) <: Rational return end @@ -119,7 +148,7 @@ function Rational{T}(x::Rational) where T<:Integer unsafe_rational(T, convert(T, x.num), convert(T, x.den)) end function Rational{T}(x::Integer) where T<:Integer - unsafe_rational(T, convert(T, x), one(T)) + unsafe_rational(T, T(x), T(one(x))) end Rational(x::Rational) = x @@ -134,6 +163,14 @@ function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S P = promote_type(T,S) convert(T, convert(P,x.num)/convert(P,x.den))::T end + # avoid spurious overflow (#52394). (Needed for UInt16 or larger; + # we also include Int16 for consistency of accuracy.) 
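As I read the new `Rational{T}` constructor and `checked_den` above, the observable changes are roughly the following (a sketch, not a doctest from the patch):

```julia
# Unsigned targets now normalize through uabs, so sign-cancelling inputs work:
@assert Rational{UInt8}(-2, -4) == 1 // 2

# ...while a genuinely negative value is rejected with an InexactError:
err = try
    Rational{UInt8}(-1, 2)
catch e
    e
end
@assert err isa InexactError

# checked_den now negates via checked_neg, so Rational(1, typemin(Int)) surfaces
# as an OverflowError rather than the ArgumentError previously raised by
# __throw_rational_argerror_typemin.
```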
+Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}}) = + Float16(Float32(x)) +Float16(x::Rational{<:Union{Int128,UInt128}}) = + Float16(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency +Float32(x::Rational{<:Union{Int128,UInt128}}) = + Float32(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency function Rational{T}(x::AbstractFloat) where T<:Integer r = rationalize(T, x, tol=0) @@ -234,7 +271,7 @@ function rationalize(::Type{T}, x::Union{AbstractFloat, Rational}, tol::Real) wh end end rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol) -rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...) +rationalize(x::Real; kvs...) = rationalize(Int, x; kvs...) rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re; kvs...), rationalize(T, x.im; kvs...)) rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re; kvs...), rationalize(Int, x.im; kvs...)) rationalize(::Type{T}, x::Rational; tol::Real = 0) where {T<:Integer} = rationalize(T, x, tol) @@ -263,8 +300,14 @@ julia> numerator(4) 4 ``` """ -numerator(x::Integer) = x +numerator(x::Union{Integer,Complex{<:Integer}}) = x numerator(x::Rational) = x.num +function numerator(z::Complex{<:Rational}) + den = denominator(z) + reim = (real(z), imag(z)) + result = checked_mul.(numerator.(reim), div.(den, denominator.(reim))) + complex(result...) +end """ denominator(x) @@ -280,13 +323,12 @@ julia> denominator(4) 1 ``` """ -denominator(x::Integer) = one(x) +denominator(x::Union{Integer,Complex{<:Integer}}) = one(x) denominator(x::Rational) = x.den +denominator(z::Complex{<:Rational}) = lcm(denominator(real(z)), denominator(imag(z))) sign(x::Rational) = oftype(x, sign(x.num)) signbit(x::Rational) = signbit(x.num) -copysign(x::Rational, y::Real) = unsafe_rational(copysign(x.num, y), x.den) -copysign(x::Rational, y::Rational) = unsafe_rational(copysign(x.num, y.num), x.den) abs(x::Rational) = unsafe_rational(checked_abs(x.num), x.den) @@ -304,7 +346,7 @@ function -(x::Rational{T}) where T<:BitSigned x.num == typemin(T) && __throw_rational_numerator_typemin(T) unsafe_rational(-x.num, x.den) end -@noinline __throw_rational_numerator_typemin(T) = throw(OverflowError("rational numerator is typemin($T)")) +@noinline __throw_rational_numerator_typemin(T) = throw(OverflowError(LazyString("rational numerator is typemin(", T, ")"))) function -(x::Rational{T}) where T<:Unsigned x.num != zero(T) && __throw_negate_unsigned() @@ -484,10 +526,6 @@ for (S, T) in ((Rational, Integer), (Integer, Rational), (Rational, Rational)) end end -trunc(::Type{T}, x::Rational) where {T} = round(T, x, RoundToZero) -floor(::Type{T}, x::Rational) where {T} = round(T, x, RoundDown) -ceil(::Type{T}, x::Rational) where {T} = round(T, x, RoundUp) - round(x::Rational, r::RoundingMode=RoundNearest) = round(typeof(x), x, r) function round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T,Tr} @@ -531,7 +569,7 @@ lcm(x::Rational, y::Rational) = unsafe_rational(lcm(x.num, y.num), gcd(x.den, y. 
function gcdx(x::Rational, y::Rational) c = gcd(x, y) if iszero(c.num) - a, b = one(c.num), c.num + a, b = zero(c.num), c.num elseif iszero(c.den) a = ifelse(iszero(x.den), one(c.den), c.den) b = ifelse(iszero(y.den), one(c.den), c.den) @@ -549,9 +587,10 @@ function hash(x::Rational{<:BitInteger64}, h::UInt) num, den = Base.numerator(x), Base.denominator(x) den == 1 && return hash(num, h) den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h) - if isodd(den) + if isodd(den) # since den != 1, this rational can't be a Float64 pow = trailing_zeros(num) num >>= pow + h = hash_integer(den, h) else pow = trailing_zeros(den) den >>= pow diff --git a/base/rawbigfloats.jl b/base/rawbigfloats.jl new file mode 100644 index 0000000000000..4377edfc463d8 --- /dev/null +++ b/base/rawbigfloats.jl @@ -0,0 +1,143 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Some operations on BigFloat can be done more directly by treating the data portion ("BigFloatData") as a BigInt + +elem_count(x::BigFloatData, ::Val{:words}) = length(x) +elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8 +word_length(::BigFloatData{T}) where {T} = elem_count(zero(T), Val(:bits)) +elem_count(x::BigFloatData{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words)) +reversed_index(n::Int, i::Int) = n - i - 1 +reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int +split_bit_index(x::BigFloatData, i::Int) = divrem(i, word_length(x), RoundToZero) + +""" +`i` is the zero-based index of the wanted word in `x`, starting from +the less significant words. +""" +function get_elem(x::BigFloatData{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T} + @inbounds return x[i + 1]::T +end + +function get_elem(x, i::Int, v::Val, ::Val{:descending}) + j = reversed_index(x, i, v) + get_elem(x, j, v, Val(:ascending)) +end + +word_is_nonzero(x::BigFloatData, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v)) + +word_is_nonzero(x::BigFloatData, v::Val) = let x = x + i -> word_is_nonzero(x, i, v) +end + +""" +Returns a `Bool` indicating whether the `len` least significant words +of `x` are nonzero. +""" +function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:words}) + any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1)) +end + +""" +Returns a `Bool` indicating whether the `len` least significant bits of +the `i`-th (zero-based index) word of `x` are nonzero. +""" +function tail_is_nonzero(x::BigFloatData, len::Int, i::Int, ::Val{:word}) + !iszero(len) && + !iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len)) +end + +""" +Returns a `Bool` indicating whether the `len` least significant bits of +`x` are nonzero. +""" +function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:bits}) + if 0 < len + word_count, bit_count_in_word = split_bit_index(x, len) + tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) || + tail_is_nonzero(x, word_count, Val(:words)) + else + false + end::Bool +end + +""" +Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`. +""" +function get_elem(x::Unsigned, i::Int, ::Val{:bits}, ::Val{:ascending}) + (x >>> i) % Bool +end + +""" +Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`. 
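A worked example of the new `numerator`/`denominator` methods for complex rationals a few hunks above (my reading of the added code, not part of the patch):

```julia
z = 1//2 + 2//3 * im                          # Complex{Rational{Int}}

# denominator is the lcm of the component denominators; numerator rescales
# each component onto that common denominator (with checked multiplication).
@assert denominator(z) == lcm(2, 3) == 6
@assert numerator(z) == 3 + 4im
@assert numerator(z) // denominator(z) == z   # exact round trip
```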
+""" +function get_elem(x::BigFloatData, i::Int, ::Val{:bits}, v::Val{:ascending}) + vb = Val(:bits) + if 0 ≤ i < elem_count(x, vb) + word_index, bit_index_in_word = split_bit_index(x, i) + word = get_elem(x, word_index, Val(:words), v) + get_elem(word, bit_index_in_word, vb, v) + else + false + end::Bool +end + +""" +Returns an integer of type `R`, consisting of the `len` most +significant bits of `x`. If there are less than `len` bits in `x`, +the least significant bits are zeroed. +""" +function truncated(::Type{R}, x::BigFloatData, len::Int) where {R<:Integer} + ret = zero(R) + if 0 < len + word_count, bit_count_in_word = split_bit_index(x, len) + k = word_length(x) + vals = (Val(:words), Val(:descending)) + lenx = elem_count(x, first(vals)) + + for w ∈ 0:(word_count - 1) + ret <<= k + if w < lenx # if the output type is larger, truncate turns into zero-extend + word = get_elem(x, w, vals...) + ret |= R(word) + end + end + + if !iszero(bit_count_in_word) + ret <<= bit_count_in_word + if word_count < lenx # if the output type is larger, truncate turns into zero-extend + wrd = get_elem(x, word_count, vals...) + ret |= R(wrd >>> (k - bit_count_in_word)) + end + end + end + ret::R +end + +struct BigFloatDataRoundingIncrementHelper{T<:Unsigned} + n::BigFloatData{T} + trunc_len::Int + + final_bit::Bool + round_bit::Bool + + function BigFloatDataRoundingIncrementHelper{T}(n::BigFloatData{T}, len::Int) where {T<:Unsigned} + vals = (Val(:bits), Val(:descending)) + f = get_elem(n, len - 1, vals...) + r = get_elem(n, len , vals...) + new{T}(n, len, f, r) + end +end + +function BigFloatDataRoundingIncrementHelper(n::BigFloatData{T}, len::Int) where {T<:Unsigned} + BigFloatDataRoundingIncrementHelper{T}(n, len) +end + +(h::BigFloatDataRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit + +(h::BigFloatDataRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit + +function (h::BigFloatDataRoundingIncrementHelper)(::Rounding.StickyBit) + v = Val(:bits) + n = h.n + tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v) +end diff --git a/base/reduce.jl b/base/reduce.jl index 61a0f466b2902..25466eed4a105 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -4,14 +4,6 @@ ###### Generic (map)reduce functions ###### -if Int === Int32 - const SmallSigned = Union{Int8,Int16} - const SmallUnsigned = Union{UInt8,UInt16} -else - const SmallSigned = Union{Int8,Int16,Int32} - const SmallUnsigned = Union{UInt8,UInt16,UInt32} -end - abstract type AbstractBroadcasted end const AbstractArrayOrBroadcasted = Union{AbstractArray, AbstractBroadcasted} @@ -22,8 +14,8 @@ The reduction operator used in `sum`. The main difference from [`+`](@ref) is th integers are promoted to `Int`/`UInt`. """ add_sum(x, y) = x + y -add_sum(x::SmallSigned, y::SmallSigned) = Int(x) + Int(y) -add_sum(x::SmallUnsigned, y::SmallUnsigned) = UInt(x) + UInt(y) +add_sum(x::BitSignedSmall, y::BitSignedSmall) = Int(x) + Int(y) +add_sum(x::BitUnsignedSmall, y::BitUnsignedSmall) = UInt(x) + UInt(y) add_sum(x::Real, y::Real)::Real = x + y """ @@ -33,8 +25,8 @@ The reduction operator used in `prod`. The main difference from [`*`](@ref) is t integers are promoted to `Int`/`UInt`. 
""" mul_prod(x, y) = x * y -mul_prod(x::SmallSigned, y::SmallSigned) = Int(x) * Int(y) -mul_prod(x::SmallUnsigned, y::SmallUnsigned) = UInt(x) * UInt(y) +mul_prod(x::BitSignedSmall, y::BitSignedSmall) = Int(x) * Int(y) +mul_prod(x::BitUnsignedSmall, y::BitUnsignedSmall) = UInt(x) * UInt(y) mul_prod(x::Real, y::Real)::Real = x * y ## foldl && mapfoldl @@ -51,15 +43,15 @@ function foldl_impl(op::OP, nt, itr) where {OP} end function _foldl_impl(op::OP, init, itr) where {OP} - # Unroll the while loop once; if init is known, the call to op may - # be evaluated at compile time + # Unroll the loop once to check if the iterator is empty. + # If init is known, the call to op may be evaluated at compile time y = iterate(itr) y === nothing && return init v = op(init, y[1]) - while true - y = iterate(itr, y[2]) - y === nothing && break - v = op(v, y[1]) + # Using a for loop is more performant than a while loop (see #56492) + # This unrolls the loop a second time before entering the body + for x in Iterators.rest(itr, y[2]) + v = op(v, x) end return v end @@ -305,7 +297,7 @@ implementations may reuse the return value of `f` for elements that appear multi guaranteed left or right associativity and invocation of `f` for every value. """ mapreduce(f, op, itr; kw...) = mapfoldl(f, op, itr; kw...) -mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...) +mapreduce(f, op, itr, itrs...; kw...) = reduce(op, Generator(f, itr, itrs...); kw...) # Note: sum_seq usually uses four or more accumulators after partial # unrolling, so each accumulator gets at most 256 numbers @@ -316,10 +308,11 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096 # handling empty arrays -_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed")) -_empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError(""" - reducing with $f over an empty collection of element type $T is not allowed. - You may be able to prevent this error by supplying an `init` value to the reducer.""")) +_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer")) +reduce_empty(f, T) = _empty_reduce_error() +mapreduce_empty(f, op, T) = _empty_reduce_error() +reduce_empty(f, ::Type{Union{}}, splat...) = _empty_reduce_error() +mapreduce_empty(f, op, ::Type{Union{}}, splat...) = _empty_reduce_error() """ Base.reduce_empty(op, T) @@ -339,23 +332,19 @@ is generally ambiguous, and especially so when the element type is unknown). As an alternative, consider supplying an `init` value to the reducer. 
""" -reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error(+, Union{}) reduce_empty(::typeof(+), ::Type{T}) where {T} = zero(T) reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int) -reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error(*, Union{}) reduce_empty(::typeof(*), ::Type{T}) where {T} = one(T) reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = "" reduce_empty(::typeof(&), ::Type{Bool}) = true reduce_empty(::typeof(|), ::Type{Bool}) = false -reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error(add_sum, Union{}) reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T) -reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned} = zero(Int) -reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt) -reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error(mul_prod, Union{}) +reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:BitSignedSmall} = zero(Int) +reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:BitUnsignedSmall} = zero(UInt) reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T) -reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned} = one(Int) -reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt) +reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitSignedSmall} = one(Int) +reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitUnsignedSmall} = one(UInt) reduce_empty(op::BottomRF, ::Type{T}) where {T} = reduce_empty(op.rf, T) reduce_empty(op::MappingRF, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf, T) @@ -405,11 +394,11 @@ reduce_first(::typeof(+), x::Bool) = Int(x) reduce_first(::typeof(*), x::AbstractChar) = string(x) reduce_first(::typeof(add_sum), x) = reduce_first(+, x) -reduce_first(::typeof(add_sum), x::SmallSigned) = Int(x) -reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x) +reduce_first(::typeof(add_sum), x::BitSignedSmall) = Int(x) +reduce_first(::typeof(add_sum), x::BitUnsignedSmall) = UInt(x) reduce_first(::typeof(mul_prod), x) = reduce_first(*, x) -reduce_first(::typeof(mul_prod), x::SmallSigned) = Int(x) -reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x) +reduce_first(::typeof(mul_prod), x::BitSignedSmall) = Int(x) +reduce_first(::typeof(mul_prod), x::BitUnsignedSmall) = UInt(x) """ Base.mapreduce_first(f, op, x) @@ -483,8 +472,8 @@ elements are not reordered if you use an ordered collection. julia> reduce(*, [2; 3; 4]) 24 -julia> reduce(*, [2; 3; 4]; init=-1) --24 +julia> reduce(*, Int[]; init=1) +1 ``` """ reduce(op, itr; kw...) = mapreduce(identity, op, itr; kw...) @@ -649,11 +638,11 @@ function mapreduce_impl(f, op::Union{typeof(max), typeof(min)}, start = first + 1 simdstop = start + chunk_len - 4 while simdstop <= last - 3 - @inbounds for i in start:4:simdstop - v1 = _fast(op, v1, f(A[i+0])) - v2 = _fast(op, v2, f(A[i+1])) - v3 = _fast(op, v3, f(A[i+2])) - v4 = _fast(op, v4, f(A[i+3])) + for i in start:4:simdstop + v1 = _fast(op, v1, f(@inbounds(A[i+0]))) + v2 = _fast(op, v2, f(@inbounds(A[i+1]))) + v3 = _fast(op, v3, f(@inbounds(A[i+2]))) + v4 = _fast(op, v4, f(@inbounds(A[i+3]))) end checkbounds(A, simdstop+3) start += chunk_len @@ -753,7 +742,7 @@ julia> maximum([1,2,3]) 3 julia> maximum(()) -ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer +ERROR: ArgumentError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer Stacktrace: [...] 
@@ -785,7 +774,7 @@ julia> minimum([1,2,3]) 1 julia> minimum([]) -ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer +ERROR: ArgumentError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer Stacktrace: [...] @@ -877,11 +866,12 @@ end """ findmax(f, domain) -> (f(x), index) -Return a pair of a value in the codomain (outputs of `f`) and the index of +Return a pair of a value in the codomain (outputs of `f`) and the index or key of the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is maximised. If there are multiple maximal points, then the first one will be returned. -`domain` must be a non-empty iterable. +`domain` must be a non-empty iterable supporting [`keys`](@ref). Indices +are of the same type as those returned by [`keys(domain)`](@ref). Values are compared with `isless`. @@ -915,6 +905,9 @@ Return the maximal element of the collection `itr` and its index or key. If there are multiple maximal elements, then the first one will be returned. Values are compared with `isless`. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + See also: [`findmin`](@ref), [`argmax`](@ref), [`maximum`](@ref). # Examples @@ -936,12 +929,15 @@ _findmax(a, ::Colon) = findmax(identity, a) """ findmin(f, domain) -> (f(x), index) -Return a pair of a value in the codomain (outputs of `f`) and the index of +Return a pair of a value in the codomain (outputs of `f`) and the index or key of the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is minimised. If there are multiple minimal points, then the first one will be returned. `domain` must be a non-empty iterable. +Indices are of the same type as those returned by [`keys(domain)`](@ref) +and [`pairs(domain)`](@ref). + `NaN` is treated as less than all other values except `missing`. !!! compat "Julia 1.7" @@ -975,6 +971,9 @@ Return the minimal element of the collection `itr` and its index or key. If there are multiple minimal elements, then the first one will be returned. `NaN` is treated as less than all other values except `missing`. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + See also: [`findmax`](@ref), [`argmin`](@ref), [`minimum`](@ref). # Examples @@ -1027,6 +1026,9 @@ If there are multiple maximal elements, then the first one will be returned. The collection must not be empty. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + Values are compared with `isless`. See also: [`argmin`](@ref), [`findmax`](@ref). @@ -1082,6 +1084,9 @@ If there are multiple minimal elements, then the first one will be returned. The collection must not be empty. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + `NaN` is treated as less than all other values except `missing`. See also: [`argmax`](@ref), [`findmin`](@ref). @@ -1100,227 +1105,6 @@ julia> argmin([7, 1, 1, NaN]) """ argmin(itr) = findmin(itr)[2] -## all & any - -""" - any(itr) -> Bool - -Test whether any elements of a boolean collection are `true`, returning `true` as -soon as the first `true` value in `itr` is encountered (short-circuiting). To -short-circuit on `false`, use [`all`](@ref). 
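The documentation additions above stress that the reported position comes from `keys(itr)`/`pairs(itr)`; concretely:

```julia
# Vector keys are 1-based integers, Matrix keys are CartesianIndex,
# and Dict keys are the dictionary's own keys.
@assert findmax([7, 1, 9, 3]) == (9, 3)
@assert findmax([1 2; 3 4]) == (4, CartesianIndex(2, 2))
@assert findmin(Dict(:a => 3, :b => -1, :c => 5)) == (-1, :b)
```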
- -If the input contains [`missing`](@ref) values, return `missing` if all non-missing -values are `false` (or equivalently, if the input contains no `true` value), following -[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). - -See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), , [`||`](@ref). - -# Examples -```jldoctest -julia> a = [true,false,false,true] -4-element Vector{Bool}: - 1 - 0 - 0 - 1 - -julia> any(a) -true - -julia> any((println(i); v) for (i, v) in enumerate(a)) -1 -true - -julia> any([missing, true]) -true - -julia> any([false, missing]) -missing -``` -""" -any(itr) = any(identity, itr) - -""" - all(itr) -> Bool - -Test whether all elements of a boolean collection are `true`, returning `false` as -soon as the first `false` value in `itr` is encountered (short-circuiting). To -short-circuit on `true`, use [`any`](@ref). - -If the input contains [`missing`](@ref) values, return `missing` if all non-missing -values are `true` (or equivalently, if the input contains no `false` value), following -[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). - -See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), , [`&&`](@ref), [`allunique`](@ref). - -# Examples -```jldoctest -julia> a = [true,false,false,true] -4-element Vector{Bool}: - 1 - 0 - 0 - 1 - -julia> all(a) -false - -julia> all((println(i); v) for (i, v) in enumerate(a)) -1 -2 -false - -julia> all([missing, false]) -false - -julia> all([true, missing]) -missing -``` -""" -all(itr) = all(identity, itr) - -""" - any(p, itr) -> Bool - -Determine whether predicate `p` returns `true` for any elements of `itr`, returning -`true` as soon as the first item in `itr` for which `p` returns `true` is encountered -(short-circuiting). To short-circuit on `false`, use [`all`](@ref). - -If the input contains [`missing`](@ref) values, return `missing` if all non-missing -values are `false` (or equivalently, if the input contains no `true` value), following -[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). - -# Examples -```jldoctest -julia> any(i->(4<=i<=6), [3,5,7]) -true - -julia> any(i -> (println(i); i > 3), 1:10) -1 -2 -3 -4 -true - -julia> any(i -> i > 0, [1, missing]) -true - -julia> any(i -> i > 0, [-1, missing]) -missing - -julia> any(i -> i > 0, [-1, 0]) -false -``` -""" -any(f, itr) = _any(f, itr, :) - -function _any(f, itr, ::Colon) - anymissing = false - for x in itr - v = f(x) - if ismissing(v) - anymissing = true - elseif v - return true - end - end - return anymissing ? missing : false -end - -# Specialized versions of any(f, ::Tuple) -# We fall back to the for loop implementation all elements have the same type or -# if the tuple is too large. -function any(f, itr::Tuple) - if itr isa NTuple || length(itr) > 32 - return _any(f, itr, :) - end - _any_tuple(f, false, itr...) -end - -@inline function _any_tuple(f, anymissing, x, rest...) - v = f(x) - if ismissing(v) - anymissing = true - elseif v - return true - end - return _any_tuple(f, anymissing, rest...) -end -@inline _any_tuple(f, anymissing) = anymissing ? missing : false - -""" - all(p, itr) -> Bool - -Determine whether predicate `p` returns `true` for all elements of `itr`, returning -`false` as soon as the first item in `itr` for which `p` returns `false` is encountered -(short-circuiting). To short-circuit on `true`, use [`any`](@ref). 
- -If the input contains [`missing`](@ref) values, return `missing` if all non-missing -values are `true` (or equivalently, if the input contains no `false` value), following -[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic). - -# Examples -```jldoctest -julia> all(i->(4<=i<=6), [4,5,6]) -true - -julia> all(i -> (println(i); i < 3), 1:10) -1 -2 -3 -false - -julia> all(i -> i > 0, [1, missing]) -missing - -julia> all(i -> i > 0, [-1, missing]) -false - -julia> all(i -> i > 0, [1, 2]) -true -``` -""" -all(f, itr) = _all(f, itr, :) - -function _all(f, itr, ::Colon) - anymissing = false - for x in itr - v = f(x) - if ismissing(v) - anymissing = true - # this syntax allows throwing a TypeError for non-Bool, for consistency with any - elseif v - continue - else - return false - end - end - return anymissing ? missing : true -end - -# Specialized versions of all(f, ::Tuple), -# This is similar to any(f, ::Tuple) defined above. -function all(f, itr::Tuple) - if itr isa NTuple || length(itr) > 32 - return _all(f, itr, :) - end - _all_tuple(f, false, itr...) -end - -@inline function _all_tuple(f, anymissing, x, rest...) - v = f(x) - if ismissing(v) - anymissing = true - # this syntax allows throwing a TypeError for non-Bool, for consistency with any - elseif v - nothing - else - return false - end - return _all_tuple(f, anymissing, rest...) -end -@inline _all_tuple(f, anymissing) = anymissing ? missing : true - ## count _bool(f) = x->f(x)::Bool diff --git a/base/reducedim.jl b/base/reducedim.jl index c1c58ccdfefed..0478afe1a46b6 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -17,59 +17,21 @@ reduced_indices(a::AbstractArrayOrBroadcasted, region) = reduced_indices(axes(a) # for reductions that keep 0 dims as 0 reduced_indices0(a::AbstractArray, region) = reduced_indices0(axes(a), region) -function reduced_indices(inds::Indices{N}, d::Int) where N - d < 1 && throw(ArgumentError("dimension must be ≥ 1, got $d")) - if d == 1 - return (reduced_index(inds[1]), tail(inds)...)::typeof(inds) - elseif 1 < d <= N - return tuple(inds[1:d-1]..., oftype(inds[d], reduced_index(inds[d])), inds[d+1:N]...)::typeof(inds) - else - return inds - end -end - -function reduced_indices0(inds::Indices{N}, d::Int) where N - d < 1 && throw(ArgumentError("dimension must be ≥ 1, got $d")) - if d <= N - ind = inds[d] - rd = isempty(ind) ? ind : reduced_index(inds[d]) - if d == 1 - return (rd, tail(inds)...)::typeof(inds) - else - return tuple(inds[1:d-1]..., oftype(inds[d], rd), inds[d+1:N]...)::typeof(inds) - end - else - return inds - end +function reduced_indices(axs::Indices{N}, region) where N + _check_valid_region(region) + ntuple(d -> d in region ? reduced_index(axs[d]) : axs[d], Val(N)) end -function reduced_indices(inds::Indices{N}, region) where N - rinds = collect(inds) - for i in region - isa(i, Integer) || throw(ArgumentError("reduced dimension(s) must be integers")) - d = Int(i) - if d < 1 - throw(ArgumentError("region dimension(s) must be ≥ 1, got $d")) - elseif d <= N - rinds[d] = reduced_index(rinds[d]) - end - end - tuple(rinds...)::typeof(inds) +function reduced_indices0(axs::Indices{N}, region) where N + _check_valid_region(region) + ntuple(d -> d in region && !isempty(axs[d]) ? 
reduced_index(axs[d]) : axs[d], Val(N)) end -function reduced_indices0(inds::Indices{N}, region) where N - rinds = collect(inds) - for i in region - isa(i, Integer) || throw(ArgumentError("reduced dimension(s) must be integers")) - d = Int(i) - if d < 1 - throw(ArgumentError("region dimension(s) must be ≥ 1, got $d")) - elseif d <= N - rind = rinds[d] - rinds[d] = isempty(rind) ? rind : reduced_index(rind) - end +function _check_valid_region(region) + for d in region + isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers")) + Int(d) < 1 && throw(ArgumentError("region dimension(s) must be ≥ 1, got $d")) end - tuple(rinds...)::typeof(inds) end ###### Generic reduction functions ##### @@ -146,16 +108,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min T = _realtype(f, promote_union(eltype(A))) Tr = v0 isa T ? T : typeof(v0) - # but NaNs and missing need to be avoided as initial values + # but NaNs, missing and unordered values need to be avoided as initial values if v0 isa Number && isnan(v0) # v0 is NaN v0 = oftype(v0, $initval) elseif isunordered(v0) # v0 is missing or a third-party unordered value Tnm = nonmissingtype(Tr) - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = $typeextreme(Tnm) + if Tnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = $typeextreme(Tnm) + elseif !all(isunordered, A1) + v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) + end end # v0 may have changed type. Tr = v0 isa T ? T : typeof(v0) @@ -186,12 +150,18 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray # but NaNs and missing need to be avoided as initial values if v0[1] isa Number && isnan(v0[1]) + # v0 is NaN v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf) elseif isunordered(v0[1]) # v0 is missing or a third-party unordered value - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax)) + Tminnm = nonmissingtype(Tmin) + Tmaxnm = nonmissingtype(Tmax) + if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} && + Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = (typemax(Tminnm), typemin(Tmaxnm)) + elseif !all(isunordered, A1) + v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) + end end # v0 may have changed type. Tmin = v0[1] isa T ? T : typeof(v0[1]) @@ -226,11 +196,8 @@ end ## generic (map)reduction -has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false -has_fast_linear_indexing(a::Array) = true -has_fast_linear_indexing(::Union{Number,Ref,AbstractChar}) = true # 0d objects, for Broadcasted -has_fast_linear_indexing(bc::Broadcast.Broadcasted) = - all(has_fast_linear_indexing, bc.args) +has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear() +has_fast_linear_indexing(a::AbstractVector) = true function check_reducedims(R, A) # Check whether R has compatible dimensions w.r.t. 
A for reduction @@ -291,8 +258,9 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) # use mapreduce_impl, which is probably better tuned to achieve higher performance nslices = div(length(A), lsiz) ibase = first(LinearIndices(A))-1 - for i = 1:nslices - @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz)) + for i in eachindex(R) + r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz)) + @inbounds R[i] = r ibase += lsiz end return R @@ -302,19 +270,20 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) if reducedim1(R, A) # keep the accumulator as a local variable when reducing along the first dimension i1 = first(axes1(R)) - @inbounds for IA in CartesianIndices(indsAt) + for IA in CartesianIndices(indsAt) IR = Broadcast.newindex(IA, keep, Idefault) - r = R[i1,IR] + @inbounds r = R[i1,IR] @simd for i in axes(A, 1) - r = op(r, f(A[i, IA])) + r = op(r, f(@inbounds(A[i, IA]))) end - R[i1,IR] = r + @inbounds R[i1,IR] = r end else - @inbounds for IA in CartesianIndices(indsAt) + for IA in CartesianIndices(indsAt) IR = Broadcast.newindex(IA, keep, Idefault) @simd for i in axes(A, 1) - R[i,IR] = op(R[i,IR], f(A[i,IA])) + v = op(@inbounds(R[i,IR]), f(@inbounds(A[i,IA]))) + @inbounds R[i,IR] = v end end end @@ -356,8 +325,8 @@ julia> mapreduce(isodd, |, a, dims=1) """ mapreduce(f, op, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) = _mapreduce_dim(f, op, init, A, dims) -mapreduce(f, op, A::AbstractArrayOrBroadcasted...; kw...) = - reduce(op, map(f, A...); kw...) +mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted...; kw...) = + reduce(op, map(f, A, B...); kw...) _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) = mapfoldl_impl(f, op, nt, A) @@ -448,6 +417,8 @@ _count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_s Count the number of elements in `A` for which `f` returns `true` over the singleton dimensions of `r`, writing the result into `r` in-place. +$(_DOCS_ALIASING_WARNING) + !!! compat "Julia 1.5" inplace `count!` was added in Julia 1.5. @@ -525,8 +496,8 @@ sum(f, A::AbstractArray; dims) sum!(r, A) Sum elements of `A` over the singleton dimensions of `r`, and write results to `r`. -Note that since the sum! function is intended to operate without making any allocations, -the target should not alias with the source. + +$(_DOCS_ALIASING_WARNING) # Examples ```jldoctest @@ -601,6 +572,8 @@ prod(f, A::AbstractArray; dims) Multiply elements of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -678,6 +651,8 @@ maximum(f, A::AbstractArray; dims) Compute the maximum value of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -755,6 +730,8 @@ minimum(f, A::AbstractArray; dims) Compute the minimum value of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -820,6 +797,8 @@ extrema(f, A::AbstractArray; dims) Compute the minimum and maximum value of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + !!! compat "Julia 1.8" This method requires Julia 1.8 or later. 
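Since several of the in-place reducer docstrings above now interpolate `_DOCS_ALIASING_WARNING`, here is the intended non-aliasing usage pattern, as a sketch:

```julia
A = [1 2; 3 4]

# Reduce over the dimensions that are singletons in the destination.
col_sums = sum!(zeros(Int, 1, 2), A)        # sums down each column
row_maxs = maximum!(zeros(Int, 2, 1), A)    # maximum across each row
@assert col_sums == [4 6]
@assert row_maxs == reshape([2, 4], 2, 1)

# The destination must not share memory with the source; e.g.
# sum!(view(A, 1:1, :), A) falls under the documented aliasing warning.
```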
@@ -895,6 +874,8 @@ all(::Function, ::AbstractArray; dims) Test whether all values in `A` along the singleton dimensions of `r` are `true`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [true false; true false] @@ -968,6 +949,8 @@ any(::Function, ::AbstractArray; dims) Test whether any values in `A` along the singleton dimensions of `r` are `true`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [true false; true false] @@ -1044,33 +1027,33 @@ function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N} zi = zero(eltype(ks)) if reducedim1(Rval, A) i1 = first(axes1(Rval)) - @inbounds for IA in CartesianIndices(indsAt) + for IA in CartesianIndices(indsAt) IR = Broadcast.newindex(IA, keep, Idefault) - tmpRv = Rval[i1,IR] - tmpRi = Rind[i1,IR] + @inbounds tmpRv = Rval[i1,IR] + @inbounds tmpRi = Rind[i1,IR] for i in axes(A,1) k, kss = y::Tuple - tmpAv = f(A[i,IA]) + tmpAv = f(@inbounds(A[i,IA])) if tmpRi == zi || op(tmpRv, tmpAv) tmpRv = tmpAv tmpRi = k end y = iterate(ks, kss) end - Rval[i1,IR] = tmpRv - Rind[i1,IR] = tmpRi + @inbounds Rval[i1,IR] = tmpRv + @inbounds Rind[i1,IR] = tmpRi end else - @inbounds for IA in CartesianIndices(indsAt) + for IA in CartesianIndices(indsAt) IR = Broadcast.newindex(IA, keep, Idefault) for i in axes(A, 1) k, kss = y::Tuple - tmpAv = f(A[i,IA]) - tmpRv = Rval[i,IR] - tmpRi = Rind[i,IR] + tmpAv = f(@inbounds(A[i,IA])) + @inbounds tmpRv = Rval[i,IR] + @inbounds tmpRi = Rind[i,IR] if tmpRi == zi || op(tmpRv, tmpAv) - Rval[i,IR] = tmpAv - Rind[i,IR] = k + @inbounds Rval[i,IR] = tmpAv + @inbounds Rind[i,IR] = k end y = iterate(ks, kss) end @@ -1085,6 +1068,8 @@ end Find the minimum of `A` and the corresponding linear index along singleton dimensions of `rval` and `rind`, and store the results in `rval` and `rind`. `NaN` is treated as less than all other values except `missing`. + +$(_DOCS_ALIASING_WARNING) """ function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) @@ -1156,6 +1141,8 @@ end Find the maximum of `A` and the corresponding linear index along singleton dimensions of `rval` and `rind`, and store the results in `rval` and `rind`. `NaN` is treated as greater than all other values except `missing`. + +$(_DOCS_ALIASING_WARNING) """ function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) diff --git a/base/reflection.jl b/base/reflection.jl index bbcd6cad27128..f9c5dd9765533 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1,965 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# name and module reflection - -""" - parentmodule(m::Module) -> Module - -Get a module's enclosing `Module`. `Main` is its own parent. - -See also: [`names`](@ref), [`nameof`](@ref), [`fullname`](@ref), [`@__MODULE__`](@ref). - -# Examples -```jldoctest -julia> parentmodule(Main) -Main - -julia> parentmodule(Base.Broadcast) -Base -``` -""" -parentmodule(m::Module) = ccall(:jl_module_parent, Ref{Module}, (Any,), m) - -""" - moduleroot(m::Module) -> Module - -Find the root module of a given module. This is the first module in the chain of -parent modules of `m` which is either a registered root module or which is its -own parent module. 
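The `findminmax!` kernel above backs both the mutating `findmax!`/`findmin!` and `findmax(A; dims)`; a small usage example of the latter:

```julia
A = [1 3; 2 4]
vals, inds = findmax(A; dims = 1)

@assert vals == [2 4]
@assert inds == [CartesianIndex(2, 1) CartesianIndex(2, 2)]
@assert A[inds] == vals        # the returned keys index straight back into A
```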
-""" -function moduleroot(m::Module) - while true - is_root_module(m) && return m - p = parentmodule(m) - p === m && return m - m = p - end -end - -""" - @__MODULE__ -> Module - -Get the `Module` of the toplevel eval, -which is the `Module` code is currently being read from. -""" -macro __MODULE__() - return __module__ -end - -""" - fullname(m::Module) - -Get the fully-qualified name of a module as a tuple of symbols. For example, - -# Examples -```jldoctest -julia> fullname(Base.Iterators) -(:Base, :Iterators) - -julia> fullname(Main) -(:Main,) -``` -""" -function fullname(m::Module) - mn = nameof(m) - if m === Main || m === Base || m === Core - return (mn,) - end - mp = parentmodule(m) - if mp === m - return (mn,) - end - return (fullname(mp)..., mn) -end - -""" - names(x::Module; all::Bool = false, imported::Bool = false) - -Get an array of the names exported by a `Module`, excluding deprecated names. -If `all` is true, then the list also includes non-exported names defined in the module, -deprecated names, and compiler-generated names. -If `imported` is true, then names explicitly imported from other modules -are also included. - -As a special case, all names defined in `Main` are considered \"exported\", -since it is not idiomatic to explicitly export names from `Main`. - -See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref). -""" -names(m::Module; all::Bool = false, imported::Bool = false) = - sort!(unsorted_names(m; all, imported)) -unsorted_names(m::Module; all::Bool = false, imported::Bool = false) = - ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported) - -isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0 -isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0 -isbindingresolved(m::Module, var::Symbol) = ccall(:jl_binding_resolved_p, Cint, (Any, Any), m, var) != 0 - -function binding_module(m::Module, s::Symbol) - p = ccall(:jl_get_module_of_binding, Ptr{Cvoid}, (Any, Any), m, s) - p == C_NULL && return m - return unsafe_pointer_to_objref(p)::Module -end - -const _NAMEDTUPLE_NAME = NamedTuple.body.body.name - -function _fieldnames(@nospecialize t) - if t.name === _NAMEDTUPLE_NAME - if t.parameters[1] isa Tuple - return t.parameters[1] - else - throw(ArgumentError("type does not have definite field names")) - end - end - return t.name.names -end - -""" - fieldname(x::DataType, i::Integer) - -Get the name of field `i` of a `DataType`. - -# Examples -```jldoctest -julia> fieldname(Rational, 1) -:num - -julia> fieldname(Rational, 2) -:den -``` -""" -function fieldname(t::DataType, i::Integer) - throw_not_def_field() = throw(ArgumentError("type does not have definite field names")) - function throw_field_access(t, i, n_fields) - field_label = n_fields == 1 ? "field" : "fields" - throw(ArgumentError("Cannot access field $i since type $t only has $n_fields $field_label.")) - end - throw_need_pos_int(i) = throw(ArgumentError("Field numbers must be positive integers. $i is invalid.")) - - isabstracttype(t) && throw_not_def_field() - names = _fieldnames(t) - n_fields = length(names)::Int - i > n_fields && throw_field_access(t, i, n_fields) - i < 1 && throw_need_pos_int(i) - return @inbounds names[i]::Symbol -end - -fieldname(t::UnionAll, i::Integer) = fieldname(unwrap_unionall(t), i) -fieldname(t::Type{<:Tuple}, i::Integer) = - i < 1 || i > fieldcount(t) ? 
throw(BoundsError(t, i)) : Int(i) - -""" - fieldnames(x::DataType) - -Get a tuple with the names of the fields of a `DataType`. - -See also [`propertynames`](@ref), [`hasfield`](@ref). - -# Examples -```jldoctest -julia> fieldnames(Rational) -(:num, :den) - -julia> fieldnames(typeof(1+im)) -(:re, :im) -``` -""" -fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough - (_fieldnames(t)...,))::Tuple{Vararg{Symbol}} -fieldnames(t::UnionAll) = fieldnames(unwrap_unionall(t)) -fieldnames(::Core.TypeofBottom) = - throw(ArgumentError("The empty type does not have field names since it does not have instances.")) -fieldnames(t::Type{<:Tuple}) = ntuple(identity, fieldcount(t)) - -""" - hasfield(T::Type, name::Symbol) - -Return a boolean indicating whether `T` has `name` as one of its own fields. - -See also [`fieldnames`](@ref), [`fieldcount`](@ref), [`hasproperty`](@ref). - -!!! compat "Julia 1.2" - This function requires at least Julia 1.2. - -# Examples -```jldoctest -julia> struct Foo - bar::Int - end - -julia> hasfield(Foo, :bar) -true - -julia> hasfield(Foo, :x) -false -``` -""" -hasfield(T::Type, name::Symbol) = fieldindex(T, name, false) > 0 - -""" - nameof(t::DataType) -> Symbol - -Get the name of a (potentially `UnionAll`-wrapped) `DataType` (without its parent module) -as a symbol. - -# Examples -```jldoctest -julia> module Foo - struct S{T} - end - end -Foo - -julia> nameof(Foo.S{T} where T) -:S -``` -""" -nameof(t::DataType) = t.name.name -nameof(t::UnionAll) = nameof(unwrap_unionall(t))::Symbol - -""" - parentmodule(t::DataType) -> Module - -Determine the module containing the definition of a (potentially `UnionAll`-wrapped) `DataType`. - -# Examples -```jldoctest -julia> module Foo - struct Int end - end -Foo - -julia> parentmodule(Int) -Core - -julia> parentmodule(Foo.Int) -Foo -``` -""" -parentmodule(t::DataType) = t.name.module -parentmodule(t::UnionAll) = parentmodule(unwrap_unionall(t)) - -""" - isconst(m::Module, s::Symbol) -> Bool - -Determine whether a global is declared `const` in a given module `m`. -""" -isconst(m::Module, s::Symbol) = - ccall(:jl_is_const, Cint, (Any, Any), m, s) != 0 - -function isconst(g::GlobalRef) - return ccall(:jl_globalref_is_const, Cint, (Any,), g) != 0 -end - -""" - isconst(t::DataType, s::Union{Int,Symbol}) -> Bool - -Determine whether a field `s` is declared `const` in a given type `t`. -""" -function isconst(@nospecialize(t::Type), s::Symbol) - t = unwrap_unionall(t) - isa(t, DataType) || return false - return isconst(t, fieldindex(t, s, false)) -end -function isconst(@nospecialize(t::Type), s::Int) - t = unwrap_unionall(t) - # TODO: what to do for `Union`? - isa(t, DataType) || return false # uncertain - ismutabletype(t) || return true # immutable structs are always const - 1 <= s <= length(t.name.names) || return true # OOB reads are "const" since they always throw - constfields = t.name.constfields - constfields === C_NULL && return false - s -= 1 - return unsafe_load(Ptr{UInt32}(constfields), 1 + s÷32) & (1 << (s%32)) != 0 -end - -""" - isfieldatomic(t::DataType, s::Union{Int,Symbol}) -> Bool - -Determine whether a field `s` is declared `@atomic` in a given type `t`. -""" -function isfieldatomic(@nospecialize(t::Type), s::Symbol) - t = unwrap_unionall(t) - isa(t, DataType) || return false - return isfieldatomic(t, fieldindex(t, s, false)) -end -function isfieldatomic(@nospecialize(t::Type), s::Int) - t = unwrap_unionall(t) - # TODO: what to do for `Union`? 
- isa(t, DataType) || return false # uncertain - ismutabletype(t) || return false # immutable structs are never atomic - 1 <= s <= length(t.name.names) || return false # OOB reads are not atomic (they always throw) - atomicfields = t.name.atomicfields - atomicfields === C_NULL && return false - s -= 1 - return unsafe_load(Ptr{UInt32}(atomicfields), 1 + s÷32) & (1 << (s%32)) != 0 -end - -""" - @locals() - -Construct a dictionary of the names (as symbols) and values of all local -variables defined as of the call site. - -!!! compat "Julia 1.1" - This macro requires at least Julia 1.1. - -# Examples -```jldoctest -julia> let x = 1, y = 2 - Base.@locals - end -Dict{Symbol, Any} with 2 entries: - :y => 2 - :x => 1 - -julia> function f(x) - local y - show(Base.@locals); println() - for i = 1:1 - show(Base.@locals); println() - end - y = 2 - show(Base.@locals); println() - nothing - end; - -julia> f(42) -Dict{Symbol, Any}(:x => 42) -Dict{Symbol, Any}(:i => 1, :x => 42) -Dict{Symbol, Any}(:y => 2, :x => 42) -``` -""" -macro locals() - return Expr(:locals) -end - -# concrete datatype predicates - -datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x) - -struct DataTypeLayout - size::UInt32 - nfields::UInt32 - npointers::UInt32 - firstptr::Int32 - alignment::UInt16 - flags::UInt16 - # haspadding : 1; - # fielddesc_type : 2; -end - -""" - Base.datatype_alignment(dt::DataType) -> Int - -Memory allocation minimum alignment for instances of this type. -Can be called on any `isconcretetype`. -""" -function datatype_alignment(dt::DataType) - @_foldable_meta - dt.layout == C_NULL && throw(UndefRefError()) - alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return Int(alignment) -end - -function uniontype_layout(@nospecialize T::Type) - sz = RefValue{Csize_t}(0) - algn = RefValue{Csize_t}(0) - isinline = ccall(:jl_islayout_inline, Cint, (Any, Ptr{Csize_t}, Ptr{Csize_t}), T, sz, algn) != 0 - (isinline, Int(sz[]), Int(algn[])) -end - -LLT_ALIGN(x, sz) = (x + sz - 1) & -sz - -# amount of total space taken by T when stored in a container -function aligned_sizeof(@nospecialize T::Type) - @_foldable_meta - if isa(T, Union) - if allocatedinline(T) - # NOTE this check is equivalent to `isbitsunion(T)`, we can improve type - # inference in the second branch with the outer `isa(T, Union)` check - _, sz, al = uniontype_layout(T) - return LLT_ALIGN(sz, al) - end - elseif allocatedinline(T) - al = datatype_alignment(T) - return LLT_ALIGN(Core.sizeof(T), al) - end - return Core.sizeof(Ptr{Cvoid}) -end - -gc_alignment(sz::Integer) = Int(ccall(:jl_alignment, Cint, (Csize_t,), sz)) -gc_alignment(T::Type) = gc_alignment(Core.sizeof(T)) - -""" - Base.datatype_haspadding(dt::DataType) -> Bool - -Return whether the fields of instances of this type are packed in memory, -with no intervening padding bytes. -Can be called on any `isconcretetype`. -""" -function datatype_haspadding(dt::DataType) - @_foldable_meta - dt.layout == C_NULL && throw(UndefRefError()) - flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags - return flags & 1 == 1 -end - -""" - Base.datatype_nfields(dt::DataType) -> Bool - -Return the number of fields known to this datatype's layout. -Can be called on any `isconcretetype`. 
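These layout queries are deleted here but presumably relocated elsewhere in this PR rather than removed; for orientation, here is what they report on a throwaway struct (`Padded` is a hypothetical name, and the numbers assume a 64-bit platform):

```julia
struct Padded
    a::Int8     # 1 byte, then 7 bytes of padding
    b::Int64    # must land on an 8-byte boundary
end

@assert sizeof(Padded) == 16
@assert fieldoffset(Padded, 2) == 8
@assert Base.datatype_alignment(Padded) == 8
@assert Base.datatype_haspadding(Padded)
```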
-""" -function datatype_nfields(dt::DataType) - @_foldable_meta - dt.layout == C_NULL && throw(UndefRefError()) - return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields -end - -""" - Base.datatype_pointerfree(dt::DataType) -> Bool - -Return whether instances of this type can contain references to gc-managed memory. -Can be called on any `isconcretetype`. -""" -function datatype_pointerfree(dt::DataType) - @_foldable_meta - dt.layout == C_NULL && throw(UndefRefError()) - npointers = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers - return npointers == 0 -end - -""" - Base.datatype_fielddesc_type(dt::DataType) -> Int - -Return the size in bytes of each field-description entry in the layout array, -located at `(dt.layout + sizeof(DataTypeLayout))`. -Can be called on any `isconcretetype`. - -See also [`fieldoffset`](@ref). -""" -function datatype_fielddesc_type(dt::DataType) - @_foldable_meta - dt.layout == C_NULL && throw(UndefRefError()) - flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags - return (flags >> 1) & 3 -end - -# For type stability, we only expose a single struct that describes everything -struct FieldDesc - isforeign::Bool - isptr::Bool - size::UInt32 - offset::UInt32 -end - -struct FieldDescStorage{T} - ptrsize::T - offset::T -end -FieldDesc(fd::FieldDescStorage{T}) where {T} = - FieldDesc(false, fd.ptrsize & 1 != 0, - fd.ptrsize >> 1, fd.offset) - -struct DataTypeFieldDesc - dt::DataType - function DataTypeFieldDesc(dt::DataType) - dt.layout == C_NULL && throw(UndefRefError()) - new(dt) - end -end - -function getindex(dtfd::DataTypeFieldDesc, i::Int) - layout_ptr = convert(Ptr{DataTypeLayout}, dtfd.dt.layout) - fd_ptr = layout_ptr + sizeof(DataTypeLayout) - layout = unsafe_load(layout_ptr) - fielddesc_type = (layout.flags >> 1) & 3 - nfields = layout.nfields - @boundscheck ((1 <= i <= nfields) || throw(BoundsError(dtfd, i))) - if fielddesc_type == 0 - return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt8}}(fd_ptr), i)) - elseif fielddesc_type == 1 - return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt16}}(fd_ptr), i)) - elseif fielddesc_type == 2 - return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt32}}(fd_ptr), i)) - else - # fielddesc_type == 3 - return FieldDesc(true, true, 0, 0) - end -end - -""" - ismutable(v) -> Bool - -Return `true` if and only if value `v` is mutable. See [Mutable Composite Types](@ref) -for a discussion of immutability. Note that this function works on values, so if you -give it a `DataType`, it will tell you that a value of the type is mutable. - -!!! note - For technical reasons, `ismutable` returns `true` for values of certain special types - (for example `String` and `Symbol`) even though they cannot be mutated in a permissible way. - -See also [`isbits`](@ref), [`isstructtype`](@ref). - -# Examples -```jldoctest -julia> ismutable(1) -false - -julia> ismutable([1,2]) -true -``` - -!!! compat "Julia 1.5" - This function requires at least Julia 1.5. -""" -ismutable(@nospecialize(x)) = (@_total_meta; typeof(x).name.flags & 0x2 == 0x2) - -""" - ismutabletype(T) -> Bool - -Determine whether type `T` was declared as a mutable type -(i.e. using `mutable struct` keyword). - -!!! compat "Julia 1.7" - This function requires at least Julia 1.7. -""" -function ismutabletype(@nospecialize t) - @_total_meta - t = unwrap_unionall(t) - # TODO: what to do for `Union`? 
- return isa(t, DataType) && t.name.flags & 0x2 == 0x2 -end - -""" - isstructtype(T) -> Bool - -Determine whether type `T` was declared as a struct type -(i.e. using the `struct` or `mutable struct` keyword). -""" -function isstructtype(@nospecialize t) - @_total_meta - t = unwrap_unionall(t) - # TODO: what to do for `Union`? - isa(t, DataType) || return false - return !isprimitivetype(t) && !isabstracttype(t) -end - -""" - isprimitivetype(T) -> Bool - -Determine whether type `T` was declared as a primitive type -(i.e. using the `primitive type` syntax). -""" -function isprimitivetype(@nospecialize t) - @_total_meta - t = unwrap_unionall(t) - # TODO: what to do for `Union`? - isa(t, DataType) || return false - return (t.flags & 0x0080) == 0x0080 -end - -""" - isbitstype(T) - -Return `true` if type `T` is a "plain data" type, -meaning it is immutable and contains no references to other values, -only `primitive` types and other `isbitstype` types. -Typical examples are numeric types such as [`UInt8`](@ref), -[`Float64`](@ref), and [`Complex{Float64}`](@ref). -This category of types is significant since they are valid as type parameters, -may not track [`isdefined`](@ref) / [`isassigned`](@ref) status, -and have a defined layout that is compatible with C. - -See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref). - -# Examples -```jldoctest -julia> isbitstype(Complex{Float64}) -true - -julia> isbitstype(Complex) -false -``` -""" -isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0008) == 0x0008) - -""" - isbits(x) - -Return `true` if `x` is an instance of an [`isbitstype`](@ref) type. -""" -isbits(@nospecialize x) = isbitstype(typeof(x)) - -""" - objectid(x) -> UInt - -Get a hash value for `x` based on object identity. - -If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`. - -See also [`hash`](@ref), [`IdDict`](@ref). -""" -function objectid(x) - # objectid is foldable iff it isn't a pointer. - if isidentityfree(typeof(x)) - return _foldable_objectid(x) - end - return _objectid(x) -end -function _foldable_objectid(@nospecialize(x)) - @_foldable_meta - _objectid(x) -end -_objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x) - -""" - isdispatchtuple(T) - -Determine whether type `T` is a tuple "leaf type", -meaning it could appear as a type signature in dispatch -and has no subtypes (or supertypes) which could appear in a call. -""" -isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0004) == 0x0004) - -datatype_ismutationfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0100) == 0x0100) - -""" - ismutationfree(T) - -Determine whether type `T` is mutation free in the sense that no mutable memory -is reachable from this type (either in the type itself) or through any fields. -Note that the type itself need not be immutable. For example, an empty mutable -type is `ismutabletype`, but also `ismutationfree`. -""" -function ismutationfree(@nospecialize(t)) - t = unwrap_unionall(t) - if isa(t, DataType) - return datatype_ismutationfree(t) - elseif isa(t, Union) - return ismutationfree(t.a) && ismutationfree(t.b) - end - # TypeVar, etc. - return false -end - -datatype_isidentityfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0200) == 0x0200) - -""" - isidentityfree(T) - -Determine whether type `T` is identity free in the sense that this type or any -reachable through its fields has non-content-based identity. 
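Editor's note (not part of the patch): several of the predicates moved in this hunk (`ismutabletype`, `isprimitivetype`, `isdispatchtuple`, `ismutationfree`, `objectid`) have no examples of their own, so here is a sketch of typical answers. `Base.ismutationfree` is internal, and its results or availability may differ across Julia versions.

```julia
# Minimal sketch, not part of the patch; `Base.ismutationfree` is an internal query.
isbitstype(Complex{Float64})        # true  -- immutable, no references
ismutabletype(Vector{Int})          # true  -- declared `mutable struct`
isprimitivetype(Float64)            # true  -- declared with `primitive type`
isdispatchtuple(Tuple{Int,String})  # true  -- a concrete dispatch signature
isdispatchtuple(Tuple{Integer})     # false -- abstract element type

Base.ismutationfree(Int)            # true  -- no mutable memory reachable
Base.ismutationfree(Vector{Int})    # false

a = [1, 2]; b = [1, 2]
objectid(a) == objectid(b)          # false -- equal contents, distinct identities
objectid(a) == objectid(a)          # true
```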
-""" -function isidentityfree(@nospecialize(t)) - t = unwrap_unionall(t) - if isa(t, DataType) - return datatype_isidentityfree(t) - elseif isa(t, Union) - return isidentityfree(t.a) && isidentityfree(t.b) - end - # TypeVar, etc. - return false -end - -iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom)) -isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t) -has_free_typevars(@nospecialize(t)) = ccall(:jl_has_free_typevars, Cint, (Any,), t) != 0 - -# equivalent to isa(v, Type) && isdispatchtuple(Tuple{v}) || v === Union{} -# and is thus perhaps most similar to the old (pre-1.0) `isleaftype` query -function isdispatchelem(@nospecialize v) - return (v === Bottom) || (v === typeof(Bottom)) || isconcretedispatch(v) || - (isType(v) && !has_free_typevars(v)) -end - -const _TYPE_NAME = Type.body.name -isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME - -""" - isconcretetype(T) - -Determine whether type `T` is a concrete type, meaning it could have direct instances -(values `x` such that `typeof(x) === T`). - -See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref). - -# Examples -```jldoctest -julia> isconcretetype(Complex) -false - -julia> isconcretetype(Complex{Float32}) -true - -julia> isconcretetype(Vector{Complex}) -true - -julia> isconcretetype(Vector{Complex{Float32}}) -true - -julia> isconcretetype(Union{}) -false - -julia> isconcretetype(Union{Int,String}) -false -``` -""" -isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0002) == 0x0002) - -""" - isabstracttype(T) - -Determine whether type `T` was declared as an abstract type -(i.e. using the `abstract type` syntax). - -# Examples -```jldoctest -julia> isabstracttype(AbstractArray) -true - -julia> isabstracttype(Vector) -false -``` -""" -function isabstracttype(@nospecialize(t)) - @_total_meta - t = unwrap_unionall(t) - # TODO: what to do for `Union`? - return isa(t, DataType) && (t.name.flags & 0x1) == 0x1 -end - -""" - Base.issingletontype(T) - -Determine whether type `T` has exactly one possible instance; for example, a -struct type with no fields. -""" -issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance)) - -""" - typeintersect(T::Type, S::Type) - -Compute a type that contains the intersection of `T` and `S`. Usually this will be the -smallest such type or one close to it. -""" -typeintersect(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_intersection, Any, (Any, Any), a::Type, b::Type)) - -morespecific(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_morespecific, Cint, (Any, Any), a::Type, b::Type) != 0) - -""" - fieldoffset(type, i) - -The byte offset of field `i` of a type relative to the data start. 
For example, we could -use it in the following manner to summarize information about a struct: - -```jldoctest -julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)]; - -julia> structinfo(Base.Filesystem.StatStruct) -13-element Vector{Tuple{UInt64, Symbol, Type}}: - (0x0000000000000000, :desc, Union{RawFD, String}) - (0x0000000000000008, :device, UInt64) - (0x0000000000000010, :inode, UInt64) - (0x0000000000000018, :mode, UInt64) - (0x0000000000000020, :nlink, Int64) - (0x0000000000000028, :uid, UInt64) - (0x0000000000000030, :gid, UInt64) - (0x0000000000000038, :rdev, UInt64) - (0x0000000000000040, :size, Int64) - (0x0000000000000048, :blksize, Int64) - (0x0000000000000050, :blocks, Int64) - (0x0000000000000058, :mtime, Float64) - (0x0000000000000060, :ctime, Float64) -``` -""" -fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx)) - -""" - fieldtype(T, name::Symbol | index::Int) - -Determine the declared type of a field (specified by name or index) in a composite DataType `T`. - -# Examples -```jldoctest -julia> struct Foo - x::Int64 - y::String - end - -julia> fieldtype(Foo, :x) -Int64 - -julia> fieldtype(Foo, 2) -String -``` -""" -fieldtype - -""" - Base.fieldindex(T, name::Symbol, err:Bool=true) - -Get the index of a named field, throwing an error if the field does not exist (when err==true) -or returning 0 (when err==false). - -# Examples -```jldoctest -julia> struct Foo - x::Int64 - y::String - end - -julia> Base.fieldindex(Foo, :z) -ERROR: type Foo has no field z -Stacktrace: -[...] - -julia> Base.fieldindex(Foo, :z, false) -0 -``` -""" -function fieldindex(T::DataType, name::Symbol, err::Bool=true) - return err ? _fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name) -end - -function _fieldindex_maythrow(T::DataType, name::Symbol) - @_foldable_meta - @noinline - return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1) -end - -function _fieldindex_nothrow(T::DataType, name::Symbol) - @_total_meta - @noinline - return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1) -end - -function fieldindex(t::UnionAll, name::Symbol, err::Bool=true) - t = argument_datatype(t) - if t === nothing - err && throw(ArgumentError("type does not have definite fields")) - return 0 - end - return fieldindex(t, name, err) -end - -function argument_datatype(@nospecialize t) - @_total_meta - @noinline - return ccall(:jl_argument_datatype, Any, (Any,), t)::Union{Nothing,DataType} -end - -function datatype_fieldcount(t::DataType) - if t.name === _NAMEDTUPLE_NAME - names, types = t.parameters[1], t.parameters[2] - if names isa Tuple - return length(names) - end - if types isa DataType && types <: Tuple - return fieldcount(types) - end - return nothing - elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) - return nothing - end - if isdefined(t, :types) - return length(t.types) - end - return length(t.name.names) -end - -""" - fieldcount(t::Type) - -Get the number of fields that an instance of the given type would have. -An error is thrown if the type is too abstract to determine this. 
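Editor's note (not part of the patch): a compact sketch tying together `fieldcount`, `Base.fieldindex`, `fieldtype`, and `fieldoffset` on a single struct. `Header` is a hypothetical type used only for illustration, and the offsets shown assume a typical 64-bit layout, so they may differ elsewhere.

```julia
# Minimal sketch, not part of the patch; offsets assume a typical 64-bit platform.
struct Header
    magic::UInt32
    version::UInt16
    payload_length::UInt64
end

fieldcount(Header)                    # 3
fieldtypes(Header)                    # (UInt32, UInt16, UInt64)
Base.fieldindex(Header, :version)     # 2
Base.fieldindex(Header, :crc, false)  # 0 -- no such field; err=false suppresses the error
fieldtype(Header, :payload_length)    # UInt64
fieldoffset(Header, 3)                # 0x0000000000000008 -- padded up to 8-byte alignment
```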
-""" -function fieldcount(@nospecialize t) - @_foldable_meta - if t isa UnionAll || t isa Union - t = argument_datatype(t) - if t === nothing - throw(ArgumentError("type does not have a definite number of fields")) - end - elseif t === Union{} - throw(ArgumentError("The empty type does not have a well-defined number of fields since it does not have instances.")) - end - if !(t isa DataType) - throw(TypeError(:fieldcount, DataType, t)) - end - fcount = datatype_fieldcount(t) - if fcount === nothing - throw(ArgumentError("type does not have a definite number of fields")) - end - return fcount -end - -""" - fieldtypes(T::Type) - -The declared types of all fields in a composite DataType `T` as a tuple. - -!!! compat "Julia 1.1" - This function requires at least Julia 1.1. - -# Examples -```jldoctest -julia> struct Foo - x::Int64 - y::String - end - -julia> fieldtypes(Foo) -(Int64, String) -``` -""" -fieldtypes(T::Type) = (@_foldable_meta; ntupleany(i -> fieldtype(T, i), fieldcount(T))) - -# return all instances, for types that can be enumerated - -""" - instances(T::Type) - -Return a collection of all instances of the given type, if applicable. Mostly used for -enumerated types (see `@enum`). - -# Example -```jldoctest -julia> @enum Color red blue green - -julia> instances(Color) -(red, blue, green) -``` -""" -function instances end - -function to_tuple_type(@nospecialize(t)) - if isa(t, Tuple) || isa(t, AbstractArray) || isa(t, SimpleVector) - t = Tuple{t...} - end - if isa(t, Type) && t <: Tuple - for p in (unwrap_unionall(t)::DataType).parameters - if isa(p, Core.TypeofVararg) - p = unwrapva(p) - end - if !(isa(p, Type) || isa(p, TypeVar)) - error("argument tuple type must contain only types") - end - end - else - error("expected tuple type") - end - t -end - -function signature_type(@nospecialize(f), @nospecialize(argtypes)) - argtypes = to_tuple_type(argtypes) - ft = Core.Typeof(f) - u = unwrap_unionall(argtypes)::DataType - return rewrap_unionall(Tuple{ft, u.parameters...}, argtypes) -end - """ code_lowered(f, types; generated=true, debuginfo=:default) @@ -973,7 +13,7 @@ yielded by expanding the generators. The keyword `debuginfo` controls the amount of code metadata present in the output. -Note that an error will be thrown if `types` are not leaf types when `generated` is +Note that an error will be thrown if `types` are not concrete types when `generated` is `true` and any of the corresponding methods are an `@generated` method. """ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=true, debuginfo::Symbol=:default) @@ -986,122 +26,38 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool= throw(ArgumentError("'debuginfo' must be either :source or :none")) end world = get_world_counter() - return map(method_instances(f, t, world)) do m - if generated && hasgenerator(m) - if may_invoke_generator(m) - return ccall(:jl_code_for_staged, Any, (Any, UInt), m, world)::CodeInfo - else - error("Could not expand generator for `@generated` method ", m, ". 
", - "This can happen if the provided argument types (", t, ") are ", - "not leaf types, but the `generated` argument is `true`.") - end - end - code = uncompressed_ir(m.def::Method) - debuginfo === :none && remove_linenums!(code) - return code - end -end - -hasgenerator(m::Method) = isdefined(m, :generator) -hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method) - -# low-level method lookup functions used by the compiler - -unionlen(x::Union) = unionlen(x.a) + unionlen(x.b) -unionlen(@nospecialize(x)) = 1 - -_uniontypes(x::Union, ts) = (_uniontypes(x.a,ts); _uniontypes(x.b,ts); ts) -_uniontypes(@nospecialize(x), ts) = (push!(ts, x); ts) -uniontypes(@nospecialize(x)) = _uniontypes(x, Any[]) - -function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt) - tt = signature_type(f, t) - return _methods_by_ftype(tt, lim, world) -end - -function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt) - return _methods_by_ftype(t, nothing, lim, world) -end -function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt) - return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL)) -end -function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32}) - return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Vector{Any},Nothing} -end - -# high-level, more convenient method lookup functions - -# type for reflecting and pretty-printing a subset of methods -mutable struct MethodList <: AbstractArray{Method,1} - ms::Array{Method,1} - mt::Core.MethodTable -end - -size(m::MethodList) = size(m.ms) -getindex(m::MethodList, i::Integer) = m.ms[i] - -function MethodList(mt::Core.MethodTable) - ms = Method[] - visit(mt) do m - push!(ms, m) - end - return MethodList(ms, mt) -end - -""" - methods(f, [types], [module]) - -Return the method table for `f`. - -If `types` is specified, return an array of methods whose types match. -If `module` is specified, return an array of methods defined in that module. -A list of modules can also be specified as an array. - -!!! compat "Julia 1.4" - At least Julia 1.4 is required for specifying a module. - -See also: [`which`](@ref) and `@which`. -""" -function methods(@nospecialize(f), @nospecialize(t), - mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing) - world = get_world_counter() - # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually - ms = Method[] - for m in _methods(f, t, -1, world)::Vector - m = m::Core.MethodMatch - (mod === nothing || parentmodule(m.method) ∈ mod) && push!(ms, m.method) + world == typemax(UInt) && error("code reflection cannot be used from generated functions") + ret = CodeInfo[] + for m in method_instances(f, t, world) + if generated && hasgenerator(m) + if may_invoke_generator(m) + code = ccall(:jl_code_for_staged, Ref{CodeInfo}, (Any, UInt, Ptr{Cvoid}), m, world, C_NULL) + else + error("Could not expand generator for `@generated` method ", m, ". 
", + "This can happen if the provided argument types (", t, ") are ", + "not concrete types, but the `generated` argument is `true`.") + end + else + code = uncompressed_ir(m.def::Method) + debuginfo === :none && remove_linenums!(code) + end + push!(ret, code) end - MethodList(ms, typeof(f).name.mt) -end -methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,)) - -function methods_including_ambiguous(@nospecialize(f), @nospecialize(t)) - tt = signature_type(f, t) - world = get_world_counter() - (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && - error("code reflection cannot be used from generated functions") - min = RefValue{UInt}(typemin(UInt)) - max = RefValue{UInt}(typemax(UInt)) - ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector - return MethodList(Method[(m::Core.MethodMatch).method for m in ms], typeof(f).name.mt) + return ret end -function methods(@nospecialize(f), - mod::Union{Module,AbstractArray{Module},Nothing}=nothing) - # return all matches - return methods(f, Tuple{Vararg{Any}}, mod) -end +# high-level, more convenient method lookup functions function visit(f, mt::Core.MethodTable) mt.defs !== nothing && visit(f, mt.defs) nothing end function visit(f, mc::Core.TypeMapLevel) - function avisit(f, e::Array{Any,1}) + function avisit(f, e::Memory{Any}) for i in 2:2:length(e) isassigned(e, i) || continue ei = e[i] - if ei isa Vector{Any} + if ei isa Memory{Any} for j in 2:2:length(ei) isassigned(ei, j) || continue visit(f, ei[j]) @@ -1112,16 +68,16 @@ function visit(f, mc::Core.TypeMapLevel) end end if mc.targ !== nothing - avisit(f, mc.targ::Vector{Any}) + avisit(f, mc.targ::Memory{Any}) end if mc.arg1 !== nothing - avisit(f, mc.arg1::Vector{Any}) + avisit(f, mc.arg1::Memory{Any}) end if mc.tname !== nothing - avisit(f, mc.tname::Vector{Any}) + avisit(f, mc.tname::Memory{Any}) end if mc.name1 !== nothing - avisit(f, mc.name1::Vector{Any}) + avisit(f, mc.name1::Memory{Any}) end mc.list !== nothing && visit(f, mc.list) mc.any !== nothing && visit(f, mc.any) @@ -1172,12 +128,10 @@ function length(mt::Core.MethodTable) end isempty(mt::Core.MethodTable) = (mt.defs === nothing) -uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m, m.source) : +uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m) : isdefined(m, :generator) ? 
error("Method is @generated; try `code_lowered` instead.") : error("Code for this Method is not available.") -_uncompressed_ir(m::Method, s::CodeInfo) = copy(s) -_uncompressed_ir(m::Method, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)::CodeInfo -_uncompressed_ir(ci::Core.CodeInstance, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo + # for backwards compat const uncompressed_ast = uncompressed_ir const _uncompressed_ast = _uncompressed_ir @@ -1188,130 +142,126 @@ function method_instances(@nospecialize(f), @nospecialize(t), world::UInt) # this make a better error message than the typeassert that follows world == typemax(UInt) && error("code reflection cannot be used from generated functions") for match in _methods_by_ftype(tt, -1, world)::Vector - instance = Core.Compiler.specialize_method(match) + instance = specialize_method(match::Core.MethodMatch) push!(results, instance) end return results end +function method_instance(@nospecialize(f), @nospecialize(t); + world=Base.get_world_counter(), method_table=nothing) + tt = signature_type(f, t) + mi = ccall(:jl_method_lookup_by_tt, Any, + (Any, Csize_t, Any), + tt, world, method_table) + return mi::Union{Nothing, MethodInstance} +end + default_debug_info_kind() = unsafe_load(cglobal(:jl_default_debug_info_kind, Cint)) # this type mirrors jl_cgparams_t (documented in julia.h) struct CodegenParams + """ + If enabled, generate the necessary code to support the --track-allocations + command line flag to julia itself. Note that the option itself does not enable + allocation tracking. Rather, it merely generates the support code necessary + to perform allocation tracking if requested by the command line option. + """ track_allocations::Cint + + """ + If enabled, generate the necessary code to support the --code-coverage + command line flag to julia itself. Note that the option itself does not enable + code coverage. Rather, it merely generates the support code necessary + to code coverage if requested by the command line option. + """ code_coverage::Cint + + """ + If enabled, force the compiler to use the specialized signature + for all generated functions, whenever legal. If disabled, the choice is made + heuristically and specsig is only used when deemed profitable. + """ prefer_specsig::Cint + + """ + If enabled, enable emission of `.debug_names` sections. + """ gnu_pubnames::Cint + + """ + Controls what level of debug info to emit. Currently supported values are: + - 0: no debug info + - 1: full debug info + - 2: Line tables only + - 3: Debug directives only + + The integer values currently match the llvm::DICompilerUnit::DebugEmissionKind enum, + although this is not guaranteed. + """ debug_info_kind::Cint + + """ + Controls the debug_info_level parameter, equivalent to the -g command line option. + """ + debug_info_level::Cint + + """ + If enabled, generate a GC safepoint at the entry to every function. Emitting + these extra safepoints can reduce the amount of time that other threads are + waiting for the currently running thread to reach a safepoint. The cost for + a safepoint is small, but non-zero. The option is enabled by default. + """ safepoint_on_entry::Cint - gcstack_arg::Cint - lookup::Ptr{Cvoid} + """ + If enabled, add an implicit argument to each function call that is used to + pass down the current task local state pointer. 
This argument is passed + using the `swiftself` convention, which in the ordinary case means that the + pointer is kept in a register and accesses are thus very fast. If this option + is disabled, the task local state pointer must be loaded from thread local + storage, which incurs a small amount of additional overhead. The option is enabled by + default. + """ + gcstack_arg::Cint - generic_context::Any + """ + If enabled, use the Julia PLT mechanism to support lazy-resolution of `ccall` + targets. The option may be disabled for use in environments where the julia + runtime is unavailable, but is otherwise recommended to be enabled, even if + lazy resolution is not required, as the Julia PLT mechanism may have superior + performance compared to the native platform mechanism. The options is enabled by default. + """ + use_jlplt::Cint + + """ + If enabled, only provably reachable code (from functions marked with `entrypoint`) is included + in the output system image. Errors or warnings can be given for call sites too dynamic to handle. + The option is disabled by default. (0=>disabled, 1=>safe (static errors), 2=>unsafe, 3=>unsafe plus warnings) + """ + trim::Cint function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, prefer_specsig::Bool=false, - gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), - safepoint_on_entry::Bool=true, - gcstack_arg::Bool=true, - lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})), - generic_context = nothing) + gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(), + debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true, + gcstack_arg::Bool=true, use_jlplt::Bool=true, trim::Cint=Cint(0)) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, - Cint(safepoint_on_entry), - Cint(gcstack_arg), - lookup, generic_context) + debug_info_level, Cint(safepoint_on_entry), + Cint(gcstack_arg), Cint(use_jlplt), Cint(trim)) end end -const SLOT_USED = 0x8 -ast_slotflag(@nospecialize(code), i) = ccall(:jl_ir_slotflag, UInt8, (Any, Csize_t), code, i - 1) - -""" - may_invoke_generator(method, atype, sparams) -> Bool - -Computes whether or not we may invoke the generator for the given `method` on -the given `atype` and `sparams`. For correctness, all generated function are -required to return monotonic answers. However, since we don't expect users to -be able to successfully implement this criterion, we only call generated -functions on concrete types. The one exception to this is that we allow calling -generators with abstract types if the generator does not use said abstract type -(and thus cannot incorrectly use it to break monotonicity). This function -computes whether we are in either of these cases. - -Unlike normal functions, the compilation heuristics still can't generate good dispatch -in some cases, but this may still allow inference not to fall over in some limited cases. -""" -function may_invoke_generator(mi::MethodInstance) - return may_invoke_generator(mi.def::Method, mi.specTypes, mi.sparam_vals) -end -function may_invoke_generator(method::Method, @nospecialize(atype), sparams::SimpleVector) - # If we have complete information, we may always call the generator - isdispatchtuple(atype) && return true - - # We don't have complete information, but it is possible that the generator - # syntactically doesn't make use of the information we don't have. Check - # for that. 
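Editor's note (not part of the patch): the new per-field documentation for `CodegenParams` above describes an internal mirror of `jl_cgparams_t`, so the accepted keywords vary between Julia versions. The sketch below uses only keywords that appear in both the old and new constructors in this hunk; the struct is normally consumed by internal codegen entry points rather than user code.

```julia
# Minimal sketch, not part of the patch. `CodegenParams` is internal; the keyword
# set shown here is common to the constructors on both sides of this hunk.
params = Base.CodegenParams(track_allocations = false,
                            code_coverage     = false,
                            prefer_specsig    = true)
params.safepoint_on_entry   # Cint(1) -- safepoints on function entry, enabled by default
```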
- - # For now, only handle the (common, generated by the frontend case) that the - # generator only has one method - generator = method.generator - isa(generator, Core.GeneratedFunctionStub) || return false - tt = Tuple{typeof(generator.gen), Vararg{Any}} - gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world) - gen_mthds isa Vector || return false - length(gen_mthds) == 1 || return false - - generator_method = first(gen_mthds).method - nsparams = length(sparams) - isdefined(generator_method, :source) || return false - code = generator_method.source - nslots = ccall(:jl_ir_nslots, Int, (Any,), code) - at = unwrap_unionall(atype) - at isa DataType || return false - (nslots >= 1 + length(sparams) + length(at.parameters)) || return false - - firstarg = 1 - for i = 1:nsparams - if isa(sparams[i], TypeVar) - if (ast_slotflag(code, firstarg + i) & SLOT_USED) != 0 - return false - end - end - end - nargs = Int(method.nargs) - non_va_args = method.isva ? nargs - 1 : nargs - for i = 1:non_va_args - if !isdispatchelem(at.parameters[i]) - if (ast_slotflag(code, firstarg + i + nsparams) & SLOT_USED) != 0 - return false - end - end - end - if method.isva - # If the va argument is used, we need to ensure that all arguments that - # contribute to the va tuple are dispatchelemes - if (ast_slotflag(code, firstarg + nargs + nsparams) & SLOT_USED) != 0 - for i = (non_va_args+1):length(at.parameters) - if !isdispatchelem(at.parameters[i]) - return false - end - end - end - end - return true -end +# this type mirrors jl_emission_params_t (documented in julia.h) +struct EmissionParams + emit_metadata::Cint -# give a decent error message if we try to instantiate a staged function on non-leaf types -function func_for_method_checked(m::Method, @nospecialize(types), sparams::SimpleVector) - if isdefined(m, :generator) && !may_invoke_generator(m, types, sparams) - error("cannot call @generated function `", m, "` ", - "with abstract argument types: ", types) + function EmissionParams(; emit_metadata::Bool=true) + return new(Cint(emit_metadata)) end - return m end """ @@ -1337,7 +287,7 @@ internals. - `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: optional, controls the abstract interpreter to use, use the native interpreter if not specified. -# Example +# Examples One can put the argument types in a tuple to get the corresponding `code_typed`. @@ -1352,7 +302,7 @@ julia> code_typed(+, (Float64, Float64)) """ function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); kwargs...) if isa(f, Core.OpaqueClosure) - return code_typed_opaque_closure(f; kwargs...) + return code_typed_opaque_closure(f, types; kwargs...) end tt = signature_type(f, types) return code_typed_by_type(tt; kwargs...) @@ -1370,6 +320,42 @@ function default_tt(@nospecialize(f)) end end +function raise_match_failure(name::Symbol, @nospecialize(tt)) + @noinline + sig_str = sprint(Base.show_tuple_as_call, Symbol(""), tt) + error("$name: unanalyzable call given $sig_str") +end + +const REFLECTION_COMPILER = RefValue{Union{Nothing, Module}}(nothing) + +function invoke_in_typeinf_world(args...) + vargs = Any[args...] + return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Any}, Cint), vargs, length(vargs)) +end + +function invoke_default_compiler(fname::Symbol, args...) + if REFLECTION_COMPILER[] === nothing + return invoke_in_typeinf_world(getglobal(Compiler, fname), args...) + else + return getglobal(REFLECTION_COMPILER[], fname)(args...) 
+ end +end + +function invoke_interp_compiler(interp, fname::Symbol, args...) + if interp === nothing + return invoke_default_compiler(fname, args...) + else + T = typeof(interp) + while true + Tname = typename(T).name + Tname === :Any && error("Expected Interpreter") + Tname === :AbstractInterpreter && break + T = supertype(T) + end + return getglobal(typename(T).module, fname)(args...) + end +end + """ code_typed_by_type(types::Type{<:Tuple}; ...) @@ -1380,7 +366,9 @@ function code_typed_by_type(@nospecialize(tt::Type); optimize::Bool=true, debuginfo::Symbol=:default, world::UInt=get_world_counter(), - interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") if @isdefined(IRShow) @@ -1392,37 +380,64 @@ function code_typed_by_type(@nospecialize(tt::Type); throw(ArgumentError("'debuginfo' must be either :source or :none")) end tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector + matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt) + matches === nothing && raise_match_failure(:code_typed, tt) asts = [] - for match in matches + for match in matches.matches match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, tt, match.sparams) - (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, optimize) + code = invoke_interp_compiler(passed_interp, :typeinf_code, interp, match, optimize) if code === nothing - push!(asts, meth => Any) + push!(asts, match.method => Any) else debuginfo === :none && remove_linenums!(code) - push!(asts, code => ty) + push!(asts, code => code.rettype) end end return asts end -function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure); - debuginfo::Symbol=:default, _...) 
- ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") +function get_oc_code_rt(passed_interp, oc::Core.OpaqueClosure, types, optimize::Bool) + @nospecialize oc types + ccall(:jl_is_in_pure_context, Bool, ()) && + error("code reflection cannot be used from generated functions") m = oc.source if isa(m, Method) - code = _uncompressed_ir(m, m.source) - debuginfo === :none && remove_linenums!(code) - # intersect the declared return type and the inferred return type (if available) - rt = typeintersect(code.rettype, typeof(oc).parameters[2]) - return Any[code => rt] + if isdefined(m, :source) + if optimize + tt = Tuple{typeof(oc.captures), to_tuple_type(types).parameters...} + mi = specialize_method(m, tt, Core.svec()) + interp = invoke_interp_compiler(passed_interp, :_default_interp, m.primary_world) + code = invoke_interp_compiler(passed_interp, :typeinf_code, interp, mi, optimize) + if code isa CodeInfo + return Pair{CodeInfo, Any}(code, code.rettype) + end + error("inference not successful") + else + code = _uncompressed_ir(m) + return Pair{CodeInfo, Any}(code, typeof(oc).parameters[2]) + end + else + # OC constructed from optimized IR + codeinst = m.specializations.cache + # XXX: the inferred field is not normally a CodeInfo, but this assumes it is guaranteed to be always + return Pair{CodeInfo, Any}(codeinst.inferred, codeinst.rettype) + end else error("encountered invalid Core.OpaqueClosure object") end end +function code_typed_opaque_closure(oc::Core.OpaqueClosure, types; + debuginfo::Symbol=:default, + optimize::Bool=true, + interp=nothing, + _...) + @nospecialize oc types + (code, rt) = get_oc_code_rt(interp, oc, types, optimize) + debuginfo === :none && remove_linenums!(code) + return Any[Pair{CodeInfo,Any}(code, rt)] +end + """ code_ircode(f, [types]) @@ -1440,13 +455,13 @@ internals. when looking up methods, use current world age if not specified. - `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: optional, controls the abstract interpreter to use, use the native interpreter if not specified. -- `optimize_until::Union{Integer,AbstractString,Nothing} = nothing`: optional, +- `optimize_until::Union{Int,String,Nothing} = nothing`: optional, controls the optimization passes to run. If it is a string, it specifies the name of the pass up to which the optimizer is run. If it is an integer, it specifies the number of passes to run. If it is `nothing` (default), all passes are run. -# Example +# Examples One can put the argument types in a tuple to get the corresponding `code_ircode`. @@ -1483,26 +498,22 @@ a full signature to query. function code_ircode_by_type( @nospecialize(tt::Type); world::UInt=get_world_counter(), - interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing}=nothing, + interp=nothing, + optimize_until::Union{Int,String,Nothing}=nothing, ) + passed_interp = interp + interp = passed_interp === nothing ? 
invoke_default_compiler(:_default_interp, world) : interp (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector + matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt) + matches === nothing && raise_match_failure(:code_ircode, tt) asts = [] - for match in matches + for match in matches.matches match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, tt, match.sparams) - (code, ty) = Core.Compiler.typeinf_ircode( - interp, - meth, - match.spec_types, - match.sparams, - optimize_until, - ) + (code, ty) = invoke_interp_compiler(passed_interp, :typeinf_ircode, interp, match, optimize_until) if code === nothing - push!(asts, meth => Any) + push!(asts, match.method => Any) else push!(asts, code => ty) end @@ -1510,16 +521,56 @@ function code_ircode_by_type( return asts end +function _builtin_return_type(passed_interp, interp, + @nospecialize(f::Core.Builtin), @nospecialize(types)) + argtypes = Any[to_tuple_type(types).parameters...] + rt = invoke_interp_compiler(passed_interp, :builtin_tfunction, interp, f, argtypes, nothing) + return invoke_interp_compiler(passed_interp, :widenconst, rt) +end + +function _builtin_effects(passed_interp, interp, + @nospecialize(f::Core.Builtin), @nospecialize(types)) + argtypes = Any[to_tuple_type(types).parameters...] + rt = invoke_interp_compiler(passed_interp, :builtin_tfunction, interp, f, argtypes, nothing) + return invoke_interp_compiler(passed_interp, :builtin_effects, + invoke_interp_compiler(passed_interp, :typeinf_lattice, interp), + f, argtypes, rt) +end + +function _builtin_exception_type(passed_interp, interp, + @nospecialize(f::Core.Builtin), @nospecialize(types)) + effects = _builtin_effects(passed_interp, interp, f, types) + return invoke_interp_compiler(passed_interp, :is_nothrow, effects) ? Union{} : Any +end + +check_generated_context(world::UInt) = + (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && + error("code reflection cannot be used from generated functions") + +# TODO rename `Base.return_types` to `Base.infer_return_types` """ - Base.return_types(f::Function, types::DataType=default_tt(f); - world::UInt=get_world_counter(), interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) + Base.return_types( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) -> rts::Vector{Any} Return a list of possible return types for a given function `f` and argument types `types`. The list corresponds to the results of type inference on all the possible method match candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods). -# Example +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `rts::Vector{Any}`: The list of return types that are figured out by inference on + methods matching with the given `f` and `types`. The list's order matches the order + returned by `methods(f, types)`. 
+ +# Examples ```julia julia> Base.return_types(sum, Tuple{Vector{Int}}) @@ -1529,9 +580,9 @@ julia> Base.return_types(sum, Tuple{Vector{Int}}) julia> methods(sum, (Union{Vector{Int},UnitRange{Int}},)) # 2 methods for generic function "sum" from Base: [1] sum(r::AbstractRange{<:Real}) - @ range.jl:1396 + @ range.jl:1399 [2] sum(a::AbstractArray; dims, kw...) - @ reducedim.jl:996 + @ reducedim.jl:1010 julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},)) 2-element Vector{Any}: @@ -1540,103 +591,320 @@ julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},)) ``` !!! warning - The `return_types` function should not be used from generated functions; + The `Base.return_types` function should not be used from generated functions; doing so will result in an error. """ function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)); world::UInt=get_world_counter(), - interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) - (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && - error("code reflection cannot be used from generated functions") + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp + check_generated_context(world) if isa(f, Core.OpaqueClosure) - _, rt = only(code_typed_opaque_closure(f)) + _, rt = only(code_typed_opaque_closure(f, types; Compiler)) return Any[rt] + elseif isa(f, Core.Builtin) + return Any[_builtin_return_type(passed_interp, interp, f, types)] end - - if isa(f, Core.Builtin) - argtypes = Any[to_tuple_type(types).parameters...] - rt = Core.Compiler.builtin_tfunction(interp, f, argtypes, nothing) - return Any[Core.Compiler.widenconst(rt)] - end - rts = [] tt = signature_type(f, types) - matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector - for match in matches - match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, types, match.sparams) - ty = Core.Compiler.typeinf_type(interp, meth, match.spec_types, match.sparams) + matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt) + matches === nothing && raise_match_failure(:return_types, tt) + rts = Any[] + for match in matches.matches + ty = invoke_interp_compiler(passed_interp, :typeinf_type, interp, match::Core.MethodMatch) push!(rts, something(ty, Any)) end return rts end """ - infer_effects(f, types=default_tt(f); world=get_world_counter(), interp=Core.Compiler.NativeInterpreter(world)) + Base.infer_return_type( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> rt::Type + +Returns an inferred return type of the function call specified by `f` and `types`. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `rt::Type`: An inferred return type of the function call specified by the given call signature. + +!!! note + Note that, different from [`Base.return_types`](@ref), this doesn't give you the list + return types of every possible method matching with the given `f` and `types`. 
+ It returns a single return type, taking into account all potential outcomes of + any function call entailed by the given signature type. + +# Examples + +```julia +julia> checksym(::Symbol) = :symbol; + +julia> checksym(x::Any) = x; + +julia> Base.infer_return_type(checksym, (Union{Symbol,String},)) +Union{String, Symbol} + +julia> Base.return_types(checksym, (Union{Symbol,String},)) +2-element Vector{Any}: + Symbol + Union{String, Symbol} +``` + +It's important to note the difference here: `Base.return_types` gives back inferred results +for each method that matches the given signature `checksum(::Union{Symbol,String})`. +On the other hand `Base.infer_return_type` returns one collective result that sums up all those possibilities. + +!!! warning + The `Base.infer_return_type` function should not be used from generated functions; + doing so will result in an error. +""" +function infer_return_type(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp + check_generated_context(world) + if isa(f, Core.OpaqueClosure) + return last(only(code_typed_opaque_closure(f, types; interp=passed_interp))) + elseif isa(f, Core.Builtin) + return _builtin_return_type(passed_interp, interp, f, types) + end + tt = signature_type(f, types) + matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt) + matches === nothing && raise_match_failure(:infer_return_type, tt) + rt = Union{} + for match in matches.matches + ty = invoke_interp_compiler(passed_interp, :typeinf_type, interp, match::Core.MethodMatch) + rt = invoke_interp_compiler(passed_interp, :tmerge, rt, something(ty, Any)) + end + return rt +end + +""" + Base.infer_exception_types( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) -> excts::Vector{Any} + +Return a list of possible exception types for a given function `f` and argument types `types`. +The list corresponds to the results of type inference on all the possible method match +candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods). +It works like [`Base.return_types`](@ref), but it infers the exception types instead of the return types. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `excts::Vector{Any}`: The list of exception types that are figured out by inference on + methods matching with the given `f` and `types`. The list's order matches the order + returned by `methods(f, types)`. 
+ +# Examples + +```julia +julia> throw_if_number(::Number) = error("number is given"); + +julia> throw_if_number(::Any) = nothing; + +julia> Base.infer_exception_types(throw_if_number, (Int,)) +1-element Vector{Any}: + ErrorException + +julia> methods(throw_if_number, (Any,)) +# 2 methods for generic function "throw_if_number" from Main: + [1] throw_if_number(x::Number) + @ REPL[1]:1 + [2] throw_if_number(::Any) + @ REPL[2]:1 + +julia> Base.infer_exception_types(throw_if_number, (Any,)) +2-element Vector{Any}: + ErrorException # the result of inference on `throw_if_number(::Number)` + Union{} # the result of inference on `throw_if_number(::Any)` +``` + +!!! warning + The `Base.infer_exception_types` function should not be used from generated functions; + doing so will result in an error. +""" +function infer_exception_types(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp + check_generated_context(world) + if isa(f, Core.OpaqueClosure) + return Any[Any] # TODO + elseif isa(f, Core.Builtin) + return Any[_builtin_exception_type(passed_interp, interp, f, types)] + end + tt = signature_type(f, types) + matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt) + matches === nothing && raise_match_failure(:infer_exception_types, tt) + excts = Any[] + for match in matches.matches + frame = invoke_interp_compiler(passed_interp, :typeinf_frame, interp, match::Core.MethodMatch, #=run_optimizer=#false) + if frame === nothing + exct = Any + else + exct = invoke_interp_compiler(passed_interp, :widenconst, frame.result.exc_result) + end + push!(excts, exct) + end + return excts +end + +""" + Base.infer_exception_type( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> exct::Type + +Returns the type of exception potentially thrown by the function call specified by `f` and `types`. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `exct::Type`: The inferred type of exception that can be thrown by the function call + specified by the given call signature. + +!!! note + Note that, different from [`Base.infer_exception_types`](@ref), this doesn't give you the list + exception types for every possible matching method with the given `f` and `types`. + It returns a single exception type, taking into account all potential outcomes of + any function call entailed by the given signature type. + +# Examples + +```julia +julia> f1(x) = x * 2; + +julia> Base.infer_exception_type(f1, (Int,)) +Union{} +``` + +The exception inferred as `Union{}` indicates that `f1(::Int)` will not throw any exception. + +```julia +julia> f2(x::Int) = x * 2; + +julia> Base.infer_exception_type(f2, (Integer,)) +MethodError +``` + +This case is pretty much the same as with `f1`, but there's a key difference to note. For +`f2`, the argument type is limited to `Int`, while the argument type is given as `Tuple{Integer}`. 
+Because of this, taking into account the chance of the method error entailed by the call +signature, the exception type is widened to `MethodError`. + +!!! warning + The `Base.infer_exception_type` function should not be used from generated functions; + doing so will result in an error. +""" +function infer_exception_type(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp + check_generated_context(world) + if isa(f, Core.OpaqueClosure) + return Any # TODO + elseif isa(f, Core.Builtin) + return _builtin_exception_type(passed_interp, interp, f, types) + end + tt = signature_type(f, types) + exct = invoke_interp_compiler(passed_interp, :_infer_exception_type, interp, tt, false) + exct === nothing && raise_match_failure(:infer_exception_type, tt) + return exct +end + +""" + Base.infer_effects( + f, types=default_tt(f); + optimize::Bool=true, + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> effects::Effects -Compute the `Effects` of a function `f` with argument types `types`. The `Effects` represents the computational effects of the function call, such as whether it is free of side effects, guaranteed not to throw an exception, guaranteed to terminate, etc. The `world` and `interp` arguments specify the world counter and the native interpreter to use for the analysis. +Returns the possible computation effects of the function call specified by `f` and `types`. # Arguments - `f`: The function to analyze. - `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `optimize` (optional): Whether to run additional effects refinements based on post-optimization analysis. - `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. -- `interp` (optional): The native interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. # Returns -- `effects::Effects`: The computed effects of the function call. +- `effects::Effects`: The computed effects of the function call specified by the given call signature. + See the documentation of [`Effects`](@ref Core.Compiler.Effects) or [`Base.@assume_effects`](@ref) + for more information on the various effect properties. + +!!! note + Note that, different from [`Base.return_types`](@ref), this doesn't give you the list + effect analysis results for every possible matching method with the given `f` and `types`. + It returns a single effect, taking into account all potential outcomes of any function + call entailed by the given signature type. -# Example +# Examples ```julia -julia> function foo(x) - y = x * 2 - return y - end; +julia> f1(x) = x * 2; -julia> effects = Base.infer_effects(foo, (Int,)) +julia> Base.infer_effects(f1, (Int,)) (+c,+e,+n,+t,+s,+m,+i) ``` -This function will return an `Effects` object with information about the computational effects of the function `foo` when called with an `Int` argument. See the documentation for `Effects` for more information on the various effect properties. 
+This function will return an `Effects` object with information about the computational +effects of the function `f1` when called with an `Int` argument. + +```julia +julia> f2(x::Int) = x * 2; + +julia> Base.infer_effects(f2, (Integer,)) +(+c,+e,!n,+t,+s,+m,+i) +``` + +This case is pretty much the same as with `f1`, but there's a key difference to note. For +`f2`, the argument type is limited to `Int`, while the argument type is given as `Tuple{Integer}`. +Because of this, taking into account the chance of the method error entailed by the call +signature, the `:nothrow` bit gets tainted. !!! warning - The `infer_effects` function should not be used from generated functions; + The `Base.infer_effects` function should not be used from generated functions; doing so will result in an error. +$(Compiler.effects_key_string) + # See Also -- [`Core.Compiler.Effects`](@ref): A type representing the computational effects of a method call. +- [`Compiler.Effects`](@ref): A type representing the computational effects of a method call. - [`Base.@assume_effects`](@ref): A macro for making assumptions about the effects of a method. """ function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && - error("code reflection cannot be used from generated functions") + optimize::Bool=true, + world::UInt=get_world_counter(), + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? invoke_default_compiler(:_default_interp, world) : interp + check_generated_context(world) if isa(f, Core.Builtin) - types = to_tuple_type(types) - argtypes = Any[Core.Compiler.Const(f), types.parameters...] - rt = Core.Compiler.builtin_tfunction(interp, f, argtypes[2:end], nothing) - return Core.Compiler.builtin_effects(Core.Compiler.typeinf_lattice(interp), f, - Core.Compiler.ArgInfo(nothing, argtypes), rt) + return _builtin_effects(passed_interp, interp, f, types) end tt = signature_type(f, types) - result = Core.Compiler.findall(tt, Core.Compiler.method_table(interp)) - if result === missing - # unanalyzable call, return the unknown effects - return Core.Compiler.Effects() - end - (; matches) = result - effects = Core.Compiler.EFFECTS_TOTAL - if matches.ambig || !any(match::Core.MethodMatch->match.fully_covers, matches.matches) - # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. - effects = Core.Compiler.Effects(effects; nothrow=false) - end - for match in matches.matches - match = match::Core.MethodMatch - frame = Core.Compiler.typeinf_frame(interp, - match.method, match.spec_types, match.sparams, #=run_optimizer=#false) - frame === nothing && return Core.Compiler.Effects() - effects = Core.Compiler.merge_effects(effects, frame.ipo_effects) - end + effects = invoke_interp_compiler(passed_interp, :_infer_effects, interp, tt, optimize) + effects === nothing && raise_match_failure(:infer_effects, tt) return effects end @@ -1653,23 +921,24 @@ end function print_statement_costs(io::IO, @nospecialize(tt::Type); world::UInt=get_world_counter(), - interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + interp=nothing) + passed_interp = interp + interp = passed_interp === nothing ? 
invoke_default_compiler(:_default_interp, world) : interp tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector - params = Core.Compiler.OptimizationParams(interp) + world == typemax(UInt) && error("code reflection cannot be used from generated functions") + matches = invoke_interp_compiler(passed_interp, :_findall_matches, interp, tt) + matches === nothing && raise_match_failure(:print_statement_costs, tt) cst = Int[] - for match in matches + for match in matches.matches match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, tt, match.sparams) - println(io, meth) - (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, true) + println(io, match.method) + code = invoke_interp_compiler(passed_interp, :typeinf_code, interp, match, true) if code === nothing println(io, " inference not successful") else empty!(cst) resize!(cst, length(code.code)) - sptypes = Core.Compiler.VarState[Core.Compiler.VarState(sp, false) for sp in match.sparams] - maxcost = Core.Compiler.statement_costs!(cst, code.code, code, sptypes, false, params) + maxcost = invoke_interp_compiler(passed_interp, :statement_costs!, interp, cst, code.code, code, match) nd = ndigits(maxcost) irshow_config = IRShow.IRShowConfig() do io, linestart, idx print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " ") @@ -1684,18 +953,11 @@ end print_statement_costs(args...; kwargs...) = print_statement_costs(stdout, args...; kwargs...) function _which(@nospecialize(tt::Type); - method_table::Union{Nothing,Core.MethodTable,Core.Compiler.MethodTableView}=nothing, + method_table #=::Union{Nothing,Core.MethodTable,Compiler.MethodTableView}=# =nothing, world::UInt=get_world_counter(), raise::Bool=true) world == typemax(UInt) && error("code reflection cannot be used from generated functions") - if method_table === nothing - table = Core.Compiler.InternalMethodTable(world) - elseif method_table isa Core.MethodTable - table = Core.Compiler.OverlayMethodTable(world, method_table) - else - table = method_table - end - match, = Core.Compiler.findsup(tt, table) + match, = invoke_default_compiler(:findsup_mt, tt, world, method_table) if match === nothing raise && error("no unique matching method found for the specified argument types") return nothing @@ -1710,11 +972,21 @@ Returns the method of `f` (a `Method` object) that would be called for arguments If `types` is an abstract type, then the method that would be called by `invoke` is returned. -See also: [`parentmodule`](@ref), and `@which` and `@edit` in [`InteractiveUtils`](@ref man-interactive-utils). +See also: [`parentmodule`](@ref), [`@which`](@ref Main.InteractiveUtils.@which), and [`@edit`](@ref Main.InteractiveUtils.@edit). """ function which(@nospecialize(f), @nospecialize(t)) tt = signature_type(f, t) - return which(tt) + world = get_world_counter() + match, _ = invoke_default_compiler(:_findsup, tt, nothing, world) + if match === nothing + me = MethodError(f, t, world) + ee = ErrorException(sprint(io -> begin + println(io, "Calling invoke(f, t, args...) would throw:"); + Base.showerror(io, me); + end)) + throw(ee) + end + return match.method end """ @@ -1831,7 +1103,7 @@ true ``` """ function hasmethod(@nospecialize(f), @nospecialize(t)) - return Core._hasmethod(f, t isa Type ? 
t : to_tuple_type(t)) + return Core._hasmethod(signature_type(f, t)) end function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t)) @@ -1841,6 +1113,7 @@ end function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter()) @nospecialize + world == typemax(UInt) && error("code reflection cannot be used from generated functions") isempty(kwnames) && return hasmethod(f, t; world) t = to_tuple_type(t) ft = Core.Typeof(f) @@ -1853,7 +1126,7 @@ function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_c for kw in kws endswith(String(kw), "...") && return true end - kwnames = Symbol[kwnames[i] for i in 1:length(kwnames)] + kwnames = collect(kwnames) return issubset(kwnames, kws) end @@ -1893,6 +1166,8 @@ function bodyfunction(basemethod::Method) else return nothing end + elseif isa(fsym, Core.SSAValue) + fsym = ast.code[fsym.id] else return nothing end @@ -1946,10 +1221,11 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) has_bottom_parameter(ti) && return false end world = get_world_counter() + world == typemax(UInt) && return true # intersecting methods are always ambiguous in the generator world, which is true, albeit maybe confusing for some min = Ref{UInt}(typemin(UInt)) max = Ref{UInt}(typemax(UInt)) has_ambig = Ref{Int32}(0) - ms = _methods_by_ftype(ti, nothing, -1, world, true, min, max, has_ambig)::Vector + ms = collect(Core.MethodMatch, _methods_by_ftype(ti, nothing, -1, world, true, min, max, has_ambig)::Vector) has_ambig[] == 0 && return false if !ambiguous_bottom filter!(ms) do m::Core.MethodMatch @@ -1962,7 +1238,6 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) # report the other ambiguous pair) have_m1 = have_m2 = false for match in ms - match = match::Core.MethodMatch m = match.method m === m1 && (have_m1 = true) m === m2 && (have_m2 = true) @@ -1980,7 +1255,7 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) for match in ms m = match.method match.fully_covers || continue - if minmax === nothing || morespecific(m.sig, minmax.sig) + if minmax === nothing || morespecific(m, minmax) minmax = m end end @@ -1990,8 +1265,8 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) for match in ms m = match.method m === minmax && continue - if !morespecific(minmax.sig, m.sig) - if match.fully_covers || !morespecific(m.sig, minmax.sig) + if !morespecific(minmax, m) + if match.fully_covers || !morespecific(m, minmax) return true end end @@ -2021,73 +1296,6 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) return true end -""" - delete_method(m::Method) - -Make method `m` uncallable and force recompilation of any methods that use(d) it. -""" -function delete_method(m::Method) - ccall(:jl_method_table_disable, Cvoid, (Any, Any), get_methodtable(m), m) -end - -function get_methodtable(m::Method) - return ccall(:jl_method_get_table, Any, (Any,), m)::Core.MethodTable -end - -""" - has_bottom_parameter(t) -> Bool - -Determine whether `t` is a Type for which one or more of its parameters is `Union{}`. 
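For context on the `hasmethod` hunks above, here is a brief sketch of the keyword-aware form (public API since Julia 1.2); the function `g` is invented for illustration.

```julia
# Sketch: querying positional signatures and keyword acceptance with hasmethod.
g(x::Int; base=10) = string(x; base=base)

hasmethod(g, Tuple{Int})             # true
hasmethod(g, Tuple{Int}, (:base,))   # true  — the keyword `base` is accepted
hasmethod(g, Tuple{Int}, (:pad,))    # false — no such keyword and no `kwargs...` catch-all
```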
-""" -function has_bottom_parameter(t::DataType) - for p in t.parameters - has_bottom_parameter(p) && return true - end - return false -end -has_bottom_parameter(t::typeof(Bottom)) = true -has_bottom_parameter(t::UnionAll) = has_bottom_parameter(unwrap_unionall(t)) -has_bottom_parameter(t::Union) = has_bottom_parameter(t.a) & has_bottom_parameter(t.b) -has_bottom_parameter(t::TypeVar) = has_bottom_parameter(t.ub) -has_bottom_parameter(::Any) = false - -min_world(m::Core.CodeInstance) = m.min_world -max_world(m::Core.CodeInstance) = m.max_world -min_world(m::Core.CodeInfo) = m.min_world -max_world(m::Core.CodeInfo) = m.max_world -get_world_counter() = ccall(:jl_get_world_counter, UInt, ()) - -""" - propertynames(x, private=false) - -Get a tuple or a vector of the properties (`x.property`) of an object `x`. -This is typically the same as [`fieldnames(typeof(x))`](@ref), but types -that overload [`getproperty`](@ref) should generally overload `propertynames` -as well to get the properties of an instance of the type. - -`propertynames(x)` may return only "public" property names that are part -of the documented interface of `x`. If you want it to also return "private" -property names intended for internal use, pass `true` for the optional second argument. -REPL tab completion on `x.` shows only the `private=false` properties. - -See also: [`hasproperty`](@ref), [`hasfield`](@ref). -""" -propertynames(x) = fieldnames(typeof(x)) -propertynames(m::Module) = names(m) -propertynames(x, private::Bool) = propertynames(x) # ignore private flag by default - -""" - hasproperty(x, s::Symbol) - -Return a boolean indicating whether the object `x` has `s` as one of its own properties. - -!!! compat "Julia 1.2" - This function requires at least Julia 1.2. - -See also: [`propertynames`](@ref), [`hasfield`](@ref). -""" -hasproperty(x, s::Symbol) = s in propertynames(x) - """ @invoke f(arg::T, ...; kwargs...) @@ -2135,7 +1343,7 @@ julia> @macroexpand @invoke (xs::Xs)[i::I] = v::V The additional syntax is supported as of Julia 1.10. """ macro invoke(ex) - topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally + topmod = _topmod(__module__) f, args, kwargs = destructure_callex(topmod, ex) types = Expr(:curly, :Tuple) out = Expr(:call, GlobalRef(Core, :invoke)) @@ -2194,7 +1402,7 @@ julia> @macroexpand @invokelatest xs[i] = v The additional `x.f` and `xs[i]` syntax requires Julia 1.10. 
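The `@invoke`/`@invokelatest` hunks here only change where `_topmod` is looked up; as a reminder of what the macro itself does, a small public-API sketch (the function `h` is made up):

```julia
# Sketch: @invoke pins argument types, forcing dispatch to a less specific method.
h(x::Real)    = "Real"
h(x::Integer) = "Integer"

h(1)                         # "Integer" — ordinary dispatch picks the most specific method
Base.@invoke h(1::Real)      # "Real"    — dispatch as if the argument were only a Real
invoke(h, Tuple{Real}, 1)    # the equivalent lowered call
```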
""" macro invokelatest(ex) - topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally + topmod = _topmod(__module__) f, args, kwargs = destructure_callex(topmod, ex) out = Expr(:call, GlobalRef(Base, :invokelatest)) isempty(kwargs) || push!(out.args, Expr(:parameters, kwargs...)) diff --git a/base/refpointer.jl b/base/refpointer.jl index ad74763ff8286..5027462eeb6b6 100644 --- a/base/refpointer.jl +++ b/base/refpointer.jl @@ -42,9 +42,18 @@ A `C_NULL` instance of `Ptr` can be passed to a `ccall` `Ref` argument to initia # Examples ```jldoctest -julia> Ref(5) +julia> r = Ref(5) # Create a Ref with an initial value Base.RefValue{Int64}(5) +julia> r[] # Getting a value from a Ref +5 + +julia> r[] = 7 # Storing a new value in a Ref +7 + +julia> r # The Ref now contains 7 +Base.RefValue{Int64}(7) + julia> isa.(Ref([1,2,3]), [Array, Dict, Int]) # Treat reference values as scalar during broadcasting 3-element BitVector: 1 @@ -65,9 +74,6 @@ julia> Ref{Int64}()[]; # A reference to a bitstype refers to an undetermined val julia> isassigned(Ref{Int64}()) # A reference to a bitstype is always assigned true - -julia> Ref{Int64}(0)[] == 0 # Explicitly give a value for a bitstype reference -true ``` """ Ref @@ -142,13 +148,14 @@ if is_primary_base_module Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T) # convert Arrays to pointer arrays for ccall - function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring} - return RefArray(a) # effectively a no-op - end + # For example `["a", "b"]` to Ptr{Cstring} for `char **argv` function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T - if (!isbitstype(T) && T <: eltype(P)) + if P == T + return getfield(a, :ref) + elseif (isbitstype(T) ? T <: Ptr || T <: Union{Cwstring,Cstring} : T <: eltype(P)) # this Array already has the right memory layout for the requested Ref - return RefArray(a,1,false) # root something, so that this function is type-stable + # but the wrong eltype for the constructor + return RefArray{P,typeof(a),Nothing}(a, 1, nothing) # effectively a no-op else ptrs = Vector{P}(undef, length(a)+1) roots = Vector{Any}(undef, length(a)) @@ -158,14 +165,14 @@ if is_primary_base_module roots[i] = root end ptrs[length(a)+1] = C_NULL - return RefArray(ptrs,1,roots) + return RefArray{P,typeof(ptrs),typeof(roots)}(ptrs, 1, roots) end end Ref(x::AbstractArray, i::Integer) = RefArray(x, i) end -cconvert(::Type{Ptr{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a -cconvert(::Type{Ref{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a +cconvert(::Type{Ptr{P}}, a::Array{<:Union{Ptr,Cwstring,Cstring}}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref) +cconvert(::Type{Ref{P}}, a::Array{<:Union{Ptr,Cwstring,Cstring}}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref) cconvert(::Type{Ptr{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a) cconvert(::Type{Ref{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a) diff --git a/base/refvalue.jl b/base/refvalue.jl index 000088ff0ce76..7a0f2f84e2206 100644 --- a/base/refvalue.jl +++ b/base/refvalue.jl @@ -46,9 +46,9 @@ function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T}) # Instead, explicitly load the pointer from the `RefValue`, # which also ensures this returns same pointer as the one rooted in the `RefValue` object. 
p = atomic_pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), :monotonic) - end - if p == C_NULL - throw(UndefRefError()) + if p == C_NULL + throw(UndefRefError()) + end end return p end diff --git a/base/regex.jl b/base/regex.jl index c8d66265e0784..09922b8a25111 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -28,7 +28,7 @@ mutable struct Regex <: AbstractPattern function Regex(pattern::AbstractString, compile_options::Integer, match_options::Integer) - pattern = String(pattern) + pattern = String(pattern)::String compile_options = UInt32(compile_options) match_options = UInt32(match_options) if (compile_options & ~PCRE.COMPILE_MASK) != 0 @@ -69,11 +69,11 @@ Regex(pattern::AbstractString) = Regex(pattern, DEFAULT_COMPILER_OPTS, DEFAULT_M function compile(regex::Regex) if regex.regex == C_NULL - if PCRE.PCRE_COMPILE_LOCK === nothing + if !isdefinedglobal(PCRE, :PCRE_COMPILE_LOCK) regex.regex = PCRE.compile(regex.pattern, regex.compile_options) PCRE.jit_compile(regex.regex) else - l = PCRE.PCRE_COMPILE_LOCK::Threads.SpinLock + l = PCRE.PCRE_COMPILE_LOCK lock(l) try if regex.regex == C_NULL @@ -99,8 +99,8 @@ listed after the ending quote, to change its behaviour: - `m` treats the `^` and `\$` tokens as matching the start and end of individual lines, as opposed to the whole string. - `s` allows the `.` modifier to match newlines. -- `x` enables "comment mode": whitespace is enabled except when escaped with `\\`, and `#` - is treated as starting a comment. +- `x` enables "free-spacing mode": whitespace between regex tokens is ignored except when escaped with `\\`, + and `#` in the regex is treated as starting a comment (which is ignored to the line ending). - `a` enables ASCII mode (disables `UTF` and `UCP` modes). By default `\\B`, `\\b`, `\\D`, `\\d`, `\\S`, `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With this option, these sequences only match ASCII characters. This includes `\\u` also, which @@ -185,9 +185,14 @@ If a group was not captured, `nothing` will be yielded instead of a substring. Methods that accept a `RegexMatch` object are defined for [`iterate`](@ref), [`length`](@ref), [`eltype`](@ref), [`keys`](@ref keys(::RegexMatch)), [`haskey`](@ref), and -[`getindex`](@ref), where keys are the the names or numbers of a capture group. +[`getindex`](@ref), where keys are the names or numbers of a capture group. See [`keys`](@ref keys(::RegexMatch)) for more information. +`Tuple(m)`, `NamedTuple(m)`, and `Dict(m)` can be used to construct more flexible collection types from `RegexMatch` objects. + +!!! 
compat "Julia 1.11" + Constructing NamedTuples and Dicts from RegexMatches requires Julia 1.11 + # Examples ```jldoctest julia> m = match(r"(?\\d+):(?\\d+)(am|pm)?", "11:30 in the morning") @@ -210,16 +215,26 @@ julia> hr, min, ampm = m; # destructure capture groups by iteration julia> hr "11" + +julia> Dict(m) +Dict{Any, Union{Nothing, SubString{String}}} with 3 entries: + "hour" => "11" + 3 => nothing + "minute" => "30" ``` """ -struct RegexMatch <: AbstractMatch - match::SubString{String} - captures::Vector{Union{Nothing,SubString{String}}} +struct RegexMatch{S<:AbstractString} <: AbstractMatch + match::SubString{S} + captures::Vector{Union{Nothing,SubString{S}}} offset::Int offsets::Vector{Int} regex::Regex end +RegexMatch(match::SubString{S}, captures::Vector{Union{Nothing,SubString{S}}}, + offset::Union{Int, UInt}, offsets::Vector{Int}, regex::Regex) where {S<:AbstractString} = + RegexMatch{S}(match, captures, offset, offsets, regex) + """ keys(m::RegexMatch) -> Vector @@ -285,6 +300,9 @@ iterate(m::RegexMatch, args...) = iterate(m.captures, args...) length(m::RegexMatch) = length(m.captures) eltype(m::RegexMatch) = eltype(m.captures) +NamedTuple(m::RegexMatch) = NamedTuple{Symbol.(Tuple(keys(m)))}(values(m)) +Dict(m::RegexMatch) = Dict(pairs(m)) + function occursin(r::Regex, s::AbstractString; offset::Integer=0) compile(r) return PCRE.exec_r(r.regex, String(s), offset, r.match_options) @@ -377,9 +395,13 @@ end match(r::Regex, s::AbstractString[, idx::Integer[, addopts]]) Search for the first match of the regular expression `r` in `s` and return a [`RegexMatch`](@ref) -object containing the match, or nothing if the match failed. The matching substring can be -retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing -`m.captures` The optional `idx` argument specifies an index at which to start the search. +object containing the match, or nothing if the match failed. +The optional `idx` argument specifies an index at which to start the search. +The matching substring can be retrieved by accessing `m.match`, the captured sequences can be retrieved by accessing `m.captures`. +The resulting [`RegexMatch`](@ref) object can be used to construct other collections: e.g. `Tuple(m)`, `NamedTuple(m)`. + +!!! 
compat "Julia 1.11" + Constructing NamedTuples and Dicts requires Julia 1.11 # Examples ```jldoctest @@ -423,15 +445,42 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, return result end +function _annotatedmatch(m::RegexMatch{S}, str::AnnotatedString{S}) where {S<:AbstractString} + RegexMatch{AnnotatedString{S}}( + (@inbounds SubString{AnnotatedString{S}}( + str, m.match.offset, m.match.ncodeunits, Val(:noshift))), + Union{Nothing,SubString{AnnotatedString{S}}}[ + if !isnothing(cap) + (@inbounds SubString{AnnotatedString{S}}( + str, cap.offset, cap.ncodeunits, Val(:noshift))) + end for cap in m.captures], + m.offset, m.offsets, m.regex) +end + +function match(re::Regex, str::AnnotatedString) + m = match(re, str.string) + if !isnothing(m) + _annotatedmatch(m, str) + end +end + +function match(re::Regex, str::AnnotatedString, idx::Integer, add_opts::UInt32=UInt32(0)) + m = match(re, str.string, idx, add_opts) + if !isnothing(m) + _annotatedmatch(m, str) + end +end + match(r::Regex, s::AbstractString) = match(r, s, firstindex(s)) match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError( - "regex matching is only available for the String type; use String(s) to convert" + "regex matching is only available for the String and AnnotatedString types; use String(s) to convert" )) findnext(re::Regex, str::Union{String,SubString}, idx::Integer) = _findnext_re(re, str, idx, C_NULL) # TODO: return only start index and update deprecation -function _findnext_re(re::Regex, str::Union{String,SubString}, idx::Integer, match_data::Ptr{Cvoid}) +# duck-type str so that external UTF-8 string packages like StringViews can hook in +function _findnext_re(re::Regex, str, idx::Integer, match_data::Ptr{Cvoid}) if idx > nextind(str,lastindex(str)) throw(BoundsError()) end @@ -670,18 +719,19 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) end end -struct RegexMatchIterator +struct RegexMatchIterator{S <: AbstractString} regex::Regex - string::String + string::S overlap::Bool - function RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) - new(regex, string, ovr) - end + RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) = + new{String}(regex, String(string), ovr) + RegexMatchIterator(regex::Regex, string::AnnotatedString, ovr::Bool=false) = + new{AnnotatedString{String}}(regex, AnnotatedString(String(string.string), string.annotations), ovr) end compile(itr::RegexMatchIterator) = (compile(itr.regex); itr) -eltype(::Type{RegexMatchIterator}) = RegexMatch -IteratorSize(::Type{RegexMatchIterator}) = SizeUnknown() +eltype(::Type{<:RegexMatchIterator}) = RegexMatch +IteratorSize(::Type{<:RegexMatchIterator}) = SizeUnknown() function iterate(itr::RegexMatchIterator, (offset,prevempty)=(1,false)) opts_nonempty = UInt32(PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART) @@ -726,7 +776,7 @@ julia> rx = r"a.a" r"a.a" julia> m = eachmatch(rx, "a1a2a3a") -Base.RegexMatchIterator(r"a.a", "a1a2a3a", false) +Base.RegexMatchIterator{String}(r"a.a", "a1a2a3a", false) julia> collect(m) 2-element Vector{RegexMatch}: diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index d33c127b78c76..d31f3ebb5dd2d 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -13,15 +13,16 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T function throwbits(S::Type, T::Type, U::Type) @noinline - throw(ArgumentError("cannot reinterpret `$(S)` as `$(T)`, type `$(U)` is not a bits type")) + 
throw(ArgumentError(LazyString("cannot reinterpret `", S, "` as `", T, "`, type `", U, "` is not a bits type"))) end function throwsize0(S::Type, T::Type, msg) @noinline - throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a $msg size")) + throw(ArgumentError(LazyString("cannot reinterpret a zero-dimensional `", S, "` array to `", T, + "` which is of a ", msg, " size"))) end function throwsingleton(S::Type, T::Type) @noinline - throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` which is a singleton type")) + throw(ArgumentError(LazyString("cannot reinterpret a `", S, "` array to `", T, "` which is a singleton type"))) end global reinterpret @@ -46,18 +47,35 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T 3 + 4im 5 + 6im ``` + + If the location of padding bits does not line up between `T` and `eltype(A)`, the resulting array will be + read-only or write-only, to prevent invalid bits from being written to or read from, respectively. + + ```jldoctest + julia> a = reinterpret(Tuple{UInt8, UInt32}, UInt32[1, 2]) + 1-element reinterpret(Tuple{UInt8, UInt32}, ::Vector{UInt32}): + (0x01, 0x00000002) + + julia> a[1] = 3 + ERROR: Padding of type Tuple{UInt8, UInt32} is not compatible with type UInt32. + + julia> b = reinterpret(UInt32, Tuple{UInt8, UInt32}[(0x01, 0x00000002)]); # showing will error + + julia> b[1] + ERROR: Padding of type UInt32 is not compatible with type Tuple{UInt8, UInt32}. + ``` """ function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}} function thrownonint(S::Type, T::Type, dim) @noinline - throw(ArgumentError(""" - cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`. - The resulting array would have non-integral first dimension. - """)) + throw(ArgumentError(LazyString( + "cannot reinterpret an `", S, "` array to `", T, "` whose first dimension has size `", dim, + "`. The resulting array would have a non-integral first dimension."))) end function throwaxes1(S::Type, T::Type, ax1) @noinline - throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first.")) + throw(ArgumentError(LazyString("cannot reinterpret a `", S, "` array to `", T, + "` when the first axis is ", ax1, ". 
Try reshaping first."))) end isbitstype(T) || throwbits(S, T, T) isbitstype(S) || throwbits(S, T, S) @@ -82,15 +100,19 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T function reinterpret(::typeof(reshape), ::Type{T}, a::A) where {T,S,A<:AbstractArray{S}} function throwintmult(S::Type, T::Type) @noinline - throw(ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got $(sizeof(T))) and `sizeof(eltype(a))` (got $(sizeof(S))) be an integer multiple of the other")) + throw(ArgumentError(LazyString("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got ", + sizeof(T), ") and `sizeof(eltype(a))` (got ", sizeof(S), ") be an integer multiple of the other"))) end function throwsize1(a::AbstractArray, T::Type) @noinline - throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $(eltype(a)) requires that `axes(a, 1)` (got $(axes(a, 1))) be equal to 1:$(sizeof(T) ÷ sizeof(eltype(a))) (from the ratio of element sizes)")) + throw(ArgumentError(LazyString("`reinterpret(reshape, ", T, ", a)` where `eltype(a)` is ", eltype(a), + " requires that `axes(a, 1)` (got ", axes(a, 1), ") be equal to 1:", + sizeof(T) ÷ sizeof(eltype(a)), " (from the ratio of element sizes)"))) end function throwfromsingleton(S, T) @noinline - throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $S requires that $T be a singleton type, since $S is one")) + throw(ArgumentError(LazyString("`reinterpret(reshape, ", T, ", a)` where `eltype(a)` is ", S, + " requires that ", T, " be a singleton type, since ", S, " is one"))) end isbitstype(T) || throwbits(S, T, T) isbitstype(S) || throwbits(S, T, S) @@ -350,9 +372,10 @@ axes(a::NonReshapedReinterpretArray{T,0}) where {T} = () has_offset_axes(a::ReinterpretArray) = has_offset_axes(a.parent) elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T) +cconvert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = cconvert(Ptr{S}, a.parent) unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent)) -@inline @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S} +@propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S} if isprimitivetype(T) && isprimitivetype(S) reinterpret(T, a.parent[]) else @@ -360,15 +383,28 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = end end -@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)] +check_ptr_indexable(a::ReinterpretArray, sz = elsize(a)) = check_ptr_indexable(parent(a), sz) +check_ptr_indexable(a::ReshapedArray, sz) = check_ptr_indexable(parent(a), sz) +check_ptr_indexable(a::FastContiguousSubArray, sz) = check_ptr_indexable(parent(a), sz) +check_ptr_indexable(a::Array, sz) = sizeof(eltype(a)) !== sz +check_ptr_indexable(a::Memory, sz) = true +check_ptr_indexable(a::AbstractArray, sz) = false + +@propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)] -@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} +@propagate_inbounds isassigned(a::ReinterpretArray, inds::Integer...) = checkbounds(Bool, a, inds...) && (check_ptr_indexable(a) || _isassigned_ra(a, inds...)) +@propagate_inbounds isassigned(a::ReinterpretArray, inds::SCartesianIndex2) = isassigned(a.parent, inds.j) +@propagate_inbounds _isassigned_ra(a::ReinterpretArray, inds...) 
= true # that is not entirely true, but computing exactly which indexes will be accessed in the parent requires a lot of duplication from the _getindex_ra code + +@propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} check_readable(a) + check_ptr_indexable(a) && return _getindex_ptr(a, inds...) _getindex_ra(a, inds[1], tail(inds)) end -@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S} +@propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S} check_readable(a) + check_ptr_indexable(a) && return _getindex_ptr(a, i) if isa(IndexStyle(a), IndexLinear) return _getindex_ra(a, i, ()) end @@ -378,16 +414,22 @@ end isempty(inds) ? _getindex_ra(a, 1, ()) : _getindex_ra(a, inds[1], tail(inds)) end -@inline @propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S} +@propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S} check_readable(a) s = Ref{S}(a.parent[ind.j]) - GC.@preserve s begin - tptr = Ptr{T}(unsafe_convert(Ref{S}, s)) - return unsafe_load(tptr, ind.i) - end + tptr = Ptr{T}(unsafe_convert(Ref{S}, s)) + GC.@preserve s return unsafe_load(tptr, ind.i) end -@inline @propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} +@inline function _getindex_ptr(a::ReinterpretArray{T}, inds...) where {T} + @boundscheck checkbounds(a, inds...) + li = _to_linear_index(a, inds...) + ap = cconvert(Ptr{T}, a) + p = unsafe_convert(Ptr{T}, ap) + sizeof(T) * (li - 1) + GC.@preserve ap return unsafe_load(p) +end + +@propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 if issingletontype(T) # singleton types @@ -443,7 +485,7 @@ end end end -@inline @propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} +@propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 if issingletontype(T) # singleton types @@ -490,7 +532,7 @@ end end end -@inline @propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S} +@propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S} if isprimitivetype(S) && isprimitivetype(T) a.parent[] = reinterpret(S, v) return a @@ -498,15 +540,17 @@ end setindex!(a, v, firstindex(a)) end -@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a)) +@propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a)) -@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} +@propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} check_writable(a) + check_ptr_indexable(a) && return _setindex_ptr!(a, v, inds...) 
_setindex_ra!(a, v, inds[1], tail(inds)) end -@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S} +@propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S} check_writable(a) + check_ptr_indexable(a) && return _setindex_ptr!(a, v, i) if isa(IndexStyle(a), IndexLinear) return _setindex_ra!(a, v, i, ()) end @@ -514,7 +558,7 @@ end _setindex_ra!(a, v, inds[1], tail(inds)) end -@inline @propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S} +@propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S} check_writable(a) v = convert(T, v)::T s = Ref{S}(a.parent[ind.j]) @@ -526,7 +570,16 @@ end return a end -@inline @propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} +@inline function _setindex_ptr!(a::ReinterpretArray{T}, v, inds...) where {T} + @boundscheck checkbounds(a, inds...) + li = _to_linear_index(a, inds...) + ap = cconvert(Ptr{T}, a) + p = unsafe_convert(Ptr{T}, ap) + sizeof(T) * (li - 1) + GC.@preserve ap unsafe_store!(p, v) + return a +end + +@propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} v = convert(T, v)::T # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -599,7 +652,7 @@ end return a end -@inline @propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} +@propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} v = convert(T, v)::T # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -672,7 +725,7 @@ end """ CyclePadding(padding, total_size) -Cylces an iterator of `Padding` structs, restarting the padding at `total_size`. +Cycles an iterator of `Padding` structs, restarting the padding at `total_size`. E.g. if `padding` is all the padding in a struct and `total_size` is the total aligned size of that array, `CyclePadding` will correspond to the padding in an infinite vector of such structs. @@ -720,7 +773,9 @@ function CyclePadding(T::DataType) a, s = datatype_alignment(T), sizeof(T) as = s + (a - (s % a)) % a pad = padding(T) - s != as && push!(pad, Padding(s, as - s)) + if s != as + pad = Core.svec(pad..., Padding(s, as - s)) + end CyclePadding(pad, as) end @@ -795,47 +850,15 @@ function _copyfrompacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} end end -""" - reinterpret(::Type{Out}, x::In) - -Reinterpret the valid non-padding bytes of an isbits value `x` as isbits type `Out`. - -Both types must have the same amount of non-padding bytes. This operation is guaranteed -to be reversible. - -```jldoctest -julia> reinterpret(NTuple{2, UInt8}, 0x1234) -(0x34, 0x12) - -julia> reinterpret(UInt16, (0x34, 0x12)) -0x1234 - -julia> reinterpret(Tuple{UInt16, UInt8}, (0x01, 0x0203)) -(0x0301, 0x02) -``` - -!!! warning - - Use caution if some combinations of bits in `Out` are not considered valid and would - otherwise be prevented by the type's constructors and methods. Unexpected behavior - may result without additional validation. 
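Alongside the padding-aware array paths above, the scalar and `reshape` forms of `reinterpret` (both public API) behave as follows; the byte-level results assume a little-endian machine.

```julia
# Sketch: scalar reinterpret between same-size isbits values, and the reshape form
# that folds the reinterpreted elements into a new leading axis.
reinterpret(NTuple{2,UInt8}, 0x1234)    # (0x34, 0x12)
reinterpret(UInt16, (0x34, 0x12))       # 0x1234

v = Complex{Float64}[1 + 2im, 3 + 4im]
m = reinterpret(reshape, Float64, v)    # 2×2: row 1 holds real parts, row 2 imaginary parts
m[2, 1]                                 # 2.0
```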
-""" -@inline function reinterpret(::Type{Out}, x::In) where {Out, In} +@inline function _reinterpret(::Type{Out}, x::In) where {Out, In} + # handle non-primitive types isbitstype(Out) || throw(ArgumentError("Target type for `reinterpret` must be isbits")) isbitstype(In) || throw(ArgumentError("Source type for `reinterpret` must be isbits")) - if isprimitivetype(Out) && isprimitivetype(In) - outsize = sizeof(Out) - insize = sizeof(In) - outsize == insize || - throw(ArgumentError("Sizes of types $Out and $In do not match; got $outsize \ - and $insize, respectively.")) - return bitcast(Out, x) - end inpackedsize = packedsize(In) outpackedsize = packedsize(Out) inpackedsize == outpackedsize || - throw(ArgumentError("Packed sizes of types $Out and $In do not match; got $outpackedsize \ - and $inpackedsize, respectively.")) + throw(ArgumentError(LazyString("Packed sizes of types ", Out, " and ", In, + " do not match; got ", outpackedsize, " and ", inpackedsize, ", respectively."))) in = Ref{In}(x) out = Ref{Out}() if struct_subpadding(Out, In) diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index bcb47a9359392..f65a7d8c9561a 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -35,22 +35,34 @@ end length(R::ReshapedArrayIterator) = length(R.iter) eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedIndex{eltype(I)} : Any -## reshape(::Array, ::Dims) returns an Array, except for isbitsunion eltypes (issue #28611) -# reshaping to same # of dimensions -function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M} - throw_dmrsa(dims, len) = - throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $len")) +@noinline throw_dmrsa(dims, len) = + throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len")) - if prod(dims) != length(a) +## reshape(::Array, ::Dims) returns a new Array (to avoid conditionally aliasing the structure, only the data) +# reshaping to same # of dimensions +@eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M} + len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a)) + if len != length(a) throw_dmrsa(dims, length(a)) end - isbitsunion(T) && return ReshapedArray(a, dims, ()) - if N == M && dims == size(a) - return a + ref = a.ref + # or we could use `a = Array{T,N}(undef, ntuple(i->0, Val(N))); a.ref = ref; a.size = dims; return a` here to avoid the eval + return $(Expr(:new, :(Array{T,N}), :ref, :dims)) +end + +## reshape!(::Array, ::Dims) returns the original array, but must have the same dimensions and length as the original +# see also resize! for a similar operation that can change the length +function reshape!(a::Array{T,N}, dims::NTuple{N,Int}) where {T,N} + len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a)) + if len != length(a) + throw_dmrsa(dims, length(a)) end - ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims) + setfield!(a, :dims, dims) + return a end + + """ reshape(A, dims...) -> AbstractArray reshape(A, dims) -> AbstractArray @@ -109,25 +121,56 @@ reshape reshape(parent::AbstractArray, dims::IntOrInd...) 
= reshape(parent, dims) reshape(parent::AbstractArray, shp::Tuple{Union{Integer,OneTo}, Vararg{Union{Integer,OneTo}}}) = reshape(parent, to_shape(shp)) +reshape(parent::AbstractArray, dims::Tuple{Integer, Vararg{Integer}}) = reshape(parent, map(Int, dims)) reshape(parent::AbstractArray, dims::Dims) = _reshape(parent, dims) # Allow missing dimensions with Colon(): reshape(parent::AbstractVector, ::Colon) = parent reshape(parent::AbstractVector, ::Tuple{Colon}) = parent reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims) -reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims) -reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims)) -@inline function _reshape_uncolon(A, dims) - @noinline throw1(dims) = throw(DimensionMismatch(string("new dimensions $(dims) ", - "may have at most one omitted dimension specified by `Colon()`"))) - @noinline throw2(A, dims) = throw(DimensionMismatch(string("array size $(length(A)) ", - "must be divisible by the product of the new dimensions $dims"))) +reshape(parent::AbstractArray, dims::Integer...) = reshape(parent, dims) +reshape(parent::AbstractArray, dims::Union{Integer,Colon}...) = reshape(parent, dims) +reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Integer,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims)) + +@noinline throw1(dims) = throw(DimensionMismatch(LazyString("new dimensions ", dims, + " may have at most one omitted dimension specified by `Colon()`"))) +@noinline throw2(lenA, dims) = throw(DimensionMismatch(string("array size ", lenA, + " must be divisible by the product of the new dimensions ", dims))) + +@inline function _reshape_uncolon(A, _dims::Tuple{Vararg{Union{Integer, Colon}}}) + # promote the dims to `Int` at least + dims = map(x -> x isa Colon ? x : promote_type(typeof(x), Int)(x), _dims) pre = _before_colon(dims...) post = _after_colon(dims...) _any_colon(post...) && throw1(dims) - sz, remainder = divrem(length(A), prod(pre)*prod(post)) - remainder == 0 || throw2(A, dims) - (pre..., Int(sz), post...) + len = length(A) + _reshape_uncolon_computesize(len, dims, pre, post) +end +@inline function _reshape_uncolon_computesize(len::Int, dims, pre::Tuple{Vararg{Int}}, post::Tuple{Vararg{Int}}) + sz = if iszero(len) + 0 + else + let pr = Core.checked_dims(pre..., post...) # safe product + quo = _reshape_uncolon_computesize_nonempty(len, dims, pr) + convert(Int, quo) + end + end + (pre..., sz, post...) +end +@inline function _reshape_uncolon_computesize(len, dims, pre, post) + pr = prod((pre..., post...)) + sz = if iszero(len) + promote(len, pr)[1] # zero of the correct type + else + _reshape_uncolon_computesize_nonempty(len, dims, pr) + end + (pre..., sz, post...) +end +@inline function _reshape_uncolon_computesize_nonempty(len, dims, pr) + iszero(pr) && throw2(len, dims) + (quo, rem) = divrem(len, pr) + iszero(rem) || throw2(len, dims) + quo end @inline _any_colon() = false @inline _any_colon(dim::Colon, tail...) 
= true @@ -215,6 +258,11 @@ elsize(::Type{<:ReshapedArray{<:Any,<:Any,P}}) where {P} = elsize(P) unaliascopy(A::ReshapedArray) = typeof(A)(unaliascopy(A.parent), A.dims, A.mi) dataids(A::ReshapedArray) = dataids(A.parent) +# forward the aliasing check the parent in case there are specializations +mightalias(A::ReshapedArray, B::ReshapedArray) = mightalias(parent(A), parent(B)) +# special handling for reshaped SubArrays that dispatches to the subarray aliasing check +mightalias(A::ReshapedArray, B::SubArray) = mightalias(parent(A), B) +mightalias(A::SubArray, B::ReshapedArray) = mightalias(A, parent(B)) @inline ind2sub_rs(ax, ::Tuple{}, i::Int) = (i,) @inline ind2sub_rs(ax, strds, i) = _ind2sub_rs(ax, strds, i - 1) @@ -228,7 +276,8 @@ offset_if_vec(i::Integer, axs::Tuple) = i @inline function isassigned(A::ReshapedArrayLF, index::Int) @boundscheck checkbounds(Bool, A, index) || return false - @inbounds ret = isassigned(parent(A), index) + indexparent = index - firstindex(A) + firstindex(parent(A)) + @inbounds ret = isassigned(parent(A), indexparent) ret end @inline function isassigned(A::ReshapedArray{T,N}, indices::Vararg{Int, N}) where {T,N} @@ -241,7 +290,8 @@ end @inline function getindex(A::ReshapedArrayLF, index::Int) @boundscheck checkbounds(A, index) - @inbounds ret = parent(A)[index] + indexparent = index - firstindex(A) + firstindex(parent(A)) + @inbounds ret = parent(A)[indexparent] ret end @inline function getindex(A::ReshapedArray{T,N}, indices::Vararg{Int,N}) where {T,N} @@ -265,7 +315,8 @@ end @inline function setindex!(A::ReshapedArrayLF, val, index::Int) @boundscheck checkbounds(A, index) - @inbounds parent(A)[index] = val + indexparent = index - firstindex(A) + firstindex(parent(A)) + @inbounds parent(A)[indexparent] = val val end @inline function setindex!(A::ReshapedArray{T,N}, val, indices::Vararg{Int,N}) where {T,N} @@ -293,7 +344,8 @@ setindex!(A::ReshapedRange, val, index::ReshapedIndex) = _rs_setindex!_err() @noinline _rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect") -unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, parent(a)) +cconvert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = cconvert(Ptr{T}, parent(a)) +unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, a.parent) # Add a few handy specializations to further speed up views of reshaped ranges const ReshapedUnitRange{T,N,A<:AbstractUnitRange} = ReshapedArray{T,N,A,Tuple{}} @@ -304,9 +356,18 @@ compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{ReshapedRange (@inline; first(I[1]) - first(axes1(I[1]))*stride1) substrides(strds::NTuple{N,Int}, I::Tuple{ReshapedUnitRange, Vararg{Any}}) where N = (size_to_strides(strds[1], size(I[1])...)..., substrides(tail(strds), tail(I))...) -unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} = - unsafe_convert(Ptr{T}, V.parent) + (first_index(V)-1)*sizeof(T) +# cconvert(::Type{<:Ptr}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} = V +function unsafe_convert(::Type{Ptr{S}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {S,T,N,P} + parent = V.parent + p = cconvert(Ptr{T}, parent) # XXX: this should occur in cconvert, the result is not GC-rooted + Δmem = if _checkcontiguous(Bool, parent) + (first_index(V) - firstindex(parent)) * elsize(parent) + else + _memory_offset(parent, map(first, V.indices)...) 
+ end + return Ptr{S}(unsafe_convert(Ptr{T}, p) + Δmem) +end _checkcontiguous(::Type{Bool}, A::AbstractArray) = false # `strides(A::DenseArray)` calls `size_to_strides` by default. diff --git a/base/rounding.jl b/base/rounding.jl index 25cfe2dc09829..98b4c30822245 100644 --- a/base/rounding.jl +++ b/base/rounding.jl @@ -109,6 +109,64 @@ Rounds to nearest integer, with ties rounded toward positive infinity (Java/Java """ const RoundNearestTiesUp = RoundingMode{:NearestTiesUp}() +# Rounding mode predicates. TODO: better names + +# Overload these for other rounding modes +rounds_to_nearest(::RoundingMode) = false +rounds_to_nearest(::RoundingMode{:Nearest}) = true +rounds_to_nearest(::RoundingMode{:NearestTiesUp}) = true +rounds_to_nearest(::RoundingMode{:NearestTiesAway}) = true +rounds_away_from_zero(::RoundingMode{:Up}, sign_bit::Bool) = !sign_bit +rounds_away_from_zero(::RoundingMode{:Down}, sign_bit::Bool) = sign_bit +rounds_away_from_zero(::RoundingMode{:FromZero}, ::Bool) = true +rounds_away_from_zero(::RoundingMode{:ToZero}, ::Bool) = false +tie_breaker_is_to_even(::RoundingMode{:Nearest}) = true +tie_breaker_is_to_even(::RoundingMode{:NearestTiesUp}) = false +tie_breaker_is_to_even(::RoundingMode{:NearestTiesAway}) = false +tie_breaker_rounds_away_from_zero(::RoundingMode{:NearestTiesUp}, sign_bit::Bool) = !sign_bit +tie_breaker_rounds_away_from_zero(::RoundingMode{:NearestTiesAway}, ::Bool) = true + +rounds_to_nearest(t::Tuple{Any,Bool}) = rounds_to_nearest(first(t)) +rounds_away_from_zero(t::Tuple{Any,Bool}) = rounds_away_from_zero(t...) +tie_breaker_is_to_even(t::Tuple{Any,Bool}) = tie_breaker_is_to_even(first(t)) +tie_breaker_rounds_away_from_zero(t::Tuple{Any,Bool}) = tie_breaker_rounds_away_from_zero(t...) + +struct FinalBit end +struct RoundBit end +struct StickyBit end + +function correct_rounding_requires_increment(x, rounding_mode, sign_bit::Bool) + r = (rounding_mode, sign_bit) + f = let y = x + (z::Union{FinalBit,RoundBit,StickyBit}) -> y(z)::Bool + end + if rounds_to_nearest(r) + if f(RoundBit()) + if f(StickyBit()) + true + else + if tie_breaker_is_to_even(r) + f(FinalBit()) + else + tie_breaker_rounds_away_from_zero(r)::Bool + end + end + else + false + end + else + if rounds_away_from_zero(r) + if f(RoundBit()) + true + else + f(StickyBit()) + end + else + false + end + end::Bool +end + to_fenv(::RoundingMode{:Nearest}) = JL_FE_TONEAREST to_fenv(::RoundingMode{:ToZero}) = JL_FE_TOWARDZERO to_fenv(::RoundingMode{:Up}) = JL_FE_UPWARD @@ -224,6 +282,8 @@ function _convert_rounding(::Type{T}, x::Real, r::RoundingMode{:ToZero}) where T end end +# Default definitions + """ set_zero_subnormals(yes::Bool) -> Bool @@ -254,3 +314,169 @@ for IEEE arithmetic, and `true` if they might be converted to zeros. get_zero_subnormals() = ccall(:jl_get_zero_subnormals,Int32,())!=0 end #module +using .Rounding + +""" + round([T,] x, [r::RoundingMode]) + round(x, [r::RoundingMode]; digits::Integer=0, base = 10) + round(x, [r::RoundingMode]; sigdigits::Integer, base = 10) + +Rounds the number `x`. + +Without keyword arguments, `x` is rounded to an integer value, returning a value of type +`T`, or of the same type of `x` if no `T` is provided. An [`InexactError`](@ref) will be +thrown if the value is not representable by `T`, similar to [`convert`](@ref). + +If the `digits` keyword argument is provided, it rounds to the specified number of digits +after the decimal place (or before if negative), in base `base`. 
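The `correct_rounding_requires_increment` helper above encodes the usual round-bit/sticky-bit decision. The standalone function below is a hypothetical re-derivation of just the ties-to-even case, not the Base implementation.

```julia
# Hypothetical sketch: decide whether to increment the kept value, given its lowest
# retained bit, the first discarded (round) bit, and the OR of the rest (sticky bit).
function needs_increment_nearest_even(final_bit::Bool, round_bit::Bool, sticky_bit::Bool)
    round_bit  || return false   # discarded part is below half an ulp: keep as-is
    sticky_bit && return true    # discarded part is above half an ulp: round up
    return final_bit             # exact tie: increment only if the kept value is odd
end

needs_increment_nearest_even(false, true, false)   # tie against an even value -> false
needs_increment_nearest_even(true,  true, false)   # tie against an odd value  -> true
```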
+ +If the `sigdigits` keyword argument is provided, it rounds to the specified number of +significant digits, in base `base`. + +The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the default is +[`RoundNearest`](@ref), which rounds to the nearest integer, with ties (fractional values +of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect +results if the global rounding mode is changed (see [`rounding`](@ref)). + +When rounding to a floating point type, will round to integers representable by that type +(and Inf) rather than true integers. Inf is treated as one ulp greater than the +`floatmax(T)` for purposes of determining "nearest", similar to [`convert`](@ref). + +# Examples +```jldoctest +julia> round(1.7) +2.0 + +julia> round(Int, 1.7) +2 + +julia> round(1.5) +2.0 + +julia> round(2.5) +2.0 + +julia> round(pi; digits=2) +3.14 + +julia> round(pi; digits=3, base=2) +3.125 + +julia> round(123.456; sigdigits=2) +120.0 + +julia> round(357.913; sigdigits=4, base=2) +352.0 + +julia> round(Float16, typemax(UInt128)) +Inf16 + +julia> floor(Float16, typemax(UInt128)) +Float16(6.55e4) +``` + +!!! note + Rounding to specified digits in bases other than 2 can be inexact when + operating on binary floating point numbers. For example, the [`Float64`](@ref) + value represented by `1.15` is actually *less* than 1.15, yet will be + rounded to 1.2. For example: + + ```jldoctest + julia> x = 1.15 + 1.15 + + julia> big(1.15) + 1.149999999999999911182158029987476766109466552734375 + + julia> x < 115//100 + true + + julia> round(x, digits=1) + 1.2 + ``` + +# Extensions + +To extend `round` to new numeric types, it is typically sufficient to define `Base.round(x::NewType, r::RoundingMode)`. +""" +function round end + +""" + trunc([T,] x) + trunc(x; digits::Integer= [, base = 10]) + trunc(x; sigdigits::Integer= [, base = 10]) + +`trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value +is less than or equal to the absolute value of `x`. + +`trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the truncated +value is not representable a `T`. + +Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). + +To support `trunc` for a new type, define `Base.round(x::NewType, ::RoundingMode{:ToZero})`. + +See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref). + +# Examples +```jldoctest +julia> trunc(2.22) +2.0 + +julia> trunc(-2.22, digits=1) +-2.2 + +julia> trunc(Int, -2.22) +-2 +``` +""" +function trunc end + +""" + floor([T,] x) + floor(x; digits::Integer= [, base = 10]) + floor(x; sigdigits::Integer= [, base = 10]) + +`floor(x)` returns the nearest integral value of the same type as `x` that is less than or +equal to `x`. + +`floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the floored +value is not representable a `T`. + +Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). + +To support `floor` for a new type, define `Base.round(x::NewType, ::RoundingMode{:Down})`. +""" +function floor end + +""" + ceil([T,] x) + ceil(x; digits::Integer= [, base = 10]) + ceil(x; sigdigits::Integer= [, base = 10]) + +`ceil(x)` returns the nearest integral value of the same type as `x` that is greater than or +equal to `x`. + +`ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the ceiled +value is not representable as a `T`. + +Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). 
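As the definitions a little further below make explicit, `trunc`, `floor`, and `ceil` are thin wrappers over `round` with a fixed `RoundingMode`; a quick public-API sanity check:

```julia
# Sketch: the rounding family reduces to round(x, mode).
x = -2.7
trunc(x) == round(x, RoundToZero)   # true  (both -2.0)
floor(x) == round(x, RoundDown)     # true  (both -3.0)
ceil(x)  == round(x, RoundUp)       # true  (both -2.0)
round(Int, x)                       # -3, using RoundNearest by default
```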
+ +To support `ceil` for a new type, define `Base.round(x::NewType, ::RoundingMode{:Up})`. +""" +function ceil end + +trunc(x; kws...) = round(x, RoundToZero; kws...) +floor(x; kws...) = round(x, RoundDown; kws...) + ceil(x; kws...) = round(x, RoundUp; kws...) +round(x; kws...) = round(x, RoundNearest; kws...) + +trunc(::Type{T}, x) where T = round(T, x, RoundToZero) +floor(::Type{T}, x) where T = round(T, x, RoundDown) + ceil(::Type{T}, x) where T = round(T, x, RoundUp) +round(::Type{T}, x) where T = round(T, x, RoundNearest) + +round(::Type{T}, x, r::RoundingMode) where T = _round_convert(T, round(x, r), x, r) +_round_convert(::Type{T}, x_integer, x, r) where T = convert(T, x_integer) + +round(x::Integer, r::RoundingMode) = x diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl new file mode 100644 index 0000000000000..67694e533ac47 --- /dev/null +++ b/base/runtime_internals.jl @@ -0,0 +1,1577 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# name and module reflection + +""" + parentmodule(m::Module) -> Module + +Get a module's enclosing `Module`. `Main` is its own parent. + +See also: [`names`](@ref), [`nameof`](@ref), [`fullname`](@ref), [`@__MODULE__`](@ref). + +# Examples +```jldoctest +julia> parentmodule(Main) +Main + +julia> parentmodule(Base.Broadcast) +Base +``` +""" +parentmodule(m::Module) = (@_total_meta; ccall(:jl_module_parent, Ref{Module}, (Any,), m)) + +is_root_module(m::Module) = parentmodule(m) === m || m === Compiler || (isdefined(Main, :Base) && m === Main.Base) + +""" + moduleroot(m::Module) -> Module + +Find the root module of a given module. This is the first module in the chain of +parent modules of `m` which is either a registered root module or which is its +own parent module. +""" +function moduleroot(m::Module) + @_total_meta + while true + is_root_module(m) && return m + p = parentmodule(m) + p === m && return m + m = p + end +end + +""" + @__MODULE__ -> Module + +Get the `Module` of the toplevel eval, +which is the `Module` code is currently being read from. +""" +macro __MODULE__() + return __module__ +end + +""" + fullname(m::Module) + +Get the fully-qualified name of a module as a tuple of symbols. For example, + +# Examples +```jldoctest +julia> fullname(Base.Iterators) +(:Base, :Iterators) + +julia> fullname(Main) +(:Main,) +``` +""" +function fullname(m::Module) + @_total_meta + mn = nameof(m) + if m === Main || m === Base || m === Core + return (mn,) + end + mp = parentmodule(m) + if mp === m + return (mn,) + end + return (fullname(mp)..., mn) +end + +""" + moduleloc(m::Module) -> LineNumberNode + +Get the location of the `module` definition. +""" +function moduleloc(m::Module) + line = Ref{Int32}(0) + file = ccall(:jl_module_getloc, Ref{Symbol}, (Any, Ref{Int32}), m, line) + return LineNumberNode(Int(line[]), file) +end + +""" + names(x::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) -> Vector{Symbol} + +Get a vector of the public names of a `Module`, excluding deprecated names. +If `all` is true, then the list also includes non-public names defined in the module, +deprecated names, and compiler-generated names. +If `imported` is true, then names explicitly imported from other modules +are also included. +If `usings` is true, then names explicitly imported via `using` are also included. +Names are returned in sorted order. + +As a special case, all names defined in `Main` are considered \"public\", +since it is not idiomatic to explicitly mark names from `Main` as public. + +!!! 
note + `sym ∈ names(SomeModule)` does *not* imply `isdefined(SomeModule, sym)`. + `names` may return symbols marked with `public` or `export`, even if + they are not defined in the module. + +!!! warning + `names` may return duplicate names. The duplication happens, e.g. if an `import`ed name + conflicts with an already existing identifier. + +See also: [`Base.isexported`](@ref), [`Base.ispublic`](@ref), [`Base.@locals`](@ref), [`@__MODULE__`](@ref). +""" +names(m::Module; kwargs...) = sort!(unsorted_names(m; kwargs...)) +unsorted_names(m::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) = + ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint, Cint), m, all, imported, usings) + +""" + isexported(m::Module, s::Symbol) -> Bool + +Returns whether a symbol is exported from a module. + +See also: [`ispublic`](@ref), [`names`](@ref) + +```jldoctest +julia> module Mod + export foo + public bar + end +Mod + +julia> Base.isexported(Mod, :foo) +true + +julia> Base.isexported(Mod, :bar) +false + +julia> Base.isexported(Mod, :baz) +false +``` +""" +isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0 + +""" + ispublic(m::Module, s::Symbol) -> Bool + +Returns whether a symbol is marked as public in a module. + +Exported symbols are considered public. + +!!! compat "Julia 1.11" + This function and the notion of publicity were added in Julia 1.11. + +See also: [`isexported`](@ref), [`names`](@ref) + +```jldoctest +julia> module Mod + export foo + public bar + end +Mod + +julia> Base.ispublic(Mod, :foo) +true + +julia> Base.ispublic(Mod, :bar) +true + +julia> Base.ispublic(Mod, :baz) +false +``` +""" +ispublic(m::Module, s::Symbol) = ccall(:jl_module_public_p, Cint, (Any, Any), m, s) != 0 + +# TODO: this is vaguely broken because it only works for explicit calls to +# `Base.deprecate`, not the @deprecated macro: +isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0 + +""" + isbindingresolved(m::Module, s::Symbol) -> Bool + +Returns whether the binding of a symbol in a module is resolved. 
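A short sketch tying together the module-level reflection functions documented above; the `Demo` module is invented, and `Base.ispublic` needs Julia 1.11.

```julia
# Sketch: exported vs. merely defined names.
module Demo
    export foo
    foo() = 1
    bar() = 2
end

names(Demo)                          # [:Demo, :foo] — public names only, sorted
:bar in names(Demo; all=true)        # true — `all` also lists unexported definitions
Base.isexported(Demo, :foo)          # true
Base.ispublic(Demo, :bar)            # false
Base.isbindingresolved(Demo, :foo)   # true, since `foo` has been defined
```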
+ +See also: [`isexported`](@ref), [`ispublic`](@ref), [`isdeprecated`](@ref) + +```jldoctest +julia> module Mod + foo() = 17 + end +Mod + +julia> Base.isbindingresolved(Mod, :foo) +true + +julia> Base.isbindingresolved(Mod, :bar) +false +``` +""" +isbindingresolved(m::Module, var::Symbol) = ccall(:jl_binding_resolved_p, Cint, (Any, Any), m, var) != 0 + +function binding_module(m::Module, s::Symbol) + p = ccall(:jl_get_module_of_binding, Ptr{Cvoid}, (Any, Any), m, s) + p == C_NULL && return m + return unsafe_pointer_to_objref(p)::Module +end + +const _NAMEDTUPLE_NAME = NamedTuple.body.body.name + +function _fieldnames(@nospecialize t) + if t.name === _NAMEDTUPLE_NAME + if t.parameters[1] isa Tuple + return t.parameters[1] + else + throw(ArgumentError("type does not have definite field names")) + end + end + return t.name.names +end + +# N.B.: Needs to be synced with julia.h +const BINDING_KIND_CONST = 0x0 +const BINDING_KIND_CONST_IMPORT = 0x1 +const BINDING_KIND_GLOBAL = 0x2 +const BINDING_KIND_IMPLICIT = 0x3 +const BINDING_KIND_EXPLICIT = 0x4 +const BINDING_KIND_IMPORTED = 0x5 +const BINDING_KIND_FAILED = 0x6 +const BINDING_KIND_DECLARED = 0x7 +const BINDING_KIND_GUARD = 0x8 +const BINDING_KIND_UNDEF_CONST = 0x9 + +is_defined_const_binding(kind::UInt8) = (kind == BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT) +is_some_const_binding(kind::UInt8) = (is_defined_const_binding(kind) || kind == BINDING_KIND_UNDEF_CONST) +is_some_imported(kind::UInt8) = (kind == BINDING_KIND_IMPLICIT || kind == BINDING_KIND_EXPLICIT || kind == BINDING_KIND_IMPORTED) +is_some_guard(kind::UInt8) = (kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED || kind == BINDING_KIND_FAILED || kind == BINDING_KIND_UNDEF_CONST) + +function lookup_binding_partition(world::UInt, b::Core.Binding) + ccall(:jl_get_binding_partition, Ref{Core.BindingPartition}, (Any, UInt), b, world) +end + +function lookup_binding_partition(world::UInt, gr::Core.GlobalRef) + if isdefined(gr, :binding) + b = gr.binding + else + b = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), gr.mod, gr.name, true) + end + return lookup_binding_partition(world, b) +end + +partition_restriction(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_restriction_value, Any, (Any,), bpart) + +binding_kind(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_kind, UInt8, (Any,), bpart) +binding_kind(m::Module, s::Symbol) = binding_kind(lookup_binding_partition(tls_world_age(), GlobalRef(m, s))) + +""" + delete_binding(mod::Module, sym::Symbol) + +Force the binding `mod.sym` to be undefined again, allowing it be redefined. +Note that this operation is very expensive, requiring a full scan of all code in the system, +as well as potential recompilation of any methods that (may) have used binding +information. + +!!! warning + The implementation of this functionality is currently incomplete. Do not use + this method on versions that contain this disclaimer except for testing. +""" +function delete_binding(mod::Module, sym::Symbol) + ccall(:jl_disable_binding, Cvoid, (Any,), GlobalRef(mod, sym)) +end + +""" + fieldname(x::DataType, i::Integer) + +Get the name of field `i` of a `DataType`. + +The return type is `Symbol`, except when `x <: Tuple`, in which case the index of the field is returned, of type `Int`. 
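The `delete_binding` function added above is explicitly flagged as incomplete; purely to illustrate the intended call shape described in its docstring (do not rely on this behaviour):

```julia
# Hypothetical usage of the internal Base.delete_binding.
module Scratch
    version = 1
end

Base.delete_binding(Scratch, :version)
isdefined(Scratch, :version)   # false per the docstring: the name may now be rebound
```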
+ +# Examples +```jldoctest +julia> fieldname(Rational, 1) +:num + +julia> fieldname(Rational, 2) +:den + +julia> fieldname(Tuple{String,Int}, 2) +2 +``` +""" +function fieldname(t::DataType, i::Integer) + throw_not_def_field() = throw(ArgumentError("type does not have definite field names")) + function throw_field_access(t, i, n_fields) + field_label = n_fields == 1 ? "field" : "fields" + throw(ArgumentError("Cannot access field $i since type $t only has $n_fields $field_label.")) + end + throw_need_pos_int(i) = throw(ArgumentError("Field numbers must be positive integers. $i is invalid.")) + + isabstracttype(t) && throw_not_def_field() + names = _fieldnames(t) + n_fields = length(names)::Int + i > n_fields && throw_field_access(t, i, n_fields) + i < 1 && throw_need_pos_int(i) + return @inbounds names[i]::Symbol +end + +fieldname(t::UnionAll, i::Integer) = fieldname(unwrap_unionall(t), i) +fieldname(t::Type{<:Tuple}, i::Integer) = + i < 1 || i > fieldcount(t) ? throw(BoundsError(t, i)) : Int(i) + +""" + fieldnames(x::DataType) + +Get a tuple with the names of the fields of a `DataType`. + +Each name is a `Symbol`, except when `x <: Tuple`, in which case each name (actually the +index of the field) is an `Int`. + +See also [`propertynames`](@ref), [`hasfield`](@ref). + +# Examples +```jldoctest +julia> fieldnames(Rational) +(:num, :den) + +julia> fieldnames(typeof(1+im)) +(:re, :im) + +julia> fieldnames(Tuple{String,Int}) +(1, 2) +``` +""" +fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough + (_fieldnames(t)...,))::Tuple{Vararg{Symbol}} +fieldnames(t::UnionAll) = fieldnames(unwrap_unionall(t)) +fieldnames(::Core.TypeofBottom) = + throw(ArgumentError("The empty type does not have field names since it does not have instances.")) +fieldnames(t::Type{<:Tuple}) = ntuple(identity, fieldcount(t)) + +""" + hasfield(T::Type, name::Symbol) + +Return a boolean indicating whether `T` has `name` as one of its own fields. + +See also [`fieldnames`](@ref), [`fieldcount`](@ref), [`hasproperty`](@ref). + +!!! compat "Julia 1.2" + This function requires at least Julia 1.2. + +# Examples +```jldoctest +julia> struct Foo + bar::Int + end + +julia> hasfield(Foo, :bar) +true + +julia> hasfield(Foo, :x) +false +``` +""" +hasfield(T::Type, name::Symbol) = fieldindex(T, name, false) > 0 + +""" + nameof(t::DataType) -> Symbol + +Get the name of a (potentially `UnionAll`-wrapped) `DataType` (without its parent module) +as a symbol. + +# Examples +```jldoctest +julia> module Foo + struct S{T} + end + end +Foo + +julia> nameof(Foo.S{T} where T) +:S +``` +""" +nameof(t::DataType) = t.name.name +nameof(t::UnionAll) = nameof(unwrap_unionall(t))::Symbol + +""" + parentmodule(t::DataType) -> Module + +Determine the module containing the definition of a (potentially `UnionAll`-wrapped) `DataType`. + +# Examples +```jldoctest +julia> module Foo + struct Int end + end +Foo + +julia> parentmodule(Int) +Core + +julia> parentmodule(Foo.Int) +Foo +``` +""" +parentmodule(t::DataType) = t.name.module +parentmodule(t::UnionAll) = parentmodule(unwrap_unionall(t)) + +""" + isconst(m::Module, s::Symbol) -> Bool + +Determine whether a global is declared `const` in a given module `m`. 
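A brief hypothetical check of the module-level `isconst` described above (the `ConstDemo` module is made up for the example):

```julia
module ConstDemo
    const TAU = 6.28318530717958
    counter = 0
end

isconst(ConstDemo, :TAU)      # true: declared with `const`
isconst(ConstDemo, :counter)  # false: ordinary global
isconst(Base, :pi)            # true
```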
+""" +isconst(m::Module, s::Symbol) = + ccall(:jl_is_const, Cint, (Any, Any), m, s) != 0 + +function isconst(g::GlobalRef) + return ccall(:jl_globalref_is_const, Cint, (Any,), g) != 0 +end + +""" + isconst(t::DataType, s::Union{Int,Symbol}) -> Bool + +Determine whether a field `s` is declared `const` in a given type `t`. +""" +function isconst(@nospecialize(t::Type), s::Symbol) + @_foldable_meta + t = unwrap_unionall(t) + isa(t, DataType) || return false + return isconst(t, fieldindex(t, s, false)) +end +function isconst(@nospecialize(t::Type), s::Int) + @_foldable_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? + isa(t, DataType) || return false # uncertain + ismutabletype(t) || return true # immutable structs are always const + 1 <= s <= length(t.name.names) || return true # OOB reads are "const" since they always throw + constfields = t.name.constfields + constfields === C_NULL && return false + s -= 1 + return unsafe_load(Ptr{UInt32}(constfields), 1 + s÷32) & (1 << (s%32)) != 0 +end + +""" + isfieldatomic(t::DataType, s::Union{Int,Symbol}) -> Bool + +Determine whether a field `s` is declared `@atomic` in a given type `t`. +""" +function isfieldatomic(@nospecialize(t::Type), s::Symbol) + @_foldable_meta + t = unwrap_unionall(t) + isa(t, DataType) || return false + return isfieldatomic(t, fieldindex(t, s, false)) +end +function isfieldatomic(@nospecialize(t::Type), s::Int) + @_foldable_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? + isa(t, DataType) || return false # uncertain + ismutabletype(t) || return false # immutable structs are never atomic + 1 <= s <= length(t.name.names) || return false # OOB reads are not atomic (they always throw) + atomicfields = t.name.atomicfields + atomicfields === C_NULL && return false + s -= 1 + return unsafe_load(Ptr{UInt32}(atomicfields), 1 + s÷32) & (1 << (s%32)) != 0 +end + +""" + @locals() + +Construct a dictionary of the names (as symbols) and values of all local +variables defined as of the call site. + +!!! compat "Julia 1.1" + This macro requires at least Julia 1.1. + +# Examples +```jldoctest +julia> let x = 1, y = 2 + Base.@locals + end +Dict{Symbol, Any} with 2 entries: + :y => 2 + :x => 1 + +julia> function f(x) + local y + show(Base.@locals); println() + for i = 1:1 + show(Base.@locals); println() + end + y = 2 + show(Base.@locals); println() + nothing + end; + +julia> f(42) +Dict{Symbol, Any}(:x => 42) +Dict{Symbol, Any}(:i => 1, :x => 42) +Dict{Symbol, Any}(:y => 2, :x => 42) +``` +""" +macro locals() + return Expr(:locals) +end + +# concrete datatype predicates + +datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x) + +struct DataTypeLayout + size::UInt32 + nfields::UInt32 + npointers::UInt32 + firstptr::Int32 + alignment::UInt16 + flags::UInt16 + # haspadding : 1; + # fielddesc_type : 2; + # arrayelem_isboxed : 1; + # arrayelem_isunion : 1; +end + +""" + Base.datatype_alignment(dt::DataType) -> Int + +Memory allocation minimum alignment for instances of this type. +Can be called on any `isconcretetype`, although for Memory it will give the +alignment of the elements, not the whole object. 
+""" +function datatype_alignment(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment + return Int(alignment) +end + +function uniontype_layout(@nospecialize T::Type) + sz = RefValue{Csize_t}(0) + algn = RefValue{Csize_t}(0) + isinline = ccall(:jl_islayout_inline, Cint, (Any, Ptr{Csize_t}, Ptr{Csize_t}), T, sz, algn) != 0 + (isinline, Int(sz[]), Int(algn[])) +end + +LLT_ALIGN(x, sz) = (x + sz - 1) & -sz + +# amount of total space taken by T when stored in a container +function aligned_sizeof(@nospecialize T::Type) + @_foldable_meta + if isa(T, Union) + if allocatedinline(T) + # NOTE this check is equivalent to `isbitsunion(T)`, we can improve type + # inference in the second branch with the outer `isa(T, Union)` check + _, sz, al = uniontype_layout(T) + return LLT_ALIGN(sz, al) + end + elseif allocatedinline(T) + al = datatype_alignment(T) + return LLT_ALIGN(Core.sizeof(T), al) + end + return Core.sizeof(Ptr{Cvoid}) +end + +gc_alignment(sz::Integer) = Int(ccall(:jl_alignment, Cint, (Csize_t,), sz)) +gc_alignment(T::Type) = gc_alignment(Core.sizeof(T)) + +""" + Base.datatype_haspadding(dt::DataType) -> Bool + +Return whether the fields of instances of this type are packed in memory, +with no intervening padding bits (defined as bits whose value does not impact +the semantic value of the instance itself). +Can be called on any `isconcretetype`. +""" +function datatype_haspadding(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return flags & 1 == 1 +end + +""" + Base.datatype_isbitsegal(dt::DataType) -> Bool + +Return whether egality of the (non-padding bits of the) in-memory representation +of an instance of this type implies semantic egality of the instance itself. +This may not be the case if the type contains to other values whose egality is +independent of their identity (e.g. immutable structs, some types, etc.). +""" +function datatype_isbitsegal(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return (flags & (1<<5)) != 0 +end + +""" + Base.datatype_nfields(dt::DataType) -> UInt32 + +Return the number of fields known to this datatype's layout. This may be +different from the number of actual fields of the type for opaque types. +Can be called on any `isconcretetype`. +""" +function datatype_nfields(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields +end + +""" + Base.datatype_npointers(dt::DataType) -> Int + +Return the number of pointers in the layout of a datatype. +""" +function datatype_npointers(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers +end + +""" + Base.datatype_pointerfree(dt::DataType) -> Bool + +Return whether instances of this type can contain references to gc-managed memory. +Can be called on any `isconcretetype`. +""" +function datatype_pointerfree(dt::DataType) + @_foldable_meta + return datatype_npointers(dt) == 0 +end + +""" + Base.datatype_fielddesc_type(dt::DataType) -> Int + +Return the size in bytes of each field-description entry in the layout array, +located at `(dt.layout + sizeof(DataTypeLayout))`. +Can be called on any `isconcretetype`. 
+ +See also [`fieldoffset`](@ref). +""" +function datatype_fielddesc_type(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return (flags >> 1) & 3 +end + +""" + Base.datatype_arrayelem(dt::DataType) -> Int + +Return the behavior of the trailing array types allocations. +Can be called on any `isconcretetype`, but only meaningful on `Memory`. + +0 = inlinealloc +1 = isboxed +2 = isbitsunion +""" +function datatype_arrayelem(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return (flags >> 3) & 3 +end + +function datatype_layoutsize(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + size = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).size + return size % Int +end + + +# For type stability, we only expose a single struct that describes everything +struct FieldDesc + isforeign::Bool + isptr::Bool + size::UInt32 + offset::UInt32 +end + +struct FieldDescStorage{T} + ptrsize::T + offset::T +end +FieldDesc(fd::FieldDescStorage{T}) where {T} = + FieldDesc(false, fd.ptrsize & 1 != 0, + fd.ptrsize >> 1, fd.offset) + +struct DataTypeFieldDesc + dt::DataType + function DataTypeFieldDesc(dt::DataType) + dt.layout == C_NULL && throw(UndefRefError()) + new(dt) + end +end + +function getindex(dtfd::DataTypeFieldDesc, i::Int) + layout_ptr = convert(Ptr{DataTypeLayout}, dtfd.dt.layout) + fd_ptr = layout_ptr + Core.sizeof(DataTypeLayout) + layout = unsafe_load(layout_ptr) + fielddesc_type = (layout.flags >> 1) & 3 + nfields = layout.nfields + @boundscheck ((1 <= i <= nfields) || throw(BoundsError(dtfd, i))) + if fielddesc_type == 0 + return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt8}}(fd_ptr), i)) + elseif fielddesc_type == 1 + return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt16}}(fd_ptr), i)) + elseif fielddesc_type == 2 + return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt32}}(fd_ptr), i)) + else + # fielddesc_type == 3 + return FieldDesc(true, true, 0, 0) + end +end + +""" + ismutable(v) -> Bool + +Return `true` if and only if value `v` is mutable. See [Mutable Composite Types](@ref) +for a discussion of immutability. Note that this function works on values, so if you +give it a `DataType`, it will tell you that a value of the type is mutable. + +!!! note + For technical reasons, `ismutable` returns `true` for values of certain special types + (for example `String` and `Symbol`) even though they cannot be mutated in a permissible way. + +See also [`isbits`](@ref), [`isstructtype`](@ref). + +# Examples +```jldoctest +julia> ismutable(1) +false + +julia> ismutable([1,2]) +true +``` + +!!! compat "Julia 1.5" + This function requires at least Julia 1.5. +""" +ismutable(@nospecialize(x)) = (@_total_meta; (typeof(x).name::Core.TypeName).flags & 0x2 == 0x2) +# The type assertion above is required to fix some invalidations. +# See also https://github.com/JuliaLang/julia/issues/52134 + +""" + ismutabletype(T) -> Bool + +Determine whether type `T` was declared as a mutable type +(i.e. using `mutable struct` keyword). +If `T` is not a type, then return `false`. + +!!! compat "Julia 1.7" + This function requires at least Julia 1.7. +""" +function ismutabletype(@nospecialize t) + @_total_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? 
+ return isa(t, DataType) && ismutabletypename(t.name) +end + +ismutabletypename(tn::Core.TypeName) = tn.flags & 0x2 == 0x2 + +""" + isstructtype(T) -> Bool + +Determine whether type `T` was declared as a struct type +(i.e. using the `struct` or `mutable struct` keyword). +If `T` is not a type, then return `false`. +""" +function isstructtype(@nospecialize t) + @_total_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? + isa(t, DataType) || return false + return !isprimitivetype(t) && !isabstracttype(t) +end + +""" + isprimitivetype(T) -> Bool + +Determine whether type `T` was declared as a primitive type +(i.e. using the `primitive type` syntax). +If `T` is not a type, then return `false`. +""" +function isprimitivetype(@nospecialize t) + @_total_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? + isa(t, DataType) || return false + return (t.flags & 0x0080) == 0x0080 +end + +""" + isbitstype(T) + +Return `true` if type `T` is a "plain data" type, +meaning it is immutable and contains no references to other values, +only `primitive` types and other `isbitstype` types. +Typical examples are numeric types such as [`UInt8`](@ref), +[`Float64`](@ref), and [`Complex{Float64}`](@ref). +This category of types is significant since they are valid as type parameters, +may not track [`isdefined`](@ref) / [`isassigned`](@ref) status, +and have a defined layout that is compatible with C. +If `T` is not a type, then return `false`. + +See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref). + +# Examples +```jldoctest +julia> isbitstype(Complex{Float64}) +true + +julia> isbitstype(Complex) +false +``` +""" +isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0008) == 0x0008) + +""" + isbits(x) + +Return `true` if `x` is an instance of an [`isbitstype`](@ref) type. +""" +isbits(@nospecialize x) = isbitstype(typeof(x)) + +""" + objectid(x) -> UInt + +Get a hash value for `x` based on object identity. + +If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`. + +See also [`hash`](@ref), [`IdDict`](@ref). +""" +function objectid(@nospecialize(x)) + @_total_meta + return ccall(:jl_object_id, UInt, (Any,), x) +end + +""" + isdispatchtuple(T) + +Determine whether type `T` is a tuple of concrete types, +meaning it could appear as a type signature in dispatch +and has no subtypes (or supertypes) which could appear in a call. +If `T` is not a type, then return `false`. +""" +isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0004) == 0x0004) + +datatype_ismutationfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0100) == 0x0100) + +""" + Base.ismutationfree(T) + +Determine whether type `T` is mutation free in the sense that no mutable memory +is reachable from this type (either in the type itself) or through any fields. +Note that the type itself need not be immutable. For example, an empty mutable +type is `ismutabletype`, but also `ismutationfree`. +If `T` is not a type, then return `false`. +""" +function ismutationfree(@nospecialize(t)) + t = unwrap_unionall(t) + if isa(t, DataType) + return datatype_ismutationfree(t) + elseif isa(t, Union) + return ismutationfree(t.a) && ismutationfree(t.b) + end + # TypeVar, etc. 
+ return false +end + +datatype_isidentityfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0200) == 0x0200) + +""" + Base.isidentityfree(T) + +Determine whether type `T` is identity free in the sense that this type or any +reachable through its fields has non-content-based identity. +If `T` is not a type, then return `false`. +""" +function isidentityfree(@nospecialize(t)) + t = unwrap_unionall(t) + if isa(t, DataType) + return datatype_isidentityfree(t) + elseif isa(t, Union) + return isidentityfree(t.a) && isidentityfree(t.b) + end + # TypeVar, etc. + return false +end + +iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom)) +isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t) + +using Core: has_free_typevars + +# equivalent to isa(v, Type) && isdispatchtuple(Tuple{v}) || v === Union{} +# and is thus perhaps most similar to the old (pre-1.0) `isconcretetype` query +function isdispatchelem(@nospecialize v) + return (v === Bottom) || (v === typeof(Bottom)) || isconcretedispatch(v) || + (isType(v) && !has_free_typevars(v)) +end + +const _TYPE_NAME = Type.body.name +isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME + +""" + isconcretetype(T) + +Determine whether type `T` is a concrete type, meaning it could have direct instances +(values `x` such that `typeof(x) === T`). +Note that this is not the negation of `isabstracttype(T)`. +If `T` is not a type, then return `false`. + +See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref). + +# Examples +```jldoctest +julia> isconcretetype(Complex) +false + +julia> isconcretetype(Complex{Float32}) +true + +julia> isconcretetype(Vector{Complex}) +true + +julia> isconcretetype(Vector{Complex{Float32}}) +true + +julia> isconcretetype(Union{}) +false + +julia> isconcretetype(Union{Int,String}) +false +``` +""" +isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0002) == 0x0002) + +""" + isabstracttype(T) + +Determine whether type `T` was declared as an abstract type +(i.e. using the `abstract type` syntax). +Note that this is not the negation of `isconcretetype(T)`. +If `T` is not a type, then return `false`. + +# Examples +```jldoctest +julia> isabstracttype(AbstractArray) +true + +julia> isabstracttype(Vector) +false +``` +""" +function isabstracttype(@nospecialize(t)) + @_total_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? + return isa(t, DataType) && (t.name.flags & 0x1) == 0x1 +end + +function is_datatype_layoutopaque(dt::DataType) + datatype_nfields(dt) == 0 && !datatype_pointerfree(dt) +end + +function is_valid_intrinsic_elptr(@nospecialize(ety)) + ety === Any && return true + isconcretetype(ety) || return false + ety <: Array && return false + return !is_datatype_layoutopaque(ety) +end + +""" + Base.issingletontype(T) + +Determine whether type `T` has exactly one possible instance; for example, a +struct type with no fields except other singleton values. +If `T` is not a concrete type, then return `false`. +""" +issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance) && datatype_layoutsize(t) == 0 && datatype_pointerfree(t)) + +""" + typeintersect(T::Type, S::Type) + +Compute a type that contains the intersection of `T` and `S`. Usually this will be the +smallest such type or one close to it. + +A special case where exact behavior is guaranteed: when `T <: S`, +`typeintersect(S, T) == T == typeintersect(T, S)`. 
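A few concrete calls (results shown as comments) illustrate the guarantees described above:

```julia
typeintersect(Union{Int, Float64}, AbstractFloat)       # Float64
typeintersect(Integer, Int)                             # Int64 (the guaranteed `T <: S` case)
typeintersect(Tuple{Integer, Any}, Tuple{Int, String})  # Tuple{Int64, String}
typeintersect(Integer, AbstractString)                  # Union{}
```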
+""" +typeintersect(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_intersection, Any, (Any, Any), a::Type, b::Type)) + +morespecific(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_morespecific, Cint, (Any, Any), a::Type, b::Type) != 0) +morespecific(a::Method, b::Method) = ccall(:jl_method_morespecific, Cint, (Any, Any), a, b) != 0 + +""" + fieldoffset(type, i) + +The byte offset of field `i` of a type relative to the data start. For example, we could +use it in the following manner to summarize information about a struct: + +```jldoctest +julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)]; + +julia> structinfo(Base.Filesystem.StatStruct) +14-element Vector{Tuple{UInt64, Symbol, Type}}: + (0x0000000000000000, :desc, Union{RawFD, String}) + (0x0000000000000008, :device, UInt64) + (0x0000000000000010, :inode, UInt64) + (0x0000000000000018, :mode, UInt64) + (0x0000000000000020, :nlink, Int64) + (0x0000000000000028, :uid, UInt64) + (0x0000000000000030, :gid, UInt64) + (0x0000000000000038, :rdev, UInt64) + (0x0000000000000040, :size, Int64) + (0x0000000000000048, :blksize, Int64) + (0x0000000000000050, :blocks, Int64) + (0x0000000000000058, :mtime, Float64) + (0x0000000000000060, :ctime, Float64) + (0x0000000000000068, :ioerrno, Int32) +``` +""" +fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx)) + +""" + fieldtype(T, name::Symbol | index::Int) + +Determine the declared type of a field (specified by name or index) in a composite DataType `T`. + +# Examples +```jldoctest +julia> struct Foo + x::Int64 + y::String + end + +julia> fieldtype(Foo, :x) +Int64 + +julia> fieldtype(Foo, 2) +String +``` +""" +fieldtype + +""" + Base.fieldindex(T, name::Symbol, err:Bool=true) + +Get the index of a named field, throwing an error if the field does not exist (when err==true) +or returning 0 (when err==false). + +# Examples +```jldoctest +julia> struct Foo + x::Int64 + y::String + end + +julia> Base.fieldindex(Foo, :z) +ERROR: FieldError: type Foo has no field `z`, available fields: `x`, `y` +Stacktrace: +[...] + +julia> Base.fieldindex(Foo, :z, false) +0 +``` +""" +function fieldindex(T::DataType, name::Symbol, err::Bool=true) + return err ? 
_fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name) +end + +function _fieldindex_maythrow(T::DataType, name::Symbol) + @_foldable_meta + @noinline + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1) +end + +function _fieldindex_nothrow(T::DataType, name::Symbol) + @_total_meta + @noinline + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1) +end + +function fieldindex(t::UnionAll, name::Symbol, err::Bool=true) + t = argument_datatype(t) + if t === nothing + err && throw(ArgumentError("type does not have definite fields")) + return 0 + end + return fieldindex(t, name, err) +end + +function argument_datatype(@nospecialize t) + @_total_meta + @noinline + return ccall(:jl_argument_datatype, Any, (Any,), t)::Union{Nothing,DataType} +end + +function datatype_fieldcount(t::DataType) + if t.name === _NAMEDTUPLE_NAME + names, types = t.parameters[1], t.parameters[2] + if names isa Tuple + return length(names) + end + if types isa DataType && types <: Tuple + return fieldcount(types) + end + return nothing + elseif isabstracttype(t) + return nothing + end + if t.name === Tuple.name + isvatuple(t) && return nothing + return length(t.types) + end + # Equivalent to length(t.types), but `t.types` is lazy and we do not want + # to be forced to compute it. + return length(t.name.names) +end + +""" + fieldcount(t::Type) + +Get the number of fields that an instance of the given type would have. +An error is thrown if the type is too abstract to determine this. +""" +function fieldcount(@nospecialize t) + @_foldable_meta + if t isa UnionAll || t isa Union + t = argument_datatype(t) + if t === nothing + throw(ArgumentError("type does not have a definite number of fields")) + end + elseif t === Union{} + throw(ArgumentError("The empty type does not have a well-defined number of fields since it does not have instances.")) + end + if !(t isa DataType) + throw(TypeError(:fieldcount, DataType, t)) + end + fcount = datatype_fieldcount(t) + if fcount === nothing + throw(ArgumentError("type does not have a definite number of fields")) + end + return fcount +end + +""" + fieldtypes(T::Type) + +The declared types of all fields in a composite DataType `T` as a tuple. + +!!! compat "Julia 1.1" + This function requires at least Julia 1.1. + +# Examples +```jldoctest +julia> struct Foo + x::Int64 + y::String + end + +julia> fieldtypes(Foo) +(Int64, String) +``` +""" +fieldtypes(T::Type) = (@_foldable_meta; ntupleany(i -> fieldtype(T, i), fieldcount(T))) + +# return all instances, for types that can be enumerated + +""" + instances(T::Type) + +Return a collection of all instances of the given type, if applicable. Mostly used for +enumerated types (see `@enum`). 
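Returning to `fieldcount`/`fieldtypes` and the `NamedTuple`/`Tuple` special-casing handled by `datatype_fieldcount` above, a short illustrative session:

```julia
fieldcount(NamedTuple{(:a, :b), Tuple{Int, String}})   # 2
fieldtypes(NamedTuple{(:a, :b), Tuple{Int, String}})   # (Int64, String)
fieldtypes(Tuple{Int, String})                         # (Int64, String)

# Types without a definite field count throw an ArgumentError:
try fieldcount(Tuple{Vararg{Int}}) catch err; typeof(err) end   # ArgumentError
```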
+ +# Examples +```jldoctest +julia> @enum Color red blue green + +julia> instances(Color) +(red, blue, green) +``` +""" +function instances end + +function to_tuple_type(@nospecialize(t)) + if isa(t, Tuple) || isa(t, AbstractArray) || isa(t, SimpleVector) + t = Tuple{t...} + end + if isa(t, Type) && t <: Tuple + for p in (unwrap_unionall(t)::DataType).parameters + if isa(p, Core.TypeofVararg) + p = unwrapva(p) + end + if !(isa(p, Type) || isa(p, TypeVar)) + error("argument tuple type must contain only types") + end + end + else + error("expected tuple type") + end + t +end + +function signature_type(@nospecialize(f), @nospecialize(argtypes)) + argtypes = to_tuple_type(argtypes) + ft = Core.Typeof(f) + u = unwrap_unionall(argtypes)::DataType + return rewrap_unionall(Tuple{ft, u.parameters...}, argtypes) +end + +function get_methodtable(m::Method) + mt = ccall(:jl_method_get_table, Any, (Any,), m) + if mt === nothing + return nothing + end + return mt::Core.MethodTable +end + +""" + has_bottom_parameter(t) -> Bool + +Determine whether `t` is a Type for which one or more of its parameters is `Union{}`. +""" +function has_bottom_parameter(t::DataType) + for p in t.parameters + has_bottom_parameter(p) && return true + end + return false +end +has_bottom_parameter(t::typeof(Bottom)) = true +has_bottom_parameter(t::UnionAll) = has_bottom_parameter(unwrap_unionall(t)) +has_bottom_parameter(t::Union) = has_bottom_parameter(t.a) & has_bottom_parameter(t.b) +has_bottom_parameter(t::TypeVar) = has_bottom_parameter(t.ub) +has_bottom_parameter(::Any) = false + +min_world(m::Core.CodeInstance) = m.min_world +max_world(m::Core.CodeInstance) = m.max_world +min_world(m::Core.CodeInfo) = m.min_world +max_world(m::Core.CodeInfo) = m.max_world + +""" + get_world_counter() + +Returns the current maximum world-age counter. This counter is global and monotonically +increasing. +""" +get_world_counter() = ccall(:jl_get_world_counter, UInt, ()) + +""" + tls_world_age() + +Returns the world the [current_task()](@ref) is executing within. +""" +tls_world_age() = ccall(:jl_get_tls_world_age, UInt, ()) + +""" + propertynames(x, private=false) + +Get a tuple or a vector of the properties (`x.property`) of an object `x`. +This is typically the same as [`fieldnames(typeof(x))`](@ref), but types +that overload [`getproperty`](@ref) should generally overload `propertynames` +as well to get the properties of an instance of the type. + +`propertynames(x)` may return only "public" property names that are part +of the documented interface of `x`. If you want it to also return "private" +property names intended for internal use, pass `true` for the optional second argument. +REPL tab completion on `x.` shows only the `private=false` properties. + +See also: [`hasproperty`](@ref), [`hasfield`](@ref). +""" +propertynames(x) = fieldnames(typeof(x)) +propertynames(m::Module) = names(m) +propertynames(x, private::Bool) = propertynames(x) # ignore private flag by default +propertynames(x::Array) = () # hide the fields from tab completion to discourage calling `x.size` instead of `size(x)`, even though they are equivalent + +""" + hasproperty(x, s::Symbol) + +Return a boolean indicating whether the object `x` has `s` as one of its own properties. + +!!! compat "Julia 1.2" + This function requires at least Julia 1.2. + +See also: [`propertynames`](@ref), [`hasfield`](@ref). 
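As a hypothetical illustration of the `getproperty`/`propertynames` pairing recommended above (the `Temperature` type is invented for this sketch):

```julia
struct Temperature
    celsius::Float64
end
Base.getproperty(t::Temperature, s::Symbol) =
    s === :fahrenheit ? getfield(t, :celsius) * 9 / 5 + 32 : getfield(t, s)
Base.propertynames(::Temperature, private::Bool=false) =
    private ? (:celsius, :fahrenheit) : (:fahrenheit,)

t = Temperature(20.0)
t.fahrenheit                 # 68.0
propertynames(t)             # (:fahrenheit,)
propertynames(t, true)       # (:celsius, :fahrenheit)
hasproperty(t, :fahrenheit)  # true
```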
+""" +hasproperty(x, s::Symbol) = s in propertynames(x) + +""" + delete_method(m::Method) + +Make method `m` uncallable and force recompilation of any methods that use(d) it. +""" +function delete_method(m::Method) + ccall(:jl_method_table_disable, Cvoid, (Any, Any), get_methodtable(m), m) +end + + +# type for reflecting and pretty-printing a subset of methods +mutable struct MethodList <: AbstractArray{Method,1} + ms::Array{Method,1} + mt::Core.MethodTable +end + +size(m::MethodList) = size(m.ms) +getindex(m::MethodList, i::Integer) = m.ms[i] + +function MethodList(mt::Core.MethodTable) + ms = Method[] + visit(mt) do m + push!(ms, m) + end + return MethodList(ms, mt) +end + +""" + methods(f, [types], [module]) + +Return the method table for `f`. + +If `types` is specified, return an array of methods whose types match. +If `module` is specified, return an array of methods defined in that module. +A list of modules can also be specified as an array. + +!!! compat "Julia 1.4" + At least Julia 1.4 is required for specifying a module. + +See also: [`which`](@ref), [`@which`](@ref Main.InteractiveUtils.@which) and [`methodswith`](@ref Main.InteractiveUtils.methodswith). +""" +function methods(@nospecialize(f), @nospecialize(t), + mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing) + world = get_world_counter() + world == typemax(UInt) && error("code reflection cannot be used from generated functions") + # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually + ms = Method[] + for m in _methods(f, t, -1, world)::Vector + m = m::Core.MethodMatch + (mod === nothing || parentmodule(m.method) ∈ mod) && push!(ms, m.method) + end + MethodList(ms, typeof(f).name.mt) +end +methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,)) + +function methods_including_ambiguous(@nospecialize(f), @nospecialize(t)) + tt = signature_type(f, t) + world = get_world_counter() + world == typemax(UInt) && error("code reflection cannot be used from generated functions") + min = RefValue{UInt}(typemin(UInt)) + max = RefValue{UInt}(typemax(UInt)) + ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector + return MethodList(Method[(m::Core.MethodMatch).method for m in ms], typeof(f).name.mt) +end + +function methods(@nospecialize(f), + mod::Union{Module,AbstractArray{Module},Nothing}=nothing) + # return all matches + return methods(f, Tuple{Vararg{Any}}, mod) +end + +# low-level method lookup functions used by the compiler + +unionlen(@nospecialize(x)) = x isa Union ? 
unionlen(x.a) + unionlen(x.b) : 1 + +function _uniontypes(@nospecialize(x), ts::Array{Any,1}) + if x isa Union + _uniontypes(x.a, ts) + _uniontypes(x.b, ts) + else + push!(ts, x) + end + return ts +end +uniontypes(@nospecialize(x)) = _uniontypes(x, Any[]) + +function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt) + tt = signature_type(f, t) + return _methods_by_ftype(tt, lim, world) +end + +function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt) + return _methods_by_ftype(t, nothing, lim, world) +end +function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt) + return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL)) +end +function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32}) + return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Vector{Any},Nothing} +end + +hasgenerator(m::Method) = isdefined(m, :generator) +hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method) + +function _uncompressed_ir(m::Method) + s = m.source + if s isa String + s = ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Ptr{Cvoid}, Any), m, C_NULL, s) + end + return s::CodeInfo +end + +_uncompressed_ir(codeinst::CodeInstance, s::String) = + ccall(:jl_uncompress_ir, Ref{CodeInfo}, (Any, Any, Any), codeinst.def.def::Method, codeinst, s) + +""" + Base.generating_output([incremental::Bool])::Bool + +Return `true` if the current process is being used to pre-generate a +code cache via any of the `--output-*` command line arguments. The optional +`incremental` argument further specifies the precompilation mode: when set +to `true`, the function will return `true` only for package precompilation; +when set to `false`, it will return `true` only for system image generation. + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. +""" +function generating_output(incremental::Union{Bool,Nothing}=nothing) + ccall(:jl_generating_output, Cint, ()) == 0 && return false + if incremental !== nothing + JLOptions().incremental == incremental || return false + end + return true +end + +const SLOT_USED = 0x8 +ast_slotflag(@nospecialize(code), i) = ccall(:jl_ir_slotflag, UInt8, (Any, Csize_t), code, i - 1) + +""" + may_invoke_generator(method, atype, sparams) -> Bool + +Computes whether or not we may invoke the generator for the given `method` on +the given `atype` and `sparams`. For correctness, all generated function are +required to return monotonic answers. However, since we don't expect users to +be able to successfully implement this criterion, we only call generated +functions on concrete types. The one exception to this is that we allow calling +generators with abstract types if the generator does not use said abstract type +(and thus cannot incorrectly use it to break monotonicity). This function +computes whether we are in either of these cases. + +Unlike normal functions, the compilation heuristics still can't generate good dispatch +in some cases, but this may still allow inference not to fall over in some limited cases. 
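For the union-flattening helpers `unionlen` and `uniontypes` defined earlier in this hunk (internal and unexported), a quick sketch:

```julia
U = Union{Int, Union{Float64, String}}   # nested unions normalize to a flat union
Base.unionlen(U)     # 3
Base.uniontypes(U)   # Any[...] containing Int64, Float64 and String (internal order)
Base.unionlen(Int)   # 1: a non-Union type counts as a single component
```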
+""" +function may_invoke_generator(mi::MethodInstance) + return may_invoke_generator(mi.def::Method, mi.specTypes, mi.sparam_vals) +end +function may_invoke_generator(method::Method, @nospecialize(atype), sparams::SimpleVector) + # If we have complete information, we may always call the generator + isdispatchtuple(atype) && return true + + # We don't have complete information, but it is possible that the generator + # syntactically doesn't make use of the information we don't have. Check + # for that. + + # For now, only handle the (common, generated by the frontend case) that the + # generator only has one method + generator = method.generator + isa(generator, Core.GeneratedFunctionStub) || return false + tt = Tuple{typeof(generator.gen), Vararg{Any}} + gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world) + gen_mthds isa Vector || return false + length(gen_mthds) == 1 || return false + + generator_method = (first(gen_mthds)::Core.MethodMatch).method + nsparams = length(sparams) + isdefined(generator_method, :source) || return false + code = generator_method.source + nslots = ccall(:jl_ir_nslots, Int, (Any,), code) + at = unwrap_unionall(atype) + at isa DataType || return false + (nslots >= 1 + length(sparams) + length(at.parameters)) || return false + + firstarg = 1 + for i = 1:nsparams + if isa(sparams[i], TypeVar) + if (ast_slotflag(code, firstarg + i) & SLOT_USED) != 0 + return false + end + end + end + nargs = Int(method.nargs) + non_va_args = method.isva ? nargs - 1 : nargs + for i = 1:non_va_args + if !isdispatchelem(at.parameters[i]) + if (ast_slotflag(code, firstarg + i + nsparams) & SLOT_USED) != 0 + return false + end + end + end + if method.isva + # If the va argument is used, we need to ensure that all arguments that + # contribute to the va tuple are dispatchelemes + if (ast_slotflag(code, firstarg + nargs + nsparams) & SLOT_USED) != 0 + for i = (non_va_args+1):length(at.parameters) + if !isdispatchelem(at.parameters[i]) + return false + end + end + end + end + return true +end + +# get a handle to the unique specialization object representing a particular instantiation of a call +# eliminate UnionAll vars that might be degenerate due to having identical bounds, +# or a concrete upper bound and appearing covariantly. +function subst_trivial_bounds(@nospecialize(atype)) + if !isa(atype, UnionAll) + return atype + end + v = atype.var + if isconcretetype(v.ub) || v.lb === v.ub + subst = try + atype{v.ub} + catch + # Note in rare cases a var bound might not be valid to substitute. + nothing + end + if subst !== nothing + return subst_trivial_bounds(subst) + end + end + return UnionAll(v, subst_trivial_bounds(atype.body)) +end + +# If removing trivial vars from atype results in an equivalent type, use that +# instead. Otherwise we can get a case like issue #38888, where a signature like +# f(x::S) where S<:Int +# gets cached and matches a concrete dispatch case. 
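# Editorial sketch (not part of the patch): what `subst_trivial_bounds` above does to a
# degenerate signature. It is an internal helper, so treat these results as indicative:
#
#   Base.subst_trivial_bounds(Tuple{typeof(identity), S} where S<:Int)
#   # => Tuple{typeof(identity), Int64}   (concrete upper bound, so S is substituted)
#
#   Base.subst_trivial_bounds(Tuple{typeof(identity), S} where S<:Integer)
#   # => unchanged: Integer is not concrete and the bounds are not identical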
+function normalize_typevars(method::Method, @nospecialize(atype), sparams::SimpleVector) + at2 = subst_trivial_bounds(atype) + if at2 !== atype && at2 == atype + atype = at2 + sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), at2, method.sig)::SimpleVector + sparams = sp_[2]::SimpleVector + end + return Pair{Any,SimpleVector}(atype, sparams) +end + +function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) + isa(atype, DataType) || return method.sig + mt = ccall(:jl_method_get_table, Any, (Any,), method) + mt === nothing && return method.sig + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#0) +end + +is_nospecialized(method::Method) = method.nospecialize ≠ 0 +is_nospecializeinfer(method::Method) = method.nospecializeinfer && is_nospecialized(method) +function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) + @inline + if isa(atype, UnionAll) + atype, sparams = normalize_typevars(method, atype, sparams) + end + if is_nospecializeinfer(method) + atype = get_nospecializeinfer_sig(method, atype, sparams) + end + if preexisting + # check cached specializations + # for an existing result stored there + return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atype)::Union{Nothing,MethodInstance} + end + return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atype, sparams) +end + +function specialize_method(match::Core.MethodMatch; kwargs...) + return specialize_method(match.method, match.spec_types, match.sparams; kwargs...) +end + +hasintersect(@nospecialize(a), @nospecialize(b)) = typeintersect(a, b) !== Bottom + +########### +# scoping # +########### + +_topmod(m::Module) = ccall(:jl_base_relative_to, Any, (Any,), m)::Module diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl index 9b236caeb6ff1..e44e240baafda 100644 --- a/base/ryu/Ryu.jl +++ b/base/ryu/Ryu.jl @@ -112,7 +112,7 @@ end function Base.show(io::IO, x::T, forceuntyped::Bool=false, fromprint::Bool=false) where {T <: Base.IEEEFloat} compact = get(io, :compact, false)::Bool buf = Base.StringVector(neededdigits(T)) - typed = !forceuntyped && !compact && get(io, :typeinfo, Any) != typeof(x) + typed = !forceuntyped && !compact && Base.nonnothing_nonmissing_typeinfo(io) !== typeof(x) pos = writeshortest(buf, 1, x, false, false, true, -1, (x isa Float32 && !fromprint) ? 
UInt8('f') : UInt8('e'), false, UInt8('.'), typed, compact) write(io, resize!(buf, pos - 1)) diff --git a/base/ryu/exp.jl b/base/ryu/exp.jl index 30291212d014d..4f749668867e2 100644 --- a/base/ryu/exp.jl +++ b/base/ryu/exp.jl @@ -7,34 +7,34 @@ function writeexp(buf, pos, v::T, pos = append_sign(x, plus, space, buf, pos) # special cases - if x == 0 - buf[pos] = UInt8('0') + if iszero(x) + @inbounds buf[pos] = UInt8('0') pos += 1 if precision > 0 && !trimtrailingzeros - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 for _ = 1:precision - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end elseif hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end - buf[pos] = expchar - buf[pos + 1] = UInt8('+') - buf[pos + 2] = UInt8('0') - buf[pos + 3] = UInt8('0') + @inbounds buf[pos] = expchar + @inbounds buf[pos + 1] = UInt8('+') + @inbounds buf[pos + 2] = UInt8('0') + @inbounds buf[pos + 3] = UInt8('0') return pos + 4 elseif isnan(x) - buf[pos] = UInt8('N') - buf[pos + 1] = UInt8('a') - buf[pos + 2] = UInt8('N') + @inbounds buf[pos] = UInt8('N') + @inbounds buf[pos + 1] = UInt8('a') + @inbounds buf[pos + 2] = UInt8('N') return pos + 3 elseif !isfinite(x) - buf[pos] = UInt8('I') - buf[pos + 1] = UInt8('n') - buf[pos + 2] = UInt8('f') + @inbounds buf[pos] = UInt8('I') + @inbounds buf[pos + 1] = UInt8('n') + @inbounds buf[pos + 2] = UInt8('f') return pos + 3 end @@ -42,7 +42,7 @@ function writeexp(buf, pos, v::T, mant = bits & MANTISSA_MASK exp = Int((bits >> 52) & EXP_MASK) - if exp == 0 + if iszero(exp) e2 = 1 - 1023 - 52 m2 = mant else @@ -51,7 +51,7 @@ function writeexp(buf, pos, v::T, end nonzero = false precision += 1 - digits = 0 + digits = zero(UInt32) printedDigits = 0 availableDigits = 0 e = 0 @@ -64,14 +64,14 @@ function writeexp(buf, pos, v::T, j = p10bits - e2 #=@inbounds=# mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1] digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8) - if printedDigits != 0 + if !iszero(printedDigits) if printedDigits + 9 > precision availableDigits = 9 break end pos = append_nine_digits(digits, buf, pos) printedDigits += 9 - elseif digits != 0 + elseif !iszero(digits) availableDigits = decimallength(digits) e = i * 9 + availableDigits - 1 if availableDigits > precision @@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(availableDigits, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -93,26 +93,26 @@ function writeexp(buf, pos, v::T, i -= 1 end end - if e2 < 0 && availableDigits == 0 + if e2 < 0 && iszero(availableDigits) idx = div(-e2, 16) - i = MIN_BLOCK_2[idx + 1] + i = Int(MIN_BLOCK_2[idx + 1]) while i < 200 j = 120 + (-e2 - 16 * idx) p = POW10_OFFSET_2[idx + 1] + i - MIN_BLOCK_2[idx + 1] if p >= POW10_OFFSET_2[idx + 2] - digits = 0 + digits = zero(UInt32) else #=@inbounds=# mula, mulb, mulc = POW10_SPLIT_2[p + 1] digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8) end - if printedDigits != 0 + if !iszero(printedDigits) if printedDigits + 9 > precision availableDigits = 9 break end pos = append_nine_digits(digits, buf, pos) printedDigits += 9 - elseif digits != 0 + elseif !iszero(digits) availableDigits = decimallength(digits) e = -(i + 1) * 9 + availableDigits - 1 if availableDigits > precision @@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(availableDigits, 
digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -135,19 +135,19 @@ function writeexp(buf, pos, v::T, end end maximum = precision - printedDigits - if availableDigits == 0 - digits = 0 + if iszero(availableDigits) + digits = zero(UInt32) end - lastDigit = 0 + lastDigit = zero(UInt32) if availableDigits > maximum for k = 0:(availableDigits - maximum - 1) - lastDigit = digits % 10 - digits = div(digits, 10) + lastDigit = digits % UInt32(10) + digits = div(digits, UInt32(10)) end end roundUp = 0 if lastDigit != 5 - roundUp = lastDigit > 5 + roundUp = lastDigit > 5 ? 1 : 0 else rexp = precision - e requiredTwos = -e2 - rexp @@ -159,10 +159,10 @@ function writeexp(buf, pos, v::T, end roundUp = trailingZeros ? 2 : 1 end - if printedDigits != 0 - if digits == 0 + if !iszero(printedDigits) + if iszero(digits) for _ = 1:maximum - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end else @@ -172,64 +172,68 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(maximum, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end end - if roundUp != 0 + if !iszero(roundUp) roundPos = pos while true roundPos -= 1 - if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' ')) - buf[roundPos + 1] = UInt8('1') + if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' ')) + @inbounds buf[roundPos + 1] = UInt8('1') e += 1 break end - c = roundPos > 0 ? buf[roundPos] : 0x00 + c = roundPos > 0 ? 
(@inbounds buf[roundPos]) : 0x00 if c == decchar continue elseif c == UInt8('9') - buf[roundPos] = UInt8('0') + @inbounds buf[roundPos] = UInt8('0') roundUp = 1 continue else - if roundUp == 2 && UInt8(c) % 2 == 0 + if roundUp == 2 && iseven(c) break end - buf[roundPos] = c + 1 + @inbounds buf[roundPos] = c + 1 break end end end if trimtrailingzeros - while buf[pos - 1] == UInt8('0') + while @inbounds buf[pos - 1] == UInt8('0') pos -= 1 end - if buf[pos - 1] == decchar && !hash + if @inbounds buf[pos - 1] == decchar && !hash pos -= 1 end end buf[pos] = expchar pos += 1 if e < 0 - buf[pos] = UInt8('-') + @inbounds buf[pos] = UInt8('-') pos += 1 e = -e else - buf[pos] = UInt8('+') + @inbounds buf[pos] = UInt8('+') pos += 1 end if e >= 100 - c = e % 10 - unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2) - buf[pos + 2] = UInt8('0') + c + c = (e % 10) % UInt8 + @inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 + @inbounds buf[pos + 2] = UInt8('0') + c pos += 3 else - unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2) + @inbounds d100 = DIGIT_TABLE16[e + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 pos += 2 end return pos diff --git a/base/ryu/fixed.jl b/base/ryu/fixed.jl index e0085f5c66dab..96777059bc284 100644 --- a/base/ryu/fixed.jl +++ b/base/ryu/fixed.jl @@ -38,7 +38,7 @@ function writefixed(buf, pos, v::T, mant = bits & MANTISSA_MASK exp = Int((bits >> 52) & EXP_MASK) - if exp == 0 + if exp == 0 # subnormal e2 = 1 - 1023 - 52 m2 = mant else @@ -53,13 +53,13 @@ function writefixed(buf, pos, v::T, i = len - 1 while i >= 0 j = p10bits - e2 - #=@inbounds=# mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1] + mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1] digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8) if nonzero pos = append_nine_digits(digits, buf, pos) elseif digits != 0 olength = decimallength(digits) - pos = append_n_digits(olength, digits, buf, pos) + pos = append_c_digits(olength, digits, buf, pos) nonzero = true end i -= 1 @@ -103,7 +103,7 @@ function writefixed(buf, pos, v::T, end break end - #=@inbounds=# mula, mulb, mulc = POW10_SPLIT_2[p + 1] + mula, mulb, mulc = POW10_SPLIT_2[p + 1] digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8) if i < blocks - 1 pos = append_nine_digits(digits, buf, pos) @@ -118,11 +118,11 @@ function writefixed(buf, pos, v::T, k += 1 end if lastDigit != 5 - roundUp = lastDigit > 5 + roundUp = lastDigit > 5 ? 1 : 0 else requiredTwos = -e2 - precision - 1 trailingZeros = requiredTwos <= 0 || (requiredTwos < 60 && pow2(m2, requiredTwos)) - roundUp = trailingZeros ? 2 : 1 + roundUp = trailingZeros ? 
2 : 1 # 2 means round only if odd end if maximum > 0 pos = append_c_digits(maximum, digits, buf, pos) @@ -137,13 +137,13 @@ function writefixed(buf, pos, v::T, while true roundPos -= 1 if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-')) || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' ')) + buf[pos] = UInt8('0') buf[roundPos + 1] = UInt8('1') if dotPos > 1 buf[dotPos] = UInt8('0') buf[dotPos + 1] = decchar hasfractional = true end - buf[pos] = UInt8('0') pos += 1 break end diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl index aaa62ba33c703..32aa993467e7a 100644 --- a/base/ryu/shortest.jl +++ b/base/ryu/shortest.jl @@ -232,79 +232,79 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, # special cases if x == 0 if typed && x isa Float16 - buf[pos] = UInt8('F') - buf[pos + 1] = UInt8('l') - buf[pos + 2] = UInt8('o') - buf[pos + 3] = UInt8('a') - buf[pos + 4] = UInt8('t') - buf[pos + 5] = UInt8('1') - buf[pos + 6] = UInt8('6') - buf[pos + 7] = UInt8('(') + @inbounds buf[pos] = UInt8('F') + @inbounds buf[pos + 1] = UInt8('l') + @inbounds buf[pos + 2] = UInt8('o') + @inbounds buf[pos + 3] = UInt8('a') + @inbounds buf[pos + 4] = UInt8('t') + @inbounds buf[pos + 5] = UInt8('1') + @inbounds buf[pos + 6] = UInt8('6') + @inbounds buf[pos + 7] = UInt8('(') pos += 8 end pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end if precision == -1 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end return pos end while hash && precision > 1 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end return pos elseif isnan(x) pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('N') - buf[pos + 1] = UInt8('a') - buf[pos + 2] = UInt8('N') + @inbounds buf[pos] = UInt8('N') + @inbounds buf[pos + 1] = UInt8('a') + @inbounds buf[pos + 2] = UInt8('N') if typed if x isa Float32 - buf[pos + 3] = UInt8('3') - buf[pos + 4] = UInt8('2') + @inbounds buf[pos + 3] = UInt8('3') + @inbounds buf[pos + 4] = UInt8('2') elseif x isa Float16 - buf[pos + 3] = UInt8('1') - buf[pos + 4] = UInt8('6') + @inbounds buf[pos + 3] = UInt8('1') + @inbounds buf[pos + 4] = UInt8('6') end end return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0) elseif !isfinite(x) pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('I') - buf[pos + 1] = UInt8('n') - buf[pos + 2] = UInt8('f') + @inbounds buf[pos] = UInt8('I') + @inbounds buf[pos + 1] = UInt8('n') + @inbounds buf[pos + 2] = UInt8('f') if typed if x isa Float32 - buf[pos + 3] = UInt8('3') - buf[pos + 4] = UInt8('2') + @inbounds buf[pos + 3] = UInt8('3') + @inbounds buf[pos + 4] = UInt8('2') elseif x isa Float16 - buf[pos + 3] = UInt8('1') - buf[pos + 4] = UInt8('6') + @inbounds buf[pos + 3] = UInt8('1') + @inbounds buf[pos + 4] = UInt8('6') end end return pos + 3 + (typed && x isa Union{Float32, Float16} ? 
2 : 0) @@ -313,14 +313,14 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, output, nexp = reduce_shortest(x, compact ? 999_999 : nothing) if typed && x isa Float16 - buf[pos] = UInt8('F') - buf[pos + 1] = UInt8('l') - buf[pos + 2] = UInt8('o') - buf[pos + 3] = UInt8('a') - buf[pos + 4] = UInt8('t') - buf[pos + 5] = UInt8('1') - buf[pos + 6] = UInt8('6') - buf[pos + 7] = UInt8('(') + @inbounds buf[pos] = UInt8('F') + @inbounds buf[pos + 1] = UInt8('l') + @inbounds buf[pos + 2] = UInt8('o') + @inbounds buf[pos + 3] = UInt8('a') + @inbounds buf[pos + 4] = UInt8('t') + @inbounds buf[pos + 5] = UInt8('1') + @inbounds buf[pos + 6] = UInt8('6') + @inbounds buf[pos + 7] = UInt8('(') pos += 8 end pos = append_sign(x, plus, space, buf, pos) @@ -332,161 +332,122 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, !(pt >= olength && abs(mod(x + 0.05, 10^(pt - olength)) - 0.05) > 0.05) exp_form = false if pt <= 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 for _ = 1:abs(pt) - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - # elseif pt >= olength + # elseif pt >= olength # nothing to do at this point - # else + # else # nothing to do at this point end else + # make space for decchar pos += 1 end - i = 0 - ptr = pointer(buf) - ptr2 = pointer(DIGIT_TABLE) - if (output >> 32) != 0 - q = output ÷ 100000000 - output2 = (output % UInt32) - UInt32(100000000) * (q % UInt32) - output = q - c = output2 % UInt32(10000) - output2 = div(output2, UInt32(10000)) - d = output2 % UInt32(10000) - c0 = (c % 100) << 1 - c1 = (c ÷ 100) << 1 - d0 = (d % 100) << 1 - d1 = (d ÷ 100) << 1 - memcpy(ptr + pos + olength - 3, ptr2 + c0, 2) - memcpy(ptr + pos + olength - 5, ptr2 + c1, 2) - memcpy(ptr + pos + olength - 7, ptr2 + d0, 2) - memcpy(ptr + pos + olength - 9, ptr2 + d1, 2) - i += 8 - end - output2 = output % UInt32 - while output2 >= 10000 - c = output2 % UInt32(10000) - output2 = div(output2, UInt32(10000)) - c0 = (c % 100) << 1 - c1 = (c ÷ 100) << 1 - memcpy(ptr + pos + olength - i - 3, ptr2 + c0, 2) - memcpy(ptr + pos + olength - i - 5, ptr2 + c1, 2) - i += 4 - end - if output2 >= 100 - c = (output2 % UInt32(100)) << 1 - output2 = div(output2, UInt32(100)) - memcpy(ptr + pos + olength - i - 3, ptr2 + c, 2) - i += 2 - end - if output2 >= 10 - c = output2 << 1 - buf[pos + 1] = DIGIT_TABLE[c + 2] - buf[pos - exp_form] = DIGIT_TABLE[c + 1] - else - buf[pos - exp_form] = UInt8('0') + (output2 % UInt8) - end + append_c_digits(olength, output, buf, pos) if !exp_form if pt <= 0 pos += olength precision -= olength - while hash && precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 - end elseif pt >= olength pos += olength precision -= olength for _ = 1:nexp - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 if precision < 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - while precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 - end end else pointoff = olength - abs(nexp) + # shift bytes after pointoff to make room for decchar + ptr = pointer(buf) memmove(ptr + pos + pointoff, ptr + pos + pointoff - 1, olength - pointoff + 1) - buf[pos + pointoff] = decchar + @inbounds buf[pos + pointoff] = decchar pos += olength + 1 precision -= olength - while hash && precision > 0 - buf[pos] = UInt8('0') + end + if hash + while precision > 0 + @inbounds buf[pos] = 
UInt8('0') pos += 1 precision -= 1 end end if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end else + # move leading digit into place + @inbounds buf[pos - 1] = buf[pos] if olength > 1 || hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += olength precision -= olength end - if hash && olength == 1 - buf[pos] = UInt8('0') - pos += 1 - end - while hash && precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 + if hash + if olength == 1 + @inbounds buf[pos] = UInt8('0') + pos += 1 + end + while precision > 0 + @inbounds buf[pos] = UInt8('0') + pos += 1 + precision -= 1 + end end - buf[pos] = expchar + @inbounds buf[pos] = expchar pos += 1 exp2 = nexp + olength - 1 if exp2 < 0 - buf[pos] = UInt8('-') + @inbounds buf[pos] = UInt8('-') pos += 1 exp2 = -exp2 elseif padexp - buf[pos] = UInt8('+') + @inbounds buf[pos] = UInt8('+') pos += 1 end if exp2 >= 100 c = exp2 % 10 - memcpy(ptr + pos - 1, ptr2 + 2 * div(exp2, 10), 2) - buf[pos + 2] = UInt8('0') + (c % UInt8) + @inbounds d100 = DIGIT_TABLE16[(div(exp2, 10) % Int) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 + @inbounds buf[pos + 2] = UInt8('0') + (c % UInt8) pos += 3 elseif exp2 >= 10 - memcpy(ptr + pos - 1, ptr2 + 2 * exp2, 2) + @inbounds d100 = DIGIT_TABLE16[(exp2 % Int) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 pos += 2 else if padexp - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - buf[pos] = UInt8('0') + (exp2 % UInt8) + @inbounds buf[pos] = UInt8('0') + (exp2 % UInt8) pos += 1 end end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index f5a88c057e2b3..2064dfbefcecd 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -134,7 +134,7 @@ end Compute `p = a*b` where `b = bLo + bHi<<64`, returning the result as `pLo, pHi` where `p = pLo + pHi<<128`. """ -function umul256(a, bHi, bLo) +function umul256(a::UInt128, bHi::UInt64, bLo::UInt64) aLo = a % UInt64 aHi = (a >> 64) % UInt64 @@ -164,7 +164,7 @@ end Compute `pHi = (a*b)>>128` where `b = bLo + bHi<<64`. 
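A quick sanity check of `umul256` / `umul256_hi` against `BigInt` arithmetic (purely illustrative; these are internal Ryu helpers):

```julia
a   = (UInt128(0x1234_5678_9abc_def0) << 64) | 0x0fed_cba9_8765_4321
bHi = 0xdead_beef_dead_beef
bLo = 0x0123_4567_89ab_cdef
pLo, pHi = Base.Ryu.umul256(a, bHi, bLo)

p_ref = big(a) * ((big(bHi) << 64) + big(bLo))
((big(pHi) << 128) + big(pLo)) == p_ref    # true: full 256-bit product matches
Base.Ryu.umul256_hi(a, bHi, bLo) == pHi    # true: the high 128 bits
```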
""" -umul256_hi(a, bHi, bLo) = umul256(a, bHi, bLo)[2] +umul256_hi(a::UInt128, bHi::UInt64, bLo::UInt64) = umul256(a, bHi, bLo)[2] """ Ryu.mulshiftmod1e9(m, mula, mulb, mulc, j)::UInt32 @@ -183,7 +183,7 @@ function mulshiftmod1e9(m, mula, mulb, mulc, j) return (v % UInt32) - UInt32(1000000000) * shifted end -function append_sign(x, plus, space, buf, pos) +function append_sign(x, plus::Bool, space::Bool, buf, pos::Int) if signbit(x) && !isnan(x) # suppress minus sign for signaling NaNs buf[pos] = UInt8('-') pos += 1 @@ -197,101 +197,14 @@ function append_sign(x, plus, space, buf, pos) return pos end -function append_n_digits(olength, digits, buf, pos) - i = 0 - while digits >= 10000 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + olength - i - 4, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - if digits >= 100 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if digits >= 10 - c = digits << 1 - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - else - buf[pos] = UInt8('0') + digits - i += 1 - end - return pos + i -end - -function append_d_digits(olength, digits, buf, pos, decchar) - i = 0 - while digits >= 10000 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + olength + 1 - i - 4, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - if digits >= 100 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if digits >= 10 - c = digits << 1 - buf[pos] = DIGIT_TABLE[c + 1] - buf[pos + 1] = decchar - buf[pos + 2] = DIGIT_TABLE[c + 2] - i += 3 - else - buf[pos] = UInt8('0') + digits - buf[pos + 1] = decchar - i += 2 - end - return pos + i -end -function append_c_digits(count, digits, buf, pos) - i = 0 - while i < count - 1 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + count - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if i < count - buf[pos + count - i - 1] = UInt8('0') + (digits % 10) - i += 1 - end - return pos + i -end +import Base: append_c_digits_fast as append_c_digits, append_nine_digits -function append_nine_digits(digits, buf, pos) - if digits == 0 - for _ = 1:9 - buf[pos] = UInt8('0') - pos += 1 - end - return pos - end - i = 0 - while i < 5 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + 7 - i, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + 5 - i, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - buf[pos] = UInt8('0') + digits - i += 1 - return pos + i +function append_d_digits(olength::Int, digits::Unsigned, buf, pos::Int, decchar) + newpos = append_c_digits(olength, digits, buf, pos + 1) + @inbounds buf[pos] = buf[pos + 1] + @inbounds buf[pos + 1] = decchar + return newpos # == pos + olength + 1 end const BIG_MASK = (big(1) << 64) - 1 @@ -390,18 +303,7 @@ for T in (Float64, Float32, Float16) @eval pow5split_lookup(::Type{$T}, i) = @inbounds($table_sym[i+1]) end -const DIGIT_TABLE = UInt8[ - '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', - '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', - 
'2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', - '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', - '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', - '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', - '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', - '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', - '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', - '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' -] +const DIGIT_TABLE16 = Base._dec_d100 const POW10_OFFSET = UInt16[ 0, 2, 5, 8, 12, 16, 21, 26, 32, 39, diff --git a/base/scopedvalues.jl b/base/scopedvalues.jl new file mode 100644 index 0000000000000..39e3c2c076718 --- /dev/null +++ b/base/scopedvalues.jl @@ -0,0 +1,273 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module ScopedValues + +export ScopedValue, with, @with +public get + +""" + ScopedValue(x) + +Create a container that propagates values across dynamic scopes. +Use [`with`](@ref) to create and enter a new dynamic scope. + +Values can only be set when entering a new dynamic scope, +and the value referred to will be constant during the +execution of a dynamic scope. + +Dynamic scopes are propagated across tasks. + +# Examples + +```jldoctest +julia> using Base.ScopedValues; + +julia> const sval = ScopedValue(1); + +julia> sval[] +1 + +julia> with(sval => 2) do + sval[] + end +2 + +julia> sval[] +1 +``` + +!!! compat "Julia 1.11" + Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible + implementation is available from the package ScopedValues.jl. +""" +mutable struct ScopedValue{T} + # NOTE this struct must be defined as mutable one since it's used as a key of + # `ScopeStorage` dictionary and thus needs object identity + const has_default::Bool # this field is necessary since isbitstype `default` field may be initialized with undefined value + const default::T + ScopedValue{T}() where T = new(false) + ScopedValue{T}(val) where T = new{T}(true, val) + ScopedValue(val::T) where T = new{T}(true, val) +end + +Base.eltype(::ScopedValue{T}) where {T} = T + +""" + isassigned(val::ScopedValue) + +Test whether a `ScopedValue` has an assigned value. + +See also: [`ScopedValues.with`](@ref), [`ScopedValues.@with`](@ref), [`ScopedValues.get`](@ref). + +# Examples +```jldoctest +julia> using Base.ScopedValues + +julia> a = ScopedValue(1); b = ScopedValue{Int}(); + +julia> isassigned(a) +true + +julia> isassigned(b) +false +``` +""" +function Base.isassigned(val::ScopedValue) + val.has_default && return true + scope = Core.current_scope()::Union{Scope, Nothing} + scope === nothing && return false + return haskey((scope::Scope).values, val) +end + +const ScopeStorage = Base.PersistentDict{ScopedValue, Any} + +struct Scope + values::ScopeStorage +end + +Scope(scope::Scope) = scope + +function Scope(parent::Union{Nothing, Scope}, key::ScopedValue{T}, value) where T + val = convert(T, value) + if parent === nothing + return Scope(ScopeStorage(key=>val)) + end + return Scope(ScopeStorage(parent.values, key=>val)) +end + +function Scope(scope, pair::Pair{<:ScopedValue}) + return Scope(scope, pair...) +end + +function Scope(scope, pair1::Pair{<:ScopedValue}, pair2::Pair{<:ScopedValue}, pairs::Pair{<:ScopedValue}...) 
+ # Unroll this loop through recursion to make sure that + # our compiler optimization support works + return Scope(Scope(scope, pair1...), pair2, pairs...) +end +Scope(::Nothing) = nothing + +function Base.show(io::IO, scope::Scope) + print(io, Scope, "(") + first = true + for (key, value) in scope.values + if first + first = false + else + print(io, ", ") + end + print(io, typeof(key), "@") + show(io, Base.objectid(key)) + print(io, " => ") + show(IOContext(io, :typeinfo => eltype(key)), value) + end + print(io, ")") +end + +struct NoValue end +const novalue = NoValue() + +""" + get(val::ScopedValue{T})::Union{Nothing, Some{T}} + +If the scoped value isn't set and doesn't have a default value, +return `nothing`. Otherwise returns `Some{T}` with the current +value. + +See also: [`ScopedValues.with`](@ref), [`ScopedValues.@with`](@ref), [`ScopedValues.ScopedValue`](@ref). + +# Examples +```jldoctest +julia> using Base.ScopedValues + +julia> a = ScopedValue(42); b = ScopedValue{Int}(); + +julia> ScopedValues.get(a) +Some(42) + +julia> isnothing(ScopedValues.get(b)) +true +``` +""" +function get(val::ScopedValue{T}) where {T} + scope = Core.current_scope()::Union{Scope, Nothing} + if scope === nothing + val.has_default && return Some{T}(val.default) + return nothing + end + scope = scope::Scope + if val.has_default + return Some{T}(Base.get(scope.values, val, val.default)::T) + else + v = Base.get(scope.values, val, novalue) + v === novalue || return Some{T}(v::T) + end + return nothing +end + +function Base.getindex(val::ScopedValue{T})::T where T + maybe = get(val) + maybe === nothing && throw(KeyError(val)) + return something(maybe)::T +end + +function Base.show(io::IO, val::ScopedValue) + print(io, ScopedValue) + print(io, '{', eltype(val), '}') + print(io, '(') + v = get(val) + if v === nothing + print(io, "undefined") + else + show(IOContext(io, :typeinfo => eltype(val)), something(v)) + end + print(io, ')') +end + +""" + @with (var::ScopedValue{T} => val)... expr + +Macro version of `with`. The expression `@with var=>val expr` evaluates `expr` in a +new dynamic scope with `var` set to `val`. `val` will be converted to type `T`. +`@with var=>val expr` is equivalent to `with(var=>val) do expr end`, but `@with` +avoids creating a closure. + +See also: [`ScopedValues.with`](@ref), [`ScopedValues.ScopedValue`](@ref), [`ScopedValues.get`](@ref). + +# Examples +```jldoctest +julia> using Base.ScopedValues + +julia> const a = ScopedValue(1); + +julia> f(x) = a[] + x; + +julia> @with a=>2 f(10) +12 + +julia> @with a=>3 begin + x = 100 + f(x) + end +103 +``` +""" +macro with(exprs...) + if length(exprs) > 1 + ex = last(exprs) + exprs = exprs[1:end-1] + elseif length(exprs) == 1 + ex = only(exprs) + exprs = () + else + error("@with expects at least one argument") + end + exprs = map(esc, exprs) + Expr(:tryfinally, esc(ex), nothing, :(Scope(Core.current_scope()::Union{Nothing, Scope}, $(exprs...)))) +end + +""" + with(f, (var::ScopedValue{T} => val)...) + +Execute `f` in a new dynamic scope with `var` set to `val`. `val` will be converted +to type `T`. + +See also: [`ScopedValues.@with`](@ref), [`ScopedValues.ScopedValue`](@ref), [`ScopedValues.get`](@ref). 
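The docstrings above show `with`/`@with` used within a single task; the claim that dynamic scopes are propagated across tasks can be made concrete with a small sketch (written for this note, not part of the patch — the `verbosity` name is illustrative): a task spawned inside `with` observes the value set by its parent, because the scope active at task creation is captured with the task.

```julia
using Base.ScopedValues

const verbosity = ScopedValue(:info)   # illustrative scoped value

# The scope active when a task is created is captured with the task,
# so the spawned task sees :debug even though it runs concurrently.
result = with(verbosity => :debug) do
    t = Threads.@spawn verbosity[]
    fetch(t)
end
result == :debug      # true

verbosity[] == :info  # true: back to the default outside the dynamic scope
```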
+ +# Examples +```jldoctest +julia> using Base.ScopedValues + +julia> a = ScopedValue(1); + +julia> f(x) = a[] + x; + +julia> f(10) +11 + +julia> with(a=>2) do + f(10) + end +12 + +julia> f(10) +11 + +julia> b = ScopedValue(2); + +julia> g(x) = a[] + b[] + x; + +julia> with(a=>10, b=>20) do + g(30) + end +60 + +julia> with(() -> a[] * b[], a=>3, b=>4) +12 +``` +""" +function with(f, pair::Pair{<:ScopedValue}, rest::Pair{<:ScopedValue}...) + @with(pair, rest..., f()) +end +with(@nospecialize(f)) = f() + +end # module ScopedValues diff --git a/base/secretbuffer.jl b/base/secretbuffer.jl index eedfd8cbe84c1..bf37c3caa6c23 100644 --- a/base/secretbuffer.jl +++ b/base/secretbuffer.jl @@ -29,12 +29,12 @@ true ``` """ mutable struct SecretBuffer <: IO - data::Vector{UInt8} + data::Memory{UInt8} size::Int ptr::Int function SecretBuffer(; sizehint=128) - s = new(Vector{UInt8}(undef, sizehint), 0, 1) + s = new(Memory{UInt8}(undef, sizehint), 0, 1) finalizer(final_shred!, s) return s end @@ -49,7 +49,7 @@ Strings are bad at keeping secrets because they are unable to be securely zeroed or destroyed. Therefore, avoid using this constructor with secret data. Instead of starting with a string, either construct the `SecretBuffer` incrementally with `SecretBuffer()` and [`write`](@ref), or use a `Vector{UInt8}` with -the `Base.SecretBuffer!(::Vector{UInt8})` constructor. +the `Base.SecretBuffer!(::AbstractVector{UInt8})` constructor. """ SecretBuffer(str::AbstractString) = SecretBuffer(String(str)) function SecretBuffer(str::String) @@ -68,7 +68,7 @@ convert(::Type{SecretBuffer}, s::AbstractString) = SecretBuffer(String(s)) Initialize a new `SecretBuffer` from `data`, securely zeroing `data` afterwards. """ -function SecretBuffer!(d::Vector{UInt8}) +function SecretBuffer!(d::AbstractVector{UInt8}) len = length(d) s = SecretBuffer(sizehint=len) for i in 1:len @@ -106,7 +106,7 @@ show(io::IO, s::SecretBuffer) = print(io, "SecretBuffer(\"*******\")") ==(s1::SecretBuffer, s2::SecretBuffer) = (s1.ptr == s2.ptr) && (s1.size == s2.size) && (UInt8(0) == _bufcmp(s1.data, s2.data, min(s1.size, s2.size))) # Also attempt a constant time buffer comparison algorithm — the length of the secret might be # inferred by a timing attack, but not its values. -@noinline function _bufcmp(data1::Vector{UInt8}, data2::Vector{UInt8}, sz::Int) +@noinline function _bufcmp(data1::Memory{UInt8}, data2::Memory{UInt8}, sz::Int) res = UInt8(0) for i = 1:sz res |= xor(data1[i], data2[i]) @@ -117,11 +117,23 @@ end const _sb_hash = UInt === UInt32 ? 0x111c0925 : 0xb06061e370557428 hash(s::SecretBuffer, h::UInt) = hash(_sb_hash, h) +copy(s::SecretBuffer) = copy!(SecretBuffer(sizehint=length(s.data)), s) +function copy!(dest::SecretBuffer, src::SecretBuffer) + if length(dest.data) != length(src.data) + securezero!(dest.data) + dest.data = copy(src.data) + else + copyto!(dest.data, src.data) + end + dest.size = src.size + dest.ptr = src.ptr + return dest +end function write(io::SecretBuffer, b::UInt8) if io.ptr > length(io.data) # We need to resize! 
the array: do this manually to ensure no copies are left behind - newdata = Vector{UInt8}(undef, (io.size+16)*2) + newdata = Memory{UInt8}(undef, (io.size+16)*2) copyto!(newdata, io.data) securezero!(io.data) io.data = newdata @@ -140,8 +152,7 @@ function write(io::IO, s::SecretBuffer) return nb end -cconvert(::Type{Cstring}, s::SecretBuffer) = unsafe_convert(Cstring, s) -function unsafe_convert(::Type{Cstring}, s::SecretBuffer) +function cconvert(::Type{Cstring}, s::SecretBuffer) # Ensure that no nuls appear in the valid region if any(==(0x00), s.data[i] for i in 1:s.size) throw(ArgumentError("`SecretBuffers` containing nul bytes cannot be converted to C strings")) @@ -152,8 +163,10 @@ function unsafe_convert(::Type{Cstring}, s::SecretBuffer) write(s, '\0') s.ptr = p s.size -= 1 - return Cstring(unsafe_convert(Ptr{Cchar}, s.data)) + return s.data end +# optional shim for manual calls to unsafe_convert: +# unsafe_convert(::Type{Cstring}, s::SecretBuffer) = unsafe_convert(Cstring, cconvert(Cstring, s)) seek(io::SecretBuffer, n::Integer) = (io.ptr = max(min(n+1, io.size+1), 1); io) seekend(io::SecretBuffer) = seek(io, io.size+1) @@ -187,7 +200,7 @@ resetting its pointer and size. This function is used to securely erase the sensitive data held in the buffer, reducing the potential for information leaks. -# Example +# Examples ```julia s = SecretBuffer() write(s, 's', 'e', 'c', 'r', 'e', 't') diff --git a/base/set.jl b/base/set.jl index a91bf328bd911..d1f9458039cd4 100644 --- a/base/set.jl +++ b/base/set.jl @@ -91,18 +91,70 @@ isempty(s::Set) = isempty(s.dict) length(s::Set) = length(s.dict) in(x, s::Set) = haskey(s.dict, x) -# This avoids hashing and probing twice and it works the same as -# in!(x, s::Set) = in(x, s) ? true : (push!(s, x); false) +""" + in!(x, s::AbstractSet) -> Bool + +If `x` is in `s`, return `true`. If not, push `x` into `s` and return `false`. +This is equivalent to `in(x, s) ? true : (push!(s, x); false)`, but may have a +more efficient implementation. + +See also: [`in`](@ref), [`push!`](@ref), [`Set`](@ref) + +!!! compat "Julia 1.11" + This function requires at least 1.11. + +# Examples +```jldoctest; filter = r"^ [1234]\$" +julia> s = Set{Any}([1, 2, 3]); in!(4, s) +false + +julia> length(s) +4 + +julia> in!(0x04, s) +true + +julia> s +Set{Any} with 4 elements: + 4 + 2 + 3 + 1 +``` +""" +function in!(x, s::AbstractSet) + x ∈ s ? true : (push!(s, x); false) +end + function in!(x, s::Set) - idx, sh = ht_keyindex2_shorthash!(s.dict, x) + xT = convert(eltype(s), x) + idx, sh = ht_keyindex2_shorthash!(s.dict, xT) idx > 0 && return true - _setindex!(s.dict, nothing, x, -idx, sh) + _setindex!(s.dict, nothing, xT, -idx, sh) return false end push!(s::Set, x) = (s.dict[x] = nothing; s) -pop!(s::Set, x) = (pop!(s.dict, x); x) -pop!(s::Set, x, default) = (x in s ? 
pop!(s, x) : default) + +function pop!(s::Set, x, default) + dict = s.dict + index = ht_keyindex(dict, x) + if index > 0 + @inbounds key = dict.keys[index] + _delete!(dict, index) + return key + else + return default + end +end + +function pop!(s::Set, x) + index = ht_keyindex(s.dict, x) + index < 1 && throw(KeyError(x)) + result = @inbounds s.dict.keys[index] + _delete!(s.dict, index) + result +end function pop!(s::Set) isempty(s) && throw(ArgumentError("set must be non-empty")) @@ -117,12 +169,14 @@ copymutable(s::Set{T}) where {T} = Set{T}(s) # Set is the default mutable fall-back copymutable(s::AbstractSet{T}) where {T} = Set{T}(s) -sizehint!(s::Set, newsz) = (sizehint!(s.dict, newsz); s) +sizehint!(s::Set, newsz; shrink::Bool=true) = (sizehint!(s.dict, newsz; shrink); s) empty!(s::Set) = (empty!(s.dict); s) rehash!(s::Set) = (rehash!(s.dict); s) iterate(s::Set, i...) = iterate(KeySet(s.dict), i...) +@propagate_inbounds Iterators.only(s::Set) = Iterators._only(s, first) + # In case the size(s) is smaller than size(t) its more efficient to iterate through # elements of s instead and only delete the ones also contained in t. # The threshold for this decision boils down to a tradeoff between @@ -147,7 +201,7 @@ end unique(itr) Return an array containing only the unique elements of collection `itr`, -as determined by [`isequal`](@ref), in the order that the first of each +as determined by [`isequal`](@ref) and [`hash`](@ref), in the order that the first of each set of equivalent elements originally appears. The element type of the input is preserved. @@ -382,7 +436,7 @@ end """ unique!(A::AbstractVector) -Remove duplicate items as determined by [`isequal`](@ref), then return the modified `A`. +Remove duplicate items as determined by [`isequal`](@ref) and [`hash`](@ref), then return the modified `A`. `unique!` will return the elements of `A` in the order that they occur. If you do not care about the order of the returned data, then calling `(sort!(A); unique!(A))` will be much more efficient as long as the elements of `A` can be sorted. @@ -425,11 +479,21 @@ end """ allunique(itr) -> Bool + allunique(f, itr) -> Bool Return `true` if all values from `itr` are distinct when compared with [`isequal`](@ref). +Or if all of `[f(x) for x in itr]` are distinct, for the second method. + +Note that `allunique(f, itr)` may call `f` fewer than `length(itr)` times. +The precise number of calls is regarded as an implementation detail. + +`allunique` may use a specialized implementation when the input is sorted. See also: [`unique`](@ref), [`issorted`](@ref), [`allequal`](@ref). +!!! compat "Julia 1.11" + The method `allunique(f, itr)` requires at least Julia 1.11. + # Examples ```jldoctest julia> allunique([1, 2, 3]) @@ -443,6 +507,9 @@ false julia> allunique([NaN, 2.0, NaN, 4.0]) false + +julia> allunique(abs, [1, -1, 2]) +false ``` """ function allunique(C) @@ -453,8 +520,10 @@ function allunique(C) return _hashed_allunique(C) end +allunique(f, xs) = allunique(Generator(f, xs)) + function _hashed_allunique(C) - seen = Set{eltype(C)}() + seen = Set{@default_eltype(C)}() x = iterate(C) if haslength(C) && length(C) > 1000 for i in OneTo(1000) @@ -476,7 +545,31 @@ allunique(::Union{AbstractSet,AbstractDict}) = true allunique(r::AbstractRange) = !iszero(step(r)) || length(r) <= 1 -allunique(A::StridedArray) = length(A) < 32 ? 
_indexed_allunique(A) : _hashed_allunique(A) +function allunique(A::StridedArray) + if length(A) < 32 + _indexed_allunique(A) + elseif OrderStyle(eltype(A)) === Ordered() + a1, rest1 = Iterators.peel(A)::Tuple{Any,Any} + a2, rest = Iterators.peel(rest1)::Tuple{Any,Any} + if !isequal(a1, a2) + compare = isless(a1, a2) ? isless : (a,b) -> isless(b,a) + for a in rest + if compare(a2, a) + a2 = a + elseif isequal(a2, a) + return false + else + return _hashed_allunique(A) + end + end + else # isequal(a1, a2) + return false + end + return true + else + _hashed_allunique(A) + end +end function _indexed_allunique(A) length(A) < 2 && return true @@ -502,16 +595,30 @@ function allunique(t::Tuple) end allunique(t::Tuple{}) = true +function allunique(f::F, t::Tuple) where {F} + length(t) < 2 && return true + length(t) < 32 || return _hashed_allunique(Generator(f, t)) + return allunique(map(f, t)) +end + """ allequal(itr) -> Bool + allequal(f, itr) -> Bool Return `true` if all values from `itr` are equal when compared with [`isequal`](@ref). +Or if all of `[f(x) for x in itr]` are equal, for the second method. + +Note that `allequal(f, itr)` may call `f` fewer than `length(itr)` times. +The precise number of calls is regarded as an implementation detail. See also: [`unique`](@ref), [`allunique`](@ref). !!! compat "Julia 1.8" The `allequal` function requires at least Julia 1.8. +!!! compat "Julia 1.11" + The method `allequal(f, itr)` requires at least Julia 1.11. + # Examples ```jldoctest julia> allequal([]) @@ -528,14 +635,36 @@ false julia> allequal(Dict(:a => 1, :b => 1)) false + +julia> allequal(abs2, [1, -1]) +true ``` """ -allequal(itr) = isempty(itr) ? true : all(isequal(first(itr)), itr) +function allequal(itr) + if haslength(itr) + length(itr) <= 1 && return true + end + pl = Iterators.peel(itr) + isnothing(pl) && return true + a, rest = pl + return all(isequal(a), rest) +end allequal(c::Union{AbstractSet,AbstractDict}) = length(c) <= 1 allequal(r::AbstractRange) = iszero(step(r)) || length(r) <= 1 +allequal(f, xs) = allequal(Generator(f, xs)) + +function allequal(f, xs::Tuple) + length(xs) <= 1 && return true + f1 = f(xs[1]) + for x in tail(xs) + isequal(f1, f(x)) || return false + end + return true +end + filter!(f, s::Set) = unsafe_filter!(f, s) const hashs_seed = UInt === UInt64 ? 0x852ada37cfe8e0ce : 0xcfe8e0ce diff --git a/base/shell.jl b/base/shell.jl index 5bfd11fb46d29..e07fff128acfe 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -4,7 +4,19 @@ const shell_special = "#{}()[]<>|&*?~;" -# strips the end but respects the space when the string ends with "\\ " +(@doc raw""" + rstrip_shell(s::AbstractString) + +Strip trailing whitespace from a shell command string, while respecting a trailing backslash followed by a space ("\\ "). + +```jldoctest +julia> Base.rstrip_shell("echo 'Hello World' \\ ") +"echo 'Hello World' \\ " + +julia> Base.rstrip_shell("echo 'Hello World' ") +"echo 'Hello World'" +``` +""" function rstrip_shell(s::AbstractString) c_old = nothing for (i, c) in Iterators.reverse(pairs(s)) @@ -14,16 +26,15 @@ function rstrip_shell(s::AbstractString) c_old = c end SubString(s, 1, 0) -end +end) function shell_parse(str::AbstractString, interpolate::Bool=true; special::AbstractString="", filename="none") - s = SubString(str, firstindex(str)) + last_arg = firstindex(str) # N.B.: This is used by REPLCompletions + s = SubString(str, last_arg) s = rstrip_shell(lstrip(s)) - # N.B.: This is used by REPLCompletions - last_parse = 0:-1 - isempty(s) && return interpolate ? 
(Expr(:tuple,:()),last_parse) : ([],last_parse) + isempty(s) && return interpolate ? (Expr(:tuple,:()), last_arg) : ([], last_arg) in_single_quotes = false in_double_quotes = false @@ -32,6 +43,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; arg = [] i = firstindex(s) st = Iterators.Stateful(pairs(s)) + update_last_arg = false # true after spaces or interpolate function push_nonempty!(list, x) if !isa(x,AbstractString) || !isempty(x) @@ -54,6 +66,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; for (j, c) in st j, c = j::Int, c::C if !in_single_quotes && !in_double_quotes && isspace(c) + update_last_arg = true i = consume_upto!(arg, s, i, j) append_2to1!(args, arg) while !isempty(st) @@ -77,12 +90,17 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; # use parseatom instead of parse to respect filename (#28188) ex, j = Meta.parseatom(s, stpos, filename=filename) end - last_parse = (stpos:prevind(s, j)) .+ s.offset - push_nonempty!(arg, ex) + last_arg = stpos + s.offset + update_last_arg = true + push!(arg, ex) s = SubString(s, j) Iterators.reset!(st, pairs(s)) i = firstindex(s) else + if update_last_arg + last_arg = i + s.offset + update_last_arg = false + end if !in_double_quotes && c == '\'' in_single_quotes = !in_single_quotes i = consume_upto!(arg, s, i, j) @@ -124,16 +142,31 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; push_nonempty!(arg, s[i:end]) append_2to1!(args, arg) - interpolate || return args, last_parse + interpolate || return args, last_arg # construct an expression ex = Expr(:tuple) for arg in args push!(ex.args, Expr(:tuple, arg...)) end - return ex, last_parse + return ex, last_arg end +""" + shell_split(command::AbstractString) + +Split a shell command string into its individual components. + +# Examples +```jldoctest +julia> Base.shell_split("git commit -m 'Initial commit'") +4-element Vector{String}: + "git" + "commit" + "-m" + "Initial commit" +``` +""" function shell_split(s::AbstractString) parsed = shell_parse(s, false)[1] args = String[] @@ -186,9 +219,9 @@ print_shell_escaped(io::IO; special::String="") = nothing """ shell_escape(args::Union{Cmd,AbstractString...}; special::AbstractString="") -The unexported `shell_escape` function is the inverse of the unexported `shell_split` function: +The unexported `shell_escape` function is the inverse of the unexported [`Base.shell_split()`](@ref) function: it takes a string or command object and escapes any special characters in such a way that calling -`shell_split` on it would give back the array of words in the original command. The `special` +[`Base.shell_split()`](@ref) on it would give back the array of words in the original command. The `special` keyword argument controls what characters in addition to whitespace, backslashes, quotes and dollar signs are considered to be special (default: none). @@ -216,7 +249,7 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...) function isword(c::AbstractChar) if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z' # word characters - elseif c == '_' || c == '/' || c == '+' || c == '-' + elseif c == '_' || c == '/' || c == '+' || c == '-' || c == '.' # other common characters elseif c == '\'' have_single = true @@ -251,6 +284,8 @@ The unexported `shell_escape_posixly` function takes a string or command object and escapes any special characters in such a way that it is safe to pass it as an argument to a posix shell. 
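The `shell_escape` docstring above states that it is the inverse of `shell_split`. A quick round-trip check (a sketch written for this note, not taken from the patch) makes that relationship concrete:

```julia
args = ["grep", "-n", "hello world", "src/my file.jl"]

escaped = Base.shell_escape(args...)   # one string whose words are quoted as needed
Base.shell_split(escaped) == args      # true: splitting recovers the original words

# The POSIX-oriented variant quotes so a POSIX shell would see the same words:
Base.shell_escape_posixly("echo", "a b", "&&", "echo", "done")
# e.g. "echo 'a b' '&&' echo done"
```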
+See also: [`Base.shell_escape()`](@ref) + # Examples ```jldoctest julia> Base.shell_escape_posixly("cat", "/foo/bar baz", "&&", "echo", "done") @@ -283,7 +318,7 @@ a backslash. This function should also work for a POSIX shell, except if the input string contains a linefeed (`"\\n"`) character. -See also: [`shell_escape_posixly`](@ref) +See also: [`Base.shell_escape_posixly()`](@ref) """ function shell_escape_csh(io::IO, args::AbstractString...) first = true @@ -381,9 +416,9 @@ run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs)) With an I/O stream parameter `io`, the result will be written there, rather than returned as a string. -See also [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref). +See also [`Base.escape_microsoft_c_args()`](@ref), [`Base.shell_escape_posixly()`](@ref). -# Example +# Examples ```jldoctest julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"") "a^^\\"^o\\"^^u^\\"" @@ -435,7 +470,7 @@ It joins command-line arguments to be passed to a Windows C/C++/Julia application into a command line, escaping or quoting the meta characters space, TAB, double quote and backslash where needed. -See also [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref). +See also [`Base.shell_escape_wincmd()`](@ref), [`Base.escape_raw_string()`](@ref). """ function escape_microsoft_c_args(io::IO, args::AbstractString...) # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES diff --git a/base/show.jl b/base/show.jl index 45d6a502619db..de45ca07e3131 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Core.Compiler: has_typevar +using .Compiler: has_typevar function show(io::IO, ::MIME"text/plain", u::UndefInitializer) show(io, u) @@ -23,8 +23,17 @@ function show(io::IO, ::MIME"text/plain", r::LinRange) print_range(io, r) end -function _isself(@nospecialize(ft)) - name = ft.name.mt.name +function show(io::IO, ::MIME"text/plain", r::LogRange) # display LogRange like LinRange + isempty(r) && return show(io, r) + summary(io, r) + println(io, ":") + print_range(io, r, " ", ", ", "", " \u2026 ") +end + +function _isself(ft::DataType) + ftname = ft.name + isdefined(ftname, :mt) || return false + name = ftname.mt.name mod = parentmodule(ft) # NOTE: not necessarily the same as ft.name.mt.module return isdefined(mod, name) && ft == typeof(getfield(mod, name)) end @@ -63,13 +72,13 @@ ncodeunits(c::ANSIDelimiter) = ncodeunits(c.del) textwidth(::ANSIDelimiter) = 0 # An iterator similar to `pairs(::String)` but whose values are Char or ANSIDelimiter -struct ANSIIterator - captures::RegexMatchIterator +struct ANSIIterator{S} + captures::RegexMatchIterator{S} end ANSIIterator(s::AbstractString) = ANSIIterator(eachmatch(ansi_regex, s)) -IteratorSize(::Type{ANSIIterator}) = SizeUnknown() -eltype(::Type{ANSIIterator}) = Pair{Int, Union{Char,ANSIDelimiter}} +IteratorSize(::Type{<:ANSIIterator}) = SizeUnknown() +eltype(::Type{<:ANSIIterator}) = Pair{Int, Union{Char,ANSIDelimiter}} function iterate(I::ANSIIterator, (i, m_st)=(1, iterate(I.captures))) m_st === nothing && return nothing m, (j, new_m_st) = m_st @@ -78,7 +87,7 @@ function iterate(I::ANSIIterator, (i, m_st)=(1, iterate(I.captures))) end textwidth(I::ANSIIterator) = mapreduce(textwidth∘last, +, I; init=0) -function _truncate_at_width_or_chars(ignore_ANSI::Bool, str, width, rpad=false, chars="\r\n", truncmark="…") +function _truncate_at_width_or_chars(ignore_ANSI::Bool, str::AbstractString, width::Int, rpad::Bool=false, 
chars="\r\n", truncmark="…") truncwidth = textwidth(truncmark) (width <= 0 || width < truncwidth) && return "" wid = truncidx = lastidx = 0 @@ -143,7 +152,7 @@ function show(io::IO, ::MIME"text/plain", iter::Union{KeySet,ValueIterator}) end function show(io::IO, ::MIME"text/plain", t::AbstractDict{K,V}) where {K,V} - isempty(t) && return show(io, t) + (isempty(t) || !haslength(t)) && return show(io, t) # show more descriptively, with one line per key/value pair recur_io = IOContext(io, :SHOWN_SET => t) limit = get(io, :limit, false)::Bool @@ -292,35 +301,51 @@ struct IOContext{IO_t <: IO} <: AbstractPipe dict::ImmutableDict{Symbol, Any} function IOContext{IO_t}(io::IO_t, dict::ImmutableDict{Symbol, Any}) where IO_t<:IO - @assert !(IO_t <: IOContext) "Cannot create `IOContext` from another `IOContext`." + io isa IOContext && (io = io.io) # implicitly unwrap, since the io.dict field is not useful anymore, and could confuse pipe_reader consumers return new(io, dict) end end -# (Note that TTY and TTYTerminal io types have a :color property.) -unwrapcontext(io::IO) = io, get(io,:color,false) ? ImmutableDict{Symbol,Any}(:color, true) : ImmutableDict{Symbol,Any}() -unwrapcontext(io::IOContext) = io.io, io.dict +# (Note that TTY and TTYTerminal io types have an implied :color property.) +ioproperties(io::IO) = get(io, :color, false) ? ImmutableDict{Symbol,Any}(:color, true) : ImmutableDict{Symbol,Any}() +ioproperties(io::IOContext) = io.dict +# these can probably be deprecated, but there is a use in the ecosystem for them +unwrapcontext(io::IO) = (io,) +unwrapcontext(io::IOContext) = (io.io,) -function IOContext(io::IO, dict::ImmutableDict) - io0 = unwrapcontext(io)[1] - IOContext{typeof(io0)}(io0, dict) +function IOContext(io::IO, dict::ImmutableDict{Symbol, Any}) + return IOContext{typeof(io)}(io, dict) +end + +function IOContext(io::IOContext, dict::ImmutableDict{Symbol, Any}) + return typeof(io)(io.io, dict) end -convert(::Type{IOContext}, io::IO) = IOContext(unwrapcontext(io)...)::IOContext + +convert(::Type{IOContext}, io::IOContext) = io +convert(::Type{IOContext}, io::IO) = IOContext(io, ioproperties(io))::IOContext +convert(::Type{IOContext{IO_t}}, io::IOContext{IO_t}) where {IO_t} = io +convert(::Type{IOContext{IO_t}}, io::IO) where {IO_t} = IOContext{IO_t}(io, ioproperties(io))::IOContext{IO_t} IOContext(io::IO) = convert(IOContext, io) +IOContext{IO_t}(io::IO) where {IO_t} = convert(IOContext{IO_t}, io) function IOContext(io::IO, KV::Pair) - io0, d = unwrapcontext(io) - IOContext(io0, ImmutableDict{Symbol,Any}(d, KV[1], KV[2])) + d = ioproperties(io) + return IOContext(io, ImmutableDict{Symbol,Any}(d, KV[1], KV[2])) end """ IOContext(io::IO, context::IOContext) Create an `IOContext` that wraps an alternate `IO` but inherits the properties of `context`. + +!!! note + Unless explicitly set in the wrapped `io` the `displaysize` of `io` will not be inherited. + This is because by default `displaysize` is not a property of IO objects themselves, but lazily inferred, + as the size of the terminal window can change during the lifetime of the IO object. """ -IOContext(io::IO, context::IO) = IOContext(unwrapcontext(io)[1], unwrapcontext(context)[2]) +IOContext(io::IO, context::IO) = IOContext(io, ioproperties(context)) """ IOContext(io::IO, KV::Pair...) 
@@ -418,7 +443,7 @@ function show_circular(io::IOContext, @nospecialize(x)) for (k, v) in io.dict if k === :SHOWN_SET if v === x - print(io, "#= circular reference @-$d =#") + printstyled(io, "#= circular reference @-$d =#"; color = :yellow) return true end d += 1 @@ -497,24 +522,16 @@ function _show_default(io::IO, @nospecialize(x)) end function active_module() - isassigned(REPL_MODULE_REF) || return Main - REPL = REPL_MODULE_REF[] - return invokelatest(REPL.active_module)::Module + if ccall(:jl_is_in_pure_context, Bool, ()) + error("active_module() should not be called from a pure context") + end + if !@isdefined(active_repl) || active_repl === nothing + return Main + end + return invokelatest(active_module, active_repl)::Module end -# Check if a particular symbol is exported from a standard library module -function is_exported_from_stdlib(name::Symbol, mod::Module) - !isdefined(mod, name) && return false - orig = getfield(mod, name) - while !(mod === Base || mod === Core) - activemod = active_module() - parent = parentmodule(mod) - if mod === activemod || mod === parent || parent === activemod - return false - end - mod = parent - end - return isexported(mod, name) && isdefined(mod, name) && !isdeprecated(mod, name) && getfield(mod, name) === orig +module UsesCoreAndBaseOnly end function show_function(io::IO, f::Function, compact::Bool, fallback::Function) @@ -527,13 +544,13 @@ function show_function(io::IO, f::Function, compact::Bool, fallback::Function) print(io, mt.name) elseif isdefined(mt, :module) && isdefined(mt.module, mt.name) && getfield(mt.module, mt.name) === f - mod = active_module() - if is_exported_from_stdlib(mt.name, mt.module) || mt.module === mod - show_sym(io, mt.name) - else + # this used to call the removed internal function `is_exported_from_stdlib`, which effectively + # just checked for exports from Core and Base. + mod = get(io, :module, UsesCoreAndBaseOnly) + if !(isvisible(mt.name, mt.module, mod) || mt.module === mod) print(io, mt.module, ".") - show_sym(io, mt.name) end + show_sym(io, mt.name) else fallback(io, f) end @@ -664,7 +681,7 @@ function show_can_elide(p::TypeVar, wheres::Vector, elide::Int, env::SimpleVecto has_typevar(v.lb, p) && return false has_typevar(v.ub, p) && return false end - for i = 1:length(env) + for i = eachindex(env) i == skip && continue has_typevar(env[i], p) && return false end @@ -720,9 +737,9 @@ end function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, wheres::Vector) if !(get(io, :compact, false)::Bool) # Print module prefix unless alias is visible from module passed to - # IOContext. If :module is not set, default to Main (or current active module). + # IOContext. If :module is not set, default to Main. # nothing can be used to force printing prefix. 
- from = get(io, :module, active_module()) + from = get(io, :module, Main) if (from === nothing || !isvisible(name.name, name.mod, from)) show(io, name.mod) print(io, ".") @@ -1023,6 +1040,21 @@ function is_global_function(tn::Core.TypeName, globname::Union{Symbol,Nothing}) return false end +function check_world_bounded(tn::Core.TypeName) + bnd = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), tn.module, tn.name, true) + isdefined(bnd, :partitions) || return nothing + partition = @atomic bnd.partitions + while true + if is_defined_const_binding(binding_kind(partition)) && partition_restriction(partition) <: tn.wrapper + max_world = @atomic partition.max_world + max_world == typemax(UInt) && return nothing + return Int(partition.min_world):Int(max_world) + end + isdefined(partition, :next) || return nothing + partition = @atomic partition.next + end +end + function show_type_name(io::IO, tn::Core.TypeName) if tn === UnionAll.name # by coincidence, `typeof(Type)` is a valid representation of the UnionAll type. @@ -1034,12 +1066,14 @@ function show_type_name(io::IO, tn::Core.TypeName) sym = (globfunc ? globname : tn.name)::Symbol globfunc && print(io, "typeof(") quo = false + world = check_world_bounded(tn) + world !== nothing && print(io, "@world(") if !(get(io, :compact, false)::Bool) # Print module prefix unless type is visible from module passed to - # IOContext If :module is not set, default to Main (or current active module). + # IOContext If :module is not set, default to Main. # nothing can be used to force printing prefix - from = get(io, :module, active_module()) - if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from)) + from = get(io, :module, Main) + if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from::Module)) show(io, tn.module) print(io, ".") if globfunc && !is_id_start_char(first(string(sym))) @@ -1052,6 +1086,7 @@ function show_type_name(io::IO, tn::Core.TypeName) end end show_sym(io, sym) + world !== nothing && print(io, ", ", world, ")") quo && print(io, ")") globfunc && print(io, ")") nothing @@ -1082,29 +1117,68 @@ function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[]) # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg` if istuple + if n == 0 + print(io, "Tuple{}") + return + end + + # find the length of the homogeneous tail max_n = 3 taillen = 1 - for i in (n-1):-1:1 - if parameters[i] === parameters[n] - taillen += 1 + pn = parameters[n] + fulln = n + vakind = :none + vaN = 0 + if pn isa Core.TypeofVararg + if isdefined(pn, :N) + vaN = pn.N + if vaN isa Int + taillen = vaN + fulln += taillen - 1 + vakind = :fixed + else + vakind = :bound + end else - break + vakind = :unbound + end + pn = unwrapva(pn) + end + if !(pn isa TypeVar || pn isa Type) + # prefer Tuple over NTuple if it contains something other than types + # (e.g. if the user has switched the N and T accidentally) + taillen = 0 + elseif vakind === :none || vakind === :fixed + for i in (n-1):-1:1 + if parameters[i] === pn + taillen += 1 + else + break + end end end - if n == taillen > max_n - print(io, "NTuple{", n, ", ") - show(io, parameters[1]) + + # prefer NTuple over Tuple if it is a Vararg without a fixed length + # and prefer Tuple for short lists of elements + if (vakind == :bound && n == 1 == taillen) || (vakind === :fixed && taillen == fulln > max_n) || + (vakind === :none && taillen == fulln > max_n) + print(io, "NTuple{") + vakind === :bound ? 
show(io, vaN) : print(io, fulln) + print(io, ", ") + show(io, pn) print(io, "}") else print(io, "Tuple{") - for i = 1:(taillen > max_n ? n-taillen : n) + headlen = (taillen > max_n ? fulln - taillen : fulln) + for i = 1:headlen i > 1 && print(io, ", ") - show(io, parameters[i]) + show(io, vakind === :fixed && i >= n ? pn : parameters[i]) end - if taillen > max_n - print(io, ", Vararg{") - show(io, parameters[n]) - print(io, ", ", taillen, "}") + if headlen < fulln + headlen > 0 && print(io, ", ") + print(io, "Vararg{") + show(io, pn) + print(io, ", ", fulln - headlen, "}") end print(io, "}") end @@ -1132,11 +1206,11 @@ end function show_at_namedtuple(io::IO, syms::Tuple, types::DataType) first = true - for i in 1:length(syms) + for i in eachindex(syms) if !first print(io, ", ") end - print(io, syms[i]) + show_sym(io, syms[i]) typ = types.parameters[i] if typ !== Any print(io, "::") @@ -1190,14 +1264,13 @@ function show(io::IO, tn::Core.TypeName) print(io, ")") end +nonnothing_nonmissing_typeinfo(io::IO) = nonmissingtype(nonnothingtype(get(io, :typeinfo, Any))) +show(io::IO, b::Bool) = print(io, nonnothing_nonmissing_typeinfo(io) === Bool ? (b ? "1" : "0") : (b ? "true" : "false")) show(io::IO, ::Nothing) = print(io, "nothing") -show(io::IO, b::Bool) = print(io, get(io, :typeinfo, Any) === Bool ? (b ? "1" : "0") : (b ? "true" : "false")) show(io::IO, n::Signed) = (write(io, string(n)); nothing) show(io::IO, n::Unsigned) = print(io, "0x", string(n, pad = sizeof(n)<<1, base = 16)) print(io::IO, n::Unsigned) = print(io, string(n)) -show(io::IO, p::Ptr) = print(io, typeof(p), " @0x$(string(UInt(p), base = 16, pad = Sys.WORD_SIZE>>2))") - has_tight_type(p::Pair) = typeof(p.first) == typeof(p).parameters[1] && typeof(p.second) == typeof(p).parameters[2] @@ -1277,17 +1350,27 @@ function sourceinfo_slotnames(slotnames::Vector{Symbol}) return printnames end -show(io::IO, l::Core.MethodInstance) = show_mi(io, l) +show(io::IO, mi::Core.MethodInstance) = show_mi(io, mi) +function show(io::IO, codeinst::Core.CodeInstance) + print(io, "CodeInstance for ") + def = codeinst.def + if isa(def, Core.ABIOverride) + show_mi(io, def.def) + print(io, " (ABI Overridden)") + else + show_mi(io, def::MethodInstance) + end +end -function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false) - def = l.def +function show_mi(io::IO, mi::Core.MethodInstance, from_stackframe::Bool=false) + def = mi.def if isa(def, Method) - if isdefined(def, :generator) && l === def.generator + if isdefined(def, :generator) && mi === def.generator print(io, "MethodInstance generator for ") show(io, def) else print(io, "MethodInstance for ") - show_tuple_as_call(io, def.name, l.specTypes; qualified=true) + show_tuple_as_call(io, def.name, mi.specTypes; qualified=true) end else print(io, "Toplevel MethodInstance thunk") @@ -1295,41 +1378,19 @@ function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false) # MethodInstance is part of a stacktrace, it gets location info # added by other means. But if it isn't, then we should try # to print a little more identifying information. - if !from_stackframe - linetable = l.uninferred.linetable - line = isempty(linetable) ? 
"unknown" : (lt = linetable[1]::Union{LineNumberNode,Core.LineInfoNode}; string(lt.file, ':', lt.line)) - print(io, " from ", def, " starting at ", line) - end - end -end - -# These sometimes show up as Const-values in InferenceFrameInfo signatures -show(io::IO, r::Core.Compiler.UnitRange) = show(io, r.start : r.stop) -show(io::IO, mime::MIME{Symbol("text/plain")}, r::Core.Compiler.UnitRange) = show(io, mime, r.start : r.stop) - -function show(io::IO, mi_info::Core.Compiler.Timings.InferenceFrameInfo) - mi = mi_info.mi - def = mi.def - if isa(def, Method) - if isdefined(def, :generator) && mi === def.generator - print(io, "InferenceFrameInfo generator for ") - show(io, def) - else - print(io, "InferenceFrameInfo for ") - argnames = [isa(a, Core.Const) ? (isa(a.val, Type) ? "" : a.val) : "" for a in mi_info.slottypes[1:mi_info.nargs]] - show_tuple_as_call(io, def.name, mi.specTypes; argnames, qualified=true) + if !from_stackframe && isdefined(mi, :cache) + ci = mi.cache + if ci.owner === :uninferred + di = ci.inferred.debuginfo + file, line = IRShow.debuginfo_firstline(di) + file = string(file) + line = isempty(file) || line < 0 ? "" : "$file:$line" + print(io, " from ", def, " starting at ", line) + end end - else - linetable = mi.uninferred.linetable - line = isempty(linetable) ? "" : (lt = linetable[1]; string(lt.file, ':', lt.line)) - print(io, "Toplevel InferenceFrameInfo thunk from ", def, " starting at ", line) end end -function show(io::IO, tinf::Core.Compiler.Timings.Timing) - print(io, "Core.Compiler.Timings.Timing(", tinf.mi_info, ") with ", length(tinf.children), " children") -end - function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, delim, cl, delim_one, i1=first(LinearIndices(itr)), l=last(LinearIndices(itr))) print(io, op) @@ -1345,11 +1406,11 @@ function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, de x = itr[i] show(recur_io, x) end - i += 1 - if i > l + if i == l delim_one && first && print(io, delim) break end + i += 1 first = false print(io, delim) print(io, ' ') @@ -1422,9 +1483,7 @@ show(io::IO, s::Symbol) = show_unquoted_quote_expr(io, s, 0, 0, 0) # eval(Meta.parse("Set{Int64}([2,3,1])")) # ==> An actual set # While this isn’t true of ALL show methods, it is of all ASTs. -using Core.Compiler: TypedSlot, UnoptSlot - -const ExprNode = Union{Expr, QuoteNode, UnoptSlot, LineNumberNode, SSAValue, +const ExprNode = Union{Expr, QuoteNode, SlotNumber, LineNumberNode, SSAValue, GotoNode, GotoIfNot, GlobalRef, PhiNode, PhiCNode, UpsilonNode, ReturnNode} # Operators have precedence levels from 1-N, and show_unquoted defaults to a @@ -1744,7 +1803,7 @@ function show_sym(io::IO, sym::Symbol; allow_macroname=false) print(io, '@') show_sym(io, Symbol(sym_str[2:end])) else - print(io, "var", repr(string(sym))) + print(io, "var", repr(string(sym))) # TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules end end @@ -1775,19 +1834,14 @@ function show_globalref(io::IO, ex::GlobalRef; allow_macroname=false) nothing end -function show_unquoted(io::IO, ex::UnoptSlot, ::Int, ::Int) - typ = isa(ex, TypedSlot) ? 
ex.typ : Any +function show_unquoted(io::IO, ex::SlotNumber, ::Int, ::Int) slotid = ex.id slotnames = get(io, :SOURCE_SLOTNAMES, false) - if (isa(slotnames, Vector{String}) && - slotid <= length(slotnames::Vector{String})) - print(io, (slotnames::Vector{String})[slotid]) + if isa(slotnames, Vector{String}) && slotid ≤ length(slotnames) + print(io, slotnames[slotid]) else print(io, "_", slotid) end - if typ !== Any && isa(ex, TypedSlot) - print(io, "::", typ) - end end function show_unquoted(io::IO, ex::QuoteNode, indent::Int, prec::Int) @@ -2128,7 +2182,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # comparison (i.e. "x < y < z") elseif head === :comparison && nargs >= 3 && (nargs&1==1) - comp_prec = minimum(operator_precedence, args[2:2:end]) + comp_prec = minimum(operator_precedence, args[2:2:end]; init=typemax(Int)) if comp_prec <= prec show_enclosed_list(io, '(', args, " ", ')', indent, comp_prec, quote_level) else @@ -2149,8 +2203,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In elseif head === :do && nargs == 2 iob = IOContext(io, beginsym=>false) show_unquoted(iob, args[1], indent, -1, quote_level) - print(io, " do ") - show_list(iob, (((args[2]::Expr).args[1])::Expr).args, ", ", 0, 0, quote_level) + print(io, " do") + do_args = (((args[2]::Expr).args[1])::Expr).args + if !isempty(do_args) + print(io, ' ') + show_list(iob, do_args, ", ", 0, 0, quote_level) + end for stmt in (((args[2]::Expr).args[2])::Expr).args print(io, '\n', " "^(indent + indent_width)) show_unquoted(iob, stmt, indent + indent_width, -1, quote_level) @@ -2230,7 +2288,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In print(io, head, ' ') show_list(io, args, ", ", indent, 0, quote_level) - elseif head === :export + elseif head in (:export, :public) print(io, head, ' ') show_list(io, mapany(allow_macroname, args), ", ", indent) @@ -2327,7 +2385,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In if get(io, beginsym, false) print(io, '(') ind = indent + indent_width - for i = 1:length(ex.args) + for i = eachindex(ex.args) if i > 1 # if there was only a comment before the first semicolon, the expression would get parsed as a NamedTuple if !(i == 2 && ex.args[1] isa LineNumberNode) @@ -2450,6 +2508,11 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In elseif head === :meta && nargs == 2 && args[1] === :pop_loc print(io, "# meta: pop locations ($(args[2]::Int))") # print anything else as "Expr(head, args...)" + elseif head === :toplevel + # Reset SOURCE_SLOTNAMES. Raw SlotNumbers are not valid in Expr(:toplevel), but + # we want to show bad ASTs reasonably to make errors understandable. + lambda_io = IOContext(io, :SOURCE_SLOTNAMES => false) + show_unquoted_expr_fallback(lambda_io, ex, indent, quote_level) else unhandled = true end @@ -2475,7 +2538,7 @@ function show_signature_function(io::IO, @nospecialize(ft), demangle=false, farg uw = unwrap_unionall(ft) if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) && _isself(uw) uwmod = parentmodule(uw) - if qualified && !is_exported_from_stdlib(uw.name.mt.name, uwmod) && uwmod !== Main + if qualified && !isexported(uwmod, uw.name.mt.name) && uwmod !== Main print_within_stacktrace(io, uwmod, '.', bold=true) end s = sprint(show_sym, (demangle ? 
demangle_function_name : identity)(uw.name.mt.name), context=io) @@ -2513,7 +2576,8 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type; return end tv = Any[] - io = IOContext(IOBuffer(), out) + buf = IOBuffer() + io = IOContext(buf, out) env_io = io while isa(sig, UnionAll) push!(tv, sig.var) @@ -2556,20 +2620,23 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type; end print_within_stacktrace(io, ")", bold=true) show_method_params(io, tv) - str = String(take!(unwrapcontext(io)[1])) - if get(out, :limit, false)::Bool - sz = get(out, :displaysize, (typemax(Int), typemax(Int)))::Tuple{Int, Int} + str = String(take!(buf)) + str = type_limited_string_from_context(out, str) + print(out, str) + nothing +end + +function type_limited_string_from_context(out::IO, str::String) + typelimitflag = get(out, :stacktrace_types_limited, nothing) + if typelimitflag isa RefValue{Bool} + sz = get(out, :displaysize, Base.displaysize_(out))::Tuple{Int, Int} str_lim = type_depth_limit(str, max(sz[2], 120)) if sizeof(str_lim) < sizeof(str) - typelimitflag = get(out, :stacktrace_types_limited, nothing) - if typelimitflag !== nothing - typelimitflag[] = true - end + typelimitflag[] = true end str = str_lim end - print(out, str) - nothing + return str end # limit nesting depth of `{ }` until string textwidth is less than `n` @@ -2760,32 +2827,8 @@ function show(io::IO, vm::Core.TypeofVararg) end end -module IRShow - const Compiler = Core.Compiler - using Core.IR - import ..Base - import .Compiler: IRCode, TypedSlot, CFG, scan_ssa_use!, - isexpr, compute_basic_blocks, block_for_inst, IncrementalCompact, - Effects, ALWAYS_TRUE, ALWAYS_FALSE - Base.getindex(r::Compiler.StmtRange, ind::Integer) = Compiler.getindex(r, ind) - Base.size(r::Compiler.StmtRange) = Compiler.size(r) - Base.first(r::Compiler.StmtRange) = Compiler.first(r) - Base.last(r::Compiler.StmtRange) = Compiler.last(r) - Base.length(is::Compiler.InstructionStream) = Compiler.length(is) - Base.iterate(is::Compiler.InstructionStream, st::Int=1) = (st <= Compiler.length(is)) ? (is[st], st + 1) : nothing - Base.getindex(is::Compiler.InstructionStream, idx::Int) = Compiler.getindex(is, idx) - Base.getindex(node::Compiler.Instruction, fld::Symbol) = Compiler.getindex(node, fld) - include("compiler/ssair/show.jl") - - const __debuginfo = Dict{Symbol, Any}( - # :full => src -> Base.IRShow.statementidx_lineinfo_printer(src), # and add variable slot information - :source => src -> Base.IRShow.statementidx_lineinfo_printer(src), - # :oneliner => src -> Base.IRShow.statementidx_lineinfo_printer(Base.IRShow.PartialLineInfoPrinter, src), - :none => src -> Base.IRShow.lineinfo_disabled, - ) - const default_debuginfo = Ref{Symbol}(:none) - debuginfo(sym) = sym === :default ? default_debuginfo[] : sym -end +Compiler.load_irshow!() +const IRShow = Compiler.IRShow # an alias for compatibility function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source) # Fix slot names and types in function body @@ -2794,41 +2837,73 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source) if src.slotnames !== nothing lambda_io = IOContext(lambda_io, :SOURCE_SLOTNAMES => sourceinfo_slotnames(src)) end - if isempty(src.linetable) || src.linetable[1] isa LineInfoNode - println(io) - # TODO: static parameter values? 
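As the comment above notes, `show(io, src::CodeInfo; debuginfo=...)` accepts only `:source` or `:none` for the keyword. A usage sketch (written for this note, not from the patch) showing the difference on lowered code:

```julia
f(x) = x + 1
ci = first(code_lowered(f, Tuple{Int}))   # a CodeInfo object

show(stdout, ci)                          # default debuginfo = :source, with line info
println()
show(stdout, ci; debuginfo = :none)       # same IR, line annotations suppressed
```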
- # only accepts :source or :none, we can't have a fallback for default since - # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none - IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src))) - else - # this is a CodeInfo that has not been used as a method yet, so its locations are still LineNumberNodes - body = Expr(:block) - body.args = src.code - show(lambda_io, body) - end + println(io) + # TODO: static parameter values? + # only accepts :source or :none, we can't have a fallback for default since + # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none + IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src))) print(io, ")") end -function show(io::IO, inferred::Core.Compiler.InferenceResult) - mi = inferred.linfo - tt = mi.specTypes.parameters[2:end] - tts = join(["::$(t)" for t in tt], ", ") - rettype = inferred.result - if isa(rettype, Core.Compiler.InferenceState) - rettype = rettype.bestguess +show_unquoted(io::IO, val::Argument, indent::Int, prec::Int) = show_unquoted(io, Core.SlotNumber(val.n), indent, prec) + +show_unquoted(io::IO, stmt::PhiNode, indent::Int, ::Int) = show_unquoted_phinode(io, stmt, indent, "%") +function show_unquoted_phinode(io::IO, stmt::PhiNode, indent::Int, prefix::String) + args = String[let + e = stmt.edges[i] + v = !isassigned(stmt.values, i) ? "#undef" : + sprint(; context=io) do io′ + show_unquoted(io′, stmt.values[i], indent) + end + "$prefix$e => $v" + end for i in 1:length(stmt.edges) + ] + print(io, "φ ", '(') + join(io, args, ", ") + print(io, ')') +end + +function show_unquoted(io::IO, stmt::PhiCNode, indent::Int, ::Int) + print(io, "φᶜ (") + first = true + for v in stmt.values + first ? (first = false) : print(io, ", ") + show_unquoted(io, v, indent) end - if isa(mi.def, Method) - print(io, mi.def.name, "(", tts, " => ", rettype, ")") + print(io, ")") +end + +function show_unquoted(io::IO, stmt::PiNode, indent::Int, ::Int) + print(io, "π (") + show_unquoted(io, stmt.val, indent) + print(io, ", ") + printstyled(io, stmt.typ, color=:cyan) + print(io, ")") +end + +function show_unquoted(io::IO, stmt::UpsilonNode, indent::Int, ::Int) + print(io, "ϒ (") + isdefined(stmt, :val) ? 
+ show_unquoted(io, stmt.val, indent) : + print(io, "#undef") + print(io, ")") +end + +function show_unquoted(io::IO, stmt::ReturnNode, indent::Int, ::Int) + if !isdefined(stmt, :val) + print(io, "unreachable") else - print(io, "Toplevel MethodInstance thunk from ", mi.def, " => ", rettype) + print(io, "return ") + show_unquoted(io, stmt.val, indent) end end -function show(io::IO, ::Core.Compiler.NativeInterpreter) - print(io, "Core.Compiler.NativeInterpreter(...)") +show_unquoted(io::IO, stmt::GotoIfNot, indent::Int, ::Int) = show_unquoted_gotoifnot(io, stmt, indent, "%") +function show_unquoted_gotoifnot(io::IO, stmt::GotoIfNot, indent::Int, prefix::String) + print(io, "goto ", prefix, stmt.dest, " if not ") + show_unquoted(io, stmt.cond, indent) end - function dump(io::IOContext, x::SimpleVector, n::Int, indent) if isempty(x) print(io, "empty SimpleVector") @@ -2836,7 +2911,7 @@ function dump(io::IOContext, x::SimpleVector, n::Int, indent) end print(io, "SimpleVector") if n > 0 - for i = 1:length(x) + for i in eachindex(x) println(io) print(io, indent, " ", i, ": ") if isassigned(x,i) @@ -2927,6 +3002,13 @@ end # Types function dump(io::IOContext, x::DataType, n::Int, indent) + # For some reason, tuples are structs + is_struct = isstructtype(x) && !(x <: Tuple) + is_mut = is_struct && ismutabletype(x) + is_mut && print(io, "mutable ") + is_struct && print(io, "struct ") + isprimitivetype(x) && print(io, "primitive type ") + isabstracttype(x) && print(io, "abstract type ") print(io, x) if x !== Any print(io, " <: ", supertype(x)) @@ -2946,10 +3028,15 @@ function dump(io::IOContext, x::DataType, n::Int, indent) end fields = fieldnames(x) fieldtypes = datatype_fieldtypes(x) - for idx in 1:length(fields) + for idx in eachindex(fields) println(io) - print(io, indent, " ", fields[idx], "::") - print(tvar_io, fieldtypes[idx]) + print(io, indent, " ") + is_mut && isconst(x, idx) && print(io, "const ") + print(io, fields[idx]) + if isassigned(fieldtypes, idx) + print(io, "::") + print(tvar_io, fieldtypes[idx]) + end end end nothing @@ -3063,7 +3150,7 @@ Print to a stream `io`, or return a string `str`, giving a brief description of a value. By default returns `string(typeof(x))`, e.g. [`Int64`](@ref). For arrays, returns a string of size and type info, -e.g. `10-element Array{Int64,1}`. +e.g. `10-element Vector{Int64}` or `9×4×5 Array{Float64, 3}`. # Examples ```jldoctest @@ -3119,7 +3206,7 @@ representing argument `x` in terms of its type. (The double-colon is omitted if `toplevel=true`.) However, you can specialize this function for specific types to customize printing. 
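The `showarg` documentation above describes specializing `Base.showarg` to customize how `summary` describes a container, delegating to the parent as `view`/`reshape` do. A minimal sketch with a hypothetical wrapper type (the `Wrapper` name and fields are illustrative, not part of Base or the patch):

```julia
# Hypothetical array wrapper used only to illustrate showarg customization.
struct Wrapper{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
    data::A
end
Wrapper(data::A) where {T,N,A<:AbstractArray{T,N}} = Wrapper{T,N,A}(data)
Base.size(w::Wrapper) = size(w.data)
Base.getindex(w::Wrapper, i::Int...) = w.data[i...]

function Base.showarg(io::IO, w::Wrapper, toplevel)
    print(io, "Wrapper(")
    Base.showarg(io, w.data, false)   # describe the parent array
    print(io, ')')
    toplevel && print(io, " with eltype ", eltype(w))
end

summary(Wrapper(zeros(3, 4)))
# e.g. "3×4 Wrapper(::Matrix{Float64}) with eltype Float64"
```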
-# Example +# Examples A SubArray created as `view(a, :, 3, 2:5)`, where `a` is a 3-dimensional Float64 array, has type @@ -3178,7 +3265,9 @@ showindices(io) = nothing function showarg(io::IO, r::ReshapedArray, toplevel) print(io, "reshape(") showarg(io, parent(r), false) - print(io, ", ", join(r.dims, ", ")) + if !isempty(r.dims) + print(io, ", ", join(r.dims, ", ")) + end print(io, ')') toplevel && print(io, " with eltype ", eltype(r)) return nothing @@ -3255,10 +3344,90 @@ bitstring(B::BitArray) = sprint(bitshow, B) function show(io::IO, oc::Core.OpaqueClosure) A, R = typeof(oc).parameters show_tuple_as_call(io, Symbol(""), A; hasfirst=false) - print(io, "::", R) print(io, "->◌") + print(io, "::", R) end function show(io::IO, ::MIME"text/plain", oc::Core.OpaqueClosure{A, R}) where {A, R} show(io, oc) end + +# printing bindings and partitions +function print_partition(io::IO, partition::Core.BindingPartition) + print(io, partition.min_world) + print(io, ":") + max_world = @atomic partition.max_world + if max_world == typemax(UInt) + print(io, '∞') + else + print(io, max_world) + end + print(io, " - ") + kind = binding_kind(partition) + if is_defined_const_binding(kind) + print(io, "constant binding to ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_UNDEF_CONST + print(io, "undefined const binding") + elseif kind == BINDING_KIND_GUARD + print(io, "undefined binding - guard entry") + elseif kind == BINDING_KIND_FAILED + print(io, "ambiguous binding - guard entry") + elseif kind == BINDING_KIND_DECLARED + print(io, "undefined, but declared using `global` - guard entry") + elseif kind == BINDING_KIND_IMPLICIT + print(io, "implicit `using` from ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_EXPLICIT + print(io, "explicit `using` from ") + print(io, partition_restriction(partition)) + elseif kind == BINDING_KIND_IMPORTED + print(io, "explicit `import` from ") + print(io, partition_restriction(partition)) + else + @assert kind == BINDING_KIND_GLOBAL + print(io, "global variable with type ") + print(io, partition_restriction(partition)) + end +end + +function show(io::IO, ::MIME"text/plain", partition::Core.BindingPartition) + print(io, "BindingPartition ") + print_partition(io, partition) +end + +function show(io::IO, ::MIME"text/plain", bnd::Core.Binding) + print(io, "Binding ") + print(io, bnd.globalref) + if !isdefined(bnd, :partitions) + print(io, "No partitions") + else + partition = @atomic bnd.partitions + while true + println(io) + print(io, " ") + print_partition(io, partition) + isdefined(partition, :next) || break + partition = @atomic partition.next + end + end +end + +# Special pretty printing for EvalInto/IncludeInto +function show(io::IO, ii::IncludeInto) + if getglobal(ii.m, :include) === ii + print(io, ii.m) + print(io, ".include") + else + show_default(io, ii) + end +end + +function show(io::IO, ei::Core.EvalInto) + if getglobal(ei.m, :eval) === ei + print(io, ei.m) + print(io, ".eval") + else + show_default(io, ei) + end +end diff --git a/base/slicearray.jl b/base/slicearray.jl index e5a433cdb8d2a..215cf13f9651e 100644 --- a/base/slicearray.jl +++ b/base/slicearray.jl @@ -78,7 +78,7 @@ end eachslice(A::AbstractArray; dims, drop=true) Create a [`Slices`](@ref) object that is an array of slices over dimensions `dims` of `A`, returning -views that select all the data from the other dimensions in `A`. `dims` can either by an +views that select all the data from the other dimensions in `A`. 
`dims` can either be an integer or a tuple of integers. If `drop = true` (the default), the outer `Slices` will drop the inner dimensions, and @@ -96,7 +96,7 @@ See also [`eachrow`](@ref), [`eachcol`](@ref), [`mapslices`](@ref) and [`selectd !!! compat "Julia 1.9" Prior to Julia 1.9, this returned an iterator, and only a single dimension `dims` was supported. -# Example +# Examples ```jldoctest julia> m = [1 2 3; 4 5 6; 7 8 9] @@ -144,7 +144,7 @@ See also [`eachcol`](@ref), [`eachslice`](@ref) and [`mapslices`](@ref). !!! compat "Julia 1.9" Prior to Julia 1.9, this returned an iterator. -# Example +# Examples ```jldoctest julia> a = [1 2; 3 4] @@ -182,7 +182,7 @@ See also [`eachrow`](@ref), [`eachslice`](@ref) and [`mapslices`](@ref). !!! compat "Julia 1.9" Prior to Julia 1.9, this returned an iterator. -# Example +# Examples ```jldoctest julia> a = [1 2; 3 4] diff --git a/base/some.jl b/base/some.jl index 0d538cbed6c23..4269b2d78aedd 100644 --- a/base/some.jl +++ b/base/some.jl @@ -16,7 +16,7 @@ Some(::Type{T}) where {T} = Some{Type{T}}(T) promote_rule(::Type{Some{T}}, ::Type{Some{S}}) where {T, S<:T} = Some{T} -nonnothingtype(::Type{T}) where {T} = typesplit(T, Nothing) +nonnothingtype(@nospecialize(T::Type)) = typesplit(T, Nothing) promote_rule(T::Type{Nothing}, S::Type) = Union{S, Nothing} function promote_rule(T::Type{>:Nothing}, S::Type) R = nonnothingtype(T) @@ -138,10 +138,36 @@ true This macro is available as of Julia 1.7. """ macro something(args...) - expr = :(nothing) + noth = GlobalRef(Base, :nothing) + something = GlobalRef(Base, :something) + + # This preserves existing semantics of throwing on `nothing` + expr = :($something($noth)) + + #= + We go through the arguments in reverse + because we're building a nested if/else + expression from the inside out. + The innermost thing to check is the last argument, + which is why we need the last argument first + when building the final expression. + =# for arg in reverse(args) - expr = :(val = $(esc(arg)); val !== nothing ? val : ($expr)) + val = gensym() + expr = quote + $val = $(esc(arg)) + if !isnothing($val) + # unwrap eagerly to help type inference + $something($val) + else + $expr + end + end end - something = GlobalRef(Base, :something) - return :($something($expr)) + return expr end + +==(a::Some, b::Some) = a.value == b.value +isequal(a::Some, b::Some)::Bool = isequal(a.value, b.value) +const hash_some_seed = UInt == UInt64 ? 0xde5c997007a4ca3a : 0x78c29c09 +hash(s::Some, h::UInt) = hash(s.value, hash_some_seed + h) diff --git a/base/sort.jl b/base/sort.jl index 90f8755d3b1a4..8254f56b3f952 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -4,9 +4,8 @@ module Sort using Base.Order -using Base: copymutable, midpoint, require_one_based_indexing, uinttype, - sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit, - IteratorSize, HasShape, IsInfinite, tail +using Base: copymutable, midpoint, require_one_based_indexing, uinttype, tail, + sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit import Base: sort, @@ -63,10 +62,10 @@ function issorted(itr, order::Ordering) end """ - issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + issorted(v, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward) -Test whether a vector is in sorted order. The `lt`, `by` and `rev` keywords modify what -order is considered to be sorted just as they do for [`sort`](@ref). +Test whether a collection is in sorted order. 
The keywords modify what +order is considered sorted, as described in the [`sort!`](@ref) documentation. # Examples ```jldoctest @@ -81,14 +80,32 @@ false julia> issorted([(1, "b"), (2, "a")], by = x -> x[2], rev=true) true + +julia> issorted([1, 2, -2, 3], by=abs) +true ``` """ -issorted(itr; - lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) = - issorted(itr, ord(lt,by,rev,order)) +function issorted(itr; + lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) + # Explicit branching because the compiler can't optimize away the + # type instability of the `ord` call with Bool `rev` parameter. + if rev === true + issorted(itr, ord(lt, by, true, order)) + else + issorted(itr, ord(lt, by, nothing, order)) + end +end function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - _sort!(v, InitialOptimizations(ScratchQuickSort(k)), o, (;)) + # TODO move k from `alg` to `kw` + # Don't perform InitialOptimizations before Bracketing. The optimizations take O(n) + # time and so does the whole sort. But do perform them before recursive calls because + # that can cause significant speedups when the target range is large so the runtime is + # dominated by k log k and the optimizations runs in O(k) time. + _sort!(v, BoolOptimization( + Small{12}( # Very small inputs should go straight to insertion sort + BracketedSort(k))), + o, (;)) maybeview(v, k) end @@ -96,14 +113,17 @@ maybeview(v, k) = view(v, k) maybeview(v, k::Integer) = v[k] """ - partialsort!(v, k; by=, lt=, rev=false) + partialsort!(v, k; by=identity, lt=isless, rev=false) -Partially sort the vector `v` in place, according to the order specified by `by`, `lt` and -`rev` so that the value at index `k` (or range of adjacent values if `k` is a range) occurs +Partially sort the vector `v` in place so that the value at index `k` (or +range of adjacent values if `k` is a range) occurs at the position where it would appear if the array were fully sorted. If `k` is a single index, that value is returned; if `k` is a range, an array of values at those indices is returned. Note that `partialsort!` may not fully sort the input array. +For the keyword arguments, see the documentation of [`sort!`](@ref). + + # Examples ```jldoctest julia> a = [1, 2, 4, 3, 4] @@ -150,26 +170,26 @@ partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}; partialsort!(v, k, ord(lt,by,rev,order)) """ - partialsort(v, k, by=, lt=, rev=false) + partialsort(v, k, by=identity, lt=isless, rev=false) -Variant of [`partialsort!`](@ref) which copies `v` before partially sorting it, thereby returning the +Variant of [`partialsort!`](@ref) that copies `v` before partially sorting it, thereby returning the same thing as `partialsort!` but leaving `v` unmodified. """ partialsort(v::AbstractVector, k::Union{Integer,OrdinalRange}; kws...) = partialsort!(copymutable(v), k; kws...) # reference on sorted binary search: -# http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary +# https://www.tbray.org/ongoing/When/200x/2003/03/22/Binary -# index of the first value of vector a that is greater than or equal to x; +# index of the first value of vector a that is greater than or equivalent to x; # returns lastindex(v)+1 if x is greater than all values in v. 
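To make the bracketing semantics in these comments concrete (an editor's sketch, not part of the patch), the three search functions agree as follows on a small sorted vector with duplicates:

```julia
julia> v = [1, 2, 2, 4];

julia> searchsortedfirst(v, 2)   # first index whose value is not below 2
2

julia> searchsortedlast(v, 2)    # last index whose value is not above 2
3

julia> searchsorted(v, 2)        # full range of indices equivalent to 2
2:3

julia> searchsorted(v, 3)        # empty range marking the insertion point for 3
4:3
```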
function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer hi = hi + T(1) len = hi - lo - @inbounds while len != 0 + while len != 0 half_len = len >>> 0x01 m = lo + half_len - if lt(o, v[m], x) + if lt(o, @inbounds(v[m]), x) lo = m + 1 len -= half_len + 1 else @@ -180,15 +200,15 @@ function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::key return lo end -# index of the last value of vector a that is less than or equal to x; +# index of the last value of vector a that is less than or equivalent to x; # returns firstindex(v)-1 if x is less than all values of v. function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer u = T(1) lo = lo - u hi = hi + u - @inbounds while lo < hi - u + while lo != hi - u m = midpoint(lo, hi) - if lt(o, x, v[m]) + if lt(o, x, @inbounds(v[m])) hi = m else lo = m @@ -197,29 +217,32 @@ function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keyt return lo end -# returns the range of indices of v equal to x +# returns the range of indices of v equivalent to x # if v does not contain x, returns a 0-length range # indicating the insertion point of x function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRange{keytype(v)} where T<:Integer u = T(1) lo = ilo - u hi = ihi + u - @inbounds while lo < hi - u + while lo != hi - u m = midpoint(lo, hi) - if lt(o, v[m], x) + if lt(o, @inbounds(v[m]), x) lo = m - elseif lt(o, x, v[m]) + elseif lt(o, x, @inbounds(v[m])) hi = m else - a = searchsortedfirst(v, x, max(lo,ilo), m, o) - b = searchsortedlast(v, x, m, min(hi,ihi), o) + a = searchsortedfirst(v, x, lo+u, m, o) + b = searchsortedlast(v, x, m, hi-u, o) return a : b end end return (lo + 1) : (hi - 1) end -function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a) + +const FastRangeOrderings = Union{DirectOrdering,Lt{typeof(<)},ReverseOrdering{Lt{typeof(<)}}} + +function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if lt(o, x, f) @@ -232,7 +255,7 @@ function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering): end end -function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a) +function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if !lt(o, f, x) @@ -245,7 +268,7 @@ function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) end end -function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a) +function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if lt(o, x, f) @@ -253,7 +276,7 @@ function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderin elseif h == 0 || !lt(o, x, l) length(a) else - if o isa ForwardOrdering + if !(o isa ReverseOrdering) fld(floor(Integer, x) - f, h) + 1 else fld(ceil(Integer, x) - f, h) + 1 @@ -261,7 +284,7 @@ function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderin end end -function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a) +function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a) 
require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if !lt(o, f, x) @@ -269,7 +292,7 @@ function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderi elseif h == 0 || lt(o, l, x) length(a) + 1 else - if o isa ForwardOrdering + if !(o isa ReverseOrdering) cld(ceil(Integer, x) - f, h) + 1 else cld(floor(Integer, x) - f, h) + 1 @@ -277,7 +300,7 @@ function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderi end end -searchsorted(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) = +searchsorted(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings) = searchsortedfirst(a, x, o) : searchsortedlast(a, x, o) for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] @@ -290,16 +313,19 @@ for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] end """ - searchsorted(a, x; by=, lt=, rev=false) + searchsorted(v, x; by=identity, lt=isless, rev=false) -Return the range of indices of `a` which compare as equal to `x` (using binary search) -according to the order specified by the `by`, `lt` and `rev` keywords, assuming that `a` -is already sorted in that order. Return an empty range located at the insertion point -if `a` does not contain values equal to `x`. +Return the range of indices in `v` where values are equivalent to `x`, or an +empty range located at the insertion point if `v` does not contain values +equivalent to `x`. The vector `v` must be sorted according to the order defined +by the keywords. Refer to [`sort!`](@ref) for the meaning of the keywords and +the definition of equivalence. Note that the `by` function is applied to the +searched value `x` as well as the values in `v`. -See [`sort!`](@ref) for an explanation of the keyword arguments `by`, `lt` and `rev`. +The range is generally found using binary search, but there are optimized +implementations for some inputs. -See also: [`insorted`](@ref), [`searchsortedfirst`](@ref), [`sort`](@ref), [`findall`](@ref). +See also: [`searchsortedfirst`](@ref), [`sort!`](@ref), [`insorted`](@ref), [`findall`](@ref). # Examples ```jldoctest @@ -324,15 +350,19 @@ julia> searchsorted([1=>"one", 2=>"two", 2=>"two", 4=>"four"], 2=>"two", by=firs """ searchsorted """ - searchsortedfirst(a, x; by=, lt=, rev=false) + searchsortedfirst(v, x; by=identity, lt=isless, rev=false) -Return the index of the first value in `a` greater than or equal to `x`, according to the -specified order. Return `lastindex(a) + 1` if `x` is greater than all values in `a`. -`a` is assumed to be sorted. +Return the index of the first value in `v` that is not ordered before `x`. +If all values in `v` are ordered before `x`, return `lastindex(v) + 1`. -`insert!`ing `x` at this index will maintain sorted order. +The vector `v` must be sorted according to the order defined by the keywords. +`insert!`ing `x` at the returned index will maintain the sorted order. +Refer to [`sort!`](@ref) for the meaning and use of the keywords. +Note that the `by` function is applied to the searched value `x` as well as the +values in `v`. -See [`sort!`](@ref) for an explanation of the keyword arguments `by`, `lt` and `rev`. +The index is generally found using binary search, but there are optimized +implementations for some inputs. See also: [`searchsortedlast`](@ref), [`searchsorted`](@ref), [`findfirst`](@ref). 
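To make the "optimized implementations for some inputs" note concrete (an illustrative aside, not part of the patch): for `AbstractRange` inputs the index is computed arithmetically in O(1), and with the `FastRangeOrderings` union above this arithmetic path now also accepts `lt = <` in addition to the default orderings. The results match the binary-search path:

```julia
julia> searchsortedfirst(0:2:100, 7)          # first element ≥ 7 is 8, at index 5
5

julia> searchsortedfirst(0:2:100, 7; lt = <)  # same answer, now also served by the O(1) range path
5

julia> searchsortedlast(10:-2:0, 7; rev = true)  # descending range: last element ≥ 7 is 8, at index 2
2
```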
@@ -353,19 +383,25 @@ julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 9) # no match, insert at end julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 0) # no match, insert at start 1 -julia> searchsortedfirst([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first) # Compare the keys of the pairs +julia> searchsortedfirst([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first) # compare the keys of the pairs 3 ``` """ searchsortedfirst """ - searchsortedlast(a, x; by=, lt=, rev=false) + searchsortedlast(v, x; by=identity, lt=isless, rev=false) + +Return the index of the last value in `v` that is not ordered after `x`. +If all values in `v` are ordered after `x`, return `firstindex(v) - 1`. -Return the index of the last value in `a` less than or equal to `x`, according to the -specified order. Return `firstindex(a) - 1` if `x` is less than all values in `a`. `a` is -assumed to be sorted. +The vector `v` must be sorted according to the order defined by the keywords. +`insert!`ing `x` immediately after the returned index will maintain the sorted order. +Refer to [`sort!`](@ref) for the meaning and use of the keywords. +Note that the `by` function is applied to the searched value `x` as well as the +values in `v`. -See [`sort!`](@ref) for an explanation of the keyword arguments `by`, `lt` and `rev`. +The index is generally found using binary search, but there are optimized +implementations for some inputs # Examples ```jldoctest @@ -390,12 +426,16 @@ julia> searchsortedlast([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first) # """ searchsortedlast """ - insorted(x, a; by=, lt=, rev=false) -> Bool + insorted(x, v; by=identity, lt=isless, rev=false) -> Bool -Determine whether an item `x` is in the sorted collection `a`, in the sense that -it is [`==`](@ref) to one of the values of the collection according to the order -specified by the `by`, `lt` and `rev` keywords, assuming that `a` is already -sorted in that order, see [`sort`](@ref) for the keywords. +Determine whether a vector `v` contains any value equivalent to `x`. +The vector `v` must be sorted according to the order defined by the keywords. +Refer to [`sort!`](@ref) for the meaning of the keywords and the definition of +equivalence. Note that the `by` function is applied to the searched value `x` +as well as the values in `v`. + +The check is generally done using binary search, but there are optimized +implementations for some inputs. See also [`in`](@ref). @@ -415,6 +455,9 @@ false julia> insorted(0, [1, 2, 4, 5, 5, 7]) # no match false + +julia> insorted(2=>"TWO", [1=>"one", 2=>"two", 4=>"four"], by=first) # compare the keys of the pairs +true ``` !!! compat "Julia 1.6" @@ -478,7 +521,7 @@ end ## sorting algorithm components ## """ - _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset) + _sort!(v::AbstractVector, a::Base.Sort.Algorithm, o::Base.Order.Ordering, kw; t, offset) An internal function that sorts `v` using the algorithm `a` under the ordering `o`, subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only @@ -492,7 +535,7 @@ no scratch space is present. A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space -may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`. +may be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`. 
`t` is an appropriate scratch space for the algorithm at hand, to be accessed as `t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in @@ -500,9 +543,37 @@ internal or recursive calls. """ function _sort! end +# TODO: delete this optimization when views have no overhead. +const UnwrappableSubArray = SubArray{T, 1, <:AbstractArray{T}, <:Tuple{AbstractUnitRange, Vararg{Number}}, true} where T +""" + SubArrayOptimization(next) isa Base.Sort.Algorithm + +Unwrap certain known SubArrays because views have a performance overhead 😢 + +Specifically, unwraps some instances of the type + $UnwrappableSubArray """ - MissingOptimization(next) <: Algorithm +struct SubArrayOptimization{T <: Algorithm} <: Algorithm + next::T +end + +_sort!(v::AbstractVector, a::SubArrayOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw) +function _sort!(v::UnwrappableSubArray, a::SubArrayOptimization, o::Ordering, kw) + @getkw lo hi + # @assert v.stride1 == 1 + parent = v.parent + if parent isa Array && !(parent isa Vector) && hi - lo < 100 + # vec(::Array{T, ≠1}) allocates and is therefore somewhat expensive. + # We don't want that for small inputs. + _sort!(v, a.next, o, kw) + else + _sort!(vec(parent), a.next, o, (;kw..., lo = lo + v.offset1, hi = hi + v.offset1)) + end +end + +""" + MissingOptimization(next) isa Base.Sort.Algorithm Filter out missing values. @@ -561,7 +632,7 @@ function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v) i - 1 end """ - send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi) + send_to_end!(f::Function, v::AbstractVector, o::Base.Order.DirectOrdering[, end_stable]; lo, hi) Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end. @@ -615,7 +686,7 @@ end """ - IEEEFloatOptimization(next) <: Algorithm + IEEEFloatOptimization(next) isa Base.Sort.Algorithm Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers. @@ -660,7 +731,7 @@ end """ - BoolOptimization(next) <: Algorithm + BoolOptimization(next) isa Base.Sort.Algorithm Sort `AbstractVector{Bool}`s using a specialized version of counting sort. @@ -687,7 +758,7 @@ end """ - IsUIntMappable(yes, no) <: Algorithm + IsUIntMappable(yes, no) isa Base.Sort.Algorithm Determines if the elements of a vector can be mapped to unsigned integers while preserving their order under the specified ordering. @@ -709,7 +780,7 @@ end """ - Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm + Small{N}(small=SMALL_ALGORITHM, big) isa Base.Sort.Algorithm Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big` algorithm. @@ -741,13 +812,23 @@ Insertion sort traverses the collection one element at a time, inserting each element into its correct, sorted position in the output vector. Characteristics: -* *stable*: preserves the ordering of elements which compare equal -(e.g. "a" and "A" in a sort of letters which ignores case). +* *stable*: preserves the ordering of elements that compare equal +(e.g. "a" and "A" in a sort of letters that ignores case). * *in-place* in memory. * *quadratic performance* in the number of elements to be sorted: it is well-suited to small collections but should not be used for large ones. """ const InsertionSort = InsertionSortAlg() + +""" + SMALL_ALGORITHM + +Default sorting algorithm for small arrays. 
+ +This is an alias for a simple low-overhead algorithm that does not scale well +to large arrays, unlike high-overhead recursive algorithms used for larger arrays. +`SMALL_ALGORITHM` is a good choice for the base case of a recursive algorithm. +""" const SMALL_ALGORITHM = InsertionSortAlg() function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) @@ -771,7 +852,7 @@ end """ - CheckSorted(next) <: Algorithm + CheckSorted(next) isa Base.Sort.Algorithm Check if the input is already sorted and for large inputs, also check if it is reverse-sorted. The reverse-sorted check is unstable. @@ -798,7 +879,7 @@ end """ - ComputeExtrema(next) <: Algorithm + ComputeExtrema(next) isa Base.Sort.Algorithm Compute the extrema of the input under the provided order. @@ -824,7 +905,7 @@ end """ - ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm + ConsiderCountingSort(counting=CountingSort(), next) isa Base.Sort.Algorithm If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to the `next` algorithm. @@ -852,7 +933,7 @@ _sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v, """ - CountingSort <: Algorithm + CountingSort() isa Base.Sort.Algorithm Use the counting sort algorithm. @@ -888,7 +969,7 @@ end """ - ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm + ConsiderRadixSort(radix=RadixSort(), next) isa Base.Sort.Algorithm If the number of bits in the input's range is small enough and the input supports efficient bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm. @@ -911,7 +992,7 @@ end """ - RadixSort <: Algorithm + RadixSort() isa Base.Sort.Algorithm Use the radix sort algorithm. @@ -966,8 +1047,8 @@ end """ - ScratchQuickSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm - ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm + ScratchQuickSort(next::Base.Sort.Algorithm=Base.Sort.SMALL_ALGORITHM) isa Base.Sort.Algorithm + ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Base.Sort.Algorithm=Base.Sort.SMALL_ALGORITHM) isa Base.Sort.Algorithm Use the `ScratchQuickSort` algorithm with the `next` algorithm as a base case. @@ -981,8 +1062,8 @@ is treated as the first or last index of the input, respectively. `lo` and `hi` may be specified together as an `AbstractUnitRange`. Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). + * *stable*: preserves the ordering of elements that compare equal + (e.g. "a" and "A" in a sort of letters that ignores case). * *not in-place* in memory. * *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref). * *linear runtime* if `length(lo:hi)` is constant @@ -1007,7 +1088,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer v::AbstractVector, rev::Bool, pivot_dest::AbstractVector, pivot_index_offset::Integer) # Ideally we would use `pivot_index = rand(lo:hi)`, but that requires Random.jl # and would mutate the global RNG in sorting. 
- pivot_index = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + lo + pivot_index = mod(hash(lo), lo:hi) @inbounds begin pivot = v[pivot_index] while lo < pivot_index @@ -1083,7 +1164,196 @@ end """ - StableCheckSorted(next) <: Algorithm + BracketedSort(target[, next::Algorithm]) isa Base.Sort.Algorithm + +Perform a partialsort for the elements that fall into the indices specified by the `target` +using BracketedSort with the `next` algorithm for subproblems. + +BracketedSort takes a random* sample of the input, estimates the quantiles of the input +using the quantiles of the sample to find signposts that almost certainly bracket the target +values, filters the value in the input that fall between the signpost values to the front of +the input, and then, if that "almost certainly" turned out to be true, finds the target +within the small chunk that are, by value, between the signposts and now by position, at the +front of the vector. On small inputs or when target is close to the size of the input, +BracketedSort falls back to the `next` algorithm directly. Otherwise, BracketedSort uses the +`next` algorithm only to compute quantiles of the sample and to find the target within the +small chunk. + +## Performance + +If the `next` algorithm has `O(n * log(n))` runtime and the input is not pathological then +the runtime of this algorithm is `O(n + k * log(k))` where `n` is the length of the input +and `k` is `length(target)`. On pathological inputs the asymptotic runtime is the same as +the runtime of the `next` algorithm. + +BracketedSort itself does not allocate. If `next` is in-place then BracketedSort is also +in-place. If `next` is not in place, and it's space usage increases monotonically with input +length then BracketedSort's maximum space usage will never be more than the space usage +of `next` on the input BracketedSort receives. For large nonpathological inputs and targets +substantially smaller than the size of the input, BracketedSort's maximum memory usage will +be much less than `next`'s. If the maximum additional space usage of `next` scales linearly +then for small k the average* maximum additional space usage of BracketedSort will be +`O(n^(2.3/3))`. + +By default, BracketedSort uses the `O(n)` space and `O(n + k log k)` runtime +`ScratchQuickSort` algorithm recursively. + +*Sorting is unable to depend on Random.jl because Random.jl depends on sorting. + Consequently, we use `hash` as a source of randomness. The average runtime guarantees + assume that `hash(x::Int)` produces a random result. However, as this randomization is + deterministic, if you try hard enough you can find inputs that consistently reach the + worst case bounds. Actually constructing such inputs is an exercise left to the reader. + Have fun :). + +Characteristics: + * *unstable*: does not preserve the ordering of elements that compare equal + (e.g. "a" and "A" in a sort of letters that ignores case). + * *in-place* in memory if the `next` algorithm is in-place. + * *estimate-and-filter*: strategy + * *linear runtime* if `length(target)` is constant and `next` is reasonable + * *n + k log k* worst case runtime if `next` has that runtime. + * *pathological inputs* can significantly increase constant factors. 
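The observable contract is easiest to see through `partialsort`/`partialsort!`, which now route through `BracketedSort`. The following is an editor's illustration under the default ordering, not part of the patch:

```julia
julia> v = [9, 1, 8, 2, 7, 3];

julia> partialsort!(v, 2)      # the 2nd-smallest value is returned...
2

julia> v[2]                    # ...and ends up at index 2; the rest of v is only partially ordered
2

julia> partialsort([9, 1, 8, 2, 7, 3], 2:4) == [2, 3, 7]   # a range target yields the sorted slice
true
```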
+""" +struct BracketedSort{T, F} <: Algorithm + target::T + get_next::F +end + +# TODO: this composition between BracketedSort and ScratchQuickSort does not bring me joy +BracketedSort(k) = BracketedSort(k, k -> InitialOptimizations(ScratchQuickSort(k))) + +function bracket_kernel!(v::AbstractVector, lo, hi, lo_signpost, hi_signpost, o) + i = 0 + count_below = 0 + checkbounds(v, lo:hi) + for j in lo:hi + x = @inbounds v[j] + a = lo_signpost !== nothing && lt(o, x, lo_signpost) + b = hi_signpost === nothing || !lt(o, hi_signpost, x) + count_below += a + # if a != b # This branch is almost never taken, so making it branchless is bad. + # @inbounds v[i], v[j] = v[j], v[i] + # i += 1 + # end + c = a != b # JK, this is faster. + k = i * c + j + # Invariant: @assert firstindex(v) ≤ lo ≤ i + j ≤ k ≤ j ≤ hi ≤ lastindex(v) + @inbounds v[j], v[k] = v[k], v[j] + i += c - 1 + end + count_below, i+hi +end + +function move!(v, target, source) + # This function never dominates runtime—only add `@inbounds` if you can demonstrate a + # performance improvement. And if you do, also double check behavior when `target` + # is out of bounds. + @assert length(target) == length(source) + if length(target) == 1 || isdisjoint(target, source) + for (i, j) in zip(target, source) + v[i], v[j] = v[j], v[i] + end + else + @assert minimum(source) <= minimum(target) + reverse!(v, minimum(source), maximum(target)) + reverse!(v, minimum(target), maximum(target)) + end +end + +function _sort!(v::AbstractVector, a::BracketedSort, o::Ordering, kw) + @getkw lo hi scratch + # TODO for further optimization: reuse scratch between trials better, from signpost + # selection to recursive calls, and from the fallback (but be aware of type stability, + # especially when sorting IEEE floats. + + # We don't need to bounds check target because that is done higher up in the stack + # However, we cannot assume the target is inbounds. + lo < hi || return scratch + ln = hi - lo + 1 + + # This is simply a precomputed short-circuit to avoid doing scalar math for small inputs. + # It does not change dispatch at all. + ln < 260 && return _sort!(v, a.get_next(a.target), o, kw) + + target = a.target + k = cbrt(ln) + k2 = round(Int, k^2) + k2ln = k2/ln + offset = .15k*top_set_bit(k2) # TODO for further optimization: tune this + lo_signpost_i, hi_signpost_i = + (floor(Int, (tar - lo) * k2ln + lo + off) for (tar, off) in + ((minimum(target), -offset), (maximum(target), offset))) + lastindex_sample = lo+k2-1 + expected_middle_ln = (min(lastindex_sample, hi_signpost_i) - max(lo, lo_signpost_i) + 1) / k2ln + # This heuristic is complicated because it fairly accurately reflects the runtime of + # this algorithm which is necessary to get good dispatch when both the target is large + # and the input are large. + # expected_middle_ln is a float and k2 is significantly below typemax(Int), so this will + # not overflow: + # TODO move target from alg to kw to avoid this ickyness: + ln <= 130 + 2k2 + 2expected_middle_ln && return _sort!(v, a.get_next(a.target), o, kw) + + # We store the random sample in + # sample = view(v, lo:lo+k2) + # but views are not quite as fast as using the input array directly, + # so we don't actually construct this view at runtime. + + # TODO for further optimization: handle lots of duplicates better. + # Right now lots of duplicates rounds up when it could use some super fast optimizations + # in some cases. + # e.g. 
+ # + # Target: |----| + # Sorted input: 000000000000000000011111112222223333333333 + # + # Will filter all zeros and ones to the front when it could just take the first few + # it encounters. This optimization would be especially potent when `allequal(ans)` and + # equal elements are egal. + + # 3 random trials should typically give us 0.99999 reliability; we can assume + # the input is pathological and abort to fallback if we fail three trials. + seed = hash(ln, Int === Int64 ? 0x85eb830e0216012d : 0xae6c4e15) + for attempt in 1:3 + seed = hash(attempt, seed) + for i in lo:lo+k2-1 + j = mod(hash(i, seed), i:hi) # TODO for further optimization: be sneaky and remove this division + v[i], v[j] = v[j], v[i] + end + count_below, lastindex_middle = if lo_signpost_i <= lo && lastindex_sample <= hi_signpost_i + # The heuristics higher up in this function that dispatch to the `next` + # algorithm should prevent this from happening. + # Specifically, this means that expected_middle_ln == ln, so + # ln <= ... + 2.0expected_middle_ln && return ... + # will trigger. + @assert false + # But if it does happen, the kernel reduces to + 0, hi + elseif lo_signpost_i <= lo + _sort!(v, a.get_next(hi_signpost_i), o, (;kw..., hi=lastindex_sample)) + bracket_kernel!(v, lo, hi, nothing, v[hi_signpost_i], o) + elseif lastindex_sample <= hi_signpost_i + _sort!(v, a.get_next(lo_signpost_i), o, (;kw..., hi=lastindex_sample)) + bracket_kernel!(v, lo, hi, v[lo_signpost_i], nothing, o) + else + # TODO for further optimization: don't sort the middle elements + _sort!(v, a.get_next(lo_signpost_i:hi_signpost_i), o, (;kw..., hi=lastindex_sample)) + bracket_kernel!(v, lo, hi, v[lo_signpost_i], v[hi_signpost_i], o) + end + target_in_middle = target .- count_below + if lo <= minimum(target_in_middle) && maximum(target_in_middle) <= lastindex_middle + scratch = _sort!(v, a.get_next(target_in_middle), o, (;kw..., hi=lastindex_middle)) + move!(v, target, target_in_middle) + return scratch + end + # This line almost never runs. + end + # This line only runs on pathological inputs. Make sure it's covered by tests :) + _sort!(v, a.get_next(target), o, kw) +end + + +""" + StableCheckSorted(next) isa Base.Sort.Algorithm Check if an input is sorted and/or reverse-sorted. @@ -1183,7 +1453,7 @@ end ## default sorting policy ## """ - InitialOptimizations(next) <: Algorithm + InitialOptimizations(next) isa Base.Sort.Algorithm Attempt to apply a suite of low-cost optimizations to the input vector before sorting. These optimizations may be automatically applied by the `sort!` family of functions when @@ -1195,29 +1465,26 @@ future versions of Julia. If `next` is stable, then `InitialOptimizations(next)` is also stable. The specific optimizations attempted by `InitialOptimizations` are -[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to -[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref). +[`SubArrayOptimization`](@ref), [`MissingOptimization`](@ref), [`BoolOptimization`](@ref), +dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`, and +[`IEEEFloatOptimization`](@ref). """ -InitialOptimizations(next) = MissingOptimization( - BoolOptimization( - Small{10}( - IEEEFloatOptimization( - next)))) -""" - DEFAULT_STABLE - -The default sorting algorithm. +InitialOptimizations(next) = SubArrayOptimization( + MissingOptimization( + BoolOptimization( + Small{10}( + IEEEFloatOptimization( + next))))) -This algorithm is guaranteed to be stable (i.e. 
it will not reorder elements that compare -equal). It makes an effort to be fast for most inputs. - -The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help -for the current dispatch system. +""" + struct DefaultStable <: Algorithm end -# Extended Help +`DefaultStable` is an algorithm which indicates that a fast, general purpose sorting +algorithm should be used, but does not specify exactly which algorithm. -`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid -of Radix, Insertion, Counting, Quick sorts. +Currently, when sorting short NTuples, this is an unrolled mergesort, and otherwise it is +composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid of Radix, Insertion, +Counting, Quick sorts. We begin with MissingOptimization because it has no runtime cost when it is not triggered and can enable other optimizations to be applied later. For example, @@ -1270,14 +1537,46 @@ Next, we [`ConsiderCountingSort`](@ref). If the range the input is small compare length, we apply [`CountingSort`](@ref). Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort, -but we conside rthe number of _bits_ in the range, rather than the range itself. +but we consider the number of _bits_ in the range, rather than the range itself. Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this stage. Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and otherwise we dispatch to [`ScratchQuickSort`](@ref). """ -const DEFAULT_STABLE = InitialOptimizations( +struct DefaultStable <: Algorithm end + +""" + DEFAULT_STABLE + +The default sorting algorithm. + +This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare +equal). It makes an effort to be fast for most inputs. + +The algorithms used by `DEFAULT_STABLE` are an implementation detail. See the docstring +of `Base.Sort.DefaultStable` for the current dispatch system. +""" +const DEFAULT_STABLE = DefaultStable() + +""" + DefaultUnstable <: Algorithm + +Like [`DefaultStable`](@ref), but does not guarantee stability. +""" +struct DefaultUnstable <: Algorithm end + +""" + DEFAULT_UNSTABLE + +An efficient sorting algorithm which may or may not be stable. + +The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently +the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future. +""" +const DEFAULT_UNSTABLE = DefaultUnstable() + +const _DEFAULT_ALGORITHMS_FOR_VECTORS = InitialOptimizations( IsUIntMappable( Small{40}( CheckSorted( @@ -1288,15 +1587,10 @@ const DEFAULT_STABLE = InitialOptimizations( ScratchQuickSort())))))), StableCheckSorted( ScratchQuickSort()))) -""" - DEFAULT_UNSTABLE -An efficient sorting algorithm. +_sort!(v::AbstractVector, ::Union{DefaultStable, DefaultUnstable}, o::Ordering, kw) = + _sort!(v, _DEFAULT_ALGORITHMS_FOR_VECTORS, o, kw) -The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently -the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future. 
-""" -const DEFAULT_UNSTABLE = DEFAULT_STABLE const SMALL_THRESHOLD = 20 function Base.show(io::IO, alg::Algorithm) @@ -1326,20 +1620,59 @@ defalg(v::AbstractArray) = DEFAULT_STABLE defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation +defalg(v::NTuple) = DEFAULT_STABLE """ - sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + sort!(v; alg::Base.Sort.Algorithm=Base.Sort.defalg(v), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward) -Sort the vector `v` in place. A stable algorithm is used by default. You can select a -specific algorithm to use via the `alg` keyword (see [Sorting Algorithms](@ref) for -available algorithms). The `by` keyword lets you provide a function that will be applied to -each element before comparison; the `lt` keyword allows providing a custom "less than" -function (note that for every `x` and `y`, only one of `lt(x,y)` and `lt(y,x)` can return -`true`); use `rev=true` to reverse the sorting order. `rev=true` preserves forward stability: -Elements that compare equal are not reversed. These options are independent and can -be used together in all possible combinations: if both `by` and `lt` are specified, the `lt` -function is applied to the result of the `by` function; `rev=true` reverses whatever -ordering specified via the `by` and `lt` keywords. +Sort the vector `v` in place. A stable algorithm is used by default: the +ordering of elements that compare equal is preserved. A specific algorithm can +be selected via the `alg` keyword (see [Sorting Algorithms](@ref) for available +algorithms). + +Elements are first transformed with the function `by` and then compared +according to either the function `lt` or the ordering `order`. Finally, the +resulting order is reversed if `rev=true` (this preserves forward stability: +elements that compare equal are not reversed). The current implementation applies +the `by` transformation before each comparison rather than once per element. + +Passing an `lt` other than `isless` along with an `order` other than +[`Base.Order.Forward`](@ref) or [`Base.Order.Reverse`](@ref) is not permitted, +otherwise all options are independent and can be used together in all possible +combinations. Note that `order` can also include a "by" transformation, in +which case it is applied after that defined with the `by` keyword. For more +information on `order` values see the documentation on [Alternate +Orderings](@ref). + +Relations between two elements are defined as follows (with "less" and +"greater" exchanged when `rev=true`): + +* `x` is less than `y` if `lt(by(x), by(y))` (or `Base.Order.lt(order, by(x), by(y))`) yields true. +* `x` is greater than `y` if `y` is less than `x`. +* `x` and `y` are equivalent if neither is less than the other ("incomparable" + is sometimes used as a synonym for "equivalent"). + +The result of `sort!` is sorted in the sense that every element is greater than +or equivalent to the previous one. 
+ +The `lt` function must define a strict weak order, that is, it must be + +* irreflexive: `lt(x, x)` always yields `false`, +* asymmetric: if `lt(x, y)` yields `true` then `lt(y, x)` yields `false`, +* transitive: `lt(x, y) && lt(y, z)` implies `lt(x, z)`, +* transitive in equivalence: `!lt(x, y) && !lt(y, x)` and `!lt(y, z) && !lt(z, + y)` together imply `!lt(x, z) && !lt(z, x)`. In words: if `x` and `y` are + equivalent and `y` and `z` are equivalent then `x` and `z` must be + equivalent. + +For example `<` is a valid `lt` function for `Int` values but `≤` is not: it +violates irreflexivity. For `Float64` values even `<` is invalid as it violates +the fourth condition: `1.0` and `NaN` are equivalent and so are `NaN` and `2.0` +but `1.0` and `2.0` are not equivalent. + +See also [`sort`](@ref), [`sortperm`](@ref), [`sortslices`](@ref), +[`partialsort!`](@ref), [`partialsortperm`](@ref), [`issorted`](@ref), +[`searchsorted`](@ref), [`insorted`](@ref), [`Base.Order.ord`](@ref). # Examples ```jldoctest @@ -1366,6 +1699,32 @@ julia> v = [(1, "c"), (3, "a"), (2, "b")]; sort!(v, by = x -> x[2]); v (3, "a") (2, "b") (1, "c") + +julia> sort(0:3, by=x->x-2, order=Base.Order.By(abs)) +4-element Vector{Int64}: + 2 + 1 + 3 + 0 + +julia> sort(0:3, by=x->x-2, order=Base.Order.By(abs)) == sort(0:3, by=x->abs(x-2)) +true + +julia> sort([2, NaN, 1, NaN, 3]) # correct sort with default lt=isless +5-element Vector{Float64}: + 1.0 + 2.0 + 3.0 + NaN + NaN + +julia> sort([2, NaN, 1, NaN, 3], lt=<) # wrong sort due to invalid lt. This behavior is undefined. +5-element Vector{Float64}: + 2.0 + NaN + 1.0 + NaN + 3.0 ``` """ function sort!(v::AbstractVector{T}; @@ -1380,14 +1739,12 @@ function sort!(v::AbstractVector{T}; end """ - sort(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + sort(v::Union{AbstractVector, NTuple}; alg::Base.Sort.Algorithm=Base.Sort.defalg(v), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward) Variant of [`sort!`](@ref) that returns a sorted copy of `v` leaving `v` itself unmodified. -Uses `Base.copymutable` to support immutable collections and iterables. - -!!! compat "Julia 1.10" - `sort` of arbitrary iterables requires at least Julia 1.10. +!!! compat "Julia 1.12" + Sorting `NTuple`s requires Julia 1.12 or later. # Examples ```jldoctest @@ -1406,32 +1763,36 @@ julia> v 2 ``` """ -function sort(v; kws...) - size = IteratorSize(v) - size == HasShape{0}() && throw(ArgumentError("$v cannot be sorted")) - size == IsInfinite() && throw(ArgumentError("infinite iterator $v cannot be sorted")) - sort!(copymutable(v); kws...) -end -sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) # for method disambiguation -sort(::AbstractString; kws...) = - throw(ArgumentError("sort(::AbstractString) is not supported")) -sort(::Tuple; kws...) = - throw(ArgumentError("sort(::Tuple) is only supported for NTuples")) - -function sort(x::NTuple{N}; lt::Function=isless, by::Function=identity, - rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where N - o = ord(lt,by,rev,order) - if N > 9 - v = sort!(copymutable(x), DEFAULT_STABLE, o) - tuple((v[i] for i in 1:N)...) +sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) 
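For tuples (a usage sketch consistent with the method added below, not a new API), `sort(::NTuple)` returns a tuple of the same type; short tuples go through the unrolled mergesort defined below, while longer ones are copied to a vector, sorted, and converted back:

```julia
julia> sort((3, 1, 2))
(1, 2, 3)

julia> sort((3, 1, 2); rev = true)
(3, 2, 1)

julia> sort((3.0, 1.0, NaN, 2.0))   # the default isless ordering puts NaN last, as for vectors
(1.0, 2.0, 3.0, NaN)
```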
+ +function sort(x::NTuple; + alg::Algorithm=defalg(x), + lt=isless, + by=identity, + rev::Union{Bool,Nothing}=nothing, + order::Ordering=Forward, + scratch::Union{Vector, Nothing}=nothing) + # Can't do this check with type parameters because of https://github.com/JuliaLang/julia/issues/56698 + scratch === nothing || eltype(x) == eltype(scratch) || throw(ArgumentError("scratch has the wrong eltype")) + _sort(x, alg, ord(lt,by,rev,order), (;scratch))::typeof(x) +end +# Folks who want to hack internals can define a new _sort(x::NTuple, ::TheirAlg, o::Ordering) +# or _sort(x::NTuple{N, TheirType}, ::DefaultStable, o::Ordering) where N +function _sort(x::NTuple, a::Union{DefaultStable, DefaultUnstable}, o::Ordering, kw) + # The unrolled tuple sort is prohibitively slow to compile for length > 9. + # See https://github.com/JuliaLang/julia/pull/46104#issuecomment-1435688502 for benchmarks + if length(x) > 9 + v = copymutable(x) + _sort!(v, a, o, kw) + typeof(x)(v) else - _sort(x, o) + _mergesort(x, o) end end -_sort(x::Union{NTuple{0}, NTuple{1}}, o::Ordering) = x -function _sort(x::NTuple, o::Ordering) +_mergesort(x::Union{NTuple{0}, NTuple{1}}, o::Ordering) = x +function _mergesort(x::NTuple, o::Ordering) a, b = Base.IteratorsMD.split(x, Val(length(x)>>1)) - merge(_sort(a, o), _sort(b, o), o) + merge(_mergesort(a, o), _mergesort(b, o), o) end merge(x::NTuple, y::NTuple{0}, o::Ordering) = x merge(x::NTuple{0}, y::NTuple, o::Ordering) = y @@ -1439,19 +1800,18 @@ merge(x::NTuple{0}, y::NTuple{0}, o::Ordering) = x # Method ambiguity merge(x::NTuple, y::NTuple, o::Ordering) = (lt(o, y[1], x[1]) ? (y[1], merge(x, tail(y), o)...) : (x[1], merge(tail(x), y, o)...)) - ## partialsortperm: the permutation to sort the first k elements of an array ## """ - partialsortperm(v, k; by=, lt=, rev=false) + partialsortperm(v, k; by=identity, lt=isless, rev=false) Return a partial permutation `I` of the vector `v`, so that `v[I]` returns values of a fully sorted version of `v` at index `k`. If `k` is a range, a vector of indices is returned; if `k` is an integer, a single index is returned. The order is specified using the same -keywords as `sort!`. The permutation is stable, meaning that indices of equal elements -appear in ascending order. +keywords as `sort!`. The permutation is stable: the indices of equal elements +will appear in ascending order. -Note that this function is equivalent to, but more efficient than, calling `sortperm(...)[k]`. +This function is equivalent to, but more efficient than, calling `sortperm(...)[k]`. # Examples ```jldoctest @@ -1477,7 +1837,7 @@ partialsortperm(v::AbstractVector, k::Union{Integer,OrdinalRange}; kwargs...) = partialsortperm!(similar(Vector{eltype(k)}, axes(v,1)), v, k; kwargs...) """ - partialsortperm!(ix, v, k; by=, lt=, rev=false) + partialsortperm!(ix, v, k; by=identity, lt=isless, rev=false) Like [`partialsortperm`](@ref), but accepts a preallocated index vector `ix` the same size as `v`, which is used to store (a permutation of) the indices of `v`. @@ -1497,6 +1857,8 @@ v[ix[k]] == partialsort(v, k) The return value is the `k`th element of `ix` if `k` is an integer, or view into `ix` if `k` is a range. 
+$(Base._DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> v = [3, 1, 2, 1]; @@ -1538,12 +1900,12 @@ end ## sortperm: the permutation to sort an array ## """ - sortperm(A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer]) + sortperm(A; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward, [dims::Integer]) Return a permutation vector or array `I` that puts `A[I]` in sorted order along the given dimension. If `A` has more than one dimension, then the `dims` keyword argument must be specified. The order is specified using the same keywords as [`sort!`](@ref). The permutation is guaranteed to be stable even -if the sorting algorithm is unstable, meaning that indices of equal elements appear in +if the sorting algorithm is unstable: the indices of equal elements will appear in ascending order. See also [`sortperm!`](@ref), [`partialsortperm`](@ref), [`invperm`](@ref), [`indexin`](@ref). @@ -1616,11 +1978,13 @@ end """ - sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer]) + sortperm!(ix, A; alg::Base.Sort.Algorithm=Base.Sort.DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward, [dims::Integer]) Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. `ix` is initialized to contain the values `LinearIndices(A)`. +$(Base._DOCS_ALIASING_WARNING) + !!! compat "Julia 1.9" The method accepting `dims` requires at least Julia 1.9. @@ -1702,7 +2066,7 @@ end ## sorting multi-dimensional arrays ## """ - sort(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + sort(A; dims::Integer, alg::Base.Sort.Algorithm=Base.Sort.defalg(A), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward) Sort a multidimensional array `A` along the given dimension. See [`sort!`](@ref) for a description of possible @@ -1774,10 +2138,11 @@ end """ - sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + sort!(A; dims::Integer, alg::Base.Sort.Algorithm=Base.Sort.defalg(A), lt=isless, by=identity, rev::Bool=false, order::Base.Order.Ordering=Base.Order.Forward) Sort the multidimensional array `A` along dimension `dims`. -See [`sort!`](@ref) for a description of possible keyword arguments. +See the one-dimensional version of [`sort!`](@ref) for a description of +possible keyword arguments. To sort slices of an array, refer to [`sortslices`](@ref). @@ -1809,30 +2174,43 @@ function sort!(A::AbstractArray{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, # TODO stop eagerly over-allocating. - scratch::Union{Vector{T}, Nothing}=Vector{T}(undef, size(A, dims))) where T - __sort!(A, Val(dims), maybe_apply_initial_optimizations(alg), ord(lt, by, rev, order), scratch) -end -function __sort!(A::AbstractArray{T}, ::Val{K}, - alg::Algorithm, - order::Ordering, - scratch::Union{Vector{T}, Nothing}) where {K,T} + scratch::Union{Vector{T}, Nothing}=size(A, dims) < 10 ? nothing : Vector{T}(undef, size(A, dims))) where T nd = ndims(A) - - 1 <= K <= nd || throw(ArgumentError("dimension out of range")) - - remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd) - for idx in CartesianIndices(remdims) - Av = view(A, ntuple(i -> i == K ? 
Colon() : idx[i], nd)...) - sort!(Av; alg, order, scratch) + 1 <= dims <= nd || throw(ArgumentError("dimension out of range")) + alg2 = maybe_apply_initial_optimizations(alg) + order2 = ord(lt, by, rev, order) + foreach(ntuple(Val, nd)) do d + get_value(d) == dims || return + # We assume that an Integer between 1 and nd must be equal to one of the + # values 1:nd. If this assumption is false, then what's an integer? and + # also sort! will silently do nothing. + + idxs = CartesianIndices(ntuple(i -> i == get_value(d) ? 1 : axes(A, i), ndims(A))) + get_view(idx) = view(A, ntuple(i -> i == get_value(d) ? Colon() : idx[i], ndims(A))...) + if d == Val(1) || size(A, get_value(d)) < 30 + for idx in idxs + sort!(get_view(idx); alg=alg2, order=order2, scratch) + end + else + v = similar(get_view(first(idxs))) + for idx in idxs + vw = get_view(idx) + v .= vw + sort!(v; alg=alg2, order=order2, scratch) + vw .= v + end + end + A end A end +get_value(::Val{x}) where x = x ## uint mapping to allow radix sorting primitives other than UInts ## """ - UIntMappable(T::Type, order::Ordering) + UIntMappable(T::Type, order::Base.Order.Ordering) Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and [`uint_unmap`](@ref) are implemented. @@ -1842,7 +2220,7 @@ If either is not implemented, return `nothing`. UIntMappable(T::Type, order::Ordering) = nothing """ - uint_map(x, order::Ordering)::Unsigned + uint_map(x, order::Base.Order.Ordering)::Unsigned Map `x` to an un unsigned integer, maintaining sort order. @@ -1856,7 +2234,7 @@ See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref) function uint_map end """ - uint_unmap(T::Type, u::Unsigned, order::Ordering) + uint_unmap(T::Type, u::Unsigned, order::Base.Order.Ordering) Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies `x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`. @@ -1926,18 +2304,18 @@ struct MergeSortAlg <: Algorithm end """ PartialQuickSort{T <: Union{Integer,OrdinalRange}} -Indicate that a sorting function should use the partial quick sort -algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest -to largest, finding them and sorting them using [`QuickSort`](@ref). +Indicate that a sorting function should use the partial quick sort algorithm. +`PartialQuickSort(k)` is like `QuickSort`, but is only required to find and +sort the elements that would end up in `v[k]` were `v` fully sorted. Characteristics: - * *not stable*: does not preserve the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters which + * *not stable*: does not preserve the ordering of elements that + compare equal (e.g. "a" and "A" in a sort of letters that ignores case). * *in-place* in memory. * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - Note that `PartialQuickSort(k)` does not necessarily sort the whole array. For example, +Note that `PartialQuickSort(k)` does not necessarily sort the whole array. For example, ```jldoctest julia> x = rand(100); @@ -1969,8 +2347,8 @@ Indicate that a sorting function should use the quick sort algorithm, which is *not* stable. Characteristics: - * *not stable*: does not preserve the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters which + * *not stable*: does not preserve the ordering of elements that + compare equal (e.g. "a" and "A" in a sort of letters that ignores case). * *in-place* in memory. * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). 
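Returning to the multidimensional `sort!` rewrite above: the `foreach(ntuple(Val, nd))` loop turns the runtime integer `dims` into a compile-time `Val` so the slice views built inside the closure have a concrete type. The following standalone sketch of that dispatch pattern is an editor's illustration (the `foreach_slice` and `get_value` names here are local to the sketch, not part of the patch):

```julia
# Match the runtime `dims` against Val(1), ..., Val(nd); only the matching
# branch does work, and inside it the dimension is a compile-time constant,
# so the generated views are type-stable.
function foreach_slice(f, A::AbstractArray, dims::Integer)
    nd = ndims(A)
    1 <= dims <= nd || throw(ArgumentError("dimension out of range"))
    foreach(ntuple(Val, nd)) do d
        get_value(d) == dims || return
        idxs = CartesianIndices(ntuple(i -> i == get_value(d) ? 1 : axes(A, i), nd))
        for idx in idxs
            f(view(A, ntuple(i -> i == get_value(d) ? Colon() : idx[i], nd)...))
        end
    end
    return A
end
get_value(::Val{x}) where {x} = x
```

A call like `foreach_slice(sort!, A, 2)` then sorts each slice along dimension 2, the idea being to avoid the dynamic dispatch that a direct `Val(dims)` construction from a runtime integer would incur.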
@@ -1988,8 +2366,8 @@ subcollection at each step, until the entire collection has been recombined in sorted form. Characteristics: - * *stable*: preserves the ordering of elements which compare - equal (e.g. "a" and "A" in a sort of letters which ignores + * *stable*: preserves the ordering of elements that compare + equal (e.g. "a" and "A" in a sort of letters that ignores case). * *not in-place* in memory. * *divide-and-conquer* sort strategy. @@ -2164,7 +2542,7 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) @getkw lo hi scratch legacy_dispatch_entry if legacy_dispatch_entry === a # This error prevents infinite recursion for unknown algorithms - throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined")) + throw(ArgumentError(LazyString("Base.Sort._sort!(::", typeof(v), ", ::", typeof(a), ", ::", typeof(o), ", ::Any) is not defined"))) else sort!(v, lo, hi, a, o) scratch diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl index 9fda5c41fb09e..ce3a3d67e3ba4 100644 --- a/base/special/cbrt.jl +++ b/base/special/cbrt.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Float32/Float64 based on C implementations from FDLIBM (http://www.netlib.org/fdlibm/) +# Float32/Float64 based on C implementations from FDLIBM (https://www.netlib.org/fdlibm/) # and FreeBSD: # ## ==================================================== diff --git a/base/special/exp.jl b/base/special/exp.jl index 9cca6f568305f..38d7509807aed 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -216,6 +216,7 @@ end small_part = muladd(jU, expm1b_kernel(base, r), jL) + jU if !(abs(x) <= SUBNORM_EXP(base, T)) + isnan(x) && return x x >= MAX_EXP(base, T) && return Inf x <= MIN_EXP(base, T) && return 0.0 if k <= -53 @@ -243,6 +244,7 @@ end hi, lo = Base.canonicalize2(1.0, kern) small_part = fma(jU, hi, muladd(jU, (lo+xlo), very_small)) if !(abs(x) <= SUBNORM_EXP(base, T)) + isnan(x) && return x x >= MAX_EXP(base, T) && return Inf x <= MIN_EXP(base, T) && return 0.0 if k <= -53 @@ -250,7 +252,7 @@ end twopk = (k + UInt64(53)) << 52 return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53 end - #k == 1024 && return (small_part * 2.0) * 2.0^1023 + k == 1024 && return (small_part * 2.0) * 2.0^1023 end twopk = Int64(k) << 52 return reinterpret(T, twopk + reinterpret(Int64, small_part)) @@ -460,7 +462,7 @@ function expm1(x::Float32) end x = Float64(x) N_float = round(x*Ln2INV(Float64)) - N = unsafe_trunc(UInt64, N_float) + N = unsafe_trunc(Int64, N_float) r = muladd(N_float, Ln2(Float64), x) hi = evalpoly(r, (1.0, .5, 0.16666667546642386, 0.041666183019487026, 0.008332997481506921, 0.0013966479175977883, 0.0002004037059220124)) @@ -477,7 +479,7 @@ function expm1(x::Float16) return Float16(x*evalpoly(x, (1f0, .5f0, 0.16666628f0, 0.04166785f0, 0.008351848f0, 0.0013675707f0))) end N_float = round(x*Ln2INV(Float32)) - N = unsafe_trunc(UInt32, N_float) + N = unsafe_trunc(Int32, N_float) r = muladd(N_float, Ln2(Float32), x) hi = evalpoly(r, (1f0, .5f0, 0.16666667f0, 0.041665863f0, 0.008333111f0, 0.0013981499f0, 0.00019983904f0)) small_part = r*hi diff --git a/base/special/log.jl b/base/special/log.jl index 5d7f1c8118724..029394b7a63f1 100644 --- a/base/special/log.jl +++ b/base/special/log.jl @@ -155,14 +155,11 @@ logbU(::Type{Float64},::Val{10}) = 0.4342944819032518 logbL(::Type{Float64},::Val{10}) = 1.098319650216765e-17 # Procedure 1 -# XXX we want to mark :consistent-cy here so that this function 
can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the access to `t_log_Float64` is really safe here -Base.@assume_effects :consistent @inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ)) +@inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ)) jp = unsafe_trunc(Int,128.0*F)-127 ## Steps 1 and 2 - @inbounds hi,lo = t_log_Float64[jp] + Base.@assume_effects :nothrow :noub @inbounds hi,lo = t_log_Float64[jp] l_hi = mf* 0.6931471805601177 + hi l_lo = mf*-1.7239444525614835e-13 + lo @@ -216,14 +213,11 @@ end end # Procedure 1 -# XXX we want to mark :consistent-cy here so that this function can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the access to `t_log_Float32` is really safe here -Base.@assume_effects :consistent @inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ)) +@inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ)) jp = unsafe_trunc(Int,128.0f0*F)-127 ## Steps 1 and 2 - @inbounds hi = t_log_Float32[jp] + Base.@assume_effects :nothrow :noub @inbounds hi = t_log_Float32[jp] l = mf*0.6931471805599453 + hi ## Step 3 @@ -260,14 +254,14 @@ end Float32(logb(Float32, base)*(u64 + q)) end -log2(x::Float32) = _log(x, Val(2), :log2) -log(x::Float32) = _log(x, Val(:ℯ), :log) -log10(x::Float32) = _log(x, Val(10), :log10) -log2(x::Float64) = _log(x, Val(2), :log2) -log(x::Float64) = _log(x, Val(:ℯ), :log) -log10(x::Float64) = _log(x, Val(10), :log10) +@noinline log2(x::Float32) = _log(x, Val(2), :log2) +@noinline log(x::Float32) = _log(x, Val(:ℯ), :log) +@noinline log10(x::Float32) = _log(x, Val(10), :log10) +@noinline log2(x::Float64) = _log(x, Val(2), :log2) +@noinline log(x::Float64) = _log(x, Val(:ℯ), :log) +@noinline log10(x::Float64) = _log(x, Val(10), :log10) -function _log(x::Float64, base, func) +@inline function _log(x::Float64, base, func::Symbol) if x > 0.0 x == Inf && return x @@ -294,15 +288,15 @@ function _log(x::Float64, base, func) return log_proc1(y,mf,F,f,base) elseif x == 0.0 - -Inf + return -Inf elseif isnan(x) - NaN + return NaN else throw_complex_domainerror(func, x) end end -function _log(x::Float32, base, func) +@inline function _log(x::Float32, base, func::Symbol) if x > 0f0 x == Inf32 && return x @@ -327,11 +321,11 @@ function _log(x::Float32, base, func) F = (y + 65536.0f0) - 65536.0f0 # 0x1p-7*round(0x1p7*y) f = y-F - log_proc1(y,mf,F,f,base) + return log_proc1(y,mf,F,f,base) elseif x == 0f0 - -Inf32 + return -Inf32 elseif isnan(x) - NaN32 + return NaN32 else throw_complex_domainerror(func, x) end @@ -562,17 +556,17 @@ end # Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c # Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed) # note that this isn't an exact translation as this version compacts the table to reduce cache pressure. -function _log_ext(xu) +function _log_ext(xu::UInt64) # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact. # The range is split into N subintervals. # The ith subinterval contains z and c is near the center of the interval. tmp = reinterpret(Int64, xu - 0x3fe6955500000000) #0x1.69555p-1 - i = (tmp >> 45) & 127 z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000)) k = Float64(tmp >> 52) # log(x) = k*Ln2 + log(c) + log1p(z/c-1). 
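The decomposition in the comment above is the exact identity `log(x) = k*log(2) + log(c) + log1p(z/c - 1)` for `x = 2^k * z` and any `c > 0`; a quick numerical sanity check (illustrative sketch only, with an arbitrarily chosen `c` standing in for a table entry):

```julia
x = 6.5
k = 2.0
z = x / 2.0^k                 # 1.625, so x == 2^k * z exactly
c = 1.625 + 1/128             # arbitrary value near z; the real table picks c so z/c - 1 is tiny
log(x) ≈ k*log(2) + log(c) + log1p(z/c - 1)   # true, up to rounding
```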
- # getfield instead of getindex to satisfy effect analysis not knowing whether this is inbounds - t, logctail = getfield(t_log_table_compact, Int(i+1)) + # N.B. :nothrow and :noub since `idx` is known to be `1 ≤ idx ≤ length(t_log_table_compact)` + idx = (tmp >> 45) & (length(t_log_table_compact)-1) + 1 + t, logctail = Base.@assume_effects :nothrow :noub @inbounds t_log_table_compact[idx] invc, logc = log_tab_unpack(t) # Note: invc is j/N or j/N/2 where j is an integer in [N,2N) and # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl index de5c4151df2d0..b0a17fdc25087 100644 --- a/base/special/rem_pio2.jl +++ b/base/special/rem_pio2.jl @@ -126,10 +126,7 @@ function fromfraction(f::Int128) return (z1,z2) end -# XXX we want to mark :consistent-cy here so that this function can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the accesses to `INV_2PI` are really safe here -Base.@assume_effects :consistent function paynehanek(x::Float64) +function paynehanek(x::Float64) # 1. Convert to form # # x = X * 2^k, @@ -168,15 +165,15 @@ Base.@assume_effects :consistent function paynehanek(x::Float64) idx = k >> 6 shift = k - (idx << 6) - if shift == 0 - @inbounds a1 = INV_2PI[idx+1] - @inbounds a2 = INV_2PI[idx+2] - @inbounds a3 = INV_2PI[idx+3] + Base.@assume_effects :nothrow :noub @inbounds if shift == 0 + a1 = INV_2PI[idx+1] + a2 = INV_2PI[idx+2] + a3 = INV_2PI[idx+3] else # use shifts to extract the relevant 64 bit window - @inbounds a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift)) - @inbounds a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift)) - @inbounds a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift)) + a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift)) + a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift)) + a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift)) end # 3. Perform the multiplication: diff --git a/base/special/trig.jl b/base/special/trig.jl index 5b2a23688ca6b..66e4b46d7d489 100644 --- a/base/special/trig.jl +++ b/base/special/trig.jl @@ -165,11 +165,13 @@ end @noinline sincos_domain_error(x) = throw(DomainError(x, "sincos(x) is only defined for finite x.")) """ - sincos(x) + sincos(x::T) where T -> Tuple{float(T),float(T)} Simultaneously compute the sine and cosine of `x`, where `x` is in radians, returning a tuple `(sine, cosine)`. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` if `isnan(x)`. + See also [`cis`](@ref), [`sincospi`](@ref), [`sincosd`](@ref). """ function sincos(x::T) where T<:Union{Float32, Float64} @@ -783,22 +785,22 @@ end end """ - sinpi(x) + sinpi(x::T) where T -> float(T) Compute ``\\sin(\\pi x)`` more accurately than `sin(pi*x)`, especially for large `x`. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + See also [`sind`](@ref), [`cospi`](@ref), [`sincospi`](@ref). """ -function sinpi(_x::T) where T<:Union{IEEEFloat, Rational} +function sinpi(_x::T) where T<:IEEEFloat x = abs(_x) if !isfinite(x) isnan(x) && return x - throw(DomainError(x, "`x` cannot be infinite.")) + throw(DomainError(x, "`sinpi(x)` is only defined for finite `x`.")) end # For large x, answers are all 1 or zero. 
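To see why these docstrings stress accuracy "especially for large `x`" (an illustrative sketch, not part of the patch): for huge arguments the product `pi*x` is rounded before any reduction, while `sinpi`/`cospi` reduce the argument exactly first.

```julia
x = 1.0e15 + 0.25      # exactly representable; 1.0e15 is an even integer
sinpi(x)               # ≈ 0.7071067811865476, i.e. sin(pi/4) after exact reduction
sin(pi * x)            # pi*x can no longer hold the exact product, so this lands far from sin(pi/4)
```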
- if T <: AbstractFloat - x >= maxintfloat(T) && return copysign(zero(T), _x) - end + x >= maxintfloat(T) && return copysign(zero(T), _x) # reduce to interval [0, 0.5] n = round(2*x) @@ -816,20 +818,22 @@ function sinpi(_x::T) where T<:Union{IEEEFloat, Rational} return ifelse(signbit(_x), -res, res) end """ - cospi(x) + cospi(x::T) where T -> float(T) Compute ``\\cos(\\pi x)`` more accurately than `cos(pi*x)`, especially for large `x`. + +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + +See also: [`cispi`](@ref), [`sincosd`](@ref), [`cospi`](@ref). """ -function cospi(x::T) where T<:Union{IEEEFloat, Rational} +function cospi(x::T) where T<:IEEEFloat x = abs(x) if !isfinite(x) isnan(x) && return x - throw(DomainError(x, "`x` cannot be infinite.")) + throw(DomainError(x, "`cospi(x)` is only defined for finite `x`.")) end # For large x, answers are all 1 or zero. - if T <: AbstractFloat - x >= maxintfloat(T) && return one(T) - end + x >= maxintfloat(T) && return one(T) # reduce to interval [0, 0.5] n = round(2*x) @@ -846,26 +850,26 @@ function cospi(x::T) where T<:Union{IEEEFloat, Rational} end end """ - sincospi(x) + sincospi(x::T) where T -> Tuple{float(T),float(T)} Simultaneously compute [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) (the sine and cosine of `π*x`, where `x` is in radians), returning a tuple `(sine, cosine)`. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` tuple if `isnan(x)`. + !!! compat "Julia 1.6" This function requires Julia 1.6 or later. See also: [`cispi`](@ref), [`sincosd`](@ref), [`sinpi`](@ref). """ -function sincospi(_x::T) where T<:Union{IEEEFloat, Rational} +function sincospi(_x::T) where T<:IEEEFloat x = abs(_x) if !isfinite(x) isnan(x) && return x, x - throw(DomainError(x, "`x` cannot be infinite.")) + throw(DomainError(x, "`sincospi(x)` is only defined for finite `x`.")) end # For large x, answers are all 1 or zero. - if T <: AbstractFloat - x >= maxintfloat(T) && return (copysign(zero(T), _x), one(T)) - end + x >= maxintfloat(T) && return (copysign(zero(T), _x), one(T)) # reduce to interval [0, 0.5] n = round(2*x) @@ -886,29 +890,28 @@ function sincospi(_x::T) where T<:Union{IEEEFloat, Rational} end """ - tanpi(x) + tanpi(x::T) where T -> float(T) Compute ``\\tan(\\pi x)`` more accurately than `tan(pi*x)`, especially for large `x`. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + !!! compat "Julia 1.10" This function requires at least Julia 1.10. See also [`tand`](@ref), [`sinpi`](@ref), [`cospi`](@ref), [`sincospi`](@ref). """ - -function tanpi(_x::T) where T<:Union{IEEEFloat, Rational} +function tanpi(_x::T) where T<:IEEEFloat # This is modified from sincospi. # Would it be faster or more accurate to make a tanpi_kernel? x = abs(_x) if !isfinite(x) isnan(x) && return x - throw(DomainError(x, "`x` cannot be infinite.")) + throw(DomainError(x, "`tanpi(x)` is only defined for finite `x`.")) end # For large x, answers are all zero. # All integer values for floats larger than maxintfloat are even. - if T <: AbstractFloat - x >= maxintfloat(T) && return copysign(zero(T), _x) - end + x >= maxintfloat(T) && return copysign(zero(T), _x) # reduce to interval [0, 0.5] n = round(2*x) @@ -933,10 +936,10 @@ cospi(x::Integer) = isodd(x) ? -one(float(x)) : one(float(x)) tanpi(x::Integer) = x >= 0 ? (isodd(x) ? -zero(float(x)) : zero(float(x))) : (isodd(x) ? 
zero(float(x)) : -zero(float(x))) sincospi(x::Integer) = (sinpi(x), cospi(x)) -sinpi(x::Real) = sin(pi*x) -cospi(x::Real) = cos(pi*x) -sincospi(x::Real) = sincos(pi*x) -tanpi(x::Real) = tan(pi*x) +sinpi(x::AbstractFloat) = sin(pi*x) +cospi(x::AbstractFloat) = cos(pi*x) +sincospi(x::AbstractFloat) = sincos(pi*x) +tanpi(x::AbstractFloat) = tan(pi*x) tanpi(x::Complex) = sinpi(x) / cospi(x) # Is there a better way to do this? function sinpi(z::Complex{T}) where T @@ -1072,9 +1075,11 @@ isinf_real(x::Complex) = isinf(real(x)) && isfinite(imag(x)) isinf_real(x::Number) = false """ - sinc(x) + sinc(x::T) where {T <: Number} -> float(T) + +Compute normalized sinc function ``\\operatorname{sinc}(x) = \\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``. -Compute ``\\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``. +Return a `T(NaN)` if `isnan(x)`. See also [`cosc`](@ref), its derivative. """ @@ -1089,10 +1094,14 @@ _sinc(x::Float16) = Float16(_sinc(Float32(x))) _sinc(x::ComplexF16) = ComplexF16(_sinc(ComplexF32(x))) """ - cosc(x) + cosc(x::T) where {T <: Number} -> float(T) Compute ``\\cos(\\pi x) / x - \\sin(\\pi x) / (\\pi x^2)`` if ``x \\neq 0``, and ``0`` if ``x = 0``. This is the derivative of `sinc(x)`. + +Return a `T(NaN)` if `isnan(x)`. + +See also [`sinc`](@ref). """ cosc(x::Number) = _cosc(float(x)) function _cosc(x::Number) @@ -1136,19 +1145,25 @@ for (finv, f, finvh, fh, finvd, fd, fn) in ((:sec, :cos, :sech, :cosh, :secd, :c dname = string(finvd) @eval begin @doc """ - $($name)(x) + $($name)(x::T) where {T <: Number} -> float(T) Compute the $($fn) of `x`, where `x` is in radians. + + Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. """ ($finv)(z::Number) = inv(($f)(z)) @doc """ - $($hname)(x) + $($hname)(x::T) where {T <: Number} -> float(T) Compute the hyperbolic $($fn) of `x`. + + Return a `T(NaN)` if `isnan(x)`. """ ($finvh)(z::Number) = inv(($fh)(z)) @doc """ - $($dname)(x) + $($dname)(x::T) where {T <: Number} -> float(T) Compute the $($fn) of `x`, where `x` is in degrees. + + Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. """ ($finvd)(z::Number) = inv(($fd)(z)) end end @@ -1160,11 +1175,15 @@ for (tfa, tfainv, hfa, hfainv, fn) in ((:asec, :acos, :asech, :acosh, "secant"), hname = string(hfa) @eval begin @doc """ - $($tname)(x) - Compute the inverse $($fn) of `x`, where the output is in radians. """ ($tfa)(y::Number) = ($tfainv)(inv(y)) + $($tname)(x::T) where {T <: Number} -> float(T) + + Compute the inverse $($fn) of `x`, where the output is in radians. + """ ($tfa)(y::Number) = ($tfainv)(inv(y)) @doc """ - $($hname)(x) - Compute the inverse hyperbolic $($fn) of `x`. """ ($hfa)(y::Number) = ($hfainv)(inv(y)) + $($hname)(x::T) where {T <: Number} -> float(T) + + Compute the inverse hyperbolic $($fn) of `x`. 
+ """ ($hfa)(y::Number) = ($hfainv)(inv(y)) end end @@ -1189,7 +1208,7 @@ deg2rad_ext(x::Real) = deg2rad(x) # Fallback function sind(x::Real) if isinf(x) - return throw(DomainError(x, "`x` cannot be infinite.")) + return throw(DomainError(x, "`sind(x)` is only defined for finite `x`.")) elseif isnan(x) return x end @@ -1220,7 +1239,7 @@ end function cosd(x::Real) if isinf(x) - return throw(DomainError(x, "`x` cannot be infinite.")) + return throw(DomainError(x, "`cosd(x)` is only defined for finite `x`.")) elseif isnan(x) return x end @@ -1247,9 +1266,12 @@ end tand(x::Real) = sind(x) / cosd(x) """ - sincosd(x) + sincosd(x::T) where T -> Tuple{float(T),float(T)} + +Simultaneously compute the sine and cosine of `x`, where `x` is in degrees, returning +a tuple `(sine, cosine)`. -Simultaneously compute the sine and cosine of `x`, where `x` is in degrees. +Throw a [`DomainError`](@ref) if `isinf(x)`, return a `(T(NaN), T(NaN))` tuple if `isnan(x)`. !!! compat "Julia 1.3" This function requires at least Julia 1.3. @@ -1265,11 +1287,13 @@ for (fd, f, fn) in ((:sind, :sin, "sine"), (:cosd, :cos, "cosine"), (:tand, :tan name = string(fd) @eval begin @doc """ - $($name)(x) + $($name)(x::T) where T -> float(T) Compute $($fn) of `x`, where `x` is in $($un). If `x` is a matrix, `x` needs to be a square matrix. + Throw a [`DomainError`](@ref) if `isinf(x)`, return a `T(NaN)` if `isnan(x)`. + !!! compat "Julia 1.7" Matrix arguments require Julia 1.7 or later. """ ($fd)(x) = ($f)(($fu).(x)) @@ -1297,11 +1321,15 @@ for (fd, f, fn) in ((:asind, :asin, "sine"), (:acosd, :acos, "cosine"), end """ - atand(y) - atand(y,x) + atand(y::T) where T -> float(T) + atand(y::T, x::S) where {T,S} -> promote_type(T,S) + atand(y::AbstractMatrix{T}) where T -> AbstractMatrix{Complex{float(T)}} Compute the inverse tangent of `y` or `y/x`, respectively, where the output is in degrees. +Return a `NaN` if `isnan(y)` or `isnan(x)`. The returned `NaN` is either a `T` in the single +argument version, or a `promote_type(T,S)` in the two argument version. + !!! compat "Julia 1.7" The one-argument method supports square matrix arguments as of Julia 1.7. """ diff --git a/base/stacktraces.jl b/base/stacktraces.jl index 9c942814eefad..01e8a3cf62e72 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -7,7 +7,8 @@ module StackTraces import Base: hash, ==, show -import Core: CodeInfo, MethodInstance +import Core: CodeInfo, MethodInstance, CodeInstance +using Base.IRShow: normalize_method_name, append_scopes!, LineInfoNode export StackTrace, StackFrame, stacktrace @@ -20,10 +21,10 @@ Stack information representing execution context, with the following fields: The name of the function containing the execution context. -- `linfo::Union{Core.MethodInstance, Method, Module, Core.CodeInfo, Nothing}` +- `linfo::Union{Method, Core.MethodInstance, Core.CodeInstance, Core.CodeInfo, Nothing}` - The MethodInstance or CodeInfo containing the execution context (if it could be found), \ - or Module (for macro expansions)" + The Method, MethodInstance, CodeInstance, or CodeInfo containing the execution context (if it could be found), \ + or nothing (for example, if the inlining was a result of macro expansion). 
- `file::Symbol` @@ -53,9 +54,9 @@ struct StackFrame # this type should be kept platform-agnostic so that profiles file::Symbol "the line number in the file containing the execution context" line::Int - "the MethodInstance or CodeInfo containing the execution context (if it could be found), \ - or Module (for macro expansions)" - linfo::Union{MethodInstance, Method, Module, CodeInfo, Nothing} + "the CodeInstance or CodeInfo containing the execution context (if it could be found), \ + or nothing (for example, if the inlining was a result of macro expansion)." + linfo::Union{Core.MethodInstance, Core.CodeInstance, Method, CodeInfo, Nothing} "true if the code is from C" from_c::Bool "true if the code is from an inlined frame" @@ -97,87 +98,6 @@ function hash(frame::StackFrame, h::UInt) return h end -get_inlinetable(::Any) = nothing -function get_inlinetable(mi::MethodInstance) - isdefined(mi, :def) && mi.def isa Method && isdefined(mi, :cache) && isdefined(mi.cache, :inferred) && - mi.cache.inferred !== nothing || return nothing - linetable = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), mi.def, mi.cache, mi.cache.inferred).linetable - return filter!(x -> x.inlined_at > 0, linetable) -end - -get_method_instance_roots(::Any) = nothing -function get_method_instance_roots(mi::Union{Method, MethodInstance}) - m = mi isa MethodInstance ? mi.def : mi - m isa Method && isdefined(m, :roots) || return nothing - return filter(x -> x isa MethodInstance, m.roots) -end - -function lookup_inline_frame_info(func::Symbol, file::Symbol, linenum::Int, inlinetable::Vector{Core.LineInfoNode}) - #REPL frames and some base files lack this prefix while others have it; should fix? - filestripped = Symbol(lstrip(string(file), ('.', '\\', '/'))) - linfo = nothing - #= - Some matching entries contain the MethodInstance directly. - Other matching entries contain only a Method or Symbol (function name); such entries - are located after the entry with the MethodInstance, so backtracking is required. - If backtracking fails, the Method or Module is stored for return, but we continue - the search in case a MethodInstance is found later. - TODO: If a backtrack has failed, do we need to backtrack again later if another Method - or Symbol match is found? Or can a limit on the subsequent backtracks be placed? - =# - for (i, line) in enumerate(inlinetable) - Base.IRShow.method_name(line) === func && line.file ∈ (file, filestripped) && line.line == linenum || continue - if line.method isa MethodInstance - linfo = line.method - break - elseif line.method isa Method || line.method isa Symbol - linfo = line.method isa Method ? line.method : line.module - # backtrack to find the matching MethodInstance, if possible - for j in (i - 1):-1:1 - nextline = inlinetable[j] - nextline.inlined_at == line.inlined_at && Base.IRShow.method_name(line) === Base.IRShow.method_name(nextline) && line.file === nextline.file || break - if nextline.method isa MethodInstance - linfo = nextline.method - break - end - end - end - end - return linfo -end - -function lookup_inline_frame_info(func::Symbol, file::Symbol, miroots::Vector{Any}) - # REPL frames and some base files lack this prefix while others have it; should fix? 
- filestripped = Symbol(lstrip(string(file), ('.', '\\', '/'))) - matches = filter(miroots) do x - x.def isa Method || return false - m = x.def::Method - return m.name == func && m.file ∈ (file, filestripped) - end - if length(matches) > 1 - # ambiguous, check if method is same and return that instead - all_matched = true - for m in matches - all_matched = m.def.line == matches[1].def.line && - m.def.module == matches[1].def.module - all_matched || break - end - if all_matched - return matches[1].def - end - # all else fails, return module if they match, or give up - all_matched = true - for m in matches - all_matched = m.def.module == matches[1].def.module - all_matched || break - end - return all_matched ? matches[1].def.module : nothing - elseif length(matches) == 1 - return matches[1] - end - return nothing -end - """ lookup(pointer::Ptr{Cvoid}) -> Vector{StackFrame} @@ -189,25 +109,14 @@ Base.@constprop :none function lookup(pointer::Ptr{Cvoid}) infos = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint), pointer, false)::Core.SimpleVector pointer = convert(UInt64, pointer) isempty(infos) && return [StackFrame(empty_sym, empty_sym, -1, nothing, true, false, pointer)] # this is equal to UNKNOWN - parent_linfo = infos[end][4] - inlinetable = get_inlinetable(parent_linfo) - miroots = inlinetable === nothing ? get_method_instance_roots(parent_linfo) : nothing # fallback if linetable missing res = Vector{StackFrame}(undef, length(infos)) - for i in reverse(1:length(infos)) + for i in 1:length(infos) info = infos[i]::Core.SimpleVector @assert(length(info) == 6) func = info[1]::Symbol file = info[2]::Symbol linenum = info[3]::Int linfo = info[4] - if i < length(infos) - if inlinetable !== nothing - linfo = lookup_inline_frame_info(func, file, linenum, inlinetable) - elseif miroots !== nothing - linfo = lookup_inline_frame_info(func, file, miroots) - end - linfo = linfo === nothing ? parentmodule(res[i + 1]) : linfo # e.g. `macro expansion` - end res[i] = StackFrame(func, file, linenum, linfo, info[5]::Bool, info[6]::Bool, pointer) end return res @@ -215,34 +124,56 @@ end const top_level_scope_sym = Symbol("top-level scope") -function lookup(ip::Union{Base.InterpreterIP,Core.Compiler.InterpreterIP}) +function lookup(ip::Base.InterpreterIP) code = ip.code if code === nothing # interpreted top-level expression with no CodeInfo return [StackFrame(top_level_scope_sym, empty_sym, 0, nothing, false, false, 0)] end - codeinfo = (code isa MethodInstance ? code.uninferred : code)::CodeInfo # prepare approximate code info if code isa MethodInstance && (meth = code.def; meth isa Method) func = meth.name file = meth.file line = meth.line + codeinfo = meth.source else func = top_level_scope_sym file = empty_sym line = Int32(0) + if code isa Core.CodeInstance + codeinfo = code.inferred::CodeInfo + def = code.def + if isa(def, Core.ABIOverride) + def = def.def + end + if isa(def, MethodInstance) && isa(def.def, Method) + meth = def.def + func = meth.name + file = meth.file + line = meth.line + end + else + codeinfo = code::CodeInfo + end end - i = max(ip.stmt+1, 1) # ip.stmt is 0-indexed - if i > length(codeinfo.codelocs) || codeinfo.codelocs[i] == 0 + def = (code isa CodeInfo ? StackTraces : code) # Module just used as a token for top-level code + pc::Int = max(ip.stmt + 1, 0) # n.b. 
ip.stmt is 0-indexed + scopes = LineInfoNode[] + append_scopes!(scopes, pc, codeinfo.debuginfo, def) + if isempty(scopes) return [StackFrame(func, file, line, code, false, false, 0)] end - lineinfo = codeinfo.linetable[codeinfo.codelocs[i]]::Core.LineInfoNode - scopes = StackFrame[] - while true - inlined = lineinfo.inlined_at != 0 - push!(scopes, StackFrame(Base.IRShow.method_name(lineinfo)::Symbol, lineinfo.file, lineinfo.line, inlined ? nothing : code, false, inlined, 0)) - inlined || break - lineinfo = codeinfo.linetable[lineinfo.inlined_at]::Core.LineInfoNode + inlined = false + scopes = map(scopes) do lno + if inlined + def = lno.method + def isa Union{Method,Core.CodeInstance,MethodInstance} || (def = nothing) + else + def = codeinfo + end + sf = StackFrame(normalize_method_name(lno.method), lno.file, lno.line, def, false, inlined, 0) + inlined = true + return sf end return scopes end @@ -254,7 +185,7 @@ Return a stack trace in the form of a vector of `StackFrame`s. (By default stack doesn't return C functions, but this can be enabled.) When called without specifying a trace, `stacktrace` first calls `backtrace`. """ -Base.@constprop :none function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Core.Compiler.InterpreterIP,Ptr{Cvoid}}}, c_funcs::Bool=false) +Base.@constprop :none function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Ptr{Cvoid}}}, c_funcs::Bool=false) stack = StackTrace() for ip in trace for frame in lookup(ip) @@ -306,6 +237,23 @@ end is_top_level_frame(f::StackFrame) = f.linfo isa CodeInfo || (f.linfo === nothing && f.func === top_level_scope_sym) +function frame_method_or_module(lkup::StackFrame) + code = lkup.linfo + code isa Method && return code + code isa Module && return code + mi = frame_mi(lkup) + mi isa MethodInstance || return nothing + return mi.def +end + +function frame_mi(lkup::StackFrame) + code = lkup.linfo + code isa Core.CodeInstance && (code = code.def) + code isa Core.ABIOverride && (code = code.def) + code isa MethodInstance || return nothing + return code +end + function show_spec_linfo(io::IO, frame::StackFrame) linfo = frame.linfo if linfo === nothing @@ -320,16 +268,18 @@ function show_spec_linfo(io::IO, frame::StackFrame) print(io, "top-level scope") elseif linfo isa Module Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true) - elseif linfo isa MethodInstance - def = linfo.def - if def isa Module - Base.show_mi(io, linfo, #=from_stackframe=#true) + else + if linfo isa Union{MethodInstance, CodeInstance} + def = frame_method_or_module(frame) + if def isa Module + Base.show_mi(io, linfo, #=from_stackframe=#true) + else + show_spec_sig(io, def, frame_mi(frame).specTypes) + end else - show_spec_sig(io, def, linfo.specTypes) + m = linfo::Method + show_spec_sig(io, m, m.sig) end - else - m = linfo::Method - show_spec_sig(io, m, m.sig) end end @@ -381,6 +331,12 @@ end function Base.parentmodule(frame::StackFrame) linfo = frame.linfo + if linfo isa CodeInstance + linfo = linfo.def + if isa(linfo, Core.ABIOverride) + linfo = linfo.def + end + end if linfo isa MethodInstance def = linfo.def if def isa Module diff --git a/base/stat.jl b/base/stat.jl index 81f9dcfd20191..fc2ac9a04b0bf 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -25,6 +25,37 @@ export stat, uperm +""" + StatStruct + +A struct which stores information about a file. Usually +constructed by calling [`stat`](@ref) on a path. + +This struct is used internally as the foundation of a number of utility +functions. 
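For instance, a brief sketch of how these utilities consume a `StatStruct` (illustration only; the temporary file name is arbitrary):

```julia
path = joinpath(tempdir(), "statstruct_demo.txt")   # arbitrary scratch file
touch(path)
s = stat(path)                 # a StatStruct
filesize(s) == s.size          # true: filesize simply reads the size field
isfile(s) && !isdir(s)         # true: the is* predicates test bits of s.mode
rm(path)
```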
Some return specific parts of the information stored in it +directly, such as [`filesize`](@ref), [`mtime`](@ref) and [`ctime`](@ref). Others add +some logic on top using bit-manipulation, such as [`isfifo`](@ref), [`ischardev`](@ref), and [`issetuid`](@ref). + +The following fields of this struct are considered public API: + +| Name | Type | Description | +|:--------|:--------------------------------|:-------------------------------------------------------------------| +| desc | `Union{String, Base.OS_HANDLE}` | The path or OS file descriptor | +| size | `Int64` | The size (in bytes) of the file | +| device | `UInt` | ID of the device that contains the file | +| inode | `UInt` | The inode number of the file | +| mode | `UInt` | The protection mode of the file | +| nlink | `Int` | The number of hard links to the file | +| uid | `UInt` | The user id of the owner of the file | +| gid | `UInt` | The group id of the file owner | +| rdev | `UInt` | If this file refers to a device, the ID of the device it refers to | +| blksize | `Int64` | The file-system preferred block size for the file | +| blocks | `Int64` | The number of 512-byte blocks allocated | +| mtime | `Float64` | Unix timestamp of when the file was last modified | +| ctime | `Float64` | Unix timestamp of when the file's metadata was changed | + +See also: [`stat`](@ref) +""" struct StatStruct desc :: Union{String, OS_HANDLE} # for show method, not included in equality or hash device :: UInt @@ -39,10 +70,11 @@ struct StatStruct blocks :: Int64 mtime :: Float64 ctime :: Float64 + ioerrno :: Int32 end @eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash - $(let ex = true + $(let ex = true for fld in fieldnames(StatStruct)[2:end] ex = :(getfield(x, $(QuoteNode(fld))) === getfield(y, $(QuoteNode(fld))) && $ex) end @@ -50,28 +82,29 @@ end end) end @eval function Base.hash(obj::StatStruct, h::UInt) - $(quote + $(quote $(Any[:(h = hash(getfield(obj, $(QuoteNode(fld))), h)) for fld in fieldnames(StatStruct)[2:end]]...) return h end) end -StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf) -StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct( +StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Base.UV_ENOENT) +StatStruct(buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct("", buf, ioerrno) +StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct( desc isa OS_HANDLE ? desc : String(desc), - ccall(:jl_stat_dev, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_ino, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_mode, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_nlink, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_uid, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_gid, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_rdev, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_size, UInt64, (Ptr{UInt8},), buf), - ccall(:jl_stat_blksize, UInt64, (Ptr{UInt8},), buf), - ccall(:jl_stat_blocks, UInt64, (Ptr{UInt8},), buf), - ccall(:jl_stat_mtime, Float64, (Ptr{UInt8},), buf), - ccall(:jl_stat_ctime, Float64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_dev, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_ino, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? 
zero(UInt32) : ccall(:jl_stat_mode, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_nlink, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_uid, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_gid, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_rdev, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_size, UInt64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blksize, UInt64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blocks, UInt64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_mtime, Float64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_ctime, Float64, (Ptr{UInt8},), buf), + ioerrno ) function iso_datetime_with_relative(t, tnow) @@ -106,35 +139,41 @@ end function show_statstruct(io::IO, st::StatStruct, oneline::Bool) print(io, oneline ? "StatStruct(" : "StatStruct for ") show(io, st.desc) - oneline || print(io, "\n ") - print(io, " size: ", st.size, " bytes") - oneline || print(io, "\n") - print(io, " device: ", st.device) - oneline || print(io, "\n ") - print(io, " inode: ", st.inode) - oneline || print(io, "\n ") - print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")") - oneline || print(io, "\n ") - print(io, " nlink: ", st.nlink) - oneline || print(io, "\n ") - print(io, " uid: $(st.uid)") - username = getusername(st.uid) - username === nothing || print(io, " (", username, ")") - oneline || print(io, "\n ") - print(io, " gid: ", st.gid) - groupname = getgroupname(st.gid) - groupname === nothing || print(io, " (", groupname, ")") - oneline || print(io, "\n ") - print(io, " rdev: ", st.rdev) - oneline || print(io, "\n ") - print(io, " blksz: ", st.blksize) - oneline || print(io, "\n") - print(io, " blocks: ", st.blocks) - tnow = round(UInt, time()) - oneline || print(io, "\n ") - print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow)) - oneline || print(io, "\n ") - print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow)) + code = st.ioerrno + if code != 0 + print(io, oneline ? 
" " : "\n ") + print(io, Base.uverrorname(code), ": ", Base.struverror(code)) + else + oneline || print(io, "\n ") + print(io, " size: ", st.size, " bytes") + oneline || print(io, "\n") + print(io, " device: ", st.device) + oneline || print(io, "\n ") + print(io, " inode: ", st.inode) + oneline || print(io, "\n ") + print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")") + oneline || print(io, "\n ") + print(io, " nlink: ", st.nlink) + oneline || print(io, "\n ") + print(io, " uid: $(st.uid)") + username = getusername(st.uid) + username === nothing || print(io, " (", username, ")") + oneline || print(io, "\n ") + print(io, " gid: ", st.gid) + groupname = getgroupname(st.gid) + groupname === nothing || print(io, " (", groupname, ")") + oneline || print(io, "\n ") + print(io, " rdev: ", st.rdev) + oneline || print(io, "\n ") + print(io, " blksz: ", st.blksize) + oneline || print(io, "\n") + print(io, " blocks: ", st.blocks) + tnow = round(UInt, time()) + oneline || print(io, "\n ") + print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow)) + oneline || print(io, "\n ") + print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow)) + end oneline && print(io, ")") return nothing end @@ -144,62 +183,68 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false # stat & lstat functions +checkstat(s::StatStruct) = Int(s.ioerrno) in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) ? s : uv_error(string("stat(", repr(s.desc), ")"), s.ioerrno) + macro stat_call(sym, arg1type, arg) return quote - stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) + stat_buf = fill!(Memory{UInt8}(undef, Int(ccall(:jl_sizeof_stat, Int32, ()))), 0x00) r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf) - if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)) - uv_error(string("stat(", repr($(esc(arg))), ")"), r) - end - st = StatStruct($(esc(arg)), stat_buf) - if ispath(st) != (r == 0) - error("stat returned zero type for a valid path") - end - return st + return checkstat(StatStruct($(esc(arg)), stat_buf, r)) end end stat(fd::OS_HANDLE) = @stat_call jl_fstat OS_HANDLE fd -stat(path::AbstractString) = @stat_call jl_stat Cstring path -lstat(path::AbstractString) = @stat_call jl_lstat Cstring path +function stat(path::AbstractString) + # @info "stat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace()) + @stat_call jl_stat Cstring path +end +function lstat(path::AbstractString) + # @info "lstat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace()) + @stat_call jl_lstat Cstring path +end if RawFD !== OS_HANDLE global stat(fd::RawFD) = stat(Libc._get_osfhandle(fd)) end -stat(fd::Integer) = stat(RawFD(fd)) """ - stat(file) + stat(path) + stat(path_elements...) Return a structure whose fields contain information about the file. -The fields of the structure are: +If multiple arguments are given, they are joined by [`joinpath`](@ref). 
-| Name | Description | -|:--------|:-------------------------------------------------------------------| -| desc | The path or OS file descriptor | -| size | The size (in bytes) of the file | -| device | ID of the device that contains the file | -| inode | The inode number of the file | -| mode | The protection mode of the file | -| nlink | The number of hard links to the file | -| uid | The user id of the owner of the file | -| gid | The group id of the file owner | -| rdev | If this file refers to a device, the ID of the device it refers to | -| blksize | The file-system preferred block size for the file | -| blocks | The number of such blocks allocated | -| mtime | Unix timestamp of when the file was last modified | -| ctime | Unix timestamp of when the file's metadata was changed | +The fields of the structure are: -""" +| Name | Type | Description | +|:--------|:--------------------------------|:-------------------------------------------------------------------| +| desc | `Union{String, Base.OS_HANDLE}` | The path or OS file descriptor | +| size | `Int64` | The size (in bytes) of the file | +| device | `UInt` | ID of the device that contains the file | +| inode | `UInt` | The inode number of the file | +| mode | `UInt` | The protection mode of the file | +| nlink | `Int` | The number of hard links to the file | +| uid | `UInt` | The user id of the owner of the file | +| gid | `UInt` | The group id of the file owner | +| rdev | `UInt` | If this file refers to a device, the ID of the device it refers to | +| blksize | `Int64` | The file-system preferred block size for the file | +| blocks | `Int64` | The number of 512-byte blocks allocated | +| mtime | `Float64` | Unix timestamp of when the file was last modified | +| ctime | `Float64` | Unix timestamp of when the file's metadata was changed | +""" +stat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("stat not implemented for $(typeof(path))") : stat(path2)) stat(path...) = stat(joinpath(path...)) """ - lstat(file) + lstat(path) + lstat(path_elements...) -Like [`stat`](@ref), but for symbolic links gets the info for the link -itself rather than the file it refers to. -This function must be called on a file path rather than a file object or a file -descriptor. +Like [`stat`](@ref), but for symbolic links gets the info +for the link itself rather than the file it refers to. + +This function must be called on a file path rather +than a file object or a file descriptor. """ +lstat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("lstat not implemented for $(typeof(path))") : lstat(path2)) lstat(path...) = lstat(joinpath(path...)) # some convenience functions @@ -250,9 +295,14 @@ const filemode_table = ( ) """ - filemode(file) + filemode(path) + filemode(path_elements...) + filemode(stat_struct) + +Return the mode of the file located at `path`, +or the mode indicated by the file descriptor `stat_struct`. -Equivalent to `stat(file).mode`. +Equivalent to `stat(path).mode` or `stat_struct.mode`. """ filemode(st::StatStruct) = st.mode filemode_string(st::StatStruct) = filemode_string(st.mode) @@ -273,23 +323,38 @@ function filemode_string(mode) end """ - filesize(path...) + filesize(path) + filesize(path_elements...) + filesize(stat_struct) -Equivalent to `stat(file).size`. +Return the size of the file located at `path`, +or the size indicated by file descriptor `stat_struct`. + +Equivalent to `stat(path).size` or `stat_struct.size`. 
""" filesize(st::StatStruct) = st.size """ - mtime(file) + mtime(path) + mtime(path_elements...) + mtime(stat_struct) + +Return the unix timestamp of when the file at `path` was last modified, +or the last modified timestamp indicated by the file descriptor `stat_struct`. -Equivalent to `stat(file).mtime`. +Equivalent to `stat(path).mtime` or `stat_struct.mtime`. """ mtime(st::StatStruct) = st.mtime """ - ctime(file) + ctime(path) + ctime(path_elements...) + ctime(stat_struct) + +Return the unix timestamp of when the metadata of the file at `path` was last modified, +or the last modified metadata timestamp indicated by the file descriptor `stat_struct`. -Equivalent to `stat(file).ctime`. +Equivalent to `stat(path).ctime` or `stat_struct.ctime`. """ ctime(st::StatStruct) = st.ctime @@ -297,31 +362,48 @@ ctime(st::StatStruct) = st.ctime """ ispath(path) -> Bool + ispath(path_elements...) -> Bool Return `true` if a valid filesystem entity exists at `path`, otherwise returns `false`. + This is the generalization of [`isfile`](@ref), [`isdir`](@ref) etc. """ -ispath(st::StatStruct) = filemode(st) & 0xf000 != 0x0000 +ispath(st::StatStruct) = st.ioerrno == 0 +function ispath(path::String) + # We use `access()` and `F_OK` to determine if a given path exists. `F_OK` comes from `unistd.h`. + F_OK = 0x00 + r = ccall(:jl_fs_access, Cint, (Cstring, Cint), path, F_OK) + if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)) + uv_error(string("ispath(", repr(path), ")"), r) + end + return r == 0 +end +ispath(path::AbstractString) = ispath(String(path)) """ isfifo(path) -> Bool + isfifo(path_elements...) -> Bool + isfifo(stat_struct) -> Bool -Return `true` if `path` is a FIFO, `false` otherwise. +Return `true` if the file at `path` or file descriptor `stat_struct` is FIFO, `false` otherwise. """ isfifo(st::StatStruct) = filemode(st) & 0xf000 == 0x1000 """ ischardev(path) -> Bool + ischardev(path_elements...) -> Bool + ischardev(stat_struct) -> Bool -Return `true` if `path` is a character device, `false` otherwise. +Return `true` if the path `path` or file descriptor `stat_struct` refer to a character device, `false` otherwise. """ ischardev(st::StatStruct) = filemode(st) & 0xf000 == 0x2000 """ isdir(path) -> Bool + isdir(path_elements...) -> Bool -Return `true` if `path` is a directory, `false` otherwise. +Return `true` if `path` points to a directory, `false` otherwise. # Examples ```jldoctest @@ -338,15 +420,18 @@ isdir(st::StatStruct) = filemode(st) & 0xf000 == 0x4000 """ isblockdev(path) -> Bool + isblockdev(path_elements...) -> Bool + isblockdev(stat_struct) -> Bool -Return `true` if `path` is a block device, `false` otherwise. +Return `true` if the path `path` or file descriptor `stat_struct` refer to a block device, `false` otherwise. """ isblockdev(st::StatStruct) = filemode(st) & 0xf000 == 0x6000 """ isfile(path) -> Bool + isfile(path_elements...) -> Bool -Return `true` if `path` is a regular file, `false` otherwise. +Return `true` if `path` points to a regular file, `false` otherwise. # Examples ```jldoctest @@ -372,15 +457,17 @@ isfile(st::StatStruct) = filemode(st) & 0xf000 == 0x8000 """ islink(path) -> Bool + islink(path_elements...) -> Bool -Return `true` if `path` is a symbolic link, `false` otherwise. +Return `true` if `path` points to a symbolic link, `false` otherwise. """ islink(st::StatStruct) = filemode(st) & 0xf000 == 0xa000 """ issocket(path) -> Bool + issocket(path_elements...) -> Bool -Return `true` if `path` is a socket, `false` otherwise. 
+Return `true` if `path` points to a socket, `false` otherwise. """ issocket(st::StatStruct) = filemode(st) & 0xf000 == 0xc000 @@ -388,29 +475,37 @@ issocket(st::StatStruct) = filemode(st) & 0xf000 == 0xc000 """ issetuid(path) -> Bool + issetuid(path_elements...) -> Bool + issetuid(stat_struct) -> Bool -Return `true` if `path` has the setuid flag set, `false` otherwise. +Return `true` if the file at `path` or file descriptor `stat_struct` have the setuid flag set, `false` otherwise. """ issetuid(st::StatStruct) = (filemode(st) & 0o4000) > 0 """ issetgid(path) -> Bool + issetgid(path_elements...) -> Bool + issetgid(stat_struct) -> Bool -Return `true` if `path` has the setgid flag set, `false` otherwise. +Return `true` if the file at `path` or file descriptor `stat_struct` have the setgid flag set, `false` otherwise. """ issetgid(st::StatStruct) = (filemode(st) & 0o2000) > 0 """ issticky(path) -> Bool + issticky(path_elements...) -> Bool + issticky(stat_struct) -> Bool -Return `true` if `path` has the sticky bit set, `false` otherwise. +Return `true` if the file at `path` or file descriptor `stat_struct` have the sticky bit set, `false` otherwise. """ issticky(st::StatStruct) = (filemode(st) & 0o1000) > 0 """ - uperm(file) + uperm(path) + uperm(path_elements...) + uperm(stat_struct) -Get the permissions of the owner of the file as a bitfield of +Return a bitfield of the owner permissions for the file at `path` or file descriptor `stat_struct`. | Value | Description | |:------|:-------------------| @@ -418,22 +513,52 @@ Get the permissions of the owner of the file as a bitfield of | 02 | Write Permission | | 04 | Read Permission | -For allowed arguments, see [`stat`](@ref). +The fact that a bitfield is returned means that if the permission +is read+write, the bitfield is "110", which maps to the decimal +value of 0+2+4=6. This is reflected in the printing of the +returned `UInt8` value. + +See also [`gperm`](@ref) and [`operm`](@ref). + +```jldoctest +julia> touch("dummy_file"); # Create test-file without contents + +julia> uperm("dummy_file") +0x06 + +julia> bitstring(ans) +"00000110" + +julia> has_read_permission(path) = uperm(path) & 0b00000100 != 0; # Use bit mask to check specific bit + +julia> has_read_permission("dummy_file") +true + +julia> rm("dummy_file") # Clean up test-file +``` """ uperm(st::StatStruct) = UInt8((filemode(st) >> 6) & 0x7) """ - gperm(file) + gperm(path) + gperm(path_elements...) + gperm(stat_struct) Like [`uperm`](@ref) but gets the permissions of the group owning the file. + +See also [`operm`](@ref). """ gperm(st::StatStruct) = UInt8((filemode(st) >> 3) & 0x7) """ - operm(file) + operm(path) + operm(path_elements...) + operm(stat_struct) + +Like [`uperm`](@ref) but gets the permissions for people who neither own the +file nor are a member of the group owning the file. -Like [`uperm`](@ref) but gets the permissions for people who neither own the file nor are a member of -the group owning the file +See also [`gperm`](@ref). """ operm(st::StatStruct) = UInt8((filemode(st) ) & 0x7) @@ -469,7 +594,7 @@ function samefile(a::StatStruct, b::StatStruct) end """ - samefile(path_a::AbstractString, path_b::AbstractString) + samefile(path_a, path_b) Check if the paths `path_a` and `path_b` refer to the same existing file or directory. """ @@ -477,6 +602,7 @@ samefile(a::AbstractString, b::AbstractString) = samefile(stat(a), stat(b)) """ ismount(path) -> Bool + ismount(path_elements...) -> Bool Return `true` if `path` is a mount point, `false` otherwise. 
""" diff --git a/base/staticdata.jl b/base/staticdata.jl new file mode 100644 index 0000000000000..79d81788cc16a --- /dev/null +++ b/base/staticdata.jl @@ -0,0 +1,296 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module StaticData + +using Core: CodeInstance, MethodInstance +using Base: get_world_counter + +const WORLD_AGE_REVALIDATION_SENTINEL::UInt = 1 +const _jl_debug_method_invalidation = Ref{Union{Nothing,Vector{Any}}}(nothing) +debug_method_invalidation(onoff::Bool) = + _jl_debug_method_invalidation[] = onoff ? Any[] : nothing + +function get_ci_mi(codeinst::CodeInstance) + def = codeinst.def + if def isa Core.ABIOverride + return def.def + else + return def::MethodInstance + end +end + +# Restore backedges to external targets +# `edges` = [caller1, ...], the list of worklist-owned code instances internally +# `ext_ci_list` = [caller1, ...], the list of worklist-owned code instances externally +function insert_backedges(edges::Vector{Any}, ext_ci_list::Union{Nothing,Vector{Any}}) + # determine which CodeInstance objects are still valid in our image + # to enable any applicable new codes + stack = CodeInstance[] + visiting = IdDict{CodeInstance,Int}() + _insert_backedges(edges, stack, visiting) + if ext_ci_list !== nothing + _insert_backedges(ext_ci_list, stack, visiting, #=external=#true) + end +end + +function _insert_backedges(edges::Vector{Any}, stack::Vector{CodeInstance}, visiting::IdDict{CodeInstance,Int}, external::Bool=false) + for i = 1:length(edges) + codeinst = edges[i]::CodeInstance + verify_method_graph(codeinst, stack, visiting) + minvalid = codeinst.min_world + maxvalid = codeinst.max_world + if maxvalid ≥ minvalid + if get_world_counter() == maxvalid + # if this callee is still valid, add all the backedges + Base.Compiler.store_backedges(codeinst, codeinst.edges) + end + if get_world_counter() == maxvalid + maxvalid = typemax(UInt) + @atomic :monotonic codeinst.max_world = maxvalid + end + if external + caller = get_ci_mi(codeinst) + @assert isdefined(codeinst, :inferred) # See #53586, #53109 + inferred = @ccall jl_rettype_inferred( + codeinst.owner::Any, caller::Any, minvalid::UInt, maxvalid::UInt)::Any + if inferred !== nothing + # We already got a code instance for this world age range from + # somewhere else - we don't need this one. 
+ else + @ccall jl_mi_cache_insert(caller::Any, codeinst::Any)::Cvoid + end + end + end + end +end + +function verify_method_graph(codeinst::CodeInstance, stack::Vector{CodeInstance}, visiting::IdDict{CodeInstance,Int}) + @assert isempty(stack); @assert isempty(visiting); + child_cycle, minworld, maxworld = verify_method(codeinst, stack, visiting) + @assert child_cycle == 0 + @assert isempty(stack); @assert isempty(visiting); + nothing +end + +# Test all edges relevant to a method: +# - Visit the entire call graph, starting from edges[idx] to determine if that method is valid +# - Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable +# and slightly modified with an early termination option once the computation reaches its minimum +function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visiting::IdDict{CodeInstance,Int}) + world = codeinst.min_world + let max_valid2 = codeinst.max_world + if max_valid2 ≠ WORLD_AGE_REVALIDATION_SENTINEL + return 0, world, max_valid2 + end + end + current_world = get_world_counter() + local minworld::UInt, maxworld::UInt = 1, current_world + @assert get_ci_mi(codeinst).def isa Method + if haskey(visiting, codeinst) + return visiting[codeinst], minworld, maxworld + end + push!(stack, codeinst) + depth = length(stack) + visiting[codeinst] = depth + # TODO JL_TIMING(VERIFY_IMAGE, VERIFY_Methods) + callees = codeinst.edges + # verify current edges + if isempty(callees) + # quick return: no edges to verify (though we probably shouldn't have gotten here from WORLD_AGE_REVALIDATION_SENTINEL) + elseif maxworld == unsafe_load(cglobal(:jl_require_world, UInt)) + # if no new worlds were allocated since serializing the base module, then no new validation is worth doing right now either + minworld = maxworld + else + j = 1 + while j ≤ length(callees) + local min_valid2::UInt, max_valid2::UInt + edge = callees[j] + @assert !(edge isa Method) # `Method`-edge isn't allowed for the optimized one-edge format + if edge isa Core.BindingPartition + j += 1 + continue + end + if edge isa CodeInstance + edge = get_ci_mi(edge) + end + if edge isa MethodInstance + sig = typeintersect((edge.def::Method).sig, edge.specTypes) # TODO?? 
+ min_valid2, max_valid2, matches = verify_call(sig, callees, j, 1, world) + j += 1 + elseif edge isa Int + sig = callees[j+1] + min_valid2, max_valid2, matches = verify_call(sig, callees, j+2, edge, world) + j += 2 + edge + edge = sig + else + callee = callees[j+1] + if callee isa Core.MethodTable # skip the legacy edge (missing backedge) + j += 2 + continue + end + if callee isa CodeInstance + callee = get_ci_mi(callee) + end + if callee isa MethodInstance + meth = callee.def::Method + else + meth = callee::Method + end + min_valid2, max_valid2 = verify_invokesig(edge, meth, world) + matches = nothing + j += 2 + end + if minworld < min_valid2 + minworld = min_valid2 + end + if maxworld > max_valid2 + maxworld = max_valid2 + end + invalidations = _jl_debug_method_invalidation[] + if max_valid2 ≠ typemax(UInt) && invalidations !== nothing + push!(invalidations, edge, "insert_backedges_callee", codeinst, matches) + end + if max_valid2 == 0 && invalidations === nothing + break + end + end + end + # verify recursive edges (if valid, or debugging) + cycle = depth + cause = codeinst + if maxworld ≠ 0 || _jl_debug_method_invalidation[] !== nothing + for j = 1:length(callees) + edge = callees[j] + if !(edge isa CodeInstance) + continue + end + callee = edge + local min_valid2::UInt, max_valid2::UInt + child_cycle, min_valid2, max_valid2 = verify_method(callee, stack, visiting) + if minworld < min_valid2 + minworld = min_valid2 + end + if minworld > max_valid2 + max_valid2 = 0 + end + if maxworld > max_valid2 + cause = callee + maxworld = max_valid2 + end + if max_valid2 == 0 + # found what we were looking for, so terminate early + break + elseif child_cycle ≠ 0 && child_cycle < cycle + # record the cycle will resolve at depth "cycle" + cycle = child_cycle + end + end + end + if maxworld ≠ 0 && cycle ≠ depth + return cycle, minworld, maxworld + end + # If we are the top of the current cycle, now mark all other parts of + # our cycle with what we found. + # Or if we found a failed edge, also mark all of the other parts of the + # cycle as also having a failed edge. + while length(stack) ≥ depth + child = pop!(stack) + if maxworld ≠ 0 + @atomic :monotonic child.min_world = minworld + end + @atomic :monotonic child.max_world = maxworld + @assert visiting[child] == length(stack) + 1 + delete!(visiting, child) + invalidations = _jl_debug_method_invalidation[] + if invalidations !== nothing && maxworld < current_world + push!(invalidations, child, "verify_methods", cause) + end + end + return 0, minworld, maxworld +end + +function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n::Int, world::UInt) + # verify that these edges intersect with the same methods as before + lim = _jl_debug_method_invalidation[] !== nothing ? 
Int(typemax(Int32)) : n + minworld = Ref{UInt}(1) + maxworld = Ref{UInt}(typemax(UInt)) + has_ambig = Ref{Int32}(0) + result = Base._methods_by_ftype(sig, nothing, lim, world, #=ambig=#false, minworld, maxworld, has_ambig) + if result === nothing + maxworld[] = 0 + else + # setdiff!(result, expected) + if length(result) ≠ n + maxworld[] = 0 + end + ins = 0 + for k = 1:length(result) + match = result[k]::Core.MethodMatch + local found = false + for j = 1:n + t = expecteds[i+j-1] + if t isa Method + meth = t + else + if t isa CodeInstance + t = get_ci_mi(t) + else + t = t::MethodInstance + end + meth = t.def::Method + end + if match.method == meth + found = true + break + end + end + if !found + # intersection has a new method or a method was + # deleted--this is now probably no good, just invalidate + # everything about it now + maxworld[] = 0 + if _jl_debug_method_invalidation[] === nothing + break + end + ins += 1 + result[ins] = match.method + end + end + if maxworld[] ≠ typemax(UInt) && _jl_debug_method_invalidation[] !== nothing + resize!(result, ins) + end + end + return minworld[], maxworld[], result +end + +function verify_invokesig(@nospecialize(invokesig), expected::Method, world::UInt) + @assert invokesig isa Type + local minworld::UInt, maxworld::UInt + if invokesig === expected.sig + # the invoke match is `expected` for `expected->sig`, unless `expected` is invalid + minworld = expected.primary_world + maxworld = expected.deleted_world + @assert minworld ≤ world + if maxworld < world + maxworld = 0 + end + else + minworld = 1 + maxworld = typemax(UInt) + mt = Base.get_methodtable(expected) + if mt === nothing + maxworld = 0 + else + matched, valid_worlds = Base.Compiler._findsup(invokesig, mt, world) + minworld, maxworld = valid_worlds.min_world, valid_worlds.max_world + if matched === nothing + maxworld = 0 + elseif matched.method != expected + maxworld = 0 + end + end + end + return minworld, maxworld +end + +end # module StaticData diff --git a/base/stream.jl b/base/stream.jl index 0b6c9a93777f6..e81f65685df72 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -76,7 +76,7 @@ function getproperty(stream::LibuvStream, name::Symbol) end # IO -# +- GenericIOBuffer{T<:AbstractArray{UInt8,1}} (not exported) +# +- GenericIOBuffer{T<:AbstractVector{UInt8}} (not exported) # +- AbstractPipe (not exported) # . +- Pipe # . +- Process (not exported) @@ -89,7 +89,7 @@ end # . +- TTY (not exported) # . +- UDPSocket # . 
+- BufferStream (FIXME: 2.0) -# +- IOBuffer = Base.GenericIOBuffer{Array{UInt8,1}} +# +- IOBuffer = Base.GenericIOBuffer{Vector{UInt8}} # +- IOStream # IOServer @@ -122,7 +122,7 @@ const DEFAULT_READ_BUFFER_SZ = 10485760 # 10 MB if Sys.iswindows() const MAX_OS_WRITE = UInt(0x1FF0_0000) # 511 MB (determined semi-empirically, limited to 31 MB on XP) else - const MAX_OS_WRITE = UInt(typemax(Csize_t)) + const MAX_OS_WRITE = UInt(0x7FFF_0000) # almost 2 GB (both macOS and linux have this kernel restriction, although only macOS documents it) end @@ -304,7 +304,7 @@ function init_stdio(handle::Ptr{Cvoid}) elseif t == UV_TTY io = TTY(handle, StatusOpen) elseif t == UV_TCP - Sockets = require(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets")) + Sockets = require_stdlib(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets")) io = Sockets.TCPSocket(handle, StatusOpen) elseif t == UV_NAMED_PIPE io = PipeEndpoint(handle, StatusOpen) @@ -341,7 +341,7 @@ function open(h::OS_HANDLE) elseif t == UV_TTY io = TTY(h) elseif t == UV_TCP - Sockets = require(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets")) + Sockets = require_stdlib(PkgId(UUID((0x6462fe0b_24de_5631, 0x8697_dd941f90decc)), "Sockets")) io = Sockets.TCPSocket(h) elseif t == UV_NAMED_PIPE io = PipeEndpoint(h) @@ -436,7 +436,10 @@ end function closewrite(s::LibuvStream) iolock_begin() - check_open(s) + if !iswritable(s) + iolock_end() + return + end req = Libc.malloc(_sizeof_uv_shutdown) uv_req_set_data(req, C_NULL) # in case we get interrupted before arriving at the wait call err = ccall(:uv_shutdown, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}), @@ -450,14 +453,16 @@ function closewrite(s::LibuvStream) sigatomic_begin() uv_req_set_data(req, ct) iolock_end() - status = try + local status + try sigatomic_end() - wait()::Cint + status = wait()::Cint + sigatomic_begin() finally # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we won't get spurious notifications later @@ -564,8 +569,14 @@ displaysize(io::IO) = displaysize() displaysize() = (parse(Int, get(ENV, "LINES", "24")), parse(Int, get(ENV, "COLUMNS", "80")))::Tuple{Int, Int} +# This is a fancy way to make de-specialize a call to `displaysize(io::IO)` +# which is unfortunately invalidated by REPL +# (https://github.com/JuliaLang/julia/issues/56080) +# +# This makes the call less efficient, but avoids being invalidated by REPL. +displaysize_(io::IO) = Base.invoke_in_world(Base.tls_world_age(), displaysize, io)::Tuple{Int,Int} + function displaysize(io::TTY) - # A workaround for #34620 and #26687 (this still has the TOCTOU problem). check_open(io) local h::Int, w::Int @@ -588,6 +599,7 @@ function displaysize(io::TTY) s1 = Ref{Int32}(0) s2 = Ref{Int32}(0) iolock_begin() + check_open(io) Base.uv_error("size (TTY)", ccall(:uv_tty_get_winsize, Int32, (Ptr{Cvoid}, Ptr{Int32}, Ptr{Int32}), io, s1, s2) != 0) @@ -605,7 +617,7 @@ end function alloc_request(buffer::IOBuffer, recommended_size::UInt) ensureroom(buffer, Int(recommended_size)) ptr = buffer.append ? 
buffer.size + 1 : buffer.ptr - nb = min(length(buffer.data), buffer.maxsize) - ptr + 1 + nb = min(length(buffer.data)-buffer.offset, buffer.maxsize) + buffer.offset - ptr + 1 return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb) end @@ -616,6 +628,7 @@ function notify_filled(buffer::IOBuffer, nread::Int) buffer.size += nread else buffer.ptr += nread + buffer.size = max(buffer.size, buffer.ptr - 1) end nothing end @@ -740,24 +753,42 @@ mutable struct Pipe <: AbstractPipe end """ -Construct an uninitialized Pipe object. + Pipe() -The appropriate end of the pipe will be automatically initialized if -the object is used in process spawning. This can be useful to easily -obtain references in process pipelines, e.g.: +Construct an uninitialized Pipe object, especially for IO communication between multiple processes. + +The appropriate end of the pipe will be automatically initialized if the object is used in +process spawning. This can be useful to easily obtain references in process pipelines, e.g.: ``` julia> err = Pipe() # After this `err` will be initialized and you may read `foo`'s -# stderr from the `err` pipe. +# stderr from the `err` pipe, or pass `err` to other pipelines. julia> run(pipeline(pipeline(`foo`, stderr=err), `cat`), wait=false) + +# Now destroy the write half of the pipe, so that the read half will get EOF +julia> closewrite(err) + +julia> read(err, String) +"stderr messages" ``` + +See also [`Base.link_pipe!`](@ref). """ Pipe() = Pipe(PipeEndpoint(), PipeEndpoint()) pipe_reader(p::Pipe) = p.out pipe_writer(p::Pipe) = p.in +""" + link_pipe!(pipe; reader_supports_async=false, writer_supports_async=false) + +Initialize `pipe` and link the `in` endpoint to the `out` endpoint. The keyword +arguments `reader_supports_async`/`writer_supports_async` correspond to +`OVERLAPPED` on Windows and `O_NONBLOCK` on POSIX systems. They should be `true` +unless they'll be used by an external program (e.g. the output of a command +executed with [`run`](@ref)). 
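For illustration, a minimal in-process sketch of this API (both ends are marked async since no external program is involved; the variable names and payload are placeholders):

```julia
# Illustrative sketch using the Pipe / link_pipe! / closewrite API documented above.
p = Pipe()
Base.link_pipe!(p, reader_supports_async=true, writer_supports_async=true)
write(p, "hello")
closewrite(p)        # signal EOF to the read half
read(p, String)      # "hello"
```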
+""" function link_pipe!(pipe::Pipe; reader_supports_async = false, writer_supports_async = false) @@ -910,8 +941,9 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) if bytesavailable(sbuf) >= nb nread = readbytes!(sbuf, a, nb) else + initsize = length(a) newbuf = PipeBuffer(a, maxsize=nb) - newbuf.size = 0 # reset the write pointer to the beginning + newbuf.size = newbuf.offset # reset the write pointer to the beginning nread = try s.buffer = newbuf write(newbuf, sbuf) @@ -920,7 +952,8 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) finally s.buffer = sbuf end - compact(newbuf) + _take!(a, _unsafe_take!(newbuf)) + length(a) >= initsize || resize!(a, initsize) end iolock_end() return nread @@ -958,7 +991,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt) unsafe_read(sbuf, p, nb) else newbuf = PipeBuffer(unsafe_wrap(Array, p, nb), maxsize=Int(nb)) - newbuf.size = 0 # reset the write pointer to the beginning + newbuf.size = newbuf.offset # reset the write pointer to the beginning try s.buffer = newbuf write(newbuf, sbuf) @@ -995,7 +1028,7 @@ function readavailable(this::LibuvStream) return bytes end -function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false) +function copyuntil(out::IO, x::LibuvStream, c::UInt8; keep::Bool=false) iolock_begin() buf = x.buffer @assert buf.seekable == false @@ -1025,9 +1058,9 @@ function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false) end end end - bytes = readuntil(buf, c, keep=keep) + copyuntil(out, buf, c; keep) iolock_end() - return bytes + return out end uv_write(s::LibuvStream, p::Vector{UInt8}) = GC.@preserve p uv_write(s, pointer(p), UInt(sizeof(p))) @@ -1040,17 +1073,19 @@ function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt) sigatomic_begin() uv_req_set_data(uvw, ct) iolock_end() - status = try + local status + try sigatomic_end() # wait for the last chunk to complete (or error) # assume that any errors would be sticky, # (so we don't need to monitor the error status of the intermediate writes) - wait()::Cint + status = wait()::Cint + sigatomic_begin() finally # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) if uv_req_data(uvw) != C_NULL # uvw is still alive, # so make sure we won't get spurious notifications later @@ -1273,7 +1308,7 @@ the pipe. !!! note `stream` must be a compatible objects, such as an `IOStream`, `TTY`, - `Pipe`, socket, or `devnull`. + [`Pipe`](@ref), socket, or `devnull`. See also [`redirect_stdio`](@ref). """ @@ -1286,7 +1321,7 @@ Like [`redirect_stdout`](@ref), but for [`stderr`](@ref). !!! note `stream` must be a compatible objects, such as an `IOStream`, `TTY`, - `Pipe`, socket, or `devnull`. + [`Pipe`](@ref), socket, or `devnull`. See also [`redirect_stdio`](@ref). """ @@ -1300,7 +1335,7 @@ Note that the direction of the stream is reversed. !!! note `stream` must be a compatible objects, such as an `IOStream`, `TTY`, - `Pipe`, socket, or `devnull`. + [`Pipe`](@ref), socket, or `devnull`. See also [`redirect_stdio`](@ref). """ @@ -1310,7 +1345,8 @@ redirect_stdin redirect_stdio(;stdin=stdin, stderr=stderr, stdout=stdout) Redirect a subset of the streams `stdin`, `stderr`, `stdout`. -Each argument must be an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`. 
+Each argument must be an `IOStream`, `TTY`, [`Pipe`](@ref), socket, or +`devnull`. !!! compat "Julia 1.7" `redirect_stdio` requires Julia 1.7 or later. @@ -1330,7 +1366,7 @@ call `f()` and restore each stream. Possible values for each stream are: * `nothing` indicating the stream should not be redirected. * `path::AbstractString` redirecting the stream to the file at `path`. -* `io` an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`. +* `io` an `IOStream`, `TTY`, [`Pipe`](@ref), socket, or `devnull`. # Examples ```julia-repl @@ -1489,7 +1525,7 @@ closewrite(s::BufferStream) = close(s) function close(s::BufferStream) lock(s.cond) do s.status = StatusClosed - notify(s.cond) + notify(s.cond) # aka flush nothing end end @@ -1549,6 +1585,7 @@ stop_reading(s::BufferStream) = nothing write(s::BufferStream, b::UInt8) = write(s, Ref{UInt8}(b)) function unsafe_write(s::BufferStream, p::Ptr{UInt8}, nb::UInt) nwrite = lock(s.cond) do + check_open(s) rv = unsafe_write(s.buffer, p, nb) s.buffer_writes || notify(s.cond) rv @@ -1569,9 +1606,18 @@ end buffer_writes(s::BufferStream, bufsize=0) = (s.buffer_writes = true; s) function flush(s::BufferStream) lock(s.cond) do + check_open(s) notify(s.cond) nothing end end skip(s::BufferStream, n) = skip(s.buffer, n) + +function reseteof(s::BufferStream) + lock(s.cond) do + s.status = StatusOpen + nothing + end + nothing +end diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl new file mode 100644 index 0000000000000..814ee2afa9d55 --- /dev/null +++ b/base/strings/annotated.jl @@ -0,0 +1,660 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +const Annotation = NamedTuple{(:label, :value), Tuple{Symbol, Any}} +const RegionAnnotation = NamedTuple{(:region, :label, :value), Tuple{UnitRange{Int}, Symbol, Any}} + +""" + AnnotatedString{S <: AbstractString} <: AbstractString + +A string with metadata, in the form of annotated regions. + +More specifically, this is a simple wrapper around any other +[`AbstractString`](@ref) that allows for regions of the wrapped string to be +annotated with labeled values. + +```text + C + ┌──────┸─────────┐ + "this is an example annotated string" + └──┰────────┼─────┘ │ + A └─────┰─────────┘ + B +``` + +The above diagram represents a `AnnotatedString` where three ranges have been +annotated (labeled `A`, `B`, and `C`). Each annotation holds a label (`Symbol`) +and a value (`Any`). These three pieces of information are held as a +`$RegionAnnotation`. + +Labels do not need to be unique, the same region can hold multiple annotations +with the same label. + +Code written for `AnnotatedString`s in general should conserve the following +properties: +- Which characters an annotation is applied to +- The order in which annotations are applied to each character + +Additional semantics may be introduced by specific uses of `AnnotatedString`s. + +A corollary of these rules is that adjacent, consecutively placed, annotations +with identical labels and values are equivalent to a single annotation spanning +the combined range. + +See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref), +[`annotations`](@ref), and [`annotate!`](@ref). + +# Constructors + +```julia +AnnotatedString(s::S<:AbstractString) -> AnnotatedString{S} +AnnotatedString(s::S<:AbstractString, annotations::Vector{$RegionAnnotation}) +``` + +A AnnotatedString can also be created with [`annotatedstring`](@ref), which acts much +like [`string`](@ref) but preserves any annotations present in the arguments. 
+ +# Examples + +```jldoctest; setup=:(using Base: AnnotatedString) +julia> AnnotatedString("this is an example annotated string", + [(1:18, :A, 1), (12:28, :B, 2), (18:35, :C, 3)]) +"this is an example annotated string" +``` +""" +struct AnnotatedString{S <: AbstractString} <: AbstractString + string::S + annotations::Vector{RegionAnnotation} +end + +""" + AnnotatedChar{S <: AbstractChar} <: AbstractChar + +A Char with annotations. + +More specifically, this is a simple wrapper around any other +[`AbstractChar`](@ref), which holds a list of arbitrary labelled annotations +(`$Annotation`) with the wrapped character. + +See also: [`AnnotatedString`](@ref), [`annotatedstring`](@ref), `annotations`, +and `annotate!`. + +# Constructors + +```julia +AnnotatedChar(s::S) -> AnnotatedChar{S} +AnnotatedChar(s::S, annotations::Vector{$Annotation}) +``` + +# Examples + +```jldoctest; setup=:(using Base: AnnotatedChar) +julia> AnnotatedChar('j', [(:label, 1)]) +'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) +``` +""" +struct AnnotatedChar{C <: AbstractChar} <: AbstractChar + char::C + annotations::Vector{Annotation} +end + +## Constructors ## + +# When called with overly-specialised arguments + +AnnotatedString(s::AbstractString, annots::Vector) = + AnnotatedString(s, Vector{RegionAnnotation}(annots)) + +AnnotatedString(s::AbstractString, annots) = + AnnotatedString(s, collect(RegionAnnotation, annots)) + +AnnotatedChar(c::AbstractChar, annots::Vector) = + AnnotatedChar(c, Vector{Annotation}(annots)) + +AnnotatedChar(c::AbstractChar, annots) = + AnnotatedChar(c, collect(Annotation, annots)) + +# Constructors to avoid recursive wrapping + +AnnotatedString(s::AnnotatedString, annots::Vector{RegionAnnotation}) = + AnnotatedString(s.string, vcat(s.annotations, annots)) + +AnnotatedChar(c::AnnotatedChar, annots::Vector{Annotation}) = + AnnotatedChar(c.char, vcat(c.annotations, Vector{Annotation}(annots))) + +# To avoid pointless overhead +String(s::AnnotatedString{String}) = s.string + +## Conversion/promotion ## + +convert(::Type{AnnotatedString}, s::AnnotatedString) = s +convert(::Type{AnnotatedString{S}}, s::S) where {S <: AbstractString} = + AnnotatedString(s, Vector{RegionAnnotation}()) +convert(::Type{AnnotatedString}, s::S) where {S <: AbstractString} = + convert(AnnotatedString{S}, s) +AnnotatedString(s::S) where {S <: AbstractString} = convert(AnnotatedString{S}, s) + +convert(::Type{AnnotatedChar}, c::AnnotatedChar) = c +convert(::Type{AnnotatedChar{C}}, c::C) where { C <: AbstractChar } = + AnnotatedChar{C}(c, Vector{Annotation}()) +convert(::Type{AnnotatedChar}, c::C) where { C <: AbstractChar } = + convert(AnnotatedChar{C}, c) + +AnnotatedChar(c::AbstractChar) = convert(AnnotatedChar, c) +AnnotatedChar(c::UInt32) = convert(AnnotatedChar, Char(c)) +AnnotatedChar{C}(c::UInt32) where {C <: AbstractChar} = convert(AnnotatedChar, C(c)) + +promote_rule(::Type{<:AnnotatedString}, ::Type{<:AbstractString}) = AnnotatedString + +## AbstractString interface ## + +ncodeunits(s::AnnotatedString) = ncodeunits(s.string) +codeunits(s::AnnotatedString) = codeunits(s.string) +codeunit(s::AnnotatedString) = codeunit(s.string) +codeunit(s::AnnotatedString, i::Integer) = codeunit(s.string, i) +isvalid(s::AnnotatedString, i::Integer) = isvalid(s.string, i) +@propagate_inbounds iterate(s::AnnotatedString, i::Integer=firstindex(s)) = + if i <= lastindex(s.string); (s[i], nextind(s, i)) end +eltype(::Type{<:AnnotatedString{S}}) where {S} = AnnotatedChar{eltype(S)} +firstindex(s::AnnotatedString) = 
firstindex(s.string) +lastindex(s::AnnotatedString) = lastindex(s.string) + +function getindex(s::AnnotatedString, i::Integer) + @boundscheck checkbounds(s, i) + @inbounds if isvalid(s, i) + AnnotatedChar(s.string[i], Annotation[(; label, value) for (; label, value) in annotations(s, i)]) + else + string_index_err(s, i) + end +end + +# To make `AnnotatedString`s repr-evaluable, we need to override +# the generic `AbstractString` 2-arg show method. + +function show(io::IO, s::A) where {A <: AnnotatedString} + show(io, A) + print(io, '(') + show(io, s.string) + print(io, ", ") + tupanns = Vector{Tuple{UnitRange{Int}, Symbol, Any}}(map(values, s.annotations)) + show(IOContext(io, :typeinfo => typeof(tupanns)), tupanns) + print(io, ')') +end + +# But still use the generic `AbstractString` fallback for the 3-arg show. +show(io::IO, ::MIME"text/plain", s::AnnotatedString) = + invoke(show, Tuple{IO, AbstractString}, io, s) + +## AbstractChar interface ## + +ncodeunits(c::AnnotatedChar) = ncodeunits(c.char) +codepoint(c::AnnotatedChar) = codepoint(c.char) + +# Avoid the iteration fallback with comparison +cmp(a::AnnotatedString, b::AbstractString) = cmp(a.string, b) +cmp(a::AbstractString, b::AnnotatedString) = cmp(a, b.string) +# To avoid method ambiguity +cmp(a::AnnotatedString, b::AnnotatedString) = cmp(a.string, b.string) + +==(a::AnnotatedString, b::AnnotatedString) = + a.string == b.string && a.annotations == b.annotations + +==(a::AnnotatedString, b::AbstractString) = isempty(a.annotations) && a.string == b +==(a::AbstractString, b::AnnotatedString) = isempty(b.annotations) && a == b.string + +# To prevent substring equality from hitting the generic fallback + +function ==(a::SubString{<:AnnotatedString}, b::SubString{<:AnnotatedString}) + SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == + SubString(b.string.string, b.offset, b.ncodeunits, Val(:noshift)) && + annotations(a) == annotations(b) +end + +==(a::SubString{<:AnnotatedString}, b::AnnotatedString) = + annotations(a) == annotations(b) && SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == b.string + +==(a::SubString{<:AnnotatedString}, b::AbstractString) = + isempty(annotations(a)) && SubString(a.string.string, a.offset, a.ncodeunits, Val(:noshift)) == b + +==(a::AbstractString, b::SubString{<:AnnotatedString}) = b == a + +==(a::AnnotatedString, b::SubString{<:AnnotatedString}) = b == a + +""" + annotatedstring(values...) + +Create a `AnnotatedString` from any number of `values` using their +[`print`](@ref)ed representation. + +This acts like [`string`](@ref), but takes care to preserve any annotations +present (in the form of [`AnnotatedString`](@ref) or [`AnnotatedChar`](@ref) values). + +See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref). + +## Examples + +```jldoctest; setup=:(using Base: AnnotatedString, annotatedstring) +julia> annotatedstring("now a AnnotatedString") +"now a AnnotatedString" + +julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label, 1)]), ", and unannotated") +"annotated, and unannotated" +``` +""" +function annotatedstring(xs...) 
+ isempty(xs) && return AnnotatedString("") + size = mapreduce(_str_sizehint, +, xs) + buf = IOBuffer(sizehint=size) + s = IOContext(buf, :color => true) + annotations = Vector{RegionAnnotation}() + for x in xs + size = filesize(s.io) + if x isa AnnotatedString + for annot in x.annotations + push!(annotations, setindex(annot, annot.region .+ size, :region)) + end + print(s, x.string) + elseif x isa SubString{<:AnnotatedString} + for annot in x.string.annotations + start, stop = first(annot.region), last(annot.region) + if start <= x.offset + x.ncodeunits && stop > x.offset + rstart = size + max(0, start - x.offset - 1) + 1 + rstop = size + min(stop, x.offset + x.ncodeunits) - x.offset + push!(annotations, setindex(annot, rstart:rstop, :region)) + end + end + print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift))) + elseif x isa AnnotatedChar + for annot in x.annotations + push!(annotations, (region=1+size:1+size, annot...)) + end + print(s, x.char) + else + print(s, x) + end + end + str = String(take!(buf)) + AnnotatedString(str, annotations) +end + +annotatedstring(s::AnnotatedString) = s +annotatedstring(c::AnnotatedChar) = + AnnotatedString(string(c.char), [(region=1:ncodeunits(c), annot...) for annot in c.annotations]) + +AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s) + +function repeat(str::AnnotatedString, r::Integer) + r == 0 && return one(AnnotatedString) + r == 1 && return str + unannot = repeat(str.string, r) + annotations = Vector{RegionAnnotation}() + len = ncodeunits(str) + fullregion = firstindex(str):lastindex(str) + if isempty(str.annotations) + elseif allequal(a -> a.region, str.annotations) && first(str.annotations).region == fullregion + newfullregion = firstindex(unannot):lastindex(unannot) + for annot in str.annotations + push!(annotations, setindex(annot, newfullregion, :region)) + end + else + for offset in 0:len:(r-1)*len + for annot in str.annotations + push!(annotations, setindex(annot, annot.region .+ offset, :region)) + end + end + end + AnnotatedString(unannot, annotations) +end + +repeat(str::SubString{<:AnnotatedString}, r::Integer) = + repeat(AnnotatedString(str), r) + +function repeat(c::AnnotatedChar, r::Integer) + str = repeat(c.char, r) + fullregion = firstindex(str):lastindex(str) + AnnotatedString(str, [(region=fullregion, annot...) for annot in c.annotations]) +end + +function reverse(s::AnnotatedString) + lastind = lastindex(s) + AnnotatedString( + reverse(s.string), + [setindex(annot, + UnitRange(1 + lastind - last(annot.region), + 1 + lastind - first(annot.region)), + :region) + for annot in s.annotations]) +end + +# TODO optimise? +reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s)) + +# TODO implement `replace(::AnnotatedString, ...)` + +## End AbstractString interface ## + +function _annotate!(annlist::Vector{RegionAnnotation}, region::UnitRange{Int}, label::Symbol, @nospecialize(value::Any)) + if value === nothing + deleteat!(annlist, findall(ann -> ann.region == region && ann.label === label, annlist)) + else + push!(annlist, RegionAnnotation((; region, label, value))) + end +end + +""" + annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol, value) + annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol, value) + +Annotate a `range` of `str` (or the entire string) with a labeled value `(label, value)`. +To remove existing `label` annotations, use a value of `nothing`. 
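As a small sketch of how regions are shifted when annotated values are combined with `annotatedstring` (the `:color` labels are placeholders):

```julia
using Base: AnnotatedString, annotatedstring, annotations

a = AnnotatedString("red", [(1:3, :color, :red)])
b = AnnotatedString("blue", [(1:4, :color, :blue)])
c = annotatedstring(a, ", ", b)   # "red, blue"
annotations(c)                    # regions shifted: (1:3, :color, :red) and (6:9, :color, :blue)
```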
+ +The order in which annotations are applied to `str` is semantically meaningful, +as described in [`AnnotatedString`](@ref). +""" +annotate!(s::AnnotatedString, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = + (_annotate!(s.annotations, range, label, val); s) + +annotate!(ss::AnnotatedString, label::Symbol, @nospecialize(val::Any)) = + annotate!(ss, firstindex(ss):lastindex(ss), label, val) + +annotate!(s::SubString{<:AnnotatedString}, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = + (annotate!(s.string, s.offset .+ (range), label, val); s) + +annotate!(s::SubString{<:AnnotatedString}, label::Symbol, @nospecialize(val::Any)) = + (annotate!(s.string, s.offset .+ (1:s.ncodeunits), label, val); s) + +""" + annotate!(char::AnnotatedChar, label::Symbol, value::Any) + +Annotate `char` with the labeled value `(label, value)`. +""" +annotate!(c::AnnotatedChar, label::Symbol, @nospecialize(val::Any)) = + (push!(c.annotations, Annotation((; label, val))); c) + +""" + annotations(str::Union{AnnotatedString, SubString{AnnotatedString}}, + [position::Union{Integer, UnitRange}]) -> + Vector{$RegionAnnotation} + +Get all annotations that apply to `str`. Should `position` be provided, only +annotations that overlap with `position` will be returned. + +Annotations are provided together with the regions they apply to, in the form of +a vector of region–annotation tuples. + +In accordance with the semantics documented in [`AnnotatedString`](@ref), the +order of annotations returned matches the order in which they were applied. + +See also: [`annotate!`](@ref). +""" +annotations(s::AnnotatedString) = s.annotations + +function annotations(s::SubString{<:AnnotatedString}) + RegionAnnotation[ + setindex(ann, first(ann.region)-s.offset:last(ann.region)-s.offset, :region) + for ann in annotations(s.string, s.offset+1:s.offset+s.ncodeunits)] +end + +function annotations(s::AnnotatedString, pos::UnitRange{<:Integer}) + # TODO optimise + RegionAnnotation[ + setindex(ann, max(first(pos), first(ann.region)):min(last(pos), last(ann.region)), :region) + for ann in s.annotations if !isempty(intersect(pos, ann.region))] +end + +annotations(s::AnnotatedString, pos::Integer) = annotations(s, pos:pos) + +annotations(s::SubString{<:AnnotatedString}, pos::Integer) = + annotations(s.string, s.offset + pos) + +annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) = + annotations(s.string, first(pos)+s.offset:last(pos)+s.offset) + +""" + annotations(chr::AnnotatedChar) -> Vector{$Annotation} + +Get all annotations of `chr`, in the form of a vector of annotation pairs. +""" +annotations(c::AnnotatedChar) = c.annotations + +## Character transformation helper function, c.f. `unicode.jl`. + +""" + annotated_chartransform(f::Function, str::AnnotatedString, state=nothing) + +Transform every character in `str` with `f`, adjusting annotation regions as +appropriate. `f` must take one of two forms, either: +- `f(c::Char) -> Char`, or +- `f(c::Char, state) -> (Char, state)`. + +This works by comparing the number of code units of each character before and +after transforming with `f`, recording and aggregating any differences, then +applying them to the annotation regions. + +Returns an `AnnotatedString{String}` (regardless of the original underling +string type of `str`). 
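For example, a small sketch: the lambda turns the one-byte `'e'` into the two-byte `'é'`, so the annotated region widens accordingly (the `:label` annotation is a placeholder):

```julia
using Base: AnnotatedString, annotated_chartransform, annotations

s = AnnotatedString("hello", [(1:5, :label, 1)])
t = annotated_chartransform(c -> c == 'e' ? 'é' : c, s)
String(t)          # "héllo" -- now 6 code units instead of 5
annotations(t)     # the annotation's region has been widened to 1:6
```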
+""" +function annotated_chartransform(f::Function, str::AnnotatedString, state=nothing) + outstr = IOBuffer() + annots = RegionAnnotation[] + bytepos = firstindex(str) - 1 + offsets = [bytepos => 0] + for c in str.string + oldnb = ncodeunits(c) + bytepos += oldnb + if isnothing(state) + c = f(c) + else + c, state = f(c, state) + end + nb = write(outstr, c) + if nb != oldnb + push!(offsets, bytepos => last(last(offsets)) + nb - oldnb) + end + end + for annot in str.annotations + start, stop = first(annot.region), last(annot.region) + start_offset = last(offsets[findlast(<=(start) ∘ first, offsets)::Int]) + stop_offset = last(offsets[findlast(<=(stop) ∘ first, offsets)::Int]) + push!(annots, setindex(annot, (start + start_offset):(stop + stop_offset), :region)) + end + AnnotatedString(String(take!(outstr)), annots) +end + +## AnnotatedIOBuffer + +struct AnnotatedIOBuffer <: AbstractPipe + io::IOBuffer + annotations::Vector{RegionAnnotation} +end + +AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}()) +AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) + +function show(io::IO, aio::AnnotatedIOBuffer) + show(io, AnnotatedIOBuffer) + size = filesize(aio.io) + print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ", + length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") +end + +pipe_reader(io::AnnotatedIOBuffer) = io.io +pipe_writer(io::AnnotatedIOBuffer) = io.io + +# Useful `IOBuffer` methods that we don't get from `AbstractPipe` +position(io::AnnotatedIOBuffer) = position(io.io) +seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) +seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io) +skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) +copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) + +annotations(io::AnnotatedIOBuffer) = io.annotations + +annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = + (_annotate!(io.annotations, range, label, val); io) + +function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) + astr = AnnotatedString(astr) + offset = position(io.io) + eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr)) + _insert_annotations!(io, astr.annotations) + write(io.io, String(astr)) +end + +write(io::AnnotatedIOBuffer, c::AnnotatedChar) = + write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations])) +write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) +write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) +write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) + +function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) + destpos = position(dest) + isappending = eof(dest) + srcpos = position(src) + nb = write(dest.io, src.io) + isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb) + srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region) + for annot in src.annotations if first(annot.region) >= srcpos] + _insert_annotations!(dest, srcannots, destpos - srcpos) + nb +end + +# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers) +# work as expected. 
+function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}}) + if pipe_writer(io) isa AnnotatedIOBuffer + write(pipe_writer(io), s) + else + invoke(write, Tuple{IO, typeof(s)}, io, s) + end::Int +end +# Can't be part of the `Union` above because it introduces method ambiguities +function write(io::AbstractPipe, c::AnnotatedChar) + if pipe_writer(io) isa AnnotatedIOBuffer + write(pipe_writer(io), c) + else + invoke(write, Tuple{IO, typeof(c)}, io, c) + end::Int +end + +""" + _clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int}) + +Erase the presence of `annotations` within a certain `span`. + +This operates by removing all elements of `annotations` that are entirely +contained in `span`, truncating ranges that partially overlap, and splitting +annotations that subsume `span` to just exist either side of `span`. +""" +function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int}) + # Clear out any overlapping pre-existing annotations. + filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations) + extras = Tuple{Int, RegionAnnotation}[] + for i in eachindex(annotations) + annot = annotations[i] + region = annot.region + # Test for partial overlap + if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region) + annotations[i] = + setindex(annot, + if first(region) < first(span) + first(region):first(span)-1 + else + last(span)+1:last(region) + end, + :region) + # If `span` fits exactly within `region`, then we've only copied over + # the beginning overhang, but also need to conserve the end overhang. + if first(region) < first(span) && last(span) < last(region) + push!(extras, (i, setindex(annot, last(span)+1:last(region), :region))) + end + end + end + # Insert any extra entries in the appropriate position + for (offset, (i, entry)) in enumerate(extras) + insert!(annotations, i + offset, entry) + end + annotations +end + +""" + _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io)) + +Register new `annotations` in `io`, applying an `offset` to their regions. + +The largely consists of simply shifting the regions of `annotations` by `offset` +and pushing them onto `io`'s annotations. However, when it is possible to merge +the new annotations with recent annotations in accordance with the semantics +outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there +is a run of the most recent annotations that are also present as the first +`annotations`, with the same value and adjacent regions, the new annotations are +merged into the existing recent annotations by simply extending their range. + +This is implemented so that one can say write an `AnnotatedString` to an +`AnnotatedIOBuffer` one character at a time without needlessly producing a +new annotation for each character. 
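A short sketch of the offsetting and merging behaviour described above (the `:face`/`:bold` annotation is a placeholder):

```julia
using Base: AnnotatedIOBuffer, AnnotatedString, annotations

aio = AnnotatedIOBuffer()
write(aio, AnnotatedString("key", [(1:3, :face, :bold)]))
write(aio, " = ")                                           # plain, unannotated text
write(aio, AnnotatedString("value", [(1:5, :face, :bold)]))
annotations(aio)   # regions shifted: (1:3, :face, :bold) and (7:11, :face, :bold)

write(aio, AnnotatedString("!", [(1:1, :face, :bold)]))
annotations(aio)   # the last two merge into a single (7:12, :face, :bold) annotation

read(seekstart(aio), AnnotatedString)   # "key = value!" with those annotations
```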
+""" +function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io)) + run = 0 + if !isempty(io.annotations) && last(last(io.annotations).region) == offset + for i in reverse(axes(annotations, 1)) + annot = annotations[i] + first(annot.region) == 1 || continue + i <= length(io.annotations) || continue + if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value + valid_run = true + for runlen in 1:i + new = annotations[begin+runlen-1] + old = io.annotations[end-i+runlen] + if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value + valid_run = false + break + end + end + if valid_run + run = i + break + end + end + end + end + for runindex in 0:run-1 + old_index = lastindex(io.annotations) - run + 1 + runindex + old = io.annotations[old_index] + new = annotations[begin+runindex] + io.annotations[old_index] = setindex(old, first(old.region):last(new.region)+offset, :region) + end + for index in run+1:lastindex(annotations) + annot = annotations[index] + start, stop = first(annot.region), last(annot.region) + push!(io.annotations, setindex(annotations[index], start+offset:stop+offset, :region)) + end +end + +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} + if (start = position(io)) == 0 + AnnotatedString(read(io.io, T), copy(io.annotations)) + else + annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region) + for annot in io.annotations if last(annot.region) > start] + AnnotatedString(read(io.io, T), annots) + end +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) + +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar} + pos = position(io) + char = read(io.io, T) + annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region] + AnnotatedChar(char, annots) +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char}) + +function truncate(io::AnnotatedIOBuffer, size::Integer) + truncate(io.io, size) + filter!(ann -> first(ann.region) <= size, io.annotations) + map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region), + io.annotations, io.annotations) + io +end diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 2609edeaaaa18..bf11199143c1e 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -16,9 +16,7 @@ about strings: * Each `AbstractChar` in a string is encoded by one or more code units * Only the index of the first code unit of an `AbstractChar` is a valid index * The encoding of an `AbstractChar` is independent of what precedes or follows it - * String encodings are [self-synchronizing] – i.e. `isvalid(s, i)` is O(1) - -[self-synchronizing]: https://en.wikipedia.org/wiki/Self-synchronizing_code + * String encodings are [self-synchronizing](https://en.wikipedia.org/wiki/Self-synchronizing_code) – i.e. `isvalid(s, i)` is O(1) Some string functions that extract code units, characters or substrings from strings error if you pass them out-of-bounds or invalid string indices. 
This @@ -31,7 +29,7 @@ types may choose different "imaginary" character sizes as makes sense for their implementations (e.g. substrings may pass index arithmetic through to the underlying string they provide a view into). Relaxed indexing functions include those intended for index arithmetic: `thisind`, `nextind` and `prevind`. This -model allows index arithmetic to work with out-of- bounds indices as +model allows index arithmetic to work with out-of-bounds indices as intermediate values so long as one never uses them to retrieve a character, which often helps avoid needing to code around edge cases. @@ -148,9 +146,8 @@ Stacktrace: Return a tuple of the character in `s` at index `i` with the index of the start of the following character in `s`. This is the key method that allows strings to -be iterated, yielding a sequences of characters. If `i` is out of bounds in `s` -then a bounds error is raised. The `iterate` function, as part of the iteration -protocol may assume that `i` is the start of a character in `s`. +be iterated, yielding a sequences of characters. The `iterate` function, as part +of the iteration protocol may assume that `i` is the start of a character in `s`. See also [`getindex`](@ref), [`checkbounds`](@ref). """ @@ -181,6 +178,8 @@ firstindex(s::AbstractString) = 1 lastindex(s::AbstractString) = thisind(s, ncodeunits(s)::Int) isempty(s::AbstractString) = iszero(ncodeunits(s)::Int) +@propagate_inbounds first(s::AbstractString) = s[firstindex(s)] + function getindex(s::AbstractString, i::Integer) @boundscheck checkbounds(s, i) @inbounds return isvalid(s, i) ? (iterate(s, i)::NTuple{2,Any})[1] : string_index_err(s, i) @@ -243,9 +242,10 @@ end """ *(s::Union{AbstractString, AbstractChar}, t::Union{AbstractString, AbstractChar}...) -> AbstractString -Concatenate strings and/or characters, producing a [`String`](@ref). This is equivalent -to calling the [`string`](@ref) function on the arguments. Concatenation of built-in -string types always produces a value of type `String` but other string types may choose +Concatenate strings and/or characters, producing a [`String`](@ref) or +[`AnnotatedString`](@ref) (as appropriate). This is equivalent to calling the +[`string`](@ref) or [`annotatedstring`](@ref) function on the arguments. Concatenation of built-in string +types always produces a value of type `String` but other string types may choose to return a string of a different type as appropriate. # Examples @@ -257,10 +257,22 @@ julia> 'j' * "ulia" "julia" ``` """ -(*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) = string(s1, ss...) +function (*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) + if _isannotated(s1) || any(_isannotated, ss) + annotatedstring(s1, ss...) + else + string(s1, ss...) + end +end one(::Union{T,Type{T}}) where {T<:AbstractString} = convert(T, "") +# This could be written as a single statement with three ||-clauses, however then effect +# analysis thinks it may throw and runtime checks are added. +# Also see `substring.jl` for the `::SubString{T}` method. +_isannotated(S::Type) = S != Union{} && (S <: AnnotatedString || S <: AnnotatedChar) +_isannotated(s) = _isannotated(typeof(s)) + ## generic string comparison ## """ @@ -311,7 +323,8 @@ end ==(a::AbstractString, b::AbstractString) -> Bool Test whether two strings are equal character by character (technically, Unicode -code point by code point). +code point by code point). 
Should either string be a [`AnnotatedString`](@ref) the +string properties must match too. # Examples ```jldoctest @@ -792,8 +805,8 @@ IndexStyle(::Type{<:CodeUnits}) = IndexLinear() write(io::IO, s::CodeUnits) = write(io, s.s) -unsafe_convert(::Type{Ptr{T}}, s::CodeUnits{T}) where {T} = unsafe_convert(Ptr{T}, s.s) -unsafe_convert(::Type{Ptr{Int8}}, s::CodeUnits{UInt8}) = unsafe_convert(Ptr{Int8}, s.s) +cconvert(::Type{Ptr{T}}, s::CodeUnits{T}) where {T} = cconvert(Ptr{T}, s.s) +cconvert(::Type{Ptr{Int8}}, s::CodeUnits{UInt8}) = cconvert(Ptr{Int8}, s.s) """ codeunits(s::AbstractString) diff --git a/base/strings/cstring.jl b/base/strings/cstring.jl new file mode 100644 index 0000000000000..3a377ab0e7b1e --- /dev/null +++ b/base/strings/cstring.jl @@ -0,0 +1,314 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +import Core.Intrinsics: bitcast + +""" + Cwstring + +A C-style string composed of the native wide character type +[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For +C-style strings composed of the native character +type, see [`Cstring`](@ref). For more information +about string interoperability with C, see the +[manual](@ref man-bits-types). + +""" +Cwstring + +""" + Cstring + +A C-style string composed of the native character type +[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For +C-style strings composed of the native wide character +type, see [`Cwstring`](@ref). For more information +about string interoperability with C, see the +[manual](@ref man-bits-types). +""" +Cstring + +# construction from pointers +Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p) +Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = bitcast(Cwstring, p) +Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p) +Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = bitcast(Ptr{Cwchar_t}, p) + +convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p) +convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p) +convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p) +convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p) + +""" + pointer(array [, index]) + +Get the native address of an array or string, optionally at a given location `index`. + +This function is "unsafe". Be careful to ensure that a Julia reference to +`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref) +macro should be used to protect the `array` argument from garbage collection +within a given block of code. + +Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity. +""" +function pointer end + +pointer(p::Cstring) = convert(Ptr{Cchar}, p) +pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p) + +# comparisons against pointers (mainly to support `cstr==C_NULL`) +==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y +==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y) + +unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s)) + +# convert strings to String etc. 
to pass as pointers +cconvert(::Type{Cstring}, s::String) = s +cconvert(::Type{Cstring}, s::AbstractString) = + cconvert(Cstring, String(s)::String) + +function cconvert(::Type{Cwstring}, s::AbstractString) + v = transcode(Cwchar_t, String(s)) + push!(v, 0) + return cconvert(Cwstring, v) +end + +eltype(::Type{Cstring}) = Cchar +eltype(::Type{Cwstring}) = Cwchar_t + +containsnul(p::Ptr, len) = + C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len) +containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s)) +containsnul(s::AbstractString) = '\0' in s + +function unsafe_convert(::Type{Cstring}, s::String) + p = unsafe_convert(Ptr{Cchar}, s) + containsnul(p, sizeof(s)) && + throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) + return Cstring(p) +end + +unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}}) = Cstring(unsafe_convert(Ptr{Cvoid}, s)) + +function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t}) + for i = 1:length(v)-1 + v[i] == 0 && + throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))")) + end + v[end] == 0 || + throw(ArgumentError("C string data must be NUL terminated: $(repr(v))")) + return cconvert(Ptr{Cwchar_t}, v) +end +unsafe_convert(::Type{Cwstring}, s) = Cwstring(unsafe_convert(Ptr{Cwchar_t}, s)) +unsafe_convert(::Type{Cwstring}, s::Cwstring) = s + +# symbols are guaranteed not to contain embedded NUL +cconvert(::Type{Cstring}, s::Symbol) = s +unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s)) + +if ccall(:jl_get_UNAME, Any, ()) === :NT +""" + Base.cwstring(s) + +Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C +functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit +conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the +same argument. + +This is only available on Windows. +""" +function cwstring(s::AbstractString) + bytes = codeunits(String(s)) + 0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) + return push!(transcode(UInt16, bytes), 0) +end +end + +# transcoding between data in UTF-8 and UTF-16 for Windows APIs, +# and also UTF-32 for APIs using Cwchar_t on other platforms. + +""" + transcode(T, src) + +Convert string data between Unicode encodings. `src` is either a +`String` or a `Vector{UIntXX}` of UTF-XX code units, where +`XX` is 8, 16, or 32. `T` indicates the encoding of the return value: +`String` to return a (UTF-8 encoded) `String` or `UIntXX` +to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref) +can also be used as the integer type, for converting `wchar_t*` strings +used by external C libraries.) + +The `transcode` function succeeds as long as the input data can be +reasonably represented in the target encoding; it always succeeds for +conversions between UTF-XX encodings, even for invalid Unicode data. + +Only conversion to/from UTF-8 is currently supported. 
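For instance, even a lone surrogate survives the UTF-16 ↔ UTF-8 round trip (a small sketch of the guarantee stated above):

```julia
bad = UInt16[0xd800]            # lone high surrogate: not valid Unicode
s = transcode(String, bad)      # encoded as a 3-byte (WTF-8-style) sequence
transcode(UInt16, s) == bad     # true: the conversion round-trips losslessly
```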
+ +# Examples +```jldoctest +julia> str = "αβγ" +"αβγ" + +julia> transcode(UInt16, str) +3-element Vector{UInt16}: + 0x03b1 + 0x03b2 + 0x03b3 + +julia> transcode(String, transcode(UInt16, str)) +"αβγ" +``` +""" +function transcode end + +transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src +transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src] +transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} = + transcode(T, String(Vector(src))) +transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} = + transcode(T, String(src)) + +function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}}) + buf = IOBuffer() + for c in src + print(buf, Char(c)) + end + take!(buf) +end +transcode(::Type{String}, src::String) = src +transcode(T, src::String) = transcode(T, codeunits(src)) +transcode(::Type{String}, src) = String(transcode(UInt8, src)) + +function transcode(::Type{UInt16}, src::AbstractVector{UInt8}) + require_one_based_indexing(src) + dst = UInt16[] + i, n = 1, length(src) + n > 0 || return dst + sizehint!(dst, 2n) + a = src[1] + while true + if i < n && -64 <= a % Int8 <= -12 # multi-byte character + b = src[i += 1] + if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b + # invalid UTF-8 (non-continuation or too-high code point) + push!(dst, a) + a = b; continue + elseif a < 0xe0 # 2-byte UTF-8 + push!(dst, xor(0x3080, UInt16(a) << 6, b)) + elseif i < n # 3/4-byte character + c = src[i += 1] + if -64 <= (c % Int8) # invalid UTF-8 (non-continuation) + push!(dst, a, b) + a = c; continue + elseif a < 0xf0 # 3-byte UTF-8 + push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c)) + elseif i < n + d = src[i += 1] + if -64 <= (d % Int8) # invalid UTF-8 (non-continuation) + push!(dst, a, b, c) + a = d; continue + elseif a == 0xf0 && b < 0x90 # overlong encoding + push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d)) + else # 4-byte UTF-8 + push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4), + xor(0xdc80, UInt16(c & 0xf) << 6, d)) + end + else # too short + push!(dst, a, b, c) + break + end + else # too short + push!(dst, a, b) + break + end + else # ASCII or invalid UTF-8 (continuation byte or too-high code point) + push!(dst, a) + end + i < n || break + a = src[i += 1] + end + return dst +end + +function transcode(::Type{UInt8}, src::AbstractVector{UInt16}) + require_one_based_indexing(src) + n = length(src) + n == 0 && return UInt8[] + + # Precompute m = sizeof(dst). This involves annoying duplication + # of the loop over the src array. However, this is not just an + # optimization: it is problematic for security reasons to grow + # dst dynamically, because Base.winprompt uses this function to + # convert passwords to UTF-8 and we don't want to make unintentional + # copies of the password data. 
+ a = src[1] + i, m = 1, 0 + while true + if a < 0x80 + m += 1 + elseif a < 0x800 # 2-byte UTF-8 + m += 2 + elseif a & 0xfc00 == 0xd800 && i < length(src) + b = src[i += 1] + if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8 + m += 4 + else + m += 3 + a = b; continue + end + else + # 1-unit high UTF-16 or unpaired high surrogate + # either way, encode as 3-byte UTF-8 code point + m += 3 + end + i < n || break + a = src[i += 1] + end + + dst = StringVector(m) + a = src[1] + i, j = 1, 0 + while true + if a < 0x80 # ASCII + dst[j += 1] = a % UInt8 + elseif a < 0x800 # 2-byte UTF-8 + dst[j += 1] = 0xc0 | ((a >> 6) % UInt8) + dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) + elseif a & 0xfc00 == 0xd800 && i < n + b = src[i += 1] + if (b & 0xfc00) == 0xdc00 + # 2-unit UTF-16 sequence => 4-byte UTF-8 + a += 0x2840 + dst[j += 1] = 0xf0 | ((a >> 8) % UInt8) + dst[j += 1] = 0x80 | ((a % UInt8) >> 2) + dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8) + dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f) + else + dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) + dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) + dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) + a = b; continue + end + else + # 1-unit high UTF-16 or unpaired high surrogate + # either way, encode as 3-byte UTF-8 code point + dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) + dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) + dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) + end + i < n || break + a = src[i += 1] + end + return dst +end + +function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}} + transcode(String, unsafe_wrap(Array, p, length; own=false)) +end +function unsafe_string(cw::Cwstring) + p = convert(Ptr{Cwchar_t}, cw) + n = 1 + while unsafe_load(p, n) != 0 + n += 1 + end + return unsafe_string(p, n - 1) +end diff --git a/base/strings/io.jl b/base/strings/io.jl index 987a64798d3da..b4a3c7ad3e0c2 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -10,10 +10,10 @@ if `io` is not given) a canonical (un-decorated) text representation. The representation used by `print` includes minimal formatting and tries to avoid Julia-specific details. -`print` falls back to calling `show`, so most types should just define -`show`. Define `print` if your type has a separate "plain" representation. -For example, `show` displays strings with quotes, and `print` displays strings -without quotes. +`print` falls back to calling the 2-argument `show(io, x)` for each argument `x` in `xs`, +so most types should just define `show`. Define `print` if your type has a separate +"plain" representation. For example, `show` displays strings with quotes, and `print` +displays strings without quotes. See also [`println`](@ref), [`string`](@ref), [`printstyled`](@ref). @@ -51,6 +51,8 @@ function print(io::IO, xs...) return nothing end +setfield!(typeof(print).name.mt, :max_args, 10, :monotonic) + """ println([io::IO], xs...) @@ -74,6 +76,7 @@ julia> String(take!(io)) """ println(io::IO, xs...) = print(io, xs..., "\n") +setfield!(typeof(println).name.mt, :max_args, 10, :monotonic) ## conversion of general objects to strings ## """ @@ -149,6 +152,7 @@ function print_to_string(xs...) end String(_unsafe_take!(s)) end +setfield!(typeof(print_to_string).name.mt, :max_args, 10, :monotonic) function string_with_env(env, xs...) 
if isempty(xs) @@ -210,35 +214,29 @@ function show( # one line in collection, seven otherwise get(io, :typeinfo, nothing) === nothing && (limit *= 7) end + limit = max(0, limit-2) # quote chars # early out for short strings - len = ncodeunits(str) - len ≤ limit - 2 && # quote chars - return show(io, str) + check_textwidth(str, limit) && return show(io, str) # these don't depend on string data units = codeunit(str) == UInt8 ? "bytes" : "code units" skip_text(skip) = " ⋯ $skip $units ⋯ " - short = length(skip_text("")) + 4 # quote chars - chars = max(limit, short + 1) - short # at least 1 digit - # figure out how many characters to print in elided case - chars -= d = ndigits(len - chars) # first adjustment - chars += d - ndigits(len - chars) # second if needed - chars = max(0, chars) + # longest possible replacement string for omitted chars + max_replacement = skip_text(ncodeunits(str) * 100) # *100 for 2 inner quote chars - # find head & tail, avoiding O(length(str)) computation - head = nextind(str, 0, 1 + (chars + 1) ÷ 2) - tail = prevind(str, len + 1, chars ÷ 2) + head, tail = string_truncate_boundaries(str, limit, max_replacement, Val(:center)) # threshold: min chars skipped to make elision worthwhile - t = short + ndigits(len - chars) - 1 - n = tail - head # skipped code units - if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1) - skip = skip_text(n) - show(io, SubString(str, 1:prevind(str, head))) - printstyled(io, skip; color=:light_yellow, bold=true) - show(io, SubString(str, tail)) + afterhead = nextind(str, head) + n = tail - afterhead # skipped code units + replacement = skip_text(n) + t = ncodeunits(replacement) # length of replacement (textwidth == ncodeunits here) + @views if 4t ≤ n || t ≤ n && t ≤ textwidth(str[afterhead:prevind(str,tail)]) + show(io, str[begin:head]) + printstyled(io, replacement; color=:light_yellow, bold=true) + show(io, str[tail:end]) else show(io, str) end @@ -246,14 +244,16 @@ end # optimized methods to avoid iterating over chars write(io::IO, s::Union{String,SubString{String}}) = - GC.@preserve s Int(unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))))::Int + GC.@preserve s (unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))) % Int)::Int print(io::IO, s::Union{String,SubString{String}}) = (write(io, s); nothing) """ repr(x; context=nothing) -Create a string from any value using the [`show`](@ref) function. -You should not add methods to `repr`; define a `show` method instead. +Create a string representation of any value using the 2-argument `show(io, x)` function, +which aims to produce a string that is parseable Julia code, where possible. +i.e. `eval(Meta.parse(repr(x))) == x` should hold true. +You should not add methods to `repr`; define a [`show`](@ref) method instead. The optional keyword argument `context` can be set to a `:key=>value` pair, a tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose @@ -262,7 +262,7 @@ attributes are used for the I/O stream passed to `show`. Note that `repr(x)` is usually similar to how the value of `x` would be entered in Julia. See also [`repr(MIME("text/plain"), x)`](@ref) to instead return a "pretty-printed" version of `x` designed more for human consumption, -equivalent to the REPL display of `x`. +equivalent to the REPL display of `x`, using the 3-argument `show(io, mime, x)`. !!! compat "Julia 1.7" Passing a tuple to keyword `context` requires Julia 1.7 or later. 
@@ -353,9 +353,29 @@ function join(io::IO, iterator, delim="") end end -join(iterator) = sprint(join, iterator) -join(iterator, delim) = sprint(join, iterator, delim) -join(iterator, delim, last) = sprint(join, iterator, delim, last) +function _join_preserve_annotations(iterator, args...) + et = @default_eltype(iterator) + if isconcretetype(et) && !_isannotated(et) && !any(_isannotated, args) + sprint(join, iterator, args...) + else + io = AnnotatedIOBuffer() + join(io, iterator, args...) + # If we know (from compile time information, or dynamically in the case + # of iterators with a non-concrete eltype), that the result is annotated + # in nature, we extract an `AnnotatedString`, otherwise we just extract + # a plain `String` from `io`. + if isconcretetype(et) || !isempty(io.annotations) + seekstart(io) + read(io, AnnotatedString{String}) + else + String(take!(io.io)) + end + end +end + +join(iterator) = _join_preserve_annotations(iterator) +join(iterator, delim) = _join_preserve_annotations(iterator, delim) +join(iterator, delim, last) = _join_preserve_annotations(iterator, delim, last) ## string escaping & unescaping ## @@ -364,8 +384,8 @@ escape_nul(c::Union{Nothing, AbstractChar}) = (c !== nothing && '0' <= c <= '7') ? "\\x00" : "\\0" """ - escape_string(str::AbstractString[, esc]; keep = ())::AbstractString - escape_string(io, str::AbstractString[, esc]; keep = ())::Nothing + escape_string(str::AbstractString[, esc]; keep=(), ascii=false, fullhex=false)::AbstractString + escape_string(io, str::AbstractString[, esc]; keep=())::Nothing General escaping of traditional C and Unicode escape sequences. The first form returns the escaped string, the second prints the result to `io`. @@ -380,11 +400,23 @@ escaped by a prepending backslash (`\"` is also escaped by default in the first The argument `keep` specifies a collection of characters which are to be kept as they are. Notice that `esc` has precedence here. +The argument `ascii` can be set to `true` to escape all non-ASCII characters, +whereas the default `ascii=false` outputs printable Unicode characters as-is. +(`keep` takes precedence over `ascii`.) + +The argument `fullhex` can be set to `true` to require all `\\u` escapes to be +printed with 4 hex digits, and `\\U` escapes to be printed with 8 hex digits, +whereas by default (`fullhex=false`) they are printed with fewer digits if +possible (omitting leading zeros). + See also [`unescape_string`](@ref) for the reverse operation. !!! compat "Julia 1.7" The `keep` argument is available as of Julia 1.7. +!!! compat "Julia 1.12" + The `ascii` and `fullhex` arguments require Julia 1.12. + # Examples ```jldoctest julia> escape_string("aaa\\nbbb") @@ -403,7 +435,7 @@ julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous "ℵ\\\\x000" ``` """ -function escape_string(io::IO, s::AbstractString, esc=""; keep = ()) +function escape_string(io::IO, s::AbstractString, esc=""; keep = (), ascii::Bool=false, fullhex::Bool=false) a = Iterators.Stateful(s) for c::AbstractChar in a if c in esc @@ -418,10 +450,10 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ()) isprint(c) ? print(io, c) : print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) elseif !isoverlong(c) && !ismalformed(c) - isprint(c) ? print(io, c) : - c <= '\x7f' ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) : - c <= '\uffff' ? print(io, "\\u", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 
4 : 2)) : - print(io, "\\U", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4)) + !ascii && isprint(c) ? print(io, c) : + c <= '\x7f' ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) : + c <= '\uffff' ? print(io, "\\u", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) : + print(io, "\\U", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4)) else # malformed or overlong u = bswap(reinterpret(UInt32, c)::UInt32) while true @@ -432,8 +464,8 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ()) end end -escape_string(s::AbstractString, esc=('\"',); keep = ()) = - sprint((io)->escape_string(io, s, esc; keep = keep), sizehint=lastindex(s)) +escape_string(s::AbstractString, esc=('\"',); keep = (), ascii::Bool=false, fullhex::Bool=false) = + sprint((io)->escape_string(io, s, esc; keep, ascii, fullhex), sizehint=lastindex(s)) function print_quoted(io, s::AbstractString) print(io, '"') @@ -590,14 +622,14 @@ julia> println(raw"\\\\x \\\\\\"") macro raw_str(s); s; end """ - escape_raw_string(s::AbstractString) - escape_raw_string(io, s::AbstractString) + escape_raw_string(s::AbstractString, delim='"') -> AbstractString + escape_raw_string(io, s::AbstractString, delim='"') Escape a string in the manner used for parsing raw string literals. -For each double-quote (`"`) character in input string `s`, this -function counts the number _n_ of preceding backslash (`\\`) characters, -and then increases there the number of backslashes from _n_ to 2_n_+1 -(even for _n_ = 0). It also doubles a sequence of backslashes at the end +For each double-quote (`"`) character in input string `s` (or `delim` if +specified), this function counts the number _n_ of preceding backslash (`\\`) +characters, and then increases there the number of backslashes from _n_ to +2_n_+1 (even for _n_ = 0). It also doubles a sequence of backslashes at the end of the string. This escaping convention is used in raw strings and other non-standard @@ -605,38 +637,43 @@ string literals. (It also happens to be the escaping convention expected by the Microsoft C/C++ compiler runtime when it parses a command-line string into the argv[] array.) -See also [`escape_string`](@ref). +See also [`Base.escape_string()`](@ref). """ -function escape_raw_string(io, str::AbstractString) +function escape_raw_string(io::IO, str::AbstractString, delim::Char='"') + total = 0 escapes = 0 for c in str if c == '\\' escapes += 1 else - if c == '"' + if c == delim # if one or more backslashes are followed by # a double quote then escape all backslashes # and the double quote - escapes = escapes * 2 + 1 - end - while escapes > 0 - write(io, '\\') - escapes -= 1 + escapes += 1 + total += escapes + while escapes > 0 + write(io, '\\') + escapes -= 1 + end end escapes = 0 - write(io, c) end + write(io, c) end # also escape any trailing backslashes, # so they do not affect the closing quote + total += escapes while escapes > 0 - write(io, '\\') write(io, '\\') escapes -= 1 end + total +end +function escape_raw_string(str::AbstractString, delim::Char='"') + total = escape_raw_string(devnull, str, delim) # check whether the string even needs to be copied and how much to allocate for it + return total == 0 ? 
str : sprint(escape_raw_string, str, delim; sizehint = sizeof(str) + total) end -escape_raw_string(str::AbstractString) = sprint(escape_raw_string, str; - sizehint = lastindex(str) + 2) ## multiline strings ## @@ -764,3 +801,26 @@ function String(chars::AbstractVector{<:AbstractChar}) end end end + +function AnnotatedString(chars::AbstractVector{C}) where {C<:AbstractChar} + str = if C <: AnnotatedChar + String(getfield.(chars, :char)) + else + sprint(sizehint=length(chars)) do io + for c in chars + print(io, c) + end + end + end + annots = RegionAnnotation[] + point = 1 + for c in chars + if c isa AnnotatedChar + for annot in c.annotations + push!(annots, (point:point, annot...)) + end + end + point += ncodeunits(c) + end + AnnotatedString(str, annots) +end diff --git a/base/strings/search.jl b/base/strings/search.jl index 1a3085e084ccd..5f658e24526ba 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -10,48 +10,92 @@ match strings with [`match`](@ref). """ abstract type AbstractPattern end -nothing_sentinel(i) = i == 0 ? nothing : i +# TODO: These unions represent bytes in memory that can be accessed via a pointer. +# this property is used throughout Julia, e.g. also in IO code. +# This deserves a better solution - see #53178. +# If such a better solution comes in place, these unions should be replaced. +const DenseInt8 = Union{ + DenseArray{Int8}, + FastContiguousSubArray{Int8,N,<:DenseArray} where N +} + +# Note: This union is different from that above in that it includes CodeUnits. +# Currently, this is redundant as CodeUnits <: DenseVector, but this subtyping +# is buggy and may be removed in the future, see #54002 +const DenseUInt8 = Union{ + DenseArray{UInt8}, + FastContiguousSubArray{UInt8,N,<:DenseArray} where N, + CodeUnits{UInt8, <:Union{String, SubString{String}}}, + FastContiguousSubArray{UInt8,N,<:CodeUnits{UInt8, <:Union{String, SubString{String}}}} where N, +} + +const DenseUInt8OrInt8 = Union{DenseUInt8, DenseInt8} + +last_byteindex(x::Union{String, SubString{String}}) = ncodeunits(x) +last_byteindex(x::DenseUInt8OrInt8) = lastindex(x) + +function last_utf8_byte(c::Char) + u = reinterpret(UInt32, c) + shift = ((4 - ncodeunits(c)) * 8) & 31 + (u >> shift) % UInt8 +end + +# Whether the given byte is guaranteed to be the only byte in a Char +# This holds even in the presence of invalid UTF8 +is_standalone_byte(x::UInt8) = (x < 0x80) | (x > 0xf7) function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}, - s::String, i::Integer) + s::Union{String, SubString{String}}, i::Integer) if i < 1 || i > sizeof(s) i == sizeof(s) + 1 && return nothing throw(BoundsError(s, i)) end @inbounds isvalid(s, i) || string_index_err(s, i) c = pred.x - c ≤ '\x7f' && return nothing_sentinel(_search(s, c % UInt8, i)) + c ≤ '\x7f' && return _search(s, first_utf8_byte(c), i) while true i = _search(s, first_utf8_byte(c), i) - i == 0 && return nothing - pred(s[i]) && return i + i === nothing && return nothing + isvalid(s, i) && pred(s[i]) && return i i = nextind(s, i) end end -findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) = - nothing_sentinel(_search(a, pred.x)) +function findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{UInt8, Int8}}, a::Union{DenseInt8, DenseUInt8}) + findnext(pred, a, firstindex(a)) +end + +function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer) + _search(a, pred.x, i) +end 
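The `DenseUInt8`/`DenseInt8` unions above widen the `memchr`-backed byte searches from plain `Vector{UInt8}` to any contiguously stored byte data, including contiguous views and `codeunits`. A rough sketch of the call surface they are intended to cover (the results in the comments are illustrative):

```julia
v = UInt8[0x01, 0x00, 0x02, 0x00]

findfirst(==(0x00), v)                  # 2: memchr over a dense byte vector
findnext(iszero, v, 3)                  # 4: zero has the same bit pattern for Int8 and UInt8 data
findfirst(==(0x00), view(v, 2:4))       # 1: fast contiguous views fall under the same union
findfirst(==(0x62), codeunits("abc"))   # 2: CodeUnits of a String/SubString are included as well
```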
-findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) = - nothing_sentinel(_search(a, pred.x, i)) +function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer) + _search(a, pred.x, i) +end -findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8))) -findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i)) +# iszero is special, in that the bitpattern for zero for Int8 and UInt8 is the same, +# so we can use memchr even if we search for an Int8 in an UInt8 array or vice versa +findfirst(::typeof(iszero), a::DenseUInt8OrInt8) = _search(a, zero(UInt8)) +findnext(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _search(a, zero(UInt8), i) -function _search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1) - if i < 1 +function _search(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = firstindex(a)) + fst = firstindex(a) + lst = last_byteindex(a) + if i < fst throw(BoundsError(a, i)) end - n = sizeof(a) - if i > n - return i == n+1 ? 0 : throw(BoundsError(a, i)) + n_bytes = lst - i + 1 + if i > lst + return i == lst+1 ? nothing : throw(BoundsError(a, i)) + end + GC.@preserve a begin + p = pointer(a) + q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-fst, b, n_bytes) end - p = pointer(a) - q = GC.@preserve a ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1) - return q == C_NULL ? 0 : Int(q-p+1) + return q == C_NULL ? nothing : (q-p+fst) % Int end -function _search(a::ByteArray, b::AbstractChar, i::Integer = 1) +function _search(a::DenseUInt8, b::AbstractChar, i::Integer = firstindex(a)) if isascii(b) _search(a,UInt8(b),i) else @@ -60,41 +104,51 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1) end function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}, - s::String, i::Integer) + s::Union{String, SubString{String}}, i::Integer) c = pred.x - c ≤ '\x7f' && return nothing_sentinel(_rsearch(s, c % UInt8, i)) + c ≤ '\x7f' && return _rsearch(s, first_utf8_byte(c), i) b = first_utf8_byte(c) while true i = _rsearch(s, b, i) - i == 0 && return nothing - pred(s[i]) && return i + i == nothing && return nothing + isvalid(s, i) && pred(s[i]) && return i i = prevind(s, i) end end -findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) = - nothing_sentinel(_rsearch(a, pred.x)) +function findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::DenseUInt8OrInt8) + findprev(pred, a, lastindex(a)) +end -findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) = - nothing_sentinel(_rsearch(a, pred.x, i)) +function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer) + _rsearch(a, pred.x, i) +end -findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8))) -findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i)) +function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer) + _rsearch(a, pred.x, i) +end -function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a)) - if i < 1 - return i == 0 ? 
0 : throw(BoundsError(a, i)) +# See comments above for findfirst(::typeof(iszero)) methods +findlast(::typeof(iszero), a::DenseUInt8OrInt8) = _rsearch(a, zero(UInt8)) +findprev(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _rsearch(a, zero(UInt8), i) + +function _rsearch(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = last_byteindex(a)) + fst = firstindex(a) + lst = last_byteindex(a) + if i < fst + return i == fst - 1 ? nothing : throw(BoundsError(a, i)) + end + if i > lst + return i == lst+1 ? nothing : throw(BoundsError(a, i)) end - n = sizeof(a) - if i > n - return i == n+1 ? 0 : throw(BoundsError(a, i)) + GC.@preserve a begin + p = pointer(a) + q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i-fst+1) end - p = pointer(a) - q = GC.@preserve a ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i) - return q == C_NULL ? 0 : Int(q-p+1) + return q == C_NULL ? nothing : (q-p+fst) % Int end -function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a)) +function _rsearch(a::DenseUInt8, b::AbstractChar, i::Integer = length(a)) if isascii(b) _rsearch(a,UInt8(b),i) else @@ -102,6 +156,35 @@ function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a)) end end +function findall( + pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}, + s::Union{String, SubString{String}} +) + c = Char(pred.x)::Char + byte = last_utf8_byte(c) + ncu = ncodeunits(c) + + # If only one byte, and can't be part of another Char: Forward to memchr. + is_standalone_byte(byte) && return findall(==(byte), codeunits(s)) + result = Int[] + i = firstindex(s) + while true + i = _search(s, byte, i) + isnothing(i) && return result + i += 1 + index = i - ncu + # If the char is invalid, it's possible that its first byte is + # inside another char. If so, indexing into the string will throw an + # error, so we need to check for valid indices. + isvalid(s, index) || continue + # We use iterate here instead of indexing, because indexing wastefully + # checks for valid index. It would be better if there was something like + # try_getindex(::String, ::Int) we could use. + char = first(something(iterate(s, index))) + pred(char) && push!(result, index) + end +end + """ findfirst(pattern::AbstractString, string::AbstractString) findfirst(pattern::AbstractPattern, string::String) @@ -175,18 +258,19 @@ end in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing) -function _searchindex(s::Union{AbstractString,ByteArray}, +function _searchindex(s::Union{AbstractString,DenseUInt8OrInt8}, t::Union{AbstractString,AbstractChar,Int8,UInt8}, i::Integer) + sentinel = firstindex(s) - 1 x = Iterators.peel(t) if isnothing(x) - return 1 <= i <= nextind(s,lastindex(s))::Int ? i : + return firstindex(s) <= i <= nextind(s,lastindex(s))::Int ? 
i : throw(BoundsError(s, i)) end t1, trest = x while true i = findnext(isequal(t1),s,i) - if i === nothing return 0 end + if i === nothing return sentinel end ii = nextind(s, i)::Int a = Iterators.Stateful(trest) matched = all(splat(==), zip(SubString(s, ii), a)) @@ -201,10 +285,10 @@ function _search_bloom_mask(c) UInt64(1) << (c & 63) end -_nthbyte(s::String, i) = codeunit(s, i) +_nthbyte(s::Union{String, SubString{String}}, i) = codeunit(s, i) _nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)] -function _searchindex(s::String, t::String, i::Integer) +function _searchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer) # Check for fast case of a single byte lastindex(t) == 1 && return something(findnext(isequal(t[1]), s, i), 0) _searchindex(codeunits(s), codeunits(t), i) @@ -460,9 +544,8 @@ julia> findall(UInt8[1,2], UInt8[1,2,3,1,2]) !!! compat "Julia 1.3" This method requires at least Julia 1.3. """ - -function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}}, - s::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}}, +function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}}, + s::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}}, ; overlap::Bool=false) found = UnitRange{Int}[] i, e = firstindex(s), lastindex(s) @@ -515,7 +598,7 @@ function _rsearchindex(s::AbstractString, end end -function _rsearchindex(s::String, t::String, i::Integer) +function _rsearchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer) # Check for fast case of a single byte if lastindex(t) == 1 return something(findprev(isequal(t[1]), s, i), 0) diff --git a/base/strings/string.jl b/base/strings/string.jl index a26791958cd50..9f3c3d00e4b81 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -27,8 +27,6 @@ function Base.showerror(io::IO, exc::StringIndexError) end end -const ByteArray = Union{CodeUnits{UInt8,String}, Vector{UInt8},Vector{Int8}, FastContiguousSubArray{UInt8,1,CodeUnits{UInt8,String}}, FastContiguousSubArray{UInt8,1,Vector{UInt8}}, FastContiguousSubArray{Int8,1,Vector{Int8}}} - @inline between(b::T, lo::T, hi::T) where {T<:Integer} = (lo ≤ b) & (b ≤ hi) """ @@ -63,8 +61,28 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = String(copyto!(StringVector(length(v)), v)) -String(v::Vector{UInt8}) = ccall(:jl_array_to_string, Ref{String}, (Any,), v) +String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v)) +function String(v::Vector{UInt8}) + #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) + len = length(v) + len == 0 && return "" + ref = v.ref + if ref.ptr_or_offset == ref.mem.ptr + str = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), ref.mem, len) + else + str = ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), ref, len) + end + # optimized empty!(v); sizehint!(v, 0) calls + setfield!(v, :size, (0,)) + setfield!(v, :ref, memoryref(Memory{UInt8}())) + return str +end + +"Create a string re-using the memory, if possible. +Mutating or reading the memory after calling this function is undefined behaviour." +function unsafe_takestring(m::Memory{UInt8}) + isempty(m) ? 
"" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) +end """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) @@ -85,9 +103,11 @@ function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}}) ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p) end -# This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String}, +# This is `@assume_effects :total !:consistent @ccall jl_alloc_string(n::Csize_t)::Ref{String}`, # but the macro is not available at this time in bootstrap, so we write it manually. -@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0xe)), :(convert(Csize_t, n)))) +const _string_n_override = 0x04ee +@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, + :(Core.svec(Csize_t)), 1, QuoteNode((:ccall, _string_n_override)), :(convert(Csize_t, n)))) """ String(s::AbstractString) @@ -97,8 +117,8 @@ Create a new `String` from an existing `AbstractString`. String(s::AbstractString) = print_to_string(s) @assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s)) -unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s) -unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) +unsafe_wrap(::Type{Memory{UInt8}}, s::String) = ccall(:jl_string_to_genericmemory, Ref{Memory{UInt8}}, (Any,), s) +unsafe_wrap(::Type{Vector{UInt8}}, s::String) = wrap(Array, unsafe_wrap(Memory{UInt8}, s)) Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s) Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s)) @@ -157,15 +177,18 @@ typemin(::String) = typemin(String) @boundscheck between(i, 1, n) || throw(BoundsError(s, i)) @inbounds b = codeunit(s, i) (b & 0xc0 == 0x80) & (i-1 > 0) || return i - @inbounds b = codeunit(s, i-1) - between(b, 0b11000000, 0b11110111) && return i-1 - (b & 0xc0 == 0x80) & (i-2 > 0) || return i - @inbounds b = codeunit(s, i-2) - between(b, 0b11100000, 0b11110111) && return i-2 - (b & 0xc0 == 0x80) & (i-3 > 0) || return i - @inbounds b = codeunit(s, i-3) - between(b, 0b11110000, 0b11110111) && return i-3 - return i + (@noinline function _thisind_continued(s, i, n) # mark the rest of the function as a slow-path + local b + @inbounds b = codeunit(s, i-1) + between(b, 0b11000000, 0b11110111) && return i-1 + (b & 0xc0 == 0x80) & (i-2 > 0) || return i + @inbounds b = codeunit(s, i-2) + between(b, 0b11100000, 0b11110111) && return i-2 + (b & 0xc0 == 0x80) & (i-3 > 0) || return i + @inbounds b = codeunit(s, i-3) + between(b, 0b11110000, 0b11110111) && return i-3 + return i + end)(s, i, n) end @propagate_inbounds nextind(s::String, i::Int) = _nextind_str(s, i) @@ -176,23 +199,31 @@ end n = ncodeunits(s) @boundscheck between(i, 1, n) || throw(BoundsError(s, i)) @inbounds l = codeunit(s, i) - (l < 0x80) | (0xf8 ≤ l) && return i+1 - if l < 0xc0 - i′ = @inbounds thisind(s, i) - return i′ < i ? 
@inbounds(nextind(s, i′)) : i+1 - end - # first continuation byte - (i += 1) > n && return i - @inbounds b = codeunit(s, i) - b & 0xc0 ≠ 0x80 && return i - ((i += 1) > n) | (l < 0xe0) && return i - # second continuation byte - @inbounds b = codeunit(s, i) - b & 0xc0 ≠ 0x80 && return i - ((i += 1) > n) | (l < 0xf0) && return i - # third continuation byte - @inbounds b = codeunit(s, i) - ifelse(b & 0xc0 ≠ 0x80, i, i+1) + between(l, 0x80, 0xf7) || return i+1 + (@noinline function _nextind_continued(s, i, n, l) # mark the rest of the function as a slow-path + if l < 0xc0 + # handle invalid codeunit index by scanning back to the start of this index + # (which may be the same as this index) + i′ = @inbounds thisind(s, i) + i′ >= i && return i+1 + i = i′ + @inbounds l = codeunit(s, i) + (l < 0x80) | (0xf8 ≤ l) && return i+1 + @assert l >= 0xc0 "invalid codeunit" + end + # first continuation byte + (i += 1) > n && return i + @inbounds b = codeunit(s, i) + b & 0xc0 ≠ 0x80 && return i + ((i += 1) > n) | (l < 0xe0) && return i + # second continuation byte + @inbounds b = codeunit(s, i) + b & 0xc0 ≠ 0x80 && return i + ((i += 1) > n) | (l < 0xf0) && return i + # third continuation byte + @inbounds b = codeunit(s, i) + return ifelse(b & 0xc0 ≠ 0x80, i, i+1) + end)(s, i, n, l) end ## checking UTF-8 & ACSII validity ## @@ -247,7 +278,7 @@ end Shifts | 0 4 10 14 18 24 8 20 12 26 - The shifts that represent each state were derived using teh SMT solver Z3, to ensure when encoded into + The shifts that represent each state were derived using the SMT solver Z3, to ensure when encoded into the rows the correct shift was a result. Each character class row is encoding 10 states with shifts as defined above. By shifting the bitsof a row by @@ -401,10 +432,11 @@ is_valid_continuation(c) = c & 0xc0 == 0x80 b = @inbounds codeunit(s, i) u = UInt32(b) << 24 between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1 - return iterate_continued(s, i, u) + return @noinline iterate_continued(s, i, u) end -function iterate_continued(s::String, i::Int, u::UInt32) +# duck-type s so that external UTF-8 string packages like StringViews can hook in +function iterate_continued(s, i::Int, u::UInt32) u < 0xc0000000 && (i += 1; @goto ret) n = ncodeunits(s) # first continuation byte @@ -433,7 +465,8 @@ end return getindex_continued(s, i, u) end -function getindex_continued(s::String, i::Int, u::UInt32) +# duck-type s so that external UTF-8 string packages like StringViews can hook in +function getindex_continued(s, i::Int, u::UInt32) if u < 0xc0000000 # called from `getindex` which checks bounds @inbounds isvalid(s, i) && @goto ret @@ -538,9 +571,10 @@ julia> repeat('A', 3) ``` """ function repeat(c::AbstractChar, r::Integer) + r < 0 && throw(ArgumentError("can't repeat a character $r times")) + r = UInt(r)::UInt c = Char(c)::Char r == 0 && return "" - r < 0 && throw(ArgumentError("can't repeat a character $r times")) u = bswap(reinterpret(UInt32, c)) n = 4 - (leading_zeros(u | 0xff) >> 3) s = _string_n(n*r) diff --git a/base/strings/strings.jl b/base/strings/strings.jl index d995d8535e24b..8dae311f475b4 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -1,5 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +include("strings/annotated.jl") include("strings/search.jl") include("strings/unicode.jl") diff --git a/base/strings/substring.jl b/base/strings/substring.jl index 792925f24b12b..50717d3c27e23 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -36,9 +36,18 @@ struct SubString{T<:AbstractString} <: AbstractString end return new(s, i-1, nextind(s,j)-i) end + function SubString{T}(s::T, i::Int, j::Int, ::Val{:noshift}) where T<:AbstractString + @boundscheck if !(i == j == 0) + si, sj = i + 1, prevind(s, j + i + 1) + @inbounds isvalid(s, si) || string_index_err(s, si) + @inbounds isvalid(s, sj) || string_index_err(s, sj) + end + new(s, i, j) + end end @propagate_inbounds SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j) +@propagate_inbounds SubString(s::T, i::Int, j::Int, v::Val{:noshift}) where {T<:AbstractString} = SubString{T}(s, i, j, v) @propagate_inbounds SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s)) = SubString(s, Int(i), Int(j)) @propagate_inbounds SubString(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, first(r), last(r)) @@ -131,6 +140,8 @@ function hash(s::SubString{String}, h::UInt) ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h end +_isannotated(::SubString{T}) where {T} = _isannotated(T) + """ reverse(s::AbstractString) -> AbstractString @@ -261,6 +272,7 @@ end function repeat(s::Union{String, SubString{String}}, r::Integer) r < 0 && throw(ArgumentError("can't repeat a string $r times")) + r = UInt(r)::UInt r == 0 && return "" r == 1 && return String(s) n = sizeof(s) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index 17c5d66c160b6..f2938ba6021f2 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -4,7 +4,9 @@ module Unicode import Base: show, ==, hash, string, Symbol, isless, length, eltype, - convert, isvalid, ismalformed, isoverlong, iterate + convert, isvalid, ismalformed, isoverlong, iterate, + AnnotatedString, AnnotatedChar, annotated_chartransform, + @assume_effects, annotations # whether codepoints are valid Unicode scalar values, i.e. 
0-0xd7ff, 0xe000-0x10ffff @@ -155,15 +157,15 @@ function utf8proc_decompose(str, options, buffer, nwords, chartransform::typeof( ret < 0 && utf8proc_error(ret) return ret end -function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T - ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}), +function utf8proc_decompose(str, options, buffer, nwords, chartransform::F) where F + ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{F}), str, sizeof(str), buffer, nwords, options, - @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform) + @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{F})), chartransform) ret < 0 && utf8proc_error(ret) return ret end -function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity) +function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform) buffer = Base.StringVector(nwords*4) nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform) @@ -172,16 +174,24 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, ch return String(resize!(buffer, nbytes)) end -# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib +""" +`Dict` of `original codepoint => replacement codepoint` normalizations +to perform on Julia identifiers, to canonicalize characters that +are both easily confused and easily inputted by accident. + +!!! warning + When this table is updated, also update the corresponding table in `src/flisp/julia_charmap.h`. +""" const _julia_charmap = Dict{UInt32,UInt32}( - 0x025B => 0x03B5, - 0x00B5 => 0x03BC, - 0x00B7 => 0x22C5, - 0x0387 => 0x22C5, - 0x2212 => 0x002D, + 0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon + 0x00B5 => 0x03BC, # micro sign -> greek small letter mu + 0x00B7 => 0x22C5, # middot char -> dot operator (#25098) + 0x0387 => 0x22C5, # Greek interpunct -> dot operator (#25098) + 0x2212 => 0x002D, # minus -> hyphen-minus (#26193) + 0x210F => 0x0127, # hbar -> small letter h with stroke (#48870) ) -utf8proc_map(s::AbstractString, flags::Integer, chartransform=identity) = utf8proc_map(String(s), flags, chartransform) +utf8proc_map(s::AbstractString, flags::Integer, chartransform::F = identity) where F = utf8proc_map(String(s), flags, chartransform) # Documented in Unicode module function normalize( @@ -253,6 +263,15 @@ julia> textwidth('⛵') ``` """ function textwidth(c::AbstractChar) + ismalformed(c) && return 1 + i = codepoint(c) + i < 0x7f && return Int(i >= 0x20) # ASCII fast path + Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i)) +end + +function textwidth(c::Char) + b = bswap(reinterpret(UInt32, c)) # from isascii(c) + b < 0x7f && return Int(b >= 0x20) # ASCII fast path ismalformed(c) && return 1 Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c)) end @@ -270,6 +289,8 @@ julia> textwidth("March") """ textwidth(s::AbstractString) = mapreduce(textwidth, +, s; init=0) +textwidth(s::AnnotatedString) = textwidth(s.string) + """ lowercase(c::AbstractChar) @@ -289,6 +310,8 @@ julia> lowercase('Ö') lowercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('A' <= c <= 'Z' ? 
c + 0x20 : c) : T(ccall(:utf8proc_tolower, UInt32, (UInt32,), c)) +lowercase(c::AnnotatedChar) = AnnotatedChar(lowercase(c.char), annotations(c)) + """ uppercase(c::AbstractChar) @@ -308,6 +331,8 @@ julia> uppercase('ê') uppercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) : T(ccall(:utf8proc_toupper, UInt32, (UInt32,), c)) +uppercase(c::AnnotatedChar) = AnnotatedChar(uppercase(c.char), annotations(c)) + """ titlecase(c::AbstractChar) @@ -331,6 +356,8 @@ julia> uppercase('dž') titlecase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) : T(ccall(:utf8proc_totitle, UInt32, (UInt32,), c)) +titlecase(c::AnnotatedChar) = AnnotatedChar(titlecase(c.char), annotations(c)) + ############################################################################ # returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category @@ -339,7 +366,7 @@ function category_code(c::AbstractChar) end function category_code(x::Integer) - x ≤ 0x10ffff ? ccall(:utf8proc_category, Cint, (UInt32,), x) : Cint(30) + x ≤ 0x10ffff ? (@assume_effects :foldable @ccall utf8proc_category(UInt32(x)::UInt32)::Cint) : Cint(30) end # more human-readable representations of the category code @@ -375,7 +402,8 @@ julia> islowercase('❤') false ``` """ -islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c))) +islowercase(c::AbstractChar) = ismalformed(c) ? false : + Bool(@assume_effects :foldable @ccall utf8proc_islower(UInt32(c)::UInt32)::Cint) # true for Unicode upper and mixed case @@ -399,7 +427,8 @@ julia> isuppercase('❤') false ``` """ -isuppercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c))) +isuppercase(c::AbstractChar) = ismalformed(c) ? false : + Bool(@assume_effects :foldable @ccall utf8proc_isupper(UInt32(c)::UInt32)::Cint) """ iscased(c::AbstractChar) -> Bool @@ -419,7 +448,7 @@ end """ isdigit(c::AbstractChar) -> Bool -Tests whether a character is a decimal digit (0-9). +Tests whether a character is an ASCII decimal digit (`0`-`9`). See also: [`isletter`](@ref). @@ -512,11 +541,17 @@ iscntrl(c::AbstractChar) = c <= '\x1f' || '\x7f' <= c <= '\u9f' Tests whether a character belongs to the Unicode general category Punctuation, i.e. a character whose category code begins with 'P'. +!!! note + This behavior is different from the `ispunct` function in C. + # Examples ```jldoctest julia> ispunct('α') false +julia> ispunct('=') +false + julia> ispunct('/') true @@ -605,6 +640,7 @@ julia> uppercase("Julia") ``` """ uppercase(s::AbstractString) = map(uppercase, s) +uppercase(s::AnnotatedString) = annotated_chartransform(uppercase, s) """ lowercase(s::AbstractString) @@ -620,6 +656,7 @@ julia> lowercase("STRINGS AND THINGS") ``` """ lowercase(s::AbstractString) = map(lowercase, s) +lowercase(s::AnnotatedString) = annotated_chartransform(lowercase, s) """ titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String @@ -668,6 +705,23 @@ function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Boo return String(take!(b)) end +# TODO: improve performance characteristics, room for a ~10x improvement. 
+function titlecase(s::AnnotatedString; wordsep::Function = !isletter, strict::Bool=true) + initial_state = (; startword = true, state = Ref{Int32}(0), + c0 = eltype(s)(zero(UInt32)), wordsep, strict) + annotated_chartransform(s, initial_state) do c, state + if isgraphemebreak!(state.state, state.c0, c) && state.wordsep(c) + state = Base.setindex(state, true, :startword) + cnew = c + else + cnew = state.startword ? titlecase(c) : state.strict ? lowercase(c) : c + state = Base.setindex(state, false, :startword) + end + state = Base.setindex(state, c, :c0) + cnew, state + end +end + """ uppercasefirst(s::AbstractString) -> String @@ -692,6 +746,17 @@ function uppercasefirst(s::AbstractString) string(c′, SubString(s, nextind(s, 1))) end +# TODO: improve performance characteristics, room for a ~5x improvement. +function uppercasefirst(s::AnnotatedString) + annotated_chartransform(s, true) do c, state + if state + (titlecase(c), false) + else + (c, state) + end + end +end + """ lowercasefirst(s::AbstractString) @@ -714,6 +779,17 @@ function lowercasefirst(s::AbstractString) string(c′, SubString(s, nextind(s, 1))) end +# TODO: improve performance characteristics, room for a ~5x improvement. +function lowercasefirst(s::AnnotatedString) + annotated_chartransform(s, true) do c, state + if state + (lowercase(c), false) + else + (c, state) + end + end +end + ############################################################################ # iterators for grapheme segmentation diff --git a/base/strings/util.jl b/base/strings/util.jl index 7a42d7fecfc91..87c2abab5344c 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1,13 +1,23 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -const Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}} +""" + Base.Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},AbstractSet{<:AbstractChar}} + +An alias type for a either single character or a tuple/vector/set of characters, used to describe arguments +of several string-matching functions such as [`startswith`](@ref) and [`strip`](@ref). + +!!! compat "Julia 1.11" + Julia versions prior to 1.11 only included `Set`, not `AbstractSet`, in `Base.Chars` types. +""" +const Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},AbstractSet{<:AbstractChar}} # starts with and ends with predicates """ - startswith(s::AbstractString, prefix::AbstractString) + startswith(s::AbstractString, prefix::Union{AbstractString,Base.Chars}) -Return `true` if `s` starts with `prefix`. If `prefix` is a vector or set +Return `true` if `s` starts with `prefix`, which can be a string, a character, +or a tuple/vector/set of characters. If `prefix` is a tuple/vector/set of characters, test whether the first character of `s` belongs to that set. See also [`endswith`](@ref), [`contains`](@ref). @@ -30,10 +40,11 @@ end startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str)::AbstractChar in chars """ - endswith(s::AbstractString, suffix::AbstractString) + endswith(s::AbstractString, suffix::Union{AbstractString,Base.Chars}) -Return `true` if `s` ends with `suffix`. If `suffix` is a vector or set of -characters, test whether the last character of `s` belongs to that set. +Return `true` if `s` ends with `suffix`, which can be a string, a character, +or a tuple/vector/set of characters. 
If `suffix` is a tuple/vector/set +of characters, test whether the last character of `s` belongs to that set. See also [`startswith`](@ref), [`contains`](@ref). @@ -70,7 +81,8 @@ end """ startswith(io::IO, prefix::Union{AbstractString,Base.Chars}) -Check if an `IO` object starts with a prefix. See also [`peek`](@ref). +Check if an `IO` object starts with a prefix, which can be either a string, a +character, or a tuple/vector/set of characters. See also [`peek`](@ref). """ function Base.startswith(io::IO, prefix::Base.Chars) mark(io) @@ -88,7 +100,6 @@ Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix)) function endswith(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) - cub = ncodeunits(b) astart = ncodeunits(a) - ncodeunits(b) + 1 if astart < 1 false @@ -369,6 +380,7 @@ function lstrip(f, s::AbstractString) end lstrip(s::AbstractString) = lstrip(isspace, s) lstrip(s::AbstractString, chars::Chars) = lstrip(in(chars), s) +lstrip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) """ rstrip([pred=isspace,] str::AbstractString) -> SubString @@ -402,6 +414,8 @@ function rstrip(f, s::AbstractString) end rstrip(s::AbstractString) = rstrip(isspace, s) rstrip(s::AbstractString, chars::Chars) = rstrip(in(chars), s) +rstrip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) + """ strip([pred=isspace,] str::AbstractString) -> SubString @@ -429,6 +443,7 @@ julia> strip("{3, 5}\\n", ['{', '}', '\\n']) """ strip(s::AbstractString) = lstrip(rstrip(s)) strip(s::AbstractString, chars::Chars) = lstrip(rstrip(s, chars), chars) +strip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) strip(f, s::AbstractString) = lstrip(f, rstrip(f, s)) ## string padding functions ## @@ -454,13 +469,20 @@ function lpad( s::Union{AbstractChar,AbstractString}, n::Integer, p::Union{AbstractChar,AbstractString}=' ', -) :: String +) + stringfn = if _isannotated(s) || _isannotated(p) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int - m ≤ 0 && return string(s) - l = textwidth(p) + m ≤ 0 && return stringfn(s) + l = Int(textwidth(p))::Int + if l == 0 + throw(ArgumentError("$(repr(p)) has zero textwidth" * (ncodeunits(p) != 1 ? "" : + "; maybe you want pad^max(0, npad - ncodeunits(str)) * str to pad by codeunits" * + (s isa AbstractString && codeunit(s) != UInt8 ? "?" : " (bytes)?")))) + end q, r = divrem(m, l) - r == 0 ? string(p^q, s) : string(p^q, first(p, r), s) + r == 0 ? stringfn(p^q, s) : stringfn(p^q, first(p, r), s) end """ @@ -484,13 +506,171 @@ function rpad( s::Union{AbstractChar,AbstractString}, n::Integer, p::Union{AbstractChar,AbstractString}=' ', -) :: String +) + stringfn = if _isannotated(s) || _isannotated(p) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int - m ≤ 0 && return string(s) - l = textwidth(p) + m ≤ 0 && return stringfn(s) + l = Int(textwidth(p))::Int + if l == 0 + throw(ArgumentError("$(repr(p)) has zero textwidth" * (ncodeunits(p) != 1 ? "" : + "; maybe you want str * pad^max(0, npad - ncodeunits(str)) to pad by codeunits" * + (s isa AbstractString && codeunit(s) != UInt8 ? "?" : " (bytes)?")))) + end q, r = divrem(m, l) - r == 0 ? 
string(s, p^q) : string(s, p^q, first(p, r)) + r == 0 ? stringfn(s, p^q) : stringfn(s, p^q, first(p, r)) +end + +""" + rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…') + +Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the last characters +with `replacement` if necessary. The default replacement string is "…". + +# Examples +```jldoctest +julia> s = rtruncate("🍕🍕 I love 🍕", 10) +"🍕🍕 I lo…" + +julia> textwidth(s) +10 + +julia> rtruncate("foo", 3) +"foo" +``` + +!!! compat "Julia 1.12" + This function was added in Julia 1.12. + +See also [`ltruncate`](@ref) and [`ctruncate`](@ref). +""" +function rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…') + ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:right)) + if isnothing(ret) + return string(str) + else + left, _ = ret::Tuple{Int,Int} + @views return str[begin:left] * replacement + end +end + +""" + ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…') + +Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the first characters +with `replacement` if necessary. The default replacement string is "…". + +# Examples +```jldoctest +julia> s = ltruncate("🍕🍕 I love 🍕", 10) +"…I love 🍕" + +julia> textwidth(s) +10 + +julia> ltruncate("foo", 3) +"foo" +``` + +!!! compat "Julia 1.12" + This function was added in Julia 1.12. + +See also [`rtruncate`](@ref) and [`ctruncate`](@ref). +""" +function ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…') + ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:left)) + if isnothing(ret) + return string(str) + else + _, right = ret::Tuple{Int,Int} + @views return replacement * str[right:end] + end +end + +""" + ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true) + +Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the middle characters +with `replacement` if necessary. The default replacement string is "…". By default, the truncation +prefers keeping chars on the left, but this can be changed by setting `prefer_left` to `false`. + +# Examples +```jldoctest +julia> s = ctruncate("🍕🍕 I love 🍕", 10) +"🍕🍕 …e 🍕" + +julia> textwidth(s) +10 + +julia> ctruncate("foo", 3) +"foo" +``` + +!!! compat "Julia 1.12" + This function was added in Julia 1.12. + +See also [`ltruncate`](@ref) and [`rtruncate`](@ref). 
+""" +function ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true) + ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:center), prefer_left) + if isnothing(ret) + return string(str) + else + left, right = ret::Tuple{Int,Int} + @views return str[begin:left] * replacement * str[right:end] + end +end + +# return whether textwidth(str) <= maxwidth +function check_textwidth(str::AbstractString, maxwidth::Integer) + # check efficiently for early return if str is wider than maxwidth + total_width = 0 + for c in str + total_width += textwidth(c) + total_width > maxwidth && return false + end + return true +end + +function string_truncate_boundaries( + str::AbstractString, + maxwidth::Integer, + replacement::Union{AbstractString,AbstractChar}, + ::Val{mode}, + prefer_left::Bool = true) where {mode} + maxwidth >= 0 || throw(ArgumentError("maxwidth $maxwidth should be non-negative")) + check_textwidth(str, maxwidth) && return nothing + + l0, _ = left, right = firstindex(str), lastindex(str) + width = textwidth(replacement) + # used to balance the truncated width on either side + rm_width_left, rm_width_right, force_other = 0, 0, false + @inbounds while true + if mode === :left || (mode === :center && (!prefer_left || left > l0)) + rm_width = textwidth(str[right]) + if mode === :left || (rm_width_right <= rm_width_left || force_other) + force_other = false + (width += rm_width) <= maxwidth || break + rm_width_right += rm_width + right = prevind(str, right) + else + force_other = true + end + end + if mode ∈ (:right, :center) + rm_width = textwidth(str[left]) + if mode === :left || (rm_width_left <= rm_width_right || force_other) + force_other = false + (width += textwidth(str[left])) <= maxwidth || break + rm_width_left += rm_width + left = nextind(str, left) + else + force_other = true + end + end + end + return prevind(str, left), nextind(str, right) end """ @@ -567,6 +747,8 @@ end # Specialization for partition(s,n) to return a SubString eltype(::Type{PartitionIterator{T}}) where {T<:AbstractString} = SubString{T} +# SubStrings do not nest +eltype(::Type{PartitionIterator{T}}) where {T<:SubString} = T function iterate(itr::PartitionIterator{<:AbstractString}, state = firstindex(itr.c)) state > ncodeunits(itr.c) && return nothing @@ -588,6 +770,101 @@ eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) wher eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) = eachsplit(str, isspace; limit, keepempty) +""" + eachrsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true) + eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) + +Return an iterator over `SubString`s of `str`, produced when splitting on +the delimiter(s) `dlm`, and yielded in reverse order (from right to left). +`dlm` can be any of the formats allowed by [`findprev`](@ref)'s first argument +(i.e. a string, a single character or a function), or a collection of characters. + +If `dlm` is omitted, it defaults to [`isspace`](@ref), and `keepempty` default to `false`. + +The optional keyword arguments are: + - If `limit > 0`, the iterator will split at most `limit - 1` times before returning + the rest of the string unsplit. `limit < 1` implies no cap to splits (default). + - `keepempty`: whether empty fields should be returned when iterating + Default is `false` without a `dlm` argument, `true` with a `dlm` argument. 
+ +Note that unlike [`split`](@ref), [`rsplit`](@ref) and [`eachsplit`](@ref), this +function iterates the substrings right to left as they occur in the input. + +See also [`eachsplit`](@ref), [`rsplit`](@ref). + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +# Examples +```jldoctest +julia> a = "Ma.r.ch"; + +julia> collect(eachrsplit(a, ".")) == ["ch", "r", "Ma"] +true + +julia> collect(eachrsplit(a, "."; limit=2)) == ["ch", "Ma.r"] +true +``` +""" +function eachrsplit end + +struct RSplitIterator{S <: AbstractString, F} + str::S + splitter::F + limit::Int + keepempty::Bool +end + +eltype(::Type{<:RSplitIterator{T}}) where T = SubString{T} +eltype(::Type{<:RSplitIterator{<:SubString{T}}}) where T = SubString{T} + +IteratorSize(::Type{<:RSplitIterator}) = SizeUnknown() + +eachrsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} = + RSplitIterator(str, splitter, limit, keepempty) + +eachrsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}; + limit::Integer=0, keepempty=true) where {T<:AbstractString} = + eachrsplit(str, in(splitter); limit, keepempty) + +eachrsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) where {T<:AbstractString} = + eachrsplit(str, isequal(splitter); limit, keepempty) + +# a bit oddball, but standard behavior in Perl, Ruby & Python: +eachrsplit(str::AbstractString; limit::Integer=0, keepempty=false) = + eachrsplit(str, isspace; limit, keepempty) + +function Base.iterate(it::RSplitIterator, (to, remaining_splits)=(lastindex(it.str), it.limit-1)) + to < 0 && return nothing + from = 1 + next_to = -1 + while !iszero(remaining_splits) + pos = findprev(it.splitter, it.str, to) + # If no matches: It returns the rest of the string, then the iterator stops. + if pos === nothing + from = 1 + next_to = -1 + break + else + from = nextind(it.str, last(pos)) + # pos can be empty if we search for a zero-width delimiter, in which + # case pos is to:to-1. + # In this case, next_to must be to - 1, except if to is 0 or 1, in + # which case, we must stop iteration for some reason. + next_to = (isempty(pos) & (to < 2)) ? -1 : prevind(it.str, first(pos)) + + # If the element we emit is empty, discard it based on keepempty + if from > to && !(it.keepempty) + to = next_to + continue + end + break + end + end + from > to && !(it.keepempty) && return nothing + return (SubString(it.str, from, to), (next_to, remaining_splits-1)) +end + """ split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true) split(str::AbstractString; limit::Integer=0, keepempty::Bool=false) @@ -656,37 +933,15 @@ julia> rsplit(a, "."; limit=2) "h" ``` """ -function rsplit end - function rsplit(str::T, splitter; - limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} - _rsplit(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[]) -end -function rsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}; - limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} - _rsplit(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[]) -end -function rsplit(str::T, splitter::AbstractChar; - limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} - _rsplit(str, isequal(splitter), limit, keepempty, T <: SubString ? 
T[] : SubString{T}[]) -end - -function _rsplit(str::AbstractString, splitter, limit::Integer, keepempty::Bool, strs::Array) - n = lastindex(str)::Int - r = something(findlast(splitter, str)::Union{Nothing,Int,UnitRange{Int}}, 0) - j, k = first(r), last(r) - while j > 0 && k > 0 && length(strs) != limit-1 - (keepempty || k < n) && pushfirst!(strs, @inbounds SubString(str,nextind(str,k)::Int,n)) - n = prevind(str, j)::Int - r = something(findprev(splitter,str,n)::Union{Nothing,Int,UnitRange{Int}}, 0) - j, k = first(r), last(r) - end - (keepempty || n > 0) && pushfirst!(strs, SubString(str,1,n)) - return strs + limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} + reverse!(collect(eachrsplit(str, splitter; limit, keepempty))) end + +# a bit oddball, but standard behavior in Perl, Ruby & Python: rsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) = - rsplit(str, isspace; limit=limit, keepempty=keepempty) + rsplit(str, isspace; limit, keepempty) _replace(io, repl, str, r, pattern) = print(io, repl) _replace(io, repl::Function, str, r, pattern) = @@ -700,12 +955,11 @@ _free_pat_replacer(x) = nothing _pat_replacer(x::AbstractChar) = isequal(x) _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x) -function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N - count == 0 && return str +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) - n = 1 - e1 = nextind(str, lastindex(str)) # sizeof(str) - i = a = firstindex(str) + e1 = nextind(str, lastindex(str)) # sizeof(str)+1 + a = firstindex(str) patterns = map(p -> _pat_replacer(first(p)), pat_repl) replaces = map(last, pat_repl) rs = map(patterns) do p @@ -716,11 +970,14 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I r isa Int && (r = r:r) # findnext / performance fix return r end - if all(>(e1), map(first, rs)) - foreach(_free_pat_replacer, patterns) - return str - end - out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return e1, patterns, replaces, rs, all(>(e1), map(first, rs)) +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_finish(io::IO, str, count::Int, + e1::Int, patterns::Tuple, replaces::Tuple, rs::Tuple) + n = 1 + i = a = firstindex(str) while true p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ? 
r = rs[p] @@ -728,9 +985,9 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I j > e1 && break if i == a || i <= k # copy out preserved portion - GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i)) + GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i)) # copy out replacement string - _replace(out, replaces[p], str, r, patterns[p]) + _replace(io, replaces[p], str, r, patterns[p]) end if k < j i = j @@ -755,13 +1012,39 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I n += 1 end foreach(_free_pat_replacer, patterns) - write(out, SubString(str, i)) - return String(take!(out)) + write(io, SubString(str, i)) + return io +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_(io::IO, str, pat_repl::NTuple{N, Pair}, count::Int) where N + if count == 0 + write(io, str) + return io + end + e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + if notfound + foreach(_free_pat_replacer, patterns) + write(io, str) + return io + end + return _replace_finish(io, str, count, e1, patterns, replaces, rs) end +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N + count == 0 && return String(str) + e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + if notfound + foreach(_free_pat_replacer, patterns) + return String(str) + end + out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return String(take!(_replace_finish(out, str, count, e1, patterns, replaces, rs))) +end """ - replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) + replace([io::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences. @@ -774,6 +1057,11 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then references in `r` are replaced with the corresponding matched text. To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`). +The return value is a new string after the replacements. If the `io::IO` argument +is supplied, the transformed string is instead written to `io` (returning `io`). +(For example, this can be used in conjunction with an [`IOBuffer`](@ref) to re-use +a pre-allocated buffer array in-place.) + Multiple patterns can be specified, and they will be applied left-to-right simultaneously, so only one pattern will be applied to any character, and the patterns will only be applied to the input text, not the replacements. @@ -781,6 +1069,9 @@ patterns will only be applied to the input text, not the replacements. !!! compat "Julia 1.7" Support for multiple patterns requires version 1.7. +!!! compat "Julia 1.10" + The `io::IO` argument requires version 1.10. + # Examples ```jldoctest julia> replace("Python is a programming language.", "Python" => "Julia") @@ -799,8 +1090,12 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a") "bca" ``` """ +replace(io::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = + _replace_(io, String(s), pat_f, Int(count)) + replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - replace(String(s), pat_f..., count=count) + _replace_(String(s), pat_f, Int(count)) + # TODO: allow transform as the first argument to replace? 
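The `io::IO` method of `replace` documented above writes the transformed string into a caller-supplied stream rather than allocating a fresh `String`, which pairs naturally with a reusable `IOBuffer`. A minimal usage sketch, assuming a Julia version that has this method (1.10 or later, per the compat note):

```julia
buf = IOBuffer()                                    # pre-allocated, reusable output buffer
replace(buf, "one two two", "two" => "2"; count=1)  # writes the result into buf and returns buf
String(take!(buf))                                  # "one 2 two"
```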
@@ -926,12 +1221,12 @@ function bytes2hex end function bytes2hex(itr) eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8")) - b = Base.StringVector(2*length(itr)) + b = Base.StringMemory(2*length(itr)) @inbounds for (i, x) in enumerate(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return String(b) + return unsafe_takestring(b) end function bytes2hex(io::IO, itr) diff --git a/base/subarray.jl b/base/subarray.jl index 901410e908d1e..eacaddc068f1f 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -52,8 +52,10 @@ viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@inline; viewindexing(tail( # A UnitRange can follow Slices, but only if all other indices are scalar viewindexing(I::Tuple{Slice, AbstractUnitRange, Vararg{ScalarIndex}}) = IndexLinear() viewindexing(I::Tuple{Slice, Slice, Vararg{ScalarIndex}}) = IndexLinear() # disambiguate -# In general, ranges are only fast if all other indices are scalar -viewindexing(I::Tuple{AbstractRange, Vararg{ScalarIndex}}) = IndexLinear() +# In general, scalar ranges are only fast if all other indices are scalar +# Other ranges, such as those of `CartesianIndex`es, are not fast even if these +# are followed by `ScalarIndex`es +viewindexing(I::Tuple{AbstractRange{<:ScalarIndex}, Vararg{ScalarIndex}}) = IndexLinear() # All other index combinations are slow viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian() # Of course, all other array types are slow @@ -108,16 +110,44 @@ unaliascopy(A::SubArray) = typeof(A)(unaliascopy(A.parent), map(unaliascopy, A.i # When the parent is an Array we can trim the size down a bit. In the future this # could possibly be extended to any mutable array. -function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD} - dest = Array{T}(undef, index_lengths(V.indices...)) - copyto!(dest, V) - SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD)) -end +function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{ScalarIndex,AbstractRange{<:ScalarIndex},Array{<:Union{ScalarIndex,AbstractCartesianIndex}}}}},LD} + dest = Array{T}(undef, _trimmedshape(V.indices...)) + trimmedpind = _trimmedpind(V.indices...) + vdest = trimmedpind isa Tuple{Vararg{Union{Slice,Colon}}} ? dest : view(dest, trimmedpind...) + copyto!(vdest, view(V, _trimmedvind(V.indices...)...)) + indices = map(_trimmedindex, V.indices) + stride1 = LD ? compute_stride1(dest, indices) : 0 + offset1 = LD ? compute_offset1(dest, stride1, indices) : 0 + SubArray{T,N,A,I,LD}(dest, indices, offset1, stride1) +end +# Get the proper trimmed shape +_trimmedshape(::ScalarIndex, rest...) = (1, _trimmedshape(rest...)...) +_trimmedshape(i::AbstractRange, rest...) = (isempty(i) ? zero(eltype(i)) : maximum(i), _trimmedshape(rest...)...) +_trimmedshape(i::Union{UnitRange,StepRange,OneTo}, rest...) = (length(i), _trimmedshape(rest...)...) +_trimmedshape(i::AbstractArray{<:ScalarIndex}, rest...) = (length(i), _trimmedshape(rest...)...) +_trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedshape(rest...) +_trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{N}}, rest...) where {N} = (length(i), ntuple(Returns(1), Val(N - 1))..., _trimmedshape(rest...)...) +_trimmedshape() = () +# We can avoid the repeation from `AbstractArray{CartesianIndex{0}}` +_trimmedpind(i, rest...) = (map(Returns(:), axes(i))..., _trimmedpind(rest...)...) +_trimmedpind(i::AbstractRange, rest...) = (i, _trimmedpind(rest...)...) 
+_trimmedpind(i::Union{UnitRange,StepRange,OneTo}, rest...) = ((:), _trimmedpind(rest...)...) +_trimmedpind(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedpind(rest...) +_trimmedpind() = () +_trimmedvind(i, rest...) = (map(Returns(:), axes(i))..., _trimmedvind(rest...)...) +_trimmedvind(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = (map(first, axes(i))..., _trimmedvind(rest...)...) +_trimmedvind() = () # Transform indices to be "dense" -_trimmedindex(i::Real) = oftype(i, 1) -_trimmedindex(i::AbstractUnitRange) = oftype(i, oneto(length(i))) -_trimmedindex(i::AbstractArray) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i))) - +_trimmedindex(i::ScalarIndex) = oftype(i, 1) +_trimmedindex(i::AbstractRange) = i +_trimmedindex(i::Union{UnitRange,StepRange,OneTo}) = oftype(i, oneto(length(i))) +_trimmedindex(i::AbstractArray{<:ScalarIndex}) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i))) +_trimmedindex(i::AbstractArray{<:AbstractCartesianIndex{0}}) = oftype(i, copy(i)) +function _trimmedindex(i::AbstractArray{<:AbstractCartesianIndex{N}}) where {N} + padding = ntuple(Returns(1), Val(N - 1)) + ax1 = eachindex(IndexLinear(), i) + return oftype(i, reshape(CartesianIndices((ax1, padding...)), axes(i))) +end ## SubArray creation # We always assume that the dimensionality of the parent matches the number of # indices that end up getting passed to it, so we store the parent as a @@ -127,6 +157,11 @@ _maybe_reshape_parent(A::AbstractArray, ::NTuple{1, Bool}) = reshape(A, Val(1)) _maybe_reshape_parent(A::AbstractArray{<:Any,1}, ::NTuple{1, Bool}) = reshape(A, Val(1)) _maybe_reshape_parent(A::AbstractArray{<:Any,N}, ::NTuple{N, Bool}) where {N} = A _maybe_reshape_parent(A::AbstractArray, ::NTuple{N, Bool}) where {N} = reshape(A, Val(N)) +# The trailing singleton indices could be eliminated after bounds checking. +rm_singleton_indices(ndims::Tuple, J1, Js...) = (J1, rm_singleton_indices(IteratorsMD._splitrest(ndims, index_ndims(J1)), Js...)...) +rm_singleton_indices(::Tuple{}, ::ScalarIndex, Js...) = rm_singleton_indices((), Js...) +rm_singleton_indices(::Tuple) = () + """ view(A, inds...) @@ -173,22 +208,15 @@ julia> view(2:5, 2:3) # returns a range as type is immutable 3:4 ``` """ -function view(A::AbstractArray{<:Any,N}, I::Vararg{Any,M}) where {N,M} +function view(A::AbstractArray, I::Vararg{Any,M}) where {M} @inline J = map(i->unalias(A,i), to_indices(A, I)) @boundscheck checkbounds(A, J...) - if length(J) > ndims(A) && J[N+1:end] isa Tuple{Vararg{Int}} - # view([1,2,3], :, 1) does not need to reshape - return unsafe_view(A, J[1:N]...) - end - unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...) + J′ = rm_singleton_indices(ntuple(Returns(true), Val(ndims(A))), J...) + unsafe_view(_maybe_reshape_parent(A, index_ndims(J′...)), J′...) 
end # Ranges implement getindex to return recomputed ranges; use that for views, too (when possible) -function view(r1::OneTo, r2::OneTo) - @_propagate_inbounds_meta - getindex(r1, r2) -end function view(r1::AbstractUnitRange, r2::AbstractUnitRange{<:Integer}) @_propagate_inbounds_meta getindex(r1, r2) @@ -291,37 +319,66 @@ end # But SubArrays with fast linear indexing pre-compute a stride and offset FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true} +# We define a convenience functions to compute the shifted parent index +# This differs from reindex as this accepts the view directly, instead of its indices +@inline _reindexlinear(V::FastSubArray, i::Int) = V.offset1 + V.stride1*i +@inline _reindexlinear(V::FastSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ V.stride1 .* i + function getindex(V::FastSubArray, i::Int) @inline @boundscheck checkbounds(V, i) - @inbounds r = V.parent[V.offset1 + V.stride1*i] - r -end -# We can avoid a multiplication if the first parent index is a Colon or AbstractUnitRange, -# or if all the indices are scalars, i.e. the view is for a single value only -FastContiguousSubArray{T,N,P,I<:Union{Tuple{Union{Slice, AbstractUnitRange}, Vararg{Any}}, - Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true} -function getindex(V::FastContiguousSubArray, i::Int) - @inline - @boundscheck checkbounds(V, i) - @inbounds r = V.parent[V.offset1 + i] + @inbounds r = V.parent[_reindexlinear(V, i)] r end + # For vector views with linear indexing, we disambiguate to favor the stride/offset # computation as that'll generally be faster than (or just as fast as) re-indexing into a range. function getindex(V::FastSubArray{<:Any, 1}, i::Int) @inline @boundscheck checkbounds(V, i) - @inbounds r = V.parent[V.offset1 + V.stride1*i] + @inbounds r = V.parent[_reindexlinear(V, i)] r end -function getindex(V::FastContiguousSubArray{<:Any, 1}, i::Int) + +# We can avoid a multiplication if the first parent index is a Colon or AbstractUnitRange, +# or if all the indices are scalars, i.e. the view is for a single value only +FastContiguousSubArray{T,N,P,I<:Union{Tuple{AbstractUnitRange, Vararg{Any}}, + Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true} + +@inline _reindexlinear(V::FastContiguousSubArray, i::Int) = V.offset1 + i +@inline _reindexlinear(V::FastContiguousSubArray, i::AbstractUnitRange{Int}) = V.offset1 .+ i + +""" +An internal type representing arrays stored contiguously in memory. +""" +const DenseArrayType{T,N} = Union{ + DenseArray{T,N}, + <:FastContiguousSubArray{T,N,<:DenseArray}, +} + +""" +An internal type representing mutable arrays stored contiguously in memory. +""" +const MutableDenseArrayType{T,N} = Union{ + Array{T, N}, + Memory{T}, + FastContiguousSubArray{T,N,<:Array}, + FastContiguousSubArray{T,N,<:Memory} +} + +# parents of FastContiguousSubArrays may support fast indexing with AbstractUnitRanges, +# so we may just forward the indexing to the parent +# This may only be done for non-offset ranges, as the result would otherwise have offset axes +const _OneBasedRanges = Union{OneTo{Int}, UnitRange{Int}, Slice{OneTo{Int}}, IdentityUnitRange{OneTo{Int}}} +function getindex(V::FastContiguousSubArray, i::_OneBasedRanges) @inline @boundscheck checkbounds(V, i) - @inbounds r = V.parent[V.offset1 + i] + @inbounds r = V.parent[_reindexlinear(V, i)] r end +@inline getindex(V::FastContiguousSubArray, i::Colon) = getindex(V, to_indices(V, (:,))...) 
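A short sketch of the behavior the `_OneBasedRanges` method above enables (assuming the surrounding definitions; the variables are illustrative): indexing a fast contiguous view with a one-based range is shifted by `offset1` and forwarded to the parent, so the result matches slicing the parent directly.

```julia
a = collect(1:10)
v = view(a, 3:8)   # a FastContiguousSubArray with offset1 == 2
v[1:2]             # forwarded as a[2 .+ (1:2)], i.e. [3, 4]
v[:]               # the Colon method goes through to_indices, giving collect(v)
```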
+ # Indexed assignment follows the same pattern as `getindex` above function setindex!(V::SubArray{T,N}, x, I::Vararg{Int,N}) where {T,N} @inline @@ -332,28 +389,25 @@ end function setindex!(V::FastSubArray, x, i::Int) @inline @boundscheck checkbounds(V, i) - @inbounds V.parent[V.offset1 + V.stride1*i] = x - V -end -function setindex!(V::FastContiguousSubArray, x, i::Int) - @inline - @boundscheck checkbounds(V, i) - @inbounds V.parent[V.offset1 + i] = x + @inbounds V.parent[_reindexlinear(V, i)] = x V end function setindex!(V::FastSubArray{<:Any, 1}, x, i::Int) @inline @boundscheck checkbounds(V, i) - @inbounds V.parent[V.offset1 + V.stride1*i] = x + @inbounds V.parent[_reindexlinear(V, i)] = x V end -function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int) + +function setindex!(V::FastSubArray, x, i::AbstractUnitRange{Int}) @inline @boundscheck checkbounds(V, i) - @inbounds V.parent[V.offset1 + i] = x + @inbounds V.parent[_reindexlinear(V, i)] = x V end +@inline setindex!(V::FastSubArray, x, i::Colon) = setindex!(V, x, to_indices(V, (i,))...) + function isassigned(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N} @inline @boundscheck checkbounds(Bool, V, I...) || return false @@ -363,30 +417,17 @@ end function isassigned(V::FastSubArray, i::Int) @inline @boundscheck checkbounds(Bool, V, i) || return false - @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) - r -end -function isassigned(V::FastContiguousSubArray, i::Int) - @inline - @boundscheck checkbounds(Bool, V, i) || return false - @inbounds r = isassigned(V.parent, V.offset1 + i) + @inbounds r = isassigned(V.parent, _reindexlinear(V, i)) r end function isassigned(V::FastSubArray{<:Any, 1}, i::Int) @inline @boundscheck checkbounds(Bool, V, i) || return false - @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) - r -end -function isassigned(V::FastContiguousSubArray{<:Any, 1}, i::Int) - @inline - @boundscheck checkbounds(Bool, V, i) || return false - @inbounds r = isassigned(V.parent, V.offset1 + i) + @inbounds r = isassigned(V.parent, _reindexlinear(V, i)) r end IndexStyle(::Type{<:FastSubArray}) = IndexLinear() -IndexStyle(::Type{<:SubArray}) = IndexCartesian() # Strides are the distance in memory between adjacent elements in a given dimension # which we determine from the strides of the parent @@ -396,7 +437,8 @@ substrides(strds::Tuple{}, ::Tuple{}) = () substrides(strds::NTuple{N,Int}, I::Tuple{ScalarIndex, Vararg{Any}}) where N = (substrides(tail(strds), tail(I))...,) substrides(strds::NTuple{N,Int}, I::Tuple{Slice, Vararg{Any}}) where N = (first(strds), substrides(tail(strds), tail(I))...) substrides(strds::NTuple{N,Int}, I::Tuple{AbstractRange, Vararg{Any}}) where N = (first(strds)*step(I[1]), substrides(tail(strds), tail(I))...) -substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("strides is invalid for SubArrays with indices of type $(typeof(I[1]))")) +substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError( + LazyString("strides is invalid for SubArrays with indices of type ", typeof(I[1])))) stride(V::SubArray, d::Integer) = d <= ndims(V) ? 
strides(V)[d] : strides(V)[end] * size(V)[end] @@ -408,7 +450,7 @@ compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) = (@inline; compute_stride1(s*length(inds[1]), tail(inds), tail(I))) compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1]) compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s -compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))")) +compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError(LazyString("invalid strided index type ", typeof(I[1])))) elsize(::Type{<:SubArray{<:Any,<:Any,P}}) where {P} = elsize(P) @@ -416,12 +458,8 @@ iscontiguous(A::SubArray) = iscontiguous(typeof(A)) iscontiguous(::Type{<:SubArray}) = false iscontiguous(::Type{<:FastContiguousSubArray}) = true -first_index(V::FastSubArray) = V.offset1 + V.stride1 # cached for fast linear SubArrays -function first_index(V::SubArray) - P, I = parent(V), V.indices - s1 = compute_stride1(P, I) - s1 + compute_offset1(P, s1, I) -end +first_index(V::FastSubArray) = V.offset1 + V.stride1 * firstindex(V) # cached for fast linear SubArrays +first_index(V::SubArray) = compute_linindex(parent(V), V.indices) # Computing the first index simply steps through the indices, accumulating the # sum of index each multiplied by the parent's stride. @@ -447,11 +485,6 @@ function compute_linindex(parent, I::NTuple{N,Any}) where N IP = fill_to_length(axes(parent), OneTo(1), Val(N)) compute_linindex(first(LinearIndices(parent)), 1, IP, I) end -function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}}) - @inline - Δi = I[1]-first(IP[1]) - compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I)) -end function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}}) @inline Δi = first(I[1])-first(IP[1]) @@ -466,10 +499,6 @@ find_extended_inds(::ScalarIndex, I...) = (@inline; find_extended_inds(I...)) find_extended_inds(i1, I...) = (@inline; (i1, find_extended_inds(I...)...)) find_extended_inds() = () -function unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P} - return unsafe_convert(Ptr{T}, V.parent) + _memory_offset(V.parent, map(first, V.indices)...) -end - pointer(V::FastSubArray, i::Int) = pointer(V.parent, V.offset1 + V.stride1*i) pointer(V::FastContiguousSubArray, i::Int) = pointer(V.parent, V.offset1 + i) @@ -494,3 +523,13 @@ function _indices_sub(i1::AbstractArray, I...) end has_offset_axes(S::SubArray) = has_offset_axes(S.indices...) 
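For orientation, a small sketch of how the stride machinery above behaves for views of an `Array` (this behavior predates the patch; only the error-message construction switched to `LazyString`). The array `A` is illustrative:

```julia
A = rand(5, 4)                # parent strides are (1, 5)
strides(view(A, 1:2:5, :))    # (2, 5): range step times the parent stride
strides(view(A, 2, :))        # (5,): scalar indices drop their dimension
strides(view(A, [1, 3], :))   # throws ArgumentError: not a strided view
```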
+ +function replace_in_print_matrix(S::SubArray{<:Any,2,<:AbstractMatrix}, i::Integer, j::Integer, s::AbstractString) + replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,j)))..., s) +end +function replace_in_print_matrix(S::SubArray{<:Any,1,<:AbstractVector}, i::Integer, j::Integer, s::AbstractString) + replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,)))..., j, s) +end + +# XXX: this is considerably more unsafe than the other similarly named methods +unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) diff --git a/base/summarysize.jl b/base/summarysize.jl index 9bbae187cab12..4f2646c7641b7 100644 --- a/base/summarysize.jl +++ b/base/summarysize.jl @@ -8,6 +8,9 @@ struct SummarySize chargeall::Any end +nth_pointer_isdefined(obj, i::Int) = ccall(:jl_nth_pointer_isdefined, Cint, (Any, Csize_t), obj, i-1) != 0 +get_nth_pointer(obj, i::Int) = ccall(:jl_get_nth_pointer, Any, (Any, Csize_t), obj, i-1) + """ Base.summarysize(obj; exclude=Union{...}, chargeall=Union{...}) -> Int @@ -49,16 +52,29 @@ function summarysize(obj; if isassigned(x, i) val = x[i] end - elseif isa(x, Array) - nf = length(x) - if ccall(:jl_array_isassigned, Cint, (Any, UInt), x, i - 1) != 0 - val = x[i] + elseif isa(x, GenericMemory) + T = eltype(x) + if Base.allocatedinline(T) + np = datatype_npointers(T) + nf = length(x) * np + idx = (i-1) ÷ np + 1 + if @inbounds @inline isassigned(x, idx) + elt = x[idx] + p = (i-1) % np + 1 + if nth_pointer_isdefined(elt, p) + val = get_nth_pointer(elt, p) + end + end + else + nf = length(x) + if @inbounds @inline isassigned(x, i) + val = x[i] + end end else - nf = nfields(x) - ft = typeof(x).types - if !isbitstype(ft[i]) && isdefined(x, i) - val = getfield(x, i) + nf = datatype_npointers(typeof(x)) + if nth_pointer_isdefined(x, i) + val = get_nth_pointer(x, i) end end if nf > i @@ -82,7 +98,7 @@ end # and so is somewhat approximate. key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj) haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true) - if nfields(obj) > 0 + if datatype_npointers(typeof(obj)) > 0 push!(ss.frontier_x, obj) push!(ss.frontier_i, 1) end @@ -126,14 +142,14 @@ function (ss::SummarySize)(obj::Core.TypeName) return Core.sizeof(obj) + (isdefined(obj, :mt) ? ss(obj.mt) : 0) end -function (ss::SummarySize)(obj::Array) +function (ss::SummarySize)(obj::GenericMemory) haskey(ss.seen, obj) ? (return 0) : (ss.seen[obj] = true) - headersize = 4*sizeof(Int) + 8 + max(0, ndims(obj)-2)*sizeof(Int) + headersize = 2*sizeof(Int) size::Int = headersize datakey = unsafe_convert(Ptr{Cvoid}, obj) if !haskey(ss.seen, datakey) ss.seen[datakey] = true - dsize = Core.sizeof(obj) + dsize = sizeof(obj) T = eltype(obj) if isbitsunion(T) # add 1 union selector byte for each element diff --git a/base/sysimg.jl b/base/sysimg.jl index 09ea015b0f903..42f54a849f157 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -1,12 +1,64 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -Core.include(Main, "Base.jl") +# Can be loaded on top of either an existing system image built from +# `Base_compiler.jl` or standalone, in which case we will build it now. 
+let had_compiler = isdefined(Main, :Base) +if had_compiler; else +include("Base_compiler.jl") +end + +Core.include(Base, "Base.jl") + +had_compiler && ccall(:jl_init_restored_module, Cvoid, (Any,), Base) +end using .Base # Set up Main module using Base.MainInclude # ans, err, and sometimes Out -import Base.MainInclude: eval, include + +# These definitions call Base._include rather than Base.include to get +# one-frame stacktraces for the common case of using include(fname) in Main. + +""" + include([mapexpr::Function,] path::AbstractString) + +Evaluate the contents of the input source file in the global scope of the containing module. +Every module (except those defined with `baremodule`) has its own +definition of `include`, which evaluates the file in that module. +Returns the result of the last evaluated expression of the input file. During including, +a task-local include path is set to the directory containing the file. Nested calls to +`include` will search relative to that path. This function is typically used to load source +interactively, or to combine files in packages that are broken into multiple source files. +The argument `path` is normalized using [`normpath`](@ref) which will resolve +relative path tokens such as `..` and convert `/` to the appropriate path separator. + +The optional first argument `mapexpr` can be used to transform the included code before +it is evaluated: for each parsed expression `expr` in `path`, the `include` function +actually evaluates `mapexpr(expr)`. If it is omitted, `mapexpr` defaults to [`identity`](@ref). + +Use [`Base.include`](@ref) to evaluate a file into another module. + +!!! note + Julia's syntax lowering recognizes an explicit call to a literal `include` + at top-level and inserts an implicit `@Core.latestworld` to make any include'd + definitions visible to subsequent code. Note however that this recognition + is *syntactic*. I.e. assigning `const myinclude = include` may require + an explicit `@Core.latestworld` call after `myinclude`. + +!!! compat "Julia 1.5" + Julia 1.5 is required for passing the `mapexpr` argument. +""" +const include = Base.IncludeInto(Main) + +""" + eval(expr) + +Evaluate an expression in the global scope of the containing module. +Every `Module` (except those defined with `baremodule`) has its own 1-argument +definition of `eval`, which evaluates expressions in that module. +""" +const eval = Core.EvalInto(Main) # Ensure this file is also tracked pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__))) @@ -29,55 +81,36 @@ let # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl # Run with the `--exclude-jlls` option to filter out all JLL packages - stdlibs = [ - # No dependencies - :ArgTools, - :Artifacts, - :Base64, - :CRC32c, - :FileWatching, - :Libdl, - :Logging, - :Mmap, - :NetworkOptions, - :SHA, - :Serialization, - :Sockets, - :Unicode, - - # 1-depth packages - :LinearAlgebra, - :Markdown, - :Printf, - :Random, - :Tar, - - # 2-depth packages - :Dates, - :Future, - :InteractiveUtils, - :LibGit2, - :UUIDs, - - # 3-depth packages - :REPL, - :TOML, - - # 4-depth packages - :LibCURL, - - # 5-depth packages - :Downloads, - - # 6-depth packages - :Pkg, - ] + if isdefined(Base.BuildSettings, :INCLUDE_STDLIBS) + # e.g. INCLUDE_STDLIBS = "FileWatching,Libdl,Artifacts,SHA,Sockets,LinearAlgebra,Random" + stdlibs = Symbol.(split(Base.BuildSettings.INCLUDE_STDLIBS, ",")) + else + # TODO: this is included for compatibility with PackageCompiler, which looks for it.
+ # This should eventually be removed so we only use `BuildSettings`. + stdlibs = [ + # No dependencies + :FileWatching, # used by loading.jl -- implicit assumption that init runs + :Libdl, # Transitive through LinAlg + :Artifacts, # Transitive through LinAlg + :SHA, # transitive through Random + :Sockets, # used by stream.jl + + # Transitive through LingAlg + # OpenBLAS_jll + # libblastrampoline_jll + + # 1-depth packages + :LinearAlgebra, # Commits type-piracy and GEMM + :Random, # Can't be removed due to rand being exported by Base + ] + end # PackageCompiler can filter out stdlibs so it can be empty maxlen = maximum(textwidth.(string.(stdlibs)); init=0) tot_time_stdlib = 0.0 # use a temp module to avoid leaving the type of this closure in Main - m = Module() + push!(empty!(LOAD_PATH), "@stdlib") + m = Core.Module() GC.@preserve m begin print_time = @eval m (mod, t) -> (print(rpad(string(mod) * " ", $maxlen + 3, "─")); Base.time_print(stdout, t * 10^9); println()) @@ -89,8 +122,9 @@ let print_time(stdlib, tt) end for dep in Base._require_dependencies - dep[3] == 0.0 && continue - push!(Base._included_files, dep[1:2]) + mod, path, fsize, mtime = dep[1], dep[2], dep[3], dep[5] + (fsize == 0 || mtime == 0.0) && continue + push!(Base._included_files, (mod, path)) end empty!(Base._require_dependencies) Base._track_dependencies[] = false @@ -105,6 +139,7 @@ let Base.init_load_path() # want to be able to find external packages in userimg.jl ccall(:jl_clear_implicit_imports, Cvoid, (Any,), Main) + tot_time_userimg = @elapsed (isfile("userimg.jl") && Base.include(Main, "userimg.jl")) tot_time_base = (Base.end_base_include - Base.start_base_include) * 10.0^(-9) @@ -124,6 +159,7 @@ end empty!(Base.TOML_CACHE.d) Base.TOML.reinit!(Base.TOML_CACHE.p, "") +@eval Base BUILDROOT = "" @eval Sys begin BINDIR = "" STDLIB = "" diff --git a/base/sysinfo.jl b/base/sysinfo.jl index 2c962088484e7..7dab313cf4f57 100644 --- a/base/sysinfo.jl +++ b/base/sysinfo.jl @@ -33,6 +33,9 @@ export BINDIR, iswindows, isjsvm, isexecutable, + isreadable, + iswritable, + username, which import ..Base: show @@ -53,6 +56,8 @@ global STDLIB::String = "$BINDIR/../share/julia/stdlib/v$(VERSION.major).$(VERSI # In case STDLIB change after julia is built, the variable below can be used # to update cached method locations to updated ones. const BUILD_STDLIB_PATH = STDLIB +# Similarly, this is the root of the julia repo directory that julia was built from +const BUILD_ROOT_PATH = "$BINDIR/../.." # helper to avoid triggering precompile warnings @@ -97,7 +102,45 @@ Standard word size on the current machine, in bits. """ const WORD_SIZE = Core.sizeof(Int) * 8 -global SC_CLK_TCK::Clong, CPU_NAME::String, JIT::String +""" + Sys.SC_CLK_TCK: + +The number of system "clock ticks" per second, corresponding to `sysconf(_SC_CLK_TCK)` on +POSIX systems, or `0` if it is unknown. + +CPU times, e.g. as returned by `Sys.cpu_info()`, are in units of ticks, i.e. units of `1 / Sys.SC_CLK_TCK` seconds if `Sys.SC_CLK_TCK > 0`. +""" +global SC_CLK_TCK::Clong + +""" + Sys.CPU_NAME::String + +A string representing the name of CPU. + +# Examples +For example, `Sys.CPU_NAME` might equal `"tigerlake"` on an +[Intel Core "Tiger Lake" CPU](https://en.wikipedia.org/wiki/Tiger_Lake), +or `"apple-m1"` on an [Apple M1 CPU](https://en.wikipedia.org/wiki/Apple_M1). + +Note: Included in the detailed system information via `versioninfo(verbose=true)`. 
+""" +global CPU_NAME::String + +""" + Sys.JIT::String + +A string representing the specific Just-In-Time (JIT) compiler being utilized in the current runtime. + +# Examples +Currently, this equals `"ORCJIT"` for the LLVM "ORC" ("On-Request Compilation") JIT library: +```jldoctest +julia> Sys.JIT +"ORCJIT" +``` + +Note: Included in the detailed system information via `versioninfo(verbose=true)`. +""" +global JIT::String function __init__() env_threads = nothing @@ -106,7 +149,7 @@ function __init__() end global CPU_THREADS = if env_threads !== nothing env_threads = tryparse(Int, env_threads) - if !(env_threads isa Int && env_threads > 0) + if env_threads === nothing || env_threads <= 0 env_threads = Int(ccall(:jl_cpu_threads, Int32, ())) Core.print(Core.stderr, "WARNING: couldn't parse `JULIA_CPU_THREADS` environment variable. Defaulting Sys.CPU_THREADS to $env_threads.\n") end @@ -124,7 +167,7 @@ end # without pulling in anything unnecessary like `CPU_NAME` function __init_build() global BINDIR = ccall(:jl_get_julia_bindir, Any, ())::String - vers = "v$(VERSION.major).$(VERSION.minor)" + vers = "v$(string(VERSION.major)).$(string(VERSION.minor))" global STDLIB = abspath(BINDIR, "..", "share", "julia", "stdlib", vers) nothing end @@ -138,6 +181,24 @@ mutable struct UV_cpu_info_t cpu_times!idle::UInt64 cpu_times!irq::UInt64 end + +""" + Sys.CPUinfo + +The `CPUinfo` type is a mutable struct with the following fields: +- `model::String`: CPU model information. +- `speed::Int32`: CPU speed (in MHz). +- `cpu_times!user::UInt64`: Time spent in user mode. CPU state shows CPU time used by user space processes. +- `cpu_times!nice::UInt64`: Time spent in nice mode. CPU state is a subset of the "user" state and shows the CPU time used by processes that have a positive niceness, meaning a lower priority than other tasks. +- `cpu_times!sys::UInt64`: Time spent in system mode. CPU state shows the amount of CPU time used by the kernel. +- `cpu_times!idle::UInt64`: Time spent in idle mode. CPU state shows the CPU time that's not actively being used. +- `cpu_times!irq::UInt64`: Time spent handling interrupts. CPU state shows the amount of time the CPU has been servicing hardware interrupts. + +The times are in units of `1/Sys.SC_CLK_TCK` seconds if `Sys.SC_CLK_TCK > 0`; otherwise they are in +unknown units. + +Note: Included in the detailed system information via `versioninfo(verbose=true)`. 
+""" mutable struct CPUinfo model::String speed::Int32 @@ -152,6 +213,8 @@ CPUinfo(info::UV_cpu_info_t) = CPUinfo(unsafe_string(info.model), info.speed, info.cpu_times!user, info.cpu_times!nice, info.cpu_times!sys, info.cpu_times!idle, info.cpu_times!irq) +public CPUinfo + function _show_cpuinfo(io::IO, info::Sys.CPUinfo, header::Bool=true, prefix::AbstractString=" ") tck = SC_CLK_TCK if header @@ -173,7 +236,7 @@ function _show_cpuinfo(io::IO, info::Sys.CPUinfo, header::Bool=true, prefix::Abs end end -show(io::IO, info::CPUinfo) = _show_cpuinfo(io, info, true, " ") +show(io::IO, ::MIME"text/plain", info::CPUinfo) = _show_cpuinfo(io, info, true, " ") function _cpu_summary(io::IO, cpu::AbstractVector{CPUinfo}, i, j) if j-i < 9 @@ -200,6 +263,17 @@ function _cpu_summary(io::IO, cpu::AbstractVector{CPUinfo}, i, j) println(io) end +""" + Sys.cpu_summary(io::IO=stdout, cpu::AbstractVector{CPUinfo}=cpu_info()) + +Print a summary of CPU information to the `io` stream (defaulting to [`stdout`](@ref)), organizing and displaying aggregated data for CPUs with the same model, for a given array of `CPUinfo` data structures +describing a set of CPUs (which defaults to the return value of the [`Sys.cpu_info`](@ref) function). + +The summary includes aggregated information for each distinct CPU model, +providing details such as average CPU speed and total time spent in different modes (user, nice, sys, idle, irq) across all cores with the same model. + +Note: Included in the detailed system information via `versioninfo(verbose=true)`. +""" function cpu_summary(io::IO=stdout, cpu::AbstractVector{CPUinfo} = cpu_info()) model = cpu[1].model first = 1 @@ -212,6 +286,18 @@ function cpu_summary(io::IO=stdout, cpu::AbstractVector{CPUinfo} = cpu_info()) _cpu_summary(io, cpu, first, length(cpu)) end +""" + Sys.cpu_info() + +Return a vector of `CPUinfo` objects, where each object represents information about a CPU core. + +This is pretty-printed in a tabular format by `Sys.cpu_summary`, which is included in the output +of `versioninfo(verbose=true)`, so most users will not need to access the `CPUinfo` +data structures directly. + +The function provides information about each CPU, including model, speed, and usage statistics such as user time, nice time, system time, idle time, and interrupt time. + +""" function cpu_info() UVcpus = Ref{Ptr{UV_cpu_info_t}}() count = Ref{Int32}() @@ -276,7 +362,7 @@ free_memory() = ccall(:uv_get_available_memory, UInt64, ()) Get the total memory in RAM (including that which is currently used) in bytes. This amount may be constrained, e.g., by Linux control groups. For the unconstrained -amount, see `Sys.physical_memory()`. +amount, see `Sys.total_physical_memory()`. """ function total_memory() constrained = ccall(:uv_get_constrained_memory, UInt64, ()) @@ -315,7 +401,7 @@ end Get the maximum resident set size utilized in bytes. See also: - - man page of `getrusage`(2) on Linux and FreeBSD. + - man page of `getrusage`(2) on Linux and BSD. - Windows API `GetProcessMemoryInfo`. """ maxrss() = ccall(:jl_maxrss, Csize_t, ()) @@ -469,24 +555,9 @@ windows_version const WINDOWS_VISTA_VER = v"6.0" -""" - Sys.isexecutable(path::String) - -Return `true` if the given `path` has executable permissions. - -!!! note - Prior to Julia 1.6, this did not correctly interrogate filesystem - ACLs on Windows, therefore it would return `true` for any - file. From Julia 1.6 on, it correctly determines whether the - file is marked as executable or not. 
-""" -function isexecutable(path::String) - # We use `access()` and `X_OK` to determine if a given path is - # executable by the current user. `X_OK` comes from `unistd.h`. - X_OK = 0x01 - return ccall(:jl_fs_access, Cint, (Ptr{UInt8}, Cint), path, X_OK) == 0 -end -isexecutable(path::AbstractString) = isexecutable(String(path)) +const isexecutable = Base.isexecutable +const isreadable = Base.isreadable +const iswritable = Base.iswritable """ Sys.which(program_name::String) @@ -567,4 +638,27 @@ function which(program_name::String) end which(program_name::AbstractString) = which(String(program_name)) +""" + Sys.username() -> String + +Return the username for the current user. If the username cannot be determined +or is empty, this function throws an error. + +To retrieve a username that is overridable via an environment variable, +e.g., `USER`, consider using +```julia +user = get(Sys.username, ENV, "USER") +``` + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. + +See also [`homedir`](@ref). +""" +function username() + pw = Libc.getpw() + isempty(pw.username) && Base.uv_error("username", Base.UV_ENOENT) + return pw.username +end + end # module Sys diff --git a/base/task.jl b/base/task.jl index 4fbb51fde3e8e..951e980ee903c 100644 --- a/base/task.jl +++ b/base/task.jl @@ -53,7 +53,6 @@ push!(c::CompositeException, ex) = push!(c.exceptions, ex) pushfirst!(c::CompositeException, ex) = pushfirst!(c.exceptions, ex) isempty(c::CompositeException) = isempty(c.exceptions) iterate(c::CompositeException, state...) = iterate(c.exceptions, state...) -eltype(::Type{CompositeException}) = Any function showerror(io::IO, ex::CompositeException) if !isempty(ex) @@ -104,7 +103,9 @@ function show_task_exception(io::IO, t::Task; indent = true) end function show(io::IO, t::Task) - print(io, "Task ($(t.state)) @0x$(string(convert(UInt, pointer_from_objref(t)), base = 16, pad = Sys.WORD_SIZE>>2))") + state = t.state + state_str = "$state" * ((state == :runnable && istaskstarted(t)) ? ", started" : "") + print(io, "Task ($state_str) @0x$(string(convert(UInt, pointer_from_objref(t)), base = 16, pad = Sys.WORD_SIZE>>2))") end """ @@ -113,6 +114,13 @@ end Wrap an expression in a [`Task`](@ref) without executing it, and return the [`Task`](@ref). This only creates a task, and does not run it. +!!! warning + By default tasks will have the sticky bit set to true `t.sticky`. This models the + historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread + they are first scheduled on, and when scheduled will make the task that they were scheduled + from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky + bit manually to `false`. + # Examples ```jldoctest julia> a1() = sum(i for i in 1:1000); @@ -135,34 +143,16 @@ macro task(ex) :(Task($thunk)) end -""" - current_task() - -Get the currently running [`Task`](@ref). 
-""" -current_task() = ccall(:jl_get_current_task, Ref{Task}, ()) - # task states const task_state_runnable = UInt8(0) const task_state_done = UInt8(1) const task_state_failed = UInt8(2) -const _state_index = findfirst(==(:_state), fieldnames(Task)) -@eval function load_state_acquire(t) - # TODO: Replace this by proper atomic operations when available - @GC.preserve t llvmcall($(""" - %ptr = inttoptr i$(Sys.WORD_SIZE) %0 to i8* - %rv = load atomic i8, i8* %ptr acquire, align 8 - ret i8 %rv - """), UInt8, Tuple{Ptr{UInt8}}, - Ptr{UInt8}(pointer_from_objref(t) + fieldoffset(Task, _state_index))) -end - @inline function getproperty(t::Task, field::Symbol) if field === :state # TODO: this field name should be deprecated in 2.0 - st = load_state_acquire(t) + st = @atomic :acquire t._state if st === task_state_runnable return :runnable elseif st === task_state_done @@ -178,11 +168,22 @@ end elseif field === :exception # TODO: this field name should be deprecated in 2.0 return t._isexception ? t.result : nothing + elseif field === :scope + error(""" + Querying a Task's `scope` field is disallowed. + The private `Core.current_scope()` function is better, though still an implementation detail.""") else return getfield(t, field) end end +@inline function setproperty!(t::Task, field::Symbol, @nospecialize(v)) + if field === :scope + istaskstarted(t) && error("Setting scope on a started task directly is disallowed.") + end + return @invoke setproperty!(t::Any, field::Symbol, v::Any) +end + """ istaskdone(t::Task) -> Bool @@ -205,7 +206,7 @@ julia> istaskdone(b) true ``` """ -istaskdone(t::Task) = load_state_acquire(t) !== task_state_runnable +istaskdone(t::Task) = (@atomic :acquire t._state) !== task_state_runnable """ istaskstarted(t::Task) -> Bool @@ -249,7 +250,7 @@ true !!! compat "Julia 1.3" This function requires at least Julia 1.3. """ -istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed) +istaskfailed(t::Task) = ((@atomic :acquire t._state) === task_state_failed) Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1) function Threads.threadpool(t::Task) @@ -302,14 +303,16 @@ end # just wait for a task to be done, no error propagation function _wait(t::Task) + t === current_task() && Core.throw(ConcurrencyViolationError("deadlock detected: cannot wait on current task")) if !istaskdone(t) - lock(t.donenotify) + donenotify = t.donenotify::ThreadSynchronizer + lock(donenotify) try while !istaskdone(t) - wait(t.donenotify) + wait(donenotify) end finally - unlock(t.donenotify) + unlock(donenotify) end end nothing @@ -330,28 +333,180 @@ function _wait2(t::Task, waiter::Task) tid = Threads.threadid() ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1) end - lock(t.donenotify) + donenotify = t.donenotify::ThreadSynchronizer + lock(donenotify) if !istaskdone(t) - push!(t.donenotify.waitq, waiter) - unlock(t.donenotify) + push!(donenotify.waitq, waiter) + unlock(donenotify) return nothing else - unlock(t.donenotify) + unlock(donenotify) end end schedule(waiter) nothing end -function wait(t::Task) - t === current_task() && error("deadlock detected: cannot wait on current task") +""" + wait(t::Task; throw=true) + +Wait for a `Task` to finish. + +The keyword `throw` (defaults to `true`) controls whether a failed task results +in an error, thrown as a [`TaskFailedException`](@ref) which wraps the failed task. + +Throws a `ConcurrencyViolationError` if `t` is the currently running task, to prevent deadlocks. 
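A brief usage sketch of the `throw` keyword documented above (assumes a Julia build containing this change): a failed task can be awaited without raising `TaskFailedException`, and its state inspected afterwards.

```julia
t = Threads.@spawn error("boom")   # a task that is sure to fail
wait(t; throw=false)               # blocks until done, returns nothing, does not throw
istaskfailed(t)                    # true
```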
+""" +function wait(t::Task; throw=true) _wait(t) - if istaskfailed(t) - throw(TaskFailedException(t)) + if throw && istaskfailed(t) + Core.throw(TaskFailedException(t)) end nothing end +# Wait multiple tasks + +""" + waitany(tasks; throw=true) -> (done_tasks, remaining_tasks) + +Wait until at least one of the given tasks have been completed. + +If `throw` is `true`, throw `CompositeException` when one of the +completed tasks completes with an exception. + +The return value consists of two task vectors. The first one consists of +completed tasks, and the other consists of uncompleted tasks. + +!!! warning + This may scale poorly compared to writing code that uses multiple individual tasks that + each runs serially, since this needs to scan the list of `tasks` each time and + synchronize with each one every time this is called. Or consider using + [`waitall(tasks; failfast=true)`](@ref waitall) instead. +""" +waitany(tasks; throw=true) = _wait_multiple(tasks, throw) + +""" + waitall(tasks; failfast=true, throw=true) -> (done_tasks, remaining_tasks) + +Wait until all the given tasks have been completed. + +If `failfast` is `true`, the function will return when at least one of the +given tasks is finished by exception. If `throw` is `true`, throw +`CompositeException` when one of the completed tasks has failed. + +`failfast` and `throw` keyword arguments work independently; when only +`throw=true` is specified, this function waits for all the tasks to complete. + +The return value consists of two task vectors. The first one consists of +completed tasks, and the other consists of uncompleted tasks. +""" +waitall(tasks; failfast=true, throw=true) = _wait_multiple(tasks, throw, true, failfast) + +function _wait_multiple(waiting_tasks, throwexc=false, all=false, failfast=false) + tasks = Task[] + + for t in waiting_tasks + t isa Task || error("Expected an iterator of `Task` object") + push!(tasks, t) + end + + if (all && !failfast) || length(tasks) <= 1 + exception = false + # Force everything to finish synchronously for the case of waitall + # with failfast=false + for t in tasks + _wait(t) + exception |= istaskfailed(t) + end + if exception && throwexc + exceptions = [TaskFailedException(t) for t in tasks if istaskfailed(t)] + throw(CompositeException(exceptions)) + else + return tasks, Task[] + end + end + + exception = false + nremaining::Int = length(tasks) + done_mask = falses(nremaining) + for (i, t) in enumerate(tasks) + if istaskdone(t) + done_mask[i] = true + exception |= istaskfailed(t) + nremaining -= 1 + else + done_mask[i] = false + end + end + + if nremaining == 0 + return tasks, Task[] + elseif any(done_mask) && (!all || (failfast && exception)) + if throwexc && (!all || failfast) && exception + exceptions = [TaskFailedException(t) for t in tasks[done_mask] if istaskfailed(t)] + throw(CompositeException(exceptions)) + else + return tasks[done_mask], tasks[.~done_mask] + end + end + + chan = Channel{Int}(Inf) + sentinel = current_task() + waiter_tasks = fill(sentinel, length(tasks)) + + for (i, done) in enumerate(done_mask) + done && continue + t = tasks[i] + if istaskdone(t) + done_mask[i] = true + exception |= istaskfailed(t) + nremaining -= 1 + exception && failfast && break + else + waiter = @task put!(chan, i) + waiter.sticky = false + _wait2(t, waiter) + waiter_tasks[i] = waiter + end + end + + while nremaining > 0 + i = take!(chan) + t = tasks[i] + waiter_tasks[i] = sentinel + done_mask[i] = true + exception |= istaskfailed(t) + nremaining -= 1 + + # stop early if requested, 
unless there is something immediately + # ready to consume from the channel (using a race-y check) + if (!all || (failfast && exception)) && !isready(chan) + break + end + end + + close(chan) + + if nremaining == 0 + return tasks, Task[] + else + remaining_mask = .~done_mask + for i in findall(remaining_mask) + waiter = waiter_tasks[i] + donenotify = tasks[i].donenotify::ThreadSynchronizer + @lock donenotify Base.list_deletefirst!(donenotify.waitq, waiter) + end + done_tasks = tasks[done_mask] + if throwexc && exception + exceptions = [TaskFailedException(t) for t in done_tasks if istaskfailed(t)] + throw(CompositeException(exceptions)) + else + return done_tasks, tasks[remaining_mask] + end + end +end + """ fetch(x::Any) @@ -453,7 +608,8 @@ const sync_varname = gensym(:sync) """ @sync -Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn), `@spawnat` and `@distributed` +Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn), +`Distributed.@spawnat` and `Distributed.@distributed` are complete. All exceptions thrown by enclosed async operations are collected and thrown as a [`CompositeException`](@ref). @@ -562,7 +718,7 @@ Print an error log to `stderr` if task `t` fails. # Examples ```julia-repl -julia> Base._wait(errormonitor(Threads.@spawn error("task failed"))) +julia> wait(errormonitor(Threads.@spawn error("task failed")); throw = false) Unhandled Task ERROR: task failed Stacktrace: [...] @@ -640,6 +796,17 @@ macro sync_add(expr) end end +function repl_backend_task() + @isdefined(active_repl_backend) || return + backend = active_repl_backend + isdefined(backend, :backend_task) || return + backend_task = getfield(active_repl_backend, :backend_task)::Task + if backend_task._state === task_state_runnable && getfield(backend, :in_eval) + return backend_task + end + return +end + # runtime system hook called when a task finishes function task_done_hook(t::Task) # `finish_task` sets `sigatomic` before entering this function @@ -661,10 +828,9 @@ function task_done_hook(t::Task) end if err && !handled && Threads.threadid() == 1 - if isa(result, InterruptException) && isdefined(Base, :active_repl_backend) && - active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) && - active_repl_backend.in_eval - throwto(active_repl_backend.backend_task, result) # this terminates the task + if isa(result, InterruptException) && isempty(Workqueue) + backend = repl_backend_task() + backend isa Task && throwto(backend, result) end end # Clear sigatomic before waiting @@ -675,21 +841,23 @@ function task_done_hook(t::Task) # If an InterruptException happens while blocked in the event loop, try handing # the exception to the REPL task since the current task is done. # issue #19467 - if Threads.threadid() == 1 && - isa(e, InterruptException) && isdefined(Base, :active_repl_backend) && - active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) && - active_repl_backend.in_eval - throwto(active_repl_backend.backend_task, e) - else - rethrow() + if Threads.threadid() == 1 && isa(e, InterruptException) && isempty(Workqueue) + backend = repl_backend_task() + backend isa Task && throwto(backend, e) end + rethrow() # this will terminate the program end end +function init_task_lock(t::Task) # Function only called from jl_adopt_thread so foreign tasks have a lock. 
+ if t.donenotify === nothing + t.donenotify = ThreadSynchronizer() + end +end ## scheduler and work queue -struct IntrusiveLinkedListSynchronized{T} +mutable struct IntrusiveLinkedListSynchronized{T} queue::IntrusiveLinkedList{T} lock::Threads.SpinLock IntrusiveLinkedListSynchronized{T}() where {T} = new(IntrusiveLinkedList{T}(), Threads.SpinLock()) @@ -751,6 +919,7 @@ function workqueue_for(tid::Int) return @inbounds qs[tid] end # slow path to allocate it + @assert tid > 0 l = Workqueues_lock @lock l begin qs = Workqueues @@ -772,19 +941,27 @@ function enq_work(t::Task) # Sticky tasks go into their thread's work queue. if t.sticky tid = Threads.threadid(t) - if tid == 0 && !GC.in_finalizer() + if tid == 0 # The task is not yet stuck to a thread. Stick it to the current # thread and do the same to the parent task (the current task) so # that the tasks are correctly co-scheduled (issue #41324). # XXX: Ideally we would be able to unset this. - tid = Threads.threadid() - ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1) - current_task().sticky = true + if GC.in_finalizer() + # The task was launched in a finalizer. There is no thread to sticky it + # to, so just allow it to run anywhere as if it had been non-sticky. + t.sticky = false + @goto not_sticky + else + tid = Threads.threadid() + ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1) + current_task().sticky = true + end end push!(workqueue_for(tid), t) else + @label not_sticky tp = Threads.threadpool(t) - if Threads.threadpoolsize(tp) == 1 + if tp === :foreign || Threads.threadpoolsize(tp) == 1 # There's only one thread in the task's assigned thread pool; # use its work queue. tid = (tp === :interactive) ? 1 : Threads.threadpoolsize(:interactive)+1 @@ -800,7 +977,11 @@ function enq_work(t::Task) return t end -schedule(t::Task) = enq_work(t) +function schedule(t::Task) + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) + enq_work(t) +end """ schedule(t::Task, [val]; error=false) @@ -816,6 +997,13 @@ the woken task. It is incorrect to use `schedule` on an arbitrary `Task` that has already been started. See [the API reference](@ref low-level-schedule-wait) for more information. +!!! warning + By default tasks will have the sticky bit set to true `t.sticky`. This models the + historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread + they are first scheduled on, and when scheduled will make the task that they were scheduled + from sticky. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky + bit manually to `false`. 
+ # Examples ```jldoctest julia> a5() = sum(i for i in 1:1000); @@ -840,13 +1028,15 @@ function schedule(t::Task, @nospecialize(arg); error=false) # schedule a task to be (re)started with the given value or exception t._state === task_state_runnable || Base.error("schedule: Task not runnable") if error - t.queue === nothing || Base.list_deletefirst!(t.queue::IntrusiveLinkedList{Task}, t) + q = t.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, t) setfield!(t, :result, arg) setfield!(t, :_isexception, true) else t.queue === nothing || Base.error("schedule: Task not runnable") setfield!(t, :result, arg) end + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) enq_work(t) return t end @@ -864,7 +1054,7 @@ function yield() try wait() catch - ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) rethrow() end end @@ -876,11 +1066,19 @@ end A fast, unfair-scheduling version of `schedule(t, arg); yield()` which immediately yields to `t` before calling the scheduler. + +Throws a `ConcurrencyViolationError` if `t` is the currently running task. """ function yield(t::Task, @nospecialize(x=nothing)) - (t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable") + ct = current_task() + t === ct && throw(ConcurrencyViolationError("Cannot yield to currently running task!")) + (t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable")) + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) t.result = x - enq_work(current_task()) + enq_work(ct) set_next_task(t) return try_yieldto(ensure_rescheduled) end @@ -894,6 +1092,7 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid or scheduling in any way. Its use is discouraged. """ function yieldto(t::Task, @nospecialize(x=nothing)) + ct = current_task() # TODO: these are legacy behaviors; these should perhaps be a scheduler # state error instead. 
if t._state === task_state_done @@ -901,6 +1100,10 @@ function yieldto(t::Task, @nospecialize(x=nothing)) elseif t._state === task_state_failed throw(t.result) end + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-unfairly-> wait_time + maybe_record_enqueued!(t) t.result = x set_next_task(t) return try_yieldto(identity) @@ -914,6 +1117,10 @@ function try_yieldto(undo) rethrow() end ct = current_task() + # [task] wait_time -(re)started-> user_time + if ct.metrics_enabled + @atomic :monotonic ct.last_started_running_at = time_ns() + end if ct._isexception exc = ct.result ct.result = nothing @@ -927,6 +1134,11 @@ end # yield to a task, throwing an exception in it function throwto(t::Task, @nospecialize exc) + ct = current_task() + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-unfairly-> wait_time + maybe_record_enqueued!(t) t.result = exc t._isexception = true set_next_task(t) @@ -979,6 +1191,9 @@ checktaskempty = Partr.multiq_check_empty end function wait() + ct = current_task() + # [task] user_time -yield-or-done-> wait_time + record_running_time!(ct) GC.safepoint() W = workqueue_for(Threads.threadid()) poptask(W) @@ -993,3 +1208,21 @@ if Sys.iswindows() else pause() = ccall(:pause, Cvoid, ()) end + +# update the `running_time_ns` field of `t` to include the time since it last started running. +function record_running_time!(t::Task) + if t.metrics_enabled && !istaskdone(t) + @atomic :monotonic t.running_time_ns += time_ns() - t.last_started_running_at + end + return t +end + +# if this is the first time `t` has been added to the run queue +# (or the first time it has been unfairly yielded to without being added to the run queue) +# then set the `first_enqueued_at` field to the current time. +function maybe_record_enqueued!(t::Task) + if t.metrics_enabled && t.first_enqueued_at == 0 + @atomic :monotonic t.first_enqueued_at = time_ns() + end + return t +end diff --git a/base/terminfo.jl b/base/terminfo.jl new file mode 100644 index 0000000000000..8ea8387077d36 --- /dev/null +++ b/base/terminfo.jl @@ -0,0 +1,386 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Since this code is in the startup-path, we go to some effort to +# be easier on the compiler, such as using `map` over broadcasting. + +include("terminfo_data.jl") + +""" + struct TermInfoRaw + +A structured representation of a terminfo file, without any knowledge of +particular capabilities, solely based on `term(5)`. + +!!! warning + This is not part of the public API, and thus subject to change without notice. + +# Fields + +- `names::Vector{String}`: The names this terminal is known by. +- `flags::BitVector`: A list of 0–$(length(TERM_FLAGS)) flag values. +- `numbers::Union{Vector{Int16}, Vector{Int32}}`: A list of 0–$(length(TERM_NUMBERS)) + number values. A value of `typemax(eltype(numbers))` is used to skip over + unspecified capabilities while ensuring value indices are correct. +- `strings::Vector{Union{String, Nothing}}`: A list of 0–$(length(TERM_STRINGS)) + string values. A value of `nothing` is used to skip over unspecified + capabilities while ensuring value indices are correct. +- `extended::Union{Nothing, Dict{Symbol, Union{Bool, Int, String}}}`: Should an + extended info section exist, this gives the entire extended info as a + dictionary. Otherwise `nothing`. + +See also: `TermInfo` and `TermCapability`. 
+""" +struct TermInfoRaw + names::Vector{String} + flags::BitVector + numbers::Vector{Int} + strings::Vector{Union{String, Nothing}} + extended::Union{Nothing, Dict{Symbol, Union{Bool, Int, String, Nothing}}} +end + +""" + struct TermInfo + +A parsed terminfo paired with capability information. + +!!! warning + This is not part of the public API, and thus subject to change without notice. + +# Fields + +- `names::Vector{String}`: The names this terminal is known by. +- `flags::Int`: The number of flags specified. +- `numbers::BitVector`: A mask indicating which of `TERM_NUMBERS` have been + specified. +- `strings::BitVector`: A mask indicating which of `TERM_STRINGS` have been + specified. +- `extensions::Vector{Symbol}`: A list of extended capability variable names. +- `capabilities::Dict{Symbol, Union{Bool, Int, String}}`: The capability values + themselves. + +See also: `TermInfoRaw` and `TermCapability`. +""" +struct TermInfo + names::Vector{String} + flags::Dict{Symbol, Bool} + numbers::Dict{Symbol, Int} + strings::Dict{Symbol, String} + extensions::Union{Nothing, Set{Symbol}} + aliases::Dict{Symbol, Symbol} +end + +TermInfo() = TermInfo([], Dict(), Dict(), Dict(), nothing, Dict()) + +function read(data::IO, ::Type{TermInfoRaw}) + # Parse according to `term(5)` + # Header + magic = read(data, UInt16) |> ltoh + NumInt = if magic == 0o0432 + Int16 + elseif magic == 0o01036 + Int32 + else + throw(ArgumentError("Terminfo data did not start with the magic number 0o0432 or 0o01036")) + end + name_bytes, flag_bytes, numbers_count, string_count, table_bytes = + @ntuple 5 _->read(data, Int16) |> ltoh + # Terminal Names + term_names = map(String, split(String(read(data, name_bytes - 1)), '|')) + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo data did not contain a null byte after the terminal names section")) + # Boolean Flags + flags = map(==(0x01), read(data, flag_bytes)) + if position(data) % 2 != 0 + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo did not contain a null byte after the flag section, expected to position the start of the numbers section on an even byte")) + end + # Numbers, Strings, Table + numbers = map(Int ∘ ltoh, reinterpret(NumInt, read(data, numbers_count * sizeof(NumInt)))) + string_indices = map(ltoh, reinterpret(Int16, read(data, string_count * sizeof(Int16)))) + strings_table = read(data, table_bytes) + strings = _terminfo_read_strings(strings_table, string_indices) + TermInfoRaw(term_names, flags, numbers, strings, + if !eof(data) extendedterminfo(data, NumInt) end) +end + +""" + extendedterminfo(data::IO; NumInt::Union{Type{Int16}, Type{Int32}}) + +Read an extended terminfo section from `data`, with `NumInt` as the numbers type. + +This will accept any terminfo content that conforms with `term(5)`. 
+ +See also: `read(::IO, ::Type{TermInfoRaw})` +""" +function extendedterminfo(data::IO, NumInt::Union{Type{Int16}, Type{Int32}}) + # Extended info + if position(data) % 2 != 0 + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo did not contain a null byte before the extended section; expected to position the start on an even byte")) + end + # Extended header + flag_bytes, numbers_count, string_count, table_count, table_bytes = + @ntuple 5 _->read(data, Int16) |> ltoh + # Extended flags/numbers/strings + flags = map(==(0x01), read(data, flag_bytes)) + if flag_bytes % 2 != 0 + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo did not contain a null byte after the extended flag section; expected to position the start of the numbers section on an even byte")) + end + numbers = map(Int ∘ ltoh, reinterpret(NumInt, read(data, numbers_count * sizeof(NumInt)))) + table_indices = map(ltoh, reinterpret(Int16, read(data, table_count * sizeof(Int16)))) + table_data = read(data, table_bytes) + strings = _terminfo_read_strings(table_data, table_indices[1:string_count]) + table_halfoffset = Int16(get(table_indices, string_count, 0) + + ncodeunits(something(get(strings, length(strings), ""), "")) + 1) + for index in string_count+1:lastindex(table_indices) + table_indices[index] += table_halfoffset + end + labels = map(Symbol, _terminfo_read_strings(table_data, table_indices[string_count+1:end])) + Dict{Symbol, Union{Bool, Int, String, Nothing}}( + zip(labels, Iterators.flatten((flags, numbers, strings)))) +end + +""" + _terminfo_read_strings(table::Vector{UInt8}, indices::Vector{Int16}) + +From `table`, read a string starting at each position in `indices`. Each string +must be null-terminated. Should an index be -1 or -2, `nothing` is given instead +of a string. +""" +function _terminfo_read_strings(table::Vector{UInt8}, indices::Vector{Int16}) + strings = Vector{Union{Nothing, String}}(undef, length(indices)) + map!(strings, indices) do idx + if idx >= 0 + len = findfirst(==(0x00), view(table, 1+idx:length(table))) + !isnothing(len) || + throw(ArgumentError("Terminfo table entry @$idx does not terminate with a null byte")) + String(table[1+idx:idx+len-1]) + elseif idx ∈ (-1, -2) + else + throw(ArgumentError("Terminfo table index is invalid: -2 ≰ $idx")) + end + end + strings +end + +""" + TermInfo(raw::TermInfoRaw) + +Construct a `TermInfo` from `raw`, using known terminal capabilities (as of +NCurses 6.3, see `TERM_FLAGS`, `TERM_NUMBERS`, and `TERM_STRINGS`). 
+""" +function TermInfo(raw::TermInfoRaw) + capabilities = Dict{Symbol, Union{Bool, Int, String}}() + sizehint!(capabilities, 2 * (length(raw.flags) + length(raw.numbers) + length(raw.strings))) + flags = Dict{Symbol, Bool}() + numbers = Dict{Symbol, Int}() + strings = Dict{Symbol, String}() + aliases = Dict{Symbol, Symbol}() + extensions = nothing + for (flag, value) in zip(TERM_FLAGS, raw.flags) + flags[flag.name] = value + aliases[flag.capname] = flag.name + end + for (num, value) in zip(TERM_NUMBERS, raw.numbers) + numbers[num.name] = Int(value) + aliases[num.capname] = num.name + end + for (str, value) in zip(TERM_STRINGS, raw.strings) + if !isnothing(value) + strings[str.name] = value + aliases[str.capname] = str.name + end + end + if !isnothing(raw.extended) + extensions = Set{Symbol}() + longalias(key, value) = first(get(TERM_USER, (typeof(value), key), (nothing, ""))) + for (short, value) in raw.extended + long = longalias(short, value) + key = something(long, short) + push!(extensions, key) + if value isa Bool + flags[key] = value + elseif value isa Int + numbers[key] = value + elseif value isa String + strings[key] = value + end + if !isnothing(long) + aliases[short] = long + end + end + end + TermInfo(raw.names, flags, numbers, strings, extensions, aliases) +end + +get(ti::TermInfo, key::Symbol, default::Bool) = get(ti.flags, get(ti.aliases, key, key), default) +get(ti::TermInfo, key::Symbol, default::Int) = get(ti.numbers, get(ti.aliases, key, key), default) +get(ti::TermInfo, key::Symbol, default::String) = get(ti.strings, get(ti.aliases, key, key), default) + +haskey(ti::TermInfo, key::Symbol) = + haskey(ti.flags, key) || haskey(ti.numbers, key) || haskey(ti.strings, key) || haskey(ti.aliases, key) + +function getindex(ti::TermInfo, key::Symbol) + haskey(ti.flags, key) && return ti.flags[key] + haskey(ti.numbers, key) && return ti.numbers[key] + haskey(ti.strings, key) && return ti.strings[key] + haskey(ti.aliases, key) && return getindex(ti, ti.aliases[key]) + throw(KeyError(key)) +end + +keys(ti::TermInfo) = keys(ti.flags) ∪ keys(ti.numbers) ∪ keys(ti.strings) ∪ keys(ti.aliases) + +function show(io::IO, ::MIME"text/plain", ti::TermInfo) + print(io, "TermInfo(", ti.names, "; ", length(ti.flags), " flags, ", + length(ti.numbers), " numbers, ", length(ti.strings), " strings") + !isnothing(ti.extensions) && + print(io, ", ", length(ti.extensions), " extended capabilities") + print(io, ')') +end + +""" + find_terminfo_file(term::String) + +Locate the terminfo file for `term`, return `nothing` if none could be found. + +The lookup policy is described in `terminfo(5)` "Fetching Compiled +Descriptions". A terminfo database is included by default with Julia and is +taken to be the first entry of `@TERMINFO_DIRS@`. 
+"""
+function find_terminfo_file(term::String)
+    isempty(term) && return
+    chr, chrcode = string(first(term)), string(Int(first(term)), base=16)
+    terminfo_dirs = if haskey(ENV, "TERMINFO")
+        [ENV["TERMINFO"]]
+    elseif isdir(joinpath(homedir(), ".terminfo"))
+        [joinpath(homedir(), ".terminfo")]
+    else
+        String[]
+    end
+    haskey(ENV, "TERMINFO_DIRS") &&
+        append!(terminfo_dirs,
+                replace(split(ENV["TERMINFO_DIRS"], ':'),
+                        "" => "/usr/share/terminfo"))
+    push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "julia", "terminfo"))
+    Sys.isunix() &&
+        push!(terminfo_dirs, "/etc/terminfo", "/lib/terminfo", "/usr/share/terminfo")
+    for dir in terminfo_dirs
+        if isfile(joinpath(dir, chr, term))
+            return joinpath(dir, chr, term)
+        elseif isfile(joinpath(dir, chrcode, term))
+            return joinpath(dir, chrcode, term)
+        elseif isfile(joinpath(dir, lowercase(chr), lowercase(term)))
+            # The vendored terminfo database is fully lowercase to avoid issues on
+            # case-sensitive filesystems. On Unix-like systems, terminfo files with
+            # different cases are hard links to one another, so this is still
+            # correct for non-vendored terminfo, just redundant.
+            return joinpath(dir, lowercase(chr), lowercase(term))
+        end
+    end
+    return nothing
+end
+
+"""
+    load_terminfo(term::String)
+
+Load the `TermInfo` for `term`, falling back on a blank `TermInfo`.
+"""
+function load_terminfo(term::String)
+    file = find_terminfo_file(term)
+    isnothing(file) && return TermInfo()
+    try
+        TermInfo(read(file, TermInfoRaw))
+    catch err
+        if err isa ArgumentError || err isa IOError
+            TermInfo()
+        else
+            rethrow()
+        end
+    end
+end
+
+"""
+The terminfo of the current terminal.
+"""
+current_terminfo::TermInfo = TermInfo()
+
+# Legacy/TTY methods and the `:color` parameter
+
+if Sys.iswindows()
+    ttyhascolor(term_type = nothing) = true
+else
+    function ttyhascolor(term_type = get(ENV, "TERM", ""))
+        startswith(term_type, "xterm") ||
+            haskey(current_terminfo, :setaf)
+    end
+end
+
+"""
+    ttyhastruecolor()
+
+Return a boolean signifying whether the current terminal supports 24-bit colors.
+
+Multiple conditions are taken as signifying truecolor support, specifically any of the following:
+- The `COLORTERM` environment variable is set to `"truecolor"` or `"24bit"`
+- The current terminfo sets the [`RGB`[^1]
+  capability](https://invisible-island.net/ncurses/man/user_caps.5.html#h3-Recognized-Capabilities)
+  (or the legacy `Tc` capability[^2]) flag
+- The current terminfo provides `setrgbf` and `setrgbb` strings[^3]
+- The current terminfo has a `colors` number greater than `256`, on a unix system
+- The VTE version is at least 3600 (detected via the `VTE_VERSION` environment variable)
+- The current terminal has the `XTERM_VERSION` environment variable set
+- The current terminal appears to be iTerm according to the `TERMINAL_PROGRAM` environment variable
+- The `TERM` environment variable corresponds to: linuxvt, rxvt, or st
+
+[^1]: Added to Ncurses 6.1, and used in `TERM=*-direct` terminfos.
+[^2]: Convention [added to tmux in 2016](https://github.com/tmux/tmux/commit/427b8204268af5548d09b830e101c59daa095df9),
+      superseded by `RGB`.
+[^3]: Proposed by [Rüdiger Sonderfeld in 2013](https://lists.gnu.org/archive/html/bug-ncurses/2013-10/msg00007.html),
+      adopted by a few terminal emulators.
+
+!!! note
+    The set of conditions is messy, because the situation is a mess, and there's
+    no resolution in sight. `COLORTERM` is widely accepted, but an imperfect
+    solution because only `TERM` is passed across `ssh` sessions.
Terminfo is + the obvious place for a terminal to declare capabilities, but it's taken + enough years for ncurses/terminfo to declare a standard capability (`RGB`) + that a number of other approaches have taken root. Furthermore, the official + `RGB` capability is *incompatible* with 256-color operation, and so is + unable to resolve the fragmentation in the terminal ecosystem. +""" +function ttyhastruecolor() + # Lasciate ogne speranza, voi ch'intrate + get(ENV, "COLORTERM", "") ∈ ("truecolor", "24bit") || + get(current_terminfo, :RGB, false) || get(current_terminfo, :Tc, false) || + (haskey(current_terminfo, :setrgbf) && haskey(current_terminfo, :setrgbb)) || + @static if Sys.isunix() get(current_terminfo, :colors, 0) > 256 else false end || + (Sys.iswindows() && Sys.windows_version() ≥ v"10.0.14931") || # See + something(tryparse(Int, get(ENV, "VTE_VERSION", "")), 0) >= 3600 || # Per GNOME bug #685759 + haskey(ENV, "XTERM_VERSION") || + get(ENV, "TERMINAL_PROGRAM", "") == "iTerm.app" || # Why does Apple need to be special? + haskey(ENV, "KONSOLE_PROFILE_NAME") || # Per commentary in VT102Emulation.cpp + haskey(ENV, "KONSOLE_DBUS_SESSION") || + let term = get(ENV, "TERM", "") + startswith(term, "linux") || # Linux 4.8+ supports true-colour SGR. + startswith(term, "rxvt") || # See + startswith(term, "st") # From experimentation + end +end + +function get_have_color() + global have_color + have_color === nothing && (have_color = ttyhascolor()) + return have_color::Bool +end + +function get_have_truecolor() + global have_truecolor + have_truecolor === nothing && (have_truecolor = ttyhastruecolor()) + return have_truecolor::Bool +end + +in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color() +haskey(::TTY, key::Symbol) = key === :color +getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key)) +get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default diff --git a/base/terminfo_data.jl b/base/terminfo_data.jl new file mode 100644 index 0000000000000..caf2ff528d3e1 --- /dev/null +++ b/base/terminfo_data.jl @@ -0,0 +1,796 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Updating this listing is fairly easy, assuming existence of a unix system, +# posix shell, and `awk`. Just update the version string in the commented out +# `NCURSES_VERSION` variable, and run this file. This works because this file is +# a bit of a quine. + +#= +awk '/^#=run/{flag=1;next}/=#/{flag=0}flag{gsub(/__FILE__/,"\"'"$0"'\"");print}' "$0" | \ + julia --startup-file=no -E 'readchomp("/dev/fd/0") |> Meta.parse |> eval' && echo "Done"; exit +=# + +""" + struct TermCapability + +Specification of a single terminal capability. + +!!! warning + This is not part of the public API, and thus subject to change without notice. + +# Fields + +- `name::Symbol`: The name of the terminfo capability variable +- `capname::Symbol`: The *Cap-name* of the capability +- `description::String`: A description of the purpose of the capability + +See also: `TermInfo`, `TERM_FLAGS`, `TERM_NUMBERS`, and `TERM_STRINGS`. 
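+
+For example, the `colors` capability appears in the generated `TERM_NUMBERS`
+listing below as:
+
+```julia
+TermCapability(:max_colors, :colors, "maximum number of colors on screen")
+```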
+""" +struct TermCapability + name::Symbol + capname::Symbol + description::String +end + +#=run +begin + +using Downloads + +version_info = IOBuffer() +standard_caps = IOBuffer() +user_caps = IOBuffer() + +Downloads.download("https://raw.githubusercontent.com/mirror/ncurses/master/VERSION", version_info) +Downloads.download("https://raw.githubusercontent.com/mirror/ncurses/master/include/Caps", standard_caps) +Downloads.download("https://raw.githubusercontent.com/mirror/ncurses/master/include/Caps-ncurses", user_caps) + +const TERM_FLAGS = NTuple{3, String}[] +const TERM_NUMBERS = NTuple{3, String}[] +const TERM_STRINGS = NTuple{3, String}[] +const TERM_USER = NTuple{3, String}[] + +_, ncurses_version, ncurses_date = split(read(seekstart(version_info), String)) + +for line in eachline(seekstart(standard_caps)) + startswith(line, '#') && continue + components = split(line, '\t', keepempty=false) + if length(components) ∉ 8:9 + @warn "Malformed line: $(sprint(show, line))" + continue + end + name, shortcode, type, _, _, _, _, description, _... = components + caplist = if type == "bool" TERM_FLAGS + elseif type == "num" TERM_NUMBERS + elseif type == "str" TERM_STRINGS + else + @warn "Unrecognised capability type: $type" + continue + end + push!(caplist, (name, shortcode, description)) +end + +for line in eachline(seekstart(user_caps)) + startswith(line, '#') && continue + !startswith(line, "userdef") && continue + line = line[1+ncodeunits("userdef "):end] + components = split(line, '\t', keepempty=false) + if length(components) ∉ 4:5 + @warn "Malformed line: $(sprint(show, line))" + continue + end + code, type, _, description, _... = components + if code == "xm" + components[3] == "-" || continue + description = "mouse response" + end + dtype = get(Dict("bool" => "Bool", "num" => "Int", "str" => "String"), type, nothing) + if isnothing(dtype) + @warn "Unrecognised data type: $type" + continue + end + push!(TERM_USER, (dtype, code, description)) +end + +push!(TERM_USER, ("Bool", "Tc", "tmux extension to indicate 24-bit truecolor support")) +push!(TERM_USER, ("Bool", "Su", "kitty extension to indicate styled underline support")) + +const SENTINEL = "\n## GENERATED CODE BEYOND THIS POINT ##" +const PREAMBLE = readuntil(__FILE__, SENTINEL, keep=true) + +out = IOBuffer() +write(out, PREAMBLE, "\n\n# Terminfo Capabilities as of NCurses $ncurses_version-$ncurses_date\n", + "const NCURSES_VERSION = v\"$ncurses_version.$ncurses_date\"\n") + +for (ftype, list) in [("flag", TERM_FLAGS), ("number", TERM_NUMBERS), ("string", TERM_STRINGS)] + print(out, "\n\"\"\"\n\ + Ordered list of known terminal capability $ftype fields, as of NCurses $ncurses_version-$ncurses_date.\n\ + \"\"\"\n\ + const TERM_$(uppercase(ftype))S = [") + namepad = maximum(textwidth, getindex.(list, 1)) + 1 + codepad = maximum(textwidth, getindex.(list, 2)) + 1 + for (name, shortcode, description) in list + print(out, "\n TermCapability(:", name, ',', ' '^(namepad - textwidth(name)), + ':', shortcode, ',', ' '^(codepad - textwidth(shortcode)), + '"', escape_string(description), "\"),") + end + println(out, "\n]") +end + +function getcustomalias(allterms::Vector{NTuple{3, String}}, type, short, description) + specific_aliases = Dict{String, String}( + "smxx" => ":enter_strikeout_mode", + "rmxx" => ":exit_strikeout_mode", + "Smol" => ":enter_overline_mode", + "Rmol" => ":exit_overline_mode", + "Cs" => ":set_cursor_color", + "Cr" => ":reset_cursor_color", + "Ss" => ":set_cursor_style", + "Se" => ":reset_cursor_style", + "Smulx" => 
":set_underline_style", + "Su" => ":can_style_underline", + "csl" => ":clear_status_line", + "Ms" => ":set_host_clipboard", + "Tc" => ":truecolor", + "XF" => ":xterm_focus") + if startswith(short, 'k') && !occursin("keypad", description) + return ":key_" * replace(lowercase(description), r"[^a-z]" => '_') + end + return get(specific_aliases, short, "nothing") +end + +print(out, "\n\"\"\"\nTerminfo extensions that NCurses $ncurses_version-$ncurses_date is aware of.\n\"\"\"", + "\nconst TERM_USER = Dict{Tuple{DataType, Symbol}, Union{Tuple{Nothing, String}, Tuple{Symbol, String}}}(") +shortpad = maximum(textwidth, getindex.(TERM_USER, 2)) + 1 +for (type, short, description) in TERM_USER + print(out, "\n ($(rpad(type * ',', 7)) :$short)", ' '^(shortpad - textwidth(short)), + "=> (", getcustomalias(TERM_USER, type, short, description), ", \"", + escape_string(description), "\"),") +end +println(out, "\n)") + +open(io -> write(io, seekstart(out)), __FILE__, "w") + +end +=# + +## GENERATED CODE BEYOND THIS POINT ## + +# Terminfo Capabilities as of NCurses 6.4-20230311 +const NCURSES_VERSION = v"6.4.20230311" + +""" +Ordered list of known terminal capability flag fields, as of NCurses 6.4-20230311. +""" +const TERM_FLAGS = [ + TermCapability(:auto_left_margin, :bw, "cub1 wraps from column 0 to last column"), + TermCapability(:auto_right_margin, :am, "terminal has automatic margins"), + TermCapability(:no_esc_ctlc, :xsb, "beehive (f1=escape, f2=ctrl C)"), + TermCapability(:ceol_standout_glitch, :xhp, "standout not erased by overwriting (hp)"), + TermCapability(:eat_newline_glitch, :xenl, "newline ignored after 80 cols (concept)"), + TermCapability(:erase_overstrike, :eo, "can erase overstrikes with a blank"), + TermCapability(:generic_type, :gn, "generic line type"), + TermCapability(:hard_copy, :hc, "hardcopy terminal"), + TermCapability(:has_meta_key, :km, "Has a meta key (i.e., sets 8th-bit)"), + TermCapability(:has_status_line, :hs, "has extra status line"), + TermCapability(:insert_null_glitch, :in, "insert mode distinguishes nulls"), + TermCapability(:memory_above, :da, "display may be retained above the screen"), + TermCapability(:memory_below, :db, "display may be retained below the screen"), + TermCapability(:move_insert_mode, :mir, "safe to move while in insert mode"), + TermCapability(:move_standout_mode, :msgr, "safe to move while in standout mode"), + TermCapability(:over_strike, :os, "terminal can overstrike"), + TermCapability(:status_line_esc_ok, :eslok, "escape can be used on the status line"), + TermCapability(:dest_tabs_magic_smso, :xt, "tabs destructive, magic so char (t1061)"), + TermCapability(:tilde_glitch, :hz, "cannot print ~'s (Hazeltine)"), + TermCapability(:transparent_underline, :ul, "underline character overstrikes"), + TermCapability(:xon_xoff, :xon, "terminal uses xon/xoff handshaking"), + TermCapability(:needs_xon_xoff, :nxon, "padding will not work, xon/xoff required"), + TermCapability(:prtr_silent, :mc5i, "printer will not echo on screen"), + TermCapability(:hard_cursor, :chts, "cursor is hard to see"), + TermCapability(:non_rev_rmcup, :nrrmc, "smcup does not reverse rmcup"), + TermCapability(:no_pad_char, :npc, "pad character does not exist"), + TermCapability(:non_dest_scroll_region, :ndscr, "scrolling region is non-destructive"), + TermCapability(:can_change, :ccc, "terminal can re-define existing colors"), + TermCapability(:back_color_erase, :bce, "screen erased with background color"), + TermCapability(:hue_lightness_saturation, :hls, "terminal uses only 
HLS color notation (Tektronix)"), + TermCapability(:col_addr_glitch, :xhpa, "only positive motion for hpa/mhpa caps"), + TermCapability(:cr_cancels_micro_mode, :crxm, "using cr turns off micro mode"), + TermCapability(:has_print_wheel, :daisy, "printer needs operator to change character set"), + TermCapability(:row_addr_glitch, :xvpa, "only positive motion for vpa/mvpa caps"), + TermCapability(:semi_auto_right_margin, :sam, "printing in last column causes cr"), + TermCapability(:cpi_changes_res, :cpix, "changing character pitch changes resolution"), + TermCapability(:lpi_changes_res, :lpix, "changing line pitch changes resolution"), + TermCapability(:backspaces_with_bs, :OTbs, "uses ^H to move left"), + TermCapability(:crt_no_scrolling, :OTns, "crt cannot scroll"), + TermCapability(:no_correctly_working_cr, :OTnc, "no way to go to start of line"), + TermCapability(:gnu_has_meta_key, :OTMT, "has meta key"), + TermCapability(:linefeed_is_newline, :OTNL, "move down with \\n"), + TermCapability(:has_hardware_tabs, :OTpt, "has 8-char tabs invoked with ^I"), + TermCapability(:return_does_clr_eol, :OTxr, "return clears the line"), +] + +""" +Ordered list of known terminal capability number fields, as of NCurses 6.4-20230311. +""" +const TERM_NUMBERS = [ + TermCapability(:columns, :cols, "number of columns in a line"), + TermCapability(:init_tabs, :it, "tabs initially every # spaces"), + TermCapability(:lines, :lines, "number of lines on screen or page"), + TermCapability(:lines_of_memory, :lm, "lines of memory if > line. 0 means varies"), + TermCapability(:magic_cookie_glitch, :xmc, "number of blank characters left by smso or rmso"), + TermCapability(:padding_baud_rate, :pb, "lowest baud rate where padding needed"), + TermCapability(:virtual_terminal, :vt, "virtual terminal number (CB/unix)"), + TermCapability(:width_status_line, :wsl, "number of columns in status line"), + TermCapability(:num_labels, :nlab, "number of labels on screen"), + TermCapability(:label_height, :lh, "rows in each label"), + TermCapability(:label_width, :lw, "columns in each label"), + TermCapability(:max_attributes, :ma, "maximum combined attributes terminal can handle"), + TermCapability(:maximum_windows, :wnum, "maximum number of definable windows"), + TermCapability(:max_colors, :colors, "maximum number of colors on screen"), + TermCapability(:max_pairs, :pairs, "maximum number of color-pairs on the screen"), + TermCapability(:no_color_video, :ncv, "video attributes that cannot be used with colors"), + TermCapability(:buffer_capacity, :bufsz, "numbers of bytes buffered before printing"), + TermCapability(:dot_vert_spacing, :spinv, "spacing of pins vertically in pins per inch"), + TermCapability(:dot_horz_spacing, :spinh, "spacing of dots horizontally in dots per inch"), + TermCapability(:max_micro_address, :maddr, "maximum value in micro_..._address"), + TermCapability(:max_micro_jump, :mjump, "maximum value in parm_..._micro"), + TermCapability(:micro_col_size, :mcs, "character step size when in micro mode"), + TermCapability(:micro_line_size, :mls, "line step size when in micro mode"), + TermCapability(:number_of_pins, :npins, "numbers of pins in print-head"), + TermCapability(:output_res_char, :orc, "horizontal resolution in units per line"), + TermCapability(:output_res_line, :orl, "vertical resolution in units per line"), + TermCapability(:output_res_horz_inch, :orhi, "horizontal resolution in units per inch"), + TermCapability(:output_res_vert_inch, :orvi, "vertical resolution in units per inch"), + 
TermCapability(:print_rate, :cps, "print rate in characters per second"), + TermCapability(:wide_char_size, :widcs, "character step size when in double wide mode"), + TermCapability(:buttons, :btns, "number of buttons on mouse"), + TermCapability(:bit_image_entwining, :bitwin, "number of passes for each bit-image row"), + TermCapability(:bit_image_type, :bitype, "type of bit-image device"), + TermCapability(:magic_cookie_glitch_ul, :OTug, "number of blanks left by ul"), + TermCapability(:carriage_return_delay, :OTdC, "pad needed for CR"), + TermCapability(:new_line_delay, :OTdN, "pad needed for LF"), + TermCapability(:backspace_delay, :OTdB, "padding required for ^H"), + TermCapability(:horizontal_tab_delay, :OTdT, "padding required for ^I"), + TermCapability(:number_of_function_keys, :OTkn, "count of function keys"), +] + +""" +Ordered list of known terminal capability string fields, as of NCurses 6.4-20230311. +""" +const TERM_STRINGS = [ + TermCapability(:back_tab, :cbt, "back tab (P)"), + TermCapability(:bell, :bel, "audible signal (bell) (P)"), + TermCapability(:carriage_return, :cr, "carriage return (P*) (P*)"), + TermCapability(:change_scroll_region, :csr, "change region to line #1 to line #2 (P)"), + TermCapability(:clear_all_tabs, :tbc, "clear all tab stops (P)"), + TermCapability(:clear_screen, :clear, "clear screen and home cursor (P*)"), + TermCapability(:clr_eol, :el, "clear to end of line (P)"), + TermCapability(:clr_eos, :ed, "clear to end of screen (P*)"), + TermCapability(:column_address, :hpa, "horizontal position #1, absolute (P)"), + TermCapability(:command_character, :cmdch, "terminal settable cmd character in prototype !?"), + TermCapability(:cursor_address, :cup, "move to row #1 columns #2"), + TermCapability(:cursor_down, :cud1, "down one line"), + TermCapability(:cursor_home, :home, "home cursor (if no cup)"), + TermCapability(:cursor_invisible, :civis, "make cursor invisible"), + TermCapability(:cursor_left, :cub1, "move left one space"), + TermCapability(:cursor_mem_address, :mrcup, "memory relative cursor addressing, move to row #1 columns #2"), + TermCapability(:cursor_normal, :cnorm, "make cursor appear normal (undo civis/cvvis)"), + TermCapability(:cursor_right, :cuf1, "non-destructive space (move right one space)"), + TermCapability(:cursor_to_ll, :ll, "last line, first column (if no cup)"), + TermCapability(:cursor_up, :cuu1, "up one line"), + TermCapability(:cursor_visible, :cvvis, "make cursor very visible"), + TermCapability(:delete_character, :dch1, "delete character (P*)"), + TermCapability(:delete_line, :dl1, "delete line (P*)"), + TermCapability(:dis_status_line, :dsl, "disable status line"), + TermCapability(:down_half_line, :hd, "half a line down"), + TermCapability(:enter_alt_charset_mode, :smacs, "start alternate character set (P)"), + TermCapability(:enter_blink_mode, :blink, "turn on blinking"), + TermCapability(:enter_bold_mode, :bold, "turn on bold (extra bright) mode"), + TermCapability(:enter_ca_mode, :smcup, "string to start programs using cup"), + TermCapability(:enter_delete_mode, :smdc, "enter delete mode"), + TermCapability(:enter_dim_mode, :dim, "turn on half-bright mode"), + TermCapability(:enter_insert_mode, :smir, "enter insert mode"), + TermCapability(:enter_secure_mode, :invis, "turn on blank mode (characters invisible)"), + TermCapability(:enter_protected_mode, :prot, "turn on protected mode"), + TermCapability(:enter_reverse_mode, :rev, "turn on reverse video mode"), + TermCapability(:enter_standout_mode, :smso, "begin standout 
mode"), + TermCapability(:enter_underline_mode, :smul, "begin underline mode"), + TermCapability(:erase_chars, :ech, "erase #1 characters (P)"), + TermCapability(:exit_alt_charset_mode, :rmacs, "end alternate character set (P)"), + TermCapability(:exit_attribute_mode, :sgr0, "turn off all attributes"), + TermCapability(:exit_ca_mode, :rmcup, "strings to end programs using cup"), + TermCapability(:exit_delete_mode, :rmdc, "end delete mode"), + TermCapability(:exit_insert_mode, :rmir, "exit insert mode"), + TermCapability(:exit_standout_mode, :rmso, "exit standout mode"), + TermCapability(:exit_underline_mode, :rmul, "exit underline mode"), + TermCapability(:flash_screen, :flash, "visible bell (may not move cursor)"), + TermCapability(:form_feed, :ff, "hardcopy terminal page eject (P*)"), + TermCapability(:from_status_line, :fsl, "return from status line"), + TermCapability(:init_1string, :is1, "initialization string"), + TermCapability(:init_2string, :is2, "initialization string"), + TermCapability(:init_3string, :is3, "initialization string"), + TermCapability(:init_file, :if, "name of initialization file"), + TermCapability(:insert_character, :ich1, "insert character (P)"), + TermCapability(:insert_line, :il1, "insert line (P*)"), + TermCapability(:insert_padding, :ip, "insert padding after inserted character"), + TermCapability(:key_backspace, :kbs, "backspace key"), + TermCapability(:key_catab, :ktbc, "clear-all-tabs key"), + TermCapability(:key_clear, :kclr, "clear-screen or erase key"), + TermCapability(:key_ctab, :kctab, "clear-tab key"), + TermCapability(:key_dc, :kdch1, "delete-character key"), + TermCapability(:key_dl, :kdl1, "delete-line key"), + TermCapability(:key_down, :kcud1, "down-arrow key"), + TermCapability(:key_eic, :krmir, "sent by rmir or smir in insert mode"), + TermCapability(:key_eol, :kel, "clear-to-end-of-line key"), + TermCapability(:key_eos, :ked, "clear-to-end-of-screen key"), + TermCapability(:key_f0, :kf0, "F0 function key"), + TermCapability(:key_f1, :kf1, "F1 function key"), + TermCapability(:key_f10, :kf10, "F10 function key"), + TermCapability(:key_f2, :kf2, "F2 function key"), + TermCapability(:key_f3, :kf3, "F3 function key"), + TermCapability(:key_f4, :kf4, "F4 function key"), + TermCapability(:key_f5, :kf5, "F5 function key"), + TermCapability(:key_f6, :kf6, "F6 function key"), + TermCapability(:key_f7, :kf7, "F7 function key"), + TermCapability(:key_f8, :kf8, "F8 function key"), + TermCapability(:key_f9, :kf9, "F9 function key"), + TermCapability(:key_home, :khome, "home key"), + TermCapability(:key_ic, :kich1, "insert-character key"), + TermCapability(:key_il, :kil1, "insert-line key"), + TermCapability(:key_left, :kcub1, "left-arrow key"), + TermCapability(:key_ll, :kll, "lower-left key (home down)"), + TermCapability(:key_npage, :knp, "next-page key"), + TermCapability(:key_ppage, :kpp, "previous-page key"), + TermCapability(:key_right, :kcuf1, "right-arrow key"), + TermCapability(:key_sf, :kind, "scroll-forward key"), + TermCapability(:key_sr, :kri, "scroll-backward key"), + TermCapability(:key_stab, :khts, "set-tab key"), + TermCapability(:key_up, :kcuu1, "up-arrow key"), + TermCapability(:keypad_local, :rmkx, "leave 'keyboard_transmit' mode"), + TermCapability(:keypad_xmit, :smkx, "enter 'keyboard_transmit' mode"), + TermCapability(:lab_f0, :lf0, "label on function key f0 if not f0"), + TermCapability(:lab_f1, :lf1, "label on function key f1 if not f1"), + TermCapability(:lab_f10, :lf10, "label on function key f10 if not f10"), + 
TermCapability(:lab_f2, :lf2, "label on function key f2 if not f2"), + TermCapability(:lab_f3, :lf3, "label on function key f3 if not f3"), + TermCapability(:lab_f4, :lf4, "label on function key f4 if not f4"), + TermCapability(:lab_f5, :lf5, "label on function key f5 if not f5"), + TermCapability(:lab_f6, :lf6, "label on function key f6 if not f6"), + TermCapability(:lab_f7, :lf7, "label on function key f7 if not f7"), + TermCapability(:lab_f8, :lf8, "label on function key f8 if not f8"), + TermCapability(:lab_f9, :lf9, "label on function key f9 if not f9"), + TermCapability(:meta_off, :rmm, "turn off meta mode"), + TermCapability(:meta_on, :smm, "turn on meta mode (8th-bit on)"), + TermCapability(:newline, :nel, "newline (behave like cr followed by lf)"), + TermCapability(:pad_char, :pad, "padding char (instead of null)"), + TermCapability(:parm_dch, :dch, "delete #1 characters (P*)"), + TermCapability(:parm_delete_line, :dl, "delete #1 lines (P*)"), + TermCapability(:parm_down_cursor, :cud, "down #1 lines (P*)"), + TermCapability(:parm_ich, :ich, "insert #1 characters (P*)"), + TermCapability(:parm_index, :indn, "scroll forward #1 lines (P)"), + TermCapability(:parm_insert_line, :il, "insert #1 lines (P*)"), + TermCapability(:parm_left_cursor, :cub, "move #1 characters to the left (P)"), + TermCapability(:parm_right_cursor, :cuf, "move #1 characters to the right (P*)"), + TermCapability(:parm_rindex, :rin, "scroll back #1 lines (P)"), + TermCapability(:parm_up_cursor, :cuu, "up #1 lines (P*)"), + TermCapability(:pkey_key, :pfkey, "program function key #1 to type string #2"), + TermCapability(:pkey_local, :pfloc, "program function key #1 to execute string #2"), + TermCapability(:pkey_xmit, :pfx, "program function key #1 to transmit string #2"), + TermCapability(:print_screen, :mc0, "print contents of screen"), + TermCapability(:prtr_off, :mc4, "turn off printer"), + TermCapability(:prtr_on, :mc5, "turn on printer"), + TermCapability(:repeat_char, :rep, "repeat char #1 #2 times (P*)"), + TermCapability(:reset_1string, :rs1, "reset string"), + TermCapability(:reset_2string, :rs2, "reset string"), + TermCapability(:reset_3string, :rs3, "reset string"), + TermCapability(:reset_file, :rf, "name of reset file"), + TermCapability(:restore_cursor, :rc, "restore cursor to position of last save_cursor"), + TermCapability(:row_address, :vpa, "vertical position #1 absolute (P)"), + TermCapability(:save_cursor, :sc, "save current cursor position (P)"), + TermCapability(:scroll_forward, :ind, "scroll text up (P)"), + TermCapability(:scroll_reverse, :ri, "scroll text down (P)"), + TermCapability(:set_attributes, :sgr, "define video attributes #1-#9 (PG9)"), + TermCapability(:set_tab, :hts, "set a tab in every row, current columns"), + TermCapability(:set_window, :wind, "current window is lines #1-#2 cols #3-#4"), + TermCapability(:tab, :ht, "tab to next 8-space hardware tab stop"), + TermCapability(:to_status_line, :tsl, "move to status line, column #1"), + TermCapability(:underline_char, :uc, "underline char and move past it"), + TermCapability(:up_half_line, :hu, "half a line up"), + TermCapability(:init_prog, :iprog, "path name of program for initialization"), + TermCapability(:key_a1, :ka1, "upper left of keypad"), + TermCapability(:key_a3, :ka3, "upper right of keypad"), + TermCapability(:key_b2, :kb2, "center of keypad"), + TermCapability(:key_c1, :kc1, "lower left of keypad"), + TermCapability(:key_c3, :kc3, "lower right of keypad"), + TermCapability(:prtr_non, :mc5p, "turn on printer for #1 
bytes"), + TermCapability(:char_padding, :rmp, "like ip but when in insert mode"), + TermCapability(:acs_chars, :acsc, "graphics charset pairs, based on vt100"), + TermCapability(:plab_norm, :pln, "program label #1 to show string #2"), + TermCapability(:key_btab, :kcbt, "back-tab key"), + TermCapability(:enter_xon_mode, :smxon, "turn on xon/xoff handshaking"), + TermCapability(:exit_xon_mode, :rmxon, "turn off xon/xoff handshaking"), + TermCapability(:enter_am_mode, :smam, "turn on automatic margins"), + TermCapability(:exit_am_mode, :rmam, "turn off automatic margins"), + TermCapability(:xon_character, :xonc, "XON character"), + TermCapability(:xoff_character, :xoffc, "XOFF character"), + TermCapability(:ena_acs, :enacs, "enable alternate char set"), + TermCapability(:label_on, :smln, "turn on soft labels"), + TermCapability(:label_off, :rmln, "turn off soft labels"), + TermCapability(:key_beg, :kbeg, "begin key"), + TermCapability(:key_cancel, :kcan, "cancel key"), + TermCapability(:key_close, :kclo, "close key"), + TermCapability(:key_command, :kcmd, "command key"), + TermCapability(:key_copy, :kcpy, "copy key"), + TermCapability(:key_create, :kcrt, "create key"), + TermCapability(:key_end, :kend, "end key"), + TermCapability(:key_enter, :kent, "enter/send key"), + TermCapability(:key_exit, :kext, "exit key"), + TermCapability(:key_find, :kfnd, "find key"), + TermCapability(:key_help, :khlp, "help key"), + TermCapability(:key_mark, :kmrk, "mark key"), + TermCapability(:key_message, :kmsg, "message key"), + TermCapability(:key_move, :kmov, "move key"), + TermCapability(:key_next, :knxt, "next key"), + TermCapability(:key_open, :kopn, "open key"), + TermCapability(:key_options, :kopt, "options key"), + TermCapability(:key_previous, :kprv, "previous key"), + TermCapability(:key_print, :kprt, "print key"), + TermCapability(:key_redo, :krdo, "redo key"), + TermCapability(:key_reference, :kref, "reference key"), + TermCapability(:key_refresh, :krfr, "refresh key"), + TermCapability(:key_replace, :krpl, "replace key"), + TermCapability(:key_restart, :krst, "restart key"), + TermCapability(:key_resume, :kres, "resume key"), + TermCapability(:key_save, :ksav, "save key"), + TermCapability(:key_suspend, :kspd, "suspend key"), + TermCapability(:key_undo, :kund, "undo key"), + TermCapability(:key_sbeg, :kBEG, "shifted begin key"), + TermCapability(:key_scancel, :kCAN, "shifted cancel key"), + TermCapability(:key_scommand, :kCMD, "shifted command key"), + TermCapability(:key_scopy, :kCPY, "shifted copy key"), + TermCapability(:key_screate, :kCRT, "shifted create key"), + TermCapability(:key_sdc, :kDC, "shifted delete-character key"), + TermCapability(:key_sdl, :kDL, "shifted delete-line key"), + TermCapability(:key_select, :kslt, "select key"), + TermCapability(:key_send, :kEND, "shifted end key"), + TermCapability(:key_seol, :kEOL, "shifted clear-to-end-of-line key"), + TermCapability(:key_sexit, :kEXT, "shifted exit key"), + TermCapability(:key_sfind, :kFND, "shifted find key"), + TermCapability(:key_shelp, :kHLP, "shifted help key"), + TermCapability(:key_shome, :kHOM, "shifted home key"), + TermCapability(:key_sic, :kIC, "shifted insert-character key"), + TermCapability(:key_sleft, :kLFT, "shifted left-arrow key"), + TermCapability(:key_smessage, :kMSG, "shifted message key"), + TermCapability(:key_smove, :kMOV, "shifted move key"), + TermCapability(:key_snext, :kNXT, "shifted next key"), + TermCapability(:key_soptions, :kOPT, "shifted options key"), + TermCapability(:key_sprevious, :kPRV, 
"shifted previous key"), + TermCapability(:key_sprint, :kPRT, "shifted print key"), + TermCapability(:key_sredo, :kRDO, "shifted redo key"), + TermCapability(:key_sreplace, :kRPL, "shifted replace key"), + TermCapability(:key_sright, :kRIT, "shifted right-arrow key"), + TermCapability(:key_srsume, :kRES, "shifted resume key"), + TermCapability(:key_ssave, :kSAV, "shifted save key"), + TermCapability(:key_ssuspend, :kSPD, "shifted suspend key"), + TermCapability(:key_sundo, :kUND, "shifted undo key"), + TermCapability(:req_for_input, :rfi, "send next input char (for ptys)"), + TermCapability(:key_f11, :kf11, "F11 function key"), + TermCapability(:key_f12, :kf12, "F12 function key"), + TermCapability(:key_f13, :kf13, "F13 function key"), + TermCapability(:key_f14, :kf14, "F14 function key"), + TermCapability(:key_f15, :kf15, "F15 function key"), + TermCapability(:key_f16, :kf16, "F16 function key"), + TermCapability(:key_f17, :kf17, "F17 function key"), + TermCapability(:key_f18, :kf18, "F18 function key"), + TermCapability(:key_f19, :kf19, "F19 function key"), + TermCapability(:key_f20, :kf20, "F20 function key"), + TermCapability(:key_f21, :kf21, "F21 function key"), + TermCapability(:key_f22, :kf22, "F22 function key"), + TermCapability(:key_f23, :kf23, "F23 function key"), + TermCapability(:key_f24, :kf24, "F24 function key"), + TermCapability(:key_f25, :kf25, "F25 function key"), + TermCapability(:key_f26, :kf26, "F26 function key"), + TermCapability(:key_f27, :kf27, "F27 function key"), + TermCapability(:key_f28, :kf28, "F28 function key"), + TermCapability(:key_f29, :kf29, "F29 function key"), + TermCapability(:key_f30, :kf30, "F30 function key"), + TermCapability(:key_f31, :kf31, "F31 function key"), + TermCapability(:key_f32, :kf32, "F32 function key"), + TermCapability(:key_f33, :kf33, "F33 function key"), + TermCapability(:key_f34, :kf34, "F34 function key"), + TermCapability(:key_f35, :kf35, "F35 function key"), + TermCapability(:key_f36, :kf36, "F36 function key"), + TermCapability(:key_f37, :kf37, "F37 function key"), + TermCapability(:key_f38, :kf38, "F38 function key"), + TermCapability(:key_f39, :kf39, "F39 function key"), + TermCapability(:key_f40, :kf40, "F40 function key"), + TermCapability(:key_f41, :kf41, "F41 function key"), + TermCapability(:key_f42, :kf42, "F42 function key"), + TermCapability(:key_f43, :kf43, "F43 function key"), + TermCapability(:key_f44, :kf44, "F44 function key"), + TermCapability(:key_f45, :kf45, "F45 function key"), + TermCapability(:key_f46, :kf46, "F46 function key"), + TermCapability(:key_f47, :kf47, "F47 function key"), + TermCapability(:key_f48, :kf48, "F48 function key"), + TermCapability(:key_f49, :kf49, "F49 function key"), + TermCapability(:key_f50, :kf50, "F50 function key"), + TermCapability(:key_f51, :kf51, "F51 function key"), + TermCapability(:key_f52, :kf52, "F52 function key"), + TermCapability(:key_f53, :kf53, "F53 function key"), + TermCapability(:key_f54, :kf54, "F54 function key"), + TermCapability(:key_f55, :kf55, "F55 function key"), + TermCapability(:key_f56, :kf56, "F56 function key"), + TermCapability(:key_f57, :kf57, "F57 function key"), + TermCapability(:key_f58, :kf58, "F58 function key"), + TermCapability(:key_f59, :kf59, "F59 function key"), + TermCapability(:key_f60, :kf60, "F60 function key"), + TermCapability(:key_f61, :kf61, "F61 function key"), + TermCapability(:key_f62, :kf62, "F62 function key"), + TermCapability(:key_f63, :kf63, "F63 function key"), + TermCapability(:clr_bol, :el1, "Clear to beginning of 
line"), + TermCapability(:clear_margins, :mgc, "clear right and left soft margins"), + TermCapability(:set_left_margin, :smgl, "set left soft margin at current column."), + TermCapability(:set_right_margin, :smgr, "set right soft margin at current column"), + TermCapability(:label_format, :fln, "label format"), + TermCapability(:set_clock, :sclk, "set clock, #1 hrs #2 mins #3 secs"), + TermCapability(:display_clock, :dclk, "display clock"), + TermCapability(:remove_clock, :rmclk, "remove clock"), + TermCapability(:create_window, :cwin, "define a window #1 from #2,#3 to #4,#5"), + TermCapability(:goto_window, :wingo, "go to window #1"), + TermCapability(:hangup, :hup, "hang-up phone"), + TermCapability(:dial_phone, :dial, "dial number #1"), + TermCapability(:quick_dial, :qdial, "dial number #1 without checking"), + TermCapability(:tone, :tone, "select touch tone dialing"), + TermCapability(:pulse, :pulse, "select pulse dialing"), + TermCapability(:flash_hook, :hook, "flash switch hook"), + TermCapability(:fixed_pause, :pause, "pause for 2-3 seconds"), + TermCapability(:wait_tone, :wait, "wait for dial-tone"), + TermCapability(:user0, :u0, "User string #0"), + TermCapability(:user1, :u1, "User string #1"), + TermCapability(:user2, :u2, "User string #2"), + TermCapability(:user3, :u3, "User string #3"), + TermCapability(:user4, :u4, "User string #4"), + TermCapability(:user5, :u5, "User string #5"), + TermCapability(:user6, :u6, "User string #6"), + TermCapability(:user7, :u7, "User string #7"), + TermCapability(:user8, :u8, "User string #8"), + TermCapability(:user9, :u9, "User string #9"), + TermCapability(:orig_pair, :op, "Set default pair to its original value"), + TermCapability(:orig_colors, :oc, "Set all color pairs to the original ones"), + TermCapability(:initialize_color, :initc, "initialize color #1 to (#2,#3,#4)"), + TermCapability(:initialize_pair, :initp, "Initialize color pair #1 to fg=(#2,#3,#4), bg=(#5,#6,#7)"), + TermCapability(:set_color_pair, :scp, "Set current color pair to #1"), + TermCapability(:set_foreground, :setf, "Set foreground color #1"), + TermCapability(:set_background, :setb, "Set background color #1"), + TermCapability(:change_char_pitch, :cpi, "Change number of characters per inch to #1"), + TermCapability(:change_line_pitch, :lpi, "Change number of lines per inch to #1"), + TermCapability(:change_res_horz, :chr, "Change horizontal resolution to #1"), + TermCapability(:change_res_vert, :cvr, "Change vertical resolution to #1"), + TermCapability(:define_char, :defc, "Define a character #1, #2 dots wide, descender #3"), + TermCapability(:enter_doublewide_mode, :swidm, "Enter double-wide mode"), + TermCapability(:enter_draft_quality, :sdrfq, "Enter draft-quality mode"), + TermCapability(:enter_italics_mode, :sitm, "Enter italic mode"), + TermCapability(:enter_leftward_mode, :slm, "Start leftward carriage motion"), + TermCapability(:enter_micro_mode, :smicm, "Start micro-motion mode"), + TermCapability(:enter_near_letter_quality, :snlq, "Enter NLQ mode"), + TermCapability(:enter_normal_quality, :snrmq, "Enter normal-quality mode"), + TermCapability(:enter_shadow_mode, :sshm, "Enter shadow-print mode"), + TermCapability(:enter_subscript_mode, :ssubm, "Enter subscript mode"), + TermCapability(:enter_superscript_mode, :ssupm, "Enter superscript mode"), + TermCapability(:enter_upward_mode, :sum, "Start upward carriage motion"), + TermCapability(:exit_doublewide_mode, :rwidm, "End double-wide mode"), + TermCapability(:exit_italics_mode, :ritm, "End italic mode"), + 
TermCapability(:exit_leftward_mode, :rlm, "End left-motion mode"), + TermCapability(:exit_micro_mode, :rmicm, "End micro-motion mode"), + TermCapability(:exit_shadow_mode, :rshm, "End shadow-print mode"), + TermCapability(:exit_subscript_mode, :rsubm, "End subscript mode"), + TermCapability(:exit_superscript_mode, :rsupm, "End superscript mode"), + TermCapability(:exit_upward_mode, :rum, "End reverse character motion"), + TermCapability(:micro_column_address, :mhpa, "Like column_address in micro mode"), + TermCapability(:micro_down, :mcud1, "Like cursor_down in micro mode"), + TermCapability(:micro_left, :mcub1, "Like cursor_left in micro mode"), + TermCapability(:micro_right, :mcuf1, "Like cursor_right in micro mode"), + TermCapability(:micro_row_address, :mvpa, "Like row_address #1 in micro mode"), + TermCapability(:micro_up, :mcuu1, "Like cursor_up in micro mode"), + TermCapability(:order_of_pins, :porder, "Match software bits to print-head pins"), + TermCapability(:parm_down_micro, :mcud, "Like parm_down_cursor in micro mode"), + TermCapability(:parm_left_micro, :mcub, "Like parm_left_cursor in micro mode"), + TermCapability(:parm_right_micro, :mcuf, "Like parm_right_cursor in micro mode"), + TermCapability(:parm_up_micro, :mcuu, "Like parm_up_cursor in micro mode"), + TermCapability(:select_char_set, :scs, "Select character set, #1"), + TermCapability(:set_bottom_margin, :smgb, "Set bottom margin at current line"), + TermCapability(:set_bottom_margin_parm, :smgbp, "Set bottom margin at line #1 or (if smgtp is not given) #2 lines from bottom"), + TermCapability(:set_left_margin_parm, :smglp, "Set left (right) margin at column #1"), + TermCapability(:set_right_margin_parm, :smgrp, "Set right margin at column #1"), + TermCapability(:set_top_margin, :smgt, "Set top margin at current line"), + TermCapability(:set_top_margin_parm, :smgtp, "Set top (bottom) margin at row #1"), + TermCapability(:start_bit_image, :sbim, "Start printing bit image graphics"), + TermCapability(:start_char_set_def, :scsd, "Start character set definition #1, with #2 characters in the set"), + TermCapability(:stop_bit_image, :rbim, "Stop printing bit image graphics"), + TermCapability(:stop_char_set_def, :rcsd, "End definition of character set #1"), + TermCapability(:subscript_characters, :subcs, "List of subscriptable characters"), + TermCapability(:superscript_characters, :supcs, "List of superscriptable characters"), + TermCapability(:these_cause_cr, :docr, "Printing any of these characters causes CR"), + TermCapability(:zero_motion, :zerom, "No motion for subsequent character"), + TermCapability(:char_set_names, :csnm, "Produce #1'th item from list of character set names"), + TermCapability(:key_mouse, :kmous, "Mouse event has occurred"), + TermCapability(:mouse_info, :minfo, "Mouse status information"), + TermCapability(:req_mouse_pos, :reqmp, "Request mouse position"), + TermCapability(:get_mouse, :getm, "Curses should get button events, parameter #1 not documented."), + TermCapability(:set_a_foreground, :setaf, "Set foreground color to #1, using ANSI escape"), + TermCapability(:set_a_background, :setab, "Set background color to #1, using ANSI escape"), + TermCapability(:pkey_plab, :pfxl, "Program function key #1 to type string #2 and show string #3"), + TermCapability(:device_type, :devt, "Indicate language/codeset support"), + TermCapability(:code_set_init, :csin, "Init sequence for multiple codesets"), + TermCapability(:set0_des_seq, :s0ds, "Shift to codeset 0 (EUC set 0, ASCII)"), + 
TermCapability(:set1_des_seq, :s1ds, "Shift to codeset 1"), + TermCapability(:set2_des_seq, :s2ds, "Shift to codeset 2"), + TermCapability(:set3_des_seq, :s3ds, "Shift to codeset 3"), + TermCapability(:set_lr_margin, :smglr, "Set both left and right margins to #1, #2. (ML is not in BSD termcap)."), + TermCapability(:set_tb_margin, :smgtb, "Sets both top and bottom margins to #1, #2"), + TermCapability(:bit_image_repeat, :birep, "Repeat bit image cell #1 #2 times"), + TermCapability(:bit_image_newline, :binel, "Move to next row of the bit image"), + TermCapability(:bit_image_carriage_return, :bicr, "Move to beginning of same row"), + TermCapability(:color_names, :colornm, "Give name for color #1"), + TermCapability(:define_bit_image_region, :defbi, "Define rectangular bit image region"), + TermCapability(:end_bit_image_region, :endbi, "End a bit-image region"), + TermCapability(:set_color_band, :setcolor, "Change to ribbon color #1"), + TermCapability(:set_page_length, :slines, "Set page length to #1 lines"), + TermCapability(:display_pc_char, :dispc, "Display PC character #1"), + TermCapability(:enter_pc_charset_mode, :smpch, "Enter PC character display mode"), + TermCapability(:exit_pc_charset_mode, :rmpch, "Exit PC character display mode"), + TermCapability(:enter_scancode_mode, :smsc, "Enter PC scancode mode"), + TermCapability(:exit_scancode_mode, :rmsc, "Exit PC scancode mode"), + TermCapability(:pc_term_options, :pctrm, "PC terminal options"), + TermCapability(:scancode_escape, :scesc, "Escape for scancode emulation"), + TermCapability(:alt_scancode_esc, :scesa, "Alternate escape for scancode emulation"), + TermCapability(:enter_horizontal_hl_mode, :ehhlm, "Enter horizontal highlight mode"), + TermCapability(:enter_left_hl_mode, :elhlm, "Enter left highlight mode"), + TermCapability(:enter_low_hl_mode, :elohlm, "Enter low highlight mode"), + TermCapability(:enter_right_hl_mode, :erhlm, "Enter right highlight mode"), + TermCapability(:enter_top_hl_mode, :ethlm, "Enter top highlight mode"), + TermCapability(:enter_vertical_hl_mode, :evhlm, "Enter vertical highlight mode"), + TermCapability(:set_a_attributes, :sgr1, "Define second set of video attributes #1-#6"), + TermCapability(:set_pglen_inch, :slength, "Set page length to #1 hundredth of an inch (some implementations use sL for termcap)."), + TermCapability(:termcap_init2, :OTi2, "secondary initialization string"), + TermCapability(:termcap_reset, :OTrs, "terminal reset string"), + TermCapability(:linefeed_if_not_lf, :OTnl, "use to move down"), + TermCapability(:backspace_if_not_bs, :OTbc, "move left, if not ^H"), + TermCapability(:other_non_function_keys, :OTko, "list of self-mapped keycaps"), + TermCapability(:arrow_key_map, :OTma, "map motion-keys for vi version 2"), + TermCapability(:acs_ulcorner, :OTG2, "single upper left"), + TermCapability(:acs_llcorner, :OTG3, "single lower left"), + TermCapability(:acs_urcorner, :OTG1, "single upper right"), + TermCapability(:acs_lrcorner, :OTG4, "single lower right"), + TermCapability(:acs_ltee, :OTGR, "tee pointing right"), + TermCapability(:acs_rtee, :OTGL, "tee pointing left"), + TermCapability(:acs_btee, :OTGU, "tee pointing up"), + TermCapability(:acs_ttee, :OTGD, "tee pointing down"), + TermCapability(:acs_hline, :OTGH, "single horizontal line"), + TermCapability(:acs_vline, :OTGV, "single vertical line"), + TermCapability(:acs_plus, :OTGC, "single intersection"), + TermCapability(:memory_lock, :meml, "lock memory above cursor"), + TermCapability(:memory_unlock, :memu, "unlock 
memory"), + TermCapability(:box_chars_1, :box1, "box characters primary set"), +] + +""" +Terminfo extensions that NCurses 6.4-20230311 is aware of. +""" +const TERM_USER = Dict{Tuple{DataType, Symbol}, Union{Tuple{Nothing, String}, Tuple{Symbol, String}}}( + (Int, :CO ) => (nothing, "number of indexed colors overlaying RGB space"), + (String, :E3) => (nothing, "clears the terminal's scrollback buffer."), + (Bool, :NQ) => (nothing, "terminal does not support query/response"), + (Bool, :RGB) => (nothing, "use direct colors with 1/3 of color-pair bits per color."), + (Int, :RGB) => (nothing, "use direct colors with given number of bits per color."), + (String, :RGB) => (nothing, "use direct colors with given bit-layout."), + (String, :TS) => (nothing, "like \"tsl\", but uses no parameter."), + (Int, :U8) => (nothing, "terminal does/does not support VT100 SI/SO when processing UTF-8 encoding."), + (String, :XM) => (nothing, "initialize alternate xterm mouse mode"), + (String, :grbom) => (nothing, "disable real bold (not intensity bright) mode."), + (String, :gsbom) => (nothing, "enable real bold (not intensity bright) mode."), + (String, :xm) => (nothing, "mouse response"), + (String, :Rmol) => (:exit_overline_mode, "remove overline-mode"), + (String, :Smol) => (:enter_overline_mode, "set overline-mode"), + (String, :blink2) => (nothing, "turn on rapid blinking"), + (String, :norm) => (nothing, "turn off bold and half-bright mode"), + (String, :opaq) => (nothing, "turn off blank mode"), + (String, :setal) => (nothing, "set underline-color"), + (String, :smul2) => (nothing, "begin double underline mode"), + (Bool, :AN) => (nothing, "turn on autonuke."), + (Bool, :AX) => (nothing, "understands ANSI set default fg/bg color (\\E[39m / \\E[49m)."), + (String, :C0) => (nothing, "use the string as a conversion table for font '0', like acsc."), + (Bool, :C8) => (nothing, "terminal shows bold as high-intensity colors."), + (String, :CE) => (nothing, "switch cursor-keys back to normal mode."), + (String, :CS) => (nothing, "switch cursor-keys to application mode."), + (String, :E0) => (nothing, "switch charset 'G0' back to standard charset. Default is '\\E(B'."), + (Bool, :G0) => (nothing, "terminal can deal with ISO 2022 font selection sequences."), + (String, :KJ) => (nothing, "set the encoding of the terminal."), + (Int, :OL) => (nothing, "set the screen program's output buffer limit."), + (String, :S0) => (nothing, "switch charset 'G0' to the specified charset. Default is '\\E(%.'."), + (Bool, :TF) => (nothing, "add missing capabilities to screen's termcap/info entry. (Set by default)."), + (String, :WS) => (nothing, "resize display. This capability has the desired width and height as arguments. 
SunView(tm) example: '\\E[8;%d;%dt'."), + (String, :XC) => (nothing, "describe a translation of characters to strings depending on the current font."), + (Bool, :XT) => (nothing, "terminal understands special xterm sequences (OSC, mouse tracking)."), + (String, :Z0) => (nothing, "change width to 132 columns."), + (String, :Z1) => (nothing, "change width to 80 columns."), + (String, :Cr) => (:reset_cursor_color, "restore the default cursor color."), + (String, :Cs) => (:set_cursor_color, "set the cursor color."), + (String, :Csr) => (nothing, "change the cursor style, overriding Ss."), + (String, :Ms) => (:set_host_clipboard, "store the current buffer in the host terminal's selection (clipboard)."), + (String, :Se) => (:reset_cursor_style, "reset the cursor style to the terminal initial state."), + (String, :Smulx) => (:set_underline_style, "modify the appearance of underlines in VTE."), + (String, :Ss) => (:set_cursor_style, "change the cursor style."), + (String, :rmxx) => (:exit_strikeout_mode, "reset ECMA-48 strikeout/crossed-out attributes."), + (String, :smxx) => (:enter_strikeout_mode, "set ECMA-48 strikeout/crossed-out attributes."), + (String, :BD) => (nothing, "disables bracketed paste"), + (String, :BE) => (nothing, "enables bracketed paste"), + (String, :PE) => (nothing, "is sent after pasted text"), + (String, :PS) => (nothing, "is sent before pasted text"), + (String, :RV) => (nothing, "report terminal secondary device attributes"), + (String, :XR) => (nothing, "report terminal version as a free-format string."), + (Bool, :XF) => (:xterm_focus, "terminal supports xterm focus in/out"), + (String, :rv) => (nothing, "response to RV, regular expression"), + (String, :xr) => (nothing, "response to XR, regular expression"), + (String, :csl) => (:clear_status_line, "clear status line"), + (String, :kDC3) => (:key_alt_delete_character, "alt delete-character"), + (String, :kDC4) => (:key_shift_alt_delete_character, "shift+alt delete-character"), + (String, :kDC5) => (:key_control_delete_character, "control delete-character"), + (String, :kDC6) => (:key_shift_control_delete_character, "shift+control delete-character"), + (String, :kDC7) => (:key_alt_control_delete_character, "alt+control delete-character"), + (String, :kDN) => (:key_shift_down_cursor, "shift down-cursor"), + (String, :kDN3) => (:key_alt_down_cursor, "alt down-cursor"), + (String, :kDN4) => (:key_shift_alt_down_cursor, "shift+alt down-cursor"), + (String, :kDN5) => (:key_control_down_cursor, "control down-cursor"), + (String, :kDN6) => (:key_shift_control_down_cursor, "shift+control down-cursor"), + (String, :kDN7) => (:key_alt_control_down_cursor, "alt+control down-cursor"), + (String, :kEND3) => (:key_alt_end, "alt end"), + (String, :kEND4) => (:key_shift_alt_end, "shift+alt end"), + (String, :kEND5) => (:key_control_end, "control end"), + (String, :kEND6) => (:key_shift_control_end, "shift+control end"), + (String, :kEND7) => (:key_alt_control_end, "alt+control end"), + (String, :kHOM3) => (:key_alt_home, "alt home"), + (String, :kHOM4) => (:key_shift_alt_home, "shift+alt home"), + (String, :kHOM5) => (:key_control_home, "control home"), + (String, :kHOM6) => (:key_shift_control_home, "shift+control home"), + (String, :kHOM7) => (:key_alt_control_home, "alt+control home"), + (String, :kIC3) => (:key_alt_insert_character, "alt insert-character"), + (String, :kIC4) => (:key_shift_alt_insert_character, "shift+alt insert-character"), + (String, :kIC5) => (:key_control_insert_character, "control insert-character"), + 
(String, :kIC6) => (:key_shift_control_insert_character, "shift+control insert-character"), + (String, :kIC7) => (:key_alt_control_insert_character, "alt+control insert-character"), + (String, :kLFT3) => (:key_alt_left_cursor, "alt left-cursor"), + (String, :kLFT4) => (:key_shift_alt_left_cursor, "shift+alt left-cursor"), + (String, :kLFT5) => (:key_control_left_cursor, "control left-cursor"), + (String, :kLFT6) => (:key_shift_control_left_cursor, "shift+control left-cursor"), + (String, :kLFT7) => (:key_alt_control_left_cursor, "alt+control left-cursor"), + (String, :kNXT3) => (:key_alt_next, "alt next"), + (String, :kNXT4) => (:key_shift_alt_next, "shift+alt next"), + (String, :kNXT5) => (:key_control_next, "control next"), + (String, :kNXT6) => (:key_shift_control_next, "shift+control next"), + (String, :kNXT7) => (:key_alt_control_next, "alt+control next"), + (String, :kPRV3) => (:key_alt_previous, "alt previous"), + (String, :kPRV4) => (:key_shift_alt_previous, "shift+alt previous"), + (String, :kPRV5) => (:key_control_previous, "control previous"), + (String, :kPRV6) => (:key_shift_control_previous, "shift+control previous"), + (String, :kPRV7) => (:key_alt_control_previous, "alt+control previous"), + (String, :kRIT3) => (:key_alt_right_cursor, "alt right-cursor"), + (String, :kRIT4) => (:key_shift_alt_right_cursor, "shift+alt right-cursor"), + (String, :kRIT5) => (:key_control_right_cursor, "control right-cursor"), + (String, :kRIT6) => (:key_shift_control_right_cursor, "shift+control right-cursor"), + (String, :kRIT7) => (:key_alt_control_right_cursor, "alt+control right-cursor"), + (String, :kUP) => (:key_shift_up_cursor, "shift up-cursor"), + (String, :kUP3) => (:key_alt_up_cursor, "alt up-cursor"), + (String, :kUP4) => (:key_shift_alt_up_cursor, "shift+alt up-cursor"), + (String, :kUP5) => (:key_control_up_cursor, "control up-cursor"), + (String, :kUP6) => (:key_shift_control_up_cursor, "shift+control up-cursor"), + (String, :kUP7) => (:key_alt_control_up_cursor, "alt+control up-cursor"), + (String, :ka2) => (nothing, "vt220-keypad extensions"), + (String, :kb1) => (nothing, "vt220-keypad extensions"), + (String, :kb3) => (nothing, "vt220-keypad extensions"), + (String, :kc2) => (nothing, "vt220-keypad extensions"), + (String, :kxIN) => (:key_mouse_response_on_focus_in, "mouse response on focus-in"), + (String, :kxOUT) => (:key_mouse_response_on_focus_out, "mouse response on focus-out"), + (Bool, :Tc) => (:truecolor, "tmux extension to indicate 24-bit truecolor support"), + (Bool, :Su) => (:can_style_underline, "kitty extension to indicate styled underline support"), +) diff --git a/base/threadcall.jl b/base/threadcall.jl index 7548c5063671f..fbc1a87a20980 100644 --- a/base/threadcall.jl +++ b/base/threadcall.jl @@ -1,8 +1,9 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license const max_ccall_threads = parse(Int, get(ENV, "UV_THREADPOOL_SIZE", "4")) -const thread_notifiers = Union{Base.Condition, Nothing}[nothing for i in 1:max_ccall_threads] +const thread_notifiers = Union{Event, Nothing}[nothing for i in 1:max_ccall_threads] const threadcall_restrictor = Semaphore(max_ccall_threads) +const threadcall_lock = Threads.SpinLock() """ @threadcall((cfunc, clib), rettype, (argtypes...), argvals...) 
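
The surrounding hunks guard the notifier table with the new `threadcall_lock`; a
minimal, self-contained sketch of the claim/release pattern (illustrative only:
the stand-in globals below are not part of the patch):

```julia
# Stand-ins for the file's globals, using the same names as in the patch.
const thread_notifiers = Union{Base.Event, Nothing}[nothing for _ in 1:4]
const threadcall_lock = Base.Threads.SpinLock()

# Claim a free slot and install an Event for the worker thread to notify.
idx = Base.@lock threadcall_lock begin
    i = findfirst(isequal(nothing), thread_notifiers)::Int
    thread_notifiers[i] = Base.Event()
    i
end

# ... queue the ccall, then `wait(thread_notifiers[idx])` for the result ...

# Release the slot under the same lock once the result has been read.
Base.@lock threadcall_lock begin
    thread_notifiers[idx] = nothing
end
```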
@@ -81,8 +82,11 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, ar # wait for a worker thread to be available acquire(threadcall_restrictor) - idx = findfirst(isequal(nothing), thread_notifiers)::Int - thread_notifiers[idx] = Base.Condition() + idx = -1 + @lock threadcall_lock begin + idx = findfirst(isequal(nothing), thread_notifiers)::Int + thread_notifiers[idx] = Event() + end GC.@preserve args_arr ret_arr roots begin # queue up the work to be done @@ -92,7 +96,9 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, ar # wait for a result & return it wait(thread_notifiers[idx]) - thread_notifiers[idx] = nothing + @lock threadcall_lock begin + thread_notifiers[idx] = nothing + end release(threadcall_restrictor) r = unsafe_load(convert(Ptr{rettype}, pointer(ret_arr))) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index e4f14e26ac5a9..3d86e203ef72e 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -3,11 +3,13 @@ export threadid, nthreads, @threads, @spawn, threadpool, nthreadpools +public Condition, threadpoolsize, ngcthreads + """ - Threads.threadid() -> Int + Threads.threadid([t::Task]) -> Int -Get the ID number of the current thread of execution. The master thread has -ID `1`. +Get the ID number of the current thread of execution, or the thread of task +`t`. The master thread has ID `1`. # Examples ```julia-repl @@ -21,12 +23,15 @@ julia> Threads.@threads for i in 1:4 2 5 4 + +julia> Threads.threadid(Threads.@spawn "foo") +2 ``` !!! note The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration). - For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects. - + For this reason in most cases it is not safe to use `threadid([task])` to index into, say, a vector of buffers or stateful + objects. """ threadid() = Int(ccall(:jl_threadid, Int16, ())+1) @@ -44,8 +49,9 @@ maxthreadid() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cin """ Threads.nthreads(:default | :interactive) -> Int -Get the current number of threads within the specified thread pool. The threads in default -have id numbers `1:nthreads(:default)`. +Get the current number of threads within the specified thread pool. The threads in `:interactive` +have id numbers `1:nthreads(:interactive)`, and the threads in `:default` have id numbers in +`nthreads(:interactive) .+ (1:nthreads(:default))`. See also `BLAS.get_num_threads` and `BLAS.set_num_threads` in the [`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the [`Distributed`](@ref man-distributed) @@ -63,8 +69,10 @@ function _tpid_to_sym(tpid::Int8) return :interactive elseif tpid == 1 return :default + elseif tpid == -1 + return :foreign else - throw(ArgumentError("Unrecognized threadpool id $tpid")) + throw(ArgumentError(LazyString("Unrecognized threadpool id ", tpid))) end end @@ -73,21 +81,41 @@ function _sym_to_tpid(tp::Symbol) return Int8(0) elseif tp === :default return Int8(1) + elseif tp == :foreign + return Int8(-1) else - throw(ArgumentError("Unrecognized threadpool name `$(repr(tp))`")) + throw(ArgumentError(LazyString("Unrecognized threadpool name `", tp, "`"))) end end """ Threads.threadpool(tid = threadid()) -> Symbol -Returns the specified thread's threadpool; either `:default` or `:interactive`. 
+Returns the specified thread's threadpool; either `:default`, `:interactive`, or `:foreign`. """ function threadpool(tid = threadid()) tpid = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) return _tpid_to_sym(tpid) end +""" + Threads.threadpooldescription(tid = threadid()) -> String + +Returns the specified thread's threadpool name with extended description where appropriate. +""" +function threadpooldescription(tid = threadid()) + threadpool_name = threadpool(tid) + if threadpool_name == :foreign + # TODO: extend tls to include a field to add a description to a foreign thread and make this more general + n_others = nthreads(:interactive) + nthreads(:default) + # Assumes GC threads come first in the foreign thread pool + if tid > n_others && tid <= n_others + ngcthreads() + return "foreign: gc" + end + end + return string(threadpool_name) +end + """ Threads.nthreadpools() -> Int @@ -108,6 +136,8 @@ See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the function threadpoolsize(pool::Symbol = :default) if pool === :default || pool === :interactive tpid = _sym_to_tpid(pool) + elseif pool == :foreign + error("Threadpool size of `:foreign` is indeterminant") else error("invalid threadpool specified") end @@ -146,7 +176,14 @@ function threading_run(fun, static) for i = 1:n t = Task(() -> fun(i)) # pass in tid t.sticky = static - static && ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid_offset + i-1) + if static + ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid_offset + i-1) + else + # TODO: this should be the current pool (except interactive) if there + # are ever more than two pools. + _result = ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, _sym_to_tpid(:default)) + @assert _result == 1 + end tasks[i] = t schedule(t) end @@ -163,9 +200,46 @@ end function _threadsfor(iter, lbody, schedule) lidx = iter.args[1] # index range = iter.args[2] + esc_range = esc(range) + func = if schedule === :greedy + greedy_func(esc_range, lidx, lbody) + else + default_func(esc_range, lidx, lbody) + end quote local threadsfor_fun - let range = $(esc(range)) + $func + if $(schedule === :greedy || schedule === :dynamic || schedule === :default) + threading_run(threadsfor_fun, false) + elseif ccall(:jl_in_threaded_region, Cint, ()) != 0 # :static + error("`@threads :static` cannot be used concurrently or nested") + else # :static + threading_run(threadsfor_fun, true) + end + nothing + end +end + +function greedy_func(itr, lidx, lbody) + quote + let c = Channel{eltype($itr)}(0,spawn=true) do ch + for item in $itr + put!(ch, item) + end + end + function threadsfor_fun(tid) + for item in c + local $(esc(lidx)) = item + $(esc(lbody)) + end + end + end + end +end + +function default_func(itr, lidx, lbody) + quote + let range = $itr function threadsfor_fun(tid = 1; onethread = false) r = range # Load into local variable lenr = length(r) @@ -203,14 +277,6 @@ function _threadsfor(iter, lbody, schedule) end end end - if $(schedule === :dynamic || schedule === :default) - threading_run(threadsfor_fun, false) - elseif ccall(:jl_in_threaded_region, Cint, ()) != 0 # :static - error("`@threads :static` cannot be used concurrently or nested") - else # :static - threading_run(threadsfor_fun, true) - end - nothing end end @@ -276,6 +342,20 @@ microseconds). !!! compat "Julia 1.8" The `:dynamic` option for the `schedule` argument is available and the default as of Julia 1.8. 
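The `greedy_func` helper added in this hunk drives the `:greedy` schedule documented in the section that follows: a producer task feeds an unbuffered `Channel` and each worker task pulls the next value as soon as it is free. A stand-alone sketch of that pattern, assuming a session started with more than one thread (not the macro's actual expansion):

```julia
# Producer: an unbuffered Channel fed by its own spawned task, as in `greedy_func`.
items = Channel{Int}(0; spawn=true) do ch
    for i in 1:100
        put!(ch, i)                 # hands one value at a time to whichever task asks next
    end
end

# Consumers: one task per thread, each greedily draining the channel.
workers = [Threads.@spawn begin
               acc = 0
               for item in items    # iteration ends once the producer finishes and the channel closes
                   acc += item
               end
               acc
           end for _ in 1:Threads.nthreads()]

sum(fetch.(workers)) == sum(1:100)  # every value is processed exactly once
```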
+### `:greedy` + +`:greedy` scheduler spawns up to [`Threads.threadpoolsize()`](@ref) tasks, each greedily working on +the given iterated values as they are produced. As soon as one task finishes its work, it takes +the next value from the iterator. Work done by any individual task is not necessarily on +contiguous values from the iterator. The given iterator may produce values forever, only the +iterator interface is required (no indexing). + +This scheduling option is generally a good choice if the workload of individual iterations +is not uniform/has a large spread. + +!!! compat "Julia 1.11" + The `:greedy` option for the `schedule` argument is available as of Julia 1.11. + ### `:static` `:static` scheduler creates one task per thread and divides the iterations equally among @@ -289,7 +369,7 @@ thread other than 1. In newly written library functions, `:static` scheduling is discouraged because the functions using this option cannot be called from arbitrary worker threads. -## Example +## Examples To illustrate of the different scheduling strategies, consider the following function `busywait` containing a non-yielding timed loop that runs for a given number of seconds. @@ -331,7 +411,7 @@ macro threads(args...) # for now only allow quoted symbols sched = nothing end - if sched !== :static && sched !== :dynamic + if sched !== :static && sched !== :dynamic && sched !== :greedy throw(ArgumentError("unsupported schedule argument in @threads")) end elseif na == 1 @@ -351,10 +431,11 @@ end function _spawn_set_thrpool(t::Task, tp::Symbol) tpid = _sym_to_tpid(tp) - if _nthreads_in_pool(tpid) == 0 + if tpid == -1 || _nthreads_in_pool(tpid) == 0 tpid = _sym_to_tpid(:default) end - ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid) + _result = ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid) + @assert _result == 1 nothing end @@ -406,7 +487,7 @@ macro spawn(args...) if ttype isa QuoteNode ttype = ttype.value if ttype !== :interactive && ttype !== :default - throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + throw(ArgumentError(LazyString("unsupported threadpool in @spawn: ", ttype))) end tp = QuoteNode(ttype) else diff --git a/base/threads.jl b/base/threads.jl index 2d388cc4b9f77..bdd6677c5a955 100644 --- a/base/threads.jl +++ b/base/threads.jl @@ -8,7 +8,6 @@ module Threads global Condition # we'll define this later, make sure we don't import Base.Condition include("threadingconstructs.jl") -include("atomics.jl") include("locks-mt.jl") end diff --git a/base/timing.jl b/base/timing.jl index d166b4162db59..65c2a643d6a52 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# This type must be kept in sync with the C struct in src/gc.h +# This type must be kept in sync with the C struct in src/gc-interface.h struct GC_Num allocd ::Int64 # GC internal deferred_alloc ::Int64 # GC internal @@ -22,13 +22,18 @@ struct GC_Num total_time_to_safepoint ::Int64 sweep_time ::Int64 mark_time ::Int64 + stack_pool_sweep_time ::Int64 total_sweep_time ::Int64 + total_sweep_page_walk_time ::Int64 + total_sweep_madvise_time ::Int64 + total_sweep_free_mallocd_memory_time ::Int64 total_mark_time ::Int64 + total_stack_pool_sweep_time::Int64 last_full_sweep ::Int64 + last_incremental_sweep ::Int64 end gc_num() = ccall(:jl_gc_num, GC_Num, ()) -reset_gc_stats() = ccall(:jl_gc_reset_stats, Cvoid, ()) # This type is to represent differences in the counters, so fields may be negative struct GC_Diff @@ -47,7 +52,7 @@ gc_total_bytes(gc_num::GC_Num) = gc_num.allocd + gc_num.deferred_alloc + gc_num.total_allocd function GC_Diff(new::GC_Num, old::GC_Num) - # logic from `src/gc.c:jl_gc_total_bytes` + # logic from `jl_gc_total_bytes` old_allocd = gc_total_bytes(old) new_allocd = gc_total_bytes(new) return GC_Diff(new_allocd - old_allocd, @@ -97,6 +102,49 @@ function gc_live_bytes() Int(ccall(:jl_gc_live_bytes, Int64, ())) + num.allocd + num.deferred_alloc end +# must be kept in sync with the value from `src/julia_threads.h`` +const JL_GC_N_MAX_POOLS = 51 +function gc_page_utilization_data() + page_utilization_raw = cglobal(:jl_gc_page_utilization_stats, Float64) + return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false) +end + + +const USING_STOCK_GC = occursin("stock", unsafe_string(ccall(:jl_gc_active_impl, Ptr{UInt8}, ()))) +# Full sweep reasons are currently only available for the stock GC +@static if USING_STOCK_GC +# must be kept in sync with `src/gc-stock.h`` +const FULL_SWEEP_REASONS = [:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL, :FULL_SWEEP_REASON_FORCED_FULL_SWEEP, + :FULL_SWEEP_REASON_USER_MAX_EXCEEDED, :FULL_SWEEP_REASON_LARGE_PROMOTION_RATE] +end + +""" + Base.full_sweep_reasons() + +Return a dictionary of the number of times each full sweep reason has occurred. + +The reasons are: +- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: Full sweep was caused due to `always_full` being set in the GC debug environment +- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: Full sweep was forced by `GC.gc(true)` +- `:FULL_SWEEP_REASON_USER_MAX_EXCEEDED`: Full sweep was forced due to the system reaching the heap soft size limit +- `:FULL_SWEEP_REASON_LARGE_PROMOTION_RATE`: Full sweep was forced by a large promotion rate across GC generations + +Note that the set of reasons is not guaranteed to be stable across minor versions of Julia. 
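`Base.full_sweep_reasons`, whose docstring text ends here, is new in this patch. A usage sketch, assuming the stock GC is active (with an alternate GC implementation the returned dictionary is empty, hence the `get` fallbacks):

```julia
# Sketch: force a full collection and read back the matching counter.
before = Base.full_sweep_reasons()
GC.gc(true)                            # request a forced full sweep
after  = Base.full_sweep_reasons()
k = :FULL_SWEEP_REASON_FORCED_FULL_SWEEP
get(after, k, 0) - get(before, k, 0)   # usually 1 under the stock GC, 0 otherwise
```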
+""" +function full_sweep_reasons() + d = Dict{Symbol, Int64}() + # populate the dictionary according to the reasons above for the stock GC + # otherwise return an empty dictionary for now + @static if USING_STOCK_GC + reason = cglobal(:jl_full_sweep_reasons, UInt64) + reasons_as_array = Base.unsafe_wrap(Vector{UInt64}, reason, length(FULL_SWEEP_REASONS), own=false) + for (i, r) in enumerate(FULL_SWEEP_REASONS) + d[r] = reasons_as_array[i] + end + end + return d +end + """ Base.jit_total_bytes() @@ -127,21 +175,52 @@ function padded_nonzero_print(value, str, always_print = true) end end -function format_bytes(bytes) # also used by InteractiveUtils - bytes, mb = prettyprint_getunits(bytes, length(_mem_units), Int64(1024)) +""" + format_bytes(bytes; binary=true) + +Format a given number of bytes into a human-readable string. + +# Arguments +- `bytes`: The number of bytes to format. +- `binary=true`: If `true`, formats the bytes in binary units (powers of 1024). If `false`, uses decimal units (powers of 1000). + +# Returns +`String`: A human-readable string representation of the bytes, formatted in either binary or decimal units based on the `binary` argument. + +# Examples +```jldoctest +julia> Base.format_bytes(1024) +"1024 bytes" + +julia> Base.format_bytes(10000) +"9.766 KiB" + +julia> Base.format_bytes(10000, binary=false) +"10.000 kB" +``` +""" +function format_bytes(bytes; binary=true) # also used by InteractiveUtils + units = binary ? _mem_units : _cnt_units + factor = binary ? 1024 : 1000 + bytes, mb = prettyprint_getunits(bytes, length(units), Int64(factor)) if mb == 1 return string(Int(bytes), " ", _mem_units[mb], bytes==1 ? "" : "s") else - return string(Ryu.writefixed(Float64(bytes), 3), " ", _mem_units[mb]) + return string(Ryu.writefixed(Float64(bytes), 3), binary ? " $(units[mb])" : "$(units[mb])B") end end -function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) +function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, newline=false; + msg::Union{String,Nothing}=nothing) timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6) str = sprint() do io - _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") + if msg isa String + print(io, msg, ": ") + else + print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") + end print(io, timestr, " seconds") - parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0 + parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0 parens && print(io, " (") if bytes != 0 || allocs != 0 allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000)) @@ -158,10 +237,17 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_ti end print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time") end - if compile_time > 0 + if lock_conflicts > 0 if bytes != 0 || allocs != 0 || gctime > 0 print(io, ", ") end + plural = lock_conflicts == 1 ? 
"" : "s" + print(io, lock_conflicts, " lock conflict$plural") + end + if compile_time > 0 + if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 + print(io, ", ") + end print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time") end if recompile_time > 0 @@ -176,11 +262,11 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_ti nothing end -function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad) +function timev_print(elapsedtime, diff::GC_Diff, lock_conflicts, compile_times; msg::Union{String,Nothing}=nothing) allocs = gc_alloc_count(diff) compile_time = first(compile_times) recompile_time = last(compile_times) - time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, lock_conflicts, compile_time, recompile_time, true; msg) padded_nonzero_print(elapsedtime, "elapsed time (ns)") padded_nonzero_print(diff.total_time, "gc time (ns)") padded_nonzero_print(diff.allocd, "bytes allocated") @@ -212,7 +298,8 @@ end A macro to execute an expression, printing the time it took to execute, the number of allocations, and the total number of bytes its execution caused to be allocated, before returning the value of the expression. Any time spent garbage collecting (gc), compiling -new code, or recompiling invalidated code is shown as a percentage. +new code, or recompiling invalidated code is shown as a percentage. Any lock conflicts +where a [`ReentrantLock`](@ref) had to wait are shown as a count. Optionally provide a description string to print before the time report. @@ -233,6 +320,9 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ Recompilation time being shown separately from compilation time was introduced in Julia 1.8 +!!! compat "Julia 1.11" + The reporting of any lock conflicts was added in Julia 1.11. + ```julia-repl julia> x = rand(10,10); @@ -267,22 +357,11 @@ macro time(ex) end macro time(msg, ex) quote - Experimental.@force_compile - local stats = gc_num() - local elapsedtime = time_ns() - cumulative_compile_timing(true) - local compile_elapsedtimes = cumulative_compile_time_ns() - local val = @__tryfinally($(esc(ex)), - (elapsedtime = time_ns() - elapsedtime; - cumulative_compile_timing(false); - compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes) - ) - local diff = GC_Diff(gc_num(), stats) + local ret = @timed $(esc(ex)) local _msg = $(esc(msg)) - local has_msg = !isnothing(_msg) - has_msg && print(_msg, ": ") - time_print(stdout, elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) - val + local _msg_str = _msg === nothing ? 
_msg : string(_msg) + time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str) + ret.value end end @@ -351,22 +430,11 @@ macro timev(ex) end macro timev(msg, ex) quote - Experimental.@force_compile - local stats = gc_num() - local elapsedtime = time_ns() - cumulative_compile_timing(true) - local compile_elapsedtimes = cumulative_compile_time_ns() - local val = @__tryfinally($(esc(ex)), - (elapsedtime = time_ns() - elapsedtime; - cumulative_compile_timing(false); - compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes) - ) - local diff = GC_Diff(gc_num(), stats) + local ret = @timed $(esc(ex)) local _msg = $(esc(msg)) - local has_msg = !isnothing(_msg) - has_msg && print(_msg, ": ") - timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg) - val + local _msg_str = _msg === nothing ? _msg : string(_msg) + timev_print(ret.time*1e9, ret.gcstats, ret.lock_conflicts, (ret.compile_time*1e9, ret.recompile_time*1e9); msg=_msg_str) + ret.value end end @@ -459,19 +527,57 @@ macro allocations(ex) end end +""" + @lock_conflicts + +A macro to evaluate an expression, discard the resulting value, and instead return the +total number of lock conflicts during evaluation, where a lock attempt on a [`ReentrantLock`](@ref) +resulted in a wait because the lock was already held. + +See also [`@time`](@ref), [`@timev`](@ref) and [`@timed`](@ref). + +```julia-repl +julia> @lock_conflicts begin + l = ReentrantLock() + Threads.@threads for i in 1:Threads.nthreads() + lock(l) do + sleep(1) + end + end +end +5 +``` + +!!! compat "Julia 1.11" + This macro was added in Julia 1.11. +""" +macro lock_conflicts(ex) + quote + Threads.lock_profiling(true) + local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] + try + $(esc(ex)) + finally + Threads.lock_profiling(false) + end + Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts + end +end + """ @timed -A macro to execute an expression, and return the value of the expression, elapsed time, -total bytes allocated, garbage collection time, and an object with various memory allocation -counters. +A macro to execute an expression, and return the value of the expression, elapsed time in seconds, +total bytes allocated, garbage collection time, an object with various memory allocation +counters, compilation time in seconds, and recompilation time in seconds. Any lock conflicts +where a [`ReentrantLock`](@ref) had to wait are shown as a count. In some cases the system will look inside the `@timed` expression and compile some of the called code before execution of the top-level expression begins. When that happens, some compilation time will not be counted. To include this time you can run `@timed @eval ...`. See also [`@time`](@ref), [`@timev`](@ref), [`@elapsed`](@ref), -[`@allocated`](@ref), and [`@allocations`](@ref). +[`@allocated`](@ref), [`@allocations`](@ref), and [`@lock_conflicts`](@ref). ```julia-repl julia> stats = @timed rand(10^6); @@ -490,19 +596,82 @@ julia> propertynames(stats.gcstats) julia> stats.gcstats.total_time 5576500 + +julia> stats.compile_time +0.0 + +julia> stats.recompile_time +0.0 + ``` !!! compat "Julia 1.5" The return type of this macro was changed from `Tuple` to `NamedTuple` in Julia 1.5. + +!!! compat "Julia 1.11" + The `lock_conflicts`, `compile_time`, and `recompile_time` fields were added in Julia 1.11. 
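With the additions above, `@timed` now reports lock conflicts and (re)compilation time alongside the existing fields, and the message-taking forms of `@time`/`@timev` are reimplemented on top of it. A small sketch of consuming the extended result, assuming the Julia 1.11 field set described in the docstring:

```julia
# The NamedTuple returned by @timed gained three fields in this patch.
stats = @timed sum(rand(10^6))

stats.value           # result of the expression
stats.time            # elapsed seconds
stats.gctime          # seconds spent in GC
stats.lock_conflicts  # times a ReentrantLock had to wait
stats.compile_time    # seconds spent compiling
stats.recompile_time  # seconds spent recompiling invalidated code
```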
""" macro timed(ex) quote Experimental.@force_compile + Threads.lock_profiling(true) + local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] local stats = gc_num() local elapsedtime = time_ns() - local val = $(esc(ex)) - elapsedtime = time_ns() - elapsedtime + cumulative_compile_timing(true) + local compile_elapsedtimes = cumulative_compile_time_ns() + local val = @__tryfinally($(esc(ex)), + (elapsedtime = time_ns() - elapsedtime; + cumulative_compile_timing(false); + compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes; + lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts; + Threads.lock_profiling(false)) + ) local diff = GC_Diff(gc_num(), stats) - (value=val, time=elapsedtime/1e9, bytes=diff.allocd, gctime=diff.total_time/1e9, gcstats=diff) + ( + value=val, + time=elapsedtime/1e9, + bytes=diff.allocd, + gctime=diff.total_time/1e9, + gcstats=diff, + lock_conflicts=lock_conflicts, + compile_time=compile_elapsedtimes[1]/1e9, + recompile_time=compile_elapsedtimes[2]/1e9 + ) + end +end + +# Exported, documented, and tested in InteractiveUtils +# here so it's possible to time/trace all imports, including InteractiveUtils and its deps +macro time_imports(ex) + quote + try + Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1) + $(esc(ex)) + finally + Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1) + end + end +end + +macro trace_compile(ex) + quote + try + ccall(:jl_force_trace_compile_timing_enable, Cvoid, ()) + $(esc(ex)) + finally + ccall(:jl_force_trace_compile_timing_disable, Cvoid, ()) + end + end +end + +macro trace_dispatch(ex) + quote + try + ccall(:jl_force_trace_dispatch_enable, Cvoid, ()) + $(esc(ex)) + finally + ccall(:jl_force_trace_dispatch_disable, Cvoid, ()) + end end end diff --git a/base/toml_parser.jl b/base/toml_parser.jl index 6c4ff6e2a52c0..4d07cfed05d8a 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -1,11 +1,16 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +""" +`Base.TOML` is an undocumented internal part of Julia's TOML parser +implementation. Users should call the documented interface in the +TOML.jl standard library instead (by `import TOML` or `using TOML`). +""" module TOML using Base: IdSet -# In case we do not have the Dates stdlib available # we parse DateTime into these internal structs, +# unless a different DateTime library is passed to the Parser constructor # note that these do not do any argument checking struct Date year::Int @@ -33,7 +38,7 @@ const TOMLDict = Dict{String, Any} # Parser # ########## -mutable struct Parser +mutable struct Parser{Dates} str::String # 1 character look ahead current_char::Char @@ -79,16 +84,11 @@ mutable struct Parser # Filled in in case we are parsing a file to improve error messages filepath::Union{String, Nothing} - - # Get's populated with the Dates stdlib if it exists - Dates::Union{Module, Nothing} end -const DATES_PKGID = Base.PkgId(Base.UUID("ade2ca70-3891-5945-98fb-dc099432e06a"), "Dates") - -function Parser(str::String; filepath=nothing) +function Parser{Dates}(str::String; filepath=nothing) where {Dates} root = TOMLDict() - l = Parser( + l = Parser{Dates}( str, # str EOF_CHAR, # current_char firstindex(str), # pos @@ -103,12 +103,12 @@ function Parser(str::String; filepath=nothing) IdSet{Any}(), # static_arrays IdSet{TOMLDict}(), # defined_tables root, - filepath, - isdefined(Base, :maybe_root_module) ? 
Base.maybe_root_module(DATES_PKGID) : nothing, + filepath ) startup(l) return l end + function startup(l::Parser) # Populate our one character look-ahead c = eat_char(l) @@ -119,8 +119,10 @@ function startup(l::Parser) end end -Parser() = Parser("") -Parser(io::IO) = Parser(read(io, String)) +Parser{Dates}() where {Dates} = Parser{Dates}("") +Parser{Dates}(io::IO) where {Dates} = Parser{Dates}(read(io, String)) + +# Parser(...) will be defined by TOML stdlib function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing) p.str = str @@ -146,8 +148,6 @@ end # Errors # ########## -throw_internal_error(msg) = error("internal TOML parser error: $msg") - # Many functions return a ParserError. We want this to bubble up # all the way and have this error be returned to the user # if the parse is called with `raise=false`. This macro @@ -367,7 +367,7 @@ end @inline peek(l::Parser) = l.current_char # Return true if the character was accepted. When a character -# is accepted it get's eaten and we move to the next character +# is accepted it gets eaten and we move to the next character @inline function accept(l::Parser, f::Union{Function, Char})::Bool c = peek(l) c == EOF_CHAR && return false @@ -491,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, d = d::TOMLDict key = dotted_keys[i] d = get!(TOMLDict, d, key) - if d isa Vector + if d isa Vector{Any} d = d[end] + elseif d isa Vector + return ParserError(ErrKeyAlreadyHasValue) end check && @try check_allowed_add_key(l, d, i == length(dotted_keys)) end @@ -533,7 +535,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} end d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false) k = table_key[end] - old = get!(() -> [], d, k) + old = get!(() -> Any[], d, k) if old isa Vector if old in l.static_arrays return ParserError(ErrAddArrayToStaticArray) @@ -542,7 +544,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} return ParserError(ErrArrayTreatedAsDictionary) end d_new = TOMLDict() - push!(old, d_new) + push!(old::Vector{Any}, d_new) push!(l.defined_tables, d_new) l.active_table = d_new @@ -611,7 +613,7 @@ function _parse_key(l::Parser) else set_marker!(l) if accept_batch(l, isvalid_barekey_char) - if !(peek(l) == '.' || peek(l) == ' ' || peek(l) == ']' || peek(l) == '=') + if !(peek(l) == '.' || iswhitespace(peek(l)) || peek(l) == ']' || peek(l) == '=') c = eat_char(l) return ParserError(ErrInvalidBareKeyCharacter, c) end @@ -664,41 +666,20 @@ end # Array # ######### -function push!!(v::Vector, el) - # Since these types are typically non-inferrable, they are a big invalidation risk, - # and since it's used by the package-loading infrastructure the cost of invalidation - # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather - # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there - # is no ambiguity about what types of objects will be created. 
- T = eltype(v) - t = typeof(el) - if el isa T || t === T - push!(v, el::T) - return v - elseif T === Union{} - out = Vector{t}(undef, 1) - out[1] = el - return out - else - if T isa Union - newT = Any - else - newT = Union{T, typeof(el)} - end - new = Array{newT}(undef, length(v)) - copy!(new, v) - return push!(new, el) +function copyto_typed!(a::Vector{T}, b::Vector) where T + for i in 1:length(b) + a[i] = b[i]::T end + return nothing end -function parse_array(l::Parser)::Err{Vector} +function parse_array(l::Parser{Dates})::Err{Vector} where Dates skip_ws_nl(l) - array = Vector{Union{}}() + array = Vector{Any}() empty_array = accept(l, ']') while !empty_array v = @try parse_value(l) - # TODO: Worth to function barrier this? - array = push!!(array, v) + array = push!(array, v) # There can be an arbitrary number of newlines and comments before a value and before the closing bracket. skip_ws_nl(l) comma = accept(l, ',') @@ -708,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector} return ParserError(ErrExpectedCommaBetweenItemsArray) end end - push!(l.static_arrays, array) - return array + # check for static type throughout array + T = !isempty(array) ? typeof(array[1]) : Union{} + for el in array + if typeof(el) != T + T = Any + break + end + end + if T === Any + new = array + elseif T === String + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Bool + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Int64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === UInt64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Float64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Union{} + new = Any[] + elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) || + (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime) + # do nothing, leave as Vector{Any} + new = array + else @assert false end + push!(l.static_arrays, new) + return new end @@ -849,7 +862,7 @@ function parse_number_or_date_start(l::Parser) ate, contains_underscore = @try accept_batch_underscore(l, isdigit, readed_zero) read_underscore |= contains_underscore if (read_digit || ate) && ok_end_value(peek(l)) - return parse_int(l, contains_underscore) + return parse_integer(l, contains_underscore) end # Done with integers here @@ -895,11 +908,22 @@ end function parse_float(l::Parser, contains_underscore)::Err{Float64} s = take_string_or_substring(l, contains_underscore) v = Base.tryparse(Float64, s) - v === nothing && return(ParserError(ErrGenericValueError)) + v === nothing && return ParserError(ErrGenericValueError) return v end -for (name, T1, T2, n1, n2) in (("int", Int64, Int128, 17, 33), +function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int64} + s = take_string_or_substring(l, contains_underscore) + v = try + Base.parse(Int64, s; base=base) + catch e + e isa Base.OverflowError && return ParserError(ErrOverflowError) + rethrow() + end + return v +end + +for (name, T1, T2, n1, n2) in (("integer", Int64, Int128, 17, 33), ("hex", UInt64, UInt128, 18, 34), ("oct", UInt64, UInt128, 24, 45), ("bin", UInt64, UInt128, 66, 130), @@ -916,8 +940,8 @@ for (name, T1, T2, n1, n2) in (("int", Int64, Int128, 17, 33), Base.parse(BigInt, s; base) end catch e - e isa Base.OverflowError && return(ParserError(ErrOverflowError)) - error("internal parser error: did not correctly discredit $(repr(s)) as an int") + e isa 
Base.OverflowError && return ParserError(ErrOverflowError) + rethrow() end return v end @@ -1009,26 +1033,26 @@ function parse_datetime(l) return try_return_datetime(l, year, month, day, h, m, s, ms) end -function try_return_datetime(p, year, month, day, h, m, s, ms) - Dates = p.Dates +function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates if Dates !== nothing try return Dates.DateTime(year, month, day, h, m, s, ms) - catch - return ParserError(ErrParsingDateTime) + catch ex + ex isa ArgumentError && return ParserError(ErrParsingDateTime) + rethrow() end else return DateTime(year, month, day, h, m, s, ms) end end -function try_return_date(p, year, month, day) - Dates = p.Dates +function try_return_date(p::Parser{Dates}, year, month, day) where Dates if Dates !== nothing try return Dates.Date(year, month, day) - catch - return ParserError(ErrParsingDateTime) + catch ex + ex isa ArgumentError && return ParserError(ErrParsingDateTime) + rethrow() end else return Date(year, month, day) @@ -1044,13 +1068,13 @@ function parse_local_time(l::Parser) return try_return_time(l, h, m, s, ms) end -function try_return_time(p, h, m, s, ms) - Dates = p.Dates +function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates if Dates !== nothing try return Dates.Time(h, m, s, ms) - catch - return ParserError(ErrParsingDateTime) + catch ex + ex isa ArgumentError && return ParserError(ErrParsingDateTime) + rethrow() end else return Time(h, m, s, ms) @@ -1098,7 +1122,7 @@ function _parse_local_time(l::Parser, skip_hour=false)::Err{NTuple{4, Int64}} end # DateTime in base only manages 3 significant digits in fractional # second - fractional_second = parse_int(l, false) + fractional_second = parse_int(l, false)::Int64 # Truncate off the rest eventual digits accept_batch(l, isdigit) end diff --git a/base/ttyhascolor.jl b/base/ttyhascolor.jl deleted file mode 100644 index 5984dba6d592e..0000000000000 --- a/base/ttyhascolor.jl +++ /dev/null @@ -1,27 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -if Sys.iswindows() - ttyhascolor(term_type = nothing) = true -else - function ttyhascolor(term_type = get(ENV, "TERM", "")) - startswith(term_type, "xterm") && return true - try - @static if Sys.KERNEL === :FreeBSD - return success(`tput AF 0`) - else - return success(`tput setaf 0`) - end - catch e - return false - end - end -end -function get_have_color() - global have_color - have_color === nothing && (have_color = ttyhascolor()) - return have_color::Bool -end -in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color() -haskey(::TTY, key::Symbol) = key === :color -getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key)) -get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default diff --git a/base/tuple.jl b/base/tuple.jl index 59fe2c1e531e1..ee3174d783531 100644 --- a/base/tuple.jl +++ b/base/tuple.jl @@ -28,10 +28,9 @@ firstindex(@nospecialize t::Tuple) = 1 lastindex(@nospecialize t::Tuple) = length(t) size(@nospecialize(t::Tuple), d::Integer) = (d == 1) ? 
length(t) : throw(ArgumentError("invalid tuple dimension $d")) axes(@nospecialize t::Tuple) = (OneTo(length(t)),) -@eval getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, $(Expr(:boundscheck))) -@eval getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), $(Expr(:boundscheck))) -__inbounds_getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, false) -__inbounds_getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), false) +getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, @_boundscheck) +getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), @_boundscheck) +__safe_getindex(@nospecialize(t::Tuple), i::Int) = (@_nothrow_noub_meta; getfield(t, i, false)) getindex(t::Tuple, r::AbstractArray{<:Any,1}) = (eltype(t)[t[ri] for ri in r]...,) getindex(t::Tuple, b::AbstractArray{Bool,1}) = length(b) == length(t) ? getindex(t, findall(b)) : throw(BoundsError(t, b)) getindex(t::Tuple, c::Colon) = t @@ -42,9 +41,9 @@ get(f::Callable, t::Tuple, i::Integer) = i in 1:length(t) ? getindex(t, i) : f() # returns new tuple; N.B.: becomes no-op if `i` is out-of-bounds """ - setindex(c::Tuple, v, i::Integer) + setindex(t::Tuple, v, i::Integer) -Creates a new tuple similar to `x` with the value at index `i` set to `v`. +Creates a new tuple similar to `t` with the value at index `i` set to `v`. Throws a `BoundsError` when out of bounds. # Examples @@ -61,7 +60,7 @@ end function _setindex(v, i::Integer, args::Vararg{Any,N}) where {N} @inline - return ntuple(j -> ifelse(j == i, v, args[j]), Val{N}()) + return ntuple(j -> ifelse(j == i, v, args[j]), Val{N}())::NTuple{N, Any} end @@ -69,11 +68,80 @@ end function iterate(@nospecialize(t::Tuple), i::Int=1) @inline + @_nothrow_meta return (1 <= i <= length(t)) ? (t[i], i + 1) : nothing end keys(@nospecialize t::Tuple) = OneTo(length(t)) +""" + prevind(A, i) + +Return the index before `i` in `A`. The returned index is often equivalent to +`i - 1` for an integer `i`. This function can be useful for generic code. + +!!! warning + The returned index might be out of bounds. Consider using + [`checkbounds`](@ref). + +See also: [`nextind`](@ref). + +# Examples +```jldoctest +julia> x = [1 2; 3 4] +2×2 Matrix{Int64}: + 1 2 + 3 4 + +julia> prevind(x, 4) # valid result +3 + +julia> prevind(x, 1) # invalid result +0 + +julia> prevind(x, CartesianIndex(2, 2)) # valid result +CartesianIndex(1, 2) + +julia> prevind(x, CartesianIndex(1, 1)) # invalid result +CartesianIndex(2, 0) +``` +""" +function prevind end + +""" + nextind(A, i) + +Return the index after `i` in `A`. The returned index is often equivalent to +`i + 1` for an integer `i`. This function can be useful for generic code. + +!!! warning + The returned index might be out of bounds. Consider using + [`checkbounds`](@ref). + +See also: [`prevind`](@ref). 
+ +# Examples +```jldoctest +julia> x = [1 2; 3 4] +2×2 Matrix{Int64}: + 1 2 + 3 4 + +julia> nextind(x, 1) # valid result +2 + +julia> nextind(x, 4) # invalid result +5 + +julia> nextind(x, CartesianIndex(1, 1)) # valid result +CartesianIndex(2, 1) + +julia> nextind(x, CartesianIndex(2, 2)) # invalid result +CartesianIndex(1, 3) +``` +""" +function nextind end + prevind(@nospecialize(t::Tuple), i::Integer) = Int(i)-1 nextind(@nospecialize(t::Tuple), i::Integer) = Int(i)+1 @@ -199,41 +267,31 @@ first(t::Tuple) = t[1] # eltype eltype(::Type{Tuple{}}) = Bottom -function eltype(t::Type{<:Tuple{Vararg{E}}}) where {E} - if @isdefined(E) - return E - else - # TODO: need to guard against E being miscomputed by subtyping (ref #23017) - # and compute the result manually in this case - return _compute_eltype(t) - end -end +# the <: here makes the runtime a bit more complicated (needing to check isdefined), but really helps inference +eltype(t::Type{<:Tuple{Vararg{E}}}) where {E} = @isdefined(E) ? (E isa Type ? E : Union{}) : _compute_eltype(t) eltype(t::Type{<:Tuple}) = _compute_eltype(t) -function _tuple_unique_fieldtypes(@nospecialize t) +function _compute_eltype(@nospecialize t) @_total_meta - types = IdSet() + has_free_typevars(t) && return Any t´ = unwrap_unionall(t) # Given t = Tuple{Vararg{S}} where S<:Real, the various # unwrapping/wrapping/va-handling here will return Real if t´ isa Union - union!(types, _tuple_unique_fieldtypes(rewrap_unionall(t´.a, t))) - union!(types, _tuple_unique_fieldtypes(rewrap_unionall(t´.b, t))) - else - for ti in (t´::DataType).parameters - push!(types, rewrap_unionall(unwrapva(ti), t)) - end + return promote_typejoin(_compute_eltype(rewrap_unionall(t´.a, t)), + _compute_eltype(rewrap_unionall(t´.b, t))) end - return Core.svec(types...) -end -function _compute_eltype(@nospecialize t) - @_total_meta # TODO: the compiler shouldn't need this - types = _tuple_unique_fieldtypes(t) - return afoldl(types...) do a, b - # if we've already reached Any, it can't widen any more - a === Any && return Any - b === Any && return Any - return promote_typejoin(a, b) + p = (t´::DataType).parameters + length(p) == 0 && return Union{} + elt = rewrap_unionall(unwrapva(p[1]), t) + elt isa Type || return Union{} # Tuple{2} is legal as a Type, but the eltype is Union{} since it is uninhabited + r = elt + for i in 2:length(p) + r === Any && return r # if we've already reached Any, it can't widen any more + elt = rewrap_unionall(unwrapva(p[i]), t) + elt isa Type || return Union{} # Tuple{2} is legal as a Type, but the eltype is Union{} since it is uninhabited + r = promote_typejoin(elt, r) end + return r end # We'd like to be able to infer eltype(::Tuple), which needs to be able to @@ -251,6 +309,13 @@ end # @ tuple.jl:209 typeof(function eltype end).name.max_methods = UInt8(4) +# key/val types +keytype(@nospecialize t::Tuple) = keytype(typeof(t)) +keytype(@nospecialize T::Type{<:Tuple}) = Int + +valtype(@nospecialize t::Tuple) = valtype(typeof(t)) +valtype(@nospecialize T::Type{<:Tuple}) = eltype(T) + # version of tail that doesn't throw on empty tuples (used in array indexing) safe_tail(t::Tuple) = tail(t) safe_tail(t::Tuple{}) = () @@ -332,7 +397,7 @@ end # n argument function heads(ts::Tuple...) = map(t -> t[1], ts) tails(ts::Tuple...) = map(tail, ts) -map(f, ::Tuple{}...) = () +map(f, ::Tuple{}, ::Tuple{}...) = () anyempty(x::Tuple{}, xs...) = true anyempty(x::Tuple, xs...) = anyempty(xs...) 
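The reworked tuple `eltype` above joins element types with `promote_typejoin`, and the `keytype`/`valtype` methods for tuples are new. Illustrative expectations under those rules (the joined supertype is whatever `typejoin` produces; `Missing` and `Nothing` are kept in a `Union`):

```julia
# Element-type joining for tuple types, plus the new key/value type queries.
eltype(Tuple{Int64, Int64})      # Int64
eltype(Tuple{Int64, Float64})    # joined supertype, Real here
eltype(Tuple{Int64, Missing})    # Union{Int64, Missing}
eltype(Tuple{})                  # Union{}

keytype((1.0, 2.0, 3.0))         # Int (tuples are indexed by integers)
valtype((1.0, 2.0, 3.0))         # Float64 (same as eltype)
```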
anyempty() = false @@ -362,10 +427,6 @@ fill_to_length(t::Tuple{}, val, ::Val{2}) = (val, val) # constructing from an iterator -# only define these in Base, to avoid overwriting the constructors -# NOTE: this means this constructor must be avoided in Core.Compiler! -if nameof(@__MODULE__) === :Base - function tuple_type_tail(T::Type) @_foldable_meta # TODO: this method is wrong (and not :foldable) if isa(T, UnionAll) @@ -394,7 +455,7 @@ _totuple(::Type{Tuple{}}, itr, s...) = () function _totuple_err(@nospecialize T) @noinline - throw(ArgumentError("too few elements for tuple type $T")) + throw(ArgumentError(LazyString("too few elements for tuple type ", T))) end function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N} @@ -428,16 +489,15 @@ _totuple(::Type{Tuple}, itr, s...) = (collect(Iterators.rest(itr,s...))...,) _totuple(::Type{Tuple}, itr::Array) = (itr...,) _totuple(::Type{Tuple}, itr::SimpleVector) = (itr...,) _totuple(::Type{Tuple}, itr::NamedTuple) = (itr...,) +_totuple(::Type{Tuple}, p::Pair) = (p.first, p.second) _totuple(::Type{Tuple}, x::Number) = (x,) # to make Tuple(x) inferable -end - ## find ## _findfirst_rec(f, i::Int, ::Tuple{}) = nothing _findfirst_rec(f, i::Int, t::Tuple) = (@inline; f(first(t)) ? i : _findfirst_rec(f, i+1, tail(t))) function _findfirst_loop(f::Function, t) - for i in 1:length(t) + for i in eachindex(t) f(t[i]) && return i end return nothing @@ -471,7 +531,7 @@ function _isequal(t1::Tuple{Any,Vararg{Any}}, t2::Tuple{Any,Vararg{Any}}) return isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2)) end function _isequal(t1::Any32, t2::Any32) - for i = 1:length(t1) + for i in eachindex(t1, t2) if !isequal(t1[i], t2[i]) return false end @@ -502,7 +562,7 @@ function _eq_missing(t1::Tuple, t2::Tuple) end function _eq(t1::Any32, t2::Any32) anymissing = false - for i = 1:length(t1) + for i in eachindex(t1, t2) eq = (t1[i] == t2[i]) if ismissing(eq) anymissing = true @@ -598,7 +658,9 @@ all(x::Tuple{}) = true all(x::Tuple{Bool}) = x[1] all(x::Tuple{Bool, Bool}) = x[1]&x[2] all(x::Tuple{Bool, Bool, Bool}) = x[1]&x[2]&x[3] -# use generic reductions for the rest +all(x::Tuple{Any}) = x[1] || return false +all(f, x::Tuple{}) = true +all(f, x::Tuple{Any}) = all((f(x[1]),)) any(x::Tuple{}) = false any(x::Tuple{Bool}) = x[1] @@ -606,7 +668,7 @@ any(x::Tuple{Bool, Bool}) = x[1]|x[2] any(x::Tuple{Bool, Bool, Bool}) = x[1]|x[2]|x[3] # a version of `in` esp. for NamedTuple, to make it pure, and not compiled for each tuple length -function sym_in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) +function sym_in(x::Symbol, itr::Tuple{Vararg{Symbol}}) @noinline @_total_meta for y in itr @@ -614,7 +676,7 @@ function sym_in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) end return false end -in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr) +in(x::Symbol, itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr) """ @@ -625,4 +687,13 @@ Return an empty tuple, `()`. empty(@nospecialize x::Tuple) = () foreach(f, itr::Tuple) = foldl((_, x) -> (f(x); nothing), itr, init=nothing) -foreach(f, itrs::Tuple...) = foldl((_, xs) -> (f(xs...); nothing), zip(itrs...), init=nothing) +foreach(f, itr::Tuple, itrs::Tuple...) = foldl((_, xs) -> (f(xs...); nothing), zip(itr, itrs...), init=nothing) + +circshift((@nospecialize t::Union{Tuple{},Tuple{Any}}), @nospecialize _::Integer) = t +circshift(t::Tuple{Any,Any}, shift::Integer) = iseven(shift) ? 
t : reverse(t) +function circshift(x::Tuple{Any,Any,Any,Vararg{Any,N}}, shift::Integer) where {N} + @inline + len = N + 3 + j = mod1(shift, len) + ntuple(k -> getindex(x, k-j+ifelse(k>j,0,len)), Val(len))::Tuple +end diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl index d91a04371230c..6928d420a3860 100644 --- a/base/twiceprecision.jl +++ b/base/twiceprecision.jl @@ -278,6 +278,7 @@ big(x::TwicePrecision) = big(x.hi) + big(x.lo) -(x::TwicePrecision) = TwicePrecision(-x.hi, -x.lo) +zero(x::TwicePrecision) = zero(typeof(x)) function zero(::Type{TwicePrecision{T}}) where {T} z = zero(T) TwicePrecision{T}(z, z) @@ -476,9 +477,7 @@ end # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12 - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset + u = oftype(r.offset, i) - r.offset shift_hi, shift_lo = u*r.step.hi, u*r.step.lo x_hi, x_lo = add12(r.ref.hi, shift_hi) T(x_hi + (x_lo + (shift_lo + r.ref.lo))) @@ -486,7 +485,7 @@ end function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer) i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset + u = oftype(r.offset, i) - r.offset shift_hi, shift_lo = u*r.step.hi, u*r.step.lo x_hi, x_lo = add12(r.ref.hi, shift_hi) x_hi, x_lo = add12(x_hi, x_lo + (shift_lo + r.ref.lo)) @@ -787,3 +786,19 @@ _tp_prod(t::TwicePrecision) = t x.hi < y.hi || ((x.hi == y.hi) & (x.lo < y.lo)) isbetween(a, x, b) = a <= x <= b || b <= x <= a + +# These functions exist for use in LogRange: + +_exp_allowing_twice64(x::Number) = exp(x) +_exp_allowing_twice64(x::TwicePrecision{Float64}) = Math.exp_impl(x.hi, x.lo, Val(:ℯ)) + +# No error on negative x, and for NaN/Inf this returns junk: +function _log_twice64_unchecked(x::Float64) + xu = reinterpret(UInt64, x) + if xu < (UInt64(1)<<52) # x is subnormal + xu = reinterpret(UInt64, x * 0x1p52) # normalize x + xu &= ~sign_mask(Float64) + xu -= UInt64(52) << 52 # mess with the exponent + end + TwicePrecision(Math._log_ext(xu)...) +end diff --git a/base/util.jl b/base/util.jl index 4dcb819292ff8..c01ff697e64e3 100644 --- a/base/util.jl +++ b/base/util.jl @@ -135,8 +135,8 @@ See also [`print`](@ref), [`println`](@ref), [`show`](@ref). !!! compat "Julia 1.7" Keywords except `color` and `bold` were added in Julia 1.7. -!!! compat "Julia 1.9" - Support for italic output was added in Julia 1.9. +!!! compat "Julia 1.10" + Support for italic output was added in Julia 1.10. """ @constprop :none printstyled(io::IO, msg...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) = with_output_color(print, color, io, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden) @@ -144,7 +144,7 @@ See also [`print`](@ref), [`println`](@ref), [`show`](@ref). printstyled(stdout, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color) """ - Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target) + Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String}=nothing) Return a julia command similar to the one of the running process. 
Propagates any of the `--cpu-target`, `--sysimage`, `--compile`, `--sysimage-native-code`, @@ -154,6 +154,8 @@ command line arguments that are not at their default values. Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notably not propagated currently. +Unless set to `nothing`, the `cpu_target` keyword argument can be used to override the CPU target set for the running process. + To get the julia command without propagated command line arguments, `julia_cmd()[1]` can be used. !!! compat "Julia 1.1" @@ -163,8 +165,7 @@ To get the julia command without propagated command line arguments, `julia_cmd() The flags `--color` and `--startup-file` were added in Julia 1.5. !!! compat "Julia 1.9" - The keyword argument `cpu_target` was added. - + The keyword argument `cpu_target` was added in 1.9. The flag `--pkgimages` was added in Julia 1.9. """ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String} = nothing) @@ -205,6 +206,10 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio end opts.can_inline == 0 && push!(addflags, "--inline=no") opts.use_compiled_modules == 0 && push!(addflags, "--compiled-modules=no") + opts.use_compiled_modules == 2 && push!(addflags, "--compiled-modules=existing") + opts.use_compiled_modules == 3 && push!(addflags, "--compiled-modules=strict") + opts.use_pkgimages == 0 && push!(addflags, "--pkgimages=no") + opts.use_pkgimages == 2 && push!(addflags, "--pkgimages=existing") opts.opt_level == 2 || push!(addflags, "-O$(opts.opt_level)") opts.opt_level_min == 0 || push!(addflags, "--min-optlevel=$(opts.opt_level_min)") push!(addflags, "-g$(opts.debug_level)") @@ -240,17 +245,11 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio if opts.use_sysimage_native_code == 0 push!(addflags, "--sysimage-native-code=no") end - if opts.use_pkgimages == 0 - push!(addflags, "--pkgimages=no") - else - # If pkgimage is set, malloc_log and code_coverage should not - @assert opts.malloc_log == 0 && opts.code_coverage == 0 - end - return `$julia -C$cpu_target -J$image_file $addflags` + return `$julia -C $cpu_target -J$image_file $addflags` end function julia_exename() - if !Base.isdebugbuild() + if !isdebugbuild() return @static Sys.iswindows() ? "julia.exe" : "julia" else return @static Sys.iswindows() ? "julia-debug.exe" : "julia-debug" @@ -272,15 +271,29 @@ function securezero! end unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len)) """ - Base.getpass(message::AbstractString) -> Base.SecretBuffer + Base.getpass(message::AbstractString; with_suffix::Bool=true) -> Base.SecretBuffer Display a message and wait for the user to input a secret, returning an `IO` -object containing the secret. +object containing the secret. If `with_suffix` is `true` (the default), the +suffix `": "` will be appended to `message`. !!! info "Windows" Note that on Windows, the secret might be displayed as it is typed; see `Base.winprompt` for securely retrieving username/password pairs from a graphical interface. + +!!! compat "Julia 1.12" + The `with_suffix` keyword argument requires at least Julia 1.12. 
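Earlier in this hunk `julia_cmd` learns to propagate the new `--compiled-modules=existing|strict` and `--pkgimages=existing` settings. A quick, illustrative way to see what gets propagated for the current session and reuse it for a child process (output varies with how `julia` was started; not part of the patch):

```julia
# Rebuild a command line equivalent to the running process and run a child with it.
cmd = Base.julia_cmd()
println(cmd)   # e.g. `/usr/bin/julia -C native -J/path/sys.so -g1 --pkgimages=existing`
run(`$cmd --startup-file=no -e 'println("child: ", VERSION)'`)
```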
+ +# Examples + +```julia-repl +julia> Base.getpass("Secret") +Secret: SecretBuffer("*******") + +julia> Base.getpass("Secret> "; with_suffix=false) +Secret> SecretBuffer("*******") +``` """ function getpass end @@ -340,11 +353,13 @@ function with_raw_tty(f::Function, input::TTY) end end -function getpass(input::TTY, output::IO, prompt::AbstractString) +function getpass(input::TTY, output::IO, prompt::AbstractString; with_suffix::Bool=true) input === stdin || throw(ArgumentError("getpass only works for stdin")) with_raw_tty(stdin) do - print(output, prompt, ": ") + print(output, prompt) + with_suffix && print(output, ": ") flush(output) + s = SecretBuffer() plen = 0 while true @@ -365,7 +380,7 @@ end # allow new getpass methods to be defined if stdin has been # redirected to some custom stream, e.g. in IJulia. -getpass(prompt::AbstractString) = getpass(stdin, stdout, prompt) +getpass(prompt::AbstractString; with_suffix::Bool=true) = getpass(stdin, stdout, prompt; with_suffix) """ prompt(message; default="") -> Union{String, Nothing} @@ -376,7 +391,7 @@ then the user can enter just a newline character to select the `default`. See also `Base.winprompt` (for Windows) and `Base.getpass` for secure entry of passwords. -# Example +# Examples ```julia-repl julia> your_name = Base.prompt("Enter your name"); @@ -492,8 +507,10 @@ unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_ _crc32c(a::NTuple{<:Any, UInt8}, crc::UInt32=0x00000000) = unsafe_crc32c(Ref(a), length(a) % Csize_t, crc) -_crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = + +function _crc32c(a::DenseBytes, crc::UInt32=0x00000000) unsafe_crc32c(a, length(a) % Csize_t, crc) +end function _crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) unsafe_crc32c(s, sizeof(s) % Csize_t, crc) @@ -513,7 +530,6 @@ function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) end _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc) _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc) -_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) _crc32c(x::UInt128, crc::UInt32=0x00000000) = ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, x, 16) _crc32c(x::UInt64, crc::UInt32=0x00000000) = @@ -565,24 +581,31 @@ Stacktrace: macro kwdef(expr) expr = macroexpand(__module__, expr) # to expand @static isexpr(expr, :struct) || error("Invalid usage of @kwdef") - T = expr.args[2] + _, T, fieldsblock = expr.args if T isa Expr && T.head === :<: T = T.args[1] end - params_ex = Expr(:parameters) - call_args = Any[] + fieldnames = Any[] + defvals = Any[] + extract_names_and_defvals_from_kwdef_fieldblock!(fieldsblock, fieldnames, defvals) + parameters = map(fieldnames, defvals) do fieldname, defval + if isnothing(defval) + return fieldname + else + return Expr(:kw, fieldname, esc(defval)) + end + end - _kwdef!(expr.args[3], params_ex.args, call_args) # Only define a constructor if the type has fields, otherwise we'll get a stack # overflow on construction - if !isempty(params_ex.args) - if T isa Symbol - sig = :(($(esc(T)))($params_ex)) - call = :(($(esc(T)))($(call_args...))) - body = Expr(:block, __source__, call) + if !isempty(parameters) + T_no_esc = Meta.unescape(T) + if T_no_esc isa Symbol + sig = Expr(:call, esc(T), Expr(:parameters, parameters...)) + body = Expr(:block, __source__, Expr(:call, esc(T), fieldnames...)) kwdefs = Expr(:function, sig, body) - elseif 
isexpr(T, :curly) + elseif isexpr(T_no_esc, :curly) # if T == S{A<:AA,B<:BB}, define two methods # S(...) = ... # S{A,B}(...) where {A<:AA,B<:BB} = ... @@ -590,11 +613,11 @@ macro kwdef(expr) P = T.args[2:end] Q = Any[isexpr(U, :<:) ? U.args[1] : U for U in P] SQ = :($S{$(Q...)}) - body1 = Expr(:block, __source__, :(($(esc(S)))($(call_args...)))) - sig1 = :(($(esc(S)))($params_ex)) + body1 = Expr(:block, __source__, Expr(:call, esc(S), fieldnames...)) + sig1 = Expr(:call, esc(S), Expr(:parameters, parameters...)) def1 = Expr(:function, sig1, body1) - body2 = Expr(:block, __source__, :(($(esc(SQ)))($(call_args...)))) - sig2 = :(($(esc(SQ)))($params_ex) where {$(esc.(P)...)}) + body2 = Expr(:block, __source__, Expr(:call, esc(SQ), fieldnames...)) + sig2 = :($(Expr(:call, esc(SQ), Expr(:parameters, parameters...))) where {$(esc.(P)...)}) def2 = Expr(:function, sig2, body2) kwdefs = Expr(:block, def1, def2) else @@ -611,61 +634,51 @@ end # @kwdef helper function # mutates arguments inplace -function _kwdef!(blk, params_args, call_args) - for i in eachindex(blk.args) - ei = blk.args[i] - if ei isa Symbol - # var - push!(params_args, ei) - push!(call_args, ei) - elseif ei isa Expr - is_atomic = ei.head === :atomic - ei = is_atomic ? first(ei.args) : ei # strip "@atomic" and add it back later - is_const = ei.head === :const - ei = is_const ? first(ei.args) : ei # strip "const" and add it back later - # Note: `@atomic const ..` isn't valid, but reconstruct it anyway to serve a nice error - if ei isa Symbol - # const var - push!(params_args, ei) - push!(call_args, ei) - elseif ei.head === :(=) - lhs = ei.args[1] - if lhs isa Symbol - # var = defexpr - var = lhs - elseif lhs isa Expr && lhs.head === :(::) && lhs.args[1] isa Symbol - # var::T = defexpr - var = lhs.args[1] - else - # something else, e.g. inline inner constructor - # F(...) = ... - continue +function extract_names_and_defvals_from_kwdef_fieldblock!(block, names, defvals) + for (i, item) in pairs(block.args) + if isexpr(item, :block) + extract_names_and_defvals_from_kwdef_fieldblock!(item, names, defvals) + elseif item isa Expr && item.head in (:escape, :var"hygienic-scope") + n = length(names) + extract_names_and_defvals_from_kwdef_fieldblock!(item, names, defvals) + for j in n+1:length(defvals) + if !isnothing(defvals[j]) + defvals[j] = Expr(item.head, defvals[j]) end - defexpr = ei.args[2] # defexpr - push!(params_args, Expr(:kw, var, esc(defexpr))) - push!(call_args, var) - lhs = is_const ? Expr(:const, lhs) : lhs - lhs = is_atomic ? 
Expr(:atomic, lhs) : lhs - blk.args[i] = lhs # overrides arg - elseif ei.head === :(::) && ei.args[1] isa Symbol - # var::Typ - var = ei.args[1] - push!(params_args, var) - push!(call_args, var) - elseif ei.head === :block - # can arise with use of @static inside type decl - _kwdef!(ei, params_args, call_args) end + else + def, name, defval = @something(def_name_defval_from_kwdef_fielddef(item), continue) + block.args[i] = def + push!(names, name) + push!(defvals, defval) end end - blk +end + +function def_name_defval_from_kwdef_fielddef(kwdef) + if kwdef isa Symbol + return kwdef, kwdef, nothing + elseif isexpr(kwdef, :(::)) + name, _ = kwdef.args + return kwdef, Meta.unescape(name), nothing + elseif isexpr(kwdef, :(=)) + lhs, rhs = kwdef.args + def, name, _ = @something(def_name_defval_from_kwdef_fielddef(lhs), return nothing) + return def, name, rhs + elseif kwdef isa Expr && kwdef.head in (:const, :atomic) + def, name, defval = @something(def_name_defval_from_kwdef_fielddef(kwdef.args[1]), return nothing) + return Expr(kwdef.head, def), name, defval + elseif kwdef isa Expr && kwdef.head in (:escape, :var"hygienic-scope") + def, name, defval = @something(def_name_defval_from_kwdef_fielddef(kwdef.args[1]), return nothing) + return Expr(kwdef.head, def), name, isnothing(defval) ? defval : Expr(kwdef.head, defval) + end end # testing """ Base.runtests(tests=["all"]; ncores=ceil(Int, Sys.CPU_THREADS / 2), - exit_on_error=false, revise=false, [seed]) + exit_on_error=false, revise=false, propagate_project=true, [seed]) Run the Julia unit tests listed in `tests`, which can be either a string or an array of strings, using `ncores` processors. If `exit_on_error` is `false`, when one test @@ -673,12 +686,14 @@ fails, all remaining tests in other files will still be run; they are otherwise when `exit_on_error == true`. If `revise` is `true`, the `Revise` package is used to load any modifications to `Base` or to the standard libraries before running the tests. +If `propagate_project` is true the current project is propagated to the test environment. If a seed is provided via the keyword argument, it is used to seed the global RNG in the context where the tests are run; otherwise the seed is chosen randomly. """ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2), exit_on_error::Bool=false, revise::Bool=false, + propagate_project::Bool=false, seed::Union{BitInteger,Nothing}=nothing) if isa(tests,AbstractString) tests = split(tests) @@ -690,17 +705,19 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2), ENV2["JULIA_CPU_THREADS"] = "$ncores" pathsep = Sys.iswindows() ? ";" : ":" ENV2["JULIA_DEPOT_PATH"] = string(mktempdir(; cleanup = true), pathsep) # make sure the default depots can be loaded - delete!(ENV2, "JULIA_LOAD_PATH") + ENV2["JULIA_LOAD_PATH"] = string("@", pathsep, "@stdlib") + ENV2["JULIA_TESTS"] = "true" delete!(ENV2, "JULIA_PROJECT") + project_flag = propagate_project ? 
`--project` : `` try - run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR, + run(setenv(`$(julia_cmd()) $project_flag $(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test", "runtests.jl")) $tests`, ENV2)) nothing catch buf = PipeBuffer() - original_load_path = copy(Base.LOAD_PATH); empty!(Base.LOAD_PATH); pushfirst!(Base.LOAD_PATH, "@stdlib") - Base.require(Base, :InteractiveUtils).versioninfo(buf) - empty!(Base.LOAD_PATH); append!(Base.LOAD_PATH, original_load_path) + let InteractiveUtils = Base.require_stdlib(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils")) + @invokelatest InteractiveUtils.versioninfo(buf) + end error("A test has failed. Please submit a bug report (https://github.com/JuliaLang/julia/issues)\n" * "including error messages above and the output of versioninfo():\n$(read(buf, String))") end diff --git a/base/uuid.jl b/base/uuid.jl index ff4df68ddb7c8..4b9bae863d926 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -36,6 +36,8 @@ let Base.hash(uuid::UUID, h::UInt) = hash(uuid_hash_seed, hash(convert(NTuple{2, UInt64}, uuid), h)) end +_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) + let @inline function uuid_kernel(s, i, u) _c = UInt32(@inbounds codeunit(s, i)) @@ -90,18 +92,18 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] global string function string(u::UUID) u = u.value - a = Base.StringVector(36) + a = Base.StringMemory(36) for i in groupings @inbounds a[i] = hex_chars[1 + u & 0xf] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return String(a) + return unsafe_takestring(a) end end print(io::IO, u::UUID) = print(io, string(u)) -show(io::IO, u::UUID) = print(io, "UUID(\"", u, "\")") +show(io::IO, u::UUID) = print(io, UUID, "(\"", u, "\")") isless(a::UUID, b::UUID) = isless(a.value, b.value) diff --git a/base/version.jl b/base/version.jl index 67377c86a8493..b362daa78f04f 100644 --- a/base/version.jl +++ b/base/version.jl @@ -9,12 +9,19 @@ const VInt = UInt32 VersionNumber Version number type which follows the specifications of -[semantic versioning (semver)](https://semver.org/), composed of major, minor +[semantic versioning (semver)](https://semver.org/spec/v2.0.0-rc.2.html), composed of major, minor and patch numeric values, followed by pre-release and build -alpha-numeric annotations. +alphanumeric annotations. `VersionNumber` objects can be compared with all of the standard comparison -operators (`==`, `<`, `<=`, etc.), with the result following semver rules. +operators (`==`, `<`, `<=`, etc.), with the result following semver v2.0.0-rc.2 rules. + +`VersionNumber` has the following public fields: +- `v.major::Integer` +- `v.minor::Integer` +- `v.patch::Integer` +- `v.prerelease::Tuple{Vararg{Union{Integer, AbstractString}}}` +- `v.build::Tuple{Vararg{Union{Integer, AbstractString}}}` See also [`@v_str`](@ref) to efficiently construct `VersionNumber` objects from semver-format literal strings, [`VERSION`](@ref) for the `VersionNumber` @@ -44,8 +51,7 @@ struct VersionNumber build::VerTuple function VersionNumber(major::VInt, minor::VInt, patch::VInt, - pre::VerTuple, - bld::VerTuple) + @nospecialize(pre::VerTuple), @nospecialize(bld::VerTuple)) major >= 0 || throw(ArgumentError("invalid negative major version: $major")) minor >= 0 || throw(ArgumentError("invalid negative minor version: $minor")) patch >= 0 || throw(ArgumentError("invalid negative patch version: $patch")) @@ -172,7 +178,7 @@ ident_cmp(a::Integer, b::String ) = isempty(b) ? 
+1 : -1 ident_cmp(a::String, b::Integer) = isempty(a) ? -1 : +1 ident_cmp(a::String, b::String ) = cmp(a, b) -function ident_cmp(A::VerTuple, B::VerTuple) +function ident_cmp(@nospecialize(A::VerTuple), @nospecialize(B::VerTuple)) for (a, b) in Iterators.Zip{Tuple{VerTuple,VerTuple}}((A, B)) c = ident_cmp(a, b) (c != 0) && return c @@ -266,59 +272,3 @@ else end libllvm_path() = ccall(:jl_get_libllvm, Any, ()) - -function banner(io::IO = stdout) - if GIT_VERSION_INFO.tagged_commit - commit_string = TAGGED_RELEASE_BANNER - elseif isempty(GIT_VERSION_INFO.commit) - commit_string = "" - else - days = Int(floor((ccall(:jl_clock_now, Float64, ()) - GIT_VERSION_INFO.fork_master_timestamp) / (60 * 60 * 24))) - days = max(0, days) - unit = days == 1 ? "day" : "days" - distance = GIT_VERSION_INFO.fork_master_distance - commit = GIT_VERSION_INFO.commit_short - - if distance == 0 - commit_string = "Commit $(commit) ($(days) $(unit) old master)" - else - branch = GIT_VERSION_INFO.branch - commit_string = "$(branch)/$(commit) (fork: $(distance) commits, $(days) $(unit))" - end - end - - commit_date = isempty(Base.GIT_VERSION_INFO.date_string) ? "" : " ($(split(Base.GIT_VERSION_INFO.date_string)[1]))" - - if get(io, :color, false)::Bool - c = text_colors - tx = c[:normal] # text - jl = c[:normal] # julia - d1 = c[:bold] * c[:blue] # first dot - d2 = c[:bold] * c[:red] # second dot - d3 = c[:bold] * c[:green] # third dot - d4 = c[:bold] * c[:magenta] # fourth dot - - print(io,""" $(d3)_$(tx) - $(d1)_$(tx) $(jl)_$(tx) $(d2)_$(d3)(_)$(d4)_$(tx) | Documentation: https://docs.julialang.org - $(d1)(_)$(jl) | $(d2)(_)$(tx) $(d4)(_)$(tx) | - $(jl)_ _ _| |_ __ _$(tx) | Type \"?\" for help, \"]?\" for Pkg help. - $(jl)| | | | | | |/ _` |$(tx) | - $(jl)| | |_| | | | (_| |$(tx) | Version $(VERSION)$(commit_date) - $(jl)_/ |\\__'_|_|_|\\__'_|$(tx) | $(commit_string) - $(jl)|__/$(tx) | - - """) - else - print(io,""" - _ - _ _ _(_)_ | Documentation: https://docs.julialang.org - (_) | (_) (_) | - _ _ _| |_ __ _ | Type \"?\" for help, \"]?\" for Pkg help. - | | | | | | |/ _` | | - | | |_| | | | (_| | | Version $(VERSION)$(commit_date) - _/ |\\__'_|_|_|\\__'_| | $(commit_string) - |__/ | - - """) - end -end diff --git a/base/views.jl b/base/views.jl index 70d4c1d9110ee..6898abdda1471 100644 --- a/base/views.jl +++ b/base/views.jl @@ -123,20 +123,21 @@ julia> A ``` """ macro view(ex) + Meta.isexpr(ex, :ref) || throw(ArgumentError( + "Invalid use of @view macro: argument must be a reference expression A[...].")) + ex = replace_ref_begin_end!(ex) + # NOTE We embed `view` as a function object itself directly into the AST. + # By doing this, we prevent the creation of function definitions like + # `view(A, idx) = xxx` in cases such as `@view(A[idx]) = xxx.` if Meta.isexpr(ex, :ref) - ex = replace_ref_begin_end!(ex) - if Meta.isexpr(ex, :ref) - ex = Expr(:call, view, ex.args...) - else # ex replaced by let ...; foo[...]; end - if !(Meta.isexpr(ex, :let) && Meta.isexpr(ex.args[2], :ref)) - error("invalid expression") - end - ex.args[2] = Expr(:call, view, ex.args[2].args...) - end - Expr(:&&, true, esc(ex)) + ex = Expr(:call, view, ex.args...) + elseif Meta.isexpr(ex, :let) && (arg2 = ex.args[2]; Meta.isexpr(arg2, :ref)) + # ex replaced by let ...; foo[...]; end + ex.args[2] = Expr(:call, view, arg2.args...) 
else - throw(ArgumentError("Invalid use of @view macro: argument must be a reference expression A[...].")) + error("invalid expression") end + return esc(ex) end ############################################################################ @@ -224,16 +225,16 @@ Similarly, `@views` converts string slices into [`SubString`](@ref) views. occurs in functions called by that code. !!! compat "Julia 1.5" - Using `begin` in an indexing expression to refer to the first index requires at least - Julia 1.5. + Using `begin` in an indexing expression to refer to the first index was implemented + in Julia 1.4, but was only supported by `@views` starting in Julia 1.5. # Examples ```jldoctest julia> A = zeros(3, 3); julia> @views for row in 1:3 - b = A[row, :] - b[:] .= row + b = A[row, :] # b is a view, not a copy + b .= row # assign every element to the row index end julia> A diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl index 328f368c80b71..1a98bf1ee4333 100644 --- a/base/weakkeydict.jl +++ b/base/weakkeydict.jl @@ -54,17 +54,7 @@ WeakKeyDict(ps::Pair{K}...) where {K} = WeakKeyDict{K,Any}(ps) WeakKeyDict(ps::(Pair{K,V} where K)...) where {V} = WeakKeyDict{Any,V}(ps) WeakKeyDict(ps::Pair...) = WeakKeyDict{Any,Any}(ps) -function WeakKeyDict(kv) - try - Base.dict_with_eltype((K, V) -> WeakKeyDict{K, V}, kv, eltype(kv)) - catch - if !isiterable(typeof(kv)) || !all(x->isa(x,Union{Tuple,Pair}),kv) - throw(ArgumentError("WeakKeyDict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow() - end - end -end +WeakKeyDict(kv) = Base.dict_with_eltype((K, V) -> WeakKeyDict{K, V}, kv, eltype(kv)) function _cleanup_locked(h::WeakKeyDict) if h.dirty @@ -80,7 +70,7 @@ function _cleanup_locked(h::WeakKeyDict) return h end -sizehint!(d::WeakKeyDict, newsz) = sizehint!(d.ht, newsz) +sizehint!(d::WeakKeyDict, newsz; shrink::Bool = true) = @lock d sizehint!(d.ht, newsz; shrink = shrink) empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}() IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown() @@ -213,4 +203,6 @@ function iterate(t::WeakKeyDict{K,V}, state...) 
where {K, V} end end +@propagate_inbounds Iterators.only(d::WeakKeyDict) = Iterators._only(d, first) + filter!(f, d::WeakKeyDict) = filter_in_one_pass!(f, d) diff --git a/cli/Makefile b/cli/Makefile index b6a2b48ebf044..3cc0af1a76afd 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -17,14 +17,14 @@ LOADER_CFLAGS += -DGLIBCXX_LEAST_VERSION_SYMBOL=\"$(shell echo "$(CSL_NEXT_GLIBC endif ifeq ($(OS),WINNT) -LOADER_LDFLAGS += -municode -mconsole -nostdlib --disable-auto-import \ - --disable-runtime-pseudo-reloc -lntdll -lkernel32 -lpsapi +LOADER_LDFLAGS += -municode -mconsole -nostdlib -lntdll -lkernel32 -lpsapi else ifeq ($(OS),Linux) -LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed +# textoff and notext are aliases to the same option which suppress the TEXTREL warning for i686 +LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed -Wl,-z,notext else ifeq ($(OS),FreeBSD) LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed -else ifeq ($(OS),Darwin) -LOADER_LDFLAGS += -lSystem +else ifeq ($(OS),OpenBSD) +LOADER_LDFLAGS += -Wl,--no-as-needed -lpthread -rdynamic -lc -Wl,--as-needed endif # Build list of dependent libraries that must be opened @@ -47,7 +47,7 @@ LIB_DOBJS := $(BUILDDIR)/loader_lib.dbg.obj # If this is an architecture that supports dynamic linking, link in a trampoline definition ifneq (,$(wildcard $(SRCDIR)/trampolines/trampolines_$(ARCH).S)) LIB_OBJS += $(BUILDDIR)/loader_trampolines.o -LIB_DOBJS += $(BUILDDIR)/loader_trampolines.o +LIB_DOBJS += $(BUILDDIR)/loader_trampolines.dbg.obj endif default: release @@ -64,6 +64,8 @@ $(BUILDDIR)/loader_exe.dbg.obj : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/ @$(call PRINT_CC, $(CC) $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@) $(BUILDDIR)/loader_trampolines.o : $(SRCDIR)/trampolines/trampolines_$(ARCH).S $(HEADERS) $(SRCDIR)/trampolines/common.h @$(call PRINT_CC, $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) $< -c -o $@) +$(BUILDDIR)/loader_trampolines.dbg.obj : $(SRCDIR)/trampolines/trampolines_$(ARCH).S $(HEADERS) $(SRCDIR)/trampolines/common.h + @$(call PRINT_CC, $(CC) $(DEBUGFLAGS) $(LOADER_CFLAGS) $< -c -o $@) # Debugging target to help us see what kind of code is being generated for our trampolines dump-trampolines: $(SRCDIR)/trampolines/trampolines_$(ARCH).S @@ -104,7 +106,7 @@ julia-debug: $(build_bindir)/julia-debug$(EXE) libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT) libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) -ifneq (,$(filter $(OS), Linux FreeBSD)) +ifneq (,$(filter $(OS), Linux FreeBSD OpenBSD)) VERSIONSCRIPT := -Wl,--version-script=$(BUILDDIR)/julia.expmap endif @@ -114,7 +116,7 @@ STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_ endif $(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) - @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \ + @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) $(RPATH_LIB) -o $@ \ $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@ @$(DSYMUTIL) $@ @@ -125,7 +127,7 @@ ifeq ($(OS), WINNT) endif $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | 
$(build_shlibdir) $(build_libdir) - @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ + @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) $(RPATH_LIB) -o $@ \ $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@ @$(DSYMUTIL) $@ @@ -148,7 +150,7 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT) $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir) @$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug) -$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/' clean: | $(CLEAN_TARGETS) diff --git a/cli/README.md b/cli/README.md index 4021aceb7d839..5a4ecc0a6fc2b 100644 --- a/cli/README.md +++ b/cli/README.md @@ -4,9 +4,9 @@ This directory contains the code used by the Julia loader, implementing the piec This loader comprises the `julia` executable and the `libjulia` library, which are responsible for setting things up such that `libjulia-internal` and any other internal dependencies can be reliably loaded. The code is organized in three pieces: -* `loader_exe.c` gets built into the main `julia` executable. It immediately loads `libjulia`. -* `loader_lib.c` gets built into the main `libjulia` shared library. This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`. -* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines. These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases. +* `loader_exe.c` gets built into the main `julia` executable. It immediately loads `libjulia`. +* `loader_lib.c` gets built into the main `libjulia` shared library. This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`. +* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines. These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases. The main requirements of the loader are as follows: diff --git a/cli/jl_exports.h b/cli/jl_exports.h index d28958c097edb..f1a05b504d9da 100644 --- a/cli/jl_exports.h +++ b/cli/jl_exports.h @@ -18,7 +18,7 @@ JL_EXPORTED_DATA_SYMBOLS(XX) // define a copy of exported data #define jl_max_tags 64 -JL_DLLEXPORT void *small_typeof[(jl_max_tags << 4) / sizeof(void*)]; // 16-bit aligned, like the GC +JL_DLLEXPORT void *jl_small_typeof[(jl_max_tags << 4) / sizeof(void*)]; // 16-bit aligned, like the GC // Declare list of exported functions (sans type) #define XX(name) JL_DLLEXPORT void name(void); diff --git a/cli/loader.h b/cli/loader.h index b778976cee495..be5195583b29f 100644 --- a/cli/loader.h +++ b/cli/loader.h @@ -5,24 +5,6 @@ #include "../src/support/dirpath.h" #include "../src/julia_fasttls.h" -#ifdef _OS_WINDOWS_ -/* We need to reimplement a bunch of standard library stuff on windows, - * but we want to make sure that it doesn't conflict with the actual implementations - * once those get linked into this process. 
*/ -#define fwrite loader_fwrite -#define fputs loader_fputs -#define exit loader_exit -#define strlen loader_strlen -#define wcslen loader_wcslen -#define strncat loader_strncat -#define memcpy loader_memcpy -#define dirname loader_dirname -#define strchr loader_strchr -#define malloc loader_malloc -#define realloc loader_realloc -#define free loader_free -#endif - #ifdef _OS_WINDOWS_ #define WIN32_LEAN_AND_MEAN diff --git a/cli/loader_lib.c b/cli/loader_lib.c index 12feed0c508a0..af2a36cfce8ab 100644 --- a/cli/loader_lib.c +++ b/cli/loader_lib.c @@ -125,6 +125,32 @@ static void * lookup_symbol(const void * lib_handle, const char * symbol_name) { #endif } +#if defined(_OS_WINDOWS_) +void win32_formatmessage(DWORD code, char *reason, int len) { + DWORD res; + LPWSTR errmsg; + res = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, code, + MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), + (LPWSTR)&errmsg, 0, NULL); + if (!res && (GetLastError() == ERROR_MUI_FILE_NOT_FOUND || + GetLastError() == ERROR_RESOURCE_TYPE_NOT_FOUND)) { + res = FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, code, + 0, (LPWSTR)&errmsg, 0, NULL); + } + res = WideCharToMultiByte(CP_UTF8, 0, errmsg, -1, reason, len, NULL, NULL); + reason[len - 1] = '\0'; + LocalFree(errmsg); +} +#endif + // Find the location of libjulia. char *lib_dir = NULL; JL_DLLEXPORT const char * jl_get_libdir() @@ -135,21 +161,21 @@ JL_DLLEXPORT const char * jl_get_libdir() } #if defined(_OS_WINDOWS_) // On Windows, we use GetModuleFileNameW - wchar_t *libjulia_path = utf8_to_wchar(LIBJULIA_NAME); HMODULE libjulia = NULL; - // Get a handle to libjulia. - if (!libjulia_path) { - jl_loader_print_stderr3("ERROR: Unable to convert path ", LIBJULIA_NAME, " to wide string!\n"); - exit(1); - } - libjulia = LoadLibraryW(libjulia_path); - if (libjulia == NULL) { - jl_loader_print_stderr3("ERROR: Unable to load ", LIBJULIA_NAME, "!\n"); + // Get a handle to libjulia + if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCWSTR)jl_get_libdir, &libjulia)) { + DWORD err = GetLastError(); + jl_loader_print_stderr3("ERROR: could not locate library \"", LIBJULIA_NAME, "\"\n"); + + char msg[2048]; + win32_formatmessage(err, msg, sizeof(msg)); + jl_loader_print_stderr(msg); exit(1); } - free(libjulia_path); - libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length + + wchar_t *libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length if (!GetModuleFileNameW(libjulia, libjulia_path, 32768)) { jl_loader_print_stderr("ERROR: GetModuleFileName() failed\n"); exit(1); @@ -281,6 +307,7 @@ static char *libstdcxxprobe(void) // See if the version is compatible char *dlerr = dlerror(); // clear out dlerror void *sym = dlsym(handle, GLIBCXX_LEAST_VERSION_SYMBOL); + (void)sym; dlerr = dlerror(); if (dlerr) { // We can't use the library that was found, so don't write anything. @@ -375,7 +402,6 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { const char *lib_dir = jl_get_libdir(); // Pre-load libraries that libjulia-internal needs. 
- int deps_len = strlen(&dep_libs[1]); char *curr_dep = &dep_libs[1]; // We keep track of "special" libraries names (ones whose name is prefixed with `@`) @@ -451,6 +477,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { char *cxxpath = libstdcxxprobe(); if (cxxpath) { void *cxx_handle = dlopen(cxxpath, RTLD_LAZY); + (void)cxx_handle; const char *dlr = dlerror(); if (dlr) { jl_loader_print_stderr("ERROR: Unable to dlopen(cxxpath) in parent!\n"); @@ -519,7 +546,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { (*jl_codegen_exported_func_addrs[symbol_idx]) = addr; } // Next, if we're on Linux/FreeBSD, set up fast TLS. -#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_) +#if !defined(_OS_WINDOWS_) && !defined(_OS_OPENBSD_) void (*jl_pgcstack_setkey)(void*, void*(*)(void)) = lookup_symbol(libjulia_internal, "jl_pgcstack_setkey"); if (jl_pgcstack_setkey == NULL) { jl_loader_print_stderr("ERROR: Cannot find jl_pgcstack_setkey() function within libjulia-internal!\n"); diff --git a/cli/loader_win_utils.c b/cli/loader_win_utils.c index 2c3c826b08369..ed585a7a64ff0 100644 --- a/cli/loader_win_utils.c +++ b/cli/loader_win_utils.c @@ -12,7 +12,7 @@ static FILE _stderr = { INVALID_HANDLE_VALUE }; FILE *stdout = &_stdout; FILE *stderr = &_stderr; -int loader_fwrite(const char *str, size_t nchars, FILE *out) { +int JL_HIDDEN fwrite(const char *str, size_t nchars, FILE *out) { DWORD written; if (out->isconsole) { // Windows consoles do not support UTF-8 (for reading input, though new Windows Terminal does for writing), only UTF-16. @@ -20,10 +20,10 @@ int loader_fwrite(const char *str, size_t nchars, FILE *out) { if (!wstr) return -1; if (WriteConsoleW(out->fd, wstr, wcslen(wstr), &written, NULL)) { - loader_free(wstr); + free(wstr); return written; } - loader_free(wstr); + free(wstr); } else { // However, we want to print UTF-8 if the output is a file. 
if (WriteFile(out->fd, str, nchars, &written, NULL)) @@ -32,19 +32,19 @@ int loader_fwrite(const char *str, size_t nchars, FILE *out) { return -1; } -int loader_fputs(const char *str, FILE *out) { - return loader_fwrite(str, loader_strlen(str), out); +int JL_HIDDEN fputs(const char *str, FILE *out) { + return fwrite(str, strlen(str), out); } -void * loader_malloc(const size_t size) { +void JL_HIDDEN *malloc(const size_t size) { return HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, size); } -void * loader_realloc(void * mem, const size_t size) { +void JL_HIDDEN *realloc(void * mem, const size_t size) { return HeapReAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, mem, size); } -void loader_free(void* mem) { +void JL_HIDDEN free(void* mem) { HeapFree(GetProcessHeap(), 0, mem); } @@ -110,7 +110,7 @@ void setup_stdio() { _stderr.isconsole = GetConsoleMode(_stderr.fd, &mode); } -void loader_exit(int code) { +void JL_HIDDEN exit(int code) { ExitProcess(code); } @@ -148,21 +148,21 @@ wchar_t *utf8_to_wchar(const char * str) { return wstr; } -size_t loader_strlen(const char * x) { +size_t JL_HIDDEN strlen(const char * x) { int idx = 0; while (x[idx] != 0) idx++; return idx; } -size_t loader_wcslen(const wchar_t * x) { +size_t JL_HIDDEN wcslen(const wchar_t * x) { int idx = 0; while (x[idx] != 0) idx++; return idx; } -char * loader_strncat(char * base, const char * tail, size_t maxlen) { +char JL_HIDDEN *strncat(char * base, const char * tail, size_t maxlen) { int base_len = strlen(base); int tail_len = strlen(tail); for (int idx=base_len; idx 0 && x[idx] != PATHSEPSTRING[0]) { idx -= 1; @@ -198,7 +205,7 @@ char * loader_dirname(char * x) { return x; } -char * loader_strchr(const char * haystack, int needle) { +char JL_HIDDEN *strchr(const char * haystack, int needle) { int idx=0; while (haystack[idx] != needle) { if (haystack[idx] == 0) { diff --git a/cli/trampolines/trampolines_aarch64.S b/cli/trampolines/trampolines_aarch64.S index 2d87ae6dcdb1c..ccb9a647ac6c3 100644 --- a/cli/trampolines/trampolines_aarch64.S +++ b/cli/trampolines/trampolines_aarch64.S @@ -5,9 +5,9 @@ #define XX(name) \ .global CNAME(name) SEP \ +CNAME(name)##: SEP \ .cfi_startproc SEP \ .p2align 2 SEP \ -CNAME(name)##: SEP \ adrp x16, PAGE(CNAME(name##_addr)) SEP \ ldr x16, [x16, PAGEOFF(CNAME(name##_addr))] SEP \ br x16 SEP \ diff --git a/cli/trampolines/trampolines_i686.S b/cli/trampolines/trampolines_i686.S index 3d9cacf0ce652..f6c46fd6ee49b 100644 --- a/cli/trampolines/trampolines_i686.S +++ b/cli/trampolines/trampolines_i686.S @@ -3,13 +3,41 @@ #include "common.h" #include "../../src/jl_exported_funcs.inc" +// set this option to 1 to get very slightly slower trampolines which however do not trigger +// this linker warning: +// ld: ./loader_trampolines.o: warning: relocation against `jl_***_addr' in read-only section `.text' +// ld: warning: creating DT_TEXTREL in a shared object +// If you have a large libjulia.so file or other restrictions on using TEXTREL for some +// reason, this may be worthwhile. +// This is not relevant on Windows (though it is valid there), since it always uses +// DT_TEXTREL anyways, and does not support this notion of PIC. 
+#define USE_PC32 0 + +#if USE_PC32 +.cfi_startproc +julia__x86.get_pc_thunk.ax: + mov (%esp),%eax + ret +.cfi_endproc + +#define CALL(name) \ + call julia__x86.get_pc_thunk.ax; \ + jmpl *(CNAMEADDR(name) - .)(%eax); \ + +#else + +#define CALL(name) \ + jmpl *(CNAMEADDR(name)); \ + +#endif + #define XX(name) \ DEBUGINFO(CNAME(name)); \ .global CNAME(name); \ .cfi_startproc; \ CNAME(name)##:; \ CET_START(); \ - jmpl *(CNAMEADDR(name)); \ + CALL(name); \ ud2; \ .cfi_endproc; \ EXPORT(name); \ diff --git a/cli/trampolines/trampolines_riscv64.S b/cli/trampolines/trampolines_riscv64.S new file mode 100644 index 0000000000000..26307b7c2bb36 --- /dev/null +++ b/cli/trampolines/trampolines_riscv64.S @@ -0,0 +1,20 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "common.h" +#include "../../src/jl_exported_funcs.inc" + +#define SEP ; + +#define XX(name) \ +.global CNAME(name) SEP \ +.cfi_startproc SEP \ +.p2align 2 SEP \ + CNAME(name)##: SEP \ + auipc t3, %pcrel_hi(CNAMEADDR(name)) SEP \ + ld t3, %pcrel_lo(CNAME(name))(t3) SEP \ + jr t3 SEP \ +.cfi_endproc SEP \ + +JL_RUNTIME_EXPORTED_FUNCS(XX) +JL_CODEGEN_EXPORTED_FUNCS(XX) +#undef XX diff --git a/cli/trampolines/trampolines_x86_64.S b/cli/trampolines/trampolines_x86_64.S index 3b800da56eee1..fcc8e40e1ddc9 100644 --- a/cli/trampolines/trampolines_x86_64.S +++ b/cli/trampolines/trampolines_x86_64.S @@ -6,9 +6,9 @@ #define XX(name) \ DEBUGINFO(name); \ .global CNAME(name); \ +CNAME(name)##:; \ .cfi_startproc; \ SEH_START1(name); \ -CNAME(name)##:; \ SEH_START2(); \ CET_START(); \ mov CNAMEADDR(name)(%rip),%r11; \ diff --git a/contrib/asan/Make.user.asan b/contrib/asan/Make.user.asan index 96ed13b54e0f9..025cfad82214b 100644 --- a/contrib/asan/Make.user.asan +++ b/contrib/asan/Make.user.asan @@ -6,6 +6,7 @@ TOOLDIR=$(TOOLCHAIN)/usr/tools USECLANG=1 override CC=$(TOOLDIR)/clang override CXX=$(TOOLDIR)/clang++ +override PATCHELF=$(TOOLDIR)/patchelf export ASAN_SYMBOLIZER_PATH=$(TOOLDIR)/llvm-symbolizer USE_BINARYBUILDER_LLVM=1 @@ -16,9 +17,6 @@ override SANITIZE_ADDRESS=1 # make the GC use regular malloc/frees, which are hooked by ASAN override WITH_GC_DEBUG_ENV=1 -# default to a debug build for better line number reporting -override JULIA_BUILD_MODE=debug - # Enable Julia assertions and LLVM assertions FORCE_ASSERTIONS=1 LLVM_ASSERTIONS=1 diff --git a/contrib/asan/build.sh b/contrib/asan/build.sh index 77f3078b35c42..2e7f243772c81 100755 --- a/contrib/asan/build.sh +++ b/contrib/asan/build.sh @@ -40,7 +40,7 @@ if [ ! -d "$TOOLCHAIN" ]; then cp "$HERE/Make.user.tools" "$TOOLCHAIN/Make.user" fi -make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools +make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools install-patchelf echo echo "Building Julia..." 
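For context on the contrib/asan changes above: once `build.sh` has installed clang, the LLVM tools, and now patchelf into the toolchain and Julia has been built with `SANITIZE_ADDRESS=1`, the instrumented binary is normally launched with a few AddressSanitizer runtime options. This is a sketch only; the toolchain path merely mirrors `TOOLDIR` from `Make.user.asan`, and the `ASAN_OPTIONS` values are common choices for running Julia under ASAN rather than anything specified in this diff.

```bash
# Sketch: running an ASAN-instrumented julia (paths assumed, not taken from the diff)
TOOLCHAIN=$PWD/toolchain                                          # wherever build.sh placed the helper toolchain
export ASAN_SYMBOLIZER_PATH=$TOOLCHAIN/usr/tools/llvm-symbolizer  # same symbolizer Make.user.asan points at
export ASAN_OPTIONS=detect_leaks=0:allow_user_segv_handler=1      # typical settings when running Julia under ASAN
./usr/bin/julia -e 'println("hello from an ASAN-instrumented julia")'
```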
diff --git a/contrib/bolt/.gitignore b/contrib/bolt/.gitignore new file mode 100644 index 0000000000000..921d429130268 --- /dev/null +++ b/contrib/bolt/.gitignore @@ -0,0 +1,10 @@ +profiles-bolt* +optimized.build +toolchain + +bolt +bolt_instrument +merge_data +copy_originals +stage0 +stage1 diff --git a/contrib/bolt/Makefile b/contrib/bolt/Makefile new file mode 100644 index 0000000000000..76833b9865020 --- /dev/null +++ b/contrib/bolt/Makefile @@ -0,0 +1,136 @@ +.PHONY: clean clean_profiles restore_originals + +# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs +BOLT_ARGS := +# Reorder basic blocks within functions +BOLT_ARGS += -reorder-blocks=ext-tsp +# Reorder functions within the binary +BOLT_ARGS += -reorder-functions=cdsort +# Split function code into hot and code regions +BOLT_ARGS += -split-functions +# Split as many basic blocks as possible +BOLT_ARGS += -split-all-cold +# Move jump tables to a separate section +BOLT_ARGS += -jump-tables=move +# Use regular size pages for code alignment +BOLT_ARGS += -no-huge-pages +# Fold functions with identical code +BOLT_ARGS += -icf=1 +# Split using best available strategy (three-way splitting, Cache-Directed Sort) +# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed +# BOLT_ARGS += -split-strategy=cdsplit +# Update DWARF debug info in the final binary +BOLT_ARGS += -update-debug-sections +# Print optimization statistics +BOLT_ARGS += -dyno-stats +# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117 +# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2) +# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text +# which we do in the bolt target +BOLT_ARGS += -skip-funcs=.\*apply_cl.\* + +# -fno-reorder-blocks-and-partition is needed on gcc >= 8. +BOLT_FLAGS := $\ + "BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\ + "BOLT_LDFLAGS=-Wl,--emit-relocs" + +STAGE0_BUILD:=$(CURDIR)/toolchain +STAGE1_BUILD:=$(CURDIR)/optimized.build + +STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/ + +PROFILE_DIR:=$(CURDIR)/profiles-bolt +JULIA_ROOT:=$(CURDIR)/../.. + +LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt +LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata + +# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file. +SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so +FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done) + +AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage1` to finish off the build. $\ + You can now optionally collect more profiling data by running Julia with an appropriate workload, $\ + if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage1`. $\ + You should end up with some data in $(PROFILE_DIR). Afterwards run `make merge_data && make bolt`.' + +$(STAGE0_BUILD) $(STAGE1_BUILD): + $(MAKE) -C $(JULIA_ROOT) O=$@ configure + +stage0: | $(STAGE0_BUILD) + $(MAKE) -C $(STAGE0_BUILD)/deps install-BOLT && \ + touch $@ + +# Build with our custom flags, binary builder doesn't use them so we need to build LLVM for now. 
+# We manually skip package image creation so that we can profile it +$(STAGE1_BUILD): stage0 +stage1: export USE_BINARYBUILDER_LLVM=0 +stage1: | $(STAGE1_BUILD) + $(MAKE) -C $(STAGE1_BUILD) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \ + julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \ + touch $@ + +copy_originals: stage1 + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \ + cp $$abs_file "$$abs_file.original"; \ + done && \ + touch $@ + +# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data +# as the final build uses -no-huge-pages +# We reset the mtime of the files to prevent make from rebuilding targets depending on them. +bolt_instrument: copy_originals + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \ + old_time=$$(stat -c %Y $$abs_file); \ + $(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(PROFILE_DIR)/$$file-prof" -no-huge-pages; \ + mkdir -p $$(dirname "$(PROFILE_DIR)/$$file-prof"); \ + touch -d "@$$old_time" $$abs_file; \ + printf "\n"; \ + done && \ + touch $@ + @echo $(AFTER_INSTRUMENT_MESSAGE) + +finish_stage1: stage1 + $(MAKE) -C $(STAGE1_BUILD) + +merge_data: bolt_instrument + for file in $(FILES_TO_OPTIMIZE); do \ + profiles=$(PROFILE_DIR)/$$file-prof.*.fdata; \ + $(LLVM_MERGEFDATA) $$profiles > "$(PROFILE_DIR)/$$file-prof.merged.fdata"; \ + done && \ + touch $@ + +# The --use-old-text saves about 16 MiB of libLLVM.so size. +# However, the rust folk found it succeeds very non-deterministically for them. +# It tries to reuse old text segments to reduce binary size +# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal +# That flag saves less than 1 MiB for libjulia-internal so oh well. +# We reset the mtime of the files to prevent make from rebuilding targets depending on them. +bolt: merge_data + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \ + old_time=$$(stat -c %Y $$abs_file); \ + $(LLVM_BOLT) "$$abs_file.original" -data "$(PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE1_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \ + touch -d "@$$old_time" $$abs_file; \ + done && \ + touch $@ + +clean_profiles: + rm -rf $(PROFILE_DIR) + +clean: + rm -f stage0 stage1 bolt copy_originals merge_data bolt_instrument + +restore_originals: copy_originals + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \ + cp -P "$$abs_file.original" $$abs_file; \ + done + +delete_originals: copy_originals + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \ + rm "$$abs_file.original"; \ + done diff --git a/contrib/bolt/README.md b/contrib/bolt/README.md new file mode 100644 index 0000000000000..8680939ef6276 --- /dev/null +++ b/contrib/bolt/README.md @@ -0,0 +1,17 @@ +BOLT only works on x86_64 and arch64 on Linux. + +DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738. +If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`. 
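As the `AFTER_INSTRUMENT_MESSAGE` in the Makefile above notes, extra profile data can be collected between `make finish_stage1` and `make merge_data` by running the instrumented build on a representative workload. A minimal sketch of that optional step follows; the `optimized.build/julia` path is inferred from `STAGE1_BUILD` and the `julia-symlink` target, and the workload itself is only an illustration.

```bash
# Optional extra profiling pass before merging the .fdata files (sketch, paths assumed)
make stage1 copy_originals bolt_instrument finish_stage1
./optimized.build/julia -e 'using Dates; sum(rand(10^6))'   # any representative workload; profiles land in profiles-bolt/
make merge_data bolt
```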
+ +To build a BOLT-optimized version of Julia run the following commands (`cd` into this directory first) +```bash +make stage1 +make copy_originals +make bolt_instrument +make finish_stage1 +make merge_data +make bolt +``` +After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory. + +This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb. diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl index d5473ab4c7c62..fd3106587fb0d 100755 --- a/contrib/check-whitespace.jl +++ b/contrib/check-whitespace.jl @@ -18,51 +18,65 @@ const patterns = split(""" *Makefile """) +const is_gha = something(tryparse(Bool, get(ENV, "GITHUB_ACTIONS", "false")), false) + +# Note: `git ls-files` gives `/` as a path separator on Windows, +# so we just use `/` for all platforms. allow_tabs(path) = path == "Make.inc" || endswith(path, "Makefile") || endswith(path, ".make") || endswith(path, ".mk") || - startswith(path, joinpath("src", "support")) || - startswith(path, joinpath("src", "flisp")) || - endswith(path, joinpath("test", "syntax.jl")) || - endswith(path, joinpath("test", "triplequote.jl")) + startswith(path, "src/support") || + startswith(path, "src/flisp") || + endswith(path, "test/syntax.jl") || + endswith(path, "test/triplequote.jl") const errors = Set{Tuple{String,Int,String}}() -for path in eachline(`git ls-files -- $patterns`) - lineno = 0 - non_blank = 0 +function check_whitespace() + for path in eachline(`git ls-files -- $patterns`) + lineno = 0 + non_blank = 0 - file_err(msg) = push!(errors, (path, 0, msg)) - line_err(msg) = push!(errors, (path, lineno, msg)) + file_err(msg) = push!(errors, (path, 0, msg)) + line_err(msg) = push!(errors, (path, lineno, msg)) - isfile(path) || continue - for line in eachline(path, keep=true) - lineno += 1 - contains(line, '\r') && file_err("non-UNIX line endings") - contains(line, '\ua0') && line_err("non-breaking space") - allow_tabs(path) || - contains(line, '\t') && line_err("tab") - endswith(line, '\n') || line_err("no trailing newline") - line = chomp(line) - endswith(line, r"\s") && line_err("trailing whitespace") - contains(line, r"\S") && (non_blank = lineno) + isfile(path) || continue + for line in eachline(path, keep=true) + lineno += 1 + contains(line, '\r') && file_err("non-UNIX line endings") + contains(line, '\ua0') && line_err("non-breaking space") + allow_tabs(path) || + contains(line, '\t') && line_err("tab") + endswith(line, '\n') || line_err("no trailing newline") + line = chomp(line) + endswith(line, r"\s") && line_err("trailing whitespace") + contains(line, r"\S") && (non_blank = lineno) + end + non_blank < lineno && line_err("trailing blank lines") end - non_blank < lineno && line_err("trailing blank lines") -end -if isempty(errors) - println(stderr, "Whitespace check found no issues.") - exit(0) -else - println(stderr, "Whitespace check found $(length(errors)) issues:") - for (path, lineno, msg) in sort!(collect(errors)) - if lineno == 0 - println(stderr, "$path -- $msg") - else - println(stderr, "$path:$lineno -- $msg") + if isempty(errors) + println(stderr, "Whitespace check found no issues.") + exit(0) + else + println(stderr, "Whitespace check found $(length(errors)) issues:") + for (path, lineno, msg) in sort!(collect(errors)) + if lineno == 0 + println(stderr, "$path -- $msg") + if is_gha + println(stdout, "::warning title=Whitespace check,file=", path, "::", msg) + end + else + 
println(stderr, "$path:$lineno -- $msg") + if is_gha + println(stdout, "::warning title=Whitespace check,file=", path, ",line=", lineno, "::", msg) + end + end end + exit(1) end - exit(1) end + +check_whitespace() diff --git a/contrib/download_cmake.sh b/contrib/download_cmake.sh index 1deeb08ddded2..5cf3c579ed052 100755 --- a/contrib/download_cmake.sh +++ b/contrib/download_cmake.sh @@ -8,17 +8,17 @@ mkdir -p "$(dirname "$0")"/../deps/scratch cd "$(dirname "$0")"/../deps/scratch CMAKE_VERSION_MAJOR=3 -CMAKE_VERSION_MINOR=19 -CMAKE_VERSION_PATCH=3 +CMAKE_VERSION_MINOR=30 +CMAKE_VERSION_PATCH=1 CMAKE_VERSION_MAJMIN=$CMAKE_VERSION_MAJOR.$CMAKE_VERSION_MINOR CMAKE_VERSION=$CMAKE_VERSION_MAJMIN.$CMAKE_VERSION_PATCH # listed at https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/cmake-$CMAKE_VERSION-SHA-256.txt # for the files cmake-$CMAKE_VERSION-macos-universal.tar.gz # cmake-$CMAKE_VERSION-Linux-x86_64.tar.gz and cmake-$CMAKE_VERSION-Linux-aarch64.tar.gz -CMAKE_SHA256_DARWIN=a6b79ad05f89241a05797510e650354d74ff72cc988981cdd1eb2b3b2bda66ac -CMAKE_SHA256_LINUX_X86_64=c18b65697e9679e5c88dccede08c323cd3d3730648e59048047bba82097e0ffc -CMAKE_SHA256_LINUX_AARCH64=66e507c97ffb586d7ca6567890808b792c8eb004b645706df6fbf27826a395a2 +CMAKE_SHA256_DARWIN=51e12618829b811bba6f033ee8f39f6192da1b6abb20d82a7899d5134e879a4c +CMAKE_SHA256_LINUX_X86_64=ac31f077ef3378641fa25a3cb980d21b2f083982d3149a8f2eb9154f2b53696b +CMAKE_SHA256_LINUX_AARCH64=ad234996f8750f11d7bd0d17b03f55c434816adf1f1671aab9e8bab21a43286a PLATFORM="$(uname)-$(uname -m)" case $PLATFORM in @@ -28,12 +28,12 @@ case $PLATFORM in echo "$CMAKE_SHA256_DARWIN $FULLNAME.tar.gz" | shasum -a 256 -c - CMAKE_EXTRACTED_PATH=$FULLNAME/CMake.app/Contents/bin/cmake;; Linux-x86_64) - FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM + FULLNAME=cmake-$CMAKE_VERSION-linux-x86_64 ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz echo "$CMAKE_SHA256_LINUX_X86_64 $FULLNAME.tar.gz" | sha256sum -c - CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;; Linux-aarch64) - FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM + FULLNAME=cmake-$CMAKE_VERSION-linux-aarch64 ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz echo "$CMAKE_SHA256_LINUX_AARCH64 $FULLNAME.tar.gz" | sha256sum -c - CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;; diff --git a/contrib/excise_stdlib.sh b/contrib/excise_stdlib.sh new file mode 100755 index 0000000000000..3da9ff437bf83 --- /dev/null +++ b/contrib/excise_stdlib.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Uses https://github.com/newren/git-filter-repo +# Recommended use of `Github cli` + +set -e +set -f +set -x + +if [ -z "$*" ]; then echo "Expected name of stdlib"; fi + +STDLIB=$1 +WORKDIR=$(mktemp -d) + +echo "Excising stdlib $STDLIB; workdir $WORKDIR" +pushd $WORKDIR +git clone https://github.com/JuliaLang/julia $STDLIB +pushd $STDLIB + +echo "Filtering repo" +git filter-repo --subdirectory-filter stdlib/$STDLIB --path LICENSE.md \ + --message-callback 'return re.sub(b"(\W)(#\d+)", lambda m: m.group(1) + b"JuliaLang/julia" + m.group(2), message)' + + +echo "Deleting branches" +git branch -l | grep -v release- | grep -v master | xargs git branch -v -D + +popd +popd +echo "Done! Inspect the result and push it!" +echo """ + cd $WORKDIR/$STDLIB + gh repo create JuliaLang/$STDLIB.jl --push --source=. --public + git push --all + git push --tags""" + +echo """ + Remember to: + 1. Add a README.md + 2. 
Setup GHA or similar for CI + """ diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index fea4ca6bc1fe3..b075223d9c7e4 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Prevent this from putting anyting into the Main namespace -@eval Module() begin +# Prevent this from putting anything into the Main namespace +@eval Core.Module() begin if Threads.maxthreadid() != 1 @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid() @@ -12,8 +12,6 @@ Sys.__init_build() if !isdefined(Base, :uv_eventloop) Base.reinit_stdio() end -Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl")) -import .FakePTYs: open_fake_pty using Base.Meta ## Debugging options @@ -33,6 +31,50 @@ UP_ARROW = "\e[A" DOWN_ARROW = "\e[B" hardcoded_precompile_statements = """ +precompile(Base.unsafe_string, (Ptr{UInt8},)) +precompile(Base.unsafe_string, (Ptr{Int8},)) + +# loading.jl +precompile(Base.__require, (Module, Symbol)) +precompile(Base.__require, (Base.PkgId,)) +precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int)) +precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int)) +precompile(Tuple{typeof(Base.Threads.atomic_add!), Base.Threads.Atomic{Int}, Int}) +precompile(Tuple{typeof(Base.Threads.atomic_sub!), Base.Threads.Atomic{Int}, Int}) + +# LazyArtifacts (but more generally helpful) +precompile(Tuple{Type{Base.Val{x} where x}, Module}) +precompile(Tuple{Type{NamedTuple{(:honor_overrides,), T} where T<:Tuple}, Tuple{Bool}}) +precompile(Tuple{typeof(Base.unique!), Array{String, 1}}) +precompile(Tuple{typeof(Base.invokelatest), Any}) +precompile(Tuple{typeof(Base.vcat), Array{String, 1}, Array{String, 1}}) + +# Pkg loading +precompile(Tuple{typeof(Base.Filesystem.normpath), String, String, Vararg{String}}) +precompile(Tuple{typeof(Base.append!), Array{String, 1}, Array{String, 1}}) +precompile(Tuple{typeof(Base.join), Array{String, 1}, Char}) +precompile(Tuple{typeof(Base.getindex), Base.Dict{Any, Any}, Char}) +precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, Char}) +precompile(Tuple{typeof(Base.convert), Type{Base.Dict{String, Base.Dict{String, String}}}, Base.Dict{String, Any}}) +precompile(Tuple{typeof(Base.convert), Type{Base.Dict{String, Array{String, 1}}}, Base.Dict{String, Any}}) + +# REPL +precompile(isequal, (String, String)) +precompile(Base.check_open, (Base.TTY,)) +precompile(Base.getproperty, (Base.TTY, Symbol)) +precompile(write, (Base.TTY, String)) +precompile(Tuple{typeof(Base.get), Base.TTY, Symbol, Bool}) +precompile(Tuple{typeof(Base.hashindex), String, Int}) +precompile(Tuple{typeof(Base.write), Base.GenericIOBuffer{Array{UInt8, 1}}, String}) +precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Nothing, Int}, Int}) +precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Nothing, Int}, Int, Int}) +precompile(Tuple{typeof(Base._typeddict), Base.Dict{String, Any}, Base.Dict{String, Any}, Vararg{Base.Dict{String, Any}}}) +precompile(Tuple{typeof(Base.promoteK), Type, Base.Dict{String, Any}, Base.Dict{String, Any}}) +precompile(Tuple{typeof(Base.promoteK), Type, Base.Dict{String, Any}}) +precompile(Tuple{typeof(Base.promoteV), Type, Base.Dict{String, Any}, Base.Dict{String, Any}}) +precompile(Tuple{typeof(Base.eval_user_input), Base.PipeEndpoint, Any, Bool}) +precompile(Tuple{typeof(Base.get), 
Base.PipeEndpoint, Symbol, Bool}) + # used by Revise.jl precompile(Tuple{typeof(Base.parse_cache_header), String}) precompile(Base.read_dependency_src, (String, String)) @@ -43,12 +85,17 @@ precompile(Tuple{typeof(haskey), Dict{Base.PkgId,Vector{Function}}, Base.PkgId}) precompile(Tuple{typeof(delete!), Dict{Base.PkgId,Vector{Function}}, Base.PkgId}) precompile(Tuple{typeof(push!), Vector{Function}, Function}) +# preferences +precompile(Base.get_preferences, (Base.UUID,)) +precompile(Base.record_compiletime_preference, (Base.UUID, String)) + # miscellaneous +precompile(Tuple{typeof(Base.exit)}) precompile(Tuple{typeof(Base.require), Base.PkgId}) precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}}) precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}, Base.Dict{String, Any}, Vararg{Base.Dict{String, Any}}}) -precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, Nothing}, Int64}) -precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, String}, Int64}) +precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, Nothing}, Int}) +precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, String}, Int}) precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int}) precompile(Tuple{typeof(getindex), Core.SimpleVector, Int}) precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type}) @@ -59,6 +106,23 @@ precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, precompile(Base.CoreLogging.env_override_minlevel, (Symbol, Module)) precompile(Base.StackTraces.lookup, (Ptr{Nothing},)) precompile(Tuple{typeof(Base.run_module_init), Module, Int}) + +# precompilepkgs +precompile(Tuple{typeof(Base.get), Type{Array{String, 1}}, Base.Dict{String, Any}, String}) +precompile(Tuple{typeof(Base.get), Type{Base.Dict{String, Any}}, Base.Dict{String, Any}, String}) +precompile(Tuple{typeof(Base.haskey), Base.Dict{String, Any}, String}) +precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Base.TTY, Bool}, Int, Int}) +precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Base.TTY, Bool}, Int}) +precompile(Tuple{typeof(Base.open), Base.CmdRedirect, String, Base.TTY}) +precompile(Tuple{typeof(Base.Precompilation.precompilepkgs)}) +precompile(Tuple{typeof(Base.Precompilation.printpkgstyle), Base.TTY, Symbol, String}) +precompile(Tuple{typeof(Base.rawhandle), Base.TTY}) +precompile(Tuple{typeof(Base.setindex!), Base.Dict{String, Array{String, 1}}, Array{String, 1}, String}) +precompile(Tuple{typeof(Base.setindex!), GenericMemory{:not_atomic, Union{Base.Libc.RawFD, Base.SyncCloseFD, IO}, Core.AddrSpace{Core}(0x00)}, Base.TTY, Int}) +precompile(Tuple{typeof(Base.setup_stdio), Base.TTY, Bool}) +precompile(Tuple{typeof(Base.spawn_opts_inherit), Base.DevNull, Base.TTY, Base.TTY}) +precompile(Tuple{typeof(Core.kwcall), NamedTuple{(:context,), Tuple{Base.TTY}}, typeof(Base.sprint), Function}) +precompile(Tuple{Type{Base.UUID}, Base.UUID}) """ for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base.TTY, IOContext{Base.TTY}) @@ -66,75 +130,66 @@ for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base hardcoded_precompile_statements *= "precompile(Tuple{typeof(show), $IO, $T})\n" end -repl_script = """ -2+2 -print("") -printstyled("a", "b") -display([1]) -display([1 2; 3 4]) -foo(x) = 1 -@time @eval foo(1) -; pwd -$CTRL_C -$CTRL_R$CTRL_C -? 
reinterpret -using Ra\t$CTRL_C -\\alpha\t$CTRL_C -\e[200~paste here ;)\e[201~"$CTRL_C -$UP_ARROW$DOWN_ARROW$CTRL_C -123\b\b\b$CTRL_C -\b\b$CTRL_C -f(x) = x03 -f(1,2) -[][1] -cd("complet_path\t\t$CTRL_C -""" - +# Precompiles for Revise and other packages precompile_script = """ -# NOTE: these were moved to the end of Base.jl. TODO: move back here. -# # Used by Revise & its dependencies -# while true # force inference -# delete!(push!(Set{Module}(), Base), Main) -# m = first(methods(+)) -# delete!(push!(Set{Method}(), m), m) -# empty!(Set()) -# push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two)) -# (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"] -# (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two] -# (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)] -# (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two] -# (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int] -# Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one] -# Dict(Base => [:(1+1)])[Base] -# Dict(:one => [1])[:one] -# Dict("abc" => Set())["abc"] -# pushfirst!([], sum) -# get(Base.pkgorigins, Base.PkgId(Base), nothing) -# sort!([1,2,3]) -# unique!([1,2,3]) -# cumsum([1,2,3]) -# append!(Int[], BitSet()) -# isempty(BitSet()) -# delete!(BitSet([1,2]), 3) -# deleteat!(Int32[1,2,3], [1,3]) -# deleteat!(Any[1,2,3], [1,3]) -# Core.svec(1, 2) == Core.svec(3, 4) -# # copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(which(+, (Int, Int)), [Int, Int], Core.svec()))) -# any(t->t[1].line > 1, [(LineNumberNode(2,:none),:(1+1))]) -# break # end force inference -# end +for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter()) + m = match.method + delete!(push!(Set{Method}(), m), m) + copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt))) + break # only actually need to do this once +end +empty!(Set()) +push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two)) +(setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"] +(setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two] +(setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)] +(setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two] +(setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int] +Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one] +Dict(Base => [:(1+1)])[Base] +Dict(:one => [1])[:one] +Dict("abc" => Set())["abc"] +pushfirst!([], sum) +get(Base.pkgorigins, Base.PkgId(Base), nothing) +sort!([1,2,3]) +unique!([1,2,3]) +cumsum([1,2,3]) +append!(Int[], BitSet()) +isempty(BitSet()) +delete!(BitSet([1,2]), 3) +deleteat!(Int32[1,2,3], [1,3]) +deleteat!(Any[1,2,3], [1,3]) +Core.svec(1, 2) == Core.svec(3, 4) +any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))]) + +# Code loading uses this +sortperm(mtime.(readdir(".")), rev=true) +# JLLWrappers uses these +Dict{Base.UUID,Set{String}}()[Base.UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}() +get!(Set{String}, Dict{Base.UUID,Set{String}}(), Base.UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")) +eachindex(IndexLinear(), Expr[]) +push!(Expr[], Expr(:return, false)) +vcat(String[], String[]) +k, v = (:hello => nothing) +Base.print_time_imports_report(Base) +Base.print_time_imports_report_init(Base) + +# Preferences uses these +get(Dict{String,Any}(), 
"missing", nothing) +delete!(Dict{String,Any}(), "missing") +for (k, v) in Dict{String,Any}() + println(k) +end + +# interactive startup uses this +write(IOBuffer(), "") + +# precompile @time report generation and printing +@time @eval Base.Experimental.@force_compile """ julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename()) -have_repl = haskey(Base.loaded_modules, - Base.PkgId(Base.UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL")) -if have_repl - hardcoded_precompile_statements *= """ - precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol}) - """ -end - Artifacts = get(Base.loaded_modules, Base.PkgId(Base.UUID("56f22d72-fd6d-98f1-02f0-08ddc0907c33"), "Artifacts"), nothing) @@ -143,25 +198,19 @@ if Artifacts !== nothing using Artifacts, Base.BinaryPlatforms, Libdl artifacts_toml = abspath(joinpath(Sys.STDLIB, "Artifacts", "test", "Artifacts.toml")) artifact_hash("HelloWorldC", artifacts_toml) - oldpwd = pwd(); cd(dirname(artifacts_toml)) - macroexpand(Main, :(@artifact_str("HelloWorldC"))) - cd(oldpwd) artifacts = Artifacts.load_artifacts_toml(artifacts_toml) platforms = [Artifacts.unpack_platform(e, "HelloWorldC", artifacts_toml) for e in artifacts["HelloWorldC"]] best_platform = select_platform(Dict(p => triplet(p) for p in platforms)) + if best_platform !== nothing + # @artifact errors for unsupported platforms + oldpwd = pwd(); cd(dirname(artifacts_toml)) + macroexpand(Main, :(@artifact_str("HelloWorldC"))) + cd(oldpwd) + end dlopen("libjulia$(Base.isdebugbuild() ? "-debug" : "")", RTLD_LAZY | RTLD_DEEPBIND) """ end -Pkg = get(Base.loaded_modules, - Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg"), - nothing) - -if Pkg !== nothing - # TODO: Split Pkg precompile script into REPL and script part - repl_script = Pkg.precompile_script * repl_script # do larger workloads first for better parallelization -end - FileWatching = get(Base.loaded_modules, Base.PkgId(Base.UUID("7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"), "FileWatching"), nothing) @@ -182,28 +231,12 @@ if Libdl !== nothing """ end -InteractiveUtils = get(Base.loaded_modules, - Base.PkgId(Base.UUID("b77e0a4c-d291-57a0-90e8-8db25a27a240"), "InteractiveUtils"), - nothing) -if InteractiveUtils !== nothing - repl_script *= """ - @time_imports using Random - """ -end - -const JULIA_PROMPT = "julia> " -const PKG_PROMPT = "pkg> " -const SHELL_PROMPT = "shell> " -const HELP_PROMPT = "help?> " - # Printing the current state let global print_state print_lk = ReentrantLock() status = Dict{String, String}( "step1" => "W", - "step2" => "W", - "repl" => "0/0", "step3" => "W", "clock" => "◐", ) @@ -224,8 +257,6 @@ let isempty(args) || push!(status, args...) print("\r└ Collect (Basic: ") print_status("step1") - print(", REPL ", status["repl"], ": ") - print_status("step2") print(") => Execute ") print_status("step3") end @@ -237,10 +268,11 @@ ansi_disablecursor = "\e[?25l" blackhole = Sys.isunix() ? "/dev/null" : "nul" procenv = Dict{String,Any}( "JULIA_HISTORY" => blackhole, - "JULIA_PROJECT" => nothing, # remove from environment - "JULIA_LOAD_PATH" => "@stdlib", + "JULIA_LOAD_PATH" => "@$(Sys.iswindows() ? ";" : ":")@stdlib", "JULIA_DEPOT_PATH" => Sys.iswindows() ? 
";" : ":", - "TERM" => "") + "TERM" => "", + # "JULIA_DEBUG" => "precompilation", + "JULIA_FALLBACK_REPL" => "true") generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printed start_time = time_ns() @@ -248,7 +280,6 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe # Extract the precompile statements from the precompile file statements_step1 = Channel{String}(Inf) - statements_step2 = Channel{String}(Inf) # From hardcoded statements for statement in split(hardcoded_precompile_statements::String, '\n') @@ -263,7 +294,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe anim_chars = ["◐","◓","◑","◒"] current = 1 if fancyprint - while isopen(statements_step2) || !isempty(statements_step2) + while isopen(statements_step1) || !isempty(statements_step1) print_state("clock" => anim_chars[current]) wait(t) current = current == 4 ? 1 : current + 1 @@ -277,24 +308,32 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe print_state("step1" => "R") # Also precompile a package here pkgname = "__PackagePrecompilationStatementModule" - mkpath(joinpath(prec_path, pkgname, "src")) - path = joinpath(prec_path, pkgname, "src", "$pkgname.jl") - write(path, - """ - module $pkgname - end - """) + pkguuid = "824efdaf-a0e9-431c-8ee7-3d356b2531c2" + pkgpath = joinpath(prec_path, pkgname) + mkpath(joinpath(pkgpath, "src")) + write(joinpath(pkgpath, "src", "$pkgname.jl"), + """ + module $pkgname + println("Precompiling $pkgname") + end + """) + write(joinpath(pkgpath, "Project.toml"), + """ + name = "$pkgname" + uuid = "$pkguuid" + """) + touch(joinpath(pkgpath, "Manifest.toml")) tmp_prec = tempname(prec_path) tmp_proc = tempname(prec_path) s = """ - pushfirst!(DEPOT_PATH, $(repr(prec_path))); + pushfirst!(DEPOT_PATH, $(repr(joinpath(prec_path,"depot")))); Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec)); - Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path))) + Base.Precompilation.precompilepkgs(;fancyprint=true); $precompile_script """ p = run(pipeline(addenv(`$(julia_exepath()) -O0 --trace-compile=$tmp_proc --sysimage $sysimg - --cpu-target=native --startup-file=no --color=yes`, procenv), - stdin=IOBuffer(s), stdout=debug_output)) + --cpu-target=native --startup-file=no --color=yes --project=$(pkgpath)`, procenv), + stdin=IOBuffer(s), stderr=debug_output, stdout=debug_output)) n_step1 = 0 for f in (tmp_prec, tmp_proc) isfile(f) || continue @@ -307,105 +346,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe print_state("step1" => "F$n_step1") return :ok end - !PARALLEL_PRECOMPILATION && wait(step1) - - step2 = @async mktemp() do precompile_file, precompile_file_h - print_state("step2" => "R") - # Collect statements from running a REPL process and replaying our REPL script - touch(precompile_file) - pts, ptm = open_fake_pty() - if have_repl - cmdargs = `-e 'import REPL; REPL.Terminals.is_precompiling[] = true'` - else - cmdargs = `-e nothing` - end - p = run(addenv(addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg - --cpu-target=native --startup-file=no --color=yes -i $cmdargs```, procenv), - "JULIA_PKG_PRECOMPILE_AUTO" => "0"), - pts, pts, pts; wait=false) - Base.close_stdio(pts) - # Prepare a background process to copy output from process until `pts` is closed - output_copy = Base.BufferStream() - tee = @async try - while !eof(ptm) - l = readavailable(ptm) - write(debug_output, l) - Sys.iswindows() && (sleep(0.1); 
yield(); yield()) # workaround hang - probably a libuv issue? - write(output_copy, l) - end - catch ex - if !(ex isa Base.IOError && ex.code == Base.UV_EIO) - rethrow() # ignore EIO on ptm after pts dies - end - finally - close(output_copy) - close(ptm) - end - repl_inputter = @async begin - # wait for the definitive prompt before start writing to the TTY - readuntil(output_copy, JULIA_PROMPT) - sleep(0.1) - readavailable(output_copy) - # Input our script - if have_repl - precompile_lines = split(repl_script::String, '\n'; keepempty=false) - curr = 0 - for l in precompile_lines - sleep(0.1) - curr += 1 - print_state("repl" => "$curr/$(length(precompile_lines))") - # consume any other output - bytesavailable(output_copy) > 0 && readavailable(output_copy) - # push our input - write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n") - write(ptm, l, "\n") - readuntil(output_copy, "\n") - # wait for the next prompt-like to appear - readuntil(output_copy, "\n") - strbuf = "" - while !eof(output_copy) - strbuf *= String(readavailable(output_copy)) - occursin(JULIA_PROMPT, strbuf) && break - occursin(PKG_PROMPT, strbuf) && break - occursin(SHELL_PROMPT, strbuf) && break - occursin(HELP_PROMPT, strbuf) && break - sleep(0.1) - end - end - end - write(ptm, "exit()\n") - wait(tee) - success(p) || Base.pipeline_error(p) - close(ptm) - write(debug_output, "\n#### FINISHED ####\n") - end - - n_step2 = 0 - precompile_copy = Base.BufferStream() - buffer_reader = @async for statement in eachline(precompile_copy) - print_state("step2" => "R$n_step2") - push!(statements_step2, statement) - n_step2 += 1 - end - - open(precompile_file, "r") do io - while true - # We need to allways call eof(io) for bytesavailable(io) to work - eof(io) && istaskdone(repl_inputter) && eof(io) && break - if bytesavailable(io) == 0 - sleep(0.1) - continue - end - write(precompile_copy, readavailable(io)) - end - end - close(precompile_copy) - wait(buffer_reader) - close(statements_step2) - print_state("step2" => "F$n_step2") - return :ok - end - !PARALLEL_PRECOMPILATION && wait(step2) + PARALLEL_PRECOMPILATION ? bind(statements_step1, step1) : wait(step1) # Create a staging area where all the loaded packages are available PrecompileStagingArea = Module() @@ -414,12 +355,13 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe eval(PrecompileStagingArea, :(const $(Symbol(_mod)) = $_mod)) end end + eval(PrecompileStagingArea, :(const Compiler = Base.Compiler)) n_succeeded = 0 # Make statements unique statements = Set{String}() # Execute the precompile statements - for sts in [statements_step1, statements_step2], statement in sts + for statement in statements_step1 # Main should be completely clean occursin("Main.", statement) && continue Base.in!(statement, statements) && continue @@ -446,7 +388,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe print_state("step3" => string("R$n_succeeded", failed > 0 ? 
" ($failed failed)" : "")) catch ex # See #28808 - @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0 + @warn "Failed to precompile expression" form=statement exception=(ex,catch_backtrace()) _module=nothing _file=nothing _line=0 end end wait(clock) # Stop asynchronous printing @@ -455,10 +397,10 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe println() # Seems like a reasonable number right now, adjust as needed # comment out if debugging script - n_succeeded > (have_repl ? 900 : 90) || @warn "Only $n_succeeded precompile statements" + have_repl = false + n_succeeded > (have_repl ? 650 : 90) || @warn "Only $n_succeeded precompile statements" fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.") - fetch(step2) == :ok || throw("Step 2 of collecting precompiles failed.") tot_time = time_ns() - start_time println("Precompilation complete. Summary:") @@ -466,15 +408,10 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe finally fancyprint && print(ansi_enablecursor) GC.gc(true); GC.gc(false); # reduce memory footprint - return end generate_precompile_statements() -# As a last step in system image generation, -# remove some references to build time environment for a more reproducible build. -Base.Filesystem.temp_cleanup_purge(force=true) - let stdout = Ref{IO}(stdout) Base.PROGRAM_FILE = "" Sys.BINDIR = "" diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl index df17b967c1ed7..8b1eb55cbe4f4 100755 --- a/contrib/julia-config.jl +++ b/contrib/julia-config.jl @@ -67,9 +67,7 @@ function ldlibs(doframework) "julia" end if Sys.isunix() - return "-Wl,-rpath,$(shell_escape(libDir())) " * - (Sys.isapple() ? string() : "-Wl,-rpath,$(shell_escape(private_libDir())) ") * - "-l$libname" + return "-L$(shell_escape(private_libDir())) -Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname" else return "-l$libname -lopenlibm" end diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl new file mode 100644 index 0000000000000..0303e95f448b5 --- /dev/null +++ b/contrib/juliac-buildscript.jl @@ -0,0 +1,284 @@ +# Script to run in the process that generates juliac's object file output + +inputfile = ARGS[1] +output_type = ARGS[2] +add_ccallables = ARGS[3] == "true" + +# Initialize some things not usually initialized when output is requested +Sys.__init__() +Base.init_depot_path() +Base.init_load_path() +Base.init_active_project() +task = current_task() +task.rngState0 = 0x5156087469e170ab +task.rngState1 = 0x7431eaead385992c +task.rngState2 = 0x503e1d32781c2608 +task.rngState3 = 0x3a77f7189200c20b +task.rngState4 = 0x5502376d099035ae +uuid_tuple = (UInt64(0), UInt64(0)) +ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple) + +# Patch methods in Core and Base + +@eval Core begin + DomainError(@nospecialize(val), @nospecialize(msg::AbstractString)) = (@noinline; $(Expr(:new, :DomainError, :val, :msg))) +end + +(f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd) + +@eval Base begin + depwarn(msg, funcsym; force::Bool=false) = nothing + _assert_tostring(msg) = "" + reinit_stdio() = nothing + JuliaSyntax.enable_in_core!() = nothing + init_active_project() = ACTIVE_PROJECT[] = nothing + set_active_project(projfile::Union{AbstractString,Nothing}) = ACTIVE_PROJECT[] = projfile + disable_library_threading() = nothing + 
start_profile_listener() = nothing + @inline function invokelatest(f::F, args...; kwargs...) where F + return f(args...; kwargs...) + end + function sprint(f::F, args::Vararg{Any,N}; context=nothing, sizehint::Integer=0) where {F<:Function,N} + s = IOBuffer(sizehint=sizehint) + if context isa Tuple + f(IOContext(s, context...), args...) + elseif context !== nothing + f(IOContext(s, context), args...) + else + f(s, args...) + end + String(_unsafe_take!(s)) + end + function show_typeish(io::IO, @nospecialize(T)) + if T isa Type + show(io, T) + elseif T isa TypeVar + print(io, (T::TypeVar).name) + else + print(io, "?") + end + end + function show(io::IO, T::Type) + if T isa DataType + print(io, T.name.name) + if T !== T.name.wrapper && length(T.parameters) > 0 + print(io, "{") + first = true + for p in T.parameters + if !first + print(io, ", ") + end + first = false + if p isa Int + show(io, p) + elseif p isa Type + show(io, p) + elseif p isa Symbol + print(io, ":") + print(io, p) + elseif p isa TypeVar + print(io, p.name) + else + print(io, "?") + end + end + print(io, "}") + end + elseif T isa Union + print(io, "Union{") + show_typeish(io, T.a) + print(io, ", ") + show_typeish(io, T.b) + print(io, "}") + elseif T isa UnionAll + print(io, T.body::Type) + print(io, " where ") + print(io, T.var.name) + end + end + show_type_name(io::IO, tn::Core.TypeName) = print(io, tn.name) + + mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) where {F, F2} = + _mapreduce_dim(f, op, init, A, dims) + mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted...; kw...) where {F, F2} = + reduce(op, map(f, A...); kw...) + + _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} = + mapfoldl_impl(f, op, nt, A) + + _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} = + _mapreduce(f, op, IndexStyle(A), A) + + _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} = + mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A) + + _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) where {F,F2} = + mapreducedim!(f, op, reducedim_init(f, op, A, dims), A) + + mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F, F2} = + reduce_empty_iter(MappingRF(f, op), itr, ItrEltype) + mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x)) + + _mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A) + mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T) + mapreduce_empty(::typeof(abs), op::F, T) where {F} = abs(reduce_empty(op, T)) + mapreduce_empty(::typeof(abs2), op::F, T) where {F} = abs2(reduce_empty(op, T)) +end +@eval Base.Unicode begin + function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F + nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform) + buffer = Base.StringVector(nwords*4) + nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform) + nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options) + nbytes < 0 && utf8proc_error(nbytes) + return String(resize!(buffer, nbytes)) + end +end +@eval Base.GMP begin + function __init__() + try + ccall((:__gmp_set_memory_functions, libgmp), Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}), + cglobal(:jl_gc_counted_malloc), + cglobal(:jl_gc_counted_realloc_with_old_size), + 
cglobal(:jl_gc_counted_free_with_size)) + ZERO.alloc, ZERO.size, ZERO.d = 0, 0, C_NULL + ONE.alloc, ONE.size, ONE.d = 1, 1, pointer(_ONE) + catch ex + Base.showerror_nostdio(ex, "WARNING: Error during initialization of module GMP") + end + # This only works with a patched version of GMP, ignore otherwise + try + ccall((:__gmp_set_alloc_overflow_function, libgmp), Cvoid, + (Ptr{Cvoid},), + cglobal(:jl_throw_out_of_memory_error)) + ALLOC_OVERFLOW_FUNCTION[] = true + catch ex + # ErrorException("ccall: could not find function...") + if typeof(ex) != ErrorException + rethrow() + end + end + end +end +@eval Base.Sort begin + issorted(itr; + lt::T=isless, by::F=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where {T,F} = + issorted(itr, ord(lt,by,rev,order)) +end +@eval Base.TOML begin + function try_return_datetime(p, year, month, day, h, m, s, ms) + return DateTime(year, month, day, h, m, s, ms) + end + function try_return_date(p, year, month, day) + return Date(year, month, day) + end + function parse_local_time(l::Parser) + h = @try parse_int(l, false) + h in 0:23 || return ParserError(ErrParsingDateTime) + _, m, s, ms = @try _parse_local_time(l, true) + # TODO: Could potentially parse greater accuracy for the + # fractional seconds here. + return try_return_time(l, h, m, s, ms) + end + function try_return_time(p, h, m, s, ms) + return Time(h, m, s, ms) + end +end + +# Load user code + +import Base.Experimental.entrypoint + +let mod = Base.include(Base.__toplevel__, inputfile) + if !isa(mod, Module) + mod = Main + end + if output_type == "--output-exe" && isdefined(mod, :main) && !add_ccallables + entrypoint(mod.main, ()) + end + #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{Base.SubString{String}, 1}, String)) + #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{String, 1}, Char)) + entrypoint(Base.task_done_hook, (Task,)) + entrypoint(Base.wait, ()) + entrypoint(Base.trypoptask, (Base.StickyWorkqueue,)) + entrypoint(Base.checktaskempty, ()) + if add_ccallables + ccall(:jl_add_ccallable_entrypoints, Cvoid, ()) + end +end + +# Additional method patches depending on whether user code loads certain stdlibs + +let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this + if :SparseArrays in loaded + using SparseArrays + @eval SparseArrays.CHOLMOD begin + function __init__() + ccall((:SuiteSparse_config_malloc_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_malloc, Ptr{Cvoid})) + ccall((:SuiteSparse_config_calloc_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_calloc, Ptr{Cvoid})) + ccall((:SuiteSparse_config_realloc_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_realloc, Ptr{Cvoid})) + ccall((:SuiteSparse_config_free_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_free, Ptr{Cvoid})) + end + end + end + if :Artifacts in loaded + using Artifacts + @eval Artifacts begin + function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, _::Val{lazyartifacts}) where lazyartifacts + # If the artifact exists, we're in the happy path and we can immediately + # return the path to the artifact: + dirs = artifacts_dirs(bytes2hex(hash.bytes)) + for dir in dirs + if isdir(dir) + return jointail(dir, path_tail) + end + end + error("Artifact not found") + end + end + end + if :Pkg in loaded + using Pkg + @eval Pkg begin + __init__() = rand() #TODO, methods that do nothing don't get codegened + end + end + if 
:StyledStrings in loaded + using StyledStrings + @eval StyledStrings begin + __init__() = rand() + end + end + if :Markdown in loaded + using Markdown + @eval Markdown begin + __init__() = rand() + end + end + if :JuliaSyntaxHighlighting in loaded + using JuliaSyntaxHighlighting + @eval JuliaSyntaxHighlighting begin + __init__() = rand() + end + end +end + +empty!(Core.ARGS) +empty!(Base.ARGS) +empty!(LOAD_PATH) +empty!(DEPOT_PATH) +empty!(Base.TOML_CACHE.d) +Base.TOML.reinit!(Base.TOML_CACHE.p, "") +Base.ACTIVE_PROJECT[] = nothing +@eval Base begin + PROGRAM_FILE = "" +end +@eval Sys begin + BINDIR = "" + STDLIB = "" +end diff --git a/contrib/juliac.jl b/contrib/juliac.jl new file mode 100644 index 0000000000000..20d56615c6357 --- /dev/null +++ b/contrib/juliac.jl @@ -0,0 +1,109 @@ +# Julia compiler wrapper script +# NOTE: The interface and location of this script are considered unstable/experimental + +cmd = Base.julia_cmd() +cmd = `$cmd --startup-file=no --history-file=no` +output_type = nothing # exe, sharedlib, sysimage +outname = nothing +file = nothing +add_ccallables = false +verbose = false + +help = findfirst(x->x == "--help", ARGS) +if help !== nothing + println( + """ + Usage: julia juliac.jl [--output-exe | --output-lib | --output-sysimage] [options] + --experimental --trim= Only output code statically determined to be reachable + --compile-ccallable Include all methods marked `@ccallable` in output + --verbose Request verbose output + """) + exit(0) +end + +# arguments to forward to julia compilation process +julia_args = [] + +let i = 1 + while i <= length(ARGS) + arg = ARGS[i] + if arg == "--output-exe" || arg == "--output-lib" || arg == "--output-sysimage" + isnothing(output_type) || error("Multiple output types specified") + global output_type = arg + i == length(ARGS) && error("Output specifier requires an argument") + global outname = ARGS[i+1] + i += 1 + elseif arg == "--compile-ccallable" + global add_ccallables = true + elseif arg == "--verbose" + global verbose = true + elseif startswith(arg, "--trim") || arg == "--experimental" + # forwarded args + push!(julia_args, arg) + else + if arg[1] == '-' || !isnothing(file) + println("Unexpected argument `$arg`") + exit(1) + end + global file = arg + end + i += 1 + end +end + +isnothing(outname) && error("No output file specified") +isnothing(file) && error("No input file specified") + +absfile = abspath(file) +cflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --cflags `) +cflags = Base.shell_split(cflags) +allflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --allflags`) +allflags = Base.shell_split(allflags) +tmpdir = mktempdir(cleanup=false) +initsrc_path = joinpath(tmpdir, "init.c") +init_path = joinpath(tmpdir, "init.a") +img_path = joinpath(tmpdir, "img.a") +bc_path = joinpath(tmpdir, "img-bc.a") + +open(initsrc_path, "w") do io + print(io, """ + #include + __attribute__((constructor)) void static_init(void) { + if (jl_is_initialized()) + return; + julia_init(JL_IMAGE_IN_MEMORY); + jl_exception_clear(); + } + """) +end + +cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $julia_args $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1) +verbose && println("Running: $cmd") +if !success(pipeline(cmd; stdout, stderr)) + println(stderr, "\nFailed to compile $file") + 
exit(1) +end + +run(`cc $(cflags) -g -c -o $init_path $initsrc_path`) + +if output_type == "--output-lib" || output_type == "--output-sysimage" + of, ext = splitext(outname) + soext = "." * Base.BinaryPlatforms.platform_dlext() + if ext == "" + outname = of * soext + end +end + +julia_libs = Base.shell_split(Base.isdebugbuild() ? "-ljulia-debug -ljulia-internal-debug" : "-ljulia -ljulia-internal") +try + if output_type == "--output-lib" + run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`) + elseif output_type == "--output-sysimage" + run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $(julia_libs)`) + else + run(`cc $(allflags) -o $outname -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`) + end +catch + println("\nCompilation failed.") + exit(1) +end diff --git a/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m b/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m index db2f13b485189..1d20d6ed3efa1 100644 --- a/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m +++ b/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m @@ -51,7 +51,7 @@ + (ExecSandboxController *)sharedController { @end -/// Location of an installed variant of Julia (frameowrk or nix hier). +/// Location of an installed variant of Julia (framework or nix hier). @interface JuliaVariant : NSObject @property(readonly, nullable) NSBundle *bundle; @property(readonly, nonnull) NSURL *juliaexe; diff --git a/contrib/mac/frameworkapp/README.md b/contrib/mac/frameworkapp/README.md index 94c344d16564f..953ad115c94e1 100644 --- a/contrib/mac/frameworkapp/README.md +++ b/contrib/mac/frameworkapp/README.md @@ -4,23 +4,23 @@ New Julia Launcher App This builds the Julia framework and a launcher app and packages them in a product archive for the macOS Installer. -Run `make APPLE_DEVELOPMENT_TEAM=xxxxxxxxxx` to build the product archive. The +Run `make APPLE_DEVELOPMENT_TEAM=xxxxxxxxxx` to build the product archive. The resulting archive may be installed to the home directory with `installer -pkg~/Documents/pkgs/Julia-1.1.0.pkg -target CurrentUserHomeDirectory`. -To just build the app, build the `appexport` make target. Read the comments at +To just build the app, build the `appexport` make target. Read the comments at the top of the `Makefile` to set appropriate code signing parameters. The framework is installed in `/Library/Frameworks` and the app in -`/Applications`. Installation may be system-wide (i.e., relative to `/`) or +`/Applications`. Installation may be system-wide (i.e., relative to `/`) or local to the user's home directory (i.e., `$Home/Applications/Julia.app`). The `julia` binary is embedded in the framework at `Julia.framework/Helpers/julia`. -Multiple versions of Julia may be installed at once. Each version is placed in -the `Julia.framework/Versions` directory. By default, the version is +Multiple versions of Julia may be installed at once. Each version is placed in +the `Julia.framework/Versions` directory. By default, the version is identified by the Major.Minor version number but may be customized by setting -the `FRAMEWORK_VERSION` make variable. The resulting product archive will not -overwrite other versions but will upgrade a version if it exists. Thus, the +the `FRAMEWORK_VERSION` make variable. 
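Returning to the `contrib/juliac.jl` driver introduced above: a minimal input file might look like the sketch below. The module wrapper, the zero-argument `main`, and the `--output-exe hello --experimental` invocation are assumptions inferred from the driver's help text and from the `entrypoint(mod.main, ())` call in `contrib/juliac-buildscript.jl`; the interface is explicitly marked unstable, so treat this as an illustration only.

```julia
# hello.jl -- hypothetical input for the experimental driver, compiled with e.g.:
#   julia contrib/juliac.jl --output-exe hello --experimental hello.jl
module HelloApp

# contrib/juliac-buildscript.jl registers `entrypoint(mod.main, ())`, so a
# zero-argument `main` is assumed here; `Core.stdout` is used because the
# buildscript stubs out `reinit_stdio()`.
function main()
    println(Core.stdout, "Hello from a juliac-compiled binary")
    return 0
end

end # module
```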
The resulting product archive will not +overwrite other versions but will upgrade a version if it exists. Thus, the `1.1` framework version that is actually the 3rd patch (1.1.3) will overwrite any existing `1.1` framework version. diff --git a/contrib/normalize_triplet.py b/contrib/normalize_triplet.py index 77c047b360b76..833b725480996 100755 --- a/contrib/normalize_triplet.py +++ b/contrib/normalize_triplet.py @@ -14,11 +14,13 @@ 'i686': "i\\d86", 'aarch64': "(arm|aarch)64", 'armv7l': "arm(v7l)?", + 'riscv64': "(rv64|riscv64)", 'powerpc64le': "p(ower)?pc64le", } platform_mapping = { 'darwin': "-apple-darwin[\\d\\.]*", 'freebsd': "-(.*-)?freebsd[\\d\\.]*", + 'openbsd': "-(.*-)?openbsd[\\d\\.]*", 'windows': "-w64-mingw32", 'linux': "-(.*-)?linux", } @@ -96,6 +98,7 @@ def p(x): 'darwin': 'apple-darwin', 'windows': 'w64-mingw32', 'freebsd': 'unknown-freebsd', + 'openbsd': 'unknown-openbsd', } x = r(x) if x: diff --git a/contrib/pgo-lto-bolt/.gitignore b/contrib/pgo-lto-bolt/.gitignore new file mode 100644 index 0000000000000..1b29279acc0da --- /dev/null +++ b/contrib/pgo-lto-bolt/.gitignore @@ -0,0 +1,14 @@ +stage0* +stage1* +stage2* +bolt +bolt_instrument +merge_data +copy_originals + +profiles +profiles-bolt + +toolchain +pgo-instrumented.build +optimized.build diff --git a/contrib/pgo-lto-bolt/Makefile b/contrib/pgo-lto-bolt/Makefile new file mode 100644 index 0000000000000..ce1b8b04f68c9 --- /dev/null +++ b/contrib/pgo-lto-bolt/Makefile @@ -0,0 +1,190 @@ +.PHONY: clean clean_profiles restore_originals + +# See the makefiles in contrib/bolt and contrib/pgo-lto for more information. + +# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs +BOLT_ARGS := +# Reorder basic blocks within functions +BOLT_ARGS += -reorder-blocks=ext-tsp +# Reorder functions within the binary +BOLT_ARGS += -reorder-functions=cdsort +# Split function code into hot and code regions +BOLT_ARGS += -split-functions +# Split as many basic blocks as possible +BOLT_ARGS += -split-all-cold +# Move jump tables to a separate section +BOLT_ARGS += -jump-tables=move +# Use regular size pages for code alignment +BOLT_ARGS += -no-huge-pages +# Fold functions with identical code +BOLT_ARGS += -icf=1 +# Split using best available strategy (three-way splitting, Cache-Directed Sort) +# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed +# BOLT_ARGS += -split-strategy=cdsplit +# Update DWARF debug info in the final binary +BOLT_ARGS += -update-debug-sections +# Print optimization statistics +BOLT_ARGS += -dyno-stats +# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117 +# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2) +# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text +# which we do in the bolt target +BOLT_ARGS += -skip-funcs=.\*apply_cl.\* + +# -fno-reorder-blocks-and-partition is needed on gcc >= 8. 
+BOLT_FLAGS := $\ + "BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\ + "BOLT_LDFLAGS=-Wl,--emit-relocs" + +STAGE0_BUILD:=$(CURDIR)/toolchain +STAGE1_BUILD:=$(CURDIR)/pgo-instrumented.build +STAGE2_BUILD:=$(CURDIR)/optimized.build + +STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/ +STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/ + +BOLT_PROFILE_DIR:=$(CURDIR)/profiles-bolt +PGO_PROFILE_DIR:=$(CURDIR)/profiles +PGO_PROFILE_FILE:=$(PGO_PROFILE_DIR)/merged.prof +PGO_PROFRAW_FILES:=$(wildcard $(PGO_PROFILE_DIR)/*.profraw) +JULIA_ROOT:=$(CURDIR)/../.. + +LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt +LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata +LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt +LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata +LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy + +# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file. +SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so +FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done) + +AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage2` to finish off the build. $\ + You can now optionally collect more profiling data by running Julia with an appropriate workload, $\ + if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage2`. $\ + You should end up with some data in $(BOLT_PROFILE_DIR). Afterwards run `make merge_data && make bolt`.' + +# When building a single libLLVM.so we need to increase -vp-counters-per-site +# significantly +COUNTERS_PER_SITE:=6 +# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5 + +AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\ + with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\ + generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\ + Note that running extensive scripts may result in counter overflows, which can be detected by running $\ + `make top`. Afterwards run `make stage2`.' 
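`AFTER_INSTRUMENT_MESSAGE` and `AFTER_STAGE1_MESSAGE` above both ask the user to run the instrumented Julia with a representative workload before continuing. As a rough, hypothetical sketch (not part of the build system), any script that is heavy on compilation and BLAS work will do; the `pgo-instrumented.build/julia` path below is an assumption based on `STAGE1_BUILD`:

```julia
# profile_workload.jl -- hypothetical PGO/BOLT profiling workload.
# Run it with the instrumented build, e.g.:
#   ./pgo-instrumented.build/julia profile_workload.jl   # path is an assumption
using LinearAlgebra

function workload(n::Int)
    A = randn(n, n)
    F = lu(A)                      # exercises BLAS/LAPACK call paths
    x = F \ randn(n)
    # compile some fresh code so inference/codegen paths show up in the profile
    ex = Meta.parse("sum(i -> sin(i) + $(x[1]), 1:10_000)")
    return Core.eval(Main, ex)
end

for n in (64, 256, 512)
    println("n = ", n, " => ", workload(n))
end
```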
+ +TOOLCHAIN_FLAGS = $\ + "CC=$(STAGE0_TOOLS)clang" $\ + "CXX=$(STAGE0_TOOLS)clang++" $\ + "LD=$(STAGE0_TOOLS)ld.lld" $\ + "AR=$(STAGE0_TOOLS)llvm-ar" $\ + "RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\ + "CFLAGS+=$(PGO_CFLAGS)" $\ + "CXXFLAGS+=$(PGO_CXXFLAGS)" $\ + "LDFLAGS+=-fuse-ld=lld $(PGO_LDFLAGS)" + +$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD): + $(MAKE) -C $(JULIA_ROOT) O=$@ configure + +stage0: export USE_BINARYBUILDER_LLVM=1 +stage0: | $(STAGE0_BUILD) + # Turn [cd]tors into init/fini_array sections in libclang_rt, since lld + # doesn't do that, and otherwise the profile constructor is not executed + $(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools install-BOLT && \ + find $< -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \ + touch $@ + +$(STAGE1_BUILD): stage0 +stage1: PGO_CFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE) +stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE) +stage1: PGO_LDFLAGS:=-flto=thin -fprofile-generate=$(PGO_PROFILE_DIR) +stage1: export USE_BINARYBUILDER_LLVM=0 +stage1: | $(STAGE1_BUILD) + $(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@ + @echo $(AFTER_STAGE1_MESSAGE) + +stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE) +stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE) +stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe +stage2: export USE_BINARYBUILDER_LLVM=0 +stage2: $(PGO_PROFILE_FILE) | $(STAGE2_BUILD) + $(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \ + julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \ + touch $@ + +copy_originals: stage2 + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \ + cp $$abs_file "$$abs_file.original"; \ + done && \ + touch $@ + +# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data +# as the final build uses -no-huge-pages +# We reset the mtime of the files to prevent make from rebuilding targets depending on them. +bolt_instrument: copy_originals + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \ + old_time=$$(stat -c %Y $$abs_file); \ + $(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(BOLT_PROFILE_DIR)/$$file-prof" -no-huge-pages; \ + mkdir -p $$(dirname "$(BOLT_PROFILE_DIR)/$$file-prof"); \ + touch -d "@$$old_time" $$abs_file; \ + printf "\n"; \ + done && \ + touch $@ + @echo $(AFTER_INSTRUMENT_MESSAGE) + +finish_stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE) +finish_stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE) +finish_stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe +finish_stage2: stage2 + $(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) + +merge_data: bolt_instrument + for file in $(FILES_TO_OPTIMIZE); do \ + profiles=$(BOLT_PROFILE_DIR)/$$file-prof.*.fdata; \ + $(LLVM_MERGEFDATA) $$profiles > "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata"; \ + done && \ + touch $@ + +# The --use-old-text saves about 16 MiB of libLLVM.so size. +# However, the rust folk found it succeeds very non-deterministically for them. 
+# It tries to reuse old text segments to reduce binary size +# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal +# That flag saves less than 1 MiB for libjulia-internal so oh well. +# We reset the mtime of the files to prevent make from rebuilding targets depending on them. +bolt: merge_data + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \ + old_time=$$(stat -c %Y $$abs_file); \ + $(LLVM_BOLT) "$$abs_file.original" -data "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE2_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \ + touch -d "@$$old_time" $$abs_file; \ + done && \ + touch $@ + +clean_profiles: + rm -rf $(PGO_PROFILE_DIR) $(BOLT_PROFILE_DIR) + +clean: + rm -f stage0 stage1 stage2 $(PGO_PROFILE_FILE) bolt copy_originals merge_data bolt_instrument + +restore_originals: copy_originals + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \ + cp -P "$$abs_file.original" $$abs_file; \ + done + +delete_originals: copy_originals + for file in $(FILES_TO_OPTIMIZE); do \ + abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \ + rm "$$abs_file.original"; \ + done + +$(PGO_PROFILE_FILE): stage1 $(PGO_PROFRAW_FILES) + $(LLVM_PROFDATA) merge -output=$@ $(PGO_PROFRAW_FILES) + +# show top 50 functions +top: $(PGO_PROFILE_FILE) + $(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT) diff --git a/contrib/pgo-lto-bolt/README.md b/contrib/pgo-lto-bolt/README.md new file mode 100644 index 0000000000000..ab574907c292f --- /dev/null +++ b/contrib/pgo-lto-bolt/README.md @@ -0,0 +1,18 @@ +BOLT only works on x86_64 and aarch64 on Linux. + +DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738. +If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`. + +To build a PGO+LTO+BOLT version of Julia, run the following commands (`cd` into this directory first): +```bash +make stage1 +make stage2 +make copy_originals +make bolt_instrument +make finish_stage2 +make merge_data +make bolt +``` +After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory. + +This doesn't align the code to support huge pages, since we don't currently seem to use them; skipping that alignment decreases the size of the .so files by 2-4 MB. diff --git a/contrib/pgo-lto/.gitignore b/contrib/pgo-lto/.gitignore new file mode 100644 index 0000000000000..978d8f2ca86dd --- /dev/null +++ b/contrib/pgo-lto/.gitignore @@ -0,0 +1,4 @@ +profiles +stage0* +stage1* +stage2* diff --git a/contrib/pgo-lto/Makefile b/contrib/pgo-lto/Makefile new file mode 100644 index 0000000000000..ddd86f5d5b39a --- /dev/null +++ b/contrib/pgo-lto/Makefile @@ -0,0 +1,74 @@ +.PHONY: top clean clean-profiles + +STAGE0_BUILD:=$(CURDIR)/stage0.build +STAGE1_BUILD:=$(CURDIR)/stage1.build +STAGE2_BUILD:=$(CURDIR)/stage2.build + +STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/ + +PROFILE_DIR:=$(CURDIR)/profiles +PROFILE_FILE:=$(PROFILE_DIR)/merged.prof +JULIA_ROOT:=$(CURDIR)/../..
+ +LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt +LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata +LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy + +# When building a single libLLVM.so we need to increase -vp-counters-per-site +# significantly +COUNTERS_PER_SITE:=6 +# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5 + +AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\ + with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\ + generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\ + Note that running extensive scripts may result in counter overflows, which can be detected by running $\ + `make top`. Afterwards run `make stage2`.' + +STAGE1_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-generate=$(PROFILE_DIR)" $\ + CFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" $\ + CXXFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" +STAGE2_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe" $\ + CFLAGS="-fprofile-use=$(PROFILE_FILE)" $\ + CXXFLAGS="-fprofile-use=$(PROFILE_FILE)" + +COMMON_FLAGS:=USECLANG=1 USE_BINARYBUILDER_LLVM=0 + +all: stage2 # Default target as first in file + +$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD): + $(MAKE) -C $(JULIA_ROOT) O=$@ configure + +stage0: export USE_BINARYBUILDER_LLVM=1 +stage0: | $(STAGE0_BUILD) + # Turn [cd]tors into init/fini_array sections in libclang_rt, since lld + # doesn't do that, and otherwise the profile constructor is not executed + $(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools && \ + find $< -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \ + touch $@ + +$(STAGE1_BUILD): stage0 +stage1: | $(STAGE1_BUILD) + @echo "--- Build Julia Stage 1 - with instrumentation" + PATH=$(STAGE0_TOOLS):$$PATH $(STAGE1_FLAGS) $(MAKE) -C $(STAGE1_BUILD) $(COMMON_FLAGS) && touch $@ + @echo $(AFTER_STAGE1_MESSAGE) + +stage2: $(PROFILE_FILE) | $(STAGE2_BUILD) + @echo "--- Build Julia Stage 2 - PGO + LTO optimised" + PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) && touch $@ + +.DEFAULT: stage2 + PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) $@ + +$(PROFILE_FILE): stage1 $(wildcard $(PROFILE_DIR)/*.profraw) + $(LLVM_PROFDATA) merge -output=$@ $(PROFILE_DIR)/*.profraw + +# show top 50 functions +top: $(PROFILE_FILE) + $(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT) + +clean-profiles: + rm -rf $(PROFILE_DIR) + +clean: + rm -f stage0 stage1 stage2 $(PROFILE_FILE) diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk index f67088141ccd4..5a787b0b67cb1 100644 --- a/contrib/refresh_checksums.mk +++ b/contrib/refresh_checksums.mk @@ -19,12 +19,12 @@ all: checksum pack-checksum # Get this list via: # using BinaryBuilder # print("TRIPLETS=\"$(join(sort(triplet.(BinaryBuilder.supported_platforms(;experimental=true))), " "))\"") -TRIPLETS=aarch64-apple-darwin aarch64-linux-gnu aarch64-linux-musl armv6l-linux-gnueabihf armv6l-linux-musleabihf armv7l-linux-gnueabihf armv7l-linux-musleabihf i686-linux-gnu 
i686-linux-musl i686-w64-mingw32 powerpc64le-linux-gnu x86_64-apple-darwin x86_64-linux-gnu x86_64-linux-musl x86_64-unknown-freebsd x86_64-w64-mingw32 +TRIPLETS=aarch64-apple-darwin aarch64-linux-gnu aarch64-linux-musl aarch64-unknown-freebsd armv6l-linux-gnueabihf armv6l-linux-musleabihf armv7l-linux-gnueabihf armv7l-linux-musleabihf i686-linux-gnu i686-linux-musl i686-w64-mingw32 powerpc64le-linux-gnu riscv64-linux-gnu x86_64-apple-darwin x86_64-linux-gnu x86_64-linux-musl x86_64-unknown-freebsd x86_64-w64-mingw32 CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS)) NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS)) # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded: -BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient +BB_PROJECTS=openssl libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient BB_GCC_EXPANDED_PROJECTS=openblas csl BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld # These are non-BB source-only deps diff --git a/etc/write_base_cache.jl b/contrib/write_base_cache.jl similarity index 100% rename from etc/write_base_cache.jl rename to contrib/write_base_cache.jl diff --git a/deps/BOLT.mk b/deps/BOLT.mk new file mode 100644 index 0000000000000..34391ab10f716 --- /dev/null +++ b/deps/BOLT.mk @@ -0,0 +1,118 @@ +## BOLT ## +include $(SRCDIR)/BOLT.version + +ifneq ($(USE_BINARYBUILDER_BOLT), 1) +BOLT_GIT_URL:=https://github.com/llvm/llvm-project.git +BOLT_TAR_URL=https://api.github.com/repos/llvm/llvm-project/tarball/$1 +$(eval $(call git-external,BOLT,BOLT,CMakeLists.txt,,$(SRCCACHE))) + +BOLT_BUILDDIR := $(BUILDDIR)/$(BOLT_SRC_DIR)/build + +LLVM_ENABLE_PROJECTS := bolt + +LLVM_CFLAGS := +LLVM_CXXFLAGS := +LLVM_CPPFLAGS := +LLVM_LDFLAGS := +LLVM_CMAKE := + +LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)" + +# Otherwise LLVM will translate \\ to / on mingw +LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False + +# Allow adding LLVM specific flags +LLVM_CFLAGS += $(CFLAGS) +LLVM_CXXFLAGS += $(CXXFLAGS) +LLVM_CXXFLAGS += $(LLVM_CXXFLAGS) +LLVM_CPPFLAGS += $(CPPFLAGS) +LLVM_LDFLAGS += $(LDFLAGS) +LLVM_LDFLAGS += $(LLVM_LDFLAGS) +LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING=host -DCMAKE_BUILD_TYPE=Release +LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))" +LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)" + +LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off + +ifeq ($(OS), WINNT) +LLVM_CPPFLAGS += -D__USING_SJLJ_EXCEPTIONS__ -D__CRT__NO_INLINE +endif # OS == WINNT +ifneq ($(HOSTCC),$(CC)) +LLVM_CMAKE += -DCROSS_TOOLCHAIN_FLAGS_NATIVE="-DCMAKE_C_COMPILER=$$(which $(HOSTCC));-DCMAKE_CXX_COMPILER=$$(which $(HOSTCXX))" + +# Defaults to off when crosscompiling, starting from LLVM 18 +LLVM_CMAKE += -DBOLT_ENABLE_RUNTIME=ON +endif +ifeq ($(OS), emscripten) +LLVM_CMAKE += -DCMAKE_TOOLCHAIN_FILE=$(EMSCRIPTEN)/cmake/Modules/Platform/Emscripten.cmake -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_ENABLE_THREADS=OFF -DLLVM_BUILD_UTILS=OFF +endif # OS == emscripten + +ifneq (,$(filter $(ARCH), powerpc64le ppc64le)) +ifeq (${USECLANG},0) +LLVM_CXXFLAGS += -mminimal-toc +endif +endif + +ifeq ($(fPIC),) +LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF 
+endif + +LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \ + -DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS)" +ifeq ($(OS),Darwin) +# Explicitly use the default for -mmacosx-version-min=10.9 and later +LLVM_CMAKE += -DLLVM_ENABLE_LIBCXX=ON +endif + +LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \ + -DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)" + +ifeq ($(USE_SYSTEM_ZLIB), 0) +$(BOLT_BUILDDIR)/build-configured: | $(build_prefix)/manifest/zlib +endif + +$(BOLT_BUILDDIR)/build-configured: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted + mkdir -p $(dir $@) + cd $(dir $@) && \ + $(CMAKE) $(SRCCACHE)/$(BOLT_SRC_DIR)/llvm $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \ + || { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; } + echo 1 > $@ + +$(BOLT_BUILDDIR)/build-compiled: $(BOLT_BUILDDIR)/build-configured + cd $(BOLT_BUILDDIR) && \ + $(if $(filter $(CMAKE_GENERATOR),make), \ + $(MAKE), \ + $(CMAKE) --build . --target bolt) + echo 1 > $@ + +$(BOLT_BUILDDIR)/build-checked: $(BOLT_BUILDDIR)/build-compiled +ifeq ($(OS),$(BUILD_OS)) + cd $(BOLT_BUILDDIR) && \ + $(CMAKE) --build . --target check-bolt +endif + echo 1 > $@ + +BOLT_INSTALL = \ + cd $1 && mkdir -p $2$$(build_depsbindir) && \ + $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P tools/bolt/cmake_install.cmake + +$(eval $(call staged-install, \ + bolt,$$(BOLT_SRC_DIR)/build, \ + BOLT_INSTALL,,,)) + +clean-bolt: + -rm -f $(BOLT_BUILDDIR)/build-configured $(BOLT_BUILDDIR)/build-compiled + -$(MAKE) -C $(BOLT_BUILDDIR) clean + +get-bolt: $(BOLT_SRC_FILE) +extract-bolt: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted +configure-bolt: $(BOLT_BUILDDIR)/build-configured +compile-bolt: $(BOLT_BUILDDIR)/build-compiled +fastcheck-bolt: #none +check-bolt: $(BOLT_BUILDDIR)/build-checked + +else # USE_BINARYBUILDER_BOLT + +$(eval $(call bb-install,BOLT,BOLT,false,true)) + +endif # USE_BINARYBUILDER_BOLT diff --git a/deps/BOLT.version b/deps/BOLT.version new file mode 100644 index 0000000000000..6a785041e163f --- /dev/null +++ b/deps/BOLT.version @@ -0,0 +1,11 @@ +# -*- makefile -*- + +BOLT_VER := 18.1.4 +BOLT_JLL_VER := 18.1.4+0 + +## jll artifact +BOLT_JLL_NAME := BOLT + +## source build +BOLT_BRANCH=llvmorg-$(BOLT_VER) +BOLT_SHA1=e6c3289804a67ea0bb6a86fadbe454dd93b8d855 diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk index e9cc0c942dbe0..4a8afa8fbd53c 100644 --- a/deps/JuliaSyntax.mk +++ b/deps/JuliaSyntax.mk @@ -4,7 +4,7 @@ $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC @# no build steps echo 1 > $@ -$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(JULIAHOME)/base)) +$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(BUILDROOT)/base)) clean-JuliaSyntax: -rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled @@ -12,5 +12,5 @@ get-JuliaSyntax: $(JULIASYNTAX_SRC_FILE) extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted configure-JuliaSyntax: extract-JuliaSyntax compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled -fastcheck-JuliSyntax: check-JuliSyntax -check-JuliSyntax: compile-JuliSyntax +fastcheck-JuliaSyntax: check-JuliaSyntax +check-JuliaSyntax: compile-JuliaSyntax diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version index b604eedaa43dd..86f94135884a0 100644 --- a/deps/JuliaSyntax.version +++ b/deps/JuliaSyntax.version @@ -1,4 +1,4 @@ JULIASYNTAX_BRANCH = main -JULIASYNTAX_SHA1 = 8731bab86f14762cca8cf24224d8c7a6a89c21c5 
+JULIASYNTAX_SHA1 = dfd1d69b153eb119873035e62993a109b27192f0 JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git a/deps/Makefile b/deps/Makefile index 27f5fdbb693d5..396b1021c2ddd 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -22,11 +22,11 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # additionally all targets should be listed in the getall target for easier off-line compilation # if you are adding a new target, it can help to copy an similar, existing target # -# autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl +# autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl openssl # custom Makefile rules: openlibm dsfmt libsuitesparse lapack blastrampoline openblas utf8proc objconv libwhich -# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls libtracyclient +# CMake libs: llvm llvmunwind libgit2 libssh2 libtracyclient # -# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2, libtracyclient +# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2, libtracyclient, mmtk_julia # # to debug 'define' rules, replace eval at the usage site with info or error @@ -64,13 +64,15 @@ ifeq ($(OS), Linux) DEP_LIBS += unwind else ifeq ($(OS), FreeBSD) DEP_LIBS += unwind +else ifeq ($(OS), OpenBSD) +DEP_LIBS += llvmunwind else ifeq ($(OS), Darwin) DEP_LIBS += llvmunwind endif endif endif -ifneq (,$(findstring $(OS),Linux FreeBSD)) +ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD)) ifeq ($(USE_SYSTEM_PATCHELF), 0) DEP_LIBS += patchelf PATCHELF:=$(build_depsbindir)/patchelf @@ -117,9 +119,8 @@ ifeq ($(USE_SYSTEM_GMP), 0) DEP_LIBS += gmp endif -ifeq ($(USE_SYSTEM_LIBGIT2), 0) -ifeq ($(USE_SYSTEM_MBEDTLS), 0) -DEP_LIBS += mbedtls +ifeq ($(USE_SYSTEM_OPENSSL), 0) +DEP_LIBS += openssl endif ifeq ($(USE_SYSTEM_LIBSSH2), 0) @@ -134,18 +135,19 @@ ifeq ($(USE_SYSTEM_CURL), 0) DEP_LIBS += curl endif +ifeq ($(USE_SYSTEM_LIBGIT2), 0) DEP_LIBS += libgit2 -endif # USE_SYSTEM_LIBGIT2 +endif ifeq ($(USE_SYSTEM_MPFR), 0) DEP_LIBS += mpfr endif -ifeq ($(USE_GPL_LIBS), 1) +# Only some of the modules in SuiteSparse are GPL. +# xref: `remove-libsuitesparse-gpl-lib` in libsuitesparse.mk ifeq ($(USE_SYSTEM_LIBSUITESPARSE), 0) DEP_LIBS += libsuitesparse endif -endif ifeq ($(USE_SYSTEM_UTF8PROC), 0) DEP_LIBS += utf8proc @@ -169,6 +171,13 @@ ifeq ($(WITH_ITTAPI),1) DEP_LIBS += ittapi endif +ifeq ($(WITH_NVTX),1) +DEP_LIBS += nvtx +endif + +ifneq ($(WITH_TERMINFO),0) +DEP_LIBS += terminfo +endif # Only compile standalone LAPACK if we are not using OpenBLAS. # OpenBLAS otherwise compiles LAPACK as part of its build. 
@@ -186,13 +195,18 @@ DEP_LIBS += libwhich endif endif +ifneq (${MMTK_PLAN},None) +DEP_LIBS += mmtk_julia +endif + DEP_LIBS_STAGED := $(DEP_LIBS) # list all targets DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ - objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \ - sanitizers libsuitesparse lld libtracyclient ittapi JuliaSyntax + objconv openssl libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \ + sanitizers libsuitesparse lld libtracyclient ittapi nvtx JuliaSyntax \ + terminfo mmtk_julia DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) ifneq ($(USE_BINARYBUILDER_OPENBLAS),0) @@ -226,9 +240,11 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL)) rm -rf $(build_prefix) getall: $(addprefix get-, $(DEP_LIBS_ALL)) +include $(SRCDIR)/BOLT.mk include $(SRCDIR)/csl.mk include $(SRCDIR)/sanitizers.mk include $(SRCDIR)/ittapi.mk +include $(SRCDIR)/nvtx.mk include $(SRCDIR)/llvm.mk include $(SRCDIR)/libuv.mk include $(SRCDIR)/pcre.mk @@ -244,7 +260,7 @@ include $(SRCDIR)/unwind.mk include $(SRCDIR)/gmp.mk include $(SRCDIR)/mpfr.mk include $(SRCDIR)/patchelf.mk -include $(SRCDIR)/mbedtls.mk +include $(SRCDIR)/openssl.mk include $(SRCDIR)/libssh2.mk include $(SRCDIR)/nghttp2.mk include $(SRCDIR)/curl.mk @@ -252,6 +268,10 @@ include $(SRCDIR)/libgit2.mk include $(SRCDIR)/libwhich.mk include $(SRCDIR)/p7zip.mk include $(SRCDIR)/libtracyclient.mk +include $(SRCDIR)/terminfo.mk + +# MMTk +include $(SRCDIR)/mmtk_julia.mk # vendored Julia libs include $(SRCDIR)/JuliaSyntax.mk diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk index bd1cb65c6ae2d..cfa28a4d8b88f 100644 --- a/deps/blastrampoline.mk +++ b/deps/blastrampoline.mk @@ -16,16 +16,16 @@ $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPO BLASTRAMPOLINE_BUILD_ROOT := $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured cd $(dir $@)/src && $(MAKE) $(BLASTRAMPOLINE_BUILD_OPTS) -ifeq ($(OS), WINNT) - # Windows doesn't like soft link, use hard link - cd $(BLASTRAMPOLINE_BUILD_ROOT)/build/ && \ - cp -f --dereference --link libblastrampoline.dll libblastrampoline.dll -endif echo 1 > $@ define BLASTRAMPOLINE_INSTALL $(MAKE) -C $(BLASTRAMPOLINE_BUILD_ROOT) install $(BLASTRAMPOLINE_BUILD_OPTS) DESTDIR="$2" endef +ifeq ($(OS), WINNT) +# Windows doesn't like soft link, use hard link to copy file without version suffix +BLASTRAMPOLINE_INSTALL += && cd $2$$(build_prefix)/bin && \ +$$(WIN_MAKE_HARD_LINK) libblastrampoline-*.dll libblastrampoline.dll +endif $(eval $(call staged-install, \ blastrampoline,$(BLASTRAMPOLINE_SRC_DIR), \ BLASTRAMPOLINE_INSTALL,, \ diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version index 616300377e3e6..1e4a75305a4dd 100644 --- a/deps/blastrampoline.version +++ b/deps/blastrampoline.version @@ -1,7 +1,9 @@ +# -*- makefile -*- + ## jll artifact BLASTRAMPOLINE_JLL_NAME := libblastrampoline ## source build -BLASTRAMPOLINE_VER := 5.8.0 -BLASTRAMPOLINE_BRANCH=v5.8.0 -BLASTRAMPOLINE_SHA1=81316155d4838392e8462a92bcac3eebe9acd0c7 +BLASTRAMPOLINE_VER := 5.12.0 +BLASTRAMPOLINE_BRANCH=v5.12.0 +BLASTRAMPOLINE_SHA1=b127bc8dd4758ffc064340fff2aef4ead552f386 diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 deleted file mode 100644 index 915ee5c4bb6bf..0000000000000 
--- a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -22c097ca7784442f1f10733db7961cc3 diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 deleted file mode 100644 index b824dbcb73a08..0000000000000 --- a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -915791ab9837f09db428060bd128e182dda38c8dc10e13f32f059eb8e8b477548e8ae2cd691522f98c88c510b78b2693018264b62d9cc76d5005ea8104d1539a diff --git a/deps/checksums/ArgTools-1314758ad02ff5e9e5ca718920c6c633b467a84a.tar.gz/md5 b/deps/checksums/ArgTools-1314758ad02ff5e9e5ca718920c6c633b467a84a.tar.gz/md5 new file mode 100644 index 0000000000000..e172379604478 --- /dev/null +++ b/deps/checksums/ArgTools-1314758ad02ff5e9e5ca718920c6c633b467a84a.tar.gz/md5 @@ -0,0 +1 @@ +d3209f45b8ea01a22ac7e9b265e3b84f diff --git a/deps/checksums/ArgTools-1314758ad02ff5e9e5ca718920c6c633b467a84a.tar.gz/sha512 b/deps/checksums/ArgTools-1314758ad02ff5e9e5ca718920c6c633b467a84a.tar.gz/sha512 new file mode 100644 index 0000000000000..991a457654113 --- /dev/null +++ b/deps/checksums/ArgTools-1314758ad02ff5e9e5ca718920c6c633b467a84a.tar.gz/sha512 @@ -0,0 +1 @@ +314981eee11356f14b6dc9e07389c51432e7862d6c767d87d6679385f5a36faef34902954a5dfa6b37d8f3f25eaa4f23ba9431cc78acd3513377955e7d73f210 diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5 new file mode 100644 index 0000000000000..62e63ff3174d6 --- /dev/null +++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5 @@ -0,0 +1 @@ +c12540d5889cef05bc87183a4ce5a54c diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512 new file mode 100644 index 0000000000000..0635e180ac9a5 --- /dev/null +++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512 @@ -0,0 +1 @@ +61cc7cc42b925f37502eed0d31eafadbfdc24a9ebc892c9b8d96a27b004cbccf2e5da7face5c8d9c9db57fac1b5cf662d890a67337436c5d4aa3373256638ab1 diff --git a/deps/checksums/Distributed-c6136853451677f1957bec20ecce13419cde3a12.tar.gz/md5 b/deps/checksums/Distributed-c6136853451677f1957bec20ecce13419cde3a12.tar.gz/md5 new file mode 100644 index 0000000000000..e1c0f9e87b7c7 --- /dev/null +++ b/deps/checksums/Distributed-c6136853451677f1957bec20ecce13419cde3a12.tar.gz/md5 @@ -0,0 +1 @@ +98b8b8bc0ea4bf24c4b2986a5b7ae3e9 diff --git a/deps/checksums/Distributed-c6136853451677f1957bec20ecce13419cde3a12.tar.gz/sha512 b/deps/checksums/Distributed-c6136853451677f1957bec20ecce13419cde3a12.tar.gz/sha512 new file mode 100644 index 0000000000000..ed816ebc21e97 --- /dev/null +++ b/deps/checksums/Distributed-c6136853451677f1957bec20ecce13419cde3a12.tar.gz/sha512 @@ -0,0 +1 @@ +4043933825bf716f2733f8e90632de34a95a437f3b31cda92edd510ffee208f8e374ec3c5922c8142342ae21b4ec4cbd1ecd4036b9057056a12c86169632ac7b diff --git a/deps/checksums/Downloads-e692e77fb5427bf3c6e81514b323c39a88217eec.tar.gz/md5 b/deps/checksums/Downloads-e692e77fb5427bf3c6e81514b323c39a88217eec.tar.gz/md5 new file mode 100644 index 0000000000000..221a62b1cf231 --- /dev/null +++ b/deps/checksums/Downloads-e692e77fb5427bf3c6e81514b323c39a88217eec.tar.gz/md5 @@ -0,0 +1 @@ +cdaea923f7fa855409e8456159251f54 diff --git a/deps/checksums/Downloads-e692e77fb5427bf3c6e81514b323c39a88217eec.tar.gz/sha512 
b/deps/checksums/Downloads-e692e77fb5427bf3c6e81514b323c39a88217eec.tar.gz/sha512 new file mode 100644 index 0000000000000..b537ef2e9e1f6 --- /dev/null +++ b/deps/checksums/Downloads-e692e77fb5427bf3c6e81514b323c39a88217eec.tar.gz/sha512 @@ -0,0 +1 @@ +e893fbe079a433c3038b79c4c2998d1ae9abaf92ff74152820a67e97ffee6f7f052085a7108410cbb1a3bd8cc6670736b0827c8b0608cc31941251dd6500d36a diff --git a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5 b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5 deleted file mode 100644 index 4e70641a4a08b..0000000000000 --- a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -fa2c90db0e7aa73186c491aa2f03bb2b diff --git a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512 b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512 deleted file mode 100644 index 3f54f39d35ac6..0000000000000 --- a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -d36737b946af5e720402ce4f25e4c69c740bdbdc174385d6448c3660b26fffe34c14af7c4dd4d26ad864ad12771cabdf922c8b3cf4423167a46cdf3001ede125 diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 deleted file mode 100644 index 8bec9dde7fbae..0000000000000 --- a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -6fdeb9332af478502be39af642027387 diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 deleted file mode 100644 index 50c676f808c5c..0000000000000 --- a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -fbb4ab0b99de7e1f86b918b401c2d42883a2bf8e80f6af4d6b85b7ca263d97cca1c47b25aca48359f14dee91b658684c0c590b7f20240bd9e0ce6e960ccf6647 diff --git a/deps/checksums/JuliaSyntax-dfd1d69b153eb119873035e62993a109b27192f0.tar.gz/md5 b/deps/checksums/JuliaSyntax-dfd1d69b153eb119873035e62993a109b27192f0.tar.gz/md5 new file mode 100644 index 0000000000000..51b30461d3905 --- /dev/null +++ b/deps/checksums/JuliaSyntax-dfd1d69b153eb119873035e62993a109b27192f0.tar.gz/md5 @@ -0,0 +1 @@ +e58559668aabb0fa96d598970c4d648e diff --git a/deps/checksums/JuliaSyntax-dfd1d69b153eb119873035e62993a109b27192f0.tar.gz/sha512 b/deps/checksums/JuliaSyntax-dfd1d69b153eb119873035e62993a109b27192f0.tar.gz/sha512 new file mode 100644 index 0000000000000..63a513ec9ae63 --- /dev/null +++ b/deps/checksums/JuliaSyntax-dfd1d69b153eb119873035e62993a109b27192f0.tar.gz/sha512 @@ -0,0 +1 @@ +59e22f7db63a383beadf96a68d4db6ae173d61be6d766ea1792b3a3bd70125f73dd4df9e55bad4c66363aa0b6ff6ea5259d3c91abf42f5fe34446e3fa076cc87 diff --git a/deps/checksums/JuliaSyntaxHighlighting-19bd57b89c648592155156049addf67e0638eab1.tar.gz/md5 b/deps/checksums/JuliaSyntaxHighlighting-19bd57b89c648592155156049addf67e0638eab1.tar.gz/md5 new file mode 100644 index 0000000000000..a86f3fe9c5561 --- /dev/null +++ b/deps/checksums/JuliaSyntaxHighlighting-19bd57b89c648592155156049addf67e0638eab1.tar.gz/md5 @@ -0,0 +1 @@ +401bb32ca43a8460d6790ee80e695bb5 diff --git a/deps/checksums/JuliaSyntaxHighlighting-19bd57b89c648592155156049addf67e0638eab1.tar.gz/sha512 
b/deps/checksums/JuliaSyntaxHighlighting-19bd57b89c648592155156049addf67e0638eab1.tar.gz/sha512 new file mode 100644 index 0000000000000..6e54aef5fd34f --- /dev/null +++ b/deps/checksums/JuliaSyntaxHighlighting-19bd57b89c648592155156049addf67e0638eab1.tar.gz/sha512 @@ -0,0 +1 @@ +db2c732d3343f5a8770b3516cdd900587d497feab2259a937d354fac436ab3cb099b0401fb4e05817e75744fb9877ab69b1e4879d8a710b33b69c95b7e58d961 diff --git a/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/md5 b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/md5 new file mode 100644 index 0000000000000..48bd7a8a7fa25 --- /dev/null +++ b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/md5 @@ -0,0 +1 @@ +405faa2237105ff823e80e759b2df17a diff --git a/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/sha512 b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/sha512 new file mode 100644 index 0000000000000..9fa6aec4d1939 --- /dev/null +++ b/deps/checksums/LazyArtifacts-e4cfc39598c238f75bdfdbdb3f82c9329a5af59c.tar.gz/sha512 @@ -0,0 +1 @@ +9bd2bdd5a83df28a26ebfb0d4e59b50584962e07b1364e6fd76bc7a6a7b109f1facaa04366beaa9f340192ea9efa540decde1393ddd50dc3efa13937deeb5d7f diff --git a/deps/checksums/LinearAlgebra-1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8.tar.gz/md5 b/deps/checksums/LinearAlgebra-1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8.tar.gz/md5 new file mode 100644 index 0000000000000..5bd44506fd874 --- /dev/null +++ b/deps/checksums/LinearAlgebra-1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8.tar.gz/md5 @@ -0,0 +1 @@ +eb4df255412ad9a05b807010f626afc8 diff --git a/deps/checksums/LinearAlgebra-1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8.tar.gz/sha512 b/deps/checksums/LinearAlgebra-1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8.tar.gz/sha512 new file mode 100644 index 0000000000000..23617698dd26e --- /dev/null +++ b/deps/checksums/LinearAlgebra-1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8.tar.gz/sha512 @@ -0,0 +1 @@ +3b4bf7b761d9585fb2d5c5b8418770be4d1d4399a5f25dd5b2e08785506f0732c8e140ada6f82f6d8a7a77a2c2f79e2feecd6eb0e19eda0c3ee519ba554c19ec diff --git a/deps/checksums/NetworkOptions-c090626d3feee6d6a5c476346d22d6147c9c6d2d.tar.gz/md5 b/deps/checksums/NetworkOptions-c090626d3feee6d6a5c476346d22d6147c9c6d2d.tar.gz/md5 new file mode 100644 index 0000000000000..87111ac121562 --- /dev/null +++ b/deps/checksums/NetworkOptions-c090626d3feee6d6a5c476346d22d6147c9c6d2d.tar.gz/md5 @@ -0,0 +1 @@ +b851cab503506c37af6e4c861d81b8ce diff --git a/deps/checksums/NetworkOptions-c090626d3feee6d6a5c476346d22d6147c9c6d2d.tar.gz/sha512 b/deps/checksums/NetworkOptions-c090626d3feee6d6a5c476346d22d6147c9c6d2d.tar.gz/sha512 new file mode 100644 index 0000000000000..79f9e269ff599 --- /dev/null +++ b/deps/checksums/NetworkOptions-c090626d3feee6d6a5c476346d22d6147c9c6d2d.tar.gz/sha512 @@ -0,0 +1 @@ +4cba5c531e5e7205bb6d7f0179da8b29ca7c4dcf42f27de5f70be7674efc1fa92ea22e134e6584743e2905edbd754838d8a02f6ba7811c7a5b99ab9db3bde596 diff --git a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/md5 b/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/md5 deleted file mode 100644 index 9e91b76f9a3c8..0000000000000 --- a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -16bc9f2eefa3021e19a09ffefc84159b diff --git a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/sha512 
b/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/sha512 deleted file mode 100644 index 551f7c8da347c..0000000000000 --- a/deps/checksums/NetworkOptions-f7bbeb66f05fc651adb12758b650e8630a998fbd.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -5b53c09343e25b5bde7ea12c2119da656040ca5f62ce934f00f57945ce73dfaf26522da6a9a007ba06ac6fd75a285cbcbdf5edaf9113faa7bba0398294fbd684 diff --git a/deps/checksums/Pkg-bc9fb21b1f2d72038491eff938673fc5fbc99445.tar.gz/md5 b/deps/checksums/Pkg-bc9fb21b1f2d72038491eff938673fc5fbc99445.tar.gz/md5 new file mode 100644 index 0000000000000..5180b5f916d1b --- /dev/null +++ b/deps/checksums/Pkg-bc9fb21b1f2d72038491eff938673fc5fbc99445.tar.gz/md5 @@ -0,0 +1 @@ +2332986e216728bc85e364994f2ed910 diff --git a/deps/checksums/Pkg-bc9fb21b1f2d72038491eff938673fc5fbc99445.tar.gz/sha512 b/deps/checksums/Pkg-bc9fb21b1f2d72038491eff938673fc5fbc99445.tar.gz/sha512 new file mode 100644 index 0000000000000..04bc79171c734 --- /dev/null +++ b/deps/checksums/Pkg-bc9fb21b1f2d72038491eff938673fc5fbc99445.tar.gz/sha512 @@ -0,0 +1 @@ +99bf03f921ae79767009dbd68a94a7119513b2454d5c9832b157bc1e092a35a6b90cb7a5d81346a7d927f9b0275328582098ca6b8b376b4a406ddf0b3167a280 diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 deleted file mode 100644 index 8710722b5409c..0000000000000 --- a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -f0e62f7b63dc9400caa2fec1b91b7889 diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 deleted file mode 100644 index c92e62d861633..0000000000000 --- a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -e48ee01791f58d41715fd44e16238d835315e930d3ef529dd3f3b5660935f7f0ca2c5163ec9c4e4d90e4ead5328f39e0bfffa88223c2094c8727460eac022cc1 diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 deleted file mode 100644 index f682cf3518658..0000000000000 --- a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -de53629eb0b1ce98ac6b245bdbf14e9d diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 deleted file mode 100644 index 870098ef7aada..0000000000000 --- a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -71cdc58b03cc4f42f8c4b9c2353d6f94d77b4ac5c9d374387d435c57ba85e966f3be4e8c8447b34e184cb8e665c42b3cd2c9d9742c86f7fb5c71a85df5087966 diff --git a/deps/checksums/SHA-8fa221ddc8f3b418d9929084f1644f4c32c9a27e.tar.gz/md5 b/deps/checksums/SHA-8fa221ddc8f3b418d9929084f1644f4c32c9a27e.tar.gz/md5 new file mode 100644 index 0000000000000..52e05f5e427ae --- /dev/null +++ b/deps/checksums/SHA-8fa221ddc8f3b418d9929084f1644f4c32c9a27e.tar.gz/md5 @@ -0,0 +1 @@ +e52615827242aae56422a4f73a8c6878 diff --git a/deps/checksums/SHA-8fa221ddc8f3b418d9929084f1644f4c32c9a27e.tar.gz/sha512 b/deps/checksums/SHA-8fa221ddc8f3b418d9929084f1644f4c32c9a27e.tar.gz/sha512 new file mode 100644 index 0000000000000..e6b8446587554 --- /dev/null +++ b/deps/checksums/SHA-8fa221ddc8f3b418d9929084f1644f4c32c9a27e.tar.gz/sha512 @@ -0,0 +1 @@ 
+7b1df257616aaa9067f822a88dddf52bc10f9f61e3a0728e33e595455bd7167e680c50371c41cb25f8c8a9fb9cf40225847df1523a6c6f3571a471f7163f563c diff --git a/deps/checksums/SparseArrays-5f527215c188ee99247cdce31ba8ce9e11f35055.tar.gz/md5 b/deps/checksums/SparseArrays-5f527215c188ee99247cdce31ba8ce9e11f35055.tar.gz/md5 new file mode 100644 index 0000000000000..946bec189c1bd --- /dev/null +++ b/deps/checksums/SparseArrays-5f527215c188ee99247cdce31ba8ce9e11f35055.tar.gz/md5 @@ -0,0 +1 @@ +4b07db52a5a6d3cc6eeab380bd783a1e diff --git a/deps/checksums/SparseArrays-5f527215c188ee99247cdce31ba8ce9e11f35055.tar.gz/sha512 b/deps/checksums/SparseArrays-5f527215c188ee99247cdce31ba8ce9e11f35055.tar.gz/sha512 new file mode 100644 index 0000000000000..846867193d932 --- /dev/null +++ b/deps/checksums/SparseArrays-5f527215c188ee99247cdce31ba8ce9e11f35055.tar.gz/sha512 @@ -0,0 +1 @@ +4c631f1046ad0a6b972a4dce285c2092372ecbed269c83524c10b4be5124035670d703af53e1f8058d23230be20c06aa554097cc9bc7a12b3de3c039d3c545e8 diff --git a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5 b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5 deleted file mode 100644 index 96861ba265b5f..0000000000000 --- a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -e6dc511b49e07a167848adc4e12690d8 diff --git a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512 b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512 deleted file mode 100644 index f503304f810e4..0000000000000 --- a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -f40fd137ccd6651fc8b697f57cfcbd8e3feccb99f6a6b32fbaa69cc0160b78cefc662b914ff8f4e48478ca48f9583318a6030d922d43ed66f8db59fd5985f768 diff --git a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5 b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5 deleted file mode 100644 index 7e7a889eecd29..0000000000000 --- a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -6564297a5f5971231809bf9940f68b98 diff --git a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512 b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512 deleted file mode 100644 index bbe9b8bed6371..0000000000000 --- a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -22d14c82a30f3ec7af09028423cc823808abf86918d5707fd1fcf6ca20dea7871589da9b22e462d194e86fcee380f549aeb65f585048f00bf23281786b17e040 diff --git a/deps/checksums/Statistics-d49c2bf4f81e1efb4980a35fe39c815ef8396297.tar.gz/md5 b/deps/checksums/Statistics-d49c2bf4f81e1efb4980a35fe39c815ef8396297.tar.gz/md5 new file mode 100644 index 0000000000000..3956c67f7fd47 --- /dev/null +++ b/deps/checksums/Statistics-d49c2bf4f81e1efb4980a35fe39c815ef8396297.tar.gz/md5 @@ -0,0 +1 @@ +acf2bb0ea30132602e172e2f5f6274b4 diff --git a/deps/checksums/Statistics-d49c2bf4f81e1efb4980a35fe39c815ef8396297.tar.gz/sha512 b/deps/checksums/Statistics-d49c2bf4f81e1efb4980a35fe39c815ef8396297.tar.gz/sha512 new file mode 100644 index 0000000000000..051f2d0a862c3 --- /dev/null +++ b/deps/checksums/Statistics-d49c2bf4f81e1efb4980a35fe39c815ef8396297.tar.gz/sha512 @@ -0,0 +1 @@ 
+5e879fe79bae19b62f81659a102602271c73a424faf4be069ab31fb50e30b536a8c7b3692127763000cc1dbab69c93ac3da7bace5f093d05dce2d652fb221d52 diff --git a/deps/checksums/StyledStrings-8985a37ac054c37d084a03ad2837208244824877.tar.gz/md5 b/deps/checksums/StyledStrings-8985a37ac054c37d084a03ad2837208244824877.tar.gz/md5 new file mode 100644 index 0000000000000..0fd8e8966e068 --- /dev/null +++ b/deps/checksums/StyledStrings-8985a37ac054c37d084a03ad2837208244824877.tar.gz/md5 @@ -0,0 +1 @@ +411277f3701cc3e286ec8a84ccdf6f11 diff --git a/deps/checksums/StyledStrings-8985a37ac054c37d084a03ad2837208244824877.tar.gz/sha512 b/deps/checksums/StyledStrings-8985a37ac054c37d084a03ad2837208244824877.tar.gz/sha512 new file mode 100644 index 0000000000000..0b495aefef55d --- /dev/null +++ b/deps/checksums/StyledStrings-8985a37ac054c37d084a03ad2837208244824877.tar.gz/sha512 @@ -0,0 +1 @@ +95a7e92389f6fd02d3bec17ec0201ba41316aa2d7c321b14af88ccce8246fd0000ed2c0cc818f87cb81f7134304233db897f656426a00caac1bc7635056260c2 diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 new file mode 100644 index 0000000000000..2f81a0d9191b5 --- /dev/null +++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 @@ -0,0 +1 @@ +46541001073d1c3c85e18d910f8308f3 diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 new file mode 100644 index 0000000000000..e2eb44845e276 --- /dev/null +++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 @@ -0,0 +1 @@ +f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672 diff --git a/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5 b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5 new file mode 100644 index 0000000000000..3c7510a592760 --- /dev/null +++ b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5 @@ -0,0 +1 @@ +1d606dfc60d2af892009213650169129 diff --git a/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512 b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512 new file mode 100644 index 0000000000000..ec49e695cbb3a --- /dev/null +++ b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512 @@ -0,0 +1 @@ +6e60d74d00ffc2e1a5a9c13f59b3e3fc4360e641b9f0e3e4797c8b524288e779397bd56a8e57f47d5a06d1e6f359c86917164ec7f6e0ac3d6e876dfa09d2b0c8 diff --git a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/md5 b/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/md5 deleted file mode 100644 index 40d52c2803746..0000000000000 --- a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -438818cad063d6808354a9b4aecd3001 diff --git a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/sha512 b/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/sha512 deleted file mode 100644 index 27c57c5051212..0000000000000 --- a/deps/checksums/Tar-ff55460f4d329949661a33e6c8168ce6d890676c.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -f9a6e7757bbcca09a84d92ab3a2690a51612c318bdfd98bbb4ffcef56305b019029838e5f1483c9febafa7ecb5e735e68855bc82d04b593af04a446e32436145 diff --git a/deps/checksums/blastrampoline 
b/deps/checksums/blastrampoline index 011b0f6e4704d..9e007f6055cf9 100644 --- a/deps/checksums/blastrampoline +++ b/deps/checksums/blastrampoline @@ -1,34 +1,38 @@ -blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/md5/0478361eac783b99002b1ad985182f05 -blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/sha512/2489ce5770a9861889a2d07e61440ba4f233a92efd4a3544747f83320e0e7a229a8fe01553d99f5f1d98713316f2506daf0adb7d024a46e32b3de1bb2966d637 -libblastrampoline.v5.8.0+0.aarch64-apple-darwin.tar.gz/md5/a28837b9838fef2b3831de3278ec7949 -libblastrampoline.v5.8.0+0.aarch64-apple-darwin.tar.gz/sha512/111ac2fe5f8f8102f2f7c9e9e6aa1d1a12d2db941238c949ff8e64b30335e8b2f6ecce0d5f577879c231eb839c06e259302b709f3d34e94a97047bfa984222f6 -libblastrampoline.v5.8.0+0.aarch64-linux-gnu.tar.gz/md5/9e781a026e03118df81347fb90f10d45 -libblastrampoline.v5.8.0+0.aarch64-linux-gnu.tar.gz/sha512/89469f32a666efd46437351a8fb16758c35e5aecc563d202b480c10ddf9fa5350a5a321076b79b0a1a07ec2cea0b73aa5c28979cc382a198fa96cca0b5899d25 -libblastrampoline.v5.8.0+0.aarch64-linux-musl.tar.gz/md5/b7acda2fdd157bbb183d0dd33643beef -libblastrampoline.v5.8.0+0.aarch64-linux-musl.tar.gz/sha512/cf4125a47334fe2ec0d5a4b11624b12e1366ec031500218f680ad5a53152b9d752c0c02a0b92d0e07f3eb21f2f8f58d0c587438a4869a72197bbd5e91531369d -libblastrampoline.v5.8.0+0.armv6l-linux-gnueabihf.tar.gz/md5/eafabd99fb1287d495acb8efb8091fde -libblastrampoline.v5.8.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/63ff4e6bc400fa8ee713a1c5ae4af0a8e152d49860c6f5e94a17e426ad9f780d41cc0f84d33c75ea5347af1a53f07fc012798d603b6a94ea39f37cfd651a0719 -libblastrampoline.v5.8.0+0.armv6l-linux-musleabihf.tar.gz/md5/9788f74b375ef6b84c16c080f2be5bdd -libblastrampoline.v5.8.0+0.armv6l-linux-musleabihf.tar.gz/sha512/f00ebf794927404e2294a2fbb759b1e3e57836c7f683525fac0b2ac570da2c75904e43f154cf76fce310a624f9b35fbd40e6c7757882bb6f30db790f4221a543 -libblastrampoline.v5.8.0+0.armv7l-linux-gnueabihf.tar.gz/md5/4492bace63d8274d68ecdaa735e47e99 -libblastrampoline.v5.8.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/8868283e6c5224b80145fdfd17f13f713053ba94e49c170f38f0cbf9f794185d7dec9c107ce65dc76121d3ac5b21d2f3857f619d8279bede86a906230ff59a71 -libblastrampoline.v5.8.0+0.armv7l-linux-musleabihf.tar.gz/md5/d66b6ed1d4e5f6a130f36791063e651d -libblastrampoline.v5.8.0+0.armv7l-linux-musleabihf.tar.gz/sha512/414ad07574a6e9aa670bbfea13eaea11da13129c9ccb4193cad708014c31493ff10ff427558b90cb16040fa64c8a325c2e375e3310c39fb37bb3e7fdb6a72a5f -libblastrampoline.v5.8.0+0.i686-linux-gnu.tar.gz/md5/595199a3a01174cfa4d9ce3407bf30dc -libblastrampoline.v5.8.0+0.i686-linux-gnu.tar.gz/sha512/02c3b0c3c0a411d5090a081f3bbbe38aaae40eaa5fe63d0690e0582e233cd9ce76483922557d4f65dc457e29a4e84d86ee5af20a60b082aec7bec4ca8607c1ca -libblastrampoline.v5.8.0+0.i686-linux-musl.tar.gz/md5/5832d0044842cb84f4e1e1b0a04b8205 -libblastrampoline.v5.8.0+0.i686-linux-musl.tar.gz/sha512/d28954d0feef6a33fa0bfeb59acb68821222d36a4e353eaf41936ee2c9aace719c2d0f0b0f080eafe2baecc67a29de4cacc0446aac776bbb615c4426d35c9c8f -libblastrampoline.v5.8.0+0.i686-w64-mingw32.tar.gz/md5/89c07640b6c7ed719199b0cd0a570961 -libblastrampoline.v5.8.0+0.i686-w64-mingw32.tar.gz/sha512/71241e83501ed473af0bf60a3223075e22a48788fdcf0ad5b2932861c89ec0741c61bf6a04c8a26e68b2f39d360b6009a79ea2502b5cccf28249738e7796be89 -libblastrampoline.v5.8.0+0.powerpc64le-linux-gnu.tar.gz/md5/5f76f5c6a88c0caaa6419ba212f8cb94 
-libblastrampoline.v5.8.0+0.powerpc64le-linux-gnu.tar.gz/sha512/785071e682075b2cebd992394e66169f4ee2db3a8e23affb88dc05d9abf55f49d597b2a7400a13c83ad106ad825b5ee666b01f8625e51aec267132573273991e -libblastrampoline.v5.8.0+0.x86_64-apple-darwin.tar.gz/md5/21beb51d448bd22e4608a16b3f4fde05 -libblastrampoline.v5.8.0+0.x86_64-apple-darwin.tar.gz/sha512/620ba64d93ef416e483f813617aa313957282d8361f920b5444702fa911ff0051d1f8a8814b5fa0b082fd4dc77d96cb8b763937c786959bbc97cbb6131617152 -libblastrampoline.v5.8.0+0.x86_64-linux-gnu.tar.gz/md5/14c1045ba4d400f490ddea5343a46f04 -libblastrampoline.v5.8.0+0.x86_64-linux-gnu.tar.gz/sha512/0fdae83f4df93b28951521cf426736367f568c1e76fb68eea42b045cc9a288b6836abb3206a6d61e4f88adcf198553e911c45231aecb0f552e06de28eb3bec54 -libblastrampoline.v5.8.0+0.x86_64-linux-musl.tar.gz/md5/59b110676fcb2fcfdcf670a5d435d555 -libblastrampoline.v5.8.0+0.x86_64-linux-musl.tar.gz/sha512/57a5022e9fabc0637a29f3c32f6180cb4f6a90282191232e299df6cea5265b535e4a0af4fde15c8fe80e5a59edea0fae96dd3a510f5720ecd78e85a2a9ffbfe0 -libblastrampoline.v5.8.0+0.x86_64-unknown-freebsd.tar.gz/md5/cb1c14b4f8754561c5eaf8502582f09a -libblastrampoline.v5.8.0+0.x86_64-unknown-freebsd.tar.gz/sha512/d3b19a2a9b3dc674119590d920a2e99705de823e7d01a210485b31f8b1ce59253c4a70f2d8fb967f7fa05efb6ac376d94e79ffc6848607a366b2f0caa58b4208 -libblastrampoline.v5.8.0+0.x86_64-w64-mingw32.tar.gz/md5/34fdc53745245887f968f420b2f02ed9 -libblastrampoline.v5.8.0+0.x86_64-w64-mingw32.tar.gz/sha512/bbf478736b7bd57b340ccd5b6744d526a7a95fc524d30fdf9af6e9d79285641be26fae5f9e5302d71a5be76b05c379e969a829e259d8100ba9c6ce202b632b3d +blastrampoline-b127bc8dd4758ffc064340fff2aef4ead552f386.tar.gz/md5/395f2035bcb52e886b55ac926a7bf183 +blastrampoline-b127bc8dd4758ffc064340fff2aef4ead552f386.tar.gz/sha512/9ae0fe2ca75dc0b2c784d5b7248caca29ed6d44258743ee2b32827032734757e9078dd6bcdf80a02b042deb5c7ca7b4e5be392be6700efde91427091fb53a03f +libblastrampoline.v5.12.0+0.aarch64-apple-darwin.tar.gz/md5/9a18b39bb575d0112834992043d302c0 +libblastrampoline.v5.12.0+0.aarch64-apple-darwin.tar.gz/sha512/4e406b155149414d3e4fd5db49ab56a87ed13577ebb399eaf8a251692c0b84e639c6e1a4eb20863e2638c31add0241ca916e57f91bb5a4aed07e2c56cc580870 +libblastrampoline.v5.12.0+0.aarch64-linux-gnu.tar.gz/md5/e100e93f0d6a104fc66c9f78a67150c5 +libblastrampoline.v5.12.0+0.aarch64-linux-gnu.tar.gz/sha512/f7e0c379e32d8163dbb4919b77e9637e1b16cf26618b9260222cf985bfab9ca3f36bebccd0e8360af68db925035c82127ba85d46b4a6578961dde6a049c7cf93 +libblastrampoline.v5.12.0+0.aarch64-linux-musl.tar.gz/md5/814a79e8cfe8744ca5a2a722f007fcaa +libblastrampoline.v5.12.0+0.aarch64-linux-musl.tar.gz/sha512/bc886b199500fc4245a95446d4c862fc636711e0875a9d5cf9aef661d819d00324adfd3e037d9c03e274be26034353d033fb041e7608ecef222e1d154f38337d +libblastrampoline.v5.12.0+0.aarch64-unknown-freebsd.tar.gz/md5/9b9a7fe0e45a73009bb9f8044f4a27a2 +libblastrampoline.v5.12.0+0.aarch64-unknown-freebsd.tar.gz/sha512/51d52afb13e326ef4750bdcad800aaf3db2c9e068b4c38bd148e312c63358b2228b81d23626d18b8983534a8a6f24df1b64b4e7121779d2535574ea907bd18ba +libblastrampoline.v5.12.0+0.armv6l-linux-gnueabihf.tar.gz/md5/1b6fd062d133b13e8efc63f08528fb51 +libblastrampoline.v5.12.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/78d525f425ee27068b94b94f89ef44a51ffac9f642ffe66e177434804e59b4ac3ba875190aceee386a8d740f7903e979e5b91f0973138d0fc7753061c6f5f26d +libblastrampoline.v5.12.0+0.armv6l-linux-musleabihf.tar.gz/md5/506be2b7669aa171efcc541388cb5444 
+libblastrampoline.v5.12.0+0.armv6l-linux-musleabihf.tar.gz/sha512/2975136376c3f61b8f227676c4e1368d1847d85ff469dddbc0a330635eac77c00072c7544ae4aa9981d16a4ab04d494be54fc951b434a56fbf14003c42626579 +libblastrampoline.v5.12.0+0.armv7l-linux-gnueabihf.tar.gz/md5/99403eae880f52aa97884143e2ca7215 +libblastrampoline.v5.12.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/986dfcf5fe3ac731df3c71eb6b0bf3d7525511952d22cc9128ff35e6fcb330acf69e897aeb97920ebabd1ccccd1dd6ce9b6c16d0dbf661d39a103ce5b477462f +libblastrampoline.v5.12.0+0.armv7l-linux-musleabihf.tar.gz/md5/20adf8d2ef348f5362cb03e1a2780476 +libblastrampoline.v5.12.0+0.armv7l-linux-musleabihf.tar.gz/sha512/95068a3b5bcf17bd5f13373a2730a6508d3992f0aa83a91629527821cf038b9607327843cc44fb72730b63c01d3d70e2eb488eca8f48ed9444d7736f67745d02 +libblastrampoline.v5.12.0+0.i686-linux-gnu.tar.gz/md5/a56f833ad986fc3e9e64e5abdb16915f +libblastrampoline.v5.12.0+0.i686-linux-gnu.tar.gz/sha512/d478b4981dc17afb8aa8625fdbb23139f1c3edaa9aaa179e70d274984a056147b2e65e9f473b007733d094369f448823c33aa95fadd228016ecf9dfbf17f06bb +libblastrampoline.v5.12.0+0.i686-linux-musl.tar.gz/md5/8578119b3b3e84393e6324996e9506aa +libblastrampoline.v5.12.0+0.i686-linux-musl.tar.gz/sha512/b546de6687755ce43680f312008a23a8f9df422603098807f33e2ae969c9e9de0ca32a3319067d4f8fa1f782f21b6465638cd59e4c86fc6261fb4180f0ed116f +libblastrampoline.v5.12.0+0.i686-w64-mingw32.tar.gz/md5/b9e2800b8758d3fa0ac0597f738c399c +libblastrampoline.v5.12.0+0.i686-w64-mingw32.tar.gz/sha512/e0aa0ee2a750cfe702e0bd5861e352f97f433f67444dbc6e5814055fb32f571de318f640ac670c91bad233f8af85f0421daef71b7768a710de5b15febee28b27 +libblastrampoline.v5.12.0+0.powerpc64le-linux-gnu.tar.gz/md5/bab2048857c7c1ba4a6c3d540b9275c6 +libblastrampoline.v5.12.0+0.powerpc64le-linux-gnu.tar.gz/sha512/576026c970b19cc00480d7bb9439933c5bb432eec17def66b22f5c0dfd418bcf75bb10ccfc1b01fef48e8d504ebf953c5f6c63d504713315c43d9579ab5fa2e4 +libblastrampoline.v5.12.0+0.riscv64-linux-gnu.tar.gz/md5/f37e2849a948a8c8c8bfa6055e30909c +libblastrampoline.v5.12.0+0.riscv64-linux-gnu.tar.gz/sha512/89f30d52f1a1dcc0aa38b4b343534b7fadcff12d788f455172c043ea2511c03b2735fdacf8f794a6f62156cb5d82fb0e9e0edd04bb9c57a1ca3e680410456b17 +libblastrampoline.v5.12.0+0.x86_64-apple-darwin.tar.gz/md5/b07c42b602b91bf2229b1a5cfd8e37b3 +libblastrampoline.v5.12.0+0.x86_64-apple-darwin.tar.gz/sha512/ab064dff373826776f9b64a4a77e3418461d53d5119798a5e702967e4ac4f68c58cd8c3c0cc01bda3edeb613cf50b9d3171d9141c91ff9ef3a2c88a8e8f00a37 +libblastrampoline.v5.12.0+0.x86_64-linux-gnu.tar.gz/md5/c37b01242012e51e124711d5ad10cf97 +libblastrampoline.v5.12.0+0.x86_64-linux-gnu.tar.gz/sha512/3f9015bec4aaddc677cb3f3aebd432db8bad89b3f6e563634a37569afeb9fb0efa4f214166c984c2c1926831d5cd79fcd4d605d40675e0d1a7e494a76c066f02 +libblastrampoline.v5.12.0+0.x86_64-linux-musl.tar.gz/md5/c24e440a1757a45f087a2e1ac649fb45 +libblastrampoline.v5.12.0+0.x86_64-linux-musl.tar.gz/sha512/824b930d50df929fd22ead6dffad06593d2aad9fcb149f07f1c2f6d4b7b34911e89c2be5a1e9b8ad5ad8292ac29f9e5dbe6d7bb205d2b207432ade61ae5f8b68 +libblastrampoline.v5.12.0+0.x86_64-unknown-freebsd.tar.gz/md5/5721328a24473cefbb3e77ba85e46922 +libblastrampoline.v5.12.0+0.x86_64-unknown-freebsd.tar.gz/sha512/3537ea491828492f1cb68fa961dc5574b63a88b49abf19eb86f9d1a4544e1398fcd84d6338c6dcb9550ee3abcdcab0654f5cc2b85699c5ed5b3b31a1c35a199d +libblastrampoline.v5.12.0+0.x86_64-w64-mingw32.tar.gz/md5/450afb701cc2899c7c083bd3f3e580a0 
+libblastrampoline.v5.12.0+0.x86_64-w64-mingw32.tar.gz/sha512/e4d1785a06b051a4f16edd7343021eed61ac45cf45d26b4e3ef1e54cfaadb44da2e74b7d854e31b05a733dbb3004f3e85644967316c4f41d1ad64400fed126f2 diff --git a/deps/checksums/cacert-2023-01-10.pem/md5 b/deps/checksums/cacert-2023-01-10.pem/md5 deleted file mode 100644 index 92063050b50f3..0000000000000 --- a/deps/checksums/cacert-2023-01-10.pem/md5 +++ /dev/null @@ -1 +0,0 @@ -e7cf471ba7c88f4e313f492a76e624b3 diff --git a/deps/checksums/cacert-2023-01-10.pem/sha512 b/deps/checksums/cacert-2023-01-10.pem/sha512 deleted file mode 100644 index d3322e5890f81..0000000000000 --- a/deps/checksums/cacert-2023-01-10.pem/sha512 +++ /dev/null @@ -1 +0,0 @@ -08cd35277bf2260cb3232d7a7ca3cce6b2bd58af9221922d2c6e9838a19c2f96d1ca6d77f3cc2a3ab611692f9fec939e9b21f67442282e867a487b0203ee0279 diff --git a/deps/checksums/cacert-2024-12-31.pem/md5 b/deps/checksums/cacert-2024-12-31.pem/md5 new file mode 100644 index 0000000000000..b01bf68ddc247 --- /dev/null +++ b/deps/checksums/cacert-2024-12-31.pem/md5 @@ -0,0 +1 @@ +d9178b626f8b87f51b47987418d012bf diff --git a/deps/checksums/cacert-2024-12-31.pem/sha512 b/deps/checksums/cacert-2024-12-31.pem/sha512 new file mode 100644 index 0000000000000..c12b8215a7855 --- /dev/null +++ b/deps/checksums/cacert-2024-12-31.pem/sha512 @@ -0,0 +1 @@ +bf578937d7826106bae1ebe74a70bfbc439387445a1f41ef57430de9d9aea6fcfa1884381bf0ef14632f6b89e9543642c9b774fcca93837efffdc557c4958dbd diff --git a/deps/checksums/clang b/deps/checksums/clang index c16dd849e6fc5..2158589b5cef5 100644 --- a/deps/checksums/clang +++ b/deps/checksums/clang @@ -1,108 +1,112 @@ -Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/5dce383804bd3d404b8a1936c12ba457 -Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/5661a1cb2044ded03566c9316978595d692667fbc4e951feca658f9986a8557196557b05ccddf1b00b818aac0893696c3bbbf63a35dc9ed7df146b4488529f6a -Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/549cbc6fa28ebee446e99701aded16e8 -Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/55eea0b514aa2e43ad2f373ad25ea4fad5219ff1cd8d5b639914c218a0a454ae9b27b8d022ae73771d8ec89fa329f5bfde538817653cc59e569b600148d56842 -Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/ac3cd40e47702f306bc42d6be5826029 -Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/73b217caa53702bc6fbbb3286241b7a20c111358cb9436283e9f7f9fec90436d5b54cb4c332afb7e447867a40ba46c9e3b93464acefbca7c0bb6191001525cbf -Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/b1a656501493c15b98442bde584a34d7 -Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f424254cc887301d4d5b04fa71e2c7da6e4d561725d5b06278925e05be1c62a74769f19c37b431c2e2d73e7e5129acff07ac54a0b7fd381821aece27f260c116 -Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/969170b1a791e89a0094154f34023e86 -Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/d6ae356c9b1b80cbc5cea4eb8632b77ab3ce0d060b103cec4a5f1c73feaaf60688c2253034b2a6e132273fe04c803de93f415cbe2ef40cf1d6f6a30dcfa03af3 -Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/22d599b774af41dcaa54481cc6325b1c -Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b0f257d45f1a920f46b18049b762b5a3cefdf8683c4dce46f48ce2993e6a622dbdfaaa6cc9a9cda8a7f047094a6f804091d1ba6c83e26cefc38fbd1ca5c0a536 
-Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/f2f5064217c14700f0f933b704fff233 -Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/2284978d8cfe22aa49b1f3b161c75cb0c9d43f84674ba58a1335edf818b91c6ea1684a9c3580f2e1918fdc050a624c698a4e87dc163e9076b9d6c0023c989d7a -Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/eafd72ec24ec81d42cb044e4e4d638dc -Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/bbfc6c9179fc43a1db0ad82fc8c1fcc8ec8ce94d5c32b38cd1f88490dedc67953283995c0dd4db7262a9206431135cf2671c6ecc6580da65ba8ff4ec0323ab64 -Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/0432eb21283647995e35bd0d486148ab -Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/561beaf45770c06b35bc1626e93a0cd89874026a8afa22017b40eb1e6ba306b05305619d42a4a2145c576b1dcc77ade80cd0bf0e0237761f3517f4db402f9b74 -Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/653b9b87f2573818d66992f969f7811e -Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/517df570b40b51a4f4cbcecbdaacdf0b592fce66ec328139d95eaf8b63c89a1adb41a9cfe4982f5bc032fb29a6b967dc1b16b0eced98cd78756ced36ff2257d8 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/4b1a5cf46925575bbc6765f3336e1cc8 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/7afb23aa5ce823b1f2371e038faf311e8e21c3843cc50a0b1473038cd746fcdc77dede67130631bfaee778c3d42ac1eaa23ec664a82f43e2ad406962f3019479 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5a6200aef0e6660bb156ecf3e53cc3c8 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/0dc564fe753fbccfa03ac94e19828ea5ba2b8b74e7adbe7f501ac8b11d1ed8fd85a65572dcdf957018bfa1be3a6babadb1ec3937966347fe49fb38596a4b1728 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/ad693e5cf8f2583c3311a39c095b0bf8 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/b4e1120c960bd69f2643f185607bb2139095fa7a2f943fffec65ccad9422f2bd801131185cbeea1b75298c64cbf109fe28bae54c1b9917fe1ce8b2248d623668 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c04cd594e25324c42d97739d72e772e1 -Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/5aeeedbc3f0f8327f7760abe3eb6fda368353a7b429e31ff47a7bf42d612d070cc86f0e97031ca0c2fa9f9f448757d59b2652d89bb05b27fd380f2116a5beb6b -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d706ad9062539a37df1e5cedc084086a -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/4862bbe0c71fe0e8cfddade0f881637ae5f58263208e1154f2284884ddf4ad43d76d98bde57904829f2218db21e4fb6ac038e231b682455fa22deeabe65f1336 -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/6cc35754a4378902f9f126139bf299a5 -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/4256e9c3f58dfc896d56eeccd7495601ec585e208857de14f91e2d95295a4d03009149f49254be40b27affd5a2250323c6d0744e1ddfbd5fb924fdedc8a993d6 -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/128bb901686224fb6d32c9689c03cc21 -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/b7048ff3d8a3b3e3cddc49b2cd9fbda8ad308fe10e932e8d90000e76c12059547342a1132149254628077d0efc36b34479688b3e9f32e7364301e85a18304cf8 
-Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/d860412ac46bdeef203a578f0bfc5b05 -Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/497fa51af138b3c645d5017165aea6d33410262d2ce69e322b259b34fbdcf52a131541dbac66fae8b9a9027b70771199f9a76869721bf18760065ca7cb3b5364 -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/6fb13f1cc2aec210298c3045f8a7fd94 -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/085c94f43fb46ecc8cadfed5c5d91978c9ddb9d647eea6e82ff0a548eec53dbddc77721faaa8c43ab5b0674f83fef7aa3b34ba0dc273feabdbb8cb95bf5534ee -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/63d765b268e792df2aa92f3689de23de -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/32b2397bb4b627f0ad9b00838e30c965feca902e417117d0884244a2be6a50e0d4d40e55a27a87616e33819967455f90ae0a4319c2eefefd49b82e9041835444 -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c00e93211a1e470f1b00a53e776a9e3c -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6621b3ab12302657ef2441482e8bc6335535964fda472ab8378221e4a9cc0813968589f457e1af66141821cdedbf8eff3080c20105eec810742e5539fc329fcf -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/254fdeddad203954ec0531875cecec8c -Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/84a19469231a9204a553abc58073e423173ff828445634501a61837c0e249ed003f9051fcf1da4eb16201f80d755e7bb4b7513536c749eb1e7ea78c7ded59945 -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/0475a3e401b59e1a34dcbd9d9b980823 -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/e0c9e1e18cc4f7106effaeb04e0e6f41fe8ad872d67d3d0da928ce36d1bce6be3d5231d149b2d404b3a4b99900b50d280ac6f7dd8965d30c4dcd3913590144a6 -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/08c9e802640582af0b79bc04702c9771 -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/d4f413bbb5d5c3ae01cea2b87ef4e46816023fcf4373f00fca13f2edc6278eac651718feea3f8c7d04d3ef82360417dd93b6c7163d54ecd79a3811a0ed588054 -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/e7c253db924ea5cb5098be57029e009f -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/373884c492e5862aaff27f5782ba44e202e581e4faeb2cffe14bd696a590c0bc72459fccf3342aadbf189282af0c43efe3db113caa47c27c3ea556f0b3313e7e -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/9c1867e316ac258d9199b389ea053d2d -Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/9537f285d2a06b8c86ff21aab9daad1ba7e71bcfac55d780c693da8cc250707011ee22ed021e387422543b1e2abbc34de1a7fe49175a27a9c11e43b00549f1be -Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/f9a13a80efacf45f49d6d7591d2cc3ea -Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/c7edc55c4f76ae086080ba639d83793738884b9385618c52b30f5c3fadb0ed2a31bbe95ab80c5eee8504ec6301d73fc7318a8c0f877ba8b5f51170de51179d9a -Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/c9911680ea55b36c4b9f59cfda2a8e33 -Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/9c3722bd402627a4f51b4c98c8712a85031aa79380fe38be0db9df13a5cfabe428fcc7d5d5cf804ac4387d738cad1796bb3f341ebdcf4726ea7f699c6de586e9 
-Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/db82d62c163f69038364189a60b18d09 -Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/5dc415426bd99dc2d7b5fc4fe3f2bb1aabc8961fc2b03a2bc14562f330b273c4d1942d7ea5f05b38c76ee753b440cc4f92015a25f9de7980aa3b1d52f7d0f2bb -Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/67b7194b31f68db8ffcf5ec250948740 -Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/a032c2ae911b6318ab23950ac74dc95f2c8bf815196be62e410b20cd2e271c4154f916388d119ca91c77e07853ba2c56bd5e75a4ce6742d2a7bbd9d3e61853ea -Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/50b4fa021c1c9b6bdb29eae63ea22103 -Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/40b377df590521e5291c3f2f9daa8d60863c03253b07d0e537288324819a909ab3466b710b10b1a92ccd6f3566702c515d808f03e6d9fe9d01617b9a836bb63f -Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/d2da27ebc23793c107cb03e176f02d6e -Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/3ed297cfd3c1ec03cbff10d7b54f9f4a374a9cf8c699287f179ebd5fa000dd525fdbed3c31b59a8ae32ef1c56115c3a84640d776f01c8a92bfae979c332043f5 -Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/aefacc80a5f704aa7498b35dfc2441e6 -Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/76c7fd64fc4323ca442fb0aa30b236355b26328f897ea8cf3e3be029246574d150a9790ae1c45b289e4fc3050fdacc20b6d57b588a707f6d0750e6da91815edf -Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/59048d333a8a261d079673828c174d96 -Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/bcd0c3c5e04cea24383fc2472f6190e48f8738fb7fa625ad700d1997f8aa81c9b6909af0fc38a2287b80756fbfd01300f3388c19c8df791d78ed913d8d59dee1 -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/bb4007dc5b0c0d545f457bdf35e868ee -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2f686bdd0bbcc62aaf9e20d3804c83291ad7c41a0a174516d7a83dee7f969f7d50f19f70c0f35901a3eaa8d54fe83204d832a901586feb9eb8e141631c411b3b -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/08f088ab3498a4f7645393f43098583d -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/faf62bba3a282f218ea569d3064d6c0cefde9232d055fc3a08c994fe424f2b60dd9bbf1655f6ca101da701e3d05bd813695d6a66628ec2b6b4d11b89f773f0e4 -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bb8f05da1e35ab358a96265f68b37f57 -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/30e3789ccca1fdc5eecaeb25345c30bc4f752cd41b8725c5279654d9b3f500d6e8693c6d1dda8b3167fcce15443682994d66922a17986419eb48bb09970f02e0 -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/ea9fdfb7c8d1a9c973ea953d4e057f0d -Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/5e5d9298a12e65a7e4d401a0e404eb172c96e70fa906096f549e7eda5dbfb294189e4f3526246f28f71ba3bcf35d1bf790f05522150c5877bf8f186d8c503795 -Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/053334d0c5aabaccc81f22c1a371c9a6 -Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/aa8daa99a4b52985d80e57d175b6fc4489058ed84f06fb2fd67710a873d5333ee77b64ed0620df099ed5617792fb3eab23d9cedf3ab3c79f4eb6f04ad1fd9588 -Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/b80918f03dcdfc5b5f1e8afa90dd4e88 
-Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/c0397541e06878535b41ba7479b603699d78f1ea3345d9a1146a0e7d17f42078e8365dc71a117981b2d2b25f35a40aeb707ff9ee8a2145303f3cb6567e82bd54 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/78b9e190d5cb7e6fb172814eda2996f7 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2c9a764ba2427faa8e67285205dd1b8c211665046c9a4a19aea02de46d02a6d4287467bacd1260b7996b2b85d3e571e750d92f02c21b180abe37709ee9da78c1 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/ba6dcd205dbd7c0301855f2a892c6467 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/9a98c10943a8abfbe92b151f184370d21a10ce72afb22f131bd0522672c65875868357f60650122e1a2cc91254adceaf8044de4533aea08c4df400ded8c01669 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/ce62f8e67b89c612eea35f4ba0e09d45 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/9c3afaf0dd338abed0631b81d5f6c197b5dff6aae637996f5bc2f85f2f7dbf64a7a4bdc07dee9ab72abada5be576bb0466550280a9ee9093946a469a2b6af648 -Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/543ebeb138123ce190e74cf0ad17d43f -Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/aff131b5d0ed372557e3195e15701543ec32db05d5fc18117c4aee789a5cb967706d28b2dc53588bc7566f3a4498fd9e2293518ff28387466464ee07c10e9fff -Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/58617f16466bcb1b56b204dde697cd89 -Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/bdc0c52168beabc0552ee941246b1d4506fec50913030965b374f4cedd67d6fd2b5746f04505aa5bbd4e6d61c5f684dd22c3b207e364578fd8538aef8efe0b14 -Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/aa6f0d9a455f5f0109433b9cfaa8f009 -Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/b267bd6291fc5830ffee075af00fed9a37177141b0cdcaa8ffd602e6a8bfc58e191408c3a6a12c0fb3ea7a5d825adf1ef99122399e8246e0312b4cd056d49a2f -Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/ee2d7c4dc5c95e46c6d46c4fff112e9a -Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/cd11acb2dccd2ac45a53fc48ee6a58299b5e54e80a5b9747c680e9b068381bf87cd388ee75cb0a51ccb1162ee8af03acd4c3f730a5f5a3ed5f443dd24ee91cde -Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a5c16a8832f5c28346912f610932ecb4 -Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/91b244ccd569597fe42ec45e5a62f6de0ab2c4da048b8b3ed191bbdde0a8ba5a710054d9f40c31a405a6c494a25c7546748870d1170d76e2d3b22dbb0c618e87 -Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/2d789f91744aebb0deed9b91202c1abf -Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/fb991942325fcbfa1ad4903db43e81fcfeda5d007ee664d96a0e0d2ee5f04b5767d6ad5d37e0273f5af626efbf1c6fde84d54536b74cb17433d29b6772bcf7bc -Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/ab8fae829b5822e9123fc3d763d327e1 -Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/1b24b03f6a81fba7400bdaa57899e9cdffd6da7e476832870460a12ab6188662c15a3cadd80ccd7dc0790834aa76ba0df098b400c87fd067eaa9f9fec0b053be -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/d5638f87a6ac840d571a3973e89316cf 
-Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/0f07e9e8dd75691ee73ab0e78a29047596a543c5886a137a7503c916ee6792cf7d6a7f279dbd864a2ad36d36aac422555d408381e3781ec004bcde5525abeb68 -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/e777625c3c7efe2dcb029e74ac7d1ba7 -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/25e0a48a4d8a2ad7f5f5adb7c30429655ff496e6b5a224fc5707f092233239d4c3f4cc17432de12815e546bb595caf2a70b18ff208a53b9f0236accbd83acda3 -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/22e03dc887f6e425f98cd66e0859ab2f -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/ef20886b841ba8b749ffb0c5780a9dc25d5f563ef726b1026ee77607e0572c45b8eb3470e252f882e2c4c23a2159d88ee83d31aae5081c6e4f4c37a61a7875c1 -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/5d8f1390ff66b6b357768b1994a43d1c -Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/5fd2fc0cf888d95c38531d236564109b284f20faed222d1feeab2beae68662073c9c59baee310e2bd67908f267416cded7b75f73e28969e2a16d2fcea0b03854 +Clang.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/1dfebd0db436a282c2ccb01375e48419 +Clang.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/d5a8fc8be8bdcfb98c3f868c1a08cb18bffaca0c9fc6efbb11beaadf40ed5ca7e2a70c3be783a7cc93b23f39e06167784f63e91abe726240ad62d11210337794 +Clang.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/f82250af13bd879486677cbf1ae0b7dd +Clang.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/c4f67a59e30ea7bfb9ac83f07b1e07c856113dbc674d3a7d01cc7bbc326a1529f97d0e1a08a3aa60e110f901dba6d4888bae7060e24065444baaf633482108d7 +Clang.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/2817b0eeb83eff4e1f580729e02564ab +Clang.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/88242559299836c7a7b7d3a216353fc6880a587a839793ed71d6d053318d6e2071ff218587a082f2b5dd9fb2b0952b4c60e62030d707435607303708bb1e6d81 +Clang.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/d3f92998b7cc35a507cb1071baae8b02 +Clang.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/be22296623f604927e2e815a1cc149addda6d567270a50b2cdf77fe5b09f74313210a1ca7b1b3194592da23490ba1ccfdab9f520ce7219989e646f12208e418a +Clang.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/716300acfdee4415f1afa3b5571b102b +Clang.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/b97efb3c461ea7d2736a3a8bb6b6b5c99f02df9a095f11291319c629d44f1fb934b124d38af6be3e5cc7103c6f85793d7f185c607383461de5d0c846560a1d1b +Clang.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/034f44b2fc61791234d9580402002fb2 +Clang.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/0b4ff55afcec0b1e8fbd09fab57de8b44d5ded360d3b53132c7a7df8d3a3b83a495bf6e0c706784e678c6de46be3a72e8bfe562c7f8dfad90b82880849625e35 +Clang.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/54211070d63a2afac6350d06442cb145 +Clang.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/a58f8afe9a20f202cf3956f758dc13a10be240d78877a02cd006d7e972751ed65623eef7e92a7256d9ed9157d6e277302f93b58f583d86d386ed4945f3c7d875 +Clang.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/7084567b3637fe64088fdce357a255de 
+Clang.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/77ae83e159a814a7117cc859a0b2aa7a5d41f983d45b7eb1ce2fd2e93f8733ee067ac8c9fad9d5af90f852b8802043ef39c29b44430b2594892e57b61ccb680b +Clang.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/9e294d16a6e1c2c76c03f32cbbbfbe23 +Clang.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/b8f83542b51f5cf953f6baed185550394744a8466307ee08525bf18a651fcecd7daafb98e75a0866b0e9a95a524e8940be7ae1878ba80d856182dcb7f7d2254e +Clang.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/70a41c2ffd55d2d87a7b8728287eb9fd +Clang.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/44bb3dea7227ee991b2666c43a88613d5b5d382eb560b5ad1f1184d38680c85a2ef961bac6ad71c2b920702c1ec6e09296198e7ff5e2929f4ba7839e55896e3f +Clang.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/1f673de0cc2ec59cc62dee6040b2d6b7 +Clang.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/9b2e64cd2cd510677375f3d07d434f46066adb7464751dfeaebb057129f6b092d8425b0728f60dd9a2ec4cb29625ffc5cda57acf1d5465d5f82765369954c58a +Clang.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/md5/0d91f5a19060c6a1b1dadb3befa0fe6a +Clang.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/sha512/9f9aaa36e1dab2d98a17602ed0b27163729928bfe4ac0f7b565cff1e0a653855b0f3e404830cb77ff35d93c0d5c42ed11d2506aecb5ec8d3752fbdfeb0ff5b4c +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/95ee1406f8575898eb52e2c86ae18992 +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/4da66e4d397491836b3e539258844346fe50bff41e6c0628cbb5c0eac76147bd91d1720cec1523452efdb063adf6ef8792dc278244e1f8e194ef60a180442c56 +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/6c4e4e892b54ce81d73a8598728083e3 +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/53d08fd8b6782867cfa6ce001b14a2fde38bc9ffc85c7e148aebf59dd9c1c535b54eaea816c39fcff42abc456c1047ed13d688917302bcc5a281abe368bd29bb +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5acc5853111bcd529eeb06ea31b329e5 +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/b1794f7cdfba838a7e43de8f66700ae44fd16d8f06300e8ab955044ae9bc96110c5ea72691841cd3787cdc93dfb91c6b257702c20390689a8d1b45a994db2fd8 +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/c4de50252e557fb126360001ddae6a97 +Clang.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/9343a7272c76d5341bb49273ff8d43bed09ad99b2879ec51cfb8946174181b286af82d85e2d3a13a375c7e7859e51e4a4f06031a6a3fe7e540700cfc6a795741 +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/af301478b20e56cb7fa1160cda2573a2 +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/8822c58df101c239221fead6fb523e677da04a065b42849a2e6ffff03dfd81e07f162a9bbdd29490ad9c0e0a33d362eec46608b9e6e42dfb4889da1c22191c91 +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/901d2808599d5ac5ac7b5ca4bc39833d +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/820756cad00b1fe927801a253bd3077709c2b067ae79f9e1812f3cc9e85a0b7ac2ce1534031b7c6f7bda3364b7173c1c508e7c7d316920fb9bb901c16c1b18c7 +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/d1f368604084e907c382aaf00efe452c 
+Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/523b25f6b79e222eb65b5f4cd8f23b0d2c8b25b29af0df88efe45546ea57c7dabd88baef454fa0b76342d8d364739107271f25d3504380fdec5c9d225fcc2521 +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/e57c116b2ad1cf32307eb4e600ac80be +Clang.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/63366b983c7aac9fe1246b25432b2200c8316f569f6930eb12de3c867f448ffccb8756d418f92eae7751d4c9ce6c42cee38237e429b81530819684fd5150c93a +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/645929ce42276db10ab79184a60cd6e3 +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/65555ed26d9bd670b8363e5dad949822c2bf0e141a5418e1dc30c3f8a4733dd050620e40be2e7552c2551ecb30d4ef3e8f74cb240f1d441a9720a25f5a3bcaa7 +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/8424c6c6318dfa7bebeac33917b29453 +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/6cf90c253f6b22358c2389a2347af2febd010117b22de0cc91ad713b8c8224627398004567c96b673650212eb5bd40bb97b9a637d46ddfeb3c72388d83445017 +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/ea8151dc1dc32befe579c7f9d7f13898 +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/ed518423e9ec35afd7983471cf9ff1e971b840f637f34e0f62a1f6c7379ea59d4dafbeb9a311d39761733ecc98c0318ce3d8883298f8998e9c741441c7c9616b +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/70ed39b13bcb0435fee63bc30ae25a39 +Clang.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/b2afa383346875514c62129c2991b3604c4fd3d507ecf4fc4244dec81d08b30218f5aa03dc4977185c2c9fb2d08848ddd373e448883ab472e5221ae5bf285c99 +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/e6798835128f663f0c837aed4463e34b +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c99856e16bd42ff967479e2c89690ea41268f1d1f868e2628482eafdfa53a0d69ed7c21ecc68ff0859eef07d9fe02f4844fad5f13df26cee6cea3a4254446096 +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/92c1bd54b0474244e35c51952966a55b +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/2d7c3b60ba8b11cf903bc5ea720193852027cbe61ea0c8d6fac70be8f97691da3d36663aac6e61b68185dd83b42d09ad61dea973d9390271210d690295e4902c +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/c495d594f8ce1f701d1bab54d0b60521 +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/0261bf45403daccf236723383341dc791e9cb3b291bde97812378d85aed785f083d5deea3bf806480a04ef1b972b00dccfd0537e43532a066c64733b817c3d77 +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/41541de24d625271bdd5fad867b8eb0c +Clang.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/595226ad7ef75ab8ae03adb456b4ee9e884e9554c720b6c4ecbc38c75d446ddba7898be94630673074f09f40c6dc3e18fea9cee5a91b8b0e4727d20a180f670c +Clang.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/8bd8ca0436611e78882939067f6277f7 +Clang.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/27c7b06e93fb0fb516b1b240e0df6c95e8bad6aea04d637ba065c6fafd087bfa94d9136afd39273c8d82d9c467395dcbd7b16f6a4b829acb0c0d4a5677676a5b +Clang.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/424bfbd7b69ddf7b1199afaacde3e028 
+Clang.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/9c48d949309aef6ee39371ff39a4f12c31bf3f25ddd288b317b2a17a803db73850cba2886598a1d10c4c154d511a4b79958d1acc012e92491a63f3925c522873 +Clang.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6b0b3e045ad64ecdc9848898f30d5f34 +Clang.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6c0f4bdabbbc94fc9e1fedc138b0bce99d383e380ae7222fb70f5935f17701d549f6486956c8a21731061e4bf60bbc52794f6ce6858b4d2adb89bf80f88795c0 +Clang.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/3b7a461ebf957756aeb2a2455b0a298c +Clang.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/74641a3636dd58c69415b19f0cb1de444215e22cfa9f0268fd549b5c53b206811d8beecdeb9692285613468d9a0569e836d225fb8361218438346914f6282839 +Clang.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/7533ca14f2932c35881ec05a5fb1e550 +Clang.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/a1e55212b92c6b6dffc7e7b316c98e421e8384f65d4339455694c53643a3509b817d2ecb4e8dcd5f147dcf1be3920bcf82c1cb1732b23657bc7e36abb800d21e +Clang.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/5525f1e02315a128195cacb7f6cf7d44 +Clang.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/9ee9fe4b1f52dc6533f177256e60b0579943e8bb5ba34118e5a02d25b6a4419133f3f819aae1e02d916cc17edd09330facdc6625d66564ad3cbd97ebfc439e32 +Clang.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/9f442a545e9c3fbb0898b7a233e5079f +Clang.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/99cf06a5cda26001ed8d8bb4915a6a5993d4c9c5a7a038ccff99a3fa752f207b02095bdf1689f5cb9a2584a7e3ef26436b840896fe9a5b9b626980ebc7d85751 +Clang.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/9910ade7fdfc95ac2db3113fbfde42e0 +Clang.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/6267f1b3dbbf7900bd72cd5700756e1e2c783157b87b1829af552f7dac36f749d9c7d2662235892105c959e1425914e944fbdd2f9521d2da7de321efe6c793a1 +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/a6c7d64ede931fb19e066a1c191e2f6d +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/1a085a4ea1efb910f2b529f3c0e51be4a5e31debbefd00ceefeddc352b36bea6d0de5a06ea7d509098d16416b536ffed3da8485feefad7a2f11b1bc148a0c8c2 +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/692af94ca3e5c3d229cbb459e266aadf +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/b27f05cfb0ada89cefc5a6f6527583b6b43d03525954d5b1ad1c807712efdb8750ea558a230b587a0c0d9e77c54d9f8978cc2f3884653808c7409eab1b32a055 +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/3b59b6aa4b18b5dbbc632811f2ffa270 +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/f8c4b593f969c723ff1931c4875ed52497d83d74b94121890e10c9fcca5f6bddc5067555dee9949e61e426586ae3e568375fc44f318a07b70571ee34fdf7032c +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bc4be32ad57b13c3dabc80684a176ba7 +Clang.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/19a8346547b6c6adc2a9156e4b913b20137593752efa3648ad532b08de67cf015bba1eb023204755f48904c3381a3665c6c54fc8233c50e887a22ceebc652303 +Clang.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/13436ae410728f67c914fa7aed304736 
+Clang.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/3f83f1659580f4c5085b2da1c1a90581dcb3c45f5da1cf4d1801e230bb56fdb78a98cfe41b755949b34316ae08c55f5b2d558bb4026503ef2afa895b59dc861c +Clang.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/fa79485d88d173e15fb99b2f7fd793bc +Clang.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/4886be75294979cdb55030747c664bd4cc2a2fa1489790d744e918a39fddcc5c214d4f39755d58206fd1bfd077774302b2be506ee80e4d0a2e2e2de642dbf124 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/4e5d1064d90f24d57d63f08b61baaab5 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/cbfbe8b6f2be80e59b69d25d6af901ccb4807b12180208b69afa7223dd7d5249255265bc319c9402a1b0d1f0995940e3e72d7ecf1009f60d83021f8d35626a46 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/22fead15b4c45398ca869821d04ce015 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/2ee7a7d3f293f7b63c89bbe3b541722c502a840883804ffe272848f4ac99b7a8ed350ebe92ec434dfdf03d1f4a5531c1367859f4a4603c98325abe5a0ad71177 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/46dd01b10377cc3d45c6a42cac0a07e5 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/957677ce4251938d0c5e066448762b38a21bcce5ed424072ccd58085167d61b7e45a88fe32375f6bbd43dfb579b65a9afc09a886a650fc634a8fb9c81f27c9e3 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bd9a61ea186a39162201341f0739fe84 +Clang.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/7a06d2a9ef20e88daa00d627d482ebbb6bf7223219d8b2a24aa60ac9eda24649d206b093d5bdb88b65c1e2b0d1ba0ad7dd927697e2bbac65bc9b42f9d14ad0d9 +Clang.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/60c98c6cc7d4446fb52b7585bc8709f3 +Clang.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/4d55464b4499a45f774e1000a8b015326d114103a3d348fb263367e5506ca6659444ea6ee2767712903757e83939cd446aff6fe2351438b644f0057053422b58 +Clang.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/90a512d1881c4af1f1abfd5e90e37356 +Clang.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/62d6d855aebd49f132d6470c7b0d5a0b965c6489b025046c1ea73fc53336030d6c5b4c867523a9206821f7fcf62fdb37ef0b7ff4b5eb04d07f40b65edd2c8e0f +Clang.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/c9eb9acb605d774db9636b82bf2e5f41 +Clang.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/96e1440b3b0378edf8907d4cf779b1c53d63f6d00fa798efe1b6aaa289135aba8fd00a8d6f55d9678136e9e07d0c189293aec64f46e66788b938e1f8e1fc2199 +Clang.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/5837070450c81d44395468d8e3671dc7 +Clang.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/0e8b674c0360f9586f03c7f5d0ffd5bc73dcde1e88eddf7d6360c1461adb8efffb104d8f454116a6a6cdc909973d0876745590b21009a9de56e12ce6e1c2e8fc +Clang.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/f94431ce7b8a12774925348a076e39e9 +Clang.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/cdbcf5bd32a9fa4d5204e77f12d60b1fde540fc93243236f26896106d21f3b2106b0c3fcd93b1a7bbd6a9c4688200837f309b216ec9f334f8c8f28144b36d4ca +Clang.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/4ca4824a441d51cd4d1fe3516d7841fb 
+Clang.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/ac0a046ede4b3c9bc75bbf7d1189e4679df6c35ca50e97fd6dadf437aba00816f66038db5dfddcfe2c49140c8416c79cfa4b67db371b4185ee897e0585b96301 +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/844031bd67137863f8e7dcd65aa6e45b +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/56efe56f02f0d13e03ba029cc2ccf2aaf2d50479d8153b7922392ff90327e3cded2c1e7fc8cd799737cd988e64bb9c74f2c0ea6156a04fc08f22a4dbe6156cba +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/cc2705c3a856574835383aac7185ab32 +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/eb037e996168d6d8987ff50c45e879f5e9779b044075f91cd8bbfe096260cd155b36f80bad840e88e1ab7970517e692875d5e84adc447153f167dfed886e0442 +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/2103b507b6aec55f8cb58a0c86aa461c +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/d9a4d6eeec2aac1bc41a0be40526842e782d0796a306d3c1b5e53f7f146628ed974c8a4c4dce8baff5734d973966b4f3e1310be40b90ced9981ace4c4369a257 +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/daf3d83095fbad33bbb120314d6b53f7 +Clang.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/e68a71d0d89d16e0c5c9182b8a3336c67179f37e247c8eef3f21e362a3258ff4815f258d2430ca3883a52a95bc26c8e2c42e3dd081f4998ed309813f3d0a4aa6 diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries index 4830109bd7aea..08802ca1f4de3 100644 --- a/deps/checksums/compilersupportlibraries +++ b/deps/checksums/compilersupportlibraries @@ -1,92 +1,98 @@ -CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 -CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 
-CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a -CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 
-CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 -CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b -CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 -CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af -CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 -CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e -CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae -CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a -CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b -CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 -CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 -CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 -CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f5795dada5360eb8422f45150b13bae9 -CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/6acd1bf7c81631cef9b8b0576ccece08723c5ae2f49de2487d3aefd25f9a0ad49df09e3782735267997d40687b04b85c89e00f6889b026af599bf1bbe91803a1 -CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/5e590f83161913f0145ba8d496b2504b -CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/4a3f36588afcdef26173764597054068e26f2376e6126a9a94c46b258b5d7a29951d47b5e1ba24df6c3d139bbc4decc5c501a266811692d7fadadc7bd7b6960d 
-CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/27da4a7c890fe1427c33fe214cc5feaf -CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/310ad00f053f9f3ec715ce2e8d20446f397728dff5acc787ea9c9332346607a3d42b678099c424e6d6e5294acddf2aa26051de657b48d34abfd04486951bf241 -CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d -CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc -CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 -CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de -CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa -CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb -CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 -CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f -CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 -CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f -CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d -CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 
-CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc -CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 -CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e -CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 -CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 -CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae -CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 -CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f -CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/0c2fc6fae4ebe293a7f0dc1e91f6531a -CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/fdb0ad061cacad0557fde3ec216fd3666284f24ad6a86f4a4b6f946dccb112c9704f52edba86f3b17d84c824affbcfef740720348ef227380cf6017811bda80b -CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/005e608dbef2b5cdb7624702ccc426be -CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/8bb2bcd0a6b1901e8a9be20f505bead5c78ecafbe5a8271cd13385553e5744e0c7bff62976ac9e7d74b8f3bd467603d4c0f5658e6b120bb23066c15e0a644ed4 -CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d6c2c7ad72bff7f7e5c43678d716a57a -CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/36f5eba1b0be440797467cb7104652b74709913d2bad1b08ee2dc70f450fb8eab81b28f2b0bc8dfc238b3c46982c69aac831b4fad5bcee4e9dd114852fcb4a0b +CompilerSupportLibraries.v1.3.0+1.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 +CompilerSupportLibraries.v1.3.0+1.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran3.tar.gz/md5/c679907ddce62f21bc30667cc40d8d52 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/284b17b6634087f0b969d3e99b2e4152667ab5eb9e6b5813f9739bd14ae1c25dba01f15488e901ca5fcfd780b02bc02b6bff670fefed7d965dcb585e81b03782 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran4.tar.gz/md5/1b4f6efeb83f5f3e27c42eddeafe993a +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/b1d5aa122b2bd25bcd1ce47e000f71785d617d77f44acda56f9f5ad77101a0c54f6c6a4c5560a7c12ffb8c89ae325d4f056bd92f893d219385c3d5c85aa457e9 
+CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran5.tar.gz/md5/834adb105f78ac1bb223ef309dbf7cdc +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/dd0440805145f1a8e8521633a955317567606bf2e3725a5a7eb90515128b077f2163832ab608022fab152526f2a55991f50256ab92104d5d62bbb8a740e25009 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran3.tar.gz/md5/d613881e48181bb8ac0bf34a456c9736 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran3.tar.gz/sha512/34214bca9f7c66e3c508b2f9d88cb296695721cfba0c001660e2edb0387a2efbb2fecb0360f8eb2b1d0ec502480fe63e802f350367498a342c455c0f58aadd82 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran4.tar.gz/md5/97e4ea4394df1d784ce4de3f75aed580 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran4.tar.gz/sha512/a072ceece6600b704dae5a7491f1ead9b4e11da3d4438b7056f2c71e59b0a37d3023fb812cbae205a4f1fcaf18a4b223a5ba2cea32131c5eda0d55b1f0649c23 +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran5.tar.gz/md5/df09c5b33b2e307e0d9c2b39b450c0eb +CompilerSupportLibraries.v1.3.0+1.aarch64-linux-musl-libgfortran5.tar.gz/sha512/d0a8dc03ea1667d90bd58c2376b575a1090a54a4412bc53b311a3ea910c76dc698be5ca1078e6ca8341244f1fd6b84201ba10c10baba194c1d6c3ffb7e69563c +CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran4.tar.gz/md5/4f9b257eabaf0a817755495cfbf75088 +CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran4.tar.gz/sha512/876036a8b599008512ab7010f4bc5f11fbf963bb9b9f77499adcca21fcad89f94180f653dce3121e5c1206f4fd4ace717ef8f3b40d8009a71039a84ae7272588 +CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran5.tar.gz/md5/0d296a080921b54d959a2a60884b7938 +CompilerSupportLibraries.v1.3.0+1.aarch64-unknown-freebsd-libgfortran5.tar.gz/sha512/d8232dd9131c2890ea2f5c90f62c646ea1dc93a0a6de4af0a98c7e69928c5ca5698e79ff9d23bdcf47de1f5670467c9e8fed5f01e82e009696641896f0658030 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/3e0727a3813c699b6daa041e336d6e13 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/00cc2c34af7c4a5df06deaff27dff5b94b231ede4afe7a47b7b783a8d2e62158c0ba1b2062d40df949fdc0a21ac703f8c9011f998ab032bac265aef153cea012 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/b7cb328b5e5fae5b5e456d058f5c18b7 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ff191595bdf4dfb2cdd77d42e591adc0b27ca0e1055efa7fb25fc06784f26add83e6c5c7594405bdfd715f9c8e6ae3f2171a50ae218b4b691099da754fe9bedd +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/46a3fc18a65e223ba59d984f99d42979 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/cb470147f6695b101d497bf2d84caeb1f97d967bf23d1844ad70be47505588d981df096378136a98c35cda5aec090255d60cf7c1c8def9801233c72ca002b563 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/28e1bc0fb0ac1512a8598f26ee3f376a +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/7ec17859790cd08942929281906918e4c69d7f306a8302dcd591a4a67b3d95f7f72f7afbeea3a86a0d94ca5b608b3bda00ce43b594e9f173edb0228c0f79ba49 +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/f9bff1a49d95fc0f3ad3d4a90b259c87 
+CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/8ad503a213b949f569c5b9eac28e33ed51cc55298bb66b147375dc12cb9ed90e60165aa2dca8e3d28f1a2c153894a9e4672bdb2ae3cfb3a67b1e06b345cb454f +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/d550a4dac1b20606681a56acc00c01ad +CompilerSupportLibraries.v1.3.0+1.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/14ca10ad8809dfd2434e300ad5280915f21cc1ba159a9f4aed7aa2164ae624687a2a7a9e6dd99abcfe95f40cb037c72292c992f4483fa1affcf8a9b5cf29c9bf +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/3e0727a3813c699b6daa041e336d6e13 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/00cc2c34af7c4a5df06deaff27dff5b94b231ede4afe7a47b7b783a8d2e62158c0ba1b2062d40df949fdc0a21ac703f8c9011f998ab032bac265aef153cea012 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/b7cb328b5e5fae5b5e456d058f5c18b7 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ff191595bdf4dfb2cdd77d42e591adc0b27ca0e1055efa7fb25fc06784f26add83e6c5c7594405bdfd715f9c8e6ae3f2171a50ae218b4b691099da754fe9bedd +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/46a3fc18a65e223ba59d984f99d42979 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/cb470147f6695b101d497bf2d84caeb1f97d967bf23d1844ad70be47505588d981df096378136a98c35cda5aec090255d60cf7c1c8def9801233c72ca002b563 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/28e1bc0fb0ac1512a8598f26ee3f376a +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/7ec17859790cd08942929281906918e4c69d7f306a8302dcd591a4a67b3d95f7f72f7afbeea3a86a0d94ca5b608b3bda00ce43b594e9f173edb0228c0f79ba49 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/f9bff1a49d95fc0f3ad3d4a90b259c87 +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/8ad503a213b949f569c5b9eac28e33ed51cc55298bb66b147375dc12cb9ed90e60165aa2dca8e3d28f1a2c153894a9e4672bdb2ae3cfb3a67b1e06b345cb454f +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/d550a4dac1b20606681a56acc00c01ad +CompilerSupportLibraries.v1.3.0+1.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/14ca10ad8809dfd2434e300ad5280915f21cc1ba159a9f4aed7aa2164ae624687a2a7a9e6dd99abcfe95f40cb037c72292c992f4483fa1affcf8a9b5cf29c9bf +CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran3.tar.gz/md5/73e14b94dc74d17aca38a51ad402f836 +CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran3.tar.gz/sha512/d37263a216fb3e9b94dd032642ed6bf5be154a5c66de3e4bd74e5e2059d9740958a673796eb652ca9ebea8ec09a7eec837d8906a50775913325899aa190808db +CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran4.tar.gz/md5/23996e5c6690b35e7c36bff245f6f4d1 +CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran4.tar.gz/sha512/660dc4866a13f9a1ae98424b605723b250218a034e02151d4160d58ca07bba4fa1390e99e7fe2f31eccdd518d1ac4c5f5454968ce52525e3a2d21918b6b5bba8 +CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran5.tar.gz/md5/af836562cfaf76f0728be0d875d29ae1 +CompilerSupportLibraries.v1.3.0+1.i686-linux-gnu-libgfortran5.tar.gz/sha512/a2b10c2f72d1e84c7b496b7ad6d38629342c93cd6a7f691e5bbe96ce28ef40fd38509d382d22208e40cc4953e7b93d1c211bf59529db0ad1a77b684ba75bc68a 
+CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran3.tar.gz/md5/502f089e5ee03b3a290ee6e18577a22f +CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran3.tar.gz/sha512/13a97c2386f37aba2416ec35fe67b99a1eccb880b0254ff0a70f2ba01a01a15c80251606ec7eb0503d59a7723542b6b9778d6c9d9e4ba66ae5cce51e46a9cb40 +CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran4.tar.gz/md5/221aa40c278faee74ab6af46686d68d6 +CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran4.tar.gz/sha512/9e4e598c8acdecebc812555de9631f022f6158d679c329537e37f83c76c818f31476a5827924b5ac12978515d64a7e913f220ca75314f41d3227573e9a2ac9af +CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran5.tar.gz/md5/c364ec196e66dd5eadc3932b208a0385 +CompilerSupportLibraries.v1.3.0+1.i686-linux-musl-libgfortran5.tar.gz/sha512/3f7b80fb35a967d9354c2f4c40bb6d62751a0d791aeec09817cdc278393cacef089214f61d8338c0981f7a4ed3144d37bc9267cf0e7ce6c4cf651bc67c431b70 +CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran3.tar.gz/md5/4177f1ede00d81472bb69888f5b3e26f +CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran3.tar.gz/sha512/09ab710599d237ee35fca6a39b4d67b36bbadb7d127797724743026eae72319faa161755b03f4cb67c83f801aa4132968b561245487b2c2c0836d0ff867c0e83 +CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran4.tar.gz/md5/9402d280886784bc245096bdc838fbc6 +CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran4.tar.gz/sha512/60e72336efdd307b88b1e6db5234388ac1892504ac858b412d18f072a33ca1aeaf1b8621ccf43027508b7a4653150f0849a89c57164beb1e7f24ef32f7fb7f11 +CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran5.tar.gz/md5/310b163628e7defdfa6a293360b203db +CompilerSupportLibraries.v1.3.0+1.i686-w64-mingw32-libgfortran5.tar.gz/sha512/02e9a797246feb9c4b09b0c67c773dac5c3bb61568bdd48be147adeb2dc08fd2bd7151f2293e2756685d011e463e39dc5ca0f79593dda7501cacbc15adfc74e0 +CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/c139a9d54f39701e805d2af185a6f17c +CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/0d1f29cb04b42b276edd7998a02f6166295f6b7a2a8ffdf6b2986145476385b19c2f93b012974835363ef57f2018bdb80814adef3b72b9378f0d2c6a8805c43e +CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/3ab360133835e1d0a6a24bb2de1dde02 +CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/9c2f765b58a73b3705f787f68c995d8f2cbd211978c0ec8ac2adbfec6685f4b3a02aa63bf75b9dbf0a2a5c048e35536929d04b89c120671174d76132cbd2c7ed +CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/6ce9e27ab33b35900d8f81c2ad05eec2 +CompilerSupportLibraries.v1.3.0+1.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/1d8af2664e68d18ef4f68b9fed28979af0acf3dd09c8064c4b25b3268808bc6901ce727b5b3ec3c27e37914a7c1f8c92e5ce35de093d66cb6a2e98ad59c2252b +CompilerSupportLibraries.v1.3.0+1.riscv64-linux-gnu-libgfortran5.tar.gz/md5/6c292cf98c6b4cbf10aeb4f0af383222 +CompilerSupportLibraries.v1.3.0+1.riscv64-linux-gnu-libgfortran5.tar.gz/sha512/1497789d918d633f319f89a04241678602d3b0f441ca6f8f6d756f6d1fba59d5eca54fd24183e39e9b956cd3c053afd747dc03a9a1e2d4819d26de3539c5eb07 +CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran3.tar.gz/md5/0aae7ac19dade024e0228bb1a3565edf +CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/b779badad7e6021875b5df04793445b4056d84cc217f389f9496d8ca61af71d98a667ec29b912131c83319be4d6e82c59e7c3f409f302cc3691899f0e77edd46 
+CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran4.tar.gz/md5/6fcb9749463a96504f1e23cd97695f60 +CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/66d4cb8237859234f8fd49461b5976a7f155e02fb93c765208701c43c041dc8693f3f8b868ba74bd28614586c0f5109a5b5aa0d0d69ac38732ad6d84d2635e04 +CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran5.tar.gz/md5/af01aefc789a0388df504abae68fc01f +CompilerSupportLibraries.v1.3.0+1.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/444d1d54fb6ef95f2093894c685a4065e9708504b820bd9325bdf32619eac8b2972b1601e788ff8f1ee2759117b726c04c8bb395820359bdc737bdfdc3c4026b +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran3.tar.gz/md5/df1c55a47f9faebf09ea093d5d1ee344 +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/03477fdf14d8dfce204999e6825d9ad94c2c78686d59b251f39d1bb357b3c9d9a74339c4d5f5e97420870d44f7bc2fceca637fbf7b862d0d1cf04a19a2a0b036 +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran4.tar.gz/md5/8812418d84c2ac289d64a597d4968704 +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/4da50ea541c13a98ae05c6ff67b8021496b871a205f994605f0230a67eb6c58ede55aa3a471df8bbdd5618177d34914186cfae106664b80a7fef795e5fe97e8f +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran5.tar.gz/md5/55bd8dacbc4afff6196494542ea13eec +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/2bb63e68f56818c0a3bb988f395ebcbe99af2740f806e324c385c1dcd7a5dbb058afd286fb6d85a1621ca668aba962a8701bef96a4547b0d22f92d9e4f4b51cc +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran3.tar.gz/md5/1e06592e53de4448b0712a79e61b9a51 +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran3.tar.gz/sha512/cf92bbc217a51b9a18e07c5b5248ac5f59f92a7924c5fc566a1bd5b87a1acd36ec9c1d64871b273f80670596c05c1795cec91294f32f8dc1490633ea6d543037 +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran4.tar.gz/md5/fa81135fc7e697eb8409baf3fcafdcb6 +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran4.tar.gz/sha512/32ab98be0521f2451ce9b71c5ce7dfc70094583df80ed8db3990a2041594839f065abcf6c847fe6b8293eac3b3395da16ab3d24cf5e15c962aa320b28a6cd4be +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran5.tar.gz/md5/d6f0a90da74eaf2f9bf4f7b884231a2a +CompilerSupportLibraries.v1.3.0+1.x86_64-linux-musl-libgfortran5.tar.gz/sha512/cbbbd0284799f78cf20a41f1b2d110651ee0460f0191d519d522a5034a31edaaf62ef130e7ef42c28882e224a4f997f0bead5b569254cdda7100b1f41e286b78 +CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/c525e70e726f0fc1c49deedd08ab6026 +CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/eb50d1443e1d13b892c141ac579b2e807f346d98a75e2ce9a0a23494c754b7149d1900046f5c39e324b48bfeedc6bee590a7e2c182e6f0e3c07b9f816fcb9d6d +CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/9777c3216792efd8e8625f5f72442be6 +CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/215398a9c893a5298101d98a3cf3df1e59e6dd4b0d66b3cdcd9decd8725541ae33c30d1e391fb51d7aaaa33dc5911511257f7ee7e3ea6350a8942ae70fcb3ada +CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/c7571567040d646935234b51c121745b +CompilerSupportLibraries.v1.3.0+1.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/159900879d46eb2a2e45f0bfbf6eb7b03c1e28705d576ad712f67a3ae242e7e4642c08f3be181b9fbac659e1c76de6ca278ad3662fd15e8371adc7bf19e9e9b3 
+CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/63187354746bbcfd43c11b8047595d21 +CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/7c004e5ee255a9cc410b2f8f8836d0dffae8f4e35552c57a74a9c2eb8dadd6f0966ffceb296fd61c5c0ad7a0ea25c80ee2d7bd80ed3ccf1305f236b64e2dad5a +CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/64f5d316b2d694dbdb2c96e557482de8 +CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/e1b3be2e6e9d4ccae55ec131f6cd51a7c4391639365057f7c8ecde539c9f5fa4d73942cbc2d06c62f43c2e1bca0469862a9ac6dc064536400ec09f37a20e2b1d +CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d10bb5d9facb9428c22f920798876f9b +CompilerSupportLibraries.v1.3.0+1.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7671d0a7e1d965d0dfd56e3f037dbb47a2748cbff2656be26741e1b15687b3ba48bb44e7d43e005cd610257c94ffa8e71eb3e3ade772ee5c6f6aeee4535f04ce diff --git a/deps/checksums/curl b/deps/checksums/curl index 85974ba0bc8a0..b0a223f42c5e2 100644 --- a/deps/checksums/curl +++ b/deps/checksums/curl @@ -1,36 +1,40 @@ LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/md5/e8c53aa3fb963c80921787d5d565eb2c LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/sha512/8e442ea834299df9c02acb87226c121395ad8e550025ac5ee1103df09c6ff43817e9e48dd1bcbc92c80331ef3ddff531962430269115179acbec2bab2de5b011 -LibCURL.v8.0.1+0.aarch64-apple-darwin.tar.gz/md5/f697b4391608c2916ef159187e0d0b29 -LibCURL.v8.0.1+0.aarch64-apple-darwin.tar.gz/sha512/41da87eed77ffac391a60a4af7fdc707f117affebe54960eaf43e3077440ce17d95fbe0f47de41bb1456e222e7a126d687fa0beb26cf98713b3472e9b3ba9e57 -LibCURL.v8.0.1+0.aarch64-linux-gnu.tar.gz/md5/9d3e7e7601ac21a587bbb4289e149225 -LibCURL.v8.0.1+0.aarch64-linux-gnu.tar.gz/sha512/67ac7bc108cc274ee5e088411dd9d652a969952892236d6c37a6dcd710a1887f9ff83df2c01ca0f5b16b2086852077d6c62ae7a13f7b9ac4b9e257cd1aacb0ea -LibCURL.v8.0.1+0.aarch64-linux-musl.tar.gz/md5/bd2b62cd40b9e87fe149d842d4ff55ca -LibCURL.v8.0.1+0.aarch64-linux-musl.tar.gz/sha512/7c6bff3dbe341e2a271b61e02767a25768b74631894c789fffdef580605d821518274a04d9441c9b5d3255b9a9297d0d35f22310dccaab367aa92d928f25c062 -LibCURL.v8.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/9effcc21c5074ef88ad54c8b6b7a3f8f -LibCURL.v8.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/9327fc8e0db9edcf941548b0291e0bafe9b956e92f6edf47795ca961303a24ed305b30b09f29478a70149056411c4ca4652facbeca89c2bb3db41a6c97df14a9 -LibCURL.v8.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/9cb716973ec75e2a2fa7379201aad59f -LibCURL.v8.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/3e4d22be628af7b478862593653a5d34c2d69623b70f128d9f15641ab3366282aadee96bc46ffacafa0dcbc539fbbda4e92f6ff5c7a4e65f59040948233eabce -LibCURL.v8.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/95bd98a64034f8dfc5e1dda8fb7ac94e -LibCURL.v8.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/6a7898670e71efd7f06e614cdf535cf390eb6def9e93409d4ce2d9811a8e1f892959c0f6ca8e370f49e215df495ee8f95e1b7d9f92e2708ca548344b6ef9cc22 -LibCURL.v8.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/42aeb569e80865377c65bba6cc84b262 -LibCURL.v8.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/fa46e52d8abd49e22636e48fb43f11be95bfdabbc13142e0cdaf4bb892ff982eb09abd9f3bf1c33ad374efc18ce21ab9968ed22c084411a55afddec0c459ab3d -LibCURL.v8.0.1+0.i686-linux-gnu.tar.gz/md5/ded5d6d6580b979c372992c0fcf0aad6 -LibCURL.v8.0.1+0.i686-linux-gnu.tar.gz/sha512/f8a40285a25d61878e87d525bebcfe6e8c30cc5a40f38297de774c8e3191490c38716b3938cf81582afb23714a38405c20ed0241bcd3d41c68a5594822498b70 
-LibCURL.v8.0.1+0.i686-linux-musl.tar.gz/md5/cd2bcf96545c783f5012611824169a93 -LibCURL.v8.0.1+0.i686-linux-musl.tar.gz/sha512/318dd3adcbf36c7979df9f394e78b7fb876dc60c9ec87d6b0edf47676c69df4dc3e73c07b2434b15c6e7497b385dc0fbf3fe7e3235b291a369f6f1d883c99645 -LibCURL.v8.0.1+0.i686-w64-mingw32.tar.gz/md5/276cc56eaf744ac0a5cec6c8c396ede7 -LibCURL.v8.0.1+0.i686-w64-mingw32.tar.gz/sha512/55cd7882ad976aeed1acaab7b1d59279ff3a0d2456d0bffa6240957ac6f152e903485f0ca05baafa5e97e0d1474cb204987eb9c94b1b2ddd657b52864a44c646 -LibCURL.v8.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/cfdc41294b2f4aa85bb8b27beced17ca -LibCURL.v8.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/24f92091ab44a3be40228a9d9a57febc026f49b12c538c98e46a06dbcd679086332b773662126c68dbe4a60dd90a77c970c8a398237afbcf06c660fdbea16a76 -LibCURL.v8.0.1+0.x86_64-apple-darwin.tar.gz/md5/10a19a4f428951adbca7cfee91406498 -LibCURL.v8.0.1+0.x86_64-apple-darwin.tar.gz/sha512/28ddbad4310ed886c65edf28ccf01a5aba77fe11784740600aaec2aaa5c10c5e5915e297a4d72dd85bbc5304bb2027f5d18b95f13868b4bb1353fafed7bce4e0 -LibCURL.v8.0.1+0.x86_64-linux-gnu.tar.gz/md5/a68df850605cc9ec24268887e4b4ea77 -LibCURL.v8.0.1+0.x86_64-linux-gnu.tar.gz/sha512/f532dfcc84dbb4b92229a79b5629b16198061158e1f12d2dd37948cd0ceccc095221b5fc9a8e2de30de19727c727ee500c8ea4508722c677c7938ddef1c40350 -LibCURL.v8.0.1+0.x86_64-linux-musl.tar.gz/md5/023a2d8271173de0a02bdca8d1d55bbe -LibCURL.v8.0.1+0.x86_64-linux-musl.tar.gz/sha512/e3195f917c250f31ce9669c304918b33664c5b03583f328929e73377f4feff525cedac42dc74adc9ba98a704630294a5697f07eb95ca520c6db4a67f0f83383f -LibCURL.v8.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/ecd39a1cc45ee76751e1e3c5edf469d7 -LibCURL.v8.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/fa06afb1173bc23474f8f7992268ae9a0df52bc3c1af86d2b60da2cfff43371bb029b51debe638d81d8a1dd334a95dcd3c53dc12923220ad9b1336fcdad1ff8a -LibCURL.v8.0.1+0.x86_64-w64-mingw32.tar.gz/md5/d9a735335e3603635a56eb3b86e6ea87 -LibCURL.v8.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/8fc6677b1be27a900d2a984cf9f9f4b3aa1555bfd732da2bd6553c28da98048c4c86216b57744d7156de94c522b013768e57f42e662845002e5bd9f730c818a8 -curl-8.0.1.tar.bz2/md5/b2e694208b4891d7396d118712148ff3 -curl-8.0.1.tar.bz2/sha512/24e84e922612ebf19341525c5f12f36e730cd21a5279cbea6421742d1ba61e5fa404f2add2e71d64e5692a1feabfa92c5a5d56501f161d1e157718fee467e0a5 +LibCURL.v8.11.1+1.aarch64-apple-darwin.tar.gz/md5/890c65523227b4352344b78575cd4c5c +LibCURL.v8.11.1+1.aarch64-apple-darwin.tar.gz/sha512/fae539243adc805d8da0ac88cf67901ff1f12ae94e40293dc6a7e17072f8c0cb9f0a54b7e324bd52ad9361b764c8bc88728ff4495e0cd6dbf1eb93d2bae8994b +LibCURL.v8.11.1+1.aarch64-linux-gnu.tar.gz/md5/bf937fb6a8ea8a82b732821f3652641c +LibCURL.v8.11.1+1.aarch64-linux-gnu.tar.gz/sha512/230c9983e4c7810d3eee1a5eff7e8b8c44f76db7af8a8312a608609f87bc8a56031c337c06af00a536c10ed33725200aa137c3153ef6dcf6575cc7c350b3b461 +LibCURL.v8.11.1+1.aarch64-linux-musl.tar.gz/md5/b40ea4266dc48a1fbfa016fb8d0ca987 +LibCURL.v8.11.1+1.aarch64-linux-musl.tar.gz/sha512/032d6208ebe226da90d0ef1f1f2d20580fd4e37db68146d1e836a9be4c1fc5f7890f1b808337ca41f46a07a833b55f06f09a4a164f26d0824a649ea40b30233f +LibCURL.v8.11.1+1.aarch64-unknown-freebsd.tar.gz/md5/69097390c0bd3a32969e47608f24363f +LibCURL.v8.11.1+1.aarch64-unknown-freebsd.tar.gz/sha512/391019370a9c122e6425a3097edafe0980dc2077be015919e7914aa781ba10060e3af9ee1fa881d8536d0ca57783d0616a1b5735e2ae7e06ea4edfaee2994120 +LibCURL.v8.11.1+1.armv6l-linux-gnueabihf.tar.gz/md5/bc4ab567f8cc4cd88b2239123d103113 
+LibCURL.v8.11.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/0ecbf2380852744815a8f7e99e7c276c342f847907eb7b0d256905ba854ee59d37f83456fcdc8931dc39dbaed58f0949205b80d23e43e2b59195d18a539d4047 +LibCURL.v8.11.1+1.armv6l-linux-musleabihf.tar.gz/md5/18c896c544f02f7f2b976c03fc3772f1 +LibCURL.v8.11.1+1.armv6l-linux-musleabihf.tar.gz/sha512/e9a73670f1c3638c3a886055b32df5baadc41aad9829cfa0d4e05acd46d2d012464114ed6fd1e3d182a4adc266c1da97e9a683c7ba69e93f61592acf8567e336 +LibCURL.v8.11.1+1.armv7l-linux-gnueabihf.tar.gz/md5/4672f9d67ff357a2eda6f77d8f470659 +LibCURL.v8.11.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/a2f2dc8d8e10c652a324a4da3d2337d2886626e1c417c68efbcfcefa443cb3ec81b52f2a212c4a7dbd6a5ae920e54d1bfdc02b68c2607d09784206cd4d11ffb0 +LibCURL.v8.11.1+1.armv7l-linux-musleabihf.tar.gz/md5/49d297563fd44a03f88f67bb7ea2a0be +LibCURL.v8.11.1+1.armv7l-linux-musleabihf.tar.gz/sha512/5cc3902571f04c96be38de53b5320876a3e7e54934090ff2a80304a7ca59a361ed9f3f328c3e3c06ef33550d221a8243e924b7ea49792753f839c12aceb1e979 +LibCURL.v8.11.1+1.i686-linux-gnu.tar.gz/md5/f05a86574278ecf7802edeffe1fee9ac +LibCURL.v8.11.1+1.i686-linux-gnu.tar.gz/sha512/e493b5836022a6280f21237fef423034a9701097cb271683e81d4b4e487a6289080d00016fbaaa8bddeb004d44626a0076fa7832835fe7f58b60af6798223f89 +LibCURL.v8.11.1+1.i686-linux-musl.tar.gz/md5/03340412ba27f231dbf2de58a1af871f +LibCURL.v8.11.1+1.i686-linux-musl.tar.gz/sha512/541fbdd5570432832d3835038b41df73aac8e0e7bc03f41c696dc12a57bd4784b4da1f485264fd1fba263fe9e520a7dbb0ef9a365275efc30dfc361ceab252f3 +LibCURL.v8.11.1+1.i686-w64-mingw32.tar.gz/md5/5f2071282d572bbb53dfcfb16d0d9608 +LibCURL.v8.11.1+1.i686-w64-mingw32.tar.gz/sha512/e4d6fbd518055e8f2a71b89ee9a33728e6e076729adeafc358fc40f47d032b739363c9b57df5bfb3c43244f7b833afc76ae255e70bcf43b81262d74278532a22 +LibCURL.v8.11.1+1.powerpc64le-linux-gnu.tar.gz/md5/806ee9b51c2bffd798c1682867a7a2a0 +LibCURL.v8.11.1+1.powerpc64le-linux-gnu.tar.gz/sha512/20ae5f47ad24e1fba8ecdc3a81aa81acb5c3c224041f12f8be48f9a0abd5ce44117b096a59fc8f861b6f8c6ad9e4177e3a3ba3e2dbecb2078d4bab19bdd4d239 +LibCURL.v8.11.1+1.riscv64-linux-gnu.tar.gz/md5/2e029213e81955f39423733608c4ffa8 +LibCURL.v8.11.1+1.riscv64-linux-gnu.tar.gz/sha512/a634ae9de047bd8a93cbfaa3cd5375d895baf9917b5062653f15472527836b51eeb15006b5e1888251e3f09d8177b489ea9975580fe6d95bc759708fc9654fd1 +LibCURL.v8.11.1+1.x86_64-apple-darwin.tar.gz/md5/56cf7cf4ea22123e516843a5751eea17 +LibCURL.v8.11.1+1.x86_64-apple-darwin.tar.gz/sha512/5ae5569ade42cdf0a1aa8acfda7d1dd3df30d498637f83e93bd9f8be883ae777e789b417be24df83e42ebe32fb67cc328bedac3dc231d3569f585641175ed257 +LibCURL.v8.11.1+1.x86_64-linux-gnu.tar.gz/md5/a4a733fe879693e83e1f05b6ef742ea6 +LibCURL.v8.11.1+1.x86_64-linux-gnu.tar.gz/sha512/2767f49d4a528080a5c7fcdecd8374dd5498c5b1e0d65f58d027f6a9138cd00203732e5da1806b689efbaacb1ee905a6839a09eab35f0174279af314a34fca81 +LibCURL.v8.11.1+1.x86_64-linux-musl.tar.gz/md5/837a64073c3d8fd115cadf4af1b19235 +LibCURL.v8.11.1+1.x86_64-linux-musl.tar.gz/sha512/cf0559b65c213889ab0bad388ca6dc1699891e5cd2c5c34faf80cd60404b5f363eaa624d425fd463407d35c5cfd814c1a9964a2b3b638fa7e7a0a2919980ba8c +LibCURL.v8.11.1+1.x86_64-unknown-freebsd.tar.gz/md5/22726eb8caed9b279e6cddbfa328f2c6 +LibCURL.v8.11.1+1.x86_64-unknown-freebsd.tar.gz/sha512/b9e304575bb0e3241f938c545a91cc5b722e8dfc53d6ad270ea75b8fb05655ae8a03e5f844f5b8a75a84133d0883369bc6c46f0805be37ee840f2f1168994c37 +LibCURL.v8.11.1+1.x86_64-w64-mingw32.tar.gz/md5/aa5ce49a63d216776ef9fc11b6b3b012 
+LibCURL.v8.11.1+1.x86_64-w64-mingw32.tar.gz/sha512/4e7fe5ff82ca9bafbaca476aa51273ee9590058350c889a6dd8a0eecaa024d19f0a26dd7078808d08dfdf2f5751daec51e88dc88253a4638274edb63ae93fd3c +curl-8.11.1.tar.bz2/md5/31dc730e6fff880a6ba92bdacead9d38 +curl-8.11.1.tar.bz2/sha512/30041e15b919684c46b6b3853950cba22e6fbc21157b1f682097277d2b20066ba71e51eb5d2c34bbd81b8bf4c2791255d6492ee21d49f606d71f66e211a6adde diff --git a/deps/checksums/dsfmt b/deps/checksums/dsfmt index edadf5c01b1d7..9d5fa782663ec 100644 --- a/deps/checksums/dsfmt +++ b/deps/checksums/dsfmt @@ -1,34 +1,38 @@ -dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/md5/0299af20dae6bed519635900687f4aeb -dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/sha512/5f20bd7602f09dcb23299d979372453db9a0e76a66129d69cc93c4b45a65ad377486f3cecb7093ff65307f515358420dc318b19eaf5945ff2fbfbe6886e95efa -dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/md5/78a0fa53ad3db17f2849c744246a6bc6 -dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/sha512/b855bf3349f1ee33978d2c35999fe24a91ee17c5af345881e26351099cea05477528da9df43773d82e20917b4b1dd8c5590eb5ebb99cb5c9c425d03e38192e32 -dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/md5/45829fa624e98f806e184cfdbb918a7a -dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/sha512/28823838bba7c1bb40f636835b1f7b15c49c5395a72a1261f3d5eb22c54b487b98bbcd7cd79f12297ffb37c62bfc3b69f0ce9b7036b5c2662d6305497d2e09dc -dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/md5/fdf55ed8c59b6cc0409fc8154777d57e -dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/sha512/8249e8a74ea958ebdd1e4e967d1020158c49643bc33855a5f0043b77026c250371d60b95e8370e0f67dbbaa380a7c02fc1c4eff7d49933a5f471393abc2a266f -dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/md5/4814dcf836033065745f3532ceabeb33 -dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/sha512/75b8df698762e421cbef208cf7fda556f812f7e9c0481f83ddf38e468459ffa6fbdde86b5942f28f47225c73901c9863246f77eed93abd73052b6d0918717444 -dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/md5/bff0a088b3bdf557dcebc48c2b260bb5 -dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/sha512/609b45b7330bbf6e093fe6277dd14c9e23fd8c8c5e4db6a7275d29c6436f7602cd5069a0912a6ae3fd02d492a25c56400166b25c02b7379eb152eba3aa019dcb -dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/md5/5193c1f1c2d056b3ffd265f4ef18965b -dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/sha512/3e1b0af492a83c076087923d317291fd473670626599d3d03ed86a4515362a24610f3a9b2bd4b71c15bf86b03e44a11fd973f9f16d8b01bfdabbf7ee1ea7f4bb -dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/md5/69b959d409030f86eefbe1a0d4196787 -dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/sha512/0ff871b96031c5f11e5c5fbb4fd35c8bf5e3b1fa5c43dcece275bc847a82b89f0f60db5b273bef2dd31572e89c98694fd1cbc2b442ee3a5fdf3b44e8707ef338 -dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/md5/1fd3b4d5169be306b86cca9dfa6f014c -dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/sha512/d5e129abf6ff8a1077bb9de27fdc17c131f26d9c3707c189c02649290b50699f26e39230ef875fd172b54b1e28b1b595cbf835c6a8c36e1101951444e174f02a -dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/md5/2a6ea60fe134309ecafb0efd5364b186 -dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/sha512/50ffad9c0071746acff16532b71d84d53c6f11039aa10167f49ac9293f4819a905f63f521c93a45daed5068df0fea1699a15a1a1d6c100dce0932cce4165442d -dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/md5/060a4ed22e6e36a661b08c804a7272bd -dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/sha512/f1367b910512b996c4e6bfcb4a99afc3640a4ad5ec8e6a2fc092d677c7eb68527800c4d248188a2cd7a2d427cab472a2fdb48978aeab39805a62f774dc58bb50 -dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/md5/d768332dd9902c4e3869a160fb002af3 
-dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/sha512/db3e43ea9b884fb2ddc9585a224d85835ead169f5996ffb20930a8970893f9cbbd8b54832a4fc78745c7dcd7991f973e929965ffded32ae8289c0be68316e60d -dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/md5/671e5a06c68d23854051c78268bfb9ed -dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/sha512/0b8eb9e527cea444fdc33a3089684f9b85a8889370fe0b240718d32332523f1175e38a9b51fdabf4a38bad4a820e956baceac253001213b1fc3e7a5eabf8664a -dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/md5/65929d7a40fea8e8783cdeb77205ee06 -dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/sha512/ce1b49365b764cf67ef4757f91078ea11afc6e07c4a776258a09f58c9ff84ece440d80714a491c1a21da06ea6a67bd27d2933b862dbfecf3c357f3c32ebb4fc1 -dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/md5/e27869ac4f1ea6774ade7d3b53cd301b -dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/sha512/762571a5d5773c2d9780586603859272f48ed67d6c8b09cd95c92fd62dc9bb03c274b12c2c04e05f426c9a42edbbc8e33beba3c79865f2c49459eca2d588b14c -dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/md5/74e5c27ba9eb654b4e998ce73719e724 -dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/sha512/59badcef14b06f14f8f5bce1c72de6750c8310ae18581e24b5d663edefe1bed3d120b4cebb87b53dc664411b62d9802f75aefde4e5236ada1dec740e6ef2445d -dsfmt-2.2.4.tar.gz/md5/ed30e63552d62df48d709dde4f755660 -dsfmt-2.2.4.tar.gz/sha512/fe84e986cbf198172340adfac0436b08f087643eca3f1ceccacde146cbfd8c41e3eb0dfbb062f7ca5f462db13c386abd7c269bc0cbefc9a0ecf97a8a8870a2e4 +dSFMT.v2.2.5+2.aarch64-apple-darwin.tar.gz/md5/4d9e6a1ed07d1fe1557845b763224eeb +dSFMT.v2.2.5+2.aarch64-apple-darwin.tar.gz/sha512/930e12a9b6ac82888f4122515a8a7cc3aa5d5363e500455b33c57efb7656041fe3f0fa68b02dd048b2a9f00abb56449415f1edf600ef09703aaed991e1d6f23d +dSFMT.v2.2.5+2.aarch64-linux-gnu.tar.gz/md5/260e14855dbc7773a2ca906d58cc57f2 +dSFMT.v2.2.5+2.aarch64-linux-gnu.tar.gz/sha512/820ca4c6afde931e855b74015150f4ffbb513276c3fa7dbcc1ec8d34c02d4989fb7424a6e4f81f93d054811b5f54f8633d955b05acdb088387ee90f1c3b00915 +dSFMT.v2.2.5+2.aarch64-linux-musl.tar.gz/md5/7ddccbad6b5c9de4be187fe76637a0d8 +dSFMT.v2.2.5+2.aarch64-linux-musl.tar.gz/sha512/e3c225da00927096e3a6cd4abc681fba8f469cb74828e7054d4f5684d71dcb8e75c9a81f14fa10bfbb78f62f9567a31a92edcca8d797e5810a2a44a3fc17bc84 +dSFMT.v2.2.5+2.aarch64-unknown-freebsd.tar.gz/md5/d592c490259f45acef2308fd61046404 +dSFMT.v2.2.5+2.aarch64-unknown-freebsd.tar.gz/sha512/4f4e100b4cd5301e815f29f911b3ddba845a90247f1d641ea11153f5845c700e6f94ccd4a1d46fbb9e64a0c5698c5419c52560f0629629ffd665cf9ddec24e17 +dSFMT.v2.2.5+2.armv6l-linux-gnueabihf.tar.gz/md5/a70329e0a6c57009c6b6950fd34089f6 +dSFMT.v2.2.5+2.armv6l-linux-gnueabihf.tar.gz/sha512/4418c42165660adc050e872ef834f920c89ed6a0d2b816821672b1e862e947aad7efd023289da9bf05bb2eb9ec4b9d2561c403e2d5384d5314a4ba016b1f9cfc +dSFMT.v2.2.5+2.armv6l-linux-musleabihf.tar.gz/md5/6ffc798b8a0c847fa5cb93640bd66ab3 +dSFMT.v2.2.5+2.armv6l-linux-musleabihf.tar.gz/sha512/94e5ae07d0b1420abd7290519bce6f77deae634bbb4df31e3f02416bf509e555a9b1c9d19dd77ca76a308c2b86d5c9d4718b9ef83c13167b88a8181d8ca7e73a +dSFMT.v2.2.5+2.armv7l-linux-gnueabihf.tar.gz/md5/660d95aa08580ca1716a89c4d8b1eb24 +dSFMT.v2.2.5+2.armv7l-linux-gnueabihf.tar.gz/sha512/bc757a9f805047be5375f92c10a3f3eab69345a4ec5cc997f763e66be36144a74d414ff926df8e17b9d5a2394189269c3188c55e0b7c75a72495394d65510cef +dSFMT.v2.2.5+2.armv7l-linux-musleabihf.tar.gz/md5/78c487049092fe61949d506637c713bb +dSFMT.v2.2.5+2.armv7l-linux-musleabihf.tar.gz/sha512/03ddada4478f05eab7d2971b2deaf2cba91f084d7ce66fc8219bcb3cf5c308ea13959fed95568ca80f4ce11794e197092984919265716de8f2558e2cb30d94ce 
+dSFMT.v2.2.5+2.i686-linux-gnu.tar.gz/md5/11463fd3981a8c143d7aed691d18d4e0 +dSFMT.v2.2.5+2.i686-linux-gnu.tar.gz/sha512/db946a4fbd8a3163b8b1c25e02bfc4a841da7d2532892a99037bd48ac98e1840691e8cc0127d9457a82667a0131e4826cb4e9d0a13f127afc62da4eb68af5a3e +dSFMT.v2.2.5+2.i686-linux-musl.tar.gz/md5/a61405f72c9a3bba5718f078c68e61a5 +dSFMT.v2.2.5+2.i686-linux-musl.tar.gz/sha512/726f130bbbfd0dece4185b89a25a73f3b5b950ebfb7f86aea6e9cbcf9ae932e591d20b854de0b4985103dbf8b4b7cb3560661c5070af971cd2c1f3ec3e1ea7d2 +dSFMT.v2.2.5+2.i686-w64-mingw32.tar.gz/md5/3bc27ef8f26c7a26f096cf1d558d408d +dSFMT.v2.2.5+2.i686-w64-mingw32.tar.gz/sha512/ea3608d3ae3874ea57a1a08f69abe2a1638bc340db71c6fe3c4fd5637d8c54943bf16b099a46817387c1ed4cb5f3cd1c0ff19ae8a4ed85dd555555821af06374 +dSFMT.v2.2.5+2.powerpc64le-linux-gnu.tar.gz/md5/fd8c73961ef7c82201e6d86e8bf4324c +dSFMT.v2.2.5+2.powerpc64le-linux-gnu.tar.gz/sha512/1bd0ebd019cfc6f25f7ba007547c5ee297854655b93c55e90d8ead420875de5a087e38956693d5e901ff2abf667c72aa66fb34f587b82adf4b91b3d5d666b5c7 +dSFMT.v2.2.5+2.riscv64-linux-gnu.tar.gz/md5/5c4981c2c016436faf6f33fa8df4204b +dSFMT.v2.2.5+2.riscv64-linux-gnu.tar.gz/sha512/9b56f0abbfb2731d23b99b5286b69c31bfc21eb14f49d88953680d5596c20c6b4d59520828f0a398915d56c82e169a36316f8e319dfe4e25a8e3f44f2aca4938 +dSFMT.v2.2.5+2.x86_64-apple-darwin.tar.gz/md5/e21e30097f1f02c5cc14cca3f73ce92f +dSFMT.v2.2.5+2.x86_64-apple-darwin.tar.gz/sha512/48b19706189eabcab2c823e6143ae22f4a330abb239c7a952913fe9973c5f750d72b113af32a82a1f6124c534495b26d1f81ccab407d8d15ee459dc83fb8d3cd +dSFMT.v2.2.5+2.x86_64-linux-gnu.tar.gz/md5/fa671f4ca14b171d53c8866d03f9162a +dSFMT.v2.2.5+2.x86_64-linux-gnu.tar.gz/sha512/2e242a1448da0508ea88cc1a106f1e74f8d7e7562cd82b80d86abf9a8b454653ad7612e25c30ce00c23757e8a5b7b5736253b00a52f9473af6c5d4df768138f2 +dSFMT.v2.2.5+2.x86_64-linux-musl.tar.gz/md5/c648294163882ec539ab646542c74880 +dSFMT.v2.2.5+2.x86_64-linux-musl.tar.gz/sha512/9e96a47d660854b6517364f0db40a2f4e0e3b814499a0349f7cf550b1c8d04589fca5eb4a75bf34f36d1b5d1b2277b3e9a961c887092abedd08f438e025329e7 +dSFMT.v2.2.5+2.x86_64-unknown-freebsd.tar.gz/md5/5a9b811be74f02202c57588f35582cb6 +dSFMT.v2.2.5+2.x86_64-unknown-freebsd.tar.gz/sha512/8dc6cae5cdf038fd5647cf86b85a15ac082d35b4532340e145b7e091839079ff47371aef6c3012a67692e492622b4f84db8f0ccf46049cc94926aed5c9cd9fb4 +dSFMT.v2.2.5+2.x86_64-w64-mingw32.tar.gz/md5/386adb3b7593c222dc7a1060a1356b21 +dSFMT.v2.2.5+2.x86_64-w64-mingw32.tar.gz/sha512/fe2ab5021126807b37042e89a22ef9a869c6a0a028680df445773b2affd11c2b02148be07d53504ea3842bb38bb62fe039529688266c1cba3545a892bd4dc185 +dsfmt-2.2.5.tar.gz/md5/d22e476b52cdee7d5b90d2f289570073 +dsfmt-2.2.5.tar.gz/sha512/951e8669350f750b8915a819e704eae0a9b9c9518b3e3b9a1905f9ca0d25cc4c2486cb479e258a4a114e9c26ceb73a6c4e9f1cc02ed19173aeb8f20189754f6b diff --git a/deps/checksums/gmp b/deps/checksums/gmp index 0c45aa6a00ca9..949e4d738a472 100644 --- a/deps/checksums/gmp +++ b/deps/checksums/gmp @@ -1,60 +1,66 @@ -GMP.v6.2.1+2.aarch64-apple-darwin.tar.gz/md5/37a4c537149a1d6d7424833294e61dac -GMP.v6.2.1+2.aarch64-apple-darwin.tar.gz/sha512/33dd86279b5b3b08496180c92971c2e7ef84715e9ed3a80071a178ee94de6231ea3cf7b4dd4fa7e0dbd0b386a1a04c4f6b28446e86cb92c100ebb295b2f5ee3a -GMP.v6.2.1+2.aarch64-linux-gnu-cxx03.tar.gz/md5/44ef76b228cdc4cf54e5d4b40a29034d -GMP.v6.2.1+2.aarch64-linux-gnu-cxx03.tar.gz/sha512/255a680c75d3e8ca542dffc47050adfce038e25a12a4131c18dc719d36b364c1a6488ee5743d1c5de445b4bc5ccbb932399f7071083d86fe5bd2befc521cfbfd -GMP.v6.2.1+2.aarch64-linux-gnu-cxx11.tar.gz/md5/0289ffc3621b5d62dc2f9e1b36c41f9f 
-GMP.v6.2.1+2.aarch64-linux-gnu-cxx11.tar.gz/sha512/f27b82efb5aa1d7eaaed7574d3312969664eac38f45cf40c6de13ca20b256d45481546fc1a402e6c04bee416c842a092a4e57b8df702bbcdc52f742555d07aa7 -GMP.v6.2.1+2.aarch64-linux-musl-cxx03.tar.gz/md5/9ff4c76804f59056b49a9bf5b6a02099 -GMP.v6.2.1+2.aarch64-linux-musl-cxx03.tar.gz/sha512/d86afa10bdc4e20fa259a17ce7d0a5dca2524b42752bc7d5c33e4323973587d234d4c420900deef34670bfce8ab8c6725e7edb45bfd3896b2644a42ec187dfd7 -GMP.v6.2.1+2.aarch64-linux-musl-cxx11.tar.gz/md5/cc9857a965afcdcbc2b378a368360690 -GMP.v6.2.1+2.aarch64-linux-musl-cxx11.tar.gz/sha512/c46bff9fdcbecc71c12914dadb31ee9fd5b4293cb45bda782200daa18d7f7e8b588e0c0f68a39c2fec7cc3d026bcef3620dae35ae2dd3acf2505dcfc084d11bd -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/5b3343367896e31b29571fe0d2b90390 -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/65a501db63c386727aa336d6dbecdff0417628bc9ff7ac1b2161922246d94f8caa71b63fc3789ec6bb10aff03b96d5d0c22c37c82bd95d74e557df8de7e8a09c -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/cc04dda18412fa11f228e66eb5a03aad -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/49fdd452fe8f0129ee06795e04a0cc0238132f9d6f60a124dd2c7395fabbb71f005c16d95fdc00d87f8bf82b048cc54e07f162fbc38223c644854cc72c4d26b0 -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx03.tar.gz/md5/675599595f3dedb8ca11151168da7110 -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/eedcdc2230fd81d613d54be356679a97b59491f5f9a17c518239b5504c3dd5da15721d553f57ae21f1c55d253e808e7afd1d1651b8c666379c55c7b48f71217e -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx11.tar.gz/md5/9a74abbc46439ae8268ca926f0045691 -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/6329506f7a886d0dd907b051d6cbab1bd0cd21b2d5715f55402bf9ad6cb1ae33e058931bdf6cba17658b0e455f9e4fb7f9aad274755a159106cfe1c4d1ea328a -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/8c20e0def927a202f2d23aed78aadb4a -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/b7f42efae6fce864c9e07714056444ba74befb9cc9a766ffe14e676240f23f83d3241b1bf3a8f4a282acbdc197287fffb27dadedf3055505ad63bb0b9df573c6 -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/423a625816b3c52efa6021e76f6009b7 -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/21cbbfd647d4a7c884344dc66e0fd83d654d22c3338669539e8eab515bdc6bbd772b47f949d28280789e4343e9a8d6319a73dc9e11c23da381b8a452ef7fb098 -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx03.tar.gz/md5/7d67f981538d7a69ab1e458a54bf56f4 -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/8aefbcddc326d4ef289dcdba8d3bd56a5f9656a7be30c83b4dbd9a0b8ee26a963c6a2f4294c94b8a8f2f712f1e1c9e17b8b9dcc9967d64294ca466e51656f7c7 -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx11.tar.gz/md5/ed8713b71636ea75fcc0c9fbc4a8618d -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/d7f50d06a256fd9176d5fbf682ff599a5ffba62bb35fb37321ab41e88970921a9d9fa4531bd74e73e471c7e15fcae568d0536d3e32a2b2d7f81dc9cd1f0c039f -GMP.v6.2.1+2.i686-linux-gnu-cxx03.tar.gz/md5/875f0bc57172788cb80ca2b80ff3065f -GMP.v6.2.1+2.i686-linux-gnu-cxx03.tar.gz/sha512/808a3c2422b5168260dbf7a3875d5c8151e10b20a8ec87a66bf08f71ad7cf5de20fb7a4f3457c3ab2b4ffc9627764c743baa96f409629c70f2233ea7a5b628b9 -GMP.v6.2.1+2.i686-linux-gnu-cxx11.tar.gz/md5/09ae13f2a6a0dc317d2bca5700d2bf59 -GMP.v6.2.1+2.i686-linux-gnu-cxx11.tar.gz/sha512/9c986e2904247de937e30c05b29e0179986d7747b217468c59bc56af6d4c48d4575f24dace521dc8d66d84230eebd695fe0538972bfd744182ca940a23a9239c -GMP.v6.2.1+2.i686-linux-musl-cxx03.tar.gz/md5/45f53fd95dd69a6ee6b43463976b5aa6 
-GMP.v6.2.1+2.i686-linux-musl-cxx03.tar.gz/sha512/4df57d6c88f0ff86e0ee78da8f6ad02decf7a38884ae8c785c114e0e38e791b733e0d046c90712327c08645dd40b7f0391fcb3258cb3bfb8b6a62c59c27d6e83 -GMP.v6.2.1+2.i686-linux-musl-cxx11.tar.gz/md5/8b15988bfb1ba0543eefab73b3ac3439 -GMP.v6.2.1+2.i686-linux-musl-cxx11.tar.gz/sha512/e32dec7ded9bf6fc26033df83521481dde851c68d7cc45efaabeded7603417cdc5016de45f78a956b69aaed00a55a91aa8b1cd5bbe5431b01074dafce2c47751 -GMP.v6.2.1+2.i686-w64-mingw32-cxx03.tar.gz/md5/4138d0b5185f722aef4e1f215f381275 -GMP.v6.2.1+2.i686-w64-mingw32-cxx03.tar.gz/sha512/255d4ecf178b9440b667c56e542baa4422d731f83a67accd41b76268274c2344fbbf94979fddbbd1f6b5751bac2d228a8ef49a93365de78c1772146edd1b4845 -GMP.v6.2.1+2.i686-w64-mingw32-cxx11.tar.gz/md5/606b4b453af25ded1323aee9e085c132 -GMP.v6.2.1+2.i686-w64-mingw32-cxx11.tar.gz/sha512/8605b764ff6e5d81767432fd8e70c25c5ad76f2cac7c2b3d6ed0596df692300973803487c970a896a0a316d46de3e3cae31b21d4e11fe2961e228cd389da13da -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx03.tar.gz/md5/3fbd157df4ae738da6820b26fb75e75e -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/6e64c5c4e393c0001bd7085e627126134b5999c2d8df2fa9b72c9f9835d6b0f0ad440a2f58fe6537ec446a517f8df2667881871fce9b4d61c356d2b52080d641 -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx11.tar.gz/md5/35608e3166278d52a482d7e19313eca6 -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/a9550fe2b94e0e111a487159c0cd8fb6f1a21b8941ada7bb281572079dbbece921f80b0275bcc8f88117ecc72e7f8e93219350f5444b67295620db1aa9ae947d -GMP.v6.2.1+2.x86_64-apple-darwin.tar.gz/md5/b5004a436660a2533b94b41c592b686c -GMP.v6.2.1+2.x86_64-apple-darwin.tar.gz/sha512/b7b4dc8025ce304c5b899084f42c8f5aad5bbe03509bada17dbe6be952f98306729180a22b5d0a095692f349406db0b98f99f5e3f2be5f2165825e6f7f7d1813 -GMP.v6.2.1+2.x86_64-linux-gnu-cxx03.tar.gz/md5/47ba899c9ac714a4594f999d845f45cf -GMP.v6.2.1+2.x86_64-linux-gnu-cxx03.tar.gz/sha512/99624ec71865d6285ab409ef54f4cf12ba246de6233de56a2fb9f70806574891539efed32e711202003570c157918fde8d53534c695fd5b8476e0d4e0ecd1bd4 -GMP.v6.2.1+2.x86_64-linux-gnu-cxx11.tar.gz/md5/3b0c1258ecafcaf96e549f9b979420ee -GMP.v6.2.1+2.x86_64-linux-gnu-cxx11.tar.gz/sha512/b94d8f25d23597f96cc0cf0aebd1708755a8714ec4a481108add852b77addc737d3d8feba566ec410db019698ca2de826583b1a6105f0d2188679e7f72331df0 -GMP.v6.2.1+2.x86_64-linux-musl-cxx03.tar.gz/md5/061cfe5f416c1365e98d6b1ed89abd63 -GMP.v6.2.1+2.x86_64-linux-musl-cxx03.tar.gz/sha512/b6847f7ff599fa811851788a6ec6ce69ba02dbb3672d0a64b03b7056b35215536b059287709b3d207bc977094e994a7d744061b7ecf95886510285489bb89578 -GMP.v6.2.1+2.x86_64-linux-musl-cxx11.tar.gz/md5/81911acbc0c3607338c6455b1798cab8 -GMP.v6.2.1+2.x86_64-linux-musl-cxx11.tar.gz/sha512/e007441194abc5c80d9521a17e2ab9e6fb54f319571f4045fec2f7464ffaa99652d3252416c15d110dbf9deaad2c1dc94f81c638e28ce620cf543f554eb7d1e0 -GMP.v6.2.1+2.x86_64-unknown-freebsd.tar.gz/md5/ef7173194848e8d00d73ef05fc520f0e -GMP.v6.2.1+2.x86_64-unknown-freebsd.tar.gz/sha512/512c3cf8fb951fe0ef7b1715b78202d0bdf5844fe33e16c4674a19e6335440fb5352d7bde71fce83e8e373efe43281d05b160b11657a582a9d3a0201ce97a189 -GMP.v6.2.1+2.x86_64-w64-mingw32-cxx03.tar.gz/md5/882c6749f217f5a691b744ef728ad089 -GMP.v6.2.1+2.x86_64-w64-mingw32-cxx03.tar.gz/sha512/53424ad8a9dcfb8e0e738d4521b2ab1c75aaf54668a54a76b8bcab2404308e69b531dc25b3dc18bc8eaa7ebd9e2914d6624c5d371e6c0ecb9e8d24aa575e99ab -GMP.v6.2.1+2.x86_64-w64-mingw32-cxx11.tar.gz/md5/bcdd7bcbc69161744397d249a9c82e45 
-GMP.v6.2.1+2.x86_64-w64-mingw32-cxx11.tar.gz/sha512/b7f8fb4f5aaf5034d4d2f60e29cc7b5e06c13d4b677af30f30831e1fc95925a575275ebffda36efcc09e29ccd78ba56475c1be3ad0627e28862057764f1ef74e -gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea -gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9 +GMP.v6.3.0+2.aarch64-apple-darwin.tar.gz/md5/3fb601fcf70024fcc40889cf1b958441 +GMP.v6.3.0+2.aarch64-apple-darwin.tar.gz/sha512/7ecc97c1f22287e9d7f3e8073e1cc3c6b3c75aa4a350a55a0b6f92c5bf60339b52f8866994f5973077e1026b9d3b10a7bcd71ec2abf25c3cc1bf6ca1041c3e73 +GMP.v6.3.0+2.aarch64-linux-gnu-cxx03.tar.gz/md5/10581945c01bac319c9c2d76f1f7052c +GMP.v6.3.0+2.aarch64-linux-gnu-cxx03.tar.gz/sha512/3aa2799ef7783a4edb767a695bd2797776def8ce1b2dc471b2cc733371db9981d6c3f395fee2fb50b13c7ef74c1521d2787c29dc60a75e1b92652b94819b5364 +GMP.v6.3.0+2.aarch64-linux-gnu-cxx11.tar.gz/md5/c1f9765fccec8ec131faa5e31b7ac28f +GMP.v6.3.0+2.aarch64-linux-gnu-cxx11.tar.gz/sha512/aebde82400544dc7a2aef0a4531cee78f9abcac9352dfd5d86472a70d704b281de03325cc609583169ecbe4cb64623ab04a3d7fff9cf24c70991530fe530aa05 +GMP.v6.3.0+2.aarch64-linux-musl-cxx03.tar.gz/md5/b1f771c79f3b380555c1c96232074523 +GMP.v6.3.0+2.aarch64-linux-musl-cxx03.tar.gz/sha512/daca9d3b4179e99da8e61f4010f5965718c79d02627e0b3272e4d20c34dac0d933408dc7d760a6d6fa09546e436c800ad5da4a1d34283eac9558f3d2f97bebce +GMP.v6.3.0+2.aarch64-linux-musl-cxx11.tar.gz/md5/523c386457e9d48430b83f2db85ac10f +GMP.v6.3.0+2.aarch64-linux-musl-cxx11.tar.gz/sha512/18155dd92641bf6240606d23b0d3cab16bb9b63b6034a7c7c61f3728fb48a6b710fdc21c6477145c015c648557e97003b0cc6087b4b36a691daecb87272cd51a +GMP.v6.3.0+2.aarch64-unknown-freebsd.tar.gz/md5/7dd3f2813fd7e9e620a8123ae2340ab2 +GMP.v6.3.0+2.aarch64-unknown-freebsd.tar.gz/sha512/375b12dee41285b65b5cdd55f6b000a90fd431c3eeb788a928396a102594fb6fad257f2c4e707f11ce7d0e4d45bc82a77ac85d8a48fa0a42f969b48b8b2c1c23 +GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/7d23f84102362ec3974ca2d84da33c4a +GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/51e419159fad75ca0ab12c31db29259be6fa280e66e2b980df4c99a0558615297741f633322978a409fbc071ec71834214b12d27d04ced0c043c569438dabd12 +GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/5f809ffa56ec07cc04e3c4cb155faad0 +GMP.v6.3.0+2.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/e394afb93a2c0aebe0ac7887bb2610720cb926256f0f5e7b05f3b1a805d3f7967fb97f4227ccec049df554c6cd1c4d4e9414fc4fea33f201204dd87e207e33ff +GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx03.tar.gz/md5/494564a56197edc5b8772c15eca7b117 +GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/a7bd8bc19a030c56edd4d91e3cff16d78d4a9c1c1bec99897e55cfaca7e14cb99cee32e220473e207b78f0b5e0c0bf188c679d1748c010380485fad4d89758c5 +GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx11.tar.gz/md5/751c36d4975d6ff88eb968123afc1845 +GMP.v6.3.0+2.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/af471834ba32a970b4f358a263434b03e169dc48445aa5af412ec51e70668a41699f9c408d90f64b06dc9233360f70a03df859428fdc0d759e5696a3ae32f3f4 +GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/ea9c867ae191a29647e8ccfb67947bc6 +GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/d6c44c945d1ef869155be087320d7750be549399b186aad8c92bba32ff5312bf09cbb2fb57be91be237be7d50f8f6ef0aea67070f50c024e6f5302485f405d5e +GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/ee5becfac9fe3c448a5de322ddee66d7 
+GMP.v6.3.0+2.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/bc9bb2ad83644cf0b9f2bb0bfce28938ee6e82dbc0de74d1f411a8eb5ab96c5ec00c648019384ec07f34a469bd984d6c62eac1bcb803eaa013b6c85547ec3277 +GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx03.tar.gz/md5/23962e487398f02c8d660724d88bf7f6 +GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/4c561053f79ed976a698c7382c5c94ebcbcd25ed27c939016bbb4af59948fd6bfb82e494e18fc7b4969941a7756c33afd2f177b3158f1b3d659215c25c958d2c +GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx11.tar.gz/md5/4734feb61dd3f2a4e6e395f9ac7ccf57 +GMP.v6.3.0+2.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/088a52c372681b4853fe7c4c70eb8625b58df6d79eea2a8982bd781458188930aa31dd9a121ff7a6d00cd8165f5d126155d7f931100aeff256b55a2281d44a90 +GMP.v6.3.0+2.i686-linux-gnu-cxx03.tar.gz/md5/e229a7a09d6c843f03028b036a54b786 +GMP.v6.3.0+2.i686-linux-gnu-cxx03.tar.gz/sha512/d92cccfdd7abe3ca5c6ee1eecfe3f7aebe875ca6b9f6257bf1181dc5ee9c873a930ebb2accc825596ee26dc45bd290a482f0405cfd7a3a1b0eb606f5ca897b70 +GMP.v6.3.0+2.i686-linux-gnu-cxx11.tar.gz/md5/01dbe43b15197cd39351dce91b3a62c9 +GMP.v6.3.0+2.i686-linux-gnu-cxx11.tar.gz/sha512/d6e7ea99f76e10b4f7733d8c7f4af3fb2fc09618510c222da1fb95e8b4c83b0aa7c5d2f896bb620546bf39041d6dc1b32ca74ddf5024ef1beb5526b374ba885c +GMP.v6.3.0+2.i686-linux-musl-cxx03.tar.gz/md5/ce2f8d8b59228888cb7f03da0c1aca70 +GMP.v6.3.0+2.i686-linux-musl-cxx03.tar.gz/sha512/cc024a2ca4b4f042c19f667c4c3c08e3041d9b9ea0279cc668a3c0212103e86444abbdb323304e05c506b44b3c1b32a55f90c04cc32e9d26ac013336821c9ac1 +GMP.v6.3.0+2.i686-linux-musl-cxx11.tar.gz/md5/c37741b3a03ef2e705d45124eae25afa +GMP.v6.3.0+2.i686-linux-musl-cxx11.tar.gz/sha512/c343ad2ea47d5775e6e4c50fd8d46745d39f3632f4ad479199f7583fd02b08a0126048625d3999b23a0534e4f5c2bf19d021436229689da7c794427102c7780b +GMP.v6.3.0+2.i686-w64-mingw32-cxx03.tar.gz/md5/52a773a2111f7b1f938e78263c4608b0 +GMP.v6.3.0+2.i686-w64-mingw32-cxx03.tar.gz/sha512/6ef89b7eda8f0709315c1080e4d57810f976939c755f160e34b04e4c199409c8c707036fae5a73fca3a16813cb4ceff8daca38d1ead73e36d7ff23506e5bb4b1 +GMP.v6.3.0+2.i686-w64-mingw32-cxx11.tar.gz/md5/88b1ff47d913fa301c95e9e2aecf42ce +GMP.v6.3.0+2.i686-w64-mingw32-cxx11.tar.gz/sha512/3d631ee81906627a8bd9194fa8f18b634467565c10e5e08db7d1a4b0943bae9391ae15a1c39533c9796edf24e1f0210d082e44dc7c1fbd9f93855f37e207da07 +GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx03.tar.gz/md5/0b2c73cf7936500ce0f07577c4c76ba5 +GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/30e099bd6384e801fb28b4741810122f82ab0760a4e09d6ab28559b72feff278a48150579907cb2920a624fc85287a197743331bc1808353d0855c198341bfa1 +GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx11.tar.gz/md5/f496279b474948435f836ba39291c708 +GMP.v6.3.0+2.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/c37d4fbba284af87fc16a24bf1fdfe80b42c84bd44f1859d1c9ee97fdbb489817b58db80a078729e19c8a5b8448f9234408a8e477fd15acf15521f3129e86acd +GMP.v6.3.0+2.riscv64-linux-gnu-cxx03.tar.gz/md5/f07fc6751104a407ea2515fda3f26880 +GMP.v6.3.0+2.riscv64-linux-gnu-cxx03.tar.gz/sha512/435b375da747d2dfba06a303b55118471c6ef705cc65afeabb5a59477cc98aa9a956b31c5e8b571126f63d922498b9a66510f8f6810a60f6a4fabba5ec368cdf +GMP.v6.3.0+2.riscv64-linux-gnu-cxx11.tar.gz/md5/493c24a7a7370f308f0da2955f40b5d5 +GMP.v6.3.0+2.riscv64-linux-gnu-cxx11.tar.gz/sha512/2e1a7562b759219d1a4283372e66fa1e907279c5b5feb8a858f6bd8de8b9c2ef3ddd09d5e812d93813fa781090574fd26d0cec85b211274db628681301a206f9 +GMP.v6.3.0+2.x86_64-apple-darwin.tar.gz/md5/c3bb785e10fe19cf1c47db6bc5e98fdd 
+GMP.v6.3.0+2.x86_64-apple-darwin.tar.gz/sha512/5280896654e1c7864d770ecbfc853a1c7837c2b1dd369047432d10f831762a26fdaeac4201ca419d8bf7c545c107800b892660f4484b5eb87bfaf42c919fb640 +GMP.v6.3.0+2.x86_64-linux-gnu-cxx03.tar.gz/md5/0fd62bb914554c3cb6b5dc0f5ec0d330 +GMP.v6.3.0+2.x86_64-linux-gnu-cxx03.tar.gz/sha512/78cdf0cdcdca4a0ddc87755f4afdb8f290fa946b3c5541a3e31145f8bd905884d59f38e9f5ee4fe96ceaedaf90881af795f4e3ecf1be922103b838964da101cf +GMP.v6.3.0+2.x86_64-linux-gnu-cxx11.tar.gz/md5/02f54f8895bae0d7a824374888300744 +GMP.v6.3.0+2.x86_64-linux-gnu-cxx11.tar.gz/sha512/83c865f6164400e56c28949c680cf92457daa270b745d89034e1bcc46af1eb93c96bce708561dee03b58162191f6448e4325e921daec11083bbc42dcf3a1ffda +GMP.v6.3.0+2.x86_64-linux-musl-cxx03.tar.gz/md5/8f3f26422f8bd0889b5c2ecd22d97101 +GMP.v6.3.0+2.x86_64-linux-musl-cxx03.tar.gz/sha512/680beb99936433bc1c3367e85f3a4129c5a99d4c4031a1da919293819f6d3f1b85be801a2f48af352c47d7cb6f394534333f1a0d0404ff41899952d55c4b1f75 +GMP.v6.3.0+2.x86_64-linux-musl-cxx11.tar.gz/md5/7ec0e3e9125c14a20d6d0044036f0996 +GMP.v6.3.0+2.x86_64-linux-musl-cxx11.tar.gz/sha512/c22e6a25ec854f9c199d5e76bc1dbcbe57c4cc219eb2b5f24418729252eee1a5c1d3e8bbf5b62d148cb408595e96f448f68a29a9425a902952bee666b6f051f6 +GMP.v6.3.0+2.x86_64-unknown-freebsd.tar.gz/md5/6782d7fd0bd15c189c4a1753ee0fb0eb +GMP.v6.3.0+2.x86_64-unknown-freebsd.tar.gz/sha512/04d7a95337e832f7ec228f160a09b74ed7908ef9cef1bd392555392a24ff63ce4a88b616b5426cd710dcb581e164bb94c04fe17f0b599adf3c3bc33106bcd886 +GMP.v6.3.0+2.x86_64-w64-mingw32-cxx03.tar.gz/md5/b4cb31e93c85cd453b7d8d392a365088 +GMP.v6.3.0+2.x86_64-w64-mingw32-cxx03.tar.gz/sha512/3bd84fa8f580b272eecb06077ef710ae8df661126e86afa2c901b298a2598975a07f840b922da0066dbf555f03376cba1b7e4915cd37617341fd420b6707276d +GMP.v6.3.0+2.x86_64-w64-mingw32-cxx11.tar.gz/md5/2342842254e7b47b26836366d29d6802 +GMP.v6.3.0+2.x86_64-w64-mingw32-cxx11.tar.gz/sha512/fb12be14433763d9de689a5df222802cd79d5c990da9a53855fd2f6f8e663a9838b444a310318c059cdb4962eb87d0d4cc2b54d163cf82b09377339c8e45510f +gmp-6.3.0.tar.bz2/md5/c1cd6ef33085e9cb818b9b08371f9000 +gmp-6.3.0.tar.bz2/sha512/3b684c9bcb9ede2b7e54d0ba4c9764bfa17c20d4f3000017c553b6f1e135b536949580ff37341680c25dc236cfe0ba1db8cfdfe619ce013656189ef0871b89f8 diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2 index a70a404ae6843..c1906c995cc73 100644 --- a/deps/checksums/libgit2 +++ b/deps/checksums/libgit2 @@ -1,34 +1,38 @@ -LibGit2.v1.6.1+0.aarch64-apple-darwin.tar.gz/md5/62bb842de0ede8a7c2b119cfa7402a61 -LibGit2.v1.6.1+0.aarch64-apple-darwin.tar.gz/sha512/e5117912419fd73138779322d5cb84454c641aad87d0df7d44b5074c96576fe1ee3822dba18c8207dacc9bae2b74cef87353d5c519fb7fba8ea89c858415f993 -LibGit2.v1.6.1+0.aarch64-linux-gnu.tar.gz/md5/3f42f283a9f550841b285216d681f3d0 -LibGit2.v1.6.1+0.aarch64-linux-gnu.tar.gz/sha512/0a793bb239976946941af5794cb45cfd7d1d99b9aa125800aee9337bf9d9c5152bcad258f75d987a7af9b547ea906ee2beebe7b8d2c8cea111e6878df0eb3ea9 -LibGit2.v1.6.1+0.aarch64-linux-musl.tar.gz/md5/0f20cee604380bfa789334b5544b1cab -LibGit2.v1.6.1+0.aarch64-linux-musl.tar.gz/sha512/86d7e6a64bf24f3e69dfa4383ed896c5d8a915e19f6f0351e8cf38361352347c827f79032fd8576ca9bfb94dc8db4704d35540ae67b46d671f44ab549c6ceb49 -LibGit2.v1.6.1+0.armv6l-linux-gnueabihf.tar.gz/md5/5c025b4c9065c0b481c7b0f6dd7666a0 -LibGit2.v1.6.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/5b1d3472df47462b3e38c5a5b3400d90038b1637a7f479e9fe04ef046849c14d12301328498429a9f290ff82b6343ccd9ae7616c5ff1d5fd83f35559bedf8747 -LibGit2.v1.6.1+0.armv6l-linux-musleabihf.tar.gz/md5/8015b63706e6d5826779f870681ff865 
-LibGit2.v1.6.1+0.armv6l-linux-musleabihf.tar.gz/sha512/e3c8c46d8da8df409b2dc7c476da638da2c79974270390b84473ebefb66f26cf60647445c2b141f7b6cf45655de12404deea30731b812952fd9156acbd7344a1 -LibGit2.v1.6.1+0.armv7l-linux-gnueabihf.tar.gz/md5/74672b31da80507609e59b19448ec415 -LibGit2.v1.6.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/6c6365501abeffc7e796f3b67a139e93262dab1550ba5fe6ead179c0a9d32c62bab7b422b81524d7a367ca1032c7bfd2b3385155e364fc267f660dffa8eee39a -LibGit2.v1.6.1+0.armv7l-linux-musleabihf.tar.gz/md5/057c22b3fc988a98551fc319eb080c39 -LibGit2.v1.6.1+0.armv7l-linux-musleabihf.tar.gz/sha512/edfb8c57aad5499fae88f09a17e905b4c009e2a8781727566321a858f3ed8a4bcb75b990ae5ad4ac57bcb2b01bd2dfbe0375b01a41405c161106881c8859aa78 -LibGit2.v1.6.1+0.i686-linux-gnu.tar.gz/md5/ecde35f4ca6b4a03f8491d90480f33b3 -LibGit2.v1.6.1+0.i686-linux-gnu.tar.gz/sha512/ca77a1b3c381be2286be9134d7adfde51fb38c4bc9dcb3f56cf1840809c40c484c843cf4ed8d77c538889e06cbef2e5d1b4468739bf761cc91c676a0dc5a34ee -LibGit2.v1.6.1+0.i686-linux-musl.tar.gz/md5/1a56e7832761479fe911b8efd66b5b73 -LibGit2.v1.6.1+0.i686-linux-musl.tar.gz/sha512/e929261ba9564762d2b3c3191dde216caede5c436b84a00d08706a708436023430a9a762cbd94bf96e903a230c690ea28787ee08208d5b50e51d98e56587b30f -LibGit2.v1.6.1+0.i686-w64-mingw32.tar.gz/md5/671a1c045725877e1a4f55b42fbb15b9 -LibGit2.v1.6.1+0.i686-w64-mingw32.tar.gz/sha512/5b0e78b5f5f24b7ee8c88d704bf58043626174d9e8e28226b72873f62d0ff6a6f87d6200adfd613e35c27f6d127d967f49a1f7ef26ded8d1b08c89589b59ce85 -LibGit2.v1.6.1+0.powerpc64le-linux-gnu.tar.gz/md5/4ffc17733025ac94e525f8d9416713a4 -LibGit2.v1.6.1+0.powerpc64le-linux-gnu.tar.gz/sha512/a382f7f15484426d6e913c9cd54facd63573650449f1a2d7b180f1905b79dc75280fdb48ff9e47ffc1ef70c9941d43a6ca35e21bc9746172689886fbbc9d65a4 -LibGit2.v1.6.1+0.x86_64-apple-darwin.tar.gz/md5/af4192c866787ce226fb7a6d5229bfa2 -LibGit2.v1.6.1+0.x86_64-apple-darwin.tar.gz/sha512/18bac55bd7bcd9ea66002c98717ef358710aa689c9bff63be77de1cce4db2082f023ee577060f6ed11e3830c2e751bf2adae1a9b232570a090031c5246f29edf -LibGit2.v1.6.1+0.x86_64-linux-gnu.tar.gz/md5/d26008f39b244ab0caa804ae0365d69b -LibGit2.v1.6.1+0.x86_64-linux-gnu.tar.gz/sha512/3d6068d2165c012ce66317cc0993c374df43cdb2dcd584ec7966f602062428d4f5e18d157c7aa19572affa1e9dcb0346105a01c64f8e5ac01546aaf7b5d99439 -LibGit2.v1.6.1+0.x86_64-linux-musl.tar.gz/md5/fcbfc9f15ffe3c4b2ea055e198795e96 -LibGit2.v1.6.1+0.x86_64-linux-musl.tar.gz/sha512/16bb30defa9d23e6025e3729e313766940105e02f00168e61bff81ae38beae9ae050a5fbf2307083b3cd89d364aa70a7042b94062160fda2174aaf5018f3e2f3 -LibGit2.v1.6.1+0.x86_64-unknown-freebsd.tar.gz/md5/a4fe2ed51c1ac1aaaa4f46a00714d85a -LibGit2.v1.6.1+0.x86_64-unknown-freebsd.tar.gz/sha512/bba31901fcd8b2e69f43e9645c028be4c840b3d9afb4e92e64c9ea46c7fb44dfecf14f99cde586380ae0508fdb8402d3bbe93ec7b38219fe7806299b70576949 -LibGit2.v1.6.1+0.x86_64-w64-mingw32.tar.gz/md5/11ed8da2cb4c7ef924b50768cbb54678 -LibGit2.v1.6.1+0.x86_64-w64-mingw32.tar.gz/sha512/b39f12931d638809af27e446d7ac25b17bfd5c003cac89bcf83dc4c5331d14ec12b07ae410cfdc636546a3b1edf0f7d360bd194aa58c835261642b51edb4afd1 -libgit2-8a871d13b7f4e186b8ad943ae5a7fcf30be52e67.tar.gz/md5/831f4d09a6a22662dc0043063d0305cb -libgit2-8a871d13b7f4e186b8ad943ae5a7fcf30be52e67.tar.gz/sha512/17ad43e6f80e87e8115cef89919475a9d9ea11d679e107221e6d82623577fc8e4002876a33c7eb2a52a47e3d8142976777bc79f81e4c4cf2da6adb1553d17b00 +LibGit2.v1.9.0+0.aarch64-apple-darwin.tar.gz/md5/1e22c2cf3e6003addd9bf16026ac4a06 
+LibGit2.v1.9.0+0.aarch64-apple-darwin.tar.gz/sha512/78d5e5d246534164e1d70cf69dea273bbb8386df24c13fc3c3571762df15f2714307e7ff4cae6f977eee9def121c94cfe33cfcd44a60905a8161d65d17565e90 +LibGit2.v1.9.0+0.aarch64-linux-gnu.tar.gz/md5/70bfe9da256442ea2c295a016a89d3b9 +LibGit2.v1.9.0+0.aarch64-linux-gnu.tar.gz/sha512/14916a5521aa1281b443e61beee2573bc55b76d88810a3bec8bdea677d95763da82f1a527975cdabcdaa213e69aa1640201a03656bdb505b886906795aad0c74 +LibGit2.v1.9.0+0.aarch64-linux-musl.tar.gz/md5/62f6e885de29a345cc5ee3e773c74471 +LibGit2.v1.9.0+0.aarch64-linux-musl.tar.gz/sha512/09e793209505ea954e608c609138b8865d8a1630340fa8ff032a55234bfb8277d2c3c31f26048ae4993bf8c3d8f165abd0b4ccd80526c61efca0807f634572df +LibGit2.v1.9.0+0.aarch64-unknown-freebsd.tar.gz/md5/6fcba6e43265aa7a1ea5bba85977d622 +LibGit2.v1.9.0+0.aarch64-unknown-freebsd.tar.gz/sha512/34b836d3c22436e74963141dbe1f9372cb7ee695ebb2054ee0af1353d4401e1dfb855e91341a1d06a24ce18d57caaa3aa1e2bc7063000fa4f9be40130eb6ff95 +LibGit2.v1.9.0+0.armv6l-linux-gnueabihf.tar.gz/md5/75ede2c2c7312adf06a2a9859cd6310f +LibGit2.v1.9.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/9de567bee3aad33eebac51ad5b57b4fefaa4b778ce8510b2524a55cd223bfaf3051fd48c8713741e799d1464b308469580716dcb847a6eb97fd632727ca22a7d +LibGit2.v1.9.0+0.armv6l-linux-musleabihf.tar.gz/md5/e5341f0c76c89273c465cb43cbf0f284 +LibGit2.v1.9.0+0.armv6l-linux-musleabihf.tar.gz/sha512/1029d47c82ce20223b1c108da77a1a32ef0b91b9645040c1d941e7abdd161011736a81f4ad25006b32d83d4c07c548fcf1c8a3326cf3cb91d56fd443e2e9ced7 +LibGit2.v1.9.0+0.armv7l-linux-gnueabihf.tar.gz/md5/03191a1c4ff1c1ae764092b26c941783 +LibGit2.v1.9.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/6bb113c722b550fb28fc84033a3a38565ed5305a7fa193eeb4949b979fcf4599b84c748f50dad2ad47481827138a6e405eaf727f719d219984a809088bbb2948 +LibGit2.v1.9.0+0.armv7l-linux-musleabihf.tar.gz/md5/1678d6e57aa887963b27917c884cbf36 +LibGit2.v1.9.0+0.armv7l-linux-musleabihf.tar.gz/sha512/52590e9ca4118e0dec70191353b2c76155363df77df6c0bb5741dfb3f333539a8ad75339796748a744c342b51c15869726cfe9bbf6ca78d524e7d2ccce4a4622 +LibGit2.v1.9.0+0.i686-linux-gnu.tar.gz/md5/3fc50746cb80e0455f8e7c7622cd433a +LibGit2.v1.9.0+0.i686-linux-gnu.tar.gz/sha512/20c97e1a816456267a16759378a5e968e6bca122d1e0dc7cc282cad2bf2a8e3929e90373752065d91dfb6688e39ac6db660d9bdbb3277f1b9cb04b5d3f46fd8c +LibGit2.v1.9.0+0.i686-linux-musl.tar.gz/md5/fadb5e051e3b21e68a61b2a3049f65c7 +LibGit2.v1.9.0+0.i686-linux-musl.tar.gz/sha512/369c8c64df89149e9ed600028c1ac96db24e7b2c1977146667b8aeba93aa7a3b4787a49734411448680654188ece33e740fa475108b80b876a5082edad722925 +LibGit2.v1.9.0+0.i686-w64-mingw32.tar.gz/md5/610da247e41070b73e71df7e41267846 +LibGit2.v1.9.0+0.i686-w64-mingw32.tar.gz/sha512/d5b61c885133e3002e48e0fc37ceed0bfeef070e8fc6b2d78ec5f3069ad80966ea5b3a2b3aeae1ca478e9a2f839309fd67c3a186ecf751f4642ff4cb4ca3cb38 +LibGit2.v1.9.0+0.powerpc64le-linux-gnu.tar.gz/md5/f05f5f07de55fd297c564b6cd4e54747 +LibGit2.v1.9.0+0.powerpc64le-linux-gnu.tar.gz/sha512/57b740ca3ef6b18994386d74f1cf39c97c1f58f5a63e749c1a0dcef8c43a915f13cc093a8e1d06cef1d1c60cf484ba0e38d20a96344df69dfc997daa63ee1137 +LibGit2.v1.9.0+0.riscv64-linux-gnu.tar.gz/md5/b043226b10e5cbbe4914be3392f5bf72 +LibGit2.v1.9.0+0.riscv64-linux-gnu.tar.gz/sha512/a580795dd9a7ee237cd1d51d55f5079588686b1adfe391a017de743946e1bd4e7d5e4f8b79a6f84f0ce165733ca1b67ea740d06fa18547c29616df2f73e3f289 +LibGit2.v1.9.0+0.x86_64-apple-darwin.tar.gz/md5/bad8607d4997ef82cd43edfc7579d0fb 
+LibGit2.v1.9.0+0.x86_64-apple-darwin.tar.gz/sha512/c7359d79949a6727973b1df2264b672bfcd1617b6d4c74d281ef70ac93bcadfe47f99f7a5d031eed36b65077668ba12f2b31bbe6d491542b6938816659070317 +LibGit2.v1.9.0+0.x86_64-linux-gnu.tar.gz/md5/21e5fd214a6358f643477973c22ec70c +LibGit2.v1.9.0+0.x86_64-linux-gnu.tar.gz/sha512/9e68cb6d25d85ad272fcb0d77deedce2daa9c62d7ce2fd7e9221647d021aa00e372f490ad29211d7ca2b5ddefb4addcc4733e25e3df038aaf26fe3cb269d8f56 +LibGit2.v1.9.0+0.x86_64-linux-musl.tar.gz/md5/e9ad320825b22ee378b33856ca266b12 +LibGit2.v1.9.0+0.x86_64-linux-musl.tar.gz/sha512/bd33b4d31a7622a0440bd0979ecc7bbdef7ba7a52bfc911f880c9430d57d2b9ea1c6c4e57697b5a2b63c2e00e07673b3dad6feac056a4f345ed6e3b0ef7aef77 +LibGit2.v1.9.0+0.x86_64-unknown-freebsd.tar.gz/md5/501c63c8810616e6764ff80c23fff0b5 +LibGit2.v1.9.0+0.x86_64-unknown-freebsd.tar.gz/sha512/109e5676899ba6992a68fcff6d7503f49cc3b748b4b0faffcf951f318f9730e242914b57a7848111e229642070fdbce29bc181cbc79ac2e794c6ef489bb27293 +LibGit2.v1.9.0+0.x86_64-w64-mingw32.tar.gz/md5/4e76fa8356407a7065b50298817ad462 +LibGit2.v1.9.0+0.x86_64-w64-mingw32.tar.gz/sha512/01204b29ff2f90a9204d2e91fb7d48a3b6bea008a77984e3e67423a04f630690073d648a7200168809999aa5885fa6035c5b099256724b0379229c257ef19b9f +libgit2-338e6fb681369ff0537719095e22ce9dc602dbf0.tar.gz/md5/0ce4a212921ef1752ea057a3be45e384 +libgit2-338e6fb681369ff0537719095e22ce9dc602dbf0.tar.gz/sha512/4eb018a85a59c6ac0514f09f19a40813d8a4bc5ea230bf54897aa2ef5f584796e8c680a27ac68985a3457e1ea1f554ba4af803b430d67a9065cf51ff317d7a89 diff --git a/deps/checksums/libssh2 b/deps/checksums/libssh2 index f8c1fc5da8d37..056d373656d98 100644 --- a/deps/checksums/libssh2 +++ b/deps/checksums/libssh2 @@ -1,34 +1,38 @@ -LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/md5/b692a93b8f7e45edd5c5d397cd1d3725 -LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/sha512/8863fb372e3bccb9d6ff7f33494754b1391f0081426d1a42a3f3da69ced9d1b6246b7aa84269b7ec2844c27991d5998a6c58561b277f86daa96b577dec57b514 -LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/md5/ea2e202282947c4900d656c83ba30953 -LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/sha512/1c3a035a2e711ad013acb460293e929d18b58d345f84f4a7cda93510dca5e46d466f08b2b96e5742c16c509dc6ed6b6e2b13399bbd1c48340326e3e6d73f9322 -LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/md5/9dd3b1813cd1cc6246b31c5bd2df538b -LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/sha512/075d681235961838e2bb14f2034daa65909a40972cf638b44646464f66973d139de9af9d653073c24510cd40e5068d3a41c09f6ff12835a8278259d8530a6720 -LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/md5/17a33524ad9e6dfcf239b076803e3c84 -LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/09ef31e3a6ee1e6055c6cf5c0f45fc2704701bdeb565b82a1896e8c83cc34c9dcf1b2e761d1c78b5d513291802a02ef3567a8a6d06d9d7ad946f4233e973c5c9 -LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/md5/18db6a0698da98ec8247ab0f86e9a2e9 -LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/sha512/11657f0b411ee81d84d3c02befd34d53d25da0485214f82e6ac9601fd065127e01ac74b592d328481a0ed7d04c231b37f4fec773e06d1c1f5186f6eb7eae57ce -LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/md5/0a49a14c15176f32867f49a6e487de77 -LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/aa81a8504529b90e6e212f1495e8732118f5d0eececd4a809cecdeb88b97f0ca4a1081e669660ea8513b15f71299854da9eb5f8352f099796df4fde33f89072d -LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/md5/0bb9d7c5c40d88e19a2d9f3178c4de64 
-LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/sha512/cf361ac498daa4c3a0b044171165756e54402f70c75fd5d877e6a24db9b6930c678c3f45c16850b9138e8f97cab9f1cb4ba82262e48fad269a36fc556215899d -LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/md5/651955e1c129d88d9dd0e9b048505db7 -LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/sha512/b48d3e5eb36d4e0ef36bc512f1fe65a85e0ddf16dab8da8190f642d2460b6ab94677838f263ad280f37a8bd838c4c8283a3cc706247d4241d8760fde797fc163 -LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/md5/74b4741009fea8bb10ab3f6a44937fb1 -LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/sha512/13b64fd49d6a6b80dede0c049db871c5b22fec2f093adbe6699f2e467f7458684cd38093230c5d2fc837e500c8d1e86cc2966b9805a2ed7a705d50108a95803f -LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/md5/20c1c7d0825cba67a0bbfa822348a245 -LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/sha512/2ac02ff310911998c07861493b699837168e43e40172372c33fc7769ff6eae2f2f2c65b10319c2f10316d34c519ec2fc5308b327b62caeb415ac7c5c692fa81d -LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/md5/6d180f087e415adbafa516c471315ce2 -LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/sha512/23ea211b5d1219454c14316e59cb94195195abebd9a4e7a6812c4d824abcac7c5b896c460c2dae3511abaae7e0afb5ead40a5836e5d94ec0c3a2b8076dd29e3e -LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/md5/dff956043faefa3396fc31bddbf83b1e -LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/sha512/00aa279251a04684d968e413bd9f652c6740bf4a6e860ba9b999c8584561499f1f589ca2eb3f06a01c539a952fffb41787a37a6e514d689b97693a5a7bf4c18f -LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/md5/a50d390c1a8ea77d7d78c07a759fa79e -LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/sha512/c985018dbc79c9d41ca3df940fcda15d7f57f0a4e869268ab7c255b4fbc7aa9bd5088281258de22523c777bc9210ce3c9e1f0f76693c0575506ea840619c0306 -LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/md5/a8b647a12439a7ec9b2d5111a4fd605c -LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/sha512/7790bf3b88513a026f3b58090c5b39b0544d873f7bee4f4c06fb23f513954f580ff2d5d552f15f8b725fd3264585390c33283906f1844cf20ce6d2eee54495a7 -LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/md5/c4f76951ed337bc87d21993d1997dac8 -LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/sha512/3db3c44e2100d00a537c87590dcd5493dc9ec74913ce903ce7bca697ab590417bd55ec6475a0a790ab49e9444d79ece539733ac25b0b82eaab735c8c96c0e992 -LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/md5/b58d05eb572149dbfec7b53a75dc4d6f -LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/sha512/422fb36c6d7d3f7153b52547fb98d70268da1506a4957e2772184ba52bf06455f869f1c491d82852494459189830c68569219fbb2c795ddb49d7e8a8e95d6988 -libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/md5/d0b060310da22a245fc488a300288198 -libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/sha512/17770f8de4f081840e765d6f7842d562e20f46972fb53a15e3c9e10421f3654a559c5dd1dfbafd7b4a0e5205d800e848b9c9c26ec1d8fc0d229d5070b6d19463 +LibSSH2.v1.11.3+1.aarch64-apple-darwin.tar.gz/md5/87ba86e78421d6195aa6a46129ff61d4 +LibSSH2.v1.11.3+1.aarch64-apple-darwin.tar.gz/sha512/2b7129be9e9518337f59857474882a6a3448f358c931c66ab9f9ec67506c68d2356df591bd45925154844ca0d6f6e1f071d4c54d62039c5078b468fcb356187b +LibSSH2.v1.11.3+1.aarch64-linux-gnu.tar.gz/md5/84c6eb68e7797038d0863513fa4e292f +LibSSH2.v1.11.3+1.aarch64-linux-gnu.tar.gz/sha512/3012beb35fdf94136907037e8f5261a5cc94d102f461172321d4ed8f328da3789d521513dd03cb344c6fcb73675cd1d3ede606bf9a904fb811d40c43fd09d8aa +LibSSH2.v1.11.3+1.aarch64-linux-musl.tar.gz/md5/5a49057201e779f3427b794b72bf07a2 
+LibSSH2.v1.11.3+1.aarch64-linux-musl.tar.gz/sha512/62a812efb4ad7b24bfeeb3bb89756004215c09a1cc01e0530f14ce4b8546f1dcbbac18155ac2ce08311c1790d659b14674e3bb3549ff68d1209d52b5e5986fff +LibSSH2.v1.11.3+1.aarch64-unknown-freebsd.tar.gz/md5/a5129167b7be7ac8ba2c873e164afb1b +LibSSH2.v1.11.3+1.aarch64-unknown-freebsd.tar.gz/sha512/f8d9cc5098a3b401fbbe98a24efaca0ea46f533ecaf11dbfe8f7e7e3853363af19914de62bd1cb5a573e55e90d5c6074532ddc6d64723c9e235b277f438ce6ef +LibSSH2.v1.11.3+1.armv6l-linux-gnueabihf.tar.gz/md5/5c59c95612bf9aa172e5d487002db509 +LibSSH2.v1.11.3+1.armv6l-linux-gnueabihf.tar.gz/sha512/5ba41e49365c2018d55c92e4a23d806ca9ab960a448593b08380527da21eec03f76cab89c34befbc56f4104002aa189d5cae6f655797f1447f395b51a14d40e2 +LibSSH2.v1.11.3+1.armv6l-linux-musleabihf.tar.gz/md5/4bc27411f0eddf82a787d1ede17ce2c3 +LibSSH2.v1.11.3+1.armv6l-linux-musleabihf.tar.gz/sha512/d6024b6949ac6867c56c66defbb99300a5661e0c73da6c330165bceba78d64063986c8851601ca74554b27944d5b02e3f602b1e71781097bbb8b12effc0cbbdb +LibSSH2.v1.11.3+1.armv7l-linux-gnueabihf.tar.gz/md5/40e1a0d323969b96ab121eb5a3ecc874 +LibSSH2.v1.11.3+1.armv7l-linux-gnueabihf.tar.gz/sha512/67ce15a5b1c1fe0fd1096ed5d2d9f44d83983de11c1bc651f5914d70d387a99ee6bde31716031b758f48981e2a9383599f077f02d61a5c783ee6d09a7bf445db +LibSSH2.v1.11.3+1.armv7l-linux-musleabihf.tar.gz/md5/9453c52394b1b06bd36c43e461a3b48f +LibSSH2.v1.11.3+1.armv7l-linux-musleabihf.tar.gz/sha512/c62068ecb1b88dbd08a2474e0b93cd313bdc4e1407a22cd9164a73b2d897564f12a3c34f6fc492b264af579b00e9335a0fe1fa853fbe0fbb18d8335b77d409b2 +LibSSH2.v1.11.3+1.i686-linux-gnu.tar.gz/md5/992453b1c59033aefa8d98b89f491ff6 +LibSSH2.v1.11.3+1.i686-linux-gnu.tar.gz/sha512/ebf14565d614086c4401e1a997a3aacc83f8e499ed836c429f87c4f95f1c8409713fad47f1c34a2b1cd23f90de3daf14caafba3c82b15642018592213607c874 +LibSSH2.v1.11.3+1.i686-linux-musl.tar.gz/md5/e0cb0566c724c107f4f04619080d4c0c +LibSSH2.v1.11.3+1.i686-linux-musl.tar.gz/sha512/af7d08dba5bb06eaf7ce8aeb12b69701d3c2829996a1c8e68510c106402a1166ad060687987df49365c26d30e8d6511c66f2a50ec810a493d2c090931ccf05a5 +LibSSH2.v1.11.3+1.i686-w64-mingw32.tar.gz/md5/c5e8d3145deb56d6df008522a5d3ea6f +LibSSH2.v1.11.3+1.i686-w64-mingw32.tar.gz/sha512/47f3c36747d2e42a4c0669ef468d395078328235d30056b7d67d76bd737b5118c1bbc720aef455c4d9017e7b9350e8cc043ed28264ea8a9ecb6833ca517f82aa +LibSSH2.v1.11.3+1.powerpc64le-linux-gnu.tar.gz/md5/12eba4aec5e320a4d0cf09225bca3f7c +LibSSH2.v1.11.3+1.powerpc64le-linux-gnu.tar.gz/sha512/d6b8413d77d8af3d29b867692f6c02b63e793f5e8f17c4777756d247c8e602b3ab87380031aefa60f2c3ddae5a3c7a1f1c739439f149db34a32c79f32e08048b +LibSSH2.v1.11.3+1.riscv64-linux-gnu.tar.gz/md5/cc11dd403ecaa373241b3c30cd16bd24 +LibSSH2.v1.11.3+1.riscv64-linux-gnu.tar.gz/sha512/d195ad62cde58dfa1e3546efd70a5f6b8a0762a2a933c637120aa71eda45dc6dc4213e87f9f401e2e148bbd5fb10638e429ae514bcda5bada0940c70cb7ff15e +LibSSH2.v1.11.3+1.x86_64-apple-darwin.tar.gz/md5/f6e7cd35e16290b198c80c61a0fca5e5 +LibSSH2.v1.11.3+1.x86_64-apple-darwin.tar.gz/sha512/2c83814ef6ae78ec94a43f2997151dd7195c0a0f9cf456fcd3f780268bd1cbdd7ea55182fc5a1f8e1413c26889e54fccb01964b0b91dd4b925ecaa16b7df8d07 +LibSSH2.v1.11.3+1.x86_64-linux-gnu.tar.gz/md5/95aa96befc9f9007e6a000a95c1b7572 +LibSSH2.v1.11.3+1.x86_64-linux-gnu.tar.gz/sha512/6058dca6d933afb7fe5fc3374937b4432f202a5dfe3ebcc2f91f65777230c18d76801c38071f84f8362527ee08656a97f79da234ab5481265a7ccf29e94c20c5 +LibSSH2.v1.11.3+1.x86_64-linux-musl.tar.gz/md5/88b69d889d602bc3df420535dba30f9e 
+LibSSH2.v1.11.3+1.x86_64-linux-musl.tar.gz/sha512/7335954124074e7df786989db86e86e3bcf41f503b8e3b27d6ac18032c8025bec26180bd2c537b23349bcf5673eb67245531479b939670e620faf5aa13c8c4ab +LibSSH2.v1.11.3+1.x86_64-unknown-freebsd.tar.gz/md5/6d5f6e9455c35c5f6655cb4d46797db0 +LibSSH2.v1.11.3+1.x86_64-unknown-freebsd.tar.gz/sha512/9515d11bb5686e29eb5a37bbcb7ab07574da0869c82e5b3f0cf282bbc56792af31e6174521d58133968b997caa6db75ac9b195024144fd2c95fd1bbf689ebbf6 +LibSSH2.v1.11.3+1.x86_64-w64-mingw32.tar.gz/md5/e66cdac0c2d5ce2d160e482d780ad0c3 +LibSSH2.v1.11.3+1.x86_64-w64-mingw32.tar.gz/sha512/2dabb1e8da5ea496898751d5517ca37178e1a44c78c26fe33f87487a0b4acf7185f686ce8d6ea0e65e38a8fd56e5ff09fd70becda402a942b5e459707eb2a44e +libssh2-a312b43325e3383c865a87bb1d26cb52e3292641.tar.gz/md5/06d5e2881ac023583c7fd6665d628a87 +libssh2-a312b43325e3383c865a87bb1d26cb52e3292641.tar.gz/sha512/5dee8cce91853eb8c9968d7453b1ad0c3cd1411901d288f1731b7c7e4adf380313f61c2a66eee0d3b89eba79e420e13269bb3738bcf2c59f0b88276aa785fa8c diff --git a/deps/checksums/libtracyclient b/deps/checksums/libtracyclient index 19b7b26c5461e..a212c415f09e5 100644 --- a/deps/checksums/libtracyclient +++ b/deps/checksums/libtracyclient @@ -1,34 +1,34 @@ -LibTracyClient.v0.9.1+2.aarch64-apple-darwin.tar.gz/md5/08881ffc565e099903e2e972a7f7c002 -LibTracyClient.v0.9.1+2.aarch64-apple-darwin.tar.gz/sha512/a9dcc7f9ed7565a769dd1080513eec7439cd7b03d68d48f570ac3f396769ef0a7f9b07446045ce6536b7e67860096eb150670256c311c0a77ac1a271dc4b4422 -LibTracyClient.v0.9.1+2.aarch64-linux-gnu.tar.gz/md5/d6a8dbc7cf871f772f848a5e515e6502 -LibTracyClient.v0.9.1+2.aarch64-linux-gnu.tar.gz/sha512/cb9b3065f581a956d318d71a94216ca0e57599262a12a25bc2e6fa0234505fed5a9cad9c2eb7ad30d7ffe9c4ee3d26d9f645887d3f7180d69d3bf1d0745b4f22 -LibTracyClient.v0.9.1+2.aarch64-linux-musl.tar.gz/md5/0d74193e3571fbd80eb7d9e884b47e53 -LibTracyClient.v0.9.1+2.aarch64-linux-musl.tar.gz/sha512/18821911a96129486cb12726018b33fde1da345228623b7f326b92ccfcbbbb2349d79a35e6fa7cb4b6cf9283a860e8ac44c40d6b54a4dc1ea4373b869491b6d6 -LibTracyClient.v0.9.1+2.armv6l-linux-gnueabihf.tar.gz/md5/6111f3b3c696d9d07139e137c2ec1d08 -LibTracyClient.v0.9.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/135139c221cb2d4d6000bd1a3771bd095e93487c7c649ebdf760ff5cb03f6ae003c33c2a36a52bbdf70e4c349195f78a97bc963336a36f33fcdeee33e4fc1eb7 -LibTracyClient.v0.9.1+2.armv6l-linux-musleabihf.tar.gz/md5/5b3154cc849b04bb3523f04fa4481b83 -LibTracyClient.v0.9.1+2.armv6l-linux-musleabihf.tar.gz/sha512/7f62a546c7cdbe3bb6a0a446980371ff340d5f530907a2434eba2a14bbfede8c740a763b0c68a252d7a3e357d9d933bcc6313919cd9bfa385715bc833be56cce -LibTracyClient.v0.9.1+2.armv7l-linux-gnueabihf.tar.gz/md5/f6952d495c5b699226260e065cf2703c -LibTracyClient.v0.9.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/5fdad7f8ce3a03ce05adb3deb6bc8347aefcc8a7fe0a30e0f7684fe233eb8520aca138e0b8a6cc5555a1f2316a6e36bca32cb5de37f2aac5c5deddfaeb0f8570 -LibTracyClient.v0.9.1+2.armv7l-linux-musleabihf.tar.gz/md5/84924c2e32b39ed580b553a968e97360 -LibTracyClient.v0.9.1+2.armv7l-linux-musleabihf.tar.gz/sha512/2b81834b91472eb9897abefbe77e931782e8c14eaf7193f22fce82024610906b6e96122610edfab29a9c844581cc4ee9124e330af9eacd97fb8759c1de421472 -LibTracyClient.v0.9.1+2.i686-linux-gnu.tar.gz/md5/9f243a9d10cd928d45436f634d020c27 -LibTracyClient.v0.9.1+2.i686-linux-gnu.tar.gz/sha512/c9512030d83f32942c7fefd598bfa597ce758f39d11bc9551fbf565a418a3000d23f899f1e9411cddebb3642efef8cccfa3cf3f629bcc11fcf50585e1a80549e -LibTracyClient.v0.9.1+2.i686-linux-musl.tar.gz/md5/4aebc58f4c8101640d9e450338a4e12a 
-LibTracyClient.v0.9.1+2.i686-linux-musl.tar.gz/sha512/2085b7c0658bb39dce9a9b511c209a348916ed8e50ed0d51eb22f7eac167b890a87d357e433e12eaf7034c15842c8d2893a0c128443c4f25fa90fd5ca83e256d -LibTracyClient.v0.9.1+2.i686-w64-mingw32.tar.gz/md5/dc6f911f5cdd2789ef9f13a1a9882243 -LibTracyClient.v0.9.1+2.i686-w64-mingw32.tar.gz/sha512/57894c759db949dc669e23b7d5e015942630328a3dc754185a0f6bae95a66f0c3e65e365317bae95f3a216f4dcab681203e64dc8c9a0b5478cc9e27c9dab2e56 -LibTracyClient.v0.9.1+2.powerpc64le-linux-gnu.tar.gz/md5/a7429f900f7f0a14fa355186d99a24e1 -LibTracyClient.v0.9.1+2.powerpc64le-linux-gnu.tar.gz/sha512/e37ff8e8de9b74367b9f0d6fe49d983900529caf9c2c55d5ace305d5896c2de6589380247dc85017d959901864d4a163fe110e6d860340d949c6ea4dec50f47c -LibTracyClient.v0.9.1+2.x86_64-apple-darwin.tar.gz/md5/b037ea1027e6466d5dd9c0fb41f65ded -LibTracyClient.v0.9.1+2.x86_64-apple-darwin.tar.gz/sha512/81e2d00bd8eaa1cbcbd5c0ee4552028ccedffcc072beea3dc08ac3181677da93406e8dfc581a78434175fa5bb861df06848dd3012f8adbbb6dc72efcbb5094a0 -LibTracyClient.v0.9.1+2.x86_64-linux-gnu.tar.gz/md5/cfbe122083aeeea6bd7ddc4591b1cb53 -LibTracyClient.v0.9.1+2.x86_64-linux-gnu.tar.gz/sha512/e0418a0b50d64990d6f1b80dfe65e2360817211e1225c4d8d9fc9c871a95bbb62c2601c617adf1d55305518f5ba1dd05baee82f6934d0011269fab21b89336b9 -LibTracyClient.v0.9.1+2.x86_64-linux-musl.tar.gz/md5/f152ba78f2461fec711144ae66380c34 -LibTracyClient.v0.9.1+2.x86_64-linux-musl.tar.gz/sha512/f59f837d2beb4df4d3d65352a8c46261bb5a92ae88a62e2d1bfb7293184e02be982fbefe20736456719055e718a26003984224d0d74a0a6244dcc59e0d350556 -LibTracyClient.v0.9.1+2.x86_64-unknown-freebsd.tar.gz/md5/83c7b3d9438dd04d25573a386bc5c3df -LibTracyClient.v0.9.1+2.x86_64-unknown-freebsd.tar.gz/sha512/f22d0d4f4171067bd1f56bb63dba801e262d0ed4809538dae907296d1a12817954ad759cdc9e61f710fff5802fb7371d8283d6df52c9e8faf6b43c713c23e371 -LibTracyClient.v0.9.1+2.x86_64-w64-mingw32.tar.gz/md5/83f3db14b65b8e9942c754bcdb430060 -LibTracyClient.v0.9.1+2.x86_64-w64-mingw32.tar.gz/sha512/8acdd1d407ae927925f33eb75891684d6687e3577d5f8ac77e738daedc8145462b1f044e31edd9e2db4507673a0abebcea19e171833042cbbe5a135b0c0435cb +LibTracyClient.v0.9.1+5.aarch64-apple-darwin.tar.gz/md5/c6768380fef203f5310d2cb3ab6fe509 +LibTracyClient.v0.9.1+5.aarch64-apple-darwin.tar.gz/sha512/309216fbc5be52319241ccdd2360c8960ffecf50c963bf248ee6aab6a43a6cb4c3a703391c7d3e1b07cb077badce930673f85f28c0924975b75909a4654ca3a6 +LibTracyClient.v0.9.1+5.aarch64-linux-gnu.tar.gz/md5/20d6c3ef5032d458817f18aa7f92b44b +LibTracyClient.v0.9.1+5.aarch64-linux-gnu.tar.gz/sha512/d2e341ff18bd06b57094f2356fdb36a3f9dcf56f3340b83006dc02d41d6e5040f145a23a06f86ccd1c9800e93fc4461ddd7902b6eccb93b78b365c824e5d764c +LibTracyClient.v0.9.1+5.aarch64-linux-musl.tar.gz/md5/58cd88ab771a31326fb4e3448325a17f +LibTracyClient.v0.9.1+5.aarch64-linux-musl.tar.gz/sha512/dfa11ac4fa5261bad557a244458b2f67c20c761c5d49d31488a9b944345e32e55f1a40515097b4f5f4896fe5046e3bbc70faa40ff4dd504d4d97cfa42e46bc33 +LibTracyClient.v0.9.1+5.armv6l-linux-gnueabihf.tar.gz/md5/cce5ef56b53255494b334df157e285bd +LibTracyClient.v0.9.1+5.armv6l-linux-gnueabihf.tar.gz/sha512/1287f734669b7a7a6f43b6cf3b725de650e64edcd4284d51120e7371f91ca18c63d4981d58c28ce2da9355eefa5a02e0bc0b35303b096733a7842bc952da2df6 +LibTracyClient.v0.9.1+5.armv6l-linux-musleabihf.tar.gz/md5/60508f4f9c757678b8e52ed0342079eb +LibTracyClient.v0.9.1+5.armv6l-linux-musleabihf.tar.gz/sha512/92d2d8521a8b6c6901f36ad71f59fa1b2eb58d6dd06da02140cc26b6c44d2741ddd0cd3107bbd3e70ca1713a33fda48d5288bec59304264d0afc20d4e0167a50 
+LibTracyClient.v0.9.1+5.armv7l-linux-gnueabihf.tar.gz/md5/64563cbf8fe18fe84a2ec9e9fda19e6b +LibTracyClient.v0.9.1+5.armv7l-linux-gnueabihf.tar.gz/sha512/b0158367834f32fd7449d45b983f37bcfed71727bcea3febf382a779265ce4b99845ae32f5c3969d83089bbf5e072680b11138a45b7b7030364ce341f285e220 +LibTracyClient.v0.9.1+5.armv7l-linux-musleabihf.tar.gz/md5/1d272a82701889fb695edde6bdde21bc +LibTracyClient.v0.9.1+5.armv7l-linux-musleabihf.tar.gz/sha512/1539f06593eb769ba35ef198f90b9fa6c11d7146124f21e35c8fee577d8fcff3d71f4e38e6d26d84dc8f66b06a26a130f4bc740a201cb27573ec8e6816d489e2 +LibTracyClient.v0.9.1+5.i686-linux-gnu.tar.gz/md5/4e14c36ea3b1e54a26897767d4a010d6 +LibTracyClient.v0.9.1+5.i686-linux-gnu.tar.gz/sha512/4c1d9cda642a4ea3084b73c0b536edd0f33a216aa02c59f914ab428e0e97120ba3f81e7eb2262a2242884f553fd319b80ea7b013344e87e173dc1ee9b3421ef0 +LibTracyClient.v0.9.1+5.i686-linux-musl.tar.gz/md5/75ced50efcc6ce1c17064a3447933fb1 +LibTracyClient.v0.9.1+5.i686-linux-musl.tar.gz/sha512/969c41de91d288e4e37a36f990341c2f71d6788d93bb34eb6708532ea60bfa1bae6760871de33b42cca50b61dbf8028f639538f34ab9bebef2312d449c259f4c +LibTracyClient.v0.9.1+5.i686-w64-mingw32.tar.gz/md5/0f8c6cd2e1aa738b340e10e1ce81732b +LibTracyClient.v0.9.1+5.i686-w64-mingw32.tar.gz/sha512/76824a28f16650e14b06051f49a5c42cd8692dbcf808c2d47ab21ac986cf49e17508062ce5e0e71f2a689112098897dd7fcd47b819cab6967080b7e23224bf1e +LibTracyClient.v0.9.1+5.powerpc64le-linux-gnu.tar.gz/md5/573e5c6aca49845b7e9e881c7bc8f755 +LibTracyClient.v0.9.1+5.powerpc64le-linux-gnu.tar.gz/sha512/7451731c6f0bf0ac08c966f925b0dd628f6f4c0ff7e2d65e8bd3d27231e3fbb2512169c917431baeca3fe8e66af4bbbea7ca4ba79dd7d3b9e1d43b08a580dc76 +LibTracyClient.v0.9.1+5.x86_64-apple-darwin.tar.gz/md5/b037ea1027e6466d5dd9c0fb41f65ded +LibTracyClient.v0.9.1+5.x86_64-apple-darwin.tar.gz/sha512/81e2d00bd8eaa1cbcbd5c0ee4552028ccedffcc072beea3dc08ac3181677da93406e8dfc581a78434175fa5bb861df06848dd3012f8adbbb6dc72efcbb5094a0 +LibTracyClient.v0.9.1+5.x86_64-linux-gnu.tar.gz/md5/886d5b76711252176eaf7e41dd2db0a5 +LibTracyClient.v0.9.1+5.x86_64-linux-gnu.tar.gz/sha512/783c6469c586520c7f1206f5c2eae6a909a2dac7c3f726f439da91b0f3dde970fc4f17c6e3b54aa8924ae537b1b6a14729cd1305e6488c458db68ffe973c2ced +LibTracyClient.v0.9.1+5.x86_64-linux-musl.tar.gz/md5/170a53b0c63e7e130bf3d71590e27193 +LibTracyClient.v0.9.1+5.x86_64-linux-musl.tar.gz/sha512/ccda22a897358d9eb55b500dbeb35c8d136f484a649c29066d5f2d0665d5b71526502237dbc374c2d018fa212896fa6a6c6903fc7c4833f814d1e60c3f12fa83 +LibTracyClient.v0.9.1+5.x86_64-unknown-freebsd.tar.gz/md5/58fdabdbbdaa7b4f53bc4249a7c57059 +LibTracyClient.v0.9.1+5.x86_64-unknown-freebsd.tar.gz/sha512/ad0d4b74c707da86fbcf7210cbcc9a3ebdef770ad0bf12f33e3476c2e455d48e389f021fc47e50b85d1c4741072396b71dc034cb95bfd38f775f4879d06b5998 +LibTracyClient.v0.9.1+5.x86_64-w64-mingw32.tar.gz/md5/cf5b40edd556a7b8ab23d28bf0eecb12 +LibTracyClient.v0.9.1+5.x86_64-w64-mingw32.tar.gz/sha512/8ece28dae598418c3435cfd323609e6e615ce8299370040fdd3a000007faecf87c5ffdfebcca80ea9b4180ad9bf13d3f3d65fed2fafe6f54d4a6068f9cae61ca libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/md5/51986311723ba88ac305ad2c1e3e86c6 libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/sha512/f92c5bd71fd3e933f03e3535c0668a9afddc7ea19531aaee11b22bde09c57cc8a555f7f17f489d4221645fb6d73ecf9299d5bb11949d7529987beec3e7d91763 diff --git a/deps/checksums/libuv b/deps/checksums/libuv index 709fba71f159b..fb2904b308a90 100644 --- a/deps/checksums/libuv +++ b/deps/checksums/libuv @@ -1,34 +1,38 @@ 
-LibUV.v2.0.1+13.aarch64-apple-darwin.tar.gz/md5/1a58ce9dc88984c3b5f7df97af6cbf83 -LibUV.v2.0.1+13.aarch64-apple-darwin.tar.gz/sha512/2bfd482ac759ac88d885371854affa8e358a10fea6c7756e0d1b366bc82ecbea56bdf24ca634525fb2a6fc2b3a5c77b07a4c6dec2923d8bffe2bc962bd3e7f84 -LibUV.v2.0.1+13.aarch64-linux-gnu.tar.gz/md5/7f270dd1e3046c8db432e350dd5cf114 -LibUV.v2.0.1+13.aarch64-linux-gnu.tar.gz/sha512/c0debcf17b54ba9f1588d4b267d610751f739d8ff96936c9d5fb6d8742039f8736c63fa70037322705569e221d73fb83c03b6ba9fb4454442fffd3a9f1a1a2da -LibUV.v2.0.1+13.aarch64-linux-musl.tar.gz/md5/07f56c32d5a2c12e6c351cf9f705631c -LibUV.v2.0.1+13.aarch64-linux-musl.tar.gz/sha512/8037d7aa0cb06850f055fd19cebdcfcf3146dde0d12768a9669bf05dcab91fdf3708798203258cb3f452158bdec7faae41e6afbb0e60b21403e683db3e23a1c9 -LibUV.v2.0.1+13.armv6l-linux-gnueabihf.tar.gz/md5/5558a7f68c7c375f40bc64da59fef0ad -LibUV.v2.0.1+13.armv6l-linux-gnueabihf.tar.gz/sha512/92ed6601cb5aa9a3ea2478a1485849543c9e847c8e85542e72f372a2d37c4c8b90f5ecb1bee1e462db31e1e8dba460f584b3cca9c833989c2b9ee404e355654e -LibUV.v2.0.1+13.armv6l-linux-musleabihf.tar.gz/md5/de6bfb7f0c0468b79e8895f166fb6340 -LibUV.v2.0.1+13.armv6l-linux-musleabihf.tar.gz/sha512/7948d007171bf57b827b489f3627ac74df447f4d696e8226e54e95ef0c8eed5a5ddbf758fbad841bc367f78cd61e6a5899eb478003dca3a79cb494b38cab830b -LibUV.v2.0.1+13.armv7l-linux-gnueabihf.tar.gz/md5/5be35de1d881f80981647c369b9b4ec8 -LibUV.v2.0.1+13.armv7l-linux-gnueabihf.tar.gz/sha512/458e5058ea4e794e0dc790da4c98569676056bac336df69762e8ccfec8f2955dcc55e8d090daa1b191c0ffa41392a04530c9bc28aa27cf411c1df2f1ba14bb97 -LibUV.v2.0.1+13.armv7l-linux-musleabihf.tar.gz/md5/8d034490da1ec2ef3dd3c69336177654 -LibUV.v2.0.1+13.armv7l-linux-musleabihf.tar.gz/sha512/7f595a8ab8b664d229cf6144e9ed1b5936ba8aaa70b92611ddb85bbe9046bb1b94d8417355a5abf058fb00023d4d56be0b2ddfd5dba896cd7b64e84e32dbfc5a -LibUV.v2.0.1+13.i686-linux-gnu.tar.gz/md5/ccb9aba78456c99b8473e8ddd328f90e -LibUV.v2.0.1+13.i686-linux-gnu.tar.gz/sha512/d382d90137db308933257a75e51d90988d6d07663b3b2915478547127d32f73ae6cdb4575d5ee20758f8850c7e85908fe4710c053cb361826621f22bc5b6502d -LibUV.v2.0.1+13.i686-linux-musl.tar.gz/md5/5ade48f16aa26bb68dc046d285c73043 -LibUV.v2.0.1+13.i686-linux-musl.tar.gz/sha512/f5728a5dc567268e59aa2697deb793ae427e11dcb6796c577e3da3ac24225ece5d4a6c4f903d4a7b184d3c3a3c8c1586c34b97e4a75de0a4e23ace720020fa8c -LibUV.v2.0.1+13.i686-w64-mingw32.tar.gz/md5/399d6fbe54dcfb2f997f276cd38fd185 -LibUV.v2.0.1+13.i686-w64-mingw32.tar.gz/sha512/55707e02a4b5bdf9c94683dbaaea1cac58f7735d5ae22009c219ea61ddfab1fe19b9bc6e830fc32207efc588c27f92770d2441b972f351a1bb3fdbbf5671a58b -LibUV.v2.0.1+13.powerpc64le-linux-gnu.tar.gz/md5/26656d4eaae8739099c55054bad54f57 -LibUV.v2.0.1+13.powerpc64le-linux-gnu.tar.gz/sha512/f85f8cfd91e7b1b02b073931ef9a3bb05620641d18ada039744a92b8c40e5a3de8d7c5efa7189b88baf1eb11fbcf9e6d16031b86e40f99f1b7cfebb0f5c5adf1 -LibUV.v2.0.1+13.x86_64-apple-darwin.tar.gz/md5/c7da6b91394a20c43acdf6f680cb62e2 -LibUV.v2.0.1+13.x86_64-apple-darwin.tar.gz/sha512/238d22bd299ae3b0dfd24a5b38d6d0d07b751fb301487a2d1d2f5313ae3596f33492388ea9fbff549293787505fc527e174ebcd4068f1bda43b40bc19e016d89 -LibUV.v2.0.1+13.x86_64-linux-gnu.tar.gz/md5/8c8913068263257cce5042b725918e0e -LibUV.v2.0.1+13.x86_64-linux-gnu.tar.gz/sha512/a848381012d5a20a0c881f5835e479cfff811928ce508cc57041d69668782f2135c14c7e5388e7dbf693ae57aa1825d911f6f450b9e909cce45487b03a581a23 -LibUV.v2.0.1+13.x86_64-linux-musl.tar.gz/md5/16747c066b6d7fe56850c77f66ea7478 
-LibUV.v2.0.1+13.x86_64-linux-musl.tar.gz/sha512/833a02f9191edf3b56f1e02f5671f22de6cb27ec3c9f770530ec95d8da7ba0b9c05bcdf6b094224ea8e43ba70918e1599f3237bd98900763daef80c327d3d2de -LibUV.v2.0.1+13.x86_64-unknown-freebsd.tar.gz/md5/71f7d9d9234a0623c4b2ee3a44089b62 -LibUV.v2.0.1+13.x86_64-unknown-freebsd.tar.gz/sha512/e73911c3ec35a2201d42c035ecc86e8bd860604b950cb1b7784ff49e27ef5ac9b1da09b59d359ff25b093b87593a8305105bc43711c12eb9654972e280c26d3c -LibUV.v2.0.1+13.x86_64-w64-mingw32.tar.gz/md5/471d20fa2eac6bfd5d7cdb1b7f58c602 -LibUV.v2.0.1+13.x86_64-w64-mingw32.tar.gz/sha512/3f5ad55268184227378ddcfed0146bf0386c8cf468bc53a348d21195d818db4db768be61fd23e1ee2ecbb52f073815884a04a923d815b9b5992825d144c0633a -libuv-2723e256e952be0b015b3c0086f717c3d365d97e.tar.gz/md5/d2284d7f6fa75d6a35673d22e1be058b -libuv-2723e256e952be0b015b3c0086f717c3d365d97e.tar.gz/sha512/68d6ab740945b9ce3475118ce3d186fb67d7e8125784cc0c827df23d63f50c40c0261ef37365d8c11ab9462a8dd4e2e6b19e91e3c84b64d8fb84fd3894afc4ac +LibUV.v2.0.1+20.aarch64-apple-darwin.tar.gz/md5/7b889e32bcb01afc19f9f3801b28a0fd +LibUV.v2.0.1+20.aarch64-apple-darwin.tar.gz/sha512/cfa58e835512957171c7f2dcc9171bd9ea1717f71ed6920b6cac2560da3c5b13440df0d14c5aee210df3346743d3605dec22d78891e8237f5c3867d5cb6d4f56 +LibUV.v2.0.1+20.aarch64-linux-gnu.tar.gz/md5/696ae3e79f9b838a98dae8152d980ff4 +LibUV.v2.0.1+20.aarch64-linux-gnu.tar.gz/sha512/74ac009cebfa3ec67209921296f5d395c2f888d825b0f3d3f3bad5432819deaf5ee758f88030e620530e94f4861734d7984b8ef981ae4eebc356d96e274d678d +LibUV.v2.0.1+20.aarch64-linux-musl.tar.gz/md5/d23a45e4d9cefad93e3e83cf990c095a +LibUV.v2.0.1+20.aarch64-linux-musl.tar.gz/sha512/80d06afae0b5ab657c5c743beeaff112c3cddabd2a2604f8fc16a50cab5db878b4ea8941496a11004c9464bcada13844528cc4fee209bdd8ba374d9b13351991 +LibUV.v2.0.1+20.aarch64-unknown-freebsd.tar.gz/md5/7957f7740cfe6dd5ccb4ff4cd2811b45 +LibUV.v2.0.1+20.aarch64-unknown-freebsd.tar.gz/sha512/923fc895d3fe41005e47d6af422ba450b32e94210c393065a891b44f83f016104d3073a9faa609cd7979bfeca9d0ed0c7164b37de5da92deeb4e8676311cc57f +LibUV.v2.0.1+20.armv6l-linux-gnueabihf.tar.gz/md5/c2ec51470a4d66e3bd23bed67c109cc9 +LibUV.v2.0.1+20.armv6l-linux-gnueabihf.tar.gz/sha512/d139d5a6a141933a83f0d10f8da9366d709013bd2ef005d2b783716fb13d165b87640b7d9f51dc2772a69fc6f63e8545901c96da2343506a2f940edf36332164 +LibUV.v2.0.1+20.armv6l-linux-musleabihf.tar.gz/md5/a1a432902cd687f692c5619e72de241b +LibUV.v2.0.1+20.armv6l-linux-musleabihf.tar.gz/sha512/209207dde41fa699adb72af9a6211d74366d393d335c9b0d4c9c9509c832123707bca27e8410c7b1c63f89fbae77dc15eba55031701e307f88d5c183b929d9f3 +LibUV.v2.0.1+20.armv7l-linux-gnueabihf.tar.gz/md5/bdb8124a2b3c9e42b1b9dc8ce813e664 +LibUV.v2.0.1+20.armv7l-linux-gnueabihf.tar.gz/sha512/2f8879b4f41aa6cab3b195a76dd02376bf5d47f51ac157541b0c8453d03cd2f51fac83f59b2cd2fa49a2395262d18d636251715f1a4912750aa3de56eab4d6f3 +LibUV.v2.0.1+20.armv7l-linux-musleabihf.tar.gz/md5/91ddead3be8fa8b06b37983cba074615 +LibUV.v2.0.1+20.armv7l-linux-musleabihf.tar.gz/sha512/3be790d1c580e2a69d76171b82cfd2f594135920e68f7f4ff7a6fdc42918130e628458492fa2a157947c25effd2de0a71d434fcc1c6fb1d741985bbbfcfac3c5 +LibUV.v2.0.1+20.i686-linux-gnu.tar.gz/md5/c906674ba1bffffb685f0f00189187c1 +LibUV.v2.0.1+20.i686-linux-gnu.tar.gz/sha512/c3e5b394959dc76e2abd51fe59f7e8bbb1755b3a008f019ad05c41ffe8fd9f42d0bf262a506b36f26a2f8f4b14c937eff70a9e1ba2c55f19fbc57e5ba9c2dacf +LibUV.v2.0.1+20.i686-linux-musl.tar.gz/md5/977204bc42355bbdb908693b3baa8e10 
+LibUV.v2.0.1+20.i686-linux-musl.tar.gz/sha512/5fb2717575ee97545026b79c2acc0660eaa04827637138896aabbe69bffa0c11732de4f9aad9dd78ba68db265ccf5ff3aef244d7da0008cafc4a417423db361e +LibUV.v2.0.1+20.i686-w64-mingw32.tar.gz/md5/ae698bbab57855ad41bd850ef2ccc695 +LibUV.v2.0.1+20.i686-w64-mingw32.tar.gz/sha512/9c6530404babe8383c6a1db7fa1e81b40b08de0dc2d2be3507a6466c150acc842cca277e39680b21a6c7f5a6dbae618bd3f5c3ac8f11882898cc116d5e13e7d9 +LibUV.v2.0.1+20.powerpc64le-linux-gnu.tar.gz/md5/e68314bb638f210d2ec9326c617752ca +LibUV.v2.0.1+20.powerpc64le-linux-gnu.tar.gz/sha512/f74ce6b21cd2776cdf49b4c6c2ad551c0bf55951f8bd9090020e71d2b233f72907a3e145b9a95715c391b82ad36ab1a069bb9f87d54c219179021cc26902dd22 +LibUV.v2.0.1+20.riscv64-linux-gnu.tar.gz/md5/d8e1ffb730c784df14faff06027b724d +LibUV.v2.0.1+20.riscv64-linux-gnu.tar.gz/sha512/60e7699ac4dc353d0b9fbd34952bd68185ab301a449354b7e805b6759d3866ffa5906041cd9e6ff299cb9fe3f5a92f4c5bfd9c441210125d52f06d614afc84a5 +LibUV.v2.0.1+20.x86_64-apple-darwin.tar.gz/md5/15d8197dea20880edb96a8bf643fe95e +LibUV.v2.0.1+20.x86_64-apple-darwin.tar.gz/sha512/7d65d4d2e0720f997c164234b78a729f4d4239fbb0b01634f23081e2209ab010ef27deca1cc3824fd8e17630370efa86f1567aae035a246ab9f60a6c14ea6d3b +LibUV.v2.0.1+20.x86_64-linux-gnu.tar.gz/md5/013be6d2673a59cd00b2ea62d4e34e21 +LibUV.v2.0.1+20.x86_64-linux-gnu.tar.gz/sha512/f466af2a1f9ff83d887ecaa200d3042bd5685d6cd487af00bdf8c92bf1d4256017f2757084de3b7331071c473b254df43b03f580de09db3bb9268af759a5b0c7 +LibUV.v2.0.1+20.x86_64-linux-musl.tar.gz/md5/21099b0c3ad76c3d67fb24260ec39836 +LibUV.v2.0.1+20.x86_64-linux-musl.tar.gz/sha512/03279a4d29072246dd806d800b80d9db14b637235e211294d5840104056cd206b370a987a2b771216e762549d13b13432f1e1893510e4fba6c4b111bb3330a05 +LibUV.v2.0.1+20.x86_64-unknown-freebsd.tar.gz/md5/dfcce3d6c2c42f419987f8289b1ace02 +LibUV.v2.0.1+20.x86_64-unknown-freebsd.tar.gz/sha512/8ad3c51f43124b7ad43cbdfe92685ce448d3195eeff5838387ef3145f1bec89851106293eca501ab6f986c0714f9bf9ecbb5a7ef44935a76a95bbdecd4fd2fba +LibUV.v2.0.1+20.x86_64-w64-mingw32.tar.gz/md5/7c37d147586c06f00f6dea947d7e912d +LibUV.v2.0.1+20.x86_64-w64-mingw32.tar.gz/sha512/58762e5a7a8cfd4ee8f0c7ba2c2919fc3b922f673e9b6138ee3714062d8088cac8e3cd5bd244d262426260ac55cef609abb30c25b1a5e38123fb61476a522a53 +libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/md5/c1a7d3c74ef3999052f3bfe426264353 +libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/sha512/a3f16863b711ddeeb5ab8d135d7df7a4be19cc2b9821fc78c8cd3ba421231d39b7d8bd9965321455094fda01584842a58f60612d93082b4fe32210b8aa44d999 diff --git a/deps/checksums/libwhich b/deps/checksums/libwhich index d4a0119625663..cd34ac7cc0b8b 100644 --- a/deps/checksums/libwhich +++ b/deps/checksums/libwhich @@ -1,2 +1,2 @@ -libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/md5/22fd8368c7b40209dada50e3205c1294 -libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/sha512/6fb77b715d70d9bc95a8546c3bf97bd3677c7ea344b88bb5bc3bbfac9dceabe8a8cde7a0f64dec884cde802e4a3000e30837d3f824b5a9242348c4fe061526a3 +libwhich-99a0ea12689e41164456dba03e93bc40924de880.tar.gz/md5/213f0ad813de677d25787cae05901a9a +libwhich-99a0ea12689e41164456dba03e93bc40924de880.tar.gz/sha512/7c42c3b6c480763b85f8c5eb927e776b48cb8a2be1e1c143e799628ee9265adea6a56b33c17583c8e6fc040a3889a4010ac674918bc6947899983a4942353526 diff --git a/deps/checksums/lld b/deps/checksums/lld index 1b238fdbd1a96..fff3140025e8d 100644 --- a/deps/checksums/lld +++ b/deps/checksums/lld @@ -1,108 +1,112 @@ 
-LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/0edc0983135da9e37b18fa3fe6d56237 -LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/2adbb4eb76e72be28951c96140070b6d16c5144f689631d51b56365549a5d38535c1dbb5e351a6bdac4648ba52da02297591874193b1c16e7078060c99d23f04 -LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/59b06fca083f1a5e9bf9517ae4f6a4d6 -LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/7f1dc641df9288dfcd887239b86e7fe2871220b9d7f877b24b3197ab73d2176c4533decbea427b09e8f70ddc6c7570d31f5682eaed7215193e95f323769276a8 -LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/c97e607a661b9ff571eba4238ec649dd -LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/7c7add8a0fac379b580a19a02966adca4932bd4573ba0111262544c0d935fc121c5aadaeadc97f9564331202b08c7366ceb170bb2b318db3425c157772d283ea -LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/d55ebbd25b97a4e4628fad1e04782056 -LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/681729b4d10d8f66b0cdb89ca4500ee8a417561cc886608d06af0809d946bdf7cf5c6bda2b6d5d577bae3a15dc347568a3d7d7428568f86ca61327041026fbd2 -LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/78b06e5a351e6eab372ae29d393ffdcf -LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/37a8b5fa3491ec8ae74da88e81a0c229d38166acbb46ff3f5a819034c40fa59ca2ebf4c0ed58e615baf7bf7da789ba86114738252501cfbd842be95cc2104dd4 -LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/7ba5b76c83d746a3c62354bf753db697 -LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1fa403c8923487e2d6a8e8c1d86c2ea955ed32bcde2328cb1167a315cdcf704af896505e9c44b750ffca9e3ae66e805f60831136eb79fe1c6d58eaf81a78b1a4 -LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/f052208026a0fd5120ea838843b244ac -LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/fd9ff2d5836300bcf76e4aeefb1e57860b3203fab0c32e668dce3e636dc362876d0fba1f2c23bf55a342ac17294c73e839a8eaf065d64d4397582dc212b8b9f4 -LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4d1077835df0f592a168c140ffe6299e -LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/8dfd44113b817f607bc38ac1b4ffb192be340c826b9bc8f9d41e92e0f0333d8fc4227f93aaed16a4b9e94a5ec8b79628f2d3a73fb644684a595921f36ccfbeb8 -LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/0f31939f4ff00c572eb392b6e70aab38 -LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/581441087ad4869cfdba13808b2d6adaf929ea1b38ce96c357f276d77c3e63439f8edbb822c8f41770cb61fc08837d7eed2466d187683bc44f2cb3c553e2e60e -LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/ca767173044b5a19a86c6a890dda3b05 -LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/0577785079039b534fd736ea7a51d9b5176693d81e0bcda4fccd760d7c1218042999b6a38b973a903c0ef68e57dfb3b86e9e2f9e307dbaf603997a853f34eed3 -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/89bb950f17a5b792a6e60ef98450a6b4 -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/54bb68159743cd14ac0fce7f218a66ff6bf29e626df8dbdbd6e8581699d9b1d357a3c10d86c6822bde7299c14728bc55480f91cefd041d1de61cc179ed347b9a -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/735e4dda5f8cc06934f6bda59eab21d6 
-LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/a9b91beed959804b9e121fee786f28808a7670fc5d2728688cca1c7e0fe56e82e47d95712e38fdfc42e02030896843c4b3df9928eb34c2aca9ac02262427c76c -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/30a95179bef252aaca41984daa54c680 -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/0302db3c04396a30d1f6ab8d8d585bbe3a9e70342f068747ddb875b024c173bb9bb34518da7e76a10d3a325dfd741118f36f67fb83251bdb8a9901c4799ad79f -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/4386c746c5d9b1408dbe7df04bc6a08d -LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/d71c6ebf5d3eb42368ab336cf8520afcd05470308ea117fe95797171e5c573948412ce777f62cbd45ee99ffa59cc769c276a60393a22fecffbeaf8b77b50ea35 -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/49287977de61b100979355e458c8970c -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/85ed3b2c7d2478a307a393a2003e694fc3097cc6812143abb3cbdd73a7d36bcb6f06a7d341ea639b9849f714c2d8f418a8b96035ed1c19a3957b42d005c0427a -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/80a97341c9537b8a58c7df23f86d5cf4 -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/5774b246ae820de4230a1f4f65bd683145dad5cbc4d326fd75649e06e773c74c2cffd48108a79ee0cc93175786450b6d50f7ac532e6f68961c18fe6119ef94f5 -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/6f84d6858aecdfd95726a37c9b6a0e0f -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/2cdac9a810c777ec6d85093926292c75e4287f83b7224246f6fa248e3874a2078c46377cd5ccb0f36a5e25b139691f1111d705079e89ea4215c9bc8659414094 -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/d40f0956cc36aa7846630755a672a91c -LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/01368311a0ecfbe3f23514115f0bce7ce816c878815d937f3fa067b9daab07da0c02f520a96ad793212e5056bfb6294dd0129dae75f274dfeb48191e504c5322 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/689120b8091b9da8cc9528c96f5c5df2 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/ab78810af7d77116a4973b5825d5090133218cf08d5d77be14f83e028821e83493a112adf71094cc208f74cf4deabda63d7fff98866cc0304793aec9b27b7222 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5627ccf1677c48b7ef8ac9e5faac1d20 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/454d2636cd72974c79c2d907e56e3c69c30c3fff78b199591c9ebe4f14d04c40c4bd7331f8dc2c957c37e214da8d28ef3a47ed8d3dd4ca9d480d52bab3429b39 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/8f50e5f684c41845308c123f8e45a0d5 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/21baf8a00fa65473ff6cf7ef2974ef88cd5b0eadd06ff85598de10d09425074297bcff3472ef001047a5440065a2de2fc6b1eefe3a32c7c1b3e3261165dc063c -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c2e0a5f58e38a9acf2c3914177ceb827 -LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/2a1653d171a2ff08bde55c53973e62955fe9d9629388ae014a645d3199d8f4bcf0fb923d06812ccd62e224032b261c8ebed56ebebed750acbc87671203d7aee5 -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/fa3959aa413a2b707d8831edd2bd7867 
-LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/8b74fef916a72c2f4933c21d3344410c7e03e64265a44dd62cf2ef2ac0feeafeb2b443eafa5dad3d3d0028be96b9424ff67b16391f1b3a2185826de68921adab -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/b0751bf7eba4f7f7a28dc22993eac9cc -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/7510f7349b06365e9cd260229e7b8c84da26bac072c5fe9a4e59484d82a0753d4ecf1066ffe41343f881a682590dc9ee4ef4a49cd83dba45c21b8d76dfb80f67 -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5abfe9e960bab4c8a44f41aaccaf936b -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/efda0e0a35e2774af2f2df53f89d61f146a5730086d40865d448b009c833934b23ea4b296c3dc3f2039527b72ef40493fdee6f7c630484f64cec2d1aebf4a4c1 -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/bfe87378e965050b1b20e993c8b13a53 -LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/ef2fd5e81f349673417bffd68c4122a87c09caed3f6f8f0235bc70b75deca7363cad68276aa708fb9ad8f7edd249d49f78d9f5fe7b226b62e8604c7bd3d4b9cc -LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/4ee16f57d7dc060007250e17ffd55817 -LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/27fd3a21bac676feb2c2c2363c027cf12988c70d889174e52c6bc1fcb4a93241f4bae85d5750ceba5fa971611700a9d15e3e02803cc14382cf6a1ab2918b719c -LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/06699da5617371442b0539203152405d -LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/83ba6300d5669b52c1913440598a2577106ea73e0b83549a5b3b0f081a94b6b8ca9fc05687d2be4b60c2d6a524bafd43b839082f0eee58b4685758061b229fde -LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/a051688aa3a6383b4be4faa4f4aee985 -LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/2059c6ac6579c4720e7167cd547b679a9c1a27a2c68174ed543be935ee23122234b3f2a4555de0abab3a982aba73d1751db336f3e28005ce8e4659d61f9269aa -LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/600baa66310cf348ef3b4351ada014f4 -LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/52b4718993d8abdca8ab701e86022367655d7927dabb8f3a8e41e43dbc90a9af78caf8abd37907a79b0f05017b6f0ef72314a187dab5bdac8ef7996e74c96e2d -LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/4bc599fc07e9c7c717355802c1538a6b -LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/4521e40cf6cca31cc9ec8ad974c6eb922632d8ad0d5008c951e23b7ec193a71dba5f3bc2dadcfe47e2ca29395646293c6559bd88ac286c5d31d5c4521756177d -LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/462b9c453405768c2d93535fc83308b8 -LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/39dee4d4a0073a8dc4ea63d43bc9a357bcf8e26e3c5c17f1441fa72145f5a4ff6a53e0aae6de687b8fcbace40207ba06e61cb8452c9bfff7882ab48e9f9f5ff0 -LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/8b12a4f5db80b925785f42a97e6489f0 -LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/797d12888668712658fce85ff842d812a255fa4633bf4e78b21488867518a1fc2de746885e2fca1055595ae476670790239a714797f2322ca04027afbf27330f -LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/acb8716cf94f654078c7dce4a140f71c 
-LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/cf64ae04ae3e55575d5781ad30212b1c0ec734f81b42e3c26da8766bde7c47b6a9512515997afd15f9eeef2ee326c7aa589ee1b557c45b4ef955a8afc72fd759 -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/331d844c447f564171345009764321a1 -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/978349a74fc5498408a5318c87ec6d25c01268b9d21fb85e6bb601243ad0d33be8501b181d1f9ab7663433a740912f5bcb7160caf1011b1a2c84fdd51e0fce78 -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8595a49c49e851973fffae7c4062911d -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f707e514843a206b53f380c7bd8d4d8203cc62219344c1234416462dc1cb3d3f8a7452ddfd0f07178d43dfb193b4402a018cc465dc76b43b687fd20fa1ea5222 -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/5b4463e81c156dabe3d182c42eb647e1 -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/995db577d4a78d62cfcfca3f1fafb333ff26548b41d8aa8d763e4705dcdfe8005e2f68873faba4040599a6d15821a523261d0451d75fdf6e1c5224e8e777a71e -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/d2f9f08cc952c0639f7ef1073c8630d6 -LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b1cab7b813fe0f7c26c55261e8561295cbdf1e812db3844b87605fb527d09855f2bef4a40ddb0a7cd354c7cbb626293d4d4012f33acc242f9af4abe1dbbbeeb7 -LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/e82e3b67a073cfa6b019bf5604eabf2a -LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/9bb18adf78afa9dfa0054e6511f5750a9e2fa9138aeb1bd83f7a51d37d031e2f3c151463ea8f682dc7130cb98fafae0b84c60d3befe27f9d0d3dc3334ef82420 -LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/56da3cbe81ddff089ccf6b6392a9396c -LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/2af483a1761022dcad414fa7cec7fb5c6fd54be28185e49539f4824cb0b6acdc1cfa5c78de31268dbdc444201936c5a6d2e04f39ef6f0b9fb184985ba4e3daa2 -LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/15cbf5eaf89c7b834ee19629387515a5 -LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/75ce7c398bdfd57af2c09dfc946b024d5a72e90575ed92f28e015e620ca89e421dfc9a391f4a78277c3e06c38dd696d572c5601a2b1866e521dbc2fc5a60da56 -LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/b895da29b6082cdff6f0324179352fdf -LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/e89a97dfd6c345158e3e12cdf97d33c22f849e5438401cf5a3670c0d1cf0252ca03e4c52475a42c3e6c2b2d689c2f53fc5cb7c925a23167ac51fa1a5e01e3d7f -LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/7edda2d8c2eaadec2d262ded2456934a -LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/0b1d60840d638c0b0269b901a3f5198e18e244da338aef2fb49b474b3601d44a2b4dec13e258909985e363ef8a8749838b01dd195e05a266ca36e6d9f274ef17 -LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/e26138e3491a053ea9a998dd00ad728b -LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1215861fa52b1ee21196bbce0e99912b25f887f5734e0c2628ac78c1af5fdf57c4d7cf099cddcd7031a26c60cf141aeea66a0147428008cb485c207e90801835 -LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/a1e786ac775517b8b483bbe3f6571d37 
-LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/3937f156fc2fb8eecb13444c71f380753c16b08f29124228808c91ea4258ee2195219c4a9b601d4468cc24bd584403c16175518a620bd94a7dadff868b3771d7 -LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/976d840de14ef6ee2c0a538197fe8f10 -LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/7f58f975dc3d69f502537aca79509bbc3c4f5da2ff8ddb1c7e27180a6bb2123713eb42da61cfabd7a48a31fc464fd74554b34935dfdb3ec095d14ff443f514f3 -LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/ab0295ba327cfa6b9a252b0e7a4b50a5 -LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/7c750916d4157ba0a37cd1277a0f8faf32123dfc626ea76f848a7c567fd889a7801f8402a307c190ab34fc21b156f2a23967abc9972fc103e5847a200ffc7305 -LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/6827f38ed653f33953ff7ae510a517d5 -LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/f01c655f6433ec6808b62872b8fb4c5a2d8e187643c11f0b4f5c06e2302e462353b516f431c1e26ee60b579c0f8c8c6385f018db3011c619745a39f9ef263436 -LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/385cd2715d29de3e85a3ac10bcbc88d8 -LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/5c90e8e583176ed9dd563f794073bb344283284a10e303834b6c5a9b71369f50dfbcbac61400ff70f34f3065279c848dc29086309ad38774e50eca3fdd5f9799 -LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/241978345735e3b57a88918693c0c0db -LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/916c6a4540ce9a2b2574d92c3aed42171f9e49f776ab97d3e5be84df832d463b7e542529c3ae81e4d6a31d5789d55b96f9559f48c0e4c8be36d70e3ff6f4292f -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/a4f16e809240c1837b90d28930e3f711 -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/983201793e0f9e6416bcea23b4a70a5a1a36fbdd72bed2cc60ec267eee441aa3d9c850b4aa3da6a232f3de451089754138ecd5411e5431f632e48c1993513ef9 -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/70f47c2be55741f754ffe89e4749dafa -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/f2dcf4f6ce888801e8a14875909f78b46d8ed853a7063a185356c7f21e42e15323d847d9a9d4b020481a7fcec9539d979e4c7f2b083ac1c1bf75a275a200562b -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/becf7c6cc39a98cb722899c94b32ca34 -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/84818621307779e27cc149afbf958653049e47a62ca44ff78552878114c2fb0f7c40cc83722394ee8d880a6ddfdec79012235a6ed20bbfd1e5d9e83ed0a0199b -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/0117c05f8dabf41c4628532d59cccd3b -LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/b276dff2c54fdb6403a461ecf5435978e2cf9c9273934edcf3a31e7f640ecccf37de672f6b0b3f296ddb6a7059b0d95ca6c5bf62d62ca545cc62a69ebb84b8ce +LLD.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/229323a0b31c29b4221d79ace1a76820 +LLD.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/c00fb8bf309f0cc6c8cb4465cc0062a8b1a848d9460c53241be654d88c598847b4590b4afa4b71c4859cfc67490942eddd79ae9ac4d75a9b0e392fbf67389a92 +LLD.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/ce7804a6a846d0d951aae34607c43bdc +LLD.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/164adec7649a36b2967872884866de1c57f6f54e1c24f955593f9f6a10cd89c69493a64a37bf9f001ce3576baed867423d138dfb1df0139b4c1312e81001b167 
+LLD.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/cea134f347bae257cf5f55b6388cef81 +LLD.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/16b59143e929791b0c3e56cfb4970d8b3c87adf6e847fa9e2aac17c4ff2aa311ba2c7511c1b0ae2f39d9aa92f87ad4d99c042fe35bec391ac865fedb72bd3b1e +LLD.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/5f903bab0e38fa608e8965acce6f020e +LLD.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/01e5f6a32958e04174c545f57c6c3b1bc88ccfd5ab18dcb9d67b92b55ebc7655a03bf963c4eaf7e5c3792d4691427a89db372e7534c6c8f965f8a715a32d9284 +LLD.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/241a55374fd067f3736a2bb929e47015 +LLD.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/f1fedea4e6b5f6f3bbf4d705034d6c51b06f011c2ecec1ae49c5b7bd123891eee8b991462d60be7fffd58f7c773afe910a06ec0b55b37eed9b4d09b9fdbd5068 +LLD.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/ff018c7448a7589935333e46739ee2c4 +LLD.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/b646c6a945b8f42b396164a8e87fc2e54b1ad05681f438dfba83fdd3712a60167aaffcb0300bc42d904eb4bd34c002a71642b59540ca01e64d6fecc6daaacdd8 +LLD.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/e6ee9423a82322b9233cafb1c92eed2d +LLD.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/c915582a9ce2dfa8721741fb1ed19b719ba40f0092c2d29ebd68829ee558cef0b044a5e40985cff88e89129cbeed052d85fa5c6b6d87f9b3a68a6e89079ab4f3 +LLD.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/cc55112e2db358cf26d7bae3211d8e4f +LLD.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/0ecb43045419020eea911f1767dae23a6b1e81bb155ec493e911a9412e45f7ec71461aea2e6fe346e641747139cae43d9435ccecaa7fd6a234e4d69bb06606ed +LLD.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/498b2909f80b20588135466d5211bc80 +LLD.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/120fff24e85cf970670b20b5f4509475a3ae0d7621f8f67d018f3a7625548d736a3abc89f015966b1329c6b0a08a1db832e035ee3bae384e2c5864b73a856600 +LLD.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/1bcd298d5292f8e51f19b97fa4b27ab0 +LLD.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/695c42557f9ee53b2e10bbf74653fbad4d02124b962a1f50cf719d2821607dfbb9c1bf638dbbc9e0e544f3020a9ef4a82decd13f886cc41ddf47c07a5e40eaa1 +LLD.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/f0e0668d29253cd834418c88ad63df31 +LLD.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/f910fd8ca972b1cbe0704d4d73273e2d6911d31ae5fe842250802cd33453e4fa2ed03ae4b4df43ea4df13711cf2409c16b1c44832b44cb05f7681488c4402681 +LLD.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/md5/84f79f1ce1fcd57ec4bd499a684da120 +LLD.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/sha512/d0e4a7ecff0e3f499dc22a9409ab8bff9099d4fdf191916426be917695c7fd55043b41cb0fa21541c3d6a6c202736b5c7b8fce53244e3ac713560a47a0ed6588 +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/2323ff933feaf3754b442bee48a63607 +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/47b8e490b89e04fb8886dae438e3ddcd53c4e98045de2b0def3988671827528c8e9ae296411464c0f17cc64bd3956644673f47a3817237f27e1c3ed16ac8ef01 +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/37cf8528666064a434296f2e0039e9c6 
+LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/ea1504a859509f8a16030db7a65f42f0e78d67adf5946497f2178bf25456c0f2583af72c636881a4bdd1210dc0d377bdf300ef55aef5db8c56995424a1886059 +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/1c341f2b161e2320d3d1a74685887f54 +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/4f6fc099293deb1a2cf729ea7edd6e17fea0dc8b9fae9acfe34e00b1f5c798933df9538c805c8d28c6279eb38f9ebae2a1aeb1a2f23087352c6eeb3b27b63ddc +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/e306d59c71b0958c77108e650fac2612 +LLD.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/79fd7cec0e169a9555ec9b0acc3248991e2e37a1d5bb422808ffcfd4f47e79321560b7985c82dfe070fb0b5ded5c160d83e358399c6e7608eeb62cd4a1406f88 +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/c1d080f1aebb58778d730578fb113290 +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/1f420da1897bd0a61413321aaaf032e8ed38d59e6dfe3313ca3a6ee6582ae6c566e3761ca8fcd1f5a964337ba8a9b3e73dc55ad68aca139beeb45e43d51e862b +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/6f4e0c7d2fe9ac254650dcd2842dafa8 +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/bbc71b334250e5e6429766d88595adbb671a206630987ec2a27e05711ff0f844487dffc1c136052ec11394e9d5c51c70d1b75d5348f97d3bf7fab463291e9dc8 +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/76925b9a7bc249b2227390c479c54f8d +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/20643ecb79732e3ae9666116dbd0763c18b808afa78e6a14998aadc7265cccd6efd28670592db61d3d27b8d3023be4c5f3df41fff9e1b38d61abf76829090b4f +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/399b9aac140d9050088fdb187ed4645f +LLD.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/8bab65965670fe392e78d0b9dc78c92cdcf202898f6d5a3174eb89ca5cb95b995675c8a7d81bbc4e95e490ad1a43d9d29d7907b7006789c0143a1d8f24cccaeb +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/026a4f5ae9eb3ac05e5e8fa894d77a5b +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/4bca8bd558619260cddf4e2f4593cbb2a0691b5ccc6d1dea6dfcc5a2b5f51d7d1a76c35e481244e211e2eacf32bd628df5ad0e6c75e5185bb1d9b569f6acbfd3 +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/f898ceabcba052b7e6713a2b2c208a92 +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/92be1910f795390be5f15ba5b2c220a3209a5f7ac04fca3f5229486628bcf5d2f20cf6e4dda8b41d6beaaff42a68a9ddb95fdacc6eae33b9183b581e9a194895 +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/e366058cf69a4367945bdba9523f2a0b +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/45a786e8d0162bd5bd01c029691d2928d3744ef4a7a1efc2e39755dee2f9a9ae23ee725f0454ca601cb9c082a342209e9129df851314b5757c74767b13508fc4 +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/665a8502170729c86ea95a7ea2fcce0f +LLD.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/c1a2a85c9ce14af8c91bc9a599393c52c0b8a585057366b1ceeed34c5db44641ecd0c9b377bee80cb4951fc7102fbb4f21fd050126bfa5bb4e582ffefee17035 +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/b90b2130262f63f5189cc8e4a65e4433 
+LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c1cbfd38c82d676c3fdbec486691334cf7bf4115d9ef2665012b71725c28545a49f34edf5689ea0352822c811c24c89cc152d1fccd1586b17ae8e6b2503641df +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/2d5360da4b2c9ffcea5d0a646a7c114b +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/73323e0937fe4423883480294c8df44744acde4f47380e35535cbe69c855c0e35e86a1eced3085ae0285f284f47af5ef237f4650bf2b6a8b9d5308efce88fa02 +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/a9b9a65938a7701aaac6fa706481c867 +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/fe8243aa131ad8be54f0fca5754c2e68ec39049004ec8feed499731c5228a7a46e303ba866b9f9a55e5318c73d8a46d964673e111f6c60e5ae1628c568d7d894 +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/0d9592a287c9231ae2db65000be2cea2 +LLD.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/4ee192dd33f518d2735a829ac8f822b5672b39e8c2254987aea6e5f2f0056213bd85d84c4050d52ba9ac8c35762521c324fe2d6e18db0396e7142af9cb61a561 +LLD.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/d487598dec9969485dcf785fc0968bd4 +LLD.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/8d3117739919696b9b0c9ae398f1b1e9db8bd3e2e27839f62b3551c22ae2517f8abb69e57e23d125002bb466889b7352e69c1e9dfd9abf1c5433f274e928b341 +LLD.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/943434b08dffb54e8cf04ae7bee34923 +LLD.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/77b7bbc5d988cf36ecd10609e091cf24dea134cd32c7ee96dec7bfe1a4942553b6205653edc16c8454261f621966daeb267f42562172bab4cec9693ad733d867 +LLD.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/cb9e371947ad415de048636ed78ca48f +LLD.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/c00b696fa146e8c29b37f15f78ab3325db9b3f5b3514e615f145b4eb7c9c8788662cfb6255b7dead596cad8c576b378f7459c2c85d462b597ba5e21adbac0536 +LLD.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/485f061ee8425f042e4dd3042388bf8a +LLD.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/845a47a36c61b305bb70b1249f6fb7c4e8f740acff90d3e850ab2e887f7d959ae263431a02305bf7587e4194463f9932769d500a19709bc479eb6e6168325421 +LLD.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/e4f97e8334e1f29ad9083d051a50eab9 +LLD.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/13ff037881da8a2333129bb702f515a0eb1afb3e4f27298c035c133ce5c512fa643b2a90df38d6f61b1dd5e86e32998b9061241358b61be794caba2b989efb70 +LLD.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/570f50ef6523cb8133b160af8fa2057e +LLD.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/69ec402469b2b2c85aabca1c8b36edd0c53b7e678e4c56fd96062b62a57b7ff1008f328d71e6aee36d4270a41a7bf84f62f934007398618b5426202d9614305d +LLD.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/0503dc3e4e69ca6fd7e2a5dac9c4ef3a +LLD.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/9b6c851341c2642d5ed9169326b4de9eda50ea06b1270a721d2e85bce8ffe4c595cd491e0a218c3a418aed526f881737fbb44cb417cd5ba7db972bcbaa6ad0d1 +LLD.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/08b22e98c514d48ddb1039b44f64f480 
+LLD.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/5e5b7c66d5fec3ff1a9cb7989d62887699cc3e70ab36a94e6f157cb0b9adbe8d63f5f1a74cfb6765cf46851087019b12ccf09ea848ed6456d17cdc796a5bf2e8 +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/7962fc6f08531f0dcfa44bd667f31582 +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/2c936064685f12ed6764c187192023118e97dcbff6ca1656f0304a40772b4ecf55ee0296b3c2a00760f5bb437162e2b737635fdd59b889d35756d697fc7e6b72 +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/3eb4d78af670d446f696449a5e71e3ba +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/315dc76799f3e443fdb5ebbecf96a08070f8251930a26995de892b8e67bd35bbb365f2cc5fd93bc7cbcbc9edd08280ee8d2a36b28a704866dd3fdddae4969455 +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/e73cadd0354897bd5bb611cc1c027858 +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6f444a4ea22e7108ab75686ce9cd78c0db0a677e39e8434896fb1ec90b9dc013abf7de1024d329a9726dabf229a8a68c27a11f211092e676715d282efb7b8504 +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/aeb310f106f31126dbe53459e36d33bd +LLD.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/cd18c115415dd92bc7fbb5c29cacc5848b1f3851c3a526ff9c0813ad46824df0a4f13a66b1e6641ed11b44b5b937390619f01666fe6d5a047f1772f0ad03c941 +LLD.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/9493a58ed62367b45a055c8880de0924 +LLD.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/5a448c87ad627235d7d2c8f8f8866af0f6872c3f7775123edb09b23b772f165fa020fe0c592ad100f8c777213fe1346b642a556df66ed003771eb0e76345215a +LLD.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/d397b37abf0026ca69fa6657dd791e27 +LLD.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/9e9fc915389bfa09cbe8b977f22a3466ccda052f415b3b5fdfc97a15e089d4f887fba97d6bfe6e17104f09bebe48c859bad25e9f2cabc179000247292eafca1b +LLD.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/5dc96eef71dc28611bc998ef966371c6 +LLD.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/781993c75bb07db96d02b5a7e779116864730a9bb941b64420a435956a7ecd501b5b2673f1854c09ece5f0c73559d5723c271d6352be57ddae6801a695629362 +LLD.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/8a1fe0ccf7699ab7a7a514b620112a70 +LLD.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/d002083045d3eb7c749f2e97527c1228cd317a8138ff254228e43594a6cabee47fa363785466ebc2874cc438457640ff08a836eec7334afac451506ea7bbed03 +LLD.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/331be92bd3d76bb8e86991b7832ad41a +LLD.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/7b1c6df53311a17a92a41cb67ec476f947949c4ca5d15a643badaf9f01e76a186abbb6e156f95ad1605d83250df4e081164986a6b7fcb3238076b0ec5a3bb565 +LLD.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/97c7f5267ad6927f699a25ce44f55a70 +LLD.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/7b847c6026fd7daeb17a4459b852562ce6664b2f406664be672bcc384dd5a79b9505561fc61dd8fb78a903a2ed4978f322cccad19f5a3966bac856e877c11ef7 +LLD.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/c86da6a396fcdddbd26cfd71c0f70458 
+LLD.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/8d5b75b43167080b8ea456e516c9ace02ee6066ce715a56f0b42cb8045b965b1cf8d4ebc0786c23be4544693ff858816a6257b0638ec11e077df32ead62f7efb
+LLD.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/d72e175272ed893688d18e868120c575
+LLD.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/9a46eeca8c7a8be65ed487a74227534e08a257e404814c44730f12a5bebc8cd160998cfd5ed30189aa606ddbe602e1b1788e465e4a210103c6726a7fd230abc3
+LLD.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/0206fdaa9582ae3bddaed1b6fd7a8cb5
+LLD.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/584a67f603f656ca5d27aa0ef2e425ad385612aff06cdc1d534b5944939a09246c93954fc153b8a89acff721e657a8903af9a640abc252d4e452f348781bca98
+LLD.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/0dd14af342467eac2e13cad4acbc881f
+LLD.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/918f2c66898f828414009fa6ee273da5bd654e4b787ebb4d703f0be27e388b46870d68bd58c4f45638d276c61c1bfe2f3c67fbf34dfb5578158d072f82d927de
+LLD.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/d1862068a670d4c04887513b914e11a8
+LLD.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/c5a91657667394e468e71d9c07df0c71918d63d094d2598875f75cf3830d8502e70f59fba59b07a2d1e0551f58d0487521c856e68e4122fd6a6f7ebd1c7c0f58
+LLD.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/8dc0ec01029765dbfdd28d63bea8cfca
+LLD.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/234e9db1177003a074c6ca7236c589424b4617d1a359f5f9e2ba6095a7f317d62ac731319b4b4513c523e80c15b82c99ff0fc9df5f76fad452955492e9935b1d
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/7beb510d766ac1e16017aa6924e88659
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/bd18b733a6b2fbbeef7f8af2f13dade0330a525c83b4faed5a5d2507007be2f2f7be70f99d05524fa94ae1dca524be64adbb9dc87485477f62109f44cbae95fe
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/9ecca76cea81cd1d0fd3470778145371
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/d1548402dfcb4aa0cf3c9e445a9810e5d8bc2411de9943b57e892ec82af29e214f6d93c58af9cd0de9b4fa5a0438e4c1fe0b9591a9582143d470e7a42e685f4a
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/b1de7acc21fe51c1486854cd46b71bae
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/9f8457c12801584340b3fbf846920299756359016d151018562f8c14e0a03f657fdb6eb1d7418fdfbf586c59e670d866384e822de9bde15b2dbd031ce5e6af8d
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/373a7007eb8b526811604fb0161f73af
+LLD.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/b586815621f698c7d6ff995c93e11ea1ec55e7e7c0e34ad874f64b942ecd73685cce150d51804bdd371ec42671e7814e364944276ec91282b9b8b8226a6d5786
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 6380397ffb84f..fbbb34480d893 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,252 +1,262 @@
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/f18fa63ec97c79f3773af2bba51f69c6
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/4ee1c3e746177296fbe976976c58b6ca09dec22943ac1e63008aeed94f46619e4e60d8278566e74f4912fa9d3aa21c8b03ae2bee360db54c7dcdfa2381469148
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/f482e543971546cd59d946cc33d79d5a
-LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/d026b746f419e9bcc04daea60b1e66e26d4132e7a551b0f14c95ea95dc9a0f4e645110d8cd5b91b92bce7775ababb715747a2e4a09c0920787e2f25ef1bfbf19 -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/5d12f50225285b180274cc89c21e7c44 -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/3947f0d909444716a29c26a0645288e0f02ab19e6fa6ac0104c5ffc9659f01337198a5914beca2ccea7c98c9aeb12fc537891d440766054c0b9d3bbc40e24165 -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/e555476d3324996897cb0845ca22312b -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/a809d8c455d6f72c2bfc2517ab375d6ce329880ae33c5c1bf575dfd599d6132e38df35fac4300a0e72726ca33ae1db69ae67f5fb03d5c617eb34f7ad20f09b8d -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/6432ac27166a0ebb550c7b000c27e2da -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/be6440412d46252292e6d907f04193ed3f438b06419d0fb8b067a7cd89e5cd2dd9143af4605de9a2a697ec2745efbdaf6021d065346041fec3b86051de42a26b -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/0bfd05e6bd23c92b73751a86826b288e -LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/68c08b2624bd0d38c7cfaa8b61b7e1ed70c7a106dda814f146a3f5796cbd42f476ef19f726d3ce368d89e624c7a3fa7f07829c171d79581f3cf565dba28c27de -LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/53a9db6445352b44717f7e0f81d896b2 -LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/ae34208c128f1d4468d8a25b060bd1904f36a73dd0029606394061843f90aa26f9c3071e8281e76dbc10fcfd103f04602fde370a0cb04d435fe2f7a230989cb2 -LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/f7320272ec2f3cc86a742a8ce3b4cec2 -LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/612f03f49b04fce2a21e3e0242c3ae591ccdf6398e31aaa63956c40fb805d4a060da8acd6e5ca1d1c0a7b1f994105ad74b1acf78490e31a149368c8a9c96c026 -LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/db7b7a03c047a6aa7b599cafbf6023c0 -LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/05474495e73c56a8bf8a2459e705198a6c6e32df5b83ab153f1080a763d2f7d79dbe014592e12f0f3063b30bb0641dcfbf4f161ed988c777c8955ce9bdb89cbe -LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/88255189a80045bb410da1eee3c277e2 -LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/b944ed004867d6bcf48dbc089d6ee5904318d6a2ab3a7dac3c802cb7646d4df21950a6e4bcd5bc57bbea872f99f39ef9e174dde8dfa4f5518f23a1fa0e8cf959 -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/a25160098b55d2ec00cde15d088343f9 -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/2e84a0b52a4852a69155aa4cdf33366b863caba7ced42db573e401a64c0fd2acd1d27446a3ad0ff94740a5fc4c579e745802bc32f925bb505177afdc64fb85eb -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/10b225be9d25681a36fbffdb5f3e315f -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/6c38d87c8aa321fa08ff9880bb27cedda1806bf6aece891f08f757e6276dd37e450a899c4fca587bb693f683f9ad0d85f388e7c4ec4a76c96e73f0f26ff6766a -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/320b77cc43b91549ae0e6b538ff53f7b 
-LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6b297c643530c06be5ef1d8dc2fd47abbfaa3a7862ba42ee9e4cff1361e54aa7ce77d4d9d7f5d2db38a3c780cd38a472eba1308e1f50aba74d3de3bf188fe91a -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c3e0fe843bfcbe0c03a563bd40a16f0d -LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/b62c3d8867594e34b1eb0c16f1db609c4b43146deceeabc23d4ee9af2046d8b2ae1a8566e2613a69691646d1991017f0a7d37ba8636a395d471f8f385a478479 -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/be03ae93d0825f335411a4039905052a -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/9e0159681e8ecfe477d3099314ccf2986eb2a8325cee274b6ab35e04ee9e89ea61356e5082d9adab6c41b8be98d0171e41642afca283ec59ed91267e66223c6e -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/9e244718d094dd6b2cdc50be77a284af -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/705668d6b44bc754fff8f28246d8359773f29888c1f6ead6a5f1e10386c88572de27d4d47b8a1bb160211c07fcde2667833615c31ae445d1929229d981e36e3c -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/12162558c4c89913f0486f3a4c969c8f -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/dc6a48cdc9a04b3f0938784d5d40d0b453bf438881895c78a0cad9ebd83090cd9f1d12fc00df6538d053b2943a590a3217a8309aa0912fb3615d728280979276 -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/e5012844af1fd76d6cf92ff0921a9f24 -LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/436ace73907097593bd060ff5674db2e36f7a6e4081033b078554b76244ba0d2caea30dd94a49fb62c96f2a1c3e1f190de440bd2bb9242c1206f4794b65b30a8 -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/9ee929acc7c52d18a7c42808761ae233 -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/12f07258d295245f2b53414d0df0144c547c60b090354b5548f50bb704a82e1623e55ad353eec233407f1840a50d423d1404fc3e7b87f2386863189e7f886813 -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/c94a2e1f4bc031a7c663111babb0f8fd -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/4c82406d8df72312798d95ac0d038b38eb332b4f4f8a586bca7103bdbf7759365daccb6f3bdef9a9c74a06d04a12e96c01ac9fd03aa38f3c586a7ef3c7ec7e8c -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/e038b8feabb2e60b866756a8dc7a5947 -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/c3e03bff11db87c7f131dbf7163b414cac91556795e4c5c340bec52409c39f7e91c26cb34a6339c10610d0906f57a209d36f6cfd458b26d24ffca9a43d259f5a -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/b3bf4ff216946ad38ac6be230e0865e6 -LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/0daba831dda378b2add9607fdc0d32046c0390a0a63758a6cdd9c0b90f660559cad0e71c5ee0b1c4264f3427e523a8c615bb87ebdfb63a65b983acfcb8df43e1 -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/33a3c56ab597e6f2c2863842f0103e53 -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/fb15d07a66b1f56b73625ead591f90b57a843cd9cb140e5158159a5f7c9249437678c61d0e19a11a65a536776dad37abd6be34ee0ec5dec7c0736079a0fcc7e6 -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/7488ef289e45e6c44753a42dc51aad7c 
-LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/6ecd62f0756a1941c8df92605a7edf9fc2e70957f39ae407e5b1b49977301ac6e82d55bcb856668135c27f1a75d156f3dfe7a27c47c6a3594c2c9f032af8ef19 -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5a286dd05b936c0a3ab61722531ef5ee -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/80016717959246708eec8588fd6bb5cb4894bf05c2d78cd1641e31cb43f38c0fda866283dabf1d999c77d030b70b89363e2346bd9b9310a2999623e47b2e4e7f -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/b62420d31c65fd8720427900b72f9aa4 -LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/f63f62a667f6f2c6ea76db2b142d58cad3165a426fd420348f0831d447a9eacfda5ec9c006e05f60c1f2804e8b25e87369e754a0bace28257035a63a1ea23a76 -LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/ea922c8edae65c855e40f6ff924c35d7 -LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/d83a3737058da3c2427c061cac83ad910c43368e47bd1f9ff86c21ef0b40669946b128bd1063a8fcb081563ecf606d70a783a0747ec951c3443077b3ec8e93f8 -LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/7a20fc23311317b85127fa033cb69059 -LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/36d51f84dcb3c76556b6ee677a4f0fde1610df30a7030d1799fe9681c27e04faf1ecb4b5731db9a58060879076c037e3e5bab65faecc527296b439743bdd7d86 -LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bf6859a7e73fb51bf91e2c7ce5b879e9 -LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/39aa6b1e2923aa572458cba58a328bf6ac0efd5f11974e04343d65cbb56fc5804066f7cedb1e9c58252313f94ee0487d6855a1714adebb3b71fd6c783a01018b -LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/10c32deaee824ed7a19dca9055a138ae -LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b9b14c9ddc2b0b07c07a53bbd3b711737d1a7d71626d3c34812bc3862145865205e5da07b052e119aeaf54fb97968b27e86450d768312623a7a87c6b8179d872 -LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/caa574701f180bf4dc323ecb441fa53d -LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/1c97d2311111f4411c3eedc6f1338a8c899932e7fc3490a03c0c9b2bc4c9a52d5797c50339ec7105d60edca951fc57c6f11bc7198c8e1c96334147d2b2dc670c -LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/f46c39e2f848fb5fbc9f1eed7fa695af -LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/ed5bfd8057b2d6d543c4a11f0c1c6502dc7aafd07d0c5a96ca2b1d0c5194093f20f995ee38a4a25cc0291b31c682c6dcee460f9fb657b90be5afd43258ce4c43 -LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/06533f3ac22a8a9be2501b6708821806 -LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/5284308b46ab1d8ed896e0425fae4288f87a640707c8cd5f298520cb19cea8d6311b0e6d21d5ed016f6d87f47b93d92d371abfe9bf1810b357972b7c9b437811 -LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/f75c2acc329a9ee041ff2c81aa93b4ed -LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/6ec83776bac9e2cf2cbf3f890412a940c9507ba06eb50b6a05148c9c336775168cd5b6ec4aa1aa148703e6724c414830e54c3ae075e4d3649280ada705ce9816 -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/7e2ea1a3e9c61976b446cbceadf33193 
-LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/b21830528362115476cec7f32b11f3c1541a5779027c824882fdc00b248ea0f0ca8d08ebd86b938c10fc80a7f7930d86e2cd4482fdce33822613128eb250884c -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/d77b1c5ec7cb8bd02ccd24347e2e620a -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/84ddacf1c222bb5d895939ba4aab80dc6b5c5c596a36fcc2869a87d639d006a156750f04d268b6c10b47d286cf3bb5e8c20804174fc93881383f2512833ad7cc -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/26f634aff16b5c8cff48b0183f3f8ddd -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/cc3619c9c8adf322bb334a6b2c9de1ad088a17f117bcb9aae5b51a4f7613a50391c3478b7f892e9dcdb802067de69b098ba7d61edc9979b8f960028af0fa172b -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/891a1f113e7f3f8dfa56f5f28e1c8176 -LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/9b6a4a26c8f83764f892f7caf5f09a5453ab6e89c742ae4cb1e831a0711104d131d8fe0d9a8cbdd384b2d881edb3d9026af804f47f5f79d62da1d51dad4ec0e0 -LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/7dbc009fb3ef6ba400baaafa733afb54 -LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/c279c4be6a5e131b567625173b33e1f51a56c53eb0740895c1afc8b6824a00d4331df76bae9960c2143f7bfc2a9758dcbc7898fb49ef4aab56df6bba7030d636 -LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/007fdc357a995d68a01fb45d52a92da9 -LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/2bf2752f654db140822f4ed74494bcdddb85f4040ae24a753ed9c77efa29d2f50397719fa20de031325823004a66ddc1c00c9624887289c8020d6627ffd21f5a -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/fb17aeedc48fb6a24f0aa2d078ceb2f3 -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/bd622d2472f85ac5b0cb255a929413ae3b30ee06ec7204148072dc1f9da7bf451b07960f4905a66d2673db9926797e4bc33b262cff656e7bf4cbcfd132b49868 -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/eceea244f8fdaf61c6addac8b8f57319 -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/44ab4a30ff65685a121dc54c2de55de441fad95b02f54cb359ad44fb298adbf48fd7651ce871fecb40b08d95e1ca701ad4c857f975a37a5e5a42280dab6fc670 -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/b09f19c4940f6fa12ea8b5076501e297 -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/a52da2ace1f0f2ce0090a582a267fcba526c86a88be3d8e55020ea07e00a1cbb0323f8b8b0205c9417982774fcc05d667b8330f7676dd40c869f374130dcc50c -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/a365e7fd610b6f6ad2dda2d94a141b4b -LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/5242fa37a93dfd99720f9c4966b4f9ac164987cb8de136c01b3474860c6229538e73db7727a6c7c832f651ce7ccb97dba0082cd66da2fe812dbc8ecd44fe2cf8 -LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/6645a6254d82bf854e50e47d671b192e -LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/d330eb15c34e13cad0eeb046e2e27f10eaefcf1d6cb68bc4d55668b55e3c00cfa07bccfb4292647a737ffc69bdf4070cf5a8bb1cb7f6319a1caf0faddde7aafe -LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4073ae0cc33b7f803644a272cd0730d2 
-LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/2ea897e3ed3688e2ae45918db51c5a1273afabf46d01a6a27739ac951803645861c549fd438b48dcda05294a4d87b6c39778d42e916301277a0bfc1d9ce53979 -LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/e223954ddf9e11830cbab24e4ed435c9 -LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/fb88bfc543ccae5cc9ef737e81757a8f7f61d1a2816501d569456fa62bd8ce30ae57b837ed32dd6d2a7c55fdc26c2c1b1a9965968f784eb3c01680f25ee5bd8e -LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/356d2f3008be6e04843a278d182834ff -LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/ae5b30925cce41593a34cf2e76b606e978c352f2bc915d8869b01600c8a81547ad392fc900766db2ade06355c2d95aa473bd51dd3d45f6bf20289d9cdfbb126a -LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/c31804464c51d1967e73f491948e2763 -LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/84ab795067bbe71390f15b2d700ff9e0c4fc124c3d111bdd141643242cf6dd7d3317a92d9c97ef5129ef089cfa3d703abc2b12c6a9d2287c90a9ad58a4de8478 -LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/9f205efa80dbc9d43560830c668659b9 -LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/54548970bc7b3988142c1a5c2be36f877c4d2cbdb3a58dba71acd7bb32b20cab2ab12c82619abeb6b3bde9a95fb66942e08104df0fb0f59d2ead7eda957b783d -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/ab175b04b9c8dc73f2c06c06bd9d6915 -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c28bb2033ce2fe182f6a5a29e34a6ce4cdd22e994245f7122c4efb39cedd491c9d4343d8ba2aa8062eac156ad36d9f54605e6832feadce3c6e9f66e9ed7c760f -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/7e4dedc77bdcd6853d613d8b0e3e9af0 -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/e09c451cf018548bb388f9a0b419496a6c6540cdf1e204be391391b3a5645c2198562c2f995c3ae30f775c786e9e59e8b93c0fbb5d00fc9ebf1529dbca9c568a -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/0835b50b6cd53b4d1fd894f27b3e072a -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/8d228561f66feaaa96cf0af71421032f6c241e8a8ce3b8352771072d7bdd972e1b6270e15b0a4f5f4b76764cbd65ec371626cabe8607294041679fe9b6bac5f4 -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/bb61fbd156bb0a70184f6f425ba770a5 -LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/ec310cab20f39facaa6c0b3a8badded0e4ffbd7bbc1fea6b3e67717046bfe6932a94cf562d3e35dba5052d5cfe62c540c6a38477452e535da52e650fe5dd4d6c -LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f -LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913 -LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167 -LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/sha512/d3b0c81498220d77e4f3cc684fb2cc0653792c381207390e695ac30bc74249f96a333a406b2cebdaca14e0b0a27b188cba6209bb5c1cbbb5c184d5626dbdc7a0 -LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/md5/052a35e879d52244e4b0804be875a38f -LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/sha512/d1b34fb97f9928e046d3131a050454710a93d38e60287b7e3c92f179f436586d3230cf90b0ca0eb8a3f9ef89fef7b1ffd7d52871645dfa233a8b07ca87ea2ee4 
-LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/1ad96a03a5dde506b5c05773b1849ec4 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/82306fb7b920fa7c71bd53b23d6915e7f256e8da9679cc926a53bb0d879f1f4469f43efe556ca32c9ef59e27b435572c7b39859090652635db4eeefdec0d1685 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a24fcd3a4dc3b1a98bb7963b1bb4930 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/9ba6b83ccec061a1e5260c807dc8afd6e18799431b25a7e65b97662cc4db02509d02ea07fe12025d80914cec7383624b1c8fc9add46511c668e184ede263ac52 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/09f1bfcf58a4124561553ab5005f9538 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/b0907cb857131183ffc338780c6c6dd1d48bf0ba61c3da1b8f20cf9a943373173b621cf9b2e8f1fbc657059a896b84aa025e6d4f0f1d1e8b623fac3e96541765 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/19158bcfae716b26f924d67c4e719342 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/a90be57990b6699cb737ba96904e94e1f082601ca9d01e670f025b5500f526980741921c9cf672accab78cb5327714ab6ecdbb875174088f0773ebb627a98819 -LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/md5/ba75556eb96b2bcdaf73ff68386d3bc3 -LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/sha512/612fb765695b7aae11ef29608eedf8b959f60c021287a67b03a2a0f57a5814001ffa9b261c9d60d5f3d0582c06c2b41f75fd3afb66a045a248bd43d29e304c97 -LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/md5/2fcbceeb1bfde29be0cbca8bb6718bfe -LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/sha512/58f281cfc70b3f8a59cf4faa7732824637c811ddc5ea6a058f294f4c3ed4fa6c8ddab5c007567b439f2854635cf4fd146284059bfbc73e7006000ced9383f705 -LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/md5/153c028d97dceb6924414a7a9a137e1e -LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/sha512/7ae1f197600eabde9036ae58623de34a6d25636d7861777e324eb97902f65e26c6f3775e757178f8914b0cb6c2e925413f5ffc6abc9b6138470dc9e67a17f212 -LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/c08a6cf3e1baf156eb05003ed4e9ebe9 -LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f74e44986622329990842cb3ff549ff9254c81863d8bee468b0e58b7621067e7e7f7f18e4cbeafad6a05e0c107323de6828a78dc7afbcd7cd1892383ff417968 -LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/caf151150e56827be09acca6964d2b18 -LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cb3e7aa71367ec4a115bccc2e8ac6bd5d9f22b3935b3889eee1fbf7303c5f553d7d3108977bc1f6c9b6917a6ed9e10bff211fd56b8169233ceae287b112894c2 -LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/md5/d95874cbf6f8b55bc314c3968a6a4563 -LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/sha512/4986a8d9cc9d8761a99a4f02d017b424484233d4cbe2d4f49ccd371591384b1b8d1c4d31cb908505b86b00f2b164568e57751dd949d91af203ee4a582971798a -LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/md5/89077d871e15425b1f4c2451fb19a1b2 -LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/sha512/b65a218b05ade2e2d1582188897b036a4596d09cf65558f178c49c1a1a62b7d992b1d99fbe86a027dc83b614f178e6061f3dfb695b18a8e2b6bf76779b741d96 -LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8829dad5e34 -LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0 -LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f 
-LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6 -libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/c1bfb47e9a53cc612fe98505788e1838 -libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/f16c9f1faa3e959d486fbb109add976c2a2018597a0b053ac3168abad074ff9c2b23874f8969f0a71c6551c8092082938bcc35ad846913a0a9965dd27d6dc876 -libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/cbe0859ffa50e2de82b8fe86c2540f6f -libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/e864e7d62eb3b62066fe14210c43b79dfab704f04381ba29fcfc2a2e2b839e8db2ad3f61bb257b64cb6a546cc45e95195089e8b734425d9d4afa3168211f6762 -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/352f8869f53096a566b387b885a74918 -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/67dc69f8b327791ab77d4082208653ca74ce2cc750d9cba833cadf4d0f311dba73dbc951d0ce088a66b06321f7addda34bd5705a6c38d4d901b5813b2d1bd37b -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/220916b081fea2190e372df195daf13f -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/28bc05009335d61bfec33f24c89e67412f13760de72ea9acf7a12b2abf6d89cc3f3067fddb4ce598031b054b33efcf6773b4057d5adad830ab15c88fdbe56955 -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2774e9f2922e087d06e0976076d3ecf3 -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/2aacbce77120fa9d24fd4026220e610b70c08b36175dee70f718f4d023b0ced9f8ae9dd2d58e35b61db7ca77ae337ed6f2da6a0de70296b4160a3f8e99ecdf67 -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/63801b5fa51c2e75abd4b46f4ab1046c -libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/eec9642a9c000d1aa3d298382a5b7c66caa81714665c7a405b416818f2e7a0cf1bedb81bc2a650452424391fe57061c33c2559abfc55bbac9b58e19d82131d5d -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/b3b3975a9a00b0292b9ba4b7fdf5e757 -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/c886fff05f76053682a906dd94c6674f072206f37781b1025ec8a977eb952e0aeefcf20d76a3411e54782a6425667ee3a373f0a48d5a486fd4f37c02b0ecef78 -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/88cf748f1a8086f949cb6217fcdd40b7 -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/4e3d3cef71062b002406afb923f3d16508206662c3835242bf522cc7c881ea236695cee6add1b1f85a0b2708510dab2b59eafe004e67ee1d87a5970602a9d942 -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/dae6e06bf26505fff786d0187cc5f90c -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/ed76e52223f84dd8c1ad7190341b167928493c2c617968aa17266c274527d18348865d9289cb82dd1c0d12240220750ac31e6c1354ddd9bc5ec2e226f360ba87 -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/1bdae6507ca26b09a81c3b5b89f17908 -libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/b2704c0ef478467eb0fa21c7b436d6efc9602e8723bcf194dfcf6b3ac33d316b79de66c0c1c291e92f45f5bb09b6ab579a45782fa1ba3c03192177aaad6c29e1 -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/8906c5b197baec7fc795256b92ca0a75 
-libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/b79ec5ef4e59b0016784d31e51a94c9b292d19c36d66eadcfb3be6579244048b2650206836b4e017a63d84d8a0c72cd487f22ea08fd92f5b5ab4cb46d218e1a0 -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/bd81f6f43b54364bef1e6486c17e3dea -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/892e4478e672fed55d63bfbf20a959b488e1cfafa332e2f1743cb519594526b5e0f2167b67636714dec6f43c76dcc0eb0bb2775eb43e4d898e63a0d1e78e9c94 -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/6437ac1aa63c9b83c72238f4b0eaca00 -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/f5e2bdb0af587e5cd55a5a2f16bf551c0e0fbadd2d9232fd5d3b2b38cdfaa80920d25903af5d79cb52a45a703a5bc07e550ca07163628cd1a79d3b3dda0d05d1 -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/5616fc6e683ab133ed751d60164ca894 -libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/40944ea809c3f4000038b7b26e6297a5ce9d2710995c57b4e0751e74dcbeed9c00b1d89d0c75bf0f0d9094fd4811f5c5ca0cc5b83f54cbe20c1b2db85de44d72 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/dcdb815f425a6ec2aca7f29f601a73b5 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/3619419dbc81807db63e5c7bd7b237a6355ec60d2aada9bf26c1d38f10b4cb87a3cb3fc9a81e7f695ed7a195d2c3c214cd9bf96d3ccca68422898be323797fb1 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/ab2250406d3a69d68755b77b79b61f53 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/f5eaf02c7d19689a9cff2410269daccc00a075abde9287b025de3aff1d5b539b43001d1f2120f88c4c149af27eaf0caedb2942ae029550cc822e6af103b32960 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/77576af5b13b2916dae4e7e24760afec -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/1b3708202ccebd47aecca5a7c6396799ef14c4235b0904d23d6b6b4fdd91fb6b13a1627f65211ee0283a15d96b8a68cfc962d7aa2ddf75c08f2670a767c6cfa8 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/81277b7fde4cf08293f8ca956417fe05 -libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/72caccf9933e1790bdb0b6f6dc1ec5da6a84a5fc06336e29f2928142f3182261afd39477be913427d65655c40ddbda5ec5042c360bc49383e88c871db19b018b -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d326fe9ccfbbf179571fdcd684bb7b80 -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/a34550dcbb416f79648a5c4306775f1aca041c4e8e3b269e94f960ec0925d5b7cca0ed1934b2b63b9f4437d304d658adc6c0d3e0169c629d50d7c0b5051dbb04 -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5ced197907e87c470e5cc1ab08a7eedf -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/b57810b718bbfb1ec48415ec3e727388bb647fa3768ee191d81fbb16248edbde9332086d445ff57ad53e9d62fb9c8fb1f8be176649350f5eb57c744404c63cb9 -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/4d5133f794e0b53d563ccfc10ca42e98 -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/9fc7bbb8dee022304c4aedb930318db04345987bb7ec9b78c3d488a5616680738ca2b9a9087f60b7d6cc68650234295d18c6cee4a45d1956d2240791993fe45a 
-libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/e5c8eae08bd2defe76e0985687d6f057 -libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/d632971cd93131b90d5a26fdcd8a262f2042a2dd59a09c82a8523558f2b292f9a3f285b0a6276f0e6b255f34d855736c0bfb9f426488c5929f2abf6c0b921b73 -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/f0fb4b9b0257e0ead2e5aeafebb64214 -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/1993c7d6ceb7efd93f2eb21379c046073b7d9d2460d6eab5aca26cae94bcbe07658780a2f6382a052e4d64813614078b5e582a933a0bc9a5d64d8388df98ce69 -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/e236983a6c801d33ead6f60140cf1ddd -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/c6b44cd0d9139e0b1d47c8b17e9035099a6b360f873a2fc5c6e84c1c97dd455510f4f4262c746b47910703158fe0ceba0d19b8e6a61117d9723346f4c3e89004 -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c3ad2f3774b9b7651078fa3b2dfbe7ff -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/009561d4fecd65e35960843670048b79e70495c2cfc80a7c80614f253bea7ca46d8278d338bdf7719229fa7eb9f02299bf8bc39ace683b862ad005cfebcca0e7 -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/6f8d226436a2822eb7e0f25d1073925c -libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/b63a32b1eb4a8af210f6a9511bcc4c90ad39091a6b2c50431253f4fe5e1ab304b68f79e71fe55e173449ebc96a3395dd1ee55a9a8cdd289950b609a5bec8e722 -libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/a618c88b200fa25434e969a168b93a15 -libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/940d6b61162bdd2d9ab5c264c9ba71789638fec646e62b9204e9304c8244d10c8a5ea3603c84bff76c5471e7f3184a21e4d1760bfe05deec80c8126a7207db04 -libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8a4e4c69ff51c941244d0765947dfaba -libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/287e59ff6e8e81e1650796da7a01be568b9ef024eef0505eaa34cdaf4cfd8d798596e9396e48fafa39acab5e70c3a41129917e8ec7d625f9acb896bc4e9e7b5e -libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/3f131f5c0e11db8de1e0268708ff17c4 -libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/04d1371a970694c234880ccd826f6a75615793867a3ba1fdce683a844cac3c9d33a58d34163bf2141624dde71f3af0e3582effbfce679ad2134894f86ac3ce98 -libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/8636def624785ea4b99d12c0d65bc0c3 -libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b8ae5cc249664d32a8dbc26a2bf180a782f51ba69126d099bb239ee94afdca7b8492a7458971cc91aef0ca55a1ca38d3bf3c8716234ded81319a2ad5ac082732 -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/bedb9f6540966fc382de1a4544ce8c9c -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/527ad792c220e491fcfb46de81b9b15cf4f6a1d50cfe4435296e0f94ae4d8e53165b6f634f85e95a8c7369a1e7b3788d1683fa77b843f56dfb1264313f80dae1 -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/62051888207139e47c9a0694cf4de5c6 -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/034e4e272d09ae8f573d3a7e591f93dc551651c7a32e2b8923fcd7fcf36be5bb491530f4673cb9bf39a54c1527cc3e3ecab64c79e3fd7075209fd81f32f7f4f9 
-libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/8543a076a97e6c72e7c514021ca5f121 -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/fc11ac25945adee135ebc523fe3908bcd5c5a7aa4c2a405e3dba61e0fb59502e5aef3cf4982502da7f7ee1974bcee8354ac675e0e0182f9319ea20c299813a1f -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/6247a9f59f87a2b923aacdc0a7c128ca -libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/f13adadc653d2f8442c8ee4ecca9563d6cad5f958abf2893d8a3eda331d9ed8c33cd4a52bb721be811dec66b3b5566f038bfebbcfea620bf0094c305cd3aef0f -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/873155e60b133d597cf8c40169c5745e -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/a000e1fe4698d5c19bf85b048bcf76cdffea191ee281b44ffbd83230de5dd93c9efb564a51da082df070f2358d6dce423bf0d6023836217c5b34d563844d977e -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/17467f361317ad56762b7e455d869724 -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/62a8d601c8db178cbdaa57a23a26cd65a8f3855be40ba2966b445afc9ee223db2ed6c2fc344ea98ff129d8917c14f34ed93158633521780d52763fc4a4f2a799 -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2c094ecef656dc6c9317038b0c5a47cc -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/af5972750be3de00df275a0f03c9c8a3b487a040f9bd29356457bc18661ffba9b3aa909849b24ae1c518fd2975a9b687c33353ba927f8713796a6c8eefa6e509 -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/c10497e3f473e80e309d4c6102fc194d -libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/2349230301cbebe8c7a7d7054bb4e60d991e1798dbb8bc6b8cf73350738e7058a9eb3c1067ce7d3ece1780e360080d00dd4777359742aff924d2db5c463f2a8b -libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/15c99e56a9e8ed664deb2d6aedeb7ea0 -libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/c7d3d6d33f0fc0cad0394c02662bed2dd7d5389a6aa21027d7ebee124c3c9f5910316c44bd4485f1d45c6bb9fe12775c697a176602809bb52c8d3cfadf4f2737 -libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/b8d748a34a381d085c962549612a212f -libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/02afa1db42ff68a3ea0443ab539a7c613e5acb6170f7849cce1d36969ddad36e7546427bc55cd289df46a5fd8e83477b70941b8fd9aba0717dd861c84473da49 -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/12f825c1c1586a8f7c9ce56e243b6bbf -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/f6c9cb33f129f1ff95800c0c88152d27e6de3fd78e01b29d75a80df9fdd8d95de70003dee0df3868215009cf434006223b488c64d6eb240f1e18799f529e980d -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/19d05d46cd97714abd23b668693afe4e -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/deb786016835fb34967e474235b1ca9c2e9f0258c88394979c41654fc4487ef83ac622f1e049aed5d83da8738b8f1079b3dbc67ca788f6c68b432d7007b850e7 -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/0fee1aea27ac30304228af1f398dbf14 -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/e14eb6fad8ef734efd5dae610cc1906901b389c7557853e7fad27c4cbf6c06614996bdd5840ee3640b9fcd8a870ea058c212bc978b6b869f4594cd8b06b42ca7 
-libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/dc14c7faeadb0c42f4e9cffcae8c7684 -libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/10ef07d1e1fe3bcf8bc52da169156ad10de7b3bd54f16bf1d694bd243bc4c86b4244643f1a71fec94b024ffa2e605141eec9b10e6e65dce5d96aee2b454fdb6a -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/ee90487acb75a33b77f24fdb075402f9 -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/6bc8605021dbb23aa71636318468a1f81f8dbf7308d637f551132700634fea208d24608c4afb28a9609a7a866302597f684d204f718fd8cae10a616abc1b7b0a -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/2c96c511ef55496a1044f63d4fdb096e -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/564202d6cd321b8b058124c4623bfa7d7310a5020140f194bfecd44a25490ff9590e661bbb838b1af4f7e40fc15f88363a1510d8f7a2138f8ccc52ad76700506 -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/555ea3150d5eeeec54b1d463380447cf -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/9da05a39e8d4d9cffffe85bc2717e105a47137682ede9cbbd2f216065ebdbb6624b68a2e120a1b87247838276cd8a501c83aec63c91673229bde8d207f651f4c -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a1f6daa0703ddcbc87b8f9d17c9ad54a -libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/e803ba34861b600b350bc99484adb619bd75a82162633e8d80f1456a908d42d95842f194a6752fa43e683c26356592fb94b64f7823b64edc922aca154d970288 -libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/364b73f29c1df14d8b942183cb113dd2 -libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/c4966e3607314acbace4b31dc095b81770ac3414ac1bddb43084443191b92b2b96f6702177dec76b70be12f7a3af4797c9692cf872ea7eaf60569dc7fdd92ee4 -libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/d4aea085c08951e0facaa553b3c22a91 -libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/cc5cc36d50a342b5692144905ae52676fe9ff19054245152e3fff02276250604009881325cb5ef063f274b51cb2b45dcc88db0a929f6244d81cad1f241bd0c64 -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/5cdf36e1300bbc9b032bebe5cba7bd6a -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c732ba652aaf7a5f6aa8cd2f39088d83b78d2fe3121c4e2415bdc935b0a3ccdff7f028d3ef50f0b5f7bccff54f1fb5acbf970fc28301510d09b3f3847556c613 -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/c5b335f634ec9e663a7c5d54dfeb1967 -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/51c7b1ceb0e235d9d7db9727eb7744cbd8b2e51e189c58bfa6d3b65bc4b6e7a8224e8b7b57eeeefce01c7f65a4df48da97a975dec61fb000d83d23d15737728d -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/822be345af871cd1d5e595b2a83bedf3 -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/fda0ff71c7a26e783436da214acc22842fe73df1f9d1d526955f4acd0794c3afa8722df8e4c9671b11948fd96e4c079fe525c9bf3e38b5119a79793a22baf16c -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/1201b56c0dea9d1fd2a5ceb4d62f78a9 -libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/550c041f495a2d2439e6c4abcd4db6da06702d32046f6574f6a595fceea467ebf896635bc70d9c3e41c99b42404c87d98e3cd76a34b0f959d21284e3e4f15941 -llvm-julia-15.0.7-5.tar.gz/md5/1ffb5b00586262196d24dcc7baa4a4c0 
-llvm-julia-15.0.7-5.tar.gz/sha512/5b5c93b4359cee649974bbdb5c3c191cff5ce5c3862e7cce00e2e35dd0627bf50e0aee454e67ea0fadd21c36065b7c1cae6e77abdd512fab70b71899d369cfac -llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b -llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4 +LLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/f8c2d285a6db7c3b89d295b32b78f07b +LLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/99d923fff09b70093962cb32d2a12a2d2355824c1c3404900d593cfd0e95a4b52744e7d3fcd22407651916adc2e1534637437630843762c3f2c0c650881aa0e6 +LLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/2ad6bf2ab91cb75bc3bb627b1859997b +LLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/bd06a3adcae64700f4051a18705e7937539b3cdfa61dda38260398a8896401a267b718594631d71afc68a3b273b0d05f6018927c3a08c070bd6c45d53b19c78a +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/7bc3125dd810bcc44ea2d454b6caa683 +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/86742a4476481b14145855ead8a5acc6397782f6d3445f900ac2de0570f1fcf53563cf5e1f3cb59886282083ce63756604f1ca2434e9e427cdc1bd1f68373581 +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/4eae06d9e6272aef23afc191501810fd +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/fb75927982b1428b05b765bd5ac017b2c15d89990b7e6cb582b9e1a3ec04d09801d25d5cc6c037a12c205edb7c0f7a2d33832a2d1de7920711e9720dc3ca3655 +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/cd86e18a63cd6e84a1493acf0df4e267 +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/1dfefc4600368467ab90ccb527a9fdb012b9b7f485d932a0db8c4b1b81985fad931b74494b76ef2162e46280447d39a055b5681b33a17c564c50094de29aeb13 +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/c7cf7daa7c11827ae4f9fb2e16f3cce3 +LLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/dabe2940606a671a8e3b4f28bb9e813d000650203c382372142457020f2ccd498534903aa99320afb7ff960a62d752ee6cb724e74745bc1bad1051e12cf78ab4 +LLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/62e575b89fd92d9206abebc19b084abf +LLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/7ac029567fd68fee82b7096e2fe278ee5cd2935494433b1faace036469c54bc471d614d0bb339750429dd88f3e723165d2dacaa627f73c3647c6f3b51a4a3034 +LLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/5d39ef811bc78204ebfc7e98111469cf +LLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/10fc9a64d63351e168bc79fa63bcaa6fd49c8483e5ecc40a66216192588367e9b47ec3ea2c047e88f39ea8f1caf8052726f4bc8858223f7744606156b4133970 +LLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/f072fe487e5d1b717aec49a6244adf05 +LLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/42b03a2562728ac86e751abab2e8233d583baf006e69b107d002a9258844ad53f62e6332eab3790364940d478c7ebab6d3e0e2194220e8436f40e6b75063d1a2 +LLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/eabf0239298f13ff4893011e75828bdf +LLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/34724d9c9a550c85d406021d7265e1848b002b8f212427eebff6e8f03ec6acc336efb0c2cd9d9e1c76329e7c84a84a9d852b8de5897550d957e0e9385129033d 
+LLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/8b736710b2c749fccf0a782f3b887ec2 +LLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/d7458ead5a604781a117e54a03dc6f3fc47e932298c68af425a6725ef4767bb512c910316818081d5e27d9d08b4ce1792d684c0014271fd492eedaf47acc5eb3 +LLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/md5/ed0487ad3494352ffebfac51ef947168 +LLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/sha512/e13082056be94335b1f4253afe3c4a25555b6bd10c5d68052f01117415dab344a3f883a9f25ff4ac630262756dd15825e74395650d80181c85c0663d7028a9f5 +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/1910b5daa31db6542f0c762901ab7d43 +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c43e8091e9946ba1d8849734a25b258df95b4759a79676565b624930d4a19805a78b66b1d193e528f95174d909d7895d4a4e49fe8ca298a24dc40d25c95900b1 +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/a5198b13dc75ad3454e05aa6cdaca48f +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/9ec8078a1a7246f1545fe074783d6b88ce9b50f62b0438ff5637f6dedf5bcac427cc252c350354b7063f79f4e31a19f699c168c15bc6547a207da497026c2827 +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/f569654ecdd8ec2a50986ccac8388c69 +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/9b50e3be1577a753f0ce42704846bd126229d8dd9f28bfcbda58c4f18e4b9ca4ec6bb9b57de61b3b9af8157a2983aeffb8af782a073e5e19a8ccc261cbea9601 +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/496de8c9e2361f44ac6933480620d07f +LLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/02a8ecfb6e81e0fe07fb0d616a84a590e23e944588c18348c32265bf6bf19196beec189a0bc40514e379e97a9c8bef83557260839800fabe9f8e39e96689713d +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/05bc7406fd0a703edbc912bb3230eb37 +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/898dd4c19dd0f22dcd1bd44264daa8dc64340c890c3368fac7451da1ac872a687d55b5eb50ae4e156c2dc4ece226ec05775daebafe9d8b53eb83b72d2986ff92 +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/d6ca30fc3a2796ebda2451f80846883d +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/d7dc96e1bbca38272b1ca78b3ff995fc30434937a58815c63d0a9b4a017964cfb269a1f3203ad8374870257152229941d420f098644375b5f4d1b88fe39e0dff +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/6eb1a197150ad6c165b82c5e0e0db102 +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/a159598c2bf351ea79d01e8a454a82bbd9823c080399520af3182e57259957ad07834b03c336e6225857da365e8ec1aa9f65b0ddd0821883ae817cb81f8e6dab +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/116d849cb2fb4b1c8c517397b2b04192 +LLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/7b2596c76d2814fc30992ba78e5c8f93519442fa76004187de9830732b80bfc6c77f5d7aca042c20d8f868cd682bb6f71e3fa32940bc8c7b401753dc4ac2f331 +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/27837dc854a173bd37a20f92383f6913 +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/1719205cba6de969e8724a99444bf958d5a7943ae90ee2dd11193f56ddfd4f0edf6d9af6da2e67787a64b91d994fee76bd8ffde36486c5229a980c2c4ef07e29 
+LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/f0016c21c045e205131ea22dc711acaf +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/6d192b7e21c7ee3327d288b890f4c5dd03e5f53dcba6905a34cab96b7ad0ab6364f5271af88d95e60aab8f569a8840d17e16f27f6fcdafcaf537d5d4a651dca7 +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/9a2bad4518966db29e37e7c88388e779 +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/b9a10af9dcbacf1f129d4e9b4cf562a6a4687252cc8a0fcd78f52d75c0c20be0ff32e67413a7902a628b04e7fac1091d35b64b145e33814899796009b6ed2853 +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/77c4e24c1e44ce14bc6476954f294a15 +LLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/d9d90a4ac788dbbc1b532623a380d4cca8813ecdf8b7b4a8cfff769499e50a1433bac618234bd0765d8a4f50aafb3fa724d16ac71baf75ae5a2b4396fa2bd017 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/b29e36dcf5a0aa05734f1d6a0afd6944 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/ab46a835f9843c5b3427101bcd0c5d2b8acf79693aa9b8d4282d499f25df4ca248a81fc94ddd96c75d69d3c6b3814b225eed81bec32fbe9199bffdd605f7fec8 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/a411269f925cc968a0438562262e6d97 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/04f275603134b0ea0f23da377e4983765885f2b1954d5c617134af9f103470a5e50dfda18bcddb836852db2382f1c134db40df00b36c8bd00e7a9e6ff1a9e684 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/841921e33407e15eeeaa76354aa2b737 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/e1fb8b75e141cc90916c5c81c31ee91336911983c525f38eab86682ba69679dfbe1f10c9b673323632fc75f38cacc2af47a3d5d5d1031ec9a2a60cebd68d501b +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/7342a1d7b1d2c0fed7f5edf1c331ffa8 +LLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/dae8ca11fa8d34f99ee19a95bcd108a65b9e6a6ddf2e5a9b126f2ba1b1cdff6b7ec21e9590d70b3785593435bb71e47703d9765811db814a90aa8a47940421ff +LLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/10aac489dfa10a77427a82958f525da2 +LLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/a87f721df4fc5f6e929a54d8e41e55fb366a051a610836923213bfa42a7f1593de880391131619653cc3571bb76a4c82e011852ee5a6005523957c9f0937e6ba +LLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/7f231fd359f9297261c22f95d8f738c8 +LLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/fdd6441011609ef341108ff2d108c6f320d415b621a69922aeacc555c3d1ae6090a0f600f24e229a609b88ba9c1868900791a6590033b7dad333ad11f8a6365b +LLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/c4523a485082044553e1a89049dc4734 +LLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/db365e63bbb5189f7f348e2fd51e627ddfebf838ca9dfc6c0f8a7bbf6b8a2a03d78ea3ccdf08b0c2674f4cf5a0979506efa643554091ba751f16051bdf42ca9f +LLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bcd10e4f3e5a4b00d52441e0094de1c9 +LLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/b17fae89a3dfaa9428cf48c9c0866477cc75edda6aa3800702227cc9e3d6ebaacbd60cccc96acb4ccde56a2de531dea5a436bac8e6c450a4674daae23b878037 +LLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/6bb986b1c9b66ca24c976e6534726b00 
+LLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/1fd7cf1c80594561a8b83cf993192299e8a96046bd1e2f6eb330898c5e2dd0fc7c6ee0e3115d4e4049b83c71e724fab19a5d468e72fd141d8a2c4c02d831b71a +LLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/c44aad21aef3b92fa0b1543ab9e4b93a +LLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/1aed6fb716a576b132d13397c927b36f00d78a42e5273168f1eacd208e366c55328286c56bae0abaf2c7ee424e7f19f4e096cd53f7d7caf863a0d58de1a2386e +LLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/c3494f146906e178c5e5e32c10f6fec6 +LLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/a0fe26f88492ce8416257e76a5938a65b4911822c9c3e3bd0e3455adae1beaa952a769d616e8f8525c3bac64a6e3cd7f1dfd68800b5e7db94ad63320a2716e2b +LLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/4644616c2e8937169500c200fb56322a +LLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/1250c5c9541782dabb5f0063bb2a18ee15a5dcd0e8b675e78474fa7dce2d51dd97e1bc4eee0a526a73f7812c57e41faa85e021fea4de74d33c62ae67ca555d73 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/b39ce0b0f143c3bef4dade99251003bc +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/62148e1e0a31d6b28effda0a5016d9335005b27ffdc5be1d184efcbb13f13e29eca52eca19cc6800d1d0421c0e67a36027e05d5fdc967dae686b5bfd112fb2b6 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/9475748210eb5b1947fe3aa6673b6c29 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/54320295e59e5903db558b6be0220442dbaf7ea78e1612d54a35cbe014541b354ea708679da00851b962140b6da77301e27b656fd478666d3f0f710382c13a85 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6a533054ccfc3d1b0920eabcfb45ee03 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/3871620aeea2ccaf6e4b17a675c5504624fc6d8ed57bf4e5b66e0372b7124e4f3d1e0f10baa1018d5a1ac5bc4bf0e9d2143e84827712fda1f512fed24829f1b9 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/3fc6d1b7d59b98823d6016f97835b7c5 +LLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/745942235e40f2ab71a5eaef2768842823620d4a4dc7454a7512fb2bd95bc8a74323eec6a4b33edf1ef935151c18a20172f60fcca2fca1ff3a37b1e019ea4640 +LLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/4bf72195bb2b3fafd98bd3f1966dfd0a +LLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/6554fd0374875428d0479e192ac3c70823a1143ac9acf0fafb3332f6c03e7fc8d14513512152bc995c186024bc36de77c5e7895ac1382f962b22b1089c3cf176 +LLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/5631a8736cab900c3fcfeb559abc54a2 +LLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/55d93ffcc0125720f7db379396c5a79e98408225aebebc72fdd05b38605e73481eef46c219f59088b3bdea6257a7a7e369e6e0110019164374ac35bb49897738 +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/28ae362155ce224cef605cee53e36d0b +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/d90f25e57f92a9da68245ceb15316e3868bf657d7e744f37cce5ccb4945777ec82fc5d470ba4fc104fe7aaabfff7b0dc260838a45331e4360b0fd14c59a55666 +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/d10ec63510dc1a043ee0a4e37b49eacd 
+LLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/54c393208d1f51661e631cba62a21c0685fb58827067d5ea7c42fb3d6dd8c8db99d8ee1b3c304abc25510bcb0265d86ca03e1ce19be4faa252d97cfc8a1b52cb +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/2c1e000206c9e7c6c8e7515eb8115e3e +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/12c0ead798e43448a30699b5386b3d88aac49aaef9bae283ea6d089a1c66df7293f4f220a2b5c3d96e73e556e37e745f38d81f5c68e09a86a2b19a6695eff460 +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/21d6c5d5e422412b88ffce50862efb29 +LLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/5e8e17ba79134e9752c7fbd28b62e4616574a5e1dfcb0980160a3aad28a2f6cec4e48ed1acf73ca1f94d74397f7ee3eba53cb1280699e40c451295590ede3fe3 +LLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/293fdc43431493f915a3e0a5b3c6d587 +LLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/27e13a4334a3bfb3c91fd06abcc4eca7a347f4bffcbce40834302d153ef29756295121b42ac433c266668af1428ffa08ed12ce75f21fef44cd7ac1d8bdfd155a +LLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/2825dac8280d0563b7f521a9eb8c0563 +LLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/7f4549ac7b63e58d8c149f6b22bd997545713477a1df3b32adf640f3951580df1645f08756d9ba80c479160cf5759e3f9372396655a35cdca14f4be4afc4ae22 +LLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/0c0da0eccec4a092fc0e9a915716ed6f +LLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/e538e29c4d52d9aaf151670619702541fed8231ae4c7fb9431a425d10eea95433087034a37da8fe468bd27a1c882f6f8eb9549ef71964124db10e99f4b402ba5 +LLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/6b4fd19277c978306441da3b58ab86a1 +LLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/6216b3e1dc6aea979d8b5abc4cc0faf510e4e64441b1d18b4b36c45d65e874e9046e14eea67efb88f3219449ef048d34fcb751b15c59f8a299aa822b426d50ae +LLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/689ce55ca1eb1be8090a7dad2e5f1a86 +LLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/a2ebd80e71375abafdaa45d4d104c1822d2205bd680b8c8541aa90dbc54d530e348a64a18acfba14cb66c078f0386d54375bf26cddef935a348e874b99609312 +LLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/dbb26e6bd19d71607248446d38ea0a42 +LLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/eecaafa95e1df14f57f93e44732a23b1fb734af73bb533c8b4662dd0ddcfe696271571b97e2a5346581c000336f9fa0b28bf1c92535490e5174649a7e01b6019 +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/51981c5aac875046101670896de92c2d +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/466da0868068d27dfa8284a3431925c9cfed9314f681bbadd0c331ae67a1acb975015a739abfea239e7f93a2fd7d439601f5d8421d7fa4fcceec5730649686a7 +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/65da06ac7ef16d3e3ea6137cb9a943f4 +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/6c70bcd54d1cbe502b7d9db50a59a62a8a10e4e90d7d607d61ed7737a70474aba2db5f5151b1dc03f965a84d8770d4be6f248ed1f4bc6c9e63298abecb936f1e +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/0a4cefbd15c37cb418cfaac56b789146 
+LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/7fd5c69bfde6264ae4e548ec9c399dd09b1a5fe4b9cced23d6bc4257f0f67874b838d53ee8d6eef7fc01ee9d086758e06f00bb0a0388b97de2eb85143a47192a +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/da2430483844823d31bcc5f302252ac2 +LLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/19e9168b44d40acdc0d924e16f93c315237207a4441ae78997c511135872e557f654236bc859453069671145e81e961ac93c9dfa601d1b6631b9ccfa09b929b3 +LLVMLibUnwind.v19.1.4+0.aarch64-apple-darwin.tar.gz/md5/aace388fc1ece82ea524c582506ae931 +LLVMLibUnwind.v19.1.4+0.aarch64-apple-darwin.tar.gz/sha512/c0211340a05630bcfcf9e3bab97da3e9f07e596e8d391427fa919c99502ab0a09878eda379254f379511884347f7e742872e8589f9b6ccbc2d126a5dfe0a350f +LLVMLibUnwind.v19.1.4+0.aarch64-linux-gnu.tar.gz/md5/942d0b4ffb8bfd743cdafebf5bdfdbb3 +LLVMLibUnwind.v19.1.4+0.aarch64-linux-gnu.tar.gz/sha512/ec68df054c6694d17cb7f5c389adc4b8b855023f9ca03713d21f1f0c58de2b90166a9f3981b81da5f817f6b09f85fb11e85732d6c78f1d115d6aecf326dc20a1 +LLVMLibUnwind.v19.1.4+0.aarch64-linux-musl.tar.gz/md5/2c27d3c130f54e38e6639ebf7095f743 +LLVMLibUnwind.v19.1.4+0.aarch64-linux-musl.tar.gz/sha512/d348cc1f87927a3d36cd3f2587cf4161dbdc9f3555900ee338857d806384c0cff8fbe67bef97cad0d3098cc8c7f149aac699f3defe87db70fffcc94d681810b6 +LLVMLibUnwind.v19.1.4+0.aarch64-unknown-freebsd.tar.gz/md5/6bb1466d45159193407f27201a443ddc +LLVMLibUnwind.v19.1.4+0.aarch64-unknown-freebsd.tar.gz/sha512/da6da450e6fba5d501be13d83bc9133796b92e1b3a6cc7cb97470cc7476a369fcd8ddbc9267f03fa4cbe1f2484359eeb70fb629b26c9a1d7ea0065c5a671e1b9 +LLVMLibUnwind.v19.1.4+0.armv6l-linux-gnueabihf.tar.gz/md5/2cdf57d34b1db677498dfc5d89501599 +LLVMLibUnwind.v19.1.4+0.armv6l-linux-gnueabihf.tar.gz/sha512/217c15e1bfdc72014dd26321eb46ae9cfadb7839c693caf3c974989ee2036781cf7e62bb7175766f5171bf32de53a95598ef463c70a0ac64ec012ca9bc19e6df +LLVMLibUnwind.v19.1.4+0.armv6l-linux-musleabihf.tar.gz/md5/110c80b549d1f80faa36a3e0b39a11b4 +LLVMLibUnwind.v19.1.4+0.armv6l-linux-musleabihf.tar.gz/sha512/b9151aaaaae4adf5da5701ee5962d712def509f85101dae485b905f73391d8658b5a0a58ea1a4c68cc3bc68d7e17d557c05c98d33d907cdb512513ffff75765b +LLVMLibUnwind.v19.1.4+0.armv7l-linux-gnueabihf.tar.gz/md5/bf50011ce9e4c82d49e61e868b27ea23 +LLVMLibUnwind.v19.1.4+0.armv7l-linux-gnueabihf.tar.gz/sha512/d08faae71010e4a7d25a16374249ff1740ed7883e260e544e4fb0f0d3758d2eb76fea93433cb1987850f54f1ae6528b6336fc2e1db9b46f49defd870e97f8a94 +LLVMLibUnwind.v19.1.4+0.armv7l-linux-musleabihf.tar.gz/md5/142118a84c1b959b0b202d51072168f9 +LLVMLibUnwind.v19.1.4+0.armv7l-linux-musleabihf.tar.gz/sha512/71ac937417f5f2226b8952c925fff94b553de8a29fc45fee6c0fef53a9cf8c07979c60408c8efcf827b260bc3a287059aefa24e050393f2e09b65af45b60d07f +LLVMLibUnwind.v19.1.4+0.i686-linux-gnu.tar.gz/md5/1bcd011ba209cc840647c684dcad9631 +LLVMLibUnwind.v19.1.4+0.i686-linux-gnu.tar.gz/sha512/8309c3d82d0a94c4c7a8b72720702f5cb0c97f316492217f1eebfc0dc33b4e9c7c8af5c6ee3700ea0c1cc0fd66c90a52389c2aaaaeb67f6278e53e33a476abc1 +LLVMLibUnwind.v19.1.4+0.i686-linux-musl.tar.gz/md5/8db27a7ab4a23febfd6a8eb2f65cd611 +LLVMLibUnwind.v19.1.4+0.i686-linux-musl.tar.gz/sha512/dc7839d2c9a258b122985eb35096e0000561598c54fbd1c5f269921146e6e85589c6f60a0fb964ebfc78af703045373999163253ad2c8f09475bf6bdb923a59f +LLVMLibUnwind.v19.1.4+0.i686-w64-mingw32.tar.gz/md5/7de74ebac40c9425f619c7f8b309de00 
+LLVMLibUnwind.v19.1.4+0.i686-w64-mingw32.tar.gz/sha512/f28f4e8c25cdc06c8d363735e1914c748c150a962c37dfa8a45a3ba514d3fa1b6c551809b8d7f668b258c3165674f012ee6a18f36421e624f38ece27db755a3f +LLVMLibUnwind.v19.1.4+0.powerpc64le-linux-gnu.tar.gz/md5/c5277c6c127ccc5fa66867ddeb6f93a2 +LLVMLibUnwind.v19.1.4+0.powerpc64le-linux-gnu.tar.gz/sha512/b3d61aee2187c185be1b1b26edaccea66da750931c1216db1f3e89393c1d2c101335d791f0124282320084e697386f395951035e5071da23ecd55133fad472fc +LLVMLibUnwind.v19.1.4+0.x86_64-apple-darwin.tar.gz/md5/64d459ec7cb7d70b89f5ed62a1261425 +LLVMLibUnwind.v19.1.4+0.x86_64-apple-darwin.tar.gz/sha512/861130348376c8a54b2aa8c86d9d338a4b5fb88d3d2745578dcf15e0f477f518c07a505ce86c898c87142a7c5bf2e1ce43daedecc386a7f3bde67af8e6a56e64 +LLVMLibUnwind.v19.1.4+0.x86_64-linux-gnu.tar.gz/md5/2702948c4171ad35f521e15ee4ebcc8e +LLVMLibUnwind.v19.1.4+0.x86_64-linux-gnu.tar.gz/sha512/306759ae9064a9746474c53b674eb0b9da7cef6271094009c3244542295ef7a86cb77096b4a18dc2e50628c6ab02e2f1c6e39a1401e86fe4743410ae8d782126 +LLVMLibUnwind.v19.1.4+0.x86_64-linux-musl.tar.gz/md5/a7f9ea5dfbd4760b5a33c97581ad4b95 +LLVMLibUnwind.v19.1.4+0.x86_64-linux-musl.tar.gz/sha512/08add6b1a4e90f50fbceea6d72a476fba3a2b271f44bf64f06b53f35dfecc756f71843d54d0895a2f62d56df24f3675619cf3220215acb2e0a574696c6fa630c +LLVMLibUnwind.v19.1.4+0.x86_64-unknown-freebsd.tar.gz/md5/05f5b916fa639a68096cc73fb82007f8 +LLVMLibUnwind.v19.1.4+0.x86_64-unknown-freebsd.tar.gz/sha512/0a137168c466861fdbdbef86dec96ece0d4c10f87fdc2dd729b445deb0fd59b214241b62644da77581a0100826e07dacf81fa060e67e35ff38df0d6807cb618b +LLVMLibUnwind.v19.1.4+0.x86_64-w64-mingw32.tar.gz/md5/bb073cb86c821a70b845bd5de0edc2d9 +LLVMLibUnwind.v19.1.4+0.x86_64-w64-mingw32.tar.gz/sha512/24d206c65c7be34485a1492250a9ca958e70be7057b981940bc24c4822e50e3963c9f88f42892ba2ea6df17fedb2783ace1693aeac74f200a5ca6033a14d6cb9 +libLLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/f7ce9539d0802dd4b5e5e673d36d1a99 +libLLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/7a54be16ccc327731c802380d29f2c9ee5e635cd6af0b7eb6b69e9d3b0b4fecb74147359af182def3b016ec4445891bdb91eb0d541b783e451e8263968c25161 +libLLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/cd946ab46745ce71ad7438cf0f30cfd0 +libLLVM.v18.1.7+3.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/15f8bcdf6f66e654d5d6e950392ced62586e2bf7c2b0845db78282669c5440c2140432950c7726fcc8910c7113685cc29ac880de565f85b77536d63dbab0a8b5 +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/766a2de98d275877bb676ff1f23e972f +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/3b353ea038fafefc13ccb4a81c7242d569c206362605be374fb312cb495f385796d052c3a7e08c7fe6ecaa3018e2a7e3dfa43d71a8c3a94987f7dc7aa378fd22 +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/0684a6b210b799a8a0f45a286f3dfcc5 +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/4221e2d74117bd7e89aba2945030c1507e51999b236814fd23036565364c328392e87032daf1b9fe274ed89fcf9a6dcd203f0f1c8602c2a08d3fcfa189a5fefe +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6b460256e923637e5107d67859eb60ba +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/7d3f2736afe4022842529b1355cf9914b7a1c7b1e261f814a4523ad30a0cf0189056d5117a06720bbb7a844a435bb632ddbda2daadbf7e01c0120452cd13e6a3 +libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/c2b13a6a296adbb4be91dd3bb5be0877 
+libLLVM.v18.1.7+3.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/9086937e718125afd535b0066ee08a3523161a94fa7ef3c9a3e86bfe760f251b6ea7b035888e61a0e7f192ed25c9bd0f4dc153df86e08569e7067a7a30ba48c5 +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/758d33fe0b2b3d0371708614365450e8 +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/79a662f72ba1b89b373d1d143ee880a12cb128211e79182e7befe8b3e50298b594de2ce489ca8bcdeadb17fceee811622f8bfcbc3e232cefdaf9927177469eec +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/2dcbb811be8985bfed3c8b37733c0d40 +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/17f6fbd96ed5029f360c101cedad127881e14b42498d66f717448d99ca1909057ae79169d934e08157edcc7467db4b3941bdda26a2e9f42645963eec51f27e29 +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/bd3b904b5f9464aaaf87c41b899c8ca5 +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/fa99e8025419a18f548f658ea589771c2803480c3cb3a25cfb75e26ed0993b7b37bba204d7cba1475319a71159813b2b58a3b3327ba24d264cf80ef24263628d +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/b4f9038d5c3c13207111ee1a9a918cba +libLLVM.v18.1.7+3.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/e8b97bee30f597cc06d31175e12f0c2035aef0054e8abdb431f31b1e9d440d561bd9bc6637a403441aa7f3e1d2a46c600734e17e3b7ed0ae899c92df91758780 +libLLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/1f59987d027a3bc930fca6bef917f739 +libLLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/7bd0532e11abf1c4979e59d513257d53ea940f15c08d2fa30dc16e59e11d1899dcd2abe4a35dd3c7719aa49aacfa1b0e49049df3548336e5ec64355319129b30 +libLLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/md5/e4ff6f08094846700acc4e55d5b79e93 +libLLVM.v18.1.7+3.aarch64-unknown-freebsd-llvm_version+18.tar.gz/sha512/8a575e9640e5ff9b75ef4e970f203139e51afbcbf1b82c774fbe4a0176c22c51029533c188fb89068c1714eb3c8b1b232804f276a68c0c40aa0a6611ae72d1ce +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/06d8e634b4a6914efc18b7962df52021 +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/cf6aeed1eaf652e5830e34dd2ba88abc33668953281146106bbfdbc92f5f225645f00ff5b4a0eb902baf904362ab4eb32192fa50ee5b2672e8b031fe2550f9a8 +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/53e83804b63e6ae4d0f1c97abcbbd1c8 +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/45b3ee9b105ef2ef106fa8ac7b8e902cd1d6bf3c9bfb57edeca9e14f1654714d23fb086b369a9fd3cbb828c04fee4cfe80d2b2a2bfaa852d3ac65c0d213d8c62 +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/91b6cf00564053d385e30b34e5b8778e +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/9111f3f02b49bf78340c9b0c5c1325a1ca09b62c83aefece1121573dcc21dce095060351f18997971e5cfbaab346cb12c75cdc0fbe8fa92aca2e8a68b5f5f577 +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/f6c91b71dfd73c7301a4e3de48e072de +libLLVM.v18.1.7+3.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/581d7e1e4d85aeaf082fa31555074471705e391de0771bf66665807afb5192c79c481ca30e73a25f4e2d48d4d325f0198e39bcbfaed2c9bc7477ee917667f5ce +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/ce41ee46959e5e3a17b6c99293afedb7 
+libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/73d8c5af750ea9deef822aec58d8697243ca154bc4435ac0b0ab8c90fc97750e91fa55f8de7b8283eb1ab19951cda3e3c4c60834bcf13730163e593126a8eb57 +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/67ed5b654852dad400aef17fb542703f +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/07f70c57e27eea37f520f6f0a954b54d2506530d5eb5a74e5a8526ba8ef55a948073c49037544b602d03d0aa482704292eac943f0a83421386ccbfbf22ee8510 +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5b8bd88d49ce21e5b63af6f77782eed4 +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/cef1c561ae388b2baa08e39dc195989cb795d8a2747f5f11e0dc9d9e107b9e99dbba465335376beff2e1b326512f6afc962775e0b246f3edcfadf509235cabd8 +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/5fbf26d20b2ce3f61edc9a9ca2eb5284 +libLLVM.v18.1.7+3.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/2c564c95d648458b9a0f0c963246cf5564c625107682f680390b6db5fde0e2b15a964fd3fd23734b5b2bb135db1fc698812d61b3f275710593f4defaee4a9c23 +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/c81bc29a75acf4f806f3eb13bf890604 +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c8c922a0a4fefd549f1c2ba396a3cab9cf7738aa82e7ccf7ca29c090260e2d73ec45d6f2b07173d584f6074b10fa04052114deef6ecb6f53ea87f1924074137a +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/1fcb40ba1a427105b4e7d13a6c11dc78 +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/392c9ee85ba7ab6697bb8979c7f443d1d25f7ac9178e96a886401cfc68d75a43ce98bf3038a7ba70a9a990f65e604d38e043472cec3badb25fbd1b38cfbb7162 +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/427a19eaf69725d11bb33f48de9cb205 +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/542e209b10c13d8dca867247a7414f84adb832f40051fcbdf0dcb09bc9664a77248e1b0ea1687805847dd9f5a05b86475dd76aba427c9a1bc83f8502444c60bd +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/ab34bfa2950014936edd13a7b5db8170 +libLLVM.v18.1.7+3.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/6376b25d0278e5c97581480fb4d54371b09a08be88f4cc39d2c7b3875f1189cef60c1be6bea5e12b0cf306cef8b394bc7d00f8b0fd95d749bd1b4eb318af7e15 +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/cb6300fe87fd7cb9840f3bc44af26878 +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/a7984cd90fef55559142fc05d91b0da1f37f77f25214e93ff7641b7c3958f08dc7c082611915dbfda4bbbaa392656ac8604d4f75369777dacfb78baee2f99b16 +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/b8a4e8ef43340e9cbdf5e4479c6a5a56 +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/fc249f2b666c8a8129e05ea08c773cbeb7af6d37791f271461eedd99adcfc5082e8609ed096d8a46edd1e73505352712a41e0ddc247a371f78227aab01fbe0f3 +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5864689df3298be4b1b4df1ae0412d3a +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/8f32f73e366c3a6993fa8d6b8cd1a9391611b0644cd4a77a4f7a235c037fdb75308d99b5a23ada6e4a73ed5fbd8f929a981d6bf317d79d52396220c221619303 
+libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/6bf798476c4e94716cc47a95580104ad +libLLVM.v18.1.7+3.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/9dbd27a000dd3c3dda9047d366a667c4b179cc61582525adb0f8227e8055413ce46efcbc1530305400239656e2f1016fb8833fb7f4734714078e035d388f3531 +libLLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/66e2889f86ae6bc1977419e6d9be729e +libLLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/d0cac798c4979b4d818d36596b173e523cba3f41ff7ab1e2111f6a75c3e819e563e207a547328f005c5a93c7f8f88c17bf43c1139b5c2690df4f1d719f82920a +libLLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/0534b72d6d33c8573f79dce8a2a5a6e6 +libLLVM.v18.1.7+3.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/6beaf1b45eec8b46fbf92f692f53e6df40bf48e50589aeb5ef99240a5a3ec9089ffb350dda6df24530937d613bf6d2cc4da76e92921ea00def9d2d38ac5bbeba +libLLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/2cf9a1ca20472179ce4a9eb3a949457b +libLLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/cebae06ccee12a14d20d3056ce0519b1e774e3c9d9200a783262fcc40aee6d7aabfb08714bf53b88e03d8b09a96d3cda248a70c16188f8c707b291642998262a +libLLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/4712f6a46e0ff407ece958a7701511b9 +libLLVM.v18.1.7+3.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/9a0a2dfa2076b93027f766277a6890cf94d67c131697f74945e92cf13ae64e84c09d3dd744498986fb22ad5e5465300aa9c8ae6632fcf919a0932515edfcc1e6 +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/274c51cc4dc133d7470ef82987b78df6 +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/24944b1fec24bd21f2f773480c7783975b2cce5ef9909f285c959d954669b98ae18a174126440c03de28d1fa9b055f4bd092104dcb29d8c0c07400dd8e4cb493 +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/8b36d976399e4b603a1c4f8bce1510fc +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/4f5a1169cd566898357c98f86786bf86f6f1d9282327f8026c7d04359fa7148f4026ef2de765debfb45d4013368cbf420e78802289ceea253a9ed2f58e89db8a +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/121a0c243591d8295fd3063821569e01 +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/e55fbf36802e7d8547e1aa0f60c650b29cc3dbeaff67e6b6a095e0647d6a8c6f55bc7cf72daaeb6f3d2e87e831b3cb275d8c3b4beea2413de8a1cfbac4771ec0 +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/7af4fdf2475dcf896750e046edc9fd2c +libLLVM.v18.1.7+3.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/e8294e855565109e70d0596402dd8b7886174034242cbc6deb55f481a306c85ed9840732b3cb346c2ed5ce10a3d42647f2d1a97d2e998805089533880a326197 +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/bbf060d61b294b86f7e3dde381b00b8a +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/632372d41f6e400a10fae27c6cd06a5a344cfb5902cad7928cb4133f14f36f0a3373e69e73ce9baf52f518340593c3a5a16173ef59a1878e6300e9975aeaa157 +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/3d730b713e01cdb5a7a5a46028afd41b +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/052ab4fa7ac3b2c430601753ab078cdc9fd6db7f65ee0b76bb05473f4c5b99ec8919ad9d347425f1928cf619548e992c86ba97f9994218f50bca617e43d2f0d9 +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/bf9dcb92ba8c031ae62ed4434fd5447f 
+libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/e53be14dd02a2cef8eccafb9301d29c51d652c635703529c1444947002993f6639083eb8bef13af21c9796717ce4b3129dcdcbe2751a1173d39e321db8f6e3c7 +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/b5cab0fc7c6643c6dd161f1e553ef1a0 +libLLVM.v18.1.7+3.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/4032634449e2669479761c4323096b152f8df4948e3a97eea10f0b400fbf2a00d1edda59b74a714b62c4e204b113d8ecda78d828c3344ebe8bd750d14b3c4c7d +libLLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/9f31ae627df95fb4818d8bb96e17c941 +libLLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/da67146a80ba3615e5e46455144c5f4a25919e391aadd3d63c9c645b639d68f8883a61e947b767f4583f666e653721c53d5d4098c8af2abd81691f941fdde686 +libLLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/55fc5ae75087cb1ff1f08a1ef65f8b94 +libLLVM.v18.1.7+3.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/a000c0e349722f6b0196cc9a10aff8040dbe6a679bd79787c96c1de76968df636ab79dc24a31e4da960502858514fd74c3586c37411381d7ca68c5474576f7e0 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/69564913bae176a167d24d3291ef7af7 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/b8eeb86b66d767218e59671bdd597623238eea72319913c2ac5e116faec3f4c13739a24f3b95338ed857ec29e714dc0308e4ddbfe359332b3c27ad5235052342 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/bc9d5637fe30f21d2231a98371e798e4 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/4efbc2823322abe80d0134d35926767bd9cab717cde9308726a6a8891e5a707476138888c695ed399e3dddb57baf17abbc43a0a338cea2e5c0f472ab427c12e3 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/8492ff91e6dbd1a66edd8aaf0390a582 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6443bd2fa9c5beecc2b002c26595f2cf3a8e2ea5eb49aa4c00f7252a6623fe0f8c01824941ebe5475460641285c4e56a5203056c1b93a78250b7e48fb5ac9e00 +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/6918c9978fd8b5887c66eee76950478d +libLLVM.v18.1.7+3.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/d455a4f433bf3ea1b5100b9d45199bc785e4b6fbc7659bf06cbde6ada471134e7d4243d3a3a1f71d579126ef8371d70e59f174e124b3ff8d4842e9ee83e2dea4 +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/075f87d106dd95c8e9c6e7e157b5e9db +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/8132379d8f44a21082c7a90f58a7dffb0c6ee725efd58a959d4023787411b080d72913bb1e89a35072f97aaf1ca512ab1d027b37eaed819e3c053d7a0cf64269 +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/4cfc2838a77f05883f82e50b3723dcfe +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/20079c81cd6a4020b087485be1ab4928b3bd3e1a53728cc98137a35b969484278093bc75a9e51ddfd8331556577c5fb3109d74dc2eccffa93b5390e0fabff2b1 +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/5b8cbf00631bd4540b7335a86302a1fe +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/51ba9a4b74b740905cee4baf7f4e5f3620ed81e0746f49cd352d874ebedab95277c5031123f880c9239b7dbf505b10f6531f79c8a6b0482a652b8324f4137cf5 +libLLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/11010cc2d58b1a8c6a6e7bc24df0c0db 
+libLLVM.v18.1.7+3.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/a6bdd9a2a2fa9a572e74ced69c3ce9d1b84cde18155ec9bc7dfbaba411ee6c43d229e6fb333eff66fb63b632b485b46b7cb1657c0c49d9d9bb849fa13f0bbc7b +libLLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/566390f0f0fa92c4a9a400e25e7086d0 +libLLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/31981cc3be65117d8dfcb0254dcdecd79b0f141a61864db4e50b81fbe7a1db431b71f9ef43bbeb320e4ae33bb00f2db42d83f849ce6ca5044445cd5de9572566 +libLLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/b753aba58a0704da416bb06cd97acdd7 +libLLVM.v18.1.7+3.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/99358ace0ef20138284c3f8b28b46dd431b460d1c92034fc918233a266c9be398eba63d1758a388fb39935123c65f72969e01231e54b27cff771cdabef9171c2 +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/52cee10b0dd37d9a4487d3762e1902c3 +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/c44d305ffcb2939779a071a5a78ca9469654e36c5e4cf3e0e78603c85ec30eae3c8ab2594df19812d51dba7cea565c16a70f514faf30bc43b8f37592f57aa059 +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/eef5f1bc5a0026bf96f33e2254b93711 +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/df39558259dd59f7b602581e7afdf67e77c854c1192b53b24a5c2d133a4a74b3f44e74682f9f02745ef97a969de92566a7633c46816a031b14cb04006af845de +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/bbe95b31b958f187d49692d4856d84af +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/3035b3b8b1cd1349c893aa47f066a1b8b7610f69ff0c4f2f3325a377818fd8bb12ad5485730be354bc2a9982db405b5954dbda39bc7cff38dc22966a6d86c5d5 +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/0e21a6d22dd45d125d0e98fe8f72e8c7 +libLLVM.v18.1.7+3.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/efbbad538c6f8b773d7ef1019a9b754e1ce7da59ea5f00f452fa7f7cc93c40f248762eb7f708e3d2fa7f9bdbc0b680d6e6502a07bbca0d4e701b51b0565d625e +llvm-julia-18.1.7-2.tar.gz/md5/5c0ae4abc4ce31a86d5d6d4ecabc2683 +llvm-julia-18.1.7-2.tar.gz/sha512/b4d1dde929a8670eec1a9b25abe23fbc926a922e61b60ed99b52b440cd07cb026e7f746878292db4cd0cb422d9b87ecc4ee4b2b141f8e9411855d18da51facb9 +llvm-project-19.1.4.tar.xz/md5/1e13043b18558e4346ea3769094c9737 +llvm-project-19.1.4.tar.xz/sha512/a586f8a41dde5e0d9ca6d8c58e9ef2a2e59b70a86d2e2c46106dc31b5c096bb80af0cdbdb486179e9cc676a540099f49a1c2db9e5e84c50362db1f72e9af6906 diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/deps/checksums/mbedtls b/deps/checksums/mbedtls deleted file mode 100644 index d0b43ad80ea70..0000000000000 --- a/deps/checksums/mbedtls +++ /dev/null @@ -1,34 +0,0 @@ -MbedTLS.v2.28.2+0.aarch64-apple-darwin.tar.gz/md5/ef83fb4706100ee678cd8af3f7a5c762 -MbedTLS.v2.28.2+0.aarch64-apple-darwin.tar.gz/sha512/03dda8cc9afa3d79c3c733e45c77891e75d939dc2bcca5ba8eb7aa3bd01fb52011ea9323df9cf7294fe6dcf87eb86c1b1c4b2f3b8af6116929b3371698559fe4 -MbedTLS.v2.28.2+0.aarch64-linux-gnu.tar.gz/md5/ac46c3840d2d0cc7c573f31c2f3d0d61 -MbedTLS.v2.28.2+0.aarch64-linux-gnu.tar.gz/sha512/bb458f1dc9b8684a38f603136ee4ba1c51b47f5047c5a5cfe2c552be266e79dfcd8243b216b0831abf24390eeb6f4524bc7e43b2642eb2ad0227399222cd0d8a -MbedTLS.v2.28.2+0.aarch64-linux-musl.tar.gz/md5/d74732e0bbcd03666243605e60bb345a 
-MbedTLS.v2.28.2+0.aarch64-linux-musl.tar.gz/sha512/90b0699477b697b94c0ab1ba0607fb3e1cd40d66a80a51cb1e0f3b927de03ba201e7e280d453db672e6265db5b07d0145846e53ddbcb4b550afcabef1716470b -MbedTLS.v2.28.2+0.armv6l-linux-gnueabihf.tar.gz/md5/65ce7c51884b50dcb8343a945644b862 -MbedTLS.v2.28.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/e9df753e9f3a08fd645b15422be7cc0ec3aeac3f8d5f76e0c4c5ec24c54e1b653db320ed0c6799411802a05801241a5363bb449a8765fda7856413c7e3297721 -MbedTLS.v2.28.2+0.armv6l-linux-musleabihf.tar.gz/md5/7b7fc8eafc95416d75e3f1bfb2640e09 -MbedTLS.v2.28.2+0.armv6l-linux-musleabihf.tar.gz/sha512/68362114808fb4f986dea673ef1c7f104caad8233bed1c7f6a365d5d69bb7f7c92b234d6b1bfa5b014e7096411841c115a5cfe9932ae9ce642293cab962f8d38 -MbedTLS.v2.28.2+0.armv7l-linux-gnueabihf.tar.gz/md5/4a477379b15fafbf0c05435f5ab370ac -MbedTLS.v2.28.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/fd34b475bf94b411e3155f5a5166d1ad081fef3622d7b99f4915b592d4235f63a0b910e0559ba2a0c3d596df9ccc2d7ecb61984091debb20bd4b995942857132 -MbedTLS.v2.28.2+0.armv7l-linux-musleabihf.tar.gz/md5/fc6551ef5f189010a84230dd48f6bdfe -MbedTLS.v2.28.2+0.armv7l-linux-musleabihf.tar.gz/sha512/d3a7199f3e1ffb1c289c5f0a4384f3b5d1af6e868eb1081d66d6cbfc60e6415e68a7e22afb497f2e7c7900678a19bf1ba2a4c888efa1019c03bce376af62154c -MbedTLS.v2.28.2+0.i686-linux-gnu.tar.gz/md5/335c3ac146bbe8cd862e4737bc362037 -MbedTLS.v2.28.2+0.i686-linux-gnu.tar.gz/sha512/f12ef67a92af27f4021f73171cdf2ef5558f734fcb185e4417fd7e16752dafe3f75be4291854b5ce346abda674252d58064d9186122eb4f9b15ff89156d221ce -MbedTLS.v2.28.2+0.i686-linux-musl.tar.gz/md5/435b864b02d1d2c96e5d8dc32b433ae1 -MbedTLS.v2.28.2+0.i686-linux-musl.tar.gz/sha512/52e3a79a70b3ff4617c93cafdeb702105c13b34687fc0fa31eebc91aa5cacea356d5b6a6bdbbfd81417d77debe256ea8f0f2a43c8d140154099bde097740dce7 -MbedTLS.v2.28.2+0.i686-w64-mingw32.tar.gz/md5/a238801f7e0d14f4b693aa4b74645263 -MbedTLS.v2.28.2+0.i686-w64-mingw32.tar.gz/sha512/431db4c388d3c52b08795d6fee6e6696cf383506a603816d6a63dc3571dbdc2b673837a1df1d9003c5009f8f8dc6eaaef3f80aaea396dc2fdf54b7e6a3c6aad6 -MbedTLS.v2.28.2+0.powerpc64le-linux-gnu.tar.gz/md5/26c8f09aa65e5b70be528311519d4376 -MbedTLS.v2.28.2+0.powerpc64le-linux-gnu.tar.gz/sha512/2d47567388b8554ce7714f4ded013fcbffbf94726dbc6a1b7287dc17b27d1fa35baba55cf7dac17c555892a5f4c74119afdf552b42b0e8f80f26621adaa4dbca -MbedTLS.v2.28.2+0.x86_64-apple-darwin.tar.gz/md5/dfc263208b1a8d4c29b4ec3b6f10e5ce -MbedTLS.v2.28.2+0.x86_64-apple-darwin.tar.gz/sha512/3b2941c4b151206a56a9a795f0f30519676ea4bc0c93f66b419b15568edc91bb976954f584116accb7f9bd067580712e61b3c580a249332640e27e6346ca51ff -MbedTLS.v2.28.2+0.x86_64-linux-gnu.tar.gz/md5/94b908036eecbe59372722b41f0b1985 -MbedTLS.v2.28.2+0.x86_64-linux-gnu.tar.gz/sha512/c37a4c34eb450bd716c076c4105bd6022892731c470d64a854ac0fca6653dcf5a70b23982050e7d82cdfd67d02902d9efe4c94d2cf5e0d29d497c3c5ac03f8e8 -MbedTLS.v2.28.2+0.x86_64-linux-musl.tar.gz/md5/217866be499144eeb2e0944b0b60cc09 -MbedTLS.v2.28.2+0.x86_64-linux-musl.tar.gz/sha512/144180e1968da627c92173277a130283aea711157a04a2655786658234232e397985f63d5407166377fc5f38a7447c19797c51b66a9c4b1773601d9e7e01d0e0 -MbedTLS.v2.28.2+0.x86_64-unknown-freebsd.tar.gz/md5/74316c624c8106faf7c04e05149b5c38 -MbedTLS.v2.28.2+0.x86_64-unknown-freebsd.tar.gz/sha512/9eca254c9b663b2f5799705c2e0aebb5529a7ff7759b0f3b67516e622dd4561169fface1d08340666453e779133498eacb8ef2dae1ef6332ceb4d8052d3614d3 -MbedTLS.v2.28.2+0.x86_64-w64-mingw32.tar.gz/md5/cdd28912607781f5e6ea6cad73c7dba2 
-MbedTLS.v2.28.2+0.x86_64-w64-mingw32.tar.gz/sha512/e5793778d57b725a0cab48dd7e8f45022699b654bb8e890620efa73628140e453c80601e43647a700d6090a4b66d3c30b11634c4224c016c11c7bfde6b8a1b2a -mbedtls-2.28.2.tar.gz/md5/421c47c18ef46095e3ad38ffc0543e11 -mbedtls-2.28.2.tar.gz/sha512/93cdb44f764b200131b8dbefb9363e5fa38760eaf01473a512f93673cc55db3515830e16b813e03b39cb819323ad78cee4cb7f3fa85861ec5e72e0f89541c7fc diff --git a/deps/checksums/mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/md5 b/deps/checksums/mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/md5 new file mode 100644 index 0000000000000..fc6955c8f2e7b --- /dev/null +++ b/deps/checksums/mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/md5 @@ -0,0 +1 @@ +1911cf084d26c48e2ed58af3d268b4b6 diff --git a/deps/checksums/mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/sha512 b/deps/checksums/mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/sha512 new file mode 100644 index 0000000000000..ea916976895a3 --- /dev/null +++ b/deps/checksums/mmtk_julia-b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214.tar.gz/sha512 @@ -0,0 +1 @@ +75beab54398989c46b62e714b242cf6705d88d220f40c21e494e0f29161437f5fbe9ba05b543d2353a1ad76f4239ac4025b476be0be864649f310f14935289fe diff --git a/deps/checksums/mpfr b/deps/checksums/mpfr index 2b4281659b13a..7b3b57978bd01 100644 --- a/deps/checksums/mpfr +++ b/deps/checksums/mpfr @@ -1,34 +1,38 @@ -MPFR.v4.2.0+0.aarch64-apple-darwin.tar.gz/md5/f9393a636497b19c846343b456b2dd7e -MPFR.v4.2.0+0.aarch64-apple-darwin.tar.gz/sha512/a77a0387e84f572ef5558977096e70da8eb7b3674a8198cc6ae35462971f76d684145ffae7c2ddca32e2bd1c8b2ccb33e4447eb8606d5d5cd5958298472b3ea9 -MPFR.v4.2.0+0.aarch64-linux-gnu.tar.gz/md5/ade253017d195de694780c32f9161dcf -MPFR.v4.2.0+0.aarch64-linux-gnu.tar.gz/sha512/1b68de5f8e557b7434c8c1bc016227b58683b56c0977b763422ea85a673bec446fcfee3a4f69e1d4689abb9bb6bf47f2a50fbb56ecac6a9d40096e66bd0f2080 -MPFR.v4.2.0+0.aarch64-linux-musl.tar.gz/md5/7dbd121c7192ccaf7191de5ab8d91afb -MPFR.v4.2.0+0.aarch64-linux-musl.tar.gz/sha512/8614e3cb28491b24a0ec5060b44abaf264b61c91ddd29d70105ff583bd3112cff1b9bd5ed45e39f186265333982d5eeb8bf35fedc3b51b2a009cc7a51046b50b -MPFR.v4.2.0+0.armv6l-linux-gnueabihf.tar.gz/md5/adb2b7fdf111c8b19df1516cfb278bb1 -MPFR.v4.2.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/0c47aeffd05a194802f6c4e0e2779d56fb46007e6c3e145ee6992854a21a317a9d51512c59a0ce4ddcd314c387945225c6557d6c2ab6961ae4848875e8983de8 -MPFR.v4.2.0+0.armv6l-linux-musleabihf.tar.gz/md5/c30358bdeffcff65ba9be906cd35889b -MPFR.v4.2.0+0.armv6l-linux-musleabihf.tar.gz/sha512/2857ec27ae2d53a451d62dd241ce9b43f7ee182bee180ecd9ad92c907c66d0b0ab2d1ea3b20fe61cc176ae44ecbe6041305cc8a9343b396c9cb54dd77a1e2868 -MPFR.v4.2.0+0.armv7l-linux-gnueabihf.tar.gz/md5/a1e30436bade2150c9dc924177f0c321 -MPFR.v4.2.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/d2f4662c494fefda66847e7a085edda3ce396383aafb4e17fc2e176191b0f530541726c261cac3467f13136e8ec728c8a7cf0e352f3e9ebf960d153cbfe766b8 -MPFR.v4.2.0+0.armv7l-linux-musleabihf.tar.gz/md5/857e3c82804e7c853d21603f18caa715 -MPFR.v4.2.0+0.armv7l-linux-musleabihf.tar.gz/sha512/86cf3e940fd66820b5269e9aa2a49c3fc3077857bec037a08e0d301b0bf3cc5c79ac331cc6370d852e20f4acf8f601c49d5dbe24e96652e4411b3f33a11e3f45 -MPFR.v4.2.0+0.i686-linux-gnu.tar.gz/md5/5a432be79a112e67e970980f4bde13a0 -MPFR.v4.2.0+0.i686-linux-gnu.tar.gz/sha512/94198b23ac94dcb9dca95938a46b9899c3ef329bafbb13b32076cd3415b89f11908632c7c07e90549c01bd9ed7fc9a002dae07a645f85b8509234c49be729621 
-MPFR.v4.2.0+0.i686-linux-musl.tar.gz/md5/4ce71dc250c2469f844a02c6ee6571a1 -MPFR.v4.2.0+0.i686-linux-musl.tar.gz/sha512/134b67b23de75ab172594cd0fac55b5c265730bfea195978698e3e6fbc47d65617652bd72d90ba092ed1bac4c29d5b2c109df5d8dc60b5d8f91159fd58575b67 -MPFR.v4.2.0+0.i686-w64-mingw32.tar.gz/md5/df41bde61d33b56fd48bdb0f9ec0c624 -MPFR.v4.2.0+0.i686-w64-mingw32.tar.gz/sha512/145bc14f22eb077992cd993a20d3205eeeee1d2bb99ff4f48277173b0b39c848e2cd3044d2141003607aa4ea3665546a87b9ffea87bf570ab1b152117ef4045c -MPFR.v4.2.0+0.powerpc64le-linux-gnu.tar.gz/md5/d818894054b38232ba02ee0e129f6fe0 -MPFR.v4.2.0+0.powerpc64le-linux-gnu.tar.gz/sha512/0e73ca926f3e06466d1899f0b3e9ae4abe15102804dce6716ce23154344a571773c40d276f0038a0ae4e626799867ee715428e1d961334a01ad3091745367e8e -MPFR.v4.2.0+0.x86_64-apple-darwin.tar.gz/md5/9652148df4e771be39713c4f43d3ff61 -MPFR.v4.2.0+0.x86_64-apple-darwin.tar.gz/sha512/91a0219fd1880dfa90d196fa403f4e1df0347ced58a4772492196b94476f346d80696885a4f3520424494bc09679cca0c0ccf2f6e9247d60b52ebdf564485e72 -MPFR.v4.2.0+0.x86_64-linux-gnu.tar.gz/md5/4de39327a792be708119ac7b43957628 -MPFR.v4.2.0+0.x86_64-linux-gnu.tar.gz/sha512/447b59d5589a8517061627668e8baed4366408cacc9d8e063528b9b795de6d27e4005844578310185f03f568f4948bc4a794624235875fb61b6187264b6f483b -MPFR.v4.2.0+0.x86_64-linux-musl.tar.gz/md5/f9b8c3c094b339341b19828cc5e1d47c -MPFR.v4.2.0+0.x86_64-linux-musl.tar.gz/sha512/c661e7c5bded3bdf11b2bd5e5ef4ad8e446934d9b82dfe26f0be1b83cea98d7e56e0903bfc1075f91c8d23401cc6b3b722f2d60f46d73cab884e81fe518aba27 -MPFR.v4.2.0+0.x86_64-unknown-freebsd.tar.gz/md5/83700aaebc7344d84d70f0bd0f9c7890 -MPFR.v4.2.0+0.x86_64-unknown-freebsd.tar.gz/sha512/039cb18a142a90fadc7951f05324fe9c033da9502a61da77fdcd5d9557075ad1ca8500b9b9b39ce57a44b9cb28d41dfc6cbde10cfdbdb40077ebada24a2bab9a -MPFR.v4.2.0+0.x86_64-w64-mingw32.tar.gz/md5/9cdaa3fc0d13a8835d165c745937c385 -MPFR.v4.2.0+0.x86_64-w64-mingw32.tar.gz/sha512/21464bf836362ecc50da82859a4ba2de3d32d76ff57de9719ac850e73918814e1002130e0d6797fbb914b822f13bea383be3a29b2a1c9c8415cb2e3c5d321669 -mpfr-4.2.0.tar.bz2/md5/f8c66d737283fd35f9fe433fb419b05f -mpfr-4.2.0.tar.bz2/sha512/cb2a9314b94e34a4ea49ce2619802e9420c982e55258a4bc423f802740632646a3d420e7fcf373b19618385b8b2b412abfa127e8f473053863424cac233893c0 +MPFR.v4.2.1+2.aarch64-apple-darwin.tar.gz/md5/1f5bba3e8e540720e239da75e5ae79eb +MPFR.v4.2.1+2.aarch64-apple-darwin.tar.gz/sha512/7de26c625e540a5b88e280ec2cb8712d4514732d80a0c6342d2b2cabc6bc17c05f6c614b8e38800c93a4af5438c554733d3fa2002ef70072dfb44c08d3f03d26 +MPFR.v4.2.1+2.aarch64-linux-gnu.tar.gz/md5/112ddd4e5cddf36b005394f9cd81b8e5 +MPFR.v4.2.1+2.aarch64-linux-gnu.tar.gz/sha512/dc125f625e8c74ce18c052ef759ccbcfc2f3a932f2810a306bdddf70d5f37f3546200690fd08fb76742022322a7c1b9aa907b4aec6edb318060f0648ff426cbc +MPFR.v4.2.1+2.aarch64-linux-musl.tar.gz/md5/a0919ef7cc35bb663d05e27da2bcb9a7 +MPFR.v4.2.1+2.aarch64-linux-musl.tar.gz/sha512/8acbaaca766c2ce225ac8df88c103a57fc52119d1fd54e9fc7d1f9d725c4ca9f74a0090e86eea0c140482a1abaf5b6086c453824a7516e9aef3ede5058f1767c +MPFR.v4.2.1+2.aarch64-unknown-freebsd.tar.gz/md5/61e1dcc7e323b976854a4e8164316d37 +MPFR.v4.2.1+2.aarch64-unknown-freebsd.tar.gz/sha512/f3a5493f88b290d15aff9bf79b15158d19bea05af7210b2967368e0b2f98cd291f77e62f39ee0c7ad4e9d2ef6ebdba4bf2fea24c723791f71f7b9b1ef989a67d +MPFR.v4.2.1+2.armv6l-linux-gnueabihf.tar.gz/md5/629aad4ac45ba23becd8a26df188638c +MPFR.v4.2.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/bb05a8bf127eb16608a82037546f48462cb6168e1adcdb2c60dc3bd08f62cff30cf603abcab87bb336305d37dbb7b0480ea8f6664191879bdcd487738a33dd99 
+MPFR.v4.2.1+2.armv6l-linux-musleabihf.tar.gz/md5/0c3c026051b096d98c8d476dd44db334 +MPFR.v4.2.1+2.armv6l-linux-musleabihf.tar.gz/sha512/9e791fe9748c87068c167517883cc905fe51ea38d2db89562a7a0959cfd83b268eed2897e5eaaf90c0b0b08a4efd8039bdeece64e83b17bf1d676570d13c2b98 +MPFR.v4.2.1+2.armv7l-linux-gnueabihf.tar.gz/md5/a2433a717e49ad95c3e430a538d01134 +MPFR.v4.2.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/abde21a943d4af312e0d44b1ff1d4aefa10b2f38c74ff0e04c0c2b8561750ef5d164679564ffe1b551821d83ebcafbe99467230b37fe4591c593a24dfb070c6a +MPFR.v4.2.1+2.armv7l-linux-musleabihf.tar.gz/md5/4c892b4cbf1926d5d2b6a88330015c8f +MPFR.v4.2.1+2.armv7l-linux-musleabihf.tar.gz/sha512/24825bb1268ef2ea42894ec9ff6589308abae430dd8e43a2ca0d368f1e718fd3cdf6d9bc4bc383346970ba845d2ef1721c4848ee0c783d09addc5505131db3e6 +MPFR.v4.2.1+2.i686-linux-gnu.tar.gz/md5/0b1e0268dcaeb3aa0f7f0a6451c6b841 +MPFR.v4.2.1+2.i686-linux-gnu.tar.gz/sha512/f0ef142c7b86e8f92b78a7ff0607da70bf8f3970b118fa77438cbb0acbea604dc0c7566b52ff1f85b179aac7661b31e4aee049f2c5ff799c95b385ba9cde2a25 +MPFR.v4.2.1+2.i686-linux-musl.tar.gz/md5/2fc9a938e76e7bdc0b73d7e8bfc8b8ee +MPFR.v4.2.1+2.i686-linux-musl.tar.gz/sha512/4aed3884ad569b7695b9383db9d9dbb279ffe5349f7757b867ff860fa600b47faa4c169f4a60409666ce45fc6e6f269c18cef2df6fa0585f056d7e07e55005b8 +MPFR.v4.2.1+2.i686-w64-mingw32.tar.gz/md5/d13c44bb28d721107639c8555db5e157 +MPFR.v4.2.1+2.i686-w64-mingw32.tar.gz/sha512/1b5562d2df322c28bd06bb4ba8c9039cf90ed62affcf7f2b0d7ae8925d503c76a0d3d2f9b65c8c55575f245a4df8fbc4c7c63e93e7b973188f203a7fbda4eac5 +MPFR.v4.2.1+2.powerpc64le-linux-gnu.tar.gz/md5/52b3912b2c5f59ab3dcd7c3e06ca41b5 +MPFR.v4.2.1+2.powerpc64le-linux-gnu.tar.gz/sha512/533cf1f93c4464b4bed1d56ea79946fc2d20f3a7825d6b0383ed98cec99f85713e7bca549fd8948adb69aedc14e5d14a54238b3e67ef103e1b049b0cfb6cc1c9 +MPFR.v4.2.1+2.riscv64-linux-gnu.tar.gz/md5/aef7709c8457ee2db2622c39f1da16b7 +MPFR.v4.2.1+2.riscv64-linux-gnu.tar.gz/sha512/7a9c88563e3e7ab22a3aaa45690ed89c3e7eb22333a3d45c5e04ad2660c91ad2c97f10cd6c1aa1ccfdbf97186f9fd7f92330a41ec0be026e2ff84c5ba91f2652 +MPFR.v4.2.1+2.x86_64-apple-darwin.tar.gz/md5/12afc9778e39a5b6d9ea0161e2c80a95 +MPFR.v4.2.1+2.x86_64-apple-darwin.tar.gz/sha512/a9070423a898fa865740753ae7513d3cc0b500bd9b6b5c6aa672833dcac429efd806eff48501b51afcba5db0d31e79dac243b11b2f8847a1551576c6131506f5 +MPFR.v4.2.1+2.x86_64-linux-gnu.tar.gz/md5/46c6a5f40243795bdff51bd68a89c82e +MPFR.v4.2.1+2.x86_64-linux-gnu.tar.gz/sha512/df8209d69ae55dd54491055078f113f4ac8be7bc68e1c0eb62944e6c9c04ed3e9a55c4a5f28ec68eb69f558d9f4d1b975f36de572fbd0ef7720568efc8042327 +MPFR.v4.2.1+2.x86_64-linux-musl.tar.gz/md5/045236ee0d558d2eda42df76c3397f69 +MPFR.v4.2.1+2.x86_64-linux-musl.tar.gz/sha512/52b68a673160af7cd09b191f3c28e17d5af7516b5baa86c0df9cb63a116772a15b5358f3db5f0b254b5752c652f8959454667cc1726ea4ff30946e3bbdb90ab4 +MPFR.v4.2.1+2.x86_64-unknown-freebsd.tar.gz/md5/da3da71bc7572eca5bc3d3895abf73c2 +MPFR.v4.2.1+2.x86_64-unknown-freebsd.tar.gz/sha512/4270b83ebe72d431f8fd9127b2b8d3bd75c2e52c563d390a4ca8d40c0514f5996fce57746d07b7d3bcbf93bfe78d420f815fde5eda4d84a5bcb7b7cf0e092504 +MPFR.v4.2.1+2.x86_64-w64-mingw32.tar.gz/md5/2a6f5ccb8d45591a845ad43916beb85a +MPFR.v4.2.1+2.x86_64-w64-mingw32.tar.gz/sha512/db9ecc9d8247fe4421c4cc9c6ab540e17a7445056b7a1062d4e334b353783a1c067062fd8e6f0517d8bd8782c9bb75abcce8ab8247be707ba066dc90b7fc12ff +mpfr-4.2.1.tar.bz2/md5/7765afa036e4ce7fb0e02bce0fef894b +mpfr-4.2.1.tar.bz2/sha512/c81842532ecc663348deb7400d911ad71933d3b525a2f9e5adcd04265c9c0fdd1f22eca229f482703ac7f222ef209fc9e339dd1fa47d72ae57f7f70b2336a76f diff 
--git a/deps/checksums/nghttp2 b/deps/checksums/nghttp2 index 6113b23d68c14..4520109441588 100644 --- a/deps/checksums/nghttp2 +++ b/deps/checksums/nghttp2 @@ -1,34 +1,38 @@ -nghttp2-1.52.0.tar.bz2/md5/bde5874bd8e7e8be3512a621de27b9d5 -nghttp2-1.52.0.tar.bz2/sha512/019ec7a904d1baf8755ffcea0b38acf45ea9c6829d989a530ab35807338ba78d3328b86eebb3106b8372b7a8c51b466974d423e0cd786b6d6d020f0840c160bf -nghttp2.v1.52.0+0.aarch64-apple-darwin.tar.gz/md5/e3d9e07029e184cc55b7e0c4d2e27c7f -nghttp2.v1.52.0+0.aarch64-apple-darwin.tar.gz/sha512/cd098db984f751b00d2cc99d7f7eba0fa830ba178dd85a9dfa679a591e62d57364dcfd74e6a55ef513a0436a8e520b1a5474d4bfa9a8bdcd70e398482b7c9985 -nghttp2.v1.52.0+0.aarch64-linux-gnu.tar.gz/md5/73fe75f3cfa2bd3e804ea39a4eb884a9 -nghttp2.v1.52.0+0.aarch64-linux-gnu.tar.gz/sha512/71f4b2a23ba148b66432797b0db954dbd98fc900045d4572f488b43779aae125f71929e5bba6bbadd30c7998a133c5e5beb70888968bf3b01bb5fe9c9ea0e451 -nghttp2.v1.52.0+0.aarch64-linux-musl.tar.gz/md5/736a24a7eee567851a965558e31489fb -nghttp2.v1.52.0+0.aarch64-linux-musl.tar.gz/sha512/ab36182b04a590b092fae9e3a912a87467e8b01ad40a628a1d2e52910ee513ab327d5d2836df598d5aa8203f60a605d19d0b9636eb35d12a84a1c9d87124604b -nghttp2.v1.52.0+0.armv6l-linux-gnueabihf.tar.gz/md5/56fd32e8d77d4c9d9e2355565f4db19b -nghttp2.v1.52.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/85718e0e5cee35d91a8684ea33d8f965bb30d62dbd6b74a574a2fbc4c1027b1ef23ef68f1dec3f037fa6c5739287329567df9591a69f8f23b23fab2516a0b644 -nghttp2.v1.52.0+0.armv6l-linux-musleabihf.tar.gz/md5/283273d3bf4d53b56d12ef6af2e72f20 -nghttp2.v1.52.0+0.armv6l-linux-musleabihf.tar.gz/sha512/5c1d92cbf5f2f4e1ceb4ee13634c0bceb6ca28abaf9d87cc673f264d274bb96aa095648295e9aa76f86eb0890a426f47c0b942e72610daf722ed8e86b5f0df69 -nghttp2.v1.52.0+0.armv7l-linux-gnueabihf.tar.gz/md5/d7ae84e5365759a42d0fe0360f679b61 -nghttp2.v1.52.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/63212e3ad94d2bc54ca9ebd452d8de8e67aa53c03a3b3033d36da765303e714d8d5c24156ea4fb985acc72fe52e2977e8e8a658cdd9409bd41ecf401c08c1aee -nghttp2.v1.52.0+0.armv7l-linux-musleabihf.tar.gz/md5/a6ad0f25f43b7f1832faeaaadf683ed4 -nghttp2.v1.52.0+0.armv7l-linux-musleabihf.tar.gz/sha512/64b9075c0d819288345d53c5ce88b360d2ca4d24c3d2e81fb53c55f86054b1a3e95d7831b363a4100965cdbf479268a5993d66ef59089a219a97b4151d8fef60 -nghttp2.v1.52.0+0.i686-linux-gnu.tar.gz/md5/9781f6eeb4d24a291d6737e59e74edc1 -nghttp2.v1.52.0+0.i686-linux-gnu.tar.gz/sha512/2b542cb67e78993ef881694dc50c980b57db3761c5f4e11c381afb1b31d1fb8ab0a8b20e1279303a602c07912f21e8ef9d732366b76ab3f356a74b444a5dc78c -nghttp2.v1.52.0+0.i686-linux-musl.tar.gz/md5/08603b9364179ab4cbe0637b9b1b63b5 -nghttp2.v1.52.0+0.i686-linux-musl.tar.gz/sha512/0a5b79709482548c6a713843b670695b4b13d2b219b592d029719da0b4187fe884798fb44e2c511c300f02bab03f2b0b289d49d6256e3ce0b9602a66ea2382bd -nghttp2.v1.52.0+0.i686-w64-mingw32.tar.gz/md5/1abdf0cad466ed0ca0da137809999d8e -nghttp2.v1.52.0+0.i686-w64-mingw32.tar.gz/sha512/04680895ead989fda56b284d8963e7ca31680492c8f77f4c6bd7ca03b9a66ee7529b78cf35e07b2e106f43c9aa543dffd4081b034339803ba95021293d3df997 -nghttp2.v1.52.0+0.powerpc64le-linux-gnu.tar.gz/md5/ae411e40e24cb3f3b07fe8de211b58c6 -nghttp2.v1.52.0+0.powerpc64le-linux-gnu.tar.gz/sha512/7433502d76646e5761ea2707fa65ea5a412c513c70908a4d9ceb504f08121b1f39bcff984543370c221814785b7064f85dedc777a22df5e30a64a64e510e0978 -nghttp2.v1.52.0+0.x86_64-apple-darwin.tar.gz/md5/59f0de0affaa17898e837b5074de68fc 
-nghttp2.v1.52.0+0.x86_64-apple-darwin.tar.gz/sha512/e639c813373b17d95220640ec2a568e9731cfc32df826610357ec9ff8e9d7e7abe10291140eaeb9342ae69215798bf3f999db7647c23efb4f815b54f4da9cfe4 -nghttp2.v1.52.0+0.x86_64-linux-gnu.tar.gz/md5/6bc8501392d47b349c7463e984dc5909 -nghttp2.v1.52.0+0.x86_64-linux-gnu.tar.gz/sha512/522cc2a8464ee5770c01b83a6b4ecbbcce322efffbd738f7c907643fe85342e785bbc805028d41c2b7404d6241168d1ab37a9db15018623c265b53905bcf060f -nghttp2.v1.52.0+0.x86_64-linux-musl.tar.gz/md5/725a6adc23880b28303017597b974535 -nghttp2.v1.52.0+0.x86_64-linux-musl.tar.gz/sha512/ede5a34b7f71310e4c3cd99b9b61b2453db5dc8117675de12adb1e68c9283cdf821614f49f4d04bdd3b0f17d51a52972ec1e226d0dbdc5462b1a4a1fcc9f39e7 -nghttp2.v1.52.0+0.x86_64-unknown-freebsd.tar.gz/md5/a2b89913c1057ff67e7be6086619a65f -nghttp2.v1.52.0+0.x86_64-unknown-freebsd.tar.gz/sha512/6b4efd2a0807f19cecf1f1e97b23ade11ed39f651e29586bb21185e17d0c50dcb63e26233ff994bfa934b383468e29f680b1ebe0cc2a2dd09768b14dead399a4 -nghttp2.v1.52.0+0.x86_64-w64-mingw32.tar.gz/md5/e1c8ec6ec2d69b2ac64b114ebf09f8b4 -nghttp2.v1.52.0+0.x86_64-w64-mingw32.tar.gz/sha512/cb43cb138f14717501e852ed388a44d41012e2bb70b6887584b37b4e0f42827d74f17ea85ba4aa0bc09d623dedeef73eee80815c1db2b6858b31251feb0b5580 +nghttp2-1.64.0.tar.bz2/md5/103421866471b6d5fc828189552d98a5 +nghttp2-1.64.0.tar.bz2/sha512/3b3d16168f6ea5a3e8a7b30b6545b86ff6be874771a1a8d502cbdc7c46f80195c1b8190b5279030aa2edea54fec40962cf245ce4fe89cdf89f06e3f091a34cda +nghttp2.v1.64.0+1.aarch64-apple-darwin.tar.gz/md5/67a5c302c3d1089682e2d6c251273f7b +nghttp2.v1.64.0+1.aarch64-apple-darwin.tar.gz/sha512/88abd96e47d85072abb74d65c0f4872134768b7703a9bb5e5e3f1fb8bdf4d0f6a9f07fbe76089c6746b22787774f362246092c3b2155e94345216aef9f67f2ac +nghttp2.v1.64.0+1.aarch64-linux-gnu.tar.gz/md5/f45a84bd28f598305002f154fb02e3da +nghttp2.v1.64.0+1.aarch64-linux-gnu.tar.gz/sha512/95e1034e1fcd5d96a5ed01e9870af27ee160cdcd624a196dbde31ad400768a55fae39ba11c22a868a5ec79e36e98dcf6a4f37c706e91e466fd01d9b8da868d78 +nghttp2.v1.64.0+1.aarch64-linux-musl.tar.gz/md5/a2680c3d4d94433c787fc1b75f4ab096 +nghttp2.v1.64.0+1.aarch64-linux-musl.tar.gz/sha512/8c02e4e3dfdefecc8c01b397ba8ece94be8f478aa63752d5310517728715b105101f371c281575de7e7c0678b31a7c7cfd021383d000b6f747992bbe47242047 +nghttp2.v1.64.0+1.aarch64-unknown-freebsd.tar.gz/md5/4d69a91c94fabf6af9f5197607639a58 +nghttp2.v1.64.0+1.aarch64-unknown-freebsd.tar.gz/sha512/8d679a0a9ee8a2136da71102eeaf511209c217c771a113fe2fa1736ac232a5637aeafa41ea53fb8ea2514857198cb907a4a8ea5667a325257f527cea4f2c5464 +nghttp2.v1.64.0+1.armv6l-linux-gnueabihf.tar.gz/md5/f44f3dde6ce2f4e34241a336c6f71934 +nghttp2.v1.64.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/02bda6e05712de74805a6895a2f8f29b5930af2c62fed2b3e8a8fcd210c3542484131a5442c8ff707a7ac20c16eae93752609aeb6123b2296346c6399a0369b6 +nghttp2.v1.64.0+1.armv6l-linux-musleabihf.tar.gz/md5/a4dd8ed38b411d4b7c2a3411e6d59b6e +nghttp2.v1.64.0+1.armv6l-linux-musleabihf.tar.gz/sha512/a0552be25160d1878bfb10b30da23381d9d7f9ee7c749b0fb18a0a98ea1e9dcd53b103512b3fe2c98752063385b51551efff0dd0d9190db0105550595cd61d5e +nghttp2.v1.64.0+1.armv7l-linux-gnueabihf.tar.gz/md5/f0f309affb5726efdd93cc4ed0ef25c3 +nghttp2.v1.64.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/55974dc62616e50d1cb1efda380edec6b69dfa175773abc48045ae34b545a19cc50b7a53b12b67dfd9bed9200fbba543875bb821d80e7c3dfcc2aaa3442a5a66 +nghttp2.v1.64.0+1.armv7l-linux-musleabihf.tar.gz/md5/ca090db026ea470523b8cb2c93eed8ac 
+nghttp2.v1.64.0+1.armv7l-linux-musleabihf.tar.gz/sha512/d2dccf74baa66abd5ecbb187efbd79bf8a43e0b920d7f4c94b6569dd71b74aa4eb6dfcd8d090bd8841496e3f4aa184028b907faf5a0ea3f91df3bb8e286a27c2 +nghttp2.v1.64.0+1.i686-linux-gnu.tar.gz/md5/aebffa8c32cc829f9fb7089ff972b215 +nghttp2.v1.64.0+1.i686-linux-gnu.tar.gz/sha512/f43e4d90f4ca755ea2b4fc90f10944562e9a7b9666b8469bb0fbe12df38c9db9625bfd6f4f4ec16d7da3cb5e3e9e3d85b1bffb56be18d746ae7be2c3dae9a306 +nghttp2.v1.64.0+1.i686-linux-musl.tar.gz/md5/64cc8696b37eeb2a225a02412d73e86d +nghttp2.v1.64.0+1.i686-linux-musl.tar.gz/sha512/2995ab7123630c2c7c606422b25a0a7761642d72177f26046b781209b13d7e90678923dacad35637df7cd171608d44f5d7d3c586768a1d4eaef0751a1f108c04 +nghttp2.v1.64.0+1.i686-w64-mingw32.tar.gz/md5/2ed9ff15d05f6cef7bf85bb19621a2fe +nghttp2.v1.64.0+1.i686-w64-mingw32.tar.gz/sha512/45f38653664cc7343b66561a9b5bfec341593504714afbcb35a856343e583d2e75cab8062b2ff23ebdf4625607748f5c70b1ae79cc047a4073eb8d01f8252338 +nghttp2.v1.64.0+1.powerpc64le-linux-gnu.tar.gz/md5/4622f699a44d02570298daf5864cf60b +nghttp2.v1.64.0+1.powerpc64le-linux-gnu.tar.gz/sha512/f2cc88fd537503ac138518b7f72a67c18508307c0dddca41d44c8496ca4dd8f15aa133e08f13e03b2fbb3c83272ea433456c9ebb929f648a7b2af13fcd048d71 +nghttp2.v1.64.0+1.riscv64-linux-gnu.tar.gz/md5/5ec27224b6a780e989479ae4b38e5b26 +nghttp2.v1.64.0+1.riscv64-linux-gnu.tar.gz/sha512/57cfc7297f1cd2b33578ccc5f0ae847ef4771c087fe1235edd541f6f07a9feb692c554c159a40118c619f16ec0bc3cc313af19a9e845240cc50427583505a9f0 +nghttp2.v1.64.0+1.x86_64-apple-darwin.tar.gz/md5/c6e5d0a179f065f4aab2b8661e6fc2d4 +nghttp2.v1.64.0+1.x86_64-apple-darwin.tar.gz/sha512/77073fecbdac780dea34c9cb42b019b7cfe8a125a1356cd7de2ffd3aebeb29aa6251008574efa8d0a017e86023248fdd93e50c4ed2952d8a23cb67e0cf557a74 +nghttp2.v1.64.0+1.x86_64-linux-gnu.tar.gz/md5/805f31fffc112ea45716b8a661911696 +nghttp2.v1.64.0+1.x86_64-linux-gnu.tar.gz/sha512/73ba29b4f65eeab2ea5c195cb17279d8234b6a650a1b58d0494a564725cd9c76c89321cd455280dd2627d6ba48e91a68be8bdcb5472eae9c3cfc3c2623892963 +nghttp2.v1.64.0+1.x86_64-linux-musl.tar.gz/md5/fbd3da6f5b100767ab7cb77ca6e6074b +nghttp2.v1.64.0+1.x86_64-linux-musl.tar.gz/sha512/493f632d1ba08a0e8114c883d7fef12e364b0b2556e57ed8fd8d8716c43251675f3f60808e32ec430949c4480d5eb052a7837abda2f3e7423ffc658b45805e41 +nghttp2.v1.64.0+1.x86_64-unknown-freebsd.tar.gz/md5/1dba5e39ebfa6c6aac668d5488fb0a41 +nghttp2.v1.64.0+1.x86_64-unknown-freebsd.tar.gz/sha512/259c7cac2cc45a5ebf0c9f90277d6aee68332fe37e8269f93a7d72fae40d41899e6ee63c211dcee81d6bcf056d81e89624e1d09f960039a950b8709b5966ee49 +nghttp2.v1.64.0+1.x86_64-w64-mingw32.tar.gz/md5/228fd64b1581ca3a8a45646b434bbf2a +nghttp2.v1.64.0+1.x86_64-w64-mingw32.tar.gz/sha512/c9ac17a35cd89e71c3a29165f29bb86bc589635bfafc70e64f7437200a886db8a74291ab40b255d977fa0b4bf8d88b12f75cf3ba75163dd906e08c30ec200b8f diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5 new file mode 100644 index 0000000000000..a834d041324c4 --- /dev/null +++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5 @@ -0,0 +1 @@ +c866a3ff71f0640c47cda5d31f76c8e0 diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512 new file mode 100644 index 0000000000000..31eafabe3a66b --- /dev/null +++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512 @@ -0,0 +1 @@ 
+a2db523b0068cb727db7e3a2210578f6d1de46493c5e3a9114ad961ed1553d10b646d11486fe4f987e43a9e2ea289d1923a63861f5fc56cada94bcf0b96b7dc8 diff --git a/deps/checksums/objconv b/deps/checksums/objconv index f3dfb0de2ffab..ce20ae4acf76b 100644 --- a/deps/checksums/objconv +++ b/deps/checksums/objconv @@ -1,32 +1,32 @@ -Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/md5/bdf95a776cfe782d30f48a41575e1414 -Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/sha512/188b5e25d238a5e2f704c3ba8d2d57d6fe452f9d5c0e26b710ff225086581f906b8de6388c6240bbaa9d395cb58c0a73a67e65cbb8df6be7b98473101db467e0 -Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/md5/2c2b88856921c38294a30671d4794dac -Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/sha512/2c0e6cf6da02e25386e89c51f5f2d39833b49653b20537c73f7938a4045805d07b0f520661d07332aa1372231d34a3a979ad490bf5eb91fc00fcc20da3e7a9bf -Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/md5/11f6c06ee0d98b553781367d5404c76e -Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/sha512/d93a742a08f873f9336f57a28af8a1eeff624d5d9dbcbceba0d58c17a2ee3791b363661af293d08997d701fc22177192e5b5154b827974163c189ad6511ea13a -Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/md5/0151be530a0d54376590065cef28666a -Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/cb7cf5b00f211be4194b5b6acf11cc491b5f140d990fd8babc6590649e9864cf07a421e8a87ccdbe0b8720bc6473166837e384202bcbac6cedb2a9bd9c46711b -Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/md5/390251e8245a3d8d110a1786336663cc -Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/sha512/b7eb9e4a983e69ca970ce86bf306b7df11bfa8aefdd26cc02841c563ad0b5dddcb47f106fe7a0a420b20ae1d4890e6a8011c0db5a26e3493c80e63eeaadf86b0 -Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/md5/5f924d5bc16bac6447e9f2deb943e60f -Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/edaf3881754bc949ef3c60b058cc0cfff7e828d6486ca968940d216852baed5b06544dd48619cf045f3ef87df5ea00389ac3b298a1d4e9489995351e1e1ca952 -Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/md5/c967a9ed1764d9692e905d879a03f45f -Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/sha512/08e9397bbd34734c3e9137f64a94a86ec11bc70eaf54811301e7bf782b581ffdcfa041936e29aa0a8ee46203591f8213d6170a7c6ea141a0ab625ac1156dcfbc -Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/md5/a59fd92a1ed62048edb1a1297d615aa7 -Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/sha512/581fa0f5ea37e1802c9845bbc9df0b826fdad5900e712eed8767922e155026011570b4a4b8714430c038fb3c0d47965168a4c33b21bd28cd9080cb036fc9f033 -Objconv.v2.49.1+0.i686-linux-musl.tar.gz/md5/05524b26d550ad8fd045976f395cdf6a -Objconv.v2.49.1+0.i686-linux-musl.tar.gz/sha512/5e6d3b27b80f96a4f1c278f2f8fe0ff1f9bdc2f1df223a7c4d1c235c18dd8eac0b8b74d37defda656142fb2882c1b590bb3e730cfed77e316012eb69b9580b53 -Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/md5/10e82481a5396b00f568eac690c47e0a -Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/sha512/27d606acad2cf6789c9888c72887bb6a277c07f7b528fd8fe333f9738caae73e293df76ba9a0af5dceb00b8289bbc523ce235cb0eff0f0031bcf20300b4168cb -Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/md5/8755aecaacc983e2a9a948eff5c485d9 -Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/sha512/8b2bf010ff7da164b59df7147cb4904ae6f2913a3095c649e20f4263f77fb92cf8513d9130a345576da2cca4caa30828cc43b9c8ae1870268e3140e0016ad557 -Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/md5/0657a0ef9f278718c741da4d72c0952c -Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/sha512/ffd9247b02f72830d3b12e075124239ca472503701eef005b7457e21cd10103aaa13520206787818f11e9dcf35a156979e01cf5839dd554bab935ce757f032e0 
-Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/md5/0e029960584d00dbf8673ec4fcd9eb83 -Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/sha512/ae747a84edccbc804239499c10d863c62bd5846b4ab87abab42c832c9fd446065024034d76ddc167d358821f90f8d2241c341232d9dd86cf31601e951e050a6e -Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/md5/39fc1ec3602dcb6eb2f80448269588fa -Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/sha512/e86114bf0b0da7297721c56b1cf246f52b9331083e4e73b53a30a1ff031f260a5d6bd97b455669c38af915689a363b99a30ea7ed743ebf49271b95e34bcfd85e -Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/md5/9d331f32792c394c0d11dc4e6c24ffb0 -Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0c9b7c2f58110b3c8df52d83cbadd5349fb81732acae6786617e72a3150aa9ae8da7afa1e9eb08639f4dd4e7e69f29b882f98e99a8a4404b569c545c904f5523 -Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/md5/c8ef7dd7742e2c9bf2d05d2b0310bb50 -Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/sha512/b47ac1f3a10ee4f958dcda72ac45f35b94fd436498d008642ce53b93ff517c0d4158a72cbb849336dc9d4a16e26021af13e7b6976f83610380cd78cce6a7deb1 +Objconv.v2.53.0+0.aarch64-apple-darwin.tar.gz/md5/ff9f237208e8bb48daa5eb4d18526f24 +Objconv.v2.53.0+0.aarch64-apple-darwin.tar.gz/sha512/5960cc291fd551dc288cba4cfbe9c2448ebb67da72487a52d364419199e91541a427c70d73c610e054b7fe6617c9ca715ee5d3e6e694d49f16331a067bb7df73 +Objconv.v2.53.0+0.aarch64-linux-gnu.tar.gz/md5/e9ead2b4711ff50a9c88272fc5b4f0a5 +Objconv.v2.53.0+0.aarch64-linux-gnu.tar.gz/sha512/9eeab07437e61af46d884d4d3bfa6bf7538c9bd996a8b217748ed70a2d53df25c20235af78d8767106603a3c92955a1f1870533d018d35e55ba8088b14e685cc +Objconv.v2.53.0+0.aarch64-linux-musl.tar.gz/md5/c506fb9e5701a416c2204dd42a0e84b1 +Objconv.v2.53.0+0.aarch64-linux-musl.tar.gz/sha512/b6e2d91cddfe6d8c42d233e9aca1f322a4f4e12480ed2a0d148a327955b4dfcf049cb05e2151bc7a64d46ca3e3507eaae8e77416eb895bac16f394217f3e83ca +Objconv.v2.53.0+0.armv6l-linux-gnueabihf.tar.gz/md5/2f353785310504e7cbed6bc8bad2cc64 +Objconv.v2.53.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/153e595c0385395d90cafd57d08ab1ab0cae9f0376c13f139c9dc6132763cfd19c64e74e8d50f33aa50e74173f3f57f11a11ad0afeef20c2f38c00badd339068 +Objconv.v2.53.0+0.armv6l-linux-musleabihf.tar.gz/md5/a5413f1376abb962de5665cebea77c39 +Objconv.v2.53.0+0.armv6l-linux-musleabihf.tar.gz/sha512/78e6778bdc4d31905a585ecdf87c499f7befad4ee4c46a2d26e6b1e9330d9175d3def536291a4bea004dd65cf95b8b751557afc40a1513a786502000c5a3769e +Objconv.v2.53.0+0.armv7l-linux-gnueabihf.tar.gz/md5/9008ac2cbe75388fcd34cbaf523512f3 +Objconv.v2.53.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/ba877ea596965ab1ec419cc46e59d9010a96c52fba8036374e9df78157812926a34ceb3d29e997a5a27d38143d404343cbcf15c1e1b136835235b9a3c8543c77 +Objconv.v2.53.0+0.armv7l-linux-musleabihf.tar.gz/md5/86d14f65fb4f3c0dc0848a9d683f0fa9 +Objconv.v2.53.0+0.armv7l-linux-musleabihf.tar.gz/sha512/bd3a29982a16437936481b7244d237d9ec8d0cc7d78e50cc8f30c7c3f58475ba7a35e1fc0f1d1540d780b2573cfc2de2e59c8dd8d1f90de7b7fbb81a74f526a6 +Objconv.v2.53.0+0.i686-linux-gnu.tar.gz/md5/d22c35a1b47c64a291903f0ca5acc297 +Objconv.v2.53.0+0.i686-linux-gnu.tar.gz/sha512/b431d44839a628b75c189c5188f15462eeaf666a868e33c56e1c5f41041f1903e6daa3c4538b95a96a26c5842501a0055c7f2f12585e4f341ee2c8fb452855de +Objconv.v2.53.0+0.i686-linux-musl.tar.gz/md5/dd583694f915309c8e7868cebe27f20d +Objconv.v2.53.0+0.i686-linux-musl.tar.gz/sha512/279f36b7d1bda71ac37a1cd9c9e7776808499066fde94b456b000c187afa173fe7858894bcb4666831af2948c4363a15955740da77c01d63f6007627b61c1c0b +Objconv.v2.53.0+0.i686-w64-mingw32.tar.gz/md5/42c7264dac94b87556dfb2d469c5c60a 
+Objconv.v2.53.0+0.i686-w64-mingw32.tar.gz/sha512/a7eda3e7c5b3b19321d49264c6a7c35c526955ea7fc6b0201062f7e136eea6799a4d9269d78bd84770146d79fa3e49c1251a4568b5ffd1675c952e9544566553 +Objconv.v2.53.0+0.powerpc64le-linux-gnu.tar.gz/md5/33cbf415f36c0be59f34bc2e9b4c1226 +Objconv.v2.53.0+0.powerpc64le-linux-gnu.tar.gz/sha512/71cb373ade619cd431d64a8e097ff18e4979b39cff8b6be71b6ce77dc2dc4f663b25b1ddcd74e8d60834574cc3a5ed512618e6205df757d1f9752d78e35a0ac8 +Objconv.v2.53.0+0.x86_64-apple-darwin.tar.gz/md5/036603ce6fe365ccb838aebb6479ac58 +Objconv.v2.53.0+0.x86_64-apple-darwin.tar.gz/sha512/86f2f3e551bdc0595dd096925115671558970b4750c13d1ceec28fa47a3f61459cb1c4c57a5f930670035136f8cbd6073b1aced0dcc245f3a7815c26f14ad6c0 +Objconv.v2.53.0+0.x86_64-linux-gnu.tar.gz/md5/6a3ed48d4c8181934c02de60902b9f4f +Objconv.v2.53.0+0.x86_64-linux-gnu.tar.gz/sha512/94f306e8053ac94d9c679a403fcecd0a5d3bbc7f62409886cdf45342fba3fff7c22a30679aec0bd9d76262a3ae753b8a68c243b32e5966f83557bde5d988d80c +Objconv.v2.53.0+0.x86_64-linux-musl.tar.gz/md5/093aa158739b90fd5f5583868b5c898e +Objconv.v2.53.0+0.x86_64-linux-musl.tar.gz/sha512/715ea896c0cfbd2505d1ae39f2d8c6950d56f0b262c108a0e378163a209e4a1581e2d7d1f3a7677d489baa21dea9e46695415edf7615ec66078c1a7978cc578e +Objconv.v2.53.0+0.x86_64-unknown-freebsd.tar.gz/md5/a56c36c004cfb70f7feb7be0b681af19 +Objconv.v2.53.0+0.x86_64-unknown-freebsd.tar.gz/sha512/ea40c235145b00b730ea81e946a0be247a10950a564a3ac33a493c0fbcaa866f039ccc47b852d436b13fe3c52b7436f4903621a8c91e48d947cddfda42e6a482 +Objconv.v2.53.0+0.x86_64-w64-mingw32.tar.gz/md5/291a044f511f9529e9e2425aef8a7c16 +Objconv.v2.53.0+0.x86_64-w64-mingw32.tar.gz/sha512/4c43ed6842f53ab6c081bff2392e65b5295acffc7940caaa8d36a2f845368d37aa40259d0825f0ff08fad2ba58d5accd78dd96d51c8992396571fb96c81b1555 diff --git a/deps/checksums/openblas b/deps/checksums/openblas index 5cd8d27baf25e..f1bcf3f322d8c 100644 --- a/deps/checksums/openblas +++ b/deps/checksums/openblas @@ -1,94 +1,96 @@ -OpenBLAS.v0.3.23+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/f4ab1aa718db6ab731179199b48506ad -OpenBLAS.v0.3.23+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/5cd6326eab751d087b6638acc256a7c5dfc3a8a4be8949f4e2b5b8079aedc05cd8569774da19912fcbcd2dc1eac6a09d72d19bdbeded1198317992a85ccd605b -OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/57b8903e05998d293d28e70ee6cbc4d8 -OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/03325728191f88dcfc2bea16d818c0325b4f42019ed9c2e0533233e8e2a4da09a2c70503632fef2ab55ed12b7da39fdab470b801d34a9b6f576bda509f8a8a8d -OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/fe529647382de5693557363f658c71b6 -OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/77ac56f683a481477fa898d208e67c0c04c1ab8ca9dacb1e4e4ea3795fadb2604faffd1f3fd35d53eecb223c7f92de40cc8b2bdeb9c8a6a1b6a9949965cb9380 -OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/5aea8a00a946273a154110ca7b468214 -OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/a606933bed17e563d15ac206a4a37d38d75e9bb0bef46ef62485dcd32aa5a0e8501dab01f6887a1e60736c59177c6fbf0ec541fa521a9a8de854f44703f337c3 -OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/d81dc2a42a8c0d87f4ee9bad98579f2a -OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/f2bda57546f1b9aa1f8dfe9a07b2243cadc002a9ffefbcfdde344ccc96efb07608a55bf8dbb6de34925af03f01ac5487f9fe293befa84edd9a84c01a9b7409e1 -OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/400ba512f73a60420aa0d316bc24db48 
-OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/927c711c3950f24e6b4c22c6dd92cd2b212e3df9241c637ff42f5b9135e7bee8f3864868aea594c6e8ba5b40f0563d63a5f8634ea3c3276bec35d480601e76e5 -OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/6a91ea53f3aff17b602b324d025309c5 -OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/8ee85883fcc605c16031bafdd0f1a4f4d4a5957a4f85c2022466232f902a4cf64c284537dd2f237221f7d0c154e2b46200501891d3990e94dcf49a74a66c36de -OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/c653ff340dc25b19ca36309060dd6b1a -OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/cc77c84538bb0301eaa98ca1a32f024da6242e40e847e71f4a36ab69233590422aea41a32ee67031d8055c929f741617053416e5b9d446affa36e7233e5af48b -OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/18a914a1df2be07ff6b419617cb6347f -OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/eafab27655b0c179ad8b9b1dc818e8394d365f19cf75a0d77402951a38e204aa2fbe580037116a28e8e1254b66d15a543ccd0f438f3ae388e8bcad39f5953c64 -OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/4b8d18500b4bdc6f1081da6f0837340f -OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6512bd03d58b7669dba7f9830d3f8654b2747ee66c7bfc05acdbca6c3d2c3750c9d1163768a3f91d56c5a87cb30705ad6f10395652fee4c9cd06cd2920db3027 -OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/27fd022a3b84c3a92da9d6062d8dafaf -OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/c0e73f2012df2453cc6231a9e7a644609ba1280c9aea63d2cbbf9594539fb26c8f9ab6976de8ec9870cab483b1fe7e3a1fc81246fa99bbd7526051e74a4733e1 -OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/e2b0503bf1144f4b6a65ae9f09b25828 -OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/204678995b9f337e4ddae793762c3a00968faa3da3433ea17578944fd56f33c381150521b6a561d6ff2022693f8d46b9d0f32f330e500036b4bfc08a7dbd8a62 -OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/3e733c1c668a3efaccfde643092595e5 -OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/4a37e5de66920f20a648118f62555755b51e6e089e7ee43d2b7b8ec0dc47e68c7705b878158ad83d152cfebf77118f789d1bf7b2ee0702334d4317f0c6a926a1 -OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/c653ff340dc25b19ca36309060dd6b1a -OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/cc77c84538bb0301eaa98ca1a32f024da6242e40e847e71f4a36ab69233590422aea41a32ee67031d8055c929f741617053416e5b9d446affa36e7233e5af48b -OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/18a914a1df2be07ff6b419617cb6347f -OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/eafab27655b0c179ad8b9b1dc818e8394d365f19cf75a0d77402951a38e204aa2fbe580037116a28e8e1254b66d15a543ccd0f438f3ae388e8bcad39f5953c64 -OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/4b8d18500b4bdc6f1081da6f0837340f -OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6512bd03d58b7669dba7f9830d3f8654b2747ee66c7bfc05acdbca6c3d2c3750c9d1163768a3f91d56c5a87cb30705ad6f10395652fee4c9cd06cd2920db3027 -OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/27fd022a3b84c3a92da9d6062d8dafaf -OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/c0e73f2012df2453cc6231a9e7a644609ba1280c9aea63d2cbbf9594539fb26c8f9ab6976de8ec9870cab483b1fe7e3a1fc81246fa99bbd7526051e74a4733e1 
-OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/e2b0503bf1144f4b6a65ae9f09b25828 -OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/204678995b9f337e4ddae793762c3a00968faa3da3433ea17578944fd56f33c381150521b6a561d6ff2022693f8d46b9d0f32f330e500036b4bfc08a7dbd8a62 -OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/3e733c1c668a3efaccfde643092595e5 -OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/4a37e5de66920f20a648118f62555755b51e6e089e7ee43d2b7b8ec0dc47e68c7705b878158ad83d152cfebf77118f789d1bf7b2ee0702334d4317f0c6a926a1 -OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran3.tar.gz/md5/639643a12f8018e4be7bb1f9f29e57f6 -OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/0993e1967964874a3f90610745d82369ee70fa4313445391fdcb26c4218c6badb18577c67648d2f77f359b163dafde31a3723998e0b006622effeace506b669f -OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran4.tar.gz/md5/13ec86d62840258c425b0a5a6824a609 -OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/0bc74dac87b8ab5ea244fa5bcd05baf2968b7041c4eb392ff808d0aae897cec4b3082ef7fecda28aea2662b6cd956a5254212740b1802a947dd3f1e5a3dfe2d2 -OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran5.tar.gz/md5/413d4eae7b9c409204ab5fb7867dc30f -OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/4a484d2aa239d8c1e2733cd9d16bd17549f5048d9958899a4e20039a7efcfd280bba901f3fe63b3b079fd7fae88911f7201a7649a472d47d0148ba8520f350cb -OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran3.tar.gz/md5/7f342d27a9b193b5d37e2ae4de6e4640 -OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran3.tar.gz/sha512/2927b18e176e07fe8a05d2eba24f6160680131832094bde9634f0890c1bc3b877c3293163fc65067cea402f3e75871c41b47e4a9999f273e667ac400878aa2b2 -OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran4.tar.gz/md5/523c007c319adbdde6e8cd7d3d89a9a1 -OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran4.tar.gz/sha512/ddb7a8d67c9430976ad967e21a6b8717c8a5501e8808fabf6e7b2e7298a0ca56049dcfc12214a5a19dbf7bd52d625b0b2b1bcc6b4c1d921c3ee62fd2766da891 -OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran5.tar.gz/md5/7dd91db180e59da5f866f73eaccc4d1d -OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran5.tar.gz/sha512/ff0ee65e536eae5ece7fbc00a0735349d560a142e025084d64f28891bdd3da5914e976640be354d8ad34fd3d89bfb90461eb95f2426d5e292906ed4ead1cfafc -OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/fef43c3fed5ed7e9fdd9c7757be6b95e -OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/b580c1da073ed94d1a259183c5b2a6896a746c5e88c83e2df57fea801f259cb49f99b3468bbc5c1d7dc6bb84f597843bc3c383c9cab7608dbfbbb15352fb1012 -OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/88db137baca7ce99e58ff3b13ee73644 -OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/1608f3ee3964df833db9a1277fb9f69e3bb1d328a27482ac419e08520a51b2cb25501cf8986b2ff617bc04881984ce73ecd2b55b0c99afb5cb28f32d24d89052 -OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/32c1ca252dcae7d02bcd54d2b00a4409 -OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/401126557d3072d965327aa1440eeaf22fdfb1e5265c28dca779d81b94ababd1d487603d55e384f2bac305125c9ed3826f0bb7be99af20b0d18a674a8069ce5b -OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/3059083c8293106486a0f28a3564e499 -OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/019bb4bc71d7be14f040b36d1b44f653ee89aac680749a6a3b8b72446dffae185dd3d8172ca7ac9aac45cfe564c0fc6cf3221a6f8496b9ba10d04ab44d897b65 
-OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/648167f83536f32921f1208d09cc8f47 -OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/084346b93a99671967433f4ac6548d7b828aa65c402bac7e68aee78bbf75e5cb06b22f42a7d4876fdea3e838162278ee3fcf011fa18530c8d8b0e853a4c6440c -OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/9796916fb0acbea2e93747dafa96d496 -OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/7c3643c3669fea262907bb5c0f27b492adfec910716498a0bd992d705a544b21023d77801f27c967c07be9d5b30bbd936137c8f59f61632fb16cc0e1f2efebd1 -OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/cbf9ad429547ebd1a473f735b6c65442 -OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5e98ec17ee35624bf0a286a2dbe01f5ae4fa879274af70b218080c537a325a92fe76331b746e98b3ce3a0d127df2c03f522f554cb43c169a2b7b1890a9a8a81f -OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/28792164b6c34bc627966e338221ff34 -OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/433dcec661ff2459740c4d1e72d766549135f6f41a7ffb488502d76751fcb00c3d75aaa0e3db182441ef6b5e3b487a9df3e1b8b979da3681496f4ac6c6ce819b -OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/7013b806bfcd2c65582df5f224bd7d86 -OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/1078cf5583d158af5d38690acf913db378195b79b4743d977e7654c246fecb0ded4ebee96d89f54c5ec5f04af1b9858bcc0700251ccce1bf7c87926ede069b91 -OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/f959117d5c3fd001412c790bd478f7f6 -OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/e6fbe9fe1b7a92e10760d2b945bcc2c1c5e8399d729fbbb771764e7b72856707629123bc2d2fed2549f551776f8f0a737b0f414ffddc820a655172d933c10af9 -OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/af04d6bd91df5c9bcc63fe06c88a4b79 -OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/0cd4972d0a44505f9d8d3958bd20e491c986f55f5f84000ab534020dc8d39d788402355fa51bbd521c8c1bf6884d9d35c1db156bd106a98fbde80c104e8dd5a1 -OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/c5e6138630c5b616df1d045e1c388710 -OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/a54db7cb7e28dd792bd2c4f33945e7d99db1ee9a620bbe77a21cd7fa7f4cddc5c7744d27116951582f00223df09e7dc2258754032cebd57f61a723762743d3fb -OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/7d407633f4f59c305896f9132c098cd2 -OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/8a04d46b6dc2eef87d6c4ac43bcdacf5da2b1669bb829c42f07f7f73bc0dba35a6e48f303d1e9cb951062fa2c3a4cce894406c5551c2bac7f57f02d2f92122a3 -OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1d6c7e0b6f3eeedb41ecfea9881d0bac -OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/9152b7f584ecc3f06caf0eaf0a496d9e9c16afe41a4750a9bcce0477cd3cabcdcec5c97c24fa3fba03d603148c8a3dcf7199c171abe10121aaee2f8a68b93c91 -OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/fdd5c9e5f746403f7ba4789d8d8c47e1 -OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/2bd980e1e2021b32f3455fb3fdbae407fb672074ca798664c77e063ea6a7503b625eac7655c8cf25307afbfd9abaa64af52fbb3ed811ff8eb6515e3edcf26b1d -OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/8c69d9b7b6fbd0896f839c8979c35a81 -OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/d8859f485fa35b33be167dd45f1fe87696be0b12f27dd041087cfbb9df0da94bb726fb9c5f89162405de473969013e3a6a11b0520236db7f5603b25466ebf0d9 
-OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/40724e1d694288f930a15860650f37bd -OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/b7bd75b57803da93d19016f5fe63bd88357aa4e728fdde026a55ab2382957f5a82254b12e701ffb19085a6d1ecc0c0b0c685efb6fa9654e7537f146087cce00a -OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/d78352f4e9baf1225aa135b03da9315b -OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/aa4d3b0972429af7376e80eab93375ea0368f2f3a31cdbacdb782ff32f7b1c708c5e2d7f1c30ba5b8a7c604a3a7c27a7601fc7f09c8dad2b6dbc54ff099fc0e2 -OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/dbf8b0592102b01de80df0767f681227 -OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/9bdf9ab9c3ff36281fa501771c4ed932e8a481ffc4cef08725b4877999bd320c99f9c756beba7143050705323bdc0bea150ab3a11e47f3f7c60f206595c37b73 -OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/feba9f9647e82992ba310650e3b8ff71 -OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/b6c98a5a57764eef4940d81461f9706f905d376d165abdbd0fafbdd5802e34523ad15e6ee75a4550555b7c969630c43438d6cce3d6e37ac95e57b58bcc9d542c -OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/732544eb61201b6dd7c27d5be376d50d -OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7b68cceb0bdb892ae74e2744f2a9139602a03e01d937188ca9c875d606d79f555594a5ff022b64d955613b6eb0026a26003011dc17382f019882d9c4c612e8e2 -openblas-394a9fbafe9010b76a2615c562204277a956eb52.tar.gz/md5/7ccaaaafc8176b87dc59d4e527ca4d9f -openblas-394a9fbafe9010b76a2615c562204277a956eb52.tar.gz/sha512/12235f0459469b483a393844c228be5ad4bc60575bbe4b3238198f2480b7b457e4b0609730ce6d99530bb82e1d16fdd2338ceed6d28c952e6fff0da7f571f863 +OpenBLAS.v0.3.29+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/227fc95ef10e30698aade797ebd8b685 +OpenBLAS.v0.3.29+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/49a932f0c1c2d1087d20a3de2940733ed6a944284e1cf2a384a7401c5ca6bd90a35e9679b4f19bac176923aa170427e7514a47fc16261413ee03a59bbb301bd0 +OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/77acdfde5dc6f05629f3fb68a95b78f8 +OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/f28187213eac2d481bc12263fe13fcb35f4771084bacaa42b0b149ac15cf89d033910519ecc5cada77915a48c95a2de3ea4a476c0c6bc3f154e7f2ceb4bf3ffd +OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/4fb2bd80d3e4ad8ce04fa33c9a2aaa19 +OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8cc2aee3b351cc5c78e494efededdf98f65ce8942453bb3a55c90e0822ddcc07bc7716d0746bbc16701eca458b7a7aa933e9363f71bd56788c9fab36bd9bcf6d +OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/3a3ef97dc80dec3d0debade503ca2232 +OpenBLAS.v0.3.29+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/53d707f9bf57c1a19279f0146e767d779280f922ef621b5f372cedc018efb2798adabbd762324819f342d0fd98ec17c68badc50da7b6e9aa3e57c3a3c045dab2 +OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/463cb6b46091f4b4b4f2535b9f38f11d +OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5a5a7d5a7ca5e619d5af9bcbab7cfffcb4b7954005cb4a2d03f4cd0ef29c95707e830ad0b0303d694cace557cb1e9973c0244ae1f635249a313fb9f9cdfaacd9 +OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/699ca0247ec7cccec0d9d2801b5a35a7 +OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/3bb2926d2d2a43c280bb947063dd74b65194118edbd99df820bef56a546648ed903245e0947ebc31765ff43784b11349bf86cd592c78d143c0627d692162b344 
+OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/2ab069e5abd5014495b849bfbaabbd3a +OpenBLAS.v0.3.29+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/fd10e4ce326c524d97e69e50342ab63b8298c796faab8f4512772fbb9c4ae1ddc85d54643c868f3b2dc8084af974430e1f8751576bedfdc88af2ba0d2affba1a +OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran4.tar.gz/md5/ce5d04e041e9447529ad8e043e45895c +OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran4.tar.gz/sha512/eaf521d3957713e9d22b2c0b991f5eb846096891dc15bc42ad0817c32e6a1343617d28afe739dce0e39c185d022d3cdd44db2610635691990003b1b0a29f4657 +OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran5.tar.gz/md5/00b3a4433f93a56fa8b0f17acc254865 +OpenBLAS.v0.3.29+0.aarch64-unknown-freebsd-libgfortran5.tar.gz/sha512/a9845380778ec15642d74a46dfa65f8a325929f8ec8d61915941f6e228bb1ed29310f86f20ec559fdc2d5dac98a780f71a1b3116676a34e18ee7c0cb86cb7124 +OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/1be6fa7ef684733faab744fdec6c8dbd +OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/7c7803a0484b8c8e343ff5049e52fe81b76e43f0aaca7a5ad0134079147d2311cb5b159738486dcdd7ec69eb42cb0eea738741401179499a53fead2fbd8dba3b +OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/451dad687dd26a299e4a44db37a8db2a +OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ea73ee91896203566dd7510303c73d77189afec913ac1de3b7c7935dc2c460f87c83a8ddd272d9542b619e419b9392479f02540ef1c8d3daa528bf05aaf5c3f1 +OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/07ca32f715981570f2e1a5ac6721e569 +OpenBLAS.v0.3.29+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/c6ece0dac375fd66a303ca6f503e46f78472a59dc13381e8462e3e9c29e133cbe87ee77f6144a80924ae286162620c4395f5217e4f9ba379a471409085950427 +OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/49ac07fcdf0d7ce221051d089b408e05 +OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/6c9c379473f1bb5f202ca183c6ef4d43b442c867e67712e6ec2936790c282143c1edae0a1385e366f729c952e02fca13604f6b51d778dabb28ca7be0f359281e +OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/87e3dea9e115fbc9a0c7f64020c41f74 +OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/63a37a9cc882562978460e1e0f603177921a64ece7d4050b0b7a584e05d80f58314e7f8e988ea5446945d7009620c4f746ce547fe7dcb77a0707d54fd830983e +OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/8c85e7ce9bd702438c548bdae54f5c32 +OpenBLAS.v0.3.29+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/3dbaa326944d79688fa167c968a7e2660bf3b94c2e052755cc8b1ede853c02364edb7fa974880c37c60ee6e6f84c75848eb4d999c5c1e8881441191dbab056e2 +OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/1be6fa7ef684733faab744fdec6c8dbd +OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/7c7803a0484b8c8e343ff5049e52fe81b76e43f0aaca7a5ad0134079147d2311cb5b159738486dcdd7ec69eb42cb0eea738741401179499a53fead2fbd8dba3b +OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/451dad687dd26a299e4a44db37a8db2a +OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ea73ee91896203566dd7510303c73d77189afec913ac1de3b7c7935dc2c460f87c83a8ddd272d9542b619e419b9392479f02540ef1c8d3daa528bf05aaf5c3f1 +OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/07ca32f715981570f2e1a5ac6721e569 
+OpenBLAS.v0.3.29+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/c6ece0dac375fd66a303ca6f503e46f78472a59dc13381e8462e3e9c29e133cbe87ee77f6144a80924ae286162620c4395f5217e4f9ba379a471409085950427 +OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/49ac07fcdf0d7ce221051d089b408e05 +OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/6c9c379473f1bb5f202ca183c6ef4d43b442c867e67712e6ec2936790c282143c1edae0a1385e366f729c952e02fca13604f6b51d778dabb28ca7be0f359281e +OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/87e3dea9e115fbc9a0c7f64020c41f74 +OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/63a37a9cc882562978460e1e0f603177921a64ece7d4050b0b7a584e05d80f58314e7f8e988ea5446945d7009620c4f746ce547fe7dcb77a0707d54fd830983e +OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/8c85e7ce9bd702438c548bdae54f5c32 +OpenBLAS.v0.3.29+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/3dbaa326944d79688fa167c968a7e2660bf3b94c2e052755cc8b1ede853c02364edb7fa974880c37c60ee6e6f84c75848eb4d999c5c1e8881441191dbab056e2 +OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran3.tar.gz/md5/86834236dee3db3affb38b8cdcf59681 +OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/6731b4ea22a0f3d25f9d041e2baa6d66f1027dce49931a334a33711fc4c6de5da368274c9328618ed78158855c5d38524b917447d1aafb5c551934cf982505d2 +OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran4.tar.gz/md5/c63c2fb1bda01456d99590e9aec3b45f +OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/437c260499f4a28db9efb4bbdff31c0f675f3ccef1bd48fd2dfbb8c8897fc75608bd7247293bd3eae129b133cb05c3c8150dd19c243faa09b6506688f57c633a +OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran5.tar.gz/md5/376567d56bf4314f8a4adcfc4d1baa66 +OpenBLAS.v0.3.29+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/c4952874b19af4fd0d5541999d07094f7e7e983124964405a4756b9adf619172b7128e11557e64a80bc4eadaf76c783609a75f25ccfc44fc4f181886a0c8ca18 +OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran3.tar.gz/md5/8f7abbc6d5cefdbefb2b9499ec8874c9 +OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran3.tar.gz/sha512/b8c39674df9400efecbe4ac740f0c3ef11a04dd852f31774d63db3ca6583a21c8e0a0b80aa4e7b82be7a8fa3de38892d4fbca34244acef7fb49e8ffc0e1eed09 +OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran4.tar.gz/md5/6b0f0544fe45de9d2dea946c7f55cc40 +OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran4.tar.gz/sha512/8c21df39a8ee99614ef0880706c1497d032f68dfc332cc5ee111f69bfc818db4896115a964f16115ac49b01b31713037c905792d9586dd05471efdb21dd0be88 +OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran5.tar.gz/md5/aa343048c35c5227a4bcc37f25ddfacb +OpenBLAS.v0.3.29+0.i686-linux-musl-libgfortran5.tar.gz/sha512/af6c9d15d9d5a4901d228522d2e20da5276f1bf35d7f34648697ba7a39153a9152dc17f5f0d360593e733ef3e3317df29581cb86fdd9fe8d6e6093592a6240bb +OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/8595dda5ee1f15b2070d8ac20077f389 +OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/df7d7ad68b47f8865d01f6edd6ba44587c6563ebc4a1900f92210b5117fc7c581e6145f95e10fe7a3db48eda9805330073c8cbeec7eb8a19978ec33f2528cef8 +OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/4e67905ab599f24327e9726f70d261cf +OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/13ba78e98d7c2cda62a6ca9226365e90fa8a5404e4006ae5e49030b314b762a37d78977f14c72448c844e68a6b83ecd679c60362fde023c9052b9b8597d7775c +OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/e78c5005d9ee57ab464fca86c6d6fff1 
+OpenBLAS.v0.3.29+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/8ceb9527461136cd4f4d02f10c241f5e7070991f73c974389acedb1d9d7be4bade592bc021ba1001c5ac148ea580cf8355fb89c88438820bfa665bf3e72392fa +OpenBLAS.v0.3.29+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/421d93da4cfab0df79569e09dff1015b +OpenBLAS.v0.3.29+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/351174d948626ad36daf40c45672cd1ac40bbe4af25c28332fbea62a7ba89188a7d33836d327d31ce99b9a9334c6053366d33b58f588355c2818e332e46b34d0 +OpenBLAS.v0.3.29+0.riscv64-linux-gnu-libgfortran5.tar.gz/md5/34cc0b3260d9471bc8fb32005e3c5043 +OpenBLAS.v0.3.29+0.riscv64-linux-gnu-libgfortran5.tar.gz/sha512/5eec279c5eead55d099d8db4a75dd4a3f2bcbc8bb22b33884a89d678e4eebf87c6dece1aa4c24374d0162b35f376648a473c2d6d7866583e61016e37f4262820 +OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/f921a0ad6ebf91f444cb8d927886e573 +OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5cc98edf9fa8ba8981ce78b2595fd27645c783561ff19d0fd25ecc927f63492437a4b9b80d5caf51ad619b7ca5d24cb43e153156921f9f03c64741014b686196 +OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0126b52c134954e63ab8f9197afebd7a +OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/6d1e37009e6831a26f25bfd3e95dbcc841ee50a3f84dc4355d7fd528cd74a400138955558306345e986a732d0d1ef9294c4f5be457d05119a8e1e5851cc8ca20 +OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/e8c7bd786672a9caf989dbe4fcef896a +OpenBLAS.v0.3.29+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/2e708fddfa8e5821d2e44bbc00a86df83b09cdfc0054d7c2bbb2a8de52ed80c95973e6602048335a60b54be1baeb617121b605644daf50579b2044d0c5766063 +OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/b1efd957a2a63f814168bd318381812e +OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/097a750b7f381089588498e52a2b07a67202bfd4bc2e38f5abbbeb372129e392fcd53beade2fa7cb60ef0038f2baf61d57fab40b5585806d3ddb1fcdad73bbe3 +OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/c3560828f503962c6ae94135c4f00ac5 +OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/1ad514172e51a5d3eb6fea03182e3eb9c6db99d9d11c430e3d8542a9ce0f5d6967e623b9c0951535b683210ce0b02460358c67520b06363594f6063f8f012396 +OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/07a9c3050824bbc6a96efdb333fff0ea +OpenBLAS.v0.3.29+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/b737ab1fc8c5ffd1494804c59f8fd3e5d3d8a063a89fbbc29cbd75d43af233ddf77f63d0e514059164517f408ea340ffe95c020a7c696af8c52be3a7259922ab +OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/9a4a828a1b58737c79eb170c94021c52 +OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/785443a38cda87a63ee4268cdaa51bbc2c4662de27e0695cd7e21ffe55c3bddb1fa1a399edec39c3466f2ea0bd5ce727daca2eb381213059419c2e8371b5a733 +OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/cd4afdd6f6ba06c7541e7124316802b3 +OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/55796fdb52e1ac92750dfc2233d3feb37b53920b12024be605bf6c7322153c4dbeb650f16d6def4f0fac685733a04a1c4cacb1fc4e562a27a00b4f44484a4715 +OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/8cd55ac7a7f0a7bda80b44171793718e +OpenBLAS.v0.3.29+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/728991a4c39d691abebac3ebbb2dbe093f3a4acd2d3aefb5c7c08bccf0dc1fd5aaa24de6367961d278d448b76a4ddacab36b7be15128f7ccec5049eab83828da +OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/c2dda93a61e02812831b6a6e33f7d2ca 
+OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/bd62e44f266b834c6dfab068841506a83eaf510eefbcf8896dfca36671321430293dc251885af108d94affc5b193919e0e29c965fef3ce6d994df37324aef013 +OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/8cbd64d2ce4e3944e702696839a4ad3a +OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/3621dfa5cf8bca62bb8f2a049acdc0ed4e02cb2b5585758e6e1173e61b3a5f0e1655a10f2feb2f0e70a098b00181d0b24dcd61e1205324d436b712f58e58df5d +OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/476f1ebfb93baad6fac778fa00c4f99e +OpenBLAS.v0.3.29+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/06aa18da572b0904e5d8ec0823626d0af02a29224aba98efd43d8fbf4636d2625ece9f88f9a86d2e493f016c106f2ae71422191afc16dda2b26bbc81eb09d901 +OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/8c55d04d9def74f6bc2cc0d03b764975 +OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/d6196a339a263d80c05b94596ec5acfeff6e3ce93fafee348a864f760aa1239aa59ee294cab29fd730dcf7974ac6dcb230433184be093612bad3bc3edc067649 +OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/8427f098a44457ba65b21a16439ee6c0 +OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/4855321b2a16d55e1c6e830e33d0a199286002798c0f33c7f594a55626b5a502df94c172de4fd0a38ab6ba92f384abbbc3ef06123c3115a3f290f50a9d43ae9d +OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/9d1636bb7500d9ba15ed703231f8def2 +OpenBLAS.v0.3.29+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/0b3530fd95e01d58b85157d7bb75e44ee7b2f0c5a912920ff0763f404e1ab28d16a624463f3f20241c7baea57e00fca3f896d6e0befb6a1c9e5ece4264b87e35 +openblas-8795fc7985635de1ecf674b87e2008a15097ffab.tar.gz/md5/095d293409140dd8eee500eb92372eb7 +openblas-8795fc7985635de1ecf674b87e2008a15097ffab.tar.gz/sha512/7b10d4c2bef68159e0a88fb6d4fd0ecca17b4c6394479e8f838f5078d9d5acef24c6bd44777d43c03859c952d4612d76b57aa0bff367b197920ea16eb3839144 diff --git a/deps/checksums/openlibm b/deps/checksums/openlibm index 4c8ad913fc58d..cad61fd42cf94 100644 --- a/deps/checksums/openlibm +++ b/deps/checksums/openlibm @@ -1,34 +1,38 @@ -OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/md5/cbb86fb881fce48d837e40017272f7f5 -OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/sha512/3fc7753ce12e0012b42e469084351ec97e20c1761b50add48af4b4d92ba61b9db4a015243f31f7ec9bf322a9a4e098cffad31cd4501424568bb49fe973b63a35 -OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/md5/416b30b38b91a76d1613753ec85ac4a9 -OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/sha512/9ee5821ee83cd5e29c4f338e0076fe3e3705925e8556abe95f0356151ae93f23be0bbcde42cbf6b382e8c29a201959cb31be349ef3d1447e2d19a414a476fc55 -OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/md5/b170fc0058803377d7c4d7d0c9e4b954 -OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/sha512/be311888953887745a2d64eb3d1d5755e2f37e9f46558c6f6722520c503ee23d3470e97d7bf28f0a03915a398c31080e789d6e1287a2b743b6fd3f37b3a2911a -OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/md5/63f60d2f13acc6fd2ba3854a8ecf2b0b -OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/1853a8346f460cf7e26efefb27c36440976e40f000aefc22a81bb69bb25d07675a19f4b64c4dea8fedaaae76925467739cee8cd5743689ae55512e57dab54286 -OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/md5/5b410e9611b188f34fcc5314c45d2297 -OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/sha512/ecb2fd14728c40c7e3d2cf7c4f1dc06568f1dacc60677218ec59011cd913cab173c33db1c402a8b27b8f0556ca66667ebc033130222617cb4f5d9d8cfe7431ed 
-OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/md5/0fc3732640b6bfd52759e74be75e2290 -OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/81214ec825d33e37c9e889bea054f3aa72466e5fa585356247ef0ec4653b548f7836219092a8c7f0bc3c694e97582012cd026325e0b1c1a6fc113c461dfe49f7 -OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/md5/7ba7a7f9461b43b8ac622b9fa9c0489a -OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/sha512/e088f806f9fad70b2e6ea28a39ffeb083b4c1c215b1cac73e848a06cb19efcf3ff100e4d401ec2a0ed225ecba6dad115f5d245a612a39c90334a753bc79947e6 -OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/md5/4a2eb776682551a25bf1d27e9d8b645e -OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/sha512/716808c4a2a8c06439072a39db1f4b93a171a2f42e9677cb7f3eba04f204bc181f96c915ad8c74141952eb783cd82ecf3804ea60d830e3f5d9b88bfb4924223d -OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/md5/1acd5b790b7b8d1c40c6b094b99fcdb6 -OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/sha512/22c28a5c5e9542ddfb23297523b56e0a964bc322d252d5681e763c28d4c876dd683d3456297e385f560ab4cf378e5848047aec6cc934850fd0a4df0ea6967b44 -OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/md5/8e974b3bafa9dfe1cdba1d31049d7e85 -OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/sha512/df9e5250dea575341ec1a40d94e3485701690542bc7dfede0504c04fdb7f3fd272d88debdd6546d8644563fb244373b5f4e214da1d0b0133db5b66cdafbf441f -OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/md5/83f68736e911b7c700bf7a8c79cc48a8 -OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/sha512/b879f81d0c02f457310efc08269a7012fe6ed479d33bf79830e48dafce03976425566c5a210ed4833e106848eda038ae531f0c956971b3139c60002a5b1c1325 -OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/md5/2cd7845dc3d5558cf77e8b6faac4a659 -OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/sha512/f894c5b7541ebd4f652cb0028b2d367db6af13258d5a42722f19e0ac3a6bc438453e5c2bd17d8b491288c93796ba638c4a3a247f9d33abbac392a6db9169cbcb -OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/md5/eda96ecab33dfb9a015375813c41d14a -OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/sha512/641d35dfde15b0868c4ede7d2210ac136e4ca7bf7a969623cbecd792eb09e60824601a6178dbc2c8e4d49523aa105956b5f63566b789003ec736164a8fe5df4b -OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/md5/8d8e6ffa1ad9574bd8ead8b99754e122 -OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/sha512/a66102e69688fdda1c1a51ab07697db0e183b6def21dfed411cd6a92e6c23b22eacd6cccab16e43b86e318d967f81d43dc26bc778746186965bc68c65c7e87a0 -OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/md5/7634eb65eea08ab48a9224295071f058 -OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/sha512/f2c0a6d5113de911aeb516308b1d0ea71a1d61f2ce54c367670a6567444e0030babd66546489e288891678aa6613f40fd0182261de3244f1aed1085c2a32e81c -OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/md5/9de4a420caab2cb53ddf86eb0241885a -OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/sha512/928675df75d6c6f1e1acc46a2bb554ef120d74671d23e8682adbe05732f668401eaebd98d534e54f7f34e1657d2e1b1d19e18b3822faa891cdf06afd0c3ee56b -openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/md5/19408d70bf042a109e1c267a53740089 -openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/sha512/9597fdcbc4af8369e6eecc3f8e86f251661cc64d236578f3ee8a6b39e77a47951446e1a0fe1151513da153e7ed17bf39aa5a36c32153d0d0400232bed2839e22 +OpenLibm.v0.8.5+0.aarch64-apple-darwin.tar.gz/md5/5fcbd746e90712e396e76dc4e76724d0 +OpenLibm.v0.8.5+0.aarch64-apple-darwin.tar.gz/sha512/f4ac2bc38bdc723384b67119daa2974fb43da34b2e45cea2029ea48f92c84c4cad6dfb43521b09a1e89ddf8c5b8cc22a38fa4b78ba39ac7524fd6bd1ba897aa9 +OpenLibm.v0.8.5+0.aarch64-linux-gnu.tar.gz/md5/4d1b4cd566805b5179c5ecdd060da473 
+OpenLibm.v0.8.5+0.aarch64-linux-gnu.tar.gz/sha512/a9fe1a3d2e3898c017eb8615b2f3dbb514995ff041ac964c931c99c60d8cfe4eab7563a9cd65058f42f83c812f33d998573a7c5cc56a2e3960a4657e459ed321 +OpenLibm.v0.8.5+0.aarch64-linux-musl.tar.gz/md5/413be59af62b3ce0ebafeca093e3179e +OpenLibm.v0.8.5+0.aarch64-linux-musl.tar.gz/sha512/7bd76373e047ba854066af61f1c56b2e3a4d28c266228d7b30f596eadbaec52b070548ae60d41840c425ad5d0829c6c0cdaf326f2f160ed7508877ab5ec1a4b1 +OpenLibm.v0.8.5+0.aarch64-unknown-freebsd.tar.gz/md5/80736f9022c695eb1198e0b591a8fa63 +OpenLibm.v0.8.5+0.aarch64-unknown-freebsd.tar.gz/sha512/c633644578265e7ccc259ceb0442457b8c09290b4861b66c86dd6be7b30c4e394e70728142798097d6fe3afcfb4d9d1bd7ef58513fe8eed5684a4fba51bf185a +OpenLibm.v0.8.5+0.armv6l-linux-gnueabihf.tar.gz/md5/8fe0900a318393a290907f016bc654c3 +OpenLibm.v0.8.5+0.armv6l-linux-gnueabihf.tar.gz/sha512/167100a2d46e68462ef9a66915ced881d6358f05337bd38f2f77176f41cfd5be37e3c5226dd5d7d59147bd3e1aa7fb0893c1c81e9516134d3ab663b5752c4969 +OpenLibm.v0.8.5+0.armv6l-linux-musleabihf.tar.gz/md5/e8566719387984604f19dc5f9354a783 +OpenLibm.v0.8.5+0.armv6l-linux-musleabihf.tar.gz/sha512/532dd2b764fa15f7a838fb14cccafd2d4fe8fa4a132ea8394479a719c7aee11442f1b8a18e5d4a26ca820fa696d9d2afc7f5ec63dd96fa3b6763cea72b7026c3 +OpenLibm.v0.8.5+0.armv7l-linux-gnueabihf.tar.gz/md5/8fe0900a318393a290907f016bc654c3 +OpenLibm.v0.8.5+0.armv7l-linux-gnueabihf.tar.gz/sha512/167100a2d46e68462ef9a66915ced881d6358f05337bd38f2f77176f41cfd5be37e3c5226dd5d7d59147bd3e1aa7fb0893c1c81e9516134d3ab663b5752c4969 +OpenLibm.v0.8.5+0.armv7l-linux-musleabihf.tar.gz/md5/e8566719387984604f19dc5f9354a783 +OpenLibm.v0.8.5+0.armv7l-linux-musleabihf.tar.gz/sha512/532dd2b764fa15f7a838fb14cccafd2d4fe8fa4a132ea8394479a719c7aee11442f1b8a18e5d4a26ca820fa696d9d2afc7f5ec63dd96fa3b6763cea72b7026c3 +OpenLibm.v0.8.5+0.i686-linux-gnu.tar.gz/md5/9580d34e69d6067427b9c33db631cfd3 +OpenLibm.v0.8.5+0.i686-linux-gnu.tar.gz/sha512/46934f82791f69ac5f5da0dab7dcc6e3e9a4577c3bb529e9c0519c38f140c7b54517c55ff3579cd4ed4df68f0863e006aa98e51873f1dab452ce9f853996429a +OpenLibm.v0.8.5+0.i686-linux-musl.tar.gz/md5/66bfc9611d04c5d609e7824cb076d24b +OpenLibm.v0.8.5+0.i686-linux-musl.tar.gz/sha512/1bda2395d44c22aba3d1aab2b08ae06f763d3755037d454aa73f8e8134289a1ab5d65862bbc5a17a7a6b9f2918eb87e926b21527ddc4471e2ea20d605ba14e2d +OpenLibm.v0.8.5+0.i686-w64-mingw32.tar.gz/md5/0e97311b2f08b57d79085635f01ccced +OpenLibm.v0.8.5+0.i686-w64-mingw32.tar.gz/sha512/ae061ea406c06969332af58ed6fdfce2825326d771d30274d90775a1709b0361b7ca1dc7e6b0b76b93e4dd7a81d1842510a2c835251ee0a0978d6c839d96070e +OpenLibm.v0.8.5+0.powerpc64le-linux-gnu.tar.gz/md5/8ecfff7db76eee29591a654871e88855 +OpenLibm.v0.8.5+0.powerpc64le-linux-gnu.tar.gz/sha512/af03993b162316dd581f6ba5d1c23bca4c26cb22356ab229f326c42e111acbdf7ef45c9ad05894fe2d68794a63670cf89888653f788192a38b9255ce4bc72e28 +OpenLibm.v0.8.5+0.riscv64-linux-gnu.tar.gz/md5/69e06de135940666791c984941e9c4ad +OpenLibm.v0.8.5+0.riscv64-linux-gnu.tar.gz/sha512/2ac84deb7eb80a6a6237eff6fe861fd2907b3c95d1a76366dea062f3f35228dbc67aa40bd982e646508b4ff7cb6ef029111e2c0325039e60679800d6c6886be5 +OpenLibm.v0.8.5+0.x86_64-apple-darwin.tar.gz/md5/bd671ab9fe01835cab3e42e7cfa790fb +OpenLibm.v0.8.5+0.x86_64-apple-darwin.tar.gz/sha512/8bf2e66df17effc1e8778453904ffc20127f785bf096873289e8fdd8b17069ca844faffbd9f7621b87a7cb0a0051037eb9402360f2a03cf8794fbac8f7719777 +OpenLibm.v0.8.5+0.x86_64-linux-gnu.tar.gz/md5/df7fab134fbce3b625e9a82376f23e79 
+OpenLibm.v0.8.5+0.x86_64-linux-gnu.tar.gz/sha512/64d07434e0db79833f84a2225838456eb9532617d377a776b3a534a908b1673bc4f890903f95350e4045e05c29539d993a18ecadeb879761e279ec3947f74390 +OpenLibm.v0.8.5+0.x86_64-linux-musl.tar.gz/md5/ebef6bb7651d116b397e035f39adfb1b +OpenLibm.v0.8.5+0.x86_64-linux-musl.tar.gz/sha512/de9036073e5dba2721b4119ecbbd21a0c9f75b65aff9392b7e88e464da35b97135d62404477441d0dadd3a2f8d49f1082291b35bf4b626fb1096d36d401980bf +OpenLibm.v0.8.5+0.x86_64-unknown-freebsd.tar.gz/md5/1115497539f00a37af18aa6516d52268 +OpenLibm.v0.8.5+0.x86_64-unknown-freebsd.tar.gz/sha512/71a2c06d141b3671fd220f2d88d72e845848b6d2b08a7b3a6c4bb1d5cc27cc450e1e681647bb583e7ed6375d5a70748401e95e61dc95d7808f33a9aa06755337 +OpenLibm.v0.8.5+0.x86_64-w64-mingw32.tar.gz/md5/b6b5335f4c83f7ebf0f74cf753358f00 +OpenLibm.v0.8.5+0.x86_64-w64-mingw32.tar.gz/sha512/e8351ddda305b757f337bb7ea26c441968843b23861676f0bdd7bcf83bb3969af790d4112307d3204eb87fac044dda9be305f349700ebe9ba2bfe3d6df24fde8 +openlibm-db24332879c320606c37f77fea165e6ecb49153c.tar.gz/md5/2375dd448e77e59152442a4b33abda01 +openlibm-db24332879c320606c37f77fea165e6ecb49153c.tar.gz/sha512/36054e7051990d04913f054a0542e2e104273f61308e9a442c2dab3dd392d40c03f264fbeca93c4296218eed85dad71028989a225088254013d752f4407d57ef diff --git a/deps/checksums/openssl b/deps/checksums/openssl new file mode 100644 index 0000000000000..c973f592861f3 --- /dev/null +++ b/deps/checksums/openssl @@ -0,0 +1,38 @@ +OpenSSL.v3.0.15+2.aarch64-apple-darwin.tar.gz/md5/d11d92e6530705e3d93925bbb4dfccff +OpenSSL.v3.0.15+2.aarch64-apple-darwin.tar.gz/sha512/e30d763d956f930c3dab961ef1b382385b78cbb2324ae7f5e943420b9178bc2b086d9877c2d2b41b30a92ca109d7832a2ae50f70547fcc9788e25889d8252ffc +OpenSSL.v3.0.15+2.aarch64-linux-gnu.tar.gz/md5/d29f0d3a35d592488ba3a8bbb0dc8d0e +OpenSSL.v3.0.15+2.aarch64-linux-gnu.tar.gz/sha512/67c527c1930b903d2fbb55df1bd3fc1b8394bc4fadd15dd8fb84e776bae8c448487c117492e22b9b014f823cc7fe709695f4064639066b10427b06355540e997 +OpenSSL.v3.0.15+2.aarch64-linux-musl.tar.gz/md5/4f5313f1f18e29585951e95372a7a0fe +OpenSSL.v3.0.15+2.aarch64-linux-musl.tar.gz/sha512/48007a1f6667d6aeb87cc7287723ed00e39fe2bc9c353ff33348442516f1a28961985cc4a29a2a8f76b3a7049bd955973562d7c6c4af43af884596def636f7f8 +OpenSSL.v3.0.15+2.aarch64-unknown-freebsd.tar.gz/md5/5b6041353197bb8f75b39ed8f58cf4e9 +OpenSSL.v3.0.15+2.aarch64-unknown-freebsd.tar.gz/sha512/9be617d51fdc167085887380e720e6baf8e1e180f455b297f44d0bc0862fd490f015b5292d952d4ad095750cde796cc7dac4f901389b73135cb399b3a9d378c1 +OpenSSL.v3.0.15+2.armv6l-linux-gnueabihf.tar.gz/md5/858f548a28e289153842226473138a3e +OpenSSL.v3.0.15+2.armv6l-linux-gnueabihf.tar.gz/sha512/f9385678fca65d1fb8d96756442518b16607a57a9b6d76991414b37dfc4e30a7e1eebe5f3977b088b491216af4a34f958b64fe95062ee9ae23a9212f46c4e923 +OpenSSL.v3.0.15+2.armv6l-linux-musleabihf.tar.gz/md5/c4e52ecb4f9e24d948724424f1070071 +OpenSSL.v3.0.15+2.armv6l-linux-musleabihf.tar.gz/sha512/12f9276c68049026f2741c7d97e62d24525e5e832911546e1ea3868362034e6384304d749730122edf828b8c5573084055d59cc0bd75bda32f000ce630837c2b +OpenSSL.v3.0.15+2.armv7l-linux-gnueabihf.tar.gz/md5/767d3f3047366ccd6e2aa275f80d9f6c +OpenSSL.v3.0.15+2.armv7l-linux-gnueabihf.tar.gz/sha512/17700fd33c221070a7dd2db79d045e102591b85e16b3d4099356fb6a8635aea297b5fcef91740f75c55344a12ed356772b3b85c0cc68627856093ceb53ea8eb3 +OpenSSL.v3.0.15+2.armv7l-linux-musleabihf.tar.gz/md5/3ef2385cb1fec9e2d3af2ba9385ac733 
+OpenSSL.v3.0.15+2.armv7l-linux-musleabihf.tar.gz/sha512/6156e9431fa8269b8d037149271be6cca0b119be67be01cfd958dabf59cdd468ef2a5ebf885e5835585006efdedd29afc308076283d070d4ae743146b57cd2b1 +OpenSSL.v3.0.15+2.i686-linux-gnu.tar.gz/md5/e62992d214cec6b1970f9fbd04cb8ecd +OpenSSL.v3.0.15+2.i686-linux-gnu.tar.gz/sha512/dfdb3d2d1d5fed7bf1c322899d6138c81f0653350f4b918858dd51bf7bcc86d2d04de824533925fa5f8d366a5c18ee33ade883f50a538b657717f8a428be8c60 +OpenSSL.v3.0.15+2.i686-linux-musl.tar.gz/md5/186a6bb8055ce089ac0c9897bd2cd697 +OpenSSL.v3.0.15+2.i686-linux-musl.tar.gz/sha512/f3c8d608113e9b0e91dd6af697172a46892d4a66572e35e13ad394397291dded3042667c1ec4fafe051778e71ff56a876dc3e848a2b85cef9f925ef3969ab950 +OpenSSL.v3.0.15+2.i686-w64-mingw32.tar.gz/md5/b72b8e4883337e4bc90094dce86c8b8b +OpenSSL.v3.0.15+2.i686-w64-mingw32.tar.gz/sha512/3b5ddef15ca1463ab92ef5b88df36f8418c8c44ffb123a0922e55718ab317b5fe379994aba9a5e8ca112475043d5cf99b1574702cdb30de438f458ee06ac80ea +OpenSSL.v3.0.15+2.powerpc64le-linux-gnu.tar.gz/md5/da194ce6f37f34cc19cc78d25c9af5e2 +OpenSSL.v3.0.15+2.powerpc64le-linux-gnu.tar.gz/sha512/e256a9d9a0af8764de730419281aa4d3ee9f6146692ec9105a318d8301d8fda5cca82c6ef4d0d7b70d721992361771724b237ce26ef81f92c295f6056d5a7cdd +OpenSSL.v3.0.15+2.riscv64-linux-gnu.tar.gz/md5/86825ee5f83ec0c827d5c051fe1a3d41 +OpenSSL.v3.0.15+2.riscv64-linux-gnu.tar.gz/sha512/7db4ae2f0a9491ae484da5b8b0c3698d970ada91c83f9783c9e5bd92006f52dffa1a4c7fb282b63e34760199a97c52793040dc306ad0986970cfa233e29cb195 +OpenSSL.v3.0.15+2.x86_64-apple-darwin.tar.gz/md5/271cc359f5bc4718659044ad5ac7631d +OpenSSL.v3.0.15+2.x86_64-apple-darwin.tar.gz/sha512/10e7575dc4cce6c617c96e6f94dbfe3058aad696292d3fac4bde7c92623f2a849b7d10e35b156b7582294b3cf103d61b3ea73605f958ee4c9f8ff05b647939a7 +OpenSSL.v3.0.15+2.x86_64-linux-gnu.tar.gz/md5/5d045d93d632af9914bff551f67eed9b +OpenSSL.v3.0.15+2.x86_64-linux-gnu.tar.gz/sha512/240791382d9549be029e2d404bc0e962f9876ab0597bf20cf34c87fcfafc3d75ba9f223641287895f9aee8519a5a33293910ed6d67bc1424ff3513eedaa8b699 +OpenSSL.v3.0.15+2.x86_64-linux-musl.tar.gz/md5/bb2637babf3730ed1117f89cb8aab34a +OpenSSL.v3.0.15+2.x86_64-linux-musl.tar.gz/sha512/b847539acc00870f77b242eeccfcf16f590493b7deb0089fa3654026f4016d40f9595d3bbb21ab981e9decfde4321da71f162beb1837a158fd3a884375a86fee +OpenSSL.v3.0.15+2.x86_64-unknown-freebsd.tar.gz/md5/23b69e0256e6c86e026be3ade20aed5c +OpenSSL.v3.0.15+2.x86_64-unknown-freebsd.tar.gz/sha512/1b7da1e13d325c7776b8e1a63aaa334bd633bb10604f8bed5f5f6a81955268b3d11ad221a5dd181dbdc7ad27c35d5754e6875d36226003c2fd7da6cd91854de1 +OpenSSL.v3.0.15+2.x86_64-w64-mingw32.tar.gz/md5/73cf4138ab403b7c9f91368a030590f9 +OpenSSL.v3.0.15+2.x86_64-w64-mingw32.tar.gz/sha512/052bb52837c29b4b18a97df71a80ad77486bd6ccef6e2e57dfa68a02754180976dc0302a158886393ef13fe91904f963119b17429a4ecc6f8b6c80ff878df05d +openssl-3.0.15.tar.gz/md5/08f458c00fff496a52ef931c481045cd +openssl-3.0.15.tar.gz/sha512/acd80f2f7924d90c1416946a5c61eff461926ad60f4821bb6b08845ea18f8452fd5e88a2c2c5bd0d7590a792cb8341a3f3be042fd0a5b6c9c1b84a497c347bbf diff --git a/deps/checksums/p7zip b/deps/checksums/p7zip index b3c24a811a043..6850967ace1b5 100644 --- a/deps/checksums/p7zip +++ b/deps/checksums/p7zip @@ -1,34 +1,38 @@ -p7zip.v17.4.0+0.aarch64-apple-darwin.tar.gz/md5/af8134ed9c24b99d69e4edb4d5226ca5 -p7zip.v17.4.0+0.aarch64-apple-darwin.tar.gz/sha512/b8bb6aee60a54cca37568af8b2d9baedd892ba0d4918b93bcb29d74189524af7115901f4fabafb1ca58ed17e97c59846fcdfbd460abc81059806802b0a7be840 -p7zip.v17.4.0+0.aarch64-linux-gnu.tar.gz/md5/20abac5ebb99f31742878013c02f96a3 
-p7zip.v17.4.0+0.aarch64-linux-gnu.tar.gz/sha512/6d8ebf895b969b1f707d0c23a19db4cd0dee47957d076e6e389395e09404d55bfcb78bb14bb67bb35b93b6a0072f2b4f097d839503d1ccab62b4ce28939dc71d -p7zip.v17.4.0+0.aarch64-linux-musl.tar.gz/md5/185c979c7419b7ded3832c0f5cfd3b77 -p7zip.v17.4.0+0.aarch64-linux-musl.tar.gz/sha512/722e880c9f111738cb4cde84bf62c36892dbefdba625ae2b9e0fae76a7b1eabfa481a9838fbf9667223f19f62b6f09fcfd42b50c2bff7a65af0fae3616250fc7 -p7zip.v17.4.0+0.armv6l-linux-gnueabihf.tar.gz/md5/dceb37181763f86bf12f8ca473cf3403 -p7zip.v17.4.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/51e409bbcd3c54838cb3219b2476c8b45c8340e0a2fd26cced0d8484ae7f51711723e06e9023fce9ae9a1b51b5fb94aba536428ce2a5c5902b38498a0b3c2b50 -p7zip.v17.4.0+0.armv6l-linux-musleabihf.tar.gz/md5/193ecd888787ea03a500d102a7e33afa -p7zip.v17.4.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d525aad33f5ed27dc993f31c6db2996b830716bfac9bc7c49cb462ea3f0b412d0d3267765b9952c85e9c9be31d36d095d55ba89c0fa2c92823d9490372389c95 -p7zip.v17.4.0+0.armv7l-linux-gnueabihf.tar.gz/md5/096f11a7f1af5ff730bb8cfef22e335e -p7zip.v17.4.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/1866ffd0169e0795594aaa70f1af8102ebbd79b3cafaadfb9c6a537dac0cdbb6eb7c31ad5165a975508c1b850744f94b60d9c530d658cdcc5536a474203cff21 -p7zip.v17.4.0+0.armv7l-linux-musleabihf.tar.gz/md5/fef1576982f45d1922582f6f7a7d6665 -p7zip.v17.4.0+0.armv7l-linux-musleabihf.tar.gz/sha512/71061585b32fa1a8e0a403a60c07e9f90586291a9799d7e2d6f7e6ec9f7b0ebf4b45ed080efd87cad82c45f71ec9a14cbcf9134a73bad4f5e3329f23bc6df01a -p7zip.v17.4.0+0.i686-linux-gnu.tar.gz/md5/8818389b3bf00f10c6a39fe0c4a331b4 -p7zip.v17.4.0+0.i686-linux-gnu.tar.gz/sha512/bec2051a258f7e8a762b7cd4324e7b8f00fe5d99d48f05fb3557c41604e8b08af9ab66ab830f4a48086656be41aaf011b2aae0fb530e0ffefec38689f85a3bb5 -p7zip.v17.4.0+0.i686-linux-musl.tar.gz/md5/4ed9c16a65ed1d656aa214013e46eb28 -p7zip.v17.4.0+0.i686-linux-musl.tar.gz/sha512/7a5b3e15d0038bea0de7fc28ce058d7f93b8e04f271e30953a6b52d2b5d71f59d10177033e888a50cf8dfeb4f44bcf3271c9b9d1b28d0122ab2b239decdad446 -p7zip.v17.4.0+0.i686-w64-mingw32.tar.gz/md5/d06cff2ec0b7c8415700587f931ce1ac -p7zip.v17.4.0+0.i686-w64-mingw32.tar.gz/sha512/ed72440f5306a57465a70b00bff33185a83c3e223844a79aa0b0d1fbe30dbd35da75e6188725aa621f5c4574a09527daf1e4893c7c6979ab91b2c09b4979dbcb -p7zip.v17.4.0+0.powerpc64le-linux-gnu.tar.gz/md5/949ca7d111e497b82c9c762e5ac63a6b -p7zip.v17.4.0+0.powerpc64le-linux-gnu.tar.gz/sha512/4842e0d44bf6380100723209596f526181fefe8a81d59c28658d03ea16600e71d010d5c7898b4c943efdd9caaa2301c3fdb0dccb343d631d1734acda1c559f65 -p7zip.v17.4.0+0.x86_64-apple-darwin.tar.gz/md5/2322c7a08f62592ca394a716949008bc -p7zip.v17.4.0+0.x86_64-apple-darwin.tar.gz/sha512/9549f3e1052730ce13414636b32f0d1a9a1ac944a2b622380eac0da144b11fd65d437afe877ba6797d651da9c4ec77f0ebd3e515146caceaa2524829419eda48 -p7zip.v17.4.0+0.x86_64-linux-gnu.tar.gz/md5/a21b12946a62ef3688d5fc965974e8f7 -p7zip.v17.4.0+0.x86_64-linux-gnu.tar.gz/sha512/d32faeac23acf8a023f65350ba1d62bb3d9f904e32570ae03b8fb0a5375758784dd95be8caeecd007cbde40e103854a077e2c817f62afa72491f3b8966deb738 -p7zip.v17.4.0+0.x86_64-linux-musl.tar.gz/md5/c448e872d4ad66beb2d46d9134952f2f -p7zip.v17.4.0+0.x86_64-linux-musl.tar.gz/sha512/92588f4817e145ef655c718dec049e7f43dd93644f43f19cd320643fac5f5b2312837c7a6c3e782e97fd08747311c58ed4657484f8bc778942fc5206ff8ea4e5 -p7zip.v17.4.0+0.x86_64-unknown-freebsd.tar.gz/md5/2cca6259a2eb1b0fea777d566267bf05 
-p7zip.v17.4.0+0.x86_64-unknown-freebsd.tar.gz/sha512/92f90e2be4a8b8fcd80a4ceacac8bbab750913526b85f9279f8ee9ed91b77248b5de2d35d0c6241d0ad51fda185f4cb1ead1dcc9d23e2bef35e0b61efe3c3170 -p7zip.v17.4.0+0.x86_64-w64-mingw32.tar.gz/md5/5d272c78d7ffb40da0f333463f3cc098 -p7zip.v17.4.0+0.x86_64-w64-mingw32.tar.gz/sha512/2d999c6df4786cec1bba396b3a651a63740f4b799e9fc11754afd24438076e898daae74b4d3c7072450428e89881991e8884711cd4c349879a00c7aeeb4e1d3e -p7zip-17.04.tar.gz/md5/00acfd6be87848231722d2d53f89e4a5 -p7zip-17.04.tar.gz/sha512/ad176db5b657b1c39584f6792c47978d94f2f1ccb1cf5bdb0f52ab31a7356b3822f4a922152c4253f4aa7e79166ba052b6592530b7a38f548cd555fe9c008be3 +p7zip-17.05.tar.gz/md5/de921a08f37242a8eed8e4a758fbcb58 +p7zip-17.05.tar.gz/sha512/97a7cfd15287998eb049c320548477be496c4ddf6b45c833c42adca4ab88719b07a442ae2e71cf2dc3b30a0777a3acab0a1a30f01fd85bacffa3fa9bd22c3f7d +p7zip.v17.5.0+2.aarch64-apple-darwin.tar.gz/md5/2a254e251901b3d1ddfd7aff23a6e5eb +p7zip.v17.5.0+2.aarch64-apple-darwin.tar.gz/sha512/8efb9a2c9bcab388e523adba3dc0b876e8ae34e2440c3eee01fd780eb87c8619c7a7bbdc46d703ccefff6aa6ad64c4e4b45b723136ab1f6fd6de4f52e75ebbbf +p7zip.v17.5.0+2.aarch64-linux-gnu.tar.gz/md5/bb1f3773fd409dbb91a10f7d9d2e99b5 +p7zip.v17.5.0+2.aarch64-linux-gnu.tar.gz/sha512/e95ccc342be644570d218d25403b91a7db9ee983fbf8cce3deff453355d68d426f9301eaac865a98691025b596b8cd77ebebf6184c0eaf8b2f294bc6763b9a4b +p7zip.v17.5.0+2.aarch64-linux-musl.tar.gz/md5/3fac518a6a70412294d71ca510958cf2 +p7zip.v17.5.0+2.aarch64-linux-musl.tar.gz/sha512/fc127790739bf8a8b918b2e83753d86f5e79ee8706bde4cc79d74d9f7d846aae99a109da4b2b3cc92ccedc1eef4d52a555a65a95f588e173e0fecc11f2ca21e6 +p7zip.v17.5.0+2.aarch64-unknown-freebsd.tar.gz/md5/4190f8d7d42572b3fdab0fa382417d43 +p7zip.v17.5.0+2.aarch64-unknown-freebsd.tar.gz/sha512/5b0cb08374b8561873f76cb2b8bcbb8de1ff4c91bde23222cc1b650c6ea2fff265e48b6190551ed136324a47d25e1d357a754295b674e74b4628b20223ad067d +p7zip.v17.5.0+2.armv6l-linux-gnueabihf.tar.gz/md5/355410848192de3b02d12fd663867f4b +p7zip.v17.5.0+2.armv6l-linux-gnueabihf.tar.gz/sha512/8f103b41e755d157d70dacca89a0ef4610bea109686b4005e8edd5f79ed2e6419c00c2625d0ab90e6e33fa389e670490d8de263c0bdae952cc34cbbf440e275f +p7zip.v17.5.0+2.armv6l-linux-musleabihf.tar.gz/md5/34363b227306fce34a728af54b71064f +p7zip.v17.5.0+2.armv6l-linux-musleabihf.tar.gz/sha512/8dd7b37ce6223c9fedcaa999eb806eb6dec8c4a3133d3c07e2456cb8543b8e4f5b881c1bff2d2e25f19b1312b18673e9013aeff87d6a274eec6c451b1ba0d6b9 +p7zip.v17.5.0+2.armv7l-linux-gnueabihf.tar.gz/md5/dbb1fc0cf3bea674442ff8cc932a94cd +p7zip.v17.5.0+2.armv7l-linux-gnueabihf.tar.gz/sha512/c4d71d905fa420391417786ed206a0c334475dd0df8baa1fc3f6560ce548db11805003d0d0b35bb622fe818c761f2b0abe0796d1cbfce2a922da69e697f056a2 +p7zip.v17.5.0+2.armv7l-linux-musleabihf.tar.gz/md5/d188b5dd453faedb616ba9c48fdeab6b +p7zip.v17.5.0+2.armv7l-linux-musleabihf.tar.gz/sha512/ea30a775370502ca9e271b87cbda528d0c51d63ce0df41883d4dbc1527a32f251d797f3692fcf9b883b5fbaaad80515b971a8f8fe09ba102978b19a0ecb58528 +p7zip.v17.5.0+2.i686-linux-gnu.tar.gz/md5/dc02bdde045a0b6b22cf14d6960e63ed +p7zip.v17.5.0+2.i686-linux-gnu.tar.gz/sha512/d2d0dd14a5fc1163fea2276e0925bfa8d075d5dba1d8018e4e3160977d3b09642b2e521d8e57d049abaf0e2ea391a846f0b0136b3c59e8b476c8c52ac5210447 +p7zip.v17.5.0+2.i686-linux-musl.tar.gz/md5/0b8658147938a8ec109ee2b3b0a0665f +p7zip.v17.5.0+2.i686-linux-musl.tar.gz/sha512/411b2950f5928c537b87ba0651c09c08e57afed765db9fee89eda8b12939ef0da94c8ba38c0a24ba46b4513a0e4cca798eb09f2b20a011099ed3cf14455dd19e 
+p7zip.v17.5.0+2.i686-w64-mingw32.tar.gz/md5/98bdd8767c77a35f71303ff490a3d363 +p7zip.v17.5.0+2.i686-w64-mingw32.tar.gz/sha512/14f08071af74297df8bfe1d9f7efa3c0212e62ace573848f17b729e4c36dc3861110f3c5cc9315364c318e5b040736443a24492e86d76161993653a309996eb3 +p7zip.v17.5.0+2.powerpc64le-linux-gnu.tar.gz/md5/b18c917b9852898a9b9d6d24bcc6863e +p7zip.v17.5.0+2.powerpc64le-linux-gnu.tar.gz/sha512/0148dc8a0bc9c95212d7f8e2f92ee24e968eb7290fe72c7ae02e286bf5c05dd6b1f10b32350a7ff37777ed5a8cc21f3303f464620f3394c7a4690ae98bf77299 +p7zip.v17.5.0+2.riscv64-linux-gnu.tar.gz/md5/8d5f804091c2d21b2c35121d40d1024f +p7zip.v17.5.0+2.riscv64-linux-gnu.tar.gz/sha512/68042f32b8b9f8d422dc0390efa2502d4a1a816daf4adf1133128f9366ec93ee1c1dda699844c0c3c3649a6b55a16312bd6b8fe4aedd6780e6faf11509932a9a +p7zip.v17.5.0+2.x86_64-apple-darwin.tar.gz/md5/da31752a2556644d39e48649bb0111de +p7zip.v17.5.0+2.x86_64-apple-darwin.tar.gz/sha512/0695ad111263d2fadfdf9a46ce7ee80def0bf60db7d1c2585ed2af6fc945fb169311a9f1ffc6f95fb43b0b03694d2d1be9136d3d78ba2ef2b19228987883a385 +p7zip.v17.5.0+2.x86_64-linux-gnu.tar.gz/md5/2fb55d86e4eaccb0488bd637d088b996 +p7zip.v17.5.0+2.x86_64-linux-gnu.tar.gz/sha512/38ac355157d59c09f308fc29964d0e9c1466c9633efd8d3c6ff3c738abce2af45ebc6b92a29f56d5e7baa4871f9f39b14ecfcbedd4e2f4ca7c0fe6627c6b13e7 +p7zip.v17.5.0+2.x86_64-linux-musl.tar.gz/md5/f0bd567a851d2dd9d306552ffafbca3a +p7zip.v17.5.0+2.x86_64-linux-musl.tar.gz/sha512/e60047a6e7e3496cb6658f87c8c88676f399cd9f3d0d7daa880b6be09cd5525f7f22776896f1375722b47555514ff8c018f02ce800ec3fd0ed922e16e8a6d657 +p7zip.v17.5.0+2.x86_64-unknown-freebsd.tar.gz/md5/d37bd26e39a3ec84f262636f70624341 +p7zip.v17.5.0+2.x86_64-unknown-freebsd.tar.gz/sha512/0604a880c19f9d72d5828edd75be641625c29f230b3a5e7d70ec3812c014c96b76ee7b45e0e80f49be63f109a48700e75d1e5be01b5ae7b46d42dafda9885e8c +p7zip.v17.5.0+2.x86_64-w64-mingw32.tar.gz/md5/f02c7b2481dee880b096340a8735350f +p7zip.v17.5.0+2.x86_64-w64-mingw32.tar.gz/sha512/08b717c1b072d1309f6af8973eb09b1a482abb7ae7d01fba79873d4310a7c11292e2e8779029f99cc60627ed0d064224bc87782e587c520f970b840b7b838052 diff --git a/deps/checksums/patchelf b/deps/checksums/patchelf index a7122c400749a..6392e44d8f2e8 100644 --- a/deps/checksums/patchelf +++ b/deps/checksums/patchelf @@ -1,2 +1,2 @@ -patchelf-0.13.tar.bz2/md5/d387eee9325414be0b1a80c8fbd2745f -patchelf-0.13.tar.bz2/sha512/43c3f99fe922e2f34d860389165bcc2b0f3f3317e124eb8443017f71b1f223d96a7c815dc81f51b14958b7dc316f75c4ab367ccc287cd99c82abe890b09a478d +patchelf-0.17.2.tar.bz2/md5/d76db4f1a27b0934d0b0d0585b081c0f +patchelf-0.17.2.tar.bz2/sha512/8277adf95513f88fb190536a38bdfdf438a4cc7685d8a130bdffbe064441f0f25095b6c83bbb190133e1a138963776d15b46c247dd2f1a073a1bfe1d1dbdd503 diff --git a/deps/checksums/pcre b/deps/checksums/pcre index cab79abe745bf..9e290c914baec 100644 --- a/deps/checksums/pcre +++ b/deps/checksums/pcre @@ -1,34 +1,38 @@ -PCRE2.v10.42.0+0.aarch64-apple-darwin.tar.gz/md5/667a570d341396c3213749ee1e5b5fda -PCRE2.v10.42.0+0.aarch64-apple-darwin.tar.gz/sha512/c1bb99e8928efded9b0ea3f294ceb41daea7254204ca30c0ff88686110ccd58138d8ea8b20b9a9d6d16a6d8d3f34e27e74e7b57d3c8fe6b051c9d8fa6f86431a -PCRE2.v10.42.0+0.aarch64-linux-gnu.tar.gz/md5/1a758f275ff3306fbad7698df7b9b7be -PCRE2.v10.42.0+0.aarch64-linux-gnu.tar.gz/sha512/d09508c0b255366d01f1b4d1ae6748a8e47f18c451498d30715f5f968784990949dab7540cd086396abd912f61b5f7c44c8c72a27efaba0a7fc08b71a167c057 -PCRE2.v10.42.0+0.aarch64-linux-musl.tar.gz/md5/e61147579fdc9b57a61b814bdf9c84bb 
-PCRE2.v10.42.0+0.aarch64-linux-musl.tar.gz/sha512/eecaf4c1937fc04210b910ac65318524c02d690e8c4894c38e74eaba36d26c87a1fd9e1cc36f4307a11ff3552a79f081fa8f05085435eb34872dc2fdecce2d18 -PCRE2.v10.42.0+0.armv6l-linux-gnueabihf.tar.gz/md5/b4c484a3b87923c0e2e4d9cc5f140eb7 -PCRE2.v10.42.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/5931cf13d340971356a9b87f62c9efdb3656ba649e7b25f1722127a3fd70973d94c815a37b43cecab8eb0ed8d1ae02ef1a0c0a12051852c1b9242c3eaa01c496 -PCRE2.v10.42.0+0.armv6l-linux-musleabihf.tar.gz/md5/bc7b5bb1c5b0b99c121bad5a89299ca7 -PCRE2.v10.42.0+0.armv6l-linux-musleabihf.tar.gz/sha512/86b5ad4fa6f4b5bd1a76ad68ddff4b39916d0ed0acc03a3fee8eab5256aaed53abc0ff4ce9d9d9f8b9203c087211684da92fe6aa06ff5bc331ba1b3da2cba57e -PCRE2.v10.42.0+0.armv7l-linux-gnueabihf.tar.gz/md5/3541eb26fa5a4d13e2c7d063dbd900d8 -PCRE2.v10.42.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/872181f931662edaf653351486c5e2a700e94cfa0966ca90eca893fdc75dd46eb40d9d45737c198aa4b9ad8ebab33fd78697ef35906985e4e1c9748ddf58d363 -PCRE2.v10.42.0+0.armv7l-linux-musleabihf.tar.gz/md5/fe059feb18fcc9312f1033362070fe34 -PCRE2.v10.42.0+0.armv7l-linux-musleabihf.tar.gz/sha512/5a96acf3908c964ccb4f296c449499388ed447d9a094c2760c979e02ef656fa710ede3926b9626e89fb5b0545c111e6eedff21e48416e923c17fc9ff129d0519 -PCRE2.v10.42.0+0.i686-linux-gnu.tar.gz/md5/67f49cb139017109c422c51c0120823a -PCRE2.v10.42.0+0.i686-linux-gnu.tar.gz/sha512/8873d9995bdf5701fc5a24163f93eada12af76d09781a679a4ed61b66f117cf322505d291931d1c58b3b3eb560f6487a1100b0735c14abe6cb38677750b481c7 -PCRE2.v10.42.0+0.i686-linux-musl.tar.gz/md5/092af10d8182cb4240cdd975efce4d7c -PCRE2.v10.42.0+0.i686-linux-musl.tar.gz/sha512/79a48f4fd50ffdf49c8d57581e01ace38c1b3d7edd86d44db44b8efd93074d16faf035131a0d60c6631b8bf22f0fd8296acedba45908da56e8096c296122f047 -PCRE2.v10.42.0+0.i686-w64-mingw32.tar.gz/md5/2bb13db8b5d6d1a5632de3db874c2614 -PCRE2.v10.42.0+0.i686-w64-mingw32.tar.gz/sha512/7d1324696087c32d1bbbb64f5e4b8c8a220ef216d025886b3c3e6d685c3f701428c6696d7ae0bcc771d3295381ba2bdd5db040f788f8a9a58f80ad4d790dd141 -PCRE2.v10.42.0+0.powerpc64le-linux-gnu.tar.gz/md5/0de1215b2a1e9c0efd131355e9fbf2c1 -PCRE2.v10.42.0+0.powerpc64le-linux-gnu.tar.gz/sha512/69dae12627685ae665db8c91264a79aba7c60ae97eccdc79ef889f2a5f69b465fa333aba298fc90bbb95710cfc324e3630bc427a97577855e8fb6c8fe227cfec -PCRE2.v10.42.0+0.x86_64-apple-darwin.tar.gz/md5/c5c52b399921c5ab81a5f598b350d2ca -PCRE2.v10.42.0+0.x86_64-apple-darwin.tar.gz/sha512/e6c8ba3aa3fbf54b37079301ab317104c6852812b23835f52ca40f31f0831678172d32e077fbaa712a8a2cb16d62bb97d475827004353e7807922a2d6e049b28 -PCRE2.v10.42.0+0.x86_64-linux-gnu.tar.gz/md5/b074dd1f85e24e723349e566350e2c78 -PCRE2.v10.42.0+0.x86_64-linux-gnu.tar.gz/sha512/236017e02c9f32b913b772dbf22897c8460e5791f196c86f8a073e329ad8925f6859afe48f3bf18ca057c265f08fedbde255360d8f859e2303c6569ab1b0e1bb -PCRE2.v10.42.0+0.x86_64-linux-musl.tar.gz/md5/9f32ca77e79843fc9c4b5fc8ed336d11 -PCRE2.v10.42.0+0.x86_64-linux-musl.tar.gz/sha512/334a31724e9d69c6517568d922717ce76d85cf87dbc863b7262b25ab43c79734b457833cd42674eb6a004864e5c74da3ae1d0a45794b4cd459eea24d9669fac5 -PCRE2.v10.42.0+0.x86_64-unknown-freebsd.tar.gz/md5/037bf13e9a53eb90846b6643610a17df -PCRE2.v10.42.0+0.x86_64-unknown-freebsd.tar.gz/sha512/64bc9acda3d158621f442aa2e766730cc425df3795965f461b530d8152934ffaf93d75b86ebc483345b78b203b0502857683c183ec65a01da1834b55405c7f77 -PCRE2.v10.42.0+0.x86_64-w64-mingw32.tar.gz/md5/6b04c3778bf02947cb1b7e70a41f3292 
-PCRE2.v10.42.0+0.x86_64-w64-mingw32.tar.gz/sha512/9b808832cc48703ed525eca06d1dd0162dae3f94a9ad72d044876edcb86a90e8443c8b169e60ccf3507d5960156c447d8f3f30e586ac2a22b6d43dbe807009d0 -pcre2-10.42.tar.bz2/md5/a8e9ab2935d428a4807461f183034abe -pcre2-10.42.tar.bz2/sha512/72fbde87fecec3aa4b47225dd919ea1d55e97f2cbcf02aba26e5a0d3b1ffb58c25a80a9ef069eb99f9cf4e41ba9604ad06a7ec159870e1e875d86820e12256d3 +PCRE2.v10.44.0+1.aarch64-apple-darwin.tar.gz/md5/14de26cfc0f6ff7635fac39e81e81a27 +PCRE2.v10.44.0+1.aarch64-apple-darwin.tar.gz/sha512/45079ecca5f4966a32895fcc63585f1dd60f306dc1cb5c098d42452fcff67f7f6b405c200a15747af4680151bb6a6374832a0119b8ddd743d2ed13d0beaef7c9 +PCRE2.v10.44.0+1.aarch64-linux-gnu.tar.gz/md5/3cf179ed36d37bff698ab81cf3d5797b +PCRE2.v10.44.0+1.aarch64-linux-gnu.tar.gz/sha512/db93e5a5c0c46b5536ed49515682d9bfe1d23f6ba8ae2468289ec8f2160140f39f5606a3c7095f45251f3663d8ccf2d6d7e5e8b1efb21c39bbf9a13b6ec60ef9 +PCRE2.v10.44.0+1.aarch64-linux-musl.tar.gz/md5/02baa415218f581a5ceeb7bf7fc0a090 +PCRE2.v10.44.0+1.aarch64-linux-musl.tar.gz/sha512/1685f37ed8f465ecc2f738fdf65d20bb1806934ff2c50194882282fb6c3900121c61c39210e4c0b89847493bfc3e15bb7b9136b0d968103b47c8662a78b412fe +PCRE2.v10.44.0+1.aarch64-unknown-freebsd.tar.gz/md5/4de065ea59ab4f622b46079df1d9d941 +PCRE2.v10.44.0+1.aarch64-unknown-freebsd.tar.gz/sha512/aa6df9edfb690d155a8b5a9390db7ca11622ac0020174cf070a33a075801bfe43bd4c80b8e28017989a8b7374d39897cdcf72ab0e1962e3e234239975f7ac0b4 +PCRE2.v10.44.0+1.armv6l-linux-gnueabihf.tar.gz/md5/f8a0907fbb20a06507fce849db098c4f +PCRE2.v10.44.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/3f5bcc1742380a31683a81740d55e198d7ec8d8ea5a13d6d0556d6603e4fadbf0dc648093c44e36dd6d3793c52a5e3dae6f2f459c73e3d3b5a005f3395d26772 +PCRE2.v10.44.0+1.armv6l-linux-musleabihf.tar.gz/md5/8854c24183441aa6fd21989c00888904 +PCRE2.v10.44.0+1.armv6l-linux-musleabihf.tar.gz/sha512/a74d9378f071dc4cb021e5171d66cd4ac5de3b348e993fc90d824ce5d2f554f7c8af7af55ec31d874d302aaba7d542b6505cc5963e53656c28026a06a53ed48b +PCRE2.v10.44.0+1.armv7l-linux-gnueabihf.tar.gz/md5/04960309ee7cf69a53e280878d5880ef +PCRE2.v10.44.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/a1644daf036daa3799368598427c87c23bcfdddac55a0d06adca08a2e9d617c893285855af562101b05129d0ed0d84d22f5a8a1703316ecd09aa1752b8330eef +PCRE2.v10.44.0+1.armv7l-linux-musleabihf.tar.gz/md5/1335defc6090be76c509840633f7cdfb +PCRE2.v10.44.0+1.armv7l-linux-musleabihf.tar.gz/sha512/9595052eeae4da413b930b14d7e89359a29220cd9e908325e0b7788c8f4a2feb2134e78a0d8f56007787f0fefadc9de31750db6104bbdd048fa50e1d785c2a8c +PCRE2.v10.44.0+1.i686-linux-gnu.tar.gz/md5/e2d6be1d19566c965c2afeb995aba52f +PCRE2.v10.44.0+1.i686-linux-gnu.tar.gz/sha512/4a9d981bb6aa9150b670db7c5d4d188c8391fcb2a16bc710ede7a84bf7ec546fc5fd9096a339720579d25b6dcb5674b2b5b28e9664e5ef589b1a5044ce38b6a7 +PCRE2.v10.44.0+1.i686-linux-musl.tar.gz/md5/23cf857bd3daea4f094fcec48a7712dc +PCRE2.v10.44.0+1.i686-linux-musl.tar.gz/sha512/534f0cfab0cd60db9498eff387f7280a8baaf893a98dd2e7a737e68ba6473ed8236e9da85116eefb9812ec5323c705a00fcaff010b1900f752de8bdff65ef3ad +PCRE2.v10.44.0+1.i686-w64-mingw32.tar.gz/md5/3d05764df2305f16e4ffab60031ad40c +PCRE2.v10.44.0+1.i686-w64-mingw32.tar.gz/sha512/3e21cc6b71849c1a361373de30567990dba13dfd8812e7a7b5e2734b572bf1d45aeb730289d329975e76932c4c40e476824be2ab8e80a40fb7a7e2f46159235a +PCRE2.v10.44.0+1.powerpc64le-linux-gnu.tar.gz/md5/596d7c29d1417ed8959ea3ae3b4df453 
+PCRE2.v10.44.0+1.powerpc64le-linux-gnu.tar.gz/sha512/89e03bfd6890150e2c8dddc4e7d024f2e09421c25a3d0fef3b5cd7f6bab7d6402ec1e82b02ecb5d26d01dfa2fb6068d050513894c374b7f2244c8fcbf00d69e2 +PCRE2.v10.44.0+1.riscv64-linux-gnu.tar.gz/md5/8330a431f4da1d20cffdb64d2c270dfb +PCRE2.v10.44.0+1.riscv64-linux-gnu.tar.gz/sha512/a836d0b9feefd9ffd50cf29db72ab704e6ae442939322526e2a5613973eabc8e543c5546ce507b0c5f9e6f1ce324978aeb6e99f8833eb60fc90e74139e47c6d2 +PCRE2.v10.44.0+1.x86_64-apple-darwin.tar.gz/md5/18f13c78ff6388c601bd36788e526b31 +PCRE2.v10.44.0+1.x86_64-apple-darwin.tar.gz/sha512/7b43a289f54064fc3c292de98173ec91cde2e49402c99c7848cbdc0e6d90a23a86d41f521e3986fcc8d941ee070d09e29ddc89a4e23009b8e9333e577ae4a09c +PCRE2.v10.44.0+1.x86_64-linux-gnu.tar.gz/md5/9f45feca0955f81ceb898208b9c74e15 +PCRE2.v10.44.0+1.x86_64-linux-gnu.tar.gz/sha512/eac215838306f7b5adb2166c3f620a69ed52fbd752ef3673a887507963a826c305d9b078dbb5236dc9a45eaca0d34f77325aab41703745701a077c84822ec0d0 +PCRE2.v10.44.0+1.x86_64-linux-musl.tar.gz/md5/79f092c6e8e971027ac6c1f0987376fb +PCRE2.v10.44.0+1.x86_64-linux-musl.tar.gz/sha512/2c5655b0f719a7d442c89f1040f2973b03f8becd855a0cfd6c0a985a07b25de351a84e3b9daaebd952b62628db0d937de08a8d05ee4bcace7e72d6b5ce6b8435 +PCRE2.v10.44.0+1.x86_64-unknown-freebsd.tar.gz/md5/a0bc32a099a584d453458a76c892fe47 +PCRE2.v10.44.0+1.x86_64-unknown-freebsd.tar.gz/sha512/6649c1b9e9569a9decccf6ebaa61d44acdb9069208ec796777d8e70a908210f775be2142053f6a5762ebaa321e297f6d8b51db99629766bc702c498b5f772492 +PCRE2.v10.44.0+1.x86_64-w64-mingw32.tar.gz/md5/eeffb6164fba08b0d5c7f50afa081475 +PCRE2.v10.44.0+1.x86_64-w64-mingw32.tar.gz/sha512/f06db992a2070a88559c15224972aeb098d4291a4325970fc0fbbb7cdd539f4a2fd4f90c0de90a34fe454da6c38290f9e0c7fdf2fe8c441f687fe4491d652adc +pcre2-10.44.tar.bz2/md5/9d1fe11e2e919c7b395e3e8f0a5c3eec +pcre2-10.44.tar.bz2/sha512/ee91cc10a2962bc7818b03d368df3dd31f42ea9a7260ae51483ea8cd331b7431e36e63256b0adc213cc6d6741e7c90414fd420622308c0ae3fcb5dd878591be2 diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse index 65db184c5cbca..f4e47961d8cc3 100644 --- a/deps/checksums/suitesparse +++ b/deps/checksums/suitesparse @@ -1,36 +1,40 @@ -SuiteSparse-5.10.1.tar.gz/md5/68bb912f3cf3d2b01f30ebafef690302 -SuiteSparse-5.10.1.tar.gz/sha512/8f85c6d63b76cba95707dfa732c51200df7794cb4c2599dbd92100475747b8d02b05089a47096e85c60b89bc852a8e768e0670f24902a82d29494a80ccf2bb5f +SuiteSparse-7.8.3.tar.gz/md5/242e38ecfc8a3e3aa6b7d8d44849c5cf +SuiteSparse-7.8.3.tar.gz/sha512/fc0fd0aaf55a6712a3b8ca23bf7536a31d52033e090370ebbf291f05d0e073c7dcfd991a80b037f54663f524804582b87af86522c2e4435091527f0d3c189244 SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5/46541001073d1c3c85e18d910f8308f3 SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512/f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672 -SuiteSparse.v5.10.1+6.aarch64-apple-darwin.tar.gz/md5/14cc0d3c7b5271246eb45c495c7a4e79 -SuiteSparse.v5.10.1+6.aarch64-apple-darwin.tar.gz/sha512/a56da81a5165bcdf49d1913799bffcaea84efd6f8740dd002f700eb4070313cac64be5359ba88d1f39fe976944e34ee6ed6575ceade2ae2d97b850e6a1aee0ae -SuiteSparse.v5.10.1+6.aarch64-linux-gnu.tar.gz/md5/b93b047040e2db5e0277e52b9bd3feb7 -SuiteSparse.v5.10.1+6.aarch64-linux-gnu.tar.gz/sha512/e03a9ecafce9dcc6791dd202efac2f864bdf3a0a4524567801c092304c17ab15dae949abfb1fe2bc71b367a0e398260ccfdd91dad611860090df471b44e75ee3 -SuiteSparse.v5.10.1+6.aarch64-linux-musl.tar.gz/md5/22c44d9d82608724e1aa62d126fdf030 
-SuiteSparse.v5.10.1+6.aarch64-linux-musl.tar.gz/sha512/39a3c11429cd3e6afa2f615dc4b0c8d16d7b94a423d76e598b3b48db2c47fe64d644233e2a672bd6654f8bd57da91dd7a787a3e4978f0f803237ab4ec6f97905 -SuiteSparse.v5.10.1+6.armv6l-linux-gnueabihf.tar.gz/md5/505ee3c0750a720ed1e4de670f81e220 -SuiteSparse.v5.10.1+6.armv6l-linux-gnueabihf.tar.gz/sha512/20fafbdd2df96427b95b730901663c255dafc415f3a8154e3364ec46ca2b205fa45a081f92272b81d7aed22b9f8373d2d4eee70ff8ab5ed8d1d80b6a340c8aad -SuiteSparse.v5.10.1+6.armv6l-linux-musleabihf.tar.gz/md5/8e1821668cbca9c2d3c5cee5ad1746c8 -SuiteSparse.v5.10.1+6.armv6l-linux-musleabihf.tar.gz/sha512/58fb4ec10a537d101e0be8417648a4d0127444b3fe8a32498320aaaefc747f5cac3c7503b70775c1d708b077034060fe5ed8609e73bf9be22f9a8729abc4c73d -SuiteSparse.v5.10.1+6.armv7l-linux-gnueabihf.tar.gz/md5/43d133a916e548ecae50671b92f64c6f -SuiteSparse.v5.10.1+6.armv7l-linux-gnueabihf.tar.gz/sha512/f7f767c0e7eb45afe10941513695bfcc9e0628195cb9245a9c24700967f9cfa7cd0030cdcfaf47a76400d5dd3eb908c1f9ea5e44efd3054ed7bba47e664279a2 -SuiteSparse.v5.10.1+6.armv7l-linux-musleabihf.tar.gz/md5/7c3b2e19d3296002b1aa72b951421eec -SuiteSparse.v5.10.1+6.armv7l-linux-musleabihf.tar.gz/sha512/7546ce844b03d0414168ab6d0925f848b14b35ed27cb545b41f2512bad44b7da4f39004e75657c7c572557ccb015177d3e0d346e2c3182b27a6ee602876ee0df -SuiteSparse.v5.10.1+6.i686-linux-gnu.tar.gz/md5/e00a73f0fad92a266dd8d3774707f9b1 -SuiteSparse.v5.10.1+6.i686-linux-gnu.tar.gz/sha512/9cc2332a78d0490170d722d2f062d6f660fb3bd9042dd177c3683675d0f44306b93bf882cb79c0707ab79318280d08582431eb1c92334f2bb50946e942be0b16 -SuiteSparse.v5.10.1+6.i686-linux-musl.tar.gz/md5/71fb647a76ecc9e547df903535011b5b -SuiteSparse.v5.10.1+6.i686-linux-musl.tar.gz/sha512/7806cd9179e46fa61b63a3f711b37289da72a48430912e564c88e3dcb4caaad8a9bd232d6f572f8270806d286e4a4eb9edfdcda29fe8d91dadb1b03d57eda76d -SuiteSparse.v5.10.1+6.i686-w64-mingw32.tar.gz/md5/d4e6c9aba53b2107469cda6de9ca2724 -SuiteSparse.v5.10.1+6.i686-w64-mingw32.tar.gz/sha512/c0c49641c6e7f3f0333e3fa44ce62dcd4ad5942c74b2429aaeb49fd0d7b8c13c872150ae4d54cc5cfaae07a65a24a7d4ea731adc78db3d9341a54e5edb5c80f0 -SuiteSparse.v5.10.1+6.powerpc64le-linux-gnu.tar.gz/md5/5432dca00f7e0f42b7dbd16083537318 -SuiteSparse.v5.10.1+6.powerpc64le-linux-gnu.tar.gz/sha512/61946a7faa2a49613ea2c08a01f064b619c9ec134f0d9509eb42a96bebf2a63f5fb57b14702f25618def410658da8334bb6aa5200280956e573aa944476efef2 -SuiteSparse.v5.10.1+6.x86_64-apple-darwin.tar.gz/md5/ca175d433a02f91407e2921872c2b67c -SuiteSparse.v5.10.1+6.x86_64-apple-darwin.tar.gz/sha512/14d9b01e2db8c04f9a1076bcbac022c6573728f708f31344825805fed53971e922aecebeb4b2f567a6b5f44ad27c0d66e142887ff4684c8679ab65b902538abf -SuiteSparse.v5.10.1+6.x86_64-linux-gnu.tar.gz/md5/6c271ced91dbb1bf748efbaace1dac10 -SuiteSparse.v5.10.1+6.x86_64-linux-gnu.tar.gz/sha512/5984db9c101ef80d63024bc3b51821268349450deedd5aaea5fade0fc5932992379a0133c4f91711af134014835afea1bde518ae1e7efd482d556a97e54b0238 -SuiteSparse.v5.10.1+6.x86_64-linux-musl.tar.gz/md5/c7d55069969dbb98997687c847ab643d -SuiteSparse.v5.10.1+6.x86_64-linux-musl.tar.gz/sha512/b54012765f7c7329125b41c3fb678e23888a858d3fd5a139c52bd980e383a308282238020754e795de6457fb312b61c39e6ab2d665ca5af95c65f52f0c354067 -SuiteSparse.v5.10.1+6.x86_64-unknown-freebsd.tar.gz/md5/e641be38c8205e362a7299c736aedad5 -SuiteSparse.v5.10.1+6.x86_64-unknown-freebsd.tar.gz/sha512/d55e85335bccb59210014c35233ad9e42f5d086f01a43fe0ee13f21cbb8555ea05f1d91c95a6d3f883477086851e123c4b0cde7cd2dcd8e08835fe9f685d5b25 -SuiteSparse.v5.10.1+6.x86_64-w64-mingw32.tar.gz/md5/45cad947fa962e1f192cb7b52a1f7b3c 
-SuiteSparse.v5.10.1+6.x86_64-w64-mingw32.tar.gz/sha512/e6545c681ba7d2346baf8fafabdf25f2faf6ea54763d999b14499f30d235e90f34fd4f83430ea7f17c01adea0699dff6c4d7ae3cb938c749d6a15f8bf4f1519f +SuiteSparse.v7.8.3+2.aarch64-apple-darwin.tar.gz/md5/e97424dc7b9e44cb18ca50b99d71a0a8 +SuiteSparse.v7.8.3+2.aarch64-apple-darwin.tar.gz/sha512/2d077c785fe1109ad83e001516394c027f0ba1d77186e73df3d89ee03aea7817f09e15c0b1302711aebd168e06675cc865e2936c4c654c9f65932434f974e9c6 +SuiteSparse.v7.8.3+2.aarch64-linux-gnu.tar.gz/md5/7b14b0fe44156b117ebee60811f666e6 +SuiteSparse.v7.8.3+2.aarch64-linux-gnu.tar.gz/sha512/dc726895c6f4c3e7a8d87507946cc0fd15e8d6c6d6d22698976df8c53aeec0532abb0c34bcb98767a6ee3b3146f60189a575a8e523be75b951d33535b6de5359 +SuiteSparse.v7.8.3+2.aarch64-linux-musl.tar.gz/md5/5a4e485cb6a7025196548e8bab4228c6 +SuiteSparse.v7.8.3+2.aarch64-linux-musl.tar.gz/sha512/9c091cd13b61a4d291751c294a080b95c9068b13bd805089c962e06fafdb12da08087709759d5310ceb2c115255b14c1a99c4ed45f660f8680dcc30f55ac6f49 +SuiteSparse.v7.8.3+2.aarch64-unknown-freebsd.tar.gz/md5/895f1b9b79455c3c26a9e3c64c335dc5 +SuiteSparse.v7.8.3+2.aarch64-unknown-freebsd.tar.gz/sha512/9876a711d79314dcf68bacf0460267f69474c736346bf9f098ed56c3430fca67d9b73d7d30fa6d3c5ac1cacb017f57be2d83b2310f695d51b73e3ce98234a10e +SuiteSparse.v7.8.3+2.armv6l-linux-gnueabihf.tar.gz/md5/9784404d2ff45a94a9ce9e671badcdf0 +SuiteSparse.v7.8.3+2.armv6l-linux-gnueabihf.tar.gz/sha512/8f49fc05c80f6b869810e5f128c04a5816ebc2ae24313ba699723be9550407a634486d0c95089325350381cd534d3e991cc07a80f509a612f6da27da32b1d6d0 +SuiteSparse.v7.8.3+2.armv6l-linux-musleabihf.tar.gz/md5/b26cdada5bd08e3284dd74e7bb14a770 +SuiteSparse.v7.8.3+2.armv6l-linux-musleabihf.tar.gz/sha512/78695ebab8c73bed06723e3052c471888e84e90681561e32469bac49784b8973af355a19db5ad3663373fde97b92ade112e48a0ece4209b9933c05faa9a7b83d +SuiteSparse.v7.8.3+2.armv7l-linux-gnueabihf.tar.gz/md5/4ae051735ae0c34ab2d5e1f0bec3f26c +SuiteSparse.v7.8.3+2.armv7l-linux-gnueabihf.tar.gz/sha512/efdd084c9bfac1cd9a4128aa7d6a4f0d682cd6c791dbcf1ed2f53b77f6e9c4ab9ed13f54ff6dc54603b5426bc09f7833defaca3d135cd1995d92a177d1d97eb0 +SuiteSparse.v7.8.3+2.armv7l-linux-musleabihf.tar.gz/md5/2e55d69017c5ec6fca694aabf1470dc1 +SuiteSparse.v7.8.3+2.armv7l-linux-musleabihf.tar.gz/sha512/1c75fa1536f5db13e1d613d61959dc1004570a5df45f75820f9554c7395f7492ad5edd5865d5b5c3f6aee65ffa33aa68dab1616547aeb7614da6bf708a748dc1 +SuiteSparse.v7.8.3+2.i686-linux-gnu.tar.gz/md5/817e2479c223e9c023ec431aea4b2361 +SuiteSparse.v7.8.3+2.i686-linux-gnu.tar.gz/sha512/c0e4482570bed4366f68df9635a6229289008c0754c3d636d26a44004e5c80ae04c6ab434b0dbf57baa05e4afc847716d9e059904faf9d10be7abdade2890803 +SuiteSparse.v7.8.3+2.i686-linux-musl.tar.gz/md5/e7a6c8f58a81e0ac511d44e878b9128d +SuiteSparse.v7.8.3+2.i686-linux-musl.tar.gz/sha512/54c00ba5bd8bb514e8cc29ce0ed596c642678ea1fe27571cbbb3210b9ad8940ecac2100e05b29d5db51804daab714c48d18dbf65650924b90c076078681990a3 +SuiteSparse.v7.8.3+2.i686-w64-mingw32.tar.gz/md5/3db74decc8363735a0a9f71f2558c2c9 +SuiteSparse.v7.8.3+2.i686-w64-mingw32.tar.gz/sha512/d5dc5e263a9705d2510f541ffe9c5a543a4a0ba7c104ad03142ef04de5a25cdb7da59eed3218cdf540b7b2167235206d438db283da682ba31568d99076fccece +SuiteSparse.v7.8.3+2.powerpc64le-linux-gnu.tar.gz/md5/78f008e14dfb2580da95ac58466d9d16 +SuiteSparse.v7.8.3+2.powerpc64le-linux-gnu.tar.gz/sha512/ea7370f5071c1e5bb2980f76f16ec88eded7fab6d7773c99fff9087adee446b5f4cefd16ff483aa92197fabea35cb3e486742a58fefa6fee0997a358ada14c8a +SuiteSparse.v7.8.3+2.riscv64-linux-gnu.tar.gz/md5/966c2093ab4d0652084652f54e58c5f7 
+SuiteSparse.v7.8.3+2.riscv64-linux-gnu.tar.gz/sha512/febff4241ff15471a7a3bcb544a7e34ab71da1c516b3fcff89cd726d392c2fcea0bb970c8a2ee7a4461c341c7be02f47fcdc47f4a7f88307cceeb3f75327c625 +SuiteSparse.v7.8.3+2.x86_64-apple-darwin.tar.gz/md5/7a8b5ab88a83081545a411202caf5a1f +SuiteSparse.v7.8.3+2.x86_64-apple-darwin.tar.gz/sha512/186f5fb8a7117d8cf77b311ea0c137b270eca895aecd31dc616b86425c62ae3d0218938cf1cf54d5009b7ff8247f40ae21b6c452fdd339fead5622e710c03418 +SuiteSparse.v7.8.3+2.x86_64-linux-gnu.tar.gz/md5/001cb5ce58344f1186a51d6018a9c337 +SuiteSparse.v7.8.3+2.x86_64-linux-gnu.tar.gz/sha512/8cd407ff3e857cb49d26adde68746c0a73742c4556ab624b34eb5caa95a7c438d1aa6a7fb3b9d54341f0469ae7b19f78e09f2a2642f15f4795d9583162cf12e1 +SuiteSparse.v7.8.3+2.x86_64-linux-musl.tar.gz/md5/305e268c36927a94037419153d37fe91 +SuiteSparse.v7.8.3+2.x86_64-linux-musl.tar.gz/sha512/aebd921b721f8f71bbdc94aa60ae2f2c3bc5de21acbda0ae1e96e19dbc86c13cbe1921cd5938bbb4f9f83c84a2a6b4f980e743684e8236e7a979ead0042e9dec +SuiteSparse.v7.8.3+2.x86_64-unknown-freebsd.tar.gz/md5/81f85e3374a9b7bbe0a25b8cb88d3438 +SuiteSparse.v7.8.3+2.x86_64-unknown-freebsd.tar.gz/sha512/fb0b1c219d1ce35f79d945fccb5c5a2e1e8d9f32a2401dc3071781740c6c16e729746ffb02c76680e681f8978d19948b28612c97136f1396d030e69c3eb336d9 +SuiteSparse.v7.8.3+2.x86_64-w64-mingw32.tar.gz/md5/a3a68bafb213ea68006cdfdbb15e1457 +SuiteSparse.v7.8.3+2.x86_64-w64-mingw32.tar.gz/sha512/6db131bb8b0efbcbaf4ee9f2688dc1083a570da5fc9ddb044ffc9308fd5d6949241cd780e5246484ae6417b261bfd61683b6122c7dba7d1598d5e89be6d73acc diff --git a/deps/checksums/terminfo b/deps/checksums/terminfo new file mode 100644 index 0000000000000..bd971e72b1be8 --- /dev/null +++ b/deps/checksums/terminfo @@ -0,0 +1,2 @@ +TermInfoDB-v2023.12.9.any.tar.gz/md5/573d9b5adaf6af500e3dfae6e3d15ebf +TermInfoDB-v2023.12.9.any.tar.gz/sha512/e0a5bfe54346f9d5690a840628b329f6fac7375b0d29337bc70813ae3553a72bb397f8034d221c544289e40c4cfc685d5805777b7528f05bbe0123b5905c24a4 diff --git a/deps/checksums/unwind b/deps/checksums/unwind index 7a3141d79368c..5d4967cb0cf22 100644 --- a/deps/checksums/unwind +++ b/deps/checksums/unwind @@ -1,26 +1,28 @@ -LibUnwind.v1.5.0+4.aarch64-linux-gnu.tar.gz/md5/b40fee1e2995d3fa2c823c45b231d9f0 -LibUnwind.v1.5.0+4.aarch64-linux-gnu.tar.gz/sha512/d5865dabb541c3e1a5b6bc20547adc0788dde0f74731006e44e2cd128742c1ce61638a31340f8f4bfcd8b052706c3d57c24a202d048cb8d0496a909ff51fe9f7 -LibUnwind.v1.5.0+4.aarch64-linux-musl.tar.gz/md5/580b46908f43309c3f88c9ec4177d296 -LibUnwind.v1.5.0+4.aarch64-linux-musl.tar.gz/sha512/c12caa005586bea53932054d2742d6b55c40fd1a284daeb73924f3b761115929e022f3cf377b590d818e2c69726d42f12d4c87be2daf6d43caeaef54e226afdb -LibUnwind.v1.5.0+4.armv6l-linux-gnueabihf.tar.gz/md5/5af8f16e7eb32718cde68ee840c373c2 -LibUnwind.v1.5.0+4.armv6l-linux-gnueabihf.tar.gz/sha512/71e6f64477bc356c42bf1604e61a2596dfdb90f5fc3005e6656f2aa5ba0576867e6b482501d3d3c68da623cf4d6c572e4fb9708a71988671b1bbe76d6c2e4754 -LibUnwind.v1.5.0+4.armv6l-linux-musleabihf.tar.gz/md5/446f9021d1903410ed9b2e400e2533af -LibUnwind.v1.5.0+4.armv6l-linux-musleabihf.tar.gz/sha512/bf39ac9faea323c394e627647aaafacccdcd9545ac970b771dc4736376c56f0e1cfe58fead45625b7c491d91ae4f1dd41c3303d04536ef514c3a3657c06fd261 -LibUnwind.v1.5.0+4.armv7l-linux-gnueabihf.tar.gz/md5/ab594ba2df5cdc08dcf74ee2d0af9742 -LibUnwind.v1.5.0+4.armv7l-linux-gnueabihf.tar.gz/sha512/80f3b0c922b27d98fec1ba58f227af3c9d3e9691f34ed088152619289fa09b03a5b891162cd8ba497432867d60c2cd97a3466178c0891d848ded167e64f720ef 
-LibUnwind.v1.5.0+4.armv7l-linux-musleabihf.tar.gz/md5/84cdf938ab0880447f242d86ad9e6d1d -LibUnwind.v1.5.0+4.armv7l-linux-musleabihf.tar.gz/sha512/a985e9fc4e75cb292e7cb80ae0446110221a7f785818f53ac26c03dc2e142c959a6f380ffbceb43039dc95659e0da608b436d5faa5133f7d49308dd6198652f3 -LibUnwind.v1.5.0+4.i686-linux-gnu.tar.gz/md5/29a8d300b5edc3b25fc0c38d415ec4a7 -LibUnwind.v1.5.0+4.i686-linux-gnu.tar.gz/sha512/c96b954ee5736ad69a47e1214aac483ed2697a013749a696de823e2064bd5869590ae17c19268bf06227c9065b10bb36b197fb73987a74706fd37e0eefc17254 -LibUnwind.v1.5.0+4.i686-linux-musl.tar.gz/md5/fe8822d87cbad1abc4173a0c5c3f082f -LibUnwind.v1.5.0+4.i686-linux-musl.tar.gz/sha512/ff09cdbb4046413c260df0058a2fb3c2daa56e656a038c1ff4c47b251254e08066ae3b8b144a02483e1ca7d92192d8e3c1b005adcf2dad26343219eab4c26d95 -LibUnwind.v1.5.0+4.powerpc64le-linux-gnu.tar.gz/md5/15eea5ef1f4ad04cc8fb8f701571233f -LibUnwind.v1.5.0+4.powerpc64le-linux-gnu.tar.gz/sha512/875d50cea141397783c4d3062a08a1951fb14c96e9c99489ddeb91f94f403c48e8d358c181b6649198318586463efedd1b5f991acc792d8412a6ad2c810c568e -LibUnwind.v1.5.0+4.x86_64-linux-gnu.tar.gz/md5/2b7b2264763d10f39c548b3f23ea1a95 -LibUnwind.v1.5.0+4.x86_64-linux-gnu.tar.gz/sha512/7e76ae26ce7f6f60020af0908c7197e28204a8b290022af7dd92b17d64b01d68338d347e3f78a5946fef2faec3cd3f1c274bc55de1472a6245867b8e5219dd0a -LibUnwind.v1.5.0+4.x86_64-linux-musl.tar.gz/md5/84789e4ee681fbe4697e02431ab1004b -LibUnwind.v1.5.0+4.x86_64-linux-musl.tar.gz/sha512/e8166e2efbb70a3b492551556c72181c505b8cdb2e5d528caa69b32727c59f3e065e4455fdd9749878bb6d1ab5962ca7dfe2ebc9efa6dbdb0bebd210bd16c6a7 -LibUnwind.v1.5.0+4.x86_64-unknown-freebsd.tar.gz/md5/f35f256dd24183f72a932946c07073b0 -LibUnwind.v1.5.0+4.x86_64-unknown-freebsd.tar.gz/sha512/de80153025ba3e4192c8faf3f7c5f5a0044d4580f8cb56f4c0206f7030cbeeb406cdd064f87b4568392c06e96b9e32fc07c55b68b92e8cc5d596fb79040ecb78 -libunwind-1.5.0.tar.gz/md5/c6923dda0675f6a4ef21426164dc8b6a -libunwind-1.5.0.tar.gz/sha512/1df20ca7a8cee2f2e61294fa9b677e88fec52e9d5a329f88d05c2671c69fa462f6c18808c97ca9ff664ef57292537a844f00b18d142b1938c9da701ca95a4bab +LibUnwind.v1.8.1+2.aarch64-linux-gnu.tar.gz/md5/de3690f3a8ecf0aa5d2525813bdab3c8 +LibUnwind.v1.8.1+2.aarch64-linux-gnu.tar.gz/sha512/366090b4291623603e54d3c73437efcbc3c7f52ce0c64a63e8439eff8a3ddeb4efc1ab6b2513e0a60e2714239bf259cd667159a24207f0c9ce3134530e539155 +LibUnwind.v1.8.1+2.aarch64-linux-musl.tar.gz/md5/e8adf4e842e998b6806653964e721a47 +LibUnwind.v1.8.1+2.aarch64-linux-musl.tar.gz/sha512/77411646767f5f13e2f45d32bfa48d6864b712d46d339e3fd4d62d12f4a26b6ffb8293636209ee5645d8e5552bdf70db5a848736ef0df75db74c8c878553cd40 +LibUnwind.v1.8.1+2.aarch64-unknown-freebsd.tar.gz/md5/ee8fc39c934cf1c640ae4ae41addcc30 +LibUnwind.v1.8.1+2.aarch64-unknown-freebsd.tar.gz/sha512/6245fc3003ef24fce0f84007c0fa1390658e71dc64da6a2f5d296d3928351096ed2c0c83808890413332883abe5fcee7615eb40b2baeddfc56d3484315f3dacf +LibUnwind.v1.8.1+2.armv6l-linux-gnueabihf.tar.gz/md5/4c454e174be7b5f220f4cb8f659722d8 +LibUnwind.v1.8.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/f6e3d83576ae963f400972250c8558b0b15bdd9657aac6eacbd0c3f59af6a3574d0cc475c6e606ad8f2e0b178ba33f297aec0aeac8a5970d93b2c36d9ffae59d +LibUnwind.v1.8.1+2.armv6l-linux-musleabihf.tar.gz/md5/dbec8675d2b73807c9d9e3afc2ce2260 +LibUnwind.v1.8.1+2.armv6l-linux-musleabihf.tar.gz/sha512/45d9ac63282c21bdc6488b65fae8f03bbaa55d18b346ac3fc3d40f38ebd05b2a0db539f23dc6c6f88bbbad8f2ec2cdcf677db1acff83a99d9875bee93555ad1e +LibUnwind.v1.8.1+2.armv7l-linux-gnueabihf.tar.gz/md5/98517b7a4ae874099ef0aafb46e740c9 
+LibUnwind.v1.8.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/3a00792415a15fe45c3454f9bf480222862217178a61db0738837537c7e2c50f71b53063facd591680b14e7b3bde218c34cee9b2854ad94897b306388749af1b +LibUnwind.v1.8.1+2.armv7l-linux-musleabihf.tar.gz/md5/f276569278383f7711f40e623670620d +LibUnwind.v1.8.1+2.armv7l-linux-musleabihf.tar.gz/sha512/48160616ac1ed4b3e343556517e3cbb4959e80e9be237fc820e33e06f6668e95d9365dd7c86e68dc898fee1141cd825495bbbc27d685913a2f2808d974b54c19 +LibUnwind.v1.8.1+2.i686-linux-gnu.tar.gz/md5/2cd0203f2b70436ac2323077dad1d5d1 +LibUnwind.v1.8.1+2.i686-linux-gnu.tar.gz/sha512/fa42b3306d9b67011468b2c07bdb6cca6847f0f1632ee4aca3212c5944e991f9a1ae8f881fb4ce86e641e977695942d873a39fc212bdcf6acdf3e12c24b31d8e +LibUnwind.v1.8.1+2.i686-linux-musl.tar.gz/md5/3c456a1b3da2f5d785e02e1b6cb4cd74 +LibUnwind.v1.8.1+2.i686-linux-musl.tar.gz/sha512/fce8368ee670109b681c9d442ad89fee8fdf8eac1e115407784d1e8b82cfb98acd9d2edb4dbea29f8c63c83054da2a4d34149fe231655e2535834a4ef7319666 +LibUnwind.v1.8.1+2.powerpc64le-linux-gnu.tar.gz/md5/73b04ae80ca9fdbe06b3eeaae40d5dc5 +LibUnwind.v1.8.1+2.powerpc64le-linux-gnu.tar.gz/sha512/d4083a696a3492ced38b05fb573d44c4cc2b5332a351b65be2c3992d9e932bb6ea71f48260c643fa54219adb800b5da41160e1d56b0d9145061edf2e5dfc0ef6 +LibUnwind.v1.8.1+2.x86_64-linux-gnu.tar.gz/md5/f9d6132f4166c5ede15b2303280a1066 +LibUnwind.v1.8.1+2.x86_64-linux-gnu.tar.gz/sha512/124159e7d13ce1caee5e2527746ec98b10a776f57e5f9c99053b7ab76e7d9447b998cbc044da7671fd39356445a983f16f2c7bbefc076b29e45d2c2bb4d0364e +LibUnwind.v1.8.1+2.x86_64-linux-musl.tar.gz/md5/665d9215ef915269e009f7dde1f827b3 +LibUnwind.v1.8.1+2.x86_64-linux-musl.tar.gz/sha512/2d8754bbfa7a4b576fb58a2d22b08940bb9f615988bfc388e9ea2cc96e3a573e6c31a4023b2509a3424a0ce3d946584c09ac5d18e4bca6f0f47e52597e193944 +LibUnwind.v1.8.1+2.x86_64-unknown-freebsd.tar.gz/md5/cc8149747db86524da0c9749ed538f3d +LibUnwind.v1.8.1+2.x86_64-unknown-freebsd.tar.gz/sha512/4d416999616fbf08103553aa43603ce62109c21e9a97d6a391fb267c04d382834da380f459c96412773f19d93b8e996ddd405831623ce118d239ad1a0d9025fd +libunwind-1.8.1.tar.gz/md5/10c96118ff30b88c9eeb6eac8e75599d +libunwind-1.8.1.tar.gz/sha512/aba7b578c1b8cbe78f05b64e154f3530525f8a34668b2a9f1ee6acb4b22c857befe34ad4e9e8cca99dbb66689d41bc72060a8f191bd8be232725d342809431b3 diff --git a/deps/checksums/utf8proc b/deps/checksums/utf8proc index c1b2a6779e555..2055d3323b7e1 100644 --- a/deps/checksums/utf8proc +++ b/deps/checksums/utf8proc @@ -1,2 +1,2 @@ -utf8proc-1cb28a66ca79a0845e99433fd1056257456cef8b.tar.gz/md5/aff37aadd1b02cad3259683e8a5f4543 -utf8proc-1cb28a66ca79a0845e99433fd1056257456cef8b.tar.gz/sha512/3ee433e5577e01f334aa4224275dfb7ee6ae7c785013df3eee6fc0488218d3bc895649811589edf57461c6520ad70437fbf6a376959a6a6f70bd920eb01c5001 +utf8proc-a1b99daa2a3393884220264c927a48ba1251a9c6.tar.gz/md5/2c404870fdc19982ec5313ee78e478d7 +utf8proc-a1b99daa2a3393884220264c927a48ba1251a9c6.tar.gz/sha512/a6652f5840439fe051d973d9467ca9805dcea8d0ac75a2d35e3f8041c513d6ccd5d205a3873f28d7cb5e33ce6471165850164997f188ca359111963b3aac9a16 diff --git a/deps/checksums/zlib b/deps/checksums/zlib index 15e2cffa5b485..bd651003399b9 100644 --- a/deps/checksums/zlib +++ b/deps/checksums/zlib @@ -1,34 +1,38 @@ -Zlib.v1.2.13+0.aarch64-apple-darwin.tar.gz/md5/64403a5962d70d7e4b6bf7c225526144 -Zlib.v1.2.13+0.aarch64-apple-darwin.tar.gz/sha512/a7e6bb32c324943e5df3fa8501ee9d744d132db6f27033fe8ce789c1f19f26c15dc456ee8d6fc8095b427054e750ffe268500f5f69edecaa1af230b4b23535c4 -Zlib.v1.2.13+0.aarch64-linux-gnu.tar.gz/md5/a2d3265543017db03bc47b9d9778d99d 
-Zlib.v1.2.13+0.aarch64-linux-gnu.tar.gz/sha512/c8143445222e151d7f522a98ee8f2742571542f4e71d515e88086c9d7f27b952662ced93f40c795e0de42e3a07c0cb5e1d9d8e792347f3c068cb07ccc144a640 -Zlib.v1.2.13+0.aarch64-linux-musl.tar.gz/md5/c1f2a1c562f72c7aa4b228f57c2346d4 -Zlib.v1.2.13+0.aarch64-linux-musl.tar.gz/sha512/7ed89bc7696690c03617c7413f5456ff5a1caa0dd600880ae67132f6c9190672ae451a06d23956a1969be00bf5c8f29bfa4f5bc4ab646b3b375c350f67c993e5 -Zlib.v1.2.13+0.armv6l-linux-gnueabihf.tar.gz/md5/7dff966f7bc5dd2902fa9ce20444235b -Zlib.v1.2.13+0.armv6l-linux-gnueabihf.tar.gz/sha512/49e7b4a7c84996b697cf944b11ce06ce6064983a6a911c4539587385afa1e0119e3b1dbf816703a2c132acc90f7f114ec10631647638b59b14954382c1a82014 -Zlib.v1.2.13+0.armv6l-linux-musleabihf.tar.gz/md5/6982f19d2446559c0fd369afe84ebe4a -Zlib.v1.2.13+0.armv6l-linux-musleabihf.tar.gz/sha512/8f69dfb7fb91cd6f7c934e1acddd83f77c2ebcc1732553f41ae1adcb7805a3304d16062133ce5094a8aea18ff5eca5f7a2df5724ae5a5cb9137caee732c1bf36 -Zlib.v1.2.13+0.armv7l-linux-gnueabihf.tar.gz/md5/30579a91f8f1c96752fe9a82bc053523 -Zlib.v1.2.13+0.armv7l-linux-gnueabihf.tar.gz/sha512/64f6a0e66ee13b086609e0d070c8742de20052e1ef43da201be0007e478c65b2f0a28a3c19ca5be6537b7c8bbeb6a4b2886c15a1e47bb2bd1cfe9d5e1590a620 -Zlib.v1.2.13+0.armv7l-linux-musleabihf.tar.gz/md5/b052ad151dbc3bad78762bc06164d667 -Zlib.v1.2.13+0.armv7l-linux-musleabihf.tar.gz/sha512/b5d2de09a4d65d898cf9ba0db34327c712f42a78cd1fd0f1d77fd8798910502049be63ccfed23de5fe3b499d9e0fe3d4cbb07c72765fd54db275e92f8f1e4dc4 -Zlib.v1.2.13+0.i686-linux-gnu.tar.gz/md5/3074702010889f586b43aa3dbbda4ceb -Zlib.v1.2.13+0.i686-linux-gnu.tar.gz/sha512/92aa87c5aa3831155305276c2f0da091b5be4e8a396772e1a28650c2837ceb116dd2207329732b653a97c011abd7dd6ac1fc9574ac64cb3049ccd36fa6700748 -Zlib.v1.2.13+0.i686-linux-musl.tar.gz/md5/eff02476825ea7a53ab26b346d58f96e -Zlib.v1.2.13+0.i686-linux-musl.tar.gz/sha512/14b72607d524948198e999e3919ee01046c049b3ec441bc581c77642cf37c3d28cc3c5500a3c073d62e9b8dc1efc9661b23bb925ed9c80b5e69abaddbcb59115 -Zlib.v1.2.13+0.i686-w64-mingw32.tar.gz/md5/279d2699458b1dfec80da17dd6f32f02 -Zlib.v1.2.13+0.i686-w64-mingw32.tar.gz/sha512/fb14d27b4f4ed5eb75bf4d4377074a206610558301be89ed692cf61d1266e425edb0489511fbbec100dafc71cff2cac863a4ea4ec70cfaa94e8175b9b7add25c -Zlib.v1.2.13+0.powerpc64le-linux-gnu.tar.gz/md5/bc69de101d9159b22b7a334e2700faa6 -Zlib.v1.2.13+0.powerpc64le-linux-gnu.tar.gz/sha512/174eb4f154594d268d970d23eb6144dd2f6be41ddcfb9bc756b2ff48f0781ad0ed6571e2ead64dab0967da91517a02cd8db2b0e33a0bde9400103b5204f78e85 -Zlib.v1.2.13+0.x86_64-apple-darwin.tar.gz/md5/9a53075fc5595e638bacd25341f7ff42 -Zlib.v1.2.13+0.x86_64-apple-darwin.tar.gz/sha512/8124f677c036a288575712e201a809f44532b300fa56f8c12be9a1d7094fd644cb198c47b63d9f9f16d5509e27e7b3c59f080d4748ae489a4977fdfeae79e762 -Zlib.v1.2.13+0.x86_64-linux-gnu.tar.gz/md5/b192d547d56124262e2ae744f385efd6 -Zlib.v1.2.13+0.x86_64-linux-gnu.tar.gz/sha512/c6dca3c0a713ef2e2296bc9e9afa75e103a4cc4f00b5c905ebc5cff688904d6a454f83ab5ef3b6c66bdf425daa2fcd25825e50a3534c0ff109b13affbb686179 -Zlib.v1.2.13+0.x86_64-linux-musl.tar.gz/md5/f2a466b38b2ff1c895f630982147a950 -Zlib.v1.2.13+0.x86_64-linux-musl.tar.gz/sha512/191261d37fc501591005bf680d76bf518da261252456c4fef1c12bc572f9200a855fbd1b125bb8ad10d803eedbc53d4c9d7a2861e9a35d629fb40f87e5306f5f -Zlib.v1.2.13+0.x86_64-unknown-freebsd.tar.gz/md5/00cb91c5edede46f72fae113b3115799 -Zlib.v1.2.13+0.x86_64-unknown-freebsd.tar.gz/sha512/8894e4a89dbf10e60ed020993484dcad91a52a8d310f3dfcc53808643c8401b1e445db46a815c19d55c0e5fd1a386945d1253c16af94b00ff27ccda44941f69b 
-Zlib.v1.2.13+0.x86_64-w64-mingw32.tar.gz/md5/f98c68e19d9cfd24c7cec0b79d374e05 -Zlib.v1.2.13+0.x86_64-w64-mingw32.tar.gz/sha512/8e68edbdfe4e2ec6de70a724e30bc2df439901291639eca9e5aace75e31c7c6d3f47021213b8b7473b1f6ad4986f6b8695da4e24e2ea3025681e5d07dcfc067d -zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/md5/60a49c89b9409dd91c1b039266f7bd0c -zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/sha512/83122539da9399ce5f51c2ecbc38a627405334a9a6d53a024341353c1263a1e3aef7498f30ee281a49b3022be70e992eae475691e33da7a9c6a59b83207bd688 +Zlib.v1.3.1+2.aarch64-apple-darwin.tar.gz/md5/938c376c7513fa48d4b8b78cea741260 +Zlib.v1.3.1+2.aarch64-apple-darwin.tar.gz/sha512/ccece3f5618efe3e3699eb521167e2ee768932ea6f4e411d36619a941af3e9e32394beb260171d557930382f412f9be70f4c69215d3f7e448d4446b1690111ee +Zlib.v1.3.1+2.aarch64-linux-gnu.tar.gz/md5/44a14273caeea9c5cb34ce3e0ba9d1fc +Zlib.v1.3.1+2.aarch64-linux-gnu.tar.gz/sha512/8977bdc225404a01746fc14885e4823b4e2781c73a75e0ee0c8d9ca58b706c6cf9f98647b4e22bb09e7e09640caf4643e5210054a4624e06c76fc3eb2c2a2728 +Zlib.v1.3.1+2.aarch64-linux-musl.tar.gz/md5/dcef6c714555de9b2181b8c5b0a2c668 +Zlib.v1.3.1+2.aarch64-linux-musl.tar.gz/sha512/499701cc0fd1e52f3952da1b3c4377662c54390db9ebd6f5be82ecc0ba8754d2ca42b2f572b3a78ccdef30e527b7bed22c15511944f1299398587c529f8f4619 +Zlib.v1.3.1+2.aarch64-unknown-freebsd.tar.gz/md5/166f8a076a01a6f0979c712d7cec44e8 +Zlib.v1.3.1+2.aarch64-unknown-freebsd.tar.gz/sha512/7a1546b614cb5e2c0566774247269740d881c0a6d22ef6dca8010d77829b4e64594f4e609bb83299fa239d66909a4eb046d6d078268006723747f86e6c733e6b +Zlib.v1.3.1+2.armv6l-linux-gnueabihf.tar.gz/md5/1f0bcb50b545badbc9de1569f51c4668 +Zlib.v1.3.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/1e4bea6fa41300ec770822dcd9335d1393c087db45d128e2c60d9315db01a69c984c98304b83af0725a99ae3a5cac4a273f4eea8a4213454608edbe0e55c74ce +Zlib.v1.3.1+2.armv6l-linux-musleabihf.tar.gz/md5/3a78103181bf8a74dfc0c6f7681bd3de +Zlib.v1.3.1+2.armv6l-linux-musleabihf.tar.gz/sha512/2a7c70266fd5928e46c8d71d95884054eaff2432d9fbce37eef67eb62af2b087f5f9fa3752a5d14f50cd058519d39a1b81450b30786a4f66eafbd16d18ef7b6b +Zlib.v1.3.1+2.armv7l-linux-gnueabihf.tar.gz/md5/4e202f829e7f478451e93da7be2b6f98 +Zlib.v1.3.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/0734bc8a84b039b971a15620adb9b5da77d1b1992fb4c6adf9031fa8c592512645d424d2ce752efdda1f300f871c3d4f3b159794c3725fd113e1acd5512aed59 +Zlib.v1.3.1+2.armv7l-linux-musleabihf.tar.gz/md5/5000d1941b7e32dec4a2d125bbd22fff +Zlib.v1.3.1+2.armv7l-linux-musleabihf.tar.gz/sha512/6abd69ef6878fa6cdcf7fe94e4d7aedaae58d961122e131a414f3aea43b401a3812d9d847ab4b1690e9faf89d577935d7f547484edb6cb2814cbc1156159e8ed +Zlib.v1.3.1+2.i686-linux-gnu.tar.gz/md5/7a5de529294b9d8dba4ac1eeb4cbcbdc +Zlib.v1.3.1+2.i686-linux-gnu.tar.gz/sha512/72d52c4e2f01fe1436b22c854efff83068f6a65a9280556018c77bb843f81902c0c96f30132123d4dd6a66041e9391a418ceec227b2b2411f99a26df76d21c74 +Zlib.v1.3.1+2.i686-linux-musl.tar.gz/md5/d18b442e4d108397482fd852deb4241e +Zlib.v1.3.1+2.i686-linux-musl.tar.gz/sha512/6c367f7c522167db510cf42a84dfcce75fc129bb49800c05b90dfdfa5fb31fa20ed020e165f1b283b81f4568c9bf79d4c41f7ad4e42a3513cb13149a50707114 +Zlib.v1.3.1+2.i686-w64-mingw32.tar.gz/md5/b2c8af112298ae9e635054e4ba22e5ae +Zlib.v1.3.1+2.i686-w64-mingw32.tar.gz/sha512/0a28076fc8cf8daa925f4be76dd0025d01d8ad6bc269f87164749da0c3bea6f4b404ef097a2907ce2c009211d9f8387f844fe5e5b1bd2f6d77c9b35b2b1c7548 +Zlib.v1.3.1+2.powerpc64le-linux-gnu.tar.gz/md5/9ae4feb621ae088c323ff12235bdf5db 
+Zlib.v1.3.1+2.powerpc64le-linux-gnu.tar.gz/sha512/912134f741fe96217d1b8465510ac82d238d3d8a21519534fb88c568092dcc9eb8e51ef66b7ca56a2a7a881474f04edb7a6f7bf1ebf06bfff7708b3edd3487c0 +Zlib.v1.3.1+2.riscv64-linux-gnu.tar.gz/md5/43b61473a02e492f932ce60f726966a8 +Zlib.v1.3.1+2.riscv64-linux-gnu.tar.gz/sha512/4742503831da6a6b7945c6e5efd96bd6b03b8a63b73e68181e961b6f326ac5d805350219c43f4537165d1af0ac2ed496d5a72edd0c1d11e012ef12385a8f2e5f +Zlib.v1.3.1+2.x86_64-apple-darwin.tar.gz/md5/347a92686d81ed7c022d2e7310babb77 +Zlib.v1.3.1+2.x86_64-apple-darwin.tar.gz/sha512/a59b9c4f63124c893a2a11b886bbe03bfc75846599eb21997652bd62a2f28afe754d16127e964683552423bf1c0da160e46c32d0b584ed07e28f4e91904b9c67 +Zlib.v1.3.1+2.x86_64-linux-gnu.tar.gz/md5/0630f603e35ab56efdef331e29db436b +Zlib.v1.3.1+2.x86_64-linux-gnu.tar.gz/sha512/b936c328fad685c22473ff4cbfcc8bc48772ed9927c5b29c5d0503b95888efd0ca8d891f53cca45da7b5768ed4a1f6994f9e211167f4704c15c688cced90cac5 +Zlib.v1.3.1+2.x86_64-linux-musl.tar.gz/md5/252d8542bb5a53c479f4ffc067847e27 +Zlib.v1.3.1+2.x86_64-linux-musl.tar.gz/sha512/449809acbbff1fcbd89b9689e803f69d1f9cb49860f0b508b69c560cfcb51232640fcff17ede6ea75d9906edb5a8f38139afd890f18a34260ef5dbb5d167af36 +Zlib.v1.3.1+2.x86_64-unknown-freebsd.tar.gz/md5/79fa906629dff81c38b93001a7798040 +Zlib.v1.3.1+2.x86_64-unknown-freebsd.tar.gz/sha512/53dbcce99b2d6ec82ef86d76f3f574db304ab90f1b131c49b2c06f89bd2426afa4a31bfa8455e2ecdad64d4da71fef1b2d79f471efd55a8bbdc29e95c952a289 +Zlib.v1.3.1+2.x86_64-w64-mingw32.tar.gz/md5/92b083205ca44db131b7cf6b9c09eb21 +Zlib.v1.3.1+2.x86_64-w64-mingw32.tar.gz/sha512/1c3b7b414f09b1896c5a08f156c5e55f07ee012cf3f6fe50d5ba116405dcd9a80e5004ee7c774f7cc981e4d1b099efee85e16b8417cef2734cb7c12ec440d09a +zlib-51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf.tar.gz/md5/7ce1b2766499af7d948130113b649028 +zlib-51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf.tar.gz/sha512/79d032b8c93260ce6b9806f2289cdccce67e9d80865b5bb39ac46dadffc8ee009da51c551eead59c56249c7adfa164c1d5ebcf2b10a8645e0b11b5650176cb24 diff --git a/deps/clang.version b/deps/clang.version index d291dc8e8f8d8..0f49ecdd649f0 100644 --- a/deps/clang.version +++ b/deps/clang.version @@ -1,4 +1,6 @@ +# -*- makefile -*- + ## jll artifact # Clang (paired with LLVM, only here as a JLL download) CLANG_JLL_NAME := Clang -CLANG_JLL_VER := 15.0.7+5 +CLANG_JLL_VER := 18.1.7+3 diff --git a/deps/csl.mk b/deps/csl.mk index 457e276c66709..fef950aa41621 100644 --- a/deps/csl.mk +++ b/deps/csl.mk @@ -1,13 +1,13 @@ # Interrogate the fortran compiler (which is always GCC based) on where it is keeping its libraries STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^programs: =' | sed -e "s/^programs: =//") -STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^libraries: =' | sed -e "s/^libraries: =//") -ifneq (,$(findstring CYGWIN,$(BUILD_OS))) # the cygwin-mingw32 compiler lies about it search directory paths +STD_LIB_PATH += $(PATHSEP)$(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^libraries: =' | sed -e "s/^libraries: =//") +ifeq ($(BUILD_OS),WINNT) # the mingw compiler lies about it search directory paths STD_LIB_PATH := $(shell echo '$(STD_LIB_PATH)' | sed -e "s!/lib/!/bin/!g") endif # Given a colon-separated list of paths in $(2), find the location of the library given in $(1) define pathsearch -$(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2))))) +$(firstword $(wildcard $(addsuffix /$(1),$(subst $(PATHSEP), ,$(2))))) endef # CSL bundles lots of system compiler libraries, and while it is quite bleeding-edge @@ 
-32,8 +32,8 @@ ifeq ($(USE_SYSTEM_CSL),1) USE_BINARYBUILDER_CSL ?= 0 else # If it's not, see if we should disable it due to `libstdc++` being newer: -LIBSTDCXX_PATH := $(eval $(call pathsearch,libstdc++,$(STD_LIB_PATH))) -ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p $(LIBSTDCXX_PATH) | grep $(CSL_NEXT_GLIBCXX_VERSION)))) +LIBSTDCXX_PATH := $(call pathsearch,$(call versioned_libname,libstdc++,6),$(STD_LIB_PATH)) +ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p '$(LIBSTDCXX_PATH)' | grep '$(CSL_NEXT_GLIBCXX_VERSION)'))) # Found `libstdc++`, grepped it for strings and found a `GLIBCXX` symbol # that is newer that whatever we have in CSL. Default to not using BB. USE_BINARYBUILDER_CSL ?= 0 @@ -50,8 +50,8 @@ ifeq ($(USE_BINARYBUILDER_CSL),0) define copy_csl install-csl: | $$(build_shlibdir) $$(build_shlibdir)/$(1) $$(build_shlibdir)/$(1): | $$(build_shlibdir) - -@SRC_LIB=$$(call pathsearch,$(1),$$(STD_LIB_PATH)); \ - [ -n "$$$${SRC_LIB}" ] && cp $$$${SRC_LIB} $$(build_shlibdir) + -@SRC_LIB='$$(call pathsearch,$(1),$$(STD_LIB_PATH))'; \ + [ -n "$$$${SRC_LIB}" ] && cp "$$$${SRC_LIB}" '$$(build_shlibdir)' endef # libgfortran has multiple names; we're just going to copy any version we can find @@ -75,12 +75,24 @@ else $(eval $(call copy_csl,$(call versioned_libname,libgcc_s_seh,1))) endif else -ifeq ($(APPLE_ARCH),arm64) +ifeq ($(OS),Darwin) +# On macOS, libgcc_s has soversion 1.1 always on aarch64 and only for GCC 12+ +# (-> libgfortran 5) on x86_64 +ifeq ($(ARCH),aarch64) $(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1.1))) else +ifeq ($(LIBGFORTRAN_VERSION),5) +$(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1.1))) +else +$(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1))) +endif +endif +else +# Other targets just use libgcc_s.1 $(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1))) endif endif + # winpthread is only Windows, pthread is only others ifeq ($(OS),WINNT) $(eval $(call copy_csl,$(call versioned_libname,libwinpthread,1))) @@ -104,4 +116,23 @@ distclean-csl: clean-csl else $(eval $(call bb-install,csl,CSL,true)) +ifeq ($(OS),WINNT) +GCC_VERSION = 14 +install-csl: + mkdir -p $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libgcc_s.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libgcc.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libmsvcrt.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libssp.dll.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/$(GCC_VERSION)/libssp.dll.a $(build_libdir)/ +endif +endif +ifeq ($(OS),WINNT) +uninstall-csl: uninstall-gcc-libraries +uninstall-gcc-libraries: + -rm -f $(build_private_libdir)/libgcc_s.a + -rm -f $(build_private_libdir)/libgcc.a + -rm -f $(build_private_libdir)/libmsvcrt.a + -rm -f $(build_private_libdir)/libssp.dll.a + -rm -f $(build_libdir)/libssp.dll.a endif diff --git a/deps/curl.mk b/deps/curl.mk index a063dfe07fba0..6232d56e5e333 100644 --- a/deps/curl.mk +++ b/deps/curl.mk @@ -1,6 +1,10 @@ ## CURL ## include $(SRCDIR)/curl.version +ifeq ($(USE_SYSTEM_OPENSSL), 0) +$(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/openssl +endif + ifeq ($(USE_SYSTEM_LIBSSH2), 0) $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/libssh2 endif @@ -14,7 +18,7 @@ $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/nghttp endif ifneq ($(USE_BINARYBUILDER_CURL),1) -CURL_LDFLAGS := 
$(RPATH_ESCAPED_ORIGIN) +CURL_LDFLAGS := $(RPATH_ESCAPED_ORIGIN) -Wl,-rpath,$(build_shlibdir) # On older Linuces (those that use OpenSSL < 1.1) we include `libpthread` explicitly. # It doesn't hurt to include it explicitly elsewhere, so we do so. @@ -35,25 +39,29 @@ checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2 ## xref: https://github.com/JuliaPackaging/Yggdrasil/blob/master/L/LibCURL/common.jl # Disable....almost everything -CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \ - --without-gnutls --without-libidn2 --without-librtmp \ - --without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \ - --disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \ - --without-gssapi --without-brotli +CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \ + --without-gnutls \ + --without-libidn2 --without-librtmp \ + --without-nss --without-libpsl \ + --disable-ares --disable-manual \ + --disable-ldap --disable-ldaps --without-zsh-functions-dir \ + --disable-static --without-libgsasl \ + --without-brotli # A few things we actually enable -CURL_CONFIGURE_FLAGS += --enable-versioned-symbols \ - --with-libssh2=${build_prefix} --with-zlib=${build_prefix} --with-nghttp2=${build_prefix} +CURL_CONFIGURE_FLAGS += \ + --with-libssh2=${build_prefix} --with-zlib=${build_prefix} --with-nghttp2=${build_prefix} \ + --enable-versioned-symbols # We use different TLS libraries on different platforms. # On Windows, we use schannel # On MacOS, we use SecureTransport -# On Linux, we use mbedTLS +# On Linux, we use OpenSSL ifeq ($(OS), WINNT) CURL_TLS_CONFIGURE_FLAGS := --with-schannel else ifeq ($(OS), Darwin) CURL_TLS_CONFIGURE_FLAGS := --with-secure-transport else -CURL_TLS_CONFIGURE_FLAGS := --with-mbedtls=$(build_prefix) +CURL_TLS_CONFIGURE_FLAGS := --with-openssl endif CURL_CONFIGURE_FLAGS += $(CURL_TLS_CONFIGURE_FLAGS) diff --git a/deps/curl.version b/deps/curl.version index f704bc2bebc61..fbbb55ffb17ea 100644 --- a/deps/curl.version +++ b/deps/curl.version @@ -3,4 +3,4 @@ CURL_JLL_NAME := LibCURL ## source build -CURL_VER := 8.0.1 +CURL_VER := 8.11.1 diff --git a/deps/dsfmt.version b/deps/dsfmt.version index bbb63417f46cd..d81db2d10ff09 100644 --- a/deps/dsfmt.version +++ b/deps/dsfmt.version @@ -1,5 +1,7 @@ +# -*- makefile -*- + ## jll artifact DSFMT_JLL_NAME := dSFMT ## source build -DSFMT_VER := 2.2.4 +DSFMT_VER := 2.2.5 diff --git a/deps/gmp.mk b/deps/gmp.mk index 12ba15f8aa0f6..23075c861cd35 100644 --- a/deps/gmp.mk +++ b/deps/gmp.mk @@ -35,29 +35,17 @@ $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2 checksum-gmp: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2 $(JLCHECKSUM) $< -# Apply fix to avoid using Apple ARM reserved register X18 -# Necessary for version 6.2.1, remove after next gmp release -$(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted +$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp-HG-changeset.patch + patch -p1 -f < $(SRCDIR)/patches/gmp-exception.patch echo 1 > $@ -$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied +$(SRCCACHE)/gmp-$(GMP_VER)/gmp-alloc_overflow.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp-exception.patch + patch -p1 -f < $(SRCDIR)/patches/gmp-alloc_overflow.patch echo 1 > $@ 
-$(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied - cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch - echo 1 > $@ - -$(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied - cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp-CVE-2021-43618.patch - echo 1 > $@ - -$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied +$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-alloc_overflow.patch-applied echo 1 > $@ $(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-patched diff --git a/deps/gmp.version b/deps/gmp.version index f77cac5906cea..3b6659faea7b7 100644 --- a/deps/gmp.version +++ b/deps/gmp.version @@ -1,5 +1,6 @@ +# -*- makefile -*- ## jll artifact GMP_JLL_NAME := GMP ## source build -GMP_VER := 6.2.1 +GMP_VER := 6.3.0 diff --git a/deps/ittapi.mk b/deps/ittapi.mk index 1a47c3ae89390..b62b981a34ddb 100644 --- a/deps/ittapi.mk +++ b/deps/ittapi.mk @@ -40,4 +40,5 @@ fastcheck-ittapi: #none check-ittapi: #none clean-ittapi: - -rm -f $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled $(build_libdir)/libopenlibm.a + -rm -f $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled + -rm -f $(build_libdir)/libittnotify.a $(build_libdir)/libjitprofiling.a diff --git a/deps/libgit2.mk b/deps/libgit2.mk index 014fdc0108f7c..8b17ae6d70424 100644 --- a/deps/libgit2.mk +++ b/deps/libgit2.mk @@ -9,8 +9,8 @@ ifeq ($(USE_SYSTEM_LIBSSH2), 0) $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/libssh2 endif -ifeq ($(USE_SYSTEM_MBEDTLS), 0) -$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls +ifeq ($(USE_SYSTEM_OPENSSL), 0) +$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/openssl endif LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=ON -DUSE_SSH=ON -DBUILD_CLI=OFF @@ -29,15 +29,23 @@ endif ifeq ($(BUILD_OS),WINNT) LIBGIT2_OPTS += -G"MSYS Makefiles" else -LIBGIT2_OPTS += -DBUILD_CLAR=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool` +LIBGIT2_OPTS += -DBUILD_TESTS=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool` LIBGIT2_OPTS += -DCMAKE_FIND_ROOT_PATH=/usr/$(XC_HOST) -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY endif endif +ifeq ($(OS),OpenBSD) +# iconv.h is third-party +LIBGIT2_OPTS += -DCMAKE_C_FLAGS="-I/usr/local/include" +endif -ifneq (,$(findstring $(OS),Linux FreeBSD)) -LIBGIT2_OPTS += -DUSE_HTTPS="mbedTLS" -DUSE_SHA1="CollisionDetection" -DCMAKE_INSTALL_RPATH="\$$ORIGIN" +ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD)) +LIBGIT2_OPTS += -DUSE_HTTPS="OpenSSL" -DUSE_SHA1="CollisionDetection" -DCMAKE_INSTALL_RPATH="\$$ORIGIN" endif +# use the bundled distribution of libpcre. we should consider linking against the +# pcre2 library we're building anyway, but this is currently how Yggdrasil does it. 
+LIBGIT2_OPTS += -DREGEX_BACKEND="builtin" + LIBGIT2_SRC_PATH := $(SRCCACHE)/$(LIBGIT2_SRC_DIR) $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/source-extracted diff --git a/deps/libgit2.version b/deps/libgit2.version index b8cefc3c5c6f3..6bfb6106e67d2 100644 --- a/deps/libgit2.version +++ b/deps/libgit2.version @@ -3,11 +3,12 @@ LIBGIT2_JLL_NAME := LibGit2 ## source build -LIBGIT2_BRANCH=v1.6.1 -LIBGIT2_SHA1=8a871d13b7f4e186b8ad943ae5a7fcf30be52e67 +LIBGIT2_BRANCH=v1.9.0 +LIBGIT2_SHA1=338e6fb681369ff0537719095e22ce9dc602dbf0 ## Other deps # Specify the version of the Mozilla CA Certificate Store to obtain. # The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes. # See https://curl.haxx.se/docs/caextract.html for more details. -MOZILLA_CACERT_VERSION := 2023-01-10 +# Keep in sync with `stdlib/MozillaCACerts_jll/Project.toml`. +MOZILLA_CACERT_VERSION := 2024-12-31 diff --git a/deps/libssh2.mk b/deps/libssh2.mk index d0174c0c090e2..3f802db15be6d 100644 --- a/deps/libssh2.mk +++ b/deps/libssh2.mk @@ -4,23 +4,27 @@ LIBSSH2_GIT_URL := https://github.com/libssh2/libssh2.git LIBSSH2_TAR_URL = https://api.github.com/repos/libssh2/libssh2/tarball/$1 $(eval $(call git-external,libssh2,LIBSSH2,CMakeLists.txt,,$(SRCCACHE))) -ifeq ($(USE_SYSTEM_MBEDTLS), 0) -$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls +ifeq ($(USE_SYSTEM_OPENSSL), 0) +$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/openssl endif LIBSSH2_OPTS := $(CMAKE_COMMON) -DBUILD_SHARED_LIBS=ON -DBUILD_EXAMPLES=OFF \ -DCMAKE_BUILD_TYPE=Release +ifneq ($(fPIC),) +LIBSSH2_OPTS += -DCMAKE_C_FLAGS="-fPIC" +endif + ifeq ($(OS),WINNT) LIBSSH2_OPTS += -DCRYPTO_BACKEND=WinCNG -DENABLE_ZLIB_COMPRESSION=OFF ifeq ($(BUILD_OS),WINNT) LIBSSH2_OPTS += -G"MSYS Makefiles" endif else -LIBSSH2_OPTS += -DCRYPTO_BACKEND=mbedTLS -DENABLE_ZLIB_COMPRESSION=OFF +LIBSSH2_OPTS += -DCRYPTO_BACKEND=OpenSSL -DENABLE_ZLIB_COMPRESSION=OFF endif -ifneq (,$(findstring $(OS),Linux FreeBSD)) +ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD)) LIBSSH2_OPTS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN" endif @@ -30,22 +34,6 @@ endif LIBSSH2_SRC_PATH := $(SRCCACHE)/$(LIBSSH2_SRC_DIR) - # Apply patch to fix v1.10.0 CVE (https://github.com/libssh2/libssh2/issues/649), drop with v1.11 -$(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied: $(LIBSSH2_SRC_PATH)/source-extracted - cd $(LIBSSH2_SRC_PATH) && \ - patch -p1 -f < $(SRCDIR)/patches/libssh2-userauth-check.patch - echo 1 > $@ - -# issue: https://github.com/JuliaLang/julia/issues/45645#issuecomment-1153214379 -# fix pr: https://github.com/libssh2/libssh2/pull/711 -$(LIBSSH2_SRC_PATH)/libssh2-fix-import-lib-name.patch-applied: $(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied - cd $(LIBSSH2_SRC_PATH) && \ - patch -p1 -f < $(SRCDIR)/patches/libssh2-fix-import-lib-name.patch - echo 1 > $@ - -$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: \ - $(LIBSSH2_SRC_PATH)/libssh2-fix-import-lib-name.patch-applied - $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ @@ -53,7 +41,7 @@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extr echo 1 > $@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured - $(MAKE) -C $(dir $<) libssh2 + $(MAKE) -C $(dir $<) echo 1 > $@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-checked: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled diff --git a/deps/libssh2.version 
b/deps/libssh2.version index 3d5b2bb98d7eb..d6cc8a629c3bf 100644 --- a/deps/libssh2.version +++ b/deps/libssh2.version @@ -1,7 +1,8 @@ +# -*- makefile -*- ## jll artifact LIBSSH2_JLL_NAME := LibSSH2 ## source build -LIBSSH2_VER := 1.10.2 -LIBSSH2_BRANCH=libssh2-1.10.0 -LIBSSH2_SHA1=635caa90787220ac3773c1d5ba11f1236c22eae8 +LIBSSH2_VER := 1.11.1 +LIBSSH2_BRANCH=libssh2-1.11.1 +LIBSSH2_SHA1=a312b43325e3383c865a87bb1d26cb52e3292641 diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk index 7d79e03ee8d0e..85b2c23473a18 100644 --- a/deps/libsuitesparse.mk +++ b/deps/libsuitesparse.mk @@ -1,39 +1,43 @@ ## LIBSUITESPARSE ## include $(SRCDIR)/libsuitesparse.version -ifeq ($(USE_BLAS64), 1) -UMFPACK_CONFIG := -DLONGBLAS='long long' -CHOLMOD_CONFIG := -DLONGBLAS='long long' -SPQR_CONFIG := -DLONGBLAS='long long' -UMFPACK_CONFIG += -DSUN64 -CHOLMOD_CONFIG += -DSUN64 -SPQR_CONFIG += -DSUN64 -endif - -# Disable linking to libmetis -CHOLMOD_CONFIG += -DNPARTITION - ifneq ($(USE_BINARYBUILDER_LIBSUITESPARSE), 1) -LIBSUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR -LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu ldl umfpack rbio spqr) +LIBSUITESPARSE_PROJECTS := "suitesparse_config;amd;btf;camd;ccolamd;colamd;cholmod;klu;ldl;umfpack;rbio;spqr" +LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig $(subst ;, ,$(LIBSUITESPARSE_PROJECTS))) -SUITESPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(LIBSUITESPARSE_VER)/lib" -ifeq ($(OS), Darwin) -SUITESPARSE_LIB += $(RPATH_ESCAPED_ORIGIN) -endif -LIBSUITESPARSE_MFLAGS := CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" F77="$(FC)" \ - AR="$(AR)" RANLIB="$(RANLIB)" \ - BLAS="-L$(build_shlibdir) -lblastrampoline" \ - LAPACK="-L$(build_shlibdir) -lblastrampoline" \ - LDFLAGS="$(SUITESPARSE_LIB) $(SANITIZE_LDFLAGS)" CFOPENMP="" CUDA=no CUDA_PATH="" \ - UMFPACK_CONFIG="$(UMFPACK_CONFIG)" \ - CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" \ - SPQR_CONFIG="$(SPQR_CONFIG)" ifeq ($(OS),WINNT) -LIBSUITESPARSE_MFLAGS += UNAME=Windows +BLAS_LIB_NAME_NO_EXT:=blastrampoline-5 +else +BLAS_LIB_NAME_NO_EXT:=blastrampoline +endif + +LIBSUITESPARSE_CMAKE_FLAGS := $(CMAKE_COMMON) \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_STATIC_LIBS=OFF \ + -DBUILD_TESTING=OFF \ + -DSUITESPARSE_ENABLE_PROJECTS=$(LIBSUITESPARSE_PROJECTS) \ + -DSUITESPARSE_DEMOS=OFF \ + -DSUITESPARSE_USE_STRICT=ON \ + -DSUITESPARSE_USE_CUDA=OFF \ + -DSUITESPARSE_USE_FORTRAN=OFF \ + -DSUITESPARSE_USE_OPENMP=OFF \ + -DCHOLMOD_PARTITION=ON \ + -DBLAS_FOUND=1 \ + -DBLAS_LIBRARIES="$(build_shlibdir)/lib$(BLAS_LIB_NAME_NO_EXT).$(SHLIB_EXT)" \ + -DBLAS_LINKER_FLAGS="$(BLAS_LIB_NAME_NO_EXT)" \ + -DBLA_VENDOR="$(BLAS_LIB_NAME_NO_EXT)" \ + -DLAPACK_LIBRARIES="$(build_shlibdir)/lib$(BLAS_LIB_NAME_NO_EXT).$(SHLIB_EXT)" \ + -DLAPACK_LINKER_FLAGS="${BLAS_LIB_NAME_NO_EXT}" + +ifeq ($(BINARY),64) +LIBSUITESPARSE_CMAKE_FLAGS += -DBLAS64_SUFFIX="_64" -DSUITESPARSE_USE_64BIT_BLAS=YES else -LIBSUITESPARSE_MFLAGS += UNAME=$(OS) +LIBSUITESPARSE_CMAKE_FLAGS += -DSUITESPARSE_USE_64BIT_BLAS=NO +endif + +ifneq (,$(findstring $(OS),Linux FreeBSD OpenBSD)) +LIBSUITESPARSE_CMAKE_FLAGS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN" endif $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz: | $(SRCCACHE) @@ -48,20 +52,15 @@ $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted: $(SRCCACHE)/Suit checksum-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz $(JLCHECKSUM) $< 
-$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted - cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch +$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-patched: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted echo 1 > $@ -$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/blastrampoline -$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted - $(MAKE) -C $(dir $<)SuiteSparse_config library config $(LIBSUITESPARSE_MFLAGS) - $(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT) - for PROJ in $(LIBSUITESPARSE_PROJECTS); do \ - $(MAKE) -C $(dir $<)$${PROJ} library $(LIBSUITESPARSE_MFLAGS) || exit 1; \ - $(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \ - done +$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-patched + cd $(dir $<) && $(CMAKE) . $(LIBSUITESPARSE_CMAKE_FLAGS) + $(MAKE) -C $(dir $<) + $(MAKE) -C $(dir $<) install echo 1 > $@ ifeq ($(OS),WINNT) @@ -70,19 +69,14 @@ else LIBSUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)" endif $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled - for PROJ in $(LIBSUITESPARSE_PROJECTS); do \ + for PROJ in $(shell echo $(subst ;, ,$(LIBSUITESPARSE_PROJECTS))); do \ $(LIBSUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(LIBSUITESPARSE_MFLAGS) || exit 1; \ done echo 1 > $@ -UNINSTALL_suitesparse := $(LIBSUITESPARSE_VER) manual_suitesparse $(LIBSUITESPARSE_LIBS) +UNINSTALL_libsuitesparse := $(LIBSUITESPARSE_VER) manual_libsuitesparse $(LIBSUITESPARSE_LIBS) $(build_prefix)/manifest/libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir) - for lib in $(LIBSUITESPARSE_LIBS); do \ - cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \ - done - #cp -a $(dir $<)lib/* $(build_shlibdir) - #cp -a $(dir $<)include/* $(build_includedir) echo $(UNINSTALL_libsuitesparse) > $@ clean-libsuitesparse: uninstall-libsuitesparse @@ -101,7 +95,7 @@ configure-libsuitesparse: extract-libsuitesparse compile-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled fastcheck-libsuitesparse: #none check-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked -install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse +install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse remove-libsuitesparse-gpl-lib else # USE_BINARYBUILDER_LIBSUITESPARSE @@ -109,6 +103,7 @@ $(eval $(call bb-install,libsuitesparse,LIBSUITESPARSE,false)) # libsuitesparse depends on blastrampoline compile-libsuitesparse: | $(build_prefix)/manifest/blastrampoline +install-libsuitesparse: | remove-libsuitesparse-gpl-lib endif define manual_libsuitesparse @@ -116,3 +111,13 @@ uninstall-libsuitesparse: -rm -f $(build_prefix)/manifest/libsuitesparse -rm -f $(addprefix $(build_shlibdir)/lib,$3) endef + +remove-libsuitesparse-gpl-lib: +ifeq ($(USE_GPL_LIBS),0) + @echo Removing GPL libs... 
+ -rm -f $(build_bindir)/libcholmod* + -rm -f $(build_bindir)/libklu_cholmod* + -rm -f $(build_bindir)/librbio* + -rm -f $(build_bindir)/libspqr* + -rm -f $(build_bindir)/libumfpack* +endif diff --git a/deps/libsuitesparse.version b/deps/libsuitesparse.version index 2237db6f2d116..cc294f68c2d5a 100644 --- a/deps/libsuitesparse.version +++ b/deps/libsuitesparse.version @@ -1,5 +1,8 @@ +# -*- makefile -*- + ## jll artifact LIBSUITESPARSE_JLL_NAME := SuiteSparse ## source build -LIBSUITESPARSE_VER := 5.10.1 +LIBSUITESPARSE_VER := 7.8.3 +LIBSUITESPARSE_SHA1=d3c4926d2c47fd6ae558e898bfc072ade210a2a1 diff --git a/deps/libtracyclient.version b/deps/libtracyclient.version index 0baf8504261f1..60b5a3e8ce630 100644 --- a/deps/libtracyclient.version +++ b/deps/libtracyclient.version @@ -1,6 +1,6 @@ ## jll artifact LIBTRACYCLIENT_JLL_NAME := LibTracyClient -LIBTRACYCLIENT_JLL_VER := 0.9.1+2 +LIBTRACYCLIENT_JLL_VER := 0.9.1+5 ## source build LIBTRACYCLIENT_VER := 0.9.1 diff --git a/deps/libuv.version b/deps/libuv.version index 01bf4fecc6dc6..ebfc63927d9db 100644 --- a/deps/libuv.version +++ b/deps/libuv.version @@ -1,7 +1,9 @@ +# -*- makefile -*- + ## jll artifact LIBUV_JLL_NAME := LibUV ## source build LIBUV_VER := 2 -LIBUV_BRANCH=julia-uv2-1.44.2 -LIBUV_SHA1=2723e256e952be0b015b3c0086f717c3d365d97e +LIBUV_BRANCH=julia-uv2-1.48.0 +LIBUV_SHA1=af4172ec713ee986ba1a989b9e33993a07c60c9e diff --git a/deps/libwhich.version b/deps/libwhich.version index 0fa713024ef99..09ea0197d10c1 100644 --- a/deps/libwhich.version +++ b/deps/libwhich.version @@ -1,2 +1,2 @@ LIBWHICH_BRANCH=master -LIBWHICH_SHA1=81e9723c0273d78493dc8c8ed570f68d9ce7e89e +LIBWHICH_SHA1=99a0ea12689e41164456dba03e93bc40924de880 diff --git a/deps/lld.version b/deps/lld.version index d4b2a664d980c..8c7008fc93d7d 100644 --- a/deps/lld.version +++ b/deps/lld.version @@ -1,3 +1,5 @@ +# -*- makefile -*- + ## jll artifact LLD_JLL_NAME := LLD -LLD_JLL_VER := 15.0.7+5 +LLD_JLL_VER := 18.1.7+3 diff --git a/deps/llvm-tools.version b/deps/llvm-tools.version index f2ecd0b33e989..8a1159fd69174 100644 --- a/deps/llvm-tools.version +++ b/deps/llvm-tools.version @@ -1,5 +1,7 @@ +# -*- makefile -*- + ## jll artifact # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`) LLVM_TOOLS_JLL_NAME := LLVM -LLVM_TOOLS_JLL_VER := 15.0.7+5 -LLVM_TOOLS_ASSERT_JLL_VER := 15.0.7+5 +LLVM_TOOLS_JLL_VER := 18.1.7+3 +LLVM_TOOLS_ASSERT_JLL_VER := 18.1.7+3 diff --git a/deps/llvm.mk b/deps/llvm.mk index 2a8365dd73e75..09dd4f187d611 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -86,22 +86,23 @@ endif LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False # Allow adding LLVM specific flags -LLVM_CFLAGS += $(CFLAGS) -LLVM_CXXFLAGS += $(CXXFLAGS) +LLVM_CFLAGS += $(CFLAGS) $(BOLT_CFLAGS) +LLVM_CXXFLAGS += $(CXXFLAGS) $(BOLT_CFLAGS) LLVM_CPPFLAGS += $(CPPFLAGS) LLVM_LDFLAGS += $(LDFLAGS) +LLVM_LDFLAGS += $(BOLT_LDFLAGS) LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)" LLVM_CMAKE += -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="$(LLVM_EXPERIMENTAL_TARGETS)" LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))" -LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_LIBRARY="$(build_prefix)/lib" -LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF +LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)" +LLVM_CMAKE += -DLLVM_ENABLE_ZSTD=OFF ifeq ($(USE_POLLY_ACC),1) LLVM_CMAKE += -DPOLLY_ENABLE_GPGPU_CODEGEN=ON endif 
LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir)) LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir)) LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON -LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_HISTEDIT_H=Off -DHAVE_LIBEDIT=Off +LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off -DLLVM_ENABLE_LIBEDIT=OFF ifeq ($(LLVM_ASSERTIONS), 1) LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON endif # LLVM_ASSERTIONS @@ -133,7 +134,7 @@ endif # USE_PERF_JITEVENTS ifeq ($(BUILD_LLDB),1) ifeq ($(USECLANG),0) -LLVM_CXXFLAGS += -std=c++0x +LLVM_CXXFLAGS += -std=c++17 endif # USECLANG ifeq ($(LLDB_DISABLE_PYTHON),1) LLVM_CXXFLAGS += -DLLDB_DISABLE_PYTHON @@ -210,11 +211,14 @@ LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \ LLVM_CMAKE += -DLLVM_VERSION_SUFFIX:STRING="jl" LLVM_CMAKE += -DLLVM_SHLIB_SYMBOL_VERSION:STRING="JL_LLVM_$(LLVM_VER_SHORT)" +# Change the default bug report URL to Julia's issue tracker +LLVM_CMAKE += -DBUG_REPORT_URL="https://github.com/julialang/julia" + # Apply version-specific LLVM patches sequentially LLVM_PATCH_PREV := define LLVM_PATCH $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV) - cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 < $$(SRCDIR)/patches/$1.patch + cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 -f < $$(SRCDIR)/patches/$1.patch echo 1 > $$@ # declare that applying any patch must re-run the compile step $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied @@ -223,14 +227,16 @@ endef define LLVM_PROJ_PATCH $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV) - cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch + cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 -f < $$(SRCDIR)/patches/$1.patch echo 1 > $$@ # declare that applying any patch must re-run the compile step $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied endef +ifeq ($(shell test $(LLVM_VER_MAJ) -lt 19 && echo true),true) $(eval $(call LLVM_PATCH,llvm-ittapi-cmake)) +endif ifeq ($(USE_SYSTEM_ZLIB), 0) $(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zlib @@ -249,7 +255,7 @@ $(BUILDDIR)/julia-patches.patch: # Apply the patch. $(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied: $(BUILDDIR)/julia-patches.patch $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted - cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 < $(realpath $<) + cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 -f < $(realpath $<) echo 1 > $@ # Require application of Julia's patchset before configuring LLVM. 
@@ -288,6 +294,9 @@ ifeq ($(OS),Darwin) # https://github.com/JuliaLang/julia/issues/29981 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib endif +ifeq ($(BUILD_LLD), 1) +LLVM_INSTALL += && cp $2$$(build_bindir)/lld$$(EXE) $2$$(build_depsbindir) +endif $(eval $(call staged-install, \ llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \ diff --git a/deps/llvm.version b/deps/llvm.version index 4e7969994141e..be03d1529ce7c 100644 --- a/deps/llvm.version +++ b/deps/llvm.version @@ -2,14 +2,14 @@ ## jll artifact LLVM_JLL_NAME := libLLVM -LLVM_ASSERT_JLL_VER := 15.0.7+5 +LLVM_ASSERT_JLL_VER := 18.1.7+3 ## source build # Version number of LLVM -LLVM_VER := 15.0.7 +LLVM_VER := 18.1.7 # Git branch name in `LLVM_GIT_URL` repository -LLVM_BRANCH=julia-15.0.7-5 +LLVM_BRANCH=julia-18.1.7-2 # Git ref in `LLVM_GIT_URL` repository -LLVM_SHA1=julia-15.0.7-5 +LLVM_SHA1=julia-18.1.7-2 ## Following options are used to automatically fetch patchset from Julia's fork. This is ## useful if you want to build an external LLVM while still applying Julia's patches. @@ -18,6 +18,6 @@ LLVM_APPLY_JULIA_PATCHES := 0 # GitHub repository to use for fetching the Julia patches to apply to LLVM source code. LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project # Base GitHub ref for generating the diff. -LLVM_BASE_REF := llvm:llvmorg-15.0.7 +LLVM_BASE_REF := llvm:llvmorg-18.1.7 # Julia fork's GitHub ref for generating the diff. -LLVM_JULIA_REF := JuliaLang:julia-15.0.7-5 +LLVM_JULIA_REF := JuliaLang:julia-18.1.7-2 diff --git a/deps/llvmunwind.version b/deps/llvmunwind.version index 7d13af9a158f7..666cae54025b4 100644 --- a/deps/llvmunwind.version +++ b/deps/llvmunwind.version @@ -2,4 +2,4 @@ LLVMUNWIND_JLL_NAME := LLVMLibUnwind ## source build -LLVMUNWIND_VER := 12.0.1 +LLVMUNWIND_VER := 19.1.4 diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk deleted file mode 100644 index b4147c2c2684e..0000000000000 --- a/deps/mbedtls.mk +++ /dev/null @@ -1,97 +0,0 @@ -## mbedtls -include $(SRCDIR)/mbedtls.version - -ifneq ($(USE_BINARYBUILDER_MBEDTLS), 1) -MBEDTLS_SRC = mbedtls-$(MBEDTLS_VER) -MBEDTLS_URL = https://github.com/Mbed-TLS/mbedtls/archive/v$(MBEDTLS_VER).tar.gz - -MBEDTLS_OPTS := $(CMAKE_COMMON) -DUSE_SHARED_MBEDTLS_LIBRARY=ON \ - -DUSE_STATIC_MBEDTLS_LIBRARY=OFF -DENABLE_PROGRAMS=OFF -DCMAKE_BUILD_TYPE=Release - -MBEDTLS_OPTS += -DENABLE_ZLIB_SUPPORT=OFF -DMBEDTLS_FATAL_WARNINGS=OFF -ifeq ($(BUILD_OS),WINNT) -MBEDTLS_OPTS += -G"MSYS Makefiles" -endif - -ifneq (,$(findstring $(OS),Linux FreeBSD)) -MBEDTLS_OPTS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN" -endif - -$(SRCCACHE)/$(MBEDTLS_SRC).tar.gz: | $(SRCCACHE) - $(JLDOWNLOAD) $@ $(MBEDTLS_URL) - -$(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz - $(JLCHECKSUM) $< - mkdir -p $(dir $@) && \ - $(TAR) -C $(dir $@) --strip-components 1 -xf $< - # Force-enable MD4 - sed -i.org "s|//#define MBEDTLS_MD4_C|#define MBEDTLS_MD4_C|" $(SRCCACHE)/$(MBEDTLS_SRC)/include/mbedtls/config.h - touch -c $(SRCCACHE)/$(MBEDTLS_SRC)/CMakeLists.txt # old target - echo 1 > $@ - -checksum-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz - $(JLCHECKSUM) $< - -$(BUILDDIR)/$(MBEDTLS_SRC)/build-configured: $(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted - mkdir -p $(dir $@) - cd $(dir $@) && \ - $(CMAKE) $(dir $<) $(MBEDTLS_OPTS) - echo 1 > $@ - -$(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled: $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured - $(MAKE) -C $(dir $<) - echo 1 > $@ - -$(BUILDDIR)/$(MBEDTLS_SRC)/build-checked: 
$(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled -ifeq ($(OS),$(BUILD_OS)) - $(MAKE) -C $(dir $@) test -endif - echo 1 > $@ - -ifeq ($(OS),WINNT) -define MBEDTLS_INSTALL - mkdir -p $2/$$(build_shlibdir) - cp $1/library/libmbedcrypto.$$(SHLIB_EXT) $2/$$(build_shlibdir) - cp $1/library/libmbedx509.$$(SHLIB_EXT) $2/$$(build_shlibdir) - cp $1/library/libmbedtls.$$(SHLIB_EXT) $2/$$(build_shlibdir) -endef -else -define MBEDTLS_INSTALL - $(call MAKE_INSTALL,$1,$2,) -endef -endif -$(eval $(call staged-install, \ - mbedtls,$(MBEDTLS_SRC), \ - MBEDTLS_INSTALL,,, \ - $$(INSTALL_NAME_CMD)libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CMD)libmbedtls.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CHANGE_CMD) libmbedx509.1.dylib @rpath/libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.7.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.7.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \ - $$(INSTALL_NAME_CMD)libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedcrypto.$$(SHLIB_EXT))) - - -clean-mbedtls: - -rm -f $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured \ - $(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled - -$(MAKE) -C $(BUILDDIR)/$(MBEDTLS_SRC) clean - -distclean-mbedtls: - rm -rf $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz \ - $(SRCCACHE)/$(MBEDTLS_SRC) \ - $(BUILDDIR)/$(MBEDTLS_SRC) - - -get-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz -extract-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted -configure-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured -compile-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled -# tests disabled since they are known to fail -fastcheck-mbedtls: #check-mbedtls -check-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-checked - -else # USE_BINARYBUILDER_MBEDTLS - -$(eval $(call bb-install,mbedtls,MBEDTLS,false)) - -endif diff --git a/deps/mbedtls.version b/deps/mbedtls.version deleted file mode 100644 index f262476af1684..0000000000000 --- a/deps/mbedtls.version +++ /dev/null @@ -1,5 +0,0 @@ -## jll artifact -MBEDTLS_JLL_NAME := MbedTLS - -## source build -MBEDTLS_VER := 2.28.2 diff --git a/deps/mmtk_julia.mk b/deps/mmtk_julia.mk new file mode 100644 index 0000000000000..7ec55426821c3 --- /dev/null +++ b/deps/mmtk_julia.mk @@ -0,0 +1,72 @@ +## MMTK ## + +# Both MMTK_MOVING and MMTK_PLAN should be specified in the Make.user file. 
+# At this point, since we only support non-moving this is always set to 0 +# FIXME: change it to `?:` when introducing moving plans +MMTK_MOVING := 0 +MMTK_VARS := MMTK_PLAN=$(MMTK_PLAN) MMTK_MOVING=$(MMTK_MOVING) + +$(eval $(call git-external,mmtk_julia,MMTK_JULIA,,,$(BUILDDIR))) +get-mmtk_julia: $(MMTK_JULIA_SRC_FILE) + +# Download the binding, build it from source +ifeq (${MMTK_JULIA_DIR},$(BUILDROOT)/usr/lib/mmtk_julia) + +MMTK_JULIA_DIR=$(BUILDROOT)/deps/$(BUILDDIR)/$(MMTK_JULIA_SRC_DIR) +MMTK_JULIA_LIB_PATH=$(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD) +PROJECT_DIRS := JULIA_PATH=$(JULIAHOME) JULIA_BUILDROOT=$(BUILDROOT) MMTK_JULIA_DIR=$(MMTK_JULIA_DIR) + +$(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)/build-compiled: $(BUILDROOT)/usr/lib/libmmtk_julia.so + @echo 1 > $@ + +# NB: use the absolute dir when creating the symlink +$(BUILDROOT)/usr/lib/libmmtk_julia.so: $(MMTK_JULIA_LIB_PATH)/libmmtk_julia.so + @ln -sf $(MMTK_JULIA_LIB_PATH)/libmmtk_julia.so $@ + +$(MMTK_JULIA_LIB_PATH)/libmmtk_julia.so: $(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)/source-extracted + @$(PROJECT_DIRS) $(MMTK_VARS) $(MAKE) -C $(MMTK_JULIA_DIR) $(MMTK_BUILD) + +extract-mmtk_julia: $(BUILDDIR)/$(MMTK_JULIA_SRC_DIR)/source-extracted +configure-mmtk_julia: extract-mmtk_julia +compile-mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so +fastcheck-mmtk_julia: #none +check-mmtk_julia: compile-mmtk_julia + +$(eval $(call symlink_install,mmtk_julia,$$(MMTK_JULIA_SRC_DIR),$$(BUILDROOT)/usr/lib)) + +# In this case, there is a custom version of the binding in MMTK_JULIA_DIR +# Build it and symlink libmmtk_julia.so file into $(BUILDROOT)/usr/lib +else + +PROJECT_DIRS := JULIA_PATH=$(JULIAHOME) JULIA_BUILDROOT=$(BUILDROOT) MMTK_JULIA_DIR=$(MMTK_JULIA_DIR) +MMTK_JULIA_LIB_PATH=$(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD) + +install-mmtk_julia: compile-mmtk_julia $(build_prefix)/manifest/mmtk_julia + +compile-mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so + +version-check-mmtk_julia: $(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so + +# NB: This will NOT run `cargo build` if there are changes in the Rust source files +# inside the binding repo. 
However the target below should remake the symlink if there +# are changes in the libmmtk_julia.so from the custom MMTK_JULIA_DIR folder +$(BUILDROOT)/usr/lib/libmmtk_julia.so: $(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so + @ln -sf $(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so $@ + +$(MMTK_JULIA_DIR)/mmtk/target/$(MMTK_BUILD)/libmmtk_julia.so: + @$(PROJECT_DIRS) $(MMTK_VARS) $(MAKE) -C $(MMTK_JULIA_DIR) $(MMTK_BUILD) + +MMTK_JULIA_VER := mmtk_julia_custom + +UNINSTALL_mmtk_julia := $(MMTK_JULIA_VER) manual_mmtk_julia + +define manual_mmtk_julia +uninstall-mmtk_julia: + -rm -f $(build_prefix)/manifest/mmtk_julia + -rm -f $(BUILDROOT)/usr/lib/libmmtk_julia.so +endef + +$(build_prefix)/manifest/mmtk_julia: $(BUILDROOT)/usr/lib/libmmtk_julia.so + @echo $(UNINSTALL_mmtk_julia) > $@ + +endif # MMTK_JULIA_DIR diff --git a/deps/mmtk_julia.version b/deps/mmtk_julia.version new file mode 100644 index 0000000000000..60f7cffe7b4de --- /dev/null +++ b/deps/mmtk_julia.version @@ -0,0 +1,4 @@ +MMTK_JULIA_BRANCH = master +MMTK_JULIA_SHA1 = b69acf5af7a7dd97c1cc6fd99f7c2d51b477f214 +MMTK_JULIA_GIT_URL := https://github.com/mmtk/mmtk-julia.git +MMTK_JULIA_TAR_URL = https://github.com/mmtk/mmtk-julia/archive/refs/tags/v0.30.2.tar.gz diff --git a/deps/mpfr.version b/deps/mpfr.version index e4f1c8a45aeb0..ec109e181ecdc 100644 --- a/deps/mpfr.version +++ b/deps/mpfr.version @@ -2,4 +2,4 @@ MPFR_JLL_NAME := MPFR ## source build -MPFR_VER := 4.2.0 +MPFR_VER := 4.2.1 diff --git a/deps/nghttp2.version b/deps/nghttp2.version index 200e08bf4bfd9..a3cd46d457c2c 100644 --- a/deps/nghttp2.version +++ b/deps/nghttp2.version @@ -3,4 +3,4 @@ NGHTTP2_JLL_NAME := nghttp2 ## source build -NGHTTP2_VER := 1.52.0 +NGHTTP2_VER := 1.64.0 diff --git a/deps/nvtx.mk b/deps/nvtx.mk new file mode 100644 index 0000000000000..c4d4db2deba65 --- /dev/null +++ b/deps/nvtx.mk @@ -0,0 +1,31 @@ +## nvtx ## +include $(SRCDIR)/nvtx.version + +NVTX_GIT_URL := https://github.com/NVIDIA/NVTX.git +NVTX_TAR_URL = https://api.github.com/repos/NVIDIA/NVTX/tarball/$1 +$(eval $(call git-external,nvtx,NVTX,,,$(SRCCACHE))) + +$(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted + mkdir -p $(dir $@) + echo 1 > $@ + +$(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured + echo 1 > $@ + +define NVTX_INSTALL + cp -a $(SRCCACHE)/$(NVTX_SRC_DIR)/c/include $2/$$(build_includedir)/ +endef + +$(eval $(call staged-install, \ + nvtx,$(NVTX_SRC_DIR), \ + NVTX_INSTALL,,,)) + +get-nvtx: $(NVTX_SRC_FILE) +extract-nvtx: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted +configure-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured +compile-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled +fastcheck-nvtx: #none +check-nvtx: #none + +clean-nvtx: + -rm -f $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled diff --git a/deps/nvtx.version b/deps/nvtx.version new file mode 100644 index 0000000000000..e26c55cae095e --- /dev/null +++ b/deps/nvtx.version @@ -0,0 +1,4 @@ +# -*- makefile -*- +## source build +NVTX_BRANCH=dev +NVTX_SHA1=733fb419540bc1d152bc682d2ca066c7bb79da29 diff --git a/deps/objconv.version b/deps/objconv.version index 322c8fa828a17..185354e23b9e1 100644 --- a/deps/objconv.version +++ b/deps/objconv.version @@ -1,7 +1,7 @@ ## jll artifact # Objconv (we don't ship this, so no need for a fake JLL; therefore we specify the JLL_VER here instead of in a `stdlib/Objconv_jll/Project.toml` file) OBJCONV_JLL_NAME := Objconv -OBJCONV_JLL_VER := 2.49.1+0 +OBJCONV_JLL_VER := 2.53.0+0 ## 
source build -OBJCONV_VER := 2.49.1 +OBJCONV_VER := 2.53.0 diff --git a/deps/openblas.mk b/deps/openblas.mk index e2837bc47232a..e5a988ba84df2 100644 --- a/deps/openblas.mk +++ b/deps/openblas.mk @@ -10,7 +10,7 @@ OPENBLAS_BUILD_OPTS := CC="$(CC) $(SANITIZE_OPTS)" FC="$(FC) $(SANITIZE_OPTS)" L # Thread support ifeq ($(OPENBLAS_USE_THREAD), 1) OPENBLAS_BUILD_OPTS += USE_THREAD=1 -OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=50 +OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=400 # Maximum number of threads for parallelism OPENBLAS_BUILD_OPTS += NUM_THREADS=512 else @@ -43,7 +43,7 @@ OPENBLAS_FFLAGS := $(JFFLAGS) $(USE_BLAS_FFLAGS) OPENBLAS_CFLAGS := -O2 # Decide whether to build for 32-bit or 64-bit arch -ifneq ($(BUILD_OS),$(OS)) +ifneq ($(XC_HOST),) OPENBLAS_BUILD_OPTS += OSNAME=$(OS) CROSS=1 HOSTCC=$(HOSTCC) CROSS_SUFFIX=$(CROSS_COMPILE) endif ifeq ($(OS),WINNT) @@ -95,12 +95,7 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/ patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch echo 1 > $@ -$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied - cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \ - patch -p1 -f < $(SRCDIR)/patches/neoverse-generic-kernels.patch - echo 1 > $@ - -$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied +$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied echo 1 > $@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured diff --git a/deps/openblas.version b/deps/openblas.version index be0506fcd5137..f9729639c67ab 100644 --- a/deps/openblas.version +++ b/deps/openblas.version @@ -3,9 +3,9 @@ OPENBLAS_JLL_NAME := OpenBLAS ## source build -OPENBLAS_VER := 0.3.23 -OPENBLAS_BRANCH=v0.3.23 -OPENBLAS_SHA1=394a9fbafe9010b76a2615c562204277a956eb52 +OPENBLAS_VER := 0.3.29 +OPENBLAS_BRANCH=v0.3.29 +OPENBLAS_SHA1=8795fc7985635de1ecf674b87e2008a15097ffab # LAPACK, source-only LAPACK_VER := 3.9.0 diff --git a/deps/openlibm.version b/deps/openlibm.version index f35b291260380..788701a66301b 100644 --- a/deps/openlibm.version +++ b/deps/openlibm.version @@ -1,7 +1,9 @@ +# -*- makefile -*- + ## jll artifact OPENLIBM_JLL_NAME := OpenLibm ## source build -OPENLIBM_VER := 0.8.1 -OPENLIBM_BRANCH=v0.8.1 -OPENLIBM_SHA1=ae2d91698508701c83cab83714d42a1146dccf85 +OPENLIBM_VER := 0.8.5 +OPENLIBM_BRANCH=v0.8.5 +OPENLIBM_SHA1=db24332879c320606c37f77fea165e6ecb49153c diff --git a/deps/openssl.mk b/deps/openssl.mk new file mode 100644 index 0000000000000..6f96717b2fb74 --- /dev/null +++ b/deps/openssl.mk @@ -0,0 +1,104 @@ +## OpenSSL ## +include $(SRCDIR)/openssl.version + +ifneq ($(USE_BINARYBUILDER_OPENSSL),1) + +ifeq ($(OS),Darwin) +ifeq ($(APPLE_ARCH),arm64) +OPENSSL_TARGET := darwin64-arm64-cc +else +OPENSSL_TARGET := darwin64-x86_64-cc +endif +else ifeq ($(OS),WINNT) +ifeq ($(ARCH),x86_64) +OPENSSL_TARGET := mingw64 +else +OPENSSL_TARGET := mingw +endif +else ifeq ($(OS),FreeBSD) +ifeq ($(ARCH),aarch64) +OPENSSL_TARGET := BSD-aarch64 +else +OPENSSL_TARGET := BSD-x86_64 +endif +else ifeq ($(OS),Linux) +ifeq ($(ARCH),x86_64) +OPENSSL_TARGET := linux-x86_64 +else ifeq ($(ARCH),i686) +OPENSSL_TARGET := linux-x86 +else ifeq ($(ARCH),arm) +OPENSSL_TARGET := linux-armv4 +else ifeq ($(ARCH),aarch64) +OPENSSL_TARGET := linux-aarch64 +else ifeq ($(ARCH),ppc64le) +OPENSSL_TARGET := linux-ppc64le 
+else ifeq ($(ARCH),powerpc64le) +OPENSSL_TARGET := linux-ppc64le +else ifeq ($(ARCH),riscv64) +OPENSSL_TARGET := linux64-riscv64 +endif +else +OPENSSL_TARGET := unknown +endif + +$(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz: | $(SRCCACHE) + $(JLDOWNLOAD) $@ https://www.openssl.org/source/$(notdir $@) + +$(SRCCACHE)/openssl-$(OPENSSL_VER)/source-extracted: $(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz + $(JLCHECKSUM) $< + cd $(dir $<) && $(TAR) -zxf $< + touch -c $(SRCCACHE)/openssl-$(OPENSSL_VER)/configure # old target + echo 1 > $@ + +checksum-openssl: $(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz + $(JLCHECKSUM) $< + +# We cannot use $(CONFIGURE_COMMON) in this step, because openssl's Configure scripts is picky +# and does not like that we pass make variables as arguments, it wants them in the environment +$(BUILDDIR)/openssl-$(OPENSSL_VER)/build-configured: $(SRCCACHE)/openssl-$(OPENSSL_VER)/source-extracted + mkdir -p $(dir $@) + cd $(dir $@) && \ + CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN) $(SANITIZE_LDFLAGS)" \ + $(dir $<)/Configure shared --prefix=$(abspath $(build_prefix)) $(OPENSSL_TARGET) + echo 1 > $@ + +$(BUILDDIR)/openssl-$(OPENSSL_VER)/build-compiled: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-configured + $(MAKE) -C $(dir $<) + echo 1 > $@ + +$(BUILDDIR)/openssl-$(OPENSSL_VER)/build-checked: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-compiled +ifeq ($(OS),$(BUILD_OS)) + $(MAKE) -C $(dir $@) test +endif + echo 1 > $@ + +$(eval $(call staged-install, \ + openssl,openssl-$(OPENSSL_VER), \ + MAKE_INSTALL,,, \ + $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libcrypto-*.dll $(build_bindir)/libcrypto.dll && \ + $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libssl-*.dll $(build_bindir)/libssl.dll && \ + $$(INSTALL_NAME_CMD)libcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libcrypto.$$(SHLIB_EXT) && \ + $$(INSTALL_NAME_CMD)libssl.$$(SHLIB_EXT) $$(build_shlibdir)/libssl.$$(SHLIB_EXT) && \ + $$(INSTALL_NAME_CHANGE_CMD) $$(build_shlibdir)/libcrypto.3.dylib @rpath/libcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libssl.$$(SHLIB_EXT))) + +clean-openssl: + -rm -f $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-configured $(BUILDDIR)/-openssl-$(OPENSSL_VER)/build-compiled + -$(MAKE) -C $(BUILDDIR)/-openssl-$(OPENSSL_VER) clean + +distclean-openssl: + rm -rf $(SRCCACHE)/-openssl-$(OPENSSL_VER).tar.gz \ + $(SRCCACHE)/-openssl-$(OPENSSL_VER) \ + $(BUILDDIR)/-openssl-$(OPENSSL_VER) + +get-openssl: $(SRCCACHE)/openssl-$(OPENSSL_VER).tar.gz +extract-openssl: $(SRCCACHE)/openssl-$(OPENSSL_VER)/source-extracted +configure-openssl: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-configured +compile-openssl: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-compiled +fastcheck-openssl: check-openssl +check-openssl: $(BUILDDIR)/openssl-$(OPENSSL_VER)/build-checked + +else # USE_BINARYBUILDER_OPENSSL + +$(eval $(call bb-install,openssl,OPENSSL,false)) + +endif diff --git a/deps/openssl.version b/deps/openssl.version new file mode 100644 index 0000000000000..7253e063167db --- /dev/null +++ b/deps/openssl.version @@ -0,0 +1,6 @@ +# -*- makefile -*- +## jll artifact +OPENSSL_JLL_NAME := OpenSSL + +## source build +OPENSSL_VER := 3.0.15 diff --git a/deps/p7zip.version b/deps/p7zip.version index d4a13155d9162..0fcde938eeb95 100644 --- a/deps/p7zip.version +++ b/deps/p7zip.version @@ -2,4 +2,4 @@ P7ZIP_JLL_NAME := p7zip ## source build -P7ZIP_VER := 17.04 +P7ZIP_VER := 17.05 diff --git a/deps/patchelf.mk b/deps/patchelf.mk index 9b4947f183117..c019892058d0e 100644 --- a/deps/patchelf.mk +++ b/deps/patchelf.mk 
@@ -20,7 +20,7 @@ $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: XC_HOST:=$(BUILD_MACHINE) $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)" + $(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)" MAKE=$(MAKE) echo 1 > $@ $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-compiled: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured diff --git a/deps/patchelf.version b/deps/patchelf.version index bbeaa87d25136..6e4f32a0c2fe4 100644 --- a/deps/patchelf.version +++ b/deps/patchelf.version @@ -1,3 +1,4 @@ ## source build # Patchelf (we don't ship this or even use a JLL, we just always build it) -PATCHELF_VER := 0.13 +# NOTE: Do not upgrade this to 0.18+ until https://github.com/NixOS/patchelf/issues/492 is fixed +PATCHELF_VER := 0.17.2 diff --git a/deps/patches/gmp-CVE-2021-43618.patch b/deps/patches/gmp-CVE-2021-43618.patch deleted file mode 100644 index a4e420e9219da..0000000000000 --- a/deps/patches/gmp-CVE-2021-43618.patch +++ /dev/null @@ -1,24 +0,0 @@ -# Origin: https://gmplib.org/repo/gmp-6.2/rev/561a9c25298e -# HG changeset patch -# User Marco Bodrato -# Date 1634836009 -7200 -# Node ID 561a9c25298e17bb01896801ff353546c6923dbd -# Parent e1fd9db13b475209a864577237ea4b9105b3e96e -mpz/inp_raw.c: Avoid bit size overflows - -diff -r e1fd9db13b47 -r 561a9c25298e mpz/inp_raw.c ---- a/mpz/inp_raw.c Tue Dec 22 23:49:51 2020 +0100 -+++ b/mpz/inp_raw.c Thu Oct 21 19:06:49 2021 +0200 -@@ -88,8 +88,11 @@ - - abs_csize = ABS (csize); - -+ if (UNLIKELY (abs_csize > ~(mp_bitcnt_t) 0 / 8)) -+ return 0; /* Bit size overflows */ -+ - /* round up to a multiple of limbs */ -- abs_xsize = BITS_TO_LIMBS (abs_csize*8); -+ abs_xsize = BITS_TO_LIMBS ((mp_bitcnt_t) abs_csize * 8); - - if (abs_xsize != 0) - { diff --git a/deps/patches/gmp-HG-changeset.patch b/deps/patches/gmp-HG-changeset.patch deleted file mode 100644 index 7437fb6f2f748..0000000000000 --- a/deps/patches/gmp-HG-changeset.patch +++ /dev/null @@ -1,520 +0,0 @@ - -# HG changeset patch -# User Torbjorn Granlund -# Date 1606685500 -3600 -# Node ID 5f32dbc41afc1f8cd77af1614f0caeb24deb7d7b -# Parent 94c84d919f83ba963ed1809f8e80c7bef32db55c -Avoid the x18 register since it is reserved on Darwin. 
- -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aors_n.asm ---- a/mpn/arm64/aors_n.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/aors_n.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -68,7 +68,7 @@ - EPILOGUE() - PROLOGUE(func_n) - CLRCY --L(ent): lsr x18, n, #2 -+L(ent): lsr x17, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x7, [up] -@@ -77,7 +77,7 @@ - str x13, [rp],#8 - tbnz n, #1, L(b11) - --L(b01): cbz x18, L(ret) -+L(b01): cbz x17, L(ret) - ldp x4, x5, [up,#8] - ldp x8, x9, [vp,#8] - sub up, up, #8 -@@ -88,7 +88,7 @@ - ldp x10, x11, [vp,#8] - add up, up, #8 - add vp, vp, #8 -- cbz x18, L(end) -+ cbz x17, L(end) - b L(top) - - L(bx0): tbnz n, #1, L(b10) -@@ -101,7 +101,7 @@ - - L(b10): ldp x6, x7, [up] - ldp x10, x11, [vp] -- cbz x18, L(end) -+ cbz x17, L(end) - - ALIGN(16) - L(top): ldp x4, x5, [up,#16] -@@ -114,8 +114,8 @@ - ADDSUBC x12, x4, x8 - ADDSUBC x13, x5, x9 - stp x12, x13, [rp],#16 -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x17, x17, #1 -+ cbnz x17, L(top) - - L(end): ADDSUBC x12, x6, x10 - ADDSUBC x13, x7, x11 -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsmul_1.asm ---- a/mpn/arm64/aorsmul_1.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/aorsmul_1.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -32,10 +32,15 @@ - - include(`../config.m4') - --C cycles/limb --C Cortex-A53 9.3-9.8 --C Cortex-A57 7.0 --C X-Gene 5.0 -+C addmul_1 submul_1 -+C cycles/limb cycles/limb -+C Cortex-A53 9.3-9.8 9.3-9.8 -+C Cortex-A55 9.0-9.5 9.3-9.8 -+C Cortex-A57 7 7 -+C Cortex-A72 -+C Cortex-A73 6 6 -+C X-Gene 5 5 -+C Apple M1 1.75 1.75 - - C NOTES - C * It is possible to keep the carry chain alive between the addition blocks -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsorrlshC_n.asm ---- a/mpn/arm64/aorsorrlshC_n.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/aorsorrlshC_n.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -65,14 +65,14 @@ - - ASM_START() - PROLOGUE(func_n) -- lsr x18, n, #2 -+ lsr x6, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x5, [up] - tbnz n, #1, L(b11) - - L(b01): ldr x11, [vp] -- cbz x18, L(1) -+ cbz x6, L(1) - ldp x8, x9, [vp,#8] - lsl x13, x11, #LSH - ADDSUB( x15, x13, x5) -@@ -94,7 +94,7 @@ - ADDSUB( x17, x13, x5) - str x17, [rp],#8 - sub up, up, #8 -- cbz x18, L(end) -+ cbz x6, L(end) - b L(top) - - L(bx0): tbnz n, #1, L(b10) -@@ -107,7 +107,7 @@ - L(b10): CLRRCY( x9) - ldp x10, x11, [vp] - sub up, up, #16 -- cbz x18, L(end) -+ cbz x6, L(end) - - ALIGN(16) - L(top): ldp x4, x5, [up,#16] -@@ -124,8 +124,8 @@ - ADDSUBC(x16, x12, x4) - ADDSUBC(x17, x13, x5) - stp x16, x17, [rp],#16 -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x6, x6, #1 -+ cbnz x6, L(top) - - L(end): ldp x4, x5, [up,#16] - extr x12, x10, x9, #RSH -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/cnd_aors_n.asm ---- a/mpn/arm64/cnd_aors_n.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/cnd_aors_n.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -65,7 +65,7 @@ - - CLRCY - -- lsr x18, n, #2 -+ lsr x17, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x13, [vp] -@@ -75,7 +75,7 @@ - str x9, [rp] - tbnz n, #1, L(b11) - --L(b01): cbz x18, L(rt) -+L(b01): cbz x17, L(rt) - ldp x12, x13, [vp,#8] - ldp x10, x11, [up,#8] - sub up, up, #8 -@@ -86,7 +86,7 @@ - L(b11): ldp x12, x13, [vp,#8]! - ldp x10, x11, [up,#8]! - sub rp, rp, #8 -- cbz x18, L(end) -+ cbz x17, L(end) - b L(top) - - L(bx0): ldp x12, x13, [vp] -@@ -99,7 +99,7 @@ - b L(mid) - - L(b10): sub rp, rp, #16 -- cbz x18, L(end) -+ cbz x17, L(end) - - ALIGN(16) - L(top): bic x6, x12, cnd -@@ -116,8 +116,8 @@ - ADDSUBC x9, x11, x7 - ldp x10, x11, [up,#32]! 
- stp x8, x9, [rp,#32]! -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x17, x17, #1 -+ cbnz x17, L(top) - - L(end): bic x6, x12, cnd - bic x7, x13, cnd -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/logops_n.asm ---- a/mpn/arm64/logops_n.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/logops_n.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -78,7 +78,7 @@ - - ASM_START() - PROLOGUE(func) -- lsr x18, n, #2 -+ lsr x17, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x7, [up] -@@ -88,7 +88,7 @@ - str x15, [rp],#8 - tbnz n, #1, L(b11) - --L(b01): cbz x18, L(ret) -+L(b01): cbz x17, L(ret) - ldp x4, x5, [up,#8] - ldp x8, x9, [vp,#8] - sub up, up, #8 -@@ -99,7 +99,7 @@ - ldp x10, x11, [vp,#8] - add up, up, #8 - add vp, vp, #8 -- cbz x18, L(end) -+ cbz x17, L(end) - b L(top) - - L(bx0): tbnz n, #1, L(b10) -@@ -110,7 +110,7 @@ - - L(b10): ldp x6, x7, [up] - ldp x10, x11, [vp] -- cbz x18, L(end) -+ cbz x17, L(end) - - ALIGN(16) - L(top): ldp x4, x5, [up,#16] -@@ -127,8 +127,8 @@ - POSTOP( x12) - POSTOP( x13) - stp x12, x13, [rp],#16 -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x17, x17, #1 -+ cbnz x17, L(top) - - L(end): LOGOP( x12, x6, x10) - LOGOP( x13, x7, x11) -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshift.asm ---- a/mpn/arm64/lshift.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/lshift.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -61,7 +61,7 @@ - add rp, rp_arg, n, lsl #3 - add up, up, n, lsl #3 - sub tnc, xzr, cnt -- lsr x18, n, #2 -+ lsr x17, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x4, [up,#-8] -@@ -69,7 +69,7 @@ - - L(b01): NSHIFT x0, x4, tnc - PSHIFT x2, x4, cnt -- cbnz x18, L(gt1) -+ cbnz x17, L(gt1) - str x2, [rp,#-8] - ret - L(gt1): ldp x4, x5, [up,#-24] -@@ -89,7 +89,7 @@ - PSHIFT x13, x5, cnt - NSHIFT x10, x4, tnc - PSHIFT x2, x4, cnt -- cbnz x18, L(gt2) -+ cbnz x17, L(gt2) - orr x10, x10, x13 - stp x2, x10, [rp,#-16] - ret -@@ -123,11 +123,11 @@ - orr x11, x12, x2 - stp x10, x11, [rp,#-32]! - PSHIFT x2, x4, cnt --L(lo0): sub x18, x18, #1 -+L(lo0): sub x17, x17, #1 - L(lo3): NSHIFT x10, x6, tnc - PSHIFT x13, x7, cnt - NSHIFT x12, x7, tnc -- cbnz x18, L(top) -+ cbnz x17, L(top) - - L(end): orr x10, x10, x13 - orr x11, x12, x2 -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshiftc.asm ---- a/mpn/arm64/lshiftc.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/lshiftc.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -61,7 +61,7 @@ - add rp, rp_arg, n, lsl #3 - add up, up, n, lsl #3 - sub tnc, xzr, cnt -- lsr x18, n, #2 -+ lsr x17, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x4, [up,#-8] -@@ -69,7 +69,7 @@ - - L(b01): NSHIFT x0, x4, tnc - PSHIFT x2, x4, cnt -- cbnz x18, L(gt1) -+ cbnz x17, L(gt1) - mvn x2, x2 - str x2, [rp,#-8] - ret -@@ -90,7 +90,7 @@ - PSHIFT x13, x5, cnt - NSHIFT x10, x4, tnc - PSHIFT x2, x4, cnt -- cbnz x18, L(gt2) -+ cbnz x17, L(gt2) - eon x10, x10, x13 - mvn x2, x2 - stp x2, x10, [rp,#-16] -@@ -125,11 +125,11 @@ - eon x11, x12, x2 - stp x10, x11, [rp,#-32]! 
- PSHIFT x2, x4, cnt --L(lo0): sub x18, x18, #1 -+L(lo0): sub x17, x17, #1 - L(lo3): NSHIFT x10, x6, tnc - PSHIFT x13, x7, cnt - NSHIFT x12, x7, tnc -- cbnz x18, L(top) -+ cbnz x17, L(top) - - L(end): eon x10, x10, x13 - eon x11, x12, x2 -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/mul_1.asm ---- a/mpn/arm64/mul_1.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/mul_1.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -56,7 +56,7 @@ - - PROLOGUE(mpn_mul_1) - adds x4, xzr, xzr C clear register and cy flag --L(com): lsr x18, n, #2 -+L(com): lsr x17, n, #2 - tbnz n, #0, L(bx1) - - L(bx0): mov x11, x4 -@@ -65,7 +65,7 @@ - L(b10): ldp x4, x5, [up] - mul x8, x4, v0 - umulh x10, x4, v0 -- cbz x18, L(2) -+ cbz x17, L(2) - ldp x6, x7, [up,#16]! - mul x9, x5, v0 - b L(mid)-8 -@@ -80,7 +80,7 @@ - str x9, [rp],#8 - tbnz n, #1, L(b10) - --L(b01): cbz x18, L(1) -+L(b01): cbz x17, L(1) - - L(b00): ldp x6, x7, [up] - mul x8, x6, v0 -@@ -90,8 +90,8 @@ - adcs x12, x8, x11 - umulh x11, x7, v0 - add rp, rp, #16 -- sub x18, x18, #1 -- cbz x18, L(end) -+ sub x17, x17, #1 -+ cbz x17, L(end) - - ALIGN(16) - L(top): mul x8, x4, v0 -@@ -110,8 +110,8 @@ - stp x12, x13, [rp],#32 - adcs x12, x8, x11 - umulh x11, x7, v0 -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x17, x17, #1 -+ cbnz x17, L(top) - - L(end): mul x8, x4, v0 - adcs x13, x9, x10 -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rsh1aors_n.asm ---- a/mpn/arm64/rsh1aors_n.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/rsh1aors_n.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -59,7 +59,7 @@ - - ASM_START() - PROLOGUE(func_n) -- lsr x18, n, #2 -+ lsr x6, n, #2 - - tbz n, #0, L(bx0) - -@@ -69,7 +69,7 @@ - - L(b01): ADDSUB x13, x5, x9 - and x10, x13, #1 -- cbz x18, L(1) -+ cbz x6, L(1) - ldp x4, x5, [up],#48 - ldp x8, x9, [vp],#48 - ADDSUBC x14, x4, x8 -@@ -80,8 +80,8 @@ - ADDSUBC x12, x4, x8 - ADDSUBC x13, x5, x9 - str x17, [rp], #24 -- sub x18, x18, #1 -- cbz x18, L(end) -+ sub x6, x6, #1 -+ cbz x6, L(end) - b L(top) - - L(1): cset x14, COND -@@ -97,7 +97,7 @@ - ldp x8, x9, [vp],#32 - ADDSUBC x12, x4, x8 - ADDSUBC x13, x5, x9 -- cbz x18, L(3) -+ cbz x6, L(3) - ldp x4, x5, [up,#-16] - ldp x8, x9, [vp,#-16] - extr x17, x12, x15, #1 -@@ -117,7 +117,7 @@ - ADDSUB x12, x4, x8 - ADDSUBC x13, x5, x9 - and x10, x12, #1 -- cbz x18, L(2) -+ cbz x6, L(2) - ldp x4, x5, [up,#-16] - ldp x8, x9, [vp,#-16] - ADDSUBC x14, x4, x8 -@@ -134,8 +134,8 @@ - ADDSUBC x12, x4, x8 - ADDSUBC x13, x5, x9 - add rp, rp, #16 -- sub x18, x18, #1 -- cbz x18, L(end) -+ sub x6, x6, #1 -+ cbz x6, L(end) - - ALIGN(16) - L(top): ldp x4, x5, [up,#-16] -@@ -152,8 +152,8 @@ - ADDSUBC x12, x4, x8 - ADDSUBC x13, x5, x9 - stp x16, x17, [rp],#32 -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x6, x6, #1 -+ cbnz x6, L(top) - - L(end): extr x16, x15, x14, #1 - extr x17, x12, x15, #1 -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rshift.asm ---- a/mpn/arm64/rshift.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/rshift.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -60,7 +60,7 @@ - PROLOGUE(mpn_rshift) - mov rp, rp_arg - sub tnc, xzr, cnt -- lsr x18, n, #2 -+ lsr x17, n, #2 - tbz n, #0, L(bx0) - - L(bx1): ldr x5, [up] -@@ -68,7 +68,7 @@ - - L(b01): NSHIFT x0, x5, tnc - PSHIFT x2, x5, cnt -- cbnz x18, L(gt1) -+ cbnz x17, L(gt1) - str x2, [rp] - ret - L(gt1): ldp x4, x5, [up,#8] -@@ -89,7 +89,7 @@ - PSHIFT x13, x4, cnt - NSHIFT x10, x5, tnc - PSHIFT x2, x5, cnt -- cbnz x18, L(gt2) -+ cbnz x17, L(gt2) - orr x10, x10, x13 - stp x10, x2, [rp] - ret -@@ -121,11 +121,11 @@ - orr x11, x12, x2 - stp x11, x10, [rp,#32]! 
- PSHIFT x2, x5, cnt --L(lo0): sub x18, x18, #1 -+L(lo0): sub x17, x17, #1 - L(lo3): NSHIFT x10, x7, tnc - NSHIFT x12, x6, tnc - PSHIFT x13, x6, cnt -- cbnz x18, L(top) -+ cbnz x17, L(top) - - L(end): orr x10, x10, x13 - orr x11, x12, x2 -diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/sqr_diag_addlsh1.asm ---- a/mpn/arm64/sqr_diag_addlsh1.asm Sat Nov 28 23:38:32 2020 +0100 -+++ b/mpn/arm64/sqr_diag_addlsh1.asm Sun Nov 29 22:31:40 2020 +0100 -@@ -47,7 +47,7 @@ - ASM_START() - PROLOGUE(mpn_sqr_diag_addlsh1) - ldr x15, [up],#8 -- lsr x18, n, #1 -+ lsr x14, n, #1 - tbz n, #0, L(bx0) - - L(bx1): adds x7, xzr, xzr -@@ -62,8 +62,8 @@ - ldr x17, [up],#16 - ldp x6, x7, [tp],#32 - umulh x11, x15, x15 -- sub x18, x18, #1 -- cbz x18, L(end) -+ sub x14, x14, #1 -+ cbz x14, L(end) - - ALIGN(16) - L(top): extr x9, x6, x5, #63 -@@ -84,8 +84,8 @@ - extr x8, x5, x4, #63 - stp x12, x13, [rp],#16 - adcs x12, x8, x10 -- sub x18, x18, #1 -- cbnz x18, L(top) -+ sub x14, x14, #1 -+ cbnz x14, L(top) - - L(end): extr x9, x6, x5, #63 - mul x10, x17, x17 diff --git a/deps/patches/gmp_alloc_overflow_func.patch b/deps/patches/gmp-alloc_overflow.patch similarity index 54% rename from deps/patches/gmp_alloc_overflow_func.patch rename to deps/patches/gmp-alloc_overflow.patch index 51506d70d46fb..6a0f02c66e3f4 100644 --- a/deps/patches/gmp_alloc_overflow_func.patch +++ b/deps/patches/gmp-alloc_overflow.patch @@ -1,7 +1,7 @@ -diff --git a/gmp-h.in b/gmp-h.in ---- a/gmp-h.in -+++ b/gmp-h.in -@@ -479,6 +479,13 @@ using std::FILE; +diff -ru gmp-6.3.0/gmp-h.in gmp-6.3.0.new/gmp-h.in +--- gmp-6.3.0/gmp-h.in 2023-07-29 09:42:16 ++++ gmp-6.3.0.new/gmp-h.in 2023-12-29 15:33:34 +@@ -487,6 +487,12 @@ void *(**) (void *, size_t, size_t), void (**) (void *, size_t)) __GMP_NOTHROW; @@ -10,15 +10,23 @@ diff --git a/gmp-h.in b/gmp-h.in + +#define mp_get_alloc_overflow_function __gmp_get_alloc_overflow_function +__GMP_DECLSPEC void mp_get_alloc_overflow_function (void (**) (void)) __GMP_NOTHROW; -+ + #define mp_bits_per_limb __gmp_bits_per_limb __GMP_DECLSPEC extern const int mp_bits_per_limb; -diff --git a/gmp-impl.h b/gmp-impl.h ---- a/gmp-impl.h -+++ b/gmp-impl.h -@@ -696,10 +696,12 @@ struct tmp_debug_entry_t { +diff -ru gmp-6.3.0/gmp-impl.h gmp-6.3.0.new/gmp-impl.h +--- gmp-6.3.0/gmp-impl.h 2023-07-29 09:42:16 ++++ gmp-6.3.0.new/gmp-impl.h 2023-12-30 16:02:36 +@@ -58,6 +58,8 @@ + short can be 24, 32, 46 or 64 bits, and different for ushort. */ + + #include ++#include ++#include + + /* For fat.h and other fat binary stuff. + No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions +@@ -699,14 +701,22 @@ __GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t); __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t); __GMP_DECLSPEC extern void (*__gmp_free_func) (void *, size_t); @@ -31,10 +39,7 @@ diff --git a/gmp-impl.h b/gmp-impl.h #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \ ((type *) (*__gmp_allocate_func) ((n) * sizeof (type))) -@@ -727,6 +729,12 @@ struct tmp_debug_entry_t { - (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \ - } while (0) - + #define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t) +#define __GMP_ALLOC_OVERFLOW_FUNC() \ + do { \ + (*__gmp_alloc_overflow_func) (); \ @@ -42,12 +47,12 @@ diff --git a/gmp-impl.h b/gmp-impl.h + abort (); \ + } while (0) - /* Dummy for non-gcc, code involving it will go dead. */ - #if ! 
defined (__GNUC__) || __GNUC__ < 2 -diff --git a/memory.c b/memory.c ---- a/memory.c -+++ b/memory.c -@@ -38,6 +38,7 @@ see https://www.gnu.org/licenses/. */ + #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \ + ((type *) (*__gmp_reallocate_func) \ +diff -ru gmp-6.3.0/memory.c gmp-6.3.0.new/memory.c +--- gmp-6.3.0/memory.c 2023-07-29 09:42:16 ++++ gmp-6.3.0.new/memory.c 2023-12-29 15:43:27 +@@ -37,6 +37,7 @@ void * (*__gmp_allocate_func) (size_t) = __gmp_default_allocate; void * (*__gmp_reallocate_func) (void *, size_t, size_t) = __gmp_default_reallocate; void (*__gmp_free_func) (void *, size_t) = __gmp_default_free; @@ -55,21 +60,22 @@ diff --git a/memory.c b/memory.c /* Default allocation functions. In case of failure to allocate/reallocate -@@ -144,3 +145,10 @@ void +@@ -142,4 +143,11 @@ + } #endif free (blk_ptr); - } ++} + +void +__gmp_default_alloc_overflow(void) +{ + fprintf (stderr, "gmp: overflow in mpz type\n"); + abort(); -+} -diff --git a/mp_get_fns.c b/mp_get_fns.c ---- a/mp_get_fns.c -+++ b/mp_get_fns.c -@@ -46,3 +46,11 @@ mp_get_memory_functions (void *(**alloc_ + } +diff -ru gmp-6.3.0/mp_get_fns.c gmp-6.3.0.new/mp_get_fns.c +--- gmp-6.3.0/mp_get_fns.c 2023-07-29 09:42:16 ++++ gmp-6.3.0.new/mp_get_fns.c 2023-12-29 15:43:27 +@@ -45,3 +45,11 @@ if (free_func != NULL) *free_func = __gmp_free_func; } @@ -81,10 +87,10 @@ diff --git a/mp_get_fns.c b/mp_get_fns.c + if (alloc_overflow_func != NULL) + *alloc_overflow_func = __gmp_alloc_overflow_func; +} -diff --git a/mp_set_fns.c b/mp_set_fns.c ---- a/mp_set_fns.c -+++ b/mp_set_fns.c -@@ -48,3 +48,12 @@ mp_set_memory_functions (void *(*alloc_f +diff -ru gmp-6.3.0/mp_set_fns.c gmp-6.3.0.new/mp_set_fns.c +--- gmp-6.3.0/mp_set_fns.c 2023-07-29 09:42:16 ++++ gmp-6.3.0.new/mp_set_fns.c 2023-12-29 15:43:27 +@@ -47,3 +47,12 @@ __gmp_reallocate_func = realloc_func; __gmp_free_func = free_func; } @@ -97,58 +103,66 @@ diff --git a/mp_set_fns.c b/mp_set_fns.c + alloc_overflow_func = __gmp_default_alloc_overflow; + __gmp_alloc_overflow_func = alloc_overflow_func; +} -diff --git a/mpz/init2.c b/mpz/init2.c ---- a/mpz/init2.c -+++ b/mpz/init2.c -@@ -45,8 +45,7 @@ mpz_init2 (mpz_ptr x, mp_bitcnt_t bits) +diff -ru gmp-6.3.0/mpz/init2.c gmp-6.3.0.new/mpz/init2.c +--- gmp-6.3.0/mpz/init2.c 2023-07-29 09:42:17 ++++ gmp-6.3.0.new/mpz/init2.c 2023-12-30 12:22:34 +@@ -41,7 +41,7 @@ + if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */ { if (UNLIKELY (new_alloc > INT_MAX)) - { -- fprintf (stderr, "gmp: overflow in mpz type\n"); -- abort (); -+ __GMP_ALLOC_OVERFLOW_FUNC (); - } +- MPZ_OVERFLOW; ++ __GMP_ALLOC_OVERFLOW_FUNC (); } -diff --git a/mpz/realloc.c b/mpz/realloc.c ---- a/mpz/realloc.c -+++ b/mpz/realloc.c -@@ -45,16 +45,14 @@ void * + PTR(x) = __GMP_ALLOCATE_FUNC_LIMBS (new_alloc); +diff -ru gmp-6.3.0/mpz/realloc.c gmp-6.3.0.new/mpz/realloc.c +--- gmp-6.3.0/mpz/realloc.c 2023-07-29 09:42:17 ++++ gmp-6.3.0.new/mpz/realloc.c 2023-12-30 12:22:47 +@@ -42,12 +42,12 @@ + if (sizeof (mp_size_t) == sizeof (int)) { if (UNLIKELY (new_alloc > ULONG_MAX / GMP_NUMB_BITS)) - { -- fprintf (stderr, "gmp: overflow in mpz type\n"); -- abort (); -+ __GMP_ALLOC_OVERFLOW_FUNC (); - } +- MPZ_OVERFLOW; ++ __GMP_ALLOC_OVERFLOW_FUNC (); } else { if (UNLIKELY (new_alloc > INT_MAX)) - { -- fprintf (stderr, "gmp: overflow in mpz type\n"); -- abort (); -+ __GMP_ALLOC_OVERFLOW_FUNC (); - } +- MPZ_OVERFLOW; ++ __GMP_ALLOC_OVERFLOW_FUNC (); } -diff --git a/mpz/realloc2.c b/mpz/realloc2.c ---- a/mpz/realloc2.c -+++ b/mpz/realloc2.c -@@ -45,8 
+45,7 @@ mpz_realloc2 (mpz_ptr m, mp_bitcnt_t bit + if (ALLOC (m) == 0) +diff -ru gmp-6.3.0/mpz/realloc2.c gmp-6.3.0.new/mpz/realloc2.c +--- gmp-6.3.0/mpz/realloc2.c 2023-07-29 09:42:17 ++++ gmp-6.3.0.new/mpz/realloc2.c 2023-12-30 12:22:59 +@@ -42,7 +42,7 @@ + if (sizeof (unsigned long) > sizeof (int)) /* param vs _mp_size field */ { if (UNLIKELY (new_alloc > INT_MAX)) - { -- fprintf (stderr, "gmp: overflow in mpz type\n"); -- abort (); -+ __GMP_ALLOC_OVERFLOW_FUNC (); - } +- MPZ_OVERFLOW; ++ __GMP_ALLOC_OVERFLOW_FUNC (); } -diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c ---- a/tests/mpz/t-pow.c -+++ b/tests/mpz/t-pow.c -@@ -195,6 +195,34 @@ check_random (int reps) + if (ALLOC (m) == 0) +diff -ru gmp-6.3.0/tal-reent.c gmp-6.3.0.new/tal-reent.c +--- gmp-6.3.0/tal-reent.c 2023-07-29 09:42:17 ++++ gmp-6.3.0.new/tal-reent.c 2023-12-30 12:19:40 +@@ -61,6 +61,10 @@ + + total_size = size + HSIZ; + p = __GMP_ALLOCATE_FUNC_TYPE (total_size, char); ++ if (!p) ++ { ++ __GMP_ALLOC_OVERFLOW_FUNC (); ++ } + P->size = total_size; + P->next = *markp; + *markp = P; +diff -ru gmp-6.3.0/tests/mpz/t-pow.c gmp-6.3.0.new/tests/mpz/t-pow.c +--- gmp-6.3.0/tests/mpz/t-pow.c 2023-07-29 09:42:17 ++++ gmp-6.3.0.new/tests/mpz/t-pow.c 2023-12-30 15:57:58 +@@ -194,6 +194,33 @@ mpz_clear (want); } @@ -178,12 +192,11 @@ diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c + } + mpz_clear (x); +} -+ + int main (int argc, char **argv) { -@@ -212,6 +240,7 @@ main (int argc, char **argv) +@@ -211,6 +238,7 @@ check_various (); check_random (reps); diff --git a/deps/patches/libssh2-fix-import-lib-name.patch b/deps/patches/libssh2-fix-import-lib-name.patch deleted file mode 100644 index 15aafb58d2736..0000000000000 --- a/deps/patches/libssh2-fix-import-lib-name.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 3732420725efbf410df5863b91a09ca214ee18ba Mon Sep 17 00:00:00 2001 -From: "Y. Yang" -Date: Thu, 16 Jun 2022 19:16:37 +0800 -Subject: [PATCH] Fix DLL import library name - -https://aur.archlinux.org/packages/mingw-w64-libssh2 -https://cmake.org/cmake/help/latest/prop_tgt/IMPORT_PREFIX.html ---- - src/CMakeLists.txt | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index cb8fee1..17ecefd 100644 ---- a/src/CMakeLists.txt -+++ b/src/CMakeLists.txt -@@ -220,6 +220,7 @@ endif() - add_library(libssh2 ${SOURCES}) - # we want it to be called libssh2 on all platforms - set_target_properties(libssh2 PROPERTIES PREFIX "") -+set_target_properties(libssh2 PROPERTIES IMPORT_PREFIX "") - - target_compile_definitions(libssh2 PRIVATE ${PRIVATE_COMPILE_DEFINITIONS}) - target_include_directories(libssh2 --- -2.36.1 - diff --git a/deps/patches/libssh2-userauth-check.patch b/deps/patches/libssh2-userauth-check.patch deleted file mode 100644 index 1dc6108ebece7..0000000000000 --- a/deps/patches/libssh2-userauth-check.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 37ee0aa214655b63e7869d1d74ff1ec9f9818a5e Mon Sep 17 00:00:00 2001 -From: Daniel Stenberg -Date: Fri, 17 Dec 2021 17:46:29 +0100 -Subject: [PATCH] userauth: check for too large userauth_kybd_auth_name_len - (#650) - -... before using it. 
- -Reported-by: MarcoPoloPie -Fixes #649 ---- - src/userauth.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/userauth.c b/src/userauth.c -index 40ef915..caa5635 100644 ---- a/src/userauth.c -+++ b/src/userauth.c -@@ -1769,6 +1769,11 @@ userauth_keyboard_interactive(LIBSSH2_SESSION * session, - if(session->userauth_kybd_data_len >= 5) { - /* string name (ISO-10646 UTF-8) */ - session->userauth_kybd_auth_name_len = _libssh2_ntohu32(s); -+ if(session->userauth_kybd_auth_name_len > -+ session->userauth_kybd_data_len - 5) -+ return _libssh2_error(session, -+ LIBSSH2_ERROR_OUT_OF_BOUNDARY, -+ "Bad keyboard auth name"); - s += 4; - } - else { diff --git a/deps/patches/libunwind-aarch64-inline-asm.patch b/deps/patches/libunwind-aarch64-inline-asm.patch new file mode 100644 index 0000000000000..123643e30cdeb --- /dev/null +++ b/deps/patches/libunwind-aarch64-inline-asm.patch @@ -0,0 +1,157 @@ +From 6ae71b3ea71bff0f38c7a6a05beda30b7dce1ef6 Mon Sep 17 00:00:00 2001 +From: Stephen Webb +Date: Mon, 22 Apr 2024 15:56:54 -0400 +Subject: [PATCH] Rework inline aarch64 as for setcontext + +Modern GC and clang were barfing on the inline asm constraints for the +aarch64-linux setcontext() replacement. Reformulated the asm code to +reduce the required constraints. +--- + src/aarch64/Gos-linux.c | 115 +++++++++++++++++++++------------------- + 1 file changed, 61 insertions(+), 54 deletions(-) + +diff --git a/src/aarch64/Gos-linux.c b/src/aarch64/Gos-linux.c +index 7cd8c879f..1e4949623 100644 +--- a/src/aarch64/Gos-linux.c ++++ b/src/aarch64/Gos-linux.c +@@ -2,6 +2,7 @@ + Copyright (C) 2008 CodeSourcery + Copyright (C) 2011-2013 Linaro Limited + Copyright (C) 2012 Tommi Rantala ++ Copyright 2024 Stephen M. Webb + + This file is part of libunwind. + +@@ -28,6 +29,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + + #ifndef UNW_REMOTE_ONLY + ++/* Magic constants generated from gen-offsets.c */ ++#define SC_R0_OFF "8" ++#define SC_R2_OFF "24" ++#define SC_R18_OFF "152" ++#define SC_R20_OFF "168" ++#define SC_R22_OFF "184" ++#define SC_R24_OFF "200" ++#define SC_R26_OFF "216" ++#define SC_R28_OFF "232" ++#define SC_R30_OFF "248" ++ ++#define FP_R08_OFF "80" ++#define FP_R09_OFF "88" ++#define FP_R10_OFF "96" ++#define FP_R11_OFF "104" ++#define FP_R12_OFF "112" ++#define FP_R13_OFF "120" ++#define FP_R14_OFF "128" ++#define FP_R15_OFF "136" ++ ++#define SC_SP_OFF "0x100" ++ + HIDDEN int + aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) + { +@@ -36,65 +59,49 @@ aarch64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor, void *arg) + + if (c->sigcontext_format == AARCH64_SCF_NONE) + { ++ ++ /* ++ * This is effectively the old POSIX setcontext(). ++ * ++ * This inline asm is broken up to use local scratch registers for the ++ * uc_mcontext.regs and FPCTX base addresses because newer versions of GCC ++ * and clang barf on too many constraints (gh-702) when the C array ++ * elements are used directly. ++ * ++ * Clobbers aren't required for the inline asm because they just convince ++ * the compiler to save those registers and they never get restored ++ * becauise the asm ends with a plain ol' ret. ++ */ ++ register void* uc_mcontext __asm__ ("x5") = (void*) &uc->uc_mcontext; ++ register void* fpctx __asm__ ("x4") = (void*) GET_FPCTX(uc); ++ + /* Since there are no signals involved here we restore EH and non scratch + registers only. 
*/ + __asm__ __volatile__ ( +- "ldr x0, %[x0]\n\t" +- "ldr x1, %[x1]\n\t" +- "ldr x2, %[x2]\n\t" +- "ldr x3, %[x3]\n\t" +- "ldr x19, %[x19]\n\t" +- "ldr x20, %[x20]\n\t" +- "ldr x21, %[x21]\n\t" +- "ldr x22, %[x22]\n\t" +- "ldr x23, %[x23]\n\t" +- "ldr x24, %[x24]\n\t" +- "ldr x25, %[x25]\n\t" +- "ldr x26, %[x26]\n\t" +- "ldr x27, %[x27]\n\t" +- "ldr x28, %[x28]\n\t" +- "ldr x29, %[x29]\n\t" +- "ldr x30, %[x30]\n\t" +- "ldr d8, %[d8]\n\t" +- "ldr d9, %[d9]\n\t" +- "ldr d10, %[d10]\n\t" +- "ldr d11, %[d11]\n\t" +- "ldr d12, %[d12]\n\t" +- "ldr d13, %[d13]\n\t" +- "ldr d14, %[d14]\n\t" +- "ldr d15, %[d15]\n\t" +- "ldr x5, %[sp]\n\t" ++ "ldp x0, x1, [x5, " SC_R0_OFF "]\n\t" ++ "ldp x2, x3, [x5, " SC_R2_OFF "]\n\t" ++ "ldp x18, x19, [x5, " SC_R18_OFF "]\n\t" ++ "ldp x20, x21, [x5, " SC_R20_OFF "]\n\t" ++ "ldp x22, x23, [x5, " SC_R22_OFF "]\n\t" ++ "ldp x24, x25, [x5, " SC_R24_OFF "]\n\t" ++ "ldp x26, x27, [x5, " SC_R26_OFF "]\n\t" ++ "ldp x28, x29, [x5, " SC_R28_OFF "]\n\t" ++ "ldr x30, [x5, " SC_R30_OFF "]\n\t" ++ "ldr d8, [x4, " FP_R08_OFF "]\n\t" ++ "ldr d9, [x4, " FP_R09_OFF "]\n\t" ++ "ldr d10, [x4, " FP_R10_OFF "]\n\t" ++ "ldr d11, [x4, " FP_R11_OFF "]\n\t" ++ "ldr d12, [x4, " FP_R12_OFF "]\n\t" ++ "ldr d13, [x4, " FP_R13_OFF "]\n\t" ++ "ldr d14, [x4, " FP_R14_OFF "]\n\t" ++ "ldr d15, [x4, " FP_R15_OFF "]\n\t" ++ "ldr x5, [x5, " SC_SP_OFF "]\n\t" + "mov sp, x5\n\t" + "ret\n" +- : +- : [x0] "m"(uc->uc_mcontext.regs[0]), +- [x1] "m"(uc->uc_mcontext.regs[1]), +- [x2] "m"(uc->uc_mcontext.regs[2]), +- [x3] "m"(uc->uc_mcontext.regs[3]), +- [x19] "m"(uc->uc_mcontext.regs[19]), +- [x20] "m"(uc->uc_mcontext.regs[20]), +- [x21] "m"(uc->uc_mcontext.regs[21]), +- [x22] "m"(uc->uc_mcontext.regs[22]), +- [x23] "m"(uc->uc_mcontext.regs[23]), +- [x24] "m"(uc->uc_mcontext.regs[24]), +- [x25] "m"(uc->uc_mcontext.regs[25]), +- [x26] "m"(uc->uc_mcontext.regs[26]), +- [x27] "m"(uc->uc_mcontext.regs[27]), +- [x28] "m"(uc->uc_mcontext.regs[28]), +- [x29] "m"(uc->uc_mcontext.regs[29]), /* FP */ +- [x30] "m"(uc->uc_mcontext.regs[30]), /* LR */ +- [d8] "m"(GET_FPCTX(uc)->vregs[8]), +- [d9] "m"(GET_FPCTX(uc)->vregs[9]), +- [d10] "m"(GET_FPCTX(uc)->vregs[10]), +- [d11] "m"(GET_FPCTX(uc)->vregs[11]), +- [d12] "m"(GET_FPCTX(uc)->vregs[12]), +- [d13] "m"(GET_FPCTX(uc)->vregs[13]), +- [d14] "m"(GET_FPCTX(uc)->vregs[14]), +- [d15] "m"(GET_FPCTX(uc)->vregs[15]), +- [sp] "m"(uc->uc_mcontext.sp) +- : "x0", "x1", "x2", "x3", "x19", "x20", "x21", "x22", "x23", "x24", +- "x25", "x26", "x27", "x28", "x29", "x30" +- ); ++ : ++ : [uc_mcontext] "r"(uc_mcontext), ++ [fpctx] "r"(fpctx) ++ ); + } + else + { diff --git a/deps/patches/libunwind-cfa-rsp.patch b/deps/patches/libunwind-cfa-rsp.patch deleted file mode 100644 index 6b2080c10c2cf..0000000000000 --- a/deps/patches/libunwind-cfa-rsp.patch +++ /dev/null @@ -1,368 +0,0 @@ -From 8c8c78e2db09c5dc66ad0188a088b1664483a13f Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Sun, 29 Aug 2021 11:07:54 -0700 -Subject: [PATCH] x86_64: Stop aliasing RSP and CFA - -RSP and CFA are different concepts. RSP refers to the physical -register, CFA is a virtual register that serves as the base -address for various other saved registers. It is true that -in many frames these are set to alias, however this is not -a requirement. For example, a function that performs a stack -switch would likely change the rsp in the middle of the function, -but would keep the CFA at the original RSP such that saved registers -may be appropriately recovered. 
- -We are seeing incorrect unwinds in the Julia runtime when running -julia under rr. This is because injects code (with correct CFI) -that performs just such a stack switch [1]. GDB manages to unwind -this correctly, but libunwind incorrectly sets the rsp to the CFA -address, causing a misunwind. - -Tested on x86_64, patches for other architectures are ported, but -not tested. - -[1] https://github.com/rr-debugger/rr/blob/469c22059a4a1798d33a8a224457faf22b2c178c/src/preload/syscall_hook.S#L454 ---- - include/dwarf.h | 3 +- - include/libunwind_i.h | 4 ++ - include/tdep-x86/dwarf-config.h | 2 - - include/tdep-x86/libunwind_i.h | 73 ++++++++++++--------------------- - src/dwarf/Gparser.c | 15 +++++-- - src/x86/Gos-freebsd.c | 1 + - src/x86/Gregs.c | 2 +- - src/x86/Gstep.c | 4 +- - src/x86_64/Gos-freebsd.c | 1 + - src/x86_64/Gregs.c | 2 +- - src/x86_64/Gstep.c | 2 +- - 11 files changed, 52 insertions(+), 57 deletions(-) - -diff --git a/include/dwarf.h b/include/dwarf.h -index 175c419bb..23ff4c4f6 100644 ---- a/include/dwarf.h -+++ b/include/dwarf.h -@@ -231,6 +231,7 @@ typedef enum - DWARF_WHERE_REG, /* register saved in another register */ - DWARF_WHERE_EXPR, /* register saved */ - DWARF_WHERE_VAL_EXPR, /* register has computed value */ -+ DWARF_WHERE_CFA, /* register is set to the computed cfa value */ - } - dwarf_where_t; - -@@ -313,7 +314,7 @@ typedef struct dwarf_cursor - void *as_arg; /* argument to address-space callbacks */ - unw_addr_space_t as; /* reference to per-address-space info */ - -- unw_word_t cfa; /* canonical frame address; aka frame-/stack-pointer */ -+ unw_word_t cfa; /* canonical frame address; aka frame-pointer */ - unw_word_t ip; /* instruction pointer */ - unw_word_t args_size; /* size of arguments */ - unw_word_t eh_args[UNW_TDEP_NUM_EH_REGS]; -diff --git a/include/libunwind_i.h b/include/libunwind_i.h -index fea5c2607..6c7dda9a8 100644 ---- a/include/libunwind_i.h -+++ b/include/libunwind_i.h -@@ -346,6 +346,10 @@ static inline void invalidate_edi (struct elf_dyn_info *edi) - - #include "tdep/libunwind_i.h" - -+#ifndef TDEP_DWARF_SP -+#define TDEP_DWARF_SP UNW_TDEP_SP -+#endif -+ - #ifndef tdep_get_func_addr - # define tdep_get_func_addr(as,addr,v) (*(v) = addr, 0) - #endif -diff --git a/include/tdep-x86/dwarf-config.h b/include/tdep-x86/dwarf-config.h -index f76f9c1c4..11398e4e6 100644 ---- a/include/tdep-x86/dwarf-config.h -+++ b/include/tdep-x86/dwarf-config.h -@@ -43,9 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - typedef struct dwarf_loc - { - unw_word_t val; --#ifndef UNW_LOCAL_ONLY - unw_word_t type; /* see X86_LOC_TYPE_* macros. 
*/ --#endif - } - dwarf_loc_t; - -diff --git a/include/tdep-x86/libunwind_i.h b/include/tdep-x86/libunwind_i.h -index d4c5ccdb1..ad4edc2f5 100644 ---- a/include/tdep-x86/libunwind_i.h -+++ b/include/tdep-x86/libunwind_i.h -@@ -84,15 +84,26 @@ dwarf_get_uc(const struct dwarf_cursor *cursor) - } - - #define DWARF_GET_LOC(l) ((l).val) -+# define DWARF_LOC_TYPE_MEM (0 << 0) -+# define DWARF_LOC_TYPE_FP (1 << 0) -+# define DWARF_LOC_TYPE_REG (1 << 1) -+# define DWARF_LOC_TYPE_VAL (1 << 2) - --#ifdef UNW_LOCAL_ONLY -+# define DWARF_IS_REG_LOC(l) (((l).type & DWARF_LOC_TYPE_REG) != 0) -+# define DWARF_IS_FP_LOC(l) (((l).type & DWARF_LOC_TYPE_FP) != 0) -+# define DWARF_IS_MEM_LOC(l) ((l).type == DWARF_LOC_TYPE_MEM) -+# define DWARF_IS_VAL_LOC(l) (((l).type & DWARF_LOC_TYPE_VAL) != 0) -+ -+# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r), .type = (t) }) - # define DWARF_NULL_LOC DWARF_LOC (0, 0) --# define DWARF_IS_NULL_LOC(l) (DWARF_GET_LOC (l) == 0) --# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r) }) --# define DWARF_IS_REG_LOC(l) 0 -+# define DWARF_IS_NULL_LOC(l) \ -+ ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; }) -+# define DWARF_VAL_LOC(c,v) DWARF_LOC ((v), DWARF_LOC_TYPE_VAL) -+# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), DWARF_LOC_TYPE_MEM) -+ -+#ifdef UNW_LOCAL_ONLY - # define DWARF_REG_LOC(c,r) (DWARF_LOC((unw_word_t) \ - tdep_uc_addr(dwarf_get_uc(c), (r)), 0)) --# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0) - # define DWARF_FPREG_LOC(c,r) (DWARF_LOC((unw_word_t) \ - tdep_uc_addr(dwarf_get_uc(c), (r)), 0)) - -@@ -114,35 +125,8 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val) - return 0; - } - --static inline int --dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val) --{ -- if (!DWARF_GET_LOC (loc)) -- return -1; -- return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val, -- 0, c->as_arg); --} -- --static inline int --dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) --{ -- if (!DWARF_GET_LOC (loc)) -- return -1; -- return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val, -- 1, c->as_arg); --} -- - #else /* !UNW_LOCAL_ONLY */ --# define DWARF_LOC_TYPE_FP (1 << 0) --# define DWARF_LOC_TYPE_REG (1 << 1) --# define DWARF_NULL_LOC DWARF_LOC (0, 0) --# define DWARF_IS_NULL_LOC(l) \ -- ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; }) --# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r), .type = (t) }) --# define DWARF_IS_REG_LOC(l) (((l).type & DWARF_LOC_TYPE_REG) != 0) --# define DWARF_IS_FP_LOC(l) (((l).type & DWARF_LOC_TYPE_FP) != 0) - # define DWARF_REG_LOC(c,r) DWARF_LOC((r), DWARF_LOC_TYPE_REG) --# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0) - # define DWARF_FPREG_LOC(c,r) DWARF_LOC((r), (DWARF_LOC_TYPE_REG \ - | DWARF_LOC_TYPE_FP)) - -@@ -192,38 +176,33 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val) - 1, c->as_arg); - } - -+#endif /* !UNW_LOCAL_ONLY */ -+ - static inline int - dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val) - { - if (DWARF_IS_NULL_LOC (loc)) - return -UNW_EBADREG; - -- /* If a code-generator were to save a value of type unw_word_t in a -- floating-point register, we would have to support this case. I -- suppose it could happen with MMX registers, but does it really -- happen? 
*/ -- assert (!DWARF_IS_FP_LOC (loc)); -- - if (DWARF_IS_REG_LOC (loc)) - return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val, - 0, c->as_arg); -- else -+ if (DWARF_IS_MEM_LOC (loc)) - return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val, - 0, c->as_arg); -+ assert(DWARF_IS_VAL_LOC (loc)); -+ *val = DWARF_GET_LOC (loc); -+ return 0; - } - - static inline int - dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) - { -+ assert(!DWARF_IS_VAL_LOC (loc)); -+ - if (DWARF_IS_NULL_LOC (loc)) - return -UNW_EBADREG; - -- /* If a code-generator were to save a value of type unw_word_t in a -- floating-point register, we would have to support this case. I -- suppose it could happen with MMX registers, but does it really -- happen? */ -- assert (!DWARF_IS_FP_LOC (loc)); -- - if (DWARF_IS_REG_LOC (loc)) - return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val, - 1, c->as_arg); -@@ -232,7 +211,9 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) - 1, c->as_arg); - } - --#endif /* !UNW_LOCAL_ONLY */ -+// For historical reasons, the DWARF numbering does not match the libunwind -+// numbering, necessitating this override -+#define TDEP_DWARF_SP 4 - - #define tdep_getcontext_trace unw_getcontext - #define tdep_init_done UNW_OBJ(init_done) -diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c -index da170d4b3..70a62c505 100644 ---- a/src/dwarf/Gparser.c -+++ b/src/dwarf/Gparser.c -@@ -508,6 +508,9 @@ setup_fde (struct dwarf_cursor *c, dwarf_state_record_t *sr) - for (i = 0; i < DWARF_NUM_PRESERVED_REGS + 2; ++i) - set_reg (sr, i, DWARF_WHERE_SAME, 0); - -+ // SP defaults to CFA (but is overridable) -+ set_reg (sr, TDEP_DWARF_SP, DWARF_WHERE_CFA, 0); -+ - struct dwarf_cie_info *dci = c->pi.unwind_info; - sr->rs_current.ret_addr_column = dci->ret_addr_column; - unw_word_t addr = dci->cie_instr_start; -@@ -792,14 +795,14 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs) - /* As a special-case, if the stack-pointer is the CFA and the - stack-pointer wasn't saved, popping the CFA implicitly pops - the stack-pointer as well. 
*/ -- if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == UNW_TDEP_SP) -- && (UNW_TDEP_SP < ARRAY_SIZE(rs->reg.val)) -- && (rs->reg.where[UNW_TDEP_SP] == DWARF_WHERE_SAME)) -+ if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == TDEP_DWARF_SP) -+ && (TDEP_DWARF_SP < ARRAY_SIZE(rs->reg.val)) -+ && (DWARF_IS_NULL_LOC(c->loc[TDEP_DWARF_SP]))) - cfa = c->cfa; - else - { - regnum = dwarf_to_unw_regnum (rs->reg.val[DWARF_CFA_REG_COLUMN]); -- if ((ret = unw_get_reg ((unw_cursor_t *) c, regnum, &cfa)) < 0) -+ if ((ret = unw_get_reg (dwarf_to_cursor(c), regnum, &cfa)) < 0) - return ret; - } - cfa += rs->reg.val[DWARF_CFA_OFF_COLUMN]; -@@ -836,6 +839,10 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs) - case DWARF_WHERE_SAME: - break; - -+ case DWARF_WHERE_CFA: -+ new_loc[i] = DWARF_VAL_LOC (c, cfa); -+ break; -+ - case DWARF_WHERE_CFAREL: - new_loc[i] = DWARF_MEM_LOC (c, cfa + rs->reg.val[i]); - break; -diff --git a/src/x86/Gos-freebsd.c b/src/x86/Gos-freebsd.c -index 7dd014046..1b251d027 100644 ---- a/src/x86/Gos-freebsd.c -+++ b/src/x86/Gos-freebsd.c -@@ -138,6 +138,7 @@ x86_handle_signal_frame (unw_cursor_t *cursor) - c->dwarf.loc[ST0] = DWARF_NULL_LOC; - } else if (c->sigcontext_format == X86_SCF_FREEBSD_SYSCALL) { - c->dwarf.loc[EIP] = DWARF_LOC (c->dwarf.cfa, 0); -+ c->dwarf.loc[ESP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 4); - c->dwarf.loc[EAX] = DWARF_NULL_LOC; - c->dwarf.cfa += 4; - c->dwarf.use_prev_instr = 1; -diff --git a/src/x86/Gregs.c b/src/x86/Gregs.c -index 4a9592617..9446d6c62 100644 ---- a/src/x86/Gregs.c -+++ b/src/x86/Gregs.c -@@ -53,7 +53,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - break; - - case UNW_X86_CFA: -- case UNW_X86_ESP: - if (write) - return -UNW_EREADONLYREG; - *valp = c->dwarf.cfa; -@@ -81,6 +80,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - case UNW_X86_ECX: loc = c->dwarf.loc[ECX]; break; - case UNW_X86_EBX: loc = c->dwarf.loc[EBX]; break; - -+ case UNW_X86_ESP: loc = c->dwarf.loc[ESP]; break; - case UNW_X86_EBP: loc = c->dwarf.loc[EBP]; break; - case UNW_X86_ESI: loc = c->dwarf.loc[ESI]; break; - case UNW_X86_EDI: loc = c->dwarf.loc[EDI]; break; -diff --git a/src/x86/Gstep.c b/src/x86/Gstep.c -index 129b739a3..061dcbaaa 100644 ---- a/src/x86/Gstep.c -+++ b/src/x86/Gstep.c -@@ -47,7 +47,7 @@ unw_step (unw_cursor_t *cursor) - { - /* DWARF failed, let's see if we can follow the frame-chain - or skip over the signal trampoline. */ -- struct dwarf_loc ebp_loc, eip_loc; -+ struct dwarf_loc ebp_loc, eip_loc, esp_loc; - - /* We could get here because of missing/bad unwind information. - Validate all addresses before dereferencing. 
*/ -@@ -77,6 +77,7 @@ unw_step (unw_cursor_t *cursor) - c->dwarf.cfa); - - ebp_loc = DWARF_LOC (c->dwarf.cfa, 0); -+ esp_loc = DWARF_VAL_LOC (c, c->dwarf.cfa + 8); - eip_loc = DWARF_LOC (c->dwarf.cfa + 4, 0); - c->dwarf.cfa += 8; - -@@ -87,6 +88,7 @@ unw_step (unw_cursor_t *cursor) - c->dwarf.loc[i] = DWARF_NULL_LOC; - - c->dwarf.loc[EBP] = ebp_loc; -+ c->dwarf.loc[ESP] = esp_loc; - c->dwarf.loc[EIP] = eip_loc; - c->dwarf.use_prev_instr = 1; - } -diff --git a/src/x86_64/Gos-freebsd.c b/src/x86_64/Gos-freebsd.c -index 8f28d1d8c..0c5a17940 100644 ---- a/src/x86_64/Gos-freebsd.c -+++ b/src/x86_64/Gos-freebsd.c -@@ -133,6 +133,7 @@ x86_64_handle_signal_frame (unw_cursor_t *cursor) - c->dwarf.loc[RCX] = c->dwarf.loc[R10]; - /* rsp_loc = DWARF_LOC(c->dwarf.cfa - 8, 0); */ - /* rbp_loc = c->dwarf.loc[RBP]; */ -+ c->dwarf.loc[RSP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 8); - c->dwarf.loc[RIP] = DWARF_LOC (c->dwarf.cfa, 0); - ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip); - Debug (1, "Frame Chain [RIP=0x%Lx] = 0x%Lx\n", -diff --git a/src/x86_64/Gregs.c b/src/x86_64/Gregs.c -index baf8a24f0..dff5bcbe7 100644 ---- a/src/x86_64/Gregs.c -+++ b/src/x86_64/Gregs.c -@@ -79,7 +79,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - break; - - case UNW_X86_64_CFA: -- case UNW_X86_64_RSP: - if (write) - return -UNW_EREADONLYREG; - *valp = c->dwarf.cfa; -@@ -107,6 +106,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - case UNW_X86_64_RCX: loc = c->dwarf.loc[RCX]; break; - case UNW_X86_64_RBX: loc = c->dwarf.loc[RBX]; break; - -+ case UNW_X86_64_RSP: loc = c->dwarf.loc[RSP]; break; - case UNW_X86_64_RBP: loc = c->dwarf.loc[RBP]; break; - case UNW_X86_64_RSI: loc = c->dwarf.loc[RSI]; break; - case UNW_X86_64_RDI: loc = c->dwarf.loc[RDI]; break; -diff --git a/src/x86_64/Gstep.c b/src/x86_64/Gstep.c -index 3c5c3830f..fdad298c7 100644 ---- a/src/x86_64/Gstep.c -+++ b/src/x86_64/Gstep.c -@@ -223,7 +223,7 @@ unw_step (unw_cursor_t *cursor) - Debug (2, "RIP fixup didn't work, falling back\n"); - unw_word_t rbp1 = 0; - rbp_loc = DWARF_LOC(rbp, 0); -- rsp_loc = DWARF_NULL_LOC; -+ rsp_loc = DWARF_VAL_LOC(c, rbp + 16); - rip_loc = DWARF_LOC (rbp + 8, 0); - ret = dwarf_get (&c->dwarf, rbp_loc, &rbp1); - Debug (1, "[RBP=0x%lx] = 0x%lx (cfa = 0x%lx) -> 0x%lx\n", diff --git a/deps/patches/libunwind-configure-static-lzma.patch b/deps/patches/libunwind-configure-static-lzma.patch new file mode 100644 index 0000000000000..f8b428f60550b --- /dev/null +++ b/deps/patches/libunwind-configure-static-lzma.patch @@ -0,0 +1,20 @@ +--- configure.orig 2023-06-04 05:19:04 ++++ configure 2023-06-07 08:35:11 +@@ -18117,7 +18117,7 @@ + $as_echo_n "(cached) " >&6 + else + ac_check_lib_save_LIBS=$LIBS +-LIBS="-llzma $LIBS" ++LIBS="-L${libdir} -l:liblzma.a $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext + /* end confdefs.h. 
*/ + +@@ -18148,7 +18148,7 @@ + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_lzma_lzma_mf_is_supported" >&5 + $as_echo "$ac_cv_lib_lzma_lzma_mf_is_supported" >&6; } + if test "x$ac_cv_lib_lzma_lzma_mf_is_supported" = xyes; then : +- LIBLZMA=-llzma ++ LIBLZMA="-L${libdir} -l:liblzma.a" + + $as_echo "#define HAVE_LZMA 1" >>confdefs.h + diff --git a/deps/patches/libunwind-disable-initial-exec-tls.patch b/deps/patches/libunwind-disable-initial-exec-tls.patch new file mode 100644 index 0000000000000..c6718ac2db98f --- /dev/null +++ b/deps/patches/libunwind-disable-initial-exec-tls.patch @@ -0,0 +1,44 @@ +diff --git a/include/libunwind-common.h.in b/include/libunwind-common.h.in +index 893fdd69..80ab9648 100644 +--- a/include/libunwind-common.h.in ++++ b/include/libunwind-common.h.in +@@ -340,5 +340,6 @@ extern int unw_get_elf_filename_by_ip (unw_addr_space_t, unw_word_t, char *, + extern const char *unw_strerror (int); + extern int unw_backtrace (void **, int); + extern int unw_backtrace2 (void **, int, unw_context_t*, int); ++extern int unw_ensure_tls (void); + + extern unw_addr_space_t unw_local_addr_space; +diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c +index 7a5d7e1f..8453ffb0 100644 +--- a/src/dwarf/Gparser.c ++++ b/src/dwarf/Gparser.c +@@ -623,7 +623,7 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp) + #if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD + if (likely (caching == UNW_CACHE_PER_THREAD)) + { +- static _Thread_local struct dwarf_rs_cache tls_cache __attribute__((tls_model("initial-exec"))); ++ static _Thread_local struct dwarf_rs_cache tls_cache; + Debug (16, "using TLS cache\n"); + cache = &tls_cache; + } +diff --git a/src/mi/init.c b/src/mi/init.c +index e4431eeb..07cae852 100644 +--- a/src/mi/init.c ++++ b/src/mi/init.c +@@ -82,3 +82,15 @@ mi_init (void) + unw_init_page_size(); + assert(sizeof(struct cursor) <= sizeof(unw_cursor_t)); + } ++ ++int ++unw_ensure_tls (void) ++{ ++#if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD ++ static _Thread_local int alloc_trigger; ++ alloc_trigger = 1; ++ return alloc_trigger; ++#else ++ return 0; ++#endif ++} diff --git a/deps/patches/libunwind-dwarf-table.patch b/deps/patches/libunwind-dwarf-table.patch deleted file mode 100644 index 5905982f9a349..0000000000000 --- a/deps/patches/libunwind-dwarf-table.patch +++ /dev/null @@ -1,36 +0,0 @@ -From a5b5fd28ed03cb1ab524d24dc534c1fa167bf5a1 Mon Sep 17 00:00:00 2001 -From: Alex Arslan -Date: Fri, 5 Nov 2021 16:58:41 -0700 -Subject: [PATCH] Fix table indexing in `dwarf_search_unwind_table` - -`table_len` is used as an index into `table`, assuming it represents the -number of entries. However, it is defined as the number of entries -multiplied by `sizeof(unw_word_t)`. This is accounted for in other -places that use `table_len`, e.g. in `lookup`, which divides out the -size of `unw_word_t`, but the indexing expression uses `table_len` -directly. So when `table` has say 2 entries, we're actually looking at -index 15 rather than 1 in the comparison. This can cause the conditional -to erroneously evaluate to true, allowing the following line to -segfault. - -This was observed with JIT compiled code from Julia with LLVM on -FreeBSD. 
- -Co-Authored-By: Jameson Nash ---- - src/dwarf/Gfind_proc_info-lsb.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c -index 5e27a501..af4cbce8 100644 ---- a/src/dwarf/Gfind_proc_info-lsb.c -+++ b/src/dwarf/Gfind_proc_info-lsb.c -@@ -866,7 +866,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, - if (as == unw_local_addr_space) - { - e = lookup (table, table_len, ip - ip_base); -- if (e && &e[1] < &table[table_len]) -+ if (e && &e[1] < &table[table_len / sizeof (unw_word_t)]) - last_ip = e[1].start_ip_offset + ip_base; - else - last_ip = di->end_ip; diff --git a/deps/patches/libunwind-non-empty-structs.patch b/deps/patches/libunwind-non-empty-structs.patch deleted file mode 100644 index 0c04709a13184..0000000000000 --- a/deps/patches/libunwind-non-empty-structs.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 1f35cd8f2bdcc1876af7352cc3e87bb7277e8162 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Mos=C3=A8=20Giordano?= -Date: Sat, 18 Jun 2022 10:35:36 +0100 -Subject: [PATCH 1/1] Make some structs non-empty - -Backport of . ---- - include/libunwind-aarch64.h | 6 ++++++ - include/libunwind-arm.h | 6 ++++++ - include/libunwind-x86.h | 6 ++++++ - 3 files changed, 18 insertions(+) - -diff --git a/include/libunwind-aarch64.h b/include/libunwind-aarch64.h -index aeaef630..b7066c51 100644 ---- a/include/libunwind-aarch64.h -+++ b/include/libunwind-aarch64.h -@@ -35,6 +35,10 @@ extern "C" { - #include - #include - -+#ifndef UNW_EMPTY_STRUCT -+# define UNW_EMPTY_STRUCT uint8_t unused; -+#endif -+ - #define UNW_TARGET aarch64 - #define UNW_TARGET_AARCH64 1 - -@@ -60,6 +64,7 @@ typedef long double unw_tdep_fpreg_t; - typedef struct - { - /* no aarch64-specific auxiliary proc-info */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_proc_info_t; - -@@ -169,6 +174,7 @@ aarch64_regnum_t; - typedef struct unw_tdep_save_loc - { - /* Additional target-dependent info on a save location. */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_save_loc_t; - -diff --git a/include/libunwind-arm.h b/include/libunwind-arm.h -index 6709b7ab..7c7005d1 100644 ---- a/include/libunwind-arm.h -+++ b/include/libunwind-arm.h -@@ -32,6 +32,10 @@ extern "C" { - #include - #include - -+#ifndef UNW_EMPTY_STRUCT -+# define UNW_EMPTY_STRUCT uint8_t unused; -+#endif -+ - #define UNW_TARGET arm - #define UNW_TARGET_ARM 1 - -@@ -247,6 +251,7 @@ arm_regnum_t; - typedef struct unw_tdep_save_loc - { - /* Additional target-dependent info on a save location. */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_save_loc_t; - -@@ -288,6 +293,7 @@ unw_tdep_context_t; - typedef struct - { - /* no arm-specific auxiliary proc-info */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_proc_info_t; - -diff --git a/include/libunwind-x86.h b/include/libunwind-x86.h -index 40fe0464..d3b741d3 100644 ---- a/include/libunwind-x86.h -+++ b/include/libunwind-x86.h -@@ -34,6 +34,10 @@ extern "C" { - #include - #include - -+#ifndef UNW_EMPTY_STRUCT -+# define UNW_EMPTY_STRUCT uint8_t unused; -+#endif -+ - #define UNW_TARGET x86 - #define UNW_TARGET_X86 1 - -@@ -158,6 +162,7 @@ x86_regnum_t; - typedef struct unw_tdep_save_loc - { - /* Additional target-dependent info on a save location. 
*/ -+ UNW_EMPTY_STRUCT - } - unw_tdep_save_loc_t; - -@@ -169,6 +174,7 @@ typedef ucontext_t unw_tdep_context_t; - typedef struct - { - /* no x86-specific auxiliary proc-info */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_proc_info_t; - --- -2.36.1 - diff --git a/deps/patches/libunwind-prefer-extbl.patch b/deps/patches/libunwind-prefer-extbl.patch deleted file mode 100644 index 07b172604d623..0000000000000 --- a/deps/patches/libunwind-prefer-extbl.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 2d6a50435bb743be1e4d88eee002372344348349 Mon Sep 17 00:00:00 2001 -From: Yichao Yu -Date: Sun, 29 Aug 2021 13:43:01 -0700 -Subject: [PATCH] Prefer EXTBL unwinding on ARM - -It is part of the C++ ABI so a EXTBL unwind info that's not `CANT_UNWIND` -should always be reliable/correct. -Ignore `ESTOPUNWIND` so that a `CANT_UNWIND` info can fallback to unwinding -using the debug info instead. ---- - include/tdep-arm/libunwind_i.h | 4 +++ - src/arm/Gex_tables.c | 18 ++++++++--- - src/arm/Gstep.c | 55 ++++++++++++++++++++-------------- - 3 files changed, 51 insertions(+), 26 deletions(-) - -diff --git a/include/tdep-arm/libunwind_i.h b/include/tdep-arm/libunwind_i.h -index 88ebfb069..5bd28c953 100644 ---- a/include/tdep-arm/libunwind_i.h -+++ b/include/tdep-arm/libunwind_i.h -@@ -256,6 +256,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) - #define tdep_init_done UNW_OBJ(init_done) - #define tdep_init UNW_OBJ(init) - #define arm_find_proc_info UNW_OBJ(find_proc_info) -+#define arm_find_proc_info2 UNW_OBJ(find_proc_info2) - #define arm_put_unwind_info UNW_OBJ(put_unwind_info) - /* Platforms that support UNW_INFO_FORMAT_TABLE need to define - tdep_search_unwind_table. */ -@@ -297,6 +298,9 @@ extern void tdep_init (void); - extern int arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, - unw_proc_info_t *pi, int need_unwind_info, - void *arg); -+extern int arm_find_proc_info2 (unw_addr_space_t as, unw_word_t ip, -+ unw_proc_info_t *pi, int need_unwind_info, -+ void *arg, int methods); - extern void arm_put_unwind_info (unw_addr_space_t as, - unw_proc_info_t *pi, void *arg); - extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip, -diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c -index efdcf2978..083d2b2f7 100644 ---- a/src/arm/Gex_tables.c -+++ b/src/arm/Gex_tables.c -@@ -506,18 +506,20 @@ arm_phdr_cb (struct dl_phdr_info *info, size_t size, void *data) - } - - HIDDEN int --arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, -- unw_proc_info_t *pi, int need_unwind_info, void *arg) -+arm_find_proc_info2 (unw_addr_space_t as, unw_word_t ip, -+ unw_proc_info_t *pi, int need_unwind_info, void *arg, -+ int methods) - { - int ret = -1; - intrmask_t saved_mask; - - Debug (14, "looking for IP=0x%lx\n", (long) ip); - -- if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) -+ if (UNW_TRY_METHOD (UNW_ARM_METHOD_DWARF) && (methods & UNW_ARM_METHOD_DWARF)) - ret = dwarf_find_proc_info (as, ip, pi, need_unwind_info, arg); - -- if (ret < 0 && UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX)) -+ if (ret < 0 && UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX) && -+ (methods & UNW_ARM_METHOD_EXIDX)) - { - struct arm_cb_data cb_data; - -@@ -540,6 +542,14 @@ arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, - return ret; - } - -+HIDDEN int -+arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, -+ unw_proc_info_t *pi, int need_unwind_info, void *arg) -+{ -+ return arm_find_proc_info2 (as, ip, pi, need_unwind_info, arg, -+ UNW_ARM_METHOD_ALL); -+} -+ - HIDDEN void - arm_put_unwind_info (unw_addr_space_t as, 
unw_proc_info_t *proc_info, void *arg) - { -diff --git a/src/arm/Gstep.c b/src/arm/Gstep.c -index 895e8a892..e4ada651b 100644 ---- a/src/arm/Gstep.c -+++ b/src/arm/Gstep.c -@@ -54,17 +54,22 @@ arm_exidx_step (struct cursor *c) - c->dwarf.as_arg); - if (ret == -UNW_ENOINFO) - { -+#ifdef UNW_LOCAL_ONLY -+ if ((ret = arm_find_proc_info2 (c->dwarf.as, ip, &c->dwarf.pi, -+ 1, c->dwarf.as_arg, -+ UNW_ARM_METHOD_EXIDX)) < 0) -+ return ret; -+#else - if ((ret = tdep_find_proc_info (&c->dwarf, ip, 1)) < 0) - return ret; -+#endif - } - - if (c->dwarf.pi.format != UNW_INFO_FORMAT_ARM_EXIDX) - return -UNW_ENOINFO; - - ret = arm_exidx_extract (&c->dwarf, buf); -- if (ret == -UNW_ESTOPUNWIND) -- return 0; -- else if (ret < 0) -+ if (ret < 0) - return ret; - - ret = arm_exidx_decode (buf, ret, &c->dwarf); -@@ -88,6 +93,7 @@ unw_step (unw_cursor_t *cursor) - { - struct cursor *c = (struct cursor *) cursor; - int ret = -UNW_EUNSPEC; -+ int has_stopunwind = 0; - - Debug (1, "(cursor=%p)\n", c); - -@@ -95,17 +101,31 @@ unw_step (unw_cursor_t *cursor) - if (unw_is_signal_frame (cursor) > 0) - return arm_handle_signal_frame (cursor); - -+ /* First, try extbl-based unwinding. */ -+ if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX)) -+ { -+ ret = arm_exidx_step (c); -+ Debug(1, "arm_exidx_step()=%d\n", ret); -+ if (ret > 0) -+ return 1; -+ if (ret == 0) -+ return ret; -+ if (ret == -UNW_ESTOPUNWIND) -+ has_stopunwind = 1; -+ } -+ - #ifdef CONFIG_DEBUG_FRAME -- /* First, try DWARF-based unwinding. */ -+ /* Second, try DWARF-based unwinding. */ - if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) - { -+ Debug (13, "%s(ret=%d), trying extbl\n", -+ UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() failed " : "", -+ ret); - ret = dwarf_step (&c->dwarf); - Debug(1, "dwarf_step()=%d\n", ret); - - if (likely (ret > 0)) - return 1; -- else if (unlikely (ret == -UNW_ESTOPUNWIND)) -- return ret; - - if (ret < 0 && ret != -UNW_ENOINFO) - { -@@ -115,18 +135,9 @@ unw_step (unw_cursor_t *cursor) - } - #endif /* CONFIG_DEBUG_FRAME */ - -- /* Next, try extbl-based unwinding. */ -- if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX)) -- { -- Debug (13, "%s(ret=%d), trying extbl\n", -- UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() failed " : "", -- ret); -- ret = arm_exidx_step (c); -- if (ret > 0) -- return 1; -- if (ret == -UNW_ESTOPUNWIND || ret == 0) -- return ret; -- } -+ // Before trying the fallback, if any unwind info tell us to stop, do that. -+ if (has_stopunwind) -+ return -UNW_ESTOPUNWIND; - - /* Fall back on APCS frame parsing. - Note: This won't work in case the ARM EABI is used. */ -@@ -139,13 +150,13 @@ unw_step (unw_cursor_t *cursor) - if (UNW_TRY_METHOD(UNW_ARM_METHOD_FRAME)) - { - Debug (13, "%s%s%s%s(ret=%d), trying frame-chain\n", -- UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "", -- (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) && UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "and " : "", - UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() " : "", -- (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) || UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "failed " : "", -+ (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) && UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? "and " : "", -+ UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "", -+ (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) || UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? 
"failed " : "", - ret); - ret = UNW_ESUCCESS; -- /* DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */ -+ /* EXIDX and/or DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */ - unw_word_t instr, i; - dwarf_loc_t ip_loc, fp_loc; - unw_word_t frame; diff --git a/deps/patches/libunwind-revert_prelink_unwind.patch b/deps/patches/libunwind-revert_prelink_unwind.patch new file mode 100644 index 0000000000000..80de3c9ce4571 --- /dev/null +++ b/deps/patches/libunwind-revert_prelink_unwind.patch @@ -0,0 +1,187 @@ +From 3af39d34f576890e7f1f3e97cc1cb45b4b76aa47 Mon Sep 17 00:00:00 2001 +From: Tim Besard +Date: Tue, 16 Jan 2024 09:49:21 -0800 +Subject: [PATCH] Revert "Fix unwinding of pre-linked libraries" + +This reverts commit a4014f33775321b4106a1134b89020a7774902dd, +which regresses unwinding on FreeBSD (JuliaLang/julia#51467). +--- + include/dwarf.h | 2 -- + include/libunwind-dynamic.h | 1 - + src/dwarf/Gfind_proc_info-lsb.c | 42 +++++++-------------------------- + src/dwarf/Gfind_unwind_table.c | 1 - + 4 files changed, 8 insertions(+), 38 deletions(-) + +diff --git a/include/dwarf.h b/include/dwarf.h +index 4fd1dba0..3fc6bce2 100644 +--- a/include/dwarf.h ++++ b/include/dwarf.h +@@ -371,8 +371,6 @@ struct unw_debug_frame_list + /* The start (inclusive) and end (exclusive) of the described region. */ + unw_word_t start; + unw_word_t end; +- /* ELF load offset */ +- unw_word_t load_offset; + /* The debug frame itself. */ + char *debug_frame; + size_t debug_frame_size; +diff --git a/include/libunwind-dynamic.h b/include/libunwind-dynamic.h +index a26f2c99..c902ccd9 100644 +--- a/include/libunwind-dynamic.h ++++ b/include/libunwind-dynamic.h +@@ -141,7 +141,6 @@ typedef struct unw_dyn_info + unw_word_t gp; /* global-pointer in effect for this entry */ + int32_t format; /* real type: unw_dyn_info_format_t */ + int32_t pad; +- unw_word_t load_offset; /* ELF load offset */ + union + { + unw_dyn_proc_info_t pi; +diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c +index c11345e8..c701ccfb 100644 +--- a/src/dwarf/Gfind_proc_info-lsb.c ++++ b/src/dwarf/Gfind_proc_info-lsb.c +@@ -108,17 +108,13 @@ linear_search (unw_addr_space_t as, unw_word_t ip, + + static int + load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local, +- unw_word_t segbase, unw_word_t *load_offset) ++ unw_word_t segbase) + { + struct elf_image ei; +- Elf_W (Ehdr) *ehdr; +- Elf_W (Phdr) *phdr; + Elf_W (Shdr) *shdr; +- int i; + int ret; + + ei.image = NULL; +- *load_offset = 0; + + ret = elf_w (load_debuginfo) (file, &ei, is_local); + if (ret != 0) +@@ -193,20 +189,6 @@ load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local, + #if defined(SHF_COMPRESSED) + } + #endif +- +- ehdr = ei.image; +- phdr = (Elf_W (Phdr) *) ((char *) ei.image + ehdr->e_phoff); +- +- for (i = 0; i < ehdr->e_phnum; ++i) +- if (phdr[i].p_type == PT_LOAD) +- { +- *load_offset = segbase - phdr[i].p_vaddr; +- +- Debug (4, "%s load offset is 0x%zx\n", file, *load_offset); +- +- break; +- } +- + mi_munmap(ei.image, ei.size); + return 0; + } +@@ -259,7 +241,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase, + int err; + char *buf; + size_t bufsize; +- unw_word_t load_offset; + + /* First, see if we loaded this frame already. 
*/ + +@@ -287,7 +268,7 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase, + name = (char*) dlname; + + err = load_debug_frame (name, &buf, &bufsize, as == unw_local_addr_space, +- segbase, &load_offset); ++ segbase); + + if (!err) + { +@@ -300,7 +281,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase, + + fdesc->start = start; + fdesc->end = end; +- fdesc->load_offset = load_offset; + fdesc->debug_frame = buf; + fdesc->debug_frame_size = bufsize; + fdesc->index = NULL; +@@ -497,7 +477,6 @@ dwarf_find_debug_frame (int found, unw_dyn_info_t *di_debug, unw_word_t ip, + di->format = UNW_INFO_FORMAT_TABLE; + di->start_ip = fdesc->start; + di->end_ip = fdesc->end; +- di->load_offset = fdesc->load_offset; + di->u.ti.name_ptr = (unw_word_t) (uintptr_t) obj_name; + di->u.ti.table_data = (unw_word_t *) fdesc; + di->u.ti.table_len = sizeof (*fdesc) / sizeof (unw_word_t); +@@ -960,14 +939,12 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + ip_base = segbase; + } + +- Debug (6, "lookup IP 0x%lx\n", (long) (ip - ip_base - di->load_offset)); +- + #ifndef UNW_REMOTE_ONLY + if (as == unw_local_addr_space) + { +- e = lookup (table, table_len, ip - ip_base - di->load_offset); ++ e = lookup (table, table_len, ip - ip_base); + if (e && &e[1] < &table[table_len / sizeof (struct table_entry)]) +- last_ip = e[1].start_ip_offset + ip_base + di->load_offset; ++ last_ip = e[1].start_ip_offset + ip_base; + else + last_ip = di->end_ip; + } +@@ -975,7 +952,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + #endif + { + #ifndef UNW_LOCAL_ONLY +- int32_t last_ip_offset = di->end_ip - ip_base - di->load_offset; ++ int32_t last_ip_offset = di->end_ip - ip_base; + segbase = di->u.rti.segbase; + if ((ret = remote_lookup (as, (uintptr_t) table, table_len, + ip - ip_base, &ent, &last_ip_offset, arg)) < 0) +@@ -983,7 +960,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + if (ret) + { + e = &ent; +- last_ip = last_ip_offset + ip_base + di->load_offset; ++ last_ip = last_ip_offset + ip_base; + } + else + e = NULL; /* no info found */ +@@ -997,8 +974,8 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + unwind info. 
*/ + return -UNW_ENOINFO; + } +- Debug (15, "ip=0x%lx, load_offset=0x%lx, start_ip=0x%lx\n", +- (long) ip, (long) di->load_offset, (long) (e->start_ip_offset)); ++ Debug (15, "ip=0x%lx, start_ip=0x%lx\n", ++ (long) ip, (long) (e->start_ip_offset)); + if (debug_frame_base) + fde_addr = e->fde_offset + debug_frame_base; + else +@@ -1022,9 +999,6 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + pi->flags = UNW_PI_FLAG_DEBUG_FRAME; + } + +- pi->start_ip += di->load_offset; +- pi->end_ip += di->load_offset; +- + #if defined(NEED_LAST_IP) + pi->last_ip = last_ip; + #else +diff --git a/src/dwarf/Gfind_unwind_table.c b/src/dwarf/Gfind_unwind_table.c +index a7c4dfd3..2b503ea9 100644 +--- a/src/dwarf/Gfind_unwind_table.c ++++ b/src/dwarf/Gfind_unwind_table.c +@@ -197,7 +197,6 @@ dwarf_find_unwind_table (struct elf_dyn_info *edi, + + edi->di_cache.start_ip = start_ip; + edi->di_cache.end_ip = end_ip; +- edi->di_cache.load_offset = 0; + edi->di_cache.format = UNW_INFO_FORMAT_REMOTE_TABLE; + edi->di_cache.u.rti.name_ptr = 0; + /* two 32-bit values (ip_offset/fde_offset) per table-entry: */ +-- +2.43.0 + diff --git a/deps/patches/libunwind-static-arm.patch b/deps/patches/libunwind-static-arm.patch deleted file mode 100644 index 92544a003b8b9..0000000000000 --- a/deps/patches/libunwind-static-arm.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c -index d6573a65..1d64803e 100644 ---- a/src/arm/Gex_tables.c -+++ b/src/arm/Gex_tables.c -@@ -381,7 +381,7 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf) - return nbuf; - } - --int -+static int - arm_search_unwind_table (unw_addr_space_t as, unw_word_t ip, - unw_dyn_info_t *di, unw_proc_info_t *pi, - int need_unwind_info, void *arg) diff --git a/deps/patches/llvm-libunwind-force-dwarf.patch b/deps/patches/llvm-libunwind-force-dwarf.patch index 2f4d31acb8a4a..494c5e77e187b 100644 --- a/deps/patches/llvm-libunwind-force-dwarf.patch +++ b/deps/patches/llvm-libunwind-force-dwarf.patch @@ -6,22 +6,23 @@ Date: Tue Aug 27 15:01:22 2013 -0400 Add option to step with DWARF --- -diff -pur a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h ---- a/libunwind/include/libunwind.h 2021-06-28 18:23:38.000000000 +0200 -+++ b/libunwind/include/libunwind.h 2022-05-04 18:44:24.000000000 +0200 +diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h +index b2dae8f..fc37afb 100644 +--- a/libunwind/include/libunwind.h ++++ b/libunwind/include/libunwind.h @@ -108,6 +108,7 @@ extern "C" { - + extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL; extern int unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL; +extern int unw_init_local_dwarf(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL; extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL; extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL; extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL; -Only in b/libunwind/include: libunwind.h.orig -diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp ---- a/libunwind/src/UnwindCursor.hpp 2021-06-28 18:23:38.000000000 +0200 -+++ b/libunwind/src/UnwindCursor.hpp 2022-05-04 18:45:11.000000000 +0200 -@@ -437,6 +437,9 @@ public: +diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp +index 7753936..26ca486 100644 +--- a/libunwind/src/UnwindCursor.hpp ++++ b/libunwind/src/UnwindCursor.hpp +@@ -453,6 +453,9 @@ public: virtual bool isSignalFrame() { 
_LIBUNWIND_ABORT("isSignalFrame not implemented"); } @@ -31,7 +32,7 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp virtual bool getFunctionName(char *, size_t, unw_word_t *) { _LIBUNWIND_ABORT("getFunctionName not implemented"); } -@@ -894,6 +897,7 @@ public: +@@ -944,6 +947,7 @@ public: virtual void getInfo(unw_proc_info_t *); virtual void jumpto(); virtual bool isSignalFrame(); @@ -39,24 +40,23 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); virtual const char *getRegisterName(int num); -@@ -963,7 +967,7 @@ private: +@@ -1031,7 +1035,7 @@ private: const UnwindInfoSections §s); - int stepWithCompactEncoding() { - #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + int stepWithCompactEncoding(bool stage2 = false) { + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - if ( compactSaysUseDwarf() ) + if ( _forceDwarf || compactSaysUseDwarf() ) - return stepWithDwarfFDE(); - #endif + return stepWithDwarfFDE(stage2); + #endif R dummy; -@@ -1198,6 +1202,7 @@ private: - unw_proc_info_t _info; - bool _unwindInfoMissing; - bool _isSignalFrame; -+ bool _forceDwarf; - #if defined(_LIBUNWIND_TARGET_LINUX) && defined(_LIBUNWIND_TARGET_AARCH64) +@@ -1317,13 +1321,14 @@ private: + #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) bool _isSigReturn = false; #endif -@@ -1207,7 +1212,7 @@ private: ++ bool _forceDwarf; + }; + + template UnwindCursor::UnwindCursor(unw_context_t *context, A &as) : _addressSpace(as), _registers(context), _unwindInfoMissing(false), @@ -65,8 +65,8 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp static_assert((check_fit, unw_cursor_t>::does_fit), "UnwindCursor<> does not fit in unw_cursor_t"); static_assert((alignof(UnwindCursor) <= alignof(unw_cursor_t)), -@@ -1217,7 +1222,8 @@ UnwindCursor::UnwindCursor(unw_con - +@@ -1333,7 +1338,8 @@ UnwindCursor::UnwindCursor(unw_context_t *context, A &as) + template UnwindCursor::UnwindCursor(A &as, void *) - : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) { @@ -75,18 +75,18 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp memset(&_info, 0, sizeof(_info)); // FIXME // fill in _registers from thread arg -@@ -1273,6 +1279,10 @@ template bool U +@@ -1396,6 +1402,10 @@ template bool UnwindCursor::isSignalFrame() { return _isSignalFrame; } - + +template void UnwindCursor::setForceDWARF(bool force) { + _forceDwarf = force; +} + #endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) - + #if defined(_LIBUNWIND_ARM_EHABI) -@@ -1941,7 +1951,13 @@ void UnwindCursor::setInfoBasedOnI +@@ -2611,7 +2621,12 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { // record that we have no unwind info. 
if (_info.format == 0) _unwindInfoMissing = true; @@ -96,14 +96,14 @@ diff -pur a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp + #else return; + #endif -+ } } #endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) -diff -pur a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp ---- a/libunwind/src/libunwind.cpp 2021-06-28 18:23:38.000000000 +0200 -+++ b/libunwind/src/libunwind.cpp 2022-05-04 18:44:24.000000000 +0200 -@@ -71,6 +71,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(u +diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp +index 217dde9..8e9a77a 100644 +--- a/libunwind/src/libunwind.cpp ++++ b/libunwind/src/libunwind.cpp +@@ -86,6 +86,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, new (reinterpret_cast *>(cursor)) UnwindCursor( context, LocalAddressSpace::sThisAddressSpace); @@ -111,10 +111,10 @@ diff -pur a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp #undef REGISTER_KIND AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->setInfoBasedOnIPRegister(); -@@ -79,6 +80,54 @@ _LIBUNWIND_HIDDEN int __unw_init_local(u +@@ -109,6 +110,54 @@ _LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, } - _LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local) - + _LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg) + +_LIBUNWIND_HIDDEN int __unw_init_local_dwarf(unw_cursor_t *cursor, + unw_context_t *context) { + _LIBUNWIND_TRACE_API("__unw_init_local_dwarf(cursor=%p, context=%p)", @@ -163,14 +163,15 @@ diff -pur a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp +} +_LIBUNWIND_WEAK_ALIAS(__unw_init_local_dwarf, unw_init_local_dwarf) + - /// Get value of specified register at cursor position in stack frame. - _LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, - unw_word_t *value) { -diff -pur a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h ---- a/libunwind/src/libunwind_ext.h 2021-06-28 18:23:38.000000000 +0200 -+++ b/libunwind/src/libunwind_ext.h 2022-05-04 18:44:24.000000000 +0200 + /// Set value of specified register at cursor position in stack frame. + _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t value) { +diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h +index 28db43a..c4f9767 100644 +--- a/libunwind/src/libunwind_ext.h ++++ b/libunwind/src/libunwind_ext.h @@ -25,6 +25,7 @@ extern "C" { - + extern int __unw_getcontext(unw_context_t *); extern int __unw_init_local(unw_cursor_t *, unw_context_t *); +extern int __unw_init_local_dwarf(unw_cursor_t *, unw_context_t *); diff --git a/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch b/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch index afb4b941d5b92..0e517d8ec7aa8 100644 --- a/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch +++ b/deps/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch @@ -16,7 +16,7 @@ single FDE. I suspect this was just an Apple bug, compensated by Apple- specific code in LLVM. See lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp and -http://lists.llvm.org/pipermail/llvm-dev/2013-April/061737.html +https://lists.llvm.org/pipermail/llvm-dev/2013-April/061737.html for more detail. This change is based on the LLVM RTDyldMemoryManager.cpp. 
It should diff --git a/deps/patches/llvm-libunwind-prologue-epilogue.patch b/deps/patches/llvm-libunwind-prologue-epilogue.patch index 7dadca728f9cf..b2618998905e4 100644 --- a/deps/patches/llvm-libunwind-prologue-epilogue.patch +++ b/deps/patches/llvm-libunwind-prologue-epilogue.patch @@ -14,7 +14,7 @@ index 1c3175dff50a..78a658ccbc27 100644 @@ -310,6 +310,50 @@ int CompactUnwinder_x86_64::stepWithCompactEncodingRBPFrame( uint32_t savedRegistersLocations = EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS); - + + // If we have not stored EBP yet + if (functionStart == registers.getIP()) { + uint64_t rsp = registers.getSP(); diff --git a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch b/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch deleted file mode 100644 index 4e3897dfb9801..0000000000000 --- a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch +++ /dev/null @@ -1,156 +0,0 @@ -Upstream commit 8c03fdf34a659925a3f09c8f54016e47ea1c7519 changed the build such -that it requires living inside the monorepo with libcxx available, only so that -it can reuse a CMake file to simplify some build steps. This patch is a revert -of that commit applied only to libunwind. - ---- -diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt -index 570b8db90653..a383d7d77d6f 100644 ---- a/libunwind/CMakeLists.txt -+++ b/libunwind/CMakeLists.txt -@@ -1,7 +1,3 @@ --if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/../libcxx") -- message(FATAL_ERROR "libunwind requires being built in a monorepo layout with libcxx available") --endif() -- - #=============================================================================== - # Setup Project - #=============================================================================== -@@ -15,31 +11,103 @@ set(CMAKE_MODULE_PATH - ${CMAKE_MODULE_PATH} - ) - --set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) --set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) --set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH -- "Specify path to libc++ source.") -- - if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD) - project(libunwind LANGUAGES C CXX ASM) - -+ # Rely on llvm-config. 
-+ set(CONFIG_OUTPUT) -+ if(NOT LLVM_CONFIG_PATH) -+ find_program(LLVM_CONFIG_PATH "llvm-config") -+ endif() -+ if (DEFINED LLVM_PATH) -+ set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include") -+ set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree") -+ set(LLVM_MAIN_SRC_DIR ${LLVM_PATH}) -+ set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules") -+ elseif(LLVM_CONFIG_PATH) -+ message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}") -+ set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root") -+ execute_process(COMMAND ${CONFIG_COMMAND} -+ RESULT_VARIABLE HAD_ERROR -+ OUTPUT_VARIABLE CONFIG_OUTPUT) -+ if (NOT HAD_ERROR) -+ string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" -+ CONFIG_OUTPUT ${CONFIG_OUTPUT}) -+ else() -+ string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}") -+ message(STATUS "${CONFIG_COMMAND_STR}") -+ message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") -+ endif() -+ -+ list(GET CONFIG_OUTPUT 0 INCLUDE_DIR) -+ list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT) -+ list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR) -+ -+ set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include") -+ set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") -+ set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") -+ set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py") -+ -+ # --cmakedir is supported since llvm r291218 (4.0 release) -+ execute_process( -+ COMMAND ${LLVM_CONFIG_PATH} --cmakedir -+ RESULT_VARIABLE HAD_ERROR -+ OUTPUT_VARIABLE CONFIG_OUTPUT -+ ERROR_QUIET) -+ if(NOT HAD_ERROR) -+ string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG) -+ file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH) -+ else() -+ file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE) -+ set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") -+ endif() -+ else() -+ message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: " -+ "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. " -+ "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config " -+ "or -DLLVM_PATH=path/to/llvm-source-root.") -+ endif() -+ -+ if (EXISTS ${LLVM_CMAKE_PATH}) -+ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") -+ include("${LLVM_CMAKE_PATH}/AddLLVM.cmake") -+ include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake") -+ else() -+ message(WARNING "Not found: ${LLVM_CMAKE_PATH}") -+ endif() -+ - set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 12.0.1) - set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") - set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") - -- # Add the CMake module path of libcxx so we can reuse HandleOutOfTreeLLVM.cmake -- set(LIBUNWIND_LIBCXX_CMAKE_PATH "${LIBUNWIND_LIBCXX_PATH}/cmake/Modules") -- list(APPEND CMAKE_MODULE_PATH "${LIBUNWIND_LIBCXX_CMAKE_PATH}") -+ if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) -+ set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) -+ else() -+ # Seek installed Lit. -+ find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit -+ DOC "Path to lit.py") -+ endif() - -- # In a standalone build, we don't have llvm to automatically generate the -- # llvm-lit script for us. So we need to provide an explicit directory that -- # the configurator should write the script into. -- set(LIBUNWIND_STANDALONE_BUILD 1) -- set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin") -+ if (LLVM_LIT) -+ # Define the default arguments to use with 'lit', and an option for the user -+ # to override. 
-+ set(LIT_ARGS_DEFAULT "-sv") -+ if (MSVC OR XCODE) -+ set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") -+ endif() -+ set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") -+ -+ # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. -+ if (WIN32 AND NOT CYGWIN) -+ set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") -+ endif() -+ else() -+ set(LLVM_INCLUDE_TESTS OFF) -+ endif() - -- # Find the LLVM sources and simulate LLVM CMake options. -- include(HandleOutOfTreeLLVM) -+ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) -+ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) - else() - set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py") - endif() -@@ -85,8 +153,6 @@ set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING - "Additional compiler flags for test programs.") - set(LIBUNWIND_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/lit.site.cfg.in" CACHE STRING - "The Lit testing configuration to use when running the tests.") --set(LIBUNWIND_TEST_PARAMS "" CACHE STRING -- "A list of parameters to run the Lit test suite with.") - - if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC) - message(FATAL_ERROR "libunwind must be built as either a shared or static library.") -@@ -113,6 +179,9 @@ set(CMAKE_MODULE_PATH - "${CMAKE_CURRENT_SOURCE_DIR}/cmake" - ${CMAKE_MODULE_PATH}) - -+set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -+set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -+ - if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) - set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) - set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) diff --git a/deps/patches/neoverse-generic-kernels.patch b/deps/patches/neoverse-generic-kernels.patch deleted file mode 100644 index ab37e3783bf3e..0000000000000 --- a/deps/patches/neoverse-generic-kernels.patch +++ /dev/null @@ -1,19 +0,0 @@ -diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1 -index ea010db4..074d7215 100644 ---- a/kernel/arm64/KERNEL.NEOVERSEN1 -+++ b/kernel/arm64/KERNEL.NEOVERSEN1 -@@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c - ICAMAXKERNEL = izamax_thunderx2t99.c - IZAMAXKERNEL = izamax_thunderx2t99.c - --SNRM2KERNEL = scnrm2_thunderx2t99.c --DNRM2KERNEL = dznrm2_thunderx2t99.c --CNRM2KERNEL = scnrm2_thunderx2t99.c --ZNRM2KERNEL = dznrm2_thunderx2t99.c -+SNRM2KERNEL = nrm2.S -+DNRM2KERNEL = nrm2.S -+CNRM2KERNEL = znrm2.S -+ZNRM2KERNEL = znrm2.S - - DDOTKERNEL = dot_thunderx2t99.c - SDOTKERNEL = dot_thunderx2t99.c diff --git a/deps/patches/openblas-ofast-power.patch b/deps/patches/openblas-ofast-power.patch index 405e3f7581331..01089286257f7 100644 --- a/deps/patches/openblas-ofast-power.patch +++ b/deps/patches/openblas-ofast-power.patch @@ -1,17 +1,29 @@ diff --git a/Makefile.power b/Makefile.power -index 28a0bae0..b4869fbd 100644 +index aa1ca080a..42c417a78 100644 --- a/Makefile.power +++ b/Makefile.power -@@ -11,7 +11,7 @@ endif - - ifeq ($(CORE), POWER10) +@@ -13,16 +13,16 @@ ifeq ($(CORE), POWER10) ifneq ($(C_COMPILER), PGI) + ifeq ($(C_COMPILER), GCC)) + ifeq ($(GCCVERSIONGTEQ10), 1) -CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math +CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math - ifeq ($(F_COMPILER), IBM) - FCOMMON_OPT += -O2 -qrecur -qnosave + else ifneq ($(GCCVERSIONGT4), 1) + $(warning your compiler is too old to fully 
support POWER9, getting a newer version of gcc is recommended) +-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math ++CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math + else + $(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended) +-CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math ++CCOMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math + endif else -@@ -22,7 +22,7 @@ endif +-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math ++CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math + endif + ifeq ($(F_COMPILER), IBM) + FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize +@@ -34,7 +34,7 @@ endif ifeq ($(CORE), POWER9) ifneq ($(C_COMPILER), PGI) @@ -20,7 +32,7 @@ index 28a0bae0..b4869fbd 100644 ifeq ($(C_COMPILER), GCC) ifneq ($(GCCVERSIONGT4), 1) $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) -@@ -59,7 +59,7 @@ endif +@@ -70,7 +70,7 @@ endif ifeq ($(CORE), POWER8) ifneq ($(C_COMPILER), PGI) diff --git a/deps/pcre.mk b/deps/pcre.mk index cd1180d992885..3ff85d5569ad9 100644 --- a/deps/pcre.mk +++ b/deps/pcre.mk @@ -9,6 +9,9 @@ PCRE_LDFLAGS := $(RPATH_ESCAPED_ORIGIN) ifeq ($(OS),emscripten) PCRE_CFLAGS += -fPIC PCRE_JIT = --disable-jit +else ifeq ($(OS),OpenBSD) +# jit will need RWX memory +PCRE_JIT = --disable-jit else PCRE_JIT = --enable-jit endif diff --git a/deps/pcre.version b/deps/pcre.version index ce27921435e1d..78245a5777a0c 100644 --- a/deps/pcre.version +++ b/deps/pcre.version @@ -1,5 +1,6 @@ +# -*- makefile -*- ## jll artifact PCRE_JLL_NAME := PCRE2 ## source build -PCRE_VER := 10.42 +PCRE_VER := 10.44 diff --git a/deps/sanitizers.mk b/deps/sanitizers.mk index 81db75a4ee63e..2d0f0988a39b9 100644 --- a/deps/sanitizers.mk +++ b/deps/sanitizers.mk @@ -6,14 +6,16 @@ SANITIZER_LIB_PATH := $(dir $(shell LANG=C $(CC) -print-file-name=libasan.so)) endif # Given a colon-separated list of paths in $(2), find the location of the library given in $(1) -define pathsearch +define pathsearch_all $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))) endef define copy_sanitizer_lib -install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir) +install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch_all,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir) $$(addprefix $$(build_shlibdir)/,$(2)): $$(addprefix $$(SANITIZER_LIB_PATH)/,$(2)) | $$(build_shlibdir) -cp $$< $$@ + $(if $(filter $(OS), Linux), \ + -$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$$$ORIGIN' $$@ , 0) endef ifeq ($(USECLANG),1) diff --git a/deps/terminfo.mk b/deps/terminfo.mk new file mode 100644 index 0000000000000..60865838a813e --- /dev/null +++ b/deps/terminfo.mk @@ -0,0 +1,43 @@ +## TERMINFO-DB ## +include $(SRCDIR)/terminfo.version + +$(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz: | $(SRCCACHE) + $(JLDOWNLOAD) $@ https://github.com/JuliaBinaryWrappers/TermInfoDB_jll.jl/releases/download/$(TERMINFO_TAG)/TermInfoDB.v$(TERMINFO_VER).any.tar.gz + touch -c $@ + +$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz + $(JLCHECKSUM) $< + rm -rf $(dir $@) + mkdir -p $(dir $@) + $(TAR) -C $(dir $@) --strip-components 1 -xf $< + echo 1 > $@ + +checksum-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz + $(JLCHECKSUM) $< + 
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted + echo 1 > $@ + +$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled + echo 1 > $@ + +define TERMINFO_INSTALL + mkdir -p $2/$$(build_datarootdir)/julia + cp -R $1/terminfo $2/$$(build_datarootdir)/julia/ +endef +$(eval $(call staged-install, \ + terminfo,TermInfoDB-v$(TERMINFO_VER), \ + TERMINFO_INSTALL,,,,)) + +clean-terminfo: + -rm -f $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled + +distclean-terminfo: + rm -rf $(SRCCACHE)/TermInfoDB*.tar.gz $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER) $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER) + +get-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz +extract-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted +configure-terminfo: extract-terminfo +compile-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled +fastcheck-terminfo: check-terminfo +check-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked diff --git a/deps/terminfo.version b/deps/terminfo.version new file mode 100644 index 0000000000000..b7c020b830517 --- /dev/null +++ b/deps/terminfo.version @@ -0,0 +1,3 @@ +# -*- makefile -*- +TERMINFO_VER := 2023.12.9 +TERMINFO_TAG := TermInfoDB-v$(TERMINFO_VER)+0 diff --git a/deps/tools/common.mk b/deps/tools/common.mk index 3cefc253cec3d..01b57316f9d1a 100644 --- a/deps/tools/common.mk +++ b/deps/tools/common.mk @@ -36,8 +36,8 @@ CMAKE_COMMON += -DCMAKE_C_COMPILER_LAUNCHER=ccache CMAKE_COMMON += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache CMAKE_CC := "$$(which $(shell echo $(CC_ARG) | cut -d' ' -f1))" CMAKE_CXX := "$$(which $(shell echo $(CXX_ARG) | cut -d' ' -f1))" -CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -d' ' -f2-) -CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -d' ' -f2-) +CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -s -d' ' -f2-) +CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -s -d' ' -f2-) else CMAKE_CC := "$$(which $(CC_BASE))" CMAKE_CXX := "$$(which $(CXX_BASE))" diff --git a/deps/tools/jlchecksum b/deps/tools/jlchecksum index 87db805dbfab3..9945ec89e6bda 100755 --- a/deps/tools/jlchecksum +++ b/deps/tools/jlchecksum @@ -63,7 +63,7 @@ find_checksum() fi done if [ ! -f "$DEPSDIR/checksums/$BASENAME/$CHECKSUM_TYPE" ]; then - if [ ${TAGGED_RELEASE_BANNER:-} ]; then + if [ "${TAGGED_RELEASE_BANNER:-}" ]; then echo "WARNING: $CHECKSUM_TYPE checksum for $BASENAME not found in deps/checksums/, failing release build." >&2 exit 3 fi @@ -87,15 +87,17 @@ SHA512_PROG="" MD5_PROG="" find_checksum_progs() { - if [ ! -z $(which sha512sum) ]; then + if [ ! -z $(which sha512sum 2>/dev/null) ]; then SHA512_PROG="sha512sum $ARG1 | awk '{ print \$1; }'" - elif [ ! -z $(which shasum) ]; then + elif [ ! -z $(which shasum 2>/dev/null) ]; then SHA512_PROG="shasum -a 512 $ARG1 | awk '{ print \$1; }'" + elif [ ! -z $(which sha512 2>/dev/null) ]; then + SHA512_PROG="sha512 -q $ARG1" fi - if [ ! -z $(which md5sum) ]; then + if [ ! -z $(which md5sum 2>/dev/null) ]; then MD5_PROG="md5sum $ARG1 | awk '{ print \$1; }'" - elif [ ! -z $(which md5) ]; then + elif [ ! 
-z $(which md5 2>/dev/null) ]; then MD5_PROG="md5 -q $ARG1" fi } diff --git a/deps/unwind.mk b/deps/unwind.mk index 76593df1e5ef0..c934c382a23e7 100644 --- a/deps/unwind.mk +++ b/deps/unwind.mk @@ -26,33 +26,29 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UN checksum-unwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz $(JLCHECKSUM) $< -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-prefer-extbl.patch +$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-configure-static-lzma.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted + cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p0 -f -u -l < $(SRCDIR)/patches/libunwind-configure-static-lzma.patch echo 1 > $@ -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-static-arm.patch +$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-configure-static-lzma.patch-applied + cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-revert_prelink_unwind.patch echo 1 > $@ -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u < $(SRCDIR)/patches/libunwind-cfa-rsp.patch +$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied + cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-aarch64-inline-asm.patch echo 1 > $@ -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-dwarf-table.patch - echo 1 > $@ - -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-non-empty-structs.patch +$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied + cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-disable-initial-exec-tls.patch echo 1 > $@ # note minidebuginfo requires liblzma, which we do not have a source build for # (it will be enabled in BinaryBuilder-based downloads however) # since https://github.com/JuliaPackaging/Yggdrasil/commit/0149e021be9badcb331007c62442a4f554f3003c -$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied +$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" 
CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks + $(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks --enable-per-thread-cache echo 1 > $@ $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured @@ -89,50 +85,62 @@ check-unwind: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-checked ## LLVM libunwind ## -LLVMUNWIND_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=MinSizeRel -DLIBUNWIND_ENABLE_PEDANTIC=OFF -DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config - -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE) - $(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/libunwind-$(LLVMUNWIND_VER).src.tar.xz - -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz +LLVMUNWIND_OPTS := $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) \ + -DCMAKE_BUILD_TYPE=MinSizeRel \ + -DLIBUNWIND_ENABLE_PEDANTIC=OFF \ + -DLIBUNWIND_INCLUDE_DOCS=OFF \ + -DLIBUNWIND_INCLUDE_TESTS=OFF \ + -DLIBUNWIND_INSTALL_HEADERS=ON \ + -DLIBUNWIND_ENABLE_ASSERTIONS=OFF \ + -DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config \ + -DLLVM_ENABLE_RUNTIMES="libunwind" \ + -DLLVM_PATH=$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/llvm + +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE) + $(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/llvm-project-$(LLVMUNWIND_VER).src.tar.xz + +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz $(JLCHECKSUM) $< cd $(dir $<) && $(TAR) xf $< - mv $(SRCCACHE)/libunwind-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) + mv $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER) echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted + cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied + cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < 
$(SRCDIR)/patches/llvm-libunwind-revert-monorepo-requirement.patch +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied + cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch - echo 1 > $@ - -checksum-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz +checksum-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz $(JLCHECKSUM) $< -$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied +$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied mkdir -p $(dir $@) cd $(dir $@) && \ - $(CMAKE) $(dir $<) $(LLVMUNWIND_OPTS) + $(CMAKE) $(dir $<) -S $(dir $<)/runtimes $(LLVMUNWIND_OPTS) echo 1 > $@ $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured - $(MAKE) -C $(dir $<) + cd $(dir $<) && \ + $(if $(filter $(CMAKE_GENERATOR),make), \ + $(MAKE), \ + $(CMAKE) --build . --target unwind) echo 1 > $@ +LIBUNWIND_INSTALL = \ + cd $1 && mkdir -p $2$$(build_depsbindir) && \ + $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P libunwind/cmake_install.cmake + $(eval $(call staged-install, \ llvmunwind,llvmunwind-$(LLVMUNWIND_VER), \ - MAKE_INSTALL,,, \ - cp -fR $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/include/* $(build_includedir))) + LIBUNWIND_INSTALL,,, \ + cp -fR $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/* $(build_includedir))) clean-llvmunwind: -rm -f $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled @@ -140,14 +148,14 @@ clean-llvmunwind: -$(MAKE) -C $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) clean distclean-llvmunwind: - rm -rf $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz \ + rm -rf $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz \ $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) \ $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) -get-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz -extract-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted -configure-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured -compile-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled +get-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz +extract-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted +configure-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-configured +compile-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-compiled fastcheck-llvmunwind: check-llvmunwind check-llvmunwind: # no test/check provided by Makefile diff --git a/deps/unwind.version b/deps/unwind.version index e17b2e91c2e51..e3ed63675fd8c 100644 --- a/deps/unwind.version +++ b/deps/unwind.version @@ 
-2,5 +2,5 @@ UNWIND_JLL_NAME := LibUnwind ## source build -UNWIND_VER_TAG := 1.5 -UNWIND_VER := 1.5.0 +UNWIND_VER_TAG := 1.8.1 +UNWIND_VER := 1.8.1 diff --git a/deps/utf8proc.version b/deps/utf8proc.version index 659b995e8abaf..c880d6561ce09 100644 --- a/deps/utf8proc.version +++ b/deps/utf8proc.version @@ -1,2 +1,2 @@ -UTF8PROC_BRANCH=v2.8.0 -UTF8PROC_SHA1=1cb28a66ca79a0845e99433fd1056257456cef8b +UTF8PROC_BRANCH=v2.10.0 +UTF8PROC_SHA1=a1b99daa2a3393884220264c927a48ba1251a9c6 diff --git a/deps/valgrind/valgrind.h b/deps/valgrind/valgrind.h index 2e07a49d91dfa..b33fd70fab672 100644 --- a/deps/valgrind/valgrind.h +++ b/deps/valgrind/valgrind.h @@ -1065,7 +1065,7 @@ typedef /* Use these to write the name of your wrapper. NOTE: duplicates VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. NOTE also: inserts - the default behaviour equivalance class tag "0000" into the name. + the default behaviour equivalence class tag "0000" into the name. See pub_tool_redir.h for details -- normally you don't need to think about this, though. */ diff --git a/deps/zlib.version b/deps/zlib.version index 89a304c49b6dc..27d862a4cc35b 100644 --- a/deps/zlib.version +++ b/deps/zlib.version @@ -3,6 +3,6 @@ ZLIB_JLL_NAME := Zlib ## source build -ZLIB_VER := 1.2.13 -ZLIB_BRANCH=v1.2.13 -ZLIB_SHA1=04f42ceca40f73e2978b50e93806c2a18c1281fc +ZLIB_VER := 1.3.1 +ZLIB_BRANCH=v1.3.1 +ZLIB_SHA1=51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf diff --git a/doc/Manifest.toml b/doc/Manifest.toml index cf50a1d41ddbd..e91958808828e 100644 --- a/doc/Manifest.toml +++ b/doc/Manifest.toml @@ -1,84 +1,233 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.9.0-DEV" +julia_version = "1.12.0-DEV" manifest_format = "2.0" -project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f" +project_hash = "1e9ffa7d4739f7d125a5e2c66af8747a8effd889" [[deps.ANSIColoredPrinters]] git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" version = "0.0.1" +[[deps.AbstractTrees]] +git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.5" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.6" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" [[deps.DocStringExtensions]] deps = ["LibGit2"] -git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9" +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.9.1" +version = "0.9.3" [[deps.Documenter]] -deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "6030186b00a38e9d0434518627426570aac2ef95" +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"] +git-tree-sha1 = 
"d0ea2c044963ed6f37703cead7e29f70cba13d7e" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.23" +version = "1.8.0" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1c6317308b9dc757616f0b5cb379db10494443a7" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.6.2+0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "04eff47b1354d702c3a85e8ab23d539bb7d5957e" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.1" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "ea372033d09e4552a04fd38361cd019f9003f4f4" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.46.2+0" [[deps.IOCapture]] deps = ["Logging", "Random"] -git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" +git-tree-sha1 = "b6d6bfdd7ce25b0f9b2f6b3dd56b2673a66c8770" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.2.2" +version = "0.2.5" [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.6.1" [[deps.JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e" +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.3" +version = "0.21.4" + +[[deps.JuliaSyntaxHighlighting]] +deps = ["StyledStrings"] +uuid = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011" +version = "1.12.0" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "0f2da712350b020bc3957f269c9caad516383ee0" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.3.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.9.1+0" [[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "OpenSSL_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.8.4+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "OpenSSL_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.3+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "61dfdba58e585066d8bce214c5a51eaa0539f269" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+1" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" [[deps.Markdown]] -deps = ["Base64"] +deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = 
"465a70f0fc7d443a00dcdc3267a497397b8a3899" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.2" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2024.3.11" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "Libdl", "NetworkOptions"] +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.15+1" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.43.0+1" + [[deps.Parsers]] -deps = ["Dates"] -git-tree-sha1 = "3d5bf43e3e8b412656404ed9466f1dcbf7c50269" +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.4.0" +version = "2.8.1" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.12.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + REPLExt = "REPL" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" [[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" [[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +deps = ["InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" [[deps.Random]] -deps = ["SHA", "Serialization"] +deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -86,13 +235,56 @@ version = "0.7.0" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.3.1+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = 
"8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.63.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.5.0+1" diff --git a/doc/make.jl b/doc/make.jl index a9343a3133a63..43d51e9936b58 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -3,11 +3,13 @@ Base.ACTIVE_PROJECT[] = nothing empty!(LOAD_PATH) push!(LOAD_PATH, @__DIR__, "@stdlib") empty!(DEPOT_PATH) -pushfirst!(DEPOT_PATH, joinpath(@__DIR__, "deps")) +push!(DEPOT_PATH, joinpath(@__DIR__, "deps")) +push!(DEPOT_PATH, abspath(Sys.BINDIR, "..", "share", "julia")) using Pkg Pkg.instantiate() using Documenter +import LibGit2 baremodule GenStdLib end @@ -42,6 +44,75 @@ cd(joinpath(@__DIR__, "src")) do end end +# Because we have standard libraries that are hosted outside of the julia repo, +# but their docs are included in the manual, we need to populate the remotes argument +# of makedocs(), to make sure that Documenter knows how to resolve the directories +# in stdlib/ to the correct remote Git repositories (for source and edit links). +# +# This function parses the *.version files in stdlib/, returning a dictionary with +# all the key-value pairs from those files. *_GIT_URL and *_SHA1 fields are the ones +# we will actually be interested in. +function parse_stdlib_version_file(path) + values = Dict{String,String}() + for line in readlines(path) + m = match(r"^([A-Z0-9_]+)\s+:?=\s+(\S+)$", line) + if isnothing(m) + @warn "Unable to parse line in $(path)" line + else + values[m[1]] = m[2] + end + end + return values +end +# This generates the value that will be passed to the `remotes` argument of makedocs(), +# by looking through all *.version files in stdlib/. +documenter_stdlib_remotes = let stdlib_dir = realpath(joinpath(@__DIR__, "..", "stdlib")) + # Get a list of all *.version files in stdlib/.. + version_files = filter(readdir(stdlib_dir)) do fname + isfile(joinpath(stdlib_dir, fname)) && endswith(fname, ".version") + end + # .. and then parse them, each becoming an entry for makedocs's remotes. + # The values for each are of the form path => (remote, sha1), where + # - path: the path to the stdlib package's root directory, i.e. "stdlib/$PACKAGE" + # - remote: a Documenter.Remote object, pointing to the Git repository where package is hosted + # - sha1: the SHA1 of the commit that is included with the current Julia version + remotes_list = map(version_files) do version_fname + package = match(r"(.+)\.version", version_fname)[1] + versionfile = parse_stdlib_version_file(joinpath(stdlib_dir, version_fname)) + # From the (all uppercase) $(package)_GIT_URL and $(package)_SHA1 fields, we'll determine + # the necessary information. If this logic happens to fail for some reason for any of the + # standard libraries, we'll crash the documentation build, so that it could be fixed. 
+ remote = let git_url_key = "$(uppercase(package))_GIT_URL" + haskey(versionfile, git_url_key) || error("Missing $(git_url_key) in $version_fname") + m = match(LibGit2.GITHUB_REGEX, versionfile[git_url_key]) + isnothing(m) && error("Unable to parse $(git_url_key)='$(versionfile[git_url_key])' in $version_fname") + Documenter.Remotes.GitHub(m[2], m[3]) + end + package_sha = let sha_key = "$(uppercase(package))_SHA1" + haskey(versionfile, sha_key) || error("Missing $(sha_key) in $version_fname") + versionfile[sha_key] + end + # Construct the absolute (local) path to the stdlib package's root directory + package_root_dir = joinpath(stdlib_dir, "$(package)-$(package_sha)") + # Documenter needs package_root_dir to exist --- it's just a sanity check it does on the remotes= keyword. + # In normal (local) builds, this will be the case, since the Makefiles will have unpacked the standard + # libraries. However, on CI we do this thing where we actually build docs in a clean worktree, just + # unpacking the `usr/` directory from the main build, and the unpacked stdlibs will be missing, and this + # will cause Documenter to throw an error. However, we don't _actually_ need the source files of the standard + # libraries to be present, so we just generate empty root directories to satisfy the check in Documenter. + isdir(package_root_dir) || mkpath(package_root_dir) + package_root_dir => (remote, package_sha) + end + Dict( + # We also add the root of the repository to `remotes`, because we do not always build the docs in a + # checked out JuliaLang/julia repository. In particular, when building Julia from tarballs, there is no + # Git information available. And also the way the BuildKite CI is configured to check out the code means + # that in some circumstances the Git repository information is incorrect / no available via Git. + dirname(@__DIR__) => (Documenter.Remotes.GitHub("JuliaLang", "julia"), Base.GIT_VERSION_INFO.commit), + remotes_list... + ) +end + # Check if we are building a PDF const render_pdf = "pdf" in ARGS @@ -63,6 +134,7 @@ generate_markdown("NEWS") Manual = [ "manual/getting-started.md", + "manual/installation.md", "manual/variables.md", "manual/integers-and-floating-point-numbers.md", "manual/mathematical-operations.md", @@ -112,6 +184,7 @@ BaseDocs = [ "base/arrays.md", "base/parallel.md", "base/multi-threading.md", + "base/scopedvalues.md", "base/constants.md", "base/file.md", "base/io-network.md", @@ -143,6 +216,7 @@ DevDocs = [ "devdocs/isbitsunionarrays.md", "devdocs/sysimg.md", "devdocs/pkgimg.md", + "devdocs/llvm-passes.md", "devdocs/llvm.md", "devdocs/stdio.md", "devdocs/boundscheck.md", @@ -152,8 +226,12 @@ DevDocs = [ "devdocs/inference.md", "devdocs/ssair.md", "devdocs/EscapeAnalysis.md", + "devdocs/aot.md", "devdocs/gc-sa.md", "devdocs/gc.md", + "devdocs/jit.md", + "devdocs/builtins.md", + "devdocs/precompile_hang.md", ], "Developing/debugging Julia's C code" => [ "devdocs/backtraces.md", @@ -170,6 +248,7 @@ DevDocs = [ "devdocs/build/windows.md", "devdocs/build/freebsd.md", "devdocs/build/arm.md", + "devdocs/build/riscv.md", "devdocs/build/distributing.md", ] ] @@ -286,6 +365,9 @@ else collapselevel = 1, sidebar_sitename = false, ansicolor = true, + size_threshold = 800 * 2^10, # 800 KiB + size_threshold_warn = 200 * 2^10, # the manual has quite a few large pages, so we warn at 200+ KiB only + inventory_version = VERSION, ) end @@ -297,12 +379,12 @@ makedocs( doctest = ("doctest=fix" in ARGS) ? (:fix) : ("doctest=only" in ARGS) ? (:only) : ("doctest=true" in ARGS) ? 
true : false, linkcheck = "linkcheck=true" in ARGS, linkcheck_ignore = ["https://bugs.kde.org/show_bug.cgi?id=136779"], # fails to load from nanosoldier? - strict = true, checkdocs = :none, format = format, sitename = "The Julia Language", authors = "The Julia Project", pages = PAGES, + remotes = documenter_stdlib_remotes, ) # Update URLs to external stdlibs (JuliaLang/julia#43199) diff --git a/doc/man/julia.1 b/doc/man/julia.1 index fa9f641b1e76f..2da11ae1b3f18 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -21,14 +21,14 @@ .\" - diagnostics .\" - notes -.TH JULIA 1 2022-02-17 JULIA +.TH JULIA 1 2023-09-01 JULIA .\" from the front page of https://julialang.org/ .SH NAME julia - a high-level, high-performance dynamic programming language for technical computing .SH SYNOPSIS -\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMMFILE] [ARGS...] +\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMFILE] [ARGS...] If a Julia source file is given as a \fIPROGRAMFILE\fP (optionally followed by arguments in \fIARGS\fP) Julia will execute the program and exit. @@ -59,7 +59,7 @@ Display version information .TP -h, --help -Print help message +Print command-line options (this message) .TP --help-hidden @@ -67,7 +67,7 @@ Print uncommon options not shown by `-h` .TP --project[=/@.] -Set as the home project/environment. The default @. option will search +Set as the active project/environment. The default @. option will search through parent directories until a Project.toml or JuliaProject.toml file is found. @@ -77,7 +77,7 @@ Start up with the given system image file .TP -H, --home -Set location of julia executable +Set location of `julia` executable .TP --startup-file={yes*|no} @@ -93,8 +93,16 @@ Enable or disable Julia's default signal handlers Use native code from system image if available .TP ---compiled-modules={yes*|no} -Enable or disable incremental precompilation of modules +--compiled-modules={yes*|no|existing|strict} +Enable or disable incremental precompilation of modules. +The `existing` option allows use of existing compiled modules that were +previously precompiled, but disallows creation of new precompile files. +The `strict` option is similar, but will error if no precompile file is found. + +.TP +--pkgimages={yes*|no|existing} +Enable or disable usage of native code caching in the form of pkgimages +The `existing` option allows use of existing pkgimages but disallows creation of new ones .TP -e, --eval @@ -104,24 +112,30 @@ Evaluate -E, --print Evaluate and display the result +.TP +-m, --module [args] +Run entry point of `Package` (`@main` function) with `args' + .TP -L, --load Load immediately on all processors .TP --t, --threads -Enable n threads; "auto" tries to infer a useful default number -of threads to use but the exact behavior might change in the future. -Currently, "auto" uses the number of CPUs assigned to this julia -process based on the OS-specific affinity assignment interface, if -supported (Linux and Windows). If this is not supported (macOS) or -process affinity is not configured, it uses the number of CPU -threads. +-t, --threads {auto|N[,auto|M]} +Enable N[+M] threads; N threads are assigned to the `default` +threadpool, and if M is specified, M threads are assigned to the +`interactive` threadpool; `auto` tries to infer a useful +default number of threads to use but the exact behavior might change +in the future. 
Currently sets N to the number of CPUs assigned to +this Julia process based on the OS-specific affinity assignment +interface if supported (Linux and Windows) or to the number of CPU +threads if not supported (MacOS) or if process affinity is not +configured, and sets M to 1. .TP ---gcthreads -Enable n GC threads; If unspecified is set to half of the -compute worker threads. +--gcthreads=N[,M] +Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC. +N is set to the number of compute threads and M is set to 0 if unspecified. .TP -p, --procs {N|auto} @@ -133,7 +147,7 @@ as the number of local CPU threads (logical cores) Run processes on hosts listed in .TP --i +-i, --interactive Interactive mode; REPL runs and `isinteractive()` is true .TP @@ -141,7 +155,7 @@ Interactive mode; REPL runs and `isinteractive()` is true Quiet startup: no banner, suppress REPL warnings .TP ---banner={yes|no|auto*} +--banner={yes|no|short|auto*} Enable or disable startup banner .TP @@ -169,15 +183,15 @@ Enable or disable warning for ambiguous top-level scope Limit usage of CPU features up to ; set to `help` to see the available options .TP --O, --optimize={0,1,2*,3} +-O, --optimize={0|1|2*|3} Set the optimization level (level 3 if `-O` is used without a level) .TP ---min-optlevel={0*,1,2,3} +--min-optlevel={0*|1|2|3} Set a lower bound on the optimization level .TP --g {0,1*,2} +-g, --debug-info={0|1*|2} Set the level of debug info generation (level 2 if `-g` is used without a level) .TP @@ -189,8 +203,12 @@ Control whether inlining is permitted, including overriding @inline declarations Emit bounds checks always, never, or respect @inbounds declarations .TP ---math-mode={ieee|user} -Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration) +--math-mode={ieee|user*} +Always follow `ieee` floating point semantics or respect `@fastmath` declarations + +.TP +--polly={yes*|no} +Enable or disable the polyhedral optimizer Polly (overrides @polly declaration) .TP --code-coverage[={none*|user|all}] @@ -202,8 +220,8 @@ Count executions of source lines in a file or files under a given directory. A ` be placed before the path to indicate this option. A `@` with no path will track the current directory. .TP - --code-coverage=tracefile.info - Append coverage information to the LCOV tracefile (filename supports format tokens) +--code-coverage=tracefile.info +Append coverage information to the LCOV tracefile (filename supports format tokens) .TP --track-allocation[={none*|user|all}] @@ -211,8 +229,8 @@ Count bytes allocated by each source line (omitting setting is equivalent to `us .TP --track-allocation=@ -Count bytes allocated by each source line in a file or files under a given directory. A `@` -must be placed before the path to indicate this option. A `@` with no path will track the current directory. +Count bytes but only in files that fall under the given file path/directory. +The `@` prefix is required to select this option. A `@` with no path will track the current directory. .TP --bug-report=KIND @@ -223,8 +241,9 @@ fallbacks to the latest compatible BugReporting.jl if not. For more information, .TP --heap-size-hint= -Forces garbage collection if memory usage is higher than that value. The memory hint might be -specified in megabytes (500M) or gigabytes (1.5G) +Forces garbage collection if memory usage is higher than the given value. 
+The value may be specified as a number of bytes, optionally in units of +KB, MB, GB, or TB, or as a percentage of physical memory with %. .TP --compile={yes*|no|all|min} @@ -263,13 +282,30 @@ Generate an assembly file (.s) Generate an incremental output file (rather than complete) .TP ---trace-compile={stderr,name} -Print precompile statements for methods compiled during execution or save to a path +--trace-compile={stderr|name} +Print precompile statements for methods compiled during execution or save to stderr or a path. +Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported + +.TP +--trace-compile-timing= +If --trace-compile is enabled show how long each took to compile in ms + +.TP +--trace-dispatch={stderr|name} +Print precompile statements for methods dispatched during execution or save to stderr or a path. + +.TP +--task-metrics={yes|no*} +Enable the collection of per-task metrics. .TP -image-codegen Force generate code in imaging mode +.TP +--permalloc-pkgimg={yes|no*} +Copy the data section of package images into memory + .SH FILES AND ENVIRONMENT See https://docs.julialang.org/en/v1/manual/environment-variables/ @@ -277,6 +313,15 @@ See https://docs.julialang.org/en/v1/manual/environment-variables/ Please report any bugs using the GitHub issue tracker: https://github.com/julialang/julia/issues?state=open - .SH AUTHORS Contributors: https://github.com/JuliaLang/julia/graphs/contributors + +.SH INTERNET RESOURCES +Website: https://julialang.org/ +.br +Documentation: https://docs.julialang.org/ +.br +Downloads: https://julialang.org/downloads/ + +.SH LICENSING +Julia is an open-source project. It is made available under the MIT license. diff --git a/doc/src/assets/cover.tex b/doc/src/assets/cover.tex index 67b77e520acd3..b959477913f59 100644 --- a/doc/src/assets/cover.tex +++ b/doc/src/assets/cover.tex @@ -15,7 +15,7 @@ %% ---- reset page geometry for cover page \newgeometry{left=2cm,right=2cm,bottom=3cm} % ref: memman@v3.7q, P65, "4.1. Styling the titling" -% http://mirrors.ctan.org/macros/latex/contrib/memoir/memman.pdf +% https://mirrors.ctan.org/macros/latex/contrib/memoir/memman.pdf \begin{titlingpage} % set background image \BgThispage diff --git a/doc/src/assets/custom.sty b/doc/src/assets/custom.sty index 03e6ff805cd3f..ebc11f0414945 100644 --- a/doc/src/assets/custom.sty +++ b/doc/src/assets/custom.sty @@ -6,7 +6,7 @@ \usepackage{geometry} % "some": use \BgThispage to change background % ref: background@v2.1,# 2.1 Options, "pages=" -% http://mirrors.ctan.org/macros/latex/contrib/background/background.pdf +% https://mirrors.ctan.org/macros/latex/contrib/background/background.pdf \usepackage[pages=some]{background} %% Color definitions for Julia @@ -27,7 +27,7 @@ contents={ %% Place the background image `title-bg' in the right place via `tikz'. 
% tikz option "remember picture", "overlay" % ref: pgfmanual@3.1.9a, #17.13.1 Referencing a Node in a Different Picture\ -% http://mirrors.ctan.org/graphics/pgf/base/doc/pgfmanual.pdf +% https://mirrors.ctan.org/graphics/pgf/base/doc/pgfmanual.pdf \begin{tikzpicture}[remember picture,overlay,draw=white] \draw [path picture={ % ref: pgfmanual, 15.6, "Predefined node path picture bounding box" diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md index 6585f98360585..defe497daf00c 100644 --- a/doc/src/base/arrays.md +++ b/doc/src/base/arrays.md @@ -30,6 +30,9 @@ Base.StridedArray Base.StridedVector Base.StridedMatrix Base.StridedVecOrMat +Base.GenericMemory +Base.Memory +Base.memoryref Base.Slices Base.RowSlices Base.ColumnSlices @@ -76,6 +79,7 @@ to operate on arrays, you should use `sin.(a)` to vectorize via `broadcast`. Base.broadcast Base.Broadcast.broadcast! Base.@__dot__ +Base.Broadcast.BroadcastFunction ``` For specializing broadcast on custom types, see @@ -95,6 +99,8 @@ Base.Broadcast.result_style ```@docs Base.getindex(::AbstractArray, ::Any...) Base.setindex!(::AbstractArray, ::Any, ::Any...) +Base.nextind +Base.prevind Base.copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices) Base.copy! Base.isassigned @@ -109,6 +115,12 @@ Base.checkindex Base.elsize ``` +While most code can be written in an index-agnostic manner (see, e.g., [`eachindex`](@ref)), it can sometimes be useful to explicitly check for offset axes: +```@docs +Base.require_one_based_indexing +Base.has_offset_axes +``` + ## Views (SubArrays and other view types) A “view” is a data structure that acts like an array (it is a subtype of `AbstractArray`), but the underlying data is actually @@ -120,7 +132,7 @@ accessing the first 10 elements of `x`. Writing to a view, e.g. `v[3] = 2`, writ Slicing operations like `x[1:10]` create a copy by default in Julia. `@view x[1:10]` changes it to make a view. The `@views` macro can be used on a whole block of code (e.g. `@views function foo() .... end` or `@views begin ... end`) -to change all the slicing operations in that block to use views. Sometimes making a copy of the data is faster and +to change all the slicing operations in that block to use views. Sometimes making a copy of the data is faster and sometimes using a view is faster, as described in the [performance tips](@ref man-performance-views). ```@docs @@ -132,6 +144,7 @@ Base.parentindices Base.selectdim Base.reinterpret Base.reshape +Base.insertdims Base.dropdims Base.vec Base.SubArray diff --git a/doc/src/base/base.md b/doc/src/base/base.md index bb72b94293c6b..e6c8ff554d494 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -4,7 +4,7 @@ Julia Base contains a range of functions and macros appropriate for performing scientific and numerical computing, but is also as broad as those of many general purpose programming -languages. Additional functionality is available from a growing collection of +languages. Additional functionality is available from a growing collection of [available packages](https://julialang.org/packages/). Functions are grouped by topic below. 
@@ -30,10 +30,13 @@ Base.isinteractive Base.summarysize Base.__precompile__ Base.include -Base.MainInclude.include +Main.include Base.include_string Base.include_dependency __init__ +Base.OncePerProcess +Base.OncePerTask +Base.OncePerThread Base.which(::Any, ::Any) Base.methods Base.@show @@ -60,6 +63,7 @@ However, you can create variables with names: Finally: `where` is parsed as an infix operator for writing parametric method and type definitions; `in` and `isa` are parsed as infix operators; +`public` is parsed as a keyword when beginning a toplevel statement; `outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop; and `as` is used as a keyword to rename an identifier brought into scope by `import` or `using`. Creation of variables named `where`, `in`, `isa`, `outer` and `as` is allowed, though. @@ -67,6 +71,7 @@ Creation of variables named `where`, `in`, `isa`, `outer` and `as` is allowed, t ```@docs module export +public import using as @@ -100,6 +105,11 @@ where ; = ?: +.= +. +-> +:: +[] ``` ## Standard Modules @@ -129,6 +139,7 @@ Core.:(===) Core.isa Base.isequal Base.isless +Base.isunordered Base.ifelse Core.typeassert Core.typeof @@ -145,6 +156,7 @@ Base.setproperty! Base.replaceproperty! Base.swapproperty! Base.modifyproperty! +Base.setpropertyonce! Base.propertynames Base.hasproperty Core.getfield @@ -152,9 +164,9 @@ Core.setfield! Core.modifyfield! Core.replacefield! Core.swapfield! +Core.setfieldonce! Core.isdefined -Core.getglobal -Core.setglobal! +Core.isdefinedglobal Base.@isdefined Base.convert Base.promote @@ -274,6 +286,7 @@ Base.:(|>) Base.:(∘) Base.ComposedFunction Base.splat +Base.Fix Base.Fix1 Base.Fix2 ``` @@ -282,7 +295,7 @@ Base.Fix2 ```@docs Core.eval -Base.MainInclude.eval +Main.eval Base.@eval Base.evalfile Base.esc @@ -304,7 +317,12 @@ Base.@simd Base.@polly Base.@generated Base.@assume_effects +``` + +## Managing deprecations +```@docs Base.@deprecate +Base.depwarn ``` ## Missing Values @@ -335,6 +353,12 @@ Base.Cmd Base.setenv Base.addenv Base.withenv +Base.shell_escape +Base.shell_split +Base.shell_escape_posixly +Base.shell_escape_csh +Base.shell_escape_wincmd +Base.escape_microsoft_c_args Base.setcpuaffinity Base.pipeline(::Any, ::Any, ::Any, ::Any...) Base.pipeline(::Base.AbstractCmd) @@ -349,6 +373,7 @@ Base.@timed Base.@elapsed Base.@allocated Base.@allocations +Base.@lock_conflicts Base.EnvDict Base.ENV Base.Sys.STDLIB @@ -370,6 +395,9 @@ Base.Sys.uptime Base.Sys.isjsvm Base.Sys.loadavg Base.Sys.isexecutable +Base.Sys.isreadable +Base.Sys.iswritable +Base.Sys.username Base.@static ``` @@ -401,6 +429,7 @@ Core.DivideError Core.DomainError Base.EOFError Core.ErrorException +Core.FieldError Core.InexactError Core.InterruptException Base.KeyError @@ -450,10 +479,28 @@ Base.@__DIR__ Base.@__LINE__ Base.fullname Base.names +Base.isexported +Base.ispublic Base.nameof(::Function) Base.functionloc(::Any, ::Any) Base.functionloc(::Method) Base.@locals +Core.getglobal +Core.setglobal! +Core.modifyglobal! +Core.swapglobal! +Core.setglobalonce! +Core.replaceglobal! +``` + +## Documentation +(See also the [documentation](@ref man-documentation) chapter.) 
+```@docs +Base.@doc +Docs.HTML +Docs.Text +Docs.hasdoc +Docs.undocumented_names ``` ## Code loading @@ -464,6 +511,7 @@ Base.locate_package Base.require Base.compilecache Base.isprecompiled +Base.get_extension ``` ## Internals @@ -474,6 +522,7 @@ Base.GC.enable Base.GC.@preserve Base.GC.safepoint Base.GC.enable_logging +Base.GC.logging_enabled Meta.lower Meta.@lower Meta.parse(::AbstractString, ::Int) diff --git a/doc/src/base/c.md b/doc/src/base/c.md index e221a6432542f..bf7e2577029fe 100644 --- a/doc/src/base/c.md +++ b/doc/src/base/c.md @@ -14,7 +14,7 @@ Base.unsafe_modify! Base.unsafe_replace! Base.unsafe_swap! Base.unsafe_copyto!{T}(::Ptr{T}, ::Ptr{T}, ::Any) -Base.unsafe_copyto!{T}(::Array{T}, ::Any, ::Array{T}, ::Any, ::Any) +Base.unsafe_copyto!(::Array, ::Any, ::Array, ::Any, ::Any) Base.copyto! Base.pointer Base.unsafe_wrap{T,N}(::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, ::Ptr{T}, ::NTuple{N,Int}) diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md index 96f540086d021..e724930222a13 100644 --- a/doc/src/base/collections.md +++ b/doc/src/base/collections.md @@ -36,15 +36,15 @@ Fully implemented by: * [`AbstractRange`](@ref) * [`UnitRange`](@ref) - * `Tuple` - * `Number` + * [`Tuple`](@ref) + * [`Number`](@ref) * [`AbstractArray`](@ref) * [`BitSet`](@ref) * [`IdDict`](@ref) * [`Dict`](@ref) * [`WeakKeyDict`](@ref) * `EachLine` - * `AbstractString` + * [`AbstractString`](@ref) * [`Set`](@ref) * [`Pair`](@ref) * [`NamedTuple`](@ref) @@ -64,6 +64,7 @@ Base.LinRange ```@docs Base.isempty +Base.isdone Base.empty! Base.length Base.checked_length @@ -73,14 +74,14 @@ Fully implemented by: * [`AbstractRange`](@ref) * [`UnitRange`](@ref) - * `Tuple` - * `Number` + * [`Tuple`](@ref) + * [`Number`](@ref) * [`AbstractArray`](@ref) * [`BitSet`](@ref) * [`IdDict`](@ref) * [`Dict`](@ref) * [`WeakKeyDict`](@ref) - * `AbstractString` + * [`AbstractString`](@ref) * [`Set`](@ref) * [`NamedTuple`](@ref) @@ -89,6 +90,7 @@ Fully implemented by: ```@docs Base.in Base.:∉ +Base.hasfastin Base.eltype Base.indexin Base.unique @@ -164,8 +166,8 @@ Partially implemented by: * [`AbstractRange`](@ref) * [`UnitRange`](@ref) - * `Tuple` - * `AbstractString` + * [`Tuple`](@ref) + * [`AbstractString`](@ref) * [`Dict`](@ref) * [`IdDict`](@ref) * [`WeakKeyDict`](@ref) @@ -193,7 +195,7 @@ Dictionaries may also be created with generators. For example, `Dict(i => f(i) f Given a dictionary `D`, the syntax `D[x]` returns the value of key `x` (if it exists) or throws an error, and `D[x] = y` stores the key-value pair `x => y` in `D` (replacing any existing value -for the key `x`). Multiple arguments to `D[...]` are converted to tuples; for example, the syntax +for the key `x`). Multiple arguments to `D[...]` are converted to tuples; for example, the syntax `D[x,y]` is equivalent to `D[(x,y)]`, i.e. it refers to the value keyed by the tuple `(x,y)`. ```@docs @@ -202,6 +204,7 @@ Base.Dict Base.IdDict Base.WeakKeyDict Base.ImmutableDict +Base.PersistentDict Base.haskey Base.get Base.get! 
@@ -222,18 +225,20 @@ Base.valtype Fully implemented by: - * [`IdDict`](@ref) * [`Dict`](@ref) + * [`IdDict`](@ref) * [`WeakKeyDict`](@ref) Partially implemented by: - * [`BitSet`](@ref) * [`Set`](@ref) + * [`BitSet`](@ref) + * [`IdSet`](@ref) * [`EnvDict`](@ref Base.EnvDict) * [`Array`](@ref) * [`BitArray`](@ref) * [`ImmutableDict`](@ref Base.ImmutableDict) + * [`PersistentDict`](@ref Base.PersistentDict) * [`Iterators.Pairs`](@ref) ## Set-Like Collections @@ -242,6 +247,7 @@ Partially implemented by: Base.AbstractSet Base.Set Base.BitSet +Base.IdSet Base.union Base.union! Base.intersect @@ -251,6 +257,7 @@ Base.symdiff Base.symdiff! Base.intersect! Base.issubset +Base.in! Base.:⊈ Base.:⊊ Base.issetequal @@ -259,8 +266,10 @@ Base.isdisjoint Fully implemented by: - * [`BitSet`](@ref) * [`Set`](@ref) + * [`BitSet`](@ref) + * [`IdSet`](@ref) + Partially implemented by: diff --git a/doc/src/base/file.md b/doc/src/base/file.md index 9a9dc5d8a72f8..300738a39322d 100644 --- a/doc/src/base/file.md +++ b/doc/src/base/file.md @@ -1,6 +1,8 @@ # Filesystem ```@docs +Base.read(::String) +Base.write(::String, ::Any) Base.Filesystem.pwd Base.Filesystem.cd(::AbstractString) Base.Filesystem.cd(::Function) @@ -27,6 +29,7 @@ Base.Filesystem.operm Base.Filesystem.cp Base.download Base.Filesystem.mv +Base.Filesystem.rename Base.Filesystem.rm Base.Filesystem.touch Base.Filesystem.tempname diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index 4e371039f1a9b..cd3bb9fbfa7aa 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -6,10 +6,14 @@ Base.stdout Base.stderr Base.stdin +Base.read(::AbstractString) +Base.write(::AbstractString, ::Any) Base.open Base.IOStream Base.IOBuffer Base.take!(::Base.GenericIOBuffer) +Base.Pipe +Base.link_pipe! Base.fdio Base.flush Base.close @@ -35,6 +39,7 @@ Base.eof Base.isreadonly Base.iswritable Base.isreadable +Base.isexecutable Base.isopen Base.fd Base.redirect_stdio @@ -71,6 +76,8 @@ Base.readline Base.readuntil Base.readlines Base.eachline +Base.copyline +Base.copyuntil Base.displaysize ``` @@ -108,17 +115,17 @@ PNG images in a window can register this capability with Julia, so that calling types with PNG representations will automatically display the image using the module's window. In order to define a new display backend, one should first create a subtype `D` of the abstract -class [`AbstractDisplay`](@ref). Then, for each MIME type (`mime` string) that can be displayed on `D`, one should +class [`AbstractDisplay`](@ref). Then, for each MIME type (`mime` string) that can be displayed on `D`, one should define a function `display(d::D, ::MIME"mime", x) = ...` that displays `x` as that MIME type, usually by calling [`show(io, mime, x)`](@ref) or [`repr(io, mime, x)`](@ref). A [`MethodError`](@ref) should be thrown if `x` cannot be displayed as that MIME type; this is automatic if one calls `show` or `repr`. Finally, one should define a function `display(d::D, x)` that queries [`showable(mime, x)`](@ref) for the `mime` types supported by `D` and displays the "best" one; a `MethodError` should be thrown if no supported MIME types are found -for `x`. Similarly, some subtypes may wish to override [`redisplay(d::D, ...)`](@ref Base.Multimedia.redisplay). (Again, one should +for `x`. Similarly, some subtypes may wish to override [`redisplay(d::D, ...)`](@ref Base.Multimedia.redisplay). (Again, one should `import Base.display` to add new methods to `display`.) 
The return values of these functions are up to the implementation (since in some cases it may be useful to return a display "handle" of -some type). The display functions for `D` can then be called directly, but they can also be invoked +some type). The display functions for `D` can then be called directly, but they can also be invoked automatically from [`display(x)`](@ref) simply by pushing a new display onto the display-backend stack with: diff --git a/doc/src/base/libc.md b/doc/src/base/libc.md index 08d2670123234..b598baaa16bab 100644 --- a/doc/src/base/libc.md +++ b/doc/src/base/libc.md @@ -17,6 +17,9 @@ Base.Libc.time(::Base.Libc.TmStruct) Base.Libc.strftime Base.Libc.strptime Base.Libc.TmStruct +Base.Libc.FILE +Base.Libc.dup Base.Libc.flush_cstdio Base.Libc.systemsleep +Base.Libc.mkfifo ``` diff --git a/doc/src/base/math.md b/doc/src/base/math.md index 62368424629c6..4f816ce2a6c1d 100644 --- a/doc/src/base/math.md +++ b/doc/src/base/math.md @@ -14,10 +14,12 @@ Base.fma Base.muladd Base.inv(::Number) Base.div +Base.div(::Any, ::Any, ::RoundingMode) Base.fld Base.cld Base.mod Base.rem +Base.rem(::Any, ::Any, ::RoundingMode) Base.rem2pi Base.Math.mod2pi Base.divrem @@ -37,6 +39,8 @@ Base.:(:) Base.range Base.OneTo Base.StepRangeLen +Base.logrange +Base.LogRange Base.:(==) Base.:(!=) Base.:(!==) @@ -70,6 +74,7 @@ Base.Math.tand Base.Math.sincosd Base.Math.sinpi Base.Math.cospi +Base.Math.tanpi Base.Math.sincospi Base.sinh(::Number) Base.cosh(::Number) @@ -118,7 +123,7 @@ Base.exp10 Base.Math.ldexp Base.Math.modf Base.expm1 -Base.round(::Type, ::Any) +Base.round Base.Rounding.RoundingMode Base.Rounding.RoundNearest Base.Rounding.RoundNearestTiesAway @@ -138,6 +143,7 @@ Base.minmax Base.Math.clamp Base.Math.clamp! Base.abs +Base.Checked Base.Checked.checked_abs Base.Checked.checked_neg Base.Checked.checked_add @@ -148,6 +154,7 @@ Base.Checked.checked_rem Base.Checked.checked_fld Base.Checked.checked_mod Base.Checked.checked_cld +Base.Checked.checked_pow Base.Checked.add_with_overflow Base.Checked.sub_with_overflow Base.Checked.mul_with_overflow @@ -158,7 +165,8 @@ Base.signbit Base.flipsign Base.sqrt(::Number) Base.isqrt -Base.Math.cbrt +Base.Math.cbrt(::AbstractFloat) +Base.fourthroot(::Number) Base.real Base.imag Base.reim diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index 45a60b14d541a..81d1d83d765ac 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -25,19 +25,13 @@ atomic Base.@atomic Base.@atomicswap Base.@atomicreplace +Base.@atomiconce +Base.AtomicMemory ``` -!!! note - - The following APIs are fairly primitive, and will likely be exposed through an `unsafe_*`-like wrapper. - -``` -Core.Intrinsics.atomic_pointerref(pointer::Ptr{T}, order::Symbol) --> T -Core.Intrinsics.atomic_pointerset(pointer::Ptr{T}, new::T, order::Symbol) --> pointer -Core.Intrinsics.atomic_pointerswap(pointer::Ptr{T}, new::T, order::Symbol) --> old -Core.Intrinsics.atomic_pointermodify(pointer::Ptr{T}, function::(old::T,arg::S)->T, arg::S, order::Symbol) --> old -Core.Intrinsics.atomic_pointerreplace(pointer::Ptr{T}, expected::Any, new::T, success_order::Symbol, failure_order::Symbol) --> (old, cmp) -``` +There are also optional memory ordering parameters for the `unsafe` set of functions, that +select the C/C++-compatible versions of these atomic operations, if that parameter is specified to +[`unsafe_load`](@ref), [`unsafe_store!`](@ref), [`unsafe_swap!`](@ref), [`unsafe_replace!`](@ref), and [`unsafe_modify!`](@ref). !!! 
warning @@ -71,3 +65,11 @@ These building blocks are used to create the regular synchronization objects. ```@docs Base.Threads.SpinLock ``` + +## Task metrics (Experimental) + +```@docs +Base.Experimental.task_metrics +Base.Experimental.task_running_time_ns +Base.Experimental.task_wall_time_ns +``` diff --git a/doc/src/base/numbers.md b/doc/src/base/numbers.md index 8167650ac17d1..aad4e94901054 100644 --- a/doc/src/base/numbers.md +++ b/doc/src/base/numbers.md @@ -63,6 +63,8 @@ Core.Int64 Core.UInt64 Core.Int128 Core.UInt128 +Base.Int +Base.UInt Base.BigInt Base.Complex Base.Rational diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md index c9f24429fd0e5..cd5c95f17994a 100644 --- a/doc/src/base/parallel.md +++ b/doc/src/base/parallel.md @@ -13,6 +13,7 @@ Base.istaskfailed Base.task_local_storage(::Any) Base.task_local_storage(::Any, ::Any) Base.task_local_storage(::Function, ::Any, ::Any) +Core.ConcurrencyViolationError ``` ## Scheduling @@ -30,6 +31,8 @@ Base.schedule Base.errormonitor Base.@sync Base.wait +Base.waitany +Base.waitall Base.fetch(t::Task) Base.fetch(x::Any) Base.timedwait @@ -50,6 +53,8 @@ Base.unlock Base.trylock Base.islocked Base.ReentrantLock +Base.@lock +Base.Lockable ``` ## Channels @@ -60,7 +65,9 @@ Base.Channel Base.Channel(::Function) Base.put!(::Channel, ::Any) Base.take!(::Channel) +Base.isfull(::Channel) Base.isready(::Channel) +Base.isopen(::Channel) Base.fetch(::Channel) Base.close(::Channel) Base.bind(c::Channel, task::Task) @@ -69,11 +76,11 @@ Base.bind(c::Channel, task::Task) ## [Low-level synchronization using `schedule` and `wait`](@id low-level-schedule-wait) The easiest correct use of [`schedule`](@ref) is on a `Task` that is not started (scheduled) -yet. However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very -low-level building block for constructing synchronization interfaces. A crucial +yet. However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very +low-level building block for constructing synchronization interfaces. A crucial pre-condition of calling `schedule(task)` is that the caller must "own" the `task`; i.e., it must know that the call to `wait` in the given `task` is happening at the locations known to -the code calling `schedule(task)`. One strategy for ensuring such pre-condition is to use +the code calling `schedule(task)`. One strategy for ensuring such pre-condition is to use atomics, as demonstrated in the following example: ```jldoctest @@ -118,8 +125,8 @@ function Base.wait(ev::OneWayEvent) state, ok = @atomicreplace(ev.state, OWE_EMPTY => OWE_WAITING) if ok # OWE_EMPTY -> OWE_WAITING transition means that the notifier task is guaranteed to - # invoke OWE_WAITING -> OWE_NOTIFYING transition. The waiter task must call - # `wait()` immediately. In particular, it MUST NOT invoke any function that may + # invoke OWE_WAITING -> OWE_NOTIFYING transition. The waiter task must call + # `wait()` immediately. In particular, it MUST NOT invoke any function that may # yield to the scheduler at this point in code. wait() else @@ -132,7 +139,7 @@ end ev = OneWayEvent() @sync begin - @async begin + Threads.@spawn begin wait(ev) println("done") end @@ -145,12 +152,12 @@ notifying... done ``` -`OneWayEvent` lets one task to `wait` for another task's `notify`. It is a limited +`OneWayEvent` lets one task to `wait` for another task's `notify`. 
It is a limited communication interface since `wait` can only be used once from a single task (note the non-atomic assignment of `ev.task`) In this example, `notify(ev::OneWayEvent)` is allowed to call `schedule(ev.task)` if and -only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`. This lets us know that +only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`. This lets us know that the task executing `wait(ev::OneWayEvent)` is now in the `ok` branch and that there cannot be other tasks that tries to `schedule(ev.task)` since their `@atomicreplace(ev.state, state => OWE_NOTIFYING)` will fail. diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md index dbea97e4e3cb5..8956cebd53971 100644 --- a/doc/src/base/punctuation.md +++ b/doc/src/base/punctuation.md @@ -22,6 +22,7 @@ Extended documentation for mathematical symbols & functions is [here](@ref math- | `'` | a trailing apostrophe is the [`adjoint`](@ref) (that is, the complex transpose) operator Aᴴ | | [`*`](@ref) | the asterisk is used for multiplication, including matrix multiplication and [string concatenation](@ref man-concatenation) | | [`/`](@ref) | forward slash divides the argument on its left by the one on its right | +| [`//`](@ref) | double forward slash performs exact, rational division | | [`\`](@ref) | backslash operator divides the argument on its right by the one on its left, commonly used to solve matrix equations | | `()` | parentheses with no arguments constructs an empty [`Tuple`](@ref) | | `(a,...)` | parentheses with comma-separated arguments constructs a tuple containing its arguments | diff --git a/doc/src/base/reflection.md b/doc/src/base/reflection.md index e9da82475fd68..d88c3c8b0d0cf 100644 --- a/doc/src/base/reflection.md +++ b/doc/src/base/reflection.md @@ -4,9 +4,9 @@ Julia provides a variety of runtime reflection capabilities. ## Module bindings -The exported names for a `Module` are available using [`names(m::Module)`](@ref), which will return -an array of [`Symbol`](@ref) elements representing the exported bindings. `names(m::Module, all = true)` -returns symbols for all bindings in `m`, regardless of export status. +The public names for a `Module` are available using [`names(m::Module)`](@ref), which will return +an array of [`Symbol`](@ref) elements representing the public bindings. `names(m::Module, all = true)` +returns symbols for all bindings in `m`, regardless of public status. ## DataType fields @@ -51,9 +51,10 @@ The *direct* subtypes of any `DataType` may be listed using [`subtypes`](@ref). the abstract `DataType` [`AbstractFloat`](@ref) has four (concrete) subtypes: ```jldoctest; setup = :(using InteractiveUtils) -julia> subtypes(AbstractFloat) -4-element Vector{Any}: +julia> InteractiveUtils.subtypes(AbstractFloat) +5-element Vector{Any}: BigFloat + Core.BFloat16 Float16 Float32 Float64 @@ -62,6 +63,9 @@ julia> subtypes(AbstractFloat) Any abstract subtype will also be included in this list, but further subtypes thereof will not; recursive application of [`subtypes`](@ref) may be used to inspect the full type tree. +Note that [`subtypes`](@ref) is located inside [`InteractiveUtils`](@ref man-interactive-utils) but +is automatically exported when using the REPL. + ## DataType layout The internal representation of a `DataType` is critically important when interfacing with C code @@ -82,7 +86,7 @@ the unquoted and interpolated expression ([`Expr`](@ref)) form for a given macro be passed instead!). 
For example: ```jldoctest; setup = :(using InteractiveUtils) -julia> macroexpand(@__MODULE__, :(@edit println("")) ) +julia> InteractiveUtils.macroexpand(@__MODULE__, :(@edit println("")) ) :(InteractiveUtils.edit(println, (Base.typesof)(""))) ``` @@ -93,14 +97,15 @@ Finally, the [`Meta.lower`](@ref) function gives the `lowered` form of any expre particular interest for understanding how language constructs map to primitive operations such as assignments, branches, and calls: -```jldoctest +```jldoctest; setup = (using Base: +, sin) julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] )) :($(Expr(:thunk, CodeInfo( - @ none within `top-level scope` -1 ─ %1 = 1 + 2 -│ %2 = sin(0.5) -│ %3 = Base.vect(%1, %2) -└── return %3 +1 ─ %1 = :+ +│ %2 = dynamic (%1)(1, 2) +│ %3 = sin +│ %4 = dynamic (%3)(0.5) +│ %5 = dynamic Base.vect(%2, %4) +└── return %5 )))) ``` @@ -139,11 +144,11 @@ For more information see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@code The aforementioned functions and macros take the keyword argument `debuginfo` that controls the level debug information printed. -```julia-repl -julia> @code_typed debuginfo=:source +(1,1) +```jldoctest; setup = :(using InteractiveUtils), filter = r"int.jl:\d+" +julia> InteractiveUtils.@code_typed debuginfo=:source +(1,1) CodeInfo( - @ int.jl:53 within `+' -1 ─ %1 = Base.add_int(x, y)::Int64 + @ int.jl:87 within `+` +1 ─ %1 = intrinsic Base.add_int(x, y)::Int64 └── return %1 ) => Int64 ``` diff --git a/doc/src/base/scopedvalues.md b/doc/src/base/scopedvalues.md new file mode 100644 index 0000000000000..6ad553429bb1f --- /dev/null +++ b/doc/src/base/scopedvalues.md @@ -0,0 +1,317 @@ +# [Scoped Values](@id scoped-values) + +Scoped values provide an implementation of dynamic scoping in Julia. + +!!! note "Lexical scoping vs dynamic scoping" + [Lexical scoping](@ref scope-of-variables) is the default behavior in Julia. + Under lexical scoping the scope of a variable is determined by the lexical + (textual) structure of a program. + Under dynamic scoping a variable is bound to the most recent assigned value + during the program's execution. + +The state of a scoped value is dependent on the execution path of the program. +This means that for a scoped value you may observe multiple different values +concurrently. + +!!! compat "Julia 1.11" + Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible + implementation is available from the package ScopedValues.jl. + +In its simplest form you can create a [`ScopedValue`](@ref Base.ScopedValues.ScopedValue) +with a default value and then use [`with`](@ref Base.ScopedValues.with) or +[`@with`](@ref Base.ScopedValues.@with) to enter a new dynamic scope. The new scope will +inherit all values from the parent scope (and recursively from all outer scopes) with the +provided scoped value taking priority over previous definitions. + +Let's first look at an example of **lexical** scope. A `let` statement begins +a new lexical scope within which the outer definition of `x` is shadowed by +it's inner definition. + +```julia +x = 1 +let x = 5 + @show x # 5 +end +@show x # 1 +``` + +In the following example, since Julia uses lexical scope, the variable `x` in the body +of `f` refers to the `x` defined in the global scope, and entering a `let` scope does +not change the value `f` observes. + +```julia +x = 1 +f() = @show x +let x = 5 + f() # 1 +end +f() # 1 +``` + +Now using a `ScopedValue` we can use **dynamic** scoping. 
+
+```julia
+using Base.ScopedValues
+
+x = ScopedValue(1)
+f() = @show x[]
+with(x=>5) do
+    f() # 5
+end
+f() # 1
+```
+
+Note that the observed value of the `ScopedValue` is dependent on the execution
+path of the program.
+
+It often makes sense to use a `const` variable to point to a scoped value,
+and you can set the value of multiple `ScopedValue`s with one call to `with`.
+
+```julia
+using Base.ScopedValues
+
+f() = @show a[]
+g() = @show b[]
+
+const a = ScopedValue(1)
+const b = ScopedValue(2)
+
+f() # a[] = 1
+g() # b[] = 2
+
+# Enter a new dynamic scope and set value.
+with(a => 3) do
+    f() # a[] = 3
+    g() # b[] = 2
+    with(a => 4, b => 5) do
+        f() # a[] = 4
+        g() # b[] = 5
+    end
+    f() # a[] = 3
+    g() # b[] = 2
+end
+
+f() # a[] = 1
+g() # b[] = 2
+```
+
+`ScopedValues` provides a macro version of `with`. The expression `@with var=>val expr`
+evaluates `expr` in a new dynamic scope with `var` set to `val`. `@with var=>val expr`
+is equivalent to `with(var=>val) do expr end`. However, `with` requires a zero-argument
+closure or function, which results in an extra call-frame. As an example, consider the
+following function `f`:
+
+```julia
+using Base.ScopedValues
+const a = ScopedValue(1)
+f(x) = a[] + x
+```
+
+If you wish to run `f` in a dynamic scope with `a` set to `2`, then you can use `with`:
+
+```julia
+with(() -> f(10), a=>2)
+```
+
+However, this requires wrapping `f` in a zero-argument function. If you wish to avoid
+the extra call-frame, then you can use the `@with` macro:
+
+```julia
+@with a=>2 f(10)
+```
+
+!!! note
+    Dynamic scopes are inherited by [`Task`](@ref)s at the moment of task creation. Dynamic scopes are **not** propagated through `Distributed.jl` operations.
+
+In the example below we open a new dynamic scope before launching a task.
+The parent task and the two child tasks observe independent values of the
+same scoped value at the same time.
+
+```julia
+using Base.ScopedValues
+import Base.Threads: @spawn
+
+const scoped_val = ScopedValue(1)
+@sync begin
+    with(scoped_val => 2) do
+        @spawn @show scoped_val[] # 2
+    end
+    with(scoped_val => 3) do
+        @spawn @show scoped_val[] # 3
+    end
+    @show scoped_val[] # 1
+end
+```
+
+Scoped values are constant throughout a scope, but you can store mutable
+state in a scoped value. Just keep in mind that the usual caveats
+for global variables apply in the context of concurrent programming.
+
+Care is also required when storing references to mutable state in scoped
+values. You might want to explicitly [unshare mutable state](@ref unshare_mutable_state)
+when entering a new dynamic scope.
+
+```julia
+using Base.ScopedValues
+import Base.Threads: @spawn
+
+const sval_dict = ScopedValue(Dict())
+
+# Example of using a mutable value wrongly
+@sync begin
+    # `Dict` is not thread-safe, so the usage below is invalid
+    @spawn (sval_dict[][:a] = 3)
+    @spawn (sval_dict[][:b] = 3)
+end
+
+@sync begin
+    # If we instead pass a unique dictionary to each
+    # task we can access the dictionaries race free.
+    with(sval_dict => Dict()) do
+        @spawn (sval_dict[][:a] = 3)
+    end
+    with(sval_dict => Dict()) do
+        @spawn (sval_dict[][:b] = 3)
+    end
+end
+```
+
+## Example
+
+In the example below we use a scoped value to implement a permission check in
+a web application. After determining the permissions of the request,
+a new dynamic scope is entered and the scoped value `LEVEL` is set.
+Other parts of the application can query the scoped value and will receive
+the appropriate value.
Other alternatives like task-local storage and global variables +are not well suited for this kind of propagation; our only alternative would have +been to thread a value through the entire call-chain. + +```julia +using Base.ScopedValues + +const LEVEL = ScopedValue(:GUEST) + +function serve(request, response) + level = isAdmin(request) ? :ADMIN : :GUEST + with(LEVEL => level) do + Threads.@spawn handle(request, response) + end +end + +function open(connection::Database) + level = LEVEL[] + if level !== :ADMIN + error("Access disallowed") + end + # ... open connection +end + +function handle(request, response) + # ... + open(Database(#=...=#)) + # ... +end +``` + +## Idioms +### [Unshare mutable state](@id unshare_mutable_state) + +```julia +using Base.ScopedValues +import Base.Threads: @spawn + +const sval_dict = ScopedValue(Dict()) + +# If you want to add new values to the dict, instead of replacing +# it, unshare the values explicitly. In this example we use `merge` +# to unshare the state of the dictionary in parent scope. +@sync begin + with(sval_dict => merge(sval_dict[], Dict(:a => 10))) do + @spawn @show sval_dict[][:a] + end + @spawn sval_dict[][:a] = 3 # Not a race since they are unshared. +end +``` + +### Scoped values as globals + +In order to access the value of a scoped value, the scoped value itself has to +be in (lexical) scope. This means most often you likely want to use scoped values +as constant globals. + +```julia +using Base.ScopedValues +const sval = ScopedValue(1) +``` + +Indeed one can think of scoped values as hidden function arguments. + +This does not preclude their use as non-globals. + +```julia +using Base.ScopedValues +import Base.Threads: @spawn + +function main() + role = ScopedValue(:client) + + function launch() + #... + role[] + end + + @with role => :server @spawn launch() + launch() +end +``` + +But it might have been simpler to just directly pass the function argument +in these cases. + +### Very many ScopedValues + +If you find yourself creating many `ScopedValue`'s for one given module, +it may be better to use a dedicated struct to hold them. + +```julia +using Base.ScopedValues + +Base.@kwdef struct Configuration + color::Bool = false + verbose::Bool = false +end + +const CONFIG = ScopedValue(Configuration(color=true)) + +@with CONFIG => Configuration(color=CONFIG[].color, verbose=true) begin + @show CONFIG[].color # true + @show CONFIG[].verbose # true +end +``` + +## API docs + +```@docs +Base.ScopedValues.ScopedValue +Base.ScopedValues.with +Base.ScopedValues.@with +Base.isassigned(::Base.ScopedValues.ScopedValue) +Base.ScopedValues.get +``` + +## Implementation notes and performance + +`Scope`s use a persistent dictionary. Lookup and insertion is `O(log(32, n))`, +upon dynamic scope entry a small amount of data is copied and the unchanged +data is shared among other scopes. + +The `Scope` object itself is not user-facing and may be changed in a future +version of Julia. + +## Design inspiration + +This design was heavily inspired by [JEPS-429](https://openjdk.org/jeps/429), +which in turn was inspired by dynamically scoped free variables in many Lisp dialects. In particular Interlisp-D and its deep binding strategy. + +A prior design discussed was context variables ala [PEPS-567](https://peps.python.org/pep-0567/) and implemented in Julia as [ContextVariablesX.jl](https://github.com/tkf/ContextVariablesX.jl). 
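As a small usage sketch for the API documented above: a `ScopedValue` can also be created without a default value, in which case indexing it outside of any dynamic scope throws, and `Base.ScopedValues.get` can be used to probe it without throwing. The names `maybe_level` and `current_level` below are invented for illustration, and the sketch assumes `get` returns `nothing` when the value is unset and `Some(value)` otherwise.

```julia
using Base.ScopedValues

const maybe_level = ScopedValue{Symbol}()  # no default value

# `something` unwraps `Some(value)` and falls back to `:unset` for `nothing`.
current_level() = something(Base.ScopedValues.get(maybe_level), :unset)

current_level()               # :unset
with(maybe_level => :ADMIN) do
    current_level()           # :ADMIN
end
```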
diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md index 16e1839cf64a2..cef080c5f8995 100644 --- a/doc/src/base/sort.md +++ b/doc/src/base/sort.md @@ -1,7 +1,7 @@ # Sorting and Related Functions -Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays of -values. By default, Julia picks reasonable algorithms and sorts in standard ascending order: +Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays +of values. By default, Julia picks reasonable algorithms and sorts in ascending order: ```jldoctest julia> sort([2,3,1]) @@ -11,7 +11,7 @@ julia> sort([2,3,1]) 3 ``` -You can easily sort in reverse order as well: +You can sort in reverse order as well: ```jldoctest julia> sort([2,3,1], rev=true) @@ -36,12 +36,12 @@ julia> a 3 ``` -Instead of directly sorting an array, you can compute a permutation of the array's indices that -puts the array into sorted order: +Instead of directly sorting an array, you can compute a permutation of the array's +indices that puts the array into sorted order: ```julia-repl julia> v = randn(5) -5-element Array{Float64,1}: +5-element Vector{Float64}: 0.297288 0.382396 -0.597634 @@ -49,7 +49,7 @@ julia> v = randn(5) -0.839027 julia> p = sortperm(v) -5-element Array{Int64,1}: +5-element Vector{Int64}: 5 3 4 @@ -57,7 +57,7 @@ julia> p = sortperm(v) 2 julia> v[p] -5-element Array{Float64,1}: +5-element Vector{Float64}: -0.839027 -0.597634 -0.0104452 @@ -65,11 +65,11 @@ julia> v[p] 0.382396 ``` -Arrays can easily be sorted according to an arbitrary transformation of their values: +Arrays can be sorted according to an arbitrary transformation of their values: ```julia-repl julia> sort(v, by=abs) -5-element Array{Float64,1}: +5-element Vector{Float64}: -0.0104452 0.297288 0.382396 @@ -81,7 +81,7 @@ Or in reverse order by a transformation: ```julia-repl julia> sort(v, by=abs, rev=true) -5-element Array{Float64,1}: +5-element Vector{Float64}: -0.839027 -0.597634 0.382396 @@ -93,7 +93,7 @@ If needed, the sorting algorithm can be chosen: ```julia-repl julia> sort(v, alg=InsertionSort) -5-element Array{Float64,1}: +5-element Vector{Float64}: -0.839027 -0.597634 -0.0104452 @@ -101,9 +101,12 @@ julia> sort(v, alg=InsertionSort) 0.382396 ``` -All the sorting and order related functions rely on a "less than" relation defining a total order -on the values to be manipulated. The `isless` function is invoked by default, but the relation -can be specified via the `lt` keyword. +All the sorting and order related functions rely on a "less than" relation defining a +[strict weak order](https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings) +on the values to be manipulated. The `isless` function is invoked by default, but the +relation can be specified via the `lt` keyword, a function that takes two array elements +and returns `true` if and only if the first argument is "less than" the second. See +[`sort!`](@ref) and [Alternate Orderings](@ref) for more information. ## Sorting Functions @@ -163,24 +166,19 @@ Base.Sort.defalg(::AbstractArray{<:Union{SmallInlineStrings, Missing}}) = Inline be stable since Julia 1.9. Previous versions had unstable edge cases when sorting numeric arrays. -## Alternate orderings +## Alternate Orderings -By default, `sort` and related functions use [`isless`](@ref) to compare two -elements in order to determine which should come first. 
The -[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining -alternate orderings on the same set of elements: when calling a sorting function like -`sort`, an instance of `Ordering` can be provided with the keyword argument `order`. +By default, `sort`, `searchsorted`, and related functions use [`isless`](@ref) to compare +two elements in order to determine which should come first. The +[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining alternate +orderings on the same set of elements: when calling a sorting function like +`sort!`, an instance of `Ordering` can be provided with the keyword argument `order`. -Instances of `Ordering` define a [total order](https://en.wikipedia.org/wiki/Total_order) -on a set of elements, so that for any elements `a`, `b`, `c` the following hold: - -* Exactly one of the following is true: `a` is less than `b`, `b` is less than - `a`, or `a` and `b` are equal (according to [`isequal`](@ref)). -* The relation is transitive - if `a` is less than `b` and `b` is less than `c` - then `a` is less than `c`. - -The [`Base.Order.lt`](@ref) function works as a generalization of `isless` to -test whether `a` is less than `b` according to a given order. +Instances of `Ordering` define an order through the [`Base.Order.lt`](@ref) +function, which works as a generalization of `isless`. +This function's behavior on custom `Ordering`s must satisfy all the conditions of a +[strict weak order](https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings). +See [`sort!`](@ref) for details and examples of valid and invalid `lt` functions. ```@docs Base.Order.Ordering diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 263c0019788c3..a9637a1a7be3a 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -43,6 +43,9 @@ Base.:(==)(::AbstractString, ::AbstractString) Base.cmp(::AbstractString, ::AbstractString) Base.lpad Base.rpad +Base.ltruncate +Base.rtruncate +Base.ctruncate Base.findfirst(::AbstractString, ::AbstractString) Base.findnext(::AbstractString, ::AbstractString, ::Integer) Base.findnext(::AbstractChar, ::AbstractString, ::Integer) @@ -51,8 +54,9 @@ Base.findlast(::AbstractChar, ::AbstractString) Base.findprev(::AbstractString, ::AbstractString, ::Integer) Base.occursin Base.reverse(::Union{String,SubString{String}}) -Base.replace(s::AbstractString, ::Pair...) +Base.replace(::IO, s::AbstractString, ::Pair...) Base.eachsplit +Base.eachrsplit Base.split Base.rsplit Base.strip @@ -74,8 +78,8 @@ Base.chopprefix Base.chopsuffix Base.chomp Base.thisind -Base.nextind -Base.prevind +Base.nextind(::AbstractString, ::Integer, ::Integer) +Base.prevind(::AbstractString, ::Integer, ::Integer) Base.textwidth Base.isascii Base.iscntrl @@ -89,5 +93,20 @@ Base.isspace Base.isuppercase Base.isxdigit Base.escape_string +Base.escape_raw_string Base.unescape_string ``` + +## `AnnotatedString`s + +!!! note + The API for AnnotatedStrings is considered experimental and is subject to change between + Julia versions. + +```@docs +Base.AnnotatedString +Base.AnnotatedChar +Base.annotatedstring +Base.annotations +Base.annotate! 
+``` diff --git a/doc/src/devdocs/EscapeAnalysis.md b/doc/src/devdocs/EscapeAnalysis.md index 983a6782ccc79..d8efd759fa131 100644 --- a/doc/src/devdocs/EscapeAnalysis.md +++ b/doc/src/devdocs/EscapeAnalysis.md @@ -1,6 +1,6 @@ # `EscapeAnalysis` -`Core.Compiler.EscapeAnalysis` is a compiler utility module that aims to analyze +`Compiler.EscapeAnalysis` is a compiler utility module that aims to analyze escape information of [Julia's SSA-form IR](@ref Julia-SSA-form-IR) a.k.a. `IRCode`. This escape analysis aims to: @@ -18,9 +18,14 @@ This escape analysis aims to: ## Try it out! You can give a try to the escape analysis by loading the `EAUtils.jl` utility script that -define the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes: +defines the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes: ```@repl EAUtils -include(normpath(Sys.BINDIR, "..", "share", "julia", "test", "compiler", "EscapeAnalysis", "EAUtils.jl")); using .EAUtils +# InteractiveUtils.@activate Compiler # to use the stdlib version of the Compiler + +let JULIA_DIR = normpath(Sys.BINDIR, "..", "share", "julia") + include(normpath(JULIA_DIR, "Compiler", "test", "EAUtils.jl")) + using .EAUtils +end mutable struct SafeRef{T} x::T @@ -30,29 +35,27 @@ Base.setindex!(x::SafeRef, v) = x.x = v; Base.isassigned(x::SafeRef) = true; get′(x) = isassigned(x) ? x[] : throw(x); -result = code_escapes((String,String,String,String)) do s1, s2, s3, s4 - r1 = Ref(s1) +result = code_escapes((Base.RefValue{String},String,String,)) do r1, s2, s3 r2 = Ref(s2) r3 = SafeRef(s3) try s1 = get′(r1) ret = sizeof(s1) catch err - global GV = err # will definitely escape `r1` + global GV = err # `r1` may escape end - s2 = get′(r2) # still `r2` doesn't escape fully - s3 = get′(r3) # still `r3` doesn't escape fully - s4 = sizeof(s4) # the argument `s4` doesn't escape here + s2 = get′(r2) # `r2` doesn't escape + s3 = get′(r3) # `r3` doesn't escape return s2, s3, s4 end ``` -The symbols in the side of each call argument and SSA statements represents the following meaning: +The symbols on the side of each call argument and SSA statements represent the following meaning: - `◌` (plain): this value is not analyzed because escape information of it won't be used anyway (when the object is `isbitstype` for example) - `✓` (green or cyan): this value never escapes (`has_no_escape(result.state[x])` holds), colored blue if it has arg escape also (`has_arg_escape(result.state[x])` holds) - `↑` (blue or yellow): this value can escape to the caller via return (`has_return_escape(result.state[x])` holds), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds) - `X` (red): this value can escape to somewhere the escape analysis can't reason about like escapes to a global memory (`has_all_escape(result.state[x])` holds) -- `*` (bold): this value's escape state is between the `ReturnEscape` and `AllEscape` in the partial order of [`EscapeInfo`](@ref Core.Compiler.EscapeAnalysis.EscapeInfo), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds) +- `*` (bold): this value's escape state is between the `ReturnEscape` and `AllEscape` in the partial order of [`EscapeInfo`](@ref Base.Compiler.EscapeAnalysis.EscapeInfo), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds) - `′`: this value has additional object field / array element information in its `AliasInfo` property Escape 
information of each call argument and SSA value can be inspected programmatically as like: @@ -67,7 +70,7 @@ result.state[Core.SSAValue(3)] # get EscapeInfo of `r3` ### Lattice Design `EscapeAnalysis` is implemented as a [data-flow analysis](https://en.wikipedia.org/wiki/Data-flow_analysis) -that works on a lattice of [`x::EscapeInfo`](@ref Core.Compiler.EscapeAnalysis.EscapeInfo), +that works on a lattice of [`x::EscapeInfo`](@ref Base.Compiler.EscapeAnalysis.EscapeInfo), which is composed of the following properties: - `x.Analyzed::Bool`: not formally part of the lattice, only indicates `x` has not been analyzed or not - `x.ReturnEscape::BitSet`: records SSA statements where `x` can escape to the caller via return @@ -98,10 +101,10 @@ One distinctive design of this escape analysis is that it is fully _backward_, i.e. escape information flows _from usages to definitions_. For example, in the code snippet below, EA first analyzes the statement `return %1` and imposes `ReturnEscape` on `%1` (corresponding to `obj`), and then it analyzes -`%1 = %new(Base.RefValue{String, _2}))` and propagates the `ReturnEscape` imposed on `%1` -to the call argument `_2` (corresponding to `s`): +`%1 = %new(Base.RefValue{Base.RefValue{String}, _2}))` and propagates the `ReturnEscape` +imposed on `%1` to the call argument `_2` (corresponding to `s`): ```@repl EAUtils -code_escapes((String,)) do s +code_escapes((Base.RefValue{String},)) do s obj = Ref(s) return obj end @@ -113,7 +116,7 @@ As a result this scheme enables a simple implementation of escape analysis, e.g. `PhiNode` for example can be handled simply by propagating escape information imposed on a `PhiNode` to its predecessor values: ```@repl EAUtils -code_escapes((Bool, String, String)) do cnd, s, t +code_escapes((Bool, Base.RefValue{String}, Base.RefValue{String})) do cnd, s, t if cnd obj = Ref(s) else @@ -358,14 +361,10 @@ non-inlined callees that has been derived by previous `IPO EA`. More interestingly, it is also valid to use `IPO EA` escape information for type inference, e.g., inference accuracy can be improved by forming `Const`/`PartialStruct`/`MustAlias` of mutable object. -Since the computational cost of `analyze_escapes` is not that cheap, -both `IPO EA` and `Local EA` are better to run only when there is any profitability. -Currently `EscapeAnalysis` provides the `is_ipo_profitable` heuristic to check a profitability of `IPO EA`. ```@docs -Core.Compiler.EscapeAnalysis.analyze_escapes -Core.Compiler.EscapeAnalysis.EscapeState -Core.Compiler.EscapeAnalysis.EscapeInfo -Core.Compiler.EscapeAnalysis.is_ipo_profitable +Base.Compiler.EscapeAnalysis.analyze_escapes +Base.Compiler.EscapeAnalysis.EscapeState +Base.Compiler.EscapeAnalysis.EscapeInfo ``` -------------------------------------------------------------------------------------------- diff --git a/doc/src/devdocs/aot.md b/doc/src/devdocs/aot.md new file mode 100644 index 0000000000000..cdaf1880ab927 --- /dev/null +++ b/doc/src/devdocs/aot.md @@ -0,0 +1,76 @@ +# Ahead of Time Compilation + +This document describes the design and structure of the ahead-of-time (AOT) compilation system in Julia. This system is used when generating system images and package images. Much of the implementation described here is located in `aotcompile.cpp`, `staticdata.c`, and `processor.cpp` + +## Introduction + +Though Julia normally compiles code just-in-time (JIT), it is possible to compile code ahead of time and save the resulting code to a file. This can be useful for a number of reasons: +1. 
To reduce the time it takes to start a Julia process. +2. To reduce the time spent in the JIT compiler instead of executing code (time to first execution, TTFX). +3. To reduce the amount of memory used by the JIT compiler. + +## High-Level Overview + +The following descriptions are a snapshot of the current implementation details of the end-to-end pipeline that happens internally when the user compiles a new AOT module, such as occurs when they type `using Foo`. These details are likely to change over time as we implement better ways to handle them, so current implementations may not exactly match the dataflow and functions described below. + +### Compiling Code Images + +Firstly, the methods that need to be compiled to native code must be identified. This can only be done by actually executing the code to be compiled, as the set of methods that need to be compiled depends on the types of the arguments passed to the methods, and method invocations with certain combinations of types may not be known until runtime. During this process, the exact methods that the compiler sees are tracked for later compilation, producing a compilation trace. + +!!! note + + Currently when compiling images, Julia runs the trace generation in a different process than the process performing the AOT compilation. This can have impacts when attempting to use a debugger during precompilation. The best way to debug precompilation with a debugger is to use the rr debugger, record the entire process tree, use `rr ps` to identify the relevant failing process, and then use `rr replay -p PID` to replay just the failing process. + +Once the methods to be compiled have been identified, they are passed to the `jl_create_system_image` function. This function sets up a number of data structures that will be used when serializing native code to a file, and then calls `jl_create_native` with the array of methods. `jl_create_native` runs codegen on the methods produces one or more LLVM modules. `jl_create_system_image` then records some useful information about what codegen produced from the module(s). + +The module(s) are then passed to `jl_dump_native`, along with the information recorded by `jl_create_system_image`. `jl_dump_native` contains the code necessary to serialize the module(s) to bitcode, object, or assembly files depending on the command-line options passed to Julia. The serialized code and information are then written to a file as an archive. + +The final step is to run a system linker on the object files in the archive produced by `jl_dump_native`. Once this step is complete, a shared library containing the compiled code is produced. + +### Loading Code Images + +When loading a code image, the shared library produced by the linker is loaded into memory. The system image data is then loaded from the shared library. This data contains information about the types, methods, and code instances that were compiled into the shared library. This data is used to restore the state of the runtime to what it was when the code image was compiled. + +If the code image was compiled with multiversioning, the loader will pick the appropriate version of each function to use based on the CPU features available on the current machine. + +For system images, since no other code has been loaded, the state of the runtime is now the same as it was when the code image was compiled. 
For package images, the environment may have changed compared to when the code was compiled, so each method must be checked against the global method table to determine if it is still valid code. + +## Compiling Methods + +### Tracing Compiled Methods + +Julia has a command-line flag to record all of the methods that are compiled by the JIT compiler, `--trace-compile=filename`. When a function is compiled and this flag has a filename, Julia will print out a precompile statement to that file with the method and argument types it was called with. This therefore generates a precompile script that can be used later in the AOT compilation process. The [PrecompileTools](https://julialang.github.io/PrecompileTools.jl/stable/) package has tooling that can make taking advantage of this functionality easier for package developers. + +### `jl_create_system_image` + +`jl_create_system_image` saves all of the Julia-specific metadata necessary to later restore the state of the runtime. This includes data such as code instances, method instances, method tables, and type information. This function also sets up the data structures necessary to serialize the native code to a file. Finally, it calls `jl_create_native` to create one or more LLVM modules containing the native code for the methods passed to it. `jl_create_native` is responsible for running codegen on the methods passed to it. + +### `jl_dump_native` + +`jl_dump_native` is responsible for serializing the LLVM module containing the native code to a file. In addition to the module, the system image data produced by `jl_create_system_image` is compiled as a global variable. The output of this method is bitcode, object, and/or assembly archives containing the code and system image data. + +`jl_dump_native` is typically one of the larger time sinks when emitting native code, with much of the time spent in optimizing LLVM IR and emitting machine code. Therefore, this function is capable of multithreading the optimization and machine code emission steps. This multithreading is parameterized on the size of the module, but can be explicitly overridden by setting the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable. The default maximum number of threads is half the number of available threads, but setting it to be lower can reduce peak memory usage during compilation. + +`jl_dump_native` can also produce native code optimized for multiple architectures, when integrated with the Julia loader. This is triggered by setting the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable and mediated by the multiversioning pass in the optimization pipeline. To make this work with multithreading, an annotation step is added before the module is split into submodules that are emitted on their own threads, and this annotation step uses information available throughout the entire module to decide what functions are cloned for different architectures. Once the annotation has happened, individual threads can emit code for different architectures in parallel, knowing that a different submodule is guaranteed to produce the necessary functions that will be called by a cloned function. + +Some other metadata about how the module was serialized is also stored in the archive, such as the number of threads used to serialize the module and the number of functions that were compiled. + +### Static Linking + +The final step in the AOT compilation process is to run a linker on the object files in the archive produced by `jl_dump_native`. 
This produces a shared library containing the compiled code. This shared library can then be loaded by Julia to restore the state of the runtime. When compiling a system image, the native linker used by a C compiler is used to produce the final shared library. For package images, the LLVM linker LLD is used to provide a more consistent linking interface. + +## Loading Code Images + +### Loading the Shared Library + +The first step in loading a code image is to load the shared library produced by the linker. This is done by calling `jl_dlopen` on the path to the shared library. This function is responsible for loading the shared library and resolving all of the symbols in the library. + +### Loading Native Code + +The loader first needs to identify whether the native code that was compiled is valid for the architecture that the loader is running on. This is necessary to avoid executing instructions that older CPUs do not recognize. This is done by checking the CPU features available on the current machine against the CPU features that the code was compiled for. When multiversioning is enabled, the loader will pick the appropriate version of each function to use based on the CPU features available on the current machine. If none of the feature sets that were multiversioned, the loader will throw an error. + +Part of the multiversioning pass creates a number of global arrays of all of the functions in the module. When this process is multithreaded, an array of arrays is created, which the loader reorganizes into one large array with all of the functions that were compiled for this architecture. A similar process occurs for the global variables in the module. + +### Setting Up Julia State + +The loader then uses the global variables and functions produced from loading native code to set up Julia runtime core data structures in the current process. This setup involves adding types and methods to the Julia runtime, and making the cached native code available for use by other Julia functions and the interpreter. For package images, each method must be validated, in that the global method table's state must match the state that the package image was compiled for. In particular, if a different set of methods exists at the load time compared to compile time of the package image, the method must be invalidated and recompiled on first use. This is necessary to ensure that execution semantics remain the same regardless of if a package was precompiled or if the code was directly executed. System images do not need to perform this validation, since the global method table is empty at load time. Thus, system images have faster load times than package images. diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md index 1a11a5918d091..fe63dfe35edac 100644 --- a/doc/src/devdocs/ast.md +++ b/doc/src/devdocs/ast.md @@ -254,11 +254,6 @@ types exist in lowered form: Identifies arguments and local variables by consecutive numbering. It has an integer-valued `id` field giving the slot index. The types of these slots can be found in the `slottypes` field of their `CodeInfo` object. - When a slot has different types at different uses and thus requires per-use type annotations, - they are converted to temporary `Core.Compiler.TypedSlot` object. This object has an - additional `typ` field as well as the `id` field. Note that `Core.Compiler.TypedSlot` - only appears in an unoptimized lowered form that is scheduled for optimization, - and it never appears elsewhere. 
* `Argument` @@ -421,7 +416,7 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. * `new` Allocates a new struct-like object. First argument is the type. The [`new`](@ref) pseudo-function is lowered - to this, and the type is always inserted by the compiler. This is very much an internal-only + to this, and the type is always inserted by the compiler. This is very much an internal-only feature, and does no checking. Evaluating arbitrary `new` expressions can easily segfault. * `splatnew` @@ -436,12 +431,12 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. * `the_exception` - Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`. + Yields the caught exception inside a `catch` block, as returned by `jl_current_exception(ct)`. * `enter` Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on - error. Yields a token which is consumed by `pop_exception`. + error. Yields a token which is consumed by `pop_exception`. * `leave` @@ -503,9 +498,9 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. The number of required arguments for a varargs function definition. - * `args[5]::QuoteNode{Symbol}` : calling convention + * `args[5]::QuoteNode{<:Union{Symbol,Tuple{Symbol,UInt16}}`: calling convention - The calling convention for the call. + The calling convention for the call, optionally with effects. * `args[6:5+length(args[3])]` : arguments @@ -524,18 +519,22 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. The function signature of the opaque closure. Opaque closures don't participate in dispatch, but the input types can be restricted. - * `args[2]` : isva - - Indicates whether the closure accepts varargs. - - * `args[3]` : lb + * `args[2]` : lb Lower bound on the output type. (Defaults to `Union{}`) - * `args[4]` : ub + * `args[3]` : ub Upper bound on the output type. (Defaults to `Any`) + * `args[4]` : constprop + + Indicates whether the opaque closure's identity may be used for constant + propagation. The `@opaque` macro enables this by default, but this will + cause additional inference which may be undesirable and prevents the + code from running during precompile. + If `args[4]` is a method, the argument is considered skipped. + * `args[5]` : method The actual method as an `opaque_closure_method` expression. @@ -606,15 +605,9 @@ for important details on how to modify these fields safely. * `sparam_vals` - The values of the static parameters in `specTypes` indexed by `def.sparam_syms`. For the - `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`. But for a - runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and - indexable. - - * `uninferred` - - The uncompressed source code for a toplevel thunk. Additionally, for a generated function, - this is one of many places that the source code might be found. + The values of the static parameters in `specTypes`. + For the `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`. + But for a runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and indexable. * `backedges` @@ -633,6 +626,10 @@ for important details on how to modify these fields safely. The `MethodInstance` that this cache entry is derived from. + * `owner` + + A token that represents the owner of this `CodeInstance`. Will use `jl_egal` to match. 
+ * `rettype`/`rettype_const` @@ -667,7 +664,7 @@ for important details on how to modify these fields safely. ### CodeInfo -A (usually temporary) container for holding lowered source code. +A (usually temporary) container for holding lowered (and possibly inferred) source code. * `code` @@ -695,39 +692,21 @@ A (usually temporary) container for holding lowered source code. * `ssaflags` - Statement-level flags for each expression in the function. Many of these are reserved, but not yet implemented: - - * 0x01 << 0 = statement is marked as `@inbounds` - * 0x01 << 1 = statement is marked as `@inline` - * 0x01 << 2 = statement is marked as `@noinline` - * 0x01 << 3 = statement is within a block that leads to `throw` call - * 0x01 << 4 = statement may be removed if its result is unused, in particular it is thus be both pure and effect free - * 0x01 << 5-6 = - * 0x01 << 7 = has out-of-band info - - * `linetable` - - An array of source location objects + Statement-level 32 bits flags for each expression in the function. + See the definition of `jl_code_info_t` in julia.h for more details. - * `codelocs` +These are only populated after inference (or by generated functions in some cases): - An array of integer indices into the `linetable`, giving the location associated - with each statement. + * `debuginfo` -Optional Fields: - - * `slottypes` - - An array of types for the slots. + An object to retrieve source information for each statements, see + [How to interpret line numbers in a `CodeInfo` object](@ref). * `rettype` - The inferred return type of the lowered form (IR). Default value is `Any`. - - * `method_for_inference_limit_heuristics` - - The `method_for_inference_heuristics` will expand the given method's generator if - necessary during inference. + The inferred return type of the lowered form (IR). Default value is `Any`. This is + mostly present for convenience, as (due to the way OpaqueClosures work) it is not + necessarily the rettype used by codegen. * `parent` @@ -741,16 +720,19 @@ Optional Fields: The range of world ages for which this code was valid at the time when it had been inferred. +Optional Fields: -Boolean properties: + * `slottypes` - * `inferred` + An array of types for the slots. - Whether this has been produced by type inference. + * `method_for_inference_limit_heuristics` + + The `method_for_inference_heuristics` will expand the given method's generator if + necessary during inference. - * `inlineable` - Whether this should be eligible for inlining. +Boolean properties: * `propagate_inbounds` @@ -760,7 +742,7 @@ Boolean properties: `UInt8` settings: - * `constprop` + * `constprop`, `inlineable` * 0 = use heuristic * 1 = aggressive @@ -776,3 +758,79 @@ Boolean properties: * 0x01 << 4 = the syntactic control flow within this method is guaranteed to terminate (`:terminates_locally`) See the documentation of `Base.@assume_effects` for more details. + + +#### How to interpret line numbers in a `CodeInfo` object + +There are 2 common forms for this data: one used internally that compresses the data somewhat and one used in the compiler. +They contain the same basic info, but the compiler version is all mutable while the version used internally is not. + +Many consumers may be able to call `Base.IRShow.buildLineInfoNode`, +`Base.IRShow.append_scopes!`, or `Stacktraces.lookup(::InterpreterIP)` to avoid needing to +(re-)implement these details specifically. 
+ +The definitions of each of these are: + +```julia +struct Core.DebugInfo + @noinline + def::Union{Method,MethodInstance,Symbol} + linetable::Union{Nothing,DebugInfo} + edges::SimpleVector{DebugInfo} + codelocs::String # compressed data +end +mutable struct Core.Compiler.DebugInfoStream + def::Union{Method,MethodInstance,Symbol} + linetable::Union{Nothing,DebugInfo} + edges::Vector{DebugInfo} + firstline::Int32 # the starting line for this block (specified by an index of 0) + codelocs::Vector{Int32} # for each statement: + # index into linetable (if defined), else a line number (in the file represented by def) + # then index into edges + # then index into edges[linetable] +end +``` + + + * `def` : where this `DebugInfo` was defined (the `Method`, `MethodInstance`, or `Symbol` of file scope, for example) + + * `linetable` + + Another `DebugInfo` that this was derived from, which contains the actual line numbers, + such that this DebugInfo contains only the indexes into it. This avoids making copies, + as well as makes it possible to track how each individual statement transformed from + source to optimized, not just the separate line numbers. If `def` is not a Symbol, then + that object replaces the current function object for the metadata on what function is + conceptually being executed (e.g. think Cassette transforms here). The `codelocs` values + described below also are interpreted as an index into the `codelocs` in this object, + instead of being a line number itself. + + * `edges` : Vector of the unique DebugInfo for every function inlined into this (which + recursively have the edges for everything inlined into them). + + * `firstline` (when uncompressed to DebugInfoStream) + + The line number associated with the `begin` statement (or other keyword such as + `function` or `quote`) that delineates where this code definition "starts". + + * `codelocs` (when uncompressed to `DebugInfoStream`) + + A vector of indices, with 3 values for each statement in the IR plus one for the + starting point of the block, that describe the stacktrace from that point: + 1. the integer index into the `linetable.codelocs` field, giving the + original location associated with each statement (including its syntactic edges), + or zero indicating no change to the line number from the previously + executed statement (which is not necessarily syntactic or lexical prior), + or the line number itself if the `linetable` field is `nothing`. + 2. the integer index into `edges`, giving the `DebugInfo` inlined there, + or zero if there are no edges. + 3. (if entry 2 is non-zero) the integer index into `edges[].codelocs`, + to interpret recursively for each function in the inlining stack, + or zero indicating to use `edges[].firstline` as the line number. + + Special codes include: + - `(zero, zero, *) `: no change to the line number or edges from the previous statement + (you may choose to interpret this either syntactically or lexically). The inlining + depth also might have changed, though most callers should ignore that. + - `(zero, non-zero, *)` : no line number, just edges (usually because of + macro-expansion into top-level code). diff --git a/doc/src/devdocs/backtraces.md b/doc/src/devdocs/backtraces.md index 4ed3ea47efbb5..d0533ebe57fcb 100644 --- a/doc/src/devdocs/backtraces.md +++ b/doc/src/devdocs/backtraces.md @@ -1,12 +1,12 @@ # Reporting and analyzing crashes (segfaults) -So you managed to break Julia. Congratulations! 
Collected here are some general procedures you -can undergo for common symptoms encountered when something goes awry. Including the information +So you managed to break Julia. Congratulations! Collected here are some general procedures you +can undergo for common symptoms encountered when something goes awry. Including the information from these debugging steps can greatly help the maintainers when tracking down a segfault or trying to figure out why your script is running slower than expected. If you've been directed to this page, find the symptom that best matches what you're experiencing -and follow the instructions to generate the debugging information requested. Table of symptoms: +and follow the instructions to generate the debugging information requested. Table of symptoms: * [Segfaults during bootstrap (`sysimg.jl`)](@ref) * [Segfaults when running a script](@ref) @@ -26,10 +26,10 @@ versioninfo() ## Segfaults during bootstrap (`sysimg.jl`) Segfaults toward the end of the `make` process of building Julia are a common symptom of something -going wrong while Julia is preparsing the corpus of code in the `base/` folder. Many factors +going wrong while Julia is preparsing the corpus of code in the `base/` folder. Many factors can contribute toward this process dying unexpectedly, however it is as often as not due to an error in the C-code portion of Julia, and as such must typically be debugged with a debug build -inside of `gdb`. Explicitly: +inside of `gdb`. Explicitly: Create a debug build of Julia: @@ -40,7 +40,7 @@ $ make debug Note that this process will likely fail with the same error as a normal `make` incantation, however this will create a debug executable that will offer `gdb` the debugging symbols needed to get -accurate backtraces. Next, manually run the bootstrap process inside of `gdb`: +accurate backtraces. Next, manually run the bootstrap process inside of `gdb`: ``` $ cd base/ @@ -48,14 +48,14 @@ $ gdb -x ../contrib/debug_bootstrap.gdb ``` This will start `gdb`, attempt to run the bootstrap process using the debug build of Julia, and -print out a backtrace if (when) it segfaults. You may need to hit `` a few times to get -the full backtrace. Create a [gist](https://gist.github.com) with the backtrace, the [version info](@ref dev-version-info), +print out a backtrace if (when) it segfaults. You may need to hit `` a few times to get +the full backtrace. Create a [gist](https://gist.github.com) with the backtrace, the [version info](@ref dev-version-info), and any other pertinent information you can think of and open a new [issue](https://github.com/JuliaLang/julia/issues?q=is%3Aopen) on Github with a link to the gist. ## Segfaults when running a script -The procedure is very similar to [Segfaults during bootstrap (`sysimg.jl`)](@ref). Create a debug +The procedure is very similar to [Segfaults during bootstrap (`sysimg.jl`)](@ref). Create a debug build of Julia, and run your script inside of a debugged Julia process: ``` @@ -64,7 +64,7 @@ $ make debug $ gdb --args usr/bin/julia-debug ``` -Note that `gdb` will sit there, waiting for instructions. Type `r` to run the process, and `bt` +Note that `gdb` will sit there, waiting for instructions. 
Type `r` to run the process, and `bt` to generate a backtrace once it segfaults: ``` diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md index 7acd32f04dc75..fa2cda2698bfe 100644 --- a/doc/src/devdocs/boundscheck.md +++ b/doc/src/devdocs/boundscheck.md @@ -105,7 +105,7 @@ checkbounds_indices(Bool, (IA1, IA...), (I1, I...)) = checkindex(Bool, IA1, I1) checkbounds_indices(Bool, IA, I) ``` -so `checkindex` checks a single dimension. All of these functions, including the unexported +so `checkindex` checks a single dimension. All of these functions, including the unexported `checkbounds_indices` have docstrings accessible with `?` . If you have to customize bounds checking for a specific array type, you should specialize `checkbounds(Bool, A, I...)`. @@ -113,10 +113,10 @@ However, in most cases you should be able to rely on `checkbounds_indices` as lo useful `axes` for your array type. If you have novel index types, first consider specializing `checkindex`, which handles a single -index for a particular dimension of an array. If you have a custom multidimensional index type +index for a particular dimension of an array. If you have a custom multidimensional index type (similar to `CartesianIndex`), then you may have to consider specializing `checkbounds_indices`. -Note this hierarchy has been designed to reduce the likelihood of method ambiguities. We try +Note this hierarchy has been designed to reduce the likelihood of method ambiguities. We try to make `checkbounds` the place to specialize on array type, and try to avoid specializations on index types; conversely, `checkindex` is intended to be specialized only on index type (especially, the last argument). diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md index 747ee25d22a04..df9ede07d270f 100644 --- a/doc/src/devdocs/build/arm.md +++ b/doc/src/devdocs/build/arm.md @@ -55,18 +55,9 @@ due to unsupported inline assembly. In that case, add `MCPU=armv7-a` to ## AArch64 (ARMv8) -Julia has been successfully built on the following ARMv8 devices: +Julia is expected to work and build on ARMv8 cpus. One should follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md). Julia expects to have around 8GB of ram or swap enabled to build itself. -* [nVidia Jetson TX1 & TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html); -* [X-Gene 1](https://www.apm.com/products/data-center/x-gene-family/x-gene/); -* [Overdrive 3000](https://softiron.com/products/overdrive-3000/); -* [Cavium ThunderX](https://www.cavium.com/ThunderX_ARM_Processors.html) on [packet.net](https://www.packet.net). - -Compilation on `ARMv8-A` requires that `Make.user` is configured as follows: - -``` -MCPU=armv8-a -``` +### Known issues Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions. 
Due to a [bug in LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like @@ -77,21 +68,3 @@ Should this happen, ask your system administrator to increase the limit of memor ``` sysctl -w vm.max_map_count=262144 ``` - -### nVidia Jetson TX2 - -Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html) -platform with minimal configuration changes. - -After configuring `Make.user` as per the `AArch64` instructions in this document, -follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md). -The majority of the build dependencies specified in the instructions are installed by -the default configuration flashed by [Jetpack 3.0](https://developer.nvidia.com/embedded/jetpack). The remaining tools can be installed by issuing the following command: - -``` -sudo apt-get install gfortran wget cmake -``` - -A full parallel build, including LLVM, -will complete in around two hours. All tests pass and CUDA functionality is available -through, e.g., [CUDAdrv](https://github.com/JuliaGPU/CUDAdrv.jl). diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md index ad3871c2e70f0..5fe038959edf0 100644 --- a/doc/src/devdocs/build/build.md +++ b/doc/src/devdocs/build/build.md @@ -16,7 +16,7 @@ variables. When compiled the first time, the build will automatically download pre-built [external -dependencies](#required-build-tools-and-external-libraries). If you +dependencies](#Required-Build-Tools-and-External-Libraries). If you prefer to build all the dependencies on your own, or are building on a system that cannot access the network during the build process, add the following in `Make.user`: @@ -60,6 +60,16 @@ To run julia from anywhere you can: - write `prefix=/path/to/install/folder` into `Make.user` and then run `make install`. If there is a version of Julia already installed in this folder, you should delete it before running `make install`. +Some of the options you can set to control the build of Julia are listed and documented at the beginning of the file `Make.inc`, but you should never edit it for this purpose, use `Make.user` instead. + +Julia's Makefiles define convenient automatic rules called `print-` for printing the value of variables, replacing `` with the name of the variable to print the value of. +For example +```console +$ make print-JULIA_PRECOMPILE +JULIA_PRECOMPILE=1 +``` +These rules are useful for debugging purposes. + Now you should be able to run Julia like this: julia @@ -138,6 +148,7 @@ Notes for various operating systems: Notes for various architectures: * [ARM](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/arm.md) +* [RISC-V](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/riscv.md) ## Required Build Tools and External Libraries @@ -184,7 +195,7 @@ uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/juli - **[libgit2]** — Git linkable library, used by Julia's package manager. - **[curl]** — libcurl provides download and proxy support. - **[libssh2]** — library for SSH transport, used by libgit2 for packages with SSH remotes. 
-- **[mbedtls]** — library used for cryptography and transport layer security, used by libssh2 +- **[OpenSSL]** — library used for cryptography and transport layer security, used by libgit2 and libssh2. - **[utf8proc]** — a library for processing UTF-8 encoded Unicode strings. - **[LLVM libunwind]** — LLVM's fork of [libunwind], a library that determines the call-chain of a program. - **[ITTAPI]** — Intel's Instrumentation and Tracing Technology and Just-In-Time API. @@ -219,7 +230,7 @@ uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/juli [utf8proc]: https://julialang.org/utf8proc/ [libunwind]: https://www.nongnu.org/libunwind [libssh2]: https://www.libssh2.org -[mbedtls]: https://tls.mbed.org/ +[OpenSSL]: https://www.openssl.org/ [pkg-config]: https://www.freedesktop.org/wiki/Software/pkg-config/ [powershell]: https://docs.microsoft.com/en-us/powershell/scripting/wmf/overview [which]: https://carlowood.github.io/which/ @@ -238,11 +249,49 @@ The most complicated dependency is LLVM, for which we require additional patches For packaging Julia with LLVM, we recommend either: - bundling a Julia-only LLVM library inside the Julia package, or - adding the patches to the LLVM package of the distribution. - * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/15.x` branch. - * The only Julia-specific patch is the lib renaming (`llvm-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM. + * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/18.x` branch. + * The only Julia-specific patch is the lib renaming (`llvm7-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM. * The remaining patches are all upstream bug fixes, and have been contributed into upstream LLVM. -Using an unpatched or different version of LLVM will result in errors and/or poor performance. Though Julia can be built with newer LLVM versions, support for this should be regarded as experimental and not suitable for packaging. +Using an unpatched or different version of LLVM will result in errors and/or poor performance. +You can build a different version of LLVM from a remote Git repository with the following options in the `Make.user` file: + +```make +# Force source build of LLVM +USE_BINARYBUILDER_LLVM = 0 +# Use Git for fetching LLVM source code +# this is either `1` to get all of them +DEPS_GIT = 1 +# or a space-separated list of specific dependencies to download with git +DEPS_GIT = llvm + +# Other useful options: +#URL of the Git repository you want to obtain LLVM from: +# LLVM_GIT_URL = ... +#Name of the alternate branch to clone from git +# LLVM_BRANCH = julia-16.0.6-0 +#SHA hash of the alternate commit to check out automatically +# LLVM_SHA1 = $(LLVM_BRANCH) +#List of LLVM targets to build. It is strongly recommended to keep at least all the +#default targets listed in `deps/llvm.mk`, even if you don't necessarily need all of them. +# LLVM_TARGETS = ... +#Use ccache for faster recompilation in case you need to restart a build. 
+# USECCACHE = 1 +# CMAKE_GENERATOR=Ninja +# LLVM_ASSERTIONS=1 +# LLVM_DEBUG=Symbols +``` + +The various build phases are controlled by specific files: + * `deps/llvm.version` : touch or change to checkout a new version, `make get-llvm check-llvm` + * `deps/srccache/llvm/source-extracted` : result of `make extract-llvm` + * `deps/llvm/build_Release*/build-configured` : result of `make configure-llvm` + * `deps/llvm/build_Release*/build-configured` : result of `make compile-llvm` + * `usr-staging/llvm/build_Release*.tgz` : result of `make stage-llvm` (regenerate with `make reinstall-llvm`) + * `usr/manifest/llvm` : result of `make install-llvm` (regenerate with `make uninstall-llvm`) + * `make version-check-llvm` : runs every time to warn the user if there are local modifications + +Though Julia can be built with newer LLVM versions, support for this should be regarded as experimental and not suitable for packaging. ### libuv @@ -287,8 +336,8 @@ Please note that assert builds of Julia will be slower than regular (non-assert) ## Building 32-bit Julia on a 64-bit machine -Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine. Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/). -However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system. At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps: +Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine. Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/). +However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system. At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps: ```sh $ docker pull i386/ubuntu diff --git a/doc/src/devdocs/build/distributing.md b/doc/src/devdocs/build/distributing.md index c49f6f071224c..ed06c20fa0df3 100644 --- a/doc/src/devdocs/build/distributing.md +++ b/doc/src/devdocs/build/distributing.md @@ -2,9 +2,9 @@ Binary distributions ======================================= These notes are for those wishing to compile a binary distribution of Julia -for distribution on various platforms. We love users spreading Julia as +for distribution on various platforms. We love users spreading Julia as far and wide as they can, trying it out on as wide an array of -operating systems and hardware configurations as possible. 
As each +operating systems and hardware configurations as possible. As each platform has specific gotchas and processes that must be followed in order to create a portable, working Julia distribution, we have separated most of the notes by OS. @@ -53,7 +53,7 @@ as it will make Julia fail at startup on any machine with incompatible CPUs We therefore recommend that you pass the `MARCH` variable when calling `make`, setting it to the baseline target you intend to support. This will determine the target CPU for both the Julia executable and libraries, and the system -image (the latter can also be set using `JULIA_CPU_TARGET`). Typically useful +image (the latter can also be set using [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET)). Typically useful values for x86 CPUs are `x86-64` and `core2` (for 64-bit builds) and `pentium4` (for 32-bit builds). Unfortunately, CPUs older than Pentium 4 are currently not supported (see @@ -86,8 +86,8 @@ installation-wide initialization file. This file can be used by distribution managers to set up custom paths or initialization code. For Linux distribution packages, if `$prefix` is set to `/usr`, there is no `/usr/etc` to look into. This requires -the path to Julia's private `etc` directory to be changed. This can -be done via the `sysconfdir` make variable when building. Simply +the path to Julia's private `etc` directory to be changed. This can +be done via the `sysconfdir` make variable when building. Simply pass `sysconfdir=/etc` to `make` when building and Julia will first check `/etc/julia/startup.jl` before trying `$prefix/etc/julia/startup.jl`. @@ -97,18 +97,18 @@ OS X To create a binary distribution on OSX, build Julia first, then cd to `contrib/mac/app`, and run `make` with the same makevars that were used -with `make` when building Julia proper. This will then +with `make` when building Julia proper. This will then create a `.dmg` file in the `contrib/mac/app` directory holding a completely self-contained Julia.app. Alternatively, Julia may be built as a framework by invoking `make` with the -`darwinframework` target and `DARWIN_FRAMEWORK=1` set. For example, +`darwinframework` target and `DARWIN_FRAMEWORK=1` set. For example, `make DARWIN_FRAMEWORK=1 darwinframework`. Windows ------- -Instructions for reating a Julia distribution on Windows are described in the +Instructions for creating a Julia distribution on Windows are described in the [build devdocs for Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md). Notes on BLAS and LAPACK diff --git a/doc/src/devdocs/build/linux.md b/doc/src/devdocs/build/linux.md index 4e596ef73341b..8c4773e4e41ad 100644 --- a/doc/src/devdocs/build/linux.md +++ b/doc/src/devdocs/build/linux.md @@ -8,7 +8,7 @@ ## Architecture Customization -Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and `JULIA_CPU_TARGET`. +Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET). For example, to build for Pentium 4, set `MARCH=pentium4` and install the necessary system libraries for linking. On Ubuntu, these may include lib32gfortran-6-dev, lib32gcc1, and lib32stdc++6, among others. 
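To make the preceding example concrete, a minimal sketch of a Pentium 4 build on Ubuntu might look as follows; the 32-bit library package names are the ones listed above and vary between Ubuntu releases, so treat them as placeholders:

```sh
# Sketch only: the 32-bit library package names differ across Ubuntu releases.
echo 'MARCH=pentium4' >> Make.user
sudo apt-get install lib32gfortran-6-dev lib32gcc1 lib32stdc++6
make -j"$(nproc)"
```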
diff --git a/doc/src/devdocs/build/riscv.md b/doc/src/devdocs/build/riscv.md new file mode 100644 index 0000000000000..7c0e7ab29d9f8 --- /dev/null +++ b/doc/src/devdocs/build/riscv.md @@ -0,0 +1,103 @@ +# RISC-V (Linux) + +Julia has experimental support for 64-bit RISC-V (RV64) processors running +Linux. This file provides general guidelines for compilation, in addition to +instructions for specific devices. + +A list of [known issues](https://github.com/JuliaLang/julia/labels/system:riscv) +for RISC-V is available. If you encounter difficulties, please create an issue +including the output from `cat /proc/cpuinfo`. + + +## Compiling Julia + +For now, Julia will need to be compiled entirely from source, i.e., including +all of its dependencies. This can be accomplished with the following +`Make.user`: + +```make +USE_BINARYBUILDER := 0 +``` + +Additionally, it is required to indicate what architecture, and optionally which +CPU to build for. This can be done by setting the `MARCH` and `MCPU` variables +in `Make.user` + +The `MARCH` variable needs to be set to a RISC-V ISA string, which can be found by +looking at the documentation of your device, or by inspecting `/proc/cpuinfo`. Only +use flags that your compiler supports, e.g., run `gcc -march=help` to see a list of +supported flags. A common value is `rv64gc`, which is a good starting point. + +The `MCPU` variable is optional, and can be used to further optimize the +generated code for a specific CPU. If you are unsure, it is recommended to leave +it unset. You can find a list of supported values by running `gcc --target-help`. + +For example, if you are using a StarFive VisionFive2, which contains a JH7110 +processor based on the SiFive U74, you can set these flags as follows: + +```make +MARCH := rv64gc_zba_zbb +MCPU := sifive-u74 +``` + +If you prefer a portable build, you could use: + +```make +MARCH := rv64gc + +# also set JULIA_CPU_TARGET to the expanded form of rv64gc +# (it normally copies the value of MCPU, which we don't set) +JULIA_CPU_TARGET := generic-rv64,i,m,a,f,d,zicsr,zifencei,c +``` + +### Cross-compilation + +A native build on a RISC-V device may take a very long time, so it's also +possible to cross-compile Julia on a faster machine. + +First, get a hold of a RISC-V cross-compilation toolchain that provides +support for C, C++ and Fortran. This can be done by checking-out the +[riscv-gnu-toolchain](https://github.com/riscv-collab/riscv-gnu-toolchain) +repository and building it as follows: + +```sh +sudo mkdir /opt/riscv && sudo chown $USER /opt/riscv +./configure --prefix=/opt/riscv --with-languages=c,c++,fortran +make linux -j$(nproc) +``` + +Then, install the QEMU user-mode emulator for RISC-V, along with `binfmt` +support to enable execution of RISC-V binaries on the host machine. The +exact steps depend on your distribution, e.g., on Arch Linux it involves +installing the `qemu-user-static` and `qemu-user-static-binfmt` packages. +Note that to actually execute RISC-V binaries, QEMU will need to be able to +find the RISC-V system root, which can be achieved by setting the +`QEMU_LD_PREFIX` environment variable to the path of the root filesystem. 
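As a concrete illustration of this step, on Arch Linux the commands might look roughly like the following; the package names come from the paragraph above, and the sysroot path assumes the `/opt/riscv` prefix used earlier:

```sh
# Illustrative sketch for Arch Linux; adjust package names and paths for your distribution.
sudo pacman -S qemu-user-static qemu-user-static-binfmt
export QEMU_LD_PREFIX=/opt/riscv/sysroot
```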
+ +Finally, compile Julia with the following `Make.user` variables (in addition to +the ones from the previous section): + +```make +XC_HOST=riscv64-unknown-linux-gnu +OS=Linux +export QEMU_LD_PREFIX=/opt/riscv/sysroot +``` + +Note that you will have to execute `make` with `PATH` set to include the +cross-compilation toolchain, e.g., by running: + +```sh +PATH=/opt/riscv/bin:$PATH make -j$(nproc) +``` + +Because of the RISC-V sysroot we use being very barren, you may need to +add additional libraries that the Julia build system currently expects +to be available system-wide. For example, the build currently relies on +a system-provided `libz`, so you may need to copy this library from the +Julia build into the system root: + +```sh +make -C deps install-zlib +cp -v usr/lib/libz.* /opt/riscv/sysroot/usr/lib +cp -v usr/include/z*.h /opt/riscv/sysroot/usr/include +``` diff --git a/doc/src/devdocs/build/windows.md b/doc/src/devdocs/build/windows.md index 7192bb8a7a544..ba4af459e24d0 100644 --- a/doc/src/devdocs/build/windows.md +++ b/doc/src/devdocs/build/windows.md @@ -47,27 +47,30 @@ MinGW-w64 compilers available through Cygwin's package manager. either 32 or 64 bit Julia from either 32 or 64 bit Cygwin. 64 bit Cygwin has a slightly smaller but often more up-to-date selection of packages. - Advanced: you may skip steps 2-4 by running: + *Advanced*: you may skip steps 2-4 by running: - setup-x86_64.exe -s -q -P cmake,gcc-g++,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran - :: replace with a site from https://cygwin.com/mirrors.html - :: or run setup manually first and select a mirror + ```sh + setup-x86_64.exe -s -q -P cmake,gcc-g++,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran + ``` - 2. Select installation location and download mirror. + replacing `` with a site from [https://cygwin.com/mirrors.html](https://cygwin.com/mirrors.html) + or run setup manually first and select a mirror. - 3. At the '*Select Packages'* step, select the following: + 2. Select installation location and a mirror to download from. - 1. From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch` - 2. From the *Net* category: `curl` - 3. From *Interpreters* (or *Python*) category: `m4`, `python3` - 4. From the *Archive* category: `p7zip` - 5. For 32 bit Julia, and also from the *Devel* category: - `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran` - 6. For 64 bit Julia, and also from the *Devel* category: - `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran` + 3. At the *Select Packages* step, select the following: + + 1. From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch` + 2. From the *Net* category: `curl` + 3. From *Interpreters* (or *Python*) category: `m4`, `python3` + 4. From the *Archive* category: `p7zip` + 5. For 32 bit Julia, and also from the *Devel* category: + `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran` + 6. For 64 bit Julia, and also from the *Devel* category: + `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran` 4. Allow Cygwin installation to finish, then start from the installed shortcut - a *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively. + *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively. 5. Build Julia and its dependencies from source: @@ -93,64 +96,67 @@ MinGW-w64 compilers available through Cygwin's package manager. 
make -j 4 # Adjust the number of threads (4) to match your build environment. make -j 4 debug # This builds julia-debug.exe ``` - - - > Protip: build both! - > ```sh - > make O=julia-win32 configure - > make O=julia-win64 configure - > echo 'XC_HOST = i686-w64-mingw32' > julia-win32/Make.user - > echo 'XC_HOST = x86_64-w64-mingw32' > julia-win64/Make.user - > echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) - > $(error "in-tree build disabled") - > endif' >> Make.user - > make -C julia-win32 # build for Windows x86 in julia-win32 folder - > make -C julia-win64 # build for Windows x86-64 in julia-win64 folder - > ``` - 6. Run Julia using the Julia executables directly ```sh usr/bin/julia.exe usr/bin/julia-debug.exe ``` +!!! note "Pro tip: build both!" + ```sh + make O=julia-win32 configure + make O=julia-win64 configure + echo 'XC_HOST = i686-w64-mingw32' > julia-win32/Make.user + echo 'XC_HOST = x86_64-w64-mingw32' > julia-win64/Make.user + echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) + $(error "in-tree build disabled") + endif' >> Make.user + make -C julia-win32 # build for Windows x86 in julia-win32 folder + make -C julia-win64 # build for Windows x86-64 in julia-win64 folder + ``` + ### Compiling with MinGW/MSYS2 -> MSYS2 provides a robust MSYS experience. +[MSYS2](https://www.msys2.org/) is a software distribution and build environment for Windows. Note: MSYS2 requires **64 bit** Windows 7 or newer. - 1. Install and configure [MSYS2](https://www.msys2.org/), Software Distribution - and Building Platform for Windows. + 1. Install and configure MSYS2. 1. Download and run the latest installer for the [64-bit](https://github.com/msys2/msys2-installer/releases/latest) distribution. The installer will have a name like `msys2-x86_64-yyyymmdd.exe`. - 2. Open MSYS2. Update package database and base packages: - ```sh + 2. Open the MSYS2 shell. Update the package database and base packages: + + ``` pacman -Syu ``` + 3. Exit and restart MSYS2. Update the rest of the base packages: - 3. Exit and restart MSYS2, Update the rest of the base packages: - ```sh + ``` pacman -Syu ``` - 3. Then install tools required to build julia: - ```sh - # tools + 4. Then install tools required to build julia: + + ``` pacman -S cmake diffutils git m4 make patch tar p7zip curl python + ``` + + For 64 bit Julia, install the x86_64 version: - # For 64 bit Julia, install x86_64 + ``` pacman -S mingw-w64-x86_64-gcc - # For 32 bit Julia, install i686 - pacman -S mingw-w64-i686-gcc ``` - 4. Configuration of MSYS2 is complete. Now `exit` the MSYS2 shell. + For 32 bit Julia, install the i686 version: + ``` + pacman -S mingw-w64-i686-gcc + ``` + 5. Configuration of MSYS2 is complete. Now `exit` the MSYS2 shell. 2. Build Julia and its dependencies with pre-build dependencies. 1. Open a new [**MINGW64/MINGW32 shell**](https://www.msys2.org/docs/environments/#overview). @@ -158,25 +164,27 @@ Note: MSYS2 requires **64 bit** Windows 7 or newer. so if you want to build the x86_64 and i686 versions, you'll need to build them in each environment separately. - 2. and clone the Julia sources - ```sh + 2. Clone the Julia sources: + + ``` git clone https://github.com/JuliaLang/julia.git cd julia ``` 3. Start the build - ```sh + + ``` make -j$(nproc) ``` - > Protip: build in dir - > ```sh - > make O=julia-mingw-w64 configure - > echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) - > $(error "in-tree build disabled") - > endif' >> Make.user - > make -C julia-mingw-w64 - > ``` +!!! 
note "Pro tip: build in dir" + ```sh + make O=julia-mingw-w64 configure + echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) + $(error "in-tree build disabled") + endif' >> Make.user + make -C julia-mingw-w64 + ``` ### Cross-compiling from Unix (Linux/Mac/WSL) @@ -185,7 +193,7 @@ You can also use MinGW-w64 cross compilers to build a Windows version of Julia f Linux, Mac, or the Windows Subsystem for Linux (WSL). First, you will need to ensure your system has the required dependencies. We -need wine (>=1.7.5), a system compiler, and some downloaders. Note: a cygwin install might +need wine (>=1.7.5), a system compiler, and some downloaders. Note: a Cygwin install might interfere with this method if using WSL. **On Ubuntu** (on other Linux systems the dependency names are likely to be similar): @@ -193,12 +201,14 @@ interfere with this method if using WSL. apt-get install wine-stable gcc wget p7zip-full winbind mingw-w64 gfortran-mingw-w64 dpkg --add-architecture i386 && apt-get update && apt-get install wine32 # add sudo to each if needed # switch all of the following to their "-posix" variants (interactively): -for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do sudo update-alternatives --config $pkg; done +for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do + sudo update-alternatives --config $pkg +done ``` **On Mac**: Install XCode, XCode command line tools, X11 (now [XQuartz](https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php) -or [Homebrew](https://brew.sh/). Then run `port install wine wget mingw-w64`, or `brew +or [Homebrew](https://brew.sh/). Then run `port install wine wget mingw-w64`, or `brew install wine wget mingw-w64`, as appropriate. **Then run the build:** @@ -211,19 +221,19 @@ install wine wget mingw-w64`, as appropriate. 6. `make binary-dist` then `make exe` to create the Windows installer. 7. move the `julia-*.exe` installer to the target machine -If you are building for 64-bit windows, the steps are essentially the same. -Just replace `i686` in `XC_HOST` with `x86_64`. (note: on Mac, wine only runs +If you are building for 64-bit Windows, the steps are essentially the same. +Just replace `i686` in `XC_HOST` with `x86_64`. (Note: on Mac, wine only runs in 32-bit mode). ## Debugging a cross-compiled build under wine The most effective way to debug a cross-compiled version of Julia on the cross-compilation -host is to install a windows version of gdb and run it under wine as usual. The pre-built +host is to install a Windows version of GDB and run it under wine as usual. The pre-built packages available [as part of the MSYS2 -project](https://sourceforge.net/projects/msys2/files/REPOS/MINGW/) are known to work. Apart +project](https://packages.msys2.org/) are known to work. Apart from the GDB package you may also need the python and termcap packages. Finally, GDB's -prompt may not work when launch from the command line. This can be worked around by +prompt may not work when launched from the command line. This can be worked around by prepending `wineconsole` to the regular GDB invocation. @@ -232,30 +242,31 @@ prepending `wineconsole` to the regular GDB invocation. Compiling using one of the options above creates a basic Julia build, but not some extra components that are included if you run the full Julia binary installer. 
If you need these components, the easiest way to get them is to build the installer -yourself using ```make win-extras``` followed by ```make binary-dist``` and ```make exe```. Then running the resulting installer. +yourself using ```make win-extras``` followed by ```make binary-dist``` and ```make exe```. +Then run the resulting installer. ## Windows Build Debugging -### GDB hangs with cygwin mintty +### GDB hangs with Cygwin mintty -- Run gdb under the windows console (cmd) instead. gdb [may not function +- Run GDB under the Windows console (cmd) instead. GDB [may not function properly](https://www.cygwin.com/ml/cygwin/2009-02/msg00531.html) under mintty with non- - cygwin applications. You can use `cmd /c start` to start the windows console from mintty + Cygwin applications. You can use `cmd /c start` to start the Windows console from mintty if necessary. ### GDB not attaching to the right process - - Use the PID from the windows task manager or `WINPID` from the `ps` command - instead of the PID from unix style command line tools (e.g. `pgrep`). You - may need to add the PID column if it is not shown by default in the windows + - Use the PID from the Windows task manager or `WINPID` from the `ps` command + instead of the PID from unix-style command line tools (e.g. `pgrep`). You + may need to add the PID column if it is not shown by default in the Windows task manager. ### GDB not showing the right backtrace - When attaching to the julia process, GDB may not be attaching to the right - thread. Use `info threads` command to show all the threads and + thread. Use `info threads` command to show all the threads and `thread ` to switch threads. - Be sure to use a 32 bit version of GDB to debug a 32 bit build of Julia, or a 64 bit version of GDB to debug a 64 bit build of Julia. diff --git a/doc/src/devdocs/builtins.md b/doc/src/devdocs/builtins.md new file mode 100644 index 0000000000000..e53321f3e70a0 --- /dev/null +++ b/doc/src/devdocs/builtins.md @@ -0,0 +1,28 @@ +# [Core.Builtins](@id lib-builtins) + +## Builtin Function APIs + +The following Builtin function APIs are considered unstable, but provide the basic +definitions for what defines the abilities and behaviors of a Julia program. They are +typically accessed through a higher level generic API. + +```@docs +Core.memoryrefnew +Core.memoryrefoffset +Core.memoryrefget +Core.memoryrefset! +Core.memoryref_isassigned +Core.memoryrefswap! +Core.memoryrefmodify! +Core.memoryrefreplace! +Core.memoryrefsetonce! +Core.Intrinsics.atomic_pointerref +Core.Intrinsics.atomic_pointerset +Core.Intrinsics.atomic_pointerswap +Core.Intrinsics.atomic_pointermodify +Core.Intrinsics.atomic_pointerreplace +Core.get_binding_type +Core.IntrinsicFunction +Core.Intrinsics +Core.IR +``` diff --git a/doc/src/devdocs/cartesian.md b/doc/src/devdocs/cartesian.md index 1d338cbd8fab3..8d5d6d1832e23 100644 --- a/doc/src/devdocs/cartesian.md +++ b/doc/src/devdocs/cartesian.md @@ -26,7 +26,7 @@ end ``` In general, Cartesian allows you to write generic code that contains repetitive elements, like -the nested loops in this example. Other applications include repeated expressions (e.g., loop +the nested loops in this example. Other applications include repeated expressions (e.g., loop unwinding) or creating function calls with variable numbers of arguments without using the "splat" construct (`i...`). @@ -71,7 +71,7 @@ DocTestSetup = nothing The first argument to both of these macros is the number of expressions, which must be an integer. 
When you're writing a function that you intend to work in multiple dimensions, this may not be -something you want to hard-code. The recommended approach is to use a `@generated function`. Here's +something you want to hard-code. The recommended approach is to use a `@generated function`. Here's an example: ```julia @@ -91,7 +91,7 @@ Naturally, you can also prepare expressions or perform calculations before the ` ### Anonymous-function expressions as macro arguments Perhaps the single most powerful feature in `Cartesian` is the ability to supply anonymous-function -expressions that get evaluated at parsing time. Let's consider a simple example: +expressions that get evaluated at parsing time. Let's consider a simple example: ```julia @nexprs 2 j->(i_j = 1) @@ -106,8 +106,8 @@ i_2 = 1 ``` In each generated statement, an "isolated" `j` (the variable of the anonymous function) gets replaced -by values in the range `1:2`. Generally speaking, Cartesian employs a LaTeX-like syntax. This -allows you to do math on the index `j`. Here's an example computing the strides of an array: +by values in the range `1:2`. Generally speaking, Cartesian employs a LaTeX-like syntax. This +allows you to do math on the index `j`. Here's an example computing the strides of an array: ```julia s_1 = 1 @@ -133,6 +133,7 @@ Base.Cartesian.@nref Base.Cartesian.@nextract Base.Cartesian.@nexprs Base.Cartesian.@ncall +Base.Cartesian.@ncallkw Base.Cartesian.@ntuple Base.Cartesian.@nall Base.Cartesian.@nany diff --git a/doc/src/devdocs/compiler.md b/doc/src/devdocs/compiler.md index 0749eafd81bd3..8f5f2bb1aa17c 100644 --- a/doc/src/devdocs/compiler.md +++ b/doc/src/devdocs/compiler.md @@ -94,11 +94,16 @@ Use appropriate care when copying. ## Specialized Calling Convention Signature Representation -A `jl_returninfo_t` object describes the calling convention details of any callable. +A `jl_returninfo_t` object describes the specialized calling convention details of any +callable. It can be generated from any (specTypes, rettype) pair, such as a CodeInstance, or +other place they are declared. This is the expected calling convention for specptr, but +other data may be stored there. Only if the function pointer stored there has the +expected specialized calling convention will the corresponding flag be set in specsigflags +to indicate it is useable. -If any of the arguments or return type of a method can be represented unboxed, -and the method is not varargs, it'll be given an optimized calling convention -signature based on its `specTypes` and `rettype` fields. +If any of the arguments or return type of a method can be represented unboxed, and none are +unable to be represented unboxed (such as an unbounded vararg), it will be given an +optimized calling convention signature based on the `specTypes` and `rettype` values. The general principles are that: @@ -112,4 +117,5 @@ The total logic for this is implemented by `get_specsig_function` and `deserves_ Additionally, if the return type is a union, it may be returned as a pair of values (a pointer and a tag). If the union values can be stack-allocated, then sufficient space to store them will also be passed as a hidden first argument. +If the struct to return needs gc roots, space for those will be passed as a hidden second argument. It is up to the callee whether the returned pointer will point to this space, a boxed object, or even other constant memory. 
diff --git a/doc/src/devdocs/debuggingtips.md b/doc/src/devdocs/debuggingtips.md index 7639e8be2ef96..0c7ee9d98f046 100644 --- a/doc/src/devdocs/debuggingtips.md +++ b/doc/src/devdocs/debuggingtips.md @@ -41,11 +41,16 @@ useful. ## Useful Julia functions for Inspecting those variables - * `jl_gdblookup($rip)` :: For looking up the current function and line. (use `$eip` on i686 platforms) + * `jl_print_task_backtraces(0)` :: Similar to gdb's `thread apply all bt` or lldb's `thread backtrace + all`. Runs all threads while printing backtraces for all existing tasks. + * `jl_gdblookup($pc)` :: For looking up the current function and line. + * `jl_gdblookupinfo($pc)` :: For looking up the current method instance object. + * `jl_gdbdumpcode(mi)` :: For dumping all of `code_typed/code_llvm/code_asm` when the REPL is not working right. * `jlbacktrace()` :: For dumping the current Julia backtrace stack to stderr. Only usable after `record_backtrace()` has been called. * `jl_dump_llvm_value(Value*)` :: For invoking `Value->dump()` in gdb, where it doesn't work natively. For example, `f->linfo->functionObject`, `f->linfo->specFunctionObject`, and `to_function(f->linfo)`. + * `jl_dump_llvm_module(Module*)` :: For invoking `Module->dump()` in gdb, where it doesn't work natively. * `Type->dump()` :: only works in lldb. Note: add something like `;1` to prevent lldb from printing its prompt over the output * `jl_eval_string("expr")` :: for invoking side-effects to modify the current state or to lookup @@ -236,7 +241,7 @@ process) Julia now works out of the box with [rr](https://rr-project.org/), the lightweight recording and deterministic debugging framework from Mozilla. This allows you to replay the trace of an execution -deterministically. The replayed execution's address spaces, register contents, syscall data etc +deterministically. The replayed execution's address spaces, register contents, syscall data etc are exactly the same in every run. A recent version of rr (3.1.0 or higher) is required. diff --git a/doc/src/devdocs/external_profilers.md b/doc/src/devdocs/external_profilers.md index 956d66508fc89..836d821b91df9 100644 --- a/doc/src/devdocs/external_profilers.md +++ b/doc/src/devdocs/external_profilers.md @@ -13,7 +13,7 @@ you add it to `JL_TIMING_OWNERS` (and possibly `JL_TIMING_EVENTS`). ### Dynamically Enabling and Disabling Zones -The `JULIA_TIMING_SUBSYSTEMS` environment variable allows you to enable or disable zones for a specific Julia run. For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable the `INFERENCE` +The [`JULIA_TIMING_SUBSYSTEMS`](@ref JULIA_TIMING_SUBSYSTEMS) environment variable allows you to enable or disable zones for a specific Julia run. For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable the `INFERENCE` zones. ## Tracy Profiler @@ -39,7 +39,13 @@ run(TracyProfiler_jll.tracy()) !!! note On macOS, you may want to set the `TRACY_DPI_SCALE` environment variable to `1.0` if the UI elements in the profiler appear excessively large. -To run a "headless" instance that saves the trace to disk, use `TracyProfiler_jll.capture() -o mytracefile.tracy` instead. +To run a "headless" instance that saves the trace to disk, use + +```julia +run(`$(TracyProfiler_jll.capture()) -o mytracefile.tracy`) +``` + +instead. For information on using the Tracy UI, refer to the Tracy manual. 
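As a closing sketch for this section, the zone selection described earlier can be combined with a profiled run; `my_script.jl` is a placeholder for your own workload:

```sh
# Enable the GC zones and disable the INFERENCE zones for this run only
# (see the JULIA_TIMING_SUBSYSTEMS description above); my_script.jl is a placeholder.
JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" julia my_script.jl
```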
diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md index 283f63b2d0dce..777afaa56348d 100644 --- a/doc/src/devdocs/functions.md +++ b/doc/src/devdocs/functions.md @@ -15,14 +15,14 @@ has a `TypeName`. ## [Function calls](@id Function-calls) -Given the call `f(x,y)`, the following steps are performed: first, the method table to use is +Given the call `f(x, y)`, the following steps are performed: first, the method table to use is accessed as `typeof(f).name.mt`. Second, an argument tuple type is formed, `Tuple{typeof(f), typeof(x), typeof(y)}`. Note that the type of the function itself is the first element. This is because the type might have parameters, and so needs to take part in dispatch. This tuple type is looked up in the method table. This dispatch process is performed by `jl_apply_generic`, which takes two arguments: a pointer -to an array of the values f, x, and y, and the number of values (in this case 3). +to an array of the values `f`, `x`, and `y`, and the number of values (in this case 3). Throughout the system, there are two kinds of APIs that handle functions and argument lists: those that accept the function and arguments separately, and those that accept a single argument structure. @@ -214,16 +214,16 @@ use keyword arguments are dispatched directly to the called function's kwsorter. call: ```julia -circle((0,0), 1.0, color = red; other...) +circle((0, 0), 1.0, color = red; other...) ``` is lowered to: ```julia -kwcall(merge((color = red,), other), circle, (0,0), 1.0) +kwcall(merge((color = red,), other), circle, (0, 0), 1.0) ``` - `kwcall` (also in`Core`) denotes a kwcall signature and dispatch. +`kwcall` (also in`Core`) denotes a kwcall signature and dispatch. The keyword splatting operation (written as `other...`) calls the named tuple `merge` function. This function further unpacks each *element* of `other`, expecting each one to contain two values (a symbol and a value). @@ -267,7 +267,7 @@ element instead of the second. The front end generates type declarations for all closures. Initially, this was implemented by generating normal type declarations. However, this produced an extremely large number of constructors, all of which were trivial (simply passing all arguments through to [`new`](@ref)). Since methods are partially -ordered, inserting all of these methods is O(n^2), plus there are just too many of them to keep +ordered, inserting all of these methods is O(n²), plus there are just too many of them to keep around. This was optimized by generating `struct_type` expressions directly (bypassing default constructor generation), and using `new` directly to create closure instances. Not the prettiest thing ever, but you do what you gotta do. diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md index c072912e77c3f..a45e8afb271ce 100644 --- a/doc/src/devdocs/gc.md +++ b/doc/src/devdocs/gc.md @@ -2,71 +2,60 @@ ## Introduction -Julia has a serial, stop-the-world, generational, non-moving mark-sweep garbage collector. -Native objects are precisely scanned and foreign ones are conservatively marked. +Julia has a non-moving, partially concurrent, parallel, generational and mostly precise mark-sweep collector (an interface +for conservative stack scanning is provided as an option for users who wish to call Julia from C). -## Memory layout of objects and GC bits +## Allocation -An opaque tag is stored in the front of GC managed objects, and its lowest two bits are -used for garbage collection. 
The lowest bit is set for marked objects and the second -lowest bit stores age information (e.g. it's only set for old objects). +Julia uses two types of allocators, the size of the allocation request determining which one is used. Objects up to 2k +bytes are allocated on a per-thread free-list pool allocator, while objects larger than 2k bytes are allocated through libc +malloc. -Objects are aligned by a multiple of 4 bytes to ensure this pointer tagging is legal. +Julia’s pool allocator partitions objects on different size classes, so that a memory page managed by the pool allocator +(which spans 4 operating system pages on 64bit platforms) only contains objects of the same size class. Each memory +page from the pool allocator is paired with some page metadata stored on per-thread lock-free lists. The page metadata contains information such as whether the page has live objects at all, number of free slots, and offsets to the first and last objects in the free-list contained in that page. These metadata are used to optimize the collection phase: a page which has no live objects at all may be returned to the operating system without any need of scanning it, for example. -## Pool allocation +While a page that has no objects may be returned to the operating system, its associated metadata is permanently +allocated and may outlive the given page. As mentioned above, metadata for allocated pages are stored on per-thread lock-free +lists. Metadata for free pages, however, may be stored into three separate lock-free lists depending on whether the page has been mapped but never accessed (`page_pool_clean`), or whether the page has been lazily sweeped and it's waiting to be madvised by a background GC thread (`page_pool_lazily_freed`), or whether the page has been madvised (`page_pool_freed`). -Sufficiently small objects (up to 2032 bytes) are allocated on per-thread object -pools. +Julia's pool allocator follows a "tiered" allocation discipline. When requesting a memory page for the pool allocator, Julia will: -A three-level tree (analogous to a three-level page-table) is used to keep metadata -(e.g. whether a page has been allocated, whether contains marked objects, number of free objects etc.) -about address ranges spanning at least one page. -Sweeping a pool allocated object consists of inserting it back into the free list -maintained by its pool. +- Try to claim a page from `page_pool_lazily_freed`, which contains pages which were empty on the last stop-the-world phase, but not yet madvised by a concurrent sweeper GC thread. -## Malloc'd arrays and big objects +- If it failed claiming a page from `page_pool_lazily_freed`, it will try to claim a page from `the page_pool_clean`, which contains pages which were mmaped on a previous page allocation request but never accessed. -Two lists are used to keep track of the remaining allocated objects: -one for sufficiently large malloc'd arrays (`mallocarray_t`) and one for -sufficiently large objects (`bigval_t`). +- If it failed claiming a page from `pool_page_clean` and from `page_pool_lazily_freed`, it will try to claim a page + from `page_pool_freed`, which contains pages which have already been madvised by a concurrent sweeper GC thread and whose underlying virtual address can be recycled. -Sweeping these objects consists of unlinking them from their list and calling `free` on the -corresponding address. 
+- If it failed in all of the attempts mentioned above, it will mmap a batch of pages, claim one page for itself, and + insert the remaining pages into `page_pool_clean`. -## Generational and remembered sets +![Diagram of tiered pool allocation](./img/gc-tiered-allocation.jpg) -Field writes into old objects trigger a write barrier if the written field -points to a young object and if a write barrier has not been triggered on the old object yet. -In this case, the old object being written to is enqueued into a remembered set, and -its mark bit is set to indicate that a write barrier has already been triggered on it. +## Marking and Generational Collection -There is no explicit flag to determine whether a marking pass will scan the -entire heap or only through young objects and remembered set. -The mark bits of the objects themselves are used to determine whether a full mark happens. -The mark-sweep algorithm follows this sequence of steps: +Julia’s mark phase is implemented through a parallel iterative depth-first-search over the object graph. Julia’s collector is non-moving, so object age information can’t be determined through the memory region in which the object resides alone, but has to be somehow encoded in the object header or on a side table. The lowest two bits of an object’s header are used to store, respectively, a mark bit that is set when an object is scanned during the mark phase and an age bit for the generational collection. -- Objects in the remembered set have their GC mark bits reset -(these are set once write barrier is triggered, as described above) and are enqueued. +Generational collection is implemented through sticky bits: objects are only pushed to the mark-stack, and therefore +traced, if their mark-bits are not set. When objects reach the oldest generation, their mark-bits are not reset during +the so-called "quick-sweep", which leads to these objects not being traced in a subsequent mark phase. A "full-sweep", +however, causes the mark-bits of all objects to be reset, leading to all objects being traced in a subsequent mark phase. +Objects are promoted to the next generation during every sweep phase they survive. On the mutator side, field writes +are intercepted through a write barrier that pushes an object’s address into a per-thread remembered set if the object is +in the last generation, and if the object at the field being written is not. Objects in this remembered set are then traced +during the mark phase. -- Roots (e.g. thread locals) are enqueued. +## Sweeping -- Object graph is traversed and mark bits are set. +Sweeping of object pools for Julia may fall into two categories: if a given page managed by the pool allocator contains at least one live object, then a free-list must be threaded through its dead objects; if a given page contains no live objects at all, then its underlying physical memory may be returned to the operating system through, for instance, the use of madvise system calls on Linux. -- Object pools, malloc'd arrays and big objects are sweeped. On a full sweep, -the mark bits of all marked objects are reset. On a generational sweep, -only the mark bits of marked young objects are reset. - -- Mark bits of objects in the remembered set are set, -so we don't trigger the write barrier on them again. - -After these stages, old objects will be left with their mark bits set, -so that references from them are not explored in a subsequent generational collection. 
-This scheme eliminates the need of explicitly keeping a flag to indicate a full mark -(though a flag to indicate a full sweep is necessary). +The first category of sweeping is parallelized through work-stealing. For the second category of sweeping, if concurrent page sweeping is enabled through the flag `--gcthreads=X,1` we perform the madvise system calls in a background sweeper thread, concurrently with the mutator threads. During the stop-the-world phase of the collector, pool allocated pages which contain no live objects are initially pushed into the `pool_page_lazily_freed`. The background sweeping thread is then woken up and is responsible for removing pages from `pool_page_lazily_freed`, calling madvise on them, and inserting them into `pool_page_freed`. As described above, `pool_page_lazily_freed` is also shared with mutator threads. This implies that on allocation-heavy multithreaded workloads, mutator threads would often avoid a page fault on allocation (coming from accessing a fresh mmaped page or accessing a madvised page) by directly allocating from a page in `pool_page_lazily_freed`, while the background sweeper thread needs to madvise a reduce number of pages given some of them were already claimed by the mutators. ## Heuristics GC heuristics tune the GC by changing the size of the allocation interval between garbage collections. -If a GC was unproductive, then we increase the size of the allocation interval to allow objects more time to die. -If a GC returns a lot of space we can shrink the interval. The goal is to find a steady state where we are -allocating just about the same amount as we are collecting. + +The GC heuristics measure how big the heap size is after a collection and set the next collection according to the algorithm described by https://dl.acm.org/doi/10.1145/3563323, in summary, it argues that the heap target should have a square root relationship with the live heap, and that it should also be scaled by how fast the GC is freeing objects and how fast the mutators are allocating. The heuristics measure the heap size by counting the number of pages that are in use and the objects that use malloc. Previously we measured the heap size by counting the alive objects, but that doesn't take into account fragmentation which could lead to bad decisions, that also meant that we used thread local information (allocations) to make decisions about a process wide (when to GC), measuring pages means the decision is global. + +The GC will do full collections when the heap size reaches 80% of the maximum allowed size. 
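For reference, a small usage sketch of the flag mentioned in the sweeping section; the thread count is an arbitrary example:

```sh
# Run with 4 GC threads plus the ",1" that enables the concurrent page-sweeper
# thread described in the sweeping section above; my_script.jl is a placeholder.
julia --gcthreads=4,1 my_script.jl
```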
diff --git a/doc/src/devdocs/img/gc-tiered-allocation.jpg b/doc/src/devdocs/img/gc-tiered-allocation.jpg new file mode 100644 index 0000000000000..4ab0e1298364c Binary files /dev/null and b/doc/src/devdocs/img/gc-tiered-allocation.jpg differ diff --git a/doc/src/devdocs/img/precompilation_hang.png b/doc/src/devdocs/img/precompilation_hang.png new file mode 100644 index 0000000000000..d076b7697f271 Binary files /dev/null and b/doc/src/devdocs/img/precompilation_hang.png differ diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md index b6614d060a0c8..98f020dda1d8e 100644 --- a/doc/src/devdocs/inference.md +++ b/doc/src/devdocs/inference.md @@ -36,9 +36,9 @@ m = first(mths) # Create variables needed to call `typeinf_code` interp = Core.Compiler.NativeInterpreter() sparams = Core.svec() # this particular method doesn't have type-parameters -optimize = true # run all inference optimizations +run_optimizer = true # run all inference optimizations types = Tuple{typeof(convert), atypes.parameters...} # Tuple{typeof(convert), Type{Int}, UInt} -Core.Compiler.typeinf_code(interp, m, types, sparams, optimize) +Core.Compiler.typeinf_code(interp, m, types, sparams, run_optimizer) ``` If your debugging adventures require a `MethodInstance`, you can look it up by @@ -96,18 +96,20 @@ Each statement gets analyzed for its total cost in a function called as follows: ```jldoctest; filter=r"tuple.jl:\d+" julia> Base.print_statement_costs(stdout, map, (typeof(sqrt), Tuple{Int},)) # map(sqrt, (2,)) -map(f, t::Tuple{Any}) @ Base tuple.jl:273 - 0 1 ─ %1 = Base.getfield(_3, 1, true)::Int64 - 1 │ %2 = Base.sitofp(Float64, %1)::Float64 - 2 │ %3 = Base.lt_float(%2, 0.0)::Bool - 0 └── goto #3 if not %3 - 0 2 ─ invoke Base.Math.throw_complex_domainerror(:sqrt::Symbol, %2::Float64)::Union{} +map(f, t::Tuple{Any}) @ Base tuple.jl:281 + 0 1 ─ %1 = $(Expr(:boundscheck, true))::Bool + 0 │ %2 = builtin Base.getfield(_3, 1, %1)::Int64 + 1 │ %3 = intrinsic Base.sitofp(Float64, %2)::Float64 + 0 │ %4 = intrinsic Base.lt_float(%3, 0.0)::Bool + 0 └── goto #3 if not %4 + 0 2 ─ invoke Base.Math.throw_complex_domainerror(:sqrt::Symbol, %3::Float64)::Union{} 0 └── unreachable - 20 3 ─ %7 = Base.Math.sqrt_llvm(%2)::Float64 + 20 3 ─ %8 = intrinsic Base.Math.sqrt_llvm(%3)::Float64 0 └── goto #4 0 4 ─ goto #5 - 0 5 ─ %10 = Core.tuple(%7)::Tuple{Float64} - 0 └── return %10 + 0 5 ─ %11 = builtin Core.tuple(%8)::Tuple{Float64} + 0 └── return %11 + ``` The line costs are in the left column. This includes the consequences of inlining and other forms of optimization. diff --git a/doc/src/devdocs/isbitsunionarrays.md b/doc/src/devdocs/isbitsunionarrays.md index 2a25c033ec9fd..f01afe50985ec 100644 --- a/doc/src/devdocs/isbitsunionarrays.md +++ b/doc/src/devdocs/isbitsunionarrays.md @@ -18,6 +18,12 @@ Lastly, a value of `0x00` signals that the `nothing` value will be returned for type with a single type instance, it technically has a size of 0. The type tag byte for a type's Union field is stored directly after the field's computed Union memory. -## isbits Union Arrays +## isbits Union Memory -Julia can now also store "isbits Union" values inline in an Array, as opposed to requiring an indirection box. The optimization is accomplished by storing an extra "type tag array" of bytes, one byte per array element, alongside the bytes of the actual array data. This type tag array serves the same function as the type field case: its value signals the type of the actual stored Union value in the array. 
In terms of layout, a Julia Array can include extra "buffer" space before and after its actual data values, which are tracked in the `a->offset` and `a->maxsize` fields of the `jl_array_t*` type. The "type tag array" is treated exactly as another `jl_array_t*`, but which shares the same `a->offset`, `a->maxsize`, and `a->len` fields. So the formula to access an isbits Union Array's type tag bytes is `a->data + (a->maxsize - a->offset) * a->elsize + a->offset`; i.e. the Array's `a->data` pointer is already shifted by `a->offset`, so correcting for that, we follow the data all the way to the max of what it can hold `a->maxsize`, then adjust by `a->offset` more bytes to account for any present "front buffering" the array might be doing. This layout in particular allows for very efficient resizing operations as the type tag data only ever has to move when the actual array's data has to move. +Julia can now also store "isbits Union" values inline in a Memory, as opposed to requiring +an indirection box. The optimization is accomplished by storing an extra "type tag memory" +of bytes, one byte per element, alongside the bytes of the actual data. This type tag memory +serves the same function as the type field case: its value signals the type of the actual +stored Union value. The "type tag memory" directly follows the regular data space. So the +formula to access an isbits Union Array's type tag bytes is `a->data + a->length * +a->elsize`. diff --git a/doc/src/devdocs/jit.md b/doc/src/devdocs/jit.md index f33b968ad3948..96315c67b659f 100644 --- a/doc/src/devdocs/jit.md +++ b/doc/src/devdocs/jit.md @@ -59,20 +59,22 @@ In addition, there are a number of different transitional states that occur duri 1. When writing `invoke`, `specsigflags`, and `specptr`: 1. Perform an atomic compare-exchange operation of specptr assuming the old value was NULL. This compare-exchange operation should have at least acquire-release ordering, to provide ordering guarantees of the remaining memory operations in the write. - 2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written. + 2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written, then restart from step 1 if desired. 3. Write the new low bit of `specsigflags` to its final value. This may be a relaxed write. 4. Write the new `invoke` pointer to its final value. This must have at least a release memory ordering to synchronize with reads of `invoke`. 5. Set the second bit of `specsigflags` to 1. This must be at least a release memory ordering to synchronize with reads of `specsigflags`. This step completes the write operation and announces to all other threads that all fields have been set. 2. When reading all of `invoke`, `specsigflags`, and `specptr`: - 1. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`. - 2. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. - 3. Read the `specptr` field with at least an acquire memory ordering. + 1. Read the `specptr` field with any memory ordering. + 2. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`. + 3. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. 4. 
If `specptr` is NULL, then the `initial_invoke` pointer must not be relying on `specptr` to guarantee correct execution. Therefore, `invoke` is non-null, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. 5. If `specptr` is non-null, then `initial_invoke` might not be the final `invoke` field that uses `specptr`. This can occur if `specptr` has been written, but `invoke` has not yet been written. Therefore, spin on the second bit of `specsigflags` until it is set to 1 with at least acquire memory ordering. - 6. Re-read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `final_invoke`. + 6. Re-read the `invoke` field with any memory ordering. This load will be referred to as `final_invoke`. 7. Read the `specsigflags` field with any memory ordering. 8. `invoke` is `final_invoke`, `specsigflags` is the value read in step 7, `specptr` is the value read in step 3. 3. When updating a `specptr` to a different but equivalent function pointer: 1. Perform a release store of the new function pointer to `specptr`. Races here must be benign, as the old function pointer is required to still be valid, and any new ones are also required to be valid as well. Once a pointer has been written to `specptr`, it must always be callable whether or not it is later overwritten. +Correctly reading these fields is implemented in `jl_read_codeinst_invoke`. + Although these write, read, and update steps are complicated, they ensure that the JIT can update codeinsts without invalidating existing codeinsts, and that the JIT can update codeinsts without invalidating existing `invoke` pointers. This allows the JIT to potentially reoptimize functions at higher optimization levels in the future, and also will allow the JIT to support concurrent compilation of functions in the future. diff --git a/doc/src/devdocs/llvm-passes.md b/doc/src/devdocs/llvm-passes.md new file mode 100644 index 0000000000000..7b847abaa2149 --- /dev/null +++ b/doc/src/devdocs/llvm-passes.md @@ -0,0 +1,149 @@ +# Custom LLVM Passes + +Julia has a number of custom LLVM passes. Broadly, they can be classified into passes that are required to be run to maintain Julia semantics, and passes that take advantage of Julia semantics to optimize LLVM IR. + +## Semantic Passes + +These passes are used to transform LLVM IR into code that is legal to be run on a CPU. Their main purpose is to enable simpler IR to be emitted by codegen, which then enables other LLVM passes to optimize common patterns. + +### CPUFeatures + +* Filename: `llvm-cpufeatures.cpp` +* Class Name: `CPUFeaturesPass` +* Opt Name: `module(CPUFeatures)` + +This pass lowers the `julia.cpu.have_fma.(f32|f64)` intrinsic to either true or false, depending on the target architecture and target features present on the function. This intrinsic is often used to determine if using algorithms dependent on fast [fused multiply-add](https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation#Fused_multiply%E2%80%93add) operations is better than using standard algorithms not dependent on such instructions. + +### DemoteFloat16 + +* Filename: `llvm-demote-float16.cpp` +* ClassName: `DemoteFloat16Pass` +* Opt Name `function(DemoteFloat16)` + +This pass replaces [float16](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) operations with float32 operations on architectures that do not natively support float16 operations. This is done by inserting `fpext` and `fptrunc` instructions around any float16 operation. 
On architectures that do support native float16 operations, this pass is a no-op. + +### LateGCLowering + +* Filename: `llvm-late-gc-lowering.cpp` +* Class Name: `LateLowerGCPass` +* Opt Name: `function(LateLowerGCFrame)` + +This pass performs most of the GC rooting work required to track pointers between GC safepoints. It also lowers several intrinsics to their corresponding instruction translation, and is permitted to violate the non-integral invariants previously established (`pointer_from_objref` is lowered to a `ptrtoint` instruction here). This pass typically occupies the most time out of all the custom Julia passes, due to its dataflow algorithm to minimize the number of objects live at any safepoint. + +### FinalGCLowering + +* Filename: `llvm-final-gc-lowering.cpp` +* Class Name: `FinalLowerGCPass` +* Opt Name: `module(FinalLowerGC)` + +This pass lowers a few last intrinsics to their final form targeting functions in the `libjulia` library. Separating this from `LateGCLowering` enables other backends (GPU compilation) to supply their own custom lowerings for these intrinsics, enabling the Julia pipeline to be used on those backends as well. + +### LowerHandlers + +* Filename: `llvm-lower-handlers.cpp` +* Class Name: `LowerExcHandlersPass` +* Opt Name: `function(LowerExcHandlers)` + +This pass lowers exception handling intrinsics into calls to runtime functions that are actually called when handling exceptions. + +### RemoveNI + +* Filename: `llvm-remove-ni.cpp` +* Class Name: `RemoveNIPass` +* Opt Name: `module(RemoveNI)` + +This pass removes the non-integral address spaces from the module's datalayout string. This enables the backend to lower Julia's custom address spaces directly to machine code, without a costly rewrite of every pointer operation to address space 0. + +### SIMDLoop + +* Filename: `llvm-simdloop.cpp` +* Class Name: `LowerSIMDLoopPass` +* Opt Name: `loop(LowerSIMDLoop)` + +This pass acts as the main driver of the `@simd` annotation. Codegen inserts a `!llvm.loopid` marker at the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop). + +### LowerPTLS + +* Filename: `llvm-ptls.cpp` +* Class Name: `LowerPTLSPass` +* Opt Name: `module(LowerPTLSPass)` + +This pass lowers thread-local Julia intrinsics to assembly instructions. Julia relies on thread-local storage for garbage collection and multithreading task scheduling. When compiling code for system images and package images, this pass replaces calls to intrinsics with loads from global variables that are initialized at load time. + +If codegen produces a function with a `swiftself` argument and calling convention, this pass assumes the `swiftself` argument is the pgcstack and will replace the intrinsics with that argument. Doing so provides speedups on architectures that have slow thread local storage accesses. 
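+To get a feel for the IR that LowerPTLS and the other semantic passes consume, you can dump
+Julia's raw, unoptimized IR from the REPL. This is only a sketch: `allocate_one` is a
+hypothetical example function, and the exact intrinsics that appear (including the
+pgcstack/TLS lookup) vary between Julia versions and platforms.
+
+```julia
+using InteractiveUtils
+
+allocate_one() = Ref(1)  # allocates, so GC- and TLS-related intrinsics show up
+
+# Print the raw, unoptimized module IR, i.e. before the custom passes above have run.
+code_llvm(stdout, allocate_one, Tuple{}; raw=true, dump_module=true, optimize=false)
+```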
+ +### RemoveAddrspaces + +* Filename: `llvm-remove-addrspaces.cpp` +* Class Name: `RemoveAddrspacesPass` +* Opt Name: `module(RemoveAddrspaces)` + +This pass renames pointers in one address space to another address space. This is used to remove Julia-specific address spaces from LLVM IR. + +### RemoveJuliaAddrspaces + +* Filename: `llvm-remove-addrspaces.cpp` +* Class Name: `RemoveJuliaAddrspacesPass` +* Opt Name: `module(RemoveJuliaAddrspaces)` + +This pass removes Julia-specific address spaces from LLVM IR. It is mostly used for displaying LLVM IR in a less cluttered format. Internally, it is implemented off the RemoveAddrspaces pass. + +### Multiversioning + +* Filename: `llvm-multiversioning.cpp` +* Class Name: `MultiVersioningPass` +* Opt Name: `module(JuliaMultiVersioning)` + +This pass performs modifications to a module to create functions that are optimized for running on different architectures (see sysimg.md and pkgimg.md for more details). Implementation-wise, it clones functions and applies different target-specific attributes to them to allow the optimizer to use advanced features such as vectorization and instruction scheduling for that platform. It also creates some infrastructure to enable the Julia image loader to select the appropriate version of the function to call based on the architecture the loader is running on. The target-specific attributes are controlled by the `julia.mv.specs` module flag, which during compilation is derived from the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable. The pass must also be enabled by providing a `julia.mv.enable` module flag with a value of 1. + +!!! warning + + Use of `llvmcall` with multiversioning is dangerous. `llvmcall` enables access to features not typically exposed by the Julia APIs, and are therefore usually not available on all architectures. If multiversioning is enabled and code generation is requested for a target architecture that does not support the feature required by an `llvmcall` expression, LLVM will probably error out, likely with an abort and the message `LLVM ERROR: Do not know how to split the result of this operator!`. + +### GCInvariantVerifier + +* Filename: `llvm-gc-invariant-verifier.cpp` +* Class Name: `GCInvariantVerifierPass` +* Opt Name: `module(GCInvariantVerifier)` + +This pass is used to verify Julia's invariants about LLVM IR. This includes things such as the nonexistence of `ptrtoint` in Julia's [non-integral address spaces](https://llvm.org/docs/LangRef.html#non-integral-pointer-type) [^nislides] and the existence of only blessed `addrspacecast` instructions (Tracked -> Derived, 0 -> Tracked, etc). It performs no transformations on IR. + +[^nislides]: https://llvm.org/devmtg/2015-02/slides/chisnall-pointers-not-int.pdf + +## Optimization Passes + +These passes are used to perform transformations on LLVM IR that LLVM will not perform itself, e.g. fast math flag propagation, escape analysis, and optimizations on Julia-specific internal functions. They use knowledge about Julia's semantics to perform these optimizations. + +### AllocOpt + +* Filename: `llvm-alloc-opt.cpp` +* Class Name: `AllocOptPass` +* Opt Name: `function(AllocOpt)` + +Julia does not have the concept of a program stack as a place to allocate mutable objects. However, allocating objects on the stack reduces GC pressure and is critical for GPU compilation. 
Thus, `AllocOpt` performs heap to stack conversion of objects that it can prove do not [escape](https://en.wikipedia.org/wiki/Escape_analysis) the current function. It also performs a number of other optimizations on allocations, such as removing allocations that are never used, optimizing typeof calls to freshly allocated objects, and removing stores to allocations that are immediately overwritten. The escape analysis implementation is located in `llvm-alloc-helpers.cpp`. Currently, this pass does not use information from `EscapeAnalysis.jl`, though that may change in the future. + +### PropagateJuliaAddrspaces + +* Filename: `llvm-propagate-addrspaces.cpp` +* Class Name: `PropagateJuliaAddrspacesPass` +* Opt Name: `function(PropagateJuliaAddrspaces)` + +This pass is used to propagate Julia-specific address spaces through operations on pointers. LLVM is not allowed to introduce or remove addrspacecast instructions by optimizations, so this pass acts to eliminate redundant addrspace casts by replacing operations with their equivalent in a Julia address space. For more information on Julia's address spaces, see (TODO link to llvm.md). + +### JuliaLICM + +* Filename: `llvm-julia-licm.cpp` +* Class Name: `JuliaLICMPass` +* Opt Name: `loop(JuliaLICM)` + +This pass is used to hoist Julia-specific intrinsics out of loops. Specifically, it performs the following transformations: +1. Hoist `gc_preserve_begin` and sink `gc_preserve_end` out of loops when the preserved objects are loop-invariant. + 1. Since objects preserved within a loop are likely preserved for the duration of the loop, this transformation can reduce the number of `gc_preserve_begin`/`gc_preserve_end` pairs in the IR. This makes it easier for the `LateLowerGCPass` to identify where particular objects are preserved. +2. Hoist write barriers with invariant objects + 1. Here we assume that there are only two generations that an object can be a part of. Given that, a write barrier needs to only execute once for any pair of the same object. Thus, we can hoist write barriers out of loops when the object being written to is loop-invariant. +3. Hoist allocations out of loops when they do not escape the loop + 1. We use a very conservative definition of escape here, the same as the one used in `AllocOptPass`. This transformation can reduce the number of allocations in the IR, even when an allocation escapes the function altogether. + +!!! note + + This pass is required to preserve LLVM's [MemorySSA](https://llvm.org/docs/MemorySSA.html) ([Short Video](https://www.youtube.com/watch?v=bdxWmryoHak), [Longer Video](https://www.youtube.com/watch?v=1e5y6WDbXCQ)) and [ScalarEvolution](https://baziotis.cs.illinois.edu/compilers/introduction-to-scalar-evolution.html) ([Newer Slides](https://llvm.org/devmtg/2018-04/slides/Absar-ScalarEvolution.pdf) [Older Slides](https://llvm.org/devmtg/2009-10/ScalarEvolutionAndLoopOptimization.pdf)) analyses. 
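+To make the `gc_preserve` hoisting described above more concrete, here is a hedged Julia
+sketch (the function is purely illustrative, and whether the hoisting actually happens
+depends on the optimizer and the Julia version). The `GC.@preserve` block inside the loop
+only ever preserves `src`, which is loop-invariant, so the corresponding
+`gc_preserve_begin`/`gc_preserve_end` intrinsics are candidates for being hoisted out of
+(and sunk below) the loop by JuliaLICM.
+
+```julia
+function copy_bytes!(dest::Vector{UInt8}, src::Vector{UInt8})
+    for i in eachindex(dest, src)
+        # `src` does not change across iterations, so this preserve is loop-invariant.
+        GC.@preserve src begin
+            dest[i] = unsafe_load(pointer(src), i)
+        end
+    end
+    return dest
+end
+```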
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md index 4e5e90d7cdbc6..2155e5da6fd7b 100644 --- a/doc/src/devdocs/llvm.md +++ b/doc/src/devdocs/llvm.md @@ -11,13 +11,13 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir | File | Description | |:-------------------------------- |:------------------------------------------------------------------ | -| `aotcompile.cpp` | Legacy pass manager pipeline, compiler C-interface entry | +| `aotcompile.cpp` | Compiler C-interface entry and object file emission | | `builtins.c` | Builtin functions | | `ccall.cpp` | Lowering [`ccall`](@ref) | | `cgutils.cpp` | Lowering utilities, notably for array and tuple accesses | | `codegen.cpp` | Top-level of code generation, pass list, lowering builtins | | `debuginfo.cpp` | Tracks debug information for JIT code | -| `disasm.cpp` | Handles native object file and JIT code diassembly | +| `disasm.cpp` | Handles native object file and JIT code disassembly | | `gf.c` | Generic functions | | `intrinsics.cpp` | Lowering intrinsics | | `jitlayers.cpp` | JIT-specific code, ORC compilation layers/utilities | @@ -30,7 +30,6 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir | `llvm-julia-licm.cpp` | Custom LLVM pass to hoist/sink Julia-specific intrinsics | | `llvm-late-gc-lowering.cpp` | Custom LLVM pass to root GC-tracked values | | `llvm-lower-handlers.cpp` | Custom LLVM pass to lower try-catch blocks | -| `llvm-muladd.cpp` | Custom LLVM pass for fast-match FMA | | `llvm-multiversioning.cpp` | Custom LLVM pass to generate sysimg code on multiple architectures | | `llvm-propagate-addrspaces.cpp` | Custom LLVM pass to canonicalize addrspaces | | `llvm-ptls.cpp` | Custom LLVM pass to lower TLS operations | @@ -43,7 +42,7 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir Some of the `.cpp` files form a group that compile to a single object. The difference between an intrinsic and a builtin is that a builtin is a first class function -that can be used like any other Julia function. An intrinsic can operate only on unboxed data, +that can be used like any other Julia function. An intrinsic can operate only on unboxed data, and therefore its arguments must be statically typed. ### [Alias Analysis](@id LLVM-Alias-Analysis) @@ -75,7 +74,7 @@ implies that option by default. ## Passing options to LLVM -You can pass options to LLVM via the environment variable `JULIA_LLVM_ARGS`. +You can pass options to LLVM via the environment variable [`JULIA_LLVM_ARGS`](@ref JULIA_LLVM_ARGS). Here are example settings using `bash` syntax: * `export JULIA_LLVM_ARGS=-print-after-all` dumps IR after each pass. @@ -120,7 +119,14 @@ Here are example settings using `bash` syntax: On occasion, it can be useful to debug LLVM's transformations in isolation from the rest of the Julia system, e.g. because reproducing the issue inside `julia` would take too long, or because one wants to take advantage of LLVM's tooling -(e.g. bugpoint). To get unoptimized IR for the entire system image, pass the +(e.g. bugpoint). + +To start with, you can install the developer tools to work with LLVM via: +``` +make -C deps install-llvm-tools +``` + +To get unoptimized IR for the entire system image, pass the `--output-unopt-bc unopt.bc` option to the system image build process, which will output the unoptimized IR to an `unopt.bc` file. This file can then be passed to LLVM tools as usual. 
`libjulia` can function as an LLVM pass plugin and can be @@ -129,15 +135,15 @@ environment. In addition, it exposes the `-julia` meta-pass, which runs the entire Julia pass-pipeline over the IR. As an example, to generate a system image with the old pass manager, one could do: ``` -opt -enable-new-pm=0 -load libjulia-codegen.so -julia -o opt.bc unopt.bc + llc -o sys.o opt.bc cc -shared -o sys.so sys.o ``` To generate a system image with the new pass manager, one could do: ``` -opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -o opt.bc unopt.bc -llc -o sys.o opt.bc -cc -shared -o sys.so sys.o +./usr/tools/opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -o opt.bc unopt.bc +./usr/tools/llc -o sys.o opt.bc +./usr/tools/cc -shared -o sys.so sys.o ``` This system image can then be loaded by `julia` as usual. @@ -147,11 +153,29 @@ using: fun, T = +, Tuple{Int,Int} # Substitute your function of interest here optimize = false open("plus.ll", "w") do file - println(file, InteractiveUtils._dump_function(fun, T, false, false, false, true, :att, optimize, :default)) + code_llvm(file, fun, T; raw=true, dump_module=true, optimize) end ``` These files can be processed the same way as the unoptimized sysimg IR shown -above. +above, or if you want to see the LLVM IR yourself and get extra verification run, you can use +``` +./usr/tools/opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -S -verify-each plus.ll +``` +(note on MacOS this would be `libjulia-codegen.dylib` and on Windows `libjulia-codegen.dll`) + +## Running the LLVM test suite + +To run the llvm tests locally, you need to first install the tools, build julia, then you +can run the tests: +``` +make -C deps install-llvm-tools +make -j julia-src-release +make -C test/llvmpasses +``` + +If you want to run the individual test files directly, via the commands at the top of each +test file, the first step here will have installed the tools into `./usr/tools/opt`. Then +you'll want to manually replace `%s` with the name of the test file. ## Improving LLVM optimizations for Julia @@ -167,7 +191,7 @@ study it and the pass of interest in isolation. 3. Pick out the IR at the point just before the pass of interest runs. 4. Strip the debug metadata and fix up the TBAA metadata by hand. -The last step is labor intensive. Suggestions on a better way would be appreciated. +The last step is labor intensive. Suggestions on a better way would be appreciated. ## The jlcall calling convention @@ -320,8 +344,8 @@ ccall(:foo, Cvoid, (Ptr{Float64},), A) In lowering, the compiler will insert a conversion from the array to the pointer which drops the reference to the array value. However, we of course need to make sure that the array does stay alive while we're doing the -[`ccall`](@ref). To understand how this is done, first recall the lowering of the -above code: +[`ccall`](@ref). 
To understand how this is done, let's look at a hypothetical, +approximate lowering of the above code: ```julia return $(Expr(:foreigncall, :(:foo), Cvoid, svec(Ptr{Float64}), 0, :(:ccall), Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(A)), :(A))) ``` diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index bef1419b1c8f8..8d6672842c3c8 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -30,6 +30,7 @@ The following are definitely leaf locks (level 1), and must not try to acquire a > * jl_in_stackwalk (Win32) > * ResourcePool::mutex > * RLST_mutex +> * llvm_printing_mutex > * jl_locked_stream::mutex > * debuginfo_asyncsafe > * inference_timing_mutex @@ -40,7 +41,7 @@ The following are definitely leaf locks (level 1), and must not try to acquire a The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally: -> * typecache +> * global_roots_lock > * Module->lock > * JLDebuginfoPlugin::PluginMutex > * newly_inferred_mutex @@ -48,6 +49,7 @@ The following is a leaf lock (level 2), and only acquires level 1 locks (safepoi The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally: > * Method->writelock +> * typecache The following is a level 4 lock, which can only recurse to acquire level 1, 2, or 3 locks: @@ -69,19 +71,8 @@ The following is a level 5 lock The following are a level 6 lock, which can only recurse to acquire locks at lower levels: -> * codegen > * jl_modules_mutex -The following is an almost root lock (level end-1), meaning only the root look may be held when -trying to acquire it: - -> * typeinf -> -> > this one is perhaps one of the most tricky ones, since type-inference can be invoked from many -> > points -> > -> > currently the lock is merged with the codegen lock, since they call each other recursively - The following lock synchronizes IO operation. Be aware that doing any I/O (for example, printing warning messages or debug information) while holding any other lock listed above may result in pernicious and hard-to-find deadlocks. BE VERY CAREFUL! @@ -91,6 +82,8 @@ may result in pernicious and hard-to-find deadlocks. BE VERY CAREFUL!
> > > this may continue to be held after releasing the iolock, or acquired without it, > > but be very careful to never attempt to acquire the iolock while holding it +> +> * Libdl.LazyLibrary lock The following is the root lock, meaning no other lock shall be held when trying to acquire it: @@ -145,33 +138,17 @@ Module serializer : toplevel lock JIT & type-inference : codegen lock -MethodInstance/CodeInstance updates : Method->writelock, codegen lock +MethodInstance/CodeInstance updates : Method->writelock > * These are set at construction and immutable: > * specTypes > * sparam_vals > * def - -> * These are set by `jl_type_infer` (while holding codegen lock): -> * cache -> * rettype -> * inferred - * valid ages - -> * `inInference` flag: -> * optimization to quickly avoid recurring into `jl_type_infer` while it is already running -> * actual state (of setting `inferred`, then `fptr`) is protected by codegen lock +> * owner > * Function pointers: -> * these transition once, from `NULL` to a value, while the codegen lock is held -> -> * Code-generator cache (the contents of `functionObjectsDecls`): -> * these can transition multiple times, but only while the codegen lock is held -> * it is valid to use old version of this, or block for new versions of this, so races are benign, -> as long as the code is careful not to reference other data in the method instance (such as `rettype`) -> and assume it is coordinated, unless also holding the codegen lock +> * these transition once, from `NULL` to a value, which is coordinated internal to the JIT > -LLVMContext : codegen lock Method : Method->writelock diff --git a/doc/src/devdocs/meta.md b/doc/src/devdocs/meta.md index 7a58578b3e53e..7b37ceaad068d 100644 --- a/doc/src/devdocs/meta.md +++ b/doc/src/devdocs/meta.md @@ -2,7 +2,7 @@ In some circumstances, one might wish to provide hints or instructions that a given block of code has special properties: you might always want to inline it, or you might want to turn on special -compiler optimization passes. Starting with version 0.4, Julia has a convention that these instructions +compiler optimization passes. Starting with version 0.4, Julia has a convention that these instructions can be placed inside a `:meta` expression, which is typically (but not necessarily) the first expression in the body of a function. @@ -34,9 +34,8 @@ quote end ``` -`Base.pushmeta!(ex, :symbol, args...)` appends `:symbol` to the end of the `:meta` expression, -creating a new `:meta` expression if necessary. If `args` is specified, a nested expression containing -`:symbol` and these arguments is appended instead, which can be used to specify additional information. +`Base.pushmeta!(ex, tag::Union{Symbol,Expr})` appends `:tag` to the end of the `:meta` expression, +creating a new `:meta` expression if necessary. To use the metadata, you have to parse these `:meta` expressions. If your implementation can be performed within Julia, `Base.popmeta!` is very handy: `Base.popmeta!(body, :symbol)` will scan diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md index caba6c3f12190..8134132d6ee75 100644 --- a/doc/src/devdocs/object.md +++ b/doc/src/devdocs/object.md @@ -92,7 +92,7 @@ The corresponding global `jl_datatype_t` objects are created by [`jl_init_types` The garbage collector uses several bits from the metadata portion of the `jl_typetag_t` to track each object in the system. 
Further details about this algorithm can be found in the comments of -the [garbage collector implementation in `gc.c`](https://github.com/JuliaLang/julia/blob/master/src/gc.c). +the [garbage collector implementation in `gc-stock.c`](https://github.com/JuliaLang/julia/blob/master/src/gc-stock.c). ## Object allocation @@ -163,11 +163,8 @@ Arrays: ```c jl_array_t *jl_new_array(jl_value_t *atype, jl_tuple_t *dims); -jl_array_t *jl_new_arrayv(jl_value_t *atype, ...); jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr); -jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc); -jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z); -jl_array_t *jl_alloc_vec_any(size_t n); +jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); ``` Note that many of these have alternative allocation functions for various special-purposes. The @@ -182,7 +179,7 @@ jl_value_t *newstruct(jl_value_t *type); jl_value_t *newobj(jl_value_t *type, size_t nfields); ``` -And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc.c`), +And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc-stock.c`), then tagged with its type: ```c diff --git a/doc/src/devdocs/offset-arrays.md b/doc/src/devdocs/offset-arrays.md index cc647eb1bd464..9a234288c6097 100644 --- a/doc/src/devdocs/offset-arrays.md +++ b/doc/src/devdocs/offset-arrays.md @@ -2,7 +2,7 @@ Conventionally, Julia's arrays are indexed starting at 1, whereas some other languages start numbering at 0, and yet others -(e.g., Fortran) allow you to specify arbitrary starting indices. While there is much merit in +(e.g., Fortran) allow you to specify arbitrary starting indices. While there is much merit in picking a standard (i.e., 1 for Julia), there are some algorithms which simplify considerably if you can index outside the range `1:size(A,d)` (and not just `0:size(A,d)-1`, either). To facilitate such computations, Julia supports arrays with arbitrary indices. @@ -57,8 +57,8 @@ the cause try running julia with the option `--check-bounds=yes`.) ### Using `axes` for bounds checks and loop iteration `axes(A)` (reminiscent of `size(A)`) returns a tuple of `AbstractUnitRange{<:Integer}` objects, specifying -the range of valid indices along each dimension of `A`. When `A` has unconventional indexing, -the ranges may not start at 1. If you just want the range for a particular dimension `d`, there +the range of valid indices along each dimension of `A`. When `A` has unconventional indexing, +the ranges may not start at 1. If you just want the range for a particular dimension `d`, there is `axes(A, d)`. Base implements a custom range type, `OneTo`, where `OneTo(n)` means the same thing as `1:n` but @@ -102,7 +102,7 @@ a convenient way of producing an all-zeros array that matches the indices of A i Let's walk through a couple of explicit examples. First, if `A` has conventional indices, then `similar(Array{Int}, axes(A))` would end up calling `Array{Int}(undef, size(A))`, and thus return -an array. If `A` is an `AbstractArray` type with unconventional indexing, then `similar(Array{Int}, axes(A))` +an array. If `A` is an `AbstractArray` type with unconventional indexing, then `similar(Array{Int}, axes(A))` should return something that "behaves like" an `Array{Int}` but with a shape (including indices) that matches `A`. 
(The most obvious implementation is to allocate an `Array{Int}(undef, size(A))` and then "wrap" it in a type that shifts the indices.) @@ -118,7 +118,7 @@ This page focuses on the steps needed to define unconventional indexing. ### Custom `AbstractUnitRange` types If you're writing a non-1 indexed array type, you will want to specialize `axes` so it returns -a `UnitRange`, or (perhaps better) a custom `AbstractUnitRange`. The advantage of a custom type +a `UnitRange`, or (perhaps better) a custom `AbstractUnitRange`. The advantage of a custom type is that it "signals" the allocation type for functions like `similar`. If we're writing an array type for which indexing will start at 0, we likely want to begin by creating a new `AbstractUnitRange`, `ZeroRange`, where `ZeroRange(n)` is equivalent to `0:n-1`. @@ -150,7 +150,7 @@ axes(A::AbstractArray{T,N}, d) where {T,N} = d <= N ? axes(A)[d] : OneTo(1) ``` may not be what you want: you may need to specialize it to return something other than `OneTo(1)` -when `d > ndims(A)`. Likewise, in `Base` there is a dedicated function `axes1` which is equivalent +when `d > ndims(A)`. Likewise, in `Base` there is a dedicated function `axes1` which is equivalent to `axes(A, 1)` but which avoids checking (at runtime) whether `ndims(A) > 0`. (This is purely a performance optimization.) It is defined as: diff --git a/doc/src/devdocs/pkgimg.md b/doc/src/devdocs/pkgimg.md index d9fc1a33a4d24..64f4e640b7c19 100644 --- a/doc/src/devdocs/pkgimg.md +++ b/doc/src/devdocs/pkgimg.md @@ -9,7 +9,7 @@ In fact the underlying serialization format is the same, and the system image is Package images are shared libraries that contain both code and data. Like `.ji` cache files, they are generated per package. The data section contains both global data (global variables in the package) as well as the necessary metadata about what methods and types are defined by the package. The code section contains native objects that cache the final output of Julia's LLVM-based compiler. The command line option `--pkgimages=no` can be used to turn off object caching for this session. Note that this means that cache files have to likely be regenerated. -See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref env-max-num-precompile-files) for the upper limit of variants Julia caches per default. +See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref JULIA_MAX_NUM_PRECOMPILE_FILES) for the upper limit of variants Julia caches per default. !!! note While the package images present themselves as native shared libraries, they are only an approximation thereof. You will not be able to link against them from a native program and they must be loaded from Julia. @@ -17,7 +17,7 @@ See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref env-max-num-precompile-files) for th ## Linking -Since the package images contain native code, we must run a linker over them before we can use them. You can set the environment variable `JULIA_VERBOSE_LINKING` to `true` to make the package image linking process verbose. +Since the package images contain native code, we must run a linker over them before we can use them. You can set the environment variable [`JULIA_VERBOSE_LINKING`](@ref JULIA_VERBOSE_LINKING) to `true` to make the package image linking process verbose. Furthermore, we cannot assume that the user has a working system linker installed. Therefore, Julia ships with LLD, the LLVM linker, to provide a working out of the box experience. 
In `base/linking.jl`, we implement a limited interface to be able to link package images on all supported platforms. @@ -33,7 +33,7 @@ Dynamic libraries on macOS need to link against `-lSystem`. On recent macOS vers To that effect we link with `-undefined dynamic_lookup`. ## [Package images optimized for multiple microarchitectures](@id pkgimgs-multi-versioning) -Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogenous environment, with a unified cache, +Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogeneous environment, with a unified cache, you can set the environment variable `JULIA_CPU_TARGET=generic` to multi-version the object caches. ## Flags that impact package image creation and selection diff --git a/doc/src/devdocs/precompile_hang.md b/doc/src/devdocs/precompile_hang.md new file mode 100644 index 0000000000000..2204651848509 --- /dev/null +++ b/doc/src/devdocs/precompile_hang.md @@ -0,0 +1,98 @@ +# Fixing precompilation hangs due to open tasks or IO + +On Julia 1.10 or higher, you might see the following message: + +![Screenshot of precompilation hang](./img/precompilation_hang.png) + +This may repeat. If it continues to repeat with no hints that it will +resolve itself, you may have a "precompilation hang" that requires +fixing. Even if it's transient, you might prefer to resolve it so that +users will not be bothered by this warning. This page walks you +through how to analyze and fix such issues. + +If you follow the advice and hit `Ctrl-C`, you might see + +``` +^C Interrupted: Exiting precompilation... + + 1 dependency had warnings during precompilation: +┌ Test1 [ac89d554-e2ba-40bc-bc5c-de68b658c982] +│ [pid 2745] waiting for IO to finish: +│ Handle type uv_handle_t->data +│ timer 0x55580decd1e0->0x7f94c3a4c340 +``` + +This message conveys two key pieces of information: + +- the hang is occurring during precompilation of `Test1`, a dependency of `Test2` (the package we were trying to load with `using Test2`) +- during precompilation of `Test1`, Julia created a `Timer` object (use `?Timer` if you're unfamiliar with Timers) which is still open; until that closes, the process is hung + +If this is enough of a hint for you to figure out how `timer = Timer(args...)` is being created, one good solution is to add `wait(timer)` if `timer` eventually finishes on its own, or `close(timer)` if you need to force-close it, before the final `end` of the module. + +However, there are cases that may not be that straightforward. Usually the best option is to start by determining whether the hang is due to code in Test1 or whether it is due to one of Test1's dependencies: + +- Option 1: `Pkg.add("Aqua")` and use [`Aqua.test_persistent_tasks`](https://juliatesting.github.io/Aqua.jl/dev/#Aqua.test_persistent_tasks-Tuple{Base.PkgId}). This should help you identify which package is causing the problem, after which the instructions [below](@ref pchang_fix) should be followed. If needed, you can create a `PkgId` as `Base.PkgId(UUID("..."), "Test1")`, where `...` comes from the `uuid` entry in `Test1/Project.toml`. +- Option 2: manually diagnose the source of the hang. + +To manually diagnose: + +1. `Pkg.develop("Test1")` +2. Comment out all the code `include`d or defined in `Test1`, *except* the `using/import` statements. +3. 
Try `using Test2` (or even `using Test1` assuming that hangs too) again + +Now we arrive at a fork in the road: either + +- the hang persists, indicating it is [due to one of your dependencies](@ref pchang_deps) +- the hang disappears, indicating that it is [due to something in your code](@ref pchang_fix). + +## [Diagnosing and fixing hangs due to a package dependency](@id pchang_deps) + +Use a binary search to identify the problematic dependency: start by commenting out half your dependencies, then when you isolate which half is responsible comment out half of that half, etc. (You don't have to remove them from the project, just comment out the `using`/`import` statements.) + +Once you've identified a suspect (here we'll call it `ThePackageYouThinkIsCausingTheProblem`), first try precompiling that package. If it also hangs during precompilation, continue chasing the problem backwards. + +However, most likely `ThePackageYouThinkIsCausingTheProblem` will precompile fine. This suggests it's in the function `ThePackageYouThinkIsCausingTheProblem.__init__`, which does not run during precompilation of `ThePackageYouThinkIsCausingTheProblem` but *does* in any package that loads `ThePackageYouThinkIsCausingTheProblem`. To test this theory, set up a minimal working example (MWE), something like + +```julia +(@v1.10) pkg> generate MWE + Generating project MWE: + MWE\Project.toml + MWE\src\MWE.jl +``` + +where the source code of `MWE.jl` is + +```julia +module MWE +using ThePackageYouThinkIsCausingTheProblem +end +``` + +and you've added `ThePackageYouThinkIsCausingTheProblem` to MWE's dependencies. + +If that MWE reproduces the hang, you've found your culprit: +`ThePackageYouThinkIsCausingTheProblem.__init__` must be creating the `Timer` object. If the timer object can be safely `close`d, that's a good option. Otherwise, the most common solution is to avoid creating the timer while *any* package is being precompiled: add + +```julia +ccall(:jl_generating_output, Cint, ()) == 1 && return nothing +``` + +as the first line of `ThePackageYouThinkIsCausingTheProblem.__init__`, and it will avoid doing any initialization in any Julia process whose purpose is to precompile packages. + +## [Fixing package code to avoid hangs](@id pchang_fix) + +Search your package for suggestive words (here like "Timer") and see if you can identify where the problem is being created. Note that a method *definition* like + +```julia +maketimer() = Timer(timer -> println("hi"), 0; interval=1) +``` + +is not problematic in and of itself: it can cause this problem only if `maketimer` gets called while the module is being defined. This might be happening from a top-level statement such as + +```julia +const GLOBAL_TIMER = maketimer() +``` + +or it might conceivably occur in a [precompile workload](https://github.com/JuliaLang/PrecompileTools.jl). + +If you struggle to identify the causative lines, then consider doing a binary search: comment out sections of your package (or `include` lines to omit entire files) until you've reduced the problem in scope. diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md index d15723e945462..a0e072c0b1ae3 100644 --- a/doc/src/devdocs/probes.md +++ b/doc/src/devdocs/probes.md @@ -137,8 +137,8 @@ fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2) beaver = @spawn begin while true fib(30) - # This safepoint is necessary until #41616, since otherwise this - # loop will never yield to GC. + # A manual safepoint is necessary since otherwise this loop + # may never yield to GC. 
GC.safepoint() end end @@ -188,7 +188,7 @@ Julia session and get the PID and REPL's task address: _ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help. | | | | | | |/ _` | | | | |_| | | | (_| | | Version 1.6.2 (2021-07-14) - _/ |\__'_|_|_|\__'_| | Official https://julialang.org/ release + _/ |\__'_|_|_|\__'_| | Official https://julialang.org release |__/ | 1> getpid() @@ -206,7 +206,7 @@ Now we can start `bpftrace` and have it monitor `rt__new__task` for *only* this And if we spawn a single task: -`@async 1+1` +`Threads.@spawn 1+1` we see this task being created: @@ -215,8 +215,8 @@ we see this task being created: However, if we spawn a bunch of tasks from that newly-spawned task: ```julia -@async for i in 1:10 - @async 1+1 +Threads.@spawn for i in 1:10 + Threads.@spawn 1+1 end ``` @@ -264,7 +264,7 @@ We can see this problem illustrated with `bpftrace` quite easily. First, in one _ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help. | | | | | | |/ _` | | | | |_| | | | (_| | | Version 1.6.2 (2021-07-14) - _/ |\__'_|_|_|\__'_| | Official https://julialang.org/ release + _/ |\__'_|_|_|\__'_| | Official https://julialang.org release |__/ | 1> getpid() diff --git a/doc/src/devdocs/require.md b/doc/src/devdocs/require.md index 5198a7425ee49..9f824e78a8653 100644 --- a/doc/src/devdocs/require.md +++ b/doc/src/devdocs/require.md @@ -7,26 +7,22 @@ precompilation cache. It is the implementation of the `import` statement. The features below are experimental and not part of the stable Julia API. Before building upon them inform yourself about the current thinking and whether they might change soon. -### Module loading callbacks +### Package loading callbacks -It is possible to listen to the modules loaded by `Base.require`, by registering a callback. +It is possible to listen to the packages loaded by `Base.require`, by registering a callback. ```julia -loaded_packages = Channel{Symbol}() -callback = (mod::Symbol) -> put!(loaded_packages, mod) +loaded_packages = Base.PkgId[] +callback = (pkg::Base.PkgId) -> push!(loaded_packages, pkg) push!(Base.package_callbacks, callback) ``` -Please note that the symbol given to the callback is a non-unique identifier and -it is the responsibility of the callback provider to walk the module chain to -determine the fully qualified name of the loaded binding. +Using this would look something like: -The callback below is an example of how to do that: +```julia-repl +julia> using Example -```julia -# Get the fully-qualified name of a module. -function module_fqn(name::Symbol) - fqn = fullname(Base.root_module(name)) - return join(fqn, '.') -end +julia> loaded_packages +1-element Vector{Base.PkgId}: + Example [7876af07-990d-54b4-ab0e-23690620f79a] ``` diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md index 6d3de6d1f5758..2eb065a62e4bf 100644 --- a/doc/src/devdocs/ssair.md +++ b/doc/src/devdocs/ssair.md @@ -1,5 +1,53 @@ # Julia SSA-form IR +Julia uses a static single assignment intermediate representation ([SSA IR](https://en.wikipedia.org/wiki/Static_single-assignment_form)) to perform optimization. +This IR is different from LLVM IR, and unique to Julia. +It allows for Julia specific optimizations. + +1. Basic blocks (regions with no control flow) are explicitly annotated. +2. if/else and loops are turned into `goto` statements. +3. lines with multiple operations are split into multiple lines by introducing variables. 
+ +For example the following Julia code: +```julia +function foo(x) + y = sin(x) + if x > 5.0 + y = y + cos(x) + end + return exp(2) + y +end +``` +when called with a `Float64` argument is translated into: + +```julia +using InteractiveUtils +@code_typed foo(1.0) +``` + +```llvm +CodeInfo( +1 ─ %1 = invoke Main.sin(x::Float64)::Float64 +│ %2 = Base.lt_float(x, 5.0)::Bool +└── goto #3 if not %2 +2 ─ %4 = invoke Main.cos(x::Float64)::Float64 +└── %5 = Base.add_float(%1, %4)::Float64 +3 ┄ %6 = φ (#2 => %5, #1 => %1)::Float64 +│ %7 = Base.add_float(7.38905609893065, %6)::Float64 +└── return %7 +) => Float64 +``` + +In this example, we can see all of these changes. +1. The first basic block is everything in +```llvm +1 ─ %1 = invoke Main.sin(x::Float64)::Float64 +│ %2 = Base.lt_float(x, 5.0)::Bool +└── goto #3 if not %2 +``` +2. The `if` statement is translated into `goto #3 if not %2` which goes to the 3rd basic block if `x>5` isn't met and otherwise goes to the second basic block. +3. `%2` is an SSA value introduced to represent `x > 5`. + ## Background Beginning in Julia 0.7, parts of the compiler use a new [SSA-form](https://en.wikipedia.org/wiki/Static_single_assignment_form) @@ -11,11 +59,9 @@ linearized (i.e. turned into a form where function arguments could only be SSA v conditional control flow). This negated much of the usefulness of SSA form representation when performing middle end optimizations. Some heroic effort was put into making these optimizations work without a complete SSA form representation, but the lack of such a representation ultimately proved prohibitive. +## Categories of IR nodes -## New IR nodes - -With the new IR representation, the compiler learned to handle four new IR nodes, Phi nodes, Pi -nodes as well as PhiC nodes and Upsilon nodes (the latter two are only used for exception handling). +The SSA IR representation has four categories of IR nodes: Phi, Pi, PhiC, and Upsilon nodes (the latter two are only used for exception handling). ### Phi nodes and Pi nodes @@ -37,6 +83,15 @@ may assume that any use of a Phi node will have an assigned value in the corresp for the mapping to be incomplete, i.e. for a Phi node to have missing incoming edges. In that case, it must be dynamically guaranteed that the corresponding value will not be used. +Note that SSA uses semantically occur after the terminator of the corresponding predecessor ("on the edge"). +Consequently, if multiple Phi nodes appear at the start of a basic block, they are run simultaneously. +This means that in the following IR snippet, if we came from block `23`, `%46` will take the value associated to +`%45` _before_ we entered this block. +```julia +%45 = φ (#18 => %23, #23 => %50) +%46 = φ (#18 => 1.0, #23 => %45) +``` + PiNodes encode statically proven information that may be implicitly assumed in basic blocks dominated by a given pi node. They are conceptually equivalent to the technique introduced in the paper [ABCD: Eliminating Array Bounds Checks on Demand](https://dl.acm.org/citation.cfm?id=358438.349342) or the predicate info nodes in LLVM. 
To see how they work, consider, @@ -144,7 +199,7 @@ The corresponding IR (with irrelevant types stripped) is: 4 ┄ %13 = φᶜ (%3, %6, %9)::Bool │ %14 = φᶜ (%4, %7, %10)::Core.Compiler.MaybeUndef(Int64) │ %15 = φᶜ (%5)::Core.Const(1) -└── $(Expr(:leave, 1)) +└── $(Expr(:leave, Core.SSAValue(2))) 5 ─ $(Expr(:pop_exception, :(%2)))::Any │ $(Expr(:throw_undef_if_not, :y, :(%13)))::Any │ %19 = Core.tuple(%15, %14) @@ -179,7 +234,7 @@ Instead, we do the following: - RAUW style operations are performed by setting the corresponding statement index to the replacement value. - Statements are erased by setting the corresponding statement to `nothing` (this is essentially just a special-case - convention of the above. + convention of the above). - If there are any uses of the statement being erased, they will be set to `nothing`. There is a `compact!` function that compacts the above data structure by performing the insertion of nodes in the appropriate place, trivial copy propagation, and renaming of uses to any changed SSA values. However, the clever part diff --git a/doc/src/devdocs/stdio.md b/doc/src/devdocs/stdio.md index 5ee4f0206ee0b..352420e25de77 100644 --- a/doc/src/devdocs/stdio.md +++ b/doc/src/devdocs/stdio.md @@ -36,7 +36,7 @@ Julia's `__init__()` function (in `base/sysimg.jl`) calls `reinit_stdio()` (in ` to create Julia objects for [`Base.stdin`](@ref), [`Base.stdout`](@ref) and [`Base.stderr`](@ref). `reinit_stdio()` uses [`ccall`](@ref) to retrieve pointers to `JL_STD*` and calls `jl_uv_handle_type()` -to inspect the type of each stream. It then creates a Julia `Base.IOStream`, `Base.TTY` or `Base.PipeEndpoint` +to inspect the type of each stream. It then creates a Julia `Base.IOStream`, `Base.TTY` or `Base.PipeEndpoint` object to represent each stream, e.g.: ``` @@ -63,7 +63,7 @@ stream.jl: function write(s::IO, p::Ptr, nb::Integer) ## printf() during initialization The libuv streams relied upon by `jl_printf()` etc., are not available until midway through -initialization of the runtime (see `init.c`, `init_stdio()`). Error messages or warnings that +initialization of the runtime (see `init.c`, `init_stdio()`). Error messages or warnings that need to be printed before this are routed to the standard C library `fwrite()` function by the following mechanism: diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md index cec7a64a65245..75b76bcb563a1 100644 --- a/doc/src/devdocs/subarrays.md +++ b/doc/src/devdocs/subarrays.md @@ -1,6 +1,6 @@ # SubArrays -Julia's `SubArray` type is a container encoding a "view" of a parent [`AbstractArray`](@ref). This page +Julia's `SubArray` type is a container encoding a "view" of a parent [`AbstractArray`](@ref). This page documents some of the design principles and implementation of `SubArray`s. One of the major design goals is to ensure high performance for views of both [`IndexLinear`](@ref) and @@ -56,8 +56,8 @@ struct SubArray{T,N,P,I,L} <: AbstractArray{T,N} end ``` -`SubArray` has 5 type parameters. The first two are the standard element type and dimensionality. - The next is the type of the parent `AbstractArray`. The most heavily-used is the fourth parameter, +`SubArray` has 5 type parameters. The first two are the standard element type and dimensionality. + The next is the type of the parent `AbstractArray`. The most heavily-used is the fourth parameter, a `Tuple` of the types of the indices for each dimension. 
The final one, `L`, is only provided as a convenience for dispatch; it's a boolean that represents whether the index types support fast linear indexing. More on that later. @@ -78,8 +78,8 @@ one to dispatch to efficient algorithms. ### Index translation Performing index translation requires that you do different things for different concrete `SubArray` -types. For example, for `S1`, one needs to apply the `i,j` indices to the first and third dimensions -of the parent array, whereas for `S2` one needs to apply them to the second and third. The simplest +types. For example, for `S1`, one needs to apply the `i,j` indices to the first and third dimensions +of the parent array, whereas for `S2` one needs to apply them to the second and third. The simplest approach to indexing would be to do the type-analysis at runtime: ```julia @@ -161,7 +161,7 @@ julia> diff(A[2:2:4,:][:]) ``` A view constructed as `view(A, 2:2:4, :)` happens to have uniform stride, and therefore linear -indexing indeed could be performed efficiently. However, success in this case depends on the +indexing indeed could be performed efficiently. However, success in this case depends on the size of the array: if the first dimension instead were odd, ```jldoctest @@ -192,7 +192,7 @@ then `A[2:2:4,:]` does not have uniform stride, so we cannot guarantee efficient levels of indirection; they can simply re-compute the indices into the original parent array! * Hopefully by now it's fairly clear that supporting slices means that the dimensionality, given by the parameter `N`, is not necessarily equal to the dimensionality of the parent array or the - length of the `indices` tuple. Neither do user-supplied indices necessarily line up with entries + length of the `indices` tuple. Neither do user-supplied indices necessarily line up with entries in the `indices` tuple (e.g., the second user-supplied index might correspond to the third dimension of the parent array, and the third element in the `indices` tuple). diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md index 40fcd3fa602f8..2cbba2744d4a1 100644 --- a/doc/src/devdocs/sysimg.md +++ b/doc/src/devdocs/sysimg.md @@ -3,15 +3,15 @@ ## [Building the Julia system image](@id Building-the-Julia-system-image) Julia ships with a preparsed system image containing the contents of the `Base` module, named -`sys.ji`. This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on -as many platforms as possible, so as to give vastly improved startup times. On systems that do +`sys.ji`. This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on +as many platforms as possible, so as to give vastly improved startup times. On systems that do not ship with a precompiled system image file, one can be generated from the source files shipped in Julia's `DATAROOTDIR/julia/base` folder. Julia will by default generate its system image on half of the available system threads. This -may be controlled by the [`JULIA_IMAGE_THREADS`](@ref env-image-threads) environment variable. +may be controlled by the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable. -This operation is useful for multiple reasons. A user may: +This operation is useful for multiple reasons. A user may: * Build a precompiled shared library system image on a platform that did not ship with one, thereby improving startup times. @@ -34,7 +34,7 @@ based on available CPU features. 
### Specifying multiple system image targets A multi-microarchitecture system image can be enabled by passing multiple targets -during system image compilation. This can be done either with the `JULIA_CPU_TARGET` make option +during system image compilation. This can be done either with the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) make option or with the `-C` command line option when running the compilation command manually. Multiple targets are separated by `;` in the option string. The syntax for each target is a CPU name followed by multiple features separated by `,`. @@ -84,11 +84,11 @@ generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1) This creates a system image with three separate targets; one for a generic `x86_64` processor, one with a `sandybridge` ISA (explicitly excluding `xsaveopt`) that explicitly clones all functions, and one targeting the `haswell` ISA, based off of the `sandybridge` -sysimg version, and also excluding `rdrnd`. When a Julia implementation loads the +sysimg version, and also excluding `rdrnd`. When a Julia implementation loads the generated sysimg, it will check the host processor for matching CPU capability flags, -enabling the highest ISA level possible. Note that the base level (`generic`) requires +enabling the highest ISA level possible. Note that the base level (`generic`) requires the `cx16` instruction, which is disabled in some virtualization software and must be -enabled for the `generic` target to be loaded. Alternatively, a sysimg could be generated +enabled for the `generic` target to be loaded. Alternatively, a sysimg could be generated with the target `generic,-cx16` for greater compatibility, however note that this may cause performance and stability problems in some code. @@ -117,3 +117,82 @@ See code comments for each components for more implementation details. depending on the ISA. The target selection will prefer exact CPU name match, larger vector register size, and larger number of features. An overview of this process is in `src/processor.cpp`. + +## Trimming + +System images are typically quite large, since Base includes a lot of functionality, and by +default system images also include several packages such as LinearAlgebra for convenience +and backwards compatibility. Most programs will use only a fraction of the functions in +these packages. Therefore it makes sense to build binaries that exclude unused functions +to save space, referred to as "trimming". + +While the basic idea of trimming is sound, Julia has dynamic and reflective features that make it +difficult (or impossible) to know in general which functions are unused. As an extreme example, +consider code like + +``` +getglobal(Base, Symbol(readchomp(stdin)))(1) +``` + +This code reads a function name from `stdin` and calls the named function from Base on the value +`1`. In this case it is impossible to predict which function will be called, so no functions +can reliably be considered "unused". With some noteworthy exceptions (Julia's own REPL being +one of them), most real-world programs do not do things like this. + +Less extreme cases occur, for example, when there are type instabilities that make it impossible +for the compiler to predict which method will be called. However, if code is well-typed and does +not use reflection, a complete and (hopefully) relatively small set of needed methods can be +determined, and the rest can be removed. The `--trim` command-line option requests this kind of +compilation. 
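+The difference can be illustrated with a small sketch (both functions here are hypothetical
+and are not taken from Base; this is not the exact analysis the compiler performs):
+
+```julia
+# Too dynamic to trim: the call target depends on a runtime string, so the set of
+# reachable methods cannot be narrowed down at compile time.
+call_by_name(s::String) = getglobal(Base, Symbol(s))(1)
+
+# Trimming-friendly: every call target is inferable from the concrete argument types.
+hypot_int(x::Int, y::Int) = sqrt(float(x)^2 + float(y)^2)
+```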
+ +When `--trim` is specified in a command used to build a system image, the compiler begins +tracing calls starting at methods marked using `Base.Experimental.entrypoint`. If a call is too +dynamic to reasonably narrow down the possible call targets, an error is given at compile +time showing the location of the call. For testing purposes, it is possible to skip these +errors by specifying `--trim=unsafe` or `--trim=unsafe-warn`. Then you will get a system +image built, but it may crash at run time if needed code is not present. + +It typically makes sense to specify `--strip-ir` along with `--trim`, since trimmed binaries +are fully compiled and therefore don't need Julia IR. At some point we may make `--trim` imply +`--strip-ir`, but for now we have kept them orthogonal. + +To get the smallest possible binary, it will also help to specify `--strip-metadata` and +run the Unix `strip` utility. However, those steps remove Julia-specific and native (DWARF format) +debug info, respectively, and so will make debugging more difficult. + +### Common problems + +- The Base global variables `stdin`, `stdout`, and `stderr` are non-constant and so their + types are not known. All printing should use a specific IO object with a known type. + The easiest substitution is to use `print(Core.stdout, x)` instead of `print(x)` or + `print(stdout, x)`. +- Use tools like [JET.jl](https://github.com/aviatesk/JET.jl), + [Cthulhu.jl](https://github.com/JuliaDebug/Cthulhu.jl), and/or + [SnoopCompile](https://github.com/timholy/SnoopCompile.jl) + to identify failures of type-inference, and follow our [Performance Tips](@ref) to fix them. + +### Compatibility concerns + +We have identified many small changes to Base that significantly increase the set of programs +that can be reliably trimmed. Unfortunately some of those changes would be considered breaking, +and so are only applied when trimming is requested (this is done by an external build script, +currently maintained inside the test suite as `contrib/juliac-buildscript.jl`). +Therefore in many cases trimming will require you to opt in to new variants of Base and some +standard libraries. + +If you want to use trimming, it is important to set up continuous integration testing that +performs a trimmed build and fully tests the resulting program. +Fortunately, if your program successfully compiles with `--trim` then it is very likely to work +the same as it did before. However, CI is needed to ensure that your program continues to build +with trimming as you develop it. + +Package authors may wish to test that their package is "trimming safe", however this is impossible +in general. Trimming is only expected to work given concrete entry points such as `main()` and +library entry points meant to be called from outside Julia. For generic packages, existing tests +for type stability like `@inferred` and `JET.@report_call` are about as close as you can get to checking +trim compatibility. + +Trimming also introduces new compatibility issues between minor versions of Julia. At this time, +we are not able to guarantee that a program that can be trimmed in one version of Julia +can also be trimmed in all future versions of Julia. However, breakage of that kind is expected +to be rare. We also plan to try to *increase* the set of programs that can be trimmed over time. 
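+As a final, minimal sketch of the `Core.stdout` substitution mentioned under the common
+problems above (the `greet` function is hypothetical):
+
+```julia
+# `stdout` is a non-constant global, so its type is unknown under `--trim`;
+# `Core.stdout` is a concrete IO object, which keeps the printing call well-typed.
+greet(name::String) = print(Core.stdout, "Hello, ", name, "\n")
+```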
diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md index c3afc26600c65..a09df61e4881d 100644 --- a/doc/src/devdocs/types.md +++ b/doc/src/devdocs/types.md @@ -1,6 +1,6 @@ # More about types -If you've used Julia for a while, you understand the fundamental role that types play. Here we +If you've used Julia for a while, you understand the fundamental role that types play. Here we try to get under the hood, focusing particularly on [Parametric Types](@ref). ## Types and sets (and `Any` and `Union{}`/`Bottom`) @@ -52,7 +52,7 @@ julia> typejoin(Tuple{Integer, Float64}, Tuple{Int, Real}) Tuple{Integer, Real} ``` -While these operations may seem abstract, they lie at the heart of Julia. For example, method +While these operations may seem abstract, they lie at the heart of Julia. For example, method dispatch is implemented by stepping through the items in a method list until reaching one for which the type of the argument tuple is a subtype of the method signature. For this algorithm to work, it's important that methods be sorted by their specificity, and that the @@ -93,13 +93,15 @@ UnionAll var: TypeVar name: Symbol T lb: Union{} - ub: Any + ub: abstract type Any body: UnionAll var: TypeVar name: Symbol N lb: Union{} - ub: Any - body: Array{T, N} <: DenseArray{T, N} + ub: abstract type Any + body: mutable struct Array{T, N} <: DenseArray{T, N} + ref::MemoryRef{T} + size::NTuple{N, Int64} ``` This indicates that `Array` actually names a `UnionAll` type. There is one `UnionAll` type for @@ -179,13 +181,13 @@ TypeName var: TypeVar name: Symbol T lb: Union{} - ub: Any + ub: abstract type Any body: UnionAll var: TypeVar name: Symbol N lb: Union{} - ub: Any - body: Array{T, N} <: DenseArray{T, N} + ub: abstract type Any + body: mutable struct Array{T, N} <: DenseArray{T, N} cache: SimpleVector ... @@ -223,7 +225,7 @@ Ptr{Cvoid} @0x00007fcc7de64850 The `wrapper` field of [`Array`](@ref) points to itself, but for `Array{TV,NV}` it points back to the original definition of the type. -What about the other fields? `hash` assigns an integer to each type. To examine the `cache` +What about the other fields? `hash` assigns an integer to each type. To examine the `cache` field, it's helpful to pick a type that is less heavily used than Array. Let's first create our own type: @@ -243,8 +245,8 @@ variables are not cached. ## Tuple types -Tuple types constitute an interesting special case. For dispatch to work on declarations like -`x::Tuple`, the type has to be able to accommodate any tuple. Let's check the parameters: +Tuple types constitute an interesting special case. For dispatch to work on declarations like +`x::Tuple`, the type has to be able to accommodate any tuple. Let's check the parameters: ```jldoctest julia> Tuple @@ -489,7 +491,7 @@ julia> function mysubtype(a,b) end ``` -and then set a breakpoint in `jl_breakpoint`. Once this breakpoint gets triggered, you can set +and then set a breakpoint in `jl_breakpoint`. Once this breakpoint gets triggered, you can set breakpoints in other functions. As a warm-up, try the following: @@ -519,10 +521,6 @@ than the other.) Likewise, `Tuple{Int,Vararg{Int}}` is not a subtype of `Tuple{ considered more specific. However, `morespecific` does get a bonus for length: in particular, `Tuple{Int,Int}` is more specific than `Tuple{Int,Vararg{Int}}`. 
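A small illustrative example (not part of the original text) of how this length bonus plays out in dispatch:

```julia
g(x::Int, y::Int) = "matched Tuple{Int,Int}"           # more specific
g(x::Int, ys::Int...) = "matched Tuple{Int,Vararg{Int}}"

g(1, 2)     # "matched Tuple{Int,Int}"
g(1, 2, 3)  # "matched Tuple{Int,Vararg{Int}}"
```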
-If you're debugging how methods get sorted, it can be convenient to define the function:
-
-```julia
-type_morespecific(a, b) = ccall(:jl_type_morespecific, Cint, (Any,Any), a, b)
-```
-
-which allows you to test whether tuple type `a` is more specific than tuple type `b`.
+Additionally, if two methods are defined with identical (that is, type-equal) signatures, they
+will instead be compared by order of addition, such that the later method is more specific
+than the earlier one.
diff --git a/doc/src/devdocs/valgrind.md b/doc/src/devdocs/valgrind.md
index 7e62aeb176f3c..015c4a6d983ee 100644
--- a/doc/src/devdocs/valgrind.md
+++ b/doc/src/devdocs/valgrind.md
@@ -6,22 +6,22 @@ Julia.

 ## General considerations

-By default, Valgrind assumes that there is no self modifying code in the programs it runs.  This
+By default, Valgrind assumes that there is no self modifying code in the programs it runs. This
 assumption works fine in most instances but fails miserably for a just-in-time compiler like
-`julia`.  For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else
+`julia`. For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else
 code may crash or behave unexpectedly (often in subtle ways).

-In some cases, to better detect memory errors using Valgrind it can help to compile `julia` with
-memory pools disabled.  The compile-time flag `MEMDEBUG` disables memory pools in Julia, and
-`MEMDEBUG2` disables memory pools in FemtoLisp.  To build `julia` with both flags, add the following
+In some cases, to better detect memory errors using Valgrind, it can help to compile `julia` with
+memory pools disabled. The compile-time flag `MEMDEBUG` disables memory pools in Julia, and
+`MEMDEBUG2` disables memory pools in FemtoLisp. To build `julia` with both flags, add the following
 line to `Make.user`:

 ```make
 CFLAGS = -DMEMDEBUG -DMEMDEBUG2
 ```

-Another thing to note: if your program uses multiple workers processes, it is likely that you
-want all such worker processes to run under Valgrind, not just the parent process.  To do this,
+Another thing to note: if your program uses multiple worker processes, it is likely that you
+want all such worker processes to run under Valgrind, not just the parent process. To do this,
 pass `--trace-children=yes` to `valgrind`.

 Yet another thing to note: if using `valgrind` errors with `Unable to find compatible target in system image`,
@@ -29,9 +29,9 @@ try rebuilding the sysimage with target `generic` or julia with `JULIA_CPU_TARGE

 ## Suppressions

-Valgrind will typically display spurious warnings as it runs.  To reduce the number of such warnings,
+Valgrind will typically display spurious warnings as it runs. To reduce the number of such warnings,
 it helps to provide a [suppressions file](https://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
-to Valgrind.  A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
+to Valgrind. A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.

 The suppressions file can be used from the `julia/` source directory as follows:
@@ -40,13 +40,13 @@ $ valgrind --smc-check=all-non-file --suppressions=contrib/valgrind-julia.supp .
 ```

 Any memory errors that are displayed should either be reported as bugs or contributed as additional
-suppressions.
Note that some versions of Valgrind are [shipped with insufficient default suppressions](https://github.com/JuliaLang/julia/issues/8314#issuecomment-55766210), +suppressions. Note that some versions of Valgrind are [shipped with insufficient default suppressions](https://github.com/JuliaLang/julia/issues/8314#issuecomment-55766210), so that may be one thing to consider before submitting any bugs. ## Running the Julia test suite under Valgrind It is possible to run the entire Julia test suite under Valgrind, but it does take quite some -time (typically several hours). To do so, run the following command from the `julia/test/` directory: +time (typically several hours). To do so, run the following command from the `julia/test/` directory: ``` valgrind --smc-check=all-non-file --trace-children=yes --suppressions=$PWD/../contrib/valgrind-julia.supp ../julia runtests.jl all @@ -57,7 +57,7 @@ to `valgrind` as well. ## Additional spurious warnings -This section covers Valgrind warnings which cannot be added to the +This section covers Valgrind warnings that cannot be added to the suppressions file yet are nonetheless safe to ignore. ### Unhandled rr system calls @@ -65,7 +65,7 @@ suppressions file yet are nonetheless safe to ignore. Valgrind will emit a warning if it encounters any of the [system calls that are specific to rr](https://github.com/rr-debugger/rr/blob/master/src/preload/rrcalls.h), -the [Record and Replay Framework](https://rr-project.org/). In +the [Record and Replay Framework](https://rr-project.org/). In particular, a warning about an unhandled `1008` syscall will be shown when julia tries to detect whether it is running under rr: diff --git a/doc/src/index.md b/doc/src/index.md index bb758d14b4cf2..8c88af424e8e3 100644 --- a/doc/src/index.md +++ b/doc/src/index.md @@ -34,7 +34,7 @@ Markdown.parse(""" ## [Important Links](@id man-important-links) -Below is a non-exhasutive list of links that will be useful as you learn and use the Julia programming language. +Below is a non-exhaustive list of links that will be useful as you learn and use the Julia programming language. - [Julia Homepage](https://julialang.org) - [Download Julia](https://julialang.org/downloads/) diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md index 0b4532e1b423d..02d71fcd9939e 100644 --- a/doc/src/manual/arrays.md +++ b/doc/src/manual/arrays.md @@ -326,8 +326,8 @@ These syntaxes are shorthands for function calls that themselves are convenience | Syntax | Function | Description | |:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- | | | [`cat`](@ref) | concatenate input arrays along dimension(s) `k` | -| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1)` | -| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2)` | +| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1)` | +| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2)` | | `[A B; C D; ...]` | [`hvcat`](@ref) | simultaneous vertical and horizontal concatenation | | `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation, where number of semicolons indicate the dimension to concatenate | @@ -355,7 +355,7 @@ julia> Int8[[1 2] [3 4]] Comprehensions provide a general and powerful way to construct arrays. Comprehension syntax is similar to set construction notation in mathematics: -``` +```julia A = [ F(x, y, ...) for x=rx, y=ry, ... 
] ``` @@ -366,11 +366,11 @@ The result is an N-d dense array with dimensions that are the concatenation of t of the variable ranges `rx`, `ry`, etc. and each `F(x,y,...)` evaluation returns a scalar. The following example computes a weighted average of the current element and its left and right -neighbor along a 1-d grid. : +neighbor along a 1-d grid: ```julia-repl julia> x = rand(8) -8-element Array{Float64,1}: +8-element Vector{Float64}: 0.843025 0.869052 0.365105 @@ -381,7 +381,7 @@ julia> x = rand(8) 0.809411 julia> [ 0.25*x[i-1] + 0.5*x[i] + 0.25*x[i+1] for i=2:length(x)-1 ] -6-element Array{Float64,1}: +6-element Vector{Float64}: 0.736559 0.57468 0.685417 @@ -398,7 +398,7 @@ the result in single precision by writing: Float32[ 0.25*x[i-1] + 0.5*x[i] + 0.25*x[i+1] for i=2:length(x)-1 ] ``` -## Generator Expressions +## [Generator Expressions](@id man-generators) Comprehensions can also be written without the enclosing square brackets, producing an object known as a generator. This object can be iterated to produce values on demand, instead of allocating @@ -430,7 +430,7 @@ julia> map(tuple, (1/(i+j) for i=1:2, j=1:2), [1 3; 2 4]) Generators are implemented via inner functions. Just like inner functions used elsewhere in the language, variables from the enclosing scope can be -"captured" in the inner function. For example, `sum(p[i] - q[i] for i=1:n)` +"captured" in the inner function. For example, `sum(p[i] - q[i] for i=1:n)` captures the three variables `p`, `q` and `n` from the enclosing scope. Captured variables can present performance challenges; see [performance tips](@ref man-performance-captured). @@ -603,7 +603,7 @@ overwritten with the value of `X`, [`convert`](@ref)ing to the If any index `I_k` is itself an array, then the right hand side `X` must also be an array with the same shape as the result of indexing `A[I_1, I_2, ..., I_n]` or a vector with the same number of elements. The value in location `I_1[i_1], I_2[i_2], ..., I_n[i_n]` of -`A` is overwritten with the value `X[I_1, I_2, ..., I_n]`, converting if necessary. The +`A` is overwritten with the value `X[i_1, i_2, ..., i_n]`, converting if necessary. The element-wise assignment operator `.=` may be used to [broadcast](@ref Broadcasting) `X` across the selected locations: @@ -714,7 +714,7 @@ julia> A[:, 3:3] ### Cartesian indices The special `CartesianIndex{N}` object represents a scalar index that behaves -like an `N`-tuple of integers spanning multiple dimensions. For example: +like an `N`-tuple of integers spanning multiple dimensions. For example: ```jldoctest cartesianindex julia> A = reshape(1:32, 4, 4, 2); @@ -793,38 +793,46 @@ Indexing by a boolean vector `B` is effectively the same as indexing by the vector of integers that is returned by [`findall(B)`](@ref). Similarly, indexing by a `N`-dimensional boolean array is effectively the same as indexing by the vector of `CartesianIndex{N}`s where its values are `true`. A logical index -must be a vector of the same length as the dimension it indexes into, or it -must be the only index provided and match the size and dimensionality of the -array it indexes into. It is generally more efficient to use boolean arrays as -indices directly instead of first calling [`findall`](@ref). +must be an array of the same shape as the dimension(s) it indexes into, or it +must be the only index provided and match the shape of the one-dimensional +reshaped view of the array it indexes into. 
It is generally more efficient +to use boolean arrays as indices directly instead of first calling [`findall`](@ref). ```jldoctest -julia> x = reshape(1:16, 4, 4) -4×4 reshape(::UnitRange{Int64}, 4, 4) with eltype Int64: - 1 5 9 13 - 2 6 10 14 - 3 7 11 15 - 4 8 12 16 +julia> x = reshape(1:12, 2, 3, 2) +2×3×2 reshape(::UnitRange{Int64}, 2, 3, 2) with eltype Int64: +[:, :, 1] = + 1 3 5 + 2 4 6 -julia> x[[false, true, true, false], :] -2×4 Matrix{Int64}: - 2 6 10 14 - 3 7 11 15 +[:, :, 2] = + 7 9 11 + 8 10 12 + +julia> x[:, [true false; false true; true false]] +2×3 Matrix{Int64}: + 1 5 9 + 2 6 10 julia> mask = map(ispow2, x) -4×4 Matrix{Bool}: - 1 0 0 0 - 1 0 0 0 - 0 0 0 0 - 1 1 0 1 +2×3×2 Array{Bool, 3}: +[:, :, 1] = + 1 0 0 + 1 1 0 + +[:, :, 2] = + 0 0 0 + 1 0 0 julia> x[mask] -5-element Vector{Int64}: - 1 - 2 - 4 - 8 - 16 +4-element Vector{Int64}: + 1 + 2 + 4 + 8 + +julia> x[vec(mask)] == x[mask] # we can also index with a single Boolean vector +true ``` ### Number of indices @@ -881,7 +889,7 @@ in their implementations, other arrays — like [`Diagonal`](@ref) — need the full set of cartesian indices to do their lookup (see [`IndexStyle`](@ref) to introspect which is which). -!!! warnings +!!! warning When iterating over all the indices for an array, it is better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`. @@ -935,13 +943,13 @@ element of `axes(A, d)` where `d` is that particular dimension number). This allows vectors to be indexed like one-column matrices, for example: ```jldoctest -julia> A = [8,6,7] +julia> A = [8, 6, 7] 3-element Vector{Int64}: 8 6 7 -julia> A[2,1] +julia> A[2, 1] 6 ``` @@ -1006,7 +1014,7 @@ The following operators are supported for arrays: To enable convenient vectorization of mathematical and other operations, Julia [provides the dot syntax](@ref man-vectorized) `f.(args...)`, e.g. `sin.(x)` -or `min.(x,y)`, for elementwise operations over arrays or mixtures of arrays and +or `min.(x, y)`, for elementwise operations over arrays or mixtures of arrays and scalars (a [Broadcasting](@ref) operation); these have the additional advantage of "fusing" into a single loop when combined with other dot calls, e.g. `sin.(cos.(x))`. @@ -1020,7 +1028,7 @@ operations like `<`, *only* the elementwise `.<` version is applicable to arrays Also notice the difference between `max.(a,b)`, which [`broadcast`](@ref)s [`max`](@ref) elementwise over `a` and `b`, and [`maximum(a)`](@ref), which finds the largest value within -`a`. The same relationship holds for `min.(a,b)` and `minimum(a)`. +`a`. The same relationship holds for `min.(a, b)` and `minimum(a)`. ## Broadcasting @@ -1032,7 +1040,7 @@ be to replicate the vector to the size of the matrix: julia> a = rand(2, 1); A = rand(2, 3); julia> repeat(a, 1, 3) + A -2×3 Array{Float64,2}: +2×3 Matrix{Float64}: 1.20813 1.82068 1.25387 1.56851 1.86401 1.67846 ``` @@ -1043,16 +1051,16 @@ without using extra memory, and applies the given function elementwise: ```julia-repl julia> broadcast(+, a, A) -2×3 Array{Float64,2}: +2×3 Matrix{Float64}: 1.20813 1.82068 1.25387 1.56851 1.86401 1.67846 julia> b = rand(1,2) -1×2 Array{Float64,2}: +1×2 Matrix{Float64}: 0.867535 0.00457906 julia> broadcast(+, a, b) -2×2 Array{Float64,2}: +2×2 Matrix{Float64}: 1.71056 0.847604 1.73659 0.873631 ``` @@ -1066,7 +1074,7 @@ is equivalent to `broadcast(f, args...)`, providing a convenient syntax to broad [automatically fuse](@ref man-dot-operators) into a single `broadcast` call. 
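As a brief sketch of what this fusion buys you (illustrative only; the in-place `.=` operator and the `@.` macro are covered with the dot syntax elsewhere in the manual):

```julia
x = rand(1000)
y = similar(x)

# A single fused loop over `x`; no temporary array is allocated for `cos.(x)`.
y .= sin.(cos.(x))

# `@.` adds a dot to every call and operator in the expression, with the same effect.
@. y = sin(cos(x)) + 1
```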
Additionally, [`broadcast`](@ref) is not limited to arrays (see the function documentation); -it also handles scalars, tuples and other collections. By default, only some argument types are +it also handles scalars, tuples and other collections. By default, only some argument types are considered scalars, including (but not limited to) `Number`s, `String`s, `Symbol`s, `Type`s, `Function`s and some common singletons like `missing` and `nothing`. All other arguments are iterated over or indexed into elementwise. @@ -1111,10 +1119,10 @@ generally work correctly as a fallback for any specific array implementation. The `AbstractArray` type includes anything vaguely array-like, and implementations of it might be quite different from conventional arrays. For example, elements might be computed on request rather than stored. However, any concrete `AbstractArray{T,N}` type should generally implement -at least [`size(A)`](@ref) (returning an `Int` tuple), [`getindex(A,i)`](@ref) and [`getindex(A,i1,...,iN)`](@ref getindex); -mutable arrays should also implement [`setindex!`](@ref). It is recommended that these operations -have nearly constant time complexity, as otherwise some array -functions may be unexpectedly slow. Concrete types should also typically provide a [`similar(A,T=eltype(A),dims=size(A))`](@ref) +at least [`size(A)`](@ref) (returning an `Int` tuple), [`getindex(A, i)`](@ref) and +[`getindex(A, i1, ..., iN)`](@ref getindex); mutable arrays should also implement [`setindex!`](@ref). +It is recommended that these operations have nearly constant time complexity, as otherwise some array +functions may be unexpectedly slow. Concrete types should also typically provide a [`similar(A, T=eltype(A), dims=size(A))`](@ref) method, which is used to allocate a similar array for [`copy`](@ref) and other out-of-place operations. No matter how an `AbstractArray{T,N}` is represented internally, `T` is the type of object returned by *integer* indexing (`A[1, ..., 1]`, when `A` is not empty) and `N` should be @@ -1136,7 +1144,7 @@ is created with the [`view`](@ref) function, which is called the same way as of [`view`](@ref) looks the same as the result of [`getindex`](@ref), except the data is left in place. [`view`](@ref) stores the input index vectors in a `SubArray` object, which can later be used to index the original array -indirectly. By putting the [`@views`](@ref) macro in front of an expression or +indirectly. By putting the [`@views`](@ref) macro in front of an expression or block of code, any `array[...]` slice in that expression will be converted to create a `SubArray` view instead. @@ -1161,7 +1169,7 @@ julia> stride(A, 1) The stride of the second dimension is the spacing between elements in the same row, skipping as many elements as there are in a single column (`5`). Similarly, jumping between the two -"pages" (in the third dimension) requires skipping `5*7 == 35` elements. The [`strides`](@ref) +"pages" (in the third dimension) requires skipping `5*7 == 35` elements. The [`strides`](@ref) of this array is the tuple of these three numbers together: ```julia-repl diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md index 5b43ba971ee1c..d1d095c48b2ff 100644 --- a/doc/src/manual/asynchronous-programming.md +++ b/doc/src/manual/asynchronous-programming.md @@ -64,8 +64,8 @@ the next input prompt appears. That is because the REPL is waiting for `t` to finish before proceeding. 
It is common to want to create a task and schedule it right away, so the -macro [`@async`](@ref) is provided for that purpose --- `@async x` is -equivalent to `schedule(@task x)`. +macro [`Threads.@spawn`](@ref) is provided for that purpose --- `Threads.@spawn x` is +equivalent to `task = @task x; task.sticky = false; schedule(task)`. ## Communicating with Channels @@ -186,7 +186,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end : # we can schedule `n` instances of `foo` to be active concurrently. for _ in 1:n - errormonitor(@async foo()) + errormonitor(Threads.@spawn foo()) end ``` * Channels are created via the `Channel{T}(sz)` constructor. The channel will only hold objects @@ -194,10 +194,11 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end : to the maximum number of elements that can be held in the channel at any time. For example, `Channel(32)` creates a channel that can hold a maximum of 32 objects of any type. A `Channel{MyType}(64)` can hold up to 64 objects of `MyType` at any time. - * If a [`Channel`](@ref) is empty, readers (on a [`take!`](@ref) call) will block until data is available. - * If a [`Channel`](@ref) is full, writers (on a [`put!`](@ref) call) will block until space becomes available. + * If a [`Channel`](@ref) is empty, readers (on a [`take!`](@ref) call) will block until data is available (see [`isempty`](@ref)). + * If a [`Channel`](@ref) is full, writers (on a [`put!`](@ref) call) will block until space becomes available (see [`isfull`](@ref)). * [`isready`](@ref) tests for the presence of any object in the channel, while [`wait`](@ref) waits for an object to become available. + * Note that if another task is currently waiting to `put!` an object into a channel, a channel can have more items available than its capacity. * A [`Channel`](@ref) is in an open state initially. This means that it can be read from and written to freely via [`take!`](@ref) and [`put!`](@ref) calls. [`close`](@ref) closes a [`Channel`](@ref). On a closed [`Channel`](@ref), [`put!`](@ref) will fail. For example: @@ -263,10 +264,10 @@ julia> function make_jobs(n) julia> n = 12; -julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs +julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs julia> for i in 1:4 # start 4 tasks to process requests in parallel - errormonitor(@async do_work()) + errormonitor(Threads.@spawn do_work()) end julia> @elapsed while n > 0 # print out results diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md index 7b889589c592d..d198c796a2e0b 100644 --- a/doc/src/manual/calling-c-and-fortran-code.md +++ b/doc/src/manual/calling-c-and-fortran-code.md @@ -27,9 +27,9 @@ commonly passed in registers when using C or Julia calling conventions. The syntax for [`@ccall`](@ref) to generate a call to the library function is: ```julia - @ccall library.function_name(argvalue1::argtype1, ...)::returntype - @ccall function_name(argvalue1::argtype1, ...)::returntype - @ccall $function_pointer(argvalue1::argtype1, ...)::returntype +@ccall library.function_name(argvalue1::argtype1, ...)::returntype +@ccall function_name(argvalue1::argtype1, ...)::returntype +@ccall $function_pointer(argvalue1::argtype1, ...)::returntype ``` where `library` is a string constant or literal (but see [Non-constant Function @@ -253,10 +253,14 @@ to the specified type. 
For example, the following call: will behave as if it were written like this: ```julia -@ccall "libfoo".foo( - Base.unsafe_convert(Int32, Base.cconvert(Int32, x))::Int32, - Base.unsafe_convert(Float64, Base.cconvert(Float64, y))::Float64 +c_x = Base.cconvert(Int32, x) +c_y = Base.cconvert(Float64, y) +GC.@preserve c_x c_y begin + @ccall "libfoo".foo( + Base.unsafe_convert(Int32, c_x)::Int32, + Base.unsafe_convert(Float64, c_y)::Float64 )::Cvoid +end ``` [`Base.cconvert`](@ref) normally just calls [`convert`](@ref), but can be defined to return an @@ -272,17 +276,17 @@ it to be freed prematurely. First, let's review some relevant Julia type terminology: -| Syntax / Keyword | Example | Description | -|:----------------------------- |:------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `mutable struct` | `BitSet` | "Leaf Type" :: A group of related data that includes a type-tag, is managed by the Julia GC, and is defined by object-identity. The type parameters of a leaf type must be fully defined (no `TypeVars` are allowed) in order for the instance to be constructed. | -| `abstract type` | `Any`, `AbstractArray{T, N}`, `Complex{T}` | "Super Type" :: A super-type (not a leaf-type) that cannot be instantiated, but can be used to describe a group of types. | -| `T{A}` | `Vector{Int}` | "Type Parameter" :: A specialization of a type (typically used for dispatch or storage optimization). | -| | | "TypeVar" :: The `T` in the type parameter declaration is referred to as a TypeVar (short for type variable). | -| `primitive type` | `Int`, `Float64` | "Primitive Type" :: A type with no fields, but a size. It is stored and defined by-value. | -| `struct` | `Pair{Int, Int}` | "Struct" :: A type with all fields defined to be constant. It is defined by-value, and may be stored with a type-tag. | -| | `ComplexF64` (`isbits`) | "Is-Bits" :: A `primitive type`, or a `struct` type where all fields are other `isbits` types. It is defined by-value, and is stored without a type-tag. | -| `struct ...; end` | `nothing` | "Singleton" :: a Leaf Type or Struct with no fields. | -| `(...)` or `tuple(...)` | `(1, 2, 3)` | "Tuple" :: an immutable data-structure similar to an anonymous struct type, or a constant array. Represented as either an array or a struct. | +| Syntax / Keyword | Example | Description | +|:----------------------------- |:------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `mutable struct` | `BitSet` | "Concrete Type" :: A group of related data that includes a type-tag, is managed by the Julia GC, and is defined by object-identity. The type parameters of a concrete type must be fully defined (no `TypeVars` are allowed) in order for the instance to be constructed. Also see [`isconcretetype`](@ref). | +| `abstract type` | `Any`, `AbstractArray{T, N}`, `Complex{T}` | "Super Type" :: A super-type (not a concrete type) that cannot be instantiated, but can be used to describe a group of types. Also see [`isabstracttype`](@ref). 
| +| `T{A}` | `Vector{Int}` | "Type Parameter" :: A specialization of a type (typically used for dispatch or storage optimization). | +| | | "TypeVar" :: The `T` in the type parameter declaration is referred to as a TypeVar (short for type variable). | +| `primitive type` | `Int`, `Float64` | "Primitive Type" :: A type with no fields, but a size. It is stored and defined by-value. | +| `struct` | `Pair{Int, Int}` | "Struct" :: A type with all fields defined to be constant. It is defined by-value, and may be stored with a type-tag. | +| | `ComplexF64` (`isbits`) | "Is-Bits" :: A `primitive type`, or a `struct` type where all fields are other `isbits` types. It is defined by-value, and is stored without a type-tag. | +| `struct ...; end` | `nothing` | "Singleton" :: a concrete Type or Struct with no fields. | +| `(...)` or `tuple(...)` | `(1, 2, 3)` | "Tuple" :: an immutable data-structure similar to an anonymous struct type, or a constant array. Represented as either an array or a struct. | ### [Bits Types](@id man-bits-types) @@ -622,7 +626,7 @@ For translating a C argument list to Julia: * argument value will be copied (passed by value) * `struct T` (including typedef to a struct) - * `T`, where `T` is a Julia leaf type + * `T`, where `T` is a concrete Julia type * argument value will be copied (passed by value) * `void*` @@ -675,7 +679,7 @@ For translating a C return type to Julia: * argument value will be copied (returned by-value) * `struct T` (including typedef to a struct) - * `T`, where `T` is a Julia Leaf Type + * `T`, where `T` is a concrete Julia Type * argument value will be copied (returned by-value) * `void*` @@ -821,7 +825,7 @@ Instead define a [`Base.cconvert`](@ref) method and pass the variables directly automatically arranges that all of its arguments will be preserved from garbage collection until the call returns. If a C API will store a reference to memory allocated by Julia, after the `@ccall` returns, you must ensure that the object remains visible to the garbage collector. The suggested -way to do this is to make a global variable of type `Array{Ref,1}` to hold these values until +way to do this is to make a global variable of type `Vector{Ref}` to hold these values until the C library notifies you that it is finished with them. 
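A minimal sketch of that pattern (the C library and its `take_buffer` function here are placeholders, not a real API):

```julia
# Globally rooted references keep Julia-allocated objects visible to the GC
# for as long as the C side may still be using them.
const LIVE_REFS = Ref[]

function hand_to_c(buf::Vector{UInt8})
    r = Ref{Any}(buf)
    push!(LIVE_REFS, r)   # root the object before its pointer escapes to C
    # @ccall "libfoo".take_buffer(buf::Ptr{UInt8}, length(buf)::Csize_t)::Cvoid
    return r
end

# Called when the (hypothetical) library signals it is done with the buffer:
release_from_c(r::Ref) = filter!(x -> x !== r, LIVE_REFS)
```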
Whenever you have created a pointer to Julia data, you must ensure the original data exists until @@ -992,10 +996,10 @@ A table of translations between the macro and function interfaces is given below |------------------------------------------------------------------------------|-----------------------------------------------------------------------------| | `@ccall clock()::Int32` | `ccall(:clock, Int32, ())` | | `@ccall f(a::Cint)::Cint` | `ccall(:a, Cint, (Cint,), a)` | -| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid` | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), (a, b))` | +| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid` | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), a, b)` | | `@ccall $fptr.f()::Cvoid` | `ccall(fptr, f, Cvoid, ())` | | `@ccall printf("%s = %d\n"::Cstring ; "foo"::Cstring, foo::Cint)::Cint` | `` | -| `@ccall printf("%s = %d\n"::Cstring ; "2 + 2"::Cstring, "5"::Cstring)::Cint` | `ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")` | +| `@ccall printf("%s = %s\n"::Cstring ; "2 + 2"::Cstring, "5"::Cstring)::Cint` | `ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")` | | `` | `ccall(:gethostname, stdcall, Int32, (Ptr{UInt8}, UInt32), hn, length(hn))` | ## [Calling Convention](@id calling-convention) diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md index 743ee83c333a4..5c8315693c71e 100644 --- a/doc/src/manual/code-loading.md +++ b/doc/src/manual/code-loading.md @@ -14,7 +14,7 @@ Code inclusion is quite straightforward and simple: it evaluates the given sourc A *package* is a source tree with a standard layout providing functionality that can be reused by other Julia projects. A package is loaded by `import X` or `using X` statements. These statements also make the module named `X`—which results from loading the package code—available within the module where the import statement occurs. The meaning of `X` in `import X` is context-dependent: which `X` package is loaded depends on what code the statement occurs in. Thus, handling of `import X` happens in two stages: first, it determines **what** package is defined to be `X` in this context; second, it determines **where** that particular `X` package is found. -These questions are answered by searching through the project environments listed in [`LOAD_PATH`](@ref) for project files (`Project.toml` or `JuliaProject.toml`), manifest files (`Manifest.toml` or `JuliaManifest.toml`), or folders of source files. +These questions are answered by searching through the project environments listed in [`LOAD_PATH`](@ref) for project files (`Project.toml` or `JuliaProject.toml`), manifest files (`Manifest.toml` or `JuliaManifest.toml`, or the same names suffixed by `-v{major}.{minor}.toml` for specific versions), or folders of source files. ## Federation of packages @@ -63,7 +63,7 @@ Each kind of environment defines these three maps differently, as detailed in th ### Project environments -A project environment is determined by a directory containing a project file called `Project.toml`, and optionally a manifest file called `Manifest.toml`. These files may also be called `JuliaProject.toml` and `JuliaManifest.toml`, in which case `Project.toml` and `Manifest.toml` are ignored. This allows for coexistence with other tools that might consider files called `Project.toml` and `Manifest.toml` significant. For pure Julia projects, however, the names `Project.toml` and `Manifest.toml` are preferred. 
+A project environment is determined by a directory containing a project file called `Project.toml`, and optionally a manifest file called `Manifest.toml`. These files may also be called `JuliaProject.toml` and `JuliaManifest.toml`, in which case `Project.toml` and `Manifest.toml` are ignored. This allows for coexistence with other tools that might consider files called `Project.toml` and `Manifest.toml` significant. For pure Julia projects, however, the names `Project.toml` and `Manifest.toml` are preferred. However, from Julia v1.11 onwards, `(Julia)Manifest-v{major}.{minor}.toml` is recognized as a format to make a given julia version use a specific manifest file i.e. in the same folder, a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by any other julia version. The roots, graph and paths maps of a project environment are defined as follows: @@ -160,11 +160,12 @@ What happens if `import Zebra` is evaluated in the main `App` code base? Since ` **The paths map** of a project environment is extracted from the manifest file. The path of a package `uuid` named `X` is determined by these rules (in order): 1. If the project file in the directory matches `uuid` and name `X`, then either: - - It has a toplevel `path` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file. - - Otherwise, `uuid` is mapped to `src/X.jl` relative to the directory containing the project file. -2. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then: - - If it has a `path` entry, use that path (relative to the directory containing the manifest file). - - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists. + - It has a toplevel `entryfile` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file. + - Otherwise, `uuid` is mapped to `src/X.jl` relative to the directory containing the project file. +2. 1. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then: + - If it has a `path` entry, use that path (relative to the directory containing the manifest file). + - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists. + 2. If this is a directory then `uuid` is mapped to `src/X.jl` unless the matching manifest stanza has an `entryfile` entry in which case this is used. In both cases, these are relative to the directory in 2.1. If any of these result in success, the path to the source code entry point will be either that result, the relative path from that result plus `src/X.jl`; otherwise, there is no path mapping for `uuid`. When loading `X`, if no source code path is found, the lookup will fail, and the user may be prompted to install the appropriate package version or to take other corrective action (e.g. declaring `X` as a dependency). @@ -208,7 +209,6 @@ This example map includes three different kinds of package locations (the first 2. 
The public `Priv` and `Zebra` packages are in the system depot, where packages installed and managed by the system administrator live. These are available to all users on the system. 3. The `Pub` package is in the user depot, where packages installed by the user live. These are only available to the user who installed them. - ### Package directories Package directories provide a simpler kind of environment without the ability to handle name collisions. In a package directory, the set of top-level packages is the set of subdirectories that "look like" packages. A package `X` exists in a package directory if the directory contains one of the following "entry point" files: @@ -351,7 +351,7 @@ Since the primary environment is typically the environment of a project you're w ### [Package Extensions](@id man-extensions) -A package "extension" is a module that is automatically loaded when a specified set of other packages (its "extension dependencies") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The extension dependencies of an extension are a subset of those packages listed under the `[weakdeps]` section of the project file. Those packages can have compat entries like other packages. +A package "extension" is a module that is automatically loaded when a specified set of other packages (its "triggers") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The triggers of an extension are a subset of those packages listed under the `[weakdeps]` (and possibly, but uncommonly the `[deps]`) section of the project file. Those packages can have compat entries like other packages. ```toml name = "MyPackage" @@ -371,8 +371,8 @@ FooExt = "ExtDep" ``` The keys under `extensions` are the names of the extensions. -They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded. -If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity. +They are loaded when all the packages on the right hand side (the triggers) of that extension are loaded. +If an extension only has one trigger the list of triggers can be written as just a string for brevity. The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for extension `FooExt`. The content of an extension is often structured as: @@ -380,10 +380,10 @@ The content of an extension is often structured as: ``` module FooExt -# Load main package and extension dependencies +# Load main package and triggers using MyPackage, ExtDep -# Extend functionality in main package with types from the extension dependencies +# Extend functionality in main package with types from the triggers MyPackage.func(x::ExtDep.SomeStruct) = ... end @@ -391,9 +391,31 @@ end When a package with extensions is added to an environment, the `weakdeps` and `extensions` sections are stored in the manifest file in the section for that package. The dependency lookup rules for -a package are the same as for its "parent" except that the listed extension dependencies are also considered as +a package are the same as for its "parent" except that the listed triggers are also considered as dependencies. 
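As an aside (a hedged sketch assuming the `Base.get_extension` helper available in recent Julia versions), code can check at runtime whether an extension has actually been loaded:

```julia
using MyPackage, ExtDep   # loading the parent package and its trigger causes FooExt to load

ext = Base.get_extension(MyPackage, :FooExt)
ext === nothing && @warn "FooExt was not loaded"
```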
+### [Workspaces](@id workspaces) + +A project file can define a workspace by giving a set of projects that is part of that workspace: + +```toml +[workspace] +projects = ["test", "benchmarks", "docs", "SomePackage"] +``` + +Each subfolder contains its own `Project.toml` file, which may include additional dependencies and compatibility constraints. In such cases, the package manager gathers all dependency information from all the projects in the workspace generating a single manifest file that combines the versions of all dependencies. + +Furthermore, workspaces can be "nested", meaning a project defining a workspace can also be part of another workspace. In this scenario, a single manifest file is still utilized, stored alongside the "root project" (the project that doesn't have another workspace including it). An example file structure could look like this: + +``` +Project.toml # projects = ["MyPackage"] +Manifest.toml +MyPackage/ + Project.toml # projects = ["test"] + test/ + Project.toml +``` + ### [Package/Environment Preferences](@id preferences) Preferences are dictionaries of metadata that influence package behavior within an environment. diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index e1651c61a3ec3..14dd60d89b384 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -39,6 +39,77 @@ $ julia --color=yes -O -- script.jl arg1 arg2.. See also [Scripting](@ref man-scripting) for more information on writing Julia scripts. +## The `Main.main` entry point + +As of Julia, 1.11, `Base` exports the macro `@main`. This macro expands to the symbol `main`, +but at the conclusion of executing a script or expression, `julia` will attempt to execute +`Main.main(Base.ARGS)` if such a function `Main.main` has been defined and this behavior was opted into +by using the `@main` macro. + +This feature is intended to aid in the unification +of compiled and interactive workflows. In compiled workflows, loading the code that defines the `main` +function may be spatially and temporally separated from the invocation. However, for interactive workflows, +the behavior is equivalent to explicitly calling `exit(main(ARGS))` at the end of the evaluated script or +expression. + +!!! compat "Julia 1.11" + The special entry point `Main.main` was added in Julia 1.11. For compatibility with prior julia versions, + add an explicit `@isdefined(var"@main") ? (@main) : exit(main(ARGS))` at the end of your scripts. + +To see this feature in action, consider the following definition, which will execute the print function despite there being no explicit call to `main`: + +``` +$ julia -e '(@main)(args) = println("Hello World!")' +Hello World! +$ +``` + +Only the `main` binding in the `Main` module has this behavior and only if +the macro `@main` was used within the defining module. + +For example, using `hello` instead of `main` will not result in the `hello` function executing: + +``` +$ julia -e 'hello(args) = println("Hello World!")' +$ +``` + +and neither will a plain definition of `main`: +``` +$ julia -e 'main(args) = println("Hello World!")' +$ +``` + +However, the opt-in need not occur at definition time: +``` +$ julia -e 'main(args) = println("Hello World!"); @main' +Hello World! +$ +``` + +The `main` binding may be imported from a package. 
A *hello world* package defined as + +``` +module Hello + +export main +(@main)(args) = println("Hello from the package!") + +end +``` + +may be used as: + +``` +$ julia -e 'using Hello' +Hello from the package! +$ julia -e 'import Hello' # N.B.: Execution depends on the binding not whether the package is loaded +$ +``` + +However, note that the current best practice recommendation is to not mix application and reusable library +code in the same package. Helper applications may be distributed as separate packages or as scripts with +separate `main` entry points in a package's `bin` folder. ## Parallel mode @@ -93,44 +164,48 @@ The following is a complete list of command-line switches available when launchi |Switch |Description| |:--- |:---| |`-v`, `--version` |Display version information| -|`-h`, `--help` |Print command-line options (this message).| -|`--help-hidden` |Uncommon options not shown by `-h`| -|`--project[={\|@.}]` |Set `` as the home project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.| +|`-h`, `--help` |Print command-line options (this message)| +|`--help-hidden` |Print uncommon options not shown by `-h`| +|`--project[={\|@temp\|@.}]` |Set `` as the active project/environment. Or, create a temporary environment with `@temp`. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.| |`-J`, `--sysimage ` |Start up with the given system image file| |`-H`, `--home ` |Set location of `julia` executable| -|`--startup-file={yes*\|no}` |Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH` environment variable is unset, load `~/.julia/config/startup.jl`| +|`--startup-file={yes*\|no}` |Load `JULIA_DEPOT_PATH/config/startup.jl`; if [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is unset, load `~/.julia/config/startup.jl`| |`--handle-signals={yes*\|no}` |Enable or disable Julia's default signal handlers| |`--sysimage-native-code={yes*\|no}` |Use native code from system image if available| -|`--compiled-modules={yes*\|no}` |Enable or disable incremental precompilation of modules| -|`--pkgimages={yes*\|no}` |Enable or disable usage of native code caching in the form of pkgimages| +|`--compiled-modules={yes*\|no\|existing\|strict}` |Enable or disable incremental precompilation of modules. The `existing` option allows use of existing compiled modules that were previously precompiled, but disallows creation of new precompile files. The `strict` option is similar, but will error if no precompile file is found. | +|`--pkgimages={yes*\|no\|existing}` |Enable or disable usage of native code caching in the form of pkgimages. The `existing` option allows use of existing pkgimages but disallows creation of new ones| |`-e`, `--eval ` |Evaluate ``| |`-E`, `--print ` |Evaluate `` and display the result| +|`-m`, `--module [args]` |Run entry point of `Package` (`@main` function) with `args'| |`-L`, `--load ` |Load `` immediately on all processors| -|`-t`, `--threads {N\|auto}` |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). 
If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.| -| `--gcthreads {N}` |Enable N GC threads; If unspecified is set to half of the compute worker threads.| +|`-t`, `--threads {auto\|N[,auto\|M]}` |Enable N[+M] threads; N threads are assigned to the `default` threadpool, and if M is specified, M threads are assigned to the `interactive` threadpool; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently sets N to the number of CPUs assigned to this Julia process based on the OS-specific affinity assignment interface if supported (Linux and Windows) or to the number of CPU threads if not supported (MacOS) or if process affinity is not configured, and sets M to 1.| +| `--gcthreads=N[,M]` |Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC. N is set to the number of compute threads and M is set to 0 if unspecified.| |`-p`, `--procs {N\|auto}` |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)| |`--machine-file ` |Run processes on hosts listed in ``| -|`-i` |Interactive mode; REPL runs and `isinteractive()` is true| +|`-i`, `--interactive` |Interactive mode; REPL runs and `isinteractive()` is true| |`-q`, `--quiet` |Quiet startup: no banner, suppress REPL warnings| -|`--banner={yes\|no\|auto*}` |Enable or disable startup banner| +|`--banner={yes\|no\|short\|auto*}` |Enable or disable startup banner| |`--color={yes\|no\|auto*}` |Enable or disable color text| |`--history-file={yes*\|no}` |Load or save history| |`--depwarn={yes\|no*\|error}` |Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)| |`--warn-overwrite={yes\|no*}` |Enable or disable method overwrite warnings| |`--warn-scope={yes*\|no}` |Enable or disable warning for ambiguous top-level scope| |`-C`, `--cpu-target ` |Limit usage of CPU features up to ``; set to `help` to see the available options| -|`-O`, `--optimize={0,1,2*,3}` |Set the optimization level (level is 3 if `-O` is used without a level) ($)| -|`--min-optlevel={0*,1,2,3}` |Set the lower bound on per-module optimization| -|`-g`, `--debug-info={0,1*,2}` |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)| +|`-O`, `--optimize={0\|1\|2*\|3}` |Set the optimization level (level is 3 if `-O` is used without a level) ($)| +|`--min-optlevel={0*\|1\|2\|3}` |Set the lower bound on per-module optimization| +|`-g`, `--debug-info={0\|1*\|2}` |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)| |`--inline={yes\|no}` |Control whether inlining is permitted, including overriding `@inline` declarations| |`--check-bounds={yes\|no\|auto*}` |Emit bounds checks always, never, or respect `@inbounds` declarations ($)| -|`--math-mode={ieee,fast}` |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)| +|`--math-mode={ieee\|user*}` |Always follow `ieee` floating point semantics or respect `@fastmath` declarations| +|`--polly={yes*\|no}` |Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)| |`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)| |`--code-coverage=@` |Count executions but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. 
A `@` with no path will track the current directory.| |`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).| |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")| |`--track-allocation=@` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.| +|`--task-metrics={yes\|no*}` |Enable the collection of per-task metrics| |`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.| +|`--heap-size-hint=` |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %.| |`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation| |`--output-o ` |Generate an object file (including system image data)| |`--output-ji ` |Generate a system image data file (.ji)| @@ -140,9 +215,16 @@ The following is a complete list of command-line switches available when launchi |`--output-bc ` |Generate LLVM bitcode (.bc)| |`--output-asm ` |Generate an assembly file (.s)| |`--output-incremental={yes\|no*}` |Generate an incremental output file (rather than complete)| -|`--trace-compile={stderr,name}` |Print precompile statements for methods compiled during execution or save to a path| +|`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported| +|`--trace-compile-timing` |If `--trace-compile` is enabled show how long each took to compile in ms| +|`--trace-dispatch={stderr\|name}` |Print precompile statements for methods dispatched during execution or save to stderr or a path.| |`--image-codegen` |Force generate code in imaging mode| +|`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory| +|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.| + +Options that have the form `--option={...}` can be specified either as `--option=value` or as `--option value`. For example, `julia --banner=no` is equivalent to `julia --banner no`. This is especially relevant for options that take a filename for output, because forgetting to specifying the argument for (say) `--trace-compile` will cause the option following it to be interpreted as the filename, possibly unintentionally overwriting it. +Note that options of the form `--option[=...]` can **not** be specified as `--option value`, but only as `--option=value` (or simply `--option`, when no argument is provided). !!! 
compat "Julia 1.1" In Julia 1.0, the default `--project=@.` option did not search up from the root diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md index 9cab2ed1e4f24..d1d6ffeca245f 100644 --- a/doc/src/manual/complex-and-rational-numbers.md +++ b/doc/src/manual/complex-and-rational-numbers.md @@ -254,13 +254,30 @@ julia> float(3//4) ``` Conversion from rational to floating-point respects the following identity for any integral values -of `a` and `b`, with the exception of the two cases `b == 0` and `a == 0 && b < 0`: +of `a` and `b`, except when `a==0 && b <= 0`: ```jldoctest julia> a = 1; b = 2; julia> isequal(float(a//b), a/b) true + +julia> a, b = 0, 0 +(0, 0) + +julia> float(a//b) +ERROR: ArgumentError: invalid rational: zero(Int64)//zero(Int64) +Stacktrace: +[...] + +julia> a/b +NaN + +julia> a, b = 0, -1 +(0, -1) + +julia> float(a//b), a/b +(0.0, -0.0) ``` Constructing infinite rational values is acceptable: diff --git a/doc/src/manual/constructors.md b/doc/src/manual/constructors.md index 6ec206dade335..9f9afca3e076c 100644 --- a/doc/src/manual/constructors.md +++ b/doc/src/manual/constructors.md @@ -293,6 +293,8 @@ Point{Float64}(1.0, 2.5) julia> Point(1,2.5) ## implicit T ## ERROR: MethodError: no method matching Point(::Int64, ::Float64) +The type `Point` exists, but no method is defined for this combination of argument types when trying to construct it. + Closest candidates are: Point(::T, ::T) where T<:Real at none:2 @@ -372,10 +374,13 @@ However, other similar calls still don't work: ```jldoctest parametric2 julia> Point(1.5,2) ERROR: MethodError: no method matching Point(::Float64, ::Int64) +The type `Point` exists, but no method is defined for this combination of argument types when trying to construct it. Closest candidates are: Point(::T, !Matched::T) where T<:Real @ Main none:1 + Point(!Matched::Int64, !Matched::Float64) + @ Main none:1 Stacktrace: [...] @@ -491,6 +496,7 @@ operator, which provides a syntax for writing rationals (e.g. `1 ⊘ 2`). Julia' type uses the [`//`](@ref) operator for this purpose. Before these definitions, `⊘` is a completely undefined operator with only syntax and no meaning. Afterwards, it behaves just as described in [Rational Numbers](@ref) -- its entire behavior is defined in these few lines. +Note that the infix use of `⊘` works because Julia has a set of symbols that are recognized to be infix operators. The first and most basic definition just makes `a ⊘ b` construct a `OurRational` by applying the `OurRational` constructor to `a` and `b` when they are integers. When one of the operands of `⊘` is already a rational number, we construct a new rational for the resulting ratio slightly differently; @@ -555,6 +561,7 @@ julia> struct SummedArray{T<:Number,S<:Number} julia> SummedArray(Int32[1; 2; 3], Int32(6)) ERROR: MethodError: no method matching SummedArray(::Vector{Int32}, ::Int32) +The type `SummedArray` exists, but no method is defined for this combination of argument types when trying to construct it. Closest candidates are: SummedArray(::Vector{T}) where T @@ -568,3 +575,53 @@ This constructor will be invoked by the syntax `SummedArray(a)`. The syntax `new specifying parameters for the type to be constructed, i.e. this call will return a `SummedArray{T,S}`. `new{T,S}` can be used in any constructor definition, but for convenience the parameters to `new{}` are automatically derived from the type being constructed when possible. 
+ +## Constructors are just callable objects + +An object of any type may be [made callable](@ref "Function-like objects") by defining a +method. This includes types, i.e., objects of type [`Type`](@ref); and constructors may, +in fact, be viewed as just callable type objects. For example, there are many methods +defined on `Bool` and various supertypes of it: + +```julia-repl +julia> methods(Bool) +# 10 methods for type constructor: + [1] Bool(x::BigFloat) + @ Base.MPFR mpfr.jl:393 + [2] Bool(x::Float16) + @ Base float.jl:338 + [3] Bool(x::Rational) + @ Base rational.jl:138 + [4] Bool(x::Real) + @ Base float.jl:233 + [5] (dt::Type{<:Integer})(ip::Sockets.IPAddr) + @ Sockets ~/tmp/jl/jl/julia-nightly-assert/share/julia/stdlib/v1.11/Sockets/src/IPAddr.jl:11 + [6] (::Type{T})(x::Enum{T2}) where {T<:Integer, T2<:Integer} + @ Base.Enums Enums.jl:19 + [7] (::Type{T})(z::Complex) where T<:Real + @ Base complex.jl:44 + [8] (::Type{T})(x::Base.TwicePrecision) where T<:Number + @ Base twiceprecision.jl:265 + [9] (::Type{T})(x::T) where T<:Number + @ boot.jl:894 + [10] (::Type{T})(x::AbstractChar) where T<:Union{AbstractChar, Number} + @ char.jl:50 +``` + +The usual constructor syntax is exactly equivalent to the function-like object +syntax, so trying to define a method with each syntax will cause the first method +to be overwritten by the next one: + +```jldoctest +julia> struct S + f::Int + end + +julia> S() = S(7) +S + +julia> (::Type{S})() = S(8) # overwrites the previous constructor method + +julia> S() +S(8) +``` diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md index 5d12530892b1e..ed6f26725f87c 100644 --- a/doc/src/manual/control-flow.md +++ b/doc/src/manual/control-flow.md @@ -139,7 +139,7 @@ julia> test(1,2) x is less than y. julia> test(2,1) -ERROR: UndefVarError: `relation` not defined +ERROR: UndefVarError: `relation` not defined in local scope Stacktrace: [1] test(::Int64, ::Int64) at ./none:7 ``` @@ -248,7 +248,7 @@ no ## Short-Circuit Evaluation The `&&` and `||` operators in Julia correspond to logical “and” and “or” operations, respectively, -and are typically used for this purpose. However, they have an additional property of *short-circuit* +and are typically used for this purpose. However, they have an additional property of *short-circuit* evaluation: they don't necessarily evaluate their second argument, as explained below. (There are also bitwise `&` and `|` operators that can be used as logical “and” and “or” *without* short-circuit behavior, but beware that `&` and `|` have higher precedence than `&&` and `||` for evaluation order.) @@ -397,7 +397,7 @@ julia> while i <= 3 3 ``` -The `while` loop evaluates the condition expression (`i <= 5` in this case), and as long it remains +The `while` loop evaluates the condition expression (`i <= 3` in this case), and as long it remains `true`, keeps also evaluating the body of the `while` loop. If the condition expression is `false` when the `while` loop is first reached, the body is never evaluated. @@ -414,8 +414,33 @@ julia> for i = 1:3 3 ``` -Here the `1:3` is a range object, representing the sequence of numbers 1, 2, 3. The `for` -loop iterates through these values, assigning each one in turn to the variable `i`. One rather +Here the `1:3` is a [`range`](@ref) object, representing the sequence of numbers 1, 2, 3. The `for` +loop iterates through these values, assigning each one in turn to the variable `i`. 
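As a brief aside (not part of the original passage), the range is an ordinary, compact Julia object, and its elements can be materialized explicitly if you want to see exactly what the loop will visit:

```julia-repl
julia> typeof(1:3)
UnitRange{Int64}

julia> collect(1:3)   # the values the `for` loop above iterates over
3-element Vector{Int64}:
 1
 2
 3
```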
+In general, the `for` construct can loop over any "iterable" object (or "container"), from a range like `1:3` or `1:3:13` (a [`StepRange`](@ref) indicating every 3rd integer 1, 4, 7, …, 13) to more generic containers like arrays, including [iterators defined by user code](@ref man-interface-iteration) +or external packages. For containers other than ranges, the alternative +(but fully equivalent) keyword `in` or `∈` is typically used instead of `=`, since it makes +the code read more clearly: + +```jldoctest +julia> for i in [1,4,0] + println(i) + end +1 +4 +0 + +julia> for s ∈ ["foo","bar","baz"] + println(s) + end +foo +bar +baz +``` + +Various types of iterable containers will be introduced and discussed in later sections of the +manual (see, e.g., [Multi-dimensional Arrays](@ref man-multi-dim-arrays)). + +One rather important distinction between the previous `while` loop form and the `for` loop form is the scope during which the variable is visible. A `for` loop always introduces a new iteration variable in its body, regardless of whether a variable of the same name exists in the enclosing scope. @@ -433,7 +458,7 @@ julia> for j = 1:3 3 julia> j -ERROR: UndefVarError: `j` not defined +ERROR: UndefVarError: `j` not defined in `Main` ``` ```jldoctest @@ -455,29 +480,6 @@ Use `for outer` to modify the latter behavior and reuse an existing local variab See [Scope of Variables](@ref scope-of-variables) for a detailed explanation of variable scope, [`outer`](@ref), and how it works in Julia. -In general, the `for` loop construct can iterate over any container. In these cases, the alternative -(but fully equivalent) keyword `in` or `∈` is typically used instead of `=`, since it makes -the code read more clearly: - -```jldoctest -julia> for i in [1,4,0] - println(i) - end -1 -4 -0 - -julia> for s ∈ ["foo","bar","baz"] - println(s) - end -foo -bar -baz -``` - -Various types of iterable containers will be introduced and discussed in later sections of the -manual (see, e.g., [Multi-dimensional Arrays](@ref man-multi-dim-arrays)). - It is sometimes convenient to terminate the repetition of a `while` before the test condition is falsified or stop iterating in a `for` loop before the end of the iterable object is reached. This can be accomplished with the `break` keyword: @@ -599,6 +601,7 @@ below all interrupt the normal flow of control. | [`DomainError`](@ref) | | [`EOFError`](@ref) | | [`ErrorException`](@ref) | +| [`FieldError`](@ref) | | [`InexactError`](@ref) | | [`InitError`](@ref) | | [`InterruptException`](@ref) | @@ -637,11 +640,11 @@ julia> struct MyCustomException <: Exception end ### The [`throw`](@ref) function Exceptions can be created explicitly with [`throw`](@ref). For example, a function defined only -for nonnegative numbers could be written to [`throw`](@ref) a [`DomainError`](@ref) if the argument +for non-negative numbers could be written to [`throw`](@ref) a [`DomainError`](@ref) if the argument is negative: ```jldoctest; filter = r"Stacktrace:(\n \[[0-9]+\].*)*" -julia> f(x) = x>=0 ? exp(-x) : throw(DomainError(x, "argument must be nonnegative")) +julia> f(x) = x>=0 ? 
exp(-x) : throw(DomainError(x, "argument must be non-negative")) f (generic function with 1 method) julia> f(1) @@ -649,7 +652,7 @@ julia> f(1) julia> f(-1) ERROR: DomainError with -1: -argument must be nonnegative +argument must be non-negative Stacktrace: [1] f(::Int64) at ./none:1 ``` @@ -860,7 +863,8 @@ end else foo end - ERROR: UndefVarError: `foo` not defined + ERROR: UndefVarError: `foo` not defined in `Main` + Suggestion: check for spelling errors or missing imports. ``` Use the [`local` keyword](@ref local-scope) outside the `try` block to make the variable accessible from anywhere within the outer scope. diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md index f0c156f21ea62..9f785a560bfcc 100644 --- a/doc/src/manual/conversion-and-promotion.md +++ b/doc/src/manual/conversion-and-promotion.md @@ -165,6 +165,7 @@ constructor. Such a definition might look like this: ```julia +import Base: convert convert(::Type{MyType}, x) = MyType(x) ``` @@ -195,6 +196,8 @@ convert(::Type{T}, x::T) where {T<:Number} = x Similar definitions exist for `AbstractString`, [`AbstractArray`](@ref), and [`AbstractDict`](@ref). + + ## Promotion Promotion refers to converting values of mixed types to a single common type. Although it is not @@ -291,6 +294,7 @@ another type object, such that instances of the argument types will be promoted type. Thus, by defining the rule: ```julia +import Base: promote_rule promote_rule(::Type{Float64}, ::Type{Float32}) = Float64 ``` @@ -336,6 +340,7 @@ Finally, we finish off our ongoing case study of Julia's rational number type, w sophisticated use of the promotion mechanism with the following promotion rules: ```julia +import Base: promote_rule promote_rule(::Type{Rational{T}}, ::Type{S}) where {T<:Integer,S<:Integer} = Rational{promote_type(T,S)} promote_rule(::Type{Rational{T}}, ::Type{Rational{S}}) where {T<:Integer,S<:Integer} = Rational{promote_type(T,S)} promote_rule(::Type{Rational{T}}, ::Type{S}) where {T<:Integer,S<:AbstractFloat} = promote_type(T,S) diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md index 4531506d5c49d..873a94ffb2181 100644 --- a/doc/src/manual/distributed-computing.md +++ b/doc/src/manual/distributed-computing.md @@ -48,7 +48,7 @@ Generally it makes sense for `n` to equal the number of CPU threads (logical cor argument implicitly loads module [`Distributed`](@ref man-distributed). -```julia +```julia-repl $ julia -p 2 julia> r = remotecall(rand, 2, 2, 2) @@ -58,7 +58,7 @@ julia> s = @spawnat 2 1 .+ fetch(r) Future(2, 1, 5, nothing) julia> fetch(s) -2×2 Array{Float64,2}: +2×2 Matrix{Float64}: 1.18526 1.50912 1.16296 1.60607 ``` @@ -106,7 +106,7 @@ julia> s = @spawnat :any 1 .+ fetch(r) Future(3, 1, 5, nothing) julia> fetch(s) -2×2 Array{Float64,2}: +2×2 Matrix{Float64}: 1.38854 1.9098 1.20939 1.57158 ``` @@ -123,7 +123,7 @@ An important thing to remember is that, once fetched, a [`Future`](@ref Distribu locally. Further [`fetch`](@ref) calls do not entail a network hop. Once all referencing [`Future`](@ref Distributed.Future)s have fetched, the remote stored value is deleted. -[`@async`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We +[`Threads.@spawn`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We use it to create a "feeder" task for each process. 
Each task picks the next index that needs to be computed, then waits for its process to finish, then repeats until we run out of indices. Note that the feeder tasks do not begin to execute until the main task reaches the end of the [`@sync`](@ref) @@ -153,12 +153,12 @@ julia> function rand2(dims...) end julia> rand2(2,2) -2×2 Array{Float64,2}: +2×2 Matrix{Float64}: 0.153756 0.368514 1.15119 0.918912 julia> fetch(@spawnat :any rand2(2,2)) -ERROR: RemoteException(2, CapturedException(UndefVarError(Symbol("#rand2")) +ERROR: RemoteException(2, CapturedException(UndefVarError(Symbol("#rand2")))) Stacktrace: [...] ``` @@ -186,7 +186,7 @@ end ``` In order to refer to `MyType` across all processes, `DummyModule.jl` needs to be loaded on -every process. Calling `include("DummyModule.jl")` loads it only on a single process. To +every process. Calling `include("DummyModule.jl")` loads it only on a single process. To load it on every process, use the [`@everywhere`](@ref) macro (starting Julia with `julia -p 2`): @@ -198,7 +198,7 @@ loaded ``` As usual, this does not bring `DummyModule` into scope on any of the process, which requires -[`using`](@ref) or [`import`](@ref). Moreover, when `DummyModule` is brought into scope on one process, it +[`using`](@ref) or [`import`](@ref). Moreover, when `DummyModule` is brought into scope on one process, it is not on any other: ```julia-repl @@ -209,7 +209,7 @@ MyType(7) julia> fetch(@spawnat 2 MyType(7)) ERROR: On worker 2: -UndefVarError: `MyType` not defined +UndefVarError: `MyType` not defined in `Main` ⋮ julia> fetch(@spawnat 2 DummyModule.MyType(7)) @@ -262,7 +262,7 @@ as a programmatic means of adding, removing and querying the processes in a clus julia> using Distributed julia> addprocs(2) -2-element Array{Int64,1}: +2-element Vector{Int64}: 2 3 ``` @@ -270,10 +270,11 @@ julia> addprocs(2) Module [`Distributed`](@ref man-distributed) must be explicitly loaded on the master process before invoking [`addprocs`](@ref). It is automatically made available on the worker processes. -Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize -their global state (such as global variables, new method definitions, and loaded modules) with any -of the other running processes. You may use `addprocs(exeflags="--project")` to initialize a worker with -a particular environment, and then `@everywhere using ` or `@everywhere include("file.jl")`. +!!! note + Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize + their global state (such as command-line switches, global variables, new method definitions, and loaded modules) with any + of the other running processes. You may use `addprocs(exeflags="--project")` to initialize a worker with + a particular environment, and then `@everywhere using ` or `@everywhere include("file.jl")`. Other types of clusters can be supported by writing your own custom `ClusterManager`, as described below in the [ClusterManagers](@ref) section. @@ -539,9 +540,72 @@ Methods [`put!`](@ref), [`take!`](@ref), [`fetch`](@ref), [`isready`](@ref) and on a [`RemoteChannel`](@ref) are proxied onto the backing store on the remote process. [`RemoteChannel`](@ref) can thus be used to refer to user implemented `AbstractChannel` objects. -A simple example of this is provided in `dictchannel.jl` in the -[Examples repository](https://github.com/JuliaAttic/Examples), which uses a dictionary as its -remote store. 
+A simple example of this is the following `DictChannel` which uses a dictionary as its +remote store: + +```jldoctest +julia> struct DictChannel{T} <: AbstractChannel{T} + d::Dict + cond_take::Threads.Condition # waiting for data to become available + DictChannel{T}() where {T} = new(Dict(), Threads.Condition()) + DictChannel() = DictChannel{Any}() + end + +julia> begin + function Base.put!(D::DictChannel, k, v) + @lock D.cond_take begin + D.d[k] = v + notify(D.cond_take) + end + return D + end + function Base.take!(D::DictChannel, k) + @lock D.cond_take begin + v = fetch(D, k) + delete!(D.d, k) + return v + end + end + Base.isready(D::DictChannel) = @lock D.cond_take !isempty(D.d) + Base.isready(D::DictChannel, k) = @lock D.cond_take haskey(D.d, k) + function Base.fetch(D::DictChannel, k) + @lock D.cond_take begin + wait(D, k) + return D.d[k] + end + end + function Base.wait(D::DictChannel, k) + @lock D.cond_take begin + while !isready(D, k) + wait(D.cond_take) + end + end + end + end; + +julia> d = DictChannel(); + +julia> isready(d) +false + +julia> put!(d, :k, :v); + +julia> isready(d, :k) +true + +julia> fetch(d, :k) +:v + +julia> wait(d, :k) + +julia> take!(d, :k) +:v + +julia> isready(d, :k) +false +``` + + ## Channels and RemoteChannels @@ -593,7 +657,7 @@ julia> function make_jobs(n) julia> n = 12; -julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs +julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs julia> for p in workers() # start tasks on the workers to process requests in parallel remote_do(do_work, p, jobs, results) @@ -670,7 +734,7 @@ serialization/deserialization of data. Consequently, the call refers to the same as passed - no copies are created. This behavior is highlighted below: ```julia-repl -julia> using Distributed; +julia> using Distributed julia> rc = RemoteChannel(()->Channel(3)); # RemoteChannel created on local node @@ -684,7 +748,7 @@ julia> for i in 1:3 julia> result = [take!(rc) for _ in 1:3]; julia> println(result); -Array{Int64,1}[[3], [3], [3]] +[[3], [3], [3]] julia> println("Num Unique objects : ", length(unique(map(objectid, result)))); Num Unique objects : 1 @@ -703,7 +767,7 @@ julia> for i in 1:3 julia> result = [take!(rc) for _ in 1:3]; julia> println(result); -Array{Int64,1}[[1], [2], [3]] +[[1], [2], [3]] julia> println("Num Unique objects : ", length(unique(map(objectid, result)))); Num Unique objects : 3 @@ -750,16 +814,18 @@ will always operate on copies of arguments. ## [Shared Arrays](@id man-shared-arrays) -Shared Arrays use system shared memory to map the same array across many processes. While there -are some similarities to a [`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl), the -behavior of a [`SharedArray`](@ref) is quite different. In a [`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl), -each process has local access to just a chunk of the data, and no two processes share the same -chunk; in contrast, in a [`SharedArray`](@ref) each "participating" process has access to the -entire array. A [`SharedArray`](@ref) is a good choice when you want to have a large amount of -data jointly accessible to two or more processes on the same machine. +Shared Arrays use system shared memory to map the same array across many processes. A +[`SharedArray`](@ref) is a good choice when you want to have a large amount of data jointly +accessible to two or more processes on the same machine. 
Shared Array support is available via the +module `SharedArrays`, which must be explicitly loaded on all participating workers. -Shared Array support is available via module `SharedArrays` which must be explicitly loaded on -all participating workers. +A complementary data structure is provided by the external package +[`DistributedArrays.jl`](https://github.com/JuliaParallel/DistributedArrays.jl) in the form of a +`DArray`. While there are some similarities to a [`SharedArray`](@ref), the behavior of a +[`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl) is quite different. In a +[`SharedArray`](@ref), each "participating" process has access to the entire array; in contrast, in +a [`DArray`](https://github.com/JuliaParallel/DistributedArrays.jl), each process has local access +to just a chunk of the data, and no two processes share the same chunk. [`SharedArray`](@ref) indexing (assignment and accessing values) works just as with regular arrays, and is efficient because the underlying memory is available to the local process. Therefore, @@ -789,7 +855,7 @@ Here's a brief example: julia> using Distributed julia> addprocs(3) -3-element Array{Int64,1}: +3-element Vector{Int64}: 2 3 4 @@ -797,7 +863,7 @@ julia> addprocs(3) julia> @everywhere using SharedArrays julia> S = SharedArray{Int,2}((3,4), init = S -> S[localindices(S)] = repeat([myid()], length(localindices(S)))) -3×4 SharedArray{Int64,2}: +3×4 SharedMatrix{Int64}: 2 2 3 4 2 3 3 4 2 3 4 4 @@ -806,7 +872,7 @@ julia> S[3,2] = 7 7 julia> S -3×4 SharedArray{Int64,2}: +3×4 SharedMatrix{Int64}: 2 2 3 4 2 3 3 4 2 7 4 4 @@ -818,7 +884,7 @@ you wish: ```julia-repl julia> S = SharedArray{Int,2}((3,4), init = S -> S[indexpids(S):length(procs(S)):length(S)] = repeat([myid()], length( indexpids(S):length(procs(S)):length(S)))) -3×4 SharedArray{Int64,2}: +3×4 SharedMatrix{Int64}: 2 2 2 2 3 3 3 3 4 4 4 4 @@ -830,7 +896,7 @@ conflicts. For example: ```julia @sync begin for p in procs(S) - @async begin + Threads.@spawn begin remotecall_wait(fill!, p, S, p) end end @@ -912,7 +978,7 @@ and one that delegates in chunks: julia> function advection_shared!(q, u) @sync begin for p in procs(q) - @async remotecall_wait(advection_shared_chunk!, p, q, u) + Threads.@spawn remotecall_wait(advection_shared_chunk!, p, q, u) end end q @@ -1263,8 +1329,11 @@ in future releases. ## Noteworthy external packages Outside of Julia parallelism there are plenty of external packages that should be mentioned. -For example [MPI.jl](https://github.com/JuliaParallel/MPI.jl) is a Julia wrapper for the `MPI` protocol, [Dagger.jl](https://github.com/JuliaParallel/Dagger.jl) provides functionality similar to Python's [Dask](https://dask.org/), and -[DistributedArrays.jl](https://github.com/JuliaParallel/Distributedarrays.jl) provides array operations distributed across workers, as presented in [Shared Arrays](@ref). +For example, [`MPI.jl`](https://github.com/JuliaParallel/MPI.jl) is a Julia wrapper for the `MPI` +protocol, [`Dagger.jl`](https://github.com/JuliaParallel/Dagger.jl) provides functionality similar to +Python's [Dask](https://dask.org/), and +[`DistributedArrays.jl`](https://github.com/JuliaParallel/Distributedarrays.jl) provides array +operations distributed across workers, as [outlined above](@ref man-shared-arrays). 
A mention must be made of Julia's GPU programming ecosystem, which includes: @@ -1302,7 +1371,7 @@ julia> all(C .≈ 4*π) true julia> typeof(C) -Array{Float64,1} +Vector{Float64} (alias for Array{Float64, 1}) julia> dB = distribute(B); @@ -1314,7 +1383,7 @@ julia> all(dC .≈ 4*π) true julia> typeof(dC) -DistributedArrays.DArray{Float64,1,Array{Float64,1}} +DistributedArrays.DArray{Float64,1,Vector{Float64}} julia> cuB = CuArray(B); @@ -1350,7 +1419,7 @@ function declaration, let's see if it works with the aforementioned datatypes: julia> M = [2. 1; 1 1]; julia> v = rand(2) -2-element Array{Float64,1}: +2-element Vector{Float64}: 0.40395 0.445877 @@ -1373,7 +1442,7 @@ julia> dv = distribute(v); julia> dC = power_method(dM, dv); julia> typeof(dC) -Tuple{DistributedArrays.DArray{Float64,1,Array{Float64,1}},Float64} +Tuple{DistributedArrays.DArray{Float64,1,Vector{Float64}},Float64} ``` To end this short exposure to external packages, we can consider `MPI.jl`, a Julia wrapper diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md index 4c724e1deaaeb..a11d41d441b73 100644 --- a/doc/src/manual/documentation.md +++ b/doc/src/manual/documentation.md @@ -19,6 +19,10 @@ environments provide a way to access documentation directly: - In [Juno](https://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object under the cursor. + +`Docs.hasdoc(module, name)::Bool` tells whether a name has a docstring. `Docs.undocumented_names(module; all)` +returns the undocumented names in a module. + ## Writing Documentation Julia enables package developers and users to document functions, types and other objects easily @@ -138,7 +142,7 @@ As in the example above, we recommend following some simple conventions when wri # Examples ```jldoctest julia> a = [1 2; 3 4] - 2×2 Array{Int64,2}: + 2×2 Matrix{Int64}: 1 2 3 4 ``` @@ -303,15 +307,16 @@ Or for use with Julia's metaprogramming functionality: ```julia for (f, op) in ((:add, :+), (:subtract, :-), (:multiply, :*), (:divide, :/)) @eval begin - $f(a,b) = $op(a,b) + $f(a, b) = $op(a, b) end end -@doc "`add(a,b)` adds `a` and `b` together" add -@doc "`subtract(a,b)` subtracts `b` from `a`" subtract +@doc "`add(a, b)` adds `a` and `b` together" add +@doc "`subtract(a, b)` subtracts `b` from `a`" subtract ``` -Documentation in non-toplevel blocks, such as `begin`, `if`, `for`, and `let`, should be -added to the documentation system via `@doc` as well. For example: +Documentation in non-toplevel blocks, such as `begin`, `if`, `for`, `let`, and +inner constructors, should be added to the documentation system via `@doc` as +well. For example: ```julia if condition() @@ -402,7 +407,7 @@ f(x) = x "..." function f(x) - x + return x end "..." @@ -429,10 +434,13 @@ Adds docstring `"..."` to the `@m(::Any)` macro definition. ```julia "..." -:(@m) +:(@m1) + +"..." +macro m2 end ``` -Adds docstring `"..."` to the macro named `@m`. +Adds docstring `"..."` to the macros named `@m1` and `@m2`. ### Types @@ -453,6 +461,20 @@ end Adds the docstring `"..."` to types `T1`, `T2`, and `T3`. +``` +"..." +T1 + +"..." +T2 + +"..." +T3 +``` + +Adds the docstring `"..."` to types `T1`, `T2`, and `T3`. +The previous version is the preferred syntax, however both are equivalent. + ```julia "..." struct T @@ -460,11 +482,17 @@ struct T x "y" y + + @doc "Inner constructor" + function T() + new(...) + end end ``` -Adds docstring `"..."` to type `T`, `"x"` to field `T.x` and `"y"` to field `T.y`. Also applicable -to `mutable struct` types. 
+Adds docstring `"..."` to type `T`, `"x"` to field `T.x`, `"y"` to field `T.y`, +and `"Inner constructor"` to the inner constructor `T()`. Also applicable to +`mutable struct` types. ### Modules @@ -483,6 +511,20 @@ end Adds docstring `"..."` to the `Module` `M`. Adding the docstring above the `Module` is the preferred syntax, however both are equivalent. +The module docstring is evaluated *inside* the scope of the module, allowing +access to all the symbols defined in and imported into the module: + +```julia +"The magic number is $(MAGIC)." +module DocStringEval +const MAGIC = 42 +end +``` + +Documenting a `baremodule` by placing a docstring above the expression automatically imports +`@doc` into the module. These imports must be done manually when the module expression is not +documented: + ```julia "..." baremodule M @@ -499,10 +541,6 @@ f(x) = x end ``` -Documenting a `baremodule` by placing a docstring above the expression automatically imports -`@doc` into the module. These imports must be done manually when the module expression is not -documented. - ### Global Variables ```julia diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md index 2b6e48c533849..f578e10764101 100644 --- a/doc/src/manual/embedding.md +++ b/doc/src/manual/embedding.md @@ -247,7 +247,7 @@ Its second argument `args` is an array of `jl_value_t*` arguments and `nargs` is arguments. There is also an alternative, possibly simpler, way of calling Julia functions and that is via [`@cfunction`](@ref). -Using `@cfunction` allows you to do the type conversions on the Julia side which typically is easier than doing it on +Using `@cfunction` allows you to do the type conversions on the Julia side, which is typically easier than doing it on the C side. The `sqrt` example above would with `@cfunction` be written as: ```c @@ -255,7 +255,10 @@ double (*sqrt_jl)(double) = jl_unbox_voidpointer(jl_eval_string("@cfunction(sqrt double ret = sqrt_jl(2.0); ``` -where we first define a C callable function in Julia, extract the function pointer from it and finally call it. +where we first define a C callable function in Julia, extract the function pointer from it, and finally call it. +In addition to simplifying type conversions by doing them in the higher-level language, calling Julia functions +via `@cfunction` pointers eliminates the dynamic-dispatch overhead required by `jl_call` (for which all of the +arguments are "boxed"), and should have performance equivalent to native C function pointers. ## Memory Management @@ -409,7 +412,7 @@ per pointer using ```c jl_module_t *mod = jl_main_module; jl_sym_t *var = jl_symbol("var"); -jl_binding_t *bp = jl_get_binding_wr(mod, var); +jl_binding_t *bp = jl_get_binding_wr(mod, var, 1); jl_checked_assignment(bp, mod, var, val); ``` @@ -432,14 +435,14 @@ object has just been allocated and no garbage collection has run since then. Not `jl_...` functions can sometimes invoke garbage collection. The write barrier is also necessary for arrays of pointers when updating their data directly. -For example: +Calling `jl_array_ptr_set` is usually much preferred. But direct updates can be done. For example: ```c jl_array_t *some_array = ...; // e.g. a Vector{Any} -void **data = (void**)jl_array_data(some_array); +void **data = jl_array_data(some_array, void*); jl_value_t *some_value = ...; data[0] = some_value; -jl_gc_wb(some_array, some_value); +jl_gc_wb(jl_array_owner(some_array), some_value); ``` ### Controlling the Garbage Collector @@ -487,13 +490,13 @@ referenced. 
In order to access the data of `x`, we can use `jl_array_data`: ```c -double *xData = (double*)jl_array_data(x); +double *xData = jl_array_data(x, double); ``` Now we can fill the array: ```c -for(size_t i=0; i[]` command line option. + +Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of: + + - B (bytes) + - K (kibibytes) + - M (mebibytes) + - G (gibibytes) + - T (tebibytes) + - % (percentage of physical memory) + +For example, `JULIA_HEAP_SIZE_HINT=1G` would provide a 1 GB heap size hint to the garbage collector. + ## REPL formatting Environment variables that determine how REPL output should be formatted at the -terminal. Generally, these variables should be set to [ANSI terminal escape +terminal. The `JULIA_*_COLOR` variables should be set to [ANSI terminal escape sequences](https://en.wikipedia.org/wiki/ANSI_escape_code). Julia provides a high-level interface with much of the same functionality; see the section on [The Julia REPL](@ref). -### `JULIA_ERROR_COLOR` +### [`JULIA_ERROR_COLOR`](@id JULIA_ERROR_COLOR) The formatting `Base.error_color()` (default: light red, `"\033[91m"`) that errors should have at the terminal. -### `JULIA_WARN_COLOR` +### [`JULIA_WARN_COLOR`](@id JULIA_WARN_COLOR) The formatting `Base.warn_color()` (default: yellow, `"\033[93m"`) that warnings should have at the terminal. -### `JULIA_INFO_COLOR` +### [`JULIA_INFO_COLOR`](@id JULIA_INFO_COLOR) The formatting `Base.info_color()` (default: cyan, `"\033[36m"`) that info should have at the terminal. -### `JULIA_INPUT_COLOR` +### [`JULIA_INPUT_COLOR`](@id JULIA_INPUT_COLOR) The formatting `Base.input_color()` (default: normal, `"\033[0m"`) that input should have at the terminal. -### `JULIA_ANSWER_COLOR` +### [`JULIA_ANSWER_COLOR`](@id JULIA_ANSWER_COLOR) The formatting `Base.answer_color()` (default: normal, `"\033[0m"`) that output should have at the terminal. +### [`NO_COLOR`](@id NO_COLOR) + +When this variable is present and not an empty string (regardless of its value) then colored +text will be disabled on the REPL. Can be overridden with the flag `--color=yes` or with the +environment variable [`FORCE_COLOR`](@ref FORCE_COLOR). This environment variable is +[commonly recognized by command-line applications](https://no-color.org/). + +### [`FORCE_COLOR`](@id FORCE_COLOR) + +When this variable is present and not an empty string (regardless of its value) then +colored text will be enabled on the REPL. Can be overridden with the flag `--color=no`. This +environment variable is [commonly recognized by command-line applications](https://force-color.org/). + ## System and Package Image Building -### `JULIA_CPU_TARGET` +### [`JULIA_CPU_TARGET`](@id JULIA_CPU_TARGET) Modify the target machine architecture for (pre)compiling [system](@ref sysimg-multi-versioning) and [package images](@ref pkgimgs-multi-versioning). @@ -387,9 +474,9 @@ Unlike the `--cpu-target`, or `-C`, [command line option](@ref cli), it does not just-in-time (JIT) code generation within a Julia session where machine code is only stored in memory. -Valid values for `JULIA_CPU_TARGET` can be obtained by executing `julia -C help`. +Valid values for [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) can be obtained by executing `julia -C help`. 
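As a small, hedged illustration (the output below is made up and will differ on your machine), you can inspect from within Julia which CPU the running session detected; this can help when composing a `JULIA_CPU_TARGET` string for a heterogeneous cluster:

```julia-repl
julia> Sys.CPU_NAME   # microarchitecture name detected for this host
"skylake-avx512"

julia> Sys.ARCH       # instruction-set architecture
:x86_64
```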
-Setting `JULIA_CPU_TARGET` is important for heterogeneous compute systems where processors of +Setting [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) is important for heterogeneous compute systems where processors of distinct types or features may be present. This is commonly encountered in high performance computing (HPC) clusters since the component nodes may be using distinct processors. @@ -425,38 +512,22 @@ A few special features are supported: ## Debugging and profiling -### `JULIA_DEBUG` +### [`JULIA_DEBUG`](@id JULIA_DEBUG) Enable debug logging for a file or module, see [`Logging`](@ref man-logging) for more information. -### `JULIA_GC_ALLOC_POOL`, `JULIA_GC_ALLOC_OTHER`, `JULIA_GC_ALLOC_PRINT` - -If set, these environment variables take strings that optionally start with the -character `'r'`, followed by a string interpolation of a colon-separated list of -three signed 64-bit integers (`int64_t`). This triple of integers `a:b:c` -represents the arithmetic sequence `a`, `a + b`, `a + 2*b`, ... `c`. +### [`JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`](@id JULIA_PROFILE_PEEK_HEAP_SNAPSHOT) -* If it's the `n`th time that `jl_gc_pool_alloc()` has been called, and `n` - belongs to the arithmetic sequence represented by `$JULIA_GC_ALLOC_POOL`, - then garbage collection is forced. -* If it's the `n`th time that `maybe_collect()` has been called, and `n` belongs - to the arithmetic sequence represented by `$JULIA_GC_ALLOC_OTHER`, then garbage - collection is forced. -* If it's the `n`th time that `jl_gc_collect()` has been called, and `n` belongs - to the arithmetic sequence represented by `$JULIA_GC_ALLOC_PRINT`, then counts - for the number of calls to `jl_gc_pool_alloc()` and `maybe_collect()` are - printed. +Enable collecting of a heap snapshot during execution via the profiling peek mechanism. +See [Triggered During Execution](@ref). -If the value of the environment variable begins with the character `'r'`, then -the interval between garbage collection events is randomized. - -!!! note +### [`JULIA_TIMING_SUBSYSTEMS`](@id JULIA_TIMING_SUBSYSTEMS) - These environment variables only have an effect if Julia was compiled with - garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1` - in the build configuration). +Allows you to enable or disable zones for a specific Julia run. +For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable +the `INFERENCE` zones. See [Dynamically Enabling and Disabling Zones](@ref). -### `JULIA_GC_NO_GENERATIONAL` +### [`JULIA_GC_NO_GENERATIONAL`](@id JULIA_GC_NO_GENERATIONAL) If set to anything besides `0`, then the Julia garbage collector never performs "quick sweeps" of memory. @@ -467,7 +538,7 @@ If set to anything besides `0`, then the Julia garbage collector never performs garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1` in the build configuration). -### `JULIA_GC_WAIT_FOR_DEBUGGER` +### [`JULIA_GC_WAIT_FOR_DEBUGGER`](@id JULIA_GC_WAIT_FOR_DEBUGGER) If set to anything besides `0`, then the Julia garbage collector will wait for a debugger to attach instead of aborting whenever there's a critical error. @@ -478,7 +549,7 @@ a debugger to attach instead of aborting whenever there's a critical error. garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1` in the build configuration). 
-### `ENABLE_JITPROFILING` +### [`ENABLE_JITPROFILING`](@id ENABLE_JITPROFILING) If set to anything besides `0`, then the compiler will create and register an event listener for just-in-time (JIT) profiling. @@ -494,12 +565,16 @@ event listener for just-in-time (JIT) profiling. * [Perf](https://perf.wiki.kernel.org) (`USE_PERF_JITEVENTS` set to `1` in the build configuration). This integration is enabled by default. -### `ENABLE_GDBLISTENER` +### [`ENABLE_GDBLISTENER`](@id ENABLE_GDBLISTENER) If set to anything besides `0` enables GDB registration of Julia code on release builds. On debug builds of Julia this is always enabled. Recommended to use with `-g 2`. -### `JULIA_LLVM_ARGS` +### [`JULIA_LLVM_ARGS`](@id JULIA_LLVM_ARGS) Arguments to be passed to the LLVM backend. + +### `JULIA_FALLBACK_REPL` + +Forces the fallback repl instead of REPL.jl. diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index bdecb5ecf106f..2673ca7532acf 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -8,34 +8,51 @@ No. ### Why don't you compile Matlab/Python/R/… code to Julia? -Since many people are familiar with the syntax of other dynamic languages, and lots of code has already been written in those languages, it is natural to wonder why we didn't just plug a Matlab or Python front-end into a Julia back-end (or “transpile” code to Julia) in order to get all the performance benefits of Julia without requiring programmers to learn a new language. Simple, right? +Since many people are familiar with the syntax of other dynamic languages, and lots of code has already been written in those languages, it is natural to wonder why we didn't just plug a Matlab or Python front-end into a Julia back-end (or “transpile” code to Julia) in order to get all the performance benefits of Julia without requiring programmers to learn a new language. Simple, right? -The basic issue is that there is *nothing special about Julia's compiler*: we use a commonplace compiler (LLVM) with no “secret sauce” that other language developers don't know about. Indeed, Julia's compiler is in many ways much simpler than those of other dynamic languages (e.g. PyPy or LuaJIT). Julia's performance advantage derives almost entirely from its front-end: its language semantics allow a [well-written Julia program](@ref man-performance-tips) to *give more opportunities to the compiler* to generate efficient code and memory layouts. If you tried to compile Matlab or Python code to Julia, our compiler would be limited by the semantics of Matlab or Python to producing code no better than that of existing compilers for those languages (and probably worse). The key role of semantics is also why several existing Python compilers (like Numba and Pythran) only attempt to optimize a small subset of the language (e.g. operations on Numpy arrays and scalars), and for this subset they are already doing at least as well as we could for the same semantics. The people working on those projects are incredibly smart and have accomplished amazing things, but retrofitting a compiler onto a language that was designed to be interpreted is a very difficult problem. +The basic issue is that there is *nothing special about Julia's compiler*: we use a commonplace compiler (LLVM) with no “secret sauce” that other language developers don't know about. Indeed, Julia's compiler is in many ways much simpler than those of other dynamic languages (e.g. PyPy or LuaJIT). 
Julia's performance advantage derives almost entirely from its front-end: its language semantics allow a [well-written Julia program](@ref man-performance-tips) to *give more opportunities to the compiler* to generate efficient code and memory layouts. If you tried to compile Matlab or Python code to Julia, our compiler would be limited by the semantics of Matlab or Python to producing code no better than that of existing compilers for those languages (and probably worse). The key role of semantics is also why several existing Python compilers (like Numba and Pythran) only attempt to optimize a small subset of the language (e.g. operations on Numpy arrays and scalars), and for this subset they are already doing at least as well as we could for the same semantics. The people working on those projects are incredibly smart and have accomplished amazing things, but retrofitting a compiler onto a language that was designed to be interpreted is a very difficult problem. -Julia's advantage is that good performance is not limited to a small subset of “built-in” types and operations, and one can write high-level type-generic code that works on arbitrary user-defined types while remaining fast and memory-efficient. Types in languages like Python simply don't provide enough information to the compiler for similar capabilities, so as soon as you used those languages as a Julia front-end you would be stuck. +Julia's advantage is that good performance is not limited to a small subset of “built-in” types and operations, and one can write high-level type-generic code that works on arbitrary user-defined types while remaining fast and memory-efficient. Types in languages like Python simply don't provide enough information to the compiler for similar capabilities, so as soon as you used those languages as a Julia front-end you would be stuck. For similar reasons, automated translation to Julia would also typically generate unreadable, slow, non-idiomatic code that would not be a good starting point for a native Julia port from another language. -On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)! The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities. We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more. +On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)! The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities. We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more. ## [Public API](@id man-api) ### How does Julia define its public API? -Julia `Base` and standard library functionality described in the -[the documentation](https://docs.julialang.org/) that is not marked as unstable -(e.g. experimental and internal) is covered by [SemVer](https://semver.org/). -Functions, types, and constants are not part of the public API if they are not -included in the documentation, _even if they have docstrings_. 
+Julia's public [API](https://en.wikipedia.org/wiki/API) is the behavior described in +documentation of public symbols from `Base` and the standard libraries. Functions, +types, and constants are not part of the public API if they are not public, even if +they have docstrings or are described in the documentation. Further, only the documented +behavior of public symbols is part of the public API. Undocumented behavior of public +symbols is internal. + +Public symbols are those marked with either `public foo` or `export foo`. + +In other words: + +- Documented behavior of public symbols is part of the public API. +- Undocumented behavior of public symbols is not part of the public API. +- Documented behavior of private symbols is not part of the public API. +- Undocumented behavior of private symbols is not part of the public API. + +You can get a complete list of the public symbols from a module with `names(MyModule)`. + +Package authors are encouraged to define their public API similarly. + +Anything in Julia's Public API is covered by [SemVer](https://semver.org/) and therefore +will not be removed or receive meaningful breaking changes before Julia 2.0. ### There is a useful undocumented function/type/constant. Can I use it? -Updating Julia may break your code if you use non-public API. If the code is -self-contained, it may be a good idea to copy it into your project. If you want to rely on +Updating Julia may break your code if you use non-public API. If the code is +self-contained, it may be a good idea to copy it into your project. If you want to rely on a complex non-public API, especially when using it from a stable package, it is a good idea to open an [issue](https://github.com/JuliaLang/julia/issues) or [pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it -into a public API. However, we do not discourage the attempt to create packages that expose +into a public API. However, we do not discourage the attempt to create packages that expose stable public interfaces while relying on non-public implementation details of Julia and buffering the differences across different Julia versions. @@ -54,12 +71,12 @@ session (technically, in module `Main`), it is always present. If memory usage is your concern, you can always replace objects with ones that consume less memory. For example, if `A` is a gigabyte-sized array that you no longer need, you can free the memory -with `A = nothing`. The memory will be released the next time the garbage collector runs; you can force +with `A = nothing`. The memory will be released the next time the garbage collector runs; you can force this to happen with [`GC.gc()`](@ref Base.GC.gc). Moreover, an attempt to use `A` will likely result in an error, because most methods are not defined on type `Nothing`. ### How can I modify the declaration of a type in my session? -Perhaps you've defined a type and then realize you need to add a new field. If you try this at +Perhaps you've defined a type and then realize you need to add a new field. If you try this at the REPL, you get the error: ``` @@ -70,8 +87,8 @@ Types in module `Main` cannot be redefined. While this can be inconvenient when you are developing new code, there's an excellent workaround. Modules can be replaced by redefining them, and so if you wrap all your new code inside a module -you can redefine types and constants. 
You can't import the type names into `Main` and then expect -to be able to redefine them there, but you can use the module name to resolve the scope. In other +you can redefine types and constants. You can't import the type names into `Main` and then expect +to be able to redefine them there, but you can use the module name to resolve the scope. In other words, while developing you might use a workflow something like this: ```julia @@ -101,7 +118,7 @@ If one needs functionality both available as a library and a script, it is bette Running a Julia script using `julia file.jl` does not throw [`InterruptException`](@ref) when you try to terminate it with CTRL-C -(SIGINT). To run a certain code before terminating a Julia script, +(SIGINT). To run a certain code before terminating a Julia script, which may or may not be caused by CTRL-C, use [`atexit`](@ref). Alternatively, you can use `julia -e 'include(popfirst!(ARGS))' file.jl` to execute a script while being able to catch @@ -134,7 +151,7 @@ invoking an [operating-system shell](https://en.wikipedia.org/wiki/Shell_(comput That means that `run` does not perform wildcard expansion of `*` (["globbing"](https://en.wikipedia.org/wiki/Glob_(programming))), nor does it interpret [shell pipelines](https://en.wikipedia.org/wiki/Pipeline_(Unix)) like `|` or `>`. -You can still do globbing and pipelines using Julia features, however. For example, the built-in +You can still do globbing and pipelines using Julia features, however. For example, the built-in [`pipeline`](@ref) function allows you to chain external programs and files, similar to shell pipes, and the [Glob.jl package](https://github.com/vtjnash/Glob.jl) implements POSIX-compatible globbing. @@ -158,7 +175,7 @@ end ``` and notice that it works fine in an interactive environment (like the Julia REPL), but gives ```UndefVarError: `x` not defined``` when you try to run it in script or other -file. What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**. +file. What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**. Here, `x` is a global variable, `while` defines a [local scope](@ref scope-of-variables), and `x += 1` is an assignment to a global in that local scope. @@ -167,7 +184,7 @@ As mentioned above, Julia (version 1.5 or later) allows you to omit the `global` keyword for code in the REPL (and many other interactive environments), to simplify exploration (e.g. copy-pasting code from a function to run interactively). However, once you move to code in files, Julia requires a more disciplined approach -to global variables. You have least three options: +to global variables. You have least three options: 1. Put the code into a function (so that `x` is a *local* variable in a function). In general, it is good software engineering to use functions rather than global scripts (search online for "why global variables bad" to see many explanations). In Julia, global variables are also [slow](@ref man-performance-tips). 2. Wrap the code in a [`let`](@ref) block. (This makes `x` a local variable within the `let ... end` statement, again eliminating the need for `global`). @@ -235,7 +252,7 @@ the variables `A` and `x` were distinct bindings referring to the same mutable ` ### Can I use `using` or `import` inside a function? -No, you are not allowed to have a `using` or `import` statement inside a function. 
If you want +No, you are not allowed to have a `using` or `import` statement inside a function. If you want to import a module but only use its symbols inside a specific function or set of functions, you have two options: @@ -249,7 +266,7 @@ have two options: ``` This loads the module `Foo` and defines a variable `Foo` that refers to the module, but does not - import any of the other symbols from the module into the current namespace. You refer to the + import any of the other symbols from the module into the current namespace. You refer to the `Foo` symbols by their qualified names `Foo.bar` etc. 2. Wrap your function in a module: @@ -378,7 +395,7 @@ julia> twothreearr() ### [What does "type-stable" mean?](@id man-type-stability) -It means that the type of the output is predictable from the types of the inputs. In particular, +It means that the type of the output is predictable from the types of the inputs. In particular, it means that the type of the output cannot vary depending on the *values* of the inputs. The following code is *not* type-stable: @@ -410,9 +427,9 @@ Stacktrace: [...] ``` -This behavior is an inconvenient consequence of the requirement for type-stability. In the case +This behavior is an inconvenient consequence of the requirement for type-stability. In the case of [`sqrt`](@ref), most users want `sqrt(2.0)` to give a real number, and would be unhappy if -it produced the complex number `1.4142135623730951 + 0.0im`. One could write the [`sqrt`](@ref) +it produced the complex number `1.4142135623730951 + 0.0im`. One could write the [`sqrt`](@ref) function to switch to a complex-valued output only when passed a negative number (which is what [`sqrt`](@ref) does in some other languages), but then the result would not be [type-stable](@ref man-type-stability) and the [`sqrt`](@ref) function would have poor performance. @@ -430,14 +447,14 @@ julia> sqrt(-2.0+0im) The parameters of a [parametric type](@ref Parametric-Types) can hold either types or bits values, and the type itself chooses how it makes use of these parameters. For example, `Array{Float64, 2}` is parameterized by the type `Float64` to express its -element type and the integer value `2` to express its number of dimensions. When +element type and the integer value `2` to express its number of dimensions. When defining your own parametric type, you can use subtype constraints to declare that a certain parameter must be a subtype ([`<:`](@ref)) of some abstract type or a previous -type parameter. There is not, however, a dedicated syntax to declare that a parameter +type parameter. There is not, however, a dedicated syntax to declare that a parameter must be a _value_ of a given type — that is, you cannot directly declare that a dimensionality-like parameter [`isa`](@ref) `Int` within the `struct` definition, for -example. Similarly, you cannot do computations (including simple things like addition -or subtraction) on type parameters. Instead, these sorts of constraints and +example. Similarly, you cannot do computations (including simple things like addition +or subtraction) on type parameters. Instead, these sorts of constraints and relationships may be expressed through additional type parameters that are computed and enforced within the type's [constructors](@ref man-constructors). @@ -708,7 +725,7 @@ julia> module Foo julia> Foo.foo() ERROR: On worker 2: -UndefVarError: `Foo` not defined +UndefVarError: `Foo` not defined in `Main` Stacktrace: [...] 
``` @@ -729,7 +746,7 @@ julia> @everywhere module Foo julia> Foo.foo() ERROR: On worker 2: -UndefVarError: `gvar` not defined +UndefVarError: `gvar` not defined in `Main.Foo` Stacktrace: [...] ``` @@ -765,7 +782,7 @@ bar (generic function with 1 method) julia> remotecall_fetch(bar, 2) ERROR: On worker 2: -UndefVarError: `#bar` not defined +UndefVarError: `#bar` not defined in `Main` [...] julia> anon_bar = ()->1 @@ -787,6 +804,7 @@ foo (generic function with 1 method) julia> foo([1]) ERROR: MethodError: no method matching foo(::Vector{Int64}) +The function `foo` exists, but no method is defined for this combination of argument types. Closest candidates are: foo(!Matched::Vector{Real}) @@ -882,7 +900,7 @@ array to store the result. If you prefer to mutate `x`, use `x .+= y` to update individually. While this behavior might surprise some, the choice is deliberate. The main reason is the presence -of immutable objects within Julia, which cannot change their value once created. Indeed, a +of immutable objects within Julia, which cannot change their value once created. Indeed, a number is an immutable object; the statements `x = 5; x += 1` do not modify the meaning of `5`, they modify the value bound to `x`. For an immutable, the only way to change the value is to reassign it. @@ -925,7 +943,7 @@ Consider the printed output from the following: ```jldoctest julia> @sync for i in 1:3 - @async write(stdout, string(i), " Foo ", " Bar ") + Threads.@spawn write(stdout, string(i), " Foo ", " Bar ") end 123 Foo Foo Foo Bar Bar Bar ``` @@ -938,7 +956,7 @@ in the above example results in: ```jldoctest julia> @sync for i in 1:3 - @async println(stdout, string(i), " Foo ", " Bar ") + Threads.@spawn println(stdout, string(i), " Foo ", " Bar ") end 1 Foo Bar 2 Foo Bar @@ -951,7 +969,7 @@ You can lock your writes with a `ReentrantLock` like this: julia> l = ReentrantLock(); julia> @sync for i in 1:3 - @async begin + Threads.@spawn begin lock(l) try write(stdout, string(i), " Foo ", " Bar ") @@ -1035,17 +1053,15 @@ Modifying OpenBLAS settings or compiling Julia with a different BLAS library, eg ### How do I manage precompilation caches in distributed file systems? -When using `julia` in high-performance computing (HPC) facilities, invoking -_n_ `julia` processes simultaneously creates at most _n_ temporary copies of -precompilation cache files. If this is an issue (slow and/or small distributed -file system), you may: - -1. Use `julia` with `--compiled-modules=no` flag to turn off precompilation. -2. Configure a private writable depot using `pushfirst!(DEPOT_PATH, private_path)` - where `private_path` is a path unique to this `julia` process. This - can also be done by setting environment variable `JULIA_DEPOT_PATH` to - `$private_path:$HOME/.julia`. -3. Create a symlink from `~/.julia/compiled` to a directory in a scratch space. +When using Julia in high-performance computing (HPC) facilities with shared filesystems, it is recommended to use a shared +depot (via the [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable). Since Julia v1.10, multiple Julia processes on functionally similar +workers and using the same depot will coordinate via pidfile locks to only spend effort precompiling on one process while the +others wait. The precompilation process will indicate when the process is precompiling or waiting for another that is +precompiling. If non-interactive the messages are via `@debug`. 
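As an illustrative sketch only (the shared path is hypothetical), the shared depot is normally selected in the cluster job script before Julia starts, and can then be checked from within a session:

```julia-repl
# In the job script (shell, not Julia), something along the lines of:
#   export JULIA_DEPOT_PATH="/shared/project/depot:$HOME/.julia"

julia> first(DEPOT_PATH)   # precompile caches are typically written under the first depot
"/shared/project/depot"
```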
+ +However, due to caching of binary code, the cache rejection since v1.9 is more strict and users may need to set the +[`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable appropriately to get a single cache that is usable throughout the HPC +environment. ## Julia Releases diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md index a724f450dccfa..0fcfdeb80d7b9 100644 --- a/doc/src/manual/functions.md +++ b/doc/src/manual/functions.md @@ -5,7 +5,7 @@ functions are not pure mathematical functions, because they can alter and be aff by the global state of the program. The basic syntax for defining functions in Julia is: ```jldoctest -julia> function f(x,y) +julia> function f(x, y) x + y end f (generic function with 1 method) @@ -18,7 +18,7 @@ There is a second, more terse syntax for defining a function in Julia. The tradi declaration syntax demonstrated above is equivalent to the following compact "assignment form": ```jldoctest fofxy -julia> f(x,y) = x + y +julia> f(x, y) = x + y f (generic function with 1 method) ``` @@ -30,7 +30,7 @@ both typing and visual noise. A function is called using the traditional parenthesis syntax: ```jldoctest fofxy -julia> f(2,3) +julia> f(2, 3) 5 ``` @@ -40,14 +40,14 @@ like any other value: ```jldoctest fofxy julia> g = f; -julia> g(2,3) +julia> g(2, 3) 5 ``` As with variables, Unicode can also be used for function names: ```jldoctest -julia> ∑(x,y) = x + y +julia> ∑(x, y) = x + y ∑ (generic function with 1 method) julia> ∑(2, 3) @@ -73,11 +73,11 @@ function f(x, y) end ``` The statement `x[1] = 42` *mutates* the object `x`, and hence this change *will* be visible in the array passed -by the caller for this argument. On the other hand, the assignment `y = 7 + y` changes the *binding* ("name") +by the caller for this argument. On the other hand, the assignment `y = 7 + y` changes the *binding* ("name") `y` to refer to a new value `7 + y`, rather than mutating the *original* object referred to by `y`, -and hence does *not* change the corresponding argument passed by the caller. This can be seen if we call `f(x, y)`: +and hence does *not* change the corresponding argument passed by the caller. This can be seen if we call `f(x, y)`: ```julia-repl -julia> a = [4,5,6] +julia> a = [4, 5, 6] 3-element Vector{Int64}: 4 5 @@ -102,6 +102,9 @@ As a common convention in Julia (not a syntactic requirement), such a function w [typically be named `f!(x, y)`](@ref man-punctuation) rather than `f(x, y)`, as a visual reminder at the call site that at least one of the arguments (often the first one) is being mutated. +!!! warning "Shared memory between arguments" + The behavior of a mutating function can be unexpected when a mutated argument shares memory with another argument, a situation known as aliasing (e.g. when one is a view of the other). + Unless the function docstring explicitly indicates that aliasing produces the expected result, it is the responsibility of the caller to ensure proper behavior on such inputs. ## Argument-type declarations @@ -112,13 +115,13 @@ fib(n::Integer) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2) ``` and the `::Integer` specification means that it will only be callable when `n` is a subtype of the [abstract](@ref man-abstract-types) `Integer` type. -Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller. 
For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called. (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).) The most common reasons to declare argument types in Julia are, instead: +Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller. For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called. (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).) The most common reasons to declare argument types in Julia are, instead: -* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments. For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values. -* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types. For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`. +* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments. For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values. +* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types. For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`. * **Clarity:** Type declarations can serve as a form of documentation about the expected arguments. -However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate. For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)). If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason. 
In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**. You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them. +However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate. For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)). If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason. In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**. You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them. ## The `return` Keyword @@ -130,7 +133,7 @@ the `return` keyword causes a function to return immediately, providing an expression whose value is returned: ```julia -function g(x,y) +function g(x, y) return x * y x + y end @@ -140,19 +143,19 @@ Since function definitions can be entered into interactive sessions, it is easy definitions: ```jldoctest -julia> f(x,y) = x + y +julia> f(x, y) = x + y f (generic function with 1 method) -julia> function g(x,y) +julia> function g(x, y) return x * y x + y end g (generic function with 1 method) -julia> f(2,3) +julia> f(2, 3) 5 -julia> g(2,3) +julia> g(2, 3) 6 ``` @@ -163,18 +166,18 @@ is of real use. Here, for example, is a function that computes the hypotenuse le triangle with sides of length `x` and `y`, avoiding overflow: ```jldoctest -julia> function hypot(x,y) +julia> function hypot(x, y) x = abs(x) y = abs(y) if x > y r = y/x - return x*sqrt(1+r*r) + return x*sqrt(1 + r*r) end if y == 0 - return zero(x) + return x end r = x/y - return y*sqrt(1+r*r) + return y*sqrt(1 + r*r) end hypot (generic function with 1 method) @@ -186,7 +189,7 @@ There are three possible points of return from this function, returning the valu expressions, depending on the values of `x` and `y`. The `return` on the last line could be omitted since it is the last expression. -### Return type +### [Return type](@id man-functions-return-type) A return type can be specified in the function declaration using the `::` operator. This converts the return value to the specified type. @@ -205,7 +208,7 @@ See [Type Declarations](@ref) for more on return types. Return type declarations are **rarely used** in Julia: in general, you should instead write "type-stable" functions in which Julia's compiler can automatically -infer the return type. For more information, see the [Performance Tips](@ref man-performance-tips) chapter. +infer the return type. For more information, see the [Performance Tips](@ref man-performance-tips) chapter. 
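As a brief sketch of what such a declaration does, using a hypothetical one-line method `g`:

```julia
g(x, y)::Int8 = x * y   # the annotation converts the returned value to Int8
typeof(g(2, 3))         # Int8, even though `x * y` itself produces an Int
```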
### Returning nothing @@ -243,7 +246,7 @@ as you would any other function: julia> 1 + 2 + 3 6 -julia> +(1,2,3) +julia> +(1, 2, 3) 6 ``` @@ -254,7 +257,7 @@ operators such as [`+`](@ref) and [`*`](@ref) just like you would with other fun ```jldoctest julia> f = +; -julia> f(1,2,3) +julia> f(1, 2, 3) 6 ``` @@ -289,15 +292,15 @@ syntaxes: ```jldoctest julia> x -> x^2 + 2x - 1 -#1 (generic function with 1 method) +#2 (generic function with 1 method) julia> function (x) x^2 + 2x - 1 end -#3 (generic function with 1 method) +#5 (generic function with 1 method) ``` -This creates a function taking one argument `x` and returning the value of the polynomial `x^2 + +Each statement creates a function taking one argument `x` and returning the value of the polynomial `x^2 + 2x - 1` at that value. Notice that the result is a generic function, but with a compiler-generated name based on consecutive numbering. @@ -327,28 +330,17 @@ julia> map(x -> x^2 + 2x - 1, [1, 3, -1]) ``` An anonymous function accepting multiple arguments can be written using the syntax `(x,y,z)->2x+y-z`. -A zero-argument anonymous function is written as `()->3`. The idea of a function with no arguments -may seem strange, but is useful for "delaying" a computation. In this usage, a block of code is -wrapped in a zero-argument function, which is later invoked by calling it as `f`. -As an example, consider this call to [`get`](@ref): +Argument-type declarations for anonymous functions work as for named functions, for example `x::Integer->2x`. +The return type of an anonymous function cannot be specified. -```julia -get(dict, key) do - # default value calculated here - time() -end -``` - -The code above is equivalent to calling `get` with an anonymous function containing the code -enclosed between `do` and `end`, like so: - -```julia -get(()->time(), dict, key) -``` - -The call to [`time`](@ref) is delayed by wrapping it in a 0-argument anonymous function -that is called only if the requested key is absent from `dict`. +A zero-argument anonymous function can be written as `()->2+2`. The idea of a function with +no arguments may seem strange, but is useful in cases where a result cannot (or should not) +be precomputed. For example, Julia has a zero-argument [`time`](@ref) function that returns +the current time in seconds, and thus `seconds = ()->round(Int, time())` is an anonymous +function that returns this time rounded to the nearest integer assigned to the variable +`seconds`. Each time this anonymous function is called as `seconds()` the current time will +be calculated and returned. ## Tuples @@ -402,7 +394,7 @@ left side of an assignment: the value on the right side is _destructured_ by ite over and assigning to each variable in turn: ```jldoctest -julia> (a,b,c) = 1:3 +julia> (a, b, c) = 1:3 1:3 julia> b @@ -417,7 +409,7 @@ This can be used to return multiple values from functions by returning a tuple o other iterable value. 
For example, the following function returns two values: ```jldoctest foofunc -julia> function foo(a,b) +julia> function foo(a, b) a+b, a*b end foo (generic function with 1 method) @@ -427,14 +419,14 @@ If you call it in an interactive session without assigning the return value anyw see the tuple returned: ```jldoctest foofunc -julia> foo(2,3) +julia> foo(2, 3) (5, 6) ``` Destructuring assignment extracts each value into a variable: ```jldoctest foofunc -julia> x, y = foo(2,3) +julia> x, y = foo(2, 3) (5, 6) julia> x @@ -473,7 +465,7 @@ Other valid left-hand side expressions can be used as elements of the assignment ```jldoctest julia> X = zeros(3); -julia> X[1], (a,b) = (1, (2, 3)) +julia> X[1], (a, b) = (1, (2, 3)) (1, (2, 3)) julia> X @@ -624,9 +616,9 @@ julia> foo(A(3, 4)) For anonymous functions, destructuring a single argument requires an extra comma: -``` -julia> map(((x,y),) -> x + y, [(1,2), (3,4)]) -2-element Array{Int64,1}: +```julia-repl +julia> map(((x, y),) -> x + y, [(1, 2), (3, 4)]) +2-element Vector{Int64}: 3 7 ``` @@ -638,7 +630,7 @@ Such functions are traditionally known as "varargs" functions, which is short fo of arguments". You can define a varargs function by following the last positional argument with an ellipsis: ```jldoctest barfunc -julia> bar(a,b,x...) = (a,b,x) +julia> bar(a, b, x...) = (a, b, x) bar (generic function with 1 method) ``` @@ -647,16 +639,16 @@ The variables `a` and `b` are bound to the first two argument values as usual, a two arguments: ```jldoctest barfunc -julia> bar(1,2) +julia> bar(1, 2) (1, 2, ()) -julia> bar(1,2,3) +julia> bar(1, 2, 3) (1, 2, (3,)) julia> bar(1, 2, 3, 4) (1, 2, (3, 4)) -julia> bar(1,2,3,4,5,6) +julia> bar(1, 2, 3, 4, 5, 6) (1, 2, (3, 4, 5, 6)) ``` @@ -673,7 +665,7 @@ call instead: julia> x = (3, 4) (3, 4) -julia> bar(1,2,x...) +julia> bar(1, 2, x...) (1, 2, (3, 4)) ``` @@ -684,7 +676,7 @@ of arguments go. This need not be the case, however: julia> x = (2, 3, 4) (2, 3, 4) -julia> bar(1,x...) +julia> bar(1, x...) (1, 2, (3, 4)) julia> x = (1, 2, 3, 4) @@ -697,15 +689,15 @@ julia> bar(x...) Furthermore, the iterable object splatted into a function call need not be a tuple: ```jldoctest barfunc -julia> x = [3,4] +julia> x = [3, 4] 2-element Vector{Int64}: 3 4 -julia> bar(1,2,x...) +julia> bar(1, 2, x...) (1, 2, (3, 4)) -julia> x = [1,2,3,4] +julia> x = [1, 2, 3, 4] 4-element Vector{Int64}: 1 2 @@ -720,9 +712,9 @@ Also, the function that arguments are splatted into need not be a varargs functi often is): ```jldoctest -julia> baz(a,b) = a + b; +julia> baz(a, b) = a + b; -julia> args = [1,2] +julia> args = [1, 2] 2-element Vector{Int64}: 1 2 @@ -730,7 +722,7 @@ julia> args = [1,2] julia> baz(args...) 3 -julia> args = [1,2,3] +julia> args = [1, 2, 3] 3-element Vector{Int64}: 1 2 @@ -738,6 +730,7 @@ julia> args = [1,2,3] julia> baz(args...) ERROR: MethodError: no method matching baz(::Int64, ::Int64, ::Int64) +The function `baz` exists, but no method is defined for this combination of argument types. Closest candidates are: baz(::Any, ::Any) @@ -831,7 +824,7 @@ prior keyword arguments. The types of keyword arguments can be made explicit as follows: ```julia -function f(;x::Int=1) +function f(; x::Int=1) ### end ``` @@ -931,8 +924,10 @@ map([A, B, C]) do x end ``` -The `do x` syntax creates an anonymous function with argument `x` and passes it as the first argument -to [`map`](@ref). Similarly, `do a,b` would create a two-argument anonymous function. 
Note that `do (a,b)` would create a one-argument anonymous function, +The `do x` syntax creates an anonymous function with argument `x` and passes +the anonymous function as the first argument +to the "outer" function - [`map`](@ref) in this example. +Similarly, `do a,b` would create a two-argument anonymous function. Note that `do (a,b)` would create a one-argument anonymous function, whose argument is a tuple to be deconstructed. A plain `do` would declare that what follows is an anonymous function of the form `() -> ...`. How these arguments are initialized depends on the "outer" function; here, [`map`](@ref) will @@ -981,7 +976,7 @@ can create performance challenges as discussed in [performance tips](@ref man-pe Functions in Julia can be combined by composing or piping (chaining) them together. Function composition is when you combine functions together and apply the resulting composition to arguments. -You use the function composition operator (`∘`) to compose the functions, so `(f ∘ g)(args...)` is the same as `f(g(args...))`. +You use the function composition operator (`∘`) to compose the functions, so `(f ∘ g)(args...; kw...)` is the same as `f(g(args...; kw...))`. You can type the composition operator at the REPL and suitably-configured editors using `\circ`. @@ -1077,13 +1072,13 @@ in advance by the library writer. More generally, `f.(args...)` is actually equivalent to `broadcast(f, args...)`, which allows you to operate on multiple arrays (even of different shapes), or a mix of arrays and scalars (see -[Broadcasting](@ref)). For example, if you have `f(x,y) = 3x + 4y`, then `f.(pi,A)` will return -a new array consisting of `f(pi,a)` for each `a` in `A`, and `f.(vector1,vector2)` will return -a new vector consisting of `f(vector1[i],vector2[i])` for each index `i` (throwing an exception +[Broadcasting](@ref)). For example, if you have `f(x, y) = 3x + 4y`, then `f.(pi, A)` will return +a new array consisting of `f(pi,a)` for each `a` in `A`, and `f.(vector1, vector2)` will return +a new vector consisting of `f(vector1[i], vector2[i])` for each index `i` (throwing an exception if the vectors have different length). ```jldoctest -julia> f(x,y) = 3x + 4y; +julia> f(x, y) = 3x + 4y; julia> A = [1.0, 2.0, 3.0]; @@ -1103,7 +1098,7 @@ julia> f.(A, B) ``` Keyword arguments are not broadcasted over, but are simply passed through to each call of -the function. For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`. +the function. For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`. Moreover, *nested* `f.(args...)` calls are *fused* into a single `broadcast` loop. For example, `sin.(cos.(X))` is equivalent to `broadcast(x -> sin(cos(x)), X)`, similar to `[sin(cos(x)) for x in X]`: @@ -1159,6 +1154,8 @@ julia> 1:5 .|> [x->x^2, inv, x->2*x, -, isodd] true ``` +All functions in the fused broadcast are always called for every element of the result. Thus `X .+ σ .* randn.()` will add a mask of independent and identically sampled random values to each element of the array `X`, but `X .+ σ .* randn()` will add the *same* random sample to each element. In cases where the fused computation is constant along one or more axes of the broadcast iteration, it may be possible to leverage a space-time tradeoff and allocate intermediate values to reduce the number of computations. See more at [performance tips](@ref man-performance-unfuse). 
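A minimal sketch of this difference (no output is shown, since the samples are random):

```julia
X = zeros(3); σ = 0.1

X .+ σ .* randn.()   # fused: `randn()` runs once per element, so each entry gets independent noise
X .+ σ .* randn()    # `randn()` runs once before broadcasting; every entry receives the same sample
```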
+ ## Further Reading We should mention here that this is far from a complete picture of defining functions. Julia has diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md index e972788022de6..2c69aabbda192 100644 --- a/doc/src/manual/getting-started.md +++ b/doc/src/manual/getting-started.md @@ -10,8 +10,9 @@ known as a read-eval-print loop or "REPL") by double-clicking the Julia executab `julia` from the command line: ```@eval +using REPL io = IOBuffer() -Base.banner(io) +REPL.banner(io) banner = String(take!(io)) import Markdown Markdown.parse("```\n\$ julia\n\n$(banner)\njulia> 1 + 2\n3\n\njulia> ans\n3\n```") @@ -55,4 +56,4 @@ search: begin disable_sigint reenable_sigint begin...end denotes a block of code. ``` -If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips). +If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips), or check out the comprehensive [ModernJuliaWorkflows](https://modernjuliaworkflows.org/) blog. diff --git a/doc/src/manual/img/cpu-profile.png b/doc/src/manual/img/cpu-profile.png new file mode 100644 index 0000000000000..ec48b41f6e78b Binary files /dev/null and b/doc/src/manual/img/cpu-profile.png differ diff --git a/doc/src/manual/img/task-sampling-failure.png b/doc/src/manual/img/task-sampling-failure.png new file mode 100644 index 0000000000000..46bbd9b33b6ce Binary files /dev/null and b/doc/src/manual/img/task-sampling-failure.png differ diff --git a/doc/src/manual/img/wall-time-profiler-channel-example.png b/doc/src/manual/img/wall-time-profiler-channel-example.png new file mode 100644 index 0000000000000..26cb4a4522621 Binary files /dev/null and b/doc/src/manual/img/wall-time-profiler-channel-example.png differ diff --git a/doc/src/manual/img/wall-time-profiler-compute-bound-example.png b/doc/src/manual/img/wall-time-profiler-compute-bound-example.png new file mode 100644 index 0000000000000..983b01bcc2dea Binary files /dev/null and b/doc/src/manual/img/wall-time-profiler-compute-bound-example.png differ diff --git a/doc/src/manual/installation.md b/doc/src/manual/installation.md new file mode 100644 index 0000000000000..f45aba2c37a28 --- /dev/null +++ b/doc/src/manual/installation.md @@ -0,0 +1,126 @@ +# [Installation](@id man-installation) + +There are many ways to install Julia. The following sections highlight the +recommended method for each of the main supported platforms, and then present +alternative ways that might be useful in specialized situations. + +The current installation recommendation is a solution based on Juliaup. If you +installed Julia previously with a method that is _not_ based on Juliaup and want +to switch your system to an installation that is based on Juliaup, we recommend +that you uninstall all previous Julia versions, ensure that you remove anything +Julia related from your `PATH` variable and then install Julia with one of the +methods described below. + +## Windows + +On Windows Julia can be installed directly from the Windows store +[here](https://www.microsoft.com/store/apps/9NJNWW8PVKMN). One can also install +exactly the same version by executing + +``` +winget install julia -s msstore +``` + +in any shell. + +## Mac and Linux + +Julia can be installed on Linux or Mac by executing + +``` +curl -fsSL https://install.julialang.org | sh +``` + +in a shell. 
+ +### Command line arguments + +One can pass various command line arguments to the Julia installer. The syntax +for installer arguments is + +```bash +curl -fsSL https://install.julialang.org | sh -s -- +``` + +Here `` should be replaced with one or more of the following arguments: +- `--yes` (or `-y`): Run the installer in a non-interactive mode. All + configuration values use their default or a value supplied as a command line + argument. +- `--default-channel=`: Configure the default Juliaup channel. For + example `--default-channel lts` would install the `lts` channel and configure it + as the default. +- `--add-to-path=`: Configure whether Julia should be added to the `PATH` + environment variable. Valid values are `yes` (default) and `no`. +- `--background-selfupdate=`: Configure an optional CRON job that + auto-updates Juliaup if `` has a value larger than 0. The actual value + controls how often the CRON job will run to check for a new Juliaup version in + seconds. The default value is 0, i.e. no CRON job will be created. +- `--startup-selfupdate=`: Configure how often Julia will check for new + versions of Juliaup when Julia is started. The default is every 1440 minutes. +- `-p=` (or `--path`): Configure where the Julia and Juliaup binaries are + installed. The default is `~/.juliaup`. + +## Alternative installation methods + +Note that we recommend the following methods _only_ if none of the installation +methods described above work for your system. + +Some of the installation methods described below recommend installing a package +called `juliaup`. Note that this nevertheless installs a fully functional +Julia system, not just Juliaup. + +### App Installer (Windows) + +If the Windows Store is blocked on a system, we have an alternative +[MSIX App Installer](https://learn.microsoft.com/en-us/windows/msix/app-installer/app-installer-file-overview) +based setup. To use the App Installer version, download +[this](https://install.julialang.org/Julia.appinstaller) file and open it by +double clicking on it. + +### MSI Installer (Windows) + +If neither the Windows Store nor the App Installer version work on your Windows +system, you can also use a MSI based installer. Note that this installation +methods comes with serious limitations and is generally not recommended unless +no other method works. For example, there is no automatic update mechanism for +Juliaup with this installation method. The 64 bit version of the MSI installer +can be downloaded from [here](https://install.julialang.org/Julia-x64.msi) and +the 32 bit version from [here](https://install.julialang.org/Julia-x86.msi). + + By default the install will be a per-user install that does not require + elevation. You can also do a system install by running the following command + from a shell: + +``` +msiexec /i ALLUSERS=1 +``` + +### [Homebrew](https://brew.sh) (Mac and Linux) + +On systems with brew, you can install Julia by running +``` +brew install juliaup +``` +in a shell. Note that you will have to update Juliaup with standard brew +commands. + +### [Arch Linux - AUR](https://aur.archlinux.org/packages/juliaup/) (Linux) + +On Arch Linux, Juliaup is available [in the Arch User Repository (AUR)](https://aur.archlinux.org/packages/juliaup/). + +### [openSUSE Tumbleweed](https://get.opensuse.org/tumbleweed/) (Linux) + +On openSUSE Tumbleweed, you can install Julia by running + +```sh +zypper install juliaup +``` +in a shell with root privileges. 
+ +### [cargo](https://crates.io/crates/juliaup/) (Windows, Mac and Linux) + +To install Julia via Rust's cargo, run: + +```sh +cargo install juliaup +``` diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md index 173ca7847616e..0ee7850c92087 100644 --- a/doc/src/manual/integers-and-floating-point-numbers.md +++ b/doc/src/manual/integers-and-floating-point-numbers.md @@ -243,11 +243,10 @@ julia> x + 1 == typemin(Int64) true ``` -Thus, arithmetic with Julia integers is actually a form of [modular arithmetic](https://en.wikipedia.org/wiki/Modular_arithmetic). -This reflects the characteristics of the underlying arithmetic of integers as implemented on modern -computers. In applications where overflow is possible, explicit checking for wraparound produced -by overflow is essential; otherwise, the [`BigInt`](@ref) type in [Arbitrary Precision Arithmetic](@ref) -is recommended instead. +Arithmetic operations with Julia's integer types inherently perform [modular arithmetic](https://en.wikipedia.org/wiki/Modular_arithmetic), +mirroring the characteristics of integer arithmetic on modern computer hardware. In scenarios where overflow is a possibility, +it is crucial to explicitly check for wraparound effects that can result from such overflows. +The [`Base.Checked`](@ref) module provides a suite of arithmetic operations equipped with overflow checks, which trigger errors if an overflow occurs. For use cases where overflow cannot be tolerated under any circumstances, utilizing the [`BigInt`](@ref) type, as detailed in [Arbitrary Precision Arithmetic](@ref), is advisable. An example of overflow behavior and how to potentially resolve it is as follows: @@ -632,7 +631,7 @@ BigInt The default precision (in number of bits of the significand) and rounding mode of [`BigFloat`](@ref) operations can be changed globally by calling [`setprecision`](@ref) and [`setrounding`](@ref), -and all further calculations will take these changes in account. Alternatively, the precision +and all further calculations will take these changes in account. Alternatively, the precision or the rounding can be changed only within the execution of a particular block of code by using the same functions with a `do` block: @@ -653,6 +652,13 @@ julia> setprecision(40) do 1.1000000000004 ``` +!!! warning + The relation between [`setprecision`](@ref) or [`setrounding`](@ref) and + [`@big_str`](@ref), the macro used for `big` string literals (such as + `big"0.3"`), might not be intuitive, as a consequence of the fact that + `@big_str` is a macro. See the [`@big_str`](@ref) documentation for + details. + ## [Numeric Literal Coefficients](@id man-numeric-literal-coefficients) To make common numeric formulae and expressions clearer, Julia allows variables to be immediately @@ -693,7 +699,7 @@ julia> 2(x-1)^2 - 3(x-1) + 1 !!! note The precedence of numeric literal coefficients used for implicit multiplication is higher than other binary operators such as multiplication - (`*`), and division (`/`, `\`, and `//`). This means, for example, that + (`*`), and division (`/`, `\`, and `//`). This means, for example, that `1 / 2im` equals `-0.5im` and `6 // 2(2 + 1)` equals `1 // 1`. 
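Both parses can be checked directly; each of the following evaluates to `true`:

```julia
1 / 2im == -0.5im        # parsed as 1 / (2im): the literal coefficient binds tighter than /
6 // 2(2 + 1) == 1 // 1  # parsed as 6 // (2 * (2 + 1))
```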
Additionally, parenthesized expressions can be used as coefficients to variables, implying multiplication diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md index bcb15da69dedf..e752448f14a25 100644 --- a/doc/src/manual/interfaces.md +++ b/doc/src/manual/interfaces.md @@ -7,29 +7,24 @@ to generically build upon those behaviors. ## [Iteration](@id man-interface-iteration) -| Required methods | | Brief description | -|:------------------------------ |:---------------------- |:------------------------------------------------------------------------------------- | -| `iterate(iter)` | | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty | -| `iterate(iter, state)` | | Returns either a tuple of the next item and next state or `nothing` if no items remain | -| **Important optional methods** | **Default definition** | **Brief description** | -| `Base.IteratorSize(IterType)` | `Base.HasLength()` | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate | -| `Base.IteratorEltype(IterType)`| `Base.HasEltype()` | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate | -| `eltype(IterType)` | `Any` | The type of the first entry of the tuple returned by `iterate()` | -| `length(iter)` | (*undefined*) | The number of items, if known | -| `size(iter, [dim])` | (*undefined*) | The number of items in each dimension, if known | -| `Base.isdone(iter[, state])` | `missing` | Fast-path hint for iterator completion. Should be defined for stateful iterators, or else `isempty(iter)` may call `iterate(iter[, state])` and mutate the iterator. | - -| Value returned by `IteratorSize(IterType)` | Required Methods | -|:------------------------------------------ |:------------------------------------------ | -| `Base.HasLength()` | [`length(iter)`](@ref) | -| `Base.HasShape{N}()` | `length(iter)` and `size(iter, [dim])` | -| `Base.IsInfinite()` | (*none*) | -| `Base.SizeUnknown()` | (*none*) | - -| Value returned by `IteratorEltype(IterType)` | Required Methods | -|:-------------------------------------------- |:------------------ | -| `Base.HasEltype()` | `eltype(IterType)` | -| `Base.EltypeUnknown()` | (*none*) | +There are two methods that are always required: + +| Required method | Brief description | +|:----------------------- |:---------------------------------------------------------------------------------------- | +| [`iterate(iter)`](@ref) | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty | +| `iterate(iter, state)` | Returns either a tuple of the next item and next state or `nothing` if no items remain | + +There are several more methods that should be defined in some circumstances. +Please note that you should always define at least one of `Base.IteratorSize(IterType)` and `length(iter)` because the default definition of `Base.IteratorSize(IterType)` is `Base.HasLength()`. + +| Method | When should this method be defined? 
| Default definition | Brief description | +|:--- |:--- |:--- |:--- | +| [`Base.IteratorSize(IterType)`](@ref) | If default is not appropriate | `Base.HasLength()` | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate | +| [`length(iter)`](@ref) | If `Base.IteratorSize()` returns `Base.HasLength()` or `Base.HasShape{N}()` | (*undefined*) | The number of items, if known | +| [`size(iter, [dim])`](@ref) | If `Base.IteratorSize()` returns `Base.HasShape{N}()` | (*undefined*) | The number of items in each dimension, if known | +| [`Base.IteratorEltype(IterType)`](@ref) | If default is not appropriate | `Base.HasEltype()` | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate | +| [`eltype(IterType)`](@ref) | If default is not appropriate | `Any` | The type of the first entry of the tuple returned by `iterate()` | +| [`Base.isdone(iter, [state])`](@ref) | **Must** be defined if iterator is stateful | `missing` | Fast-path hint for iterator completion. If not defined for a stateful iterator then functions that check for done-ness, like `isempty()` and `zip()`, may mutate the iterator and cause buggy behaviour! | Sequential iteration is implemented by the [`iterate`](@ref) function. Instead of mutating objects as they are iterated over, Julia iterators may keep track @@ -96,7 +91,7 @@ julia> sum(Squares(100)) ``` There are a few more methods we can extend to give Julia more information about this iterable -collection. We know that the elements in a `Squares` sequence will always be `Int`. By extending +collection. We know that the elements in a `Squares` sequence will always be `Int`. By extending the [`eltype`](@ref) method, we can give that information to Julia and help it make more specialized code in the more complicated methods. We also know the number of elements in our sequence, so we can extend [`length`](@ref), too: @@ -136,7 +131,7 @@ to additionally specialize those extra behaviors when they know a more efficient be used in their specific case. It is also often useful to allow iteration over a collection in *reverse order* -by iterating over [`Iterators.reverse(iterator)`](@ref). To actually support +by iterating over [`Iterators.reverse(iterator)`](@ref). To actually support reverse-order iteration, however, an iterator type `T` needs to implement `iterate` for `Iterators.Reverse{T}`. (Given `r::Iterators.Reverse{T}`, the underling iterator of type `T` is `r.itr`.) @@ -157,13 +152,13 @@ julia> collect(Iterators.reverse(Squares(4))) | Methods to implement | Brief description | |:-------------------- |:-------------------------------- | -| `getindex(X, i)` | `X[i]`, indexed element access | -| `setindex!(X, v, i)` | `X[i] = v`, indexed assignment | +| `getindex(X, i)` | `X[i]`, indexed access, non-scalar `i` should allocate a copy | +| `setindex!(X, v, i)` | `X[i] = v`, indexed assignment | | `firstindex(X)` | The first index, used in `X[begin]` | -| `lastindex(X)` | The last index, used in `X[end]` | +| `lastindex(X)` | The last index, used in `X[end]` | For the `Squares` iterable above, we can easily compute the `i`th element of the sequence by squaring -it. We can expose this as an indexing expression `S[i]`. To opt into this behavior, `Squares` +it. We can expose this as an indexing expression `S[i]`. 
To opt into this behavior, `Squares` simply needs to define [`getindex`](@ref): ```jldoctest squaretype @@ -238,12 +233,12 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref | `similar(T::Union{Type,Function}, inds)` | `T(Base.to_shape(inds))` | Return an array similar to `T` with the specified indices `inds` (see below) | If a type is defined as a subtype of `AbstractArray`, it inherits a very large set of rich behaviors -including iteration and multidimensional indexing built on top of single-element access. See +including iteration and multidimensional indexing built on top of single-element access. See the [arrays manual page](@ref man-multi-dim-arrays) and the [Julia Base section](@ref lib-arrays) for more supported methods. A key part in defining an `AbstractArray` subtype is [`IndexStyle`](@ref). Since indexing is such an important part of an array and often occurs in hot loops, it's important to make both -indexing and indexed assignment as efficient as possible. Array data structures are typically +indexing and indexed assignment as efficient as possible. Array data structures are typically defined in one of two ways: either it most efficiently accesses its elements using just one index (linear indexing) or it intrinsically accesses the elements with indices specified for every dimension. These two modalities are identified by Julia as `IndexLinear()` and `IndexCartesian()`. @@ -251,7 +246,7 @@ defined in one of two ways: either it most efficiently accesses its elements usi provides a traits-based mechanism to enable efficient generic code for all array types. This distinction determines which scalar indexing methods the type must define. `IndexLinear()` -arrays are simple: just define `getindex(A::ArrayType, i::Int)`. When the array is subsequently +arrays are simple: just define `getindex(A::ArrayType, i::Int)`. When the array is subsequently indexed with a multidimensional set of indices, the fallback `getindex(A::AbstractArray, I...)` efficiently converts the indices into one linear index and then calls the above method. `IndexCartesian()` arrays, on the other hand, require methods to be defined for each supported dimensionality with @@ -412,7 +407,7 @@ perhaps range-types `Ind` of your own design. For more information, see A strided array is a subtype of `AbstractArray` whose entries are stored in memory with fixed strides. Provided the element type of the array is compatible with BLAS, a strided array can utilize BLAS and LAPACK routines -for more efficient linear algebra routines. A typical example of a user-defined strided array is one +for more efficient linear algebra routines. A typical example of a user-defined strided array is one that wraps a standard `Array` with additional structure. Warning: do not implement these methods if the underlying storage is not actually strided, as it @@ -469,7 +464,7 @@ container for broadcasting, then the following method should be defined: ```julia Base.broadcastable(o::MyType) = Ref(o) ``` -that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container. For example, such a wrapper +that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container. For example, such a wrapper method is defined for types themselves, functions, special singletons like [`missing`](@ref) and [`nothing`](@ref), and dates. 
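A minimal sketch of this pattern, using a hypothetical two-field `Point` type:

```julia
struct Point
    x::Float64
    y::Float64
end

# Without this definition, broadcasting over a `Point` would fall back to trying to iterate it.
Base.broadcastable(p::Point) = Ref(p)

shift(p::Point, dx) = Point(p.x + dx, p.y)

# `Point(0, 0)` now acts as a scalar and is paired with every element of the vector:
shift.(Point(0, 0), [1.0, 2.0, 3.0])   # -> [Point(1.0, 0.0), Point(2.0, 0.0), Point(3.0, 0.0)]
```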
Custom array-like types can specialize @@ -531,8 +526,8 @@ similar(bc::Broadcasted{DefaultArrayStyle{N}}, ::Type{ElType}) where {N,ElType} However, if needed you can specialize on any or all of these arguments. The final argument `bc` is a lazy representation of a (potentially fused) broadcast operation, a `Broadcasted` -object. For these purposes, the most important fields of the wrapper are -`f` and `args`, describing the function and argument list, respectively. Note that the argument +object. For these purposes, the most important fields of the wrapper are +`f` and `args`, describing the function and argument list, respectively. Note that the argument list can — and often does — include other nested `Broadcasted` wrappers. For a complete example, let's say you have created a type, `ArrayAndChar`, that stores an @@ -841,3 +836,51 @@ julia> p.r Finally, it is worth noting that adding instance properties like this is quite rarely done in Julia and should in general only be done if there is a good reason for doing so. + +## [Rounding](@id man-rounding-interface) + +| Methods to implement | Default definition | Brief description | +|:--------------------------------------------- |:------------------------- |:--------------------------------------------------------------------------------------------------- | +| `round(x::ObjType, r::RoundingMode)` | none | Round `x` and return the result. If possible, round should return an object of the same type as `x` | +| `round(T::Type, x::ObjType, r::RoundingMode)` | `convert(T, round(x, r))` | Round `x`, returning the result as a `T` | + +To support rounding on a new type it is typically sufficient to define the single method +`round(x::ObjType, r::RoundingMode)`. The passed rounding mode determines in which direction +the value should be rounded. The most commonly used rounding modes are `RoundNearest`, +`RoundToZero`, `RoundDown`, and `RoundUp`, as these rounding modes are used in the +definitions of the one argument `round`, method, and `trunc`, `floor`, and `ceil`, +respectively. + +In some cases, it is possible to define a three-argument `round` method that is more +accurate or performant than the two-argument method followed by conversion. In this case it +is acceptable to define the three argument method in addition to the two argument method. +If it is impossible to represent the rounded result as an object of the type `T`, +then the three argument method should throw an `InexactError`. 
+ +For example, if we have an `Interval` type which represents a range of possible values +similar to https://github.com/JuliaPhysics/Measurements.jl, we may define rounding on that +type with the following + +```jldoctest +julia> struct Interval{T} + min::T + max::T + end + +julia> Base.round(x::Interval, r::RoundingMode) = Interval(round(x.min, r), round(x.max, r)) + +julia> x = Interval(1.7, 2.2) +Interval{Float64}(1.7, 2.2) + +julia> round(x) +Interval{Float64}(2.0, 2.0) + +julia> floor(x) +Interval{Float64}(1.0, 2.0) + +julia> ceil(x) +Interval{Float64}(2.0, 3.0) + +julia> trunc(x) +Interval{Float64}(1.0, 2.0) +``` diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md index 21722a5e80684..d2cef68bd6fff 100644 --- a/doc/src/manual/mathematical-operations.md +++ b/doc/src/manual/mathematical-operations.md @@ -20,9 +20,9 @@ are supported on all primitive numeric types: | `x ÷ y` | integer divide | x / y, truncated to an integer | | `x \ y` | inverse divide | equivalent to `y / x` | | `x ^ y` | power | raises `x` to the `y`th power | -| `x % y` | remainder | equivalent to `rem(x,y)` | +| `x % y` | remainder | equivalent to `rem(x, y)` | -A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x+y)`, is treated as a multiplication, except with higher precedence than other binary operations. See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details. +A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x + y)`, is treated as a multiplication, except with higher precedence than other binary operations. See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details. Julia's promotion system makes arithmetic operations on mixtures of argument types "just work" naturally and automatically. See [Conversion and Promotion](@ref conversion-and-promotion) for details of the promotion @@ -171,15 +171,15 @@ The updating versions of all the binary arithmetic and bitwise operators are: For *every* binary operation like `^`, there is a corresponding "dot" operation `.^` that is *automatically* defined to perform `^` element-by-element on arrays. For example, -`[1,2,3] ^ 3` is not defined, since there is no standard +`[1, 2, 3] ^ 3` is not defined, since there is no standard mathematical meaning to "cubing" a (non-square) array, but -`[1,2,3] .^ 3` is defined as computing the elementwise -(or "vectorized") result `[1^3, 2^3, 3^3]`. Similarly for unary +`[1, 2, 3] .^ 3` is defined as computing the elementwise +(or "vectorized") result `[1^3, 2^3, 3^3]`. Similarly for unary operators like `!` or `√`, there is a corresponding `.√` that applies the operator elementwise. ```jldoctest -julia> [1,2,3] .^ 3 +julia> [1, 2, 3] .^ 3 3-element Vector{Int64}: 1 8 @@ -204,9 +204,9 @@ as `a .= a .+ b`, where `.=` is a fused *in-place* assignment operation (see the [dot syntax documentation](@ref man-vectorized)). Note the dot syntax is also applicable to user-defined operators. -For example, if you define `⊗(A,B) = kron(A,B)` to give a convenient +For example, if you define `⊗(A, B) = kron(A, B)` to give a convenient infix syntax `A ⊗ B` for Kronecker products ([`kron`](@ref)), then -`[A,B] .⊗ [C,D]` will compute `[A⊗C, B⊗D]` with no additional coding. +`[A, B] .⊗ [C, D]` will compute `[A⊗C, B⊗D]` with no additional coding. Combining dot operators with numeric literals can be ambiguous. For example, it is not clear whether `1.+x` means `1. + x` or `1 .+ x`. 
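Adding explicit whitespace, and writing the literal you mean, avoids the ambiguity; a short sketch:

```julia
x = [1, 2, 3]

1 .+ x     # integer literal, elementwise: [2, 3, 4]
1.0 .+ x   # floating-point literal, elementwise: [2.0, 3.0, 4.0]
# `1.+x` itself is rejected by the parser, which asks for whitespace to disambiguate.
```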
@@ -332,7 +332,7 @@ Mixed-type comparisons between signed integers, unsigned integers, and floats ca great deal of care has been taken to ensure that Julia does them correctly. For other types, `isequal` defaults to calling [`==`](@ref), so if you want to define -equality for your own types then you only need to add a [`==`](@ref) method. If you define +equality for your own types then you only need to add a [`==`](@ref) method. If you define your own equality function, you should probably define a corresponding [`hash`](@ref) method to ensure that `isequal(x,y)` implies `hash(x) == hash(y)`. @@ -394,7 +394,7 @@ Julia applies the following order and associativity of operations, from highest |:-------------- |:------------------------------------------------------------------------------------------------- |:-------------------------- | | Syntax | `.` followed by `::` | Left | | Exponentiation | `^` | Right | -| Unary | `+ - √` | Right[^1] | +| Unary | `+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓ <: >:` | Right[^1] | | Bitshifts | `<< >> >>>` | Left | | Fractions | `//` | Left | | Multiplication | `* / % & \ ÷` | Left[^2] | @@ -457,7 +457,7 @@ Juxtaposition parses like a unary operator, which has the same natural asymmetry Julia supports three forms of numerical conversion, which differ in their handling of inexact conversions. - * The notation `T(x)` or `convert(T,x)` converts `x` to a value of type `T`. + * The notation `T(x)` or `convert(T, x)` converts `x` to a value of type `T`. * If `T` is a floating-point type, the result is the nearest representable value, which could be positive or negative infinity. @@ -502,7 +502,7 @@ julia> round(Int8,127.4) 127 julia> round(Int8,127.6) -ERROR: InexactError: trunc(Int8, 128.0) +ERROR: InexactError: Int8(128.0) Stacktrace: [...] ``` @@ -524,48 +524,49 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define ### Division functions -| Function | Description | -|:------------------------- |:--------------------------------------------------------------------------------------------------------- | -| [`div(x,y)`](@ref), `x÷y` | truncated division; quotient rounded towards zero | -| [`fld(x,y)`](@ref) | floored division; quotient rounded towards `-Inf` | -| [`cld(x,y)`](@ref) | ceiling division; quotient rounded towards `+Inf` | -| [`rem(x,y)`](@ref), `x%y` | remainder; satisfies `x == div(x,y)*y + rem(x,y)`; sign matches `x` | -| [`mod(x,y)`](@ref) | modulus; satisfies `x == fld(x,y)*y + mod(x,y)`; sign matches `y` | -| [`mod1(x,y)`](@ref) | `mod` with offset 1; returns `r∈(0,y]` for `y>0` or `r∈[y,0)` for `y<0`, where `mod(r, y) == mod(x, y)` | -| [`mod2pi(x)`](@ref) | modulus with respect to 2pi; `0 <= mod2pi(x) < 2pi` | -| [`divrem(x,y)`](@ref) | returns `(div(x,y),rem(x,y))` | -| [`fldmod(x,y)`](@ref) | returns `(fld(x,y),mod(x,y))` | -| [`gcd(x,y...)`](@ref) | greatest positive common divisor of `x`, `y`,... | -| [`lcm(x,y...)`](@ref) | least positive common multiple of `x`, `y`,... 
| +| Function | Description | +|:-------------------------- |:--------------------------------------------------------------------------------------------------------- | +| [`div(x, y)`](@ref), `x÷y` | truncated division; quotient rounded towards zero | +| [`fld(x, y)`](@ref) | floored division; quotient rounded towards `-Inf` | +| [`cld(x, y)`](@ref) | ceiling division; quotient rounded towards `+Inf` | +| [`rem(x, y)`](@ref), `x%y` | remainder; satisfies `x == div(x, y)*y + rem(x, y)`; sign matches `x` | +| [`mod(x, y)`](@ref) | modulus; satisfies `x == fld(x, y)*y + mod(x, y)`; sign matches `y` | +| [`mod1(x, y)`](@ref) | `mod` with offset 1; returns `r∈(0, y]` for `y>0` or `r∈[y, 0)` for `y<0`, where `mod(r, y) == mod(x, y)` | +| [`mod2pi(x)`](@ref) | modulus with respect to 2pi; `0 <= mod2pi(x) < 2pi` | +| [`divrem(x, y)`](@ref) | returns `(div(x, y),rem(x, y))` | +| [`fldmod(x, y)`](@ref) | returns `(fld(x, y), mod(x, y))` | +| [`gcd(x, y...)`](@ref) | greatest positive common divisor of `x`, `y`,... | +| [`lcm(x, y...)`](@ref) | least positive common multiple of `x`, `y`,... | ### Sign and absolute value functions -| Function | Description | -|:----------------------- |:---------------------------------------------------------- | -| [`abs(x)`](@ref) | a positive value with the magnitude of `x` | -| [`abs2(x)`](@ref) | the squared magnitude of `x` | -| [`sign(x)`](@ref) | indicates the sign of `x`, returning -1, 0, or +1 | -| [`signbit(x)`](@ref) | indicates whether the sign bit is on (true) or off (false) | -| [`copysign(x,y)`](@ref) | a value with the magnitude of `x` and the sign of `y` | -| [`flipsign(x,y)`](@ref) | a value with the magnitude of `x` and the sign of `x*y` | +| Function | Description | +|:------------------------ |:---------------------------------------------------------- | +| [`abs(x)`](@ref) | a positive value with the magnitude of `x` | +| [`abs2(x)`](@ref) | the squared magnitude of `x` | +| [`sign(x)`](@ref) | indicates the sign of `x`, returning -1, 0, or +1 | +| [`signbit(x)`](@ref) | indicates whether the sign bit is on (true) or off (false) | +| [`copysign(x, y)`](@ref) | a value with the magnitude of `x` and the sign of `y` | +| [`flipsign(x, y)`](@ref) | a value with the magnitude of `x` and the sign of `x*y` | ### Powers, logs and roots -| Function | Description | -|:------------------------ |:-------------------------------------------------------------------------- | -| [`sqrt(x)`](@ref), `√x` | square root of `x` | -| [`cbrt(x)`](@ref), `∛x` | cube root of `x` | -| [`hypot(x,y)`](@ref) | hypotenuse of right-angled triangle with other sides of length `x` and `y` | -| [`exp(x)`](@ref) | natural exponential function at `x` | -| [`expm1(x)`](@ref) | accurate `exp(x)-1` for `x` near zero | -| [`ldexp(x,n)`](@ref) | `x*2^n` computed efficiently for integer values of `n` | -| [`log(x)`](@ref) | natural logarithm of `x` | -| [`log(b,x)`](@ref) | base `b` logarithm of `x` | -| [`log2(x)`](@ref) | base 2 logarithm of `x` | -| [`log10(x)`](@ref) | base 10 logarithm of `x` | -| [`log1p(x)`](@ref) | accurate `log(1+x)` for `x` near zero | -| [`exponent(x)`](@ref) | binary exponent of `x` | -| [`significand(x)`](@ref) | binary significand (a.k.a. 
mantissa) of a floating-point number `x` | +| Function | Description | +|:----------------------------- |:-------------------------------------------------------------------------- | +| [`sqrt(x)`](@ref), `√x` | square root of `x` | +| [`cbrt(x)`](@ref), `∛x` | cube root of `x` | +| [`fourthroot(x)`](@ref), `∜x` | fourth root of `x` | +| [`hypot(x, y)`](@ref) | hypotenuse of right-angled triangle with other sides of length `x` and `y` | +| [`exp(x)`](@ref) | natural exponential function at `x` | +| [`expm1(x)`](@ref) | accurate `exp(x) - 1` for `x` near zero | +| [`ldexp(x, n)`](@ref) | `x * 2^n` computed efficiently for integer values of `n` | +| [`log(x)`](@ref) | natural logarithm of `x` | +| [`log(b, x)`](@ref) | base `b` logarithm of `x` | +| [`log2(x)`](@ref) | base 2 logarithm of `x` | +| [`log10(x)`](@ref) | base 10 logarithm of `x` | +| [`log1p(x)`](@ref) | accurate `log(1 + x)` for `x` near zero | +| [`exponent(x)`](@ref) | binary exponent of `x` | +| [`significand(x)`](@ref) | binary significand (a.k.a. mantissa) of a floating-point number `x` | For an overview of why functions like [`hypot`](@ref), [`expm1`](@ref), and [`log1p`](@ref) are necessary and useful, see John D. Cook's excellent pair of blog posts on the subject: [expm1, log1p, erfc](https://www.johndcook.com/blog/2010/06/07/math-library-functions-that-seem-unnecessary/), @@ -587,7 +588,7 @@ These are all single-argument functions, with [`atan`](@ref) also accepting two corresponding to a traditional [`atan2`](https://en.wikipedia.org/wiki/Atan2) function. Additionally, [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) are provided for more accurate computations -of [`sin(pi*x)`](@ref) and [`cos(pi*x)`](@ref) respectively. +of [`sin(pi * x)`](@ref) and [`cos(pi * x)`](@ref) respectively. In order to compute trigonometric functions with degrees instead of radians, suffix the function with `d`. For example, [`sind(x)`](@ref) computes the sine of `x` where `x` is specified in degrees. diff --git a/doc/src/manual/metaprogramming.md b/doc/src/manual/metaprogramming.md index 2d7deae0f1c54..b619021fcef92 100644 --- a/doc/src/manual/metaprogramming.md +++ b/doc/src/manual/metaprogramming.md @@ -379,7 +379,7 @@ julia> ex = :(a + b) :(a + b) julia> eval(ex) -ERROR: UndefVarError: `b` not defined +ERROR: UndefVarError: `b` not defined in `Main` [...] julia> a = 1; b = 2; @@ -397,7 +397,7 @@ julia> ex = :(x = 1) :(x = 1) julia> x -ERROR: UndefVarError: `x` not defined +ERROR: UndefVarError: `x` not defined in `Main` julia> eval(ex) 1 @@ -629,6 +629,15 @@ julia> @showarg(1+1) julia> @showarg(println("Yo!")) :(println("Yo!")) + +julia> @showarg(1) # Numeric literal +1 + +julia> @showarg("Yo!") # String literal +"Yo!" + +julia> @showarg("Yo! $("hello")") # String with interpolation is an Expr rather than a String +:("Yo! $("hello")") ``` In addition to the given argument list, every macro is passed extra arguments named `__source__` and `__module__`. @@ -1340,8 +1349,7 @@ julia> function sub2ind_loop(dims::NTuple{N}, I::Integer...) where N ind = I[i]-1 + dims[i]*ind end return ind + 1 - end -sub2ind_loop (generic function with 1 method) + end; julia> sub2ind_loop((3, 5), 1, 2) 4 @@ -1380,8 +1388,7 @@ julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) 
where N ex = :(I[$i] - 1 + dims[$i] * $ex) end return :($ex + 1) - end -sub2ind_gen (generic function with 1 method) + end; julia> sub2ind_gen((3, 5), 1, 2) 4 @@ -1392,11 +1399,6 @@ julia> sub2ind_gen((3, 5), 1, 2) An easy way to find out is to extract the body into another (regular) function: ```jldoctest sub2ind_gen2 -julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N - return sub2ind_gen_impl(dims, I...) - end -sub2ind_gen (generic function with 1 method) - julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} where N length(I) == N || return :(error("partial indexing is unsupported")) ex = :(I[$N] - 1) @@ -1404,8 +1406,14 @@ julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} w ex = :(I[$i] - 1 + dims[$i] * $ex) end return :($ex + 1) - end -sub2ind_gen_impl (generic function with 1 method) + end; + +julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N + return sub2ind_gen_impl(dims, I...) + end; + +julia> sub2ind_gen((3, 5), 1, 2) +4 ``` We can now execute `sub2ind_gen_impl` and examine the expression it returns: @@ -1434,25 +1442,34 @@ To solve this problem, the language provides syntax for writing normal, non-gene alternative implementations of generated functions. Applied to the `sub2ind` example above, it would look like this: -```julia -function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N - if N != length(I) - throw(ArgumentError("Number of dimensions must match number of indices.")) - end - if @generated - ex = :(I[$N] - 1) - for i = (N - 1):-1:1 - ex = :(I[$i] - 1 + dims[$i] * $ex) - end - return :($ex + 1) - else - ind = I[N] - 1 - for i = (N - 1):-1:1 - ind = I[i] - 1 + dims[i]*ind - end - return ind + 1 - end -end +```jldoctest sub2ind_gen_opt +julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} where N + ex = :(I[$N] - 1) + for i = (N - 1):-1:1 + ex = :(I[$i] - 1 + dims[$i] * $ex) + end + return :($ex + 1) + end; + +julia> function sub2ind_gen_fallback(dims::NTuple{N}, I) where N + ind = I[N] - 1 + for i = (N - 1):-1:1 + ind = I[i] - 1 + dims[i]*ind + end + return ind + 1 + end; + +julia> function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N + length(I) == N || error("partial indexing is unsupported") + if @generated + return sub2ind_gen_impl(dims, I...) + else + return sub2ind_gen_fallback(dims, I) + end + end; + +julia> sub2ind_gen((3, 5), 1, 2) +4 ``` Internally, this code creates two implementations of the function: a generated one where diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md index 8ca00aa1cfe76..3c234b17f10d8 100644 --- a/doc/src/manual/methods.md +++ b/doc/src/manual/methods.md @@ -76,6 +76,7 @@ Applying it to any other types of arguments will result in a [`MethodError`](@re ```jldoctest fofxy julia> f(2.0, 3) ERROR: MethodError: no method matching f(::Float64, ::Int64) +The function `f` exists, but no method is defined for this combination of argument types. Closest candidates are: f(::Float64, !Matched::Float64) @@ -86,6 +87,7 @@ Stacktrace: julia> f(Float32(2.0), 3.0) ERROR: MethodError: no method matching f(::Float32, ::Float64) +The function `f` exists, but no method is defined for this combination of argument types. Closest candidates are: f(!Matched::Float64, ::Float64) @@ -96,6 +98,7 @@ Stacktrace: julia> f(2.0, "3.0") ERROR: MethodError: no method matching f(::Float64, ::String) +The function `f` exists, but no method is defined for this combination of argument types. 
Closest candidates are: f(::Float64, !Matched::Float64) @@ -106,6 +109,7 @@ Stacktrace: julia> f("2.0", "3.0") ERROR: MethodError: no method matching f(::String, ::String) +The function `f` exists, but no method is defined for this combination of argument types. ``` As you can see, the arguments must be precisely of type [`Float64`](@ref). Other numeric @@ -164,16 +168,20 @@ and applying it will still result in a [`MethodError`](@ref): ```jldoctest fofxy julia> f("foo", 3) ERROR: MethodError: no method matching f(::String, ::Int64) +The function `f` exists, but no method is defined for this combination of argument types. Closest candidates are: f(!Matched::Number, ::Number) @ Main none:1 + f(!Matched::Float64, !Matched::Float64) + @ Main none:1 Stacktrace: [...] julia> f() ERROR: MethodError: no method matching f() +The function `f` exists, but no method is defined for this combination of argument types. Closest candidates are: f(!Matched::Float64, !Matched::Float64) @@ -334,10 +342,11 @@ Stacktrace: [...] ``` -Here the call `g(2.0, 3.0)` could be handled by either the `g(Float64, Any)` or the `g(Any, Float64)` -method, and neither is more specific than the other. In such cases, Julia raises a [`MethodError`](@ref) -rather than arbitrarily picking a method. You can avoid method ambiguities by specifying an appropriate -method for the intersection case: +Here the call `g(2.0, 3.0)` could be handled by either the `g(::Float64, ::Any)` or the +`g(::Any, ::Float64)` method. The order in which the methods are defined does not matter and +neither is more specific than the other. In such cases, Julia raises a +[`MethodError`](@ref) rather than arbitrarily picking a method. You can avoid method +ambiguities by specifying an appropriate method for the intersection case: ```jldoctest gofxy julia> g(x::Float64, y::Float64) = 2x + 2y @@ -406,7 +415,20 @@ Here's an example where the method type parameter `T` is used as the type parame type `Vector{T}` in the method signature: ```jldoctest -julia> myappend(v::Vector{T}, x::T) where {T} = [v..., x] +julia> function myappend(v::Vector{T}, x::T) where {T} + return [v..., x] + end +myappend (generic function with 1 method) +``` + +The type parameter `T` in this example ensures that the added element `x` is a subtype of the +existing eltype of the vector `v`. +The `where` keyword introduces a list of those constraints after the method signature definition. +This works the same for one-line definitions, as seen above, and must appear _before_ the [return +type declaration](@ref man-functions-return-type), if present, as illustrated below: + +```jldoctest +julia> (myappend(v::Vector{T}, x::T)::Vector) where {T} = [v..., x] myappend (generic function with 1 method) julia> myappend([1,2,3],4) @@ -418,6 +440,7 @@ julia> myappend([1,2,3],4) julia> myappend([1,2,3],2.5) ERROR: MethodError: no method matching myappend(::Vector{Int64}, ::Float64) +The function `myappend` exists, but no method is defined for this combination of argument types. Closest candidates are: myappend(::Vector{T}, !Matched::T) where T @@ -435,6 +458,7 @@ julia> myappend([1.0,2.0,3.0],4.0) julia> myappend([1.0,2.0,3.0],4) ERROR: MethodError: no method matching myappend(::Vector{Float64}, ::Int64) +The function `myappend` exists, but no method is defined for this combination of argument types. Closest candidates are: myappend(::Vector{T}, !Matched::T) where T @@ -444,9 +468,9 @@ Stacktrace: [...] 
``` -As you can see, the type of the appended element must match the element type of the vector it -is appended to, or else a [`MethodError`](@ref) is raised. In the following example, the method type parameter -`T` is used as the return value: +If the type of the appended element does not match the element type of the vector it is appended to, +a [`MethodError`](@ref) is raised. +In the following example, the method's type parameter `T` is used as the return value: ```jldoctest julia> mytypeof(x::T) where {T} = T @@ -480,6 +504,7 @@ true julia> same_type_numeric("foo", 2.0) ERROR: MethodError: no method matching same_type_numeric(::String, ::Float64) +The function `same_type_numeric` exists, but no method is defined for this combination of argument types. Closest candidates are: same_type_numeric(!Matched::T, ::T) where T<:Number @@ -492,6 +517,7 @@ Stacktrace: julia> same_type_numeric("foo", "bar") ERROR: MethodError: no method matching same_type_numeric(::String, ::String) +The function `same_type_numeric` exists, but no method is defined for this combination of argument types. julia> same_type_numeric(Int32(1), Int64(2)) false @@ -588,7 +614,7 @@ Start some other operations that use `f(x)`: julia> g(x) = f(x) g (generic function with 1 method) -julia> t = @async f(wait()); yield(); +julia> t = Threads.@spawn f(wait()); yield(); ``` Now we add some new methods to `f(x)`: @@ -613,7 +639,7 @@ julia> g(1) julia> fetch(schedule(t, 1)) "original definition" -julia> t = @async f(wait()); yield(); +julia> t = Threads.@spawn f(wait()); yield(); julia> fetch(schedule(t, 1)) "definition for Int" @@ -638,7 +664,7 @@ abstract type AbstractArray{T, N} end eltype(::Type{<:AbstractArray{T}}) where {T} = T ``` -using so-called triangular dispatch. Note that `UnionAll` types, for +using so-called triangular dispatch. Note that `UnionAll` types, for example `eltype(AbstractArray{T} where T <: Integer)`, do not match the above method. The implementation of `eltype` in `Base` adds a fallback method to `Any` for such cases. @@ -672,11 +698,14 @@ While this works for declared types, it fails for types without supertypes: ```julia-repl -julia> eltype_wrong(Union{AbstractArray{Int}, AbstractArray{Float64}}) -ERROR: MethodError: no method matching supertype(::Type{Union{AbstractArray{Float64,N} where N, AbstractArray{Int64,N} where N}}) +julia> eltype_wrong(Union{Vector{Int}, Matrix{Int}}) +ERROR: MethodError: no method matching supertype(::Type{VecOrMat{Int64}}) + Closest candidates are: - supertype(::DataType) at operators.jl:43 - supertype(::UnionAll) at operators.jl:48 + supertype(::UnionAll) + @ Base operators.jl:44 + supertype(::DataType) + @ Base operators.jl:43 ``` ### Building a similar type with a different type parameter @@ -719,8 +748,8 @@ often it is best to separate each level of dispatch into distinct functions. This may sound similar in approach to single-dispatch, but as we shall see below, it is still more flexible. For example, trying to dispatch on the element-type of an array will often run into ambiguous situations. -Instead, commonly code will dispatch first on the container type, -then recurse down to a more specific method based on eltype. +Instead, common code will dispatch first on the container type, +then recurse down to a more specific method based on `eltype`. In most cases, the algorithms lend themselves conveniently to this hierarchical approach, while in other cases, this rigor must be resolved manually. 
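As a rough sketch of this container-first, element-type-second pattern (the `summarize` function and its methods below are hypothetical and for illustration only, not part of Base), the two levels of dispatch might look like:

```julia
# Level 1: dispatch on the container type, then forward the element type.
summarize(A::AbstractArray) = summarize(eltype(A), A)

# Level 2: recurse to a more specific method based on the element type.
summarize(::Type{<:Integer}, A) = "integer data, sum = $(sum(A))"
summarize(::Type{<:AbstractFloat}, A) = "floating-point data, mean = $(sum(A) / length(A))"
summarize(::Type, A) = "data with element type $(eltype(A))"  # generic fallback
```

Because each level is an ordinary method, support for a new container or element type can be added later by defining another method, rather than by editing one monolithic `if`/`elseif` chain.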
This dispatching branching can be observed, for example, in the logic to sum two matrices: @@ -750,7 +779,7 @@ often referred to as a This pattern is implemented by defining a generic function which computes a different singleton value (or type) for each trait-set to which the -function arguments may belong to. If this function is pure there is +function arguments may belong to. If this function is pure there is no impact on performance compared to normal dispatch. The example in the previous section glossed over the implementation details of @@ -865,8 +894,8 @@ matmul(a, b) = matmul(promote(a, b)...) ## Parametrically-constrained Varargs methods Function parameters can also be used to constrain the number of arguments that may be supplied -to a "varargs" function ([Varargs Functions](@ref)). The notation `Vararg{T,N}` is used to indicate -such a constraint. For example: +to a "varargs" function ([Varargs Functions](@ref)). The notation `Vararg{T,N}` is used to indicate +such a constraint. For example: ```jldoctest julia> bar(a,b,x::Vararg{Any,2}) = (a,b,x) @@ -874,6 +903,7 @@ bar (generic function with 1 method) julia> bar(1,2,3) ERROR: MethodError: no method matching bar(::Int64, ::Int64, ::Int64) +The function `bar` exists, but no method is defined for this combination of argument types. Closest candidates are: bar(::Any, ::Any, ::Any, !Matched::Any) @@ -887,6 +917,7 @@ julia> bar(1,2,3,4) julia> bar(1,2,3,4,5) ERROR: MethodError: no method matching bar(::Int64, ::Int64, ::Int64, ::Int64, ::Int64) +The function `bar` exists, but no method is defined for this combination of argument types. Closest candidates are: bar(::Any, ::Any, ::Any, ::Any) @@ -997,7 +1028,7 @@ function emptyfunc end ## [Method design and the avoidance of ambiguities](@id man-method-design-ambiguities) Julia's method polymorphism is one of its most powerful features, yet -exploiting this power can pose design challenges. In particular, in +exploiting this power can pose design challenges. In particular, in more complex method hierarchies it is not uncommon for [ambiguities](@ref man-ambiguities) to arise. @@ -1140,7 +1171,7 @@ sure this method is implemented with generic calls (like `similar` and When this approach is not possible, it may be worth starting a discussion with other developers about resolving the ambiguity; just because one method was defined first does not necessarily mean that it -can't be modified or eliminated. As a last resort, one developer can +can't be modified or eliminated. As a last resort, one developer can define the "band-aid" method ```julia diff --git a/doc/src/manual/missing.md b/doc/src/manual/missing.md index 9bddcdfbb2ac2..8c8e801ccac9a 100644 --- a/doc/src/manual/missing.md +++ b/doc/src/manual/missing.md @@ -88,7 +88,7 @@ true ``` The [`isless`](@ref) operator is another exception: `missing` is considered -as greater than any other value. This operator is used by [`sort`](@ref), +as greater than any other value. This operator is used by [`sort!`](@ref), which therefore places `missing` values after all other values: ```jldoctest diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md index 4ffb1bca26e50..cf24474916bef 100644 --- a/doc/src/manual/modules.md +++ b/doc/src/manual/modules.md @@ -7,7 +7,8 @@ Modules in Julia help organize code into coherent units. They are delimited synt allows the same name to be used for different functions or global variables without conflict, as long as they are in separate modules. 2. 
Modules have facilities for detailed namespace management: each defines a set of names it - `export`s, and can import names from other modules with `using` and `import` (we explain these below). + `export`s and marks as `public`, and can import names from other modules with `using` and + `import` (we explain these below). 3. Modules can be precompiled for faster loading, and may contain code for runtime initialization. @@ -16,7 +17,7 @@ Typically, in larger Julia packages you will see module code organized into file ```julia module SomeModule -# export, using, import statements are usually here; we discuss these below +# export, public, using, import statements are usually here; we discuss these below include("file1.jl") include("file2.jl") @@ -103,9 +104,19 @@ Also, some modules don't export names at all. This is usually done if they use c words, such as `derivative`, in their API, which could easily clash with the export lists of other modules. We will see how to manage name clashes below. +To mark a name as public without exporting it into the namespace of folks who call `using NiceStuff`, +one can use `public` instead of `export`. This marks the public name(s) as part of the public API, +but does not have any namespace implications. The `public` keyword is only available in Julia 1.11 +and above. To maintain compatibility with Julia 1.10 and below, use the `@compat` macro from the +[Compat](https://github.com/JuliaLang/Compat.jl) package, or the version-aware construct + +```julia +VERSION >= v"1.11.0-DEV.469" && eval(Meta.parse("public a, b, c")) +``` + ### Standalone `using` and `import` -Possibly the most common way of loading a module is `using ModuleName`. This [loads](@ref +For interactive use, the most common way of loading a module is `using ModuleName`. This [loads](@ref code-loading) the code associated with `ModuleName`, and brings 1. the module name @@ -143,7 +154,7 @@ As we will see in the next section `import .NiceStuff` is equivalent to `using . You can combine multiple `using` and `import` statements of the same kind in a comma-separated expression, e.g. ```jldoctest module_manual -julia> using LinearAlgebra, Statistics +julia> using LinearAlgebra, Random ``` ### `using` and `import` with specific identifiers, and adding methods @@ -161,6 +172,13 @@ Importantly, the module name `NiceStuff` will *not* be in the namespace. If you julia> using .NiceStuff: nice, DOG, NiceStuff ``` +When two or more packages/modules export a name and that name does not refer to the +same thing in each of the packages, and the packages are loaded via `using` without +an explicit list of names, it is an error to reference that name without qualification. +It is thus recommended that code intended to be forward-compatible with future versions +of its dependencies and of Julia, e.g., code in released packages, list the names it +uses from each loaded package, e.g., `using Foo: Foo, f` rather than `using Foo`. + Julia has two forms for seemingly the same thing because only `import ModuleName: f` allows adding methods to `f` *without a module path*. That is to say, the following example will give an error: @@ -177,7 +195,6 @@ Stacktrace: @ none:0 [2] top-level scope @ none:1 - ``` This error prevents accidentally adding methods to functions in other modules that you only intended to use. 
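To make the `export`/`public` distinction described above concrete, a minimal sketch of a module might look as follows (the module and its names are hypothetical; the `public` keyword requires Julia 1.11 or later, as noted above):

```julia
module MyAPI

export fit      # exported: available as plain `fit` after `using MyAPI`
public tune     # public but not exported: supported API, called as `MyAPI.tune` (Julia 1.11+)

fit(xs) = sum(xs) / length(xs)    # toy definitions, for illustration only
tune(xs) = fit(xs) + 1

end
```

Names that are neither `export`ed nor marked `public` remain reachable with a qualified path such as `MyAPI.some_helper`, but they are not considered part of the module's public API.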
@@ -189,17 +206,16 @@ julia> using .NiceStuff julia> struct Cat end julia> NiceStuff.nice(::Cat) = "nice 😸" - ``` Alternatively, you can `import` the specific function name: ```jldoctest module_manual julia> import .NiceStuff: nice -julia> struct Cat end +julia> struct Mouse end -julia> nice(::Cat) = "nice 😸" -nice (generic function with 2 methods) +julia> nice(::Mouse) = "nice 🐭" +nice (generic function with 3 methods) ``` Which one you choose is a matter of style. The first form makes it clear that you are adding a @@ -274,14 +290,14 @@ julia> module B B ``` -The statement `using .A, .B` works, but when you try to call `f`, you get a warning +The statement `using .A, .B` works, but when you try to call `f`, you get an error with a hint ```jldoctest module_manual julia> using .A, .B julia> f -WARNING: both B and A export "f"; uses of it in module Main must be qualified -ERROR: UndefVarError: `f` not defined +ERROR: UndefVarError: `f` not defined in `Main` +Hint: It looks like two or more modules export different bindings with this name, resulting in ambiguity. Try explicitly importing it from a particular module, or qualifying the name with the module it should come from. ``` Here, Julia cannot decide which `f` you are referring to, so you have to make a choice. The following solutions are commonly used: @@ -397,7 +413,7 @@ x = 0 module Sub using ..TestPackage -z = y # ERROR: UndefVarError: `y` not defined +z = y # ERROR: UndefVarError: `y` not defined in `Main` end y = 1 @@ -413,7 +429,7 @@ For similar reasons, you cannot use a cyclic ordering: module A module B -using ..C # ERROR: UndefVarError: `C` not defined +using ..C # ERROR: UndefVarError: `C` not defined in `Main.A` end module C @@ -429,7 +445,7 @@ Large modules can take several seconds to load because executing all of the stat often involves compiling a large amount of code. Julia creates precompiled caches of the module to reduce this time. -Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module. If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting +Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module. If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting cache files will be stored in the `compiled` subfolder of `DEPOT_PATH[1]`. If nothing about your system changes, such cache files will be used when you load the module with `import` or `using`. @@ -440,10 +456,12 @@ recompiled upon `using` or `import`. Dependencies are modules it imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref) in the module file(s). -For file dependencies, a change is determined by examining whether the modification time (`mtime`) -of each file loaded by `include` or added explicitly by `include_dependency` is unchanged, or equal -to the modification time truncated to the nearest second (to accommodate systems that can't copy -mtime with sub-second accuracy). 
It also takes into account whether the path to the file chosen +For file dependencies loaded by `include`, a change is determined by examining whether the +file size (`fsize`) or content (condensed into a hash) is unchanged. +For file dependencies loaded by `include_dependency` a change is determined by examining whether the modification time (`mtime`) +is unchanged, or equal to the modification time truncated to the nearest second +(to accommodate systems that can't copy mtime with sub-second accuracy). +It also takes into account whether the path to the file chosen by the search logic in `require` matches the path that had created the precompile file. It also takes into account the set of dependencies already loaded into the current process and won't recompile those modules, even if their files change or disappear, in order to avoid creating incompatibilities between @@ -479,12 +497,12 @@ or other imported modules have their `__init__` functions called *before* the `_ enclosing module. Two typical uses of `__init__` are calling runtime initialization functions of external C libraries -and initializing global constants that involve pointers returned by external libraries. For example, +and initializing global constants that involve pointers returned by external libraries. For example, suppose that we are calling a C library `libfoo` that requires us to call a `foo_init()` initialization function at runtime. Suppose that we also want to define a global constant `foo_data_ptr` that holds the return value of a `void *foo_data()` function defined by `libfoo` -- this constant must be initialized at runtime (not at compile time) because the pointer address will change from run -to run. You could accomplish this by defining the following `__init__` function in your module: +to run. You could accomplish this by defining the following `__init__` function in your module: ```julia const foo_data_ptr = Ref{Ptr{Cvoid}}(0) @@ -509,9 +527,9 @@ null pointers unless they are hidden inside an [`isbits`](@ref) object). This in of the Julia functions [`@cfunction`](@ref) and [`pointer`](@ref). Dictionary and set types, or in general anything that depends on the output of a `hash(key)` method, -are a trickier case. In the common case where the keys are numbers, strings, symbols, ranges, +are a trickier case. In the common case where the keys are numbers, strings, symbols, ranges, `Expr`, or compositions of these types (via arrays, tuples, sets, pairs, etc.) they are safe to -precompile. However, for a few other key types, such as `Function` or `DataType` and generic +precompile. However, for a few other key types, such as `Function` or `DataType` and generic user-defined types where you haven't defined a `hash` method, the fallback `hash` method depends on the memory address of the object (via its `objectid`) and hence may change from run to run. If you have one of these key types, or if you aren't sure, to be safe you can initialize this @@ -590,15 +608,19 @@ A few other points to be aware of: an error to do this, but you simply need to be prepared that the system will try to copy some of these and to create a single unique instance of others. -It is sometimes helpful during module development to turn off incremental precompilation. The -command line flag `--compiled-modules={yes|no}` enables you to toggle module precompilation on and -off. When Julia is started with `--compiled-modules=no` the serialized modules in the compile cache -are ignored when loading modules and module dependencies. 
-More fine-grained control is available with `--pkgimages=no`, which suppresses only -native-code storage during precompilation. `Base.compilecache` can still be called -manually. The state of this command line flag is passed to `Pkg.build` to disable automatic -precompilation triggering when installing, updating, and explicitly building packages. +It is sometimes helpful during module development to turn off incremental precompilation. +The command line flag `--compiled-modules={yes|no|existing}` enables you to toggle module +precompilation on and off. When Julia is started with `--compiled-modules=no` the serialized +modules in the compile cache are ignored when loading modules and module dependencies. In +some cases, you may want to load existing precompiled modules, but not create new ones. This +can be done by starting Julia with `--compiled-modules=existing`. More fine-grained control +is available with `--pkgimages={yes|no|existing}`, which only affects native-code storage +during precompilation. `Base.compilecache` can still be called manually. The state of this +command line flag is passed to `Pkg.build` to disable automatic precompilation triggering +when installing, updating, and explicitly building packages. You can also debug some precompilation failures with environment variables. Setting -`JULIA_VERBOSE_LINKING=true` may help resolve failures in linking shared libraries of compiled -native code. See the **Developer Documentation** part of the Julia manual, where you will find further details in the section documenting Julia's internals under "Package Images". +`JULIA_VERBOSE_LINKING=true` may help resolve failures in linking shared libraries of +compiled native code. See the **Developer Documentation** part of the Julia manual, where +you will find further details in the section documenting Julia's internals under "Package +Images". diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index 056ceb1363fd7..209e2ffe1da56 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -27,7 +27,7 @@ The number of threads can either be specified as an integer (`--threads=4`) or a In older versions you must use the environment variable instead. !!! compat "Julia 1.7" - Using `auto` as value of the environment variable `JULIA_NUM_THREADS` requires at least Julia 1.7. + Using `auto` as value of the environment variable [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) requires at least Julia 1.7. In older versions, this value is ignored. Lets start Julia with 4 threads: @@ -76,7 +76,7 @@ julia> Threads.threadid() The Garbage Collector (GC) can use multiple threads. The amount used is either half the number of compute worker threads or configured by either the `--gcthreads` command line argument or by using the -[`JULIA_NUM_GC_THREADS`](@ref env-gc-threads) environment variable. +[`JULIA_NUM_GC_THREADS`](@ref JULIA_NUM_GC_THREADS) environment variable. !!! compat "Julia 1.10" The `--gcthreads` command line argument requires at least Julia 1.10. 
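To confirm the configuration from inside a session, one can query the thread counts as in the sketch below (this assumes Julia was started with flags such as `--threads=4 --gcthreads=2`, and that `Threads.ngcthreads` is available, which it is on Julia 1.10 and later; the exact values depend on the startup options):

```julia
Threads.nthreads()    # number of compute threads in the default pool
Threads.ngcthreads()  # number of GC threads configured for this session (Julia 1.10 and later)
```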
@@ -102,7 +102,7 @@ Julia may be started with one or more threads reserved to run interactive tasks: $ julia --threads 3,1 ``` -The environment variable `JULIA_NUM_THREADS` can also be used similarly: +The environment variable [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) can also be used similarly: ```bash export JULIA_NUM_THREADS=3,1 ``` @@ -116,8 +116,8 @@ julia> using Base.Threads julia> nthreadpools() 2 -julia> threadpool() -:default +julia> threadpool() # the main thread is in the interactive thread pool +:interactive julia> nthreads(:default) 3 @@ -133,61 +133,13 @@ julia> nthreads() The zero-argument version of `nthreads` returns the number of threads in the default pool. +!!! note + Depending on whether Julia has been started with interactive threads, + the main thread is either in the default or interactive thread pool. + Either or both numbers can be replaced with the word `auto`, which causes Julia to choose a reasonable default. -## Communication and synchronization - -Although Julia's threads can communicate through shared memory, it is notoriously -difficult to write correct and data-race free multi-threaded code. Julia's -[`Channel`](@ref)s are thread-safe and may be used to communicate safely. - -### Data-race freedom - -You are entirely responsible for ensuring that your program is data-race free, -and nothing promised here can be assumed if you do not observe that -requirement. The observed results may be highly unintuitive. - -The best way to ensure this is to acquire a lock around any access to data that -can be observed from multiple threads. For example, in most cases you should -use the following code pattern: - -```julia-repl -julia> lock(lk) do - use(a) - end - -julia> begin - lock(lk) - try - use(a) - finally - unlock(lk) - end - end -``` -where `lk` is a lock (e.g. `ReentrantLock()`) and `a` data. - -Additionally, Julia is not memory safe in the presence of a data race. Be very -careful about reading _any_ data if another thread might write to it! -Instead, always use the lock pattern above when changing data (such as assigning -to a global or closure variable) accessed by other threads. - -```julia -Thread 1: -global b = false -global a = rand() -global b = true - -Thread 2: -while !b; end -bad_read1(a) # it is NOT safe to access `a` here! - -Thread 3: -while !@isdefined(a); end -bad_read2(a) # it is NOT safe to access `a` here -``` - ## The `@threads` Macro Let's work a simple example using our native threads. Let us create an array of zeros: @@ -239,10 +191,11 @@ julia> a Note that [`Threads.@threads`](@ref) does not have an optional reduction parameter like [`@distributed`](@ref). -### Using `@threads` without data races +### Using `@threads` without data-races -Taking the example of a naive sum +The concept of a data-race is elaborated on in ["Communication and data races between threads"](@ref man-communication-and-data-races). For now, just known that a data race can result in incorrect results and dangerous errors. +Lets say we want to make the function `sum_single` below multithreaded. ```julia-repl julia> function sum_single(a) s = 0 @@ -275,12 +228,11 @@ julia> sum_multi_bad(1:1_000_000) Note that the result is not `500000500000` as it should be, and will most likely change each evaluation. To fix this, buffers that are specific to the task may be used to segment the sum into chunks that are race-free. 
-Here `sum_single` is reused, with its own internal buffer `s`, and vector `a` is split into `nthreads()` -chunks for parallel work via `nthreads()` `@spawn`-ed tasks. - +Here `sum_single` is reused, with its own internal buffer `s`. The input vector `a` is split into at most `nthreads()` +chunks for parallel work. We then use `Threads.@spawn` to create tasks that individually sum each chunk. Finally, we sum the results from each task using `sum_single` again: ```julia-repl julia> function sum_multi_good(a) - chunks = Iterators.partition(a, length(a) ÷ Threads.nthreads()) + chunks = Iterators.partition(a, cld(length(a), Threads.nthreads())) tasks = map(chunks) do chunk Threads.@spawn sum_single(chunk) end @@ -292,7 +244,7 @@ sum_multi_good (generic function with 1 method) julia> sum_multi_good(1:1_000_000) 500000500000 ``` -!!! Note +!!! note Buffers should not be managed based on `threadid()` i.e. `buffers = zeros(Threads.nthreads())` because concurrent tasks can yield, meaning multiple concurrent tasks may use the same buffer on a given thread, introducing risk of data races. Further, when more than one thread is available tasks may change thread at yield points, which is known as @@ -301,7 +253,75 @@ julia> sum_multi_good(1:1_000_000) Another option is the use of atomic operations on variables shared across tasks/threads, which may be more performant depending on the characteristics of the operations. -## Atomic Operations +## [Communication and data-races between threads](@id man-communication-and-data-races) + +Although Julia's threads can communicate through shared memory, it is notoriously difficult to write correct and data-race free multi-threaded code. Julia's +[`Channel`](@ref)s are thread-safe and may be used to communicate safely. There are also sections below that explain how to use [locks](@ref man-using-locks) and [atomics](@ref man-atomic-operations) to avoid data-races. + +In certain cases, Julia is able to detect safety violations, in particular with regard to deadlocks or other known-unsafe operations such as yielding +to the currently running task. In these cases, a [`ConcurrencyViolationError`](@ref) is thrown. + +### Data-race freedom + +You are entirely responsible for ensuring that your program is data-race free, +and nothing promised here can be assumed if you do not observe that +requirement. The observed results may be highly unintuitive. + +If data-races are introduced, Julia is not memory safe. **Be very +careful about reading _any_ data if another thread might write to it, as it could result in segmentation faults or worse**. Below are a couple of unsafe ways to access global variables from different threads: +```julia +Thread 1: +global b = false +global a = rand() +global b = true + +Thread 2: +while !b; end +bad_read1(a) # it is NOT safe to access `a` here! + +Thread 3: +while !@isdefined(a); end +bad_read2(a) # it is NOT safe to access `a` here +``` + +### [Using locks to avoid data-races](@id man-using-locks) +An important tool to avoid data-races, and thereby write thread-safe code, is the concept of a "lock". A lock can be locked and unlocked. If a thread has locked a lock, and not unlocked it, it is said to "hold" the lock. If there is only one lock, and we write code that requires holding the lock to access some data, we can ensure that multiple threads will never access the same data simultaneously. Note that the link between a lock and a variable is made by the programmer, and not the program.
+ +For example, we can create a lock `my_lock`, and lock it while we mutate a variable `my_variable`. This is done most simply with the `@lock` macro: + +```julia-repl +julia> my_lock = ReentrantLock(); + +julia> my_variable = [1, 2, 3]; + +julia> @lock my_lock my_variable[1] = 100 +100 +``` + +By using a similar pattern with the same lock and variable, but on another thread, the operations are free from data-races. + +We could have performed the operation above with the functional version of `lock`, in the following two ways: +```julia-repl +julia> lock(my_lock) do + my_variable[1] = 100 + end +100 + +julia> begin + lock(my_lock) + try + my_variable[1] = 100 + finally + unlock(my_lock) + end + end +100 +``` + +All three options are equivalent. Note how the final version requires an explicit `try`-block to ensure that the lock is always unlocked, whereas the first two version do this internally. One should always use the lock pattern above when changing data (such as assigning +to a global or closure variable) accessed by other threads. Failing to do this could have unforeseen and serious consequences. + +### [Atomic Operations](@id man-atomic-operations) Julia supports accessing and modifying values *atomically*, that is, in a thread-safe way to avoid [race conditions](https://en.wikipedia.org/wiki/Race_condition). A value (which must be of a primitive @@ -370,11 +390,12 @@ julia> acc[] ``` -## [Per-field atomics](@id man-atomics) +#### [Per-field atomics](@id man-atomics) We can also use atomics on a more granular level using the [`@atomic`](@ref -Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap), and -[`@atomicreplace`](@ref Base.@atomicreplace) macros. +Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap), +[`@atomicreplace`](@ref Base.@atomicreplace) macros, and +[`@atomiconce`](@ref Base.@atomiconce) macros. Specific details of the memory model and other details of the design are written in the [Julia Atomics @@ -439,7 +460,7 @@ threads in Julia: multiple threads where at least one thread modifies the collection (common examples include `push!` on arrays, or inserting items into a `Dict`). - * The schedule used by `@spawn` is nondeterministic and should not be relied on. + * The schedule used by [`@spawn`](@ref Threads.@spawn) is nondeterministic and should not be relied on. * Compute-bound, non-memory-allocating tasks can prevent garbage collection from running in other threads that are allocating memory. In these cases it may be necessary to insert a manual call to `GC.safepoint()` to allow GC to run. @@ -448,7 +469,8 @@ threads in Julia: method, and module definitions in parallel. * Be aware that finalizers registered by a library may break if threads are enabled. This may require some transitional work across the ecosystem before threading - can be widely adopted with confidence. See the next section for further details. + can be widely adopted with confidence. See the section on + [the safe use of finalizers](@ref man-finalizers) for further details. ## [Task Migration](@id man-task-migration) @@ -464,7 +486,7 @@ and therefore should not be used to index into a vector of buffers or stateful o Task migration was introduced in Julia 1.7. Before this tasks always remained on the same thread that they were started on. -## Safe use of Finalizers +## [Safe use of Finalizers](@id man-finalizers) Because finalizers can interrupt any code, they must be very careful in how they interact with any global state. 
Unfortunately, the main reason that diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md index 00a10177b2155..3ef41754c1e07 100644 --- a/doc/src/manual/networking-and-streams.md +++ b/doc/src/manual/networking-and-streams.md @@ -1,9 +1,10 @@ # Networking and Streams Julia provides a rich interface to deal with streaming I/O objects such as terminals, pipes and -TCP sockets. This interface, though asynchronous at the system level, is presented in a synchronous -manner to the programmer and it is usually unnecessary to think about the underlying asynchronous -operation. This is achieved by making heavy use of Julia cooperative threading ([coroutine](@ref man-tasks)) +TCP sockets. +These objects allow data to be sent and received in a stream-like fashion, which means that data is processed sequentially as it becomes available. +This interface, though asynchronous at the system level, is presented in a synchronous manner to the programmer. +This is achieved by making heavy use of Julia cooperative threading ([coroutine](@ref man-tasks)) functionality. ## Basic Stream I/O @@ -30,7 +31,7 @@ For example, to read a simple byte array, we could do: ```julia-repl julia> x = zeros(UInt8, 4) -4-element Array{UInt8,1}: +4-element Vector{UInt8}: 0x00 0x00 0x00 @@ -38,7 +39,7 @@ julia> x = zeros(UInt8, 4) julia> read!(stdin, x) abcd -4-element Array{UInt8,1}: +4-element Vector{UInt8}: 0x61 0x62 0x63 @@ -51,7 +52,7 @@ example, we could have written the above as: ```julia-repl julia> read(stdin, 4) abcd -4-element Array{UInt8,1}: +4-element Vector{UInt8}: 0x61 0x62 0x63 @@ -66,8 +67,8 @@ abcd "abcd" ``` -Note that depending on your terminal settings, your TTY may be line buffered and might thus require -an additional enter before the data is sent to Julia. +Note that depending on your terminal settings, your TTY ("teletype terminal") may be line buffered and might thus require an additional enter before `stdin` data is sent to Julia. +When running Julia from the command line in a TTY, output is sent to the console by default, and standard input is read from the keyboard. To read every line from [`stdin`](@ref) you can use [`eachline`](@ref): @@ -150,7 +151,7 @@ julia> f = open("hello.txt") IOStream() julia> readlines(f) -1-element Array{String,1}: +1-element Vector{String}: "Hello, World!" ``` @@ -205,6 +206,24 @@ julia> open("hello.txt") do f "HELLO AGAIN." ``` +If you want to redirect stdout to a file + +```# Open file for writing +out_file = open("output.txt", "w") + +# Redirect stdout to file +redirect_stdout(out_file) do + # Your code here + println("This output goes to `out_file` via the `stdout` variable.") +end + +# Close file +close(out_file) + +``` + +Redirecting stdout to a file can help you save and analyze program output, automate processes, and meet compliance requirements. + ## A simple TCP example Let's jump right in with a simple example involving TCP sockets. @@ -214,7 +233,7 @@ Let's first create a simple server: ```julia-repl julia> using Sockets -julia> errormonitor(@async begin +julia> errormonitor(Threads.@spawn begin server = listen(2000) while true sock = accept(server) @@ -286,11 +305,11 @@ printed the message and waited for the next client. 
Reading and writing works in To see this, consider the following simple echo server: ```julia-repl -julia> errormonitor(@async begin +julia> errormonitor(Threads.@spawn begin server = listen(2001) while true sock = accept(server) - @async while isopen(sock) + Threads.@spawn while isopen(sock) write(sock, readline(sock, keep=true)) end end @@ -300,7 +319,7 @@ Task (runnable) @0x00007fd31dc12e60 julia> clientside = connect(2001) TCPSocket(RawFD(28) open, 0 bytes waiting) -julia> errormonitor(@async while isopen(clientside) +julia> errormonitor(Threads.@spawn while isopen(clientside) write(stdout, readline(clientside, keep=true)) end) Task (runnable) @0x00007fd31dc11870 @@ -336,20 +355,19 @@ ip"74.125.226.225" ## Asynchronous I/O - All I/O operations exposed by [`Base.read`](@ref) and [`Base.write`](@ref) can be performed asynchronously through the use of [coroutines](@ref man-tasks). You can create a new coroutine to -read from or write to a stream using the [`@async`](@ref) macro: +read from or write to a stream using the [`Threads.@spawn`](@ref) macro: ```julia-repl -julia> task = @async open("foo.txt", "w") do io +julia> task = Threads.@spawn open("foo.txt", "w") do io write(io, "Hello, World!") end; julia> wait(task) julia> readlines("foo.txt") -1-element Array{String,1}: +1-element Vector{String}: "Hello, World!" ``` @@ -361,7 +379,7 @@ your program to block until all of the coroutines it wraps around have exited: julia> using Sockets julia> @sync for hostname in ("google.com", "github.com", "julialang.org") - @async begin + Threads.@spawn begin conn = connect(hostname, 80) write(conn, "GET / HTTP/1.1\r\nHost:$(hostname)\r\n\r\n") readline(conn, keep=true) @@ -418,6 +436,7 @@ close(socket) This example gives the same functionality as the previous program, but uses IPv6 as the network-layer protocol. Listener: + ```julia using Sockets group = Sockets.IPv6("ff05::5:6:7") @@ -430,6 +449,7 @@ close(socket) ``` Sender: + ```julia using Sockets group = Sockets.IPv6("ff05::5:6:7") diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md index 470ec9a315ce4..33285bde8a066 100644 --- a/doc/src/manual/noteworthy-differences.md +++ b/doc/src/manual/noteworthy-differences.md @@ -46,7 +46,7 @@ may trip up Julia users accustomed to MATLAB: * A Julia script may contain any number of functions, and all definitions will be externally visible when the file is loaded. Function definitions can be loaded from files outside the current working directory. - * In Julia, reductions such as [`sum`](@ref), [`prod`](@ref), and [`max`](@ref) are performed + * In Julia, reductions such as [`sum`](@ref), [`prod`](@ref), and [`maximum`](@ref) are performed over every element of an array when called with a single argument, as in `sum(A)`, even if `A` has more than one dimension. * In Julia, parentheses must be used to call a function with zero arguments, like in [`rand()`](@ref). @@ -56,6 +56,10 @@ may trip up Julia users accustomed to MATLAB: * In Julia, if `A` and `B` are arrays, logical comparison operations like `A == B` do not return an array of booleans. Instead, use `A .== B`, and similarly for the other boolean operators like [`<`](@ref), [`>`](@ref). + * In Julia, when you want to apply a scalar-valued function elementwise to an array, use broadcasting + syntax: `f.(A)` instead of `f(A)`. 
In some cases, both operations are defined but mean different things: + in MATLAB `exp(A)` applies elementwise and `expm(A)` is the [matrix exponential](https://en.wikipedia.org/wiki/Matrix_exponential), + but in Julia `exp.(A)` applies elementwise and `exp(A)` is the matrix exponential. * In Julia, the operators [`&`](@ref), [`|`](@ref), and [`⊻`](@ref xor) ([`xor`](@ref)) perform the bitwise operations equivalent to `and`, `or`, and `xor` respectively in MATLAB, and have precedence similar to Python's bitwise operators (unlike C). They can operate on scalars or element-wise @@ -78,6 +82,9 @@ may trip up Julia users accustomed to MATLAB: provides the higher order functions [`filter`](@ref) and [`filter!`](@ref), allowing users to write `filter(z->z>3, x)` and `filter!(z->z>3, x)` as alternatives to the corresponding transliterations `x[x.>3]` and `x = x[x.>3]`. Using [`filter!`](@ref) reduces the use of temporary arrays. + * Following on from the previous point, to replace values that meet specific criteria, for example a + thresholding operation on all elements in a matrix, could be achieved in Matlab as follows `A(A < threshold) = 0`. + The Julia equivalent would be `A[A .< threshold] .= 0`. * The analogue of extracting (or "dereferencing") all elements of a cell array, e.g. in `vertcat(A{:})` in MATLAB, is written using the splat operator in Julia, e.g. as `vcat(A...)`. * In Julia, the `adjoint` function performs conjugate transposition; in MATLAB, `adjoint` provides the @@ -106,7 +113,7 @@ For users coming to Julia from R, these are some noteworthy differences: * In Julia, `[1, 2, 3, 4][[true, false]]` throws a [`BoundsError`](@ref). * In Julia, `[1, 2, 3, 4][[true, false, true, false]]` produces `[1, 3]`. * Like many languages, Julia does not always allow operations on vectors of different lengths, unlike - R where the vectors only need to share a common index range. For example, `c(1, 2, 3, 4) + c(1, 2)` + R where the vectors only need to share a common index range. For example, `c(1, 2, 3, 4) + c(1, 2)` is valid R but the equivalent `[1, 2, 3, 4] + [1, 2]` will throw an error in Julia. * Julia allows an optional trailing comma when that comma does not change the meaning of code. This can cause confusion among R users when indexing into arrays. For example, `x[1,]` in R @@ -137,7 +144,7 @@ For users coming to Julia from R, these are some noteworthy differences: or `if 1==1`. * Julia does not provide `nrow` and `ncol`. Instead, use `size(M, 1)` for `nrow(M)` and `size(M, 2)` for `ncol(M)`. - * Julia is careful to distinguish scalars, vectors and matrices. In R, `1` and `c(1)` are the same. + * Julia is careful to distinguish scalars, vectors and matrices. In R, `1` and `c(1)` are the same. In Julia, they cannot be used interchangeably. * Julia's [`diag`](@ref) and [`diagm`](@ref) are not like R's. * Julia cannot assign to the results of function calls on the left hand side of an assignment operation: @@ -167,12 +174,12 @@ For users coming to Julia from R, these are some noteworthy differences: have higher precedence than the `:` operator, whereas the reverse is true in R. For example, `1:n-1` in Julia is equivalent to `1:(n-1)` in R. * Julia's [`max`](@ref) and [`min`](@ref) are the equivalent of `pmax` and `pmin` respectively - in R, but both arguments need to have the same dimensions. While [`maximum`](@ref) and [`minimum`](@ref) + in R, but both arguments need to have the same dimensions. 
While [`maximum`](@ref) and [`minimum`](@ref) replace `max` and `min` in R, there are important differences. * Julia's [`sum`](@ref), [`prod`](@ref), [`maximum`](@ref), and [`minimum`](@ref) are different from their counterparts in R. They all accept an optional keyword argument `dims`, which indicates the - dimensions, over which the operation is carried out. For instance, let `A = [1 2; 3 4]` in Julia - and `B <- rbind(c(1,2),c(3,4))` be the same matrix in R. Then `sum(A)` gives the same result as + dimensions, over which the operation is carried out. For instance, let `A = [1 2; 3 4]` in Julia + and `B <- rbind(c(1,2),c(3,4))` be the same matrix in R. Then `sum(A)` gives the same result as `sum(B)`, but `sum(A, dims=1)` is a row vector containing the sum over each column and `sum(A, dims=2)` is a column vector containing the sum over each row. This contrasts to the behavior of R, where separate `colSums(B)` and `rowSums(B)` functions provide these functionalities. If the `dims` keyword argument is a @@ -213,11 +220,11 @@ For users coming to Julia from R, these are some noteworthy differences: * Unlike Python, Julia allows [AbstractArrays with arbitrary indexes](https://julialang.org/blog/2017/04/offset-arrays/). Python's special interpretation of negative indexing, `a[-1]` and `a[-2]`, should be written `a[end]` and `a[end-1]` in Julia. - * Julia requires `end` for indexing until the last element. `x[1:]` in Python is equivalent to `x[2:end]` in Julia. - * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing. + * Julia requires `end` for indexing until the last element. `x[2:end]` in Julia is equivalent to `x[1:]` in Python. + * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is the same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing. * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia. * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax it uses - in array-indexing. The `range` function is also supported. + in array-indexing. The `range` function is also supported. * In Julia, indexing a matrix with arrays like `X[[1,2], [1,3]]` refers to a sub-matrix that contains the intersections of the first and second rows with the first and third columns. In Python, `X[[1,2], [1,3]]` refers to a vector that contains the values of cell `[1,1]` and `[2,3]` in the matrix. `X[[1,2], [1,3]]` in Julia is equivalent with `X[np.ix_([0,1],[0,2])]` in Python. `X[[0,1], [0,2]]` in Python is equivalent with `X[[CartesianIndex(1,1), CartesianIndex(2,3)]]` in Julia. * Julia has no line continuation syntax: if, at the end of a line, the input so far is a complete expression, it is considered done; otherwise the input continues. One way to force an expression @@ -245,12 +252,17 @@ For users coming to Julia from R, these are some noteworthy differences: * In Julia, the exponentiation operator is `^`, not `**` as in Python. * Julia uses `nothing` of type `Nothing` to represent a null value, whereas Python uses `None` of type `NoneType`. 
* In Julia, the standard operators over a matrix type are matrix operations, whereas, in Python, the standard operators are element-wise operations. When both `A` and `B` are matrices, `A * B` in Julia performs matrix multiplication, not element-wise multiplication as in Python. `A * B` in Julia is equivalent with `A @ B` in Python, whereas `A * B` in Python is equivalent with `A .* B` in Julia. + * In Julia, when you want to apply a scalar-valued function elementwise to an array, use broadcasting + syntax: `f.(A)` instead of `f(A)`. In some cases, both operations are defined but mean different things: + `numpy.exp(A)` applies elementwise and `scipy.linalg.expm(A)` is the [matrix exponential](https://en.wikipedia.org/wiki/Matrix_exponential), + but in Julia `exp.(A)` applies elementwise and `exp(A)` is the matrix exponential. * The adjoint operator `'` in Julia returns an adjoint of a vector (a lazy representation of row vector), whereas the transpose operator `.T` over a vector in Python returns the original vector (non-op). * In Julia, a function may contain multiple concrete implementations (called *methods*), which are selected via multiple dispatch based on the types of all arguments to the call, as compared to functions in Python, which have a single implementation and no polymorphism (as opposed to Python method calls which use a different syntax and allows dispatch on the receiver of the method). * There are no classes in Julia. Instead there are structures (mutable or immutable), containing data but no methods. * Calling a method of a class instance in Python (`x = MyClass(*args); x.f(y)`) corresponds to a function call in Julia, e.g. `x = MyType(args...); f(x, y)`. In general, multiple dispatch is more flexible and powerful than the Python class system. * Julia structures may have exactly one abstract supertype, whereas Python classes can inherit from one or more (abstract or concrete) superclasses. - * The logical Julia program structure (Packages and Modules) is independent of the file structure (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules). + * The logical Julia program structure (Packages and Modules) is independent of the file structure, whereas the Python code structure is defined by directories (Packages) and files (Modules). + * In Julia, it is idiomatic to split the text of large modules into multiple files, without introducing a new module per file. The code is reassembled inside a single module in a main file via `include`. While the Python equivalent (`exec`) is not typical for this use (it will silently clobber prior definitions), Julia programs are defined as a unit at the `module` level with `using` or `import`, which will only get executed once when first needed--like `include` in Python. Within those modules, the individual files that make up that module are loaded with `include` by listing them once in the intended order. * The ternary operator `x > 0 ? 1 : -1` in Julia corresponds to a conditional expression in Python `1 if x > 0 else -1`. * In Julia the `@` symbol refers to a macro, whereas in Python it refers to a decorator. * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia (compared with Python, Julia is faster at ordinary control flow but slower at exception-catching). 
@@ -258,7 +270,7 @@ For users coming to Julia from R, these are some noteworthy differences: * Be careful with non-constant global variables in Julia, especially in tight loops. Since you can write close-to-metal code in Julia (unlike Python), the effect of globals can be drastic (see [Performance Tips](@ref man-performance-tips)). * In Julia, rounding and truncation are explicit. Python's `int(3.7)` should be `floor(Int, 3.7)` or `Int(floor(3.7))` and is distinguished from `round(Int, 3.7)`. `floor(x)` and `round(x)` on their own return an integer value of the same type as `x` rather than always returning `Int`. * In Julia, parsing is explicit. Python's `float("3.7")` would be `parse(Float64, "3.7")` in Julia. - * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`. Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`. + * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`. Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`. * In Julia, a new local scope is introduced by most code blocks, including loops and `try` — `catch` — `finally`. Note that comprehensions (list, generator, etc.) introduce a new local scope both in Python and Julia, whereas `if` blocks do not introduce a new local scope in both languages. ## Noteworthy differences from C/C++ @@ -295,7 +307,7 @@ For users coming to Julia from R, these are some noteworthy differences: Floating point literals are closer in behavior to C/C++. Octal (prefixed with `0o`) and binary (prefixed with `0b`) literals are also treated as unsigned (or `BigInt` for more than 128 bits). * In Julia, the division operator [`/`](@ref) returns a floating point number when both operands - are of integer type. To perform integer division, use [`div`](@ref) or [`÷`](@ref div). + are of integer type. To perform integer division, use [`div`](@ref) or [`÷`](@ref div). * Indexing an `Array` with floating point types is generally an error in Julia. The Julia equivalent of the C expression `a[i / 2]` is `a[i ÷ 2 + 1]`, where `i` is of integer type. * String literals can be delimited with either `"` or `"""`, `"""` delimited literals can contain @@ -304,7 +316,7 @@ For users coming to Julia from R, these are some noteworthy differences: evaluates the variable name or the expression in the context of the function. * `//` indicates a [`Rational`](@ref) number, and not a single-line comment (which is `#` in Julia) * `#=` indicates the start of a multiline comment, and `=#` ends it. - * Functions in Julia return values from their last expression(s) or the `return` keyword. Multiple + * Functions in Julia return values from their last expression(s) or the `return` keyword. Multiple values can be returned from functions and assigned as tuples, e.g. 
`(a, b) = myfunction()` or `a, b = myfunction()`, instead of having to pass pointers to values as one would have to do in C/C++ (i.e. `a = myfunction(&b)`. @@ -315,7 +327,7 @@ For users coming to Julia from R, these are some noteworthy differences: meaning within `[ ]`, something to watch out for. `;` can be used to separate expressions on a single line, but are not strictly necessary in many cases, and are more an aid to readability. * In Julia, the operator [`⊻`](@ref xor) ([`xor`](@ref)) performs the bitwise XOR operation, i.e. - [`^`](@ref) in C/C++. Also, the bitwise operators do not have the same precedence as C/C++, so + [`^`](@ref) in C/C++. Also, the bitwise operators do not have the same precedence as C/C++, so parenthesis may be required. * Julia's [`^`](@ref) is exponentiation (pow), not bitwise XOR as in C/C++ (use [`⊻`](@ref xor), or [`xor`](@ref), in Julia) @@ -352,9 +364,9 @@ For users coming to Julia from R, these are some noteworthy differences: it's more general than that since methods are dispatched on every argument type, not only `this`, using the most-specific-declaration rule). -### Julia ⇔ C/C++: Namespaces +### Julia ⇔ C/C++: Namespaces * C/C++ `namespace`s correspond roughly to Julia `module`s. - * There are no private globals or fields in Julia. Everything is publicly accessible + * There are no private globals or fields in Julia. Everything is publicly accessible through fully qualified paths (or relative paths, if desired). * `using MyNamespace::myfun` (C++) corresponds roughly to `import MyModule: myfun` (Julia). * `using namespace MyNamespace` (C++) corresponds roughly to `using MyModule` (Julia) @@ -364,7 +376,7 @@ For users coming to Julia from R, these are some noteworthy differences: * Caveat: `import`/`using` (Julia) works only at the global scope level (`module`s) * In C++, `using namespace X` works within arbitrary scopes (ex: function scope). -### Julia ⇔ C/C++: Module loading +### Julia ⇔ C/C++: Module loading * When you think of a C/C++ "**library**", you are likely looking for a Julia "**package**". * Caveat: C/C++ libraries often house multiple "software modules" whereas Julia "packages" typically house one. @@ -392,10 +404,10 @@ For users coming to Julia from R, these are some noteworthy differences: paths to the `Base.LOAD_PATH` array. * Packages from directory-based repositories do not require the `Pkg.add()` tool prior to being loaded with `import` or `using`. They are simply available to the project. - * Directory-based package repositories are the **quickest solution** to developping local + * Directory-based package repositories are the **quickest solution** to developing local libraries of "software modules". -### Julia ⇔ C/C++: Assembling modules +### Julia ⇔ C/C++: Assembling modules * In C/C++, `.c`/`.cpp` files are compiled & added to a library with build/`make` scripts. * In Julia, `import [PkgName]`/`using [PkgName]` statements load `[PkgName].jl` located in a package's `[PkgName]/src/` subdirectory. @@ -412,20 +424,21 @@ For users coming to Julia from R, these are some noteworthy differences: Julia package* ("software module"). It is therefore relatively straightforward to ensure file are `include`d only once (No `#ifdef` confusion). -### Julia ⇔ C/C++: Module interface - * C++ exposes interfaces using "public" `.h`/`.hpp` files whereas Julia `module`s `export` - symbols that are intended for their users. 
+### Julia ⇔ C/C++: Module interface + * C++ exposes interfaces using "public" `.h`/`.hpp` files whereas Julia `module`s mark + specific symbols that are intended for their users as `public`or `export`ed. * Often, Julia `module`s simply add functionality by generating new "methods" to existing functions (ex: `Base.push!`). * Developers of Julia packages therefore cannot rely on header files for interface documentation. * Interfaces for Julia packages are typically described using docstrings, README.md, static web pages, ... - * Some developers choose not to `export` all symbols required to use their package/module. + * Some developers choose not to `export` all symbols required to use their package/module, + but should still mark unexported user facing symbols as `public`. * Users might be expected to access these components by qualifying functions/structs/... with the package/module name (ex: `MyModule.run_this_task(...)`). -### Julia ⇔ C/C++: Quick reference +### Julia ⇔ C/C++: Quick reference | Software Concept | Julia | C/C++ | | :--- | :--- | :--- | diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index c86630ce2a8f1..d506ac9946ba6 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -3,7 +3,16 @@ In the following sections, we briefly go through a few techniques that can help make your Julia code run as fast as possible. -## Performance critical code should be inside a function +## [Table of contents](@id man-performance-tips-toc) + +```@contents +Pages = ["performance-tips.md"] +Depth = 3 +``` + +## General advice + +### Performance critical code should be inside a function Any code that is performance critical should be inside a function. Code inside functions tends to run much faster than top level code, due to how Julia's compiler works. @@ -11,7 +20,7 @@ The use of functions is not only important for performance: functions are more r The functions should take arguments, instead of operating directly on global variables, see the next point. -## Avoid untyped global variables +### Avoid untyped global variables The value of an untyped global variable might change at any point, possibly leading to a change of its type. This makes it difficult for the compiler to optimize code using global variables. This also applies to type-valued variables, @@ -24,7 +33,7 @@ performance: const DEFAULT_VAL = 0 ``` -If a global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals). +If a non-constant global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals); `const` globals need not be annotated because their type is inferred from their initialization value. Uses of untyped globals can be optimized by annotating their types at the point of use: @@ -61,7 +70,7 @@ julia> global x = 1.0 so all the performance issues discussed previously apply. -## Measure performance with [`@time`](@ref) and pay attention to memory allocation +### Measure performance with [`@time`](@ref) and pay attention to memory allocation A useful tool for measuring performance is the [`@time`](@ref) macro. We here repeat the example with the global variable above, but this time with the type annotation removed: @@ -94,8 +103,8 @@ a vector of 64-bit floats so there should be no need to allocate (heap) memory. 
We should clarify that what `@time` reports is specifically *heap* allocations, which are typically needed for either mutable objects or for creating/growing variable-sized containers (such as `Array` or `Dict`, strings, or "type-unstable" -objects whose type is only known at runtime). Allocating (or deallocating) such blocks of memory may require an expensive -system call (e.g. via `malloc` in C), and they must be tracked for garbage collection. In contrast, immutable values like +objects whose type is only known at runtime). Allocating (or deallocating) such blocks of memory may require an expensive function +call to libc (e.g. via `malloc` in C), and they must be tracked for garbage collection. In contrast, immutable values like numbers (except bignums), tuples, and immutable `struct`s can be stored much more cheaply, e.g. in stack or CPU-register memory, so one doesn’t typically worry about the performance cost of "allocating" them. @@ -149,7 +158,38 @@ its algorithmic aspects (see [Pre-allocating outputs](@ref)). For more serious benchmarking, consider the [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl) package which among other things evaluates the function multiple times in order to reduce noise. -## [Tools](@id tools) +### Break functions into multiple definitions + +Writing a function as many small definitions allows the compiler to directly call the most applicable +code, or even inline it. + +Here is an example of a "compound function" that should really be written as multiple definitions: + +```julia +using LinearAlgebra + +function mynorm(A) + if isa(A, Vector) + return sqrt(real(dot(A,A))) + elseif isa(A, Matrix) + return maximum(svdvals(A)) + else + error("mynorm: invalid argument") + end +end +``` + +This can be written more concisely and efficiently as: + +```julia +mynorm(x::Vector) = sqrt(real(dot(x, x))) +mynorm(A::Matrix) = maximum(svdvals(A)) +``` + +It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code +written as the `mynorm` example. + +### [Tools](@id tools) Julia and its package ecosystem includes tools that may help you diagnose problems and improve the performance of your code: @@ -157,7 +197,7 @@ the performance of your code: * [Profiling](@ref) allows you to measure the performance of your running code and identify lines that serve as bottlenecks. For complex projects, the [ProfileView](https://github.com/timholy/ProfileView.jl) package can help you visualize your profiling results. - * The [Traceur](https://github.com/JunoLab/Traceur.jl) package can help you find common performance problems in your code. + * The [JET](https://github.com/aviatesk/JET.jl) package can help you find common performance problems in your code. * Unexpectedly-large memory allocations--as reported by [`@time`](@ref), [`@allocated`](@ref), or the profiler (through calls to the garbage-collection routines)--hint that there might be issues with your code. If you don't see another reason for the allocations, suspect a type problem. @@ -166,7 +206,14 @@ the performance of your code: * `@code_warntype` generates a representation of your code that can be helpful in finding expressions that result in type uncertainty. See [`@code_warntype`](@ref) below. -## [Avoid containers with abstract type parameters](@id man-performance-abstract-container) +## Type inference + +In many languages with optional type declarations, adding declarations is the principal way to +make code run faster. This is *not* the case in Julia. 
In Julia, the compiler generally knows +the types of all function arguments, local variables, and expressions. However, there are a few +specific instances where declarations are helpful. + +### [Avoid containers with abstract type parameters](@id man-performance-abstract-container) When working with parameterized types, including arrays, it is best to avoid parameterizing with abstract types where possible. @@ -210,13 +257,6 @@ better than `IdDict{Type, Vector}` See also the discussion under [Parametric Types](@ref). -## Type declarations - -In many languages with optional type declarations, adding declarations is the principal way to -make code run faster. This is *not* the case in Julia. In Julia, the compiler generally knows -the types of all function arguments, local variables, and expressions. However, there are a few -specific instances where declarations are helpful. - ### Avoid fields with abstract type Types can be declared without specifying the types of their fields: @@ -358,6 +398,27 @@ julia> !isconcretetype(Array), !isabstracttype(Array), isstructtype(Array), !isc ``` In this case, it would be better to avoid declaring `MyType` with a field `a::Array` and instead declare the field as `a::Array{T,N}` or as `a::A`, where `{T,N}` or `A` are parameters of `MyType`. +The previous advice is especially useful when the fields of a struct are meant to be functions, or more generally callable objects. +It is very tempting to define a struct as follows: + +```julia +struct MyCallableWrapper + f::Function +end +``` + +But since `Function` is an abstract type, every call to `wrapper.f` will require dynamic dispatch, due to the type instability of accessing the field `f`. +Instead, you should write something like: + +```julia +struct MyCallableWrapper{F} + f::F +end +``` + +which has nearly identical behavior but will be much faster (because the type instability is eliminated). +Note that we do not impose `F<:Function`: this means callable objects which do not subtype `Function` are also allowed for the field `f`. + ### Avoid fields with abstract containers The same best practices also work for container types: @@ -587,38 +648,7 @@ would not normally specialize that method call. You need to check the when argument types are changed, i.e., if `Base.specializations(@which f(...))` contains specializations for the argument in question. -## Break functions into multiple definitions - -Writing a function as many small definitions allows the compiler to directly call the most applicable -code, or even inline it. - -Here is an example of a "compound function" that should really be written as multiple definitions: - -```julia -using LinearAlgebra - -function mynorm(A) - if isa(A, Vector) - return sqrt(real(dot(A,A))) - elseif isa(A, Matrix) - return maximum(svdvals(A)) - else - error("mynorm: invalid argument") - end -end -``` - -This can be written more concisely and efficiently as: - -```julia -mynorm(x::Vector) = sqrt(real(dot(x, x))) -mynorm(A::Matrix) = maximum(svdvals(A)) -``` - -It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code -written as the `mynorm` example. - -## Write "type-stable" functions +### Write "type-stable" functions When possible, it helps to ensure that a function always returns a value of the same type. Consider the following definition: @@ -639,7 +669,7 @@ pos(x) = x < 0 ? 
zero(x) : x There is also a [`oneunit`](@ref) function, and a more general [`oftype(x, y)`](@ref) function, which returns `y` converted to the type of `x`. -## Avoid changing the type of a variable +### Avoid changing the type of a variable An analogous "type-stability" problem exists for variables used repeatedly within a function: @@ -662,7 +692,7 @@ optimize the body of the loop. There are several possible fixes: * Use an explicit conversion by `x = oneunit(Float64)` * Initialize with the first loop iteration, to `x = 1 / rand()`, then loop `for i = 2:10` -## [Separate kernel functions (aka, function barriers)](@id kernel-functions) +### [Separate kernel functions (aka, function barriers)](@id kernel-functions) Many functions follow a pattern of performing some set-up work, and then running many iterations to perform a core computation. Where possible, it is a good idea to put these core computations @@ -721,7 +751,172 @@ or the [`fill!`](@ref) function, which we could have used instead of writing our Functions like `strange_twos` occur when dealing with data of uncertain type, for example data loaded from an input file that might contain either integers, floats, strings, or something else. -## [Types with values-as-parameters](@id man-performance-value-type) +### [[`@code_warntype`](@ref)](@id man-code-warntype) + +The macro [`@code_warntype`](@ref) (or its function variant [`code_warntype`](@ref)) can sometimes +be helpful in diagnosing type-related problems. Here's an example: + +```julia-repl +julia> @noinline pos(x) = x < 0 ? 0 : x; + +julia> function f(x) + y = pos(x) + return sin(y*x + 1) + end; + +julia> @code_warntype f(3.2) +MethodInstance for f(::Float64) + from f(x) @ Main REPL[9]:1 +Arguments + #self#::Core.Const(f) + x::Float64 +Locals + y::Union{Float64, Int64} +Body::Float64 +1 ─ (y = Main.pos(x)) +│ %2 = (y * x)::Float64 +│ %3 = (%2 + 1)::Float64 +│ %4 = Main.sin(%3)::Float64 +└── return %4 +``` + +Interpreting the output of [`@code_warntype`](@ref), like that of its cousins [`@code_lowered`](@ref), +[`@code_typed`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref), takes a little practice. +Your code is being presented in form that has been heavily digested on its way to generating +compiled machine code. Most of the expressions are annotated by a type, indicated by the `::T` +(where `T` might be [`Float64`](@ref), for example). The most important characteristic of [`@code_warntype`](@ref) +is that non-concrete types are displayed in red; since this document is written in Markdown, which has no color, +in this document, red text is denoted by uppercase. + +At the top, the inferred return type of the function is shown as `Body::Float64`. +The next lines represent the body of `f` in Julia's SSA IR form. +The numbered boxes are labels and represent targets for jumps (via `goto`) in your code. +Looking at the body, you can see that the first thing that happens is that `pos` is called and the +return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since +it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the +input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64` +The net result is that `f(x::Float64)` will not be type-unstable +in its output, even if some of the intermediate computations are type-unstable. + +How you use this information is up to you. 
Obviously, it would be far and away best to fix `pos` +to be type-stable: if you did so, all of the variables in `f` would be concrete, and its performance +would be optimal. However, there are circumstances where this kind of *ephemeral* type instability +might not matter too much: for example, if `pos` is never used in isolation, the fact that `f`'s +output is type-stable (for [`Float64`](@ref) inputs) will shield later code from the propagating +effects of type instability. This is particularly relevant in cases where fixing the type instability +is difficult or impossible. In such cases, the tips above (e.g., adding type annotations and/or +breaking up functions) are your best tools to contain the "damage" from type instability. +Also, note that even Julia Base has functions that are type unstable. +For example, the function [`findfirst`](@ref) returns the index into an array where a key is found, +or `nothing` if it is not found, a clear type instability. In order to make it easier to find the +type instabilities that are likely to be important, `Union`s containing either `missing` or `nothing` +are color highlighted in yellow, instead of red. + +The following examples may help you interpret expressions marked as containing non-concrete types: + + * Function body starting with `Body::Union{T1,T2})` + * Interpretation: function with unstable return type + * Suggestion: make the return value type-stable, even if you have to annotate it + + * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}` + * Interpretation: call to a type-unstable function `g`. + * Suggestion: fix the function, or if necessary annotate the return value + + * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any` + * Interpretation: accessing elements of poorly-typed arrays + * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual + element accesses + + * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N` + * Interpretation: getting a field that is of non-concrete type. In this case, the type of `x`, say `ArrayContainer`, had a + field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type. + * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter + of `ArrayContainer` + +### [Performance of captured variable](@id man-performance-captured) + +Consider the following example that defines an inner function: +```julia +function abmult(r::Int) + if r < 0 + r = -r + end + f = x -> x * r + return f +end +``` + +Function `abmult` returns a function `f` that multiplies its argument by +the absolute value of `r`. The inner function assigned to `f` is called a +"closure". Inner functions are also used by the +language for `do`-blocks and for generator expressions. + +This style of code presents performance challenges for the language. +The parser, when translating it into lower-level instructions, +substantially reorganizes the above code by extracting the +inner function to a separate code block. "Captured" variables such as `r` +that are shared by inner functions and their enclosing scope are +also extracted into a heap-allocated "box" accessible to both inner and +outer functions because the language specifies that `r` in the +inner scope must be identical to `r` in the outer scope even after the +outer scope (or another inner function) modifies `r`. 
+ +The discussion in the preceding paragraph referred to the "parser", that is, the phase +of compilation that takes place when the module containing `abmult` is first loaded, +as opposed to the later phase when it is first invoked. The parser does not "know" that +`Int` is a fixed type, or that the statement `r = -r` transforms an `Int` to another `Int`. +The magic of type inference takes place in the later phase of compilation. + +Thus, the parser does not know that `r` has a fixed type (`Int`). +Nor that `r` does not change value once the inner function is created (so that +the box is unneeded). Therefore, the parser emits code for +box that holds an object with an abstract type such as `Any`, which +requires run-time type dispatch for each occurrence of `r`. This can be +verified by applying `@code_warntype` to the above function. Both the boxing +and the run-time type dispatch can cause loss of performance. + +If captured variables are used in a performance-critical section of the code, +then the following tips help ensure that their use is performant. First, if +it is known that a captured variable does not change its type, then this can +be declared explicitly with a type annotation (on the variable, not the +right-hand side): +```julia +function abmult2(r0::Int) + r::Int = r0 + if r < 0 + r = -r + end + f = x -> x * r + return f +end +``` +The type annotation partially recovers lost performance due to capturing because +the parser can associate a concrete type to the object in the box. +Going further, if the captured variable does not need to be boxed at all (because it +will not be reassigned after the closure is created), this can be indicated +with `let` blocks as follows. +```julia +function abmult3(r::Int) + if r < 0 + r = -r + end + f = let r = r + x -> x * r + end + return f +end +``` +The `let` block creates a new variable `r` whose scope is only the +inner function. The second technique recovers full language performance +in the presence of captured variables. Note that this is a rapidly +evolving aspect of the compiler, and it is likely that future releases +will not require this degree of programmer annotation to attain performance. +In the mean time, some user-contributed packages like +[FastClosures](https://github.com/c42f/FastClosures.jl) automate the +insertion of `let` statements as in `abmult3`. + + +### [Types with values-as-parameters](@id man-performance-value-type) Let's say you want to create an `N`-dimensional array that has size 3 along each axis. Such arrays can be created like this: @@ -811,7 +1006,7 @@ In this example, `N` is passed as a parameter, so its "value" is known to the co `Val(T)` works only when `T` is either hard-coded/literal (`Val(3)`) or already specified in the type-domain. -## The dangers of abusing multiple dispatch (aka, more on types with values-as-parameters) +### The dangers of abusing multiple dispatch (aka, more on types with values-as-parameters) Once one learns to appreciate multiple dispatch, there's an understandable tendency to go overboard and try to use it for everything. For example, you might imagine using it to store information, @@ -858,113 +1053,24 @@ or thousands of variants compiled for it. Each of these increases the size of th code, the length of internal lists of methods, etc. Excess enthusiasm for values-as-parameters can easily waste enormous resources. 
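+
+As a rough illustration of how quickly specializations accumulate when values are moved into the type domain, consider the following sketch (the `ValTag` type and `take_tag` function are made-up names; `Base.specializations` is the same introspection helper mentioned earlier in this document):
+
+```julia
+using InteractiveUtils  # for @which
+
+struct ValTag{N} end             # a value smuggled into the type domain
+take_tag(::ValTag{N}) where {N} = N
+
+for i in 1:100
+    take_tag(ValTag{i}())        # each distinct value of i compiles a fresh specialization
+end
+
+# Count the method instances that now exist for the single `take_tag` method.
+count(Returns(true), Base.specializations(@which take_tag(ValTag{1}())))  # ≈ 100 compiled variants
+```
+
+Storing such values in a field of a concrete type instead lets a single compiled method handle all of them.
+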
-## [Access arrays in memory order, along columns](@id man-performance-column-major) +## Memory management and arrays -Multidimensional arrays in Julia are stored in column-major order. This means that arrays are -stacked one column at a time. This can be verified using the `vec` function or the syntax `[:]` -as shown below (notice that the array is ordered `[1 3 2 4]`, not `[1 2 3 4]`): +### Pre-allocate outputs -```jldoctest -julia> x = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> x[:] -4-element Vector{Int64}: - 1 - 3 - 2 - 4 -``` - -This convention for ordering arrays is common in many languages like Fortran, Matlab, and R (to -name a few). The alternative to column-major ordering is row-major ordering, which is the convention -adopted by C and Python (`numpy`) among other languages. Remembering the ordering of arrays can -have significant performance effects when looping over arrays. A rule of thumb to keep in mind -is that with column-major arrays, the first index changes most rapidly. Essentially this means -that looping will be faster if the inner-most loop index is the first to appear in a slice expression. -Keep in mind that indexing an array with `:` is an implicit loop that iteratively accesses all elements within a particular dimension; it can be faster to extract columns than rows, for example. - -Consider the following contrived example. Imagine we wanted to write a function that accepts a -[`Vector`](@ref) and returns a square [`Matrix`](@ref) with either the rows or the columns filled with copies -of the input vector. Assume that it is not important whether rows or columns are filled with these -copies (perhaps the rest of the code can be easily adapted accordingly). We could conceivably -do this in at least four ways (in addition to the recommended call to the built-in [`repeat`](@ref)): - -```julia -function copy_cols(x::Vector{T}) where T - inds = axes(x, 1) - out = similar(Array{T}, inds, inds) - for i = inds - out[:, i] = x - end - return out -end - -function copy_rows(x::Vector{T}) where T - inds = axes(x, 1) - out = similar(Array{T}, inds, inds) - for i = inds - out[i, :] = x - end - return out -end - -function copy_col_row(x::Vector{T}) where T - inds = axes(x, 1) - out = similar(Array{T}, inds, inds) - for col = inds, row = inds - out[row, col] = x[row] - end - return out -end - -function copy_row_col(x::Vector{T}) where T - inds = axes(x, 1) - out = similar(Array{T}, inds, inds) - for row = inds, col = inds - out[row, col] = x[col] - end - return out -end -``` - -Now we will time each of these functions using the same random `10000` by `1` input vector: - -```julia-repl -julia> x = randn(10000); - -julia> fmt(f) = println(rpad(string(f)*": ", 14, ' '), @elapsed f(x)) - -julia> map(fmt, [copy_cols, copy_rows, copy_col_row, copy_row_col]); -copy_cols: 0.331706323 -copy_rows: 1.799009911 -copy_col_row: 0.415630047 -copy_row_col: 1.721531501 -``` - -Notice that `copy_cols` is much faster than `copy_rows`. This is expected because `copy_cols` -respects the column-based memory layout of the `Matrix` and fills it one column at a time. Additionally, -`copy_col_row` is much faster than `copy_row_col` because it follows our rule of thumb that the -first element to appear in a slice expression should be coupled with the inner-most loop. - -## Pre-allocating outputs - -If your function returns an `Array` or some other complex type, it may have to allocate memory. -Unfortunately, oftentimes allocation and its converse, garbage collection, are substantial bottlenecks. 
+If your function returns an `Array` or some other complex type, it may have to allocate memory. +Unfortunately, oftentimes allocation and its converse, garbage collection, are substantial bottlenecks. Sometimes you can circumvent the need to allocate memory on each function call by preallocating the output. As a trivial example, compare ```jldoctest prealloc julia> function xinc(x) - return [x, x+1, x+2] + return [x + i for i in 1:3000] end; julia> function loopinc() y = 0 - for i = 1:10^7 + for i = 1:10^5 ret = xinc(i) y += ret[2] end @@ -976,16 +1082,16 @@ with ```jldoctest prealloc julia> function xinc!(ret::AbstractVector{T}, x::T) where T - ret[1] = x - ret[2] = x+1 - ret[3] = x+2 + for i in 1:3000 + ret[i] = x+i + end nothing end; julia> function loopinc_prealloc() - ret = Vector{Int}(undef, 3) + ret = Vector{Int}(undef, 3000) y = 0 - for i = 1:10^7 + for i = 1:10^5 xinc!(ret, i) y += ret[2] end @@ -997,12 +1103,12 @@ Timing results: ```jldoctest prealloc; filter = r"[0-9\.]+ seconds \(.*?\)" julia> @time loopinc() - 0.529894 seconds (40.00 M allocations: 1.490 GiB, 12.14% gc time) -50000015000000 + 0.297454 seconds (200.00 k allocations: 2.239 GiB, 39.80% gc time) +5000250000 julia> @time loopinc_prealloc() - 0.030850 seconds (6 allocations: 288 bytes) -50000015000000 + 0.009410 seconds (2 allocations: 23.477 KiB) +5000250000 ``` Preallocation has other advantages, for example by allowing the caller to control the "output" @@ -1014,7 +1120,55 @@ some judgment may be required. However, for "vectorized" (element-wise) function syntax `x .= f.(y)` can be used for in-place operations with fused loops and no temporary arrays (see the [dot syntax for vectorizing functions](@ref man-vectorized)). -## More dots: Fuse vectorized operations +### [Consider using views for slices](@id man-performance-views) + +In Julia, an array "slice" expression like `array[1:5, :]` creates +a copy of that data (except on the left-hand side of an assignment, +where `array[1:5, :] = ...` assigns in-place to that portion of `array`). +If you are doing many operations on the slice, this can be good for +performance because it is more efficient to work with a smaller +contiguous copy than it would be to index into the original array. +On the other hand, if you are just doing a few simple operations on +the slice, the cost of the allocation and copy operations can be +substantial. + +An alternative is to create a "view" of the array, which is +an array object (a `SubArray`) that actually references the data +of the original array in-place, without making a copy. (If you +write to a view, it modifies the original array's data as well.) +This can be done for individual slices by calling [`view`](@ref), +or more simply for a whole expression or block of code by putting +[`@views`](@ref) in front of that expression. For example: + +```jldoctest; filter = r"[0-9\.]+ seconds \(.*?\)" +julia> fcopy(x) = sum(x[2:end-1]); + +julia> @views fview(x) = sum(x[2:end-1]); + +julia> x = rand(10^6); + +julia> @time fcopy(x); + 0.003051 seconds (3 allocations: 7.629 MB) + +julia> @time fview(x); + 0.001020 seconds (1 allocation: 16 bytes) +``` + +Notice both the 3× speedup and the decreased memory allocation +of the `fview` version of the function. + +### Consider StaticArrays.jl for small fixed-size vector/matrix operations + +If your application involves many small (`< 100` element) arrays of fixed sizes (i.e. 
the size is +known prior to execution), then you might want to consider using the [StaticArrays.jl package](https://github.com/JuliaArrays/StaticArrays.jl). +This package allows you to represent such arrays in a way that avoids unnecessary heap allocations and allows the compiler to +specialize code for the *size* of the array, e.g. by completely unrolling vector operations (eliminating the loops) and storing elements in CPU registers. + +For example, if you are doing computations with 2d geometries, you might have many computations with 2-component vectors. By +using the `SVector` type from StaticArrays.jl, you can use convenient vector notation and operations like `norm(3v - w)` on +vectors `v` and `w`, while allowing the compiler to unroll the code to a minimal computation equivalent to `@inbounds hypot(3v[1]-w[1], 3v[2]-w[2])`. + +### More dots: Fuse vectorized operations Julia has a special [dot syntax](@ref man-vectorized) that converts any scalar function into a "vectorized" function call, and any operator @@ -1060,51 +1214,132 @@ a new temporary array and executes in a separate loop. In this example convenient to sprinkle some dots in your expressions than to define a separate function for each vectorized operation. -## [Consider using views for slices](@id man-performance-views) +### [Fewer dots: Unfuse certain intermediate broadcasts](@id man-performance-unfuse) -In Julia, an array "slice" expression like `array[1:5, :]` creates -a copy of that data (except on the left-hand side of an assignment, -where `array[1:5, :] = ...` assigns in-place to that portion of `array`). -If you are doing many operations on the slice, this can be good for -performance because it is more efficient to work with a smaller -contiguous copy than it would be to index into the original array. -On the other hand, if you are just doing a few simple operations on -the slice, the cost of the allocation and copy operations can be -substantial. +The dot loop fusion mentioned above enables concise and idiomatic code to express highly performant operations. However, it is important to remember that the fused operation will be computed at every iteration of the broadcast. This means that in some situations, particularly in the presence of composed or multidimensional broadcasts, an expression with dot calls may be computing a function more times than intended. As an example, say we want to build a random matrix whose rows have Euclidean norm one. We might write something like the following: +``` +julia> x = rand(1000, 1000); -An alternative is to create a "view" of the array, which is -an array object (a `SubArray`) that actually references the data -of the original array in-place, without making a copy. (If you -write to a view, it modifies the original array's data as well.) -This can be done for individual slices by calling [`view`](@ref), -or more simply for a whole expression or block of code by putting -[`@views`](@ref) in front of that expression. For example: +julia> d = sum(abs2, x; dims=2); -```jldoctest; filter = r"[0-9\.]+ seconds \(.*?\)" -julia> fcopy(x) = sum(x[2:end-1]); +julia> @time x ./= sqrt.(d); + 0.002049 seconds (4 allocations: 96 bytes) +``` +This will work. However, this expression will actually recompute `sqrt(d[i])` for *every* element in the row `x[i, :]`, meaning that many more square roots are computed than necessary. To see precisely over which indices the broadcast will iterate, we can call `Broadcast.combine_axes` on the arguments of the fused expression. 
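+For example, with the `x` and `d` defined above, the check might look like this:
+```
+julia> Broadcast.combine_axes(x, sqrt.(d))
+(Base.OneTo(1000), Base.OneTo(1000))
+```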
This will return a tuple of ranges whose entries correspond to the axes of iteration; the product of lengths of these ranges will be the total number of calls to the fused operation. -julia> @views fview(x) = sum(x[2:end-1]); +It follows that when some components of the broadcast expression are constant along an axis—like the `sqrt` along the second dimension in the preceding example—there is potential for a performance improvement by forcibly "unfusing" those components, i.e. allocating the result of the broadcasted operation in advance and reusing the cached value along its constant axis. Some such potential approaches are to use temporary variables, wrap components of a dot expression in `identity`, or use an equivalent intrinsically vectorized (but non-fused) function. +``` +julia> @time let s = sqrt.(d); x ./= s end; + 0.000809 seconds (5 allocations: 8.031 KiB) -julia> x = rand(10^6); +julia> @time x ./= identity(sqrt.(d)); + 0.000608 seconds (5 allocations: 8.031 KiB) -julia> @time fcopy(x); - 0.003051 seconds (3 allocations: 7.629 MB) +julia> @time x ./= map(sqrt, d); + 0.000611 seconds (4 allocations: 8.016 KiB) +``` -julia> @time fview(x); - 0.001020 seconds (1 allocation: 16 bytes) +Any of these options yields approximately a three-fold speedup at the cost of an allocation; for large broadcastables this speedup can be asymptotically very large. + +### [Access arrays in memory order, along columns](@id man-performance-column-major) + +Multidimensional arrays in Julia are stored in column-major order. This means that arrays are +stacked one column at a time. This can be verified using the `vec` function or the syntax `[:]` +as shown below (notice that the array is ordered `[1 3 2 4]`, not `[1 2 3 4]`): + +```jldoctest +julia> x = [1 2; 3 4] +2×2 Matrix{Int64}: + 1 2 + 3 4 + +julia> x[:] +4-element Vector{Int64}: + 1 + 3 + 2 + 4 ``` -Notice both the 3× speedup and the decreased memory allocation -of the `fview` version of the function. +This convention for ordering arrays is common in many languages like Fortran, Matlab, and R (to +name a few). The alternative to column-major ordering is row-major ordering, which is the convention +adopted by C and Python (`numpy`) among other languages. Remembering the ordering of arrays can +have significant performance effects when looping over arrays. A rule of thumb to keep in mind +is that with column-major arrays, the first index changes most rapidly. Essentially this means +that looping will be faster if the inner-most loop index is the first to appear in a slice expression. +Keep in mind that indexing an array with `:` is an implicit loop that iteratively accesses all elements within a particular dimension; it can be faster to extract columns than rows, for example. -## Copying data is not always bad +Consider the following contrived example. Imagine we wanted to write a function that accepts a +[`Vector`](@ref) and returns a square [`Matrix`](@ref) with either the rows or the columns filled with copies +of the input vector. Assume that it is not important whether rows or columns are filled with these +copies (perhaps the rest of the code can be easily adapted accordingly). 
We could conceivably +do this in at least four ways (in addition to the recommended call to the built-in [`repeat`](@ref)): + +```julia +function copy_cols(x::Vector{T}) where T + inds = axes(x, 1) + out = similar(Array{T}, inds, inds) + for i = inds + out[:, i] = x + end + return out +end + +function copy_rows(x::Vector{T}) where T + inds = axes(x, 1) + out = similar(Array{T}, inds, inds) + for i = inds + out[i, :] = x + end + return out +end + +function copy_col_row(x::Vector{T}) where T + inds = axes(x, 1) + out = similar(Array{T}, inds, inds) + for col = inds, row = inds + out[row, col] = x[row] + end + return out +end + +function copy_row_col(x::Vector{T}) where T + inds = axes(x, 1) + out = similar(Array{T}, inds, inds) + for row = inds, col = inds + out[row, col] = x[col] + end + return out +end +``` + +Now we will time each of these functions using the same random `10000` by `1` input vector: + +```julia-repl +julia> x = randn(10000); + +julia> fmt(f) = println(rpad(string(f)*": ", 14, ' '), @elapsed f(x)) + +julia> map(fmt, [copy_cols, copy_rows, copy_col_row, copy_row_col]); +copy_cols: 0.331706323 +copy_rows: 1.799009911 +copy_col_row: 0.415630047 +copy_row_col: 1.721531501 +``` + +Notice that `copy_cols` is much faster than `copy_rows`. This is expected because `copy_cols` +respects the column-based memory layout of the `Matrix` and fills it one column at a time. Additionally, +`copy_col_row` is much faster than `copy_row_col` because it follows our rule of thumb that the +first element to appear in a slice expression should be coupled with the inner-most loop. + +### Copying data is not always bad Arrays are stored contiguously in memory, lending themselves to CPU vectorization and fewer memory accesses due to caching. These are the same reasons that it is recommended to access arrays in column-major order (see above). Irregular access patterns and non-contiguous views can drastically slow down computations on arrays because of non-sequential memory access. -Copying irregularly-accessed data into a contiguous array before repeated access it can result +Copying irregularly-accessed data into a contiguous array before repeatedly accessing it can result in a large speedup, such as in the example below. Here, a matrix is being accessed at randomly-shuffled indices before being multiplied. Copying into plain arrays speeds up the multiplication even with the added cost of copying and allocation. @@ -1134,88 +1369,161 @@ julia> @time iterated_neural_network(A[inds, inds], x, 10) 1569 ``` -Provided there is enough memory, the cost of copying the view to an array is outweighed -by the speed boost from doing the repeated matrix multiplications on a contiguous array. +Provided there is enough memory, the cost of copying the view to an array is outweighed +by the speed boost from doing the repeated matrix multiplications on a contiguous array. + +### [Multithreading and linear algebra](@id man-multithreading-linear-algebra) + +This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations. +Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded. +In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading. + +Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`. 
+It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`). +Its current value can be accessed with `BLAS.get_num_threads()`. + +When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.). +However, it is generally recommended to check and set the value manually. +The OpenBLAS behavior is as follows: + +* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation. +* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads. + +When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`. +Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N< foo() = rand(2,2) * rand(2,2) +foo (generic function with 1 method) + +julia> @time @eval foo(); + 0.252395 seconds (1.12 M allocations: 56.178 MiB, 2.93% gc time, 98.12% compilation time) + +julia> @time @eval foo(); + 0.000156 seconds (63 allocations: 2.453 KiB) +``` + +Note that `@time @eval` is better for measuring compilation time because without [`@eval`](@ref), some compilation may +already be done before timing starts. -## Consider StaticArrays.jl for small fixed-size vector/matrix operations +When developing a package, you may be able to improve the experience of your users with *precompilation* +so that when they use the package, the code they use is already compiled. To precompile package code effectively, it's +recommended to use [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) to run a +"precompile workload" during precompilation time that is representative of typical package usage, which will cache the +native compiled code into the package `pkgimage` cache, greatly reducing "time to first execution" (often referred to as +TTFX) for such usage. -If your application involves many small (`< 100` element) arrays of fixed sizes (i.e. the size is -known prior to execution), then you might want to consider using the [StaticArrays.jl package](https://github.com/JuliaArrays/StaticArrays.jl). -This package allows you to represent such arrays in a way that avoids unnecessary heap allocations and allows the compiler to -specialize code for the *size* of the array, e.g. by completely unrolling vector operations (eliminating the loops) and storing elements in CPU registers. +Note that [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) workloads can be +disabled and sometimes configured via Preferences if you do not want to spend the extra time precompiling, which +may be the case during development of a package. -For example, if you are doing computations with 2d geometries, you might have many computations with 2-component vectors. By -using the `SVector` type from StaticArrays.jl, you can use convenient vector notation and operations like `norm(3v - w)` on -vectors `v` and `w`, while allowing the compiler to unroll the code to a minimal computation equivalent to `@inbounds hypot(3v[1]-w[1], 3v[2]-w[2])`. 
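+
+As a minimal sketch of such a precompile workload (the module name `MyPackage` and the function `frobnicate` are made up here; a real workload should exercise your package's typical entry points):
+
+```julia
+module MyPackage
+
+using PrecompileTools
+
+frobnicate(v) = 2 .* v .+ 1
+
+@setup_workload begin
+    # Code out here runs during precompilation but is not itself cached.
+    v = rand(3)
+    @compile_workload begin
+        # Calls in here are compiled and cached into the package's pkgimage.
+        frobnicate(v)
+    end
+end
+
+end # module
+```
+
+Keeping the workload small but representative captures most of the TTFX benefit without unduly increasing precompilation time.
+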
+### Reducing package loading time -## Avoid string interpolation for I/O +Keeping the time taken to load the package down is usually helpful. +General good practice for package developers includes: -When writing data to a file (or other I/O device), forming extra intermediate strings is a source -of overhead. Instead of: +1. Reduce your dependencies to those you really need. Consider using [package extensions](@ref) to support interoperability with other packages without bloating your essential dependencies. +3. Avoid use of [`__init__()`](@ref) functions unless there is no alternative, especially those which might trigger a lot + of compilation, or just take a long time to execute. +4. Where possible, fix [invalidations](https://julialang.org/blog/2020/08/invalidations/) among your dependencies and from your package code. -```julia -println(file, "$a $b") -``` +The tool [`@time_imports`](@ref) can be useful in the REPL to review the above factors. -use: +```julia-repl +julia> @time @time_imports using Plots + 0.5 ms Printf + 16.4 ms Dates + 0.7 ms Statistics + ┌ 23.8 ms SuiteSparse_jll.__init__() 86.11% compilation time (100% recompilation) + 90.1 ms SuiteSparse_jll 91.57% compilation time (82% recompilation) + 0.9 ms Serialization + ┌ 39.8 ms SparseArrays.CHOLMOD.__init__() 99.47% compilation time (100% recompilation) + 166.9 ms SparseArrays 23.74% compilation time (100% recompilation) + 0.4 ms Statistics → SparseArraysExt + 0.5 ms TOML + 8.0 ms Preferences + 0.3 ms PrecompileTools + 0.2 ms Reexport +... many deps omitted for example ... + 1.4 ms Tar + ┌ 73.8 ms p7zip_jll.__init__() 99.93% compilation time (100% recompilation) + 79.4 ms p7zip_jll 92.91% compilation time (100% recompilation) + ┌ 27.7 ms GR.GRPreferences.__init__() 99.77% compilation time (100% recompilation) + 43.0 ms GR 64.26% compilation time (100% recompilation) + ┌ 2.1 ms Plots.__init__() 91.80% compilation time (100% recompilation) + 300.9 ms Plots 0.65% compilation time (100% recompilation) + 1.795602 seconds (3.33 M allocations: 190.153 MiB, 7.91% gc time, 39.45% compilation time: 97% of which was recompilation) -```julia -println(file, a, " ", b) ``` -The first version of the code forms a string, then writes it to the file, while the second version -writes values directly to the file. Also notice that in some cases string interpolation can be -harder to read. Consider: +Notice that in this example there are multiple packages loaded, some with `__init__()` functions, some of which cause +compilation of which some is recompilation. Recompilation is caused by earlier packages invalidating methods, then in +these cases when the following packages run their `__init__()` function some hit recompilation before the code can be run. -```julia -println(file, "$(f(a))$(f(b))") -``` +Further, note the `Statistics` extension `SparseArraysExt` has been activated because `SparseArrays` is in the dependency +tree. i.e. see `0.4 ms Statistics → SparseArraysExt`. -versus: +This report gives a good opportunity to review whether the cost of dependency load time is worth the functionality it brings. +Also the `Pkg` utility `why` can be used to report why a an indirect dependency exists. 
-```julia -println(file, f(a), f(b)) +``` +(CustomPackage) pkg> why FFMPEG_jll + Plots → FFMPEG → FFMPEG_jll + Plots → GR → GR_jll → FFMPEG_jll ``` -## Optimize network I/O during parallel execution - -When executing a remote function in parallel: +or to see the indirect dependencies that a package brings in, you can `pkg> rm` the package, see the deps that are removed +from the manifest, then revert the change with `pkg> undo`. -```julia -using Distributed +If loading time is dominated by slow `__init__()` methods having compilation, one verbose way to identify what is being +compiled is to use the julia args `--trace-compile=stderr --trace-compile-timing` which will report a [`precompile`](@ref) +statement each time a method is compiled, along with how long compilation took. The InteractiveUtils macro +[`@trace_compile`](@ref) provides a way to enable those args for a specific call. So a call for a complete report report would look like: -responses = Vector{Any}(undef, nworkers()) -@sync begin - for (idx, pid) in enumerate(workers()) - @async responses[idx] = remotecall_fetch(foo, pid, args...) - end -end +``` +julia> @time @time_imports @trace_compile using CustomPackage +... ``` -is faster than: +Note the `--startup-file=no` which helps isolate the test from packages you may have in your `startup.jl`. -```julia -using Distributed +More analysis of the reasons for recompilation can be achieved with the +[`SnoopCompile`](https://github.com/timholy/SnoopCompile.jl) package. -refs = Vector{Any}(undef, nworkers()) -for (idx, pid) in enumerate(workers()) - refs[idx] = @spawnat pid foo(args...) -end -responses = [fetch(r) for r in refs] +### Reducing precompilation time + +If package precompilation is taking a long time, one option is to set the following internal and then precompile. ``` +julia> Base.PRECOMPILE_TRACE_COMPILE[] = "stderr" -The former results in a single network round-trip to every worker, while the latter results in -two network calls - first by the [`@spawnat`](@ref) and the second due to the [`fetch`](@ref) -(or even a [`wait`](@ref)). -The [`fetch`](@ref)/[`wait`](@ref) is also being executed serially resulting in an overall poorer performance. +pkg> precompile +``` -## Fix deprecation warnings +This has the effect of setting `--trace-compile=stderr --trace-compile-timing` in the precompilation processes themselves, +so will show which methods are precompiled and how long they took to precompile. + +There are also profiling options such as [using the external profiler Tracy to profile the precompilation process](@ref Profiling-package-precompilation-with-Tracy). -A deprecated function internally performs a lookup in order to print a relevant warning only once. -This extra lookup can cause a significant slowdown, so all uses of deprecated functions should -be modified as suggested by the warnings. -## Tweaks +## Miscellaneous + +### Tweaks These are some minor points that might help in tight inner loops. @@ -1225,17 +1533,23 @@ These are some minor points that might help in tight inner loops. * Use [`div(x,y)`](@ref) for truncating division of integers instead of [`trunc(x/y)`](@ref), [`fld(x,y)`](@ref) instead of [`floor(x/y)`](@ref), and [`cld(x,y)`](@ref) instead of [`ceil(x/y)`](@ref). -## [Performance Annotations](@id man-performance-annotations) +### Fix deprecation warnings + +A deprecated function internally performs a lookup in order to print a relevant warning only once. 
+This extra lookup can cause a significant slowdown, so all uses of deprecated functions should +be modified as suggested by the warnings. + +### [Performance Annotations](@id man-performance-annotations) Sometimes you can enable better optimization by promising certain program properties. * Use [`@inbounds`](@ref) to eliminate array bounds checking within expressions. Be certain before doing - this. If the subscripts are ever out of bounds, you may suffer crashes or silent corruption. + this. If the indices are ever out of bounds, you may suffer crashes or silent corruption. * Use [`@fastmath`](@ref) to allow floating point optimizations that are correct for real numbers, but lead to differences for IEEE numbers. Be careful when doing this, as this may change numerical results. This corresponds to the `-ffast-math` option of clang. * Write [`@simd`](@ref) in front of `for` loops to promise that the iterations are independent and may be - reordered. Note that in many cases, Julia can automatically vectorize code without the `@simd` macro; + reordered. Note that in many cases, Julia can automatically vectorize code without the `@simd` macro; it is only beneficial in cases where such a transformation would otherwise be illegal, including cases like allowing floating-point re-associativity and ignoring dependent memory accesses (`@simd ivdep`). Again, be very careful when asserting `@simd` as erroneously annotating a loop with dependent iterations @@ -1258,7 +1572,7 @@ the optimizer from trying to be too clever and defeat our benchmark): ```julia @noinline function inner(x, y) s = zero(eltype(x)) - for i=eachindex(x) + for i in eachindex(x, y) @inbounds s += x[i]*y[i] end return s @@ -1266,7 +1580,7 @@ end @noinline function innersimd(x, y) s = zero(eltype(x)) - @simd for i = eachindex(x) + @simd for i in eachindex(x, y) @inbounds s += x[i] * y[i] end return s @@ -1395,7 +1709,7 @@ julia> f_fast(NaN) false ``` -## Treat Subnormal Numbers as Zeros +### Treat Subnormal Numbers as Zeros Subnormal numbers, formerly called [denormal numbers](https://en.wikipedia.org/wiki/Denormal_number), are useful in many contexts, but incur a performance penalty on some hardware. A call [`set_zero_subnormals(true)`](@ref) @@ -1468,195 +1782,105 @@ In some applications, an alternative to zeroing subnormal numbers is to inject a a = rand(Float32,1000) * 1.f-9 ``` -## [[`@code_warntype`](@ref)](@id man-code-warntype) +### Avoid string interpolation for I/O -The macro [`@code_warntype`](@ref) (or its function variant [`code_warntype`](@ref)) can sometimes -be helpful in diagnosing type-related problems. Here's an example: +When writing data to a file (or other I/O device), forming extra intermediate strings is a source +of overhead. Instead of: -```julia-repl -julia> @noinline pos(x) = x < 0 ? 0 : x; +```julia +println(file, "$a $b") +``` -julia> function f(x) - y = pos(x) - return sin(y*x + 1) - end; +use: -julia> @code_warntype f(3.2) -MethodInstance for f(::Float64) - from f(x) @ Main REPL[9]:1 -Arguments - #self#::Core.Const(f) - x::Float64 -Locals - y::Union{Float64, Int64} -Body::Float64 -1 ─ (y = Main.pos(x)) -│ %2 = (y * x)::Float64 -│ %3 = (%2 + 1)::Float64 -│ %4 = Main.sin(%3)::Float64 -└── return %4 +```julia +println(file, a, " ", b) ``` -Interpreting the output of [`@code_warntype`](@ref), like that of its cousins [`@code_lowered`](@ref), -[`@code_typed`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref), takes a little practice. 
-Your code is being presented in form that has been heavily digested on its way to generating -compiled machine code. Most of the expressions are annotated by a type, indicated by the `::T` -(where `T` might be [`Float64`](@ref), for example). The most important characteristic of [`@code_warntype`](@ref) -is that non-concrete types are displayed in red; since this document is written in Markdown, which has no color, -in this document, red text is denoted by uppercase. +The first version of the code forms a string, then writes it to the file, while the second version +writes values directly to the file. Also notice that in some cases string interpolation can be +harder to read. Consider: -At the top, the inferred return type of the function is shown as `Body::Float64`. -The next lines represent the body of `f` in Julia's SSA IR form. -The numbered boxes are labels and represent targets for jumps (via `goto`) in your code. -Looking at the body, you can see that the first thing that happens is that `pos` is called and the -return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since -it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the -input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64` -The net result is that `f(x::Float64)` will not be type-unstable -in its output, even if some of the intermediate computations are type-unstable. +```julia +println(file, "$(f(a))$(f(b))") +``` -How you use this information is up to you. Obviously, it would be far and away best to fix `pos` -to be type-stable: if you did so, all of the variables in `f` would be concrete, and its performance -would be optimal. However, there are circumstances where this kind of *ephemeral* type instability -might not matter too much: for example, if `pos` is never used in isolation, the fact that `f`'s -output is type-stable (for [`Float64`](@ref) inputs) will shield later code from the propagating -effects of type instability. This is particularly relevant in cases where fixing the type instability -is difficult or impossible. In such cases, the tips above (e.g., adding type annotations and/or -breaking up functions) are your best tools to contain the "damage" from type instability. -Also, note that even Julia Base has functions that are type unstable. -For example, the function [`findfirst`](@ref) returns the index into an array where a key is found, -or `nothing` if it is not found, a clear type instability. In order to make it easier to find the -type instabilities that are likely to be important, `Union`s containing either `missing` or `nothing` -are color highlighted in yellow, instead of red. +versus: -The following examples may help you interpret expressions marked as containing non-leaf types: +```julia +println(file, f(a), f(b)) +``` - * Function body starting with `Body::Union{T1,T2})` - * Interpretation: function with unstable return type - * Suggestion: make the return value type-stable, even if you have to annotate it +### Avoid eager string materialization - * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}` - * Interpretation: call to a type-unstable function `g`. - * Suggestion: fix the function, or if necessary annotate the return value +In settings where a string representation of an object is only needed +conditionally (e.g. 
in error paths of functions or conditional warnings such as +deprecations), it is advisable to avoid the overhead of eagerly materializing +the string. Since Julia 1.8, this can be achieved via +[`LazyString`](@ref) and the corresponding string macro [`@lazy_str`](@ref). - * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any` - * Interpretation: accessing elements of poorly-typed arrays - * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual - element accesses +For example, instead of: - * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N` - * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a - field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type. - * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter - of `ArrayContainer` +```julia +Base.depwarn("`foo` is deprecated for type $(typeof(x))", :bar) +``` -## [Performance of captured variable](@id man-performance-captured) +use: -Consider the following example that defines an inner function: ```julia -function abmult(r::Int) - if r < 0 - r = -r - end - f = x -> x * r - return f -end +Base.depwarn(lazy"`foo` is deprecated for type $(typeof(x))", :bar) ``` -Function `abmult` returns a function `f` that multiplies its argument by -the absolute value of `r`. The inner function assigned to `f` is called a -"closure". Inner functions are also used by the -language for `do`-blocks and for generator expressions. +or the equivalent macro-free version: -This style of code presents performance challenges for the language. -The parser, when translating it into lower-level instructions, -substantially reorganizes the above code by extracting the -inner function to a separate code block. "Captured" variables such as `r` -that are shared by inner functions and their enclosing scope are -also extracted into a heap-allocated "box" accessible to both inner and -outer functions because the language specifies that `r` in the -inner scope must be identical to `r` in the outer scope even after the -outer scope (or another inner function) modifies `r`. +```julia +Base.depwarn(LazyString("`foo` is deprecated for type ", typeof(x)), :bar) +``` -The discussion in the preceding paragraph referred to the "parser", that is, the phase -of compilation that takes place when the module containing `abmult` is first loaded, -as opposed to the later phase when it is first invoked. The parser does not "know" that -`Int` is a fixed type, or that the statement `r = -r` transforms an `Int` to another `Int`. -The magic of type inference takes place in the later phase of compilation. +Through this approach, the interpolated string will only be constructed when it is actually displayed. -Thus, the parser does not know that `r` has a fixed type (`Int`). -nor that `r` does not change value once the inner function is created (so that -the box is unneeded). Therefore, the parser emits code for -box that holds an object with an abstract type such as `Any`, which -requires run-time type dispatch for each occurrence of `r`. This can be -verified by applying `@code_warntype` to the above function. Both the boxing -and the run-time type dispatch can cause loss of performance. 
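+
+The same idea applies to error paths that are rarely taken. As a small sketch (the `checkpositive` helper is a made-up example):
+
+```julia
+# The interpolated message is only materialized if the error is actually thrown and displayed.
+checkpositive(x) = x > 0 ? x : throw(ArgumentError(lazy"expected a positive value, got $x"))
+```
+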
+### Optimize network I/O during parallel execution + +When executing a remote function in parallel: -If captured variables are used in a performance-critical section of the code, -then the following tips help ensure that their use is performant. First, if -it is known that a captured variable does not change its type, then this can -be declared explicitly with a type annotation (on the variable, not the -right-hand side): -```julia -function abmult2(r0::Int) - r::Int = r0 - if r < 0 - r = -r - end - f = x -> x * r - return f -end -``` -The type annotation partially recovers lost performance due to capturing because -the parser can associate a concrete type to the object in the box. -Going further, if the captured variable does not need to be boxed at all (because it -will not be reassigned after the closure is created), this can be indicated -with `let` blocks as follows. ```julia -function abmult3(r::Int) - if r < 0 - r = -r - end - f = let r = r - x -> x * r +using Distributed + +responses = Vector{Any}(undef, nworkers()) +@sync begin + for (idx, pid) in enumerate(workers()) + Threads.@spawn responses[idx] = remotecall_fetch(foo, pid, args...) end - return f end ``` -The `let` block creates a new variable `r` whose scope is only the -inner function. The second technique recovers full language performance -in the presence of captured variables. Note that this is a rapidly -evolving aspect of the compiler, and it is likely that future releases -will not require this degree of programmer annotation to attain performance. -In the mean time, some user-contributed packages like -[FastClosures](https://github.com/c42f/FastClosures.jl) automate the -insertion of `let` statements as in `abmult3`. - -## [Multithreading and linear algebra](@id man-multithreading-linear-algebra) - -This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations. -Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded. -In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading. - -Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`. -It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`). -Its current value can be accessed with `BLAS.get_num_threads()`. - -When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.). -However, it is generally recommended to check and set the value manually. -The OpenBLAS behavior is as follows: -* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation. -* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads. +is faster than: -When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`. 
-Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N< WAIT_TIME_NS + break + end + end +end + +function main() + spawn_a_bunch_of_tasks_waiting_on_channel() + for i in 1:N_SPAWNED_TASKS + put!(ch, i) + busywait() + end +end + +Profile.@profile main() +``` + +Our goal is to detect whether there is contention on the `ch` channel—i.e., whether the number of waiters is excessive given the rate at which work items are being produced in the channel. + +If we run this, we obtain the following [PProf](https://github.com/JuliaPerf/PProf.jl) flame graph: + +![CPU Profile](./img/cpu-profile.png) + +This profile provides no information to help determine where contention occurs in the system’s synchronization primitives. Waiters on a channel will be blocked and descheduled, meaning no system thread will be running the tasks assigned to those waiters, and as a result, they won't be sampled by the profiler. + +### Wall-time Profiler + +Instead of sampling threads—and thus only sampling tasks that are running—a wall-time task profiler samples tasks independently of their scheduling state. For example, tasks that are sleeping on a synchronization primitive at the time the profiler is running will be sampled with the same probability as tasks that were actively running when the profiler attempted to capture backtraces. + +This approach allows us to construct a profile where backtraces from tasks blocked on the `ch` channel, as in the example above, are actually represented. + +Let's run the same example, but now with a wall-time profiler: + + +```Julia +using Base.Threads +using Profile +using PProf + +ch = Channel(1) + +const N_SPAWNED_TASKS = (1 << 10) +const WAIT_TIME_NS = 10_000_000 + +function spawn_a_bunch_of_tasks_waiting_on_channel() + for i in 1:N_SPAWNED_TASKS + Threads.@spawn begin + take!(ch) + end + end +end + +function busywait() + t0 = time_ns() + while true + if time_ns() - t0 > WAIT_TIME_NS + break + end + end +end + +function main() + spawn_a_bunch_of_tasks_waiting_on_channel() + for i in 1:N_SPAWNED_TASKS + put!(ch, i) + busywait() + end +end + +Profile.@profile_walltime main() +``` + +We obtain the following flame graph: + +![Wall-time Profile Channel](./img/wall-time-profiler-channel-example.png) + +We see that a large number of samples come from channel-related `take!` functions, which allows us to determine that there is indeed an excessive number of waiters in `ch`. + +### A Compute-Bound Workload + +Despite the wall-time profiler sampling all live tasks in the system and not just the currently running ones, it can still be helpful for identifying performance hotspots, even if your code is compute-bound. 
Let’s consider a simple example: + +```Julia +using Base.Threads +using Profile +using PProf + +ch = Channel(1) + +const MAX_ITERS = (1 << 22) +const N_TASKS = (1 << 12) + +function spawn_a_task_waiting_on_channel() + Threads.@spawn begin + take!(ch) + end +end + +function sum_of_sqrt() + sum_of_sqrt = 0.0 + for i in 1:MAX_ITERS + sum_of_sqrt += sqrt(i) + end + return sum_of_sqrt +end + +function spawn_a_bunch_of_compute_heavy_tasks() + Threads.@sync begin + for i in 1:N_TASKS + Threads.@spawn begin + sum_of_sqrt() + end + end + end +end + +function main() + spawn_a_task_waiting_on_channel() + spawn_a_bunch_of_compute_heavy_tasks() +end + +Profile.@profile_walltime main() +``` + +After collecting a wall-time profile, we get the following flame graph: + +![Wall-time Profile Compute-Bound](./img/wall-time-profiler-compute-bound-example.png) + +Notice how many of the samples contain `sum_of_sqrt`, which is the expensive compute function in our example. + +### Identifying Task Sampling Failures in your Profile + +In the current implementation, the wall-time profiler attempts to sample from tasks that have been alive since the last garbage collection, along with those created afterward. However, if most tasks are extremely short-lived, you may end up sampling tasks that have already completed, resulting in missed backtrace captures. + +If you encounter samples containing `failed_to_sample_task_fun` or `failed_to_stop_thread_fun`, this likely indicates a high volume of short-lived tasks, which prevented their backtraces from being collected. + +Let's consider this simple example: + +```Julia +using Base.Threads +using Profile +using PProf + +const N_SPAWNED_TASKS = (1 << 16) +const WAIT_TIME_NS = 100_000 + +function spawn_a_bunch_of_short_lived_tasks() + for i in 1:N_SPAWNED_TASKS + Threads.@spawn begin + # Do nothing + end + end +end + +function busywait() + t0 = time_ns() + while true + if time_ns() - t0 > WAIT_TIME_NS + break + end + end +end + +function main() + GC.enable(false) + spawn_a_bunch_of_short_lived_tasks() + for i in 1:N_SPAWNED_TASKS + busywait() + end + GC.enable(true) +end + +Profile.@profile_walltime main() +``` + +Notice that the tasks spawned in `spawn_a_bunch_of_short_lived_tasks` are extremely short-lived. Since these tasks constitute the majority in the system, we will likely miss capturing a backtrace for most sampled tasks. + +After collecting a wall-time profile, we obtain the following flame graph: + +![Task Sampling Failure](./img/task-sampling-failure.png) + +The large number of samples from `failed_to_stop_thread_fun` confirms that we have a significant number of short-lived tasks in the system. + ## Memory allocation analysis One of the most common techniques to improve performance is to reduce memory allocation. Julia -provides several tools measure this: +provides several tools to measure this: ### `@time` @@ -338,15 +552,91 @@ argument can be passed to speed it up by making it skip some allocations. Passing `sample_rate=1.0` will make it record everything (which is slow); `sample_rate=0.1` will record only 10% of the allocations (faster), etc. -!!! note +!!! compat "Julia 1.11" + + Older versions of Julia could not capture types in all cases. In older versions of + Julia, if you see an allocation of type `Profile.Allocs.UnknownType`, it means that + the profiler doesn't know what type of object was allocated. This mainly happened when + the allocation was coming from generated code produced by the compiler. 
See + [issue #43688](https://github.com/JuliaLang/julia/issues/43688) for more info. + + Since Julia 1.11, all allocations should have a type reported. + +For more details on how to use this tool, please see the following talk from JuliaCon 2022: +https://www.youtube.com/watch?v=BFvpwC8hEWQ + +##### Allocation Profiler Example + +In this simple example, we use PProf to visualize the alloc profile. You could use another +visualization tool instead. We collect the profile (specifying a sample rate), then we visualize it. +```julia +using Profile, PProf +Profile.Allocs.clear() +Profile.Allocs.@profile sample_rate=0.0001 my_function() +PProf.Allocs.pprof() +``` + +Here is a more in-depth example, showing how we can tune the sample rate. A +good number of samples to aim for is around 1 - 10 thousand. Too many, and the +profile visualizer can get overwhelmed, and profiling will be slow. Too few, +and you don't have a representative sample. + + +```julia-repl +julia> import Profile + +julia> @time my_function() # Estimate allocations from a (second-run) of the function + 0.110018 seconds (1.50 M allocations: 58.725 MiB, 17.17% gc time) +500000 + +julia> Profile.Allocs.clear() + +julia> Profile.Allocs.@profile sample_rate=0.001 begin # 1.5 M * 0.001 = ~1.5K allocs. + my_function() + end +500000 + +julia> prof = Profile.Allocs.fetch(); # If you want, you can also manually inspect the results. + +julia> length(prof.allocs) # Confirm we have expected number of allocations. +1515 + +julia> using PProf # Now, visualize with an external tool, like PProf or ProfileCanvas. + +julia> PProf.Allocs.pprof(prof; from_c=false) # You can optionally pass in a previously fetched profile result. +Analyzing 1515 allocation samples... 100%|████████████████████████████████| Time: 0:00:00 +Main binary filename not available. +Serving web UI on http://localhost:62261 +"alloc-profile.pb.gz" +``` +Then you can view the profile by navigating to http://localhost:62261, and the profile is saved to disk. +See PProf package for more options. + +##### Allocation Profiling Tips + +As stated above, aim for around 1-10 thousand samples in your profile. + +Note that we are uniformly sampling in the space of _all allocations_, and are not weighting +our samples by the size of the allocation. So a given allocation profile may not give a +representative profile of where most bytes are allocated in your program, unless you had set +`sample_rate=1`. + +Allocations can come from users directly constructing objects, but can also come from inside +the runtime or be inserted into compiled code to handle type instability. Looking at the +"source code" view can be helpful to isolate them, and then other external tools such as +[`Cthulhu.jl`](https://github.com/JuliaDebug/Cthulhu.jl) can be useful for identifying the +cause of the allocation. - The current implementation of the Allocations Profiler _does not - capture types for all allocations._ Allocations for which the profiler - could not capture the type are represented as having type - `Profile.Allocs.UnknownType`. +##### Allocation Profile Visualization Tools - You can read more about the missing types and the plan to improve this, here: - [issue #43688](https://github.com/JuliaLang/julia/issues/43688). +There are several profiling visualization tools now that can all display Allocation +Profiles. 
Here is a small list of some of the main ones we know about: +- [PProf.jl](https://github.com/JuliaPerf/PProf.jl) +- [ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) +- VSCode's built-in profile visualizer (`@profview_allocs`) [docs needed] +- Viewing the results directly in the REPL + - You can inspect the results in the REPL via [`Profile.Allocs.fetch()`](@ref), to view + the stacktrace and type of each allocation. #### Line-by-Line Allocation Tracking @@ -381,7 +671,7 @@ Currently Julia supports `Intel VTune`, `OProfile` and `perf` as external profil Depending on the tool you choose, compile with `USE_INTEL_JITEVENTS`, `USE_OPROFILE_JITEVENTS` and `USE_PERF_JITEVENTS` set to 1 in `Make.user`. Multiple flags are supported. -Before running Julia set the environment variable `ENABLE_JITPROFILING` to 1. +Before running Julia set the environment variable [`ENABLE_JITPROFILING`](@ref ENABLE_JITPROFILING) to 1. Now you have a multitude of ways to employ those tools! For example with `OProfile` you can try a simple recording : diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md index ed3fe85194d93..1f9f3129ca16b 100644 --- a/doc/src/manual/running-external-programs.md +++ b/doc/src/manual/running-external-programs.md @@ -79,6 +79,18 @@ julia> `echo "foo bar"`[2] "foo bar" ``` +You can also pass a `IOBuffer`, and later read from it: + +```jldoctest +julia> io = PipeBuffer(); # PipeBuffer is a type of IOBuffer + +julia> run(`echo world`, devnull, io, stderr); + +julia> readlines(io) +1-element Vector{String}: + "world" +``` + ## [Interpolation](@id command-interpolation) Suppose you want to do something a bit more complicated and use the name of a file in the variable @@ -320,8 +332,8 @@ will attempt to store the data in the kernel's buffers while waiting for a reade Another common solution is to separate the reader and writer of the pipeline into separate [`Task`](@ref)s: ```julia -writer = @async write(process, "data") -reader = @async do_compute(read(process, String)) +writer = Threads.@spawn write(process, "data") +reader = Threads.@spawn do_compute(read(process, String)) wait(writer) fetch(reader) ``` diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index fca4fc75d9e0f..57431d07c0aa5 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -25,7 +25,7 @@ There are a few noteworthy high-level features about Julia's strings: the [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoding. (A [`transcode`](@ref) function is provided to convert to/from other Unicode encodings.) * All string types are subtypes of the abstract type `AbstractString`, and external packages define - additional `AbstractString` subtypes (e.g. for other encodings). If you define a function expecting + additional `AbstractString` subtypes (e.g. for other encodings). If you define a function expecting a string argument, you should declare the type as `AbstractString` in order to accept any string type. * Like C and Java, but unlike most dynamic languages, Julia has a first-class type for representing @@ -244,7 +244,7 @@ happens to contain only a single character. In Julia these are very different th Range indexing makes a copy of the selected part of the original string. Alternatively, it is possible to create a view into a string using the type [`SubString`](@ref). More simply, using the [`@views`](@ref) macro on a block of code converts all string slices -into substrings. For example: +into substrings. 
For example: ```jldoctest julia> str = "long string" @@ -402,7 +402,7 @@ julia> collect(eachindex(s)) ``` To access the raw code units (bytes for UTF-8) of the encoding, you can use the [`codeunit(s,i)`](@ref) -function, where the index `i` runs consecutively from `1` to [`ncodeunits(s)`](@ref). The [`codeunits(s)`](@ref) +function, where the index `i` runs consecutively from `1` to [`ncodeunits(s)`](@ref). The [`codeunits(s)`](@ref) function returns an `AbstractVector{UInt8}` wrapper that lets you access these raw codeunits (bytes) as an array. Strings in Julia can contain invalid UTF-8 code unit sequences. This convention allows to @@ -832,7 +832,7 @@ of the substring that matches, but perhaps we want to capture any non-blank text character. We could do the following: ```jldoctest -julia> m = match(r"^\s*(?:#\s*(.*?)\s*$|$)", "# a comment ") +julia> m = match(r"^\s*(?:#\s*(.*?)\s*$)", "# a comment ") RegexMatch("# a comment ", 1="a comment") ``` @@ -957,11 +957,11 @@ i Do case-insensitive pattern matching. that would cross the Unicode rules/non-Unicode rules boundary (ords 255/256) will not succeed. -m Treat string as multiple lines. That is, change "^" and "$" +m Treat string as multiple lines. That is, change "^" and "$" from matching the start or end of the string to matching the start or end of any line anywhere within the string. -s Treat string as single line. That is, change "." to match any +s Treat string as single line. That is, change "." to match any character whatsoever, even a newline, which normally it would not match. @@ -981,10 +981,10 @@ x Tells the regular expression parser to ignore most whitespace For example, the following regex has all three flags turned on: ```jldoctest -julia> r"a+.*b+.*?d$"ism -r"a+.*b+.*?d$"ims +julia> r"a+.*b+.*d$"ism +r"a+.*b+.*d$"ims -julia> match(r"a+.*b+.*?d$"ism, "Goodbye,\nOh, angry,\nBad world\n") +julia> match(r"a+.*b+.*d$"ism, "Goodbye,\nOh, angry,\nBad world\n") RegexMatch("angry,\nBad world") ``` @@ -1012,7 +1012,7 @@ ERROR: syntax: invalid escape sequence Triple-quoted regex strings, of the form `r"""..."""`, are also supported (and may be convenient for regular expressions containing quotation marks or newlines). -The `Regex()` constructor may be used to create a valid regex string programmatically. This permits using the contents of string variables and other string operations when constructing the regex string. Any of the regex codes above can be used within the single string argument to `Regex()`. Here are some examples: +The `Regex()` constructor may be used to create a valid regex string programmatically. This permits using the contents of string variables and other string operations when constructing the regex string. Any of the regex codes above can be used within the single string argument to `Regex()`. Here are some examples: ```jldoctest julia> using Dates @@ -1142,9 +1142,9 @@ some confusion regarding the matter. Version numbers can easily be expressed with non-standard string literals of the form [`v"..."`](@ref @v_str). Version number literals create [`VersionNumber`](@ref) objects which follow the -specifications of [semantic versioning](https://semver.org/), +specifications of [semantic versioning 2.0.0-rc2](https://semver.org/spec/v2.0.0-rc.2.html), and therefore are composed of major, minor and patch numeric values, followed by pre-release and -build alpha-numeric annotations. For example, `v"0.2.1-rc1+win64"` is broken into major version +build alphanumeric annotations. 
For example, `v"0.2.1-rc1+win64"` is broken into major version `0`, minor version `2`, patch version `1`, pre-release `rc1` and build `win64`. When entering a version literal, everything except the major version number is optional, therefore e.g. `v"0.2"` is equivalent to `v"0.2.0"` (with empty pre-release/build annotations), `v"2"` is equivalent to @@ -1203,3 +1203,55 @@ Notice that the first two backslashes appear verbatim in the output, since they precede a quote character. However, the next backslash character escapes the backslash that follows it, and the last backslash escapes a quote, since these backslashes appear before a quote. + + +## [Annotated Strings](@id man-annotated-strings) + +!!! note + The API for AnnotatedStrings is considered experimental and is subject to change between + Julia versions. + +It is sometimes useful to be able to hold metadata relating to regions of a +string. A [`AnnotatedString`](@ref Base.AnnotatedString) wraps another string and +allows for regions of it to be annotated with labelled values (`:label => value`). +All generic string operations are applied to the underlying string. However, +when possible, styling information is preserved. This means you can manipulate a +[`AnnotatedString`](@ref Base.AnnotatedString) —taking substrings, padding them, +concatenating them with other strings— and the metadata annotations will "come +along for the ride". + +This string type is fundamental to the [StyledStrings stdlib](@ref +stdlib-styledstrings), which uses `:face`-labelled annotations to hold styling +information. + +When concatenating a [`AnnotatedString`](@ref Base.AnnotatedString), take care to use +[`annotatedstring`](@ref Base.annotatedstring) instead of [`string`](@ref) if you want +to keep the string annotations. + +```jldoctest +julia> str = Base.AnnotatedString("hello there", + [(1:5, :word, :greeting), (7:11, :label, 1)]) +"hello there" + +julia> length(str) +11 + +julia> lpad(str, 14) +" hello there" + +julia> typeof(lpad(str, 7)) +Base.AnnotatedString{String} + +julia> str2 = Base.AnnotatedString(" julia", [(2:6, :face, :magenta)]) +" julia" + +julia> Base.annotatedstring(str, str2) +"hello there julia" + +julia> str * str2 == Base.annotatedstring(str, str2) # *-concatenation still works +true +``` + +The annotations of a [`AnnotatedString`](@ref Base.AnnotatedString) can be accessed +and modified via the [`annotations`](@ref Base.annotations) and +[`annotate!`](@ref Base.annotate!) functions. diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md index d567bf7627073..b9740102faea7 100644 --- a/doc/src/manual/style-guide.md +++ b/doc/src/manual/style-guide.md @@ -96,7 +96,7 @@ Instead of: ```julia function double(a::AbstractArray{<:Number}) - for i = firstindex(a):lastindex(a) + for i in eachindex(a) a[i] *= 2 end return a @@ -107,7 +107,7 @@ use: ```julia function double!(a::AbstractArray{<:Number}) - for i = firstindex(a):lastindex(a) + for i in eachindex(a) a[i] *= 2 end return a @@ -116,7 +116,7 @@ end Julia Base uses this convention throughout and contains examples of functions with both copying and modifying forms (e.g., [`sort`](@ref) and [`sort!`](@ref)), and others -which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)). It +which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)). It is typical for such functions to also return the modified array for convenience. 
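As a small, self-contained illustration of this convention (using only functions from `Base`):

```julia
v = [3, 1, 2]

w = sort(v)    # copying form: returns a new sorted vector, `v` is untouched
sort!(v)       # mutating form: reorders `v` in place and returns it
push!(v, 4)    # also returns the modified array, which is what makes
               # nested calls such as `sort!(push!(v, 0))` convenient
```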
Functions related to IO or making use of random number generators (RNG) are notable exceptions: @@ -262,6 +262,29 @@ Splicing function arguments can be addictive. Instead of `[a..., b...]`, use sim which already concatenates arrays. [`collect(a)`](@ref) is better than `[a...]`, but since `a` is already iterable it is often even better to leave it alone, and not convert it to an array. +## Ensure constructors return an instance of their own type + +When a method `T(x)` is called on a type `T`, it is generally expected to return a value of type T. +Defining a [constructor](@ref man-constructors) that returns an unexpected type can lead to confusing and unpredictable behavior: + +```jldoctest +julia> struct Foo{T} + x::T + end + +julia> Base.Float64(foo::Foo) = Foo(Float64(foo.x)) # Do not define methods like this + +julia> Float64(Foo(3)) # Should return `Float64` +Foo{Float64}(3.0) + +julia> Foo{Int}(x) = Foo{Float64}(x) # Do not define methods like this + +julia> Foo{Int}(3) # Should return `Foo{Int}` +Foo{Float64}(3.0) +``` + +To maintain code clarity and ensure type consistency, always design constructors to return an instance of the type they are supposed to construct. + ## Don't use unnecessary static parameters A function signature: @@ -346,7 +369,7 @@ This would provide custom showing of vectors with a specific new element type. W this should be avoided. The trouble is that users will expect a well-known type like `Vector()` to behave in a certain way, and overly customizing its behavior can make it harder to work with. -## Avoid type piracy +## [Avoid type piracy](@id avoid-type-piracy) "Type piracy" refers to the practice of extending or redefining methods in Base or other packages on types that you have not defined. In extreme cases, you can crash Julia diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md index 3510dfe7a7042..f13e2e6865d0f 100644 --- a/doc/src/manual/types.md +++ b/doc/src/manual/types.md @@ -713,10 +713,12 @@ For the default constructor, exactly one argument must be supplied for each fiel ```jldoctest pointtype julia> Point{Float64}(1.0) ERROR: MethodError: no method matching Point{Float64}(::Float64) +The type `Point{Float64}` exists, but no method is defined for this combination of argument types when trying to construct it. [...] -julia> Point{Float64}(1.0,2.0,3.0) +julia> Point{Float64}(1.0, 2.0, 3.0) ERROR: MethodError: no method matching Point{Float64}(::Float64, ::Float64, ::Float64) +The type `Point{Float64}` exists, but no method is defined for this combination of argument types when trying to construct it. [...] ``` @@ -748,6 +750,7 @@ to `Point` have the same type. When this isn't the case, the constructor will fa ```jldoctest pointtype julia> Point(1,2.5) ERROR: MethodError: no method matching Point(::Int64, ::Float64) +The type `Point` exists, but no method is defined for this combination of argument types when trying to construct it. Closest candidates are: Point(::T, !Matched::T) where T @@ -1099,7 +1102,7 @@ Array{Vector{T}, 1} where T Type `T1` defines a 1-dimensional array of 1-dimensional arrays; each of the inner arrays consists of objects of the same type, but this type may vary from one inner array to the next. On the other hand, type `T2` defines a 1-dimensional array of 1-dimensional arrays all of whose inner arrays must have the -same type. Note that `T2` is an abstract type, e.g., `Array{Array{Int,1},1} <: T2`, whereas `T1` is a concrete type. 
As a consequence, `T1` can be constructed with a zero-argument constructor `a=T1()` but `T2` cannot. +same type. Note that `T2` is an abstract type, e.g., `Array{Array{Int,1},1} <: T2`, whereas `T1` is a concrete type. As a consequence, `T1` can be constructed with a zero-argument constructor `a=T1()` but `T2` cannot. There is a convenient syntax for naming such types, similar to the short form of function definition syntax: @@ -1338,6 +1341,16 @@ type -- either [`Int32`](@ref) or [`Int64`](@ref). reflects the size of a native pointer on that machine, the floating point register sizes are specified by the IEEE-754 standard.) +Type aliases may be parametrized: + +```jldoctest +julia> const Family{T} = Set{T} +Set + +julia> Family{Char} === Set{Char} +true +``` + ## Operations on Types Since types in Julia are themselves objects, ordinary functions can operate on them. Some functions @@ -1403,14 +1416,16 @@ is raised: ```jldoctest; filter = r"Closest candidates.*"s julia> supertype(Union{Float64,Int64}) ERROR: MethodError: no method matching supertype(::Type{Union{Float64, Int64}}) +The function `supertype` exists, but no method is defined for this combination of argument types. + Closest candidates are: [...] ``` ## [Custom pretty-printing](@id man-custom-pretty-printing) -Often, one wants to customize how instances of a type are displayed. This is accomplished by -overloading the [`show`](@ref) function. For example, suppose we define a type to represent +Often, one wants to customize how instances of a type are displayed. This is accomplished by +overloading the [`show`](@ref) function. For example, suppose we define a type to represent complex numbers in polar form: ```jldoctest polartype @@ -1465,13 +1480,13 @@ julia> [Polar(3, 4.0), Polar(4.0,5.3)] 4.0 * exp(5.3im) ``` -where the single-line `show(io, z)` form is still used for an array of `Polar` values. Technically, -the REPL calls `display(z)` to display the result of executing a line, which defaults to `show(stdout, MIME("text/plain"), z)`, -which in turn defaults to `show(stdout, z)`, but you should *not* define new [`display`](@ref) +where the single-line `show(io, z)` form is still used for an array of `Polar` values. Technically, +the REPL calls `display(z)` to display the result `z` of executing a line, which defaults to `show(io, MIME("text/plain"), z)` (where `io` is an [`IOContext`](@ref) wrapper around [`stdout`](@ref)), +which in turn defaults to `show(io, z)`, but you should *not* define new [`display`](@ref) methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref Multimedia-I/O)). Moreover, you can also define `show` methods for other MIME types in order to enable richer display -(HTML, images, etcetera) of objects in environments that support this (e.g. IJulia). For example, +(HTML, images, etcetera) of objects in environments that support this (e.g. IJulia). For example, we can define formatted HTML display of `Polar` objects, with superscripts and italics, via: ```jldoctest polartype @@ -1493,9 +1508,9 @@ julia> show(stdout, "text/html", Polar(3.0,4.0)) ``` As a rule of thumb, the single-line `show` method should print a valid Julia expression for creating -the shown object. When this `show` method contains infix operators, such as the multiplication +the shown object. When this `show` method contains infix operators, such as the multiplication operator (`*`) in our single-line `show` method for `Polar` above, it may not parse correctly when -printed as part of another object. 
To see this, consider the expression object (see [Program +printed as part of another object. To see this, consider the expression object (see [Program representation](@ref)) which takes the square of a specific instance of our `Polar` type: ```jldoctest polartype @@ -1509,7 +1524,7 @@ julia> print(:($a^2)) Because the operator `^` has higher precedence than `*` (see [Operator Precedence and Associativity](@ref)), this output does not faithfully represent the expression `a ^ 2` which should be equal to `(3.0 * -exp(4.0im)) ^ 2`. To solve this issue, we must make a custom method for `Base.show_unquoted(io::IO, +exp(4.0im)) ^ 2`. To solve this issue, we must make a custom method for `Base.show_unquoted(io::IO, z::Polar, indent::Int, precedence::Int)`, which is called internally by the expression object when printing: @@ -1529,7 +1544,7 @@ julia> :($a^2) ``` The method defined above adds parentheses around the call to `show` when the precedence of the -calling operator is higher than or equal to the precedence of multiplication. This check allows +calling operator is higher than or equal to the precedence of multiplication. This check allows expressions which parse correctly without the parentheses (such as `:($a + 2)` and `:($a == 2)`) to omit them when printing: @@ -1572,11 +1587,24 @@ julia> [Polar(3, 4.0) Polar(4.0,5.3)] See the [`IOContext`](@ref) documentation for a list of common properties which can be used to adjust printing. +### Output-function summary + +Here is a brief summary of the different output functions in Julia and how they are related. +Most new types should only need to define `show` methods, if anything. + +* [`display(x)`](@ref) tells the current environment to display `x` in whatever way it thinks best. (This might even be a graphical display in something like a Jupyter or Pluto notebook.) By default (e.g. in scripts or in the text REPL), it calls `show(io, "text/plain", x)`, or equivalently `show(io, MIME"text/plain"(), x)`, for an appropriate `io` stream. (In the REPL, `io` is an [`IOContext`](@ref) wrapper around [`stdout`](@ref).) The REPL uses `display` to output the result of an evaluated expression. +* The 3-argument [`show(io, ::MIME"text/plain", x)`](@ref) method performs verbose pretty-printing of `x`. By default (if no 3-argument method is defined for `typeof(x)`), it calls the 2-argument `show(io, x)`. It is called by the 2-argument `repr("text/plain", x)`. Other 3-argument `show` methods can be defined for additional MIME types as discussed above, to enable richer display of `x` in some interactive environments. +* The 2-argument [`show(io, x)`](@ref) is the default simple text representation of `x`. It is called by the 1-argument [`repr(x)`](@ref), and is typically the format you might employ to input `x` into Julia. The 1-argument `show(x)` calls `show(stdout, x)`. +* [`print(io, x)`](@ref) by default calls `show(io, x)`, but a few types have a distinct `print` format — most notably, when `x` is a string, `print` outputs the raw text whereas `show` outputs an escaped string enclosed in quotation marks. The 1-argument `print(x)` calls `print(stdout, x)`. `print` is also called by [`string(x)`](@ref). See also [`println`](@ref) (to append a newline) and [`printstyled`](@ref) (to add colors etc.), both of which call `print`. +* [`write(io, x)`](@ref), if it is defined (it generally has *no* default definition for new types), writes a "raw" binary representation of `x` to `io`, e.g. an `x::Int32` will be written as 4 bytes. 
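To tie the summary above together, here is a minimal sketch using a hypothetical `Point2D` type, showing which method each output function ends up calling (the names and formats are illustrative, not part of any API):

```julia
struct Point2D
    x::Float64
    y::Float64
end

# Compact, parseable form: used by `print`, `repr(x)`, and for elements inside containers.
Base.show(io::IO, p::Point2D) = print(io, "Point2D(", p.x, ", ", p.y, ")")

# Verbose form: used when a standalone value is displayed, e.g. at the REPL.
function Base.show(io::IO, ::MIME"text/plain", p::Point2D)
    println(io, "Point2D at:")
    print(io, "  x = ", p.x, "\n  y = ", p.y)
end

p = Point2D(1.0, 2.0)
repr(p)                 # "Point2D(1.0, 2.0)"   (the 2-argument `show`)
repr("text/plain", p)   # the multi-line form   (the 3-argument `show`)
print(p)                # falls back to the 2-argument `show`
display(p)              # the environment chooses; in the REPL this is the verbose form
```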
+ +It is also helpful to be familiar with the metadata that can be attached to an `io` stream by an [`IOContext`](@ref) wrapper. For example, the REPL sets the `:limit => true` flag from `display` for an evaluated expression, in order to limit the output to fit in the terminal; you can query this flag with `get(io, :limit, false)`. And when displaying an object contained within, for example, a multi-column matrix, the `:compact => true` flag could be set, which you can query with `get(io, :compact, false)`. + ## "Value types" In Julia, you can't dispatch on a *value* such as `true` or `false`. However, you can dispatch on parametric types, and Julia allows you to include "plain bits" values (Types, Symbols, Integers, -floating-point numbers, tuples, etc.) as type parameters. A common example is the dimensionality +floating-point numbers, tuples, etc.) as type parameters. A common example is the dimensionality parameter in `Array{T,N}`, where `T` is a type (e.g., [`Float64`](@ref)) but `N` is just an `Int`. You can create your own custom types that take values as parameters, and use them to control dispatch @@ -1594,7 +1622,7 @@ julia> Val(x) = Val{x}() Val ``` -There is no more to the implementation of `Val` than this. Some functions in Julia's standard +There is no more to the implementation of `Val` than this. Some functions in Julia's standard library accept `Val` instances as arguments, and you can also use it to write your own functions. For example: @@ -1617,7 +1645,7 @@ a *type*, i.e., use `foo(Val(:bar))` rather than `foo(Val{:bar})`. It's worth noting that it's extremely easy to mis-use parametric "value" types, including `Val`; in unfavorable cases, you can easily end up making the performance of your code much *worse*. - In particular, you would never want to write actual code as illustrated above. For more information + In particular, you would never want to write actual code as illustrated above. For more information about the proper (and improper) uses of `Val`, please read [the more extensive discussion in the performance tips](@ref man-performance-value-type). [^1]: "Small" is defined by the `max_union_splitting` configuration, which currently defaults to 4. diff --git a/doc/src/manual/unicode-input.md b/doc/src/manual/unicode-input.md index 7539e75bb4f24..eba970c051f1e 100644 --- a/doc/src/manual/unicode-input.md +++ b/doc/src/manual/unicode-input.md @@ -2,7 +2,7 @@ The following table lists Unicode characters that can be entered via tab completion of LaTeX-like abbreviations in the Julia REPL (and -in various other editing environments). You can also get information on how to +in various other editing environments). You can also get information on how to type a symbol by entering it in the REPL help, i.e. by typing `?` and then entering the symbol in the REPL (e.g., by copy-paste from somewhere you saw the symbol). @@ -52,11 +52,12 @@ function fix_combining_chars(char) return cat == 6 || cat == 8 ? 
"$NBSP$char$NBSP" : "$char" end - function table_entries(completions, unicode_dict) - entries = [[ - "Code point(s)", "Character(s)", - "Tab completion sequence(s)", "Unicode name(s)" + entries = Any[Any[ + ["Code point(s)"], + ["Character(s)"], + ["Tab completion sequence(s)"], + ["Unicode name(s)"], ]] for (chars, inputs) in sort!(collect(completions), by = first) code_points, unicode_names, characters = String[], String[], String[] @@ -65,12 +66,21 @@ function table_entries(completions, unicode_dict) push!(unicode_names, get(unicode_dict, UInt32(char), "(No Unicode name)")) push!(characters, isempty(characters) ? fix_combining_chars(char) : "$char") end + inputs_md = [] + for (i, input) in enumerate(inputs) + i > 1 && push!(inputs_md, ", ") + push!(inputs_md, Markdown.Code("", input)) + end push!(entries, [ - join(code_points, " + "), join(characters), - join(inputs, ", "), join(unicode_names, " + ") + [join(code_points, " + ")], + [join(characters)], + inputs_md, + [join(unicode_names, " + ")], ]) end - return Markdown.Table(entries, [:l, :l, :l, :l]) + table = Markdown.Table(entries, [:l, :c, :l, :l]) + # We also need to wrap the Table in a Markdown.MD "document" + return Markdown.MD([table]) end table_entries( diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md index c763d62680091..99f7ba088311d 100644 --- a/doc/src/manual/variables-and-scoping.md +++ b/doc/src/manual/variables-and-scoping.md @@ -16,17 +16,58 @@ introduce a "soft scope", which affects whether [shadowing](https://en.wikipedia.org/wiki/Variable_shadowing) a global variable by the same name is allowed or not. -### [Scope constructs](@id man-scope-table) +!!! info "Summary" + Variables defined in global scope may be undefined in inner local scopes, + depending on where the code is run, in order to balance safety and convenience. + The hard and soft local scoping rules define the interplay between global and local variables. + + However, variables defined only in local scope behave consistently in all contexts. + If the variable is already defined, it will be reused. If the variable is not defined, + it will be made available to the current and inner scopes (but not outer scopes). + +!!! tip "A Common Confusion" + If you run into an unexpectedly undefined variable, + + ```julia + # Print the numbers 1 through 5 + i = 0 + while i < 5 + i += 1 # ERROR: UndefVarError: `i` not defined + println(i) + end + ``` + + a simple fix is to change all global variable definitions into local definitions + by wrapping the code in a `let` block or `function`. + + ```julia + # Print the numbers 1 through 5 + let i = 0 + while i < 5 + i += 1 # Now outer `i` is defined in the inner scope of the while loop + println(i) + end + end + ``` + + This is a common source of confusion when writing procedural scripts, + but it becomes a non-issue if code is moved inside functions + or executed interactively in the REPL. + + See also the [`global`](@ref) and [`local`](@ref) keywords + to explicitly achieve any desired scoping behavior. 
+ +### [Scope Constructs](@id man-scope-table) The constructs introducing scope blocks are: -| Construct | Scope type | Allowed within | -|:----------|:-----------|:---------------| +| Construct | Scope Type Introduced | Scope Types Able to Contain Construct | +|:----------|:----------------------|:--------------------------------------| | [`module`](@ref), [`baremodule`](@ref) | global | global | -| [`struct`](@ref) | local (soft) | global | -| [`for`](@ref), [`while`](@ref), [`try`](@ref try) | local (soft) | global, local | +| [`struct`](@ref) | local (hard) | global | | [`macro`](@ref) | local (hard) | global | -| functions, [`do`](@ref) blocks, [`let`](@ref) blocks, comprehensions, generators | local (hard) | global, local | +| [`for`](@ref), [`while`](@ref), [`try`](@ref try) | local (soft) | global, local | +| [`function`](@ref), [`do`](@ref), [`let`](@ref), [comprehensions](@ref man-comprehensions), [generators](@ref man-generators) | local (hard) | global, local | Notably missing from this table are [begin blocks](@ref man-compound-expressions) and [if blocks](@ref man-conditional-evaluation) @@ -67,31 +108,7 @@ Each module introduces a new global scope, separate from the global scope of all is no all-encompassing global scope. Modules can introduce variables of other modules into their scope through the [using or import](@ref modules) statements or through qualified access using the dot-notation, i.e. each module is a so-called *namespace* as well as a first-class data structure -associating names with values. Note that while variable bindings can be read externally, they can only -be changed within the module to which they belong. As an escape hatch, you can always evaluate code -inside that module to modify a variable; this guarantees, in particular, that module bindings cannot -be modified externally by code that never calls `eval`. - -```jldoctest -julia> module A - a = 1 # a global in A's scope - end; - -julia> module B - module C - c = 2 - end - b = C.c # can access the namespace of a nested global scope - # through a qualified access - import ..A # makes module A available - d = A.a - end; - -julia> module D - b = a # errors as D's global scope is separate from A's - end; -ERROR: UndefVarError: `a` not defined -``` +associating names with values. If a top-level expression contains a variable declaration with keyword `local`, then that variable is not accessible outside that expression. @@ -152,10 +169,10 @@ that location: 1. **Existing local:** If `x` is *already a local variable*, then the existing local `x` is assigned; 2. **Hard scope:** If `x` is *not already a local variable* and assignment occurs inside of any - hard scope construct (i.e. within a `let` block, function or macro body, comprehension, or + hard scope construct (i.e. within a `let` block, function, struct or macro body, comprehension, or generator), a new local named `x` is created in the scope of the assignment; 3. 
**Soft scope:** If `x` is *not already a local variable* and all of the scope constructs - containing the assignment are soft scopes (loops, `try`/`catch` blocks, or `struct` blocks), the + containing the assignment are soft scopes (loops, `try`/`catch` blocks), the behavior depends on whether the global variable `x` is defined: * if global `x` is *undefined*, a new local named `x` is created in the scope of the assignment; @@ -187,7 +204,7 @@ julia> greet() hello julia> x # global -ERROR: UndefVarError: `x` not defined +ERROR: UndefVarError: `x` not defined in `Main` ``` Inside of the `greet` function, the assignment `x = "hello"` causes `x` to be a new local variable @@ -256,7 +273,7 @@ julia> sum_to(10) 55 julia> s # global -ERROR: UndefVarError: `s` not defined +ERROR: UndefVarError: `s` not defined in `Main` ``` Since `s` is local to the function `sum_to`, calling the function has no effect on the global @@ -343,7 +360,7 @@ hello hello julia> x -ERROR: UndefVarError: `x` not defined +ERROR: UndefVarError: `x` not defined in `Main` ``` Since the global `x` is not defined when the `for` loop is evaluated, the first clause of the soft @@ -408,7 +425,7 @@ julia> code = """ julia> include_string(Main, code) ┌ Warning: Assignment to `s` in soft scope is ambiguous because a global variable by the same name exists: `s` will be treated as a new local. Disambiguate by using `local s` to suppress this warning or `global s` to assign to the existing global variable. └ @ string:4 -ERROR: LoadError: UndefVarError: `s` not defined +ERROR: LoadError: UndefVarError: `s` not defined in local scope ``` Here we use [`include_string`](@ref), to evaluate `code` as though it were the contents of a file. @@ -559,7 +576,7 @@ julia> let x = 1, z println("z: $z") # errors as z has not been assigned yet but is local end x: 1, y: -1 -ERROR: UndefVarError: `z` not defined +ERROR: UndefVarError: `z` not defined in local scope ``` The assignments are evaluated in order, with each right-hand side evaluated in the scope before @@ -726,7 +743,7 @@ ERROR: invalid redefinition of constant x julia> const y = 1.0 1.0 -julia> y = 2.0 +julia> const y = 2.0 WARNING: redefinition of constant y. This may fail, cause incorrect answers, or produce other errors. 2.0 ``` @@ -738,34 +755,13 @@ julia> const z = 100 julia> z = 100 100 ``` -The last rule applies to immutable objects even if the variable binding would change, e.g.: -```julia-repl -julia> const s1 = "1" -"1" - -julia> s2 = "1" -"1" - -julia> pointer.([s1, s2], 1) -2-element Array{Ptr{UInt8},1}: - Ptr{UInt8} @0x00000000132c9638 - Ptr{UInt8} @0x0000000013dd3d18 - -julia> s1 = s2 -"1" - -julia> pointer.([s1, s2], 1) -2-element Array{Ptr{UInt8},1}: - Ptr{UInt8} @0x0000000013dd3d18 - Ptr{UInt8} @0x0000000013dd3d18 -``` -However, for mutable objects the warning is printed as expected: +* if an assignment would change the mutable object to which the variable points (regardless of whether those two objects are deeply equal), a warning is printed: ```jldoctest julia> const a = [1] 1-element Vector{Int64}: 1 -julia> a = [1] +julia> const a = [1] WARNING: redefinition of constant a. This may fail, cause incorrect answers, or produce other errors. 1-element Vector{Int64}: 1 @@ -786,7 +782,7 @@ f (generic function with 1 method) julia> f() 1 -julia> x = 2 +julia> const x = 2 WARNING: redefinition of constant x. This may fail, cause incorrect answers, or produce other errors. 
2 diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index 6c22719c1ce86..ad2c60a029032 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -59,10 +59,10 @@ name `δ` can be entered by typing `\delta`-*tab*, or even `α̂⁽²⁾` by `\a that you don't know how to type, the REPL help will tell you: just type `?` and then paste the symbol.) -Julia will even let you redefine built-in constants and functions if needed (although -this is not recommended to avoid potential confusions): +Julia will even let you shadow existing exported constants and functions with local ones +(although this is not recommended to avoid potential confusions): -```jldoctest +```jldoctest; filter = r"with \d+ methods" julia> pi = 3 3 @@ -71,6 +71,12 @@ julia> pi julia> sqrt = 4 4 + +julia> length() = 5 +length (generic function with 1 method) + +julia> Base.length +length (generic function with 79 methods) ``` However, if you try to redefine a built-in constant or function already in use, Julia will give @@ -104,25 +110,24 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f` will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron` -to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks, +to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks, primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`. A space is required between an operator that ends with a subscript/superscript letter and a subsequent variable name. For example, if `+ᵃ` is an operator, then `+ᵃx` must be written as `+ᵃ x` to distinguish it from `+ ᵃx` where `ᵃx` is the variable name. -A particular class of variable names is one that contains only underscores. These identifiers can only be assigned values, which are immediately discarded, and cannot therefore be used to assign values to other variables (i.e., they cannot be used as [`rvalues`](https://en.wikipedia.org/wiki/Value_(computer_science)#Assignment:_l-values_and_r-values)) or use the last value -assigned to them in any way. +A particular class of variable names is one that contains only underscores. These identifiers are write-only. I.e. they can only be assigned values, which are immediately discarded, and their values cannot be used in any way. ```julia-repl julia> x, ___ = size([2 2; 1 1]) (2, 2) julia> y = ___ -ERROR: syntax: all-underscore identifier used as rvalue +ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions julia> println(___) -ERROR: syntax: all-underscore identifier used as rvalue +ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions ``` The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref Keywords): @@ -151,7 +156,7 @@ The minus sign `−` (U+2212) is treated as equivalent to the hyphen-minus sign An assignment `variable = value` "binds" the name `variable` to the `value` computed on the right-hand side, and the whole assignment is treated by Julia as an expression -equal to the right-hand-side `value`. This means that assignments can be *chained* +equal to the right-hand-side `value`. 
This means that assignments can be *chained* (the same `value` assigned to multiple variables with `variable1 = variable2 = value`) or used in other expressions, and is also why their result is shown in the REPL as the value of the right-hand side. (In general, the REPL displays the value of whatever @@ -170,7 +175,7 @@ julia> b ``` A common confusion is the distinction between *assignment* (giving a new "name" to a value) -and *mutation* (changing a value). If you run `a = 2` followed by `a = 3`, you have changed +and *mutation* (changing a value). If you run `a = 2` followed by `a = 3`, you have changed the "name" `a` to refer to a new value `3` … you haven't changed the number `2`, so `2+2` will still give `4` and not `6`! This distinction becomes more clear when dealing with *mutable* types like [arrays](@ref lib-arrays), whose contents *can* be changed: @@ -208,11 +213,13 @@ julia> b # b refers to the original array object, which has been mutated 3 ``` That is, `a[i] = value` (an alias for [`setindex!`](@ref)) *mutates* an existing array object -in memory, accessible via either `a` or `b`. Subsequently setting `a = 3.14159` +in memory, accessible via either `a` or `b`. Subsequently setting `a = 3.14159` does not change this array, it simply binds `a` to a different object; the array is still -accessible via `b`. The other common syntax to mutate an existing object is +accessible via `b`. Another common syntax to mutate an existing object is `a.field = value` (an alias for [`setproperty!`](@ref)), which can be used to change -a [`mutable struct`](@ref). +a [`mutable struct`](@ref). There is also mutation via dot assignment, for example +`b .= 5:7` (which mutates our array `b` in-place to contain `[5,6,7]`), as part of Julia's +[vectorized "dot" syntax](@ref man-dot-operators). When you call a [function](@ref man-functions) in Julia, it behaves as if you *assigned* the argument values to new variable names corresponding to the function arguments, as discussed diff --git a/doc/src/manual/workflow-tips.md b/doc/src/manual/workflow-tips.md index 4085a51ff9131..bfc526edbf8dd 100644 --- a/doc/src/manual/workflow-tips.md +++ b/doc/src/manual/workflow-tips.md @@ -10,57 +10,40 @@ your experience at the command line. ### A basic editor/REPL workflow -The most basic Julia workflows involve using a text editor in conjunction with the `julia` command -line. A common pattern includes the following elements: +The most basic Julia workflows involve using a text editor in conjunction with the `julia` command line. - * **Put code under development in a temporary module.** Create a file, say `Tmp.jl`, and include - within it +Create a file, say `Tmp.jl`, and include within it +```julia +module Tmp - ```julia - module Tmp - export say_hello +say_hello() = println("Hello!") - say_hello() = println("Hello!") +# Your other definitions here - # your other definitions here +end # module - end - ``` - * **Put your test code in another file.** Create another file, say `tst.jl`, which looks like +using .Tmp +``` +Then, in the same directory, start the Julia REPL (using the `julia` command). +Run the new file as follows: +``` +julia> include("Tmp.jl") - ```julia - include("Tmp.jl") - import .Tmp - # using .Tmp # we can use `using` to bring the exported symbols in `Tmp` into our namespace +julia> Tmp.say_hello() +Hello! +``` +Explore ideas in the REPL. Save good ideas in `Tmp.jl`. +To reload the file after it has been changed, just `include` it again. 
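For instance, a typical session might look like the following sketch; the `WARNING: replacing module Tmp.` message on re-inclusion is expected and harmless:

```
julia> include("Tmp.jl")   # after editing Tmp.jl, re-evaluate it
WARNING: replacing module Tmp.

julia> Tmp.say_hello()     # runs the updated definitions
Hello!
```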
- Tmp.say_hello() - # say_hello() +The key in the above is that your code is encapsulated in a module. +That allows you to edit `struct` definitions and remove methods, without restarting Julia. - # your other test code here - ``` +(Explanation: `struct`s cannot be edited after definition, nor can methods be deleted. +But you _can_ overwrite the definition of a module, which is what we do when we re-`include("Tmp.jl")`). - and includes tests for the contents of `Tmp`. - Alternatively, you can wrap the contents of your test file in a module, as +In addition, the encapsulation of code in a module protects it from being influenced +by previous state in the REPL, protecting you from hard-to-detect errors. - ```julia - module Tst - include("Tmp.jl") - import .Tmp - #using .Tmp - - Tmp.say_hello() - # say_hello() - - # your other test code here - end - ``` - - The advantage is that your testing code is now contained in a module and does not use the global scope in `Main` for - definitions, which is a bit more tidy. - - * `include` the `tst.jl` file in the Julia REPL with `include("tst.jl")`. - - * **Lather. Rinse. Repeat.** Explore ideas at the `julia` command prompt. Save good ideas in `tst.jl`. To execute `tst.jl` after it has been changed, just `include` it again. ## Browser-based workflow @@ -131,5 +114,5 @@ the following modifications: ``` You can iteratively modify the code in MyPkg in your editor and re-run the - tests with `include("runtests.jl")`. You generally should not need to restart + tests with `include("runtests.jl")`. You generally should not need to restart your Julia session to see the changes take effect (subject to a few [limitations](https://timholy.github.io/Revise.jl/stable/limitations/)). diff --git a/julia.spdx.json b/julia.spdx.json index bea7bdc6c3a5d..0d7ab1df94688 100644 --- a/julia.spdx.json +++ b/julia.spdx.json @@ -86,6 +86,18 @@ "copyrightText": "Copyright (c) 2020 Stefan Karpinski and contributors", "summary": "ArgTools provides tools for creating consistent, flexible APIs that work with various kinds of function arguments." }, + { + "name": "LinearAlgebra.jl", + "SPDXID": "SPDXRef-JuliaLinearAlgebra", + "downloadLocation": "git+https://github.com/JuliaLang/LinearAlgebra.jl.git", + "filesAnalyzed": false, + "homepage": "https://juliastats.org", + "sourceInfo": "The git hash of the version in use can be found in the file stdlib/LinearAlgebra.version", + "licenseConcluded": "MIT", + "licenseDeclared": "MIT", + "copyrightText": "Copyright (c) 2009-2024: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors", + "summary": "Development repository for the LinearAlgebra standard library (stdlib) that ships with Julia." + }, { "name": "Tar.jl", "SPDXID": "SPDXRef-JuliaTar", @@ -219,16 +231,16 @@ "summary": "libssh2 is a library implementing the SSH2 protocol, available under the revised BSD license." 
}, { - "name": "mbedtls", - "SPDXID": "SPDXRef-mbedtls", - "downloadLocation": "git+https://github.com/ARMmbed/mbedtls.git", + "name": "OpenSSL", + "SPDXID": "SPDXRef-OpenSSL", + "downloadLocation": "git+https://github.com/openssl/openssl.git", "filesAnalyzed": false, - "homepage": "https://tls.mbed.org", - "sourceInfo": "The version in use can be found in the file deps/mbedtls.version", + "homepage": "https://www.openssl.org", + "sourceInfo": "The version in use can be found in the file deps/openssl.version", "licenseConcluded": "Apache-2.0", "licenseDeclared": "Apache-2.0", - "copyrightText": "NOASSERTION", - "summary": "An open source, portable, easy to use, readable and flexible SSL library." + "copyrightText": "Copyright (c) 1998-2024 The OpenSSL Project Authors. Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson.", + "summary": "OpenSSL is a robust, commercial-grade, full-featured Open Source Toolkit for the TLS (formerly SSL), DTLS and QUIC (currently client side only) protocols.", }, { "name": "mpfr", @@ -370,6 +382,32 @@ "copyrightText": "Copyright © 2014-2019 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.", "summary": "utf8proc is a small, clean C library that provides Unicode normalization, case-folding, and other operations for data in the UTF-8 encoding." }, + { + "name": "LibTracyClient", + "SPDXID": "SPDXRef-LibTracyClient", + "downloadLocation": "git+https://github.com/wolfpld/tracy.git", + "filesAnalyzed": false, + "homepage": "https://github.com/wolfpld/tracy", + "sourceInfo": "The git hash of the version in use can be found in the file deps/libtracyclient.version", + "licenseConcluded": "BSD-3-Clause", + "licenseDeclared": "BSD-3-Clause", + "copyrightText": "Copyright (c) 2017-2024, Bartosz Taudul ", + "summary": "A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.", + "comment": "LibTracyClient is an optional dependency that is not built by default" + }, + { + "name": "ittapi", + "SPDXID": "SPDXRef-ittapi", + "downloadLocation": "git+https://github.com/intel/ittapi.git", + "filesAnalyzed": false, + "homepage": "https://github.com/intel/ittapi", + "sourceInfo": "The git hash of the version in use can be found in the file deps/ittapi.version", + "licenseConcluded": "BSD-3-Clause AND GPL-2.0-only", + "licenseDeclared": "BSD-3-Clause AND GPL-2.0-only", + "copyrightText": "Copyright (c) 2019 Intel Corporation", + "summary": "The Instrumentation and Tracing Technology (ITT) API enables your application to generate and control the collection of trace data during its execution across different Intel tools.", + "comment": "ITTAPI is an optional dependency that is not built by default" + }, { "name": "7-Zip", "SPDXID": "SPDXRef-7zip", @@ -522,7 +560,7 @@ "relatedSpdxElement": "SPDXRef-JuliaMain" }, { - "spdxElementId": "SPDXRef-mbedtls", + "spdxElementId": "SPDXRef-OpenSSL", "relationshipType": "BUILD_DEPENDENCY_OF", "relatedSpdxElement": "SPDXRef-JuliaMain" }, @@ -581,6 +619,16 @@ "relationshipType": "BUILD_DEPENDENCY_OF", "relatedSpdxElement": "SPDXRef-JuliaMain" }, + { + "spdxElementId": "SPDXRef-LibTracyClient", + "relationshipType": "OPTIONAL_DEPENDENCY_OF", + "relatedSpdxElement": "SPDXRef-JuliaMain" + }, + { + "spdxElementId": "SPDXRef-ittapi", + "relationshipType": "OPTIONAL_DEPENDENCY_OF", + "relatedSpdxElement": "SPDXRef-JuliaMain" + }, { "spdxElementId": "SPDXRef-7zip", "relationshipType": "RUNTIME_DEPENDENCY_OF", diff --git 
a/pkgimage.mk b/pkgimage.mk index 0803a188851bb..78b2618be549f 100644 --- a/pkgimage.mk +++ b/pkgimage.mk @@ -2,132 +2,36 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) BUILDDIR := . JULIAHOME := $(SRCDIR) include $(JULIAHOME)/Make.inc +include $(JULIAHOME)/stdlib/stdlib.mk -VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION) # set some influential environment variables -export JULIA_DEPOT_PATH := $(build_prefix)/share/julia -export JULIA_LOAD_PATH := @stdlib +export JULIA_DEPOT_PATH := $(shell echo $(call cygpath_w,$(build_prefix)/share/julia)) +export JULIA_LOAD_PATH := @stdlib$(PATHSEP)$(shell echo $(call cygpath_w,$(JULIAHOME)/stdlib)) unexport JULIA_PROJECT := unexport JULIA_BINDIR := +export JULIA_FALLBACK_REPL := true + default: release -release: all-release -debug: all-debug +release: $(BUILDDIR)/stdlib/release.image +debug: $(BUILDDIR)/stdlib/debug.image all: release debug -$(JULIA_DEPOT_PATH): +$(JULIA_DEPOT_PATH)/compiled: mkdir -p $@ print-depot-path: @$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH') -STDLIBS := ArgTools Artifacts Base64 CRC32c FileWatching Libdl NetworkOptions SHA Serialization \ - GMP_jll LLVMLibUnwind_jll LibUV_jll LibUnwind_jll MbedTLS_jll OpenLibm_jll PCRE2_jll \ - Zlib_jll dSFMT_jll libLLVM_jll libblastrampoline_jll OpenBLAS_jll Printf Random Tar \ - LibSSH2_jll MPFR_jll LinearAlgebra Dates Distributed Future LibGit2 Profile SparseArrays UUIDs \ - SharedArrays TOML Test LibCURL Downloads Pkg Dates LazyArtifacts Sockets Unicode Markdown \ - InteractiveUtils REPL DelimitedFiles - -all-release: $(addprefix cache-release-, $(STDLIBS)) -all-debug: $(addprefix cache-debug-, $(STDLIBS)) - -define pkgimg_builder -$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \ - $$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1) -$$(BUILDDIR)/stdlib/$1.release.image: $$($1_SRCS) $$(addsuffix .release.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys.$(SHLIB_EXT) - @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))') - @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))') - touch $$@ -cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image -$$(BUILDDIR)/stdlib/$1.debug.image: $$($1_SRCS) $$(addsuffix .debug.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) - @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))') - @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))') -cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image -.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image -endef - -# Used to just define them in the dependency graph -# reside in the system image -define sysimg_builder -$$(BUILDDIR)/stdlib/$1.release.image: - touch $$@ -cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image -$$(BUILDDIR)/stdlib/$1.debug.image: - touch $$@ -cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image -.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image -endef - -# no dependencies -$(eval $(call pkgimg_builder,MozillaCACerts_jll,)) -$(eval $(call sysimg_builder,ArgTools,)) -$(eval $(call sysimg_builder,Artifacts,)) -$(eval 
$(call sysimg_builder,Base64,)) -$(eval $(call sysimg_builder,CRC32c,)) -$(eval $(call sysimg_builder,FileWatching,)) -$(eval $(call sysimg_builder,Libdl,)) -$(eval $(call sysimg_builder,Logging,)) -$(eval $(call sysimg_builder,Mmap,)) -$(eval $(call sysimg_builder,NetworkOptions,)) -$(eval $(call sysimg_builder,SHA,)) -$(eval $(call sysimg_builder,Serialization,)) -$(eval $(call sysimg_builder,Sockets,)) -$(eval $(call sysimg_builder,Unicode,)) -$(eval $(call pkgimg_builder,Profile,)) - -# 1-depth packages -$(eval $(call pkgimg_builder,GMP_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,LLVMLibUnwind_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,LibUV_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,LibUnwind_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,MbedTLS_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,nghttp2_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,OpenLibm_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,PCRE2_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,Zlib_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,dSFMT_jll,Artifacts Libdl)) -$(eval $(call pkgimg_builder,libLLVM_jll,Artifacts Libdl)) -$(eval $(call sysimg_builder,libblastrampoline_jll,Artifacts Libdl)) -$(eval $(call sysimg_builder,OpenBLAS_jll,Artifacts Libdl)) -$(eval $(call sysimg_builder,Markdown,Base64)) -$(eval $(call sysimg_builder,Printf,Unicode)) -$(eval $(call sysimg_builder,Random,SHA)) -$(eval $(call sysimg_builder,Tar,ArgTools,SHA)) -$(eval $(call pkgimg_builder,DelimitedFiles,Mmap)) - -# 2-depth packages -$(eval $(call pkgimg_builder,LLD_jll,Zlib_jll libLLVM_jll Artifacts Libdl)) -$(eval $(call pkgimg_builder,LibSSH2_jll,Artifacts Libdl MbedTLS_jll)) -$(eval $(call pkgimg_builder,MPFR_jll,Artifacts Libdl GMP_jll)) -$(eval $(call sysimg_builder,LinearAlgebra,Libdl libblastrampoline_jll OpenBLAS_jll)) -$(eval $(call sysimg_builder,Dates,Printf)) -$(eval $(call pkgimg_builder,Distributed,Random Serialization Sockets)) -$(eval $(call sysimg_builder,Future,Random)) -$(eval $(call sysimg_builder,InteractiveUtils,Markdown)) -$(eval $(call sysimg_builder,LibGit2,NetworkOptions Printf SHA Base64)) -$(eval $(call sysimg_builder,UUIDs,Random SHA)) - - # 3-depth packages - # LibGit2_jll -$(eval $(call pkgimg_builder,LibCURL_jll,LibSSH2_jll nghttp2_jll MbedTLS_jll Zlib_jll Artifacts Libdl)) -$(eval $(call sysimg_builder,REPL,InteractiveUtils Markdown Sockets Unicode)) -$(eval $(call pkgimg_builder,SharedArrays,Distributed Mmap Random Serialization)) -$(eval $(call sysimg_builder,TOML,Dates)) -$(eval $(call pkgimg_builder,Test,Logging Random Serialization InteractiveUtils)) - -# 4-depth packages -$(eval $(call sysimg_builder,LibCURL,LibCURL_jll MozillaCACerts_jll)) - -# 5-depth packages -$(eval $(call sysimg_builder,Downloads,ArgTools FileWatching LibCURL NetworkOptions)) - -# 6-depth packages -$(eval $(call sysimg_builder,Pkg,Dates LibGit2 Libdl Logging Printf Random SHA UUIDs)) # Markdown REPL +$(BUILDDIR)/stdlib/%.image: $(JULIAHOME)/stdlib/Project.toml $(JULIAHOME)/stdlib/Manifest.toml $(INDEPENDENT_STDLIBS_SRCS) $(JULIA_DEPOT_PATH)/compiled + @$(call PRINT_JULIA, JULIA_CPU_TARGET="$(JULIA_CPU_TARGET)" $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e \ + 'Base.Precompilation.precompilepkgs(configs=[``=>Base.CacheFlags(debug_level=2, opt_level=3), ``=>Base.CacheFlags(check_bounds=1, debug_level=2, opt_level=3)])') + touch $@ -# 7-depth packages -$(eval $(call pkgimg_builder,LazyArtifacts,Artifacts Pkg)) +$(BUILDDIR)/stdlib/release.image: 
$(build_private_libdir)/sys.$(SHLIB_EXT) +$(BUILDDIR)/stdlib/debug.image: $(build_private_libdir)/sys-debug.$(SHLIB_EXT) -$(eval $(call pkgimg_builder,SparseArrays,Libdl LinearAlgebra Random Serialization)) -# SuiteSparse_jll -# Statistics +clean: + rm -rf $(JULIA_DEPOT_PATH)/compiled + rm -f $(BUILDDIR)/stdlib/*.image diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp index f06d4362bf958..86b0bdb27638b 100644 --- a/src/APInt-C.cpp +++ b/src/APInt-C.cpp @@ -7,16 +7,11 @@ #include #include "APInt-C.h" -#include "julia.h" #include "julia_assert.h" #include "julia_internal.h" using namespace llvm; -inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { - return alignTo(Value, Align, Skew); -} - const unsigned int integerPartWidth = llvm::APInt::APINT_BITS_PER_WORD; const unsigned int host_char_bit = 8; @@ -25,15 +20,15 @@ const unsigned int host_char_bit = 8; APInt s; \ if ((numbits % integerPartWidth) != 0) { \ /* use LLT_ALIGN to round the memory area up to the nearest integerPart-sized chunk */ \ - unsigned nbytes = RoundUpToAlignment(numbits, integerPartWidth) / host_char_bit; \ + unsigned nbytes = alignTo(numbits, integerPartWidth) / host_char_bit; \ integerPart *data_a64 = (integerPart*)alloca(nbytes); \ /* TODO: this memcpy assumes little-endian, * for big-endian, need to align the copy to the other end */ \ - memcpy(data_a64, p##s, RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \ - s = APInt(numbits, makeArrayRef(data_a64, nbytes / sizeof(integerPart))); \ + memcpy(data_a64, p##s, alignTo(numbits, host_char_bit) / host_char_bit); \ + s = APInt(numbits, ArrayRef(data_a64, nbytes / sizeof(integerPart))); \ } \ else { \ - s = APInt(numbits, makeArrayRef(p##s, numbits / integerPartWidth)); \ + s = APInt(numbits, ArrayRef(p##s, numbits / integerPartWidth)); \ } /* assign to "integerPart *pr" from "APInt a" */ @@ -47,7 +42,7 @@ const unsigned int host_char_bit = 8; else if (numbits <= 64) \ *(uint64_t*)p##r = a.getZExtValue(); \ else \ - memcpy(p##r, a.getRawData(), RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \ + memcpy(p##r, a.getRawData(), alignTo(numbits, host_char_bit) / host_char_bit); \ extern "C" JL_DLLEXPORT void LLVMNeg(unsigned numbits, integerPart *pa, integerPart *pr) { @@ -313,17 +308,25 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) { ASSIGN(r, a) } -void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) { +extern "C" float julia_half_to_float(uint16_t ival) JL_NOTSAFEPOINT; +extern "C" uint16_t julia_float_to_half(float param) JL_NOTSAFEPOINT; +extern "C" float julia_bfloat_to_float(uint16_t ival) JL_NOTSAFEPOINT; +extern "C" uint16_t julia_float_to_bfloat(float param) JL_NOTSAFEPOINT; + +void LLVMFPtoInt(jl_datatype_t *ty, void *pa, jl_datatype_t *oty, integerPart *pr, bool isSigned, bool *isExact) { double Val; - if (numbits == 16) - Val = julia__gnu_h2f_ieee(*(uint16_t*)pa); - else if (numbits == 32) + if (ty == jl_float16_type) + Val = julia_half_to_float(*(uint16_t*)pa); + else if (ty == jl_bfloat16_type) + Val = julia_bfloat_to_float(*(uint16_t*)pa); + else if (ty == jl_float32_type) Val = *(float*)pa; - else if (numbits == 64) + else if (ty == jl_float64_type) Val = *(double*)pa; else jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; + unsigned onumbytes = jl_datatype_size(oty); + unsigned 
onumbits = onumbytes * host_char_bit; if (onumbits <= 64) { // fast-path, if possible if (isSigned) { int64_t ia = Val; @@ -350,7 +353,7 @@ void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, APFloat a(Val); bool isVeryExact; APFloat::roundingMode rounding_mode = APFloat::rmNearestTiesToEven; - unsigned nbytes = RoundUpToAlignment(onumbits, integerPartWidth) / host_char_bit; + unsigned nbytes = alignTo(onumbits, integerPartWidth) / host_char_bit; integerPart *parts = (integerPart*)alloca(nbytes); APFloat::opStatus status = a.convertToInteger(MutableArrayRef(parts, nbytes), onumbits, isSigned, rounding_mode, &isVeryExact); memcpy(pr, parts, onumbytes); @@ -360,69 +363,78 @@ void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, } extern "C" JL_DLLEXPORT -void LLVMFPtoSI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - LLVMFPtoInt(numbits, pa, onumbits, pr, true, NULL); +void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { + LLVMFPtoInt(ty, pa, oty, pr, true, NULL); } extern "C" JL_DLLEXPORT -void LLVMFPtoUI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - LLVMFPtoInt(numbits, pa, onumbits, pr, false, NULL); +void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { + LLVMFPtoInt(ty, pa, oty, pr, false, NULL); } extern "C" JL_DLLEXPORT -int LLVMFPtoSI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +int LLVMFPtoSI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { bool isExact; - LLVMFPtoInt(numbits, pa, onumbits, pr, true, &isExact); + LLVMFPtoInt(ty, pa, oty, pr, true, &isExact); return isExact; } extern "C" JL_DLLEXPORT -int LLVMFPtoUI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { bool isExact; - LLVMFPtoInt(numbits, pa, onumbits, pr, false, &isExact); + LLVMFPtoInt(ty, pa, oty, pr, false, &isExact); return isExact; } extern "C" JL_DLLEXPORT -void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +void LLVMSItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { double val; { // end scope before jl_error call + unsigned numbytes = jl_datatype_size(ty); + unsigned numbits = numbytes * host_char_bit; CREATE(a) val = a.roundToDouble(true); } - if (onumbits == 16) - *(uint16_t*)pr = julia__gnu_f2h_ieee(val); - else if (onumbits == 32) + if (oty == jl_float16_type) + *(uint16_t*)pr = julia_float_to_half(val); + else if (oty == jl_bfloat16_type) + *(uint16_t*)pr = julia_float_to_bfloat(val); + else if (oty == jl_float32_type) *(float*)pr = val; - else if (onumbits == 64) + else if (oty == jl_float64_type) *(double*)pr = val; else jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); } extern "C" JL_DLLEXPORT -void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +void LLVMUItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { double val; { // end scope before jl_error call + unsigned numbytes = jl_datatype_size(ty); + unsigned numbits = numbytes * host_char_bit; CREATE(a) val = a.roundToDouble(false); } - if (onumbits == 16) - *(uint16_t*)pr = julia__gnu_f2h_ieee(val); - else if (onumbits == 32) + if (oty == jl_float16_type) + *(uint16_t*)pr = 
julia_float_to_half(val); + else if (oty == jl_bfloat16_type) + *(uint16_t*)pr = julia_float_to_bfloat(val); + else if (oty == jl_float32_type) *(float*)pr = val; - else if (onumbits == 64) + else if (oty == jl_float64_type) *(double*)pr = val; else jl_error("UItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); } extern "C" JL_DLLEXPORT -void LLVMSExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - if (!(onumbits > inumbits)) +void LLVMSExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) { + unsigned inumbytes = jl_datatype_size(ty); + unsigned onumbytes = jl_datatype_size(otys); + if (!(onumbytes > inumbytes)) jl_error("SExt: output bitsize must be > input bitsize"); - unsigned inumbytes = RoundUpToAlignment(inumbits, host_char_bit) / host_char_bit; - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; + unsigned inumbits = inumbytes * host_char_bit; int bits = (0 - inumbits) % host_char_bit; int signbit = (inumbits - 1) % host_char_bit; int sign = ((unsigned char*)pa)[inumbytes - 1] & (1 << signbit) ? -1 : 0; @@ -437,11 +449,12 @@ void LLVMSExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart } extern "C" JL_DLLEXPORT -void LLVMZExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - if (!(onumbits > inumbits)) +void LLVMZExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) { + unsigned inumbytes = jl_datatype_size(ty); + unsigned onumbytes = jl_datatype_size(otys); + if (!(onumbytes > inumbytes)) jl_error("ZExt: output bitsize must be > input bitsize"); - unsigned inumbytes = RoundUpToAlignment(inumbits, host_char_bit) / host_char_bit; - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; + unsigned inumbits = inumbytes * host_char_bit; int bits = (0 - inumbits) % host_char_bit; // copy over the input bytes memcpy(pr, pa, inumbytes); @@ -454,31 +467,32 @@ void LLVMZExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart } extern "C" JL_DLLEXPORT -void LLVMTrunc(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - if (!(onumbits < inumbits)) +void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) { + unsigned inumbytes = jl_datatype_size(ty); + unsigned onumbytes = jl_datatype_size(otys); + if (!(onumbytes < inumbytes)) jl_error("Trunc: output bitsize must be < input bitsize"); - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; memcpy(pr, pa, onumbytes); } extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_8(uint8_t Val) { - return countTrailingZeros(Val); +unsigned countr_zero_8(uint8_t Val) { + return countr_zero(Val); } extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_16(uint16_t Val) { - return countTrailingZeros(Val); +unsigned countr_zero_16(uint16_t Val) { + return countr_zero(Val); } extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_32(uint32_t Val) { - return countTrailingZeros(Val); +unsigned countr_zero_32(uint32_t Val) { + return countr_zero(Val); } extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_64(uint64_t Val) { - return countTrailingZeros(Val); +unsigned countr_zero_64(uint64_t Val) { + return countr_zero(Val); } extern "C" JL_DLLEXPORT @@ -510,31 +524,31 @@ void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integer } extern "C" JL_DLLEXPORT -unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa) { 
+unsigned LLVMPopcount(unsigned numbits, integerPart *pa) { CREATE(a) - return a.countPopulation(); + return a.popcount(); } extern "C" JL_DLLEXPORT -unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa) { +unsigned LLVMCountr_one(unsigned numbits, integerPart *pa) { CREATE(a) - return a.countTrailingOnes(); + return a.countr_one(); } extern "C" JL_DLLEXPORT -unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa) { +unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa) { CREATE(a) - return a.countTrailingZeros(); + return a.countr_zero(); } extern "C" JL_DLLEXPORT -unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa) { +unsigned LLVMCountl_one(unsigned numbits, integerPart *pa) { CREATE(a) - return a.countLeadingOnes(); + return a.countl_one(); } extern "C" JL_DLLEXPORT -unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa) { +unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa) { CREATE(a) - return a.countLeadingZeros(); + return a.countl_zero(); } diff --git a/src/APInt-C.h b/src/APInt-C.h index e71d49e82e99a..59ce3c765eeec 100644 --- a/src/APInt-C.h +++ b/src/APInt-C.h @@ -3,12 +3,15 @@ #ifndef JL_APINT_C_H #define JL_APINT_C_H +#include "julia.h" +#include "dtypes.h" +#include "llvm-version.h" + #ifdef __cplusplus extern "C" { #endif -#include "dtypes.h" -#ifdef LLVM_VERSION_MAJOR +#if defined(__cplusplus) && defined(LLVM_VERSION_MAJOR) using integerPart = llvm::APInt::WordType; #else typedef void integerPart; @@ -51,30 +54,30 @@ JL_DLLEXPORT int LLVMDiv_uov(unsigned numbits, integerPart *pa, integerPart *pb, JL_DLLEXPORT int LLVMRem_sov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT int LLVMRem_uov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); -JL_DLLEXPORT unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa); +JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa); +JL_DLLEXPORT unsigned LLVMCountr_one(unsigned numbits, integerPart *pa); +JL_DLLEXPORT unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa); +JL_DLLEXPORT unsigned LLVMCountl_one(unsigned numbits, integerPart *pa); +JL_DLLEXPORT unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa); -JL_DLLEXPORT void LLVMFPtoSI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMFPtoUI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMSExt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMZExt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMTrunc(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); +JL_DLLEXPORT void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMSItoFP(jl_datatype_t *ty, integerPart *pa, 
jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMUItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMSExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMZExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); -JL_DLLEXPORT int LLVMFPtoSI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT int LLVMFPtoUI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); +JL_DLLEXPORT int LLVMFPtoSI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); -JL_DLLEXPORT unsigned countTrailingZeros_8(uint8_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_16(uint16_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_32(uint32_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_64(uint64_t Val); +JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val); +JL_DLLEXPORT unsigned countr_zero_16(uint16_t Val); +JL_DLLEXPORT unsigned countr_zero_32(uint32_t Val); +JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val); //uint8_t getSwappedBytes_8(uint8_t Value); // no-op //uint16_t getSwappedBytes_16(uint16_t Value); diff --git a/src/Makefile b/src/Makefile index 9e34dfda1c4ed..b49d27e05ff28 100644 --- a/src/Makefile +++ b/src/Makefile @@ -26,7 +26,11 @@ endif JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat ifeq ($(USECLANG),1) -FLAGS += -Wno-return-type-c-linkage +FLAGS += -Wno-return-type-c-linkage -Wno-atomic-alignment +endif + +ifneq (${MMTK_PLAN},None) +FLAGS += -I$(MMTK_API_INC) endif FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"' @@ -40,23 +44,41 @@ ifeq ($(OS),FreeBSD) FLAGS += -I$(LOCALBASE)/include endif +# GC source code. It depends on which GC implementation to use. 
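The runtime conversion intrinsics in the APInt-C change above now receive the source and destination jl_datatype_t instead of a bare bit width, because a width alone cannot distinguish Float16 from BFloat16 (both are 16 bits); the byte size is then derived with jl_datatype_size(oty). A minimal sketch of that dispatch follows; DataType, f16, bf16, f32, f64 and load_as_double are illustrative stand-ins, not Julia API.

    #include <cstdint>
    #include <cstring>
    #include <stdexcept>

    // Stand-ins for jl_datatype_t / jl_datatype_size: only identity and size matter here.
    struct DataType { const char *name; unsigned size; };
    static const DataType f16{"Float16", 2}, bf16{"BFloat16", 2},
                          f32{"Float32", 4}, f64{"Float64", 8};

    // BFloat16 is the top 16 bits of an IEEE single, so widening is a shift.
    static float bfloat_to_float(uint16_t v) {
        uint32_t bits = uint32_t(v) << 16;
        float out;
        std::memcpy(&out, &bits, sizeof out);
        return out;
    }

    // Mirrors the shape of LLVMFPtoInt's first step: dispatch on type identity,
    // not on bit width, so the two 16-bit formats stay distinguishable.
    static double load_as_double(const DataType *ty, const void *pa) {
        if (ty == &f16)
            throw std::runtime_error("Float16 needs a real half decode (julia_half_to_float in the diff)");
        if (ty == &bf16) { uint16_t v; std::memcpy(&v, pa, 2); return bfloat_to_float(v); }
        if (ty == &f32)  { float v;    std::memcpy(&v, pa, 4); return v; }
        if (ty == &f64)  { double v;   std::memcpy(&v, pa, 8); return v; }
        throw std::runtime_error("unsupported source type");
    }

The output side follows the same pattern: the byte count for the result now comes from jl_datatype_size(oty) rather than a separate bit-count argument.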
+GC_SRCS := gc-common gc-stacks gc-alloc-profiler gc-heap-snapshot +ifneq (${MMTK_PLAN},None) +GC_SRCS += gc-mmtk +else +GC_SRCS += gc-stock gc-debug gc-pages gc-page-profiler +endif + SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ - dlload sys init task array staticdata toplevel jl_uv datatype \ - simplevector runtime_intrinsics precompile jloptions \ - threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ - jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ - crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall + dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \ + simplevector runtime_intrinsics precompile jloptions mtarraylist \ + threading scheduler stackwalk \ + method jlapi signal-handling safepoint timing subtype rtutils \ + crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall engine \ + $(GC_SRCS) RT_LLVMLINK := CG_LLVMLINK := ifeq ($(JULIACODEGEN),LLVM) -CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \ - llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ - llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ +# Currently these files are used by both GCs. But we should make the list specific to stock, and MMTk should have its own implementation. +GC_CODEGEN_SRCS := llvm-final-gc-lowering llvm-late-gc-lowering llvm-gc-invariant-verifier +ifneq (${MMTK_PLAN},None) +FLAGS += -I$(MMTK_API_INC) +GC_CODEGEN_SRCS += llvm-late-gc-lowering-mmtk +else +GC_CODEGEN_SRCS += llvm-late-gc-lowering-stock +endif +CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \ + llvm-pass-helpers llvm-ptls \ + llvm-lower-handlers llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ - llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api + llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api \ + $(GC_CODEGEN_SRCS) FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) CG_LLVM_LIBS := all ifeq ($(USE_POLLY),1) @@ -77,7 +99,7 @@ else # JULIACODEGEN != LLVM endif -RT_LLVM_LIBS := support +RT_LLVM_LIBS := support targetparser ifeq ($(OS),WINNT) SRCS += win32_ucontext @@ -99,7 +121,12 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +ifneq (${MMTK_PLAN},None) + PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-mmtk.h) +else + PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,gc-tls-stock.h) +endif ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) endif @@ -110,6 +137,10 @@ PUBLIC_HEADER_TARGETS := $(addprefix $(build_includedir)/julia/,$(notdir $(PUBLI LLVM_LDFLAGS := $(shell $(LLVM_CONFIG_HOST) --ldflags) LLVM_CXXFLAGS := $(shell $(LLVM_CONFIG_HOST) --cxxflags) +ifeq ($(OS)_$(BINARY),WINNT_32) +LLVM_CXXFLAGS += -I$(SRCDIR)/support/win32-clang-ABI-bug +endif + # llvm-config --cxxflags does 
not return -DNDEBUG ifeq ($(shell $(LLVM_CONFIG_HOST) --assertion-mode),OFF) LLVM_CXXFLAGS += -DNDEBUG @@ -119,8 +150,8 @@ ifeq ($(JULIACODEGEN),LLVM) ifneq ($(USE_SYSTEM_LLVM),0) # USE_SYSTEM_LLVM != 0 CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs) -LLVM_SHLIB_SYMBOL_VERSION := $(shell nm -D --with-symbol-versions $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \ - grep _ZN4llvm3Any6TypeId | head -n 1 | sed -e 's/.*@//') +LLVM_SHLIB_SYMBOL_VERSION := $(shell readelf -W --dyn-syms $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \ + grep _ZN4llvm3Any6TypeId | head -n 1 | sed -ne 's/.*@//p') # HACK: llvm-config doesn't correctly point to shared libs on all platforms # https://github.com/JuliaLang/julia/issues/29981 @@ -153,15 +184,15 @@ endif CLANG_LDFLAGS := $(LLVM_LDFLAGS) ifeq ($(OS), Darwin) CLANG_LDFLAGS += -Wl,-undefined,dynamic_lookup -OSLIBS += -Wl,-U,__dyld_atfork_parent -Wl,-U,__dyld_atfork_prepare -Wl,-U,__dyld_dlopen_atfork_parent -Wl,-U,__dyld_dlopen_atfork_prepare +OSLIBS += -Wl,-U,__dyld_atfork_parent -Wl,-U,__dyld_atfork_prepare -Wl,-U,__dyld_dlopen_atfork_parent -Wl,-U,__dyld_dlopen_atfork_prepare -Wl,-U,_jl_image_pointers -Wl,-U,_jl_system_image_data -Wl,-U,_jl_system_image_size LIBJULIA_PATH_REL := @rpath/libjulia else LIBJULIA_PATH_REL := libjulia endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) -RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) -CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) +RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB) +CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB) RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS) @@ -223,7 +254,7 @@ $(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc # to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`. This avoids # potential confusion with debugging tools, when inspecting a process that has both `libjulia` and `libjulia-internal` # loaded at the same time. 
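The jl_internal_funcs.inc file produced by the recipe below is just a list of preprocessor aliases, one per exported symbol, mapping each jl_ name onto its ijl_ counterpart. A hypothetical excerpt (the real list is derived from jl_exported_funcs.inc):

    /* Hypothetical excerpt of the generated jl_internal_funcs.inc;
       the actual entries come from jl_exported_funcs.inc via the grep/sed recipe below. */
    #define jl_apply_generic ijl_apply_generic
    #define jl_box_int64     ijl_box_int64
    #define jl_gc_collect    ijl_gc_collect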
- grep 'XX(.\+)' $< | sed -E 's/.*XX\((.+)\).*/#define \1 i\1/g' >$@ + grep 'XX(..*)' $< | sed -E 's/.*XX\((.+)\).*/#define \1 i\1/g' >$@ # source file rules $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR) @@ -252,6 +283,8 @@ $(build_includedir)/julia/uv/*.h: $(LIBUV_INC)/uv/*.h | $(build_includedir)/juli $(INSTALL_F) $^ $(build_includedir)/julia/uv libccalltest: $(build_shlibdir)/libccalltest.$(SHLIB_EXT) +libccalllazyfoo: $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT) +libccalllazybar: $(build_shlibdir)/libccalllazybar.$(SHLIB_EXT) libllvmcalltest: $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT) ifeq ($(OS), Linux) @@ -276,6 +309,12 @@ endif mv $@.tmp $@ $(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@ +$(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT): $(SRCDIR)/ccalllazyfoo.c + @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazyfoo.$(SHLIB_EXT))) + +$(build_shlibdir)/libccalllazybar.$(SHLIB_EXT): $(SRCDIR)/ccalllazybar.c $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT) + @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo) + $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE) @$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread @@ -294,17 +333,20 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase $(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h $(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h -$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h +$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/idset.c $(SRCDIR)/builtin_proto.h $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\ intrinsics.cpp jitlayers.h intrinsics.h llvm-codegen-shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h) $(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h) $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h -$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h -$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h -$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h +$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h +$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/gc-mmtk.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h 
$(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-stacks.o $(BUILDDIR)/gc-stacks.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h +$(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h +$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h +$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h @@ -314,10 +356,10 @@ $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDI $(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h $(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h $(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h -$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h -$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h $(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h @@ -328,10 +370,10 @@ $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,pr $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c) $(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/precompile_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h -$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h +$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(SRCDIR)/common_symbols1.inc $(SRCDIR)/common_symbols2.inc $(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h -$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h) +$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc-common.o gc-stock.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix 
$(SRCDIR)/,threading.h) $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h # archive library file rules @@ -368,13 +410,13 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION CXXLD = $(CXX) -shared -$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION $(LLVM_CONFIG_ABSOLUTE) sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \ -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/" $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ - $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) + $(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ @@ -402,7 +444,7 @@ libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS) $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \ - $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))) + $(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ @@ -459,6 +501,8 @@ $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc ifeq ($(OS),Darwin) ANALYSIS_DEPS += llvmunwind +else ifeq ($(OS),OpenBSD) +ANALYSIS_DEPS += llvmunwind else ifneq ($(OS),WINNT) ANALYSIS_DEPS += unwind endif @@ -481,8 +525,6 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult" SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core" # these need to be annotated (and possibly fixed) -SKIP_IMPLICIT_ATOMICS := staticdata.c - # these need to be annotated (and possibly fixed) SKIP_GC_CHECK := codegen.cpp rtutils.c # make sure LLVM's invariant information is not discarded with -DNDEBUG @@ -520,10 +562,15 @@ clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) # set the exports for the source files based on where they are getting linked -clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS +$(addprefix clang-sa-,$(SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_INTERNAL +$(addprefix clang-sagc-,$(SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_INTERNAL +$(addprefix clang-tidy-,$(SRCS)): DEBUGFLAGS_CLANG += 
-DJL_LIBRARY_EXPORTS_INTERNAL +$(addprefix clang-sa-,$(CODEGEN_SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_CODEGEN +$(addprefix clang-sagc-,$(CODEGEN_SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_CODEGEN +$(addprefix clang-tidy-,$(CODEGEN_SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_CODEGEN # Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc` -tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS))) +tidysrc: $(addprefix clang-tidy-,$(CODEGEN_SRCS) $(SRCS)) analyzesrc: $(addprefix clang-sa-,$(CODEGEN_SRCS) $(SRCS)) analyzegc: $(addprefix clang-sagc-,$(filter-out $(basename $(SKIP_GC_CHECK)),$(CODEGEN_SRCS) $(SRCS))) analyze: analyzesrc analyzegc tidysrc diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp index 514c3c5a81a6d..0a193ee132556 100644 --- a/src/abi_aarch64.cpp +++ b/src/abi_aarch64.cpp @@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout { Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0` + // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0` if (dt->layout == NULL || jl_is_layout_opaque(dt->layout)) return nullptr; size_t nfields = dt->layout->nfields; @@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0` + // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0` Type *lltype; // Check size first since it's cheaper. switch (jl_datatype_size(dt)) { @@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - if (dt->name->mutabl || dt->layout->npointers || dt->layout->haspadding) + if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding) return nullptr; return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx); } @@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const // uniquely addressable members. 
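For intuition about which layouts pass the HFA/HVA test being tightened here with the isbitsegal and haspadding flags, the following hypothetical C-level stand-ins for Julia struct layouts show the idea; only the first two could be returned through the floating-point register path.

    // Hypothetical C-level stand-ins for Julia struct layouts:
    struct Vec2    { double x, y; };           // HFA: homogeneous FP fields, no padding, no pointers
    struct Vec4f   { float x, y, z, w; };      // HFA: four members is the maximum
    struct Mixed   { float x; double y; };     // rejected: heterogeneous element types (and padding)
    struct Padded  { float x; double y[2]; };  // rejected: haspadding (4 bytes of padding after x)
    struct WithRef { void *p; double d; };     // rejected: npointers != 0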
// Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector) size_t dsz = jl_datatype_size(dt); - if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->haspadding) + if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding) return NULL; nele = 0; ElementType eltype; diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp index 441aa95b1fdf6..8839a37da6e13 100644 --- a/src/abi_arm.cpp +++ b/src/abi_arm.cpp @@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const if (jl_is_structtype(dt)) { // Fast path checks before descending the type hierarchy // (4 x 128b vector == 64B max size) - if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->haspadding) + if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding) return 0; base = NULL; diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp index 2e18acdbd4f4b..f02e1022ddc2d 100644 --- a/src/abi_ppc64le.cpp +++ b/src/abi_ppc64le.cpp @@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout { // count the homogeneous floating aggregate size (saturating at max count of 8) unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const { - if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->haspadding) + if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding) return 9; size_t i, l = ty->layout->nfields; @@ -118,7 +118,12 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *T Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override { // Arguments are either scalar or passed by value - size_t size = jl_datatype_size(dt); + + // LLVM passes Float16 in floating-point registers, but this doesn't match the ABI. + // No C compiler seems to support _Float16 yet, so in the meantime, pass as i16 + if (dt == jl_float16_type || dt == jl_bfloat16_type) + return Type::getInt16Ty(ctx); + // don't need to change bitstypes if (!jl_datatype_nfields(dt)) return NULL; @@ -143,6 +148,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const } // rewrite integer-sized (non-HFA) struct to an array // the bitsize of the integer gives the desired alignment + size_t size = jl_datatype_size(dt); if (size > 8) { if (jl_datatype_align(dt) <= 8) { Type *T_int64 = Type::getInt64Ty(ctx); diff --git a/src/abi_riscv.cpp b/src/abi_riscv.cpp new file mode 100644 index 0000000000000..cbd85892801c8 --- /dev/null +++ b/src/abi_riscv.cpp @@ -0,0 +1,315 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +//===----------------------------------------------------------------------===// +// +// The ABI implementation used for RISC-V targets. 
+// +//===----------------------------------------------------------------------===// +// +// The Procedure Call Standard can be found here: +// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc +// +// This code is based on: +// - The Rust implementation: +// https://github.com/rust-lang/rust/blob/master/compiler/rustc_target/src/abi/call/riscv.rs +// - The LLVM RISC-V backend: +// https://github.com/llvm/llvm-project/blob/78533528cf5ed04ac78722afff7c9f2f91aa8359/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L10865 +// +//===----------------------------------------------------------------------===// + + +struct ABI_RiscvLayout : AbiLayout { + +static const size_t XLen = 8; +static const size_t FLen = 8; +static const int NumArgGPRs = 8; +static const int NumArgFPRs = 8; + +// available register num is needed to determine if fp pair or int-fp pair in a struct should be unpacked +// WARN: with this, use_sret must only be called once before the next +// needPassByRef call, otherwise avail_gprs is wrong +int avail_gprs, avail_fprs; + +// preferred type is determined in the same time of use_sret & needPassByRef +// cache it here to avoid computing it again in preferred_llvm_type +Type *cached_llvmtype = NULL; + +ABI_RiscvLayout() : avail_gprs(NumArgGPRs), avail_fprs(NumArgFPRs) {} + +enum RegPassKind { UNKNOWN = 0, INTEGER = 1, FLOAT = 2 }; + +struct ElementType { + RegPassKind type; + jl_datatype_t *dt; + ElementType() : type(RegPassKind::UNKNOWN), dt(NULL) {}; +}; + +bool is_floattype(jl_datatype_t *dt) const +{ + return dt == jl_float16_type || dt == jl_float32_type || dt == jl_float64_type; +} + +Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const +{ + assert(is_floattype(dt)); + switch (jl_datatype_size(dt)) { + case 2: return Type::getHalfTy(ctx); + case 4: return Type::getFloatTy(ctx); + case 8: return Type::getDoubleTy(ctx); + case 16: return Type::getFP128Ty(ctx); + default: assert(0 && "abi_riscv: unsupported floating point type"); return NULL; + } +} + +// for primitive types that can be passed as integer +// includes integer, bittypes, pointer +Type *get_llvm_inttype(jl_datatype_t *dt, LLVMContext &ctx) const +{ + assert(jl_is_primitivetype(dt)); + // XXX: without Zfh, Float16 is passed in integer registers + if (dt == jl_float16_type) + return Type::getInt32Ty(ctx); + assert(!is_floattype(dt)); + if (dt == jl_bool_type) + return getInt8Ty(ctx); + if (dt == jl_int32_type) + return getInt32Ty(ctx); + if (dt == jl_int64_type) + return getInt64Ty(ctx); + int nb = jl_datatype_size(dt); + return Type::getIntNTy(ctx, nb * 8); +} + +bool should_use_fp_conv(jl_datatype_t *dt, ElementType &ele1, ElementType &ele2) const +{ + if (jl_is_primitivetype(dt)) { + size_t dsz = jl_datatype_size(dt); + if (dsz > FLen) { + return false; + } + if (is_floattype(dt)) { + if (ele1.type == RegPassKind::UNKNOWN) { + ele1.type = RegPassKind::FLOAT; + ele1.dt = dt; + } + else if (ele2.type == RegPassKind::UNKNOWN) { + ele2.type = RegPassKind::FLOAT; + ele2.dt = dt; + } + else { + // 3 elements not eligible, must be a pair + return false; + } + } + // integer or pointer type or bitstypes + else { + if (ele1.type == RegPassKind::UNKNOWN) { + ele1.type = RegPassKind::INTEGER; + ele1.dt = dt; + } + else if (ele1.type == RegPassKind::INTEGER) { + // two integers not eligible + return false; + } + // ele1.type == RegPassKind::FLOAT + else { + if (ele2.type == RegPassKind::UNKNOWN) { + ele2.type = RegPassKind::INTEGER; + ele2.dt = dt; + } + else { + // 3 elements not 
eligible, must be a pair + return false; + } + } + } + } + else { // aggregates + while (size_t nfields = jl_datatype_nfields(dt)) { + size_t i; + size_t fieldsz; + for (i = 0; i < nfields; i++) { + if ((fieldsz = jl_field_size(dt, i))) { + break; + } + } + assert(i < nfields); + // If there's only one non zero sized member, try again on this member + if (fieldsz == jl_datatype_size(dt)) { + dt = (jl_datatype_t *)jl_field_type(dt, i); + if (!jl_is_datatype(dt)) // could be inline union #46787 + return false; + continue; + } + for (; i < nfields; i++) { + size_t fieldsz = jl_field_size(dt, i); + if (fieldsz == 0) + continue; + jl_datatype_t *fieldtype = (jl_datatype_t *)jl_field_type(dt, i); + if (!jl_is_datatype(dt)) // could be inline union + return false; + // This needs to be done after the zero size member check + if (ele2.type != RegPassKind::UNKNOWN) { + // we already have a pair and can't accept more elements + return false; + } + if (!should_use_fp_conv(fieldtype, ele1, ele2)) { + return false; + } + } + break; + } + } + // Tuple{Int,} can reach here as well, but doesn't really hurt + return true; +} + +Type *get_llvm_inttype_byxlen(size_t xlen, LLVMContext &ctx) const +{ + if (xlen == 8) { + return getInt64Ty(ctx); + } + else if (xlen == 4) { + return getInt32Ty(ctx); + } + else { + assert(0 && "abi_riscv: unsupported xlen"); + return NULL; + } +} + +Type *classify_arg(jl_datatype_t *ty, int &avail_gprs, int &avail_fprs, bool &onstack, + LLVMContext &ctx) const +{ + onstack = false; + if (ty == jl_nothing_type) { + return NULL; + } + ElementType ele1, ele2; + if (should_use_fp_conv(ty, ele1, ele2)) { + if (ele1.type == RegPassKind::FLOAT) { + if (ele2.type == RegPassKind::FLOAT) { + if (avail_fprs >= 2) { + avail_fprs -= 2; + SmallVector eles; + eles.push_back(get_llvm_fptype(ele1.dt, ctx)); + eles.push_back(get_llvm_fptype(ele2.dt, ctx)); + return StructType::get(ctx, eles); + } + } + else if (ele2.type == RegPassKind::INTEGER) { + if (avail_fprs >= 1 && avail_gprs >= 1) { + avail_fprs -= 1; + avail_gprs -= 1; + SmallVector eles; + eles.push_back(get_llvm_fptype(ele1.dt, ctx)); + eles.push_back(get_llvm_inttype(ele2.dt, ctx)); + return StructType::get(ctx, eles); + } + } + else { + // A struct containing just one floating-point real is passed + // as though it were a standalone floating-point real. + if (avail_fprs >= 1) { + avail_fprs -= 1; + return get_llvm_fptype(ele1.dt, ctx); + } + } + } + else if (ele1.type == RegPassKind::INTEGER) { + if (ele2.type == RegPassKind::FLOAT) { + if (avail_fprs >= 1 && avail_gprs >= 1) { + avail_fprs -= 1; + avail_gprs -= 1; + return StructType::get(get_llvm_inttype(ele1.dt, ctx), + get_llvm_fptype(ele2.dt, ctx)); + } + } + } + } + size_t dsz = jl_datatype_size(ty); + if (dsz > 2 * XLen) { + if (!jl_is_primitivetype(ty)) { + onstack = true; + } + // else let llvm backend handle scalars + if (avail_gprs >= 1) { + avail_gprs -= 1; + } + return NULL; + } + + if (dsz > XLen) { + size_t alignment = jl_datatype_align(ty); + bool align_regs = alignment > XLen; + if (avail_gprs >= 2) { + avail_gprs -= 2; + } + // should we handle variadic as well? + // Variadic arguments with 2×XLEN-bit alignment and size at most 2×XLEN + // bits are passed in an aligned register pair + else { + avail_gprs = 0; + } + + if (!jl_is_primitivetype(ty)) { + // Aggregates or scalars passed on the stack are aligned to the + // greater of the type alignment and XLen bits, but never more than + // the stack alignment. 
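Concretely, under the hardware floating-point convention that classify_arg implements (assuming RV64 with FLen = XLen = 8 and enough free argument registers), hypothetical C stand-ins for small isbits Julia structs would be lowered as follows:

    // Hypothetical C stand-ins for small isbits Julia structs, with the lowering
    // classify_arg would pick when argument registers are available:
    struct DD { double a, b; };      // two FP fields, each <= FLen -> { double, double }, two FPRs
    struct DI { double a; int b; };  // FP + integer pair           -> { double, i32 }, one FPR + one GPR
    struct D1 { double a; };         // single FP field             -> passed as a bare double in one FPR
    struct I3 { int a, b, c; };      // two integer fields already disqualify the FP convention;
                                     // 12 bytes fits in 2*XLen, so it is lowered to a [2 x i64] register pair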
+ if (align_regs) { + if (alignment == 16) { + return Type::getInt128Ty(ctx); + } + else { + return Type::getInt64Ty(ctx); + } + } + else { + return ArrayType::get(get_llvm_inttype_byxlen(XLen, ctx), 2); + } + } + // let llvm backend handle scalars + return NULL; + } + + //else dsz <= XLen + if (avail_gprs >= 1) { + avail_gprs -= 1; + } + if (!jl_is_primitivetype(ty)) { + return get_llvm_inttype_byxlen(XLen, ctx); + } + return get_llvm_inttype(ty, ctx); +} + +bool use_sret(jl_datatype_t *ty, LLVMContext &ctx) override +{ + bool onstack = false; + int gprs = 2; + int fprs = FLen ? 2 : 0; + this->cached_llvmtype = classify_arg(ty, gprs, fprs, onstack, ctx); + if (onstack) { + this->avail_gprs -= 1; + return true; + } + else { + return false; + } +} + +bool needPassByRef(jl_datatype_t *ty, AttrBuilder &ab, LLVMContext &ctx, + Type *Ty) override +{ + bool onstack = false; + this->cached_llvmtype = + classify_arg(ty, this->avail_gprs, this->avail_fprs, onstack, ctx); + return onstack; +} + +Type *preferred_llvm_type(jl_datatype_t *ty, bool isret, + LLVMContext &ctx) const override +{ + return this->cached_llvmtype; +} + +}; diff --git a/src/abi_win32.cpp b/src/abi_win32.cpp index 078d9b6df4e44..ccfc6a16ebee3 100644 --- a/src/abi_win32.cpp +++ b/src/abi_win32.cpp @@ -52,7 +52,7 @@ bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override { // Use pass by reference for all structs - if (dt->layout->nfields > 0) { + if (dt->layout->nfields > 0 || dt->layout->npointers) { ab.addByValAttr(Ty); return true; } @@ -63,7 +63,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const { // Arguments are either scalar or passed by value // rewrite integer sized (non-sret) struct to the corresponding integer - if (!dt->layout->nfields) + if (!dt->layout->nfields && !dt->layout->npointers) return NULL; return Type::getIntNTy(ctx, jl_datatype_nbits(dt)); } diff --git a/src/abi_x86_64.cpp b/src/abi_x86_64.cpp index c3d12417e6de8..6a853421dbccd 100644 --- a/src/abi_x86_64.cpp +++ b/src/abi_x86_64.cpp @@ -118,7 +118,8 @@ struct Classification { void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) const { // Floating point types - if (dt == jl_float64_type || dt == jl_float32_type) { + if (dt == jl_float64_type || dt == jl_float32_type || dt == jl_float16_type || + dt == jl_bfloat16_type) { accum.addField(offset, Sse); } // Misc types @@ -147,7 +148,7 @@ void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) con accum.addField(offset, Sse); } // Other struct types - else if (jl_datatype_size(dt) <= 16 && dt->layout) { + else if (jl_datatype_size(dt) <= 16 && dt->layout && !jl_is_layout_opaque(dt->layout)) { size_t i; for (i = 0; i < jl_datatype_nfields(dt); ++i) { jl_value_t *ty = jl_field_type(dt, i); @@ -239,7 +240,9 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const types[0] = Type::getIntNTy(ctx, nbits); break; case Sse: - if (size <= 4) + if (size <= 2) + types[0] = Type::getHalfTy(ctx); + else if (size <= 4) types[0] = Type::getFloatTy(ctx); else types[0] = Type::getDoubleTy(ctx); diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 1f02a014175b4..0235758979cd1 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -4,7 +4,8 @@ #include "platform.h" // target support -#include +#include +#include "llvm/Support/CodeGen.h" #include #include #include @@ -14,24 +15,9 @@ // analysis passes #include 
-#include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -62,12 +48,10 @@ using namespace llvm; #include "jitlayers.h" #include "serialize.h" #include "julia_assert.h" -#include "llvm-codegen-shared.h" #include "processor.h" #define DEBUG_TYPE "julia_aotcompile" -STATISTIC(CICacheLookups, "Number of codeinst cache lookups"); STATISTIC(CreateNativeCalls, "Number of jl_create_native calls made"); STATISTIC(CreateNativeMethods, "Number of methods compiled for jl_create_native"); STATISTIC(CreateNativeMax, "Max number of methods compiled at once for jl_create_native"); @@ -85,11 +69,11 @@ static void addComdat(GlobalValue *G, Triple &T) typedef struct { orc::ThreadSafeModule M; - std::vector jl_sysimg_fvars; - std::vector jl_sysimg_gvars; + SmallVector jl_sysimg_fvars; + SmallVector jl_sysimg_gvars; std::map> jl_fvar_map; - std::vector jl_value_to_llvm; - std::vector jl_external_to_llvm; + SmallVector jl_value_to_llvm; + SmallVector jl_external_to_llvm; } jl_native_code_desc_t; extern "C" JL_DLLEXPORT_CODEGEN @@ -106,22 +90,55 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst, } } -extern "C" JL_DLLEXPORT_CODEGEN -void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs) +extern "C" JL_DLLEXPORT_CODEGEN void +jl_get_llvm_mis_impl(void *native_code, size_t *num_elements, jl_method_instance_t **data) +{ + jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code; + auto &map = desc->jl_fvar_map; + + if (data == NULL) { + *num_elements = map.size(); + return; + } + + assert(*num_elements == map.size()); + size_t i = 0; + for (auto &ci : map) { + data[i++] = jl_get_ci_mi(ci.first); + } +} + +extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_gvs_impl(void *native_code, + size_t *num_elements, void **data) { // map a memory location (jl_value_t or jl_binding_t) to a GlobalVariable - jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - arraylist_grow(gvs, data->jl_value_to_llvm.size()); - memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*)); + jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code; + auto &value_map = desc->jl_value_to_llvm; + + if (data == NULL) { + *num_elements = value_map.size(); + return; + } + + assert(*num_elements == value_map.size()); + memcpy(data, value_map.data(), *num_elements * sizeof(void *)); } -extern "C" JL_DLLEXPORT_CODEGEN -void jl_get_llvm_external_fns_impl(void *native_code, arraylist_t *external_fns) +extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_external_fns_impl(void *native_code, + size_t *num_elements, + jl_code_instance_t *data) { - jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - arraylist_grow(external_fns, data->jl_external_to_llvm.size()); - memcpy(external_fns->items, data->jl_external_to_llvm.data(), - external_fns->len * sizeof(jl_code_instance_t*)); + jl_native_code_desc_t *desc = (jl_native_code_desc_t *)native_code; + auto &external_map = desc->jl_external_to_llvm; + + if (data == NULL) { + *num_elements = external_map.size(); + return; + } + + assert(*num_elements == external_map.size()); + memcpy((void *)data, (const void *)external_map.data(), + *num_elements * sizeof(jl_code_instance_t *)); } extern "C" JL_DLLEXPORT_CODEGEN @@ -145,12 +162,78 @@ GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx) } -static void emit_offset_table(Module &mod, const std::vector 
&vars, StringRef name, Type *T_psize) + +template +static inline SmallVector consume_gv(Module &M, const char *name, bool allow_bad_fvars) +{ + // Get information about sysimg export functions from the two global variables. + // Strip them from the Module so that it's easier to handle the uses. + GlobalVariable *gv = M.getGlobalVariable(name); + assert(gv && gv->hasInitializer()); + ArrayType *Ty = cast(gv->getInitializer()->getType()); + unsigned nele = Ty->getArrayNumElements(); + SmallVector res(nele); + ConstantArray *ary = nullptr; + if (gv->getInitializer()->isNullValue()) { + for (unsigned i = 0; i < nele; ++i) + res[i] = cast(Constant::getNullValue(Ty->getArrayElementType())); + } + else { + ary = cast(gv->getInitializer()); + unsigned i = 0; + while (i < nele) { + llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); + if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { + // Shouldn't happen in regular use, but can happen in bugpoint. + nele--; + continue; + } + res[i++] = cast(val); + } + res.resize(nele); + } + assert(gv->use_empty()); + gv->eraseFromParent(); + if (ary && ary->use_empty()) + ary->destroyConstant(); + return res; +} + +static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base) +{ + if (ptr->getType()->isPointerTy()) + ptr = ConstantExpr::getPtrToInt(ptr, T_size); + auto ptrdiff = ConstantExpr::getSub(ptr, base); + return T_size->getPrimitiveSizeInBits() > 32 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff; +} + +static Constant *emit_offset_table(Module &M, Type *T_size, ArrayRef vars, + StringRef name, StringRef suffix) +{ + auto T_int32 = Type::getInt32Ty(M.getContext()); + uint32_t nvars = vars.size(); + ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); + auto gv = new GlobalVariable(M, vars_type, true, + GlobalVariable::ExternalLinkage, + nullptr, + name + "_offsets" + suffix); + auto vbase = ConstantExpr::getPtrToInt(gv, T_size); + SmallVector offsets(nvars + 1); + offsets[0] = ConstantInt::get(T_int32, nvars); + for (uint32_t i = 0; i < nvars; i++) + offsets[i + 1] = get_ptrdiff32(T_size, vars[i], vbase); + gv->setInitializer(ConstantArray::get(vars_type, offsets)); + gv->setVisibility(GlobalValue::HiddenVisibility); + gv->setDSOLocal(true); + return vbase; +} + +static void emit_table(Module &mod, ArrayRef vars, + StringRef name, Type *T_psize) { // Emit a global variable with all the variable addresses. - // The cloning pass will convert them into offsets. 
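The table built by emit_offset_table above is an i32 array whose first element is the entry count and whose remaining elements are the byte offsets of each target from the table's own address, truncated to 32 bits by get_ptrdiff32. A host-side sketch of that layout, purely illustrative and separate from the LLVM constant-building code:

    #include <cstdint>
    #include <vector>

    // Element 0 is the entry count; each following element is the 32-bit byte
    // offset of a target address relative to the table's base address.
    std::vector<int32_t> build_offset_table(uintptr_t table_base,
                                            const std::vector<uintptr_t> &targets) {
        std::vector<int32_t> out;
        out.push_back(static_cast<int32_t>(targets.size()));
        for (uintptr_t t : targets)
            out.push_back(static_cast<int32_t>(t - table_base)); // truncated ptrdiff, as in get_ptrdiff32
        return out;
    }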
size_t nvars = vars.size(); - std::vector addrs(nvars); + SmallVector addrs(nvars); for (size_t i = 0; i < nvars; i++) { Constant *var = vars[i]; addrs[i] = ConstantExpr::getBitCast(var, T_psize); @@ -223,152 +306,391 @@ static void makeSafeName(GlobalObject &G) G.setName(StringRef(SafeName.data(), SafeName.size())); } -static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world, jl_code_instance_t **ci_out, jl_code_info_t **src_out) +namespace { // file-local namespace +class egal_set { +public: + jl_genericmemory_t *list = (jl_genericmemory_t*)jl_an_empty_memory_any; + jl_genericmemory_t *keyset = (jl_genericmemory_t*)jl_an_empty_memory_any; + egal_set(egal_set&) = delete; + egal_set(egal_set&&) = delete; + egal_set() = default; + void insert(jl_value_t *val) + { + jl_value_t *rval = jl_idset_get(list, keyset, val); + if (rval == NULL) { + ssize_t idx; + list = jl_idset_put_key(list, val, &idx); + keyset = jl_idset_put_idx(list, keyset, idx); + } + } + jl_value_t *get(jl_value_t *val) + { + return jl_idset_get(list, keyset, val); + } +}; +} +using ::egal_set; +typedef DenseMap> jl_compiled_functions_t; + +static void record_method_roots(egal_set &method_roots, jl_method_instance_t *mi) { - ++CICacheLookups; - jl_value_t *ci = cgparams.lookup(mi, world, world); - JL_GC_PROMISE_ROOTED(ci); - jl_code_instance_t *codeinst = NULL; - if (ci != jl_nothing) { - codeinst = (jl_code_instance_t*)ci; - *src_out = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - jl_method_t *def = codeinst->def->def.method; - if ((jl_value_t*)*src_out == jl_nothing) - *src_out = NULL; - if (*src_out && jl_is_method(def)) - *src_out = jl_uncompress_ir(def, codeinst, (jl_value_t*)*src_out); - } - if (*src_out == NULL || !jl_is_code_info(*src_out)) { - if (cgparams.lookup != jl_rettype_inferred_addr) { - jl_error("Refusing to automatically run type inference with custom cache lookup."); + jl_method_t *m = mi->def.method; + if (!jl_is_method(m)) + return; + // the method might have a root for this already; use it if so + JL_LOCK(&m->writelock); + if (m->roots) { + size_t j, len = jl_array_dim0(m->roots); + for (j = 0; j < len; j++) { + jl_value_t *v = jl_array_ptr_ref(m->roots, j); + if (jl_is_globally_rooted(v)) + continue; + method_roots.insert(v); } - else { - *src_out = jl_type_infer(mi, world, 0); - if (*src_out) { - codeinst = jl_get_method_inferred(mi, (*src_out)->rettype, (*src_out)->min_world, (*src_out)->max_world); - if ((*src_out)->inferred) { - jl_value_t *null = nullptr; - jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing); + } + JL_UNLOCK(&m->writelock); +} + +static void aot_optimize_roots(jl_codegen_params_t ¶ms, egal_set &method_roots, jl_compiled_functions_t &compiled_functions) +{ + for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) { + jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i); + auto ref = params.global_targets.find((void*)val); + if (ref == params.global_targets.end()) + continue; + auto get_global_root = [val, &method_roots]() { + if (jl_is_globally_rooted(val)) + return val; + jl_value_t *mval = method_roots.get(val); + if (mval) + return mval; + return jl_as_global_root(val, 1); + }; + jl_value_t *mval = get_global_root(); + if (mval != val) { + GlobalVariable *GV = ref->second; + params.global_targets.erase(ref); + auto mref = params.global_targets.find((void*)mval); + if (mref != params.global_targets.end()) { + // replace ref with mref in all Modules + std::string OldName(GV->getName()); 
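egal_set above is an insert-if-absent set keyed by structural equality (jl_egal), used so that duplicate method roots collapse onto one canonical object before serialization. A rough standalone analogue, with std::string and ordinary equality standing in for jl_value_t* and egal:

    #include <string>
    #include <unordered_map>
    #include <vector>

    // Rough analogue of egal_set: hands back the canonical, first-inserted copy
    // of an element so later duplicates can be rewritten to share one object.
    // The real code keeps an idset keyset alongside the list.
    class canonical_set {
        std::unordered_map<std::string, size_t> index_; // value -> slot in list_
        std::vector<std::string> list_;                 // insertion-ordered storage
    public:
        void insert(const std::string &val) {
            if (!index_.count(val)) {
                index_.emplace(val, list_.size());
                list_.push_back(val);
            }
        }
        // Returns the canonical copy, or nullptr if no egal element was inserted.
        const std::string *get(const std::string &val) const {
            auto it = index_.find(val);
            return it == index_.end() ? nullptr : &list_[it->second];
        }
    };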
+ StringRef NewName(mref->second->getName()); + for (auto &def : compiled_functions) { + orc::ThreadSafeModule &TSM = std::get<0>(def.second); + Module &M = *TSM.getModuleUnlocked(); + if (GlobalValue *GV2 = M.getNamedValue(OldName)) { + if (GV2 == GV) + GV = nullptr; + // either replace or rename the old value to use the other equivalent name + if (GlobalValue *GV3 = M.getNamedValue(NewName)) { + GV2->replaceAllUsesWith(GV3); + GV2->eraseFromParent(); + } + else { + GV2->setName(NewName); + } + } } + assert(GV == nullptr); + } + else { + params.global_targets[(void*)mval] = GV; } } } - *ci_out = codeinst; } +static void resolve_workqueue(jl_codegen_params_t ¶ms, egal_set &method_roots, jl_compiled_functions_t &compiled_functions) +{ + decltype(params.workqueue) workqueue; + std::swap(params.workqueue, workqueue); + jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH1(&codeinst); + assert(!params.cache); + while (!workqueue.empty()) { + auto it = workqueue.pop_back_val(); + codeinst = it.first; + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + { + auto it = compiled_functions.find(codeinst); + if (it != compiled_functions.end()) { + auto &decls = it->second.second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + } + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + } + } + else if (params.params->trim) { + jl_safe_printf("warning: no code provided for function "); + jl_(codeinst->def); + if (params.params->trim) + abort(); + } + } + // patch up the prototype we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invokeName.empty() && params.params->trim) { + jl_safe_printf("warning: bailed out to invoke when compiling: "); + jl_(codeinst->def); + abort(); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + // TODO: maybe this can be cached in codeinst->specfptr? 
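The root-deduplication loop above (and, later, the workqueue resolution) relies on the same replace-or-rename step: if a global with the desired name already exists in the module, redirect all uses of the placeholder to it; otherwise the placeholder simply takes the name. A toy version over a plain symbol table, with replaceAllUsesWith reduced to returning the surviving symbol:

    #include <map>
    #include <string>

    // Toy symbol table standing in for a Module's named values.
    struct Sym { std::string name; };
    using SymbolTable = std::map<std::string, Sym *>;

    // If the real name already exists, callers of the placeholder must be
    // redirected to it (replaceAllUsesWith in the patch); otherwise the
    // placeholder claims the name. Returns the surviving symbol.
    static Sym *merge_or_rename(SymbolTable &mod, Sym *proto, const std::string &real_name)
    {
        auto it = mod.find(real_name);
        if (it != mod.end() && it->second != proto)
            return it->second;          // merge: reuse the existing definition
        mod.erase(proto->name);
        proto->name = real_name;        // rename: the placeholder becomes the definition
        mod[real_name] = proto;
        return proto;
    }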
+ emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); + } + } + if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") + ocinvokeDecl = pinvoke->getName(); + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); + } + } + workqueue.append(params.workqueue); + params.workqueue.clear(); + } + JL_GC_POP(); +} + + // takes the running content that has collected in the shadow module and dump it to disk -// this builds the object file portion of the sysimage files for fast startup, and can +// this builds the object file portion of the sysimage files for fast startup +// `_external_linkage` create linkages between pkgimages. +extern "C" JL_DLLEXPORT_CODEGEN +void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, int _trim, int _external_linkage, size_t world) +{ + JL_TIMING(INFERENCE, INFERENCE); + auto ct = jl_current_task; + bool timed = (ct->reentrant_timing & 1) == 0; + if (timed) + ct->reentrant_timing |= 1; + uint64_t compiler_start_time = 0; + uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) + compiler_start_time = jl_hrtime(); + + jl_cgparams_t cgparams = jl_default_cgparams; + cgparams.trim = _trim ? 1 : 0; + size_t compile_for[] = { jl_typeinf_world, world }; + int compiler_world = 1; + if (_trim || compile_for[0] == 0) + compiler_world = 0; + jl_value_t **fargs; + JL_GC_PUSHARGS(fargs, 4); + jl_array_t *codeinfos = NULL; + if (jl_typeinf_func) { + fargs[0] = (jl_value_t*)jl_typeinf_func; + fargs[1] = (jl_value_t*)methods; +#ifdef _P64 + jl_value_t *jl_array_ulong_type = jl_array_uint64_type; +#else + jl_value_t *jl_array_ulong_type = jl_array_uint32_type; +#endif + jl_array_t *worlds = jl_alloc_array_1d(jl_array_ulong_type, 1 + compiler_world); + fargs[2] = (jl_value_t*)worlds; + jl_array_data(worlds, size_t)[0] = jl_typeinf_world; + jl_array_data(worlds, size_t)[compiler_world] = world; // might overwrite previous + fargs[3] = _trim ? 
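jl_create_native_impl opens with the reentrant-timing guard used elsewhere in the runtime: the low bit of the task's reentrant_timing field marks an active measurement so nested compilations are not double-counted. A condensed sketch with stand-in types and std::chrono in place of jl_hrtime:

    #include <chrono>
    #include <cstdint>

    // Stand-in for the task's reentrant_timing field; only bit 0 is used here.
    struct Task { uint64_t reentrant_timing = 0; };
    static uint64_t cumulative_compile_ns = 0;

    static void timed_compile(Task &ct, void (*body)())
    {
        bool timed = (ct.reentrant_timing & 1) == 0;  // only the outermost call measures
        if (timed)
            ct.reentrant_timing |= 1;
        auto t0 = std::chrono::steady_clock::now();

        body();                                       // the actual compilation work

        if (timed) {
            auto t1 = std::chrono::steady_clock::now();
            cumulative_compile_ns +=
                (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count();
            ct.reentrant_timing &= ~1ull;             // release the guard bit
        }
    }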
jl_true : jl_false; + size_t last_age = ct->world_age; + ct->world_age = jl_typeinf_world; + codeinfos = (jl_array_t*)jl_apply(fargs, 4); + ct->world_age = last_age; + JL_TYPECHK(create_native, array_any, (jl_value_t*)codeinfos); + } + else { + // we could put a very simple generator here, but there is no reason to do that right now + jl_error("inference not available for generating compiled output"); + } + fargs[0] = (jl_value_t*)codeinfos; + void *data = jl_emit_native(codeinfos, llvmmod, &cgparams, _external_linkage); + + // move everything inside, now that we've merged everything + // (before adding the exported headers) + ((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) { + auto TT = Triple(M.getTargetTriple()); + Function *juliapersonality_func = nullptr; + if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { + // setting the function personality enables stack unwinding and catching exceptions + // so make sure everything has something set + Type *T_int32 = Type::getInt32Ty(M.getContext()); + juliapersonality_func = Function::Create(FunctionType::get(T_int32, true), + Function::ExternalLinkage, "__julia_personality", M); + juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass); + } + for (GlobalObject &G : M.global_objects()) { + if (!G.isDeclaration()) { + G.setLinkage(GlobalValue::InternalLinkage); + G.setDSOLocal(true); + makeSafeName(G); + if (Function *F = dyn_cast(&G)) { + if (juliapersonality_func) { + // Add unwind exception personalities to functions to handle async exceptions + F->setPersonalityFn(juliapersonality_func); + } + } + } + } + }); + + JL_GC_POP(); + if (timed) { + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } + ct->reentrant_timing &= ~1ull; + } + return data; +} + // also be used be extern consumers like GPUCompiler.jl to obtain a module containing // all reachable & inferrrable functions. -// The `policy` flag switches between the default mode `0` and the extern mode `1` used by GPUCompiler. -// `_imaging_mode` controls if raw pointers can be embedded (e.g. the code will be loaded into the same session). -// `_external_linkage` create linkages between pkgimages. 
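makeSafeName, called in the internalization loop above, rewrites symbol names into a form every assembler and linker will accept before the globals are renamed and hidden. The exact character set kept by the real implementation may differ from this sketch, which maps anything outside a conservative subset to '_':

    #include <string>

    // Sketch of a makeSafeName-style rewrite over a symbol name.
    static std::string make_safe_name(const std::string &name)
    {
        std::string safe = name;
        for (char &c : safe) {
            bool ok = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                      (c >= '0' && c <= '9') || c == '_';
            if (!ok)
                c = '_';    // replace characters object-file tooling may reject
        }
        return safe;
    }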
extern "C" JL_DLLEXPORT_CODEGEN -void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) +void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _external_linkage) { JL_TIMING(NATIVE_AOT, NATIVE_Create); ++CreateNativeCalls; - CreateNativeMax.updateMax(jl_array_len(methods)); + CreateNativeMax.updateMax(jl_array_nrows(codeinfos)); if (cgparams == NULL) cgparams = &jl_default_cgparams; jl_native_code_desc_t *data = new jl_native_code_desc_t; - CompilationPolicy policy = (CompilationPolicy) _policy; - bool imaging = imaging_default() || _imaging_mode == 1; - jl_workqueue_t emitted; jl_method_instance_t *mi = NULL; - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - auto ct = jl_current_task; - bool timed = (ct->reentrant_timing & 1) == 0; - if (timed) - ct->reentrant_timing |= 1; orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { - ctx = jl_ExecutionEngine->acquireContext(); - backing = jl_create_ts_module("text", ctx, imaging); + ctx = jl_ExecutionEngine->makeContext(); + backing = jl_create_ts_module("text", ctx); } orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing; auto ctxt = clone.getContext(); - uint64_t compiler_start_time = 0; - uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); - if (measure_compile_time_enabled) - compiler_start_time = jl_hrtime(); - // compile all methods for the current world and type-inference world - - JL_LOCK(&jl_codegen_lock); auto target_info = clone.withModuleDo([&](Module &M) { return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); + egal_set method_roots; jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second)); + if (!llvmmod) + params.getContext().setDiscardValueNames(true); params.params = cgparams; - params.imaging = imaging; - params.debug_level = jl_options.debug_level; + assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled params.external_linkage = _external_linkage; - size_t compile_for[] = { jl_typeinf_world, _world }; - for (int worlds = 0; worlds < 2; worlds++) { - params.world = compile_for[worlds]; - if (!params.world) - continue; - // Don't emit methods for the typeinf_world with extern policy - if (policy != CompilationPolicy::Default && params.world == jl_typeinf_world) - continue; - size_t i, l; - for (i = 0, l = jl_array_len(methods); i < l; i++) { - // each item in this list is either a MethodInstance indicating something - // to compile, or an svec(rettype, sig) describing a C-callable alias to create. 
- jl_value_t *item = jl_array_ptr_ref(methods, i); - if (jl_is_simplevector(item)) { - if (worlds == 1) - jl_compile_extern_c(wrap(&clone), ¶ms, NULL, jl_svecref(item, 0), jl_svecref(item, 1)); - continue; - } - mi = (jl_method_instance_t*)item; - src = NULL; - // if this method is generally visible to the current compilation world, - // and this is either the primary world, or not applicable in the primary world - // then we want to compile and emit this - if (mi->def.method->primary_world <= params.world && params.world <= mi->def.method->deleted_world) { - // find and prepare the source code to compile - jl_code_instance_t *codeinst = NULL; - jl_ci_cache_lookup(*cgparams, mi, params.world, &codeinst, &src); - if (src && !emitted.count(codeinst)) { - // now add it to our compilation results - JL_GC_PROMISE_ROOTED(codeinst->rettype); - orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, params.imaging, - clone.getModuleUnlocked()->getDataLayout(), - Triple(clone.getModuleUnlocked()->getTargetTriple())); - jl_llvm_functions_t decls = jl_emit_code(result_m, mi, src, codeinst->rettype, params); - if (result_m) - emitted[codeinst] = {std::move(result_m), std::move(decls)}; + params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH3(¶ms.temporary_roots, &method_roots.list, &method_roots.keyset); + jl_compiled_functions_t compiled_functions; + size_t i, l; + for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) { + // each item in this list is either a CodeInstance followed by a CodeInfo indicating something + // to compile, or a rettype followed by a sig describing a C-callable alias to create. + jl_value_t *item = jl_array_ptr_ref(codeinfos, i); + if (jl_is_code_instance(item)) { + // now add it to our compilation results + jl_code_instance_t *codeinst = (jl_code_instance_t*)item; + jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i); + assert(jl_is_code_info(src)); + if (compiled_functions.count(codeinst)) + continue; // skip any duplicates that accidentally made there way in here (or make this an error?) + if (_external_linkage) { + uint8_t specsigflags; + jl_callptr_t invoke; + void *fptr; + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + if (invoke != NULL && (specsigflags & 0b100)) { + // this codeinst is already available externally + // TODO: for performance, avoid generating the src code when we know it would reach here anyways + continue; } } + orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), + params.tsctx, clone.getModuleUnlocked()->getDataLayout(), + Triple(clone.getModuleUnlocked()->getTargetTriple())); + jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); + record_method_roots(method_roots, jl_get_ci_mi(codeinst)); + if (result_m) + compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + else if (params.params->trim) { + // if we're building a small image, we need to compile everything + // to ensure that we have all the information we need. 
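The new loop above consumes codeinfos two elements at a time: a CodeInstance followed by the CodeInfo to compile it from, or a return type followed by a signature describing a C-callable alias. A toy iteration over such a tagged, paired array, assuming well-formed input as the real loop does:

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Tagged stand-in for the elements of `codeinfos`.
    struct Entry { enum Kind { CodeInstance, Type } kind; std::string payload; };

    static void walk_codeinfos(const std::vector<Entry> &codeinfos)
    {
        for (size_t i = 0; i < codeinfos.size(); i++) {
            const Entry &item = codeinfos[i];
            if (item.kind == Entry::CodeInstance) {
                const Entry &src = codeinfos[++i];   // paired CodeInfo source
                std::printf("compile %s from %s\n", item.payload.c_str(), src.payload.c_str());
            }
            else {
                const Entry &sig = codeinfos[++i];   // paired signature for a C-callable alias
                std::printf("ccallable %s %s\n", item.payload.c_str(), sig.payload.c_str());
            }
        }
    }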
+ jl_safe_printf("codegen failed to compile code root "); + jl_(mi); + abort(); + } + } + else { + jl_value_t *sig = jl_array_ptr_ref(codeinfos, ++i); + assert(jl_is_type(item) && jl_is_type(sig)); + jl_compile_extern_c(wrap(&clone), ¶ms, NULL, item, sig); } - - // finally, make sure all referenced methods also get compiled or fixed up - jl_compile_workqueue(emitted, *clone.getModuleUnlocked(), params, policy); } - JL_UNLOCK(&jl_codegen_lock); // Might GC + // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them + resolve_workqueue(params, method_roots, compiled_functions); + aot_optimize_roots(params, method_roots, compiled_functions); + params.temporary_roots = nullptr; JL_GC_POP(); // process the globals array, before jl_merge_module destroys them - std::vector gvars(params.globals.size()); - data->jl_value_to_llvm.resize(params.globals.size()); + SmallVector gvars(params.global_targets.size()); + data->jl_value_to_llvm.resize(params.global_targets.size()); StringSet<> gvars_names; DenseSet gvars_set; size_t idx = 0; - for (auto &global : params.globals) { + for (auto &global : params.global_targets) { gvars[idx] = global.second->getName().str(); + global.second->setInitializer(literal_static_pointer_val(global.first, global.second->getValueType())); assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!"); assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!"); data->jl_value_to_llvm[idx] = global.first; idx++; } - CreateNativeMethods += emitted.size(); + CreateNativeMethods += compiled_functions.size(); size_t offset = gvars.size(); data->jl_external_to_llvm.resize(params.external_fns.size()); @@ -390,37 +712,42 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // clones the contents of the module `m` to the shadow_output collector // while examining and recording what kind of function pointer we have - Linker L(*clone.getModuleUnlocked()); - for (auto &def : emitted) { - jl_merge_module(clone, std::move(std::get<0>(def.second))); - jl_code_instance_t *this_code = def.first; - jl_llvm_functions_t decls = std::get<1>(def.second); - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - uint32_t func_id = 0; - uint32_t cfunc_id = 0; - if (func == "jl_fptr_args") { - func_id = -1; - } - else if (func == "jl_fptr_sparam") { - func_id = -2; - } - else { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); - func_id = data->jl_sysimg_fvars.size(); + { + Linker L(*clone.getModuleUnlocked()); + for (auto &def : compiled_functions) { + jl_merge_module(clone, std::move(std::get<0>(def.second))); + jl_code_instance_t *this_code = def.first; + jl_llvm_functions_t decls = std::get<1>(def.second); + StringRef func = decls.functionObject; + StringRef cfunc = decls.specFunctionObject; + uint32_t func_id = 0; + uint32_t cfunc_id = 0; + if (func == "jl_fptr_args") { + func_id = -1; + } + else if (func == "jl_fptr_sparam") { + func_id = -2; + } + else if (decls.functionObject == "jl_f_opaque_closure_call") { + func_id = -4; + } + else { + //Safe b/c context is locked by params + data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); + func_id = data->jl_sysimg_fvars.size(); + } + if (!cfunc.empty()) { + //Safe b/c context is locked by params + data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); + 
cfunc_id = data->jl_sysimg_fvars.size(); + } + data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); } - if (!cfunc.empty()) { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); - cfunc_id = data->jl_sysimg_fvars.size(); + if (params._shared_module) { + bool error = L.linkInModule(std::move(params._shared_module)); + assert(!error && "Error linking in shared module"); + (void)error; } - data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); - } - if (params._shared_module) { - bool error = L.linkInModule(std::move(params._shared_module)); - assert(!error && "Error linking in shared module"); - (void)error; } // now get references to the globals in the merged module @@ -428,56 +755,14 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm for (auto &global : gvars) { //Safe b/c context is locked by params GlobalVariable *G = cast(clone.getModuleUnlocked()->getNamedValue(global)); - G->setInitializer(ConstantPointerNull::get(cast(G->getValueType()))); - G->setLinkage(GlobalValue::ExternalLinkage); - G->setVisibility(GlobalValue::HiddenVisibility); + assert(G->hasInitializer()); + G->setLinkage(GlobalValue::InternalLinkage); G->setDSOLocal(true); data->jl_sysimg_gvars.push_back(G); } CreateNativeGlobals += gvars.size(); - //Safe b/c context is locked by params - auto TT = Triple(clone.getModuleUnlocked()->getTargetTriple()); - Function *juliapersonality_func = nullptr; - if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { - // setting the function personality enables stack unwinding and catching exceptions - // so make sure everything has something set - Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext()); - juliapersonality_func = Function::Create(FunctionType::get(T_int32, true), - Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked()); - juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass); - } - - // move everything inside, now that we've merged everything - // (before adding the exported headers) - if (policy == CompilationPolicy::Default) { - //Safe b/c context is locked by params - for (GlobalObject &G : clone.getModuleUnlocked()->global_objects()) { - if (!G.isDeclaration()) { - G.setLinkage(GlobalValue::ExternalLinkage); - G.setVisibility(GlobalValue::HiddenVisibility); - G.setDSOLocal(true); - makeSafeName(G); - if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { - // Add unwind exception personalities to functions to handle async exceptions - if (Function *F = dyn_cast(&G)) - F->setPersonalityFn(juliapersonality_func); - } - } - } - } - data->M = std::move(clone); - if (timed) { - if (measure_compile_time_enabled) { - auto end = jl_hrtime(); - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); - } - ct->reentrant_timing &= ~1ull; - } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return (void*)data; } @@ -495,7 +780,6 @@ static void reportWriterError(const ErrorInfoBase &E) jl_safe_printf("ERROR: failed to emit output file %s\n", err.c_str()); } -#if JULIA_FLOAT16_ABI == 1 static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionType *FT) { Function *target = M.getFunction(alias); @@ -512,7 +796,7 @@ static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionT auto val = builder.CreateCall(target, CallArgs); builder.CreateRet(val); } -#endif + void 
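The func_id/cfunc_id values recorded in jl_fvar_map just above use a small sentinel scheme: negative codes name the generic entry points seen in this hunk (jl_fptr_args, jl_fptr_sparam, jl_f_opaque_closure_call), while positive values are 1-based indices into the image's fvar table. A sketch of the encoder for the cases visible here:

    #include <cstdint>
    #include <string>
    #include <vector>

    // 0 would mean "no entry"; negative values are well-known generic entry
    // points; positive values index (1-based) into the fvar table.
    static int32_t encode_func_id(const std::string &func, std::vector<std::string> &fvars)
    {
        if (func == "jl_fptr_args")
            return -1;
        if (func == "jl_fptr_sparam")
            return -2;
        if (func == "jl_f_opaque_closure_call")
            return -4;
        fvars.push_back(func);            // specialized code: record it in the fvar table
        return (int32_t)fvars.size();     // 1-based index of the entry just added
    }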
multiversioning_preannotate(Module &M); // See src/processor.h for documentation about this table. Corresponds to jl_image_shard_t. @@ -528,14 +812,13 @@ static GlobalVariable *emit_shard_table(Module &M, Type *T_size, Type *T_psize, return gv; }; auto table = tables.data() + i * sizeof(jl_image_shard_t) / sizeof(void *); - table[offsetof(jl_image_shard_t, fvar_base) / sizeof(void*)] = create_gv("jl_fvar_base", false); - table[offsetof(jl_image_shard_t, fvar_offsets) / sizeof(void*)] = create_gv("jl_fvar_offsets", true); + table[offsetof(jl_image_shard_t, fvar_count) / sizeof(void*)] = create_gv("jl_fvar_count", true); + table[offsetof(jl_image_shard_t, fvar_ptrs) / sizeof(void*)] = create_gv("jl_fvar_ptrs", true); table[offsetof(jl_image_shard_t, fvar_idxs) / sizeof(void*)] = create_gv("jl_fvar_idxs", true); - table[offsetof(jl_image_shard_t, gvar_base) / sizeof(void*)] = create_gv("jl_gvar_base", false); table[offsetof(jl_image_shard_t, gvar_offsets) / sizeof(void*)] = create_gv("jl_gvar_offsets", true); table[offsetof(jl_image_shard_t, gvar_idxs) / sizeof(void*)] = create_gv("jl_gvar_idxs", true); table[offsetof(jl_image_shard_t, clone_slots) / sizeof(void*)] = create_gv("jl_clone_slots", true); - table[offsetof(jl_image_shard_t, clone_offsets) / sizeof(void*)] = create_gv("jl_clone_offsets", true); + table[offsetof(jl_image_shard_t, clone_ptrs) / sizeof(void*)] = create_gv("jl_clone_ptrs", true); table[offsetof(jl_image_shard_t, clone_idxs) / sizeof(void*)] = create_gv("jl_clone_idxs", true); } auto tables_arr = ConstantArray::get(ArrayType::get(T_psize, tables.size()), tables); @@ -638,7 +921,7 @@ static FunctionInfo getFunctionWeight(const Function &F) auto val = F.getFnAttribute("julia.mv.clones").getValueAsString(); // base16, so must be at most 4 * length bits long // popcount gives number of clones - info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1; + info.clones = APInt(val.size() * 4, val, 16).popcount() + 1; } info.weight += info.insts; // more basic blocks = more complex than just sum of insts, @@ -649,6 +932,7 @@ static FunctionInfo getFunctionWeight(const Function &F) } struct ModuleInfo { + Triple triple; size_t globals; size_t funcs; size_t bbs; @@ -659,6 +943,7 @@ struct ModuleInfo { ModuleInfo compute_module_info(Module &M) { ModuleInfo info; + info.triple = Triple(M.getTargetTriple()); info.globals = 0; info.funcs = 0; info.bbs = 0; @@ -685,17 +970,24 @@ ModuleInfo compute_module_info(Module &M) { } struct Partition { - StringSet<> globals; + StringMap globals; StringMap fvars; StringMap gvars; size_t weight; }; -static inline bool verify_partitioning(const SmallVectorImpl &partitions, const Module &M, size_t fvars_size, size_t gvars_size) { +static bool canPartition(const Function &F) +{ + return !F.hasFnAttribute(Attribute::AlwaysInline); +} + +static inline bool verify_partitioning(const SmallVectorImpl &partitions, const Module &M, DenseMap &fvars, DenseMap &gvars) { bool bad = false; #ifndef JL_NDEBUG - SmallVector fvars(fvars_size); - SmallVector gvars(gvars_size); + size_t fvars_size = fvars.size(); + size_t gvars_size = gvars.size(); + SmallVector fvars_partition(fvars_size); + SmallVector gvars_partition(gvars_size); StringMap GVNames; for (uint32_t i = 0; i < partitions.size(); i++) { for (auto &name : partitions[i].globals) { @@ -706,45 +998,55 @@ static inline bool verify_partitioning(const SmallVectorImpl &partiti GVNames[name.getKey()] = i; } for (auto &fvar : partitions[i].fvars) { - if (fvars[fvar.second] != 0) { + if 
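getFunctionWeight above derives the clone count from the "julia.mv.clones" attribute, a base-16 bitmask whose population count (plus one for the original) says how many multiversioned copies a function will have. The same arithmetic without APInt:

    #include <bitset>
    #include <cstddef>
    #include <string>

    // Count clones from a base-16 mask: one bit per multiversioning target,
    // plus one for the original copy.
    static size_t clone_count_from_hex_mask(const std::string &hex)
    {
        size_t clones = 0;
        for (char c : hex) {
            unsigned nibble = (c >= '0' && c <= '9') ? (unsigned)(c - '0')
                            : (c >= 'a' && c <= 'f') ? (unsigned)(c - 'a' + 10)
                            : (c >= 'A' && c <= 'F') ? (unsigned)(c - 'A' + 10)
                            : 0u;
            clones += std::bitset<4>(nibble).count();  // popcount of each hex digit
        }
        return clones + 1;                             // +1 for the uncloned original
    }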
(fvars_partition[fvar.second] != 0) { bad = true; - dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars[fvar.second] - 1 << "\n"; + dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars_partition[fvar.second] - 1 << "\n"; } - fvars[fvar.second] = i+1; + fvars_partition[fvar.second] = i+1; } for (auto &gvar : partitions[i].gvars) { - if (gvars[gvar.second] != 0) { + if (gvars_partition[gvar.second] != 0) { bad = true; - dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars[gvar.second] - 1 << "\n"; + dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars_partition[gvar.second] - 1 << "\n"; } - gvars[gvar.second] = i+1; + gvars_partition[gvar.second] = i+1; } } - for (auto &GV : M.globals()) { + for (auto &GV : M.global_values()) { if (GV.isDeclaration()) { if (GVNames.count(GV.getName())) { bad = true; dbgs() << "Global " << GV.getName() << " is a declaration but is in partition " << GVNames[GV.getName()] << "\n"; } } else { + // Local global values are not partitioned if (!GVNames.count(GV.getName())) { bad = true; dbgs() << "Global " << GV << " not in any partition\n"; } - if (!GV.hasExternalLinkage()) { - bad = true; - dbgs() << "Global " << GV << " has non-external linkage " << GV.getLinkage() << " but is in partition " << GVNames[GV.getName()] << "\n"; + for (ConstantUses uses(const_cast(&GV), const_cast(M)); !uses.done(); uses.next()) { + auto val = uses.get_info().val; + if (!GVNames.count(val->getName())) { + bad = true; + dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is not in any partition\n"; + continue; + } + if (GVNames[val->getName()] != GVNames[GV.getName()]) { + bad = true; + dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is in partition " << GVNames[GV.getName()] << " but " << val->getName() << " is in partition " << GVNames[val->getName()] << "\n"; + } } } } for (uint32_t i = 0; i < fvars_size; i++) { - if (fvars[i] == 0) { + if (fvars_partition[i] == 0) { + auto gv = find_if(fvars.begin(), fvars.end(), [i](auto var) { return var.second == i; }); bad = true; - dbgs() << "fvar " << i << " not in any partition\n"; + dbgs() << "fvar " << gv->first->getName() << " at " << i << " not in any partition\n"; } } for (uint32_t i = 0; i < gvars_size; i++) { - if (gvars[i] == 0) { + if (gvars_partition[i] == 0) { bad = true; dbgs() << "gvar " << i << " not in any partition\n"; } @@ -767,7 +1069,7 @@ static SmallVector partitionModule(Module &M, unsigned threads) { unsigned size; size_t weight; }; - std::vector nodes; + SmallVector nodes; DenseMap node_map; unsigned merged; @@ -806,9 +1108,16 @@ static SmallVector partitionModule(Module &M, unsigned threads) { for (auto &G : M.global_values()) { if (G.isDeclaration()) continue; - if (isa(G)) { - partitioner.make(&G, getFunctionWeight(cast(G)).weight); - } else { + // Currently ccallable global aliases have extern linkage, we only want to make the + // internally linked functions/global variables extern+hidden + if (G.hasLocalLinkage()) { + G.setLinkage(GlobalValue::ExternalLinkage); + G.setVisibility(GlobalValue::HiddenVisibility); + } + if (auto F = dyn_cast(&G)) { + partitioner.make(&G, getFunctionWeight(*F).weight); + } + else { partitioner.make(&G, 1); } } @@ -818,6 +1127,8 @@ static SmallVector partitionModule(Module &M, unsigned threads) { for (ConstantUses uses(partitioner.nodes[i].GV, M); !uses.done(); uses.next()) { 
auto val = uses.get_info().val; auto idx = partitioner.node_map.find(val); + // This can fail if we can't partition a global, but it uses something we can partition + // This should be fixed by altering canPartition to not permit partitioning this global assert(idx != partitioner.node_map.end()); partitioner.merge(i, idx->second); } @@ -828,12 +1139,12 @@ static SmallVector partitionModule(Module &M, unsigned threads) { auto pcomp = [](const Partition *p1, const Partition *p2) { return p1->weight > p2->weight; }; - std::priority_queue, decltype(pcomp)> pq(pcomp); + std::priority_queue, decltype(pcomp)> pq(pcomp); for (unsigned i = 0; i < threads; ++i) { pq.push(&partitions[i]); } - std::vector idxs(partitioner.nodes.size()); + SmallVector idxs(partitioner.nodes.size()); std::iota(idxs.begin(), idxs.end(), 0); std::sort(idxs.begin(), idxs.end(), [&](unsigned a, unsigned b) { //because roots have more weight than their children, @@ -845,40 +1156,42 @@ static SmallVector partitionModule(Module &M, unsigned threads) { for (unsigned idx = 0; idx < idxs.size(); ++idx) { auto i = idxs[idx]; auto root = partitioner.find(i); - assert(root == i || partitioner.nodes[root].GV == nullptr); - if (partitioner.nodes[root].GV) { + assert(root == i || partitioner.nodes[root].weight == 0); + if (partitioner.nodes[root].weight) { auto &node = partitioner.nodes[root]; auto &P = *pq.top(); pq.pop(); auto name = node.GV->getName(); - P.globals.insert(name); + P.globals.insert({name, true}); if (fvars.count(node.GV)) P.fvars[name] = fvars[node.GV]; if (gvars.count(node.GV)) P.gvars[name] = gvars[node.GV]; P.weight += node.weight; - node.GV = nullptr; + node.weight = 0; node.size = &P - partitions.data(); pq.push(&P); } if (root != i) { auto &node = partitioner.nodes[i]; - assert(node.GV != nullptr); + assert(node.weight != 0); // we assigned its root already, so just add it to the root's partition // don't touch the priority queue, since we're not changing the weight auto &P = partitions[partitioner.nodes[root].size]; auto name = node.GV->getName(); - P.globals.insert(name); + P.globals.insert({name, true}); if (fvars.count(node.GV)) P.fvars[name] = fvars[node.GV]; if (gvars.count(node.GV)) P.gvars[name] = gvars[node.GV]; - node.GV = nullptr; + node.weight = 0; node.size = partitioner.nodes[root].size; } } - bool verified = verify_partitioning(partitions, M, fvars.size(), gvars.size()); + bool verified = verify_partitioning(partitions, M, fvars, gvars); + if (!verified) + llvm_dump(&M); assert(verified && "Partitioning failed to partition globals correctly"); (void) verified; @@ -920,7 +1233,6 @@ struct ShardTimers { ImageTimer deserialize; ImageTimer materialize; ImageTimer construct; - ImageTimer deletion; // impl timers ImageTimer unopt; ImageTimer optimize; @@ -934,13 +1246,12 @@ struct ShardTimers { void print(raw_ostream &out, bool clear=false) { StringRef sep = "===-------------------------------------------------------------------------==="; out << formatv("{0}\n{1}\n{0}\n", sep, fmt_align(name + " : " + desc, AlignStyle::Center, sep.size())); - auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed + deletion.elapsed + + auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed + unopt.elapsed + optimize.elapsed + opt.elapsed + obj.elapsed + asm_.elapsed; out << "Time (s) Name Description\n"; deserialize.print(out, clear); materialize.print(out, clear); construct.print(out, clear); - deletion.print(out, clear); unopt.print(out, clear); optimize.print(out, clear); 
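The assignment loop above is a greedy longest-processing-time packing: roots are visited in order of decreasing weight and each one goes to whichever partition is currently lightest, tracked with a min-heap keyed on accumulated weight. A standalone sketch of that strategy:

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <queue>
    #include <vector>

    struct Bin { size_t weight = 0; std::vector<size_t> items; };

    // Heaviest items first, each assigned to the currently lightest bin,
    // mirroring how partitionModule spreads roots across shards.
    static std::vector<Bin> pack(std::vector<size_t> weights, unsigned threads)
    {
        std::vector<Bin> bins(threads);
        auto heavier = [](const Bin *a, const Bin *b) { return a->weight > b->weight; };
        std::priority_queue<Bin *, std::vector<Bin *>, decltype(heavier)> pq(heavier);
        for (Bin &b : bins)
            pq.push(&b);
        std::sort(weights.begin(), weights.end(), std::greater<size_t>());
        for (size_t w : weights) {
            Bin *b = pq.top();          // lightest partition so far
            pq.pop();
            b->weight += w;
            b->items.push_back(w);
            pq.push(b);
        }
        return bins;
    }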
opt.print(out, clear); @@ -950,8 +1261,6 @@ struct ShardTimers { } }; -void emitFloat16Wrappers(Module &M, bool external); - struct AOTOutputs { SmallVector unopt, opt, obj, asm_; }; @@ -970,7 +1279,7 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer SourceTM.getRelocationModel(), SourceTM.getCodeModel(), SourceTM.getOptLevel())); - + fixupTM(*TM); if (unopt) { timers.unopt.startTimer(); raw_svector_ostream OS(out.unopt); @@ -984,60 +1293,73 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer if (!opt && !obj && !asm_) { return out; } - assert(!verifyModule(M, &errs())); - - timers.optimize.startTimer(); + assert(!verifyLLVMIR(M)); -#ifndef JL_USE_NEW_PM - legacy::PassManager optimizer; - addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - addOptimizationPasses(&optimizer, jl_options.opt_level, true, true); - addMachinePasses(&optimizer, jl_options.opt_level); -#else - - auto PMTM = std::unique_ptr( - SourceTM.getTarget().createTargetMachine( - SourceTM.getTargetTriple().str(), - SourceTM.getTargetCPU(), - SourceTM.getTargetFeatureString(), - SourceTM.Options, - SourceTM.getRelocationModel(), - SourceTM.getCodeModel(), - SourceTM.getOptLevel())); - NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), OptimizationOptions::defaults(true, true)}; -#endif - optimizer.run(M); - assert(!verifyModule(M, &errs())); - bool inject_aliases = false; - for (auto &F : M.functions()) { - if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") { - inject_aliases = true; - break; + { + timers.optimize.startTimer(); + + auto PMTM = std::unique_ptr( + SourceTM.getTarget().createTargetMachine( + SourceTM.getTargetTriple().str(), + SourceTM.getTargetCPU(), + SourceTM.getTargetFeatureString(), + SourceTM.Options, + SourceTM.getRelocationModel(), + SourceTM.getCodeModel(), + SourceTM.getOptLevel())); + fixupTM(*PMTM); + NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), OptimizationOptions::defaults(true, true)}; + optimizer.run(M); + assert(!verifyLLVMIR(M)); + bool inject_aliases = false; + for (auto &F : M.functions()) { + if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") { + inject_aliases = true; + break; + } } - } - // no need to inject aliases if we have no functions - - if (inject_aliases) { -#if JULIA_FLOAT16_ABI == 1 - // We would like to emit an alias or an weakref alias to redirect these symbols - // but LLVM doesn't let us emit a GlobalAlias to a declaration... - // So for now we inject a definition of these functions that calls our runtime - // functions. We do so after optimization to avoid cloning these functions. 
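As the comment above notes, LLVM will not emit a GlobalAlias to a declaration, so injectCRTAlias materializes a tiny forwarding definition that calls the Julia runtime routine instead. In rough C++ terms the generated body is just a call-through; the parameter types below are approximations, since the actual signature depends on the target's half-precision ABI:

    // Forwarding thunk in place of an alias: declare the runtime routine, then
    // define the symbol the generated code expects to reference.
    extern "C" float julia__gnu_h2f_ieee(unsigned short);   // provided by the Julia runtime

    extern "C" float __extendhfsf2(unsigned short h)        // compiler-rt name used by codegen
    {
        return julia__gnu_h2f_ieee(h);                      // call through; no real alias needed
    }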
- injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", - FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false)); - injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee", - FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false)); - injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", - FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); - injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee", - FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); - injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", - FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false)); + // no need to inject aliases if we have no functions + + if (inject_aliases) { + // We would like to emit an alias or an weakref alias to redirect these symbols + // but LLVM doesn't let us emit a GlobalAlias to a declaration... + // So for now we inject a definition of these functions that calls our runtime + // functions. We do so after optimization to avoid cloning these functions. + // Float16 conversion routines +#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) + // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin + // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9 + injectCRTAlias(M, "__gnu_h2f_ieee", "julia_half_to_float", + FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getInt16Ty(M.getContext()) }, false)); + injectCRTAlias(M, "__extendhfsf2", "julia_half_to_float", + FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getInt16Ty(M.getContext()) }, false)); + injectCRTAlias(M, "__gnu_f2h_ieee", "julia_float_to_half", + FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncsfhf2", "julia_float_to_half", + FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncdfhf2", "julia_double_to_half", + FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false)); #else - emitFloat16Wrappers(M, false); + injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", + FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false)); + injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee", + FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false)); + injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", + FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee", + FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", + FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false)); #endif + + // BFloat16 conversion routines + injectCRTAlias(M, "__truncsfbf2", "julia__truncsfbf2", + FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncsdbf2", "julia__truncdfbf2", + FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false)); + } + timers.optimize.stopTimer(); } - timers.optimize.stopTimer(); if (opt) { timers.opt.startTimer(); @@ 
-1055,7 +1377,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer raw_svector_ostream OS(out.obj); legacy::PassManager emitter; addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); +#if JL_LLVM_VERSION >= 180000 + if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::ObjectFile, false)) +#else if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false)) +#endif jl_safe_printf("ERROR: target does not support generation of object files\n"); emitter.run(M); timers.obj.stopTimer(); @@ -1066,7 +1392,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer raw_svector_ostream OS(out.asm_); legacy::PassManager emitter; addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); +#if JL_LLVM_VERSION >= 180000 + if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::AssemblyFile, false)) +#else if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false)) +#endif jl_safe_printf("ERROR: target does not support generation of assembly files\n"); emitter.run(M); timers.asm_.stopTimer(); @@ -1077,7 +1407,7 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer // serialize module to bitcode static auto serializeModule(const Module &M) { - assert(!verifyModule(M, &errs()) && "Serializing invalid module!"); + assert(!verifyLLVMIR(M) && "Serializing invalid module!"); SmallVector ClonedModuleBuffer; BitcodeWriter BCWriter(ClonedModuleBuffer); BCWriter.writeModule(M); @@ -1089,38 +1419,59 @@ static auto serializeModule(const Module &M) { // Modules are deserialized lazily by LLVM, to avoid deserializing // unnecessary functions. We take advantage of this by serializing // the entire module once, then deleting the bodies of functions -// that are not in this partition. Once unnecesary functions are +// that are not in this partition. Once unnecessary functions are // deleted, we then materialize the entire module to make use-lists // consistent. static void materializePreserved(Module &M, Partition &partition) { DenseSet Preserve; - for (auto &GV : M.global_values()) { - if (!GV.isDeclaration()) { - if (partition.globals.count(GV.getName())) { - Preserve.insert(&GV); - } + for (auto &Name : partition.globals) { + auto *GV = M.getNamedValue(Name.first()); + assert(GV && !GV->isDeclaration() && !GV->hasLocalLinkage()); + if (!Name.second) { + // We skip partitioning for internal variables, so this has + // the same effect as putting it in preserve. + // This just avoids a hashtable lookup. 
+ GV->setLinkage(GlobalValue::InternalLinkage); + assert(GV->hasDefaultVisibility()); + } else { + Preserve.insert(GV); } } + for (auto &F : M.functions()) { - if (!F.isDeclaration()) { - if (!Preserve.contains(&F)) { - F.deleteBody(); - F.setLinkage(GlobalValue::ExternalLinkage); - F.setVisibility(GlobalValue::HiddenVisibility); - F.setDSOLocal(true); - } + if (F.isDeclaration()) + continue; + if (F.hasLocalLinkage()) + continue; + if (Preserve.contains(&F)) + continue; + if (!canPartition(F)) { + F.setLinkage(GlobalValue::AvailableExternallyLinkage); + F.setVisibility(GlobalValue::HiddenVisibility); + F.setDSOLocal(true); + continue; } + F.deleteBody(); + F.setLinkage(GlobalValue::ExternalLinkage); + F.setVisibility(GlobalValue::HiddenVisibility); + F.setDSOLocal(true); } + for (auto &GV : M.globals()) { - if (!GV.isDeclaration()) { - if (!Preserve.contains(&GV)) { - GV.setInitializer(nullptr); - GV.setLinkage(GlobalValue::ExternalLinkage); - GV.setVisibility(GlobalValue::HiddenVisibility); - GV.setDSOLocal(true); - } - } + if (GV.isDeclaration()) + continue; + if (Preserve.contains(&GV)) + continue; + if (GV.hasLocalLinkage()) + continue; + GV.setInitializer(nullptr); + GV.setLinkage(GlobalValue::ExternalLinkage); + GV.setVisibility(GlobalValue::HiddenVisibility); + if (GV.getDLLStorageClass() != GlobalValue::DLLStorageClassTypes::DefaultStorageClass) + continue; // Don't mess with exported or imported globals + GV.setDSOLocal(true); } + // Global aliases are a pain to deal with. It is illegal to have an alias to a declaration, // so we need to replace them with either a function or a global variable declaration. However, // we can't just delete the alias, because that would break the users of the alias. Therefore, @@ -1129,25 +1480,27 @@ static void materializePreserved(Module &M, Partition &partition) { // to deleting the old alias. 
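materializePreserved above implements the sharding model: every worker gets the same serialized module, then deletes the bodies of definitions owned by other partitions, leaving external declarations that the static linker resolves across object files. A toy rendition of that keep-or-strip pass:

    #include <set>
    #include <string>
    #include <unordered_map>

    // Toy module: name -> (body, declaration-only flag).
    struct Def { std::string body; bool declaration_only = false; };

    // Keep the definitions this shard owns; strip everything else down to a
    // declaration resolved against another shard's object at link time.
    static void keep_only(std::unordered_map<std::string, Def> &module_,
                          const std::set<std::string> &preserve)
    {
        for (auto &entry : module_) {
            if (preserve.count(entry.first))
                continue;                          // owned by this partition: keep the body
            entry.second.body.clear();             // drop the body ...
            entry.second.declaration_only = true;  // ... leaving only an external declaration
        }
    }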
SmallVector> DeletedAliases; for (auto &GA : M.aliases()) { - if (!GA.isDeclaration()) { - if (!Preserve.contains(&GA)) { - if (GA.getValueType()->isFunctionTy()) { - auto F = Function::Create(cast(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M); - // This is an extremely sad hack to make sure the global alias never points to an extern function - auto BB = BasicBlock::Create(M.getContext(), "", F); - new UnreachableInst(M.getContext(), BB); - GA.setAliasee(F); - - DeletedAliases.push_back({ &GA, F }); - } - else { - auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType())); - DeletedAliases.push_back({ &GA, GV }); - } - } + assert(!GA.isDeclaration() && "Global aliases can't be declarations!"); // because LLVM says so + if (Preserve.contains(&GA)) + continue; + if (GA.hasLocalLinkage()) + continue; + if (GA.getValueType()->isFunctionTy()) { + auto F = Function::Create(cast(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M); + // This is an extremely sad hack to make sure the global alias never points to an extern function + auto BB = BasicBlock::Create(M.getContext(), "", F); + new UnreachableInst(M.getContext(), BB); + GA.setAliasee(F); + DeletedAliases.push_back({ &GA, F }); + } + else { + auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType())); + DeletedAliases.push_back({ &GA, GV }); } } + cantFail(M.materializeAll()); + for (auto &Deleted : DeletedAliases) { Deleted.second->takeName(Deleted.first); Deleted.first->replaceAllUsesWith(Deleted.second); @@ -1162,8 +1515,8 @@ static void materializePreserved(Module &M, Partition &partition) { } // Reconstruct jl_fvars, jl_gvars, jl_fvars_idxs, and jl_gvars_idxs from the partition -static void construct_vars(Module &M, Partition &partition) { - std::vector> fvar_pairs; +static void construct_vars(Module &M, Partition &partition, StringRef suffix) { + SmallVector> fvar_pairs; fvar_pairs.reserve(partition.fvars.size()); for (auto &fvar : partition.fvars) { auto F = M.getFunction(fvar.first()); @@ -1171,8 +1524,8 @@ static void construct_vars(Module &M, Partition &partition) { assert(!F->isDeclaration()); fvar_pairs.push_back({ fvar.second, F }); } - std::vector fvars; - std::vector fvar_idxs; + SmallVector fvars; + SmallVector fvar_idxs; fvars.reserve(fvar_pairs.size()); fvar_idxs.reserve(fvar_pairs.size()); std::sort(fvar_pairs.begin(), fvar_pairs.end()); @@ -1180,16 +1533,16 @@ static void construct_vars(Module &M, Partition &partition) { fvars.push_back(fvar.second); fvar_idxs.push_back(fvar.first); } - std::vector> gvar_pairs; + SmallVector, 0> gvar_pairs; gvar_pairs.reserve(partition.gvars.size()); for (auto &gvar : partition.gvars) { - auto GV = M.getGlobalVariable(gvar.first()); + auto GV = M.getNamedGlobal(gvar.first()); assert(GV); assert(!GV->isDeclaration()); gvar_pairs.push_back({ gvar.second, GV }); } - std::vector gvars; - std::vector gvar_idxs; + SmallVector gvars; + SmallVector gvar_idxs; gvars.reserve(gvar_pairs.size()); gvar_idxs.reserve(gvar_pairs.size()); std::sort(gvar_pairs.begin(), gvar_pairs.end()); @@ -1199,9 +1552,9 @@ static void construct_vars(Module &M, Partition &partition) { } // Now commit the fvars, gvars, and idxs - auto T_psize = M.getDataLayout().getIntPtrType(M.getContext())->getPointerTo(); - emit_offset_table(M, fvars, "jl_fvars", T_psize); - emit_offset_table(M, gvars, "jl_gvars", T_psize); + auto T_size = 
M.getDataLayout().getIntPtrType(M.getContext()); + emit_table(M, fvars, "jl_fvars", T_size->getPointerTo()); + emit_offset_table(M, T_size, gvars, "jl_gvar", suffix); auto fidxs = ConstantDataArray::get(M.getContext(), fvar_idxs); auto fidxs_var = new GlobalVariable(M, fidxs->getType(), true, GlobalVariable::ExternalLinkage, @@ -1211,23 +1564,15 @@ static void construct_vars(Module &M, Partition &partition) { auto gidxs = ConstantDataArray::get(M.getContext(), gvar_idxs); auto gidxs_var = new GlobalVariable(M, gidxs->getType(), true, GlobalVariable::ExternalLinkage, - gidxs, "jl_gvar_idxs"); + gidxs, "jl_gvar_idxs" + suffix); gidxs_var->setVisibility(GlobalValue::HiddenVisibility); gidxs_var->setDSOLocal(true); } -// Materialization will leave many unused declarations, which multiversioning would otherwise clone. -// This function removes them to avoid unnecessary cloning of declarations. -// The GlobalDCEPass is much better at this, but we only care about removing unused -// declarations, not actually about seeing if code is dead (codegen knows it is live, by construction). -static void dropUnusedGlobals(Module &M) { - std::vector unused; - for (auto &G : M.global_values()) { - if (G.isDeclaration() && G.use_empty()) - unused.push_back(&G); - } - for (auto &G : unused) - G->eraseFromParent(); +extern "C" void lambda_trampoline(void* arg) { + std::function* func = static_cast*>(arg); + (*func)(); + delete func; } // Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading, @@ -1248,7 +1593,6 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri timers[i].deserialize.init("deserialize_" + idx, "Deserialize module"); timers[i].materialize.init("materialize_" + idx, "Materialize declarations"); timers[i].construct.init("construct_" + idx, "Construct partitioned definitions"); - timers[i].deletion.init("deletion_" + idx, "Delete dead declarations"); timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode"); timers[i].optimize.init("optimize_" + idx, "Optimize shard"); timers[i].opt.init("opt_" + idx, "Emit optimized bitcode"); @@ -1276,7 +1620,17 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri // Single-threaded case if (threads == 1) { output_timer.startTimer(); - outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); + { + JL_TIMING(NATIVE_AOT, NATIVE_Opt); + // convert gvars to the expected offset table format for shard 0 + if (M.getGlobalVariable("jl_gvars")) { + auto gvars = consume_gv(M, "jl_gvars", false); + Type *T_size = M.getDataLayout().getIntPtrType(M.getContext()); + emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix" + M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0"); + } + outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); + } output_timer.stopTimer(); // Don't need M anymore module_released(M); @@ -1314,40 +1668,49 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri output_timer.startTimer(); // Start all of the worker threads - std::vector workers(threads); - for (unsigned i = 0; i < threads; i++) { - workers[i] = std::thread([&, i]() { - LLVMContext ctx; - // Lazily deserialize the entire module - timers[i].deserialize.startTimer(); - auto M = cantFail(getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx), "Error loading module"); - timers[i].deserialize.stopTimer(); - - timers[i].materialize.startTimer(); 
- materializePreserved(*M, partitions[i]); - timers[i].materialize.stopTimer(); - - timers[i].construct.startTimer(); - construct_vars(*M, partitions[i]); - M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), "_" + std::to_string(i))); - // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file - // or it may skip emitting debug info for that file. Here set it to ./julia#N - DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), "."); - for (DICompileUnit *CU : M->debug_compile_units()) - CU->replaceOperandWith(0, topfile); - timers[i].construct.stopTimer(); - - timers[i].deletion.startTimer(); - dropUnusedGlobals(*M); - timers[i].deletion.stopTimer(); - - outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); - }); - } - - // Wait for all of the worker threads to finish - for (auto &w : workers) - w.join(); + { + JL_TIMING(NATIVE_AOT, NATIVE_Opt); + std::vector workers(threads); + for (unsigned i = 0; i < threads; i++) { + std::function func = [&, i]() { + LLVMContext ctx; + ctx.setDiscardValueNames(true); + // Lazily deserialize the entire module + timers[i].deserialize.startTimer(); + auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx); + // Make sure this also fails with only julia, but not LLVM assertions enabled, + // otherwise, the first error we hit is the LLVM module verification failure, + // which will look very confusing, because the module was partially deserialized. + bool deser_succeeded = (bool)EM; + auto M = cantFail(std::move(EM), "Error loading module"); + assert(deser_succeeded); (void)deser_succeeded; + timers[i].deserialize.stopTimer(); + + timers[i].materialize.startTimer(); + materializePreserved(*M, partitions[i]); + timers[i].materialize.stopTimer(); + + timers[i].construct.startTimer(); + std::string suffix = "_" + std::to_string(i); + construct_vars(*M, partitions[i], suffix); + M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix)); + // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file + // or it may skip emitting debug info for that file. Here set it to ./julia#N + DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), "."); + for (DICompileUnit *CU : M->debug_compile_units()) + CU->replaceOperandWith(0, topfile); + timers[i].construct.stopTimer(); + + outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); + }; + auto arg = new std::function(func); + uv_thread_create(&workers[i], lambda_trampoline, arg); // Use libuv thread to avoid issues with stack sizes + } + + // Wait for all of the worker threads to finish + for (unsigned i = 0; i < threads; i++) + uv_thread_join(&workers[i]); + } output_timer.stopTimer(); @@ -1372,12 +1735,21 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri return outputs; } +extern int jl_is_timing_passes; static unsigned compute_image_thread_count(const ModuleInfo &info) { // 32-bit systems are very memory-constrained #ifdef _P32 LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n"); return 1; #endif + if (jl_is_timing_passes) // LLVM isn't thread safe when timing the passes https://github.com/llvm/llvm-project/issues/44417 + return 1; + // COFF has limits on external symbols (even hidden) up to 65536. 
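Because uv_thread_create only accepts a plain void (*)(void *) entry point, the worker lambdas above are heap-allocated as std::function objects and smuggled through the void* argument; lambda_trampoline runs and then deletes them. The same pattern sketched with std::thread standing in for the libuv call:

    #include <functional>
    #include <thread>   // std::thread stands in for uv_thread_create in this sketch

    // Run the captured work and free the heap-allocated closure.
    static void lambda_trampoline(void *arg)
    {
        auto *func = static_cast<std::function<void()> *>(arg);
        (*func)();
        delete func;
    }

    // Hand an arbitrary lambda to an API that only takes void (*)(void *).
    static std::thread spawn_worker(std::function<void()> work)
    {
        auto *arg = new std::function<void()>(std::move(work));
        return std::thread(lambda_trampoline, static_cast<void *>(arg));
    }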
We reserve the last few + // for any of our other symbols that we insert during compilation. + if (info.triple.isOSBinFormatCOFF() && info.globals > 64000) { + LLVM_DEBUG(dbgs() << "COFF is restricted to a single thread for large images\n"); + return 1; + } // This is not overridable because empty modules do occasionally appear, but they'll be very small and thus exit early to // known easy behavior. Plus they really don't warrant multiple threads if (info.weight < 1000) { @@ -1427,13 +1799,16 @@ static unsigned compute_image_thread_count(const ModuleInfo &info) { return threads; } +jl_emission_params_t default_emission_params = { 1 }; + // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_native_impl(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - ios_t *z, ios_t *s) + ios_t *z, ios_t *s, + jl_emission_params_t *params) { JL_TIMING(NATIVE_AOT, NATIVE_Dump); jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; @@ -1442,6 +1817,11 @@ void jl_dump_native_impl(void *native_code, delete data; return; } + + if (!params) { + params = &default_emission_params; + } + // We don't want to use MCJIT's target machine because // it uses the large code model and we may potentially // want less optimizations there. @@ -1451,16 +1831,24 @@ void jl_dump_native_impl(void *native_code, TheTriple.setObjectFormat(Triple::COFF); } else if (TheTriple.isOSDarwin()) { TheTriple.setObjectFormat(Triple::MachO); - TheTriple.setOS(llvm::Triple::MacOSX); - } - Optional RelocModel; - if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) { + SmallString<16> Str; + Str += "macosx"; + if (TheTriple.isAArch64()) + Str += "11.0.0"; // Update this if MACOSX_VERSION_MIN changes + else + Str += "10.14.0"; + TheTriple.setOSName(Str); + } + std::optional RelocModel; + if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD() || TheTriple.isOSOpenBSD()) { RelocModel = Reloc::PIC_; } + CodeModel::Model CMModel = CodeModel::Small; - if (TheTriple.isPPC()) { - // On PPC the small model is limited to 16bit offsets - CMModel = CodeModel::Medium; + if (TheTriple.isPPC() || TheTriple.isRISCV() || + (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())) { + // On PPC the small model is limited to 16bit offsets. For very large images the small code model + CMModel = CodeModel::Medium; // isn't good enough on x86 so use Medium, it has no cost because only the image goes in .ldata } std::unique_ptr SourceTM( jl_ExecutionEngine->getTarget().createTargetMachine( @@ -1470,8 +1858,13 @@ void jl_dump_native_impl(void *native_code, jl_ExecutionEngine->getTargetOptions(), RelocModel, CMModel, +#if JL_LLVM_VERSION >= 180000 + CodeGenOptLevel::Aggressive // -O3 TODO: respect command -O0 flag? +#else CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag? 
+#endif )); + fixupTM(*SourceTM); auto DL = jl_create_datalayout(*SourceTM); std::string StackProtectorGuard; unsigned OverrideStackAlignment; @@ -1488,7 +1881,9 @@ void jl_dump_native_impl(void *native_code, SmallVector data_outputs; SmallVector metadata_outputs; if (z) { + JL_TIMING(NATIVE_AOT, NATIVE_Sysimg); LLVMContext Context; + Context.setDiscardValueNames(true); Module sysimgM("sysimg", Context); sysimgM.setTargetTriple(TheTriple.str()); sysimgM.setDataLayout(DL); @@ -1500,6 +1895,12 @@ void jl_dump_native_impl(void *native_code, GlobalVariable::ExternalLinkage, data, "jl_system_image_data"); sysdata->setAlignment(Align(64)); +#if JL_LLVM_VERSION >= 180000 + sysdata->setCodeModel(CodeModel::Large); +#else + if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux()) + sysdata->setSection(".ldata"); +#endif addComdat(sysdata, TheTriple); Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size); addComdat(new GlobalVariable(sysimgM, len->getType(), true, @@ -1515,8 +1916,7 @@ void jl_dump_native_impl(void *native_code, sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {}); } - bool imaging_mode = imaging_default() || jl_options.outputo; - + const bool imaging_mode = true; unsigned threads = 1; unsigned nfvars = 0; unsigned ngvars = 0; @@ -1526,12 +1926,29 @@ void jl_dump_native_impl(void *native_code, bool has_veccall = false; data->M.withModuleDo([&](Module &dataM) { + JL_TIMING(NATIVE_AOT, NATIVE_Setup); dataM.setTargetTriple(TheTriple.str()); dataM.setDataLayout(DL); + dataM.setPICLevel(PICLevel::BigPIC); auto &Context = dataM.getContext(); Type *T_psize = dataM.getDataLayout().getIntPtrType(Context)->getPointerTo(); + // This should really be in jl_create_native, but we haven't + // yet set the target triple binary format correctly at that + // point. This should be resolved when we start JITting for + // COFF when we switch over to JITLink. 
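The code-model change above (Medium for PPC, RISC-V, and large x86-64 Linux images; Small otherwise) and the placement of `jl_system_image_data` via a per-global large code model or the `.ldata` section address the same issue: a big sysimage can exceed what small-code-model relocations can reach, and only the image data pays for it. A compilable sketch of the triple-based choice, assuming only upstream LLVM headers (the `Triple` header moved to `llvm/TargetParser/` in recent LLVM; older releases use `llvm/ADT/Triple.h`); `pickImageCodeModel` is an illustrative name, not Julia's helper.

```cpp
// Sketch of the code-model choice made above, using plain upstream LLVM.
#include "llvm/Support/CodeGen.h"
#include "llvm/TargetParser/Triple.h" // "llvm/ADT/Triple.h" on older LLVM

static llvm::CodeModel::Model pickImageCodeModel(const llvm::Triple &T) {
    // On PPC the small model is limited to 16-bit offsets, and very large
    // x86-64 Linux images overflow small-model relocations, so use Medium;
    // only the image data goes in .ldata, so there is no extra cost.
    if (T.isPPC() || T.isRISCV() ||
        (T.isX86() && T.isArch64Bit() && T.isOSLinux()))
        return llvm::CodeModel::Medium;
    return llvm::CodeModel::Small;
}
```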
+ for (auto &GA : dataM.aliases()) { + // Global aliases are only used for ccallable things, so we should + // mark them as dllexport + addComdat(&GA, TheTriple); + } + + // Wipe the global initializers, we'll reset them at load time + for (auto gv : data->jl_sysimg_gvars) { + cast(gv)->setInitializer(Constant::getNullValue(gv->getValueType())); + } + // add metadata information if (imaging_mode) { multiversioning_preannotate(dataM); @@ -1560,9 +1977,9 @@ void jl_dump_native_impl(void *native_code, LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n"); nfvars = data->jl_sysimg_fvars.size(); ngvars = data->jl_sysimg_gvars.size(); - emit_offset_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); - emit_offset_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); - std::vector idxs; + emit_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); + emit_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); + SmallVector idxs; idxs.resize(data->jl_sysimg_gvars.size()); std::iota(idxs.begin(), idxs.end(), 0); auto gidxs = ConstantDataArray::get(Context, idxs); @@ -1582,22 +1999,13 @@ void jl_dump_native_impl(void *native_code, fidxs_var->setDSOLocal(true); dataM.addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0")); - // reflect the address of the jl_RTLD_DEFAULT_handle variable - // back to the caller, so that we can check for consistency issues - GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&dataM); - addComdat(new GlobalVariable(dataM, - jlRTLD_DEFAULT_var->getType(), - true, - GlobalVariable::ExternalLinkage, - jlRTLD_DEFAULT_var, - "jl_RTLD_DEFAULT_handle_pointer"), TheTriple); - // let the compiler know we are going to internalize a copy of this, // if it has a current usage with ExternalLinkage - auto small_typeof_copy = dataM.getGlobalVariable("small_typeof"); - if (small_typeof_copy) { - small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); - small_typeof_copy->setDSOLocal(true); + auto jl_small_typeof_copy = dataM.getGlobalVariable("jl_small_typeof"); + if (jl_small_typeof_copy) { + jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + jl_small_typeof_copy->setDSOLocal(true); + jl_small_typeof_copy->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DefaultStorageClass); } } @@ -1615,17 +2023,31 @@ void jl_dump_native_impl(void *native_code, data_outputs = compile(*dataM, "text", threads, [data](Module &) { delete data; }); } - { + if (params->emit_metadata) { + JL_TIMING(NATIVE_AOT, NATIVE_Metadata); LLVMContext Context; + Context.setDiscardValueNames(true); Module metadataM("metadata", Context); metadataM.setTargetTriple(TheTriple.str()); metadataM.setDataLayout(DL); metadataM.setStackProtectorGuard(StackProtectorGuard); metadataM.setOverrideStackAlignment(OverrideStackAlignment); + // reflect the address of the jl_RTLD_DEFAULT_handle variable + // back to the caller, so that we can check for consistency issues + GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&metadataM); + Type *T_size = DL.getIntPtrType(Context); Type *T_psize = T_size->getPointerTo(); + auto FT = FunctionType::get(Type::getInt8Ty(Context)->getPointerTo()->getPointerTo(), {}, false); + auto F = Function::Create(FT, Function::ExternalLinkage, "get_jl_RTLD_DEFAULT_handle_addr", metadataM); + llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F)); + builder.CreateRet(jlRTLD_DEFAULT_var); + F->setLinkage(GlobalValue::ExternalLinkage); + if (TheTriple.isOSBinFormatCOFF()) + 
F->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass); + if (TheTriple.isOSWindows()) { // Windows expect that the function `_DllMainStartup` is present in an dll. // Normal compilers use something like Zig's crtdll.c instead we provide a @@ -1642,7 +2064,7 @@ void jl_dump_native_impl(void *native_code, if (imaging_mode) { auto specs = jl_get_llvm_clone_targets(); const uint32_t base_flags = has_veccall ? JL_TARGET_VEC_CALL : 0; - std::vector data; + SmallVector data; auto push_i32 = [&] (uint32_t v) { uint8_t buff[4]; memcpy(buff, &v, 4); @@ -1661,13 +2083,13 @@ void jl_dump_native_impl(void *native_code, auto shards = emit_shard_table(metadataM, T_size, T_psize, threads); auto ptls = emit_ptls_table(metadataM, T_size, T_psize); auto header = emit_image_header(metadataM, threads, nfvars, ngvars); - auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*)); - auto small_typeof_copy = new GlobalVariable(metadataM, AT, false, + auto AT = ArrayType::get(T_size, sizeof(jl_small_typeof) / sizeof(void*)); + auto jl_small_typeof_copy = new GlobalVariable(metadataM, AT, false, GlobalVariable::ExternalLinkage, Constant::getNullValue(AT), - "small_typeof"); - small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); - small_typeof_copy->setDSOLocal(true); + "jl_small_typeof"); + jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + jl_small_typeof_copy->setDSOLocal(true); AT = ArrayType::get(T_psize, 5); auto pointers = new GlobalVariable(metadataM, AT, false, GlobalVariable::ExternalLinkage, @@ -1675,7 +2097,7 @@ void jl_dump_native_impl(void *native_code, ConstantExpr::getBitCast(header, T_psize), ConstantExpr::getBitCast(shards, T_psize), ConstantExpr::getBitCast(ptls, T_psize), - ConstantExpr::getBitCast(small_typeof_copy, T_psize), + ConstantExpr::getBitCast(jl_small_typeof_copy, T_psize), ConstantExpr::getBitCast(target_ids, T_psize) }), "jl_image_pointers"); @@ -1690,10 +2112,18 @@ void jl_dump_native_impl(void *native_code, metadata_outputs = compile(metadataM, "data", 1, [](Module &) {}); } - object::Archive::Kind Kind = getDefaultForHost(TheTriple); + { + JL_TIMING(NATIVE_AOT, NATIVE_Write); + + object::Archive::Kind Kind = getDefaultForHost(TheTriple); +#if JL_LLVM_VERSION >= 180000 +#define WritingMode SymtabWritingMode::NormalSymtab +#else +#define WritingMode true +#endif #define WRITE_ARCHIVE(fname, field, prefix, suffix) \ if (fname) {\ - std::vector archive; \ + SmallVector archive; \ SmallVector filenames; \ SmallVector buffers; \ for (size_t i = 0; i < threads; i++) { \ @@ -1709,432 +2139,137 @@ void jl_dump_native_impl(void *native_code, for (size_t i = 0; i < filenames.size(); i++) { \ archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \ } \ - handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \ + handleAllErrors(writeArchive(fname, archive, WritingMode, Kind, true, false), reportWriterError); \ } - WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc"); - WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc"); - WRITE_ARCHIVE(obj_fname, obj, "", ".o"); - WRITE_ARCHIVE(asm_fname, asm_, "", ".s"); -} - -void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) -{ - PM->add(new TargetLibraryInfoWrapperPass(triple)); - PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); + WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc"); + WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc"); + WRITE_ARCHIVE(obj_fname, obj, "", 
".o"); + WRITE_ARCHIVE(asm_fname, asm_, "", ".s"); +#undef WRITE_ARCHIVE + } } -void addMachinePasses(legacy::PassManagerBase *PM, int optlevel) +// sometimes in GDB you want to find out what code would be created from a mi +extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi) { - // TODO: don't do this on CPUs that natively support Float16 - PM->add(createDemoteFloat16Pass()); - if (optlevel > 1) - PM->add(createGVNPass()); -} + jl_llvmf_dump_t llvmf_dump; + size_t world = jl_current_task->world_age; + JL_STREAM *stream = (JL_STREAM*)STDERR_FILENO; -// this defines the set of optimization passes defined for Julia at various optimization levels. -// it assumes that the TLI and TTI wrapper passes have already been added. -void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, - bool lower_intrinsics, bool dump_native, - bool external_use) -{ - // Note: LLVM 12 disabled the hoisting of common instruction - // before loop vectorization (https://reviews.llvm.org/D84108). - // - // TODO: CommonInstruction hoisting/sinking enables AllocOpt - // to merge allocations and sometimes eliminate them, - // since AllocOpt does not handle PhiNodes. - // Enable this instruction hoisting because of this and Union benchmarks. - auto basicSimplifyCFGOptions = SimplifyCFGOptions() - .convertSwitchRangeToICmp(true) - .convertSwitchToLookupTable(true) - .forwardSwitchCondToPhi(true); - auto aggressiveSimplifyCFGOptions = SimplifyCFGOptions() - .convertSwitchRangeToICmp(true) - .convertSwitchToLookupTable(true) - .forwardSwitchCondToPhi(true) - //These mess with loop rotation, so only do them after that - .hoistCommonInsts(true) - // Causes an SRET assertion error in late-gc-lowering - // .sinkCommonInsts(true) - ; -#ifdef JL_DEBUG_BUILD - PM->add(createGCInvariantVerifierPass(true)); - PM->add(createVerifierPass()); -#endif + jl_code_info_t *src = jl_gdbcodetyped1(mi, world); + JL_GC_PUSH1(&src); - PM->add(createConstantMergePass()); - if (opt_level < 2) { - if (!dump_native) { - // we won't be multiversioning, so lower CPU feature checks early on - // so that we can avoid an additional CFG simplification pass at the end. - PM->add(createCPUFeaturesPass()); - if (opt_level == 1) - PM->add(createInstSimplifyLegacyPass()); - } - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - if (opt_level == 1) { - PM->add(createSROAPass()); - PM->add(createInstructionCombiningPass()); - PM->add(createEarlyCSEPass()); - // maybe add GVN? 
- // also try GVNHoist and GVNSink - } - PM->add(createMemCpyOptPass()); - PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline - PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - if (lower_intrinsics) { - PM->add(createBarrierNoopPass()); - PM->add(createLowerExcHandlersPass()); - PM->add(createGCInvariantVerifierPass(false)); - PM->add(createRemoveNIPass()); - PM->add(createLateLowerGCFramePass()); - PM->add(createFinalLowerGCPass()); - PM->add(createLowerPTLSPass(dump_native)); - } - else { - PM->add(createRemoveNIPass()); - } - PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - if (dump_native) { - PM->add(createMultiVersioningPass(external_use)); - PM->add(createCPUFeaturesPass()); - // minimal clean-up to get rid of CPU feature checks - if (opt_level == 1) { - PM->add(createInstSimplifyLegacyPass()); - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - } - } -#if JL_LLVM_VERSION < 150000 -#if defined(_COMPILER_ASAN_ENABLED_) - PM->add(createAddressSanitizerFunctionPass()); -#endif -#if defined(_COMPILER_MSAN_ENABLED_) - PM->add(createMemorySanitizerLegacyPassPass()); -#endif -#if defined(_COMPILER_TSAN_ENABLED_) - PM->add(createThreadSanitizerLegacyPassPass()); -#endif -#endif - return; - } - PM->add(createPropagateJuliaAddrspaces()); - PM->add(createScopedNoAliasAAWrapperPass()); - PM->add(createTypeBasedAAWrapperPass()); - if (opt_level >= 3) { - PM->add(createBasicAAWrapperPass()); - } - - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - PM->add(createDeadCodeEliminationPass()); - PM->add(createSROAPass()); - - //PM->add(createMemCpyOptPass()); - - PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline - - // Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time - // merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt` - // pass. - PM->add(createAllocOptPass()); - // consider AggressiveInstCombinePass at optlevel > 2 - PM->add(createInstructionCombiningPass()); - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - if (dump_native) - PM->add(createMultiVersioningPass(external_use)); - PM->add(createCPUFeaturesPass()); - PM->add(createSROAPass()); - PM->add(createInstSimplifyLegacyPass()); - PM->add(createJumpThreadingPass()); - PM->add(createCorrelatedValuePropagationPass()); - - PM->add(createReassociatePass()); - - PM->add(createEarlyCSEPass()); - - // Load forwarding above can expose allocations that aren't actually used - // remove those before optimizing loops. - PM->add(createAllocOptPass()); - PM->add(createLoopRotatePass()); - // moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) -#ifdef USE_POLLY - // LCSSA (which has already run at this point due to the dependencies of the - // above passes) introduces redundant phis that hinder Polly. Therefore we - // run InstCombine here to remove them. 
- PM->add(createInstructionCombiningPass()); - PM->add(polly::createCodePreparationPass()); - polly::registerPollyPasses(*PM); - PM->add(polly::createCodegenCleanupPass()); -#endif - // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards - PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - PM->add(createLICMPass()); - PM->add(createJuliaLICMPass()); -#if JL_LLVM_VERSION >= 150000 - PM->add(createSimpleLoopUnswitchLegacyPass()); -#else - PM->add(createLoopUnswitchPass()); -#endif - PM->add(createLICMPass()); - PM->add(createJuliaLICMPass()); - PM->add(createInductiveRangeCheckEliminationPass()); // Must come before indvars - // Subsequent passes not stripping metadata from terminator - PM->add(createInstSimplifyLegacyPass()); - PM->add(createLoopIdiomPass()); - PM->add(createIndVarSimplifyPass()); - PM->add(createLoopDeletionPass()); - PM->add(createSimpleLoopUnrollPass()); - - // Run our own SROA on heap objects before LLVM's - PM->add(createAllocOptPass()); - // Re-run SROA after loop-unrolling (useful for small loops that operate, - // over the structure of an aggregate) - PM->add(createSROAPass()); - // might not be necessary: - PM->add(createInstSimplifyLegacyPass()); - - PM->add(createGVNPass()); - PM->add(createMemCpyOptPass()); - PM->add(createSCCPPass()); - - //These next two passes must come before IRCE to eliminate the bounds check in #43308 - PM->add(createCorrelatedValuePropagationPass()); - PM->add(createDeadCodeEliminationPass()); - - PM->add(createInductiveRangeCheckEliminationPass()); // Must come between the two GVN passes - - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. - // This needs to be InstCombine instead of InstSimplify to allow - // loops over Union-typed arrays to vectorize. - PM->add(createInstructionCombiningPass()); - PM->add(createJumpThreadingPass()); - if (opt_level >= 3) { - PM->add(createGVNPass()); // Must come after JumpThreading and before LoopVectorize - } - PM->add(createDeadStoreEliminationPass()); - // see if all of the constant folding has exposed more loops - // to simplification and deletion - // this helps significantly with cleaning up iteration - PM->add(createCFGSimplificationPass(aggressiveSimplifyCFGOptions)); - - // More dead allocation (store) deletion before loop optimization - // consider removing this: - // Moving this after aggressive CFG simplification helps deallocate when allocations are hoisted - PM->add(createAllocOptPass()); - PM->add(createLoopDeletionPass()); - PM->add(createInstructionCombiningPass()); - PM->add(createLoopVectorizePass()); - PM->add(createLoopLoadEliminationPass()); - // Cleanup after LV pass - PM->add(createInstructionCombiningPass()); - PM->add(createCFGSimplificationPass( // Aggressive CFG simplification - aggressiveSimplifyCFGOptions - )); - PM->add(createSLPVectorizerPass()); - // might need this after LLVM 11: - //PM->add(createVectorCombinePass()); - - PM->add(createAggressiveDCEPass()); - - if (lower_intrinsics) { - // LowerPTLS removes an indirect call. As a result, it is likely to trigger - // LLVM's devirtualization heuristics, which would result in the entire - // pass pipeline being re-executed. Prevent this by inserting a barrier. - PM->add(createBarrierNoopPass()); - PM->add(createLowerExcHandlersPass()); - PM->add(createGCInvariantVerifierPass(false)); - // Needed **before** LateLowerGCFrame on LLVM < 12 - // due to bug in `CreateAlignmentAssumption`. 
- PM->add(createRemoveNIPass()); - PM->add(createLateLowerGCFramePass()); - PM->add(createFinalLowerGCPass()); - // We need these two passes and the instcombine below - // after GC lowering to let LLVM do some constant propagation on the tags. - // and remove some unnecessary write barrier checks. - PM->add(createGVNPass()); - PM->add(createSCCPPass()); - // Remove dead use of ptls - PM->add(createDeadCodeEliminationPass()); - PM->add(createLowerPTLSPass(dump_native)); - PM->add(createInstructionCombiningPass()); - // Clean up write barrier and ptls lowering - PM->add(createCFGSimplificationPass()); + jl_printf(stream, "---- dumping IR for ----\n"); + jl_static_show(stream, (jl_value_t*)mi); + jl_printf(stream, "\n----\n"); + + jl_printf(stream, "\n---- unoptimized IR ----\n"); + jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, false, jl_default_cgparams); + if (llvmf_dump.F) { + jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source"); + if (ir != NULL && jl_is_string(ir)) + jl_printf(stream, "%s", jl_string_data(ir)); } - else { - PM->add(createRemoveNIPass()); + jl_printf(stream, "\n----\n"); + + jl_printf(stream, "\n---- optimized IR ----\n"); + jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, true, jl_default_cgparams); + if (llvmf_dump.F) { + jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source"); + if (ir != NULL && jl_is_string(ir)) + jl_printf(stream, "%s", jl_string_data(ir)); } - PM->add(createCombineMulAddPass()); - PM->add(createDivRemPairsPass()); -#if JL_LLVM_VERSION < 150000 -#if defined(_COMPILER_ASAN_ENABLED_) - PM->add(createAddressSanitizerFunctionPass()); -#endif -#if defined(_COMPILER_MSAN_ENABLED_) - PM->add(createMemorySanitizerLegacyPassPass()); -#endif -#if defined(_COMPILER_TSAN_ENABLED_) - PM->add(createThreadSanitizerLegacyPassPass()); -#endif -#endif -} + jl_printf(stream, "\n----\n"); -// An LLVM module pass that just runs all julia passes in order. 
Useful for -// debugging -template -class JuliaPipeline : public Pass { -public: - static char ID; - // A bit of a hack, but works - struct TPMAdapter : public PassManagerBase { - PMTopLevelManager *TPM; - TPMAdapter(PMTopLevelManager *TPM) : TPM(TPM) {} - void add(Pass *P) { TPM->schedulePass(P); } - }; - void preparePassManager(PMStack &Stack) override { - (void)jl_init_llvm(); - PMTopLevelManager *TPM = Stack.top()->getTopLevelManager(); - TPMAdapter Adapter(TPM); - addTargetPasses(&Adapter, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis()); - addOptimizationPasses(&Adapter, OptLevel, true, dump_native, true); - addMachinePasses(&Adapter, OptLevel); - } - JuliaPipeline() : Pass(PT_PassManager, ID) {} - Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override { - return createPrintModulePass(O, Banner); + jl_printf(stream, "\n---- assembly ----\n"); + jl_get_llvmf_defn(&llvmf_dump, mi, src, 0, true, jl_default_cgparams); + if (llvmf_dump.F) { + jl_value_t *ir = jl_dump_function_asm(&llvmf_dump, 0, "", "source", 0, true); + if (ir != NULL && jl_is_string(ir)) + jl_printf(stream, "%s", jl_string_data(ir)); } -}; -template<> char JuliaPipeline<0,false>::ID = 0; -template<> char JuliaPipeline<2,false>::ID = 0; -template<> char JuliaPipeline<3,false>::ID = 0; -template<> char JuliaPipeline<0,true>::ID = 0; -template<> char JuliaPipeline<2,true>::ID = 0; -template<> char JuliaPipeline<3,true>::ID = 0; -static RegisterPass> X("juliaO0", "Runs the entire julia pipeline (at -O0)", false, false); -static RegisterPass> Y("julia", "Runs the entire julia pipeline (at -O2)", false, false); -static RegisterPass> Z("juliaO3", "Runs the entire julia pipeline (at -O3)", false, false); - -static RegisterPass> XS("juliaO0-sysimg", "Runs the entire julia pipeline (at -O0/sysimg mode)", false, false); -static RegisterPass> YS("julia-sysimg", "Runs the entire julia pipeline (at -O2/sysimg mode)", false, false); -static RegisterPass> ZS("juliaO3-sysimg", "Runs the entire julia pipeline (at -O3/sysimg mode)", false, false); + jl_printf(stream, "\n----\n"); + JL_GC_POP(); -extern "C" JL_DLLEXPORT_CODEGEN -void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int lower_intrinsics) { - addOptimizationPasses(unwrap(PM), opt_level, lower_intrinsics); + return src; } // --- native code info, and dump function to IR and ASM --- // Get pointer to llvm::Function instance, compiling if necessary // for use in reflection from Julia. 
-// this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways: -// misuse will leak memory or cause read-after-free +// This is paired with jl_dump_function_ir and jl_dump_function_asm, either of which will free all memory allocated here extern "C" JL_DLLEXPORT_CODEGEN -void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) +void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params) { - if (jl_is_method(mi->def.method) && mi->def.method->source == NULL && - mi->def.method->generator == NULL) { - // not a generic function - dump->F = NULL; - return; - } - - // get the source code for this function - jl_value_t *jlrettype = (jl_value_t*)jl_any_type; - jl_code_info_t *src = NULL; - jl_code_instance_t *codeinst = NULL; - JL_GC_PUSH3(&src, &jlrettype, &codeinst); - if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && mi->def.method->source != jl_nothing && jl_ir_flag_inferred(mi->def.method->source)) { - src = (jl_code_info_t*)mi->def.method->source; - if (src && !jl_is_code_info(src)) - src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); - } - else { - jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world); - if (ci != jl_nothing) { - codeinst = (jl_code_instance_t*)ci; - src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method)) - src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); - jlrettype = codeinst->rettype; - codeinst = NULL; // not needed outside of this branch - } - if (!src || (jl_value_t*)src == jl_nothing) { - src = jl_type_infer(mi, world, 0); - if (src) - jlrettype = src->rettype; - else if (jl_is_method(mi->def.method)) { - src = mi->def.method->generator ? jl_code_for_staged(mi, world) : (jl_code_info_t*)mi->def.method->source; - if (src && (jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method)) - src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); - } - // TODO: use mi->uninferred - } - } - // emit this function into a new llvm module + dump->F = nullptr; + dump->TSM = nullptr; if (src && jl_is_code_info(src)) { - auto ctx = jl_ExecutionEngine->getContext(); - orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx, imaging_default()); - uint64_t compiler_start_time = 0; - uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); - if (measure_compile_time_enabled) - compiler_start_time = jl_hrtime(); - JL_LOCK(&jl_codegen_lock); - auto target_info = m.withModuleDo([&](Module &M) { - return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); - }); - jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); - output.world = world; - output.params = ¶ms; - output.imaging = imaging_default(); - // This would be nice, but currently it causes some assembly regressions that make printed output - // differ very significantly from the actual non-imaging mode code. 
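The `output.temporary_roots` array above is rooted with `JL_GC_PUSH1` before `jl_emit_code` and released with `JL_GC_POP` right after: the standard Julia C API discipline for any `jl_value_t*` that must survive a call that may allocate. A minimal embedding-style sketch of that pattern, assuming the runtime is already initialized; `rooted_allocation_example` is an illustrative name.

```cpp
#include <julia.h>

// Root a freshly allocated value across calls that may trigger GC.
// JL_GC_PUSH1/JL_GC_POP must be balanced within the same scope.
void rooted_allocation_example(void)
{
    jl_array_t *tmp = jl_alloc_array_1d(jl_array_any_type, 0);
    JL_GC_PUSH1(&tmp);
    // ... calls that may allocate (and therefore may run the GC) go here ...
    JL_GC_POP();
}
```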
- // // Force imaging mode for names of pointers - // output.imaging = true; - // This would also be nice, but it seems to cause OOMs on the windows32 builder - // // Force at least medium debug info for introspection - // No debug info = no variable names, - // max debug info = llvm.dbg.declare/value intrinsics which clutter IR output - output.debug_level = jl_options.debug_level; - auto decls = jl_emit_code(m, mi, src, jlrettype, output); - JL_UNLOCK(&jl_codegen_lock); // Might GC - - Function *F = NULL; - if (m) { - // if compilation succeeded, prepare to return the result - // For imaging mode, global constants are currently private without initializer - // which isn't legal. Convert them to extern linkage so that the code can compile - // and will better match what's actually in sysimg. - for (auto &global : output.globals) - global.second->setLinkage(GlobalValue::ExternalLinkage); - assert(!verifyModule(*m.getModuleUnlocked(), &errs())); - if (optimize) { -#ifndef JL_USE_NEW_PM - legacy::PassManager PM; - addTargetPasses(&PM, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis()); - addOptimizationPasses(&PM, jl_options.opt_level); - addMachinePasses(&PM, jl_options.opt_level); -#else - NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level)}; -#endif - //Safe b/c context lock is held by output - PM.run(*m.getModuleUnlocked()); - assert(!verifyModule(*m.getModuleUnlocked(), &errs())); + auto ctx = jl_ExecutionEngine->makeContext(); + orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx); + Function *F = nullptr; + { + uint64_t compiler_start_time = 0; + uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) + compiler_start_time = jl_hrtime(); + auto target_info = m.withModuleDo([&](Module &M) { + return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); + }); + jl_codegen_params_t output(ctx, std::move(target_info.first), std::move(target_info.second)); + output.params = ¶ms; + output.imaging_mode = jl_options.image_codegen; + output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(&output.temporary_roots); + auto decls = jl_emit_code(m, mi, src, NULL, output); + output.temporary_roots = nullptr; + JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it + + if (m) { + // if compilation succeeded, prepare to return the result + // Similar to jl_link_global from jitlayers.cpp, + // so that code_llvm shows similar codegen to the jit + for (auto &global : output.global_targets) { + if (jl_options.image_codegen) { + global.second->setLinkage(GlobalValue::ExternalLinkage); + } + else { + auto p = literal_static_pointer_val(global.first, global.second->getValueType()); + Type *elty = PointerType::get(output.getContext(), 0); + // For pretty printing, when LLVM inlines the global initializer into its loads + auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent()); + global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType())); + global.second->setConstant(true); + global.second->setLinkage(GlobalValue::PrivateLinkage); + global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + global.second->setVisibility(GlobalValue::DefaultVisibility); + } + } + if (!jl_options.image_codegen) { + optimizeDLSyms(*m.getModuleUnlocked()); + } + 
assert(!verifyLLVMIR(*m.getModuleUnlocked())); + if (optimize) { + NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level)}; + //Safe b/c context lock is held by output + PM.run(*m.getModuleUnlocked()); + assert(!verifyLLVMIR(*m.getModuleUnlocked())); + } + const std::string *fname; + if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") + getwrapper = false; + if (!getwrapper) + fname = &decls.specFunctionObject; + else + fname = &decls.functionObject; + F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); + } + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); } - const std::string *fname; - if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") - getwrapper = false; - if (!getwrapper) - fname = &decls.specFunctionObject; - else - fname = &decls.functionObject; - F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); - } - JL_GC_POP(); - if (measure_compile_time_enabled) { - auto end = jl_hrtime(); - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); } if (F) { dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m))); @@ -2142,7 +2277,4 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz return; } } - - const char *mname = name_from_method_instance(mi); - jl_errorf("unable to compile source for function %s", mname); } diff --git a/src/array.c b/src/array.c index 5226c729d32e7..da9cb24b4d0e9 100644 --- a/src/array.c +++ b/src/array.c @@ -16,200 +16,28 @@ extern "C" { #endif -#define JL_ARRAY_IMPL_NUL 1 - -#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) - -static inline void arrayassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT -{ - // array can assume more alignment than a field would normally have - assert(nb >= jl_datatype_size(jl_typeof(src))); // nb might move some undefined bits, but we should be okay with that - if (hasptr) { - size_t nptr = nb / sizeof(void*); - memmove_refs((void**)dst, (void* const*)src, nptr); - jl_gc_multi_wb(parent, src); - } - else { - switch (nb) { - case 0: break; - case 1: *(uint8_t*)dst = *(uint8_t*)src; break; - case 2: *(uint16_t*)dst = *(uint16_t*)src; break; - case 4: *(uint32_t*)dst = *(uint32_t*)src; break; - case 8: *(uint64_t*)dst = *(uint64_t*)src; break; - case 16: - memcpy(jl_assume_aligned(dst, 16), jl_assume_aligned(src, 16), 16); - break; - default: memcpy(dst, src, nb); - } - } -} - -static inline void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT -{ - if (hasptr) - memmove_refs((void**)dst, (void**)src, nb / sizeof(void*)); - else - memmove(dst, src, nb); -} - -// array constructors --------------------------------------------------------- -JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT -{ - assert(jl_array_isbitsunion(a)); - return ((char*)jl_array_data(a)) + ((jl_array_ndims(a) == 1 ? 
(a->maxsize - a->offset) : jl_array_len(a)) * a->elsize) + a->offset; -} - -STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT -{ - if (a->flags.how == 3) { - a = (jl_array_t*)jl_array_data_owner(a); - assert(jl_is_string(a) || a->flags.how != 3); - } - return (jl_value_t*)a; -} - -#if defined(_P64) && defined(UINT128MAX) -typedef __uint128_t wideint_t; -#else -typedef uint64_t wideint_t; -#endif - #define MAXINTVAL (((size_t)-1)>>1) -JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz) +JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims) { size_t i; size_t _nel = 1; - for(i=0; i < ndims; i++) { + for (i = 0; i < ndims; i++) { size_t di = dims[i]; - wideint_t prod = (wideint_t)_nel * (wideint_t)di; - if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL) + int overflow = __builtin_mul_overflow(_nel, di, &_nel); + if (overflow || di >= MAXINTVAL) return 1; - _nel = prod; } - wideint_t prod = (wideint_t)elsz * (wideint_t)_nel; - if (prod >= (wideint_t) MAXINTVAL) - return 2; *nel = _nel; - *tot = (size_t)prod; return 0; } -static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, - int8_t isunboxed, int8_t hasptr, int8_t isunion, int8_t zeroinit, size_t elsz) -{ - jl_task_t *ct = jl_current_task; - size_t i, tot, nel; - void *data; - jl_array_t *a; - assert(isunboxed || elsz == sizeof(void*)); - assert(atype == NULL || isunion == jl_is_uniontype(jl_tparam0(atype))); - int validated = jl_array_validate_dims(&nel, &tot, ndims, dims, elsz); - if (validated == 1) - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - else if (validated == 2) - jl_error("invalid Array size"); - if (isunboxed) { - if (elsz == 1 && !isunion) { - // extra byte for all julia allocated byte arrays - tot++; - } - if (isunion) { - // an extra byte for each isbits union array element, stored after a->maxsize - tot += nel; - } - } - - int ndimwords = jl_array_ndimwords(ndims); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - if (tot <= ARRAY_INLINE_NBYTES) { - // align data area - if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) - tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); - else if (isunboxed && elsz >= 4) - tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); - size_t doffs = tsz; - tsz += tot; - // jl_array_t is large enough that objects will always be aligned 16 - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - assert(((size_t)a & 15) == 0); - // No allocation or safepoint allowed after this - a->flags.how = 0; - data = (char*)a + doffs; - } - else { - data = jl_gc_managed_malloc(tot); - // Allocate the Array **after** allocating the data - // to make sure the array is still young - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - a->flags.how = 2; - jl_gc_track_malloced_array(ct->ptls, a); - } - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - - if (zeroinit) - memset(data, 0, tot); - a->data = data; - if (JL_ARRAY_IMPL_NUL && elsz == 1) - ((char*)data)[tot - 1] = '\0'; - a->length = nel; - a->flags.ndims = ndims; - a->flags.ptrarray = !isunboxed; - a->flags.hasptr = hasptr; - a->elsize = elsz; - a->flags.isshared = 0; - a->flags.isaligned = 1; - a->offset = 0; - if (ndims == 1) { - a->nrows = nel; - a->maxsize = nel; - } - else if (a->flags.ndims != ndims) { - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - } - else { - size_t *adims = &a->nrows; - for (i = 0; i < ndims; 
i++) - adims[i] = dims[i]; - } - - return a; -} - -static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *dims) -{ - jl_value_t *eltype = jl_tparam0(atype); - size_t elsz = 0, al = 0; - if (!jl_is_kind(jl_typeof(eltype))) - jl_type_error_rt("Array", "element type", (jl_value_t*)jl_type_type, eltype); - int isunboxed = jl_islayout_inline(eltype, &elsz, &al); - int isunion = jl_is_uniontype(eltype); - int hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); - if (!isunboxed) { - elsz = sizeof(void*); - al = elsz; - } - else { - elsz = LLT_ALIGN(elsz, al); - } - int zi = !isunboxed || hasptr || isunion || (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->zeroinit); - - return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, zi, elsz); -} - -jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz) -{ - return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, 0, (size_t)elsz); -} - #ifndef JL_NDEBUG static inline int is_ntuple_long(jl_value_t *v) { if (!jl_is_tuple(v)) return 0; - jl_value_t *tt = jl_typeof(v); + jl_value_t *tt = (jl_value_t*)jl_typetagof(v); size_t i, nfields = jl_nparams(tt); for (i = 0; i < nfields; i++) { if (jl_tparam(tt, i) != (jl_value_t*)jl_long_type) { @@ -220,313 +48,130 @@ static inline int is_ntuple_long(jl_value_t *v) } #endif -JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, - jl_value_t *_dims) +#define jl_array_elsize(a) (((jl_datatype_t*)jl_typetagof((a)->ref.mem))->layout->size) + +static char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; - assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype))); + assert(jl_genericmemory_isbitsunion(a->ref.mem)); + return jl_genericmemory_typetagdata(a->ref.mem) + (uintptr_t)a->ref.ptr_or_offset; +} - size_t ndims = jl_nfields(_dims); - assert(is_ntuple_long(_dims)); - size_t *dims = (size_t*)_dims; - int ndimwords = jl_array_ndimwords(ndims); - int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*); +STATIC_INLINE jl_array_t *_new_array(jl_value_t *atype, jl_genericmemory_t *mem, const jl_datatype_layout_t *layout, uint32_t ndims, size_t *dims) +{ + jl_task_t *ct = jl_current_task; + size_t i; + int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t); jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - // copy data (except dims) from the old object - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->flags.ndims = ndims; - a->offset = 0; - a->data = NULL; - a->flags.isaligned = data->flags.isaligned; - a->elsize = data->elsize; - a->flags.ptrarray = data->flags.ptrarray; - a->flags.hasptr = data->flags.hasptr; - - // if data is itself a shared wrapper, - // owner should point back to the original array - jl_array_t *owner = (jl_array_t*)jl_array_owner(data); - jl_array_data_owner(a) = (jl_value_t*)owner; - - a->flags.how = 3; - a->data = data->data; - a->flags.isshared = 1; - data->flags.isshared = 1; - - if (ndims == 1) { - size_t l = dims[0]; - a->length = l; - a->nrows = l; - a->maxsize = l; - } - else if (a->flags.ndims != ndims) { - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - } - else { - size_t *adims = &a->nrows; - size_t l = 1; - wideint_t prod; - for (size_t i = 0; i < ndims; i++) { - adims[i] = dims[i]; - prod = (wideint_t)l * (wideint_t)adims[i]; - 
if (prod > (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - l = prod; - } - a->length = l; - } - + a->ref.mem = mem; + if (layout->flags.arrayelem_isunion || layout->size == 0) + a->ref.ptr_or_offset = 0; + else + a->ref.ptr_or_offset = mem->ptr; + for (i = 0; i < ndims; i++) + a->dimsize[i] = dims[i]; return a; } -JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) +STATIC_INLINE jl_array_t *new_array(jl_value_t *atype, uint32_t ndims, size_t *dims) { - jl_task_t *ct = jl_current_task; - jl_array_t *a; - - int ndimwords = jl_array_ndimwords(1); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*); - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type); - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->flags.ndims = 1; - a->offset = 0; - a->data = jl_string_data(str); - a->flags.isaligned = 0; - a->elsize = 1; - a->flags.ptrarray = 0; - a->flags.hasptr = 0; - jl_array_data_owner(a) = str; - a->flags.how = 3; - a->flags.isshared = 1; - size_t l = jl_string_len(str); - a->length = l; - a->nrows = a->maxsize = l; + size_t nel; + if (jl_array_validate_dims(&nel, ndims, dims)) + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions: too large for system address width"); + if (*(size_t*)jl_tparam1(atype) != ndims) + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); + jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1); + // extra byte for all julia allocated byte vectors + jl_genericmemory_t *mem = jl_alloc_genericmemory(mtype, nel); + JL_GC_PUSH1(&mem); + jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, ndims, dims); + JL_GC_POP(); return a; } -// own_buffer != 0 iff GC should call free() on this pointer eventually +jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz); + +JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str); + JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, int own_buffer) { - jl_task_t *ct = jl_current_task; - jl_array_t *a; - jl_value_t *eltype = jl_tparam0(atype); - - int isunboxed = jl_stored_inline(eltype); - if (isunboxed && jl_is_uniontype(eltype)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: unspecified layout for union element type"); - size_t elsz; - unsigned align; - if (isunboxed) { - elsz = jl_datatype_size(eltype); - align = jl_datatype_align(eltype); - } - else { - align = elsz = sizeof(void*); - } - if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); - - int ndimwords = jl_array_ndimwords(1); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->data = data; - a->length = nel; - a->elsize = LLT_ALIGN(elsz, align); - a->flags.ptrarray = !isunboxed; - a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); - a->flags.ndims = 1; - a->flags.isshared = 1; - a->flags.isaligned = 0; // TODO: allow passing memalign'd buffers - if (own_buffer) { - a->flags.how = 2; - jl_gc_track_malloced_array(ct->ptls, a); - jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 
1 : 0)); - } - else { - a->flags.how = 0; - } - - a->nrows = nel; - a->maxsize = nel; - a->offset = 0; + if (*(size_t*)jl_tparam1(atype) != 1) + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); + jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1); + jl_genericmemory_t *mem = jl_ptr_to_genericmemory(mtype, data, nel, own_buffer); + JL_GC_PUSH1(&mem); + jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, 1, &nel); + JL_GC_POP(); return a; } JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_value_t *_dims, int own_buffer) { - jl_task_t *ct = jl_current_task; - size_t nel = 1; - jl_array_t *a; size_t ndims = jl_nfields(_dims); - wideint_t prod; assert(is_ntuple_long(_dims)); size_t *dims = (size_t*)_dims; - for (size_t i = 0; i < ndims; i++) { - prod = (wideint_t)nel * (wideint_t)dims[i]; - if (prod > (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - nel = prod; - } - if (__unlikely(ndims == 1)) - return jl_ptr_to_array_1d(atype, data, nel, own_buffer); - jl_value_t *eltype = jl_tparam0(atype); - - int isunboxed = jl_stored_inline(eltype); - if (isunboxed && jl_is_uniontype(eltype)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: unspecified layout for union element type"); - size_t elsz; - unsigned align; - if (isunboxed) { - elsz = jl_datatype_size(eltype); - align = jl_datatype_align(eltype); - } - else { - align = elsz = sizeof(void*); - } - if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); - - int ndimwords = jl_array_ndimwords(ndims); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->data = data; - a->length = nel; - a->elsize = LLT_ALIGN(elsz, align); - a->flags.ptrarray = !isunboxed; - a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); - a->flags.ndims = ndims; - a->offset = 0; - a->flags.isshared = 1; - a->flags.isaligned = 0; - if (own_buffer) { - a->flags.how = 2; - jl_gc_track_malloced_array(ct->ptls, a); - jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 
1 : 0)); - } - else { - a->flags.how = 0; - } - - assert(ndims != 1); // handled above - if (a->flags.ndims != ndims) + size_t nel; + if (jl_array_validate_dims(&nel, ndims, dims)) + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions: too large for system address width"); + if (*(size_t*)jl_tparam1(atype) != ndims) jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - memcpy(&a->nrows, dims, ndims * sizeof(size_t)); - return a; -} - -JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *_dims) -{ - size_t ndims = jl_nfields(_dims); - assert(is_ntuple_long(_dims)); - return _new_array(atype, ndims, (size_t*)_dims); -} - -JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr) -{ - return _new_array(atype, 1, &nr); -} - -JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, - size_t nc) -{ - size_t d[2] = {nr, nc}; - return _new_array(atype, 2, &d[0]); -} - -JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, - size_t nc, size_t z) -{ - size_t d[3] = {nr, nc, z}; - return _new_array(atype, 3, &d[0]); -} - -JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len) -{ - jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, len); - memcpy(a->data, str, len); + jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1); + jl_genericmemory_t *mem = jl_ptr_to_genericmemory(mtype, data, nel, own_buffer); + JL_GC_PUSH1(&mem); + jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, ndims, dims); + JL_GC_POP(); return a; } JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a) { - size_t len = jl_array_len(a); + size_t len = jl_array_nrows(a); // only for Vector if (len == 0) { // this may seem like purely an optimization (which it also is), but it // also ensures that calling `String(a)` doesn't corrupt a previous // string also created the same way, where `a = StringVector(_)`. 
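The new `jl_array_validate_dims` above replaces the old `wideint_t` widening product with a compiler overflow check (and still rejects any single dimension at or above `MAXINTVAL`). A standalone sketch of the same idea using the GCC/Clang `__builtin_mul_overflow` intrinsic; `dims_product` is an illustrative name, not Julia's API.

```cpp
#include <cstddef>

// Multiply array dimensions together, reporting rather than wrapping on
// overflow. Mirrors the shape of jl_array_validate_dims above.
static bool dims_product(const size_t *dims, unsigned ndims, size_t *nel_out)
{
    size_t nel = 1;
    for (unsigned i = 0; i < ndims; i++) {
        // __builtin_mul_overflow (GCC/Clang) returns true if the result wrapped.
        if (__builtin_mul_overflow(nel, dims[i], &nel))
            return false;
    }
    *nel_out = nel;
    return true;
}
```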
return jl_an_empty_string; } - if (a->flags.how == 3 && a->offset == 0 && a->elsize == 1 && - (jl_array_ndims(a) != 1 || - ((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) { - jl_value_t *o = jl_array_data_owner(a); - if (jl_is_string(o)) { - a->flags.isshared = 1; - *(size_t*)o = len; - a->nrows = 0; - a->length = 0; - a->maxsize = 0; - return o; - } - } - a->nrows = 0; - a->length = 0; - a->maxsize = 0; - return jl_pchar_to_string((const char*)jl_array_data(a), len); + jl_value_t *str; + if (a->ref.ptr_or_offset == a->ref.mem->ptr) + str = jl_genericmemory_to_string(a->ref.mem, len); + else + str = jl_pchar_to_string(jl_array_data(a, char), len); + a->ref.mem = (jl_genericmemory_t*)((jl_datatype_t*)jl_memory_uint8_type)->instance; + a->ref.ptr_or_offset = a->ref.mem->ptr; + a->dimsize[0] = 0; + return str; } -JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) +JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr) { - if (len == 0) - return jl_an_empty_string; - size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size - if (sz < len) // overflow - jl_throw(jl_memory_exception); - jl_task_t *ct = jl_current_task; - jl_value_t *s; - jl_ptls_t ptls = ct->ptls; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass_align8(allocsz); - jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - s = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typetagof(s, jl_string_tag, 0); - maybe_record_alloc_to_profile(s, len, jl_string_type); - *(size_t*)s = len; - jl_string_data(s)[len] = 0; - return s; + return new_array(atype, 1, &nr); } -JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) +JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc) { - jl_value_t *s = jl_alloc_string(len); - if (len > 0) - memcpy(jl_string_data(s), str, len); - return s; + size_t dims[2] = {nr, nc}; + return new_array(atype, 2, &dims[0]); } -JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str) +JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z) { - return jl_pchar_to_string(str, strlen(str)); + size_t dims[3] = {nr, nc, z}; + return new_array(atype, 3, &dims[0]); +} + +JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims) +{ + return new_array(atype, ndims, dims); +} + +JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len) +{ + jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, len); + assert(jl_array_data(a, char)); + memcpy(jl_array_data(a, char), str, len); + return a; } JL_DLLEXPORT jl_array_t *jl_alloc_vec_any(size_t n) @@ -543,714 +188,70 @@ JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim) return ret; } -// array primitives ----------------------------------------------------------- - -JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT -{ - assert(i < jl_array_len(a)); - assert(a->flags.ptrarray); - 
jl_value_t *elt = jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)a->data) + i); - if (elt == NULL) - jl_throw(jl_undefref_exception); - return elt; -} - - -JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i) -{ - if (a->flags.ptrarray) - return jl_ptrarrayref(a, i); - assert(i < jl_array_len(a)); - jl_value_t *eltype = (jl_value_t*)jl_tparam0(jl_typeof(a)); - if (jl_is_uniontype(eltype)) { - // isbits union selector bytes are always stored directly after the last array element - uint8_t sel = jl_array_typetagdata(a)[i]; - eltype = jl_nth_union_component(eltype, sel); - if (jl_is_datatype_singleton((jl_datatype_t*)eltype)) - return ((jl_datatype_t*)eltype)->instance; - } - jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize])); - if (__unlikely(r == NULL)) - jl_throw(jl_undefref_exception); - return r; -} - -JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i) -{ - if (a->flags.ptrarray) { - return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)jl_array_data(a)) + i) != NULL; - } - else if (a->flags.hasptr) { - jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - assert(eltype->layout->first_ptr >= 0); - jl_value_t **elem = (jl_value_t**)((char*)a->data + i * a->elsize); - return elem[eltype->layout->first_ptr] != NULL; - } - return 1; -} - -JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i) -{ - assert(i < jl_array_len(a)); - jl_value_t *eltype = jl_tparam0(jl_typeof(a)); - if (eltype != (jl_value_t*)jl_any_type) { - JL_GC_PUSH1(&rhs); - if (!jl_isa(rhs, eltype)) - jl_type_error("arrayset", eltype, rhs); - JL_GC_POP(); - } - if (!a->flags.ptrarray) { - int hasptr; - if (jl_is_uniontype(eltype)) { - uint8_t *psel = &((uint8_t*)jl_array_typetagdata(a))[i]; - unsigned nth = 0; - if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth)) - assert(0 && "invalid arrayset to isbits union"); - *psel = nth; - if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs))) - return; - hasptr = 0; - } - else { - hasptr = a->flags.hasptr; - } - arrayassign_safe(hasptr, jl_array_owner(a), &((char*)a->data)[i * a->elsize], rhs, a->elsize); - } - else { - jl_atomic_store_release(((_Atomic(jl_value_t*)*)a->data) + i, rhs); - jl_gc_wb(jl_array_owner(a), rhs); - } -} - -JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i) -{ - if (i >= jl_array_len(a)) - jl_bounds_error_int((jl_value_t*)a, i + 1); - if (a->flags.ptrarray) - jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, NULL); - else if (a->flags.hasptr) { - size_t elsize = a->elsize; - jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); - memset((char*)a->data + elsize * i, 0, elsize); - } -} - -// at this size and bigger, allocate resized array data with malloc directly -// instead of managing them separately as gc objects -#define MALLOC_THRESH 1048576 - -// Resize the buffer to a max size of `newlen` -// The buffer can either be newly allocated or realloc'd, the return -// value is 1 if a new buffer is allocated and 0 if it is realloc'd. -// the caller needs to take care of moving the data from the old buffer -// to the new one if necessary. -// When this function returns, the `->data` pointer always points to -// the **beginning** of the new buffer. 
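The legacy element primitives removed above (`jl_arrayref`, `jl_arrayset`, and the resize machinery below) all manipulated the old `flags`/`data`/`maxsize` array header. Under this patch an `Array` is instead a thin view over a `GenericMemory` buffer, as used by the new constructors earlier in `array.c`. A conceptual sketch of that shape follows; the field names mirror the diff (`a->ref.mem`, `a->ref.ptr_or_offset`, `a->dimsize[i]`), but these structs are illustrative, not the real `julia.h` definitions.

```cpp
#include <cstddef>

// Conceptual layout only -- not the actual julia.h declarations.
struct MemorySketch;            // stands in for jl_genericmemory_t

struct MemoryRefSketch {
    void *ptr_or_offset;        // element pointer, or a byte offset for isbits-union elements
    MemorySketch *mem;          // the GenericMemory that owns the storage
};

struct ArraySketch {
    MemoryRefSketch ref;        // where the elements live
    size_t dimsize[1];          // ndims sizes stored inline after the header
};
```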
-static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) -{ - jl_task_t *ct = jl_current_task; - assert(!a->flags.isshared || a->flags.how == 3); - size_t elsz = a->elsize; - size_t nbytes = newlen * elsz; - size_t oldnbytes = a->maxsize * elsz; - size_t oldoffsnb = a->offset * elsz; - size_t oldlen = a->nrows; - int isbitsunion = jl_array_isbitsunion(a); - assert(nbytes >= oldnbytes); - if (elsz == 1 && !isbitsunion) { - nbytes++; - oldnbytes++; - } - if (isbitsunion) { - nbytes += newlen; - oldnbytes += a->maxsize; - } - int newbuf = 0; - if (a->flags.how == 2) { - // already malloc'd - use realloc - char *olddata = (char*)a->data - oldoffsnb; - a->data = jl_gc_managed_realloc(olddata, nbytes, oldnbytes, - a->flags.isaligned, (jl_value_t*)a); - } - else if (a->flags.how == 3 && jl_is_string(jl_array_data_owner(a)) && !isbitsunion) { - // if data is in a String, keep it that way - jl_value_t *s; - if (a->flags.isshared) { - s = jl_alloc_string(nbytes - (elsz == 1)); - newbuf = 1; - } - else { - s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (elsz == 1)); - } - jl_array_data_owner(a) = s; - jl_gc_wb(a, s); - a->data = jl_string_data(s); - } - else { - newbuf = 1; - if (nbytes >= MALLOC_THRESH) { - a->data = jl_gc_managed_malloc(nbytes); - jl_gc_track_malloced_array(ct->ptls, a); - a->flags.how = 2; - a->flags.isaligned = 1; - } - else { - a->data = jl_gc_alloc_buf(ct->ptls, nbytes); - a->flags.how = 1; - jl_gc_wb_buf(a, a->data, nbytes); - } - } - if (JL_ARRAY_IMPL_NUL && elsz == 1 && !isbitsunion) - memset((char*)a->data + oldnbytes - 1, 0, nbytes - oldnbytes + 1); - (void)oldlen; - assert(oldlen == a->nrows && - "Race condition detected: recursive resizing on the same array."); - a->flags.isshared = 0; - a->maxsize = newlen; - return newbuf; -} - -static void NOINLINE array_try_unshare(jl_array_t *a) -{ - if (a->flags.isshared) { - if (a->flags.how != 3) - jl_error("cannot resize array with shared data"); - // allow resizing when data is shared with a String - if (jl_is_string(jl_array_data_owner(a))) - return; - assert(a->offset == 0); - size_t len = a->maxsize; - size_t nbytes = len * a->elsize; - if (jl_array_isbitsunion(a)) { - nbytes += len; - } - char *olddata = (char*)a->data; - int newbuf = array_resize_buffer(a, len); - assert(newbuf); - (void)newbuf; - memcpy(a->data, olddata, nbytes); - } -} - -size_t overallocation(size_t maxsize) -{ - if (maxsize < 8) - return 8; - // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 - // for small n, we grow faster than O(n) - // for large n, we grow at O(n/8) - // and as we reach O(memory) for memory>>1MB, - // this means we end by adding about 10% of memory each time - int exp2 = sizeof(maxsize) * 8 - -#ifdef _P64 - __builtin_clzll(maxsize); -#else - __builtin_clz(maxsize); -#endif - maxsize += ((size_t)1 << (exp2 * 7 / 8)) * 4 + maxsize / 8; - return maxsize; -} - -STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc, - size_t n) +JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc) { - // designed to handle the case of growing and shrinking at both ends - if (__unlikely(a->flags.isshared)) { - if (a->flags.how != 3) - jl_error("cannot resize array with shared data"); - if (inc == 0) { - // If inc > 0, it will always trigger the slow path and unshare the - // buffer - array_try_unshare(a); - return; - } - } + size_t n = jl_array_nrows(a); + size_t elsz = jl_array_elsize(a); + char *data = jl_array_data(a,char); + jl_value_t *mtype = (jl_value_t*)jl_typetagof(a->ref.mem); + 
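/*
 * How the fields used above fit together (illustration only), for a 1-d array
 * whose element type is not an isbits union:
 *   a->ref.mem           -- the backing GenericMemory buffer
 *   a->ref.mem->length   -- its capacity, in elements
 *   a->ref.ptr_or_offset -- pointer to the first element in use
 *   a->dimsize[0]        -- number of elements currently in use
 * For example, a Vector{Int64} of length 3 that starts at element offset 2 of
 * a Memory of length 8 has ptr_or_offset == (char*)mem->ptr + 2*8 and
 * dimsize[0] == 3, leaving 8 - 2 - 3 == 3 elements of room at the tail, so
 * the remainder of jl_array_grow_end below can grow by up to 3 elements
 * without allocating a new Memory.
 */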
int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem); size_t newnrows = n + inc; - size_t elsz = a->elsize; - size_t nbinc = inc * elsz; - char *data = (char*)a->data; - char *newdata; - char *typetagdata; - char *newtypetagdata = NULL; - int isbitsunion = jl_array_isbitsunion(a); - if (isbitsunion) typetagdata = jl_array_typetagdata(a); - if (a->offset >= inc) { - // already have enough space in a->offset - newdata = data - nbinc; - a->offset -= inc; - if (isbitsunion) newtypetagdata = typetagdata - inc; - if (idx > 0) { - // inserting new elements after 1st element - memmove_safe(a->flags.hasptr, newdata, data, idx * elsz); - if (isbitsunion) { - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - } - } - else { - // not enough room for requested growth from existing a->offset - size_t oldoffset = a->offset; - size_t oldoffsnb = oldoffset * elsz; - size_t oldmaxsize = a->maxsize; - size_t nb1 = idx * elsz; - if (inc > (a->maxsize - n) / 2 - (a->maxsize - n) / 20) { - // not enough room for requested growth from end of array - size_t newlen = inc * 2; - while (n + 2 * inc > newlen - a->offset) - newlen *= 2; - size_t newmaxsize = overallocation(a->maxsize); - if (newlen < newmaxsize) - newlen = newmaxsize; - size_t newoffset = (newlen - newnrows) / 2; - if (!array_resize_buffer(a, newlen)) { - data = (char*)a->data + oldoffsnb; - } - newdata = (char*)a->data + newoffset * elsz; - if (isbitsunion) { - typetagdata = data + (oldmaxsize - oldoffset) * elsz + oldoffset; - newtypetagdata = newdata + (a->maxsize - newoffset) * elsz + newoffset; - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - } - // We could use memcpy if resizing allocates a new buffer, - // hopefully it's not a particularly important optimization. 
- if (idx > 0 && newdata < data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - } - memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1); - if (idx > 0 && newdata > data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - } - a->offset = newoffset; - } - else { - // use extra space between a->nrows & a->maxsize - a->offset = (a->maxsize - newnrows) / 2; - newdata = data - oldoffsnb + a->offset * elsz; - if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset; - if (idx > 0 && newdata < data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - if (isbitsunion) { - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - } - memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1); - if (isbitsunion) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - if (idx > 0 && newdata > data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - if (isbitsunion) { - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - } - } - } - a->length = newnrows; - a->nrows = newnrows; - a->data = newdata; - if (jl_is_array_zeroinit(a)) { - memset(newdata + idx * elsz, 0, nbinc); - } - if (newtypetagdata) { - memset(newtypetagdata + idx, 0, inc); - } -} - -STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx, - size_t inc, size_t n) -{ - // optimized for the case of only growing and shrinking at the end - if (__unlikely(a->flags.isshared)) { - if (a->flags.how != 3) - jl_error("cannot resize array with shared data"); - if (inc == 0) { - // If inc > 0, it will always trigger the slow path and unshare the - // buffer - array_try_unshare(a); - return; - } + if (!isbitsunion && elsz == 0) { + jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 2); + a->ref.mem = newmem; + jl_gc_wb(a, newmem); + a->dimsize[0] = newnrows; + return; } - size_t elsz = a->elsize; - char *data = (char*)a->data; - char *typetagdata; - char *newtypetagdata; - int isbitsunion = jl_array_isbitsunion(a); - if (isbitsunion) typetagdata = jl_array_typetagdata(a); - int has_gap = n > idx; - size_t reqmaxsize = a->offset + n + inc; - if (__unlikely(reqmaxsize > a->maxsize)) { - size_t nb1 = idx * elsz; - size_t nbinc = inc * elsz; - // grow either by our computed overallocation factor or exactly the requested size, - // whichever is larger - size_t newmaxsize = overallocation(a->maxsize); + size_t oldoffset = isbitsunion ? (size_t)data : (data - (char*)a->ref.mem->ptr) / elsz; + if (isbitsunion) + data = (char*)a->ref.mem->ptr + oldoffset * elsz; + size_t oldmaxsize = a->ref.mem->length; + size_t reqmaxsize = oldoffset + newnrows; + if (__unlikely(reqmaxsize > oldmaxsize)) { + size_t newmaxsize; + if (oldmaxsize < 4) // typical sequence: 0, // 4, // 6, 9, 13, 19, 28, 42, // 50, 60, 72, ... 
+ newmaxsize = 4; + else if (oldmaxsize < 48) + newmaxsize = oldmaxsize*3/2; // grow by 50% + else + newmaxsize = oldmaxsize*6/5; // grow by 20% if (newmaxsize < reqmaxsize) newmaxsize = reqmaxsize; - size_t oldmaxsize = a->maxsize; - int newbuf = array_resize_buffer(a, newmaxsize); - char *newdata = (char*)a->data + a->offset * elsz; - if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset; - if (newbuf) { - memcpy(newdata, data, nb1); - if (isbitsunion) { - memcpy(newtypetagdata, typetagdata, idx); - if (has_gap) memcpy(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - memset(newtypetagdata + idx, 0, inc); - } - if (has_gap) memcpy(newdata + nb1 + nbinc, data + nb1, n * elsz - nb1); - } - else { - if (isbitsunion) { - typetagdata = newdata + (oldmaxsize - a->offset) * elsz + a->offset; - if (has_gap) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - if (has_gap) memmove_safe(a->flags.hasptr, newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1); - } - a->data = data = newdata; - } - else if (has_gap) { - if (isbitsunion) { - memmove(typetagdata + idx + inc, typetagdata + idx, n - idx); - memset(typetagdata + idx, 0, inc); - } - size_t nb1 = idx * elsz; - memmove_safe(a->flags.hasptr, data + nb1 + inc * elsz, data + nb1, n * elsz - nb1); - } - else { - // there was enough room for requested growth already in a->maxsize - if (isbitsunion) - memset(typetagdata + idx, 0, inc); - } - size_t newnrows = n + inc; - a->length = newnrows; - a->nrows = newnrows; - if (jl_is_array_zeroinit(a)) { - memset(data + idx * elsz, 0, inc * elsz); - } -} - -JL_DLLEXPORT void jl_array_grow_at(jl_array_t *a, ssize_t idx, size_t inc) -{ - // No need to explicitly unshare. - // Shared arrays are guaranteed to trigger the slow path for growing. 
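/*
 * A standalone sketch of the capacity policy used by jl_array_grow_end above
 * (hypothetical helper name, illustration only). With integer division:
 * (oldmaxsize, reqmaxsize) = (10, 11) gives 15, (100, 101) gives 120, and
 * (100, 200) gives 200 because the caller's request always wins.
 */
#include <stddef.h>

static size_t sketch_new_capacity(size_t oldmaxsize, size_t reqmaxsize)
{
    size_t newmaxsize;
    if (oldmaxsize < 4)
        newmaxsize = 4;                   // start with a small fixed capacity
    else if (oldmaxsize < 48)
        newmaxsize = oldmaxsize * 3 / 2;  // grow by 50% while the buffer is small
    else
        newmaxsize = oldmaxsize * 6 / 5;  // grow by 20% once it is large
    if (newmaxsize < reqmaxsize)
        newmaxsize = reqmaxsize;          // never return less than was asked for
    return newmaxsize;
}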
- size_t n = jl_array_nrows(a); - if (idx < 0 || idx > n) - jl_bounds_error_int((jl_value_t*)a, idx + 1); - if (idx + 1 < n / 2) { - jl_array_grow_at_beg(a, idx, inc, n); - } - else { - jl_array_grow_at_end(a, idx, inc, n); - } -} - -JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc) -{ - size_t n = jl_array_nrows(a); - jl_array_grow_at_end(a, n, inc, n); -} - -JL_DLLEXPORT void jl_array_grow_beg(jl_array_t *a, size_t inc) -{ - size_t n = jl_array_nrows(a); - jl_array_grow_at_beg(a, 0, inc, n); -} - -STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec) -{ - //if we don't manage this array return - if (a->flags.how == 0) return; - - size_t elsz = a->elsize; - size_t newbytes = (a->maxsize - dec) * a->elsize; - size_t oldnbytes = (a->maxsize) * a->elsize; - int isbitsunion = jl_array_isbitsunion(a); - if (isbitsunion) { - newbytes += a->maxsize - dec; - oldnbytes += a->maxsize; - } - - if (elsz == 1 && !isbitsunion) { - newbytes++; - oldnbytes++; - } - char *originalptr = ((char*) a->data) - a->offset * a->elsize; - if (a->flags.how == 1) { - //this is a julia-allocated buffer that needs to be marked - char *typetagdata; - char *newtypetagdata; - if (isbitsunion) { - typetagdata = (char*)malloc_s(a->nrows); - memcpy(typetagdata, jl_array_typetagdata(a), a->nrows); - } - jl_task_t *ct = jl_current_task; - char *originaldata = (char*) a->data - a->offset * a->elsize; - char *newdata = (char*)jl_gc_alloc_buf(ct->ptls, newbytes); - jl_gc_wb_buf(a, newdata, newbytes); - a->maxsize -= dec; - if (isbitsunion) { - newtypetagdata = jl_array_typetagdata(a); - memcpy(newtypetagdata, typetagdata, a->nrows); - free(typetagdata); - } - memcpy(newdata, originaldata, newbytes); - a->data = newdata + a->offset * elsz; - } - else if (a->flags.how == 2) { - //malloc-allocated pointer this array object manages - char *typetagdata; - char *newtypetagdata; - if (isbitsunion) { - typetagdata = (char*)malloc_s(a->nrows); - memcpy(typetagdata, jl_array_typetagdata(a), a->nrows); - } - size_t oldoffsnb = a->offset * elsz; - a->data = ((char*)jl_gc_managed_realloc(originalptr, newbytes, oldnbytes, - a->flags.isaligned, (jl_value_t*) a)) + oldoffsnb; - a->maxsize -= dec; - if (isbitsunion) { - newtypetagdata = jl_array_typetagdata(a); - memcpy(newtypetagdata, typetagdata, a->nrows); - free(typetagdata); - } - } - else if (a->flags.how == 3) { - //this has has a pointer to the object that owns the data - } -} - -static size_t jl_array_limit_offset(jl_array_t *a, size_t offset) -{ - // make sure offset doesn't grow forever due to deleting at beginning - // and growing at end - if (offset >= 13 * a->maxsize / 20) - offset = 17 * (a->maxsize - a->nrows) / 100; -#ifdef _P64 - while (offset > (size_t)UINT32_MAX) { - offset /= 2; - } -#endif - return offset; -} - -STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec, - size_t n) -{ - // no error checking - // assume inbounds, assume unshared - size_t elsz = a->elsize; - size_t offset = a->offset; - int isbitsunion = jl_array_isbitsunion(a); - offset += dec; - a->length = n - dec; - a->nrows = n - dec; - size_t newoffs = jl_array_limit_offset(a, offset); - assert(newoffs <= offset); - size_t nbdec = dec * elsz; - if (__unlikely(newoffs != offset) || idx > 0) { - char *olddata = (char*)a->data; - char *newdata = olddata - (a->offset - newoffs) * elsz; - char *typetagdata; - char *newtypetagdata; - if (isbitsunion) { - typetagdata = jl_array_typetagdata(a); - newtypetagdata = typetagdata - (a->offset - newoffs); - } - - size_t nb1 = 
idx * elsz; // size in bytes of the first block - size_t nbtotal = a->nrows * elsz; // size in bytes of the new array - // Implicit '\0' for byte arrays - if (elsz == 1 && !isbitsunion) - nbtotal++; - if (idx > 0) { - memmove_safe(a->flags.hasptr, newdata, olddata, nb1); - if (isbitsunion) memmove(newtypetagdata, typetagdata, idx); - } - // Move the rest of the data if the offset changed - if (newoffs != offset) { - memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1); - if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, a->nrows - idx); - } - a->data = newdata; - } - else { - char *data = (char*)a->data; - a->data = data + nbdec; - } - a->offset = newoffs; -} - -STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec, - size_t n) -{ - // no error checking - // assume inbounds, assume unshared - char *data = (char*)a->data; - size_t elsz = a->elsize; - int isbitsunion = jl_array_isbitsunion(a); - size_t last = idx + dec; - if (n > last) { - memmove_safe(a->flags.hasptr, data + idx * elsz, data + last * elsz, (n - last) * elsz); + // TODO: round this up to newmaxsize < GC_MAX_SZCLASS ? jl_gc_sizeclasses[jl_gc_szclass(newmaxsize)] : LLT_ALIGN(newmaxsize, 4096), after accounting for the object header (24 bytes) + jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, newmaxsize); + char *newdata = (char*)newmem->ptr + oldoffset * elsz; + memcpy(newdata, data, n * elsz); if (isbitsunion) { char *typetagdata = jl_array_typetagdata(a); - memmove(typetagdata + idx, typetagdata + last, n - last); + char *newtypetagdata = (char*)newmem->ptr + newmaxsize * elsz + oldoffset; + memcpy(newtypetagdata, typetagdata, n); } + a->ref.mem = newmem; + jl_gc_wb(a, newmem); + if (isbitsunion) + a->ref.ptr_or_offset = (void*)oldoffset; + else + a->ref.ptr_or_offset = newdata; } - n -= dec; - if (elsz == 1 && !isbitsunion) - data[n] = 0; - a->nrows = n; - a->length = n; -} - -JL_DLLEXPORT void jl_array_del_at(jl_array_t *a, ssize_t idx, size_t dec) -{ - size_t n = jl_array_nrows(a); - size_t last = idx + dec; - if (__unlikely(idx < 0)) - jl_bounds_error_int((jl_value_t*)a, idx + 1); - if (__unlikely(last > n)) - jl_bounds_error_int((jl_value_t*)a, last); - // The unsharing needs to happen before we modify the buffer - if (__unlikely(a->flags.isshared)) - array_try_unshare(a); - if (idx < n - last) { - jl_array_del_at_beg(a, idx, dec, n); - } - else { - jl_array_del_at_end(a, idx, dec, n); - } -} - -JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec) -{ - size_t n = jl_array_nrows(a); - if (__unlikely(dec > n)) - jl_bounds_error_int((jl_value_t*)a, dec); - if (__unlikely(a->flags.isshared)) - array_try_unshare(a); - if (dec == 0) - return; - jl_array_del_at_beg(a, 0, dec, n); + a->dimsize[0] = newnrows; } JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec) { + // assume inbounds, assume unshared size_t n = jl_array_nrows(a); if (__unlikely(n < dec)) jl_bounds_error_int((jl_value_t*)a, 0); - if (__unlikely(a->flags.isshared)) - array_try_unshare(a); - if (dec == 0) + if (__unlikely(dec == 0)) return; - jl_array_del_at_end(a, n - dec, dec, n); -} - -JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz) -{ - size_t n = jl_array_nrows(a); - - size_t min = a->offset + a->length; - sz = (sz < min) ? 
min : sz; - - if (sz <= a->maxsize) { - size_t dec = a->maxsize - sz; - //if we don't save at least an eighth of maxsize then its not worth it to shrink - if (dec <= a->maxsize / 8) return; - jl_array_shrink(a, dec); - } - else { - size_t inc = sz - n; - jl_array_grow_end(a, inc); - - a->nrows = n; - a->length = n; - } -} - -JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) -{ - size_t elsz = ary->elsize; - size_t len = jl_array_len(ary); - int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary))); - jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary), - &ary->nrows, !ary->flags.ptrarray, - ary->flags.hasptr, isunion, 0, elsz); - memcpy(new_ary->data, ary->data, len * elsz); - // ensure isbits union arrays copy their selector bytes correctly - if (jl_array_isbitsunion(ary)) - memcpy(jl_array_typetagdata(new_ary), jl_array_typetagdata(ary), len); - return new_ary; -} - -// Copy element by element until we hit a young object, at which point -// we can finish by using `memmove`. -static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner, - void **src_p, void **dest_p, - ssize_t n) JL_NOTSAFEPOINT -{ - _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; - _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; - for (ssize_t i = 0; i < n; i++) { - void *val = jl_atomic_load_relaxed(src_pa + i); - jl_atomic_store_release(dest_pa + i, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - return i; - } - } - return n; -} - -static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner, - void **src_p, void **dest_p, - ssize_t n) JL_NOTSAFEPOINT -{ - _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; - _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; - for (ssize_t i = 0; i < n; i++) { - void *val = jl_atomic_load_relaxed(src_pa + n - i - 1); - jl_atomic_store_release(dest_pa + n - i - 1, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - return i; - } - } - return n; -} - -// Unsafe, assume inbounds and that dest and src have the same eltype -JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p, - jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT -{ - assert(dest->flags.ptrarray && src->flags.ptrarray); - jl_value_t *owner = jl_array_owner(dest); - // Destination is old and doesn't refer to any young object - if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { - jl_value_t *src_owner = jl_array_owner(src); - // Source is young or being promoted or might refer to young objects - // (i.e. 
source is not an old object that doesn't have wb triggered) - if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { - ssize_t done; - if (dest_p < src_p || dest_p > src_p + n) { - done = jl_array_ptr_copy_forward(owner, src_p, dest_p, n); - dest_p += done; - src_p += done; - } - else { - done = jl_array_ptr_copy_backward(owner, src_p, dest_p, n); - } - n -= done; - } + n -= dec; + a->dimsize[0] = n; + // don't leave behind deleted data + if (jl_is_genericmemory_zeroinit(a->ref.mem) && !jl_genericmemory_isbitsunion(a->ref.mem)) { + size_t elsz = jl_array_elsize(a); + memset(jl_array_data(a,char) + n * elsz, 0, elsz * dec); } - memmove_refs(dest_p, src_p, n); } JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item) @@ -1274,50 +275,63 @@ JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2) } } -JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a) JL_NOTSAFEPOINT +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t *mem, void *data, size_t len); + +JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) { - return jl_array_data_owner(a); + size_t len = jl_array_len(ary); + jl_genericmemory_t *mem = jl_genericmemory_copy_slice(ary->ref.mem, ary->ref.ptr_or_offset, len); + JL_GC_PUSH1(&mem); + jl_array_t *new_ary = _new_array((jl_value_t*)jl_typetagof(ary), mem, ((jl_datatype_t*)jl_typetagof(ary->ref.mem))->layout, jl_array_ndims(ary), &ary->dimsize[0]); + JL_GC_POP(); + return new_ary; } -STATIC_INLINE int jl_has_implicit_byte_owned(jl_array_t *a) +JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) { - assert(a->flags.how != 3); - if (!a->flags.isshared) - return 1; - return a->flags.how == 1; + if (len == 0) + return jl_an_empty_string; + size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size + if (sz < len) // overflow + jl_throw(jl_memory_exception); + jl_task_t *ct = jl_current_task; + jl_value_t *s; + jl_ptls_t ptls = ct->ptls; + s = (jl_value_t*)jl_gc_alloc(ptls, sz, jl_string_type); + jl_set_typetagof(s, jl_string_tag, 0); + *(size_t*)s = len; + jl_string_data(s)[len] = 0; + return s; } -STATIC_INLINE int jl_has_implicit_byte(jl_array_t *a) +JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) { - // * unshared: - // * how: 0-2 - // We own and allocated the data. - // It should have the extra byte. - // * shared: - // * how: 0, 2 - // The data might come from external source without implicit NUL byte. - // There could be an entra byte for a `reinterpreted` array - // but that should be unlikely for strings. - // * how: 1 - // We allocated the data with the extra byte. - // * how: 3 - // We should check the owner. 
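/*
 * A short worked example of the string layout produced by jl_alloc_string
 * above, on a 64-bit build (illustration only, assumes <julia.h> and
 * <assert.h>): jl_alloc_string(5) requests sizeof(size_t) + 5 + 1 = 14 bytes,
 * i.e. an 8-byte length header holding 5, then 5 payload bytes, then one
 * trailing NUL so the payload can be handed to C functions that expect a
 * terminated string.
 */
static void string_sketch(void)
{
    jl_value_t *s = jl_pchar_to_string("hello", 5); // copies 5 bytes into the payload
    assert(jl_string_len(s) == 5);                  // length read back from the header
    assert(jl_string_data(s)[5] == '\0');           // the trailing NUL protector
    (void)s;
}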
- if (a->flags.how == 3) { - a = (jl_array_t*)jl_array_data_owner(a); - if (jl_is_string(a)) return 1; - return a->elsize == 1 && jl_has_implicit_byte_owned(a); - } - return jl_has_implicit_byte_owned(a); + jl_value_t *s = jl_alloc_string(len); + if (len > 0) + memcpy(jl_string_data(s), str, len); + return s; } -// Create an array with the same content -JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a) +JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str) { - assert(jl_typeof(a) == jl_array_uint8_type); - if (!jl_has_implicit_byte(a)) - a = jl_array_copy(a); - ((char*)a->data)[a->nrows] = 0; - return a; + return jl_pchar_to_string(str, strlen(str)); +} + + +// deprecated and unused internally, but some packages (notably OrderedCollections.jl) have not yet started to use the modern Base.unsetindex API +JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i) +{ + if (i >= jl_array_len(a)) + jl_bounds_error_int((jl_value_t*)a, i + 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; + if (layout->flags.arrayelem_isboxed) { + jl_atomic_store_relaxed(jl_array_data(a,_Atomic(jl_value_t*)) + i, NULL); + } + else if (layout->first_ptr >= 0) { + size_t elsize = layout->size; + jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); + memset(jl_array_data(a,char) + elsize * i, 0, elsize); + } } #ifdef __cplusplus diff --git a/src/ast.c b/src/ast.c index 06727b453d6a3..0f24d96393f2f 100644 --- a/src/ast.c +++ b/src/ast.c @@ -7,6 +7,7 @@ #include #include #include + #ifdef _OS_WINDOWS_ #include #endif @@ -28,6 +29,7 @@ JL_DLLEXPORT jl_sym_t *jl_top_sym; JL_DLLEXPORT jl_sym_t *jl_module_sym; JL_DLLEXPORT jl_sym_t *jl_slot_sym; JL_DLLEXPORT jl_sym_t *jl_export_sym; +JL_DLLEXPORT jl_sym_t *jl_public_sym; JL_DLLEXPORT jl_sym_t *jl_import_sym; JL_DLLEXPORT jl_sym_t *jl_toplevel_sym; JL_DLLEXPORT jl_sym_t *jl_quote_sym; @@ -59,6 +61,8 @@ JL_DLLEXPORT jl_sym_t *jl_thunk_sym; JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym; JL_DLLEXPORT jl_sym_t *jl_as_sym; JL_DLLEXPORT jl_sym_t *jl_global_sym; +JL_DLLEXPORT jl_sym_t *jl_globaldecl_sym; +JL_DLLEXPORT jl_sym_t *jl_local_sym; JL_DLLEXPORT jl_sym_t *jl_list_sym; JL_DLLEXPORT jl_sym_t *jl_dot_sym; JL_DLLEXPORT jl_sym_t *jl_newvar_sym; @@ -97,6 +101,8 @@ JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym; JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym; JL_DLLEXPORT jl_sym_t *jl_optlevel_sym; JL_DLLEXPORT jl_sym_t *jl_thismodule_sym; +JL_DLLEXPORT jl_sym_t *jl_eval_sym; +JL_DLLEXPORT jl_sym_t *jl_include_sym; JL_DLLEXPORT jl_sym_t *jl_atom_sym; JL_DLLEXPORT jl_sym_t *jl_statement_sym; JL_DLLEXPORT jl_sym_t *jl_all_sym; @@ -112,7 +118,8 @@ JL_DLLEXPORT jl_sym_t *jl_acquire_sym; JL_DLLEXPORT jl_sym_t *jl_release_sym; JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym; JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym; - +JL_DLLEXPORT jl_sym_t *jl_uninferred_sym; +JL_DLLEXPORT jl_sym_t *jl_latestworld_sym; static const uint8_t flisp_system_image[] = { #include @@ -149,32 +156,71 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v); static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error); +static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s) +{ + assert(issymbol(s)); + if (fl_isgensym(fl_ctx, s)) { + char gsname[16]; + char *n = uint2str(&gsname[1], sizeof(gsname)-1, + ((gensym_t*)ptr(s))->id, 10); + 
*(--n) = '#'; + return jl_symbol(n); + } + return jl_symbol(symbol_name(fl_ctx, s)); +} + static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) { // tells whether a var is defined in and *by* the current module argcount(fl_ctx, "defined-julia-global", nargs, 1); (void)tosymbol(fl_ctx, args[0], "defined-julia-global"); jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); - jl_sym_t *var = jl_symbol(symbol_name(fl_ctx, args[0])); + jl_sym_t *var = scmsym_to_julia(fl_ctx, args[0]); jl_binding_t *b = jl_get_module_binding(ctx->module, var, 0); - return (b != NULL && jl_atomic_load_relaxed(&b->owner) == b) ? fl_ctx->T : fl_ctx->F; + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + return (bpart != NULL && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL) ? fl_ctx->T : fl_ctx->F; } -static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT +// Used to generate a unique suffix for a given symbol (e.g. variable or type name) +// first argument contains a stack of method definitions seen so far by `closure-convert` in flisp. +// if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes +// of the form $top_level_method_name##$counter, where `counter` is the smallest integer +// such that the resulting name is not already defined in the current module's bindings. +// If the top of the stack is NIL, we simply return the current module's counter. +// This ensures that precompile statements are a bit more stable across different versions +// of a codebase. see #53719 +static value_t fl_module_unique_name(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) { + argcount(fl_ctx, "julia-module-unique-name", nargs, 1); jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); - assert(ctx->module); - return fixnum(jl_module_next_counter(ctx->module)); -} - -static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT -{ - return symbol(fl_ctx, jl_filename); -} - -static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT -{ - return fixnum(jl_lineno); + jl_module_t *m = ctx->module; + assert(m != NULL); + // Get the outermost function name from the `parsed_method_stack` top + char *funcname = NULL; + value_t parsed_method_stack = args[0]; + if (parsed_method_stack != fl_ctx->NIL) { + value_t bottom_stack_symbol = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "last")), parsed_method_stack); + funcname = tosymbol(fl_ctx, bottom_stack_symbol, "julia-module-unique-name")->name; + } + size_t sz = funcname != NULL ? 
strlen(funcname) + 32 : 32; // 32 is enough for the suffix + char *buf = (char*)alloca(sz); + if (funcname != NULL && strchr(funcname, '#') == NULL) { + for (int i = 0; ; i++) { + snprintf(buf, sz, "%s##%d", funcname, i); + jl_sym_t *sym = jl_symbol(buf); + JL_LOCK(&m->lock); + if (jl_get_module_binding(m, sym, 0) == NULL) { // make sure this name is not already taken + jl_get_module_binding(m, sym, 1); // create the binding + JL_UNLOCK(&m->lock); + return symbol(fl_ctx, buf); + } + JL_UNLOCK(&m->lock); + } + } + else { + snprintf(buf, sz, "%d", jl_module_next_counter(m)); + } + return symbol(fl_ctx, buf); } static int jl_is_number(jl_value_t *v) @@ -206,10 +252,8 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m static const builtinspec_t julia_flisp_ast_ext[] = { { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint - { "current-julia-module-counter", fl_current_module_counter }, + { "current-julia-module-counter", fl_module_unique_name }, { "julia-scalar?", fl_julia_scalar }, - { "julia-current-file", fl_julia_current_file }, - { "julia-current-line", fl_julia_current_line }, { NULL, NULL } }; @@ -304,6 +348,7 @@ void jl_init_common_symbols(void) jl_lambda_sym = jl_symbol("lambda"); jl_module_sym = jl_symbol("module"); jl_export_sym = jl_symbol("export"); + jl_public_sym = jl_symbol("public"); jl_import_sym = jl_symbol("import"); jl_using_sym = jl_symbol("using"); jl_assign_sym = jl_symbol("="); @@ -318,6 +363,8 @@ void jl_init_common_symbols(void) jl_opaque_closure_method_sym = jl_symbol("opaque_closure_method"); jl_const_sym = jl_symbol("const"); jl_global_sym = jl_symbol("global"); + jl_globaldecl_sym = jl_symbol("globaldecl"); + jl_local_sym = jl_symbol("local"); jl_thunk_sym = jl_symbol("thunk"); jl_toplevel_sym = jl_symbol("toplevel"); jl_dot_sym = jl_symbol("."); @@ -363,6 +410,8 @@ void jl_init_common_symbols(void) jl_aliasscope_sym = jl_symbol("aliasscope"); jl_popaliasscope_sym = jl_symbol("popaliasscope"); jl_thismodule_sym = jl_symbol("thismodule"); + jl_eval_sym = jl_symbol("eval"); + jl_include_sym = jl_symbol("include"); jl_block_sym = jl_symbol("block"); jl_atom_sym = jl_symbol("atom"); jl_statement_sym = jl_symbol("statement"); @@ -375,6 +424,8 @@ void jl_init_common_symbols(void) jl_release_sym = jl_symbol("release"); jl_acquire_release_sym = jl_symbol("acquire_release"); jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent"); + jl_uninferred_sym = jl_symbol("uninferred"); + jl_latestworld_sym = jl_symbol("latestworld"); } JL_DLLEXPORT void jl_lisp_prompt(void) @@ -413,20 +464,6 @@ JL_DLLEXPORT void fl_profile(const char *fname) jl_ast_ctx_leave(ctx); } - -static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s) -{ - assert(issymbol(s)); - if (fl_isgensym(fl_ctx, s)) { - char gsname[16]; - char *n = uint2str(&gsname[1], sizeof(gsname)-1, - ((gensym_t*)ptr(s))->id, 10); - *(--n) = '#'; - return jl_symbol(n); - } - return jl_symbol(symbol_name(fl_ctx, s)); -} - static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod) { jl_value_t *v = NULL; @@ -436,7 +473,7 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo } JL_CATCH { // if expression cannot be converted, replace with error expr - //jl_(jl_current_exception()); + //jl_(jl_current_exception(jl_current_task)); //jlbacktrace(); jl_expr_t *ex = jl_exprn(jl_error_sym, 1); v = (jl_value_t*)ex; @@ -537,20 +574,16 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, 
value_t e, jl_module_t *m JL_GC_POP(); return temp; } - else if (sym == jl_lineinfo_sym && n == 5) { - jl_value_t *modu=NULL, *name=NULL, *file=NULL, *linenum=NULL, *inlinedat=NULL; - JL_GC_PUSH5(&modu, &name, &file, &linenum, &inlinedat); + else if (sym == jl_lineinfo_sym && n == 3) { + jl_value_t *file=NULL, *linenum=NULL, *inlinedat=NULL; + JL_GC_PUSH3(&file, &linenum, &inlinedat); value_t lst = e; - modu = scm_to_julia_(fl_ctx, car_(lst), mod); - lst = cdr_(lst); - name = scm_to_julia_(fl_ctx, car_(lst), mod); - lst = cdr_(lst); file = scm_to_julia_(fl_ctx, car_(lst), mod); lst = cdr_(lst); linenum = scm_to_julia_(fl_ctx, car_(lst), mod); lst = cdr_(lst); inlinedat = scm_to_julia_(fl_ctx, car_(lst), mod); - temp = jl_new_struct(jl_lineinfonode_type, modu, name, file, linenum, inlinedat); + temp = jl_new_struct(jl_lineinfonode_type, file, linenum, inlinedat); JL_GC_POP(); return temp; } @@ -564,6 +597,15 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m temp = scm_to_julia(fl_ctx, car_(cdr_(e)), mod); temp = jl_new_struct(jl_gotoifnot_type, ex, temp); } + else if (sym == jl_enter_sym) { + ex = scm_to_julia_(fl_ctx, car_(e), mod); + temp = jl_new_struct_uninit(jl_enternode_type); + jl_enternode_scope(temp) = NULL; + jl_enternode_catch_dest(temp) = jl_unbox_long(ex); + if (n == 2) { + jl_enternode_scope(temp) = scm_to_julia(fl_ctx, car_(cdr_(e)), mod); + } + } else if (sym == jl_newvar_sym) { ex = scm_to_julia_(fl_ctx, car_(e), mod); temp = jl_new_struct(jl_newvarnode_type, ex); @@ -627,6 +669,8 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m if (iscvalue(e) && cv_class((cvalue_t*)ptr(e)) == jl_ast_ctx(fl_ctx)->jvtype) { return *(jl_value_t**)cv_data((cvalue_t*)ptr(e)); } + fl_print(fl_ctx, ios_stderr, e); + ios_putc('\n', ios_stderr); jl_error("malformed tree"); } @@ -648,9 +692,9 @@ static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v) static void array_to_list(fl_context_t *fl_ctx, jl_array_t *a, value_t *pv, int check_valid) { value_t temp; - for(long i=jl_array_len(a)-1; i >= 0; i--) { + for (long i = jl_array_nrows(a) - 1; i >= 0; i--) { *pv = fl_cons(fl_ctx, fl_ctx->NIL, *pv); - temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a,i), check_valid); + temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a, i), check_valid); // note: must be separate statement car_(*pv) = temp; } @@ -685,8 +729,20 @@ static int julia_to_scm_noalloc1(fl_context_t *fl_ctx, jl_value_t *v, value_t *r static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) JL_NOTSAFEPOINT { - if (jl_is_long(v) && fits_fixnum(jl_unbox_long(v))) - return fixnum(jl_unbox_long(v)); + if (jl_is_long(v)) { + if (fits_fixnum(jl_unbox_long(v))) { + return fixnum(jl_unbox_long(v)); + } else { +#ifdef _P64 + value_t prim = cprim(fl_ctx, fl_ctx->int64type, sizeof(int64_t)); + *((int64_t*)cp_data((cprim_t*)ptr(prim))) = jl_unbox_long(v); +#else + value_t prim = cprim(fl_ctx, fl_ctx->int32type, sizeof(int32_t)); + *((int32_t*)cp_data((cprim_t*)ptr(prim))) = jl_unbox_long(v); +#endif + return prim; + } + } if (check_valid) { if (jl_is_ssavalue(v)) lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST"); @@ -878,7 +934,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) JL_GC_PUSH2(&new_ci, &new_code); new_ci = jl_copy_code_info(new_ci); new_code = jl_array_copy(new_ci->code); - size_t clen = jl_array_len(new_code); + size_t clen = jl_array_nrows(new_code); for (int i = 0; i < clen; ++i) { 
jl_array_ptr_set(new_code, i, jl_copy_ast( jl_array_ptr_ref(new_code, i) @@ -890,18 +946,9 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) jl_gc_wb(new_ci, new_ci->slotnames); new_ci->slotflags = jl_array_copy(new_ci->slotflags); jl_gc_wb(new_ci, new_ci->slotflags); - new_ci->codelocs = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->codelocs); - jl_gc_wb(new_ci, new_ci->codelocs); - new_ci->linetable = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->linetable); - jl_gc_wb(new_ci, new_ci->linetable); new_ci->ssaflags = jl_array_copy(new_ci->ssaflags); jl_gc_wb(new_ci, new_ci->ssaflags); - if (new_ci->edges != jl_nothing) { - new_ci->edges = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->edges); - jl_gc_wb(new_ci, new_ci->edges); - } - if (jl_is_array(new_ci->ssavaluetypes)) { new_ci->ssavaluetypes = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->ssavaluetypes); jl_gc_wb(new_ci, new_ci->ssavaluetypes); @@ -911,7 +958,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) } if (jl_is_expr(expr)) { jl_expr_t *e = (jl_expr_t*)expr; - size_t i, l = jl_array_len(e->args); + size_t i, l = jl_array_nrows(e->args); jl_expr_t *ne = jl_exprn(e->head, l); JL_GC_PUSH2(&ne, &expr); for (i = 0; i < l; i++) { @@ -942,7 +989,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) return expr; } -JL_DLLEXPORT int jl_is_operator(char *sym) +JL_DLLEXPORT int jl_is_operator(const char *sym) { jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL); fl_context_t *fl_ctx = &ctx->fl; @@ -951,7 +998,7 @@ JL_DLLEXPORT int jl_is_operator(char *sym) return res; } -JL_DLLEXPORT int jl_is_unary_operator(char *sym) +JL_DLLEXPORT int jl_is_unary_operator(const char *sym) { jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL); fl_context_t *fl_ctx = &ctx->fl; @@ -960,7 +1007,7 @@ JL_DLLEXPORT int jl_is_unary_operator(char *sym) return res; } -JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym) +JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym) { jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL); fl_context_t *fl_ctx = &ctx->fl; @@ -969,7 +1016,7 @@ JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym) return res; } -JL_DLLEXPORT int jl_is_syntactic_operator(char *sym) +JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym) { jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL); fl_context_t *fl_ctx = &ctx->fl; @@ -978,7 +1025,7 @@ JL_DLLEXPORT int jl_is_syntactic_operator(char *sym) return res; } -JL_DLLEXPORT int jl_operator_precedence(char *sym) +JL_DLLEXPORT int jl_operator_precedence(const char *sym) { jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL); fl_context_t *fl_ctx = &ctx->fl; @@ -989,11 +1036,11 @@ JL_DLLEXPORT int jl_operator_precedence(char *sym) int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT { - size_t i, l = jl_array_len(body); + size_t i, l = jl_array_nrows(body); for (i = 0; i < l; i++) { jl_expr_t *stmt = (jl_expr_t*)jl_array_ptr_ref(body, i); if (jl_is_expr((jl_value_t*)stmt) && stmt->head == jl_meta_sym) { - size_t i, l = jl_array_len(stmt->args); + size_t i, l = jl_array_nrows(stmt->args); for (i = 0; i < l; i++) if (jl_array_ptr_ref(stmt->args, i) == (jl_value_t*)sym) return 1; @@ -1058,7 +1105,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule { jl_task_t *ct = jl_current_task; JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION); - size_t nargs = jl_array_len(args) + 1; + size_t nargs = jl_array_nrows(args) + 1; JL_NARGSV("macrocall", 3); // macro name, location, and module jl_value_t **margs; JL_GC_PUSHARGS(margs, nargs); 
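/*
 * A minimal usage sketch (illustration only) of the operator queries whose
 * signatures gain const-qualified parameters in this file; with `const char *`
 * parameters they accept string literals cleanly from const-correct callers
 * such as C++ code or C built with -Wwrite-strings. Assumes <julia.h>.
 */
static void operator_query_sketch(void)
{
    int is_plus_op = jl_is_operator("+");         // nonzero: `+` parses as an operator
    int plus_prec  = jl_operator_precedence("+"); // its precedence level in the parser
    (void)is_plus_op; (void)plus_prec;
}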
@@ -1103,7 +1150,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule margs[0] = jl_cstr_to_string(""); margs[1] = jl_fieldref(lno, 0); // extract and allocate line number jl_rethrow_other(jl_new_struct(jl_loaderror_type, margs[0], margs[1], - jl_current_exception())); + jl_current_exception(ct))); } } ct->world_age = last_age; @@ -1119,7 +1166,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str jl_expr_t *e = (jl_expr_t*)expr; if (e->head == jl_inert_sym || e->head == jl_module_sym || - //e->head == jl_toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed + e->head == jl_toplevel_sym || e->head == jl_meta_sym) { return expr; } @@ -1197,7 +1244,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str } size_t i; - for (i = 0; i < jl_array_len(e->args); i++) { + for (i = 0; i < jl_array_nrows(e->args); i++) { jl_value_t *a = jl_array_ptr_ref(e->args, i); jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel, world, throw_load_error); if (a != a2) diff --git a/src/ast.scm b/src/ast.scm index 87db8449b3992..5cc97014e373e 100644 --- a/src/ast.scm +++ b/src/ast.scm @@ -114,7 +114,7 @@ (deparse-prefix-call (cadr e) (cddr e) #\( #\))))) (($ &) (if (and (pair? (cadr e)) (not (memq (caadr e) - '(outerref null true false tuple $ vect braces)))) + '(null true false tuple $ vect braces)))) (string (car e) "(" (deparse (cadr e)) ")") (string (car e) (deparse (cadr e))))) ((|::|) (if (length= e 2) @@ -249,12 +249,11 @@ ;; misc syntax forms ((import using) (string (car e) " " (string.join (map deparse-import-path (cdr e)) ", "))) - ((global local export) (string (car e) " " (string.join (map deparse (cdr e)) ", "))) + ((global local export public) (string (car e) " " (string.join (map deparse (cdr e)) ", "))) ((const) (string "const " (deparse (cadr e)))) ((top) (deparse (cadr e))) ((core) (string "Core." (deparse (cadr e)))) ((globalref) (string (deparse (cadr e)) "." (deparse-colon-dot (caddr e)))) - ((outerref) (string (deparse (cadr e)))) ((ssavalue) (string "SSAValue(" (cadr e) ")")) ((line) (if (length= e 2) (string "# line " (cadr e)) @@ -298,7 +297,7 @@ ;; predicates and accessors (define (quoted? e) - (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo))) + (memq (car e) '(quote top core globalref line break inert meta inbounds inline noinline loopinfo))) (define (quotify e) `',e) (define (unquote e) (if (and (pair? e) (memq (car e) '(quote inert))) @@ -393,9 +392,6 @@ (define (globalref? e) (and (pair? e) (eq? (car e) 'globalref))) -(define (outerref? e) - (and (pair? e) (eq? (car e) 'outerref))) - (define (nothing? e) (and (pair? e) (eq? 
(car e) 'null))) diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 64e3fbd1af366..77463ae4884cb 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -21,46 +21,60 @@ extern "C" { JL_DLLEXPORT extern jl_fptr_args_t jl_f_##name##_addr #endif -DECLARE_BUILTIN(applicable); DECLARE_BUILTIN(_apply_iterate); DECLARE_BUILTIN(_apply_pure); -DECLARE_BUILTIN(apply_type); -DECLARE_BUILTIN(arrayref); -DECLARE_BUILTIN(arrayset); -DECLARE_BUILTIN(arraysize); DECLARE_BUILTIN(_call_in_world); DECLARE_BUILTIN(_call_in_world_total); DECLARE_BUILTIN(_call_latest); -DECLARE_BUILTIN(replacefield); -DECLARE_BUILTIN(const_arrayref); +DECLARE_BUILTIN(_compute_sparams); DECLARE_BUILTIN(_expr); +DECLARE_BUILTIN(_svec_ref); +DECLARE_BUILTIN(_typebody); +DECLARE_BUILTIN(_typevar); +DECLARE_BUILTIN(applicable); +DECLARE_BUILTIN(apply_type); +DECLARE_BUILTIN(compilerbarrier); +DECLARE_BUILTIN(current_scope); +DECLARE_BUILTIN(donotdelete); DECLARE_BUILTIN(fieldtype); +DECLARE_BUILTIN(finalizer); DECLARE_BUILTIN(getfield); +DECLARE_BUILTIN(getglobal); DECLARE_BUILTIN(ifelse); DECLARE_BUILTIN(invoke); DECLARE_BUILTIN(is); DECLARE_BUILTIN(isa); DECLARE_BUILTIN(isdefined); +DECLARE_BUILTIN(isdefinedglobal); DECLARE_BUILTIN(issubtype); +DECLARE_BUILTIN(memorynew); +DECLARE_BUILTIN(memoryref); +DECLARE_BUILTIN(memoryref_isassigned); +DECLARE_BUILTIN(memoryrefget); +DECLARE_BUILTIN(memoryrefmodify); +DECLARE_BUILTIN(memoryrefoffset); +DECLARE_BUILTIN(memoryrefreplace); +DECLARE_BUILTIN(memoryrefset); +DECLARE_BUILTIN(memoryrefsetonce); +DECLARE_BUILTIN(memoryrefswap); DECLARE_BUILTIN(modifyfield); +DECLARE_BUILTIN(modifyglobal); DECLARE_BUILTIN(nfields); +DECLARE_BUILTIN(replacefield); +DECLARE_BUILTIN(replaceglobal); DECLARE_BUILTIN(setfield); +DECLARE_BUILTIN(setfieldonce); +DECLARE_BUILTIN(setglobal); +DECLARE_BUILTIN(setglobalonce); DECLARE_BUILTIN(sizeof); DECLARE_BUILTIN(svec); DECLARE_BUILTIN(swapfield); +DECLARE_BUILTIN(swapglobal); DECLARE_BUILTIN(throw); +DECLARE_BUILTIN(throw_methoderror); DECLARE_BUILTIN(tuple); DECLARE_BUILTIN(typeassert); -DECLARE_BUILTIN(_typebody); DECLARE_BUILTIN(typeof); -DECLARE_BUILTIN(_typevar); -DECLARE_BUILTIN(donotdelete); -DECLARE_BUILTIN(compilerbarrier); -DECLARE_BUILTIN(getglobal); -DECLARE_BUILTIN(setglobal); -DECLARE_BUILTIN(finalizer); -DECLARE_BUILTIN(_compute_sparams); -DECLARE_BUILTIN(_svec_ref); JL_CALLABLE(jl_f__structtype); JL_CALLABLE(jl_f__abstracttype); @@ -68,7 +82,6 @@ JL_CALLABLE(jl_f__primitivetype); JL_CALLABLE(jl_f__setsuper); JL_CALLABLE(jl_f__equiv_typedef); JL_CALLABLE(jl_f_get_binding_type); -JL_CALLABLE(jl_f_set_binding_type); JL_CALLABLE(jl_f__compute_sparams); JL_CALLABLE(jl_f__svec_ref); #ifdef __cplusplus diff --git a/src/builtins.c b/src/builtins.c index b664b8d73710f..90d8a0d453e20 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -115,7 +115,7 @@ static int NOINLINE compare_fields(const jl_value_t *a, const jl_value_t *b, jl_ continue; // skip this field (it is #undef) } } - if (!ft->layout->haspadding) { + if (!ft->layout->flags.haspadding && ft->layout->flags.isbitsegal) { if (!bits_equal(ao, bo, ft->layout->size)) return 0; } @@ -222,7 +222,7 @@ JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const j JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT { if (dtag < jl_max_tags << 4) { - switch ((enum jlsmall_typeof_tags)(dtag >> 4)) { + switch ((enum jl_small_typeof_tags)(dtag >> 4)) { case jl_int8_tag: case 
jl_uint8_tag: return *(uint8_t*)a == *(uint8_t*)b; @@ -284,7 +284,7 @@ inline int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t if (sz == 0) return 1; size_t nf = jl_datatype_nfields(dt); - if (nf == 0 || !dt->layout->haspadding) + if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->flags.isbitsegal)) return bits_equal(a, b, sz); return compare_fields(a, b, dt); } @@ -344,6 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN i++; pe = pe->prev; } + uintptr_t bits = jl_astaggedvalue(v)->header; + if (bits & GC_IN_IMAGE) + return ((uintptr_t*)v)[-2]; return inthash((uintptr_t)v); } if (tv == jl_uniontype_type) { @@ -391,7 +394,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT if (sz == 0) return ~h; size_t f, nf = jl_datatype_nfields(dt); - if (nf == 0 || (!dt->layout->haspadding && dt->layout->npointers == 0)) { + if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->flags.isbitsegal && dt->layout->npointers == 0)) { // operate element-wise if there are unused bits inside, // otherwise just take the whole data block at once // a few select pointers (notably symbol) also have special hash values @@ -432,55 +435,62 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT return h; } -static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT +static uintptr_t NOINLINE jl_object_id__cold(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT { - if (dt == jl_simplevector_type) - return hash_svec((jl_svec_t*)v); - if (dt == jl_datatype_type) { - jl_datatype_t *dtv = (jl_datatype_t*)v; - uintptr_t h = ~dtv->name->hash; - return bitmix(h, hash_svec(dtv->parameters)); - } - if (dt == jl_string_type) { + jl_datatype_t *dt = (jl_datatype_t*)jl_to_typeof(tv); + if (dt->name->mutabl) { + if (dt == jl_string_type) { #ifdef _P64 - return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); + return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); #else - return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); + return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); #endif - } - if (dt == jl_module_type) { - jl_module_t *m = (jl_module_t*)v; - return m->hash; - } - if (dt->name->mutabl) + } + if (dt == jl_simplevector_type) + return hash_svec((jl_svec_t*)v); + if (dt == jl_datatype_type) { + jl_datatype_t *dtv = (jl_datatype_t*)v; + uintptr_t h = ~dtv->name->hash; + return bitmix(h, hash_svec(dtv->parameters)); + } + if (dt == jl_module_type) { + jl_module_t *m = (jl_module_t*)v; + return m->hash; + } + uintptr_t bits = jl_astaggedvalue(v)->header; + if (bits & GC_IN_IMAGE) + return ((uintptr_t*)v)[-2]; return inthash((uintptr_t)v); + } return immut_id_(dt, v, dt->hash); } -JL_DLLEXPORT inline uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT +JL_DLLEXPORT inline uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT { - jl_datatype_t *dt = (jl_datatype_t*)tv; - if (dt == jl_symbol_type) + if (tv == jl_symbol_tag << 4) { return ((jl_sym_t*)v)->hash; - if (dt == jl_typename_type) - return ((jl_typename_t*)v)->hash; - if (dt == jl_datatype_type) { + } + else if (tv == jl_datatype_tag << 4) { jl_datatype_t *dtv = (jl_datatype_t*)v; if (dtv->isconcretetype) return dtv->hash; } - return jl_object_id__cold(dt, v); + else if (tv == (uintptr_t)jl_typename_type) { + return ((jl_typename_t*)v)->hash; + } + return jl_object_id__cold(tv, v); } JL_DLLEXPORT 
uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT { - return jl_object_id_(jl_typeof(v), v); + return jl_object_id_(jl_typetagof(v), v); } // eq hash table -------------------------------------------------------------- #include "iddict.c" +#include "idset.c" // object model and type primitives ------------------------------------------- @@ -511,21 +521,18 @@ JL_CALLABLE(jl_f_sizeof) } if (jl_is_datatype(x)) { jl_datatype_t *dx = (jl_datatype_t*)x; - if (dx->layout == NULL) { + if (!jl_struct_try_layout(dx)) { if (dx->name->abstract) jl_errorf("Abstract type %s does not have a definite size.", jl_symbol_name(dx->name->name)); else jl_errorf("Argument is an incomplete %s type and does not have a definite size.", jl_symbol_name(dx->name->name)); } - if (jl_is_layout_opaque(dx->layout)) + if (jl_is_layout_opaque(dx->layout)) // includes all GenericMemory{kind,T} jl_errorf("Type %s does not have a definite size.", jl_symbol_name(dx->name->name)); return jl_box_long(jl_datatype_size(x)); } if (x == jl_bottom_type) jl_error("The empty type does not have a definite size since it does not have instances."); - if (jl_is_array(x)) { - return jl_box_long(jl_array_len(x) * ((jl_array_t*)x)->elsize); - } if (jl_is_string(x)) return jl_box_long(jl_string_len(x)); if (jl_is_symbol(x)) @@ -535,7 +542,10 @@ JL_CALLABLE(jl_f_sizeof) jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(x); assert(jl_is_datatype(dt)); assert(!dt->name->abstract); - return jl_box_long(jl_datatype_size(dt)); + size_t sz = dt->layout->size; + if (jl_is_genericmemory(x)) + sz = (sz + (dt->layout->flags.arrayelem_isunion ? 1 : 0)) * ((jl_genericmemory_t*)x)->length; + return jl_box_long(sz); } JL_CALLABLE(jl_f_issubtype) @@ -570,6 +580,14 @@ JL_CALLABLE(jl_f_throw) return jl_nothing; } +JL_CALLABLE(jl_f_throw_methoderror) +{ + JL_NARGSV(throw_methoderror, 1); + size_t world = jl_get_tls_world_age(); + jl_method_error(args[0], &args[1], nargs, world); + return jl_nothing; +} + JL_CALLABLE(jl_f_ifelse) { JL_NARGS(ifelse, 3, 3); @@ -577,6 +595,12 @@ JL_CALLABLE(jl_f_ifelse) return (args[0] == jl_false ? 
args[2] : args[1]); } +JL_CALLABLE(jl_f_current_scope) +{ + JL_NARGS(current_scope, 0, 0); + return jl_current_task->scope; +} + // apply ---------------------------------------------------------------------- static NOINLINE jl_svec_t *_copy_to(size_t newalloc, jl_value_t **oldargs, size_t oldalloc) @@ -607,6 +631,12 @@ STATIC_INLINE void _grow_to(jl_value_t **root, jl_value_t ***oldargs, jl_svec_t *n_alloc = newalloc; } + +static jl_value_t *jl_arrayref(jl_array_t *a, size_t i) +{ + return jl_memoryrefget(jl_memoryrefindex(a->ref, i), 0); +} + static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *iterate) { jl_function_t *f = args[0]; @@ -615,6 +645,17 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera if (f == jl_builtin_svec) { if (jl_is_svec(args[1])) return args[1]; + if (jl_is_genericmemory(args[1])) { + jl_genericmemory_t *mem = (jl_genericmemory_t*)args[1]; + size_t n = mem->length; + jl_svec_t *t = jl_alloc_svec(n); + JL_GC_PUSH1(&t); + for (size_t i = 0; i < n; i++) { + jl_svecset(t, i, jl_genericmemoryref(mem, i)); + } + JL_GC_POP(); + return (jl_value_t*)t; + } if (jl_is_array(args[1])) { size_t n = jl_array_len(args[1]); jl_svec_t *t = jl_alloc_svec(n); @@ -641,6 +682,9 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera else if (jl_is_tuple(args[i]) || jl_is_namedtuple(args[i])) { precount += jl_nfields(args[i]); } + else if (jl_is_genericmemory(args[i])) { + precount += ((jl_genericmemory_t*)args[i])->length; + } else if (jl_is_array(args[i])) { precount += jl_array_len(args[i]); } @@ -649,7 +693,7 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera } } if (extra && iterate == NULL) { - jl_undefined_var_error(jl_symbol("iterate")); + jl_undefined_var_error(jl_symbol("iterate"), NULL); } // allocate space for the argument array and gc roots for it // based on our previous estimates @@ -709,13 +753,40 @@ static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *itera jl_gc_wb(arg_heap, newargs[n - 1]); } } + else if (jl_is_genericmemory(ai)) { + jl_genericmemory_t *mem = (jl_genericmemory_t*)ai; + size_t j, al = mem->length; + precount = (precount > al) ? precount - al : 0; + _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra); + assert(newargs != NULL); // inform GCChecker that we didn't write a NULL here + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout; + if (layout->flags.arrayelem_isboxed) { + for (j = 0; j < al; j++) { + jl_value_t *arg = jl_genericmemory_ptr_ref(mem, j); + // apply with array splatting may have embedded NULL value (#11772) + if (__unlikely(arg == NULL)) + jl_throw(jl_undefref_exception); + newargs[n++] = arg; + if (arg_heap) + jl_gc_wb(arg_heap, arg); + } + } + else { + for (j = 0; j < al; j++) { + newargs[n++] = jl_genericmemoryref(mem, j); + if (arg_heap) + jl_gc_wb(arg_heap, newargs[n - 1]); + } + } + } else if (jl_is_array(ai)) { jl_array_t *aai = (jl_array_t*)ai; size_t j, al = jl_array_len(aai); precount = (precount > al) ? 
precount - al : 0; _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra); assert(newargs != NULL); // inform GCChecker that we didn't write a NULL here - if (aai->flags.ptrarray) { + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(aai->ref.mem))->layout; + if (layout->flags.arrayelem_isboxed) { for (j = 0; j < al; j++) { jl_value_t *arg = jl_array_ptr_ref(aai, j); // apply with array splatting may have embedded NULL value (#11772) @@ -860,22 +931,27 @@ JL_CALLABLE(jl_f__call_in_world_total) // tuples --------------------------------------------------------------------- -JL_CALLABLE(jl_f_tuple) +static jl_value_t *arg_tuple(jl_value_t *a1, jl_value_t **args, size_t nargs) { size_t i; - if (nargs == 0) - return (jl_value_t*)jl_emptytuple; - jl_datatype_t *tt = jl_inst_arg_tuple_type(args[0], &args[1], nargs, 0); + jl_datatype_t *tt = jl_inst_arg_tuple_type(a1, args, nargs, 0); JL_GC_PROMISE_ROOTED(tt); // it is a concrete type if (tt->instance != NULL) return tt->instance; jl_task_t *ct = jl_current_task; jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(tt), tt); for (i = 0; i < nargs; i++) - set_nth_field(tt, jv, i, args[i], 0); + set_nth_field(tt, jv, i, i == 0 ? a1 : args[i - 1], 0); return jv; } +JL_CALLABLE(jl_f_tuple) +{ + if (nargs == 0) + return (jl_value_t*)jl_emptytuple; + return arg_tuple(args[0], &args[1], nargs); +} + JL_CALLABLE(jl_f_svec) { size_t i; @@ -937,11 +1013,11 @@ static inline size_t get_checked_fieldindex(const char *name, jl_datatype_t *st, else { jl_value_t *ts[2] = {(jl_value_t*)jl_long_type, (jl_value_t*)jl_symbol_type}; jl_value_t *t = jl_type_union(ts, 2); - jl_type_error("getfield", t, arg); + jl_type_error(name, t, arg); } if (mutabl && jl_field_isconst(st, idx)) { jl_errorf("%s: const field .%s of type %s cannot be changed", name, - jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(st), idx)), jl_symbol_name(st->name->name)); + jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(st), idx)), jl_symbol_name(st->name->name)); } return idx; } @@ -972,9 +1048,11 @@ JL_CALLABLE(jl_f_getfield) jl_atomic_error("getfield: non-atomic field cannot be accessed atomically"); if (isatomic && order == jl_memory_order_notatomic) jl_atomic_error("getfield: atomic field cannot be accessed non-atomically"); - v = jl_get_nth_field_checked(v, idx); - if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire) - jl_fence(); // `v` already had at least consume ordering + if (order >= jl_memory_order_seq_cst) + jl_fence(); + v = jl_get_nth_field_checked(v, idx); // `v` already had at least consume ordering + if (order >= jl_memory_order_acquire) + jl_fence(); return v; } @@ -996,7 +1074,7 @@ JL_CALLABLE(jl_f_setfield) jl_value_t *ft = jl_field_type_concrete(st, idx); if (!jl_isa(args[2], ft)) jl_type_error("setfield!", ft, args[2]); - if (order >= jl_memory_order_acq_rel || order == jl_memory_order_release) + if (order >= jl_memory_order_release) jl_fence(); // `st->[idx]` will have at least relaxed ordering set_nth_field(st, v, idx, args[2], isatomic); return args[2]; @@ -1070,6 +1148,35 @@ JL_CALLABLE(jl_f_replacefield) return v; } +JL_CALLABLE(jl_f_setfieldonce) +{ + enum jl_memory_order success_order = jl_memory_order_notatomic; + JL_NARGS(setfieldonce!, 3, 5); + if (nargs >= 4) { + JL_TYPECHK(setfieldonce!, symbol, args[3]); + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + } + enum jl_memory_order failure_order = success_order; + if (nargs == 5) { + JL_TYPECHK(setfieldonce!, symbol, 
args[4]); + failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 0); + } + if (failure_order > success_order) + jl_atomic_error("invalid atomic ordering"); + // TODO: filter more invalid ordering combinations? + jl_value_t *v = args[0]; + jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v); + size_t idx = get_checked_fieldindex("setfieldonce!", st, v, args[1], 1); + int isatomic = !!jl_field_isatomic(st, idx); + if (isatomic == (success_order == jl_memory_order_notatomic)) + jl_atomic_error(isatomic ? "setfieldonce!: atomic field cannot be written non-atomically" + : "setfieldonce!: non-atomic field cannot be written atomically"); + if (isatomic == (failure_order == jl_memory_order_notatomic)) + jl_atomic_error(isatomic ? "setfieldonce!: atomic field cannot be accessed non-atomically" + : "setfieldonce!: non-atomic field cannot be accessed atomically"); + int success = set_nth_fieldonce(st, v, idx, args[2], isatomic); // always seq_cst, if isatomic needed at all + return success ? jl_true : jl_false; +} static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow) { @@ -1125,6 +1232,8 @@ static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow) tt = ((jl_tvar_t*)tt)->ub; if (tt == (jl_value_t*)jl_any_type) return (jl_value_t*)jl_any_type; + if (tt == (jl_value_t*)jl_bottom_type) + return (jl_value_t*)jl_bottom_type; JL_GC_PUSH1(&f); if (jl_is_symbol(f)) f = jl_box_long(field_index+1); @@ -1178,7 +1287,12 @@ JL_CALLABLE(jl_f_isdefined) JL_TYPECHK(isdefined, symbol, args[1]); m = (jl_module_t*)args[0]; s = (jl_sym_t*)args[1]; - return jl_boundp(m, s) ? jl_true : jl_false; // is seq_cst already + if (order == jl_memory_order_unspecified) + order = jl_memory_order_unordered; + if (order < jl_memory_order_unordered) + jl_atomic_error("isdefined: module binding cannot be accessed non-atomically"); + int bound = jl_boundp(m, s, 1); // seq_cst always + return bound ? jl_true : jl_false; } jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]); assert(jl_is_datatype(vt)); @@ -1205,15 +1319,11 @@ JL_CALLABLE(jl_f_isdefined) jl_atomic_error("isdefined: non-atomic field cannot be accessed atomically"); if (isatomic && order == jl_memory_order_notatomic) jl_atomic_error("isdefined: atomic field cannot be accessed non-atomically"); - int v = jl_field_isdefined(args[0], idx); - if (v == 2) { - if (order > jl_memory_order_notatomic) - jl_fence(); // isbits case has no ordering already - } - else { - if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire) - jl_fence(); // `v` already gave at least consume ordering - } + if (order >= jl_memory_order_seq_cst) + jl_fence(); + int v = jl_field_isdefined(args[0], idx); // relaxed ordering + if (order >= jl_memory_order_acquire) + jl_fence(); return v ? 
jl_true : jl_false; } @@ -1234,11 +1344,41 @@ JL_CALLABLE(jl_f_getglobal) JL_TYPECHK(getglobal, symbol, (jl_value_t*)sym); if (order == jl_memory_order_notatomic) jl_atomic_error("getglobal: module binding cannot be read non-atomically"); - jl_value_t *v = jl_eval_global_var(mod, sym); - // is seq_cst already, no fence needed + else if (order >= jl_memory_order_seq_cst) + jl_fence(); + jl_value_t *v = jl_eval_global_var(mod, sym); // relaxed load + if (order >= jl_memory_order_acquire) + jl_fence(); return v; } +JL_CALLABLE(jl_f_isdefinedglobal) +{ + jl_module_t *m = NULL; + jl_sym_t *s = NULL; + JL_NARGS(isdefined, 2, 3); + int allow_import = 1; + enum jl_memory_order order = jl_memory_order_unspecified; + JL_TYPECHK(isdefined, module, args[0]); + JL_TYPECHK(isdefined, symbol, args[1]); + if (nargs == 3) { + JL_TYPECHK(isdefined, bool, args[2]); + allow_import = jl_unbox_bool(args[2]); + } + if (nargs == 4) { + JL_TYPECHK(isdefined, symbol, args[3]); + order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0); + } + m = (jl_module_t*)args[0]; + s = (jl_sym_t*)args[1]; + if (order == jl_memory_order_unspecified) + order = jl_memory_order_unordered; + if (order < jl_memory_order_unordered) + jl_atomic_error("isdefined: module binding cannot be accessed non-atomically"); + int bound = jl_boundp(m, s, allow_import); // seq_cst always + return bound ? jl_true : jl_false; +} + JL_CALLABLE(jl_f_setglobal) { enum jl_memory_order order = jl_memory_order_release; @@ -1253,9 +1393,12 @@ JL_CALLABLE(jl_f_setglobal) JL_TYPECHK(setglobal!, symbol, (jl_value_t*)var); if (order == jl_memory_order_notatomic) jl_atomic_error("setglobal!: module binding cannot be written non-atomically"); - // is seq_cst already, no fence needed - jl_binding_t *b = jl_get_binding_wr(mod, var); - jl_checked_assignment(b, mod, var, args[2]); + else if (order >= jl_memory_order_seq_cst) + jl_fence(); + jl_binding_t *b = jl_get_binding_wr(mod, var, 0); + jl_checked_assignment(b, mod, var, args[2]); // release store + if (order >= jl_memory_order_seq_cst) + jl_fence(); return args[2]; } @@ -1266,42 +1409,110 @@ JL_CALLABLE(jl_f_get_binding_type) jl_sym_t *var = (jl_sym_t*)args[1]; JL_TYPECHK(get_binding_type, module, (jl_value_t*)mod); JL_TYPECHK(get_binding_type, symbol, (jl_value_t*)var); - jl_value_t *ty = jl_get_binding_type(mod, var); - if (ty == (jl_value_t*)jl_nothing) { - jl_binding_t *b = jl_get_module_binding(mod, var, 0); - if (b == NULL) - return (jl_value_t*)jl_any_type; - jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); - if (b2 != b) - return (jl_value_t*)jl_any_type; - jl_value_t *old_ty = NULL; - jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type); - return jl_atomic_load_relaxed(&b->ty); - } - return ty; -} - -JL_CALLABLE(jl_f_set_binding_type) -{ - JL_NARGS(set_binding_type!, 2, 3); - jl_module_t *m = (jl_module_t*)args[0]; - jl_sym_t *s = (jl_sym_t*)args[1]; - JL_TYPECHK(set_binding_type!, module, (jl_value_t*)m); - JL_TYPECHK(set_binding_type!, symbol, (jl_value_t*)s); - jl_value_t *ty = nargs == 2 ? (jl_value_t*)jl_any_type : args[2]; - JL_TYPECHK(set_binding_type!, type, ty); - jl_binding_t *b = jl_get_binding_wr(m, s); - jl_value_t *old_ty = NULL; - if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty) && ty != old_ty) { - if (nargs == 2) - return jl_nothing; - jl_errorf("cannot set type for global %s.%s. 
It already has a value or is already set to a different type.", - jl_symbol_name(m->name), jl_symbol_name(s)); - } - jl_gc_wb_binding(b, ty); - return jl_nothing; + jl_value_t *ret = jl_get_binding_type(mod, var); + if (ret == jl_nothing) + return (jl_value_t*)jl_any_type; + return ret; } +JL_CALLABLE(jl_f_swapglobal) +{ + enum jl_memory_order order = jl_memory_order_release; + JL_NARGS(swapglobal!, 3, 4); + if (nargs == 4) { + JL_TYPECHK(swapglobal!, symbol, args[3]); + order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + } + jl_module_t *mod = (jl_module_t*)args[0]; + jl_sym_t *var = (jl_sym_t*)args[1]; + JL_TYPECHK(swapglobal!, module, (jl_value_t*)mod); + JL_TYPECHK(swapglobal!, symbol, (jl_value_t*)var); + if (order == jl_memory_order_notatomic) + jl_atomic_error("swapglobal!: module binding cannot be written non-atomically"); + // is seq_cst already, no fence needed + jl_binding_t *b = jl_get_binding_wr(mod, var, 0); + return jl_checked_swap(b, mod, var, args[2]); +} + +JL_CALLABLE(jl_f_modifyglobal) +{ + enum jl_memory_order order = jl_memory_order_release; + JL_NARGS(modifyglobal!, 4, 5); + if (nargs == 5) { + JL_TYPECHK(modifyglobal!, symbol, args[4]); + order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1); + } + jl_module_t *mod = (jl_module_t*)args[0]; + jl_sym_t *var = (jl_sym_t*)args[1]; + JL_TYPECHK(modifyglobal!, module, (jl_value_t*)mod); + JL_TYPECHK(modifyglobal!, symbol, (jl_value_t*)var); + if (order == jl_memory_order_notatomic) + jl_atomic_error("modifyglobal!: module binding cannot be written non-atomically"); + jl_binding_t *b = jl_get_binding_wr(mod, var, 0); + // is seq_cst already, no fence needed + return jl_checked_modify(b, mod, var, args[2], args[3]); +} + +JL_CALLABLE(jl_f_replaceglobal) +{ + enum jl_memory_order success_order = jl_memory_order_release; + JL_NARGS(replaceglobal!, 4, 6); + if (nargs >= 5) { + JL_TYPECHK(replaceglobal!, symbol, args[4]); + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1); + } + enum jl_memory_order failure_order = success_order; + if (nargs == 6) { + JL_TYPECHK(replaceglobal!, symbol, args[5]); + failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[5], 1, 0); + } + if (failure_order > success_order) + jl_atomic_error("invalid atomic ordering"); + // TODO: filter more invalid ordering combinations? 
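The ordering validation in the new `replaceglobal!` / `setglobalonce!` builtins follows the usual compare-and-swap rule: the failure ordering may not be stronger than the success ordering. A minimal C11 sketch of that constraint, using standard atomics rather than the runtime's own helpers (all names here are illustrative only):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Illustrative only: a set-once CAS whose (success, failure) orderings
     * respect the rule enforced above; failure must be no stronger than
     * success, and (acq_rel, acquire) is a valid pair. */
    static bool store_once(_Atomic(long) *slot, long desired)
    {
        long expected = 0;                 /* succeed only if still unset */
        return atomic_compare_exchange_strong_explicit(
            slot, &expected, desired,
            memory_order_acq_rel,          /* success ordering */
            memory_order_acquire);         /* failure ordering */
    }

As the comments in these builtins note, the checked binding operations they defer to are already seq_cst, so no additional fences are emitted after the up-front validation.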
+ jl_module_t *mod = (jl_module_t*)args[0]; + jl_sym_t *var = (jl_sym_t*)args[1]; + JL_TYPECHK(replaceglobal!, module, (jl_value_t*)mod); + JL_TYPECHK(replaceglobal!, symbol, (jl_value_t*)var); + if (success_order == jl_memory_order_notatomic) + jl_atomic_error("replaceglobal!: module binding cannot be written non-atomically"); + if (failure_order == jl_memory_order_notatomic) + jl_atomic_error("replaceglobal!: module binding cannot be accessed non-atomically"); + jl_binding_t *b = jl_get_binding_wr(mod, var, 0); + // is seq_cst already, no fence needed + return jl_checked_replace(b, mod, var, args[2], args[3]); +} + +JL_CALLABLE(jl_f_setglobalonce) +{ + enum jl_memory_order success_order = jl_memory_order_release; + JL_NARGS(setglobalonce!, 3, 5); + if (nargs >= 4) { + JL_TYPECHK(setglobalonce!, symbol, args[3]); + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + } + enum jl_memory_order failure_order = success_order; + if (nargs == 5) { + JL_TYPECHK(setglobalonce!, symbol, args[4]); + failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 0); + } + if (failure_order > success_order) + jl_atomic_error("invalid atomic ordering"); + // TODO: filter more invalid ordering combinations? + jl_module_t *mod = (jl_module_t*)args[0]; + jl_sym_t *var = (jl_sym_t*)args[1]; + JL_TYPECHK(setglobalonce!, module, (jl_value_t*)mod); + JL_TYPECHK(setglobalonce!, symbol, (jl_value_t*)var); + if (success_order == jl_memory_order_notatomic) + jl_atomic_error("setglobalonce!: module binding cannot be written non-atomically"); + if (failure_order == jl_memory_order_notatomic) + jl_atomic_error("setglobalonce!: module binding cannot be accessed non-atomically"); + jl_binding_t *b = jl_get_binding_wr(mod, var, 0); + // is seq_cst already, no fence needed + jl_value_t *old = jl_checked_assignonce(b, mod, var, args[2]); + return old == NULL ? jl_true : jl_false; +} + + // apply_type ----------------------------------------------------------------- @@ -1363,11 +1574,11 @@ JL_CALLABLE(jl_f_apply_type) jl_vararg_t *vm = (jl_vararg_t*)args[0]; if (!vm->T) { JL_NARGS(apply_type, 2, 3); - return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1); + return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1, 0); } else if (!vm->N) { JL_NARGS(apply_type, 2, 2); - return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1); + return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1, 0); } } else if (jl_is_unionall(args[0])) { @@ -1398,14 +1609,49 @@ JL_CALLABLE(jl_f_invoke) { JL_NARGSV(invoke, 2); jl_value_t *argtypes = args[1]; - JL_GC_PUSH1(&argtypes); - if (!jl_is_tuple_type(jl_unwrap_unionall(args[1]))) - jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, args[1]); + if (jl_is_method(argtypes)) { + jl_method_t *m = (jl_method_t*)argtypes; + if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)m->sig)) + jl_type_error("invoke: argument type error", argtypes, arg_tuple(args[0], &args[2], nargs - 1)); + return jl_gf_invoke_by_method(m, args[0], &args[2], nargs - 1); + } else if (jl_is_code_instance(argtypes)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)args[1]; + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + // N.B.: specTypes need not be a subtype of the method signature. We need to check both. 
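The world-age check applied just below in the `CodeInstance` branch of `invoke` is a simple interval test. A tiny self-contained sketch of the same predicate (the helper name and argument names are hypothetical, not the runtime's API):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the validity test applied just below: a
     * compiled instance may only be invoked while the caller's world age
     * lies inside the instance's [min_world, max_world] range. */
    static bool world_age_valid(uint64_t min_world, uint64_t max_world,
                                uint64_t current_world)
    {
        return min_world <= current_world && current_world <= max_world;
    }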
+ if (jl_is_abioverride(codeinst->def)) { + jl_datatype_t *abi = (jl_datatype_t*)((jl_abi_override_t*)(codeinst->def))->abi; + if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, abi)) { + jl_type_error("invoke: argument type error (ABI overwrite)", (jl_value_t*)abi, arg_tuple(args[0], &args[2], nargs - 1)); + } + } else { + if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)mi->specTypes) || + (jl_is_method(mi->def.value) && !jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)mi->def.method->sig))) { + jl_type_error("invoke: argument type error", mi->specTypes, arg_tuple(args[0], &args[2], nargs - 1)); + } + } + if (jl_atomic_load_relaxed(&codeinst->min_world) > jl_current_task->world_age || + jl_current_task->world_age > jl_atomic_load_relaxed(&codeinst->max_world)) { + jl_error("invoke: CodeInstance not valid for this world"); + } + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + return invoke(args[0], &args[2], nargs - 2, codeinst); + } else { + if (codeinst->owner != jl_nothing) { + jl_error("Failed to invoke or compile external codeinst"); + } + return jl_invoke(args[0], &args[2], nargs - 1, mi); + } + } + if (!jl_is_tuple_type(jl_unwrap_unionall(argtypes))) + jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, argtypes); if (!jl_tuple_isa(&args[2], nargs - 2, (jl_datatype_t*)argtypes)) - jl_error("invoke: argument type error"); - jl_value_t *res = jl_gf_invoke(argtypes, args[0], &args[2], nargs - 1); - JL_GC_POP(); - return res; + jl_type_error("invoke: argument type error", argtypes, jl_f_tuple(NULL, &args[2], nargs - 2)); + return jl_gf_invoke(argtypes, args[0], &args[2], nargs - 1); } // Expr constructor for internal use ------------------------------------------ @@ -1463,72 +1709,272 @@ JL_CALLABLE(jl_f__typevar) return (jl_value_t *)jl_new_typevar((jl_sym_t*)args[0], args[1], args[2]); } -// arrays --------------------------------------------------------------------- - -JL_CALLABLE(jl_f_arraysize) +// genericmemory --------------------------------------------------------------------- +JL_CALLABLE(jl_f_memorynew) { - JL_NARGS(arraysize, 2, 2); - JL_TYPECHK(arraysize, array, args[0]); - jl_array_t *a = (jl_array_t*)args[0]; - size_t nd = jl_array_ndims(a); - JL_TYPECHK(arraysize, long, args[1]); - int dno = jl_unbox_long(args[1]); - if (dno < 1) - jl_error("arraysize: dimension out of range"); - if (dno > nd) - return jl_box_long(1); - return jl_box_long((&a->nrows)[dno-1]); + JL_NARGS(memorynew, 2, 2); + jl_datatype_t *jl_genericmemory_type_type = jl_datatype_type; + JL_TYPECHK(memorynew, genericmemory_type, args[0]); + JL_TYPECHK(memorynew, long, args[1]); + size_t nel = jl_unbox_long(args[1]); + return (jl_value_t*)jl_alloc_genericmemory(args[0], nel); } -static size_t array_nd_index(jl_array_t *a, jl_value_t **args, size_t nidxs, - const char *fname) +JL_CALLABLE(jl_f_memoryref) { - size_t i = 0; - size_t k, stride = 1; - size_t nd = jl_array_ndims(a); - for (k = 0; k < nidxs; k++) { - if (!jl_is_long(args[k])) - jl_type_error(fname, (jl_value_t*)jl_long_type, args[k]); - size_t ii = jl_unbox_long(args[k]) - 1; - i += ii * stride; - size_t d = (k >= nd) ? 
1 : jl_array_dim(a, k); - if (k < nidxs - 1 && ii >= d) - jl_bounds_error_v((jl_value_t*)a, args, nidxs); - stride *= d; + JL_NARGS(memoryref, 1, 3); + if (nargs == 1) { + JL_TYPECHK(memoryref, genericmemory, args[0]); + jl_genericmemory_t *m = (jl_genericmemory_t*)args[0]; + jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(((jl_datatype_t*)jl_typetagof(m))->parameters), 3); + JL_GC_PROMISE_ROOTED(typ); // it is a concrete type + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + if (layout->flags.arrayelem_isunion || layout->size == 0) + return (jl_value_t*)jl_new_memoryref(typ, m, 0); + return (jl_value_t*)jl_new_memoryref(typ, m, m->ptr); + } + else { + JL_TYPECHK(memoryref, genericmemoryref, args[0]); + JL_TYPECHK(memoryref, long, args[1]); + if (nargs == 3) + JL_TYPECHK(memoryref, bool, args[2]); + jl_genericmemoryref_t *m = (jl_genericmemoryref_t*)args[0]; + size_t i = jl_unbox_long(args[1]) - 1; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m->mem))->layout; + char *data = (char*)m->ptr_or_offset; + if (layout->flags.arrayelem_isboxed) { + if (((data - (char*)m->mem->ptr) / sizeof(jl_value_t*)) + i >= m->mem->length) + jl_bounds_error((jl_value_t*)m, args[1]); + data += sizeof(jl_value_t*) * i; + } + else if (layout->flags.arrayelem_isunion || layout->size == 0) { + if ((size_t)data + i >= m->mem->length) + jl_bounds_error((jl_value_t*)m, args[1]); + data += i; + } + else { + if (((data - (char*)m->mem->ptr) / layout->size) + i >= m->mem->length) + jl_bounds_error((jl_value_t*)m, args[1]); + data += layout->size * i; + } + return (jl_value_t*)jl_new_memoryref((jl_value_t*)jl_typetagof(m), m->mem, data); } - for (; k < nd; k++) - stride *= jl_array_dim(a, k); - if (i >= stride) - jl_bounds_error_v((jl_value_t*)a, args, nidxs); - return i; } -JL_CALLABLE(jl_f_arrayref) +JL_CALLABLE(jl_f_memoryrefoffset) { - JL_NARGSV(arrayref, 3); - JL_TYPECHK(arrayref, bool, args[0]); - JL_TYPECHK(arrayref, array, args[1]); - jl_array_t *a = (jl_array_t*)args[1]; - size_t i = array_nd_index(a, &args[2], nargs - 2, "arrayref"); - return jl_arrayref(a, i); + JL_NARGS(memoryrefoffset, 1, 1); + JL_TYPECHK(memoryref, genericmemoryref, args[0]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + size_t offset; + if (layout->flags.arrayelem_isboxed) { + offset = (((char*)m.ptr_or_offset - (char*)m.mem->ptr) / sizeof(jl_value_t*)); + } + else if (layout->flags.arrayelem_isunion || layout->size == 0) { + offset = (size_t)m.ptr_or_offset; + } + else { + offset = ((char*)m.ptr_or_offset - (char*)m.mem->ptr) / layout->size; + } + return (jl_value_t*)jl_box_long(offset + 1); } -JL_CALLABLE(jl_f_const_arrayref) +JL_CALLABLE(jl_f_memoryrefget) { - return jl_f_arrayref(F, args, nargs); + enum jl_memory_order order = jl_memory_order_notatomic; + JL_NARGS(memoryrefget, 3, 3); + JL_TYPECHK(memoryrefget, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefget, symbol, args[1]); + JL_TYPECHK(memoryrefget, bool, args[2]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[1] != kind) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0); + jl_atomic_error("memoryrefget: non-atomic memory cannot be accessed atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + order = 
jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0); + if (order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefget: atomic memory cannot be accessed non-atomically"); + } + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + return jl_memoryrefget(m, kind == (jl_value_t*)jl_atomic_sym); } -JL_CALLABLE(jl_f_arrayset) +JL_CALLABLE(jl_f_memoryrefset) { - JL_NARGSV(arrayset, 4); - JL_TYPECHK(arrayset, bool, args[0]); - JL_TYPECHK(arrayset, array, args[1]); - jl_array_t *a = (jl_array_t*)args[1]; - size_t i = array_nd_index(a, &args[3], nargs - 3, "arrayset"); - jl_arrayset(a, args[2], i); + enum jl_memory_order order = jl_memory_order_notatomic; + JL_NARGS(memoryrefset!, 4, 4); + JL_TYPECHK(memoryrefset!, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefset!, symbol, args[2]); + JL_TYPECHK(memoryrefset!, bool, args[3]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[2] != kind) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 0, 1); + jl_atomic_error("memoryrefset!: non-atomic memory cannot be written atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 0, 1); + if (order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefset!: atomic memory cannot be written non-atomically"); + } + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + jl_memoryrefset(m, args[1], kind == (jl_value_t*)jl_atomic_sym); return args[1]; } +JL_CALLABLE(jl_f_memoryref_isassigned) +{ + enum jl_memory_order order = jl_memory_order_notatomic; + JL_NARGS(memoryref_isassigned, 3, 3); + JL_TYPECHK(memoryref_isassigned, genericmemoryref, args[0]); + JL_TYPECHK(memoryref_isassigned, symbol, args[1]); + JL_TYPECHK(memoryref_isassigned, bool, args[2]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[1] != kind) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0); + jl_atomic_error("memoryref_isassigned: non-atomic memory cannot be accessed atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[1], 1, 0); + if (order == jl_memory_order_notatomic) + jl_atomic_error("memoryref_isassigned: atomic memory cannot be accessed non-atomically"); + } + if (m.mem->length == 0) + // TODO(jwn): decide on the fences required for ordering here + return jl_false; + return jl_memoryref_isassigned(m, kind == (jl_value_t*)jl_atomic_sym); +} + +JL_CALLABLE(jl_f_memoryrefswap) +{ + enum jl_memory_order order = jl_memory_order_notatomic; + JL_NARGS(memoryrefswap!, 4, 4); + JL_TYPECHK(memoryrefswap!, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefswap!, symbol, args[2]); + JL_TYPECHK(memoryrefswap!, bool, args[3]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[2] != kind) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1); + jl_atomic_error("memoryrefswap!: non-atomic memory cannot be written atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1); + if (order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefswap!: atomic memory cannot be 
written non-atomically"); + } + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + return jl_memoryrefswap(m, args[1], kind == (jl_value_t*)jl_atomic_sym); +} + +JL_CALLABLE(jl_f_memoryrefmodify) +{ + enum jl_memory_order order = jl_memory_order_notatomic; + JL_NARGS(memoryrefmodify!, 5, 5); + JL_TYPECHK(memoryrefmodify!, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefmodify!, symbol, args[3]); + JL_TYPECHK(memoryrefmodify!, bool, args[4]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[3] != kind) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + jl_atomic_error("memoryrefmodify!: non-atomic memory cannot be written atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + if (order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefmodify!: atomic memory cannot be written non-atomically"); + } + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + return jl_memoryrefmodify(m, args[1], args[2], kind == (jl_value_t*)jl_atomic_sym); +} + +JL_CALLABLE(jl_f_memoryrefreplace) +{ + enum jl_memory_order success_order = jl_memory_order_notatomic; + enum jl_memory_order failure_order = jl_memory_order_notatomic; + JL_NARGS(memoryrefreplace!, 6, 6); + JL_TYPECHK(memoryrefreplace!, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefreplace!, symbol, args[3]); + JL_TYPECHK(memoryrefreplace!, symbol, args[4]); + JL_TYPECHK(memoryrefreplace!, bool, args[5]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[4] != kind) + jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order + if (args[3] != kind) { + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + jl_atomic_error("memoryrefreplace!: non-atomic memory cannot be written atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1); + failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 0); + if (failure_order > success_order) + jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order + if (success_order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefreplace!: atomic memory cannot be written non-atomically"); + if (failure_order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefreplace!: atomic memory cannot be accessed non-atomically"); + } + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + return jl_memoryrefreplace(m, args[1], args[2], kind == (jl_value_t*)jl_atomic_sym); +} + +JL_CALLABLE(jl_f_memoryrefsetonce) +{ + enum jl_memory_order success_order = jl_memory_order_notatomic; + enum jl_memory_order failure_order = jl_memory_order_notatomic; + JL_NARGS(memoryrefsetonce!, 5, 5); + JL_TYPECHK(memoryrefsetonce!, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefsetonce!, symbol, args[2]); + JL_TYPECHK(memoryrefsetonce!, symbol, args[3]); + JL_TYPECHK(memoryrefsetonce!, bool, args[4]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *kind = jl_tparam0(jl_typetagof(m.mem)); + if (kind == (jl_value_t*)jl_not_atomic_sym) { + if (args[3] != kind) + 
jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order + if (args[2] != kind) { + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1); + jl_atomic_error("memoryrefsetonce!: non-atomic memory cannot be written atomically"); + } + } + else if (kind == (jl_value_t*)jl_atomic_sym) { + success_order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 1); + failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 0); + if (failure_order > success_order) + jl_atomic_error("invalid atomic ordering"); // because either it is invalid, or failure_order > success_order + if (success_order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefsetonce!: atomic memory cannot be written non-atomically"); + if (failure_order == jl_memory_order_notatomic) + jl_atomic_error("memoryrefsetonce!: atomic memory cannot be accessed non-atomically"); + } + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + return jl_memoryrefsetonce(m, args[1], kind == (jl_value_t*)jl_atomic_sym); +} + // type definition ------------------------------------------------------------ JL_CALLABLE(jl_f__structtype) @@ -1655,11 +2101,9 @@ JL_CALLABLE(jl_f__compute_sparams) JL_CALLABLE(jl_f__svec_ref) { - JL_NARGS(_svec_ref, 3, 3); - jl_value_t *b = args[0]; - jl_svec_t *s = (jl_svec_t*)args[1]; - jl_value_t *i = (jl_value_t*)args[2]; - JL_TYPECHK(_svec_ref, bool, b); + JL_NARGS(_svec_ref, 2, 2); + jl_svec_t *s = (jl_svec_t*)args[0]; + jl_value_t *i = (jl_value_t*)args[1]; JL_TYPECHK(_svec_ref, simplevector, (jl_value_t*)s); JL_TYPECHK(_svec_ref, long, i); size_t len = jl_svec_len(s); @@ -1667,7 +2111,7 @@ JL_CALLABLE(jl_f__svec_ref) if (idx < 1 || idx > len) { jl_bounds_error_int((jl_value_t*)s, idx); } - return jl_svec_ref(s, idx-1); + return jl_svecref(s, idx-1); } static int equiv_field_types(jl_value_t *old, jl_value_t *ft) @@ -1680,7 +2124,7 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft) jl_value_t *ta = jl_svecref(old, i); jl_value_t *tb = jl_svecref(ft, i); if (jl_has_free_typevars(ta)) { - if (!jl_has_free_typevars(tb) || !jl_egal(ta, tb)) + if (!jl_has_free_typevars(tb) || !jl_types_egal(ta, tb)) return 0; } else if (jl_has_free_typevars(tb) || jl_typetagof(ta) != jl_typetagof(tb) || @@ -1696,36 +2140,54 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft) // inline it. The only way fields can reference this type (due to // syntax-enforced restrictions) is via being passed as a type parameter. Thus // we can conservatively check this by examining only the parameters of the -// dependent types. -// affects_layout is a hack introduced by #35275 to workaround a problem -// introduced by #34223: it checks whether we will potentially need to -// compute the layout of the object before we have fully computed the types of -// the fields during recursion over the allocation of the parameters for the -// field types (of the concrete subtypes) -static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT -{ - if (jl_is_uniontype(p)) - return references_name(((jl_uniontype_t*)p)->a, name, affects_layout) || - references_name(((jl_uniontype_t*)p)->b, name, affects_layout); - if (jl_is_unionall(p)) - return references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0) || - references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0) || - references_name(((jl_unionall_t*)p)->body, name, affects_layout); +// dependent types. 
Additionally, a field might have already observed this +// object for layout purposes before we got around to deciding if inlining +// would be possible, so we cannot change the layout now if so. +// affects_layout is a (conservative) analysis of layout_uses_free_typevars +// freevars is a (conservative) analysis of what calling jl_has_bound_typevars from name->wrapper gives (TODO: just call this instead?) +static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout, int freevars) JL_NOTSAFEPOINT +{ + if (freevars && !jl_has_free_typevars(p)) + freevars = 0; + while (jl_is_unionall(p)) { + if (references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0, freevars) || + references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0, freevars)) + return 1; + p = ((jl_unionall_t*)p)->body; + } + if (jl_is_uniontype(p)) { + return references_name(((jl_uniontype_t*)p)->a, name, affects_layout, freevars) || + references_name(((jl_uniontype_t*)p)->b, name, affects_layout, freevars); + } + if (jl_is_vararg(p)) { + jl_value_t *T = ((jl_vararg_t*)p)->T; + jl_value_t *N = ((jl_vararg_t*)p)->N; + return (T && references_name(T, name, affects_layout, freevars)) || + (N && references_name(N, name, affects_layout, freevars)); + } if (jl_is_typevar(p)) return 0; // already checked by unionall, if applicable if (jl_is_datatype(p)) { jl_datatype_t *dp = (jl_datatype_t*)p; if (affects_layout && dp->name == name) return 1; - // affects_layout checks whether we will need to attempt to layout this - // type (based on whether all copies of it have the same layout) in - // that case, we still need to check the recursive parameters for - // layout recursion happening also, but we know it won't itself cause - // problems for the layout computation - affects_layout = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL; + affects_layout = jl_is_genericmemory_type(dp) || ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL; + // and even if it has a layout, the fields themselves might trigger layouts if they use tparam i + // rather than checking this for each field, we just assume it applies + if (!affects_layout && freevars && jl_field_names(dp) != jl_emptysvec) { + jl_svec_t *types = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->types; + size_t i, l = jl_svec_len(types); + for (i = 0; i < l; i++) { + jl_value_t *ft = jl_svecref(types, i); + if (!jl_is_typevar(ft) && jl_has_free_typevars(ft)) { + affects_layout = 1; + break; + } + } + } size_t i, l = jl_nparams(p); for (i = 0; i < l; i++) { - if (references_name(jl_tparam(p, i), name, affects_layout)) + if (references_name(jl_tparam(p, i), name, affects_layout, freevars)) return 1; } } @@ -1761,12 +2223,12 @@ JL_CALLABLE(jl_f__typebody) // able to compute the layout of the object before needing to // publish it, so we must assume it cannot be inlined, if that // check passes, then we also still need to check the fields too. 
- if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 1))) { + if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 0, 1))) { int mayinlinealloc = 1; size_t i; for (i = 0; i < nf; i++) { jl_value_t *fld = jl_svecref(ft, i); - if (references_name(fld, dt->name, 1)) { + if (references_name(fld, dt->name, 1, 1)) { mayinlinealloc = 0; break; } @@ -1819,6 +2281,9 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb) JL_GC_PUSH2(&a, &b); a = jl_rewrap_unionall((jl_value_t*)dta->super, dta->name->wrapper); b = jl_rewrap_unionall((jl_value_t*)dtb->super, dtb->name->wrapper); + // if tb recursively refers to itself in its supertype, assume that it refers to ta + // before checking whether the supertypes are equal + b = jl_substitute_datatype(b, dtb, dta); if (!jl_types_equal(a, b)) goto no; JL_TRY { @@ -1861,13 +2326,12 @@ static unsigned intrinsic_nargs[num_intrinsics]; JL_CALLABLE(jl_f_intrinsic_call) { - JL_TYPECHK(intrinsic_call, intrinsic, F); enum intrinsic f = (enum intrinsic)*(uint32_t*)jl_data_ptr(F); if (f == cglobal && nargs == 1) f = cglobal_auto; unsigned fargs = intrinsic_nargs[f]; if (!fargs) - jl_errorf("`%s` must be compiled to be called", jl_intrinsic_name(f)); + jl_errorf("`%s` requires the compiler", jl_intrinsic_name(f)); JL_NARGS(intrinsic_call, fargs, fargs); union { @@ -1920,6 +2384,7 @@ unsigned jl_intrinsic_nargs(int f) static void add_intrinsic_properties(enum intrinsic f, unsigned nargs, void (*pfunc)(void)) { + assert(nargs <= 5 && "jl_f_intrinsic_call only implements up to 5 args"); intrinsic_nargs[f] = nargs; runtime_fp[f] = pfunc; } @@ -1929,7 +2394,7 @@ static void add_intrinsic(jl_module_t *inm, const char *name, enum intrinsic f) jl_value_t *i = jl_permbox32(jl_intrinsic_type, 0, (int32_t)f); jl_sym_t *sym = jl_symbol(name); jl_set_const(inm, sym, i); - jl_module_export(inm, sym); + jl_module_public(inm, sym, 1); } void jl_init_intrinsic_properties(void) JL_GC_DISABLED @@ -1972,12 +2437,13 @@ static void add_builtin(const char *name, jl_value_t *v) jl_set_const(jl_core_module, jl_symbol(name), v); } -jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b) +jl_fptr_args_t jl_get_builtin_fptr(jl_datatype_t *dt) { - assert(jl_isa(b, (jl_value_t*)jl_builtin_type)); - jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&jl_gf_mtable(b)->defs); + assert(jl_subtype((jl_value_t*)dt, (jl_value_t*)jl_builtin_type)); + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&dt->name->mt->defs); jl_method_instance_t *mi = jl_atomic_load_relaxed(&entry->func.method->unspecialized); jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache); + assert(ci->owner == jl_nothing); return jl_atomic_load_relaxed(&ci->specptr.fptr1); } @@ -2001,6 +2467,7 @@ void jl_init_primitives(void) JL_GC_DISABLED // field access jl_builtin_getfield = add_builtin_func("getfield", jl_f_getfield); jl_builtin_setfield = add_builtin_func("setfield!", jl_f_setfield); + jl_builtin_setfieldonce = add_builtin_func("setfieldonce!", jl_f_setfieldonce); jl_builtin_swapfield = add_builtin_func("swapfield!", jl_f_swapfield); jl_builtin_modifyfield = add_builtin_func("modifyfield!", jl_f_modifyfield); jl_builtin_replacefield = add_builtin_func("replacefield!", jl_f_replacefield); @@ -2011,14 +2478,24 @@ void jl_init_primitives(void) JL_GC_DISABLED // module bindings jl_builtin_getglobal = add_builtin_func("getglobal", jl_f_getglobal); jl_builtin_setglobal = add_builtin_func("setglobal!", jl_f_setglobal); + 
jl_builtin_isdefinedglobal = add_builtin_func("isdefinedglobal", jl_f_isdefinedglobal); add_builtin_func("get_binding_type", jl_f_get_binding_type); - add_builtin_func("set_binding_type!", jl_f_set_binding_type); - - // array primitives - jl_builtin_arrayref = add_builtin_func("arrayref", jl_f_arrayref); - jl_builtin_const_arrayref = add_builtin_func("const_arrayref", jl_f_arrayref); - jl_builtin_arrayset = add_builtin_func("arrayset", jl_f_arrayset); - jl_builtin_arraysize = add_builtin_func("arraysize", jl_f_arraysize); + jl_builtin_swapglobal = add_builtin_func("swapglobal!", jl_f_swapglobal); + jl_builtin_replaceglobal = add_builtin_func("replaceglobal!", jl_f_replaceglobal); + jl_builtin_modifyglobal = add_builtin_func("modifyglobal!", jl_f_modifyglobal); + jl_builtin_setglobalonce = add_builtin_func("setglobalonce!", jl_f_setglobalonce); + + // memory primitives + jl_builtin_memorynew = add_builtin_func("memorynew", jl_f_memorynew); + jl_builtin_memoryref = add_builtin_func("memoryrefnew", jl_f_memoryref); + jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset); + jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget); + jl_builtin_memoryrefset = add_builtin_func("memoryrefset!", jl_f_memoryrefset); + jl_builtin_memoryref_isassigned = add_builtin_func("memoryref_isassigned", jl_f_memoryref_isassigned); + jl_builtin_memoryrefswap = add_builtin_func("memoryrefswap!", jl_f_memoryrefswap); + jl_builtin_memoryrefreplace = add_builtin_func("memoryrefreplace!", jl_f_memoryrefreplace); + jl_builtin_memoryrefmodify = add_builtin_func("memoryrefmodify!", jl_f_memoryrefmodify); + jl_builtin_memoryrefsetonce = add_builtin_func("memoryrefsetonce!", jl_f_memoryrefsetonce); // method table utils jl_builtin_applicable = add_builtin_func("applicable", jl_f_applicable); @@ -2045,6 +2522,8 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin_func("finalizer", jl_f_finalizer); add_builtin_func("_compute_sparams", jl_f__compute_sparams); add_builtin_func("_svec_ref", jl_f__svec_ref); + jl_builtin_current_scope = add_builtin_func("current_scope", jl_f_current_scope); + add_builtin_func("throw_methoderror", jl_f_throw_methoderror); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); @@ -2060,7 +2539,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("Tuple", (jl_value_t*)jl_anytuple_type); add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type); add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type); - add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0)); + add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0, 0)); add_builtin("Module", (jl_value_t*)jl_module_type); add_builtin("MethodTable", (jl_value_t*)jl_methtable_type); @@ -2082,21 +2561,27 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("Builtin", (jl_value_t*)jl_builtin_type); add_builtin("MethodInstance", (jl_value_t*)jl_method_instance_type); add_builtin("CodeInfo", (jl_value_t*)jl_code_info_type); - add_builtin("Ref", (jl_value_t*)jl_ref_type); - add_builtin("Ptr", (jl_value_t*)jl_pointer_type); add_builtin("LLVMPtr", (jl_value_t*)jl_llvmpointer_type); add_builtin("Task", (jl_value_t*)jl_task_type); add_builtin("OpaqueClosure", (jl_value_t*)jl_opaque_closure_type); + add_builtin("AddrSpace", (jl_value_t*)jl_addrspace_type); + add_builtin("Ref", (jl_value_t*)jl_ref_type); + add_builtin("Ptr", (jl_value_t*)jl_pointer_type); + //add_builtin("GenericPtr", (jl_value_t*)jl_genericpointer_type); add_builtin("AbstractArray", 
(jl_value_t*)jl_abstractarray_type); add_builtin("DenseArray", (jl_value_t*)jl_densearray_type); add_builtin("Array", (jl_value_t*)jl_array_type); + add_builtin("GenericMemory", (jl_value_t*)jl_genericmemory_type); + add_builtin("GenericMemoryRef", (jl_value_t*)jl_genericmemoryref_type); add_builtin("Expr", (jl_value_t*)jl_expr_type); add_builtin("LineNumberNode", (jl_value_t*)jl_linenumbernode_type); - add_builtin("LineInfoNode", (jl_value_t*)jl_lineinfonode_type); + add_builtin("LegacyLineInfoNode", (jl_value_t*)jl_lineinfonode_type); + add_builtin("DebugInfo", (jl_value_t*)jl_debuginfo_type); add_builtin("GotoNode", (jl_value_t*)jl_gotonode_type); add_builtin("GotoIfNot", (jl_value_t*)jl_gotoifnot_type); + add_builtin("EnterNode", (jl_value_t*)jl_enternode_type); add_builtin("ReturnNode", (jl_value_t*)jl_returnnode_type); add_builtin("PiNode", (jl_value_t*)jl_pinode_type); add_builtin("PhiNode", (jl_value_t*)jl_phinode_type); @@ -2105,6 +2590,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("QuoteNode", (jl_value_t*)jl_quotenode_type); add_builtin("NewvarNode", (jl_value_t*)jl_newvarnode_type); add_builtin("Binding", (jl_value_t*)jl_binding_type); + add_builtin("BindingPartition", (jl_value_t*)jl_binding_partition_type); add_builtin("GlobalRef", (jl_value_t*)jl_globalref_type); add_builtin("NamedTuple", (jl_value_t*)jl_namedtuple_type); diff --git a/src/ccall.cpp b/src/ccall.cpp index 47496a3a91ba6..1b635ca40840f 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -22,21 +22,25 @@ TRANSFORMED_CCALL_STAT(jl_cpu_wake); TRANSFORMED_CCALL_STAT(jl_gc_safepoint); TRANSFORMED_CCALL_STAT(jl_get_ptls_states); TRANSFORMED_CCALL_STAT(jl_threadid); +TRANSFORMED_CCALL_STAT(jl_get_ptls_rng); +TRANSFORMED_CCALL_STAT(jl_set_ptls_rng); +TRANSFORMED_CCALL_STAT(jl_get_tls_world_age); +TRANSFORMED_CCALL_STAT(jl_get_world_counter); TRANSFORMED_CCALL_STAT(jl_gc_enable_disable_finalizers_internal); TRANSFORMED_CCALL_STAT(jl_get_current_task); TRANSFORMED_CCALL_STAT(jl_set_next_task); TRANSFORMED_CCALL_STAT(jl_sigatomic_begin); TRANSFORMED_CCALL_STAT(jl_sigatomic_end); -TRANSFORMED_CCALL_STAT(jl_svec_len); -TRANSFORMED_CCALL_STAT(jl_svec_ref); -TRANSFORMED_CCALL_STAT(jl_array_isassigned); TRANSFORMED_CCALL_STAT(jl_string_ptr); TRANSFORMED_CCALL_STAT(jl_symbol_name); +TRANSFORMED_CCALL_STAT(jl_genericmemory_owner); +TRANSFORMED_CCALL_STAT(jl_alloc_genericmemory); TRANSFORMED_CCALL_STAT(memcpy); TRANSFORMED_CCALL_STAT(memset); TRANSFORMED_CCALL_STAT(memmove); TRANSFORMED_CCALL_STAT(jl_object_id); #undef TRANSFORMED_CCALL_STAT +extern "C" JL_DLLEXPORT jl_value_t *ijl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; STATISTIC(EmittedCCalls, "Number of ccalls emitted"); STATISTIC(DeferredCCallLookups, "Number of ccalls looked up at runtime"); @@ -80,13 +84,13 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_ else { std::string name = "ccalllib_"; name += llvm::sys::path::filename(f_lib); - name += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)); + name += std::to_string(jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1)); runtime_lib = true; auto &libgv = ctx.emission_context.libMapGV[f_lib]; if (libgv.first == NULL) { - libptrgv = new GlobalVariable(*M, getInt8PtrTy(M->getContext()), false, + libptrgv = new GlobalVariable(*M, getPointerTy(M->getContext()), false, GlobalVariable::ExternalLinkage, - Constant::getNullValue(getInt8PtrTy(M->getContext())), name); + 
Constant::getNullValue(getPointerTy(M->getContext())), name); libgv.first = libptrgv; } else { @@ -100,7 +104,7 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_ std::string name = "ccall_"; name += f_name; name += "_"; - name += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)); + name += std::to_string(jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1)); auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext()); llvmgv = new GlobalVariable(*M, T_pvoidfunc, false, GlobalVariable::ExternalLinkage, @@ -152,7 +156,7 @@ static Value *runtime_sym_lookup( dlsym_lookup); assert(f->getParent() != NULL); - f->getBasicBlockList().push_back(dlsym_lookup); + dlsym_lookup->insertInto(f); irbuilder.SetInsertPoint(dlsym_lookup); Instruction *llvmf; Value *nameval = stringConstPtr(emission_context, irbuilder, f_name); @@ -168,34 +172,24 @@ static Value *runtime_sym_lookup( } else { // f_lib is actually one of the special sentinel values - libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext())); + libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getPointerTy(irbuilder.getContext())); } - llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func), + auto lookup = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func), { libname, nameval, libptrgv }); + llvmf = lookup; } setName(emission_context, llvmf, f_name + StringRef(".found")); StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, Align(sizeof(void*))); store->setAtomic(AtomicOrdering::Release); irbuilder.CreateBr(ccall_bb); - f->getBasicBlockList().push_back(ccall_bb); + ccall_bb->insertInto(f); irbuilder.SetInsertPoint(ccall_bb); PHINode *p = irbuilder.CreatePHI(T_pvoidfunc, 2); p->addIncoming(llvmf_orig, enter_bb); p->addIncoming(llvmf, llvmf->getParent()); setName(emission_context, p, f_name); - return irbuilder.CreateBitCast(p, funcptype); -} - -static Value *runtime_sym_lookup( - jl_codectx_t &ctx, - PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr, - const char *f_name, Function *f, - GlobalVariable *libptrgv, - GlobalVariable *llvmgv, bool runtime_lib) -{ - return runtime_sym_lookup(ctx.emission_context, ctx.builder, &ctx, funcptype, f_lib, lib_expr, - f_name, f, libptrgv, llvmgv, runtime_lib); + return p; } static Value *runtime_sym_lookup( @@ -215,7 +209,7 @@ static Value *runtime_sym_lookup( std::string gvname = "libname_"; gvname += f_name; gvname += "_"; - gvname += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)); + gvname += std::to_string(jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1)); llvmgv = new GlobalVariable(*jl_Module, T_pvoidfunc, false, GlobalVariable::ExternalLinkage, Constant::getNullValue(T_pvoidfunc), gvname); @@ -225,7 +219,7 @@ static Value *runtime_sym_lookup( libptrgv = prepare_global_in(jl_Module, libptrgv); } llvmgv = prepare_global_in(jl_Module, llvmgv); - return runtime_sym_lookup(ctx, funcptype, f_lib, lib_expr, f_name, f, libptrgv, llvmgv, runtime_lib); + return runtime_sym_lookup(ctx.emission_context, ctx.builder, &ctx, funcptype, f_lib, lib_expr, f_name, f, libptrgv, llvmgv, runtime_lib); } // Emit a "PLT" entry that will be lazily initialized @@ -243,29 +237,33 @@ static GlobalVariable *emit_plt_thunk( libptrgv = prepare_global_in(M, libptrgv); 
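The thunk generated here follows a familiar lazy-GOT pattern. A rough user-space C model of the same idea, using `dlsym` and C11 atomics in place of emitted IR (library and symbol handling are illustrative; as noted below, the emitted code actually reloads the slot with an unordered load):

    #include <dlfcn.h>
    #include <stdatomic.h>
    #include <stddef.h>

    /* Rough model of the lazy slot: resolve on first use, publish with a
     * release store, reuse the cached pointer afterwards.  Error handling
     * is omitted to keep the sketch short. */
    typedef double (*fn_t)(double);
    static _Atomic(fn_t) got_slot;

    static fn_t resolve(const char *lib, const char *sym)
    {
        fn_t f = atomic_load_explicit(&got_slot, memory_order_acquire);
        if (f == NULL) {
            void *handle = dlopen(lib, RTLD_LAZY | RTLD_GLOBAL);
            f = (fn_t)dlsym(handle, sym);
            atomic_store_explicit(&got_slot, f, memory_order_release);
        }
        return f;
    }

For instance, a call like resolve("libm.so.6", "cbrt") (names chosen only for the example) would cache the entry point after the first lookup and reuse it on every later call.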
llvmgv = prepare_global_in(M, llvmgv); std::string fname; - raw_string_ostream(fname) << "jlplt_" << f_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); + raw_string_ostream(fname) << "jlplt_" << f_name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); Function *plt = Function::Create(functype, - GlobalVariable::ExternalLinkage, + GlobalVariable::PrivateLinkage, fname, M); plt->setAttributes(attrs); if (cc != CallingConv::C) plt->setCallingConv(cc); - fname += "_got"; auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext()); GlobalVariable *got = new GlobalVariable(*M, T_pvoidfunc, false, GlobalVariable::ExternalLinkage, - ConstantExpr::getBitCast(plt, T_pvoidfunc), - fname); + plt, + fname + "_got"); + if (runtime_lib) { + got->addAttribute("julia.libname", f_lib); + } else { + got->addAttribute("julia.libidx", std::to_string((uintptr_t) f_lib)); + } + got->addAttribute("julia.fname", f_name); BasicBlock *b0 = BasicBlock::Create(M->getContext(), "top", plt); IRBuilder<> irbuilder(b0); Value *ptr = runtime_sym_lookup(ctx.emission_context, irbuilder, NULL, funcptype, f_lib, NULL, f_name, plt, libptrgv, llvmgv, runtime_lib); - StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, Align(sizeof(void*))); + StoreInst *store = irbuilder.CreateAlignedStore(ptr, got, Align(sizeof(void*))); store->setAtomic(AtomicOrdering::Release); SmallVector args; - for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg) - args.push_back(&*arg); - assert(cast(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype)); + for (auto &arg : plt->args()) + args.push_back(&arg); CallInst *ret = irbuilder.CreateCall( functype, ptr, ArrayRef(args)); @@ -307,14 +305,12 @@ static Value *emit_plt( CallingConv::ID cc, const char *f_lib, const char *f_name) { ++PLT; - assert(ctx.emission_context.imaging); // Don't do this for vararg functions so that the `musttail` is only // an optimization and is not required to function correctly. assert(!functype->isVarArg()); GlobalVariable *libptrgv; GlobalVariable *llvmgv; bool runtime_lib = runtime_sym_gvs(ctx, f_lib, f_name, libptrgv, llvmgv); - PointerType *funcptype = PointerType::get(functype, 0); auto &pltMap = ctx.emission_context.allPltMap[attrs]; auto key = std::make_tuple(llvmgv, functype, cc); @@ -331,7 +327,7 @@ static Value *emit_plt( // since the only thing we do to this loaded pointer is to call it // immediately. 
got_val->setAtomic(AtomicOrdering::Unordered); - return ctx.builder.CreateBitCast(got_val, funcptype); + return got_val; } // --- ABI Implementations --- @@ -368,6 +364,7 @@ static bool is_native_simd_type(jl_datatype_t *dt) { #include "abi_arm.cpp" #include "abi_aarch64.cpp" +#include "abi_riscv.cpp" #include "abi_ppc64le.cpp" #include "abi_win32.cpp" #include "abi_win64.cpp" @@ -376,22 +373,24 @@ static bool is_native_simd_type(jl_datatype_t *dt) { #if defined ABI_LLVM typedef ABI_LLVMLayout DefaultAbiState; -#elif defined _CPU_X86_64_ -# if defined _OS_WINDOWS_ +#elif defined _OS_WINDOWS_ +# if defined _CPU_X86_64_ typedef ABI_Win64Layout DefaultAbiState; -# else - typedef ABI_x86_64Layout DefaultAbiState; -# endif -#elif defined _CPU_X86_ -# if defined _OS_WINDOWS_ +# elif defined _CPU_X86_ typedef ABI_Win32Layout DefaultAbiState; # else - typedef ABI_x86Layout DefaultAbiState; +# error Windows is currently only supported on x86 and x86_64 # endif +#elif defined _CPU_X86_64_ + typedef ABI_x86_64Layout DefaultAbiState; +#elif defined _CPU_X86_ + typedef ABI_x86Layout DefaultAbiState; #elif defined _CPU_ARM_ typedef ABI_ARMLayout DefaultAbiState; #elif defined _CPU_AARCH64_ typedef ABI_AArch64Layout DefaultAbiState; +#elif defined _CPU_RISCV64_ + typedef ABI_RiscvLayout DefaultAbiState; #elif defined _CPU_PPC64_ typedef ABI_PPC64leLayout DefaultAbiState; #else @@ -414,7 +413,7 @@ static Value *llvm_type_rewrite( assert(from_type->isPointerTy() == target_type->isPointerTy()); // expect that all ABIs consider all pointers to be equivalent if (target_type->isPointerTy()) - return emit_bitcast(ctx, v, target_type); + return v; // simple integer and float widening & conversion cases if (from_type->getPrimitiveSizeInBits() > 0 && @@ -442,24 +441,13 @@ static Value *llvm_type_rewrite( // we need to use this alloca copy trick instead // On ARM and AArch64, the ABI requires casting through memory to different // sizes. 
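The comment above describes the classic copy-through-memory pun. A portable C analogue of what the rewritten code does with a single max-sized, max-aligned stack slot (the concrete types below are arbitrary examples):

    #include <stdint.h>
    #include <string.h>

    /* Reinterpret a smaller value at a larger type by staging it in a buffer
     * sized and aligned for the larger of the two, mirroring the single
     * max-size stack slot used by the rewrite below. */
    static uint64_t pun_float_to_u64(float x)
    {
        _Alignas(uint64_t) unsigned char buf[sizeof(uint64_t)] = {0};
        uint64_t out;
        memcpy(buf, &x, sizeof x);      /* "store" at the source type */
        memcpy(&out, buf, sizeof out);  /* "load" back at the target type */
        return out;
    }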
- Value *from; - Value *to; const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); - unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type)); - if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { - to = emit_static_alloca(ctx, target_type); - setName(ctx.emission_context, to, "type_rewrite_buffer"); - cast(to)->setAlignment(Align(align)); - from = emit_bitcast(ctx, to, from_type->getPointerTo()); - } - else { - from = emit_static_alloca(ctx, from_type); - setName(ctx.emission_context, from, "type_rewrite_buffer"); - cast(from)->setAlignment(Align(align)); - to = emit_bitcast(ctx, from, target_type->getPointerTo()); - } - ctx.builder.CreateAlignedStore(v, from, Align(align)); - auto pun = ctx.builder.CreateAlignedLoad(target_type, to, Align(align)); + Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type)); + size_t nb = std::max(DL.getTypeAllocSize(target_type), DL.getTypeAllocSize(from_type)); + AllocaInst *cast = emit_static_alloca(ctx, nb, align); + setName(ctx.emission_context, cast, "type_rewrite_buffer"); + ctx.builder.CreateAlignedStore(v, cast, align); + auto pun = ctx.builder.CreateAlignedLoad(target_type, cast, align); setName(ctx.emission_context, pun, "type_rewrite"); return pun; } @@ -478,7 +466,7 @@ static Value *runtime_apply_type_env(jl_codectx_t &ctx, jl_value_t *ty) ctx.spvals_ptr, ConstantInt::get(ctx.types().T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*))) }; - auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args)); + auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), ArrayRef(args)); addRetAttr(call, Attribute::getWithAlignment(ctx.builder.getContext(), Align(16))); return call; } @@ -488,15 +476,16 @@ static const std::string make_errmsg(const char *fname, int n, const char *err) std::string _msg; raw_string_ostream msg(_msg); msg << fname; - if (n > 0) - msg << " argument " << n; - else + if (n > 0) { + msg << " argument "; + msg << n; + } else msg << " return"; msg << err; return msg.str(); } -static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn) +static jl_cgval_t typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn) { if (jlto != (jl_value_t*)jl_any_type && !jl_subtype(jvinfo.typ, jlto)) { if (jlto == (jl_value_t*)jl_voidpointer_type) { @@ -504,6 +493,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val if (!jl_is_cpointer_type(jvinfo.typ)) { // emit a typecheck, if not statically known to be correct emit_cpointercheck(ctx, jvinfo, make_errmsg("ccall", argn + 1, "")); + return update_julia_type(ctx, jvinfo, (jl_value_t*)jl_pointer_type); } } else { @@ -528,8 +518,10 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val ctx.builder.CreateUnreachable(); ctx.builder.SetInsertPoint(passBB); } + return update_julia_type(ctx, jvinfo, jlto); } } + return jvinfo; } // Emit code to convert argument to form expected by C ABI @@ -539,7 +531,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val static Value *julia_to_native( jl_codectx_t &ctx, Type *to, bool toboxed, jl_value_t *jlto, jl_unionall_t *jlto_env, - const jl_cgval_t &jvinfo, + jl_cgval_t jvinfo, bool byRef, int argn) { // We're passing Any @@ -549,22 +541,16 @@ static Value *julia_to_native( } 
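For the by-reference path implemented a little further below (an unboxed stack slot whose address is handed to C), a small self-contained C illustration of the convention; the struct and callee are made up for the example:

    /* The callee receives a pointer to a stack copy, never a pointer into
     * the original (immutable) box, so writes through it cannot leak back. */
    struct pair { long a, b; };

    static void c_callee(struct pair *p)   /* stand-in for the ccall target */
    {
        p->a += p->b;
    }

    static long pass_by_ref(const struct pair *boxed)
    {
        struct pair slot = *boxed;  /* alloca-like copy, naturally aligned */
        c_callee(&slot);            /* callee may modify only the copy */
        return slot.a;
    }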
assert(jl_is_datatype(jlto) && jl_struct_try_layout((jl_datatype_t*)jlto)); - typeassert_input(ctx, jvinfo, jlto, jlto_env, argn); + jvinfo = typeassert_input(ctx, jvinfo, jlto, jlto_env, argn); if (!byRef) return emit_unbox(ctx, to, jvinfo, jlto); // pass the address of an alloca'd thing, not a box // since those are immutable. - Value *slot = emit_static_alloca(ctx, to); + Align align(julia_alignment(jlto)); + Value *slot = emit_static_alloca(ctx, to, align); setName(ctx.emission_context, slot, "native_convert_buffer"); - if (!jvinfo.ispointer()) { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); - ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); - } - else { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); - emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto)); - } + emit_unbox_store(ctx, jvinfo, slot, ctx.tbaa().tbaa_stack, align); return slot; } @@ -615,11 +601,13 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va jl_cgval_t arg1 = emit_expr(ctx, arg); jl_value_t *ptr_ty = arg1.typ; if (!jl_is_cpointer_type(ptr_ty)) { + if (!ccall) + return; const char *errmsg = invalid_symbol_err_msg(ccall); emit_cpointercheck(ctx, arg1, errmsg); } arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type); - jl_ptr = emit_unbox(ctx, ctx.types().T_size, arg1, (jl_value_t*)jl_voidpointer_type); + jl_ptr = emit_unbox(ctx, ctx.types().T_ptr, arg1, (jl_value_t*)jl_voidpointer_type); } else { out.gcroot = ptr; @@ -663,15 +651,16 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va f_lib = jl_symbol_name((jl_sym_t*)t1); else if (jl_is_string(t1)) f_lib = jl_string_data(t1); - else - f_name = NULL; + else { + out.lib_expr = t1; + } } } } // --- code generator for cglobal --- -static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs); +static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, ArrayRef argv, size_t nargs); static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) { @@ -686,7 +675,7 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg rt = static_eval(ctx, args[2]); if (rt == NULL) { JL_GC_POP(); - jl_cgval_t argv[2] = {jl_cgval_t(), jl_cgval_t()}; + jl_cgval_t argv[2]; argv[0] = emit_expr(ctx, args[1]); argv[1] = emit_expr(ctx, args[2]); return emit_runtime_call(ctx, JL_I::cglobal, argv, nargs); @@ -698,51 +687,32 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg else { rt = (jl_value_t*)jl_voidpointer_type; } - Type *lrt = ctx.types().T_size; + Type *lrt = ctx.types().T_ptr; assert(lrt == julia_type_to_llvm(ctx, rt)); interpret_symbol_arg(ctx, sym, args[1], /*ccall=*/false, false); - if (sym.f_name == NULL && sym.fptr == NULL && sym.jl_ptr == NULL && sym.gcroot != NULL) { - const char *errmsg = invalid_symbol_err_msg(/*ccall=*/false); - jl_cgval_t arg1 = emit_expr(ctx, args[1]); - emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg); - JL_GC_POP(); - return jl_cgval_t(); - } - if (sym.jl_ptr != NULL) { - res = ctx.builder.CreateBitCast(sym.jl_ptr, lrt); + res = sym.jl_ptr; } else if (sym.fptr != NULL) { res = ConstantInt::get(lrt, (uint64_t)sym.fptr); - if (ctx.emission_context.imaging) - jl_printf(JL_STDERR,"WARNING: literal address used in cglobal for %s; code cannot be statically compiled\n", sym.f_name); } - else { + else 
if (sym.f_name != NULL) { if (sym.lib_expr) { - res = runtime_sym_lookup(ctx, cast(getInt8PtrTy(ctx.builder.getContext())), NULL, sym.lib_expr, sym.f_name, ctx.f); - } - else if (ctx.emission_context.imaging) { - res = runtime_sym_lookup(ctx, cast(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f); - res = ctx.builder.CreatePtrToInt(res, lrt); + res = runtime_sym_lookup(ctx, getPointerTy(ctx.builder.getContext()), NULL, sym.lib_expr, sym.f_name, ctx.f); } else { - void *symaddr; - - void* libsym = jl_get_library_(sym.f_lib, 0); - int symbol_found = jl_dlsym(libsym, sym.f_name, &symaddr, 0); - if (!libsym || !symbol_found) { - // Error mode, either the library or the symbol couldn't be find during compiletime. - // Fallback to a runtime symbol lookup. - res = runtime_sym_lookup(ctx, cast(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f); - res = ctx.builder.CreatePtrToInt(res, lrt); - } else { - // since we aren't saving this code, there's no sense in - // putting anything complicated here: just JIT the address of the cglobal - res = ConstantInt::get(lrt, (uint64_t)symaddr); - } + res = runtime_sym_lookup(ctx, getPointerTy(ctx.builder.getContext()), sym.f_lib, NULL, sym.f_name, ctx.f); } + } else { + // Fall back to runtime intrinsic + JL_GC_POP(); + jl_cgval_t argv[2]; + argv[0] = emit_expr(ctx, args[1]); + if (nargs == 2) + argv[1] = emit_expr(ctx, args[2]); + return emit_runtime_call(ctx, nargs == 1 ? JL_I::cglobal_auto : JL_I::cglobal, argv, nargs); } JL_GC_POP(); @@ -767,7 +737,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar jl_value_t *ir_arg = args[1]; JL_GC_PUSH4(&ir, &rt, &at, &entry); if (jl_is_ssavalue(ir_arg)) - ir_arg = jl_arrayref((jl_array_t*)ctx.source->code, ((jl_ssavalue_t*)ir_arg)->id - 1); + ir_arg = jl_array_ptr_ref((jl_array_t*)ctx.source->code, ((jl_ssavalue_t*)ir_arg)->id - 1); ir = static_eval(ctx, ir_arg); if (!ir) { emit_error(ctx, "error statically evaluating llvm IR argument"); @@ -775,7 +745,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar return jl_cgval_t(); } if (jl_is_ssavalue(args[2]) && !jl_is_long(ctx.source->ssavaluetypes)) { - jl_value_t *rtt = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1); + jl_value_t *rtt = jl_array_ptr_ref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1); if (jl_is_type_type(rtt)) rt = jl_tparam0(rtt); } @@ -788,7 +758,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar } } if (jl_is_ssavalue(args[3]) && !jl_is_long(ctx.source->ssavaluetypes)) { - jl_value_t *att = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[3])->id - 1); + jl_value_t *att = jl_array_ptr_ref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[3])->id - 1); if (jl_is_type_type(att)) at = jl_tparam0(att); } @@ -832,19 +802,14 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar JL_TYPECHK(llvmcall, type, rt); JL_TYPECHK(llvmcall, type, at); - // Generate arguments - std::string arguments; - raw_string_ostream argstream(arguments); - jl_svec_t *tt = ((jl_datatype_t*)at)->parameters; - jl_value_t *rtt = rt; + // Determine argument types + // + // Semantics for arguments are as follows: + // If the argument type is immutable (including bitstype), we pass the loaded llvm value + // type. Otherwise we pass a pointer to a jl_value_t. 
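The comment above states the llvmcall argument rule: immutable bits types are passed as loaded LLVM values, anything else as a pointer to a jl_value_t. As a rough compile-time analogy in plain C++ (not Julia's actual lowering; the Opaque type and alias are illustrative stand-ins):

```cpp
// Analogy only: decide at compile time whether a value travels by value or as
// an opaque boxed pointer, in the spirit of the "immutable bits types by
// value, everything else as jl_value_t*" rule described above.
#include <iostream>
#include <string>
#include <type_traits>

struct Opaque; // stand-in for an opaque jl_value_t

template <typename T>
using lowered_arg_t =
    std::conditional_t<std::is_trivially_copyable_v<T>, T, Opaque *>;

static_assert(std::is_same_v<lowered_arg_t<double>, double>);        // bits type: loaded value
static_assert(std::is_same_v<lowered_arg_t<std::string>, Opaque *>); // non-bits: pointer

int main() {
    std::cout << "both static_asserts hold, so the sketch compiles\n";
}
```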
+ jl_svec_t *tt = ((jl_datatype_t *)at)->parameters; size_t nargt = jl_svec_len(tt); - - /* - * Semantics for arguments are as follows: - * If the argument type is immutable (including bitstype), we pass the loaded llvm value - * type. Otherwise we pass a pointer to a jl_value_t. - */ - std::vector argtypes; + SmallVector argtypes; SmallVector argvals(nargt); for (size_t i = 0; i < nargt; ++i) { jl_value_t *tti = jl_svecref(tt,i); @@ -864,45 +829,91 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar argvals[i] = llvm_type_rewrite(ctx, v, t, issigned); } + // Determine return type + jl_value_t *rtt = rt; bool retboxed; Type *rettype = julia_type_to_llvm(ctx, rtt, &retboxed); // Make sure to find a unique name std::string ir_name; while (true) { - raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); + raw_string_ostream(ir_name) + << (ctx.f->getName().str()) << "u" + << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); if (jl_Module->getFunction(ir_name) == NULL) break; } // generate a temporary module that contains our IR std::unique_ptr Mod; + bool shouldDiscardValueNames = ctx.builder.getContext().shouldDiscardValueNames(); + Function *f; if (entry == NULL) { // we only have function IR, which we should put in a function - bool first = true; - for (std::vector::iterator it = argtypes.begin(); it != argtypes.end(); ++it) { - if (!first) + // stringify arguments + std::string arguments; + raw_string_ostream argstream(arguments); + for (SmallVector::iterator it = argtypes.begin(); it != argtypes.end(); ++it) { + if (it != argtypes.begin()) argstream << ","; - else - first = false; (*it)->print(argstream); argstream << " "; } + // stringify return type std::string rstring; raw_string_ostream rtypename(rstring); rettype->print(rtypename); - std::map localDecls; + // generate IR function definition std::string ir_string; raw_string_ostream ir_stream(ir_string); - ir_stream << "; Number of arguments: " << nargt << "\n" - << "define "< 0) + compat_argstream << ","; + jl_value_t *tti = jl_svecref(tt, i); + Type *t; + if (jl_is_cpointer_type(tti)) + t = ctx.types().T_size; + else + t = argtypes[i]; + t->print(compat_argstream); + compat_argstream << " "; + } + + std::string compat_rstring; + raw_string_ostream compat_rtypename(compat_rstring); + if (jl_is_cpointer_type(rtt)) + ctx.types().T_size->print(compat_rtypename); + else + rettype->print(compat_rtypename); + + std::string compat_ir_string; + raw_string_ostream compat_ir_stream(compat_ir_string); + compat_ir_stream << "define " << compat_rtypename.str() << " @\"" << ir_name + << "\"(" << compat_argstream.str() << ") {\n" + << jl_string_data(ir) << "\n}"; + + SMDiagnostic Err = SMDiagnostic(); + ctx.builder.getContext().setDiscardValueNames(false); + Mod = parseAssemblyString(compat_ir_stream.str(), Err, ctx.builder.getContext()); + ctx.builder.getContext().setDiscardValueNames(shouldDiscardValueNames); + } + if (!Mod) { std::string message = "Failed to parse LLVM assembly: \n"; raw_string_ostream stream(message); @@ -912,7 +923,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar return jl_cgval_t(); } - Function *f = Mod->getFunction(ir_name); + f = Mod->getFunction(ir_name); f->addFnAttr(Attribute::AlwaysInline); } else { @@ -920,7 +931,9 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar if (jl_is_string(ir)) { SMDiagnostic Err = SMDiagnostic(); + 
ctx.builder.getContext().setDiscardValueNames(false); Mod = parseAssemblyString(jl_string_data(ir), Err, ctx.builder.getContext()); + ctx.builder.getContext().setDiscardValueNames(shouldDiscardValueNames); if (!Mod) { std::string message = "Failed to parse LLVM assembly: \n"; raw_string_ostream stream(message); @@ -932,7 +945,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar } else { auto Buf = MemoryBuffer::getMemBuffer( - StringRef((char *)jl_array_data(ir), jl_array_len(ir)), "llvmcall", + StringRef(jl_array_data(ir, char), jl_array_nrows(ir)), "llvmcall", /*RequiresNullTerminator*/ false); Expected> ModuleOrErr = parseBitcodeFile(*Buf, ctx.builder.getContext()); @@ -950,21 +963,96 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar Mod = std::move(ModuleOrErr.get()); } - Function *f = Mod->getFunction(jl_string_data(entry)); + f = Mod->getFunction(jl_string_data(entry)); if (!f) { emit_error(ctx, "Module IR does not contain specified entry function"); JL_GC_POP(); return jl_cgval_t(); } + assert(!f->isDeclaration()); f->setName(ir_name); + } - // verify the function type - assert(!f->isDeclaration()); - assert(f->getReturnType() == rettype); - int i = 0; - for (std::vector::iterator it = argtypes.begin(); - it != argtypes.end(); ++it, ++i) - assert(*it == f->getFunctionType()->getParamType(i)); + // backwards compatibility: support for IR with integer pointers + bool mismatched_pointers = false; + for (size_t i = 0; i < nargt; ++i) { + jl_value_t *tti = jl_svecref(tt, i); + if (jl_is_cpointer_type(tti) && + !f->getFunctionType()->getParamType(i)->isPointerTy()) { + mismatched_pointers = true; + break; + } + } + if (mismatched_pointers) { + if (jl_options.depwarn) { + if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR) + jl_error("llvmcall with integer pointers is deprecated, " + "use an actual pointer type instead."); + + // ensure we only depwarn once per method + // TODO: lift this into a reusable codegen-level depwarn utility + static std::set llvmcall_depwarns; + jl_method_t *m = ctx.linfo->def.method; + if (llvmcall_depwarns.find(m) == llvmcall_depwarns.end()) { + llvmcall_depwarns.insert(m); + jl_printf(JL_STDERR, + "WARNING: llvmcall with integer pointers is deprecated.\n" + "Use actual pointers instead, replacing i32 or i64 with i8* or ptr\n" + "in "); + jl_static_show(JL_STDERR, (jl_value_t*) ctx.linfo->def.method); + jl_printf(JL_STDERR, " at %s\n", ctx.file.str().c_str()); + } + } + + // wrap the function, performing the necessary pointer conversion + + Function *inner = f; + inner->setName(ir_name + ".inner"); + + FunctionType *wrapper_ft = FunctionType::get(rettype, argtypes, false); + Function *wrapper = + Function::Create(wrapper_ft, inner->getLinkage(), ir_name, *Mod); + + wrapper->copyAttributesFrom(inner); + inner->addFnAttr(Attribute::AlwaysInline); + + BasicBlock *entry = BasicBlock::Create(ctx.builder.getContext(), "", wrapper); + IRBuilder<> irbuilder(entry); + SmallVector wrapper_args; + for (size_t i = 0; i < nargt; ++i) { + jl_value_t *tti = jl_svecref(tt, i); + Value *v = wrapper->getArg(i); + if (jl_is_cpointer_type(tti)) + v = irbuilder.CreatePtrToInt(v, ctx.types().T_size); + wrapper_args.push_back(v); + } + Value *call = irbuilder.CreateCall(inner, wrapper_args); + // check if void + if (rettype->isVoidTy()) + irbuilder.CreateRetVoid(); + else { + if (jl_is_cpointer_type(rtt)) + call = irbuilder.CreateIntToPtr(call, ctx.types().T_ptr); + irbuilder.CreateRet(call); + } + + f = wrapper; + } 
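The compatibility path that ends here wraps llvmcall IR that still spells pointers as i32/i64: the outer function keeps the new pointer-typed signature, the legacy body is renamed to an always-inlined inner function, and the wrapper converts with ptrtoint/inttoptr around the call, emitting a depwarn once per method. The same boundary conversion in a standalone C++ sketch (legacy_strlen and wrapped_strlen are illustrative names, not anything in the tree):

```cpp
// Sketch only: the outward-facing signature uses real pointers, the legacy
// callee still takes an integer "pointer", and the shim converts at the edge,
// which is what the generated ptrtoint/inttoptr wrapper does for llvmcall.
#include <cstdint>
#include <cstring>
#include <iostream>

static std::uintptr_t legacy_strlen(std::uintptr_t p) { // old-style inner function
    return std::strlen(reinterpret_cast<const char *>(p));
}

static std::size_t wrapped_strlen(const char *p) {      // new pointer-typed wrapper
    return static_cast<std::size_t>(
        legacy_strlen(reinterpret_cast<std::uintptr_t>(p)));
}

int main() {
    std::cout << wrapped_strlen("llvmcall") << "\n"; // prints 8
}
```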
+ + // verify the function type + assert(f->getReturnType() == rettype); + int i = 0; + for (SmallVector::iterator it = argtypes.begin(); it != argtypes.end(); + ++it, ++i) { + if (*it != f->getFunctionType()->getParamType(i)) { + std::string message; + raw_string_ostream stream(message); + stream << "Malformed llvmcall: argument " << i + 1 << " type " + << *f->getFunctionType()->getParamType(i) + << " does not match expected argument type " << **it; + emit_error(ctx, stream.str()); + return jl_cgval_t(); + } } // copy module properties that should always match @@ -1002,7 +1090,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar if (inst->getType() != rettype) { std::string message; raw_string_ostream stream(message); - stream << "llvmcall return type " << *inst->getType() + stream << "Malformed llvmcall: return type " << *inst->getType() << " does not match declared return type" << *rettype; emit_error(ctx, stream.str()); return jl_cgval_t(); @@ -1019,8 +1107,9 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_ // XXX: need to handle parameterized zero-byte types (singleton) const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); unsigned nb = DL.getTypeStoreSize(result->getType()); + unsigned align = sizeof(void*); // Allocations are at least pointer aligned MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; - Value *strct = emit_allocobj(ctx, nb, runtime_dt); + Value *strct = emit_allocobj(ctx, nb, runtime_dt, true, align); setName(ctx.emission_context, strct, "ccall_result_box"); init_bits_value(ctx, strct, result, tbaa); return strct; @@ -1041,10 +1130,10 @@ static jl_cgval_t mark_or_box_ccall_result(jl_codectx_t &ctx, Value *result, boo class function_sig_t { public: - std::vector fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments - std::vector fargt_sig; // vector of ABI coercion types for call signature - std::vector fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument - std::vector byRefList; // vector of "byref" parameters + SmallVector fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments + SmallVector fargt_sig; // vector of ABI coercion types for call signature + SmallVector fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument + SmallVector byRefList; // vector of "byref" parameters AttributeList attributes; // vector of function call site attributes Type *lrt; // input parameter of the llvm return type (from julia_struct_to_llvm) bool retboxed; // input parameter indicating whether lrt is jl_value_t* @@ -1073,7 +1162,7 @@ class function_sig_t { FunctionType *functype(LLVMContext &ctxt) const { assert(err_msg.empty()); if (nreqargs > 0) - return FunctionType::get(sret ? getVoidTy(ctxt) : prt, makeArrayRef(fargt_sig).slice(0, nreqargs), true); + return FunctionType::get(sret ? getVoidTy(ctxt) : prt, ArrayRef(fargt_sig).slice(0, nreqargs), true); else return FunctionType::get(sret ? 
getVoidTy(ctxt) : prt, fargt_sig, false); } @@ -1082,7 +1171,7 @@ class function_sig_t { jl_codectx_t &ctx, const native_sym_arg_t &symarg, jl_cgval_t *argv, - SmallVector &gc_uses, + SmallVectorImpl &gc_uses, bool static_rt) const; private: @@ -1090,7 +1179,7 @@ std::string generate_func_sig(const char *fname) { assert(rt && !jl_is_abstract_ref_type(rt)); - std::vector paramattrs; + SmallVector paramattrs; std::unique_ptr abi; if (llvmcall) abi.reset(new ABI_LLVMLayout()); @@ -1133,7 +1222,7 @@ std::string generate_func_sig(const char *fname) bool isboxed; if (jl_is_abstract_ref_type(tti)) { tti = (jl_value_t*)jl_voidpointer_type; - t = getInt8PtrTy(LLVMCtx); + t = getPointerTy(LLVMCtx); isboxed = false; } else if (llvmcall && jl_is_llvmpointer_type(tti)) { @@ -1142,26 +1231,26 @@ std::string generate_func_sig(const char *fname) isboxed = false; } else { - if (jl_is_primitivetype(tti)) { + t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall); + if (t == getVoidTy(LLVMCtx)) { + return make_errmsg(fname, i + 1, " type doesn't correspond to a C type"); + } + if (jl_is_primitivetype(tti) && t->isIntegerTy()) { // see pull req #978. need to annotate signext/zeroext for // small integer arguments. jl_datatype_t *bt = (jl_datatype_t*)tti; - if (jl_datatype_size(bt) < 4 && bt != jl_float16_type) { + if (jl_datatype_size(bt) < 4) { if (jl_signed_type && jl_subtype(tti, (jl_value_t*)jl_signed_type)) ab.addAttribute(Attribute::SExt); else ab.addAttribute(Attribute::ZExt); } } - - t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall); - if (t == getVoidTy(LLVMCtx)) { - return make_errmsg(fname, i + 1, " type doesn't correspond to a C type"); - } } Type *pat; - if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) { + // n.b. `Array` used as argument type just passes a julia object reference + if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_array_type(tti) || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) { tti = (jl_value_t*)jl_voidpointer_type; // passed as pointer } @@ -1292,7 +1381,7 @@ static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at, JL_TYPECHK(ccall, type, rt); JL_TYPECHK(ccall, simplevector, at); - if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) || + if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) || jl_is_genericmemory_type(rt) || (jl_is_datatype(rt) && ((jl_datatype_t*)rt)->layout != NULL && jl_is_layout_opaque(((jl_datatype_t*)rt)->layout))) { // n.b. 
`Array` used as return type just returns a julia object reference @@ -1370,11 +1459,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) return jl_cgval_t(); } - auto ccallarg = [=] (size_t i) { - assert(i < nccallargs && i + fc_args_start <= nargs); - return args[fc_args_start + i]; - }; - auto _is_libjulia_func = [&] (uintptr_t ptr, StringRef name) { if ((uintptr_t)fptr == ptr) return true; @@ -1401,21 +1485,24 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) SmallVector argv(nccallargs); for (size_t i = 0; i < nccallargs; i++) { // Julia (expression) value of current parameter - jl_value_t *argi = ccallarg(i); + assert(i < nccallargs && i + fc_args_start <= nargs); + jl_value_t *argi = args[fc_args_start + i]; argv[i] = emit_expr(ctx, argi); + if (argv[i].typ == jl_bottom_type) { + JL_GC_POP(); + return jl_cgval_t(); + } } // emit roots - SmallVector gc_uses; + SmallVector gc_uses; for (size_t i = nccallargs + fc_args_start; i <= nargs; i++) { // Julia (expression) value of current parameter gcroot jl_value_t *argi_root = args[i]; if (jl_is_long(argi_root)) continue; jl_cgval_t arg_root = emit_expr(ctx, argi_root); - Value *gc_root = get_gc_root_for(arg_root); - if (gc_root) - gc_uses.push_back(gc_root); + gc_uses.append(get_gc_roots_for(ctx, arg_root)); } jl_unionall_t *unionall = (jl_is_method(ctx.linfo->def.method) && jl_is_unionall(ctx.linfo->def.method->sig)) @@ -1462,7 +1549,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) return jl_cgval_t(); } if (rt != args[2] && rt != (jl_value_t*)jl_any_type) - rt = jl_ensure_rooted(ctx, rt); + jl_temporary_root(ctx, rt); function_sig_t sig("ccall", lrt, rt, retboxed, (jl_svec_t*)at, unionall, nreqargs, cc, llvmcall, &ctx.emission_context); @@ -1479,25 +1566,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) // some special functions bool isVa = nreqargs > 0; (void)isVa; // prevent compiler warning - if (is_libjulia_func(jl_array_ptr)) { - ++CCALL_STAT(jl_array_ptr); - assert(lrt == ctx.types().T_size); - assert(!isVa && !llvmcall && nccallargs == 1); - const jl_cgval_t &ary = argv[0]; - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, ctx.builder.CreatePtrToInt(emit_unsafe_arrayptr(ctx, ary), lrt), - retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_value_ptr)) { + if (is_libjulia_func(jl_value_ptr)) { ++CCALL_STAT(jl_value_ptr); - assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_size); + assert(retboxed ? 
lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_ptr); assert(!isVa && !llvmcall && nccallargs == 1); jl_value_t *tti = jl_svecref(at, 0); Type *largty; bool isboxed; if (jl_is_abstract_ref_type(tti)) { tti = (jl_value_t*)jl_voidpointer_type; - largty = ctx.types().T_size; + largty = ctx.types().T_ptr; isboxed = false; } else { @@ -1506,11 +1584,10 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) Value *retval; if (isboxed) { retval = boxed(ctx, argv[0]); - retval = emit_pointer_from_objref(ctx, emit_bitcast(ctx, retval, ctx.types().T_prjlvalue)); + retval = emit_pointer_from_objref(ctx, retval /*T_prjlvalue*/); } else { retval = emit_unbox(ctx, largty, argv[0], tti); - retval = emit_inttoptr(ctx, retval, ctx.types().T_pjlvalue); } // retval is now an untracked jl_value_t* if (retboxed) @@ -1584,23 +1661,20 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const); return ghostValue(ctx, jl_nothing_type); } - else if (is_libjulia_func("jl_get_ptls_states")) { + else if (is_libjulia_func(jl_get_ptls_states)) { ++CCALL_STAT(jl_get_ptls_states); - assert(lrt == ctx.types().T_size); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - return mark_or_box_ccall_result(ctx, - ctx.builder.CreatePtrToInt(get_current_ptls(ctx), lrt), - retboxed, rt, unionall, static_rt); + return mark_or_box_ccall_result(ctx, get_current_ptls(ctx), retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_threadid)) { ++CCALL_STAT(jl_threadid); assert(lrt == getInt16Ty(ctx.builder.getContext())); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext())); + Value *ptask = get_current_task(ctx); const int tid_offset = offsetof(jl_task_t, tid); - Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t))); + Value *ptid = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptask, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int8_t))); setName(ctx.emission_context, ptid, "thread_id_ptr"); LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t))); setName(ctx.emission_context, tid, "thread_id"); @@ -1608,15 +1682,77 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) ai.decorateInst(tid); return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt); } + else if (is_libjulia_func(jl_get_ptls_rng)) { + ++CCALL_STAT(jl_get_ptls_rng); + assert(lrt == getInt64Ty(ctx.builder.getContext())); + assert(!isVa && !llvmcall && nccallargs == 0); + JL_GC_POP(); + Value *ptls_p = get_current_ptls(ctx); + const int rng_offset = offsetof(jl_tls_states_t, rngseed); + Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t))); + setName(ctx.emission_context, rng_ptr, "rngseed_ptr"); + LoadInst *rng_value = ctx.builder.CreateAlignedLoad(getInt64Ty(ctx.builder.getContext()), rng_ptr, Align(sizeof(void*))); + setName(ctx.emission_context, rng_value, "rngseed"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + ai.decorateInst(rng_value); + return mark_or_box_ccall_result(ctx, rng_value, retboxed, rt, 
unionall, static_rt); + } + else if (is_libjulia_func(jl_set_ptls_rng)) { + ++CCALL_STAT(jl_set_ptls_rng); + assert(lrt == getVoidTy(ctx.builder.getContext())); + assert(!isVa && !llvmcall && nccallargs == 1); + JL_GC_POP(); + Value *ptls_p = get_current_ptls(ctx); + const int rng_offset = offsetof(jl_tls_states_t, rngseed); + Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t))); + setName(ctx.emission_context, rng_ptr, "rngseed_ptr"); + assert(argv[0].V->getType() == getInt64Ty(ctx.builder.getContext())); + auto store = ctx.builder.CreateAlignedStore(argv[0].V, rng_ptr, Align(sizeof(void*))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + ai.decorateInst(store); + return ghostValue(ctx, jl_nothing_type); + } + else if (is_libjulia_func(jl_get_tls_world_age)) { + ++CCALL_STAT(jl_get_tls_world_age); + assert(lrt == ctx.types().T_size); + assert(!isVa && !llvmcall && nccallargs == 0); + JL_GC_POP(); + Value *world_age = get_tls_world_age(ctx); + return mark_or_box_ccall_result(ctx, world_age, retboxed, rt, unionall, static_rt); + } + else if (is_libjulia_func(jl_get_world_counter)) { + ++CCALL_STAT(jl_get_world_counter); + assert(lrt == ctx.types().T_size); + assert(!isVa && !llvmcall && nccallargs == 0); + JL_GC_POP(); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + + // jl_task_t *ct = jl_current_task; + // if (ct->ptls->in_pure_callback) + // return ~(size_t)0; + // return jl_atomic_load_acquire(&jl_world_counter); + Type *T_int16 = getInt16Ty(ctx.builder.getContext()); + Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_tls_states_t, in_pure_callback) / sizeof(int16_t)); + Value *field_ptr = ctx.builder.CreateInBoundsGEP(T_int16, get_current_ptls(ctx), offset); + Instruction *in_pure_callback = ai.decorateInst(ctx.builder.CreateAlignedLoad(T_int16, + field_ptr, Align(sizeof(int16_t)), "in_pure_callback")); + Value *cond = ctx.builder.CreateICmpEQ(in_pure_callback, ConstantInt::get(T_int16, 0)); + + Value *world_counter = ctx.builder.CreateAlignedLoad(ctx.types().T_size, + prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr); + cast(world_counter)->setOrdering(AtomicOrdering::Acquire); + Value *ret = ctx.builder.CreateSelect(cond, world_counter, ConstantInt::get(ctx.types().T_size, ~(size_t)0)); + return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt); + } else if (is_libjulia_func(jl_gc_disable_finalizers_internal) #ifdef NDEBUG || is_libjulia_func(jl_gc_enable_finalizers_internal) #endif ) { JL_GC_POP(); - Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext())); + Value *ptls_p = get_current_ptls(ctx); const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited); - Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4)); + Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, finh_offset / sizeof(int8_t))); setName(ctx.emission_context, pfinh, "finalizers_inhibited_ptr"); LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t))); setName(ctx.emission_context, finh, "finalizers_inhibited"); @@ -1639,7 +1775,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) 
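The new jl_get_world_counter fast path above follows the C pseudo-code in its comment: load ptls->in_pure_callback, perform an acquire load of the global world counter, and select ~(size_t)0 when running inside a pure callback. A standalone sketch of that flag-plus-acquire-load pattern with std::atomic (the globals below are stand-ins for jl_world_counter and the ptls field, not Julia's actual definitions):

```cpp
// Sketch only: an acquire load of a shared counter, with ~(size_t)0 returned
// while a thread-local "pure callback" flag is set, mirroring the select the
// codegen emits.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>

static std::atomic<std::size_t> world_counter{42};
thread_local std::int16_t in_pure_callback = 0;

static std::size_t get_world_counter() {
    std::size_t w = world_counter.load(std::memory_order_acquire); // acquire load
    return in_pure_callback == 0 ? w : ~static_cast<std::size_t>(0);
}

int main() {
    std::cout << get_world_counter() << "\n"; // 42
    in_pure_callback = 1;
    std::cout << get_world_counter() << "\n"; // all-ones sentinel
}
```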
assert(lrt == ctx.types().T_prjlvalue); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - auto ct = track_pjlvalue(ctx, emit_bitcast(ctx, get_current_task(ctx), ctx.types().T_pjlvalue)); + auto ct = track_pjlvalue(ctx, get_current_task(ctx)); return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_set_next_task)) { @@ -1647,7 +1783,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) assert(lrt == getVoidTy(ctx.builder.getContext())); assert(!isVa && !llvmcall && nccallargs == 1); JL_GC_POP(); - Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue); + Value *ptls_pv = get_current_ptls(ctx); const int nt_offset = offsetof(jl_tls_states_t, next_task); Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*))); setName(ctx.emission_context, pnt, "next_task_ptr"); @@ -1700,154 +1836,57 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) ctx.builder.SetInsertPoint(checkBB); auto signal_page_load = ctx.builder.CreateLoad( ctx.types().T_size, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, - get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1), + emit_ptrgep(ctx, get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const), + -sizeof(size_t)), true); setName(ctx.emission_context, signal_page_load, "signal_page_load"); ctx.builder.CreateBr(contBB); - ctx.f->getBasicBlockList().push_back(contBB); + contBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(contBB); return ghostValue(ctx, jl_nothing_type); } - else if (is_libjulia_func(jl_svec_len)) { - ++CCALL_STAT(jl_svec_len); - assert(!isVa && !llvmcall && nccallargs == 1); - const jl_cgval_t &svecv = argv[0]; - Value *len; - if (svecv.constant && svecv.typ == (jl_value_t*)jl_simplevector_type) { - // Check the type as well before we call - len = ConstantInt::get(ctx.types().T_size, jl_svec_len(svecv.constant)); - } - else { - auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_size->getPointerTo()); - setName(ctx.emission_context, ptr, "svec_len_ptr"); - len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr); - setName(ctx.emission_context, len, "svec_len"); - // Only mark with TBAA if we are sure about the type. 
- // This could otherwise be in a dead branch - if (svecv.typ == (jl_value_t*)jl_simplevector_type) { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - ai.decorateInst(cast(len)); - } - MDBuilder MDB(ctx.builder.getContext()); - auto rng = MDB.createRange( - Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX / sizeof(void*) - 1)); - cast(len)->setMetadata(LLVMContext::MD_range, rng); - } - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, len, retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_svec_ref) && argv[1].typ == (jl_value_t*)jl_long_type) { - ++CCALL_STAT(jl_svec_ref); - assert(lrt == ctx.types().T_prjlvalue); - assert(!isVa && !llvmcall && nccallargs == 2); - const jl_cgval_t &svecv = argv[0]; - const jl_cgval_t &idxv = argv[1]; - Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_long_type); - idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, 1)); - setName(ctx.emission_context, idx, "svec_idx"); - auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue); - setName(ctx.emission_context, ptr, "svec_data_ptr"); - Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, - decay_derived(ctx, ptr), idx); - setName(ctx.emission_context, slot_addr, "svec_slot_addr"); - LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, - Align(sizeof(void*))); - setName(ctx.emission_context, load, "svec_slot"); - load->setAtomic(AtomicOrdering::Unordered); - // Only mark with TBAA if we are sure about the type. - // This could otherwise be in a dead branch - if (svecv.typ == (jl_value_t*)jl_simplevector_type) { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - ai.decorateInst(load); - } - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, load, retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_array_isassigned) && - argv[1].typ == (jl_value_t*)jl_ulong_type) { - ++CCALL_STAT(jl_array_isassigned); - assert(!isVa && !llvmcall && nccallargs == 2); - jl_value_t *aryex = ccallarg(0); - const jl_cgval_t &aryv = argv[0]; - const jl_cgval_t &idxv = argv[1]; - jl_datatype_t *arydt = (jl_datatype_t*)jl_unwrap_unionall(aryv.typ); - if (jl_is_array_type(arydt)) { - jl_value_t *ety = jl_tparam0(arydt); - bool ptrarray = !jl_stored_inline(ety); - if (!ptrarray && !jl_type_hasptr(ety)) { - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1), - false, rt, unionall, static_rt); - } - else if (!jl_has_free_typevars(ety)) { - Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_ulong_type); - Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), ctx.types().T_pprjlvalue); - if (!ptrarray) { - size_t elsz = jl_datatype_size(ety); - unsigned align = jl_datatype_align(ety); - size_t stride = LLT_ALIGN(elsz, align) / sizeof(jl_value_t*); - if (stride != 1) - idx = ctx.builder.CreateMul(idx, ConstantInt::get(ctx.types().T_size, stride)); - idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ((jl_datatype_t*)ety)->layout->first_ptr)); - setName(ctx.emission_context, idx, "array_idx"); - } - Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx); - setName(ctx.emission_context, slot_addr, "array_slot_addr"); - LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*))); - setName(ctx.emission_context, 
load, "array_slot"); - load->setAtomic(AtomicOrdering::Unordered); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_ptrarraybuf); - ai.decorateInst(load); - Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(ctx.types().T_prjlvalue)), getInt32Ty(ctx.builder.getContext())); - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt); - } - } - } else if (is_libjulia_func(jl_string_ptr)) { ++CCALL_STAT(jl_string_ptr); - assert(lrt == ctx.types().T_size); + assert(lrt == ctx.types().T_ptr); assert(!isVa && !llvmcall && nccallargs == 1); - auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), - ctx.types().T_pprjlvalue); + auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue // The inbounds gep makes it more clear to LLVM that the resulting value is not // a null pointer. - auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1); - strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); - setName(ctx.emission_context, strp, "string_ptr"); + auto strp = emit_ptrgep(ctx, obj, ctx.types().sizeof_ptr, "string_ptr"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_symbol_name)) { ++CCALL_STAT(jl_symbol_name); - assert(lrt == ctx.types().T_size); + assert(lrt == ctx.types().T_ptr); assert(!isVa && !llvmcall && nccallargs == 1); - auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), - ctx.types().T_pprjlvalue); + auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue // The inbounds gep makes it more clear to LLVM that the resulting value is not // a null pointer. 
- auto strp = ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*)); - strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); - setName(ctx.emission_context, strp, "symbol_name"); + auto strp = emit_ptrgep(ctx, obj, sizeof(jl_sym_t), "symbol_name"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } + else if (is_libjulia_func(jl_genericmemory_owner) || is_libjulia_func(ijl_genericmemory_owner)) { + ++CCALL_STAT(jl_genericmemory_owner); + assert(lrt == ctx.types().T_prjlvalue); + assert(!isVa && !llvmcall && nccallargs == 1); + Value *obj = emit_genericmemoryowner(ctx, boxed(ctx, argv[0])); + JL_GC_POP(); + return mark_julia_type(ctx, obj, true, jl_any_type); + } else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) { ++CCALL_STAT(memcpy); const jl_cgval_t &dst = argv[0]; const jl_cgval_t &src = argv[1]; const jl_cgval_t &n = argv[2]; - Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type); + Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type); ctx.builder.CreateMemCpy( - emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())), + destp, + MaybeAlign(1), + emit_unbox(ctx, ctx.types().T_ptr, src, (jl_value_t*)jl_voidpointer_type), MaybeAlign(1), - emit_inttoptr(ctx, - emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type), - getInt8PtrTy(ctx.builder.getContext())), - MaybeAlign(0), emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), false); JL_GC_POP(); @@ -1859,11 +1898,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const jl_cgval_t &dst = argv[0]; const jl_cgval_t &val = argv[1]; const jl_cgval_t &n = argv[2]; - Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type); + Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type); Value *val32 = emit_unbox(ctx, getInt32Ty(ctx.builder.getContext()), val, (jl_value_t*)jl_uint32_type); Value *val8 = ctx.builder.CreateTrunc(val32, getInt8Ty(ctx.builder.getContext()), "memset_val"); ctx.builder.CreateMemSet( - emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())), + destp, val8, emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), MaybeAlign(1) @@ -1877,14 +1916,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const jl_cgval_t &dst = argv[0]; const jl_cgval_t &src = argv[1]; const jl_cgval_t &n = argv[2]; - Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type); + Value *destp = emit_unbox(ctx, ctx.types().T_ptr, dst, (jl_value_t*)jl_voidpointer_type); ctx.builder.CreateMemMove( - emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())), + destp, MaybeAlign(0), - emit_inttoptr(ctx, - emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type), - getInt8PtrTy(ctx.builder.getContext())), + emit_unbox(ctx, ctx.types().T_ptr, src, (jl_value_t*)jl_voidpointer_type), MaybeAlign(0), emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), false); @@ -1899,7 +1936,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) if (val.typ == (jl_value_t*)jl_symbol_type) { JL_GC_POP(); const int hash_offset = offsetof(jl_sym_t, hash); - Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), 
ctx.types().T_size->getPointerTo()); + Value *ph1 = decay_derived(ctx, boxed(ctx, val)); Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr)); setName(ctx.emission_context, ph2, "object_id_ptr"); LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr); @@ -1911,17 +1948,15 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) else if (!val.isboxed) { // If the value is not boxed, try to compute the object id without // reboxing it. - auto T_pint8_derived = PointerType::get(getInt8Ty(ctx.builder.getContext()), AddressSpace::Derived); - if (!val.isghost && !val.ispointer()) + auto T_p_derived = PointerType::get(ctx.builder.getContext(), AddressSpace::Derived); + if (!val.isghost) val = value_to_pointer(ctx, val); Value *args[] = { - emit_typeof(ctx, val), - val.isghost ? ConstantPointerNull::get(T_pint8_derived) : - ctx.builder.CreateBitCast( - decay_derived(ctx, data_pointer(ctx, val)), - T_pint8_derived) + emit_typeof(ctx, val, false, true), + val.isghost ? ConstantPointerNull::get(T_p_derived) : + decay_derived(ctx, data_pointer(ctx, val)) }; - Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), makeArrayRef(args)); + Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), ArrayRef(args)); setName(ctx.emission_context, ret, "object_id"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt); @@ -1942,7 +1977,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( jl_codectx_t &ctx, const native_sym_arg_t &symarg, jl_cgval_t *argv, - SmallVector &gc_uses, + SmallVectorImpl &gc_uses, bool static_rt) const { ++EmittedCCalls; @@ -1958,10 +1993,10 @@ jl_cgval_t function_sig_t::emit_a_ccall( // Current C function parameter jl_cgval_t &arg = argv[ai]; jl_value_t *jargty = jl_svecref(at, ai); // Julia type of the current parameter - Type *largty = fargt.at(ai); // LLVM type of the current parameter - bool toboxed = fargt_isboxed.at(ai); - Type *pargty = fargt_sig.at(ai + sret); // LLVM coercion type - bool byRef = byRefList.at(ai); // Argument attributes + Type *largty = fargt[ai]; // LLVM type of the current parameter + bool toboxed = fargt_isboxed[ai]; + Type *pargty = fargt_sig[ai + sret]; // LLVM coercion type + bool byRef = byRefList[ai]; // Argument attributes // if we know the function sparams, try to fill those in now // so that the julia_to_native type checks are more likely to be doable (e.g. 
concrete types) at compile-time @@ -1969,8 +2004,11 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (ctx.spvals_ptr == NULL && !toboxed && unionall_env && jl_has_typevar_from_unionall(jargty, unionall_env) && jl_svec_len(ctx.linfo->sparam_vals) > 0) { jargty_in_env = jl_instantiate_type_in_env(jargty_in_env, unionall_env, jl_svec_data(ctx.linfo->sparam_vals)); - if (jargty_in_env != jargty) - jargty_in_env = jl_ensure_rooted(ctx, jargty_in_env); + if (jargty_in_env != jargty) { + JL_GC_PUSH1(&jargty_in_env); + jl_temporary_root(ctx, jargty_in_env); + JL_GC_POP(); + } } Value *v; @@ -2010,21 +2048,21 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (sret) { assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); if (jl_is_pointerfree(rt)) { - result = emit_static_alloca(ctx, lrt); + result = emit_static_alloca(ctx, lrt, Align(julia_alignment(rt))); setName(ctx.emission_context, result, "ccall_sret"); sretty = lrt; - argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0)); + argvals[0] = result; } else { // XXX: result needs to be zero'd and given a GC root here // and has incorrect write barriers. // instead this code path should behave like `unsafe_load` - result = emit_allocobj(ctx, (jl_datatype_t*)rt); + result = emit_allocobj(ctx, (jl_datatype_t*)rt, true); setName(ctx.emission_context, result, "ccall_sret_box"); sretty = ctx.types().T_jlvalue; sretboxed = true; gc_uses.push_back(result); - argvals[0] = ctx.builder.CreateBitCast(emit_pointer_from_objref(ctx, result), fargt_sig.at(0)); + argvals[0] = emit_pointer_from_objref(ctx, result); } } @@ -2056,7 +2094,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (!isa(llvmf) || cast(llvmf)->isIntrinsic() || cast(llvmf)->getFunctionType() != functype) llvmf = NULL; } - else if (f_name.startswith("llvm.")) { + else if (f_name.starts_with("llvm.")) { // compute and verify auto-mangling for intrinsic name auto ID = Function::lookupIntrinsicID(f_name); if (ID != Intrinsic::not_intrinsic) { @@ -2086,16 +2124,22 @@ jl_cgval_t function_sig_t::emit_a_ccall( } else if (symarg.jl_ptr != NULL) { ++LiteralCCalls; - null_pointer_check(ctx, symarg.jl_ptr); - Type *funcptype = PointerType::get(functype, 0); - llvmf = emit_inttoptr(ctx, symarg.jl_ptr, funcptype); + null_pointer_check(ctx, symarg.jl_ptr, nullptr); + llvmf = symarg.jl_ptr; } else if (symarg.fptr != NULL) { ++LiteralCCalls; - Type *funcptype = PointerType::get(functype, 0); + Type *funcptype = functype->getPointerTo(0); llvmf = literal_static_pointer_val((void*)(uintptr_t)symarg.fptr, funcptype); - if (ctx.emission_context.imaging) - jl_printf(JL_STDERR,"WARNING: literal address used in ccall for %s; code cannot be statically compiled\n", symarg.f_name); + setName(ctx.emission_context, llvmf, "ccall_fptr"); + } + else if (!ctx.params->use_jlplt) { + if ((symarg.f_lib && !((symarg.f_lib == JL_EXE_LIBNAME) || + (symarg.f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) || + (symarg.f_lib == JL_LIBJULIA_DL_LIBNAME))) || symarg.lib_expr) { + emit_error(ctx, "ccall: Had library expression, but symbol lookup was disabled"); + } + llvmf = jl_Module->getOrInsertFunction(symarg.f_name, functype).getCallee(); } else { assert(symarg.f_name != NULL); @@ -2104,7 +2148,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( ++DeferredCCallLookups; llvmf = runtime_sym_lookup(ctx, funcptype, NULL, symarg.lib_expr, symarg.f_name, ctx.f); } - else if (ctx.emission_context.imaging) { + else { ++DeferredCCallLookups; // vararg requires musttail, // but musttail is incompatible with noreturn. 
@@ -2113,22 +2157,6 @@ jl_cgval_t function_sig_t::emit_a_ccall( else llvmf = emit_plt(ctx, functype, attributes, cc, symarg.f_lib, symarg.f_name); } - else { - void *symaddr; - void *libsym = jl_get_library_(symarg.f_lib, 0); - int symbol_found = jl_dlsym(libsym, symarg.f_name, &symaddr, 0); - if (!libsym || !symbol_found) { - ++DeferredCCallLookups; - // either the library or the symbol could not be found, place a runtime - // lookup here instead. - llvmf = runtime_sym_lookup(ctx, funcptype, symarg.f_lib, NULL, symarg.f_name, ctx.f); - } else { - ++LiteralCCalls; - // since we aren't saving this code, there's no sense in - // putting anything complicated here: just JIT the function address - llvmf = literal_static_pointer_val(symaddr, funcptype); - } - } } OperandBundleDef OpBundle("jl_roots", gc_uses); @@ -2183,10 +2211,10 @@ jl_cgval_t function_sig_t::emit_a_ccall( else if (jlretboxed && !retboxed) { assert(jl_is_datatype(rt)); if (static_rt) { - Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt, true); setName(ctx.emission_context, strct, "ccall_ret_box"); MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; - int boxalign = julia_alignment(rt); + Align boxalign(julia_alignment(rt)); // copy the data from the return value to the new struct const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); auto resultTy = result->getType(); @@ -2194,12 +2222,11 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (DL.getTypeStoreSize(resultTy) > rtsz) { // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. - auto slot = emit_static_alloca(ctx, resultTy); + auto slot = emit_static_alloca(ctx, resultTy, boxalign); setName(ctx.emission_context, slot, "type_pun_slot"); - slot->setAlignment(Align(boxalign)); - ctx.builder.CreateAlignedStore(result, slot, Align(boxalign)); + ctx.builder.CreateAlignedStore(result, slot, boxalign); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign); + emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign); } else { init_bits_value(ctx, strct, result, tbaa, boxalign); diff --git a/src/ccalllazybar.c b/src/ccalllazybar.c new file mode 100644 index 0000000000000..84bf9763fffa5 --- /dev/null +++ b/src/ccalllazybar.c @@ -0,0 +1,10 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "ccalltest_common.h" + +// We expect this to come from `libccalllazyfoo` +extern int foo(int); + +DLLEXPORT int bar(int a) { + return foo(a + 1); +} diff --git a/src/ccalllazyfoo.c b/src/ccalllazyfoo.c new file mode 100644 index 0000000000000..d68421adef67b --- /dev/null +++ b/src/ccalllazyfoo.c @@ -0,0 +1,7 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "ccalltest_common.h" + +DLLEXPORT int foo(int a) { + return a*2; +} diff --git a/src/ccalltest.c b/src/ccalltest.c index e35ff38eb7dc8..0c7c85b328415 100644 --- a/src/ccalltest.c +++ b/src/ccalltest.c @@ -1,41 +1,10 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license -#include -#include -#include -#include -#include - -#include "../src/support/platform.h" -#include "../src/support/dtypes.h" - -// Borrow definition from `support/dtypes.h` -#ifdef _OS_WINDOWS_ -# define DLLEXPORT __declspec(dllexport) -#else -# if defined(_OS_LINUX_) && !defined(_COMPILER_CLANG_) -// Clang and ld disagree about the proper relocation for STV_PROTECTED, causing -// linker errors. -# define DLLEXPORT __attribute__ ((visibility("protected"))) -# else -# define DLLEXPORT __attribute__ ((visibility("default"))) -# endif -#endif - - -#ifdef _P64 -#define jint int64_t -#define PRIjint PRId64 -#else -#define jint int32_t -#define PRIjint PRId32 -#endif +#include "ccalltest_common.h" int verbose = 1; - int c_int = 0; - ////////////////////////////////// // Test for proper argument register truncation diff --git a/src/ccalltest_common.h b/src/ccalltest_common.h new file mode 100644 index 0000000000000..484cbde593369 --- /dev/null +++ b/src/ccalltest_common.h @@ -0,0 +1,30 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license +#include +#include +#include +#include +#include + +#include "../src/support/platform.h" +#include "../src/support/dtypes.h" + +// Borrow definition from `support/dtypes.h` +#ifdef _OS_WINDOWS_ +# define DLLEXPORT __declspec(dllexport) +#else +# if defined(_OS_LINUX_) && !defined(_COMPILER_CLANG_) +// Clang and ld disagree about the proper relocation for STV_PROTECTED, causing +// linker errors. +# define DLLEXPORT __attribute__ ((visibility("protected"))) +# else +# define DLLEXPORT __attribute__ ((visibility("default"))) +# endif +#endif + +#ifdef _P64 +#define jint int64_t +#define PRIjint PRId64 +#else +#define jint int32_t +#define PRIjint PRId32 +#endif diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index b627224e027a9..c257d2a2e3331 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -25,18 +25,21 @@ # include # include #endif +#ifdef _OS_OPENBSD_ +# include +#endif #include "julia_assert.h" namespace { -static size_t get_block_size(size_t size) +static size_t get_block_size(size_t size) JL_NOTSAFEPOINT { return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) : jl_page_size * 256); } // Wrapper function to mmap/munmap/mprotect pages... 
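The cgmemmgr.cpp changes that begin here mostly annotate the page-mapping helpers (get_block_size, map_anon_page, unmap_page, protect_page, and the shared-map routines) as JL_NOTSAFEPOINT. For readers unfamiliar with those wrappers, a minimal POSIX-only sketch of the underlying pattern, reserve anonymous read-write pages and then tighten their protection once filled, looks like this (error handling trimmed; not the Julia implementation):

```cpp
// POSIX-only sketch of the map_anon_page / protect_page pattern: mmap
// anonymous read-write pages, fill them, then drop write permission with
// mprotect (the JIT flips to read-only or read-execute in the same way).
#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>
#include <cstring>
#include <iostream>

int main() {
    const std::size_t page = static_cast<std::size_t>(sysconf(_SC_PAGESIZE));
    void *mem = mmap(nullptr, page, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED)
        return 1;
    std::memcpy(mem, "hello", 6);                  // fill while writable
    mprotect(mem, page, PROT_READ);                // then drop write permission
    std::cout << static_cast<char *>(mem) << "\n"; // still readable: "hello"
    munmap(mem, page);
    return 0;
}
```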
-static void *map_anon_page(size_t size) +static void *map_anon_page(size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size, @@ -51,7 +54,7 @@ static void *map_anon_page(size_t size) return mem; } -static void unmap_page(void *ptr, size_t size) +static void unmap_page(void *ptr, size_t size) JL_NOTSAFEPOINT { #ifdef _OS_WINDOWS_ VirtualFree(ptr, size, MEM_DECOMMIT); @@ -68,7 +71,7 @@ enum class Prot : int { NO = PAGE_NOACCESS }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { DWORD old_prot; if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) { @@ -86,7 +89,7 @@ enum class Prot : int { NO = PROT_NONE }; -static void protect_page(void *ptr, size_t size, Prot flags) +static void protect_page(void *ptr, size_t size, Prot flags) JL_NOTSAFEPOINT { int ret = mprotect(ptr, size, (int)flags); if (ret != 0) { @@ -95,7 +98,7 @@ static void protect_page(void *ptr, size_t size, Prot flags) } } -static bool check_fd_or_close(int fd) +static bool check_fd_or_close(int fd) JL_NOTSAFEPOINT { if (fd == -1) return false; @@ -126,7 +129,7 @@ static intptr_t anon_hdl = -1; // Also, creating big file mapping and then map pieces of it seems to // consume too much global resources. Therefore, we use each file mapping // as a block on windows -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS, 0, 0, size); @@ -134,13 +137,13 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = 0; return 0; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); DWORD file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; @@ -159,7 +162,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) } #else // _OS_WINDOWS_ // For shared mapped region -static intptr_t get_anon_hdl(void) +static intptr_t get_anon_hdl(void) JL_NOTSAFEPOINT { int fd = -1; @@ -225,7 +228,7 @@ static struct _make_shared_map_lock { }; } shared_map_lock; -static size_t get_map_size_inc() +static size_t get_map_size_inc() JL_NOTSAFEPOINT { rlimit rl; if (getrlimit(RLIMIT_FSIZE, &rl) != -1) { @@ -239,7 +242,7 @@ static size_t get_map_size_inc() return map_size_inc_default; } -static void *create_shared_map(size_t size, size_t id) +static void *create_shared_map(size_t size, size_t id) JL_NOTSAFEPOINT { void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, anon_hdl, id); @@ -247,7 +250,7 @@ static void *create_shared_map(size_t size, size_t id) return addr; } -static intptr_t init_shared_map() +static intptr_t init_shared_map() JL_NOTSAFEPOINT { anon_hdl = get_anon_hdl(); if (anon_hdl == -1) @@ -262,7 +265,7 @@ static intptr_t init_shared_map() return anon_hdl; } -static void *alloc_shared_page(size_t size, size_t *id, bool exec) +static void *alloc_shared_page(size_t size, size_t *id, bool exec) JL_NOTSAFEPOINT { assert(size % jl_page_size == 0); size_t off = jl_atomic_fetch_add(&map_offset, size); @@ -289,7 +292,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) #ifdef _OS_LINUX_ // Using `/proc/self/mem`, A.K.A. Keno's remote memory manager. 
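The "/proc/self/mem" comment above refers to the trick the self-memory allocator relies on: writes issued through that file reach the process's pages via the kernel, which is how write_self_mem patches memory whose local mapping is not writable. A Linux-only standalone sketch of a pwrite through /proc/self/mem (the target here is an ordinary writable variable, purely to show the mechanism):

```cpp
// Linux-only sketch: write to this process's own memory via /proc/self/mem
// with pwrite, the same channel write_self_mem() uses; the offset is simply
// the target address.
#include <fcntl.h>
#include <unistd.h>
#include <cstdint>
#include <cstdio>

int main() {
    volatile int target = 1;
    int fd = open("/proc/self/mem", O_RDWR);
    if (fd == -1)
        return 1;
    int newval = 42;
    off_t where = static_cast<off_t>(reinterpret_cast<std::uintptr_t>(&target));
    if (pwrite(fd, &newval, sizeof(newval), where) != sizeof(newval))
        return 1;
    close(fd);
    std::printf("%d\n", target); // prints 42
    return 0;
}
```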
-ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) +ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) JL_NOTSAFEPOINT { static_assert(sizeof(off_t) >= 8, "off_t is smaller than 64bits"); #ifdef _P64 @@ -316,7 +319,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) // Do not call this directly. // Use `get_self_mem_fd` which has a guard to call this only once. -static int _init_self_mem() +static int _init_self_mem() JL_NOTSAFEPOINT { struct utsname kernel; uname(&kernel); @@ -356,13 +359,13 @@ static int _init_self_mem() return fd; } -static int get_self_mem_fd() +static int get_self_mem_fd() JL_NOTSAFEPOINT { static int fd = _init_self_mem(); return fd; } -static void write_self_mem(void *dest, void *ptr, size_t size) +static void write_self_mem(void *dest, void *ptr, size_t size) JL_NOTSAFEPOINT { while (size > 0) { ssize_t ret = pwrite_addr(get_self_mem_fd(), ptr, size, (uintptr_t)dest); @@ -421,7 +424,7 @@ struct Block { Block(const Block&) = delete; Block &operator=(const Block&) = delete; - Block(Block &&other) + Block(Block &&other) JL_NOTSAFEPOINT : ptr(other.ptr), total(other.total), avail(other.avail) @@ -430,9 +433,9 @@ struct Block { other.total = other.avail = 0; } - Block() = default; + Block() JL_NOTSAFEPOINT = default; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t aligned_avail = avail & (-align); if (aligned_avail < size) @@ -441,7 +444,7 @@ struct Block { avail = aligned_avail - size; return p; } - void reset(void *addr, size_t size) + void reset(void *addr, size_t size) JL_NOTSAFEPOINT { if (avail >= jl_page_size) { uintptr_t end = uintptr_t(ptr) + total; @@ -459,7 +462,8 @@ class RWAllocator { static constexpr int nblocks = 8; Block blocks[nblocks]{}; public: - void *alloc(size_t size, size_t align) + RWAllocator() JL_NOTSAFEPOINT = default; + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -495,9 +499,9 @@ struct SplitPtrBlock : public Block { uintptr_t wr_ptr{0}; uint32_t state{0}; - SplitPtrBlock() = default; + SplitPtrBlock() JL_NOTSAFEPOINT = default; - void swap(SplitPtrBlock &other) + void swap(SplitPtrBlock &other) JL_NOTSAFEPOINT { std::swap(ptr, other.ptr); std::swap(total, other.total); @@ -506,7 +510,7 @@ struct SplitPtrBlock : public Block { std::swap(state, other.state); } - SplitPtrBlock(SplitPtrBlock &&other) + SplitPtrBlock(SplitPtrBlock &&other) JL_NOTSAFEPOINT : SplitPtrBlock() { swap(other); @@ -531,11 +535,12 @@ class ROAllocator { // but might not have all the permissions set or data copied yet. SmallVector completed; virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) = 0; - virtual SplitPtrBlock alloc_block(size_t size) = 0; + size_t size, size_t align) JL_NOTSAFEPOINT = 0; + virtual SplitPtrBlock alloc_block(size_t size) JL_NOTSAFEPOINT = 0; public: - virtual ~ROAllocator() {} - virtual void finalize() + ROAllocator() JL_NOTSAFEPOINT = default; + virtual ~ROAllocator() JL_NOTSAFEPOINT {} + virtual void finalize() JL_NOTSAFEPOINT { for (auto &alloc: allocations) { // ensure the mapped pages are consistent @@ -549,7 +554,7 @@ class ROAllocator { } // Allocations that have not been finalized yet. 
SmallVector allocations; - void *alloc(size_t size, size_t align) + void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -600,7 +605,7 @@ class ROAllocator { template class DualMapAllocator : public ROAllocator { protected: - void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override + void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT { assert((char*)rt_ptr >= block.ptr && (char*)rt_ptr < (block.ptr + block.total)); @@ -615,7 +620,7 @@ class DualMapAllocator : public ROAllocator { } return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr)); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; // use `wr_ptr` to record the id initially @@ -623,7 +628,7 @@ class DualMapAllocator : public ROAllocator { new_block.reset(ptr, size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { // This function handles setting the block to the right mode // and free'ing maps that are not needed anymore. @@ -659,11 +664,11 @@ class DualMapAllocator : public ROAllocator { } } public: - DualMapAllocator() + DualMapAllocator() JL_NOTSAFEPOINT { assert(anon_hdl != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -682,7 +687,7 @@ class SelfMemAllocator : public ROAllocator { SmallVector temp_buff; protected: void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) override + size_t size, size_t align) override JL_NOTSAFEPOINT { assert(!(block.state & SplitPtrBlock::InitAlloc)); for (auto &wr_block: temp_buff) { @@ -696,13 +701,13 @@ class SelfMemAllocator : public ROAllocator { new_block.reset(map_anon_page(block_size), block_size); return new_block.alloc(size, align); } - SplitPtrBlock alloc_block(size_t size) override + SplitPtrBlock alloc_block(size_t size) override JL_NOTSAFEPOINT { SplitPtrBlock new_block; new_block.reset(map_anon_page(size), size); return new_block; } - void finalize_block(SplitPtrBlock &block, bool reset) + void finalize_block(SplitPtrBlock &block, bool reset) JL_NOTSAFEPOINT { if (!(block.state & SplitPtrBlock::Alloc)) return; @@ -715,13 +720,13 @@ class SelfMemAllocator : public ROAllocator { } } public: - SelfMemAllocator() + SelfMemAllocator() JL_NOTSAFEPOINT : ROAllocator(), temp_buff() { assert(get_self_mem_fd() != -1); } - void finalize() override + void finalize() override JL_NOTSAFEPOINT { for (auto &block : this->blocks) { finalize_block(block, false); @@ -767,17 +772,15 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { RWAllocator rw_alloc; std::unique_ptr> ro_alloc; std::unique_ptr> exe_alloc; - bool code_allocated; size_t total_allocated; public: - RTDyldMemoryManagerJL() + RTDyldMemoryManagerJL() JL_NOTSAFEPOINT : SectionMemoryManager(), pending_eh(), rw_alloc(), ro_alloc(), exe_alloc(), - code_allocated(false), total_allocated(0) { #ifdef _OS_LINUX_ @@ -791,12 +794,12 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { exe_alloc.reset(new DualMapAllocator()); } } - ~RTDyldMemoryManagerJL() override + ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT { } - size_t getTotalBytes() { return total_allocated; } + size_t getTotalBytes() JL_NOTSAFEPOINT { return total_allocated; } void registerEHFrames(uint8_t *Addr, uint64_t 
LoadAddr, - size_t Size) override; + size_t Size) override JL_NOTSAFEPOINT; #if 0 // Disable for now since we are not actually using this. void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, @@ -804,16 +807,16 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { #endif uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) override; + StringRef SectionName) override JL_NOTSAFEPOINT; uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) override; + bool isReadOnly) override JL_NOTSAFEPOINT; using SectionMemoryManager::notifyObjectLoaded; void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override; - bool finalizeMemory(std::string *ErrMsg = nullptr) override; + const object::ObjectFile &Obj) override JL_NOTSAFEPOINT; + bool finalizeMemory(std::string *ErrMsg = nullptr) override JL_NOTSAFEPOINT; template - void mapAddresses(DL &Dyld, Alloc &&allocator) + void mapAddresses(DL &Dyld, Alloc &&allocator) JL_NOTSAFEPOINT { for (auto &alloc: allocator->allocations) { if (alloc.rt_addr == alloc.wr_addr || alloc.relocated) @@ -823,48 +826,21 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { } } template - void mapAddresses(DL &Dyld) + void mapAddresses(DL &Dyld) JL_NOTSAFEPOINT { if (!ro_alloc) return; mapAddresses(Dyld, ro_alloc); mapAddresses(Dyld, exe_alloc); } -#ifdef _OS_WINDOWS_ - template - void *lookupWriteAddressFor(void *rt_addr, Alloc &&allocator) - { - for (auto &alloc: allocator->allocations) { - if (alloc.rt_addr == rt_addr) { - return alloc.wr_addr; - } - } - return nullptr; - } - void *lookupWriteAddressFor(void *rt_addr) - { - if (!ro_alloc) - return rt_addr; - if (void *ptr = lookupWriteAddressFor(rt_addr, ro_alloc)) - return ptr; - if (void *ptr = lookupWriteAddressFor(rt_addr, exe_alloc)) - return ptr; - return rt_addr; - } -#endif // _OS_WINDOWS_ }; uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, - StringRef SectionName) + StringRef SectionName) JL_NOTSAFEPOINT { // allocating more than one code section can confuse libunwind. -#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_) - // TODO: Figure out why msan and now asan too need this. 
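// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the patch.] The
// DualMapAllocator used by this memory manager avoids RWX pages by mapping
// the same physical memory twice: a writable view used while code is being
// emitted and relocated, and a read+execute view that the JIT'd code actually
// runs from. A minimal standalone demonstration of that idea for x86-64 Linux
// follows (memfd_create via raw syscall; error handling omitted):
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstring>
#include <cstdio>

int main()
{
    size_t sz = (size_t)sysconf(_SC_PAGESIZE);
    int fd = (int)syscall(SYS_memfd_create, "dualmap-demo", 0);
    ftruncate(fd, (off_t)sz);
    // Writable view: the only mapping we ever write through.
    auto *wr = (unsigned char *)mmap(nullptr, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    // Executable view of the *same* pages: never writable, so no RWX mapping exists.
    auto *rx = (unsigned char *)mmap(nullptr, sz, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    static const unsigned char code[] = {0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3}; // mov eax, 42; ret
    std::memcpy(wr, code, sizeof(code));
    auto fn = (int (*)())rx;        // object->function cast: conditionally supported, standard JIT practice
    std::printf("%d\n", fn());      // prints 42
    close(fd);
    return 0;
}
// ---------------------------------------------------------------------------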
- assert(!code_allocated); - code_allocated = true; -#endif total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); @@ -878,7 +854,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, - bool isReadOnly) + bool isReadOnly) JL_NOTSAFEPOINT { total_allocated += Size; jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); @@ -892,7 +868,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, } void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) + const object::ObjectFile &Obj) JL_NOTSAFEPOINT { if (!ro_alloc) { assert(!exe_alloc); @@ -903,9 +879,8 @@ void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, mapAddresses(Dyld); } -bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) +bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) JL_NOTSAFEPOINT { - code_allocated = false; if (ro_alloc) { ro_alloc->finalize(); assert(exe_alloc); @@ -923,7 +898,7 @@ bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { if (uintptr_t(Addr) == LoadAddr) { register_eh_frames(Addr, Size); @@ -936,7 +911,7 @@ void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, #if 0 void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) + size_t Size) JL_NOTSAFEPOINT { deregister_eh_frames((uint8_t*)LoadAddr, Size); } @@ -944,19 +919,12 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, } -#ifdef _OS_WINDOWS_ -void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr) -{ - return ((RTDyldMemoryManagerJL*)memmgr)->lookupWriteAddressFor(rt_addr); -} -#endif - -RTDyldMemoryManager* createRTDyldMemoryManager() +RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT { return new RTDyldMemoryManagerJL(); } -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) +size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT { return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes(); } diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 8442ba99bb411..98c5627578b80 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -28,13 +28,8 @@ STATISTIC(EmittedGetfieldKnowns, "Number of known getfield calls emitted"); STATISTIC(EmittedSetfield, "Number of setfield calls emitted"); STATISTIC(EmittedUnionLoads, "Number of union loads emitted"); STATISTIC(EmittedVarargsLength, "Number of varargs length calls emitted"); -STATISTIC(EmittedArraysize, "Number of arraysize calls emitted"); -STATISTIC(EmittedArraylen, "Number of array length calls emitted"); -STATISTIC(EmittedArrayptr, "Number of array data pointer loads emitted"); -STATISTIC(EmittedArrayflags, "Number of arrayflags calls emitted"); -STATISTIC(EmittedArrayNDims, "Number of array ndims calls emitted"); +STATISTIC(EmittedArrayptr, "Number of array ptr calls emitted"); STATISTIC(EmittedArrayElsize, "Number of array elsize calls emitted"); -STATISTIC(EmittedArrayOffset, "Number of array offset calls emitted"); STATISTIC(EmittedArrayNdIndex, "Number of array nd index calls emitted"); STATISTIC(EmittedBoxes, "Number of box operations emitted"); STATISTIC(EmittedCPointerChecks, "Number of C pointer checks emitted"); @@ -62,10 +57,10 @@ static Value *maybe_decay_untracked(jl_codectx_t &ctx, 
Value *V) static Value *decay_derived(jl_codectx_t &ctx, Value *V) { Type *T = V->getType(); - if (cast(T)->getAddressSpace() == AddressSpace::Derived) + if (T->getPointerAddressSpace() == AddressSpace::Derived) return V; // Once llvm deletes pointer element types, we won't need it here any more either. - Type *NewT = PointerType::getWithSamePointeeType(cast(T), AddressSpace::Derived); + Type *NewT = PointerType::get(T, AddressSpace::Derived); return ctx.builder.CreateAddrSpaceCast(V, NewT); } @@ -73,9 +68,9 @@ static Value *decay_derived(jl_codectx_t &ctx, Value *V) static Value *maybe_decay_tracked(jl_codectx_t &ctx, Value *V) { Type *T = V->getType(); - if (cast(T)->getAddressSpace() != AddressSpace::Tracked) + if (T->getPointerAddressSpace() != AddressSpace::Tracked) return V; - Type *NewT = PointerType::getWithSamePointeeType(cast(T), AddressSpace::Derived); + Type *NewT = PointerType::get(T, AddressSpace::Derived); return ctx.builder.CreateAddrSpaceCast(V, NewT); } @@ -111,20 +106,24 @@ AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order) static Value *stringConstPtr( jl_codegen_params_t &emission_context, IRBuilder<> &irbuilder, - const std::string &txt) + const Twine &txt) { Module *M = jl_builderModule(irbuilder); - StringRef ctxt(txt.c_str(), txt.size() + 1); - Constant *Data = ConstantDataArray::get(irbuilder.getContext(), arrayRefFromStringRef(ctxt)); - GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M); - Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0); - Value *Args[] = { zero, zero }; - auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(), - // Addrspacecast in case globals are in non-0 AS - irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)), - Args); - setName(emission_context, gep, "string_const_ptr"); - return gep; + SmallVector ctxt; + txt.toVector(ctxt); + // null-terminate the string + ctxt.push_back(0); + Constant *Data = ConstantDataArray::get(irbuilder.getContext(), ctxt); + ctxt.pop_back(); + // We use this for the name of the gv, so cap its size to avoid memory blowout + if (ctxt.size() > 28) { + ctxt.resize(28); + ctxt[25] = ctxt[26] = ctxt[27] = '.'; + } + // Doesn't need to be aligned, we shouldn't operate on these like julia objects + GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, Align(1), "_j_str_" + StringRef(ctxt.data(), ctxt.size()), *M); + // AddrSpaceCast in case globals are in non-0 AS + return irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)); } @@ -203,9 +202,9 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debu uint64_t SizeInBits = jl_datatype_nbits(jdt); ditype = dbuilder->createBasicType(tname, SizeInBits, llvm::dwarf::DW_ATE_unsigned); } - else if (jl_is_structtype(jt) && !jl_is_layout_opaque(jdt->layout)) { + else if (jl_is_structtype(jt) && !jl_is_layout_opaque(jdt->layout) && !jl_is_array_type(jdt)) { size_t ntypes = jl_datatype_nfields(jdt); - std::vector Elements(ntypes); + SmallVector Elements(ntypes); for (unsigned i = 0; i < ntypes; i++) { jl_value_t *el = jl_field_type_concrete(jdt, i); DIType *di; @@ -268,7 +267,7 @@ void jl_debugcache_t::initialize(Module *m) { __alignof__(jl_value_t*) * 8); SmallVector Elts; - std::vector diargs(0); + SmallVector diargs(0); Elts.push_back(jl_pvalue_dillvmt); dbuilder.replaceArrays(jl_value_dillvmt, dbuilder.getOrCreateArray(Elts)); @@ -290,13 +289,10 @@ void jl_debugcache_t::initialize(Module *m) { static Value 
*emit_pointer_from_objref(jl_codectx_t &ctx, Value *V) { - unsigned AS = cast(V->getType())->getAddressSpace(); + unsigned AS = V->getType()->getPointerAddressSpace(); if (AS != AddressSpace::Tracked && AS != AddressSpace::Derived) return V; V = decay_derived(ctx, V); - Type *T = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); - if (V->getType() != T) - V = ctx.builder.CreateBitCast(V, T); Function *F = prepare_call(pointer_from_objref_func); CallInst *Call = ctx.builder.CreateCall(F, V); Call->setAttributes(F->getAttributes()); @@ -304,25 +300,97 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V) return Call; } -static Value *get_gc_root_for(const jl_cgval_t &x) +static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt); +static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, Align alignment, bool isVolatile=false); + +static bool type_is_permalloc(jl_value_t *typ) { - if (x.Vboxed) - return x.Vboxed; - if (x.ispointer() && !x.constant) { - assert(x.V); - if (PointerType *T = dyn_cast(x.V->getType())) { - if (T->getAddressSpace() == AddressSpace::Tracked || - T->getAddressSpace() == AddressSpace::Derived) { - return x.V; - } + // Singleton should almost always be handled by the later optimization passes. + // Also do it here since it is cheap and save some effort in LLVM passes. + if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) + return true; + return typ == (jl_value_t*)jl_symbol_type || + typ == (jl_value_t*)jl_int8_type || + typ == (jl_value_t*)jl_uint8_type; +} + + +// find the offset of pointer fields which never need a write barrier since their type-analysis +// shows they are permanently rooted +static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl &res, unsigned offset) +{ + // This is a inlined field at `offset`. 
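// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the patch.] find_perm_offsets
// above walks a type's layout and records the byte offsets of pointer fields
// whose pointee type is permanently rooted (singletons, Symbol, Int8, UInt8),
// so stores to those slots can skip the write barrier. The toy model below
// mirrors only the recursion shape; the field descriptors are invented for
// this sketch and are not Julia's layout structures.
#include <vector>

struct ToyField {
    unsigned offset;                 // byte offset within the parent
    bool isptr;                      // pointer field?
    bool perm_rooted;                // meaningful only when isptr
    std::vector<ToyField> inlined;   // meaningful only when !isptr
};

static void collect_perm_offsets(const std::vector<ToyField> &fields,
                                 unsigned base, std::vector<unsigned> &out)
{
    for (const ToyField &f : fields) {
        if (f.isptr) {
            if (f.perm_rooted)
                out.push_back(base + f.offset);  // this slot never needs a barrier
        }
        else {
            collect_perm_offsets(f.inlined, base + f.offset, out); // recurse into inline structs
        }
    }
}
// ---------------------------------------------------------------------------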
+ if (!typ->layout || typ->layout->npointers == 0) + return; + jl_svec_t *types = jl_get_fieldtypes(typ); + size_t nf = jl_svec_len(types); + for (size_t i = 0; i < nf; i++) { + jl_value_t *_fld = jl_svecref(types, i); + if (!jl_is_datatype(_fld)) + continue; + jl_datatype_t *fld = (jl_datatype_t*)_fld; + if (jl_field_isptr(typ, i)) { + // pointer field, check if field is perm-alloc + if (type_is_permalloc((jl_value_t*)fld)) + res.push_back(offset + jl_field_offset(typ, i)); + continue; } + // inline field + find_perm_offsets(fld, res, offset + jl_field_offset(typ, i)); } - return nullptr; } -// --- emitting pointers directly into code --- +// load a pointer to N inlined_roots into registers (as a SmallVector) +static llvm::SmallVector load_gc_roots(jl_codectx_t &ctx, Value *inline_roots_ptr, size_t npointers, bool isVolatile=false) +{ + SmallVector gcroots(npointers); + Type *T_prjlvalue = ctx.types().T_prjlvalue; + auto roots_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + for (size_t i = 0; i < npointers; i++) { + auto *ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(jl_value_t*)), Align(sizeof(void*)), isVolatile); + roots_ai.decorateInst(ptr); + gcroots[i] = ptr; + } + return gcroots; +} + +// inlined bool indicates whether this must return the inlined roots inside x separately, or whether x itself may be used as the root (if x is already isboxed) +static llvm::SmallVector get_gc_roots_for(jl_codectx_t &ctx, const jl_cgval_t &x, bool inlined=false) +{ + if (x.constant || x.typ == jl_bottom_type) + return {}; + if (!inlined && x.Vboxed) // superset of x.isboxed + return {x.Vboxed}; + assert(!x.isboxed || !inlined); + if (!x.inline_roots.empty()) { + // if (!inlined) { // TODO: implement this filter operation + // SmallVector perm_offsets; + // find_perm_offsets(typ, perm_offsets, 0); + // return filter(!in(perm_offsets), x.inline_roots) + // } + return x.inline_roots; + } + if (!inlined && x.ispointer()) { + assert(x.V); + assert(x.V->getType()->getPointerAddressSpace() != AddressSpace::Tracked); + return {x.V}; + } + else if (jl_is_concrete_immutable(x.typ) && !jl_is_pointerfree(x.typ)) { + jl_value_t *jltype = x.typ; + Type *T = julia_type_to_llvm(ctx, jltype); + Value *agg = emit_unbox(ctx, T, x, jltype); + SmallVector perm_offsets; + find_perm_offsets((jl_datatype_t*)jltype, perm_offsets, 0); + return ExtractTrackedValues(agg, agg->getType(), false, ctx.builder, perm_offsets); + } + // nothing here to root, move along + return {}; +} +// --- emitting pointers directly into code --- +static void jl_temporary_root(jl_codegen_params_t &ctx, jl_value_t *val); +static void jl_temporary_root(jl_codectx_t &ctx, jl_value_t *val); static inline Constant *literal_static_pointer_val(const void *p, Type *T); static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) @@ -330,12 +398,12 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) // emit a GlobalVariable for a jl_value_t named "cname" // store the name given so we can reuse it (facilitating merging later) // so first see if there already is a GlobalVariable for this address - GlobalVariable* &gv = ctx.global_targets[addr]; + GlobalVariable* &gv = ctx.emission_context.global_targets[addr]; Module *M = jl_Module; StringRef localname; std::string gvname; if (!gv) { - uint64_t id = ctx.emission_context.imaging ? 
jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1) : ctx.global_targets.size(); + uint64_t id = jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); // TODO: use ctx.emission_context.global_targets.size() raw_string_ostream(gvname) << cname << id; localname = StringRef(gvname); } @@ -391,16 +459,6 @@ static Constant *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) { // emit a pointer to a jl_value_t* which will allow it to be valid across reloading code // also, try to give it a nice name for gdb, for easy identification - if (!ctx.emission_context.imaging) { - // TODO: this is an optimization, but is it useful or premature - // (it'll block any attempt to cache these, but can be simply deleted) - Module *M = jl_Module; - GlobalVariable *gv = new GlobalVariable( - *M, ctx.types().T_pjlvalue, true, GlobalVariable::PrivateLinkage, - literal_static_pointer_val(p, ctx.types().T_pjlvalue)); - gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - return gv; - } if (JuliaVariable *gv = julia_const_gv(p)) { // if this is a known special object, use the existing GlobalValue return prepare_global_in(jl_Module, gv); @@ -410,8 +468,14 @@ static Constant *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) if (addr->smalltag) { // some common builtin datatypes have a special pool for accessing them by smalltag id Constant *tag = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), addr->smalltag << 4); - Constant *smallp = ConstantExpr::getInBoundsGetElementPtr(getInt8Ty(ctx.builder.getContext()), prepare_global_in(jl_Module, jlsmall_typeof_var), tag); - return ConstantExpr::getBitCast(smallp, ctx.types().T_ppjlvalue); + Constant *smallp = ConstantExpr::getInBoundsGetElementPtr(getInt8Ty(ctx.builder.getContext()), prepare_global_in(jl_Module, jl_small_typeof_var), tag); + auto ty = ctx.types().T_ppjlvalue; + if (ty->getPointerAddressSpace() == smallp->getType()->getPointerAddressSpace()) + return ConstantExpr::getBitCast(smallp, ty); + else { + Constant *newsmallp = ConstantExpr::getAddrSpaceCast(smallp, ty); + return ConstantExpr::getBitCast(newsmallp, ty); + } } // DataTypes are prefixed with a + return julia_pgv(ctx, "+", addr->name->name, addr->name->module, p); @@ -438,11 +502,7 @@ static Constant *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) static size_t dereferenceable_size(jl_value_t *jt) { - if (jl_is_array_type(jt)) { - // Array has at least this much data - return sizeof(jl_array_t); - } - else if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) { + if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) { return jl_datatype_size(jt); } return 0; @@ -451,15 +511,12 @@ static size_t dereferenceable_size(jl_value_t *jt) // Return the min required / expected alignment of jltype (on the stack or heap) static unsigned julia_alignment(jl_value_t *jt) { - if (jl_is_array_type(jt)) { - // Array always has this alignment - return JL_SMALL_BYTE_ALIGNMENT; - } if (jt == (jl_value_t*)jl_datatype_type) { // types are never allocated in julia code/on the stack // and this is the guarantee we have for the GC bits return 16; } + assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)); unsigned alignment = jl_datatype_align(jt); if (alignment > JL_HEAP_ALIGNMENT) @@ -513,8 +570,6 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) { if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); - if (!ctx.emission_context.imaging) - return literal_static_pointer_val(p, ctx.types().T_pjlvalue); 
Value *pgv = literal_pointer_val_slot(ctx, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto load = ai.decorateInst(maybe_mark_load_dereferenceable( @@ -524,49 +579,28 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) return load; } -// Returns ctx.types().T_pjlvalue -static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p) -{ - // emit a pointer to any jl_value_t which will be valid across reloading code - if (p == NULL) - return Constant::getNullValue(ctx.types().T_pjlvalue); - if (!ctx.emission_context.imaging) - return literal_static_pointer_val(p, ctx.types().T_pjlvalue); - // bindings are prefixed with jl_bnd# - jl_globalref_t *gr = p->globalref; - Value *pgv = gr ? julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - auto load = ai.decorateInst(maybe_mark_load_dereferenceable( - ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), - false, sizeof(jl_binding_t), alignof(jl_binding_t))); - setName(ctx.emission_context, load, pgv->getName()); - return load; -} - // bitcast a value, but preserve its address space when dealing with pointer types static Value *emit_bitcast(jl_codectx_t &ctx, Value *v, Type *jl_value) { - if (isa(jl_value) && - v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) { - // Cast to the proper address space - Type *jl_value_addr = PointerType::getWithSamePointeeType(cast(jl_value), v->getType()->getPointerAddressSpace()); - ++EmittedPointerBitcast; - return ctx.builder.CreateBitCast(v, jl_value_addr); + if (isa(jl_value)) { + return v; } else { return ctx.builder.CreateBitCast(v, jl_value); } } -static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) { - if (to != V->getType()) - return emit_bitcast(ctx, V, to); - return V; -} +// static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) { +// if (isa(to)) { +// return V; +// } +// if (to != V->getType()) +// return emit_bitcast(ctx, V, to); +// return V; +// } static Value *julia_binding_pvalue(jl_codectx_t &ctx, Value *bv) { - bv = emit_bitcast(ctx, bv, ctx.types().T_pprjlvalue); Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_binding_t, value) / ctx.types().sizeof_ptr); return ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, bv, offset); } @@ -575,32 +609,16 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b) { // emit a literal_pointer_val to a jl_binding_t // binding->value are prefixed with * - if (ctx.emission_context.imaging) { - jl_globalref_t *gr = b->globalref; - Value *pgv = gr ? julia_pgv(ctx, "*", gr->name, gr->mod, b) : julia_pgv(ctx, "*jl_bnd#", b); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); - setName(ctx.emission_context, load, pgv->getName()); - return load; - } - else { - return literal_static_pointer_val(b, ctx.types().T_pjlvalue); - } + jl_globalref_t *gr = b->globalref; + Value *pgv = gr ? 
julia_pgv(ctx, "*", gr->name, gr->mod, b) : julia_pgv(ctx, "*jl_bnd#", b); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // --- mapping between julia and llvm types --- -static bool type_is_permalloc(jl_value_t *typ) -{ - // Singleton should almost always be handled by the later optimization passes. - // Also do it here since it is cheap and save some effort in LLVM passes. - if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) - return true; - return typ == (jl_value_t*)jl_symbol_type || - typ == (jl_value_t*)jl_int8_type || - typ == (jl_value_t*)jl_uint8_type; -} - static unsigned convert_struct_offset(const llvm::DataLayout &DL, Type *lty, unsigned byte_offset) { const StructLayout *SL = DL.getStructLayout(cast(lty)); @@ -614,19 +632,13 @@ static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byt return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset); } -static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigned byte_offset) -{ - unsigned idx = convert_struct_offset(ctx, lty, byte_offset); - return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx); -} - static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false); static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed) { // this function converts a Julia Type into the equivalent LLVM type if (isboxed) *isboxed = false; - if (jt == (jl_value_t*)jl_bottom_type) + if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super) return getVoidTy(ctxt); if (jl_is_concrete_immutable(jt)) { if (jl_datatype_nbits(jt) == 0) @@ -667,6 +679,10 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall = return getFloatTy(ctxt); if (bt == (jl_value_t*)jl_float64_type) return getDoubleTy(ctxt); + if (bt == (jl_value_t*)jl_bfloat16_type) + return getBFloatTy(ctxt); + if (jl_is_cpointer_type(bt)) + return PointerType::get(ctxt, 0); if (jl_is_llvmpointer_type(bt)) { jl_value_t *as_param = jl_tparam1(bt); int as; @@ -676,7 +692,7 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall = as = jl_unbox_int64(as_param); else jl_error("invalid pointer address space"); - return PointerType::get(getInt8Ty(ctxt), as); + return PointerType::get(ctxt, as); } int nb = jl_datatype_size(bt); return Type::getIntNTy(ctxt, nb * 8); @@ -695,18 +711,55 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i) return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT}); } +static llvm::StructType* get_jlmemoryref(llvm::LLVMContext &C, unsigned AS) { + return llvm::StructType::get(C, { + llvm::PointerType::get(llvm::Type::getInt8Ty(C), AS), + JuliaType::get_prjlvalue_ty(C), + }); +} +static llvm::StructType* get_jlmemoryboxedref(llvm::LLVMContext &C, unsigned AS) { + return llvm::StructType::get(C, { + llvm::PointerType::get(JuliaType::get_prjlvalue_ty(C), AS), + JuliaType::get_prjlvalue_ty(C), + }); +} +static llvm::StructType* get_jlmemoryunionref(llvm::LLVMContext &C, llvm::Type *T_size) { + return llvm::StructType::get(C, { + T_size, // offset + 
JuliaType::get_prjlvalue_ty(C), + }); +} +static StructType *get_memoryref_type(LLVMContext &ctxt, Type *T_size, const jl_datatype_layout_t *layout, unsigned AS) +{ + // TODO: try to remove this slightly odd special case + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isghost = layout->size == 0; + if (isboxed) + return get_jlmemoryboxedref(ctxt, AS); + if (isunion || isghost) + return get_jlmemoryunionref(ctxt, T_size); + return get_jlmemoryref(ctxt, AS); +} + static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall) { // this function converts a Julia Type into the equivalent LLVM struct // use this where C-compatible (unboxed) structs are desired // use julia_type_to_llvm directly when you want to preserve Julia's type semantics if (isboxed) *isboxed = false; - if (jt == (jl_value_t*)jl_bottom_type) + if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super) return getVoidTy(ctxt); if (jl_is_primitivetype(jt)) return bitstype_to_llvm(jt, ctxt, llvmcall); jl_datatype_t *jst = (jl_datatype_t*)jt; - if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout))) { + if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout)) && !jl_is_array_type(jst) && !jl_is_genericmemory_type(jst)) { + if (jl_is_genericmemoryref_type(jst)) { + jl_value_t *mty_dt = jl_field_type_concrete(jst, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Type *T_size = bitstype_to_llvm((jl_value_t*)jl_long_type, ctxt); + return get_memoryref_type(ctxt, T_size, layout, 0); + } bool isTuple = jl_is_tuple_type(jt); jl_svec_t *ftypes = jl_get_fieldtypes(jst); size_t i, ntypes = jl_svec_len(ftypes); @@ -717,12 +770,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, if (ntypes == 0 || jl_datatype_nbits(jst) == 0) return getVoidTy(ctxt); Type *_struct_decl = NULL; - // TODO: we should probably make a temporary root for `jst` somewhere + if (ctx) + jl_temporary_root(*ctx, jt); // don't use pre-filled struct_decl for llvmcall (f16, etc. may be different) Type *&struct_decl = (ctx && !llvmcall ? ctx->llvmtypes[jst] : _struct_decl); if (struct_decl) return struct_decl; - std::vector latypes(0); + SmallVector latypes(0); bool isarray = true; bool isvector = true; jl_value_t *jlasttype = NULL; @@ -744,17 +798,15 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, lty = JuliaType::get_prjlvalue_ty(ctxt); isvector = false; } - else if (ty == (jl_value_t*)jl_bool_type) { - lty = getInt8Ty(ctxt); - } else if (jl_is_uniontype(ty)) { // pick an Integer type size such that alignment will generally be correct, // and always end with an Int8 (selector byte). 
// We may need to insert padding first to get to the right offset size_t fsz = 0, al = 0; bool isptr = !jl_islayout_inline(ty, &fsz, &al); - assert(!isptr && fsz == jl_field_size(jst, i) - 1); (void)isptr; - if (fsz > 0) { + assert(!isptr && fsz < jl_field_size(jst, i)); (void)isptr; + size_t fsz1 = jl_field_size(jst, i) - 1; + if (fsz1 > 0) { if (al > MAX_ALIGN) { Type *AlignmentType; AlignmentType = ArrayType::get(FixedVectorType::get(getInt8Ty(ctxt), al), 0); @@ -762,8 +814,8 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, al = MAX_ALIGN; } Type *AlignmentType = IntegerType::get(ctxt, 8 * al); - unsigned NumATy = fsz / al; - unsigned remainder = fsz % al; + unsigned NumATy = fsz1 / al; + unsigned remainder = fsz1 % al; assert(al == 1 || NumATy > 0); while (NumATy--) latypes.push_back(AlignmentType); @@ -821,13 +873,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, // // pick an Integer type size such that alignment will be correct // // and always end with an Int8 (selector byte) // lty = ArrayType::get(IntegerType::get(lty->getContext(), 8 * al), fsz / al); - // std::vector Elements(2); + // SmallVector Elements(2); // Elements[0] = lty; // Elements[1] = getInt8Ty(ctxt); // unsigned remainder = fsz % al; // while (remainder--) // Elements.push_back(getInt8Ty(ctxt)); - // lty = StructType::get(lty->getContext(), makeArrayRef(Elements)); + // lty = StructType::get(lty->getContext(),ArrayRef(Elements)); // } if (isboxed) *isboxed = true; return JuliaType::get_prjlvalue_ty(ctxt); @@ -874,7 +926,7 @@ static bool is_tupletype_homogeneous(jl_svec_t *t, bool allow_va = false) } static bool for_each_uniontype_small( - std::function f, + llvm::function_ref f, jl_value_t *ty, unsigned &counter) { @@ -885,6 +937,9 @@ static bool for_each_uniontype_small( allunbox &= for_each_uniontype_small(f, ((jl_uniontype_t*)ty)->b, counter); return allunbox; } + else if (ty == (jl_value_t*)jl_typeofbottom_type->super) { + f(++counter, jl_typeofbottom_type); // treat Tuple{union{}} as identical to typeof(Union{}) + } else if (jl_is_pointerfree(ty)) { f(++counter, (jl_datatype_t*)ty); return true; @@ -928,7 +983,7 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) if (x.constant) { Constant *val = julia_const_to_llvm(ctx, x.constant); if (val) - data = get_pointer_to_constant(ctx.emission_context, val, "_j_const", *jl_Module); + data = get_pointer_to_constant(ctx.emission_context, val, Align(julia_alignment(jl_typeof(x.constant))), "_j_const", *jl_Module); else data = literal_pointer_val(ctx, x.constant); } @@ -943,57 +998,10 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) } static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align, bool is_volatile) + jl_aliasinfo_t const &src_ai, uint64_t sz, Align align_dst, Align align_src, bool is_volatile) { if (sz == 0) return; - assert(align && "align must be specified"); - // If the types are small and simple, use load and store directly. - // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int - // that interferes with other optimizations. - // TODO: Restore this for opaque pointers? Needs extra type information from the caller. 
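// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the patch.] In the
// union-field layout change above, fsz1 is the field size minus the trailing
// selector byte, and the aggregate emitted for the field is fsz1/al copies of
// an al-byte integer, fsz1%al bytes of i8 padding, then one i8 selector byte.
// A tiny standalone check of that arithmetic (helper name invented here):
#include <cstdio>
#include <cstddef>

static void union_field_layout(size_t field_size, size_t al)
{
    size_t fsz1 = field_size - 1;              // payload bytes, selector excluded
    std::printf("%zu x i%zu, %zu x i8 padding, 1 x i8 selector\n",
                fsz1 / al, 8 * al, fsz1 % al);
}

int main()
{
    union_field_layout(13, 4);   // 3 x i32, 0 x i8 padding, 1 x i8 selector
    union_field_layout(8, 4);    // 1 x i32, 3 x i8 padding, 1 x i8 selector
    return 0;
}
// ---------------------------------------------------------------------------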
- if (ctx.builder.getContext().supportsTypedPointers() && sz <= 64) { - // The size limit is arbitrary but since we mainly care about floating points and - // machine size vectors this should be enough. - const DataLayout &DL = jl_Module->getDataLayout(); - auto srcty = cast(src->getType()); - //TODO unsafe nonopaque pointer - auto srcel = srcty->getNonOpaquePointerElementType(); - auto dstty = cast(dst->getType()); - //TODO unsafe nonopaque pointer - auto dstel = dstty->getNonOpaquePointerElementType(); - while (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) { - src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0); - srcel = srcel->getArrayElementType(); - srcty = srcel->getPointerTo(); - } - while (dstel->isArrayTy() && dstel->getArrayNumElements() == 1) { - dst = ctx.builder.CreateConstInBoundsGEP2_32(dstel, dst, 0, 0); - dstel = dstel->getArrayElementType(); - dstty = dstel->getPointerTo(); - } - - llvm::Type *directel = nullptr; - if (srcel->isSized() && srcel->isSingleValueType() && DL.getTypeStoreSize(srcel) == sz) { - directel = srcel; - dst = emit_bitcast(ctx, dst, srcty); - } - else if (dstel->isSized() && dstel->isSingleValueType() && - DL.getTypeStoreSize(dstel) == sz) { - directel = dstel; - src = emit_bitcast(ctx, src, dstty); - } - if (directel) { - if (isa(src) && !src->hasName()) - setName(ctx.emission_context, src, "memcpy_refined_src"); - if (isa(dst) && !dst->hasName()) - setName(ctx.emission_context, dst, "memcpy_refined_dst"); - auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile)); - dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile)); - ++SkippedMemcpys; - return; - } - } ++EmittedMemcpys; // the memcpy intrinsic does not allow to specify different alias tags @@ -1007,51 +1015,278 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const // above problem won't be as serious. 
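// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the patch.] The
// emit_memcpy helpers in this hunk now carry separate destination and source
// alignments instead of a single `unsigned align` with an unknown (0) source
// alignment. Generic LLVM usage of that calling convention (function and
// parameter names invented for the sketch):
#include "llvm/IR/IRBuilder.h"
#include <cstdint>

static void copy_with_alignments(llvm::IRBuilder<> &B, llvm::Value *dst,
                                 llvm::Value *src, uint64_t size,
                                 llvm::Align align_dst, llvm::Align align_src)
{
    // Both alignments reach the memcpy intrinsic, so later passes can widen
    // loads/stores on either side.
    B.CreateMemCpy(dst, align_dst, src, align_src, size);
}
// ---------------------------------------------------------------------------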
auto merged_ai = dst_ai.merge(src_ai); - ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, + ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, Value *sz, unsigned align, bool is_volatile) + jl_aliasinfo_t const &src_ai, Value *sz, Align align_dst, Align align_src, bool is_volatile) { if (auto const_sz = dyn_cast(sz)) { - emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile); return; } ++EmittedMemcpys; auto merged_ai = dst_ai.merge(src_ai); - ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, + ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } template static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align, bool is_volatile=false) + jl_aliasinfo_t const &src_ai, T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false) { - emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile); } template static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src, - T1 &&sz, unsigned align, bool is_volatile=false) + T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false) { auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa); - emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile); +} + +static bool allpointers(jl_datatype_t *typ) +{ + return jl_datatype_size(typ) == typ->layout->npointers * sizeof(void*); +} + +// compute the space required by split_value_into, by simulating it +// returns (sizeof(split_value), n_pointers) +static std::pair split_value_size(jl_datatype_t *typ) +{ + assert(jl_is_datatype(typ)); + size_t dst_off = 0; + bool hasptr = typ->layout->first_ptr >= 0; + size_t npointers = hasptr ? typ->layout->npointers : 0; + // drop the data pointer if the entire structure is just pointers + // TODO: eventually we could drop the slots for the pointers from inside the + // types to pack it together, but this can change the alignment of the bits + // in the fields inside, even if those bits have no pointers themselves. So + // we would actually need to compute, for each pointer, whether any + // subsequent field needed the extra alignment (for example, we can + // drop space for any runs of two/four pointer). Some of these + // functions are already written in a way to support that, but not + // fully implemented yet. 
+ bool nodata = allpointers(typ); + if (nodata) + dst_off = 0; + else + dst_off = jl_datatype_size(typ); + return std::make_pair(dst_off, npointers); +} + +// take a value `x` and split its bits into dst and the roots into inline_roots +static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, Value *inline_roots_ptr, jl_aliasinfo_t const &roots_ai, bool isVolatileStore=false) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + assert(jl_is_concrete_type(x.typ)); + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + Type *T_prjlvalue = ctx.types().T_prjlvalue; + if (!x.inline_roots.empty()) { + auto sizes = split_value_size(typ); + if (sizes.first > 0) + emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src, isVolatileStore); + for (size_t i = 0; i < sizes.second; i++) { + Value *unbox = x.inline_roots[i]; + roots_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore)); + } + return; + } + if (inline_roots_ptr == nullptr) { + emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_dst, isVolatileStore); + return; + } + Value *src = data_pointer(ctx, value_to_pointer(ctx, x)); + bool isstack = isa(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack; + size_t dst_off = 0; + size_t src_off = 0; + bool hasptr = typ->layout->first_ptr >= 0; + size_t npointers = hasptr ? typ->layout->npointers : 0; + bool nodata = allpointers(typ); + for (size_t i = 0; true; i++) { + bool last = i == npointers; + size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*)); + if (ptr > src_off) { + emit_memcpy(ctx, + emit_ptrgep(ctx, dst, dst_off), + dst_ai, + emit_ptrgep(ctx, src, src_off), + src_ai, + ptr - src_off, + align_dst, + align_src, + isVolatileStore); + dst_off += ptr - src_off; + } + if (last) + break; + auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*))); + if (!isstack) + load->setOrdering(AtomicOrdering::Unordered); + src_ai.decorateInst(load); + roots_ai.decorateInst(ctx.builder.CreateAlignedStore(load, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore)); + align_src = align_dst = Align(sizeof(void*)); + src_off = ptr + sizeof(void*); + if (!nodata) { + // store an undef pointer here, to make sure nobody looks at this + dst_ai.decorateInst(ctx.builder.CreateAlignedStore( + ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1), + emit_ptrgep(ctx, dst, dst_off), + align_src, + isVolatileStore)); + dst_off += sizeof(void*); + assert(dst_off == src_off); + } + } } -static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type) +static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, MutableArrayRef inline_roots) { - // p = (jl_value_t**)v; *(type*)&p[n] - Value *vptr = ctx.builder.CreateInBoundsGEP( - ctx.types().T_prjlvalue, - emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), - idx); - setName(ctx.emission_context, vptr, "arraysize_ptr"); - LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, vptr, PointerType::get(type, 0))); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - ai.decorateInst(load); - return load; + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + 
assert(jl_is_concrete_type(x.typ)); + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + Type *T_prjlvalue = ctx.types().T_prjlvalue; + if (!x.inline_roots.empty()) { + auto sizes = split_value_size(typ); + if (sizes.first > 0) + emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src); + for (size_t i = 0; i < sizes.second; i++) + inline_roots[i] = x.inline_roots[i]; + return; + } + if (inline_roots.empty()) { + emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_dst); + return; + } + Value *src = data_pointer(ctx, value_to_pointer(ctx, x)); + bool isstack = isa(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack; + size_t dst_off = 0; + size_t src_off = 0; + bool hasptr = typ->layout->first_ptr >= 0; + size_t npointers = hasptr ? typ->layout->npointers : 0; + bool nodata = allpointers(typ); + for (size_t i = 0; true; i++) { + bool last = i == npointers; + size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*)); + if (ptr > src_off) { + emit_memcpy(ctx, + emit_ptrgep(ctx, dst, dst_off), + dst_ai, + emit_ptrgep(ctx, src, src_off), + src_ai, + ptr - src_off, + align_dst, + align_src); + dst_off += ptr - src_off; + } + if (last) + break; + auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*))); + if (!isstack) + load->setOrdering(AtomicOrdering::Unordered); + src_ai.decorateInst(load); + inline_roots[i] = load; + align_src = align_dst = Align(sizeof(void*)); + src_off = ptr + sizeof(void*); + if (!nodata) { + // store an undef pointer here, to make sure nobody looks at this + dst_ai.decorateInst(ctx.builder.CreateAlignedStore( + ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1), + emit_ptrgep(ctx, dst, dst_off), + align_src)); + dst_off += sizeof(void*); + assert(dst_off == src_off); + } + } +} + +static std::pair> split_value(jl_codectx_t &ctx, const jl_cgval_t &x, Align x_alignment) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + auto sizes = split_value_size(typ); + Align align_dst(julia_alignment((jl_value_t*)typ)); + AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, align_dst) : nullptr; + SmallVector roots(sizes.second); + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + split_value_into(ctx, x, x_alignment, bits, align_dst, stack_ai, MutableArrayRef(roots)); + return std::make_pair(bits, roots); +} + +// Return the offset values corresponding to jl_field_offset, but into the two buffers for a split value (or -1) +static std::pair split_value_field(jl_datatype_t *typ, unsigned idx) +{ + size_t fldoff = jl_field_offset(typ, idx); + size_t src_off = 0; + size_t dst_off = 0; + assert(typ->layout->first_ptr >= 0); + size_t npointers = typ->layout->npointers; + bool nodata = allpointers(typ); + for (size_t i = 0; i < npointers; i++) { + size_t ptr = jl_ptr_offset(typ, i) * sizeof(void*); + if (ptr >= fldoff) { + if (ptr >= fldoff + jl_field_size(typ, idx)) + break; + bool onlyptr = jl_field_isptr(typ, idx) || allpointers((jl_datatype_t*)jl_field_type(typ, idx)); + return std::make_pair(onlyptr ? -1 : dst_off + fldoff - src_off, i); + } + dst_off += ptr - src_off; + src_off = ptr + sizeof(void*); + if (!nodata) { + assert(dst_off + sizeof(void*) == src_off); + dst_off = src_off; + } + } + return std::make_pair(dst_off + fldoff - src_off, -1); +} + +// Copy `x` to `dst`, where `x` was a split value and dst needs to have a native layout, copying any inlined roots back into their native location. 
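// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the patch.] split_value_into
// and recombine_value above keep a value's GC pointers in a separate roots
// array while the remaining bits stay in a flat buffer (with the pointer slots
// left poisoned); recombining writes each root back at its native offset. Toy
// standalone version for a single-pointer struct (types and names invented):
#include <cstdint>
#include <cstddef>
#include <cstring>
#include <cassert>
#include <vector>

struct Toy { int64_t a; void *p; int64_t b; };    // one GC-pointer-like slot at offsetof(Toy, p)

struct SplitToy {
    alignas(Toy) unsigned char bits[sizeof(Toy)]; // pointer slot poisoned, not a root
    std::vector<void *> roots;                    // the pointer, tracked separately
};

static SplitToy toy_split(const Toy &x)
{
    SplitToy s;
    std::memcpy(s.bits, &x, sizeof(Toy));
    s.roots.push_back(x.p);
    std::memset(s.bits + offsetof(Toy, p), 0xff, sizeof(void *)); // poison the slot
    return s;
}

static Toy toy_recombine(const SplitToy &s)
{
    Toy out;
    std::memcpy(&out, s.bits, sizeof(Toy));   // non-pointer bits
    out.p = s.roots[0];                       // root restored at its native offset
    return out;
}

int main()
{
    int dummy = 0;
    Toy t{1, &dummy, 2};
    Toy r = toy_recombine(toy_split(t));
    assert(r.a == 1 && r.p == &dummy && r.b == 2);
    return 0;
}
// ---------------------------------------------------------------------------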
+// This does not respect roots, so you must call emit_write_multibarrier afterwards. +static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatileStore) +{ + jl_datatype_t *typ = (jl_datatype_t*)x.typ; + assert(jl_is_concrete_type(x.typ)); + assert(typ->layout->first_ptr >= 0 && !x.inline_roots.empty()); + Align align_dst = alignment; + Align align_src(julia_alignment(x.typ)); + Value *src = x.V; + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + size_t dst_off = 0; + size_t src_off = 0; + size_t npointers = typ->layout->npointers; + bool nodata = allpointers(typ); + bool isstack = isa(dst->stripInBoundsOffsets()) || dst_ai.tbaa == ctx.tbaa().tbaa_stack; + for (size_t i = 0; true; i++) { + bool last = i == npointers; + size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*)); + if (ptr > dst_off) { + emit_memcpy(ctx, + emit_ptrgep(ctx, dst, dst_off), + dst_ai, + emit_ptrgep(ctx, src, src_off), + src_ai, + ptr - dst_off, + align_dst, + align_src, + isVolatileStore); + src_off += ptr - dst_off; + } + if (last) + break; + auto *root = x.inline_roots[i]; + auto *store = ctx.builder.CreateAlignedStore(root, emit_ptrgep(ctx, dst, ptr), Align(sizeof(void*)), isVolatileStore); + if (!isstack) + store->setOrdering(AtomicOrdering::Unordered); + dst_ai.decorateInst(store); + align_dst = align_src = Align(sizeof(void*)); + dst_off = ptr + sizeof(void*); + if (!nodata) { + assert(src_off + sizeof(void*) == dst_off); + src_off = dst_off; + } + } } static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt) @@ -1095,7 +1330,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull if (jl_has_intersect_type_not_kind(typ)) return false; for (size_t i = 0; i < jl_tags_count; i++) { - jl_datatype_t *dt = small_typeof[(i << 4) / sizeof(*small_typeof)]; + jl_datatype_t *dt = jl_small_typeof[(i << 4) / sizeof(*jl_small_typeof)]; if (dt && !jl_has_empty_intersection((jl_value_t*)dt, typ)) return false; } @@ -1106,24 +1341,19 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull if (p.TIndex) { Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); bool allunboxed = is_uniontype_allunboxed(p.typ); - Type *expr_type = justtag ? ctx.types().T_size : ctx.emission_context.imaging ? ctx.types().T_pjlvalue : ctx.types().T_prjlvalue; - Value *datatype_or_p = Constant::getNullValue(ctx.emission_context.imaging ? expr_type->getPointerTo() : expr_type); + Type *expr_type = justtag ? 
ctx.types().T_size : ctx.types().T_pjlvalue; + Value *datatype_or_p = Constant::getNullValue(expr_type->getPointerTo()); unsigned counter = 0; for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx)); Constant *ptr; if (justtag && jt->smalltag) { - ptr = ConstantInt::get(expr_type, jt->smalltag << 4); - if (ctx.emission_context.imaging) - ptr = get_pointer_to_constant(ctx.emission_context, ptr, "_j_tag", *jl_Module); + ptr = get_pointer_to_constant(ctx.emission_context, ConstantInt::get(expr_type, jt->smalltag << 4), Align(sizeof(jl_value_t*)), StringRef("_j_smalltag_") + jl_symbol_name(jt->name->name), *jl_Module); } - else if (ctx.emission_context.imaging) + else { ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx, (jl_value_t*)jt), datatype_or_p->getType()); - else if (justtag) - ptr = ConstantInt::get(expr_type, (uintptr_t)jt); - else - ptr = ConstantExpr::getAddrSpaceCast(literal_static_pointer_val((jl_value_t*)jt, ctx.types().T_pjlvalue), expr_type); + } datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p); setName(ctx.emission_context, datatype_or_p, "typetag_ptr"); }, @@ -1131,12 +1361,9 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull counter); auto emit_unboxty = [&] () -> Value* { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - if (ctx.emission_context.imaging) { - Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*)))); - setName(ctx.emission_context, datatype, "typetag"); - return justtag ? datatype : track_pjlvalue(ctx, datatype); - } - return datatype_or_p; + Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*)))); + setName(ctx.emission_context, datatype, "typetag"); + return justtag ? 
datatype : track_pjlvalue(ctx, datatype); }; Value *res; if (!allunboxed) { @@ -1171,35 +1398,42 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) { - Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue); - Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*)); + Value *Ptr = decay_derived(ctx, dt); + unsigned Idx = offsetof(jl_datatype_t, types); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad( - ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*)))); + ctx.types().T_pjlvalue, emit_ptrgep(ctx, Ptr, Idx), Align(sizeof(void*)))); setName(ctx.emission_context, types, "datatype_types"); return types; } static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt) { - Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo()); + Value *type_svec = emit_datatype_types(ctx, dt); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto nfields = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*)))); setName(ctx.emission_context, nfields, "datatype_nfields"); return nfields; } -static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) +// emit the size field from the layout of a dt +static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt, bool add_isunion=false) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext())->getPointerTo()); - Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*)); - Ptr = ctx.builder.CreateInBoundsGEP(getInt32PtrTy(ctx.builder.getContext()), Ptr, Idx); - Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*)))); - Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t)); - Ptr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx); - auto Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); + Value *Ptr = decay_derived(ctx, dt); + Ptr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_t, layout)); + Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*)))); + Value *SizePtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, size)); + Value *Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), SizePtr, Align(sizeof(int32_t)))); setName(ctx.emission_context, Size, "datatype_size"); + if (add_isunion) { + Value *FlagPtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, flags)); + Value *Flag = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), FlagPtr, Align(sizeof(int16_t)))); + Flag = ctx.builder.CreateLShr(Flag, 4); + Flag = ctx.builder.CreateAnd(Flag, ConstantInt::get(Flag->getType(), 1)); + Flag = ctx.builder.CreateZExt(Flag, Size->getType()); + Size = ctx.builder.CreateAdd(Size, Flag); + } return Size; } @@ -1222,7 +1456,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) BasicBlock *dynloadBB = 
BasicBlock::Create(ctx.builder.getContext(), "dyn_sizeof", ctx.f); BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_sizeof", ctx.f); Value *isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB); ctx.builder.SetInsertPoint(dynloadBB); @@ -1255,10 +1489,10 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppint8); + Value *Ptr = decay_derived(ctx, dt); Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, name)); Value *Nam = ai.decorateInst( - ctx.builder.CreateAlignedLoad(getInt8PtrTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8PtrTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*)))); + ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getPointerTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*)))); Value *Idx2 = ConstantInt::get(ctx.types().T_size, offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized)); Value *mutabl = ai.decorateInst( ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Nam, Idx2), Align(1))); @@ -1270,7 +1504,7 @@ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) { Value *isprimitive; - isprimitive = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); + isprimitive = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1))); isprimitive = ctx.builder.CreateLShr(isprimitive, 7); @@ -1282,10 +1516,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) { unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*); - Value *vptr = ctx.builder.CreateInBoundsGEP( - ctx.types().T_pjlvalue, - emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue), - ConstantInt::get(ctx.types().T_size, n)); + Value *vptr = emit_ptrgep(ctx, maybe_decay_tracked(ctx, dt), n * sizeof(jl_value_t*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); setName(ctx.emission_context, name, "datatype_name"); @@ -1297,13 +1528,13 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) // the error is always thrown. This may cause non dominated use // of SSA value error in the verifier. 
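// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the patch.] The error
// helpers in the surrounding hunks switch from `const std::string&` to
// `const llvm::Twine&`, so callers can concatenate message pieces lazily and
// the characters are only materialized where stringConstPtr copies them into
// a SmallVector. Generic Twine usage (function name invented for the sketch):
#include "llvm/ADT/Twine.h"
#include <string>

static std::string format_error(const char *fname, int nargs)
{
    // No intermediate std::string allocations until .str() flattens the tree.
    return (llvm::Twine(fname) + ": expected " + llvm::Twine(nargs) + " arguments").str();
}
// ---------------------------------------------------------------------------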
-static void just_emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt) +static void just_emit_error(jl_codectx_t &ctx, Function *F, const Twine &txt) { ++EmittedErrors; ctx.builder.CreateCall(F, stringConstPtr(ctx.emission_context, ctx.builder, txt)); } -static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt) +static void emit_error(jl_codectx_t &ctx, Function *F, const Twine &txt) { just_emit_error(ctx, F, txt); ctx.builder.CreateUnreachable(); @@ -1311,25 +1542,30 @@ static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt) ctx.builder.SetInsertPoint(cont); } -static void emit_error(jl_codectx_t &ctx, const std::string &txt) +static void emit_error(jl_codectx_t &ctx, const Twine &txt) { emit_error(ctx, prepare_call(jlerror_func), txt); } // DO NOT PASS IN A CONST CONDITION! -static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg) +static void error_unless(jl_codectx_t &ctx, Function *F, Value *cond, const Twine &msg) { ++EmittedConditionalErrors; BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(cond, passBB, failBB); ctx.builder.SetInsertPoint(failBB); - just_emit_error(ctx, prepare_call(jlerror_func), msg); + just_emit_error(ctx, F, msg); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } +static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg) +{ + error_unless(ctx, prepare_call(jlerror_func), cond, msg); +} + static void raise_exception(jl_codectx_t &ctx, Value *exc, BasicBlock *contBB=nullptr) { @@ -1340,7 +1576,7 @@ static void raise_exception(jl_codectx_t &ctx, Value *exc, contBB = BasicBlock::Create(ctx.builder.getContext(), "after_throw", ctx.f); } else { - ctx.f->getBasicBlockList().push_back(contBB); + contBB->insertInto(ctx.f); } ctx.builder.SetInsertPoint(contBB); } @@ -1356,16 +1592,31 @@ static void raise_exception_unless(jl_codectx_t &ctx, Value *cond, Value *exc) raise_exception(ctx, exc, passBB); } +static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name, jl_value_t *scope) +{ + ++EmittedUndefVarErrors; + BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f); + BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok"); + ctx.builder.CreateCondBr(ok, ifok, err); + ctx.builder.SetInsertPoint(err); + ctx.builder.CreateCall(prepare_call(jlundefvarerror_func), { + mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)name)), + mark_callee_rooted(ctx, literal_pointer_val(ctx, scope))}); + ctx.builder.CreateUnreachable(); + ifok->insertInto(ctx.f); + ctx.builder.SetInsertPoint(ifok); +} + static Value *null_pointer_cmp(jl_codectx_t &ctx, Value *v) { ++EmittedNullchecks; - return ctx.builder.CreateICmpNE(v, Constant::getNullValue(v->getType())); + return ctx.builder.CreateIsNotNull(v); } // If `nullcheck` is not NULL and a pointer NULL check is necessary // store the pointer to be checked in `*nullcheck` instead of checking it -static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck = nullptr) +static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck) { if (nullcheck) { *nullcheck = v; @@ -1375,13 +1626,34 @@ static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck = literal_pointer_val(ctx, jl_undefref_exception)); } + 
+static void null_load_check(jl_codectx_t &ctx, Value *v, jl_module_t *scope, jl_sym_t *name) +{ + Value *notnull = null_pointer_cmp(ctx, v); + if (name && scope) + undef_var_error_ifnot(ctx, notnull, name, (jl_value_t*)scope); + else + raise_exception_unless(ctx, notnull, literal_pointer_val(ctx, jl_undefref_exception)); +} + template -static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func) +static void emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, MutableArrayRef defval, Func &&func) { + if (ifnot == nullptr) { + auto res = func(); + assert(res.size() == defval.size()); + for (size_t i = 0; i < defval.size(); i++) + defval[i] = res[i]; + return; + } if (auto Cond = dyn_cast(ifnot)) { if (Cond->isZero()) - return defval; - return func(); + return; + auto res = func(); + assert(res.size() == defval.size()); + for (size_t i = 0; i < defval.size(); i++) + defval[i] = res[i]; + return; } ++EmittedGuards; BasicBlock *currBB = ctx.builder.GetInsertBlock(); @@ -1390,16 +1662,33 @@ static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, ctx.builder.CreateCondBr(ifnot, passBB, exitBB); ctx.builder.SetInsertPoint(passBB); auto res = func(); + assert(res.size() == defval.size()); passBB = ctx.builder.GetInsertBlock(); ctx.builder.CreateBr(exitBB); ctx.builder.SetInsertPoint(exitBB); - if (defval == nullptr) + for (size_t i = 0; i < defval.size(); i++) { + PHINode *phi = ctx.builder.CreatePHI(defval[i]->getType(), 2); + phi->addIncoming(defval[i], currBB); + phi->addIncoming(res[i], passBB); + setName(ctx.emission_context, phi, "guard_res"); + defval[i] = phi; + } +} + +template +static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func) +{ + MutableArrayRef res(&defval, defval == nullptr ? 
0 : 1); + auto funcwrap = [&func] () -> SmallVector { + auto res = func(); + if (res == nullptr) + return {}; + return {res}; + }; + emit_guarded_test(ctx, ifnot, res, funcwrap); + if (res.empty()) return nullptr; - PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2); - phi->addIncoming(defval, currBB); - phi->addIncoming(res, passBB); - setName(ctx.emission_context, phi, "guard_res"); - return phi; + return res[0]; } template @@ -1456,8 +1745,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just // we lied a bit: this wasn't really an object (though it was valid for GC rooting) // and we need to use it as an index to get the real object now Module *M = jl_Module; - Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jlsmall_typeof_var), tag); - smallp = ctx.builder.CreateBitCast(smallp, typetag->getType()->getPointerTo(0)); + Value *smallp = emit_ptrgep(ctx, prepare_global_in(M, jl_small_typeof_var), tag); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0)); small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None)); @@ -1470,14 +1758,14 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); -static void just_emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg) +static void just_emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const Twine &msg) { Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg); ctx.builder.CreateCall(prepare_call(jltypeerror_func), { msg_val, maybe_decay_untracked(ctx, type), mark_callee_rooted(ctx, boxed(ctx, x))}); } -static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg) +static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const Twine &msg) { just_emit_type_error(ctx, x, type, msg); ctx.builder.CreateUnreachable(); @@ -1516,28 +1804,37 @@ static bool can_optimize_isa_union(jl_uniontype_t *type) } // a simple case of emit_isa that is obvious not to include a safe-point -static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt) +static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt, bool could_be_null=false) { - assert(jl_is_concrete_type((jl_value_t*)dt)); + assert(jl_is_concrete_type((jl_value_t*)dt) || is_uniquerep_Type((jl_value_t*)dt)); if (arg.TIndex) { unsigned tindex = get_box_tindex(dt, arg.typ); if (tindex > 0) { // optimize more when we know that this is a split union-type where tindex = 0 is invalid - Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); + Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), ~UNION_BOX_MARKER)); auto isa = ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)); setName(ctx.emission_context, isa, "exactly_isa"); return isa; } else if (arg.Vboxed) { - // test for (arg.TIndex == 0x80 && typeof(arg.V) == type) - Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + // test for (arg.TIndex == UNION_BOX_MARKER && typeof(arg.V) == 
type) + Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); + if (could_be_null) { + isboxed = ctx.builder.CreateAnd(isboxed, + ctx.builder.CreateNot(null_pointer_cmp(ctx, arg.Vboxed))); + } setName(ctx.emission_context, isboxed, "isboxed"); BasicBlock *currBB = ctx.builder.GetInsertBlock(); BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f); ctx.builder.CreateCondBr(isboxed, isaBB, postBB); ctx.builder.SetInsertPoint(isaBB); - Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt)); + Value *istype_boxed = NULL; + if (is_uniquerep_Type((jl_value_t*)dt)) { + istype_boxed = ctx.builder.CreateICmpEQ(decay_derived(ctx, arg.Vboxed), decay_derived(ctx, literal_pointer_val(ctx, jl_tparam0(dt)))); + } else { + istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt)); + } ctx.builder.CreateBr(postBB); isaBB = ctx.builder.GetInsertBlock(); // could have changed ctx.builder.SetInsertPoint(postBB); @@ -1551,13 +1848,20 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); } } - auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); - setName(ctx.emission_context, isa, "exactly_isa"); - return isa; + Value *isnull = NULL; + if (could_be_null && arg.isboxed) { + isnull = null_pointer_cmp(ctx, arg.Vboxed); + } + Constant *Vfalse = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); + return emit_guarded_test(ctx, isnull, Vfalse, [&]{ + auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); + setName(ctx.emission_context, isa, "exactly_isa"); + return isa; + }); } static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, - jl_value_t *type, const std::string *msg); + jl_value_t *type, const Twine &msg); static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, SmallVectorImpl,Value*>> &bbs) @@ -1569,7 +1873,7 @@ static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t return; } BasicBlock *enter = ctx.builder.GetInsertBlock(); - Value *v = emit_isa(ctx, x, type, nullptr).first; + Value *v = emit_isa(ctx, x, type, Twine()).first; BasicBlock *exit = ctx.builder.GetInsertBlock(); bbs.emplace_back(std::make_pair(enter, exit), v); BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); @@ -1577,7 +1881,7 @@ static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t } // Should agree with `_can_optimize_isa` above -static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string *msg) +static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const Twine &msg) { ++EmittedIsa; // TODO: The subtype check below suffers from incorrectness issues due to broken @@ -1585,7 +1889,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // actual `isa` calls, this optimization should already have been performed upstream // anyway, but having this optimization in codegen might still be beneficial for // `typeassert`s if we can make it correct. 
- Optional known_isa; + std::optional known_isa; jl_value_t *intersected_type = type; if (x.constant) known_isa = jl_isa(x.constant, type); @@ -1596,9 +1900,11 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, if (intersected_type == (jl_value_t*)jl_bottom_type) known_isa = false; } + if (intersected_type == (jl_value_t*)jl_typeofbottom_type->super) + intersected_type = (jl_value_t*)jl_typeofbottom_type; // swap abstract Type{Union{}} for concrete typeof(Union{}) if (known_isa) { - if (!*known_isa && msg) { - emit_type_error(ctx, x, literal_pointer_val(ctx, type), *msg); + if (!*known_isa && !msg.isTriviallyEmpty()) { + emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg); } return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), *known_isa), true); } @@ -1630,7 +1936,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, if (jl_has_intersect_type_not_kind(type) || jl_has_intersect_type_not_kind(intersected_type)) { Value *vx = boxed(ctx, x); Value *vtyp = track_pjlvalue(ctx, literal_pointer_val(ctx, type)); - if (msg && *msg == "typeassert") { + if (msg.isSingleStringRef() && msg.getSingleStringRef() == "typeassert") { ctx.builder.CreateCall(prepare_call(jltypeassert_func), { vx, vtyp }); return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1), true); } @@ -1690,17 +1996,17 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // declare that the pointer is legal (for zero bytes) even though it might be undef. static Value *emit_isa_and_defined(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ) { - return emit_nullcheck_guard(ctx, val.ispointer() ? val.V : nullptr, [&] { - return emit_isa(ctx, val, typ, nullptr).first; + return emit_nullcheck_guard(ctx, val.inline_roots.empty() && val.ispointer() ? 
val.V : nullptr, [&] { + return emit_isa(ctx, val, typ, Twine()).first; }); } -static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string &msg) +static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const Twine &msg) { Value *istype; bool handled_msg; - std::tie(istype, handled_msg) = emit_isa(ctx, x, type, &msg); + std::tie(istype, handled_msg) = emit_isa(ctx, x, type, msg); if (!handled_msg) { ++EmittedTypechecks; BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); @@ -1711,7 +2017,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t just_emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } } @@ -1719,7 +2025,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) { Value *isconcrete; - isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); + isconcrete = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1))); isconcrete = ctx.builder.CreateLShr(isconcrete, 1); @@ -1728,7 +2034,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) return isconcrete; } -static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const std::string &msg) +static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const Twine &msg) { ++EmittedConcretechecks; assert(typ->getType() == ctx.types().T_prjlvalue); @@ -1754,7 +2060,6 @@ static bool bounds_check_enabled(jl_codectx_t &ctx, jl_value_t *inbounds) { static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ty, Value *i, Value *len, jl_value_t *boundscheck) { Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1)); -#if CHECK_BOUNDS==1 if (bounds_check_enabled(ctx, boundscheck)) { ++EmittedBoundschecks; Value *ok = ctx.builder.CreateICmpULT(im1, len); @@ -1772,7 +2077,10 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v else { // unboxed jl_value_t* Value *a = ainfo.V; if (ainfo.isghost) { - a = Constant::getNullValue(getInt8PtrTy(ctx.builder.getContext())); + a = Constant::getNullValue(getPointerTy(ctx.builder.getContext())); + } + else if (!ainfo.inline_roots.empty()) { + a = value_to_pointer(ctx, ainfo).V; } else if (!ainfo.ispointer()) { // CreateAlloca is OK here since we are on an error branch @@ -1782,26 +2090,32 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v a = tempSpace; } ctx.builder.CreateCall(prepare_call(jluboundserror_func), { - emit_bitcast(ctx, decay_derived(ctx, a), getInt8PtrTy(ctx.builder.getContext())), + decay_derived(ctx, a), literal_pointer_val(ctx, ty), i }); } ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } -#endif return im1; } -static Value *emit_unbox(jl_codectx_t &ctx, 
Type *to, const jl_cgval_t &x, jl_value_t *jt); -static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile=false); +static Value *CreateSimplifiedExtractValue(jl_codectx_t &ctx, Value *Agg, ArrayRef Idxs) +{ + // aka IRBuilder + SimplifyQuery SQ(jl_Module->getDataLayout()); // not actually used, but required by API + if (Value *Inst = simplifyExtractValueInst(Agg, Idxs, SQ)) + return Inst; + return ctx.builder.CreateExtractValue(Agg, Idxs); +} static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef); static void emit_write_barrier(jl_codectx_t&, Value*, Value*); static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*); +static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x); -std::vector first_ptr(Type *T) +SmallVector first_ptr(Type *T) { if (isa(T) || isa(T) || isa(T)) { if (!isa(T)) { @@ -1819,7 +2133,7 @@ std::vector first_ptr(Type *T) unsigned i = 0; for (Type *ElTy : T->subtypes()) { if (isa(ElTy) && ElTy->getPointerAddressSpace() == AddressSpace::Tracked) { - return std::vector{i}; + return SmallVector{i}; } auto path = first_ptr(ElTy); if (!path.empty()) { @@ -1837,23 +2151,23 @@ Value *extract_first_ptr(jl_codectx_t &ctx, Value *V) if (path.empty()) return NULL; std::reverse(std::begin(path), std::end(path)); - return ctx.builder.CreateExtractValue(V, path); + return CreateSimplifiedExtractValue(ctx, V, path); } static void emit_lockstate_value(jl_codectx_t &ctx, Value *strct, bool newstate) { ++EmittedLockstates; - Value *v = mark_callee_rooted(ctx, strct); - ctx.builder.CreateCall(prepare_call(newstate ? jllockvalue_func : jlunlockvalue_func), v); -} -static void emit_lockstate_value(jl_codectx_t &ctx, const jl_cgval_t &strct, bool newstate) -{ - assert(strct.isboxed); - emit_lockstate_value(ctx, boxed(ctx, strct), newstate); + if (strct->getType()->getPointerAddressSpace() == AddressSpace::Loaded) { + Value *v = strct; + ctx.builder.CreateCall(prepare_call(newstate ? jllockfield_func : jlunlockfield_func), v); + } + else { + Value *v = mark_callee_rooted(ctx, strct); + ctx.builder.CreateCall(prepare_call(newstate ? jllockvalue_func : jlunlockvalue_func), v); + } } - // If `nullcheck` is not NULL and a pointer NULL check is necessary // store the pointer to be checked in `*nullcheck` instead of checking it static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, jl_value_t *jltype, @@ -1861,50 +2175,64 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j bool maybe_null_if_boxed = true, unsigned alignment = 0, Value **nullcheck = nullptr) { - // TODO: we should use unordered loads for anything with CountTrackedPointers(elty).count > 0 (if not otherwise locked) Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype); - if (type_is_ghost(elty)) + if (type_is_ghost(elty)) { + if (isStrongerThanMonotonic(Order)) + ctx.builder.CreateFence(Order); return ghostValue(ctx, jltype); + } + if (isboxed) + alignment = sizeof(void*); + else if (!alignment) + alignment = julia_alignment(jltype); + if (idx_0based) + ptr = ctx.builder.CreateInBoundsGEP(elty, ptr, idx_0based); unsigned nb = isboxed ? 
sizeof(void*) : jl_datatype_size(jltype); // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; - if (Order == AtomicOrdering::NotAtomic) { - if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { - intcast = emit_static_alloca(ctx, elty); - setName(ctx.emission_context, intcast, "aggregate_load_box"); + if (Order == AtomicOrdering::NotAtomic && !isboxed && !aliasscope && elty->isAggregateType() && !jl_is_genericmemoryref_type(jltype)) { + // use split_value to do this load + auto src = mark_julia_slot(ptr, jltype, NULL, tbaa); + auto copy = split_value(ctx, src, Align(alignment)); + if (maybe_null_if_boxed && !copy.second.empty()) { + null_pointer_check(ctx, copy.second[0], nullcheck); } + return mark_julia_slot(copy.first, jltype, NULL, ctx.tbaa().tbaa_stack, copy.second); } - else { + Type *realelty = elty; + if (Order != AtomicOrdering::NotAtomic) { if (!isboxed && !elty->isIntOrPtrTy()) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_load_box"); - elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); + realelty = elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); + } + if (isa(elty)) { + unsigned nb2 = PowerOf2Ceil(nb); + if (nb != nb2) + elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2); } } - Type *realelty = elty; - if (Order != AtomicOrdering::NotAtomic && isa(elty)) { - unsigned nb2 = PowerOf2Ceil(nb); - if (nb != nb2) - elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2); - } - Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace()); - Value *data; - if (ptr->getType() != ptrty) - data = emit_bitcast(ctx, ptr, ptrty); - else - data = ptr; - if (idx_0based) - data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based); Value *instr = nullptr; - if (isboxed) - alignment = sizeof(void*); - else if (!alignment) - alignment = julia_alignment(jltype); - if (intcast && Order == AtomicOrdering::NotAtomic) { - emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment); + if (!isboxed && jl_is_genericmemoryref_type(jltype)) { + // load these FCA as individual fields, so LLVM does not need to split them later + Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0); + LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false); + load0->setOrdering(Order); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.scope = MDNode::concatenate(aliasscope, ai.scope); + ai.decorateInst(load0); + Value *fld1 = ctx.builder.CreateStructGEP(elty, ptr, 1); + LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false); + static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order"); + maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*)); + load1->setOrdering(Order); + ai.decorateInst(load1); + instr = Constant::getNullValue(elty); + instr = ctx.builder.CreateInsertValue(instr, load0, 0); + instr = ctx.builder.CreateInsertValue(instr, load1, 1); } else { - LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false); + LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment), false); load->setOrdering(Order); if (isboxed) 
maybe_mark_load_dereferenceable(load, true, jltype); @@ -1912,16 +2240,16 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j ai.scope = MDNode::concatenate(aliasscope, ai.scope); ai.decorateInst(load); instr = load; - if (elty != realelty) - instr = ctx.builder.CreateTrunc(instr, realelty); - if (intcast) { - ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo())); - instr = nullptr; - } + } + if (elty != realelty) + instr = ctx.builder.CreateTrunc(instr, realelty); + if (intcast) { + ctx.builder.CreateAlignedStore(instr, intcast, Align(alignment)); + instr = nullptr; } if (maybe_null_if_boxed) { if (intcast) - instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment)); Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr); if (first_ptr) null_pointer_check(ctx, first_ptr, nullcheck); @@ -1934,7 +2262,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j // ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)), // ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) })); if (intcast) - instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment)); instr = ctx.builder.CreateTrunc(instr, getInt1Ty(ctx.builder.getContext())); } if (instr) @@ -1944,20 +2272,27 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j } static jl_cgval_t typed_store(jl_codectx_t &ctx, - Value *ptr, Value *idx_0based, jl_cgval_t rhs, jl_cgval_t cmp, + Value *ptr, jl_cgval_t rhs, jl_cgval_t cmp, jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope, Value *parent, // for the write barrier, NULL if no barrier needed bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment, - bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, - bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname) + Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce, + bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const Twine &fname, + jl_module_t *mod, jl_sym_t *var) { auto newval = [&](const jl_cgval_t &lhs) { const jl_cgval_t argv[3] = { cmp, lhs, rhs }; jl_cgval_t ret; if (modifyop) { - ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type); + ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type, nullptr); } else { + if (trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to setfield/modifyfield\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call); ret = mark_julia_type(ctx, callval, true, jl_any_type); } @@ -1965,9 +2300,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ret = update_julia_type(ctx, ret, jltype); return ret; }; - assert(!needlock || parent != nullptr); + if (isboxed) + alignment = sizeof(void*); + else if (!alignment) + alignment = julia_alignment(jltype); Type *elty = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype); - if (type_is_ghost(elty)) { + if (type_is_ghost(elty) || + (issetfieldonce && !maybe_null_if_boxed) || + (issetfieldonce && !isboxed && !jl_type_hasptr(jltype))) { if (isStrongerThanMonotonic(Order)) ctx.builder.CreateFence(Order); if (issetfield) { @@ -1983,21 +2323,32 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, else if (isswapfield) { return ghostValue(ctx, jltype); } - else { // modifyfield + else if (ismodifyfield) { jl_cgval_t oldval = ghostValue(ctx, jltype); const jl_cgval_t argv[2] = { oldval, newval(oldval) }; jl_datatype_t *rettyp = jl_apply_modify_type(jltype); return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); } + else { // issetfieldonce + return mark_julia_const(ctx, jl_false); + } } + // if FailOrder was inherited from Order, may need to remove Load-only effects now + if (FailOrder == AtomicOrdering::AcquireRelease) + FailOrder = AtomicOrdering::Acquire; + if (FailOrder == AtomicOrdering::Release) + FailOrder = AtomicOrdering::Monotonic; unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); AllocaInst *intcast = nullptr; + Type *intcast_eltyp = nullptr; + bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0; if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) { + intcast_eltyp = elty; + elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); if (!issetfield) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_store_box"); } - elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } Type *realelty = elty; if (Order != AtomicOrdering::NotAtomic && isa(elty)) { @@ -2006,35 +2357,33 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2); } Value *r = nullptr; - if (issetfield || isswapfield || isreplacefield) { - if (isboxed) + if (issetfield || isswapfield || isreplacefield || issetfieldonce) { // e.g. 
!ismodifyfield + assert(isboxed || rhs.typ == jltype); + if (isboxed) { r = boxed(ctx, rhs); - else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) { + } + else if (intcast) { + emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); + r = ctx.builder.CreateLoad(realelty, intcast); + } + else if (aliasscope || Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) { r = emit_unbox(ctx, realelty, rhs, jltype); - if (realelty != elty) - r = ctx.builder.CreateZExt(r, elty); } + if (realelty != elty) + r = ctx.builder.CreateZExt(r, elty); } - Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace()); - if (ptr->getType() != ptrty) - ptr = ctx.builder.CreateBitCast(ptr, ptrty); - if (idx_0based) - ptr = ctx.builder.CreateInBoundsGEP(elty, ptr, idx_0based); - if (isboxed) - alignment = sizeof(void*); - else if (!alignment) - alignment = julia_alignment(jltype); Value *instr = nullptr; Value *Compare = nullptr; Value *Success = nullptr; BasicBlock *DoneBB = nullptr; if (needlock) - emit_lockstate_value(ctx, parent, true); + emit_lockstate_value(ctx, needlock, true); jl_cgval_t oldval = rhs; + // TODO: we should do Release ordering for anything with CountTrackedPointers(elty).count > 0, instead of just isboxed if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) { if (isswapfield) { auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); - setName(ctx.emission_context, load, "swapfield_load"); + setName(ctx.emission_context, load, "swap_load"); if (isboxed) load->setOrdering(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2052,20 +2401,22 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, } else { assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype); - emit_unbox_store(ctx, rhs, ptr, tbaa, alignment); + emit_unbox_store(ctx, rhs, ptr, tbaa, Align(alignment)); } } - else if (isswapfield && isStrongerThanMonotonic(Order)) { + else if (isswapfield) { + if (Order == AtomicOrdering::Unordered) + Order = AtomicOrdering::Monotonic; assert(Order != AtomicOrdering::NotAtomic && r); auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order); - setName(ctx.emission_context, store, "swapfield_atomicrmw"); + setName(ctx.emission_context, store, "swap_atomicrmw"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); ai.decorateInst(store); instr = store; } else { - // replacefield, modifyfield, or swapfield (isboxed && atomic) + // replacefield, modifyfield, swapfield, setfieldonce (isboxed && atomic) DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f); bool needloop; PHINode *Succ = nullptr, *Current = nullptr; @@ -2075,15 +2426,16 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, } else if (!isboxed) { assert(jl_is_concrete_type(jltype)); - needloop = ((jl_datatype_t*)jltype)->layout->haspadding; - Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first; + needloop = ((jl_datatype_t*)jltype)->layout->flags.haspadding || + !((jl_datatype_t*)jltype)->layout->flags.isbitsegal; + Value *SameType = emit_isa(ctx, cmp, jltype, Twine()).first; if (SameType != ConstantInt::getTrue(ctx.builder.getContext())) { BasicBlock *SkipBB = BasicBlock::Create(ctx.builder.getContext(), "skip_xchg", ctx.f); BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "ok_xchg", ctx.f); 
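// [Editorial illustration, not part of the patch] In the replacefield path above,
// needloop is set when the element layout has padding or is not bits-egal: the
// hardware cmpxchg compares raw bytes, so it can fail even when the stored value is
// semantically equal to the expected one, and the comparison has to be redone and
// retried. A standalone analogue with std::atomic on a padded struct; all names
// here are illustrative:
#include <atomic>
#include <cstdint>
#include <cstring>

struct Padded {
    uint8_t flag;        // 3 bytes of padding follow
    uint32_t value;
};
static_assert(sizeof(Padded) == sizeof(uint64_t), "sketch assumes an 8-byte layout");

static bool semantically_equal(const Padded &a, const Padded &b) {
    return a.flag == b.flag && a.value == b.value;   // padding bytes are ignored
}

// Replace the slot with `desired` iff it currently equals `expected`, ignoring padding.
static bool replace_padded(std::atomic<uint64_t> &slot, const Padded &expected, const Padded &desired) {
    uint64_t desired_bits = 0;
    std::memcpy(&desired_bits, &desired, sizeof desired);
    uint64_t seen = slot.load(std::memory_order_acquire);
    for (;;) {                                        // the "needloop" retry loop
        Padded current;
        std::memcpy(&current, &seen, sizeof current);
        if (!semantically_equal(current, expected))
            return false;                             // genuine mismatch: fail without storing
        if (slot.compare_exchange_weak(seen, desired_bits,
                                       std::memory_order_acq_rel,
                                       std::memory_order_acquire))
            return true;                              // bytes still matched: store performed
        // CAS failed only at the byte level (padding differed or spurious failure);
        // compare_exchange_weak refreshed `seen`, so re-check and retry.
    }
}

int main() {
    std::atomic<uint64_t> slot{0};
    Padded init{1, 10}, next{1, 20};
    uint64_t init_bits = 0;
    std::memcpy(&init_bits, &init, sizeof init);
    slot.store(init_bits);
    return replace_padded(slot, init, next) ? 0 : 1;
}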
ctx.builder.CreateCondBr(SameType, BB, SkipBB); ctx.builder.SetInsertPoint(SkipBB); LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); - setName(ctx.emission_context, load, "atomic_replacefield_initial"); + setName(ctx.emission_context, load, "atomic_replace_initial"); load->setOrdering(FailOrder == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Monotonic : FailOrder); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); @@ -2096,7 +2448,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, Current->addIncoming(instr, SkipBB); ctx.builder.SetInsertPoint(BB); } - Compare = emit_unbox(ctx, realelty, cmp, jltype); + cmp = update_julia_type(ctx, cmp, jltype); + if (intcast) { + emit_unbox_store(ctx, cmp, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); + Compare = ctx.builder.CreateLoad(realelty, intcast); + } + else { + Compare = emit_unbox(ctx, realelty, cmp, jltype); + } if (realelty != elty) Compare = ctx.builder.CreateZExt(Compare, elty); } @@ -2111,6 +2470,11 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, needloop = true; } } + else if (issetfieldonce) { + needloop = !isboxed && Order != AtomicOrdering::NotAtomic && nb > sizeof(void*); + if (Order != AtomicOrdering::NotAtomic) + Compare = Constant::getNullValue(elty); + } else { // swap or modify LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); Current->setOrdering(Order == AtomicOrdering::NotAtomic && !isboxed ? Order : AtomicOrdering::Monotonic); @@ -2133,21 +2497,22 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, } if (ismodifyfield) { if (needlock) - emit_lockstate_value(ctx, parent, false); + emit_lockstate_value(ctx, needlock, false); Value *realCompare = Compare; if (realelty != elty) realCompare = ctx.builder.CreateTrunc(realCompare, realelty); if (intcast) { - ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo())); - if (maybe_null_if_boxed) - realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + assert(!isboxed); + ctx.builder.CreateStore(realCompare, intcast); + if (tracked_pointers) + realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast); } - if (maybe_null_if_boxed) { - Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare); - if (first_ptr) - null_pointer_check(ctx, first_ptr, nullptr); + if (maybe_null_if_boxed && tracked_pointers) { + Value *first_ptr = isboxed ? 
realCompare : extract_first_ptr(ctx, realCompare); + assert(first_ptr); + null_load_check(ctx, first_ptr, mod, var); } - if (intcast) + if (intcast && !tracked_pointers) oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack); else oldval = mark_julia_type(ctx, realCompare, isboxed, jltype); @@ -2155,18 +2520,24 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (isboxed) { r = boxed(ctx, rhs); } - else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) { + else if (intcast) { + emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign()); + r = ctx.builder.CreateLoad(realelty, intcast); + if (!tracked_pointers) // oldval is a slot, so put the oldval back + ctx.builder.CreateStore(realCompare, intcast); + } + else if (Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) { r = emit_unbox(ctx, realelty, rhs, jltype); - if (realelty != elty) - r = ctx.builder.CreateZExt(r, elty); } + if (realelty != elty) + r = ctx.builder.CreateZExt(r, elty); if (needlock) - emit_lockstate_value(ctx, parent, true); + emit_lockstate_value(ctx, needlock, true); cmp = oldval; } Value *Done; if (Order == AtomicOrdering::NotAtomic) { - // modifyfield or replacefield + // modifyfield or replacefield or setfieldonce assert(elty == realelty && !intcast); auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2178,9 +2549,11 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (maybe_null_if_boxed && !ismodifyfield) first_ptr = isboxed ? load : extract_first_ptr(ctx, load); oldval = mark_julia_type(ctx, load, isboxed, jltype); - Success = emit_nullcheck_guard(ctx, first_ptr, [&] { - return emit_f_is(ctx, oldval, cmp); - }); + assert(!issetfieldonce || first_ptr != nullptr); + if (issetfieldonce) + Success = ctx.builder.CreateIsNull(first_ptr); + else + Success = emit_f_is(ctx, oldval, cmp, first_ptr, nullptr); if (needloop && ismodifyfield) CmpPhi->addIncoming(load, ctx.builder.GetInsertBlock()); assert(Succ == nullptr); @@ -2195,18 +2568,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, } else { assert(!isboxed && rhs.typ == jltype); - emit_unbox_store(ctx, rhs, ptr, tbaa, alignment); + emit_unbox_store(ctx, rhs, ptr, tbaa, Align(alignment)); } ctx.builder.CreateBr(DoneBB); instr = load; } - else { + else { // something atomic assert(r); if (Order == AtomicOrdering::Unordered) Order = AtomicOrdering::Monotonic; if (Order == AtomicOrdering::Monotonic && isboxed) Order = AtomicOrdering::Release; - if (!isreplacefield) + if (!isreplacefield && !issetfieldonce) FailOrder = AtomicOrdering::Monotonic; else if (FailOrder == AtomicOrdering::Unordered) FailOrder = AtomicOrdering::Monotonic; @@ -2217,28 +2590,36 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0)); Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1)); Done = Success; - if (isreplacefield && needloop) { + if ((isreplacefield || issetfieldonce) && needloop) { Value *realinstr = instr; if (realelty != elty) realinstr = ctx.builder.CreateTrunc(realinstr, realelty); if (intcast) { - ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo())); + ctx.builder.CreateStore(realinstr, intcast); + // n.b. 
this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack); if (maybe_null_if_boxed) - realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast); + realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast); } else { oldval = mark_julia_type(ctx, realinstr, isboxed, jltype); } - Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] { - Value *first_ptr = nullptr; - if (maybe_null_if_boxed) - first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr); - return emit_nullcheck_guard(ctx, first_ptr, [&] { - return emit_f_is(ctx, oldval, cmp); + if (issetfieldonce) { + assert(!isboxed && maybe_null_if_boxed); + Value *first_ptr = extract_first_ptr(ctx, realinstr); + assert(first_ptr != nullptr); + Done = ctx.builder.CreateIsNotNull(first_ptr); + } + else { + // Done = !(!Success && (first_ptr != NULL && oldval == cmp)) + Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] { + Value *first_ptr = nullptr; + if (maybe_null_if_boxed) + first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr); + return emit_f_is(ctx, oldval, cmp, first_ptr, nullptr); }); - }); - Done = ctx.builder.CreateNot(Done); + Done = ctx.builder.CreateNot(Done); + } } if (needloop) ctx.builder.CreateCondBr(Done, DoneBB, BB); @@ -2257,22 +2638,32 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (DoneBB) ctx.builder.SetInsertPoint(DoneBB); if (needlock) - emit_lockstate_value(ctx, parent, false); - if (parent != NULL) { - if (isreplacefield) { - // TODO: avoid this branch if we aren't making a write barrier + emit_lockstate_value(ctx, needlock, false); + if (parent != NULL && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) { + if (isreplacefield || issetfieldonce) { BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f); DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f); ctx.builder.CreateCondBr(Success, BB, DoneBB); ctx.builder.SetInsertPoint(BB); } if (r) { + if (realelty != elty) + r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty)); + if (intcast) { + ctx.builder.CreateStore(r, intcast); + r = ctx.builder.CreateLoad(intcast_eltyp, intcast); + } if (!isboxed) emit_write_multibarrier(ctx, parent, r, rhs.typ); - else if (!type_is_permalloc(rhs.typ)) + else emit_write_barrier(ctx, parent, r); } - if (isreplacefield) { + else { + assert(!isboxed); + assert(!rhs.inline_roots.empty()); + emit_write_multibarrier(ctx, parent, rhs); + } + if (isreplacefield || issetfieldonce) { ctx.builder.CreateBr(DoneBB); ctx.builder.SetInsertPoint(DoneBB); } @@ -2282,26 +2673,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, jl_datatype_t *rettyp = jl_apply_modify_type(jltype); oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); } + else if (issetfieldonce) { + oldval = mark_julia_type(ctx, Success, false, jl_bool_type); + } else if (!issetfield) { // swapfield or replacefield if (realelty != elty) instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty)); if (intcast) { - ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo())); - instr = nullptr; + ctx.builder.CreateStore(instr, intcast); + if (tracked_pointers) + instr = ctx.builder.CreateLoad(intcast_eltyp, intcast); } - if (maybe_null_if_boxed) { - if (intcast) - instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), 
intcast); + if (maybe_null_if_boxed && tracked_pointers) { Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr); - if (first_ptr) - null_pointer_check(ctx, first_ptr, nullptr); - if (intcast && !first_ptr) - instr = nullptr; + assert(first_ptr); + null_load_check(ctx, first_ptr, mod, var); } - if (instr) - oldval = mark_julia_type(ctx, instr, isboxed, jltype); - else + if (intcast && !tracked_pointers) oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack); + else + oldval = mark_julia_type(ctx, instr, isboxed, jltype); if (isreplacefield) { Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext())); const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)}; @@ -2325,7 +2716,7 @@ static Value *julia_bool(jl_codectx_t &ctx, Value *cond) // --- accessing the representations of built-in data types --- -static void emit_atomic_error(jl_codectx_t &ctx, const std::string &msg) +static void emit_atomic_error(jl_codectx_t &ctx, const Twine &msg) { emit_error(ctx, prepare_call(jlatomicerror_func), msg); } @@ -2334,6 +2725,32 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st unsigned idx, jl_datatype_t *jt, enum jl_memory_order order, Value **nullcheck=nullptr); +static bool field_may_be_null(const jl_cgval_t &strct, jl_datatype_t *stt, size_t idx) +{ + size_t nfields = jl_datatype_nfields(stt); + if (idx < nfields - (unsigned)stt->name->n_uninitialized) + return false; + if (!jl_field_isptr(stt, idx) && !jl_type_hasptr(jl_field_type(stt, idx))) + return false; + if (strct.constant) { + if ((jl_is_immutable(stt) || jl_field_isconst(stt, idx)) && jl_field_isdefined(strct.constant, idx)) + return false; + } + return true; +} + +static bool field_may_be_null(const jl_cgval_t &strct, jl_datatype_t *stt) +{ + size_t nfields = jl_datatype_nfields(stt); + for (size_t i = 0; i < (unsigned)stt->name->n_uninitialized; i++) { + size_t idx = nfields - i - 1; + if (field_may_be_null(strct, stt, idx)) + return true; + } + return false; +} + + static bool emit_getfield_unknownidx(jl_codectx_t &ctx, jl_cgval_t *ret, jl_cgval_t strct, Value *idx, jl_datatype_t *stt, jl_value_t *inbounds, @@ -2341,7 +2758,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, { ++EmittedGetfieldUnknowns; size_t nfields = jl_datatype_nfields(stt); - bool maybe_null = (unsigned)stt->name->n_uninitialized != 0; + bool maybe_null = field_may_be_null(strct, stt); auto idx0 = [&]() { return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(ctx.types().T_size, nfields), inbounds); }; @@ -2360,7 +2777,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, } assert(!jl_is_vecelement_type((jl_value_t*)stt)); - if (!strct.ispointer()) { // unboxed + if (strct.inline_roots.empty() && !strct.ispointer()) { // unboxed assert(jl_is_concrete_immutable((jl_value_t*)stt)); bool isboxed = is_datatype_all_pointers(stt); jl_svec_t *types = stt->types; @@ -2399,24 +2816,25 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, assert((cast(strct.V->getType())->getElementType() == ctx.types().T_prjlvalue) == isboxed); Value *idx = idx0(); unsigned i = 0; - Value *fld = ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)); + Value *fld = ctx.builder.CreateExtractValue(strct.V, ArrayRef(i)); for (i = 1; i < nfields; i++) { fld = ctx.builder.CreateSelect( ctx.builder.CreateICmpEQ(idx, ConstantInt::get(idx->getType(), i)), - ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)), + 
ctx.builder.CreateExtractValue(strct.V, ArrayRef(i)), fld); } setName(ctx.emission_context, fld, "getfield"); jl_value_t *jft = issame ? jl_svecref(types, 0) : (jl_value_t*)jl_any_type; if (isboxed && maybe_null) - null_pointer_check(ctx, fld); + null_pointer_check(ctx, fld, nullptr); *ret = mark_julia_type(ctx, fld, isboxed, jft); return true; } } bool maybeatomic = stt->name->atomicfields != NULL; - if (strct.ispointer() && !maybeatomic) { // boxed or stack + if ((strct.inline_roots.empty() && strct.ispointer()) && !maybeatomic) { // boxed or stack + // COMBAK: inline_roots support could be implemented for this if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); *ret = jl_cgval_t(); // unreachable @@ -2438,7 +2856,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, } Value *fldptr = ctx.builder.CreateInBoundsGEP( ctx.types().T_prjlvalue, - emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue), + data_pointer(ctx, strct), idx0()); setName(ctx.emission_context, fldptr, "getfield_ptr"); LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*))); @@ -2448,7 +2866,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, ai.decorateInst(fld); maybe_mark_load_dereferenceable(fld, maybe_null, minimum_field_size, minimum_align); if (maybe_null) - null_pointer_check(ctx, fld); + null_pointer_check(ctx, fld, nullptr); *ret = mark_julia_type(ctx, fld, true, jl_any_type); return true; } @@ -2461,8 +2879,8 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, if (!stt->name->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type || ((jl_datatype_t*)jft)->layout->npointers))) { // just compute the pointer and let user load it when necessary - Type *fty = julia_type_to_llvm(ctx, jft); - Value *addr = ctx.builder.CreateInBoundsGEP(fty, emit_bitcast(ctx, ptr, PointerType::get(fty, 0)), idx); + Type *fty = julia_type_to_llvm(ctx, jft); //TODO: move this to a int8 GEP + Value *addr = ctx.builder.CreateInBoundsGEP(fty, ptr, idx); *ret = mark_julia_slot(addr, jft, NULL, strct.tbaa); return true; } @@ -2492,27 +2910,150 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0); if (fsz > 0 && mutabl) { // move value to an immutable stack slot (excluding tindex) - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); - AllocaInst *lv = emit_static_alloca(ctx, AT); + AllocaInst *lv = emit_static_alloca(ctx, fsz, Align(al)); setName(ctx.emission_context, lv, "immutable_union"); - if (al > 1) - lv->setAlignment(Align(al)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - emit_memcpy(ctx, lv, ai, addr, ai, fsz, al); + emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al)); addr = lv; } return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa); } +static bool isTBAA(MDNode *TBAA, std::initializer_list const strset) +{ + if (!TBAA) + return false; + while (TBAA->getNumOperands() > 1) { + TBAA = cast(TBAA->getOperand(1).get()); + auto str = cast(TBAA->getOperand(0))->getString(); + for (auto str2 : strset) { + if (str == str2) { + return true; + } + } + } + return false; +} + +// Check if this is a load from an immutable value. The easiest +// way to do so is to look at the tbaa and see if it derives from +// jtbaa_immut. 
+static bool isLoadFromImmut(LoadInst *LI) +{ + if (LI->getMetadata(LLVMContext::MD_invariant_load)) + return true; + MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa); + if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"})) + return true; + return false; +} + +static bool isConstGV(GlobalVariable *gv) +{ + return gv->isConstant() || gv->getMetadata("julia.constgv"); +} + +// Check if this is can be traced through constant loads to an constant global +// or otherwise globally rooted value. +// Almost all `tbaa_const` loads satisfies this with the exception of +// task local constants which are constant as far as the code is concerned but aren't +// global constants. For task local constant `task_local` will be true when this function +// returns. +// Unlike this function in llvm-late-gc-lowering, we do not examine PhiNode, as those are not emitted yet +static bool isLoadFromConstGV(LoadInst *LI); +static bool isLoadFromConstGV(Value *v) +{ + v = v->stripInBoundsOffsets(); + if (auto LI = dyn_cast(v)) + return isLoadFromConstGV(LI); + if (auto gv = dyn_cast(v)) + return isConstGV(gv); + // null pointer + if (isa(v)) + return true; + // literal pointers + if (auto CE = dyn_cast(v)) + return (CE->getOpcode() == Instruction::IntToPtr && + isa(CE->getOperand(0))); + if (auto SL = dyn_cast(v)) + return (isLoadFromConstGV(SL->getTrueValue()) && + isLoadFromConstGV(SL->getFalseValue())); + if (auto call = dyn_cast(v)) { + auto callee = call->getCalledFunction(); + if (callee && callee->getName() == "julia.typeof") { + return true; + } + if (callee && callee->getName() == "julia.get_pgcstack") { + return true; + } + if (callee && callee->getName() == "julia.gc_loaded") { + return isLoadFromConstGV(call->getArgOperand(0)) && + isLoadFromConstGV(call->getArgOperand(1)); + } + } + if (isa(v)) { + return true; + } + return false; +} + +// The white list implemented here and above in `isLoadFromConstGV(Value*)` should +// cover all the cases we and LLVM generates. +static bool isLoadFromConstGV(LoadInst *LI) +{ + // We only emit single slot GV in codegen + // but LLVM global merging can change the pointer operands to GEPs/bitcasts + auto load_base = LI->getPointerOperand()->stripInBoundsOffsets(); + assert(load_base); // Static analyzer + auto gv = dyn_cast(load_base); + if (isLoadFromImmut(LI)) { + if (gv) + return true; + return isLoadFromConstGV(load_base); + } + if (gv) + return isConstGV(gv); + return false; +} + + +static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_datatype_t *jt, unsigned idx, size_t byte_offset) +{ + auto tbaa = strct.tbaa; + if (tbaa == ctx.tbaa().tbaa_datatype) + if (byte_offset != offsetof(jl_datatype_t, types)) + return ctx.tbaa().tbaa_const; + if (tbaa == ctx.tbaa().tbaa_array) { + if (jl_is_genericmemory_type(jt)) { + if (idx == 0) + return ctx.tbaa().tbaa_memorylen; + if (idx == 1) + return ctx.tbaa().tbaa_memoryptr; + } + else if (jl_is_array_type(jt)) { + if (idx == 0) + return ctx.tbaa().tbaa_arrayptr; + if (idx == 1) + return ctx.tbaa().tbaa_arraysize; + } + } + if (strct.V && jl_field_isconst(jt, idx) && isLoadFromConstGV(strct.V)) + return ctx.tbaa().tbaa_const; //TODO: it seems odd to have a field with a tbaa that doesn't alias it's containing struct's tbaa + //Does the fact that this is marked as constant make this fine? 
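// [Editorial illustration, not part of the patch] isTBAA above walks a TBAA metadata
// node up its parent chain (operand 1), comparing each node's name string (operand 0)
// against a small set such as "jtbaa_immut" or "jtbaa_const". The same traversal over
// a plain parent-linked tag list; the types and tag names below are illustrative only:
#include <cstring>
#include <initializer_list>

struct AliasTag {
    const char *name;
    const AliasTag *parent;   // nullptr at the root of the hierarchy
};

static bool derives_from(const AliasTag *tag, std::initializer_list<const char *> names) {
    for (; tag != nullptr; tag = tag->parent)
        for (const char *n : names)
            if (std::strcmp(tag->name, n) == 0)
                return true;
    return false;
}

int main() {
    AliasTag root{"jtbaa", nullptr};
    AliasTag value{"jtbaa_value", &root};
    AliasTag immut{"jtbaa_immut", &value};
    return derives_from(&immut, {"jtbaa_immut", "jtbaa_const"}) ? 0 : 1;
}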
+ return tbaa; +} + // If `nullcheck` is not NULL and a pointer NULL check is necessary // store the pointer to be checked in `*nullcheck` instead of checking it static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct, unsigned idx, jl_datatype_t *jt, enum jl_memory_order order, Value **nullcheck) { + auto get_objname = [&]() { + return strct.V ? strct.V->getName() : StringRef(""); + }; jl_value_t *jfty = jl_field_type(jt, idx); bool isatomic = jl_field_isatomic(jt, idx); - bool needlock = isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE; if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); return jl_cgval_t(); // unreachable @@ -2530,44 +3071,60 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st } if (type_is_ghost(julia_type_to_llvm(ctx, jfty))) return ghostValue(ctx, jfty); - size_t nfields = jl_datatype_nfields(jt); - bool maybe_null = idx >= nfields - (unsigned)jt->name->n_uninitialized; + Value *needlock = nullptr; + if (isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE) { + assert(strct.isboxed); + needlock = boxed(ctx, strct); + } + bool maybe_null = field_may_be_null(strct, jt, idx); size_t byte_offset = jl_field_offset(jt, idx); - auto tbaa = strct.tbaa; - if (tbaa == ctx.tbaa().tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types)) - tbaa = ctx.tbaa().tbaa_const; - if (strct.ispointer()) { - Value *staddr = data_pointer(ctx, strct); - bool isboxed; - Type *lt = julia_type_to_llvm(ctx, (jl_value_t*)jt, &isboxed); - Value *addr; - if (isboxed) { - // byte_offset == 0 is an important special case here, e.g. - // for single field wrapper types. Introducing the bitcast - // can pessimize mem2reg - if (byte_offset > 0) { - addr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), - emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(ctx.types().T_size, byte_offset)); - } - else { - addr = staddr; - } + if (!strct.inline_roots.empty()) { + assert(!isatomic && !needlock); + auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset); + auto offsets = split_value_field(jt, idx); + bool hasptr = offsets.second >= 0; + assert(hasptr == jl_field_isptr(jt, idx) || jl_type_hasptr(jfty)); + ArrayRef roots; + if (hasptr) { + roots = ArrayRef(strct.inline_roots).slice(offsets.second, jl_field_isptr(jt, idx) ? 1 : ((jl_datatype_t*)jfty)->layout->npointers); + if (maybe_null) + null_pointer_check(ctx, roots[0], nullcheck); + } + if (jl_field_isptr(jt, idx)) { + return mark_julia_type(ctx, roots[0], true, jfty); + } + Value *addr = offsets.first < 0 ? nullptr : offsets.first == 0 ? 
strct.V : emit_ptrgep(ctx, strct.V, offsets.first); + if (jl_is_uniontype(jfty)) { + size_t fsz = 0, al = 0; + int union_max = jl_islayout_inline(jfty, &fsz, &al); + size_t fsz1 = jl_field_size(jt, idx) - 1; + bool isptr = (union_max == 0); + assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr; + Value *ptindex = emit_ptrgep(ctx, addr, fsz1); + return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, false, union_max, strct.tbaa); + } + else if (jfty == (jl_value_t*)jl_bool_type) { + unsigned align = jl_field_align(jt, idx); + return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false, + AtomicOrdering::NotAtomic, maybe_null, align, nullcheck); } else { - staddr = maybe_bitcast(ctx, staddr, lt->getPointerTo()); - if (jl_is_vecelement_type((jl_value_t*)jt)) - addr = staddr; // VecElement types are unwrapped in LLVM. - else if (isa(lt)) - addr = emit_struct_gep(ctx, lt, staddr, byte_offset); - else - addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx); + return mark_julia_slot(addr, jfty, nullptr, tbaa, roots); } + } + else if (strct.ispointer()) { + auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset); + Value *staddr = data_pointer(ctx, strct); + Value *addr; + if (jl_is_vecelement_type((jl_value_t*)jt) || byte_offset == 0) + addr = staddr; // VecElement types are unwrapped in LLVM. + else + addr = emit_ptrgep(ctx, staddr, byte_offset); + if (addr != staddr) + setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr")); if (jl_field_isptr(jt, idx)) { - setName(ctx.emission_context, addr, "getfield_addr"); - LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*))); - setName(ctx.emission_context, Load, "getfield"); + LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*))); + setNameWithField(ctx.emission_context, Load, get_objname, jt, idx, Twine()); Load->setOrdering(order <= jl_memory_order_notatomic ? 
AtomicOrdering::Unordered : get_llvm_atomic_order(order)); maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2580,16 +3137,14 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st size_t fsz = 0, al = 0; int union_max = jl_islayout_inline(jfty, &fsz, &al); bool isptr = (union_max == 0); - assert(!isptr && fsz == jl_field_size(jt, idx) - 1); (void)isptr; - Value *ptindex; - if (isboxed) { - ptindex = ctx.builder.CreateConstInBoundsGEP1_32( - getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), byte_offset + fsz); + assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr; + size_t fsz1 = jl_field_size(jt, idx) - 1; + Value *ptindex = emit_ptrgep(ctx, staddr, byte_offset + fsz1); + auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, strct.tbaa); + if (val.V && val.V != addr) { + setNameWithField(ctx.emission_context, val.V, get_objname, jt, idx, Twine()); } - else { - ptindex = emit_struct_gep(ctx, cast(lt), staddr, byte_offset + fsz); - } - return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, ctx.tbaa().tbaa_unionselbyte); + return val; } assert(jl_is_concrete_type(jfty)); if (jl_field_isconst(jt, idx) && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type || @@ -2599,12 +3154,15 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st } unsigned align = jl_field_align(jt, idx); if (needlock) - emit_lockstate_value(ctx, strct, true); + emit_lockstate_value(ctx, needlock, true); jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false, needlock ? 
AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), maybe_null, align, nullcheck); + if (ret.V) { + setNameWithField(ctx.emission_context, ret.V, get_objname, jt, idx, Twine()); + } if (needlock) - emit_lockstate_value(ctx, strct, false); + emit_lockstate_value(ctx, needlock, false); return ret; } else if (isa(strct.V)) { @@ -2620,6 +3178,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st } else if (isa(T)) { fldv = ctx.builder.CreateExtractElement(obj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), idx)); + setNameWithField(ctx.emission_context, fldv, get_objname, jt, idx, Twine()); } else if (!jl_field_isptr(jt, idx) && jl_is_uniontype(jfty)) { int fsz = jl_field_size(jt, idx) - 1; @@ -2629,30 +3188,29 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st unsigned st_idx = convert_struct_offset(ctx, T, byte_offset); IntegerType *ET = cast(T->getStructElementType(st_idx)); unsigned align = (ET->getBitWidth() + 7) / 8; - lv = emit_static_alloca(ctx, ET); - setName(ctx.emission_context, lv, "union_split"); - lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align)); + lv = emit_static_alloca(ctx, fsz, Align(align)); // emit all of the align-sized words unsigned i = 0; for (; i < fsz / align; i++) { unsigned fld = st_idx + i; - Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(fld)); - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); + Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(fld)); + Value *fldp = emit_ptrgep(ctx, lv, i * align); ctx.builder.CreateAlignedStore(fldv, fldp, Align(align)); } // emit remaining bytes up to tindex if (i < ptindex - st_idx) { - Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); - staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext())); + Value *staddr = emit_ptrgep(ctx, lv, i * align); for (; i < ptindex - st_idx; i++) { - Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx + i)); - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i); + Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(st_idx + i)); + Value *fldp = emit_ptrgep(ctx, staddr, i); ctx.builder.CreateAlignedStore(fldv, fldp, Align(1)); } } + setNameWithField(ctx.emission_context, lv, get_objname, jt, idx, Twine()); } - Value *tindex0 = ctx.builder.CreateExtractValue(obj, makeArrayRef(ptindex)); + Value *tindex0 = ctx.builder.CreateExtractValue(obj, ArrayRef(ptindex)); Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0); + setNameWithField(ctx.emission_context, tindex, get_objname, jt, idx, Twine(".tindex")); return mark_julia_slot(lv, jfty, tindex, ctx.tbaa().tbaa_stack); } else { @@ -2663,7 +3221,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st st_idx = convert_struct_offset(ctx, T, byte_offset); else llvm_unreachable("encountered incompatible type for a struct"); - fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx)); + fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(st_idx)); + setNameWithField(ctx.emission_context, fldv, get_objname, jt, idx, Twine()); } if (maybe_null) { Value *first_ptr = jl_field_isptr(jt, idx) ? 
fldv : extract_first_ptr(ctx, fldv); @@ -2694,394 +3253,124 @@ static Value *emit_n_varargs(jl_codectx_t &ctx) #endif } -static bool arraytype_constdim(jl_value_t *ty, size_t *dim) +static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *typ, bool add_isunion) { - if (jl_is_array_type(ty) && jl_is_long(jl_tparam1(ty))) { - *dim = jl_unbox_long(jl_tparam1(ty)); - return true; + ++EmittedArrayElsize; + jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(typ); + if (jl_is_datatype(sty) && !jl_has_free_typevars((jl_value_t*)sty) && sty->layout) { + if (jl_is_genericmemoryref_type(sty)) + sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1); + size_t sz = sty->layout->size; + if (sty->layout->flags.arrayelem_isunion) + sz++; + auto elsize = ConstantInt::get(ctx.types().T_size, sz); + return elsize; + } + else { + Value *t = emit_typeof(ctx, v, false, false, true); + Value *elsize = emit_datatype_size(ctx, t, add_isunion); + elsize = ctx.builder.CreateZExt(elsize, ctx.types().T_size); + setName(ctx.emission_context, elsize, "elsize"); + return elsize; } - return false; -} - -static bool arraytype_constshape(jl_value_t *ty) -{ - size_t dim; - if (!arraytype_constdim(ty, &dim)) - return false; - return dim != 1; } -static bool arraytype_constelsize(jl_datatype_t *ty, size_t *elsz) +static ssize_t genericmemoryype_constelsize(jl_value_t *typ) { - assert(jl_is_array_type(ty)); - jl_value_t *ety = jl_tparam0(ty); - if (jl_has_free_typevars(ety)) - return false; - // `jl_islayout_inline` requires `*elsz` and `al` to be initialized. - size_t al = 0; - *elsz = 0; - int union_max = jl_islayout_inline(ety, elsz, &al); - bool isboxed = (union_max == 0); - if (isboxed) { - *elsz = sizeof(void*); - } - else if (jl_is_primitivetype(ety)) { - // Primitive types should use the array element size, but - // this can be different from the type's size - *elsz = LLT_ALIGN(*elsz, al); + jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(typ); + if (jl_is_datatype(sty) && !jl_has_free_typevars((jl_value_t*)sty) && sty->layout) { + if (jl_is_array_type(sty)) + sty = (jl_datatype_t*)jl_field_type_concrete(sty, 0); + if (jl_is_genericmemoryref_type(sty)) + sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1); + return sty->layout->size; } - return true; + return -1; } -static intptr_t arraytype_maxsize(jl_value_t *ty) +static intptr_t genericmemoryype_maxsize(jl_value_t *ty) // the maxsize is strictly less than the return value { - if (!jl_is_array_type(ty)) - return INTPTR_MAX; - size_t elsz; - if (arraytype_constelsize((jl_datatype_t*)ty, &elsz) || elsz == 0) + ssize_t elsz = genericmemoryype_constelsize(ty); + if (elsz <= 1) return INTPTR_MAX; return INTPTR_MAX / elsz; } -static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo); - -static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *dim) +static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *typ) { - size_t ndim; - MDNode *tbaa = ctx.tbaa().tbaa_arraysize; - if (arraytype_constdim(tinfo.typ, &ndim)) { - if (ndim == 0) - return ConstantInt::get(ctx.types().T_size, 1); - if (ndim == 1) { - if (auto d = dyn_cast(dim)) { - if (d->getZExtValue() == 1) { - return emit_arraylen(ctx, tinfo); - } - } - } - if (ndim > 1) { - if (tinfo.constant && isa(dim)) { - auto n = cast(dim)->getZExtValue() - 1; - return ConstantInt::get(ctx.types().T_size, jl_array_dim(tinfo.constant, n)); - } - tbaa = ctx.tbaa().tbaa_const; - } - } - ++EmittedArraysize; - Value *t = boxed(ctx, tinfo); - 
int o = offsetof(jl_array_t, nrows) / sizeof(void*) - 1; - auto load = emit_nthptr_recast(ctx, - t, - ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)), - tbaa, ctx.types().T_size); - setName(ctx.emission_context, load, "arraysize"); + addr = decay_derived(ctx, addr); + addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 0); + LoadInst *LI = ctx.builder.CreateAlignedLoad(ctx.types().T_jlgenericmemory->getElementType(0), addr, Align(sizeof(size_t))); + jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen); + aliasinfo_mem.decorateInst(LI); MDBuilder MDB(ctx.builder.getContext()); - auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ))); - load->setMetadata(LLVMContext::MD_range, rng); - return load; -} - -static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int dim) -{ - return emit_arraysize(ctx, tinfo, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), dim)); -} - -static Value *emit_vectormaxsize(jl_codectx_t &ctx, const jl_cgval_t &ary) -{ - return emit_arraysize(ctx, ary, 2); // maxsize aliases ncols in memory layout for vector -} - -static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - size_t ndim; - jl_value_t *ty = tinfo.typ; - MDNode *tbaa = ctx.tbaa().tbaa_arraylen; - if (arraytype_constdim(ty, &ndim)) { - if (ndim == 0) - return ConstantInt::get(ctx.types().T_size, 1); - if (ndim != 1) { - if (tinfo.constant) - return ConstantInt::get(ctx.types().T_size, jl_array_len(tinfo.constant)); - tbaa = ctx.tbaa().tbaa_const; - } - } - ++EmittedArraylen; - Value *t = boxed(ctx, tinfo); - Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - 1); //index (not offset) of length field in ctx.types().T_pjlarray - setName(ctx.emission_context, addr, "arraylen_ptr"); - LoadInst *len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, addr, ctx.types().alignof_ptr); - setName(ctx.emission_context, len, "arraylen"); - len->setOrdering(AtomicOrdering::NotAtomic); - MDBuilder MDB(ctx.builder.getContext()); - auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ))); - len->setMetadata(LLVMContext::MD_range, rng); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - return ai.decorateInst(len); -} - -static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - return emit_arraylen_prim(ctx, tinfo); + auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, genericmemoryype_maxsize(typ))); + LI->setMetadata(LLVMContext::MD_range, rng); + setName(ctx.emission_context, LI, "memory_len"); + return LI; } -static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *t, unsigned AS, bool isboxed) +static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_datatype_layout_t *layout, unsigned AS) { ++EmittedArrayptr; - Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, - emit_bitcast(ctx, t, ctx.types().T_pjlarray), 0); - setName(ctx.emission_context, addr, "arrayptr_ptr"); - // Normally allocated array of 0 dimension always have a inline pointer. - // However, we can't rely on that here since arrays can also be constructed from C pointers. 
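For orientation, the new emit_genericmemorylen and emit_genericmemoryptr helpers above read the first two words of a GenericMemory object: the element count at struct index 0 and the data pointer at struct index 1 (for small pooled allocations the pointer ends up pointing just past the header, as arranged further below in emit_const_len_memorynew). The following standalone C++ sketch mirrors that layout; it is illustrative only, and the struct and function names are invented rather than taken from the patch.

#include <cstddef>

// Invented stand-in for the header that ctx.types().T_jlgenericmemory
// describes in the code above: field 0 holds the element count, field 1
// the data pointer.
struct generic_memory_header {
    size_t length;   // what emit_genericmemorylen loads (struct GEP index 0)
    void  *ptr;      // what emit_genericmemoryptr loads (struct GEP index 1)
};

// Plain-C++ equivalents of the two loads emitted above.
inline size_t memory_len(const generic_memory_header *m)  { return m->length; }
inline void  *memory_data(const generic_memory_header *m) { return m->ptr; }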
- PointerType *PT = cast(addr->getType()); - PointerType *PPT = cast(ctx.types().T_jlarray->getElementType(0)); - PointerType *LoadT = PPT; - - if (isboxed) { - LoadT = PointerType::get(ctx.types().T_prjlvalue, AS); - } - else if (AS != PPT->getAddressSpace()) { - LoadT = PointerType::getWithSamePointeeType(PPT, AS); - } - if (LoadT != PPT) { - const auto Ty = PointerType::get(LoadT, PT->getAddressSpace()); - addr = ctx.builder.CreateBitCast(addr, Ty); - } - - LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *))); - setName(ctx.emission_context, LI, "arrayptr"); + Value *addr = mem; + addr = decay_derived(ctx, addr); + addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 1); + setName(ctx.emission_context, addr, "memory_data_ptr"); + PointerType *PPT = cast(ctx.types().T_jlgenericmemory->getElementType(1)); + LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*))); LI->setOrdering(AtomicOrdering::NotAtomic); LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); - jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr); + jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); aliasinfo.decorateInst(LI); - - return LI; -} - -static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false) -{ - Value *t = boxed(ctx, tinfo); - return emit_arrayptr_internal(ctx, tinfo, decay_derived(ctx, t), AddressSpace::Loaded, isboxed); -} - -static Value *emit_unsafe_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false) -{ - Value *t = boxed(ctx, tinfo); - t = emit_pointer_from_objref(ctx, decay_derived(ctx, t)); - return emit_arrayptr_internal(ctx, tinfo, t, 0, isboxed); -} - -static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_value_t *ex, bool isboxed = false) -{ - return emit_arrayptr(ctx, tinfo, isboxed); -} - -static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_value_t *ex, int dim) -{ - return emit_arraysize(ctx, tinfo, dim); -} - -static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - ++EmittedArrayflags; - Value *t = boxed(ctx, tinfo); - int arrayflag_field = 2; - Value *addr = ctx.builder.CreateStructGEP( - ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - arrayflag_field); - setName(ctx.emission_context, addr, "arrayflags_ptr"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayflags); - auto flags = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); - setName(ctx.emission_context, flags, "arrayflags"); - return flags; -} - -static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) -{ - ++EmittedArrayNDims; - Value *flags = emit_arrayflags(ctx, ary); - cast(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(ctx.builder.getContext(), None)); - flags = ctx.builder.CreateLShr(flags, 2); - flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1 - setName(ctx.emission_context, flags, "arrayndims"); - return flags; -} - -static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - ++EmittedArrayElsize; - Value *t = boxed(ctx, tinfo); - int elsize_field = 3; - Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - 
elsize_field); - setName(ctx.emission_context, addr, "arrayelsize_ptr"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - auto elsize = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); - setName(ctx.emission_context, elsize, "arrayelsize"); - return elsize; -} - -static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd) -{ - ++EmittedArrayOffset; - if (nd != -1 && nd != 1) // only Vector can have an offset - return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0); - Value *t = boxed(ctx, tinfo); - int offset_field = 4; - - Value *addr = ctx.builder.CreateStructGEP( - ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - offset_field); - setName(ctx.emission_context, addr, "arrayoffset_ptr"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayoffset); - auto offset = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t)))); - setName(ctx.emission_context, offset, "arrayoffset"); - return offset; -} - -// Returns the size of the array represented by `tinfo` for the given dimension `dim` if -// `dim` is a valid dimension, otherwise returns constant one. -static Value *emit_arraysize_for_unsafe_dim(jl_codectx_t &ctx, - const jl_cgval_t &tinfo, jl_value_t *ex, size_t dim, size_t nd) -{ - return dim > nd ? ConstantInt::get(ctx.types().T_size, 1) : emit_arraysize(ctx, tinfo, ex, dim); + Value *ptr = LI; + if (AS) { + assert(AS == AddressSpace::Loaded); + ptr = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, ptr }); + } + setName(ctx.emission_context, ptr, "memory_data"); + return ptr; } -// `nd == -1` means the dimension is unknown. 
-static Value *emit_array_nd_index( - jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ex, ssize_t nd, - const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds) +static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t) { - ++EmittedArrayNdIndex; - Value *a = boxed(ctx, ainfo); - Value *i = Constant::getNullValue(ctx.types().T_size); - Value *stride = ConstantInt::get(ctx.types().T_size, 1); -#if CHECK_BOUNDS==1 - bool bc = bounds_check_enabled(ctx, inbounds); - BasicBlock *failBB = NULL, *endBB = NULL; - if (bc) { - failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); - endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend"); - } -#endif - SmallVector idxs(nidxs); - for (size_t k = 0; k < nidxs; k++) { - idxs[k] = emit_unbox(ctx, ctx.types().T_size, argv[k], (jl_value_t*)jl_long_type); // type asserted by caller - } - Value *ii = NULL; - for (size_t k = 0; k < nidxs; k++) { - ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(ctx.types().T_size, 1)); - i = ctx.builder.CreateAdd(i, ctx.builder.CreateMul(ii, stride)); - if (k < nidxs - 1) { - assert(nd >= 0); - Value *d = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k + 1, nd); -#if CHECK_BOUNDS==1 - if (bc) { - BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "ib"); - // if !(i < d) goto error - auto bc = ctx.builder.CreateICmpULT(ii, d); - setName(ctx.emission_context, bc, "inbounds"); - ctx.builder.CreateCondBr(bc, okBB, failBB); - ctx.f->getBasicBlockList().push_back(okBB); - ctx.builder.SetInsertPoint(okBB); - } -#endif - stride = ctx.builder.CreateMul(stride, d); - setName(ctx.emission_context, stride, "stride"); - } - } -#if CHECK_BOUNDS==1 - if (bc) { - // We have already emitted a bounds check for each index except for - // the last one which we therefore have to do here. 
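The removed emit_array_nd_index above lowered a tuple of 1-based indices into a single 0-based linear offset, checking each index against its dimension as the comment just above describes; the removed IR that continues below finishes the check for the last index. A minimal scalar sketch of that computation, assuming one index per dimension and leaving out the linear- and trailing-dimension cases handled below, could look like this (all names here are invented):

#include <cstddef>
#include <stdexcept>
#include <vector>

// Column-major linearization with a per-dimension bounds check, mirroring
// the loop structure of the removed emit_array_nd_index for the case where
// exactly one 1-based index is supplied per dimension.
inline size_t linearize_checked(const std::vector<size_t> &idxs,  // 1-based indices
                                const std::vector<size_t> &dims)  // array extents
{
    size_t i = 0, stride = 1;
    for (size_t k = 0; k < idxs.size(); k++) {
        size_t ii = idxs[k] - 1;               // convert to 0-based
        if (!(ii < dims[k]))                   // "inbounds" check per dimension
            throw std::out_of_range("array index out of bounds");
        i += ii * stride;
        stride *= dims[k];                     // accumulate the stride
    }
    return i;                                  // 0-based linear offset
}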
- if (nidxs == 1) { - // Linear indexing: Check against the entire linear span of the array - Value *alen = emit_arraylen(ctx, ainfo); - auto bc = ctx.builder.CreateICmpULT(i, alen); - setName(ctx.emission_context, bc, "inbounds"); - ctx.builder.CreateCondBr(bc, endBB, failBB); - } else if (nidxs >= (size_t)nd){ - // No dimensions were omitted; just check the last remaining index - assert(nd >= 0); - Value *last_index = ii; - Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension); - setName(ctx.emission_context, bc, "inbounds"); - ctx.builder.CreateCondBr(bc, endBB, failBB); - } else { - // There were fewer indices than dimensions; check the last remaining index - BasicBlock *checktrailingdimsBB = BasicBlock::Create(ctx.builder.getContext(), "dimsib"); - assert(nd >= 0); - Value *last_index = ii; - Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension); - setName(ctx.emission_context, bc, "inbounds"); - ctx.builder.CreateCondBr(bc, checktrailingdimsBB, failBB); - ctx.f->getBasicBlockList().push_back(checktrailingdimsBB); - ctx.builder.SetInsertPoint(checktrailingdimsBB); - // And then also make sure that all dimensions that weren't explicitly - // indexed into have size 1 - for (size_t k = nidxs+1; k < (size_t)nd; k++) { - BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok"); - Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd); - auto bc = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)); - setName(ctx.emission_context, bc, "inbounds"); - ctx.builder.CreateCondBr(bc, dimsokBB, failBB); - ctx.f->getBasicBlockList().push_back(dimsokBB); - ctx.builder.SetInsertPoint(dimsokBB); - } - Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd); - auto bc2 = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)); - setName(ctx.emission_context, bc2, "inbounds"); - ctx.builder.CreateCondBr(bc2, endBB, failBB); - } - - ctx.f->getBasicBlockList().push_back(failBB); - ctx.builder.SetInsertPoint(failBB); - // CreateAlloca is OK here since we are on an error branch - Value *tmp = ctx.builder.CreateAlloca(ctx.types().T_size, ConstantInt::get(ctx.types().T_size, nidxs)); - setName(ctx.emission_context, tmp, "errorbox"); - for (size_t k = 0; k < nidxs; k++) { - ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(ctx.types().T_size, tmp, ConstantInt::get(ctx.types().T_size, k)), ctx.types().alignof_ptr); - } - ctx.builder.CreateCall(prepare_call(jlboundserrorv_func), - { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(ctx.types().T_size, nidxs) }); - ctx.builder.CreateUnreachable(); - - ctx.f->getBasicBlockList().push_back(endBB); - ctx.builder.SetInsertPoint(endBB); - } -#endif - - return i; + Value *m = decay_derived(ctx, t); + Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, m, 1); + Type *T_data = ctx.types().T_jlgenericmemory->getElementType(1); + LoadInst *LI = ctx.builder.CreateAlignedLoad(T_data, addr, Align(sizeof(char*))); + LI->setOrdering(AtomicOrdering::NotAtomic); + LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); + jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryown); + aliasinfo_mem.decorateInst(LI); + addr = emit_ptrgep(ctx, m, JL_SMALL_BYTE_ALIGNMENT); + Value *foreign = ctx.builder.CreateICmpNE(addr, 
decay_derived(ctx, LI)); + return emit_guarded_test(ctx, foreign, t, [&] { + addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1); + LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); + ai.decorateInst(owner); + return ctx.builder.CreateSelect(ctx.builder.CreateIsNull(owner), t, owner); + }); } // --- boxing --- -static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt); +static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized); static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa, - unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned + Align alignment = Align(sizeof(void*))) // min alignment in julia's gc is pointer-aligned { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); // newv should already be tagged - ai.decorateInst(ctx.builder.CreateAlignedStore(v, emit_bitcast(ctx, newv, - PointerType::get(v->getType(), 0)), Align(alignment))); + ai.decorateInst(ctx.builder.CreateAlignedStore(v, newv, alignment)); } -static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, MDNode *tbaa) +static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t &v) { - // newv should already be tagged - if (v.ispointer()) { - emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*)); - } - else { - init_bits_value(ctx, newv, v.V, tbaa); - } + MDNode *tbaa = jl_is_mutable(v.typ) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; + Align newv_align{std::max(julia_alignment(v.typ), (unsigned)sizeof(void*))}; + newv = maybe_decay_tracked(ctx, newv); + emit_unbox_store(ctx, v, newv, tbaa, newv_align); } static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant *constant, jl_value_t *jt) @@ -3125,14 +3414,14 @@ static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant if (const auto *CC = dyn_cast(constant)) nargs = CC->getNumOperands(); else if (const auto *CAZ = dyn_cast(constant)) { - // SVE: Elsewhere we use `getMinKownValue` + // SVE: Elsewhere we use `getMinKnownValue` nargs = CAZ->getElementCount().getFixedValue(); } else if (const auto *CDS = dyn_cast(constant)) nargs = CDS->getNumElements(); else return NULL; - assert(nargs > 0 && jst->instance == NULL); + assert(nargs > 0 && !jl_is_datatype_singleton(jst)); if (nargs != jl_datatype_nfields(jst)) return NULL; @@ -3168,8 +3457,6 @@ static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction *intr, return Call; } -static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val); - static Value *as_value(jl_codectx_t &ctx, Type *to, const jl_cgval_t &v) { assert(!v.isboxed); @@ -3198,11 +3485,13 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t if (t == getInt1Ty(ctx.builder.getContext())) return track_pjlvalue(ctx, julia_bool(ctx, as_value(ctx, t, vinfo))); - if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel + if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && vinfo.inline_roots.empty() && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel if (Constant *c = dyn_cast(vinfo.V)) { jl_value_t *s = static_constant_instance(jl_Module->getDataLayout(), c, jt); if (s) { - s = jl_ensure_rooted(ctx, s); + JL_GC_PUSH1(&s); + 
jl_temporary_root(ctx, s); + JL_GC_POP(); return track_pjlvalue(ctx, literal_pointer_val(ctx, s)); } } @@ -3235,17 +3524,19 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t else if (jb == jl_char_type) box = call_with_attrs(ctx, box_char_func, as_value(ctx, t, vinfo)); else if (jb == jl_ssavalue_type) { - unsigned zero = 0; Value *v = as_value(ctx, t, vinfo); assert(v->getType() == ctx.emission_context.llvmtypes[jl_ssavalue_type]); - v = ctx.builder.CreateExtractValue(v, makeArrayRef(&zero, 1)); + v = ctx.builder.CreateExtractValue(v, 0); box = call_with_attrs(ctx, box_ssavalue_func, v); } else if (!jb->name->abstract && jl_datatype_nbits(jb) == 0) { // singleton - assert(jb->instance != NULL); + assert(jl_is_datatype_singleton(jb)); return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance)); } + if (box) { + setName(ctx.emission_context, box, [&]() {return "box_" + std::string(jl_symbol_name(jb->name->name));}); + } return box; } @@ -3262,7 +3553,7 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype_tag, jl_valu }, ut, counter); - setName(ctx.emission_context, tindex, "tindex"); + setName(ctx.emission_context, tindex, datatype_tag->getName() + ".tindex"); return tindex; } @@ -3279,6 +3570,7 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j return compute_box_tindex(ctx, typof, val.typ, typ); } + static void union_alloca_type(jl_uniontype_t *ut, bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align) { @@ -3302,6 +3594,8 @@ static void union_alloca_type(jl_uniontype_t *ut, }, (jl_value_t*)ut, counter); + if (align > JL_HEAP_ALIGNMENT) + align = JL_HEAP_ALIGNMENT; } static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes) @@ -3310,12 +3604,9 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, union_alloca_type(ut, allunbox, nbytes, align, min_align); if (nbytes > 0) { // at least some of the values can live on the stack - // try to pick an Integer type size such that SROA will emit reasonable code - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); - AllocaInst *lv = emit_static_alloca(ctx, AT); + assert(align % min_align == 0); + AllocaInst *lv = emit_static_alloca(ctx, nbytes, Align(align)); setName(ctx.emission_context, lv, "unionalloca"); - if (align > 1) - lv->setAlignment(Align(align)); return lv; } return NULL; @@ -3326,7 +3617,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, * returning `Constant::getNullValue(ctx.types().T_pjlvalue)` in one of the skipped cases. If `skip` is not empty, * skip[0] (corresponding to unknown boxed) must always be set. In that * case, the calling code must separately deal with the case where - * `vinfo` is already an unknown boxed union (union tag 0x80). + * `vinfo` is already an unknown boxed union (union tag UNION_BOX_MARKER). 
*/ // Returns ctx.types().T_prjlvalue static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip) @@ -3369,9 +3660,9 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL); box = _boxed_special(ctx, vinfo_r, t); if (!box) { - box = emit_allocobj(ctx, jt); + box = emit_allocobj(ctx, jt, true); setName(ctx.emission_context, box, "unionbox"); - init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); + init_bits_cgval(ctx, box, vinfo_r); } } tempBB = ctx.builder.GetInsertBlock(); // could have changed @@ -3414,7 +3705,7 @@ static Function *mangleIntrinsic(IntrinsicInst *call) //mangling based on replac auto newfType = FunctionType::get( oldfType->getReturnType(), - makeArrayRef(argTys).slice(0, oldfType->getNumParams()), + ArrayRef(argTys).slice(0, oldfType->getNumParams()), oldfType->isVarArg()); // Accumulate an array of overloaded types for the given intrinsic @@ -3444,7 +3735,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig for (auto *User : Val->users()) { if (isa(User)) { GetElementPtrInst *Inst = cast(User); - Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); + Inst->mutateType(PointerType::get(Inst->getType(), ToAS)); recursively_adjust_ptr_type(Inst, FromAS, ToAS); } else if (isa(User)) { @@ -3453,7 +3744,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig } else if (isa(User)) { BitCastInst *Inst = cast(User); - Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); + Inst->mutateType(PointerType::get(Inst->getType(), ToAS)); recursively_adjust_ptr_type(Inst, FromAS, ToAS); } } @@ -3486,21 +3777,20 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab box = box_union(ctx, vinfo, skip_none); } else { - assert(vinfo.V && "Missing data for unboxed value."); + assert((vinfo.V || !vinfo.inline_roots.empty()) && "Missing data for unboxed value."); assert(jl_is_concrete_immutable(jt) && "This type shouldn't have been unboxed."); Type *t = julia_type_to_llvm(ctx, jt); assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above! box = _boxed_special(ctx, vinfo, t); if (!box) { bool do_promote = vinfo.promotion_point; - if (do_promote && is_promotable) { + if (do_promote && is_promotable && vinfo.inline_roots.empty()) { auto IP = ctx.builder.saveIP(); ctx.builder.SetInsertPoint(vinfo.promotion_point); - box = emit_allocobj(ctx, (jl_datatype_t*)jt); + box = emit_allocobj(ctx, (jl_datatype_t*)jt, true); Value *decayed = decay_derived(ctx, box); AllocaInst *originalAlloca = cast(vinfo.V); box->takeName(originalAlloca); - decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived)); // Warning: Very illegal IR here temporarily originalAlloca->mutateType(decayed->getType()); recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived); @@ -3508,10 +3798,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab // end illegal IR originalAlloca->eraseFromParent(); ctx.builder.restoreIP(IP); - } else { - box = emit_allocobj(ctx, (jl_datatype_t*)jt); - setName(ctx.emission_context, box, "box"); - init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); + } + else { + auto arg_typename = [&] JL_NOTSAFEPOINT { + return "box::" + std::string(jl_symbol_name(((jl_datatype_t*)(jt))->name->name)); + }; + box = emit_allocobj(ctx, (jl_datatype_t*)jt, true); + setName(ctx.emission_context, box, arg_typename); + init_bits_cgval(ctx, box, vinfo); } } } @@ -3524,30 +3818,25 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con if (AllocaInst *ai = dyn_cast(dest)) // TODO: make this a lifetime_end & dereferenceable annotation? ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign()); - if (jl_is_concrete_type(src.typ) || src.constant) { - jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ; + if (src.constant) { + jl_value_t *typ = jl_typeof(src.constant); assert(skip || jl_is_pointerfree(typ)); if (jl_is_pointerfree(typ)) { - unsigned alignment = julia_alignment(typ); - if (!src.ispointer() || src.constant) { - emit_unbox_store(ctx, src, dest, tbaa_dst, alignment, isVolatile); - } - else { - Value *src_ptr = data_pointer(ctx, src); - unsigned nb = jl_datatype_size(typ); - // TODO: this branch may be bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use: - // select copy dest -> dest to simulate an undef value / conditional copy - // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); - auto f = [&] { - (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile); - return nullptr; - }; - if (skip) - emit_guarded_test(ctx, skip, nullptr, f); - else - f(); - } + emit_guarded_test(ctx, skip, nullptr, [&] { + unsigned alignment = julia_alignment(typ); + emit_unbox_store(ctx, mark_julia_const(ctx, src.constant), dest, tbaa_dst, Align(alignment), isVolatile); + return nullptr; + }); + } + } + else if (jl_is_concrete_type(src.typ)) { + assert(skip || jl_is_pointerfree(src.typ)); + if (jl_is_pointerfree(src.typ)) { + emit_guarded_test(ctx, skip, nullptr, [&] { + unsigned alignment = julia_alignment(src.typ); + emit_unbox_store(ctx, src, dest, tbaa_dst, Align(alignment), isVolatile); + return nullptr; + }); } } else if (src.TIndex) { @@ -3555,8 +3844,6 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con if (skip) tindex = ctx.builder.CreateSelect(skip, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), tindex); Value *src_ptr = data_pointer(ctx, src); - src_ptr = src_ptr ? 
maybe_bitcast(ctx, src_ptr, getInt8PtrTy(ctx.builder.getContext())) : src_ptr; - dest = maybe_bitcast(ctx, dest, getInt8PtrTy(ctx.builder.getContext())); BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "union_move_skip", ctx.f); SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB); BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_move", ctx.f); @@ -3577,7 +3864,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con return; } else { emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile); + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, Align(alignment), Align(alignment), isVolatile); } } ctx.builder.CreateBr(postBB); @@ -3599,21 +3886,18 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con } else { assert(src.isboxed && "expected boxed value for sizeof/alignment computation"); - auto f = [&] { + emit_guarded_test(ctx, skip, nullptr, [&] { Value *datatype = emit_typeof(ctx, src, false, false); Value *copy_bytes = emit_datatype_size(ctx, datatype); - emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src, copy_bytes, /*TODO: min-align*/1, isVolatile); + emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src), + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, Align(1), Align(1), isVolatile); return nullptr; - }; - if (skip) - emit_guarded_test(ctx, skip, nullptr, f); - else - f(); + }); } } -static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std::string &msg) +static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const Twine &msg) { ++EmittedCPointerChecks; Value *t = emit_typeof(ctx, x, false, false); @@ -3630,13 +3914,14 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std just_emit_type_error(ctx, x, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_type), msg); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } // allocation for known size object // returns a prjlvalue -static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt) +static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt, + bool fully_initialized, unsigned align) { ++EmittedAllocObjs; Value *current_task = get_current_task(ctx); @@ -3644,19 +3929,22 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt) auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(ctx.types().T_size, static_size), maybe_decay_untracked(ctx, jt)}); call->setAttributes(F->getAttributes()); if (static_size > 0) - call->addRetAttr(Attribute::getWithDereferenceableBytes(ctx.builder.getContext(), static_size)); + call->addRetAttr(Attribute::getWithDereferenceableBytes(call->getContext(), static_size)); + call->addRetAttr(Attribute::getWithAlignment(call->getContext(), Align(align))); + if (fully_initialized) + call->addFnAttr(Attribute::get(call->getContext(), Attribute::AllocKind, uint64_t(AllocFnKind::Alloc | AllocFnKind::Uninitialized))); return call; } -static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt) +static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized) { - return emit_allocobj(ctx, jl_datatype_size(jt), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jt), ctx.types().T_pjlvalue)); + return 
emit_allocobj(ctx, jl_datatype_size(jt), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jt), ctx.types().T_pjlvalue), + fully_initialized, julia_alignment((jl_value_t*)jt)); } // allocation for unknown object from an untracked pointer static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval) { - pval = ctx.builder.CreateBitCast(pval, getInt8PtrTy(ctx.builder.getContext())); Function *F = prepare_call(jl_newbits_func); auto call = ctx.builder.CreateCall(F, { jt, pval }); call->setAttributes(F->getAttributes()); @@ -3666,7 +3954,7 @@ static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval) // if ptr is NULL this emits a write barrier _back_ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr) { - emit_write_barrier(ctx, parent, makeArrayRef(ptr)); + emit_write_barrier(ctx, parent, ArrayRef(ptr)); } static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef ptrs) @@ -3676,36 +3964,13 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef decay_ptrs; - decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue))); + decay_ptrs.push_back(maybe_decay_untracked(ctx, parent)); for (auto ptr : ptrs) { - decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue))); + decay_ptrs.push_back(maybe_decay_untracked(ctx, ptr)); } ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs); } -static void find_perm_offsets(jl_datatype_t *typ, SmallVector &res, unsigned offset) -{ - // This is a inlined field at `offset`. - if (!typ->layout || typ->layout->npointers == 0) - return; - jl_svec_t *types = jl_get_fieldtypes(typ); - size_t nf = jl_svec_len(types); - for (size_t i = 0; i < nf; i++) { - jl_value_t *_fld = jl_svecref(types, i); - if (!jl_is_datatype(_fld)) - continue; - jl_datatype_t *fld = (jl_datatype_t*)_fld; - if (jl_field_isptr(typ, i)) { - // pointer field, check if field is perm-alloc - if (type_is_permalloc((jl_value_t*)fld)) - res.push_back(offset + jl_field_offset(typ, i)); - continue; - } - // inline field - find_perm_offsets(fld, res, offset + jl_field_offset(typ, i)); - } -} - static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg, jl_value_t *jltype) { @@ -3716,156 +3981,197 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg emit_write_barrier(ctx, parent, ptrs); } +static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x) +{ + auto ptrs = get_gc_roots_for(ctx, x, true); + emit_write_barrier(ctx, parent, ptrs); +} + +static jl_cgval_t union_store(jl_codectx_t &ctx, + Value *ptr, Value *ptindex, jl_cgval_t rhs, jl_cgval_t cmp, + jl_value_t *jltype, MDNode *tbaa, MDNode *tbaa_tindex, + AtomicOrdering Order, AtomicOrdering FailOrder, + Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce, + const jl_cgval_t *modifyop, const Twine &fname) +{ + assert(Order == AtomicOrdering::NotAtomic); + if (issetfieldonce) + return mark_julia_const(ctx, jl_false); + size_t fsz = 0, al = 0; + int union_max = jl_islayout_inline(jltype, &fsz, &al); + assert(union_max > 0); + // compute tindex from rhs + jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jltype); + if (rhs_union.typ == jl_bottom_type) + return jl_cgval_t(); + if (needlock) + emit_lockstate_value(ctx, needlock, true); + BasicBlock *ModifyBB = NULL; + if (ismodifyfield) { + ModifyBB = 
BasicBlock::Create(ctx.builder.getContext(), "modify_xchg", ctx.f); + ctx.builder.CreateBr(ModifyBB); + ctx.builder.SetInsertPoint(ModifyBB); + } + jl_cgval_t oldval = rhs; + if (!issetfield) + oldval = emit_unionload(ctx, ptr, ptindex, jltype, fsz, al, tbaa, true, union_max, tbaa_tindex); + Value *Success = NULL; + BasicBlock *DoneBB = NULL; + if (isreplacefield || ismodifyfield) { + if (ismodifyfield) { + if (needlock) + emit_lockstate_value(ctx, needlock, false); + const jl_cgval_t argv[3] = { cmp, oldval, rhs }; + if (modifyop) { + rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type, nullptr); + } + else { + if (trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to setfield/modifyfield\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } + Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call); + rhs = mark_julia_type(ctx, callval, true, jl_any_type); + } + emit_typecheck(ctx, rhs, jltype, fname); + rhs = update_julia_type(ctx, rhs, jltype); + rhs_union = convert_julia_type(ctx, rhs, jltype); + if (rhs_union.typ == jl_bottom_type) + return jl_cgval_t(); + if (needlock) + emit_lockstate_value(ctx, needlock, true); + cmp = oldval; + oldval = emit_unionload(ctx, ptr, ptindex, jltype, fsz, al, tbaa, true, union_max, tbaa_tindex); + } + BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f); + DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f); + Success = emit_f_is(ctx, oldval, cmp); + ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? ModifyBB : DoneBB); + ctx.builder.SetInsertPoint(XchgBB); + } + Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jltype); + tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_tindex); + ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); + // copy data + if (!rhs.isghost) { + emit_unionmove(ctx, ptr, tbaa, rhs, nullptr); + } + if (isreplacefield || ismodifyfield) { + ctx.builder.CreateBr(DoneBB); + ctx.builder.SetInsertPoint(DoneBB); + } + if (needlock) + emit_lockstate_value(ctx, needlock, false); + if (isreplacefield) { + Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext())); + jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)}; + jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype); + oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); + } + else if (ismodifyfield) { + jl_cgval_t argv[2] = {oldval, rhs}; + jl_datatype_t *rettyp = jl_apply_modify_type(jltype); + oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); + } + return oldval; +} + static jl_cgval_t emit_setfield(jl_codectx_t &ctx, jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0, jl_cgval_t rhs, jl_cgval_t cmp, bool wb, AtomicOrdering Order, AtomicOrdering FailOrder, - bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, - const jl_cgval_t *modifyop, const std::string &fname) + Value *needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, bool issetfieldonce, + const jl_cgval_t *modifyop, const Twine &fname) { + auto get_objname = [&]() { + return strct.V ? 
strct.V->getName() : StringRef(""); + }; ++EmittedSetfield; assert(strct.ispointer()); size_t byte_offset = jl_field_offset(sty, idx0); + auto tbaa = best_field_tbaa(ctx, strct, sty, idx0, byte_offset); Value *addr = data_pointer(ctx, strct); if (byte_offset > 0) { - addr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), - emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(ctx.types().T_size, byte_offset)); // TODO: use emit_struct_gep + addr = emit_ptrgep(ctx, addr, byte_offset); + setNameWithField(ctx.emission_context, addr, get_objname, sty, idx0, Twine("_ptr")); } jl_value_t *jfty = jl_field_type(sty, idx0); - if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) { - size_t fsz = 0, al = 0; - int union_max = jl_islayout_inline(jfty, &fsz, &al); - bool isptr = (union_max == 0); - assert(!isptr && fsz == jl_field_size(sty, idx0) - 1); (void)isptr; - // compute tindex from rhs - jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty); - if (rhs_union.typ == jl_bottom_type) - return jl_cgval_t(); - Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), - emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(ctx.types().T_size, fsz)); - if (needlock) - emit_lockstate_value(ctx, strct, true); - BasicBlock *ModifyBB = NULL; - if (ismodifyfield) { - ModifyBB = BasicBlock::Create(ctx.builder.getContext(), "modify_xchg", ctx.f); - ctx.builder.CreateBr(ModifyBB); - ctx.builder.SetInsertPoint(ModifyBB); - } - jl_cgval_t oldval = rhs; - if (!issetfield) - oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte); - Value *Success = NULL; - BasicBlock *DoneBB = NULL; - if (isreplacefield || ismodifyfield) { - if (ismodifyfield) { - if (needlock) - emit_lockstate_value(ctx, strct, false); - const jl_cgval_t argv[3] = { cmp, oldval, rhs }; - if (modifyop) { - rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type); - } - else { - Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call); - rhs = mark_julia_type(ctx, callval, true, jl_any_type); - } - emit_typecheck(ctx, rhs, jfty, fname); - rhs = update_julia_type(ctx, rhs, jfty); - rhs_union = convert_julia_type(ctx, rhs, jfty); - if (rhs_union.typ == jl_bottom_type) - return jl_cgval_t(); - if (needlock) - emit_lockstate_value(ctx, strct, true); - cmp = oldval; - oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte); - } - BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f); - DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f); - Success = emit_f_is(ctx, oldval, cmp); - ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? 
ModifyBB : DoneBB); - ctx.builder.SetInsertPoint(XchgBB); - } - Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty); - tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); - ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); - // copy data - if (!rhs.isghost) { - emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr); - } - if (isreplacefield || ismodifyfield) { - ctx.builder.CreateBr(DoneBB); - ctx.builder.SetInsertPoint(DoneBB); - } - if (needlock) - emit_lockstate_value(ctx, strct, false); - if (isreplacefield) { - Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext())); - jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)}; - jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty); - oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); - } - else if (ismodifyfield) { - jl_cgval_t argv[2] = {oldval, rhs}; - jl_datatype_t *rettyp = jl_apply_modify_type(jfty); - oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); - } - return oldval; - } - else { - unsigned align = jl_field_align(sty, idx0); - bool isboxed = jl_field_isptr(sty, idx0); - size_t nfields = jl_datatype_nfields(sty); - bool maybe_null = idx0 >= nfields - (unsigned)sty->name->n_uninitialized; - return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr, - wb ? boxed(ctx, strct) : nullptr, - isboxed, Order, FailOrder, align, - needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname); - } -} - -static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable) + bool isboxed = jl_field_isptr(sty, idx0); + if (!isboxed && jl_is_uniontype(jfty)) { + size_t fsz1 = jl_field_size(sty, idx0) - 1; + Value *ptindex = emit_ptrgep(ctx, addr, fsz1); + setNameWithField(ctx.emission_context, ptindex, get_objname, sty, idx0, Twine(".tindex_ptr")); + return union_store(ctx, addr, ptindex, rhs, cmp, jfty, tbaa, ctx.tbaa().tbaa_unionselbyte, + Order, FailOrder, + needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce, + modifyop, fname); + } + unsigned align = jl_field_align(sty, idx0); + bool maybe_null = field_may_be_null(strct, sty, idx0); + return typed_store(ctx, addr, rhs, cmp, jfty, tbaa, nullptr, + wb ? boxed(ctx, strct) : nullptr, + isboxed, Order, FailOrder, align, + needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce, + maybe_null, modifyop, fname, nullptr, nullptr); +} + +static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, ArrayRef argv, bool is_promotable) { ++EmittedNewStructs; assert(jl_is_datatype(ty)); assert(jl_is_concrete_type(ty)); jl_datatype_t *sty = (jl_datatype_t*)ty; + auto arg_typename = [&] JL_NOTSAFEPOINT { + return "new::" + std::string(jl_symbol_name((sty)->name->name)); + }; size_t nf = jl_datatype_nfields(sty); if (nf > 0 || sty->name->mutabl) { if (deserves_stack(ty)) { Type *lt = julia_type_to_llvm(ctx, ty); unsigned na = nargs < nf ? 
nargs : nf; - // whether we should perform the initialization with the struct as a IR value - // or instead initialize the stack buffer with stores - auto tracked = CountTrackedPointers(lt); + // choose whether we should perform the initialization with the struct as a IR value + // or instead initialize the stack buffer with stores (the later is nearly always better) + auto tracked = split_value_size(sty); + assert(CountTrackedPointers(lt).count == tracked.second); bool init_as_value = false; if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ? init_as_value = true; } - else if (tracked.count) { - init_as_value = true; - } Instruction *promotion_point = nullptr; ssize_t promotion_ssa = -1; Value *strct; + SmallVector inline_roots; if (type_is_ghost(lt)) { - strct = NULL; + strct = nullptr; } else if (init_as_value) { - if (tracked.count) + if (tracked.second) { strct = Constant::getNullValue(lt); - else + } + else { strct = UndefValue::get(lt); + if (nargs < nf) + strct = ctx.builder.CreateFreeze(strct); // Change this to zero initialize instead? + } + } + else if (tracked.second) { + inline_roots.resize(tracked.second, Constant::getNullValue(ctx.types().T_prjlvalue)); + strct = nullptr; + if (tracked.first) { + AllocaInst *bits = emit_static_alloca(ctx, tracked.first, Align(julia_alignment(ty))); + strct = bits; + setName(ctx.emission_context, bits, arg_typename); + is_promotable = false; // wrong layout for promotion + } } else { - strct = emit_static_alloca(ctx, lt); - setName(ctx.emission_context, strct, "newstruct"); - if (tracked.count) - undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack); + strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty))); + setName(ctx.emission_context, strct, arg_typename); } for (unsigned i = 0; i < na; i++) { @@ -3877,26 +4183,33 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg fval_info = update_julia_type(ctx, fval_info, jtype); if (fval_info.typ == jl_bottom_type) return jl_cgval_t(); + if (type_is_ghost(lt)) + continue; + Type *fty = julia_type_to_llvm(ctx, jtype); + if (type_is_ghost(fty)) + continue; + Instruction *dest = nullptr; + MutableArrayRef roots; + ssize_t offs = jl_field_offset(sty, i); + ssize_t ptrsoffs = -1; + if (!inline_roots.empty()) + std::tie(offs, ptrsoffs) = split_value_field(sty, i); + unsigned llvm_idx = init_as_value ? ((i > 0 && isa(lt)) ? convert_struct_offset(ctx, lt, offs) : i) : -1u; // TODO: Use (post-)domination instead. bool field_promotable = !jl_is_uniontype(jtype) && !init_as_value && fval_info.promotion_ssa != -1 && + fval_info.inline_roots.empty() && inline_roots.empty() && // these need to be compatible, if they were to be implemented fval_info.promotion_point && fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock(); if (field_promotable) { savedIP = ctx.builder.saveIP(); ctx.builder.SetInsertPoint(fval_info.promotion_point); } - if (type_is_ghost(lt)) - continue; - Type *fty = julia_type_to_llvm(ctx, jtype); - if (type_is_ghost(fty)) - continue; - Value *dest = NULL; - unsigned offs = jl_field_offset(sty, i); - unsigned llvm_idx = (i > 0 && isa(lt)) ? convert_struct_offset(ctx, lt, offs) : i; if (!init_as_value) { // avoid unboxing the argument explicitly // and use memcpy instead - Instruction *inst; - dest = inst = cast(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx)); + Instruction *inst = strct && offs >= 0 ? 
cast(emit_ptrgep(ctx, strct, offs)) : nullptr; + if (!inline_roots.empty() && ptrsoffs >= 0) + roots = MutableArrayRef(inline_roots).slice(ptrsoffs, jl_field_isptr(sty, i) ? 1 : ((jl_datatype_t*)jtype)->layout->npointers); + dest = inst; // Our promotion point needs to come before // A) All of our arguments' promotion points // B) Any instructions we insert at any of our arguments' promotion points @@ -3916,10 +4229,13 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (jl_field_isptr(sty, i)) { fval = boxed(ctx, fval_info, field_promotable); if (!init_as_value) { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); - StoreInst *SI = cast(ai.decorateInst( - ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i))))); - SI->setOrdering(AtomicOrdering::Unordered); + if (dest) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + ai.decorateInst(ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i)))); + } + else { + roots[0] = fval; + } } } else if (jl_is_uniontype(jtype)) { @@ -3931,47 +4247,46 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); size_t fsz = 0, al = 0; bool isptr = !jl_islayout_inline(jtype, &fsz, &al); - assert(!isptr && fsz == jl_field_size(sty, i) - 1); (void)isptr; + assert(!isptr && fsz < jl_field_size(sty, i)); (void)isptr; + size_t fsz1 = jl_field_size(sty, i) - 1; if (init_as_value) { // If you wanted to implement init_as_value, // would need to emit the union-move into temporary memory, // then load it and combine with the tindex. // But more efficient to just store it directly. 
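For reference, the selector-byte convention shared by emit_unionload, union_store, and the field loop here is: the byte stored after an inline isbits-union field holds tindex - 1, where tindex numbers the union's components starting at 1 (the code continuing below stores exactly that byte, and the loads earlier add the 1 back with a no-wrap add). UNION_BOX_MARKER (0x80, per the box_union documentation comment above) tags values that stay boxed rather than being stored inline. A small illustrative sketch, with invented helper names:

#include <cstdint>

// SSA-level tag for a value that stays boxed instead of being stored
// inline (see the box_union documentation comment earlier in this file).
constexpr uint8_t UNION_BOX_MARKER = 0x80;

// What union_store / emit_new_struct write into the trailing selector
// byte: the 1-based component index minus one.
inline uint8_t selector_from_tindex(uint8_t tindex) {
    return static_cast<uint8_t>(tindex - 1);
}

// What emit_unionload / emit_getfield_knownidx recover when reading the
// selector byte back (the CreateNUWAdd(..., 1) above).
inline uint8_t tindex_from_selector(uint8_t selector) {
    return static_cast<uint8_t>(selector + 1);
}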
- unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz); - if (fsz > 0 && !fval_info.isghost) { + unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz1); + if (fsz1 > 0 && !fval_info.isghost) { Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); - AllocaInst *lv = emit_static_alloca(ctx, ET); + AllocaInst *lv = emit_static_alloca(ctx, fsz1, Align(al)); setName(ctx.emission_context, lv, "unioninit"); - lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); // emit all of the align-sized words unsigned i = 0; - for (; i < fsz / al; i++) { - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); + for (; i < fsz1 / al; i++) { + Value *fldp = emit_ptrgep(ctx, lv, i * al); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ET, fldp, Align(al))); - strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i)); + strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef(llvm_idx + i)); } // emit remaining bytes up to tindex if (i < ptindex - llvm_idx) { - Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); - staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext())); + Value *staddr = emit_ptrgep(ctx, lv, i * al); for (; i < ptindex - llvm_idx; i++) { - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i); + Value *fldp = emit_ptrgep(ctx, staddr, i); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1))); - strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i)); + strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef(llvm_idx + i)); } } } llvm_idx = ptindex; fval = tindex; if (jl_is_vecelement_type(ty)) - fval = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx)); + fval = ctx.builder.CreateInsertValue(strct, fval, ArrayRef(llvm_idx)); } else { - Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz); + Value *ptindex = emit_ptrgep(ctx, strct, offs + fsz1); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); if (!rhs_union.isghost) @@ -3982,10 +4297,15 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (field_promotable) { fval_info.V->replaceAllUsesWith(dest); cast(fval_info.V)->eraseFromParent(); - } else if (init_as_value) { + } + else if (init_as_value) { fval = emit_unbox(ctx, fty, fval_info, jtype); - } else { - emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, jl_field_align(sty, i)); + } + else if (!roots.empty()) { + split_value_into(ctx, fval_info, Align(julia_alignment(jtype)), dest, Align(jl_field_align(sty, i)), jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots); + } + else { + emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, Align(jl_field_align(sty, i))); } } if (init_as_value) { @@ -3995,7 +4315,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg else if (lt->isVectorTy()) strct = ctx.builder.CreateInsertElement(strct, fval, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), llvm_idx)); else if 
(lt->isAggregateType()) - strct = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx)); + strct = ctx.builder.CreateInsertValue(strct, fval, ArrayRef(llvm_idx)); else assert(false); } @@ -4003,28 +4323,38 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg ctx.builder.restoreIP(savedIP); } } - for (size_t i = nargs; i < nf; i++) { - if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { - unsigned offs = jl_field_offset(sty, i); - int fsz = jl_field_size(sty, i) - 1; - unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); - if (init_as_value) - strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), makeArrayRef(llvm_idx)); - else { - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); - ai.decorateInst(ctx.builder.CreateAlignedStore( - ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), - ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx), - Align(1))); + if (init_as_value) { + for (size_t i = nargs; i < nf; i++) { + if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { + ssize_t offs = jl_field_offset(sty, i); + ssize_t ptrsoffs = -1; + if (!inline_roots.empty()) + std::tie(offs, ptrsoffs) = split_value_field(sty, i); + assert(ptrsoffs < 0 && offs >= 0); + int fsz = jl_field_size(sty, i) - 1; + unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); + strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef(llvm_idx)); } } } + if (nargs < nf) { + assert(!init_as_value); + IRBuilderBase::InsertPoint savedIP = ctx.builder.saveIP(); + if (promotion_point) + ctx.builder.SetInsertPoint(promotion_point); + if (strct) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + promotion_point = ai.decorateInst(ctx.builder.CreateMemSet(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), + jl_datatype_size(ty), MaybeAlign(jl_datatype_align(ty)))); + } + ctx.builder.restoreIP(savedIP); + } if (type_is_ghost(lt)) return mark_julia_const(ctx, sty->instance); else if (init_as_value) return mark_julia_type(ctx, strct, false, ty); else { - jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack); + jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack, inline_roots); if (is_promotable && promotion_point) { ret.promotion_point = promotion_point; ret.promotion_ssa = promotion_ssa; @@ -4032,8 +4362,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg return ret; } } - Value *strct = emit_allocobj(ctx, sty); - setName(ctx.emission_context, strct, "newstruct"); + Value *strct = emit_allocobj(ctx, sty, nargs >= nf); + setName(ctx.emission_context, strct, arg_typename); jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty); strct = decay_derived(ctx, strct); undef_derived_strct(ctx, strct, sty, strctinfo.tbaa); @@ -4042,8 +4372,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); ai.decorateInst(ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), - ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, strct, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)), + emit_ptrgep(ctx, strct, 
jl_field_offset(sty, i) + jl_field_size(sty, i) - 1), Align(1))); } } @@ -4060,18 +4389,19 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg rhs = update_julia_type(ctx, rhs, ft); if (rhs.typ == jl_bottom_type) return jl_cgval_t(); - emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, nullptr, ""); + emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, nullptr, true, false, false, false, false, nullptr, "new"); } return strctinfo; } else { - // 0 fields, ghost or bitstype + // 0 fields, ghost or primitive type if (jl_datatype_nbits(sty) == 0) return ghostValue(ctx, sty); + // n.b. this is not valid IR form to construct a primitive type (use bitcast for example) bool isboxed; Type *lt = julia_type_to_llvm(ctx, ty, &isboxed); assert(!isboxed); - return mark_julia_type(ctx, UndefValue::get(lt), false, ty); + return mark_julia_type(ctx, ctx.builder.CreateFreeze(UndefValue::get(lt)), false, ty); } } @@ -4083,11 +4413,8 @@ static void emit_signal_fence(jl_codectx_t &ctx) static Value *emit_defer_signal(jl_codectx_t &ctx) { ++EmittedDeferSignal; - Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx), - PointerType::get(ctx.types().T_sigatomic, 0)); - Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()), - offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t)); - return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef(offset), "jl_defer_signal"); + Value *ptls = get_current_ptls(ctx); + return emit_ptrgep(ctx, ptls, offsetof(jl_tls_states_t, defer_signal)); } #ifndef JL_NDEBUG @@ -4099,10 +4426,410 @@ static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) (a->prefer_specsig == b->prefer_specsig) && (a->gnu_pubnames == b->gnu_pubnames) && (a->debug_info_kind == b->debug_info_kind) && - (a->lookup == b->lookup) && - (a->generic_context == b->generic_context); + (a->safepoint_on_entry == b->safepoint_on_entry) && + (a->gcstack_arg == b->gcstack_arg) && + (a->use_jlplt == b->use_jlplt); +} +#endif + +static auto *emit_genericmemory_unchecked(jl_codectx_t &ctx, Value *cg_nbytes, Value *cg_typ) +{ + auto ptls = get_current_ptls(ctx); + auto call = prepare_call(jl_alloc_genericmemory_unchecked_func); + auto *alloc = ctx.builder.CreateCall(call, { ptls, cg_nbytes, cg_typ}); + alloc->setAttributes(call->getAttributes()); + alloc->addRetAttr(Attribute::getWithAlignment(alloc->getContext(), Align(JL_HEAP_ALIGNMENT))); + call->addRetAttr(Attribute::getWithDereferenceableBytes(call->getContext(), sizeof(jl_genericmemory_t))); + return alloc; +} + +static void emit_memory_zeroinit_and_stores(jl_codectx_t &ctx, jl_datatype_t *typ, Value* alloc, Value* nbytes, Value* nel, int zi) +{ + auto arg_typename = [&] JL_NOTSAFEPOINT { + std::string type_str; + auto eltype = jl_tparam1(typ); + if (jl_is_datatype(eltype)) + type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name); + else if (jl_is_uniontype(eltype)) + type_str = "Union"; + else + type_str = ""; + return "Memory{" + type_str + "}[]"; + }; + setName(ctx.emission_context, alloc, arg_typename); + // set length (jl_alloc_genericmemory_unchecked_func doesn't have it) + Value *decay_alloc = decay_derived(ctx, alloc); + Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0); + auto len_store = ctx.builder.CreateAlignedStore(nel, len_field, 
Align(sizeof(void*))); + auto aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen); + aliasinfo.decorateInst(len_store); + // zeroinit pointers and unions + if (zi) { + Value *memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1); + auto *load = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*))); + aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); + aliasinfo.decorateInst(load); + auto int8t = getInt8Ty(ctx.builder.getContext()); + ctx.builder.CreateMemSet(load, ConstantInt::get(int8t, 0), nbytes, Align(sizeof(void*))); + } + return; +} + + +static jl_cgval_t emit_const_len_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, size_t nel, jl_genericmemory_t *inst) +{ + if (nel == 0) { + Value *empty_alloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst)); + return mark_julia_type(ctx, empty_alloc, true, typ); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout; + assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL); + size_t elsz = layout->size; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + int zi = ((jl_datatype_t*)typ)->zeroinit; + if (isboxed) + elsz = sizeof(void*); + size_t nbytes; + bool overflow = __builtin_mul_overflow(nel, elsz, &nbytes); + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + overflow |= __builtin_add_overflow(nbytes, nel, &nbytes); + } + // overflow if signed size is too big or nel is too big (the latter matters iff elsz==0) + ssize_t tmp=1; + overflow |= __builtin_add_overflow(nel, 1, &tmp) || __builtin_add_overflow(nbytes, 1, &tmp); + if (overflow) + emit_error(ctx, prepare_call(jlargumenterror_func), "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + + auto T_size = ctx.types().T_size; + auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ); + auto cg_nbytes = ConstantInt::get(T_size, nbytes); + auto cg_nel = ConstantInt::get(T_size, nel); + size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); + // if allocation fits within GC pools + int pooled = tot <= GC_MAX_SZCLASS; + Value *alloc, *decay_alloc, *memory_ptr; + jl_aliasinfo_t aliasinfo; + if (pooled) { + alloc = emit_allocobj(ctx, tot, cg_typ, false, JL_SMALL_BYTE_ALIGNMENT); + decay_alloc = decay_derived(ctx, alloc); + memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1); + setName(ctx.emission_context, memory_ptr, "memory_ptr"); + auto objref = emit_pointer_from_objref(ctx, alloc); + Value *memory_data = emit_ptrgep(ctx, objref, JL_SMALL_BYTE_ALIGNMENT); + auto *store = ctx.builder.CreateAlignedStore(memory_data, memory_ptr, Align(sizeof(void*))); + aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); + aliasinfo.decorateInst(store); + setName(ctx.emission_context, memory_data, "memory_data"); + } else { // just use the dynamic length version since the malloc will be slow anyway + alloc = emit_genericmemory_unchecked(ctx, cg_nbytes, cg_typ); + } + emit_memory_zeroinit_and_stores(ctx, typ, alloc, cg_nbytes, cg_nel, zi); + return mark_julia_type(ctx, alloc, true, typ); +} + +static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval_t nel, jl_genericmemory_t *inst) +{ + emit_typecheck(ctx, nel, (jl_value_t*)jl_long_type, "memorynew"); + nel = update_julia_type(ctx, nel, 
(jl_value_t*)jl_long_type); + if (nel.typ == jl_bottom_type) + return jl_cgval_t(); + + const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout; + assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL); + size_t elsz = layout->size; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + int zi = ((jl_datatype_t*)typ)->zeroinit; + if (isboxed) + elsz = sizeof(void*); + + auto T_size = ctx.types().T_size; + BasicBlock *emptymemBB, *nonemptymemBB, *retvalBB; + emptymemBB = BasicBlock::Create(ctx.builder.getContext(), "emptymem"); + nonemptymemBB = BasicBlock::Create(ctx.builder.getContext(), "nonemptymem"); + retvalBB = BasicBlock::Create(ctx.builder.getContext(), "retval"); + auto nel_unboxed = emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_long_type); + Value *memorynew_empty = ctx.builder.CreateICmpEQ(nel_unboxed, ConstantInt::get(T_size, 0)); + setName(ctx.emission_context, memorynew_empty, "memorynew_empty"); + ctx.builder.CreateCondBr(memorynew_empty, emptymemBB, nonemptymemBB); + // if nel == 0 + emptymemBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(emptymemBB); + auto emptyalloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst)); + ctx.builder.CreateBr(retvalBB); + nonemptymemBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(nonemptymemBB); + + auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ); + auto cg_elsz = ConstantInt::get(T_size, elsz); + + FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, ArrayRef(T_size)); + // compute nbytes with possible overflow + Value *prod_with_overflow = ctx.builder.CreateCall(intr, {nel_unboxed, cg_elsz}); + Value *nbytes = ctx.builder.CreateExtractValue(prod_with_overflow, 0); + Value *overflow = ctx.builder.CreateExtractValue(prod_with_overflow, 1); + if (isunion) { + // if isunion, we need to allocate the union selector bytes as well + intr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sadd_with_overflow, ArrayRef(T_size)); + Value *add_with_overflow = ctx.builder.CreateCall(intr, {nel_unboxed, nbytes}); + nbytes = ctx.builder.CreateExtractValue(add_with_overflow, 0); + Value *overflow1 = ctx.builder.CreateExtractValue(add_with_overflow, 1); + overflow = ctx.builder.CreateOr(overflow, overflow1); + } + Value *negnel = ctx.builder.CreateICmpSLT(nel_unboxed, ConstantInt::get(T_size, 0)); + overflow = ctx.builder.CreateOr(overflow, negnel); + auto cg_typemax_int = ConstantInt::get(T_size, (((size_t)-1)>>1)-1); + Value *tobignel = ctx.builder.CreateICmpSLT(cg_typemax_int, elsz == 0 ? 
nel_unboxed: nbytes); + overflow = ctx.builder.CreateOr(overflow, tobignel); + Value *notoverflow = ctx.builder.CreateNot(overflow); + error_unless(ctx, prepare_call(jlargumenterror_func), notoverflow, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + // actually allocate the memory + + Value *alloc = emit_genericmemory_unchecked(ctx, nbytes, cg_typ); + emit_memory_zeroinit_and_stores(ctx, typ, alloc, nbytes, nel_unboxed, zi); + ctx.builder.CreateBr(retvalBB); + nonemptymemBB = ctx.builder.GetInsertBlock(); + // phi node to choose which side of branch + retvalBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(retvalBB); + auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); + phi->addIncoming(emptyalloc, emptymemBB); + phi->addIncoming(alloc, nonemptymemBB); + return mark_julia_type(ctx, phi, true, typ); +} + +static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, const jl_datatype_layout_t *layout, jl_value_t *typ) +{ + //jl_cgval_t argv[] = { + // mark_julia_type(ctx, mem, true, jl_any_type), + // mark_julia_type(ctx, data, false, jl_voidpointer_type) + //}; + //return emit_new_struct(ctx, typ, 3, argv); + Value *ref = Constant::getNullValue(get_memoryref_type(ctx.builder.getContext(), ctx.types().T_size, layout, 0)); + ref = ctx.builder.CreateInsertValue(ref, data, 0); + ref = ctx.builder.CreateInsertValue(ref, mem, 1); + setName(ctx.emission_context, ref, "memory_ref"); + return mark_julia_type(ctx, ref, false, typ); +} + +static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &mem, const jl_datatype_layout_t *layout, jl_value_t *typ) +{ + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isghost = layout->size == 0; + Value *data = (!isboxed && isunion) || isghost ? 
ConstantInt::get(ctx.types().T_size, 0) : emit_genericmemoryptr(ctx, boxed(ctx, mem), layout, 0); + return _emit_memoryref(ctx, boxed(ctx, mem), data, layout, typ); +} + +static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout) +{ + if (!ref.inline_roots.empty()) { + LLVMContext &C = ctx.builder.getContext(); + StructType *type = get_memoryref_type(C, ctx.types().T_size, layout, 0); + LoadInst *load0 = ctx.builder.CreateLoad(type->getElementType(0), ref.V); + jl_aliasinfo_t ai0 = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa); + ai0.decorateInst(load0); + setName(ctx.emission_context, load0, "memory_ref_FCA0"); + Value *root = ctx.builder.CreateBitCast(ref.inline_roots[0], type->getElementType(1)); + Value *load = Constant::getNullValue(type); + load = ctx.builder.CreateInsertValue(load, load0, 0); + load = ctx.builder.CreateInsertValue(load, root, 1); + return load; + } + else if (ref.ispointer()) { + LLVMContext &C = ctx.builder.getContext(); + Type *type = get_memoryref_type(C, ctx.types().T_size, layout, 0); + LoadInst *load = ctx.builder.CreateLoad(type, data_pointer(ctx, ref)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa); + ai.decorateInst(load); + setName(ctx.emission_context, load, "memory_ref_FCA"); + return load; + } + else { + return ref.V; + } } + +static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cgval_t idx, jl_value_t *inbounds, const jl_datatype_layout_t *layout) +{ + ++EmittedArrayNdIndex; + emit_typecheck(ctx, idx, (jl_value_t*)jl_long_type, "memoryref"); + idx = update_julia_type(ctx, idx, (jl_value_t*)jl_long_type); + if (idx.typ == jl_bottom_type) + return jl_cgval_t(); + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *data = CreateSimplifiedExtractValue(ctx, V, 0); + maybeSetName(ctx.emission_context, data, "memoryref_data"); + Value *mem = CreateSimplifiedExtractValue(ctx, V, 1); + maybeSetName(ctx.emission_context, mem, "memoryref_mem"); + Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type); + Value *offset = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, offset, "memoryref_offset"); + Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false); + bool bc = bounds_check_enabled(ctx, inbounds); +#if 1 + Value *ovflw = nullptr; +#endif + Value *newdata; + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isghost = layout->size == 0; + if ((!isboxed && isunion) || isghost) { + newdata = ctx.builder.CreateAdd(data, offset); + setName(ctx.emission_context, newdata, "memoryref_data+offset"); + if (bc) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend"); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + Value *inbound = ctx.builder.CreateICmpULT(newdata, mlen); + setName(ctx.emission_context, offset, "memoryref_isinbounds"); + ctx.builder.CreateCondBr(inbound, endBB, failBB); + failBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), + { mark_callee_rooted(ctx, boxed(ctx, ref)), i }); + ctx.builder.CreateUnreachable(); + endBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(endBB); + } + } + else { + Value *boffset; +#if 0 + if (bc) { + auto *MulF = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, offset->getType()); + CallInst *Mul 
= ctx.builder.CreateCall(MulF, {offset, elsz}); + boffset = ctx.builder.CreateExtractValue(Mul, 0); + ovflw = ctx.builder.CreateExtractValue(Mul, 1); + } + else +#else + if (bc) { + // n.b. we could boundscheck that -len<=offset<=len instead of using smul.ovflw, + // since we know that len*elsz does not overflow, + // and we can further rearrange that as ovflw = !( offset+len < len+len ) as unsigned math + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + ovflw = ctx.builder.CreateICmpUGE(ctx.builder.CreateAdd(offset, mlen), ctx.builder.CreateNUWAdd(mlen, mlen)); + setName(ctx.emission_context, ovflw, "memoryref_ovflw"); + } +#endif + boffset = ctx.builder.CreateMul(offset, elsz); + setName(ctx.emission_context, boffset, "memoryref_byteoffset"); + newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset); + setName(ctx.emission_context, newdata, "memoryref_data_byteoffset"); + (void)boffset; // LLVM is very bad at handling GEP with types different from the load + if (bc) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend"); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0); +#if 0 + Value *mend = mptr; + Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true); + mend = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), mptr, blen); + Value *inbound = ctx.builder.CreateAnd( + ctx.builder.CreateICmpULE(mptr, newdata), + ctx.builder.CreateICmpULT(newdata, mend)); + inbound = ctx.builder.CreateAnd( + ctx.builder.CreateNot(ovflw), + inbound); +#elif 1 + Value *bidx0 = ctx.builder.CreateSub( + ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size), + ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size)); + Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true); + setName(ctx.emission_context, blen, "memoryref_bytelen"); + Value *inbound = ctx.builder.CreateICmpULT(bidx0, blen); + setName(ctx.emission_context, inbound, "memoryref_isinbounds"); + inbound = ctx.builder.CreateAnd(ctx.builder.CreateNot(ovflw), inbound); + setName(ctx.emission_context, inbound, "memoryref_isinbounds¬ovflw"); +#else + Value *idx0; // (newdata - mptr) / elsz + idx0 = ctx.builder.CreateSub( + ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size), + ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size)); + idx0 = ctx.builder.CreateExactUDiv(idx0, elsz); + Value *inbound = ctx.builder.CreateICmpULT(idx0, mlen); #endif + ctx.builder.CreateCondBr(inbound, endBB, failBB); + failBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), + { mark_callee_rooted(ctx, boxed(ctx, ref)), i }); + ctx.builder.CreateUnreachable(); + endBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(endBB); + } + } + return _emit_memoryref(ctx, mem, newdata, layout, ref.typ); +} + +static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout) +{ + Value *offset; + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *data = CreateSimplifiedExtractValue(ctx, V, 0); + if (layout->flags.arrayelem_isunion || layout->size == 0) { + offset = data; + } + else { + Value *mem = CreateSimplifiedExtractValue(ctx, V, 1); + Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0); + // (data - mptr) / elsz + offset = ctx.builder.CreateSub( + ctx.builder.CreatePtrToInt(data, ctx.types().T_size), + 
ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size)); + setName(ctx.emission_context, offset, "memoryref_offset"); + Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false); + offset = ctx.builder.CreateExactUDiv(offset, elsz); + setName(ctx.emission_context, offset, "memoryref_offsetidx"); + } + offset = ctx.builder.CreateAdd(offset, ConstantInt::get(ctx.types().T_size, 1)); + return mark_julia_type(ctx, offset, false, jl_long_type); +} + +static Value *emit_memoryref_mem(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout) +{ + Value *V = emit_memoryref_FCA(ctx, ref, layout); + V = CreateSimplifiedExtractValue(ctx, V, 1); + maybeSetName(ctx.emission_context, V, "memoryref_mem"); + return V; +} + +static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout) +{ + assert(!layout->flags.arrayelem_isunion && layout->size != 0); + Value *newref = emit_memoryref_FCA(ctx, ref, layout); + Value *data = CreateSimplifiedExtractValue(ctx, newref, 0); + unsigned AS = AddressSpace::Loaded; + Value *mem = CreateSimplifiedExtractValue(ctx, newref, 1); + // rebuild GEP on data, so that we manually hoist this gc_loaded_func call over it, back to the original load + // we should add this to llvm-julia-licm too, so we can attempt hoisting over PhiNodes too (which aren't defined yet here) + IRBuilder<>::InsertPointGuard resetIP(ctx.builder); + SmallVector GEPlist; + data = data->stripPointerCastsSameRepresentation(); + while (GetElementPtrInst *GEP = dyn_cast(data)) { // ignoring bitcast will not be required with opaque pointers + GEPlist.push_back(GEP); + data = GEP->getPointerOperand()->stripPointerCastsSameRepresentation(); + } + data = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, data }); + if (!GEPlist.empty()) { + for (auto &GEP : make_range(GEPlist.rbegin(), GEPlist.rend())) { + GetElementPtrInst *GEP2 = cast(GEP->clone()); + GEP2->mutateType(PointerType::get(GEP->getResultElementType(), AS)); + GEP2->setOperand(GetElementPtrInst::getPointerOperandIndex(), data); + GEP2->setIsInBounds(true); + ctx.builder.Insert(GEP2); + data = GEP2; + } + } + setName(ctx.emission_context, data, "memoryref_data"); + return data; +} // Reset us back to codegen debug type #undef DEBUG_TYPE diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 086d925802f63..fdbe5ec9d9e29 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -1,5 +1,6 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license +#include "clang/AST/Type.h" #include "clang/Frontend/FrontendActions.h" #include "clang/StaticAnalyzer/Checkers/SValExplainer.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -14,6 +15,7 @@ #include "clang/Tooling/Tooling.h" #include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "llvm/Support/Debug.h" #include #include @@ -29,7 +31,7 @@ namespace { using namespace clang; using namespace ento; -#define PDP std::shared_ptr +typedef std::shared_ptr PDP; #define MakePDP make_unique static const Stmt *getStmtForDiagnostics(const ExplodedNode *N) @@ -199,7 +201,7 @@ class GCChecker static bool isGCTracked(const Expr *E); bool isGloballyRootedType(QualType Type) const; static void dumpState(const ProgramStateRef &State); - static bool declHasAnnotation(const clang::Decl *D, const char *which); + static const AnnotateAttr *declHasAnnotation(const clang::Decl *D, const char *which); static bool isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const SourceManager &SM); static const SourceManager &getSM(CheckerContext &C) { return C.getSourceManager(); } bool isSafepoint(const CallEvent &Call, CheckerContext &C) const; @@ -251,6 +253,18 @@ class GCChecker PDP VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override; }; + class SafepointBugVisitor : public BugReporterVisitor { + public: + SafepointBugVisitor() {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + } + + PDP VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override; + }; + class GCValueBugVisitor : public BugReporterVisitor { protected: SymbolRef Sym; @@ -364,6 +378,38 @@ PDP GCChecker::GCBugVisitor::VisitNode(const ExplodedNode *N, return nullptr; } +PDP GCChecker::SafepointBugVisitor::VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, PathSensitiveBugReport &BR) { + const ExplodedNode *PrevN = N->getFirstPred(); + unsigned NewSafepointDisabled = N->getState()->get(); + unsigned OldSafepointDisabled = PrevN->getState()->get(); + if (NewSafepointDisabled != OldSafepointDisabled) { + const Decl *D = &N->getCodeDecl(); + const AnnotateAttr *Ann = declHasAnnotation(D, "julia_not_safepoint"); + PathDiagnosticLocation Pos; + if (OldSafepointDisabled == (unsigned)-1) { + if (Ann) { + Pos = PathDiagnosticLocation{Ann->getLoc(), BRC.getSourceManager()}; + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + } else { + PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( + N->getLocationContext(), BRC.getSourceManager()); + if (Pos.isValid()) + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + //N->getLocation().dump(); + } + } else if (NewSafepointDisabled == (unsigned)-1) { + PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( + N->getLocationContext(), BRC.getSourceManager()); + if (Pos.isValid()) + return MakePDP(Pos, "Safepoints re-enabled here"); + //N->getLocation().dump(); + } + // n.b. 
there may be no position here to report if they were disabled by julia_notsafepoint_enter/leave + } + return nullptr; +} + PDP GCChecker::GCValueBugVisitor::ExplainNoPropagationFromExpr( const clang::Expr *FromWhere, const ExplodedNode *N, PathDiagnosticLocation Pos, BugReporterContext &BRC, PathSensitiveBugReport &BR) { @@ -712,12 +758,12 @@ void GCChecker::checkEndFunction(const clang::ReturnStmt *RS, } } -bool GCChecker::declHasAnnotation(const clang::Decl *D, const char *which) { +const AnnotateAttr *GCChecker::declHasAnnotation(const clang::Decl *D, const char *which) { for (const auto *Ann : D->specific_attrs()) { if (Ann->getAnnotation() == which) - return true; + return Ann; } - return false; + return nullptr; } bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const SourceManager &SM) { @@ -726,82 +772,92 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const S SourceLocation Loc = FD->getLocation(); StringRef Name = SM.getFilename(Loc); Name = llvm::sys::path::filename(Name); - if (Name.startswith("llvm-")) + if (Name.starts_with("llvm-")) return true; return false; } static bool isMutexLock(StringRef name) { return name == "uv_mutex_lock" || - //name == "uv_mutex_trylock" || + name == "uv_mutex_trylock" || name == "pthread_mutex_lock" || - //name == "pthread_mutex_trylock" || + name == "pthread_mutex_trylock" || + name == "__gthread_mutex_lock" || + name == "__gthread_mutex_trylock" || + name == "__gthread_recursive_mutex_lock" || + name == "__gthread_recursive_mutex_trylock" || name == "pthread_spin_lock" || - //name == "pthread_spin_trylock" || + name == "pthread_spin_trylock" || name == "uv_rwlock_rdlock" || - //name == "uv_rwlock_tryrdlock" || + name == "uv_rwlock_tryrdlock" || name == "uv_rwlock_wrlock" || - //name == "uv_rwlock_trywrlock" || + name == "uv_rwlock_trywrlock" || false; } static bool isMutexUnlock(StringRef name) { return name == "uv_mutex_unlock" || name == "pthread_mutex_unlock" || + name == "__gthread_mutex_unlock" || + name == "__gthread_recursive_mutex_unlock" || name == "pthread_spin_unlock" || name == "uv_rwlock_rdunlock" || name == "uv_rwlock_wrunlock" || false; } -#if LLVM_VERSION_MAJOR >= 13 -#define endswith_lower endswith_insensitive -#endif bool GCChecker::isGCTrackedType(QualType QT) { return isJuliaType( [](StringRef Name) { - if (Name.endswith_lower("jl_value_t") || - Name.endswith_lower("jl_svec_t") || - Name.endswith_lower("jl_sym_t") || - Name.endswith_lower("jl_expr_t") || - Name.endswith_lower("jl_code_info_t") || - Name.endswith_lower("jl_array_t") || - Name.endswith_lower("jl_method_t") || - Name.endswith_lower("jl_method_instance_t") || - Name.endswith_lower("jl_tupletype_t") || - Name.endswith_lower("jl_datatype_t") || - Name.endswith_lower("jl_typemap_entry_t") || - Name.endswith_lower("jl_typemap_level_t") || - Name.endswith_lower("jl_typename_t") || - Name.endswith_lower("jl_module_t") || - Name.endswith_lower("jl_tupletype_t") || - Name.endswith_lower("jl_gc_tracked_buffer_t") || - Name.endswith_lower("jl_binding_t") || - Name.endswith_lower("jl_ordereddict_t") || - Name.endswith_lower("jl_tvar_t") || - Name.endswith_lower("jl_typemap_t") || - Name.endswith_lower("jl_unionall_t") || - Name.endswith_lower("jl_methtable_t") || - Name.endswith_lower("jl_cgval_t") || - Name.endswith_lower("jl_codectx_t") || - Name.endswith_lower("jl_ast_context_t") || - Name.endswith_lower("jl_code_instance_t") || - Name.endswith_lower("jl_excstack_t") || - Name.endswith_lower("jl_task_t") || - 
Name.endswith_lower("jl_uniontype_t") || - Name.endswith_lower("jl_method_match_t") || - Name.endswith_lower("jl_vararg_t") || - Name.endswith_lower("jl_opaque_closure_t") || - Name.endswith_lower("jl_globalref_t") || - // Probably not technically true for these, but let's allow it - Name.endswith_lower("typemap_intersection_env") || - Name.endswith_lower("interpreter_state") || - Name.endswith_lower("jl_typeenv_t") || - Name.endswith_lower("jl_stenv_t") || - Name.endswith_lower("jl_varbinding_t") || - Name.endswith_lower("set_world") || - Name.endswith_lower("jl_codectx_t")) { + if (Name.ends_with_insensitive("jl_value_t") || + Name.ends_with_insensitive("jl_svec_t") || + Name.ends_with_insensitive("jl_sym_t") || + Name.ends_with_insensitive("jl_expr_t") || + Name.ends_with_insensitive("jl_code_info_t") || + Name.ends_with_insensitive("jl_array_t") || + Name.ends_with_insensitive("jl_genericmemory_t") || + //Name.ends_with_insensitive("jl_genericmemoryref_t") || + Name.ends_with_insensitive("jl_method_t") || + Name.ends_with_insensitive("jl_method_instance_t") || + Name.ends_with_insensitive("jl_debuginfo_t") || + Name.ends_with_insensitive("jl_tupletype_t") || + Name.ends_with_insensitive("jl_datatype_t") || + Name.ends_with_insensitive("jl_typemap_entry_t") || + Name.ends_with_insensitive("jl_typemap_level_t") || + Name.ends_with_insensitive("jl_typename_t") || + Name.ends_with_insensitive("jl_module_t") || + Name.ends_with_insensitive("jl_tupletype_t") || + Name.ends_with_insensitive("jl_gc_tracked_buffer_t") || + Name.ends_with_insensitive("jl_binding_t") || + Name.ends_with_insensitive("jl_binding_partition_t") || + Name.ends_with_insensitive("jl_ordereddict_t") || + Name.ends_with_insensitive("jl_tvar_t") || + Name.ends_with_insensitive("jl_typemap_t") || + Name.ends_with_insensitive("jl_unionall_t") || + Name.ends_with_insensitive("jl_methtable_t") || + Name.ends_with_insensitive("jl_cgval_t") || + Name.ends_with_insensitive("jl_codectx_t") || + Name.ends_with_insensitive("jl_ast_context_t") || + Name.ends_with_insensitive("jl_code_instance_t") || + Name.ends_with_insensitive("jl_excstack_t") || + Name.ends_with_insensitive("jl_task_t") || + Name.ends_with_insensitive("jl_uniontype_t") || + Name.ends_with_insensitive("jl_method_match_t") || + Name.ends_with_insensitive("jl_vararg_t") || + Name.ends_with_insensitive("jl_opaque_closure_t") || + Name.ends_with_insensitive("jl_globalref_t") || + Name.ends_with_insensitive("jl_abi_override_t") || + // Probably not technically true for these, but let's allow it as a root + Name.ends_with_insensitive("jl_ircode_state") || + Name.ends_with_insensitive("typemap_intersection_env") || + Name.ends_with_insensitive("interpreter_state") || + Name.ends_with_insensitive("jl_typeenv_t") || + Name.ends_with_insensitive("jl_stenv_t") || + Name.ends_with_insensitive("jl_varbinding_t") || + Name.ends_with_insensitive("set_world") || + Name.ends_with_insensitive("jl_ptr_kind_union_t") || + Name.ends_with_insensitive("jl_codectx_t")) { return true; } return false; @@ -824,7 +880,7 @@ bool GCChecker::isGCTracked(const Expr *E) { bool GCChecker::isGloballyRootedType(QualType QT) const { return isJuliaType( - [](StringRef Name) { return Name.endswith("jl_sym_t"); }, QT); + [](StringRef Name) { return Name.ends_with("jl_sym_t"); }, QT); } bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const { @@ -853,9 +909,11 @@ bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const { if (!Decl || !FD) { if (Callee == nullptr) { 
isCalleeSafepoint = true; - } else if (const TypedefType *TDT = dyn_cast(Callee->getType())) { - isCalleeSafepoint = - !declHasAnnotation(TDT->getDecl(), "julia_not_safepoint"); + } else if (const ElaboratedType *ET = dyn_cast(Callee->getType())){ + if (const TypedefType *TDT = dyn_cast(ET->getNamedType())) { + isCalleeSafepoint = + !declHasAnnotation(TDT->getDecl(), "julia_not_safepoint"); + } } else if (const CXXPseudoDestructorExpr *PDE = dyn_cast(Callee)) { // A pseudo-destructor is an expression that looks like a member @@ -868,9 +926,9 @@ bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const { if (FD->getBuiltinID() != 0 || FD->isTrivial()) isCalleeSafepoint = false; else if (FD->getDeclName().isIdentifier() && - (FD->getName().startswith("uv_") || - FD->getName().startswith("unw_") || - FD->getName().startswith("_U")) && + (FD->getName().starts_with("uv_") || + FD->getName().starts_with("unw_") || + FD->getName().starts_with("_U")) && FD->getName() != "uv_run") isCalleeSafepoint = false; else @@ -900,7 +958,7 @@ bool GCChecker::processPotentialSafepoint(const CallEvent &Call, isGCTrackedType(ParmType->getPointeeType())) { // This is probably an out parameter. Find the value it refers to now. SVal Loaded = - State->getSVal(Call.getArgSVal(i).getAs().getValue()); + State->getSVal(*(Call.getArgSVal(i).getAs())); SpeciallyRootedSymbol = Loaded.getAsSymbol(); continue; } @@ -1007,13 +1065,13 @@ bool GCChecker::processAllocationOfResult(const CallEvent &Call, // global roots. StringRef FDName = FD->getDeclName().isIdentifier() ? FD->getName() : ""; - if (FDName.startswith("jl_box_") || FDName.startswith("ijl_box_")) { + if (FDName.starts_with("jl_box_") || FDName.starts_with("ijl_box_")) { SVal Arg = Call.getArgSVal(0); if (auto CI = Arg.getAs()) { const llvm::APSInt &Value = CI->getValue(); bool GloballyRooted = false; const int64_t NBOX_C = 1024; - if (FDName.startswith("jl_box_u") || FDName.startswith("ijl_box_u")) { + if (FDName.starts_with("jl_box_u") || FDName.starts_with("ijl_box_u")) { if (Value < NBOX_C) { GloballyRooted = true; } @@ -1123,10 +1181,10 @@ void GCChecker::checkDerivingExpr(const Expr *Result, const Expr *Parent, // TODO: We may want to refine this. This is to track pointers through the // array list in jl_module_t. 
bool ParentIsModule = isJuliaType( - [](StringRef Name) { return Name.endswith("jl_module_t"); }, + [](StringRef Name) { return Name.ends_with("jl_module_t"); }, Parent->getType()); bool ResultIsArrayList = isJuliaType( - [](StringRef Name) { return Name.endswith("arraylist_t"); }, + [](StringRef Name) { return Name.ends_with("arraylist_t"); }, Result->getType()); if (!(ParentIsModule && ResultIsArrayList) && isGCTracked(Parent)) { ResultTracked = false; @@ -1302,6 +1360,7 @@ void GCChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { Report->addNote( "Tried to call method defined here", PathDiagnosticLocation::create(FD, C.getSourceManager())); + Report->addVisitor(make_unique()); }, C, ("Calling potential safepoint as " + Call.getKindAsString() + " from function annotated JL_NOTSAFEPOINT").str()); @@ -1398,7 +1457,8 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { } else if (name == "JL_GC_PUSH1" || name == "JL_GC_PUSH2" || name == "JL_GC_PUSH3" || name == "JL_GC_PUSH4" || name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6" || - name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8") { + name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8" || + name == "JL_GC_PUSH9") { ProgramStateRef State = C.getState(); // Transform slots to roots, transform values to rooted unsigned NumArgs = CE->getNumArgs(); @@ -1478,7 +1538,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { } } if (FD) { - Loc ItemsLoc = State->getLValue(FD, ArrayList).getAs().getValue(); + Loc ItemsLoc = *(State->getLValue(FD, ArrayList).getAs()); SVal Items = State->getSVal(ItemsLoc); if (Items.isUnknown()) { Items = C.getSValBuilder().conjureSymbolVal( @@ -1646,7 +1706,7 @@ void GCChecker::checkLocation(SVal SLoc, bool IsLoad, const Stmt *S, // better than this. 
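/* Illustrative sketch (assumes julia.h): the rooting pattern that the
 * JL_GC_PUSH1..JL_GC_PUSH9 / JL_GC_POP handling in evalCall above models --
 * addresses pushed into the frame are treated as roots, and the values stored
 * through them stay rooted across safepoints until the matching JL_GC_POP.
 * `example_make_pair` is a hypothetical helper, not code from this patch. */
static jl_value_t *example_make_pair(jl_value_t *a, jl_value_t *b)
{
    jl_value_t *tmp = NULL;
    JL_GC_PUSH3(&a, &b, &tmp);            /* slots become roots */
    tmp = (jl_value_t*)jl_svec2(a, b);    /* allocation may hit a safepoint; a and b stay protected */
    JL_GC_POP();
    return tmp;                           /* rooting the result is the caller's job */
}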
if (IsLoad && (RS = State->get(SLoc.getAsRegion()))) { SymbolRef LoadedSym = - State->getSVal(SLoc.getAs().getValue()).getAsSymbol(); + State->getSVal(*SLoc.getAs()).getAsSymbol(); if (LoadedSym) { const ValueState *ValS = State->get(LoadedSym); if (!ValS || !ValS->isRooted() || ValS->RootDepth > RS->RootedAtDepth) { diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index de5f2a2770c04..5e243ddda28c9 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -15,12 +15,13 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, ios_t *z, ios_t *s) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_mis_fallback(void *native_code, arraylist_t* MIs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE -JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE JL_DLLEXPORT void *jl_LLVMCreateDisasm_fallback(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_fallback(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE @@ -38,17 +39,32 @@ JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t image_base, const struct _ (void)image_base; (void)fptrs; (void)linfos; (void)n; } -JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_fallback(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) +JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t *unspec) { - return NULL; + jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call); } -JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t *unspec) +JL_DLLEXPORT int jl_compile_codeinst_fallback(jl_code_instance_t *unspec) { - jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call); + // Do nothing. The caller will notice that we failed to provide an ->invoke and trigger + // appropriate fallbacks. 
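/* Illustrative sketch (assumes julia_internal.h): the caller-side pattern the
 * comment above is describing.  When the stub returns 0 and leaves
 * codeinst->invoke unset, dispatch can fall back to the interpreter entry
 * point.  `call_or_interpret` is a hypothetical helper; the real callers do
 * considerably more bookkeeping. */
static jl_value_t *call_or_interpret(jl_code_instance_t *ci, jl_value_t *f,
                                     jl_value_t **args, uint32_t nargs)
{
    jl_callptr_t invoke = jl_atomic_load_acquire(&ci->invoke);
    if (invoke == NULL)
        invoke = &jl_fptr_interpret_call;  /* no native entry point was produced */
    return invoke(f, args, nargs, ci);
}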
+ return 0; } -JL_DLLEXPORT void jl_generate_fptr_for_oc_wrapper_fallback(jl_code_instance_t *unspec) UNAVAILABLE +JL_DLLEXPORT void jl_emit_codeinst_to_jit_fallback(jl_code_instance_t *codeinst, jl_code_info_t *src) +{ + jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); + if (jl_is_code_info(inferred)) + return; + if (jl_is_svec(src->edges)) { + jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src->edges); + jl_gc_wb(codeinst, src->edges); + } + jl_atomic_store_release(&codeinst->debuginfo, src->debuginfo); + jl_gc_wb(codeinst, src->debuginfo); + jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src); + jl_gc_wb(codeinst, src); +} JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void) { @@ -69,7 +85,8 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void) return 0; } -JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) UNAVAILABLE +JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, int _trim, int _external_linkage, size_t _world) UNAVAILABLE +JL_DLLEXPORT void *jl_emit_native_fallback(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _external_linkage) UNAVAILABLE JL_DLLEXPORT void jl_dump_compiles_fallback(void *s) { @@ -107,59 +124,8 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr) return 0; } -JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, int lower_intrinsics) UNAVAILABLE - -JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, int Speedup, int Size, - int lower_intrinsics, int dump_native, int external_use, int llvm_only) UNAVAILABLE - JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { } -JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraJuliaLICMPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddAllocOptPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_fallback(void *PM, bool_t imaging_mode) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t Strong) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE - -JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE - -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE -#define 
LOOP_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE - -#include "llvm-julia-passes.inc" - -#undef MODULE_PASS -#undef CGSCC_PASS -#undef FUNCTION_PASS -#undef LOOP_PASS - //LLVM C api to the julia JIT JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE diff --git a/src/codegen.cpp b/src/codegen.cpp index 122170ae3fa97..7bc14d2d0347f 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -16,13 +16,14 @@ #include #include #include +#include #include // target machine computation #include #include #include -#include +#include #include #include @@ -40,10 +41,10 @@ #include #include #include +#include // support #include -#include #include #include #include @@ -76,6 +77,10 @@ #include #include +#ifdef USE_ITTAPI +#include "ittapi/ittnotify.h" +#endif + using namespace llvm; static bool jl_fpo_disabled(const Triple &TT) { @@ -86,7 +91,7 @@ static bool jl_fpo_disabled(const Triple &TT) { // MSAN doesn't support FPO return true; #endif - if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD()) { + if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD() || TT.isOSOpenBSD()) { return true; } return false; @@ -125,6 +130,9 @@ auto getFloatTy(LLVMContext &ctxt) { auto getDoubleTy(LLVMContext &ctxt) { return Type::getDoubleTy(ctxt); } +auto getBFloatTy(LLVMContext &ctxt) { + return Type::getBFloatTy(ctxt); +} auto getFP128Ty(LLVMContext &ctxt) { return Type::getFP128Ty(ctxt); } @@ -134,23 +142,8 @@ auto getVoidTy(LLVMContext &ctxt) { auto getCharTy(LLVMContext &ctxt) { return getInt32Ty(ctxt); } -auto getInt8PtrTy(LLVMContext &ctxt) { - return Type::getInt8PtrTy(ctxt); -} -auto getInt16PtrTy(LLVMContext &ctxt) { - return Type::getInt16PtrTy(ctxt); -} -auto getInt32PtrTy(LLVMContext &ctxt) { - return Type::getInt32PtrTy(ctxt); -} -auto getInt64PtrTy(LLVMContext &ctxt) { - return Type::getInt64PtrTy(ctxt); -} -auto getFloatPtrTy(LLVMContext &ctxt) { - return Type::getFloatPtrTy(ctxt); -} -auto getDoublePtrTy(LLVMContext &ctxt) { - return Type::getDoublePtrTy(ctxt); +auto getPointerTy(LLVMContext &ctxt) { + return PointerType::get(ctxt, 0); } typedef Instruction TerminatorInst; @@ -160,7 +153,6 @@ typedef Instruction TerminatorInst; #endif #include "jitlayers.h" -#include "llvm-codegen-shared.h" #include "processor.h" #include "julia_assert.h" @@ -171,12 +163,60 @@ void setName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) { // we do the constant check again later, duplicating it here just makes sure the assertion // fires on debug builds even if debug info is not enabled + // note that if this assertion fires then the implication is that the caller of setName + // is not checking that setName is only called for non-folded instructions (e.g. folded bitcasts + // and 0-byte geps), which can result in information loss on the renamed instruction. 
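// Illustrative sketch (assumes llvm/IR/IRBuilder.h): the caller-side hazard the
// note above describes.  IRBuilder hands back the original operand for no-op
// casts (and folders can do the same for 0-byte GEPs), so the result is not
// always a fresh Instruction; naming it unconditionally would rename the
// original value or trip this assertion.  `cast_and_maybe_name` is a
// hypothetical helper, not code from this patch.
static void cast_and_maybe_name(llvm::IRBuilderBase &B, llvm::Value *V, llvm::Type *DestTy)
{
    llvm::Value *C = B.CreateBitCast(V, DestTy);    // returns V itself when the types already match
    if (C != V && llvm::isa<llvm::Instruction>(C))  // only name a genuinely new instruction
        C->setName("casted");
}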
assert((isa(V) || isa(V)) && "Should only set names on instructions!"); - if (params.debug_level && !isa(V)) { + if (!isa(V)) { V->setName(Name); } } +void maybeSetName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) +{ + // To be used when we may get an Instruction or something that is not an instruction i.e Constants/Arguments + if (isa(V)) + V->setName(Name); +} + +void setName(jl_codegen_params_t ¶ms, Value *V, std::function GetName) +{ + assert((isa(V) || isa(V)) && "Should only set names on instructions!"); + if (!params.getContext().shouldDiscardValueNames() && !isa(V)) + V->setName(Twine(GetName())); +} + +void setNameWithField(jl_codegen_params_t ¶ms, Value *V, std::function GetObjName, jl_datatype_t *jt, unsigned idx, const Twine &suffix) +{ + assert((isa(V) || isa(V)) && "Should only set names on instructions!"); + if (!params.getContext().shouldDiscardValueNames() && !isa(V)) { + if (jl_is_tuple_type(jt)){ + V->setName(Twine(GetObjName()) + "[" + Twine(idx + 1) + "]"+ suffix); + return; + } + + if (jl_is_namedtuple_type(jt)) { + auto names = jl_tparam0(jt); + assert(jl_is_tuple(names)); + if (idx < jl_nfields(names)) { + auto name = jl_fieldref(names, idx); + assert(jl_is_symbol(name)); + V->setName(Twine(GetObjName()) + "." + Twine(jl_symbol_name((jl_sym_t*)name)) + suffix); + return; + } + } else { + auto flds = jl_field_names(jt); + if (idx < jl_svec_len(flds)) { + auto name = jl_svecref(flds, idx); + assert(jl_is_symbol(name)); + V->setName(Twine(GetObjName()) + "." + Twine(jl_symbol_name((jl_sym_t*)name)) + suffix); + return; + } + } + V->setName(Twine(GetObjName()) + "." + Twine("unknown field") + suffix); + } +} + STATISTIC(EmittedAllocas, "Number of allocas emitted"); STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted"); STATISTIC(ModulesCreated, "Number of LLVM Modules created"); @@ -191,7 +231,6 @@ STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted"); STATISTIC(EmittedInvokes, "Number of invokes emitted"); STATISTIC(EmittedCalls, "Number of calls emitted"); STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted"); -STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted"); STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted"); STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted"); STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated"); @@ -239,12 +278,14 @@ extern void _chkstk(void); // types struct jl_typecache_t { + Type *T_ptr; Type *T_size; Type *T_jlvalue; Type *T_pjlvalue; Type *T_prjlvalue; Type *T_ppjlvalue; Type *T_pprjlvalue; + StructType *T_jlgenericmemory; StructType *T_jlarray; Type *T_pjlarray; FunctionType *T_jlfunc; @@ -252,24 +293,24 @@ struct jl_typecache_t { IntegerType *T_sigatomic; - Type *T_ppint8; unsigned sizeof_ptr; Align alignof_ptr; bool initialized; jl_typecache_t() : - T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr), - T_ppjlvalue(nullptr), T_pprjlvalue(nullptr), T_jlarray(nullptr), - T_pjlarray(nullptr), T_jlfunc(nullptr), T_jlfuncparams(nullptr), - T_sigatomic(nullptr), T_ppint8(nullptr), initialized(false) {} + T_ptr(nullptr), T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr), + T_ppjlvalue(nullptr), T_pprjlvalue(nullptr), + T_jlgenericmemory(nullptr), T_jlarray(nullptr), T_pjlarray(nullptr), + T_jlfunc(nullptr), T_jlfuncparams(nullptr), T_sigatomic(nullptr), + initialized(false) {} void initialize(LLVMContext &context, const DataLayout &DL) { if (initialized) { return; } initialized = 
true; - T_ppint8 = PointerType::get(getInt8PtrTy(context), 0); + T_ptr = getPointerTy(context); T_sigatomic = Type::getIntNTy(context, sizeof(sig_atomic_t) * 8); T_size = DL.getIntPtrType(context); sizeof_ptr = DL.getPointerSize(); @@ -286,15 +327,12 @@ struct jl_typecache_t { T_jlfuncparams = JuliaType::get_jlfuncparams_ty(context); assert(T_jlfuncparams != NULL); - Type *vaelts[] = {PointerType::get(getInt8Ty(context), AddressSpace::Loaded) - , T_size - , getInt16Ty(context) - , getInt16Ty(context) - , getInt32Ty(context) + T_jlgenericmemory = StructType::get(context, { T_size, T_pprjlvalue /* [, real-owner] */ }); + Type *vaelts[] = { PointerType::get(getInt8Ty(context), AddressSpace::Loaded), + PointerType::get(T_jlgenericmemory, AddressSpace::Tracked), + // dimsize[ndims] }; - static_assert(sizeof(jl_array_flags_t) == sizeof(int16_t), - "Size of jl_array_flags_t is not the same as int16_t"); - T_jlarray = StructType::get(context, makeArrayRef(vaelts)); + T_jlarray = StructType::get(context, ArrayRef(vaelts)); T_pjlarray = PointerType::get(T_jlarray, 0); } }; @@ -309,19 +347,19 @@ struct jl_tbaacache_t { MDNode *tbaa_unionselbyte; // a selector byte in isbits Union struct fields MDNode *tbaa_data; // Any user data that `pointerset/ref` are allowed to alias MDNode *tbaa_binding; // jl_binding_t::value - MDNode *tbaa_value; // jl_value_t, that is not jl_array_t + MDNode *tbaa_value; // jl_value_t, that is not jl_array_t or jl_genericmemory_t MDNode *tbaa_mutab; // mutable type MDNode *tbaa_datatype; // datatype MDNode *tbaa_immut; // immutable type MDNode *tbaa_ptrarraybuf; // Data in an array of boxed values MDNode *tbaa_arraybuf; // Data in an array of POD - MDNode *tbaa_array; // jl_array_t - MDNode *tbaa_arrayptr; // The pointer inside a jl_array_t + MDNode *tbaa_array; // jl_array_t or jl_genericmemory_t + MDNode *tbaa_arrayptr; // The pointer inside a jl_array_t (to memoryref) MDNode *tbaa_arraysize; // A size in a jl_array_t - MDNode *tbaa_arraylen; // The len in a jl_array_t - MDNode *tbaa_arrayflags; // The flags in a jl_array_t - MDNode *tbaa_arrayoffset; // The offset in a jl_array_t - MDNode *tbaa_arrayselbyte; // a selector byte in a isbits Union jl_array_t + MDNode *tbaa_arrayselbyte; // a selector byte in a isbits Union jl_genericmemory_t + MDNode *tbaa_memoryptr; // The pointer inside a jl_genericmemory_t + MDNode *tbaa_memorylen; // The length in a jl_genericmemory_t + MDNode *tbaa_memoryown; // The owner in a foreign jl_genericmemory_t MDNode *tbaa_const; // Memory that is immutable by the time LLVM can see it bool initialized; @@ -330,8 +368,8 @@ struct jl_tbaacache_t { tbaa_value(nullptr), tbaa_mutab(nullptr), tbaa_datatype(nullptr), tbaa_immut(nullptr), tbaa_ptrarraybuf(nullptr), tbaa_arraybuf(nullptr), tbaa_array(nullptr), tbaa_arrayptr(nullptr), tbaa_arraysize(nullptr), - tbaa_arraylen(nullptr), tbaa_arrayflags(nullptr), tbaa_arrayoffset(nullptr), - tbaa_arrayselbyte(nullptr), tbaa_const(nullptr), initialized(false) {} + tbaa_arrayselbyte(nullptr), tbaa_memoryptr(nullptr), tbaa_memorylen(nullptr), tbaa_memoryown(nullptr), + tbaa_const(nullptr), initialized(false) {} auto tbaa_make_child(MDBuilder &mbuilder, const char *name, MDNode *parent = nullptr, bool isConstant = false) { MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? 
parent : tbaa_root); @@ -369,11 +407,11 @@ struct jl_tbaacache_t { std::tie(tbaa_array, tbaa_array_scalar) = tbaa_make_child(mbuilder, "jtbaa_array"); tbaa_arrayptr = tbaa_make_child(mbuilder, "jtbaa_arrayptr", tbaa_array_scalar).first; tbaa_arraysize = tbaa_make_child(mbuilder, "jtbaa_arraysize", tbaa_array_scalar).first; - tbaa_arraylen = tbaa_make_child(mbuilder, "jtbaa_arraylen", tbaa_array_scalar).first; - tbaa_arrayflags = tbaa_make_child(mbuilder, "jtbaa_arrayflags", tbaa_array_scalar).first; - tbaa_arrayoffset = tbaa_make_child(mbuilder, "jtbaa_arrayoffset", tbaa_array_scalar).first; - tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first; tbaa_arrayselbyte = tbaa_make_child(mbuilder, "jtbaa_arrayselbyte", tbaa_array_scalar).first; + tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar).first; + tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar).first; + tbaa_memoryown = tbaa_make_child(mbuilder, "jtbaa_memoryown", tbaa_array_scalar).first; + tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first; } }; @@ -386,7 +424,7 @@ struct jl_noaliascache_t { MDNode *gcframe; // GC frame MDNode *stack; // Stack slot MDNode *data; // Any user data that `pointerset/ref` are allowed to alias - MDNode *type_metadata; // Non-user-accessible type metadata incl. size, union selectors, etc. + MDNode *type_metadata; // Non-user-accessible type metadata incl. union selectors, etc. MDNode *constant; // Memory that is immutable by the time LLVM can see it jl_regions_t(): gcframe(nullptr), stack(nullptr), data(nullptr), type_metadata(nullptr), constant(nullptr) {} @@ -493,9 +531,12 @@ struct JuliaVariable { if (GlobalValue *V = m->getNamedValue(name)) return cast(V); auto T_size = m->getDataLayout().getIntPtrType(m->getContext()); - return new GlobalVariable(*m, _type(T_size), + auto var = new GlobalVariable(*m, _type(T_size), isconst, GlobalVariable::ExternalLinkage, NULL, name); + if (Triple(m->getTargetTriple()).isOSWindows()) + var->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLImportStorageClass); // Cross-library imports must be explicit for COFF (Windows) + return var; } GlobalVariable *realize(jl_codectx_t &ctx); }; @@ -560,18 +601,21 @@ static inline void add_named_global(StringRef name, T *addr) add_named_global(name, (void*)(uintptr_t)addr); } -AttributeSet Attributes(LLVMContext &C, std::initializer_list attrkinds) +AttributeSet Attributes(LLVMContext &C, std::initializer_list attrkinds, std::initializer_list extra={}) { - SmallVector attrs(attrkinds.size()); + SmallVector attrs(attrkinds.size() + extra.size()); for (size_t i = 0; i < attrkinds.size(); i++) attrs[i] = Attribute::get(C, attrkinds.begin()[i]); - return AttributeSet::get(C, makeArrayRef(attrs)); + for (size_t i = 0; i < extra.size(); i++) + attrs[attrkinds.size() + i] = extra.begin()[i]; + return AttributeSet::get(C, ArrayRef(attrs)); } static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); } static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); } static FunctionType *get_func2_sig(LLVMContext &C) { return JuliaType::get_jlfunc2_ty(C); } +static FunctionType *get_func3_sig(LLVMContext &C) { return JuliaType::get_jlfunc3_ty(C); } static FunctionType *get_donotdelete_sig(LLVMContext &C) { return FunctionType::get(getVoidTy(C), true); @@ -588,9 +632,12 @@ static AttributeList get_func_attrs(LLVMContext &C) static AttributeList get_donotdelete_func_attrs(LLVMContext 
&C) { - AttributeSet FnAttrs = Attributes(C, {Attribute::InaccessibleMemOnly, Attribute::WillReturn, Attribute::NoUnwind}); + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, - FnAttrs, + AttributeSet::get(C, FnAttrs), Attributes(C, {}), None); } @@ -611,20 +658,53 @@ static AttributeList get_attrs_basic(LLVMContext &C) None); } -static AttributeList get_attrs_sext(LLVMContext &C) +static AttributeList get_attrs_box_float(LLVMContext &C, unsigned nbytes) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addDereferenceableAttr(nbytes); + RetAttrs.addAlignmentAttr(Align(alignof(void*))); return AttributeList::get(C, - AttributeSet(), - Attributes(C, {Attribute::NonNull}), - {Attributes(C, {Attribute::SExt})}); + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); } -static AttributeList get_attrs_zext(LLVMContext &C) +static AttributeList get_attrs_box_sext(LLVMContext &C, unsigned nbytes) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addAttribute(Attribute::getWithDereferenceableBytes(C, nbytes)); + RetAttrs.addDereferenceableAttr(nbytes); + RetAttrs.addAlignmentAttr(Align(alignof(void*))); return AttributeList::get(C, - AttributeSet(), - Attributes(C, {Attribute::NonNull}), - {Attributes(C, {Attribute::ZExt})}); + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + AttributeSet::get(C, {Attribute::get(C, Attribute::SExt)})); +} + +static AttributeList get_attrs_box_zext(LLVMContext &C, unsigned nbytes) +{ + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addDereferenceableAttr(nbytes); + RetAttrs.addAlignmentAttr(Align(alignof(void*))); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + AttributeSet::get(C, {Attribute::get(C, Attribute::ZExt)})); } @@ -632,25 +712,25 @@ static AttributeList get_attrs_zext(LLVMContext &C) static const auto jlRTLD_DEFAULT_var = new JuliaVariable{ XSTR(jl_RTLD_DEFAULT_handle), true, - [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, + [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); }, }; static const auto jlexe_var = new JuliaVariable{ XSTR(jl_exe_handle), true, - [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, + [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); }, }; static const auto jldll_var = new JuliaVariable{ XSTR(jl_libjulia_handle), true, - [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, + [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); }, }; static const auto jldlli_var = new JuliaVariable{ XSTR(jl_libjulia_internal_handle), true, - [](Type *T_size) -> Type * { return 
getInt8PtrTy(T_size->getContext()); }, + [](Type *T_size) -> Type * { return getPointerTy(T_size->getContext()); }, }; -static const auto jlsmall_typeof_var = new JuliaVariable{ - XSTR(small_typeof), +static const auto jl_small_typeof_var = new JuliaVariable{ + XSTR(jl_small_typeof), true, [](Type *T_size) -> Type * { return getInt8Ty(T_size->getContext()); }, }; @@ -718,25 +798,40 @@ static const auto jlthrow_func = new JuliaFunction<>{ static const auto jlerror_func = new JuliaFunction<>{ XSTR(jl_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {getInt8PtrTy(C)}, false); }, + {getPointerTy(C)}, false); }, + get_attrs_noreturn, +}; +static const auto jlargumenterror_func = new JuliaFunction<>{ + XSTR(jl_argument_error), + [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), + {getPointerTy(C)}, false); }, get_attrs_noreturn, }; static const auto jlatomicerror_func = new JuliaFunction<>{ XSTR(jl_atomic_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {getInt8PtrTy(C)}, false); }, + {getPointerTy(C)}, false); }, get_attrs_noreturn, }; static const auto jltypeerror_func = new JuliaFunction<>{ XSTR(jl_type_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {getInt8PtrTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, + {getPointerTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, get_attrs_noreturn, }; static const auto jlundefvarerror_func = new JuliaFunction<>{ XSTR(jl_undefined_var_error), + [](LLVMContext &C) { + Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted); + return FunctionType::get(getVoidTy(C), {T, T}, false); + }, + get_attrs_noreturn, +}; +static const auto jlhasnofield_func = new JuliaFunction<>{ + XSTR(jl_has_no_field_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, + {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), + PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, get_attrs_noreturn, }; static const auto jlboundserrorv_func = new JuliaFunction{ @@ -772,12 +867,58 @@ static const auto jlcheckassign_func = new JuliaFunction<>{ {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, nullptr, }; -static const auto jldeclareconst_func = new JuliaFunction<>{ - XSTR(jl_declare_constant), +static const auto jlcheckreplace_func = new JuliaFunction<>{ + XSTR(jl_checked_replace), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(T_prjlvalue, + {T_pjlvalue, T_pjlvalue, T_pjlvalue, T_prjlvalue, T_prjlvalue}, false); }, + nullptr, +}; +static const auto jlcheckmodify_func = new JuliaFunction<>{ + XSTR(jl_checked_modify), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(T_prjlvalue, + {T_pjlvalue, T_pjlvalue, T_pjlvalue, T_prjlvalue, T_prjlvalue}, false); }, + nullptr, +}; +static const auto jlcheckswap_func = new JuliaFunction<>{ + XSTR(jl_checked_swap), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return 
FunctionType::get(T_prjlvalue, + {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, + nullptr, +}; +static const auto jlcheckassignonce_func = new JuliaFunction<>{ + XSTR(jl_checked_assignonce), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(T_prjlvalue, + {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, + nullptr, +}; +static const auto jldeclareconstval_func = new JuliaFunction<>{ + XSTR(jl_declare_constant_val), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, T_pjlvalue, T_pjlvalue, T_prjlvalue}, false); }, + nullptr, +}; +static const auto jldeclareglobal_func = new JuliaFunction<>{ + XSTR(jl_declare_global), [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); return FunctionType::get(getVoidTy(C), - {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); }, + {T_pjlvalue, T_pjlvalue, T_prjlvalue}, false); }, nullptr, }; static const auto jlgetbindingorerror_func = new JuliaFunction<>{ @@ -794,7 +935,17 @@ static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{ [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); return FunctionType::get(T_pjlvalue, - {T_pjlvalue, T_pjlvalue}, false); + {T_pjlvalue, T_pjlvalue, getInt32Ty(C)}, false); + }, + nullptr, +}; +static const auto jlgetbindingvalue_func = new JuliaFunction<>{ + XSTR(jl_reresolve_binding_value_seqcst), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(T_prjlvalue, + {T_pjlvalue}, false); }, nullptr, }; @@ -803,7 +954,7 @@ static const auto jlboundp_func = new JuliaFunction<>{ [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); return FunctionType::get(getInt32Ty(C), - {T_pjlvalue, T_pjlvalue}, false); + {T_pjlvalue, T_pjlvalue, getInt32Ty(C)}, false); }, nullptr, }; @@ -845,6 +996,20 @@ static const auto jlinvoke_func = new JuliaFunction<>{ {AttributeSet(), Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); }, }; +static const auto jlinvokeoc_func = new JuliaFunction<>{ + XSTR(jl_invoke_oc), + get_func2_sig, + [](LLVMContext &C) { return AttributeList::get(C, + AttributeSet(), + Attributes(C, {Attribute::NonNull}), + {AttributeSet(), + Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); }, +}; +static const auto jlopaque_closure_call_func = new JuliaFunction<>{ + XSTR(jl_f_opaque_closure_call), + get_func_sig, + get_func_attrs, +}; static const auto jlmethod_func = new JuliaFunction<>{ XSTR(jl_method_def), [](LLVMContext &C) { @@ -857,13 +1022,12 @@ static const auto jlmethod_func = new JuliaFunction<>{ nullptr, }; static const auto jlgenericfunction_func = new JuliaFunction<>{ - XSTR(jl_generic_function_def), + XSTR(jl_declare_const_gf), [](LLVMContext &C) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); auto T_pjlvalue = PointerType::get(T_jlvalue, 0); auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); - auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0); - return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue}, false); + return 
FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); }, nullptr, }; @@ -885,32 +1049,84 @@ static const auto jlunlockvalue_func = new JuliaFunction<>{ AttributeSet(), {Attributes(C, {Attribute::NoCapture})}); }, }; +static const auto jllockfield_func = new JuliaFunction<>{ + XSTR(jl_lock_field), + [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), + {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Loaded)}, false); }, + [](LLVMContext &C) { return AttributeList::get(C, + AttributeSet(), + AttributeSet(), + {Attributes(C, {Attribute::NoCapture})}); }, +}; +static const auto jlunlockfield_func = new JuliaFunction<>{ + XSTR(jl_unlock_field), + [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), + {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Loaded)}, false); }, + [](LLVMContext &C) { return AttributeList::get(C, + AttributeSet(), + AttributeSet(), + {Attributes(C, {Attribute::NoCapture})}); }, +}; static const auto jlenter_func = new JuliaFunction<>{ XSTR(jl_enter_handler), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {getInt8PtrTy(C)}, false); }, + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, getPointerTy(C)}, false); }, nullptr, }; static const auto jl_current_exception_func = new JuliaFunction<>{ XSTR(jl_current_exception), - [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), false); }, + [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_pjlvalue_ty(C)}, false); }, nullptr, }; static const auto jlleave_func = new JuliaFunction<>{ XSTR(jl_pop_handler), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {getInt32Ty(C)}, false); }, - nullptr, + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, getInt32Ty(C)}, false); }, + [](LLVMContext &C) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + auto RetAttrs = AttrBuilder(C); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet(), + None); + }, +}; +static const auto jlleave_noexcept_func = new JuliaFunction<>{ + XSTR(jl_pop_handler_noexcept), + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, getInt32Ty(C)}, false); }, + [](LLVMContext &C) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + auto RetAttrs = AttrBuilder(C); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet(), + None); + }, }; static const auto jl_restore_excstack_func = new JuliaFunction{ XSTR(jl_restore_excstack), - [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C), - {T_size}, false); }, + [](LLVMContext &C, Type *T_size) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, T_size}, false); }, nullptr, }; static const auto jl_excstack_state_func = new JuliaFunction{ XSTR(jl_excstack_state), - [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, false); }, + [](LLVMContext &C, Type *T_size) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(T_size, {T_pjlvalue}, false); }, nullptr, }; static const auto jlegalx_func = new JuliaFunction{ @@ -918,26 
+1134,55 @@ static const auto jlegalx_func = new JuliaFunction{ [](LLVMContext &C, Type *T_size) { Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived); return FunctionType::get(getInt32Ty(C), {T, T, T_size}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}), - AttributeSet(), - None); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly()); + FnAttrs.addAttribute(Attribute::NoUnwind); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet(), + None); }, }; static const auto jl_alloc_obj_func = new JuliaFunction{ "julia.gc_alloc_obj", [](LLVMContext &C, Type *T_size) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); - auto T_ppjlvalue = PointerType::get(PointerType::get(T_jlvalue, 0), 0); + auto T_pjlvalue = PointerType::get(T_jlvalue, 0); return FunctionType::get(T_prjlvalue, - {T_ppjlvalue, T_size, T_prjlvalue}, false); + {T_pjlvalue, T_size, T_prjlvalue}, false); }, [](LLVMContext &C) { auto FnAttrs = AttrBuilder(C); FnAttrs.addAllocSizeAttr(1, None); // returns %1 bytes -#if JL_LLVM_VERSION >= 150000 - FnAttrs.addAllocKindAttr(AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned); -#endif + FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NoAlias); + RetAttrs.addAttribute(Attribute::NonNull); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); + }, +}; +static const auto jl_alloc_genericmemory_unchecked_func = new JuliaFunction{ + XSTR(jl_alloc_genericmemory_unchecked), + [](LLVMContext &C, Type *T_size) { + auto T_jlvalue = JuliaType::get_jlvalue_ty(C); + auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); + auto T_pjlvalue = PointerType::get(T_jlvalue, 0); + return FunctionType::get(T_prjlvalue, + {T_pjlvalue, T_size, T_pjlvalue}, false); + }, + [](LLVMContext &C) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NoAlias); RetAttrs.addAttribute(Attribute::NonNull); @@ -952,7 +1197,7 @@ static const auto jl_newbits_func = new JuliaFunction<>{ [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); return FunctionType::get(T_prjlvalue, - {T_prjlvalue, getInt8PtrTy(C)}, false); + {T_prjlvalue, getPointerTy(C)}, false); }, [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), @@ -969,27 +1214,31 @@ static const auto jl_typeof_func = new JuliaFunction<>{ return FunctionType::get(T_prjlvalue, {T_prjlvalue}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadNone, Attribute::NoUnwind, Attribute::NoRecurse}), + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::none()); + FnAttrs.addAttribute(Attribute::NoUnwind); + 
FnAttrs.addAttribute(Attribute::NoRecurse); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jl_loopinfo_marker_func = new JuliaFunction<>{ - "julia.loopinfo_marker", - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), - AttributeSet(), - None); }, -}; + static const auto jl_write_barrier_func = new JuliaFunction<>{ "julia.write_barrier", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {JuliaType::get_prjlvalue_ty(C)}, true); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::NoRecurse); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), AttributeSet(), - {Attributes(C, {Attribute::ReadOnly})}); }, + {Attributes(C, {Attribute::ReadOnly})}); + }, }; static const auto jlisa_func = new JuliaFunction<>{ @@ -1024,7 +1273,7 @@ static const auto jlapplytype_func = new JuliaFunction<>{ [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), - AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::NonNull), + AttributeSet::get(C, ArrayRef({Attribute::get(C, Attribute::NonNull), Attribute::getWithAlignment(C, Align(16))})), None); }, @@ -1032,7 +1281,7 @@ static const auto jlapplytype_func = new JuliaFunction<>{ static const auto jl_object_id__func = new JuliaFunction{ XSTR(jl_object_id_), [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, - {JuliaType::get_prjlvalue_ty(C), PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); }, + {T_size, PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); }, nullptr, }; static const auto setjmp_func = new JuliaFunction{ @@ -1040,9 +1289,9 @@ static const auto setjmp_func = new JuliaFunction{ [](LLVMContext &C, const Triple &T) { if (T.isOSWindows()) return FunctionType::get(getInt32Ty(C), - {getInt8PtrTy(C)}, false); + {getPointerTy(C)}, false); return FunctionType::get(getInt32Ty(C), - {getInt8PtrTy(C), getInt32Ty(C)}, false); + {getPointerTy(C), getInt32Ty(C)}, false); }, [](LLVMContext &C) { return AttributeList::get(C, Attributes(C, {Attribute::ReturnsTwice}), @@ -1052,9 +1301,13 @@ static const auto setjmp_func = new JuliaFunction{ static const auto memcmp_func = new JuliaFunction{ XSTR(memcmp), [](LLVMContext &C, Type *T_size) { return FunctionType::get(getInt32Ty(C), - {getInt8PtrTy(C), getInt8PtrTy(C), T_size}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}), + {getPointerTy(C), getPointerTy(C), T_size}, false); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref)); + FnAttrs.addAttribute(Attribute::NoUnwind); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), AttributeSet(), None); }, // TODO: inferLibFuncAttributes(*memcmp_func, TLI); @@ -1062,13 +1315,13 @@ static const auto memcmp_func = new JuliaFunction{ static const auto jldlsym_func = new JuliaFunction<>{ XSTR(jl_load_and_lookup), [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C), - 
{getInt8PtrTy(C), getInt8PtrTy(C), PointerType::get(getInt8PtrTy(C), 0)}, false); }, + {getPointerTy(C), getPointerTy(C), PointerType::get(getPointerTy(C), 0)}, false); }, nullptr, }; static const auto jllazydlsym_func = new JuliaFunction<>{ XSTR(jl_lazy_load_and_lookup), [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C), - {JuliaType::get_prjlvalue_ty(C), getInt8PtrTy(C)}, false); }, + {JuliaType::get_prjlvalue_ty(C), getPointerTy(C)}, false); }, nullptr, }; static const auto jltypeassert_func = new JuliaFunction<>{ @@ -1092,6 +1345,23 @@ static const auto jlgetnthfieldchecked_func = new JuliaFunction{ + XSTR(jl_field_index), + [](LLVMContext &C) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(getInt32Ty(C), + {T_prjlvalue, T_prjlvalue, getInt32Ty(C)}, false); + }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::readOnly()); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::WillReturn); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet(), + None); }, // This function can error if the third argument is 1 so don't do that. +}; static const auto jlfieldisdefinedchecked_func = new JuliaFunction{ XSTR(jl_field_isdefined_checked), [](LLVMContext &C, Type *T_size) { @@ -1116,9 +1386,9 @@ static const auto jlgetcfunctiontrampoline_func = new JuliaFunction<>{ { T_prjlvalue, // f (object) T_pjlvalue, // result - getInt8PtrTy(C), // cache + getPointerTy(C), // cache T_pjlvalue, // fill - FunctionType::get(getInt8PtrTy(C), { getInt8PtrTy(C), T_ppjlvalue }, false)->getPointerTo(), // trampoline + FunctionType::get(getPointerTy(C), { getPointerTy(C), T_ppjlvalue }, false)->getPointerTo(), // trampoline T_pjlvalue, // env T_pprjlvalue, // vals }, false); @@ -1139,34 +1409,43 @@ static const auto sync_gc_total_bytes_func = new JuliaFunction<>{ {getInt64Ty(C)}, false); }, nullptr, }; -static const auto jlarray_data_owner_func = new JuliaFunction<>{ - XSTR(jl_array_data_owner), - [](LLVMContext &C) { +static const auto jl_allocgenericmemory = new JuliaFunction{ + XSTR(jl_alloc_genericmemory), + [](LLVMContext &C, Type *T_Size) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); - return FunctionType::get(T_prjlvalue, - {T_prjlvalue}, false); - }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind}), - Attributes(C, {Attribute::NonNull}), - None); }, + return FunctionType::get(T_prjlvalue, // new Memory + {T_prjlvalue, // type + T_Size // nelements + }, false); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + AttrBuilder RetAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef) | MemoryEffects::argMemOnly(ModRefInfo::Ref)); + FnAttrs.addAttribute(Attribute::WillReturn); + RetAttrs.addAlignmentAttr(Align(16)); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addDereferenceableAttr(16); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); }, }; -#define BOX_FUNC(ct,at,attrs) \ +#define BOX_FUNC(ct,at,attrs,nbytes) \ static const auto box_##ct##_func = new JuliaFunction<>{ \ XSTR(jl_box_##ct), \ [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C),\ {at}, false); }, \ - attrs, \ -} -BOX_FUNC(int16, getInt16Ty(C), get_attrs_sext); -BOX_FUNC(uint16, getInt16Ty(C), get_attrs_zext); -BOX_FUNC(int32, getInt32Ty(C), get_attrs_sext); -BOX_FUNC(uint32, getInt32Ty(C), 
get_attrs_zext); -BOX_FUNC(int64, getInt64Ty(C), get_attrs_sext); -BOX_FUNC(uint64, getInt64Ty(C), get_attrs_zext); -BOX_FUNC(char, getCharTy(C), get_attrs_zext); -BOX_FUNC(float32, getFloatTy(C), get_attrs_basic); -BOX_FUNC(float64, getDoubleTy(C), get_attrs_basic); + [](LLVMContext &C) { return attrs(C,nbytes); }, \ +} +BOX_FUNC(int16, getInt16Ty(C), get_attrs_box_sext, 2); +BOX_FUNC(uint16, getInt16Ty(C), get_attrs_box_zext, 2); +BOX_FUNC(int32, getInt32Ty(C), get_attrs_box_sext, 4); +BOX_FUNC(uint32, getInt32Ty(C), get_attrs_box_zext, 4); +BOX_FUNC(int64, getInt64Ty(C), get_attrs_box_sext, 8); +BOX_FUNC(uint64, getInt64Ty(C), get_attrs_box_zext, 8); +BOX_FUNC(char, getCharTy(C), get_attrs_box_zext, 1); +BOX_FUNC(float32, getFloatTy(C), get_attrs_box_float, 4); +BOX_FUNC(float64, getDoubleTy(C), get_attrs_box_float, 8); #undef BOX_FUNC static const auto box_ssavalue_func = new JuliaFunction{ @@ -1178,6 +1457,12 @@ static const auto box_ssavalue_func = new JuliaFunction{ }, get_attrs_basic, }; +static const auto jlgetbuiltinfptr_func = new JuliaFunction<>{ + XSTR(jl_get_builtin_fptr), + [](LLVMContext &C) { return FunctionType::get(get_func_sig(C)->getPointerTo(), + {JuliaType::get_prjlvalue_ty(C)}, false); }, + nullptr, +}; // placeholder functions @@ -1198,9 +1483,12 @@ static const auto gc_preserve_end_func = new JuliaFunction<> { }; static const auto except_enter_func = new JuliaFunction<>{ "julia.except_enter", - [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), false); }, + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + auto RT = StructType::get(getInt32Ty(C), getPointerTy(C)); + return FunctionType::get(RT, {T_pjlvalue}, false); }, [](LLVMContext &C) { return AttributeList::get(C, - AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReturnsTwice)})), + Attributes(C, {Attribute::ReturnsTwice}), AttributeSet(), None); }, }; @@ -1208,11 +1496,44 @@ static const auto pointer_from_objref_func = new JuliaFunction<>{ "julia.pointer_from_objref", [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pjlvalue_ty(C), {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReadNone), Attribute::get(C, Attribute::NoUnwind)})), + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addMemoryAttr(MemoryEffects::none()); + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::Speculatable); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoRecurse); + FnAttrs.addAttribute(Attribute::NoSync); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), Attributes(C, {Attribute::NonNull}), None); }, }; +static const auto gc_loaded_func = new JuliaFunction<>{ + "julia.gc_loaded", + // # memory(none) nosync nounwind speculatable willreturn norecurse + // declare nonnull noundef ptr(Loaded) @"julia.gc_loaded"(ptr(Tracked) nocapture nonnull noundef readnone, ptr nonnull noundef readnone) + // top: + // %metadata GC base pointer is ptr(Tracked) + // ret addrspacecast ptr to ptr(Loaded) + [](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_prjlvalue_ty(C), AddressSpace::Loaded), + {JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_prjlvalue_ty(C), 0)}, false); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + FnAttrs.addAttribute(Attribute::NoSync); + FnAttrs.addAttribute(Attribute::NoUnwind); + 
FnAttrs.addAttribute(Attribute::Speculatable); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoRecurse); + FnAttrs.addMemoryAttr(MemoryEffects::none()); + AttrBuilder RetAttrs(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addAttribute(Attribute::NoUndef); + return AttributeList::get(C, AttributeSet::get(C,FnAttrs), AttributeSet::get(C,RetAttrs), + { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}), + Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}) }); + }, +}; // julia.call represents a call with julia calling convention, it is used as // @@ -1250,9 +1571,25 @@ static const auto julia_call2 = new JuliaFunction<>{ get_attrs_basic, }; +// julia.call3 is like julia.call, except that %fptr is derived rather than tracked +static const auto julia_call3 = new JuliaFunction<>{ + "julia.call3", + [](LLVMContext &C) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived); + return FunctionType::get(T_prjlvalue, + {get_func3_sig(C)->getPointerTo(), + T}, // %f + true); }, // %args + get_attrs_basic, +}; + + static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_sig, get_func_attrs}; +static const auto jlintrinsic_func = new JuliaFunction<>{XSTR(jl_f_intrinsic_call), get_func3_sig, get_func_attrs}; + static const auto &builtin_func_map() { - static std::map*> builtins = { + static auto builtins = new DenseMap*> { { jl_f_is_addr, new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} }, { jl_f_typeof_addr, new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} }, { jl_f_sizeof_addr, new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} }, @@ -1266,6 +1603,7 @@ static const auto &builtin_func_map() { { jl_f__call_in_world_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} }, { jl_f__call_in_world_total_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} }, { jl_f_throw_addr, new JuliaFunction<>{XSTR(jl_f_throw), get_func_sig, get_func_attrs} }, + { jl_f_throw_methoderror_addr, new JuliaFunction<>{XSTR(jl_f_throw_methoderror), get_func_sig, get_func_attrs} }, { jl_f_tuple_addr, jltuple_func }, { jl_f_svec_addr, new JuliaFunction<>{XSTR(jl_f_svec), get_func_sig, get_func_attrs} }, { jl_f_applicable_addr, new JuliaFunction<>{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} }, @@ -1279,17 +1617,34 @@ static const auto &builtin_func_map() { { jl_f_nfields_addr, new JuliaFunction<>{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} }, { jl_f__expr_addr, new JuliaFunction<>{XSTR(jl_f__expr), get_func_sig, get_func_attrs} }, { jl_f__typevar_addr, new JuliaFunction<>{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} }, - { jl_f_arrayref_addr, new JuliaFunction<>{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} }, - { jl_f_const_arrayref_addr, new JuliaFunction<>{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} }, - { jl_f_arrayset_addr, new JuliaFunction<>{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} }, - { jl_f_arraysize_addr, new JuliaFunction<>{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} }, + { jl_f_memorynew_addr, new JuliaFunction<>{XSTR(jl_f_memorynew), get_func_sig, get_func_attrs} }, + { jl_f_memoryref_addr, new JuliaFunction<>{XSTR(jl_f_memoryref), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefoffset_addr, new 
JuliaFunction<>{XSTR(jl_f_memoryrefoffset), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefset_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefset), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefswap_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefswap), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefreplace_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefreplace), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefmodify_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefmodify), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefsetonce_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefsetonce), get_func_sig, get_func_attrs} }, + { jl_f_memoryref_isassigned_addr,new JuliaFunction<>{XSTR(jl_f_memoryref_isassigned), get_func_sig, get_func_attrs} }, { jl_f_apply_type_addr, new JuliaFunction<>{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} }, { jl_f_donotdelete_addr, new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }, { jl_f_compilerbarrier_addr, new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} }, { jl_f_finalizer_addr, new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} }, - { jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} } + { jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }, + { jl_f_current_scope_addr, new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} }, }; - return builtins; + return *builtins; +} + +static const auto &may_dispatch_builtins() { + static auto builtins = new DenseSet( + {jl_f__apply_iterate_addr, + jl_f__apply_pure_addr, + jl_f__call_in_world_addr, + jl_f__call_in_world_total_addr, + jl_f__call_latest_addr, + }); + return *builtins; } static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; @@ -1297,19 +1652,6 @@ static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(j static _Atomic(uint64_t) globalUniqueGeneratedNames{1}; // --- code generation --- -extern "C" { - jl_cgparams_t jl_default_cgparams = {1, 1, 0, -#ifdef _OS_WINDOWS_ - 0, -#else - 1, -#endif - (int) DICompileUnit::DebugEmissionKind::FullDebug, - 1, - 1, - jl_rettype_inferred_addr, NULL }; -} - static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) { jt = jl_unwrap_unionall(jt); @@ -1320,6 +1662,8 @@ static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) { return tbaa_cache.tbaa_value; if (jl_is_abstracttype(jt)) return tbaa_cache.tbaa_value; + if (jl_is_genericmemory_type(jt) || jl_is_array_type(jt)) + return tbaa_cache.tbaa_array; // If we're here, we know all subtypes are (im)mutable, even if we // don't know what the exact type is return jl_is_mutable(jt) ? tbaa_cache.tbaa_mutab : tbaa_cache.tbaa_immut; @@ -1429,21 +1773,32 @@ struct jl_aliasinfo_t { }; // metadata tracking for a llvm Value* during codegen +const uint8_t UNION_BOX_MARKER = 0x80; struct jl_cgval_t { Value *V; // may be of type T* or T, or set to NULL if ghost (or if the value has not been initialized yet, for a variable definition) // For unions, we may need to keep a reference to the boxed part individually. // If this is non-NULL, then, at runtime, we satisfy the invariant that (for the corresponding - // runtime values) if `(TIndex | 0x80) != 0`, then `Vboxed == V` (by value). + // runtime values) if `(TIndex | UNION_BOX_MARKER) != 0`, then `Vboxed == V` (by value). 
// For convenience, we also set this value of isboxed values, in which case // it is equal (at compile time) to V. - // If this is non-NULL, it is always of type `T_prjlvalue` + + // If this is non-NULL (at compile time), it is always of type `T_prjlvalue`. + // N.B.: In general we expect this to always be a dereferenceable pointer at runtime. + // However, there are situations where this value may be a runtime NULL + // (PhiNodes with undef predecessors or PhiC with undef UpsilonNode). + // The middle-end arranges appropriate error checks before any use + // of this value that may read a non-dereferenceable Vboxed, with two + // exceptions: PhiNode and UpsilonNode arguments which need special + // handling to account for the possibility that this may be NULL. Value *Vboxed; + Value *TIndex; // if `V` is an unboxed (tagged) Union described by `typ`, this gives the DataType index (1-based, small int) as an i8 + SmallVector inline_roots; // if present, `V` is a pointer, but not in canonical layout jl_value_t *constant; // constant value (rooted in linfo.def.roots) - jl_value_t *typ; // the original type of V, never NULL + jl_value_t *typ; // the original type of V, never nullptr bool isboxed; // whether this value is a jl_value_t* allocated on the heap with the right type tag bool isghost; // whether this value is "ghost" - MDNode *tbaa; // The related tbaa node. Non-NULL iff this holds an address. + MDNode *tbaa; // The related tbaa node. Non-nullptr iff this holds an address. // If non-null, this memory location may be promoted on use, by hoisting the // destination memory above the promotion point. Instruction *promotion_point; @@ -1454,13 +1809,15 @@ struct jl_cgval_t { bool ispointer() const { // whether this value is compatible with `data_pointer` + assert(inline_roots.empty()); return tbaa != nullptr; } jl_cgval_t(Value *Vval, jl_value_t *typ, Value *tindex) : // general value constructor - V(Vval), // V is allowed to be NULL in a jl_varinfo_t context, but not during codegen contexts + V(Vval), // V is allowed to be nullptr in a jl_varinfo_t context, but not during codegen contexts Vboxed(nullptr), TIndex(tindex), - constant(NULL), + inline_roots(), + constant(nullptr), typ(typ), isboxed(false), isghost(false), @@ -1468,13 +1825,15 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { - assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext())); + assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext())); } - jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa) : // general pointer constructor + jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, Value* inline_roots) = delete; + jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef inline_roots) : // general pointer constructor V(Vptr), Vboxed(isboxed ? 
Vptr : nullptr), TIndex(tindex), - constant(NULL), + inline_roots(inline_roots), + constant(nullptr), typ(typ), isboxed(isboxed), isghost(false), @@ -1484,15 +1843,16 @@ struct jl_cgval_t { { if (Vboxed) assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext())); - assert(tbaa != NULL); - assert(!(isboxed && TIndex != NULL)); - assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext())); + assert(tbaa != nullptr); + assert(!(isboxed && TIndex != nullptr)); + assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext())); } explicit jl_cgval_t(jl_value_t *typ) : // ghost value constructor - // mark explicit to avoid being used implicitly for conversion from NULL (use jl_cgval_t() instead) - V(NULL), - Vboxed(NULL), - TIndex(NULL), + // mark explicit to avoid being used implicitly for conversion from nullptr (use jl_cgval_t() instead) + V(nullptr), + Vboxed(nullptr), + TIndex(nullptr), + inline_roots(), constant(((jl_datatype_t*)typ)->instance), typ(typ), isboxed(false), @@ -1508,6 +1868,7 @@ struct jl_cgval_t { V(v.V), Vboxed(v.Vboxed), TIndex(tindex), + inline_roots(v.inline_roots), constant(v.constant), typ(typ), isboxed(v.isboxed), @@ -1521,17 +1882,18 @@ struct jl_cgval_t { // this constructor expects we had a badly or equivalently typed version // make sure we aren't discarding the actual type information if (v.TIndex) { - assert((TIndex == NULL) == jl_is_concrete_type(typ)); + assert((TIndex == nullptr) == jl_is_concrete_type(typ)); } else { assert(isboxed || v.typ == typ || tindex); } } explicit jl_cgval_t() : // undef / unreachable constructor - V(NULL), - Vboxed(NULL), - TIndex(NULL), - constant(NULL), + V(nullptr), + Vboxed(nullptr), + TIndex(nullptr), + inline_roots(), + constant(nullptr), typ(jl_bottom_type), isboxed(false), isghost(true), @@ -1547,6 +1909,7 @@ struct jl_varinfo_t { Instruction *boxroot; // an address, if the var might be in a jl_value_t** stack slot (marked ctx.tbaa().tbaa_const, if appropriate) jl_cgval_t value; // a stack slot or constant value Value *pTIndex; // i8* stack slot for the value.TIndex tag describing `value.V` + AllocaInst *inline_roots; // stack roots for the inline_roots array, if needed DILocalVariable *dinfo; // if the variable might be used undefined and is not boxed // this i1 flag is true when it is defined @@ -1557,11 +1920,12 @@ struct jl_varinfo_t { bool usedUndef; bool used; - jl_varinfo_t(LLVMContext &ctxt) : boxroot(NULL), + jl_varinfo_t(LLVMContext &ctxt) : boxroot(nullptr), value(jl_cgval_t()), - pTIndex(NULL), - dinfo(NULL), - defFlag(NULL), + pTIndex(nullptr), + inline_roots(nullptr), + dinfo(nullptr), + defFlag(nullptr), isSA(false), isVolatile(false), isArgument(false), @@ -1578,17 +1942,16 @@ class jl_codectx_t { IRBuilder<> builder; jl_codegen_params_t &emission_context; llvm::MapVector call_targets; - std::map &global_targets; - std::map, GlobalVariable*> &external_calls; Function *f = NULL; + MDNode* LoopID = NULL; // local var info. globals are not in here. 
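// [editor's aside, not part of the patch] A sketch of what the new `inline_roots`
// field appears to represent: when an aggregate contains GC-managed pointers, its
// pointer fields can be kept in a separate, GC-visible root vector while the plain
// bits stay in an ordinary stack slot; the patch's `recombine_value` later writes
// both parts back into the canonical struct layout. The types and helper below are
// hypothetical, for exposition only.
#include <vector>

struct object;                                   // stand-in for jl_value_t
struct pair_layout { object *a; double b; };     // canonical layout: pointer + plain data

struct split_value {
    std::vector<object*> inline_roots;           // GC-visible pointer fields, e.g. {a}
    double data;                                 // non-pointer payload, e.g. b
};

// "recombine": rebuild the canonical layout from the split representation
// (assumes exactly one pointer field, as in pair_layout).
static pair_layout recombine(const split_value &v) {
    pair_layout out;
    out.a = v.inline_roots[0];
    out.b = v.data;
    return out;
}

int main() {
    object *some_obj = nullptr;                  // stand-in; a real root would be GC-managed
    split_value v{{some_obj}, 4.25};
    pair_layout p = recombine(v);
    return (p.a == some_obj && p.b == 4.25) ? 0 : 1;
}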
- std::vector slots; + SmallVector slots; std::map phic_slots; - std::vector SAvalues; - std::vector> PhiNodes; - std::vector ssavalue_assigned; - std::vector ssavalue_usecount; - std::vector oc_modules; + std::map > scope_restore; + SmallVector SAvalues; + SmallVector, jl_value_t *>, 0> PhiNodes; + SmallVector ssavalue_assigned; + SmallVector ssavalue_usecount; jl_module_t *module = NULL; jl_typecache_t type_cache; jl_tbaacache_t tbaa_cache; @@ -1597,10 +1960,11 @@ class jl_codectx_t { jl_value_t *rettype = NULL; jl_code_info_t *source = NULL; jl_array_t *code = NULL; - size_t world = 0; + size_t min_world = 0; + size_t max_world = -1; const char *name = NULL; StringRef file{}; - ssize_t *line = NULL; + int32_t line = -1; Value *spvals_ptr = NULL; Value *argArray = NULL; Value *argCount = NULL; @@ -1613,23 +1977,27 @@ class jl_codectx_t { Value *pgcstack = NULL; Instruction *topalloca = NULL; + Value *world_age_at_entry = NULL; bool use_cache = false; bool external_linkage = false; const jl_cgparams_t *params = NULL; - std::vector> llvmcall_modules; + SmallVector, 0> llvmcall_modules; - jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t ¶ms) + jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t ¶ms, size_t min_world, size_t max_world) : builder(llvmctx), emission_context(params), call_targets(), - global_targets(params.globals), - external_calls(params.external_fns), - world(params.world), + min_world(min_world), + max_world(max_world), use_cache(params.cache), external_linkage(params.external_linkage), - params(params.params) { } + params(params.params) { + } + + jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t ¶ms, jl_code_instance_t *ci) : + jl_codectx_t(llvmctx, params, jl_atomic_load_relaxed(&ci->min_world), jl_atomic_load_relaxed(&ci->max_world)) {} jl_typecache_t &types() { type_cache.initialize(builder.getContext(), emission_context.DL); @@ -1729,35 +2097,215 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { } static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg); +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, + ArrayRef ArgNames=None, unsigned nreq=0); static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, - jl_binding_t **pbnd, bool assign); -static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa); + jl_binding_t **pbnd, bool assign, bool alloc); +static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa); static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i); -static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg); +static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg); static Value *get_current_task(jl_codectx_t &ctx); static Value *get_current_ptls(jl_codectx_t &ctx); -static Value *get_last_age_field(jl_codectx_t &ctx); +static Value *get_tls_world_age(jl_codectx_t &ctx); +static Value *get_scope_field(jl_codectx_t &ctx); +static Value 
*get_tls_world_age_field(jl_codectx_t &ctx); static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true); static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF, - const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline); + ArrayRef args, size_t nargs, JuliaFunction<> *trampoline); static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF, - const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline); + ArrayRef args, size_t nargs, JuliaFunction<> *trampoline); static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2, Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr); -static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable=false); -static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt); +static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, ArrayRef argv, bool is_promotable=false); +static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayRef argv, size_t nargs, jl_value_t *rt, Value *age_ok); static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p); -static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G); +static unsigned julia_alignment(jl_value_t *jt); +static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatile); + +static void print_stack_crumbs(jl_codectx_t &ctx) +{ + errs() << "\n"; + errs() << "Stacktrace:\n"; + jl_method_instance_t *caller = ctx.linfo; + jl_((jl_value_t*)caller); + errs() << "In " << ctx.file << ":" << ctx.line << "\n"; + while (true) { + auto it = ctx.emission_context.enqueuers.find(caller); + if (it != ctx.emission_context.enqueuers.end()) { + caller = std::get(it->second); + } else { + break; + } + if (caller) { + if (jl_is_method_instance(caller)) { + for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) { + auto frame = *it2; + errs() << std::get<0>(frame) << " \n"; + errs() << "In " << std::get<1>(frame) << ":" << std::get(frame) << "\n"; + } + auto &frame = std::get(it->second).front(); + jl_((jl_value_t*)caller); + errs() << "In " << std::get<1>(frame) << ":" << std::get(frame) << "\n"; + } + } + else + break; + } + abort(); +} -static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G) -{ - return G->realize(M); +static jl_value_t *StackFrame( + jl_value_t *linfo, + std::string fn_name, + std::string filepath, + int32_t lineno, + jl_value_t *inlined) +{ + jl_value_t *StackFrame = jl_get_global(jl_base_module, jl_symbol("StackFrame")); + assert(StackFrame != nullptr); + + jl_value_t *args[7] = { + /* func */ (jl_value_t *)jl_symbol(fn_name.c_str()), + /* line */ (jl_value_t *)jl_symbol(filepath.c_str()), + /* line */ jl_box_int32(lineno), + /* linfo */ (jl_value_t *)linfo, + /* from_c */ jl_false, + /* inlined */ inlined, + /* pointer */ jl_box_uint64(0) + }; + + jl_value_t *frame = nullptr; + JL_TRY { + frame = jl_apply_generic(StackFrame, args, 7); + } JL_CATCH { + jl_safe_printf("Error creating stack frame\n"); + } + return frame; } -template +static void push_frames(jl_codectx_t &ctx, jl_method_instance_t *caller, jl_method_instance_t *callee) +{ + CallFrames frames; + auto it = ctx.emission_context.enqueuers.find(callee); + if (it != 
ctx.emission_context.enqueuers.end()) + return; + auto DL = ctx.builder.getCurrentDebugLocation(); + if (caller == nullptr || !DL) { // Used in various places + frames.push_back({ctx.funcName, "", 0}); + ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}}); + return; + } + auto filename = std::string(DL->getFilename()); + auto line = DL->getLine(); + auto fname = std::string(DL->getScope()->getSubprogram()->getName()); + frames.push_back({fname, filename, line}); + auto DI = DL.getInlinedAt(); + while (DI) { + auto filename = std::string(DI->getFilename()); + auto line = DI->getLine(); + auto fname = std::string(DI->getScope()->getSubprogram()->getName()); + frames.push_back({fname, filename, line}); + DI = DI->getInlinedAt(); + } + ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}}); +} + +static jl_array_t* build_stack_crumbs(jl_codectx_t &ctx) JL_NOTSAFEPOINT +{ + static intptr_t counter = 5; + jl_method_instance_t *caller = (jl_method_instance_t*)counter; //nothing serves as a sentinel for the bottom for the stack + push_frames(ctx, ctx.linfo, (jl_method_instance_t*)caller); + counter++; + jl_array_t *out = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(&out); + while (true) { + auto it = ctx.emission_context.enqueuers.find(caller); + if (it != ctx.emission_context.enqueuers.end()) { + caller = std::get(it->second); + } else { + break; + } + if (caller) { + + // assert(ctx.emission_context.enqueuers.count(caller) == 1); + // Each enqueuer should only be enqueued at least once and only once. Check why this assert is triggering + // This isn't a fatal error, just means that we may get a wrong backtrace + if (jl_is_method_instance(caller)) { + //TODO: Use a subrange when C++20 is a thing + for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) { + auto frame = *it2; + jl_value_t *stackframe = StackFrame(jl_nothing, std::get<0>(frame), std::get<1>(frame), std::get(frame), jl_true); + if (stackframe == nullptr) + print_stack_crumbs(ctx); + jl_array_ptr_1d_push(out, stackframe); + } + auto &frame = std::get(it->second).back(); + jl_value_t *stackframe = StackFrame((jl_value_t *)caller, std::get<0>(frame), std::get<1>(frame), std::get(frame), jl_false); + if (stackframe == nullptr) + print_stack_crumbs(ctx); + jl_array_ptr_1d_push(out, stackframe); + } + } + else + break; + } + JL_GC_POP(); + return out; +} + +static void print_stacktrace(jl_codectx_t &ctx, int trim) +{ + jl_task_t *ct = jl_get_current_task(); + assert(ct); + + // Temporarily operate in the current age + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); + jl_array_t* bt = build_stack_crumbs(ctx); + JL_GC_PUSH1(&bt); + + // Call `reinit_stdio` to get TTY IO objects (w/ color) + jl_value_t *reinit_stdio = jl_get_global(jl_base_module, jl_symbol("_reinit_stdio")); + assert(reinit_stdio); + jl_apply_generic(reinit_stdio, nullptr, 0); + + // Show the backtrace + jl_value_t *show_backtrace = jl_get_global(jl_base_module, jl_symbol("show_backtrace")); + jl_value_t *base_stderr = jl_get_global(jl_base_module, jl_symbol("stderr")); + assert(show_backtrace && base_stderr); + + JL_TRY { + jl_value_t *args[2] = { base_stderr, (jl_value_t *)bt }; + jl_apply_generic(show_backtrace, args, 2); + } JL_CATCH { + jl_printf(JL_STDERR,"Error showing backtrace\n"); + print_stack_crumbs(ctx); + } + + jl_printf(JL_STDERR, "\n\n"); + JL_GC_POP(); + ct->world_age = last_age; + + if (trim == JL_TRIM_SAFE) { + 
jl_printf(JL_STDERR,"Aborting compilation due to finding a dynamic dispatch"); + exit(1); + } + return; +} + +static int trim_may_error(int trim) +{ + return (trim == JL_TRIM_SAFE) || (trim == JL_TRIM_UNSAFE_WARN); +} + +static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G) +{ + return G->realize(M); +} + +template static Function *prepare_call_in(Module *M, JuliaFunction *G) { return G->realize(M); @@ -1771,53 +2319,90 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G) if (!local) { // Copy the GlobalVariable, but without the initializer, so it becomes a declaration GlobalVariable *proto = new GlobalVariable(*M, G->getValueType(), - G->isConstant(), GlobalVariable::ExternalLinkage, + G->isConstant(), G->getLinkage(), nullptr, G->getName(), nullptr, G->getThreadLocalMode()); + if (proto->hasLocalLinkage()) { + proto->setInitializer(G->getInitializer()); + } proto->copyAttributesFrom(G); - // DLLImport only needs to be set for the shadow module - // it just gets annoying in the JIT - proto->setDLLStorageClass(GlobalValue::DefaultStorageClass); return proto; } return cast(local); } +static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, size_t byte_offset, const Twine &Name="") +{ + auto *gep = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), base, byte_offset); + setName(ctx.emission_context, gep, Name); + return gep; +} + +static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, Value *byte_offset, const Twine &Name="") +{ + auto *gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), base, byte_offset, Name); + setName(ctx.emission_context, gep, Name); + return gep; +} + // --- convenience functions for tagging llvm values with julia types --- -static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, StringRef name, Module &M) +static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, Align align, const Twine &name, Module &M) { GlobalVariable *&gv = emission_context.mergedConstants[val]; - StringRef localname; - std::string ssno; - if (gv == nullptr) { - raw_string_ostream(ssno) << name << emission_context.mergedConstants.size(); - localname = StringRef(ssno); - } - else { - localname = gv->getName(); - if (gv->getParent() != &M) - gv = cast_or_null(M.getNamedValue(localname)); - } - if (gv == nullptr) { - gv = new GlobalVariable( + auto get_gv = [&](const Twine &name) { + auto gv = new GlobalVariable( M, val->getType(), true, GlobalVariable::PrivateLinkage, val, - localname); + name); gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + gv->setAlignment(align); + return gv; + }; + if (gv == nullptr) { + gv = get_gv(name + "#" + Twine(emission_context.mergedConstants.size())); + } + else if (gv->getParent() != &M) { + StringRef gvname = gv->getName(); + gv = M.getNamedGlobal(gvname); + if (!gv) { + gv = get_gv(gvname); + } } - assert(localname == gv->getName()); + assert(gv->getName().starts_with(name.str())); assert(val == gv->getInitializer()); return gv; } -static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty) +static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align) { ++EmittedAllocas; - return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca); + return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "", /*InsertBefore=*/ctx.topalloca); +} + 
+static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, unsigned nb, Align align) +{ + // Stupid hack: SROA takes hints from the element type, and will happily split this allocation into lots of unaligned bits + // if it cannot find something better to do, which is terrible for performance. + // However, if we emit this with an element size equal to the alignment, it will instead split it into aligned chunks + // which is great for performance and vectorization. + if (alignTo(nb, align) == align.value()) // don't bother with making an array of length 1 + return emit_static_alloca(ctx, ctx.builder.getIntNTy(align.value() * 8), align); + return emit_static_alloca(ctx, ArrayType::get(ctx.builder.getIntNTy(align.value() * 8), alignTo(nb, align) / align.value()), align); +} + +static AllocaInst *emit_static_roots(jl_codectx_t &ctx, unsigned nroots) +{ + AllocaInst *staticroots = emit_static_alloca(ctx, ctx.types().T_prjlvalue, Align(sizeof(void*))); + staticroots->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nroots)); + IRBuilder<> builder(ctx.topalloca); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + // make sure these are nullptr early from LLVM's perspective, in case it decides to SROA it + ai.decorateInst(builder.CreateMemSet(staticroots, builder.getInt8(0), nroots * sizeof(void*), staticroots->getAlign()))->moveAfter(ctx.topalloca); + return staticroots; } static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) @@ -1826,13 +2411,12 @@ static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *st size_t first_offset = sty->layout->nfields ? jl_field_offset(sty, 0) : 0; if (first_offset != 0) ctx.builder.CreateMemSet(ptr, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), first_offset, MaybeAlign(0)); - size_t i, np = sty->layout->npointers; - if (np == 0) + if (sty->layout->first_ptr < 0) return; + size_t i, np = sty->layout->npointers; auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx.builder.getContext()); - ptr = ctx.builder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace())); for (i = 0; i < np; i++) { - Value *fld = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i)); + Value *fld = emit_ptrgep(ctx, ptr, jl_ptr_offset(sty, i) * sizeof(jl_value_t*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(T_prjlvalue), fld)); } @@ -1845,8 +2429,8 @@ static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty) if (auto I = dyn_cast(v)) { auto ptr = I->getOperand(0); if (ty->getPointerAddressSpace() == ptr->getType()->getPointerAddressSpace()) - return ctx.builder.CreateBitCast(ptr, ty); - else if (cast(ty)->hasSameElementTypeAs(cast(ptr->getType()))) + return ptr; + else return ctx.builder.CreateAddrSpaceCast(ptr, ty); } ++EmittedIntToPtrs; @@ -1864,8 +2448,10 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ) if (jl_is_type_type(typ)) { assert(is_uniquerep_Type(typ)); // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort - jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); constant.constant = jl_tparam0(typ); + if (typ == (jl_value_t*)jl_typeofbottom_type->super) + constant.isghost = true; return constant; } return jl_cgval_t(typ); @@ -1878,7 +2464,7 @@ static inline jl_cgval_t 
ghostValue(jl_codectx_t &ctx, jl_datatype_t *typ) static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv) { jl_value_t *typ; - if (jl_is_type(jv)) { + if (jl_is_type(jv) && jv != jl_bottom_type) { typ = (jl_value_t*)jl_wrap_Type(jv); // TODO: gc-root this? } else { @@ -1886,16 +2472,16 @@ static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv) if (jl_is_datatype_singleton((jl_datatype_t*)typ)) return ghostValue(ctx, typ); } - jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); constant.constant = jv; return constant; } -static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa) +static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef inline_roots=None) { // this enables lazy-copying of immutable values and stack or argument slots - jl_cgval_t tagval(v, false, typ, tindex, tbaa); + jl_cgval_t tagval(v, false, typ, tindex, tbaa, inline_roots); return tagval; } @@ -1913,20 +2499,43 @@ static bool valid_as_globalinit(const Value *v) { return isa(v); } +static Value *zext_struct(jl_codectx_t &ctx, Value *V); + +// TODO: in the future, assume all callers will handle the interior pointers separately, and have +// have zext_struct strip them out, so we aren't saving those to the stack here causing shadow stores +// to be necessary too static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_t *typ, Value *tindex) { Value *loc; + v = zext_struct(ctx, v); + Align align(julia_alignment(typ)); if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr - loc = get_pointer_to_constant(ctx.emission_context, cast(v), "_j_const", *jl_Module); + assert(jl_is_concrete_type(typ)); // not legal to have an unboxed abstract type + loc = get_pointer_to_constant(ctx.emission_context, cast(v), align, "_j_const", *jl_Module); } else { - loc = emit_static_alloca(ctx, v->getType()); - ctx.builder.CreateStore(v, loc); + loc = emit_static_alloca(ctx, v->getType(), align); + ctx.builder.CreateAlignedStore(v, loc, align); } return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack); } static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v) { + if (!v.inline_roots.empty()) { + //if (v.V == nullptr) { + // AllocaInst *loc = emit_static_roots(ctx, v.inline_roots.size()); + // for (size_t i = 0; i < v.inline_roots.counts(); i++) + // ctx.builder.CreateAlignedStore(v.inline_roots[i], emit_ptrgep(ctx, loc, i * sizeof(void*)), Align(sizeof(void*))); + // return mark_julia_slot(loc, v.typ, v.TIndex, ctx.tbaa().tbaa_gcframe); + //} + Align align(julia_alignment(v.typ)); + Type *ty = julia_type_to_llvm(ctx, v.typ); + AllocaInst *loc = emit_static_alloca(ctx, ty, align); + auto tbaa = v.V == nullptr ? 
ctx.tbaa().tbaa_gcframe : ctx.tbaa().tbaa_stack; + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + recombine_value(ctx, v, loc, stack_ai, align, false); + return mark_julia_slot(loc, v.typ, v.TIndex, tbaa); + } if (v.ispointer()) return v; return value_to_pointer(ctx, v.V, v.typ, v.TIndex); @@ -1939,7 +2548,8 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox // replace T::Type{T} with T return ghostValue(ctx, typ); } - } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { + } + else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { // no need to explicitly load/store a constant/ghost value return ghostValue(ctx, typ); } @@ -1947,13 +2557,14 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox if (type_is_ghost(T)) { return ghostValue(ctx, typ); } - if (v && !isboxed && v->getType()->isAggregateType() && CountTrackedPointers(v->getType()).count == 0) { + if (v && !isboxed && v->getType()->isAggregateType()) { // eagerly put this back onto the stack // llvm mem2reg pass will remove this if unneeded - return value_to_pointer(ctx, v, typ, NULL); + if (CountTrackedPointers(v->getType()).count == 0) + return value_to_pointer(ctx, v, typ, NULL); } if (isboxed) - return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); return jl_cgval_t(v, typ, NULL); } @@ -1988,7 +2599,7 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t & if (alwaysboxed) { // discovered that this union-split type must actually be isboxed if (v.Vboxed) { - return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots); } else { // type mismatch (there weren't any boxed values in the union) @@ -2031,7 +2642,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) { assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things"); if (vi.usedUndef) { - vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext())); + vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()), Align(1)); setName(ctx.emission_context, vi.defFlag, "isdefined"); store_def_flag(ctx, vi, false); } @@ -2040,17 +2651,6 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) // --- utilities --- -static Constant *undef_value_for_type(Type *T) { - auto tracked = CountTrackedPointers(T); - Constant *undef; - if (tracked.count) - // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL - undef = Constant::getNullValue(T); - else - undef = UndefValue::get(T); - return undef; -} - static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block) { Function *f = irbuilder.GetInsertBlock()->getParent(); @@ -2093,7 +2693,7 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test) static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip) { // previous value was a split union, compute new index, or box - Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); SmallBitVector skip_box(1, true); Value *tindex = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); if (jl_is_uniontype(typ)) { @@ 
-2136,14 +2736,14 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & // some of the values are still unboxed if (!isa(new_tindex)) { Value *wasboxed = NULL; - // If the old value was boxed and unknown (type tag 0x80), + // If the old value was boxed and unknown (type tag UNION_BOX_MARKER), // it is possible that the tag was actually one of the types // that are now explicitly represented. To find out, we need // to compare typeof(v.Vboxed) (i.e. the type of the unknown // value) against all the types that are now explicitly // selected and select the appropriate one as our new tindex. if (v.Vboxed) { - wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); new_tindex = ctx.builder.CreateOr(wasboxed, new_tindex); wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); setName(ctx.emission_context, wasboxed, "wasboxed"); @@ -2165,10 +2765,10 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & }; // If we don't find a match. The type remains unknown - // (0x80). We could use `v.Tindex`, here, since we know - // it has to be 0x80, but it seems likely the backend + // (UNION_BOX_MARKER). We could use `v.Tindex`, here, since we know + // it has to be UNION_BOX_MARKER, but it seems likely the backend // will like the explicit constant better. - Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); unsigned counter = 0; for_each_uniontype_small( // for each new union-split value @@ -2178,7 +2778,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & // didn't handle this item before, select its new union index maybe_setup_union_isa(); Value *cmp = ctx.builder.CreateICmpEQ(emit_tagfrom(ctx, jt), union_box_dt); - union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | idx), union_box_tindex); + union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER | idx), union_box_tindex); } }, typ, @@ -2188,7 +2788,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_isa", ctx.f); ctx.builder.CreateBr(postBB); ctx.builder.SetInsertPoint(currBB); - Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); ctx.builder.CreateCondBr(wasunknown, union_isaBB, postBB); ctx.builder.SetInsertPoint(postBB); PHINode *tindex_phi = ctx.builder.CreatePHI(getInt8Ty(ctx.builder.getContext()), 2); @@ -2200,14 +2800,14 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & } if (!skip_box.all()) { // some values weren't unboxed in the new union - // box them now (tindex above already selected 0x80 = box for them) + // box them now (tindex above already selected UNION_BOX_MARKER = box for them) Value *boxv = box_union(ctx, v, skip_box); if (v.Vboxed) { // If the value is boxed both before and after, we don't need // to touch it at all. 
Otherwise we're either transitioning // unboxed->boxed, or leaving an unboxed value in place. Value *isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); boxv = ctx.builder.CreateSelect( ctx.builder.CreateAnd(wasboxed, isboxed), v.Vboxed, boxv); @@ -2226,22 +2826,22 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & tbaa = oldv.tbaa; slotv = ctx.builder.CreateSelect(isboxv, decay_derived(ctx, boxv), - decay_derived(ctx, emit_bitcast(ctx, slotv, boxv->getType()))); + decay_derived(ctx, slotv)); } - jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa); + jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa, v.inline_roots); assert(boxv->getType() == ctx.types().T_prjlvalue); newv.Vboxed = boxv; return newv; } } else { - return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None); } return jl_cgval_t(v, typ, new_tindex); } // given a value marked with type `v.typ`, compute the mapping and/or boxing to return a value of type `typ` -// TODO: should this set TIndex when trivial (such as 0x80 or concrete types) ? +// TODO: should this set TIndex when trivial (such as UNION_BOX_MARKER or concrete types) ? static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip) { if (typ == (jl_value_t*)jl_typeofbottom_type) @@ -2253,29 +2853,28 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ return ghostValue(ctx, typ); Value *new_tindex = NULL; if (jl_is_concrete_type(typ)) { - if (v.TIndex && !jl_is_pointerfree(typ)) { - // discovered that this union-split type must actually be isboxed - if (v.Vboxed) { - return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); - } - else { - // type mismatch: there weren't any boxed values in the union - if (skip) - *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); - else - CreateTrap(ctx.builder); - return jl_cgval_t(); - } + if (jl_is_concrete_type(v.typ)) { + // type mismatch: changing from one leaftype to another + if (skip) + *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); + else + CreateTrap(ctx.builder); + return jl_cgval_t(); } - if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) { - if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) { - // type mismatch: changing from one leaftype to another - if (skip) - *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); - else - CreateTrap(ctx.builder); - return jl_cgval_t(); + bool mustbox_union = v.TIndex && !jl_is_pointerfree(typ); + if (v.Vboxed && (v.isboxed || mustbox_union)) { + if (skip) { + *skip = ctx.builder.CreateNot(emit_exactly_isa(ctx, v, (jl_datatype_t*)typ, true)); } + return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots); + } + if (mustbox_union) { + // type mismatch: there weren't any boxed values in the union + if (skip) + *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); + else + CreateTrap(ctx.builder); + return jl_cgval_t(); } } else { @@ -2289,9 +2888,9 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ unsigned new_idx = get_box_tindex((jl_datatype_t*)v.typ, typ); if 
(new_idx) { new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx); - if (v.V && !v.ispointer()) { + if (v.V && v.inline_roots.empty() && !v.ispointer()) { // TODO: remove this branch once all consumers of v.TIndex understand how to handle a non-ispointer value - return value_to_pointer(ctx, v.V, typ, new_tindex); + return jl_cgval_t(value_to_pointer(ctx, v), typ, new_tindex); } } else if (jl_subtype(v.typ, typ)) { @@ -2313,27 +2912,19 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ } if (makeboxed) { // convert to a simple isboxed value - return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + return mark_julia_type(ctx, boxed(ctx, v), true, typ); } } return jl_cgval_t(v, typ, new_tindex); } -std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &context, bool imaging_mode, const DataLayout &DL, const Triple &triple) +std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &context, const DataLayout &DL, const Triple &triple) { ++ModulesCreated; auto m = std::make_unique(name, context); - // Some linkers (*cough* OS X) don't understand DWARF v4, so we use v2 in - // imaging mode. The structure of v4 is slightly nicer for debugging JIT - // code. + // According to clang darwin above 10.10 supports dwarfv4 if (!m->getModuleFlag("Dwarf Version")) { - int dwarf_version = 4; - if (triple.isOSDarwin()) { - if (imaging_mode) { - dwarf_version = 2; - } - } - m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", dwarf_version); + m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 4); } if (!m->getModuleFlag("Debug Info Version")) m->addModuleFlag(llvm::Module::Warning, "Debug Info Version", @@ -2356,20 +2947,20 @@ std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &conte static void jl_name_jlfunc_args(jl_codegen_params_t ¶ms, Function *F) { assert(F->arg_size() == 3); - F->getArg(0)->setName("function"); - F->getArg(1)->setName("args"); - F->getArg(2)->setName("nargs"); + F->getArg(0)->setName("function::Core.Function"); + F->getArg(1)->setName("args::Any[]"); + F->getArg(2)->setName("nargs::UInt32"); } static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) { assert(F->arg_size() == 4); - F->getArg(0)->setName("function"); - F->getArg(1)->setName("args"); - F->getArg(2)->setName("nargs"); - F->getArg(3)->setName("sparams"); + F->getArg(0)->setName("function::Core.Function"); + F->getArg(1)->setName("args::Any[]"); + F->getArg(2)->setName("nargs::UInt32"); + F->getArg(3)->setName("sparams::Any"); } -static void jl_init_function(Function *F, const Triple &TT) +void jl_init_function(Function *F, const Triple &TT) { // set any attributes that *must* be set on all functions AttrBuilder attr(F->getContext()); @@ -2380,11 +2971,7 @@ static void jl_init_function(Function *F, const Triple &TT) attr.addStackAlignmentAttr(16); } if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { -#if JL_LLVM_VERSION < 150000 - attr.addAttribute(Attribute::UWTable); // force NeedsWinEH -#else attr.addUWTableAttr(llvm::UWTableKind::Default); // force NeedsWinEH -#endif } if (jl_fpo_disabled(TT)) attr.addAttribute("frame-pointer", "all"); @@ -2407,7 +2994,7 @@ static void jl_init_function(Function *F, const Triple &TT) F->addFnAttrs(attr); } -static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t *rettype, bool prefer_specsig) +static bool uses_specsig(jl_value_t *sig, bool needsparams, jl_value_t *rettype, bool prefer_specsig) { if (needsparams) 
return false; @@ -2417,11 +3004,10 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t return false; if (jl_nparams(sig) == 0) return false; - if (va) { - if (jl_is_vararg(jl_tparam(sig, jl_nparams(sig) - 1))) - return false; - } + if (jl_vararg_kind(jl_tparam(sig, jl_nparams(sig) - 1)) == JL_VARARG_UNBOUND) + return false; // not invalid, consider if specialized signature is worthwhile + // n.b. sig is sometimes wrong for OC (tparam0 might be the captures type of the specialization, even though what gets passed in that slot is an OC object), so prefer_specsig is always set (instead of recomputing tparam0 using get_oc_type) if (prefer_specsig) return true; if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type) @@ -2449,10 +3035,8 @@ static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t return false; // jlcall sig won't require any box allocations } -static std::pair uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig) +static std::pair uses_specsig(jl_value_t *abi, jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig) { - int va = lam->def.method->isva; - jl_value_t *sig = lam->specTypes; bool needsparams = false; if (jl_is_method(lam->def.method)) { if ((size_t)jl_subtype_env_size(lam->def.method->sig) != jl_svec_len(lam->sparam_vals)) @@ -2462,7 +3046,7 @@ static std::pair uses_specsig(jl_method_instance_t *lam, jl_value_t needsparams = true; } } - return std::make_pair(uses_specsig(sig, needsparams, va, rettype, prefer_specsig), needsparams); + return std::make_pair(uses_specsig(abi, needsparams, rettype, prefer_specsig), needsparams); } @@ -2476,7 +3060,7 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha { Value *pv = ConstantExpr::getIntToPtr( ConstantInt::get(ctx.types().T_size, (uintptr_t)ptr), - getInt64PtrTy(ctx.builder.getContext())); + getPointerTy(ctx.builder.getContext())); Value *v = ctx.builder.CreateLoad(getInt64Ty(ctx.builder.getContext()), pv, true, name); v = ctx.builder.CreateAdd(v, addend); ctx.builder.CreateStore(v, pv, true); // volatile, not atomic, so this might be an underestimate, @@ -2487,7 +3071,7 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) { - if (ctx.emission_context.imaging) + if (ctx.emission_context.imaging_mode) return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; @@ -2498,7 +3082,7 @@ static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync) { - if (ctx.emission_context.imaging) + if (ctx.emission_context.imaging_mode) return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; @@ -2524,10 +3108,10 @@ static void cg_bdw(jl_codectx_t &ctx, jl_sym_t *var, jl_binding_t *b) } } -static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, size_t nargs) +static jl_value_t *static_apply_type(jl_codectx_t &ctx, ArrayRef args, size_t nargs) { assert(nargs > 1); - SmallVector v(nargs); + SmallVector v(nargs); for (size_t i = 0; i < nargs; i++) { if (!args[i].constant) return NULL; @@ -2563,8 +3147,8 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) if (jl_is_ssavalue(ex)) 
{ ssize_t idx = ((jl_ssavalue_t*)ex)->id - 1; assert(idx >= 0); - if (ctx.ssavalue_assigned.at(idx)) { - return ctx.SAvalues.at(idx).constant; + if (ctx.ssavalue_assigned[idx]) { + return ctx.SAvalues[idx].constant; } return NULL; } @@ -2577,10 +3161,11 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) if (jl_is_globalref(ex)) { s = jl_globalref_name(ex); jl_binding_t *b = jl_get_binding(jl_globalref_mod(ex), s); - if (b && b->constp) { + jl_value_t *v = jl_get_binding_value_if_const(b); + if (v) { if (b->deprecated) cg_bdw(ctx, s, b); - return jl_atomic_load_relaxed(&b->value); + return v; } return NULL; } @@ -2599,10 +3184,11 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) s = (jl_sym_t*)static_eval(ctx, jl_exprarg(e, 2)); if (s && jl_is_symbol(s)) { jl_binding_t *b = jl_get_binding(m, s); - if (b && b->constp) { + jl_value_t *v = jl_get_binding_value_if_const(b); + if (v) { if (b->deprecated) cg_bdw(ctx, s, b); - return jl_atomic_load_relaxed(&b->value); + return v; } } } @@ -2690,7 +3276,7 @@ static bool local_var_occurs(jl_value_t *e, int sl) static std::set assigned_in_try(jl_array_t *stmts, int s, long l) { std::set av; - for(int i=s; i <= l; i++) { + for(int i=s; i < l; i++) { jl_value_t *st = jl_array_ptr_ref(stmts,i); if (jl_is_expr(st)) { if (((jl_expr_t*)st)->head == jl_assign_sym) { @@ -2704,23 +3290,23 @@ static std::set assigned_in_try(jl_array_t *stmts, int s, long l) return av; } -static void mark_volatile_vars(jl_array_t *stmts, std::vector &slots) +static void mark_volatile_vars(jl_array_t *stmts, SmallVectorImpl &slots) { size_t slength = jl_array_dim0(stmts); for (int i = 0; i < (int)slength; i++) { jl_value_t *st = jl_array_ptr_ref(stmts, i); - if (jl_is_expr(st)) { - if (((jl_expr_t*)st)->head == jl_enter_sym) { - int last = jl_unbox_long(jl_exprarg(st, 0)); - std::set as = assigned_in_try(stmts, i + 1, last); - for (int j = 0; j < (int)slength; j++) { - if (j < i || j > last) { - std::set::iterator it = as.begin(); - for (; it != as.end(); it++) { - if (local_var_occurs(jl_array_ptr_ref(stmts, j), *it)) { - jl_varinfo_t &vi = slots[*it]; - vi.isVolatile = true; - } + if (jl_is_enternode(st)) { + int last = jl_enternode_catch_dest(st); + if (last == 0) + continue; + std::set as = assigned_in_try(stmts, i + 1, last - 1); + for (int j = 0; j < (int)slength; j++) { + if (j < i || j > last) { + std::set::iterator it = as.begin(); + for (; it != as.end(); it++) { + if (local_var_occurs(jl_array_ptr_ref(stmts, j), *it)) { + jl_varinfo_t &vi = slots[*it]; + vi.isVolatile = true; } } } @@ -2778,7 +3364,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback & } else if (jl_is_phicnode(expr)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 0); - size_t i, elen = jl_array_len(values); + size_t i, elen = jl_array_nrows(values); for (i = 0; i < elen; i++) { jl_value_t *v = jl_array_ptr_ref(values, i); general_use_analysis(ctx, v, f); @@ -2786,7 +3372,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback & } else if (jl_is_phinode(expr)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 1); - size_t i, elen = jl_array_len(values); + size_t i, elen = jl_array_nrows(values); for (i = 0; i < elen; i++) { jl_value_t *v = jl_array_ptr_ref(values, i); if (v) @@ -2812,77 +3398,168 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr) // ---- Get Element Pointer (GEP) instructions within the GC frame ---- -static jl_value_t 
*jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val) -{ - if (jl_is_globally_rooted(val)) - return val; - jl_method_t *m = ctx.linfo->def.method; - if (jl_is_method(m)) { - // the method might have a root for this already; use it if so - JL_LOCK(&m->writelock); - if (m->roots) { - size_t i, len = jl_array_dim0(m->roots); - for (i = 0; i < len; i++) { - jl_value_t *mval = jl_array_ptr_ref(m->roots, i); - if (mval == val || jl_egal(mval, val)) { - JL_UNLOCK(&m->writelock); - return mval; - } - } +static void jl_temporary_root(jl_codegen_params_t &ctx, jl_value_t *val) +{ + if (!jl_is_globally_rooted(val)) { + jl_array_t *roots = ctx.temporary_roots; + for (size_t i = 0; i < jl_array_dim0(roots); i++) { + if (jl_array_ptr_ref(roots, i) == val) + return; } - JL_UNLOCK(&m->writelock); + jl_array_ptr_1d_push(roots, val); } - return jl_as_global_root(val); +} +static void jl_temporary_root(jl_codectx_t &ctx, jl_value_t *val) +{ + jl_temporary_root(ctx.emission_context, val); } // --- generating function calls --- +static jl_cgval_t emit_globalref_runtime(jl_codectx_t &ctx, jl_binding_t *bnd, jl_module_t *mod, jl_sym_t *name) +{ + Value *bp = julia_binding_gv(ctx, bnd); + Value *v = ctx.builder.CreateCall(prepare_call(jlgetbindingvalue_func), { bp }); + undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, (jl_value_t*)mod); + return mark_julia_type(ctx, v, true, jl_any_type); +} + static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order) { - jl_binding_t *bnd = NULL; - Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false); - if (bp == NULL) - return jl_cgval_t(); - bp = julia_binding_pvalue(ctx, bp); - if (bnd) { - jl_value_t *v = jl_atomic_load_acquire(&bnd->value); // acquire value for ty - if (v != NULL) { - if (bnd->constp) - return mark_julia_const(ctx, v); - LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); - setName(ctx.emission_context, v, jl_symbol_name(name)); - v->setOrdering(order); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); - ai.decorateInst(v); - jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty); - return mark_julia_type(ctx, v, true, ty); + jl_binding_t *bnd = jl_get_module_binding(mod, name, 1); + assert(bnd); + jl_binding_partition_t *bpart = jl_get_binding_partition_all(bnd, ctx.min_world, ctx.max_world); + if (!bpart) { + return emit_globalref_runtime(ctx, bnd, mod, name); + } + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) { + // try to look this up now. + // TODO: This is bad and we'd like to delete it. 
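// Illustrative sketch (not part of the patch): the bookkeeping idea behind jl_temporary_root
// above, restated over plain containers. Values baked into generated code are kept alive by
// pushing them onto a per-compilation root list, with a linear identity scan so each value is
// recorded only once; values that are already permanently rooted are skipped. Value,
// is_permanently_rooted and CompilationRoots are hypothetical stand-ins, not Julia APIs.
#include <vector>

struct Value {};                                                    // stand-in for an opaque heap value
static bool is_permanently_rooted(const Value *) { return false; }  // stub for the sketch

struct CompilationRoots {
    std::vector<const Value *> roots;                      // stand-in for ctx.temporary_roots

    void add_temporary_root(const Value *v) {
        if (is_permanently_rooted(v))
            return;                                        // already reachable for the whole session
        for (const Value *r : roots)
            if (r == v)                                    // identity compare, like the pointer check above
                return;
        roots.push_back(v);                                // stays rooted until codegen of this unit finishes
    }
};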
+ jl_get_binding(mod, name); + } + // bpart was updated in place - this will change with full partition + pku = jl_atomic_load_acquire(&bpart->restriction); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) { + // Redo the lookup at runtime + return emit_globalref_runtime(ctx, bnd, mod, name); + } else { + while (true) { + if (!bpart) + break; + if (!jl_bkind_is_some_import(decode_restriction_kind(pku))) + break; + if (bnd->deprecated) { + cg_bdw(ctx, name, bnd); + } + bnd = (jl_binding_t*)decode_restriction_value(pku); + bpart = jl_get_binding_partition_all(bnd, ctx.min_world, ctx.max_world); + if (!bpart) + break; + pku = jl_atomic_load_acquire(&bpart->restriction); + } + if (bpart && jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + jl_value_t *constval = decode_restriction_value(pku); + if (!constval) { + undef_var_error_ifnot(ctx, ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), name, (jl_value_t*)mod); + return jl_cgval_t(); + } + return mark_julia_const(ctx, constval); } } - // todo: use type info to avoid undef check - return emit_checked_var(ctx, bp, name, false, ctx.tbaa().tbaa_binding); + if (!bpart) { + return emit_globalref_runtime(ctx, bnd, mod, name); + } + Value *bp = julia_binding_gv(ctx, bnd); + if (bnd->deprecated) { + cg_bdw(ctx, name, bnd); + } + assert(decode_restriction_kind(pku) == BINDING_KIND_GLOBAL); + jl_value_t *ty = decode_restriction_value(pku); + bp = julia_binding_pvalue(ctx, bp); + if (ty == nullptr) + ty = (jl_value_t*)jl_any_type; + return update_julia_type(ctx, emit_checked_var(ctx, bp, name, (jl_value_t*)mod, false, ctx.tbaa().tbaa_binding), ty); } -static bool emit_globalset(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, const jl_cgval_t &rval_info, AtomicOrdering Order) +static jl_cgval_t emit_globalop(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, jl_cgval_t rval, const jl_cgval_t &cmp, + AtomicOrdering Order, AtomicOrdering FailOrder, + bool issetglobal, bool isreplaceglobal, bool isswapglobal, bool ismodifyglobal, bool issetglobalonce, + const jl_cgval_t *modifyop, bool alloc) { jl_binding_t *bnd = NULL; - Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true); + Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true, alloc); + jl_binding_partition_t *bpart = jl_get_binding_partition_all(bnd, ctx.min_world, ctx.max_world); if (bp == NULL) - return false; - Value *rval = boxed(ctx, rval_info); - if (bnd && !bnd->constp) { - jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty); - if (ty && jl_subtype(rval_info.typ, ty)) { // TODO: use typeassert here instead - StoreInst *v = ctx.builder.CreateAlignedStore(rval, julia_binding_pvalue(ctx, bp), Align(sizeof(void*))); - v->setOrdering(Order); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); - ai.decorateInst(v); - emit_write_barrier(ctx, bp, rval); - return true; - } - } - ctx.builder.CreateCall(prepare_call(jlcheckassign_func), - { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), mark_callee_rooted(ctx, rval) }); - return true; + return jl_cgval_t(); + if (bpart) { + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + jl_value_t *ty = decode_restriction_value(pku); + if (ty != nullptr) { + const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" 
: "setglobalonce!"; + if (!ismodifyglobal) { + // TODO: use typeassert in jl_check_binding_wr too + emit_typecheck(ctx, rval, ty, "typeassert"); + rval = update_julia_type(ctx, rval, ty); + if (rval.typ == jl_bottom_type) + return jl_cgval_t(); + } + bool isboxed = true; + bool maybe_null = jl_atomic_load_relaxed(&bnd->value) == NULL; + return typed_store(ctx, + julia_binding_pvalue(ctx, bp), + rval, cmp, ty, + ctx.tbaa().tbaa_binding, + nullptr, + bp, + isboxed, + Order, + FailOrder, + 0, + nullptr, + issetglobal, + isreplaceglobal, + isswapglobal, + ismodifyglobal, + issetglobalonce, + maybe_null, + modifyop, + fname, + mod, + sym); + + } + } + } + Value *m = literal_pointer_val(ctx, (jl_value_t*)mod); + Value *s = literal_pointer_val(ctx, (jl_value_t*)sym); + if (issetglobal) { + ctx.builder.CreateCall(prepare_call(jlcheckassign_func), + { bp, m, s, mark_callee_rooted(ctx, boxed(ctx, rval)) }); + return rval; + } + else if (isreplaceglobal) { + Value *r = ctx.builder.CreateCall(prepare_call(jlcheckreplace_func), + { bp, m, s, boxed(ctx, cmp), boxed(ctx, rval) }); + return mark_julia_type(ctx, r, true, jl_any_type); + } + else if (isswapglobal) { + Value *r = ctx.builder.CreateCall(prepare_call(jlcheckswap_func), + { bp, m, s, mark_callee_rooted(ctx, boxed(ctx, rval)) }); + return mark_julia_type(ctx, r, true, jl_any_type); + } + else if (ismodifyglobal) { + Value *r = ctx.builder.CreateCall(prepare_call(jlcheckmodify_func), + { bp, m, s, boxed(ctx, cmp), boxed(ctx, rval) }); + return mark_julia_type(ctx, r, true, jl_any_type); + } + else if (issetglobalonce) { + Value *r = ctx.builder.CreateCall(prepare_call(jlcheckassignonce_func), + { bp, m, s, mark_callee_rooted(ctx, boxed(ctx, rval)) }); + return mark_julia_type(ctx, r, true, jl_bool_type); + } + abort(); // unreachable } static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2, @@ -2961,28 +3638,84 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, return phi; } +struct egal_desc { + size_t offset; + size_t nrepeats; + size_t data_bytes; + size_t padding_bytes; +}; + +template +static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty, egal_desc ¤t_desc) +{ + // Memcmp, but with masked padding + size_t data_bytes = 0; + size_t padding_bytes = 0; + size_t nfields = jl_datatype_nfields(aty); + size_t total_size = jl_datatype_size(aty); + assert(aty->layout->flags.isbitsegal); + for (size_t i = 0; i < nfields; ++i) { + size_t offset = jl_field_offset(aty, i); + size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1); + size_t fsz = jl_field_size(aty, i); + jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i); + assert(jl_is_datatype(fty)); // union fields should never reach here + assert(fty->layout->flags.isbitsegal); + if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) { + // The field has no internal padding + data_bytes += fsz; + if (offset + fsz == fend) { + // The field has no padding after. Merge this into the current + // comparison range and go to next field. + } else { + padding_bytes = fend - offset - fsz; + // Found padding. Either merge this into the current comparison + // range, or emit the old one and start a new one. + if (current_desc.data_bytes == data_bytes && + current_desc.padding_bytes == padding_bytes) { + // Same as the previous range, just note that down, so we + // emit this as a loop. 
+ current_desc.nrepeats += 1; + } else { + if (current_desc.nrepeats != 0) + emit_desc(current_desc); + current_desc.nrepeats = 1; + current_desc.data_bytes = data_bytes; + current_desc.padding_bytes = padding_bytes; + } + data_bytes = 0; + } + } else { + // The field may have internal padding. Recurse this. + data_bytes += emit_masked_bits_compare(emit_desc, fty, current_desc); + } + } + return data_bytes; +} + static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2) { ++EmittedBitsCompares; + jl_value_t *argty = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ); bool isboxed; Type *at = julia_type_to_llvm(ctx, arg1.typ, &isboxed); - assert(jl_is_datatype(arg1.typ) && arg1.typ == arg2.typ && !isboxed); + assert(jl_is_datatype(arg1.typ) && arg1.typ == (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ) && !isboxed); if (type_is_ghost(at)) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) { Type *at_int = INTT(at, ctx.emission_context.DL); - Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ); - Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ); + Value *varg1 = emit_unbox(ctx, at_int, arg1, argty); + Value *varg2 = emit_unbox(ctx, at_int, arg2, argty); return ctx.builder.CreateICmpEQ(varg1, varg2); } if (at->isVectorTy()) { - jl_svec_t *types = ((jl_datatype_t*)arg1.typ)->types; + jl_svec_t *types = ((jl_datatype_t*)argty)->types; Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); - Value *varg1 = emit_unbox(ctx, at, arg1, arg1.typ); - Value *varg2 = emit_unbox(ctx, at, arg2, arg2.typ); + Value *varg1 = emit_unbox(ctx, at, arg1, argty); + Value *varg2 = emit_unbox(ctx, at, arg2, argty); for (size_t i = 0, l = jl_svec_len(types); i < l; i++) { jl_value_t *fldty = jl_svecref(types, i); Value *subAns, *fld1, *fld2; @@ -2997,27 +3730,27 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a } if (at->isAggregateType()) { // Struct or Array - jl_datatype_t *sty = (jl_datatype_t*)arg1.typ; + jl_datatype_t *sty = (jl_datatype_t*)argty; size_t sz = jl_datatype_size(sty); - if (sz > 512 && !sty->layout->haspadding) { - Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) : + if (sz > 512 && !sty->layout->flags.haspadding && sty->layout->flags.isbitsegal) { + Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) : value_to_pointer(ctx, arg1).V; - Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) : + Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? 
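// Illustrative sketch (not part of the patch): what emit_masked_bits_compare above computes,
// restated over plain byte buffers. A concrete layout is reduced to runs of
// "data_bytes followed by padding_bytes"; equality then memcmp's only the data portion of each
// run, so compiler-inserted padding can never make two otherwise-equal values compare unequal.
// Run and bits_equal_masked are hypothetical stand-ins; the generated IR additionally folds equal
// adjacent runs into a loop (the nrepeats field) instead of unrolling them.
#include <cstddef>
#include <cstring>
#include <vector>

struct Run {
    std::size_t data_bytes;    // bytes that carry value information
    std::size_t padding_bytes; // bytes to skip after them
};

static bool bits_equal_masked(const unsigned char *a, const unsigned char *b,
                              const std::vector<Run> &runs)
{
    std::size_t off = 0;
    for (const Run &r : runs) {
        if (std::memcmp(a + off, b + off, r.data_bytes) != 0)
            return false;                      // a data byte differs
        off += r.data_bytes + r.padding_bytes; // padding is never inspected
    }
    return true;
}
// e.g. for struct { int32_t x; int64_t y; } on an LP64 ABI the runs are {4, 4} then {8, 0}:
// the four padding bytes after x are ignored by the comparison.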
data_pointer(ctx, arg2) : value_to_pointer(ctx, arg2).V; varg1 = emit_pointer_from_objref(ctx, varg1); varg2 = emit_pointer_from_objref(ctx, varg2); - Value *gc_uses[2]; - int nroots = 0; - if ((gc_uses[nroots] = get_gc_root_for(arg1))) - nroots++; - if ((gc_uses[nroots] = get_gc_root_for(arg2))) - nroots++; - OperandBundleDef OpBundle("jl_roots", makeArrayRef(gc_uses, nroots)); + SmallVector gc_uses; + // these roots may seem a bit overkill, but we want to make sure + // that a!=b implies (a,)!=(b,) even if a and b are unused and + // therefore could be freed and then the memory for a reused for b + gc_uses.append(get_gc_roots_for(ctx, arg1)); + gc_uses.append(get_gc_roots_for(ctx, arg2)); + OperandBundleDef OpBundle("jl_roots", gc_uses); auto answer = ctx.builder.CreateCall(prepare_call(memcmp_func), { - ctx.builder.CreateBitCast(varg1, getInt8PtrTy(ctx.builder.getContext())), - ctx.builder.CreateBitCast(varg2, getInt8PtrTy(ctx.builder.getContext())), + varg1, + varg2, ConstantInt::get(ctx.types().T_size, sz) }, - ArrayRef(&OpBundle, nroots ? 1 : 0)); + ArrayRef(&OpBundle, gc_uses.empty() ? 0 : 1)); if (arg1.tbaa || arg2.tbaa) { jl_aliasinfo_t ai; @@ -3036,6 +3769,92 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a } return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); } + else if (sz > 512 && jl_struct_try_layout(sty) && sty->layout->flags.isbitsegal) { + Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) : + value_to_pointer(ctx, arg1).V; + Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) : + value_to_pointer(ctx, arg2).V; + varg1 = emit_pointer_from_objref(ctx, varg1); + varg2 = emit_pointer_from_objref(ctx, varg2); + + // See above for why we want to do this + SmallVector gc_uses; + gc_uses.append(get_gc_roots_for(ctx, arg1)); + gc_uses.append(get_gc_roots_for(ctx, arg2)); + OperandBundleDef OpBundle("jl_roots", gc_uses); + + Value *answer = nullptr; + auto emit_desc = [&](egal_desc desc) { + Value *ptr1 = varg1; + Value *ptr2 = varg2; + if (desc.offset != 0) { + ptr1 = emit_ptrgep(ctx, ptr1, desc.offset); + ptr2 = emit_ptrgep(ctx, ptr2, desc.offset); + } + + Value *new_ptr1 = ptr1; + Value *endptr1 = nullptr; + BasicBlock *postBB = nullptr; + BasicBlock *loopBB = nullptr; + PHINode *answerphi = nullptr; + if (desc.nrepeats != 1) { + // Set up loop + endptr1 = emit_ptrgep(ctx, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));; + + BasicBlock *currBB = ctx.builder.GetInsertBlock(); + loopBB = BasicBlock::Create(ctx.builder.getContext(), "egal_loop", ctx.f); + postBB = BasicBlock::Create(ctx.builder.getContext(), "post", ctx.f); + ctx.builder.CreateBr(loopBB); + + ctx.builder.SetInsertPoint(loopBB); + Type *TInt1 = getInt1Ty(ctx.builder.getContext()); + answerphi = ctx.builder.CreatePHI(TInt1, 2); + answerphi->addIncoming(answer ? answer : ConstantInt::get(TInt1, 1), currBB); + answer = answerphi; + + PHINode *itr1 = ctx.builder.CreatePHI(ptr1->getType(), 2); + PHINode *itr2 = ctx.builder.CreatePHI(ptr2->getType(), 2); + + new_ptr1 = emit_ptrgep(ctx, itr1, desc.data_bytes + desc.padding_bytes); + itr1->addIncoming(ptr1, currBB); + itr1->addIncoming(new_ptr1, loopBB); + + Value *new_ptr2 = emit_ptrgep(ctx, itr2, desc.data_bytes + desc.padding_bytes); + itr2->addIncoming(ptr2, currBB); + itr2->addIncoming(new_ptr2, loopBB); + + ptr1 = itr1; + ptr2 = itr2; + } + + // Emit memcmp. 
TODO: LLVM has a pass to expand this for additional + // performance. + Value *this_answer = ctx.builder.CreateCall(prepare_call(memcmp_func), + { ptr1, + ptr2, + ConstantInt::get(ctx.types().T_size, desc.data_bytes) }, + ArrayRef(&OpBundle, gc_uses.empty() ? 0 : 1)); + this_answer = ctx.builder.CreateICmpEQ(this_answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + answer = answer ? ctx.builder.CreateAnd(answer, this_answer) : this_answer; + if (endptr1) { + answerphi->addIncoming(answer, loopBB); + Value *loopend = ctx.builder.CreateICmpEQ(new_ptr1, endptr1); + ctx.builder.CreateCondBr(loopend, postBB, loopBB); + ctx.builder.SetInsertPoint(postBB); + } + }; + egal_desc current_desc = {0}; + size_t trailing_data_bytes = emit_masked_bits_compare(emit_desc, sty, current_desc); + assert(current_desc.nrepeats != 0); + emit_desc(current_desc); + if (trailing_data_bytes != 0) { + current_desc.nrepeats = 1; + current_desc.data_bytes = trailing_data_bytes; + current_desc.padding_bytes = 0; + emit_desc(current_desc); + } + return answer; + } else { jl_svec_t *types = sty->types; Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); @@ -3079,8 +3898,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva if (arg1.constant && arg2.constant) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), jl_egal(arg1.constant, arg2.constant)); - jl_value_t *rt1 = arg1.typ; - jl_value_t *rt2 = arg2.typ; + jl_value_t *rt1 = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ); + jl_value_t *rt2 = (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ); if (jl_is_concrete_type(rt1) && jl_is_concrete_type(rt2) && !jl_is_kind(rt1) && !jl_is_kind(rt2) && rt1 != rt2) { // disjoint concrete leaf types are never equal (quick test) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); @@ -3102,8 +3921,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton // (which was probably caught above, but just to be safe, we repeat it here explicitly) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); - Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue); - Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue); + Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.Vboxed; + Value *varg2 = arg2.constant ? 
literal_pointer_val(ctx, arg2.constant) : arg2.Vboxed; // rooting these values isn't needed since we won't load this pointer // and we know at least one of them is a unique Singleton // which is already enough to ensure pointer uniqueness for this test @@ -3115,16 +3934,18 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); - bool justbits1 = jl_is_concrete_immutable(rt1); - bool justbits2 = jl_is_concrete_immutable(rt2); + // can compare any concrete immutable by bits, except for UnionAll + // which has a special non-bits based egal + bool justbits1 = jl_is_concrete_immutable(rt1) && !jl_is_kind(rt1); + bool justbits2 = jl_is_concrete_immutable(rt2) && !jl_is_kind(rt2); if (justbits1 || justbits2) { // whether this type is unique'd by value return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* { jl_datatype_t *typ = (jl_datatype_t*)(justbits1 ? rt1 : rt2); if (typ == jl_bool_type) { // aka jl_pointer_egal // some optimizations for bool, since pointer comparison may be better if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer - Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue); - Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue); + Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.Vboxed; + Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.Vboxed; return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2)); } } @@ -3160,37 +3981,70 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva } static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, - const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop) + ArrayRef argv, size_t nargs, const jl_cgval_t *modifyop) { + bool issetglobal = f == jl_builtin_setglobal; + bool isreplaceglobal = f == jl_builtin_replaceglobal; + bool isswapglobal = f == jl_builtin_swapglobal; + bool ismodifyglobal = f == jl_builtin_modifyglobal; + bool issetglobalonce = f == jl_builtin_setglobalonce; + const jl_cgval_t undefval; const jl_cgval_t &mod = argv[1]; const jl_cgval_t &sym = argv[2]; - const jl_cgval_t &val = argv[3]; - enum jl_memory_order order = jl_memory_order_unspecified; - assert(f == jl_builtin_setglobal && modifyop == nullptr && "unimplemented"); - - if (nargs == 4) { - const jl_cgval_t &arg4 = argv[4]; - if (arg4.constant && jl_is_symbol(arg4.constant)) - order = jl_get_atomic_order((jl_sym_t*)arg4.constant, false, true); - else + jl_cgval_t val = argv[isreplaceglobal || ismodifyglobal ? 4 : 3]; + const jl_cgval_t &cmp = isreplaceglobal || ismodifyglobal ? argv[3] : undefval; + enum jl_memory_order order = jl_memory_order_release; + const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!"; + if (nargs >= (isreplaceglobal || ismodifyglobal ? 5 : 4)) { + const jl_cgval_t &ord = argv[isreplaceglobal || ismodifyglobal ? 
5 : 4]; + emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); + if (!ord.constant) return false; + order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetglobal, true); + } + enum jl_memory_order fail_order = order; + if ((isreplaceglobal || issetglobalonce) && nargs == (isreplaceglobal ? 6 : 5)) { + const jl_cgval_t &ord = argv[isreplaceglobal ? 6 : 5]; + emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); + if (!ord.constant) + return false; + fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false); + } + if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) { + emit_atomic_error(ctx, "invalid atomic ordering"); + *ret = jl_cgval_t(); // unreachable + return true; } - else - order = jl_memory_order_release; - if (order == jl_memory_order_invalid || order == jl_memory_order_notatomic) { - emit_atomic_error(ctx, order == jl_memory_order_invalid ? "invalid atomic ordering" : "setglobal!: module binding cannot be written non-atomically"); + if (order == jl_memory_order_notatomic) { + emit_atomic_error(ctx, + issetglobal ? "setglobal!: module binding cannot be written non-atomically" : + isreplaceglobal ? "replaceglobal!: module binding cannot be written non-atomically" : + isswapglobal ? "swapglobal!: module binding cannot be written non-atomically" : + ismodifyglobal ? "modifyglobal!: module binding cannot be written non-atomically" : + "setglobalonce!: module binding cannot be written non-atomically"); + *ret = jl_cgval_t(); // unreachable + return true; + } + else if (fail_order == jl_memory_order_notatomic) { + emit_atomic_error(ctx, + isreplaceglobal ? "replaceglobal!: module binding cannot be accessed non-atomically" : + "setglobalonce!: module binding cannot be accessed non-atomically"); *ret = jl_cgval_t(); // unreachable return true; } if (sym.constant && jl_is_symbol(sym.constant)) { - jl_sym_t *name = (jl_sym_t*)sym.constant; if (mod.constant && jl_is_module(mod.constant)) { - if (emit_globalset(ctx, (jl_module_t*)mod.constant, name, val, get_llvm_atomic_order(order))) - *ret = val; - else - *ret = jl_cgval_t(); // unreachable + *ret = emit_globalop(ctx, (jl_module_t*)mod.constant, (jl_sym_t*)sym.constant, val, cmp, + get_llvm_atomic_order(order), get_llvm_atomic_order(fail_order), + issetglobal, + isreplaceglobal, + isswapglobal, + ismodifyglobal, + issetglobalonce, + modifyop, + false); return true; } } @@ -3199,20 +4053,21 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, - const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop) + ArrayRef argv, size_t nargs, const jl_cgval_t *modifyop) { ++EmittedOpfields; bool issetfield = f == jl_builtin_setfield; bool isreplacefield = f == jl_builtin_replacefield; bool isswapfield = f == jl_builtin_swapfield; bool ismodifyfield = f == jl_builtin_modifyfield; + bool issetfieldonce = f == jl_builtin_setfieldonce; const jl_cgval_t undefval; const jl_cgval_t &obj = argv[1]; const jl_cgval_t &fld = argv[2]; jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3]; const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval; enum jl_memory_order order = jl_memory_order_notatomic; - const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!"; + const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? 
"swapfield!" : ismodifyfield ? "modifyfield!" : "setfieldonce!"; if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) { const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4]; emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); @@ -3221,8 +4076,8 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true); } enum jl_memory_order fail_order = order; - if (isreplacefield && nargs == 6) { - const jl_cgval_t &ord = argv[6]; + if ((isreplacefield || issetfieldonce) && nargs == (isreplacefield ? 6 : 5)) { + const jl_cgval_t &ord = argv[isreplacefield ? 6 : 5]; emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); if (!ord.constant) return false; @@ -3270,13 +4125,19 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, isswapfield ? (isatomic ? "swapfield!: atomic field cannot be written non-atomically" : "swapfield!: non-atomic field cannot be written atomically") : + ismodifyfield ? (isatomic ? "modifyfield!: atomic field cannot be written non-atomically" - : "modifyfield!: non-atomic field cannot be written atomically")); + : "modifyfield!: non-atomic field cannot be written atomically") : + (isatomic ? "setfieldonce!: atomic field cannot be written non-atomically" + : "setfieldonce!: non-atomic field cannot be written atomically")); } else if (isatomic == (fail_order == jl_memory_order_notatomic)) { emit_atomic_error(ctx, + isreplacefield ? (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically" - : "replacefield!: non-atomic field cannot be accessed atomically")); + : "replacefield!: non-atomic field cannot be accessed atomically") : + (isatomic ? "setfieldonce!: atomic field cannot be accessed non-atomically" + : "setfieldonce!: non-atomic field cannot be accessed atomically")); } else if (!uty->name->mutabl) { std::string msg = fname + ": immutable struct of type " @@ -3286,13 +4147,14 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else if (jl_field_isconst(uty, idx)) { std::string msg = fname + ": const field ." - + std::string(jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(uty), idx))) + + std::string(jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(uty), idx))) + " of type " + std::string(jl_symbol_name(uty->name->name)) + " cannot be changed"; emit_error(ctx, msg); } else { + assert(obj.isboxed); *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, (needlock || order <= jl_memory_order_notatomic) ? AtomicOrdering::NotAtomic @@ -3300,7 +4162,8 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, (needlock || fail_order <= jl_memory_order_notatomic) ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(fail_order), - needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, + needlock ? boxed(ctx, obj) : nullptr, + issetfield, isreplacefield, isswapfield, ismodifyfield, issetfieldonce, modifyop, fname); } return true; @@ -3310,16 +4173,223 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return false; } +static jl_cgval_t emit_isdefinedglobal(jl_codectx_t &ctx, jl_module_t *modu, jl_sym_t *name, int allow_import, enum jl_memory_order order) +{ + Value *isnull = NULL; + jl_binding_t *bnd = allow_import ? jl_get_binding(modu, name) : jl_get_module_binding(modu, name, 0); + jl_binding_partition_t *bpart = jl_get_binding_partition_all(bnd, ctx.min_world, ctx.max_world); + jl_ptr_kind_union_t pku = bpart ? 
jl_atomic_load_relaxed(&bpart->restriction) : encode_restriction(NULL, BINDING_KIND_GUARD); + if (decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + if (jl_get_binding_value_if_const(bnd)) + return mark_julia_const(ctx, jl_true); + Value *bp = julia_binding_gv(ctx, bnd); + bp = julia_binding_pvalue(ctx, bp); + LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); + ai.decorateInst(v); + v->setOrdering(get_llvm_atomic_order(order)); + isnull = ctx.builder.CreateICmpNE(v, Constant::getNullValue(ctx.types().T_prjlvalue)); + } + else { + Value *v = ctx.builder.CreateCall(prepare_call(jlboundp_func), { + literal_pointer_val(ctx, (jl_value_t*)modu), + literal_pointer_val(ctx, (jl_value_t*)name), + ConstantInt::get(getInt32Ty(ctx.builder.getContext()), allow_import) + }); + isnull = ctx.builder.CreateICmpNE(v, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + } + return mark_julia_type(ctx, isnull, false, jl_bool_type); +} + +static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, + ArrayRef argv, size_t nargs, const jl_cgval_t *modifyop) +{ + bool issetmemory = f == jl_builtin_memoryrefset; + bool isreplacememory = f == jl_builtin_memoryrefreplace; + bool isswapmemory = f == jl_builtin_memoryrefswap; + bool ismodifymemory = f == jl_builtin_memoryrefmodify; + bool issetmemoryonce = f == jl_builtin_memoryrefsetonce; + + const jl_cgval_t undefval; + const jl_cgval_t &ref = argv[1]; + jl_cgval_t val = argv[isreplacememory || ismodifymemory ? 3 : 2]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (!jl_is_genericmemoryref_type(mty_dt) || !jl_is_concrete_type(mty_dt)) + return false; + + jl_value_t *kind = jl_tparam0(mty_dt); + jl_value_t *ety = jl_tparam1(mty_dt); + jl_value_t *addrspace = jl_tparam2(mty_dt); (void)addrspace; // TODO + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym) + return false; + + const jl_cgval_t &cmp = isreplacememory || ismodifymemory ? argv[2] : undefval; + enum jl_memory_order order = jl_memory_order_notatomic; + const std::string fname = issetmemory ? "memoryrefset!" : isreplacememory ? "memoryrefreplace!" : isswapmemory ? "memoryrefswap!" : ismodifymemory ? "memoryrefmodify!" : "memoryrefsetonce!"; + { + const jl_cgval_t &ord = argv[isreplacememory || ismodifymemory ? 4 : 3]; + emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); + if (!ord.constant) + return false; + order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetmemory, true); + } + enum jl_memory_order fail_order = order; + if (isreplacememory || issetmemoryonce) { + const jl_cgval_t &ord = argv[isreplacememory ? 
5 : 4]; + emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); + if (!ord.constant) + return false; + fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false); + } + if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) { + emit_atomic_error(ctx, "invalid atomic ordering"); + *ret = jl_cgval_t(); // unreachable + return true; + } + + jl_value_t *boundscheck = argv[nargs].constant; + emit_typecheck(ctx, argv[nargs], (jl_value_t*)jl_bool_type, fname); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isatomic = kind == (jl_value_t*)jl_atomic_sym; + bool needlock = isatomic && layout->size > MAX_ATOMIC_SIZE; + size_t elsz = layout->size; + size_t al = layout->alignment; + if (al > JL_HEAP_ALIGNMENT) + al = JL_HEAP_ALIGNMENT; + if (isatomic == (order == jl_memory_order_notatomic)) { + emit_atomic_error(ctx, + issetmemory ? + (isatomic ? "memoryrefset!: atomic memory cannot be written non-atomically" + : "memoryrefset!: non-atomic memory cannot be written atomically") : + isreplacememory ? + (isatomic ? "memoryrefreplace!: atomic memory cannot be written non-atomically" + : "memoryrefreplace!: non-atomic memory cannot be written atomically") : + isswapmemory ? + (isatomic ? "memoryrefswap!: atomic memory cannot be written non-atomically" + : "memoryrefswap!: non-atomic memory cannot be written atomically") : + ismodifymemory ? + (isatomic ? "memoryrefmodify!: atomic memory cannot be written non-atomically" + : "memoryrefmodify!: non-atomic memory cannot be written atomically") : + (isatomic ? "memoryrefsetonce!: atomic memory cannot be written non-atomically" + : "memoryrefsetonce!: non-atomic memory cannot be written atomically")); + *ret = jl_cgval_t(); + return true; + } + else if (isatomic == (fail_order == jl_memory_order_notatomic)) { + emit_atomic_error(ctx, + isreplacememory ? + (isatomic ? "memoryrefreplace!: atomic memory cannot be accessed non-atomically" + : "memoryrefreplace!: non-atomic memory cannot be accessed atomically") : + (isatomic ? "memoryrefsetonce!: atomic memory cannot be accessed non-atomically" + : "memoryrefsetonce!: non-atomic memory cannot be accessed atomically")); + *ret = jl_cgval_t(); + return true; + } + Value *mem = emit_memoryref_mem(ctx, ref, layout); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + if (bounds_check_enabled(ctx, boundscheck)) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "load"); + ctx.builder.CreateCondBr(ctx.builder.CreateIsNull(mlen), failBB, endBB); + failBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(ctx, mem), ConstantInt::get(ctx.types().T_size, 1) }); + ctx.builder.CreateUnreachable(); + endBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(endBB); + } + if (!ismodifymemory) { + emit_typecheck(ctx, val, ety, fname); + val = update_julia_type(ctx, val, ety); + if (val.typ == jl_bottom_type) + return true; + } + AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic) + ? AtomicOrdering::NotAtomic + : get_llvm_atomic_order(order); + AtomicOrdering FailOrder = (needlock || fail_order <= jl_memory_order_notatomic) + ? 
AtomicOrdering::NotAtomic + : get_llvm_atomic_order(fail_order); + if (isunion) { + assert(!isatomic && !needlock); + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *idx0 = CreateSimplifiedExtractValue(ctx, V, 0); + Value *mem = CreateSimplifiedExtractValue(ctx, V, 1); + Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded); + Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); + // compute tindex from val + Value *ptindex; + if (elsz == 0) { + ptindex = data; + } + else { + // isbits union selector bytes are stored after mem->length + ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen); + data = ctx.builder.CreateInBoundsGEP(AT, data, idx0); + } + ptindex = emit_ptrgep(ctx, ptindex, idx0); + *ret = union_store(ctx, data, ptindex, val, cmp, ety, + ctx.tbaa().tbaa_arraybuf, ctx.tbaa().tbaa_arrayselbyte, + Order, FailOrder, + nullptr, issetmemory, isreplacememory, isswapmemory, ismodifymemory, issetmemoryonce, + modifyop, fname); + } + else { + Value *ptr = (layout->size == 0 ? nullptr : emit_memoryref_ptr(ctx, ref, layout)); + Value *lock = nullptr; + bool maybenull = true; + if (needlock) { + assert(ptr); + lock = ptr; + // ptr += sizeof(lock); + ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); + } + Value *data_owner = NULL; // owner object against which the write barrier must check + if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier + data_owner = emit_memoryref_mem(ctx, ref, layout); + } + *ret = typed_store(ctx, + ptr, + val, cmp, ety, + isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, + ctx.noalias().aliasscope.current, + data_owner, + isboxed, + Order, + FailOrder, + al, + lock, + issetmemory, + isreplacememory, + isswapmemory, + ismodifymemory, + issetmemoryonce, + maybenull, + modifyop, + fname, + nullptr, + nullptr); + } + return true; +} + static jl_llvm_functions_t emit_function( orc::ThreadSafeModule &TSM, jl_method_instance_t *lam, jl_code_info_t *src, - jl_value_t *jlrettype, + jl_value_t *abi, + jl_value_t *rettype, jl_codegen_params_t ¶ms); +static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name); + static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, - const jl_cgval_t *argv, size_t nargs, jl_value_t *rt, + ArrayRef argv, size_t nargs, jl_value_t *rt, jl_expr_t *ex, bool is_promotable) // returns true if the call has been handled { @@ -3327,7 +4397,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (f == jl_builtin_is && nargs == 2) { // emit comparison test Value *ans = emit_f_is(ctx, argv[1], argv[2]); - *ret = mark_julia_type(ctx, ctx.builder.CreateZExt(ans, getInt8Ty(ctx.builder.getContext())), false, jl_bool_type); + *ret = mark_julia_type(ctx, ans, false, jl_bool_type); return true; } @@ -3365,9 +4435,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, const jl_cgval_t &ty = argv[2]; if (jl_is_type_type(ty.typ) && !jl_has_free_typevars(ty.typ)) { jl_value_t *tp0 = jl_tparam0(ty.typ); - Value *isa_result = emit_isa(ctx, arg, tp0, NULL).first; - if (isa_result->getType() == getInt1Ty(ctx.builder.getContext())) - isa_result = ctx.builder.CreateZExt(isa_result, getInt8Ty(ctx.builder.getContext())); + Value *isa_result = emit_isa(ctx, arg, tp0, Twine()).first; *ret = mark_julia_type(ctx, isa_result, false, jl_bool_type); return true; } @@ 
-3393,9 +4461,15 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, #ifdef _P64 nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); #endif - Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)); + Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); *ret = mark_julia_type(ctx, r, true, jl_any_type); + if (trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to Core._apply_iterate detected\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } return true; } } @@ -3407,7 +4481,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } if (jl_is_tuple_type(rt) && jl_is_concrete_type(rt) && nargs == jl_datatype_nfields(rt)) { - *ret = emit_new_struct(ctx, rt, nargs, &argv[1], is_promotable); + *ret = emit_new_struct(ctx, rt, nargs, argv.drop_front(), is_promotable); return true; } } @@ -3419,270 +4493,291 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } - else if (f == jl_builtin_arraysize && nargs == 2) { - const jl_cgval_t &ary = argv[1]; - const jl_cgval_t &idx = argv[2]; - jl_value_t *aty = jl_unwrap_unionall(ary.typ); - if (jl_is_array_type(aty) && idx.typ == (jl_value_t*)jl_long_type) { - jl_value_t *ndp = jl_tparam1(aty); - if (jl_is_long(ndp)) { - size_t ndims = jl_unbox_long(ndp); - if (idx.constant) { - uint32_t idx_const = (uint32_t)jl_unbox_long(idx.constant); - if (idx_const > 0 && idx_const <= ndims) { - jl_value_t *ary_ex = jl_exprarg(ex, 1); - *ret = mark_julia_type(ctx, emit_arraysize(ctx, ary, ary_ex, idx_const), false, jl_long_type); - return true; - } - else if (idx_const > ndims) { - *ret = mark_julia_type(ctx, ConstantInt::get(ctx.types().T_size, 1), false, jl_long_type); - return true; - } - } - else { - Value *idx_dyn = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type); - auto positive = ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size)); - setName(ctx.emission_context, positive, "ispositive"); - error_unless(ctx, positive, "arraysize: dimension out of range"); - BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f); - BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange"); - BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize"); - auto oor = ctx.builder.CreateICmpSLE(idx_dyn, - ConstantInt::get(ctx.types().T_size, ndims)); - setName(ctx.emission_context, oor, "sizeddim"); - ctx.builder.CreateCondBr(oor, inBB, outBB); - ctx.builder.SetInsertPoint(outBB); - Value *v_one = ConstantInt::get(ctx.types().T_size, 1); - ctx.builder.CreateBr(ansBB); - ctx.f->getBasicBlockList().push_back(inBB); - ctx.builder.SetInsertPoint(inBB); - Value *v_sz = emit_arraysize(ctx, ary, idx_dyn); - ctx.builder.CreateBr(ansBB); - inBB = ctx.builder.GetInsertBlock(); // could have changed - ctx.f->getBasicBlockList().push_back(ansBB); - ctx.builder.SetInsertPoint(ansBB); - PHINode *result = ctx.builder.CreatePHI(ctx.types().T_size, 2); - result->addIncoming(v_one, outBB); - result->addIncoming(v_sz, 
inBB); - setName(ctx.emission_context, result, "arraysize"); - *ret = mark_julia_type(ctx, result, false, jl_long_type); - return true; - } - } + else if (f == jl_builtin_memorynew && (nargs == 2)) { + const jl_cgval_t &memty = argv[1]; + if (!memty.constant) + return false; + jl_datatype_t *typ = (jl_datatype_t*) memty.constant; + if (!jl_is_concrete_type((jl_value_t*)typ) || !jl_is_genericmemory_type(typ)) + return false; + jl_genericmemory_t *inst = (jl_genericmemory_t*)((jl_datatype_t*)typ)->instance; + if (inst == NULL) + return false; + if (argv[2].constant) { + if (!jl_is_long(argv[2].constant)) + return false; + size_t nel = jl_unbox_long(argv[2].constant); + if (nel < 0) + return false; + *ret = emit_const_len_memorynew(ctx, typ, nel, inst); + } + else { + *ret = emit_memorynew(ctx, typ, argv[2], inst); } + return true; } - else if ((f == jl_builtin_arrayref || f == jl_builtin_const_arrayref) && nargs >= 3) { - const jl_cgval_t &ary = argv[2]; - bool indices_ok = true; - for (size_t i = 3; i <= nargs; i++) { - if (argv[i].typ != (jl_value_t*)jl_long_type) { - indices_ok = false; - break; - } + else if (f == jl_builtin_memoryref && nargs == 1) { + const jl_cgval_t &mem = argv[1]; + jl_datatype_t *mty_dt = (jl_datatype_t*)jl_unwrap_unionall(mem.typ); + if (jl_is_genericmemory_type(mty_dt) && jl_is_concrete_type((jl_value_t*)mty_dt)) { + jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(mty_dt->parameters), jl_svec_len(mty_dt->parameters)); + const jl_datatype_layout_t *layout = mty_dt->layout; + *ret = _emit_memoryref(ctx, mem, layout, typ); + return true; } - jl_value_t *aty_dt = jl_unwrap_unionall(ary.typ); - if (jl_is_array_type(aty_dt) && indices_ok) { - jl_value_t *ety = jl_tparam0(aty_dt); - jl_value_t *ndp = jl_tparam1(aty_dt); - if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 3)) { - jl_value_t *ary_ex = jl_exprarg(ex, 2); - size_t elsz = 0, al = 0; - int union_max = jl_islayout_inline(ety, &elsz, &al); - bool isboxed = (union_max == 0); - if (isboxed) - ety = (jl_value_t*)jl_any_type; - ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1; - jl_value_t *boundscheck = argv[1].constant; - emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayref"); - Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[3], nargs - 2, boundscheck); - if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) { - assert(((jl_datatype_t*)ety)->instance != NULL); - *ret = ghostValue(ctx, ety); - } - else if (!isboxed && jl_is_uniontype(ety)) { - Value *data = emit_arrayptr(ctx, ary, ary_ex); - Value *offset = emit_arrayoffset(ctx, ary, nd); - Value *ptindex; - if (elsz == 0) { - ptindex = data; - } - else { - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); - data = emit_bitcast(ctx, data, AT->getPointerTo()); - // isbits union selector bytes are stored after a->maxsize - Value *ndims = (nd == -1 ? 
emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); - setName(ctx.emission_context, ndims, "ndims"); - Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); - setName(ctx.emission_context, is_vector, "isvec"); - Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size)); - setName(ctx.emission_context, selidx_v, "selidx_v"); - Value *selidx_m = emit_arraylen(ctx, ary); - Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); - setName(ctx.emission_context, selidx, "selidx"); - ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); - setName(ctx.emission_context, ptindex, "ptindex"); - data = ctx.builder.CreateInBoundsGEP(AT, data, idx); - setName(ctx.emission_context, data, "data"); - } - ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); - *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte); - if (ret->V) - setName(ctx.emission_context, ret->V, "arrayref"); + } + + else if (f == jl_builtin_memoryref && (nargs == 2 || nargs == 3)) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + jl_value_t *boundscheck = nargs == 3 ? argv[3].constant : nullptr; + if (nargs == 3) + emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryref"); + *ret = emit_memoryref(ctx, ref, argv[2], boundscheck, layout); + return true; + } + } + + else if (f == jl_builtin_memoryrefoffset && nargs == 1) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + *ret = emit_memoryref_offset(ctx, ref, layout); + return true; + } + } + + else if (f == jl_builtin_memoryrefget && nargs == 3) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + jl_value_t *kind = jl_tparam0(mty_dt); + jl_value_t *ety = jl_tparam1(mty_dt); + jl_value_t *addrspace = jl_tparam2(mty_dt); (void)addrspace; // TODO + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym) + return false; + enum jl_memory_order order = jl_memory_order_unspecified; + const std::string fname = "memoryrefget"; + { + const jl_cgval_t &ord = argv[2]; + emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); + if (!ord.constant) + return false; + order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false); + } + if (order == jl_memory_order_invalid) { + emit_atomic_error(ctx, "invalid atomic ordering"); + *ret = jl_cgval_t(); // unreachable + return true; + } + bool isatomic = kind == (jl_value_t*)jl_atomic_sym; + if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) 
{ + emit_atomic_error(ctx, "memoryrefget: non-atomic memory cannot be accessed atomically"); + *ret = jl_cgval_t(); // unreachable + return true; + } + if (isatomic && order == jl_memory_order_notatomic) { + emit_atomic_error(ctx, "memoryrefget: atomic memory cannot be accessed non-atomically"); + *ret = jl_cgval_t(); // unreachable + return true; + } + if (order == jl_memory_order_unspecified) { + order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic; + } + jl_value_t *boundscheck = argv[3].constant; + emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryref"); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Value *mem = emit_memoryref_mem(ctx, ref, layout); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + if (bounds_check_enabled(ctx, boundscheck)) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "load"); + ctx.builder.CreateCondBr(ctx.builder.CreateIsNull(mlen), failBB, endBB); + failBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(ctx, mem), ConstantInt::get(ctx.types().T_size, 1) }); + ctx.builder.CreateUnreachable(); + endBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(endBB); + } + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + size_t elsz = layout->size; + size_t al = layout->alignment; + if (al > JL_HEAP_ALIGNMENT) + al = JL_HEAP_ALIGNMENT; + bool needlock = isatomic && !isboxed && elsz > MAX_ATOMIC_SIZE; + AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic) + ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) + : get_llvm_atomic_order(order); + bool maybenull = true; + if (!isboxed && !isunion && elsz == 0) { + assert(jl_is_datatype(ety) && jl_is_datatype_singleton((jl_datatype_t*)ety)); + *ret = ghostValue(ctx, ety); + if (isStrongerThanMonotonic(Order)) + ctx.builder.CreateFence(Order); + } + else if (isunion) { + assert(!isatomic && !needlock); + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *idx0 = CreateSimplifiedExtractValue(ctx, V, 0); + Value *mem = CreateSimplifiedExtractValue(ctx, V, 1); + Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded); + Value *ptindex; + if (elsz == 0) { + ptindex = data; } else { - MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.noalias().aliasscope.current : nullptr; - *ret = typed_load(ctx, - emit_arrayptr(ctx, ary, ary_ex), - idx, ety, - isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, - aliasscope, - isboxed, - AtomicOrdering::NotAtomic); - if (ret->V) - setName(ctx.emission_context, ret->V, "arrayref"); + Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); + // isbits union selector bytes are stored after mem->length bytes + ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen); + data = ctx.builder.CreateInBoundsGEP(AT, data, idx0); + } + ptindex = emit_ptrgep(ctx, ptindex, idx0); + size_t elsz_c = 0, al_c = 0; + int union_max = jl_islayout_inline(ety, &elsz_c, &al_c); + assert(union_max && LLT_ALIGN(elsz_c, al_c) == elsz && al_c == al); + *ret = emit_unionload(ctx, data, ptindex, ety, elsz_c, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte); + } + else { + Value *ptr = (layout->size == 0 ? 
nullptr : emit_memoryref_ptr(ctx, ref, layout)); + Value *lock = nullptr; + if (needlock) { + assert(ptr); + lock = ptr; + // ptr += sizeof(lock); + ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); + emit_lockstate_value(ctx, lock, true); + } + *ret = typed_load(ctx, ptr, nullptr, ety, + isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, + ctx.noalias().aliasscope.current, + isboxed, Order, maybenull, al); + if (needlock) { + emit_lockstate_value(ctx, lock, false); } - return true; } + return true; } } - else if (f == jl_builtin_arrayset && nargs >= 4) { - const jl_cgval_t &ary = argv[2]; - jl_cgval_t val = argv[3]; - bool indices_ok = true; - for (size_t i = 4; i <= nargs; i++) { - if (argv[i].typ != (jl_value_t*)jl_long_type) { - indices_ok = false; - break; + else if ((f == jl_builtin_memoryrefset && nargs == 4) || + (f == jl_builtin_memoryrefswap && nargs == 4) || + (f == jl_builtin_memoryrefreplace && nargs == 6) || + (f == jl_builtin_memoryrefmodify && nargs == 5) || + (f == jl_builtin_memoryrefsetonce && nargs == 5)) { + return emit_f_opmemory(ctx, ret, f, argv, nargs, nullptr); + } + + + else if (f == jl_builtin_memoryref_isassigned && nargs == 3) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + jl_value_t *kind = jl_tparam0(mty_dt); + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym) + return false; + enum jl_memory_order order = jl_memory_order_unspecified; + const std::string fname = "memoryref_isassigned"; + { + const jl_cgval_t &ord = argv[2]; + emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname); + if (!ord.constant) + return false; + order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false); } - } - jl_value_t *aty_dt = jl_unwrap_unionall(ary.typ); - if (jl_is_array_type(aty_dt) && indices_ok) { - jl_value_t *ety = jl_tparam0(aty_dt); - jl_value_t *ndp = jl_tparam1(aty_dt); - if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) { - emit_typecheck(ctx, val, ety, "arrayset"); - val = update_julia_type(ctx, val, ety); - if (val.typ == jl_bottom_type) - return true; - size_t elsz = 0, al = 0; - int union_max = jl_islayout_inline(ety, &elsz, &al); - bool isboxed = (union_max == 0); - if (isboxed) - ety = (jl_value_t*)jl_any_type; - jl_value_t *ary_ex = jl_exprarg(ex, 2); - ssize_t nd = jl_is_long(ndp) ? 
jl_unbox_long(ndp) : -1; - jl_value_t *boundscheck = argv[1].constant; - emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset"); - Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck); - if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) { - // no-op + if (order == jl_memory_order_invalid) { + emit_atomic_error(ctx, "invalid atomic ordering"); + *ret = jl_cgval_t(); // unreachable + return true; + } + bool isatomic = kind == (jl_value_t*)jl_atomic_sym; + if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { + emit_atomic_error(ctx, "memoryref_isassigned: non-atomic memory cannot be accessed atomically"); + *ret = jl_cgval_t(); // unreachable + return true; + } + if (isatomic && order == jl_memory_order_notatomic) { + emit_atomic_error(ctx, "memoryref_isassigned: atomic memory cannot be accessed non-atomically"); + *ret = jl_cgval_t(); // unreachable + return true; + } + if (order == jl_memory_order_unspecified) { + order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic; + } + jl_value_t *boundscheck = argv[3].constant; + emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, fname); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Value *mem = emit_memoryref_mem(ctx, ref, layout); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + Value *oob = bounds_check_enabled(ctx, boundscheck) ? ctx.builder.CreateIsNull(mlen) : nullptr; + bool isboxed = layout->flags.arrayelem_isboxed; + if (isboxed || layout->first_ptr >= 0) { + bool needlock = isatomic && !isboxed && layout->size > MAX_ATOMIC_SIZE; + AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic) + ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) + : get_llvm_atomic_order(order); + PHINode *result = nullptr; + if (oob) { + BasicBlock *passBB, *endBB, *fromBB; + passBB = BasicBlock::Create(ctx.builder.getContext(), "load"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + + passBB->insertInto(ctx.f); + endBB->insertInto(ctx.f); + fromBB = ctx.builder.CreateCondBr(oob, endBB, passBB)->getParent(); + ctx.builder.SetInsertPoint(endBB); + result = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); + result->addIncoming(ConstantInt::get(result->getType(), 0), fromBB); + setName(ctx.emission_context, result, "arraysize"); + ctx.builder.SetInsertPoint(passBB); } - else { - PHINode *data_owner = NULL; // owner object against which the write barrier must check - if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier - Value *aryv = boxed(ctx, ary); - Value *flags = emit_arrayflags(ctx, ary); - // the owner of the data is ary itself except if ary->how == 3 - flags = ctx.builder.CreateAnd(flags, 3); - Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 3)); - setName(ctx.emission_context, is_owned, "has_owner"); - BasicBlock *curBB = ctx.builder.GetInsertBlock(); - BasicBlock *ownedBB = BasicBlock::Create(ctx.builder.getContext(), "array_owned", ctx.f); - BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge_own", ctx.f); - ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB); - ctx.builder.SetInsertPoint(ownedBB); - // load owner pointer - Instruction *own_ptr; - if (jl_is_long(ndp)) { - own_ptr = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - 
ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, - emit_bitcast(ctx, decay_derived(ctx, aryv), ctx.types().T_pprjlvalue), - jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)), - Align(sizeof(void*))); - setName(ctx.emission_context, own_ptr, "external_owner"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - ai.decorateInst(maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type)); - } - else { - own_ptr = ctx.builder.CreateCall( - prepare_call(jlarray_data_owner_func), - {aryv}); - } - ctx.builder.CreateBr(mergeBB); - ctx.builder.SetInsertPoint(mergeBB); - data_owner = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); - data_owner->addIncoming(aryv, curBB); - data_owner->addIncoming(own_ptr, ownedBB); - setName(ctx.emission_context, data_owner, "data_owner"); - } - if (!isboxed && jl_is_uniontype(ety)) { - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); - Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo()); - Value *offset = emit_arrayoffset(ctx, ary, nd); - // compute tindex from val - jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety); - Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety); - tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); - Value *ptindex; - if (elsz == 0) { - ptindex = data; - } - else { - Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); - Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); - setName(ctx.emission_context, is_vector, "is_vector"); - Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size)); - setName(ctx.emission_context, selidx_v, "selidx_v"); - Value *selidx_m = emit_arraylen(ctx, ary); - Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); - setName(ctx.emission_context, selidx, "selidx"); - ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); - setName(ctx.emission_context, ptindex, "ptindex"); - data = ctx.builder.CreateInBoundsGEP(AT, data, idx); - setName(ctx.emission_context, data, "data"); - } - ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); - setName(ctx.emission_context, ptindex, "ptindex"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayselbyte); - ai.decorateInst(ctx.builder.CreateStore(tindex, ptindex)); - if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) { - // copy data (if any) - emit_unionmove(ctx, data, ctx.tbaa().tbaa_arraybuf, val, nullptr); - } - } - else { - typed_store(ctx, - emit_arrayptr(ctx, ary, ary_ex, isboxed), - idx, val, jl_cgval_t(), ety, - isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, - ctx.noalias().aliasscope.current, - data_owner, - isboxed, - isboxed ? 
AtomicOrdering::Release : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0 - /*FailOrder*/AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0 - 0, - false, - true, - false, - false, - false, - false, - nullptr, - ""); - } + Value *elem = emit_memoryref_ptr(ctx, ref, layout); + if (needlock) { + // n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null + // elem += sizeof(lock); + elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); } - *ret = ary; - return true; + if (!isboxed) + elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*)); + // emit this using the same type as jl_builtin_memoryrefget + // so that LLVM may be able to load-load forward them and fold the result + auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf; + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + LoadInst *fldv = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, elem, ctx.types().alignof_ptr); + fldv->setOrdering(Order); + ai.decorateInst(fldv); + Value *isdef = ctx.builder.CreateIsNotNull(fldv); + setName(ctx.emission_context, isdef, fname); + if (oob) { + assert(result); + result->addIncoming(isdef, ctx.builder.CreateBr(result->getParent())->getParent()); + ctx.builder.SetInsertPoint(result->getParent()); + isdef = result; + } + *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); + } + else if (oob) { + Value *isdef = ctx.builder.CreateNot(oob); + *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); } + else { + *ret = mark_julia_const(ctx, jl_true); + } + return true; } } + else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3 || nargs == 4)) { const jl_cgval_t &obj = argv[1]; const jl_cgval_t &fld = argv[2]; @@ -3741,7 +4836,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { Value *valen = emit_n_varargs(ctx); jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field) - ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)), + emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)), NULL, NULL); Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); @@ -3782,44 +4877,64 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // For tuples, we can emit code even if we don't know the exact // type (e.g. because we don't know the length). This is possible // as long as we know that all elements are of the same (leaf) type. 
- if (obj.ispointer()) { - if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { - emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); - *ret = jl_cgval_t(); // unreachable - return true; - } - // Determine which was the type that was homogeneous - jl_value_t *jt = jl_tparam0(utt); - if (jl_is_vararg(jt)) - jt = jl_unwrap_vararg(jt); - assert(jl_is_datatype(jt)); - // This is not necessary for correctness, but allows to omit - // the extra code for getting the length of the tuple - if (!bounds_check_enabled(ctx, boundscheck)) { - vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); - } - else { - vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx, - emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)), - jl_true); - } - bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0); - Value *ptr = data_pointer(ctx, obj); - *ret = typed_load(ctx, ptr, vidx, - isboxed ? (jl_value_t*)jl_any_type : jt, - obj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false); + jl_cgval_t ptrobj = obj.isboxed ? obj : value_to_pointer(ctx, obj); + if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) { + emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically"); + *ret = jl_cgval_t(); // unreachable return true; } + // Determine which was the type that was homogeneous + jl_value_t *jt = jl_tparam0(utt); + if (jl_is_vararg(jt)) + jt = jl_unwrap_vararg(jt); + assert(jl_is_datatype(jt)); + // This is not necessary for correctness, but allows to omit + // the extra code for getting the length of the tuple + if (!bounds_check_enabled(ctx, boundscheck)) { + vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); + } + else { + vidx = emit_bounds_check(ctx, ptrobj, (jl_value_t*)ptrobj.typ, vidx, + emit_datatype_nfields(ctx, emit_typeof(ctx, ptrobj, false, false)), + jl_true); + } + bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0); + Value *ptr = data_pointer(ctx, ptrobj); + *ret = typed_load(ctx, ptr, vidx, + isboxed ? 
(jl_value_t*)jl_any_type : jt, + ptrobj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false); + return true; } // Unknown object, but field known to be integer vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); - Value *fld_val = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, obj), vidx }); + Value *fld_val = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, obj), vidx }, "getfield"); *ret = mark_julia_type(ctx, fld_val, true, jl_any_type); return true; } } - // TODO: generic getfield func with more efficient calling convention + else if (fld.typ == (jl_value_t*)jl_symbol_type) { // Known type but unknown symbol + if (jl_is_datatype(utt) && (utt != jl_module_type) && jl_struct_try_layout(utt)) { + if ((jl_datatype_nfields(utt) == 1 && !jl_is_namedtuple_type(utt) && !jl_is_tuple_type(utt))) { + jl_svec_t *fn = jl_field_names(utt); + assert(jl_svec_len(fn) == 1); + Value *typ_sym = literal_pointer_val(ctx, jl_svecref(fn, 0)); + Value *cond = ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, typ_sym), mark_callee_rooted(ctx, boxed(ctx, fld))); + emit_hasnofield_error_ifnot(ctx, cond, utt, fld); + *ret = emit_getfield_knownidx(ctx, obj, 0, utt, order); + return true; + } + else { + Value *index = ctx.builder.CreateCall(prepare_call(jlfieldindex_func), + {emit_typeof(ctx, obj, false, false), boxed(ctx, fld), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)}); + Value *cond = ctx.builder.CreateICmpNE(index, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), -1)); + emit_hasnofield_error_ifnot(ctx, cond, utt, fld); + Value *idx2 = ctx.builder.CreateAdd(ctx.builder.CreateIntCast(index, ctx.types().T_size, false), ConstantInt::get(ctx.types().T_size, 1)); // getfield_unknown is 1 based + if (emit_getfield_unknownidx(ctx, ret, obj, idx2, utt, jl_false, order)) + return true; + } + } + } return false; } @@ -3855,14 +4970,19 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return false; } - else if (f == jl_builtin_setglobal && (nargs == 3 || nargs == 4)) { + else if ((f == jl_builtin_setglobal && (nargs == 3 || nargs == 4)) || + (f == jl_builtin_swapglobal && (nargs == 3 || nargs == 4)) || + (f == jl_builtin_replaceglobal && (nargs == 4 || nargs == 5 || nargs == 6)) || + (f == jl_builtin_modifyglobal && (nargs == 4 || nargs == 5)) || + (f == jl_builtin_setglobalonce && (nargs == 3 || nargs == 4 || nargs == 5))) { return emit_f_opglobal(ctx, ret, f, argv, nargs, nullptr); } else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) || (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) || (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) || - (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) { + (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5)) || + (f == jl_builtin_setfieldonce && (nargs == 3 || nargs == 4 || nargs == 5))) { return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr); } @@ -3913,7 +5033,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (nargs == 3) emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype"); emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck); - Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx); + Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, 
types_svec), idx); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *fieldtyp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*)))); setName(ctx.emission_context, fieldtyp, "fieldtype"); @@ -3940,7 +5060,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } // String and SimpleVector's length fields have the same layout - auto ptr = emit_bitcast(ctx, boxed(ctx, obj), ctx.types().T_size->getPointerTo()); + auto ptr = boxed(ctx, obj); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *len = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr)); MDBuilder MDB(ctx.builder.getContext()); @@ -3959,19 +5079,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, *ret = mark_julia_type(ctx, len, false, jl_long_type); return true; } - else if (jl_is_array_type(sty)) { - auto len = emit_arraylen(ctx, obj); - Value *elsize; - size_t elsz; - if (arraytype_constelsize(sty, &elsz)) { - elsize = ConstantInt::get(ctx.types().T_size, elsz); - } - else { - elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), ctx.types().T_size); - } + else if (jl_is_genericmemory_type(sty)) { + Value *v = boxed(ctx, obj); + auto len = emit_genericmemorylen(ctx, v, (jl_value_t*)sty); + auto elsize = emit_genericmemoryelsize(ctx, v, obj.typ, true); *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type); - if (ret->V) - setName(ctx.emission_context, ret->V, "sizeof"); return true; } } @@ -3981,13 +5093,51 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // don't bother codegen constant-folding for toplevel. 
jl_value_t *ty = static_apply_type(ctx, argv, nargs + 1); if (ty != NULL) { - ty = jl_ensure_rooted(ctx, ty); + JL_GC_PUSH1(&ty); + jl_temporary_root(ctx, ty); + JL_GC_POP(); *ret = mark_julia_const(ctx, ty); return true; } } } + else if (f == jl_builtin_isdefinedglobal && (nargs == 2 || nargs == 3 || nargs == 4)) { + const jl_cgval_t &mod = argv[1]; + const jl_cgval_t &sym = argv[2]; + bool allow_import = true; + enum jl_memory_order order = jl_memory_order_unspecified; + + if (nargs >= 3) { + const jl_cgval_t &arg3 = argv[3]; + if (arg3.constant && jl_is_bool(arg3.constant)) + allow_import = jl_unbox_bool(arg3.constant); + else + return false; + } + + if (nargs == 4) { + const jl_cgval_t &arg4 = argv[4]; + if (arg4.constant && jl_is_symbol(arg4.constant)) + order = jl_get_atomic_order((jl_sym_t*)arg4.constant, true, false); + else + return false; + } + else + order = jl_memory_order_unordered; + + if (order < jl_memory_order_unordered) { + return false; + } + + if (!mod.constant || !sym.constant || !jl_is_symbol(sym.constant) || !jl_is_module(mod.constant)) { + return false; + } + + *ret = emit_isdefinedglobal(ctx, (jl_module_t*)mod.constant, (jl_sym_t*)sym.constant, allow_import, order); + return true; + } + else if (f == jl_builtin_isdefined && (nargs == 2 || nargs == 3)) { const jl_cgval_t &obj = argv[1]; const jl_cgval_t &fld = argv[2]; @@ -4060,20 +5210,23 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, *ret = jl_cgval_t(); // unreachable return true; } - else if (fieldidx < nf - stt->name->n_uninitialized) { + else if (!field_may_be_null(obj, stt, fieldidx)) { *ret = mark_julia_const(ctx, jl_true); } else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) { Value *fldv; size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*); - auto tbaa = obj.tbaa; - if (tbaa == ctx.tbaa().tbaa_datatype && offs != offsetof(jl_datatype_t, types)) - tbaa = ctx.tbaa().tbaa_const; - if (obj.ispointer()) { + if (!obj.inline_roots.empty()) { + auto offsets = split_value_field(stt, fieldidx); + assert(offsets.second >= 0); + fldv = obj.inline_roots[offsets.second]; + } + else if (obj.ispointer()) { + auto tbaa = best_field_tbaa(ctx, obj, stt, fieldidx, offs); if (!jl_field_isptr(stt, fieldidx)) offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr; - Value *ptr = emit_bitcast(ctx, data_pointer(ctx, obj), ctx.types().T_pprjlvalue); - Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs); + Value *ptr = data_pointer(ctx, obj); + Value *addr = emit_ptrgep(ctx, ptr, offs * sizeof(jl_value_t*)); // emit this using the same type as emit_getfield_knownidx // so that LLVM may be able to load-load forward them and fold the result jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -4101,6 +5254,14 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } + else if (f == jl_builtin_current_scope && (nargs == 0)) { + jl_aliasinfo_t scope_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + Instruction *v = scope_ai.decorateInst( + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, get_scope_field(ctx), ctx.types().alignof_ptr)); + *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt); + return true; + } + else if (f == jl_builtin_donotdelete) { // For now we emit this as a vararg call to the builtin // (which doesn't look at the arguments). 
In the future, @@ -4131,12 +5292,18 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } + else if (f == jl_builtin_compilerbarrier && (nargs == 2)) { + emit_typecheck(ctx, argv[1], (jl_value_t*)jl_symbol_type, "compilerbarrier"); + *ret = argv[2]; + return true; + } + return false; } // Returns ctx.types().T_prjlvalue static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF, - const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline) + ArrayRef argv, size_t nargs, JuliaFunction<> *trampoline) { ++EmittedJLCalls; Function *TheTrampoline = prepare_call(trampoline); @@ -4146,7 +5313,15 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { - Value *arg = boxed(ctx, argv[i]); + Value *arg; + if (i == 0 && trampoline == julia_call3) { + const jl_cgval_t &f = argv[i]; + arg = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; + arg = decay_derived(ctx, arg); + } + else { + arg = boxed(ctx, argv[i]); + } theArgs.push_back(arg); } CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs); @@ -4157,49 +5332,32 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t // Returns ctx.types().T_prjlvalue static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF, - const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline) + ArrayRef argv, size_t nargs, JuliaFunction<> *trampoline) { return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline); } -static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal, - const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, jl_returninfo_t &returninfo, ArrayRef argv, size_t nargs) { ++EmittedSpecfunCalls; // emit specialized call site bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); - FunctionType *cft = returninfo.decl.getFunctionType(); - *cc = returninfo.cc; - *return_roots = returninfo.return_roots; - - size_t nfargs = cft->getNumParams(); - SmallVector argvals(nfargs); + size_t nfargs = returninfo.decl.getFunctionType()->getNumParams(); + SmallVector argvals(nfargs); unsigned idx = 0; AllocaInst *result = nullptr; - switch (returninfo.cc) { - case jl_returninfo_t::Boxed: - case jl_returninfo_t::Register: - case jl_returninfo_t::Ghosts: - break; - case jl_returninfo_t::SRet: - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - argvals[idx] = result; - idx++; - break; - case jl_returninfo_t::Union: - result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes)); + + if (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union) { + result = emit_static_alloca(ctx, returninfo.union_bytes, Align(returninfo.union_align)); 
setName(ctx.emission_context, result, "sret_box"); - if (returninfo.union_align > 1) - result->setAlignment(Align(returninfo.union_align)); argvals[idx] = result; idx++; - break; } + AllocaInst *return_roots = nullptr; if (returninfo.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots)); + assert(returninfo.cc == jl_returninfo_t::SRet); + return_roots = emit_static_roots(ctx, returninfo.return_roots); argvals[idx] = return_roots; idx++; } @@ -4208,39 +5366,62 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; } for (size_t i = 0; i < nargs; i++) { - jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig - jl_cgval_t arg = argv[i]; if (is_opaque_closure && i == 0) { - Type *at = cft->getParamType(idx); - // Special optimization for opaque closures: We know that specsig opaque - // closures don't look at their type tag (they are fairly quickly discarded - // for their environments). Therefore, we can just pass these as a pointer, - // rather than a boxed value. - arg = value_to_pointer(ctx, arg); - argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at)); - } - else if (is_uniquerep_Type(jt)) { + // Special implementation for opaque closures: their jt and thus + // julia_type_to_llvm values are likely wrong (based on captures instead of the OC), so override the + // behavior here to directly pass the expected pointer directly instead of + // computing it from the available information + // jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(specTypes, jlretty) + jl_cgval_t arg = argv[i]; + if (arg.isghost) { + argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived)); + } + else { + if (!arg.isboxed) + arg = value_to_pointer(ctx, arg); + argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); + } + idx++; continue; - } else { + } + jl_value_t *jt = jl_nth_slot_type(specTypes, i); + jl_cgval_t arg = update_julia_type(ctx, argv[i], jt); + if (arg.typ == jl_bottom_type) + return jl_cgval_t(); + if (is_uniquerep_Type(jt)) { + continue; + } + else { bool isboxed = deserves_argbox(jt); Type *et = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); if (type_is_ghost(et)) continue; assert(idx < nfargs); - Type *at = cft->getParamType(idx); if (isboxed) { - assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue); argvals[idx] = boxed(ctx, arg); } else if (et->isAggregateType()) { - arg = value_to_pointer(ctx, arg); - // can lazy load on demand, no copy needed - assert(at == PointerType::get(et, AddressSpace::Derived)); - argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at)); + auto tracked = CountTrackedPointers(et); + if (tracked.count && !tracked.all) { + Value *val = arg.V; + SmallVector roots(arg.inline_roots); + if (roots.empty()) + std::tie(val, roots) = split_value(ctx, arg, Align(julia_alignment(jt))); + AllocaInst *proots = emit_static_roots(ctx, roots.size()); + for (size_t i = 0; i < roots.size(); i++) + ctx.builder.CreateAlignedStore(roots[i], emit_ptrgep(ctx, proots, i * sizeof(void*)), Align(sizeof(void*))); + assert(val); + argvals[idx++] = decay_derived(ctx, val); + argvals[idx] = proots; + } + else { + if (!arg.isboxed) + arg = value_to_pointer(ctx, arg); + argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); + } } else { - assert(at == et); Value *val = emit_unbox(ctx, et, arg, jt); if (!val) { // There was a type mismatch of some sort - exit early @@ -4253,25 +5434,9 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos idx++; } assert(idx == nfargs); - Value *TheCallee = returninfo.decl.getCallee(); - if (fromexternal) { - std::string namep("p"); - namep += cast(returninfo.decl.getCallee())->getName(); - GlobalVariable *GV = cast_or_null(jl_Module->getNamedValue(namep)); - if (GV == nullptr) { - GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false, - GlobalVariable::ExternalLinkage, - Constant::getNullValue(TheCallee->getType()), - namep); - ctx.external_calls[std::make_tuple(fromexternal, true)] = GV; - } - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*)))); - setName(ctx.emission_context, TheCallee, namep); - } - CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals); + CallInst *call = ctx.builder.CreateCall(returninfo.decl, argvals); call->setAttributes(returninfo.attrs); - if (gcstack_arg) + if (gcstack_arg && ctx.emission_context.use_swiftcc) call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; @@ -4284,16 +5449,16 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos break; case jl_returninfo_t::SRet: assert(result); - retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack); + retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_gcframe, load_gc_roots(ctx, return_roots, returninfo.return_roots)); break; case jl_returninfo_t::Union: { Value *box = ctx.builder.CreateExtractValue(call, 0); Value *tindex = ctx.builder.CreateExtractValue(call, 1); Value *derived = ctx.builder.CreateSelect( ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), - decay_derived(ctx, ctx.builder.CreateBitCast(argvals[0], ctx.types().T_pjlvalue)), + decay_derived(ctx, result), decay_derived(ctx, box) ); retval = mark_julia_slot(derived, @@ -4307,20 +5472,80 @@ 
static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack); break; } + return retval; +} + +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal, + ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *nreturn_roots, jl_value_t *inferred_retty, Value *age_ok) +{ + ++EmittedSpecfunCalls; + // emit specialized call site + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); + *cc = returninfo.cc; + *nreturn_roots = returninfo.return_roots; + if (fromexternal) { + std::string namep("p"); + Value *TheCallee = returninfo.decl.getCallee(); + namep += cast(TheCallee)->getName(); + GlobalVariable *GV = cast_or_null(jl_Module->getNamedValue(namep)); + if (GV == nullptr) { + GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(TheCallee->getType()), + namep); + ctx.emission_context.external_fns[std::make_tuple(fromexternal, true)] = GV; + } + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*)))); + setName(ctx.emission_context, TheCallee, namep); + returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), TheCallee); + } + if (age_ok) { + std::string funcName(specFunctionObject); + funcName += "_gfthunk"; + Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(), + GlobalVariable::InternalLinkage, funcName, jl_Module); + jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); + gf_thunk->setAttributes(AttributeList::get(gf_thunk->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); + // build a specsig -> jl_apply_generic converter thunk + // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer), + // but which has the signature of a specsig + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, specTypes, jlretty, is_opaque_closure, nargs, ctx.emission_context, + prepare_call(jlapplygeneric_func)); + returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk)); + } + jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, specTypes, jlretty, returninfo, argv, nargs); // see if inference has a different / better type for the call than the lambda return update_julia_type(ctx, retval, inferred_retty); } static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, - const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) + ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty, Value *age_ok) { bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; return emit_call_specfun_other(ctx, is_opaque_closure, mi->specTypes, jlretty, NULL, - specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty); + 
specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty, age_ok); +} + +static jl_value_t *get_ci_abi(jl_code_instance_t *ci) +{ + if (jl_typeof(ci->def) == (jl_value_t*)jl_abioverride_type) + return ((jl_abi_override_t*)ci->def)->abi; + return jl_get_ci_mi(ci)->specTypes; +} + +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_code_instance_t *ci, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, + ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty, Value *age_ok) +{ + jl_method_instance_t *mi = jl_get_ci_mi(ci); + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + return emit_call_specfun_other(ctx, is_opaque_closure, get_ci_abi(ci), jlretty, NULL, + specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty, age_ok); } static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, - const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty) + ArrayRef argv, size_t nargs, jl_value_t *inferred_retty, Value *age_ok) { Value *theFptr; if (fromexternal) { @@ -4333,103 +5558,118 @@ static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty GlobalVariable::ExternalLinkage, Constant::getNullValue(pfunc), namep); - ctx.external_calls[std::make_tuple(fromexternal, false)] = GV; + ctx.emission_context.external_fns[std::make_tuple(fromexternal, false)] = GV; } jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*)))); - setName(ctx.emission_context, theFptr, namep); + setName(ctx.emission_context, theFptr, specFunctionObject); } else { theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee(); addRetAttr(cast(theFptr), Attribute::NonNull); } + if (age_ok) + theFptr = ctx.builder.CreateSelect(age_ok, theFptr, prepare_call(jlapplygeneric_func)); Value *ret = emit_jlcall(ctx, FunctionCallee(ctx.types().T_jlfunc, theFptr), nullptr, argv, nargs, julia_call); return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty); } static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt) { - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); size_t arglen = jl_array_dim0(ex->args); size_t nargs = arglen - 1; assert(arglen >= 2); jl_cgval_t lival = emit_expr(ctx, args[0]); - SmallVector argv(nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i + 1]); if (argv[i].typ == jl_bottom_type) return jl_cgval_t(); } - return emit_invoke(ctx, lival, argv.data(), nargs, rt); + return emit_invoke(ctx, lival, argv, nargs, rt, nullptr); } -static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt) +static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayRef argv, size_t nargs, jl_value_t *rt, Value *age_ok) { ++EmittedInvokes; bool handled = false; jl_cgval_t result; if (lival.constant) { - jl_method_instance_t *mi = (jl_method_instance_t*)lival.constant; + jl_method_instance_t *mi; + jl_value_t *ci = nullptr; + if (jl_is_method_instance(lival.constant)) { + mi = (jl_method_instance_t*)lival.constant; + } + else { + ci = 
lival.constant; + assert(jl_is_code_instance(ci)); + mi = jl_get_ci_mi((jl_code_instance_t*)ci); + } assert(jl_is_method_instance(mi)); if (mi == ctx.linfo) { - // handle self-recursion specially + // handle self-recursion specially (TODO: assuming ci is a valid invoke for mi?) jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; FunctionType *ft = ctx.f->getFunctionType(); StringRef protoname = ctx.f->getName(); if (ft == ctx.types().T_jlfunc) { - result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, nullptr, argv, nargs, rt); + result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, nullptr, argv, nargs, rt, age_ok); handled = true; } else if (ft != ctx.types().T_jlfuncparams) { unsigned return_roots = 0; - result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, nullptr, argv, nargs, &cc, &return_roots, rt); + result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, nullptr, argv, nargs, &cc, &return_roots, rt, age_ok); handled = true; } } else { - jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here - jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; - if (ci != jl_nothing) { + if (ci) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; auto invoke = jl_atomic_load_acquire(&codeinst->invoke); // check if we know how to handle this specptr if (invoke == jl_fptr_const_return_addr) { result = mark_julia_const(ctx, codeinst->rettype_const); handled = true; } - else if (invoke != jl_fptr_sparam_addr) { + else { bool specsig, needsparams; - std::tie(specsig, needsparams) = uses_specsig(mi, codeinst->rettype, ctx.params->prefer_specsig); - std::string name; - StringRef protoname; - bool need_to_emit = true; - bool cache_valid = ctx.use_cache || ctx.external_linkage; - bool external = false; - - // Check if we already queued this up - auto it = ctx.call_targets.find(codeinst); - if (need_to_emit && it != ctx.call_targets.end()) { - protoname = std::get<2>(it->second)->getName(); - need_to_emit = cache_valid = false; - } + std::tie(specsig, needsparams) = uses_specsig(get_ci_abi(codeinst), mi, codeinst->rettype, ctx.params->prefer_specsig); + if (needsparams) { + if (trim_may_error(ctx.params->trim)) + push_frames(ctx, ctx.linfo, mi); + Value *r = emit_jlcall(ctx, jlinvoke_func, track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)mi)), argv, nargs, julia_call2); + result = mark_julia_type(ctx, r, true, rt); + handled = true; + } else { + std::string name; + StringRef protoname; + bool need_to_emit = true; + bool cache_valid = ctx.use_cache || ctx.external_linkage; + bool external = false; + + // Check if we already queued this up + auto it = ctx.call_targets.find(codeinst); + if (need_to_emit && it != ctx.call_targets.end()) { + assert(it->second.specsig == specsig); + protoname = it->second.decl->getName(); + need_to_emit = cache_valid = false; + } - // Check if it is already compiled (either JIT or externally) - if (cache_valid) { - // optimization: emit the correct name immediately, if we know it - // TODO: use `emitted` map here too to try to consolidate names? - // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. - auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); - if (fptr) { - while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { - jl_cpu_pause(); - } - invoke = jl_atomic_load_relaxed(&codeinst->invoke); - if (specsig ? 
jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) { - protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); + // Check if it is already compiled (either JIT or externally) + if (need_to_emit && cache_valid) { + // optimization: emit the correct name immediately, if we know it + // TODO: use `emitted` map here too to try to consolidate names? + uint8_t specsigflags; + jl_callptr_t invoke; + void *fptr; + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) { + protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); if (ctx.external_linkage) { // TODO: Add !specsig support to aotcompile.cpp // Check that the codeinst is containing native code - if (specsig && jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b100) { + if (specsig && (specsigflags & 0b100)) { external = true; need_to_emit = false; } @@ -4439,71 +5679,111 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const } } } - } - if (need_to_emit) { - raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); - protoname = StringRef(name); - } - jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; - unsigned return_roots = 0; - if (specsig) - result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt); - else - result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, rt); - handled = true; - if (need_to_emit) { - Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); - ctx.call_targets[codeinst] = std::make_tuple(cc, return_roots, trampoline_decl, specsig); + if (need_to_emit) { + raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + protoname = StringRef(name); + } + jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; + unsigned return_roots = 0; + if (specsig) + result = emit_call_specfun_other(ctx, codeinst, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt, age_ok); + else + result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, rt, age_ok); + handled = true; + if (need_to_emit) { + Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); + ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, nullptr, specsig}; + if (trim_may_error(ctx.params->trim)) + push_frames(ctx, ctx.linfo, mi); + } } } } } } if (!handled) { - Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2); - result = mark_julia_type(ctx, r, true, rt); + if (trim_may_error(ctx.params->trim)) { + if (lival.constant) { + push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant); + } + else { + errs() << "Dynamic call to unknown function"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + + print_stacktrace(ctx, ctx.params->trim); + } + } + Value *r = age_ok ? 
emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call) : emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2); + result = mark_julia_type(ctx, r, true, age_ok ? (jl_value_t*)jl_any_type : rt); } - if (result.typ == jl_bottom_type) + if (result.typ == jl_bottom_type) { +#ifndef JL_NDEBUG + emit_error(ctx, "(Internal Error - IR Validity): Returned from function we expected not to."); +#endif CreateTrap(ctx.builder); + } return result; } static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt) { ++EmittedInvokes; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); size_t arglen = jl_array_dim0(ex->args); size_t nargs = arglen - 1; assert(arglen >= 2); jl_cgval_t lival = emit_expr(ctx, args[0]); - SmallVector argv(nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i + 1]); if (argv[i].typ == jl_bottom_type) return jl_cgval_t(); } const jl_cgval_t &f = argv[0]; - jl_cgval_t ret; - if (f.constant && f.constant == jl_builtin_modifyfield) { - if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv.data(), nargs - 1, &lival)) - return ret; - auto it = builtin_func_map().find(jl_f_modifyfield_addr); - assert(it != builtin_func_map().end()); - Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call); - return mark_julia_type(ctx, oldnew, true, rt); + if (f.constant) { + jl_cgval_t ret; + auto it = builtin_func_map().end(); + if (f.constant == jl_builtin_modifyfield) { + if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival)) + return ret; + it = builtin_func_map().find(jl_f_modifyfield_addr); + assert(it != builtin_func_map().end()); + } + else if (f.constant == jl_builtin_modifyglobal) { + if (emit_f_opglobal(ctx, &ret, jl_builtin_modifyglobal, argv, nargs - 1, &lival)) + return ret; + it = builtin_func_map().find(jl_f_modifyglobal_addr); + assert(it != builtin_func_map().end()); + } + else if (f.constant == jl_builtin_memoryrefmodify) { + if (emit_f_opmemory(ctx, &ret, jl_builtin_memoryrefmodify, argv, nargs - 1, &lival)) + return ret; + it = builtin_func_map().find(jl_f_memoryrefmodify_addr); + assert(it != builtin_func_map().end()); + } + else if (jl_typetagis(f.constant, jl_intrinsic_type)) { + JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant); + if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1) + return emit_atomic_pointerop(ctx, fi, ArrayRef(argv).drop_front(), nargs - 1, &lival); + } + + if (it != builtin_func_map().end()) { + Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef(argv).drop_front(), nargs - 1, julia_call); + return mark_julia_type(ctx, oldnew, true, rt); + } } - if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) { - JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant); - if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1) - return emit_atomic_pointerop(ctx, fi, argv.data(), nargs - 1, &lival); + if (trim_may_error(ctx.params->trim)) { + errs() << "ERROR: dynamic invoke modify call to"; + jl_(args[0]); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); } - // emit function and arguments - Value *callval = 
emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv.data(), nargs, julia_call); + Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call); return mark_julia_type(ctx, callval, true, rt); } -static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, jl_value_t *sigtype, jl_cgval_t *argv, size_t nargs) +static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, jl_value_t *sigtype, MutableArrayRef argv /*n.b. this mutation is unusual */, size_t nargs) { jl_datatype_t *oc_argt = (jl_datatype_t *)jl_tparam0(oc_type); jl_value_t *oc_rett = jl_tparam1(oc_type); @@ -4515,6 +5795,8 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j typ = jl_unwrap_vararg(typ); emit_typecheck(ctx, argv[i+1], typ, "typeassert"); argv[i+1] = update_julia_type(ctx, argv[i+1], typ); + if (argv[i+1].typ == jl_bottom_type) + return jl_cgval_t(); } jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; unsigned return_roots = 0; @@ -4525,7 +5807,7 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j Value *specptr = emit_unbox(ctx, ctx.types().T_size, closure_specptr, (jl_value_t*)jl_long_type); JL_GC_PUSH1(&sigtype); jl_cgval_t r = emit_call_specfun_other(ctx, true, sigtype, oc_rett, specptr, "", NULL, argv, nargs, - &cc, &return_roots, oc_rett); + &cc, &return_roots, oc_rett, nullptr); JL_GC_POP(); return r; } @@ -4533,25 +5815,23 @@ static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, j static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bool is_promotable) { ++EmittedCalls; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); size_t nargs = jl_array_dim0(ex->args); assert(nargs >= 1); jl_cgval_t f = emit_expr(ctx, args[0]); + if (f.typ == jl_bottom_type) { + return jl_cgval_t(); + } if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) { JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant); return emit_intrinsic(ctx, fi, args, nargs - 1); } - jl_value_t *context = ctx.params->generic_context == jl_nothing ? nullptr : ctx.params->generic_context; - size_t n_generic_args = nargs + (context ? 
1 : 0); + size_t n_generic_args = nargs; + + SmallVector argv(n_generic_args); - SmallVector generic_argv(n_generic_args); - jl_cgval_t *argv = generic_argv.data(); - if (context) { - generic_argv[0] = mark_julia_const(ctx, context); - argv = &generic_argv[1]; - } argv[0] = f; for (size_t i = 1; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); @@ -4559,21 +5839,70 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo return jl_cgval_t(); // anything past here is unreachable } - if (f.constant && jl_isa(f.constant, (jl_value_t*)jl_builtin_type)) { - if (f.constant == jl_builtin_ifelse && nargs == 4) - return emit_ifelse(ctx, argv[1], argv[2], argv[3], rt); - jl_cgval_t result; - bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable); - if (handled) { - return result; + if (jl_subtype(f.typ, (jl_value_t*)jl_builtin_type)) { + if (f.constant) { + if (f.constant == jl_builtin_ifelse && nargs == 4) + return emit_ifelse(ctx, argv[1], argv[2], argv[3], rt); + jl_cgval_t result; + bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable); + if (handled) + return result; + jl_fptr_args_t builtin_fptr = jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant)); + // special case for some known builtin not handled by emit_builtin_call + auto it = builtin_func_map().find(builtin_fptr); + if (it != builtin_func_map().end()) { + if (trim_may_error(ctx.params->trim)) { + bool may_dispatch = may_dispatch_builtins().count(builtin_fptr); + if (may_dispatch && f.constant == jl_builtin__apply_iterate && nargs >= 4) { + if (jl_subtype(argv[2].typ, (jl_value_t*)jl_builtin_type)) { + static jl_value_t *jl_dispatchfree_apply_iterate_type = NULL; + if (!jl_dispatchfree_apply_iterate_type) { + jl_value_t *types[5] = { + (jl_value_t *)jl_simplevector_type, + (jl_value_t *)jl_genericmemory_type, + (jl_value_t *)jl_array_type, + (jl_value_t *)jl_tuple_type, + (jl_value_t *)jl_namedtuple_type, + }; + jl_dispatchfree_apply_iterate_type = jl_as_global_root(jl_type_union(types, 5), 1); + } + for (size_t i = 3; i < nargs; i++) { + auto ai = argv[i].typ; + if (!jl_subtype(ai, jl_dispatchfree_apply_iterate_type)) + break; + } + may_dispatch = false; + } + } + if (may_dispatch) { + errs() << "ERROR: Dynamic call to builtin " << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } + } + Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef(argv).drop_front(), nargs - 1, julia_call); + setName(ctx.emission_context, ret, it->second->name + "_ret"); + return mark_julia_type(ctx, ret, true, rt); + } } - - // special case for known builtin not handled by emit_builtin_call - auto it = builtin_func_map().find(jl_get_builtin_fptr(f.constant)); - if (it != builtin_func_map().end()) { - Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call); - return mark_julia_type(ctx, ret, true, rt); + FunctionCallee fptr; + JuliaFunction<> *cc; + if (f.typ == (jl_value_t*)jl_intrinsic_type) { + fptr = prepare_call(jlintrinsic_func); + cc = julia_call3; } + else { + fptr = FunctionCallee(get_func_sig(ctx.builder.getContext()), ctx.builder.CreateCall(prepare_call(jlgetbuiltinfptr_func), {emit_typeof(ctx, f)})); + cc = 
julia_call; + } + if (trim_may_error(ctx.params->trim)) { + errs() << "ERROR: Dynamic call to unknown builtin"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } + Value *ret = emit_jlcall(ctx, fptr, nullptr, argv, nargs, cc); + setName(ctx.emission_context, ret, "Builtin_ret"); + return mark_julia_type(ctx, ret, true, rt); } // handle calling an OpaqueClosure @@ -4582,33 +5911,68 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo jl_value_t *oc_rett = jl_tparam1(f.typ); if (jl_is_datatype(oc_argt) && jl_tupletype_length_compat(oc_argt, nargs-1)) { jl_value_t *sigtype = jl_argtype_with_function_type((jl_value_t*)f.typ, (jl_value_t*)oc_argt); - if (uses_specsig(sigtype, false, true, oc_rett, true)) { + if (uses_specsig(sigtype, false, oc_rett, true)) { JL_GC_PUSH1(&sigtype); jl_cgval_t r = emit_specsig_oc_call(ctx, f.typ, sigtype, argv, nargs); JL_GC_POP(); return r; } + // TODO: else emit_oc_call } } + int failed_dispatch = !argv[0].constant; + if (ctx.params->trim != JL_TRIM_NO) { + // TODO: Implement the last-minute call resolution that used to be here + // in inference instead. + } + if (failed_dispatch && trim_may_error(ctx.params->trim)) { + errs() << "Dynamic call to "; + jl_jmp_buf *old_buf = jl_get_safe_restore(); + jl_jmp_buf buf; + jl_set_safe_restore(&buf); + if (!jl_setjmp(buf, 0)) { + jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)args[0]); + jl_printf((JL_STREAM*)STDERR_FILENO,"("); + for (size_t i = 1; i < nargs; ++i) { + jl_value_t *typ = argv[i].typ; + if (!jl_is_concrete_type(typ)) // Print type in red + jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[31m"); + jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)argv[i].typ); + if (!jl_is_concrete_type(typ)) + jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[0m"); + if (i != nargs-1) + jl_printf((JL_STREAM*)STDERR_FILENO,", "); + } + jl_printf((JL_STREAM*)STDERR_FILENO,")\n"); + } + else { + jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! 
ERROR while printing error -- ABORTING !!!\n"); + } + jl_set_safe_restore(old_buf); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } // emit function and arguments - Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, generic_argv.data(), n_generic_args, julia_call); + Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, n_generic_args, julia_call); return mark_julia_type(ctx, callval, true, rt); } // --- accessing and assigning variables --- -static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name) +static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_datatype_t *type, jl_cgval_t name) { ++EmittedUndefVarErrors; + assert(name.typ == (jl_value_t*)jl_symbol_type); BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f); BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok"); ctx.builder.CreateCondBr(ok, ifok, err); ctx.builder.SetInsertPoint(err); - ctx.builder.CreateCall(prepare_call(jlundefvarerror_func), - mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)name))); + ctx.builder.CreateCall(prepare_call(jlhasnofield_func), + {mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)type)), + mark_callee_rooted(ctx, boxed(ctx, name))}); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(ifok); + ifok->insertInto(ctx.f); ctx.builder.SetInsertPoint(ifok); } @@ -4616,25 +5980,29 @@ static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name) // if the reference currently bound or assign == true, // pbnd will also be assigned with the binding address static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, - jl_binding_t **pbnd, bool assign) + jl_binding_t **pbnd, bool assign, bool alloc) { jl_binding_t *b = jl_get_module_binding(m, s, 1); + jl_binding_partition_t *bpart = jl_get_binding_partition_all(b, ctx.min_world, ctx.max_world); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); if (assign) { - if (jl_atomic_load_relaxed(&b->owner) == NULL) + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) // not yet declared b = NULL; } else { - b = jl_atomic_load_relaxed(&b->owner); - if (b == NULL) + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) { // try to look this up now b = jl_get_binding(m, s); + bpart = jl_get_binding_partition_all(b, ctx.min_world, ctx.max_world); + } + pku = jl_walk_binding_inplace_all(&b, &bpart, ctx.min_world, ctx.max_world); } - if (b == NULL) { + if (!b || !bpart) { // var not found. switch to delayed lookup. 
Constant *initnul = Constant::getNullValue(ctx.types().T_pjlvalue); GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), ctx.types().T_pjlvalue, - false, GlobalVariable::PrivateLinkage, initnul); + false, GlobalVariable::PrivateLinkage, initnul, "jl_binding_ptr"); // LLVM has bugs with nameless globals LoadInst *cachedval = ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, bindinggv, Align(sizeof(void*))); setName(ctx.emission_context, cachedval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".cached"); cachedval->setOrdering(AtomicOrdering::Unordered); @@ -4644,15 +6012,23 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t auto iscached = ctx.builder.CreateICmpNE(cachedval, initnul); setName(ctx.emission_context, iscached, "iscached"); ctx.builder.CreateCondBr(iscached, have_val, not_found); - ctx.f->getBasicBlockList().push_back(not_found); + not_found->insertInto(ctx.f); ctx.builder.SetInsertPoint(not_found); - Value *bval = ctx.builder.CreateCall(prepare_call(assign ? jlgetbindingwrorerror_func : jlgetbindingorerror_func), - { literal_pointer_val(ctx, (jl_value_t*)m), - literal_pointer_val(ctx, (jl_value_t*)s) }); + Value *bval = nullptr; + if (assign) { + bval = ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func), + { literal_pointer_val(ctx, (jl_value_t*)m), + literal_pointer_val(ctx, (jl_value_t*)s), + ConstantInt::get(getInt32Ty(ctx.builder.getContext()), alloc)}); + } else { + bval = ctx.builder.CreateCall(prepare_call(jlgetbindingorerror_func), + { literal_pointer_val(ctx, (jl_value_t*)m), + literal_pointer_val(ctx, (jl_value_t*)s)}); + } setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found"); ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); ctx.builder.CreateBr(have_val); - ctx.f->getBasicBlockList().push_back(have_val); + have_val->insertInto(ctx.f); ctx.builder.SetInsertPoint(have_val); PHINode *p = ctx.builder.CreatePHI(ctx.types().T_pjlvalue, 2); p->addIncoming(cachedval, currentbb); @@ -4661,11 +6037,12 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t return p; } if (assign) { - if (jl_atomic_load_relaxed(&b->owner) != b) { + if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_guard(decode_restriction_kind(pku))) { // this will fail at runtime, so defer to the runtime to create the error ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func), { literal_pointer_val(ctx, (jl_value_t*)m), - literal_pointer_val(ctx, (jl_value_t*)s) }); + literal_pointer_val(ctx, (jl_value_t*)s), + ConstantInt::get(getInt32Ty(ctx.builder.getContext()), alloc) }); CreateTrap(ctx.builder); return NULL; } @@ -4678,7 +6055,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t return julia_binding_gv(ctx, b); } -static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa) +static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa) { LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); setName(ctx.emission_context, v, jl_symbol_name(name) + StringRef(".checked")); @@ -4689,7 +6066,7 @@ static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(v); } - 
undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name); + undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, scope); return mark_julia_type(ctx, v, true, jl_any_type); } @@ -4702,10 +6079,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) } } assert(ctx.spvals_ptr != NULL); - Value *bp = ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_prjlvalue, - ctx.spvals_ptr, - i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); + Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); setName(ctx.emission_context, sp, "sparam"); @@ -4715,11 +6089,11 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) sparam = (jl_unionall_t*)sparam->body; assert(jl_is_unionall(sparam)); } - undef_var_error_ifnot(ctx, isnull, sparam->var->name); + undef_var_error_ifnot(ctx, isnull, sparam->var->name, (jl_value_t*)jl_static_parameter_sym); return mark_julia_type(ctx, sp, true, jl_any_type); } -static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) +static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_import) { Value *isnull = NULL; if (jl_is_slotnumber(sym) || jl_is_argument(sym)) { @@ -4736,10 +6110,10 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) Value *box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value - // as indicated by testing (pTIndex & 0x80) + // as indicated by testing (pTIndex & UNION_BOX_MARKER) Value *tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(sizeof(void*)), vi.isVolatile); Value *load_unbox = ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull); } @@ -4758,86 +6132,65 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) } } assert(ctx.spvals_ptr != NULL); - Value *bp = ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_prjlvalue, - ctx.spvals_ptr, - i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); + Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type)); } else { - jl_module_t *modu; - jl_sym_t *name; - if (jl_is_globalref(sym)) { - modu = jl_globalref_mod(sym); - name = jl_globalref_name(sym); - } - else { - assert(jl_is_symbol(sym) && "malformed isdefined expression"); - modu = ctx.module; - name = (jl_sym_t*)sym; - } - jl_binding_t *bnd = jl_get_binding(modu, name); - if (bnd) { - if (jl_atomic_load_relaxed(&bnd->value) != NULL) - return mark_julia_const(ctx, jl_true); - Value *bp = julia_binding_gv(ctx, bnd); - bp = julia_binding_pvalue(ctx, bp); - LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); - jl_aliasinfo_t ai = 
jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); - ai.decorateInst(v); - v->setOrdering(AtomicOrdering::Unordered); - isnull = ctx.builder.CreateICmpNE(v, Constant::getNullValue(ctx.types().T_prjlvalue)); - } - else { - Value *v = ctx.builder.CreateCall(prepare_call(jlboundp_func), { - literal_pointer_val(ctx, (jl_value_t*)modu), - literal_pointer_val(ctx, (jl_value_t*)name) - }); - isnull = ctx.builder.CreateICmpNE(v, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); - } + assert(false && "malformed expression"); } return mark_julia_type(ctx, isnull, false, jl_bool_type); } static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname) { - jl_value_t *typ = vi.value.typ; jl_cgval_t v; Value *isnull = NULL; if (vi.boxroot == NULL || vi.pTIndex != NULL) { - if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !vi.value.V) { + if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !(vi.value.V || vi.inline_roots)) { v = vi.value; if (vi.pTIndex) v.TIndex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1)); } else { // copy value to a non-mutable (non-volatile SSA) location - AllocaInst *varslot = cast(vi.value.V); - setName(ctx.emission_context, varslot, jl_symbol_name(varname)); - Type *T = varslot->getAllocatedType(); - assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); - AllocaInst *ssaslot = cast(varslot->clone()); - setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa")); - ssaslot->insertAfter(varslot); - if (vi.isVolatile) { - Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, - varslot->getAlign(), - true); - ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign()); - } - else { - const DataLayout &DL = jl_Module->getDataLayout(); - uint64_t sz = DL.getTypeStoreSize(T); - emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value()); + // since this might be a union slot, the most convenient approach to copying + // is to move the whole alloca chunk + AllocaInst *ssaslot = nullptr; + if (vi.value.V) { + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + AllocaInst *varslot = cast(vi.value.V); + Type *T = varslot->getAllocatedType(); + assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); + ssaslot = cast(varslot->clone()); + setName(ctx.emission_context, ssaslot, varslot->getName() + StringRef(".ssa")); + ssaslot->insertAfter(varslot); + if (vi.isVolatile) { + Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, varslot->getAlign(), true); + stack_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign())); + } + else { + const DataLayout &DL = jl_Module->getDataLayout(); + uint64_t sz = DL.getTypeStoreSize(T); + emit_memcpy(ctx, ssaslot, stack_ai, vi.value, sz, ssaslot->getAlign(), varslot->getAlign()); + } } Value *tindex = NULL; if (vi.pTIndex) tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1), vi.isVolatile); - v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack); + v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack, None); + } + if (vi.inline_roots) { + AllocaInst *varslot = vi.inline_roots; + size_t nroots = cast(varslot->getArraySize())->getZExtValue(); + auto T_prjlvalue = varslot->getAllocatedType(); + if (auto AT = 
dyn_cast(T_prjlvalue)) { + nroots *= AT->getNumElements(); + T_prjlvalue = AT->getElementType(); + } + assert(T_prjlvalue == ctx.types().T_prjlvalue); + v.inline_roots = load_gc_roots(ctx, varslot, nroots, vi.isVolatile); } - if (vi.boxroot == NULL) - v = update_julia_type(ctx, v, typ); if (vi.usedUndef) { assert(vi.defFlag); isnull = ctx.builder.CreateAlignedLoad(getInt1Ty(ctx.builder.getContext()), vi.defFlag, Align(1), vi.isVolatile); @@ -4848,32 +6201,30 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va Value *box_isnull = NULL; if (vi.usedUndef) box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue)); - maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, typ); + maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, vi.value.typ); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value - // as indicated by testing (pTIndex & 0x80) + // as indicated by testing (pTIndex & UNION_BOX_MARKER) Value *load_unbox = ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); if (vi.usedUndef) isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull); - if (v.V) { // v.V will be null if it is a union of all ghost values - v.V = ctx.builder.CreateSelect(load_unbox, emit_bitcast(ctx, - decay_derived(ctx, v.V), boxed->getType()), decay_derived(ctx, boxed)); - } else + if (v.V) // v.V will be null if it is a union of all ghost values + v.V = ctx.builder.CreateSelect(load_unbox, decay_derived(ctx, v.V), decay_derived(ctx, boxed)); + else v.V = boxed; v.Vboxed = boxed; - v = update_julia_type(ctx, v, typ); } else { - v = mark_julia_type(ctx, boxed, true, typ); + v = mark_julia_type(ctx, boxed, true, vi.value.typ); if (vi.usedUndef) isnull = box_isnull; } } if (isnull) { setName(ctx.emission_context, isnull, jl_symbol_name(varname) + StringRef("_is_null")); - undef_var_error_ifnot(ctx, isnull, varname); + undef_var_error_ifnot(ctx, isnull, varname, (jl_value_t*)jl_local_sym); } return v; } @@ -4883,6 +6234,12 @@ static jl_cgval_t emit_local(jl_codectx_t &ctx, jl_value_t *slotload) size_t sl = jl_slot_number(slotload) - 1; jl_varinfo_t &vi = ctx.slots[sl]; jl_sym_t *sym = slot_symbol(ctx, sl); + if (sym == jl_unused_sym) { + // This shouldn't happen in well-formed input, but let's be robust, + // since we otherwise cause undefined behavior here. + emit_error(ctx, "(INTERNAL ERROR): Tried to use `#undef#` argument."); + return jl_cgval_t(); + } return emit_varinfo(ctx, vi, sym); } @@ -4892,51 +6249,27 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu store_def_flag(ctx, vi, true); if (!vi.value.constant) { // check that this is not a virtual store - assert(vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL)); + assert(vi.inline_roots || vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL)); // store value - if (vi.value.V == NULL) { - // all ghost values in destination - nothing to copy or store - } - else if (rval_info.constant || !rval_info.ispointer()) { - if (rval_info.isghost) { - // all ghost values in source - nothing to copy or store - } - else { - if (rval_info.typ != vi.value.typ && !vi.pTIndex && !rval_info.TIndex) { - // isbits cast-on-assignment is invalid. this branch should be dead-code. 
- CreateTrap(ctx.builder); - } - else { - Value *dest = vi.value.V; - if (vi.pTIndex) - ctx.builder.CreateStore(UndefValue::get(cast(vi.value.V)->getAllocatedType()), vi.value.V); - Type *store_ty = julia_type_to_llvm(ctx, rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ); - Type *dest_ty = store_ty->getPointerTo(); - if (dest_ty != dest->getType()) - dest = emit_bitcast(ctx, dest, dest_ty); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); - ai.decorateInst(ctx.builder.CreateStore( - emit_unbox(ctx, store_ty, rval_info, rval_info.typ), - dest, - vi.isVolatile)); - } - } - } - else { - if (vi.pTIndex == NULL) { - assert(jl_is_concrete_type(vi.value.typ)); - // Sometimes we can get into situations where the LHS and RHS - // are the same slot. We're not allowed to memcpy in that case - // due to LLVM bugs. - // This check should probably mostly catch the relevant situations. - if (vi.value.V != rval_info.V) { - Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ)); - emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes, - julia_alignment(rval_info.typ), vi.isVolatile); - } - } + rval_info = update_julia_type(ctx, rval_info, vi.value.typ); + if (rval_info.typ == jl_bottom_type) + return; + if (vi.pTIndex && vi.value.V) // TODO: use lifetime-end here instead + ctx.builder.CreateStore(UndefValue::get(cast(vi.value.V)->getAllocatedType()), vi.value.V); + // Sometimes we can get into situations where the LHS and RHS + // are the same slot. We're not allowed to memcpy in that case + // due to LLVM bugs. + // This check should probably mostly catch the relevant situations. + if (vi.value.V != nullptr ? vi.value.V != rval_info.V : vi.inline_roots != nullptr) { + MDNode *tbaa = ctx.tbaa().tbaa_stack; // Use vi.value.tbaa ? + if (rval_info.TIndex) + emit_unionmove(ctx, vi.value.V, tbaa, rval_info, /*skip*/isboxed, vi.isVolatile); else { - emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, /*skip*/isboxed, vi.isVolatile); + Align align(julia_alignment(rval_info.typ)); + if (vi.inline_roots) + split_value_into(ctx, rval_info, align, vi.value.V, align, jl_aliasinfo_t::fromTBAA(ctx, tbaa), vi.inline_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe), vi.isVolatile); + else + emit_unbox_store(ctx, rval_info, vi.value.V, tbaa, align, vi.isVolatile); } } } @@ -4951,7 +6284,8 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) jl_value_t *phiType = NULL; if (jl_is_array(ssavalue_types)) { phiType = jl_array_ptr_ref(ssavalue_types, idx); - } else { + } + else { phiType = (jl_value_t*)jl_any_type; } jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0); @@ -4961,6 +6295,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) return; } AllocaInst *dest = nullptr; + SmallVector roots; // N.B.: For any memory space, used as a phi, // we need to emit space twice here. 
The reason for this is that // phi nodes may be arguments of other phi nodes, so if we don't @@ -4971,34 +6306,34 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) size_t min_align, nbytes; dest = try_emit_union_alloca(ctx, ((jl_uniontype_t*)phiType), allunbox, min_align, nbytes); if (dest) { - Instruction *phi = dest->clone(); + AllocaInst *phi = cast(dest->clone()); phi->insertAfter(dest); - PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi"); - BB->getInstList().insert(InsertPt, Tindex_phi); - PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_len(edges), "ptr_phi"); - BB->getInstList().insert(InsertPt, ptr_phi); + PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); + Tindex_phi->insertInto(BB, InsertPt); + PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "ptr_phi"); + ptr_phi->insertInto(BB, InsertPt); Value *isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); - ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false); + ctx.builder.CreateMemCpy(phi, Align(min_align), dest, dest->getAlign(), nbytes, false); ctx.builder.CreateLifetimeEnd(dest); Value *ptr = ctx.builder.CreateSelect(isboxed, - maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())), - maybe_bitcast(ctx, decay_derived(ctx, phi), getInt8PtrTy(ctx.builder.getContext()))); - jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); // XXX: this TBAA is wrong for ptr_phi + decay_derived(ctx, ptr_phi), + decay_derived(ctx, phi)); + jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType)); val.Vboxed = ptr_phi; - ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r)); - ctx.SAvalues.at(idx) = val; - ctx.ssavalue_assigned.at(idx) = true; + ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, roots, r)); + ctx.SAvalues[idx] = val; + ctx.ssavalue_assigned[idx] = true; return; } else if (allunbox) { - PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi"); - BB->getInstList().insert(InsertPt, Tindex_phi); + PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); + Tindex_phi->insertInto(BB, InsertPt); jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); - ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)NULL, r)); - ctx.SAvalues.at(idx) = val; - ctx.ssavalue_assigned.at(idx) = true; + ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)nullptr, roots, r)); + ctx.SAvalues[idx] = val; + ctx.ssavalue_assigned[idx] = true; return; } } @@ -5007,39 +6342,54 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) // The frontend should really not emit this, but we allow it // for convenience. if (type_is_ghost(vtype)) { - assert(jl_is_datatype(phiType) && ((jl_datatype_t*)phiType)->instance); + assert(jl_is_datatype(phiType) && jl_is_datatype_singleton((jl_datatype_t*)phiType)); // Skip adding it to the PhiNodes list, since we didn't create one. 
- ctx.SAvalues.at(idx) = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance); - ctx.ssavalue_assigned.at(idx) = true; + ctx.SAvalues[idx] = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance); + ctx.ssavalue_assigned[idx] = true; return; } jl_cgval_t slot; PHINode *value_phi = NULL; - if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) { + if (!isboxed && vtype->isAggregateType()) { // the value will be moved into dest in the predecessor critical block. // here it's moved into phi in the successor (from dest) - dest = emit_static_alloca(ctx, vtype); - Value *phi = emit_static_alloca(ctx, vtype); - ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)), - dest, MaybeAlign(0), - jl_datatype_size(phiType), false); - ctx.builder.CreateLifetimeEnd(dest); - slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack); + auto tracked = CountTrackedPointers(vtype); + if (tracked.count) { + roots.resize(tracked.count); + assert(tracked.count == split_value_size((jl_datatype_t*)phiType).second); + for (size_t nr = 0; nr < tracked.count; nr++) { + auto root_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "root_phi"); + root_phi->insertInto(BB, InsertPt); + roots[nr] = root_phi; + } + } + AllocaInst *phi = nullptr; + if (!tracked.all) { + Align align(julia_alignment(phiType)); + unsigned nb = jl_datatype_size(phiType); + dest = emit_static_alloca(ctx, nb, align); + phi = cast(dest->clone()); + phi->insertBefore(dest); + ctx.builder.CreateMemCpy(phi, align, dest, align, nb, false); + ctx.builder.CreateLifetimeEnd(dest); + } + slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack, + roots.empty() ? ArrayRef() : ArrayRef((Value *const *)&roots.front(), roots.size())); } else { - value_phi = PHINode::Create(vtype, jl_array_len(edges), "value_phi"); - BB->getInstList().insert(InsertPt, value_phi); + value_phi = PHINode::Create(vtype, jl_array_nrows(edges), "value_phi"); + value_phi->insertInto(BB, InsertPt); slot = mark_julia_type(ctx, value_phi, isboxed, phiType); } - ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, r)); - ctx.SAvalues.at(idx) = slot; - ctx.ssavalue_assigned.at(idx) = true; + ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, roots, r)); + ctx.SAvalues[idx] = slot; + ctx.ssavalue_assigned[idx] = true; return; } static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_value_t *r) { - assert(!ctx.ssavalue_assigned.at(ssaidx_0based)); + assert(!ctx.ssavalue_assigned[ssaidx_0based]); if (jl_is_phinode(r)) { return emit_phinode_assign(ctx, ssaidx_0based, r); } @@ -5051,8 +6401,9 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu it = ctx.phic_slots.emplace(ssaidx_0based, jl_varinfo_t(ctx.builder.getContext())).first; } slot = emit_varinfo(ctx, it->second, jl_symbol("phic")); - } else { - slot = emit_expr(ctx, r, ssaidx_0based); // slot could be a jl_value_t (unboxed) or jl_value_t* (ispointer) + } + else { + slot = emit_expr(ctx, r, ssaidx_0based); } if (slot.isboxed || slot.TIndex) { // see if inference suggested a different type for the ssavalue than the expression @@ -5065,18 +6416,27 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu } } } - ctx.SAvalues.at(ssaidx_0based) = slot; // now SAvalues[ssaidx_0based] contains the SAvalue - ctx.ssavalue_assigned.at(ssaidx_0based) = true; + ctx.SAvalues[ssaidx_0based] = slot; // now SAvalues[ssaidx_0based] contains the SAvalue + 
ctx.ssavalue_assigned[ssaidx_0based] = true; } -static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t rval_info, jl_value_t *l=NULL) +static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t rval_info, jl_value_t *l=NULL, bool allow_mismatch=false) { if (!vi.used || vi.value.typ == jl_bottom_type) return; // convert rval-type to lval-type jl_value_t *slot_type = vi.value.typ; - rval_info = convert_julia_type(ctx, rval_info, slot_type); + // If allow_mismatch is set, type mismatches will not result in traps. + // This is used for upsilon nodes, where the destination can have a narrower + // type than the store, if inference determines that the store is never read. + Value *skip = NULL; + rval_info = convert_julia_type(ctx, rval_info, slot_type, &skip); + if (!allow_mismatch && skip) { + CreateTrap(ctx.builder); + return; + } + if (rval_info.typ == jl_bottom_type) return; @@ -5086,13 +6446,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t if (rval_info.TIndex) { tindex = rval_info.TIndex; if (!vi.boxroot) - tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); + tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), ~UNION_BOX_MARKER)); } else { assert(rval_info.isboxed || rval_info.constant); tindex = compute_tindex_unboxed(ctx, rval_info, vi.value.typ); if (vi.boxroot) - tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); else rval_info.TIndex = tindex; } @@ -5106,7 +6466,7 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t if (vi.pTIndex && rval_info.TIndex) { ctx.builder.CreateStore(rval_info.TIndex, vi.pTIndex, vi.isVolatile); isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); rval = rval_info.Vboxed ? rval_info.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue); assert(rval->getType() == ctx.types().T_prjlvalue); @@ -5121,8 +6481,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t // store unboxed variables if (!vi.boxroot || (vi.pTIndex && rval_info.TIndex)) { - emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info); + emit_guarded_test(ctx, skip ? 
ctx.builder.CreateNot(skip) : nullptr, nullptr, [&]{ + emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info); + return nullptr; + }); } + + return; } static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssize_t ssaval) @@ -5134,21 +6499,26 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi int sl = jl_slot_number(l) - 1; // it's a local variable jl_varinfo_t &vi = ctx.slots[sl]; - return emit_varinfo_assign(ctx, vi, rval_info, l); + emit_varinfo_assign(ctx, vi, rval_info, l); + return; } jl_module_t *mod; jl_sym_t *sym; + bool toplevel = jl_is_module(ctx.linfo->def.value); + bool alloc = toplevel; if (jl_is_symbol(l)) { mod = ctx.module; sym = (jl_sym_t*)l; } else { assert(jl_is_globalref(l)); + alloc &= jl_globalref_mod(l) == ctx.module; mod = jl_globalref_mod(l); sym = jl_globalref_name(l); } - emit_globalset(ctx, mod, sym, rval_info, AtomicOrdering::Release); + emit_globalop(ctx, mod, sym, rval_info, jl_cgval_t(), AtomicOrdering::Release, AtomicOrdering::NotAtomic, + true, false, false, false, false, nullptr, alloc); // Global variable. Does not need debug info because the debugger knows about // its memory location. } @@ -5165,15 +6535,17 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) // upsilon node is not dynamically observed. if (val) { jl_cgval_t rval_info = emit_expr(ctx, val); - if (rval_info.typ == jl_bottom_type) + if (rval_info.typ == jl_bottom_type) { // as a special case, PhiC nodes are allowed to use undefined // values, since they are just copy operations, so we need to // ignore the store (it will not be dynamically observed), while // normally, for any other operation result, we'd assume this store // was unreachable and dead val = NULL; - else - emit_varinfo_assign(ctx, vi, rval_info); + } + else { + emit_varinfo_assign(ctx, vi, rval_info, NULL, true); + } } if (!val) { if (vi.boxroot) { @@ -5185,16 +6557,27 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) // does need to satisfy the union invariants (i.e. inbounds // tindex). ctx.builder.CreateAlignedStore( - vi.boxroot ? 
ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER) : ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x01), vi.pTIndex, Align(1), true); } else if (vi.value.V && !vi.value.constant && vi.value.typ != jl_bottom_type) { - assert(vi.value.ispointer()); - Type *T = cast<AllocaInst>(vi.value.V)->getAllocatedType(); - if (CountTrackedPointers(T).count) { - // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL - ctx.builder.CreateStore(Constant::getNullValue(T), vi.value.V, true); + assert(vi.inline_roots || vi.value.ispointer()); + if (vi.inline_roots) { + // memory optimization: re-initialize gc pointers to NULL + AllocaInst *ssaroots = vi.inline_roots; + size_t nroots = cast<ConstantInt>(ssaroots->getArraySize())->getZExtValue(); + auto T_prjlvalue = ssaroots->getAllocatedType(); + if (auto AT = dyn_cast<ArrayType>(T_prjlvalue)) { + nroots *= AT->getNumElements(); + T_prjlvalue = AT->getElementType(); + } + assert(T_prjlvalue == ctx.types().T_prjlvalue); + Value *nullval = Constant::getNullValue(T_prjlvalue); + auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + for (size_t i = 0; i < nroots; i++) { + stack_ai.decorateInst(ctx.builder.CreateAlignedStore(nullval, emit_ptrgep(ctx, ssaroots, i * sizeof(void*)), ssaroots->getAlign(), true)); + } } } } @@ -5204,7 +6587,7 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, const jl_cgval_t &fexpr, jl_value_t *rt, jl_svec_t *argt); -static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg) +static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg) { bool isbool = (condV.typ == (jl_value_t*)jl_bool_type); if (!isbool) { @@ -5215,19 +6598,18 @@ static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const s emit_typecheck(ctx, condV, (jl_value_t*)jl_bool_type, msg); } if (isbool) { - Value *cond = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type); - assert(cond->getType() == getInt8Ty(ctx.builder.getContext())); - return ctx.builder.CreateXor(ctx.builder.CreateTrunc(cond, getInt1Ty(ctx.builder.getContext())), ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1)); + Value *cond = emit_unbox(ctx, getInt1Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type); + return ctx.builder.CreateNot(cond); } if (condV.isboxed) { return ctx.builder.CreateICmpEQ(boxed(ctx, condV), track_pjlvalue(ctx, literal_pointer_val(ctx, jl_false))); } - // not a boolean - return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); // TODO: replace with Undef + // not a boolean (unreachable dead code) + return UndefValue::get(getInt1Ty(ctx.builder.getContext())); } -static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const std::string &msg) +static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const Twine &msg) { return emit_condition(ctx, emit_expr(ctx, cond), msg); } @@ -5266,7 +6648,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) return; } jl_expr_t *ex = (jl_expr_t*)expr; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); jl_sym_t *head = ex->head; if (head == jl_meta_sym || head == jl_inbounds_sym || head == jl_coverageeffect_sym || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) { @@ 
-5275,14 +6657,50 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) return; } else if (head == jl_leave_sym) { - assert(jl_is_long(args[0])); - ctx.builder.CreateCall(prepare_call(jlleave_func), - ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_unbox_long(args[0]))); + int hand_n_leave = 0; + Value *scope_to_restore = nullptr, *token = nullptr; + for (size_t i = 0; i < jl_expr_nargs(ex); ++i) { + jl_value_t *arg = args[i]; + if (arg == jl_nothing) + continue; + assert(jl_is_ssavalue(arg)); + size_t enter_idx = ((jl_ssavalue_t*)arg)->id - 1; + jl_value_t *enter_stmt = jl_array_ptr_ref(ctx.code, enter_idx); + if (enter_stmt == jl_nothing) + continue; + if (ctx.scope_restore.count(enter_idx)) { + // TODO: The semantics of `gc_preserve` are not perfect here. An `Expr(:enter, ...)` block may + // have multiple exits, but effects of `preserve_end` are only extended to the end of the + // dominance of each `Expr(:leave, ...)`. + // + // That means that a scope object can suddenly end up preserved again outside of an + // `Expr(:enter, ...)` region where it ought to be dead. It'd be preferable if the effects + // of gc_preserve_end propagated through a control-flow joins as long as all incoming + // agree about the preserve state. + // + // This is correct as-is anyway - it just means the scope lives longer than it needs to + // if the `Expr(:enter, ...)` has multiple exits. + std::tie(token, scope_to_restore) = ctx.scope_restore[enter_idx]; + ctx.builder.CreateCall(prepare_call(gc_preserve_end_func), {token}); + } + if (jl_enternode_catch_dest(enter_stmt)) { + // We're not actually setting up the exception frames for these, so + // we don't need to exit them. + hand_n_leave += 1; + scope_to_restore = nullptr; // restored by exception handler + } + } + ctx.builder.CreateCall(prepare_call(jlleave_noexcept_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), hand_n_leave)}); + if (scope_to_restore) { + Value *scope_ptr = get_scope_field(ctx); + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe).decorateInst( + ctx.builder.CreateAlignedStore(scope_to_restore, scope_ptr, ctx.types().alignof_ptr)); + } } else if (head == jl_pop_exception_sym) { jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0)); assert(excstack_state.V && excstack_state.V->getType() == ctx.types().T_size); - ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V); + ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), {get_current_task(ctx), excstack_state.V}); return; } else { @@ -5295,8 +6713,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met { jl_svec_t *sig_args = NULL; jl_value_t *sigtype = NULL; - jl_code_info_t *ir = NULL; - JL_GC_PUSH3(&sig_args, &sigtype, &ir); + JL_GC_PUSH2(&sig_args, &sigtype); size_t nsig = 1 + jl_svec_len(argt_typ->parameters); sig_args = jl_alloc_svec_uninit(nsig); @@ -5306,64 +6723,102 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met } sigtype = jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig); - jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec); - jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.world, ctx.world); + jl_method_instance_t *mi; + jl_code_instance_t *ci; - if (ci == NULL || (jl_value_t*)ci == jl_nothing) { - JL_GC_POP(); - return std::make_pair((Function*)NULL, (Function*)NULL); + if (closure_method->source) { + mi = jl_specializations_get_linfo(closure_method, 
sigtype, jl_emptysvec); + ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.min_world, ctx.max_world); } - auto inferred = jl_atomic_load_relaxed(&ci->inferred); - if (!inferred || inferred == jl_nothing) { + else { + mi = (jl_method_instance_t*)jl_atomic_load_relaxed(&closure_method->specializations); + assert(jl_is_method_instance(mi)); + ci = jl_atomic_load_relaxed(&mi->cache); + } + if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->rettype != rettype || !jl_egal(sigtype, mi->specTypes)) { // TODO: correctly handle the ABI conversion if rettype != ci->rettype JL_GC_POP(); return std::make_pair((Function*)NULL, (Function*)NULL); } - ++EmittedOpaqueClosureFunctions; - - ir = jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred); - // TODO: Emit this inline and outline it late using LLVM's coroutine support. - orc::ThreadSafeModule closure_m = jl_create_ts_module( - name_from_method_instance(mi), ctx.emission_context.tsctx, - ctx.emission_context.imaging, - jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple())); - jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context); - - assert(closure_decls.functionObject != "jl_fptr_sparam"); - bool isspecsig = closure_decls.functionObject != "jl_fptr_args"; + // method lookup code (similar to emit_invoke, and the inverse of emit_specsig_oc_call) + bool specsig = uses_specsig(sigtype, false, rettype, true); + std::string name; + std::string oc; + StringRef protoname; + StringRef proto_oc; + + // Check if we already queued this up + auto it = ctx.call_targets.find(ci); + bool need_to_emit = it == ctx.call_targets.end(); + if (!need_to_emit) { + assert(specsig == it->second.specsig); + if (specsig) { + protoname = it->second.decl->getName(); + proto_oc = it->second.oc->getName(); + } + else { + proto_oc = it->second.decl->getName(); + } + need_to_emit = false; + } + else { + if (specsig) { + raw_string_ostream(name) << "j_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + protoname = StringRef(name); + } + raw_string_ostream(oc) << "j1_" << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); + proto_oc = StringRef(oc); + } - Function *F = NULL; - std::string fname = isspecsig ? 
- closure_decls.functionObject : - closure_decls.specFunctionObject; - if (GlobalValue *V = jl_Module->getNamedValue(fname)) { + // Get the fptr1 OC + Function *F = nullptr; + if (GlobalValue *V = jl_Module->getNamedValue(proto_oc)) { F = cast<Function>(V); - } else { + } + else { F = Function::Create(get_func_sig(ctx.builder.getContext()), Function::ExternalLinkage, - fname, jl_Module); + proto_oc, jl_Module); jl_init_function(F, ctx.emission_context.TargetTriple); jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } - Function *specF = NULL; - if (!isspecsig) { - specF = F; - } else { - //emission context holds context lock so can get module - specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); - if (specF) { - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); - specF = cast<Function>(returninfo.decl.getCallee()); - } + + // Get the specsig (if applicable) + Function *specF = nullptr; + jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; + unsigned return_roots = 0; + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + assert(is_opaque_closure); + if (specsig) { + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, nullptr, protoname, mi->specTypes, rettype, is_opaque_closure, gcstack_arg); + cc = returninfo.cc; + return_roots = returninfo.return_roots; + specF = cast<Function>(returninfo.decl.getCallee()); + } + + if (need_to_emit) { + ctx.call_targets[ci] = {cc, return_roots, specsig ? specF : F, specsig ? 
F : nullptr, specsig}; } - ctx.oc_modules.push_back(std::move(closure_m)); + JL_GC_POP(); return std::make_pair(F, specF); } -// `expr` is not clobbered in JL_TRY +static void emit_latestworld(jl_codectx_t &ctx) +{ + auto world_age_field = get_tls_world_age_field(ctx); + LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size, + prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr, + /*isVolatile*/false); + world->setOrdering(AtomicOrdering::Acquire); + StoreInst *store_world = ctx.builder.CreateAlignedStore(world, world_age_field, + ctx.types().alignof_ptr, /*isVolatile*/false); + (void)store_world; +} + +// `expr` is not actually clobbered in JL_TRY JL_GCC_IGNORE_START("-Wclobbered") static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_0based) { @@ -5377,12 +6832,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ if (jl_is_ssavalue(expr)) { ssize_t idx = ((jl_ssavalue_t*)expr)->id - 1; assert(idx >= 0); - if (!ctx.ssavalue_assigned.at(idx)) { - ctx.ssavalue_assigned.at(idx) = true; // (assignment, not comparison test) + if (!ctx.ssavalue_assigned[idx]) { + ctx.ssavalue_assigned[idx] = true; // (assignment, not comparison test) return jl_cgval_t(); // dead code branch } else { - return ctx.SAvalues.at(idx); // at this point, SAvalues[idx] actually contains the SAvalue + return ctx.SAvalues[idx]; // at this point, SAvalues[idx] actually contains the SAvalue } } if (jl_is_globalref(expr)) { @@ -5405,21 +6860,27 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ jl_value_t *val = expr; if (jl_is_quotenode(expr)) val = jl_fieldref_noalloc(expr, 0); - if (jl_is_method(ctx.linfo->def.method)) // toplevel exprs are already rooted - val = jl_ensure_rooted(ctx, val); + // Toplevel exprs are rooted but because codegen assumes this is constant, it removes the write barriers for this code. + // This means we have to globally root the value here. (The other option would be to change how we optimize toplevel code) + jl_temporary_root(ctx, val); return mark_julia_const(ctx, val); } jl_expr_t *ex = (jl_expr_t*)expr; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); - size_t nargs = jl_array_len(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); + size_t nargs = jl_array_nrows(ex->args); jl_sym_t *head = ex->head; // this is object-disoriented. // however, this is a good way to do it because it should *not* be easy // to add new node types. if (head == jl_isdefined_sym) { - assert(nargs == 1); - return emit_isdefined(ctx, args[0]); + assert(nargs == 1 || nargs == 2); + int allow_import = 1; + if (nargs == 2) { + assert(jl_is_bool(args[1])); + allow_import = args[1] == jl_true; + } + return emit_isdefined(ctx, args[0], allow_import); } else if (head == jl_throw_undef_if_not_sym) { assert(nargs == 2); @@ -5430,7 +6891,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ literal_pointer_val(ctx, jl_undefref_exception)); } else { - undef_var_error_ifnot(ctx, cond, var); + undef_var_error_ifnot(ctx, cond, var, (jl_value_t*)jl_local_sym); } return ghostValue(ctx, jl_nothing_type); } @@ -5454,7 +6915,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ expr_t = (jl_value_t*)jl_any_type; else { expr_t = jl_is_long(ctx.source->ssavaluetypes) ? 
(jl_value_t*)jl_any_type : jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaidx_0based); - is_promotable = ctx.ssavalue_usecount.at(ssaidx_0based) == 1; + is_promotable = ctx.ssavalue_usecount[ssaidx_0based] == 1; } jl_cgval_t res = emit_call(ctx, ex, expr_t, is_promotable); // some intrinsics (e.g. typeassert) can return a wider type @@ -5488,7 +6949,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ else if (head == jl_method_sym) { if (nargs == 1) { jl_value_t *mn = args[0]; - assert(jl_is_symbol(mn) || jl_is_slotnumber(mn)); + assert(jl_is_symbol(mn) || jl_is_slotnumber(mn) || jl_is_globalref(mn)); Value *bp = NULL, *name; jl_binding_t *bnd = NULL; @@ -5507,27 +6968,20 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn); } JL_CATCH { - jl_value_t *e = jl_current_exception(); + jl_value_t *e = jl_current_exception(jl_current_task); // errors. boo. :( - e = jl_as_global_root(e); + JL_GC_PUSH1(&e); + e = jl_as_global_root(e, 1); + JL_GC_POP(); raise_exception(ctx, literal_pointer_val(ctx, e)); return ghostValue(ctx, jl_nothing_type); } bp = julia_binding_gv(ctx, bnd); - bp = julia_binding_pvalue(ctx, bp); - } - else if (jl_is_slotnumber(mn) || jl_is_argument(mn)) { - // XXX: eval_methoddef does not have this code branch - int sl = jl_slot_number(mn)-1; - jl_varinfo_t &vi = ctx.slots[sl]; - bp = vi.boxroot; - name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl)); - } - if (bp) { - Value *mdargs[] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp, literal_pointer_val(ctx, bnd) }; jl_cgval_t gf = mark_julia_type( ctx, - ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)), + ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), { bp, + literal_pointer_val(ctx, (jl_value_t*)mod), name + }), true, jl_function_type); return gf; @@ -5546,13 +7000,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ }; jl_cgval_t meth = mark_julia_type( ctx, - ctx.builder.CreateCall(prepare_call(jlmethod_func), makeArrayRef(mdargs)), + ctx.builder.CreateCall(prepare_call(jlmethod_func), ArrayRef(mdargs)), true, jl_method_type); return meth; } else if (head == jl_const_sym) { - assert(nargs == 1); + assert(nargs <= 2); jl_sym_t *sym = (jl_sym_t*)args[0]; jl_module_t *mod = ctx.module; if (jl_is_globalref(sym)) { @@ -5560,20 +7014,36 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ sym = jl_globalref_name(sym); } if (jl_is_symbol(sym)) { - jl_binding_t *bnd = NULL; - Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true); - if (bp) - ctx.builder.CreateCall(prepare_call(jldeclareconst_func), - { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym) }); + jl_binding_t *bnd = jl_get_module_binding(mod, sym, 1); + if (nargs == 2) { + jl_cgval_t rhs = emit_expr(ctx, args[1]); + ctx.builder.CreateCall(prepare_call(jldeclareconstval_func), + { julia_binding_gv(ctx, bnd), literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), boxed(ctx, rhs) }); + } else { + ctx.builder.CreateCall(prepare_call(jldeclareconstval_func), + { julia_binding_gv(ctx, bnd), literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), ConstantPointerNull::get(cast(ctx.types().T_prjlvalue)) }); + } + } + } + else if (head == jl_globaldecl_sym) { + assert(nargs == 2); + jl_sym_t *sym = (jl_sym_t*)args[0]; 
+ jl_module_t *mod = ctx.module; + if (jl_is_globalref(sym)) { + mod = jl_globalref_mod(sym); + sym = jl_globalref_name(sym); } + jl_cgval_t typ = emit_expr(ctx, args[1]); + ctx.builder.CreateCall(prepare_call(jldeclareglobal_func), + { literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), boxed(ctx, typ) }); } else if (head == jl_new_sym) { bool is_promotable = false; if (ssaidx_0based >= 0) { - is_promotable = ctx.ssavalue_usecount.at(ssaidx_0based) == 1; + is_promotable = ctx.ssavalue_usecount[ssaidx_0based] == 1; } assert(nargs > 0); - SmallVector argv(nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); } @@ -5582,12 +7052,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ jl_is_datatype(jl_tparam0(ty)) && jl_is_concrete_type(jl_tparam0(ty))) { assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1); - jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, argv.data() + 1, is_promotable); + jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, ArrayRef(argv).drop_front(), is_promotable); if (is_promotable && res.promotion_point && res.promotion_ssa==-1) res.promotion_ssa = ssaidx_0based; return res; } - Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv.data(), nargs, julia_call); + Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv, nargs, julia_call); // temporarily mark as `Any`, expecting `emit_ssaval_assign` to update // it to the inferred type. return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type); @@ -5605,15 +7075,16 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type); } else if (head == jl_new_opaque_closure_sym) { - assert(nargs >= 4 && "Not enough arguments in new_opaque_closure"); - SmallVector argv(nargs, jl_cgval_t()); + assert(nargs >= 5 && "Not enough arguments in new_opaque_closure"); + SmallVector argv(nargs, jl_cgval_t()); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); } const jl_cgval_t &argt = argv[0]; const jl_cgval_t &lb = argv[1]; const jl_cgval_t &ub = argv[2]; - const jl_cgval_t &source = argv[3]; + // argv[3] - constprop marker not used here + const jl_cgval_t &source = argv[4]; if (source.constant == NULL) { // For now, we require non-constant source to be handled by using // eval. This should probably be a verifier error and an abort here. 
@@ -5626,35 +7097,46 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ ((jl_method_t*)source.constant)->nargs > 0 && jl_is_valid_oc_argtype((jl_tupletype_t*)argt.constant, (jl_method_t*)source.constant); + if (!can_optimize && trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to OpaqueClosure method\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } + if (can_optimize) { jl_value_t *closure_t = NULL; jl_value_t *env_t = NULL; JL_GC_PUSH2(&closure_t, &env_t); - SmallVector env_component_ts(nargs-4); - for (size_t i = 0; i < nargs - 4; ++i) { - env_component_ts[i] = argv[4+i].typ; + size_t ncapture_args = nargs-5; + SmallVector env_component_ts(ncapture_args); + for (size_t i = 0; i < ncapture_args; ++i) { + jl_value_t *typ = argv[nargs-ncapture_args+i].typ; + if (typ == jl_bottom_type) { + JL_GC_POP(); + return jl_cgval_t(); + } + env_component_ts[i] = typ; } - env_t = jl_apply_tuple_type_v(env_component_ts.data(), nargs-4); + env_t = jl_apply_tuple_type_v(env_component_ts.data(), ncapture_args); // we need to know the full env type to look up the right specialization if (jl_is_concrete_type(env_t)) { jl_tupletype_t *argt_typ = (jl_tupletype_t*)argt.constant; Function *F, *specF; - std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, (jl_datatype_t*)env_t, argt_typ, ub.constant); + std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, (jl_tupletype_t*)env_t, argt_typ, ub.constant); if (F) { jl_cgval_t jlcall_ptr = mark_julia_type(ctx, F, false, jl_voidpointer_type); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); - Instruction *I = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_last_age_field(ctx), ctx.types().alignof_ptr); - jl_cgval_t world_age = mark_julia_type(ctx, ai.decorateInst(I), false, jl_long_type); + jl_cgval_t world_age = mark_julia_type(ctx, get_tls_world_age(ctx), false, jl_long_type); jl_cgval_t fptr; if (specF) fptr = mark_julia_type(ctx, specF, false, jl_voidpointer_type); else - fptr = mark_julia_type(ctx, (llvm::Value*)Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type); + fptr = mark_julia_type(ctx, Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type); // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC - jl_cgval_t env = emit_new_struct(ctx, env_t, nargs-4, &argv.data()[4]); + jl_cgval_t env = emit_new_struct(ctx, env_t, ncapture_args, ArrayRef(argv).drop_front(nargs-ncapture_args)); jl_cgval_t closure_fields[5] = { env, @@ -5675,13 +7157,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ } return mark_julia_type(ctx, - emit_jlcall(ctx, jl_new_opaque_closure_jlcall_func, Constant::getNullValue(ctx.types().T_prjlvalue), argv.data(), nargs, julia_call), + emit_jlcall(ctx, jl_new_opaque_closure_jlcall_func, Constant::getNullValue(ctx.types().T_prjlvalue), argv, nargs, julia_call), true, jl_any_type); } else if (head == jl_exc_sym) { assert(nargs == 0); return mark_julia_type(ctx, - ctx.builder.CreateCall(prepare_call(jl_current_exception_func)), + ctx.builder.CreateCall(prepare_call(jl_current_exception_func), {get_current_task(ctx)}), true, jl_any_type); } else if (head == jl_copyast_sym) { @@ -5697,43 +7179,41 @@ static 
jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ } else if (head == jl_loopinfo_sym) { // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4)) + // to LLVM LoopID SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None); + MDs.push_back(TempNode.get()); + for (int i = 0, ie = nargs; i < ie; ++i) { Metadata *MD = to_md_tree(args[i], ctx.builder.getContext()); if (MD) MDs.push_back(MD); } - MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs); - CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func)); - I->setMetadata("julia.loopinfo", MD); + ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs); + // Replace the temporary node with a self-reference. + ctx.LoopID->replaceOperandWith(0, ctx.LoopID); return jl_cgval_t(); } else if (head == jl_leave_sym || head == jl_coverageeffect_sym - || head == jl_pop_exception_sym || head == jl_enter_sym || head == jl_inbounds_sym + || head == jl_pop_exception_sym || head == jl_inbounds_sym || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) { jl_errorf("Expr(:%s) in value position", jl_symbol_name(head)); } else if (head == jl_boundscheck_sym) { - return mark_julia_const(ctx, bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false); + jl_value_t *def = (nargs == 0) ? jl_true : args[0]; + return mark_julia_const(ctx, bounds_check_enabled(ctx, def) ? jl_true : jl_false); } else if (head == jl_gc_preserve_begin_sym) { - SmallVector argv(nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); } - std::vector vals; + SmallVector vals; for (size_t i = 0; i < nargs; ++i) { - const jl_cgval_t &ai = argv[i]; - if (ai.constant || ai.typ == jl_bottom_type) - continue; - if (ai.isboxed) { - vals.push_back(ai.Vboxed); - } - else if (jl_is_concrete_immutable(ai.typ) && !jl_is_pointerfree(ai.typ)) { - Type *at = julia_type_to_llvm(ctx, ai.typ); - vals.push_back(emit_unbox(ctx, at, ai, ai.typ)); - } + vals.append(get_gc_roots_for(ctx, argv[i])); } Value *token = vals.empty() ? (Value*)ConstantTokenNone::get(ctx.builder.getContext()) @@ -5754,6 +7234,10 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ ctx.builder.CreateCall(prepare_call(gc_preserve_end_func), {token.V}); return jl_cgval_t((jl_value_t*)jl_nothing_type); } + else if (head == jl_latestworld_sym && !jl_is_method(ctx.linfo->def.method)) { + emit_latestworld(ctx); + return jl_cgval_t((jl_value_t*)jl_nothing_type); + } else { if (jl_is_toplevel_only_expr(expr) && !jl_is_method(ctx.linfo->def.method)) { @@ -5782,40 +7266,70 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal // this will require the runtime, but it gets deleted later if unused ctx.topalloca = ctx.builder.CreateCall(prepare_call(or_new ? jladoptthread_func : jlpgcstack_func)); ctx.pgcstack = ctx.topalloca; + ctx.pgcstack->setName("pgcstack"); } static Value *get_current_task(jl_codectx_t &ctx) { - return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack); + return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack); } // Get PTLS through current task. 
 static Value *get_current_ptls(jl_codectx_t &ctx)
 {
-    return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
+    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
 }
 
 // Get the address of the world age of the current task
-static Value *get_last_age_field(jl_codectx_t &ctx)
+static Value *get_tls_world_age_field(jl_codectx_t &ctx)
+{
+    Value *ct = get_current_task(ctx);
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, world_age), "world_age");
+}
+
+// Get the value of the world age of the current task
+static Value *get_tls_world_age(jl_codectx_t &ctx)
+{
+    if (ctx.world_age_at_entry)
+        return ctx.world_age_at_entry;
+    IRBuilderBase::InsertPointGuard IP(ctx.builder);
+    bool toplevel = !jl_is_method(ctx.linfo->def.method);
+    if (!toplevel) {
+        ctx.builder.SetInsertPoint(ctx.topalloca->getParent(), ++ctx.topalloca->getIterator());
+        ctx.builder.SetCurrentDebugLocation(ctx.topalloca->getStableDebugLoc());
+    }
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    auto *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_tls_world_age_field(ctx), ctx.types().alignof_ptr);
+    ai.decorateInst(world);
+    if (!toplevel)
+        ctx.world_age_at_entry = world;
+    return world;
+}
+
+static Value *get_scope_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
-    return ctx.builder.CreateInBoundsGEP(
-        ctx.types().T_size,
-        ctx.builder.CreateBitCast(ct, ctx.types().T_size->getPointerTo()),
-        ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr),
-        "world_age");
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "scope");
+}
+
+Function *get_or_emit_fptr1(StringRef preal_decl, Module *M)
+{
+    return cast<Function>(M->getOrInsertFunction(preal_decl, get_func_sig(M->getContext()), get_func_attrs(M->getContext())).getCallee());
 }
 
-static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t &params)
+Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params) JL_NOTSAFEPOINT
 {
     ++EmittedToJLInvokes;
-    jl_codectx_t ctx(M->getContext(), params);
+    jl_codectx_t ctx(M->getContext(), params, codeinst);
     std::string name;
-    raw_string_ostream(name) << "tojlinvoke" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+    raw_string_ostream(name) << "tojlinvoke" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
     Function *f = Function::Create(ctx.types().T_jlfunc, GlobalVariable::InternalLinkage, name, M);
     jl_init_function(f, params.TargetTriple);
+    if (trim_may_error(params.params->trim)) {
+        push_frames(ctx, ctx.linfo, jl_get_ci_mi(codeinst));
+    }
     jl_name_jlfunc_args(params, f);
     //f->setAlwaysInline();
     ctx.f = f; // for jl_Module
@@ -5823,18 +7337,17 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     ctx.builder.SetInsertPoint(b0);
     Function *theFunc;
     Value *theFarg;
-    auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-    bool cache_valid = params.cache;
-    if (cache_valid && invoke != NULL) {
-        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
+    if (!theFptrName.empty()) {
         theFunc = cast<Function>(
             M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(ctx.builder.getContext())).getCallee());
         theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
     }
     else { -
        jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
+        bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+        theFunc = prepare_call(is_opaque_closure ? jlinvokeoc_func : jlinvoke_func);
+        theFarg = literal_pointer_val(ctx, (jl_value_t*)mi);
     }
     theFarg = track_pjlvalue(ctx, theFarg);
     auto args = f->arg_begin();
@@ -5844,15 +7357,17 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     return f;
 }
 
-static Type *get_returnroots_type(jl_codectx_t &ctx, unsigned rootcount) {
-    return ArrayType::get(ctx.types().T_prjlvalue, rootcount);
-}
-
-static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) {
-    return ArrayType::get(getInt8Ty(C), unionbytes);
+static jl_value_t *get_oc_type(jl_value_t *calltype, jl_value_t *rettype) JL_ALWAYS_LEAFTYPE
+{
+    jl_value_t *argtype = jl_argtype_without_function((jl_value_t*)calltype);
+    JL_GC_PUSH1(&argtype);
+    jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, argtype, rettype);
+    JL_GC_PROMISE_ROOTED(oc_type);
+    JL_GC_POP();
+    return oc_type;
 }
 
-static void emit_cfunc_invalidate(
+void emit_specsig_to_fptr1(
     Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
     jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
     size_t nargs,
@@ -5860,7 +7375,7 @@ static void emit_cfunc_invalidate(
     Function *target)
 {
     ++EmittedCFuncInvalidates;
-    jl_codectx_t ctx(gf_thunk->getParent()->getContext(), params);
+    jl_codectx_t ctx(gf_thunk->getParent()->getContext(), params, 0, 0);
     ctx.f = gf_thunk;
 
     BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", gf_thunk);
@@ -5869,7 +7384,7 @@ static void emit_cfunc_invalidate(
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0);
     Function::arg_iterator AI = gf_thunk->arg_begin();
-    SmallVector myargs(nargs);
+    SmallVector myargs(nargs);
     if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
         ++AI;
     if (return_roots)
@@ -5878,14 +7393,21 @@ static void emit_cfunc_invalidate(
         ++AI; // gcstack_arg
     }
     for (size_t i = 0; i < nargs; i++) {
+        if (i == 0 && is_for_opaque_closure) {
+            // `jt` would be wrong here (it is the captures type), so is not used for
+            // the ABI decisions, but the argument actually will require boxing as its real type
+            // which can be exactly recomputed from the specialization, as that defines the ABI
+            jl_value_t *oc_type = get_oc_type(calltype, rettype);
+            Value *arg_v = &*AI;
+            ++AI;
+            myargs[i] = mark_julia_slot(arg_v, (jl_value_t*)oc_type, NULL, ctx.tbaa().tbaa_const);
+            continue;
+        }
+        // n.b. calltype is required to be a datatype by construction for specsig
         jl_value_t *jt = jl_nth_slot_type(calltype, i);
-        // n.b.
specTypes is required to be a datatype by construction for specsig bool isboxed = false; Type *et; - if (i == 0 && is_for_opaque_closure) { - et = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); - } - else if (deserves_argbox(jt)) { + if (deserves_argbox(jt)) { et = ctx.types().T_prjlvalue; isboxed = true; } @@ -5896,28 +7418,33 @@ static void emit_cfunc_invalidate( myargs[i] = mark_julia_const(ctx, jl_tparam0(jt)); } else if (type_is_ghost(et)) { - assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->instance); + assert(jl_is_datatype(jt) && jl_is_datatype_singleton((jl_datatype_t*)jt)); myargs[i] = mark_julia_const(ctx, ((jl_datatype_t*)jt)->instance); } else { Value *arg_v = &*AI; ++AI; - Type *at = arg_v->getType(); - if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) { - myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); + if (!isboxed && et->isAggregateType()) { + auto tracked = CountTrackedPointers(et); + SmallVector roots; + if (tracked.count && !tracked.all) { + roots = load_gc_roots(ctx, &*AI, tracked.count); + ++AI; + } + myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const, roots); } else { - assert(at == et); + assert(arg_v->getType() == et); myargs[i] = mark_julia_type(ctx, arg_v, isboxed, jt); } - (void)at; } } assert(AI == gf_thunk->arg_end()); - Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs.data(), nargs, julia_call); + Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs, nargs, julia_call); jl_cgval_t gf_retbox = mark_julia_type(ctx, gf_ret, true, jl_any_type); if (cc != jl_returninfo_t::Boxed) { emit_typecheck(ctx, gf_retbox, rettype, "cfunction"); + gf_retbox = update_julia_type(ctx, gf_retbox, rettype); } switch (cc) { @@ -5930,20 +7457,20 @@ static void emit_cfunc_invalidate( ctx.builder.CreateRetVoid(); } else { - gf_ret = emit_bitcast(ctx, gf_ret, gfrt->getPointerTo()); ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gfrt, gf_ret, Align(julia_alignment(rettype)))); } break; } case jl_returninfo_t::SRet: { + Value *sret = &*gf_thunk->arg_begin(); + Align align(julia_alignment(rettype)); if (return_roots) { - Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]* - assert(cast(root1->getType())->isOpaqueOrPointeeTypeMatches(get_returnroots_type(ctx, return_roots))); - root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0); - ctx.builder.CreateStore(gf_ret, root1); + Value *roots = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]* + split_value_into(ctx, gf_retbox, align, sret, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe)); + } + else { + emit_unbox_store(ctx, gf_retbox, sret, ctx.tbaa().tbaa_stack, align); } - emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret, - jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype)); ctx.builder.CreateRetVoid(); break; } @@ -5951,7 +7478,7 @@ static void emit_cfunc_invalidate( Type *retty = gf_thunk->getReturnType(); Value *gf_retval = UndefValue::get(retty); Value *tindex = compute_box_tindex(ctx, emit_typeof(ctx, gf_retbox, false, true), (jl_value_t*)jl_any_type, rettype); - tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); gf_retval = 
ctx.builder.CreateInsertValue(gf_retval, gf_ret, 0); gf_retval = ctx.builder.CreateInsertValue(gf_retval, tindex, 1); ctx.builder.CreateRet(gf_retval); @@ -5965,18 +7492,7 @@ static void emit_cfunc_invalidate( } } -static void emit_cfunc_invalidate( - Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, - jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, - size_t nargs, - jl_codegen_params_t ¶ms) -{ - emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, - prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func)); -} - -#include -static Function* gen_cfun_wrapper( +static Function *gen_cfun_wrapper( Module *into, jl_codegen_params_t ¶ms, const function_sig_t &sig, jl_value_t *ff, const char *aliasname, jl_value_t *declrt, jl_method_instance_t *lam, @@ -5988,42 +7504,19 @@ static Function* gen_cfun_wrapper( size_t nargs = sig.nccallargs; const char *name = "cfunction"; size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_code_instance_t *codeinst = NULL; bool nest = (!ff || unionall_env); jl_value_t *astrt = (jl_value_t*)jl_any_type; - void *callptr = NULL; - int calltype = 0; if (aliasname) name = aliasname; else if (lam) name = jl_symbol_name(lam->def.method->name); - if (lam && params.cache) { - // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here - codeinst = jl_compile_method_internal(lam, world); - auto invoke = jl_atomic_load_acquire(&codeinst->invoke); - auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); - assert(invoke); - if (fptr) { - while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { - jl_cpu_pause(); - } - invoke = jl_atomic_load_relaxed(&codeinst->invoke); - } - // WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. 
- if (invoke == jl_fptr_args_addr) { - callptr = fptr; - calltype = 1; - } - else if (invoke == jl_fptr_const_return_addr) { - // don't need the fptr - callptr = (void*)codeinst->rettype_const; - calltype = 2; - } - else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) { - callptr = fptr; - calltype = 3; - } - astrt = codeinst->rettype; + + jl_code_instance_t *codeinst = NULL; + if (lam) { + // TODO: this isn't ideal to be unconditionally calling type inference from here + codeinst = jl_type_infer(lam, world, SOURCE_MODE_NOT_REQUIRED); + if (codeinst) + astrt = codeinst->rettype; if (astrt != (jl_value_t*)jl_bottom_type && jl_type_intersection(astrt, declrt) == jl_bottom_type) { // Do not warn if the function never returns since it is @@ -6034,7 +7527,7 @@ static Function* gen_cfun_wrapper( } std::string funcName; - raw_string_ostream(funcName) << "jlcapi_" << name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); + raw_string_ostream(funcName) << "jlcapi_" << name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); Module *M = into; // Safe because ctx lock is held by params AttributeList attributes = sig.attributes; @@ -6042,13 +7535,13 @@ static Function* gen_cfun_wrapper( if (nest) { // add nest parameter (pointer to jl_value_t* data array) after sret arg assert(closure_types); - std::vector fargt_sig(sig.fargt_sig); + SmallVector fargt_sig(sig.fargt_sig.begin(), sig.fargt_sig.end()); fargt_sig.insert(fargt_sig.begin() + sig.sret, JuliaType::get_pprjlvalue_ty(M->getContext())); // Shift LLVM attributes for parameters one to the right, as // we are adding the extra nest parameter after sret arg. - std::vector> newAttributes; + SmallVector, 0> newAttributes; newAttributes.reserve(attributes.getNumAttrSets() + 1); auto it = *attributes.indexes().begin(); const auto it_end = *attributes.indexes().end(); @@ -6098,9 +7591,8 @@ static Function* gen_cfun_wrapper( jl_init_function(cw, params.TargetTriple); cw->setAttributes(AttributeList::get(M->getContext(), {attributes, cw->getAttributes()})); - jl_codectx_t ctx(M->getContext(), params); + jl_codectx_t ctx(M->getContext(), params, 0, 0); ctx.f = cw; - ctx.world = world; ctx.name = name; ctx.funcName = name; @@ -6110,23 +7602,19 @@ static Function* gen_cfun_wrapper( ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0, true); - Value *world_age_field = get_last_age_field(ctx); + auto world_age_field = get_tls_world_age_field(ctx); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); - Value *last_age = ai.decorateInst( + ctx.world_age_at_entry = ai.decorateInst( ctx.builder.CreateAlignedLoad(ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); - Value *world_v = ctx.builder.CreateAlignedLoad(ctx.types().T_size, prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr); cast(world_v)->setOrdering(AtomicOrdering::Acquire); - Value *age_ok = NULL; - if (calltype) { + Value *age_ok = nullptr; + if (codeinst) { LoadInst *lam_max = ctx.builder.CreateAlignedLoad( ctx.types().T_size, - ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_size, - emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), ctx.types().T_size->getPointerTo()), - offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr), + emit_ptrgep(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), offsetof(jl_code_instance_t, max_world)), ctx.types().alignof_ptr); age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v); } @@ -6136,7 +7624,7 @@ static Function* 
gen_cfun_wrapper( Function::arg_iterator AI = cw->arg_begin(); Value *sretPtr = sig.sret ? &*AI++ : NULL; Value *nestPtr = nest ? &*AI++ : NULL; - SmallVector inputargs(nargs + 1); + SmallVector inputargs(nargs + 1); if (ff) { // we need to pass the function object even if (even though) it is a singleton inputargs[0] = mark_julia_const(ctx, ff); @@ -6153,7 +7641,7 @@ static Function* gen_cfun_wrapper( for (size_t i = 0; i < nargs; ++i, ++AI) { // figure out how to unpack this argument type Value *val = &*AI; - assert(sig.fargt_sig.at(i + sig.sret) == val->getType()); + assert(sig.fargt_sig[i + sig.sret] == val->getType()); jl_cgval_t &inputarg = inputargs[i + 1]; jl_value_t *jargty = jl_svecref(sig.at, i); bool aref = jl_is_abstract_ref_type(jargty); @@ -6179,7 +7667,7 @@ static Function* gen_cfun_wrapper( if (aref) { if (jargty == (jl_value_t*)jl_any_type) { inputarg = mark_julia_type(ctx, - ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, emit_bitcast(ctx, val, ctx.types().T_pprjlvalue), Align(sizeof(void*))), + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, val, Align(sizeof(void*))), true, jl_any_type); } else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed @@ -6191,14 +7679,13 @@ static Function* gen_cfun_wrapper( inputarg = ghostValue(ctx, jargty); } else { - val = emit_bitcast(ctx, val, T->getPointerTo()); val = ctx.builder.CreateAlignedLoad(T, val, Align(1)); // make no alignment assumption about pointer from C inputarg = mark_julia_type(ctx, val, false, jargty); } } else if (static_at || (!jl_is_typevar(jargty) && !jl_is_immutable_datatype(jargty))) { // must be a jl_value_t* (because it's mutable or contains gc roots) - inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, emit_bitcast(ctx, val, ctx.types().T_prjlvalue)), true, jargty_proper); + inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, val), true, jargty_proper); } else { // allocate val into a new box, if it might not be boxed @@ -6208,26 +7695,25 @@ static Function* gen_cfun_wrapper( *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)), + emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr), Align(sizeof(void*))); BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw); BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw); BasicBlock *unboxedBB = BasicBlock::Create(ctx.builder.getContext(), "maybe-unboxed", cw); BasicBlock *isanyBB = BasicBlock::Create(ctx.builder.getContext(), "any", cw); BasicBlock *afterBB = BasicBlock::Create(ctx.builder.getContext(), "after", cw); - Value *isrtboxed = ctx.builder.CreateIsNull(val); // XXX: this is the wrong condition and should be inspecting runtime_dt intead + Value *isrtboxed = ctx.builder.CreateIsNull(val); // XXX: this is the wrong condition and should be inspecting runtime_dt instead ctx.builder.CreateCondBr(isrtboxed, boxedBB, loadBB); ctx.builder.SetInsertPoint(boxedBB); - Value *p1 = ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue); + Value *p1 = val; p1 = track_pjlvalue(ctx, p1); ctx.builder.CreateBr(afterBB); ctx.builder.SetInsertPoint(loadBB); Value *isrtany = ctx.builder.CreateICmpEQ( - literal_pointer_val(ctx, (jl_value_t*)jl_any_type), - ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue)); + 
literal_pointer_val(ctx, (jl_value_t*)jl_any_type), val); ctx.builder.CreateCondBr(isrtany, isanyBB, unboxedBB); ctx.builder.SetInsertPoint(isanyBB); - Value *p2 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateBitCast(val, ctx.types().T_pprjlvalue), Align(sizeof(void*))); + Value *p2 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, val, Align(sizeof(void*))); ctx.builder.CreateBr(afterBB); ctx.builder.SetInsertPoint(unboxedBB); Value *p3 = emit_new_bits(ctx, runtime_dt, val); @@ -6242,7 +7728,7 @@ static Function* gen_cfun_wrapper( } } else { - bool argboxed = sig.fargt_isboxed.at(i); + bool argboxed = sig.fargt_isboxed[i]; if (argboxed) { // a jl_value_t*, even when represented as a struct inputarg = mark_julia_type(ctx, val, true, jargty_proper); @@ -6251,7 +7737,7 @@ static Function* gen_cfun_wrapper( // something of type T // undo whatever we might have done to this poor argument assert(jl_is_datatype(jargty)); - if (sig.byRefList.at(i)) { + if (sig.byRefList[i]) { val = ctx.builder.CreateAlignedLoad(sig.fargt[i], val, Align(1)); // unknown alignment from C } else { @@ -6275,7 +7761,7 @@ static Function* gen_cfun_wrapper( *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)), + emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr), Align(sizeof(void*))); Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty); inputarg = mark_julia_type(ctx, strct, true, jargty_proper); @@ -6289,180 +7775,23 @@ static Function* gen_cfun_wrapper( // Create the call bool jlfunc_sret; jl_cgval_t retval; - if (calltype == 2) { - nargs = 0; // arguments not needed -- TODO: not really true, should emit an age_ok test and jlcall - jlfunc_sret = false; - retval = mark_julia_const(ctx, (jl_value_t*)callptr); - } - else if (calltype == 0 || calltype == 1) { - // emit a jlcall - jlfunc_sret = false; - Function *theFptr = NULL; - if (calltype == 1) { - StringRef fname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst); - theFptr = cast_or_null(jl_Module->getNamedValue(fname)); - if (!theFptr) { - theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage, - fname, jl_Module); - jl_init_function(theFptr, ctx.emission_context.TargetTriple); - jl_name_jlfunc_args(ctx.emission_context, theFptr); - addRetAttr(theFptr, Attribute::NonNull); - } - else { - assert(theFptr->getFunctionType() == ctx.types().T_jlfunc); - } - } - BasicBlock *b_generic, *b_jlcall, *b_after; - Value *ret_jlcall; - if (age_ok) { - assert(theFptr); - b_generic = BasicBlock::Create(ctx.builder.getContext(), "generic", cw); - b_jlcall = BasicBlock::Create(ctx.builder.getContext(), "apply", cw); - b_after = BasicBlock::Create(ctx.builder.getContext(), "after", cw); - ctx.builder.CreateCondBr(age_ok, b_jlcall, b_generic); - ctx.builder.SetInsertPoint(b_jlcall); - // for jlcall, we need to pass the function object even if it is a ghost. 
- Value *theF = boxed(ctx, inputargs[0]); - assert(theF); - ret_jlcall = emit_jlcall(ctx, theFptr, theF, &inputargs[1], nargs, julia_call); - ctx.builder.CreateBr(b_after); - ctx.builder.SetInsertPoint(b_generic); - } - Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs.data(), nargs + 1, julia_call); - if (age_ok) { - ctx.builder.CreateBr(b_after); - ctx.builder.SetInsertPoint(b_after); - PHINode *retphi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); - retphi->addIncoming(ret_jlcall, b_jlcall); - retphi->addIncoming(ret, b_generic); - ret = retphi; + if (codeinst) { + retval = emit_invoke(ctx, mark_julia_const(ctx, (jl_value_t*)codeinst), inputargs, nargs + 1, astrt, age_ok); + jlfunc_sret = retval.V && isa(retval.V) && !retval.TIndex && retval.inline_roots.empty(); + if (jlfunc_sret && sig.sret) { + // fuse the two sret together + assert(retval.ispointer()); + AllocaInst *result = cast(retval.V); + retval.V = sretPtr; + result->replaceAllUsesWith(sretPtr); + result->eraseFromParent(); } - retval = mark_julia_type(ctx, ret, true, astrt); } else { - bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; - assert(calltype == 3); - // emit a specsig call - bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); - StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst); - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg); - FunctionType *cft = returninfo.decl.getFunctionType(); - jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); - - // TODO: Can use use emit_call_specfun_other here? - std::vector args; - Value *result; - if (jlfunc_sret || returninfo.cc == jl_returninfo_t::Union) { - // fuse the two sret together, or emit an alloca to hold it - if (sig.sret && jlfunc_sret) { - result = emit_bitcast(ctx, sretPtr, cft->getParamType(0)); - } - else { - if (jlfunc_sret) { - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); - setName(ctx.emission_context, result, "sret"); - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - } else { - result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes)); - setName(ctx.emission_context, result, "result_union"); - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - } - } - args.push_back(result); - } - if (returninfo.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); - setName(ctx.emission_context, return_roots, "return_roots"); - args.push_back(return_roots); - } - if (gcstack_arg) - args.push_back(ctx.pgcstack); - for (size_t i = 0; i < nargs + 1; i++) { - // figure out how to repack the arguments - jl_cgval_t &inputarg = inputargs[i]; - Value *arg; - jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); - // n.b. specTypes is required to be a datatype by construction for specsig - bool isboxed = deserves_argbox(spect); - Type *T = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect); - if (is_uniquerep_Type(spect)) { - continue; - } - else if (isboxed) { - arg = boxed(ctx, inputarg); - } - else if (type_is_ghost(T)) { - continue; // ghost types are skipped by the specsig method signature - } - else if (T->isAggregateType()) { - // aggregate types are passed by pointer - inputarg = value_to_pointer(ctx, inputarg); - arg = maybe_bitcast(ctx, decay_derived(ctx, data_pointer(ctx, inputarg)), - T->getPointerTo()); - } - else { - arg = emit_unbox(ctx, T, inputarg, spect); - assert(!isa(arg)); - } - - // add to argument list - args.push_back(arg); - } - Value *theFptr = returninfo.decl.getCallee(); - assert(theFptr); - if (age_ok) { - funcName += "_gfthunk"; - Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(), - GlobalVariable::InternalLinkage, funcName, M); - jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); - gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); - // build a specsig -> jl_apply_generic converter thunk - // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer), - // but which has the signature of a specsig - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context); - theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); - } - - assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType())); - CallInst *call = ctx.builder.CreateCall( - returninfo.decl.getFunctionType(), - theFptr, ArrayRef(args)); - call->setAttributes(returninfo.attrs); - if (gcstack_arg) - call->setCallingConv(CallingConv::Swift); - - switch (returninfo.cc) { - case jl_returninfo_t::Boxed: - retval = mark_julia_type(ctx, call, true, astrt); - break; - case jl_returninfo_t::Register: - retval = mark_julia_type(ctx, call, false, astrt); - break; - case jl_returninfo_t::SRet: - retval = mark_julia_slot(result, astrt, NULL, ctx.tbaa().tbaa_stack); - break; - case jl_returninfo_t::Union: { - Value *box = ctx.builder.CreateExtractValue(call, 0); - Value *tindex = ctx.builder.CreateExtractValue(call, 1); - Value *derived = ctx.builder.CreateSelect( - ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), - ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), - decay_derived(ctx, ctx.builder.CreateBitCast(result, ctx.types().T_pjlvalue)), - decay_derived(ctx, box)); - retval = mark_julia_slot(derived, - astrt, - tindex, - ctx.tbaa().tbaa_stack); - assert(box->getType() == ctx.types().T_prjlvalue); - retval.Vboxed = box; - break; - } - case jl_returninfo_t::Ghosts: - retval = mark_julia_slot(NULL, astrt, call, ctx.tbaa().tbaa_stack); - break; - } + // emit a dispatch + jlfunc_sret = false; + Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs, nargs + 1, julia_call); + retval = mark_julia_type(ctx, ret, true, astrt); } // inline a call to typeassert here, if required @@ -6494,21 +7823,24 @@ static Function* gen_cfun_wrapper( r = NULL; } - ctx.builder.CreateStore(last_age, world_age_field); + ctx.builder.CreateStore(ctx.world_age_at_entry, world_age_field); ctx.builder.CreateRet(r); ctx.builder.SetCurrentDebugLocation(noDbg); ctx.builder.ClearInsertionPoint(); if (aliasname) { - GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(), + auto alias = 
GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(), GlobalValue::ExternalLinkage, aliasname, cw, M); + if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) { + alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass); + } } if (nest) { funcName += "make"; Function *cw_make = Function::Create( - FunctionType::get(getInt8PtrTy(ctx.builder.getContext()), { getInt8PtrTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false), + FunctionType::get(getPointerTy(ctx.builder.getContext()), { getPointerTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false), GlobalVariable::ExternalLinkage, funcName, M); jl_init_function(cw_make, ctx.emission_context.TargetTriple); @@ -6523,8 +7855,8 @@ static Function* gen_cfun_wrapper( Function *adjust_trampoline = Intrinsic::getDeclaration(cw_make->getParent(), Intrinsic::adjust_trampoline); cwbuilder.CreateCall(init_trampoline, { Tramp, - cwbuilder.CreateBitCast(cw, getInt8PtrTy(ctx.builder.getContext())), - cwbuilder.CreateBitCast(NVal, getInt8PtrTy(ctx.builder.getContext())) + cw, + NVal, }); cwbuilder.CreateRet(cwbuilder.CreateCall(adjust_trampoline, { Tramp })); cw = cw_make; @@ -6581,7 +7913,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con return jl_cgval_t(); } if (rt != declrt && rt != (jl_value_t*)jl_any_type) - rt = jl_ensure_rooted(ctx, rt); + jl_temporary_root(ctx, rt); function_sig_t sig("cfunction", lrt, rt, retboxed, argt, unionall_env, false, CallingConv::C, false, &ctx.emission_context); assert(sig.fargt.size() + sig.sret == sig.fargt_sig.size()); @@ -6618,7 +7950,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con sigt = NULL; } else { - sigt = jl_apply_tuple_type((jl_svec_t*)sigt); + sigt = jl_apply_tuple_type((jl_svec_t*)sigt, 1); } if (sigt && !(unionall_env && jl_has_typevar_from_unionall(rt, unionall_env))) { unionall_env = NULL; @@ -6628,14 +7960,13 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con if (ctx.emission_context.TargetTriple.isAArch64() || ctx.emission_context.TargetTriple.isARM() || ctx.emission_context.TargetTriple.isPPC64()) { if (nest) { emit_error(ctx, "cfunction: closures are not supported on this platform"); + JL_GC_POP(); return jl_cgval_t(); } } size_t world = jl_atomic_load_acquire(&jl_world_counter); - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; // try to look up this function for direct invoking - jl_method_instance_t *lam = sigt ? jl_get_specialization1((jl_tupletype_t*)sigt, world, &min_valid, &max_valid, 0) : NULL; + jl_method_instance_t *lam = sigt ? 
jl_get_specialization1((jl_tupletype_t*)sigt, world, 0) : NULL; Value *F = gen_cfun_wrapper( jl_Module, ctx.emission_context, sig, fexpr_rt.constant, NULL, @@ -6649,12 +7980,14 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con jl_svec_t *fill = jl_emptysvec; if (closure_types) { assert(ctx.spvals_ptr); - size_t n = jl_array_len(closure_types); - jl_svec_t *fill = jl_alloc_svec_uninit(n); + size_t n = jl_array_nrows(closure_types); + fill = jl_alloc_svec_uninit(n); for (size_t i = 0; i < n; i++) { jl_svecset(fill, i, jl_array_ptr_ref(closure_types, i)); } - fill = (jl_svec_t*)jl_ensure_rooted(ctx, (jl_value_t*)fill); + JL_GC_PUSH1(&fill); + jl_temporary_root(ctx, (jl_value_t*)fill); + JL_GC_POP(); } Type *T_htable = ArrayType::get(ctx.types().T_size, sizeof(htable_t) / sizeof(void*)); Value *cache = new GlobalVariable(*jl_Module, T_htable, false, @@ -6663,7 +7996,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con F = ctx.builder.CreateCall(prepare_call(jlgetcfunctiontrampoline_func), { fobj, literal_pointer_val(ctx, output_type), - ctx.builder.CreateBitCast(cache, getInt8PtrTy(ctx.builder.getContext())), + cache, literal_pointer_val(ctx, (jl_value_t*)fill), F, closure_types ? literal_pointer_val(ctx, (jl_value_t*)unionall_env) : Constant::getNullValue(ctx.types().T_pjlvalue), @@ -6676,9 +8009,9 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con outboxed = (output_type != (jl_value_t*)jl_voidpointer_type); if (outboxed) { assert(jl_datatype_size(output_type) == sizeof(void*) * 4); - Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type, true); setName(ctx.emission_context, strct, "cfun_result"); - Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo()); + Value *derived_strct = decay_derived(ctx, strct); MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(ctx.builder.CreateStore(F, derived_strct)); @@ -6698,7 +8031,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con // do codegen to create a C-callable alias/wrapper, or if sysimg_handle is set, // restore one from a loaded system image. 
-const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params)
+const char *jl_generate_ccallable(Module *llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params)
 {
     ++GeneratedCCallables;
     jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt);
@@ -6727,19 +8060,21 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
             argtypes, NULL, false, CallingConv::C, false, &params);
     if (sig.err_msg.empty()) {
         size_t world = jl_atomic_load_acquire(&jl_world_counter);
-        size_t min_valid = 0;
-        size_t max_valid = ~(size_t)0;
         if (sysimg_handle) {
             // restore a ccallable from the system image
             void *addr;
             int found = jl_dlsym(sysimg_handle, name, &addr, 0);
             if (found)
                 add_named_global(name, addr);
+            else {
+                err = jl_get_exceptionf(jl_errorexception_type, "%s not found in sysimg", name);
+                jl_throw(err);
+            }
         }
         else {
-            jl_method_instance_t *lam = jl_get_specialization1((jl_tupletype_t*)sigt, world, &min_valid, &max_valid, 0);
+            jl_method_instance_t *lam = jl_get_specialization1((jl_tupletype_t*)sigt, world, 0);
             //Safe b/c params holds context lock
-            gen_cfun_wrapper(unwrap(llvmmod)->getModuleUnlocked(), params, sig, ff, name, declrt, lam, NULL, NULL, NULL);
+            gen_cfun_wrapper(llvmmod, params, sig, ff, name, declrt, lam, NULL, NULL, NULL);
         }
         JL_GC_POP();
         return name;
@@ -6750,7 +8085,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
 }
 
 // generate a julia-callable function that calls f (AKA lam)
-static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName,
+// if is_opaque_closure, then generate the OC invoke, rather than a real invoke
+static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, bool is_opaque_closure, StringRef funcName,
         Module *M, jl_codegen_params_t &params)
 {
     ++GeneratedInvokeWrappers;
@@ -6767,140 +8103,62 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     //Value *mfunc = &*AI++; (void)mfunc; // unused
     assert(AI == w->arg_end());
-    jl_codectx_t ctx(M->getContext(), params);
+    jl_codectx_t ctx(M->getContext(), params, 0, 0);
     ctx.f = w;
     ctx.linfo = lam;
-    ctx.rettype = jlretty;
-    ctx.world = 0;
-
-    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", w);
-    ctx.builder.SetInsertPoint(b0);
-    DebugLoc noDbg;
-    ctx.builder.SetCurrentDebugLocation(noDbg);
-    allocate_gc_frame(ctx, b0);
-
-    // TODO: replace this with emit_call_specfun_other?
- FunctionType *ftype = const_cast(f.decl).getFunctionType(); - size_t nfargs = ftype->getNumParams(); - SmallVector args(nfargs); - unsigned idx = 0; - AllocaInst *result = NULL; - switch (f.cc) { - case jl_returninfo_t::Boxed: - case jl_returninfo_t::Register: - case jl_returninfo_t::Ghosts: - break; - case jl_returninfo_t::SRet: - assert(cast(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType())); - result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()); - setName(ctx.emission_context, result, "sret"); - args[idx] = result; - idx++; - break; - case jl_returninfo_t::Union: - result = ctx.builder.CreateAlloca(ArrayType::get(getInt8Ty(ctx.builder.getContext()), f.union_bytes)); - if (f.union_align > 1) - result->setAlignment(Align(f.union_align)); - args[idx] = result; - idx++; - setName(ctx.emission_context, result, "result_union"); - break; - } - if (f.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots)); - setName(ctx.emission_context, return_roots, "return_roots"); - args[idx] = return_roots; - idx++; - } - bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); - if (gcstack_arg) { - args[idx] = ctx.pgcstack; - idx++; - } - bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; - for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { - jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); - // n.b. specTypes is required to be a datatype by construction for specsig - bool isboxed = deserves_argbox(ty); - Type *lty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty); - if (type_is_ghost(lty) || is_uniquerep_Type(ty)) + ctx.rettype = jlretty; + + BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", w); + ctx.builder.SetInsertPoint(b0); + DebugLoc noDbg; + ctx.builder.SetCurrentDebugLocation(noDbg); + allocate_gc_frame(ctx, b0); + + SmallVector argv(nargs); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + for (size_t i = 0; i < nargs; ++i) { + if (i == 0 && is_opaque_closure) { + jl_value_t *oc_type = (jl_value_t*)jl_any_type; // more accurately: get_oc_type(lam->specTypes, jlretty) + argv[i] = mark_julia_slot(funcArg, oc_type, NULL, ctx.tbaa().tbaa_const); continue; + } + jl_value_t *ty = jl_nth_slot_type(abi, i); Value *theArg; if (i == 0) { - // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers - // come in as ::Tracked, but specsig expected ::Derived. 
- if (is_opaque_closure) - theArg = decay_derived(ctx, funcArg); - else - theArg = funcArg; + theArg = funcArg; } else { - Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); theArg = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, ty)); } - if (!isboxed) { - theArg = decay_derived(ctx, emit_bitcast(ctx, theArg, PointerType::get(lty, 0))); - if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers - theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty))); - } - assert(dyn_cast(theArg) == NULL); - args[idx] = theArg; - idx++; + argv[i] = mark_julia_type(ctx, theArg, true, ty); } - CallInst *call = ctx.builder.CreateCall(f.decl, args); - call->setAttributes(f.attrs); - if (gcstack_arg) - call->setCallingConv(CallingConv::Swift); - jl_cgval_t retval; + jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, abi, jlretty, f, argv, nargs); if (retarg != -1) { Value *theArg; if (retarg == 0) theArg = funcArg; else theArg = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, retarg - 1), + emit_ptrgep(ctx, argArray, (retarg - 1) * ctx.types().sizeof_ptr), Align(sizeof(void*))); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } - else { - switch (f.cc) { - case jl_returninfo_t::Boxed: - retval = mark_julia_type(ctx, call, true, jlretty); - break; - case jl_returninfo_t::Register: - retval = mark_julia_type(ctx, call, false, jlretty); - break; - case jl_returninfo_t::SRet: - retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack); - break; - case jl_returninfo_t::Union: - // result is technically not right here, but `boxed` will only look at it - // for the unboxed values, so it's ok. 
- retval = mark_julia_slot(result, - jlretty, - ctx.builder.CreateExtractValue(call, 1), - ctx.tbaa().tbaa_stack); - retval.Vboxed = ctx.builder.CreateExtractValue(call, 0); - assert(retval.Vboxed->getType() == ctx.types().T_prjlvalue); - break; - case jl_returninfo_t::Ghosts: - retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack); - break; - } - } - ctx.builder.CreateRet(boxed(ctx, retval)); - return w; + if (retval.typ == jl_bottom_type) + CreateTrap(ctx.builder, false); + else + ctx.builder.CreateRet(boxed(ctx, retval)); } -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg) +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, + ArrayRef ArgNames, unsigned nreq) { jl_returninfo_t props = {}; - SmallVector fsig; + SmallVector fsig; + SmallVector argnames; Type *rt = NULL; Type *srt = NULL; if (jlrettype == (jl_value_t*)jl_bottom_type) { @@ -6918,8 +8176,9 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value props.cc = jl_returninfo_t::Union; Type *AT = ArrayType::get(getInt8Ty(ctx.builder.getContext()), props.union_bytes); fsig.push_back(AT->getPointerTo()); + argnames.push_back("union_bytes_return"); Type *pair[] = { ctx.types().T_prjlvalue, getInt8Ty(ctx.builder.getContext()) }; - rt = StructType::get(ctx.builder.getContext(), makeArrayRef(pair)); + rt = StructType::get(ctx.builder.getContext(), ArrayRef(pair)); } else if (allunbox) { props.cc = jl_returninfo_t::Ghosts; @@ -6934,14 +8193,19 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value rt = julia_type_to_llvm(ctx, jlrettype, &retboxed); assert(!retboxed); if (rt != getVoidTy(ctx.builder.getContext()) && deserves_sret(jlrettype, rt)) { - auto tracked = CountTrackedPointers(rt); + auto tracked = CountTrackedPointers(rt, true); assert(!tracked.derived); - if (tracked.count && !tracked.all) + if (tracked.count && !tracked.all) { props.return_roots = tracked.count; + assert(props.return_roots == ((jl_datatype_t*)jlrettype)->layout->npointers); + } props.cc = jl_returninfo_t::SRet; + props.union_bytes = jl_datatype_size(jlrettype); + props.union_align = props.union_minalign = julia_alignment(jlrettype); // sret is always passed from alloca assert(M); fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace())); + argnames.push_back("sret_return"); srt = rt; rt = getVoidTy(ctx.builder.getContext()); } @@ -6979,38 +8243,39 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::NoUndef); attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); - fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0)); + fsig.push_back(ctx.types().T_ptr); + argnames.push_back("return_roots"); } if (gcstack_arg){ AttrBuilder param(ctx.builder.getContext()); - param.addAttribute(Attribute::SwiftSelf); + if (ctx.emission_context.use_swiftcc) + param.addAttribute(Attribute::SwiftSelf); param.addAttribute(Attribute::NonNull); attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0)); + argnames.push_back("pgcstack_arg"); } - for (size_t i = 0; i < jl_nparams(sig); i++) { + size_t 
nparams = jl_nparams(sig); + for (size_t i = 0; i < nparams; i++) { jl_value_t *jt = jl_tparam(sig, i); bool isboxed = false; - Type *ty = NULL; - if (i == 0 && is_opaque_closure) { - ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); - isboxed = true; // true-ish anyway - we might not have the type tag - } - else { + Type *et = nullptr; + if (i != 0 || !is_opaque_closure) { // special token for OC argument if (is_uniquerep_Type(jt)) continue; isboxed = deserves_argbox(jt); - ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); + et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); + if (type_is_ghost(et)) + continue; } - if (type_is_ghost(ty)) - continue; AttrBuilder param(ctx.builder.getContext()); - if (ty->isAggregateType()) { // aggregate types are passed by pointer + Type *ty = et; + if (et == nullptr || et->isAggregateType()) { // aggregate types are passed by pointer param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::ReadOnly); - ty = PointerType::get(ty, AddressSpace::Derived); + ty = ctx.builder.getPtrTy(AddressSpace::Derived); } else if (isboxed && jl_is_immutable_datatype(jt)) { param.addAttribute(Attribute::ReadOnly); @@ -7022,6 +8287,26 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value } attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); fsig.push_back(ty); + size_t argno = i < nreq ? i : nreq; + std::string genname; + if (!ArgNames.empty()) { + genname = ArgNames[argno]; + if (genname.empty()) + genname = (StringRef("#") + Twine(argno + 1)).str(); + if (i >= nreq) + genname += (StringRef("[") + Twine(i - nreq + 1) + StringRef("]")).str(); + const char *arg_typename = jl_is_datatype(jt) ? jl_symbol_name(((jl_datatype_t*)jt)->name->name) : ""; + argnames.push_back((genname + StringRef("::") + arg_typename).str()); + } + if (et && et->isAggregateType()) { + auto tracked = CountTrackedPointers(et); + if (tracked.count && !tracked.all) { + attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); + fsig.push_back(ctx.builder.getPtrTy(M->getDataLayout().getAllocaAddrSpace())); + if (!genname.empty()) + argnames.push_back((Twine(".roots.") + genname).str()); + } + } } AttributeSet FnAttrs; @@ -7038,7 +8323,14 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value if (f == NULL) { f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M); jl_init_function(f, ctx.emission_context.TargetTriple); - f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()})); + if (ctx.emission_context.params->debug_info_level >= 2) { + ios_t sigbuf; + ios_mem(&sigbuf, 0); + jl_static_show_func_sig((JL_STREAM*) &sigbuf, sig); + f->setAttributes(AttributeList::get(f->getContext(), {attributes.addFnAttribute(ctx.builder.getContext(),"julia.fsig", StringRef(sigbuf.buf, sigbuf.size)), f->getAttributes()})); + ios_close(&sigbuf); + } else + f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()})); } else { assert(f->getFunctionType() == ftype); @@ -7048,29 +8340,25 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value else { if (fval->getType()->isIntegerTy()) fval = emit_inttoptr(ctx, fval, ftype->getPointerTo()); - else - fval = emit_bitcast(ctx, fval, ftype->getPointerTo()); } - if (gcstack_arg && isa(fval)) - cast(fval)->setCallingConv(CallingConv::Swift); + if (auto F = dyn_cast(fval)) { + if (gcstack_arg && ctx.emission_context.use_swiftcc) + 
F->setCallingConv(CallingConv::Swift); + assert(F->arg_size() >= argnames.size()); + for (size_t i = 0; i < argnames.size(); i++) { + F->getArg(i)->setName(argnames[i]); + } + } props.decl = FunctionCallee(ftype, fval); props.attrs = attributes; return props; } -static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count) -{ - if (isptr && !cast(Src->getType())->isOpaqueOrPointeeTypeMatches(T)) - Src = ctx.builder.CreateBitCast(Src, T->getPointerTo(Src->getType()->getPointerAddressSpace())); - unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); //This comes from Late-GC-Lowering?? - assert(emitted == count); (void)emitted; (void)count; -} - static DISubroutineType * get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder) { size_t nargs = jl_nparams(sig); // TODO: if this is a Varargs function, our debug info for the `...` var may be misleading - std::vector ditypes(nargs + 1); + SmallVector ditypes(nargs + 1); ditypes[0] = julia_type_to_di(ctx, debuginfo, rt, &dbuilder, false); for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_tparam(sig, i); @@ -7080,13 +8368,13 @@ get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl } /* aka Core.Compiler.tuple_tfunc */ -static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq) +static jl_datatype_t *compute_va_type(jl_value_t *sig, size_t nreq) { - size_t nvargs = jl_nparams(lam->specTypes)-nreq; + size_t nvargs = jl_nparams(sig)-nreq; jl_svec_t *tupargs = jl_alloc_svec(nvargs); JL_GC_PUSH1(&tupargs); - for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) { - jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + for (size_t i = nreq; i < jl_nparams(sig); ++i) { + jl_value_t *argType = jl_nth_slot_type(sig, i); // n.b. specTypes is required to be a datatype by construction for specsig if (is_uniquerep_Type(argType)) argType = jl_typeof(jl_tparam0(argType)); @@ -7096,7 +8384,7 @@ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq) } jl_svecset(tupargs, i-nreq, argType); } - jl_value_t *typ = jl_apply_tuple_type(tupargs); + jl_value_t *typ = jl_apply_tuple_type(tupargs, 1); JL_GC_POP(); return (jl_datatype_t*)typ; } @@ -7116,7 +8404,7 @@ static std::string get_function_name(bool specsig, bool needsparams, const char if (unadorned_name[0] == '@') unadorned_name++; } - funcName << unadorned_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); + funcName << unadorned_name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); return funcName.str(); } @@ -7126,36 +8414,34 @@ static jl_llvm_functions_t orc::ThreadSafeModule &TSM, jl_method_instance_t *lam, jl_code_info_t *src, + jl_value_t *abi, jl_value_t *jlrettype, jl_codegen_params_t ¶ms) { ++EmittedFunctions; // step 1. unpack AST and allocate codegen context for this function + size_t min_world = src->min_world; + size_t max_world = src->max_world; jl_llvm_functions_t declarations; - jl_codectx_t ctx(*params.tsctx.getContext(), params); + jl_codectx_t ctx(*params.tsctx.getContext(), params, min_world, max_world); jl_datatype_t *vatyp = NULL; JL_GC_PUSH2(&ctx.code, &vatyp); ctx.code = src->code; ctx.source = src; std::map labels; - bool toplevel = false; ctx.module = jl_is_method(lam->def.method) ? 
lam->def.method->module : lam->def.module; ctx.linfo = lam; - ctx.name = TSM.getModuleUnlocked()->getModuleIdentifier().data(); - size_t nreq = 0; - int va = 0; - if (jl_is_method(lam->def.method)) { - ctx.nargs = nreq = lam->def.method->nargs; - ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure; - if ((nreq > 0 && jl_is_method(lam->def.value) && lam->def.method->isva)) { - assert(nreq > 0); - nreq--; - va = 1; - } + ctx.name = name_from_method_instance(lam); + size_t nreq = src->nargs; + int va = src->isva; + ctx.nargs = nreq; + if (va) { + assert(nreq > 0); + nreq--; } - else { - ctx.nargs = 0; + if (jl_is_method(lam->def.value)) { + ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure; } ctx.nReqArgs = nreq; if (va) { @@ -7163,7 +8449,6 @@ static jl_llvm_functions_t if (vn != jl_unused_sym) ctx.vaSlot = ctx.nargs - 1; } - toplevel = !jl_is_method(lam->def.method); ctx.rettype = jlrettype; ctx.funcName = ctx.name; ctx.spvals_ptr = NULL; @@ -7183,26 +8468,32 @@ static jl_llvm_functions_t if (lam && jl_is_method(lam->def.method)) { toplineno = lam->def.method->line; ctx.file = jl_symbol_name(lam->def.method->file); - } - else if (jl_array_len(src->linetable) > 0) { - jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, 0); - ctx.file = jl_symbol_name((jl_sym_t*)jl_fieldref_noalloc(locinfo, 2)); - toplineno = jl_unbox_int32(jl_fieldref(locinfo, 3)); + ctx.line = lam->def.method->line; + } + else if ((jl_value_t*)src->debuginfo != jl_nothing) { + // look for the file and line info of the original start of this block, as reported by lowering + jl_debuginfo_t *debuginfo = src->debuginfo; + while ((jl_value_t*)debuginfo->linetable != jl_nothing) + debuginfo = debuginfo->linetable; + ctx.file = jl_debuginfo_file(debuginfo); + struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0); + ctx.line = lineidx.line; + toplineno = std::max((int32_t)0, lineidx.line); } if (ctx.file.empty()) ctx.file = ""; // jl_printf(JL_STDERR, "\n*** compiling %s at %s:%d\n\n", // jl_symbol_name(ctx.name), ctx.file.str().c_str(), toplineno); - bool debug_enabled = ctx.emission_context.debug_level != 0; + bool debug_enabled = ctx.emission_context.params->debug_info_level != 0; if (dbgFuncName.empty()) // Should never happen anymore? debug_enabled = false; // step 2. process var-info lists to see what vars need boxing - int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); + int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_nrows(src->ssavaluetypes); size_t vinfoslen = jl_array_dim0(src->slotflags); ctx.slots.resize(vinfoslen, jl_varinfo_t(ctx.builder.getContext())); - assert(lam->specTypes); // the specTypes field should always be assigned + assert(abi); // the specTypes field should always be assigned // create SAvalue locations for SSAValue objects @@ -7211,25 +8502,24 @@ static jl_llvm_functions_t ctx.ssavalue_usecount.assign(n_ssavalues, 0); bool specsig, needsparams; - std::tie(specsig, needsparams) = uses_specsig(lam, jlrettype, params.params->prefer_specsig); - if (!src->inferred) - specsig = false; + std::tie(specsig, needsparams) = uses_specsig(abi, lam, jlrettype, params.params->prefer_specsig); // step 3. 
some variable analysis size_t i; - for (i = 0; i < nreq; i++) { + for (i = 0; i < nreq && i < vinfoslen; i++) { jl_varinfo_t &varinfo = ctx.slots[i]; varinfo.isArgument = true; jl_sym_t *argname = slot_symbol(ctx, i); if (argname == jl_unused_sym) continue; - jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); + jl_value_t *ty = jl_nth_slot_type(abi, i); // TODO: jl_nth_slot_type should call jl_rewrap_unionall // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise // OpaqueClosure implicitly loads the env if (i == 0 && ctx.is_opaque_closure) { + // n.b. this is not really needed, because ty was already supposed to be correct if (jl_is_array(src->slottypes)) { - ty = jl_arrayref((jl_array_t*)src->slottypes, i); + ty = jl_array_ptr_ref((jl_array_t*)src->slottypes, i); } else { ty = (jl_value_t*)jl_any_type; @@ -7240,7 +8530,7 @@ static jl_llvm_functions_t if (va && ctx.vaSlot != -1) { jl_varinfo_t &varinfo = ctx.slots[ctx.vaSlot]; varinfo.isArgument = true; - vatyp = specsig ? compute_va_type(lam, nreq) : (jl_tuple_type); + vatyp = specsig ? compute_va_type(abi, nreq) : (jl_tuple_type); varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, vatyp); } @@ -7248,7 +8538,7 @@ static jl_llvm_functions_t jl_varinfo_t &varinfo = ctx.slots[i]; uint8_t flags = jl_array_uint8_ref(src->slotflags, i); varinfo.isSA = (jl_vinfo_sa(flags) != 0) || varinfo.isArgument; - varinfo.usedUndef = (jl_vinfo_usedundef(flags) != 0) || (!varinfo.isArgument && !src->inferred); + varinfo.usedUndef = (jl_vinfo_usedundef(flags) != 0) || !varinfo.isArgument; if (!varinfo.isArgument) { varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, (jl_value_t*)jl_any_type); } @@ -7271,15 +8561,29 @@ static jl_llvm_functions_t // allocate Function declarations and wrapper objects //Safe because params holds ctx lock Module *M = TSM.getModuleUnlocked(); - M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level); - jl_debugcache_t debuginfo; - debuginfo.initialize(M); + jl_debugcache_t debugcache; + debugcache.initialize(M); jl_returninfo_t returninfo = {}; Function *f = NULL; bool has_sret = false; if (specsig) { // assumes !va and !needsparams - returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, - jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg)); + SmallVector ArgNames(0); + if (!M->getContext().shouldDiscardValueNames()) { + ArgNames.resize(ctx.nargs, ""); + for (int i = 0; i < ctx.nargs; i++) { + jl_sym_t *argname = slot_symbol(ctx, i); + if (argname == jl_unused_sym) + continue; + const char *name = jl_symbol_name(argname); + if (name[0] == '\0' && ctx.vaSlot == i) + ArgNames[i] = "..."; + else + ArgNames[i] = name; + } + } + returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, abi, + jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg), + ArgNames, nreq); f = cast(returninfo.decl.getCallee()); has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); jl_init_function(f, ctx.emission_context.TargetTriple); @@ -7288,7 +8592,7 @@ static jl_llvm_functions_t // case the apply-generic call can re-use the original box for the return int retarg = [stmts, nreq]() { int retarg = -1; - for (size_t i = 0; i < jl_array_len(stmts); ++i) { + for (size_t i = 0; i < jl_array_nrows(stmts); ++i) { jl_value_t *stmt = jl_array_ptr_ref(stmts, i); if (jl_is_returnnode(stmt)) { stmt = jl_returnnode_value(stmt); @@ -7309,9 +8613,10 @@ 
static jl_llvm_functions_t }(); std::string wrapName; - raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); + raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); declarations.functionObject = wrapName; - (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context); + size_t nparams = jl_nparams(abi); + gen_invoke_wrapper(lam, abi, jlrettype, returninfo, nparams, retarg, ctx.is_opaque_closure, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) // TODO: add attributes: dereferenceable // TODO: (if needsparams) add attributes: dereferenceable, readonly, nocapture @@ -7321,16 +8626,23 @@ static jl_llvm_functions_t GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); jl_init_function(f, ctx.emission_context.TargetTriple); - if (needsparams) { + if (needsparams) jl_name_jlfuncparams_args(ctx.emission_context, f); - } else { + else jl_name_jlfunc_args(ctx.emission_context, f); - } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; } + if (!params.getContext().shouldDiscardValueNames() && ctx.emission_context.params->debug_info_level >= 2 && lam->def.method && jl_is_method(lam->def.method) && lam->specTypes != (jl_value_t*)jl_emptytuple_type) { + ios_t sigbuf; + ios_mem(&sigbuf, 0); + jl_static_show_func_sig((JL_STREAM*) &sigbuf, (jl_value_t*)abi); + f->addFnAttr("julia.fsig", StringRef(sigbuf.buf, sigbuf.size)); + ios_close(&sigbuf); + } + AttrBuilder FnAttrs(ctx.builder.getContext(), f->getAttributes().getFnAttrs()); AttrBuilder RetAttrs(ctx.builder.getContext(), f->getAttributes().getRetAttrs()); @@ -7378,12 +8690,12 @@ static jl_llvm_functions_t if (debug_enabled) { topfile = dbuilder.createFile(ctx.file, "."); DISubroutineType *subrty; - if (ctx.emission_context.debug_level <= 1) - subrty = debuginfo.jl_di_func_null_sig; + if (ctx.emission_context.params->debug_info_level <= 1) + subrty = debugcache.jl_di_func_null_sig; else if (!specsig) - subrty = debuginfo.jl_di_func_sig; + subrty = debugcache.jl_di_func_sig; else - subrty = get_specsig_di(ctx, debuginfo, jlrettype, lam->specTypes, dbuilder); + subrty = get_specsig_di(ctx, debugcache, jlrettype, abi, dbuilder); SP = dbuilder.createFunction(nullptr ,dbgFuncName // Name ,f->getName() // LinkageName @@ -7399,7 +8711,7 @@ static jl_llvm_functions_t ); topdebugloc = DILocation::get(ctx.builder.getContext(), toplineno, 0, SP, NULL); f->setSubprogram(SP); - if (ctx.emission_context.debug_level >= 2) { + if (ctx.emission_context.params->debug_info_level >= 2) { const bool AlwaysPreserve = true; // Go over all arguments and local variables and initialize their debug information for (i = 0; i < nreq; i++) { @@ -7414,7 +8726,7 @@ static jl_llvm_functions_t topfile, // File toplineno == -1 ? 
0 : toplineno, // Line // Variable type - julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false), + julia_type_to_di(ctx, debugcache, varinfo.value.typ, &dbuilder, false), AlwaysPreserve, // May be deleted if optimized out DINode::FlagZero); // Flags (TODO: Do we need any) } @@ -7425,7 +8737,7 @@ static jl_llvm_functions_t has_sret + nreq + 1, // Argument number (1-based) topfile, // File toplineno == -1 ? 0 : toplineno, // Line (for now, use lineno of the function) - julia_type_to_di(ctx, debuginfo, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false), + julia_type_to_di(ctx, debugcache, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false), AlwaysPreserve, // May be deleted if optimized out DINode::FlagZero); // Flags (TODO: Do we need any) } @@ -7440,7 +8752,7 @@ static jl_llvm_functions_t jl_symbol_name(s), // Variable name topfile, // File toplineno == -1 ? 0 : toplineno, // Line (for now, use lineno of the function) - julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false), // Variable type + julia_type_to_di(ctx, debugcache, varinfo.value.typ, &dbuilder, false), // Variable type AlwaysPreserve, // May be deleted if optimized out DINode::FlagZero // Flags (TODO: Do we need any) ); @@ -7473,16 +8785,17 @@ static jl_llvm_functions_t // step 6. set up GC frame allocate_gc_frame(ctx, b0); Value *last_age = NULL; - Value *world_age_field = get_last_age_field(ctx); - if (toplevel || ctx.is_opaque_closure) { + Value *world_age_field = NULL; + if (ctx.is_opaque_closure) { + world_age_field = get_tls_world_age_field(ctx); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad( - ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); + ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); } // step 7. 
allocate local variables slots // must be in the first basic block for the llvm mem2reg pass to work - auto allocate_local = [&](jl_varinfo_t &varinfo, jl_sym_t *s) { + auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled](jl_varinfo_t &varinfo, jl_sym_t *s, int i) { jl_value_t *jt = varinfo.value.typ; assert(!varinfo.boxroot); // variables shouldn't have memory locs already if (varinfo.value.constant) { @@ -7490,10 +8803,10 @@ static jl_llvm_functions_t alloc_def_flag(ctx, varinfo); return; } - else if (varinfo.isArgument && !(specsig && i == (size_t)ctx.vaSlot)) { - // if we can unbox it, just use the input pointer - if (i != (size_t)ctx.vaSlot && jl_is_concrete_immutable(jt)) - return; + else if (varinfo.isArgument && (!va || ctx.vaSlot == -1 || i != ctx.vaSlot)) { + // just use the input pointer, if we have it + // (we will need to attach debuginfo later to it) + return; } else if (jl_is_uniontype(jt)) { bool allunbox; @@ -7502,16 +8815,18 @@ static jl_llvm_functions_t if (lv) { lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); - varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + varinfo.pTIndex = emit_static_alloca(ctx, 1, Align(1)); setName(ctx.emission_context, varinfo.pTIndex, "tindex"); + // TODO: attach debug metadata to this variable } else if (allunbox) { // all ghost values just need a selector allocated - AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + AllocaInst *lv = emit_static_alloca(ctx, 1, Align(1)); lv->setName(jl_symbol_name(s)); varinfo.pTIndex = lv; varinfo.value.tbaa = NULL; varinfo.value.isboxed = false; + // TODO: attach debug metadata to this variable } if (lv || allunbox) alloc_def_flag(ctx, varinfo); @@ -7519,48 +8834,35 @@ static jl_llvm_functions_t return; } else if (deserves_stack(jt)) { - bool isboxed; - Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed); - assert(!isboxed); - assert(!type_is_ghost(vtype) && "constants should already be handled"); - Value *lv = new AllocaInst(vtype, M->getDataLayout().getAllocaAddrSpace(), NULL, Align(jl_datatype_align(jt)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); - if (CountTrackedPointers(vtype).count) { - StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*))); - SI->insertAfter(ctx.topalloca); - } - varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); + auto sizes = split_value_size((jl_datatype_t*)jt); + AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, Align(julia_alignment(jt))) : nullptr; + AllocaInst *roots = sizes.second > 0 ? emit_static_roots(ctx, sizes.second) : nullptr; + if (bits) bits->setName(jl_symbol_name(s)); + if (roots) roots->setName(StringRef(".roots.") + jl_symbol_name(s)); + varinfo.value = mark_julia_slot(bits, jt, NULL, ctx.tbaa().tbaa_stack, None); + varinfo.inline_roots = roots; alloc_def_flag(ctx, varinfo); if (debug_enabled && varinfo.dinfo) { - assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt); - dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(), + assert((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt); + dbuilder.insertDeclare(bits ? 
bits : roots, varinfo.dinfo, dbuilder.createExpression(), topdebugloc, ctx.builder.GetInsertBlock()); } return; } - if (!varinfo.isArgument || // always need a slot if the variable is assigned - specsig || // for arguments, give them stack slots if they aren't in `argArray` (otherwise, will use that pointer) - (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple - i == 0) { // or it is the first argument (which isn't in `argArray`) - AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(), - jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); - StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); - SI->insertAfter(ctx.topalloca); - varinfo.boxroot = av; - if (debug_enabled && varinfo.dinfo) { - DIExpression *expr; - if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) { - expr = dbuilder.createExpression(); - } - else { - SmallVector addr; - addr.push_back(llvm::dwarf::DW_OP_deref); - expr = dbuilder.createExpression(addr); - } - dbuilder.insertDeclare(av, varinfo.dinfo, expr, - topdebugloc, - ctx.builder.GetInsertBlock()); - } + // otherwise give it a boxroot in this function + AllocaInst *av = emit_static_roots(ctx, 1); + av->setName(jl_symbol_name(s)); + varinfo.boxroot = av; + if (debug_enabled && varinfo.dinfo) { + SmallVector addr; + DIExpression *expr; + if ((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt) + addr.push_back(llvm::dwarf::DW_OP_deref); + expr = dbuilder.createExpression(addr); + dbuilder.insertDeclare(av, varinfo.dinfo, expr, + topdebugloc, + ctx.builder.GetInsertBlock()); } }; @@ -7574,7 +8876,7 @@ static jl_llvm_functions_t varinfo.usedUndef = false; continue; } - allocate_local(varinfo, s); + allocate_local(varinfo, s, (int)i); } std::map upsilon_to_phic; @@ -7583,12 +8885,21 @@ static jl_llvm_functions_t // yield to them. // Also count ssavalue uses. { - for (size_t i = 0; i < jl_array_len(stmts); ++i) { + for (size_t i = 0; i < jl_array_nrows(stmts); ++i) { jl_value_t *stmt = jl_array_ptr_ref(stmts, i); auto scan_ssavalue = [&](jl_value_t *val) { if (jl_is_ssavalue(val)) { - ctx.ssavalue_usecount[((jl_ssavalue_t*)val)->id-1] += 1; + size_t ssa_idx = ((jl_ssavalue_t*)val)->id-1; + /* + * We technically allow out of bounds SSAValues in dead IR, so make + * sure to bounds check this here. It's still not *good* to leave + * dead code in the IR, because this will conservatively overcount + * it, but let's at least make it not crash. + */ + if (ssa_idx < ctx.ssavalue_usecount.size()) { + ctx.ssavalue_usecount[ssa_idx] += 1; + } return true; } return false; @@ -7597,7 +8908,7 @@ static jl_llvm_functions_t if (jl_is_phicnode(stmt)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(stmt, 0); - for (size_t j = 0; j < jl_array_len(values); ++j) { + for (size_t j = 0; j < jl_array_nrows(values); ++j) { jl_value_t *val = jl_array_ptr_ref(values, j); assert(jl_is_ssavalue(val)); upsilon_to_phic[((jl_ssavalue_t*)val)->id] = i; @@ -7608,14 +8919,14 @@ static jl_llvm_functions_t vi.used = true; vi.isVolatile = true; vi.value = mark_julia_type(ctx, (Value*)NULL, false, typ); - allocate_local(vi, jl_symbol("phic")); + allocate_local(vi, jl_symbol("phic"), -1); } } } // step 8. 
move args into local variables Function::arg_iterator AI = f->arg_begin(); - std::vector attrs(f->arg_size()); // function declaration attributes + SmallVector attrs(f->arg_size()); // function declaration attributes auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) { if (type_is_ghost(llvmArgType)) { // this argument is not actually passed @@ -7628,18 +8939,24 @@ static jl_llvm_functions_t ++AI; AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo())); jl_cgval_t theArg; - if (llvmArgType->isAggregateType()) { + if (!isboxed && llvmArgType->isAggregateType()) { maybe_mark_argument_dereferenceable(param, argType); - theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const); // this argument is by-pointer + SmallVector roots; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) { + roots = load_gc_roots(ctx, &*AI, tracked.count); + ++AI; + } + theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const, roots); // this argument is by-pointer } else { - if (isboxed) // e.g. is-pointer + if (isboxed) maybe_mark_argument_dereferenceable(param, argType); theArg = mark_julia_type(ctx, Arg, isboxed, argType); if (theArg.tbaa == ctx.tbaa().tbaa_immut) theArg.tbaa = ctx.tbaa().tbaa_const; } - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes return theArg; }; @@ -7658,12 +8975,14 @@ static jl_llvm_functions_t Type *RT = Arg->getParamStructRetType(); TypeSize sz = DL.getTypeAllocSize(RT); Align al = DL.getPrefTypeAlign(RT); + if (al > MAX_ALIGN) + al = Align(MAX_ALIGN); param.addAttribute(Attribute::NonNull); // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers. param.addDereferenceableAttr(sz); param.addAlignmentAttr(al); } - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes } if (returninfo.return_roots) { Argument *Arg = &*AI; @@ -7674,66 +8993,59 @@ static jl_llvm_functions_t size_t size = returninfo.return_roots * sizeof(jl_value_t*); param.addDereferenceableAttr(size); param.addAlignmentAttr(Align(sizeof(jl_value_t*))); - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes } if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){ Argument *Arg = &*AI; ++AI; AttrBuilder param(ctx.builder.getContext()); - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); } - for (i = 0; i < nreq; i++) { + for (i = 0; i < nreq && i < vinfoslen; i++) { jl_sym_t *s = slot_symbol(ctx, i); - jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); - // TODO: jl_nth_slot_type should call jl_rewrap_unionall? - // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise - bool isboxed = deserves_argbox(argType); - Type *llvmArgType = NULL; - if (i == 0 && ctx.is_opaque_closure) { - isboxed = true; - llvmArgType = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); - argType = (jl_value_t*)jl_any_type; - } - else { - llvmArgType = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); - } - if (s == jl_unused_sym) { - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) - ++AI; - continue; - } jl_varinfo_t &vi = ctx.slots[i]; jl_cgval_t theArg; - if (s == jl_unused_sym || vi.value.constant) { - assert(vi.boxroot == NULL); - if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) - ++AI; + if (i == 0 && ctx.is_opaque_closure) { + // If this is an opaque closure, implicitly load the env and switch + // the world age. The specTypes value is wrong for this field, so + // this needs to be handled first. + // jl_value_t *oc_type = get_oc_type(calltype, rettype); + Value *oc_this = decay_derived(ctx, &*AI); + ++AI; // both specsig (derived) and fptr1 (box) pass this argument as a distinct argument + // Load closure world + Value *worldaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, world)); + jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, + nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); + assert(ctx.world_age_at_entry == nullptr); + ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure + emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); + + if (s == jl_unused_sym || vi.value.constant) + continue; + + // Load closure env, which is always a boxed value (usually some Tuple) currently + Value *envaddr = emit_ptrgep(ctx, oc_this, offsetof(jl_opaque_closure_t, captures)); + theArg = typed_load(ctx, envaddr, NULL, (jl_value_t*)vi.value.typ, + nullptr, nullptr, /*isboxed*/true, AtomicOrdering::NotAtomic, false, sizeof(void*)); } else { - // If this is an opaque closure, implicitly load the env and switch - // the world age. - if (i == 0 && ctx.is_opaque_closure) { - // Load closure world - Value *oc_this = decay_derived(ctx, &*AI++); - Value *argaddr = emit_bitcast(ctx, oc_this, getInt8PtrTy(ctx.builder.getContext())); - Value *worldaddr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), argaddr, - ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world))); - - jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, - nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); - emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr.value()); - - // Load closure env - Value *envaddr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), argaddr, - ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures))); - - jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, - nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); - theArg = update_julia_type(ctx, closure_env, vi.value.typ); - } - else if (specsig) { + jl_value_t *argType = jl_nth_slot_type(abi, i); + // TODO: jl_nth_slot_type should call jl_rewrap_unionall? + // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise + bool isboxed = deserves_argbox(argType); + Type *llvmArgType = NULL; + llvmArgType = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); + if (s == jl_unused_sym || vi.value.constant) { + assert(vi.boxroot == NULL); + if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) { + ++AI; + auto tracked = CountTrackedPointers(llvmArgType); + if (tracked.count && !tracked.all) + ++AI; + } + continue; + } + if (specsig) { theArg = get_specsig_arg(argType, llvmArgType, isboxed); } else { @@ -7742,18 +9054,18 @@ static jl_llvm_functions_t theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); } else { - Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1); + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, vi.value.typ)); theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { + if (debug_enabled && vi.dinfo && !vi.boxroot) { SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); addr.push_back(llvm::dwarf::DW_OP_plus_uconst); addr.push_back((i - 1) * sizeof(void*)); - if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt) + if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt) addr.push_back(llvm::dwarf::DW_OP_deref); dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr), topdebugloc, @@ -7761,36 +9073,28 @@ static jl_llvm_functions_t } } } + } - - if (vi.boxroot == NULL) { - assert(vi.value.V == NULL && "unexpected variable slot created for argument"); - // keep track of original (possibly boxed) value to avoid re-boxing or moving - vi.value = theArg; - if (specsig && theArg.V && debug_enabled && vi.dinfo) { - SmallVector addr; - Value *parg; - if (theArg.ispointer()) { - parg = theArg.V; - if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt) - addr.push_back(llvm::dwarf::DW_OP_deref); - } - else { - parg = ctx.builder.CreateAlloca(theArg.V->getType(), NULL, jl_symbol_name(s)); - ctx.builder.CreateStore(theArg.V, parg); - } - dbuilder.insertDeclare(parg, vi.dinfo, dbuilder.createExpression(addr), - topdebugloc, - ctx.builder.GetInsertBlock()); + if (vi.boxroot == nullptr) { + assert(vi.value.V == nullptr && vi.inline_roots == nullptr && "unexpected variable slot created for argument"); + // keep track of original (possibly boxed) value to avoid re-boxing or moving + vi.value = theArg; + if (debug_enabled && vi.dinfo && theArg.V) { + if (!theArg.inline_roots.empty() || theArg.ispointer()) { + dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(), + topdebugloc, ctx.builder.GetInsertBlock()); + } + else { + dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(), + topdebugloc, ctx.builder.GetInsertBlock()); } - } - else { - Value *argp = boxed(ctx, theArg); - ctx.builder.CreateStore(argp, vi.boxroot); } } + else { + Value *argp = boxed(ctx, theArg); + ctx.builder.CreateStore(argp, vi.boxroot); + } } - // step 9. 
allocate rest argument CallInst *restTuple = NULL; if (va && ctx.vaSlot != -1) { @@ -7799,22 +9103,22 @@ static jl_llvm_functions_t assert(vi.boxroot == NULL); } else if (specsig) { - ctx.nvargs = jl_nparams(lam->specTypes) - nreq; - SmallVector vargs(ctx.nvargs); - for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) { - jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + ctx.nvargs = jl_nparams(abi) - nreq; + SmallVector vargs(ctx.nvargs); + for (size_t i = nreq; i < jl_nparams(abi); ++i) { + jl_value_t *argType = jl_nth_slot_type(abi, i); // n.b. specTypes is required to be a datatype by construction for specsig bool isboxed = deserves_argbox(argType); Type *llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed); } if (jl_is_concrete_type(vi.value.typ)) { - jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs.data()); + jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs); emit_varinfo_assign(ctx, vi, tuple); } else { restTuple = emit_jlcall(ctx, jltuple_func, Constant::getNullValue(ctx.types().T_prjlvalue), - vargs.data(), ctx.nvargs, julia_call); + vargs, ctx.nvargs, julia_call); jl_cgval_t tuple = mark_julia_type(ctx, restTuple, true, vi.value.typ); emit_varinfo_assign(ctx, vi, tuple); } @@ -7825,10 +9129,8 @@ static jl_llvm_functions_t restTuple = ctx.builder.CreateCall(F, { Constant::getNullValue(ctx.types().T_prjlvalue), - ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray, - ConstantInt::get(ctx.types().T_size, nreq - 1)), - ctx.builder.CreateSub(argCount, - ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) }); + emit_ptrgep(ctx, argArray, (nreq - 1) * sizeof(jl_value_t*)), + ctx.builder.CreateSub(argCount, ctx.builder.getInt32(nreq - 1)) }); restTuple->setAttributes(F->getAttributes()); ctx.builder.CreateStore(restTuple, vi.boxroot); } @@ -7845,8 +9147,8 @@ static jl_llvm_functions_t return (!jl_is_submodule(mod, jl_base_module) && !jl_is_submodule(mod, jl_core_module)); }; - auto in_tracked_path = [] (StringRef file) { - return jl_options.tracked_path != NULL && file.startswith(jl_options.tracked_path); + auto in_tracked_path = [] (StringRef file) { // falls within an explicitly set file or directory + return jl_options.tracked_path != NULL && file.starts_with(jl_options.tracked_path); }; bool mod_is_user_mod = in_user_mod(ctx.module); bool mod_is_tracked = in_tracked_path(ctx.file); @@ -7854,89 +9156,115 @@ static jl_llvm_functions_t DebugLoc loc; StringRef file; ssize_t line; + ssize_t line0; // if this represents pc=1, then also cover the entry to the function (pc=0) bool is_user_code; - bool is_tracked; // falls within an explicitly set file or directory - unsigned inlined_at; - bool operator ==(const DebugLineTable &other) const { - return other.loc == loc && other.file == file && other.line == line && other.is_user_code == is_user_code && other.is_tracked == is_tracked && other.inlined_at == inlined_at; - } + int32_t edgeid; + bool sameframe(const DebugLineTable &other) const { + // detect if the line info for this frame is unchanged (equivalent to loc == other.loc ignoring the inlined_at field) + return other.edgeid == edgeid && other.line == line; + }; }; - std::vector linetable; - { // populate the linetable data format - assert(jl_is_array(src->linetable)); - size_t nlocs = jl_array_len(src->linetable); - std::map, DISubprogram*> subprograms; - linetable.resize(nlocs + 1); - DebugLineTable 
&topinfo = linetable[0]; - topinfo.file = ctx.file; - topinfo.line = toplineno; - topinfo.is_user_code = mod_is_user_mod; - topinfo.is_tracked = mod_is_tracked; - topinfo.inlined_at = 0; - topinfo.loc = topdebugloc; - for (size_t i = 0; i < nlocs; i++) { - // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int32, inlined_at::Int32) - jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, i); - DebugLineTable &info = linetable[i + 1]; - assert(jl_typetagis(locinfo, jl_lineinfonode_type)); - jl_module_t *module = (jl_module_t*)jl_fieldref_noalloc(locinfo, 0); - jl_value_t *method = jl_fieldref_noalloc(locinfo, 1); - jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 2); - info.line = jl_unbox_int32(jl_fieldref(locinfo, 3)); - info.inlined_at = jl_unbox_int32(jl_fieldref(locinfo, 4)); - assert(info.inlined_at <= i); - info.file = jl_symbol_name(filesym); - if (info.file.empty()) - info.file = ""; - if (module == ctx.module) - info.is_user_code = mod_is_user_mod; - else - info.is_user_code = in_user_mod(module); - info.is_tracked = in_tracked_path(info.file); - if (debug_enabled) { - StringRef fname; - if (jl_is_method_instance(method)) - method = ((jl_method_instance_t*)method)->def.value; - if (jl_is_method(method)) - method = (jl_value_t*)((jl_method_t*)method)->name; - if (jl_is_symbol(method)) - fname = jl_symbol_name((jl_sym_t*)method); - if (fname.empty()) - fname = "macro expansion"; - if (info.inlined_at == 0 && info.file == ctx.file) { // if everything matches, emit a toplevel line number - info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL); + DebugLineTable topinfo; + topinfo.file = ctx.file; + topinfo.line = toplineno; + topinfo.line0 = 0; + topinfo.is_user_code = mod_is_user_mod; + topinfo.loc = topdebugloc; + topinfo.edgeid = 0; + std::map, DISubprogram*> subprograms; + SmallVector prev_lineinfo, new_lineinfo; + auto update_lineinfo = [&] (size_t pc) { + std::function append_lineinfo = + [&] (jl_debuginfo_t *debuginfo, jl_value_t *func, size_t to, size_t pc) -> bool { + while (1) { + if (!jl_is_symbol(debuginfo->def)) // this is a path + func = debuginfo->def; // this is inlined + struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, pc); + size_t i = lineidx.line; + if (i < 0) // pc out of range: broken debuginfo? + return false; + if (i == 0 && lineidx.to == 0) // no update + return false; + if (pc > 0 && (jl_value_t*)debuginfo->linetable != jl_nothing) { + // indirection node + if (!append_lineinfo(debuginfo->linetable, func, to, i)) + return false; // no update } - else { // otherwise, describe this as an inlining frame - DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)]; - if (inl_SP == NULL) { - DIFile *difile = dbuilder.createFile(info.file, "."); - inl_SP = dbuilder.createFunction(difile - ,std::string(fname) + ";" // Name - ,fname // LinkageName - ,difile // File - ,0 // LineNo - ,debuginfo.jl_di_func_null_sig // Ty - ,0 // ScopeLine - ,DINode::FlagZero // Flags - ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags - ,nullptr // Template Parameters - ,nullptr // Template Declaration - ,nullptr // ThrownTypes - ); + else { + // actual node + DebugLineTable info; + info.edgeid = to; + jl_module_t *modu = func ? 
jl_debuginfo_module1(func) : NULL; + if (modu == NULL) + modu = ctx.module; + info.file = jl_debuginfo_file1(debuginfo); + info.line = i; + info.line0 = 0; + if (pc == 1) { + struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0); + assert(lineidx.to == 0 && lineidx.pc == 0); + if (lineidx.line > 0 && info.line != lineidx.line) + info.line0 = lineidx.line; + } + if (info.file.empty()) + info.file = ""; + if (modu == ctx.module) + info.is_user_code = mod_is_user_mod; + else + info.is_user_code = in_user_mod(modu); + if (debug_enabled) { + StringRef fname = jl_debuginfo_name(func); + if (new_lineinfo.empty() && info.file == ctx.file) { // if everything matches, emit a toplevel line number + info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL); + } + else { // otherwise, describe this as an inlining frame + DebugLoc inl_loc = new_lineinfo.empty() ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : new_lineinfo.back().loc; + DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)]; + if (inl_SP == NULL) { + DIFile *difile = dbuilder.createFile(info.file, "."); + inl_SP = dbuilder.createFunction(difile + ,std::string(fname) + ";" // Name + ,fname // LinkageName + ,difile // File + ,0 // LineNo + ,debugcache.jl_di_func_null_sig // Ty + ,0 // ScopeLine + ,DINode::FlagZero // Flags + ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags + ,nullptr // Template Parameters + ,nullptr // Template Declaration + ,nullptr // ThrownTypes + ); + } + info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc); + } } - DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : linetable.at(info.inlined_at).loc; - info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc); + new_lineinfo.push_back(info); } + to = lineidx.to; + if (to == 0) + return true; + pc = lineidx.pc; + debuginfo = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, to - 1); + func = NULL; } - } - } + }; + prev_lineinfo.resize(0); + std::swap(prev_lineinfo, new_lineinfo); + bool updated = append_lineinfo(src->debuginfo, (jl_value_t*)lam, 0, pc + 1); + if (!updated) + std::swap(prev_lineinfo, new_lineinfo); + else + assert(new_lineinfo.size() > 0); + return updated; + }; - std::vector aliasscopes; + SmallVector aliasscopes; MDNode* current_aliasscope = nullptr; - std::vector scope_stack; - std::vector scope_list_stack; + SmallVector scope_stack; + SmallVector scope_list_stack; { - size_t nstmts = jl_array_len(stmts); + size_t nstmts = jl_array_nrows(stmts); aliasscopes.resize(nstmts + 1, nullptr); MDBuilder mbuilder(ctx.builder.getContext()); MDNode *alias_domain = mbuilder.createAliasScopeDomain(ctx.name); @@ -7966,23 +9294,16 @@ static jl_llvm_functions_t Instruction &prologue_end = ctx.builder.GetInsertBlock()->back(); - // step 11a. For top-level code, load the world age - if (toplevel && !ctx.is_opaque_closure) { - LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size, - prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr); - world->setOrdering(AtomicOrdering::Acquire); - ctx.builder.CreateAlignedStore(world, world_age_field, ctx.types().alignof_ptr); - } - - // step 11b. Emit the entry safepoint + // step 11a. Emit the entry safepoint if (JL_FEAT_TEST(ctx, safepoint_on_entry)) emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const); - // step 11c. 
Do codegen in control flow order - std::vector workstack; + // step 11b. Do codegen in control flow order + SmallVector workstack; std::map BB; std::map come_from_bb; int cursor = 0; + int current_label = 0; auto find_next_stmt = [&] (int seq_next) { // new style ir is always in dominance order, but frontend IR might not be // `seq_next` is the next statement we want to emit @@ -7999,6 +9320,7 @@ static jl_llvm_functions_t workstack.pop_back(); auto nextbb = BB.find(item + 1); if (nextbb == BB.end()) { + // Not a BB cursor = item; return; } @@ -8009,62 +9331,91 @@ static jl_llvm_functions_t seq_next = -1; // if this BB is non-empty, we've visited it before so skip it if (!nextbb->second->getTerminator()) { + // New BB ctx.builder.SetInsertPoint(nextbb->second); cursor = item; + current_label = item; return; } } cursor = -1; }; + // If a pkgimage or sysimage is being generated, disable tracking. + // This means sysimage build or pkgimage precompilation workloads aren't tracked. auto do_coverage = [&] (bool in_user_code, bool is_tracked) { - return (coverage_mode == JL_LOG_ALL || + return (jl_generating_output() == 0 && + (coverage_mode == JL_LOG_ALL || (in_user_code && coverage_mode == JL_LOG_USER) || - (is_tracked && coverage_mode == JL_LOG_PATH)); + (is_tracked && coverage_mode == JL_LOG_PATH))); }; auto do_malloc_log = [&] (bool in_user_code, bool is_tracked) { - return (malloc_log_mode == JL_LOG_ALL || + return (jl_generating_output() == 0 && + (malloc_log_mode == JL_LOG_ALL || (in_user_code && malloc_log_mode == JL_LOG_USER) || - (is_tracked && malloc_log_mode == JL_LOG_PATH)); + (is_tracked && malloc_log_mode == JL_LOG_PATH))); }; - std::vector current_lineinfo, new_lineinfo; - auto coverageVisitStmt = [&] (size_t dbg) { - if (dbg == 0 || dbg >= linetable.size()) - return; - // Compute inlining stack for current line, inner frame first - while (dbg) { - new_lineinfo.push_back(dbg); - dbg = linetable.at(dbg).inlined_at; - } + auto coverageVisitStmt = [&] () { // Visit frames which differ from previous statement as tracked in - // current_lineinfo (tracked outer frame first). - current_lineinfo.resize(new_lineinfo.size(), 0); + // prev_lineinfo (tracked outer frame first). 
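
(Illustrative aside, not part of the patch.) The coverage hook here only instruments the inlining frames that changed since the previous statement, walking the new frame stack against the previous one from the outermost frame inward. The standalone sketch below models that comparison; `Frame`, `sameframe`, `visit_line` and `coverage_visit` are invented stand-ins for the patch's DebugLineTable, coverageVisitLine and coverageVisitStmt, not the real implementation.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Invented stand-in for DebugLineTable: one inlining frame of the current statement.
    struct Frame {
        std::string file;
        int line;
        int edgeid; // which inlining edge produced this frame
        bool sameframe(const Frame &other) const {
            // a frame is "unchanged" if it came from the same edge and the same line
            return other.edgeid == edgeid && other.line == line;
        }
    };

    // Stand-in for recording one executable line (coverageVisitLine in the patch).
    static void visit_line(const Frame &f) {
        std::printf("cover %s:%d\n", f.file.c_str(), f.line);
    }

    // Emit coverage only for frames that differ from the previous statement,
    // scanning outermost frame first; everything below the first change is re-counted.
    static void coverage_visit(const std::vector<Frame> &prev, const std::vector<Frame> &next) {
        size_t i = 0;
        while (i < prev.size() && i < next.size() && next[i].sameframe(prev[i]))
            ++i;
        for (; i < next.size(); ++i)
            visit_line(next[i]);
    }

    int main() {
        std::vector<Frame> prev = {{"foo.jl", 3, 0}, {"bar.jl", 10, 1}};
        std::vector<Frame> next = {{"foo.jl", 3, 0}, {"bar.jl", 11, 1}};
        coverage_visit(prev, next); // only bar.jl:11 is counted; foo.jl:3 is unchanged
    }
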
+ size_t dbg; for (dbg = 0; dbg < new_lineinfo.size(); dbg++) { - unsigned newdbg = new_lineinfo[new_lineinfo.size() - dbg - 1]; - if (newdbg != current_lineinfo[dbg]) { - current_lineinfo[dbg] = newdbg; - const auto &info = linetable.at(newdbg); - if (do_coverage(info.is_user_code, info.is_tracked)) - coverageVisitLine(ctx, info.file, info.line); + if (dbg >= prev_lineinfo.size() || !new_lineinfo[dbg].sameframe(prev_lineinfo[dbg])) + break; + } + for (; dbg < new_lineinfo.size(); dbg++) { + const auto &newdbg = new_lineinfo[dbg]; + bool is_tracked = in_tracked_path(newdbg.file); + if (do_coverage(newdbg.is_user_code, is_tracked)) { + if (newdbg.line0 != 0 && (dbg >= prev_lineinfo.size() || newdbg.edgeid != prev_lineinfo[dbg].edgeid || newdbg.line0 != prev_lineinfo[dbg].line)) + coverageVisitLine(ctx, newdbg.file, newdbg.line0); + coverageVisitLine(ctx, newdbg.file, newdbg.line); } } - new_lineinfo.clear(); }; - auto mallocVisitStmt = [&] (unsigned dbg, Value *sync) { - if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || dbg == 0) { + auto mallocVisitStmt = [&] (Value *sync, bool have_dbg_update) { + if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || !have_dbg_update) { + // TODD: add || new_lineinfo[0].sameframe(prev_lineinfo[0])) above, but currently this breaks the test for it (by making an optimization better) if (do_malloc_log(true, mod_is_tracked) && sync) ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync}); return; } - while (linetable.at(dbg).inlined_at) - dbg = linetable.at(dbg).inlined_at; - mallocVisitLine(ctx, ctx.file, linetable.at(dbg).line, sync); + mallocVisitLine(ctx, new_lineinfo[0].file, new_lineinfo[0].line, sync); }; if (coverage_mode != JL_LOG_NONE) { // record all lines that could be covered - for (const auto &info : linetable) - if (do_coverage(info.is_user_code, info.is_tracked)) - jl_coverage_alloc_line(info.file, info.line); + std::function record_line_exists = [&](jl_debuginfo_t *debuginfo, jl_value_t *func) { + if (!jl_is_symbol(debuginfo->def)) // this is a path + func = debuginfo->def; // this is inlined + for (size_t i = 0; i < jl_svec_len(debuginfo->edges); i++) { + jl_debuginfo_t *edge = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, i); + record_line_exists(edge, NULL); + } + while ((jl_value_t*)debuginfo->linetable != jl_nothing) + debuginfo = debuginfo->linetable; + jl_module_t *modu = func ? jl_debuginfo_module1(func) : NULL; + if (modu == NULL) + modu = ctx.module; + StringRef file = jl_debuginfo_file1(debuginfo); + if (file.empty()) + file = ""; + bool is_user_code; + if (modu == ctx.module) + is_user_code = mod_is_user_mod; + else + is_user_code = in_user_mod(modu); + bool is_tracked = in_tracked_path(file); + if (do_coverage(is_user_code, is_tracked)) { + for (size_t pc = 0; 1; pc++) { + struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, pc); + if (lineidx.line == -1) + break; + if (lineidx.line > 0) + jl_coverage_alloc_line(file, lineidx.line); + } + } + }; + record_line_exists(src->debuginfo, (jl_value_t*)lam); } come_from_bb[0] = ctx.builder.GetInsertBlock(); @@ -8087,14 +9438,13 @@ static jl_llvm_functions_t // targets. 
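
(Illustrative aside, not part of the patch.) The surrounding pass first scans every statement to collect the set of 1-based indices that must start a fresh basic block (goto and catch destinations, the fall-through successor of each terminator, and phi edges), and only afterwards creates one block per label. The sketch below is a minimal model of that two-pass shape using plain containers; the `Stmt` type and its fields are invented for illustration, and the phi and enter cases are omitted.

    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    // Invented statement model: only the pieces needed to find block boundaries.
    struct Stmt {
        enum Kind { Plain, Goto, GotoIfNot, Return };
        Kind kind;
        int dest; // 1-based label for Goto/GotoIfNot, unused otherwise
    };

    int main() {
        std::vector<Stmt> stmts = {
            {Stmt::Plain, 0},        // 1
            {Stmt::GotoIfNot, 5},    // 2: branch target 5, fall-through 3
            {Stmt::Plain, 0},        // 3
            {Stmt::Goto, 6},         // 4
            {Stmt::Plain, 0},        // 5
            {Stmt::Return, 0},       // 6
        };

        // Pass 1: collect every 1-based statement index that begins a basic block.
        std::set<int> branch_targets;
        for (size_t i = 0; i < stmts.size(); ++i) {
            const Stmt &s = stmts[i];
            if (s.kind == Stmt::Goto || s.kind == Stmt::GotoIfNot)
                branch_targets.insert(s.dest);
            if (s.kind != Stmt::Plain && i + 2 <= stmts.size())
                branch_targets.insert((int)i + 2); // fall-through successor
        }

        // Pass 2: give each target its own label (stands in for BasicBlock::Create + BB[label]).
        std::map<int, int> BB;
        int next_id = 0;
        for (int label : branch_targets)
            BB[label] = ++next_id;

        for (const auto &kv : BB)
            std::printf("statement %d starts block L%d\n", kv.first, kv.second);
    }
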
if (i + 2 <= stmtslen) branch_targets.insert(i + 2); - } else if (jl_is_expr(stmt)) { - if (((jl_expr_t*)stmt)->head == jl_enter_sym) { - branch_targets.insert(i + 1); - if (i + 2 <= stmtslen) - branch_targets.insert(i + 2); - int dest = jl_unbox_long(jl_array_ptr_ref(((jl_expr_t*)stmt)->args, 0)); - branch_targets.insert(dest); - } + } else if (jl_is_enternode(stmt)) { + branch_targets.insert(i + 1); + if (i + 2 <= stmtslen) + branch_targets.insert(i + 2); + size_t catch_dest = jl_enternode_catch_dest(stmt); + if (catch_dest) + branch_targets.insert(catch_dest); } else if (jl_is_gotonode(stmt)) { int dest = jl_gotonode_label(stmt); branch_targets.insert(dest); @@ -8102,8 +9452,8 @@ static jl_llvm_functions_t branch_targets.insert(i + 2); } else if (jl_is_phinode(stmt)) { jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(stmt, 0); - for (size_t j = 0; j < jl_array_len(edges); ++j) { - size_t edge = ((int32_t*)jl_array_data(edges))[j]; + for (size_t j = 0; j < jl_array_nrows(edges); ++j) { + size_t edge = jl_array_data(edges, int32_t)[j]; if (edge == i) branch_targets.insert(i + 1); } @@ -8117,30 +9467,23 @@ static jl_llvm_functions_t BB[label] = bb; } + new_lineinfo.push_back(topinfo); Value *sync_bytes = nullptr; if (do_malloc_log(true, mod_is_tracked)) sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {}); - { // coverage for the function definition line number - const auto &topinfo = linetable.at(0); - if (linetable.size() > 1) { - if (topinfo == linetable.at(1)) - current_lineinfo.push_back(1); - } - if (do_coverage(topinfo.is_user_code, topinfo.is_tracked)) - coverageVisitLine(ctx, topinfo.file, topinfo.line); - } + // coverage for the function definition line number (topinfo) + coverageVisitStmt(); find_next_stmt(0); while (cursor != -1) { - int32_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[cursor]; - if (debuginfoloc > 0) { + bool have_dbg_update = update_lineinfo(cursor); + if (have_dbg_update) { if (debug_enabled) - ctx.builder.SetCurrentDebugLocation(linetable.at(debuginfoloc).loc); - coverageVisitStmt(debuginfoloc); + ctx.builder.SetCurrentDebugLocation(new_lineinfo.back().loc); + coverageVisitStmt(); } ctx.noalias().aliasscope.current = aliasscopes[cursor]; jl_value_t *stmt = jl_array_ptr_ref(stmts, cursor); - jl_expr_t *expr = jl_is_expr(stmt) ? 
(jl_expr_t*)stmt : nullptr; if (jl_is_returnnode(stmt)) { jl_value_t *retexpr = jl_returnnode_value(stmt); if (retexpr == NULL) { @@ -8193,7 +9536,7 @@ static jl_llvm_functions_t // also need to account for the possibility the return object is boxed // and avoid / skip copying it to the stack isboxed_union = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); data = ctx.builder.CreateSelect(isboxed_union, retvalinfo.Vboxed, data); } @@ -8202,7 +9545,7 @@ static jl_llvm_functions_t // treat this as a simple boxed returninfo //assert(retvalinfo.isboxed); tindex = compute_tindex_unboxed(ctx, retvalinfo, jlrettype); - tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); data = boxed(ctx, retvalinfo); sret = NULL; } @@ -8216,37 +9559,39 @@ static jl_llvm_functions_t break; } if (sret) { - if (retvalinfo.ispointer()) { - if (returninfo.return_roots) { - Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ); - emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots); - } + Align align(returninfo.union_align); + if (!returninfo.return_roots && !retvalinfo.inline_roots.empty()) { + assert(retvalinfo.V == nullptr); + assert(returninfo.cc == jl_returninfo_t::SRet); + split_value_into(ctx, retvalinfo, align, nullptr, align, + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), sret, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe)); + } + else if (returninfo.return_roots) { + assert(returninfo.cc == jl_returninfo_t::SRet); + Value *return_roots = f->arg_begin() + 1; + split_value_into(ctx, retvalinfo, align, sret, align, + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), return_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe)); + } + else if (retvalinfo.ispointer()) { if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo, - jl_datatype_size(jlrettype), julia_alignment(jlrettype)); + jl_datatype_size(jlrettype), align, align); } else { // must be jl_returninfo_t::Union emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union); } } else { - Type *store_ty = retvalinfo.V->getType(); - Type *dest_ty = store_ty->getPointerTo(); - Value *Val = retvalinfo.V; - if (returninfo.return_roots) { - assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty); - emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots); - } - if (dest_ty != sret->getType()) - sret = emit_bitcast(ctx, sret, dest_ty); - ctx.builder.CreateAlignedStore(Val, sret, Align(julia_alignment(retvalinfo.typ))); + ctx.builder.CreateAlignedStore(retvalinfo.V, sret, align); assert(retvalinfo.TIndex == NULL && "unreachable"); // unimplemented representation } } - mallocVisitStmt(debuginfoloc, sync_bytes); - if (toplevel || ctx.is_opaque_closure) + mallocVisitStmt(sync_bytes, have_dbg_update); + // N.B.: For toplevel thunks, we expect world age restore to be handled + // by the interpreter which invokes us. 
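
(Illustrative aside, not part of the patch.) In the union-return convention touched here, the selector byte either names one of the union's elements or has a high marker bit set (the patch's UNION_BOX_MARKER, which replaces the bare literal 0x80) to indicate that the value is boxed and the pointer field should be used instead. Below is a minimal sketch of that tagging scheme, with invented payload fields standing in for the real jl_cgval_t plumbing.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Illustrative marker bit: high bit of the selector means "value is boxed".
    constexpr uint8_t UNION_BOX_MARKER = 0x80;

    struct UnionReturn {
        uint8_t tindex;   // 1-based element index, possibly OR'd with UNION_BOX_MARKER
        void *boxed;      // only meaningful when the marker bit is set
        double unboxed;   // invented stand-in for the unboxed payload
    };

    static bool is_boxed(const UnionReturn &r) {
        return (r.tindex & UNION_BOX_MARKER) != 0;
    }

    static int element_index(const UnionReturn &r) {
        return r.tindex & ~UNION_BOX_MARKER;
    }

    int main() {
        double some_value = 4.25;
        UnionReturn u{2, nullptr, 1.5};                                    // second element, unboxed
        UnionReturn b{(uint8_t)(1 | UNION_BOX_MARKER), &some_value, 0.0};  // first element, boxed

        assert(!is_boxed(u) && element_index(u) == 2);
        assert(is_boxed(b) && element_index(b) == 1);
        std::printf("unboxed payload = %g, boxed element = %d\n", u.unboxed, element_index(b));
    }
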
+ if (ctx.is_opaque_closure) ctx.builder.CreateStore(last_age, world_age_field); assert(type_is_ghost(retty) || returninfo.cc == jl_returninfo_t::SRet || retval->getType() == ctx.f->getReturnType()); @@ -8257,7 +9602,12 @@ static jl_llvm_functions_t if (jl_is_gotonode(stmt)) { int lname = jl_gotonode_label(stmt); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateBr(BB[lname]); + auto br = ctx.builder.CreateBr(BB[lname]); + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(lname - 1); continue; } @@ -8270,44 +9620,84 @@ static jl_llvm_functions_t jl_value_t *cond = jl_gotoifnot_cond(stmt); int lname = jl_gotoifnot_label(stmt); Value *isfalse = emit_condition(ctx, cond, "if"); - mallocVisitStmt(debuginfoloc, nullptr); + mallocVisitStmt(nullptr, have_dbg_update); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); workstack.push_back(lname - 1); BasicBlock *ifnot = BB[lname]; BasicBlock *ifso = BB[cursor+2]; + Instruction *br; if (ifnot == ifso) - ctx.builder.CreateBr(ifnot); + br = ctx.builder.CreateBr(ifnot); else - ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(cursor + 1); continue; } - else if (expr && expr->head == jl_enter_sym) { - jl_value_t **args = (jl_value_t**)jl_array_data(expr->args); - - assert(jl_is_long(args[0])); - int lname = jl_unbox_long(args[0]); - // Save exception stack depth at enter for use in pop_exception - Value *excstack_state = - ctx.builder.CreateCall(prepare_call(jl_excstack_state_func)); - assert(!ctx.ssavalue_assigned.at(cursor)); - ctx.SAvalues.at(cursor) = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL); - ctx.ssavalue_assigned.at(cursor) = true; - CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func)); - // We need to mark this on the call site as well. See issue #6757 - sj->setCanReturnTwice(); - Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); - BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f); - BasicBlock *handlr = NULL; - handlr = BB[lname]; - workstack.push_back(lname - 1); - come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateCondBr(isz, tryblk, handlr); - ctx.builder.SetInsertPoint(tryblk); + else if (jl_is_enternode(stmt)) { + int lname = jl_enternode_catch_dest(stmt); + if (lname) { + // Save exception stack depth at enter for use in pop_exception + Value *excstack_state = + ctx.builder.CreateCall(prepare_call(jl_excstack_state_func), {get_current_task(ctx)}); + assert(!ctx.ssavalue_assigned[cursor]); + ctx.SAvalues[cursor] = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL); + ctx.ssavalue_assigned[cursor] = true; + // Actually enter the exception frame + auto ct = get_current_task(ctx); + CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func), {ct}); + // We need to mark this on the call site as well. 
See issue #6757 + sj->setCanReturnTwice(); + Value *isz = ctx.builder.CreateICmpEQ(ctx.builder.CreateExtractValue(sj, 0), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + Value *ehbuf = ctx.builder.CreateExtractValue(sj, 1); + BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f); + BasicBlock *catchpop = BasicBlock::Create(ctx.builder.getContext(), "catch_pop", f); + BasicBlock *handlr = NULL; + handlr = BB[lname]; + workstack.push_back(lname - 1); + come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock(); + ctx.builder.CreateCondBr(isz, tryblk, catchpop); + ctx.builder.SetInsertPoint(catchpop); + { + ctx.builder.CreateCall(prepare_call(jlleave_func), {get_current_task(ctx), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)}); + ctx.builder.CreateBr(handlr); + } + ctx.builder.SetInsertPoint(tryblk); + auto ehptr = emit_ptrgep(ctx, ct, offsetof(jl_task_t, eh)); + ctx.builder.CreateAlignedStore(ehbuf, ehptr, ctx.types().alignof_ptr); + } + // For the two-arg version of :enter, twiddle the scope + if (jl_enternode_scope(stmt)) { + jl_cgval_t scope = emit_expr(ctx, jl_enternode_scope(stmt)); + if (scope.typ == jl_bottom_type) { + // Probably dead code, but let's be loud about it in case it isn't, so we fail + // at the point of the miscompile, rather than later when something attempts to + // read the scope. + emit_error(ctx, "(INTERNAL ERROR): Attempted to execute EnterNode with bad scope"); + find_next_stmt(-1); + continue; + } + Value *scope_boxed = boxed(ctx, scope); + Value *scope_ptr = get_scope_field(ctx); + LoadInst *current_scope = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, scope_ptr, ctx.types().alignof_ptr); + StoreInst *scope_store = ctx.builder.CreateAlignedStore(scope_boxed, scope_ptr, ctx.types().alignof_ptr); + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe).decorateInst(current_scope); + jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe).decorateInst(scope_store); + // GC preserve the scope, since it is not rooted in the `jl_handler_t *` + // and may be removed from jl_current_task by any nested block and then + // replaced later + Value *scope_token = ctx.builder.CreateCall(prepare_call(gc_preserve_begin_func), {scope_boxed}); + ctx.scope_restore[cursor] = std::make_pair(scope_token, current_scope); + } } else { emit_stmtpos(ctx, stmt, cursor); - mallocVisitStmt(debuginfoloc, nullptr); + mallocVisitStmt(nullptr, have_dbg_update); } find_next_stmt(cursor + 1); } @@ -8323,21 +9713,22 @@ static jl_llvm_functions_t // Codegen Phi nodes std::map, BasicBlock*> BB_rewrite_map; - std::vector ToDelete; + SmallVector ToDelete; for (auto &tup : ctx.PhiNodes) { jl_cgval_t phi_result; PHINode *VN; jl_value_t *r; AllocaInst *dest; + SmallVector roots; BasicBlock *PhiBB; - std::tie(phi_result, PhiBB, dest, VN, r) = tup; + std::tie(phi_result, PhiBB, dest, VN, roots, r) = tup; jl_value_t *phiType = phi_result.typ; jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0); jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(r, 1); PHINode *TindexN = cast_or_null(phi_result.TIndex); DenseSet preds; - for (size_t i = 0; i < jl_array_len(edges); ++i) { - size_t edge = ((int32_t*)jl_array_data(edges))[i]; + for (size_t i = 0; i < jl_array_nrows(edges); ++i) { + size_t edge = jl_array_data(edges, int32_t)[i]; jl_value_t *value = jl_array_ptr_ref(values, i); // This edge value is undef, handle it the same as if the edge wasn't listed at all if (!value) @@ -8356,7 +9747,7 @@ static jl_llvm_functions_t // Only codegen this branch 
once for each PHI (the expression must be the same on all branches) #ifndef NDEBUG for (size_t j = 0; j < i; ++j) { - size_t j_edge = ((int32_t*)jl_array_data(edges))[j]; + size_t j_edge = jl_array_data(edges, int32_t)[j]; if (j_edge == edge) { assert(jl_egal(value, jl_array_ptr_ref(values, j))); } @@ -8370,9 +9761,7 @@ static jl_llvm_functions_t // Can't use `llvm::SplitCriticalEdge` here because // we may have invalid phi nodes in the destination. BasicBlock *NewBB = BasicBlock::Create(terminator->getContext(), - FromBB->getName() + "." + PhiBB->getName() + "_crit_edge"); - Function::iterator FBBI = FromBB->getIterator(); - ctx.f->getBasicBlockList().insert(++FBBI, NewBB); // insert after existing block + FromBB->getName() + "." + PhiBB->getName() + "_crit_edge", FromBB->getParent(), FromBB->getNextNode()); // insert after existing block terminator->replaceSuccessorWith(PhiBB, NewBB); DebugLoc Loc = terminator->getDebugLoc(); terminator = BranchInst::Create(PhiBB); @@ -8390,6 +9779,7 @@ static jl_llvm_functions_t val = mark_julia_const(ctx, val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex if (!jl_is_uniontype(phiType) || !TindexN) { if (VN) { + assert(roots.empty() && !dest); Value *V; if (val.typ == (jl_value_t*)jl_bottom_type) { V = undef_value_for_type(VN->getType()); @@ -8410,14 +9800,34 @@ static jl_llvm_functions_t VN->addIncoming(V, ctx.builder.GetInsertBlock()); assert(!TindexN); } - else if (dest && val.typ != (jl_value_t*)jl_bottom_type) { + else if ((dest || !roots.empty()) && val.typ != (jl_value_t*)jl_bottom_type) { // must be careful to emit undef here (rather than a bitcast or // load of val) if the runtime type of val isn't phiType + auto tracked = split_value_size((jl_datatype_t*)phiType).second; Value *isvalid = emit_isa_and_defined(ctx, val, phiType); - emit_guarded_test(ctx, isvalid, nullptr, [&] { - emit_unbox_store(ctx, update_julia_type(ctx, val, phiType), dest, ctx.tbaa().tbaa_stack, julia_alignment(phiType)); - return nullptr; + assert(roots.size() == tracked && isvalid != nullptr); + SmallVector incomingroots(0); + if (tracked) + incomingroots.resize(tracked, Constant::getNullValue(ctx.types().T_prjlvalue)); + emit_guarded_test(ctx, isvalid, incomingroots, [&] { + jl_cgval_t typedval = update_julia_type(ctx, val, phiType); + SmallVector mayberoots(tracked, Constant::getNullValue(ctx.types().T_prjlvalue)); + if (typedval.typ != jl_bottom_type) { + Align align(julia_alignment(phiType)); + if (tracked) + split_value_into(ctx, typedval, align, dest, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), mayberoots); + else + emit_unbox_store(ctx, typedval, dest, ctx.tbaa().tbaa_stack, align); + } + return mayberoots; }); + for (size_t nr = 0; nr < tracked; nr++) + roots[nr]->addIncoming(incomingroots[nr], ctx.builder.GetInsertBlock()); + } + else if (!roots.empty()) { + Value *V = Constant::getNullValue(ctx.types().T_prjlvalue); + for (size_t nr = 0; nr < roots.size(); nr++) + roots[nr]->addIncoming(V, ctx.builder.GetInsertBlock()); } } else { @@ -8426,23 +9836,24 @@ static jl_llvm_functions_t // `V` is always initialized when it is used. 
// Ref https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96629 Value *V = nullptr; + assert(roots.empty()); if (val.typ == (jl_value_t*)jl_bottom_type) { if (VN) V = undef_value_for_type(VN->getType()); RTindex = UndefValue::get(getInt8Ty(ctx.builder.getContext())); } else if (jl_is_concrete_type(val.typ) || val.constant) { - size_t tindex = get_box_tindex((jl_datatype_t*)val.typ, phiType); + size_t tindex = get_box_tindex((jl_datatype_t*)(val.constant ? jl_typeof(val.constant) : val.typ), phiType); if (tindex == 0) { if (VN) V = boxed(ctx, val); - RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); } else { if (VN) V = Constant::getNullValue(ctx.types().T_prjlvalue); if (dest) - emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, julia_alignment(val.typ)); + emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, Align(julia_alignment(val.typ))); RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex); } } @@ -8458,7 +9869,7 @@ static jl_llvm_functions_t if (dest) { // If dest is not set, this is a ghost union, the recipient of which // is often not prepared to handle a boxed representation of the ghost. - RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); } new_union.TIndex = RTindex; } @@ -8466,8 +9877,8 @@ static jl_llvm_functions_t V = new_union.Vboxed ? new_union.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue); if (dest) { // basically, if !ghost union if (new_union.Vboxed != nullptr) { - Value *isboxed = ctx.builder.CreateICmpNE( // if 0x80 is set, we won't select this slot anyways - ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + Value *isboxed = ctx.builder.CreateICmpNE( // if UNION_BOX_MARKER is set, we won't select this slot anyways + ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); skip = skip ? 
ctx.builder.CreateOr(isboxed, skip) : isboxed; } @@ -8512,20 +9923,20 @@ static jl_llvm_functions_t Value *undef = undef_value_for_type(VN->getType()); VN->addIncoming(undef, FromBB); if (TindexN) // let the runtime / optimizer know this is unknown / boxed / null, so that it won't try to union_move / copy it later - RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); } if (TindexN) TindexN->addIncoming(RTindex, FromBB); - if (dest) { + if (dest) ctx.builder.CreateLifetimeStart(dest); - if (CountTrackedPointers(dest->getAllocatedType()).count) - ctx.builder.CreateStore(Constant::getNullValue(dest->getAllocatedType()), dest); - } + for (size_t nr = 0; nr < roots.size(); nr++) + roots[nr]->addIncoming(Constant::getNullValue(ctx.types().T_prjlvalue), FromBB); ctx.builder.ClearInsertionPoint(); } } for (PHINode *PN : ToDelete) { + // This basic block is statically unreachable, thus so is this PHINode PN->replaceAllUsesWith(UndefValue::get(PN->getType())); PN->eraseFromParent(); } @@ -8541,21 +9952,10 @@ static jl_llvm_functions_t // make sure that anything we attempt to call has some inlining info, just in case optimization messed up // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram) Function *F = call->getCalledFunction(); - if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().startswith("julia.") || &I == restTuple)) { + if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().starts_with("julia.") || &I == restTuple)) { I.setDebugLoc(topdebugloc); } } - if (toplevel && !ctx.is_opaque_closure && !in_prologue) { - // we're at toplevel; insert an atomic barrier between every instruction - // TODO: inference is invalid if this has any effect (which it often does) - LoadInst *world = new LoadInst(ctx.types().T_size, - prepare_global_in(jl_Module, jlgetworld_global), Twine(), - /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I); - world->setOrdering(AtomicOrdering::Acquire); - StoreInst *store_world = new StoreInst(world, world_age_field, - /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I); - (void)store_world; - } } if (&I == &prologue_end) in_prologue = false; @@ -8566,15 +9966,18 @@ static jl_llvm_functions_t if (ctx.vaSlot > 0) { // remove VA allocation if we never referenced it + assert(ctx.slots[ctx.vaSlot].isSA && ctx.slots[ctx.vaSlot].isArgument); Instruction *root = cast_or_null(ctx.slots[ctx.vaSlot].boxroot); if (root) { - Instruction *store_value = NULL; bool have_real_use = false; - for (Use &U : root->uses()) { - User *RU = U.getUser(); + for (User *RU : root->users()) { if (StoreInst *SRU = dyn_cast(RU)) { - if (!store_value) - store_value = dyn_cast(SRU->getValueOperand()); + assert(isa(SRU->getValueOperand()) || SRU->getValueOperand() == restTuple); + (void)SRU; + } + else if (MemSetInst *MSI = dyn_cast(RU)) { + assert(MSI->getValue() == ctx.builder.getInt8(0)); + (void)MSI; } else if (isa(RU)) { } @@ -8586,22 +9989,21 @@ static jl_llvm_functions_t } } if (!have_real_use) { - Instruction *use = NULL; - for (Use &U : root->uses()) { - if (use) // erase after the iterator moves on - use->eraseFromParent(); - User *RU = U.getUser(); - use = cast(RU); + for (User *RU : make_early_inc_range(root->users())) { + // This is safe because it checked above that each User is known and has at most one Use of root + cast(RU)->eraseFromParent(); } - if (use) - use->eraseFromParent(); 
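// ---------------------------------------------------------------------------
// [Editorial aside — illustrative sketch, not part of the patch]
// The hunk just above drops a hand-rolled "erase the previous user once the
// iterator has moved past it" loop in favor of llvm::make_early_inc_range,
// which advances the iterator before the loop body runs so the current element
// can be erased safely. The same idiom in plain C++ (std::list stands in for
// the LLVM use-list; illustrative only):
#include <list>

template <typename List, typename Pred>
void erase_matching(List &l, Pred pred) {
    for (auto it = l.begin(); it != l.end(); ) {
        auto cur = it++;      // advance first ("early increment") ...
        if (pred(*cur))
            l.erase(cur);     // ... so erasing *cur never touches `it`
    }
}
// ---------------------------------------------------------------------------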
root->eraseFromParent(); - assert(!store_value || store_value == restTuple); restTuple->eraseFromParent(); } } } + if (ctx.topalloca->use_empty()) { + ctx.topalloca->eraseFromParent(); + ctx.topalloca = nullptr; + } + // link the dependent llvmcall modules, but switch their function's linkage to internal // so that they don't conflict when they show up in the execution engine. Linker L(*jl_Module); @@ -8617,44 +10019,29 @@ static jl_llvm_functions_t jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage); } - // link in opaque closure modules - for (auto &TSMod : ctx.oc_modules) { - SmallVector Exports; - TSMod.withModuleDo([&](Module &Mod) { - for (const auto &F: Mod.functions()) - if (!F.isDeclaration()) - Exports.push_back(F.getName().str()); - }); - jl_merge_module(TSM, std::move(TSMod)); - for (auto FN: Exports) - jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage); - } - JL_GC_POP(); return declarations; } // --- entry point --- -void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL); - -JL_GCC_IGNORE_START("-Wclobbered") jl_llvm_functions_t jl_emit_code( orc::ThreadSafeModule &m, jl_method_instance_t *li, jl_code_info_t *src, - jl_value_t *jlrettype, + jl_value_t *abi, jl_codegen_params_t ¶ms) { JL_TIMING(CODEGEN, CODEGEN_LLVM); jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); - // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || compare_cgparams(params.params, &jl_default_cgparams)) && "functions compiled with custom codegen params must not be cached"); + if (!abi) + abi = li->specTypes; JL_TRY { - decls = emit_function(m, li, src, jlrettype, params); + decls = emit_function(m, li, src, abi, src->rettype, params); auto stream = *jl_ExecutionEngine->get_dump_emitted_mi_name_stream(); if (stream) { jl_printf(stream, "%s\t", decls.specFunctionObject.c_str()); @@ -8675,7 +10062,7 @@ jl_llvm_functions_t jl_emit_code( decls.functionObject = ""; decls.specFunctionObject = ""; jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname.c_str()); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(jl_current_task)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO } @@ -8685,18 +10072,21 @@ jl_llvm_functions_t jl_emit_code( static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t ¶ms, jl_method_instance_t *mi, jl_value_t *rettype) { - Module *M = m.getModuleUnlocked(); - jl_codectx_t ctx(M->getContext(), params); - ctx.name = M->getModuleIdentifier().data(); - std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; - if (uses_specsig(mi->specTypes, false, true, rettype, true)) { + if (uses_specsig(mi->specTypes, false, rettype, true)) { + // context lock is held by params + Module *M = m.getModuleUnlocked(); + jl_codectx_t ctx(M->getContext(), params, 0, 0); + ctx.name = M->getModuleIdentifier().data(); + std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function 
*gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context); + emit_specsig_to_fptr1(gf_thunk, returninfo.cc, returninfo.return_roots, + mi->specTypes, rettype, true, nrealargs, ctx.emission_context, + prepare_call_in(gf_thunk->getParent(), jlopaque_closure_call_func)); // TODO: this could call emit_oc_call directly declarations.specFunctionObject = funcName; } return declarations; @@ -8709,286 +10099,44 @@ jl_llvm_functions_t jl_emit_codeinst( jl_codegen_params_t ¶ms) { JL_TIMING(CODEGEN, CODEGEN_Codeinst); - jl_timing_show_method_instance(codeinst->def, JL_TIMING_DEFAULT_BLOCK); - JL_GC_PUSH1(&src); + jl_timing_show_method_instance(jl_get_ci_mi(codeinst), JL_TIMING_DEFAULT_BLOCK); if (!src) { - src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - jl_method_t *def = codeinst->def->def.method; - // Check if this is the generic method for opaque closure wrappers - - // if so, generate the specsig -> invoke converter. - if (def == jl_opaque_closure_method) { - JL_GC_POP(); - return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype); - } - if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def)) - src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); - if (!src || !jl_is_code_info(src)) { - JL_GC_POP(); - m = orc::ThreadSafeModule(); - return jl_llvm_functions_t(); // failed - } - } - jl_llvm_functions_t decls = jl_emit_code(m, codeinst->def, src, codeinst->rettype, params); - - const std::string &specf = decls.specFunctionObject; - const std::string &f = decls.functionObject; - if (params.cache && !f.empty()) { - // Prepare debug info to receive this function - // record that this function name came from this linfo, - // so we can build a reverse mapping for debug-info. 
- bool toplevel = !jl_is_method(codeinst->def->def.method); - if (!toplevel) { - //Safe b/c params holds context lock - const DataLayout &DL = m.getModuleUnlocked()->getDataLayout(); - // but don't remember toplevel thunks because - // they may not be rooted in the gc for the life of the program, - // and the runtime doesn't notify us when the code becomes unreachable :( - if (!specf.empty()) - jl_add_code_in_flight(specf, codeinst, DL); - if (!f.empty() && f != "jl_fptr_args" && f != "jl_fptr_sparam") - jl_add_code_in_flight(f, codeinst, DL); - } - - if (params.world) {// don't alter `inferred` when the code is not directly being used - jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); - // don't change inferred state - if (inferred) { - jl_method_t *def = codeinst->def->def.method; - if (// keep code when keeping everything - !(JL_DELETE_NON_INLINEABLE) || - // aggressively keep code when debugging level >= 2 - // note that this uses the global jl_options.debug_level, not the local emission_ctx.debug_level - jl_options.debug_level > 1) { - // update the stored code - if (inferred != (jl_value_t*)src) { - if (jl_is_method(def)) { - src = (jl_code_info_t*)jl_compress_ir(def, src); - assert(jl_is_string(src)); - codeinst->relocatability = jl_string_data(src)[jl_string_len(src)-1]; - } - jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src); - jl_gc_wb(codeinst, src); - } - } - else if (jl_is_method(def)) {// don't delete toplevel code - if (// and there is something to delete (test this before calling jl_ir_inlining_cost) - inferred != jl_nothing && - // don't delete inlineable code, unless it is constant - (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr || - (jl_ir_inlining_cost(inferred) == UINT16_MAX)) && - // don't delete code when generating a precompile file - !(params.imaging || jl_options.incremental)) { - // if not inlineable, code won't be needed again - jl_atomic_store_release(&codeinst->inferred, jl_nothing); - } - } - } + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + // Assert that this this is the generic method for opaque closure wrappers: + // this signals to instead compile specptr such that it holds the specptr -> invoke wrapper + // to satisfy the dispatching implementation requirements of jl_f_opaque_closure_call + if (mi->def.method == jl_opaque_closure_method) { + return jl_emit_oc_wrapper(m, params, mi, codeinst->rettype); } + m = orc::ThreadSafeModule(); + return jl_llvm_functions_t(); // user error } - JL_GC_POP(); + //assert(jl_egal((jl_value_t*)jl_atomic_load_relaxed(&codeinst->debuginfo), (jl_value_t*)src->debuginfo) && "trying to generate code for a codeinst for an incompatible src"); + jl_llvm_functions_t decls = jl_emit_code(m, jl_get_ci_mi(codeinst), src, get_ci_abi(codeinst), params); return decls; } - -void jl_compile_workqueue( - jl_workqueue_t &emitted, - Module &original, - jl_codegen_params_t ¶ms, CompilationPolicy policy) -{ - JL_TIMING(CODEGEN, CODEGEN_Workqueue); - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - while (!params.workqueue.empty()) { - jl_code_instance_t *codeinst; - Function *protodecl; - jl_returninfo_t::CallingConv proto_cc; - bool proto_specsig; - unsigned proto_return_roots; - auto it = params.workqueue.back(); - codeinst = it.first; - std::tie(proto_cc, proto_return_roots, protodecl, proto_specsig) = it.second; - params.workqueue.pop_back(); - // try to emit code for this item from the workqueue - assert(codeinst->min_world <= params.world && codeinst->max_world >= 
params.world && - "invalid world for code-instance"); - StringRef preal_decl = ""; - bool preal_specsig = false; - auto invoke = jl_atomic_load_acquire(&codeinst->invoke); - bool cache_valid = params.cache; - // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. - if (cache_valid && invoke != NULL) { - auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); - if (fptr) { - while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { - jl_cpu_pause(); - } - // in case we are racing with another thread that is emitting this function - invoke = jl_atomic_load_relaxed(&codeinst->invoke); - } - if (invoke == jl_fptr_args_addr) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); - } - else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); - preal_specsig = true; - } - } - else { - auto &result = emitted[codeinst]; - jl_llvm_functions_t *decls = NULL; - if (std::get<0>(result)) { - decls = &std::get<1>(result); - } - else { - // Reinfer the function. The JIT came along and removed the inferred - // method body. See #34993 - if (policy != CompilationPolicy::Default && - jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { - src = jl_type_infer(codeinst->def, jl_atomic_load_acquire(&jl_world_counter), 0); - if (src) { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, params.imaging, - original.getDataLayout(), Triple(original.getTargetTriple())); - result.second = jl_emit_code(result_m, codeinst->def, src, src->rettype, params); - result.first = std::move(result_m); - } - } - else { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, params.imaging, - original.getDataLayout(), Triple(original.getTargetTriple())); - result.second = jl_emit_codeinst(result_m, codeinst, NULL, params); - result.first = std::move(result_m); - } - if (std::get<0>(result)) - decls = &std::get<1>(result); - else - emitted.erase(codeinst); // undo the insert above - } - if (decls) { - if (decls->functionObject == "jl_fptr_args") { - preal_decl = decls->specFunctionObject; - } - else if (decls->functionObject != "jl_fptr_sparam") { - preal_decl = decls->specFunctionObject; - preal_specsig = true; - } - } - } - // patch up the prototype we emitted earlier - Module *mod = protodecl->getParent(); - assert(protodecl->isDeclaration()); - if (proto_specsig) { - // expected specsig - if (!preal_specsig) { - // emit specsig-to-(jl)invoke conversion - Function *preal = emit_tojlinvoke(codeinst, mod, params); - protodecl->setLinkage(GlobalVariable::InternalLinkage); - //protodecl->setAlwaysInline(); - jl_init_function(protodecl, params.TargetTriple); - size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed - // TODO: maybe this can be cached in codeinst->specfptr? 
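// ---------------------------------------------------------------------------
// [Editorial aside — illustrative sketch, not part of the patch]
// Both the workqueue code being removed here and the new emit_specsig_to_fptr1
// call in jl_emit_oc_wrapper adapt between two calling conventions: a "specsig"
// entry point that receives native, unboxed arguments, and the generic
// "fptr1"/jlcall entry point that receives an array of boxed values. A rough
// sketch of the shape of such an adapter; the typedef is approximate and
// box_double is a hypothetical helper — the real emitter produces LLVM IR and
// also handles GC roots, sret layouts, and invalidation:
#include <cstdint>

typedef struct _jl_value_t jl_value_t;   // opaque boxed Julia value

// Approximation of the generic entry point: callee, boxed args, arg count.
typedef jl_value_t *(*jl_fptr1_t)(jl_value_t *f, jl_value_t **args, uint32_t nargs);

jl_value_t *box_double(double x);        // hypothetical boxing helper

// A specsig signature (f, double, double) forwarded through the generic ABI.
static jl_value_t *example_specsig_to_fptr1(jl_fptr1_t target, jl_value_t *f,
                                            double a, double b)
{
    jl_value_t *boxed[2] = { box_double(a), box_double(b) };
    return target(f, boxed, 2);
}
// ---------------------------------------------------------------------------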
- emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal); - preal_decl = ""; // no need to fixup the name - } - else { - assert(!preal_decl.empty()); - } - } - else { - // expected non-specsig - if (preal_decl.empty() || preal_specsig) { - // emit jlcall1-to-(jl)invoke conversion - preal_decl = emit_tojlinvoke(codeinst, mod, params)->getName(); - } - } - if (!preal_decl.empty()) { - // merge and/or rename this prototype to the real function - if (Value *specfun = mod->getNamedValue(preal_decl)) { - if (protodecl != specfun) - protodecl->replaceAllUsesWith(specfun); - } - else { - protodecl->setName(preal_decl); - } - } - } - JL_GC_POP(); -} - - // --- initialization --- -std::vector> gv_for_global; +static auto gv_for_global = new SmallVector, 0>(); static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr) { - gv_for_global.push_back(std::make_pair(addr, var)); + gv_for_global->push_back(std::make_pair(addr, var)); } static JuliaVariable *julia_const_gv(jl_value_t *val) { - for (auto &kv : gv_for_global) { + for (auto &kv : *gv_for_global) { if (*kv.first == val) return kv.second; } return nullptr; } -// Handle FLOAT16 ABI v2 -#if JULIA_FLOAT16_ABI == 2 -static void makeCastCall(Module &M, StringRef wrapperName, StringRef calledName, FunctionType *FTwrapper, FunctionType *FTcalled, bool external) -{ - Function *calledFun = M.getFunction(calledName); - if (!calledFun) { - calledFun = Function::Create(FTcalled, Function::ExternalLinkage, calledName, M); - } - auto linkage = external ? Function::ExternalLinkage : Function::InternalLinkage; - auto wrapperFun = Function::Create(FTwrapper, linkage, wrapperName, M); - wrapperFun->addFnAttr(Attribute::AlwaysInline); - llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", wrapperFun)); - SmallVector CallArgs; - if (wrapperFun->arg_size() != calledFun->arg_size()){ - llvm::errs() << "FATAL ERROR: Can't match wrapper to called function"; - abort(); - } - for (auto wrapperArg = wrapperFun->arg_begin(), calledArg = calledFun->arg_begin(); - wrapperArg != wrapperFun->arg_end() && calledArg != calledFun->arg_end(); ++wrapperArg, ++calledArg) - { - CallArgs.push_back(builder.CreateBitCast(wrapperArg, calledArg->getType())); - } - auto val = builder.CreateCall(calledFun, CallArgs); - auto retval = builder.CreateBitCast(val,wrapperFun->getReturnType()); - builder.CreateRet(retval); -} - -void emitFloat16Wrappers(Module &M, bool external) -{ - auto &ctx = M.getContext(); - makeCastCall(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", FunctionType::get(Type::getFloatTy(ctx), { Type::getHalfTy(ctx) }, false), - FunctionType::get(Type::getFloatTy(ctx), { Type::getInt16Ty(ctx) }, false), external); - makeCastCall(M, "__extendhfsf2", "julia__gnu_h2f_ieee", FunctionType::get(Type::getFloatTy(ctx), { Type::getHalfTy(ctx) }, false), - FunctionType::get(Type::getFloatTy(ctx), { Type::getInt16Ty(ctx) }, false), external); - makeCastCall(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", FunctionType::get(Type::getHalfTy(ctx), { Type::getFloatTy(ctx) }, false), - FunctionType::get(Type::getInt16Ty(ctx), { Type::getFloatTy(ctx) }, false), external); - makeCastCall(M, "__truncsfhf2", "julia__gnu_f2h_ieee", FunctionType::get(Type::getHalfTy(ctx), { Type::getFloatTy(ctx) }, false), - FunctionType::get(Type::getInt16Ty(ctx), { Type::getFloatTy(ctx) }, false), external); - makeCastCall(M, "__truncdfhf2", "julia__truncdfhf2", FunctionType::get(Type::getHalfTy(ctx), { 
Type::getDoubleTy(ctx) }, false), - FunctionType::get(Type::getInt16Ty(ctx), { Type::getDoubleTy(ctx) }, false), external); -} - -static void init_f16_funcs(void) -{ - auto ctx = jl_ExecutionEngine->acquireContext(); - auto TSM = jl_create_ts_module("F16Wrappers", ctx, imaging_default()); - auto aliasM = TSM.getModuleUnlocked(); - emitFloat16Wrappers(*aliasM, true); - jl_ExecutionEngine->addModule(std::move(TSM)); -} -#endif - static void init_jit_functions(void) { - add_named_global(jlsmall_typeof_var, &small_typeof); + add_named_global("jl_fptr_args", jl_fptr_args_addr); + add_named_global("jl_fptr_sparam", jl_fptr_sparam_addr); + add_named_global("jl_f_opaque_closure_call", &jl_f_opaque_closure_call); + add_named_global(jl_small_typeof_var, &jl_small_typeof); add_named_global(jlstack_chk_guard_var, &__stack_chk_guard); add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle); add_named_global(jlexe_var, &jl_exe_handle); @@ -9011,6 +10159,7 @@ static void init_jit_functions(void) add_named_global(jlatomicerror_func, &jl_atomic_error); add_named_global(jlthrow_func, &jl_throw); add_named_global(jlundefvarerror_func, &jl_undefined_var_error); + add_named_global(jlhasnofield_func, &jl_has_no_field_error); add_named_global(jlboundserrorv_func, &jl_bounds_error_ints); add_named_global(jlboundserror_func, &jl_bounds_error_int); add_named_global(jlvboundserror_func, &jl_bounds_error_tuple_int); @@ -9021,21 +10170,23 @@ static void init_jit_functions(void) add_named_global(memcmp_func, &memcmp); add_named_global(jltypeerror_func, &jl_type_error); add_named_global(jlcheckassign_func, &jl_checked_assignment); - add_named_global(jldeclareconst_func, &jl_declare_constant); add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error); add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr); add_named_global(jlboundp_func, &jl_boundp); for (auto it : builtin_func_map()) add_named_global(it.second, it.first); + add_named_global(jlintrinsic_func, &jl_f_intrinsic_call); + add_named_global(jlgetbuiltinfptr_func, &jl_get_builtin_fptr); add_named_global(jlapplygeneric_func, &jl_apply_generic); add_named_global(jlinvoke_func, &jl_invoke); add_named_global(jltopeval_func, &jl_toplevel_eval); add_named_global(jlcopyast_func, &jl_copy_ast); //add_named_global(jlnsvec_func, &jl_svec); add_named_global(jlmethod_func, &jl_method_def); - add_named_global(jlgenericfunction_func, &jl_generic_function_def); + add_named_global(jlgenericfunction_func, &jl_declare_const_gf); add_named_global(jlenter_func, &jl_enter_handler); add_named_global(jl_current_exception_func, &jl_current_exception); + add_named_global(jlleave_noexcept_func, &jl_pop_handler_noexcept); add_named_global(jlleave_func, &jl_pop_handler); add_named_global(jl_restore_excstack_func, &jl_restore_excstack); add_named_global(jl_excstack_state_func, &jl_excstack_state); @@ -9045,18 +10196,19 @@ static void init_jit_functions(void) add_named_global(jltypeassert_func, &jl_typeassert); add_named_global(jlapplytype_func, &jl_instantiate_type_in_env); add_named_global(jl_object_id__func, &jl_object_id_); + add_named_global(jl_alloc_genericmemory_unchecked_func, &jl_alloc_genericmemory_unchecked); add_named_global(jl_alloc_obj_func, (void*)NULL); add_named_global(jl_newbits_func, (void*)jl_new_bits); - add_named_global(jl_loopinfo_marker_func, (void*)NULL); add_named_global(jl_typeof_func, (void*)NULL); add_named_global(jl_write_barrier_func, (void*)NULL); add_named_global(jldlsym_func, &jl_load_and_lookup); 
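// ---------------------------------------------------------------------------
// [Editorial aside — illustrative sketch, not part of the patch]
// The long run of add_named_global(...) calls in init_jit_functions around this
// point binds, for each runtime entry point that emitted IR may call, the symbol
// name used in the IR to the address of the corresponding C function so the JIT
// can resolve those calls. A toy sketch of that idea only — the real JuliaOJIT
// resolution machinery is considerably more involved:
#include <map>
#include <string>

static std::map<std::string, void *> toy_named_globals;

static void toy_add_named_global(const std::string &name, void *addr) {
    toy_named_globals[name] = addr;      // e.g. {"jl_throw", (void*)&jl_throw}
}

static void *toy_resolve(const std::string &name) {
    auto it = toy_named_globals.find(name);
    return it == toy_named_globals.end() ? nullptr : it->second;
}
// ---------------------------------------------------------------------------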
add_named_global("jl_adopt_thread", &jl_adopt_thread); add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline); add_named_global(jlgetnthfieldchecked_func, &jl_get_nth_field_checked); + add_named_global(jlfieldindex_func, &jl_field_index); add_named_global(diff_gc_total_bytes_func, &jl_gc_diff_total_bytes); add_named_global(sync_gc_total_bytes_func, &jl_gc_sync_total_bytes); - add_named_global(jlarray_data_owner_func, &jl_array_data_owner); + add_named_global(jl_allocgenericmemory, &jl_alloc_genericmemory); add_named_global(gcroot_flush_func, (void*)NULL); add_named_global(gc_preserve_begin_func, (void*)NULL); add_named_global(gc_preserve_end_func, (void*)NULL); @@ -9064,9 +10216,14 @@ static void init_jit_functions(void) add_named_global(except_enter_func, (void*)NULL); add_named_global(julia_call, (void*)NULL); add_named_global(julia_call2, (void*)NULL); + add_named_global(jllockvalue_func, &jl_lock_value); + add_named_global(jlunlockvalue_func, &jl_unlock_value); + add_named_global(jllockfield_func, &jl_lock_field); + add_named_global(jlunlockfield_func, &jl_unlock_field); #ifdef _OS_WINDOWS_ #if defined(_CPU_X86_64_) + add_named_global("__julia_personality", &__julia_personality); #if defined(_COMPILER_GCC_) add_named_global("___chkstk_ms", &___chkstk_ms); #else @@ -9092,7 +10249,7 @@ static void init_jit_functions(void) } #ifdef JL_USE_INTEL_JITEVENTS -char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier +char jl_using_intel_jitevents = 0; // Non-zero if running under Intel VTune Amplifier #endif #ifdef JL_USE_OPROFILE_JITEVENTS @@ -9103,12 +10260,13 @@ char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile char jl_using_perf_jitevents = 0; #endif +int jl_is_timing_passes = 0; + extern "C" void jl_init_llvm(void) { jl_page_size = jl_getpagesize(); - jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug; - jl_default_cgparams.generic_context = jl_nothing; - + jl_default_debug_info_kind = jl_default_cgparams.debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug; + jl_default_cgparams.debug_info_level = (int) jl_options.debug_level; InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); @@ -9117,16 +10275,13 @@ extern "C" void jl_init_llvm(void) // Initialize passes PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeCore(Registry); -#if JL_LLVM_VERSION < 150000 - initializeCoroutines(Registry); -#endif initializeScalarOpts(Registry); initializeVectorization(Registry); initializeAnalysis(Registry); initializeTransformUtils(Registry); initializeInstCombine(Registry); - initializeAggressiveInstCombine(Registry); - initializeInstrumentation(Registry); + // TODO: initializeAggressiveInstCombine(Registry); + // TODO: initializeInstrumentation(Registry); initializeTarget(Registry); #ifdef USE_POLLY polly::initializePollyPasses(Registry); @@ -9142,28 +10297,19 @@ extern "C" void jl_init_llvm(void) clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922 if (clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "0", 1); -#ifdef JL_USE_NEW_PM // For parity with LoopUnswitch clopt = llvmopts.lookup("unswitch-threshold"); if (clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "100", 1); -#endif // if the patch adding this option has been applied, lower its limit to provide // better DAGCombiner performance. 
clopt = llvmopts.lookup("combiner-store-merge-dependence-limit"); if (clopt && clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "4", 1); -#if JL_LLVM_VERSION >= 150000 - clopt = llvmopts.lookup("opaque-pointers"); - if (clopt && clopt->getNumOccurrences() == 0) { -#ifdef JL_LLVM_OPAQUE_POINTERS - cl::ProvidePositionalOption(clopt, "true", 1); -#else - cl::ProvidePositionalOption(clopt, "false", 1); -#endif - } -#endif + clopt = llvmopts.lookup("time-passes"); + if (clopt && clopt->getNumOccurrences() > 0) + jl_is_timing_passes = 1; jl_ExecutionEngine = new JuliaOJIT(); @@ -9182,14 +10328,19 @@ extern "C" void jl_init_llvm(void) #if defined(JL_USE_INTEL_JITEVENTS) || \ defined(JL_USE_OPROFILE_JITEVENTS) || \ defined(JL_USE_PERF_JITEVENTS) -#ifdef JL_USE_JITLINK -#pragma message("JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink") -#else const char *jit_profiling = getenv("ENABLE_JITPROFILING"); #if defined(JL_USE_INTEL_JITEVENTS) - if (jit_profiling && atoi(jit_profiling)) { - jl_using_intel_jitevents = 1; + if (jit_profiling) { + if (atoi(jit_profiling)) { + jl_using_intel_jitevents = 1; + } + } else { +#ifdef USE_ITTAPI + __itt_collection_state state = __itt_get_collection_state(); + jl_using_intel_jitevents = state == __itt_collection_init_successful || + state == __itt_collection_collector_exists; +#endif } #endif @@ -9201,24 +10352,23 @@ extern "C" void jl_init_llvm(void) #if defined(JL_USE_PERF_JITEVENTS) if (jit_profiling && atoi(jit_profiling)) { - jl_using_perf_jitevents= 1; + jl_using_perf_jitevents = 1; } #endif #ifdef JL_USE_INTEL_JITEVENTS if (jl_using_intel_jitevents) - jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); + jl_ExecutionEngine->enableIntelJITEventListener(); #endif #ifdef JL_USE_OPROFILE_JITEVENTS if (jl_using_oprofile_jitevents) - jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createOProfileJITEventListener()); + jl_ExecutionEngine->enableOProfileJITEventListener(); #endif #ifdef JL_USE_PERF_JITEVENTS if (jl_using_perf_jitevents) - jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); -#endif + jl_ExecutionEngine->enablePerfJITEventListener(); #endif #endif @@ -9230,9 +10380,6 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_init_codegen_impl(void) jl_init_llvm(); // Now that the execution engine exists, initialize all modules init_jit_functions(); -#if JULIA_FLOAT16_ABI == 2 - init_f16_funcs(); -#endif } extern "C" JL_DLLEXPORT_CODEGEN void jl_teardown_codegen_impl() JL_NOTSAFEPOINT diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc index 547d5d0eabede..3dfcf17a07b5c 100644 --- a/src/common_symbols1.inc +++ b/src/common_symbols1.inc @@ -1,98 +1,90 @@ jl_symbol("="), jl_symbol("getproperty"), -jl_symbol("apply_type"), jl_symbol("getfield"), +jl_symbol("apply_type"), +jl_symbol("==="), jl_symbol("getindex"), jl_symbol("convert"), -jl_symbol("==="), -jl_symbol("iterate"), jl_symbol("=="), jl_symbol("new"), -jl_symbol("foreigncall"), jl_symbol("int.jl"), -jl_symbol("throw"), -jl_symbol("nothing"), -jl_symbol("essentials.jl"), jl_symbol("+"), -jl_symbol("unsafe_convert"), +jl_symbol("boot.jl"), +jl_symbol("essentials.jl"), +jl_symbol("ccall"), +jl_symbol("foreigncall"), +jl_symbol("iterate"), jl_symbol("not_int"), +jl_symbol("Base.jl"), jl_symbol("-"), -jl_symbol("boot.jl"), -jl_symbol("number.jl"), +jl_symbol("throw"), +jl_symbol("promotion.jl"), jl_symbol("length"), jl_symbol("<"), 
-jl_symbol("cconvert"), -jl_symbol("Base.jl"), -jl_symbol("promotion.jl"), -jl_symbol("tuple.jl"), -jl_symbol("static_parameter"), -jl_symbol("isempty"), -jl_symbol("<="), -jl_symbol("array.jl"), +jl_symbol("isa"), jl_symbol("operators.jl"), -jl_symbol("NamedTuple"), +jl_symbol("number.jl"), +jl_symbol("unsafe_convert"), +jl_symbol("tuple.jl"), +jl_symbol("nothing"), jl_symbol("bitcast"), -jl_symbol("!"), +jl_symbol("NamedTuple"), jl_symbol("indexed_iterate"), -jl_symbol("sle_int"), jl_symbol("bool.jl"), -jl_symbol("Ptr"), -jl_symbol("size"), +jl_symbol("!"), +jl_symbol("isempty"), +jl_symbol("<="), +jl_symbol("cconvert"), jl_symbol("add_int"), +jl_symbol("static_parameter"), +jl_symbol("array.jl"), jl_symbol("slt_int"), -jl_symbol("*"), -jl_symbol("range.jl"), -jl_symbol("abstractarray.jl"), jl_symbol("!="), -jl_symbol("isa"), -jl_symbol("setindex!"), -jl_symbol("string"), -jl_symbol("ifelse"), -jl_symbol(":"), -jl_symbol(">"), -jl_symbol("_apply_iterate"), jl_symbol("UInt64"), +jl_symbol("range.jl"), +jl_symbol("sle_int"), +jl_symbol("size"), jl_symbol("&"), -jl_symbol("max"), +jl_symbol("abstractarray.jl"), jl_symbol("rem"), -jl_symbol("sub_int"), -jl_symbol(">="), -jl_symbol("UInt8"), -jl_symbol("iterators.jl"), +jl_symbol(">"), jl_symbol("Int64"), -jl_symbol("pairs"), +jl_symbol("sub_int"), +jl_symbol("*"), jl_symbol("and_int"), +jl_symbol("string"), +jl_symbol(">="), +jl_symbol("Ptr"), +jl_symbol("toInt64"), jl_symbol("last"), -jl_symbol("typeof"), -jl_symbol("arrayref"), jl_symbol("pointer.jl"), -jl_symbol("toInt64"), -jl_symbol("arraylen"), +jl_symbol("reinterpret"), +jl_symbol("first"), +jl_symbol("pairs"), +jl_symbol("_apply_iterate"), jl_symbol("typeassert"), -jl_symbol("map"), +jl_symbol(":"), +jl_symbol("UInt8"), +jl_symbol("setindex!"), +jl_symbol("isdefined"), +jl_symbol("typeof"), +jl_symbol("promote"), jl_symbol("kwcall"), -jl_symbol("ArgumentError"), +jl_symbol("unsigned"), +jl_symbol("_promote"), +jl_symbol("toUInt64"), +jl_symbol("map"), jl_symbol("lshr_int"), +jl_symbol("gc_preserve_begin"), +jl_symbol("gc_preserve_end"), +jl_symbol("trunc_int"), +jl_symbol("ArgumentError"), jl_symbol("axes"), -jl_symbol("reinterpret"), +jl_symbol("ult_int"), +jl_symbol("UInt"), +jl_symbol("zext_int"), +jl_symbol("strings/string.jl"), +jl_symbol("ifelse"), jl_symbol("Array"), -jl_symbol("first"), -jl_symbol("trunc_int"), -jl_symbol("OneTo"), -jl_symbol("haskey"), -jl_symbol("Int"), -jl_symbol("oneto"), jl_symbol("eq_int"), jl_symbol("throw_inexacterror"), -jl_symbol("toUInt64"), -jl_symbol("arraysize"), -jl_symbol("UInt"), -jl_symbol("setproperty!"), -jl_symbol("check_top_bit"), -jl_symbol("promote"), -jl_symbol("unsigned"), -jl_symbol("is_top_bit_set"), -jl_symbol("structdiff"), -jl_symbol("undef"), -jl_symbol("sizeof"), -jl_symbol("String"), diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc index b5a334172dd76..2a6990bac52ff 100644 --- a/src/common_symbols2.inc +++ b/src/common_symbols2.inc @@ -1,254 +1,248 @@ -jl_symbol("namedtuple.jl"), -jl_symbol("pop"), -jl_symbol("inbounds"), -jl_symbol("strings/string.jl"), -jl_symbol("Ref"), -jl_symbol("Vector"), -jl_symbol("kwerr"), -jl_symbol("_promote"), +jl_symbol("|"), +jl_symbol("setproperty!"), jl_symbol("sext_int"), -jl_symbol("pointer"), -jl_symbol("similar"), -jl_symbol("arrayset"), +jl_symbol("String"), +jl_symbol("Int"), +jl_symbol("iterators.jl"), +jl_symbol("Colon"), +jl_symbol("unchecked_oneto"), +jl_symbol("structdiff"), +jl_symbol("UnitRange"), +jl_symbol("unitrange_last"), +jl_symbol("sizeof"), 
+jl_symbol("check_sign_bit"), +jl_symbol("is_top_bit_set"), +jl_symbol("data"), +jl_symbol("kwerr"), jl_symbol("axes1"), jl_symbol("eachindex"), -jl_symbol("|"), -jl_symbol("ult_int"), -jl_symbol("lastindex"), -jl_symbol("setfield!"), -jl_symbol("UnitRange"), -jl_symbol("push!"), +jl_symbol("or_int"), jl_symbol("Bool"), -jl_symbol("Colon"), +jl_symbol("setfield!"), jl_symbol("fieldtype"), -jl_symbol("unitrange_last"), -jl_symbol("bitarray.jl"), -jl_symbol("<<"), -jl_symbol("zext_int"), -jl_symbol("Tuple"), +jl_symbol("Ref"), +jl_symbol("pointer"), +jl_symbol("max"), +jl_symbol("push!"), +jl_symbol("lastindex"), jl_symbol("reflection.jl"), -jl_symbol("TypeError"), -jl_symbol("print"), -jl_symbol("eltype"), +jl_symbol("<<"), +jl_symbol("similar"), +jl_symbol("Vector"), +jl_symbol("UInt32"), jl_symbol(">>"), -jl_symbol("strings/basic.jl"), -jl_symbol("gc_preserve_begin"), -jl_symbol("require_one_based_indexing"), -jl_symbol("gc_preserve_end"), -jl_symbol("DimensionMismatch"), -jl_symbol("indices.jl"), -jl_symbol("Cvoid"), -jl_symbol("oftype"), -jl_symbol("zero"), -jl_symbol("float.jl"), -jl_symbol("Any"), -jl_symbol("checkbounds"), -jl_symbol("or_int"), -jl_symbol("isdefined"), jl_symbol("dict.jl"), +jl_symbol("checkbounds"), +jl_symbol("undef"), +jl_symbol("jl_string_ptr"), +jl_symbol("error"), jl_symbol("strings/io.jl"), -jl_symbol("shl_int"), -jl_symbol("copy"), -jl_symbol("macro expansion"), -jl_symbol("abstractdict.jl"), +jl_symbol("strings/substring.jl"), +jl_symbol("bitarray.jl"), +jl_symbol("strings/basic.jl"), +jl_symbol("merge"), +jl_symbol("TypeError"), +jl_symbol("keyword argument"), jl_symbol("in"), -jl_symbol("io.jl"), -jl_symbol("BlasInt"), -jl_symbol("Float64"), +jl_symbol("print"), +jl_symbol("macro expansion"), jl_symbol("mul_int"), -jl_symbol("UInt32"), +jl_symbol("shl_int"), jl_symbol("C_NULL"), +jl_symbol("oftype"), +jl_symbol("_growend!"), +jl_symbol("Any"), +jl_symbol("Tuple"), +jl_symbol("float.jl"), +jl_symbol("ncodeunits"), jl_symbol("Integer"), +jl_symbol("io.jl"), +jl_symbol("eltype"), +jl_symbol("name"), +jl_symbol("parent"), jl_symbol("!=="), -jl_symbol("merge"), -jl_symbol("BoundsError"), -jl_symbol("broadcasted"), -jl_symbol("Cint"), -jl_symbol("min"), -jl_symbol("libblastrampoline"), jl_symbol("iszero"), +jl_symbol("min"), +jl_symbol("DimensionMismatch"), jl_symbol("refvalue.jl"), -jl_symbol("stride"), -jl_symbol("error"), -jl_symbol("ncodeunits"), -jl_symbol("LinearIndices"), -jl_symbol("Clong"), -jl_symbol("pair.jl"), -jl_symbol("_growend!"), -jl_symbol("char.jl"), -jl_symbol("copyto!"), -jl_symbol("get"), -jl_symbol("tail"), -jl_symbol("real"), jl_symbol("Union"), -jl_symbol("multidimensional.jl"), -jl_symbol("enter"), -jl_symbol("leave"), +jl_symbol("BlasInt"), +jl_symbol("unsafe_load"), +jl_symbol("indices.jl"), +jl_symbol("x"), +jl_symbol("require_one_based_indexing"), +jl_symbol("namedtuple.jl"), +jl_symbol("tail"), +jl_symbol("Float64"), +jl_symbol("head"), +jl_symbol("Cvoid"), +jl_symbol("copy"), +jl_symbol("libblastrampoline"), +jl_symbol("get"), +jl_symbol("neg_int"), +jl_symbol("stop"), +jl_symbol("zero"), jl_symbol("add_ptr"), -jl_symbol("chkstride1"), +jl_symbol("toUInt32"), +jl_symbol("ptr"), +jl_symbol("char.jl"), +jl_symbol("trunc"), +jl_symbol("not_atomic"), +jl_symbol("enter"), +jl_symbol("Pair"), +jl_symbol("jl_value_ptr"), jl_symbol("Expr"), -jl_symbol("write"), -jl_symbol("broadcast.jl"), +jl_symbol("broadcasted"), +jl_symbol("pointerref"), +jl_symbol("multidimensional.jl"), +jl_symbol("Generator"), +jl_symbol("leave"), 
+jl_symbol("memoryref"), jl_symbol("show.jl"), +jl_symbol("pointer_from_objref"), +jl_symbol("memoryrefget"), +jl_symbol("reduce.jl"), +jl_symbol("stride"), +jl_symbol("pair.jl"), +jl_symbol("_string"), +jl_symbol("cmem.jl"), +jl_symbol("generator.jl"), +jl_symbol("broadcast.jl"), jl_symbol("none"), -jl_symbol("Generator"), +jl_symbol("copyto!"), +jl_symbol("chkstride1"), +jl_symbol("value"), +jl_symbol("write"), +jl_symbol("identity"), +jl_symbol("real"), +jl_symbol("start"), +jl_symbol("Cint"), +jl_symbol("fill!"), +jl_symbol("checkindex"), +jl_symbol("keys"), +jl_symbol("BoundsError"), +jl_symbol("vals"), +jl_symbol("Symbol"), +jl_symbol("strings/util.jl"), jl_symbol("Int32"), -jl_symbol("materialize"), +jl_symbol("ht_keyindex"), +jl_symbol("io"), +jl_symbol("~"), +jl_symbol("AssertionError"), +jl_symbol("abstractdict.jl"), jl_symbol("show"), -jl_symbol("lock"), -jl_symbol("unsafe_load"), -jl_symbol("gmp.jl"), jl_symbol("mpfr.jl"), -jl_symbol("Symbol"), -jl_symbol("Pair"), -jl_symbol("resize!"), -jl_symbol("neg_int"), -jl_symbol("strings/substring.jl"), -jl_symbol("AssertionError"), -jl_symbol("identity"), -jl_symbol("one"), -jl_symbol("reduce.jl"), -jl_symbol("libcholmod"), jl_symbol("isless"), +jl_symbol("args"), +jl_symbol("lock"), jl_symbol("reducedim.jl"), +jl_symbol("gmp.jl"), +jl_symbol("offset"), +jl_symbol("resize!"), +jl_symbol("throw_boundserror"), +jl_symbol("Clong"), +jl_symbol("_call_latest"), +jl_symbol("argtail"), +jl_symbol("compiler/ssair/ir.jl"), +jl_symbol("sub_ptr"), +jl_symbol("materialize"), jl_symbol("checksquare"), -jl_symbol("sort.jl"), -jl_symbol("generator.jl"), -jl_symbol("pointer_from_objref"), -jl_symbol("Float32"), -jl_symbol("chklapackerror"), -jl_symbol("parent"), -jl_symbol("task.jl"), +jl_symbol("LinearIndices"), +jl_symbol("ule_int"), +jl_symbol("dict"), jl_symbol("div"), -jl_symbol("cholmod_common"), -jl_symbol("ht_keyindex"), -jl_symbol("pop_exception"), -jl_symbol("c.jl"), -jl_symbol("firstindex"), -jl_symbol("some.jl"), -jl_symbol("iobuffer.jl"), -jl_symbol("sub_ptr"), -jl_symbol("vect"), -jl_symbol("unsafe_string"), -jl_symbol("llvmcall"), -jl_symbol("checkindex"), -jl_symbol("_call_latest"), +jl_symbol("chklapackerror"), +jl_symbol("count"), +jl_symbol("Float32"), +jl_symbol("genericmemory.jl"), +jl_symbol("print_to_string"), jl_symbol("rethrow"), -jl_symbol("pointerref"), +jl_symbol("sort.jl"), +jl_symbol("boundscheck"), jl_symbol("println"), -jl_symbol("keys"), -jl_symbol("RefValue"), +jl_symbol("loading.jl"), +jl_symbol("collect"), +jl_symbol("ashr_int"), jl_symbol("_expr"), -jl_symbol("toUInt32"), -jl_symbol("ismissing"), -jl_symbol("throw_boundserror"), -jl_symbol("IteratorSize"), -jl_symbol("iddict.jl"), +jl_symbol("iobuffer.jl"), +jl_symbol("DataType"), +jl_symbol("Dict"), +jl_symbol("unsafe_string"), +jl_symbol("RefValue"), +jl_symbol("step"), jl_symbol("to_shape"), -jl_symbol("Csize_t"), -jl_symbol("~"), -jl_symbol("argtail"), -jl_symbol("include"), -jl_symbol("set.jl"), -jl_symbol("isequal"), +jl_symbol("pop_exception"), +jl_symbol("Memory"), +jl_symbol("KeyError"), +jl_symbol("chunks"), jl_symbol("refpointer.jl"), -jl_symbol("=>"), -jl_symbol("Val"), -jl_symbol("Base"), +jl_symbol("llvmcall"), +jl_symbol("c.jl"), +jl_symbol("set.jl"), +jl_symbol("abs"), +jl_symbol("checked_trunc_uint"), +jl_symbol("Type"), jl_symbol("%"), -jl_symbol("collect"), -jl_symbol("Type##kw"), -jl_symbol("typemax"), -jl_symbol("fill!"), -jl_symbol("ule_int"), -jl_symbol("atomics.jl"), -jl_symbol("libgit2"), +jl_symbol("len"), jl_symbol("BigFloat"), 
-jl_symbol("ashr_int"), -jl_symbol("boundscheck"), -jl_symbol("abs"), -jl_symbol("^"), -jl_symbol("ensure_initialized"), -jl_symbol("_array_for"), -jl_symbol("strings/util.jl"), -jl_symbol("Dict"), +jl_symbol("isequal"), +jl_symbol("vect"), +jl_symbol("sprint"), +jl_symbol("mode"), +jl_symbol("expr.jl"), jl_symbol("Nothing"), -jl_symbol("compiler/ssair/ir.jl"), +jl_symbol("Val"), +jl_symbol("IteratorSize"), +jl_symbol("=>"), +jl_symbol("haskey"), +jl_symbol("iddict.jl"), jl_symbol("unsafe_write"), -jl_symbol("util.jl"), +jl_symbol("val"), +jl_symbol("flags"), +jl_symbol("task.jl"), +jl_symbol("UnionAll"), +jl_symbol("memset"), +jl_symbol("xor"), +jl_symbol("jl_alloc_genericmemory"), +jl_symbol("uplo"), jl_symbol("toInt32"), -jl_symbol("loading.jl"), -jl_symbol("value"), -jl_symbol("expr.jl"), -jl_symbol("print_to_string"), +jl_symbol("Base"), +jl_symbol("atomics.jl"), +jl_symbol("uuid"), +jl_symbol("one"), +jl_symbol("math.jl"), +jl_symbol("position"), +jl_symbol("typemax"), +jl_symbol("all"), +jl_symbol("error.jl"), +jl_symbol("path.jl"), +jl_symbol("^"), +jl_symbol("nextind"), +jl_symbol("include"), jl_symbol("the_exception"), -jl_symbol("nonzeros"), -jl_symbol("<:"), -jl_symbol("KeyError"), -jl_symbol("xor"), -jl_symbol("logging.jl"), +jl_symbol("ensure_initialized"), +jl_symbol("Const"), +jl_symbol("UInt128"), +jl_symbol("codeunit"), jl_symbol("stat.jl"), -jl_symbol("close"), -jl_symbol("adjoint"), -jl_symbol("meta"), -jl_symbol("path.jl"), -jl_symbol("round"), -jl_symbol("Cstring"), -jl_symbol("SizeUnknown"), -jl_symbol("esc"), -jl_symbol("missing.jl"), +jl_symbol("gcutils.jl"), +jl_symbol("UndefRefError"), +jl_symbol("diag"), jl_symbol("throw_undef_if_not"), -jl_symbol("error.jl"), -jl_symbol("Type"), -jl_symbol("mul!"), -jl_symbol("math.jl"), -jl_symbol("unsafe_trunc"), jl_symbol("missing"), -jl_symbol("subarray.jl"), -jl_symbol("noinline"), jl_symbol("isnan"), -jl_symbol("ldiv!"), -jl_symbol("DataType"), -jl_symbol("codeunit"), -jl_symbol("condition.jl"), -jl_symbol("step"), -jl_symbol("copyast"), -jl_symbol("bitset.jl"), -jl_symbol("float"), -jl_symbol("fastmath.jl"), -jl_symbol("_mod64"), -jl_symbol("_div64"), -jl_symbol("all"), -jl_symbol("parse"), -jl_symbol("joinpath"), -jl_symbol("nextind"), -jl_symbol("regex.jl"), jl_symbol("Enums.jl"), -jl_symbol("promote_type"), -jl_symbol("Cdouble"), -jl_symbol("ComplexF32"), -jl_symbol("read"), -jl_symbol("intfuncs.jl"), -jl_symbol("Complex"), +jl_symbol("logging.jl"), jl_symbol("_deleteend!"), -jl_symbol("stat"), -jl_symbol("UnionAll"), -jl_symbol("special/trig.jl"), -jl_symbol("UInt128"), -jl_symbol("_copyto_impl!"), -jl_symbol("stream.jl"), -jl_symbol("lmul!"), -jl_symbol("repr"), -jl_symbol("promote_rule"), -jl_symbol("xor_int"), -jl_symbol("complex.jl"), -jl_symbol("transpose"), -jl_symbol(">>>"), -jl_symbol("cholmod_sparse"), -jl_symbol("filemode"), -jl_symbol("ComplexF64"), -jl_symbol("SparseMatrixCSC"), -jl_symbol("view"), -jl_symbol("GitError"), -jl_symbol("zeros"), -jl_symbol("InexactError"), +jl_symbol("indices"), +jl_symbol("compiler/utilities.jl"), +jl_symbol("Pairs"), +jl_symbol("<:"), +jl_symbol("compiler/tfuncs.jl"), +jl_symbol("close"), +jl_symbol("subarray.jl"), +jl_symbol("fastmath.jl"), +jl_symbol("invokelatest"), +jl_symbol("jl_array_del_end"), +jl_symbol("_mod64"), +jl_symbol("parameters"), diff --git a/src/coverage.cpp b/src/coverage.cpp index 95924f326524b..685370198ff13 100644 --- a/src/coverage.cpp +++ b/src/coverage.cpp @@ -24,9 +24,9 @@ static int codegen_imaging_mode(void) const int logdata_blocksize = 32; // 
target getting nearby lines in the same general cache area and reducing calls to malloc by chunking typedef uint64_t logdata_block[logdata_blocksize]; -typedef StringMap< std::vector > logdata_t; +typedef StringMap< SmallVector > logdata_t; -static uint64_t *allocLine(std::vector &vec, int line) +static uint64_t *allocLine(SmallVector &vec, int line) { unsigned block = line / logdata_blocksize; line = line % logdata_blocksize; @@ -63,7 +63,7 @@ extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char *filename_, size_ StringRef filename = StringRef(filename_, len_filename); if (codegen_imaging_mode() || filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; - std::vector &vec = coverageData[filename]; + SmallVector &vec = coverageData[filename]; uint64_t *ptr = allocLine(vec, line); (*ptr)++; } @@ -77,19 +77,18 @@ JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line) return allocLine(mallocData[filename], line); } -// Resets the malloc counts. -extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void) +static void clear_log_data(logdata_t &logData, int resetValue) { - logdata_t::iterator it = mallocData.begin(); - for (; it != mallocData.end(); it++) { - std::vector &bytes = (*it).second; - std::vector::iterator itb; + logdata_t::iterator it = logData.begin(); + for (; it != logData.end(); it++) { + SmallVector &bytes = (*it).second; + SmallVector::iterator itb; for (itb = bytes.begin(); itb != bytes.end(); itb++) { if (*itb) { logdata_block &data = **itb; for (int i = 0; i < logdata_blocksize; i++) { if (data[i] > 0) - data[i] = 1; + data[i] = resetValue; } } } @@ -97,6 +96,18 @@ extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void) jl_gc_sync_total_bytes(0); } +// Resets the malloc counts. 
+extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void) +{ + clear_log_data(mallocData, 1); +} + +// Resets the code coverage +extern "C" JL_DLLEXPORT void jl_clear_coverage_data(void) +{ + clear_log_data(coverageData, 0); +} + static void write_log_data(logdata_t &logData, const char *extension) { std::string base = std::string(jl_options.julia_bindir); @@ -104,7 +115,7 @@ static void write_log_data(logdata_t &logData, const char *extension) logdata_t::iterator it = logData.begin(); for (; it != logData.end(); it++) { std::string filename(it->first()); - std::vector &values = it->second; + SmallVector &values = it->second; if (!values.empty()) { if (!jl_isabspath(filename.c_str())) filename = base + filename; @@ -160,7 +171,7 @@ static void write_lcov_data(logdata_t &logData, const std::string &outfile) logdata_t::iterator it = logData.begin(); for (; it != logData.end(); it++) { StringRef filename = it->first(); - const std::vector &values = it->second; + const SmallVector &values = it->second; if (!values.empty()) { outf << "SF:" << filename.str() << '\n'; size_t n_covered = 0; @@ -192,11 +203,11 @@ static void write_lcov_data(logdata_t &logData, const std::string &outfile) outf.close(); } -extern "C" void jl_write_coverage_data(const char *output) +extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output) { if (output) { StringRef output_pattern(output); - if (output_pattern.endswith(".info")) + if (output_pattern.ends_with(".info")) write_lcov_data(coverageData, jl_format_filename(output_pattern.str().c_str())); } else { diff --git a/src/crc32c.c b/src/crc32c.c index 4ca8db06459a1..50d2acc603359 100644 --- a/src/crc32c.c +++ b/src/crc32c.c @@ -1,15 +1,16 @@ /* crc32c.c -- compute CRC-32C using software table or available hardware instructions - * Copyright (C) 2013 Mark Adler - * Version 1.1 1 Aug 2013 Mark Adler + * Copyright (C) 2013, 2021 Mark Adler + * Version 1.1 1 Aug 2013 Mark Adler, updates from Version 1.2 5 June 2021 * * Code retrieved in August 2016 from August 2013 post by Mark Adler on - * http://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software + * https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software * Modified for use in libjulia: * - exported function renamed to jl_crc32c, DLL exports added. * - removed main() function * - architecture and compiler detection * - precompute crc32c tables and store in a generated .c file * - ARMv8 support + * Updated to incorporate upstream 2021 patch by Mark Adler to register constraints. */ /* @@ -39,6 +40,8 @@ /* Version history: 1.0 10 Feb 2013 First version 1.1 1 Aug 2013 Correct comments on why three crc instructions in parallel + 1.2 5 Jun 2021 Correct register constraints on assembly instructions + (+ other changes that were superfluous for us) */ #include "julia.h" @@ -53,14 +56,9 @@ #define POLY 0x82f63b78 /* Block sizes for three-way parallel crc computation. LONG and SHORT must - both be powers of two. The associated string constants must be set - accordingly, for use in constructing the assembler instructions. */ + both be powers of two. 
*/ #define LONG 8192 -#define LONGx1 "8192" -#define LONGx2 "16384" #define SHORT 256 -#define SHORTx1 "256" -#define SHORTx2 "512" #ifndef GEN_CRC32C_TABLES #include "crc32c-tables.c" @@ -97,27 +95,27 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len) /* compute the crc for up to seven leading bytes to bring the data pointer to an eight-byte boundary */ while (len && ((uintptr_t)buf & 7) != 0) { - __asm__("crc32b\t" "(%1), %0" - : "=r"(crc0) - : "r"(buf), "0"(crc0)); + __asm__("crc32b\t" "%1, %0" + : "+r"(crc0) + : "m"(*buf)); buf++; len--; } - /* compute the crc on sets of LONG*3 bytes, executing three independent crc - instructions, each on LONG bytes -- this is optimized for the Nehalem, - Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a - throughput of one crc per cycle, but a latency of three cycles */ + /* compute the crc on sets of LONG*3 bytes, + making use of three ALUs in parallel on a single core. */ while (len >= LONG * 3) { uintptr_t crc1 = 0; uintptr_t crc2 = 0; const char *end = buf + LONG; do { - __asm__(CRC32_PTR "\t" "(%3), %0\n\t" - CRC32_PTR "\t" LONGx1 "(%3), %1\n\t" - CRC32_PTR "\t" LONGx2 "(%3), %2" - : "=r"(crc0), "=r"(crc1), "=r"(crc2) - : "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2)); + __asm__(CRC32_PTR "\t%3, %0\n\t" + CRC32_PTR "\t%4, %1\n\t" + CRC32_PTR "\t%5, %2" + : "+r"(crc0), "+r"(crc1), "+r"(crc2) + : "m"(* (const uintptr_t *) &buf[0]), + "m"(* (const uintptr_t *) &buf[LONG]), + "m"(* (const uintptr_t *) &buf[LONG*2])); buf += sizeof(void*); } while (buf < end); crc0 = crc32c_shift(crc32c_long, crc0) ^ crc1; @@ -133,11 +131,13 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len) uintptr_t crc2 = 0; const char *end = buf + SHORT; do { - __asm__(CRC32_PTR "\t" "(%3), %0\n\t" - CRC32_PTR "\t" SHORTx1 "(%3), %1\n\t" - CRC32_PTR "\t" SHORTx2 "(%3), %2" - : "=r"(crc0), "=r"(crc1), "=r"(crc2) - : "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2)); + __asm__(CRC32_PTR "\t%3, %0\n\t" + CRC32_PTR "\t%4, %1\n\t" + CRC32_PTR "\t%5, %2" + : "+r"(crc0), "+r"(crc1), "+r"(crc2) + : "m"(* (const uintptr_t *) &buf[0]), + "m"(* (const uintptr_t *) &buf[SHORT]), + "m"(* (const uintptr_t *) &buf[SHORT*2])); buf += sizeof(void*); } while (buf < end); crc0 = crc32c_shift(crc32c_short, crc0) ^ crc1; @@ -150,18 +150,18 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len) block */ const char *end = buf + (len - (len & 7)); while (buf < end) { - __asm__(CRC32_PTR "\t" "(%1), %0" - : "=r"(crc0) - : "r"(buf), "0"(crc0)); + __asm__(CRC32_PTR "\t" "%1, %0" + : "+r"(crc0) + : "m"(* (const uintptr_t *) buf)); buf += sizeof(void*); } len &= 7; /* compute the crc for up to seven trailing bytes */ while (len) { - __asm__("crc32b\t" "(%1), %0" - : "=r"(crc0) - : "r"(buf), "0"(crc0)); + __asm__("crc32b\t" "%1, %0" + : "+r"(crc0) + : "m"(*buf)); buf++; len--; } @@ -178,6 +178,9 @@ JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len) return crc32c_sse42(crc, buf, len); } # else +#if defined(JL_CRC32C_USE_IFUNC) && defined(_COMPILER_CLANG_) +JL_UNUSED +#endif static crc32c_func_t crc32c_dispatch(void) { // When used in ifunc, we cannot call external functions (i.e. 
jl_cpuid) diff --git a/src/datatype.c b/src/datatype.c index 95c3b11c9abdc..fd25cca503676 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" @@ -19,23 +20,21 @@ extern "C" { // allocating TypeNames ----------------------------------------------------------- -static int is10digit(char c) JL_NOTSAFEPOINT -{ - return (c >= '0' && c <= '9'); -} - static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT { char *n = jl_symbol_name(s); if (n[0] != '#') return s; - char *end = strrchr(n, '#'); + char *end = strchr(&n[1], '#'); + // handle `#f...##...#...` + if (end != NULL && end[1] == '#') + end = strchr(&end[2], '#'); int32_t len; - if (end == n || end == n+1) + if (end == NULL || end == n+1) len = strlen(n) - 1; else len = (end-n) - 1; // extract `f` from `#f#...` - if (is10digit(n[1])) + if (isdigit(n[1]) || is_canonicalized_anonfn_typename(n)) return _jl_symbol(n, len+1); return _jl_symbol(&n[1], len); } @@ -49,7 +48,7 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo mt->name = jl_demangle_typename(name); mt->module = module; jl_atomic_store_relaxed(&mt->defs, jl_nothing); - jl_atomic_store_relaxed(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); jl_atomic_store_relaxed(&mt->cache, jl_nothing); jl_atomic_store_relaxed(&mt->max_args, 0); mt->backedges = NULL; @@ -82,6 +81,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu tn->atomicfields = NULL; tn->constfields = NULL; tn->max_methods = 0; + tn->constprop_heustic = 0; return tn; } @@ -134,10 +134,10 @@ static uint32_t _hash_layout_djb2(uintptr_t _layout, void *unused) JL_NOTSAFEPOI size_t own_size = sizeof(jl_datatype_layout_t); const char *fields = jl_dt_layout_fields(layout); assert(fields); - size_t fields_size = layout->nfields * jl_fielddesc_size(layout->fielddesc_type); + size_t fields_size = layout->nfields * jl_fielddesc_size(layout->flags.fielddesc_type); const char *pointers = jl_dt_layout_ptrs(layout); assert(pointers); - size_t pointers_size = (layout->npointers << layout->fielddesc_type); + size_t pointers_size = layout->first_ptr < 0 ? 0 : (layout->npointers << layout->flags.fielddesc_type); uint_t hash = 5381; hash = _hash_djb2(hash, (char *)layout, own_size); @@ -155,12 +155,12 @@ static int layout_eq(void *_l1, void *_l2, void *unused) JL_NOTSAFEPOINT return 0; const char *f1 = jl_dt_layout_fields(l1); const char *f2 = jl_dt_layout_fields(l2); - size_t fields_size = l1->nfields * jl_fielddesc_size(l1->fielddesc_type); + size_t fields_size = l1->nfields * jl_fielddesc_size(l1->flags.fielddesc_type); if (memcmp(f1, f2, fields_size)) return 0; const char *p1 = jl_dt_layout_ptrs(l1); const char *p2 = jl_dt_layout_ptrs(l2); - size_t pointers_size = (l1->npointers << l1->fielddesc_type); + size_t pointers_size = l1->first_ptr < 0 ? 
0 : (l1->npointers << l1->flags.fielddesc_type); if (memcmp(p1, p2, pointers_size)) return 0; return 1; @@ -179,6 +179,8 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz, uint32_t npointers, uint32_t alignment, int haspadding, + int isbitsegal, + int arrayelem, jl_fielddesc32_t desc[], uint32_t pointers[]) JL_NOTSAFEPOINT { @@ -186,32 +188,34 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz, // compute the smallest fielddesc type that can hold the layout description int fielddesc_type = 0; + uint32_t max_size = 0; + uint32_t max_offset = 0; if (nfields > 0) { - uint32_t max_size = 0; - uint32_t max_offset = desc[nfields - 1].offset; - if (npointers > 0 && pointers[npointers - 1] > max_offset) - max_offset = pointers[npointers - 1]; + max_offset = desc[nfields - 1].offset; for (size_t i = 0; i < nfields; i++) { if (desc[i].size > max_size) max_size = desc[i].size; } - jl_fielddesc8_t maxdesc8 = { 0, max_size, max_offset }; - jl_fielddesc16_t maxdesc16 = { 0, max_size, max_offset }; - jl_fielddesc32_t maxdesc32 = { 0, max_size, max_offset }; - if (maxdesc8.size != max_size || maxdesc8.offset != max_offset) { - fielddesc_type = 1; - if (maxdesc16.size != max_size || maxdesc16.offset != max_offset) { - fielddesc_type = 2; - if (maxdesc32.size != max_size || maxdesc32.offset != max_offset) { - assert(0); // should have been verified by caller - } + } + if (npointers > 0 && pointers[npointers - 1] > max_offset) + max_offset = pointers[npointers - 1]; + jl_fielddesc8_t maxdesc8 = { 0, max_size, max_offset }; + jl_fielddesc16_t maxdesc16 = { 0, max_size, max_offset }; + jl_fielddesc32_t maxdesc32 = { 0, max_size, max_offset }; + if (maxdesc8.size != max_size || maxdesc8.offset != max_offset) { + fielddesc_type = 1; + if (maxdesc16.size != max_size || maxdesc16.offset != max_offset) { + fielddesc_type = 2; + if (maxdesc32.size != max_size || maxdesc32.offset != max_offset) { + assert(0); // should have been verified by caller } } } + int32_t first_ptr = (npointers > 0 ? (int32_t)pointers[0] : -1); // allocate a new descriptor, on the stack if possible. size_t fields_size = nfields * jl_fielddesc_size(fielddesc_type); - size_t pointers_size = (npointers << fielddesc_type); + size_t pointers_size = first_ptr < 0 ? 0 : (npointers << fielddesc_type); size_t flddesc_sz = sizeof(jl_datatype_layout_t) + fields_size + pointers_size; int should_malloc = flddesc_sz >= jl_page_size; jl_datatype_layout_t *mallocmem = (jl_datatype_layout_t *)(should_malloc ? malloc(flddesc_sz) : NULL); @@ -221,11 +225,14 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz, flddesc->size = sz; flddesc->nfields = nfields; flddesc->alignment = alignment; - flddesc->haspadding = haspadding; - flddesc->fielddesc_type = fielddesc_type; - flddesc->padding = 0; + flddesc->flags.haspadding = haspadding; + flddesc->flags.isbitsegal = isbitsegal; + flddesc->flags.fielddesc_type = fielddesc_type; + flddesc->flags.arrayelem_isboxed = arrayelem == 1; + flddesc->flags.arrayelem_isunion = arrayelem == 2; + flddesc->flags.padding = 0; flddesc->npointers = npointers; - flddesc->first_ptr = (npointers > 0 ? 
pointers[0] : -1); + flddesc->first_ptr = first_ptr; // fill out the fields of the new descriptor jl_fielddesc8_t *desc8 = (jl_fielddesc8_t *)jl_dt_layout_fields(flddesc); @@ -248,18 +255,20 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz, desc32[i].isptr = desc[i].isptr; } } - uint8_t *ptrs8 = (uint8_t *)jl_dt_layout_ptrs(flddesc); - uint16_t *ptrs16 = (uint16_t *)jl_dt_layout_ptrs(flddesc); - uint32_t *ptrs32 = (uint32_t *)jl_dt_layout_ptrs(flddesc); - for (size_t i = 0; i < npointers; i++) { - if (fielddesc_type == 0) { - ptrs8[i] = pointers[i]; - } - else if (fielddesc_type == 1) { - ptrs16[i] = pointers[i]; - } - else { - ptrs32[i] = pointers[i]; + if (first_ptr >= 0) { + uint8_t *ptrs8 = (uint8_t *)jl_dt_layout_ptrs(flddesc); + uint16_t *ptrs16 = (uint16_t *)jl_dt_layout_ptrs(flddesc); + uint32_t *ptrs32 = (uint32_t *)jl_dt_layout_ptrs(flddesc); + for (size_t i = 0; i < npointers; i++) { + if (fielddesc_type == 0) { + ptrs8[i] = pointers[i]; + } + else if (fielddesc_type == 1) { + ptrs16[i] = pointers[i]; + } + else { + ptrs32[i] = pointers[i]; + } } } @@ -318,23 +327,23 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT { - return (!d->name->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename && - d->isconcretetype && !d->name->mutabl); + return d->isconcretetype && jl_datatype_size(d) == 0 && d->layout->npointers == 0 && !d->name->mutabl; // implies jl_is_layout_opaque } STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) JL_NOTSAFEPOINT { + // It's possible for st to already have an ->instance if it was redefined + if (st->instance) + return; if (jl_is_datatype_make_singleton(st)) { - // It's possible for st to already have an ->instance if it was redefined - if (!st->instance) - st->instance = jl_gc_permobj(0, st); + st->instance = jl_gc_permobj(0, st); } } // return whether all concrete subtypes of this type have the same layout int jl_struct_try_layout(jl_datatype_t *dt) { - if (dt->layout) + if (dt->layout || jl_is_genericmemory_type(dt)) return 1; else if (!jl_has_fixed_layout(dt)) return 0; @@ -346,13 +355,15 @@ int jl_struct_try_layout(jl_datatype_t *dt) int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree) { + if (jl_typeofbottom_type && ty == jl_typeofbottom_type->super) + ty = jl_typeofbottom_type; if (ty->name->mayinlinealloc && jl_struct_try_layout(ty)) { if (ty->layout->npointers > 0) { if (pointerfree) return 0; if (ty->name->n_uninitialized != 0) return 0; - if (ty->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32) + if (ty->layout->flags.fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32) return 0; } return 1; @@ -478,6 +489,102 @@ static int is_type_identityfree(jl_value_t *t) return 0; } +// make a copy of the layout of st, but with nfields=0 +void jl_get_genericmemory_layout(jl_datatype_t *st) +{ + jl_value_t *kind = jl_tparam0(st); + jl_value_t *eltype = jl_tparam1(st); + jl_value_t *addrspace = jl_tparam2(st); + if (!jl_is_typevar(eltype) && !jl_is_type(eltype)) { + // this is expected to have a layout, but since it is not constructable, we don't care too much what it is + static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), {0}}; + st->layout = &opaque_ptr_layout; + st->has_concrete_subtype = 0; + return; + } + + size_t elsz = 0, al = 1; + int isunboxed = jl_islayout_inline(eltype, 
&elsz, &al) && (kind != (jl_value_t*)jl_atomic_sym || jl_is_datatype(eltype)); + int isunion = isunboxed && jl_is_uniontype(eltype); + int haspadding = 1; // we may want to eventually actually compute this more precisely + int isbitsegal = 0; + int nfields = 0; // aka jl_is_layout_opaque + int npointers = 1; + int zi; + uint32_t first_ptr = -1; + uint32_t *pointers = &first_ptr; + int needlock = 0; + + if (isunboxed) { + elsz = LLT_ALIGN(elsz, al); + if (kind == (jl_value_t*)jl_atomic_sym) { + if (elsz > MAX_ATOMIC_SIZE) + needlock = 1; + else if (elsz > 0) + al = elsz = next_power_of_two(elsz); + } + if (isunion) { + zi = 1; + } + else { + assert(jl_is_datatype(eltype)); + zi = ((jl_datatype_t*)eltype)->zeroinit; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)eltype)->layout; + if (layout->first_ptr >= 0) { + first_ptr = layout->first_ptr; + npointers = layout->npointers; + if (layout->flags.fielddesc_type == 2) { + pointers = (uint32_t*)jl_dt_layout_ptrs(layout); + } + else { + pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t)); + for (int j = 0; j < npointers; j++) { + pointers[j] = jl_ptr_offset((jl_datatype_t*)eltype, j); + } + } + } + } + if (needlock) { + assert(al <= JL_SMALL_BYTE_ALIGNMENT); + size_t offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT); + elsz += offset; + haspadding = 1; + zi = 1; + } + } + else { + elsz = sizeof(void*); + al = elsz; + zi = 1; + } + + int arrayelem; + if (!isunboxed) + arrayelem = 1; + else if (isunion) + arrayelem = 2; + else + arrayelem = 0; + assert(!st->layout); + st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, isbitsegal, arrayelem, NULL, pointers); + st->zeroinit = zi; + //st->has_concrete_subtype = 1; + //st->isbitstype = 0; + //st->ismutationfree = 0; + //st->isidentityfree = 0; + + if (jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) { + if (kind == (jl_value_t*)jl_not_atomic_sym || kind == (jl_value_t*)jl_atomic_sym) { + jl_genericmemory_t *zeroinst = (jl_genericmemory_t*)jl_gc_permobj(LLT_ALIGN(sizeof(jl_genericmemory_t), JL_SMALL_BYTE_ALIGNMENT) + (elsz ? elsz : isunion), st); + zeroinst->length = 0; + zeroinst->ptr = (char*)zeroinst + JL_SMALL_BYTE_ALIGNMENT; + memset(zeroinst->ptr, 0, elsz ? elsz : isunion); + assert(!st->instance); + st->instance = (jl_value_t*)zeroinst; + } + } +} + void jl_compute_field_offsets(jl_datatype_t *st) { const uint64_t max_offset = (((uint64_t)1) << 32) - 1; @@ -492,6 +599,10 @@ void jl_compute_field_offsets(jl_datatype_t *st) st->zeroinit = 0; st->has_concrete_subtype = 1; } + if (st->name == jl_genericmemory_typename) { + jl_get_genericmemory_layout(st); + return; + } int isbitstype = st->isconcretetype && st->name->mayinlinealloc; int ismutationfree = !w->layout || !jl_is_layout_opaque(w->layout); int isidentityfree = !st->name->mutabl; @@ -501,7 +612,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) st->layout = w->layout; st->zeroinit = w->zeroinit; st->has_concrete_subtype = w->has_concrete_subtype; - if (!jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename + if (!jl_is_layout_opaque(st->layout)) { // e.g. 
jl_simplevector_type st->isbitstype = isbitstype && st->layout->npointers == 0; jl_maybe_allocate_singleton_instance(st); } @@ -514,18 +625,17 @@ void jl_compute_field_offsets(jl_datatype_t *st) // if we have no fields, we can trivially skip the rest if (st == jl_symbol_type || st == jl_string_type) { // opaque layout - heap-allocated blob - static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, 0, 0}; + static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }}; st->layout = &opaque_byte_layout; return; } - else if (st == jl_simplevector_type || st == jl_module_type || st->name == jl_array_typename) { - static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), 0, 0}; + else if (st == jl_simplevector_type || st == jl_module_type) { + static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }}; st->layout = &opaque_ptr_layout; return; } else { - // reuse the same layout for all singletons - static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, 0, 0}; + static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }}; st->layout = &singleton_layout; } } @@ -566,6 +676,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) size_t alignm = 1; int zeroinit = 0; int haspadding = 0; + int isbitsegal = 1; int homogeneous = 1; int needlock = 0; uint32_t npointers = 0; @@ -580,19 +691,30 @@ void jl_compute_field_offsets(jl_datatype_t *st) throw_ovf(should_malloc, desc, st, fsz); desc[i].isptr = 0; if (jl_is_uniontype(fld)) { - haspadding = 1; fsz += 1; // selector byte zeroinit = 1; + // TODO: Some unions could be bits comparable. + isbitsegal = 0; } else { uint32_t fld_npointers = ((jl_datatype_t*)fld)->layout->npointers; - if (((jl_datatype_t*)fld)->layout->haspadding) + if (((jl_datatype_t*)fld)->layout->flags.haspadding) haspadding = 1; + if (!((jl_datatype_t*)fld)->layout->flags.isbitsegal) + isbitsegal = 0; if (i >= nfields - st->name->n_uninitialized && fld_npointers && fld_npointers * sizeof(void*) != fsz) { - // field may be undef (may be uninitialized and contains pointer), - // and contains non-pointer fields of non-zero sizes. - haspadding = 1; + // For field types that contain pointers, we allow inlinealloc + // as long as the field type itself is always fully initialized. + // In such a case, we use the first pointer in the inlined field + // as the #undef marker (if it is zero, we treat the whole inline + // struct as #undef). However, we do not zero-initialize the whole + // struct, so the non-pointer parts of the inline allocation may + // be arbitrary, but still need to compare egal (because all #undef) + // representations are egal. Because of this, we cannot bitscompare + // them. + // TODO: Consider zero-initializing the whole struct. 
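/* Illustrative sketch, not part of the patch: how the new `isbitsegal` flag is
 * meant to be consumed. Byte-wise comparison of an inline-stored value is
 * definitive only when the layout is both bits-egal and padding-free;
 * otherwise callers fall back to the field-wise jl_egal__bits, exactly as the
 * modify/replace paths later in this patch do (those additionally try memcmp
 * first as a cheap positive check). The helper name is hypothetical. */
static int inline_fields_egal(const char *a, const char *b, jl_datatype_t *dt)
{
    const jl_datatype_layout_t *ly = dt->layout;
    if (ly->flags.isbitsegal && !ly->flags.haspadding)
        return memcmp(a, b, jl_datatype_size(dt)) == 0; /* stored bytes are canonical */
    /* bits hidden behind an #undef marker or padding may differ even for egal values */
    return jl_egal__bits((jl_value_t*)a, (jl_value_t*)b, dt);
}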
+ isbitsegal = 0; } if (!zeroinit) zeroinit = ((jl_datatype_t*)fld)->zeroinit; @@ -608,8 +730,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) zeroinit = 1; npointers++; if (!jl_pointer_egal(fld)) { - // this somewhat poorly named flag says whether some of the bits can be non-unique - haspadding = 1; + isbitsegal = 0; } } if (isatomic && fsz > MAX_ATOMIC_SIZE) @@ -648,6 +769,8 @@ void jl_compute_field_offsets(jl_datatype_t *st) if (al > alignm) alignm = al; } + if (alignm > MAX_ALIGN) + alignm = MAX_ALIGN; // We cannot guarantee alignments over 16 bytes because that's what our heap is aligned as if (LLT_ALIGN(sz, alignm) > sz) { haspadding = 1; sz = LLT_ALIGN(sz, alignm); @@ -670,7 +793,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) } } assert(ptr_i == npointers); - st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, desc, pointers); + st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, isbitsegal, 0, desc, pointers); if (should_malloc) { free(desc); if (npointers) @@ -687,14 +810,6 @@ void jl_compute_field_offsets(jl_datatype_t *st) return; } -static int is_anonfn_typename(char *name) -{ - if (name[0] != '#' || name[1] == '#') - return 0; - char *other = strrchr(name, '#'); - return other > &name[1] && is10digit(other[1]); -} - JL_DLLEXPORT jl_datatype_t *jl_new_datatype( jl_sym_t *name, jl_module_t *module, @@ -710,7 +825,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype( jl_typename_t *tn = NULL; JL_GC_PUSH2(&t, &tn); - assert(parameters); + assert(parameters && fnames); // init enough before possibly calling jl_new_typename_in t = jl_new_uninitialized_datatype(); @@ -823,6 +938,10 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t * jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0); uint32_t nbytes = (nbits + 7) / 8; uint32_t alignm = next_power_of_two(nbytes); +# if defined(_CPU_X86_) && !defined(_OS_WINDOWS_) + if (alignm == 8) + alignm = 4; +# endif if (alignm > MAX_ALIGN) alignm = MAX_ALIGN; // memoize isprimitivetype, since it is much easier than checking @@ -832,7 +951,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t * bt->ismutationfree = 1; bt->isidentityfree = 1; bt->isbitstype = (parameters == jl_emptysvec); - bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, NULL, NULL); + bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, 1, 0, NULL, NULL); bt->instance = NULL; return bt; } @@ -853,10 +972,13 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name, layout->size = large ? 
GC_MAX_SZCLASS+1 : 0; layout->nfields = 0; layout->alignment = sizeof(void *); - layout->haspadding = 1; layout->npointers = haspointers; - layout->fielddesc_type = 3; - layout->padding = 0; + layout->flags.haspadding = 1; + layout->flags.isbitsegal = 0; + layout->flags.fielddesc_type = 3; + layout->flags.padding = 0; + layout->flags.arrayelem_isboxed = 0; + layout->flags.arrayelem_isunion = 0; jl_fielddescdyn_t * desc = (jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout)); desc->markfunc = markfunc; @@ -884,7 +1006,7 @@ JL_DLLEXPORT int jl_reinit_foreign_type(jl_datatype_t *dt, JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) { - return jl_is_datatype(dt) && dt->layout && dt->layout->fielddesc_type == 3; + return jl_is_datatype(dt) && dt->layout && dt->layout->flags.fielddesc_type == 3; } // bits constructors ---------------------------------------------------------- @@ -904,7 +1026,7 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) #if MAX_POINTERATOMIC_SIZE >= 16 typedef struct _jl_uint128_t { - uint64_t a; + alignas(16) uint64_t a; uint64_t b; } jl_uint128_t; #endif @@ -968,6 +1090,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data) assert(!bt->smalltag); jl_task_t *ct = jl_current_task; jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt); + // TODO: make this a memmove_refs if relevant memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb); return v; } @@ -1122,13 +1245,10 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect return success; } -JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettyp, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb) +JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-allocated output */, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb) { // dst must have the required alignment for an atomic of the given size // n.b.: this does not spuriously fail if there are padding bits - jl_task_t *ct = jl_current_task; - int isptr = jl_field_isptr(rettyp, 0); - jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : jl_datatype_size(rettyp), isptr ? 
dt : rettyp); int success; jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected); if (nb == 0) { @@ -1136,7 +1256,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t } else if (nb == 1) { uint8_t *y8 = (uint8_t*)y; - assert(!dt->layout->haspadding); + assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding); if (dt == et) { *y8 = *(uint8_t*)expected; uint8_t z8 = *(uint8_t*)src; @@ -1149,7 +1269,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t } else if (nb == 2) { uint16_t *y16 = (uint16_t*)y; - assert(!dt->layout->haspadding); + assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding); if (dt == et) { *y16 = *(uint16_t*)expected; uint16_t z16 = *(uint16_t*)src; @@ -1167,7 +1287,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t uint32_t z32 = zext_read32(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32); - if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) + if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) break; } } @@ -1184,7 +1304,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t uint64_t z64 = zext_read64(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64); - if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) + if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) break; } } @@ -1202,7 +1322,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t jl_uint128_t z128 = zext_read128(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128); - if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) + if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt)) break; } } @@ -1215,28 +1335,54 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t else { abort(); } - if (isptr) { - JL_GC_PUSH1(&y); - jl_value_t *z = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp); - *(jl_value_t**)z = y; - JL_GC_POP(); - y = z; - nb = sizeof(jl_value_t*); - } - *((uint8_t*)y + nb) = success ? 
1 : 0; - return y; + return success; } -// used by boot.jl -JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_datatype_t *bt) +JL_DLLEXPORT int jl_atomic_storeonce_bits(jl_datatype_t *dt, char *dst, const jl_value_t *src, int nb) { - uint64_t data = 0xffffffffffffffffULL; - jl_task_t *ct = jl_current_task; - jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(size_t), bt); - if (bt->smalltag) - jl_set_typetagof(v, bt->smalltag, 0); - memcpy(v, &data, sizeof(size_t)); - return v; + // dst must have the required alignment for an atomic of the given size + // n.b.: this does not spuriously fail + // n.b.: hasptr == 1 therefore nb >= sizeof(void*), because ((jl_datatype_t*)ty)->layout->has_ptr >= 0 + int success; +#ifdef _P64 + if (nb <= 4) { + uint32_t y32 = 0; + uint32_t z32 = zext_read32(src, nb); + success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, &y32, z32); + } +#if MAX_POINTERATOMIC_SIZE >= 8 + else if (nb <= 8) { + uint64_t y64 = 0; + uint64_t z64 = zext_read64(src, nb); + while (1) { + success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y64, z64); + if (success || undefref_check(dt, (jl_value_t*)&y64) != NULL) + break; + } + } +#endif +#else + if (nb <= 8) { + uint64_t y64 = 0; + uint64_t z64 = zext_read64(src, nb); + success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y64, z64); + } +#endif +#if MAX_POINTERATOMIC_SIZE >= 16 + else if (nb <= 16) { + jl_uint128_t y128 = {0}; + jl_uint128_t z128 = zext_read128(src, nb); + while (1) { + success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y128, z128); + if (success || undefref_check(dt, (jl_value_t*)&y128) != NULL) + break; + } + } +#endif + else { + abort(); + } + return success; } #define PERMBOXN_FUNC(nb) \ @@ -1359,6 +1505,9 @@ void jl_init_int32_int64_cache(void) for(i=0; i < NBOX_C; i++) { boxed_int32_cache[i] = jl_permbox32(jl_int32_type, jl_int32_tag, i-NBOX_C/2); boxed_int64_cache[i] = jl_permbox64(jl_int64_type, jl_int64_tag, i-NBOX_C/2); + boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, jl_uint16_tag, i); + boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, jl_uint64_tag, i); + boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, jl_uint32_tag, i); #ifdef _P64 boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, 0, i); boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, 0, i); @@ -1383,9 +1532,6 @@ void jl_init_box_caches(void) } for (i = 0; i < NBOX_C; i++) { boxed_int16_cache[i] = jl_permbox16(jl_int16_type, jl_int16_tag, i-NBOX_C/2); - boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, jl_uint16_tag, i); - boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, jl_uint32_tag, i); - boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, jl_uint64_tag, i); } } @@ -1401,10 +1547,11 @@ JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...) { jl_task_t *ct = jl_current_task; - if (type->instance != NULL) return type->instance; - if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) { + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) { jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); } + if (type->instance != NULL) + return type->instance; va_list args; size_t i, nf = jl_datatype_nfields(type); va_start(args, type); @@ -1424,7 +1571,7 @@ JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...) 
JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na) { jl_task_t *ct = jl_current_task; - if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) { + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) { jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); } size_t nf = jl_datatype_nfields(type); @@ -1463,7 +1610,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) jl_task_t *ct = jl_current_task; if (!jl_is_tuple(tup)) jl_type_error("new", (jl_value_t*)jl_tuple_type, tup); - if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) jl_type_error("new", (jl_value_t *)jl_datatype_type, (jl_value_t *)type); size_t nargs = jl_nfields(tup); size_t nf = jl_datatype_nfields(type); @@ -1510,10 +1657,13 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type) { jl_task_t *ct = jl_current_task; - if (type->instance != NULL) return type->instance; - if (!jl_is_datatype(type) || type->layout == NULL) { + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) { + if (type == jl_typeofbottom_type->super) + return jl_bottom_type; // ::Type{Union{}} is an abstract type, but is also a singleton when used as a field type jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); } + if (type->instance != NULL) + return type->instance; size_t size = jl_datatype_size(type); jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type); if (type->smalltag) // TODO: do we need this? 
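/* Illustrative sketch, not part of the patch: caller-side use of the reworked
 * jl_atomic_cmpswap_bits, which now fills a caller-allocated old-value box and
 * returns a success flag instead of allocating the (old value, success) pair
 * itself. This mirrors what replace_bits does further down; it assumes a
 * concrete, non-union, isbits field type small enough for a lock-free atomic,
 * and the helper name is hypothetical. */
static jl_value_t *cmpswap_bits_sketch(jl_value_t *ty, char *dst,
                                       jl_value_t *expected, jl_value_t *rhs)
{
    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty); /* (old value, success flag) result type */
    JL_GC_PROMISE_ROOTED(rettyp); /* always a leaftype, as in the patch */
    jl_task_t *ct = jl_current_task;
    jl_value_t *r = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp);
    int nb = (int)jl_datatype_size((jl_datatype_t*)ty);
    int success = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, r, dst, expected, rhs, nb);
    *((uint8_t*)r + jl_field_offset(rettyp, 1)) = success ? 1 : 0; /* store the success flag */
    return r;
}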
@@ -1525,14 +1675,51 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type) // field access --------------------------------------------------------------- -JL_DLLEXPORT void jl_lock_value(jl_value_t *v) JL_NOTSAFEPOINT +// TODO(jwn): these lock/unlock pairs must be full seq-cst fences +JL_DLLEXPORT void jl_lock_value(jl_mutex_t *v) JL_NOTSAFEPOINT +{ + JL_LOCK_NOGC(v); +} + +JL_DLLEXPORT void jl_unlock_value(jl_mutex_t *v) JL_NOTSAFEPOINT { - JL_LOCK_NOGC((jl_mutex_t*)v); + JL_UNLOCK_NOGC(v); } -JL_DLLEXPORT void jl_unlock_value(jl_value_t *v) JL_NOTSAFEPOINT +JL_DLLEXPORT void jl_lock_field(jl_mutex_t *v) JL_NOTSAFEPOINT { - JL_UNLOCK_NOGC((jl_mutex_t*)v); + JL_LOCK_NOGC(v); +} + +JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT +{ + JL_UNLOCK_NOGC(v); +} + +static inline char *lock(char *p, jl_value_t *parent, int needlock, enum atomic_kind isatomic) JL_NOTSAFEPOINT +{ + if (needlock) { + if (isatomic == isatomic_object) { + jl_lock_value((jl_mutex_t*)parent); + } + else { + jl_lock_field((jl_mutex_t*)p); + return p + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT); + } + } + return p; +} + +static inline void unlock(char *p, jl_value_t *parent, int needlock, enum atomic_kind isatomic) JL_NOTSAFEPOINT +{ + if (needlock) { + if (isatomic == isatomic_object) { + jl_unlock_value((jl_mutex_t*)parent); + } + else { + jl_unlock_field((jl_mutex_t*)p); + } + } } JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err) @@ -1558,8 +1745,7 @@ JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err) } } if (err) - jl_errorf("type %s has no field %s", jl_symbol_name(t->name->name), - jl_symbol_name(fld)); + jl_has_no_field_error(t, fld); return -1; } @@ -1591,11 +1777,12 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i) else if (needlock) { jl_task_t *ct = jl_current_task; r = jl_gc_alloc(ct->ptls, fsz, ty); - jl_lock_value(v); + jl_lock_value((jl_mutex_t*)v); memcpy((char*)r, (char*)v + offs, fsz); - jl_unlock_value(v); + jl_unlock_value((jl_mutex_t*)v); } else { + // TODO: a finalizer here could make the isunion case not quite right r = jl_new_bits(ty, (char*)v + offs); } return undefref_check((jl_datatype_t*)ty, r); @@ -1618,30 +1805,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i) return r; } -static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT -{ - if (hasptr) { - // assert that although dst might have some undefined bits, the src heap box should be okay with that - assert(LLT_ALIGN(nb, sizeof(void*)) == LLT_ALIGN(jl_datatype_size(jl_typeof(src)), sizeof(void*))); - size_t nptr = nb / sizeof(void*); - memmove_refs((void**)dst, (void**)src, nptr); - jl_gc_multi_wb(parent, src); - src = (jl_value_t*)((char*)src + nptr * sizeof(void*)); - dst = dst + nptr * sizeof(void*); - nb -= nptr * sizeof(void*); - } - else { - // src must be a heap box. 
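/* Illustrative sketch, not part of the patch: the storage protocol behind the
 * lock()/unlock() helpers introduced above for atomic fields wider than
 * MAX_ATOMIC_SIZE. The field's storage begins with an inline jl_mutex_t,
 * padded out to JL_SMALL_BYTE_ALIGNMENT, and the actual payload follows it;
 * the function name and payload_sz parameter are hypothetical. */
static void locked_field_store_sketch(char *field, const char *src, size_t payload_sz)
{
    jl_mutex_t *lk = (jl_mutex_t*)field; /* lock lives at the start of the field */
    char *payload = field + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
    JL_LOCK_NOGC(lk);
    memcpy(payload, src, payload_sz);
    JL_UNLOCK_NOGC(lk);
}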
- assert(nb == jl_datatype_size(jl_typeof(src))); - if (nb >= 16) { - memcpy(dst, jl_assume_aligned(src, 16), nb); - return; - } - } - memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb); -} - -void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT +inline void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT { size_t offs = jl_field_offset(st, i); if (rhs == NULL) { // TODO: this should be invalid, but it happens frequently in ircode.c @@ -1670,26 +1834,77 @@ void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, hasptr = 0; } else { - hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0; + hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0; } size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy + assert(!isatomic || jl_typeis(rhs, ty)); int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); if (isatomic && !needlock) { jl_atomic_store_bits((char*)v + offs, rhs, fsz); - if (hasptr) - jl_gc_multi_wb(v, rhs); // rhs is immutable } else if (needlock) { - jl_lock_value(v); + jl_lock_value((jl_mutex_t*)v); memcpy((char*)v + offs, (char*)rhs, fsz); - jl_unlock_value(v); + jl_unlock_value((jl_mutex_t*)v); } else { - memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz); + memassign_safe(hasptr, (char*)v + offs, rhs, fsz); } + if (hasptr) + jl_gc_multi_wb(v, rhs); // rhs is immutable } } +inline jl_value_t *swap_bits(jl_value_t *ty, char *v, uint8_t *psel, jl_value_t *parent, jl_value_t *rhs, enum atomic_kind isatomic) +{ + jl_value_t *rty = jl_typeof(rhs); + int hasptr; + int isunion = psel != NULL; + if (isunion) { + assert(!isatomic); + hasptr = 0; + } + else { + hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0; + } + size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy + int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); + assert(!isatomic || jl_typeis(rhs, ty)); + jl_value_t *r; + if (isatomic && !needlock) { + r = jl_atomic_swap_bits(rty, v, rhs, fsz); + } + else { + if (needlock) { + jl_task_t *ct = jl_current_task; + r = jl_gc_alloc(ct->ptls, fsz, ty); + char *px = lock(v, parent, needlock, isatomic); + memcpy((char*)r, px, fsz); + memcpy(px, (char*)rhs, fsz); + unlock(v, parent, needlock, isatomic); + } + else { + r = jl_new_bits(isunion ? 
jl_nth_union_component(ty, *psel) : ty, v); + if (isunion) { + unsigned nth = 0; + if (!jl_find_union_component(ty, rty, &nth)) + assert(0 && "invalid field assignment to isbits union"); + *psel = nth; + if (jl_is_datatype_singleton((jl_datatype_t*)rty)) + return r; + } + memassign_safe(hasptr, v, rhs, fsz); + } + } + if (!isunion) + r = undefref_check((jl_datatype_t*)ty, r); + if (hasptr) + jl_gc_multi_wb(parent, rhs); // rhs is immutable + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + return r; +} + jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) { jl_value_t *ty = jl_field_type_concrete(st, i); @@ -1697,138 +1912,139 @@ jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_ jl_type_error("swapfield!", ty, rhs); size_t offs = jl_field_offset(st, i); jl_value_t *r; + char *p = (char*)v + offs; if (jl_field_isptr(st, i)) { if (isatomic) - r = jl_atomic_exchange((_Atomic(jl_value_t*)*)((char*)v + offs), rhs); + r = jl_atomic_exchange((_Atomic(jl_value_t*)*)p, rhs); else - r = jl_atomic_exchange_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs); + r = jl_atomic_exchange_release((_Atomic(jl_value_t*)*)p, rhs); jl_gc_wb(v, rhs); + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + return r; } else { - jl_value_t *rty = jl_typeof(rhs); - int hasptr; - int isunion = jl_is_uniontype(ty); - if (isunion) { - assert(!isatomic); - r = jl_get_nth_field(v, i); - size_t fsz = jl_field_size(st, i); - uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1]; - unsigned nth = 0; - if (!jl_find_union_component(ty, rty, &nth)) - assert(0 && "invalid field assignment to isbits union"); - *psel = nth; - if (jl_is_datatype_singleton((jl_datatype_t*)rty)) - return r; - hasptr = 0; - } - else { - hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0; - } - size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy - int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); - if (isatomic && !needlock) { - r = jl_atomic_swap_bits(rty, (char*)v + offs, rhs, fsz); - if (hasptr) - jl_gc_multi_wb(v, rhs); // rhs is immutable + uint8_t *psel = jl_is_uniontype(ty) ? (uint8_t*)&p[jl_field_size(st, i) - 1] : NULL; + return swap_bits(ty, p, psel, v, rhs, isatomic ? isatomic_object : isatomic_none); + } +} + +inline jl_value_t *modify_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, int isatomic, jl_module_t *mod, jl_sym_t *name) +{ + jl_value_t *r = isatomic ? jl_atomic_load(p) : jl_atomic_load_relaxed(p); + if (__unlikely(r == NULL)) { + if (mod && name) + jl_undefined_var_error(name, (jl_value_t*)mod); + jl_throw(jl_undefref_exception); + } + jl_value_t **args; + JL_GC_PUSHARGS(args, 2); + args[0] = r; + while (1) { + args[1] = rhs; + jl_value_t *y = jl_apply_generic(op, args, 2); + args[1] = y; + if (!jl_isa(y, ty)) { + if (mod && name) + jl_errorf("cannot assign an incompatible value to the global %s.%s.", jl_symbol_name(mod->name), jl_symbol_name(name)); + jl_type_error(jl_is_genericmemory(parent) ? "memoryrefmodify!" 
: "modifyfield!", ty, y); } - else { - if (needlock) { - jl_task_t *ct = jl_current_task; - r = jl_gc_alloc(ct->ptls, fsz, ty); - jl_lock_value(v); - memcpy((char*)r, (char*)v + offs, fsz); - memcpy((char*)v + offs, (char*)rhs, fsz); - jl_unlock_value(v); - } - else { - if (!isunion) - r = jl_new_bits(ty, (char*)v + offs); - memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz); - } - if (needlock || !isunion) - r = undefref_check((jl_datatype_t*)ty, r); + if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_release(p, &r, y)) { + jl_gc_wb(parent, y); + break; } + args[0] = r; + jl_gc_safepoint(); } - if (__unlikely(r == NULL)) - jl_throw(jl_undefref_exception); - return r; + // args[0] == r (old) + // args[1] == y (new) + jl_datatype_t *rettyp = jl_apply_modify_type(ty); + JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) + args[0] = jl_new_struct(rettyp, args[0], args[1]); + JL_GC_POP(); + return args[0]; } -jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic) +inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, enum atomic_kind isatomic) { - size_t offs = jl_field_offset(st, i); - jl_value_t *ty = jl_field_type_concrete(st, i); - jl_value_t *r = jl_get_nth_field_checked(v, i); - if (isatomic && jl_field_isptr(st, i)) - jl_fence(); // load was previously only relaxed + int hasptr; + int isunion = psel != NULL; + if (isunion) { + assert(!isatomic); + hasptr = 0; + } + else { + hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0; + } jl_value_t **args; JL_GC_PUSHARGS(args, 2); - args[0] = r; while (1) { + jl_value_t *r; + jl_value_t *rty = isunion ? jl_nth_union_component(ty, *psel) : ty; + size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the initial copy + int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); + if (isatomic && !needlock) { + r = jl_atomic_new_bits(rty, p); + } + else if (needlock) { + jl_task_t *ct = jl_current_task; + r = jl_gc_alloc(ct->ptls, fsz, rty); + char *px = lock(p, parent, needlock, isatomic); + memcpy((char*)r, px, fsz); + unlock(p, parent, needlock, isatomic); + } + else { + r = jl_new_bits(rty, p); + } + r = undefref_check((jl_datatype_t*)rty, r); + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + args[0] = r; args[1] = rhs; jl_value_t *y = jl_apply_generic(op, args, 2); args[1] = y; - if (!jl_isa(y, ty)) - jl_type_error("modifyfield!", ty, y); - if (jl_field_isptr(st, i)) { - _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs); - if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_relaxed(p, &r, y)) + if (!jl_isa(y, ty)) { + jl_type_error(jl_is_genericmemory(parent) ? "memoryrefmodify!" 
: "modifyfield!", ty, y); + } + jl_value_t *yty = jl_typeof(y); + if (isatomic && !needlock) { + assert(yty == rty); + if (jl_atomic_bool_cmpswap_bits(p, r, y, fsz)) { + if (hasptr) + jl_gc_multi_wb(parent, y); // y is immutable break; + } } else { - jl_value_t *yty = jl_typeof(y); - jl_value_t *rty = jl_typeof(r); - int hasptr; - int isunion = jl_is_uniontype(ty); - if (isunion) { - assert(!isatomic); - hasptr = 0; - } - else { - hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0; - } - size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy - int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); - if (isatomic && !needlock) { - if (jl_atomic_bool_cmpswap_bits((char*)v + offs, r, y, fsz)) { - if (hasptr) - jl_gc_multi_wb(v, y); // y is immutable - break; - } - r = jl_atomic_new_bits(ty, (char*)v + offs); - } - else { - if (needlock) - jl_lock_value(v); - int success = memcmp((char*)v + offs, r, fsz) == 0; - if (success) { - if (isunion) { - size_t fsz = jl_field_size(st, i); - uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1]; - success = (jl_typeof(r) == jl_nth_union_component(ty, *psel)); - if (success) { - unsigned nth = 0; - if (!jl_find_union_component(ty, yty, &nth)) - assert(0 && "invalid field assignment to isbits union"); - *psel = nth; - if (jl_is_datatype_singleton((jl_datatype_t*)yty)) - break; - } - fsz = jl_datatype_size((jl_datatype_t*)yty); // need to shrink-wrap the final copy - } - else { - assert(yty == ty && rty == ty); + char *px = lock(p, parent, needlock, isatomic); + int success = memcmp(px, (char*)r, fsz) == 0; + if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding)) + success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty); + if (success) { + if (isunion) { + success = (rty == jl_nth_union_component(ty, *psel)); + if (success) { + unsigned nth = 0; + if (!jl_find_union_component(ty, yty, &nth)) + assert(0 && "invalid field assignment to isbits union"); + *psel = nth; + if (jl_is_datatype_singleton((jl_datatype_t*)yty)) + break; } - memassign_safe(hasptr, v, (char*)v + offs, y, fsz); + fsz = jl_datatype_size((jl_datatype_t*)yty); // need to shrink-wrap the final copy } - if (needlock) - jl_unlock_value(v); - if (success) - break; - r = jl_get_nth_field(v, i); + else { + assert(yty == ty && rty == ty); + } + memassign_safe(hasptr, px, y, fsz); + } + unlock(p, parent, needlock, isatomic); + if (success) { + if (hasptr) + jl_gc_multi_wb(parent, y); // y is immutable + break; } } - args[0] = r; jl_gc_safepoint(); } // args[0] == r (old) @@ -1840,91 +2056,105 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu return args[0]; } -jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic) +jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic) { - jl_value_t *ty = jl_field_type_concrete(st, i); - if (!jl_isa(rhs, ty)) - jl_type_error("replacefield!", ty, rhs); size_t offs = jl_field_offset(st, i); - jl_value_t *r = expected; + jl_value_t *ty = jl_field_type_concrete(st, i); + char *p = (char*)v + offs; + if (jl_field_isptr(st, i)) { + return modify_value(ty, (_Atomic(jl_value_t*)*)p, v, op, rhs, isatomic, NULL, NULL); + } + else { + uint8_t *psel = jl_is_uniontype(ty) ? (uint8_t*)&p[jl_field_size(st, i) - 1] : NULL; + return modify_bits(ty, p, psel, v, op, rhs, isatomic ? 
isatomic_object : isatomic_none); + } +} + +inline jl_value_t *replace_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, int isatomic, jl_module_t *mod, jl_sym_t *name) +{ jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty); JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) - if (jl_field_isptr(st, i)) { - _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs); - int success; - while (1) { - success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_relaxed(p, &r, rhs); - if (success) - jl_gc_wb(v, rhs); - if (__unlikely(r == NULL)) - jl_throw(jl_undefref_exception); - if (success || !jl_egal(r, expected)) - break; + jl_value_t *r = expected; + int success; + while (1) { + success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_release(p, &r, rhs); + if (success) + jl_gc_wb(parent, rhs); + if (__unlikely(r == NULL)) { + if (mod && name) + jl_undefined_var_error(name, (jl_value_t*)mod); + jl_throw(jl_undefref_exception); } - JL_GC_PUSH1(&r); - r = jl_new_struct(rettyp, r, success ? jl_true : jl_false); - JL_GC_POP(); + if (success || !jl_egal(r, expected)) + break; + } + JL_GC_PUSH1(&r); + r = jl_new_struct(rettyp, r, success ? jl_true : jl_false); + JL_GC_POP(); + return r; +} + +inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, enum atomic_kind isatomic) +{ + jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty); + JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) + int hasptr; + int isunion = psel != NULL; + size_t fsz = jl_field_size(rettyp, 0); + int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); + assert(jl_field_offset(rettyp, 1) == fsz); + jl_value_t *rty = ty; + if (isunion) { + assert(!isatomic); + hasptr = 0; + isatomic = isatomic_none; // this makes GCC happy } else { - int hasptr; - int isunion = jl_is_uniontype(ty); - int needlock; - jl_value_t *rty = ty; - size_t fsz = jl_field_size(st, i); - if (isunion) { - assert(!isatomic); - hasptr = 0; - needlock = 0; - isatomic = 0; // this makes GCC happy - } - else { - hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0; - fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy - needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); - } - if (isatomic && !needlock) { - r = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, rettyp, (char*)v + offs, r, rhs, fsz); - int success = *((uint8_t*)r + fsz); - if (success && hasptr) - jl_gc_multi_wb(v, rhs); // rhs is immutable + hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0; + assert(jl_typeis(rhs, ty)); + } + int success; + jl_task_t *ct = jl_current_task; + assert(!jl_field_isptr(rettyp, 0)); + jl_value_t *r = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp); + if (isatomic && !needlock) { + size_t rsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the compare + success = jl_atomic_cmpswap_bits((jl_datatype_t*)rty, r, p, expected, rhs, rsz); + *((uint8_t*)r + fsz) = success ? 
1 : 0; + } + else { + char *px = lock(p, parent, needlock, isatomic); + if (isunion) + rty = jl_nth_union_component(rty, *psel); + size_t rsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the compare + memcpy((char*)r, px, rsz); // copy field // TODO: make this a memmove_refs if relevant + if (isunion) + *((uint8_t*)r + fsz - 1) = *psel; // copy union bits + success = (rty == jl_typeof(expected)); + if (success) { + success = memcmp((char*)r, (char*)expected, rsz) == 0; + if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding)) + success = jl_egal__bits(r, expected, (jl_datatype_t*)rty); } - else { - jl_task_t *ct = jl_current_task; - uint8_t *psel = NULL; + *((uint8_t*)r + fsz) = success ? 1 : 0; + if (success) { + jl_value_t *rty = jl_typeof(rhs); if (isunion) { - psel = &((uint8_t*)v)[offs + fsz - 1]; - rty = jl_nth_union_component(rty, *psel); + rsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy + unsigned nth = 0; + if (!jl_find_union_component(ty, rty, &nth)) + assert(0 && "invalid field assignment to isbits union"); + *psel = nth; + if (jl_is_datatype_singleton((jl_datatype_t*)rty)) + return r; } - assert(!jl_field_isptr(rettyp, 0)); - r = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), (jl_value_t*)rettyp); - int success = (rty == jl_typeof(expected)); - if (needlock) - jl_lock_value(v); - memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits - if (success) { - size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy - if (((jl_datatype_t*)rty)->layout->haspadding) - success = jl_egal__bits(r, expected, (jl_datatype_t*)rty); - else - success = memcmp((char*)r, (char*)expected, fsz) == 0; - } - *((uint8_t*)r + fsz) = success ? 1 : 0; - if (success) { - jl_value_t *rty = jl_typeof(rhs); - size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy - if (isunion) { - unsigned nth = 0; - if (!jl_find_union_component(ty, rty, &nth)) - assert(0 && "invalid field assignment to isbits union"); - *psel = nth; - if (jl_is_datatype_singleton((jl_datatype_t*)rty)) - return r; - } - memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz); - } - if (needlock) - jl_unlock_value(v); + memassign_safe(hasptr, px, rhs, rsz); } + unlock(p, parent, needlock, isatomic); + } + if (success && hasptr) + jl_gc_multi_wb(parent, rhs); // rhs is immutable + if (!isunion) { r = undefref_check((jl_datatype_t*)rty, r); if (__unlikely(r == NULL)) jl_throw(jl_undefref_exception); @@ -1932,6 +2162,74 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val return r; } +jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic) +{ + jl_value_t *ty = jl_field_type_concrete(st, i); + if (!jl_isa(rhs, ty)) + jl_type_error("replacefield!", ty, rhs); + size_t offs = jl_field_offset(st, i); + char *p = (char*)v + offs; + if (jl_field_isptr(st, i)) { + return replace_value(ty, (_Atomic(jl_value_t*)*)p, v, expected, rhs, isatomic, NULL, NULL); + } + else { + size_t fsz = jl_field_size(st, i); + int isunion = jl_is_uniontype(ty); + uint8_t *psel = isunion ? (uint8_t*)&p[fsz - 1] : NULL; + return replace_bits(ty, p, psel, v, expected, rhs, isatomic ? 
isatomic_object : isatomic_none); + } +} + +inline int setonce_bits(jl_datatype_t *rty, char *p, jl_value_t *parent, jl_value_t *rhs, enum atomic_kind isatomic) +{ + size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy + assert(rty->layout->first_ptr >= 0); + int hasptr = 1; + int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); + int success; + if (isatomic && !needlock) { + success = jl_atomic_storeonce_bits(rty, p, rhs, fsz); + } + else { + char *px = lock(p, parent, needlock, isatomic); + success = undefref_check(rty, (jl_value_t*)px) == NULL; + if (success) + memassign_safe(hasptr, px, rhs, fsz); + unlock(p, parent, needlock, isatomic); + } + if (success) + jl_gc_multi_wb(parent, rhs); // rhs is immutable + return success; +} + +int set_nth_fieldonce(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) +{ + jl_value_t *ty = jl_field_type_concrete(st, i); + if (!jl_isa(rhs, ty)) + jl_type_error("setfieldonce!", ty, rhs); + size_t offs = jl_field_offset(st, i); + int success; + char *p = (char*)v + offs; + if (jl_field_isptr(st, i)) { + _Atomic(jl_value_t*) *px = (_Atomic(jl_value_t*)*)p; + jl_value_t *r = NULL; + success = isatomic ? jl_atomic_cmpswap(px, &r, rhs) : jl_atomic_cmpswap_release(px, &r, rhs); + if (success) + jl_gc_wb(v, rhs); + } + else { + int isunion = jl_is_uniontype(ty); + if (isunion) + return 0; + int hasptr = ((jl_datatype_t*)ty)->layout->first_ptr >= 0; + if (!hasptr) + return 0; + assert(ty == jl_typeof(rhs)); + success = setonce_bits((jl_datatype_t*)ty, p, v, rhs, isatomic ? isatomic_object : isatomic_none); + } + return success; +} + JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT { jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v); @@ -1964,6 +2262,39 @@ JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field) return jl_field_offset(ty, field - 1); } +jl_value_t *get_nth_pointer(jl_value_t *v, size_t i) +{ + jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); + const jl_datatype_layout_t *ly = dt->layout; + uint32_t npointers = ly->npointers; + if (i >= npointers) + jl_bounds_error_int(v, i); + const uint8_t *ptrs8 = (const uint8_t *)jl_dt_layout_ptrs(ly); + const uint16_t *ptrs16 = (const uint16_t *)jl_dt_layout_ptrs(ly); + const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly); + uint32_t fld; + if (ly->flags.fielddesc_type == 0) + fld = ptrs8[i]; + else if (ly->flags.fielddesc_type == 1) + fld = ptrs16[i]; + else + fld = ptrs32[i]; + return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)(&((jl_value_t**)v)[fld])); +} + +JL_DLLEXPORT jl_value_t *jl_get_nth_pointer(jl_value_t *v, size_t i) +{ + jl_value_t *ptrf = get_nth_pointer(v, i); + if (__unlikely(ptrf == NULL)) + jl_throw(jl_undefref_exception); + return ptrf; +} + +JL_DLLEXPORT int jl_nth_pointer_isdefined(jl_value_t *v, size_t i) +{ + return get_nth_pointer(v, i) != NULL; +} + #ifdef __cplusplus } #endif diff --git a/src/debug-registry.h b/src/debug-registry.h index bad59f205acb3..72189c60d3d40 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -14,7 +14,7 @@ typedef struct { int64_t slide; } objfileentry_t; -// Central registry for resolving function addresses to `jl_method_instance_t`s and +// Central registry for resolving function addresses to `jl_code_instance_t`s and // originating `ObjectFile`s (for the DWARF debug info). 
// // A global singleton instance is notified by the JIT whenever a new object is emitted, @@ -32,7 +32,7 @@ class JITDebugInfoRegistry std::unique_lock lock; CResourceT &resource; - Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT : lock(mutex), resource(resource) {} + Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lock(mutex), resource(resource) {} Lock(Lock &&) JL_NOTSAFEPOINT = default; Lock &operator=(Lock &&) JL_NOTSAFEPOINT = default; @@ -56,7 +56,7 @@ class JITDebugInfoRegistry return resource; } - ~Lock() JL_NOTSAFEPOINT = default; + ~Lock() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; private: @@ -68,28 +68,29 @@ class JITDebugInfoRegistry Locked(ResourceT resource = ResourceT()) JL_NOTSAFEPOINT : mutex(), resource(std::move(resource)) {} - LockT operator*() JL_NOTSAFEPOINT { + LockT operator*() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return LockT(mutex, resource); } - ConstLockT operator*() const JL_NOTSAFEPOINT { + ConstLockT operator*() const JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return ConstLockT(mutex, resource); } - ~Locked() JL_NOTSAFEPOINT = default; + ~Locked() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; }; struct image_info_t { uint64_t base; jl_image_fptrs_t fptrs; - jl_method_instance_t **fvars_linfo; + jl_code_instance_t **fvars_cinst; size_t fvars_n; }; struct libc_frames_t { #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB) - std::atomic libc_register_frame_{nullptr}; - std::atomic libc_deregister_frame_{nullptr}; + typedef void (*frame_register_func)(void *) JL_NOTSAFEPOINT; + std::atomic libc_register_frame_{nullptr}; + std::atomic libc_deregister_frame_{nullptr}; void libc_register_frame(const char *Entry) JL_NOTSAFEPOINT; @@ -98,22 +99,32 @@ class JITDebugInfoRegistry }; private: - struct ObjectInfo { - const llvm::object::ObjectFile *object = nullptr; - size_t SectionSize = 0; - ptrdiff_t slide = 0; - llvm::object::SectionRef Section{}; - llvm::DIContext *context = nullptr; + struct LazyObjectInfo { + SmallVector data; + size_t uncompressedsize; + std::unique_ptr object; + std::unique_ptr context; + LazyObjectInfo() = delete; + ~LazyObjectInfo() JL_NOTSAFEPOINT = default; + }; + + struct SectionInfo { + LazyObjectInfo *object; + size_t SectionSize; + ptrdiff_t slide; + uint64_t SectionIndex; + SectionInfo() = delete; + ~SectionInfo() JL_NOTSAFEPOINT = default; }; template using rev_map = std::map>; - typedef rev_map objectmap_t; + typedef rev_map objectmap_t; typedef rev_map objfilemap_t; objectmap_t objectmap{}; - rev_map> linfomap{}; + rev_map> cimap{}; // Maintain a mapping of unrealized function names -> linfo objects // so that when we see it get emitted, we can add a link back to the linfo @@ -134,10 +145,9 @@ class JITDebugInfoRegistry libc_frames_t libc_frames{}; void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT; - jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT; + jl_code_instance_t *lookupCodeInstance(size_t pointer) JL_NOTSAFEPOINT; void registerJITObject(const llvm::object::ObjectFile &Object, - std::function getLoadAddress, - std::function lookupWriteAddress) JL_NOTSAFEPOINT; + std::function getLoadAddress) JL_NOTSAFEPOINT; objectmap_t& getObjectMap() JL_NOTSAFEPOINT; void add_image_info(image_info_t info) JL_NOTSAFEPOINT; bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 35e41fe657045..17e093cecb89a 100644 --- 
a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -57,7 +58,7 @@ extern "C" void __register_frame(void*) JL_NOTSAFEPOINT; extern "C" void __deregister_frame(void*) JL_NOTSAFEPOINT; template -static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) +static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) JL_NOTSAFEPOINT { const char *P = EHFrameAddr; const char *End = P + EHFrameSize; @@ -93,12 +94,12 @@ void JITDebugInfoRegistry::add_code_in_flight(StringRef name, jl_code_instance_t (**codeinst_in_flight)[mangle(name, DL)] = codeinst; } -jl_method_instance_t *JITDebugInfoRegistry::lookupLinfo(size_t pointer) +jl_code_instance_t *JITDebugInfoRegistry::lookupCodeInstance(size_t pointer) { jl_lock_profile(); - auto region = linfomap.lower_bound(pointer); - jl_method_instance_t *linfo = NULL; - if (region != linfomap.end() && pointer < region->first + region->second.first) + auto region = cimap.lower_bound(pointer); + jl_code_instance_t *linfo = NULL; + if (region != cimap.end() && pointer < region->first + region->second.first) linfo = region->second.second; jl_unlock_profile(); return linfo; @@ -161,8 +162,18 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT // --- storing and accessing source location metadata --- -void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) +void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { + // Non-opaque-closure MethodInstances are considered globally rooted + // through their methods, but for OC, we need to create a global root + // here. + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) { + jl_task_t *ct = jl_current_task; + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); + jl_as_global_root((jl_value_t*)mi, 1); + jl_gc_unsafe_leave(ct->ptls, gc_state); + } getJITDebugRegistry().add_code_in_flight(name, codeinst, DL); } @@ -222,11 +233,21 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam #endif void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, - std::function getLoadAddress, - std::function lookupWriteAddress) + std::function getLoadAddress) { object::section_iterator EndSection = Object.section_end(); + bool anyfunctions = false; + for (const object::SymbolRef &sym_iter : Object.symbols()) { + object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType()); + if (SymbolType != object::SymbolRef::ST_Function) + continue; + anyfunctions = true; + break; + } + if (!anyfunctions) + return; + #ifdef _CPU_ARM_ // ARM does not have/use .eh_frame uint64_t arm_exidx_addr = 0; @@ -280,14 +301,13 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, #if defined(_OS_WINDOWS_) uint64_t SectionAddrCheck = 0; uint64_t SectionLoadCheck = 0; (void)SectionLoadCheck; - uint64_t SectionWriteCheck = 0; (void)SectionWriteCheck; uint8_t *UnwindData = NULL; #if defined(_CPU_X86_64_) uint8_t *catchjmp = NULL; for (const object::SymbolRef &sym_iter : Object.symbols()) { StringRef sName = cantFail(sym_iter.getName()); - if (sName.equals("__UnwindData") || sName.equals("__catchjmp")) { - uint64_t Addr = cantFail(sym_iter.getAddress()); + if (sName == "__UnwindData" || sName == "__catchjmp") { + uint64_t Addr = cantFail(sym_iter.getAddress()); // offset into 
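[editor's note] lookupCodeInstance above resolves an instruction pointer by keying entries on their start address in a map sorted in descending order (the rev_map alias with std::greater), so lower_bound lands on the closest entry at or below the pointer and a size check confirms the range. A small self-contained sketch of that address-range lookup pattern, with hypothetical entry types:

#include <cstdint>
#include <cstdio>
#include <functional>
#include <map>

struct Range { size_t size; const char *name; };

// Descending-ordered map: lower_bound(p) yields the entry with the largest
// start address that is <= p, i.e. the candidate range containing p.
using RangeMap = std::map<uint64_t, Range, std::greater<uint64_t>>;

static const char *lookup(const RangeMap &m, uint64_t p) {
    auto it = m.lower_bound(p);
    if (it != m.end() && p < it->first + it->second.size)
        return it->second.name;
    return nullptr;
}

int main() {
    RangeMap m;
    m[0x1000] = {0x100, "f"};
    m[0x2000] = {0x080, "g"};
    std::printf("%s\n", lookup(m, 0x1010));                   // f
    std::printf("%s\n", lookup(m, 0x2085) ? "hit" : "miss");  // miss (past end of g)
}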
object (including section offset) auto Section = cantFail(sym_iter.getSection()); assert(Section != EndSection && Section->isText()); uint64_t SectionAddr = Section->getAddress(); @@ -299,44 +319,27 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, SectionLoadCheck == SectionLoadAddr); SectionAddrCheck = SectionAddr; SectionLoadCheck = SectionLoadAddr; - SectionWriteCheck = SectionLoadAddr; - if (lookupWriteAddress) - SectionWriteCheck = (uintptr_t)lookupWriteAddress((void*)SectionLoadAddr); - Addr += SectionWriteCheck - SectionLoadCheck; - if (sName.equals("__UnwindData")) { + Addr += SectionLoadAddr - SectionAddr; + if (sName == "__UnwindData") { UnwindData = (uint8_t*)Addr; } - else if (sName.equals("__catchjmp")) { + else if (sName == "__catchjmp") { catchjmp = (uint8_t*)Addr; } } } assert(catchjmp); assert(UnwindData); - assert(SectionAddrCheck); assert(SectionLoadCheck); - assert(!memcmp(catchjmp, "\0\0\0\0\0\0\0\0\0\0\0\0", 12) && - !memcmp(UnwindData, "\0\0\0\0\0\0\0\0\0\0\0\0", 12)); - catchjmp[0] = 0x48; - catchjmp[1] = 0xb8; // mov RAX, QWORD PTR [&__julia_personality] - *(uint64_t*)(&catchjmp[2]) = (uint64_t)&__julia_personality; - catchjmp[10] = 0xff; - catchjmp[11] = 0xe0; // jmp RAX - UnwindData[0] = 0x09; // version info, UNW_FLAG_EHANDLER - UnwindData[1] = 4; // size of prolog (bytes) - UnwindData[2] = 2; // count of unwind codes (slots) - UnwindData[3] = 0x05; // frame register (rbp) = rsp - UnwindData[4] = 4; // second instruction - UnwindData[5] = 0x03; // mov RBP, RSP - UnwindData[6] = 1; // first instruction - UnwindData[7] = 0x50; // push RBP - *(DWORD*)&UnwindData[8] = (DWORD)(catchjmp - (uint8_t*)SectionWriteCheck); // relative location of catchjmp - UnwindData -= SectionWriteCheck - SectionLoadCheck; #endif // defined(_OS_X86_64_) #endif // defined(_OS_WINDOWS_) + SmallVector packed; + compression::zlib::compress(ArrayRef((uint8_t*)Object.getData().data(), Object.getData().size()), packed, compression::zlib::DefaultCompression); + jl_jit_add_bytes(packed.size()); + auto ObjectCopy = new LazyObjectInfo{packed, Object.getData().size()}; // intentionally leaked so that we don't need to ref-count it, intentionally copied so that we exact-size the allocation (since no shrink_to_fit function) auto symbols = object::computeSymbolSizes(Object); - bool first = true; + bool hassection = false; for (const auto &sym_size : symbols) { const object::SymbolRef &sym_iter = sym_size.first; object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType()); @@ -348,7 +351,7 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, uint64_t SectionAddr = Section->getAddress(); StringRef secName = cantFail(Section->getName()); uint64_t SectionLoadAddr = getLoadAddress(secName); - Addr -= SectionAddr - SectionLoadAddr; + Addr += SectionLoadAddr - SectionAddr; StringRef sName = cantFail(sym_iter.getName()); uint64_t SectionSize = Section->getSize(); size_t Size = sym_size.second; @@ -374,25 +377,24 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, } jl_profile_atomic([&]() JL_NOTSAFEPOINT { if (codeinst) - linfomap[Addr] = std::make_pair(Size, codeinst->def); - if (first) { - objectmap[SectionLoadAddr] = {&Object, - (size_t)SectionSize, - (ptrdiff_t)(SectionAddr - SectionLoadAddr), - *Section, - nullptr, - }; - first = false; - } + cimap[Addr] = std::make_pair(Size, codeinst); + hassection = true; + objectmap.insert(std::pair{SectionLoadAddr, SectionInfo{ + ObjectCopy, + (size_t)SectionSize, + 
(ptrdiff_t)(SectionAddr - SectionLoadAddr), + Section->getIndex() + }}); }); } + if (!hassection) // clang-sa demands that we do this to fool cplusplus.NewDeleteLeaks + delete ObjectCopy; } void jl_register_jit_object(const object::ObjectFile &Object, - std::function getLoadAddress, - std::function lookupWriteAddress) JL_NOTSAFEPOINT + std::function getLoadAddress) { - getJITDebugRegistry().registerJITObject(Object, getLoadAddress, lookupWriteAddress); + getJITDebugRegistry().registerJITObject(Object, getLoadAddress); } // TODO: convert the safe names from aotcomile.cpp:makeSafeName back into symbols @@ -503,7 +505,7 @@ static int lookup_pointer( std::size_t semi_pos = func_name.find(';'); if (semi_pos != std::string::npos) { func_name = func_name.substr(0, semi_pos); - frame->linfo = NULL; // Looked up on Julia side + frame->ci = NULL; // Looked up on Julia side } } } @@ -540,7 +542,7 @@ static int lookup_pointer( #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB) void JITDebugInfoRegistry::libc_frames_t::libc_register_frame(const char *Entry) { - auto libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_); + frame_register_func libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_); if (!libc_register_frame_) { libc_register_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__register_frame"); jl_atomic_store_release(&this->libc_register_frame_, libc_register_frame_); @@ -553,7 +555,7 @@ void JITDebugInfoRegistry::libc_frames_t::libc_register_frame(const char *Entry) } void JITDebugInfoRegistry::libc_frames_t::libc_deregister_frame(const char *Entry) { - auto libc_deregister_frame_ = jl_atomic_load_relaxed(&this->libc_deregister_frame_); + frame_register_func libc_deregister_frame_ = jl_atomic_load_relaxed(&this->libc_deregister_frame_); if (!libc_deregister_frame_) { libc_deregister_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__deregister_frame"); jl_atomic_store_release(&this->libc_deregister_frame_, libc_deregister_frame_); @@ -689,9 +691,9 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) JL_NOTSAFEPO } extern "C" JL_DLLEXPORT_CODEGEN void jl_register_fptrs_impl(uint64_t image_base, const jl_image_fptrs_t *fptrs, - jl_method_instance_t **linfos, size_t n) + jl_code_instance_t **cinfos, size_t n) { - getJITDebugRegistry().add_image_info({(uintptr_t) image_base, *fptrs, linfos, n}); + getJITDebugRegistry().add_image_info({(uintptr_t) image_base, *fptrs, cinfos, n}); } template @@ -1127,7 +1129,7 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * if (entry.obj) *Section = getModuleSectionForAddress(entry.obj, pointer + entry.slide); // Assume we only need base address for sysimg for now - if (!inimage || !image_info.fptrs.base) + if (!inimage || 0 == image_info.fptrs.nptrs) saddr = nullptr; get_function_name_and_base(*Section, pointer, entry.slide, inimage, saddr, name, untrusted_dladdr); return true; @@ -1170,18 +1172,17 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip JITDebugInfoRegistry::image_info_t image; bool inimage = getJITDebugRegistry().get_image_info(fbase, &image); if (isImage && saddr && inimage) { - intptr_t diff = (uintptr_t)saddr - (uintptr_t)image.fptrs.base; for (size_t i = 0; i < image.fptrs.nclones; i++) { - if (diff == image.fptrs.clone_offsets[i]) { + if (saddr == image.fptrs.clone_ptrs[i]) { uint32_t idx = image.fptrs.clone_idxs[i] & jl_sysimg_val_mask; if (idx < image.fvars_n) // items after this were cloned but not referenced directly by a 
method (such as our ccall PLT thunks) - frame0->linfo = image.fvars_linfo[idx]; + frame0->ci = image.fvars_cinst[idx]; break; } } for (size_t i = 0; i < image.fvars_n; i++) { - if (diff == image.fptrs.offsets[i]) { - frame0->linfo = image.fvars_linfo[i]; + if (saddr == image.fptrs.ptrs[i]) { + frame0->ci = image.fvars_cinst[i]; break; } } @@ -1204,11 +1205,33 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, auto fit = objmap.lower_bound(fptr); if (fit != objmap.end() && fptr < fit->first + fit->second.SectionSize) { *slide = fit->second.slide; - *Section = fit->second.Section; - if (context) { - if (fit->second.context == nullptr) - fit->second.context = DWARFContext::create(*fit->second.object).release(); - *context = fit->second.context; + auto lazyobject = fit->second.object; + if (!lazyobject->object && !lazyobject->data.empty()) { + if (lazyobject->uncompressedsize) { + SmallVector unpacked; + Error E = compression::zlib::decompress(lazyobject->data, unpacked, lazyobject->uncompressedsize); + if (E) + lazyobject->data.clear(); + else + lazyobject->data = std::move(unpacked); + jl_jit_add_bytes(lazyobject->data.size() - lazyobject->uncompressedsize); + lazyobject->uncompressedsize = 0; + } + if (!lazyobject->data.empty()) { + auto obj = object::ObjectFile::createObjectFile(MemoryBufferRef(StringRef((const char*)lazyobject->data.data(), lazyobject->data.size()), "jit.o")); + if (obj) + lazyobject->object = std::move(*obj); + else + lazyobject->data.clear(); + } + } + if (lazyobject->object) { + *Section = *std::next(lazyobject->object->section_begin(), fit->second.SectionIndex); + if (context) { + if (lazyobject->context == nullptr) + lazyobject->context = DWARFContext::create(*lazyobject->object); + *context = lazyobject->context.get(); + } } found = 1; } @@ -1231,16 +1254,16 @@ extern "C" JL_DLLEXPORT_CODEGEN int jl_getFunctionInfo_impl(jl_frame_t **frames_ int64_t slide; uint64_t symsize; if (jl_DI_for_fptr(pointer, &symsize, &slide, &Section, &context)) { - frames[0].linfo = getJITDebugRegistry().lookupLinfo(pointer); + frames[0].ci = getJITDebugRegistry().lookupCodeInstance(pointer); int nf = lookup_pointer(Section, context, frames_out, pointer, slide, true, noInline); return nf; } return jl_getDylibFunctionInfo(frames_out, pointer, skipC, noInline); } -extern "C" jl_method_instance_t *jl_gdblookuplinfo(void *p) JL_NOTSAFEPOINT +extern "C" jl_code_instance_t *jl_gdblookupci(void *p) JL_NOTSAFEPOINT { - return getJITDebugRegistry().lookupLinfo((size_t)p); + return getJITDebugRegistry().lookupCodeInstance((size_t)p); } #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB) @@ -1258,14 +1281,14 @@ void register_eh_frames(uint8_t *Addr, size_t Size) { // On OS X OS X __register_frame takes a single FDE as an argument. // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_register_frame(Entry); }); } void deregister_eh_frames(uint8_t *Addr, size_t Size) { - processFDEs((char*)Addr, Size, [](const char *Entry) { + processFDEs((char*)Addr, Size, [](const char *Entry) JL_NOTSAFEPOINT { getJITDebugRegistry().libc_frames.libc_deregister_frame(Entry); }); } @@ -1277,7 +1300,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) // Skip over an arbitrary long LEB128 encoding. // Return the pointer to the first unprocessed byte. 
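[editor's note] The registration path above keeps only a zlib-compressed copy of each JIT'd object file and inflates it on first use in jl_DI_for_fptr, trading CPU on the rare symbolication path for resident memory. Below is a minimal sketch of the same store-compressed/decompress-on-demand idea using the plain zlib C API rather than LLVM's compression::zlib wrappers; the buffers are illustrative.

#include <zlib.h>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    const char payload[] = "pretend this is an in-memory object file ..........";
    const uLong srcLen = sizeof payload;

    // Compress eagerly at registration time.
    uLongf packedLen = compressBound(srcLen);
    std::vector<Bytef> packed(packedLen);
    if (compress2(packed.data(), &packedLen, (const Bytef *)payload, srcLen,
                  Z_DEFAULT_COMPRESSION) != Z_OK)
        return 1;
    packed.resize(packedLen);

    // Decompress lazily, only when debug info is actually requested;
    // the original size was recorded alongside the compressed bytes.
    std::vector<Bytef> unpacked(srcLen);
    uLongf unpackedLen = srcLen;
    if (uncompress(unpacked.data(), &unpackedLen, packed.data(), packedLen) != Z_OK)
        return 1;

    std::printf("roundtrip ok: %d (%lu -> %lu -> %lu bytes)\n",
                std::memcmp(payload, unpacked.data(), srcLen) == 0,
                (unsigned long)srcLen, (unsigned long)packedLen,
                (unsigned long)unpackedLen);
}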
-static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) +static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { const uint8_t *P = Addr; while ((*P >> 7) != 0 && P < End) @@ -1289,7 +1312,7 @@ static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End) // bytes than what there are more bytes than what the type can store. // Adjust the pointer to the first unprocessed byte. template static T parse_leb128(const uint8_t *&Addr, - const uint8_t *End) + const uint8_t *End) JL_NOTSAFEPOINT { typedef typename std::make_unsigned::type uT; uT v = 0; @@ -1312,7 +1335,7 @@ template static T parse_leb128(const uint8_t *&Addr, } template -static U safe_trunc(T t) +static U safe_trunc(T t) JL_NOTSAFEPOINT { assert((t >= static_cast(std::numeric_limits::min())) && (t <= static_cast(std::numeric_limits::max()))); @@ -1352,9 +1375,9 @@ enum DW_EH_PE : uint8_t { }; // Parse the CIE and return the type of encoding used by FDE -static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) +static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) JL_NOTSAFEPOINT { - // http://www.airs.com/blog/archives/460 + // https://www.airs.com/blog/archives/460 // Length (4 bytes) uint32_t cie_size = *(const uint32_t*)Addr; const uint8_t *cie_addr = Addr + 4; @@ -1458,7 +1481,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // Now first count the number of FDEs size_t nentries = 0; - processFDEs((char*)Addr, Size, [&](const char*){ nentries++; }); + processFDEs((char*)Addr, Size, [&](const char*) JL_NOTSAFEPOINT { nentries++; }); if (nentries == 0) return; @@ -1481,13 +1504,13 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // While we're at it, also record the start_ip and size, // which we fill in the table unw_table_entry *table = new unw_table_entry[nentries]; - std::vector start_ips(nentries); + SmallVector start_ips(nentries); size_t cur_entry = 0; // Cache the previously parsed CIE entry so that we can support multiple // CIE's (may not happen) without parsing it every time. const uint8_t *cur_cie = nullptr; DW_EH_PE encoding = DW_EH_PE_omit; - processFDEs((char*)Addr, Size, [&](const char *Entry) { + processFDEs((char*)Addr, Size, [&](const char *Entry) JL_NOTSAFEPOINT { // Skip Length (4bytes) and CIE offset (4bytes) uint32_t fde_size = *(const uint32_t*)Entry; uint32_t cie_id = ((const uint32_t*)Entry)[1]; @@ -1608,7 +1631,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) #endif extern "C" JL_DLLEXPORT_CODEGEN -uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) +uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) JL_NOTSAFEPOINT { // Might be called from unmanaged thread jl_lock_profile(); diff --git a/src/debuginfo.h b/src/debuginfo.h index 5b5cdcb82d534..6cd7528910765 100644 --- a/src/debuginfo.h +++ b/src/debuginfo.h @@ -1,6 +1,7 @@ // This file is a part of Julia. 
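[editor's note] parse_leb128 above decodes the DWARF variable-length integer format used throughout .eh_frame: seven payload bits per byte, least-significant group first, with the high bit marking continuation. A small self-contained decoder for the unsigned case, independent of the Julia helpers:

#include <cstdint>
#include <cstdio>

// Decode one unsigned LEB128 value; advances *p past the consumed bytes.
static uint64_t decode_uleb128(const uint8_t **p, const uint8_t *end) {
    uint64_t result = 0;
    unsigned shift = 0;
    while (*p < end) {
        uint8_t byte = *(*p)++;
        result |= (uint64_t)(byte & 0x7f) << shift;  // 7 payload bits per byte
        if ((byte & 0x80) == 0)                      // high bit clear: last byte
            break;
        shift += 7;
    }
    return result;
}

int main() {
    // 624485 encodes as E5 8E 26 (the classic example from the DWARF spec).
    const uint8_t buf[] = {0xE5, 0x8E, 0x26};
    const uint8_t *p = buf;
    std::printf("%llu\n", (unsigned long long)decode_uleb128(&p, buf + sizeof buf)); // 624485
}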
License is MIT: https://julialang.org/license // Declarations for debuginfo.cpp +void jl_jit_add_bytes(size_t bytes) JL_NOTSAFEPOINT; int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, llvm::object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT; diff --git a/src/disasm.cpp b/src/disasm.cpp index 9414c0a2a065d..6a7985bd7ec1b 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -16,7 +16,7 @@ // // University of Illinois at Urbana-Champaign // -// http://llvm.org +// https://llvm.org // // Permission is hereby granted, free of charge, to any person obtaining a copy of // this software and associated documentation files (the "Software"), to deal with @@ -58,7 +58,7 @@ #include "llvm-version.h" // for outputting disassembly -#include +#include #include #include #include @@ -99,6 +99,7 @@ // for outputting assembly #include #include +#include #include #include #include @@ -117,7 +118,7 @@ using namespace llvm; // helper class for tracking inlining context while printing debug info class DILineInfoPrinter { // internal state: - std::vector context; + SmallVector context; uint32_t inline_depth = 0; // configuration options: const char* LineStart = "; "; @@ -147,7 +148,7 @@ class DILineInfoPrinter { } void emit_finish(raw_ostream &Out) JL_NOTSAFEPOINT; - void emit_lineinfo(raw_ostream &Out, std::vector &DI) JL_NOTSAFEPOINT; + void emit_lineinfo(raw_ostream &Out, SmallVectorImpl &DI) JL_NOTSAFEPOINT; struct repeat { size_t times; @@ -169,7 +170,7 @@ class DILineInfoPrinter { void emit_lineinfo(raw_ostream &Out, DILineInfo &DI) JL_NOTSAFEPOINT { - std::vector DIvec(1); + SmallVector DIvec(1); DIvec[0] = DI; emit_lineinfo(Out, DIvec); } @@ -177,7 +178,7 @@ class DILineInfoPrinter { void emit_lineinfo(raw_ostream &Out, DIInliningInfo &DI) JL_NOTSAFEPOINT { uint32_t nframes = DI.getNumberOfFrames(); - std::vector DIvec(nframes); + SmallVector DIvec(nframes); for (uint32_t i = 0; i < DI.getNumberOfFrames(); i++) { DIvec[i] = DI.getFrame(i); } @@ -207,7 +208,7 @@ void DILineInfoPrinter::emit_finish(raw_ostream &Out) this->inline_depth = 0; } -void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector &DI) +void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, SmallVectorImpl &DI) { if (verbosity == output_none) return; @@ -217,8 +218,8 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector // compute the size of the matching prefix in the inlining information stack uint32_t nctx; for (nctx = 0; nctx < context.size() && nctx < nframes; nctx++) { - const DILineInfo &CtxLine = context.at(nctx); - const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx); + const DILineInfo &CtxLine = context[nctx]; + const DILineInfo &FrameLine = DI[nframes - 1 - nctx]; if (CtxLine != FrameLine) { break; } @@ -230,27 +231,27 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector // if so, drop all existing calls to it from the top of the context // AND check if instead the context was previously printed that way // but now has removed the recursive frames - StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';'); // last matching frame - if ((nctx < nframes && StringRef(DI.at(nframes - nctx - 1).FunctionName).rtrim(';') == method) || - (nctx < context.size() && StringRef(context.at(nctx).FunctionName).rtrim(';') == method)) { + StringRef method = StringRef(context[nctx - 1].FunctionName).rtrim(';'); // last matching frame + if ((nctx < nframes && StringRef(DI[nframes - nctx - 1].FunctionName).rtrim(';') == method) || + (nctx < 
context.size() && StringRef(context[nctx].FunctionName).rtrim(';') == method)) { update_line_only = true; // transform nctx to exclude the combined frames - while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method) + while (nctx > 0 && StringRef(context[nctx - 1].FunctionName).rtrim(';') == method) nctx -= 1; } } if (!update_line_only && nctx < context.size() && nctx < nframes) { // look at the first non-matching element to see if we are only changing the line number - const DILineInfo &CtxLine = context.at(nctx); - const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx); + const DILineInfo &CtxLine = context[nctx]; + const DILineInfo &FrameLine = DI[nframes - 1 - nctx]; if (StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';')) update_line_only = true; } } else if (nctx < context.size() && nctx < nframes) { // look at the first non-matching element to see if we are only changing the line number - const DILineInfo &CtxLine = context.at(nctx); - const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx); + const DILineInfo &CtxLine = context[nctx]; + const DILineInfo &FrameLine = DI[nframes - 1 - nctx]; if (CtxLine.FileName == FrameLine.FileName && StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';')) { update_line_only = true; @@ -262,9 +263,9 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector uint32_t npops; if (collapse_recursive) { npops = 1; - StringRef Prev = StringRef(context.at(nctx).FunctionName).rtrim(';'); + StringRef Prev = StringRef(context[nctx].FunctionName).rtrim(';'); for (uint32_t i = nctx + 1; i < context.size(); i++) { - StringRef Next = StringRef(context.at(i).FunctionName).rtrim(';'); + StringRef Next = StringRef(context[i].FunctionName).rtrim(';'); if (Prev != Next) npops += 1; Prev = Next; @@ -282,7 +283,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector } // print the new frames while (nctx < nframes) { - const DILineInfo &frame = DI.at(nframes - 1 - nctx); + const DILineInfo &frame = DI[nframes - 1 - nctx]; Out << LineStart << inlining_indent("│"); nctx += 1; context.push_back(frame); @@ -301,7 +302,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector Out << " within `" << method << "`"; if (collapse_recursive) { while (nctx < nframes) { - const DILineInfo &frame = DI.at(nframes - 1 - nctx); + const DILineInfo &frame = DI[nframes - 1 - nctx]; if (StringRef(frame.FunctionName).rtrim(';') != method) break; nctx += 1; @@ -313,10 +314,10 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector Out << "\n"; } #ifndef JL_NDEBUG - StringRef Prev = StringRef(context.at(0).FunctionName).rtrim(';'); + StringRef Prev = StringRef(context[0].FunctionName).rtrim(';'); uint32_t depth2 = 1; for (uint32_t i = 1; i < nctx; i++) { - StringRef Next = StringRef(context.at(i).FunctionName).rtrim(';'); + StringRef Next = StringRef(context[i].FunctionName).rtrim(';'); if (!collapse_recursive || Prev != Next) depth2 += 1; Prev = Next; @@ -363,6 +364,10 @@ class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter { void LineNumberAnnotatedWriter::emitFunctionAnnot( const Function *F, formatted_raw_ostream &Out) { + if (F->hasFnAttribute("julia.fsig")) { + auto sig = F->getFnAttribute("julia.fsig").getValueAsString(); + Out << "; Function Signature: " << sig << "\n"; + } InstrLoc = nullptr; DISubprogram *FuncLoc = F->getSubprogram(); if (!FuncLoc) { @@ -371,7 +376,7 @@ void 
LineNumberAnnotatedWriter::emitFunctionAnnot( FuncLoc = SP->second; } if (FuncLoc) { - std::vector DIvec(1); + SmallVector DIvec(1); DILineInfo &DI = DIvec.back(); DI.FunctionName = FuncLoc->getName().str(); DI.FileName = FuncLoc->getFilename().str(); @@ -398,7 +403,7 @@ void LineNumberAnnotatedWriter::emitInstructionAnnot( { if (NewInstrLoc && NewInstrLoc != InstrLoc) { InstrLoc = NewInstrLoc; - std::vector DIvec; + SmallVector DIvec; do { DIvec.emplace_back(); DILineInfo &DI = DIvec.back(); @@ -491,12 +496,12 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada std::string code; raw_string_ostream stream(code); - { + if (dump->F) { //RAII will release the module auto TSM = std::unique_ptr(unwrap(dump->TSM)); //If TSM is not passed in, then the context MUST be locked externally. //RAII will release the lock - Optional lock; + std::optional lock; if (TSM) { lock.emplace(TSM->getContext().getLock()); } @@ -793,19 +798,15 @@ static const char *SymbolLookup(void *DisInfo, uint64_t ReferenceValue, uint64_t static int OpInfoLookup(void *DisInfo, uint64_t PC, uint64_t Offset, -#if JL_LLVM_VERSION < 150000 - uint64_t Size, -#else uint64_t OpSize, uint64_t InstSize, -#endif int TagType, void *TagBuf) { - SymbolTable *SymTab = (SymbolTable*)DisInfo; + // SymbolTable *SymTab = (SymbolTable*)DisInfo; LLVMOpInfo1 *info = (LLVMOpInfo1*)TagBuf; memset(info, 0, sizeof(*info)); if (TagType != 1) return 0; // Unknown data format - PC += SymTab->getIP() - (uint64_t)(uintptr_t)SymTab->getMemoryObject().data(); // add offset from MemoryObject base + // PC += SymTab->getIP() - (uint64_t)(uintptr_t)SymTab->getMemoryObject().data(); // add offset from MemoryObject base // TODO: see if we knew of a relocation applied at PC // info->AddSymbol.Present = 1; // info->AddSymbol.Name = name; @@ -906,11 +907,7 @@ static void jl_dump_asm_internal( std::unique_ptr CE; std::unique_ptr MAB; if (ShowEncoding) { -#if JL_LLVM_VERSION >= 150000 CE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx)); -#else - CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); -#endif MAB.reset(TheTarget->createMCAsmBackend(*STI, *MRI, Options)); } @@ -920,16 +917,18 @@ static void jl_dump_asm_internal( // LLVM will destroy the formatted stream, and we keep the raw stream. std::unique_ptr ustream(new formatted_raw_ostream(rstream)); std::unique_ptr Streamer( - TheTarget->createAsmStreamer(Ctx, std::move(ustream), /*asmverbose*/true, - /*useDwarfDirectory*/ true, - IP.release(), - std::move(CE), std::move(MAB), - /*ShowInst*/ false)); -#if JL_LLVM_VERSION >= 140000 - Streamer->initSections(true, *STI); +#if JL_LLVM_VERSION >= 190000 + TheTarget->createAsmStreamer(Ctx, std::move(ustream), + + IP.release(), std::move(CE), std::move(MAB)) #else - Streamer->InitSections(true); + TheTarget->createAsmStreamer(Ctx, std::move(ustream), /*asmverbose*/ true, + /*useDwarfDirectory*/ true, IP.release(), + std::move(CE), std::move(MAB), + /*ShowInst*/ false) #endif + ); + Streamer->initSections(true, *STI); // Make the MemoryObject wrapper ArrayRef memoryObject(const_cast((const uint8_t*)Fptr),Fsize); @@ -1045,9 +1044,6 @@ static void jl_dump_asm_internal( MCInst Inst; MCDisassembler::DecodeStatus S; FuncMCView view = memoryObject.slice(Index); -#if JL_LLVM_VERSION < 150000 -#define getCommentOS() GetCommentOS() -#endif S = DisAsm->getInstruction(Inst, insSize, view, 0, /*CStream*/ pass != 0 ? 
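[editor's note] The DILineInfoPrinter logic above folds directly recursive inlined frames so a deep self-recursive inline chain is not printed once per level. The sketch below is a much-simplified standalone illustration of that collapsing idea, operating on plain strings and using a repeat count rather than the printer's actual indentation format.

#include <cstdio>
#include <string>
#include <vector>

// Print an inlining stack, folding runs of the same function into one line
// with a repetition count (outermost frame first).
static void print_collapsed(const std::vector<std::string> &frames) {
    for (size_t i = 0; i < frames.size();) {
        size_t j = i;
        while (j < frames.size() && frames[j] == frames[i])
            ++j;                                   // extent of the recursive run
        if (j - i > 1)
            std::printf("%s (x%zu recursive)\n", frames[i].c_str(), j - i);
        else
            std::printf("%s\n", frames[i].c_str());
        i = j;
    }
}

int main() {
    print_collapsed({"main", "solve", "solve", "solve", "kernel"});
    // prints:
    //   main
    //   solve (x3 recursive)
    //   kernel
}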
Streamer->getCommentOS () : nulls()); if (pass != 0 && Streamer->getCommentOS ().tell() > 0) @@ -1058,6 +1054,8 @@ static void jl_dump_asm_internal( if (insSize == 0) // skip illegible bytes #if defined(_CPU_PPC_) || defined(_CPU_PPC64_) || defined(_CPU_ARM_) || defined(_CPU_AARCH64_) insSize = 4; // instructions are always 4 bytes +#elif defined(_CPU_RISCV64_) + insSize = 2; // instructions can be 2 bytes when compressed #else insSize = 1; // attempt to slide 1 byte forward #endif @@ -1105,7 +1103,7 @@ static void jl_dump_asm_internal( const MCOperand &OpI = Inst.getOperand(Op); if (OpI.isImm()) { int64_t imm = OpI.getImm(); - if (opinfo.OpInfo[Op].OperandType == MCOI::OPERAND_PCREL) + if (opinfo.operands()[Op].OperandType == MCOI::OPERAND_PCREL) imm += Fptr + Index; const char *name = DisInfo.lookupSymbolName(imm); if (name) @@ -1151,7 +1149,11 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) { return &MMIWP->getMMI().getContext(); } +#if JL_LLVM_VERSION >= 190000 +class LineNumberPrinterHandler : public DebugHandlerBase { +#else class LineNumberPrinterHandler : public AsmPrinterHandler { +#endif MCStreamer &S; LineNumberAnnotatedWriter LinePrinter; std::string Buffer; @@ -1160,7 +1162,11 @@ class LineNumberPrinterHandler : public AsmPrinterHandler { public: LineNumberPrinterHandler(AsmPrinter &Printer, const char *debuginfo) - : S(*Printer.OutStreamer), + : +#if JL_LLVM_VERSION >= 190000 + DebugHandlerBase(&Printer), +#endif + S(*Printer.OutStreamer), LinePrinter("; ", true, debuginfo), RawStream(Buffer), Stream(RawStream) {} @@ -1179,12 +1185,20 @@ class LineNumberPrinterHandler : public AsmPrinterHandler { //virtual void beginModule(Module *M) override {} virtual void endModule() override {} /// note that some AsmPrinter implementations may not call beginFunction at all +#if JL_LLVM_VERSION >= 190000 + virtual void beginFunctionImpl(const MachineFunction *MF) override { +#else virtual void beginFunction(const MachineFunction *MF) override { +#endif LinePrinter.emitFunctionAnnot(&MF->getFunction(), Stream); emitAndReset(); } //virtual void markFunctionEnd() override {} +#if JL_LLVM_VERSION >= 190000 + virtual void endFunctionImpl(const MachineFunction *MF) override { +#else virtual void endFunction(const MachineFunction *MF) override { +#endif LinePrinter.emitEnd(Stream); emitAndReset(); } @@ -1207,7 +1221,7 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const { // precise printing via IR assembler SmallVector ObjBufferSV; - { // scope block + if (dump->F) { // scope block also auto TSM = std::unique_ptr(unwrap(dump->TSM)); llvm::raw_svector_ostream asmfile(ObjBufferSV); TSM->withModuleDo([&](Module &m) { @@ -1227,7 +1241,11 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (emit_mc) { raw_svector_ostream obj_OS(ObjBufferSV); +#if JL_LLVM_VERSION >= 180000 + if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CodeGenFileType::ObjectFile, false, nullptr)) +#else if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr)) +#endif return jl_an_empty_string; TSM->withModuleDo([&](Module &m) { PM.run(m); }); } @@ -1252,23 +1270,27 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const STI, MRI, TM->Options.MCOptions)); std::unique_ptr MCE; if (binary) { // enable MCAsmStreamer::AddEncodingComment printing -#if JL_LLVM_VERSION >= 150000 
MCE.reset(TM->getTarget().createMCCodeEmitter(MII, *Context)); -#else - MCE.reset(TM->getTarget().createMCCodeEmitter(MII, MRI, *Context)); -#endif } auto FOut = std::make_unique(asmfile); std::unique_ptr S(TM->getTarget().createAsmStreamer( - *Context, std::move(FOut), true, - true, InstPrinter, - std::move(MCE), std::move(MAB), - false)); +#if JL_LLVM_VERSION >= 190000 + *Context, std::move(FOut), InstPrinter, std::move(MCE), std::move(MAB) +#else + *Context, std::move(FOut), true, true, InstPrinter, std::move(MCE), + std::move(MAB), false +#endif + )); std::unique_ptr Printer( TM->getTarget().createAsmPrinter(*TM, std::move(S))); +#if JL_LLVM_VERSION >= 190000 + Printer->addDebugHandler( + std::make_unique(*Printer, debuginfo)); +#else Printer->addAsmPrinterHandler(AsmPrinter::HandlerInfo( std::unique_ptr(new LineNumberPrinterHandler(*Printer, debuginfo)), "emit", "Debug Info Emission", "Julia", "Julia::LineNumberPrinterHandler Markup")); +#endif if (!Printer) return jl_an_empty_string; PM.add(Printer.release()); diff --git a/src/dlload.c b/src/dlload.c index ffa9a053d5f1c..91980cc4ecbbf 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -188,7 +188,7 @@ JL_DLLEXPORT JL_NO_SANITIZE void *jl_dlopen(const char *filename, unsigned flags dlopen = (dlopen_prototype*)dlsym(RTLD_NEXT, "dlopen"); if (!dlopen) return NULL; - void *libdl_handle = dlopen("libdl.so", RTLD_NOW | RTLD_NOLOAD); + void *libdl_handle = dlopen("libdl.so.2", RTLD_NOW | RTLD_NOLOAD); assert(libdl_handle); dlopen = (dlopen_prototype*)dlsym(libdl_handle, "dlopen"); dlclose(libdl_handle); @@ -309,10 +309,10 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, */ if (!abspath && !is_atpath && jl_base_module != NULL) { jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"), 0); - jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL); + jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_get_binding_value(b) : NULL); if (DL_LOAD_PATH != NULL) { size_t j; - for (j = 0; j < jl_array_len(DL_LOAD_PATH); j++) { + for (j = 0; j < jl_array_nrows(DL_LOAD_PATH); j++) { char *dl_path = jl_string_data(jl_array_ptr_data(DL_LOAD_PATH)[j]); size_t len = strlen(dl_path); if (len == 0) @@ -435,6 +435,13 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t // Look for symbols in internal libraries JL_DLLEXPORT const char *jl_dlfind(const char *f_name) { +#ifdef _OS_FREEBSD_ + // This is a workaround for FreeBSD <= 13.2 which do not have + // https://cgit.freebsd.org/src/commit/?id=21a52f99440c9bec7679f3b0c5c9d888901c3694 + // (See https://github.com/JuliaLang/julia/issues/50846) + if (strcmp(f_name, "dl_iterate_phdr") == 0) + return JL_EXE_LIBNAME; +#endif void * dummy; if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0)) return JL_LIBJULIA_INTERNAL_DL_LIBNAME; diff --git a/src/engine.cpp b/src/engine.cpp new file mode 100644 index 0000000000000..858f37b55e85e --- /dev/null +++ b/src/engine.cpp @@ -0,0 +1,153 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include +#include +#include +#include +#include +#include "julia.h" +#include "julia_internal.h" +#include "julia_assert.h" + +using namespace llvm; + +struct ReservationInfo { + int16_t tid = 0; + jl_code_instance_t *ci = nullptr; +}; + +struct InferKey { + jl_method_instance_t *mi = nullptr; + jl_value_t *owner = nullptr; +}; + +template<> struct llvm::DenseMapInfo { + using FirstInfo = DenseMapInfo; + using SecondInfo = DenseMapInfo; + + static inline InferKey getEmptyKey() { + return InferKey{FirstInfo::getEmptyKey(), + SecondInfo::getEmptyKey()}; + } + + static inline InferKey getTombstoneKey() { + return InferKey{FirstInfo::getTombstoneKey(), + SecondInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const InferKey& PairVal) { + return detail::combineHashValue(FirstInfo::getHashValue(PairVal.mi), + SecondInfo::getHashValue(PairVal.owner)); + } + + static bool isEqual(const InferKey &LHS, const InferKey &RHS) { + return LHS.mi == RHS.mi && LHS.owner == RHS.owner; + } +}; + +static std::mutex engine_lock; // n.b. this lock is only ever held briefly +static std::condition_variable engine_wait; // but it may be waiting a while in this state +// map from MethodInstance to threadid that owns it currently for inference +static DenseMap Reservations; +// vector of which threads are blocked and which lease they need +static SmallVector Awaiting; // (this could be merged into ptls also) + + +#ifdef __cplusplus +extern "C" { +#endif + +jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner) +{ + jl_task_t *ct = jl_current_task; + ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph + jl_code_instance_t *ci = jl_new_codeinst_uninit(m, owner); // allocate a placeholder + JL_GC_PUSH1(&ci); + auto tid = jl_atomic_load_relaxed(&ct->tid); + if (([tid, m, owner, ci] () -> bool { // necessary scope block / lambda for unique_lock + jl_unique_gcsafe_lock lock(engine_lock); + InferKey key{m, owner}; + if ((signed)Awaiting.size() < tid + 1) + Awaiting.resize(tid + 1); + while (1) { + auto record = Reservations.find(key); + if (record == Reservations.end()) { + Reservations[key] = ReservationInfo{tid, ci}; + return false; + } + // before waiting, need to run deadlock/cycle detection + // there is a cycle if the thread holding our lease is blocked + // and waiting for (transitively) any lease that is held by this thread + auto wait_tid = record->second.tid; + while (1) { + if (wait_tid == tid) + return true; + if ((signed)Awaiting.size() <= wait_tid) + break; // no cycle, since it is running (and this should be unreachable) + auto key2 = Awaiting[wait_tid]; + if (key2.mi == nullptr) + break; // no cycle, since it is running + auto record2 = Reservations.find(key2); + if (record2 == Reservations.end()) + break; // no cycle, since it is about to resume + assert(wait_tid != record2->second.tid); + wait_tid = record2->second.tid; + } + Awaiting[tid] = key; + lock.wait(engine_wait); + Awaiting[tid] = InferKey{}; + } + })()) + ct->ptls->engine_nqueued--; + JL_GC_POP(); + return ci; +} + +int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner) +{ + jl_task_t *ct = jl_current_task; + InferKey key = {m, owner}; + std::unique_lock lock(engine_lock); + auto record = Reservations.find(key); + return record != Reservations.end() && record->second.tid == jl_atomic_load_relaxed(&ct->tid); +} + +STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT +{ + return (bits & 
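[editor's note] The reservation loop in jl_engine_reserve above detects inference deadlocks by walking the wait-for chain: if the thread holding the lease we want is itself blocked, follow the lease it is waiting on, and so on; reaching our own thread id means a cycle, so the caller claims the work locally instead of blocking. A standalone sketch of that chain walk over hypothetical holder/waiting tables (plain ints stand in for MethodInstance keys and thread ids):

#include <cstdio>
#include <unordered_map>

// holder[key]  = thread id currently inferring `key`
// waiting[tid] = key that thread `tid` is blocked on (absent if running)
// Assumes existing chains are acyclic, as guaranteed when every waiter runs
// this check before it blocks.
static bool would_deadlock(int self, int key,
                           const std::unordered_map<int, int> &holder,
                           const std::unordered_map<int, int> &waiting) {
    auto h = holder.find(key);
    while (h != holder.end()) {
        int tid = h->second;
        if (tid == self)
            return true;                 // chain loops back to us: cycle
        auto w = waiting.find(tid);
        if (w == waiting.end())
            return false;                // holder is running and will finish
        h = holder.find(w->second);      // follow what the holder waits on
    }
    return false;                        // nobody holds it (about to resume)
}

int main() {
    // Thread 1 holds key 10 and waits on key 20; thread 2 holds key 20.
    std::unordered_map<int, int> holder{{10, 1}, {20, 2}};
    std::unordered_map<int, int> waiting{{1, 20}};
    std::printf("%d\n", would_deadlock(2, 10, holder, waiting)); // 1: 2 -> 10 -> 1 -> 20 -> 2
    std::printf("%d\n", would_deadlock(3, 10, holder, waiting)); // 0: chain ends at a running thread
}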
GC_MARKED) != 0; +} + +void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) +{ + std::unique_lock lock(engine_lock); + bool any = false; + for (auto I = Reservations.begin(); I != Reservations.end(); ++I) { + jl_code_instance_t *ci = I->second.ci; + if (!gc_marked(jl_astaggedvalue(ci)->bits.gc)) { + auto tid = I->second.tid; + Reservations.erase(I); + jl_ptls_t ptls2 = gc_all_tls_states[tid]; + ptls2->engine_nqueued--; + any = true; + } + } + if (any) + engine_wait.notify_all(); +} + +void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src) +{ + jl_task_t *ct = jl_current_task; + std::unique_lock lock(engine_lock); + auto record = Reservations.find(InferKey{jl_get_ci_mi(ci), ci->owner}); + if (record == Reservations.end() || record->second.ci != ci) + return; + assert(jl_atomic_load_relaxed(&ct->tid) == record->second.tid); + ct->ptls->engine_nqueued--; // re-enables finalizers, but doesn't immediately try to run them + Reservations.erase(record); + engine_wait.notify_all(); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/features_x86.h b/src/features_x86.h index 08f979df546b7..b6e2b23985b4f 100644 --- a/src/features_x86.h +++ b/src/features_x86.h @@ -5,6 +5,13 @@ #else #define JL_X86_64ONLY_VER(x) x #endif +// The code is similar to what is here so the bits can be used as reference +// https://github.com/llvm/llvm-project/blob/3f7905733820851bc4f65cb4af693c3101cbf20d/llvm/lib/TargetParser/Host.cpp#L1257 + +// The way the bits here work is an index into the features array. This is a bit array +// The index works as follows: +// 32*i + j where i is the index into the array and j is the bit in the array. +// There is a reference to what each index corresponds to in _get_host_cpu // X86 features definition // EAX=1: ECX @@ -45,15 +52,15 @@ JL_FEATURE_DEF(avx512ifma, 32 * 2 + 21, 0) // JL_FEATURE_DEF(pcommit, 32 * 2 + 22, 0) // Deprecated JL_FEATURE_DEF(clflushopt, 32 * 2 + 23, 0) JL_FEATURE_DEF(clwb, 32 * 2 + 24, 0) -JL_FEATURE_DEF(avx512pf, 32 * 2 + 26, 0) -JL_FEATURE_DEF(avx512er, 32 * 2 + 27, 0) +// JL_FEATURE_DEF(avx512pf, 32 * 2 + 26, 0) // Deprecated in LLVM 19 +// JL_FEATURE_DEF(avx512er, 32 * 2 + 27, 0) // Deprecated in LLVM 19 JL_FEATURE_DEF(avx512cd, 32 * 2 + 28, 0) JL_FEATURE_DEF(sha, 32 * 2 + 29, 0) JL_FEATURE_DEF(avx512bw, 32 * 2 + 30, 0) JL_FEATURE_DEF(avx512vl, 32 * 2 + 31, 0) // EAX=7,ECX=0: ECX -JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0) +// JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0) // Deprecated in LLVM 19 JL_FEATURE_DEF(avx512vbmi, 32 * 3 + 1, 0) JL_FEATURE_DEF(pku, 32 * 3 + 4, 0) // ospke JL_FEATURE_DEF(waitpkg, 32 * 3 + 5, 0) @@ -79,6 +86,7 @@ JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0) JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000) JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000) JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0) +// JL_FEATURE_DEF(ibt, 32 * 4 + 20, 0) JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16") JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000) JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile") @@ -110,10 +118,28 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0) JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0) // EAX=7,ECX=1: EAX +JL_FEATURE_DEF(sha512, 32 * 9 + 0, 170000) +JL_FEATURE_DEF(sm3, 32 * 9 + 1, 170000) +JL_FEATURE_DEF(sm4, 32 * 9 + 2, 170000) +JL_FEATURE_DEF(raoint, 32 * 9 + 3, 170000) JL_FEATURE_DEF(avxvnni, 32 * 9 + 4, 120000) JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0) +JL_FEATURE_DEF(cmpccxadd, 32 * 9 + 7, 160000) +JL_FEATURE_DEF_NAME(amx_fp16, 32 * 9 + 21, 160000, "amx-fp16") +JL_FEATURE_DEF(hreset, 32 * 9 + 22, 160000) 
+JL_FEATURE_DEF(avxifma, 32 * 9 + 23, 160000) + +// EAX=7,ECX=1: EBX +JL_FEATURE_DEF(avxvnniint8, 32 * 10 + 4, 160000) +JL_FEATURE_DEF(avxneconvert, 32 * 10 + 5, 160000) +JL_FEATURE_DEF_NAME(amx_complex, 32 * 10 + 8, 170000, "amx-complex") +JL_FEATURE_DEF(avxvnniint16, 32 * 10 + 10, 170000) +JL_FEATURE_DEF(prefetchi, 32 * 10 + 14, 160000) +JL_FEATURE_DEF(usermsr, 32 * 10 + 15, 170000) +// JL_FEATURE_DEF(avx10, 32 * 10 + 19, 170000) // TODO: What to do about avx10 and it's mess? +// JL_FEATURE_DEF(apxf, 32 * 10 + 21, 190000) // EAX=0x14,ECX=0: EBX -JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0) +JL_FEATURE_DEF(ptwrite, 32 * 11 + 4, 0) #undef JL_X86_64ONLY_VER diff --git a/src/flisp/compiler.lsp b/src/flisp/compiler.lsp index fdc516dce3ea8..e5a79e8fee6bb 100644 --- a/src/flisp/compiler.lsp +++ b/src/flisp/compiler.lsp @@ -864,7 +864,7 @@ (else #f))))))) -; From SRFI 89 by Marc Feeley (http://srfi.schemers.org/srfi-89/srfi-89.html) +; From SRFI 89 by Marc Feeley (https://srfi.schemers.org/srfi-89/srfi-89.html) ; Copyright (C) Marc Feeley 2006. All Rights Reserved. ; ; "alist" is a list of pairs of the form "(keyword . value)" diff --git a/src/flisp/cvalues.c b/src/flisp/cvalues.c index a5635c238ba3c..749b8802dfe82 100644 --- a/src/flisp/cvalues.c +++ b/src/flisp/cvalues.c @@ -101,7 +101,7 @@ void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv) autorelease(fl_ctx, cv); } -static value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz) +value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz) { cprim_t *pcp = (cprim_t*)alloc_words(fl_ctx, CPRIM_NWORDS-1+NWORDS(sz)); pcp->type = type; diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h index b031e456cd3fe..f8dd1cfd81ed0 100644 --- a/src/flisp/flisp.h +++ b/src/flisp/flisp.h @@ -158,7 +158,7 @@ value_t fl_cons(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT; value_t fl_list2(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT; value_t fl_listn(fl_context_t *fl_ctx, size_t n, ...) 
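[editor's note] Tying back to the indexing comment at the top of the features_x86.h hunk above: each JL_FEATURE_DEF records its position as 32*i + j, i.e. word i of a packed uint32_t array and bit j within that word. A tiny standalone sketch of how such an index is set and tested; the array size is made up for illustration, the avx512cd index is the one from the table.

#include <cstdint>
#include <cstdio>

// Packed feature bits: index = 32*i + j selects word i, bit j.
static void set_feature(uint32_t *features, unsigned idx) {
    features[idx / 32] |= (uint32_t)1 << (idx % 32);
}

static bool test_feature(const uint32_t *features, unsigned idx) {
    return (features[idx / 32] >> (idx % 32)) & 1;
}

int main() {
    uint32_t features[12] = {0};        // enough words for indices up to 32*11+31
    unsigned avx512cd = 32 * 2 + 28;    // index taken from the table above
    set_feature(features, avx512cd);
    std::printf("%d %d\n", test_feature(features, avx512cd),
                test_feature(features, 32 * 2 + 29));   // 1 0
}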
JL_NOTSAFEPOINT; value_t symbol(fl_context_t *fl_ctx, const char *str) JL_NOTSAFEPOINT; -char *symbol_name(fl_context_t *fl_ctx, value_t v); +char *symbol_name(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT; int fl_is_keyword_name(const char *str, size_t len); value_t alloc_vector(fl_context_t *fl_ctx, size_t n, int init); size_t llength(value_t v); @@ -328,6 +328,7 @@ typedef float fl_float_t; typedef value_t (*builtin_t)(fl_context_t*, value_t*, uint32_t); value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT; +value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT; value_t cvalue_no_finalizer(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT; void add_finalizer(fl_context_t *fl_ctx, cvalue_t *cv); void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv); diff --git a/src/flisp/iostream.c b/src/flisp/iostream.c index b2b2477bb43c6..c1c6d965d2917 100644 --- a/src/flisp/iostream.c +++ b/src/flisp/iostream.c @@ -354,7 +354,7 @@ value_t fl_ioreaduntil(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) ios_setbuf(&dest, data, 80, 0); char delim = get_delim_arg(fl_ctx, args[1], "io.readuntil"); ios_t *src = toiostream(fl_ctx, args[0], "io.readuntil"); - size_t n = ios_copyuntil(&dest, src, delim); + size_t n = ios_copyuntil(&dest, src, delim, 1); cv->len = n; if (dest.buf != data) { // outgrew initial space @@ -376,7 +376,7 @@ value_t fl_iocopyuntil(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) ios_t *dest = toiostream(fl_ctx, args[0], "io.copyuntil"); ios_t *src = toiostream(fl_ctx, args[1], "io.copyuntil"); char delim = get_delim_arg(fl_ctx, args[2], "io.copyuntil"); - return size_wrap(fl_ctx, ios_copyuntil(dest, src, delim)); + return size_wrap(fl_ctx, ios_copyuntil(dest, src, delim, 1)); } value_t fl_iocopy(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index f29e3972755c5..07d074e1fb80b 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -76,7 +76,7 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat) wc != 0x233f && // notslash wc != 0x00a6) || // broken bar - // math symbol (category Sm) whitelist + // math symbol (category Sm) allowlist (wc >= 0x2140 && wc <= 0x2a1c && ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄ wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿ @@ -405,7 +405,7 @@ value_t fl_string_only_julia_char(fl_context_t *fl_ctx, value_t *args, uint32_t uint8_t *s = (uint8_t*)cvalue_data(args[0]); size_t len = cv_len((cvalue_t*)ptr(args[0])); uint32_t u = _string_only_julia_char(s, len); - if (u == (uint32_t)-1) + if (u == UINT32_MAX) return fl_ctx->F; return fl_list2(fl_ctx, fl_ctx->jl_char_sym, mk_uint32(fl_ctx, u)); } diff --git a/src/flisp/print.c b/src/flisp/print.c index 2b20d0d98b225..a6f633c2e6701 100644 --- a/src/flisp/print.c +++ b/src/flisp/print.c @@ -518,7 +518,7 @@ static void print_string(fl_context_t *fl_ctx, ios_t *f, char *str, size_t sz) } else { while (i < sz) { - size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, 1, 0); + size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, "\"", 0); outsn(fl_ctx, buf, f, n-1); } } diff --git a/src/flisp/read.c b/src/flisp/read.c index 9a480e0536c7a..7a6039323a988 100644 --- a/src/flisp/read.c +++ b/src/flisp/read.c @@ -303,7 +303,7 @@ static uint32_t peek(fl_context_t *fl_ctx) fl_ctx->readtokval = fixnum(x); } else if (c == '!') { - // #! single line comment for shbang script support + // #! 
single line comment for shebang script support do { ch = ios_getc(readF(fl_ctx)); } while (ch != IOS_EOF && (char)ch != '\n'); diff --git a/src/flisp/table.c b/src/flisp/table.c index 1d8aed358e88d..8836c93f81513 100644 --- a/src/flisp/table.c +++ b/src/flisp/table.c @@ -102,7 +102,7 @@ value_t fl_table(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) else k = arg; } - if (h->table != &h->_space[0]) { + if (cnt <= HT_N_INLINE && h->table != &h->_space[0]) { // We expected to use the inline table, but we ended up outgrowing it. // Make sure to register the finalizer. add_finalizer(fl_ctx, (cvalue_t*)ptr(nt)); diff --git a/src/flisp/unittest.lsp b/src/flisp/unittest.lsp index 584d5c81225e8..16774a97e3233 100644 --- a/src/flisp/unittest.lsp +++ b/src/flisp/unittest.lsp @@ -267,4 +267,23 @@ (assert (equal? `(a `(b c)) '(a (quasiquote (b c))))) (assert (equal? ````x '```x)) +;; make many initialized tables large enough not to be stored in-line +(for 1 100 + (lambda (i) + (table eq? 2 eqv? 2 + equal? 2 atom? 1 + not 1 null? 1 + boolean? 1 symbol? 1 + number? 1 bound? 1 + pair? 1 builtin? 1 + vector? 1 fixnum? 1 + cons 2 car 1 + cdr 1 set-car! 2 + set-cdr! 2 = 2 + < 2 compare 2 + aref 2 aset! 3 + div0 2))) +;; now allocate enough to trigger GC +(for 1 8000000 (lambda (i) (cons 1 2))) + #t diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 1bcbeb2189f5f..5b462d48cd2de 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -3,13 +3,13 @@ #include "gc-alloc-profiler.h" #include "julia_internal.h" -#include "gc.h" + +#include "llvm/ADT/SmallVector.h" #include -#include using std::string; -using std::vector; +using llvm::SmallVector; struct jl_raw_backtrace_t { jl_bt_element_t *data; @@ -27,17 +27,17 @@ struct jl_raw_alloc_t { // == These structs define the global singleton profile buffer that will be used by // callbacks to store profile results. == struct jl_per_thread_alloc_profile_t { - vector allocs; + SmallVector allocs; }; struct jl_alloc_profile_t { double sample_rate; - vector per_thread_profiles; + SmallVector per_thread_profiles; }; struct jl_combined_results { - vector combined_allocs; + SmallVector combined_allocs; }; // == Global variables manipulated by callbacks == diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h index 3fd8bf4388a0a..fcd8e45caa2d8 100644 --- a/src/gc-alloc-profiler.h +++ b/src/gc-alloc-profiler.h @@ -35,6 +35,7 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t extern int g_alloc_profile_enabled; +// This should only be used from _deprecated_ code paths. We shouldn't see UNKNOWN anymore. #define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03) static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT { diff --git a/src/gc-common.c b/src/gc-common.c new file mode 100644 index 0000000000000..c07b707b17709 --- /dev/null +++ b/src/gc-common.c @@ -0,0 +1,723 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include "gc-common.h" +#include "julia.h" +#include "julia_atomics.h" +#include "julia_gcext.h" +#include "julia_assert.h" +#include "threading.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// =========================================================================== // +// GC Metrics +// =========================================================================== // + +jl_gc_num_t gc_num = {0}; + +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + +// =========================================================================== // +// GC Callbacks +// =========================================================================== // + +jl_gc_callback_list_t *gc_cblist_root_scanner; +jl_gc_callback_list_t *gc_cblist_task_scanner; +jl_gc_callback_list_t *gc_cblist_pre_gc; +jl_gc_callback_list_t *gc_cblist_post_gc; +jl_gc_callback_list_t *gc_cblist_notify_external_alloc; +jl_gc_callback_list_t *gc_cblist_notify_external_free; +jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; + +static void jl_gc_register_callback(jl_gc_callback_list_t **list, + jl_gc_cb_func_t func) +{ + while (*list != NULL) { + if ((*list)->func == func) + return; + list = &((*list)->next); + } + *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t)); + (*list)->next = NULL; + (*list)->func = func; +} + +static void jl_gc_deregister_callback(jl_gc_callback_list_t **list, + jl_gc_cb_func_t func) +{ + while (*list != NULL) { + if ((*list)->func == func) { + jl_gc_callback_list_t *tmp = *list; + (*list) = (*list)->next; + free(tmp); + return; + } + list = &((*list)->next); + } +} + +JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb); +} + +JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb); +} + +JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb); +} + +JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb); +} + +JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb); +} + +JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); +} + +JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); +} 
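[editor's note] The callback registration helpers above walk a singly linked list through a pointer-to-pointer, so appending at the tail and unlinking an interior node share one code path and no separate "prev" pointer or head special case is needed. A simplified standalone sketch of that idiom with a hypothetical Node type:

#include <cstdio>
#include <cstdlib>

struct Node { void (*func)(); Node *next; };

// Append `f` unless already present, walking with a pointer-to-pointer.
static void register_cb(Node **list, void (*f)()) {
    while (*list != nullptr) {
        if ((*list)->func == f)
            return;                       // already registered
        list = &(*list)->next;
    }
    *list = (Node *)std::malloc(sizeof(Node));
    (*list)->func = f;
    (*list)->next = nullptr;
}

static void deregister_cb(Node **list, void (*f)()) {
    while (*list != nullptr) {
        if ((*list)->func == f) {
            Node *dead = *list;
            *list = dead->next;           // unlink without a "prev" pointer
            std::free(dead);
            return;
        }
        list = &(*list)->next;
    }
}

static void hello() { std::puts("hello"); }

int main() {
    Node *callbacks = nullptr;
    register_cb(&callbacks, hello);
    register_cb(&callbacks, hello);                        // duplicate: ignored
    for (Node *n = callbacks; n; n = n->next) n->func();   // prints once
    deregister_cb(&callbacks, hello);
    std::printf("empty: %d\n", callbacks == nullptr);
}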
+ +// =========================================================================== // +// malloc wrappers, aligned allocation +// =========================================================================== // + +#if defined(_OS_WINDOWS_) +// helper function based partly on wine msvcrt80+ heap.c +// but with several fixes to improve the correctness of the computation and remove unnecessary parameters +#define SAVED_PTR(x) ((void *)((DWORD_PTR)((char *)x - sizeof(void *)) & \ + ~(sizeof(void *) - 1))) +static size_t _aligned_msize(void *p) +{ + void *alloc_ptr = *(void**)SAVED_PTR(p); + return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr); +} +#undef SAVED_PTR +#endif + +size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT +{ +#if defined(_OS_WINDOWS_) + if (isaligned) + return _aligned_msize(p); + else + return _msize(p); +#elif defined(_OS_DARWIN_) + return malloc_size(p); +#else + return malloc_usable_size(p); +#endif +} + +// =========================================================================== // +// Finalization +// =========================================================================== // + +jl_mutex_t finalizers_lock; +arraylist_t finalizer_list_marked; +arraylist_t to_finalize; +JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0; + +void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT +{ + arraylist_push(&to_finalize, o); + arraylist_push(&to_finalize, f); + // doesn't need release, since we'll keep checking (on the reader) until we see the work and + // release our lock, and that will have a release barrier by then + jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); +} + +void run_finalizer(jl_task_t *ct, void *o, void *ff) +{ + int ptr_finalizer = gc_ptr_tag(o, 1); + o = gc_ptr_clear_tag(o, 3); + if (ptr_finalizer) { + ((void (*)(void*))ff)((void*)o); + return; + } + JL_TRY { + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); + ct->world_age = last_age; + } + JL_CATCH { + jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct)); + jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); + jlbacktrace(); // written to STDERR_FILENO + } +} + +// if `need_sync` is true, the `list` is the `finalizers` list of another +// thread and we need additional synchronizations +static void finalize_object(arraylist_t *list, jl_value_t *o, + arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT +{ + // The acquire load makes sure that the first `len` objects are valid. + // If `need_sync` is true, all mutations of the content should be limited + // to the first `oldlen` elements and no mutation is allowed after the + // new length is published with the `cmpxchg` at the end of the function. + // This way, the mutation should not conflict with the owning thread, + // which only writes to locations later than `len` + // and will not resize the buffer without acquiring the lock. + size_t len = need_sync ? 
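[editor's note] The SAVED_PTR macro above recovers the bookkeeping slot that the aligned allocator stores just before the aligned block: step back one pointer width and round down to pointer alignment. The stash-the-raw-pointer-before-the-aligned-block technique can be written portably; the sketch below illustrates the idea with its own layout, it is not the msvcrt layout that _aligned_msize relies on.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Allocate `size` bytes aligned to `align` (a power of two >= sizeof(void*)),
// stashing the raw malloc pointer in the slot just before the returned block.
static void *aligned_malloc(size_t size, size_t align) {
    void *raw = std::malloc(size + align + sizeof(void *));
    if (!raw) return nullptr;
    uintptr_t user = ((uintptr_t)raw + sizeof(void *) + align - 1) & ~(uintptr_t)(align - 1);
    ((void **)user)[-1] = raw;            // the slot SAVED_PTR-style code recovers
    return (void *)user;
}

static void aligned_free(void *p) {
    if (p) std::free(((void **)p)[-1]);   // read back the stashed raw pointer
}

int main() {
    void *p = aligned_malloc(100, 64);
    if (!p) return 1;
    std::printf("aligned: %d\n", ((uintptr_t)p & 63) == 0);  // 1
    std::memset(p, 0, 100);
    aligned_free(p);
}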
jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len; + size_t oldlen = len; + void **items = list->items; + size_t j = 0; + for (size_t i = 0; i < len; i += 2) { + void *v = items[i]; + int move = 0; + if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) { + void *f = items[i + 1]; + move = 1; + arraylist_push(copied_list, v); + arraylist_push(copied_list, f); + } + if (move || __unlikely(!v)) { + // remove item + } + else { + if (j < i) { + items[j] = items[i]; + items[j+1] = items[i+1]; + } + j += 2; + } + } + len = j; + if (oldlen == len) + return; + if (need_sync) { + // The memset needs to be unconditional since the thread might have + // already read the length. + // The `memset` (like any other content mutation) has to be done + // **before** the `cmpxchg` which publishes the length. + memset(&items[len], 0, (oldlen - len) * sizeof(void*)); + jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len); + } + else { + list->len = len; + } +} + +// The first two entries are assumed to be empty and the rest are assumed to +// be pointers to `jl_value_t` objects +static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT +{ + void **items = list->items; + items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); + items[1] = ct->gcstack; + ct->gcstack = (jl_gcframe_t*)items; +} + +// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock +// to be hold for the current thread and will release the lock when the +// function returns. +static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE +{ + // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring + // of `finalizer`) in a finalizer: + uint8_t sticky = ct->sticky; + // empty out the first two entries for the GC frame + arraylist_push(list, list->items[0]); + arraylist_push(list, list->items[1]); + jl_gc_push_arraylist(ct, list); + void **items = list->items; + size_t len = list->len; + JL_UNLOCK_NOGC(&finalizers_lock); + // run finalizers in reverse order they were added, so lower-level finalizers run last + for (size_t i = len-4; i >= 2; i -= 2) + run_finalizer(ct, items[i], items[i + 1]); + // first entries were moved last to make room for GC frame metadata + run_finalizer(ct, items[len-2], items[len-1]); + // matches the jl_gc_push_arraylist above + JL_GC_POP(); + ct->sticky = sticky; +} + +static uint64_t finalizer_rngState[JL_RNG_SIZE]; + +void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT; + +JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) +{ + jl_rng_split(finalizer_rngState, jl_current_task->rngState); +} + +void run_finalizers(jl_task_t *ct, int finalizers_thread) +{ + // Racy fast path: + // The race here should be OK since the race can only happen if + // another thread is writing to it with the lock held. In such case, + // we don't need to run pending finalizers since the writer thread + // will flush it. 
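+    // The length is re-checked under the lock below, so a stale read here only
+    // costs an unnecessary lock acquisition (double-checked locking).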
+ if (to_finalize.len == 0) + return; + JL_LOCK_NOGC(&finalizers_lock); + if (to_finalize.len == 0) { + JL_UNLOCK_NOGC(&finalizers_lock); + return; + } + arraylist_t copied_list; + memcpy(&copied_list, &to_finalize, sizeof(copied_list)); + if (to_finalize.items == to_finalize._space) { + copied_list.items = copied_list._space; + } + jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0); + arraylist_new(&to_finalize, 0); + + uint64_t save_rngState[JL_RNG_SIZE]; + memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState)); + jl_rng_split(ct->rngState, finalizer_rngState); + + // This releases the finalizers lock. + int8_t was_in_finalizer = ct->ptls->in_finalizer; + ct->ptls->in_finalizer = !finalizers_thread; + jl_gc_run_finalizers_in_list(ct, &copied_list); + ct->ptls->in_finalizer = was_in_finalizer; + arraylist_free(&copied_list); + + memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState)); +} + +JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) +{ + if (ct == NULL) + ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0 && ptls->engine_nqueued == 0) { + run_finalizers(ct, 0); + } +} + +JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) +{ + if (ptls == NULL) + ptls = jl_current_task->ptls; + return ptls->finalizers_inhibited; +} + +JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + ptls->finalizers_inhibited++; +} + +JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void) +{ + jl_task_t *ct = jl_current_task; +#ifdef NDEBUG + ct->ptls->finalizers_inhibited--; +#else + jl_gc_enable_finalizers(ct, 1); +#endif +} + +JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) +{ + if (ct == NULL) + ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + int old_val = ptls->finalizers_inhibited; + int new_val = old_val + (on ? 
-1 : 1); + if (new_val < 0) { + JL_TRY { + jl_error(""); // get a backtrace + } + JL_CATCH { + jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n"); + // Only print the backtrace once, to avoid spamming the logs + static int backtrace_printed = 0; + if (backtrace_printed == 0) { + backtrace_printed = 1; + jlbacktrace(); // written to STDERR_FILENO + } + } + return; + } + ptls->finalizers_inhibited = new_val; + if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) { + jl_gc_run_pending_finalizers(ct); + } +} + +JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) +{ + return jl_current_task->ptls->in_finalizer; +} + +static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT +{ + void **items = flist->items; + size_t len = flist->len; + for(size_t i = 0; i < len; i+=2) { + void *v = items[i]; + void *f = items[i + 1]; + if (__unlikely(!v)) + continue; + schedule_finalization(v, f); + } + flist->len = 0; +} + +void jl_gc_run_all_finalizers(jl_task_t *ct) +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); + schedule_all_finalizers(&finalizer_list_marked); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + schedule_all_finalizers(&ptls2->finalizers); + } + // unlock here because `run_finalizers` locks this + JL_UNLOCK_NOGC(&finalizers_lock); + run_finalizers(ct, 1); +} + +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE); + arraylist_t *a = &ptls->finalizers; + // This acquire load and the release store at the end are used to + // synchronize with `finalize_object` on another thread. Apart from the GC, + // which is blocked by entering a unsafe region, there might be only + // one other thread accessing our list in `finalize_object` + // (only one thread since it needs to acquire the finalizer lock). + // Similar to `finalize_object`, all content mutation has to be done + // between the acquire and the release of the length. + size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len); + if (__unlikely(oldlen + 2 > a->max)) { + JL_LOCK_NOGC(&finalizers_lock); + // `a->len` might have been modified. 
+ // Another possibility is to always grow the array to `oldlen + 2` but + // it's simpler this way and uses slightly less memory =) + oldlen = a->len; + arraylist_grow(a, 2); + a->len = oldlen; + JL_UNLOCK_NOGC(&finalizers_lock); + } + void **items = a->items; + items[oldlen] = v; + items[oldlen + 1] = f; + jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2); +} + +JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT +{ + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); +} + +// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT +{ + assert(!gc_ptr_tag(v, 3)); + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); +} + +JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT +{ + if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) { + jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); + } + else { + jl_gc_add_finalizer_(ptls, v, f); + } +} + +JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_gc_add_finalizer_th(ptls, v, f); +} + +JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) +{ + JL_LOCK_NOGC(&finalizers_lock); + // Copy the finalizers into a temporary list so that code in the finalizer + // won't change the list as we loop through them. + // This list is also used as the GC frame when we are running the finalizers + arraylist_t copied_list; + arraylist_new(&copied_list, 0); + // No need to check the to_finalize list since the user is apparently + // still holding a reference to the object + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); + } + finalize_object(&finalizer_list_marked, o, &copied_list, 0); + if (copied_list.len > 0) { + // This releases the finalizers lock. 
+ jl_gc_run_finalizers_in_list(ct, &copied_list); + } + else { + JL_UNLOCK_NOGC(&finalizers_lock); + } + arraylist_free(&copied_list); +} + +JL_DLLEXPORT void jl_finalize(jl_value_t *o) +{ + jl_finalize_th(jl_current_task, o); +} + +// =========================================================================== // +// Threading +// =========================================================================== // + +int gc_n_threads; +jl_ptls_t* gc_all_tls_states; + +// =========================================================================== // +// Allocation +// =========================================================================== // + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + return jl_gc_counted_malloc(sz); +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + return jl_gc_counted_calloc(nmsz, 1); +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + size_t sz = memory_block_usable_size(p, 0); + return jl_gc_counted_free_with_size(p, sz); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + size_t old = p ? memory_block_usable_size(p, 0) : 0; + return jl_gc_counted_realloc_with_old_size(p, old, sz); +} + +// =========================================================================== // +// Generic Memory +// =========================================================================== // + +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
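+    // The `isaligned` flag is packed into the low bit of the pointer stored in
+    // `mallocarrays`, so the code that later frees these blocks can recover which
+    // size/free path to use (cf. memory_block_usable_size above).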
+ void *a = (void*)((uintptr_t)m | !!isaligned); + small_arraylist_push(&ptls->gc_tls_common.heap.mallocarrays, a); +} + +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = module_usings_length(m); + elsize = sizeof(struct _jl_module_using); + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// =========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +// =========================================================================== // +// MISC +// =========================================================================== // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT +{ + return jl_buff_tag; +} + +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) +{ + jl_throw(jl_memory_exception); +} + +// Sweeping mtarraylist_buffers: +// These buffers are made unreachable via `mtarraylist_resizeto` from mtarraylist.c +// and are freed at the end of GC via jl_gc_sweep_stack_pools_and_mtarraylist_buffers +void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT +{ + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls == NULL) { + continue; + } + small_arraylist_t *buffers = 
&ptls->lazily_freed_mtarraylist_buffers; + void *buf; + while ((buf = small_arraylist_pop(buffers)) != NULL) { + free(buf); + } + } +} + +#ifdef __cplusplus +} +#endif diff --git a/src/gc-common.h b/src/gc-common.h new file mode 100644 index 0000000000000..ca5e21084209e --- /dev/null +++ b/src/gc-common.h @@ -0,0 +1,225 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_GC_COMMON_H +#define JL_GC_COMMON_H + +#include "julia.h" +#include "julia_internal.h" +#ifndef _OS_WINDOWS_ +#include +#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS) +#define MAP_ANONYMOUS MAP_ANON +#endif +#endif + +#include + +#if defined(_OS_DARWIN_) +#include +#else +#include // for malloc_trim +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// =========================================================================== // +// GC Big objects +// =========================================================================== // + +JL_EXTENSION typedef struct _bigval_t { + struct _bigval_t *next; + struct _bigval_t *prev; + size_t sz; +#ifdef _P64 // Add padding so that the value is 64-byte aligned + // (8 pointers of 8 bytes each) - (4 other pointers in struct) + void *_padding[8 - 4]; +#else + // (16 pointers of 4 bytes each) - (4 other pointers in struct) + void *_padding[16 - 4]; +#endif + //struct jl_taggedvalue_t <>; + union { + uintptr_t header; + struct { + uintptr_t gc:2; + } bits; + }; + // must be 64-byte aligned here, in 32 & 64 bit modes +} bigval_t; + +// =========================================================================== // +// GC Callbacks +// =========================================================================== // + +typedef void (*jl_gc_cb_func_t)(void); + +typedef struct _jl_gc_callback_list_t { + struct _jl_gc_callback_list_t *next; + jl_gc_cb_func_t func; +} jl_gc_callback_list_t; + +extern jl_gc_callback_list_t *gc_cblist_root_scanner; +extern jl_gc_callback_list_t *gc_cblist_task_scanner; +extern jl_gc_callback_list_t *gc_cblist_pre_gc; +extern jl_gc_callback_list_t *gc_cblist_post_gc; +extern jl_gc_callback_list_t *gc_cblist_notify_external_alloc; +extern jl_gc_callback_list_t *gc_cblist_notify_external_free; +extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; + +#define gc_invoke_callbacks(ty, list, args) \ + do { \ + for (jl_gc_callback_list_t *cb = list; \ + cb != NULL; \ + cb = cb->next) \ + { \ + ((ty)(cb->func)) args; \ + } \ + } while (0) + +#ifdef __cplusplus +} +#endif + +// =========================================================================== // +// malloc wrappers, aligned allocation +// =========================================================================== // + +#if defined(_OS_WINDOWS_) +STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) +{ + return _aligned_malloc(sz ? sz : 1, align); +} +STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, + size_t align) +{ + (void)oldsz; + return _aligned_realloc(p, sz ? 
sz : 1, align); +} +STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + _aligned_free(p); +} +#else +STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return malloc(sz); +#endif + void *ptr; + if (posix_memalign(&ptr, align, sz)) + return NULL; + return ptr; +} +STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, + size_t align) +{ +#if defined(_P64) || defined(__APPLE__) + if (align <= 16) + return realloc(d, sz); +#endif + void *b = jl_malloc_aligned(sz, align); + if (b != NULL) { + memcpy(b, d, oldsz > sz ? sz : oldsz); + free(d); + } + return b; +} +STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + free(p); +} +#endif +#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) +#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) + +// =========================================================================== // +// Pointer tagging +// =========================================================================== // + +STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT +{ + return (bits & GC_MARKED) != 0; +} + +STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT +{ + return (bits & GC_OLD) != 0; +} + +STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT +{ + return (tag & ~(uintptr_t)3) | bits; +} + +STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +{ + return ((uintptr_t)v) & mask; +} + +STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +{ + return (void*)(((uintptr_t)v) & ~mask); +} + +// =========================================================================== // +// GC Metrics +// =========================================================================== // + +extern jl_gc_num_t gc_num; + +// =========================================================================== // +// Stop-the-world for GC +// =========================================================================== // +void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads); + +// =========================================================================== // +// Finalization +// =========================================================================== // + +// Protect all access to `finalizer_list_marked` and `to_finalize`. +// For accessing `ptls->finalizers`, the lock is needed if a thread +// is going to realloc the buffer (of its own list) or accessing the +// list of another thread +extern jl_mutex_t finalizers_lock; +// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. +// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer. +// If an object pointer has the second lowest bit set, the current pointer is a c object pointer. +// It must be aligned at least 4, and it finalized immediately (at "quiescence"). +// `to_finalize` should not have tagged pointers. 
+extern arraylist_t finalizer_list_marked; +extern arraylist_t to_finalize; + +void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT; +void run_finalizer(jl_task_t *ct, void *o, void *ff); +void run_finalizers(jl_task_t *ct, int finalizers_thread); +JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); + + +// =========================================================================== // +// Threading +// =========================================================================== // + +extern int gc_n_threads; +extern jl_ptls_t* gc_all_tls_states; + +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + +// =========================================================================== // +// MISC +// =========================================================================== // + +// number of stacks to always keep available per pool +#define MIN_STACK_MAPPINGS_PER_POOL 5 + +void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; +void sweep_mtarraylist_buffers(void) JL_NOTSAFEPOINT; + +#endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index bab2c5b0fa607..6e51064035b7b 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,7 +1,11 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#include "gc.h" +#include "gc-common.h" +#include "gc-stock.h" +#include "julia.h" #include +#include +#include #include // re-include assert.h without NDEBUG, @@ -80,7 +84,6 @@ void add_lostval_parent(jl_value_t *parent) innocent looking functions which allocate (and thus trigger marking) only on special cases. If you can't find it, you can try the following : - - Ensure that should_timeout() is deterministic instead of clock based. - Once you have a completely deterministic program which crashes on gc_verify, the addresses should stay constant between different runs (with same binary, same environment ...). Do not forget to turn off ASLR (linux: echo 0 > /proc/sys/kernel/randomize_va_space). 
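/*
 * Illustrative sketch (not part of the patch): the tag-bit convention documented for
 * the finalizer lists in gc-common.h above, expressed with the gc_ptr_tag /
 * gc_ptr_clear_tag helpers it defines. `obj` and `cfun` are hypothetical stand-ins.
 */
static void finalizer_tag_example(void *obj, void (*cfun)(void *))
{
    // jl_gc_add_ptr_finalizer() stores the object pointer with bit 0 set to mark
    // a raw C function pointer finalizer
    void *slot = (void *)(((uintptr_t)obj) | 1);
    // run_finalizer() tests that bit and strips both tag bits before invoking the callback
    if (gc_ptr_tag(slot, 1))
        cfun(gc_ptr_clear_tag(slot, 3));
}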
@@ -97,7 +100,7 @@ static arraylist_t bits_save[4]; static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits) { jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; - jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n]; + jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n]; jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize; while ((char*)pv <= lim) { @@ -112,7 +115,7 @@ static void gc_clear_mark_outer(int bits) { for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom); while (pg != NULL) { gc_clear_mark_page(pg, bits); pg = pg->next; @@ -132,7 +135,7 @@ static void clear_mark(int bits) } bigval_t *v; for (int i = 0; i < gc_n_threads; i++) { - v = gc_all_tls_states[i]->heap.big_objects; + v = gc_all_tls_states[i]->gc_tls.heap.young_generation_of_bigvals; while (v != NULL) { void *gcv = &v->header; if (!gc_verifying) @@ -142,7 +145,7 @@ static void clear_mark(int bits) } } - v = big_objects_marked; + v = oldest_generation_of_bigvals; while (v != NULL) { void *gcv = &v->header; if (!gc_verifying) @@ -170,7 +173,7 @@ static void gc_verify_track(jl_ptls_t ptls) return; do { jl_gc_markqueue_t mq; - jl_gc_markqueue_t *mq2 = &ptls->mark_queue; + jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue; ws_queue_t *cq = &mq.chunk_queue; ws_queue_t *q = &mq.ptr_queue; jl_atomic_store_relaxed(&cq->top, 0); @@ -230,7 +233,7 @@ void gc_verify(jl_ptls_t ptls) return; } jl_gc_markqueue_t mq; - jl_gc_markqueue_t *mq2 = &ptls->mark_queue; + jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue; ws_queue_t *cq = &mq.chunk_queue; ws_queue_t *q = &mq.ptr_queue; jl_atomic_store_relaxed(&cq->top, 0); @@ -289,7 +292,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg) int p_n = pg->pool_n; int t_n = pg->thread_n; jl_ptls_t ptls2 = gc_all_tls_states[t_n]; - jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; + jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n]; int osize = pg->osize; char *data = pg->data; char *page_begin = data + GC_PAGE_OFFSET; @@ -347,44 +350,15 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg) } } -static void gc_verify_tags_pagetable0(pagetable0_t *pagetable0) +static void gc_verify_tags_pagestack(void) { - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_verify_tags_page(pagetable0->meta[pg_i * 32 + j]); - } - } - } - } -} - -static void gc_verify_tags_pagetable1(pagetable1_t *pagetable1) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_verify_tags_pagetable0(pagetable1->meta0[pg_i * 32 + j]); - } - } - } - } -} - -static void gc_verify_tags_pagetable(void) -{ - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_verify_tags_pagetable1(memory_map.meta1[pg_i * 32 + j]); - } - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom); + while (pg != NULL) { + 
gc_verify_tags_page(pg); + pg = pg->next; } } } @@ -396,7 +370,7 @@ void gc_verify_tags(void) jl_ptls_t ptls2 = gc_all_tls_states[t_i]; for (int i = 0; i < JL_GC_N_POOLS; i++) { // for all pools, iterate its freelist - jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; + jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *next = p->freelist; jl_taggedvalue_t *last = NULL; char *allocating = gc_page_data(next); @@ -421,7 +395,7 @@ void gc_verify_tags(void) // verify that all the objects on every page are either valid julia objects // or are part of the freelist or are on the allocated half of a page - gc_verify_tags_pagetable(); + gc_verify_tags_pagestack(); } #endif @@ -563,13 +537,13 @@ static void gc_scrub_task(jl_task_t *ta) char *low; char *high; - if (ta->copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) { + if (ta->ctx.copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) { low = (char*)ptls2->stackbase - ptls2->stacksize; high = (char*)ptls2->stackbase; } - else if (ta->stkbuf) { - low = (char*)ta->stkbuf; - high = (char*)ta->stkbuf + ta->bufsz; + else if (ta->ctx.stkbuf) { + low = (char*)ta->ctx.stkbuf; + high = (char*)ta->ctx.stkbuf + ta->ctx.bufsz; } else return; @@ -603,91 +577,6 @@ void jl_gc_debug_print_status(void) } #endif -#ifdef OBJPROFILE -static htable_t obj_counts[3]; -static htable_t obj_sizes[3]; -void objprofile_count(void *ty, int old, int sz) -{ - if (gc_verifying) return; - if ((intptr_t)ty <= 0x10) { - ty = (void*)jl_buff_tag; - } - else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag && - jl_typeof(ty) == (jl_value_t*)jl_datatype_type && - ((jl_datatype_t*)ty)->instance) { - ty = jl_singleton_tag; - } - void **bp = ptrhash_bp(&obj_counts[old], ty); - if (*bp == HT_NOTFOUND) - *bp = (void*)2; - else - (*((intptr_t*)bp))++; - bp = ptrhash_bp(&obj_sizes[old], ty); - if (*bp == HT_NOTFOUND) - *bp = (void*)(intptr_t)(1 + sz); - else - *((intptr_t*)bp) += sz; -} - -void objprofile_reset(void) -{ - for (int g = 0; g < 3; g++) { - htable_reset(&obj_counts[g], 0); - htable_reset(&obj_sizes[g], 0); - } -} - -static void objprofile_print(htable_t nums, htable_t sizes) -{ - for(int i=0; i < nums.size; i+=2) { - if (nums.table[i+1] != HT_NOTFOUND) { - void *ty = nums.table[i]; - int num = (intptr_t)nums.table[i + 1] - 1; - size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1; - static const int ptr_hex_width = 2 * sizeof(void*); - if (sz > 2e9) { - jl_safe_printf(" %6d : %*.1f GB of (%*p) ", - num, 6, ((double)sz) / 1024 / 1024 / 1024, - ptr_hex_width, ty); - } - else if (sz > 2e6) { - jl_safe_printf(" %6d : %*.1f MB of (%*p) ", - num, 6, ((double)sz) / 1024 / 1024, - ptr_hex_width, ty); - } - else if (sz > 2e3) { - jl_safe_printf(" %6d : %*.1f kB of (%*p) ", - num, 6, ((double)sz) / 1024, - ptr_hex_width, ty); - } - else { - jl_safe_printf(" %6d : %*d B of (%*p) ", - num, 6, (int)sz, ptr_hex_width, ty); - } - if (ty == (void*)jl_buff_tag) - jl_safe_printf("#"); - else if (ty == jl_malloc_tag) - jl_safe_printf("#"); - else if (ty == jl_singleton_tag) - jl_safe_printf("#"); - else - jl_static_show(JL_STDERR, (jl_value_t*)ty); - jl_safe_printf("\n"); - } - } -} - -void objprofile_printall(void) -{ - jl_safe_printf("Transient mark :\n"); - objprofile_print(obj_counts[0], obj_sizes[0]); - jl_safe_printf("Perm mark :\n"); - objprofile_print(obj_counts[1], obj_sizes[1]); - jl_safe_printf("Remset :\n"); - objprofile_print(obj_counts[2], obj_sizes[2]); -} -#endif - #if defined(GC_TIME) || defined(GC_FINAL_STATS) 
STATIC_INLINE double jl_ns2ms(int64_t t) { @@ -784,11 +673,12 @@ void jl_print_gc_stats(JL_STREAM *s) malloc_stats(); #endif double ptime = jl_hrtime() - process_t0; - jl_safe_printf("exec time\t%.5f sec\n", ptime); + double exec_time = jl_ns2s(ptime); + jl_safe_printf("exec time\t%.5f sec\n", exec_time); if (gc_num.pause > 0) { jl_safe_printf("gc time \t%.5f sec (%2.1f%%) in %d (%d full) collections\n", jl_ns2s(gc_num.total_time), - jl_ns2s(gc_num.total_time) / ptime * 100, + jl_ns2s(gc_num.total_time) / exec_time * 100, gc_num.pause, gc_num.full_sweep); jl_safe_printf("gc pause \t%.2f ms avg\n\t\t%2.0f ms max\n", jl_ns2ms(gc_num.total_time) / gc_num.pause, @@ -847,11 +737,11 @@ void gc_time_pool_end(int sweep_full) double sweep_speed = sweep_gb / sweep_pool_sec; jl_safe_printf("GC sweep pools end %.2f ms at %.1f GB/s " "(skipped %.2f %% of %" PRId64 ", swept %" PRId64 " pgs, " - "%" PRId64 " freed with %" PRId64 " lazily) %s\n", + "%" PRId64 " freed) %s\n", sweep_pool_sec * 1000, sweep_speed, (total_pages ? ((double)skipped_pages * 100) / total_pages : 0), total_pages, total_pages - skipped_pages, - freed_pages, lazy_freed_pages, + freed_pages, sweep_full ? "full" : "quick"); } @@ -890,29 +780,29 @@ void gc_time_big_end(void) t_ms, big_freed, big_total, big_reset); } -static int64_t mallocd_array_total; -static int64_t mallocd_array_freed; -static int64_t mallocd_array_sweep_start; +static int64_t mallocd_memory_total; +static int64_t mallocd_memory_freed; +static int64_t mallocd_memory_sweep_start; -void gc_time_mallocd_array_start(void) +void gc_time_mallocd_memory_start(void) { - mallocd_array_total = 0; - mallocd_array_freed = 0; - mallocd_array_sweep_start = jl_hrtime(); + mallocd_memory_total = 0; + mallocd_memory_freed = 0; + mallocd_memory_sweep_start = jl_hrtime(); } -void gc_time_count_mallocd_array(int bits) +void gc_time_count_mallocd_memory(int bits) { - mallocd_array_total++; - mallocd_array_freed += !gc_marked(bits); + mallocd_memory_total++; + mallocd_memory_freed += !gc_marked(bits); } -void gc_time_mallocd_array_end(void) +void gc_time_mallocd_memory_end(void) { - double t_ms = jl_ns2ms(jl_hrtime() - mallocd_array_sweep_start); + double t_ms = jl_ns2ms(jl_hrtime() - mallocd_memory_sweep_start); jl_safe_printf("GC sweep arrays %.2f ms " "(freed %" PRId64 " / %" PRId64 ")\n", - t_ms, mallocd_array_freed, mallocd_array_total); + t_ms, mallocd_memory_freed, mallocd_memory_total); } void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, @@ -922,8 +812,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, int64_t remset_nptr = 0; for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - last_remset_len += ptls2->heap.last_remset->len; - remset_nptr = ptls2->heap.remset_nptr; + last_remset_len += ptls2->gc_tls.heap.last_remset->len; + remset_nptr = ptls2->gc_tls.heap.remset_nptr; } jl_safe_printf("GC mark pause %.2f ms | " "scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | " @@ -943,12 +833,12 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd, jl_safe_printf("GC sweep pause %.2f ms live %" PRId64 " kB " "(freed %" PRId64 " kB EST %" PRId64 " kB " "[error %" PRId64 "] = %d%% of allocd b %" PRIu64 ") " - "(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n", + "(%.2f ms in post_mark) %s\n", jl_ns2ms(sweep_pause), live_bytes / 1024, gc_num.freed / 1024, estimate_freed / 1024, gc_num.freed - estimate_freed, pct, gc_num.allocd / 1024, jl_ns2ms(gc_postmark_end - gc_premark_end), - sweep_full ? 
"full" : "quick", -gc_num.allocd / 1024); + sweep_full ? "full" : "quick"); } void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, @@ -968,11 +858,35 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64 " live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %" PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64 - "ms sweep time = %" PRIu64 "ms \n", + "ms sweep time = %" PRIu64 "ms\n", end, freed, live/1024/1024, interval/1024/1024, pause/1000000, ttsp, mark/1000000,sweep/1000000); } + +void gc_heuristics_summary( + uint64_t old_alloc_diff, uint64_t alloc_mem, + uint64_t old_mut_time, uint64_t alloc_time, + uint64_t old_freed_diff, uint64_t gc_mem, + uint64_t old_pause_time, uint64_t gc_time, + int thrash_counter, const char *reason, + uint64_t current_heap, uint64_t target_heap) +{ + jl_safe_printf("Estimates: alloc_diff=%" PRIu64 "kB (%" PRIu64 ")" + //" nongc_time=%" PRIu64 "ns (%" PRIu64 ")" + " mut_time=%" PRIu64 "ns (%" PRIu64 ")" + " freed_diff=%" PRIu64 "kB (%" PRIu64 ")" + " pause_time=%" PRIu64 "ns (%" PRIu64 ")" + " thrash_counter=%d%s" + " current_heap=%" PRIu64 " MB" + " target_heap=%" PRIu64 " MB\n", + old_alloc_diff/1024, alloc_mem/1024, + old_mut_time/1000, alloc_time/1000, + old_freed_diff/1024, gc_mem/1024, + old_pause_time/1000, gc_time/1000, + thrash_counter, reason, + current_heap/1024/1024, target_heap/1024/1024); +} #endif void jl_gc_debug_init(void) @@ -996,13 +910,6 @@ void jl_gc_debug_init(void) arraylist_new(&lostval_parents_done, 0); #endif -#ifdef OBJPROFILE - for (int g = 0; g < 3; g++) { - htable_new(&obj_counts[g], 0); - htable_new(&obj_sizes[g], 0); - } -#endif - #ifdef GC_FINAL_STATS process_t0 = jl_hrtime(); #endif @@ -1011,113 +918,136 @@ void jl_gc_debug_init(void) // GC summary stats #ifdef MEMPROFILE -// TODO repair this and possibly merge with `gc_count_pool` -static size_t pool_stats(jl_gc_pool_t *p, size_t *pwaste, size_t *np, - size_t *pnold) + +typedef struct _gc_memprofile_stat_t { + size_t nfree; // for pool only + size_t npgs; // for pool only + size_t nused; + size_t nbytes_used; + size_t nused_old; + size_t nbytes_used_old; +} gc_memprofile_stat_t; + +void gc_stats_all_pool(void) { - jl_taggedvalue_t *halfpages = p->newpages; - size_t osize = p->osize; - size_t nused=0, nfree=0, npgs=0, nold=0; - - if (halfpages != NULL) { - npgs++; - char *v = gc_page_data(halfpages) + GC_PAGE_OFFSET; - char *lim = (char*)halfpages - 1; - int i = 0; - while (v <= lim) { - if (!gc_marked(((jl_taggedvalue_t*)v)->bits.gc)) { - nfree++; + gc_memprofile_stat_t stat[JL_GC_N_POOLS]; + memset(stat, 0, sizeof(stat)); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom); + while (pg != NULL) { + assert(gc_alloc_map_is_set(pg->data)); + int pool_n = pg->pool_n; + jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[pool_n]; + char *data = pg->data; + // compute the start of the data area in this page + jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); + // compute the limit of valid data in this page + char *lim = data + GC_PAGE_SZ - pg->osize; + char *lim_newpages = data + GC_PAGE_SZ; + if (gc_page_data((char*)p->newpages - 1) == data) { + lim_newpages = (char*)p->newpages; } - else { - nused++; - if (((jl_taggedvalue_t*)v)->bits.gc 
== GC_OLD_MARKED) { - nold++; + char *v = (char*)v0; + gc_memprofile_stat_t *stat_n = &stat[pool_n]; + while (v <= lim) { + uint8_t bits = ((jl_taggedvalue_t*)v)->bits.gc; + if (!gc_marked(bits) || (char*)v >= lim_newpages) { + stat_n->nfree++; + } + else { + if (gc_old(bits)) { + assert(bits == GC_OLD_MARKED); + stat_n->nused_old++; + stat_n->nbytes_used_old += pg->osize; + } + else { + stat_n->nused++; + stat_n->nbytes_used += pg->osize; + } } + v = v + pg->osize; } - v = v + osize; - i++; + stat_n->npgs++; + pg = pg->next; } - // only the first page is allocated on } - *pwaste = npgs * GC_PAGE_SZ - (nused * p->osize); - *np = npgs; - *pnold = nold; - if (npgs != 0) { - jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n", - p->osize, - (long long)nused, - (long long)(nused + nfree), - (long long)(nused ? (nold * 100) / nused : 0), - (long long)npgs, - (long long)((nused * p->osize) / 1024), - (long long)(*pwaste / 1024)); - } - return nused*p->osize; -} - -void gc_stats_all_pool(void) -{ - size_t nb=0, w, tw=0, no=0, tp=0, nold=0, noldbytes=0, np, nol; for (int i = 0; i < JL_GC_N_POOLS; i++) { - for (int t_i = 0; t_i < gc_n_threads; t_i++) { - jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol); - nb += b; - no += (b / ptls2->heap.norm_pools[i].osize); - tw += w; - tp += np; - nold += nol; - noldbytes += nol * ptls2->heap.norm_pools[i].osize; - } + jl_ptls_t ptls = jl_current_task->ptls; + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[i]; + gc_memprofile_stat_t *s = &stat[i]; + jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n", + p->osize, + (long long)(s->nused + s->nused_old), + (long long)(s->nused + s->nused_old + s->nfree), + (long long)((s->nused + s->nused_old) ? (s->nused_old * 100) / (s->nused + s->nused_old) : 0), + (long long)s->npgs, + (long long)(((s->nused + s->nused_old) * p->osize) / 1024), + (long long)((GC_PAGE_SZ * s->npgs - s->nused * p->osize) / 1024)); } - jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, " - "%lld total fragments (%lld%% overhead), in %lld pages\n", - (long long)no, - (long long)(no ? (nold * 100) / no : 0), - (long long)(nb / 1024), - (long long)(nb ? (noldbytes * 100) / nb : 0), - (long long)tw, - (long long)(nb ? 
(tw * 100) / nb : 0), - (long long)tp); } void gc_stats_big_obj(void) { - size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0; + gc_memprofile_stat_t stat; + memset(&stat, 0, sizeof(stat)); for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - bigval_t *v = ptls2->heap.big_objects; + if (ptls2 == NULL) { + continue; + } + bigval_t *v = ptls2->gc_tls.heap.young_generation_of_bigvals; + v = v->next; // skip the sentinel while (v != NULL) { if (gc_marked(v->bits.gc)) { - nused++; - nbytes += v->sz & ~3; + if (gc_old(v->bits.gc)) { + assert(v->bits.gc == GC_OLD_MARKED); + stat.nused_old++; + stat.nbytes_used_old += v->sz; + } + else { + stat.nused++; + stat.nbytes_used += v->sz; + } } v = v->next; } - v = big_objects_marked; + v = oldest_generation_of_bigvals; + v = v->next; // skip the sentinel while (v != NULL) { - if (gc_marked(v->bits.gc)) { - nused_old++; - nbytes_old += v->sz & ~3; - } + assert(v->bits.gc == GC_OLD_MARKED); + stat.nused_old++; + stat.nbytes_used_old += v->sz; v = v->next; } - mallocarray_t *ma = ptls2->heap.mallocarrays; - while (ma != NULL) { - if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) { - nused++; - nbytes += jl_array_nbytes(ma->a); + void **lst = ptls2->gc_tls.heap.mallocarrays.items; + for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) { + jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1); + uint8_t bits = jl_astaggedvalue(m)->bits.gc; + if (gc_marked(bits)) { + size_t sz = jl_genericmemory_nbytes(m); + if (gc_old(bits)) { + assert(bits == GC_OLD_MARKED); + stat.nused_old++; + stat.nbytes_used_old += sz; + } + else { + stat.nused++; + stat.nbytes_used += sz; + } } - ma = ma->next; } } - jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n", - (long long)((nbytes + nbytes_old) / 1024), - (long long)(nbytes + nbytes_old ? (nbytes_old * 100) / (nbytes + nbytes_old) : 0), - (long long)(nused + nused_old), - (long long)(nused + nused_old ? (nused_old * 100) / (nused + nused_old) : 0)); + (long long)((stat.nbytes_used + stat.nbytes_used_old) / 1024), + (long long)(stat.nbytes_used + stat.nbytes_used_old ? (stat.nbytes_used_old * 100) / (stat.nbytes_used + stat.nbytes_used_old) : 0), + (long long)(stat.nused + stat.nused_old), + (long long)(stat.nused + stat.nused_old ? 
(stat.nused_old * 100) / (stat.nused + stat.nused_old) : 0)); } #endif //MEMPROFILE @@ -1149,7 +1079,7 @@ static void gc_count_pool_pagetable(void) { for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom); while (pg != NULL) { if (gc_alloc_map_is_set(pg->data)) { gc_count_pool_page(pg); @@ -1173,58 +1103,25 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - else if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)obj; - start = (char*)a->data; - len = jl_array_len(a); - elsize = a->elsize; - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; } - jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n", - pause/1e6, freed/1e6, + jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n", + pause/1e6, freed/(double)(1<<20), full ? "full" : "incr", recollect ? "recollect" : "" ); + + jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n", + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20), + jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20), + // live_bytes/(double)(1<<20), live byes tracking is not accurate. + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20), + jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20), + (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size) + ); + // Should fragmentation use bytes_resident instead of heap_size? 
} #ifdef __cplusplus diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b1401653d99ff..f3793939610b5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -2,21 +2,26 @@ #include "gc-heap-snapshot.h" +#include "julia.h" #include "julia_internal.h" -#include "gc.h" +#include "julia_assert.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/DenseMap.h" #include #include #include +#include +#include -using std::vector; using std::string; +using std::set; using std::ostringstream; using std::pair; using std::make_pair; +using llvm::SmallVector; using llvm::StringMap; using llvm::DenseMap; using llvm::StringRef; @@ -53,8 +58,9 @@ void print_str_escape_json(ios_t *stream, StringRef s) // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 struct Edge { - size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + uint8_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t from_node; // This is a deviation from the .heapsnapshot format to support streaming. size_t to_node; }; @@ -63,29 +69,34 @@ struct Edge { // [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575 -const int k_node_number_of_fields = 7; struct Node { - size_t type; // index into snapshot->node_types + uint8_t type; // index into snapshot->node_types size_t name; size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. 
- // whether the from_node is attached or dettached from the main application state + // whether the from_node is attached or detached from the main application state // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 - int detachedness; // 0 - unknown, 1 - attached, 2 - detached - vector edges; + uint8_t detachedness; // 0 - unknown, 1 - attached, 2 - detached ~Node() JL_NOTSAFEPOINT = default; }; -struct StringTable { +class StringTable { +protected: StringMap map; - vector strings; + SmallVector strings; + size_t next_id; + +public: + StringTable() JL_NOTSAFEPOINT : map(), strings(), next_id(0) {}; size_t find_or_create_string_id(StringRef key) JL_NOTSAFEPOINT { - auto val = map.insert(make_pair(key, map.size())); - if (val.second) + auto val = map.insert(make_pair(key, next_id)); + if (val.second) { strings.push_back(val.first->first()); + next_id++; + } return val.first->second; } @@ -105,77 +116,205 @@ struct StringTable { } }; -struct HeapSnapshot { - vector nodes; - // edges are stored on each from_node +// a string table with partial strings in memory and all strings serialized to a file +class SerializedStringTable: public StringTable { + public: - StringTable names; + // serialize the string only if it's not already in the table + size_t serialize_if_necessary(ios_t *stream, StringRef key) JL_NOTSAFEPOINT { + auto val = map.insert(make_pair(key, next_id)); + if (val.second) { + strings.push_back(val.first->first()); + // persist the string size first, then the string itself + // so that we could read it back in the same order + size_t s_size = key.size(); + ios_write(stream, reinterpret_cast(&s_size), sizeof(size_t)); + ios_write(stream, key.data(), s_size); + next_id++; + } + return val.first->second; + } + + // serialize the string without checking if it is in the table or not + // and return its index. This means that we might have duplicates in the + // output string file. + size_t serialize(ios_t *stream, StringRef key) JL_NOTSAFEPOINT { + size_t s_size = key.size(); + ios_write(stream, reinterpret_cast(&s_size), sizeof(size_t)); + ios_write(stream, key.data(), s_size); + size_t current = next_id; + next_id++; + return current; + } +}; + +struct HeapSnapshot { + // names could be very large, so we keep them in a separate binary file + // and use a StringTable to keep track of the indices of frequently used strings + // to reduce duplicates in the output file to some degree + SerializedStringTable names; + // node types and edge types are very small and keep them in memory StringTable node_types; StringTable edge_types; DenseMap node_ptr_to_index_map; - size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. + size_t num_nodes = 0; // Since we stream out to files, + size_t num_edges = 0; // we need to track the counts here. + + // Node internal_root; + + // Used for streaming + // Since nodes and edges are just one giant array of integers, we stream them as + // *BINARY DATA*: a sequence of bytes, each of which is a 64-bit integer (big enough to + // fit the pointer ids). + ios_t *nodes; + ios_t *edges; + // strings are serialized to a file in binary format + ios_t *strings; + // the following file is written out as json data. 
+ ios_t *json; + + size_t internal_root_idx = 0; // node index of the internal root node + size_t _gc_root_idx = 1; // node index of the GC roots node + size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node }; // global heap snapshot, mutated by garbage collector // when snapshotting is on. int gc_heap_snapshot_enabled = 0; +int gc_heap_snapshot_redact_data = 0; HeapSnapshot *g_snapshot = nullptr; -extern jl_mutex_t heapsnapshot_lock; +// mutex for gc-heap-snapshot. +jl_mutex_t heapsnapshot_lock; +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one); void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; -void _add_internal_root(HeapSnapshot *snapshot); +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one, char redact_data) { HeapSnapshot snapshot; - _add_internal_root(&snapshot); + snapshot.nodes = nodes; + snapshot.edges = edges; + snapshot.strings = strings; + snapshot.json = json; jl_mutex_lock(&heapsnapshot_lock); // Enable snapshotting g_snapshot = &snapshot; + gc_heap_snapshot_redact_data = redact_data; gc_heap_snapshot_enabled = true; + _add_synthetic_root_entries(&snapshot); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); // Disable snapshotting gc_heap_snapshot_enabled = false; + gc_heap_snapshot_redact_data = 0; g_snapshot = nullptr; jl_mutex_unlock(&heapsnapshot_lock); // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); + final_serialize_heap_snapshot((ios_t*)json, (ios_t*)strings, snapshot, all_one); } -// adds a node at id 0 which is the "uber root": -// a synthetic node which points to all the GC roots. -void _add_internal_root(HeapSnapshot *snapshot) +void serialize_node(HeapSnapshot *snapshot, const Node &node) JL_NOTSAFEPOINT { + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_write(snapshot->nodes, (char*)&node.type, sizeof(node.type)); + ios_write(snapshot->nodes, (char*)&node.name, sizeof(node.name)); + ios_write(snapshot->nodes, (char*)&node.id, sizeof(node.id)); + ios_write(snapshot->nodes, (char*)&node.self_size, sizeof(node.self_size)); + // NOTE: We don't write edge_count, since it's always 0. It will be reconstructed in + // post-processing. 
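+    // Net effect: each streamed node record is (u8 type, size_t name, size_t id,
+    // size_t self_size, size_t trace_node_id, u8 detachedness) in native byte order.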
+ ios_write(snapshot->nodes, (char*)&node.trace_node_id, sizeof(node.trace_node_id)); + ios_write(snapshot->nodes, (char*)&node.detachedness, sizeof(node.detachedness)); + + g_snapshot->num_nodes += 1; +} + +void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) JL_NOTSAFEPOINT +{ + // ["type","name_or_index","to_node"] + ios_write(snapshot->edges, (char*)&edge.type, sizeof(edge.type)); + ios_write(snapshot->edges, (char*)&edge.name_or_index, sizeof(edge.name_or_index)); + // NOTE: Row numbers for nodes (not adjusted for k_node_number_of_fields, which is 7) + ios_write(snapshot->edges, (char*)&edge.from_node, sizeof(edge.from_node)); + ios_write(snapshot->edges, (char*)&edge.to_node, sizeof(edge.to_node)); + + g_snapshot->num_edges += 1; +} + +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212 +// add synthetic nodes for the uber root, the GC roots, and the GC finalizer list roots +void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT +{ + // adds a node at id 0 which is the "uber root": + // a synthetic node which points to all the GC roots. Node internal_root{ - snapshot->node_types.find_or_create_string_id("synthetic"), - snapshot->names.find_or_create_string_id(""), // name + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, ""), // name 0, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + }; + serialize_node(snapshot, internal_root); + + // Add a node for the GC roots + snapshot->_gc_root_idx = snapshot->internal_root_idx + 1; + Node gc_roots{ + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC roots"), // name + snapshot->_gc_root_idx, // id + 0, // size + 0, // size_t trace_node_id (unused) + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(internal_root); + serialize_node(snapshot, gc_roots); + Edge root_to_gc_roots{ + (uint8_t)snapshot->edge_types.find_or_create_string_id("internal"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC roots"), // edge label + snapshot->internal_root_idx, // from + snapshot->_gc_root_idx // to + }; + serialize_edge(snapshot, root_to_gc_roots); + + // add a node for the gc finalizer list roots + snapshot->_gc_finlist_root_idx = snapshot->internal_root_idx + 2; + Node gc_finlist_roots{ + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC finalizer list roots"), // name + snapshot->_gc_finlist_root_idx, // id + 0, // size + 0, // size_t trace_node_id (unused) + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + }; + serialize_node(snapshot, gc_finlist_roots); + Edge root_to_gc_finlist_roots{ + (uint8_t)snapshot->edge_types.find_or_create_string_id("internal"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC finalizer list roots"), // edge label + snapshot->internal_root_idx, // from + snapshot->_gc_finlist_root_idx // to + }; + serialize_edge(snapshot, root_to_gc_finlist_roots); } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 // returns 
the index of the new node size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } @@ -192,7 +331,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT if (jl_is_string(a)) { node_type = "String"; - name = jl_string_data(a); + name = gc_heap_snapshot_redact_data ? "" : jl_string_data(a); self_size = jl_string_len(a); } else if (jl_is_symbol(a)) { @@ -241,21 +380,21 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT ios_mem(&str_, 0); JL_STREAM* str = (JL_STREAM*)&str_; jl_static_show(str, (jl_value_t*)type); - + node_type = StringRef((const char*)str_.buf, str_.size); name = StringRef((const char*)str_.buf, str_.size); } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; - g_snapshot->names.find_or_create_string_id(name), // size_t name; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + g_snapshot->names.serialize(g_snapshot->strings, name), // size_t name; (size_t)a, // size_t id; // We add 1 to self-size for the type tag that all heap-allocated objects have. // Also because the Chrome Snapshot viewer ignores size-0 leaves! sizeof(void*) + self_size, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); if (ios_need_close) ios_close(&str_); @@ -265,20 +404,20 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; - g_snapshot->names.find_or_create_string_id(name), // size_t name; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; + g_snapshot->names.serialize(g_snapshot->strings, name), // size_t name; (size_t)a, // size_t id; bytes, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); return val.first->second; } @@ -314,16 +453,29 @@ static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT } } - void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); + size_t to_node_idx = record_node_to_gc_snapshot(root); + auto edge_label = g_snapshot->names.serialize(g_snapshot->strings, name); - auto &internal_root = g_snapshot->nodes.front(); - auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; - auto edge_label = g_snapshot->names.find_or_create_string_id(name); + _record_gc_just_edge("internal", g_snapshot->internal_root_idx, to_node_idx, edge_label); +} - _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); +void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT +{ + auto 
to_node_idx = record_node_to_gc_snapshot(root); + auto edge_label = g_snapshot->names.serialize(g_snapshot->strings, name); + + _record_gc_just_edge("internal", g_snapshot->_gc_root_idx, to_node_idx, edge_label); +} + +void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT +{ + auto to_node_idx = record_node_to_gc_snapshot(obj); + ostringstream ss; + ss << "finlist-" << index; + auto edge_label = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, ss.str()); + _record_gc_just_edge("internal", g_snapshot->_gc_finlist_root_idx, to_node_idx, edge_label); } // Add a node to the heap snapshot representing a Julia stack frame. @@ -332,20 +484,20 @@ void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT // Stack frame nodes point at the objects they have as local variables. size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - snapshot->nodes.push_back(Node{ - snapshot->node_types.find_or_create_string_id("synthetic"), - snapshot->names.find_or_create_string_id("(stack frame)"), // name + auto node = Node{ + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, "(stack frame)"), // name (size_t)frame, // id 1, // size 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(snapshot, node); return val.first->second; } @@ -354,30 +506,27 @@ void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) J { auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); auto to_idx = record_node_to_gc_snapshot(to); - Node &from_node = g_snapshot->nodes[from_node_idx]; - auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); - _record_gc_just_edge("internal", from_node, to_idx, name_idx); + auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "local var"); + _record_gc_just_edge("internal", from_node_idx, to_idx, name_idx); } void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT { auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; - auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "stack"); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { auto from_node_idx = _record_stack_frame_node(g_snapshot, from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; - auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "next frame"); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void 
_gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT @@ -389,62 +538,59 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void { string path = _fieldpath_for_slot(from, slot); _record_gc_edge("property", from, to, - g_snapshot->names.find_or_create_string_id(path)); + g_snapshot->names.serialize_if_necessary(g_snapshot->strings, path)); } -void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_binding_t *binding) JL_NOTSAFEPOINT +void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT { - jl_globalref_t *globalref = binding->globalref; - jl_sym_t *name = globalref->name; auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module); - auto to_node_idx = record_pointer_to_gc_snapshot(binding, sizeof(jl_binding_t), jl_symbol_name(name)); - - jl_value_t *value = jl_atomic_load_relaxed(&binding->value); - auto value_idx = value ? record_node_to_gc_snapshot(value) : 0; - jl_value_t *ty = jl_atomic_load_relaxed(&binding->ty); - auto ty_idx = ty ? record_node_to_gc_snapshot(ty) : 0; - auto globalref_idx = record_node_to_gc_snapshot((jl_value_t*)globalref); + auto to_bindings_idx = record_node_to_gc_snapshot(bindings); + auto to_bindingkeyset_idx = record_node_to_gc_snapshot(bindingkeyset); - auto &from_node = g_snapshot->nodes[from_node_idx]; - auto &to_node = g_snapshot->nodes[to_node_idx]; - - _record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("")); - if (value_idx) _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value")); - if (ty_idx) _record_gc_just_edge("internal", to_node, ty_idx, g_snapshot->names.find_or_create_string_id("ty")); - if (globalref_idx) _record_gc_just_edge("internal", to_node, globalref_idx, g_snapshot->names.find_or_create_string_id("globalref")); -} + if (to_bindings_idx > 0) { + _record_gc_just_edge("internal", from_node_idx, to_bindings_idx, g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "bindings")); + } + if (to_bindingkeyset_idx > 0) { + _record_gc_just_edge("internal", from_node_idx, to_bindingkeyset_idx, g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "bindingkeyset")); + } + } void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { _record_gc_edge("internal", from, to, - g_snapshot->names.find_or_create_string_id("")); + g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "")); } +void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + _record_gc_edge("binding", from, to, + g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "")); +} + + void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT { - size_t name_or_idx = g_snapshot->names.find_or_create_string_id(""); + // valid alloc_type values are 0, 1, 2 + assert(alloc_type <= 2); + size_t name_or_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, ""); auto from_node_idx = record_node_to_gc_snapshot(from); - const char *alloc_kind; + const char *alloc_kind = NULL; switch (alloc_type) { case 0: - alloc_kind = ""; + alloc_kind = ""; break; case 1: - alloc_kind = ""; + alloc_kind = ""; break; case 2: - alloc_kind = ""; - break; - default: - alloc_kind = ""; + alloc_kind = ""; break; } auto to_node_idx = 
record_pointer_to_gc_snapshot(to, bytes, alloc_kind); - auto &from_node = g_snapshot->nodes[from_node_idx]; - _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); + _record_gc_just_edge("hidden", from_node_idx, to_node_idx, name_or_idx); } static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, @@ -453,84 +599,51 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, auto from_node_idx = record_node_to_gc_snapshot(a); auto to_node_idx = record_node_to_gc_snapshot(b); - auto &from_node = g_snapshot->nodes[from_node_idx]; - - _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); + _record_gc_just_edge(edge_type, from_node_idx, to_node_idx, name_or_idx); } -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT { - from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), + auto edge = Edge{ + (uint8_t)g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_idx, // edge label + from_idx, // from to_idx // to - }); + }; - g_snapshot->num_edges += 1; + serialize_edge(g_snapshot, edge); } -void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one) { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 - ios_printf(stream, "{\"snapshot\":{"); - ios_printf(stream, "\"meta\":{"); - ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - ios_printf(stream, "\"node_types\":["); - snapshot.node_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - ios_printf(stream, "\"edge_types\":["); - snapshot.edge_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string_or_number\",\"from_node\"]"); - ios_printf(stream, "},\n"); // end "meta" - ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); - ios_printf(stream, "},\n"); // end "snapshot" - - ios_printf(stream, "\"nodes\":["); - bool first_node = true; - for (const auto &from_node : snapshot.nodes) { - if (first_node) { - first_node = false; - } - else { - ios_printf(stream, ","); - } - // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", - from_node.type, - from_node.name, - from_node.id, - all_one ? 
(size_t)1 : from_node.self_size, - from_node.edges.size(), - from_node.trace_node_id, - from_node.detachedness); - } - ios_printf(stream, "],\n"); - - ios_printf(stream, "\"edges\":["); - bool first_edge = true; - for (const auto &from_node : snapshot.nodes) { - for (const auto &edge : from_node.edges) { - if (first_edge) { - first_edge = false; - } - else { - ios_printf(stream, ","); - } - ios_printf(stream, "%zu,%zu,%zu\n", - edge.type, - edge.name_or_index, - edge.to_node * k_node_number_of_fields); - } - } - ios_printf(stream, "],\n"); // end "edges" - - ios_printf(stream, "\"strings\":"); - - snapshot.names.print_json_array(stream, true); - - ios_printf(stream, "}"); + // also https://github.com/microsoft/vscode-v8-heap-tools/blob/c5b34396392397925ecbb4ecb904a27a2754f2c1/v8-heap-parser/src/decoder.rs#L43-L51 + ios_printf(json, "{\"snapshot\":{\n"); + + ios_printf(json, " \"meta\":{\n"); + ios_printf(json, " \"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],\n"); + ios_printf(json, " \"node_types\":["); + snapshot.node_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],\n"); + ios_printf(json, " \"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],\n"); + ios_printf(json, " \"edge_types\":["); + snapshot.edge_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string_or_number\",\"from_node\"],\n"); + // not used. Required by microsoft/vscode-v8-heap-tools + ios_printf(json, " \"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],\n"); + ios_printf(json, " \"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],\n"); + ios_printf(json, " \"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],\n"); + ios_printf(json, " \"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]\n"); + // end not used + ios_printf(json, " },\n"); // end "meta" + + ios_printf(json, " \"node_count\":%zu,\n", snapshot.num_nodes); + ios_printf(json, " \"edge_count\":%zu,\n", snapshot.num_edges); + ios_printf(json, " \"trace_function_count\":0\n"); // not used. Required by microsoft/vscode-v8-heap-tools + ios_printf(json, "}\n"); // end "snapshot" + + // this } is removed by the julia reassembler in Profile + ios_printf(json, "}"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 8c3af5b86bec7..dc5b22bb72eb1 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -20,7 +20,7 @@ void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_N void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT; -void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT; // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reachable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). 
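/*
 * Aside (not part of the patch): the serializer above streams fixed-width
 * binary node and edge records into separate ios_t buffers instead of
 * accumulating them in vectors, and the final JSON only carries the metadata
 * plus counts. Below is a minimal sketch of reading such edge records back
 * and grouping them by source node, which is conceptually what the Julia-side
 * reassembler in Profile has to do before emitting the V8-style "edges"
 * array. Field widths here are assumptions and must match whatever
 * serialize_edge() actually wrote.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct {
    uint8_t type;          /* index into edge_types */
    size_t name_or_index;  /* string id or array index */
    size_t from_node;      /* row number of the source node */
    size_t to_node;        /* row number of the target node */
} raw_edge_t;

static int cmp_by_from(const void *a, const void *b)
{
    const raw_edge_t *x = a, *y = b;
    return (x->from_node > y->from_node) - (x->from_node < y->from_node);
}

/* Read `n` edge records written field-by-field (as serialize_edge does) and
 * sort them by from_node so per-node edge counts can be recomputed. */
static raw_edge_t *read_edges_sorted(FILE *f, size_t n)
{
    raw_edge_t *edges = malloc(n * sizeof *edges);
    if (edges == NULL)
        return NULL;
    for (size_t i = 0; i < n; i++) {
        if (fread(&edges[i].type, sizeof edges[i].type, 1, f) != 1 ||
            fread(&edges[i].name_or_index, sizeof edges[i].name_or_index, 1, f) != 1 ||
            fread(&edges[i].from_node, sizeof edges[i].from_node, 1, f) != 1 ||
            fread(&edges[i].to_node, sizeof edges[i].to_node, 1, f) != 1) {
            free(edges);
            return NULL;
        }
    }
    qsort(edges, n, sizeof *edges, cmp_by_from);
    return edges;
}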
@@ -28,10 +28,16 @@ void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t * // Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the // size of the object, even though we're never going to mark that object. void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT; - +// Used for objects that are reachable from the GC roots +void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT; +// Used for objects that are reachable from the finalizer list +void _gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT; +// Used for objects reachable from the binding partition pointer union +void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; extern int gc_heap_snapshot_enabled; extern int prev_sweep_full; +extern jl_mutex_t heapsnapshot_lock; int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT; int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; @@ -60,6 +66,12 @@ static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL _gc_heap_snapshot_record_root(root, name); } } +static inline void gc_heap_snapshot_record_array_edge_index(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && from != NULL && to != NULL)) { + _gc_heap_snapshot_record_array_edge(from, to, index); + } +} static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { @@ -73,10 +85,10 @@ static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_valu } } -static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT +static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { - _gc_heap_snapshot_record_module_to_binding(module, binding); + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full) && bindings != NULL && bindingkeyset != NULL) { + _gc_heap_snapshot_record_module_to_binding(module, bindings, bindingkeyset); } } @@ -87,6 +99,13 @@ static inline void gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, } } +static inline void gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_binding_partition_edge(from, to); + } +} + static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { @@ -94,10 +113,25 @@ static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* t } } +static inline void gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && root != NULL)) { + _gc_heap_snapshot_record_gc_roots(root, name); + } +} + +static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full && finlist != NULL)) { + 
_gc_heap_snapshot_record_finlist(finlist, index); + } +} + // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one, char redact_data); #ifdef __cplusplus diff --git a/src/gc-interface.h b/src/gc-interface.h new file mode 100644 index 0000000000000..826e91355b17a --- /dev/null +++ b/src/gc-interface.h @@ -0,0 +1,257 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +/* + Garbage Collection interface that must be implemented by third-party GCs +*/ + +#ifndef JL_GC_INTERFACE_H +#define JL_GC_INTERFACE_H + +#include "dtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct _jl_tls_states_t; +struct _jl_value_t; +struct _jl_weakref_t; +struct _jl_datatype_t; + +// ========================================================================= // +// GC Metrics +// ========================================================================= // + +// This struct must be kept in sync with the Julia type of the same name in base/timing.jl +typedef struct { + int64_t allocd; + int64_t deferred_alloc; + int64_t freed; + uint64_t malloc; + uint64_t realloc; + uint64_t poolalloc; + uint64_t bigalloc; + uint64_t freecall; + uint64_t total_time; + uint64_t total_allocd; + size_t interval; + int pause; + int full_sweep; + uint64_t max_pause; + uint64_t max_memory; + uint64_t time_to_safepoint; + uint64_t max_time_to_safepoint; + uint64_t total_time_to_safepoint; + uint64_t sweep_time; + uint64_t mark_time; + uint64_t stack_pool_sweep_time; + uint64_t total_sweep_time; + uint64_t total_sweep_page_walk_time; + uint64_t total_sweep_madvise_time; + uint64_t total_sweep_free_mallocd_memory_time; + uint64_t total_mark_time; + uint64_t total_stack_pool_sweep_time; + uint64_t last_full_sweep; + uint64_t last_incremental_sweep; +} jl_gc_num_t; + +// ========================================================================= // +// System-wide Initialization +// ========================================================================= // + +// System-wide initialization function. Responsible for initializing global locks as well as +// global memory parameters (e.g. target heap size) used by the collector. +void jl_gc_init(void); +// Spawns GC threads. +void jl_start_gc_threads(void); + +// ========================================================================= // +// Per-thread Initialization +// ========================================================================= // + +// Initializes thread-local data structures such as thread-local object pools, +// thread-local remembered sets and thread-local allocation counters. +// Should be called exactly once per Julia thread. +void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT; +// Deallocates any memory previously used for thread-local GC data structures. +// Mostly used to ensure that we perform this memory cleanup for foreign threads that are +// about to leave Julia. 
+void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls); + +// ========================================================================= // +// Controls +// ========================================================================= // + +typedef enum { + JL_GC_AUTO = 0, // use heuristics to determine the collection type + JL_GC_FULL = 1, // force a full collection + JL_GC_INCREMENTAL = 2, // force an incremental collection +} jl_gc_collection_t; +// Enables or disables (depending on the value of the argument) the collector. Returns +// whether GC was previously enabled. +JL_DLLEXPORT int jl_gc_enable(int on); +// Returns whether the collector is enabled. +JL_DLLEXPORT int jl_gc_is_enabled(void); +// Sets a soft limit to Julia's heap. +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); +// Runs a GC cycle. This function's parameter determines whether we're running an +// incremental, full, or automatic (i.e. heuristic driven) collection. Returns whether we +// should run a collection cycle again (e.g. a full mark right after a full sweep to ensure +// we do a full heap traversal). +JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; +// Returns which GC implementation is being used and possibly its version according to the list of supported GCs +// NB: it should clearly identify the GC by including e.g. ‘stock’ or ‘mmtk’ as a substring. +JL_DLLEXPORT const char* jl_gc_active_impl(void); +// Sweep Julia's stack pools and mtarray buffers. Note that this function has been added to the interface as +// each GC should implement it but it will most likely not be used by other code in the runtime. +// It still needs to be annotated with JL_DLLEXPORT since it is called from Rust by MMTk. +JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT; + +// ========================================================================= // +// Metrics +// ========================================================================= // + +// Retrieves Julia's `GC_Num` (structure that stores GC statistics). +JL_DLLEXPORT jl_gc_num_t jl_gc_num(void); +// Returns the difference between the current value of total live bytes now +// (live bytes at the last collection plus number of bytes allocated since then), +// compared to the value at the last time this function was called. +JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT; +// Returns the difference between the current value of total live bytes now +// (live bytes at the last collection plus number of bytes allocated since then) +// compared to the value at the last time this function was called. The offset parameter +// is subtracted from this value in order to obtain the return value. +JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT; +// Returns the number of pool allocated bytes. This could always return 0 for GC +// implementations that do not use pools. +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void); +// Returns the number of live bytes at the end of the last collection cycle +// (doesn't include the number of allocated bytes since then). +JL_DLLEXPORT int64_t jl_gc_live_bytes(void); +// Stores the number of live bytes at the end of the last collection cycle plus the number +// of bytes we allocated since then into the 64-bit integer pointer passed as an argument. 
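/*
 * Usage sketch (not part of the patch): how embedding code could combine the
 * Controls and Metrics entry points declared in this header to measure
 * allocation across a region of interest. Assumes an already-initialized
 * embedded Julia (jl_init() has been called); jl_eval_string() comes from the
 * regular embedding API and the workload strings are arbitrary examples.
 */
#include <stdio.h>
#include <stdint.h>
#include <julia.h>

static void measure_allocation_example(void)
{
    (void)jl_gc_diff_total_bytes();        /* establish the baseline */

    jl_eval_string("zeros(10^6);");        /* some allocating work */

    int64_t allocated = jl_gc_diff_total_bytes();
    printf("allocated roughly %lld bytes since the baseline\n",
           (long long)allocated);

    int was_enabled = jl_gc_enable(0);     /* pause collection temporarily */
    jl_eval_string("sum(rand(10^5));");
    jl_gc_enable(was_enabled);             /* restore the previous state */

    jl_gc_collect(JL_GC_FULL);             /* force a full collection */
}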
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT; +// Retrieves the value of Julia's soft heap limit. +JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void); +// High-resolution (nano-seconds) value of total time spent in GC. +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); + +// ========================================================================= // +// Allocation +// ========================================================================= // + +// On GCC, this function is inlined when sz is constant (see julia_internal.h) +// In general, this function should implement allocation and should use the specific GC's logic +// to decide whether to allocate a small or a large object. Finally, note that this function +// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record +// an allocation of that type in the allocation profiler. +struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); +// Allocates small objects and increments Julia allocation counterst. Size of the object +// header must be included in the object size. The (possibly unused in some implementations) +// offset to the arena in which we're allocating is passed in the second parameter, and the +// object size in the third parameter. If thread-local allocators are used, then this +// function should allocate in the thread-local allocator of the thread referenced by the +// jl_ptls_t argument. An additional (last) parameter containing information about the type +// of the object being allocated may be used to record an allocation of that type in the +// allocation profiler. +JL_DLLEXPORT struct _jl_value_t *jl_gc_small_alloc(struct _jl_tls_states_t *ptls, + int offset, int osize, + struct _jl_value_t *type); +// Description: Allocates large objects and increments Julia allocation counters. Size of +// the object header must be included in the object size. If thread-local allocators are +// used, then this function should allocate in the thread-local allocator of the thread +// referenced by the jl_ptls_t argument. An additional (last) parameter containing +// information about the type of the object being allocated may be used to record an +// allocation of that type in the allocation profiler. +JL_DLLEXPORT struct _jl_value_t *jl_gc_big_alloc(struct _jl_tls_states_t *ptls, size_t sz, + struct _jl_value_t *type); +// Wrapper around Libc malloc that updates Julia allocation counters. +JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz); +// Wrapper around Libc calloc that updates Julia allocation counters. +JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); +// Wrapper around Libc free that updates Julia allocation counters. +JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); +// Wrapper around Libc realloc that updates Julia allocation counters. +JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); +// Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and +// Strings. It increments Julia allocation counters and should check whether we're close to +// the Julia heap target, and therefore, whether we should run a collection. Note that this +// doesn't record the size of the allocation request in a side metadata (i.e. 
a few words in +// front of the memory payload): this function is used for Julia object allocations, and we +// assume that there is already a field in the Julia object being allocated that we may use +// to store the size of the memory buffer. +JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); +// Allocates a new weak-reference, assigns its value and increments Julia allocation +// counters. If thread-local allocators are used, then this function should allocate in the +// thread-local allocator of the thread referenced by the first jl_ptls_t argument. +JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, + struct _jl_value_t *value); +// Permanently allocates a memory slot of the size specified by the first parameter. This +// block of memory is allocated in an immortal region that is never swept. The second +// parameter specifies whether the memory should be filled with zeros. The third and fourth +// parameters specify the alignment and an offset in bytes, respectively. Specifically, the +// pointer obtained by advancing the result of this function by the number of bytes +// specified in the fourth parameter will be aligned according to the value given by the +// third parameter in bytes. +JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, + unsigned offset) JL_NOTSAFEPOINT; +// Permanently allocates an object of the size specified by the first parameter. Size of the +// object header must be included in the object size. This object is allocated in an +// immortal region that is never swept. The second parameter specifies the type of the +// object being allocated and will be used to set the object header. +// +// !!! warning: Because permanently allocated objects are not swept, the GC will not +// necessarily mark any objects that would have ordinarily been rooted by +// the allocated object. All objects stored in fields of this object +// must be either permanently allocated or have other roots. +struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// This function notifies the GC about memory addresses that are set when loading the boot image. +// The GC may use that information to, for instance, determine that such objects should +// be treated as marked and belonged to the old generation in nursery collections. +void jl_gc_notify_image_load(const char* img_data, size_t len); + +// ========================================================================= // +// Runtime Write-Barriers +// ========================================================================= // + +// Write barrier slow-path. If a generational collector is used, +// it may enqueue an old object into the remembered set of the calling thread. +JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT; +// In a generational collector is used, this function walks over the fields of the +// object specified by the second parameter (as defined by the data type in the third +// parameter). If a field points to a young object, the first parameter is enqueued into the +// remembered set of the calling thread. +JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored, + struct _jl_datatype_t *dt) JL_NOTSAFEPOINT; +// If a generational collector is used, checks whether the function argument points to an +// old object, and if so, calls the write barrier slow path above. 
In most cases, this +// function is used when its caller has verified that there is a young reference in the +// object that's being passed as an argument to this function. +STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT; +// Write barrier function that must be used after pointer writes to heap-allocated objects – +// the value of the field being written must also point to a heap-allocated object. +// If a generational collector is used, it may check whether the two function arguments are +// in different GC generations (i.e. if the first argument points to an old object and the +// second argument points to a young object), and if so, call the write barrier slow-path. +STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT; +// Freshly allocated objects are known to be in the young generation until the next safepoint, +// so write barriers can be omitted until the next allocation. This function is a no-op that +// can be used to annotate that a write barrier would be required were it not for this property +// (as opposed to somebody just having forgotten to think about write barriers). +STATIC_INLINE void jl_gc_wb_fresh(const void *parent, const void *ptr) JL_NOTSAFEPOINT {} +// Used to annotate that a write barrier would be required, but may be omitted because `ptr` +// is known to be an old object. +STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOTSAFEPOINT {} +// Write-barrier function that must be used after copying multiple fields of an object into +// another. It should be semantically equivalent to triggering multiple write barriers – one +// per field of the object being copied, but may be special-cased for performance reasons. +STATIC_INLINE void jl_gc_multi_wb(const void *parent, + const struct _jl_value_t *ptr) JL_NOTSAFEPOINT; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c new file mode 100644 index 0000000000000..78882c8eb0225 --- /dev/null +++ b/src/gc-mmtk.c @@ -0,0 +1,1180 @@ +#include "gc-common.h" +#include "gc-tls-mmtk.h" +#include "mmtkMutator.h" +#include "threading.h" + +// File exists in the binding +#include "mmtk.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// ========================================================================= // +// Julia specific +// ========================================================================= // + +extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED; +extern const unsigned pool_sizes[]; +extern jl_mutex_t finalizers_lock; + +// FIXME: Should the values below be shared between both GC's? +// Note that MMTk uses a hard max heap limit, which is set by default +// as 70% of the free available memory. The min heap is set as the +// default_collect_interval variable below. + +// max_total_memory is a suggestion. We try very hard to stay +// under this limit, but we will go above it rather than halting. +#ifdef _P64 +typedef uint64_t memsize_t; +static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +// We expose this to the user/ci as jl_gc_set_max_memory +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; +#else +typedef uint32_t memsize_t; +static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +// Work really hard to stay within 2GB +// Alternative is to risk running out of address space +// on 32 bit architectures. 
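/*
 * Sketch (not part of the patch): the canonical call pattern for the
 * write-barrier entry points declared in gc-interface.h above. `obj`,
 * `field_slot` and `child` are hypothetical heap-allocated Julia values and a
 * slot inside `obj`; the raw store is shown generically for illustration.
 */
#include <julia.h>

static void store_field_with_write_barrier(jl_value_t *obj,
                                            jl_value_t **field_slot,
                                            jl_value_t *child)
{
    *field_slot = child;   /* raw pointer write into a heap-allocated object */
    jl_gc_wb(obj, child);  /* old-parent/young-child check happens inside */

    /* Variants described above: jl_gc_wb_back(obj) when the caller already
     * knows a young reference was just stored into obj, and
     * jl_gc_wb_fresh(obj, child) as a no-op annotation when obj itself was
     * freshly allocated and no barrier is needed until the next safepoint. */
}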
+#define MAX32HEAP 1536 * 1024 * 1024 +static memsize_t max_total_memory = (memsize_t) MAX32HEAP; +#endif + +// ========================================================================= // +// Defined by the binding +// ========================================================================= // + +extern void mmtk_julia_copy_stack_check(int copy_stack); +extern void mmtk_gc_init(uintptr_t min_heap_size, uintptr_t max_heap_size, uintptr_t n_gcthreads, uintptr_t header_size, uintptr_t tag); +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); +extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); +extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); +extern void mmtk_store_obj_size_c(void* obj, size_t size); +extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; +extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; + +// ========================================================================= // +// GC Initialization and Control +// ========================================================================= // + +void jl_gc_init(void) { + // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167) + JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); + + arraylist_new(&to_finalize, 0); + arraylist_new(&finalizer_list_marked, 0); + + gc_num.allocd = 0; + gc_num.max_pause = 0; + gc_num.max_memory = 0; + + long long min_heap_size; + long long max_heap_size; + char* min_size_def = getenv("MMTK_MIN_HSIZE"); + char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); + + char* max_size_def = getenv("MMTK_MAX_HSIZE"); + char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); + + // default min heap currently set as Julia's default_collect_interval + if (min_size_def != NULL) { + char *p; + double min_size = strtod(min_size_def, &p); + min_heap_size = (long) 1024 * 1024 * min_size; + } else if (min_size_gb != NULL) { + char *p; + double min_size = strtod(min_size_gb, &p); + min_heap_size = (long) 1024 * 1024 * 1024 * min_size; + } else { + min_heap_size = default_collect_interval; + } + + // default max heap currently set as 70% the free memory in the system + if (max_size_def != NULL) { + char *p; + double max_size = strtod(max_size_def, &p); + max_heap_size = (long) 1024 * 1024 * max_size; + } else if (max_size_gb != NULL) { + char *p; + double max_size = strtod(max_size_gb, &p); + max_heap_size = (long) 1024 * 1024 * 1024 * max_size; + } else { + max_heap_size = uv_get_free_memory() * 70 / 100; + } + + // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads + assert(jl_n_gcthreads == 0); + + // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined + int copy_stacks; + +#ifdef COPY_STACKS + copy_stacks = 1; +#else + copy_stacks = 0; +#endif + + mmtk_julia_copy_stack_check(copy_stacks); + + // if only max size is specified initialize MMTk with a fixed size heap + // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads. + // If the two values are the same, we can use either. Otherwise, we need to be careful. 
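/*
 * Aside (not part of the patch): the MMTK_{MIN,MAX}_HSIZE[_G] handling in
 * jl_gc_init() above, restated as a standalone helper for clarity. The
 * `fallback` argument stands in for default_collect_interval (min) or the
 * 70%-of-free-memory computation (max); the environment variable names match
 * the code above, everything else is illustrative.
 */
#include <stdlib.h>

static long long heap_size_from_env(const char *mb_var, const char *gb_var,
                                    long long fallback)
{
    const char *mb = getenv(mb_var);   /* size in MiB, e.g. MMTK_MAX_HSIZE */
    const char *gb = getenv(gb_var);   /* size in GiB, e.g. MMTK_MAX_HSIZE_G */
    if (mb != NULL)
        return (long long)(strtod(mb, NULL) * 1024 * 1024);
    if (gb != NULL)
        return (long long)(strtod(gb, NULL) * 1024 * 1024 * 1024);
    return fallback;
}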
+ uintptr_t gcthreads = jl_options.nmarkthreads; + if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { + mmtk_gc_init(0, max_heap_size, gcthreads, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + } else { + mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + } +} + +void jl_start_gc_threads(void) { + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_initialize_collection((void *)ptls); +} + +void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { + jl_thread_heap_common_t *heap = &ptls->gc_tls_common.heap; + small_arraylist_new(&heap->weak_refs, 0); + small_arraylist_new(&heap->live_tasks, 0); + for (int i = 0; i < JL_N_STACK_POOLS; i++) + small_arraylist_new(&heap->free_stacks[i], 0); + small_arraylist_new(&heap->mallocarrays, 0); + arraylist_new(&ptls->finalizers, 0); + // Initialize `lazily_freed_mtarraylist_buffers` + small_arraylist_new(&ptls->lazily_freed_mtarraylist_buffers, 0); + // Clear the malloc sz count + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); + // Create mutator + MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); + // Copy the mutator to the thread local storage + memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); + // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) + mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); +} + +void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { + mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator); +} + +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) { + // MMTk currently does not allow setting the heap size at runtime +} + +STATIC_INLINE void maybe_collect(jl_ptls_t ptls) +{ + // Just do a safe point for general maybe_collect + jl_gc_safepoint_(ptls); +} + +// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll), +// is expensive. So we only check for every few allocations. +static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) +{ + // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to + // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage + // as much as we can. 
+ if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) { + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); + mmtk_gc_poll(ptls); + } else { + size_t curr = jl_atomic_load_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll); + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, curr + sz); + jl_gc_safepoint_(ptls); + } +} + +// This is called when the user calls for a GC with Gc.gc() +JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + return; + } + mmtk_handle_user_collection_request(ptls, collection); +} + + +// Based on jl_gc_collect from gc-stock.c +// called when stopping the thread in `mmtk_block_for_gc` +JL_DLLEXPORT void jl_gc_prepare_to_collect(void) +{ + // FIXME: set to JL_GC_AUTO since we're calling it from mmtk + // maybe just remove this? + JL_PROBE_GC_BEGIN(JL_GC_AUTO); + + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + return; + } + + int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); + jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); + // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. + uint64_t t0 = jl_hrtime(); + if (!jl_safepoint_start_gc(ct)) { + jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); + jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state + return; + } + + JL_TIMING_SUSPEND_TASK(GC, ct); + JL_TIMING(GC, GC); + + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + // Now we are ready to wait for other threads to hit the safepoint, + // we can do a few things that doesn't require synchronization. + // + // We must sync here with the tls_lock operations, so that we have a + // seq-cst order between these events now we know that either the new + // thread must run into our safepoint flag or we must observe the + // existence of the thread in the jl_n_threads count. + // + // TODO: concurrently queue objects + jl_fence(); + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + jl_gc_wait_for_the_world(gc_all_tls_states, gc_n_threads); + JL_PROBE_GC_STOP_THE_WORLD(); + + uint64_t t1 = jl_hrtime(); + uint64_t duration = t1 - t0; + if (duration > gc_num.max_time_to_safepoint) + gc_num.max_time_to_safepoint = duration; + gc_num.time_to_safepoint = duration; + gc_num.total_time_to_safepoint += duration; + + if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { + JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? 
otherwise, failing that, this seems like plausibly a deadlock +#ifndef __clang_gcanalyzer__ + mmtk_block_thread_for_gc(); +#endif + JL_UNLOCK_NOGC(&finalizers_lock); + } + + gc_n_threads = 0; + gc_all_tls_states = NULL; + jl_safepoint_end_gc(); + jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); + JL_PROBE_GC_END(); + jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state + + // Only disable finalizers on current thread + // Doing this on all threads is racy (it's impossible to check + // or wait for finalizers on other threads without dead lock). + if (!ptls->finalizers_inhibited && ptls->locks.len == 0) { + JL_TIMING(GC, GC_Finalizers); + run_finalizers(ct, 0); + } + JL_PROBE_GC_FINALIZER(); + +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; +} + +// ========================================================================= // +// GC Statistics +// ========================================================================= // + +JL_DLLEXPORT const char* jl_gc_active_impl(void) { + const char* mmtk_version = get_mmtk_version(); + return mmtk_version; +} + +int64_t last_gc_total_bytes = 0; +int64_t last_live_bytes = 0; // live_bytes at last collection +int64_t live_bytes = 0; + +// FIXME: The functions combine_thread_gc_counts and reset_thread_gc_counts +// are currently nearly identical for mmtk and for stock. However, the stats +// are likely different (e.g., MMTk doesn't track the bytes allocated in the fastpath, +// but only when the slowpath is called). We might need to adapt these later so that +// the statistics are the same or as close as possible for each GC. +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls) { + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); + if (update_heap) { + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); + } + } + } +} + +void reset_thread_gc_counts(void) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls != NULL) { + // don't reset `pool_live_bytes` here + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); + } + } +} + +// Retrieves Julia's `GC_Num` (structure that stores GC statistics). +JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num, 0); + return num; +} + +JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT { + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT +{ + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb - offset; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) { + return 0; +} + +void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); +} + +void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT +{ +} + +int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT +{ + jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc); + return live_bytes += inc; +} + +void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT +{ + combine_thread_gc_counts(&gc_num, 0); + inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); + gc_num.allocd = 0; + gc_num.deferred_alloc = 0; + reset_thread_gc_counts(); +} + +JL_DLLEXPORT int64_t jl_gc_live_bytes(void) { + return last_live_bytes; +} + +JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT +{ + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num, 0); + // Sync this logic with `base/util.jl:GC_Diff` + *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); +} + +JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) +{ + // FIXME: should probably return MMTk's heap size + return max_total_memory; +} + +// These are needed to collect MMTk statistics from a Julia program using ccall +JL_DLLEXPORT void (jl_mmtk_harness_begin)(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_harness_begin(ptls); +} + +JL_DLLEXPORT void (jl_mmtk_harness_end)(void) +{ + mmtk_harness_end(); +} + +// ========================================================================= // +// Root Processing, Object Scanning and Julia-specific sweeping +// ========================================================================= // + +static void add_node_to_roots_buffer(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len, void* root) { + if (root == NULL) + return; + + buf->ptr[*buf_len] = root; + *buf_len += 1; + if (*buf_len >= buf->cap) { + RootsWorkBuffer new_buf = (closure->report_nodes_func)(buf->ptr, *buf_len, buf->cap, closure->data, true); + *buf = new_buf; + *buf_len = 0; + } +} + +static void add_node_to_tpinned_roots_buffer(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len, void* root) { + if (root == NULL) + return; + + buf->ptr[*buf_len] = root; + *buf_len += 1; + if (*buf_len >= buf->cap) { + RootsWorkBuffer new_buf = (closure->report_tpinned_nodes_func)(buf->ptr, *buf_len, buf->cap, closure->data, true); + *buf = new_buf; + *buf_len = 0; + } +} + +JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure) +{ + // Create a new buf + RootsWorkBuffer buf = (closure->report_nodes_func)((void**)0, 0, 0, closure->data, true); + size_t len = 0; + + // add module + add_node_to_roots_buffer(closure, &buf, &len, jl_main_module); + + // buildin values + 
add_node_to_roots_buffer(closure, &buf, &len, jl_an_empty_vec_any); + add_node_to_roots_buffer(closure, &buf, &len, jl_module_init_order); + for (size_t i = 0; i < jl_current_modules.size; i += 2) { + if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { + add_node_to_roots_buffer(closure, &buf, &len, jl_current_modules.table[i]); + } + } + add_node_to_roots_buffer(closure, &buf, &len, jl_anytuple_type_type); + for (size_t i = 0; i < N_CALL_CACHE; i++) { + jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + add_node_to_roots_buffer(closure, &buf, &len, v); + } + add_node_to_roots_buffer(closure, &buf, &len, _jl_debug_method_invalidation); + + // constants + add_node_to_roots_buffer(closure, &buf, &len, jl_emptytuple_type); + add_node_to_roots_buffer(closure, &buf, &len, cmpswap_names); + + // jl_global_roots_table must be transitively pinned + RootsWorkBuffer tpinned_buf = (closure->report_tpinned_nodes_func)((void**)0, 0, 0, closure->data, true); + size_t tpinned_len = 0; + add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_list); + add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_keyset); + + // Push the result of the work. + (closure->report_nodes_func)(buf.ptr, len, buf.cap, closure->data, false); + (closure->report_tpinned_nodes_func)(tpinned_buf.ptr, tpinned_len, tpinned_buf.cap, closure->data, false); +} + +JL_DLLEXPORT void jl_gc_scan_julia_exc_obj(void* obj_raw, void* closure, ProcessSlotFn process_slot) { + jl_task_t *ta = (jl_task_t*)obj_raw; + + if (ta->excstack) { // inlining label `excstack` from mark_loop + + // the excstack should always be a heap object + assert(mmtk_object_is_managed_by_mmtk(ta->excstack)); + + process_slot(closure, &ta->excstack); + jl_excstack_t *excstack = ta->excstack; + size_t itr = ta->excstack->top; + size_t bt_index = 0; + size_t jlval_index = 0; + while (itr > 0) { + size_t bt_size = jl_excstack_bt_size(excstack, itr); + jl_bt_element_t *bt_data = jl_excstack_bt_data(excstack, itr); + for (; bt_index < bt_size; bt_index += jl_bt_entry_size(bt_data + bt_index)) { + jl_bt_element_t *bt_entry = bt_data + bt_index; + if (jl_bt_is_native(bt_entry)) + continue; + // Found an extended backtrace entry: iterate over any + // GC-managed values inside. 
+ size_t njlvals = jl_bt_num_jlvals(bt_entry); + while (jlval_index < njlvals) { + jl_value_t** new_obj_slot = &bt_entry[2 + jlval_index].jlvalue; + jlval_index += 1; + process_slot(closure, new_obj_slot); + } + jlval_index = 0; + } + + jl_bt_element_t *stack_raw = (jl_bt_element_t *)(excstack+1); + jl_value_t** stack_obj_slot = &stack_raw[itr-1].jlvalue; + + itr = jl_excstack_next(excstack, itr); + bt_index = 0; + jlval_index = 0; + process_slot(closure, stack_obj_slot); + } + } +} + +// This is used in mmtk_sweep_malloced_memory and it is slightly different +// from jl_gc_free_memory from gc-stock.c as the stock GC updates the +// information in the global variable gc_heap_stats (which is specific to the stock GC) +static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT +{ + assert(jl_is_genericmemory(m)); + assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2); + char *d = (char*)m->ptr; + size_t freed_bytes = memory_block_usable_size(d, isaligned); + assert(freed_bytes != 0); + if (isaligned) + jl_free_aligned(d); + else + free(d); + gc_num.freed += freed_bytes; + gc_num.freecall++; +} + +JL_DLLEXPORT void jl_gc_mmtk_sweep_malloced_memory(void) JL_NOTSAFEPOINT +{ + void* iter = mmtk_new_mutator_iterator(); + jl_ptls_t ptls2 = (jl_ptls_t)mmtk_get_next_mutator_tls(iter); + while(ptls2 != NULL) { + size_t n = 0; + size_t l = ptls2->gc_tls_common.heap.mallocarrays.len; + void **lst = ptls2->gc_tls_common.heap.mallocarrays.items; + // filter without preserving order + while (n < l) { + jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1); + if (mmtk_is_live_object(m)) { + n++; + } + else { + int isaligned = (uintptr_t)lst[n] & 1; + jl_gc_free_memory(m, isaligned); + l--; + lst[n] = lst[l]; + } + } + ptls2->gc_tls_common.heap.mallocarrays.len = l; + ptls2 = (jl_ptls_t)mmtk_get_next_mutator_tls(iter); + } + mmtk_close_mutator_iterator(iter); +} + +#define jl_genericmemory_elsize(a) (((jl_datatype_t*)jl_typetagof(a))->layout->size) + +// if data is inlined inside the genericmemory object --- to->ptr needs to be updated when copying the array +JL_DLLEXPORT void jl_gc_update_inlined_array(void* from, void* to) { + jl_value_t* jl_from = (jl_value_t*) from; + jl_value_t* jl_to = (jl_value_t*) to; + + uintptr_t tag_to = (uintptr_t)jl_typeof(jl_to); + jl_datatype_t *vt = (jl_datatype_t*)tag_to; + + if(vt->name == jl_genericmemory_typename) { + jl_genericmemory_t *a = (jl_genericmemory_t*)jl_from; + jl_genericmemory_t *b = (jl_genericmemory_t*)jl_to; + int how = jl_genericmemory_how(a); + + if (how == 0 && mmtk_object_is_managed_by_mmtk(a->ptr)) { // a is inlined (a->ptr points into the mmtk object) + size_t offset_of_data = ((size_t)a->ptr - (size_t)a); + if (offset_of_data > 0) { + b->ptr = (void*)((size_t) b + offset_of_data); + } + } + } +} + +// modified sweep_stack_pools from gc-stacks.c +JL_DLLEXPORT void jl_gc_mmtk_sweep_stack_pools(void) +{ + // Stack sweeping algorithm: + // // deallocate stacks if we have too many sitting around unused + // for (stk in halfof(free_stacks)) + // free_stack(stk, pool_sz); + // // then sweep the task stacks + // for (t in live_tasks) + // if (!gc-marked(t)) + // stkbuf = t->stkbuf + // bufsz = t->bufsz + // if (stkbuf) + // push(free_stacks[sz], stkbuf) + assert(gc_n_threads); + for (int i = 0; i < jl_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; + + // free half of stacks that remain unused since last sweep + for (int p = 0; p < JL_N_STACK_POOLS; p++) { 
+ small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; + size_t n_to_free; + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + n_to_free = al->len; // not alive yet or dead, so it does not need these anymore + } + else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) { + n_to_free = al->len / 2; + if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL)) + n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL; + } + else { + n_to_free = 0; + } + for (int n = 0; n < n_to_free; n++) { + void *stk = small_arraylist_pop(al); + free_stack(stk, pool_sizes[p]); + } + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + small_arraylist_free(al); + } + } + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); + } + + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; + size_t n = 0; + size_t ndel = 0; + size_t l = live_tasks->len; + void **lst = live_tasks->items; + if (l == 0) + continue; + while (1) { + jl_task_t *t = (jl_task_t*)lst[n]; + if (mmtk_is_live_object(t)) { + jl_task_t *maybe_forwarded = (jl_task_t*)mmtk_get_possibly_forwarded(t); + live_tasks->items[n] = maybe_forwarded; + t = maybe_forwarded; + assert(jl_is_task(t)); + if (t->ctx.stkbuf == NULL) + ndel++; // jl_release_task_stack called + else + n++; + } else { + ndel++; + void *stkbuf = t->ctx.stkbuf; + size_t bufsz = t->ctx.bufsz; + if (stkbuf) { + t->ctx.stkbuf = NULL; + _jl_free_stack(ptls2, stkbuf, bufsz); + } +#ifdef _COMPILER_TSAN_ENABLED_ + if (t->ctx.tsan_state) { + __tsan_destroy_fiber(t->ctx.tsan_state); + t->ctx.tsan_state = NULL; + } +#endif + } + if (n >= l - ndel) + break; + void *tmp = lst[n]; + lst[n] = lst[n + ndel]; + lst[n + ndel] = tmp; + } + live_tasks->len -= ndel; + } +} + +JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + jl_gc_mmtk_sweep_stack_pools(); + sweep_mtarraylist_buffers(); +} + +JL_DLLEXPORT void* jl_gc_get_stackbase(int16_t tid) { + assert(tid >= 0); + jl_ptls_t ptls2 = jl_all_tls_states[tid]; + return ptls2->stackbase; +} + +JL_DLLEXPORT void jl_gc_update_stats(uint64_t inc, size_t mmtk_live_bytes, bool is_nursery_gc) { + gc_num.total_time += inc; + gc_num.pause += 1; + gc_num.full_sweep += !(is_nursery_gc); + gc_num.total_allocd += gc_num.allocd; + gc_num.allocd = 0; + live_bytes = mmtk_live_bytes; +} + +#define jl_genericmemory_data_owner_field_addr(a) ((jl_value_t**)((jl_genericmemory_t*)(a) + 1)) + +JL_DLLEXPORT void* jl_gc_get_owner_address_to_mmtk(void* m) { + return (void*)jl_genericmemory_data_owner_field_addr(m); +} + +// same as jl_genericmemory_how but with JL_DLLEXPORT +// we should probably inline this in Rust +JL_DLLEXPORT size_t jl_gc_genericmemory_how(void *arg) JL_NOTSAFEPOINT +{ + jl_genericmemory_t* m = (jl_genericmemory_t*)arg; + if (m->ptr == (void*)((char*)m + 16)) // JL_SMALL_BYTE_ALIGNMENT (from julia_internal.h) + return 0; + jl_value_t *owner = jl_genericmemory_data_owner_field(m); + if (owner == (jl_value_t*)m) + return 1; + if (owner == NULL) + return 2; + return 3; +} + +// ========================================================================= // +// Weak References and Finalizers +// ========================================================================= // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + wr->value = value; // NOTE: wb not needed here + 
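+    // (Clarifying comment added in editing) registering the new weakref with
+    // MMTk lets the binding treat wr->value as a weak edge and clear it after
+    // a collection in which the referent is found dead.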
mmtk_add_weak_candidate(wr); + return wr; +} + +JL_DLLEXPORT void* jl_gc_get_thread_finalizer_list(void* ptls_raw) { + jl_ptls_t ptls = (jl_ptls_t) ptls_raw; + return (void*)&ptls->finalizers; +} + +JL_DLLEXPORT void* jl_gc_get_to_finalize_list(void) { + return (void*)&to_finalize; +} + +JL_DLLEXPORT void* jl_gc_get_marked_finalizers_list(void) { + return (void*)&finalizer_list_marked; +} + +JL_DLLEXPORT int* jl_gc_get_have_pending_finalizers(void) { + return (int*)&jl_gc_have_pending_finalizers; +} + +// ========================================================================= // +// Allocation +// ========================================================================= // + +#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) +#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) + +int jl_gc_classify_pools(size_t sz, int *osize) +{ + if (sz > GC_MAX_SZCLASS) + return -1; // call big alloc function + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + *osize = LLT_ALIGN(allocsz, 16); + return 0; // use MMTk's fastpath logic +} + +#define MMTK_MIN_ALIGNMENT 4 +// MMTk assumes allocation size is aligned to min alignment. +STATIC_INLINE size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT +{ + return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1); +} + +STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) { + intptr_t delta = (-offset - *cursor) & (align - 1); + uintptr_t result = *cursor + (uintptr_t)delta; + + if (__unlikely(result + size > limit)) { + return (void*) mmtk_alloc(mutator, size, align, offset, allocator); + } else{ + *cursor = result + size; + return (void*)result; + } +} + +STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); +} + +inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) { + mmtk_post_alloc(mutator, obj, size, 0); +} + +STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + // FIXME: for now, we do nothing + // but when supporting moving, this is where we set the valid object (VO) bit +} + +STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); +} + +STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + // FIXME: Similarly, for now, we do nothing + // but when supporting moving, this is where we set the valid object (VO) bit + // and log (old gen) bit +} + +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) +{ + // safepoint + jl_gc_safepoint_(ptls); + + jl_value_t *v; + if ((uintptr_t)ty != jl_buff_tag) { + // v needs to be 16 byte aligned, therefore v_tagged needs to be offset accordingly to consider the size of header + jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize, align), align, sizeof(jl_taggedvalue_t)); + v = jl_valueof(v_tagged); + 
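+        // (Worked example added for clarity; assumes a 64-bit build where
+        // sizeof(jl_taggedvalue_t) == 8 and align == 16) passing
+        // offset == sizeof(jl_taggedvalue_t) to the bump allocator makes
+        // v_tagged + 8, i.e. v itself, land on a 16-byte boundary.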
mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize, align)); + } else { + // allocating an extra word to store the size of buffer objects + jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align), align, 0); + jl_value_t* v_tagged_aligned = ((jl_value_t*)((char*)(v_tagged) + sizeof(jl_taggedvalue_t))); + v = jl_valueof(v_tagged_aligned); + mmtk_store_obj_size_c(v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); + mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); + } + + ptls->gc_tls_common.gc_num.allocd += osize; + ptls->gc_tls_common.gc_num.poolalloc++; + + return v; +} + +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz) +{ + // safepoint + jl_gc_safepoint_(ptls); + + size_t offs = offsetof(bigval_t, header); + assert(sz >= sizeof(jl_taggedvalue_t) && "sz must include tag"); + static_assert(offsetof(bigval_t, header) >= sizeof(void*), "Empty bigval header?"); + static_assert(sizeof(bigval_t) % JL_HEAP_ALIGNMENT == 0, ""); + size_t allocsz = LLT_ALIGN(sz + offs, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) { // overflow in adding offs, size was "negative" + assert(0 && "Error when allocating big object"); + jl_throw(jl_memory_exception); + } + + bigval_t *v = (bigval_t*)mmtk_alloc_large(&ptls->gc_tls.mmtk_mutator, allocsz, JL_CACHE_BYTE_ALIGNMENT, 0, 2); + + if (v == NULL) { + assert(0 && "Allocation failed"); + jl_throw(jl_memory_exception); + } + v->sz = allocsz; + + ptls->gc_tls_common.gc_num.allocd += allocsz; + ptls->gc_tls_common.gc_num.bigalloc++; + + jl_value_t *result = jl_valueof(&v->header); + mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2); + + return result; +} + +// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type) +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + + jl_value_t *val = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL); + maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type); + return val; +} + +// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) +{ + // TODO: assertion needed here? 
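+    // (Explanatory note) gc_state == 0 means the thread is running managed,
+    // GC-unsafe code, the only state in which generated code may request
+    // GC-managed memory; this mirrors the check in jl_gc_small_alloc above.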
+ assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + + jl_value_t *val = jl_mmtk_gc_alloc_big(ptls, sz); + maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type); + return val; +} + +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_mmtk_gc_alloc_big(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + +// allocation wrappers that track allocation and let collection run +JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + void *data = malloc(sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz); + } + return data; +} + +JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + void *data = calloc(nm, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, nm * sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); + } + return data; +} + +JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + free(p); + if (pgcstack != NULL && ct->world_age) { + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz); + } +} + +JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, sz); + if (sz < old) + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz); + else + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old); + } + return realloc(p, sz); +} + +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +{ + jl_ptls_t ptls = jl_current_task->ptls; + size_t allocsz = mmtk_align_alloc_sz(sz); + void* addr = mmtk_immortal_alloc_fast(&ptls->gc_tls.mmtk_mutator, allocsz, align, offset); + return addr; +} + +void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) +{ + return jl_gc_perm_alloc_nolock(sz, zero, align, offset); +} + +jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT +{ + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ? 
+                                sizeof(void*) * 2 : 16));
+    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
+                                                              sizeof(void*) % align);
+
+    jl_ptls_t ptls = jl_current_task->ptls;
+    mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(o), allocsz);
+    o->header = (uintptr_t)ty;
+    return jl_valueof(o);
+}
+
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    maybe_collect(ptls);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz) // overflow in adding offs, size was "negative"
+        jl_throw(jl_memory_exception);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *b = malloc_cache_align(allocsz);
+    if (b == NULL)
+        jl_throw(jl_memory_exception);
+
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+    // FIXME: Should these be part of mmtk's heap?
+    // malloc_maybe_collect(ptls, sz);
+    // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
+    return b;
+}
+
+void jl_gc_notify_image_load(const char* img_data, size_t len)
+{
+    mmtk_set_vm_space((void*)img_data, len);
+}
+
+// ========================================================================= //
+// Code specific to the stock GC that is not supported by MMTk
+// ========================================================================= //
+
+// mutex for page profile
+uv_mutex_t page_profile_lock;
+
+JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
+{
+    uv_mutex_lock(&page_profile_lock);
+    const char *str = "Page profiler is unsupported in MMTk.";
+    ios_write(stream, str, strlen(str));
+    uv_mutex_unlock(&page_profile_lock);
+}
+
+// this seems to be needed by the gc tests
+#define JL_GC_N_MAX_POOLS 51
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
+{
+    // FIXME: MMTk would have to provide its own stats
+}
+
+#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return MMTK_GC_PAGE_SZ;
+}
+
+// Not used by mmtk
+// Number of GC threads that may run parallel marking
+int jl_n_markthreads;
+// Number of GC threads that may run concurrent sweeping (0 or 1)
+int jl_n_sweepthreads;
+// `tid` of first GC thread
+int gc_first_tid;
+// Number of threads sweeping stacks
+_Atomic(int) gc_n_threads_sweeping_stacks;
+// counter for sharing work when sweeping stacks
+_Atomic(int) gc_ptls_sweep_idx;
+// counter for round robin of giving back stack pages to the OS
+_Atomic(int) gc_stack_free_idx = 0;
+
+JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
+                                        struct _jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
+{
+    mmtk_unreachable();
+    return 0;
+}
+
+JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
+                                            jl_value_t **objs,
size_t nobjs) +{ + mmtk_unreachable(); +} + +JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) +{ + // TODO: meaningful for MMTk? + return GC_MAX_SZCLASS; +} + +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +{ + // FIXME: do we need to implement this? +} + +// gc-debug functions +JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) +{ + return NULL; +} + +void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT +{ +} + +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return 0; +} + +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT +{ + // May not be accurate but should be helpful enough + uint64_t pool_count = gc_num.poolalloc; + uint64_t big_count = gc_num.bigalloc; + jl_safe_printf("Allocations: %" PRIu64 " " + "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n", + pool_count + big_count, pool_count, big_count, gc_num.pause); +} + +JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) +{ + return sizeof(bigval_t); +} + +void jl_print_gc_stats(JL_STREAM *s) +{ +} + +JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) +{ + return 0; +} + +JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) +{ + return 0; +} + +// TODO: if this is needed, it can be added in MMTk +JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) +{ + return NULL; +} + +#ifdef __cplusplus +} +#endif diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c new file mode 100644 index 0000000000000..2625fa812781a --- /dev/null +++ b/src/gc-page-profiler.c @@ -0,0 +1,180 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "gc-page-profiler.h" +#include "julia.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// whether page profiling is enabled +int page_profile_enabled; +// number of pages written +size_t page_profile_pages_written; +// stream to write page profile to +ios_t *page_profile_stream; +// mutex for page profile +uv_mutex_t page_profile_lock; + +gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT +{ + gc_page_profiler_serializer_t serializer; + if (__unlikely(page_profile_enabled)) { + arraylist_new(&serializer.typestrs, GC_PAGE_SZ); + serializer.buffers = (char *)malloc_s(GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY); + serializer.cursor = 0; + } + else { + serializer.typestrs.len = 0; + } + return serializer; +} + +void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer, + jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + serializer->typestrs.len = 0; + serializer->data = (char *)pg->data; + serializer->osize = pg->osize; + serializer->cursor = 0; + serializer->capacity = GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY; + } +} + +void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + arraylist_free(&serializer->typestrs); + free(serializer->buffers); + } +} + +void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer, + const char *str) JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + arraylist_push(&serializer->typestrs, (void *)str); + } +} + +void gc_enable_page_profile(void) JL_NOTSAFEPOINT +{ + page_profile_enabled = 1; +} + +void gc_disable_page_profile(void) JL_NOTSAFEPOINT +{ + page_profile_enabled = 0; +} + +int gc_page_profile_is_enabled(void) JL_NOTSAFEPOINT +{ + return page_profile_enabled; +} + +void gc_page_profile_write_preamble(gc_page_profiler_serializer_t *serializer) 
+ JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + const size_t large_enough_str_size = 4096; + char str[large_enough_str_size]; + snprintf(str, large_enough_str_size, + "{\"address\": \"%p\",\"object_size\": %d,\"objects\": [", + serializer->data, serializer->osize); + ios_write(page_profile_stream, str, strlen(str)); + } +} + +void gc_page_profile_write_epilogue(gc_page_profiler_serializer_t *serializer) + JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + const char *str = "]}"; + ios_write(page_profile_stream, str, strlen(str)); + } +} + +void gc_page_profile_write_comma(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + // write comma if not first page + if (page_profile_pages_written > 0) { + const char *str = ","; + ios_write(page_profile_stream, str, strlen(str)); + } + } +} + +void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer) + JL_NOTSAFEPOINT +{ + size_t large_enough_str_size = 4096; + if (__unlikely(page_profile_enabled)) { + // write to file + uv_mutex_lock(&page_profile_lock); + gc_page_profile_write_comma(serializer); + gc_page_profile_write_preamble(serializer); + char *str = (char *)malloc_s(large_enough_str_size); + for (size_t i = 0; i < serializer->typestrs.len; i++) { + const char *name = (const char *)serializer->typestrs.items[i]; + if (name == GC_SERIALIZER_EMPTY) { + snprintf(str, large_enough_str_size, "\"empty\","); + } + else if (name == GC_SERIALIZER_GARBAGE) { + snprintf(str, large_enough_str_size, "\"garbage\","); + } + else { + while ((strlen(name) + 1) > large_enough_str_size) { + large_enough_str_size *= 2; + str = (char *)realloc_s(str, large_enough_str_size); + } + snprintf(str, large_enough_str_size, "\"%s\",", name); + } + // remove trailing comma for last element + if (i == serializer->typestrs.len - 1) { + str[strlen(str) - 1] = '\0'; + } + ios_write(page_profile_stream, str, strlen(str)); + } + free(str); + gc_page_profile_write_epilogue(serializer); + page_profile_pages_written++; + uv_mutex_unlock(&page_profile_lock); + } +} + +void gc_page_profile_write_json_preamble(ios_t *stream) JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + uv_mutex_lock(&page_profile_lock); + const char *str = "{\"pages\": ["; + ios_write(stream, str, strlen(str)); + uv_mutex_unlock(&page_profile_lock); + } +} + +void gc_page_profile_write_json_epilogue(ios_t *stream) JL_NOTSAFEPOINT +{ + if (__unlikely(page_profile_enabled)) { + uv_mutex_lock(&page_profile_lock); + const char *str = "]}"; + ios_write(stream, str, strlen(str)); + uv_mutex_unlock(&page_profile_lock); + } +} + +JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) +{ + gc_enable_page_profile(); + page_profile_pages_written = 0; + page_profile_stream = stream; + gc_page_profile_write_json_preamble(stream); + jl_gc_collect(JL_GC_FULL); + gc_page_profile_write_json_epilogue(stream); + gc_disable_page_profile(); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/gc-page-profiler.h b/src/gc-page-profiler.h new file mode 100644 index 0000000000000..0dd72ad072fa9 --- /dev/null +++ b/src/gc-page-profiler.h @@ -0,0 +1,146 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#ifndef GC_PAGE_PROFILER_H +#define GC_PAGE_PROFILER_H + +#include "gc-stock.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY (4096) + +typedef struct { + arraylist_t typestrs; + char *data; + int osize; + char *buffers; + size_t cursor; + size_t capacity; +} gc_page_profiler_serializer_t; + +// mutex for page profile +extern uv_mutex_t page_profile_lock; +// whether page profiling is enabled +extern int page_profile_enabled; + +// Serializer functions +gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT; +void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer, + jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT; +void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT; +void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer, + const char *str) JL_NOTSAFEPOINT; +// Page profile functions +#define GC_SERIALIZER_EMPTY ((const char *)0x1) +#define GC_SERIALIZER_GARBAGE ((const char *)0x2) +STATIC_INLINE void gc_page_profile_write_empty_page(gc_page_profiler_serializer_t *serializer, + int enabled) JL_NOTSAFEPOINT +{ + if (__unlikely(enabled)) { + gc_page_serializer_write(serializer, GC_SERIALIZER_EMPTY); + } +} +STATIC_INLINE void gc_page_profile_write_garbage(gc_page_profiler_serializer_t *serializer, + int enabled) JL_NOTSAFEPOINT +{ + if (__unlikely(enabled)) { + gc_page_serializer_write(serializer, GC_SERIALIZER_GARBAGE); + } +} +STATIC_INLINE char *gc_page_profile_request_buffer(gc_page_profiler_serializer_t *serializer, size_t size) JL_NOTSAFEPOINT +{ + while (serializer->cursor + size >= serializer->capacity) { + serializer->capacity *= 2; + serializer->buffers = (char *)realloc_s(serializer->buffers, serializer->capacity); + } + char *p = &serializer->buffers[serializer->cursor]; + memset(p, 0, size); + serializer->cursor += size; + return p; +} +STATIC_INLINE void gc_page_profile_write_live_obj(gc_page_profiler_serializer_t *serializer, + jl_taggedvalue_t *v, + int enabled) JL_NOTSAFEPOINT +{ + if (__unlikely(enabled)) { + jl_value_t *a = jl_valueof(v); + jl_value_t *t = jl_typeof(a); + ios_t str_; + int ios_need_close = 0; + char *type_name = NULL; + char *type_name_in_serializer = NULL; + if (t == (jl_value_t *)jl_get_buff_tag()) { + type_name = "Buffer"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_string(a)) { + type_name = "String"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_symbol(a)) { + type_name = jl_symbol_name((jl_sym_t *)a); + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_simplevector(a)) { + type_name = "SimpleVector"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_module(a)) { + type_name = jl_symbol_name_(((jl_module_t *)a)->name); + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_task(a)) { + type_name = "Task"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + 
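+            // (Clarifying comment) the string is copied into serializer-owned
+            // storage because it is only consumed later, when
+            // gc_page_profile_write_to_file flushes this page's type names.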
strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_datatype(a)) { + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM *str = (JL_STREAM *)&str_; + jl_static_show(str, a); + type_name = str_.buf; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, str_.size + 1); + memcpy(type_name_in_serializer, type_name, str_.size); + } + else { + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM *str = (JL_STREAM *)&str_; + jl_static_show(str, t); + type_name = str_.buf; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, str_.size + 1); + memcpy(type_name_in_serializer, type_name, str_.size); + } + gc_page_serializer_write(serializer, type_name_in_serializer); + if (ios_need_close) { + ios_close(&str_); + } + jl_may_leak(type_name_in_serializer); + } +} +void gc_enable_page_profile(void) JL_NOTSAFEPOINT; +void gc_disable_page_profile(void) JL_NOTSAFEPOINT; +int gc_page_profile_is_enabled(void) JL_NOTSAFEPOINT; +void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT; + +#ifdef __cplusplus +} +#endif + +#endif // GC_PAGE_PROFILER_H diff --git a/src/gc-pages.c b/src/gc-pages.c index 682e76611f5d9..71d59de29166f 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,6 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#include "gc.h" +#include "gc-common.h" +#include "gc-stock.h" #ifndef _OS_WINDOWS_ # include #endif @@ -9,6 +10,13 @@ extern "C" { #endif +uv_mutex_t gc_pages_lock; + +JL_DLLEXPORT uint64_t jl_get_pg_size(void) +{ + return GC_PAGE_SZ; +} + // Try to allocate memory in chunks to permit faster allocation // and improve memory locality of the pools #ifdef _P64 @@ -52,6 +60,8 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT // round data pointer up to the nearest gc_page_data-aligned // boundary if mmap didn't already do so. mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, pages_sz); return mem; } @@ -60,7 +70,7 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT // more chunks (or other allocations). The final page count is recorded // and will be used as the starting count next time. If the page count is // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown. -// Assumes `gc_perm_lock` is acquired, the lock is released before the +// Assumes `gc_pages_lock` is acquired, the lock is released before the // exception is thrown. 
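+// Illustrative call pattern (editorial sketch, not part of this change); the
+// page allocator below uses essentially this shape:
+//
+//     uv_mutex_lock(&gc_pages_lock);
+//     char *data = jl_gc_try_alloc_pages();  // on failure: unlocks, then throws
+//     // ... carve the block into GC_PAGE_SZ-sized pages ...
+//     uv_mutex_unlock(&gc_pages_lock);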
char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT { @@ -80,7 +90,7 @@ char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT block_pg_cnt = pg_cnt = min_block_pg_alloc; } else { - uv_mutex_unlock(&gc_perm_lock); + uv_mutex_unlock(&gc_pages_lock); jl_throw(jl_memory_exception); } } @@ -98,7 +108,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT jl_gc_pagemeta_t *meta = NULL; // try to get page from `pool_lazily_freed` - meta = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); + meta = pop_lf_back(&global_page_pool_lazily_freed); if (meta != NULL) { gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); // page is already mapped @@ -106,25 +116,26 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT } // try to get page from `pool_clean` - meta = pop_lf_page_metadata_back(&global_page_pool_clean); + meta = pop_lf_back(&global_page_pool_clean); if (meta != NULL) { gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } // try to get page from `pool_freed` - meta = pop_lf_page_metadata_back(&global_page_pool_freed); + meta = pop_lf_back(&global_page_pool_freed); if (meta != NULL) { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ); gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } - uv_mutex_lock(&gc_perm_lock); + uv_mutex_lock(&gc_pages_lock); // another thread may have allocated a large block while we were waiting... - meta = pop_lf_page_metadata_back(&global_page_pool_clean); + meta = pop_lf_back(&global_page_pool_clean); if (meta != NULL) { - uv_mutex_unlock(&gc_perm_lock); - gc_alloc_map_set(meta->data, 1); + uv_mutex_unlock(&gc_pages_lock); + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } // must map a new set of pages @@ -135,13 +146,13 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT pg->data = data + GC_PAGE_SZ * i; gc_alloc_map_maybe_create(pg->data); if (i == 0) { - gc_alloc_map_set(pg->data, 1); + gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED); } else { - push_lf_page_metadata_back(&global_page_pool_clean, pg); + push_lf_back(&global_page_pool_clean, pg); } } - uv_mutex_unlock(&gc_perm_lock); + uv_mutex_unlock(&gc_pages_lock); exit: #ifdef _OS_WINDOWS_ VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); @@ -188,6 +199,7 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT madvise(p, decommit_size, MADV_DONTNEED); #endif msan_unpoison(p, decommit_size); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, -decommit_size); } #ifdef __cplusplus diff --git a/src/gc-stacks.c b/src/gc-stacks.c index b35c1722c82ff..9387c7fb065ec 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -1,6 +1,7 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license -#include "gc.h" +#include "gc-common.h" +#include "threading.h" #ifndef _OS_WINDOWS_ # include #endif @@ -19,9 +20,6 @@ # endif #endif -// number of stacks to always keep available per pool -#define MIN_STACK_MAPPINGS_PER_POOL 5 - const size_t jl_guard_size = (4096 * 8); static _Atomic(uint32_t) num_stack_mappings = 0; @@ -32,47 +30,65 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT void *stk = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); if (stk == NULL) return MAP_FAILED; + + // set up a guard page to detect stack overflow DWORD dwOldProtect; if (!VirtualProtect(stk, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &dwOldProtect)) { VirtualFree(stk, 0, MEM_RELEASE); return MAP_FAILED; } - jl_atomic_fetch_add(&num_stack_mappings, 1); + + jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1); return stk; } -static void free_stack(void *stkbuf, size_t bufsz) +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); - jl_atomic_fetch_add(&num_stack_mappings, -1); + jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); } #else static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT { +# ifdef _OS_OPENBSD_ + // we don't set up a guard page to detect stack overflow: on OpenBSD, any + // mmap-ed region has guard page managed by the kernel, so there is no + // need for it. Additionally, a memory region used as stack (memory + // allocated with MAP_STACK option) has strict permission, and you can't + // "create" a guard page on such memory by using `mprotect` on it + void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stk == MAP_FAILED) + return MAP_FAILED; +# else void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (stk == MAP_FAILED) return MAP_FAILED; -#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK) - // setup a guard page to detect stack overflow + + // set up a guard page to detect stack overflow if (mprotect(stk, jl_guard_size, PROT_NONE) == -1) { munmap(stk, bufsz); return MAP_FAILED; } -#endif - jl_atomic_fetch_add(&num_stack_mappings, 1); +# endif + + jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1); return stk; } -static void free_stack(void *stkbuf, size_t bufsz) +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); - jl_atomic_fetch_add(&num_stack_mappings, -1); + jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); } #endif +JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void) JL_NOTSAFEPOINT +{ + return jl_atomic_load_relaxed(&num_stack_mappings); +} const unsigned pool_sizes[] = { 128 * 1024, @@ -104,7 +120,7 @@ static unsigned select_pool(size_t nb) JL_NOTSAFEPOINT } -static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) +void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); @@ -112,7 +128,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -130,18 +146,18 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz) void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) { // avoid adding an 
original thread stack to the free list - if (task == ptls->root_task && !task->copy_stack) + if (task == ptls->root_task && !task->ctx.copy_stack) return; - void *stkbuf = task->stkbuf; - size_t bufsz = task->bufsz; + void *stkbuf = task->ctx.stkbuf; + size_t bufsz = task->ctx.bufsz; if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - task->stkbuf = NULL; + task->ctx.stkbuf = NULL; #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -156,9 +172,9 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - arraylist_t *pool = &ptls->heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { - stk = arraylist_pop(pool); + stk = small_arraylist_pop(pool); } } else { @@ -177,108 +193,85 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - arraylist_t *live_tasks = &ptls->heap.live_tasks; - arraylist_push(live_tasks, owner); + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; + mtarraylist_push(live_tasks, owner); } return stk; } -void sweep_stack_pools(void) +// Builds a list of the live tasks. Racy: `live_tasks` can expand at any time. +arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT { - // Stack sweeping algorithm: - // // deallocate stacks if we have too many sitting around unused - // for (stk in halfof(free_stacks)) - // free_stack(stk, pool_sz); - // // then sweep the task stacks - // for (t in live_tasks) - // if (!gc-marked(t)) - // stkbuf = t->stkbuf - // bufsz = t->bufsz - // if (stkbuf) - // push(free_stacks[sz], stkbuf) - assert(gc_n_threads); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - - // free half of stacks that remain unused since last sweep - for (int p = 0; p < JL_N_STACK_POOLS; p++) { - arraylist_t *al = &ptls2->heap.free_stacks[p]; - size_t n_to_free; - if (al->len > MIN_STACK_MAPPINGS_PER_POOL) { - n_to_free = al->len / 2; - if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL)) - n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL; - } - else { - n_to_free = 0; - } - for (int n = 0; n < n_to_free; n++) { - void *stk = arraylist_pop(al); - free_stack(stk, pool_sizes[p]); - } + arraylist_t *tasks = (arraylist_t*)malloc_s(sizeof(arraylist_t)); + arraylist_new(tasks, 0); + size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + for (size_t i = 0; i < nthreads; i++) { + // skip GC threads... 
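+        // (added explanation) collector threads only run GC work and never own
+        // Julia tasks, so there is nothing for them to contribute here.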
+ if (gc_is_collector_thread(i)) { + continue; } - - arraylist_t *live_tasks = &ptls2->heap.live_tasks; - size_t n = 0; - size_t ndel = 0; - size_t l = live_tasks->len; - void **lst = live_tasks->items; - if (l == 0) + jl_ptls_t ptls2 = allstates[i]; + if (ptls2 == NULL) { continue; - while (1) { - jl_task_t *t = (jl_task_t*)lst[n]; - assert(jl_is_task(t)); - if (gc_marked(jl_astaggedvalue(t)->bits.gc)) { - if (t->stkbuf == NULL) - ndel++; // jl_release_task_stack called - else - n++; - } - else { - ndel++; - void *stkbuf = t->stkbuf; - size_t bufsz = t->bufsz; - if (stkbuf) { - t->stkbuf = NULL; - _jl_free_stack(ptls2, stkbuf, bufsz); - } -#ifdef _COMPILER_TSAN_ENABLED_ - if (t->ctx.tsan_state) { - __tsan_destroy_fiber(t->ctx.tsan_state); - t->ctx.tsan_state = NULL; - } -#endif + } + jl_task_t *t = ptls2->root_task; + if (t->ctx.stkbuf != NULL) { + arraylist_push(tasks, t); + } + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + for (size_t i = 0; i < n; i++) { + jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); + assert(t != NULL); + if (t->ctx.stkbuf != NULL) { + arraylist_push(tasks, t); } - if (n >= l - ndel) - break; - void *tmp = lst[n]; - lst[n] = lst[n + ndel]; - lst[n + ndel] = tmp; } - live_tasks->len -= ndel; } + return tasks; } JL_DLLEXPORT jl_array_t *jl_live_tasks(void) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - arraylist_t *live_tasks = &ptls->heap.live_tasks; - size_t i, j, l; - jl_array_t *a; - do { - l = live_tasks->len; - a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks - } while (l + 1 < live_tasks->len); - l = live_tasks->len; - void **lst = live_tasks->items; - j = 0; - ((void**)jl_array_data(a))[j++] = ptls->root_task; - for (i = 0; i < l; i++) { - if (((jl_task_t*)lst[i])->stkbuf != NULL) - ((void**)jl_array_data(a))[j++] = lst[i]; + size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list everything it fails +restart: + for (size_t i = 0; i < nthreads; i++) { + jl_ptls_t ptls2 = allstates[i]; + if (ptls2 == NULL) + continue; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + l += n + (ptls2->root_task->ctx.stkbuf != NULL); + } + l += l / 20; // add 5% for margin of estimation error + jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything + nthreads = jl_atomic_load_acquire(&jl_n_threads); + allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + size_t j = 0; + for (size_t i = 0; i < nthreads; i++) { + jl_ptls_t ptls2 = allstates[i]; + if (ptls2 == NULL) + continue; + jl_task_t *t = ptls2->root_task; + if (t->ctx.stkbuf != NULL) { + if (j == l) + goto restart; + jl_array_data(a,void*)[j++] = t; + } + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + for (size_t i = 0; i < n; i++) { + jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); + if (t->ctx.stkbuf != NULL) { + if (j == l) + goto restart; + jl_array_data(a,void*)[j++] = t; + } + } } - l = jl_array_len(a); if (j < l) { JL_GC_PUSH1(&a); jl_array_del_end(a, l - j); diff --git a/src/gc.c b/src/gc-stock.c similarity index 57% rename from src/gc.c rename to src/gc-stock.c index 9fd93b7340d56..8118b3c5629ae 100644 --- 
a/src/gc.c +++ b/src/gc-stock.c @@ -1,11 +1,14 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#include "gc.h" +#include "gc-common.h" +#include "gc-stock.h" +#include "gc-alloc-profiler.h" +#include "gc-heap-snapshot.h" +#include "gc-page-profiler.h" +#include "julia.h" +#include "julia_atomics.h" #include "julia_gcext.h" #include "julia_assert.h" -#ifdef __GLIBC__ -#include // for malloc_trim -#endif #ifdef __cplusplus extern "C" { @@ -17,8 +20,18 @@ int jl_n_markthreads; int jl_n_sweepthreads; // Number of threads currently running the GC mark-loop _Atomic(int) gc_n_threads_marking; +// Number of threads sweeping +_Atomic(int) gc_n_threads_sweeping_pools; +// Number of threads sweeping stacks +_Atomic(int) gc_n_threads_sweeping_stacks; +// Temporary for the `ptls->gc_tls.page_metadata_allocd` used during parallel sweeping (padded to avoid false sharing) +_Atomic(jl_gc_padded_page_stack_t *) gc_allocd_scratch; // `tid` of mutator thread that triggered GC _Atomic(int) gc_master_tid; +// counter for sharing work when sweeping stacks +_Atomic(int) gc_ptls_sweep_idx; +// counter for round robin of giving back stack pages to the OS +_Atomic(int) gc_stack_free_idx = 0; // `tid` of first GC thread int gc_first_tid; // Mutex/cond used to synchronize wakeup of GC threads on parallel marking @@ -26,117 +39,12 @@ uv_mutex_t gc_threads_lock; uv_cond_t gc_threads_cond; // To indicate whether concurrent sweeping should run uv_sem_t gc_sweep_assists_needed; - -// Linked list of callback functions - -typedef void (*jl_gc_cb_func_t)(void); - -typedef struct jl_gc_callback_list_t { - struct jl_gc_callback_list_t *next; - jl_gc_cb_func_t func; -} jl_gc_callback_list_t; - -static jl_gc_callback_list_t *gc_cblist_root_scanner; -static jl_gc_callback_list_t *gc_cblist_task_scanner; -static jl_gc_callback_list_t *gc_cblist_pre_gc; -static jl_gc_callback_list_t *gc_cblist_post_gc; -static jl_gc_callback_list_t *gc_cblist_notify_external_alloc; -static jl_gc_callback_list_t *gc_cblist_notify_external_free; - -#define gc_invoke_callbacks(ty, list, args) \ - do { \ - for (jl_gc_callback_list_t *cb = list; \ - cb != NULL; \ - cb = cb->next) \ - { \ - ((ty)(cb->func)) args; \ - } \ - } while (0) - -static void jl_gc_register_callback(jl_gc_callback_list_t **list, - jl_gc_cb_func_t func) -{ - while (*list != NULL) { - if ((*list)->func == func) - return; - list = &((*list)->next); - } - *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t)); - (*list)->next = NULL; - (*list)->func = func; -} - -static void jl_gc_deregister_callback(jl_gc_callback_list_t **list, - jl_gc_cb_func_t func) -{ - while (*list != NULL) { - if ((*list)->func == func) { - jl_gc_callback_list_t *tmp = *list; - (*list) = (*list)->next; - free(tmp); - return; - } - list = &((*list)->next); - } -} - -JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable) -{ - if (enable) - jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb); - else - jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb); -} - -JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable) -{ - if (enable) - jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb); - else - jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb); -} - -JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable) -{ - if (enable) - jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb); - else - 
jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb); -} - -JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable) -{ - if (enable) - jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb); - else - jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb); -} - -JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable) -{ - if (enable) - jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb); - else - jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb); -} - -JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable) -{ - if (enable) - jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); - else - jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); -} - -// Protect all access to `finalizer_list_marked` and `to_finalize`. -// For accessing `ptls->finalizers`, the lock is needed if a thread -// is going to realloc the buffer (of its own list) or accessing the -// list of another thread -static jl_mutex_t finalizers_lock; -static uv_mutex_t gc_cache_lock; - -// mutex for gc-heap-snapshot. -jl_mutex_t heapsnapshot_lock; +// Mutex used to coordinate entry of GC threads in the mark loop +uv_mutex_t gc_queue_observer_lock; +// Tag for sentinel nodes in bigval list +uintptr_t gc_bigval_sentinel_tag; +// Table recording number of full GCs due to each reason +JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS]; // Flag that tells us whether we need to support conservative marking // of objects. @@ -174,462 +82,13 @@ static _Atomic(int) support_conservative_marking = 0; * finalizers in unmanaged (GC safe) mode. */ -jl_gc_num_t gc_num = {0}; -static size_t last_long_collect_interval; -int gc_n_threads; -jl_ptls_t* gc_all_tls_states; -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) -{ - return jl_buff_tag; -} - -// List of marked big objects. Not per-thread. Accessed only by master thread. -bigval_t *big_objects_marked = NULL; +gc_heapstatus_t gc_heap_stats = {0}; -// -- Finalization -- -// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. -// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer. -// If an object pointer has the second lowest bit set, the current pointer is a c object pointer. -// It must be aligned at least 4, and it finalized immediately (at "quiescence"). -// `to_finalize` should not have tagged pointers. -arraylist_t finalizer_list_marked; -arraylist_t to_finalize; -JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0; - - -NOINLINE uintptr_t gc_get_stack_ptr(void) -{ - return (uintptr_t)jl_get_frame_addr(); -} - -#define should_timeout() 0 - -void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) -{ - JL_TIMING(GC, GC_Stop); -#ifdef USE_TRACY - TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx; - TracyCZoneColor(ctx, 0x696969); -#endif - assert(gc_n_threads); - if (gc_n_threads > 1) - jl_wake_libuv(); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) { - // This acquire load pairs with the release stores - // in the signal handler of safepoint so we are sure that - // all the stores on those threads are visible. 
- // We're currently also using atomic store release in mutator threads - // (in jl_gc_state_set), but we may want to use signals to flush the - // memory operations on those threads lazily instead. - while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) - jl_cpu_pause(); // yield? - } - } -} - -// malloc wrappers, aligned allocation - -#if defined(_OS_WINDOWS_) -STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) -{ - return _aligned_malloc(sz ? sz : 1, align); -} -STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, - size_t align) -{ - (void)oldsz; - return _aligned_realloc(p, sz ? sz : 1, align); -} -STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - _aligned_free(p); -} -#else -STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return malloc(sz); -#endif - void *ptr; - if (posix_memalign(&ptr, align, sz)) - return NULL; - return ptr; -} -STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, - size_t align) -{ -#if defined(_P64) || defined(__APPLE__) - if (align <= 16) - return realloc(d, sz); -#endif - void *b = jl_malloc_aligned(sz, align); - if (b != NULL) { - memcpy(b, d, oldsz > sz ? sz : oldsz); - free(d); - } - return b; -} -STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT -{ - free(p); -} -#endif -#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) -#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) - -static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT -{ - arraylist_push(&to_finalize, o); - arraylist_push(&to_finalize, f); - // doesn't need release, since we'll keep checking (on the reader) until we see the work and - // release our lock, and that will have a release barrier by then - jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); -} - -static void run_finalizer(jl_task_t *ct, void *o, void *ff) -{ - int ptr_finalizer = gc_ptr_tag(o, 1); - o = gc_ptr_clear_tag(o, 3); - if (ptr_finalizer) { - ((void (*)(void*))ff)((void*)o); - return; - } - JL_TRY { - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); - ct->world_age = last_age; - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } -} - -// if `need_sync` is true, the `list` is the `finalizers` list of another -// thread and we need additional synchronizations -static void finalize_object(arraylist_t *list, jl_value_t *o, - arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT -{ - // The acquire load makes sure that the first `len` objects are valid. - // If `need_sync` is true, all mutations of the content should be limited - // to the first `oldlen` elements and no mutation is allowed after the - // new length is published with the `cmpxchg` at the end of the function. - // This way, the mutation should not conflict with the owning thread, - // which only writes to locations later than `len` - // and will not resize the buffer without acquiring the lock. - size_t len = need_sync ? 
jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len; - size_t oldlen = len; - void **items = list->items; - size_t j = 0; - for (size_t i = 0; i < len; i += 2) { - void *v = items[i]; - int move = 0; - if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) { - void *f = items[i + 1]; - move = 1; - arraylist_push(copied_list, v); - arraylist_push(copied_list, f); - } - if (move || __unlikely(!v)) { - // remove item - } - else { - if (j < i) { - items[j] = items[i]; - items[j+1] = items[i+1]; - } - j += 2; - } - } - len = j; - if (oldlen == len) - return; - if (need_sync) { - // The memset needs to be unconditional since the thread might have - // already read the length. - // The `memset` (like any other content mutation) has to be done - // **before** the `cmpxchg` which publishes the length. - memset(&items[len], 0, (oldlen - len) * sizeof(void*)); - jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len); - } - else { - list->len = len; - } -} - -// The first two entries are assumed to be empty and the rest are assumed to -// be pointers to `jl_value_t` objects -static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT -{ - void **items = list->items; - items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); - items[1] = ct->gcstack; - ct->gcstack = (jl_gcframe_t*)items; -} - -// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock -// to be hold for the current thread and will release the lock when the -// function returns. -static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE -{ - // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring - // of `finalizer`) in a finalizer: - uint8_t sticky = ct->sticky; - // empty out the first two entries for the GC frame - arraylist_push(list, list->items[0]); - arraylist_push(list, list->items[1]); - jl_gc_push_arraylist(ct, list); - void **items = list->items; - size_t len = list->len; - JL_UNLOCK_NOGC(&finalizers_lock); - // run finalizers in reverse order they were added, so lower-level finalizers run last - for (size_t i = len-4; i >= 2; i -= 2) - run_finalizer(ct, items[i], items[i + 1]); - // first entries were moved last to make room for GC frame metadata - run_finalizer(ct, items[len-2], items[len-1]); - // matches the jl_gc_push_arraylist above - JL_GC_POP(); - ct->sticky = sticky; -} - -static uint64_t finalizer_rngState[JL_RNG_SIZE]; - -void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT; - -JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) -{ - jl_rng_split(finalizer_rngState, jl_current_task->rngState); -} - -static void run_finalizers(jl_task_t *ct) -{ - // Racy fast path: - // The race here should be OK since the race can only happen if - // another thread is writing to it with the lock held. In such case, - // we don't need to run pending finalizers since the writer thread - // will flush it. 
- if (to_finalize.len == 0) - return; - JL_LOCK_NOGC(&finalizers_lock); - if (to_finalize.len == 0) { - JL_UNLOCK_NOGC(&finalizers_lock); - return; - } - arraylist_t copied_list; - memcpy(&copied_list, &to_finalize, sizeof(copied_list)); - if (to_finalize.items == to_finalize._space) { - copied_list.items = copied_list._space; - } - jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0); - arraylist_new(&to_finalize, 0); - - uint64_t save_rngState[JL_RNG_SIZE]; - memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState)); - jl_rng_split(ct->rngState, finalizer_rngState); - - // This releases the finalizers lock. - int8_t was_in_finalizer = ct->ptls->in_finalizer; - ct->ptls->in_finalizer = 1; - jl_gc_run_finalizers_in_list(ct, &copied_list); - ct->ptls->in_finalizer = was_in_finalizer; - arraylist_free(&copied_list); - - memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState)); -} - -JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) -{ - if (ct == NULL) - ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { - run_finalizers(ct); - } -} - -JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) -{ - if (ptls == NULL) - ptls = jl_current_task->ptls; - return ptls->finalizers_inhibited; -} - -JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - ptls->finalizers_inhibited++; -} - -JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void) -{ - jl_task_t *ct = jl_current_task; -#ifdef NDEBUG - ct->ptls->finalizers_inhibited--; -#else - jl_gc_enable_finalizers(ct, 1); -#endif -} - -JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) -{ - if (ct == NULL) - ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - int old_val = ptls->finalizers_inhibited; - int new_val = old_val + (on ? 
-1 : 1); - if (new_val < 0) { - JL_TRY { - jl_error(""); // get a backtrace - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n"); - // Only print the backtrace once, to avoid spamming the logs - static int backtrace_printed = 0; - if (backtrace_printed == 0) { - backtrace_printed = 1; - jlbacktrace(); // written to STDERR_FILENO - } - } - return; - } - ptls->finalizers_inhibited = new_val; - if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) { - jl_gc_run_pending_finalizers(ct); - } -} - -JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) -{ - return jl_current_task->ptls->in_finalizer; -} - -static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT -{ - void **items = flist->items; - size_t len = flist->len; - for(size_t i = 0; i < len; i+=2) { - void *v = items[i]; - void *f = items[i + 1]; - if (__unlikely(!v)) - continue; - schedule_finalization(v, f); - } - flist->len = 0; -} - -void jl_gc_run_all_finalizers(jl_task_t *ct) -{ - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - // this is called from `jl_atexit_hook`; threads could still be running - // so we have to guard the finalizers' lists - JL_LOCK_NOGC(&finalizers_lock); - schedule_all_finalizers(&finalizer_list_marked); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - schedule_all_finalizers(&ptls2->finalizers); - } - // unlock here because `run_finalizers` locks this - JL_UNLOCK_NOGC(&finalizers_lock); - gc_n_threads = 0; - gc_all_tls_states = NULL; - run_finalizers(ct); -} - -void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT -{ - assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); - arraylist_t *a = &ptls->finalizers; - // This acquire load and the release store at the end are used to - // synchronize with `finalize_object` on another thread. Apart from the GC, - // which is blocked by entering a unsafe region, there might be only - // one other thread accessing our list in `finalize_object` - // (only one thread since it needs to acquire the finalizer lock). - // Similar to `finalize_object`, all content mutation has to be done - // between the acquire and the release of the length. - size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len); - if (__unlikely(oldlen + 2 > a->max)) { - JL_LOCK_NOGC(&finalizers_lock); - // `a->len` might have been modified. 
- // Another possibility is to always grow the array to `oldlen + 2` but - // it's simpler this way and uses slightly less memory =) - oldlen = a->len; - arraylist_grow(a, 2); - a->len = oldlen; - JL_UNLOCK_NOGC(&finalizers_lock); - } - void **items = a->items; - items[oldlen] = v; - items[oldlen + 1] = f; - jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2); -} - -JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT -{ - jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); -} - -// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) -JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT -{ - assert(!gc_ptr_tag(v, 3)); - jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); -} - -JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT -{ - if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) { - jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); - } - else { - jl_gc_add_finalizer_(ptls, v, f); - } -} - -JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) -{ - JL_LOCK_NOGC(&finalizers_lock); - // Copy the finalizers into a temporary list so that code in the finalizer - // won't change the list as we loop through them. - // This list is also used as the GC frame when we are running the finalizers - arraylist_t copied_list; - arraylist_new(&copied_list, 0); - // No need to check the to_finalize list since the user is apparently - // still holding a reference to the object - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); - } - finalize_object(&finalizer_list_marked, o, &copied_list, 0); - gc_n_threads = 0; - gc_all_tls_states = NULL; - if (copied_list.len > 0) { - // This releases the finalizers lock. - jl_gc_run_finalizers_in_list(ct, &copied_list); - } - else { - JL_UNLOCK_NOGC(&finalizers_lock); - } - arraylist_free(&copied_list); -} +// List of big objects in oldest generation (`GC_OLD_MARKED`). Not per-thread. Accessed only by master thread. 
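/*
 * Illustrative sketch: the finalizer list above stores extra information in the low bits
 * of each entry (`| 1` in `jl_gc_add_ptr_finalizer` for raw C-function finalizers, `| 3`
 * in `jl_gc_add_quiescent`), which is safe because the pointed-to data is at least
 * 4-byte aligned. This standalone helper shows the tag/untag pattern that `gc_ptr_tag`
 * and friends presumably rely on; the names here are invented for the example.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void *tag_ptr(void *p, uintptr_t tag)
{
    assert(tag <= 3);                         /* only two low bits are available */
    assert(((uintptr_t)p & 3) == 0);          /* requires >= 4-byte alignment */
    return (void*)((uintptr_t)p | tag);
}

static uintptr_t ptr_tag(void *p) { return (uintptr_t)p & 3; }
static void *untag_ptr(void *p)   { return (void*)((uintptr_t)p & ~(uintptr_t)3); }

int main(void)
{
    static int object;                        /* stand-in for a GC-managed value */
    void *tagged = tag_ptr(&object, 1);
    printf("tag = %zu, same pointer back: %d\n",
           (size_t)ptr_tag(tagged), untag_ptr(tagged) == (void*)&object);
    return 0;
}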
+bigval_t *oldest_generation_of_bigvals = NULL; // explicitly scheduled objects for the sweepfunc callback -static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) +static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) JL_NOTSAFEPOINT { size_t p = 0; for (size_t i = 0; i < objs->len; i++) { @@ -647,13 +106,13 @@ static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) objs->len = p; } -static void gc_sweep_foreign_objs(void) +static void gc_sweep_foreign_objs(void) JL_NOTSAFEPOINT { assert(gc_n_threads); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) - gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs); + gc_sweep_foreign_objs_in_list(&ptls2->gc_tls.sweep_objs); } } @@ -665,21 +124,30 @@ static int64_t last_gc_total_bytes = 0; #ifdef _P64 typedef uint64_t memsize_t; static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 1250000000UL; static size_t total_mem; // We expose this to the user/ci as jl_gc_set_max_memory static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else typedef uint32_t memsize_t; static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 500000000UL; // Work really hard to stay within 2GB // Alternative is to risk running out of address space // on 32 bit architectures. -static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; +#define MAX32HEAP 1536 * 1024 * 1024 +static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif - +// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323 +// start with values that are in the target ranges to reduce transient hiccups at startup +static uint64_t old_pause_time = 1e7; // 10 ms +static uint64_t old_mut_time = 1e9; // 1 second +static uint64_t old_heap_size = 0; +static uint64_t old_alloc_diff = default_collect_interval; +static uint64_t old_freed_diff = default_collect_interval; +static uint64_t gc_end_time = 0; +static int thrash_counter = 0; +static int thrashing = 0; // global variables for GC stats +static uint64_t freed_in_runtime = 0; // Resetting the object to a young object, this is used when marking the // finalizer list to collect them the next time because the object is very @@ -730,76 +198,43 @@ static int mark_reset_age = 0; static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking int prev_sweep_full = 1; +int current_sweep_full = 0; +int next_sweep_full = 0; +int under_pressure = 0; // Full collection heuristics static int64_t live_bytes = 0; static int64_t promoted_bytes = 0; static int64_t last_live_bytes = 0; // live_bytes at last collection -static int64_t t_start = 0; // Time GC starts; #ifdef __GLIBC__ // maxrss at last malloc_trim static int64_t last_trim_maxrss = 0; #endif -static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT +static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT { - const int nbig = gc_cache->nbig_obj; - for (int i = 0; i < nbig; i++) { - void *ptr = gc_cache->big_obj[i]; - bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1); - gc_big_object_unlink(hdr); - if (gc_ptr_tag(ptr, 1)) { - gc_big_object_link(hdr, &ptls->heap.big_objects); - } - else { - // Move hdr from `big_objects` list to `big_objects_marked list` - gc_big_object_link(hdr, &big_objects_marked); - } - } - gc_cache->nbig_obj = 
0; perm_scanned_bytes += gc_cache->perm_scanned_bytes; scanned_bytes += gc_cache->scanned_bytes; gc_cache->perm_scanned_bytes = 0; gc_cache->scanned_bytes = 0; } -static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT -{ - uv_mutex_lock(&gc_cache_lock); - gc_sync_cache_nolock(ptls, &ptls->gc_cache); - uv_mutex_unlock(&gc_cache_lock); -} - // No other threads can be running marking at the same time -static void gc_sync_all_caches_nolock(jl_ptls_t ptls) +static void gc_sync_all_caches(jl_ptls_t ptls) { assert(gc_n_threads); for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) - gc_sync_cache_nolock(ptls, &ptls2->gc_cache); - } -} - -STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr, - int toyoung) JL_NOTSAFEPOINT -{ - const int nentry = sizeof(ptls->gc_cache.big_obj) / sizeof(void*); - size_t nobj = ptls->gc_cache.nbig_obj; - if (__unlikely(nobj >= nentry)) { - gc_sync_cache(ptls); - nobj = 0; + gc_sync_cache(ptls, &ptls2->gc_tls.gc_cache); } - uintptr_t v = (uintptr_t)hdr; - ptls->gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v); - ptls->gc_cache.nbig_obj = nobj + 1; } // Atomically set the mark bit for object and return whether it was previously unmarked FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { assert(gc_marked(mark_mode)); - uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header); + uintptr_t tag = o->header; if (gc_marked(tag)) return 0; if (mark_reset_age) { @@ -813,9 +248,13 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N tag = tag | mark_mode; assert((tag & 0x3) == mark_mode); } - jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than - verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag. - return 1; + // XXX: note that marking not only sets the GC bits but also updates the + // page metadata for pool allocated objects. + // The second step is **not** idempotent, so we need a compare exchange here + // (instead of a pair of load&store) to avoid marking an object twice + tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); + verify_val(jl_valueof(o)); + return !gc_marked(tag); } // This function should be called exactly once during marking for each big @@ -826,21 +265,17 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, assert(!gc_alloc_map_is_set((char*)o)); bigval_t *hdr = bigval_header(o); if (mark_mode == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3; - gc_queue_big_marked(ptls, hdr, 0); + ptls->gc_tls.gc_cache.perm_scanned_bytes += hdr->sz; } else { - ptls->gc_cache.scanned_bytes += hdr->sz & ~3; - // We can't easily tell if the object is old or being promoted - // from the gc bits but if the `age` is `0` then the object - // must be already on a young list. 
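/*
 * Illustrative sketch: the rewritten `gc_try_setmark_tag` above uses an atomic
 * read-modify-write on the object header and reports whether *this* thread was the one
 * that set the mark bit, so the non-idempotent page-metadata update that follows runs
 * exactly once even if several mark threads race on the same object. The standalone
 * model below uses `atomic_fetch_or` to show the single-winner property (the real code
 * uses an exchange because it also rewrites the age bits); all names are invented.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MARK_BIT 0x1u

static bool try_setmark(_Atomic(unsigned) *header)
{
    unsigned prev = atomic_fetch_or_explicit(header, MARK_BIT, memory_order_relaxed);
    return (prev & MARK_BIT) == 0;   /* true only for the thread that flipped the bit */
}

int main(void)
{
    _Atomic(unsigned) header = 0;
    printf("first attempt marks:  %d\n", try_setmark(&header));  /* 1 */
    printf("second attempt marks: %d\n", try_setmark(&header));  /* 0: already marked */
    return 0;
}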
+ ptls->gc_tls.gc_cache.scanned_bytes += hdr->sz; if (mark_reset_age) { + assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking // Reset the object as if it was just allocated - gc_queue_big_marked(ptls, hdr, 1); + gc_big_object_unlink(hdr); + gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, hdr); } } - objprofile_count(jl_typeof(jl_valueof(o)), - mark_mode == GC_OLD_MARKED, hdr->sz & ~3); } // This function should be called exactly once during marking for each pool @@ -852,18 +287,16 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, gc_setmark_big(ptls, o, mark_mode); #else if (mark_mode == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += page->osize; + ptls->gc_tls.gc_cache.perm_scanned_bytes += page->osize; static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1); } else { - ptls->gc_cache.scanned_bytes += page->osize; + ptls->gc_tls.gc_cache.scanned_bytes += page->osize; if (mark_reset_age) { page->has_young = 1; } } - objprofile_count(jl_typeof(jl_valueof(o)), - mark_mode == GC_OLD_MARKED, page->osize); page->has_marked = 1; #endif } @@ -912,7 +345,7 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL STATIC_INLINE void maybe_collect(jl_ptls_t ptls) { - if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { + if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { @@ -922,13 +355,12 @@ STATIC_INLINE void maybe_collect(jl_ptls_t ptls) // weak references -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, - jl_value_t *value) +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) { jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - arraylist_push(&ptls->heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -938,8 +370,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->heap.weak_refs.len; - void **lst = ptls2->heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -956,32 +388,42 @@ static void sweep_weak_refs(void) jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { size_t n = 0; - size_t ndel = 0; - size_t l = ptls2->heap.weak_refs.len; - void **lst = ptls2->heap.weak_refs.items; - if (l == 0) - continue; - while (1) { - jl_weakref_t *wr = (jl_weakref_t*)lst[n]; - if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) + size_t i = 0; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; + // filter with preserving order + for (i = 0; i < l; i++) { + jl_weakref_t *wr = (jl_weakref_t*)lst[i]; + if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) { + lst[n] = wr; n++; - else - ndel++; - if (n >= l - ndel) - break; - void *tmp = lst[n]; - lst[n] = lst[n + ndel]; - lst[n + ndel] = tmp; + } } - ptls2->heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len = n; } } } +STATIC_INLINE 
void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; + if (alloc_acc < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + } +} + +STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); +} + // big value list -// Size includes the tag and the tag is not cleared!! +// Size includes the tag and the tag field is undefined on return (must be set before the next GC safepoint) STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) { maybe_collect(ptls); @@ -997,23 +439,28 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); + jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); #endif v->sz = allocsz; - gc_big_object_link(v, &ptls->heap.big_objects); +#ifndef NDEBUG + v->header = 0; // must be initialized (and not gc_bigval_sentinel_tag) or gc_big_object_link assertions will get confused +#endif + gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, v); return jl_valueof(&v->header); } + // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. -JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) +JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) { jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); - maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag); + maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type); return val; } @@ -1024,90 +471,100 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) { return jl_gc_big_alloc_inner(ptls, sz); } -// Sweep list rooted at *pv, removing and freeing any unmarked objects. -// Return pointer to last `next` field in the culled list. 
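/*
 * Illustrative sketch: `jl_batch_accum_heap_size` above batches allocation sizes in a
 * per-thread accumulator and only touches the shared `gc_heap_stats.heap_size` counter
 * with an atomic add once at least 16 KiB has accumulated, trading a little accounting
 * lag for far fewer contended atomics. Standalone model with invented names
 * (`global_heap_size`, `local_acc`, `FLUSH_LIMIT`); the real accumulator also lives in
 * thread-local GC state that the collector itself can read.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FLUSH_LIMIT (16 * 1024)                  /* same threshold as the diff above */

static _Atomic(uint64_t) global_heap_size = 0;   /* shared between threads */
static _Thread_local uint64_t local_acc = 0;     /* private, no synchronization needed */

static void accum_alloc(uint64_t sz)
{
    local_acc += sz;
    if (local_acc >= FLUSH_LIMIT) {              /* flush the batch to the shared counter */
        atomic_fetch_add_explicit(&global_heap_size, local_acc, memory_order_relaxed);
        local_acc = 0;
    }
}

int main(void)
{
    for (int i = 0; i < 1000; i++)
        accum_alloc(64);                         /* 64 KiB total, flushed in ~16 KiB batches */
    printf("global=%llu local=%llu\n",
           (unsigned long long)atomic_load(&global_heap_size),
           (unsigned long long)local_acc);
    return 0;
}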
-static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT +FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT { - bigval_t *v = *pv; + gc_big_object_unlink(v); + gc_num.freed += v->sz; + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - v->sz); +#ifdef MEMDEBUG + memset(v, 0xbb, v->sz); +#endif + gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v)); + jl_free_aligned(v); +} + +static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT +{ + bigval_t *last_node = young; + bigval_t *v = young->next; // skip the sentinel + bigval_t *old = oldest_generation_of_bigvals; + int sweep_full = current_sweep_full; // don't load the global in the hot loop while (v != NULL) { bigval_t *nxt = v->next; int bits = v->bits.gc; int old_bits = bits; if (gc_marked(bits)) { - pv = &v->next; if (sweep_full || bits == GC_MARKED) { bits = GC_OLD; + last_node = v; + } + else { // `bits == GC_OLD_MARKED` + assert(bits == GC_OLD_MARKED); + // reached oldest generation, move from young list to old list + gc_big_object_unlink(v); + gc_big_object_link(old, v); } v->bits.gc = bits; } else { - // Remove v from list and free it - *pv = nxt; - if (nxt) - nxt->prev = pv; - gc_num.freed += v->sz&~3; -#ifdef MEMDEBUG - memset(v, 0xbb, v->sz&~3); -#endif - gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, - gc_cblist_notify_external_free, (v)); - jl_free_aligned(v); + sweep_unlink_and_free(v); } gc_time_count_big(old_bits, bits); v = nxt; } - return pv; + return last_node; } -static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT +static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT +{ + bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel + while (v != NULL) { + bigval_t *nxt = v->next; + assert(v->bits.gc == GC_OLD_MARKED); + v->bits.gc = GC_OLD; + gc_time_count_big(GC_OLD_MARKED, GC_OLD); + v = nxt; + } +} + +static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT { gc_time_big_start(); assert(gc_n_threads); + bigval_t *last_node_in_my_list = NULL; for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - sweep_big_list(sweep_full, &ptls2->heap.big_objects); + if (ptls2 != NULL) { + bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->gc_tls.heap.young_generation_of_bigvals); + if (ptls == ptls2) { + last_node_in_my_list = last_node; + } + } } - if (sweep_full) { - bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked); - // Move all survivors from big_objects_marked list to the big_objects list of this thread. 
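/*
 * Illustrative sketch: the big-object lists above (one `young_generation_of_bigvals`
 * per thread plus the global `oldest_generation_of_bigvals`) are intrusive doubly linked
 * lists headed by a sentinel node, so linking and unlinking never have to special-case an
 * empty list, and `sweep_list_of_young_bigvals` can move a survivor to the old list with
 * one unlink and one link. This is a standalone approximation of what `gc_big_object_link`
 * and `gc_big_object_unlink` presumably do; `node_t` and the function names are invented.
 */
#include <stddef.h>
#include <stdio.h>

typedef struct node {
    struct node *next;
    struct node *prev;
    int payload;
} node_t;

static void list_link_after(node_t *pos, node_t *n)   /* insert n right after pos */
{
    n->next = pos->next;
    n->prev = pos;
    if (pos->next != NULL)
        pos->next->prev = n;
    pos->next = n;
}

static void list_unlink(node_t *n)
{
    n->prev->next = n->next;       /* prev always exists thanks to the sentinel */
    if (n->next != NULL)
        n->next->prev = n->prev;
    n->next = n->prev = NULL;
}

int main(void)
{
    node_t young = {0}, old = {0};             /* sentinels, carry no payload */
    node_t a = {.payload = 1}, b = {.payload = 2};
    list_link_after(&young, &a);
    list_link_after(&young, &b);               /* young list: sentinel -> b -> a */
    list_unlink(&a);
    list_link_after(&old, &a);                 /* "promote" a to the old list */
    printf("young head: %d, old head: %d\n", young.next->payload, old.next->payload);
    return 0;
}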
- if (ptls->heap.big_objects) - ptls->heap.big_objects->prev = last_next; - *last_next = ptls->heap.big_objects; - ptls->heap.big_objects = big_objects_marked; - if (ptls->heap.big_objects) - ptls->heap.big_objects->prev = &ptls->heap.big_objects; - big_objects_marked = NULL; + if (current_sweep_full) { + sweep_list_of_oldest_bigvals(ptls->gc_tls.heap.young_generation_of_bigvals); + // move all nodes in `oldest_generation_of_bigvals` to my list of bigvals + assert(last_node_in_my_list != NULL); + assert(last_node_in_my_list->next == NULL); + last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel + if (oldest_generation_of_bigvals->next != NULL) { + oldest_generation_of_bigvals->next->prev = last_node_in_my_list; + } + oldest_generation_of_bigvals->next = NULL; } gc_time_big_end(); } -// tracking Arrays with malloc'd storage - -void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT -{ - // This is **NOT** a GC safe point. - mallocarray_t *ma; - if (ptls->heap.mafreelist == NULL) { - ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t)); - } - else { - ma = ptls->heap.mafreelist; - ptls->heap.mafreelist = ma->next; - } - ma->a = a; - ma->next = ptls->heap.mallocarrays; - ptls->heap.mallocarrays = ma; -} - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_batch_accum_heap_size(ptls, sz); } -static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT +// Only safe to update the heap inside the GC +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT { int gc_n_threads; jl_ptls_t* gc_all_tls_states; @@ -1116,13 +573,19 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); + if (update_heap) { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); + } } } } @@ -1136,8 +599,14 @@ static 
void reset_thread_gc_counts(void) JL_NOTSAFEPOINT for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { - memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + // don't reset `pool_live_bytes` here + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -1150,68 +619,57 @@ static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 0); inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); gc_num.allocd = 0; gc_num.deferred_alloc = 0; reset_thread_gc_counts(); } -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT +static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT { - size_t sz = 0; - int isbitsunion = jl_array_isbitsunion(a); - if (jl_array_ndims(a) == 1) - sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0); + assert(jl_is_genericmemory(m)); + assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2); + char *d = (char*)m->ptr; + size_t freed_bytes = memory_block_usable_size(d, isaligned); + assert(freed_bytes != 0); + if (isaligned) + jl_free_aligned(d); else - sz = a->elsize * jl_array_len(a); - if (isbitsunion) - // account for isbits Union array selector bytes - sz += jl_array_len(a); - return sz; -} - -static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - jl_free_aligned(d); - else - free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } + free(d); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, + jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes); + gc_num.freed += freed_bytes; + gc_num.freecall++; } -static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT +static void sweep_malloced_memory(void) JL_NOTSAFEPOINT { - gc_time_mallocd_array_start(); + gc_time_mallocd_memory_start(); assert(gc_n_threads); for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocarray_t *ma = ptls2->heap.mallocarrays; - mallocarray_t **pma = &ptls2->heap.mallocarrays; - while (ma != NULL) { - mallocarray_t *nxt = ma->next; - int bits = jl_astaggedvalue(ma->a)->bits.gc; - if (gc_marked(bits)) { - pma = &ma->next; + size_t n = 0; + size_t l = ptls2->gc_tls_common.heap.mallocarrays.len; + void **lst = ptls2->gc_tls_common.heap.mallocarrays.items; + // filter without preserving order + while (n < l) { + jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1); + if (gc_marked(jl_astaggedvalue(m)->bits.gc)) { + n++; } else { - *pma = nxt; - assert(ma->a->flags.how == 2); - jl_gc_free_array(ma->a); - ma->next = ptls2->heap.mafreelist; - ptls2->heap.mafreelist = ma; + int isaligned = (uintptr_t)lst[n] & 1; + jl_gc_free_memory(m, isaligned); + l--; + lst[n] = lst[l]; } - gc_time_count_mallocd_array(bits); - ma = nxt; } + 
ptls2->gc_tls_common.heap.mallocarrays.len = l; } } - gc_time_mallocd_array_end(); + gc_time_mallocd_memory_end(); } // pool allocation @@ -1219,7 +677,7 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_ { assert(GC_PAGE_OFFSET >= sizeof(void*)); pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize; - pg->pool_n = p - ptls2->heap.norm_pools; + pg->pool_n = p - ptls2->gc_tls.heap.norm_pools; jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); pg->has_young = 0; pg->has_marked = 0; @@ -1230,47 +688,47 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_ return beg; } -jl_gc_global_page_pool_t global_page_pool_lazily_freed; -jl_gc_global_page_pool_t global_page_pool_clean; -jl_gc_global_page_pool_t global_page_pool_freed; +jl_gc_page_stack_t global_page_pool_lazily_freed; +jl_gc_page_stack_t global_page_pool_clean; +jl_gc_page_stack_t global_page_pool_freed; pagetable_t alloc_map; // Add a new page to the pool. Discards any pages in `p->newpages` before. static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT { // Do not pass in `ptls` as argument. This slows down the fast path - // in pool_alloc significantly + // in small_alloc significantly jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_pagemeta_t *pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed); - if (pg == NULL) { - pg = jl_gc_alloc_page(); - } + jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); pg->osize = p->osize; pg->thread_n = ptls->tid; set_page_metadata(pg); - push_page_metadata_back(&ptls->page_metadata_allocd, pg); + push_lf_back(&ptls->gc_tls.page_metadata_allocd, pg); jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ); p->newpages = fl; return fl; } // Size includes the tag and the tag is not cleared!! -STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, +STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, int osize) { // Use the pool offset instead of the pool address as the argument // to workaround a llvm bug. // Ref https://llvm.org/bugs/show_bug.cgi?id=27190 - jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset); + jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + offset); assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); #ifdef MEMDEBUG - return jl_gc_big_alloc(ptls, osize); + return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -1310,20 +768,42 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset return jl_valueof(v); } -// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code. 
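/*
 * Illustrative sketch: the `sweep_malloced_memory` loop above compacts the per-thread
 * list in place with the classic swap-with-last idiom ("filter without preserving
 * order"): a dead entry is overwritten by the current last entry and the length shrinks,
 * so each slot is visited once and no shifting is needed. Standalone model over an int
 * array; `is_live` stands in for the mark-bit test and the values are made up.
 */
#include <stddef.h>
#include <stdio.h>

static int is_live(int x) { return x % 2 == 0; }   /* pretend even values are marked */

static size_t filter_unordered(int *lst, size_t len)
{
    size_t n = 0;
    while (n < len) {
        if (is_live(lst[n])) {
            n++;                   /* keep it, move on */
        }
        else {
            len--;                 /* drop it: overwrite with the last element */
            lst[n] = lst[len];     /* lst[n] gets re-examined on the next iteration */
        }
    }
    return len;                    /* new length */
}

int main(void)
{
    int v[] = {2, 3, 4, 5, 6, 7, 8};
    size_t n = filter_unordered(v, sizeof v / sizeof v[0]);
    for (size_t i = 0; i < n; i++)
        printf("%d ", v[i]);       /* survivors, order not preserved */
    printf("\n");
    return 0;
}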
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, - int osize) +// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type) { - jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); - maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag); + jl_value_t *val = jl_gc_small_alloc_inner(ptls, offset, osize); + maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type); return val; } -// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into -// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner` +// This wrapper exists only to prevent `jl_gc_small_alloc_inner` from being inlined into +// its callers. We provide an external-facing interface for callers, and inline `jl_gc_small_alloc_inner` // into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) -jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) { - return jl_gc_pool_alloc_inner(ptls, pool_offset, osize); +jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { + return jl_gc_small_alloc_inner(ptls, offset, osize); +} + +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
+        v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        v = jl_gc_big_alloc_noinline(ptls, allocsz);
+    }
+    jl_set_typeof(v, ty);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
+    return v;
 }
 int jl_gc_classify_pools(size_t sz, int *osize)
@@ -1333,19 +813,43 @@ int jl_gc_classify_pools(size_t sz, int *osize)
     size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     int klass = jl_gc_szclass(allocsz);
     *osize = jl_gc_sizeclasses[klass];
-    return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]);
+    return (int)(intptr_t)(&((jl_ptls_t)0)->gc_tls.heap.norm_pools[klass]);
 }
 // sweep phase
-int64_t lazy_freed_pages = 0;
+gc_fragmentation_stat_t gc_page_fragmentation_stats[JL_GC_N_POOLS];
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_update_page_fragmentation_data(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
+{
+    gc_fragmentation_stat_t *stats = &gc_page_fragmentation_stats[pg->pool_n];
+    jl_atomic_fetch_add_relaxed(&stats->n_freed_objs, pg->nfree);
+    jl_atomic_fetch_add_relaxed(&stats->n_pages_allocd, 1);
+}
+
+STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
+{
+    for (int i = 0; i < JL_GC_N_POOLS; i++) {
+        gc_fragmentation_stat_t *stats = &gc_page_fragmentation_stats[i];
+        double utilization = 1.0;
+        size_t n_freed_objs = jl_atomic_load_relaxed(&stats->n_freed_objs);
+        size_t n_pages_allocd = jl_atomic_load_relaxed(&stats->n_pages_allocd);
+        if (n_pages_allocd != 0) {
+            utilization -= ((double)n_freed_objs * (double)jl_gc_sizeclasses[i]) / (double)n_pages_allocd / (double)GC_PAGE_SZ;
+        }
+        jl_gc_page_utilization_stats[i] = utilization;
+        jl_atomic_store_relaxed(&stats->n_freed_objs, 0);
+        jl_atomic_store_relaxed(&stats->n_pages_allocd, 0);
+    }
+}
-// Returns pointer to terminal pointer of list rooted at *pfl.
-static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allocd,
-                                        jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
+// Walks over a page, reconstructing the free lists if the page contains at least one live object. If not,
+// queues up the page for later decommit (i.e. through `madvise` on Unix).
+static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
 {
     char *data = pg->data;
-    jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
+    jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
     char *lim = data + GC_PAGE_SZ - osize;
     char *lim_newpages = data + GC_PAGE_SZ;
     if (gc_page_data((char*)p->newpages - 1) == data) {
@@ -1353,51 +857,56 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
     }
     size_t old_nfree = pg->nfree;
     size_t nfree;
+    // avoid loading a global variable in the hot path
+    int page_profile_enabled = gc_page_profile_is_enabled();
+    gc_page_serializer_init(s, pg);
     int re_use_page = 1;
-    int freed_lazily = 0;
     int freedall = 1;
     int pg_skpd = 1;
    if (!pg->has_marked) {
        re_use_page = 0;
-        #ifdef _P64 // TODO: re-enable on `_P32`?
-        // lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
-        // eager version: (freedall) free page as soon as possible
-        // the eager one uses less memory.
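/*
 * Illustrative sketch: `jl_gc_alloc_` above adds the tag header to the requested size
 * and then dispatches on a size-class cutoff -- small requests go to the per-thread
 * pools, large ones to the big-object allocator -- and the `allocsz < sz` comparison is
 * the standard unsigned-overflow check for that header addition. Standalone model; the
 * cutoff value and all names below are invented stand-ins, not Julia's real constants.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HEADER_SZ    sizeof(uintptr_t)    /* stand-in for the object tag */
#define SMALL_CUTOFF 2032                 /* illustrative pool/big boundary */

static void *alloc_dispatch(size_t sz)
{
    size_t allocsz = sz + HEADER_SZ;
    if (allocsz < sz) {                   /* wrapped around: sz was effectively "negative" */
        fprintf(stderr, "allocation size overflow\n");
        return NULL;
    }
    if (sz <= SMALL_CUTOFF)
        printf("pool allocation of %zu bytes\n", allocsz);
    else
        printf("big-object allocation of %zu bytes\n", allocsz);
    return malloc(allocsz);               /* placeholder for either path */
}

int main(void)
{
    free(alloc_dispatch(64));             /* small: pool path */
    free(alloc_dispatch(1 << 20));        /* large: big-object path */
    free(alloc_dispatch(SIZE_MAX));       /* overflow: rejected */
    return 0;
}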
- // FIXME - need to do accounting on a per-thread basis - // on quick sweeps, keep a few pages empty but allocated for performance - if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { - lazy_freed_pages++; - freed_lazily = 1; - } - #endif nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; + gc_page_profile_write_empty_page(s, page_profile_enabled); goto done; } // For quick sweep, we might be able to skip the page if the page doesn't // have any young live cell before marking. - if (!sweep_full && !pg->has_young) { + if (!current_sweep_full && !pg->has_young) { assert(!prev_sweep_full || pg->prev_nold >= pg->nold); if (!prev_sweep_full || pg->prev_nold == pg->nold) { - // the position of the freelist begin/end in this page - // is stored in its metadata - if (pg->fl_begin_offset != (uint16_t)-1) { - *pfl = page_pfl_beg(pg); - pfl = (jl_taggedvalue_t**)page_pfl_end(pg); - } freedall = 0; nfree = pg->nfree; + gc_page_profile_write_empty_page(s, page_profile_enabled); goto done; } } pg_skpd = 0; - { // scope to avoid clang goto errors + { // scope to avoid clang goto errors int has_marked = 0; int has_young = 0; int16_t prev_nold = 0; int pg_nfree = 0; + jl_taggedvalue_t *fl = NULL; + jl_taggedvalue_t **pfl = &fl; jl_taggedvalue_t **pfl_begin = NULL; + // collect page profile + jl_taggedvalue_t *v = v0; + if (page_profile_enabled) { + while ((char*)v <= lim) { + int bits = v->bits.gc; + if (!gc_marked(bits) || (char*)v >= lim_newpages) { + gc_page_profile_write_garbage(s, page_profile_enabled); + } + else { + gc_page_profile_write_live_obj(s, v, page_profile_enabled); + } + v = (jl_taggedvalue_t*)((char*)v + osize); + } + v = v0; + } + // sweep the page while ((char*)v <= lim) { int bits = v->bits.gc; // if an object is past `lim_newpages` then we can guarantee it's garbage @@ -1408,7 +917,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo pg_nfree++; } else { // marked young or old - if (sweep_full || bits == GC_MARKED) { // old enough + if (current_sweep_full || bits == GC_MARKED) { // old enough bits = v->bits.gc = GC_OLD; // promote } prev_nold++; @@ -1430,7 +939,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo } pg->nfree = pg_nfree; - if (sweep_full) { + if (current_sweep_full) { pg->nold = 0; pg->prev_nold = prev_nold; } @@ -1439,48 +948,185 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo done: if (re_use_page) { - push_page_metadata_back(allocd, pg); - } - else if (freed_lazily) { - push_page_metadata_back(lazily_freed, pg); + push_lf_back(allocd, pg); } else { - #ifdef _P64 // only enable concurrent sweeping on 64bit - if (jl_n_sweepthreads == 0) { - jl_gc_free_page(pg); - push_lf_page_metadata_back(&global_page_pool_freed, pg); - } - else { - gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); - push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg); - } - #else - jl_gc_free_page(pg); - push_lf_page_metadata_back(&global_page_pool_freed, pg); - #endif + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); + gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); + push_lf_back(&global_page_pool_lazily_freed, pg); } + gc_page_profile_write_to_file(s); + gc_update_page_fragmentation_data(pg); gc_time_count_page(freedall, pg_skpd); - gc_num.freed += (nfree - old_nfree) * osize; - return pfl; + jl_ptls_t ptls = jl_current_task->ptls; + // Note that we aggregate the `pool_live_bytes` over all threads before returning this + // value to the 
user. It doesn't matter how the `pool_live_bytes` are partitioned among + // the threads as long as the sum is correct. Let's add the `pool_live_bytes` to the current thread + // instead of adding it to the thread that originally allocated the page, so we can avoid + // an atomic-fetch-add here. + size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); + jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } // the actual sweeping over all allocated pages in a memory pool -STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **allocd, - jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT +STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { int p_n = pg->pool_n; int t_n = pg->thread_n; jl_ptls_t ptls2 = gc_all_tls_states[t_n]; - jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; + jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n]; int osize = pg->osize; - pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, allocd, lazily_freed, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); + gc_sweep_page(s, p, allocd, pg, osize); } // sweep over all memory that is being used and not in a pool static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT { - sweep_malloced_arrays(); - sweep_big(ptls, sweep_full); + uint64_t t_free_mallocd_memory_start = jl_hrtime(); + gc_sweep_foreign_objs(); + sweep_malloced_memory(); + sweep_big(ptls); + uint64_t t_free_mallocd_memory_end = jl_hrtime(); + gc_num.total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start; + jl_engine_sweep(gc_all_tls_states); +} + +// wake up all threads to sweep the stacks +void gc_sweep_wake_all_stacks(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + uv_mutex_lock(&gc_threads_lock); + int first = gc_first_parallel_collector_thread_id(); + int last = gc_last_parallel_collector_thread_id(); + for (int i = first; i <= last; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + jl_atomic_fetch_add(&ptls2->gc_tls.gc_stack_sweep_requested, 1); + } + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); + return; +} + +void gc_sweep_wait_for_all_stacks(void) JL_NOTSAFEPOINT +{ + while ((jl_atomic_load_acquire(&gc_ptls_sweep_idx) >= 0 ) || jl_atomic_load_acquire(&gc_n_threads_sweeping_stacks) != 0) { + jl_cpu_pause(); + } +} + +extern const unsigned pool_sizes[]; + +void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT +{ + // Stack sweeping algorithm: + // // deallocate stacks if we have too many sitting around unused + // for (stk in halfof(free_stacks)) + // free_stack(stk, pool_sz); + // // then sweep the task stacks + // for (t in live_tasks) + // if (!gc-marked(t)) + // stkbuf = t->stkbuf + // bufsz = t->bufsz + // if (stkbuf) + // push(free_stacks[sz], stkbuf) + jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, 1); + while (1) { + int i = jl_atomic_fetch_add_relaxed(&gc_ptls_sweep_idx, -1); + if (i < 0) + break; + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; + assert(gc_n_threads); + // free half of stacks that remain unused since last sweep + if (i == jl_atomic_load_relaxed(&gc_stack_free_idx)) { + for (int p = 0; p < JL_N_STACK_POOLS; p++) { + 
small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; + size_t n_to_free; + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + n_to_free = al->len; // not alive yet or dead, so it does not need these anymore + } + else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) { + n_to_free = al->len / 2; + if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL)) + n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL; + } + else { + n_to_free = 0; + } + for (int n = 0; n < n_to_free; n++) { + void *stk = small_arraylist_pop(al); + free_stack(stk, pool_sizes[p]); + } + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + small_arraylist_free(al); + } + } + } + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); + } + + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; + size_t n = 0; + size_t ndel = 0; + size_t l = live_tasks->len; + void **lst = live_tasks->items; + if (l == 0) + continue; + while (1) { + jl_task_t *t = (jl_task_t*)lst[n]; + assert(jl_is_task(t)); + if (gc_marked(jl_astaggedvalue(t)->bits.gc)) { + if (t->ctx.stkbuf == NULL) + ndel++; // jl_release_task_stack called + else + n++; + } + else { + ndel++; + void *stkbuf = t->ctx.stkbuf; + size_t bufsz = t->ctx.bufsz; + if (stkbuf) { + t->ctx.stkbuf = NULL; + _jl_free_stack(ptls2, stkbuf, bufsz); + } +#ifdef _COMPILER_TSAN_ENABLED_ + if (t->ctx.tsan_state) { + __tsan_destroy_fiber(t->ctx.tsan_state); + t->ctx.tsan_state = NULL; + } +#endif + } + if (n >= l - ndel) + break; + void *tmp = lst[n]; + lst[n] = lst[n + ndel]; + lst[n + ndel] = tmp; + } + live_tasks->len -= ndel; + } + jl_atomic_fetch_add(&gc_n_threads_sweeping_stacks, -1); +} + +JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + // initialize ptls index for parallel sweeping of stack pools + assert(gc_n_threads); + int stack_free_idx = jl_atomic_load_relaxed(&gc_stack_free_idx); + if (stack_free_idx + 1 == gc_n_threads) + jl_atomic_store_relaxed(&gc_stack_free_idx, 0); + else + jl_atomic_store_relaxed(&gc_stack_free_idx, stack_free_idx + 1); + jl_atomic_store_release(&gc_ptls_sweep_idx, gc_n_threads - 1); // idx == gc_n_threads = release stacks to the OS so it's serial + uv_mutex_lock(&live_tasks_lock); + gc_sweep_wake_all_stacks(ptls); + sweep_stack_pool_loop(); + gc_sweep_wait_for_all_stacks(); + sweep_mtarraylist_buffers(); + uv_mutex_unlock(&live_tasks_lock); } static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT @@ -1499,11 +1145,202 @@ static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_ pg->nfree = nfree; } +// pre-scan pages to check whether there are enough pages so that's worth parallelizing +// also sweeps pages that don't need to be linearly scanned +int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_scratch) +{ + // 4MB worth of pages is worth parallelizing + const int n_pages_worth_parallel_sweep = (int)(4 * (1 << 20) / GC_PAGE_SZ); + int n_pages_to_scan = 0; + gc_page_profiler_serializer_t serializer = gc_page_serializer_create(); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_page_stack_t *dest = &new_gc_allocd_scratch[ptls2->tid].stack; + jl_gc_page_stack_t tmp; + jl_gc_pagemeta_t *tail = NULL; + memset(&tmp, 0, sizeof(tmp)); + while (1) { + jl_gc_pagemeta_t *pg = 
pop_lf_back_nosync(&ptls2->gc_tls.page_metadata_allocd); + if (pg == NULL) { + break; + } + int should_scan = 1; + if (!pg->has_marked) { + should_scan = 0; + } + if (!current_sweep_full && !pg->has_young) { + assert(!prev_sweep_full || pg->prev_nold >= pg->nold); + if (!prev_sweep_full || pg->prev_nold == pg->nold) { + should_scan = 0; + } + } + if (should_scan) { + if (tail == NULL) { + tail = pg; + } + n_pages_to_scan++; + push_lf_back_nosync(&tmp, pg); + } + else { + gc_sweep_pool_page(&serializer, dest, pg); + } + if (n_pages_to_scan >= n_pages_worth_parallel_sweep) { + break; + } + } + if (tail != NULL) { + tail->next = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom); + } + ptls2->gc_tls.page_metadata_allocd = tmp; + if (n_pages_to_scan >= n_pages_worth_parallel_sweep) { + break; + } + } + gc_page_serializer_destroy(&serializer); + return n_pages_to_scan >= n_pages_worth_parallel_sweep; +} + +// wake up all threads to sweep the pages +void gc_sweep_wake_all_pages(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_scratch) +{ + int parallel_sweep_worthwhile = gc_sweep_prescan(ptls, new_gc_allocd_scratch); + if (parallel_sweep_worthwhile && !page_profile_enabled) { + jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch); + uv_mutex_lock(&gc_threads_lock); + int first = gc_first_parallel_collector_thread_id(); + int last = gc_last_parallel_collector_thread_id(); + for (int i = first; i <= last; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + jl_atomic_fetch_add(&ptls2->gc_tls.gc_sweeps_requested, 1); + } + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); + return; + } + if (page_profile_enabled) { + // we need to ensure that no threads are running sweeping when + // collecting a page profile. + // wait for all to leave in order to ensure that a straggler doesn't + // try to enter sweeping after we set `gc_allocd_scratch` below. 
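/*
 * Illustrative sketch: the stack sweep in `sweep_stack_pool_loop` above frees half of
 * the cached, currently unused task stacks in each pool per collection, never trims the
 * cache below a minimum number of mappings, and releases everything when the owning
 * thread is gone. Standalone model of that sizing rule; `MIN_KEEP` is an invented
 * stand-in for MIN_STACK_MAPPINGS_PER_POOL and the numbers are made up.
 */
#include <stddef.h>
#include <stdio.h>

#define MIN_KEEP 5

static size_t stacks_to_free(size_t cached, int thread_alive)
{
    if (!thread_alive)
        return cached;                     /* dead thread: release everything */
    if (cached <= MIN_KEEP)
        return 0;                          /* small cache: keep it all */
    size_t n = cached / 2;                 /* free half ... */
    if (n > cached - MIN_KEEP)
        n = cached - MIN_KEEP;             /* ... but keep at least MIN_KEEP mappings */
    return n;
}

int main(void)
{
    printf("%zu\n", stacks_to_free(16, 1));  /* 8 */
    printf("%zu\n", stacks_to_free(7, 1));   /* 2 (not 3: keeps the minimum) */
    printf("%zu\n", stacks_to_free(4, 1));   /* 0 */
    printf("%zu\n", stacks_to_free(4, 0));   /* 4: thread exited */
    return 0;
}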
+ int first = gc_first_parallel_collector_thread_id(); + int last = gc_last_parallel_collector_thread_id(); + for (int i = first; i <= last; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + while (jl_atomic_load_acquire(&ptls2->gc_tls.gc_sweeps_requested) != 0) { + jl_cpu_pause(); + } + } + } + jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch); +} + +// wait for all threads to finish sweeping +void gc_sweep_wait_for_all_pages(void) +{ + jl_atomic_store(&gc_allocd_scratch, NULL); + while (jl_atomic_load_acquire(&gc_n_threads_sweeping_pools) != 0) { + jl_cpu_pause(); + } +} + +// sweep all pools +void gc_sweep_pool_parallel(jl_ptls_t ptls) +{ + jl_atomic_fetch_add(&gc_n_threads_sweeping_pools, 1); + jl_gc_padded_page_stack_t *allocd_scratch = jl_atomic_load(&gc_allocd_scratch); + if (allocd_scratch != NULL) { + gc_page_profiler_serializer_t serializer = gc_page_serializer_create(); + while (1) { + int found_pg = 0; + // sequentially walk the threads and sweep the pages + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + // skip foreign threads that already exited + if (ptls2 == NULL) { + continue; + } + jl_gc_page_stack_t *dest = &allocd_scratch[ptls2->tid].stack; + jl_gc_pagemeta_t *pg = try_pop_lf_back(&ptls2->gc_tls.page_metadata_allocd); + // failed steal attempt + if (pg == NULL) { + continue; + } + gc_sweep_pool_page(&serializer, dest, pg); + found_pg = 1; + } + if (!found_pg) { + // check for termination + int no_more_work = 1; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + // skip foreign threads that already exited + if (ptls2 == NULL) { + continue; + } + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom); + if (pg != NULL) { + no_more_work = 0; + break; + } + } + if (no_more_work) { + break; + } + } + jl_cpu_pause(); + } + gc_page_serializer_destroy(&serializer); + } + jl_atomic_fetch_add(&gc_n_threads_sweeping_pools, -1); +} + +// free all pages (i.e. through `madvise` on Linux) that were lazily freed +void gc_free_pages(void) +{ + size_t n_pages_seen = 0; + jl_gc_page_stack_t tmp; + memset(&tmp, 0, sizeof(tmp)); + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_back(&global_page_pool_lazily_freed); + if (pg == NULL) { + break; + } + n_pages_seen++; + // keep the last few pages around for a while + if (n_pages_seen * GC_PAGE_SZ <= default_collect_interval) { + push_lf_back(&tmp, pg); + continue; + } + jl_gc_free_page(pg); + push_lf_back(&global_page_pool_freed, pg); + } + // If concurrent page sweeping is disabled, then `gc_free_pages` will be called in the stop-the-world + // phase. We can guarantee, therefore, that there won't be any concurrent modifications to + // `global_page_pool_lazily_freed`, so it's safe to assign `tmp` back to `global_page_pool_lazily_freed`. + // Otherwise, we need to use the thread-safe push_lf_back/pop_lf_back functions. 
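/*
 * Illustrative sketch: `gc_sweep_pool_parallel` above has every GC thread repeatedly
 * scan all per-thread page stacks, popping one page at a time with `try_pop_lf_back`,
 * and it only terminates after a pass that pops nothing is followed by a verification
 * pass that finds every stack empty (a failed pop can be a lost race rather than an
 * empty stack). The standalone model below keeps that two-step check but replaces the
 * lock-free stacks with plain counters, so it is single-threaded by construction; the
 * names and counts are invented for the example.
 */
#include <stdio.h>

#define NQUEUES 4

static int remaining[NQUEUES] = {3, 0, 5, 1};   /* pages left per "thread" */

static int try_pop(int q)                        /* stands in for try_pop_lf_back */
{
    if (remaining[q] == 0)
        return 0;
    remaining[q]--;
    return 1;
}

int main(void)
{
    int swept = 0;
    while (1) {
        int found = 0;
        for (int q = 0; q < NQUEUES; q++) {
            if (try_pop(q)) {                    /* "sweep" one page from queue q */
                swept++;
                found = 1;
            }
        }
        if (!found) {
            int no_more_work = 1;                /* verification pass over all queues */
            for (int q = 0; q < NQUEUES; q++) {
                if (remaining[q] != 0) {
                    no_more_work = 0;
                    break;
                }
            }
            if (no_more_work)
                break;
        }
    }
    printf("swept %d pages\n", swept);           /* 9 */
    return 0;
}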
+ if (jl_n_sweepthreads == 0) { + global_page_pool_lazily_freed = tmp; + } + else { + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_back(&tmp); + if (pg == NULL) { + break; + } + push_lf_back(&global_page_pool_lazily_freed, pg); + } + } +} + // setup the data-structures for a sweep over all memory pools -static void gc_sweep_pool(int sweep_full) +static void gc_sweep_pool(void) { gc_time_pool_start(); - lazy_freed_pages = 0; // For the benefit of the analyzer, which doesn't know that gc_n_threads // doesn't change over the course of this function @@ -1511,7 +1348,7 @@ static void gc_sweep_pool(int sweep_full) // allocate enough space to hold the end of the free list chain // for every thread and pool size - jl_taggedvalue_t ***pfl = (jl_taggedvalue_t ***) alloca(n_threads * JL_GC_N_POOLS * sizeof(jl_taggedvalue_t**)); + jl_taggedvalue_t ***pfl = (jl_taggedvalue_t ***) malloc_s(n_threads * JL_GC_N_POOLS * sizeof(jl_taggedvalue_t**)); // update metadata of pages that were pointed to by freelist or newpages from a pool // i.e. pages being the current allocation target @@ -1523,8 +1360,9 @@ static void gc_sweep_pool(int sweep_full) } continue; } + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { - jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; + jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; if (last != NULL) { jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last)); @@ -1542,49 +1380,83 @@ static void gc_sweep_pool(int sweep_full) pg->nfree = (GC_PAGE_SZ - (last_p - gc_page_data(last_p - 1))) / p->osize; pg->has_young = 1; } - p->newpages = NULL; - } - jl_gc_pagemeta_t *pg = ptls2->page_metadata_lazily_freed; - while (pg != NULL) { - jl_gc_pagemeta_t *pg2 = pg->next; - lazy_freed_pages++; - pg = pg2; } } - // the actual sweeping - for (int t_i = 0; t_i < n_threads; t_i++) { - jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - if (ptls2 != NULL) { - jl_gc_pagemeta_t *allocd = NULL; - jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + uint64_t t_page_walk_start = jl_hrtime(); + { + // the actual sweeping + jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t)); + jl_ptls_t ptls = jl_current_task->ptls; + gc_sweep_wake_all_pages(ptls, new_gc_allocd_scratch); + gc_sweep_pool_parallel(ptls); + gc_sweep_wait_for_all_pages(); + + // reset half-pages pointers + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack; + for (int i = 0; i < JL_GC_N_POOLS; i++) { + jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; + p->newpages = NULL; + } + } + } + + // merge free lists + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom); while (pg != NULL) { jl_gc_pagemeta_t *pg2 = pg->next; - gc_sweep_pool_page(pfl, &allocd, &ptls2->page_metadata_lazily_freed, pg, sweep_full); + if (pg->fl_begin_offset != UINT16_MAX) { + char *cur_pg = pg->data; + jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset); + jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset); + *pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg; + pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next; + } pg = pg2; } - 
ptls2->page_metadata_allocd = allocd; } - } - // null out terminal pointers of free lists - for (int t_i = 0; t_i < n_threads; t_i++) { - jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - if (ptls2 != NULL) { - for (int i = 0; i < JL_GC_N_POOLS; i++) { - *pfl[t_i * JL_GC_N_POOLS + i] = NULL; + // null out terminal pointers of free lists + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + for (int i = 0; i < JL_GC_N_POOLS; i++) { + *pfl[t_i * JL_GC_N_POOLS + i] = NULL; + } } } + + // cleanup + free(pfl); + free(new_gc_allocd_scratch); } + uint64_t t_page_walk_end = jl_hrtime(); + gc_num.total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start; #ifdef _P64 // only enable concurrent sweeping on 64bit // wake thread up to sweep concurrently if (jl_n_sweepthreads > 0) { uv_sem_post(&gc_sweep_assists_needed); } + else { + uint64_t t_madvise_start = jl_hrtime(); + gc_free_pages(); + uint64_t t_madvise_end = jl_hrtime(); + gc_num.total_sweep_madvise_time += t_madvise_end - t_madvise_start; + } +#else + gc_free_pages(); #endif - - gc_time_pool_end(sweep_full); + gc_dump_page_utilization_data(); + gc_time_pool_end(current_sweep_full); } static void gc_sweep_perm_alloc(void) @@ -1600,20 +1472,19 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *o = jl_astaggedvalue(ptr); - // The modification of the `gc_bits` is not atomic but it - // should be safe here since GC is not allowed to run here and we only - // write GC_OLD to the GC bits outside GC. This could cause - // duplicated objects in the remset but that shouldn't be a problem. - o->bits.gc = GC_MARKED; - arraylist_push(ptls->heap.remset, (jl_value_t*)ptr); - ptls->heap.remset_nptr++; // conservative + // The modification of the `gc_bits` needs to be atomic. + // We need to ensure that objects are in the remset at + // most once, since the mark phase may update page metadata, + // which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419 + uintptr_t header = jl_atomic_fetch_and_relaxed((_Atomic(uintptr_t) *)&o->header, ~GC_OLD); + if (header & GC_OLD) { // write barrier has not been triggered in this object yet + arraylist_push(&ptls->gc_tls.heap.remset, (jl_value_t*)ptr); + ptls->gc_tls.heap.remset_nptr++; // conservative + } } -void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +void jl_gc_queue_multiroot(const jl_value_t *parent, const void *ptr, jl_datatype_t *dt) JL_NOTSAFEPOINT { - // first check if this is really necessary - // TODO: should we store this info in one of the extra gc bits? 
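/*
 * Illustrative sketch: the rewritten `jl_gc_queue_root` above clears the OLD bit with an
 * atomic fetch-and and only pushes the object onto the remembered set if that bit was
 * still set, so a racing write barrier on another thread cannot enqueue the same object
 * twice. Standalone model with invented names and a simplified header; the real header
 * also carries the type pointer and mark bits.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define OLD_BIT 0x2u

/* Returns true if the caller is responsible for adding the object to the remset. */
static bool write_barrier(_Atomic(unsigned) *header)
{
    unsigned prev = atomic_fetch_and_explicit(header, ~OLD_BIT, memory_order_relaxed);
    return (prev & OLD_BIT) != 0;     /* only the first barrier hit wins */
}

int main(void)
{
    _Atomic(unsigned) header = OLD_BIT;     /* an "old" object being mutated */
    printf("first store pushes to remset:  %d\n", write_barrier(&header));  /* 1 */
    printf("second store pushes to remset: %d\n", write_barrier(&header));  /* 0 */
    return 0;
}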
- jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); const jl_datatype_layout_t *ly = dt->layout; uint32_t npointers = ly->npointers; //if (npointers == 0) // this was checked by the caller @@ -1629,14 +1500,14 @@ void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_N const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly); for (size_t i = 1; i < npointers; i++) { uint32_t fld; - if (ly->fielddesc_type == 0) { + if (ly->flags.fielddesc_type == 0) { fld = ptrs8[i]; } - else if (ly->fielddesc_type == 1) { + else if (ly->flags.fielddesc_type == 1) { fld = ptrs16[i]; } else { - assert(ly->fielddesc_type == 2); + assert(ly->flags.fielddesc_type == 2); fld = ptrs32[i]; } jl_value_t *ptrf = ((jl_value_t**)ptr)[fld]; @@ -1673,7 +1544,7 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset, STATIC_INLINE void gc_assert_parent_validity(jl_value_t *parent, jl_value_t *child) JL_NOTSAFEPOINT { -#ifdef GC_ASSERT_PARENT_VALIDITY +#if defined(GC_VERIFY) || defined(GC_ASSERT_PARENT_VALIDITY) jl_taggedvalue_t *child_astagged = jl_astaggedvalue(child); jl_taggedvalue_t *child_vtag = (jl_taggedvalue_t *)(child_astagged->header & ~(uintptr_t)0xf); uintptr_t child_vt = (uintptr_t)child_vtag; @@ -1710,8 +1581,8 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj, uintptr_t nptr) JL_NOTSAFEPOINT { if (__unlikely((nptr & 0x3) == 0x3)) { - ptls->heap.remset_nptr += nptr >> 2; - arraylist_t *remset = ptls->heap.remset; + ptls->gc_tls.heap.remset_nptr += nptr >> 2; + arraylist_t *remset = &ptls->gc_tls.heap.remset; size_t len = remset->len; if (__unlikely(len >= remset->max)) { arraylist_push(remset, obj); @@ -1726,6 +1597,10 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj, // Push a work item to the queue STATIC_INLINE void gc_ptr_queue_push(jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + if (obj == gc_findval) + jl_raise_debugger(); +#endif ws_array_t *old_a = ws_queue_push(&mq->ptr_queue, &obj, sizeof(jl_value_t*)); // Put `old_a` in `reclaim_set` to be freed after the mark phase if (__unlikely(old_a != NULL)) @@ -1775,7 +1650,7 @@ JL_NORETURN NOINLINE void gc_dump_queue_and_abort(jl_ptls_t ptls, jl_datatype_t if (jl_n_gcthreads == 0) { jl_safe_printf("\n"); jl_value_t *new_obj; - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_safe_printf("thread %d ptr queue:\n", ptls->tid); jl_safe_printf("~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n"); while ((new_obj = gc_ptr_queue_steal_from(mq)) != NULL) { @@ -1814,7 +1689,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_ uint8_t *obj8_end, uintptr_t nptr) JL_NOTSAFEPOINT { (void)jl_assume(obj8_begin < obj8_end); - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t **slot = NULL; jl_value_t *new_obj = NULL; for (; obj8_begin < obj8_end; obj8_begin++) { @@ -1846,7 +1721,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint uint16_t *obj16_end, uintptr_t nptr) JL_NOTSAFEPOINT { (void)jl_assume(obj16_begin < obj16_end); - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t **slot = NULL; jl_value_t *new_obj = NULL; for (; obj16_begin < obj16_end; obj16_begin++) { @@ -1878,7 +1753,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint uint32_t *obj32_end, uintptr_t nptr) JL_NOTSAFEPOINT { 
(void)jl_assume(obj32_begin < obj32_end); - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t **slot = NULL; jl_value_t *new_obj = NULL; for (; obj32_begin < obj32_end; obj32_begin++) { @@ -1909,16 +1784,18 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t **obj_begin, jl_value_t **obj_end, uint32_t step, uintptr_t nptr) JL_NOTSAFEPOINT { - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t *new_obj; // Decide whether need to chunk objary + assert(step > 0); (void)jl_assume(step > 0); if ((nptr & 0x2) == 0x2) { // pre-scan this object: most of this object should be old, so look for // the first young object before starting this chunk // (this also would be valid for young objects, but probably less beneficial) for (; obj_begin < obj_end; obj_begin += step) { - new_obj = *obj_begin; + jl_value_t **slot = obj_begin; + new_obj = *slot; if (new_obj != NULL) { verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)", gc_slot_to_arrayidx(obj_parent, obj_begin)); @@ -1927,7 +1804,7 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v nptr |= 1; if (!gc_marked(o->header)) break; - gc_heap_snapshot_record_array_edge(obj_parent, &new_obj); + gc_heap_snapshot_record_array_edge(obj_parent, slot); } } } @@ -1949,13 +1826,14 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v } } for (; obj_begin < scan_end; obj_begin += step) { + jl_value_t **slot = obj_begin; new_obj = *obj_begin; if (new_obj != NULL) { verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)", gc_slot_to_arrayidx(obj_parent, obj_begin)); gc_assert_parent_validity(obj_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); - gc_heap_snapshot_record_array_edge(obj_parent, &new_obj); + gc_heap_snapshot_record_array_edge(obj_parent, slot); } } if (too_big) { @@ -1970,14 +1848,14 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v } // Mark array with 8bit field descriptors -STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin, - jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end, +STATIC_INLINE void gc_mark_memory8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin, + jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end, uintptr_t elsize, uintptr_t nptr) JL_NOTSAFEPOINT { - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t *new_obj; - size_t elsize = ((jl_array_t *)ary8_parent)->elsize / sizeof(jl_value_t *); assert(elsize > 0); + (void)jl_assume(elsize > 0); // Decide whether need to chunk objary if ((nptr & 0x2) == 0x2) { // pre-scan this object: most of this object should be old, so look for @@ -1986,7 +1864,8 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va for (; ary8_begin < ary8_end; ary8_begin += elsize) { int early_end = 0; for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) { - new_obj = ary8_begin[*pindex]; + jl_value_t **slot = &ary8_begin[*pindex]; + new_obj = *slot; if (new_obj != NULL) { verify_parent2("array", ary8_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary8_parent, ary8_begin)); @@ -1997,7 +1876,7 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va early_end = 1; 
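In `gc_mark_objarray` and the `gc_mark_memory8`/`gc_mark_memory16` variants above, an oversized array is not scanned in one pass: only the first batch is processed immediately and the remainder is pushed as a `jl_gc_chunk_t` that other GC threads can steal. A simplified sketch of that splitting step, with hypothetical names and a made-up batch size:

#include <stddef.h>
#include <stdio.h>

#define TOY_CHUNK_BATCH 4096              /* stand-in for GC_CHUNK_BATCH_SIZE */

typedef struct {
    void **begin, **end;                  /* remainder still to be scanned */
} toy_chunk_t;

static size_t chunks_pushed = 0;

/* assume: in the real code this lands on a work-stealing chunk queue */
static void toy_chunkqueue_push(toy_chunk_t c) { (void)c; chunks_pushed++; }

/* assume: marks whatever the slot points to */
static void toy_mark_slot(void **slot) { (void)slot; }

static void toy_mark_objarray(void **begin, void **end)
{
    void **scan_end = end;
    if ((size_t)(end - begin) > TOY_CHUNK_BATCH) {
        /* Scan only the first batch now; defer the rest as a chunk that any
           GC thread may steal and expand into more marking work. */
        scan_end = begin + TOY_CHUNK_BATCH;
        toy_chunk_t c = { scan_end, end };
        toy_chunkqueue_push(c);
    }
    for (void **slot = begin; slot < scan_end; slot++)
        toy_mark_slot(slot);
}

int main(void)
{
    void *slots[10000] = { NULL };
    toy_mark_objarray(&slots[0], &slots[10000]);
    printf("chunks deferred: %zu\n", chunks_pushed);   /* prints 1 */
    return 0;
}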
break; } - gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj); + gc_heap_snapshot_record_array_edge(ary8_parent, slot); } } if (early_end) @@ -2016,26 +1895,27 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after // scanning the array elements if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) { - jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, 0, nptr}; + jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, elsize, nptr}; gc_chunkqueue_push(mq, &c); pushed_chunk = 1; } } - for (; ary8_begin < ary8_end; ary8_begin += elsize) { + for (; ary8_begin < scan_end; ary8_begin += elsize) { for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) { - new_obj = ary8_begin[*pindex]; + jl_value_t **slot = &ary8_begin[*pindex]; + new_obj = *slot; if (new_obj != NULL) { verify_parent2("array", ary8_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary8_parent, ary8_begin)); gc_assert_parent_validity(ary8_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); - gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj); + gc_heap_snapshot_record_array_edge(ary8_parent, slot); } } } if (too_big) { if (!pushed_chunk) { - jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, 0, nptr}; + jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, elsize, nptr}; gc_chunkqueue_push(mq, &c); } } @@ -2045,14 +1925,14 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va } // Mark array with 16bit field descriptors -STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_value_t **ary16_begin, - jl_value_t **ary16_end, uint16_t *elem_begin, uint16_t *elem_end, +STATIC_INLINE void gc_mark_memory16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_value_t **ary16_begin, + jl_value_t **ary16_end, uint16_t *elem_begin, uint16_t *elem_end, size_t elsize, uintptr_t nptr) JL_NOTSAFEPOINT { - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t *new_obj; - size_t elsize = ((jl_array_t *)ary16_parent)->elsize / sizeof(jl_value_t *); assert(elsize > 0); + (void)jl_assume(elsize > 0); // Decide whether need to chunk objary if ((nptr & 0x2) == 0x2) { // pre-scan this object: most of this object should be old, so look for @@ -2061,7 +1941,8 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_ for (; ary16_begin < ary16_end; ary16_begin += elsize) { int early_end = 0; for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) { - new_obj = ary16_begin[*pindex]; + jl_value_t **slot = &ary16_begin[*pindex]; + new_obj = *slot; if (new_obj != NULL) { verify_parent2("array", ary16_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary16_parent, ary16_begin)); @@ -2072,7 +1953,7 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_ early_end = 1; break; } - gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj); + gc_heap_snapshot_record_array_edge(ary16_parent, slot); } } if (early_end) @@ -2098,13 +1979,14 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_ } for (; ary16_begin < scan_end; ary16_begin += elsize) { for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) { - new_obj = ary16_begin[*pindex]; + jl_value_t **slot = &ary16_begin[*pindex]; + 
new_obj = *slot; if (new_obj != NULL) { verify_parent2("array", ary16_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary16_parent, ary16_begin)); gc_assert_parent_validity(ary16_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); - gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj); + gc_heap_snapshot_record_array_edge(ary16_parent, slot); } } } @@ -2129,8 +2011,8 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch jl_value_t **obj_end = c->end; uint32_t step = c->step; uintptr_t nptr = c->nptr; - gc_mark_objarray(ptls, obj_parent, obj_begin, obj_end, step, - nptr); + gc_mark_objarray(ptls, obj_parent, obj_begin, obj_end, + step, nptr); break; } case GC_ary8_chunk: { @@ -2139,9 +2021,10 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch jl_value_t **ary8_end = c->end; uint8_t *elem_begin = (uint8_t *)c->elem_begin; uint8_t *elem_end = (uint8_t *)c->elem_end; + size_t elsize = c->step; uintptr_t nptr = c->nptr; - gc_mark_array8(ptls, ary8_parent, ary8_begin, ary8_end, elem_begin, elem_end, - nptr); + gc_mark_memory8(ptls, ary8_parent, ary8_begin, ary8_end, elem_begin, elem_end, + elsize, nptr); break; } case GC_ary16_chunk: { @@ -2150,20 +2033,22 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch jl_value_t **ary16_end = c->end; uint16_t *elem_begin = (uint16_t *)c->elem_begin; uint16_t *elem_end = (uint16_t *)c->elem_end; + size_t elsize = c->step; uintptr_t nptr = c->nptr; - gc_mark_array16(ptls, ary16_parent, ary16_begin, ary16_end, elem_begin, elem_end, - nptr); + gc_mark_memory16(ptls, ary16_parent, ary16_begin, ary16_end, elem_begin, elem_end, + elsize, nptr); break; } case GC_finlist_chunk: { + jl_value_t *fl_parent = c->parent; jl_value_t **fl_begin = c->begin; jl_value_t **fl_end = c->end; - gc_mark_finlist_(mq, fl_begin, fl_end); + gc_mark_finlist_(mq, fl_parent, fl_begin, fl_end); break; } default: { // `empty-chunk` should be checked by caller - jl_safe_printf("GC internal error: chunk mismatch cid=%d\n", c->cid); + jl_safe_printf("GC internal error: chunk mismatch\n"); abort(); } } @@ -2173,7 +2058,7 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots, uintptr_t offset, uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT { - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t *new_obj; uint32_t nr = nroots >> 2; while (1) { @@ -2218,7 +2103,7 @@ STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroot // Mark exception stack STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, size_t itr) JL_NOTSAFEPOINT { - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t *new_obj; while (itr > 0) { size_t bt_size = jl_excstack_bt_size(excstack, itr); @@ -2246,44 +2131,38 @@ STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, siz } // Mark module binding -STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent, jl_binding_t **mb_begin, - jl_binding_t **mb_end, uintptr_t nptr, +STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent, uintptr_t nptr, uint8_t bits) JL_NOTSAFEPOINT { - jl_gc_markqueue_t *mq = &ptls->mark_queue; - for (; mb_begin < mb_end; mb_begin++) { - jl_binding_t *b = *mb_begin; - if (b == (jl_binding_t *)jl_nothing) - 
continue; - verify_parent1("module", mb_parent, mb_begin, "binding_buff"); - gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)b); - gc_try_claim_and_push(mq, b, &nptr); - } + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; jl_value_t *bindings = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindings); gc_assert_parent_validity((jl_value_t *)mb_parent, bindings); gc_try_claim_and_push(mq, bindings, &nptr); jl_value_t *bindingkeyset = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindingkeyset); gc_assert_parent_validity((jl_value_t *)mb_parent, bindingkeyset); gc_try_claim_and_push(mq, bindingkeyset, &nptr); + gc_heap_snapshot_record_module_to_binding(mb_parent, bindings, bindingkeyset); gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->parent); gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->parent, &nptr); - size_t nusings = mb_parent->usings.len; + size_t nusings = module_usings_length(mb_parent); if (nusings > 0) { // this is only necessary because bindings for "using" modules // are added only when accessed. therefore if a module is replaced // after "using" it but before accessing it, this array might // contain the only reference. jl_value_t *obj_parent = (jl_value_t *)mb_parent; - jl_value_t **objary_begin = (jl_value_t **)mb_parent->usings.items; - jl_value_t **objary_end = objary_begin + nusings; - gc_mark_objarray(ptls, obj_parent, objary_begin, objary_end, 1, nptr); + struct _jl_module_using *objary_begin = (struct _jl_module_using *)mb_parent->usings.items; + struct _jl_module_using *objary_end = objary_begin + nusings; + static_assert(sizeof(struct _jl_module_using) == 3*sizeof(void *), "Mismatch in _jl_module_using size"); + static_assert(offsetof(struct _jl_module_using, mod) == 0, "Expected `mod` at the beginning of _jl_module_using"); + gc_mark_objarray(ptls, obj_parent, (jl_value_t**)objary_begin, (jl_value_t**)objary_end, 3, nptr); } else { gc_mark_push_remset(ptls, (jl_value_t *)mb_parent, nptr); } } -void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) +void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) { jl_value_t *new_obj; // Decide whether need to chunk finlist @@ -2293,8 +2172,10 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t * gc_chunkqueue_push(mq, &c); fl_end = fl_begin + GC_CHUNK_BATCH_SIZE; } + size_t i = 0; for (; fl_begin < fl_end; fl_begin++) { - new_obj = *fl_begin; + jl_value_t **slot = fl_begin; + new_obj = *slot; if (__unlikely(new_obj == NULL)) continue; if (gc_ptr_tag(new_obj, 1)) { @@ -2305,6 +2186,13 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t * if (gc_ptr_tag(new_obj, 2)) continue; gc_try_claim_and_push(mq, new_obj, NULL); + if (fl_parent != NULL) { + gc_heap_snapshot_record_array_edge(fl_parent, slot); + } else { + // This is a list of objects following the same format as a finlist + // if `fl_parent` is NULL + gc_heap_snapshot_record_finlist(new_obj, ++i); + } } } @@ -2316,14 +2204,14 @@ void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) return; jl_value_t **fl_begin = (jl_value_t **)list->items + start; jl_value_t **fl_end = (jl_value_t **)list->items + len; - gc_mark_finlist_(mq, fl_begin, fl_end); + gc_mark_finlist_(mq, NULL, fl_begin, fl_end); } JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) { int may_claim = gc_try_setmark_tag(jl_astaggedvalue(obj), GC_MARKED); if 
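The `usings` handling above now walks an array of three-word `_jl_module_using` records through `gc_mark_objarray` with `step` 3, relying on the `static_assert`s that keep the GC-visible `mod` pointer at offset 0. A toy illustration of that stride-based traversal; the record layout and names below are simplified stand-ins:

#include <stdio.h>

typedef struct {
    void *mod;          /* the only GC-visible pointer, kept at offset 0 */
    size_t min_world;   /* stand-ins for the two non-pointer words */
    size_t max_world;
} toy_using_t;

/* Walk an array of records as if it were a pointer array with a stride
   (in pointer-sized words), visiting only the first word of each record. */
static void toy_mark_strided(void **begin, void **end, size_t step)
{
    for (void **slot = begin; slot < end; slot += step)
        printf("would mark %p\n", *slot);
}

int main(void)
{
    _Static_assert(sizeof(toy_using_t) == 3 * sizeof(void*), "3-word records");
    toy_using_t usings[2] = { { (void*)0x1, 0, 0 }, { (void*)0x2, 0, 0 } };
    toy_mark_strided((void**)&usings[0], (void**)&usings[2], 3);
    return 0;
}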
(may_claim) - gc_ptr_queue_push(&ptls->mark_queue, obj); + gc_ptr_queue_push(&ptls->gc_tls.mark_queue, obj); return may_claim; } @@ -2334,24 +2222,18 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr); } -// Enqueue and mark all outgoing references from `new_obj` which have not been marked -// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for -// objects which have been enqueued into the `remset` -FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj, - int meta_updated) +// Enqueue and mark all outgoing references from `new_obj` which have not been marked yet. +// `_new_obj` has its lowest bit tagged if it's in the remset (in which case we shouldn't update page metadata) +FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj) { - jl_value_t *new_obj = (jl_value_t *)_new_obj; + int meta_updated = (uintptr_t)_new_obj & GC_REMSET_PTR_TAG; + jl_value_t *new_obj = (jl_value_t *)((uintptr_t)_new_obj & ~(uintptr_t)GC_REMSET_PTR_TAG); mark_obj: { - #ifdef JL_DEBUG_BUILD - if (new_obj == gc_findval) - jl_raise_debugger(); - #endif jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); uintptr_t vtag = o->header & ~(uintptr_t)0xf; uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED; int update_meta = __likely(!meta_updated && !gc_verifying); int foreign_alloc = 0; - // directly point at eyt_obj_in_img to encourage inlining if (update_meta && o->bits.in_image) { foreign_alloc = 1; update_meta = 0; @@ -2365,7 +2247,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ vtag == (jl_vararg_tag << 4)) { // these objects have pointers in them, but no other special handling // so we want these to fall through to the end - vtag = (uintptr_t)small_typeof[vtag / sizeof(*small_typeof)]; + vtag = (uintptr_t)ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)]; } else if (vtag < jl_max_tags << 4) { // these objects either have specialing handling @@ -2375,8 +2257,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t); if (update_meta) gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - objprofile_count(jl_simplevector_type, bits == GC_OLD_MARKED, dtsz); jl_value_t *objary_parent = new_obj; jl_value_t **objary_begin = data; jl_value_t **objary_end = data + l; @@ -2387,22 +2267,13 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ else if (vtag == jl_module_tag << 4) { if (update_meta) gc_setmark(ptls, o, bits, sizeof(jl_module_t)); - else if (foreign_alloc) - objprofile_count(jl_module_type, bits == GC_OLD_MARKED, sizeof(jl_module_t)); jl_module_t *mb_parent = (jl_module_t *)new_obj; - jl_svec_t *bindings = jl_atomic_load_relaxed(&mb_parent->bindings); - jl_binding_t **table = (jl_binding_t**)jl_svec_data(bindings); - size_t bsize = jl_svec_len(bindings); - uintptr_t nptr = ((bsize + mb_parent->usings.len + 1) << 2) | (bits & GC_OLD); - jl_binding_t **mb_begin = table + 1; - jl_binding_t **mb_end = table + bsize; - gc_mark_module_binding(ptls, mb_parent, mb_begin, mb_end, nptr, bits); + uintptr_t nptr = ((module_usings_length(mb_parent) + 1) << 2) | (bits & GC_OLD); + gc_mark_module_binding(ptls, mb_parent, nptr, bits); } else if (vtag == jl_task_tag << 4) { if (update_meta) gc_setmark(ptls, o, bits, sizeof(jl_task_t)); - else if (foreign_alloc) - 
objprofile_count(jl_task_type, bits == GC_OLD_MARKED, sizeof(jl_task_t)); jl_task_t *ta = (jl_task_t *)new_obj; gc_scrub_record_task(ta); if (gc_cblist_task_scanner) { @@ -2411,9 +2282,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task)); } #ifdef COPY_STACKS - void *stkbuf = ta->stkbuf; - if (stkbuf && ta->copy_stack) { - gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); + void *stkbuf = ta->ctx.stkbuf; + if (stkbuf && ta->ctx.copy_stack) { + gc_setmark_buf_(ptls, stkbuf, bits, ta->ctx.bufsz); // For gc_heap_snapshot_record: // TODO: attribute size of stack // TODO: edge to stack data @@ -2426,12 +2297,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ uintptr_t lb = 0; uintptr_t ub = (uintptr_t)-1; #ifdef COPY_STACKS - if (stkbuf && ta->copy_stack && !ta->ptls) { + if (stkbuf && ta->ctx.copy_stack && !ta->ptls) { int16_t tid = jl_atomic_load_relaxed(&ta->tid); assert(tid >= 0); jl_ptls_t ptls2 = gc_all_tls_states[tid]; ub = (uintptr_t)ptls2->stackbase; - lb = ub - ta->copy_stack; + lb = ub - ta->ctx.copy_stack; offset = (uintptr_t)stkbuf - lb; } #endif @@ -2451,7 +2322,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ gc_mark_excstack(ptls, excstack, itr); } const jl_datatype_layout_t *layout = jl_task_type->layout; - assert(layout->fielddesc_type == 0); + assert(layout->flags.fielddesc_type == 0); assert(layout->nfields > 0); uint32_t npointers = layout->npointers; char *obj8_parent = (char *)ta; @@ -2471,16 +2342,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1; if (update_meta) gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - objprofile_count(jl_string_type, bits == GC_OLD_MARKED, dtsz); } else { - jl_datatype_t *vt = small_typeof[vtag / sizeof(*small_typeof)]; + jl_datatype_t *vt = ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)]; size_t dtsz = jl_datatype_size(vt); if (update_meta) gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); } return; } @@ -2490,72 +2357,59 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ gc_dump_queue_and_abort(ptls, vt); } jl_datatype_t *vt = (jl_datatype_t *)vtag; - if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t *)new_obj; - jl_array_flags_t flags = a->flags; + if (vt->name == jl_genericmemory_typename) { + jl_genericmemory_t *m = (jl_genericmemory_t*)new_obj; + int pooled = 1; // The jl_genericmemory_t itself is always pooled-size, even with data attached to it if (update_meta) { - if (flags.pooled) + if (pooled) gc_setmark_pool(ptls, o, bits); else gc_setmark_big(ptls, o, bits); } - else if (foreign_alloc) { - objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_array_t)); - } - if (flags.how == 0) { - void *data_ptr = (char*)a + sizeof(jl_array_t) +jl_array_ndimwords(a->flags.ndims) * sizeof(size_t); - gc_heap_snapshot_record_hidden_edge(new_obj, data_ptr, jl_array_nbytes(a), 2); + int how = jl_genericmemory_how(m); + if (how == 0 || how == 2) { + gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, jl_genericmemory_nbytes(m), how == 0 ? 
2 : 0); } - if (flags.how == 1) { - void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize); - verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); - gc_heap_snapshot_record_hidden_edge(new_obj, jl_valueof(val_buf), jl_array_nbytes(a), flags.pooled); - (void)val_buf; - gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, - bits, jl_array_nbytes(a)); - } - else if (flags.how == 2) { + else if (how == 1) { if (update_meta || foreign_alloc) { - objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, - jl_array_nbytes(a)); - gc_heap_snapshot_record_hidden_edge(new_obj, a->data, jl_array_nbytes(a), flags.pooled); + size_t nb = jl_genericmemory_nbytes(m); + gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, nb, 0); if (bits == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a); + ptls->gc_tls.gc_cache.perm_scanned_bytes += nb; } else { - ptls->gc_cache.scanned_bytes += jl_array_nbytes(a); + ptls->gc_tls.gc_cache.scanned_bytes += nb; } } } - else if (flags.how == 3) { - jl_value_t *owner = jl_array_data_owner(a); + else if (how == 3) { + jl_value_t *owner = jl_genericmemory_data_owner_field(m); uintptr_t nptr = (1 << 2) | (bits & GC_OLD); gc_try_claim_and_push(mq, owner, &nptr); gc_heap_snapshot_record_internal_array_edge(new_obj, owner); gc_mark_push_remset(ptls, new_obj, nptr); return; } - if (!a->data || jl_array_len(a) == 0) + if (m->length == 0) return; - if (flags.ptrarray) { - if ((jl_datatype_t *)jl_tparam0(vt) == jl_symbol_type) + const jl_datatype_layout_t *layout = vt->layout; + if (layout->flags.arrayelem_isboxed) { + if ((jl_datatype_t*)jl_tparam1(vt) == jl_symbol_type) return; - size_t l = jl_array_len(a); jl_value_t *objary_parent = new_obj; - jl_value_t **objary_begin = (jl_value_t **)a->data; - jl_value_t **objary_end = objary_begin + l; + jl_value_t **objary_begin = (jl_value_t **)m->ptr; + jl_value_t **objary_end = objary_begin + m->length; uint32_t step = 1; - uintptr_t nptr = (l << 2) | (bits & GC_OLD); + uintptr_t nptr = (m->length << 2) | (bits & GC_OLD); gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr); } - else if (flags.hasptr) { - jl_datatype_t *et = (jl_datatype_t *)jl_tparam0(vt); - const jl_datatype_layout_t *layout = et->layout; + else if (layout->first_ptr >= 0) { + const jl_datatype_layout_t *layout = vt->layout; unsigned npointers = layout->npointers; - unsigned elsize = a->elsize / sizeof(jl_value_t *); - size_t l = jl_array_len(a); + unsigned elsize = layout->size / sizeof(jl_value_t*); + size_t l = m->length; jl_value_t *objary_parent = new_obj; - jl_value_t **objary_begin = (jl_value_t **)a->data; + jl_value_t **objary_begin = (jl_value_t**)m->ptr; jl_value_t **objary_end = objary_begin + l * elsize; uint32_t step = elsize; uintptr_t nptr = ((l * npointers) << 2) | (bits & GC_OLD); @@ -2563,17 +2417,17 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ objary_begin += layout->first_ptr; gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr); } - else if (layout->fielddesc_type == 0) { - uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout); + else if (layout->flags.fielddesc_type == 0) { + uint8_t *obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout); uint8_t *obj8_end = obj8_begin + npointers; - gc_mark_array8(ptls, objary_parent, objary_begin, objary_end, obj8_begin, - obj8_end, nptr); + gc_mark_memory8(ptls, objary_parent, objary_begin, objary_end, obj8_begin, obj8_end, + elsize, nptr); } - else 
if (layout->fielddesc_type == 1) { - uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout); + else if (layout->flags.fielddesc_type == 1) { + uint16_t *obj16_begin = (uint16_t*)jl_dt_layout_ptrs(layout); uint16_t *obj16_end = obj16_begin + npointers; - gc_mark_array16(ptls, objary_parent, objary_begin, objary_end, obj16_begin, - obj16_end, nptr); + gc_mark_memory16(ptls, objary_parent, objary_begin, objary_end, obj16_begin, obj16_end, + elsize, nptr); } else { assert(0 && "unimplemented"); @@ -2584,8 +2438,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ size_t dtsz = jl_datatype_size(vt); if (update_meta) gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); if (vt == jl_weakref_type) return; const jl_datatype_layout_t *layout = vt->layout; @@ -2593,9 +2445,19 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ if (npointers == 0) return; uintptr_t nptr = (npointers << 2 | (bits & GC_OLD)); - assert((layout->nfields > 0 || layout->fielddesc_type == 3) && + if (vt == jl_binding_partition_type) { + // BindingPartition has a special union of jl_value_t and flag bits + // but is otherwise regular. + jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_valueof(o); + jl_value_t *val = decode_restriction_value( + jl_atomic_load_relaxed(&bpart->restriction)); + if (val) + gc_heap_snapshot_record_binding_partition_edge((jl_value_t*)bpart, val); + gc_try_claim_and_push(mq, val, &nptr); + } + assert((layout->nfields > 0 || layout->flags.fielddesc_type == 3) && "opaque types should have been handled specially"); - if (layout->fielddesc_type == 0) { + if (layout->flags.fielddesc_type == 0) { char *obj8_parent = (char *)new_obj; uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout); uint8_t *obj8_end = obj8_begin + npointers; @@ -2608,7 +2470,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ gc_ptr_queue_push(mq, new_obj); } } - else if (layout->fielddesc_type == 1) { + else if (layout->flags.fielddesc_type == 1) { char *obj16_parent = (char *)new_obj; uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout); uint16_t *obj16_end = obj16_begin + npointers; @@ -2621,7 +2483,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ gc_ptr_queue_push(mq, new_obj); } } - else if (layout->fielddesc_type == 2) { + else if (layout->flags.fielddesc_type == 2) { // This is very uncommon // Do not do store to load forwarding to save some code size char *obj32_parent = (char *)new_obj; @@ -2637,7 +2499,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ } } else { - assert(layout->fielddesc_type == 3); + assert(layout->flags.fielddesc_type == 3); jl_fielddescdyn_t *desc = (jl_fielddescdyn_t *)jl_dt_layout_fields(layout); int old = jl_astaggedvalue(new_obj)->bits.gc & 2; uintptr_t young = desc->markfunc(ptls, new_obj); @@ -2651,12 +2513,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq) { while (1) { - void *new_obj = (void *)gc_ptr_queue_pop(&ptls->mark_queue); + void *new_obj = (void *)gc_ptr_queue_pop(&ptls->gc_tls.mark_queue); // No more objects to mark if (__unlikely(new_obj == NULL)) { return; } - gc_mark_outrefs(ptls, mq, new_obj, 0); + gc_mark_outrefs(ptls, mq, new_obj); } } @@ -2674,22 +2536,21 @@ void gc_drain_own_chunkqueue(jl_ptls_t ptls, jl_gc_markqueue_t 
*mq) } // Main mark loop. Stack (allocated on the heap) of `jl_value_t *` -// is used to keep track of processed items. Maintaning this stack (instead of +// is used to keep track of processed items. Maintaining this stack (instead of // native one) avoids stack overflow when marking deep objects and // makes it easier to implement parallel marking via work-stealing JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls) { - gc_mark_loop_serial_(ptls, &ptls->mark_queue); - gc_drain_own_chunkqueue(ptls, &ptls->mark_queue); + gc_mark_loop_serial_(ptls, &ptls->gc_tls.mark_queue); + gc_drain_own_chunkqueue(ptls, &ptls->gc_tls.mark_queue); } void gc_mark_and_steal(jl_ptls_t ptls) { - jl_gc_markqueue_t *mq = &ptls->mark_queue; - jl_gc_markqueue_t *mq_master = NULL; int master_tid = jl_atomic_load(&gc_master_tid); - if (master_tid != -1) - mq_master = &gc_all_tls_states[master_tid]->mark_queue; + assert(master_tid != -1); + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; + jl_gc_markqueue_t *mq_master = &gc_all_tls_states[master_tid]->gc_tls.mark_queue; void *new_obj; jl_gc_chunk_t c; pop : { @@ -2705,18 +2566,22 @@ void gc_mark_and_steal(jl_ptls_t ptls) goto steal; } mark : { - gc_mark_outrefs(ptls, mq, new_obj, 0); + gc_mark_outrefs(ptls, mq, new_obj); goto pop; } // Note that for the stealing heuristics, we try to - // steal chunks much more agressively than pointers, + // steal chunks much more aggressively than pointers, // since we know chunks will likely expand into a lot // of work for the mark loop steal : { + int first = gc_first_parallel_collector_thread_id(); + int last = gc_last_parallel_collector_thread_id(); // Try to steal chunk from random GC thread for (int i = 0; i < 4 * jl_n_markthreads; i++) { - uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads; - jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; + int v = gc_random_parallel_collector_thread_id(ptls); + jl_ptls_t ptls2 = gc_all_tls_states[v]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue; c = gc_chunkqueue_steal_from(mq2); if (c.cid != GC_empty_chunk) { gc_mark_chunk(ptls, mq, &c); @@ -2724,8 +2589,10 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } // Sequentially walk GC threads to try to steal chunk - for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { - jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; + for (int i = first; i <= last; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue; c = gc_chunkqueue_steal_from(mq2); if (c.cid != GC_empty_chunk) { gc_mark_chunk(ptls, mq, &c); @@ -2733,59 +2600,129 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } // Try to steal chunk from master thread - if (mq_master != NULL) { - c = gc_chunkqueue_steal_from(mq_master); - if (c.cid != GC_empty_chunk) { - gc_mark_chunk(ptls, mq, &c); - goto pop; - } + c = gc_chunkqueue_steal_from(mq_master); + if (c.cid != GC_empty_chunk) { + gc_mark_chunk(ptls, mq, &c); + goto pop; } // Try to steal pointer from random GC thread for (int i = 0; i < 4 * jl_n_markthreads; i++) { - uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads; - jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; + int v = gc_random_parallel_collector_thread_id(ptls); + jl_ptls_t ptls2 = gc_all_tls_states[v]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + jl_gc_markqueue_t *mq2 = 
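As the mark-loop comment above explains, the traversal keeps its own heap-allocated stack of pending `jl_value_t *` rather than recursing, which avoids native stack overflow on deep object graphs and is what makes work-stealing parallel marking practical. A minimal sketch of that pattern, with everything reduced to toy types:

#include <stdlib.h>

typedef struct toy_obj {
    int marked;
    struct toy_obj *fields[2];            /* outgoing references (may be NULL) */
} toy_obj_t;

/* Depth-first marking driven by an explicit, growable buffer: deep object
   graphs only grow this heap allocation, never the native call stack. */
static void toy_mark_from_root(toy_obj_t *root)
{
    size_t cap = 64, len = 0;
    toy_obj_t **stack = malloc(cap * sizeof(*stack));
    if (stack == NULL)
        return;
    stack[len++] = root;
    while (len > 0) {
        toy_obj_t *obj = stack[--len];     /* pop (real code: a work-stealing deque) */
        if (obj == NULL || obj->marked)
            continue;
        obj->marked = 1;
        for (int i = 0; i < 2; i++) {      /* push outgoing references */
            if (len == cap) {
                toy_obj_t **bigger = realloc(stack, 2 * cap * sizeof(*stack));
                if (bigger == NULL) {
                    free(stack);
                    return;
                }
                stack = bigger;
                cap *= 2;
            }
            stack[len++] = obj->fields[i];
        }
    }
    free(stack);
}

int main(void)
{
    toy_obj_t leaf = { 0, { NULL, NULL } };
    toy_obj_t root = { 0, { &leaf, NULL } };
    toy_mark_from_root(&root);
    return leaf.marked ? 0 : 1;            /* leaf reached through the explicit stack */
}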
&ptls2->gc_tls.mark_queue; new_obj = gc_ptr_queue_steal_from(mq2); if (new_obj != NULL) goto mark; } // Sequentially walk GC threads to try to steal pointer - for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { - jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; + for (int i = first; i <= last; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + jl_gc_markqueue_t *mq2 = &ptls2->gc_tls.mark_queue; new_obj = gc_ptr_queue_steal_from(mq2); if (new_obj != NULL) goto mark; } // Try to steal pointer from master thread - if (mq_master != NULL) { - new_obj = gc_ptr_queue_steal_from(mq_master); - if (new_obj != NULL) - goto mark; + new_obj = gc_ptr_queue_steal_from(mq_master); + if (new_obj != NULL) + goto mark; + } +} + +size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + assert(ptls != NULL); + // assume each chunk is worth 256 units of work and each pointer + // is worth 1 unit of work + size_t work = 256 * (jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.chunk_queue.bottom) - + jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.chunk_queue.top)); + work += (jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.ptr_queue.bottom) - + jl_atomic_load_relaxed(&ptls->gc_tls.mark_queue.ptr_queue.top)); + return work; +} + +/** + * Correctness argument for the mark-loop termination protocol. + * + * Safety properties: + * - No work items shall be in any thread's queues when `gc_should_mark` observes + * that `gc_n_threads_marking` is zero. + * + * - No work item shall be stolen from the master thread (i.e. mutator thread which started + * GC and which helped the `jl_n_markthreads` - 1 threads to mark) after + * `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is + * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in + * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there, + * and that no work is stolen from us at that point. + * + * Proof: + * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, that + * means that no thread has work on their queue, this is guaranteed because a thread may only exit + * `gc_mark_and_steal` when its own queue is empty, this information is synchronized by the + * seq-cst fetch_add to a thread that is in `gc_should_mark`. `gc_queue_observer_lock` + * guarantees that once `gc_n_threads_marking` reaches zero, no thread will increment it again, + * because incrementing is only legal from inside the lock. Therefore, no thread will reenter + * the mark-loop after `gc_n_threads_marking` reaches zero. 
+ */ + +int gc_should_mark(void) +{ + int should_mark = 0; + uv_mutex_lock(&gc_queue_observer_lock); + while (1) { + int n_threads_marking = jl_atomic_load(&gc_n_threads_marking); + if (n_threads_marking == 0) { + break; + } + int tid = jl_atomic_load_relaxed(&gc_master_tid); + assert(tid != -1); + assert(gc_all_tls_states != NULL); + size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]); + int first = gc_first_parallel_collector_thread_id(); + int last = gc_last_parallel_collector_thread_id(); + for (int i = first; i <= last; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_check_ptls_of_parallel_collector_thread(ptls2); + work += gc_count_work_in_queue(ptls2); + } + // if there is a lot of work left, enter the mark loop + if (work >= 16 * n_threads_marking) { + jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots + // of work to increment this + should_mark = 1; + break; } + jl_cpu_pause(); } + uv_mutex_unlock(&gc_queue_observer_lock); + return should_mark; +} + +void gc_wake_all_for_marking(jl_ptls_t ptls) +{ + uv_mutex_lock(&gc_threads_lock); + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); } void gc_mark_loop_parallel(jl_ptls_t ptls, int master) { - int backoff = GC_BACKOFF_MIN; if (master) { jl_atomic_store(&gc_master_tid, ptls->tid); - // Wake threads up and try to do some work - uv_mutex_lock(&gc_threads_lock); jl_atomic_fetch_add(&gc_n_threads_marking, 1); - uv_cond_broadcast(&gc_threads_cond); - uv_mutex_unlock(&gc_threads_lock); + gc_wake_all_for_marking(ptls); gc_mark_and_steal(ptls); jl_atomic_fetch_add(&gc_n_threads_marking, -1); } - while (jl_atomic_load(&gc_n_threads_marking) > 0) { - // Try to become a thief while other threads are marking - jl_atomic_fetch_add(&gc_n_threads_marking, 1); - if (jl_atomic_load(&gc_master_tid) != -1) { - gc_mark_and_steal(ptls); + while (1) { + int should_mark = gc_should_mark(); + if (!should_mark) { + break; } + gc_mark_and_steal(ptls); jl_atomic_fetch_add(&gc_n_threads_marking, -1); - // Failed to steal - gc_backoff(&backoff); } } @@ -2801,10 +2738,8 @@ void gc_mark_loop(jl_ptls_t ptls) void gc_mark_loop_barrier(void) { - jl_atomic_store(&gc_master_tid, -1); - while (jl_atomic_load(&gc_n_threads_marking) != 0) { - jl_cpu_pause(); - } + assert(jl_atomic_load_relaxed(&gc_n_threads_marking) == 0); + jl_atomic_store_relaxed(&gc_master_tid, -1); } void gc_mark_clean_reclaim_sets(void) @@ -2812,30 +2747,26 @@ void gc_mark_clean_reclaim_sets(void) // Clean up `reclaim-sets` for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set; + if (ptls2 == NULL) { + continue; + } + arraylist_t *reclaim_set2 = &ptls2->gc_tls.mark_queue.reclaim_set; ws_array_t *a = NULL; while ((a = (ws_array_t *)arraylist_pop(reclaim_set2)) != NULL) { free(a->buffer); free(a); } } -} - -static void gc_premark(jl_ptls_t ptls2) -{ - arraylist_t *remset = ptls2->heap.remset; - ptls2->heap.remset = ptls2->heap.last_remset; - ptls2->heap.last_remset = remset; - ptls2->heap.remset->len = 0; - ptls2->heap.remset_nptr = 0; - // avoid counting remembered objects - // in `perm_scanned_bytes` - size_t len = remset->len; - void **items = remset->items; - for (size_t i = 0; i < len; i++) { - jl_value_t *item = (jl_value_t *)items[i]; - objprofile_count(jl_typeof(item), 2, 0); - jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; + // Reset queue indices + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = 
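`gc_should_mark` above only lets a thread (re)enter the mark loop when the estimated backlog justifies another worker: each queued chunk is weighted as 256 units of work, each queued pointer as 1, and the bar is 16 units per thread already marking. A small worked example under those weights (the concrete numbers are invented):

#include <stdio.h>

/* Same weighting as gc_count_work_in_queue / gc_should_mark: a chunk is
   assumed to expand into roughly 256 pointers' worth of work. */
static int toy_should_enter(size_t chunks, size_t ptrs, int n_threads_marking)
{
    size_t work = 256 * chunks + ptrs;
    return work >= 16 * (size_t)n_threads_marking;
}

int main(void)
{
    /* 2 chunks + 100 pointers = 612 units; with 4 markers the bar is 64. */
    printf("%d\n", toy_should_enter(2, 100, 4));  /* 1: worth adding a marker */
    /* 0 chunks + 30 pointers = 30 units; with 4 markers the bar is 64. */
    printf("%d\n", toy_should_enter(0, 30, 4));   /* 0: stay out, avoid contention */
    return 0;
}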
gc_all_tls_states[i]; + if (ptls2 == NULL) { + continue; + } + jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.ptr_queue.bottom, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.ptr_queue.top, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.chunk_queue.bottom, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls.mark_queue.chunk_queue.top, 0); } } @@ -2882,14 +2813,29 @@ static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) } } -static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2) +static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) { - size_t len = ptls2->heap.last_remset->len; - void **items = ptls2->heap.last_remset->items; + void **items = ptls2->gc_tls.heap.remset.items; + size_t len = ptls2->gc_tls.heap.remset.len; for (size_t i = 0; i < len; i++) { - // Objects in the `remset` are already marked, - // so a `gc_try_claim_and_push` wouldn't work here - gc_mark_outrefs(ptls, &ptls->mark_queue, (jl_value_t *)items[i], 1); + void *_v = items[i]; + jl_astaggedvalue(_v)->bits.gc = GC_OLD_MARKED; + jl_value_t *v = (jl_value_t *)((uintptr_t)_v | GC_REMSET_PTR_TAG); + gc_ptr_queue_push(mq, v); + } + // Don't forget to clear the remset + ptls2->gc_tls.heap.remset.len = 0; + ptls2->gc_tls.heap.remset_nptr = 0; +} + +static void gc_check_all_remsets_are_empty(void) +{ + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) { + assert(ptls2->gc_tls.heap.remset.len == 0); + assert(ptls2->gc_tls.heap.remset_nptr == 0); + } } } @@ -2901,27 +2847,38 @@ static void gc_mark_roots(jl_gc_markqueue_t *mq) { // modules gc_try_claim_and_push(mq, jl_main_module, NULL); - gc_heap_snapshot_record_root((jl_value_t*)jl_main_module, "main_module"); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_main_module, "main_module"); // invisible builtin values gc_try_claim_and_push(mq, jl_an_empty_vec_any, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_an_empty_vec_any, "an_empty_vec_any"); gc_try_claim_and_push(mq, jl_module_init_order, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_module_init_order, "module_init_order"); for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { gc_try_claim_and_push(mq, jl_current_modules.table[i], NULL); - gc_heap_snapshot_record_root((jl_value_t*)jl_current_modules.table[i], "top level module"); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_current_modules.table[i], "top level module"); } } gc_try_claim_and_push(mq, jl_anytuple_type_type, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_anytuple_type_type, "anytuple_type_type"); for (size_t i = 0; i < N_CALL_CACHE; i++) { jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); gc_try_claim_and_push(mq, v, NULL); + gc_heap_snapshot_record_array_edge_index((jl_value_t*)jl_anytuple_type_type, (jl_value_t*)v, i); } - gc_try_claim_and_push(mq, jl_all_methods, NULL); gc_try_claim_and_push(mq, _jl_debug_method_invalidation, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)_jl_debug_method_invalidation, "debug_method_invalidation"); // constants gc_try_claim_and_push(mq, jl_emptytuple_type, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_emptytuple_type, "emptytuple_type"); gc_try_claim_and_push(mq, cmpswap_names, NULL); - gc_try_claim_and_push(mq, jl_global_roots_table, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)cmpswap_names, "cmpswap_names"); + gc_try_claim_and_push(mq, jl_global_roots_list, NULL); + 
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_list, "global_roots_list"); + gc_try_claim_and_push(mq, jl_global_roots_keyset, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_keyset, "global_roots_keyset"); + gc_try_claim_and_push(mq, precompile_field_replace, NULL); + gc_heap_snapshot_record_gc_roots((jl_value_t*)precompile_field_replace, "precompile_field_replace"); } // find unmarked objects that need to be finalized from the finalizer list "list". @@ -2975,63 +2932,25 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } - } - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); // Sync this logic with `base/util.jl:GC_Diff` *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); return num; } -JL_DLLEXPORT void jl_gc_reset_stats(void) -{ - gc_num.max_pause = 0; - gc_num.max_memory = 0; - gc_num.max_time_to_safepoint = 0; -} - // TODO: these were supposed to be thread local JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT { @@ -3051,39 +2970,79 @@ JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT return newtb - oldtb; } +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) +{ + int n_threads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + int64_t pool_live_bytes = 0; + for (int i = 0; i < n_threads; i++) { + jl_ptls_t ptls2 = all_tls_states[i]; + if (ptls2 != NULL) { + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); + } + } + return pool_live_bytes; +} + JL_DLLEXPORT int64_t jl_gc_live_bytes(void) { return live_bytes; } +uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor) +{ + double est = factor * old_val + (1 - factor) * new_val; + if (est <= 1) + return 1; // avoid issues with <= 0 + if (est > (uint64_t)2<<36) + return (uint64_t)2<<36; // avoid overflow + return est; +} + +// an overallocation curve inspired by array allocations +// grows very fast initially, then much slower at large heaps +static uint64_t overallocation(uint64_t old_val, uint64_t val, uint64_t max_val) +{ + // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 + // for small n, we grow much faster than O(n) + // for large n, we grow at O(n/8) + // and as we reach O(memory) for 
memory>>1MB, + // this means we end by adding about 10% of memory each time at most + int exp2 = sizeof(old_val) * 8 - +#ifdef _P64 + __builtin_clzll(old_val); +#else + __builtin_clz(old_val); +#endif + uint64_t inc = (uint64_t)((size_t)1 << (exp2 * 7 / 8)) * 4 + old_val / 8; + // once overallocation would exceed max_val, grow by no more than 5% of max_val + if (inc + val > max_val) + if (inc > max_val / 20) + return max_val / 20; + return inc; +} + size_t jl_maxrss(void); // Only one thread should be running in this function static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 1); // We separate the update of the graph from the update of live_bytes here // so that the sweep shows a downward trend in memory usage. jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd); - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; uint64_t gc_start_time = jl_hrtime(); + uint64_t mutator_time = gc_end_time == 0 ? old_mut_time : gc_start_time - gc_end_time; + uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); int64_t last_perm_scanned_bytes = perm_scanned_bytes; uint64_t start_mark_time = jl_hrtime(); JL_PROBE_GC_MARK_BEGIN(); { JL_TIMING(GC, GC_Mark); - - // 1. fix GC bits of objects in the remset. - assert(gc_n_threads); - for (int t_i = 0; t_i < gc_n_threads; t_i++) { - jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - if (ptls2 != NULL) - gc_premark(ptls2); - } - assert(gc_n_threads); int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled); for (int t_i = 0; t_i < gc_n_threads; t_i++) { @@ -3091,21 +3050,23 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) jl_ptls_t ptls_dest = ptls; jl_gc_markqueue_t *mq_dest = mq; if (!single_threaded_mark) { - ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads]; - mq_dest = &ptls_dest->mark_queue; + int dest_tid = gc_ith_parallel_collector_thread_id(t_i % jl_n_markthreads); + ptls_dest = gc_all_tls_states[dest_tid]; + mq_dest = &ptls_dest->gc_tls.mark_queue; } if (ptls2 != NULL) { - // 2.1. mark every thread local root + // 1.1. mark every thread local root gc_queue_thread_local(mq_dest, ptls2); - // 2.2. mark any managed objects in the backtrace buffer + // 1.2. mark any managed objects in the backtrace buffer // TODO: treat these as roots for gc_heap_snapshot_record gc_queue_bt_buf(mq_dest, ptls2); - // 2.3. mark every object in the `last_remsets` and `rem_binding` - gc_queue_remset(ptls_dest, ptls2); + // 1.3. mark every object in the remset + gc_queue_remset(mq_dest, ptls2); } } + gc_check_all_remsets_are_empty(); - // 3. walk roots + // 2. walk roots gc_mark_roots(mq); if (gc_cblist_root_scanner) { gc_invoke_callbacks(jl_gc_cb_root_scanner_t, @@ -3115,7 +3076,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_mark_loop_barrier(); gc_mark_clean_reclaim_sets(); - // 4. check for objects to finalize + // 3. 
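The `overallocation` curve above grows the heap allowance very aggressively for small heaps and much more slowly for large ones. The following standalone snippet re-derives the 64-bit branch (before the 5%-of-`max_val` guardrail) for a few heap sizes to make the shape concrete; it is an illustration, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the 64-bit branch of overallocation() above, without the max_val
   guardrail: inc = 4 * 2^floor(exp2 * 7 / 8) + old_val / 8. */
static uint64_t toy_overalloc_inc(uint64_t old_val)
{
    int exp2 = 64 - __builtin_clzll(old_val);
    return ((uint64_t)1 << (exp2 * 7 / 8)) * 4 + old_val / 8;
}

int main(void)
{
    uint64_t sizes[] = { 1ull << 20, 1ull << 30, 1ull << 36 }; /* 1 MiB, 1 GiB, 64 GiB */
    for (int i = 0; i < 3; i++) {
        uint64_t inc = toy_overalloc_inc(sizes[i]);
        printf("heap %14llu -> grow by %14llu bytes (%.1f%%)\n",
               (unsigned long long)sizes[i], (unsigned long long)inc,
               100.0 * (double)inc / (double)sizes[i]);
    }
    /* Relative growth falls from over 100% at 1 MiB to well under 50% at 64 GiB. */
    return 0;
}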
check for objects to finalize clear_weak_refs(); // Record the length of the marked list since we need to // mark the object moved to the marked list from the @@ -3160,86 +3121,52 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t mark_time = end_mark_time - start_mark_time; gc_num.mark_time = mark_time; gc_num.total_mark_time += mark_time; - int64_t allocd = gc_num.allocd; gc_settime_postmark_end(); // marking is over // Flush everything in mark cache - gc_sync_all_caches_nolock(ptls); + gc_sync_all_caches(ptls); - int64_t live_sz_ub = live_bytes + allocd; - int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; - int64_t estimate_freed = live_sz_ub - live_sz_est; gc_verify(ptls); - gc_stats_all_pool(); gc_stats_big_obj(); - objprofile_printall(); - objprofile_reset(); gc_num.total_allocd += gc_num.allocd; if (!prev_sweep_full) promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; - // 5. next collection decision - int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(allocd/10)); - int nptr = 0; + // 4. next collection decision + int remset_nptr = 0; + int sweep_full = next_sweep_full; + int recollect = 0; assert(gc_n_threads); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) - nptr += ptls2->heap.remset_nptr; + remset_nptr += ptls2->gc_tls.heap.remset_nptr; } + (void)remset_nptr; //Use this information for something? - // many pointers in the intergen frontier => "quick" mark is not quick - int large_frontier = nptr*sizeof(void*) >= default_collect_interval; - int sweep_full = 0; - int recollect = 0; - - // update heuristics only if this GC was automatically triggered - if (collection == JL_GC_AUTO) { - if (large_frontier) { - sweep_full = 1; - gc_num.interval = last_long_collect_interval; - } - if (not_freed_enough || large_frontier) { - gc_num.interval = gc_num.interval * 2; - } - - size_t maxmem = 0; -#ifdef _P64 - // on a big memory machine, increase max_collect_interval to totalmem / nthreads / 2 - maxmem = total_mem / (gc_n_threads - jl_n_gcthreads) / 2; -#endif - if (maxmem < max_collect_interval) - maxmem = max_collect_interval; - if (gc_num.interval > maxmem) { - sweep_full = 1; - gc_num.interval = maxmem; - } - } // If the live data outgrows the suggested max_total_memory // we keep going with minimum intervals and full gcs until // we either free some space or get an OOM error. - if (live_bytes > max_total_memory) { - sweep_full = 1; - } if (gc_sweep_always_full) { sweep_full = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL); } if (collection == JL_GC_FULL && !prev_sweep_full) { sweep_full = 1; recollect = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP); } if (sweep_full) { // these are the difference between the number of gc-perm bytes scanned // on the first collection after sweep_full, and the current scan perm_scanned_bytes = 0; promoted_bytes = 0; - last_long_collect_interval = gc_num.interval; } scanned_bytes = 0; - // 6. start sweeping + // 5. 
start sweeping uint64_t start_sweep_time = jl_hrtime(); JL_PROBE_GC_SWEEP_BEGIN(sweep_full); { @@ -3251,19 +3178,24 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) #ifdef USE_TRACY TracyCZoneColor(full_timing_block.tracy_ctx, 0xFFA500); #endif + current_sweep_full = sweep_full; sweep_weak_refs(); - sweep_stack_pools(); - gc_sweep_foreign_objs(); + uint64_t stack_pool_time = jl_hrtime(); + jl_gc_sweep_stack_pools_and_mtarraylist_buffers(ptls); + stack_pool_time = jl_hrtime() - stack_pool_time; + gc_num.total_stack_pool_sweep_time += stack_pool_time; + gc_num.stack_pool_sweep_time = stack_pool_time; gc_sweep_other(ptls, sweep_full); gc_scrub(); gc_verify_tags(); - gc_sweep_pool(sweep_full); + gc_sweep_pool(); if (sweep_full) gc_sweep_perm_alloc(); } + JL_PROBE_GC_SWEEP_END(); - uint64_t gc_end_time = jl_hrtime(); + gc_end_time = jl_hrtime(); uint64_t pause = gc_end_time - gc_start_time; uint64_t sweep_time = gc_end_time - start_sweep_time; gc_num.total_sweep_time += sweep_time; @@ -3271,9 +3203,116 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (sweep_full) { gc_num.last_full_sweep = gc_end_time; } + else { + gc_num.last_incremental_sweep = gc_end_time; + } + + size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_in_runtime; + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, heap_size); + freed_in_runtime = 0; + uint64_t user_max = max_total_memory * 0.8; + uint64_t alloc_diff = before_free_heap_size - old_heap_size; + uint64_t freed_diff = before_free_heap_size - heap_size; + uint64_t target_heap; + const char *reason = ""; (void)reason; // for GC_TIME output stats + old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC + if (collection == JL_GC_AUTO) { + // update any heuristics only when the user does not force the GC + // but still update the timings, since GC was run and reset, even if it was too early + uint64_t target_allocs = 0.0; + double alloc_smooth_factor = 0.95; + double collect_smooth_factor = 0.5; + double tuning_factor = 2e4; + uint64_t alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); + uint64_t alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); // TODO: subtract estimated finalizer time? + uint64_t gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); + uint64_t gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); + old_alloc_diff = alloc_mem; + old_mut_time = alloc_time; + old_freed_diff = gc_mem; + old_pause_time = gc_time; + // thrashing estimator: if GC time more than 50% of the runtime + if (pause > mutator_time && !(thrash_counter < 4)) + thrash_counter += 1; + else if (thrash_counter > 0) + thrash_counter -= 1; + if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0) { + double alloc_rate = (double)alloc_mem/alloc_time; + double gc_rate = (double)gc_mem/gc_time; + target_allocs = sqrt((double)heap_size * alloc_rate / gc_rate) * tuning_factor; + } + + if (thrashing == 0 && thrash_counter >= 3) { + // require 3 consecutive thrashing cycles to force the default allocator rate + thrashing = 1; + // and require 4 default allocations to clear + thrash_counter = 6; + } + else if (thrashing == 1 && thrash_counter <= 2) { + thrashing = 0; // maybe we should report this to the user or error out? 
+ } + + target_heap = target_allocs + heap_size; + // optionally smooth this: + // target_heap = jl_gc_smooth(jl_atomic_load_relaxed(&gc_heap_stats.heap_target), target_heap, alloc_smooth_factor); + + // compute some guardrails values + uint64_t min_target_allocs = heap_size / 20; // minimum 5% of current heap + if (min_target_allocs < default_collect_interval / 8) // unless the heap is small + min_target_allocs = default_collect_interval / 8; + uint64_t max_target_allocs = overallocation(before_free_heap_size, heap_size, user_max); + if (max_target_allocs < min_target_allocs) + max_target_allocs = min_target_allocs; + // respect max_total_memory first + if (target_heap > user_max) { + target_allocs = heap_size < user_max ? user_max - heap_size : 1; + reason = " user limit"; + } + // If we are thrashing use a default only (an average) for a couple collections + if (thrashing) { + uint64_t thrashing_allocs = sqrt((double)min_target_allocs * max_target_allocs); + if (target_allocs < thrashing_allocs) { + target_allocs = thrashing_allocs; + reason = " thrashing"; + } + } + // then add the guardrails for transient issues + if (target_allocs > max_target_allocs) { + target_allocs = max_target_allocs; + reason = " rate limit max"; + } + else if (target_allocs < min_target_allocs) { + target_allocs = min_target_allocs; + reason = " min limit"; + } + // and set the heap detection threshold + target_heap = target_allocs + heap_size; + if (target_heap < default_collect_interval) { + target_heap = default_collect_interval; + reason = " min heap"; + } + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap); + } + else { + target_heap = jl_atomic_load_relaxed(&gc_heap_stats.heap_target); + } + double old_ratio = (double)promoted_bytes/(double)heap_size; + if (heap_size > user_max) { + next_sweep_full = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_USER_MAX_EXCEEDED); + } + else if (old_ratio > 0.15) { + next_sweep_full = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_LARGE_PROMOTION_RATE); + } + else { + next_sweep_full = 0; + } + if (heap_size > user_max || thrashing) + under_pressure = 1; // sweeping is over - // 7. if it is a quick sweep, put back the remembered objects in queued state + // 6. if it is a quick sweep, put back the remembered objects in queued state // so that we don't trigger the barrier again on them. 
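The automatic-collection branch above paces the next GC by smoothing the measured allocation/collection deltas with `jl_gc_smooth` (an exponential moving average) and then sizing the allowance as `sqrt(heap_size * alloc_rate / gc_rate) * tuning_factor`, subject to the min/max guardrails and the thrashing fallback. A compact sketch of that computation with invented sample measurements:

#include <math.h>
#include <stdio.h>

/* Exponential moving average, as in jl_gc_smooth: keep `factor` of the old
   estimate and blend in the rest from the new measurement. */
static double toy_smooth(double old_val, double new_val, double factor)
{
    return factor * old_val + (1 - factor) * new_val;
}

int main(void)
{
    /* Invented sample values (bytes and nanoseconds), purely to show the shape. */
    double alloc_mem  = toy_smooth(3.0e8, 5.0e8, 0.95);  /* bytes allocated since last GC */
    double alloc_time = toy_smooth(2.0e9, 2.5e9, 0.95);  /* mutator ns since last GC */
    double gc_mem     = toy_smooth(4.0e8, 6.0e8, 0.5);   /* bytes freed by the last GC */
    double gc_time    = toy_smooth(3.0e7, 4.0e7, 0.5);   /* ns spent in the last mark */
    double heap_size  = 2.0e9;                            /* current live heap */
    double tuning     = 2e4;                              /* tuning_factor above */
    double alloc_rate = alloc_mem / alloc_time;
    double gc_rate    = gc_mem / gc_time;
    double target_allocs = sqrt(heap_size * alloc_rate / gc_rate) * tuning;
    printf("allow ~%.2e bytes of allocation before the next GC\n", target_allocs);
    printf("next heap target ~%.2e bytes (before guardrails)\n", heap_size + target_allocs);
    return 0;
}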
assert(gc_n_threads); for (int t_i = 0; t_i < gc_n_threads; t_i++) { @@ -3281,13 +3320,30 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (ptls2 == NULL) continue; if (!sweep_full) { - for (int i = 0; i < ptls2->heap.remset->len; i++) { - void *ptr = ptls2->heap.remset->items[i]; + for (int i = 0; i < ptls2->gc_tls.heap.remset.len; i++) { + void *ptr = ptls2->gc_tls.heap.remset.items[i]; jl_astaggedvalue(ptr)->bits.gc = GC_MARKED; } } else { - ptls2->heap.remset->len = 0; + ptls2->gc_tls.heap.remset.len = 0; + } + // free empty GC state for threads that have exited + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + // GC threads should never exit + assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; + jl_thread_heap_t *heap = &ptls2->gc_tls.heap; + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); + if (heap->remset.len == 0) + arraylist_free(&heap->remset); + if (ptls2->finalizers.len == 0) + arraylist_free(&ptls2->finalizers); + if (ptls2->gc_tls.sweep_objs.len == 0) + arraylist_free(&ptls2->gc_tls.sweep_objs); } } @@ -3303,59 +3359,32 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } #endif - _report_gc_finished(pause, gc_num.freed, sweep_full, recollect); - - gc_final_pause_end(gc_start_time, gc_end_time); - gc_time_sweep_pause(gc_end_time, allocd, live_bytes, - estimate_freed, sweep_full); - gc_num.full_sweep += sweep_full; + _report_gc_finished(pause, gc_num.freed, sweep_full, recollect, live_bytes); uint64_t max_memory = last_live_bytes + gc_num.allocd; if (max_memory > gc_num.max_memory) { gc_num.max_memory = max_memory; } - + gc_final_pause_end(gc_start_time, gc_end_time); + gc_time_sweep_pause(gc_end_time, gc_num.allocd, live_bytes, + gc_num.freed, sweep_full); + gc_num.full_sweep += sweep_full; last_live_bytes = live_bytes; - // Can't call inc_live_bytes here because we already added allocd - // to the graph earlier live_bytes += -gc_num.freed + gc_num.allocd; jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed); - if (collection == JL_GC_AUTO) { - //If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster - if (!not_freed_enough || large_frontier) { - int64_t tot = 2 * (live_bytes + gc_num.allocd) / 3; - if (gc_num.interval > tot) { - gc_num.interval = tot; - last_long_collect_interval = tot; - } - // If the current interval is larger than half the live data decrease the interval - } - else { - int64_t half = (live_bytes / 2); - if (gc_num.interval > half) - gc_num.interval = half; - } - - // But never go below default - if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval; - } - - if (gc_num.interval + live_bytes > max_total_memory) { - if (live_bytes < max_total_memory) { - gc_num.interval = max_total_memory - live_bytes; - last_long_collect_interval = max_total_memory - live_bytes; - } - else { - // We can't stay under our goal so let's go back to - // the minimum interval and hope things get better - gc_num.interval = default_collect_interval; - } - } - - gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed, + gc_time_summary(sweep_full, gc_start_time, gc_end_time, gc_num.freed, live_bytes, gc_num.interval, pause, gc_num.time_to_safepoint, gc_num.mark_time, gc_num.sweep_time); + if (collection == JL_GC_AUTO) { + gc_heuristics_summary( 
+ old_alloc_diff, alloc_diff, + old_mut_time, mutator_time, + old_freed_diff, freed_diff, + old_pause_time, pause - sweep_time, + thrash_counter, reason, + heap_size, target_heap); + } prev_sweep_full = sweep_full; gc_num.pause += !recollect; @@ -3377,10 +3406,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); - jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } jl_gc_debug_print(); @@ -3389,9 +3418,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. uint64_t t0 = jl_hrtime(); - if (!jl_safepoint_start_gc()) { + if (!jl_safepoint_start_gc(ct)) { // either another thread is running GC, or the GC got disabled just now. jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); + jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state return; } @@ -3445,18 +3475,23 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_safepoint_end_gc(); jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); JL_PROBE_GC_END(); + jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state // Only disable finalizers on current thread // Doing this on all threads is racy (it's impossible to check // or wait for finalizers on other threads without dead lock). 
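/*
 * Illustrative usage sketch, not part of this patch: how embedding code is
 * expected to drive the entry point above. jl_gc_enable() is assumed to be the
 * existing public toggle behind jl_gc_disable_counter (see julia.h); while the
 * counter is non-zero, jl_gc_collect() returns early and the thread's allocation
 * count is folded into gc_num.deferred_alloc, as in the branch above.
 */
void example_pause_sensitive_section(void)
{
    int was_enabled = jl_gc_enable(0); /* bump jl_gc_disable_counter */
    /* ... allocation-heavy, pause-sensitive work ... */
    jl_gc_enable(was_enabled);         /* restore the previous state */
    jl_gc_collect(JL_GC_AUTO);         /* let the collector catch up using the deferred bytes */
}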
- if (!ptls->finalizers_inhibited && ptls->locks.len == 0) { + if (!ptls->finalizers_inhibited && ptls->locks.len == 0 && ptls->engine_nqueued == 0) { JL_TIMING(GC, GC_Finalizers); - run_finalizers(ct); + run_finalizers(ct, 0); } JL_PROBE_GC_FINALIZER(); gc_invoke_callbacks(jl_gc_cb_post_gc_t, gc_cblist_post_gc, (collection)); + if (under_pressure) + gc_invoke_callbacks(jl_gc_cb_notify_gc_pressure_t, + gc_cblist_notify_gc_pressure, ()); + under_pressure = 0; #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -3474,42 +3509,35 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { - jl_thread_heap_t *heap = &ptls->heap; + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; + jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { p[i].osize = jl_gc_sizeclasses[i]; p[i].freelist = NULL; p[i].newpages = NULL; } - arraylist_new(&heap->weak_refs, 0); - arraylist_new(&heap->live_tasks, 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; - heap->big_objects = NULL; - heap->remset = &heap->_remset[0]; - heap->last_remset = &heap->_remset[1]; - arraylist_new(heap->remset, 0); - arraylist_new(heap->last_remset, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); + for (int i = 0; i < JL_N_STACK_POOLS; i++) + small_arraylist_new(&common_heap->free_stacks[i], 0); + small_arraylist_new(&common_heap->mallocarrays, 0); + heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel + assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized + heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; + arraylist_new(&heap->remset, 0); arraylist_new(&ptls->finalizers, 0); - arraylist_new(&ptls->sweep_objs, 0); + arraylist_new(&ptls->gc_tls.sweep_objs, 0); - jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; + jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache; gc_cache->perm_scanned_bytes = 0; gc_cache->scanned_bytes = 0; - gc_cache->nbig_obj = 0; // Initialize GC mark-queue - jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; ws_queue_t *cq = &mq->chunk_queue; ws_array_t *wsa = create_ws_array(GC_CHUNK_QUEUE_INIT_SIZE, sizeof(jl_gc_chunk_t)); jl_atomic_store_relaxed(&cq->top, 0); @@ -3521,9 +3549,125 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->bottom, 0); jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); + // Initialize `lazily_freed_mtarraylist_buffers` + small_arraylist_new(&ptls->lazily_freed_mtarraylist_buffers, 0); + + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); +} + +void jl_free_thread_gc_state(jl_ptls_t ptls) +{ + jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; + ws_queue_t *cq = &mq->chunk_queue; + free_ws_array(jl_atomic_load_relaxed(&cq->array)); + jl_atomic_store_relaxed(&cq->array, NULL); + ws_queue_t *q = &mq->ptr_queue; + free_ws_array(jl_atomic_load_relaxed(&q->array)); + jl_atomic_store_relaxed(&q->array, NULL); + arraylist_free(&mq->reclaim_set); +} + +void jl_start_gc_threads(void) +{ + int nthreads = 
jl_atomic_load_relaxed(&jl_n_threads); + int ngcthreads = jl_n_gcthreads; + int nmutator_threads = nthreads - ngcthreads; + uv_thread_t uvtid; + for (int i = nmutator_threads; i < nthreads; ++i) { + jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread + t->tid = i; + t->barrier = &thread_init_done; + if (i == nthreads - 1 && jl_n_sweepthreads == 1) { + uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t); + } + else { + uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t); + } + uv_thread_detach(&uvtid); + } +} + +STATIC_INLINE int may_mark(void) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&gc_n_threads_marking) > 0); +} + +STATIC_INLINE int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&ptls->gc_tls.gc_sweeps_requested) > 0); +} + +STATIC_INLINE int may_sweep_stack(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&ptls->gc_tls.gc_stack_sweep_requested) > 0); +} +// parallel gc thread function +void jl_parallel_gc_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + // warning: this changes `jl_current_task`, so be careful not to call that from this function + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + JL_GC_PROMISE_ROOTED(ct); + (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1); + // wait for all threads + jl_gc_state_set(ptls, JL_GC_PARALLEL_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); - memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + while (1) { + uv_mutex_lock(&gc_threads_lock); + while (!may_mark() && !may_sweep(ptls) && !may_sweep_stack(ptls)) { + uv_cond_wait(&gc_threads_cond, &gc_threads_lock); + } + uv_mutex_unlock(&gc_threads_lock); + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); + gc_mark_loop_parallel(ptls, 0); + if (may_sweep_stack(ptls)) { + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); + sweep_stack_pool_loop(); + jl_atomic_fetch_add(&ptls->gc_tls.gc_stack_sweep_requested, -1); + } + if (may_sweep(ptls)) { + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); + gc_sweep_pool_parallel(ptls); + jl_atomic_fetch_add(&ptls->gc_tls.gc_sweeps_requested, -1); + } + } +} + +// concurrent gc thread function +void jl_concurrent_gc_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + // warning: this changes `jl_current_task`, so be careful not to call that from this function + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + JL_GC_PROMISE_ROOTED(ct); + (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1); + // wait for all threads + jl_gc_state_set(ptls, JL_GC_CONCURRENT_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_CONCURRENT_COLLECTOR_THREAD); + uv_sem_wait(&gc_sweep_assists_needed); + gc_free_pages(); + } } // System-wide 
initializations @@ -3531,48 +3675,61 @@ void jl_gc_init(void) { JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock"); JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); - uv_mutex_init(&gc_cache_lock); + uv_mutex_init(&page_profile_lock); uv_mutex_init(&gc_perm_lock); + uv_mutex_init(&gc_pages_lock); uv_mutex_init(&gc_threads_lock); uv_cond_init(&gc_threads_cond); uv_sem_init(&gc_sweep_assists_needed, 0); + uv_mutex_init(&gc_queue_observer_lock); + void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag + uintptr_t addr = (uintptr_t)_addr; + gc_bigval_sentinel_tag = addr; + oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel + oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag; jl_gc_init_page(); jl_gc_debug_init(); arraylist_new(&finalizer_list_marked, 0); arraylist_new(&to_finalize, 0); - + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval); gc_num.interval = default_collect_interval; - last_long_collect_interval = default_collect_interval; gc_num.allocd = 0; gc_num.max_pause = 0; gc_num.max_memory = 0; + uint64_t mem_reserve = 250*1024*1024; // LLVM + other libraries need some amount of memory + uint64_t min_heap_size_hint = mem_reserve + 1*1024*1024; + uint64_t hint = jl_options.heap_size_hint; + + // check if heap size specified on command line + if (jl_options.heap_size_hint == 0) { + char *cp = getenv(HEAP_SIZE_HINT); + if (cp) + hint = parse_heap_size_hint(cp, "JULIA_HEAP_SIZE_HINT=\"[]\""); + } #ifdef _P64 total_mem = uv_get_total_memory(); - uint64_t constrained_mem = uv_get_constrained_memory(); - if (constrained_mem > 0 && constrained_mem < total_mem) - total_mem = constrained_mem; - double percent; - if (total_mem < 128e9) - percent = total_mem * 2.34375e-12 + 0.6; // 60% at 0 gigs and 90% at 128 to not - else // overcommit too much on memory contrained devices - percent = 0.9; - max_total_memory = total_mem * percent; + if (hint == 0) { + uint64_t constrained_mem = uv_get_constrained_memory(); + if (constrained_mem > 0 && constrained_mem < total_mem) + hint = constrained_mem; + } #endif - if (jl_options.heap_size_hint) - jl_gc_set_max_memory(jl_options.heap_size_hint); - - t_start = jl_hrtime(); + if (hint) { + if (hint < min_heap_size_hint) + hint = min_heap_size_hint; + jl_gc_set_max_memory(hint - mem_reserve); + } } JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) { - if (max_mem > 0 - && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) { - max_total_memory = max_mem; - } +#ifdef _P32 + max_mem = max_mem < MAX32HEAP ? 
max_mem : MAX32HEAP; +#endif + max_total_memory = max_mem; } JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) @@ -3580,132 +3737,72 @@ JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) return max_total_memory; } -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} - -// allocation wrappers that track allocation and let collection run +// allocation wrappers that add to gc pressure JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - if (pgcstack != NULL && ct->world_age) { + void *data = malloc(sz); + jl_task_t *ct = jl_get_current_task(); + if (data != NULL && ct != NULL) { + sz = memory_block_usable_size(data, 0); jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, sz); } - return malloc(sz); + return data; } JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) { - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - if (pgcstack != NULL && ct->world_age) { + void *data = calloc(nm, sz); + jl_task_t *ct = jl_get_current_task(); + if (data != NULL && ct != NULL) { + sz = memory_block_usable_size(data, 0); jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, sz); } - return calloc(nm, sz); + return data; } JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; free(p); - if (pgcstack != NULL && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.freecall, - jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); - } + jl_task_t *ct = jl_get_current_task(); + if (ct != NULL) + jl_batch_accum_free_size(ct->ptls, sz); } JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) { - jl_gcframe_t **pgcstack = jl_get_pgcstack(); - jl_task_t *ct = jl_current_task; - if (pgcstack != NULL && ct->world_age) { + void *data = realloc(p, sz); + jl_task_t *ct = jl_get_current_task(); + if (data != NULL && ct != NULL) { + sz = memory_block_usable_size(data, 0); jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - if (sz < old) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); - else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); - 
jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - } - return realloc(p, sz); -} - -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. - -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + if (!(sz < old)) + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); + int64_t diff = sz - old; + if (diff < 0) { + jl_batch_accum_free_size(ptls, -diff); + } + else { + jl_batch_accum_heap_size(ptls, diff); + } } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 + return data; } // allocating blocks for Arrays and Strings @@ -3717,10 +3814,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -3728,6 +3822,14 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) void *b = malloc_cache_align(allocsz); if (b == NULL) jl_throw(jl_memory_exception); + + size_t allocated_bytes = memory_block_usable_size(b, 1); + assert(allocated_bytes >= allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocated_bytes); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, allocated_bytes); #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -3737,91 +3839,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) return b; } -static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner, int8_t can_collect) -{ - if (can_collect) - 
maybe_collect(ptls); - - size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - - if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; - inc_live_bytes(allocsz - oldsz); - } - else if (allocsz < oldsz) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); - else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *b; - if (isaligned) - b = realloc_cache_align(d, allocsz, oldsz); - else - b = realloc(d, allocsz); - if (b == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); - return b; -} - -JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); -} - -jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) -{ - size_t len = jl_string_len(s); - if (sz <= len) return s; - jl_taggedvalue_t *v = jl_astaggedvalue(s); - size_t strsz = len + sizeof(size_t) + 1; - if (strsz <= GC_MAX_SZCLASS || - // TODO: because of issue #17971 we can't resize old objects - gc_marked(v->bits.gc)) { - // pool allocated; can't be grown in place so allocate a new object. - jl_value_t *snew = jl_alloc_string(sz); - memcpy(jl_string_data(snew), jl_string_data(s), len); - return snew; - } - size_t newsz = sz + sizeof(size_t) + 1; - size_t offs = sizeof(bigval_t); - size_t oldsz = LLT_ALIGN(strsz + offs, JL_CACHE_BYTE_ALIGNMENT); - size_t allocsz = LLT_ALIGN(newsz + offs, JL_CACHE_BYTE_ALIGNMENT); - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - bigval_t *hdr = bigval_header(v); - jl_ptls_t ptls = jl_current_task->ptls; - maybe_collect(ptls); // don't want this to happen during jl_gc_managed_realloc - gc_big_object_unlink(hdr); - // TODO: this is not safe since it frees the old pointer. ideally we'd like - // the old pointer to be left alone if we can't grow in place. - // for now it's up to the caller to make sure there are no references to the - // old pointer. 
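/*
 * Illustrative aside, not part of this patch: the counted_* wrappers above now
 * charge the GC for the allocator's usable block size rather than the requested
 * size. memory_block_usable_size() is defined elsewhere in this PR and also
 * takes an "isaligned" flag for cache-aligned blocks; on common platforms it is
 * assumed to reduce to something like the helper below.
 */
#if defined(_OS_WINDOWS_)
#include <malloc.h>          /* _msize */
#elif defined(_OS_DARWIN_)
#include <malloc/malloc.h>   /* malloc_size */
#else
#include <malloc.h>          /* malloc_usable_size (glibc) */
#endif

static size_t example_usable_size(void *p)
{
#if defined(_OS_WINDOWS_)
    return _msize(p);
#elif defined(_OS_DARWIN_)
    return malloc_size(p);
#else
    return malloc_usable_size(p);
#endif
}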
- bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0); - newbig->sz = allocsz; - gc_big_object_link(newbig, &ptls->heap.big_objects); - jl_value_t *snew = jl_valueof(&newbig->header); - *(size_t*)snew = sz; - return snew; -} - // Perm gen allocator // 2M pool #define GC_PERM_POOL_SIZE (2 * 1024 * 1024) @@ -3848,11 +3865,11 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size,sz); errno = last_errno; jl_may_leak(base); assert(align > 0); - unsigned diff = (offset - (uintptr_t)base) % align; - return (void*)((char*)base + diff); + return (void*)(LLT_ALIGN((uintptr_t)base + offset, (uintptr_t)align) - offset); } STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT @@ -3866,7 +3883,7 @@ STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned o } // **NOT** a safepoint -void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT { // The caller should have acquired `gc_perm_lock` assert(align < GC_PERM_POOL_LIMIT); @@ -3911,51 +3928,16 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) return p; } -JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) -{ - jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_add_finalizer_th(ptls, v, f); -} - -JL_DLLEXPORT void jl_finalize(jl_value_t *o) -{ - jl_finalize_th(jl_current_task, o); -} - -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, 0, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*), NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) +jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT { - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL); + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ? 
+ sizeof(void*) * 2 : 16)); + jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align, + sizeof(void*) % align); + uintptr_t tag = (uintptr_t)ty; + o->header = tag | GC_OLD_MARKED; + return jl_valueof(o); } JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) @@ -4012,9 +3994,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) goto valid_object; } jl_gc_pool_t *pool = - gc_all_tls_states[meta->thread_n]->heap.norm_pools + + gc_all_tls_states[meta->thread_n]->gc_tls.heap.norm_pools + meta->pool_n; - if (meta->fl_begin_offset == (uint16_t) -1) { + if (meta->fl_begin_offset == UINT16_MAX) { // case 2: this is a page on the newpages list jl_taggedvalue_t *newpages = pool->newpages; // Check if the page is being allocated from via newpages @@ -4058,6 +4040,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) if (gc_page_data(cell) == gc_page_data(pool->freelist) && (char *)cell < (char *)pool->freelist) goto valid_object; + // already skipped marked or old objects above, so here + // the age bits are 0, thus the object is on the freelist + return NULL; // Not a freelist entry, therefore a valid object. valid_object: // We have to treat objects with type `jl_buff_tag` differently, @@ -4081,15 +4066,18 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } - -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->sweep_objs, obj); + // Do nothing +} + +JL_DLLEXPORT const char* jl_gc_active_impl(void) { + return "Built with stock GC"; } #ifdef __cplusplus diff --git a/src/gc.h b/src/gc-stock.h similarity index 65% rename from src/gc.h rename to src/gc-stock.h index b1eee5a1d5bda..d478ee1366da0 100644 --- a/src/gc.h +++ b/src/gc-stock.h @@ -5,11 +5,11 @@ . non-moving, precise mark and sweep collector . 
pool-allocates small objects, keeps big objects on a simple list */ - #ifndef JL_GC_H #define JL_GC_H #include +#include #include #include #include @@ -17,28 +17,22 @@ #include "julia.h" #include "julia_threads.h" #include "julia_internal.h" -#include "threading.h" -#ifndef _OS_WINDOWS_ -#include -#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#endif #include "julia_assert.h" -#include "gc-heap-snapshot.h" -#include "gc-alloc-profiler.h" +#include "threading.h" +#include "gc-common.h" #ifdef __cplusplus extern "C" { #endif +#ifdef GC_SMALL_PAGE +#define GC_PAGE_LG2 12 // log2(size of a page) +#else #define GC_PAGE_LG2 14 // log2(size of a page) -#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k +#endif +#define GC_PAGE_SZ (1 << GC_PAGE_LG2) #define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) -#define jl_malloc_tag ((void*)0xdeadaa01) -#define jl_singleton_tag ((void*)0xdeadaa02) - // Used by GC_DEBUG_ENV typedef struct { uint64_t num; @@ -57,33 +51,6 @@ typedef struct { jl_alloc_num_t print; } jl_gc_debug_env_t; -// This struct must be kept in sync with the Julia type of the same name in base/timing.jl -typedef struct { - int64_t allocd; - int64_t deferred_alloc; - int64_t freed; - uint64_t malloc; - uint64_t realloc; - uint64_t poolalloc; - uint64_t bigalloc; - uint64_t freecall; - uint64_t total_time; - uint64_t total_allocd; - size_t interval; - int pause; - int full_sweep; - uint64_t max_pause; - uint64_t max_memory; - uint64_t time_to_safepoint; - uint64_t max_time_to_safepoint; - uint64_t total_time_to_safepoint; - uint64_t sweep_time; - uint64_t mark_time; - uint64_t total_sweep_time; - uint64_t total_mark_time; - uint64_t last_full_sweep; -} jl_gc_num_t; - // Array chunks (work items representing suffixes of // large arrays of pointers left to be marked) @@ -112,38 +79,16 @@ typedef struct _jl_gc_chunk_t { #define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *` #define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue +#define GC_REMSET_PTR_TAG (0x1) // lowest bit of `jl_value_t *` is tagged if it's in the remset + // layout for big (>2k) objects -JL_EXTENSION typedef struct _bigval_t { - struct _bigval_t *next; - struct _bigval_t **prev; // pointer to the next field of the prev entry - size_t sz; -#ifdef _P64 // Add padding so that the value is 64-byte aligned - // (8 pointers of 8 bytes each) - (4 other pointers in struct) - void *_padding[8 - 4]; -#else - // (16 pointers of 4 bytes each) - (4 other pointers in struct) - void *_padding[16 - 4]; -#endif - //struct jl_taggedvalue_t <>; - union { - uintptr_t header; - struct { - uintptr_t gc:2; - } bits; - }; - // must be 64-byte aligned here, in 32 & 64 bit modes -} bigval_t; - -// data structure for tracking malloc'd arrays. 
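/*
 * Illustrative aside, not part of this header: sanity checks on the page
 * geometry defined above. The 16 KiB figure assumes the default (non
 * GC_SMALL_PAGE) build; the second check only restates that GC_PAGE_OFFSET is
 * chosen so the first object payload after its jl_taggedvalue_t header lands
 * back on a JL_HEAP_ALIGNMENT boundary.
 */
#ifndef GC_SMALL_PAGE
static_assert(GC_PAGE_SZ == 16 * 1024, "default GC page is 16 KiB");
#endif
static_assert((GC_PAGE_OFFSET + sizeof(jl_taggedvalue_t)) % JL_HEAP_ALIGNMENT == 0,
              "first object payload is heap-aligned");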
- -typedef struct _mallocarray_t { - jl_array_t *a; - struct _mallocarray_t *next; -} mallocarray_t; +extern uintptr_t gc_bigval_sentinel_tag; // pool page metadata typedef struct _jl_gc_pagemeta_t { + // next metadata structure in per-thread list + // or in one of the `jl_gc_page_stack_t` struct _jl_gc_pagemeta_t *next; // index of pool that owns this page uint8_t pool_n; @@ -178,56 +123,111 @@ typedef struct _jl_gc_pagemeta_t { char *data; } jl_gc_pagemeta_t; -typedef struct { - _Atomic(jl_gc_pagemeta_t *) page_metadata_back; -} jl_gc_global_page_pool_t; +extern jl_gc_page_stack_t global_page_pool_lazily_freed; +extern jl_gc_page_stack_t global_page_pool_clean; +extern jl_gc_page_stack_t global_page_pool_freed; -extern jl_gc_global_page_pool_t global_page_pool_lazily_freed; -extern jl_gc_global_page_pool_t global_page_pool_clean; -extern jl_gc_global_page_pool_t global_page_pool_freed; +// Lock-free stack implementation taken +// from Herlihy's "The Art of Multiprocessor Programming" +// XXX: this is not a general-purpose lock-free stack. We can +// get away with just using a CAS and not implementing some ABA +// prevention mechanism since once a node is popped from the +// `jl_gc_page_stack_t`, it may only be pushed back to them +// in the sweeping phase, which also doesn't push a node into the +// same stack after it's popped -#define GC_BACKOFF_MIN 4 -#define GC_BACKOFF_MAX 12 +STATIC_INLINE void push_lf_back_nosync(jl_gc_page_stack_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); + elt->next = old_back; + jl_atomic_store_relaxed(&pool->bottom, elt); +} -STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT +STATIC_INLINE jl_gc_pagemeta_t *pop_lf_back_nosync(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT { - if (*i < GC_BACKOFF_MAX) { - (*i)++; - } - for (int j = 0; j < (1 << *i); j++) { - jl_cpu_pause(); + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); + if (old_back == NULL) { + return NULL; } + jl_atomic_store_relaxed(&pool->bottom, old_back->next); + return old_back; } -// Lock-free stack implementation taken -// from Herlihy's "The Art of Multiprocessor Programming" - -STATIC_INLINE void push_lf_page_metadata_back(jl_gc_global_page_pool_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +STATIC_INLINE void push_lf_back(jl_gc_page_stack_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT { while (1) { - jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); elt->next = old_back; - if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, elt)) { + if (jl_atomic_cmpswap(&pool->bottom, &old_back, elt)) { break; } jl_cpu_pause(); } } -STATIC_INLINE jl_gc_pagemeta_t *pop_lf_page_metadata_back(jl_gc_global_page_pool_t *pool) JL_NOTSAFEPOINT +#define MAX_POP_ATTEMPTS (1 << 10) + +STATIC_INLINE jl_gc_pagemeta_t *try_pop_lf_back(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT +{ + for (int i = 0; i < MAX_POP_ATTEMPTS; i++) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); + if (old_back == NULL) { + return NULL; + } + if (jl_atomic_cmpswap(&pool->bottom, &old_back, old_back->next)) { + return old_back; + } + jl_cpu_pause(); + } + return NULL; +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_lf_back(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT { while (1) { - jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); if 
(old_back == NULL) { return NULL; } - if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, old_back->next)) { + if (jl_atomic_cmpswap(&pool->bottom, &old_back, old_back->next)) { return old_back; } jl_cpu_pause(); } } +typedef struct { + jl_gc_page_stack_t stack; + // pad to 128 bytes to avoid false-sharing +#ifdef _P64 + void *_pad[15]; +#else + void *_pad[31]; +#endif +} jl_gc_padded_page_stack_t; +static_assert(sizeof(jl_gc_padded_page_stack_t) == 128, "jl_gc_padded_page_stack_t is not 128 bytes"); + +typedef struct { + _Atomic(size_t) n_freed_objs; + _Atomic(size_t) n_pages_allocd; +} gc_fragmentation_stat_t; +#ifdef GC_SMALL_PAGE +#ifdef _P64 +#define REGION0_PG_COUNT (1 << 16) +#define REGION1_PG_COUNT (1 << 18) +#define REGION2_PG_COUNT (1 << 18) +#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0xFFFF) // shift by GC_PAGE_LG2 +#define REGION1_INDEX(p) (((uintptr_t)(p) >> 28) & 0x3FFFF) +#define REGION_INDEX(p) (((uintptr_t)(p) >> 46) & 0x3FFFF) +#else +#define REGION0_PG_COUNT (1 << 10) +#define REGION1_PG_COUNT (1 << 10) +#define REGION2_PG_COUNT (1 << 0) +#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0x3FF) // shift by GC_PAGE_LG2 +#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF) +#define REGION_INDEX(p) (0) +#endif +#else #ifdef _P64 #define REGION0_PG_COUNT (1 << 16) #define REGION1_PG_COUNT (1 << 16) @@ -243,6 +243,7 @@ STATIC_INLINE jl_gc_pagemeta_t *pop_lf_page_metadata_back(jl_gc_global_page_pool #define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF) #define REGION_INDEX(p) (0) #endif +#endif // define the representation of the levels of the page-table (0 to 2) typedef struct { @@ -257,6 +258,13 @@ typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; } pagetable_t; +typedef struct { + _Atomic(size_t) bytes_mapped; + _Atomic(size_t) bytes_resident; + _Atomic(size_t) heap_size; + _Atomic(size_t) heap_target; +} gc_heapstatus_t; + #define GC_PAGE_UNMAPPED 0 #define GC_PAGE_ALLOCATED 1 #define GC_PAGE_LAZILY_FREED 2 @@ -366,89 +374,146 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) } #endif -extern jl_gc_num_t gc_num; -extern bigval_t *big_objects_marked; -extern arraylist_t finalizer_list_marked; -extern arraylist_t to_finalize; -extern int64_t lazy_freed_pages; +extern bigval_t *oldest_generation_of_bigvals; +extern int64_t buffered_pages; extern int gc_first_tid; -extern int gc_n_threads; -extern jl_ptls_t* gc_all_tls_states; +extern gc_heapstatus_t gc_heap_stats; -STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT +STATIC_INLINE int gc_first_parallel_collector_thread_id(void) JL_NOTSAFEPOINT { - return container_of(o, bigval_t, header); + if (jl_n_markthreads == 0) { + return 0; + } + return gc_first_tid; } -STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT +STATIC_INLINE int gc_last_parallel_collector_thread_id(void) JL_NOTSAFEPOINT { - return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset); + if (jl_n_markthreads == 0) { + return -1; + } + return gc_first_tid + jl_n_markthreads - 1; } -STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT +STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT { - return (jl_taggedvalue_t*)(p->data + p->fl_end_offset); + assert(i >= 0 && i < jl_n_markthreads); + return gc_first_tid + i; } -STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT { - return (bits & GC_MARKED) != 0; + return tid >= gc_first_tid && tid <= 
gc_last_parallel_collector_thread_id(); } -STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT { - return (bits & GC_OLD) != 0; + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; } -STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT +STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { - return (tag & ~(uintptr_t)3) | bits; + assert(jl_n_markthreads > 0); + int v = gc_first_tid + (int)cong(jl_n_markthreads, &ptls->rngseed); // cong is [0, n) + assert(v >= gc_first_tid && v <= gc_last_parallel_collector_thread_id()); + return v; } -STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +STATIC_INLINE int gc_parallel_collector_threads_enabled(void) JL_NOTSAFEPOINT { - return ((uintptr_t)v) & mask; + return jl_n_markthreads > 0; } -STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +STATIC_INLINE void gc_check_ptls_of_parallel_collector_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT { - return (void*)(((uintptr_t)v) & ~mask); + (void)ptls; + assert(gc_parallel_collector_threads_enabled()); + assert(ptls != NULL); + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); } -NOINLINE uintptr_t gc_get_stack_ptr(void); +STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT +{ + return container_of(o, bigval_t, header); +} -STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT +STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT { - *hdr->prev = hdr->next; - if (hdr->next) { - hdr->next->prev = hdr->prev; + return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset); +} + +STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT +{ + return (jl_taggedvalue_t*)(p->data + p->fl_end_offset); +} + +FORCE_INLINE void gc_big_object_unlink(const bigval_t *node) JL_NOTSAFEPOINT +{ + assert(node != oldest_generation_of_bigvals); + assert(node->header != gc_bigval_sentinel_tag); + assert(node->prev != NULL); + if (node->next != NULL) { + node->next->prev = node->prev; + } + node->prev->next = node->next; +} + +FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL_NOTSAFEPOINT +{ + assert(sentinel_node != NULL); + assert(sentinel_node->header == gc_bigval_sentinel_tag); + assert(sentinel_node->prev == NULL); + assert(node->header != gc_bigval_sentinel_tag); + // a new node gets linked in at the head of the list + node->next = sentinel_node->next; + node->prev = sentinel_node; + if (sentinel_node->next != NULL) { + sentinel_node->next->prev = node; } + sentinel_node->next = node; } -STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFEPOINT +// Must be kept in sync with `base/timing.jl` +#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0) +#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1) +#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2) +#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3) +#define FULL_SWEEP_NUM_REASONS (4) + +extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS]; +STATIC_INLINE void gc_count_full_sweep_reason(int reason) JL_NOTSAFEPOINT { - hdr->next = *list; - hdr->prev = list; - if (*list) - (*list)->prev = 
&hdr->next; - *list = hdr; + assert(reason >= 0 && reason < FULL_SWEEP_NUM_REASONS); + jl_full_sweep_reasons[reason]++; } +extern uv_mutex_t gc_perm_lock; extern uv_mutex_t gc_threads_lock; extern uv_cond_t gc_threads_cond; extern uv_sem_t gc_sweep_assists_needed; extern _Atomic(int) gc_n_threads_marking; +extern _Atomic(int) gc_n_threads_sweeping_pools; +extern _Atomic(int) n_threads_running; +extern uv_barrier_t thread_init_done; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); -void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; +void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT; void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq); void gc_mark_loop_serial(jl_ptls_t ptls); void gc_mark_loop_parallel(jl_ptls_t ptls, int master); -void sweep_stack_pools(void); +void gc_sweep_pool_parallel(jl_ptls_t ptls); +void gc_free_pages(void); +void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT; void jl_gc_debug_init(void); // GC pages +extern uv_mutex_t gc_pages_lock; void jl_gc_init_page(void) JL_NOTSAFEPOINT; NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT; void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT; @@ -481,9 +546,9 @@ void gc_time_big_start(void) JL_NOTSAFEPOINT; void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT; void gc_time_big_end(void) JL_NOTSAFEPOINT; -void gc_time_mallocd_array_start(void) JL_NOTSAFEPOINT; -void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT; -void gc_time_mallocd_array_end(void) JL_NOTSAFEPOINT; +void gc_time_mallocd_memory_start(void) JL_NOTSAFEPOINT; +void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT; +void gc_time_mallocd_memory_end(void) JL_NOTSAFEPOINT; void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, int64_t perm_scanned_bytes); @@ -494,6 +559,13 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, uint64_t freed, uint64_t live, uint64_t interval, uint64_t pause, uint64_t ttsp, uint64_t mark, uint64_t sweep); +void gc_heuristics_summary( + uint64_t old_alloc_diff, uint64_t alloc_mem, + uint64_t old_mut_time, uint64_t alloc_time, + uint64_t old_freed_diff, uint64_t gc_mem, + uint64_t old_pause_time, uint64_t gc_time, + int thrash_counter, const char *reason, + uint64_t current_heap, uint64_t target_heap); #else #define gc_time_pool_start() STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT @@ -510,17 +582,24 @@ STATIC_INLINE void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT (void)bits; } #define gc_time_big_end() -#define gc_time_mallocd_array_start() -STATIC_INLINE void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT +#define gc_time_mallocd_memory_start() +STATIC_INLINE void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT { (void)bits; } -#define gc_time_mallocd_array_end() +#define gc_time_mallocd_memory_end() #define gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes) #define gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes, \ estimate_freed, sweep_full) #define gc_time_summary(sweep_full, start, end, freed, live, \ interval, pause, ttsp, mark, sweep) +#define gc_heuristics_summary( \ + old_alloc_diff, alloc_mem, \ + old_mut_time, alloc_time, \ + old_freed_diff, gc_mem, \ + old_pause_time, gc_time, \ + thrash_counter, reason, \ + current_heap, 
target_heap) #endif #ifdef MEMFENCE @@ -605,24 +684,6 @@ static inline void gc_scrub(void) } #endif -#ifdef OBJPROFILE -void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT; -void objprofile_printall(void); -void objprofile_reset(void); -#else -static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT -{ -} - -static inline void objprofile_printall(void) -{ -} - -static inline void objprofile_reset(void) -{ -} -#endif - #ifdef MEMPROFILE void gc_stats_all_pool(void); void gc_stats_big_obj(void); @@ -634,10 +695,10 @@ void gc_stats_big_obj(void); // For debugging void gc_count_pool(void); -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; - JL_DLLEXPORT void jl_enable_gc_logging(int enable); -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_is_gc_logging_enabled(void); +JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void); +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..473668d648294 --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,51 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variable for tracking malloc'd arrays + small_arraylist_t mallocarrays; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_COMMON_H diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h new file mode 100644 index 0000000000000..5b69aef5d55fb --- /dev/null +++ b/src/gc-tls-mmtk.h @@ -0,0 +1,23 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_GC_TLS_H +#define JL_GC_TLS_H + +#include +#include "mmtkMutator.h" +#include "julia_atomics.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + MMTkMutatorContext mmtk_mutator; + _Atomic(size_t) malloc_sz_since_last_poll; +} jl_gc_tls_states_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_H diff --git a/src/gc-tls-stock.h b/src/gc-tls-stock.h new file mode 100644 index 0000000000000..d82506383c501 --- /dev/null +++ b/src/gc-tls-stock.h @@ -0,0 +1,68 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_H +#define JL_GC_TLS_H + +#include "julia_atomics.h" +#include "work-stealing-queue.h" +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + struct _jl_taggedvalue_t *freelist; // root of list of free objects + struct _jl_taggedvalue_t *newpages; // root of list of chunks of free objects + uint16_t osize; // size of objects in this pool +} jl_gc_pool_t; + +typedef struct { + // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects + struct _bigval_t *young_generation_of_bigvals; + + // lower bound of the number of pointers inside remembered values + int remset_nptr; + // remembered set + arraylist_t remset; + + // variables for allocating objects from pools +#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h` + jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; +} jl_thread_heap_t; + +typedef struct { + ws_queue_t chunk_queue; + ws_queue_t ptr_queue; + arraylist_t reclaim_set; +} jl_gc_markqueue_t; + +typedef struct { + // thread local increment of `perm_scanned_bytes` + size_t perm_scanned_bytes; + // thread local increment of `scanned_bytes` + size_t scanned_bytes; +} jl_gc_mark_cache_t; + +typedef struct { + _Atomic(struct _jl_gc_pagemeta_t *) bottom; +} jl_gc_page_stack_t; + +typedef struct { + jl_thread_heap_t heap; + jl_gc_page_stack_t page_metadata_allocd; + jl_gc_markqueue_t mark_queue; + jl_gc_mark_cache_t gc_cache; + _Atomic(size_t) gc_sweeps_requested; + _Atomic(size_t) gc_stack_sweep_requested; + arraylist_t sweep_objs; +} jl_gc_tls_states_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_H diff --git a/src/gen_sysimg_symtab.jl b/src/gen_sysimg_symtab.jl index 8f03cc1560767..a91f2f994194c 100644 --- a/src/gen_sysimg_symtab.jl +++ b/src/gen_sysimg_symtab.jl @@ -15,12 +15,6 @@ function _eachmethod(f, m::Module, visited, vmt) x = getfield(m, nm) if isa(x, Module) && !in(x, visited) _eachmethod(f, x, visited, vmt) - elseif isa(x, Function) - mt = typeof(x).name.mt - if !in(mt, vmt) - push!(vmt, mt) - Base.visit(f, mt) - end elseif isa(x, Type) x = Base.unwrap_unionall(x) if isa(x, DataType) && isdefined(x.name, :mt) @@ -69,5 +63,5 @@ function outputline(io, name) println(io, "jl_symbol(\"", name, "\"),") end -open(f->foreach(l->outputline(f,l), take(syms, 100)), "common_symbols1.inc", "w") -open(f->foreach(l->outputline(f,l), take(drop(syms, 100), 254)), "common_symbols2.inc", "w") +open(f->foreach(l->outputline(f,l), take(syms, 94)), "common_symbols1.inc", "w") +open(f->foreach(l->outputline(f,l), take(drop(syms, 94), 254)), "common_symbols2.inc", "w") diff --git a/src/genericmemory.c b/src/genericmemory.c new file mode 100644 index 0000000000000..e435ec3b63c9f --- /dev/null +++ b/src/genericmemory.c @@ -0,0 +1,634 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +/* + GenericMemory{kind, T} constructors and primitives +*/ +#include +#include +#ifdef _OS_WINDOWS_ +#include +#endif +#include "julia.h" +#include "julia_internal.h" +#include "julia_assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// genericmemory constructors --------------------------------------------------------- +JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + assert(layout->flags.arrayelem_isunion); + return (char*)m->ptr + m->length * layout->size; +} + +#define MAXINTVAL (((size_t)-1)>>1) + +// ONLY USE FROM CODEGEN. It only partially initializes the mem +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype) +{ + size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); + + int pooled = tot <= GC_MAX_SZCLASS; + char *data; + jl_genericmemory_t *m; + if (!pooled) { + data = (char*)jl_gc_managed_malloc(nbytes); + tot = sizeof(jl_genericmemory_t) + sizeof(void*); + } + m = (jl_genericmemory_t*)jl_gc_alloc(ptls, tot, mtype); + if (pooled) { + data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; + } + else { + int isaligned = 1; // jl_gc_managed_malloc is always aligned + jl_gc_track_malloced_genericmemory(ptls, m, isaligned); + jl_genericmemory_data_owner_field(m) = (jl_value_t*)m; + } + // length set by codegen + m->ptr = data; + return m; +} + +jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz) +{ + if (nel == 0) // zero-sized allocation optimization + return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + size_t nbytes; + int overflow = __builtin_mul_overflow(nel, elsz, &nbytes); + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + overflow |= __builtin_add_overflow(nel, nbytes, &nbytes); + } + if ((nel >= MAXINTVAL-1) || (nbytes >= MAXINTVAL-1) || overflow) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + jl_task_t *ct = jl_current_task; + jl_genericmemory_t *m = jl_alloc_genericmemory_unchecked((jl_ptls_t) ct->ptls, nbytes, (jl_datatype_t*)mtype); + m->length = nel; + if (zeroinit) + memset((char*)m->ptr, 0, nbytes); + return m; +} + +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel) +{ + assert(jl_is_datatype(mtype)); + jl_genericmemory_t *m = (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + if (m == NULL) { + jl_value_t *kind = jl_tparam0((jl_datatype_t*)mtype); + if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym) + jl_error("GenericMemory kind must be :not_atomic or :atomic"); + jl_value_t *addrspace = jl_tparam2((jl_datatype_t*)mtype); + if (!jl_is_addrspacecore(addrspace) || jl_unbox_uint8(addrspace) != 0) + jl_error("GenericMemory addrspace must be Core.CPU"); + if (!((jl_datatype_t*)mtype)->has_concrete_subtype || layout == NULL) + jl_type_error_rt("GenericMemory", "element type", (jl_value_t*)jl_type_type, jl_tparam1(mtype)); + abort(); // this is checked already by jl_get_genericmemory_layout + } + assert(((jl_datatype_t*)mtype)->has_concrete_subtype && layout != NULL); + if (nel == 0) // zero-sized allocation optimization fast path + 
return m; + + size_t elsz = layout->size; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + int zi = ((jl_datatype_t*)mtype)->zeroinit; + if (isboxed) + elsz = sizeof(void*); + return _new_genericmemory_(mtype, nel, isunion, zi, elsz); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str) +{ + if (jl_string_len(str) == 0) + return (jl_genericmemory_t*)((jl_datatype_t*)jl_memory_uint8_type)->instance; + jl_task_t *ct = jl_current_task; + int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); + jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, jl_memory_uint8_type); + m->length = jl_string_len(str); + m->ptr = jl_string_data(str); + jl_genericmemory_data_owner_field(m) = str; + return m; +} + +// own_buffer != 0 iff GC should call free() on this pointer eventually +JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void *data, + size_t nel, int own_buffer) +{ + jl_task_t *ct = jl_current_task; + assert(jl_is_datatype(mtype)); + jl_genericmemory_t *m = (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + if (m == NULL) { + jl_value_t *kind = jl_tparam0((jl_datatype_t*)mtype); + if (kind != (jl_value_t*)jl_not_atomic_sym && kind != (jl_value_t*)jl_atomic_sym) + jl_error("GenericMemory kind must be :not_atomic or :atomic"); + jl_value_t *addrspace = jl_tparam2((jl_datatype_t*)mtype); + if (!jl_is_addrspacecore(addrspace) || jl_unbox_uint8(addrspace) != 0) + jl_error("GenericMemory addrspace must be Core.CPU"); + if (!((jl_datatype_t*)mtype)->has_concrete_subtype || layout == NULL) + jl_type_error_rt("GenericMemory", "element type", (jl_value_t*)jl_type_type, jl_tparam1(mtype)); + abort(); + } + assert(((jl_datatype_t*)mtype)->has_concrete_subtype && layout != NULL); + //if (nel == 0) {// zero-sized allocation optimization fast path + // if (own_buffer) + // free(data); + // return m; + //} + + size_t elsz = layout->size; + size_t align = layout->alignment; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + if (isboxed) + elsz = sizeof(void*); + if (isunion) + jl_exceptionf(jl_argumenterror_type, + "unsafe_wrap: unspecified layout for union element type"); + if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) + jl_exceptionf(jl_argumenterror_type, + "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); + size_t nbytes; + int overflow = __builtin_mul_overflow(nel, elsz, &nbytes); + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + overflow |= __builtin_add_overflow(nel, nbytes, &nbytes); + } + if ((nel >= MAXINTVAL) || (nbytes >= MAXINTVAL) || overflow) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); + m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype); + m->ptr = data; + m->length = nel; + jl_genericmemory_data_owner_field(m) = own_buffer ? 
(jl_value_t*)m : NULL; + if (own_buffer) { + int isaligned = 0; // TODO: allow passing memalign'd buffers + jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); + size_t allocated_bytes = memory_block_usable_size(data, isaligned); + jl_gc_count_allocd(allocated_bytes); + } + return m; +} + +JL_DLLEXPORT jl_genericmemory_t *jl_new_genericmemory(jl_value_t *mtype, jl_value_t *nel) +{ + return jl_alloc_genericmemory(mtype, jl_unbox_long(nel)); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_genericmemory(const char *str, size_t len) +{ + jl_genericmemory_t *m = jl_alloc_genericmemory(jl_memory_uint8_type, len); + memcpy(m->ptr, str, len); + return m; +} + +JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len) +{ + assert(len <= m->length); + if (len == 0) { + // this may seem like purely an optimization (which it also is), but it + // also ensures that calling `String(m)` doesn't corrupt a previous + // string also created the same way, where `m = StringVector(_)`. + return jl_an_empty_string; + } + int how = jl_genericmemory_how(m); + size_t mlength = m->length; + m->length = 0; + if (how != 0) { + jl_value_t *o = jl_genericmemory_data_owner_field(m); + jl_genericmemory_data_owner_field(m) = NULL; + if (how == 3 && // implies jl_is_string(o) + ((mlength + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS))) { + if (jl_string_data(o)[len] != '\0') + jl_string_data(o)[len] = '\0'; + if (*(size_t*)o != len) + *(size_t*)o = len; + return o; + } + JL_GC_PUSH1(&o); + jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len); + JL_GC_POP(); + return str; + } + // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object + return jl_pchar_to_string((const char*)m->ptr, len); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n) +{ + return jl_alloc_genericmemory(jl_memory_any_type, n); +} + +JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata, + jl_genericmemory_t *src, char* srcdata, + size_t n) JL_NOTSAFEPOINT +{ + jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(dest); + if (dt != (jl_datatype_t*)jl_typetagof(src)) + jl_exceptionf(jl_argumenterror_type, "jl_genericmemory_copyto requires source and dest to have same type"); + const jl_datatype_layout_t *layout = dt->layout; + if (layout->flags.arrayelem_isboxed) { + _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata; + _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata; + jl_value_t *owner = jl_genericmemory_owner(dest); + if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { + jl_value_t *src_owner = jl_genericmemory_owner(src); + ssize_t done = 0; + if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { + if (dest_p < src_p || dest_p > src_p + n) { + for (; done < n; done++) { // copy forwards + void *val = jl_atomic_load_relaxed(src_p + done); + jl_atomic_store_release(dest_p + done, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { + jl_gc_queue_root(owner); + break; + } + } + src_p += done; + dest_p += done; + } else { + for (; done < n; done++) { // copy backwards + void *val = jl_atomic_load_relaxed(src_p + n - done - 1); + jl_atomic_store_release(dest_p + n - done - 1, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { + jl_gc_queue_root(owner); + break; + } + } + } + n -= done; + } + } + return memmove_refs(dest_p, src_p, n); + } + size_t elsz = 
layout->size; + char *src_p = srcdata; + int isbitsunion = layout->flags.arrayelem_isunion; + if (isbitsunion) { + char *sourcetypetagdata = jl_genericmemory_typetagdata(src); + char *desttypetagdata = jl_genericmemory_typetagdata(dest); + memmove(desttypetagdata+(size_t)destdata, sourcetypetagdata+(size_t)srcdata, n); + srcdata = (char*)src->ptr + elsz*(size_t)srcdata; + destdata = (char*)dest->ptr + elsz*(size_t)destdata; + } + if (layout->first_ptr != -1) { + memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*)); + jl_value_t *owner = jl_genericmemory_owner(dest); + if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { + jl_value_t *src_owner = jl_genericmemory_owner(src); + if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { + dt = (jl_datatype_t*)jl_tparam1(dt); + for (size_t done = 0; done < n; done++) { // copy forwards + char* s = (char*)src_p+done*elsz; + if (*((jl_value_t**)s+layout->first_ptr) != NULL) + jl_gc_queue_multiroot(owner, s, dt); + } + } + } + } + else { + memmove(destdata, srcdata, n * elsz); + } +} + + +// genericmemory primitives ----------------------------------------------------------- + +JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *mem, size_t i) +{ + int isatomic = (jl_tparam0(jl_typetagof(mem)) == (jl_value_t*)jl_atomic_sym); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout; + jl_genericmemoryref_t m; + m.mem = mem; + m.ptr_or_offset = (layout->flags.arrayelem_isunion || layout->size == 0) ? (void*)i : (void*)((char*)mem->ptr + layout->size * i); + return jl_memoryrefget(m, isatomic); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t *mem, void *data, size_t len) +{ + jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + size_t elsz = layout->size; + int isunion = layout->flags.arrayelem_isunion; + jl_genericmemory_t *new_mem = _new_genericmemory_(mtype, len, isunion, 0, elsz); + if (isunion) { + memcpy(new_mem->ptr, (char*)mem->ptr + (size_t)data * elsz, len * elsz); + memcpy(jl_genericmemory_typetagdata(new_mem), jl_genericmemory_typetagdata(mem) + (size_t)data, len); + } + else if (layout->first_ptr != -1) { + memmove_refs((_Atomic(void*)*)new_mem->ptr, (_Atomic(void*)*)data, len * elsz / sizeof(void*)); + } + else if (data != NULL) { + memcpy(new_mem->ptr, data, len * elsz); + } + return new_mem; +} + +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy(jl_genericmemory_t *mem) +{ + jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + return jl_genericmemory_copy_slice(mem, layout->flags.arrayelem_isunion || layout->size == 0 ? 
(void*)0 : mem->ptr, mem->length); +} + +JL_DLLEXPORT jl_value_t *(jl_genericmemory_data_owner)(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + return jl_genericmemory_data_owner_field(m); +} + +jl_genericmemoryref_t *jl_new_memoryref(jl_value_t *typ, jl_genericmemory_t *mem, void *data) +{ + jl_task_t *ct = jl_current_task; + jl_genericmemoryref_t *m = (jl_genericmemoryref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemoryref_t), typ); + m->mem = mem; + m->ptr_or_offset = data; + return m; +} + +// memoryref primitives +JL_DLLEXPORT jl_genericmemoryref_t jl_memoryrefindex(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, size_t idx) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if ((layout->flags.arrayelem_isboxed || !layout->flags.arrayelem_isunion) && layout->size != 0) { + m.ptr_or_offset = (void*)((char*)m.ptr_or_offset + idx * layout->size); + assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < layout->size * m.mem->length); + } + else { + m.ptr_or_offset = (void*)((size_t)m.ptr_or_offset + idx); + assert((size_t)m.ptr_or_offset < m.mem->length); + } + return m; +} + +static jl_value_t *jl_ptrmemrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, int isatomic) JL_NOTSAFEPOINT +{ + assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + assert(((jl_datatype_t*)jl_typetagof(m.mem))->layout->flags.arrayelem_isboxed); + _Atomic(jl_value_t*) *ptr = (_Atomic(jl_value_t*)*)m.ptr_or_offset; + jl_value_t *elt = isatomic ? jl_atomic_load(ptr) : jl_atomic_load_relaxed(ptr); + if (elt == NULL) + jl_throw(jl_undefref_exception); + return elt; +} + +JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic) +{ + assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym)); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if (layout->flags.arrayelem_isboxed) + return jl_ptrmemrefget(m, isatomic); + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isunion) { + assert(!isatomic); + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + // isbits union selector bytes are always stored directly after the last memory element + uint8_t sel = jl_genericmemory_typetagdata(m.mem)[i]; + eltype = jl_nth_union_component(eltype, sel); + data = (char*)m.mem->ptr + i * layout->size; + } + if (layout->size == 0) { + assert(jl_is_datatype_singleton((jl_datatype_t*)eltype)); + return ((jl_datatype_t*)eltype)->instance; + } + assert(data - (char*)m.mem->ptr < layout->size * m.mem->length); + jl_value_t *r; + size_t fsz = jl_datatype_size(eltype); + int needlock = isatomic && fsz > MAX_ATOMIC_SIZE; + if (isatomic && !needlock) { + r = jl_atomic_new_bits(eltype, data); + } + else if (needlock) { + jl_task_t *ct = jl_current_task; + r = jl_gc_alloc(ct->ptls, fsz, eltype); + jl_lock_field((jl_mutex_t*)data); + memcpy((char*)r, data + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT), fsz); + jl_unlock_field((jl_mutex_t*)data); + } + else { + // TODO: a finalizer here could make the isunion case not quite right + r = jl_new_bits(eltype, data); + } + r = undefref_check((jl_datatype_t*)eltype, r); + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + return r; +} + +static int _jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + _Atomic(jl_value_t*) 
*elem = (_Atomic(jl_value_t*)*)m.ptr_or_offset; + if (layout->flags.arrayelem_isboxed) { + } + else if (layout->first_ptr >= 0) { + int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE; + if (needlock) + elem = elem + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT) / sizeof(jl_value_t*); + elem = &elem[layout->first_ptr]; + } + else { + return 1; + } + return (isatomic ? jl_atomic_load(elem) : jl_atomic_load_relaxed(elem)) != NULL; +} + +JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic) +{ + return _jl_memoryref_isassigned(m, isatomic) ? jl_true : jl_false; +} + +JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic) +{ + assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym)); + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) { + JL_GC_PUSH1(&rhs); + if (!jl_isa(rhs, eltype)) + jl_type_error("memoryrefset!", eltype, rhs); + JL_GC_POP(); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if (layout->flags.arrayelem_isboxed) { + assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + if (isatomic) + jl_atomic_store((_Atomic(jl_value_t*)*)m.ptr_or_offset, rhs); + else + jl_atomic_store_release((_Atomic(jl_value_t*)*)m.ptr_or_offset, rhs); + jl_gc_wb(jl_genericmemory_owner(m.mem), rhs); + return; + } + int hasptr; + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isunion) { + assert(!isatomic); + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + uint8_t *psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i; + unsigned nth = 0; + if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth)) + assert(0 && "invalid genericmemoryset to isbits union"); + *psel = nth; + hasptr = 0; + data = (char*)m.mem->ptr + i * layout->size; + } + else { + hasptr = layout->first_ptr >= 0; + } + if (layout->size != 0) { + assert(data - (char*)m.mem->ptr < layout->size * m.mem->length); + int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE; + size_t fsz = jl_datatype_size((jl_datatype_t*)jl_typeof(rhs)); // need to shrink-wrap the final copy + if (isatomic && !needlock) { + jl_atomic_store_bits(data, rhs, fsz); + } + else if (needlock) { + jl_lock_field((jl_mutex_t*)data); + memassign_safe(hasptr, data + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT), rhs, fsz); + jl_unlock_field((jl_mutex_t*)data); + } + else { + memassign_safe(hasptr, data, rhs, fsz); + } + if (hasptr) + jl_gc_multi_wb(jl_genericmemory_owner(m.mem), rhs); // rhs is immutable + } +} + +JL_DLLEXPORT jl_value_t *jl_memoryrefswap(jl_genericmemoryref_t m, jl_value_t *rhs, int isatomic) +{ + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) { + if (!jl_isa(rhs, eltype)) + jl_type_error("memoryrefswap!", eltype, rhs); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + jl_value_t *owner = jl_genericmemory_owner(m.mem); + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isboxed) { + assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + jl_value_t *r; + if (isatomic) + r = jl_atomic_exchange((_Atomic(jl_value_t*)*)data, rhs); + else + r = jl_atomic_exchange_release((_Atomic(jl_value_t*)*)data, rhs); + 
jl_gc_wb(owner, rhs); + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + return r; + } + uint8_t *psel = NULL; + if (layout->flags.arrayelem_isunion) { + assert(!isatomic); + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i; + data = (char*)m.mem->ptr + i * layout->size; + } + return swap_bits(eltype, data, psel, owner, rhs, isatomic ? isatomic_field : isatomic_none); +} + +JL_DLLEXPORT jl_value_t *jl_memoryrefmodify(jl_genericmemoryref_t m, jl_value_t *op, jl_value_t *rhs, int isatomic) +{ + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + jl_value_t *owner = jl_genericmemory_owner(m.mem); + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isboxed) { + assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + return modify_value(eltype, (_Atomic(jl_value_t*)*)data, owner, op, rhs, isatomic, NULL, NULL); + } + size_t fsz = layout->size; + uint8_t *psel = NULL; + if (layout->flags.arrayelem_isunion) { + assert(!isatomic); + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i; + data = (char*)m.mem->ptr + i * fsz; + } + return modify_bits(eltype, data, psel, owner, op, rhs, isatomic ? isatomic_field : isatomic_none); +} + +JL_DLLEXPORT jl_value_t *jl_memoryrefreplace(jl_genericmemoryref_t m, jl_value_t *expected, jl_value_t *rhs, int isatomic) +{ + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) { + if (!jl_isa(rhs, eltype)) + jl_type_error("memoryrefreplace!", eltype, rhs); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + jl_value_t *owner = jl_genericmemory_owner(m.mem); + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isboxed) { + assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + return replace_value(eltype, (_Atomic(jl_value_t*)*)data, owner, expected, rhs, isatomic, NULL, NULL); + } + uint8_t *psel = NULL; + if (layout->flags.arrayelem_isunion) { + assert(!isatomic); + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i; + data = (char*)m.mem->ptr + i * layout->size; + } + return replace_bits(eltype, data, psel, owner, expected, rhs, isatomic ? isatomic_field : isatomic_none); +} + +JL_DLLEXPORT jl_value_t *jl_memoryrefsetonce(jl_genericmemoryref_t m, jl_value_t *rhs, int isatomic) +{ + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) { + if (!jl_isa(rhs, eltype)) + jl_type_error("memoryrefsetonce!", eltype, rhs); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + jl_value_t *owner = jl_genericmemory_owner(m.mem); + char *data = (char*)m.ptr_or_offset; + int success; + if (layout->flags.arrayelem_isboxed) { + assert(data - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + jl_value_t *r = NULL; + _Atomic(jl_value_t*) *px = (_Atomic(jl_value_t*)*)data; + success = isatomic ? 
jl_atomic_cmpswap(px, &r, rhs) : jl_atomic_cmpswap_release(px, &r, rhs); + if (success) + jl_gc_wb(owner, rhs); + } + else { + if (layout->flags.arrayelem_isunion) { + assert(!isatomic); + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + (void)i; + success = 0; + } + else if (layout->first_ptr < 0) { + success = 0; + } + else { + success = setonce_bits((jl_datatype_t*)eltype, data, owner, rhs, isatomic ? isatomic_field : isatomic_none); + } + } + return success ? jl_true : jl_false; +} + +JL_DLLEXPORT jl_value_t *ijl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + return jl_genericmemory_owner(m); +} +#ifdef __cplusplus +} +#endif diff --git a/src/gf.c b/src/gf.c index 294e1fccb8783..ba28edfbeeff7 100644 --- a/src/gf.c +++ b/src/gf.c @@ -24,7 +24,9 @@ extern "C" { #endif +_Atomic(int) allow_new_worlds = 1; JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release +jl_mutex_t world_counter_lock; JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; @@ -94,7 +96,7 @@ void jl_call_tracer(tracer_cb callback, jl_value_t *tracee) JL_CATCH { ct->ptls->in_pure_callback = last_in; jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: tracer callback function threw an error:\n"); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO } @@ -110,17 +112,19 @@ static int8_t jl_cachearg_offset(jl_methtable_t *mt) /// ----- Insertion logic for special entries ----- /// -static uint_t speccache_hash(size_t idx, jl_svec_t *data) +static uint_t speccache_hash(size_t idx, jl_value_t *data) { - jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); + jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); // This must always happen inside the lock jl_value_t *sig = ml->specTypes; if (jl_is_unionall(sig)) sig = jl_unwrap_unionall(sig); return ((jl_datatype_t*)sig)->hash; } -static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv) +static int speccache_eq(size_t idx, const void *ty, jl_value_t *data, uint_t hv) { + if (idx >= jl_svec_len(data)) + return 0; // We got a OOB access, probably due to a data race jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); jl_value_t *sig = ml->specTypes; if (ty == sig) @@ -134,12 +138,12 @@ static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv) // get or create the MethodInstance for a specialization static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams, jl_method_instance_t *mi_insert) { - if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL && m != jl_opaque_closure_method) + if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL && m != jl_opaque_closure_method && !m->is_for_opaque_closure) return jl_atomic_load_relaxed(&m->unspecialized); // handle builtin methods jl_value_t *ut = jl_is_unionall(type) ? 
jl_unwrap_unionall(type) : type; JL_TYPECHK(specializations, datatype, ut); uint_t hv = ((jl_datatype_t*)ut)->hash; - jl_array_t *speckeyset = NULL; + jl_genericmemory_t *speckeyset = NULL; jl_value_t *specializations = NULL; size_t i = -1, cl = 0, lastcl; for (int locked = 0; locked < 2; locked++) { @@ -164,7 +168,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO } cl = jl_svec_len(specializations); if (hv) { - ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, (jl_svec_t*)specializations, hv); + ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv, 0); if (idx != -1) { jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, idx); if (locked) @@ -210,7 +214,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO jl_atomic_store_release(&m->specializations, specializations); jl_gc_wb(m, specializations); if (hv) - jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, (jl_svec_t*)specializations); + jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, specializations); } if (hv) { _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations); @@ -242,7 +246,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO assert(jl_svecref(specializations, i) == jl_nothing); jl_svecset(specializations, i, mi); if (hv) - jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, (jl_svec_t*)specializations); + jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, specializations); JL_GC_POP(); } JL_UNLOCK(&m->writelock); // may gc @@ -275,7 +279,7 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ // TODO: this is sort of an odd lookup strategy (and the only user of // jl_typemap_assoc_by_type with subtype=0), while normally jl_gf_invoke_lookup would be // expected to be used instead - struct jl_typemap_assoc search = {type, world, NULL, 0, ~(size_t)0}; + struct jl_typemap_assoc search = {type, world, NULL}; jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, jl_cachearg_offset(mt), /*subtype*/0); if (!sf) return jl_nothing; @@ -284,13 +288,6 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ // ----- MethodInstance specialization instantiation ----- // -JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( - jl_method_instance_t *mi, jl_value_t *rettype, - jl_value_t *inferred_const, jl_value_t *inferred, - int32_t const_flags, size_t min_world, size_t max_world, - uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes, - uint8_t relocatability); - jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED { jl_sym_t *sname = jl_symbol(name); @@ -305,6 +302,8 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a m->module = jl_core_module; m->isva = 1; m->nargs = 2; + jl_atomic_store_relaxed(&m->primary_world, 1); + jl_atomic_store_relaxed(&m->deleted_world, ~(size_t)0); m->sig = (jl_value_t*)jl_anytuple_type; m->slot_syms = jl_an_empty_string; m->nospecialize = 0; @@ -315,16 +314,16 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a JL_GC_PUSH2(&m, &newentry); newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec, - (jl_value_t*)m, 1, ~(size_t)0); + (jl_value_t*)m, 
jl_atomic_load_relaxed(&m->primary_world), jl_atomic_load_relaxed(&m->deleted_world)); jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt)); jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)jl_anytuple_type, jl_emptysvec); jl_atomic_store_relaxed(&m->unspecialized, mi); jl_gc_wb(m, mi); - jl_code_instance_t *codeinst = jl_new_codeinst(mi, - (jl_value_t*)jl_any_type, jl_nothing, jl_nothing, - 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); + jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing, jl_nothing, + 0, 1, ~(size_t)0, 0, jl_nothing, NULL, NULL); jl_mi_cache_insert(mi, codeinst); jl_atomic_store_relaxed(&codeinst->specptr.fptr1, fptr); jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_args); @@ -338,14 +337,71 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a return dt; } +// only relevant for bootstrapping. otherwise fairly broken. +static int emit_codeinst_and_edges(jl_code_instance_t *codeinst) +{ + jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred); + if (code) { + if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL) + return 1; + if (code != jl_nothing) { + JL_GC_PUSH1(&code); + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + jl_method_t *def = mi->def.method; + if (jl_is_string(code) && jl_is_method(def)) + code = (jl_value_t*)jl_uncompress_ir(def, codeinst, (jl_value_t*)code); + if (jl_is_code_info(code)) { + jl_emit_codeinst_to_jit(codeinst, (jl_code_info_t*)code); + if (0) { + // next emit all the invoke edges too (if this seems profitable) + jl_array_t *src = ((jl_code_info_t*)code)->code; + for (size_t i = 0; i < jl_array_dim0(src); i++) { + jl_value_t *stmt = jl_array_ptr_ref(src, i); + if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_assign_sym) + stmt = jl_exprarg(stmt, 1); + if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_invoke_sym) { + jl_value_t *invoke = jl_exprarg(stmt, 0); + if (jl_is_code_instance(invoke)) + emit_codeinst_and_edges((jl_code_instance_t*)invoke); + } + } + } + JL_GC_POP(); + return 1; + } + JL_GC_POP(); + } + } + return 0; +} + +// Opportunistic SOURCE_MODE_ABI cache lookup, only for bootstrapping. +static jl_code_instance_t *jl_method_inferred_with_abi(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) +{ + jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); + for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) { + if (codeinst->owner != jl_nothing) + continue; + if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) { + if (emit_codeinst_and_edges(codeinst)) + return codeinst; + } + } + return NULL; +} + // run type inference on lambda "mi" for given argument types. 
// returns the inferred source, and may cache the result in mi // if successful, also updates the mi argument to describe the validity of this src // if inference doesn't occur (or can't finish), returns NULL instead -jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) +jl_code_instance_t *jl_type_infer(jl_method_instance_t *mi, size_t world, uint8_t source_mode) { - if (jl_typeinf_func == NULL) - return NULL; + if (jl_typeinf_func == NULL) { + if (source_mode == SOURCE_MODE_ABI) + return jl_method_inferred_with_abi(mi, world); + else + return NULL; + } jl_task_t *ct = jl_current_task; if (ct->reentrant_timing & 0b1000) { // We must avoid attempting to re-enter inference here @@ -356,16 +412,21 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) if ((ct->reentrant_timing & 0b1111) >= 0b110) return NULL; - jl_code_info_t *src = NULL; + jl_code_instance_t *ci = NULL; #ifdef ENABLE_INFERENCE - if (mi->inInference && !force) + if (jl_engine_hasreserved(mi, jl_nothing)) // don't recur on a thread on the same MethodInstance--force it to interpret it until the inference has finished return NULL; JL_TIMING(INFERENCE, INFERENCE); jl_value_t **fargs; - JL_GC_PUSHARGS(fargs, 3); + JL_GC_PUSHARGS(fargs, 4); fargs[0] = (jl_value_t*)jl_typeinf_func; fargs[1] = (jl_value_t*)mi; fargs[2] = jl_box_ulong(world); + fargs[3] = jl_box_uint8(source_mode); + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK); #ifdef TRACE_INFERENCE @@ -375,13 +436,10 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) jl_printf(JL_STDERR, "\n"); } #endif - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif + int last_pure = ct->ptls->in_pure_callback; + ct->ptls->in_pure_callback = 0; size_t last_age = ct->world_age; ct->world_age = jl_typeinf_world; - mi->inInference = 1; // first bit is for reentrant timing, // so adding 1 to the bit above performs // inference reentrancy counter addition. @@ -391,39 +449,81 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) // allocate another bit for the counter. 
ct->reentrant_timing += 0b10; JL_TRY { - src = (jl_code_info_t*)jl_apply(fargs, 3); + ci = (jl_code_instance_t*)jl_apply(fargs, 4); } JL_CATCH { - jl_value_t *e = jl_current_exception(); + jl_value_t *e = jl_current_exception(ct); + jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: during type inference of\n"); + jl_static_show_func_sig((JL_STREAM*)STDERR_FILENO, (jl_value_t*)mi->specTypes); + jl_printf((JL_STREAM*)STDERR_FILENO, "\nEncountered "); if (e == jl_stackovf_exception) { - jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: stack overflow in type inference of "); - jl_static_show_func_sig((JL_STREAM*)STDERR_FILENO, (jl_value_t*)mi->specTypes); - jl_printf((JL_STREAM*)STDERR_FILENO, ".\n"); + jl_printf((JL_STREAM*)STDERR_FILENO, "stack overflow.\n"); jl_printf((JL_STREAM*)STDERR_FILENO, "This might be caused by recursion over very long tuples or argument lists.\n"); } else { - jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error in runtime:\n"); + jl_printf((JL_STREAM*)STDERR_FILENO, "unexpected error in runtime:\n"); jl_static_show((JL_STREAM*)STDERR_FILENO, e); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO } - src = NULL; + ci = NULL; +#ifndef JL_NDEBUG + abort(); +#endif } ct->world_age = last_age; ct->reentrant_timing -= 0b10; - mi->inInference = 0; + ct->ptls->in_pure_callback = last_pure; #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif errno = last_errno; - if (src && !jl_is_code_info(src)) { - src = NULL; + if (ci && !jl_is_code_instance(ci)) { + ci = NULL; } JL_GC_POP(); #endif - return src; + return ci; +} + +// Attempt to run `Core.Compiler.code_typed` on the lambda "mi" +JL_DLLEXPORT jl_code_info_t *jl_gdbcodetyped1(jl_method_instance_t *mi, size_t world) +{ + jl_task_t *ct = jl_current_task; + jl_code_info_t *ci = NULL; + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + int last_pure = ct->ptls->in_pure_callback; + ct->ptls->in_pure_callback = 0; + size_t last_age = ct->world_age; + ct->world_age = jl_typeinf_world; + jl_value_t **fargs; + JL_GC_PUSHARGS(fargs, 4); + jl_module_t *CC = (jl_module_t*)jl_get_global(jl_core_module, jl_symbol("Compiler")); + if (CC != NULL && jl_is_module(CC)) { + fargs[0] = jl_get_global(CC, jl_symbol("NativeInterpreter"));; + fargs[1] = jl_box_ulong(world); + fargs[1] = jl_apply(fargs, 2); + fargs[0] = jl_get_global(CC, jl_symbol("typeinf_code")); + fargs[2] = (jl_value_t*)mi; + fargs[3] = jl_true; + ci = (jl_code_info_t*)jl_apply(fargs, 4); + } + ct->world_age = last_age; + ct->ptls->in_pure_callback = last_pure; +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + if (ci && !jl_is_code_info(ci)) { + ci = NULL; + } + JL_GC_POP(); + return ci; } JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs) @@ -436,63 +536,103 @@ JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs) return ret; } -JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT +JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( + jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, + size_t min_world, size_t max_world, jl_debuginfo_t *di, jl_svec_t *edges) { + jl_value_t *owner = jl_nothing; // TODO: owner should be arg jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); while (codeinst) { - if (codeinst->min_world <= min_world && max_world <= codeinst->max_world) { - 
jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred); - if (code && (code == jl_nothing || jl_ir_flag_inferred(code))) - return (jl_value_t*)codeinst; + if (jl_atomic_load_relaxed(&codeinst->min_world) == min_world && + jl_atomic_load_relaxed(&codeinst->max_world) == max_world && + jl_egal(codeinst->owner, owner) && + jl_egal(codeinst->rettype, rettype)) { + if (di == NULL) + return codeinst; + jl_debuginfo_t *debuginfo = jl_atomic_load_relaxed(&codeinst->debuginfo); + if (di != debuginfo) { + if (!(debuginfo == NULL && jl_atomic_cmpswap_relaxed(&codeinst->debuginfo, &debuginfo, di))) + if (!(debuginfo && jl_egal((jl_value_t*)debuginfo, (jl_value_t*)di))) + continue; + } + // TODO: this is implied by the matching worlds, since it is intrinsic, so do we really need to verify it? + jl_svec_t *e = jl_atomic_load_relaxed(&codeinst->edges); + if (e && jl_egal((jl_value_t*)e, (jl_value_t*)edges)) + return codeinst; } codeinst = jl_atomic_load_relaxed(&codeinst->next); } - return (jl_value_t*)jl_nothing; + codeinst = jl_new_codeinst( + mi, owner, rettype, (jl_value_t*)jl_any_type, NULL, NULL, + 0, min_world, max_world, 0, jl_nothing, di, edges); + jl_mi_cache_insert(mi, codeinst); + return codeinst; } -JL_DLLEXPORT jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT = jl_rettype_inferred; +JL_DLLEXPORT int jl_mi_cache_has_ci(jl_method_instance_t *mi, + jl_code_instance_t *ci) +{ + jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); + while (codeinst) { + if (codeinst == ci) + return 1; + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + return 0; +} -JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( - jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, - size_t min_world, size_t max_world) +// look for something with an egal ABI and properties that is already in the JIT (compiled=true) or simply in the cache (compiled=false) +JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, int compiled) JL_NOTSAFEPOINT { + jl_value_t *def = ci->def; + jl_method_instance_t *mi = jl_get_ci_mi(ci); + jl_value_t *owner = ci->owner; + jl_value_t *rettype = ci->rettype; + size_t min_world = jl_atomic_load_relaxed(&ci->min_world); + size_t max_world = jl_atomic_load_relaxed(&ci->max_world); jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); while (codeinst) { - if (codeinst->min_world == min_world && - codeinst->max_world == max_world && + if (codeinst != ci && + jl_atomic_load_relaxed(&codeinst->inferred) != NULL && + (!compiled || jl_atomic_load_relaxed(&codeinst->invoke) != NULL) && + jl_atomic_load_relaxed(&codeinst->min_world) <= min_world && + jl_atomic_load_relaxed(&codeinst->max_world) >= max_world && + jl_egal(codeinst->def, def) && + jl_egal(codeinst->owner, owner) && jl_egal(codeinst->rettype, rettype)) { return codeinst; } codeinst = jl_atomic_load_relaxed(&codeinst->next); } - codeinst = jl_new_codeinst( - mi, rettype, NULL, NULL, - 0, min_world, max_world, 0, 0, jl_nothing, 0); - jl_mi_cache_insert(mi, codeinst); - return codeinst; + return (jl_code_instance_t*)jl_nothing; } + JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( - jl_method_instance_t *mi, jl_value_t *rettype, + jl_method_instance_t *mi, jl_value_t *owner, + jl_value_t *rettype, jl_value_t *exctype, jl_value_t *inferred_const, jl_value_t *inferred, int32_t const_flags, size_t min_world, size_t max_world, - uint32_t ipo_effects, uint32_t effects, jl_value_t 
*argescapes, - uint8_t relocatability - /*, jl_array_t *edges, int absolute_max*/) + uint32_t effects, jl_value_t *analysis_results, + jl_debuginfo_t *di, jl_svec_t *edges /*, int absolute_max*/) { - jl_task_t *ct = jl_current_task; assert(min_world <= max_world && "attempting to set invalid world constraints"); + //assert((!jl_is_method(mi->def.value) || max_world != ~(size_t)0 || min_world <= 1 || edges == NULL || jl_svec_len(edges) != 0) && "missing edges"); + jl_task_t *ct = jl_current_task; jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); - codeinst->def = mi; - codeinst->min_world = min_world; - codeinst->max_world = max_world; + codeinst->def = (jl_value_t*)mi; + codeinst->owner = owner; + jl_atomic_store_relaxed(&codeinst->edges, edges); + jl_atomic_store_relaxed(&codeinst->min_world, min_world); + jl_atomic_store_relaxed(&codeinst->max_world, max_world); codeinst->rettype = rettype; + codeinst->exctype = exctype; jl_atomic_store_release(&codeinst->inferred, inferred); - //codeinst->edges = NULL; if ((const_flags & 2) == 0) inferred_const = NULL; codeinst->rettype_const = inferred_const; + jl_atomic_store_relaxed(&codeinst->debuginfo, (jl_value_t*)di == jl_nothing ? NULL : di); jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL); jl_atomic_store_relaxed(&codeinst->invoke, NULL); if ((const_flags & 1) != 0) { @@ -502,10 +642,78 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( jl_atomic_store_relaxed(&codeinst->specsigflags, 0); jl_atomic_store_relaxed(&codeinst->precompile, 0); jl_atomic_store_relaxed(&codeinst->next, NULL); - codeinst->ipo_purity_bits = ipo_effects; - jl_atomic_store_relaxed(&codeinst->purity_bits, effects); - codeinst->argescapes = argescapes; - codeinst->relocatability = relocatability; + jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects); + codeinst->analysis_results = analysis_results; + return codeinst; +} + +JL_DLLEXPORT void jl_update_codeinst( + jl_code_instance_t *codeinst, jl_value_t *inferred, + int32_t const_flags, size_t min_world, size_t max_world, + uint32_t effects, jl_value_t *analysis_results, + jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/) +{ + assert(min_world <= max_world && "attempting to set invalid world constraints"); + //assert((!jl_is_method(codeinst->def->def.value) || max_world != ~(size_t)0 || min_world <= 1 || jl_svec_len(edges) != 0) && "missing edges"); + codeinst->analysis_results = analysis_results; + jl_gc_wb(codeinst, analysis_results); + jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects); + jl_atomic_store_relaxed(&codeinst->debuginfo, di); + jl_gc_wb(codeinst, di); + jl_atomic_store_relaxed(&codeinst->edges, edges); + jl_gc_wb(codeinst, edges); + if ((const_flags & 1) != 0) { + assert(codeinst->rettype_const); + jl_atomic_store_release(&codeinst->invoke, jl_fptr_const_return); + } + jl_atomic_store_release(&codeinst->inferred, inferred); + jl_gc_wb(codeinst, inferred); + jl_atomic_store_relaxed(&codeinst->min_world, min_world); // XXX: these should be unchanged? 
+ jl_atomic_store_relaxed(&codeinst->max_world, max_world); // since the edges shouldn't change after jl_fill_codeinst +} + +JL_DLLEXPORT void jl_fill_codeinst( + jl_code_instance_t *codeinst, + jl_value_t *rettype, jl_value_t *exctype, + jl_value_t *inferred_const, + int32_t const_flags, size_t min_world, size_t max_world, + uint32_t effects, jl_value_t *analysis_results, + jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/) +{ + assert(min_world <= max_world && "attempting to set invalid world constraints"); + //assert((!jl_is_method(codeinst->def->def.value) || max_world != ~(size_t)0 || min_world <= 1 || jl_svec_len(edges) != 0) && "missing edges"); + codeinst->rettype = rettype; + jl_gc_wb(codeinst, rettype); + codeinst->exctype = exctype; + jl_gc_wb(codeinst, exctype); + if ((const_flags & 2) != 0) { + codeinst->rettype_const = inferred_const; + jl_gc_wb(codeinst, inferred_const); + } + jl_atomic_store_relaxed(&codeinst->edges, edges); + jl_gc_wb(codeinst, edges); + if ((jl_value_t*)di != jl_nothing) { + jl_atomic_store_relaxed(&codeinst->debuginfo, di); + jl_gc_wb(codeinst, di); + } + if ((const_flags & 1) != 0) { + // TODO: may want to follow ordering restrictions here (see jitlayers.cpp) + assert(const_flags & 2); + jl_atomic_store_release(&codeinst->invoke, jl_fptr_const_return); + } + jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects); + codeinst->analysis_results = analysis_results; + assert(jl_atomic_load_relaxed(&codeinst->min_world) == 1); + assert(jl_atomic_load_relaxed(&codeinst->max_world) == 0); + jl_atomic_store_release(&codeinst->inferred, jl_nothing); + jl_atomic_store_release(&codeinst->min_world, min_world); + jl_atomic_store_release(&codeinst->max_world, max_world); +} + +JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner) +{ + jl_code_instance_t *codeinst = jl_new_codeinst(mi, owner, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL); + jl_atomic_store_relaxed(&codeinst->min_world, 1); // make temporarily invalid before returning, so that jl_fill_codeinst is valid later return codeinst; } @@ -527,6 +735,29 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN return; } +JL_DLLEXPORT int jl_mi_try_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, + jl_code_instance_t *expected_ci, + jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) +{ + JL_GC_PUSH1(&ci); + if (jl_is_method(mi->def.method)) + JL_LOCK(&mi->def.method->writelock); + jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache); + int ret = 0; + if (oldci == expected_ci) { + jl_atomic_store_relaxed(&ci->next, oldci); + if (oldci) + jl_gc_wb(ci, oldci); + jl_atomic_store_release(&mi->cache, ci); + jl_gc_wb(mi, ci); + ret = 1; + } + if (jl_is_method(mi->def.method)) + JL_UNLOCK(&mi->def.method->writelock); + JL_GC_POP(); + return ret; +} + static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) { size_t world = jl_atomic_load_acquire(&jl_world_counter); @@ -536,7 +767,7 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) if (!jl_is_svec(specializations)) { jl_method_instance_t *mi = (jl_method_instance_t*)specializations; assert(jl_is_method_instance(mi)); - if (jl_rettype_inferred(mi, world, world) == jl_nothing) + if (jl_rettype_inferred_native(mi, world, world) == jl_nothing) jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); return 1; } @@ -546,7 +777,7 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) 
jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i); if ((jl_value_t*)mi != jl_nothing) { assert(jl_is_method_instance(mi)); - if (jl_rettype_inferred(mi, world, world) == jl_nothing) + if (jl_rettype_inferred_native(mi, world, world) == jl_nothing) jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); } } @@ -561,44 +792,42 @@ int foreach_mtable_in_module( { jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); for (size_t i = 0; i < jl_svec_len(table); i++) { - jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i); + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); if ((void*)b == jl_nothing) break; jl_sym_t *name = b->globalref->name; - if (jl_atomic_load_relaxed(&b->owner) == b && b->constp) { - jl_value_t *v = jl_atomic_load_relaxed(&b->value); - if (v) { - jl_value_t *uw = jl_unwrap_unionall(v); - if (jl_is_datatype(uw)) { - jl_typename_t *tn = ((jl_datatype_t*)uw)->name; - if (tn->module == m && tn->name == name && tn->wrapper == v) { - // this is the original/primary binding for the type (name/wrapper) - jl_methtable_t *mt = tn->mt; - if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) { - assert(mt->module == m); - if (!visit(mt, env)) - return 0; - } - } - } - else if (jl_is_module(v)) { - jl_module_t *child = (jl_module_t*)v; - if (child != m && child->parent == m && child->name == name) { - // this is the original/primary binding for the submodule - if (!foreach_mtable_in_module(child, visit, env)) - return 0; - } - } - else if (jl_is_mtable(v)) { - jl_methtable_t *mt = (jl_methtable_t*)v; - if (mt->module == m && mt->name == name) { - // this is probably an external method table here, so let's - // assume so as there is no way to precisely distinguish them + jl_value_t *v = jl_get_binding_value_if_const(b); + if (v) { + jl_value_t *uw = jl_unwrap_unionall(v); + if (jl_is_datatype(uw)) { + jl_typename_t *tn = ((jl_datatype_t*)uw)->name; + if (tn->module == m && tn->name == name && tn->wrapper == v) { + // this is the original/primary binding for the type (name/wrapper) + jl_methtable_t *mt = tn->mt; + if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) { + assert(mt->module == m); if (!visit(mt, env)) return 0; } } } + else if (jl_is_module(v)) { + jl_module_t *child = (jl_module_t*)v; + if (child != m && child->parent == m && child->name == name) { + // this is the original/primary binding for the submodule + if (!foreach_mtable_in_module(child, visit, env)) + return 0; + } + } + else if (jl_is_mtable(v)) { + jl_methtable_t *mt = (jl_methtable_t*)v; + if (mt->module == m && mt->name == name) { + // this is probably an external method table here, so let's + // assume so as there is no way to precisely distinguish them + if (!visit(mt, env)) + return 0; + } + } } table = jl_atomic_load_relaxed(&m->bindings); } @@ -615,7 +844,7 @@ int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), voi if (mod_array) { JL_GC_PUSH1(&mod_array); int i; - for (i = 0; i < jl_array_len(mod_array); i++) { + for (i = 0; i < jl_array_nrows(mod_array); i++) { jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); assert(jl_is_module(m)); if (m->parent == m) // some toplevel modules (really just Base) aren't actually @@ -640,7 +869,7 @@ static int reset_mt_caches(jl_methtable_t *mt, void *env) // removes all method caches // this might not be entirely safe (GC or MT), thus we only do it very early in bootstrapping if (!mt->frozen) { 
// make sure not to reset builtin functions - jl_atomic_store_release(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_release(&mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); jl_atomic_store_release(&mt->cache, jl_nothing); } jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), get_method_unspec_list, env); @@ -664,10 +893,10 @@ JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f) JL_GC_PUSH1(&unspec); jl_foreach_reachable_mtable(reset_mt_caches, (void*)unspec); size_t i, l; - for (i = 0, l = jl_array_len(unspec); i < l; i++) { + for (i = 0, l = jl_array_nrows(unspec); i < l; i++) { jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(unspec, i); - if (jl_rettype_inferred(mi, world, world) == jl_nothing) - jl_type_infer(mi, world, 1); + if (jl_rettype_inferred_native(mi, world, world) == jl_nothing) + jl_type_infer(mi, world, SOURCE_MODE_NOT_REQUIRED); } JL_GC_POP(); } @@ -735,7 +964,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams) vm = T_has_tv ? jl_type_unionall(v, T) : T; if (N_has_tv) N = NULL; - vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1); // this cannot throw for these inputs + vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1, 0); // this cannot throw for these inputs } sp++; decl = ((jl_unionall_t*)decl)->body; @@ -984,7 +1213,7 @@ static void jl_compilation_sig( // avoid Vararg{Type{Type{...}}} if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i))) type_i = (jl_value_t*)jl_type_type; - type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1); // this cannot throw for these inputs + type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1, 0); // this cannot throw for these inputs } else { type_i = inst_varargp_in_env(decl, sparams); @@ -1213,12 +1442,12 @@ static int concretesig_equal(jl_value_t *tt, jl_value_t *simplesig) JL_NOTSAFEPO return 1; } -static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROPAGATES_ROOT, jl_value_t *tt, size_t world) JL_NOTSAFEPOINT +static inline jl_typemap_entry_t *lookup_leafcache(jl_genericmemory_t *leafcache JL_PROPAGATES_ROOT, jl_value_t *tt, size_t world) JL_NOTSAFEPOINT { jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_eqtable_get(leafcache, (jl_value_t*)tt, NULL); if (entry) { do { - if (entry->min_world <= world && world <= entry->max_world) { + if (jl_atomic_load_relaxed(&entry->min_world) <= world && world <= jl_atomic_load_relaxed(&entry->max_world)) { if (entry->simplesig == (void*)jl_nothing || concretesig_equal(tt, (jl_value_t*)entry->simplesig)) return entry; } @@ -1227,8 +1456,7 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROP } return NULL; } - -static jl_method_instance_t *cache_method( +jl_method_instance_t *cache_method( jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT, jl_tupletype_t *tt, // the original tupletype of the signature jl_method_t *definition, @@ -1240,13 +1468,15 @@ static jl_method_instance_t *cache_method( int8_t offs = mt ? 
jl_cachearg_offset(mt) : 1; { // scope block if (mt) { - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); if (entry) return entry->func.linfo; } - struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1); + struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL}; + jl_typemap_t *cacheentry = jl_atomic_load_relaxed(cache); + assert(cacheentry != NULL); + jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(cacheentry, &search, offs, /*subtype*/1); if (entry && entry->func.value) return entry->func.linfo; } @@ -1266,7 +1496,7 @@ static jl_method_instance_t *cache_method( intptr_t max_varargs = get_max_varargs(definition, kwmt, mt, NULL); jl_compilation_sig(tt, sparams, definition, max_varargs, &newparams); if (newparams) { - temp2 = jl_apply_tuple_type(newparams); + temp2 = jl_apply_tuple_type(newparams, 1); // Now there may be a problem: the widened signature is more general // than just the given arguments, so it might conflict with another // definition that does not have cache instances yet. To fix this, we @@ -1306,7 +1536,7 @@ static jl_method_instance_t *cache_method( } else { int unmatched_tvars = 0; - size_t i, l = jl_array_len(temp); + size_t i, l = jl_array_nrows(temp); for (i = 0; i < l; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i); if (matc->method == definition) @@ -1339,7 +1569,7 @@ static jl_method_instance_t *cache_method( guardsigs = jl_alloc_svec(guards); temp3 = (jl_value_t*)guardsigs; guards = 0; - for (i = 0, l = jl_array_len(temp); i < l; i++) { + for (i = 0, l = jl_array_nrows(temp); i < l; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i); jl_method_t *other = matc->method; if (other != definition) { @@ -1389,14 +1619,14 @@ static jl_method_instance_t *cache_method( } } if (newparams) { - simplett = (jl_datatype_t*)jl_apply_tuple_type(newparams); + simplett = (jl_datatype_t*)jl_apply_tuple_type(newparams, 1); temp2 = (jl_value_t*)simplett; } // short-circuit if an existing entry is already present // that satisfies our requirements if (cachett != tt) { - struct jl_typemap_assoc search = {(jl_value_t*)cachett, world, NULL, 0, ~(size_t)0}; + struct jl_typemap_assoc search = {(jl_value_t*)cachett, world, NULL}; jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1); if (entry && jl_egal((jl_value_t*)entry->simplesig, simplett ? 
(jl_value_t*)simplett : jl_nothing) && jl_egal((jl_value_t*)guardsigs, (jl_value_t*)entry->guardsigs)) { @@ -1420,11 +1650,11 @@ static jl_method_instance_t *cache_method( jl_cache_type_(tt); JL_UNLOCK(&typecache_lock); // Might GC } - jl_array_t *oldcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *oldcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *old = (jl_typemap_entry_t*)jl_eqtable_get(oldcache, (jl_value_t*)tt, jl_nothing); jl_atomic_store_relaxed(&newentry->next, old); jl_gc_wb(newentry, old); - jl_array_t *newcache = (jl_array_t*)jl_eqtable_put(jl_atomic_load_relaxed(&mt->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL); + jl_genericmemory_t *newcache = jl_eqtable_put(jl_atomic_load_relaxed(&mt->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL); if (newcache != oldcache) { jl_atomic_store_release(&mt->leafcache, newcache); jl_gc_wb(mt, newcache); @@ -1440,34 +1670,45 @@ static jl_method_instance_t *cache_method( static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid); -static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_datatype_t *tt, size_t world) +static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_datatype_t *tt JL_MAYBE_UNROOTED, size_t world) { - // caller must hold the mt->writelock + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); + if (entry) + return entry->func.linfo; + JL_TIMING(METHOD_LOOKUP_SLOW, METHOD_LOOKUP_SLOW); + jl_method_match_t *matc = NULL; + JL_GC_PUSH2(&tt, &matc); + JL_LOCK(&mt->writelock); assert(tt->isdispatchtuple || tt->hasfreetypevars); + jl_method_instance_t *mi = NULL; if (tt->isdispatchtuple) { - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); if (entry) - return entry->func.linfo; + mi = entry->func.linfo; } - struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1); - if (entry) - return entry->func.linfo; + if (!mi) { + struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL}; + jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1); + if (entry) + mi = entry->func.linfo; + } - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - jl_method_match_t *matc = _gf_invoke_lookup((jl_value_t*)tt, jl_nothing, world, &min_valid, &max_valid); - jl_method_instance_t *nf = NULL; - if (matc) { - JL_GC_PUSH1(&matc); - jl_method_t *m = matc->method; - jl_svec_t *env = matc->sparams; - nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, tt, m, world, min_valid, max_valid, env); - JL_GC_POP(); + if (!mi) { + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + matc = _gf_invoke_lookup((jl_value_t*)tt, jl_nothing, world, &min_valid, &max_valid); + if (matc) { + jl_method_t *m = matc->method; + jl_svec_t *env = matc->sparams; + mi = cache_method(mt, &mt->cache, (jl_value_t*)mt, tt, m, world, min_valid, max_valid, env); + } } - return nf; + JL_UNLOCK(&mt->writelock); + JL_GC_POP(); + return mi; } @@ -1482,14 
+1723,15 @@ static int get_intersect_visitor(jl_typemap_entry_t *oldentry, struct typemap_in { struct matches_env *closure = container_of(closure0, struct matches_env, match); assert(oldentry != closure->newentry && "entry already added"); - assert(oldentry->min_world <= closure->newentry->min_world && "old method cannot be newer than new method"); - assert(oldentry->max_world == ~(size_t)0 && "method cannot be added at the same time as method deleted"); + assert(jl_atomic_load_relaxed(&oldentry->min_world) <= jl_atomic_load_relaxed(&closure->newentry->min_world) && "old method cannot be newer than new method"); + assert(jl_atomic_load_relaxed(&oldentry->max_world) != jl_atomic_load_relaxed(&closure->newentry->min_world) && "method cannot be added at the same time as method deleted"); // don't need to consider other similar methods if this oldentry will always fully intersect with them and dominates all of them typemap_slurp_search(oldentry, &closure->match); jl_method_t *oldmethod = oldentry->func.method; if (closure->match.issubty // e.g. jl_subtype(closure->newentry.sig, oldentry->sig) && jl_subtype(oldmethod->sig, (jl_value_t*)closure->newentry->sig)) { // e.g. jl_type_equal(closure->newentry->sig, oldentry->sig) - closure->replaced = oldentry; + if (closure->replaced == NULL || jl_atomic_load_relaxed(&closure->replaced->min_world) < jl_atomic_load_relaxed(&oldentry->min_world)) + closure->replaced = oldentry; // must pick the newest insertion (both are still valid) } if (closure->shadowed == NULL) closure->shadowed = (jl_value_t*)jl_alloc_vec_any(0); @@ -1533,14 +1775,6 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m) } } -static int is_anonfn_typename(char *name) -{ - if (name[0] != '#' || name[1] == '#') - return 0; - char *other = strrchr(name, '#'); - return other > &name[1] && other[1] > '0' && other[1] <= '9'; -} - static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue) { // method overwritten @@ -1568,8 +1802,10 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue jl_printf(s, ".\n"); jl_uv_flush(s); } - if (jl_options.incremental && jl_generating_output()) - jl_printf(JL_STDERR, " ** incremental compilation may be fatally broken for this module **\n\n"); + if (jl_generating_output() && jl_options.incremental) { + jl_printf(JL_STDERR, "ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.\n"); + jl_throw(jl_precompilable_error); + } } static void update_max_args(jl_methtable_t *mt, jl_value_t *type) @@ -1602,105 +1838,62 @@ JL_DLLEXPORT jl_value_t *jl_debug_method_invalidation(int state) return jl_nothing; } -// call external callbacks registered with this method_instance -static void invalidate_external(jl_method_instance_t *mi, size_t max_world) { - jl_array_t *callbacks = mi->callbacks; - if (callbacks) { - // AbstractInterpreter allows for MethodInstances to be present in non-local caches - // inform those caches about the invalidation. 
- JL_TRY { - size_t i, l = jl_array_len(callbacks); - jl_value_t **args; - JL_GC_PUSHARGS(args, 3); - // these arguments are constant per call - args[1] = (jl_value_t*)mi; - args[2] = jl_box_uint32(max_world); - - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; - ct->world_age = jl_get_world_counter(); - - jl_value_t **cbs = (jl_value_t**)jl_array_ptr_data(callbacks); - for (i = 0; i < l; i++) { - args[0] = cbs[i]; - jl_apply(args, 3); - } - ct->world_age = last_age; - JL_GC_POP(); - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "error in invalidation callback: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } - } -} - -static void do_nothing_with_codeinst(jl_code_instance_t *ci) {} +static void _invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, int depth); // recursively invalidate cached methods that had an edge to a replaced method -static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth) +static void invalidate_code_instance(jl_code_instance_t *replaced, size_t max_world, int depth) { jl_timing_counter_inc(JL_TIMING_COUNTER_Invalidations, 1); if (_jl_debug_method_invalidation) { jl_value_t *boxeddepth = NULL; JL_GC_PUSH1(&boxeddepth); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced->def); boxeddepth = jl_box_int32(depth); jl_array_ptr_1d_push(_jl_debug_method_invalidation, boxeddepth); JL_GC_POP(); } - //jl_static_show(JL_STDERR, (jl_value_t*)replaced); - if (!jl_is_method(replaced->def.method)) + //jl_static_show(JL_STDERR, (jl_value_t*)replaced->def); + jl_method_instance_t *replaced_mi = jl_get_ci_mi(replaced); + if (!jl_is_method(replaced_mi->def.method)) return; // shouldn't happen, but better to be safe - JL_LOCK(&replaced->def.method->writelock); - jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&replaced->cache); - while (codeinst) { - if (codeinst->max_world == ~(size_t)0) { - assert(codeinst->min_world - 1 <= max_world && "attempting to set illogical world constraints (probable race condition)"); - codeinst->max_world = max_world; - } - assert(codeinst->max_world <= max_world); - JL_GC_PUSH1(&codeinst); - (*f)(codeinst); - JL_GC_POP(); - codeinst = jl_atomic_load_relaxed(&codeinst->next); + JL_LOCK(&replaced_mi->def.method->writelock); + if (jl_atomic_load_relaxed(&replaced->max_world) == ~(size_t)0) { + assert(jl_atomic_load_relaxed(&replaced->min_world) - 1 <= max_world && "attempting to set illogical world constraints (probable race condition)"); + jl_atomic_store_release(&replaced->max_world, max_world); } + assert(jl_atomic_load_relaxed(&replaced->max_world) <= max_world); // recurse to all backedges to update their valid range also - jl_array_t *backedges = replaced->backedges; - if (backedges) { - JL_GC_PUSH1(&backedges); - replaced->backedges = NULL; - size_t i = 0, l = jl_array_len(backedges); - jl_method_instance_t *replaced; - while (i < l) { - i = get_next_edge(backedges, i, NULL, &replaced); - invalidate_method_instance(f, replaced, max_world, depth + 1); - } - JL_GC_POP(); - } - JL_UNLOCK(&replaced->def.method->writelock); + _invalidate_backedges(replaced_mi, max_world, depth + 1); + JL_UNLOCK(&replaced_mi->def.method->writelock); } -// invalidate cached methods that overlap this definition -static void 
invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why) +JL_DLLEXPORT void jl_invalidate_code_instance(jl_code_instance_t *replaced, size_t max_world) { - JL_LOCK(&replaced_mi->def.method->writelock); + invalidate_code_instance(replaced, max_world, 1); +} + +static void _invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, int depth) { jl_array_t *backedges = replaced_mi->backedges; - //jl_static_show(JL_STDERR, (jl_value_t*)replaced_mi); if (backedges) { // invalidate callers (if any) replaced_mi->backedges = NULL; JL_GC_PUSH1(&backedges); - size_t i = 0, l = jl_array_len(backedges); - jl_method_instance_t *replaced; + size_t i = 0, l = jl_array_nrows(backedges); + jl_code_instance_t *replaced; while (i < l) { i = get_next_edge(backedges, i, NULL, &replaced); - invalidate_method_instance(f, replaced, max_world, 1); + JL_GC_PROMISE_ROOTED(replaced); // propagated by get_next_edge from backedges + invalidate_code_instance(replaced, max_world, depth); } JL_GC_POP(); } +} + +// invalidate cached methods that overlap this definition +static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, const char *why) +{ + JL_LOCK(&replaced_mi->def.method->writelock); + _invalidate_backedges(replaced_mi, max_world, 1); JL_UNLOCK(&replaced_mi->def.method->writelock); if (why && _jl_debug_method_invalidation) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced_mi); @@ -1712,66 +1905,86 @@ static void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_insta } // add a backedge from callee to caller -JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller) +JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_code_instance_t *caller) { - JL_LOCK(&callee->def.method->writelock); + if (!jl_atomic_load_relaxed(&allow_new_worlds)) + return; if (invokesig == jl_nothing) invokesig = NULL; // julia uses `nothing` but C uses NULL (#undef) - int found = 0; - // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory - if (!callee->backedges) { - // lazy-init the backedges array - callee->backedges = jl_alloc_vec_any(0); - jl_gc_wb(callee, callee->backedges); - } - else { - size_t i = 0, l = jl_array_len(callee->backedges); - for (i = 0; i < l; i++) { - // optimized version of while (i < l) i = get_next_edge(callee->backedges, i, &invokeTypes, &mi); - jl_value_t *mi = jl_array_ptr_ref(callee->backedges, i); - if (mi != (jl_value_t*)caller) - continue; - jl_value_t *invokeTypes = i > 0 ? 
jl_array_ptr_ref(callee->backedges, i - 1) : NULL; - if (invokeTypes && jl_is_method_instance(invokeTypes)) - invokeTypes = NULL; - if ((invokesig == NULL && invokeTypes == NULL) || - (invokesig && invokeTypes && jl_types_equal(invokesig, invokeTypes))) { - found = 1; - break; + assert(jl_is_method_instance(callee)); + assert(jl_is_code_instance(caller)); + assert(invokesig == NULL || jl_is_type(invokesig)); + JL_LOCK(&callee->def.method->writelock); + if (jl_atomic_load_relaxed(&allow_new_worlds)) { + int found = 0; + // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory + if (!callee->backedges) { + // lazy-init the backedges array + callee->backedges = jl_alloc_vec_any(0); + jl_gc_wb(callee, callee->backedges); + } + else { + size_t i = 0, l = jl_array_nrows(callee->backedges); + for (i = 0; i < l; i++) { + // optimized version of while (i < l) i = get_next_edge(callee->backedges, i, &invokeTypes, &mi); + jl_value_t *mi = jl_array_ptr_ref(callee->backedges, i); + if (mi != (jl_value_t*)caller) + continue; + jl_value_t *invokeTypes = i > 0 ? jl_array_ptr_ref(callee->backedges, i - 1) : NULL; + if (invokeTypes && jl_is_method_instance(invokeTypes)) + invokeTypes = NULL; + if ((invokesig == NULL && invokeTypes == NULL) || + (invokesig && invokeTypes && jl_types_equal(invokesig, invokeTypes))) { + found = 1; + break; + } } } + if (!found) + push_edge(callee->backedges, invokesig, caller); } - if (!found) - push_edge(callee->backedges, invokesig, caller); JL_UNLOCK(&callee->def.method->writelock); } // add a backedge from a non-existent signature to caller -JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller) +JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_code_instance_t *caller) { + assert(jl_is_code_instance(caller)); + if (!jl_atomic_load_relaxed(&allow_new_worlds)) + return; JL_LOCK(&mt->writelock); - if (!mt->backedges) { - // lazy-init the backedges array - mt->backedges = jl_alloc_vec_any(2); - jl_gc_wb(mt, mt->backedges); - jl_array_ptr_set(mt->backedges, 0, typ); - jl_array_ptr_set(mt->backedges, 1, caller); - } - else { - // TODO: use jl_cache_type_(tt) like cache_method does, instead of a linear scan - size_t i, l = jl_array_len(mt->backedges); - for (i = 1; i < l; i += 2) { - if (jl_types_equal(jl_array_ptr_ref(mt->backedges, i - 1), typ)) { - if (jl_array_ptr_ref(mt->backedges, i) == caller) { - JL_UNLOCK(&mt->writelock); - return; + if (jl_atomic_load_relaxed(&allow_new_worlds)) { + if (!mt->backedges) { + // lazy-init the backedges array + mt->backedges = jl_alloc_vec_any(2); + jl_gc_wb(mt, mt->backedges); + jl_array_ptr_set(mt->backedges, 0, typ); + jl_array_ptr_set(mt->backedges, 1, caller); + } + else { + // check if the edge is already present and avoid adding a duplicate + size_t i, l = jl_array_nrows(mt->backedges); + for (i = 1; i < l; i += 2) { + if (jl_array_ptr_ref(mt->backedges, i) == (jl_value_t*)caller) { + if (jl_types_equal(jl_array_ptr_ref(mt->backedges, i - 1), typ)) { + JL_UNLOCK(&mt->writelock); + return; + } } - // reuse the already cached instance of this type - typ = jl_array_ptr_ref(mt->backedges, i - 1); } + // reuse an already cached instance of this type, if possible + // TODO: use jl_cache_type_(tt) like cache_method does, instead of this linear scan? 
+ for (i = 1; i < l; i += 2) { + if (jl_array_ptr_ref(mt->backedges, i) != (jl_value_t*)caller) { + if (jl_types_equal(jl_array_ptr_ref(mt->backedges, i - 1), typ)) { + typ = jl_array_ptr_ref(mt->backedges, i - 1); + break; + } + } + } + jl_array_ptr_1d_push(mt->backedges, typ); + jl_array_ptr_1d_push(mt->backedges, (jl_value_t*)caller); } - jl_array_ptr_1d_push(mt->backedges, typ); - jl_array_ptr_1d_push(mt->backedges, caller); } JL_UNLOCK(&mt->writelock); } @@ -1786,11 +1999,11 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0) { struct invalidate_mt_env *env = (struct invalidate_mt_env*)closure0; JL_GC_PROMISE_ROOTED(env->newentry); - if (oldentry->max_world == ~(size_t)0) { + if (jl_atomic_load_relaxed(&oldentry->max_world) == ~(size_t)0) { jl_method_instance_t *mi = oldentry->func.linfo; int intersects = 0; jl_method_instance_t **d = (jl_method_instance_t**)jl_array_ptr_data(env->shadowed); - size_t i, n = jl_array_len(env->shadowed); + size_t i, n = jl_array_nrows(env->shadowed); for (i = 0; i < n; i++) { if (mi == d[i]) { intersects = 1; @@ -1821,20 +2034,25 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0) jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); JL_GC_POP(); } - oldentry->max_world = env->max_world; + jl_atomic_store_relaxed(&oldentry->max_world, env->max_world); env->invalidated = 1; } } return 1; } + +struct disable_mt_env { + jl_method_t *replaced; + size_t max_world; +}; static int disable_mt_cache(jl_typemap_entry_t *oldentry, void *closure0) { - struct invalidate_mt_env *env = (struct invalidate_mt_env*)closure0; - if (oldentry->max_world < ~(size_t)0) + struct disable_mt_env *env = (struct disable_mt_env*)closure0; + if (jl_atomic_load_relaxed(&oldentry->max_world) < ~(size_t)0) return 1; jl_method_t *m = oldentry->func.linfo->def.method; - if (m == env->newentry->func.method) - oldentry->max_world = env->max_world; + if (m == env->replaced) + jl_atomic_store_relaxed(&oldentry->max_world, env->max_world); return 1; } @@ -1847,44 +2065,38 @@ static int typemap_search(jl_typemap_entry_t *entry, void *closure) return 1; } -static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT; - -#ifndef __clang_gcanalyzer__ /* in general, jl_typemap_visitor could be a safepoint, but not for typemap_search */ -static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT { +static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) { jl_value_t *closure = (jl_value_t*)(method); if (jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), typemap_search, &closure)) jl_error("method not in method table"); return (jl_typemap_entry_t *)closure; } -#endif -static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, size_t max_world) +static void jl_method_table_invalidate(jl_methtable_t *mt, jl_method_t *replaced, size_t max_world) { - jl_method_t *method = methodentry->func.method; - assert(!method->is_for_opaque_closure); - method->deleted_world = methodentry->max_world = max_world; + if (jl_options.incremental && jl_generating_output()) + jl_error("Method deletion is not possible during Module precompile."); + assert(!replaced->is_for_opaque_closure); + assert(jl_atomic_load_relaxed(&jl_world_counter) == max_world); // drop this method from mt->cache - struct invalidate_mt_env mt_cache_env; + struct disable_mt_env 
mt_cache_env; mt_cache_env.max_world = max_world; - mt_cache_env.newentry = methodentry; - mt_cache_env.shadowed = NULL; - mt_cache_env.invalidated = 0; + mt_cache_env.replaced = replaced; jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), disable_mt_cache, (void*)&mt_cache_env); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); - size_t i, l = jl_array_len(leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + size_t i, l = leafcache->length; for (i = 1; i < l; i += 2) { - jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_array_ptr_ref(leafcache, i); + jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_genericmemory_ptr_ref(leafcache, i); if (oldentry) { while ((jl_value_t*)oldentry != jl_nothing) { - if (oldentry->max_world == ~(size_t)0) - oldentry->max_world = mt_cache_env.max_world; + disable_mt_cache(oldentry, (void*)&mt_cache_env); oldentry = jl_atomic_load_relaxed(&oldentry->next); } } } // Invalidate the backedges int invalidated = 0; - jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations); + jl_value_t *specializations = jl_atomic_load_relaxed(&replaced->specializations); JL_GC_PUSH1(&specializations); if (!jl_is_svec(specializations)) specializations = (jl_value_t*)jl_svec1(specializations); @@ -1893,15 +2105,14 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i); if ((jl_value_t*)mi != jl_nothing) { invalidated = 1; - invalidate_external(mi, max_world); - invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_disable"); + invalidate_backedges(mi, max_world, "jl_method_table_disable"); } } JL_GC_POP(); // XXX: this might have resolved an ambiguity, for which we have not tracked the edge here, // and thus now introduce a mistake into inference if (invalidated && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced); jl_value_t *loctag = jl_cstr_to_string("jl_method_table_disable"); JL_GC_PUSH1(&loctag); jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); @@ -1909,17 +2120,66 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m } } +static int erase_method_backedges(jl_typemap_entry_t *def, void *closure) +{ + jl_method_t *method = def->func.method; + JL_LOCK(&method->writelock); + jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations); + if (jl_is_svec(specializations)) { + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i); + if ((jl_value_t*)mi != jl_nothing) { + mi->backedges = NULL; + } + } + } + else { + jl_method_instance_t *mi = (jl_method_instance_t*)specializations; + mi->backedges = NULL; + } + JL_UNLOCK(&method->writelock); + return 1; +} + +static int erase_all_backedges(jl_methtable_t *mt, void *env) +{ + // removes all method caches + // this might not be entirely safe (GC or MT), thus we only do it very early in bootstrapping + JL_LOCK(&mt->writelock); + mt->backedges = NULL; + JL_UNLOCK(&mt->writelock); + jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), erase_method_backedges, env); + return 1; +} + +JL_DLLEXPORT void jl_disable_new_worlds(void) +{ + if (jl_generating_output()) + jl_error("Disabling Method changes is not possible when 
generating output."); + JL_LOCK(&world_counter_lock); + jl_atomic_store_relaxed(&allow_new_worlds, 0); + JL_UNLOCK(&world_counter_lock); + jl_foreach_reachable_mtable(erase_all_backedges, (void*)NULL); +} + JL_DLLEXPORT void jl_method_table_disable(jl_methtable_t *mt, jl_method_t *method) { - if (jl_options.incremental && jl_generating_output()) - jl_printf(JL_STDERR, "WARNING: method deletion during Module precompile may lead to undefined behavior" - "\n ** incremental compilation may be fatally broken for this module **\n\n"); jl_typemap_entry_t *methodentry = do_typemap_search(mt, method); + JL_LOCK(&world_counter_lock); + if (!jl_atomic_load_relaxed(&allow_new_worlds)) + jl_error("Method changes have been disabled via a call to disable_new_worlds."); JL_LOCK(&mt->writelock); // Narrow the world age on the method to make it uncallable - size_t world = jl_atomic_fetch_add(&jl_world_counter, 1); - jl_method_table_invalidate(mt, methodentry, world); + size_t world = jl_atomic_load_relaxed(&jl_world_counter); + assert(method == methodentry->func.method); + assert(jl_atomic_load_relaxed(&method->deleted_world) == ~(size_t)0); + jl_atomic_store_relaxed(&method->deleted_world, world); + jl_atomic_store_relaxed(&methodentry->max_world, world); + jl_method_table_invalidate(mt, method, world); + jl_atomic_store_release(&jl_world_counter, world + 1); JL_UNLOCK(&mt->writelock); + JL_UNLOCK(&world_counter_lock); } static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect JL_REQUIRE_ROOTED_SLOT, jl_value_t **isect2 JL_REQUIRE_ROOTED_SLOT) @@ -1981,29 +2241,51 @@ static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_ return 1; } -JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype) +jl_typemap_entry_t *jl_method_table_add(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype) { JL_TIMING(ADD_METHOD, ADD_METHOD); assert(jl_is_method(method)); assert(jl_is_mtable(mt)); jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK); - jl_value_t *type = method->sig; + jl_typemap_entry_t *newentry = NULL; + JL_GC_PUSH1(&newentry); + JL_LOCK(&mt->writelock); + // add our new entry + assert(jl_atomic_load_relaxed(&method->primary_world) == ~(size_t)0); // min-world + assert(jl_atomic_load_relaxed(&method->deleted_world) == 1); // max-world + newentry = jl_typemap_alloc((jl_tupletype_t*)method->sig, simpletype, jl_emptysvec, (jl_value_t*)method, + jl_atomic_load_relaxed(&method->primary_world), jl_atomic_load_relaxed(&method->deleted_world)); + jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt)); + update_max_args(mt, method->sig); + JL_UNLOCK(&mt->writelock); + JL_GC_POP(); + return newentry; +} + +void jl_method_table_activate(jl_methtable_t *mt, jl_typemap_entry_t *newentry) +{ + JL_TIMING(ADD_METHOD, ADD_METHOD); + jl_method_t *method = newentry->func.method; + assert(jl_is_mtable(mt)); + assert(jl_is_method(method)); + jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK); + jl_value_t *type = (jl_value_t*)newentry->sig; jl_value_t *oldvalue = NULL; jl_array_t *oldmi = NULL; - if (method->primary_world == 1) - method->primary_world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1; - size_t max_world = method->primary_world - 1; + JL_LOCK(&mt->writelock); + size_t world = jl_atomic_load_relaxed(&method->primary_world); + assert(world == jl_atomic_load_relaxed(&jl_world_counter) + 1); // min-world + assert(jl_atomic_load_relaxed(&method->deleted_world) == 
~(size_t)0); // max-world + assert(jl_atomic_load_relaxed(&newentry->min_world) == ~(size_t)0); + assert(jl_atomic_load_relaxed(&newentry->max_world) == 1); + jl_atomic_store_relaxed(&newentry->min_world, world); + jl_atomic_store_relaxed(&method->primary_world, world); + size_t max_world = world - 1; jl_value_t *loctag = NULL; // debug info for invalidation jl_value_t *isect = NULL; jl_value_t *isect2 = NULL; jl_value_t *isect3 = NULL; - jl_typemap_entry_t *newentry = NULL; - JL_GC_PUSH7(&oldvalue, &oldmi, &newentry, &loctag, &isect, &isect2, &isect3); - JL_LOCK(&mt->writelock); - // add our new entry - newentry = jl_typemap_alloc((jl_tupletype_t*)type, simpletype, jl_emptysvec, - (jl_value_t*)method, method->primary_world, method->deleted_world); - jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt)); + JL_GC_PUSH6(&oldvalue, &oldmi, &loctag, &isect, &isect2, &isect3); jl_typemap_entry_t *replaced = NULL; // then check what entries we replaced oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry, &replaced, jl_cachearg_offset(mt), max_world); @@ -2012,7 +2294,8 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method oldvalue = (jl_value_t*)replaced; invalidated = 1; method_overwrite(newentry, replaced->func.method); - jl_method_table_invalidate(mt, replaced, max_world); + // this is an optimized version of below, given we know the type-intersection is exact + jl_method_table_invalidate(mt, replaced->func.method, max_world); } else { jl_method_t *const *d; @@ -2024,14 +2307,15 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method else { assert(jl_is_array(oldvalue)); d = (jl_method_t**)jl_array_ptr_data(oldvalue); - n = jl_array_len(oldvalue); + n = jl_array_nrows(oldvalue); } if (mt->backedges) { jl_value_t **backedges = jl_array_ptr_data(mt->backedges); - size_t i, na = jl_array_len(mt->backedges); + size_t i, na = jl_array_nrows(mt->backedges); size_t ins = 0; for (i = 1; i < na; i += 2) { jl_value_t *backedgetyp = backedges[i - 1]; + JL_GC_PROMISE_ROOTED(backedgetyp); int missing = 0; if (jl_type_intersection2(backedgetyp, (jl_value_t*)type, &isect, &isect2)) { // See if the intersection was actually already fully @@ -2060,9 +2344,9 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method } } if (missing) { - jl_method_instance_t *backedge = (jl_method_instance_t*)backedges[i]; - invalidate_external(backedge, max_world); - invalidate_method_instance(&do_nothing_with_codeinst, backedge, max_world, 0); + jl_code_instance_t *backedge = (jl_code_instance_t*)backedges[i]; + JL_GC_PROMISE_ROOTED(backedge); + invalidate_code_instance(backedge, max_world, 0); invalidated = 1; if (_jl_debug_method_invalidation) jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)backedgetyp); @@ -2119,25 +2403,29 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method // over the intersection (not ambiguous) and the new method will be selected now (morespec_is) int replaced_dispatch = is_replacing(ambig, type, m, d, n, isect, isect2, morespec); // found that this specialization dispatch got replaced by m - // call invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_insert"); + // call invalidate_backedges(mi, max_world, "jl_method_table_insert"); // but ignore invoke-type edges jl_array_t *backedges = mi->backedges; if (backedges) { - size_t ib = 0, insb = 0, nb = jl_array_len(backedges); + size_t ib = 0, 
insb = 0, nb = jl_array_nrows(backedges); jl_value_t *invokeTypes; - jl_method_instance_t *caller; + jl_code_instance_t *caller; while (ib < nb) { ib = get_next_edge(backedges, ib, &invokeTypes, &caller); + JL_GC_PROMISE_ROOTED(caller); // propagated by get_next_edge from backedges int replaced_edge; if (invokeTypes) { // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes - replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec); + if (jl_egal(invokeTypes, jl_get_ci_mi(caller)->def.method->sig)) + replaced_edge = 0; // if invokeTypes == m.sig, then the only way to change this invoke is to replace the method itself + else + replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec); } else { replaced_edge = replaced_dispatch; } if (replaced_edge) { - invalidate_method_instance(&do_nothing_with_codeinst, caller, max_world, 1); + invalidate_code_instance(caller, max_world, 1); invalidated = 1; } else { @@ -2147,7 +2435,6 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method jl_array_del_end(backedges, nb - insb); } jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi); - invalidate_external(mi, max_world); if (_jl_debug_method_invalidation && invalidated) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi); loctag = jl_cstr_to_string("jl_method_table_insert"); @@ -2156,7 +2443,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method } } } - if (jl_array_len(oldmi)) { + if (jl_array_nrows(oldmi)) { // search mt->cache and leafcache and drop anything that might overlap with the new method // this is very cheap, so we don't mind being fairly conservative at over-approximating this struct invalidate_mt_env mt_cache_env; @@ -2166,10 +2453,10 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method mt_cache_env.invalidated = 0; jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), invalidate_mt_cache, (void*)&mt_cache_env); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); - size_t i, l = jl_array_len(leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + size_t i, l = leafcache->length; for (i = 1; i < l; i += 2) { - jl_value_t *entry = jl_array_ptr_ref(leafcache, i); + jl_value_t *entry = jl_genericmemory_ptr_ref(leafcache, i); if (entry) { while (entry != jl_nothing) { invalidate_mt_cache((jl_typemap_entry_t*)entry, (void*)&mt_cache_env); @@ -2185,12 +2472,28 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method loctag = jl_cstr_to_string("jl_method_table_insert"); jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); } - update_max_args(mt, type); + jl_atomic_store_relaxed(&newentry->max_world, jl_atomic_load_relaxed(&method->deleted_world)); JL_UNLOCK(&mt->writelock); JL_GC_POP(); } -static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, size_t world) +JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype) +{ + jl_typemap_entry_t *newentry = jl_method_table_add(mt, method, simpletype); + JL_GC_PUSH1(&newentry); + JL_LOCK(&world_counter_lock); + if (!jl_atomic_load_relaxed(&allow_new_worlds)) + jl_error("Method changes have been disabled via a call to disable_new_worlds."); + size_t world = jl_atomic_load_relaxed(&jl_world_counter) + 
1; + jl_atomic_store_relaxed(&method->primary_world, world); + jl_atomic_store_relaxed(&method->deleted_world, ~(size_t)0); + jl_method_table_activate(mt, newentry); + jl_atomic_store_release(&jl_world_counter, world); + JL_UNLOCK(&world_counter_lock); + JL_GC_POP(); +} + +static void JL_NORETURN jl_method_error_bare(jl_value_t *f, jl_value_t *args, size_t world) { if (jl_methoderror_type) { jl_value_t *e = jl_new_struct_uninit(jl_methoderror_type); @@ -2215,7 +2518,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, // not reached } -void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world) +void JL_NORETURN jl_method_error(jl_value_t *f, jl_value_t **args, size_t na, size_t world) { jl_value_t *argtup = jl_f_tuple(NULL, args, na - 1); JL_GC_PUSH1(&argtup); @@ -2233,7 +2536,22 @@ static jl_tupletype_t *lookup_arg_type_tuple(jl_value_t *arg1 JL_PROPAGATES_ROOT return jl_lookup_arg_tuple_type(arg1, args, nargs, 1); } -jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world) +JL_DLLEXPORT jl_value_t *jl_method_lookup_by_tt(jl_tupletype_t *tt, size_t world, jl_value_t *_mt) +{ + jl_methtable_t *mt = NULL; + if (_mt == jl_nothing) + mt = jl_gf_ft_mtable(jl_tparam0(tt)); + else { + assert(jl_isa(_mt, (jl_value_t*)jl_methtable_type)); + mt = (jl_methtable_t*) _mt; + } + jl_method_instance_t* mi = jl_mt_assoc_by_type(mt, tt, world); + if (!mi) + return jl_nothing; + return (jl_value_t*) mi; +} + +JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world) { assert(nargs > 0 && "expected caller to handle this case"); jl_methtable_t *mt = jl_gf_mtable(args[0]); @@ -2242,16 +2560,7 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w if (entry) return entry->func.linfo; jl_tupletype_t *tt = arg_type_tuple(args[0], &args[1], nargs); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); - entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); - if (entry) - return entry->func.linfo; - JL_GC_PUSH1(&tt); - JL_LOCK(&mt->writelock); - jl_method_instance_t *sf = jl_mt_assoc_by_type(mt, tt, world); - JL_UNLOCK(&mt->writelock); - JL_GC_POP(); - return sf; + return jl_mt_assoc_by_type(mt, tt, world); } // return a Vector{Any} of svecs, each describing a method match: @@ -2260,7 +2569,7 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w // spvals is any matched static parameter values, m is the Method, // full is a boolean indicating if that method fully covers the input // -// lim is the max # of methods to return. if there are more, returns jl_false. +// lim is the max # of methods to return. if there are more, returns jl_nothing. // Negative values stand for no limit. 
// Unless lim == -1, remove matches that are unambiguously covered by earlier ones JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous, @@ -2280,17 +2589,7 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t * return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig); } -jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT) -{ - jl_method_t *def = method->def.method; - jl_method_instance_t *mi = jl_get_unspecialized(def); - if (mi == NULL) { - return method; - } - return mi; -} - -jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT) +JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT) { // one unspecialized version of a function can be shared among all cached specializations if (!jl_is_method(def) || def->source == NULL) { @@ -2311,36 +2610,87 @@ jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT) return unspec; } +STATIC_INLINE jl_value_t *_jl_rettype_inferred(jl_value_t *owner, jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT +{ + jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); + while (codeinst) { + if (jl_atomic_load_relaxed(&codeinst->min_world) <= min_world && + max_world <= jl_atomic_load_relaxed(&codeinst->max_world) && + jl_egal(codeinst->owner, owner)) { + + jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred); + if (code) + return (jl_value_t*)codeinst; + } + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + return (jl_value_t*)jl_nothing; +} + +JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_value_t *owner, jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT +{ + return (jl_value_t*)_jl_rettype_inferred(owner, mi, min_world, max_world); +} + +JL_DLLEXPORT jl_value_t *jl_rettype_inferred_native(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT +{ + return (jl_value_t*)_jl_rettype_inferred(jl_nothing, mi, min_world, max_world); +} + +JL_DLLEXPORT jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT = jl_rettype_inferred_native; jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi, size_t world) { jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); - while (codeinst) { - if (codeinst->min_world <= world && world <= codeinst->max_world) { + for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) { + if (codeinst->owner != jl_nothing) + continue; + if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) { if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL) return codeinst; } - codeinst = jl_atomic_load_relaxed(&codeinst->next); } return NULL; } jl_mutex_t precomp_statement_out_lock; -static void record_precompile_statement(jl_method_instance_t *mi) +_Atomic(uint8_t) jl_force_trace_compile_timing_enabled = 0; + +/** + * @brief Enable force trace compile to stderr with timing. + */ +JL_DLLEXPORT void jl_force_trace_compile_timing_enable(void) +{ + // Increment the flag to allow reentrant callers to `@trace_compile`. + jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, 1); +} +/** + * @brief Disable force trace compile to stderr with timing. 
+ */ +JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void) +{ + // Decrement the flag to allow reentrant callers to `@trace_compile`. + jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, -1); +} + +static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time, int is_recompile) { static ios_t f_precompile; static JL_STREAM* s_precompile = NULL; jl_method_t *def = mi->def.method; - if (jl_options.trace_compile == NULL) + uint8_t force_trace_compile = jl_atomic_load_relaxed(&jl_force_trace_compile_timing_enabled); + if (force_trace_compile == 0 && jl_options.trace_compile == NULL) return; if (!jl_is_method(def)) return; + if (def->is_for_opaque_closure) + return; // OpaqueClosure methods cannot be looked up by their types, so are incompatible with `precompile(...)` JL_LOCK(&precomp_statement_out_lock); if (s_precompile == NULL) { const char *t = jl_options.trace_compile; - if (!strncmp(t, "stderr", 6)) { + if (force_trace_compile || !strncmp(t, "stderr", 6)) { s_precompile = JL_STDERR; } else { @@ -2350,17 +2700,139 @@ static void record_precompile_statement(jl_method_instance_t *mi) } } if (!jl_has_free_typevars(mi->specTypes)) { + if (is_recompile && s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) + jl_printf(s_precompile, "\e[33m"); + if (force_trace_compile || jl_options.trace_compile_timing) + jl_printf(s_precompile, "#= %6.1f ms =# ", compilation_time / 1e6); jl_printf(s_precompile, "precompile("); jl_static_show(s_precompile, mi->specTypes); - jl_printf(s_precompile, ")\n"); + jl_printf(s_precompile, ")"); + if (is_recompile) { + jl_printf(s_precompile, " # recompile"); + if (s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) { + jl_printf(s_precompile, "\e[0m"); + } + } + jl_printf(s_precompile, "\n"); if (s_precompile != JL_STDERR) ios_flush(&f_precompile); } JL_UNLOCK(&precomp_statement_out_lock); } +jl_mutex_t dispatch_statement_out_lock; + +_Atomic(uint8_t) jl_force_trace_dispatch_enabled = 0; + +/** + * @brief Enable force trace dispatch to stderr. + */ +JL_DLLEXPORT void jl_force_trace_dispatch_enable(void) +{ + // Increment the flag to allow reentrant callers to `@trace_dispatch`. + jl_atomic_fetch_add(&jl_force_trace_dispatch_enabled, 1); +} +/** + * @brief Disable force trace dispatch to stderr. + */ +JL_DLLEXPORT void jl_force_trace_dispatch_disable(void) +{ + // Decrement the flag to allow reentrant callers to `@trace_dispatch`. 
+ jl_atomic_fetch_add(&jl_force_trace_dispatch_enabled, -1); +} + +static void record_dispatch_statement(jl_method_instance_t *mi) +{ + static ios_t f_dispatch; + static JL_STREAM* s_dispatch = NULL; + jl_method_t *def = mi->def.method; + if (!jl_is_method(def)) + return; + + uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled); + JL_LOCK(&dispatch_statement_out_lock); + if (s_dispatch == NULL) { + const char *t = jl_options.trace_dispatch; + if (force_trace_dispatch || !strncmp(t, "stderr", 6)) { + s_dispatch = JL_STDERR; + } + else { + if (ios_file(&f_dispatch, t, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open dispatch statement file \"%s\" for writing", t); + s_dispatch = (JL_STREAM*) &f_dispatch; + } + } + if (!jl_has_free_typevars(mi->specTypes)) { + jl_printf(s_dispatch, "precompile("); + jl_static_show(s_dispatch, mi->specTypes); + jl_printf(s_dispatch, ")\n"); + if (s_dispatch != JL_STDERR) + ios_flush(&f_dispatch); + } + JL_UNLOCK(&dispatch_statement_out_lock); +} + +// If waitcompile is 0, this will return NULL if compiling is on-going in the JIT. This is +// useful for the JIT itself, since it just doesn't cause redundant work or missed updates, +// but merely causes it to look into the current JIT worklist. +void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) +{ + uint8_t flags = jl_atomic_load_acquire(&ci->specsigflags); // happens-before for subsequent read of fptr + while (1) { + jl_callptr_t initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr + if (initial_invoke == jl_fptr_wait_for_compiled_addr) { + if (!waitcompile) { + *invoke = NULL; + *specptr = NULL; + *specsigflags = 0b00; + return; + } + jl_compile_codeinst(ci); + initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr + } + void *fptr = jl_atomic_load_relaxed(&ci->specptr.fptr); + // TODO: if fptr is NULL, it may mean we read this too fast, and should have spun and waited for jl_compile_codeinst to finish + if (initial_invoke == NULL || fptr == NULL) { + *invoke = initial_invoke; + *specptr = NULL; + *specsigflags = 0b00; + return; + } + while (!(flags & 0b10)) { + jl_cpu_pause(); + flags = jl_atomic_load_acquire(&ci->specsigflags); + } + jl_callptr_t final_invoke = jl_atomic_load_relaxed(&ci->invoke); + if (final_invoke == initial_invoke) { + *invoke = final_invoke; + *specptr = fptr; + *specsigflags = flags; + return; + } + } +} + jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT); +JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src) +{ + assert(jl_is_code_info(src)); + jl_emit_codeinst_to_jit(codeinst, src); + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + if (jl_generating_output() && jl_is_method(mi->def.method) && jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { + jl_value_t *compressed = jl_compress_ir(mi->def.method, src); + // These should already be compatible (and should be an assert), but make sure of it anyways + if (jl_is_svec(src->edges)) { + jl_atomic_store_release(&codeinst->edges, (jl_svec_t*)src->edges); + jl_gc_wb(codeinst, src->edges); + } + jl_atomic_store_release(&codeinst->debuginfo, src->debuginfo); + jl_gc_wb(codeinst, src->debuginfo); + jl_atomic_store_release(&codeinst->inferred, compressed); + jl_gc_wb(codeinst, compressed); + } +} + jl_code_instance_t 
*jl_compile_method_internal(jl_method_instance_t *mi, size_t world) { // quick check if we already have a compiled result @@ -2375,18 +2847,18 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t jl_code_instance_t *codeinst2 = jl_compile_method_internal(mi2, world); jl_code_instance_t *codeinst = jl_get_method_inferred( mi, codeinst2->rettype, - codeinst2->min_world, codeinst2->max_world); + jl_atomic_load_relaxed(&codeinst2->min_world), + jl_atomic_load_relaxed(&codeinst2->max_world), + jl_atomic_load_relaxed(&codeinst2->debuginfo), + jl_atomic_load_relaxed(&codeinst2->edges)); if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) { codeinst->rettype_const = codeinst2->rettype_const; - uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags); - jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke); - void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr); + jl_gc_wb(codeinst, codeinst->rettype_const); + uint8_t specsigflags; + jl_callptr_t invoke; + void *fptr; + jl_read_codeinst_invoke(codeinst2, &specsigflags, &invoke, &fptr, 1); if (fptr != NULL) { - while (!(specsigflags & 0b10)) { - jl_cpu_pause(); - specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags); - } - invoke = jl_atomic_load_relaxed(&codeinst2->invoke); void *prev_fptr = NULL; // see jitlayers.cpp for the ordering restrictions here if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) { @@ -2394,14 +2866,16 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t jl_atomic_store_release(&codeinst->invoke, invoke); // unspec is probably not specsig, but might be using specptr jl_atomic_store_release(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag - } else { + } + else { // someone else already compiled it while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { jl_cpu_pause(); } // codeinst is now set up fully, safe to return } - } else { + } + else { jl_callptr_t prev = NULL; jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke); } @@ -2423,30 +2897,27 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t // if compilation is disabled or source is unavailable, try calling unspecialized version if (compile_option == JL_OPTIONS_COMPILE_OFF || compile_option == JL_OPTIONS_COMPILE_MIN || - def->source == jl_nothing) { + (jl_is_method(def) && def->source == jl_nothing)) { // copy fptr from the template method definition if (jl_is_method(def)) { jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized); if (unspecmi) { jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache); - jl_callptr_t unspec_invoke = NULL; - if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) { - jl_code_instance_t *codeinst = jl_new_codeinst(mi, - (jl_value_t*)jl_any_type, NULL, NULL, - 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); - void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr); - if (unspec_fptr) { - // wait until invoke and specsigflags are properly set - while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) { - jl_cpu_pause(); - } - unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke); - } - jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr); + if (unspec && jl_atomic_load_acquire(&unspec->invoke) != NULL) { + uint8_t specsigflags; + jl_callptr_t invoke; + void *fptr; + jl_read_codeinst_invoke(unspec, &specsigflags, &invoke, &fptr, 1); + jl_code_instance_t *codeinst = 
jl_new_codeinst(mi, jl_nothing, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL, + 0, 1, ~(size_t)0, 0, jl_nothing, NULL, NULL); codeinst->rettype_const = unspec->rettype_const; - jl_atomic_store_release(&codeinst->invoke, unspec_invoke); + jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr); + jl_atomic_store_relaxed(&codeinst->invoke, invoke); + // unspec is probably not specsig, but might be using specptr + jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag jl_mi_cache_insert(mi, codeinst); - record_precompile_statement(mi); + record_precompile_statement(mi, 0, 0); return codeinst; } } @@ -2458,34 +2929,81 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t compile_option == JL_OPTIONS_COMPILE_MIN) { jl_code_info_t *src = jl_code_for_interpreter(mi, world); if (!jl_code_requires_compiler(src, 0)) { - jl_code_instance_t *codeinst = jl_new_codeinst(mi, - (jl_value_t*)jl_any_type, NULL, NULL, - 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); + jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL, + 0, 1, ~(size_t)0, 0, jl_nothing, NULL, NULL); jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call); jl_mi_cache_insert(mi, codeinst); - record_precompile_statement(mi); + record_precompile_statement(mi, 0, 0); return codeinst; } if (compile_option == JL_OPTIONS_COMPILE_OFF) { - jl_printf(JL_STDERR, "code missing for "); + jl_printf(JL_STDERR, "No compiled code available for "); jl_static_show(JL_STDERR, (jl_value_t*)mi); jl_printf(JL_STDERR, " : sysimg may not have been built with --compile=all\n"); } } - codeinst = jl_generate_fptr(mi, world); + // Ok, compilation is enabled. We'll need to try to compile something (probably). + + // Everything from here on is considered (user facing) compile time + uint64_t start = jl_typeinf_timing_begin(); + + // Is a recompile if there is cached code, and it was compiled (not only inferred) before + int is_recompile = 0; + jl_code_instance_t *codeinst_old = jl_atomic_load_relaxed(&mi->cache); + while (codeinst_old != NULL) { + if (jl_atomic_load_relaxed(&codeinst_old->invoke) != NULL) { + is_recompile = 1; + break; + } + codeinst_old = jl_atomic_load_relaxed(&codeinst_old->next); + } + + // jl_type_infer will internally do a cache lookup and jl_engine_reserve call + // to synchronize this across threads if (!codeinst) { - jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi); - jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0); + // Don't bother inferring toplevel thunks or macros - the performance cost of inference is likely + // to significantly exceed the actual runtime. + int should_skip_inference = !jl_is_method(mi->def.method) || jl_symbol_name(mi->def.method->name)[0] == '@'; + + if (!should_skip_inference) { + codeinst = jl_type_infer(mi, world, SOURCE_MODE_ABI); + } + } + + if (codeinst) { + if (jl_is_compiled_codeinst(codeinst)) { + jl_typeinf_timing_end(start, is_recompile); + // Already compiled - e.g. constabi, or compiled by a different thread while we were waiting. + return codeinst; + } + + JL_GC_PUSH1(&codeinst); + double compile_time = jl_hrtime(); + int did_compile = jl_compile_codeinst(codeinst); + compile_time = jl_hrtime() - compile_time; + + if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) { + // Something went wrong. Bail to the fallback path. 
+ codeinst = NULL; + } + else if (did_compile && codeinst->owner == jl_nothing) { + record_precompile_statement(mi, compile_time, is_recompile); + } + JL_GC_POP(); + } + if (!codeinst) { + jl_method_instance_t *unspec = jl_get_unspecialized(def); + if (unspec == NULL) + unspec = mi; + jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0, NULL, NULL); // ask codegen to make the fptr for unspec jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke); if (ucache_invoke == NULL) { - if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing || - jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) { - jl_printf(JL_STDERR, "source not available for "); - jl_static_show(JL_STDERR, (jl_value_t*)mi); - jl_printf(JL_STDERR, "\n"); - jl_error("source missing for method that needs to be compiled"); + if ((!jl_is_method(def) || def->source == jl_nothing) && + !jl_cached_uninferred(jl_atomic_load_relaxed(&jl_get_ci_mi(ucache)->cache), world)) { + jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi)); } jl_generate_fptr_for_unspecialized(ucache); ucache_invoke = jl_atomic_load_acquire(&ucache->invoke); @@ -2494,29 +3012,25 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t if (ucache_invoke != jl_fptr_sparam && ucache_invoke != jl_fptr_interpret_call) { // only these care about the exact specTypes, otherwise we can use it directly + jl_typeinf_timing_end(start, is_recompile); return ucache; } - codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL, - 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); - void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr); - if (unspec_fptr) { - // wait until invoke and specsigflags are properly set - while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) { - jl_cpu_pause(); - } - ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke); - } - // unspec is always not specsig, but might use specptr - jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10); - jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr); + uint8_t specsigflags; + jl_callptr_t invoke; + void *fptr; + jl_read_codeinst_invoke(ucache, &specsigflags, &invoke, &fptr, 1); + codeinst = jl_new_codeinst(mi, jl_nothing, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL, + 0, 1, ~(size_t)0, 0, jl_nothing, NULL, NULL); codeinst->rettype_const = ucache->rettype_const; - jl_atomic_store_release(&codeinst->invoke, ucache_invoke); + // unspec is always not specsig, but might use specptr + jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr); + jl_atomic_store_relaxed(&codeinst->invoke, invoke); + jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag jl_mi_cache_insert(mi, codeinst); } - else { - record_precompile_statement(mi); - } jl_atomic_store_relaxed(&codeinst->precompile, 1); + jl_typeinf_timing_end(start, is_recompile); return codeinst; } @@ -2534,13 +3048,43 @@ jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_co jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m) { - jl_svec_t *sparams = m->def->sparam_vals; + jl_svec_t *sparams = jl_get_ci_mi(m)->sparam_vals; assert(sparams != jl_emptysvec); jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3); assert(invoke && "Forgot to set specptr for jl_fptr_sparam!"); return invoke(f, args, nargs, 
sparams); } +jl_value_t *jl_fptr_wait_for_compiled(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m) +{ + jl_callptr_t invoke = jl_atomic_load_acquire(&m->invoke); + if (invoke == &jl_fptr_wait_for_compiled) { + int64_t last_alloc = jl_options.malloc_log ? jl_gc_diff_total_bytes() : 0; + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + jl_compile_codeinst(m); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + if (jl_options.malloc_log) + jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation + invoke = jl_atomic_load_acquire(&m->invoke); + } + return invoke(f, args, nargs, m); +} + +// test whether codeinst->invoke is usable already without further compilation needed +JL_DLLEXPORT int jl_is_compiled_codeinst(jl_code_instance_t *codeinst) +{ + jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (invoke == NULL || invoke == &jl_fptr_wait_for_compiled) + return 0; + return 1; +} + JL_DLLEXPORT const jl_callptr_t jl_fptr_args_addr = &jl_fptr_args; JL_DLLEXPORT const jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return; @@ -2549,6 +3093,8 @@ JL_DLLEXPORT const jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam; JL_DLLEXPORT const jl_callptr_t jl_f_opaque_closure_call_addr = (jl_callptr_t)&jl_f_opaque_closure_call; +JL_DLLEXPORT const jl_callptr_t jl_fptr_wait_for_compiled_addr = &jl_fptr_wait_for_compiled; + // Return the index of the invoke api, if known JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst) { @@ -2577,7 +3123,7 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t jl_compilation_sig(ti, env, m, max_varargs, &newparams); int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple; if (newparams) { - tt = (jl_datatype_t*)jl_apply_tuple_type(newparams); + tt = (jl_datatype_t*)jl_apply_tuple_type(newparams, 1); if (!is_compileable) { // compute new env, if used below jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &newparams); @@ -2615,7 +3161,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_ } // return a MethodInstance for a compileable method_match -jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) +JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) { jl_method_t *m = match->method; jl_svec_t *env = match->sparams; @@ -2623,6 +3169,7 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor jl_method_instance_t *mi = NULL; if (jl_is_datatype(ti)) { jl_methtable_t *mt = jl_method_get_table(m); + assert(mt != NULL); if ((jl_value_t*)mt != jl_nothing) { // get the specialization, possibly also caching it if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) { @@ -2652,7 +3199,7 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor } // compile-time method lookup -jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache) +jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, int mt_cache) { if (jl_has_free_typevars((jl_value_t*)types)) return NULL; // don't poison the cache due to a malformed query @@ -2664,11 +3211,7 @@ jl_method_instance_t 
*jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES size_t max_valid2 = ~(size_t)0; int ambig = 0; jl_value_t *matches = jl_matching_methods(types, jl_nothing, 1, 1, world, &min_valid2, &max_valid2, &ambig); - if (*min_valid < min_valid2) - *min_valid = min_valid2; - if (*max_valid > max_valid2) - *max_valid = max_valid2; - if (matches == jl_nothing || jl_array_len(matches) != 1 || ambig) + if (matches == jl_nothing || jl_array_nrows(matches) != 1 || ambig) return NULL; JL_GC_PUSH1(&matches); jl_method_match_t *match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); @@ -2694,7 +3237,7 @@ static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t * *min_valid = min_valid2; if (*max_valid > max_valid2) *max_valid = max_valid2; - size_t i, n = jl_array_len(matches); + size_t i, n = jl_array_nrows(matches); if (n == 0) return NULL; JL_GC_PUSH1(&matches); @@ -2722,7 +3265,7 @@ static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t * exclude = 0; for (size_t j = n-1; j > i; j--) { // more general methods maybe more likely to be at end jl_method_match_t *match2 = (jl_method_match_t*)jl_array_ptr_ref(matches, j); - if (jl_type_morespecific(match1->method->sig, match2->method->sig)) { + if (jl_method_morespecific(match1->method, match2->method)) { exclude = 1; break; } @@ -2746,10 +3289,10 @@ static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t * static void _generate_from_hint(jl_method_instance_t *mi, size_t world) { - jl_value_t *codeinst = jl_rettype_inferred(mi, world, world); + jl_value_t *codeinst = jl_rettype_inferred_native(mi, world, world); if (codeinst == jl_nothing) { - (void)jl_type_infer(mi, world, 1); - codeinst = jl_rettype_inferred(mi, world, world); + (void)jl_type_infer(mi, world, SOURCE_MODE_NOT_REQUIRED); + codeinst = jl_rettype_inferred_native(mi, world, world); } if (codeinst != jl_nothing) { if (jl_atomic_load_relaxed(&((jl_code_instance_t*)codeinst)->invoke) == jl_fptr_const_return) @@ -2763,7 +3306,7 @@ static void jl_compile_now(jl_method_instance_t *mi) size_t world = jl_atomic_load_acquire(&jl_world_counter); size_t tworld = jl_typeinf_world; _generate_from_hint(mi, world); - if (jl_typeinf_func && mi->def.method->primary_world <= tworld) { + if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) { // if it's part of the compiler, also attempt to compile for the compiler world too _generate_from_hint(mi, tworld); } @@ -2772,7 +3315,8 @@ static void jl_compile_now(jl_method_instance_t *mi) JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world) { size_t tworld = jl_typeinf_world; - jl_atomic_store_relaxed(&mi->precompiled, 1); + uint8_t miflags = jl_atomic_load_relaxed(&mi->flags) | JL_MI_FLAGS_MASK_PRECOMPILED; + jl_atomic_store_relaxed(&mi->flags, miflags); if (jl_generating_output()) { jl_compile_now(mi); // In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. 
due to nospecialize), @@ -2787,12 +3331,13 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2); jl_method_instance_t *mi2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2); JL_GC_POP(); - jl_atomic_store_relaxed(&mi2->precompiled, 1); - if (jl_rettype_inferred(mi2, world, world) == jl_nothing) - (void)jl_type_infer(mi2, world, 1); - if (jl_typeinf_func && mi->def.method->primary_world <= tworld) { - if (jl_rettype_inferred(mi2, tworld, tworld) == jl_nothing) - (void)jl_type_infer(mi2, tworld, 1); + miflags = jl_atomic_load_relaxed(&mi2->flags) | JL_MI_FLAGS_MASK_PRECOMPILED; + jl_atomic_store_relaxed(&mi2->flags, miflags); + if (jl_rettype_inferred_native(mi2, world, world) == jl_nothing) + (void)jl_type_infer(mi2, world, SOURCE_MODE_NOT_REQUIRED); + if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) { + if (jl_rettype_inferred_native(mi2, tworld, tworld) == jl_nothing) + (void)jl_type_infer(mi2, tworld, SOURCE_MODE_NOT_REQUIRED); } } } @@ -2803,6 +3348,12 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet } } +JL_DLLEXPORT void jl_compile_method_sig(jl_method_t *m, jl_value_t *types, jl_svec_t *env, size_t world) +{ + jl_method_instance_t *mi = jl_specializations_get_linfo(m, types, env); + jl_compile_method_instance(mi, NULL, world); +} + JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) { size_t world = jl_atomic_load_acquire(&jl_world_counter); @@ -2816,6 +3367,21 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) return 1; } +JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1); + if (mi == NULL) + return 0; + JL_GC_PROMISE_ROOTED(mi); + if (jl_generating_output() && jl_options.trim) { + arraylist_push(jl_entrypoint_mis, mi); + } + return 1; +} + // add type of `f` to front of argument tuple type jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0) { @@ -2832,12 +3398,29 @@ jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_v jl_svecset(tt, 0, ft); for (size_t i = 0; i < l; i++) jl_svecset(tt, i+1, jl_tparam(types,i)); - tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt); + tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 1); tt = jl_rewrap_unionall_(tt, types0); JL_GC_POP(); return tt; } +// undo jl_argtype_with_function transform +jl_value_t *jl_argtype_without_function(jl_value_t *ftypes) +{ + jl_value_t *types = jl_unwrap_unionall(ftypes); + size_t l = jl_nparams(types); + if (l == 1 && jl_is_vararg(jl_tparam0(types))) + return ftypes; + jl_value_t *tt = (jl_value_t*)jl_alloc_svec(l - 1); + JL_GC_PUSH1(&tt); + for (size_t i = 1; i < l; i++) + jl_svecset(tt, i - 1, jl_tparam(types, i)); + tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 0); + tt = jl_rewrap_unionall_(tt, types); + JL_GC_POP(); + return tt; +} + #ifdef JL_TRACE static int trace_en = 0; static int error_en = 1; @@ -2865,7 +3448,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t // manually inlined copy of jl_method_compiled jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache); while (codeinst) { - if (codeinst->min_world <= world && world <= 
codeinst->max_world) { + if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) { jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); if (invoke != NULL) { jl_value_t *res = invoke(F, args, nargs, codeinst); @@ -2897,6 +3480,18 @@ JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t na return _jl_invoke(F, args, nargs, mfunc, world); } +JL_DLLEXPORT jl_value_t *jl_invoke_oc(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *mfunc) +{ + jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + size_t world = oc->world; + ct->world_age = world; + jl_value_t *ret = _jl_invoke(F, args, nargs, mfunc, world); + ct->world_age = last_age; + return ret; +} + STATIC_INLINE int sig_match_fast(jl_value_t *arg1t, jl_value_t **args, jl_value_t **sig, size_t n) { // NOTE: This function is a huge performance hot spot!! @@ -2982,7 +3577,7 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t entry = jl_atomic_load_relaxed(&call_cache[cache_idx[i]]); \ if (entry && nargs == jl_svec_len(entry->sig->parameters) && \ sig_match_fast(FT, args, jl_svec_data(entry->sig->parameters), nargs) && \ - world >= entry->min_world && world <= entry->max_world) { \ + world >= jl_atomic_load_relaxed(&entry->min_world) && world <= jl_atomic_load_relaxed(&entry->max_world)) { \ goto have_entry; \ } \ } while (0); @@ -2998,9 +3593,9 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t // if no method was found in the associative cache, check the full cache JL_TIMING(METHOD_LOOKUP_FAST, METHOD_LOOKUP_FAST); mt = jl_gf_mtable(F); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); entry = NULL; - if (leafcache != (jl_array_t*)jl_an_empty_vec_any && + if (leafcache != (jl_genericmemory_t*)jl_an_empty_memory_any && jl_typetagis(jl_atomic_load_relaxed(&mt->cache), jl_typemap_level_type)) { // hashing args is expensive, but looking at mt->cache is probably even more expensive tt = lookup_arg_type_tuple(F, args, nargs); @@ -3035,14 +3630,9 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t mfunc = entry->func.linfo; } else { - JL_GC_PUSH1(&tt); assert(tt); - JL_LOCK(&mt->writelock); // cache miss case - JL_TIMING(METHOD_LOOKUP_SLOW, METHOD_LOOKUP_SLOW); mfunc = jl_mt_assoc_by_type(mt, tt, world); - JL_UNLOCK(&mt->writelock); - JL_GC_POP(); if (jl_options.malloc_log) jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation if (mfunc == NULL) { @@ -3053,6 +3643,17 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t jl_method_error(F, args, nargs, world); // unreachable } + // mfunc is about to be dispatched + uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled); + if (force_trace_dispatch || jl_options.trace_dispatch != NULL) { + uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags); + uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED; + if (!was_dispatched) { + miflags |= JL_MI_FLAGS_MASK_DISPATCHED; + jl_atomic_store_relaxed(&mfunc->flags, miflags); + record_dispatch_statement(mfunc); + } + } } #ifdef JL_TRACE @@ -3084,7 +3685,7 @@ static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT if (mt == jl_nothing) mt = NULL; jl_value_t *matches = 
ml_matches((jl_methtable_t*)mt, (jl_tupletype_t*)types, 1, 0, 0, world, 1, min_valid, max_valid, NULL); - if (matches == jl_nothing || jl_array_len(matches) != 1) + if (matches == jl_nothing || jl_array_nrows(matches) != 1) return NULL; jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); return matc; @@ -3175,6 +3776,16 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation } JL_GC_PROMISE_ROOTED(mfunc); + uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled); + if (force_trace_dispatch || jl_options.trace_dispatch != NULL) { + uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags); + uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED; + if (!was_dispatched) { + miflags |= JL_MI_FLAGS_MASK_DISPATCHED; + jl_atomic_store_relaxed(&mfunc->flags, miflags); + record_dispatch_statement(mfunc); + } + } size_t world = jl_current_task->world_age; return _jl_invoke(gf, args, nargs - 1, mfunc, world); } @@ -3199,7 +3810,8 @@ jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_ jl_gc_wb(ftype->name->mt, name); jl_set_const(module, tname, (jl_value_t*)ftype); jl_value_t *f = jl_new_struct(ftype); - ftype->instance = f; jl_gc_wb(ftype, f); + ftype->instance = f; + jl_gc_wb(ftype, f); JL_GC_POP(); return (jl_function_t*)f; } @@ -3243,30 +3855,37 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio struct ml_matches_env *closure = container_of(closure0, struct ml_matches_env, match); if (closure->intersections == 0 && !closure0->issubty) return 1; - if (closure->world < ml->min_world) { + size_t min_world = jl_atomic_load_relaxed(&ml->min_world); + size_t max_world = jl_atomic_load_relaxed(&ml->max_world); + if (closure->world < min_world) { // ignore method table entries that are part of a later world - if (closure->match.max_valid >= ml->min_world) - closure->match.max_valid = ml->min_world - 1; + if (closure->match.max_valid >= min_world) + closure->match.max_valid = min_world - 1; return 1; } - else if (closure->world > ml->max_world) { + else if (closure->world > max_world) { // ignore method table entries that have been replaced in the current world - if (closure->match.min_valid <= ml->max_world) - closure->match.min_valid = ml->max_world + 1; + if (closure->match.min_valid <= max_world) + closure->match.min_valid = max_world + 1; return 1; } - else { - // intersect the env valid range with method's inclusive valid range - if (closure->match.min_valid < ml->min_world) - closure->match.min_valid = ml->min_world; - if (closure->match.max_valid > ml->max_world) - closure->match.max_valid = ml->max_world; - } jl_method_t *meth = ml->func.method; if (closure->lim >= 0 && jl_is_dispatch_tupletype(meth->sig)) { - if (closure->lim == 0) - return 0; - closure->lim--; + int replaced = 0; + // check if this is replaced, in which case we need to avoid double-counting it against the limit + // (although it will figure out later which one to keep and return) + size_t len = jl_array_nrows(closure->t); + for (int i = 0; i < len; i++) { + if (jl_types_equal(((jl_method_match_t*)jl_array_ptr_ref(closure->t, i))->method->sig, meth->sig)) { + replaced = 1; + break; + } + } + if (!replaced) { + if (closure->lim == 0) + return 0; + closure->lim--; + } } // don't need to consider other similar methods if this ml will always fully intersect with them and dominates all of them if 
(!closure->include_ambiguous || closure->lim != -1) @@ -3274,7 +3893,7 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio closure->matc = make_method_match((jl_tupletype_t*)closure->match.ti, closure->match.env, meth, closure->match.issubty ? FULLY_COVERS : NOT_FULLY_COVERS); - size_t len = jl_array_len(closure->t); + size_t len = jl_array_nrows(closure->t); if (len == 0) { closure->t = (jl_value_t*)jl_alloc_vec_any(1); jl_array_ptr_set(closure->t, 0, (jl_value_t*)closure->matc); @@ -3291,7 +3910,6 @@ static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0) return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), jl_cachearg_offset(mt), env); } - // Visit the candidate methods, starting from t[idx], to determine a possible valid sort ordering, // where every morespecific method appears before any method which it has a common // intersection with but is not partly ambiguous with (ambiguity is transitive, particularly @@ -3342,7 +3960,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array jl_method_t *m2 = matc2->method; if (jl_subtype(ti, m2->sig)) { if (include_ambiguous) { - if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + if (!jl_method_morespecific(m2, m)) continue; } visited->items[idx] = (void*)1; @@ -3361,7 +3979,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array // First visit all "strong" edges where the child is definitely better. // This likely won't hit any cycles, but might (because morespecific is not transitive). // Along the way, record if we hit any ambiguities-we may need to track those later. - for (size_t childidx = 0; childidx < jl_array_len(t); childidx++) { + for (size_t childidx = 0; childidx < jl_array_nrows(t); childidx++) { if (childidx == idx) continue; int child_cycle = (size_t)visited->items[childidx]; @@ -3371,16 +3989,16 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array continue; // already part of this cycle jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) + int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) // TODO: we could change this to jl_has_empty_intersection(ti, (jl_value_t*)matc2->spec_types); // since we only care about sorting of the intersections the user asked us about if (!subt2 && jl_has_empty_intersection(m2->sig, m->sig)) continue; - int msp = jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig); - int msp2 = !msp && jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig); + int msp = jl_method_morespecific(m, m2); + int msp2 = !msp && jl_method_morespecific(m2, m); if (!msp) { if (subt || !include_ambiguous || (lim != -1 && msp2)) { - if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) { + if (subt2 || ((lim != -1 || (!include_ambiguous && !msp2)) && jl_subtype((jl_value_t*)ti, m2->sig))) { // this may be filtered out as fully intersected, if applicable later mayexclude = 1; } @@ -3417,7 +4035,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array jl_method_t *m2 = matc2->method; if (jl_subtype(ti, m2->sig)) { if (include_ambiguous) { - if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + if (!jl_method_morespecific(m2, m)) continue; } visited->items[idx] = (void*)1; @@ 
-3426,7 +4044,9 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array } } if ((size_t)visited->items[idx] == 1) { - assert(cycle == depth); + // n.b. cycle might be < depth, if we had a cycle with a child + // idx, but since we are on the top of the stack, nobody + // observed that and so we are content to ignore this size_t childidx = (size_t)arraylist_pop(stack); assert(childidx == idx); (void)childidx; assert(!subt || *found_minmax == 2); @@ -3491,7 +4111,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array jl_method_t *m2 = matc2->method; if (jl_subtype(ti, m2->sig)) { if (include_ambiguous) { - if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + if (!jl_method_morespecific(m2, m)) continue; } visited->items[childidx] = (void*)1; @@ -3514,7 +4134,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array size_t idx2 = (size_t)stack->items[j]; jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) + int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) // if their intersection contributes to the ambiguity cycle // and the contribution of m is fully ambiguous with the portion of the cycle from m2 if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) { @@ -3522,8 +4142,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array // we don't consider that a third method might be // disrupting that ordering and just consider them // pairwise to keep this simple). - if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) && - !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) { + if (!jl_method_morespecific(m, m2) && !jl_method_morespecific(m2, m)) { visited->items[childidx] = (void*)-1; break; } @@ -3561,7 +4180,6 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array } - // This is the collect form of calling jl_typemap_intersection_visitor // with optimizations to skip fully shadowed methods. 
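For clarity, the world-window filtering performed by ml_matches_visitor above amounts to the following; the entry's bounds are now read with relaxed atomic loads, and an entry outside the query world is skipped while the returned validity window is narrowed so a cached answer expires exactly when the skipped entry would start (or stop) applying. This is a standalone sketch with illustrative names, not the runtime's API:

#include <stddef.h>

typedef struct { size_t min_valid, max_valid; } world_range_t;

/* returns 1 if an entry valid for worlds [entry_min, entry_max] must be ignored
   for a query at `world`; `r` is tightened so the cached result expires as soon
   as the skipped entry would become (or stop being) applicable */
static int skip_entry_for_world(size_t world, size_t entry_min, size_t entry_max,
                                world_range_t *r)
{
    if (world < entry_min) {            /* entry only exists in a later world */
        if (r->max_valid >= entry_min)
            r->max_valid = entry_min - 1;
        return 1;
    }
    if (world > entry_max) {            /* entry was already replaced by `world` */
        if (r->min_valid <= entry_max)
            r->min_valid = entry_max + 1;
        return 1;
    }
    return 0;                           /* entry applies at `world` */
}

Note that the hunk above also drops the unconditional intersection with an applicable entry's own range; the surviving matches are instead stamped with each method's primary/deleted world later in ml_matches.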
// @@ -3597,14 +4215,14 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0}, intersections, world, lim, include_ambiguous, /* .t = */ jl_an_empty_vec_any, /* .matc = */ NULL}; - struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec, 1, ~(size_t)0}; + struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec}; jl_value_t *isect2 = NULL; JL_GC_PUSH6(&env.t, &env.matc, &env.match.env, &search.env, &env.match.ti, &isect2); if (mt) { // check the leaf cache if this type can be in there if (((jl_datatype_t*)unw)->isdispatchtuple) { - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)type, world); if (entry) { jl_method_instance_t *mi = entry->func.linfo; @@ -3625,10 +4243,12 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, env.match.env, meth, FULLY_COVERS); env.t = (jl_value_t*)jl_alloc_vec_any(1); jl_array_ptr_set(env.t, 0, env.matc); - if (*min_valid < entry->min_world) - *min_valid = entry->min_world; - if (*max_valid > entry->max_world) - *max_valid = entry->max_world; + size_t min_world = jl_atomic_load_relaxed(&entry->min_world); + size_t max_world = jl_atomic_load_relaxed(&entry->max_world); + if (*min_valid < min_world) + *min_valid = min_world; + if (*max_valid > max_world) + *max_valid = max_world; JL_GC_POP(); return env.t; } @@ -3651,16 +4271,21 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, env.match.env, meth, FULLY_COVERS); env.t = (jl_value_t*)jl_alloc_vec_any(1); jl_array_ptr_set(env.t, 0, env.matc); - if (*min_valid < entry->min_world) - *min_valid = entry->min_world; - if (*max_valid > entry->max_world) - *max_valid = entry->max_world; + size_t min_world = jl_atomic_load_relaxed(&entry->min_world); + size_t max_world = jl_atomic_load_relaxed(&entry->max_world); + if (*min_valid < min_world) + *min_valid = min_world; + if (*max_valid > max_world) + *max_valid = max_world; JL_GC_POP(); return env.t; } } if (!ml_mtable_visitor(mt, &env.match)) { JL_GC_POP(); + // if we return early, set only the min/max valid collected from matching + *min_valid = env.match.min_valid; + *max_valid = env.match.max_valid; return jl_nothing; } } @@ -3668,14 +4293,18 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, // else: scan everything if (!jl_foreach_reachable_mtable(ml_mtable_visitor, &env.match)) { JL_GC_POP(); + // if we return early, set only the min/max valid collected from matching + *min_valid = env.match.min_valid; + *max_valid = env.match.max_valid; return jl_nothing; } } + // if we return early, set only the min/max valid collected from matching *min_valid = env.match.min_valid; *max_valid = env.match.max_valid; // done with many of these values now env.match.ti = NULL; env.matc = NULL; env.match.env = NULL; search.env = NULL; - size_t i, j, len = jl_array_len(env.t); + size_t i, j, len = jl_array_nrows(env.t); jl_method_match_t *minmax = NULL; int minmax_ambig = 0; int all_subtypes = 1; @@ -3690,7 +4319,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_method_t *m = matc->method; if (minmax != NULL) { jl_method_t *minmaxm = minmax->method; - if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) + if (jl_method_morespecific(minmaxm, m)) continue; } minmax = matc; @@ -3708,7 +4337,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, if (matc->fully_covers == FULLY_COVERS) { jl_method_t *m = 
matc->method; jl_method_t *minmaxm = minmax->method; - if (!jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) { + if (!jl_method_morespecific(minmaxm, m)) { minmax_ambig = 1; minmax = NULL; has_ambiguity = 1; @@ -3733,7 +4362,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); if (matc->fully_covers != FULLY_COVERS) { jl_method_t *m = matc->method; - if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) + if (jl_method_morespecific(minmaxm, m)) matc->fully_covers = SENTINEL; // put a sentinel value here for sorting else all_subtypes = 0; @@ -3821,7 +4450,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx); jl_method_t *m = matc->method; int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - for (size_t idx2 = 0; idx2 < jl_array_len(env.t); idx2++) { + for (size_t idx2 = 0; idx2 < jl_array_nrows(env.t); idx2++) { if (idx2 == idx) continue; // laborious test, checking for existence and coverage of another method (m3) @@ -3852,8 +4481,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, if (ti == jl_bottom_type) continue; // and they aren't themselves simply ordered - if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) || - jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + if (jl_method_morespecific(m, m2) || jl_method_morespecific(m2, m)) continue; // now look for a third method m3 that dominated these and that fully covered this intersection already size_t k; @@ -3866,8 +4494,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx3); jl_method_t *m3 = matc3->method; if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig))) - && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig) - && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig)) { + && jl_method_morespecific(m3, m) && jl_method_morespecific(m3, m2)) { //if (jl_subtype(matc->spec_types, ti) || jl_subtype(matc->spec_types, matc3->m3->sig)) // // check if it covered not only this intersection, but all intersections with matc // // if so, we do not need to check all of them separately @@ -3903,12 +4530,24 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, arraylist_push(&result, minmax); j++; } - memcpy(jl_array_data(env.t), result.items, j * sizeof(jl_method_match_t*)); + memcpy(jl_array_data(env.t, jl_method_match_t*), result.items, j * sizeof(jl_method_match_t*)); arraylist_free(&result); if (j != len) jl_array_del_end((jl_array_t*)env.t, len - j); len = j; } + for (j = 0; j < len; j++) { + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, j); + jl_method_t *m = matc->method; + // method applicability is the same as typemapentry applicability + size_t min_world = jl_atomic_load_relaxed(&m->primary_world); + size_t max_world = jl_atomic_load_relaxed(&m->deleted_world); + // intersect the env valid range with method lookup's inclusive valid range + if (env.match.min_valid < min_world) + env.match.min_valid = min_world; + if (env.match.max_valid > max_world) + env.match.max_valid = max_world; + } if (mt && cache_result && ((jl_datatype_t*)unw)->isdispatchtuple) { // cache_result parameter keeps this from being recursive if (len == 1 && !has_ambiguity) { env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, 0); @@ -3919,6 +4558,8 
@@ static jl_value_t *ml_matches(jl_methtable_t *mt, JL_UNLOCK(&mt->writelock); } } + *min_valid = env.match.min_valid; + *max_valid = env.match.max_valid; if (ambig != NULL) *ambig = has_ambiguity; JL_GC_POP(); @@ -3951,7 +4592,7 @@ JL_DLLEXPORT uint64_t jl_typeinf_timing_begin(void) return jl_hrtime(); } -JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start) +JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start, int is_recompile) { if (!start) return; @@ -3960,19 +4601,12 @@ JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start) if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { uint64_t inftime = jl_hrtime() - start; jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime); + if (is_recompile) { + jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, inftime); + } } } -JL_DLLEXPORT void jl_typeinf_lock_begin(void) -{ - JL_LOCK(&jl_codegen_lock); -} - -JL_DLLEXPORT void jl_typeinf_lock_end(void) -{ - JL_UNLOCK(&jl_codegen_lock); -} - #ifdef __cplusplus } #endif diff --git a/src/iddict.c b/src/iddict.c index 1fa8a67d1ae96..0a0895d048c32 100644 --- a/src/iddict.c +++ b/src/iddict.c @@ -1,49 +1,48 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#define hash_size(h) (jl_array_len(h) / 2) +#define hash_size(h) (h->length / 2) // compute empirical max-probe for a given size #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6) -#define keyhash(k) jl_object_id_(jl_typeof(k), k) +#define keyhash(k) jl_object_id_(jl_typetagof(k), k) #define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2) -static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val); +static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val); -JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz) +JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz) { - size_t sz = jl_array_len(a); + size_t sz = a->length; size_t i; - jl_value_t **ol = (jl_value_t **)a->data; - jl_array_t *newa = jl_alloc_vec_any(newsz); - // keep the original array in the original slot since we need `ol` + jl_value_t **ol = (jl_value_t **) a->ptr; + jl_genericmemory_t *newa = NULL; + // keep the original memory in the original slot since we need `ol` // to be valid in the loop below. 
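The iddict.c changes that begin here keep the eqtable's open-addressed layout while switching its storage from jl_array_t to jl_genericmemory_t: a flat buffer of interleaved key/value slots, indexed by h2index and probed pair-by-pair, with the probe length capped by max_probe (the insert and lookup routines below grow the table when that cap is exceeded). A minimal standalone sketch of the lookup scheme, using plain C arrays and a caller-supplied equality predicate instead of the runtime's types:

#include <stddef.h>

#define MAX_PROBE(sz) ((sz) <= 1024 ? 16 : (sz) >> 6)

/* tab holds 2*sz slots laid out as [key0, val0, key1, val1, ...]; sz is a power
   of two, so masking implements the wrap-around */
static void *sketch_eqtable_lookup(void **tab, size_t sz, void *key, size_t hash,
                                   int (*eq)(void *, void *))
{
    size_t maxprobe = MAX_PROBE(sz);
    size_t index = (hash & (sz - 1)) * 2;   /* h2index: slot holding the key */
    size_t nslots = sz * 2;
    size_t orig = index, iter = 0;
    do {
        void *k = tab[index];
        if (k == NULL)
            return NULL;                    /* an empty slot ends the probe chain */
        if (eq(k, key))
            return tab[index + 1];          /* the value sits right after its key */
        index = (index + 2) & (nslots - 1); /* advance one key/value pair */
        iter++;
    } while (iter <= maxprobe && index != orig);
    return NULL;                            /* gave up after maxprobe pairs */
}

The real jl_table_peek_bp returns a pointer to the value slot (so jl_eqtable_getkey can step back one slot to recover the key); the sketch returns the value itself for brevity.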
JL_GC_PUSH2(&newa, &a); + newa = jl_alloc_memory_any(newsz); for (i = 0; i < sz; i += 2) { if (ol[i + 1] != NULL) { jl_table_assign_bp(&newa, ol[i], ol[i + 1]); - // it is however necessary here because allocation - // can (and will) occur in a recursive call inside table_lookup_bp } } JL_GC_POP(); return newa; } -static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val) +static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val) { // pa points to a **un**rooted address uint_t hv; - jl_array_t *a = *pa; + jl_genericmemory_t *a = *pa; size_t orig, index, iter, empty_slot; size_t newsz, sz = hash_size(a); if (sz == 0) { - a = jl_alloc_vec_any(HT_N_INLINE); + a = jl_alloc_memory_any(HT_N_INLINE); sz = hash_size(a); *pa = a; } size_t maxprobe = max_probe(sz); - _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data; + _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*) a->ptr; hv = keyhash(key); while (1) { @@ -92,7 +91,7 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_ /* quadruple size, rehash, retry the insert */ /* it's important to grow the table really fast; otherwise we waste */ /* lots of time rehashing all the keys over and over. */ - sz = jl_array_len(a); + sz = a -> length; if (sz < HT_N_INLINE) newsz = HT_N_INLINE; else if (sz >= (1 << 19) || (sz <= (1 << 8))) @@ -102,20 +101,20 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_ *pa = jl_idtable_rehash(*pa, newsz); a = *pa; - tab = (_Atomic(jl_value_t*)*)a->data; + tab = (_Atomic(jl_value_t*)*) a->ptr; sz = hash_size(a); maxprobe = max_probe(sz); } } /* returns bp if key is in hash, otherwise NULL */ -inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT +inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_genericmemory_t *a, jl_value_t *key) JL_NOTSAFEPOINT { size_t sz = hash_size(a); if (sz == 0) return NULL; size_t maxprobe = max_probe(sz); - _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data; + _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*) a->ptr; uint_t hv = keyhash(key); size_t index = h2index(hv, sz); sz *= 2; @@ -142,7 +141,7 @@ inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL } JL_DLLEXPORT -jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted) +jl_genericmemory_t *jl_eqtable_put(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted) { int inserted = jl_table_assign_bp(&h, key, val); if (p_inserted) @@ -153,20 +152,20 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int // Note: lookup in the IdDict is permitted concurrently, if you avoid deletions, // and assuming you do use an external lock around all insertions JL_DLLEXPORT -jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT +jl_value_t *jl_eqtable_get(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT { _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); return (bp == NULL) ? deflt : jl_atomic_load_relaxed(bp); } -jl_value_t *jl_eqtable_getkey(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT +jl_value_t *jl_eqtable_getkey(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT { _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); return (bp == NULL) ? 
deflt : jl_atomic_load_relaxed(bp - 1); } JL_DLLEXPORT -jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found) +jl_value_t *jl_eqtable_pop(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt, int *found) { _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); if (found) @@ -180,12 +179,12 @@ jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, in } JL_DLLEXPORT -size_t jl_eqtable_nextind(jl_array_t *t, size_t i) +size_t jl_eqtable_nextind(jl_genericmemory_t *t, size_t i) { if (i & 1) i++; - size_t alen = jl_array_dim0(t); - while (i < alen && ((void **)t->data)[i + 1] == NULL) + size_t alen = t->length; + while (i < alen && ((void**) t->ptr)[i + 1] == NULL) i += 2; if (i >= alen) return (size_t)-1; @@ -194,3 +193,4 @@ size_t jl_eqtable_nextind(jl_array_t *t, size_t i) #undef hash_size #undef max_probe +#undef h2index diff --git a/src/idset.c b/src/idset.c new file mode 100644 index 0000000000000..b9711ee17f021 --- /dev/null +++ b/src/idset.c @@ -0,0 +1,118 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + + +static uint_t idset_hash(size_t idx, jl_value_t *data) +{ + jl_value_t *x = jl_genericmemory_ptr_ref(data, idx); + // x should not be NULL, unless there was concurrent corruption + return x == NULL ? 0 : jl_object_id(x); +} + +static int idset_eq(size_t idx, const void *y, jl_value_t *data, uint_t hv) +{ + jl_value_t *x = jl_genericmemory_ptr_ref(data, idx); + // x should not be NULL, unless there was concurrent corruption + return x == NULL ? 0 : jl_egal(x, (jl_value_t*)y); +} + +jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz) +{ + if (newsz == 0) + return idxs; + newsz = next_power_of_two(newsz); + //if (idxs->length == newsz) + // jl_idset_put_idx(keys, idxs, -newsz+1); + //else + return smallintset_rehash(idxs, idset_hash, (jl_value_t*)keys, newsz, 0); +} + +// Return idx if key is in hash, otherwise -1 +// Note: lookup in the IdSet is permitted concurrently, if you avoid deletions, +// and assuming you do use an external lock around all insertions +ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT +{ + uintptr_t hv = jl_object_id(key); + return jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 0); +} + +jl_value_t *jl_idset_get(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT +{ + ssize_t idx = jl_idset_peek_bp(keys, idxs, key); + if (idx == -1) + return NULL; + return jl_genericmemory_ptr_ref(keys, idx); +} + + +static ssize_t idset_compact(jl_genericmemory_t *keys) +{ + // compact keys before rehashing idxs + ssize_t i, j; + ssize_t rehash = 0; + for (i = j = 0; i < keys->length; i++) { + jl_value_t *k = jl_genericmemory_ptr_ref(keys, i); + if (k != NULL) { + if (i != j) { + rehash = 1; + jl_genericmemory_ptr_set(keys, j, k); + jl_genericmemory_ptr_set(keys, i, NULL); + } + j++; + } + } + return rehash ? -j : j; +} + +jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx) +{ + ssize_t l = keys->length; + ssize_t i = l; + while (i > 0 && jl_genericmemory_ptr_ref(keys, i - 1) == NULL) + i--; + // i points to the place to insert + *newidx = i; + if (i == l) { + i = idset_compact(keys); + if (i < 0) { + *newidx = i - 1; + i = -i; + } + if (i >= l / 3 * 2) { + size_t nl = l < 4 ? 
4 : (l * 3) >> 1; // grow space by 50% if less than 33% free after compacting + jl_genericmemory_t *nk = jl_alloc_genericmemory(jl_memory_any_type, nl); + if (i > 0) + memcpy(nk->ptr, keys->ptr, sizeof(void*) * i); + keys = nk; + } + } + assert(jl_genericmemory_ptr_ref(keys, i) == NULL); + jl_genericmemory_ptr_set(keys, i, key); + return keys; +} + +jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx) +{ + _Atomic(jl_genericmemory_t*) newidxs = idxs; + JL_GC_PUSH1(&newidxs); + if (idx < 0) { // full rehash + smallintset_empty(idxs); + for (ssize_t i = 0; i < -idx; i++) + if (jl_genericmemory_ptr_ref(keys, i) != NULL) + jl_smallintset_insert(&newidxs, NULL, idset_hash, i, (jl_value_t*)keys); + } + else { + jl_smallintset_insert(&newidxs, NULL, idset_hash, idx, (jl_value_t*)keys); + } + JL_GC_POP(); + return jl_atomic_load_relaxed(&newidxs); +} + +/* returns idx if key is in hash, otherwise -1 */ +ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT +{ + uintptr_t hv = jl_object_id(key); + ssize_t idx = jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 1); + if (idx != -1) + jl_genericmemory_ptr_set(keys, idx, NULL); + return idx; +} diff --git a/src/init.c b/src/init.c index 4a152ed04b13d..7b41e63e98455 100644 --- a/src/init.c +++ b/src/init.c @@ -44,6 +44,7 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE); // list of modules being deserialized with __init__ methods jl_array_t *jl_module_init_order; +arraylist_t *jl_entrypoint_mis; JL_DLLEXPORT size_t jl_page_size; @@ -64,39 +65,29 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) // threads since it seems to return bogus values for master thread on Linux // and possibly OSX. if (!ismaster) { -# if defined(_OS_LINUX_) +# if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) pthread_attr_t attr; +#if defined(_OS_FREEBSD_) + pthread_attr_init(&attr); + pthread_attr_get_np(pthread_self(), &attr); +#else pthread_getattr_np(pthread_self(), &attr); +#endif void *stackaddr; size_t stacksize; pthread_attr_getstack(&attr, &stackaddr, &stacksize); pthread_attr_destroy(&attr); - *stack_lo = (void*)stackaddr; -#pragma GCC diagnostic push -#if defined(_COMPILER_GCC_) && __GNUC__ >= 12 -#pragma GCC diagnostic ignored "-Wdangling-pointer" -#endif - *stack_hi = (void*)__builtin_frame_address(0); -#pragma GCC diagnostic pop + *stack_lo = stackaddr; + *stack_hi = (char*)stackaddr + stacksize; return; # elif defined(_OS_DARWIN_) extern void *pthread_get_stackaddr_np(pthread_t thread); extern size_t pthread_get_stacksize_np(pthread_t thread); pthread_t thread = pthread_self(); void *stackaddr = pthread_get_stackaddr_np(thread); - *stack_lo = (void*)stackaddr; - *stack_hi = (void*)__builtin_frame_address(0); - return; -# elif defined(_OS_FREEBSD_) - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_get_np(pthread_self(), &attr); - void *stackaddr; - size_t stacksize; - pthread_attr_getstack(&attr, &stackaddr, &stacksize); - pthread_attr_destroy(&attr); - *stack_lo = (void*)stackaddr; - *stack_hi = (void*)__builtin_frame_address(0); + size_t stacksize = pthread_get_stacksize_np(thread); + *stack_lo = (char*)stackaddr - stacksize; + *stack_hi = stackaddr; return; # else # warning "Getting precise stack size for thread is not supported." 
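The stack-limit hunk above folds the FreeBSD branch into the Linux one and stops using __builtin_frame_address for the upper bound; the Darwin arithmetic still differs because the two native APIs report opposite ends of the stack. A standalone sketch of that logic (Linux and Darwin shown; FreeBSD follows the Linux shape via pthread_attr_get_np from <pthread_np.h>):

#define _GNU_SOURCE               /* for pthread_getattr_np on glibc */
#include <pthread.h>
#include <stddef.h>

static void sketch_stack_bounds(void **lo, void **hi)
{
#if defined(__APPLE__)
    /* Darwin reports the high end of the stack (stacks grow down) plus its size */
    pthread_t self = pthread_self();
    void *top = pthread_get_stackaddr_np(self);
    size_t sz = pthread_get_stacksize_np(self);
    *lo = (char *)top - sz;
    *hi = top;
#else
    /* Linux reports the lowest mapped address of the stack plus its size */
    pthread_attr_t attr;
    pthread_getattr_np(pthread_self(), &attr);
    void *addr;
    size_t sz;
    pthread_attr_getstack(&attr, &addr, &sz);
    pthread_attr_destroy(&attr);
    *lo = addr;
    *hi = (char *)addr + sz;
#endif
}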
@@ -246,25 +237,16 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER jl_task_t *ct = jl_get_current_task(); - if (ct) { - if (exitcode == 0) - jl_write_compiler_output(); + if (ct == NULL && jl_base_module) { + ct = container_of(jl_adopt_thread(), jl_task_t, gcstack); + } + else if (ct != NULL) { // we are about to start tearing everything down, so lets try not to get // upset by the local mess of things when we run the user's _atexit hooks // this also forces us into a GC-unsafe region without a safepoint jl_task_frame_noreturn(ct); - } - - if (ct == NULL && jl_base_module) - ct = container_of(jl_adopt_thread(), jl_task_t, gcstack); - else if (ct != NULL) jl_gc_safepoint_(ct->ptls); - - jl_print_gc_stats(JL_STDERR); - if (jl_options.code_coverage) - jl_write_coverage_data(jl_options.output_code_coverage); - if (jl_options.malloc_log) - jl_write_malloc_log(); + } if (jl_base_module) { jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit")); @@ -282,7 +264,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER } JL_CATCH { jl_printf((JL_STREAM*)STDERR_FILENO, "\natexit hook threw an error: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO } @@ -290,6 +272,15 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER } } + if (ct && exitcode == 0) + jl_write_compiler_output(); + + jl_print_gc_stats(JL_STDERR); + if (jl_options.code_coverage) + jl_write_coverage_data(jl_options.output_code_coverage); + if (jl_options.malloc_log) + jl_write_malloc_log(); + // replace standard output streams with something that we can still print to // after the finalizers from base/stream.jl close the TTY JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; @@ -317,7 +308,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER assert(item); uv_unref(item->h); jl_printf((JL_STREAM*)STDERR_FILENO, "error during exit cleanup: close: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO item = next_shutdown_queue_item(item); @@ -338,15 +329,15 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER // we would like to guarantee this, but cannot currently, so there is still a small race window // that needs to be fixed in libuv } - if (ct) - (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe if (loop != NULL) { // TODO: consider uv_loop_close(loop) here, before shutdown? uv_library_shutdown(); // no JL_UV_UNLOCK(), since it is now torn down } - - // TODO: Destroy threads? 
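The jl_atexit_hook hunk above now adopts the calling thread when the hook is reached from a thread the runtime has never seen (ct == NULL), rather than only handling an existing task. The same adoption step is what any foreign thread must do before calling into Julia; a hedged embedding sketch, assuming an already-initialized runtime and the embedding API of recent Julia versions:

#include <julia.h>

/* a thread not started by Julia must register itself once before making any
   other runtime calls; jl_adopt_thread returns the new thread's GC-frame slot,
   which jl_atexit_hook above converts back to a task with container_of */
static void run_on_foreign_thread(void)
{
    if (jl_get_current_task() == NULL)   /* thread not yet known to the runtime */
        (void)jl_adopt_thread();
    jl_eval_string("println(\"hello from an adopted thread\")");
}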
+ if (ct) + jl_safepoint_suspend_all_threads(ct); // Destroy other threads, so that they don't segfault + if (ct) + (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe jl_destroy_timing(); // cleans up the current timing_stack for noreturn #ifdef USE_TIMING_COUNTS @@ -372,7 +363,7 @@ JL_DLLEXPORT void jl_postoutput_hook(void) } JL_CATCH { jl_printf((JL_STREAM*)STDERR_FILENO, "\npostoutput hook threw an error: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO } @@ -382,6 +373,7 @@ JL_DLLEXPORT void jl_postoutput_hook(void) } void post_boot_hooks(void); +void post_image_load_hooks(void); JL_DLLEXPORT void *jl_libjulia_internal_handle; JL_DLLEXPORT void *jl_libjulia_handle; @@ -538,7 +530,7 @@ int jl_isabspath(const char *in) JL_NOTSAFEPOINT return 0; // relative path } -static char *abspath(const char *in, int nprefix) +static char *absrealpath(const char *in, int nprefix) { // compute an absolute realpath location, so that chdir doesn't change the file reference // ignores (copies directly over) nprefix characters at the start of abspath #ifndef _OS_WINDOWS_ @@ -574,6 +566,14 @@ static char *abspath(const char *in, int nprefix) } } #else + // GetFullPathName intentionally errors if given an empty string so manually insert `.` to invoke cwd + char *in2 = (char*)malloc_s(JL_PATH_MAX); + if (strlen(in) - nprefix == 0) { + memcpy(in2, in, nprefix); + in2[nprefix] = '.'; + in2[nprefix+1] = '\0'; + in = in2; + } DWORD n = GetFullPathName(in + nprefix, 0, NULL, NULL); if (n <= 0) { jl_error("fatal error: jl_options.image_file path too long or GetFullPathName failed"); @@ -584,6 +584,7 @@ static char *abspath(const char *in, int nprefix) jl_error("fatal error: jl_options.image_file path too long or GetFullPathName failed"); } memcpy(out, in, nprefix); + free(in2); #endif return out; } @@ -619,7 +620,8 @@ static const char *absformat(const char *in) } static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel) -{ // this function resolves the paths in jl_options to absolute file locations as needed +{ + // this function resolves the paths in jl_options to absolute file locations as needed // and it replaces the pointers to `julia_bindir`, `julia_bin`, `image_file`, and output file paths // it may fail, print an error, and exit(1) if any of these paths are longer than JL_PATH_MAX // @@ -644,7 +646,7 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel) } } if (jl_options.julia_bindir) - jl_options.julia_bindir = abspath(jl_options.julia_bindir, 0); + jl_options.julia_bindir = absrealpath(jl_options.julia_bindir, 0); free(free_path); free_path = NULL; if (jl_options.image_file) { @@ -659,33 +661,33 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel) jl_options.image_file = free_path; } if (jl_options.image_file) - jl_options.image_file = abspath(jl_options.image_file, 0); + jl_options.image_file = absrealpath(jl_options.image_file, 0); if (free_path) { free(free_path); free_path = NULL; } } if (jl_options.outputo) - jl_options.outputo = abspath(jl_options.outputo, 0); + jl_options.outputo = absrealpath(jl_options.outputo, 0); if (jl_options.outputji) - jl_options.outputji = abspath(jl_options.outputji, 0); + jl_options.outputji = absrealpath(jl_options.outputji, 0); if (jl_options.outputbc) - jl_options.outputbc = abspath(jl_options.outputbc, 0); + jl_options.outputbc = absrealpath(jl_options.outputbc, 
0); if (jl_options.outputasm) - jl_options.outputasm = abspath(jl_options.outputasm, 0); + jl_options.outputasm = absrealpath(jl_options.outputasm, 0); if (jl_options.machine_file) - jl_options.machine_file = abspath(jl_options.machine_file, 0); + jl_options.machine_file = absrealpath(jl_options.machine_file, 0); if (jl_options.output_code_coverage) jl_options.output_code_coverage = absformat(jl_options.output_code_coverage); if (jl_options.tracked_path) - jl_options.tracked_path = absformat(jl_options.tracked_path); + jl_options.tracked_path = absrealpath(jl_options.tracked_path, 0); const char **cmdp = jl_options.cmds; if (cmdp) { for (; *cmdp; cmdp++) { const char *cmd = *cmdp; if (cmd[0] == 'L') { - *cmdp = abspath(cmd, 1); + *cmdp = absrealpath(cmd, 1); } } } @@ -708,6 +710,7 @@ extern jl_mutex_t jl_modules_mutex; extern jl_mutex_t precomp_statement_out_lock; extern jl_mutex_t newly_inferred_mutex; extern jl_mutex_t global_roots_lock; +extern jl_mutex_t profile_show_peek_cond_lock; static void restore_fp_env(void) { @@ -719,14 +722,29 @@ static void restore_fp_env(void) static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct); JL_DLLEXPORT int jl_default_debug_info_kind; +JL_DLLEXPORT jl_cgparams_t jl_default_cgparams = { + /* track_allocations */ 1, + /* code_coverage */ 1, + /* prefer_specsig */ 0, +#ifdef _OS_WINDOWS_ + /* gnu_pubnames */ 0, +#else + /* gnu_pubnames */ 1, +#endif + /* debug_info_kind */ 0, // later DICompileUnit::DebugEmissionKind::FullDebug, + /* debug_info_level */ 0, // later jl_options.debug_level, + /* safepoint_on_entry */ 1, + /* gcstack_arg */ 1, + /* use_jlplt*/ 1, + /* trim */ 0 }; static void init_global_mutexes(void) { JL_MUTEX_INIT(&jl_modules_mutex, "jl_modules_mutex"); JL_MUTEX_INIT(&precomp_statement_out_lock, "precomp_statement_out_lock"); JL_MUTEX_INIT(&newly_inferred_mutex, "newly_inferred_mutex"); JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock"); - JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock"); JL_MUTEX_INIT(&typecache_lock, "typecache_lock"); + JL_MUTEX_INIT(&profile_show_peek_cond_lock, "profile_show_peek_cond_lock"); } JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) @@ -738,6 +756,13 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) // Make sure we finalize the tls callback before starting any threads. 
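The new jl_default_cgparams above is a positional initializer whose field names are carried only in comments; for readers, here is the same configuration written with C99 designated initializers (the field names are taken from those comments, so treat the exact spelling as an assumption about jl_cgparams_t rather than a verified layout):

jl_cgparams_t params = {
    .track_allocations = 1,
    .code_coverage = 1,
    .prefer_specsig = 0,
#ifdef _OS_WINDOWS_
    .gnu_pubnames = 0,
#else
    .gnu_pubnames = 1,
#endif
    .debug_info_kind = 0,    /* later DICompileUnit::DebugEmissionKind::FullDebug */
    .debug_info_level = 0,   /* later jl_options.debug_level */
    .safepoint_on_entry = 1,
    .gcstack_arg = 1,
    .use_jlplt = 1,
    .trim = 0,
};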
(void)jl_get_pgcstack(); + // initialize symbol-table lock + uv_mutex_init(&symtab_lock); + // initialize the live tasks lock + uv_mutex_init(&live_tasks_lock); + // initialize the profiler buffer lock + uv_mutex_init(&bt_data_prof_lock); + // initialize backtraces jl_init_profile_lock(); #ifdef _OS_WINDOWS_ @@ -801,11 +826,6 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #endif #endif - if ((jl_options.outputo || jl_options.outputbc || jl_options.outputasm) && - (jl_options.code_coverage || jl_options.malloc_log)) { - jl_error("cannot generate code-coverage or track allocation information while generating a .o, .bc, or .s output file"); - } - jl_init_rand(); jl_init_runtime_ccall(); jl_init_tasks(); @@ -818,6 +838,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) arraylist_new(&jl_linkage_blobs, 0); arraylist_new(&jl_image_relocs, 0); + arraylist_new(&jl_top_mods, 0); arraylist_new(&eytzinger_image_tree, 0); arraylist_new(&eytzinger_idxs, 0); arraylist_push(&eytzinger_idxs, (void*)0); @@ -828,6 +849,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" #endif + if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) { + // enable before creating the root task so it gets timings too. + jl_atomic_fetch_add(&jl_task_metrics_enabled, 1); + } // warning: this changes `jl_current_task`, so be careful not to call that from this function jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); #pragma GCC diagnostic pop @@ -840,27 +865,29 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ JL_TIMING(JULIA_INIT, JULIA_INIT); jl_resolve_sysimg_location(rel); // loads sysimg if available, and conditionally sets jl_options.cpu_target - if (jl_options.image_file) + if (rel == JL_IMAGE_IN_MEMORY) { + jl_set_sysimg_so(jl_exe_handle); + jl_options.image_file = jl_options.julia_bin; + } + else if (jl_options.image_file) jl_preload_sysimg_so(jl_options.image_file); if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; jl_init_codegen(); + jl_init_common_symbols(); if (jl_options.image_file) { jl_restore_system_image(jl_options.image_file); } else { jl_init_types(); - jl_global_roots_table = jl_alloc_vec_any(0); + jl_global_roots_list = (jl_genericmemory_t*)jl_an_empty_memory_any; + jl_global_roots_keyset = (jl_genericmemory_t*)jl_an_empty_memory_any; } - jl_init_common_symbols(); jl_init_flisp(); jl_init_serializer(); if (!jl_options.image_file) { - jl_core_module = jl_new_module(jl_symbol("Core"), NULL); - jl_core_module->parent = jl_core_module; - jl_type_typename->mt->module = jl_core_module; jl_top_module = jl_core_module; jl_init_intrinsic_functions(); jl_init_primitives(); @@ -875,10 +902,14 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_n_markthreads = 0; jl_n_sweepthreads = 0; jl_n_gcthreads = 0; - jl_n_threads_per_pool[0] = 1; - jl_n_threads_per_pool[1] = 0; + jl_n_threads_per_pool[0] = 0; // Interactive threadpool + jl_n_threads_per_pool[1] = 1; // Default threadpool + } else { + post_image_load_hooks(); } jl_start_threads(); + jl_start_gc_threads(); + uv_barrier_wait(&thread_init_done); jl_gc_enable(1); @@ -886,7 +917,7 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_array_t *init_order = jl_module_init_order; JL_GC_PUSH1(&init_order); jl_module_init_order = NULL; - int i, l = jl_array_len(init_order); + int i, l = jl_array_nrows(init_order); for (i = 0; i 
< l; i++) { jl_value_t *mod = jl_array_ptr_ref(init_order, i); jl_module_run_initializer((jl_module_t*)mod); @@ -894,6 +925,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ JL_GC_POP(); } + if (jl_options.trim) { + jl_entrypoint_mis = (arraylist_t *)malloc_s(sizeof(arraylist_t)); + arraylist_new(jl_entrypoint_mis, 0); + } + if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON) jl_install_sigint_handler(); } diff --git a/src/interpreter.c b/src/interpreter.c index 2ad56e76b2549..338853b56f692 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -17,6 +17,7 @@ extern "C" { typedef struct { jl_code_info_t *src; // contains the names and number of slots jl_method_instance_t *mi; // MethodInstance we're executing, or NULL if toplevel + jl_code_instance_t *ci; // CodeInstance we're executing (for generated functions) jl_module_t *module; // context for globals jl_value_t **locals; // slots for holding local slots and ssavalues jl_svec_t *sparam_vals; // method static parameters, if eval-ing a method body @@ -65,7 +66,8 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT; // we define this separately so that we can populate the frame before we add it to the backtrace // it's recommended to mark the containing function with NOINLINE, though not essential #define JL_GC_ENABLEFRAME(frame) \ - ((void**)&frame[1])[0] = __builtin_frame_address(0); + jl_signal_fence(); \ + ((void**)&frame[1])[0] = __builtin_frame_address(0); #endif @@ -92,9 +94,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s) jl_error("method: invalid declaration"); } jl_binding_t *b = jl_get_binding_for_method_def(modu, fname); - _Atomic(jl_value_t*) *bp = &b->value; - jl_value_t *gf = jl_generic_function_def(fname, modu, bp, b); - return gf; + return jl_declare_const_gf(b, modu, fname); } jl_value_t *atypes = NULL, *meth = NULL, *fname = NULL; @@ -134,10 +134,31 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state JL_GC_PUSHARGS(argv, nargs - 1); size_t i; for (i = 1; i < nargs; i++) - argv[i] = eval_value(args[i], s); - jl_method_instance_t *meth = (jl_method_instance_t*)args[0]; - assert(jl_is_method_instance(meth)); - jl_value_t *result = jl_invoke(argv[1], &argv[2], nargs - 2, meth); + argv[i-1] = eval_value(args[i], s); + jl_value_t *c = args[0]; + assert(jl_is_code_instance(c) || jl_is_method_instance(c)); + jl_value_t *result = NULL; + if (jl_is_code_instance(c)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)c; + assert(jl_atomic_load_relaxed(&codeinst->min_world) <= jl_current_task->world_age && + jl_current_task->world_age <= jl_atomic_load_relaxed(&codeinst->max_world)); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + result = invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst); + + } else { + if (codeinst->owner != jl_nothing) { + jl_error("Failed to invoke or compile external codeinst"); + } + result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, jl_get_ci_mi(codeinst)); + } + } else { + result = jl_invoke(argv[0], nargs == 2 ? 
NULL : &argv[1], nargs - 2, (jl_method_instance_t*)c); + } JL_GC_POP(); return result; } @@ -146,7 +167,7 @@ jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e) { jl_value_t *v = jl_get_global(m, e); if (v == NULL) - jl_undefined_var_error(e); + jl_undefined_var_error(e, (jl_value_t*)m); return v; } @@ -154,18 +175,18 @@ jl_value_t *jl_eval_globalref(jl_globalref_t *g) { jl_value_t *v = jl_get_globalref_value(g); if (v == NULL) - jl_undefined_var_error(g->name); + jl_undefined_var_error(g->name, (jl_value_t*)g->mod); return v; } static int jl_source_nslots(jl_code_info_t *src) JL_NOTSAFEPOINT { - return jl_array_len(src->slotflags); + return jl_array_nrows(src->slotflags); } static int jl_source_nssavalues(jl_code_info_t *src) JL_NOTSAFEPOINT { - return jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); + return jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_nrows(src->ssavaluetypes); } static void eval_stmt_value(jl_value_t *stmt, interpreter_state *s) @@ -190,7 +211,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) jl_error("access to invalid slot number"); jl_value_t *v = s->locals[n - 1]; if (v == NULL) - jl_undefined_var_error((jl_sym_t*)jl_array_ptr_ref(src->slotnames, n - 1)); + jl_undefined_var_error((jl_sym_t*)jl_array_ptr_ref(src->slotnames, n - 1), (jl_value_t*)jl_local_sym); return v; } if (jl_is_quotenode(e)) { @@ -216,7 +237,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) return e; jl_expr_t *ex = (jl_expr_t*)e; jl_value_t **args = jl_array_ptr_data(ex->args); - size_t nargs = jl_array_len(ex->args); + size_t nargs = jl_array_nrows(ex->args); jl_sym_t *head = ex->head; if (head == jl_call_sym) { return do_call(args, nargs, s); @@ -230,17 +251,15 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) else if (head == jl_isdefined_sym) { jl_value_t *sym = args[0]; int defined = 0; + assert(nargs == 1 && "malformed IR"); if (jl_is_slotnumber(sym) || jl_is_argument(sym)) { ssize_t n = jl_slot_number(sym); if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL) jl_error("access to invalid slot number"); defined = s->locals[n - 1] != NULL; } - else if (jl_is_globalref(sym)) { - defined = jl_boundp(jl_globalref_mod(sym), jl_globalref_name(sym)); - } - else if (jl_is_symbol(sym)) { - defined = jl_boundp(s->module, (jl_sym_t*)sym); + else if (jl_is_globalref(sym) || jl_is_symbol(sym)) { + jl_error("[Internal Error]: :isdefined on globalref should use `isdefinedglobal`"); } else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == jl_static_parameter_sym) { ssize_t n = jl_unbox_long(jl_exprarg(sym, 0)); @@ -267,7 +286,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) if (var == jl_getfield_undefref_sym) jl_throw(jl_undefref_exception); else - jl_undefined_var_error(var); + jl_undefined_var_error(var, (jl_value_t*)jl_local_sym); } return jl_nothing; } @@ -296,7 +315,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) argv[i] = eval_value(args[i], s); JL_NARGSV(new_opaque_closure, 4); jl_value_t *ret = (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)argv[0], argv[1], argv[2], - argv[3], argv+4, nargs-4, 1); + argv[4], argv+5, nargs-5, 1); JL_GC_POP(); return ret; } @@ -306,7 +325,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) { jl_value_t *sp = jl_svecref(s->sparam_vals, n - 1); if 
(jl_is_typevar(sp) && !s->preevaluation) - jl_undefined_var_error(((jl_tvar_t*)sp)->name); + jl_undefined_var_error(((jl_tvar_t*)sp)->name, (jl_value_t*)jl_static_parameter_sym); return sp; } // static parameter val unknown needs to be an error for ccall @@ -316,7 +335,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) return jl_copy_ast(eval_value(args[0], s)); } else if (head == jl_exc_sym) { - return jl_current_exception(); + return jl_current_exception(jl_current_task); } else if (head == jl_boundscheck_sym) { return jl_true; @@ -350,6 +369,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ size_t from = s->ip; size_t ip = to; unsigned nphiblockstmts = 0; + unsigned last_phi = 0; for (ip = to; ip < ns; ip++) { jl_value_t *e = jl_array_ptr_ref(stmts, ip); if (!jl_is_phinode(e)) { @@ -360,9 +380,16 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ } // Everything else is allowed in the phi-block for implementation // convenience - fall through. + } else { + last_phi = nphiblockstmts + 1; } nphiblockstmts += 1; } + // Cut off the phi block at the last phi node. For global refs that are not + // actually in the phi block, we want to evaluate them in the regular interpreter + // loop instead to make sure exception state is set up properly in case they throw. + nphiblockstmts = last_phi; + ip = to + last_phi; if (nphiblockstmts) { jl_value_t **dest = &s->locals[jl_source_nslots(s->src) + to]; jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphiblockstmts); @@ -385,8 +412,8 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ // %2 = phi ... // %3 = phi (1)[1 => %a], (2)[2 => %b] // from = 1, to = closest = 2, i = 1 --> edge = 2, edge_from = 2, from = 2 - for (unsigned j = 0; j < jl_array_len(edges); ++j) { - size_t edge_from = ((int32_t*)jl_array_data(edges))[j]; // 1-indexed + for (unsigned j = 0; j < jl_array_nrows(edges); ++j) { + size_t edge_from = jl_array_data(edges, int32_t)[j]; // 1-indexed if (edge_from == from + 1) { if (edge == -1) edge = j; @@ -443,15 +470,13 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel) { jl_handler_t __eh; - size_t ns = jl_array_len(stmts); + size_t ns = jl_array_nrows(stmts); jl_task_t *ct = jl_current_task; while (1) { s->ip = ip; if (ip >= ns) jl_error("`body` expression must terminate in `return`. 
Use `block` instead."); - if (toplevel) - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); jl_value_t *stmt = jl_array_ptr_ref(stmts, ip); assert(!jl_is_phinode(stmt)); size_t next_ip = ip + 1; @@ -480,54 +505,27 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, ssize_t id = ((jl_ssavalue_t*)phic)->id - 1; s->locals[jl_source_nslots(s->src) + id] = val; } - else if (jl_is_expr(stmt)) { - // Most exprs are allowed to end a BB by fall through - jl_sym_t *head = ((jl_expr_t*)stmt)->head; - if (head == jl_assign_sym) { - jl_value_t *lhs = jl_exprarg(stmt, 0); - jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s); - if (jl_is_slotnumber(lhs)) { - ssize_t n = jl_slot_number(lhs); - assert(n <= jl_source_nslots(s->src) && n > 0); - s->locals[n - 1] = rhs; - } - else { - jl_module_t *modu; - jl_sym_t *sym; - if (jl_is_globalref(lhs)) { - modu = jl_globalref_mod(lhs); - sym = jl_globalref_name(lhs); - } - else { - assert(jl_is_symbol(lhs)); - modu = s->module; - sym = (jl_sym_t*)lhs; - } - JL_GC_PUSH1(&rhs); - jl_binding_t *b = jl_get_binding_wr(modu, sym); - jl_checked_assignment(b, modu, sym, rhs); - JL_GC_POP(); - } - } - else if (head == jl_enter_sym) { - jl_enter_handler(&__eh); - // This is a bit tricky, but supports the implementation of PhiC nodes. - // They are conceptually slots, but the slot to store to doesn't get explicitly - // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more - // nicely with the rest of the SSA representation). In a compiler, we would figure - // out which slot to store to at compile time when we encounter the statement. We - // can't quite do that here, but we do something similar: We scan the catch entry - // block (the only place where PhiC nodes may occur) to find all the Upsilons we - // can possibly encounter. Then, we remember which slot they store to (we abuse the - // SSA value result array for this purpose). TODO: We could do this only the first - // time we encounter a given enter. - size_t catch_ip = jl_unbox_long(jl_exprarg(stmt, 0)) - 1; + else if (jl_is_enternode(stmt)) { + jl_enter_handler(ct, &__eh); + // This is a bit tricky, but supports the implementation of PhiC nodes. + // They are conceptually slots, but the slot to store to doesn't get explicitly + // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more + // nicely with the rest of the SSA representation). In a compiler, we would figure + // out which slot to store to at compile time when we encounter the statement. We + // can't quite do that here, but we do something similar: We scan the catch entry + // block (the only place where PhiC nodes may occur) to find all the Upsilons we + // can possibly encounter. Then, we remember which slot they store to (we abuse the + // SSA value result array for this purpose). TODO: We could do this only the first + // time we encounter a given enter. 
+ size_t catch_ip = jl_enternode_catch_dest(stmt); + if (catch_ip) { + catch_ip -= 1; while (catch_ip < ns) { jl_value_t *phicnode = jl_array_ptr_ref(stmts, catch_ip); if (!jl_is_phicnode(phicnode)) break; jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(phicnode, 0); - for (size_t i = 0; i < jl_array_len(values); ++i) { + for (size_t i = 0; i < jl_array_nrows(values); ++i) { jl_value_t *val = jl_array_ptr_ref(values, i); assert(jl_is_ssavalue(val)); size_t upsilon = ((jl_ssavalue_t*)val)->id - 1; @@ -538,37 +536,88 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, catch_ip += 1; } // store current top of exception stack for restore in pop_exception. - s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state()); + } + s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state(ct)); + if (jl_enternode_scope(stmt)) { + jl_value_t *scope = eval_value(jl_enternode_scope(stmt), s); + JL_GC_PUSH1(&scope); + ct->scope = scope; if (!jl_setjmp(__eh.eh_ctx, 1)) { - return eval_body(stmts, s, next_ip, toplevel); + ct->eh = &__eh; + eval_body(stmts, s, next_ip, toplevel); + jl_unreachable(); } - else if (s->continue_at) { // means we reached a :leave expression - ip = s->continue_at; - s->continue_at = 0; - continue; + JL_GC_POP(); + } + else { + if (!jl_setjmp(__eh.eh_ctx, 1)) { + ct->eh = &__eh; + eval_body(stmts, s, next_ip, toplevel); + jl_unreachable(); } - else { // a real exception - ip = catch_ip; - continue; + } + + if (s->continue_at) { // means we reached a :leave expression + jl_eh_restore_state_noexcept(ct, &__eh); + ip = s->continue_at; + s->continue_at = 0; + continue; + } + else { // a real exception + jl_eh_restore_state(ct, &__eh); + ip = catch_ip; + assert(jl_enternode_catch_dest(stmt) != 0); + continue; + } + } + else if (jl_is_expr(stmt)) { + // Most exprs are allowed to end a BB by fall through + jl_sym_t *head = ((jl_expr_t*)stmt)->head; + if (head == jl_assign_sym) { + jl_value_t *lhs = jl_exprarg(stmt, 0); + jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s); + if (jl_is_slotnumber(lhs)) { + ssize_t n = jl_slot_number(lhs); + assert(n <= jl_source_nslots(s->src) && n > 0); + s->locals[n - 1] = rhs; + } + else { + // This is an unmodeled error. Our frontend only generates + // legal `=` expressions, but since GlobalRef used to be legal + // here, give a loud error in case any package is modifying + // internals. + jl_error("Invalid IR: Assignment LHS not a Slot"); } } else if (head == jl_leave_sym) { - int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0)); - assert(hand_n_leave > 0); - // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp: - jl_handler_t *eh = ct->eh; - while (--hand_n_leave > 0) - eh = eh->prev; - jl_eh_restore_state(eh); - // leave happens during normal control flow, but we must - // longjmp to pop the eval_body call for each enter. 
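To see why a :leave must longjmp instead of simply falling through, here is a minimal stand-alone sketch, not the real jl_handler_t machinery: each :enter performs a setjmp and then recursively evaluates the body, so the only way to discard that nested C frame during normal control flow is to longjmp back to the matching setjmp.

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf handlers[8];   /* one buffer per active :enter */
    static int depth = 0;

    static void run_body(int level)
    {
        if (level == 2) {
            /* reached a :leave popping one handler: unwind one C frame */
            printf("leave at level %d\n", level);
            longjmp(handlers[depth - 1], 1);
        }
        int my = depth++;                      /* slot for this :enter */
        if (setjmp(handlers[my]) == 0) {       /* :enter */
            run_body(level + 1);               /* nested eval_body call */
        }
        depth = my;                            /* handler popped, by fallthrough or longjmp */
        printf("resumed after the enter at level %d\n", level);
    }

    int main(void)
    {
        run_body(0);
        return 0;
    }

A :leave that pops several handlers longjmps to the outermost of the popped ones, which is what the hand_n_leave walk over eh->prev below computes.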
- s->continue_at = next_ip; - asan_unpoison_task_stack(ct, &eh->eh_ctx); - jl_longjmp(eh->eh_ctx, 1); + int hand_n_leave = 0; + for (int i = 0; i < jl_expr_nargs(stmt); ++i) { + jl_value_t *arg = jl_exprarg(stmt, i); + if (arg == jl_nothing) + continue; + assert(jl_is_ssavalue(arg)); + jl_value_t *enter_stmt = jl_array_ptr_ref(stmts, ((jl_ssavalue_t*)arg)->id - 1); + if (enter_stmt == jl_nothing) + continue; + hand_n_leave += 1; + } + if (hand_n_leave > 0) { + assert(hand_n_leave > 0); + // equivalent to jl_pop_handler(hand_n_leave), longjmping + // to the :enter code above instead, which handles cleanup + jl_handler_t *eh = ct->eh; + while (--hand_n_leave > 0) + eh = eh->prev; + // leave happens during normal control flow, but we must + // longjmp to pop the eval_body call for each enter. + s->continue_at = next_ip; + asan_unpoison_task_stack(ct, &eh->eh_ctx); + jl_longjmp(eh->eh_ctx, 1); + } } else if (head == jl_pop_exception_sym) { size_t prev_state = jl_unbox_ulong(eval_value(jl_exprarg(stmt, 0), s)); - jl_restore_excstack(prev_state); + jl_restore_excstack(ct, prev_state); } else if (toplevel) { if (head == jl_method_sym && jl_expr_nargs(stmt) > 1) { @@ -578,6 +627,21 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, jl_value_t *res = jl_toplevel_eval(s->module, stmt); s->locals[jl_source_nslots(s->src) + s->ip] = res; } + else if (head == jl_globaldecl_sym) { + jl_value_t *val = eval_value(jl_exprarg(stmt, 1), s); + s->locals[jl_source_nslots(s->src) + s->ip] = val; // temporarily root + jl_declare_global(s->module, jl_exprarg(stmt, 0), val); + s->locals[jl_source_nslots(s->src) + s->ip] = jl_nothing; + } + else if (head == jl_const_sym) { + jl_value_t *val = jl_expr_nargs(stmt) == 1 ? NULL : eval_value(jl_exprarg(stmt, 1), s); + s->locals[jl_source_nslots(s->src) + s->ip] = val; // temporarily root + jl_eval_const_decl(s->module, jl_exprarg(stmt, 0), val); + s->locals[jl_source_nslots(s->src) + s->ip] = jl_nothing; + } + else if (head == jl_latestworld_sym) { + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + } else if (jl_is_toplevel_only_expr(stmt)) { jl_toplevel_eval(s->module, stmt); } @@ -640,31 +704,55 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, // preparing method IR for interpreter -jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world) +jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *mi, size_t world) { - jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&mi->uninferred); + jl_value_t *ret = NULL; + jl_code_info_t *src = NULL; if (jl_is_method(mi->def.value)) { - if (!src || (jl_value_t*)src == jl_nothing) { - if (mi->def.method->source) { - src = (jl_code_info_t*)mi->def.method->source; + if (mi->def.method->source) { + jl_method_t *m = mi->def.method; + src = (jl_code_info_t*)m->source; + if (!jl_is_code_info(src)) { + src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); + // Replace the method source by the uncompressed version, + // under the assumption that the interpreter may need to + // access it frequently. TODO: Have some sort of usage-based + // cache here. 
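The "uncompress once and keep it" policy mentioned in the comment above is just a lazily filled cache. A minimal sketch under made-up names (method_src_t and get_source are illustrative, not Julia internals):

    #include <stdio.h>

    typedef struct {
        const char *compressed;  /* always present on the method */
        const char *expanded;    /* NULL until the interpreter first needs it */
    } method_src_t;

    static const char *get_source(method_src_t *m)
    {
        if (!m->expanded)
            m->expanded = m->compressed;  /* stand-in for jl_uncompress_ir */
        return m->expanded;               /* later calls reuse the cached form */
    }

    int main(void)
    {
        method_src_t m = {"compressed ir", NULL};
        printf("%s\n", get_source(&m));   /* expands and caches */
        printf("%s\n", get_source(&m));   /* hits the cache */
        return 0;
    }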
+ m->source = (jl_value_t*)src; + jl_gc_wb(m, src); } - else { + ret = (jl_value_t*)src; + } + else { + jl_code_instance_t *cache = jl_atomic_load_relaxed(&mi->cache); + jl_code_instance_t *uninferred = jl_cached_uninferred(cache, world); + if (!uninferred) { assert(mi->def.method->generator); - src = jl_code_for_staged(mi, world); + src = jl_code_for_staged(mi, world, &uninferred); } + ret = (jl_value_t*)uninferred; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); } - if (src && (jl_value_t*)src != jl_nothing) { - JL_GC_PUSH1(&src); - src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); - jl_atomic_store_release(&mi->uninferred, (jl_value_t*)src); - jl_gc_wb(mi, src); - JL_GC_POP(); + } + else { + jl_code_instance_t *uninferred = jl_cached_uninferred(jl_atomic_load_relaxed(&mi->cache), world); + ret = (jl_value_t*)uninferred; + if (ret) { + src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); } } if (!src || !jl_is_code_info(src)) { - jl_error("source missing for method called in interpreter"); + jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi)); } - return src; + return ret; +} + +jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world) +{ + jl_value_t *code_or_ci = jl_code_or_ci_for_interpreter(mi, world); + if (jl_is_code_instance(code_or_ci)) + return (jl_code_info_t*)jl_atomic_load_relaxed(&((jl_code_instance_t*)code_or_ci)->inferred); + return (jl_code_info_t*)code_or_ci; } // interpreter entry points @@ -672,10 +760,18 @@ jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world) jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *codeinst) { interpreter_state *s; - jl_method_instance_t *mi = codeinst->def; + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); jl_task_t *ct = jl_current_task; size_t world = ct->world_age; - jl_code_info_t *src = jl_code_for_interpreter(mi, world); + jl_code_info_t *src = NULL; + jl_value_t *code = jl_code_or_ci_for_interpreter(mi, world); + jl_code_instance_t *ci = NULL; + if (jl_is_code_instance(code)) { + ci = (jl_code_instance_t*)code; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&ci->inferred); + } else { + src = (jl_code_info_t*)code; + } jl_array_t *stmts = src->code; assert(jl_typetagis(stmts, jl_array_any_type)); unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2; @@ -690,8 +786,8 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui } else { s->module = mi->def.method->module; - size_t defargs = mi->def.method->nargs; - int isva = mi->def.method->isva ? 1 : 0; + size_t defargs = src->nargs; + int isva = src->isva; size_t i; s->locals[0] = f; assert(isva ? 
nargs + 2 >= defargs : nargs + 1 == defargs); @@ -706,6 +802,7 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui s->preevaluation = 0; s->continue_at = 0; s->mi = mi; + s->ci = ci; JL_GC_ENABLEFRAME(s); jl_value_t *r = eval_body(stmts, s, 0, 0); JL_GC_POP(); @@ -717,7 +814,25 @@ JL_DLLEXPORT const jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs) { jl_method_t *source = oc->source; - jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source); + jl_code_info_t *code = NULL; + if (source->source) { + code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source); + } + else { + // OC constructed from optimized IR. It'll have a single specialization with optimized code + // in it that we'll try to interpret. + jl_svec_t *specializations = (jl_svec_t*)jl_atomic_load_relaxed(&source->specializations); + assert(jl_is_method_instance(specializations)); + jl_method_instance_t *mi = (jl_method_instance_t *)specializations; + jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache); + jl_value_t *src = jl_atomic_load_relaxed(&ci->inferred); + if (!src) { + // This can happen if somebody did :new_opaque_closure with broken IR. This is definitely bad + // and UB, but let's try to be slightly nicer than segfaulting here for people debugging. + jl_error("Internal Error: Opaque closure with no source at all"); + } + code = jl_uncompress_ir(source, ci, src); + } interpreter_state *s; unsigned nroots = jl_source_nslots(code) + jl_source_nssavalues(code) + 2; jl_task_t *ct = jl_current_task; @@ -737,6 +852,7 @@ jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **ar s->preevaluation = 0; s->continue_at = 0; s->mi = NULL; + s->ci = NULL; size_t defargs = source->nargs; int isva = source->isva; assert(isva ? nargs + 2 >= defargs : nargs + 1 == defargs); @@ -768,11 +884,9 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t s->sparam_vals = jl_emptysvec; s->continue_at = 0; s->mi = NULL; + s->ci = NULL; JL_GC_ENABLEFRAME(s); - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; jl_value_t *r = eval_body(stmts, s, 0, 1); - ct->world_age = last_age; JL_GC_POP(); return r; } @@ -792,6 +906,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e s->preevaluation = (sparam_vals != NULL); s->continue_at = 0; s->mi = NULL; + s->ci = NULL; JL_GC_ENABLEFRAME(s); jl_value_t *v = eval_value(e, s); assert(v); @@ -811,7 +926,8 @@ JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_entry, uintptr_t entry_tags = jl_bt_entry_descriptor(njlvalues, 0, JL_BT_INTERP_FRAME_TAG, s->ip); bt_entry[0].uintptr = JL_BT_NON_PTR_ENTRY; bt_entry[1].uintptr = entry_tags; - bt_entry[2].jlvalue = s->mi ? (jl_value_t*)s->mi : + bt_entry[2].jlvalue = s->ci ? (jl_value_t*)s->ci : + s->mi ? (jl_value_t*)s->mi : s->src ? 
(jl_value_t*)s->src : (jl_value_t*)jl_nothing; if (need_module) { // If we only have a CodeInfo (s->src), we are in a top level thunk and diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 810982370de19..7b5aa7c397129 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -4,6 +4,10 @@ namespace JL_I { #include "intrinsics.h" } +#include +#include +#include + #include "ccall.cpp" //Mark our stats as being from intrinsics irgen @@ -15,9 +19,9 @@ STATISTIC(EmittedCoercedUnboxes, "Number of unbox coercions emitted"); STATISTIC(EmittedUnboxes, "Number of unboxes emitted"); STATISTIC(EmittedRuntimeCalls, "Number of runtime intrinsic calls emitted"); STATISTIC(EmittedIntrinsics, "Number of intrinsic calls emitted"); -STATISTIC(Emitted_arraylen, "Number of arraylen calls emitted"); STATISTIC(Emitted_pointerref, "Number of pointerref calls emitted"); STATISTIC(Emitted_pointerset, "Number of pointerset calls emitted"); +STATISTIC(Emitted_pointerarith, "Number of pointer arithmetic calls emitted"); STATISTIC(Emitted_atomic_fence, "Number of atomic_fence calls emitted"); STATISTIC(Emitted_atomic_pointerref, "Number of atomic_pointerref calls emitted"); STATISTIC(Emitted_atomic_pointerop, "Number of atomic_pointerop calls emitted"); @@ -79,10 +83,14 @@ const auto &float_func() { float_func[sub_float] = true; float_func[mul_float] = true; float_func[div_float] = true; + float_func[min_float] = true; + float_func[max_float] = true; float_func[add_float_fast] = true; float_func[sub_float_fast] = true; float_func[mul_float_fast] = true; float_func[div_float_fast] = true; + float_func[min_float_fast] = true; + float_func[max_float_fast] = true; float_func[fma_float] = true; float_func[muladd_float] = true; float_func[eq_float] = true; @@ -165,7 +173,7 @@ static Type *INTT(Type *t, const DataLayout &DL) return getInt64Ty(ctxt); if (t == getFloatTy(ctxt)) return getInt32Ty(ctxt); - if (t == getHalfTy(ctxt)) + if (t == getHalfTy(ctxt) || t == getBFloatTy(ctxt)) return getInt16Ty(ctxt); unsigned nb = t->getPrimitiveSizeInBits(); assert(t != getVoidTy(ctxt) && nb > 0); @@ -174,12 +182,7 @@ static Type *INTT(Type *t, const DataLayout &DL) static Value *uint_cnvt(jl_codectx_t &ctx, Type *to, Value *x) { - Type *t = x->getType(); - if (t == to) - return x; - if (to->getPrimitiveSizeInBits() < x->getType()->getPrimitiveSizeInBits()) - return ctx.builder.CreateTrunc(x, to); - return ctx.builder.CreateZExt(x, to); + return ctx.builder.CreateZExtOrTrunc(x, to); } static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_datatype_t *bt) @@ -229,7 +232,7 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data } size_t nf = jl_datatype_nfields(bt); - std::vector fields(0); + SmallVector fields(0); for (size_t i = 0; i < nf; i++) { size_t offs = jl_field_offset(bt, i); jl_value_t *ft = jl_field_type(bt, i); @@ -245,8 +248,8 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data if (jl_is_uniontype(ft)) { // compute the same type layout as julia_struct_to_llvm size_t fsz = 0, al = 0; - (void)jl_islayout_inline(ft, &fsz, &al); - fsz = jl_field_size(bt, i); + (void)jl_islayout_inline(ft, &fsz, &al); // compute al + fsz = jl_field_size(bt, i); // get LLT_ALIGN(fsz+1,al) uint8_t sel = ((const uint8_t*)ptr)[offs + fsz - 1]; jl_value_t *active_ty = jl_nth_union_component(ft, sel); size_t active_sz = jl_datatype_size(active_ty); @@ -318,47 +321,110 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, jl_value_t *e) return 
julia_const_to_llvm(ctx, e, (jl_datatype_t*)bt); } +static Constant *undef_value_for_type(Type *T) { + auto tracked = CountTrackedPointers(T); + Constant *undef; + if (tracked.count) + // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL + undef = Constant::getNullValue(T); + else + undef = UndefValue::get(T); + return undef; +} + +// rebuild a struct type with any i1 Bool (e.g. the llvmcall type) widened to i8 (the native size for memcpy) +static Type *zext_struct_type(Type *T) +{ + if (auto *AT = dyn_cast(T)) { + return ArrayType::get(AT->getElementType(), AT->getNumElements()); + } + else if (auto *ST = dyn_cast(T)) { + SmallVector Elements(ST->element_begin(), ST->element_end()); + for (size_t i = 0; i < Elements.size(); i++) { + Elements[i] = zext_struct_type(Elements[i]); + } + return StructType::get(ST->getContext(), Elements, ST->isPacked()); + } + else if (auto *VT = dyn_cast(T)) { + return VectorType::get(zext_struct_type(VT->getElementType()), VT); + } + else if (auto *IT = dyn_cast(T)) { + unsigned BitWidth = IT->getBitWidth(); + if (alignTo(BitWidth, 8) != BitWidth) + return IntegerType::get(IT->getContext(), alignTo(BitWidth, 8)); + } + return T; +} + +// rebuild a struct with any i1 Bool (e.g. the llvmcall type) widened to i8 (the native size for memcpy) +static Value *zext_struct_helper(jl_codectx_t &ctx, Value *V, Type *T2) +{ + Type *T = V->getType(); + if (T == T2) + return V; + if (auto *AT = dyn_cast(T2)) { + Value *V2 = undef_value_for_type(AT); + for (size_t i = 0; i < AT->getNumElements(); i++) { + Value *E = zext_struct_helper(ctx, ctx.builder.CreateExtractValue(V, i), AT->getElementType()); + V2 = ctx.builder.CreateInsertValue(V2, E, i); + } + return V2; + } + else if (auto *ST = dyn_cast(T2)) { + Value *V2 = undef_value_for_type(ST); + for (size_t i = 0; i < ST->getNumElements(); i++) { + Value *E = zext_struct_helper(ctx, ctx.builder.CreateExtractValue(V, i), ST->getElementType(i)); + V2 = ctx.builder.CreateInsertValue(V2, E, i); + } + return V2; + } + else if (T2->isIntegerTy() || T2->isVectorTy()) { + return ctx.builder.CreateZExt(V, T2); + } + return V; +} + +static Value *zext_struct(jl_codectx_t &ctx, Value *V) +{ + return zext_struct_helper(ctx, V, zext_struct_type(V->getType())); +} + static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) { + if (unboxed->getType() == to) + return unboxed; + if (CastInst::castIsValid(Instruction::Trunc, unboxed, to)) + return ctx.builder.CreateTrunc(unboxed, to); + unboxed = zext_struct(ctx, unboxed); Type *ty = unboxed->getType(); if (ty == to) return unboxed; bool frompointer = ty->isPointerTy(); bool topointer = to->isPointerTy(); const DataLayout &DL = jl_Module->getDataLayout(); - if (ty->isIntegerTy(1) && to->isIntegerTy(8)) { - // bools may be stored internally as int8 - unboxed = ctx.builder.CreateZExt(unboxed, to); - } - else if (ty->isIntegerTy(8) && to->isIntegerTy(1)) { - // bools may be stored internally as int8 - unboxed = ctx.builder.CreateTrunc(unboxed, to); - } - else if (ty->isVoidTy() || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) { + if (ty->isVoidTy() || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) { // this can happen in dead code - //emit_unreachable(ctx); + CreateTrap(ctx.builder); return UndefValue::get(to); } - if (frompointer && topointer) { - unboxed = emit_bitcast(ctx, unboxed, to); - } else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) { assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to)); - AllocaInst 
*cast = ctx.builder.CreateAlloca(ty); + Align align = std::max(DL.getPrefTypeAlign(ty), DL.getPrefTypeAlign(to)); + AllocaInst *cast = emit_static_alloca(ctx, ty, align); setName(ctx.emission_context, cast, "coercion"); - ctx.builder.CreateStore(unboxed, cast); - unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo())); + ctx.builder.CreateAlignedStore(unboxed, cast, align); + unboxed = ctx.builder.CreateAlignedLoad(to, cast, align); } else if (frompointer) { Type *INTT_to = INTT(to, DL); unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to); setName(ctx.emission_context, unboxed, "coercion"); - if (INTT_to != to) + if (INTT_to != to) //TODO when is this true? unboxed = ctx.builder.CreateBitCast(unboxed, to); } else if (topointer) { Type *INTT_to = INTT(to, DL); - if (to != INTT_to) + if (to != INTT_to) //TODO when is this true? unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to); unboxed = emit_inttoptr(ctx, unboxed, to); setName(ctx.emission_context, unboxed, "coercion"); @@ -379,14 +445,14 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va // up being dead code, and type inference knows that the other // branch's type is the only one that matters. if (type_is_ghost(to)) { - return NULL; + return nullptr; } - //emit_unreachable(ctx); + CreateTrap(ctx.builder); return UndefValue::get(to); // type mismatch error } - Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : NULL; - if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion + Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : nullptr; + if ((x.inline_roots.empty() && !x.ispointer()) || c != nullptr) { // already unboxed, but sometimes need conversion Value *unboxed = c ? c : x.V; return emit_unboxed_coercion(ctx, to, unboxed); } @@ -396,8 +462,8 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va if (jt == (jl_value_t*)jl_bool_type || to->isIntegerTy(1)) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); - Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext())))); - setName(ctx.emission_context, unbox_load, "unbox"); + Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), p)); + setName(ctx.emission_context, unbox_load, p->getName() + ".unbox"); if (jt == (jl_value_t*)jl_bool_type) unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), { ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), @@ -411,34 +477,22 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va } unsigned alignment = julia_alignment(jt); - Type *ptype = to->getPointerTo(); - if (p->getType() != ptype && isa(p)) { - // LLVM's mem2reg can't handle coercion if the load/store type does - // not match the type of the alloca. As such, it is better to - // perform the load using the alloca's type and then perform the - // appropriate coercion manually. 
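Both the aligned store/load pair added to emit_unboxed_coercion above and the special case removed below implement the same idea: a same-size reinterpretation of the value's bits through memory. A stand-alone C analogue, with memcpy playing the role of the alloca round-trip:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
        float f = 1.5f;
        uint32_t bits;
        _Static_assert(sizeof f == sizeof bits, "coercion needs equal sizes");
        memcpy(&bits, &f, sizeof bits);  /* reinterpret the bytes, no conversion */
        printf("float %g has bit pattern 0x%08x\n", (double)f, (unsigned)bits);
        return 0;
    }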
- AllocaInst *AI = cast(p); - Type *AllocType = AI->getAllocatedType(); - const DataLayout &DL = jl_Module->getDataLayout(); - if (!AI->isArrayAllocation() && - (AllocType->isFloatingPointTy() || AllocType->isIntegerTy() || AllocType->isPointerTy()) && - (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) && - DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) { - Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment)); - setName(ctx.emission_context, load, "unbox"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); - return emit_unboxed_coercion(ctx, to, ai.decorateInst(load)); - } + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + if (!x.inline_roots.empty()) { + assert(x.typ == jt); + AllocaInst *combined = emit_static_alloca(ctx, to, Align(alignment)); + auto combined_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + recombine_value(ctx, x, combined, combined_ai, Align(alignment), false); + p = combined; + ai = combined_ai; } - p = maybe_bitcast(ctx, p, ptype); Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment)); - setName(ctx.emission_context, load, "unbox"); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + setName(ctx.emission_context, load, p->getName() + ".unbox"); return ai.decorateInst(load); } // emit code to store a raw value into a destination -static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile) +static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, Align alignment, bool isVolatile) { if (x.isghost) { // this can happen when a branch yielding a different type ends @@ -447,29 +501,25 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest return; } - Value *unboxed = nullptr; - if (!x.ispointer()) { // already unboxed, but sometimes need conversion - unboxed = x.V; - assert(unboxed); - } + auto dest_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest); - // bools stored as int8, but can be narrowed to int1 often - if (x.typ == (jl_value_t*)jl_bool_type) - unboxed = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), x, (jl_value_t*)jl_bool_type); + if (!x.inline_roots.empty()) { + recombine_value(ctx, x, dest, dest_ai, alignment, isVolatile); + return; + } - if (unboxed) { - Type *dest_ty = unboxed->getType()->getPointerTo(); - if (dest->getType() != dest_ty) - dest = emit_bitcast(ctx, dest, dest_ty); - StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, Align(alignment)); + if (!x.ispointer()) { // already unboxed, but sometimes need conversion (e.g. 
f32 -> i32) + assert(x.V); + Value *unboxed = zext_struct(ctx, x.V); + StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, alignment); store->setVolatile(isVolatile); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest); - ai.decorateInst(store); + dest_ai.decorateInst(store); return; } Value *src = data_pointer(ctx, x); - emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, isVolatile); + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + emit_memcpy(ctx, dest, dest_ai, src, src_ai, jl_datatype_size(x.typ), Align(alignment), Align(julia_alignment(x.typ)), isVolatile); } static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ) @@ -484,10 +534,10 @@ static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ) return NULL; } -static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs) +static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, ArrayRef argv, size_t nargs) { Function *func = prepare_call(runtime_func()[f]); - SmallVector argvalues(nargs); + SmallVector argvalues(nargs); for (size_t i = 0; i < nargs; ++i) { argvalues[i] = boxed(ctx, argv[i]); } @@ -496,7 +546,7 @@ static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const } // put a bits type tag on some value (despite the name, this doesn't necessarily actually change anything about the value however) -static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) +static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, ArrayRef argv) { // Give the arguments names // const jl_cgval_t &bt_value = argv[0]; @@ -537,7 +587,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) } else { Value *size = emit_datatype_size(ctx, typ); - auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)); + auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(size->getType(), nb)); setName(ctx.emission_context, sizecheck, "sizecheck"); error_unless(ctx, sizecheck, @@ -561,8 +611,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, v.tbaa); vx = ai.decorateInst(ctx.builder.CreateLoad( storage_type, - emit_bitcast(ctx, data_pointer(ctx, v), - storage_type->getPointerTo()))); + data_pointer(ctx, v))); setName(ctx.emission_context, vx, "bitcast"); } @@ -574,13 +623,25 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) vx = ctx.builder.CreateZExt(vx, llvmt); } else if (vxt->isPointerTy() && !llvmt->isPointerTy()) { vx = ctx.builder.CreatePtrToInt(vx, llvmt); - setName(ctx.emission_context, vx, "bitcast_coercion"); + if (isa(vx) && !vx->hasName()) + // CreatePtrToInt may undo an IntToPtr + setName(ctx.emission_context, vx, "bitcast_coercion"); } else if (!vxt->isPointerTy() && llvmt->isPointerTy()) { vx = emit_inttoptr(ctx, vx, llvmt); - setName(ctx.emission_context, vx, "bitcast_coercion"); + if (isa(vx) && !vx->hasName()) + // emit_inttoptr may undo an PtrToInt + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else if (vxt->isPointerTy() && llvmt->isPointerTy()) { + // emit_bitcast preserves the origin address space, which we can't have here + vx = ctx.builder.CreateAddrSpaceCast(vx, llvmt); + if (isa(vx) && !vx->hasName()) + // cast may have been folded + setName(ctx.emission_context, vx, "bitcast_coercion"); } else { 
vx = emit_bitcast(ctx, vx, llvmt); - setName(ctx.emission_context, vx, "bitcast_coercion"); + if (isa(vx) && !vx->hasName()) + // emit_bitcast may undo another bitcast + setName(ctx.emission_context, vx, "bitcast_coercion"); } } @@ -588,7 +649,8 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) return mark_julia_type(ctx, vx, false, bt); } else { - Value *box = emit_allocobj(ctx, nb, bt_value_rt); + unsigned align = sizeof(void*); // Allocations are at least pointer aligned + Value *box = emit_allocobj(ctx, nb, bt_value_rt, true, align); setName(ctx.emission_context, box, "bitcast_box"); init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut); return mark_julia_type(ctx, box, true, bt->name->wrapper); @@ -598,7 +660,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) static jl_cgval_t generic_cast( jl_codectx_t &ctx, intrinsic f, Instruction::CastOps Op, - const jl_cgval_t *argv, bool toint, bool fromint) + ArrayRef argv, bool toint, bool fromint) { auto &TT = ctx.emission_context.TargetTriple; auto &DL = ctx.emission_context.DL; @@ -631,10 +693,11 @@ static jl_cgval_t generic_cast( // understood that everything is implicitly rounded to 23 bits, // but if we start looking at more bits we need to actually do the // rounding first instead of carrying around incorrect low bits. - Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); + Align align(julia_alignment((jl_value_t*)jlto)); + Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType(), align); setName(ctx.emission_context, jlfloattemp_var, "rounding_slot"); - ctx.builder.CreateStore(from, jlfloattemp_var); - from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); + ctx.builder.CreateAlignedStore(from, jlfloattemp_var, align); + from = ctx.builder.CreateAlignedLoad(from->getType(), jlfloattemp_var, align, /*force this to load from the stack*/true); setName(ctx.emission_context, from, "rounded"); } } @@ -647,19 +710,20 @@ static jl_cgval_t generic_cast( else { Value *targ_rt = boxed(ctx, targ); emit_concretecheck(ctx, targ_rt, std::string(jl_intrinsic_name(f)) + ": target type not a leaf primitive type"); - Value *box = emit_allocobj(ctx, nb, targ_rt); + unsigned align = sizeof(void*); // Allocations are at least pointer aligned + Value *box = emit_allocobj(ctx, nb, targ_rt, true, align); setName(ctx.emission_context, box, "cast_box"); init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut); return mark_julia_type(ctx, box, true, jlto->name->wrapper); } } -static jl_cgval_t emit_runtime_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) +static jl_cgval_t emit_runtime_pointerref(jl_codectx_t &ctx, ArrayRef argv) { return emit_runtime_call(ctx, pointerref, argv, 3); } -static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) +static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef argv) { const jl_cgval_t &e = argv[0]; const jl_cgval_t &i = argv[1]; @@ -688,7 +752,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); - setName(ctx.emission_context, thePtr, "unbox_any_ptr"); + if (isa(thePtr) && !thePtr->hasName()) + setName(ctx.emission_context, thePtr, "unbox_any_ptr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, thePtr, im1), Align(align_nb)); setName(ctx.emission_context, load, 
"any_unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); @@ -698,16 +763,16 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) else if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); uint64_t size = jl_datatype_size(ety); - Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true); setName(ctx.emission_context, strct, "pointerref_box"); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); setName(ctx.emission_context, im1, "pointerref_offset"); - Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); - thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1); + Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ); + thePtr = emit_ptrgep(ctx, thePtr, im1); setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); - emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1); + emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, Align(sizeof(jl_value_t*)), Align(align_nb)); return mark_julia_type(ctx, strct, true, ety); } else { @@ -716,7 +781,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb); + thePtr = ctx.builder.CreateInBoundsGEP(ptrty, thePtr, im1); + auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb); setName(ctx.emission_context, load.V, "pointerref"); return load; } @@ -726,16 +792,16 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) } } -static jl_cgval_t emit_runtime_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) +static jl_cgval_t emit_runtime_pointerset(jl_codectx_t &ctx, ArrayRef argv) { return emit_runtime_call(ctx, pointerset, argv, 4); } // e[i] = x -static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) +static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef argv) { const jl_cgval_t &e = argv[0]; - const jl_cgval_t &x = argv[1]; + jl_cgval_t x = argv[1]; const jl_cgval_t &i = argv[2]; const jl_cgval_t &align = argv[3]; @@ -758,15 +824,17 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) return jl_cgval_t(); } emit_typecheck(ctx, x, ety, "pointerset"); + x = update_julia_type(ctx, x, ety); + if (x.typ == jl_bottom_type) + return jl_cgval_t(); Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); setName(ctx.emission_context, im1, "pointerset_idx"); - Value *thePtr; + Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ); if (ety == (jl_value_t*)jl_any_type) { // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots. 
- thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ); auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1); setName(ctx.emission_context, gep, "pointerset_ptr"); auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size); @@ -775,30 +843,60 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(store); } + else if (!x.inline_roots.empty()) { + recombine_value(ctx, e, thePtr, jl_aliasinfo_t(), Align(align_nb), false); + } else if (x.ispointer()) { - thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); uint64_t size = jl_datatype_size(ety); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); setName(ctx.emission_context, im1, "pointerset_offset"); - auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); + auto gep = emit_ptrgep(ctx, thePtr, im1); setName(ctx.emission_context, gep, "pointerset_ptr"); - emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb); + emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, Align(align_nb), Align(julia_alignment(ety))); } else { bool isboxed; Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed); assert(!isboxed); if (!type_is_ghost(ptrty)) { - thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, - AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, nullptr, ""); + thePtr = ctx.builder.CreateInBoundsGEP(ptrty, thePtr, im1); + typed_store(ctx, thePtr, x, jl_cgval_t(), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, + AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, nullptr, true, false, false, false, false, false, nullptr, "atomic_pointerset", nullptr, nullptr); } } return e; } -static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv) +// ptr + offset +// ptr - offset +static jl_cgval_t emit_pointerarith(jl_codectx_t &ctx, intrinsic f, + ArrayRef argv) +{ + jl_value_t *ptrtyp = argv[0].typ; + jl_value_t *offtyp = argv[1].typ; + if (!jl_is_cpointer_type(ptrtyp) || offtyp != (jl_value_t *)jl_ulong_type) + return emit_runtime_call(ctx, f, argv, argv.size()); + assert(f == add_ptr || f == sub_ptr); + + Value *ptr = emit_unbox(ctx, ctx.types().T_ptr, argv[0], ptrtyp); + Value *off = emit_unbox(ctx, ctx.types().T_size, argv[1], offtyp); + if (f == sub_ptr) + off = ctx.builder.CreateNeg(off); + Value *ans = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), ptr, off); + + if (jl_is_concrete_type(ptrtyp)) { + return mark_julia_type(ctx, ans, false, ptrtyp); + } + else { + Value *box = emit_allocobj(ctx, (jl_datatype_t *)ptrtyp, true); + setName(ctx.emission_context, box, "ptr_box"); + init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut); + return mark_julia_type(ctx, box, true, (jl_datatype_t *)ptrtyp); + } +} + +static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, ArrayRef argv) { const jl_cgval_t &ord = argv[0]; if (ord.constant && jl_is_symbol(ord.constant)) { @@ -814,7 +912,7 @@ static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv) return emit_runtime_call(ctx, atomic_fence, argv, 1); } -static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) +static jl_cgval_t 
emit_atomic_pointerref(jl_codectx_t &ctx, ArrayRef argv) { const jl_cgval_t &e = argv[0]; const jl_cgval_t &ord = argv[1]; @@ -854,18 +952,17 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); - Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true); setName(ctx.emission_context, strct, "atomic_pointerref_box"); - Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); + Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ); Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8); - thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb)); setName(ctx.emission_context, load, "atomic_pointerref"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(load); load->setOrdering(llvm_order); - thePtr = emit_bitcast(ctx, strct, thePtr->getType()); + thePtr = strct; StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety))); ai.decorateInst(store); return mark_julia_type(ctx, strct, true, ety); @@ -892,7 +989,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) // e[i] <= x (swap) // e[i] y => x (replace) // x(e[i], y) (modify) -static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs, const jl_cgval_t *modifyop) +static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef argv, int nargs, const jl_cgval_t *modifyop) { bool issetfield = f == atomic_pointerset; bool isreplacefield = f == atomic_pointerreplace; @@ -900,7 +997,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl bool ismodifyfield = f == atomic_pointermodify; const jl_cgval_t undefval; const jl_cgval_t &e = argv[0]; - const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1]; + jl_cgval_t x = isreplacefield || ismodifyfield ? argv[2] : argv[1]; const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval; const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2]; const jl_cgval_t &failord = isreplacefield ? 
argv[4] : undefval; @@ -929,8 +1026,8 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl // n.b.: the expected value (y) must be rooted, but not the others Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); bool isboxed = true; - jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, - llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify"); + jl_cgval_t ret = typed_store(ctx, thePtr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, + llvm_order, llvm_failorder, sizeof(jl_value_t*), nullptr, issetfield, isreplacefield, isswapfield, ismodifyfield, false, false, modifyop, "atomic_pointermodify", nullptr, nullptr); if (issetfield) ret = e; return ret; @@ -942,8 +1039,12 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl emit_error(ctx, msg); return jl_cgval_t(); } - if (!ismodifyfield) + if (!ismodifyfield) { emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f))); + x = update_julia_type(ctx, x, ety); + if (x.typ == jl_bottom_type) + return jl_cgval_t(); + } size_t nb = jl_datatype_size(ety); if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) { @@ -955,7 +1056,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl if (!jl_isbits(ety)) { //if (!deserves_stack(ety)) - //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); + //Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ); //uint64_t size = jl_datatype_size(ety); return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations } @@ -968,8 +1069,8 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); else thePtr = nullptr; // could use any value here, since typed_store will not use it - jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, - llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify"); + jl_cgval_t ret = typed_store(ctx, thePtr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, + llvm_order, llvm_failorder, nb, nullptr, issetfield, isreplacefield, isswapfield, ismodifyfield, false, false, modifyop, "atomic_pointermodify", nullptr, nullptr); if (issetfield) ret = e; return ret; @@ -1029,7 +1130,7 @@ struct math_builder { } }; -static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **argvalues, size_t nargs, +static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, ArrayRef argvalues, size_t nargs, jl_datatype_t **newtyp, jl_value_t *xtyp); @@ -1106,8 +1207,6 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ else { x_ptr = decay_derived(ctx, x_ptr); y_ptr = decay_derived(ctx, y_ptr); - if (x_ptr->getType() != y_ptr->getType()) - y_ptr = ctx.builder.CreateBitCast(y_ptr, x_ptr->getType()); ifelse_result = ctx.builder.CreateSelect(isfalse, y_ptr, x_ptr); setName(ctx.emission_context, ifelse_result, "ifelse_result"); ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa); @@ -1193,11 +1292,11 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar if (f == cglobal_auto || f == cglobal) return emit_cglobal(ctx, args, nargs); - SmallVector argv(nargs); + 
SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { jl_cgval_t arg = emit_expr(ctx, args[i + 1]); if (arg.typ == jl_bottom_type) { - // intrinsics generally don't handle buttom values, so bail out early + // intrinsics generally don't handle bottom values, so bail out early return jl_cgval_t(); } argv[i] = arg; @@ -1207,84 +1306,82 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar // return emit_runtime_call(ctx, f, argv, nargs); switch (f) { - case arraylen: { - ++Emitted_arraylen; - assert(nargs == 1); - const jl_cgval_t &x = argv[0]; - jl_value_t *typ = jl_unwrap_unionall(x.typ); - if (!jl_is_datatype(typ) || ((jl_datatype_t*)typ)->name != jl_array_typename) - return emit_runtime_call(ctx, f, argv.data(), nargs); - return mark_julia_type(ctx, emit_arraylen(ctx, x), false, jl_long_type); - } case pointerref: ++Emitted_pointerref; assert(nargs == 3); - return emit_pointerref(ctx, argv.data()); + return emit_pointerref(ctx, argv); case pointerset: ++Emitted_pointerset; assert(nargs == 4); - return emit_pointerset(ctx, argv.data()); + return emit_pointerset(ctx, argv); + + case add_ptr: + case sub_ptr: + ++Emitted_pointerarith; + assert(nargs == 2); + return emit_pointerarith(ctx, f, argv); + case atomic_fence: ++Emitted_atomic_fence; assert(nargs == 1); - return emit_atomicfence(ctx, argv.data()); + return emit_atomicfence(ctx, argv); case atomic_pointerref: ++Emitted_atomic_pointerref; assert(nargs == 2); - return emit_atomic_pointerref(ctx, argv.data()); + return emit_atomic_pointerref(ctx, argv); case atomic_pointerset: case atomic_pointerswap: case atomic_pointermodify: case atomic_pointerreplace: ++Emitted_atomic_pointerop; - return emit_atomic_pointerop(ctx, f, argv.data(), nargs, nullptr); + return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr); case bitcast: ++Emitted_bitcast; assert(nargs == 2); - return generic_bitcast(ctx, argv.data()); + return generic_bitcast(ctx, argv); case trunc_int: ++Emitted_trunc_int; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::Trunc, argv.data(), true, true); + return generic_cast(ctx, f, Instruction::Trunc, argv, true, true); case sext_int: ++Emitted_sext_int; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::SExt, argv.data(), true, true); + return generic_cast(ctx, f, Instruction::SExt, argv, true, true); case zext_int: ++Emitted_zext_int; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::ZExt, argv.data(), true, true); + return generic_cast(ctx, f, Instruction::ZExt, argv, true, true); case uitofp: ++Emitted_uitofp; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::UIToFP, argv.data(), false, true); + return generic_cast(ctx, f, Instruction::UIToFP, argv, false, true); case sitofp: ++Emitted_sitofp; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::SIToFP, argv.data(), false, true); + return generic_cast(ctx, f, Instruction::SIToFP, argv, false, true); case fptoui: ++Emitted_fptoui; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPToUI, argv.data(), true, false); + return generic_cast(ctx, f, Instruction::FPToUI, argv, true, false); case fptosi: ++Emitted_fptosi; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPToSI, argv.data(), true, false); + return generic_cast(ctx, f, Instruction::FPToSI, argv, true, false); case fptrunc: ++Emitted_fptrunc; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPTrunc, argv.data(), false, false); + return generic_cast(ctx, f, Instruction::FPTrunc, argv, 
false, false); case fpext: ++Emitted_fpext; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPExt, argv.data(), false, false); + return generic_cast(ctx, f, Instruction::FPExt, argv, false, false); case not_int: { ++Emitted_not_int; assert(nargs == 1); const jl_cgval_t &x = argv[0]; if (!jl_is_primitivetype(x.typ)) - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true), DL); Value *from = emit_unbox(ctx, xt, x, x.typ); Value *ans = ctx.builder.CreateNot(from); @@ -1296,7 +1393,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar assert(nargs == 1); const jl_cgval_t &x = argv[0]; if (!x.constant || !jl_is_datatype(x.constant)) - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); jl_datatype_t *dt = (jl_datatype_t*) x.constant; // select the appropriated overloaded intrinsic @@ -1306,7 +1403,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar else if (dt == jl_float64_type) intr_name += "f64"; else - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, getInt1Ty(ctx.builder.getContext())); auto ret = ctx.builder.CreateCall(intr); @@ -1319,14 +1416,14 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar // verify argument types if (!jl_is_primitivetype(xinfo.typ)) - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); Type *xtyp = bitstype_to_llvm(xinfo.typ, ctx.builder.getContext(), true); if (float_func()[f]) xtyp = FLOATT(xtyp); else xtyp = INTT(xtyp, DL); if (!xtyp) - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); ////Bool are required to be in the range [0,1] ////so while they are represented as i8, ////the operations need to be done in mod 1 @@ -1337,31 +1434,31 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar //if (xtyp == (jl_value_t*)jl_bool_type) // r = getInt1Ty(ctx.builder.getContext()); - SmallVector argt(nargs); + SmallVector argt(nargs); argt[0] = xtyp; if (f == shl_int || f == lshr_int || f == ashr_int) { if (!jl_is_primitivetype(argv[1].typ)) - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true), DL); } else { for (size_t i = 1; i < nargs; ++i) { if (xinfo.typ != argv[i].typ) - return emit_runtime_call(ctx, f, argv.data(), nargs); + return emit_runtime_call(ctx, f, argv, nargs); argt[i] = xtyp; } } // unbox the arguments - SmallVector argvalues(nargs); + SmallVector argvalues(nargs); for (size_t i = 0; i < nargs; ++i) { argvalues[i] = emit_unbox(ctx, argt[i], argv[i], argv[i].typ); } // call the intrinsic jl_value_t *newtyp = xinfo.typ; - Value *r = emit_untyped_intrinsic(ctx, f, argvalues.data(), nargs, (jl_datatype_t**)&newtyp, xinfo.typ); + Value *r = emit_untyped_intrinsic(ctx, f, argvalues, nargs, (jl_datatype_t**)&newtyp, xinfo.typ); // Turn Bool operations into mod 1 now, if needed if (newtyp == (jl_value_t*)jl_bool_type && !r->getType()->isIntegerTy(1)) r = ctx.builder.CreateTrunc(r, getInt1Ty(ctx.builder.getContext())); @@ -1371,7 +1468,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, 
jl_value_t **ar assert(0 && "unreachable"); } -static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **argvalues, size_t nargs, +static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, ArrayRef argvalues, size_t nargs, jl_datatype_t **newtyp, jl_value_t *xtyp) { ++EmittedUntypedIntrinsics; @@ -1391,32 +1488,40 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg case srem_int: return ctx.builder.CreateSRem(x, y); case urem_int: return ctx.builder.CreateURem(x, y); - // LLVM will not fold ptrtoint+arithmetic+inttoptr to GEP. The reason for this - // has to do with alias analysis. When adding two integers, either one of them - // could be the pointer base. With getelementptr, it is clear which of the - // operands is the pointer base. We also have this information at the julia - // level. Thus, to not lose information, we need to have a separate intrinsic - // for pointer arithmetic which lowers to getelementptr. - case add_ptr: { - return ctx.builder.CreatePtrToInt( - ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), - emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), y), t); - - } - - case sub_ptr: { - return ctx.builder.CreatePtrToInt( - ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), - emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), ctx.builder.CreateNeg(y)), t); - - } - case neg_float: return math_builder(ctx)().CreateFNeg(x); case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x); case add_float: return math_builder(ctx)().CreateFAdd(x, y); case sub_float: return math_builder(ctx)().CreateFSub(x, y); case mul_float: return math_builder(ctx)().CreateFMul(x, y); case div_float: return math_builder(ctx)().CreateFDiv(x, y); + case min_float: { + assert(x->getType() == y->getType()); + FunctionCallee minintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::minimum, ArrayRef(t)); + return ctx.builder.CreateCall(minintr, {x, y}); + } + case max_float: { + assert(x->getType() == y->getType()); + FunctionCallee maxintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::maximum, ArrayRef(t)); + return ctx.builder.CreateCall(maxintr, {x, y}); + } + case min_float_fast: { + assert(x->getType() == y->getType()); + FunctionCallee minintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::minimum, ArrayRef(t)); + auto call = ctx.builder.CreateCall(minintr, {x, y}); + auto fmf = call->getFastMathFlags(); + fmf.setFast(); + call->copyFastMathFlags(fmf); + return call; + } + case max_float_fast: { + assert(x->getType() == y->getType()); + FunctionCallee maxintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::maximum, ArrayRef(t)); + auto call = ctx.builder.CreateCall(maxintr, {x, y}); + auto fmf = call->getFastMathFlags(); + fmf.setFast(); + call->copyFastMathFlags(fmf); + return call; + } case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y); case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y); case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y); @@ -1424,7 +1529,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg case fma_float: { assert(y->getType() == x->getType()); assert(z->getType() == y->getType()); - FunctionCallee fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, makeArrayRef(t)); + FunctionCallee fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, ArrayRef(t)); return ctx.builder.CreateCall(fmaintr, {x, y, z}); } case muladd_float: { @@ -1454,13 +1559,8 @@ 
static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg (f == checked_smul_int ? Intrinsic::smul_with_overflow : Intrinsic::umul_with_overflow))))); - FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, makeArrayRef(t)); - Value *res = ctx.builder.CreateCall(intr, {x, y}); - Value *val = ctx.builder.CreateExtractValue(res, ArrayRef(0)); - setName(ctx.emission_context, val, "checked"); - Value *obit = ctx.builder.CreateExtractValue(res, ArrayRef(1)); - setName(ctx.emission_context, obit, "overflow"); - Value *obyte = ctx.builder.CreateZExt(obit, getInt8Ty(ctx.builder.getContext())); + FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, ArrayRef(t)); + Value *tupval = ctx.builder.CreateCall(intr, {x, y}); jl_value_t *params[2]; params[0] = xtyp; @@ -1468,10 +1568,6 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); *newtyp = tuptyp; - Value *tupval; - tupval = UndefValue::get(julia_type_to_llvm(ctx, (jl_value_t*)tuptyp)); - tupval = ctx.builder.CreateInsertValue(tupval, val, ArrayRef(0)); - tupval = ctx.builder.CreateInsertValue(tupval, obyte, ArrayRef(1)); return tupval; } @@ -1574,30 +1670,30 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg } } case bswap_int: { - FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, makeArrayRef(t)); - return ctx.builder.CreateCall(bswapintr, x); + FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, ArrayRef(t)); //TODO: Move to deduction guides + return ctx.builder.CreateCall(bswapintr, x); // when we drop LLVM 15 } case ctpop_int: { - FunctionCallee ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, makeArrayRef(t)); + FunctionCallee ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, ArrayRef(t)); return ctx.builder.CreateCall(ctpopintr, x); } case ctlz_int: { - FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, makeArrayRef(t)); + FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, ArrayRef(t)); y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); return ctx.builder.CreateCall(ctlz, {x, y}); } case cttz_int: { - FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, makeArrayRef(t)); + FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, ArrayRef(t)); y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); return ctx.builder.CreateCall(cttz, {x, y}); } case abs_float: { - FunctionCallee absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, makeArrayRef(t)); + FunctionCallee absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, ArrayRef(t)); return ctx.builder.CreateCall(absintr, x); } case copysign_float: { - FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, makeArrayRef(t)); + FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, ArrayRef(t)); return ctx.builder.CreateCall(copyintr, {x, y}); } case flipsign_int: { @@ -1616,27 +1712,27 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg return ctx.builder.CreateXor(ctx.builder.CreateAdd(x, tmp), tmp); } case ceil_llvm: { - FunctionCallee ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, makeArrayRef(t)); + FunctionCallee ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, 
ArrayRef(t)); return ctx.builder.CreateCall(ceilintr, x); } case floor_llvm: { - FunctionCallee floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, makeArrayRef(t)); + FunctionCallee floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, ArrayRef(t)); return ctx.builder.CreateCall(floorintr, x); } case trunc_llvm: { - FunctionCallee truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, makeArrayRef(t)); + FunctionCallee truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, ArrayRef(t)); return ctx.builder.CreateCall(truncintr, x); } case rint_llvm: { - FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, makeArrayRef(t)); + FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, ArrayRef(t)); return ctx.builder.CreateCall(rintintr, x); } case sqrt_llvm: { - FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t)); + FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef(t)); return ctx.builder.CreateCall(sqrtintr, x); } case sqrt_llvm_fast: { - FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t)); + FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef(t)); return math_builder(ctx, true)().CreateCall(sqrtintr, x); } diff --git a/src/intrinsics.h b/src/intrinsics.h index 93747faa74160..5765e3e671bc6 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -12,13 +12,13 @@ ADD_I(udiv_int, 2) \ ADD_I(srem_int, 2) \ ADD_I(urem_int, 2) \ - ADD_I(add_ptr, 2) \ - ADD_I(sub_ptr, 2) \ ADD_I(neg_float, 1) \ ADD_I(add_float, 2) \ ADD_I(sub_float, 2) \ ADD_I(mul_float, 2) \ ADD_I(div_float, 2) \ + ADD_I(min_float, 2) \ + ADD_I(max_float, 2) \ ADD_I(fma_float, 3) \ ADD_I(muladd_float, 3) \ /* fast arithmetic */ \ @@ -27,6 +27,8 @@ ALIAS(sub_float_fast, sub_float) \ ALIAS(mul_float_fast, mul_float) \ ALIAS(div_float_fast, div_float) \ + ALIAS(min_float_fast, min_float) \ + ALIAS(max_float_fast, max_float) \ /* same-type comparisons */ \ ADD_I(eq_int, 2) \ ADD_I(ne_int, 2) \ @@ -86,6 +88,9 @@ ADD_I(rint_llvm, 1) \ ADD_I(sqrt_llvm, 1) \ ADD_I(sqrt_llvm_fast, 1) \ + /* pointer arithmetic */ \ + ADD_I(add_ptr, 2) \ + ADD_I(sub_ptr, 2) \ /* pointer access */ \ ADD_I(pointerref, 3) \ ADD_I(pointerset, 4) \ @@ -99,8 +104,6 @@ /* c interface */ \ ADD_I(cglobal, 2) \ ALIAS(llvmcall, llvmcall) \ - /* object access */ \ - ADD_I(arraylen, 1) \ /* cpu feature tests */ \ ADD_I(have_fma, 1) \ /* hidden intrinsics */ \ diff --git a/src/ircode.c b/src/ircode.c index bc5cc61e7f892..99c5833ac3be7 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -10,21 +10,82 @@ #include "julia_internal.h" #include "serialize.h" -#ifndef _OS_WINDOWS_ -#include -#endif - -#include "valgrind.h" #include "julia_assert.h" #ifdef __cplusplus extern "C" { #endif +#define TAG_SYMBOL 2 +#define TAG_SSAVALUE 3 +#define TAG_DATATYPE 4 +#define TAG_SLOTNUMBER 5 +#define TAG_SVEC 6 +#define TAG_NEARBYSSAVALUE 7 +#define TAG_NULL 8 +#define TAG_EXPR 9 +#define TAG_PHINODE 10 +#define TAG_PHICNODE 11 +#define TAG_LONG_SYMBOL 12 +#define TAG_LONG_SVEC 13 +#define TAG_LONG_EXPR 14 +#define TAG_LONG_PHINODE 15 +#define TAG_LONG_PHICNODE 16 +#define TAG_METHODROOT 17 +#define TAG_EDGE 18 +#define TAG_STRING 19 +#define TAG_SHORT_INT64 20 +//#define TAG_UNUSED 21 +#define TAG_CNULL 22 +#define TAG_ARRAY1D 23 +#define TAG_SINGLETON 24 +#define TAG_MODULE 25 +#define TAG_TVAR 26 +#define TAG_METHOD_INSTANCE 27 
+#define TAG_METHOD 28 +#define TAG_CODE_INSTANCE 29 +#define TAG_COMMONSYM 30 +#define TAG_NEARBYGLOBAL 31 +#define TAG_GLOBALREF 32 +#define TAG_CORE 33 +#define TAG_BASE 34 +#define TAG_BITYPENAME 35 +#define TAG_NEARBYMODULE 36 +#define TAG_INT32 37 +#define TAG_INT64 38 +#define TAG_UINT8 39 +#define TAG_VECTORTY 40 +#define TAG_PTRTY 41 +#define TAG_LONG_SSAVALUE 42 +#define TAG_LONG_METHODROOT 43 +#define TAG_LONG_EDGE 44 +#define TAG_SHORTER_INT64 45 +#define TAG_SHORT_INT32 46 +#define TAG_CALL1 47 +#define TAG_CALL2 48 +#define TAG_SHORT_BACKREF 49 +#define TAG_BACKREF 50 +#define TAG_UNIONALL 51 +#define TAG_GOTONODE 52 +#define TAG_QUOTENODE 53 +#define TAG_GENERAL 54 +#define TAG_GOTOIFNOT 55 +#define TAG_RETURNNODE 56 +#define TAG_ARGUMENT 57 +#define TAG_RELOC_METHODROOT 58 +#define TAG_BINDING 59 +#define TAG_MEMORYT 60 +#define TAG_ENTERNODE 61 + +#define LAST_TAG 61 + + typedef struct { ios_t *s; + size_t ssaid; // method we're compressing for jl_method_t *method; + jl_svec_t *edges; jl_ptls_t ptls; uint8_t relocatability; } jl_ircode_state; @@ -37,28 +98,29 @@ static jl_value_t *deser_tag[256]; static htable_t common_symbol_tag; static jl_value_t *deser_symbols[256]; -void *jl_lookup_ser_tag(jl_value_t *v) +static void *jl_lookup_ser_tag(jl_value_t *v) { return ptrhash_get(&ser_tag, v); } -void *jl_lookup_common_symbol(jl_value_t *v) +static void *jl_lookup_common_symbol(jl_value_t *v) { return ptrhash_get(&common_symbol_tag, v); } -jl_value_t *jl_deser_tag(uint8_t tag) +static jl_value_t *jl_deser_tag(uint8_t tag) { return deser_tag[tag]; } -jl_value_t *jl_deser_symbol(uint8_t tag) +static jl_value_t *jl_deser_symbol(uint8_t tag) { return deser_symbols[tag]; } // --- encoding --- +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal); #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0) static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) @@ -67,11 +129,11 @@ static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) s->relocatability = 0; } -static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED +static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) { jl_array_t *rs = s->method->roots; - int i, l = jl_array_len(rs); - if (jl_is_symbol(v) || jl_is_concrete_type(v)) { + int i, l = jl_array_nrows(rs); + if (jl_is_symbol(v) || jl_is_concrete_type(v)) { // TODO: or more generally, any ptr-egal value for (i = 0; i < l; i++) { if (jl_array_ptr_ref(rs, i) == v) return tagged_root(rr, s, i); @@ -83,8 +145,14 @@ static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) return tagged_root(rr, s, i); } } + for (size_t i = 0; i < jl_svec_len(s->edges); i++) { + if (jl_svecref(s->edges, i) == v) { + rr->index = i; + return; + } + } jl_add_method_root(s->method, jl_precompile_toplevel_module, v); - return tagged_root(rr, s, jl_array_len(rs) - 1); + return tagged_root(rr, s, jl_array_nrows(rs) - 1); } static void jl_encode_int32(jl_ircode_state *s, int32_t x) @@ -101,11 +169,24 @@ static void jl_encode_int32(jl_ircode_state *s, int32_t x) static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v) { - rle_reference rr; + rle_reference rr = {.key = -1, .index = -1}; + if (jl_is_string(v)) + v = jl_as_global_root(v, 1); literal_val_id(&rr, s, v); int id = rr.index; assert(id >= 0); + if (rr.key == -1) { + if (id <= UINT8_MAX) { + write_uint8(s->s, TAG_EDGE); + write_uint8(s->s, id); + } + else { + 
write_uint8(s->s, TAG_LONG_EDGE); + write_uint32(s->s, id); + } + return; + } if (rr.key) { write_uint8(s->s, TAG_RELOC_METHODROOT); write_uint64(s->s, rr.key); @@ -121,7 +202,45 @@ static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v) } } -static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED +static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len) +{ + jl_datatype_t *t = (jl_datatype_t*)jl_typetagof(mem); + size_t i; + const jl_datatype_layout_t *layout = t->layout; + if (layout->flags.arrayelem_isboxed) { + for (i = 0; i < len; i++) { + jl_value_t *e = jl_genericmemory_ptr_ref(mem, offset + i); + jl_encode_value(s, e); + } + } + else if (layout->first_ptr >= 0) { + uint16_t elsz = layout->size; + size_t j, np = layout->npointers; + const char *data = (const char*)mem->ptr + offset * elsz; + for (i = 0; i < len; i++) { + const char *start = data; + for (j = 0; j < np; j++) { + uint32_t ptr = jl_ptr_offset(t, j); + const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; + if ((const char*)fld != start) + ios_write(s->s, start, (const char*)fld - start); + JL_GC_PROMISE_ROOTED(*fld); + jl_encode_value(s, *fld); + start = (const char*)&fld[1]; + } + data += elsz; + if (data != start) + ios_write(s->s, start, data - start); + } + } + else { + ios_write(s->s, (char*)mem->ptr + offset * layout->size, len * layout->size); + if (layout->flags.arrayelem_isunion) + ios_write(s->s, jl_genericmemory_typetagdata(mem) + offset, len); + } +} + +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) { size_t i; @@ -189,6 +308,10 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) jl_encode_value(s, jl_globalref_name(v)); } } + else if (jl_is_ssavalue(v) && s->ssaid - ((jl_ssavalue_t*)v)->id < 256) { + write_uint8(s->s, TAG_NEARBYSSAVALUE); + write_uint8(s->s, s->ssaid - ((jl_ssavalue_t*)v)->id); + } else if (jl_is_ssavalue(v) && ((jl_ssavalue_t*)v)->id < 256 && ((jl_ssavalue_t*)v)->id >= 0) { write_uint8(s->s, TAG_SSAVALUE); write_uint8(s->s, ((jl_ssavalue_t*)v)->id); @@ -203,7 +326,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } else if (jl_is_expr(v)) { jl_expr_t *e = (jl_expr_t*)v; - size_t l = jl_array_len(e->args); + size_t l = jl_array_nrows(e->args); if (e->head == jl_call_sym) { if (l == 2) { write_uint8(s->s, TAG_CALL1); @@ -235,31 +358,34 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) else if (jl_is_phinode(v)) { jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(v, 0); jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(v, 1); - size_t l = jl_array_len(edges); - if (l <= 255 && jl_array_len(values) == l) { + size_t l = jl_array_nrows(edges); + if (l <= 255 && jl_array_nrows(values) == l) { write_uint8(s->s, TAG_PHINODE); write_uint8(s->s, (uint8_t)l); } else { write_uint8(s->s, TAG_LONG_PHINODE); write_int32(s->s, l); - write_int32(s->s, jl_array_len(values)); + write_int32(s->s, jl_array_nrows(values)); } for (i = 0; i < l; i++) { - int32_t e = ((int32_t*)jl_array_data(edges))[i]; - if (e <= 20) - jl_encode_value(s, jl_box_int32(e)); + int32_t e = jl_array_data(edges, int32_t)[i]; + if (e <= 0 && e <= 20) { // 1-byte encodings + jl_value_t *ebox = jl_box_int32(e); + JL_GC_PROMISE_ROOTED(ebox); + jl_encode_value(s, ebox); + } else jl_encode_int32(s, e); } - l = jl_array_len(values); + l = jl_array_nrows(values); for (i = 0; i < l; i++) 
{ jl_encode_value(s, jl_array_ptr_ref(values, i)); } } else if (jl_is_phicnode(v)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(v, 0); - size_t l = jl_array_len(values); + size_t l = jl_array_nrows(values); if (l <= 255) { write_uint8(s->s, TAG_PHICNODE); write_uint8(s->s, (uint8_t)l); @@ -274,20 +400,39 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } else if (jl_is_gotonode(v)) { write_uint8(s->s, TAG_GOTONODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_gotoifnot(v)) { write_uint8(s->s, TAG_GOTOIFNOT); - jl_encode_value(s, jl_get_nth_field(v, 0)); - jl_encode_value(s, jl_get_nth_field(v, 1)); + jl_value_t *f = jl_get_nth_field_noalloc(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + f = jl_get_nth_field(v, 1); + jl_encode_value(s, f); + JL_GC_POP(); + } + else if (jl_is_enternode(v)) { + write_uint8(s->s, TAG_ENTERNODE); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + f = jl_get_nth_field_noalloc(v, 1); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_argument(v)) { write_uint8(s->s, TAG_ARGUMENT); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_returnnode(v)) { write_uint8(s->s, TAG_RETURNNODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_encode_value(s, jl_returnnode_value(v)); } else if (jl_is_quotenode(v)) { write_uint8(s->s, TAG_QUOTENODE); @@ -321,11 +466,6 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_uint8(s->s, TAG_UINT8); write_int8(s->s, *(int8_t*)jl_data_ptr(v)); } - else if (jl_typetagis(v, jl_lineinfonode_type)) { - write_uint8(s->s, TAG_LINEINFO); - for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) - jl_encode_value(s, jl_get_nth_field(v, i)); - } else if (((jl_datatype_t*)jl_typeof(v))->instance == v) { write_uint8(s->s, TAG_SINGLETON); jl_encode_value(s, jl_typeof(v)); @@ -335,68 +475,41 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_int32(s->s, jl_string_len(v)); ios_write(s->s, jl_string_data(v), jl_string_len(v)); } - else if (as_literal && jl_is_array(v)) { + else if (as_literal && jl_is_array(v) && jl_array_ndims(v)) { jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(ar)); - int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { - write_uint8(s->s, TAG_ARRAY1D); - write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); - } - else { - write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, ar->flags.ndims); - write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); - } - for (i = 0; i < ar->flags.ndims; i++) - jl_encode_value(s, jl_box_long(jl_array_dim(ar,i))); + write_uint8(s->s, TAG_ARRAY1D); + size_t l = jl_array_dim0(ar); + jl_value_t *lbox = jl_box_long(l); + JL_GC_PUSH1(&lbox); + jl_encode_value(s, lbox); + JL_GC_POP(); jl_encode_value(s, jl_typeof(ar)); - size_t l = jl_array_len(ar); - if (ar->flags.ptrarray) { - for (i = 0; i < l; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); - jl_encode_value(s, e); - } - } - else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); - uint16_t elsz = ar->elsize; - size_t j, np = 
((jl_datatype_t*)et)->layout->npointers; - for (i = 0; i < l; i++) { - const char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset((jl_datatype_t*)et, j); - const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; - if ((const char*)fld != start) - ios_write(s->s, start, (const char*)fld - start); - JL_GC_PROMISE_ROOTED(*fld); - jl_encode_value(s, *fld); - start = (const char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_write(s->s, start, data - start); - } - } - else { - ios_write(s->s, (char*)jl_array_data(ar), l * ar->elsize); - if (jl_array_isbitsunion(ar)) - ios_write(s->s, jl_array_typetagdata(ar), l); - } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ar->ref.mem))->layout; + size_t offset; + if (layout->flags.arrayelem_isunion || layout->size == 0) + offset = (uintptr_t)ar->ref.ptr_or_offset; + else + offset = ((char*)ar->ref.ptr_or_offset - (char*)ar->ref.mem->ptr) / layout->size; + jl_encode_memory_slice(s, ar->ref.mem, offset, l); + } + else if (as_literal && jl_is_genericmemory(v)) { + jl_genericmemory_t* m = (jl_genericmemory_t*)v; + write_uint8(s->s, TAG_MEMORYT); + jl_encode_value(s, (jl_datatype_t*)jl_typetagof(v)); + jl_value_t *lbox = jl_box_long(m->length); + JL_GC_PUSH1(&lbox); + jl_encode_value(s, lbox); + JL_GC_POP(); + jl_encode_memory_slice(s, m, 0, m->length); + } + else if (as_literal && jl_is_layout_opaque(((jl_datatype_t*)jl_typeof(v))->layout)) { + assert(0 && "not legal to store this as literal"); } else if (as_literal || jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_linenode(v) || jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slotnumber(v) || jl_is_ssavalue(v) || (jl_isbits(jl_typeof(v)) && jl_datatype_size(jl_typeof(v)) <= 64)) { + write_uint8(s->s, TAG_GENERAL); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - size_t tsz = jl_datatype_size(t); - if (tsz <= 255) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, tsz); - } - else { - write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, tsz); - } jl_encode_value(s, t); char *data = (char*)jl_data_ptr(v); @@ -434,105 +547,114 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } } -static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, uint8_t has_fcall, - uint8_t nospecializeinfer, uint8_t inlining, uint8_t constprop) +static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t has_fcall, + uint8_t nospecializeinfer, uint8_t isva, + uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod, + jl_array_t *ssaflags) { jl_code_info_flags_t flags; - flags.bits.inferred = inferred; flags.bits.propagate_inbounds = propagate_inbounds; flags.bits.has_fcall = has_fcall; flags.bits.nospecializeinfer = nospecializeinfer; + flags.bits.isva = isva; flags.bits.inlining = inlining; flags.bits.constprop = constprop; + flags.bits.nargsmatchesmethod = nargsmatchesmethod; + flags.bits.has_ssaflags = 0; + const uint32_t *ssaflag_data = jl_array_data(ssaflags, uint32_t); + for (size_t i = 0, l = jl_array_dim0(ssaflags); i < l; i++) + if (ssaflag_data[i]) + flags.bits.has_ssaflags = 1; return flags; } // --- decoding --- -static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED; +static jl_value_t *jl_decode_value(jl_ircode_state *s); -static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) { size_t i, len; if (tag == TAG_SVEC) len 
= read_uint8(s->s); else len = read_int32(s->s); - jl_svec_t *sv = jl_alloc_svec_uninit(len); - jl_value_t **data = jl_svec_data(sv); - for (i = 0; i < len; i++) { - data[i] = jl_decode_value(s); - } + jl_svec_t *sv = jl_alloc_svec(len); + JL_GC_PUSH1(&sv); + for (i = 0; i < len; i++) + jl_svecset(sv, i, jl_decode_value(s)); + JL_GC_POP(); return (jl_value_t*)sv; } -static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_genericmemory_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel) { - int16_t i, ndims; - int isptr, isunion, hasptr, elsize; - if (tag == TAG_ARRAY1D) { - ndims = 1; - elsize = read_uint8(s->s); - isptr = (elsize >> 7) & 1; - hasptr = (elsize >> 6) & 1; - isunion = (elsize >> 5) & 1; - elsize = elsize & 0x1f; - } - else { - ndims = read_uint16(s->s); - elsize = read_uint16(s->s); - isptr = (elsize >> 15) & 1; - hasptr = (elsize >> 14) & 1; - isunion = (elsize >> 13) & 1; - elsize = elsize & 0x1fff; - } - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_decode_value(s)); - } - jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); - jl_value_t *aty = jl_decode_value(s); - jl_set_typeof(a, aty); - if (a->flags.ptrarray) { - jl_value_t **data = (jl_value_t**)jl_array_data(a); - size_t i, numel = jl_array_len(a); + jl_genericmemory_t *m = jl_alloc_genericmemory(mty, nel); + JL_GC_PUSH1(&m); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty)->layout; + if (layout->flags.arrayelem_isboxed) { + jl_value_t **data = (jl_value_t**)m->ptr; + size_t i, numel = m->length; for (i = 0; i < numel; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(m, data[i]); } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else if (a->flags.hasptr) { - size_t i, numel = jl_array_len(a); - char *data = (char*)jl_array_data(a); - uint16_t elsz = a->elsize; - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - size_t j, np = et->layout->npointers; + } + else if (layout->first_ptr >= 0) { + size_t i, numel = m->length; + char *data = (char*)m->ptr; + uint16_t elsz = layout->size; + size_t j, np = layout->npointers; for (i = 0; i < numel; i++) { char *start = data; for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); + uint32_t ptr = jl_ptr_offset((jl_datatype_t*)mty, j); jl_value_t **fld = &((jl_value_t**)data)[ptr]; if ((char*)fld != start) ios_readall(s->s, start, (const char*)fld - start); *fld = jl_decode_value(s); + jl_gc_wb(m, fld); start = (char*)&fld[1]; } data += elsz; if (data != start) ios_readall(s->s, start, data - start); } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled } else { - size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0; - size_t tot = jl_array_len(a) * a->elsize + extra; - ios_readall(s->s, (char*)jl_array_data(a), tot); + size_t extra = jl_genericmemory_isbitsunion(m) ? 
m->length : 0; + size_t tot = m->length * layout->size + extra; + ios_readall(s->s, (char*)m->ptr, tot); } + JL_GC_POP(); + return m; +} + +JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); + +static jl_value_t *jl_decode_value_array1d(jl_ircode_state *s, uint8_t tag) +{ + int16_t ndims = 1; + size_t dim0 = jl_unbox_long(jl_decode_value(s)); + size_t len = dim0; + jl_value_t *aty = jl_decode_value(s); + JL_GC_PROMISE_ROOTED(aty); // (JL_ALWAYS_LEAFTYPE) + jl_genericmemory_t *mem = jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len); + JL_GC_PUSH1(&mem); + int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t); + jl_array_t *a = (jl_array_t*)jl_gc_alloc(s->ptls, tsz, aty); + a->ref.mem = mem; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; + if (layout->flags.arrayelem_isunion || layout->size == 0) + a->ref.ptr_or_offset = (void*)0; + else + a->ref.ptr_or_offset = a->ref.mem->ptr; + a->dimsize[0] = dim0; + JL_GC_POP(); return (jl_value_t*)a; } -static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) { size_t i, len; jl_sym_t *head = NULL; @@ -553,14 +675,18 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D if (head == NULL) head = (jl_sym_t*)jl_decode_value(s); jl_expr_t *e = jl_exprn(head, len); - jl_value_t **data = (jl_value_t**)(e->args->data); + JL_GC_PUSH1(&e); + jl_value_t **data = jl_array_ptr_data(e->args); + jl_value_t *owner = jl_array_owner(e->args); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(owner, data[i]); } + JL_GC_POP(); return (jl_value_t*)e; } -static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) { size_t i, len_e, len_v; if (tag == TAG_PHINODE) { @@ -570,21 +696,27 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI len_e = read_int32(s->s); len_v = read_int32(s->s); } - jl_array_t *e = jl_alloc_array_1d(jl_array_int32_type, len_e); - jl_array_t *v = jl_alloc_vec_any(len_v); - jl_value_t *phi = jl_new_struct(jl_phinode_type, e, v); - int32_t *data_e = (int32_t*)(e->data); + jl_array_t *e = NULL; + jl_array_t *v = NULL; + jl_value_t *phi = NULL; + JL_GC_PUSH3(&e, &v, &phi); + e = jl_alloc_array_1d(jl_array_int32_type, len_e); + v = jl_alloc_vec_any(len_v); + phi = jl_new_struct(jl_phinode_type, e, v); + int32_t *data_e = jl_array_data(e, int32_t); for (i = 0; i < len_e; i++) { data_e[i] = jl_unbox_int32(jl_decode_value(s)); } - jl_value_t **data_v = (jl_value_t**)(v->data); + jl_value_t **data_v = jl_array_ptr_data(v); for (i = 0; i < len_v; i++) { data_v[i] = jl_decode_value(s); + jl_gc_wb(jl_array_owner(v), data_v[i]); } + JL_GC_POP(); return phi; } -static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) { size_t i, len; if (tag == TAG_PHICNODE) @@ -592,41 +724,53 @@ static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_D else len = read_int32(s->s); jl_array_t *v = jl_alloc_vec_any(len); - jl_value_t *phic = jl_new_struct(jl_phicnode_type, v); - jl_value_t **data = (jl_value_t**)(v->data); + jl_value_t *phic = (jl_value_t*)v; + JL_GC_PUSH1(&phic); + phic = jl_new_struct(jl_phicnode_type, v); 
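A recurring change across these decode helpers is that compression and decompression no longer run with the GC disabled, so every freshly allocated container has to be rooted with JL_GC_PUSH*/JL_GC_POP and every pointer store into it paired with a write barrier (jl_gc_wb), as in the Expr/PhiNode/PhiCNode cases in this hunk. A minimal sketch of that pattern, assuming it lives in this file; decode_one is a hypothetical stand-in for jl_decode_value:

    /* Sketch only: rooting + write-barrier pattern used when the GC stays
     * enabled during IR decoding. `decode_one` is hypothetical. */
    static jl_value_t *decode_elements(jl_ircode_state *s, size_t len)
    {
        jl_array_t *v = NULL;
        JL_GC_PUSH1(&v);                    /* keep the container alive across allocations */
        v = jl_alloc_vec_any(len);
        for (size_t i = 0; i < len; i++) {
            jl_value_t *e = decode_one(s);  /* may allocate and therefore may trigger GC */
            jl_array_ptr_set(v, i, e);      /* store plus write barrier, equivalent to the
                                               explicit jl_gc_wb calls in the real decoders */
        }
        JL_GC_POP();
        return (jl_value_t*)v;
    }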
+ jl_value_t **data = jl_array_ptr_data(v); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(jl_array_owner(v), data[i]); } + JL_GC_POP(); return phic; } -static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) JL_GC_DISABLED +static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) { - jl_value_t *mod = jl_decode_value(s); - jl_value_t *var = jl_decode_value(s); - return jl_module_globalref((jl_module_t*)mod, (jl_sym_t*)var); + jl_module_t *mod = (jl_module_t*)jl_decode_value(s); + JL_GC_PROMISE_ROOTED(mod); + jl_sym_t *var = (jl_sym_t*)jl_decode_value(s); + JL_GC_PROMISE_ROOTED(var); + return jl_module_globalref(mod, var); } -static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_any(jl_ircode_state *s) { - int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0xf50); jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s); - if (dt->smalltag) + JL_GC_PROMISE_ROOTED(dt); // (JL_ALWAYS_LEAFTYPE) + // jl_new_struct_uninit + size_t sz = jl_datatype_size(dt); + jl_value_t *v = jl_gc_alloc(s->ptls, sz, dt); + if (dt->smalltag) // TODO: do we need this? jl_set_typetagof(v, dt->smalltag, 0); - else - jl_set_typeof(v, dt); char *data = (char*)jl_data_ptr(v); size_t i, np = dt->layout->npointers; char *start = data; - for (i = 0; i < np; i++) { - uint32_t ptr = jl_ptr_offset(dt, i); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_decode_value(s); - start = (char*)&fld[1]; + if (np) { + if (sz > 0) + memset(v, 0, sz); + JL_GC_PUSH1(&v); + for (i = 0; i < np; i++) { + uint32_t ptr = jl_ptr_offset(dt, i); + jl_value_t **fld = &((jl_value_t**)data)[ptr]; + if ((char*)fld != start) + ios_readall(s->s, start, (const char*)fld - start); + *fld = jl_decode_value(s); + jl_gc_wb(v, *fld); + start = (char*)&fld[1]; + } + JL_GC_POP(); } data += jl_datatype_size(dt); if (data != start) @@ -634,11 +778,11 @@ static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DI return v; } -static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED +static jl_value_t *jl_decode_value(jl_ircode_state *s) { assert(!ios_eof(s->s)); jl_value_t *v; - size_t i, n; + size_t n; uint64_t key; uint8_t tag = read_uint8(s->s); if (tag > LAST_TAG) @@ -663,6 +807,10 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED return lookup_root(s->method, 0, read_uint8(s->s)); case TAG_LONG_METHODROOT: return lookup_root(s->method, 0, read_uint32(s->s)); + case TAG_EDGE: + return jl_svecref(s->edges, read_uint8(s->s)); + case TAG_LONG_EDGE: + return jl_svecref(s->edges, read_uint32(s->s)); case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC: return jl_decode_value_svec(s, tag); case TAG_COMMONSYM: @@ -670,14 +818,21 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_SSAVALUE: v = jl_box_ssavalue(read_uint8(s->s)); return v; + case TAG_NEARBYSSAVALUE: + v = jl_box_ssavalue(s->ssaid - read_uint8(s->s)); + return v; case TAG_LONG_SSAVALUE: v = jl_box_ssavalue(read_uint16(s->s)); return v; case TAG_SLOTNUMBER: v = jl_box_slotnumber(read_uint16(s->s)); return v; - case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: - return jl_decode_value_array(s, tag); + case TAG_ARRAY1D: + return jl_decode_value_array1d(s, tag); + case TAG_MEMORYT: + v = jl_decode_value(s); + 
JL_GC_PROMISE_ROOTED(v); // (JL_ALWAYS_LEAFTYPE) + return (jl_value_t*)jl_decode_value_memory(s, v, jl_unbox_long(jl_decode_value(s))); case TAG_EXPR: JL_FALLTHROUGH; case TAG_LONG_EXPR: JL_FALLTHROUGH; case TAG_CALL1: JL_FALLTHROUGH; @@ -688,22 +843,47 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_PHICNODE: JL_FALLTHROUGH; case TAG_LONG_PHICNODE: return jl_decode_value_phic(s, tag); case TAG_GOTONODE: JL_FALLTHROUGH; case TAG_QUOTENODE: + { v = jl_new_struct_uninit(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type); + JL_GC_PUSH1(&v); set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_GOTOIFNOT: + { v = jl_new_struct_uninit(jl_gotoifnot_type); + JL_GC_PUSH1(&v); set_nth_field(jl_gotoifnot_type, v, 0, jl_decode_value(s), 0); set_nth_field(jl_gotoifnot_type, v, 1, jl_decode_value(s), 0); + JL_GC_POP(); + return v; + } + case TAG_ENTERNODE: + { + v = jl_new_struct_uninit(jl_enternode_type); + JL_GC_PUSH1(&v); + set_nth_field(jl_enternode_type, v, 0, jl_decode_value(s), 0); + set_nth_field(jl_enternode_type, v, 1, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_ARGUMENT: + { v = jl_new_struct_uninit(jl_argument_type); + JL_GC_PUSH1(&v); set_nth_field(jl_argument_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_RETURNNODE: + { v = jl_new_struct_uninit(jl_returnnode_type); + JL_GC_PUSH1(&v); set_nth_field(jl_returnnode_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_SHORTER_INT64: v = jl_box_int64((int16_t)read_uint16(s->s)); return v; @@ -722,9 +902,14 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_UINT8: return jl_box_uint8(read_uint8(s->s)); case TAG_NEARBYGLOBAL: - assert(s->method != NULL); + { + jl_method_t *m = s->method; + assert(m != NULL); + JL_GC_PROMISE_ROOTED(m); v = jl_decode_value(s); - return jl_module_globalref(s->method->module, (jl_sym_t*)v); + JL_GC_PROMISE_ROOTED(v); // symbol + return jl_module_globalref(m->module, (jl_sym_t*)v); + } case TAG_NEARBYMODULE: assert(s->method != NULL); return (jl_value_t*)s->method->module; @@ -737,26 +922,29 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_BASE: return (jl_value_t*)jl_base_module; case TAG_VECTORTY: + { v = jl_decode_value(s); - return jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1)); + JL_GC_PUSH1(&v); + v = jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1)); + JL_GC_POP(); + return v; + } case TAG_PTRTY: + { v = jl_decode_value(s); - return jl_apply_type1((jl_value_t*)jl_pointer_type, v); + JL_GC_PUSH1(&v); + v = jl_apply_type1((jl_value_t*)jl_pointer_type, v); + JL_GC_POP(); + return v; + } case TAG_STRING: n = read_int32(s->s); v = jl_alloc_string(n); ios_readall(s->s, jl_string_data(v), n); return v; - case TAG_LINEINFO: - v = jl_new_struct_uninit(jl_lineinfonode_type); - for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) { - //size_t offs = jl_field_offset(jl_lineinfonode_type, i); - set_nth_field(jl_lineinfonode_type, v, i, jl_decode_value(s), 0); - } - return v; default: - assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL); - return jl_decode_value_any(s, tag); + assert(tag == TAG_GENERAL); + return jl_decode_value_any(s); } } @@ -764,93 +952,152 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED typedef jl_value_t jl_string_t; // for local expressibility +static size_t 
codelocs_parseheader(jl_string_t *cl, int *line_offset, int *line_bytes, int *to_bytes) JL_NOTSAFEPOINT +{ + if (jl_string_len(cl) == 0) { + *line_offset = *line_bytes = *to_bytes = 0; + return 0; + } + int32_t header[3]; + memcpy(&header, (char*)jl_string_data(cl), sizeof(header)); + *line_offset = header[0]; + if (header[1] < 255) + *line_bytes = 1; + else if (header[1] < 65535) + *line_bytes = 2; + else + *line_bytes = 4; + if (header[2] == 0) + *to_bytes = 0; + else if (header[2] < 255) + *to_bytes = 1; + else if (header[2] < 65535) + *to_bytes = 2; + else + *to_bytes = 4; + assert(jl_string_len(cl) >= sizeof(header) + *line_bytes); + return (jl_string_len(cl) - sizeof(header) - *line_bytes) / (*line_bytes + *to_bytes * 2); // compute nstmts +} +#ifndef NDEBUG +static int codelocs_nstmts(jl_string_t *cl) JL_NOTSAFEPOINT +{ + int line_offset, line_bytes, to_bytes; + return codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes); +} +#endif + +#define IR_DATASIZE_FLAGS sizeof(uint16_t) +#define IR_DATASIZE_PURITY sizeof(uint16_t) +#define IR_DATASIZE_INLINING_COST sizeof(uint16_t) +#define IR_DATASIZE_NSLOTS sizeof(int32_t) +typedef enum { + ir_offset_flags = 0, + ir_offset_purity = 0 + IR_DATASIZE_FLAGS, + ir_offset_inlining_cost = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY, + ir_offset_nslots = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST, + ir_offset_slotflags = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST + IR_DATASIZE_NSLOTS +} ir_offset; + +// static_assert is technically a declaration, so shenanigans are required to +// open an inline declaration context. `sizeof` is the traditional way to do this, +// but this pattern is illegal in C++, which some compilers warn about, so use +// `offsetof` instead. +#define declaration_context(what) (void)offsetof(struct{what; int dummy_;}, dummy_) + +// Checks (at compile time) that sizeof(data) == macro_size +#define checked_size(data, macro_size) \ + (declaration_context(static_assert(sizeof(data) == macro_size, #macro_size " does not match written size")), data) + JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) { JL_TIMING(AST_COMPRESS, AST_COMPRESS); JL_LOCK(&m->writelock); // protect the roots array (Might GC) + int isdef = code == NULL; + if (isdef) + code = (jl_code_info_t*)m->source; assert(jl_is_method(m)); assert(jl_is_code_info(code)); + assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0); ios_t dest; ios_mem(&dest, 0); - int en = jl_gc_enable(0); // Might GC - size_t i; if (m->roots == NULL) { m->roots = jl_alloc_vec_any(0); jl_gc_wb(m, m->roots); } + jl_value_t *edges = code->edges; jl_ircode_state s = { &dest, + 0, m, + (!isdef && jl_is_svec(edges)) ? 
(jl_svec_t*)edges : jl_emptysvec, jl_current_task->ptls, 1 }; - jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, code->has_fcall, - code->nospecializeinfer, code->inlining, code->constprop); - write_uint8(s.s, flags.packed); - write_uint8(s.s, code->purity.bits); - write_uint16(s.s, code->inlining_cost); - - size_t nslots = jl_array_len(code->slotflags); + uint8_t nargsmatchesmethod = code->nargs == m->nargs; + jl_code_info_flags_t flags = code_info_flags(code->propagate_inbounds, code->has_fcall, + code->nospecializeinfer, code->isva, + code->inlining, code->constprop, + nargsmatchesmethod, + code->ssaflags); + write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS)); + write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY)); + write_uint16(s.s, checked_size(code->inlining_cost, IR_DATASIZE_INLINING_COST)); + + size_t nslots = jl_array_nrows(code->slotflags); assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions - write_int32(s.s, nslots); - ios_write(s.s, (char*)jl_array_data(code->slotflags), nslots); + write_int32(s.s, checked_size((int32_t)nslots, IR_DATASIZE_NSLOTS)); + ios_write(s.s, jl_array_data(code->slotflags, const char), nslots); // N.B.: The layout of everything before this point is explicitly referenced // by the various jl_ir_ accessors. Make sure to adjust those if you change // the data layout. - - for (i = 0; i < 6; i++) { - int copy = 1; - if (i == 1) { // skip codelocs - assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, codelocs)); - continue; - } - if (i == 4) { // don't copy contents of method_for_inference_limit_heuristics field - assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, method_for_inference_limit_heuristics)); - copy = 0; - } - jl_encode_value_(&s, jl_get_nth_field((jl_value_t*)code, i), copy); - } + if (!nargsmatchesmethod) { + size_t nargs = code->nargs; + assert(nargs < INT32_MAX); + write_int32(s.s, (int32_t)nargs); + } + + size_t i, l = jl_array_dim0(code->code); + write_uint64(s.s, l); + for (i = 0; i < l; i++) { + s.ssaid = i; + jl_encode_value(&s, jl_array_ptr_ref(code->code, i)); + } + s.ssaid = 0; + jl_encode_value_(&s, (jl_value_t*)code->ssavaluetypes, 1); + assert(jl_typetagis(code->ssaflags, jl_array_uint32_type)); + assert(jl_array_dim0(code->ssaflags) == l); + const uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t); + if (flags.bits.has_ssaflags) + ios_write(s.s, (const char*)ssaflags_data, l * sizeof(*ssaflags_data)); // For opaque closure, also save the slottypes. We technically only need the first slot type, // but this is simpler for now. We may want to refactor where this gets stored in the future. if (m->is_for_opaque_closure) jl_encode_value_(&s, code->slottypes, 1); - if (m->generator) + jl_string_t *v = NULL; + JL_GC_PUSH1(&v); + // Slotnames. For regular methods, we require that m->slot_syms matches the + // CodeInfo's slotnames, so we do not need to save it here. 
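The accessors further down (jl_ir_flag_inlining, jl_ir_inlining_cost, jl_ir_nslots, jl_ir_slotflag) rely on the fixed prefix written just above: flags at byte 0, purity at 2, inlining_cost at 4, nslots at 6, then nslots slotflag bytes at 10, per the IR_DATASIZE_* and ir_offset_* definitions. A small self-contained sketch of reading that prefix follows; the sketch_* names are illustrative only, and the real code uses unaligned loads at explicit byte offsets because a literal C struct would typically pad before nslots:

    /* Sketch only: pulling nslots out of a compressed-IR blob, mirroring
     * jl_ir_nslots below. memcpy handles the unaligned 32-bit load. */
    #include <stdint.h>
    #include <string.h>

    enum {
        sketch_offset_flags         = 0,                               /* uint16 */
        sketch_offset_purity        = sketch_offset_flags + 2,         /* uint16 */
        sketch_offset_inlining_cost = sketch_offset_purity + 2,        /* uint16 */
        sketch_offset_nslots        = sketch_offset_inlining_cost + 2, /* int32  */
        sketch_offset_slotflags     = sketch_offset_nslots + 4         /* nslots bytes */
    };

    static int32_t sketch_ir_nslots(const char *blob)
    {
        int32_t nslots;
        memcpy(&nslots, blob + sketch_offset_nslots, sizeof(nslots));
        return nslots;
    }

Everything after the slotflags bytes is variable-length payload (optional nargs, the statement stream, ssavaluetypes, optional ssaflags, optional slottypes/slotnames, and a trailing relocatability byte); the `if (m->generator)` branch just below is the one case that also stores slotnames.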
+ if (m->generator) { // can't optimize generated functions - jl_encode_value_(&s, (jl_value_t*)jl_compress_argnames(code->slotnames), 1); - else - jl_encode_value(&s, jl_nothing); - - size_t nstmt = jl_array_len(code->code); - assert(nstmt == jl_array_len(code->codelocs)); - if (jl_array_len(code->linetable) < 256) { - for (i = 0; i < nstmt; i++) { - write_uint8(s.s, ((int32_t*)jl_array_data(code->codelocs))[i]); - } - } - else if (jl_array_len(code->linetable) < 65536) { - for (i = 0; i < nstmt; i++) { - write_uint16(s.s, ((int32_t*)jl_array_data(code->codelocs))[i]); - } + v = jl_compress_argnames(code->slotnames); + jl_encode_value_(&s, (jl_value_t*)v, 1); } else { - ios_write(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t)); + jl_encode_value(&s, jl_nothing); } write_uint8(s.s, s.relocatability); ios_flush(s.s); - jl_string_t *v = jl_pchar_to_string(s.s->buf, s.s->size); + v = jl_pchar_to_string(s.s->buf, s.s->size); ios_close(s.s); - if (jl_array_len(m->roots) == 0) { + if (jl_array_nrows(m->roots) == 0) m->roots = NULL; - } - JL_GC_PUSH1(&v); - jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC JL_GC_POP(); @@ -865,101 +1112,109 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t JL_LOCK(&m->writelock); // protect the roots array (Might GC) assert(jl_is_method(m)); assert(jl_is_string(data)); - size_t i; ios_t src; ios_mem(&src, 0); ios_setbuf(&src, (char*)jl_string_data(data), jl_string_len(data), 0); src.size = jl_string_len(data); - int en = jl_gc_enable(0); // Might GC jl_ircode_state s = { &src, + 0, m, + metadata == NULL ? NULL : jl_atomic_load_relaxed(&metadata->edges), jl_current_task->ptls, 1 }; - jl_code_info_t *code = jl_new_code_info_uninit(); + jl_value_t *slotnames = NULL; + JL_GC_PUSH2(&code, &slotnames); + jl_code_info_flags_t flags; - flags.packed = read_uint8(s.s); + flags.packed = read_uint16(s.s); code->inlining = flags.bits.inlining; code->constprop = flags.bits.constprop; - code->inferred = flags.bits.inferred; code->propagate_inbounds = flags.bits.propagate_inbounds; code->has_fcall = flags.bits.has_fcall; code->nospecializeinfer = flags.bits.nospecializeinfer; - code->purity.bits = read_uint8(s.s); + code->isva = flags.bits.isva; + code->purity.bits = read_uint16(s.s); code->inlining_cost = read_uint16(s.s); - size_t nslots = read_int32(&src); + size_t nslots = read_int32(s.s); code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots); - ios_readall(s.s, (char*)jl_array_data(code->slotflags), nslots); + jl_gc_wb(code, code->slotflags); + ios_readall(s.s, jl_array_data(code->slotflags, char), nslots); + + if (flags.bits.nargsmatchesmethod) { + code->nargs = m->nargs; + } else { + code->nargs = read_int32(s.s); + } + + size_t i, l = read_uint64(s.s); + code->code = jl_alloc_array_1d(jl_array_any_type, l); + jl_gc_wb(code, code->code); + for (i = 0; i < l; i++) { + s.ssaid = i; + jl_array_ptr_set(code->code, i, jl_decode_value(&s)); + } + s.ssaid = 0; + code->ssavaluetypes = jl_decode_value(&s); + jl_gc_wb(code, code->ssavaluetypes); + code->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, l); + jl_gc_wb(code, code->ssaflags); + uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t); + if (flags.bits.has_ssaflags) + ios_readall(s.s, (char*)ssaflags_data, l * sizeof(*ssaflags_data)); + else + memset(ssaflags_data, 0, l * sizeof(*ssaflags_data)); - for (i = 0; i < 6; i++) { - if (i == 1) // skip codelocs - continue; - assert(jl_field_isptr(jl_code_info_type, i)); - jl_value_t **fld = 
(jl_value_t**)((char*)jl_data_ptr(code) + jl_field_offset(jl_code_info_type, i)); - *fld = jl_decode_value(&s); - } - if (m->is_for_opaque_closure) + if (m->is_for_opaque_closure) { code->slottypes = jl_decode_value(&s); + jl_gc_wb(code, code->slottypes); + } - jl_value_t *slotnames = jl_decode_value(&s); + slotnames = jl_decode_value(&s); if (!jl_is_string(slotnames)) slotnames = m->slot_syms; code->slotnames = jl_uncompress_argnames(slotnames); + jl_gc_wb(code, code->slotnames); - size_t nstmt = jl_array_len(code->code); - code->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmt); - if (jl_array_len(code->linetable) < 256) { - for (i = 0; i < nstmt; i++) { - ((int32_t*)jl_array_data(code->codelocs))[i] = read_uint8(s.s); - } - } - else if (jl_array_len(code->linetable) < 65536) { - for (i = 0; i < nstmt; i++) { - ((int32_t*)jl_array_data(code->codelocs))[i] = read_uint16(s.s); - } - } - else { - ios_readall(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t)); - } + if (metadata) + code->debuginfo = jl_atomic_load_relaxed(&metadata->debuginfo); + else + code->debuginfo = m->debuginfo; + jl_gc_wb(code, code->debuginfo); + assert(code->debuginfo); + assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0); (void) read_uint8(s.s); // relocatability - + assert(!ios_eof(s.s)); assert(ios_getc(s.s) == -1); + ios_close(s.s); - JL_GC_PUSH1(&code); - jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC - JL_GC_POP(); if (metadata) { - code->min_world = metadata->min_world; - code->max_world = metadata->max_world; + code->parent = jl_get_ci_mi(metadata); + jl_gc_wb(code, code->parent); code->rettype = metadata->rettype; - code->parent = metadata->def; + jl_gc_wb(code, code->rettype); + code->min_world = jl_atomic_load_relaxed(&metadata->min_world); + code->max_world = jl_atomic_load_relaxed(&metadata->max_world); + code->edges = (jl_value_t*)s.edges; + jl_gc_wb(code, s.edges); } + JL_GC_POP(); return code; } -JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_string_t *data) -{ - if (jl_is_code_info(data)) - return ((jl_code_info_t*)data)->inferred; - assert(jl_is_string(data)); - jl_code_info_flags_t flags; - flags.packed = jl_string_data(data)[0]; - return flags.bits.inferred; -} - JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_string_t *data) { if (jl_is_code_info(data)) return ((jl_code_info_t*)data)->inlining; assert(jl_is_string(data)); jl_code_info_flags_t flags; - flags.packed = jl_string_data(data)[0]; + flags.packed = jl_string_data(data)[ir_offset_flags]; return flags.bits.inlining; } @@ -969,7 +1224,7 @@ JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_string_t *data) return ((jl_code_info_t*)data)->has_fcall; assert(jl_is_string(data)); jl_code_info_flags_t flags; - flags.packed = jl_string_data(data)[0]; + flags.packed = jl_string_data(data)[ir_offset_flags]; return flags.bits.has_fcall; } @@ -978,13 +1233,13 @@ JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_string_t *data) if (jl_is_code_info(data)) return ((jl_code_info_t*)data)->inlining_cost; assert(jl_is_string(data)); - uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + 2); + uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + ir_offset_inlining_cost); return res; } JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms) { - size_t nsyms = jl_array_len(syms); + size_t nsyms = jl_array_nrows(syms); size_t i, len = 0; for (i = 0; i < nsyms; i++) { jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(syms, i); @@ -1012,11 
+1267,11 @@ JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data) { if (jl_is_code_info(data)) { jl_code_info_t *func = (jl_code_info_t*)data; - return jl_array_len(func->slotnames); + return jl_array_nrows(func->slotnames); } else { assert(jl_is_string(data)); - int nslots = jl_load_unaligned_i32(jl_string_data(data) + 2 + sizeof(uint16_t)); + int nslots = jl_load_unaligned_i32(jl_string_data(data) + ir_offset_nslots); return nslots; } } @@ -1024,10 +1279,12 @@ JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data) JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_string_t *data, size_t i) { assert(i < jl_ir_nslots(data)); - if (jl_is_code_info(data)) - return ((uint8_t*)((jl_code_info_t*)data)->slotflags->data)[i]; + if (jl_is_code_info(data)) { + jl_array_t *slotflags = ((jl_code_info_t*)data)->slotflags; + return jl_array_data(slotflags, uint8_t)[i]; + } assert(jl_is_string(data)); - return jl_string_data(data)[2 + sizeof(uint16_t) + sizeof(int32_t) + i]; + return jl_string_data(data)[ir_offset_slotflags + i]; } JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms) @@ -1073,6 +1330,244 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i) return jl_nothing; } +// codelocs are compressed as follows: +// The input vector is a NTuple{3,UInt32} (struct jl_codeloc_t) +// The vector is scanned for min and max of the values for each element +// The output is then allocated to hold (min-line, max-line, max-at) first, then line - min (in the smallest space), then the remainder (in the smallest space) +static inline struct jl_codeloc_t unpack_codeloc(jl_string_t *cl, size_t pc, int line_offset, int line_bytes, int to_bytes) JL_NOTSAFEPOINT +{ + const char *ptr = jl_string_data(cl) + sizeof(int32_t[3]); + if (pc == 0) + to_bytes = 0; + else + ptr += line_bytes + (pc - 1) * (line_bytes + to_bytes * 2); + uint8_t int8; + uint16_t int16; + uint32_t int32; + struct jl_codeloc_t codeloc; + switch (line_bytes) { + case 0: + codeloc.line = 0; + break; + case 1: + memcpy(&int8, ptr, 1); + codeloc.line = int8; + break; + case 2: + memcpy(&int16, ptr, 2); + codeloc.line = int16; + break; + case 4: + memcpy(&int32, ptr, 4); + codeloc.line = int32; + break; + } + if (codeloc.line > 0) + codeloc.line += line_offset - 1; + ptr += line_bytes; + switch (to_bytes) { + case 0: + codeloc.to = 0; + break; + case 1: + memcpy(&int8, ptr, 1); + codeloc.to = int8; + break; + case 2: + memcpy(&int16, ptr, 2); + codeloc.to = int16; + break; + case 4: + memcpy(&int32, ptr, 4); + codeloc.to = int32; + break; + } + ptr += to_bytes; + switch (to_bytes) { + case 0: + codeloc.pc = 0; + break; + case 1: + memcpy(&int8, ptr, 1); + codeloc.pc = int8; + break; + case 2: + memcpy(&int16, ptr, 2); + codeloc.pc = int16; + break; + case 3: + memcpy(&int32, ptr, 4); + codeloc.pc = int32; + break; + } + ptr += to_bytes; + return codeloc; +} + + +static const struct jl_codeloc_t badloc = {-1, 0, 0}; + +JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_string_t *cl, size_t pc) JL_NOTSAFEPOINT +{ + assert(jl_is_string(cl)); + int line_offset, line_bytes, to_bytes; + size_t nstmts = codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes); + if (pc > nstmts) + return badloc; + return unpack_codeloc(cl, pc, line_offset, line_bytes, to_bytes); +} + +static int allzero(jl_value_t *codelocs) JL_NOTSAFEPOINT +{ + int32_t *p = jl_array_data(codelocs,int32_t); + int32_t *pend = p + jl_array_nrows(codelocs); + do { + if (*p) + return 0; + } while (++p < pend); + return 1; +} + +JL_DLLEXPORT jl_string_t 
*jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts) // firstline+Vector{Int32} => Memory{UInt8} +{ + assert(jl_typeis(codelocs, jl_array_int32_type)); + if (jl_array_nrows(codelocs) == 0) + nstmts = 0; + assert(nstmts * 3 == jl_array_nrows(codelocs)); + if (allzero(codelocs)) + return jl_an_empty_string; + struct jl_codeloc_t codeloc, min, max; + size_t i; + min.line = min.to = min.pc = firstline <= 0 ? INT32_MAX : firstline; + max.line = max.to = max.pc = 0; + for (i = 0; i < nstmts; i++) { + memcpy(&codeloc, jl_array_data(codelocs,int32_t) + 3 * i, sizeof(codeloc)); +#define SETMIN(x) if (codeloc.x < min.x) min.x = codeloc.x +#define SETMAX(x) if (codeloc.x > max.x) max.x = codeloc.x + if (codeloc.line > 0) + SETMIN(line); + SETMAX(line); + SETMIN(to); + SETMAX(to); + SETMIN(pc); + SETMAX(pc); +#undef SETMIN +#undef SETMAX + } + int32_t header[3]; + header[0] = min.line > max.line ? 0 : min.line; + header[1] = min.line > max.line ? 0 : max.line - min.line; + header[2] = max.to > max.pc ? max.to : max.pc; + size_t line_bytes; + if (header[1] < 255) + line_bytes = 1; + else if (header[1] < 65535) + line_bytes = 2; + else + line_bytes = 4; + size_t to_bytes; + if (header[2] == 0) + to_bytes = 0; + else if (header[2] < 255) + to_bytes = 1; + else if (header[2] < 65535) + to_bytes = 2; + else + to_bytes = 4; + jl_string_t *cl = jl_alloc_string(sizeof(header) + line_bytes + nstmts * (line_bytes + to_bytes * 2)); + // store header structure + memcpy(jl_string_data(cl), &header, sizeof(header)); + // pack bytes + char *ptr = jl_string_data(cl) + sizeof(header); + uint8_t int8; + uint16_t int16; + uint32_t int32; + { // store firstline value + int8 = int16 = int32 = firstline > 0 ? firstline - header[0] + 1 : 0; + switch (line_bytes) { + case 0: + break; + case 1: + memcpy(ptr, &int8, 1); + break; + case 2: + memcpy(ptr, &int16, 2); + break; + case 4: + memcpy(ptr, &int32, 4); + break; + } + ptr += line_bytes; + } + for (i = 0; i < nstmts; i++) { + memcpy(&codeloc, jl_array_data(codelocs,int32_t) + 3 * i, sizeof(codeloc)); + int8 = int16 = int32 = codeloc.line > 0 ? 
codeloc.line - header[0] + 1 : 0; + switch (line_bytes) { + case 0: + break; + case 1: + memcpy(ptr, &int8, 1); + break; + case 2: + memcpy(ptr, &int16, 2); + break; + case 4: + memcpy(ptr, &int32, 4); + break; + } + ptr += line_bytes; + int8 = int16 = int32 = codeloc.to; + switch (to_bytes) { + case 0: + break; + case 1: + memcpy(ptr, &int8, 1); + break; + case 2: + memcpy(ptr, &int16, 2); + break; + case 4: + memcpy(ptr, &int32, 4); + break; + } + ptr += to_bytes; + int8 = int16 = int32 = codeloc.pc; + switch (to_bytes) { + case 0: + break; + case 1: + memcpy(ptr, &int8, 1); + break; + case 2: + memcpy(ptr, &int16, 2); + break; + case 4: + memcpy(ptr, &int32, 4); + break; + } + ptr += to_bytes; + } + return cl; +} + +JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_string_t *cl, size_t nstmts) // Memory{UInt8} => Vector{Int32} +{ + assert(jl_is_string(cl)); + int line_offset, line_bytes, to_bytes; + size_t nlocs = codelocs_parseheader(cl, &line_offset, &line_bytes, &to_bytes); + assert(nlocs == 0 || nlocs == nstmts); + jl_value_t *codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmts * 3); + size_t i; + for (i = 0; i < nlocs; i++) { + struct jl_codeloc_t codeloc = unpack_codeloc(cl, i + 1, line_offset, line_bytes, to_bytes);; + memcpy(jl_array_data(codelocs,int32_t) + i * 3, &codeloc, sizeof(codeloc)); + } + if (nlocs == 0) { + memset(jl_array_data(codelocs,int32_t), 0, nstmts * sizeof(struct jl_codeloc_t)); + } + return codelocs; +} + void jl_init_serializer(void) { jl_task_t *ct = jl_current_task; @@ -1112,12 +1607,14 @@ void jl_init_serializer(void) jl_methtable_type, jl_typemap_level_type, jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type), - jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, + jl_emptytuple_type, jl_array_uint8_type, jl_array_uint32_type, jl_code_info_type, jl_typeofbottom_type, jl_typeofbottom_type->super, jl_namedtuple_type, jl_array_int32_type, jl_uint32_type, jl_uint64_type, jl_type_type_mt, jl_nonfunction_mt, jl_opaque_closure_type, + jl_memory_any_type, + jl_memory_uint8_type, ct->ptls->root_task, @@ -1134,7 +1631,8 @@ void jl_init_serializer(void) deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; - deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; + deser_tag[TAG_ARRAY1D] = (jl_value_t*)jl_array_type; + deser_tag[TAG_MEMORYT] = (jl_value_t*)jl_genericmemory_type; deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; @@ -1148,7 +1646,6 @@ void jl_init_serializer(void) deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; - deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type; deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index ae4a2ed02fb7e..0acb7beaca9ab 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -3,7 +3,7 @@ #include "llvm-version.h" #include "platform.h" #include -#include +#include #include "llvm/IR/Mangler.h" #include @@ -14,6 +14,15 @@ #include #include #include +#if JL_LLVM_VERSION >= 180000 +#include +#include +#include 
+#endif +#if JL_LLVM_VERSION >= 190000 +#include +#include +#endif #include #include #include @@ -28,31 +37,29 @@ #include #include #include -#include +#include #include #include using namespace llvm; -#include "llvm-codegen-shared.h" #include "jitlayers.h" #include "julia_assert.h" #include "processor.h" +#if JL_LLVM_VERSION >= 180000 +# include +#else # include +#endif # include # include -# if JL_LLVM_VERSION >= 150000 # include -# endif # include #define DEBUG_TYPE "julia_jitlayers" STATISTIC(LinkedGlobals, "Number of globals linked"); -STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly"); -STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue"); -STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled"); STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled"); STATISTIC(UnspecFPtrCount, "Number of specialized function pointers compiled"); STATISTIC(ModulesAdded, "Number of modules added to the JIT"); @@ -116,6 +123,15 @@ static void *getTLSAddress(void *control) } #endif +#ifdef _OS_OPENBSD_ +extern "C" { + __int128 __divti3(__int128, __int128); + __int128 __modti3(__int128, __int128); + unsigned __int128 __udivti3(unsigned __int128, unsigned __int128); + unsigned __int128 __umodti3(unsigned __int128, unsigned __int128); +} +#endif + // Snooping on which functions are being compiled, and how long it takes extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_compiles_impl(void *s) @@ -128,24 +144,19 @@ void jl_dump_llvm_opt_impl(void *s) **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s; } -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - std::vector &Stack) JL_NOTSAFEPOINT; static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT; -static uint64_t getAddressForFunction(StringRef fname) JL_NOTSAFEPOINT; void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT { ++LinkedGlobals; Constant *P = literal_static_pointer_val(addr, GV->getValueType()); GV->setInitializer(P); + GV->setDSOLocal(true); if (jl_options.image_codegen) { // If we are forcing imaging mode codegen for debugging, // emit external non-const symbol to avoid LLVM optimizing the code // similar to non-imaging mode. 
- GV->setLinkage(GlobalValue::ExternalLinkage); + assert(GV->hasExternalLinkage()); } else { GV->setConstant(true); @@ -155,6 +166,53 @@ void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT } } +// convert local roots into global roots, if they are needed +static void jl_optimize_roots(jl_codegen_params_t ¶ms, jl_method_instance_t *mi, Module &M) +{ + JL_GC_PROMISE_ROOTED(params.temporary_roots); // rooted by caller + if (jl_array_dim0(params.temporary_roots) == 0) + return; + jl_method_t *m = mi->def.method; + if (jl_is_method(m)) + // the method might have a root for this already; use it if so + JL_LOCK(&m->writelock); + for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) { + jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i); + auto ref = params.global_targets.find((void*)val); + if (ref == params.global_targets.end()) + continue; + auto get_global_root = [val, m]() { + if (jl_is_globally_rooted(val)) + return val; + if (jl_is_method(m) && m->roots) { + size_t j, len = jl_array_dim0(m->roots); + for (j = 0; j < len; j++) { + jl_value_t *mval = jl_array_ptr_ref(m->roots, j); + if (jl_egal(mval, val)) { + return mval; + } + } + } + return jl_as_global_root(val, 1); + }; + jl_value_t *mval = get_global_root(); + if (mval != val) { + GlobalVariable *GV = ref->second; + params.global_targets.erase(ref); + auto mref = params.global_targets.find((void*)mval); + if (mref != params.global_targets.end()) { + GV->replaceAllUsesWith(mref->second); + GV->eraseFromParent(); + } + else { + params.global_targets[(void*)mval] = GV; + } + } + } + if (jl_is_method(m)) + JL_UNLOCK(&m->writelock); +} + void jl_jit_globals(std::map &globals) JL_NOTSAFEPOINT { for (auto &global : globals) { @@ -162,175 +220,557 @@ void jl_jit_globals(std::map &globals) JL_NOTSAFEPOINT } } -// this generates llvm code for the lambda info -// and adds the result to the jitlayers -// (and the shadow module), -// and generates code for it -static jl_callptr_t _jl_compile_codeinst( - jl_code_instance_t *codeinst, - jl_code_info_t *src, - size_t world, - orc::ThreadSafeContext context, - bool is_recompile) + // lock for places where only single threaded behavior is implemented, so we need GC support +static jl_mutex_t jitlock; + // locks for adding external code to the JIT atomically +static std::mutex extern_c_lock; + // locks and barriers for this state +static std::mutex engine_lock; +static std::condition_variable engine_wait; +static int threads_in_compiler_phase; + // the TSM for each codeinst +static SmallVector sharedmodules; +static DenseMap emittedmodules; + // the invoke and specsig function names in the JIT +static DenseMap invokenames; + // everything that any thread wants to compile right now +static DenseSet compileready; + // everything that any thread has compiled recently +static DenseSet linkready; + // a map from a codeinst to the outgoing edges needed before linking it +static DenseMap> complete_graph; + // the state for each codeinst and the number of unresolved edges (we don't + // really need this once JITLink is available everywhere, since every module + // is automatically complete, and we can emit any required fixups later as a + // separate module) +static DenseMap> incompletemodules; + // the set of incoming unresolved edges resolved by a codeinstance +static DenseMap> incomplete_rgraph; + +// Lock hierarchy here: +// jitlock is outermost, can contain others and allows GC +// engine_lock is next +// ThreadSafeContext locks are next, they should not be nested 
(unless engine_lock is also held, but this may make TSAN sad anyways) +// extern_c_lock is next +// jl_ExecutionEngine internal locks are exclusive to this list, since OrcJIT promises to never hold a lock over a materialization unit: +// construct a query object from a query set and query handler +// lock the session +// lodge query against requested symbols, collect required materializers (if any) +// unlock the session +// dispatch materializers (if any) +// However, this guarantee relies on Julia releasing all TSC locks before causing any materialization units to be dispatched +// as materialization may need to acquire TSC locks. + + +static void finish_params(Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT { - // caller must hold codegen_lock - // and have disabled finalizers - uint64_t start_time = 0; - bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream(); - if (timed) - start_time = jl_hrtime(); - - assert(jl_is_code_instance(codeinst)); - assert(codeinst->min_world <= world && (codeinst->max_world >= world || codeinst->max_world == 0) && - "invalid world for method-instance"); - - JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); -#ifdef USE_TRACY - if (is_recompile) { - TracyCZoneColor(JL_TIMING_DEFAULT_BLOCK->tracy_ctx, 0xFFA500); + if (params._shared_module) { + sharedmodules.push_back(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); } + + // In imaging mode, we can't inline global variable initializers in order to preserve + // the fiction that we don't know what loads from the global will return. Thus, we + // need to emit a separate module for the globals before any functions are compiled, + // to ensure that the globals are defined when they are compiled. + if (jl_options.image_codegen) { + if (!params.global_targets.empty()) { + void **globalslots = new void*[params.global_targets.size()]; + void **slot = globalslots; + for (auto &global : params.global_targets) { + auto GV = global.second; + *slot = global.first; + jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot); + slot++; + } +#ifdef __clang_analyzer__ + static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it #endif - jl_callptr_t fptr = NULL; - // emit the code in LLVM IR form - jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.cache = true; - params.world = world; - params.imaging = imaging_default(); - params.debug_level = jl_options.debug_level; - jl_workqueue_t emitted; - { - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging, params.DL, params.TargetTriple); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); - if (result_m) - emitted[codeinst] = {std::move(result_m), std::move(decls)}; - { - auto temp_module = jl_create_llvm_module(name_from_method_instance(codeinst->def), params.getContext(), params.imaging); - jl_compile_workqueue(emitted, *temp_module, params, CompilationPolicy::Default); } + } + else { + StringMap NewGlobals; + for (auto &global : params.global_targets) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } + } + } +} - if (params._shared_module) - 
jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); - if (!params.imaging) { - StringMap NewExports; - StringMap NewGlobals; - for (auto &global : params.globals) { - NewGlobals[global.second->getName()] = global.first; +static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t ¶ms, bool forceall=false) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + jl_task_t *ct = jl_current_task; + decltype(params.workqueue) edges; + std::swap(params.workqueue, edges); + for (auto &it : edges) { + jl_code_instance_t *codeinst = it.first; + JL_GC_PROMISE_ROOTED(codeinst); + auto &proto = it.second; + // try to emit code for this item from the workqueue + StringRef invokeName = ""; + StringRef preal_decl = ""; + bool preal_specsig = false; + jl_callptr_t invoke = nullptr; + bool isedge = false; + assert(params.cache); + // Checking the cache here is merely an optimization and not strictly required + // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock + uint8_t specsigflags; + void *fptr; + void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT; // declare it is not a safepoint (or deadlock) in this file due to 0 parameter + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr) + if (invoke == jl_fptr_args_addr) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); + } + else if (specsigflags & 0b1) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); + preal_specsig = true; + } + bool force = forceall || invoke != nullptr; + if (preal_decl.empty()) { + auto it = invokenames.find(codeinst); + if (it != invokenames.end()) { + auto &decls = it->second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + isedge = true; + } + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + isedge = true; + } + force = true; } - for (auto &def : emitted) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; - } + } + if (preal_decl.empty()) { + // there may be an equivalent method already compiled (or at least registered with the JIT to compile), in which case we should be using that instead + jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 1); + if ((jl_value_t*)compiled_ci != jl_nothing) { + codeinst = compiled_ci; + uint8_t specsigflags; + void *fptr; + jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0); + //if (specsig ? 
specsigflags & 0b1 : invoke == jl_fptr_args_addr) + if (invoke == jl_fptr_args_addr) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); } - // Let's link all globals here also (for now) - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); + else if (specsigflags & 0b1) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, invoke, codeinst); + preal_specsig = true; + } + if (preal_decl.empty()) { + auto it = invokenames.find(codeinst); + if (it != invokenames.end()) { + auto &decls = it->second; + invokeName = decls.functionObject; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; + isedge = true; + } + else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call") { + preal_decl = decls.specFunctionObject; + preal_specsig = true; + isedge = true; + } } } } - DenseMap Queued; - std::vector Stack; - for (auto &def : emitted) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + } + if (!preal_decl.empty() || force) { + // if we have a prototype emitted, compare it to what we emitted earlier + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + Function *pinvoke = nullptr; + if (preal_decl.empty()) { + if (invoke != nullptr && invokeName.empty()) { + assert(invoke != jl_fptr_args_addr); + if (invoke == jl_fptr_sparam_addr) + invokeName = "jl_fptr_sparam"; + else if (invoke == jl_f_opaque_closure_call_addr) + invokeName = "jl_f_opaque_closure_call"; + else + invokeName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params); + if (!proto.specsig) + proto.decl->replaceAllUsesWith(pinvoke); + isedge = false; + } + if (proto.specsig && !preal_specsig) { + // get or build an fptr1 that can invoke codeinst + if (pinvoke == nullptr) + pinvoke = get_or_emit_fptr1(preal_decl, mod); + // emit specsig-to-(jl)invoke conversion + proto.decl->setLinkage(GlobalVariable::InternalLinkage); + //protodecl->setAlwaysInline(); + jl_init_function(proto.decl, params.TargetTriple); + // TODO: maybe this can be cached in codeinst->specfptr? 
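// emit_specsig_to_fptr1 (called just below) emits, in IR, an adapter from the
// generic boxed calling convention to a specialized signature. A plain-C++
// sketch of that adaptation for illustration only: Value, box_i64, unbox_i64
// and specsig_add are hypothetical stand-ins, not the actual Julia ABI.
#include <cstdint>

struct Value { int64_t bits; };                        // stand-in for a boxed value
static int64_t unbox_i64(const Value *v) { return v->bits; }
static Value *box_i64(int64_t x) { static Value v; v.bits = x; return &v; } // sketch only

// specialized ("specsig") entry point: unboxed machine arguments
static int64_t specsig_add(int64_t a, int64_t b) { return a + b; }

// generic ("fptr1"-style) entry point: function object plus boxed argument array
static Value *fptr1_add(Value * /*func*/, Value **args, uint32_t nargs)
{
    (void)nargs; // a real adapter would validate the arity first
    // unbox each argument, call the specialized body, re-box the result
    return box_i64(specsig_add(unbox_i64(args[0]), unbox_i64(args[1])));
}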
+ int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); // codegen may contain safepoints (such as jl_subtype calls) + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke); + jl_gc_unsafe_leave(ct->ptls, gc_state); + preal_decl = ""; // no need to fixup the name + } + if (!preal_decl.empty()) { + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(preal_decl)) { + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); + } + else { + proto.decl->setName(preal_decl); + } + } + if (proto.oc) { // additionally, if we are dealing with an OC constructor, then we might also need to fix up the fptr1 reference too + assert(proto.specsig); + StringRef ocinvokeDecl = invokeName; + if (invoke != nullptr && ocinvokeDecl.empty()) { + // check for some special tokens used by opaque_closure.c and convert those to their real functions + assert(invoke != jl_fptr_args_addr); + assert(invoke != jl_fptr_sparam_addr); + if (invoke == jl_fptr_interpret_call_addr) + ocinvokeDecl = "jl_fptr_interpret_call"; + else if (invoke == jl_fptr_const_return_addr) + ocinvokeDecl = "jl_fptr_const_return"; + else if (invoke == jl_f_opaque_closure_call_addr) + ocinvokeDecl = "jl_f_opaque_closure_call"; + //else if (invoke == jl_interpret_opaque_closure_addr) + else + ocinvokeDecl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, invoke, codeinst); + } + // if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too + // XXX: this invoke translation logic is supposed to exactly match new_opaque_closure + if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return") { + if (pinvoke == nullptr) + ocinvokeDecl = get_or_emit_fptr1(preal_decl, mod)->getName(); + else + ocinvokeDecl = pinvoke->getName(); + } + assert(!ocinvokeDecl.empty()); + assert(ocinvokeDecl != "jl_fptr_args"); + assert(ocinvokeDecl != "jl_fptr_sparam"); + // merge and/or rename this prototype to the real function + if (Value *specfun = mod->getNamedValue(ocinvokeDecl)) { + if (proto.oc != specfun) + proto.oc->replaceAllUsesWith(specfun); + } + else { + proto.oc->setName(ocinvokeDecl); + } } - } else { - jl_jit_globals(params.globals); - auto main = std::move(emitted[codeinst].first); - for (auto &def : emitted) { - if (def.first != codeinst) { - jl_merge_module(main, std::move(def.second.first)); + } + else { + isedge = true; + params.workqueue.push_back(it); + incomplete_rgraph[codeinst].push_back(callee); + } + if (isedge) + complete_graph[callee].push_back(codeinst); + } + return params.workqueue.size(); +} + +// move codeinst (and deps) from incompletemodules to emitted modules +// and populate compileready from complete_graph +static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + SmallVector workqueue; + workqueue.push_back(codeinst); + while (!workqueue.empty()) { + codeinst = workqueue.pop_back_val(); + if (!invokenames.count(codeinst)) { + // this means it should be compiled already while the callee was in stasis + assert(jl_is_compiled_codeinst(codeinst)); + continue; + } + 
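// prepare_compile (the function this loop belongs to) is a standard worklist
// traversal: starting from one code instance, everything reachable through
// complete_graph is marked ready to compile unless it is already linked. A
// minimal sketch with standard containers; Node stands in for jl_code_instance_t*.
#include <unordered_map>
#include <unordered_set>
#include <vector>

using Node = const void *;

static void mark_compile_ready(Node root,
                               const std::unordered_map<Node, std::vector<Node>> &edges,
                               const std::unordered_set<Node> &already_linked,
                               std::unordered_set<Node> &ready)
{
    std::vector<Node> worklist{root};
    while (!worklist.empty()) {
        Node n = worklist.back();
        worklist.pop_back();
        if (already_linked.count(n) || !ready.insert(n).second)
            continue; // already compiled, or already queued on this walk
        auto it = edges.find(n);
        if (it != edges.end()) // follow the outgoing edges recorded at emission time
            worklist.insert(worklist.end(), it->second.begin(), it->second.end());
    }
}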
// if this was incomplete, force completion now of it + auto it = incompletemodules.find(codeinst); + if (it != incompletemodules.end()) { + int waiting = 0; + auto &edges = complete_graph[codeinst]; + auto edges_end = std::remove_if(edges.begin(), edges.end(), [&waiting, codeinst] (jl_code_instance_t *edge) JL_NOTSAFEPOINT -> bool { + auto &redges = incomplete_rgraph[edge]; + // waiting += std::erase(redges, codeinst); + auto redges_end = std::remove(redges.begin(), redges.end(), codeinst); + if (redges_end != redges.end()) { + waiting += redges.end() - redges_end; + redges.erase(redges_end, redges.end()); + assert(!invokenames.count(edge)); } + return !invokenames.count(edge); + }); + edges.erase(edges_end, edges.end()); + assert(waiting == std::get<1>(it->second)); + std::get<1>(it->second) = 0; + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[codeinst].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); + } + // and then indicate this should be compiled now + if (!linkready.count(codeinst) && compileready.insert(codeinst).second) { + auto edges = complete_graph.find(codeinst); + if (edges != complete_graph.end()) { + workqueue.append(edges->second); } - jl_ExecutionEngine->addModule(std::move(main)); } - ++CompiledCodeinsts; - MaxWorkqueueSize.updateMax(emitted.size()); - IndirectCodeinsts += emitted.size() - 1; - } - - size_t i = 0; - for (auto &def : emitted) { - jl_code_instance_t *this_code = def.first; - if (i < jl_timing_print_limit) - jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); - - jl_llvm_functions_t decls = std::get<1>(def.second); - jl_callptr_t addr; - bool isspecsig = false; - if (decls.functionObject == "jl_fptr_args") { - addr = jl_fptr_args_addr; + } +} + +// notify any other pending work that this edge now has code defined +static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER +{ + auto notify = incomplete_rgraph.find(edge); + if (notify == incomplete_rgraph.end()) + return; + auto redges = std::move(notify->second); + incomplete_rgraph.erase(notify); + for (size_t i = 0; i < redges.size(); i++) { + jl_code_instance_t *callee = redges[i]; + auto it = incompletemodules.find(callee); + assert(it != incompletemodules.end()); + if (--std::get<1>(it->second) == 0) { + auto ¶ms = std::get<0>(it->second); + params.tsctx_lock = params.tsctx.getLock(); + assert(callee == it->first); + int waiting = jl_analyze_workqueue(callee, params); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[callee].getModuleUnlocked(); + finish_params(M, params); + incompletemodules.erase(it); } - else if (decls.functionObject == "jl_fptr_sparam") { - addr = jl_fptr_sparam_addr; + } +} + + +// set the invoke field for codeinst (and all deps, and assist with other pending work from other threads) now +static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) +{ + jl_unique_gcsafe_lock lock(engine_lock); + if (!invokenames.count(codeinst)) + return; + threads_in_compiler_phase++; + prepare_compile(codeinst); // may safepoint + while (1) { + // TODO: split up this work by ThreadSafeContext, so two threads don't need to get the same locks and stall + if (!sharedmodules.empty()) { + auto TSM = sharedmodules.pop_back_val(); + lock.native.unlock(); + { + auto Lock = TSM.getContext().getLock(); + 
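// The loop around this point releases engine_lock for the expensive
// optimize/addModule steps and re-acquires it before touching shared state,
// so other threads can keep queueing work in the meantime. Generic sketch of
// that unlock-work-relock pattern; the queue and work() are placeholders.
#include <mutex>
#include <vector>

static std::mutex queue_lock;
static std::vector<int> work_queue; // protected by queue_lock

template <typename F>
static void drain_queue(F work)
{
    std::unique_lock<std::mutex> lk(queue_lock);
    while (!work_queue.empty()) {
        int item = work_queue.back();
        work_queue.pop_back();
        lk.unlock();   // don't hold the shared lock over the slow part
        work(item);    // e.g. optimize a module and hand it to the JIT
        lk.lock();     // re-acquire before reading shared state again
    }
}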
jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); + lock.native.lock(); } - else if (decls.functionObject == "jl_f_opaque_closure_call") { - addr = jl_f_opaque_closure_call_addr; + else if (!compileready.empty()) { + // move a function from compileready to linkready then compile it + auto compilenext = compileready.begin(); + codeinst = *compilenext; + compileready.erase(compilenext); + auto TSMref = emittedmodules.find(codeinst); + assert(TSMref != emittedmodules.end()); + auto TSM = std::move(TSMref->second); + linkready.insert(codeinst); + emittedmodules.erase(TSMref); + lock.native.unlock(); + uint64_t start_time = jl_hrtime(); + { + auto Lock = TSM.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint + } + jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint + // If logging of the compilation stream is enabled, + // then dump the method-instance specialization type to the stream + jl_method_instance_t *mi = jl_get_ci_mi(codeinst); + if (jl_is_method(mi->def.method)) { + auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); + if (stream) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); + jl_static_show((JL_STREAM*)stream, mi->specTypes); + ios_printf(stream, "\"\n"); + } + } + lock.native.lock(); } else { - addr = (jl_callptr_t)getAddressForFunction(decls.functionObject); - isspecsig = true; + break; + } + } + codeinst = nullptr; + // barrier until all threads have finished calling addModule + if (--threads_in_compiler_phase == 0) { + // the last thread out will finish linking everything + // then release all of the other threads + // move the function pointers out from invokenames to the codeinst + + // batch compile job for all new functions + SmallVector NewDefs; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + assert(!decls.functionObject.empty()); + if (decls.functionObject != "jl_fptr_args" && + decls.functionObject != "jl_fptr_sparam" && + decls.functionObject != "jl_f_opaque_closure_call") + NewDefs.push_back(decls.functionObject); + if (!decls.specFunctionObject.empty()) + NewDefs.push_back(decls.specFunctionObject); } - if (!decls.specFunctionObject.empty()) { - void *prev_specptr = NULL; - auto spec = (void*)getAddressForFunction(decls.specFunctionObject); - if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { - // only set specsig and invoke if we were the first to set specptr - jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); - // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr - // either assumes that specptr was null, doesn't care about specptr, - // or will wait until specsigflags has 0b10 set before reloading invoke - jl_atomic_store_release(&this_code->invoke, addr); - jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); - } else { - //someone else beat us, don't commit any results - while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { - jl_cpu_pause(); + auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); + + size_t nextaddr = 0; + for (auto &this_code : linkready) { + auto it = invokenames.find(this_code); + assert(it != invokenames.end()); + jl_llvm_functions_t &decls = it->second; + jl_callptr_t addr; + bool 
isspecsig = false; + if (decls.functionObject == "jl_fptr_args") { + addr = jl_fptr_args_addr; + } + else if (decls.functionObject == "jl_fptr_sparam") { + addr = jl_fptr_sparam_addr; + } + else if (decls.functionObject == "jl_f_opaque_closure_call") { + addr = jl_f_opaque_closure_call_addr; + } + else { + assert(NewDefs[nextaddr] == decls.functionObject); + addr = (jl_callptr_t)Addrs[nextaddr++]; + assert(addr); + isspecsig = true; + } + if (!decls.specFunctionObject.empty()) { + void *prev_specptr = nullptr; + assert(NewDefs[nextaddr] == decls.specFunctionObject); + void *spec = (void*)Addrs[nextaddr++]; + assert(spec); + if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { + // only set specsig and invoke if we were the first to set specptr + jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); + // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr + // either assumes that specptr was null, doesn't care about specptr, + // or will wait until specsigflags has 0b10 set before reloading invoke + jl_atomic_store_release(&this_code->invoke, addr); + jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); + } + else { + //someone else beat us, don't commit any results + while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + addr = jl_atomic_load_relaxed(&this_code->invoke); } - addr = jl_atomic_load_relaxed(&this_code->invoke); } - } else { - jl_callptr_t prev_invoke = NULL; - if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { - addr = prev_invoke; - //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other - //known lesser function) + else { + jl_callptr_t prev_invoke = nullptr; + // Allow replacing addr if it is either nullptr or our special waiting placeholder. + if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + if (prev_invoke == jl_fptr_wait_for_compiled_addr && !jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + addr = prev_invoke; + //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other + //known lesser function) + } + } } + invokenames.erase(it); + complete_graph.erase(this_code); } - if (this_code == codeinst) - fptr = addr; - i++; + linkready.clear(); + engine_wait.notify_all(); } - if (i > jl_timing_print_limit) - jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... 
<%d methods truncated>", i - 10); + else while (threads_in_compiler_phase) { + lock.wait(engine_wait); + } +} - uint64_t end_time = 0; - if (timed) - end_time = jl_hrtime(); - - // If logging of the compilation stream is enabled, - // then dump the method-instance specialization type to the stream - jl_method_instance_t *mi = codeinst->def; - if (jl_is_method(mi->def.method)) { - auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); - if (stream) { - ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show((JL_STREAM*)stream, mi->specTypes); - ios_printf(stream, "\"\n"); - } +void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; + +extern "C" JL_DLLEXPORT_CODEGEN +void jl_emit_codeinst_to_jit_impl( + jl_code_instance_t *codeinst, + jl_code_info_t *src) +{ + if (jl_is_compiled_codeinst(codeinst)) + return; + { // lock scope + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; } - return fptr; + JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); + // emit the code in LLVM IR form to the new context + jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context + params.getContext().setDiscardValueNames(true); + params.cache = true; + params.imaging_mode = 0; + orc::ThreadSafeModule result_m = + jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple); + params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(¶ms.temporary_roots); + jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints + if (!result_m) { + JL_GC_POP(); + return; + } + jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints + params.temporary_roots = nullptr; + JL_GC_POP(); + { // drop lock before acquiring engine_lock + auto release = std::move(params.tsctx_lock); + } + jl_unique_gcsafe_lock lock(engine_lock); + if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) + return; // destroy everything + const std::string &specf = decls.specFunctionObject; + const std::string &f = decls.functionObject; + assert(!f.empty()); + // Prepare debug info to receive this function + // record that this function name came from this linfo, + // so we can build a reverse mapping for debug-info. 
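// jl_add_code_in_flight (declared above) records which code instance a mangled
// function name belongs to, so the debug-info hook that runs once the object
// file is emitted can map symbols back to Julia methods. A minimal sketch of
// such a reverse map; the table, lock and CodeInst alias are stand-ins, not the
// real runtime structures.
#include <mutex>
#include <string>
#include <unordered_map>

using CodeInst = const void *; // stand-in for jl_code_instance_t*

static std::mutex in_flight_lock;
static std::unordered_map<std::string, CodeInst> in_flight;

static void add_code_in_flight(const std::string &mangled_name, CodeInst ci)
{
    std::lock_guard<std::mutex> lk(in_flight_lock);
    in_flight[mangled_name] = ci; // consumed later, when the JIT emits the object
}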
+ bool toplevel = !jl_is_method(jl_get_ci_mi(codeinst)->def.method); + if (!toplevel) { + // don't remember toplevel thunks because + // they may not be rooted in the gc for the life of the program, + // and the runtime doesn't notify us when the code becomes unreachable :( + if (!specf.empty()) + jl_add_code_in_flight(specf, codeinst, params.DL); + if (f != "jl_fptr_args" && f != "jl_fptr_sparam") + jl_add_code_in_flight(f, codeinst, params.DL); + } + jl_callptr_t expected = NULL; + jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected, jl_fptr_wait_for_compiled_addr); + invokenames[codeinst] = std::move(decls); + complete_emit(codeinst); + params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock + int waiting = jl_analyze_workqueue(codeinst, params); + if (waiting) { + auto release = std::move(params.tsctx_lock); // unlock again before moving from it + incompletemodules.try_emplace(codeinst, std::move(params), waiting); + } + else { + finish_params(result_m.getModuleUnlocked(), params); + } + emittedmodules[codeinst] = std::move(result_m); } -const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); + +const char *jl_generate_ccallable(Module *llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); // compile a C-callable alias extern "C" JL_DLLEXPORT_CODEGEN @@ -344,43 +784,73 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + jl_codegen_params_t *pparams = (jl_codegen_params_t*)p; + DataLayout DL = pparams ? pparams->DL : jl_ExecutionEngine->getDataLayout(); + Triple TargetTriple = pparams ? pparams->TargetTriple : jl_ExecutionEngine->getTargetTriple(); orc::ThreadSafeContext ctx; auto into = unwrap(llvmmod); - jl_codegen_params_t *pparams = (jl_codegen_params_t*)p; orc::ThreadSafeModule backing; + bool success = true; + const char *name = ""; if (into == NULL) { - if (!pparams) { - ctx = jl_ExecutionEngine->acquireContext(); - } - backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default()); + ctx = pparams ? 
pparams->tsctx : jl_ExecutionEngine->makeContext(); + backing = jl_create_ts_module("cextern", ctx, DL, TargetTriple); into = &backing; } - JL_LOCK(&jl_codegen_lock); - auto target_info = into->withModuleDo([&](Module &M) { - return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); - }); - jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); - params.imaging = imaging_default(); - params.debug_level = jl_options.debug_level; - if (pparams == NULL) - pparams = ¶ms; - assert(pparams->tsctx.getContext() == into->getContext().getContext()); - const char *name = jl_generate_ccallable(wrap(into), sysimg, declrt, sigt, *pparams); - bool success = true; - if (!sysimg) { - if (jl_ExecutionEngine->getGlobalValueAddress(name)) { - success = false; + { // params scope + jl_codegen_params_t params(into->getContext(), DL, TargetTriple); + if (pparams == NULL) { + params.cache = p == NULL; + params.imaging_mode = 0; + params.tsctx.getContext()->setDiscardValueNames(true); + pparams = ¶ms; + } + Module &M = *into->getModuleUnlocked(); + assert(pparams->tsctx.getContext() == &M.getContext()); + name = jl_generate_ccallable(&M, sysimg, declrt, sigt, *pparams); + if (!sysimg && !p) { + { // drop lock to keep analyzer happy (since it doesn't know we have the only reference to it) + auto release = std::move(params.tsctx_lock); + } + { // lock scope + jl_unique_gcsafe_lock lock(extern_c_lock); + if (jl_ExecutionEngine->getGlobalValueAddress(name)) + success = false; + } + params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock + if (success && params.cache) { + size_t newest_world = jl_atomic_load_acquire(&jl_world_counter); + for (auto &it : params.workqueue) { // really just zero or one, and just the ABI not the rest of the metadata + jl_code_instance_t *codeinst = it.first; + JL_GC_PROMISE_ROOTED(codeinst); + jl_code_instance_t *newest_ci = jl_type_infer(jl_get_ci_mi(codeinst), newest_world, SOURCE_MODE_ABI); + if (newest_ci) { + if (jl_egal(codeinst->rettype, newest_ci->rettype)) + it.first = codeinst; + jl_compile_codeinst_now(newest_ci); + } + } + jl_analyze_workqueue(nullptr, params, true); + assert(params.workqueue.empty()); + finish_params(&M, params); + } } - if (success && p == NULL) { - jl_jit_globals(params.globals); - assert(params.workqueue.empty()); - if (params._shared_module) - jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + pparams = nullptr; + } + if (!sysimg && success && llvmmod == NULL) { + { // lock scope + jl_unique_gcsafe_lock lock(extern_c_lock); + if (!jl_ExecutionEngine->getGlobalValueAddress(name)) { + { + auto Lock = backing.getContext().getLock(); + jl_ExecutionEngine->optimizeDLSyms(*backing.getModuleUnlocked()); // safepoint + } + jl_ExecutionEngine->addModule(std::move(backing)); + success = jl_ExecutionEngine->getGlobalValueAddress(name); + assert(success); + } } - if (success && llvmmod == NULL) - jl_ExecutionEngine->addModule(std::move(*into)); } - JL_UNLOCK(&jl_codegen_lock); if (timed) { if (measure_compile_time_enabled) { auto end = jl_hrtime(); @@ -388,9 +858,6 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * } ct->reentrant_timing &= ~1ull; } - if (ctx.getContext()) { - jl_ExecutionEngine->releaseContext(std::move(ctx)); - } return success; } @@ -405,7 +872,7 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt) jl_type_error("@ccallable", (jl_value_t*)jl_anytuple_type_type, 
(jl_value_t*)sigt); // check that f is a guaranteed singleton type jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt); - if (!jl_is_datatype(ft) || ft->instance == NULL) + if (!jl_is_datatype(ft) || !jl_is_datatype_singleton(ft)) jl_error("@ccallable: function object must be a singleton"); // compute / validate return type @@ -437,93 +904,18 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt) jl_error("@ccallable was already defined for this method name"); } -// this compiles li and emits fptr extern "C" JL_DLLEXPORT_CODEGEN -jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) +int jl_compile_codeinst_impl(jl_code_instance_t *ci) { - auto ct = jl_current_task; - bool timed = (ct->reentrant_timing & 1) == 0; - if (timed) - ct->reentrant_timing |= 1; - uint64_t compiler_start_time = 0; - uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); - bool is_recompile = false; - if (measure_compile_time_enabled) - compiler_start_time = jl_hrtime(); - // if we don't have any decls already, try to generate it now - jl_code_info_t *src = NULL; - jl_code_instance_t *codeinst = NULL; - JL_GC_PUSH2(&src, &codeinst); - JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion - jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world); - if (ci != jl_nothing) - codeinst = (jl_code_instance_t*)ci; - if (codeinst) { - src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); - if ((jl_value_t*)src == jl_nothing) - src = NULL; - else if (jl_is_method(mi->def.method)) - src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); - } - else { - // identify whether this is an invalidated method that is being recompiled - is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL; - } - if (src == NULL && jl_is_method(mi->def.method) && - jl_symbol_name(mi->def.method->name)[0] != '@') { - if (mi->def.method->source != jl_nothing) { - // If the caller didn't provide the source and IR is available, - // see if it is inferred, or try to infer it for ourself. 
- // (but don't bother with typeinf on macros or toplevel thunks) - src = jl_type_infer(mi, world, 0); - } - } - jl_code_instance_t *compiled = jl_method_compiled(mi, world); - if (compiled) { - codeinst = compiled; - } - else if (src && jl_is_code_info(src)) { - if (!codeinst) { - codeinst = jl_get_method_inferred(mi, src->rettype, src->min_world, src->max_world); - if (src->inferred) { - jl_value_t *null = nullptr; - jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing); - } - } + int newly_compiled = 0; + if (!jl_is_compiled_codeinst(ci)) { ++SpecFPtrCount; - _jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), is_recompile); - if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) - codeinst = NULL; - } - else { - codeinst = NULL; - } - JL_UNLOCK(&jl_codegen_lock); - if (timed) { - if (measure_compile_time_enabled) { - uint64_t t_comp = jl_hrtime() - compiler_start_time; - if (is_recompile) { - jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp); - } - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp); - } - ct->reentrant_timing &= ~1ull; + uint64_t start = jl_typeinf_timing_begin(); + jl_compile_codeinst_now(ci); + jl_typeinf_timing_end(start, 0); + newly_compiled = 1; } - JL_GC_POP(); - return codeinst; -} - -extern "C" JL_DLLEXPORT_CODEGEN -void jl_generate_fptr_for_oc_wrapper_impl(jl_code_instance_t *oc_wrap) -{ - if (jl_atomic_load_relaxed(&oc_wrap->invoke) != NULL) { - return; - } - JL_LOCK(&jl_codegen_lock); - if (jl_atomic_load_relaxed(&oc_wrap->invoke) == NULL) { - _jl_compile_codeinst(oc_wrap, NULL, 1, *jl_ExecutionEngine->getContext(), 0); - } - JL_UNLOCK(&jl_codegen_lock); // Might GC + return newly_compiled; } extern "C" JL_DLLEXPORT_CODEGEN @@ -540,31 +932,44 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - JL_LOCK(&jl_codegen_lock); - if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - jl_method_t *def = unspec->def->def.method; - if (jl_is_method(def)) { - src = (jl_code_info_t*)def->source; - if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); - } - else { - src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred); - assert(src); - } - if (src) { + jl_code_info_t *src = NULL; + JL_GC_PUSH1(&src); + jl_method_t *def = jl_get_ci_mi(unspec)->def.method; + if (jl_is_method(def)) { + src = (jl_code_info_t*)def->source; + if (src && (jl_value_t*)src != jl_nothing) + src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); + } + else { + jl_method_instance_t *mi = jl_get_ci_mi(unspec); + jl_code_instance_t *uninferred = jl_cached_uninferred(jl_atomic_load_relaxed(&mi->cache), 1); + assert(uninferred); + src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred->inferred); + assert(src); + } + if (src) { + // TODO: first prepare recursive_compile_graph(unspec, src) before taking this lock to avoid recursion? + JL_LOCK(&jitlock); // TODO: use a better lock + if (!jl_is_compiled_codeinst(unspec)) { assert(jl_is_code_info(src)); ++UnspecFPtrCount; - _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0); + jl_svec_t *edges = (jl_svec_t*)src->edges; + if (jl_is_svec(edges)) { + jl_atomic_store_release(&unspec->edges, edges); // n.b. 
this assumes the field was always empty svec(), which is not entirely true + jl_gc_wb(unspec, edges); + } + jl_debuginfo_t *debuginfo = src->debuginfo; + jl_atomic_store_release(&unspec->debuginfo, debuginfo); // n.b. this assumes the field was previously NULL, which is not entirely true + jl_gc_wb(unspec, debuginfo); + jl_emit_codeinst_to_jit(unspec, src); + jl_compile_codeinst_now(unspec); } - jl_callptr_t null = nullptr; - // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort - jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); - JL_GC_POP(); + JL_UNLOCK(&jitlock); // Might GC } - JL_UNLOCK(&jl_codegen_lock); // Might GC + JL_GC_POP(); + jl_callptr_t null = nullptr; + // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort + jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); if (timed) { if (measure_compile_time_enabled) { auto end = jl_hrtime(); @@ -581,116 +986,86 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) { // printing via disassembly - jl_code_instance_t *codeinst = jl_generate_fptr(mi, world); + jl_code_instance_t *codeinst = jl_compile_method_internal(mi, world); if (codeinst) { uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke); - if (getwrapper) - return jl_dump_fptr_asm(fptr, emit_mc, asm_variant, debuginfo, binary); uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); - if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) { - // normally we prevent native code from being generated for these functions, - // (using sentinel value `1` instead) - // so create an exception here so we can print pretty our lies - auto ct = jl_current_task; - bool timed = (ct->reentrant_timing & 1) == 0; - if (timed) - ct->reentrant_timing |= 1; - uint64_t compiler_start_time = 0; - uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); - if (measure_compile_time_enabled) - compiler_start_time = jl_hrtime(); - JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion - specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); - if (specfptr == 0) { - jl_code_info_t *src = jl_type_infer(mi, world, 0); - JL_GC_PUSH1(&src); - jl_method_t *def = mi->def.method; - if (jl_is_method(def)) { - if (!src) { - // TODO: jl_code_for_staged can throw - src = def->generator ? 
jl_code_for_staged(mi, world) : (jl_code_info_t*)def->source; - } - if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); - } - fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke); - specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); - if (src && jl_is_code_info(src)) { - if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) { - fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), 0); - specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); - } - } - JL_GC_POP(); - } - JL_UNLOCK(&jl_codegen_lock); - if (timed) { - if (measure_compile_time_enabled) { - auto end = jl_hrtime(); - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); - } - ct->reentrant_timing &= ~1ull; - } - } + if (getwrapper || specfptr == 0) + specfptr = fptr; if (specfptr != 0) return jl_dump_fptr_asm(specfptr, emit_mc, asm_variant, debuginfo, binary); } - - // whatever, that didn't work - use the assembler output instead - jl_llvmf_dump_t llvmf_dump; - jl_get_llvmf_defn(&llvmf_dump, mi, world, getwrapper, true, jl_default_cgparams); - if (!llvmf_dump.F) - return jl_an_empty_string; - return jl_dump_function_asm(&llvmf_dump, emit_mc, asm_variant, debuginfo, binary, false); + return jl_an_empty_string; } +#if JL_LLVM_VERSION >= 180000 +CodeGenOptLevel CodeGenOptLevelFor(int optlevel) +{ +#ifdef DISABLE_OPT + return CodeGenOptLevel::None; +#else + return optlevel == 0 ? CodeGenOptLevel::None : + optlevel == 1 ? CodeGenOptLevel::Less : + optlevel == 2 ? CodeGenOptLevel::Default : + CodeGenOptLevel::Aggressive; +#endif +} +#else CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) { #ifdef DISABLE_OPT return CodeGenOpt::None; #else - return optlevel < 2 ? CodeGenOpt::None : + return optlevel == 0 ? CodeGenOpt::None : + optlevel == 1 ? CodeGenOpt::Less : optlevel == 2 ? 
CodeGenOpt::Default : CodeGenOpt::Aggressive; #endif } +#endif static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT { return std::distance(F.begin(), F.end()); } -void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr R, orc::ThreadSafeModule TSM) { - ++ModulesOptimized; - size_t optlevel = SIZE_MAX; - TSM.withModuleDo([&](Module &M) { - if (jl_generating_output()) { - optlevel = 0; - } - else { - optlevel = std::max(static_cast(jl_options.opt_level), 0); - size_t optlevel_min = std::max(static_cast(jl_options.opt_level_min), 0); - for (auto &F : M.functions()) { - if (!F.getBasicBlockList().empty()) { +static constexpr size_t N_optlevels = 4; + +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { + size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); + do { + if (jl_generating_output()) { + opt_level = 0; + break; + } + size_t opt_level_min = std::max(static_cast(jl_options.opt_level_min), 0); + for (auto &F : M) { + if (!F.isDeclaration()) { Attribute attr = F.getFnAttribute("julia-optimization-level"); StringRef val = attr.getValueAsString(); if (val != "") { size_t ol = (size_t)val[0] - '0'; - if (ol < optlevel) - optlevel = ol; + if (ol < opt_level) + opt_level = ol; } } } - optlevel = std::min(std::max(optlevel, optlevel_min), this->count); - } + if (opt_level < opt_level_min) + opt_level = opt_level_min; + } while (0); + // currently -O3 is max + opt_level = std::min(opt_level, N_optlevels - 1); + M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); }); - assert(optlevel != SIZE_MAX && "Failed to select a valid optimization level!"); - this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM)); + return TSM; +} +static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return selectOptLevel(std::move(TSM)); } void jl_register_jit_object(const object::ObjectFile &debugObj, - std::function getLoadAddress, - std::function lookupWriteAddress) JL_NOTSAFEPOINT; + std::function getLoadAddress); namespace { @@ -705,30 +1080,27 @@ struct JITObjectInfo { class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { std::mutex PluginMutex; std::map> PendingObjs; - // Resources from distinct MaterializationResponsibilitys can get merged - // after emission, so we can have multiple debug objects per resource key. - std::map>> RegisteredObjs; public: void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G, jitlink::JITLinkContext &Ctx, MemoryBufferRef InputObject) override { - // Keeping around a full copy of the input object file (and re-parsing it) is - // wasteful, but for now, this lets us reuse the existing debuginfo.cpp code. - // Should look into just directly pulling out all the information required in - // a JITLink pass and just keeping the required tables/DWARF sections around - // (perhaps using the LLVM DebuggerSupportPlugin as a reference). auto NewBuffer = MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName()); + // Re-parsing the InputObject is wasteful, but for now, this lets us + // reuse the existing debuginfo.cpp code. Should look into just + // directly pulling out all the information required in a JITLink pass + // and just keeping the required tables/DWARF sections around (perhaps + // using the LLVM DebuggerSupportPlugin as a reference). 
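// The plugin keeps per-link state keyed by the MaterializationResponsibility
// being linked: notifyMaterializing stashes a copy of the object under a mutex,
// notifyEmitted consumes it and registers the debug info. Generic sketch of that
// keyed hand-off; Key and Info are placeholders for &MR and JITObjectInfo.
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>

using Key = const void *;                 // stand-in for MaterializationResponsibility*
struct Info { std::string object_copy; }; // stand-in for the saved object buffer

static std::mutex pending_lock;
static std::map<Key, std::unique_ptr<Info>> pending;

static void on_materializing(Key k, std::string obj) // link started
{
    std::lock_guard<std::mutex> lk(pending_lock);
    pending[k] = std::make_unique<Info>(Info{std::move(obj)});
}

static std::unique_ptr<Info> on_emitted(Key k) // object emitted; hand the state back
{
    std::lock_guard<std::mutex> lk(pending_lock);
    auto it = pending.find(k);
    if (it == pending.end())
        return nullptr;
    std::unique_ptr<Info> info = std::move(it->second);
    pending.erase(it);
    return info; // caller registers it with the debug-info tables
}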
auto NewObj = cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef())); { std::lock_guard lock(PluginMutex); assert(PendingObjs.count(&MR) == 0); - PendingObjs[&MR] = std::unique_ptr( - new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}}); + PendingObjs[&MR] = std::unique_ptr(new JITObjectInfo{ + std::move(NewBuffer), std::move(NewObj), {}}); } } @@ -753,14 +1125,9 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { return result->second; }; - jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr); - } - - cantFail(MR.withResourceKeyDo([&](ResourceKey K) { - std::lock_guard lock(PluginMutex); - RegisteredObjs[K].push_back(std::move(PendingObjs[&MR])); + jl_register_jit_object(*NewInfo->Object, getLoadAddress); PendingObjs.erase(&MR); - })); + } return Error::success(); } @@ -772,24 +1139,13 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { return Error::success(); } - Error notifyRemovingResources(ResourceKey K) override + Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override { - std::lock_guard lock(PluginMutex); - RegisteredObjs.erase(K); - // TODO: If we ever unload code, need to notify debuginfo registry. return Error::success(); } - void notifyTransferringResources(ResourceKey DstKey, ResourceKey SrcKey) override - { - std::lock_guard lock(PluginMutex); - auto SrcIt = RegisteredObjs.find(SrcKey); - if (SrcIt != RegisteredObjs.end()) { - for (std::unique_ptr &Info : SrcIt->second) - RegisteredObjs[DstKey].push_back(std::move(Info)); - RegisteredObjs.erase(SrcIt); - } - } + void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, + orc::ResourceKey SrcKey) override {} void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &, jitlink::PassConfiguration &PassConfig) override @@ -829,20 +1185,21 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { private: - std::atomic &total_size; + std::atomic &jit_bytes_size; public: - JLMemoryUsagePlugin(std::atomic &total_size) - : total_size(total_size) {} + JLMemoryUsagePlugin(std::atomic &jit_bytes_size) + : jit_bytes_size(jit_bytes_size) {} Error notifyFailed(orc::MaterializationResponsibility &MR) override { return Error::success(); } - Error notifyRemovingResources(orc::ResourceKey K) override { + Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override + { return Error::success(); } - void notifyTransferringResources(orc::ResourceKey DstKey, + void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, orc::ResourceKey SrcKey) override {} void modifyPassConfig(orc::MaterializationResponsibility &, @@ -860,7 +1217,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { for (auto block : section.blocks()) { secsize += block->getSize(); } - if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) { + if ((section.getMemProt() & orc::MemProt::Exec) == orc::MemProt::None) { data_size += secsize; } else { code_size += secsize; @@ -869,7 +1226,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { } (void) code_size; (void) data_size; - this->total_size.fetch_add(graph_size, std::memory_order_relaxed); + this->jit_bytes_size.fetch_add(graph_size, std::memory_order_relaxed); jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size); @@ -891,12 
+1248,8 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. -std::unique_ptr createJITLinkMemoryManager() { -#if JL_LLVM_VERSION < 150000 - return cantFail(jitlink::InProcessMemoryManager::Create()); -#else - return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); -#endif +std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { + return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper(/*Reservation Granularity*/ 16 * 1024 * 1024)); } #ifdef _COMPILER_CLANG_ @@ -921,7 +1274,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { } }; -RTDyldMemoryManager* createRTDyldMemoryManager(void); +RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { @@ -930,7 +1283,10 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { public: ForwardingMemoryManager(std::shared_ptr MemMgr) : MemMgr(MemMgr) {} - virtual ~ForwardingMemoryManager() = default; + ForwardingMemoryManager(ForwardingMemoryManager &) = delete; + virtual ~ForwardingMemoryManager() { + assert(!MemMgr); + } virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) override { @@ -942,11 +1298,9 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { bool IsReadOnly) override { return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly); } - virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, - uintptr_t RODataSize, - uint32_t RODataAlign, - uintptr_t RWDataSize, - uint32_t RWDataAlign) override { + virtual void reserveAllocationSpace(uintptr_t CodeSize, Align CodeAlign, + uintptr_t RODataSize, Align RODataAlign, + uintptr_t RWDataSize, Align RWDataAlign) override { return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); } virtual bool needsToReserveAllocationSpace() override { @@ -956,11 +1310,13 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { size_t Size) override { return MemMgr->registerEHFrames(Addr, LoadAddr, Size); } - virtual void deregisterEHFrames() override { - return MemMgr->deregisterEHFrames(); - } + virtual void deregisterEHFrames() override { /* not actually supported or allowed with this */ } virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return MemMgr->finalizeMemory(ErrMsg); + bool b = false; + if (MemMgr.use_count() == 2) + b = MemMgr->finalizeMemory(ErrMsg); + MemMgr.reset(); + return b; } virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, const object::ObjectFile &Obj) override { @@ -968,33 +1324,12 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { } }; - -#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) -void *lookupWriteAddressFor(RTDyldMemoryManager *MemMgr, void *rt_addr); -#endif - -void registerRTDyldJITObject(const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &L, - const std::shared_ptr &MemMgr) +#ifndef JL_USE_JITLINK +static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR, + const object::ObjectFile &Object, + const RuntimeDyld::LoadedObjectInfo &L) { - auto SavedObject = L.getObjectForDebug(Object).takeBinary(); - // If the debug object is unavailable, save 
(a copy of) the original object - // for our backtraces. - // This copy seems unfortunate, but there doesn't seem to be a way to take - // ownership of the original buffer. - if (!SavedObject.first) { - auto NewBuffer = - MemoryBuffer::getMemBufferCopy(Object.getData(), Object.getFileName()); - auto NewObj = - cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef())); - SavedObject = std::make_pair(std::move(NewObj), std::move(NewBuffer)); - } - const object::ObjectFile *DebugObj = SavedObject.first.release(); - SavedObject.second.release(); - StringMap loadedSections; - // Use the original Object, not the DebugObject, as this is used for the - // RuntimeDyld::LoadedObjectInfo lookup. for (const object::SectionRef &lSection : Object.sections()) { auto sName = lSection.getName(); if (sName) { @@ -1011,14 +1346,11 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, return L.getSectionLoadAddress(search->second); }; - jl_register_jit_object(*DebugObj, getLoadAddress, -#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) - [MemMgr](void *p) { return lookupWriteAddressFor(MemMgr.get(), p); } -#else - nullptr -#endif - ); + auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op. + jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object, getLoadAddress); } +#endif + namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1037,9 +1369,19 @@ namespace { #if defined(MSAN_EMUTLS_WORKAROUND) options.EmulatedTLS = true; options.ExplicitEmulatedTLS = true; +#endif +#if defined(_CPU_RISCV64_) + // we set these manually to avoid LLVM defaulting to soft-float +#if defined(__riscv_float_abi_double) + options.MCOptions.ABIName = "lp64d"; +#elif defined(__riscv_float_abi_single) + options.MCOptions.ABIName = "lp64f"; +#else + options.MCOptions.ABIName = "lp64"; +#endif #endif uint32_t target_flags = 0; - auto target = jl_get_llvm_target(imaging_default(), target_flags); + auto target = jl_get_llvm_target(jl_generating_output(), target_flags); auto &TheCPU = target.first; SmallVector targetFeatures(target.second.begin(), target.second.end()); std::string errorstr; @@ -1071,7 +1413,7 @@ namespace { FeaturesStr = Features.getString(); } // Allocate a target... 
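// The hunk below chooses the code and relocation models per target: large code
// model on 64-bit hosts so JIT'd code can be placed anywhere, small on AArch64,
// and medium plus PIC on RISC-V until its large code model is supported. A
// standalone sketch of that selection; the helper name is hypothetical and the
// 64-bit check stands in for the _P64 macro used in the real code.
#include <llvm/TargetParser/Triple.h> // llvm/ADT/Triple.h on older LLVM
#include <llvm/Support/CodeGen.h>
#include <optional>
#include <utility>

static std::pair<std::optional<llvm::CodeModel::Model>, llvm::Reloc::Model>
pick_code_and_reloc_model(const llvm::Triple &TT)
{
    std::optional<llvm::CodeModel::Model> CM;
    llvm::Reloc::Model RM = llvm::Reloc::Static; // simpler code for the JIT
    if (TT.isArch64Bit())
        CM = llvm::CodeModel::Large;
    if (TT.isAArch64())
        CM = llvm::CodeModel::Small;
    else if (TT.isRISCV()) {
        CM = llvm::CodeModel::Medium; // large code model not supported yet
        RM = llvm::Reloc::PIC_;       // required until it is
    }
    return {CM, RM};
}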
- Optional codemodel = + std::optional codemodel = #ifdef _P64 // Make sure we are using the large code model on 64bit // Let LLVM pick a default suitable for jitting on 32bit @@ -1079,40 +1421,43 @@ namespace { #else None; #endif + if (TheTriple.isAArch64()) + codemodel = CodeModel::Small; + else if (TheTriple.isRISCV()) { + // RISC-V will support large code model in LLVM 21 + // https://github.com/llvm/llvm-project/pull/70308 + codemodel = CodeModel::Medium; + } + // Generate simpler code for JIT + Reloc::Model relocmodel = Reloc::Static; + if (TheTriple.isRISCV()) { + // until large code model is supported, use PIC for RISC-V + // https://github.com/llvm/llvm-project/issues/106203 + relocmodel = Reloc::PIC_; + } auto optlevel = CodeGenOptLevelFor(jl_options.opt_level); auto TM = TheTarget->createTargetMachine( TheTriple.getTriple(), TheCPU, FeaturesStr, options, - Reloc::Static, // Generate simpler code for JIT + relocmodel, codemodel, optlevel, true // JIT ); assert(TM && "Failed to select target machine -" " Is the LLVM backend for this CPU enabled?"); - if (!TheTriple.isARM() && !TheTriple.isPPC64()) { - // FastISel seems to be buggy for ARM. Ref #13321 - if (jl_options.opt_level < 2) - TM->setFastISel(true); - } + fixupTM(*TM); return std::unique_ptr(TM); } -} // namespace -namespace { - -#ifndef JL_USE_NEW_PM - typedef legacy::PassManager PassManager; -#else typedef NewPM PassManager; -#endif orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT { return orc::JITTargetMachineBuilder(TM.getTargetTriple()) .setCPU(TM.getTargetCPU().str()) .setFeatures(TM.getTargetFeatureString()) .setOptions(TM.Options) - .setRelocationModel(Reloc::Static) + .setRelocationModel(TM.getRelocationModel()) .setCodeModel(TM.getCodeModel()) .setCodeGenOptLevel(CodeGenOptLevelFor(optlevel)); } @@ -1124,161 +1469,458 @@ namespace { : JTMB(createJTMBFromTM(TM, optlevel)) {} std::unique_ptr operator()() JL_NOTSAFEPOINT { - return cantFail(JTMB.createTargetMachine()); + auto TM = cantFail(JTMB.createTargetMachine()); + fixupTM(*TM); + return TM; } }; -#ifndef JL_USE_NEW_PM - struct PMCreator { - std::unique_ptr TM; - int optlevel; - PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT - : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {} - // overload for newpm compatibility - PMCreator(TargetMachine &TM, int optlevel, std::vector> &) JL_NOTSAFEPOINT - : PMCreator(TM, optlevel) {} - PMCreator(const PMCreator &other) JL_NOTSAFEPOINT - : PMCreator(*other.TM, other.optlevel) {} - PMCreator(PMCreator &&other) JL_NOTSAFEPOINT - : TM(std::move(other.TM)), optlevel(other.optlevel) {} - friend void swap(PMCreator &self, PMCreator &other) JL_NOTSAFEPOINT { - using std::swap; - swap(self.TM, other.TM); - swap(self.optlevel, other.optlevel); - } - PMCreator &operator=(PMCreator other) JL_NOTSAFEPOINT { - swap(*this, other); - return *this; - } - auto operator()() JL_NOTSAFEPOINT { - auto PM = std::make_unique(); - addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis()); - addOptimizationPasses(PM.get(), optlevel); - addMachinePasses(PM.get(), optlevel); - return PM; - } - }; -#else struct PMCreator { orc::JITTargetMachineBuilder JTMB; OptimizationLevel O; - std::vector> &printers; - PMCreator(TargetMachine &TM, int optlevel, std::vector> &printers) JL_NOTSAFEPOINT - : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {} + SmallVector, 0> &printers; + std::mutex &llvm_printing_mutex; + 
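// Every pass manager the pool creates registers a "print my timers" callback in
// a shared list; registration happens from multiple threads, hence the printing
// mutex, and the callbacks run once at teardown. Generic sketch of that
// registration pattern with placeholder names.
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

static std::mutex printers_lock;
static std::vector<std::function<void()>> timer_printers;

static void register_timer_printer(std::function<void()> p)
{
    std::lock_guard<std::mutex> lk(printers_lock);
    timer_printers.push_back(std::move(p)); // one entry per pass manager created
}

static void print_all_timers() // called once, e.g. when the JIT shuts down
{
    std::lock_guard<std::mutex> lk(printers_lock);
    for (auto &p : timer_printers)
        p();
}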
PMCreator(TargetMachine &TM, int optlevel, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT + : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers), llvm_printing_mutex(llvm_printing_mutex) {} auto operator()() JL_NOTSAFEPOINT { - auto NPM = std::make_unique(cantFail(JTMB.createTargetMachine()), O); - printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT { - NPM->printTimers(); - }); + auto TM = cantFail(JTMB.createTargetMachine()); + fixupTM(*TM); + auto NPM = std::make_unique(std::move(TM), O); + // TODO this needs to be locked, as different resource pools may add to the printer vector at the same time + { + std::lock_guard lock(llvm_printing_mutex); + printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT { + NPM->printTimers(); + }); + } return NPM; } }; -#endif - struct OptimizerT { - OptimizerT(TargetMachine &TM, int optlevel, std::vector> &printers) JL_NOTSAFEPOINT - : optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {} - OptimizerT(OptimizerT&) JL_NOTSAFEPOINT = delete; - OptimizerT(OptimizerT&&) JL_NOTSAFEPOINT = default; + template + struct sizedOptimizerT { + sizedOptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { + for (size_t i = 0; i < N; i++) { + PMs[i] = std::make_unique>>(PMCreator(TM, i, printers, llvm_printing_mutex)); + } + } - OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); + assert(PoolIdx < N && "Invalid optimization pool index"); + uint64_t start_time = 0; - std::stringstream before_stats_ss; - bool should_dump_opt_stats = false; + + struct Stat { + std::string name; + uint64_t insts; + uint64_t bbs; + + void dump(ios_t *stream) JL_NOTSAFEPOINT { + ios_printf(stream, " \"%s\":\n", name.c_str()); + ios_printf(stream, " instructions: %u\n", insts); + ios_printf(stream, " basicblocks: %zd\n", bbs); + } + + Stat(Function &F) JL_NOTSAFEPOINT : name(F.getName().str()), insts(F.getInstructionCount()), bbs(countBasicBlocks(F)) {} + + ~Stat() JL_NOTSAFEPOINT = default; + }; + SmallVector before_stats; { - auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); - if (stream) { - // Ensures that we don't _just_ write the second part of the YAML object - should_dump_opt_stats = true; - // We use a stringstream to later atomically write a YAML object - // without the need to hold the stream lock over the optimization - // Print LLVM function statistics _before_ optimization - // Print all the information about this invocation as a YAML object - before_stats_ss << "- \n"; - // We print the name and some statistics for each function in the module, both - // before optimization and again afterwards. 
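The pool index used above is taken from the "julia.optlevel" module flag rather than from a global setting, so each module carries its own optimization level through the pipeline. A minimal sketch of how such a flag can be written and read back with LLVM's metadata helpers; the helper names are illustrative and not part of this patch:

    #include <llvm/IR/Constants.h>
    #include <llvm/IR/Metadata.h>
    #include <llvm/IR/Module.h>

    // Hypothetical helpers; the real producer/consumer live in Julia's codegen and JIT.
    static void tagOptLevel(llvm::Module &M, unsigned optlevel) {
        // Stored as a module flag so it travels with the IR into the optimizer/compiler.
        M.addModuleFlag(llvm::Module::Error, "julia.optlevel", optlevel);
    }

    static unsigned readOptLevel(const llvm::Module &M, unsigned fallback) {
        if (auto *CI = llvm::mdconst::extract_or_null<llvm::ConstantInt>(
                M.getModuleFlag("julia.optlevel")))
            return (unsigned)CI->getZExtValue();
        return fallback; // e.g. jl_options.opt_level, as the compiler in this patch does
    }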
- before_stats_ss << " before: \n"; + if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) { for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().startswith("jfptr_")) { + if (F.isDeclaration() || F.getName().starts_with("jfptr_")) { continue; } // Each function is printed as a YAML object with several attributes - before_stats_ss << " \"" << F.getName().str().c_str() << "\":\n"; - before_stats_ss << " instructions: " << F.getInstructionCount() << "\n"; - before_stats_ss << " basicblocks: " << countBasicBlocks(F) << "\n"; + before_stats.emplace_back(F); } start_time = jl_hrtime(); } } - JL_TIMING(LLVM_OPT, LLVM_OPT); - - //Run the optimization - assert(!verifyModule(M, &errs())); - (***PMs).run(M); - assert(!verifyModule(M, &errs())); + { + JL_TIMING(LLVM_JIT, JIT_Opt); + //Run the optimization + (****PMs[PoolIdx]).run(M); + assert(!verifyLLVMIR(M)); + } - uint64_t end_time = 0; { - auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); - if (stream && should_dump_opt_stats) { - ios_printf(stream, "%s", before_stats_ss.str().c_str()); - end_time = jl_hrtime(); + // Print optimization statistics as a YAML object + // Looks like: + // - + // before: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + // time_ns: uint64 + // optlevel: int + // after: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + if (auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream()) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "- \n"); + + // Print LLVM function statistic _before_ optimization + ios_printf(stream, " before: \n"); + for (auto &s : before_stats) { + s.dump(stream); + } ios_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); - ios_printf(stream, " optlevel: %d\n", optlevel); + ios_printf(stream, " optlevel: %d\n", PoolIdx); // Print LLVM function statistics _after_ optimization ios_printf(stream, " after: \n"); for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().startswith("jfptr_")) { + if (F.isDeclaration() || F.getName().starts_with("jfptr_")) { continue; } - ios_printf(stream, " \"%s\":\n", F.getName().str().c_str()); - ios_printf(stream, " instructions: %u\n", F.getInstructionCount()); - ios_printf(stream, " basicblocks: %zd\n", countBasicBlocks(F)); + Stat(F).dump(stream); } } } + ++ModulesOptimized; + switch (PoolIdx) { + case 0: + ++OptO0; + break; + case 1: + ++OptO1; + break; + case 2: + ++OptO2; + break; + case 3: + ++OptO3; + break; + default: + // Change this if we ever gain other optlevels + llvm_unreachable("optlevel is between 0 and 3!"); + } }); - switch (optlevel) { - case 0: - ++OptO0; - break; - case 1: - ++OptO1; - break; - case 2: - ++OptO2; - break; - case 3: - ++OptO3; - break; - default: - llvm_unreachable("optlevel is between 0 and 3!"); - } - return Expected{std::move(TSM)}; + return TSM; + } + private: + std::array>>, N> PMs; + }; + + // shim for converting a unique_ptr to a TransformFunction to a TransformFunction + template + struct IRTransformRef { + IRTransformRef(T &transform) : transform(transform) {} + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return transform(std::move(TSM), R); } private: - int optlevel; - JuliaOJIT::ResourcePool> PMs; + T &transform; }; + template struct CompilerT : orc::IRCompileLayer::IRCompiler { - CompilerT(orc::IRSymbolMapper::ManglingOptions 
MO, TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT - : orc::IRCompileLayer::IRCompiler(MO), TMs(TMCreator(TM, optlevel)) {} + CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM) JL_NOTSAFEPOINT + : orc::IRCompileLayer::IRCompiler(MO) { + for (size_t i = 0; i < N; ++i) { + TMs[i] = std::make_unique>>(TMCreator(TM, i)); + } + } Expected> operator()(Module &M) override { - return orc::SimpleCompiler(***TMs)(M); + JL_TIMING(LLVM_JIT, JIT_Compile); + size_t PoolIdx; + if (auto opt_level = M.getModuleFlag("julia.optlevel")) { + PoolIdx = cast(cast(opt_level)->getValue())->getZExtValue(); + } + else { + PoolIdx = jl_options.opt_level; + } + assert(PoolIdx < N && "Invalid optimization level for compiler!"); + return orc::SimpleCompiler(****TMs[PoolIdx])(M); } - JuliaOJIT::ResourcePool> TMs; + std::array>>, N> TMs; }; } +struct JuliaOJIT::OptimizerT { + OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) + : opt(TM, printers, llvm_printing_mutex) {} + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return opt(std::move(TSM)); + } +private: + struct sizedOptimizerT opt; +}; + +struct JuliaOJIT::JITPointersT { + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} + + orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); + } + } + + // Windows needs some inline asm to help + // build unwind tables, if they have any functions to decorate + if (!M.functions().empty()) + jl_decorate_module(M); + }); + return TSM; + } + Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return operator()(std::move(TSM)); + } + +private: + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned + Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. 
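The checks that follow guard a simple idea: a StringSet keeps exactly one canonical copy of each distinct byte string and hands back a stable pointer into it, so identical constant data emitted by many modules collapses to a single allocation. A minimal sketch of that idiom, ignoring the alignment handling that MaxAlignedAlloc adds (names here are illustrative):

    #include <llvm/ADT/StringRef.h>
    #include <llvm/ADT/StringSet.h>

    static llvm::StringSet<> InternedBytes; // simplified stand-in for SharedBytesT

    // Returns a pointer into the single shared copy of Data; equal byte strings
    // inserted later map to the same storage.
    static const char *internBytes(llvm::StringRef Data) {
        return InternedBytes.insert(Data).first->getKey().data();
    }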
+ if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; + } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); + return literal_static_pointer_val(Interned.data(), GV.getType()); + } + + SharedBytesT &SharedBytes; + std::mutex &Lock; +}; + + +struct JuliaOJIT::DLSymOptimizer { + DLSymOptimizer(bool named) JL_NOTSAFEPOINT { + this->named = named; +#define INIT_RUNTIME_LIBRARY(libname, handle) \ + do { \ + auto libidx = (uintptr_t) libname; \ + if (libidx >= runtime_symbols.size()) { \ + runtime_symbols.resize(libidx + 1); \ + } \ + runtime_symbols[libidx].first = handle; \ + } while (0) + + INIT_RUNTIME_LIBRARY(NULL, jl_RTLD_DEFAULT_handle); + INIT_RUNTIME_LIBRARY(JL_EXE_LIBNAME, jl_exe_handle); + INIT_RUNTIME_LIBRARY(JL_LIBJULIA_INTERNAL_DL_LIBNAME, jl_libjulia_internal_handle); + INIT_RUNTIME_LIBRARY(JL_LIBJULIA_DL_LIBNAME, jl_libjulia_handle); + +#undef INIT_RUNTIME_LIBRARY + } + ~DLSymOptimizer() JL_NOTSAFEPOINT = default; + + void *lookup_symbol(void *libhandle, const char *fname) JL_NOTSAFEPOINT { + void *addr; + jl_dlsym(libhandle, fname, &addr, 0); + return addr; + } + + void *lookup(const char *libname, const char *fname) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { + StringRef lib(libname); + StringRef f(fname); + std::lock_guard lock(symbols_mutex); + auto uit = user_symbols.find(lib); + if (uit == user_symbols.end()) { + jl_task_t *ct = jl_current_task; + int8_t gc_state = jl_gc_unsafe_enter(ct->ptls); + void *handle = jl_get_library_(libname, 0); + jl_gc_unsafe_leave(ct->ptls, gc_state); + if (!handle) + return nullptr; + uit = user_symbols.insert(std::make_pair(lib, std::make_pair(handle, StringMap()))).first; + } + auto &symmap = uit->second.second; + auto it = symmap.find(f); + if (it != symmap.end()) { + return it->second; + } + void *handle = lookup_symbol(uit->second.first, fname); + symmap[f] = handle; + return handle; + } + + void *lookup(uintptr_t libidx, const char *fname) JL_NOTSAFEPOINT { + std::lock_guard lock(symbols_mutex); + runtime_symbols.resize(std::max(runtime_symbols.size(), libidx + 1)); + auto it = runtime_symbols[libidx].second.find(fname); + if (it != runtime_symbols[libidx].second.end()) { + return it->second; + } + auto handle = lookup_symbol(runtime_symbols[libidx].first, fname); + runtime_symbols[libidx].second[fname] = handle; + return handle; + } + + void operator()(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { + for (auto &GV : M.globals()) { + auto Name = GV.getName(); + if (Name.starts_with("jlplt") && Name.ends_with("got")) { + auto fname = GV.getAttribute("julia.fname").getValueAsString().str(); + void *addr; + if (GV.hasAttribute("julia.libname")) { + auto libname = GV.getAttribute("julia.libname").getValueAsString().str(); + addr = lookup(libname.data(), fname.data()); + } else { + assert(GV.hasAttribute("julia.libidx") && "PLT entry should have either libname or libidx attribute!"); + auto libidx = 
(uintptr_t)std::stoull(GV.getAttribute("julia.libidx").getValueAsString().str()); + addr = lookup(libidx, fname.data()); + } + if (addr) { + Function *Thunk = nullptr; + if (!GV.isDeclaration()) { + Thunk = cast(GV.getInitializer()->stripPointerCasts()); + assert(++Thunk->uses().begin() == Thunk->uses().end() && "Thunk should only have one use in PLT initializer!"); + assert(Thunk->hasLocalLinkage() && "Thunk should not have non-local linkage!"); + } else { + GV.setLinkage(GlobalValue::PrivateLinkage); + } + auto init = ConstantExpr::getIntToPtr(ConstantInt::get(M.getDataLayout().getIntPtrType(M.getContext()), (uintptr_t)addr), GV.getValueType()); + if (named) { + auto T = GV.getValueType(); + assert(T->isPointerTy()); + init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, GV.getName() + ".jit", init, &M); + } + GV.setInitializer(init); + GV.setConstant(true); + GV.setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + if (Thunk) { + Thunk->eraseFromParent(); + } + } + } + } + + for (auto &F : M) { + for (auto &BB : F) { + SmallVector to_delete; + for (auto &I : make_early_inc_range(BB)) { + auto CI = dyn_cast(&I); + if (!CI) + continue; + auto Callee = CI->getCalledFunction(); + if (!Callee || Callee->getName() != XSTR(jl_load_and_lookup)) + continue; + // Long-winded way of extracting fname without needing a second copy in an attribute + auto fname = cast(cast(CI->getArgOperand(1)->stripPointerCasts())->getInitializer())->getAsCString(); + auto libarg = CI->getArgOperand(0)->stripPointerCasts(); + // Should only use in store and phi node + // Note that this uses the raw output of codegen, + // which is why we can assume this + assert(++++CI->use_begin() == CI->use_end()); + void *addr; + if (auto GV = dyn_cast(libarg)) { + // Can happen if the library is the empty string, just give up when that happens + if (isa(GV->getInitializer())) + continue; + auto libname = cast(GV->getInitializer())->getAsCString(); + addr = lookup(libname.data(), fname.data()); + } else { + // Can happen if we fail the compile time dlfind i.e when we try a symbol that doesn't exist in libc + if (dyn_cast(libarg)) + continue; + assert(cast(libarg)->getOpcode() == Instruction::IntToPtr && "libarg should be either a global variable or a integer index!"); + libarg = cast(libarg)->getOperand(0); + auto libidx = cast(libarg)->getZExtValue(); + addr = lookup(libidx, fname.data()); + } + if (addr) { + auto init = ConstantExpr::getIntToPtr(ConstantInt::get(M.getDataLayout().getIntPtrType(M.getContext()), (uintptr_t)addr), CI->getType()); + if (named) { + auto T = CI->getType(); + assert(T->isPointerTy()); + init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, CI->getName() + ".jit", init, &M); + } + // DCE and SimplifyCFG will kill the branching structure around + // the call, so we don't need to worry about removing everything + for (auto user : make_early_inc_range(CI->users())) { + if (auto SI = dyn_cast(user)) { + to_delete.push_back(SI); + } else { + auto PHI = cast(user); + PHI->replaceAllUsesWith(init); + to_delete.push_back(PHI); + } + } + to_delete.push_back(CI); + } + } + for (auto I : to_delete) { + I->eraseFromParent(); + } + } + } + } + + std::mutex symbols_mutex; + StringMap>> user_symbols; + SmallVector>, 0> runtime_symbols; + bool named; +}; + +void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { + JuliaOJIT::DLSymOptimizer(true)(M); +} + +void fixupTM(TargetMachine &TM) { + auto TheTriple = TM.getTargetTriple(); + if (jl_options.opt_level < 2) { + if 
(!TheTriple.isARM() && !TheTriple.isPPC64() && !TheTriple.isAArch64()) + TM.setFastISel(true); + else // FastISel seems to be buggy Ref #13321 + TM.setFastISel(false); + } +} + llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { // Mark our address spaces as non-integral auto jl_data_layout = TM.createDataLayout(); @@ -1286,16 +1928,6 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector> &PrintLLVMTimers) - : CompileLayer(BaseLayer.getExecutionSession(), BaseLayer, - std::make_unique(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)), - OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, - llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel, PrintLLVMTimers))) {} - -#ifdef _COMPILER_ASAN_ENABLED_ -int64_t ___asan_globals_registered; -#endif - JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), @@ -1303,33 +1935,27 @@ JuliaOJIT::JuliaOJIT() GlobalJD(ES.createBareJITDylib("JuliaGlobals")), JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), - ContextPool([](){ - auto ctx = std::make_unique(); - return orc::ThreadSafeContext(std::move(ctx)); - }), + DLSymOpt(std::make_unique(false)), #ifdef JL_USE_JITLINK MemMgr(createJITLinkMemoryManager()), ObjectLayer(ES, *MemMgr), #else MemMgr(createRTDyldMemoryManager()), - ObjectLayer( + UnlockedObjectLayer( ES, [this]() { std::unique_ptr result(new ForwardingMemoryManager(MemMgr)); return result; } ), + ObjectLayer(UnlockedObjectLayer), #endif - LockLayer(ObjectLayer), - Pipelines{ - std::make_unique(LockLayer, *TM, 0, PrintLLVMTimers), - std::make_unique(LockLayer, *TM, 1, PrintLLVMTimers), - std::make_unique(LockLayer, *TM, 2, PrintLLVMTimers), - std::make_unique(LockLayer, *TM, 3, PrintLLVMTimers), - }, - OptSelLayer(Pipelines), - ExternalCompileLayer(ES, LockLayer, - std::make_unique(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM, 2)) + CompileLayer(ES, ObjectLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + JITPointers(std::make_unique(SharedBytes, RLST_mutex)), + JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)), + Optimizers(std::make_unique(*TM, PrintLLVMTimers, llvm_printing_mutex)), + OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)), + OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)) { #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) @@ -1343,14 +1969,9 @@ JuliaOJIT::JuliaOJIT() ES, std::move(ehRegistrar))); ObjectLayer.addPlugin(std::make_unique()); - ObjectLayer.addPlugin(std::make_unique(total_size)); + ObjectLayer.addPlugin(std::make_unique(jit_bytes_size)); #else - ObjectLayer.setNotifyLoaded( - [this](orc::MaterializationResponsibility &MR, - const object::ObjectFile &Object, - const RuntimeDyld::LoadedObjectInfo &LO) { - registerRTDyldJITObject(Object, LO, MemMgr); - }); + UnlockedObjectLayer.setNotifyLoaded(registerRTDyldJITObject); #endif std::string ErrorStr; @@ -1395,7 +2016,7 @@ JuliaOJIT::JuliaOJIT() DL.getGlobalPrefix(), [&](const orc::SymbolStringPtr &S) { const char *const atomic_prefix = "__atomic_"; - return (*S).startswith(atomic_prefix); + return (*S).starts_with(atomic_prefix); }))); } } @@ -1405,47 +2026,82 @@ JuliaOJIT::JuliaOJIT() ExternalJD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); ExternalJD.addToLinkOrder(JD, 
orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); -#if JULIA_FLOAT16_ABI == 1 orc::SymbolAliasMap jl_crt = { - { mangle("__gnu_h2f_ieee"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } }, - { mangle("__extendhfsf2"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } }, - { mangle("__gnu_f2h_ieee"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } }, - { mangle("__truncsfhf2"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } }, - { mangle("__truncdfhf2"), { mangle("julia__truncdfhf2"), JITSymbolFlags::Exported } } + // Float16 conversion routines +#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) + // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin + // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9 + { mangle("__gnu_h2f_ieee"), { mangle("julia_half_to_float"), JITSymbolFlags::Exported } }, + { mangle("__extendhfsf2"), { mangle("julia_half_to_float"), JITSymbolFlags::Exported } }, + { mangle("__gnu_f2h_ieee"), { mangle("julia_float_to_half"), JITSymbolFlags::Exported } }, + { mangle("__truncsfhf2"), { mangle("julia_float_to_half"), JITSymbolFlags::Exported } }, + { mangle("__truncdfhf2"), { mangle("julia_double_to_half"), JITSymbolFlags::Exported } }, +#else + { mangle("__gnu_h2f_ieee"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } }, + { mangle("__extendhfsf2"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } }, + { mangle("__gnu_f2h_ieee"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } }, + { mangle("__truncsfhf2"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } }, + { mangle("__truncdfhf2"), { mangle("julia__truncdfhf2"), JITSymbolFlags::Exported } }, +#endif + // BFloat16 conversion routines + { mangle("__truncsfbf2"), { mangle("julia__truncsfbf2"), JITSymbolFlags::Exported } }, + { mangle("__truncdfbf2"), { mangle("julia__truncdfbf2"), JITSymbolFlags::Exported } }, }; cantFail(GlobalJD.define(orc::symbolAliases(jl_crt))); + +#ifdef _OS_OPENBSD_ + orc::SymbolMap i128_crt; + + i128_crt[mangle("__divti3")] = JITEvaluatedSymbol::fromPointer(&__divti3, JITSymbolFlags::Exported); + i128_crt[mangle("__modti3")] = JITEvaluatedSymbol::fromPointer(&__modti3, JITSymbolFlags::Exported); + i128_crt[mangle("__udivti3")] = JITEvaluatedSymbol::fromPointer(&__udivti3, JITSymbolFlags::Exported); + i128_crt[mangle("__umodti3")] = JITEvaluatedSymbol::fromPointer(&__umodti3, JITSymbolFlags::Exported); + + cantFail(GlobalJD.define(orc::absoluteSymbols(i128_crt))); #endif #ifdef MSAN_EMUTLS_WORKAROUND orc::SymbolMap msan_crt; - msan_crt[mangle("__emutls_get_address")] = JITEvaluatedSymbol::fromPointer(msan_workaround::getTLSAddress, JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_param_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::param)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::param_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_retval_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = 
JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_get_address")] = {ExecutorAddr::fromPtr(msan_workaround::getTLSAddress), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_param_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::param))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::param_origin))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_retval_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval_origin))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_origin))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size))), JITSymbolFlags::Exported}; + msan_crt[mangle("__emutls_v.__msan_origin_tls")] = {ExecutorAddr::fromPtr( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin))), JITSymbolFlags::Exported}; cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); #endif +#if JL_LLVM_VERSION < 190000 #ifdef _COMPILER_ASAN_ENABLED_ + // this is a hack to work around a bad assertion: + // /workspace/srcdir/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp:3028: llvm::Error llvm::orc::ExecutionSession::OL_notifyResolved(llvm::orc::MaterializationResponsibility&, const SymbolMap&): Assertion `(KV.second.getFlags() & ~JITSymbolFlags::Common) == (I->second & ~JITSymbolFlags::Common) && "Resolving symbol with incorrect flags"' failed. 
+ // hopefully fixed upstream by e7698a13e319a9919af04d3d693a6f6ea7168a44 + static int64_t jl___asan_globals_registered; orc::SymbolMap asan_crt; - asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported); + asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported}; cantFail(JD.define(orc::absoluteSymbols(asan_crt))); #endif +#endif } JuliaOJIT::~JuliaOJIT() = default; +ThreadSafeContext JuliaOJIT::makeContext() +{ + auto ctx = std::make_unique(); + return orc::ThreadSafeContext(std::move(ctx)); +} + orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) { std::string MangleName = getMangledName(Name); @@ -1454,59 +2110,39 @@ orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) { - cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), JITEvaluatedSymbol::fromPointer((void*)Addr)}}))); + cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), {ExecutorAddr::fromPtr((void*)Addr), JITSymbolFlags::Exported}}}))); } void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { - JL_TIMING(LLVM_ORC, LLVM_ORC); + JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; - orc::SymbolLookupSet NewExports; - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - jl_decorate_module(M); - shareStrings(M); - for (auto &F : M.global_values()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Name = ES.intern(getMangledName(F.getName())); - NewExports.add(std::move(Name)); - } - } -#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK) - // validate the relocations for M (not implemented for the JITLink memory manager yet) - for (Module::global_object_iterator I = M.global_objects().begin(), E = M.global_objects().end(); I != E; ) { - GlobalObject *F = &*I; - ++I; - if (F->isDeclaration()) { - if (F->use_empty()) - F->eraseFromParent(); - else if (!((isa(F) && isIntrinsicFunction(cast(F))) || - findUnmangledSymbol(F->getName()) || - SectionMemoryManager::getSymbolAddressInProcess( - getMangledName(F->getName())))) { - llvm::errs() << "FATAL ERROR: " - << "Symbol \"" << F->getName().str() << "\"" - << "not found"; - abort(); - } - } - } -#endif - }); - - // TODO: what is the performance characteristics of this? 
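The recurring pattern above, an orc::SymbolMap entry built from ExecutorAddr::fromPtr plus JITSymbolFlags and handed to absoluteSymbols, is how process-resident addresses are exposed to JIT'd code. A stand-alone sketch of the same pattern, assuming LLVM 17 or newer where SymbolMap values are ExecutorSymbolDef; the function name is illustrative and header layout varies between LLVM versions:

    #include <llvm/ExecutionEngine/Orc/Core.h>

    using namespace llvm;
    using namespace llvm::orc;

    // Expose a host function to JIT'd code under an already-interned name.
    static Error defineHostSymbol(JITDylib &JD, SymbolStringPtr Name, void *Addr) {
        SymbolMap Syms;
        Syms[std::move(Name)] = {ExecutorAddr::fromPtr(Addr),
                                 JITSymbolFlags::Exported | JITSymbolFlags::Callable};
        return JD.define(absoluteSymbols(std::move(Syms)));
    }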
- cantFail(OptSelLayer.add(JD, std::move(TSM))); - // force eager compilation (for now), due to memory management specifics - // (can't handle compilation recursion) - for (auto &sym : cantFail(ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports))) { - assert(sym.second); - (void) sym; + TSM = selectOptLevel(std::move(TSM)); + TSM = (*Optimizers)(std::move(TSM)); + TSM = (*JITPointers)(std::move(TSM)); + auto Lock = TSM.getContext().getLock(); + Module &M = *TSM.getModuleUnlocked(); + // Treat this as if one of the passes might contain a safepoint + // even though that shouldn't be the case and might be unwise + Expected> Obj = CompileLayer.getCompiler()(M); + if (!Obj) { + ES.reportError(Obj.takeError()); + errs() << "Failed to add module to JIT!\n"; + errs() << "Dumping failing module\n" << M << "\n"; + return; + } + { auto release = std::move(Lock); } + auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj)); + if (Err) { + ES.reportError(std::move(Err)); + errs() << "Failed to add objectfile to JIT!\n"; + abort(); } } Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) { - if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error - { + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error { if (M.getDataLayout().isDefault()) M.setDataLayout(DL); if (M.getDataLayout() != DL) @@ -1515,37 +2151,61 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, M.getDataLayout().getStringRepresentation() + " (module) vs " + DL.getStringRepresentation() + " (jit)", inconvertibleErrorCode()); - + // OrcJIT requires that all modules / files have unique names: + M.setModuleIdentifier((M.getModuleIdentifier() + Twine("-") + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str()); return Error::success(); - })) + })) return Err; - return ExternalCompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); + //if (ShouldOptimize) + // return OptimizeLayer.add(JD, std::move(TSM)); + return CompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); } Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { assert(Obj && "Can not add null object"); - return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); + // OrcJIT requires that all modules / files have unique names: + // https://llvm.org/doxygen/namespacellvm_1_1orc.html#a1f5a1bc60c220cdccbab0f26b2a425e1 + // so we have to force a copy here + std::string Name = ("jitted-" + Twine(jl_atomic_fetch_add_relaxed(&jitcounter, 1))).str(); + Obj = Obj->getMemBufferCopy(Obj->getBuffer(), Name); + return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); +} + +SmallVector JuliaOJIT::findSymbols(ArrayRef Names) +{ + // assert(MemMgr.use_count() == 1); (true single-threaded, but slightly race-y to assert it with concurrent threads) + DenseMap Unmangled; + orc::SymbolLookupSet Exports; + for (StringRef Name : Names) { + auto Mangled = ES.intern(getMangledName(Name)); + Unmangled[NonOwningSymbolStringPtr(Mangled)] = Unmangled.size(); + Exports.add(std::move(Mangled)); + } + SymbolMap Syms = cantFail(ES.lookup(orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports))); + SmallVector Addrs(Names.size()); + for (auto it : Syms) { + Addrs[Unmangled.at(orc::NonOwningSymbolStringPtr(it.first))] = it.second.getAddress().getValue(); + } + return Addrs; } -JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) +Expected JuliaOJIT::findSymbol(StringRef 
Name, bool ExportedSymbolsOnly) { orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; - ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); + ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); auto Sym = ES.lookup(SearchOrder, Name); - if (Sym) - return *Sym; - return Sym.takeError(); + return Sym; } -JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name) +Expected JuliaOJIT::findUnmangledSymbol(StringRef Name) { return findSymbol(getMangledName(Name), true); } -Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) +Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) { orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; - ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); + ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); return Sym; } @@ -1557,7 +2217,7 @@ uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name) consumeError(addr.takeError()); return 0; } - return cantFail(addr.getAddress()); + return addr->getAddress().getValue(); } uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) @@ -1567,18 +2227,18 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) consumeError(addr.takeError()); return 0; } - return cantFail(addr.getAddress()); + return addr->getAddress().getValue(); } -StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst) +StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) { std::lock_guard lock(RLST_mutex); + assert(Addr != (uint64_t)jl_fptr_wait_for_compiled_addr); std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr]; if (fname->empty()) { std::string string_fname; raw_string_ostream stream_fname(string_fname); // try to pick an appropriate name that describes it - jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke); if (Addr == (uintptr_t)invoke) { stream_fname << "jsysw_"; } @@ -1591,7 +2251,7 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod else { stream_fname << "jlsys_"; } - const char* unadorned_name = jl_symbol_name(codeinst->def->def.method->name); + const char* unadorned_name = jl_symbol_name(jl_get_ci_mi(codeinst)->def.method->name); stream_fname << unadorned_name << "_" << RLST_inc++; *fname = std::move(stream_fname.str()); // store to ReverseLocalSymbolTable addGlobalMapping(*fname, Addr); @@ -1599,34 +2259,88 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod return *fname; } - #ifdef JL_USE_JITLINK -extern "C" orc::shared::CWrapperFunctionResult -llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size); +#define addAbsoluteToMap(map,name) \ + (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name)) void JuliaOJIT::enableJITDebuggingSupport() { orc::SymbolMap GDBFunctions; - GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable); - GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable); + addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction); 
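// Hand-expanded for orientation (editorial sketch, not generated output): the call
// above becomes roughly
//   GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] =
//       {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction),
//        JITSymbolFlags::Exported | JITSymbolFlags::Callable};
// and, being a comma expression, also yields that symbol's ExecutorAddr; the next
// call uses the same macro to capture the address of
// llvm_orc_registerJITLoaderGDBWrapper for the ELF debug-object registrar below.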
+ auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper); cantFail(JD.define(orc::absoluteSymbols(GDBFunctions))); + (void)registerJITLoaderGDBWrapper; if (TM->getTargetTriple().isOSBinFormatMachO()) ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple()))); +#ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794 else if (TM->getTargetTriple().isOSBinFormatELF()) //EPCDebugObjectRegistrar doesn't take a JITDylib, so we have to directly provide the call address - ObjectLayer.addPlugin(std::make_unique(ES, std::make_unique(ES, orc::ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper)))); + ObjectLayer.addPlugin(std::make_unique(ES, std::make_unique(ES, registerJITLoaderGDBWrapper))); +#endif +} + +void JuliaOJIT::enableIntelJITEventListener() +{ +#if JL_LLVM_VERSION >= 190000 + if (TM->getTargetTriple().isOSBinFormatELF()) { + orc::SymbolMap VTuneFunctions; + auto RegisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_registerVTuneImpl); + auto UnregisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_unregisterVTuneImpl); + ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create())); + //ObjectLayer.addPlugin(cantFail(VTuneSupportPlugin::Create(ES.getExecutorProcessControl(), + // JD, /*EmitDebugInfo=*/true, + // /*TestMode=*/false))); + bool EmitDebugInfo = true; + ObjectLayer.addPlugin(std::make_unique( + ES.getExecutorProcessControl(), RegisterImplAddr, UnregisterImplAddr, EmitDebugInfo)); + } +#endif +} + +void JuliaOJIT::enableOProfileJITEventListener() +{ + // implement when available in LLVM +} + +void JuliaOJIT::enablePerfJITEventListener() +{ +#if JL_LLVM_VERSION >= 180000 + if (TM->getTargetTriple().isOSBinFormatELF()) { + orc::SymbolMap PerfFunctions; + auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart); + auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd); + auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl); + cantFail(JD.define(orc::absoluteSymbols(PerfFunctions))); + ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create())); + //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create( + // ES.getExecutorProcessControl(), *JD, true, true))); + bool EmitDebugInfo = true, EmitUnwindInfo = true; + ObjectLayer.addPlugin(std::make_unique( + ES.getExecutorProcessControl(), StartAddr, EndAddr, ImplAddr, EmitDebugInfo, EmitUnwindInfo)); + } +#endif } #else +void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) +{ + if (L) + UnlockedObjectLayer.registerJITEventListener(*L); +} void JuliaOJIT::enableJITDebuggingSupport() { RegisterJITEventListener(JITEventListener::createGDBRegistrationListener()); } - -void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) +void JuliaOJIT::enableIntelJITEventListener() { - if (!L) - return; - this->ObjectLayer.registerJITEventListener(*L); + RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); +} +void JuliaOJIT::enableOProfileJITEventListener() +{ + RegisterJITEventListener(JITEventListener::createOProfileJITEventListener()); +} +void JuliaOJIT::enablePerfJITEventListener() +{ + RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); } #endif @@ -1647,30 +2361,33 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV) return getMangledName(GV->getName()); } -#ifdef JL_USE_JITLINK size_t JuliaOJIT::getTotalBytes() const { - return 
total_size.load(std::memory_order_relaxed); + auto bytes = jl_atomic_load_relaxed(&jit_bytes_size); +#ifndef JL_USE_JITLINK + size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; + bytes += getRTDyldMemoryManagerTotalBytes(MemMgr.get()); +#endif + return bytes; } -#else -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; -size_t JuliaOJIT::getTotalBytes() const +void JuliaOJIT::addBytes(size_t bytes) { - return getRTDyldMemoryManagerTotalBytes(MemMgr.get()); + jl_atomic_fetch_add_relaxed(&jit_bytes_size, bytes); } -#endif void JuliaOJIT::printTimers() { -#ifdef JL_USE_NEW_PM for (auto &printer : PrintLLVMTimers) { printer(); } -#endif reportAndResetTimings(); } +void JuliaOJIT::optimizeDLSyms(Module &M) { + (*DLSymOpt)(M); +} + JuliaOJIT *jl_ExecutionEngine; // destructively move the contents of src into dest @@ -1688,93 +2405,95 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS assert(dest.getDataLayout() == src.getDataLayout() && "Cannot merge modules with different data layouts!"); assert(dest.getTargetTriple() == src.getTargetTriple() && "Cannot merge modules with different target triples!"); - for (Module::global_iterator I = src.global_begin(), E = src.global_end(); I != E;) { - GlobalVariable *sG = &*I; - GlobalVariable *dG = cast_or_null(dest.getNamedValue(sG->getName())); - ++I; + for (auto &SG : make_early_inc_range(src.globals())) { + GlobalVariable *dG = cast_or_null(dest.getNamedValue(SG.getName())); + if (SG.hasLocalLinkage()) { + dG = nullptr; + } // Replace a declaration with the definition: - if (dG) { - if (sG->isDeclaration()) { - sG->replaceAllUsesWith(dG); - sG->eraseFromParent(); + if (dG && !dG->hasLocalLinkage()) { + if (SG.isDeclaration()) { + SG.replaceAllUsesWith(dG); + SG.eraseFromParent(); continue; } //// If we start using llvm.used, we need to enable and test this - //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && sG->hasAppendingLinkage()) { + //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && SG.hasAppendingLinkage()) { // auto *dCA = cast(dG->getInitializer()); - // auto *sCA = cast(sG->getInitializer()); + // auto *sCA = cast(SG.getInitializer()); // SmallVector Init; // for (auto &Op : dCA->operands()) // Init.push_back(cast_or_null(Op)); // for (auto &Op : sCA->operands()) // Init.push_back(cast_or_null(Op)); - // Type *Int8PtrTy = Type::getInt8PtrTy(dest.getContext()); - // ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size()); + // ArrayType *ATy = ArrayType::get(PointerType::get(dest.getContext()), Init.size()); // GlobalVariable *GV = new GlobalVariable(dest, ATy, dG->isConstant(), // GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init), "", // dG->getThreadLocalMode(), dG->getType()->getAddressSpace()); // GV->copyAttributesFrom(dG); - // sG->replaceAllUsesWith(GV); + // SG.replaceAllUsesWith(GV); // dG->replaceAllUsesWith(GV); - // GV->takeName(sG); - // sG->eraseFromParent(); + // GV->takeName(SG); + // SG.eraseFromParent(); // dG->eraseFromParent(); // continue; //} else { - assert(dG->isDeclaration() || dG->getInitializer() == sG->getInitializer()); - dG->replaceAllUsesWith(sG); + assert(dG->isDeclaration() || dG->getInitializer() == SG.getInitializer()); + dG->replaceAllUsesWith(&SG); dG->eraseFromParent(); } } // Reparent the global variable: - sG->removeFromParent(); - dest.getGlobalList().push_back(sG); + SG.removeFromParent(); + dest.insertGlobalVariable(&SG); // Comdat is owned by the Module - 
sG->setComdat(nullptr); + SG.setComdat(nullptr); } - for (Module::iterator I = src.begin(), E = src.end(); I != E;) { - Function *sG = &*I; - Function *dG = cast_or_null(dest.getNamedValue(sG->getName())); - ++I; + for (auto &SG : make_early_inc_range(src)) { + Function *dG = cast_or_null(dest.getNamedValue(SG.getName())); + if (SG.hasLocalLinkage()) { + dG = nullptr; + } // Replace a declaration with the definition: - if (dG) { - if (sG->isDeclaration()) { - sG->replaceAllUsesWith(dG); - sG->eraseFromParent(); + if (dG && !dG->hasLocalLinkage()) { + if (SG.isDeclaration()) { + SG.replaceAllUsesWith(dG); + SG.eraseFromParent(); continue; } else { assert(dG->isDeclaration()); - dG->replaceAllUsesWith(sG); + dG->replaceAllUsesWith(&SG); dG->eraseFromParent(); } } // Reparent the global variable: - sG->removeFromParent(); - dest.getFunctionList().push_back(sG); + SG.removeFromParent(); + dest.getFunctionList().push_back(&SG); // Comdat is owned by the Module - sG->setComdat(nullptr); + SG.setComdat(nullptr); } - for (Module::alias_iterator I = src.alias_begin(), E = src.alias_end(); I != E;) { - GlobalAlias *sG = &*I; - GlobalAlias *dG = cast_or_null(dest.getNamedValue(sG->getName())); - ++I; - if (dG) { + for (auto &SG : make_early_inc_range(src.aliases())) { + GlobalAlias *dG = cast_or_null(dest.getNamedValue(SG.getName())); + if (SG.hasLocalLinkage()) { + dG = nullptr; + } + if (dG && !dG->hasLocalLinkage()) { if (!dG->isDeclaration()) { // aliases are always definitions, so this test is reversed from the above two - sG->replaceAllUsesWith(dG); - sG->eraseFromParent(); + SG.replaceAllUsesWith(dG); + SG.eraseFromParent(); continue; } else { - dG->replaceAllUsesWith(sG); + dG->replaceAllUsesWith(&SG); dG->eraseFromParent(); } } - sG->removeFromParent(); - dest.getAliasList().push_back(sG); + SG.removeFromParent(); + dest.insertAlias(&SG); } // metadata nodes need to be explicitly merged not just copied @@ -1790,37 +2509,11 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS }); } -// optimize memory by turning long strings into memoized copies, instead of -// making a copy per object file of output. 
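The merge loops above erase globals, functions, and aliases while iterating over them, which is only safe because make_early_inc_range advances the iterator before each body runs. A minimal self-contained sketch of the same idiom (the helper is illustrative, not part of the patch):

    #include <llvm/ADT/STLExtras.h>
    #include <llvm/IR/Module.h>

    // Erase unused declarations; early-inc iteration keeps this safe even though
    // eraseFromParent() unlinks the element currently being visited.
    static void dropUnusedDeclarations(llvm::Module &M) {
        for (llvm::Function &F : llvm::make_early_inc_range(M))
            if (F.isDeclaration() && F.use_empty())
                F.eraseFromParent();
    }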
-void JuliaOJIT::shareStrings(Module &M) -{ - ++InternedGlobals; - std::vector erase; - for (auto &GV : M.globals()) { - if (!GV.hasInitializer() || !GV.isConstant()) - continue; - ConstantDataSequential *CDS = dyn_cast(GV.getInitializer()); - if (CDS == nullptr) - continue; - StringRef data = CDS->getRawDataValues(); - if (data.size() > 16) { // only for long strings: keep short ones as values - Type *T_size = Type::getIntNTy(GV.getContext(), sizeof(void*) * 8); - Constant *v = ConstantExpr::getIntToPtr( - ConstantInt::get(T_size, (uintptr_t)(*ES.intern(data)).data()), - GV.getType()); - GV.replaceAllUsesWith(v); - erase.push_back(&GV); - } - } - for (auto GV : erase) - GV->eraseFromParent(); -} - //TargetMachine pass-through methods std::unique_ptr JuliaOJIT::cloneTargetMachine() const { - return std::unique_ptr(getTarget() + auto NewTM = std::unique_ptr(getTarget() .createTargetMachine( getTargetTriple().str(), getTargetCPU(), @@ -1829,6 +2522,8 @@ std::unique_ptr JuliaOJIT::cloneTargetMachine() const TM->getRelocationModel(), TM->getCodeModel(), TM->getOptLevel())); + fixupTM(*NewTM); + return NewTM; } const Triple& JuliaOJIT::getTargetTriple() const { @@ -1855,94 +2550,64 @@ static void jl_decorate_module(Module &M) { if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { // Add special values used by debuginfo to build the UnwindData table registration for Win64 // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text` - // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here. - M.appendModuleInlineAsm("\ - .section .text \n\ - .type __UnwindData,@object \n\ - .p2align 2, 0x90 \n\ - __UnwindData: \n\ - .zero 12 \n\ - .size __UnwindData, 12 \n\ - \n\ - .type __catchjmp,@object \n\ - .p2align 2, 0x90 \n\ - __catchjmp: \n\ - .zero 12 \n\ - .size __catchjmp, 12"); - } -} - -// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - std::vector &Stack) -{ - // First check if the TSM is empty (already compiled) - if (!M) - return 0; - // Next check and record if it is on the stack somewhere - { - auto &Id = Queued[&M]; - if (Id) - return Id; - Stack.push_back(&M); - Id = Stack.size(); - } - // Finally work out the SCC - int depth = Stack.size(); - int MergeUp = depth; - std::vector Children; - M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT { - for (auto &F : m.global_objects()) { - if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - auto Callee = NewExports.find(F.getName()); - if (Callee != NewExports.end()) { - auto *CM = Callee->second; - if (*CM && CM != &M) { - auto Down = Queued.find(CM); - if (Down != Queued.end()) - MergeUp = std::min(MergeUp, Down->second); - else - Children.push_back(CM); - } - } - } - } - }); - assert(MergeUp > 0); - for (auto *CM : Children) { - int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack); - assert(Down <= (int)Stack.size()); - if (Down) - MergeUp = std::min(MergeUp, Down); - } - if (MergeUp < depth) - return MergeUp; - while (1) { - // Not in a cycle (or at the top of it) - // remove SCC state and merge every CM from the cycle into M - orc::ThreadSafeModule *CM = Stack.back(); - auto it = Queued.find(CM); - assert(it->second == (int)Stack.size()); - Queued.erase(it); - Stack.pop_back(); - if ((int)Stack.size() < depth) { - assert(&M == CM); - break; - } - jl_merge_module(M, std::move(*CM)); 
+ // and with JITLink it became difficult to change the content afterwards, but we + // would prefer that this simple content wasn't recompiled in every single module, + // so we emit the necessary PLT trampoline as inline assembly. + // This is somewhat duplicated with the .pdata section, but we haven't been able to + // use that yet due to relocation issues. +#define ASM_USES_ELF // use ELF or COFF syntax based on FORCE_ELF + StringRef inline_asm( + ".section" +#if JL_LLVM_VERSION >= 180000 + " .ltext,\"ax\",@progbits\n" +#else + " .text\n" +#endif + ".globl __julia_personality\n" + "\n" +#ifdef ASM_USES_ELF + ".type __UnwindData,@object\n" +#else + ".def __UnwindData\n" + ".scl 2\n" + ".type 0\n" + ".endef\n" +#endif + ".p2align 2, 0x90\n" + "__UnwindData:\n" + " .byte 0x09;\n" // version info, UNW_FLAG_EHANDLER + " .byte 4;\n" // size of prolog (bytes) + " .byte 2;\n" // count of unwind codes (slots) + " .byte 0x05;\n" // frame register (rbp) = rsp + " .byte 4;\n" // second instruction + " .byte 0x03;\n" // mov RBP, RSP + " .byte 1;\n" // first instruction + " .byte 0x50;\n" // push RBP + " .int __catchjmp - " +#if JL_LLVM_VERSION >= 180000 + ".ltext;\n" // Section-relative offset (if using COFF and JITLink, this can be relative to __ImageBase instead, though then we could possibly use pdata/xdata directly then) +#else + ".text;\n" +#endif + ".size __UnwindData, 12\n" + "\n" +#ifdef ASM_USES_ELF + ".type __catchjmp,@function\n" +#else + ".def __catchjmp\n" + ".scl 2\n" + ".type 32\n" + ".endef\n" +#endif + ".p2align 2, 0x90\n" + "__catchjmp:\n" + " movabsq $__julia_personality, %rax\n" + " jmpq *%rax\n" + ".size __catchjmp, . - __catchjmp\n" + "\n"); + M.appendModuleInlineAsm(inline_asm); } - jl_ExecutionEngine->addModule(std::move(M)); - return 0; -} - -static uint64_t getAddressForFunction(StringRef fname) -{ - auto addr = jl_ExecutionEngine->getFunctionAddress(fname); - assert(addr); - return addr; +#undef ASM_USES_ELF } // helper function for adding a DLLImport (dlsym) address to the execution engine @@ -1956,3 +2621,9 @@ size_t jl_jit_total_bytes_impl(void) { return jl_ExecutionEngine->getTotalBytes(); } + +// API for adding bytes to record being owned by the JIT +void jl_jit_add_bytes(size_t bytes) +{ + jl_ExecutionEngine->addBytes(bytes); +} diff --git a/src/jitlayers.h b/src/jitlayers.h index 3aa3998d3ac23..7198c9b2f0210 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -1,6 +1,8 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include +#include +#include #include #include @@ -8,6 +10,7 @@ #include #include #include +#include #include #include @@ -22,9 +25,11 @@ #include "julia.h" #include "julia_internal.h" #include "platform.h" - +#include "llvm-codegen-shared.h" +#include "llvm-version.h" #include #include +#include // As of LLVM 13, there are two runtime JIT linker implementations, the older // RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink @@ -42,23 +47,19 @@ // and feature support (e.g. Windows, JITEventListeners for various profilers, // etc.). Thus, we currently only use JITLink where absolutely required, that is, // for Mac/aarch64 and Linux/aarch64. 
-// #define JL_FORCE_JITLINK +//#define JL_FORCE_JITLINK #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) # define HAS_SANITIZER #endif // The sanitizers don't play well with our memory manager -#if defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER) +#if defined(JL_FORCE_JITLINK) || defined(_CPU_AARCH64_) || defined(HAS_SANITIZER) +# define JL_USE_JITLINK +#endif + +#if defined(_CPU_RISCV64_) # define JL_USE_JITLINK -#else -# if defined(_CPU_AARCH64_) -# if defined(_OS_LINUX_) && JL_LLVM_VERSION < 150000 -# pragma message("On aarch64-gnu-linux, LLVM version >= 15 is required for JITLink; fallback suffers from occasional segfaults") -# else -# define JL_USE_JITLINK -# endif -# endif #endif # include @@ -67,51 +68,56 @@ using namespace llvm; -extern "C" jl_cgparams_t jl_default_cgparams; - DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef) void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) JL_NOTSAFEPOINT; -void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false) JL_NOTSAFEPOINT; -void addMachinePasses(legacy::PassManagerBase *PM, int optlevel) JL_NOTSAFEPOINT; void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src) JL_NOTSAFEPOINT; GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) JL_NOTSAFEPOINT; DataLayout jl_create_datalayout(TargetMachine &TM) JL_NOTSAFEPOINT; -static inline bool imaging_default() JL_NOTSAFEPOINT { - return jl_options.image_codegen || (jl_generating_output() && (!jl_options.incremental || jl_options.use_pkgimages)); -} - struct OptimizationOptions { bool lower_intrinsics; bool dump_native; bool external_use; bool llvm_only; + bool always_inline; + bool enable_early_simplifications; + bool enable_early_optimizations; + bool enable_scalar_optimizations; + bool enable_loop_optimizations; + bool enable_vector_pipeline; + bool remove_ni; + bool cleanup; + bool warn_missed_transformations; static constexpr OptimizationOptions defaults( bool lower_intrinsics=true, bool dump_native=false, bool external_use=false, - bool llvm_only=false) { - return {lower_intrinsics, dump_native, external_use, llvm_only}; + bool llvm_only=false, + bool always_inline=true, + bool enable_early_simplifications=true, + bool enable_early_optimizations=true, + bool enable_scalar_optimizations=true, + bool enable_loop_optimizations=true, + bool enable_vector_pipeline=true, + bool remove_ni=true, + bool cleanup=true, + bool warn_missed_transformations=false) { + return {lower_intrinsics, dump_native, external_use, llvm_only, + always_inline, enable_early_simplifications, + enable_early_optimizations, enable_scalar_optimizations, + enable_loop_optimizations, enable_vector_pipeline, + remove_ni, cleanup, warn_missed_transformations}; } }; -// LLVM's new pass manager is scheduled to replace the legacy pass manager -// for middle-end IR optimizations. 
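Since defaults() returns the options by value and all knobs are public booleans, callers can start from the defaults and flip individual flags before handing them to NewPM. A usage sketch; the particular combination is illustrative only and assumes the declarations from this header are in scope:

    // Run a module through a pipeline configured for a native-code dump with the
    // vectorizers turned off (illustrative settings).
    static void runDumpPipeline(std::unique_ptr<llvm::TargetMachine> TM, llvm::Module &M) {
        OptimizationOptions opts = OptimizationOptions::defaults(
            /*lower_intrinsics*/ true,
            /*dump_native*/ true);
        opts.enable_vector_pipeline = false;        // skip the loop/SLP vectorizers
        opts.warn_missed_transformations = true;    // surface missed-transform remarks
        NewPM PM(std::move(TM), llvm::OptimizationLevel::O2, opts);
        PM.run(M);
    }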
-#if JL_LLVM_VERSION >= 150000 -#define JL_USE_NEW_PM -#endif - struct NewPM { std::unique_ptr TM; - StandardInstrumentations SI; - std::unique_ptr PIC; - PassBuilder PB; - ModulePassManager MPM; OptimizationLevel O; - + OptimizationOptions options; + TimePassesHandler TimePasses; NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults()) JL_NOTSAFEPOINT; ~NewPM() JL_NOTSAFEPOINT; @@ -141,11 +147,11 @@ struct jl_locked_stream { std::unique_lock lck; ios_t *&stream; - lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT + lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : lck(mutex), stream(stream) {} lock(lock&) = delete; lock(lock&&) JL_NOTSAFEPOINT = default; - ~lock() JL_NOTSAFEPOINT = default; + ~lock() JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT = default; ios_t *&operator*() JL_NOTSAFEPOINT { return stream; @@ -164,18 +170,23 @@ struct jl_locked_stream { } }; - jl_locked_stream() JL_NOTSAFEPOINT = default; - ~jl_locked_stream() JL_NOTSAFEPOINT = default; + jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER = default; + ~jl_locked_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; lock operator*() JL_NOTSAFEPOINT { return lock(mutex, stream); } }; -typedef struct _jl_llvm_functions_t { +struct jl_llvm_functions_t { std::string functionObject; // jlcall llvm Function name std::string specFunctionObject; // specialized llvm Function name -} jl_llvm_functions_t; + jl_llvm_functions_t() JL_NOTSAFEPOINT = default; + jl_llvm_functions_t &operator=(const jl_llvm_functions_t&) JL_NOTSAFEPOINT = default; + jl_llvm_functions_t(const jl_llvm_functions_t &) JL_NOTSAFEPOINT = default; + jl_llvm_functions_t(jl_llvm_functions_t &&) JL_NOTSAFEPOINT = default; + ~jl_llvm_functions_t() JL_NOTSAFEPOINT = default; +}; struct jl_returninfo_t { llvm::FunctionCallee decl; @@ -193,9 +204,18 @@ struct jl_returninfo_t { unsigned return_roots; }; -typedef std::tuple jl_codegen_call_target_t; +struct jl_codegen_call_target_t { + jl_returninfo_t::CallingConv cc; + unsigned return_roots; + llvm::Function *decl; + llvm::Function *oc; + bool specsig; +}; -typedef struct _jl_codegen_params_t { +typedef SmallVector, 0> jl_workqueue_t; + +typedef std::list> CallFrames; +struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; orc::ThreadSafeContext::Lock tsctx_lock; DataLayout DL; @@ -206,12 +226,14 @@ typedef struct _jl_codegen_params_t { } typedef StringMap SymMapGV; // outputs - std::vector> workqueue; - std::map globals; + jl_workqueue_t workqueue; + std::map global_targets; + jl_array_t *temporary_roots = nullptr; std::map, GlobalVariable*> external_fns; std::map ditypes; std::map llvmtypes; DenseMap mergedConstants; + llvm::MapVector> enqueuers; // Map from symbol name (in a certain library) to its GV in sysimg and the // DL handle address in the current session. 
StringMap> libMapGV; @@ -232,22 +254,31 @@ typedef struct _jl_codegen_params_t { std::unique_ptr _shared_module; inline Module &shared_module(); // inputs - size_t world = 0; const jl_cgparams_t *params = &jl_default_cgparams; bool cache = false; bool external_linkage = false; - bool imaging; - int debug_level; - _jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) - : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), - DL(std::move(DL)), TargetTriple(std::move(triple)), imaging(imaging_default()) {} -} jl_codegen_params_t; + bool imaging_mode; + bool use_swiftcc = true; + jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER + : tsctx(std::move(ctx)), + tsctx_lock(tsctx.getLock()), + DL(std::move(DL)), + TargetTriple(std::move(triple)), + imaging_mode(1) + { + // LLVM's RISC-V back-end currently does not support the Swift calling convention + if (TargetTriple.isRISCV()) + use_swiftcc = false; + } + jl_codegen_params_t(jl_codegen_params_t &&) JL_NOTSAFEPOINT = default; + ~jl_codegen_params_t() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; +}; jl_llvm_functions_t jl_emit_code( orc::ThreadSafeModule &M, jl_method_instance_t *mi, jl_code_info_t *src, - jl_value_t *jlrettype, + jl_value_t *abi, jl_codegen_params_t ¶ms); jl_llvm_functions_t jl_emit_codeinst( @@ -261,17 +292,19 @@ enum CompilationPolicy { Extern = 1, }; -typedef std::map> jl_workqueue_t; - -void jl_compile_workqueue( - jl_workqueue_t &emitted, - Module &original, - jl_codegen_params_t ¶ms, - CompilationPolicy policy); - Function *jl_cfunction_object(jl_function_t *f, jl_value_t *rt, jl_tupletype_t *argt, jl_codegen_params_t ¶ms); +Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) JL_NOTSAFEPOINT; +void emit_specsig_to_fptr1( + Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, + jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, + size_t nargs, + jl_codegen_params_t ¶ms, + Function *target) JL_NOTSAFEPOINT; +Function *get_or_emit_fptr1(StringRef Name, Module *M) JL_NOTSAFEPOINT; +void jl_init_function(Function *F, const Triple &TT) JL_NOTSAFEPOINT; + void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT @@ -291,22 +324,61 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope"; } -typedef JITSymbol JL_JITSymbol; -// The type that is similar to SymbolInfo on LLVM 4.0 is actually -// `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol -// is expected. -typedef JITSymbol JL_SymbolInfo; +template +class MaxAlignedAllocImpl + : public AllocatorBase> { + +public: + MaxAlignedAllocImpl() JL_NOTSAFEPOINT = default; + + static Align alignment(size_t Size) JL_NOTSAFEPOINT { + // Define the maximum alignment we expect to require, from offset bytes off + // the returned pointer, this is >= alignof(std::max_align_t), which is too + // small often to actually use. 
+ const size_t MaxAlignment = JL_CACHE_BYTE_ALIGNMENT; + if (Size <= offset) + return Align(1); + return Align(std::min((size_t)llvm::PowerOf2Ceil(Size - offset), MaxAlignment)); + } + + LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) { + Align MaxAlign = alignment(Size); + assert(Alignment < MaxAlign); (void)Alignment; + return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); + } + + inline LLVM_ATTRIBUTE_RETURNS_NONNULL + void * Allocate(size_t Size, size_t Alignment) { + return Allocate(Size, Align(Alignment)); + } + + // Pull in base class overloads. + using AllocatorBase::Allocate; + + void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { abort(); } + + // Pull in base class overloads. + using AllocatorBase::Deallocate; + +private: +}; +using MaxAlignedAlloc = MaxAlignedAllocImpl<>; using CompilerResultT = Expected>; using OptimizerResultT = Expected; +using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { +private: + // any verification the user wants to do when adding an OwningResource to the pool + template + static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { } + static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); } public: #ifdef JL_USE_JITLINK typedef orc::ObjectLinkingLayer ObjLayerT; #else typedef orc::RTDyldObjectLinkingLayer ObjLayerT; -#endif struct LockLayerT : public orc::ObjectLayer { LockLayerT(orc::ObjectLayer &BaseLayer) JL_NOTSAFEPOINT : orc::ObjectLayer(BaseLayer.getExecutionSession()), BaseLayer(BaseLayer) {} @@ -314,23 +386,27 @@ class JuliaOJIT { void emit(std::unique_ptr R, std::unique_ptr O) override { + JL_TIMING(LLVM_JIT, JIT_Link); #ifndef JL_USE_JITLINK - std::lock_guard lock(EmissionMutex); + std::lock_guard lock(EmissionMutex); #endif BaseLayer.emit(std::move(R), std::move(O)); } private: orc::ObjectLayer &BaseLayer; - std::mutex EmissionMutex; + std::recursive_mutex EmissionMutex; }; +#endif typedef orc::IRCompileLayer CompileLayerT; + typedef orc::IRTransformLayer JITPointersLayerT; typedef orc::IRTransformLayer OptimizeLayerT; + typedef orc::IRTransformLayer OptSelLayerT; typedef object::OwningBinary OwningObj; template , + SmallVector, SmallVector > > @@ -347,11 +423,16 @@ class JuliaOJIT { : pool(pool), resource(std::move(resource)) {} OwningResource(const OwningResource &) = delete; OwningResource &operator=(const OwningResource &) = delete; - OwningResource(OwningResource &&) JL_NOTSAFEPOINT = default; + OwningResource(OwningResource &&other) JL_NOTSAFEPOINT + : pool(other.pool), resource(std::move(other.resource)) { + other.resource.reset(); + } OwningResource &operator=(OwningResource &&) JL_NOTSAFEPOINT = default; ~OwningResource() JL_NOTSAFEPOINT { // _LEAVE - if (resource) + if (resource) { + verifyResource(*resource); pool.release(std::move(*resource)); + } } ResourceT release() JL_NOTSAFEPOINT { ResourceT res(std::move(*resource)); @@ -384,7 +465,7 @@ class JuliaOJIT { } private: ResourcePool &pool; - llvm::Optional resource; + std::optional resource; }; OwningResource operator*() JL_NOTSAFEPOINT { @@ -436,35 +517,16 @@ class JuliaOJIT { std::unique_ptr mutex; }; - struct PipelineT { - PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector> &PrintLLVMTimers); - CompileLayerT CompileLayer; - OptimizeLayerT OptimizeLayer; - }; - struct OptSelLayerT : orc::IRLayer { + typedef ResourcePool> ContextPoolT; - template - OptSelLayerT(const std::array, N> &optimizers) JL_NOTSAFEPOINT - : 
orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), - optimizers[0]->OptimizeLayer.getManglingOptions()), - optimizers(optimizers.data()), - count(N) { - static_assert(N > 0, "Expected array with at least one optimizer!"); - } - ~OptSelLayerT() JL_NOTSAFEPOINT = default; + struct DLSymOptimizer; + struct OptimizerT; + struct JITPointersT; - void emit(std::unique_ptr R, orc::ThreadSafeModule TSM) override; - - private: - const std::unique_ptr * const optimizers; - size_t count; - }; - -private: - // Custom object emission notification handler for the JuliaOJIT - template - void registerObject(const ObjT &Obj, const LoadResult &LO); +#ifndef JL_USE_JITLINK + void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; +#endif public: @@ -472,39 +534,31 @@ class JuliaOJIT { ~JuliaOJIT() JL_NOTSAFEPOINT; void enableJITDebuggingSupport() JL_NOTSAFEPOINT; -#ifndef JL_USE_JITLINK - // JITLink doesn't support old JITEventListeners (yet). - void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; -#endif + void enableIntelJITEventListener() JL_NOTSAFEPOINT; + void enableOProfileJITEventListener() JL_NOTSAFEPOINT; + void enablePerfJITEventListener() JL_NOTSAFEPOINT; orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT; void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; - void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; //Methods for the C API Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize = false) JL_NOTSAFEPOINT; Error addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) JL_NOTSAFEPOINT; - Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; - orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; }; + orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return CompileLayer; }; orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; } orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; } - JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; - JL_JITSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; + Expected findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; + Expected findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; + Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; + SmallVector findSymbols(ArrayRef Names) JL_NOTSAFEPOINT; uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; - StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; - auto getContext() JL_NOTSAFEPOINT { - return *ContextPool; - } - orc::ThreadSafeContext acquireContext() { // JL_NOTSAFEPOINT_ENTER? - return ContextPool.acquire(); - } - void releaseContext(orc::ThreadSafeContext &&ctx) { // JL_NOTSAFEPOINT_LEAVE? 
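The OwningResource move constructor above now clears the moved-from handle so that exactly one holder returns the resource to its pool. A self-contained miniature of that pattern, using stand-in types rather than the real ResourcePool/OwningResource, for illustration:

    #include <optional>
    #include <utility>
    #include <vector>

    template <typename T> struct TinyPool {                  // stand-in for ResourcePool
        std::vector<T> free_list;
        void release(T v) { free_list.push_back(std::move(v)); }
    };

    template <typename T> struct PooledHandle {              // stand-in for OwningResource
        TinyPool<T> &pool;
        std::optional<T> payload;
        PooledHandle(TinyPool<T> &p, T v) : pool(p), payload(std::move(v)) {}
        PooledHandle(PooledHandle &&other) : pool(other.pool), payload(std::move(other.payload)) {
            other.payload.reset();             // moved-from handle no longer owns anything
        }
        ~PooledHandle() {
            if (payload)                       // only the live holder gives the resource back
                pool.release(std::move(*payload));
        }
        T release() {                          // caller takes ownership instead of the pool
            T v = std::move(*payload);
            payload.reset();
            return v;
        }
    };
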
- ContextPool.release(std::move(ctx)); - } + StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; + orc::ThreadSafeContext makeContext() JL_NOTSAFEPOINT; const DataLayout& getDataLayout() const JL_NOTSAFEPOINT; // TargetMachine pass-through methods @@ -517,21 +571,26 @@ class JuliaOJIT { TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT; size_t getTotalBytes() const JL_NOTSAFEPOINT; + void addBytes(size_t bytes) JL_NOTSAFEPOINT; void printTimers() JL_NOTSAFEPOINT; - jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_emitted_mi_name_stream; } - jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_compiles_stream; } - jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { + jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return dump_llvm_opt_stream; } -private: std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; - void shareStrings(Module &M) JL_NOTSAFEPOINT; + + // Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls + // but may be called from inside safe-regions due to jit compilation locks + void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; + +private: const std::unique_ptr TM; const DataLayout DL; @@ -544,66 +603,57 @@ class JuliaOJIT { std::mutex RLST_mutex{}; int RLST_inc = 0; DenseMap ReverseLocalSymbolTable; + SharedBytesT SharedBytes; + + std::unique_ptr DLSymOpt; //Compilation streams jl_locked_stream dump_emitted_mi_name_stream; jl_locked_stream dump_compiles_stream; jl_locked_stream dump_llvm_opt_stream; - std::vector> PrintLLVMTimers; - - ResourcePool> ContextPool; + std::mutex llvm_printing_mutex{}; + SmallVector, 0> PrintLLVMTimers; -#ifndef JL_USE_JITLINK - const std::shared_ptr MemMgr; -#else - std::atomic total_size{0}; + _Atomic(size_t) jit_bytes_size{0}; + _Atomic(size_t) jitcounter{0}; +#ifdef JL_USE_JITLINK const std::unique_ptr MemMgr; -#endif ObjLayerT ObjectLayer; - LockLayerT LockLayer; - const std::array, 4> Pipelines; +#else + const std::shared_ptr MemMgr; // shared_ptr protected by LockLayerT.EmissionMutex + ObjLayerT UnlockedObjectLayer; + LockLayerT ObjectLayer; +#endif + CompileLayerT CompileLayer; + std::unique_ptr JITPointers; + JITPointersLayerT JITPointersLayer; + std::unique_ptr Optimizers; + OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; - CompileLayerT ExternalCompileLayer; - }; extern JuliaOJIT *jl_ExecutionEngine; -std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT; -inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT { +std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT; +inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, 
orc::ThreadSafeContext ctx, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT { auto lock = ctx.getLock(); - return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), imaging_mode, DL, triple), ctx); + return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), DL, triple), ctx); } Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT { if (!_shared_module) { - _shared_module = jl_create_llvm_module("globals", getContext(), imaging, DL, TargetTriple); + _shared_module = jl_create_llvm_module("globals", getContext(), DL, TargetTriple); } return *_shared_module; } +void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT; -Pass *createLowerPTLSPass(bool imaging_mode) JL_NOTSAFEPOINT; -Pass *createCombineMulAddPass() JL_NOTSAFEPOINT; -Pass *createFinalLowerGCPass() JL_NOTSAFEPOINT; -Pass *createLateLowerGCFramePass() JL_NOTSAFEPOINT; -Pass *createLowerExcHandlersPass() JL_NOTSAFEPOINT; -Pass *createGCInvariantVerifierPass(bool Strong) JL_NOTSAFEPOINT; -Pass *createPropagateJuliaAddrspaces() JL_NOTSAFEPOINT; -Pass *createRemoveJuliaAddrspacesPass() JL_NOTSAFEPOINT; -Pass *createRemoveNIPass() JL_NOTSAFEPOINT; -Pass *createJuliaLICMPass() JL_NOTSAFEPOINT; -Pass *createMultiVersioningPass(bool external_use) JL_NOTSAFEPOINT; -Pass *createAllocOptPass() JL_NOTSAFEPOINT; -Pass *createDemoteFloat16Pass() JL_NOTSAFEPOINT; -Pass *createCPUFeaturesPass() JL_NOTSAFEPOINT; -Pass *createLowerSimdLoopPass() JL_NOTSAFEPOINT; +void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER; // NewPM #include "passes.h" -// Whether the Function is an llvm or julia intrinsic. -static inline bool isIntrinsicFunction(Function *F) JL_NOTSAFEPOINT -{ - return F->isIntrinsic() || F->getName().startswith("julia."); -} - +#if JL_LLVM_VERSION >= 180000 +CodeGenOptLevel CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT; +#else CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT; +#endif diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc index 092a48be81930..62acce6ce1d65 100644 --- a/src/jl_exported_data.inc +++ b/src/jl_exported_data.inc @@ -4,8 +4,12 @@ #define JL_EXPORTED_DATA_POINTERS(XX) \ XX(jl_abstractarray_type) \ XX(jl_abstractstring_type) \ + XX(jl_addrspace_type) \ + XX(jl_addrspace_typename) \ + XX(jl_addrspacecore_type) \ XX(jl_an_empty_string) \ XX(jl_an_empty_vec_any) \ + XX(jl_an_empty_memory_any) \ XX(jl_anytuple_type) \ XX(jl_anytuple_type_type) \ XX(jl_any_type) \ @@ -17,6 +21,7 @@ XX(jl_array_type) \ XX(jl_array_typename) \ XX(jl_array_uint8_type) \ + XX(jl_array_uint32_type) \ XX(jl_array_uint64_type) \ XX(jl_atomicerror_type) \ XX(jl_base_module) \ @@ -30,6 +35,7 @@ XX(jl_const_type) \ XX(jl_core_module) \ XX(jl_datatype_type) \ + XX(jl_debuginfo_type) \ XX(jl_densearray_type) \ XX(jl_diverror_exception) \ XX(jl_emptysvec) \ @@ -41,11 +47,14 @@ XX(jl_float16_type) \ XX(jl_float32_type) \ XX(jl_float64_type) \ + XX(jl_bfloat16_type) \ XX(jl_floatingpoint_type) \ XX(jl_function_type) \ XX(jl_binding_type) \ + XX(jl_binding_partition_type) \ XX(jl_globalref_type) \ XX(jl_gotoifnot_type) \ + XX(jl_enternode_type) \ XX(jl_gotonode_type) \ XX(jl_initerror_type) \ XX(jl_int16_type) \ @@ -62,12 +71,24 @@ XX(jl_llvmpointer_typename) \ XX(jl_loaderror_type) \ XX(jl_main_module) \ + XX(jl_memory_any_type) \ XX(jl_memory_exception) \ + XX(jl_genericmemory_type) \ + XX(jl_genericmemory_typename) \ + XX(jl_memory_uint8_type) \ + XX(jl_memory_uint16_type) 
\ + XX(jl_memory_uint32_type) \ + XX(jl_memory_uint64_type) \ + XX(jl_memoryref_any_type) \ + XX(jl_genericmemoryref_type) \ + XX(jl_genericmemoryref_typename) \ + XX(jl_memoryref_uint8_type) \ XX(jl_methoderror_type) \ XX(jl_method_instance_type) \ XX(jl_method_match_type) \ XX(jl_method_type) \ XX(jl_methtable_type) \ + XX(jl_missingcodeerror_type) \ XX(jl_module_type) \ XX(jl_n_threads_per_pool) \ XX(jl_namedtuple_type) \ @@ -95,6 +116,7 @@ XX(jl_simplevector_type) \ XX(jl_slotnumber_type) \ XX(jl_ssavalue_type) \ + XX(jl_abioverride_type) \ XX(jl_stackovf_exception) \ XX(jl_string_type) \ XX(jl_symbol_type) \ @@ -118,6 +140,7 @@ XX(jl_uint8_type) \ XX(jl_undefref_exception) \ XX(jl_undefvarerror_type) \ + XX(jl_fielderror_type) \ XX(jl_unionall_type) \ XX(jl_uniontype_type) \ XX(jl_upsilonnode_type) \ @@ -126,6 +149,9 @@ XX(jl_voidpointer_type) \ XX(jl_void_type) \ XX(jl_weakref_type) \ + XX(jl_libdl_module) \ + XX(jl_libdl_dlopen_func) \ + XX(jl_precompilable_error) \ // Data symbols that are defined inside the public libjulia #define JL_EXPORTED_DATA_SYMBOLS(XX) \ diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index fd824131bdbda..b92380df7a49c 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -8,6 +8,7 @@ XX(jl_alloc_array_1d) \ XX(jl_alloc_array_2d) \ XX(jl_alloc_array_3d) \ + XX(jl_alloc_array_nd) \ XX(jl_alloc_string) \ XX(jl_alloc_svec) \ XX(jl_alloc_svec_uninit) \ @@ -21,36 +22,23 @@ XX(jl_apply_type1) \ XX(jl_apply_type2) \ XX(jl_argument_datatype) \ - XX(jl_arraylen) \ - XX(jl_arrayref) \ - XX(jl_arrayset) \ - XX(jl_arrayunset) \ - XX(jl_array_cconvert_cstring) \ - XX(jl_array_copy) \ - XX(jl_array_del_at) \ - XX(jl_array_del_beg) \ XX(jl_array_del_end) \ XX(jl_array_eltype) \ - XX(jl_array_grow_at) \ - XX(jl_array_grow_beg) \ XX(jl_array_grow_end) \ - XX(jl_array_isassigned) \ XX(jl_array_ptr) \ XX(jl_array_ptr_1d_append) \ XX(jl_array_ptr_1d_push) \ - XX(jl_array_ptr_copy) \ + XX(jl_genericmemory_owner) \ + XX(jl_genericmemoryref) \ XX(jl_array_rank) \ - XX(jl_array_size) \ - XX(jl_array_sizehint) \ XX(jl_array_to_string) \ - XX(jl_array_typetagdata) \ - XX(jl_array_validate_dims) \ XX(jl_atexit_hook) \ XX(jl_atomic_bool_cmpswap_bits) \ XX(jl_atomic_cmpswap_bits) \ XX(jl_atomic_error) \ XX(jl_atomic_new_bits) \ XX(jl_atomic_store_bits) \ + XX(jl_atomic_storeonce_bits) \ XX(jl_atomic_swap_bits) \ XX(jl_backtrace_from_here) \ XX(jl_base_relative_to) \ @@ -109,7 +97,6 @@ XX(jl_cstr_to_string) \ XX(jl_current_exception) \ XX(jl_debug_method_invalidation) \ - XX(jl_declare_constant) \ XX(jl_defines_or_exports_p) \ XX(jl_deprecate_binding) \ XX(jl_dlclose) \ @@ -121,10 +108,10 @@ XX(jl_egal__bits) \ XX(jl_egal__bitstag) \ XX(jl_eh_restore_state) \ + XX(jl_eh_restore_state_noexcept) \ XX(jl_enter_handler) \ XX(jl_enter_threaded_region) \ XX(jl_environ) \ - XX(jl_eof_error) \ XX(jl_eqtable_get) \ XX(jl_eqtable_pop) \ XX(jl_eqtable_put) \ @@ -152,10 +139,6 @@ XX(jl_gc_add_ptr_finalizer) \ XX(jl_gc_add_quiescent) \ XX(jl_gc_allocobj) \ - XX(jl_gc_alloc_0w) \ - XX(jl_gc_alloc_1w) \ - XX(jl_gc_alloc_2w) \ - XX(jl_gc_alloc_3w) \ XX(jl_gc_alloc_typed) \ XX(jl_gc_big_alloc) \ XX(jl_gc_collect) \ @@ -175,22 +158,23 @@ XX(jl_gc_internal_obj_base_ptr) \ XX(jl_gc_is_enabled) \ XX(jl_gc_is_in_finalizer) \ + XX(jl_gc_pool_live_bytes) \ XX(jl_gc_live_bytes) \ XX(jl_gc_managed_malloc) \ - XX(jl_gc_managed_realloc) \ XX(jl_gc_mark_queue_obj) \ XX(jl_gc_mark_queue_objarray) \ XX(jl_gc_max_internal_obj_size) \ XX(jl_gc_new_weakref) \ 
XX(jl_gc_new_weakref_th) \ XX(jl_gc_num) \ - XX(jl_gc_pool_alloc) \ + XX(jl_gc_small_alloc) \ XX(jl_gc_queue_multiroot) \ XX(jl_gc_queue_root) \ XX(jl_gc_safepoint) \ XX(jl_gc_schedule_foreign_sweepfunc) \ XX(jl_gc_set_cb_notify_external_alloc) \ XX(jl_gc_set_cb_notify_external_free) \ + XX(jl_gc_set_cb_notify_gc_pressure) \ XX(jl_gc_set_cb_post_gc) \ XX(jl_gc_set_cb_pre_gc) \ XX(jl_gc_set_cb_root_scanner) \ @@ -200,8 +184,9 @@ XX(jl_gc_total_hrtime) \ XX(jl_gdblookup) \ XX(jl_generating_output) \ - XX(jl_generic_function_def) \ + XX(jl_declare_const_gf) \ XX(jl_gensym) \ + XX(jl_getaffinity) \ XX(jl_getallocationgranularity) \ XX(jl_getnameinfo) \ XX(jl_getpagesize) \ @@ -212,6 +197,8 @@ XX(jl_get_binding_or_error) \ XX(jl_get_binding_wr) \ XX(jl_get_cpu_name) \ + XX(jl_get_cpu_features) \ + XX(jl_cpu_has_fma) \ XX(jl_get_current_task) \ XX(jl_get_default_sysimg_path) \ XX(jl_get_excstack) \ @@ -222,7 +209,6 @@ XX(jl_get_JIT) \ XX(jl_get_julia_bin) \ XX(jl_get_julia_bindir) \ - XX(jl_get_method_inferred) \ XX(jl_get_module_compile) \ XX(jl_get_module_infer) \ XX(jl_get_module_of_binding) \ @@ -243,9 +229,9 @@ XX(jl_get_world_counter) \ XX(jl_get_zero_subnormals) \ XX(jl_gf_invoke_lookup) \ + XX(jl_method_lookup_by_tt) \ + XX(jl_method_lookup) \ XX(jl_gf_invoke_lookup_worlds) \ - XX(jl_git_branch) \ - XX(jl_git_commit) \ XX(jl_global_event_loop) \ XX(jl_has_empty_intersection) \ XX(jl_has_free_typevars) \ @@ -254,7 +240,6 @@ XX(jl_has_typevar_from_unionall) \ XX(jl_hrtime) \ XX(jl_idtable_rehash) \ - XX(jl_infer_thunk) \ XX(jl_init) \ XX(jl_init_options) \ XX(jl_init_restored_module) \ @@ -274,7 +259,6 @@ XX(jl_ios_buffer_n) \ XX(jl_ios_fd) \ XX(jl_ios_get_nbyte_int) \ - XX(jl_ir_flag_inferred) \ XX(jl_ir_flag_has_fcall) \ XX(jl_ir_flag_inlining) \ XX(jl_ir_inlining_cost) \ @@ -287,6 +271,7 @@ XX(jl_is_binding_deprecated) \ XX(jl_is_char_signed) \ XX(jl_is_const) \ + XX(jl_is_assertsbuild) \ XX(jl_is_debugbuild) \ XX(jl_is_foreign_type) \ XX(jl_is_identifier) \ @@ -324,20 +309,21 @@ XX(jl_methtable_lookup) \ XX(jl_mi_cache_insert) \ XX(jl_module_build_id) \ - XX(jl_module_export) \ XX(jl_module_exports_p) \ XX(jl_module_globalref) \ XX(jl_module_import) \ XX(jl_module_name) \ XX(jl_module_names) \ XX(jl_module_parent) \ + XX(jl_module_getloc) \ + XX(jl_module_public) \ + XX(jl_module_public_p) \ XX(jl_module_use) \ XX(jl_module_using) \ XX(jl_module_usings) \ XX(jl_module_uuid) \ XX(jl_native_alignment) \ XX(jl_nb_available) \ - XX(jl_new_array) \ XX(jl_new_bits) \ XX(jl_new_codeinst) \ XX(jl_new_code_info_uninit) \ @@ -358,6 +344,8 @@ XX(jl_new_typevar) \ XX(jl_next_from_addrinfo) \ XX(jl_normalize_to_compilable_sig) \ + XX(jl_method_match_to_mi) \ + XX(jl_get_unspecialized) \ XX(jl_no_exc_handler) \ XX(jl_object_id) \ XX(jl_object_id_) \ @@ -374,6 +362,7 @@ XX(jl_pointerref) \ XX(jl_pointerset) \ XX(jl_pop_handler) \ + XX(jl_pop_handler_noexcept) \ XX(jl_preload_sysimg_so) \ XX(jl_prepend_cwd) \ XX(jl_printf) \ @@ -388,19 +377,18 @@ XX(jl_profile_maxlen_data) \ XX(jl_profile_start_timer) \ XX(jl_profile_stop_timer) \ - XX(jl_ptrarrayref) \ XX(jl_ptr_to_array) \ XX(jl_ptr_to_array_1d) \ XX(jl_queue_work) \ XX(jl_raise_debugger) \ XX(jl_readuntil) \ XX(jl_cache_flags) \ + XX(jl_match_cache_flags_current) \ XX(jl_match_cache_flags) \ XX(jl_read_verify_header) \ XX(jl_realloc) \ XX(jl_register_newmeth_tracer) \ - XX(jl_reshape_array) \ - XX(jl_resolve_globals_in_ir) \ + XX(jl_resolve_definition_effects_in_ir) \ XX(jl_restore_excstack) \ XX(jl_restore_incremental) \ 
XX(jl_restore_package_image_from_file) \ @@ -410,7 +398,10 @@ XX(jl_rethrow_other) \ XX(jl_running_on_valgrind) \ XX(jl_safe_printf) \ + XX(jl_safepoint_suspend_thread) \ + XX(jl_safepoint_resume_thread) \ XX(jl_SC_CLK_TCK) \ + XX(jl_setaffinity) \ XX(jl_set_ARGS) \ XX(jl_set_const) \ XX(jl_set_errno) \ @@ -431,7 +422,6 @@ XX(jl_set_zero_subnormals) \ XX(jl_sigatomic_begin) \ XX(jl_sigatomic_end) \ - XX(jl_sig_throw) \ XX(jl_spawn) \ XX(jl_specializations_get_linfo) \ XX(jl_specializations_lookup) \ @@ -440,11 +430,9 @@ XX(jl_stderr_obj) \ XX(jl_stderr_stream) \ XX(jl_stdin_stream) \ - XX(jl_stdout_obj) \ XX(jl_stdout_stream) \ XX(jl_stored_inline) \ XX(jl_string_ptr) \ - XX(jl_string_to_array) \ XX(jl_subtype) \ XX(jl_subtype_env) \ XX(jl_subtype_env_size) \ @@ -453,7 +441,6 @@ XX(jl_svec2) \ XX(jl_svec_copy) \ XX(jl_svec_fill) \ - XX(jl_svec_ref) \ XX(jl_switch) \ XX(jl_switchto) \ XX(jl_symbol) \ @@ -467,6 +454,8 @@ XX(jl_test_cpu_feature) \ XX(jl_threadid) \ XX(jl_threadpoolid) \ + XX(jl_get_ptls_rng) \ + XX(jl_set_ptls_rng) \ XX(jl_throw) \ XX(jl_throw_out_of_memory_error) \ XX(jl_too_few_args) \ @@ -477,8 +466,6 @@ XX(jl_try_substrtof) \ XX(jl_tty_set_mode) \ XX(jl_typeassert) \ - XX(jl_typeinf_lock_begin) \ - XX(jl_typeinf_lock_end) \ XX(jl_typeinf_timing_begin) \ XX(jl_typeinf_timing_end) \ XX(jl_typename_str) \ @@ -489,7 +476,7 @@ XX(jl_type_intersection) \ XX(jl_type_intersection_with_env) \ XX(jl_type_morespecific) \ - XX(jl_type_morespecific_no_subtype) \ + XX(jl_method_morespecific) \ XX(jl_type_union) \ XX(jl_type_unionall) \ XX(jl_unbox_bool) \ @@ -509,6 +496,8 @@ XX(jl_uncompress_argname_n) \ XX(jl_uncompress_ir) \ XX(jl_undefined_var_error) \ + XX(jl_unwrap_unionall) \ + XX(jl_has_no_field_error) \ XX(jl_value_ptr) \ XX(jl_ver_is_release) \ XX(jl_ver_major) \ @@ -519,7 +508,6 @@ XX(jl_vprintf) \ XX(jl_wakeup_thread) \ XX(jl_write_compiler_output) \ - XX(jl_yield) \ #define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \ XX(jl_setjmp) \ @@ -528,6 +516,7 @@ #define JL_CODEGEN_EXPORTED_FUNCS(YY) \ YY(jl_dump_function_ir) \ YY(jl_dump_method_asm) \ + YY(jl_emit_codeinst_to_jit) \ YY(jl_extern_c) \ YY(jl_get_llvmf_defn) \ YY(jl_get_llvm_function) \ @@ -536,15 +525,15 @@ YY(jl_dump_native) \ YY(jl_get_llvm_gvs) \ YY(jl_get_llvm_external_fns) \ + YY(jl_get_llvm_mis) \ YY(jl_dump_function_asm) \ YY(jl_LLVMCreateDisasm) \ YY(jl_LLVMDisasmInstruction) \ YY(jl_init_codegen) \ YY(jl_getFunctionInfo) \ YY(jl_register_fptrs) \ - YY(jl_generate_fptr) \ YY(jl_generate_fptr_for_unspecialized) \ - YY(jl_generate_fptr_for_oc_wrapper) \ + YY(jl_compile_codeinst) \ YY(jl_compile_extern_c) \ YY(jl_teardown_codegen) \ YY(jl_jit_total_bytes) \ @@ -553,44 +542,12 @@ YY(jl_dump_emitted_mi_name) \ YY(jl_dump_llvm_opt) \ YY(jl_dump_fptr_asm) \ + YY(jl_emit_native) \ YY(jl_get_function_id) \ YY(jl_type_to_llvm) \ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ - YY(jl_add_optimization_passes) \ - YY(jl_build_newpm_pipeline) \ YY(jl_register_passbuilder_callbacks) \ - YY(LLVMExtraAddLowerSimdLoopPass) \ - YY(LLVMExtraAddFinalLowerGCPass) \ - YY(LLVMExtraAddPropagateJuliaAddrspaces) \ - YY(LLVMExtraAddRemoveJuliaAddrspacesPass) \ - YY(LLVMExtraAddCombineMulAddPass) \ - YY(LLVMExtraAddMultiVersioningPass) \ - YY(LLVMExtraAddLowerExcHandlersPass) \ - YY(LLVMExtraAddLateLowerGCFramePass) \ - YY(LLVMExtraJuliaLICMPass) \ - YY(LLVMExtraAddAllocOptPass) \ - YY(LLVMExtraAddLowerPTLSPass) \ - YY(LLVMExtraAddRemoveNIPass) \ - YY(LLVMExtraAddGCInvariantVerifierPass) \ - YY(LLVMExtraAddDemoteFloat16Pass) \ - 
YY(LLVMExtraAddCPUFeaturesPass) \ - YY(LLVMExtraMPMAddCPUFeaturesPass) \ - YY(LLVMExtraMPMAddRemoveNIPass) \ - YY(LLVMExtraMPMAddLowerSIMDLoopPass) \ - YY(LLVMExtraMPMAddFinalLowerGCPass) \ - YY(LLVMExtraMPMAddMultiVersioningPass) \ - YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ - YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ - YY(LLVMExtraMPMAddLowerPTLSPass) \ - YY(LLVMExtraFPMAddDemoteFloat16Pass) \ - YY(LLVMExtraFPMAddCombineMulAddPass) \ - YY(LLVMExtraFPMAddLateLowerGCPass) \ - YY(LLVMExtraFPMAddAllocOptPass) \ - YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \ - YY(LLVMExtraFPMAddLowerExcHandlersPass) \ - YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ - YY(LLVMExtraLPMAddJuliaLICMPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/jl_uv.c b/src/jl_uv.c index 281dd798dbb36..3498952622dce 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -39,21 +39,24 @@ static void walk_print_cb(uv_handle_t *h, void *arg) const char *type = uv_handle_type_name(h->type); if (!type) type = ""; + size_t resource_id; // fits an int or pid_t on Unix, HANDLE or PID on Windows uv_os_fd_t fd; if (h->type == UV_PROCESS) - fd = uv_process_get_pid((uv_process_t*)h); - else if (uv_fileno(h, &fd)) - fd = (uv_os_fd_t)-1; + resource_id = (size_t)uv_process_get_pid((uv_process_t*)h); + else if (uv_fileno(h, &fd) == 0) + resource_id = (size_t)fd; + else + resource_id = -1; const char *pad = " "; // 16 spaces - int npad = fd == -1 ? 0 : snprintf(NULL, 0, "%zd", (size_t)fd); + int npad = resource_id == -1 ? 0 : snprintf(NULL, 0, "%zd", resource_id); if (npad < 0) npad = 0; npad += strlen(type); pad += npad < strlen(pad) ? npad : strlen(pad); - if (fd == -1) - jl_safe_printf(" %s %s@%p->%p\n", type, pad, (void*)h, (void*)h->data); + if (resource_id == -1) + jl_safe_printf(" %s %s%p->%p\n", type, pad, (void*)h, (void*)h->data); else - jl_safe_printf(" %s[%zd] %s@%p->%p\n", type, (size_t)fd, pad, (void*)h, (void*)h->data); + jl_safe_printf(" %s[%zd] %s%p->%p\n", type, resource_id, pad, (void*)h, (void*)h->data); } static void wait_empty_func(uv_timer_t *t) @@ -63,32 +66,38 @@ static void wait_empty_func(uv_timer_t *t) if (!uv_loop_alive(t->loop)) return; jl_safe_printf("\n[pid %zd] waiting for IO to finish:\n" - " TYPE[FD/PID] @UV_HANDLE_T->DATA\n", + " Handle type uv_handle_t->data\n", (size_t)uv_os_getpid()); uv_walk(jl_io_loop, walk_print_cb, NULL); + if (jl_generating_output() && jl_options.incremental) { + jl_safe_printf("This means that a package has started a background task or event source that has not finished running. For precompilation to complete successfully, the event source needs to be closed explicitly. 
See the developer documentation on fixing precompilation hangs for more help.\n"); + } jl_gc_collect(JL_GC_FULL); } void jl_wait_empty_begin(void) { JL_UV_LOCK(); - if (wait_empty_worker.type != UV_TIMER && jl_io_loop) { - // try to purge anything that is just waiting for cleanup - jl_io_loop->stop_flag = 0; - uv_run(jl_io_loop, UV_RUN_NOWAIT); - uv_timer_init(jl_io_loop, &wait_empty_worker); + if (jl_io_loop) { + if (wait_empty_worker.type != UV_TIMER) { + // try to purge anything that is just waiting for cleanup + jl_io_loop->stop_flag = 0; + uv_run(jl_io_loop, UV_RUN_NOWAIT); + uv_timer_init(jl_io_loop, &wait_empty_worker); + uv_unref((uv_handle_t*)&wait_empty_worker); + } + // make sure this is running uv_update_time(jl_io_loop); uv_timer_start(&wait_empty_worker, wait_empty_func, 10, 15000); - uv_unref((uv_handle_t*)&wait_empty_worker); } JL_UV_UNLOCK(); } - void jl_wait_empty_end(void) { - JL_UV_LOCK(); - uv_close((uv_handle_t*)&wait_empty_worker, NULL); - JL_UV_UNLOCK(); + // n.b. caller must be holding jl_uv_mutex + if (wait_empty_worker.type == UV_TIMER) + // make sure this timer is stopped, but not destroyed in case the user calls jl_wait_empty_begin again + uv_timer_stop(&wait_empty_worker); } @@ -130,11 +139,17 @@ void JL_UV_LOCK(void) } } +/** + * @brief Begin an IO lock. + */ JL_DLLEXPORT void jl_iolock_begin(void) { JL_UV_LOCK(); } +/** + * @brief End an IO lock. + */ JL_DLLEXPORT void jl_iolock_end(void) { JL_UV_UNLOCK(); @@ -173,9 +188,12 @@ static void jl_uv_closeHandle(uv_handle_t *handle) ct->world_age = last_age; return; } - if (handle == (uv_handle_t*)&signal_async || handle == (uv_handle_t*)&wait_empty_worker) + if (handle == (uv_handle_t*)&wait_empty_worker) + handle->type = UV_UNKNOWN_HANDLE; + else if (handle == (uv_handle_t*)&signal_async) return; - free(handle); + else + free(handle); } static void jl_uv_flush_close_callback(uv_write_t *req, int status) @@ -218,9 +236,16 @@ static void uv_flush_callback(uv_write_t *req, int status) free(req); } -// Turn a normal write into a blocking write (primarily for use from C and gdb). -// Warning: This calls uv_run, so it can have unbounded side-effects. -// Be care where you call it from! - the libuv loop is also not reentrant. +/** + * @brief Flush a UV stream. + * + * Primarily used from C and gdb to convert a normal write operation on a UV stream + * into a blocking write. It calls uv_run, which can have unbounded side-effects. + * Caution is advised as the location from where this function is called is critical + * due to the non-reentrancy of the libuv loop. + * + * @param stream A pointer to `uv_stream_t` representing the stream to flush. + */ JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream) { if (stream == (void*)STDIN_FILENO || @@ -252,27 +277,115 @@ JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream) // getters and setters // TODO: check if whoever calls these is thread-safe +/** + * @brief Get the process ID of a UV process. + * + * @param p A pointer to `uv_process_t` representing the UV process. + * @return The process ID. + */ JL_DLLEXPORT int jl_uv_process_pid(uv_process_t *p) { return p->pid; } + +/** + * @brief Get the data associated with a UV process. + * + * @param p A pointer to `uv_process_t` representing the UV process. + * @return A pointer to the process data. + */ JL_DLLEXPORT void *jl_uv_process_data(uv_process_t *p) { return p->data; } + +/** + * @brief Get the base pointer of a UV buffer. + * + * @param buf A constant pointer to `uv_buf_t` representing the UV buffer. 
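As a hedged sketch (not part of the patch) of the jl_uv_flush usage described above, e.g. from gdb or from embedding code that wants buffered output pushed out before inspecting state; JL_STDOUT is the usual uv_stream_t* macro from julia.h and jl_printf the matching formatted writer:

    // Mind the warning above: jl_uv_flush runs uv_run, so only call it where
    // re-entering the libuv loop is acceptable.
    jl_printf(JL_STDOUT, "checkpoint reached\n");
    jl_uv_flush(JL_STDOUT);
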
+ * @return A pointer to the base of the buffer. + */ JL_DLLEXPORT void *jl_uv_buf_base(const uv_buf_t *buf) { return buf->base; } + +/** + * @brief Get the length of a UV buffer. + * + * @param buf A constant pointer to `uv_buf_t` representing the UV buffer. + * @return The length of the buffer as `size_t`. + */ JL_DLLEXPORT size_t jl_uv_buf_len(const uv_buf_t *buf) { return buf->len; } + +/** + * @brief Set the base pointer of a UV buffer. + * + * @param buf A pointer to `uv_buf_t` representing the UV buffer. + * @param b A pointer to `char` representing the new base of the buffer. + */ JL_DLLEXPORT void jl_uv_buf_set_base(uv_buf_t *buf, char *b) { buf->base = b; } + +/** + * @brief Set the length of a UV buffer. + * + * @param buf A pointer to `uv_buf_t` representing the UV buffer. + * @param n The new length of the buffer as `size_t`. + */ JL_DLLEXPORT void jl_uv_buf_set_len(uv_buf_t *buf, size_t n) { buf->len = n; } + +/** + * @brief Get the handle associated with a UV connect request. + * + * @param connect A pointer to `uv_connect_t` representing the connect request. + * @return A pointer to the associated handle. + */ JL_DLLEXPORT void *jl_uv_connect_handle(uv_connect_t *connect) { return connect->handle; } + +/** + * @brief Get the file descriptor from a UV file structure. + * + * @param f A pointer to `jl_uv_file_t` representing the UV file. + * @return The file descriptor as `uv_os_fd_t`. + */ JL_DLLEXPORT uv_os_fd_t jl_uv_file_handle(jl_uv_file_t *f) { return f->file; } + +/** + * @brief Get the data field from a UV request. + * + * @param req A pointer to `uv_req_t` representing the request. + * @return A pointer to the data associated with the request. + */ JL_DLLEXPORT void *jl_uv_req_data(uv_req_t *req) { return req->data; } + +/** + * @brief Set the data field of a UV request. + * + * @param req A pointer to `uv_req_t` representing the request. + * @param data A pointer to the data to be associated with the request. + */ JL_DLLEXPORT void jl_uv_req_set_data(uv_req_t *req, void *data) { req->data = data; } + +/** + * @brief Get the data field from a UV handle. + * + * @param handle A pointer to `uv_handle_t` representing the handle. + * @return A pointer to the data associated with the handle. + */ JL_DLLEXPORT void *jl_uv_handle_data(uv_handle_t *handle) { return handle->data; } -JL_DLLEXPORT void *jl_uv_write_handle(uv_write_t *req) { return req->handle; } -extern _Atomic(unsigned) _threadedregion; +/** + * @brief Get the handle associated with a UV write request. + * + * @param req A pointer to `uv_write_t` representing the write request. + * @return A pointer to the handle associated with the write request. + */ +JL_DLLEXPORT void *jl_uv_write_handle(uv_write_t *req) { return req->handle; } +/** + * @brief Process pending UV events. + * + * See also `uv_run` in the libuv documentation for status code enumeration. + * + * @return An integer indicating the status of the event processing. 
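The accessor block above exists so that callers, FFI layers in particular, never need to know uv_buf_t's platform-dependent field layout (the base/len order differs between Unix and Windows). A minimal hedged sketch, where payload and payload_len are hypothetical caller-owned values:

    uv_buf_t buf;
    jl_uv_buf_set_base(&buf, payload);
    jl_uv_buf_set_len(&buf, payload_len);
    assert(jl_uv_buf_base(&buf) == payload);
    assert(jl_uv_buf_len(&buf) == payload_len);
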
+ */ JL_DLLEXPORT int jl_process_events(void) { jl_task_t *ct = jl_current_task; uv_loop_t *loop = jl_io_loop; jl_gc_safepoint_(ct->ptls); - if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == 0)) { + if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == jl_atomic_load_relaxed(&io_loop_tid))) { if (jl_atomic_load_relaxed(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) { JL_PROBE_RT_START_PROCESS_EVENTS(ct); loop->stop_flag = 0; @@ -293,6 +406,11 @@ static void jl_proc_exit_cleanup_cb(uv_process_t *process, int64_t exit_status, uv_close((uv_handle_t*)process, (uv_close_cb)&free); } +/** + * @brief Close a UV handle. + * + * @param handle A pointer to `uv_handle_t` that needs to be closed. + */ JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle) { JL_UV_LOCK(); @@ -326,6 +444,11 @@ JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle) JL_UV_UNLOCK(); } +/** + * @brief Forcefully close a UV handle. + * + * @param handle A pointer to `uv_handle_t` to be forcefully closed. + */ JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle) { if (!uv_is_closing(handle)) { // avoid double-closing the stream @@ -337,12 +460,23 @@ JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle) } } +/** + * @brief Associate a Julia structure with a UV handle. + * + * @param handle A pointer to `uv_handle_t` to be associated with a Julia structure. + * @param data Additional parameters representing the Julia structure to be associated. + */ JL_DLLEXPORT void jl_uv_associate_julia_struct(uv_handle_t *handle, jl_value_t *data) { handle->data = data; } +/** + * @brief Disassociate a Julia structure from a UV handle. + * + * @param handle A pointer to `uv_handle_t` from which the Julia structure will be disassociated. + */ JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle) { handle->data = NULL; @@ -350,6 +484,29 @@ JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle) #define UV_HANDLE_CLOSED 0x02 +/** + * @brief Spawn a new process. + * + * Spawns a new process to execute external programs or scripts within the context of the Julia application. + * + * @param name A C string representing the name or path of the executable to spawn. + * @param argv An array of C strings representing the arguments for the process. The array should be null-terminated. + * @param loop A pointer to `uv_loop_t` representing the event loop where the process is registered. + * @param proc A pointer to `uv_process_t` where the details of the spawned process are stored. + * @param stdio An array of `uv_stdio_container_t` representing the file descriptors for standard input, output, and error. + * @param nstdio An integer representing the number of elements in the stdio array. + * @param flags A uint32_t representing process creation flags. + See also `enum uv_process_flags` in the libuv documentation. + * @param env An array of C strings for setting environment variables. The array should be null-terminated. + * @param cwd A C string representing the current working directory for the process. + * @param cpumask A C string representing the CPU affinity mask for the process. + See also the `cpumask` field of the `uv_process_options_t` structure in the libuv documentation. + * @param cpumask_size The size of the cpumask. + * @param cb A function pointer to `uv_exit_cb` which is the callback function to be called upon process exit. + * + * @return An integer indicating the success or failure of the spawn operation. 
A return value of 0 indicates success, + * while a non-zero value indicates an error. + */ JL_DLLEXPORT int jl_spawn(char *name, char **argv, uv_loop_t *loop, uv_process_t *proc, uv_stdio_container_t *stdio, int nstdio, @@ -478,7 +635,7 @@ JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len, { jl_task_t *ct = jl_get_current_task(); // TODO: fix this cheating - if (jl_get_safe_restore() || ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0) + if (jl_get_safe_restore() || ct == NULL || jl_atomic_load_relaxed(&ct->tid) != jl_atomic_load_relaxed(&io_loop_tid)) #ifdef _OS_WINDOWS_ return WriteFile(handle, data, len, NULL, NULL); #else @@ -506,25 +663,6 @@ JL_DLLEXPORT int jl_fs_read(uv_os_fd_t handle, char *data, size_t len) return ret; } -JL_DLLEXPORT int jl_fs_read_byte(uv_os_fd_t handle) -{ - uv_fs_t req; - unsigned char c; - uv_buf_t buf[1]; - buf[0].base = (char*)&c; - buf[0].len = 1; - int ret = uv_fs_read(unused_uv_loop_arg, &req, handle, buf, 1, -1, NULL); - uv_fs_req_cleanup(&req); - switch (ret) { - case -1: return ret; - case 0: jl_eof_error(); - case 1: return (int)c; - default: - assert(0 && "jl_fs_read_byte: Invalid return value from uv_fs_read"); - return -1; - } -} - JL_DLLEXPORT int jl_fs_close(uv_os_fd_t handle) { uv_fs_t req; @@ -578,7 +716,7 @@ JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n) // TODO: Hack to make CoreIO thread-safer jl_task_t *ct = jl_get_current_task(); - if (ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0) { + if (ct == NULL || jl_atomic_load_relaxed(&ct->tid) != jl_atomic_load_relaxed(&io_loop_tid)) { if (stream == JL_STDOUT) { fd = UV_STDOUT_FD; } @@ -973,31 +1111,39 @@ static inline int ishexchar(char c) JL_DLLEXPORT int jl_ispty(uv_pipe_t *pipe) { - if (pipe->type != UV_NAMED_PIPE) return 0; + char namebuf[0]; size_t len = 0; - if (uv_pipe_getpeername(pipe, NULL, &len) != UV_ENOBUFS) return 0; + if (pipe->type != UV_NAMED_PIPE) + return 0; + if (uv_pipe_getpeername(pipe, namebuf, &len) != UV_ENOBUFS) + return 0; char *name = (char*)alloca(len + 1); - if (uv_pipe_getpeername(pipe, name, &len)) return 0; + if (uv_pipe_getpeername(pipe, name, &len)) + return 0; name[len] = '\0'; // return true if name matches regex: // ^\\\\?\\pipe\\(msys|cygwin)-[0-9a-z]{16}-[pt]ty[1-9][0-9]*- //jl_printf(JL_STDERR,"pipe_name: %s\n", name); int n = 0; - if (!strncmp(name,"\\\\?\\pipe\\msys-",14)) + if (!strncmp(name, "\\\\?\\pipe\\msys-", 14)) n = 14; - else if (!strncmp(name,"\\\\?\\pipe\\cygwin-",16)) + else if (!strncmp(name, "\\\\?\\pipe\\cygwin-", 16)) n = 16; else return 0; //jl_printf(JL_STDERR,"prefix pass\n"); name += n; for (int n = 0; n < 16; n++) - if (!ishexchar(*name++)) return 0; + if (!ishexchar(*name++)) + return 0; //jl_printf(JL_STDERR,"hex pass\n"); - if ((*name++)!='-') return 0; - if (*name != 'p' && *name != 't') return 0; + if ((*name++)!='-') + return 0; + if (*name != 'p' && *name != 't') + return 0; name++; - if (*name++ != 't' || *name++ != 'y') return 0; + if (*name++ != 't' || *name++ != 'y') + return 0; //jl_printf(JL_STDERR,"tty pass\n"); return 1; } @@ -1014,6 +1160,8 @@ JL_DLLEXPORT uv_handle_type jl_uv_handle_type(uv_handle_t *handle) JL_DLLEXPORT int jl_tty_set_mode(uv_tty_t *handle, int mode) { + if (!handle) + return UV__EOF; if (handle->type != UV_TTY) return 0; uv_tty_mode_t mode_enum = UV_TTY_MODE_NORMAL; if (mode) diff --git a/src/jlapi.c b/src/jlapi.c index 0dffaac627288..defb2db6ac911 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -26,11 +26,28 @@ extern "C" { #include 
#endif +/** + * @brief Check if Julia is already initialized. + * + * Determine if Julia has been previously initialized + * via `jl_init` or `jl_init_with_image`. + * + * @return Returns 1 if Julia is initialized, 0 otherwise. + */ JL_DLLEXPORT int jl_is_initialized(void) { return jl_main_module != NULL; } +/** + * @brief Set Julia command line arguments. + * + * Allows setting the command line arguments for Julia, + * similar to arguments passed in the main function of a C program. + * + * @param argc The number of command line arguments. + * @param argv Array of command line arguments. + */ JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv) { if (jl_core_module != NULL) { @@ -41,21 +58,30 @@ JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv) jl_set_const(jl_core_module, jl_symbol("ARGS"), (jl_value_t*)args); JL_GC_POP(); } - assert(jl_array_len(args) == 0); + assert(jl_array_nrows(args) == 0); jl_array_grow_end(args, argc); int i; for (i = 0; i < argc; i++) { jl_value_t *s = (jl_value_t*)jl_cstr_to_string(argv[i]); - jl_arrayset(args, s, i); + jl_array_ptr_set(args, i, s); } } } -// First argument is the usr/bin directory where the julia binary is, or NULL to guess. -// Second argument is the path of a system image file (*.so). -// A non-absolute path is interpreted as relative to the first argument path, or -// relative to the default julia home dir. -// The default is something like ../lib/julia/sys.so +/** + * @brief Initialize Julia with a specified system image file. + * + * Initializes Julia by specifying the usr/bin directory where the Julia binary is + * and the path of a system image file (*.so). If the julia_bindir is NULL, the function + * attempts to guess the directory. The image_path is interpreted as a path to the system image + * file. A non-absolute path for the system image is considered relative to julia_bindir, or + * relative to the default Julia home directory. The default system image is typically + * something like ../lib/julia/sys.so. + * + * @param julia_bindir The usr/bin directory where the Julia binary is located, or NULL to guess. + * @param image_path The path of a system image file (*.so). Interpreted as relative to julia_bindir + * or the default Julia home directory if not an absolute path. + */ JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir, const char *image_path) { @@ -71,6 +97,12 @@ JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir, jl_exception_clear(); } +/** + * @brief Initialize the Julia runtime. + * + * Initializes the Julia runtime without any specific system image. + * It must be called before any other Julia API functions. + */ JL_DLLEXPORT void jl_init(void) { char *libbindir = NULL; @@ -105,6 +137,13 @@ static void _jl_exception_clear(jl_task_t *ct) JL_NOTSAFEPOINT ct->ptls->previous_exception = NULL; } +/** + * @brief Evaluate a Julia expression from a string. + * + * @param str A C string containing the Julia expression to be evaluated. + * @return A pointer to `jl_value_t` representing the result of the evaluation. + * Returns `NULL` if an error occurs during parsing or evaluation. 
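For readers coming to these new doc comments from the embedding side, the lifecycle they describe is roughly the following minimal sketch (assuming libjulia is linked and julia.h is on the include path):

    #include <julia.h>

    int main(void)
    {
        jl_init();                               // must precede any other jl_* call
        jl_eval_string("println(sqrt(2.0))");    // returns NULL and records the exception on error
        if (jl_exception_occurred())
            jl_printf(JL_STDERR, "error: %s\n", jl_typeof_str(jl_exception_occurred()));
        jl_atexit_hook(0);                       // let the runtime shut down cleanly
        return 0;
    }
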
+ */ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str) { jl_value_t *r; @@ -119,29 +158,50 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str) _jl_exception_clear(ct); } JL_CATCH { - ct->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); r = NULL; } return r; } -JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT +/** + * @brief Get the current exception in the Julia context. + * + * @return A pointer to `jl_value_t` representing the current exception. + * Returns `NULL` if no exception is currently thrown. + */ +JL_DLLEXPORT jl_value_t *jl_current_exception(jl_task_t *ct) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT { - jl_excstack_t *s = jl_current_task->excstack; + jl_excstack_t *s = ct->excstack; return s && s->top != 0 ? jl_excstack_exception(s, s->top) : jl_nothing; } +/** + * @brief Check if an exception has occurred in the Julia context. + * + * @return A pointer to `jl_value_t` representing the exception that occurred. + * Returns `NULL` if no exception has occurred. + */ JL_DLLEXPORT jl_value_t *jl_exception_occurred(void) { return jl_current_task->ptls->previous_exception; } +/** + * @brief Clear the current exception in the Julia context. + * + */ JL_DLLEXPORT void jl_exception_clear(void) { _jl_exception_clear(jl_current_task); } -// get the name of a type as a string +/** + * @brief Get the type name of a Julia value. + * + * @param v A pointer to `jl_value_t` representing the Julia value. + * @return A C string containing the name of the type. + */ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) { if (!jl_is_datatype(v)) @@ -149,32 +209,78 @@ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) return jl_symbol_name(((jl_datatype_t*)v)->name->name); } -// get the name of typeof(v) as a string +/** + * @brief Get the string representation of a Julia value's type. + * + * @param v A pointer to `jl_value_t` representing the Julia value. + * @return A C string describing the type of the value. + */ JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) { return jl_typename_str((jl_value_t*)jl_typeof(v)); } +/** + * @brief Get the element type of a Julia array. + * + * @param a A pointer to `jl_value_t` representing the Julia array. + * @return A pointer to the type of the array elements. + */ JL_DLLEXPORT void *jl_array_eltype(jl_value_t *a) { return jl_tparam0(jl_typeof(a)); } +/** + * @brief Get the number of dimensions of a Julia array. + * + * Returns the rank (number of dimensions) of a Julia array. + * + * @param a A pointer to `jl_value_t` representing the Julia array. + * @return An integer representing the number of dimensions of the array. + */ JL_DLLEXPORT int jl_array_rank(jl_value_t *a) { return jl_array_ndims(a); } -JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d) +/** + * @brief Get the size of a specific dimension of a Julia array. + * + * Returns the size (number of elements) of a specific dimension + * of a Julia array. + * + * @param a A pointer to `jl_array_t` representing the Julia array. + * @param d The dimension for which the size is requested. + * @return The size of the specified dimension of the array. + */ +JL_DLLEXPORT size_t jl_array_size(jl_array_t *a, int d) { + // n.b this functions only use was to violate the vector abstraction, so we have to continue to emulate that + if (d >= jl_array_ndims(a)) + return a->ref.mem->length; return jl_array_dim(a, d); } +/** + * @brief Get the C string pointer from a Julia string. 
+ * + * @param s A pointer to `jl_value_t` representing the Julia string. + * @return A C string pointer containing the contents of the Julia string. + */ JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s) { return jl_string_data(s); } +/** + * @brief Call a Julia function with a specified number of arguments. + * + * @param f A pointer to `jl_function_t` representing the Julia function to call. + * @param args An array of pointers to `jl_value_t` representing the arguments. + * @param nargs The number of arguments in the array. + * @return A pointer to `jl_value_t` representing the result of the function call. + */ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t nargs) { jl_value_t *v; @@ -194,12 +300,20 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t n _jl_exception_clear(ct); } JL_CATCH { - ct->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); v = NULL; } return v; } +/** + * @brief Call a Julia function with no arguments. + * + * A specialized case of `jl_call` for simpler scenarios. + * + * @param f A pointer to `jl_function_t` representing the Julia function to call. + * @return A pointer to `jl_value_t` representing the result of the function call. + */ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f) { jl_value_t *v; @@ -214,12 +328,21 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f) _jl_exception_clear(ct); } JL_CATCH { - ct->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); v = NULL; } return v; } +/** + * @brief Call a Julia function with one argument. + * + * A specialized case of `jl_call` for simpler scenarios. + * + * @param f A pointer to `jl_function_t` representing the Julia function to call. + * @param a A pointer to `jl_value_t` representing the argument to the function. + * @return A pointer to `jl_value_t` representing the result of the function call. + */ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a) { jl_value_t *v; @@ -237,12 +360,22 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a) _jl_exception_clear(ct); } JL_CATCH { - ct->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); v = NULL; } return v; } +/** + * @brief Call a Julia function with two arguments. + * + * A specialized case of `jl_call` for simpler scenarios. + * + * @param f A pointer to `jl_function_t` representing the Julia function to call. + * @param a A pointer to `jl_value_t` representing the first argument. + * @param b A pointer to `jl_value_t` representing the second argument. + * @return A pointer to `jl_value_t` representing the result of the function call. + */ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b) { jl_value_t *v; @@ -261,12 +394,23 @@ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b _jl_exception_clear(ct); } JL_CATCH { - ct->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); v = NULL; } return v; } +/** + * @brief Call a Julia function with three arguments. + * + * A specialized case of `jl_call` for simpler scenarios. + * + * @param f A pointer to `jl_function_t` representing the Julia function to call. + * @param a A pointer to `jl_value_t` representing the first argument. + * @param b A pointer to `jl_value_t` representing the second argument. 
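A hedged sketch of the jl_call1 path documented above; jl_get_function, jl_box_float64 and jl_unbox_float64 are the standard julia.h helpers, and real code should additionally root the boxed values with JL_GC_PUSH* while they are live:

    jl_function_t *sqrt_fn = jl_get_function(jl_base_module, "sqrt");
    jl_value_t *arg = jl_box_float64(2.0);
    jl_value_t *ret = jl_call1(sqrt_fn, arg);    // NULL if the call threw
    if (ret != NULL && jl_typeis(ret, jl_float64_type))
        jl_printf(JL_STDOUT, "sqrt(2.0) = %f\n", jl_unbox_float64(ret));
    else if (jl_exception_occurred())
        jl_printf(JL_STDERR, "call failed: %s\n", jl_typeof_str(jl_exception_occurred()));
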
+ * @param c A pointer to `jl_value_t` representing the third argument. + * @return A pointer to `jl_value_t` representing the result of the function call. + */ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a, jl_value_t *b, jl_value_t *c) { @@ -287,24 +431,23 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a, _jl_exception_clear(ct); } JL_CATCH { - ct->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); v = NULL; } return v; } -JL_DLLEXPORT void jl_yield(void) -{ - static jl_function_t *yieldfunc = NULL; - if (yieldfunc == NULL) - yieldfunc = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("yield")); - if (yieldfunc != NULL) - jl_call0(yieldfunc); -} - +/** + * @brief Get a field from a Julia object. + * + * @param o A pointer to `jl_value_t` representing the Julia object. + * @param fld A C string representing the name of the field to retrieve. + * @return A pointer to `jl_value_t` representing the value of the field. + */ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld) { jl_value_t *v; + jl_task_t *ct = jl_current_task; JL_TRY { jl_value_t *s = (jl_value_t*)jl_symbol(fld); int i = jl_field_index((jl_datatype_t*)jl_typeof(o), (jl_sym_t*)s, 1); @@ -312,17 +455,29 @@ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld) jl_exception_clear(); } JL_CATCH { - jl_current_task->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(ct); v = NULL; } return v; } +/** + * @brief Begin an atomic signal-protected region. + * + * Marks the start of a region of code that should be protected + * from interruption by asynchronous signals. + */ JL_DLLEXPORT void jl_sigatomic_begin(void) { JL_SIGATOMIC_BEGIN(); } +/** + * @brief End an atomic signal-protected region. + * + * Marks the end of a region of code protected from asynchronous signals. + * It should be used in conjunction with `jl_sigatomic_begin` to define signal-protected regions. + */ JL_DLLEXPORT void jl_sigatomic_end(void) { jl_task_t *ct = jl_current_task; @@ -331,6 +486,11 @@ JL_DLLEXPORT void jl_sigatomic_end(void) JL_SIGATOMIC_END(); } +/** + * @brief Check if Julia is running in debug build mode. + * + * @return Returns 1 if Julia is in debug build mode, 0 otherwise. + */ JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT { #ifdef JL_DEBUG_BUILD @@ -340,6 +500,24 @@ JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT #endif } +/** + * @brief Check if Julia has been build with assertions enabled. + * + * @return Returns 1 if assertions are enabled, 0 otherwise. + */ +JL_DLLEXPORT int8_t jl_is_assertsbuild(void) JL_NOTSAFEPOINT { +#ifndef JL_NDEBUG + return 1; +#else + return 0; +#endif +} + +/** + * @brief Check if Julia's memory debugging is enabled. + * + * @return Returns 1 if memory debugging is enabled, 0 otherwise. + */ JL_DLLEXPORT int8_t jl_is_memdebug(void) JL_NOTSAFEPOINT { #ifdef MEMDEBUG return 1; @@ -348,92 +526,143 @@ JL_DLLEXPORT int8_t jl_is_memdebug(void) JL_NOTSAFEPOINT { #endif } +/** + * @brief Get the directory path of the Julia binary. + * + * @return A pointer to `jl_value_t` representing the directory path as a Julia string. + */ JL_DLLEXPORT jl_value_t *jl_get_julia_bindir(void) { return jl_cstr_to_string(jl_options.julia_bindir); } +/** + * @brief Get the path to the Julia binary. + * + * @return A pointer to `jl_value_t` representing the full path as a Julia string. 
+ */ JL_DLLEXPORT jl_value_t *jl_get_julia_bin(void) { return jl_cstr_to_string(jl_options.julia_bin); } +/** + * @brief Get the path to the Julia system image file. + * + * @return A pointer to `jl_value_t` representing the system image file path as a Julia string. + */ JL_DLLEXPORT jl_value_t *jl_get_image_file(void) { return jl_cstr_to_string(jl_options.image_file); } +/** + * @brief Get the major version number of Julia. + * + * @return The major version number as an integer. + */ JL_DLLEXPORT int jl_ver_major(void) { return JULIA_VERSION_MAJOR; } +/** + * @brief Get the minor version number of Julia. + * + * @return The minor version number as an integer. + */ JL_DLLEXPORT int jl_ver_minor(void) { return JULIA_VERSION_MINOR; } +/** + * @brief Get the patch version number of Julia. + * + * @return The patch version number as an integer. + */ JL_DLLEXPORT int jl_ver_patch(void) { return JULIA_VERSION_PATCH; } +/** + * @brief Check if the current Julia version is a release version. + * + * @return Returns 1 if it is a release version, 0 otherwise. + */ JL_DLLEXPORT int jl_ver_is_release(void) { return JULIA_VERSION_IS_RELEASE; } +/** + * @brief Get the Julia version as a string. + * + * @return A C string containing the version information. + */ JL_DLLEXPORT const char *jl_ver_string(void) { return JULIA_VERSION_STRING; } -// return char* from String field in Base.GIT_VERSION_INFO -static const char *git_info_string(const char *fld) -{ - static jl_value_t *GIT_VERSION_INFO = NULL; - if (!GIT_VERSION_INFO) - GIT_VERSION_INFO = jl_get_global(jl_base_module, jl_symbol("GIT_VERSION_INFO")); - jl_value_t *f = jl_get_field(GIT_VERSION_INFO, fld); - assert(jl_is_string(f)); - return jl_string_data(f); -} - -JL_DLLEXPORT const char *jl_git_branch(void) -{ - static const char *branch = NULL; - if (!branch) branch = git_info_string("branch"); - return branch; -} - -JL_DLLEXPORT const char *jl_git_commit(void) -{ - static const char *commit = NULL; - if (!commit) commit = git_info_string("commit"); - return commit; -} - -// Create function versions of some useful macros for GDB or FFI use +/** + * @brief Convert a Julia value to a tagged value. + * + * Converts a Julia value into its corresponding tagged value representation. + * Tagged values include additional metadata used internally by the Julia runtime. + * + * @param v A pointer to `jl_value_t` representing the Julia value. + * @return A pointer to `jl_taggedvalue_t` representing the tagged value. + */ JL_DLLEXPORT jl_taggedvalue_t *(jl_astaggedvalue)(jl_value_t *v) { return jl_astaggedvalue(v); } +/** + * @brief Convert a tagged value back to a Julia value. + * + * Converts a tagged value back into its original Julia value. + * It's the inverse operation of `jl_astaggedvalue`. + * + * @param v A pointer to `jl_taggedvalue_t` representing the tagged value. + * @return A pointer to `jl_value_t` representing the original Julia value. + */ JL_DLLEXPORT jl_value_t *(jl_valueof)(jl_taggedvalue_t *v) { return jl_valueof(v); } +/** + * @brief Get the type of a Julia value. + * + * @param v A pointer to `jl_value_t` representing the Julia value. + * @return A pointer to `jl_value_t` representing the type of the value. + */ JL_DLLEXPORT jl_value_t *(jl_typeof)(jl_value_t *v) { return jl_typeof(v); } +/** + * @brief Get the field types of a Julia value. + * + * @param v A pointer to `jl_value_t` representing the Julia value. + * @return A pointer to `jl_value_t` representing the field types. 
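/*
 * Illustrative sketch, not part of the patch: the version queries documented
 * above are plain C getters, so an embedder can gate behavior on the runtime
 * version without parsing strings.  They return compile-time constants, so
 * they are safe to call even before jl_init.
 */
#include <julia.h>
#include <stdio.h>

static void print_runtime_version(void)
{
    printf("julia %s (%d.%d.%d, %s)\n",
           jl_ver_string(),
           jl_ver_major(), jl_ver_minor(), jl_ver_patch(),
           jl_ver_is_release() ? "release" : "pre-release");
}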
+ */ JL_DLLEXPORT jl_value_t *(jl_get_fieldtypes)(jl_value_t *v) { return (jl_value_t*)jl_get_fieldtypes((jl_datatype_t*)v); } +/** + * @brief Check equality of two Julia values. + * + * @param a A pointer to `jl_value_t` representing the first Julia value. + * @param b A pointer to `jl_value_t` representing the second Julia value. + * @return Returns 1 if the values are equal, 0 otherwise. + */ JL_DLLEXPORT int ijl_egal(jl_value_t *a, jl_value_t *b) { return jl_egal(a, b); @@ -441,24 +670,56 @@ JL_DLLEXPORT int ijl_egal(jl_value_t *a, jl_value_t *b) #ifndef __clang_gcanalyzer__ +/** + * @brief Enter a state where concurrent garbage collection (GC) is considered unsafe. + * + * Marks the beginning of a code region where garbage collection operations are unsafe. + * Used to make it legal to access GC-managed state (almost anything) + * + * @return An `int8_t` state value representing the previous GC state. + */ JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void) { jl_task_t *ct = jl_current_task; return jl_gc_unsafe_enter(ct->ptls); } +/** + * @brief Leave the state where garbage collection is considered unsafe. + * + * Ends a code region where garbage collection was marked as unsafe. + * It restores the previous GC state using the state value returned by `jl_gc_unsafe_enter`. + * + * @param state The state value returned by `jl_gc_unsafe_enter` to restore the previous GC state. + */ JL_DLLEXPORT void (jl_gc_unsafe_leave)(int8_t state) { jl_task_t *ct = jl_current_task; jl_gc_unsafe_leave(ct->ptls, state); } +/** + * @brief Enter a state where garbage collection (GC) is considered safe. + * + * Marks the beginning of a code region where garbage collection operations are safe. + * Used to enable GC in sections of code where it was previously marked as unsafe. + * + * @return An `int8_t` state value representing the previous GC state. + */ JL_DLLEXPORT int8_t (jl_gc_safe_enter)(void) { jl_task_t *ct = jl_current_task; return jl_gc_safe_enter(ct->ptls); } +/** + * @brief Leave the state where garbage collection is considered safe. + * + * Ends a code region where garbage collection was marked as safe. + * It restores the previous GC state using the state value returned by `jl_gc_safe_enter`. + * + * @param state The state value returned by `jl_gc_safe_enter` to restore the previous GC state. + */ JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state) { jl_task_t *ct = jl_current_task; @@ -466,49 +727,118 @@ JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state) } #endif +/** + * @brief Trigger a garbage collection safepoint in a GC-unsafe region. + * + * Triggers a safepoint for garbage collection. Used to + * ensure that the garbage collector can run at specific points in the code, + * particularly in long-running operations or loops. + */ JL_DLLEXPORT void jl_gc_safepoint(void) { jl_task_t *ct = jl_current_task; jl_gc_safepoint_(ct->ptls); } +/** + * @brief Pause CPU execution for a brief moment. + * + * Used to pause the CPU briefly, typically to reduce power consumption + * or manage CPU resources more effectively in a tight loop or busy wait scenario. + */ JL_DLLEXPORT void (jl_cpu_pause)(void) { jl_cpu_pause(); } +/** + * @brief Suspend CPU execution. + * + * Suspends CPU execution until a specific condition or event occurs. + */ JL_DLLEXPORT void (jl_cpu_suspend)(void) { jl_cpu_suspend(); } +/** + * @brief Wake the CPU from a suspended state. + * + * Used to resume CPU execution after it has been suspended using `jl_cpu_suspend`. 
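/*
 * Illustrative sketch, not part of the patch: pairing the exported
 * jl_gc_safe_enter/jl_gc_safe_leave wrappers shown above around a blocking,
 * Julia-free foreign call, so the collector can run on other threads in the
 * meantime.  The symbols are resolved with dlsym because the header-level
 * jl_gc_safe_enter/leave forms take an explicit ptls argument; `libjulia` is
 * assumed to be a handle the embedder already obtained with dlopen, on a
 * thread that is known to the Julia runtime.
 */
#include <dlfcn.h>
#include <stdint.h>
#include <unistd.h>

static void blocking_call_in_gc_safe_region(void *libjulia)
{
    int8_t (*safe_enter)(void) = (int8_t (*)(void))dlsym(libjulia, "jl_gc_safe_enter");
    void (*safe_leave)(int8_t) = (void (*)(int8_t))dlsym(libjulia, "jl_gc_safe_leave");
    if (!safe_enter || !safe_leave)
        return;
    int8_t old_state = safe_enter();   // GC may now run while this thread blocks
    sleep(5);                          // stand-in for long foreign work; no jl_value_t* access here
    safe_leave(old_state);             // restore the previous GC state
}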
+ */ JL_DLLEXPORT void (jl_cpu_wake)(void) { jl_cpu_wake(); } +/** + * @brief Enable cumulative compile timing. + */ JL_DLLEXPORT void jl_cumulative_compile_timing_enable(void) { // Increment the flag to allow reentrant callers to `@time`. jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); } +/** + * @brief Disable cumulative compile timing. + */ JL_DLLEXPORT void jl_cumulative_compile_timing_disable(void) { // Decrement the flag when done measuring, allowing other callers to continue measuring. jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); } +/** + * @brief Get the cumulative compilation time in nanoseconds. + * + * @return The cumulative compilation time in nanoseconds. + */ JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns(void) { return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } +/** + * @brief Get the cumulative recompilation time in nanoseconds. + * + * @return The cumulative recompilation time in nanoseconds. + */ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void) { return jl_atomic_load_relaxed(&jl_cumulative_recompile_time); } +/** + * @brief Enable per-task timing. + */ +JL_DLLEXPORT void jl_task_metrics_enable(void) +{ + // Increment the flag to allow reentrant callers. + jl_atomic_fetch_add(&jl_task_metrics_enabled, 1); +} + +/** + * @brief Disable per-task timing. + */ +JL_DLLEXPORT void jl_task_metrics_disable(void) +{ + // Prevent decrementing the counter below zero + uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled); + while (enabled > 0) { + if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1)) + break; + } +} + +/** + * @brief Retrieve floating-point environment constants. + * + * Populates an array with constants related to the floating-point environment, + * such as rounding modes and exception flags. + * + * @param ret An array of integers to be populated with floating-point environment constants. + */ JL_DLLEXPORT void jl_get_fenv_consts(int *ret) { ret[0] = FE_INEXACT; @@ -530,6 +860,14 @@ JL_DLLEXPORT int jl_get_fenv_rounding(void) { return fegetround(); } + +/** + * @brief Set the floating-point rounding mode. + * + * @param i An integer representing the desired floating-point rounding mode. + See also "floating-point rounding" macros in ``. + * @return An integer indicating the success or failure of setting the rounding mode. + */ JL_DLLEXPORT int jl_set_fenv_rounding(int i) { return fesetround(i); @@ -537,6 +875,7 @@ JL_DLLEXPORT int jl_set_fenv_rounding(int i) static int exec_program(char *program) { + jl_task_t *ct = jl_current_task; JL_TRY { jl_load(jl_main_module, program); } @@ -545,7 +884,7 @@ static int exec_program(char *program) // printing directly to STDERR_FILENO. int shown_err = 0; jl_printf(JL_STDERR, "error during bootstrap:\n"); - jl_value_t *exc = jl_current_exception(); + jl_value_t *exc = jl_current_exception(ct); jl_value_t *showf = jl_base_module ? jl_get_function(jl_base_module, "show") : NULL; if (showf) { jl_value_t *errs = jl_stderr_obj(); @@ -574,18 +913,22 @@ static NOINLINE int true_main(int argc, char *argv[]) jl_function_t *start_client = jl_base_module ? 
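/*
 * Illustrative sketch, not part of the patch: bracketing a workload with the
 * cumulative compile-timing counters documented above, the same pattern Base's
 * @time uses.  The enable/disable calls increment/decrement a flag, so nested
 * reentrant callers are fine.  The workload string is an arbitrary example, and
 * the extern declarations are included in case the installed julia.h does not
 * expose these exported functions.
 */
#include <julia.h>
#include <stdio.h>

extern void jl_cumulative_compile_timing_enable(void);
extern void jl_cumulative_compile_timing_disable(void);
extern uint64_t jl_cumulative_compile_time_ns(void);

static void measure_compile_time(void)
{
    uint64_t t0 = jl_cumulative_compile_time_ns();
    jl_cumulative_compile_timing_enable();
    jl_eval_string("sum(abs2, rand(10^6))");    // forces some fresh compilation
    jl_cumulative_compile_timing_disable();
    uint64_t t1 = jl_cumulative_compile_time_ns();
    printf("compilation: %.3f ms\n", (t1 - t0) / 1e6);
}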
(jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL; + jl_task_t *ct = jl_current_task; if (start_client) { - jl_task_t *ct = jl_current_task; + int ret = 1; JL_TRY { size_t last_age = ct->world_age; ct->world_age = jl_get_world_counter(); - jl_apply(&start_client, 1); + jl_value_t *r = jl_apply(&start_client, 1); + if (jl_typeof(r) != (jl_value_t*)jl_int32_type) + jl_type_error("typeassert", (jl_value_t*)jl_int32_type, r); + ret = jl_unbox_int32(r); ct->world_age = last_age; } JL_CATCH { - jl_no_exc_handler(jl_current_exception(), ct); + jl_no_exc_handler(jl_current_exception(ct), ct); } - return 0; + return ret; } // run program if specified, otherwise enter REPL @@ -627,7 +970,7 @@ static NOINLINE int true_main(int argc, char *argv[]) line = NULL; } jl_printf((JL_STREAM*)STDERR_FILENO, "\nparser error:\n"); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception(ct)); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jl_print_backtrace(); // written to STDERR_FILENO } @@ -687,6 +1030,13 @@ static void rr_detach_teleport(void) { } #endif +/** + * @brief Entry point for the Julia REPL (Read-Eval-Print Loop). + * + * @param argc The number of command-line arguments. + * @param argv Array of command-line arguments. + * @return An integer indicating the exit status of the REPL session. + */ JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[]) { #ifdef USE_TRACY diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index d376bc27085ab..9c69da199c0cd 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -31,8 +31,6 @@ ;; this is overwritten when we run in actual julia (define (defined-julia-global v) #f) -(define (julia-current-file) 'none) -(define (julia-current-line) 0) ;; parser entry points @@ -140,7 +138,7 @@ (define (toplevel-only-expr? e) (and (pair? e) - (or (memq (car e) '(toplevel line module import using export + (or (memq (car e) '(toplevel line module export public error incomplete)) (and (memq (car e) '(global const)) (every symbol? (cdr e)))))) @@ -149,7 +147,7 @@ (define (expand-toplevel-expr e file line) (cond ((or (atom? e) (toplevel-only-expr? e)) (if (underscore-symbol? e) - (error "all-underscore identifier used as rvalue")) + (error "all-underscore identifiers are write-only and their values cannot be used in expressions")) e) (else (let ((last *in-expand*)) @@ -181,7 +179,10 @@ ;; Abuse scm_to_julia here to convert arguments to warn. This is meant for ;; `Expr`s but should be good enough provided we're only passing simple ;; numbers, symbols and strings. - ((lowering-warning (lambda lst (set! warnings (cons (cons 'warn lst) warnings))))) + ((lowering-warning (lambda (level group warn_file warn_line . lst) + (let ((line (if (= warn_line 0) line warn_line)) + (file (if (eq? warn_file 'none) file warn_file))) + (set! warnings (cons (list* 'warn level group (symbol (string file line)) file line lst) warnings)))))) (let ((thunk (if stmt (expand-to-thunk-stmt- expr file line) (expand-to-thunk- expr file line)))) @@ -197,28 +198,6 @@ (error-wrap (lambda () (julia-expand-macroscope expr)))) -;; construct default definitions of `eval` for non-bare modules -;; called by jl_eval_module_expr -(define (module-default-defs name file line) - (jl-expand-to-thunk - (let* ((loc (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file)))) - (x (if (eq? name 'x) 'y 'x)) - (mex (if (eq? 
name 'mapexpr) 'map_expr 'mapexpr))) - `(block - (= (call eval ,x) - (block - ,@loc - (call (core eval) ,name ,x))) - (= (call include ,x) - (block - ,@loc - (call (core _call_latest) (top include) ,name ,x))) - (= (call include (:: ,mex (top Function)) ,x) - (block - ,@loc - (call (core _call_latest) (top include) ,mex ,name ,x))))) - file line)) - ; run whole frontend on a string. useful for testing. (define (fe str) (expand-toplevel-expr (julia-parse str) 'none 0)) diff --git a/src/jloptions.c b/src/jloptions.c index 129ba9df2510e..c68b5ce193d98 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -8,6 +8,7 @@ #include #include + #include "julia_assert.h" #ifdef _OS_WINDOWS_ @@ -18,12 +19,69 @@ char *shlib_ext = ".dylib"; char *shlib_ext = ".so"; #endif +/* This simple hand-crafted tolower exists to avoid locale-dependent effects in + * behaviors (and utf8proc_tolower wasn't linking properly on all platforms) */ +static char ascii_tolower(char c) +{ + if ('A' <= c && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + static const char system_image_path[256] = "\0" JL_SYSTEM_IMAGE_PATH; JL_DLLEXPORT const char *jl_get_default_sysimg_path(void) { return &system_image_path[1]; } +/* This function is also used by gc-stock.c to parse the + * JULIA_HEAP_SIZE_HINT environment variable. */ +uint64_t parse_heap_size_hint(const char *optarg, const char *option_name) +{ + long double value = 0.0; + char unit[4] = {0}; + int nparsed = sscanf(optarg, "%Lf%3s", &value, unit); + if (nparsed == 0 || strlen(unit) > 2 || (strlen(unit) == 2 && ascii_tolower(unit[1]) != 'b')) { + jl_errorf("julia: invalid argument to %s (%s)", option_name, optarg); + } + uint64_t multiplier = 1ull; + switch (ascii_tolower(unit[0])) { + case '\0': + case 'b': + break; + case 'k': + multiplier <<= 10; + break; + case 'm': + multiplier <<= 20; + break; + case 'g': + multiplier <<= 30; + break; + case 't': + multiplier <<= 40; + break; + case '%': + if (value > 100) + jl_errorf("julia: invalid percentage specified in %s", option_name); + uint64_t mem = uv_get_total_memory(); + uint64_t cmem = uv_get_constrained_memory(); + if (cmem > 0 && cmem < mem) + mem = cmem; + multiplier = mem/100; + break; + default: + jl_errorf("julia: invalid argument to %s (%s)", option_name, optarg); + break; + } + long double sz = value * multiplier; + if (isnan(sz) || sz < 0) { + jl_errorf("julia: invalid argument to %s (%s)", option_name, optarg); + } + const long double limit = ldexpl(1.0, 64); // UINT64_MAX + 1 + return sz < limit ? 
(uint64_t)sz : UINT64_MAX; +} + static int jl_options_initialized = 0; JL_DLLEXPORT void jl_init_options(void) @@ -67,10 +125,12 @@ JL_DLLEXPORT void jl_init_options(void) 1, // can_inline JL_OPTIONS_POLLY_ON, // polly NULL, // trace_compile + NULL, // trace_dispatch JL_OPTIONS_FAST_MATH_DEFAULT, 0, // worker NULL, // cookie JL_OPTIONS_HANDLE_SIGNALS_ON, + JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_NO, JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES, JL_OPTIONS_USE_COMPILED_MODULES_YES, JL_OPTIONS_USE_PKGIMAGES_YES, @@ -90,129 +150,183 @@ JL_DLLEXPORT void jl_init_options(void) 0, // strip-ir 0, // permalloc_pkgimg 0, // heap-size-hint + 0, // trace_compile_timing + JL_TRIM_NO, // trim + 0, // task_metrics }; jl_options_initialized = 1; } static const char usage[] = "\n julia [switches] -- [programfile] [args...]\n\n"; static const char opts[] = - "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation):\n\n" - " -v, --version Display version information\n" - " -h, --help Print this message (--help-hidden for more)\n" - " --help-hidden Uncommon options not shown by `-h`\n\n" + "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package\n" + "precompilation):\n\n" + " Option Description\n" + " ---------------------------------------------------------------------------------------------------\n" + " -v, --version Display version information\n" + " -h, --help Print command-line options (this message)\n" + " --help-hidden Print uncommon options not shown by `-h`\n\n" // startup options - " --project[={|@.}] Set as the home project/environment\n" - " -J, --sysimage Start up with the given system image file\n" - " -H, --home Set location of `julia` executable\n" - " --startup-file={yes*|no} Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH`\n" - " environment variable is unset, load `~/.julia/config/startup.jl`\n" - " --handle-signals={yes*|no} Enable or disable Julia's default signal handlers\n" - " --sysimage-native-code={yes*|no}\n" - " Use native code from system image if available\n" - " --compiled-modules={yes*|no}\n" - " Enable or disable incremental precompilation of modules\n" - " --pkgimages={yes*|no}\n" - " Enable or disable usage of native code caching in the form of pkgimages ($)\n\n" + " --project[={|@temp|@.}] Set as the active project/environment.\n" + " Or, create a temporary environment with `@temp`\n" + " The default @. option will search through parent\n" + " directories until a Project.toml or JuliaProject.toml\n" + " file is found.\n" + " -J, --sysimage Start up with the given system image file\n" + " -H, --home Set location of `julia` executable\n" + " --startup-file={yes*|no} Load `JULIA_DEPOT_PATH/config/startup.jl`; \n" + " if `JULIA_DEPOT_PATH` environment variable is unset,\n" + " load `~/.julia/config/startup.jl`\n" + " --handle-signals={yes*|no} Enable or disable Julia's default signal handlers\n" + " --sysimage-native-code={yes*|no} Use native code from system image if available\n" + " --compiled-modules={yes*|no|existing|strict} Enable or disable incremental precompilation of\n" + " modules. The `existing` option allows use of existing\n" + " compiled modules that were previously precompiled,\n" + " but disallows creation of new precompile files.\n" + " The `strict` option is similar, but will error if no\n" + " precompile file is found.\n" + " --pkgimages={yes*|no|existing} Enable or disable usage of native code caching in the\n" + " form of pkgimages. 
The `existing` option allows use\n" + " of existing pkgimages but disallows creation of new\n" + " ones ($)\n\n" // actions - " -e, --eval Evaluate \n" - " -E, --print Evaluate and display the result\n" - " -L, --load Load immediately on all processors\n\n" + " -e, --eval Evaluate \n" + " -E, --print Evaluate and display the result\n" + " -m, --module [args] Run entry point of `Package` (`@main` function) with\n" + " `args'.\n" + " -L, --load Load immediately on all processors\n\n" // parallel options - " -t, --threads {auto|N[,auto|M]}\n" - " Enable N[+M] threads; N threads are assigned to the `default`\n" - " threadpool, and if M is specified, M threads are assigned to the\n" - " `interactive` threadpool; \"auto\" tries to infer a useful\n" - " default number of threads to use but the exact behavior might change\n" - " in the future. Currently sets N to the number of CPUs assigned to\n" - " this Julia process based on the OS-specific affinity assignment\n" - " interface if supported (Linux and Windows) or to the number of CPU\n" - " threads if not supported (MacOS) or if process affinity is not\n" - " configured, and sets M to 1.\n" - " --gcthreads=M[,N] Use M threads for the mark phase of GC and N (0 or 1) threads for the concurrent sweeping phase of GC.\n" - " M is set to half of the number of compute threads and N is set to 0 if unspecified.\n" - " -p, --procs {N|auto} Integer value N launches N additional local worker processes\n" - " \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n" - " --machine-file Run processes on hosts listed in \n\n" + " -t, --threads {auto|N[,auto|M]} Enable N[+M] threads; N threads are assigned to the\n" + " `default` threadpool, and if M is specified, M\n" + " threads are assigned to the `interactive`\n" + " threadpool; `auto` tries to infer a useful\n" + " default number of threads to use but the exact\n" + " behavior might change in the future. 
Currently sets\n" + " N to the number of CPUs assigned to this Julia\n" + " process based on the OS-specific affinity assignment\n" + " interface if supported (Linux and Windows) or to the\n" + " number of CPU threads if not supported (MacOS) or if\n" + " process affinity is not configured, and sets M to 1.\n" + " --gcthreads=N[,M] Use N threads for the mark phase of GC and M (0 or 1)\n" + " threads for the concurrent sweeping phase of GC.\n" + " N is set to the number of compute threads and\n" + " M is set to 0 if unspecified.\n" + " -p, --procs {N|auto} Integer value N launches N additional local worker\n" + " processes `auto` launches as many workers as the\n" + " number of local CPU threads (logical cores).\n" + " --machine-file Run processes on hosts listed in \n\n" // interactive options - " -i, --interactive Interactive mode; REPL runs and `isinteractive()` is true\n" - " -q, --quiet Quiet startup: no banner, suppress REPL warnings\n" - " --banner={yes|no|auto*} Enable or disable startup banner\n" - " --color={yes|no|auto*} Enable or disable color text\n" - " --history-file={yes*|no} Load or save history\n\n" + " -i, --interactive Interactive mode; REPL runs and\n" + " `isinteractive()` is true.\n" + " -q, --quiet Quiet startup: no banner, suppress REPL warnings\n" + " --banner={yes|no|short|auto*} Enable or disable startup banner\n" + " --color={yes|no|auto*} Enable or disable color text\n" + " --history-file={yes*|no} Load or save history\n\n" // error and warning options - " --depwarn={yes|no*|error} Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)\n" - " --warn-overwrite={yes|no*} Enable or disable method overwrite warnings\n" - " --warn-scope={yes*|no} Enable or disable warning for ambiguous top-level scope\n\n" + " --depwarn={yes|no*|error} Enable or disable syntax and method deprecation\n" + " warnings (`error` turns warnings into errors)\n" + " --warn-overwrite={yes|no*} Enable or disable method overwrite warnings\n" + " --warn-scope={yes*|no} Enable or disable warning for ambiguous top-level\n" + " scope\n\n" // code generation options - " -C, --cpu-target Limit usage of CPU features up to ; set to `help` to see the available options\n" - " -O, --optimize={0,1,2*,3} Set the optimization level (level 3 if `-O` is used without a level) ($)\n" - " --min-optlevel={0*,1,2,3} Set a lower bound on the optimization level\n" + " -C, --cpu-target Limit usage of CPU features up to ; set to\n" + " `help` to see the available options\n" + " -O, --optimize={0|1|2*|3} Set the optimization level (level 3 if `-O` is used\n" + " without a level) ($)\n" + " --min-optlevel={0*|1|2|3} Set a lower bound on the optimization level\n" #ifdef JL_DEBUG_BUILD - " -g, --debug-info=[{0,1,2*}] Set the level of debug info generation in the julia-debug build ($)\n" + " -g, --debug-info=[{0|1|2*}] Set the level of debug info generation in the\n" + " julia-debug build ($)\n" #else - " -g, --debug-info=[{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level) ($)\n" + " -g, --debug-info=[{0|1*|2}] Set the level of debug info generation (level 2 if\n" + " `-g` is used without a level) ($)\n" #endif - " --inline={yes*|no} Control whether inlining is permitted, including overriding @inline declarations\n" - " --check-bounds={yes|no|auto*}\n" - " Emit bounds checks always, never, or respect @inbounds declarations ($)\n" + " --inline={yes*|no} Control whether inlining is permitted, including\n" + " overriding @inline declarations\n" + " 
--check-bounds={yes|no|auto*} Emit bounds checks always, never, or respect\n" + " @inbounds declarations ($)\n" + " --math-mode={ieee|user*} Always follow `ieee` floating point semantics or\n" + " respect `@fastmath` declarations\n\n" #ifdef USE_POLLY - " --polly={yes*|no} Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n" + " --polly={yes*|no} Enable or disable the polyhedral optimizer Polly\n" + " (overrides @polly declaration)\n" #endif // instrumentation options - " --code-coverage[={none*|user|all}]\n" - " Count executions of source lines (omitting setting is equivalent to `user`)\n" - " --code-coverage=@\n" - " Count executions but only in files that fall under the given file path/directory.\n" - " The `@` prefix is required to select this option. A `@` with no path will track the\n" - " current directory.\n" + " --code-coverage[={none*|user|all}] Count executions of source lines (omitting setting is\n" + " equivalent to `user`)\n" + " --code-coverage=@ Count executions but only in files that fall under\n" + " the given file path/directory. The `@` prefix is\n" + " required to select this option. A `@` with no path\n" + " will track the current directory.\n" - " --code-coverage=tracefile.info\n" - " Append coverage information to the LCOV tracefile (filename supports format tokens)\n" + " --code-coverage=tracefile.info Append coverage information to the LCOV tracefile\n" + " (filename supports format tokens)\n" // TODO: These TOKENS are defined in `runtime_ccall.cpp`. A more verbose `--help` should include that list here. - " --track-allocation[={none*|user|all}]\n" - " Count bytes allocated by each source line (omitting setting is equivalent to `user`)\n" - " --track-allocation=@\n" - " Count bytes but only in files that fall under the given file path/directory.\n" - " The `@` prefix is required to select this option. A `@` with no path will track the\n" - " current directory.\n" - " --bug-report=KIND Launch a bug report session. It can be used to start a REPL, run a script, or evaluate\n" - " expressions. It first tries to use BugReporting.jl installed in current environment and\n" - " fallbacks to the latest compatible BugReporting.jl if not. For more information, see\n" - " --bug-report=help.\n\n" - - " --heap-size-hint= Forces garbage collection if memory usage is higher than that value.\n" - " The memory hint might be specified in megabytes(500M) or gigabytes(1G)\n\n" + " --track-allocation[={none*|user|all}] Count bytes allocated by each source line (omitting\n" + " setting is equivalent to `user`)\n" + " --track-allocation=@ Count bytes but only in files that fall under the\n" + " given file path/directory. The `@` prefix is required\n" + " to select this option. A `@` with no path will track\n" + " the current directory.\n" + " --bug-report=KIND Launch a bug report session. It can be used to start\n" + " a REPL, run a script, or evaluate expressions. It\n" + " first tries to use BugReporting.jl installed in\n" + " current environment and fallbacks to the latest\n" + " compatible BugReporting.jl if not. For more\n" + " information, see --bug-report=help.\n\n" + " --heap-size-hint=[] Forces garbage collection if memory usage is higher\n" + " than the given value. 
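/*
 * Worked example, not part of the patch: the unit arithmetic behind
 * --heap-size-hint and parse_heap_size_hint() above.  This is a simplified
 * restatement for illustration only -- it omits the '%' form (which divides
 * total or cgroup-constrained memory by 100), the lowercasing of the suffix,
 * and the error paths handled by the real parser.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t hint_bytes(double value, char unit)
{
    uint64_t multiplier = 1ull;
    switch (unit) {
        case 'k': multiplier <<= 10; break;   // "500K"  -> 512000 bytes
        case 'm': multiplier <<= 20; break;   // "500M"  -> 524288000 bytes
        case 'g': multiplier <<= 30; break;   // "1.5G"  -> 1610612736 bytes
        case 't': multiplier <<= 40; break;
        default: break;                       // bare number: plain byte count
    }
    return (uint64_t)(value * multiplier);
}

int main(void)
{
    printf("%llu\n", (unsigned long long)hint_bytes(500, 'm'));   // 524288000
    printf("%llu\n", (unsigned long long)hint_bytes(1.5, 'g'));   // 1610612736
    return 0;
}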
The value may be specified as a\n" + " number of bytes, optionally in units of: B, K (kibibytes),\n" + " M (mebibytes), G (gibibytes), T (tebibytes), or % (percentage\n" + " of physical memory).\n\n" ; static const char opts_hidden[] = "Switches (a '*' marks the default value, if applicable):\n\n" + " Option Description\n" + " ---------------------------------------------------------------------------------------------------\n" // code generation options - " --compile={yes*|no|all|min}\n" - " Enable or disable JIT compiler, or request exhaustive or minimal compilation\n\n" + " --compile={yes*|no|all|min} Enable or disable JIT compiler, or request exhaustive\n" + " or minimal compilation\n\n" // compiler output options - " --output-o Generate an object file (including system image data)\n" - " --output-ji Generate a system image data file (.ji)\n" - " --strip-metadata Remove docstrings and source location info from system image\n" - " --strip-ir Remove IR (intermediate representation) of compiled functions\n\n" + " --output-o Generate an object file (including system image data)\n" + " --output-ji Generate a system image data file (.ji)\n" + " --strip-metadata Remove docstrings and source location info from\n" + " system image\n" + " --strip-ir Remove IR (intermediate representation) of compiled\n" + " functions\n\n" - // compiler debugging (see the devdocs for tips on using these options) - " --output-unopt-bc Generate unoptimized LLVM bitcode (.bc)\n" - " --output-bc Generate LLVM bitcode (.bc)\n" - " --output-asm Generate an assembly file (.s)\n" - " --output-incremental={yes|no*}\n" - " Generate an incremental output file (rather than complete)\n" - " --trace-compile={stderr,name}\n" - " Print precompile statements for methods compiled during execution or save to a path\n" - " --image-codegen Force generate code in imaging mode\n" - " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" + // compiler debugging and experimental (see the devdocs for tips on using these options) + " --experimental Enable the use of experimental (alpha) features\n" + " --output-unopt-bc Generate unoptimized LLVM bitcode (.bc)\n" + " --output-bc Generate LLVM bitcode (.bc)\n" + " --output-asm Generate an assembly file (.s)\n" + " --output-incremental={yes|no*} Generate an incremental output file (rather than\n" + " complete)\n" + " --trace-compile={stderr|name} Print precompile statements for methods compiled\n" + " during execution or save to stderr or a path. Methods that\n" + " were recompiled are printed in yellow or with a trailing\n" + " comment if color is not supported\n" + " --trace-compile-timing If --trace-compile is enabled show how long each took to\n" + " compile in ms\n" + " --task-metrics={yes|no*} Enable collection of per-task timing data.\n" + " --image-codegen Force generate code in imaging mode\n" + " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" + " --trim={no*|safe|unsafe|unsafe-warn}\n" + " Build a sysimage including only code provably reachable\n" + " from methods marked by calling `entrypoint`. In unsafe\n" + " mode, the resulting binary might be missing needed code\n" + " and can throw errors. With unsafe-warn warnings will be\n" + " printed for dynamic call sites that might lead to such\n" + " errors. 
In safe mode compile-time errors are given instead.\n" ; JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) @@ -233,6 +347,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_inline, opt_polly, opt_trace_compile, + opt_trace_compile_timing, + opt_trace_dispatch, + opt_task_metrics, opt_math_mode, opt_worker, opt_bind_to, @@ -258,9 +375,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_strip_ir, opt_heap_size_hint, opt_gc_threads, - opt_permalloc_pkgimg + opt_permalloc_pkgimg, + opt_trim, + opt_experimental_features, }; - static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:"; + static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:"; static const struct option longopts[] = { // exposed command line options // NOTE: This set of required arguments need to be kept in sync @@ -273,6 +392,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "banner", required_argument, 0, opt_banner }, { "home", required_argument, 0, 'H' }, { "eval", required_argument, 0, 'e' }, + { "module", required_argument, 0, 'm' }, { "print", required_argument, 0, 'E' }, { "load", required_argument, 0, 'L' }, { "bug-report", required_argument, 0, opt_bug_report }, @@ -308,9 +428,13 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "inline", required_argument, 0, opt_inline }, { "polly", required_argument, 0, opt_polly }, { "trace-compile", required_argument, 0, opt_trace_compile }, + { "trace-compile-timing", no_argument, 0, opt_trace_compile_timing }, + { "trace-dispatch", required_argument, 0, opt_trace_dispatch }, + { "task-metrics", required_argument, 0, opt_task_metrics }, { "math-mode", required_argument, 0, opt_math_mode }, { "handle-signals", required_argument, 0, opt_handle_signals }, // hidden command line options + { "experimental", no_argument, 0, opt_experimental_features }, { "worker", optional_argument, 0, opt_worker }, { "bind-to", required_argument, 0, opt_bind_to }, { "lisp", no_argument, 0, 1 }, @@ -320,6 +444,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "strip-ir", no_argument, 0, opt_strip_ir }, { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg }, { "heap-size-hint", required_argument, 0, opt_heap_size_hint }, + { "trim", optional_argument, 0, opt_trim }, { 0, 0, 0, 0 } }; @@ -332,7 +457,6 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) const char **cmds = NULL; int codecov = JL_LOG_NONE; int malloclog = JL_LOG_NONE; - int pkgimage_explicit = 0; int argc = *argcp; char **argv = *argvp; char *endptr; @@ -411,6 +535,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) case 'e': // eval case 'E': // print case 'L': // load + case 'm': // module case opt_bug_report: // bug { size_t sz = strlen(optarg) + 1; @@ -424,6 +549,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) ncmds++; cmds[ncmds] = 0; jl_options.cmds = cmds; + if (c == 'm') { + optind -= 1; + goto parsing_args_done; + } break; } case 'J': // sysimage @@ -444,8 +573,13 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_options.banner = 0; else if (!strcmp(optarg, "auto")) jl_options.banner = -1; + else if (!strcmp(optarg, "short")) + jl_options.banner = 2; else - jl_errorf("julia: invalid argument to --banner={yes|no|auto} (%s)", optarg); + jl_errorf("julia: invalid argument to --banner={yes|no|auto|short} (%s)", optarg); + break; + case opt_experimental_features: + jl_options.use_experimental_features = JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_YES; break; case 
opt_sysimage_native_code: if (!strcmp(optarg,"yes")) @@ -460,17 +594,22 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_YES; else if (!strcmp(optarg,"no")) jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_NO; + else if (!strcmp(optarg,"existing")) + jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_EXISTING; + else if (!strcmp(optarg,"strict")) + jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_STRICT; else - jl_errorf("julia: invalid argument to --compiled-modules={yes|no} (%s)", optarg); + jl_errorf("julia: invalid argument to --compiled-modules={yes|no|existing|strict} (%s)", optarg); break; case opt_pkgimages: - pkgimage_explicit = 1; if (!strcmp(optarg,"yes")) jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_YES; else if (!strcmp(optarg,"no")) jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_NO; + else if (!strcmp(optarg,"existing")) + jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_EXISTING; else - jl_errorf("julia: invalid argument to --pkgimage={yes|no} (%s)", optarg); + jl_errorf("julia: invalid argument to --pkgimages={yes|no} (%s)", optarg); break; case 'C': // cpu-target jl_options.cpu_target = strdup(optarg); @@ -735,7 +874,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_errorf("julia: invalid argument to --inline (%s)", optarg); } break; - case opt_polly: + case opt_polly: if (!strcmp(optarg,"yes")) jl_options.polly = JL_OPTIONS_POLLY_ON; else if (!strcmp(optarg,"no")) @@ -744,11 +883,19 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_errorf("julia: invalid argument to --polly (%s)", optarg); } break; - case opt_trace_compile: + case opt_trace_compile: jl_options.trace_compile = strdup(optarg); if (!jl_options.trace_compile) jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno)); break; + case opt_trace_compile_timing: + jl_options.trace_compile_timing = 1; + break; + case opt_trace_dispatch: + jl_options.trace_dispatch = strdup(optarg); + if (!jl_options.trace_dispatch) + jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno)); + break; case opt_math_mode: if (!strcmp(optarg,"ieee")) jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF; @@ -757,7 +904,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else if (!strcmp(optarg,"user")) jl_options.fast_math = JL_OPTIONS_FAST_MATH_DEFAULT; else - jl_errorf("julia: invalid argument to --math-mode (%s)", optarg); + jl_errorf("julia: invalid argument to --math-mode={ieee|user} (%s)", optarg); break; case opt_worker: jl_options.worker = 1; @@ -793,37 +940,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_options.strip_ir = 1; break; case opt_heap_size_hint: - if (optarg != NULL) { - size_t endof = strlen(optarg); - long double value = 0.0; - if (sscanf(optarg, "%Lf", &value) == 1 && value > 1e-7) { - char unit = optarg[endof - 1]; - uint64_t multiplier = 1ull; - switch (unit) { - case 'k': - case 'K': - multiplier <<= 10; - break; - case 'm': - case 'M': - multiplier <<= 20; - break; - case 'g': - case 'G': - multiplier <<= 30; - break; - case 't': - case 'T': - multiplier <<= 40; - break; - default: - break; - } - jl_options.heap_size_hint = (uint64_t)(value * multiplier); - } - } + if (optarg != NULL) + jl_options.heap_size_hint = parse_heap_size_hint(optarg, "--heap-size-hint=[]"); if (jl_options.heap_size_hint == 0) - jl_errorf("julia: invalid argument to --heap-size-hint without memory size 
specified"); + jl_errorf("julia: invalid memory size specified in --heap-size-hint=[]"); break; case opt_gc_threads: @@ -838,7 +958,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) char *endptri; long nsweepthreads = strtol(&endptr[1], &endptri, 10); if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nsweepthreads < 0 || nsweepthreads > 1) - jl_errorf("julia: --gcthreads=,; n must be 0 or 1"); + jl_errorf("julia: --gcthreads=,; m must be 0 or 1"); jl_options.nsweepthreads = (int8_t)nsweepthreads; } break; @@ -850,17 +970,35 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg); break; + case opt_trim: + if (optarg == NULL || !strcmp(optarg,"safe")) + jl_options.trim = JL_TRIM_SAFE; + else if (!strcmp(optarg,"no")) + jl_options.trim = JL_TRIM_NO; + else if (!strcmp(optarg,"unsafe")) + jl_options.trim = JL_TRIM_UNSAFE; + else if (!strcmp(optarg,"unsafe-warn")) + jl_options.trim = JL_TRIM_UNSAFE_WARN; + else + jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg); + break; + case opt_task_metrics: + if (!strcmp(optarg, "no")) + jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF; + else if (!strcmp(optarg, "yes")) + jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON; + else + jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); } } - if (codecov || malloclog) { - if (pkgimage_explicit && jl_options.use_pkgimages) { - jl_errorf("julia: Can't use --pkgimages=yes together " - "with --track-allocation or --code-coverage."); - } - jl_options.use_pkgimages = 0; + parsing_args_done: + if (!jl_options.use_experimental_features) { + if (jl_options.trim != JL_TRIM_NO) + jl_errorf("julia: --trim is an experimental feature, you must enable it with --experimental"); } jl_options.code_coverage = codecov; jl_options.malloc_log = malloclog; diff --git a/src/jloptions.h b/src/jloptions.h index 8649c405112d7..211122242cbbd 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -38,10 +38,12 @@ typedef struct { int8_t can_inline; int8_t polly; const char *trace_compile; + const char *trace_dispatch; int8_t fast_math; int8_t worker; const char *cookie; int8_t handle_signals; + int8_t use_experimental_features; int8_t use_sysimage_native_code; int8_t use_compiled_modules; int8_t use_pkgimages; @@ -61,6 +63,9 @@ typedef struct { int8_t strip_ir; int8_t permalloc_pkgimg; uint64_t heap_size_hint; + int8_t trace_compile_timing; + int8_t trim; + int8_t task_metrics; } jl_options_t; #endif diff --git a/src/jltypes.c b/src/jltypes.c index 444923f600569..b478ce7ea98fd 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -20,7 +20,7 @@ extern "C" { #endif _Atomic(jl_value_t*) cmpswap_names JL_GLOBALLY_ROOTED; -jl_datatype_t *small_typeof[(jl_max_tags << 4) / sizeof(*small_typeof)]; // 16-bit aligned, like the GC +jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(*ijl_small_typeof)]; // 16-bit aligned, like the GC // compute empirical max-probe for a given size #define max_probe(size) ((size) <= 1024 ? 
16 : (size) >> 6) @@ -65,7 +65,9 @@ static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env) } if (jl_is_datatype(v)) { jl_datatype_t *dt = (jl_datatype_t*)v; - if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc) + if (dt->isconcretetype) + return 0; + if (dt->layout || !dt->name->mayinlinealloc) return 0; if (dt->name == jl_namedtuple_typename) return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env); @@ -215,7 +217,7 @@ JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v) } // test whether a type has vars bound by the given environment -static int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT +int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT { while (1) { if (jl_is_typevar(v)) { @@ -290,7 +292,13 @@ JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua) int jl_has_fixed_layout(jl_datatype_t *dt) { - if (dt->layout || dt->isconcretetype) + if (dt->isconcretetype) + return 1; + if (jl_is_genericmemory_type(dt)) { // GenericMemory{kind,addrspace,T} uses T for final layout, which is a parameter not a field however + // optionally: return !layout_uses_free_typevars(jl_tparam1(dt), env); + return 0; + } + if (dt->layout) return 1; if (dt->name->abstract) return 0; @@ -313,15 +321,15 @@ int jl_has_fixed_layout(jl_datatype_t *dt) int jl_type_mappable_to_c(jl_value_t *ty) { assert(!jl_is_typevar(ty) && jl_is_type(ty)); + if (jl_is_array_type(ty) || jl_is_genericmemory_type(ty) || + (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL && + jl_is_layout_opaque(((jl_datatype_t*)ty)->layout))) + return 1; // as boxed if (jl_is_structtype(ty)) return jl_has_fixed_layout((jl_datatype_t*)ty) && ((jl_datatype_t*)ty)->name->atomicfields == NULL; if (jl_is_primitivetype(ty)) - return 1; - if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type) - return 1; // as boxed - if (jl_is_abstract_ref_type(ty) || jl_is_array_type(ty) || - (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL && - jl_is_layout_opaque(((jl_datatype_t*)ty)->layout))) + return 1; // as isbits + if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type || jl_is_abstract_ref_type(ty)) return 1; // as boxed return 0; // refuse to map Union and UnionAll to C } @@ -333,7 +341,7 @@ JL_DLLEXPORT int jl_get_size(jl_value_t *val, size_t *pnt) if (jl_is_long(val)) { ssize_t slen = jl_unbox_long(val); if (slen < 0) - jl_errorf("size or dimension is negative: %d", slen); + jl_errorf("size or dimension is negative: %zd", slen); *pnt = slen; return 1; } @@ -548,6 +556,90 @@ static void isort_union(jl_value_t **a, size_t len) JL_NOTSAFEPOINT } } +static int simple_subtype(jl_value_t *a, jl_value_t *b, int hasfree, int isUnion) +{ + assert(hasfree == (jl_has_free_typevars(a) | (jl_has_free_typevars(b) << 1))); + if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type) + return 1; + if (jl_egal(a, b)) + return 1; + if (hasfree == 0) { + int mergeable = isUnion; + if (!mergeable) // issue #24521: don't merge Type{T} where typeof(T) varies + mergeable = !(jl_is_type_type(a) && jl_is_type_type(b) && + jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b))); + return mergeable && jl_subtype(a, b); + } + if (jl_is_typevar(a)) { + jl_value_t *na = ((jl_tvar_t*)a)->ub; + hasfree &= (jl_has_free_typevars(na) | 2); + return simple_subtype(na, b, hasfree, isUnion); + } + if (jl_is_typevar(b)) { + jl_value_t *nb = ((jl_tvar_t*)b)->lb; + // This branch is not 
valid if `b` obeys diagonal rule, + // as it might normalize `Union` into a single `TypeVar`, e.g. + // Tuple{Union{Int,T},T} where {T>:Int} != Tuple{T,T} where {T>:Int} + if (is_leaf_bound(nb)) + return 0; + hasfree &= ((jl_has_free_typevars(nb) << 1) | 1); + return simple_subtype(a, nb, hasfree, isUnion); + } + if (b==(jl_value_t*)jl_datatype_type || b==(jl_value_t*)jl_typeofbottom_type) { + // This branch is not valid for `Union`/`UnionAll`, e.g. + // (Type{Union{Int,T2} where {T2<:T1}} where {T1}){Int} == Type{Int64} + // (Type{Union{Int,T1}} where {T1}){Int} == Type{Int64} + return jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b; + } + return 0; +} + +// merge Union{Tuple{}, Tuple{T}, Tuple{T, T, Vararg{T}}} into Tuple{Vararg{T}} +// assumes temp is already sorted by number of type parameters +STATIC_INLINE void merge_vararg_unions(jl_value_t **temp, size_t nt) +{ + for (size_t i = nt-1; i > 0; i--) { + // match types of form Tuple{T, ..., Vararg{T}} + jl_value_t *tt = temp[i]; + if (!(tt && jl_is_tuple_type(tt))) continue; + size_t nfields = jl_nparams(tt); + if (nfields <= 1) continue; + jl_value_t *va = jl_tparam(tt, nfields-1); + if (jl_vararg_kind(va) != JL_VARARG_UNBOUND) continue; + jl_value_t *t = jl_unwrap_vararg(va); + for (size_t j = 0; j < nfields-1; j++) + if (!jl_egal(jl_tparam(tt, j), t)) goto outer_loop; + + // look for Tuple{T, T, ...} then Tuple{T, ...}, etc + size_t min_elements = nfields-1; + for (long j = i-1; j >= 0; j--) { + jl_value_t *ttj = temp[j]; + if (!jl_is_tuple_type(ttj)) break; + size_t nfieldsj = jl_nparams(ttj); + if (nfieldsj >= min_elements) continue; + if (nfieldsj != min_elements-1) break; + for (size_t k = 0; k < nfieldsj; k++) + if (!jl_egal(jl_tparam(ttj, k), t)) goto inner_loop; + + temp[j] = NULL; + min_elements--; + inner_loop: + continue; + } + + if (min_elements == nfields-1) continue; + jl_value_t** params; + JL_GC_PUSHARGS(params, min_elements+1); + for (size_t j = 0; j < min_elements; j++) + params[j] = t; + params[min_elements] = va; + temp[i] = jl_apply_type((jl_value_t*)jl_tuple_type, params, min_elements+1); + JL_GC_POP(); + outer_loop: + continue; + } +} + JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n) { if (n == 0) @@ -572,17 +664,14 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n) int has_free = temp[i] != NULL && jl_has_free_typevars(temp[i]); for (j = 0; j < nt; j++) { if (j != i && temp[i] && temp[j]) { - if (temp[i] == jl_bottom_type || - temp[j] == (jl_value_t*)jl_any_type || - jl_egal(temp[i], temp[j]) || - (!has_free && !jl_has_free_typevars(temp[j]) && - jl_subtype(temp[i], temp[j]))) { + int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1); + if (simple_subtype(temp[i], temp[j], has_free2, 1)) temp[i] = NULL; - } } } } isort_union(temp, nt); + merge_vararg_unions(temp, nt); jl_value_t **ptu = &temp[nt]; *ptu = jl_bottom_type; int k; @@ -600,18 +689,9 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n) return tu; } -// note: this is turned off as `Union` doesn't do such normalization. 
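/*
 * Illustrative sketch, not part of the patch: the redundancy removal that
 * jl_type_union() performs and that simple_subtype() above generalizes.  A
 * component that is a subtype of another component is dropped, so
 * Union{Int64, Integer} collapses to Integer.  Assumes an initialized runtime
 * (jl_init); jl_eval_string, jl_static_show and jl_int64_type are the existing
 * public C API.
 */
#include <julia.h>

static void union_normalization_example(void)
{
    jl_value_t *components[2];
    components[0] = (jl_value_t*)jl_int64_type;      // Int64
    components[1] = jl_eval_string("Integer");       // its abstract supertype
    jl_value_t *u = jl_type_union(components, 2);
    // Int64 <: Integer, so the Int64 arm is redundant and u is simply Integer.
    jl_static_show(JL_STDOUT, u);
    jl_printf(JL_STDOUT, "\n");
}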
-// static int simple_subtype(jl_value_t *a, jl_value_t *b) -// { -// if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b) -// return 1; -// if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->lb)) -// return 1; -// return 0; -// } - -static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree) +static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree, int isUnion) { + assert(hasfree == (jl_has_free_typevars(a) | (jl_has_free_typevars(b) << 1))); int subab = 0, subba = 0; if (jl_egal(a, b)) { subab = subba = 1; @@ -622,9 +702,9 @@ static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree) else if (b == jl_bottom_type || a == (jl_value_t*)jl_any_type) { subba = 1; } - else if (hasfree) { - // subab = simple_subtype(a, b); - // subba = simple_subtype(b, a); + else if (hasfree != 0) { + subab = simple_subtype(a, b, hasfree, isUnion); + subba = simple_subtype(b, a, ((hasfree & 2) >> 1) | ((hasfree & 1) << 1), isUnion); } else if (jl_is_type_type(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b))) { @@ -656,10 +736,11 @@ jl_value_t *simple_union(jl_value_t *a, jl_value_t *b) // first remove cross-redundancy and check if `a >: b` or `a <: b`. for (i = 0; i < nta; i++) { if (temp[i] == NULL) continue; - int hasfree = jl_has_free_typevars(temp[i]); + int has_free = jl_has_free_typevars(temp[i]); for (j = nta; j < nt; j++) { if (temp[j] == NULL) continue; - int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j])); + int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1); + int subs = simple_subtype2(temp[i], temp[j], has_free2, 0); int subab = subs & 1, subba = subs >> 1; if (subab) { temp[i] = NULL; @@ -689,19 +770,14 @@ jl_value_t *simple_union(jl_value_t *a, jl_value_t *b) size_t jmax = i < nta ? 
nta : nt; for (j = jmin; j < jmax; j++) { if (j != i && temp[i] && temp[j]) { - if (temp[i] == jl_bottom_type || - temp[j] == (jl_value_t*)jl_any_type || - jl_egal(temp[i], temp[j]) || - (!has_free && !jl_has_free_typevars(temp[j]) && - // issue #24521: don't merge Type{T} where typeof(T) varies - !(jl_is_type_type(temp[i]) && jl_is_type_type(temp[j]) && jl_typeof(jl_tparam0(temp[i])) != jl_typeof(jl_tparam0(temp[j]))) && - jl_subtype(temp[i], temp[j]))) { + int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1); + if (simple_subtype(temp[i], temp[j], has_free2, 0)) temp[i] = NULL; - } } } } isort_union(temp, nt); + merge_vararg_unions(temp, nt); temp[nt] = jl_bottom_type; size_t k; for (k = nt; k-- > 0; ) { @@ -758,10 +834,11 @@ jl_value_t *simple_intersect(jl_value_t *a, jl_value_t *b, int overesi) for (i = 0; i < nta; i++) { if (temp[i] == NULL) continue; all_disjoint = 0; - int hasfree = jl_has_free_typevars(temp[i]); + int has_free = jl_has_free_typevars(temp[i]); for (j = nta; j < nt; j++) { if (temp[j] == NULL) continue; - int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j])); + int has_free2 = has_free | (jl_has_free_typevars(temp[j]) << 1); + int subs = simple_subtype2(temp[i], temp[j], has_free2, 0); int subab = subs & 1, subba = subs >> 1; if (subba && !subab) { stemp[i] = -1; @@ -833,7 +910,7 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) if (jl_options.depwarn) { if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR) jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`."); - jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.\n"); + jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.\nTo make this warning an error, and hence obtain a stack trace, use `julia --depwarn=error`.\n"); } jl_vararg_t *vm = (jl_vararg_t*)body; int T_has_tv = vm->T && jl_has_typevar(vm->T, v); @@ -847,14 +924,14 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) if (T_has_tv) { jl_value_t *wrapped = jl_type_unionall(v, vm->T); JL_GC_PUSH1(&wrapped); - wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1); + wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1, 0); JL_GC_POP(); return wrapped; } else { assert(N_has_tv); assert(vm->N == (jl_value_t*)v); - return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1); + return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1, 0); } } if (!jl_is_type(body) && !jl_is_typevar(body)) @@ -1125,6 +1202,7 @@ static void cache_insert_type_set(jl_datatype_t *val, uint_t hv) jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz) { + newsz = newsz ? 
next_power_of_two(newsz) : 0; jl_value_t **ol = jl_svec_data(a); size_t sz = jl_svec_len(a); while (1) { @@ -1158,7 +1236,6 @@ static void cache_insert_type_linear(jl_datatype_t *type, ssize_t insert_at) jl_atomic_store_release(&type->name->linearcache, nc); jl_gc_wb(type->name, nc); cache = nc; - n = jl_svec_len(nc); } assert(jl_svecref(cache, insert_at) == jl_nothing); jl_svecset(cache, insert_at, (jl_value_t*)type); // todo: make this an atomic-store @@ -1317,7 +1394,7 @@ struct _jl_typestack_t; typedef struct _jl_typestack_t jl_typestack_t; static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp, - jl_typestack_t *stack, jl_typeenv_t *env, int check); + jl_typestack_t *stack, jl_typeenv_t *env, int check, int nothrow); // Build an environment mapping a TypeName's parameters to parameter values. // This is the environment needed for instantiating a type's supertype and field types. @@ -1325,7 +1402,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t ** jl_typestack_t *stack, jl_typeenv_t *env, int c) { if (jl_is_datatype(dt)) - return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1); + return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1, 0); assert(jl_is_unionall(dt)); jl_unionall_t *ua = (jl_unionall_t*)dt; jl_typeenv_t e = { ua->var, iparams[c], env }; @@ -1351,8 +1428,12 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n) JL_GC_PUSH1(&tc); jl_value_t *tc0 = tc; for (i=0; i < n; i++) { - if (!jl_is_unionall(tc0)) - jl_error("too many parameters for type"); + if (!jl_is_unionall(tc0)){ + char *typ = ""; + if (jl_is_datatype(tc0)) + typ = jl_symbol_name_(((jl_datatype_t*)tc0)->name->name); + jl_errorf("too many parameters for type %s", typ); + } jl_value_t *pi = params[i]; tc0 = ((jl_unionall_t*)tc0)->body; @@ -1404,6 +1485,15 @@ JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value return jl_apply_type(tc, args, 2); } +JL_DLLEXPORT jl_value_t *jl_apply_type3(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2, jl_value_t *p3) +{ + jl_value_t *args[3]; + args[0] = p1; + args[1] = p2; + args[2] = p3; + return jl_apply_type(tc, args, 3); +} + jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) { jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_pair_type, dt, dt); @@ -1431,29 +1521,18 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *ty) return rettyp; } -// used to expand an NTuple to a flat representation -static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v) -{ - jl_value_t *p = NULL; - JL_GC_PUSH1(&p); - p = (jl_value_t*)jl_svec_fill(n, v); - p = jl_apply_tuple_type((jl_svec_t*)p); - JL_GC_POP(); - return p; -} - JL_EXTENSION struct _jl_typestack_t { jl_datatype_t *tt; struct _jl_typestack_t *prev; }; -static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check); -static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack); +static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow); +static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack, int cacheable); JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p) { jl_typeenv_t env = { u->var, p, NULL }; - return inst_type_w_(u->body, &env, NULL, 1); + return inst_type_w_(u->body, &env, NULL, 1, 0); } jl_unionall_t *jl_rename_unionall(jl_unionall_t *u) @@ -1462,18 +1541,27 @@ 
jl_unionall_t *jl_rename_unionall(jl_unionall_t *u) jl_value_t *t = NULL; JL_GC_PUSH2(&v, &t); jl_typeenv_t env = { u->var, (jl_value_t *)v, NULL }; - t = inst_type_w_(u->body, &env, NULL, 0); + t = inst_type_w_(u->body, &env, NULL, 0, 0); t = jl_new_struct(jl_unionall_type, v, t); JL_GC_POP(); return (jl_unionall_t*)t; } +jl_value_t *jl_substitute_var_nothrow(jl_value_t *t, jl_tvar_t *var, jl_value_t *val, int nothrow) +{ + if (val == (jl_value_t*)var) + return t; + nothrow = jl_is_typevar(val) ? 0 : nothrow; + jl_typeenv_t env = { var, val, NULL }; + return inst_type_w_(t, &env, NULL, 1, nothrow); +} + jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val) { if (val == (jl_value_t*)var) return t; jl_typeenv_t env = { var, val, NULL }; - return inst_type_w_(t, &env, NULL, 1); + return inst_type_w_(t, &env, NULL, 1, 0); } jl_value_t *jl_unwrap_unionall(jl_value_t *v) @@ -1519,6 +1607,118 @@ jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u) return t; } +// Create a copy of type expression t where any occurrence of data type x is replaced by y. +// If x does not occur in t, return t without any copy. +// For example, jl_substitute_datatype(Foo{Bar}, Foo{T}, Qux{S}) is Qux{Bar}, with T and S +// free type variables. +// To substitute type variables, use jl_substitute_var instead. +jl_value_t *jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y) +{ + if jl_is_datatype(t) { + jl_datatype_t *typ = (jl_datatype_t*)t; + // For datatypes, call itself recursively on the parameters to form new parameters. + // Then, if typename(t) == typename(x), rewrap the wrapper of y around the new + // parameters. Otherwise, do the same around the wrapper of t. + // This ensures that the types and supertype are properly set. + // Start by checking whether there is a parameter that needs replacing. + long i_firstnewparam = -1; + size_t nparams = jl_svec_len(typ->parameters); + jl_value_t *firstnewparam = NULL; + JL_GC_PUSH1(&firstnewparam); + for (size_t i = 0; i < nparams; i++) { + jl_value_t *param = NULL; + JL_GC_PUSH1(&param); + param = jl_svecref(typ->parameters, i); + firstnewparam = jl_substitute_datatype(param, x, y); + if (param != firstnewparam) { + i_firstnewparam = i; + JL_GC_POP(); + break; + } + JL_GC_POP(); + } + // If one of the parameters needs to be updated, or if the type name is that to + // substitute, create a new datatype + if (i_firstnewparam != -1 || typ->name == x->name) { + jl_datatype_t *uw = typ->name == x->name ? y : typ; // substitution occurs here + jl_value_t *wrapper = uw->name->wrapper; + jl_datatype_t *w = (jl_datatype_t*)jl_unwrap_unionall(wrapper); + jl_svec_t *sv = jl_alloc_svec_uninit(jl_svec_len(uw->parameters)); + JL_GC_PUSH1(&sv); + jl_value_t **vals = jl_svec_data(sv); + // no JL_GC_PUSHARGS(vals, ...) since GC is already aware of sv + for (long i = 0; i < i_firstnewparam; i++) { // copy the identical parameters + vals[i] = jl_svecref(typ->parameters, i); // value + } + if (i_firstnewparam != -1) { // insert the first non-identical parameter + vals[i_firstnewparam] = firstnewparam; + } + for (size_t i = i_firstnewparam+1; i < nparams; i++) { // insert the remaining parameters + vals[i] = jl_substitute_datatype(jl_svecref(typ->parameters, i), x, y); + } + if (jl_is_tuple_type(wrapper)) { + // special case for tuples, since the wrapper (Tuple) does not have as + // many parameters as t (it only has a Vararg instead).
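                // Editorial note, not part of the patch: Tuple's wrapper is effectively
                // Tuple{Vararg{Any}}, so a concrete instance such as Tuple{Foo{Int}, Char} has more
                // parameters than the wrapper has typevars. The rewritten parameter vector sv is
                // therefore applied directly with jl_apply_tuple_type below instead of going through
                // jl_instantiate_type_in_env; e.g. substituting Foo{T} by Qux{S} inside
                // Tuple{Foo{Int}} takes this path and yields Tuple{Qux{Int}}.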
+ t = jl_apply_tuple_type(sv, 0); + } else { + t = jl_instantiate_type_in_env((jl_value_t*)w, (jl_unionall_t*)wrapper, vals); + } + JL_GC_POP(); + } + JL_GC_POP(); + } + else if jl_is_unionall(t) { // recursively call itself on body and var bounds + jl_unionall_t* ut = (jl_unionall_t*)t; + jl_value_t *lb = NULL; + jl_value_t *ub = NULL; + jl_value_t *body = NULL; + JL_GC_PUSH3(&lb, &ub, &body); + lb = jl_substitute_datatype(ut->var->lb, x, y); + ub = jl_substitute_datatype(ut->var->ub, x, y); + body = jl_substitute_datatype(ut->body, x, y); + if (lb != ut->var->lb || ub != ut->var->ub) { + jl_tvar_t *newtvar = jl_new_typevar(ut->var->name, lb, ub); + JL_GC_PUSH1(&newtvar); + body = jl_substitute_var(body, ut->var, (jl_value_t*)newtvar); + t = jl_new_struct(jl_unionall_type, newtvar, body); + JL_GC_POP(); + } + else if (body != ut->body) { + t = jl_new_struct(jl_unionall_type, ut->var, body); + } + JL_GC_POP(); + } + else if jl_is_uniontype(t) { // recursively call itself on a and b + jl_uniontype_t *u = (jl_uniontype_t*)t; + jl_value_t *a = NULL; + jl_value_t *b = NULL; + JL_GC_PUSH2(&a, &b); + a = jl_substitute_datatype(u->a, x, y); + b = jl_substitute_datatype(u->b, x, y); + if (a != u->a || b != u->b) { + t = jl_new_struct(jl_uniontype_type, a, b); + } + JL_GC_POP(); + } + else if jl_is_vararg(t) { // recursively call itself on T + jl_vararg_t *vt = (jl_vararg_t*)t; + if (vt->T) { // vt->T could be NULL + jl_value_t *rT = NULL; + JL_GC_PUSH1(&rT); + rT = jl_substitute_datatype(vt->T, x, y); + if (rT != vt->T) { + jl_task_t *ct = jl_current_task; + t = jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); + jl_set_typetagof((jl_vararg_t *)t, jl_vararg_tag, 0); + ((jl_vararg_t *)t)->T = rT; + ((jl_vararg_t *)t)->N = vt->N; + } + JL_GC_POP(); + } + } + return t; +} + static jl_value_t *lookup_type_stack(jl_typestack_t *stack, jl_datatype_t *tt, size_t ntp, jl_value_t **iparams) { @@ -1593,19 +1793,20 @@ static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int int failed = nofail; for (j = 0; j < n; j++) { jl_value_t *p = key[j]; + size_t repeats = 1; if (jl_is_vararg(p)) { jl_vararg_t *vm = (jl_vararg_t*)p; - if (!nofail && vm->N) - return 0; - // 0x064eeaab is just a randomly chosen constant - hash = bitmix(vm->N ? type_hash(vm->N, &failed) : 0x064eeaab, hash); - if (failed && !nofail) - return 0; + if (vm->N && jl_is_long(vm->N)) + repeats = jl_unbox_long(vm->N); + else + hash = bitmix(0x064eeaab, hash); // 0x064eeaab is just a randomly chosen constant p = vm->T ? vm->T : (jl_value_t*)jl_any_type; } - hash = bitmix(type_hash(p, &failed), hash); + unsigned hashp = type_hash(p, &failed); if (failed && !nofail) return 0; + while (repeats--) + hash = bitmix(hashp, hash); } hash = bitmix(~tn->hash, hash); return hash ? 
hash : 1; @@ -1684,7 +1885,7 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable) dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable); } -static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np) +static int check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np, int nothrow) { jl_value_t *wrapper = tn->wrapper; jl_value_t **bounds; @@ -1702,6 +1903,10 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si assert(jl_is_unionall(wrapper)); jl_tvar_t *tv = ((jl_unionall_t*)wrapper)->var; if (!within_typevar(params[i], bounds[2*i], bounds[2*i+1])) { + if (nothrow) { + JL_GC_POP(); + return 1; + } if (tv->lb != bounds[2*i] || tv->ub != bounds[2*i+1]) // pass a new version of `tv` containing the instantiated bounds tv = jl_new_typevar(tv->name, bounds[2*i], bounds[2*i+1]); @@ -1711,15 +1916,29 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si int j; for (j = 2*i + 2; j < 2*np; j++) { jl_value_t *bj = bounds[j]; - if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type) - bounds[j] = jl_substitute_var(bj, tv, params[i]); + if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type) { + int isub = j & 1; + // use different nothrow level for lb and ub substitution. + // TODO: This assuming the top instantiation could only start with + // `nothrow == 2` or `nothrow == 0`. If `nothrow` is initially set to 1 + // then we might miss some inner error, perhaps the normal path should + // also follow this rule? + jl_value_t *nb = jl_substitute_var_nothrow(bj, tv, params[i], nothrow ? (isub ? 2 : 1) : 0 ); + if (nb == NULL) { + assert(nothrow); + JL_GC_POP(); + return 1; + } + bounds[j] = nb; + } } wrapper = ((jl_unionall_t*)wrapper)->body; } JL_GC_POP(); + return 0; } -jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED +static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED { t = jl_unwrap_unionall(t); if (jl_is_datatype(t)) @@ -1734,7 +1953,7 @@ jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED return NULL; } -int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT +static int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT { while (1) { if (v == (jl_value_t*)var) { @@ -1791,13 +2010,13 @@ int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_c // * `var` does not appear in invariant position // * `var` appears at most once (in covariant position) and not in a `Vararg` // unless the upper bound is concrete (diagonal rule) -int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT +static int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT { int cov_count = 0; return _may_substitute_ub(v, var, 0, &cov_count); } -jl_value_t *normalize_unionalls(jl_value_t *t) +static jl_value_t *normalize_unionalls(jl_value_t *t) { if (jl_is_uniontype(t)) { jl_uniontype_t *u = (jl_uniontype_t*)t; @@ -1814,7 +2033,7 @@ jl_value_t *normalize_unionalls(jl_value_t *t) else if (jl_is_unionall(t)) { jl_unionall_t *u = (jl_unionall_t*)t; jl_value_t *body = normalize_unionalls(u->body); - JL_GC_PUSH1(&body); + JL_GC_PUSH2(&body, &t); if (body != u->body) { t = jl_new_struct(jl_unionall_type, u->var, body); u = (jl_unionall_t*)t; @@ -1835,10 +2054,40 @@ jl_value_t *normalize_unionalls(jl_value_t *t) return t; } +// used 
to expand an NTuple to a flat representation +static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check, int nothrow) +{ + jl_value_t *p = NULL; + JL_GC_PUSH1(&p); + if (check) { + // Since we are skipping making the Vararg and skipping checks later, + // we inline the checks from jl_wrap_vararg here now + if (!jl_valid_type_param(t)) { + if (nothrow) { + JL_GC_POP(); + return NULL; + } + jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + } + // jl_wrap_vararg sometimes simplifies the type, so we only do this 1 time, instead of for each n later + t = normalize_unionalls(t); + p = t; + jl_value_t *tw = extract_wrapper(t); + if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw)) + t = tw; + p = t; + check = 0; // remember that checks are already done now + } + p = (jl_value_t*)jl_svec_fill(n, t); + p = jl_apply_tuple_type((jl_svec_t*)p, check); + JL_GC_POP(); + return p; +} + static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack); static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp, - jl_typestack_t *stack, jl_typeenv_t *env, int check) + jl_typestack_t *stack, jl_typeenv_t *env, int check, int nothrow) { jl_typestack_t top; jl_typename_t *tn = dt->name; @@ -1869,8 +2118,11 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value break; } } - if (pi == jl_bottom_type) + if (pi == jl_bottom_type) { + if (nothrow) + return NULL; jl_errorf("Tuple field type cannot be Union{}"); + } if (cacheable && !jl_is_concrete_type(pi)) cacheable = 0; } @@ -1905,7 +2157,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value // normalize types equal to wrappers (prepare for Typeofwrapper) jl_value_t *tw = extract_wrapper(pi); if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) && - jl_types_equal(pi, tw)) { + !jl_has_free_typevars(pi) && jl_types_equal(pi, tw)) { iparams[i] = tw; if (p) jl_gc_wb(p, tw); } @@ -1930,7 +2182,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value // for whether this is even valid if (check && !istuple) { assert(ntp > 0); - check_datatype_parameters(tn, iparams, ntp); + if (check_datatype_parameters(tn, iparams, ntp, nothrow)) + return NULL; } else if (ntp == 0 && jl_emptytuple_type != NULL) { // empty tuple type case @@ -1957,7 +2210,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (nt == 0 || !jl_has_free_typevars(va0)) { if (ntp == 1) { JL_GC_POP(); - return jl_tupletype_fill(nt, va0); + return jl_tupletype_fill(nt, va0, 0, 0); } size_t i, l; p = jl_alloc_svec(ntp - 1 + nt); @@ -1966,31 +2219,27 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value l = ntp - 1 + nt; for (; i < l; i++) jl_svecset(p, i, va0); - jl_value_t *ndt = jl_apply_tuple_type(p); + size_t np = jl_svec_len(p); + jl_value_t **pp = jl_svec_data(p); + jl_value_t *ndt = inst_datatype_inner(jl_anytuple_type, p, pp, np, NULL, NULL, check, nothrow); JL_GC_POP(); return ndt; } } } - // move array of instantiated parameters to heap; we need to keep it - if (p == NULL) { - p = jl_alloc_svec_uninit(ntp); - for (size_t i = 0; i < ntp; i++) - jl_svecset(p, i, iparams[i]); - } - // try to simplify some type parameters if (check && tn != jl_type_typename) { - size_t i; int changed = 0; if (istuple) // normalization might change Tuple's, but not other 
types's, cacheable status cacheable = 1; + size_t i; for (i = 0; i < ntp; i++) { - jl_value_t *newp = normalize_unionalls(iparams[i]); - if (newp != iparams[i]) { + jl_value_t *pi = iparams[i]; + jl_value_t *newp = normalize_unionalls(pi); + if (newp != pi) { iparams[i] = newp; - jl_svecset(p, i, newp); + if (p) jl_gc_wb(p, newp); changed = 1; } if (istuple && cacheable && !jl_is_concrete_type(newp)) @@ -2016,12 +2265,39 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value } } + // try to reduce duplication in objects (if the caller didn't already check) by + // comparing them against a list of objects already known to be globally rooted and + // swapping them as possible + if (check && jl_global_roots_list != NULL) { + for (size_t i = 0; i < ntp; i++) { + jl_value_t *pi = iparams[i]; + if (cacheable || !jl_has_free_typevars(pi)) { + pi = jl_as_global_root(pi, cacheable); + if (pi != NULL) { + iparams[i] = pi; + if (p) jl_gc_wb(p, pi); + } + } + } + } + + // move array of instantiated parameters to heap; we need to keep it + if (p == NULL) { + p = jl_alloc_svec_uninit(ntp); + for (size_t i = 0; i < ntp; i++) { + jl_svecset(p, i, iparams[i]); + } + } + + ndt = jl_new_uninitialized_datatype(); + + // now that most allocations are done // acquire the write lock now that we know we need a new object // since we're going to immediately leak it globally via the instantiation stack if (cacheable) { JL_LOCK(&typecache_lock); // Might GC jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); - if (lkup != NULL) { + if (lkup) { JL_UNLOCK(&typecache_lock); // Might GC JL_GC_POP(); return lkup; @@ -2029,10 +2305,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value } // create and initialize new type - ndt = jl_new_uninitialized_datatype(); ndt->isprimitivetype = dt->isprimitivetype; // Usually dt won't have ismutationfree set at this point, but it is - // overriden for `Type`, which we handle here. + // overridden for `Type`, which we handle here. ndt->ismutationfree = dt->ismutationfree; // associate these parameters with the new type on // the stack, in case one of its field types references it. 
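/* Editorial note, not part of the patch: the caching sequence above is a check/lock/re-check
   protocol. ndt is allocated with jl_new_uninitialized_datatype() before JL_LOCK(&typecache_lock)
   so that the allocation, which may trigger GC, stays outside the critical section, and
   lookup_type() is queried again under the lock because another thread may have constructed and
   cached an equal type concurrently. The jl_as_global_root() pass before it is an opportunistic
   dedup: parameters that already have a globally rooted equivalent are swapped for it so that
   cached types share parameter objects. */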
@@ -2045,6 +2320,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value ndt->parameters = p; jl_gc_wb(ndt, ndt->parameters); ndt->types = NULL; // to be filled in below + int invalid = 0; if (istuple) { ndt->types = p; // TODO: this may need to filter out certain types } @@ -2052,30 +2328,70 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value jl_value_t *names_tup = jl_svecref(p, 0); jl_value_t *values_tt = jl_svecref(p, 1); if (!jl_has_free_typevars(names_tup) && !jl_has_free_typevars(values_tt)) { - if (!jl_is_tuple(names_tup)) - jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup); + if (!jl_is_tuple(names_tup)) { + if (!nothrow) + jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup); + invalid = 1; + } size_t nf = jl_nfields(names_tup); for (size_t i = 0; i < nf; i++) { jl_value_t *ni = jl_fieldref(names_tup, i); - if (!jl_is_symbol(ni)) - jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni); + if (!jl_is_symbol(ni)) { + if (!nothrow) + jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni); + invalid = 1; break; + } for (size_t j = 0; j < i; j++) { - if (ni == jl_fieldref_noalloc(names_tup, j)) - jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni)); + if (ni == jl_fieldref_noalloc(names_tup, j)) { + if (!nothrow) + jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni)); + invalid = 1; break; + } + } + if (invalid) break; + } + if (values_tt == jl_bottom_type && nf > 0) { + ndt->types = jl_svec_fill(nf, jl_bottom_type); + } + else { + if (!jl_is_datatype(values_tt)) { + // should have been checked within `check_datatype_parameters`. 
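                // Editorial illustration, not part of the patch: for
                // NamedTuple{(:a, :b), Tuple{Int, String}} the checks in this block see
                // names_tup == (:a, :b) and values_tt == Tuple{Int, String}, and ndt->types is then
                // borrowed from values_tt's parameters; a length mismatch such as
                // NamedTuple{(:a,), Tuple{Int, Int}} is rejected just below.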
+ jl_error("NamedTuple field type must be a tuple datatype"); } + if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf) { + if (!nothrow) + jl_error("NamedTuple names and field types must have matching lengths"); + invalid = 1; + } + ndt->types = ((jl_datatype_t*)values_tt)->parameters; } - if (!jl_is_datatype(values_tt)) - jl_error("NamedTuple field type must be a tuple type"); - if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf) - jl_error("NamedTuple names and field types must have matching lengths"); - ndt->types = ((jl_datatype_t*)values_tt)->parameters; jl_gc_wb(ndt, ndt->types); } else { - ndt->types = jl_emptysvec; // XXX: this is essentially always false + ndt->types = jl_emptysvec; // XXX: this is essentially always incorrect + } + } + else if (tn == jl_genericmemoryref_typename || tn == jl_genericmemory_typename) { + jl_value_t *isatomic = jl_svecref(p, 0); + if (!jl_is_typevar(isatomic) && !jl_is_symbol(isatomic)) { + if (!nothrow) + jl_type_error_rt("GenericMemory", "isatomic parameter", (jl_value_t*)jl_symbol_type, isatomic); + invalid = 1; + } + jl_value_t *addrspace = jl_svecref(p, 2); + if (!jl_is_typevar(addrspace) && !jl_is_addrspace(addrspace)) { + if (!nothrow) + jl_type_error_rt("GenericMemory", "addrspace parameter", (jl_value_t*)jl_addrspace_type, addrspace); + invalid = 1; } } + if (nothrow && invalid) { + if (cacheable) + JL_UNLOCK(&typecache_lock); + JL_GC_POP(); + return NULL; + } jl_datatype_t *primarydt = ((jl_datatype_t*)jl_unwrap_unionall(tn->wrapper)); jl_precompute_memoized_dt(ndt, cacheable); if (primarydt->layout) @@ -2085,7 +2401,14 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value ndt->super = jl_any_type; } else if (dt->super) { - ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, check); + jl_value_t *super = inst_type_w_((jl_value_t*)dt->super, env, stack, check, nothrow); + if (nothrow && super == NULL) { + if (cacheable) + JL_UNLOCK(&typecache_lock); + JL_GC_POP(); + return NULL; + } + ndt->super = (jl_datatype_t *)super; jl_gc_wb(ndt, ndt->super); } jl_svec_t *ftypes = dt->types; @@ -2109,9 +2432,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value else if (cacheable) { // recursively instantiate the types of the fields if (dt->types == NULL) - ndt->types = jl_compute_fieldtypes(ndt, stack); + ndt->types = jl_compute_fieldtypes(ndt, stack, cacheable); else - ndt->types = inst_ftypes(ftypes, env, stack); + ndt->types = inst_ftypes(ftypes, env, stack, cacheable); jl_gc_wb(ndt, ndt->types); } } @@ -2131,19 +2454,19 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value return (jl_value_t*)ndt; } -static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params) +static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params, int check) { - return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, 1); + return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, check, 0); } -JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params) +JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check) { - return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params); + return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params, check); } JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np) { - return jl_apply_tuple_type_v_(p, np, NULL); 
+ return jl_apply_tuple_type_v_(p, np, NULL, 1); } jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf) @@ -2172,22 +2495,23 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size } jl_svecset(params, i, ai); } - tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1); + tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1, 0); JL_GC_POP(); } return tt; } -static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack) +static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack, int cacheable) { size_t i; size_t lp = jl_svec_len(p); jl_svec_t *np = jl_alloc_svec(lp); - JL_GC_PUSH1(&np); + jl_value_t *pi = NULL; + JL_GC_PUSH2(&np, &pi); for (i = 0; i < lp; i++) { - jl_value_t *pi = jl_svecref(p, i); + pi = jl_svecref(p, i); JL_TRY { - pi = inst_type_w_(pi, env, stack, 1); + pi = inst_type_w_(pi, env, stack, 1, 0); if (!jl_is_type(pi) && !jl_is_typevar(pi)) { pi = jl_bottom_type; } @@ -2195,24 +2519,27 @@ static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *s JL_CATCH { pi = jl_bottom_type; } - jl_svecset(np, i, pi); + jl_value_t *globalpi = jl_as_global_root(pi, cacheable); + jl_svecset(np, i, globalpi ? globalpi : pi); } JL_GC_POP(); return np; } -static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check) +static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow) { jl_datatype_t *tt = (jl_datatype_t*)t; jl_svec_t *tp = tt->parameters; size_t ntp = jl_svec_len(tp); - // Instantiate NTuple{3,Int} + // Instantiate Tuple{Vararg{T,N}} where T is fixed and N is known, such as Dims{3} + // And avoiding allocating the intermediate steps // Note this does not instantiate Tuple{Vararg{Int,3}}; that's done in inst_datatype_inner + // Note this does not instantiate NTuple{N,T}, since it is unnecessary and inefficient to expand that now if (jl_is_va_tuple(tt) && ntp == 1) { - // If this is a Tuple{Vararg{T,N}} with known N, expand it to + // If this is a Tuple{Vararg{T,N}} with known N and T, expand it to // a fixed-length tuple jl_value_t *T=NULL, *N=NULL; - jl_value_t *va = jl_unwrap_unionall(jl_tparam0(tt)); + jl_value_t *va = jl_tparam0(tt); jl_value_t *ttT = jl_unwrap_vararg(va); jl_value_t *ttN = jl_unwrap_vararg_num(va); jl_typeenv_t *e = env; @@ -2223,11 +2550,14 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_ N = e->val; e = e->prev; } - if (T != NULL && N != NULL && jl_is_long(N)) { + if (T != NULL && N != NULL && jl_is_long(N)) { // TODO: && !jl_has_free_typevars(T) to match inst_datatype_inner, or even && jl_is_concrete_type(T) + // Since this is skipping jl_wrap_vararg, we inline the checks from it here ssize_t nt = jl_unbox_long(N); - if (nt < 0) - jl_errorf("size or dimension is negative: %zd", nt); - return jl_tupletype_fill(nt, T); + if (nt >= 0) + return jl_tupletype_fill(nt, T, check, nothrow); + if (nothrow) + return NULL; + jl_errorf("Vararg length is negative: %zd", nt); } } jl_value_t **iparams; @@ -2239,23 +2569,36 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_ iparams[0] = (jl_value_t*)ip_heap; iparams = jl_svec_data(ip_heap); } - int bound = 0; - int i; + int i, bound = 0; for (i = 0; i < ntp; i++) { jl_value_t *elt = jl_svecref(tp, i); - jl_value_t 
*pi = inst_type_w_(elt, env, stack, check); + jl_value_t *pi = inst_type_w_(elt, env, stack, check, nothrow); + if (pi == NULL) { + assert(nothrow); + if (nothrow == 1 || (i == ntp-1 && jl_is_vararg(elt))) { + t = NULL; + break; + } + else { + pi = jl_bottom_type; + } + } iparams[i] = pi; if (ip_heap) jl_gc_wb(ip_heap, pi); bound |= (pi != elt); } - if (bound) - t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check); + if (t != NULL && bound) + t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check, nothrow); JL_GC_POP(); return t; } -static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check) +// `nothrow` means that when type checking fails, the type instantiation should +// return `NULL` instead of immediately throwing an error. If `nothrow` == 2 then +// we further assume that the imprecise instantiation for non invariant parameters +// is acceptable, and inner error (`NULL`) would be ignored. +static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check, int nothrow) { size_t i; if (jl_is_typevar(t)) { @@ -2275,42 +2618,73 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t jl_value_t *var = NULL; jl_value_t *newbody = NULL; JL_GC_PUSH3(&lb, &var, &newbody); - lb = inst_type_w_(ua->var->lb, env, stack, check); - var = inst_type_w_(ua->var->ub, env, stack, check); - if (lb != ua->var->lb || var != ua->var->ub) { - var = (jl_value_t*)jl_new_typevar(ua->var->name, lb, var); - } - else { - var = (jl_value_t*)ua->var; - } - jl_typeenv_t newenv = { ua->var, var, env }; - newbody = inst_type_w_(ua->body, &newenv, stack, check); - if (newbody == (jl_value_t*)jl_emptytuple_type) { - // NTuple{0} => Tuple{} can make a typevar disappear - t = (jl_value_t*)jl_emptytuple_type; + // set nothrow <= 1 to ensure lb's accuracy. + lb = inst_type_w_(ua->var->lb, env, stack, check, nothrow ? 1 : 0); + if (lb == NULL) { + assert(nothrow); + t = NULL; + } + if (t != NULL) { + var = inst_type_w_(ua->var->ub, env, stack, check, nothrow); + if (var == NULL) { + if (lb == jl_bottom_type) + var = jl_bottom_type; + else + t = NULL; + } + else if (lb != ua->var->lb || var != ua->var->ub) { + var = (jl_value_t*)jl_new_typevar(ua->var->name, lb, var); + } + else { + var = (jl_value_t*)ua->var; + } } - else if (newbody != ua->body || var != (jl_value_t*)ua->var) { - // if t's parameters are not bound in the environment, return it uncopied (#9378) - t = jl_new_struct(jl_unionall_type, var, newbody); + if (t != NULL) { + jl_typeenv_t newenv = { ua->var, var, env }; + newbody = inst_type_w_(ua->body, &newenv, stack, check, nothrow); + if (newbody == NULL) { + t = NULL; + } + else if (!jl_has_typevar(newbody, (jl_tvar_t *)var)) { + // inner instantiation might make a typevar disappear, e.g. + // NTuple{0,T} => Tuple{} + t = newbody; + } + else if (newbody != ua->body || var != (jl_value_t*)ua->var) { + // if t's parameters are not bound in the environment, return it uncopied (#9378) + t = jl_new_struct(jl_unionall_type, var, newbody); + } } JL_GC_POP(); return t; } if (jl_is_uniontype(t)) { jl_uniontype_t *u = (jl_uniontype_t*)t; - jl_value_t *a = inst_type_w_(u->a, env, stack, check); + jl_value_t *a = inst_type_w_(u->a, env, stack, check, nothrow); jl_value_t *b = NULL; JL_GC_PUSH2(&a, &b); - b = inst_type_w_(u->b, env, stack, check); + b = inst_type_w_(u->b, env, stack, check, nothrow); + if (nothrow) { + // ensure jl_type_union nothrow. 
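                // Editorial note, not part of the patch: jl_type_union raises a TypeError for
                // arguments that are neither types nor typevars, so under nothrow any such
                // instantiation result is demoted to NULL here; with nothrow == 2 the surviving
                // arm, if any, is returned as an imprecise stand-in for the union, matching the
                // contract described in the comment above inst_type_w_.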
+ if (a && !(jl_is_typevar(a) || jl_is_type(a))) + a = NULL; + if (b && !(jl_is_typevar(b) || jl_is_type(b))) + b = NULL; + } if (a != u->a || b != u->b) { - if (check) { - jl_value_t *uargs[2] = {a, b}; - t = jl_type_union(uargs, 2); - } - else { + if (!check) { // fast path for `jl_rename_unionall`. t = jl_new_struct(jl_uniontype_type, a, b); } + else if (a == NULL || b == NULL) { + assert(nothrow); + t = nothrow == 1 ? NULL : a == NULL ? b : a; + } + else { + assert(a != NULL && b != NULL); + jl_value_t *uargs[2] = {a, b}; + t = jl_type_union(uargs, 2); + } } JL_GC_POP(); return t; @@ -2321,13 +2695,22 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t jl_value_t *N = NULL; JL_GC_PUSH2(&T, &N); if (v->T) { - T = inst_type_w_(v->T, env, stack, check); - if (v->N) - N = inst_type_w_(v->N, env, stack, check); - } - if (T != v->T || N != v->N) { - t = (jl_value_t*)jl_wrap_vararg(T, N, check); + T = inst_type_w_(v->T, env, stack, check, nothrow); + if (T == NULL) { + if (nothrow == 2) + T = jl_bottom_type; + else + t = NULL; + } + if (t && v->N) { + // set nothrow <= 1 to ensure invariant parameter's accuracy. + N = inst_type_w_(v->N, env, stack, check, nothrow ? 1 : 0); + if (N == NULL) + t = NULL; + } } + if (t && (T != v->T || N != v->N)) + t = (jl_value_t*)jl_wrap_vararg(T, N, check, nothrow); JL_GC_POP(); return t; } @@ -2339,20 +2722,26 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t return t; jl_typename_t *tn = tt->name; if (tn == jl_tuple_typename) - return inst_tuple_w_(t, env, stack, check); + return inst_tuple_w_(t, env, stack, check, nothrow); size_t ntp = jl_svec_len(tp); jl_value_t **iparams; JL_GC_PUSHARGS(iparams, ntp); int bound = 0; for (i = 0; i < ntp; i++) { jl_value_t *elt = jl_svecref(tp, i); - jl_value_t *pi = inst_type_w_(elt, env, stack, check); + // set nothrow <= 1 to ensure invariant parameter's accuracy. + jl_value_t *pi = inst_type_w_(elt, env, stack, check, nothrow ? 
1 : 0); + if (pi == NULL) { + assert(nothrow); + t = NULL; + break; + } iparams[i] = pi; bound |= (pi != elt); } // if t's parameters are not bound in the environment, return it uncopied (#9378) - if (bound) - t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check); + if (t != NULL && bound) + t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check, nothrow); JL_GC_POP(); return t; } @@ -2363,7 +2752,7 @@ static jl_value_t *instantiate_with(jl_value_t *t, jl_value_t **env, size_t n, j jl_typeenv_t en = { (jl_tvar_t*)env[0], env[1], te }; return instantiate_with(t, &env[2], n-1, &en ); } - return inst_type_w_(t, te, NULL, 1); + return inst_type_w_(t, te, NULL, 1, 0); } jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n) @@ -2377,7 +2766,7 @@ static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *en if (jl_is_unionall(env->body)) return _jl_instantiate_type_in_env(ty, (jl_unionall_t*)env->body, vals + 1, &en, stack); else - return inst_type_w_(ty, &en, stack, 1); + return inst_type_w_(ty, &en, stack, 1, 0); } JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals) @@ -2399,8 +2788,10 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t) return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t); } -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check) +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow) { + int valid = 1; + jl_vararg_t *vm = NULL; jl_task_t *ct = jl_current_task; JL_GC_PUSH1(&t); if (check) { @@ -2411,36 +2802,49 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check) // values and not the bounds of variables. /* jl_tvar_t *N = (jl_tvar_t*)n; - if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) - jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); + if (valid && !(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) { + if (!nothrow) + jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); + invalid = 1; + } */ } - else if (!jl_is_long(n)) { - jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); + else if (valid && !jl_is_long(n)) { + if (!nothrow) + jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); + valid = 0; } - else if (jl_unbox_long(n) < 0) { - jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + else if (valid && jl_unbox_long(n) < 0) { + if (!nothrow) + jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + valid = 0; } } if (t) { - if (!jl_valid_type_param(t)) { - jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + if (valid && !jl_valid_type_param(t)) { + if (!nothrow) + jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + valid = 0; + } + if (valid) { + t = normalize_unionalls(t); + jl_value_t *tw = extract_wrapper(t); + if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw)) + t = tw; } - t = normalize_unionalls(t); - jl_value_t *tw = extract_wrapper(t); - if (tw && t != tw && jl_types_equal(t, tw)) - t = tw; } } - jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); - jl_set_typetagof(vm, jl_vararg_tag, 0); - vm->T = t; - vm->N = n; + if (valid) { + vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); + jl_set_typetagof(vm, jl_vararg_tag, 0); + vm->T = t; + vm->N = n; + } JL_GC_POP(); return vm; } -JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t 
*st JL_PROPAGATES_ROOT, void *stack, int cacheable) { assert(st->name != jl_namedtuple_typename && st->name != jl_tuple_typename); jl_datatype_t *wt = (jl_datatype_t*)jl_unwrap_unionall(st->name->wrapper); @@ -2460,7 +2864,7 @@ JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_RO jl_typestack_t top; top.tt = st; top.prev = (jl_typestack_t*)stack; - st->types = inst_ftypes(wt->types, &env[n - 1], &top); + st->types = inst_ftypes(wt->types, &env[n - 1], &top, cacheable); jl_gc_wb(st, st->types); return st->types; } @@ -2477,7 +2881,7 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw! if (partial == NULL) return; if (n == 0) { - assert(jl_array_len(partial) == 0); + assert(jl_array_nrows(partial) == 0); return; } @@ -2488,28 +2892,34 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw! env[i].prev = i == 0 ? NULL : &env[i - 1]; } - for (j = 0; j < jl_array_len(partial); j++) { + for (j = 0; j < jl_array_nrows(partial); j++) { jl_datatype_t *ndt = (jl_datatype_t*)jl_array_ptr_ref(partial, j); + if (ndt == NULL) + continue; assert(jl_unwrap_unionall(ndt->name->wrapper) == (jl_value_t*)t); for (i = 0; i < n; i++) env[i].val = jl_svecref(ndt->parameters, i); - ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1); + ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1, 0); jl_gc_wb(ndt, ndt->super); } if (t->types != jl_emptysvec) { - for (j = 0; j < jl_array_len(partial); j++) { + for (j = 0; j < jl_array_nrows(partial); j++) { jl_datatype_t *ndt = (jl_datatype_t*)jl_array_ptr_ref(partial, j); + if (ndt == NULL) + continue; for (i = 0; i < n; i++) env[i].val = jl_svecref(ndt->parameters, i); assert(ndt->types == NULL); - ndt->types = inst_ftypes(t->types, &env[n - 1], &top); + ndt->types = inst_ftypes(t->types, &env[n - 1], &top, 1); jl_gc_wb(ndt, ndt->types); if (ndt->isconcretetype) { // cacheable jl_compute_field_offsets(ndt); } + jl_array_ptr_set(partial, j, NULL); } + t->name->partial = NULL; } else { assert(jl_field_names(t) == jl_emptysvec); @@ -2524,19 +2934,13 @@ static jl_tvar_t *tvar(const char *name) (jl_value_t*)jl_any_type); } -void export_small_typeof(void) +void export_jl_small_typeof(void) { - void *copy; -#ifdef _OS_WINDOWS_ - jl_dlsym(jl_libjulia_handle, "small_typeof", &copy, 1); -#else - jl_dlsym(jl_libjulia_internal_handle, "small_typeof", &copy, 1); -#endif - memcpy(copy, &small_typeof, sizeof(small_typeof)); + memcpy(&jl_small_typeof, &ijl_small_typeof, sizeof(jl_small_typeof)); } #define XX(name) \ - small_typeof[(jl_##name##_tag << 4) / sizeof(*small_typeof)] = jl_##name##_type; \ + ijl_small_typeof[(jl_##name##_tag << 4) / sizeof(*ijl_small_typeof)] = jl_##name##_type; \ jl_##name##_type->smalltag = jl_##name##_tag; void jl_init_types(void) JL_GC_DISABLED { @@ -2603,25 +3007,26 @@ void jl_init_types(void) JL_GC_DISABLED jl_typename_type->name->mt = jl_nonfunction_mt; jl_typename_type->super = jl_any_type; jl_typename_type->parameters = jl_emptysvec; - jl_typename_type->name->n_uninitialized = 15 - 2; - jl_typename_type->name->names = jl_perm_symsvec(15, "name", "module", + jl_typename_type->name->n_uninitialized = 16 - 2; + jl_typename_type->name->names = jl_perm_symsvec(16, "name", "module", "names", "atomicfields", "constfields", "wrapper", "Typeofwrapper", "cache", "linearcache", "mt", "partial", "hash", "n_uninitialized", "flags", // "abstract",
"mutable", "mayinlinealloc", - "max_methods"); - const static uint32_t typename_constfields[1] = { 0x00003a3f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13) + "max_methods", "constprop_heuristic"); + const static uint32_t typename_constfields[1] = { 0x00003a27 }; // (1<<0)|(1<<1)|(1<<2)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13) ; TODO: put back (1<<3)|(1<<4) in this list const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8) jl_typename_type->name->constfields = typename_constfields; jl_typename_type->name->atomicfields = typename_atomicfields; jl_precompute_memoized_dt(jl_typename_type, 1); - jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/, + jl_typename_type->types = jl_svec(16, jl_symbol_type, jl_any_type /*jl_module_type*/, jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/, jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type, jl_methtable_type, jl_any_type, jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/, jl_any_type /*jl_uint8_type*/, + jl_any_type /*jl_uint8_type*/, jl_any_type /*jl_uint8_type*/); jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1); @@ -2718,8 +3123,9 @@ void jl_init_types(void) JL_GC_DISABLED XX(vararg); // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist jl_vararg_type->name->mayinlinealloc = 0; + jl_vararg_type->ismutationfree = 1; - jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0)); + jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0, 0)); jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params, jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0); jl_tuple_typename = jl_anytuple_type->name; @@ -2730,7 +3136,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_anytuple_type->layout = NULL; jl_typeofbottom_type->super = jl_wrap_Type(jl_bottom_type); - jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec); + jl_typeofbottom_type->super->layout = jl_typeofbottom_type->layout; // the only abstract type with a layout + jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec, 0); jl_emptytuple = jl_gc_permobj(0, jl_emptytuple_type); jl_emptytuple_type->instance = jl_emptytuple; @@ -2834,8 +3241,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type), jl_emptysvec, 0, 1, 4); - const static uint32_t typemap_entry_constfields[1] = { 0x000003fe }; // (1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9) - const static uint32_t typemap_entry_atomicfields[1] = { 0x00000001 }; // (1<<0) + const static uint32_t typemap_entry_constfields[1] = { 0x000003ce }; // (1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<7)|(1<<8)|(1<<9) + const static uint32_t typemap_entry_atomicfields[1] = { 0x00000031 }; // (1<<0)|(1<<4)|(1<<5) jl_typemap_entry_type->name->constfields = typemap_entry_constfields; jl_typemap_entry_type->name->atomicfields = typemap_entry_atomicfields; @@ -2844,7 +3251,68 @@ void jl_init_types(void) JL_GC_DISABLED jl_function_type->name->mt = NULL; // subtypes of Function have independent method tables jl_builtin_type->name->mt = NULL; // so they don't share the Any type table - jl_svec_t *tv = jl_svec2(tvar("T"), tvar("N")); + jl_svec_t *tv; + + jl_module_type = + jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec, + jl_emptysvec, 
jl_emptysvec, jl_emptysvec, 0, 1, 0); + XX(module); + assert(jl_module_type->instance == NULL); + jl_compute_field_offsets(jl_module_type); + + jl_binding_partition_type = + jl_new_datatype(jl_symbol("BindingPartition"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(5, "restriction", "min_world", "max_world", "next", "reserved"), + jl_svec(5, jl_uint64_type /* Special GC-supported union of Any and flags*/, + jl_ulong_type, jl_ulong_type, jl_any_type/*jl_binding_partition_type*/, jl_ulong_type), + jl_emptysvec, 0, 1, 0); + const static uint32_t binding_partition_atomicfields[] = { 0b01101 }; // Set fields 1, 3, 4 as atomic + jl_binding_partition_type->name->atomicfields = binding_partition_atomicfields; + + jl_binding_type = + jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(4, "globalref", "value", "partitions", "flags"), + jl_svec(4, jl_any_type/*jl_globalref_type*/, jl_any_type, jl_binding_partition_type, jl_uint8_type), + jl_emptysvec, 0, 1, 0); + const static uint32_t binding_atomicfields[] = { 0x0005 }; // Set fields 1, 3 as atomic + jl_binding_type->name->atomicfields = binding_atomicfields; + const static uint32_t binding_constfields[] = { 0x0002 }; // Set fields 2 as constant + jl_binding_type->name->constfields = binding_constfields; + + jl_globalref_type = + jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(3, "mod", "name", "binding"), + jl_svec(3, jl_module_type, jl_symbol_type, jl_binding_type), + jl_emptysvec, 0, 0, 3); + + core = jl_new_module(jl_symbol("Core"), NULL); + core->parent = core; + jl_type_typename->mt->module = core; + jl_core_module = core; + core = NULL; // not ready yet to use + + tv = jl_svec1(tvar("Backend")); + jl_addrspace_typename = + jl_new_primitivetype((jl_value_t*)jl_symbol("AddrSpace"), core, jl_any_type, tv, 8)->name; + jl_addrspace_type = (jl_unionall_t*)jl_addrspace_typename->wrapper; + jl_addrspacecore_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_addrspace_type, (jl_value_t*)jl_core_module); + jl_value_t *cpumem = jl_permbox8(jl_addrspacecore_type, 0, 0); + + tv = jl_svec1(tvar("T")); + jl_ref_type = (jl_unionall_t*) + jl_new_abstracttype((jl_value_t*)jl_symbol("Ref"), core, jl_any_type, tv)->name->wrapper; + + tv = jl_svec1(tvar("T")); + jl_pointer_typename = + jl_new_primitivetype((jl_value_t*)jl_symbol("Ptr"), core, + (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv), 1), + tv, + sizeof(void*) * 8)->name; + jl_pointer_type = (jl_unionall_t*)jl_pointer_typename->wrapper; + jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type); + jl_voidpointer_type = (jl_datatype_t*)pointer_void; + + tv = jl_svec2(tvar("T"), tvar("N")); jl_abstractarray_type = (jl_unionall_t*) jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractArray"), core, jl_any_type, tv)->name->wrapper; @@ -2855,22 +3323,67 @@ void jl_init_types(void) JL_GC_DISABLED (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_abstractarray_type, jl_svec_data(tv), 2), tv)->name->wrapper; + tv = jl_svec(3, tvar("isatomic"), tvar("T"), tvar("addrspace")); + jl_datatype_t *jl_memory_supertype = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_densearray_type, jl_svecref(tv, 1), jl_box_long(1)); + jl_datatype_t *memory_datatype = + jl_new_datatype(jl_symbol("GenericMemory"), core, jl_memory_supertype, tv, + jl_perm_symsvec(2, "length", "ptr"), + jl_svec(2, jl_long_type, pointer_void), + jl_emptysvec, 0, 1, 2); + jl_genericmemory_typename = 
memory_datatype->name; + jl_genericmemory_type = (jl_unionall_t*)jl_genericmemory_typename->wrapper; + const static uint32_t memory_constfields[1] = { 0x00000003 }; // (1<<1)|(1<<0) + memory_datatype->name->constfields = memory_constfields; + memory_datatype->ismutationfree = 0; + + jl_datatype_t *jl_memoryref_supertype = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_ref_type, jl_svecref(tv, 1)); + jl_datatype_t *memoryref_datatype = + jl_new_datatype(jl_symbol("GenericMemoryRef"), core, jl_memoryref_supertype, tv, + jl_perm_symsvec(2, "ptr_or_offset", "mem"), + jl_svec(2, pointer_void, memory_datatype), + jl_emptysvec, 0, 0, 2); + jl_genericmemoryref_typename = memoryref_datatype->name; + jl_genericmemoryref_type = (jl_unionall_t*)jl_genericmemoryref_typename->wrapper; + memoryref_datatype->ismutationfree = 0; + + jl_memory_any_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem); + jl_memory_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem); + jl_memory_uint16_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint16_type, cpumem); + jl_memory_uint32_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint32_type, cpumem); + jl_memory_uint64_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint64_type, cpumem); + jl_memoryref_any_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem); + jl_memoryref_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem); + tv = jl_svec2(tvar("T"), tvar("N")); - jl_array_type = (jl_unionall_t*) - jl_new_datatype(jl_symbol("Array"), core, + jl_array_typename = jl_new_datatype(jl_symbol("Array"), core, (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2), - tv, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper; - jl_array_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->name; - jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type)); + tv, + jl_perm_symsvec(2, "ref", "size"), + jl_svec(2, + jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, jl_svecref(tv, 0), cpumem), + jl_apply_type1((jl_value_t*)jl_tuple_type, (jl_value_t*)jl_wrap_vararg((jl_value_t*)jl_long_type, jl_svecref(tv, 1), 0, 0))), + jl_emptysvec, 0, 1, 2)->name; + jl_array_type = (jl_unionall_t*)jl_array_typename->wrapper; jl_array_any_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_any_type, jl_box_long(1)); jl_array_symbol_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_symbol_type, jl_box_long(1)); jl_array_uint8_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint8_type, jl_box_long(1)); + jl_array_uint32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint32_type, jl_box_long(1)); jl_array_int32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_int32_type, jl_box_long(1)); jl_array_uint64_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint64_type, jl_box_long(1)); jl_an_empty_vec_any = (jl_value_t*)jl_alloc_vec_any(0); // used internally - 
jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); + jl_an_empty_memory_any = (jl_value_t*)jl_alloc_memory_any(0); // used internally + jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); + + // finish initializing module Core + core = jl_core_module; + jl_atomic_store_relaxed(&core->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any); + // export own name, so "using Foo" makes "Foo" itself visible + jl_set_const(core, core->name, (jl_value_t*)core); + jl_module_public(core, core->name, 1); + jl_set_const(core, jl_symbol("CPU"), (jl_value_t*)cpumem); + core = NULL; jl_expr_type = jl_new_datatype(jl_symbol("Expr"), core, @@ -2879,13 +3392,6 @@ void jl_init_types(void) JL_GC_DISABLED jl_svec(2, jl_symbol_type, jl_array_any_type), jl_emptysvec, 0, 1, 2); - jl_module_type = - jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec, - jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0); - XX(module); - jl_module_type->instance = NULL; - jl_compute_field_offsets(jl_module_type); - jl_value_t *symornothing[2] = { (jl_value_t*)jl_symbol_type, (jl_value_t*)jl_void_type }; jl_linenumbernode_type = jl_new_datatype(jl_symbol("LineNumberNode"), core, jl_any_type, jl_emptysvec, @@ -2894,10 +3400,10 @@ void jl_init_types(void) JL_GC_DISABLED jl_emptysvec, 0, 0, 2); jl_lineinfonode_type = - jl_new_datatype(jl_symbol("LineInfoNode"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(5, "module", "method", "file", "line", "inlined_at"), - jl_svec(5, jl_module_type, jl_any_type, jl_symbol_type, jl_int32_type, jl_int32_type), - jl_emptysvec, 0, 0, 5); + jl_new_datatype(jl_symbol("LegacyLineInfoNode"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(3, "file", "line", "inlined_at"), + jl_svec(3, jl_symbol_type, jl_int32_type, jl_int32_type), + jl_emptysvec, 0, 0, 3); jl_gotonode_type = jl_new_datatype(jl_symbol("GotoNode"), core, jl_any_type, jl_emptysvec, @@ -2911,6 +3417,12 @@ void jl_init_types(void) JL_GC_DISABLED jl_svec(2, jl_any_type, jl_long_type), jl_emptysvec, 0, 0, 2); + jl_enternode_type = + jl_new_datatype(jl_symbol("EnterNode"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(2, "catch_dest", "scope"), + jl_svec(2, jl_long_type, jl_any_type), + jl_emptysvec, 0, 0, 1); + jl_returnnode_type = jl_new_datatype(jl_symbol("ReturnNode"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(1, "val"), @@ -2953,16 +3465,30 @@ void jl_init_types(void) JL_GC_DISABLED jl_svec(1, jl_slotnumber_type), jl_emptysvec, 0, 0, 1); + jl_debuginfo_type = + jl_new_datatype(jl_symbol("DebugInfo"), core, + jl_any_type, jl_emptysvec, + jl_perm_symsvec(4, + "def", + "linetable", + "edges", + "codelocs"), + jl_svec(4, + jl_any_type, // union(jl_method_instance_type, jl_method_type, jl_symbol_type), + jl_any_type, // union(jl_nothing, jl_debuginfo_type) + jl_simplevector_type, // memory{debuginfo} + jl_string_type), + jl_emptysvec, 0, 0, 4); + jl_debuginfo_type->name->mayinlinealloc = 0; + jl_code_info_type = jl_new_datatype(jl_symbol("CodeInfo"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(22, "code", - "codelocs", + "debuginfo", "ssavaluetypes", "ssaflags", - "method_for_inference_limit_heuristics", - "linetable", "slotnames", "slotflags", "slottypes", @@ -2971,36 +3497,38 @@ void jl_init_types(void) JL_GC_DISABLED 
"edges", "min_world", "max_world", - "inferred", + "method_for_inference_limit_heuristics", + "nargs", "propagate_inbounds", "has_fcall", "nospecializeinfer", + "isva", "inlining", "constprop", "purity", "inlining_cost"), jl_svec(22, jl_array_any_type, - jl_array_int32_type, - jl_any_type, - jl_array_uint8_type, - jl_any_type, + jl_debuginfo_type, jl_any_type, + jl_array_uint32_type, jl_array_symbol_type, jl_array_uint8_type, jl_any_type, jl_any_type, jl_any_type, - jl_any_type, + jl_any_type, // prefers svec, but tolerates Vector{Any} jl_ulong_type, jl_ulong_type, + jl_any_type, + jl_ulong_type, jl_bool_type, jl_bool_type, jl_bool_type, jl_bool_type, jl_uint8_type, jl_uint8_type, - jl_uint8_type, + jl_uint16_type, jl_uint16_type), jl_emptysvec, 0, 1, 22); @@ -3008,19 +3536,20 @@ void jl_init_types(void) JL_GC_DISABLED jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(30, + jl_perm_symsvec(31, "name", "module", "file", "line", - "primary_world", - "deleted_world", // !const + "primary_world", // atomic + "deleted_world", // atomic "sig", "specializations", // !const "speckeyset", // !const "slot_syms", "external_mt", "source", // !const + "debuginfo", // !const "unspecialized", // !const "generator", // !const "roots", // !const @@ -3039,7 +3568,7 @@ void jl_init_types(void) JL_GC_DISABLED "constprop", "max_varargs", "purity"), - jl_svec(30, + jl_svec(31, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -3048,10 +3577,11 @@ void jl_init_types(void) JL_GC_DISABLED jl_ulong_type, jl_type_type, jl_any_type, // union(jl_simplevector_type, jl_method_instance_type), - jl_array_type, + jl_genericmemory_type, // union(jl_memory_uint8_type, jl_memory_uint16_type, jl_memory_uint32_type, jl_memory_uint64_type, jl_memory_any_type) jl_string_type, jl_any_type, jl_any_type, + jl_debuginfo_type, jl_any_type, // jl_method_instance_type jl_any_type, jl_array_any_type, @@ -3069,86 +3599,88 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_uint8_type, jl_uint8_type, - jl_uint8_type), + jl_uint16_type), jl_emptysvec, 0, 1, 10); - //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25); + //const static uint32_t method_constfields[1] = { 0b0 }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<9)|(1<<10)|(1<<17)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25)|(1<<26)|(1<<27)|(1<<28)|(1<<29)|(1<<30); //jl_method_type->name->constfields = method_constfields; + const static uint32_t method_atomicfields[1] = { 0x00000030 }; // (1<<4)|(1<<5) + jl_method_type->name->atomicfields = method_atomicfields; jl_method_instance_type = jl_new_datatype(jl_symbol("MethodInstance"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(10, + jl_perm_symsvec(7, "def", "specTypes", "sparam_vals", - "uninferred", "backedges", - "callbacks", "cache", - "inInference", "cache_with_orig", - "precompiled"), - jl_svec(10, + "flags"), + jl_svec(7, jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type), jl_any_type, jl_simplevector_type, - jl_any_type, jl_array_any_type, - jl_any_type, - jl_any_type, - jl_bool_type, + jl_any_type/*jl_code_instance_type*/, jl_bool_type, jl_bool_type), jl_emptysvec, 0, 1, 3); // These fields should be constant, but Serialization wants to mutate them in initialization - //const static uint32_t method_instance_constfields[1] = { 0x00000007 }; // (1<<0)|(1<<1)|(1<<2); - const static uint32_t method_instance_atomicfields[1] 
= { 0x00000248 }; // (1<<3)|(1<<6)|(1<<9); - //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. TODO: except inInference + //const static uint32_t method_instance_constfields[1] = { 0b0000111 }; // fields 1, 2, 3 + const static uint32_t method_instance_atomicfields[1] = { 0b1010000 }; // fields 5, 7 + //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. //jl_method_instance_type->name->constfields = method_instance_constfields; jl_method_instance_type->name->atomicfields = method_instance_atomicfields; jl_code_instance_type = jl_new_datatype(jl_symbol("CodeInstance"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(15, + jl_perm_symsvec(17, "def", + "owner", "next", "min_world", "max_world", "rettype", + "exctype", "rettype_const", "inferred", - //"edges", + "debuginfo", + "edges", //"absolute_max", - "ipo_purity_bits", "purity_bits", - "argescapes", - "isspecsig", "precompile", "relocatability", + "ipo_purity_bits", + "analysis_results", + "specsigflags", "precompile", "invoke", "specptr"), // function object decls - jl_svec(15, - jl_method_instance_type, + jl_svec(17, + jl_any_type, + jl_any_type, jl_any_type, jl_ulong_type, jl_ulong_type, jl_any_type, jl_any_type, jl_any_type, - //jl_any_type, + jl_any_type, + jl_debuginfo_type, + jl_simplevector_type, //jl_bool_type, - jl_uint32_type, jl_uint32_type, + jl_uint32_type, jl_any_type, jl_bool_type, jl_bool_type, - jl_uint8_type, jl_any_type, jl_any_type), // fptrs jl_emptysvec, 0, 1, 1); - jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type); - const static uint32_t code_instance_constfields[1] = { 0b000001010110001 }; // Set fields 1, 5-6, 8, 10 as const - const static uint32_t code_instance_atomicfields[1] = { 0b110100101000010 }; // Set fields 2, 7, 9, 12, 14-15 as atomic - //Fields 3-4 are only operated on by construction and deserialization, so are const at runtime - //Fields 11 and 15 must be protected by locks, and thus all operations on jl_code_instance_t are threadsafe + jl_svecset(jl_code_instance_type->types, 2, jl_code_instance_type); + const static uint32_t code_instance_constfields[1] = { 0b00001000011100011 }; // Set fields 1, 2, 6-8, 13 as const + const static uint32_t code_instance_atomicfields[1] = { 0b11110111100011100 }; // Set fields 3-5, 9-12, 14-17 as atomic + // Fields 4-5 are only operated on by construction and deserialization, so are effectively const at runtime + // Fields ipo_purity_bits and analysis_results are not currently threadsafe or reliable, as they get mutated after optimization, but are not declared atomic + // and there is no way to tell (during inference) if their value is finalized yet (to wait for them to be narrowed if applicable) jl_code_instance_type->name->constfields = code_instance_constfields; jl_code_instance_type->name->atomicfields = code_instance_atomicfields; @@ -3173,23 +3705,15 @@ void jl_init_types(void) JL_GC_DISABLED jl_emptysvec, 0, 0, 4); // all Kinds share the Type method table (not the nonfunction one) - jl_unionall_type->name->mt = jl_uniontype_type->name->mt = jl_datatype_type->name->mt = - jl_type_type_mt; + jl_unionall_type->name->mt = + jl_uniontype_type->name->mt = + jl_datatype_type->name->mt = + jl_typeofbottom_type->name->mt = + jl_type_type_mt; jl_intrinsic_type = jl_new_primitivetype((jl_value_t*)jl_symbol("IntrinsicFunction"), core, jl_builtin_type, jl_emptysvec, 32); - tv = jl_svec1(tvar("T")); - jl_ref_type = 
(jl_unionall_t*) - jl_new_abstracttype((jl_value_t*)jl_symbol("Ref"), core, jl_any_type, tv)->name->wrapper; - - tv = jl_svec1(tvar("T")); - jl_pointer_type = (jl_unionall_t*) - jl_new_primitivetype((jl_value_t*)jl_symbol("Ptr"), core, - (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv), 1), tv, - sizeof(void*)*8)->name->wrapper; - jl_pointer_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->name; - // LLVMPtr{T, AS} where {T, AS} jl_tvar_t *elvar = tvar("T"); tv = jl_svec2(elvar, tvar("AS")); @@ -3221,24 +3745,35 @@ void jl_init_types(void) JL_GC_DISABLED NULL, jl_any_type, jl_emptysvec, - jl_perm_symsvec(16, + jl_perm_symsvec(27, "next", "queue", "storage", "donenotify", "result", - "logstate", + "scope", "code", + "_state", + "sticky", + "priority", + "_isexception", + "pad00", + "pad01", + "pad02", "rngState0", "rngState1", "rngState2", "rngState3", "rngState4", - "_state", - "sticky", - "_isexception", - "priority"), - jl_svec(16, + "metrics_enabled", + "pad10", + "pad11", + "pad12", + "first_enqueued_at", + "last_started_running_at", + "running_time_ns", + "finished_at"), + jl_svec(27, jl_any_type, jl_any_type, jl_any_type, @@ -3246,39 +3781,38 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type, jl_any_type, jl_any_type, + jl_uint8_type, + jl_bool_type, + jl_uint16_type, + jl_bool_type, + jl_uint8_type, + jl_uint8_type, + jl_uint8_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, - jl_uint8_type, - jl_bool_type, jl_bool_type, - jl_uint16_type), + jl_uint8_type, + jl_uint8_type, + jl_uint8_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type), jl_emptysvec, 0, 1, 6); XX(task); jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type); jl_svecset(jl_task_type->types, 0, listt); + // Set field 20 (metrics_enabled) as const + // Set fields 8 (_state) and 24-27 (metric counters) as atomic + const static uint32_t task_constfields[1] = { 0b00000000000010000000000000000000 }; + const static uint32_t task_atomicfields[1] = { 0b00000111100000000000000010000000 }; + jl_task_type->name->constfields = task_constfields; + jl_task_type->name->atomicfields = task_atomicfields; - jl_binding_type = - jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(5, "value", "globalref", "owner", "ty", "flags"), - jl_svec(5, jl_any_type, jl_any_type/*jl_globalref_type*/, jl_any_type/*jl_binding_type*/, jl_type_type, jl_uint8_type), - jl_emptysvec, 0, 1, 0); - const static uint32_t binding_atomicfields[] = { 0x0015 }; // Set fields 1, 3, 4 as atomic - jl_binding_type->name->atomicfields = binding_atomicfields; - const static uint32_t binding_constfields[] = { 0x0002 }; // Set fields 2 as constant - jl_binding_type->name->constfields = binding_constfields; - - jl_globalref_type = - jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(3, "mod", "name", "binding"), - jl_svec(3, jl_module_type, jl_symbol_type, jl_binding_type), - jl_emptysvec, 0, 0, 3); - - jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type); - jl_voidpointer_type = (jl_datatype_t*)pointer_void; tv = jl_svec2(tvar("A"), tvar("R")); jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv, // N.B.: OpaqueClosure call code relies on specptr being field 5. 
@@ -3308,6 +3842,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_svecset(jl_typename_type->types, 12, jl_int32_type); jl_svecset(jl_typename_type->types, 13, jl_uint8_type); jl_svecset(jl_typename_type->types, 14, jl_uint8_type); + jl_svecset(jl_typename_type->types, 15, jl_uint8_type); jl_svecset(jl_methtable_type->types, 4, jl_long_type); jl_svecset(jl_methtable_type->types, 5, jl_module_type); jl_svecset(jl_methtable_type->types, 6, jl_array_any_type); @@ -3315,30 +3850,33 @@ void jl_init_types(void) JL_GC_DISABLED jl_svecset(jl_methtable_type->types, 8, jl_long_type); // uint32_t plus alignment jl_svecset(jl_methtable_type->types, 9, jl_uint8_type); jl_svecset(jl_methtable_type->types, 10, jl_uint8_type); - jl_svecset(jl_method_type->types, 12, jl_method_instance_type); - jl_svecset(jl_method_instance_type->types, 6, jl_code_instance_type); - jl_svecset(jl_code_instance_type->types, 13, jl_voidpointer_type); - jl_svecset(jl_code_instance_type->types, 14, jl_voidpointer_type); - jl_svecset(jl_binding_type->types, 1, jl_globalref_type); - jl_svecset(jl_binding_type->types, 2, jl_binding_type); + jl_svecset(jl_method_type->types, 13, jl_method_instance_type); + //jl_svecset(jl_debuginfo_type->types, 0, jl_method_instance_type); // union(jl_method_instance_type, jl_method_type, jl_symbol_type) + jl_svecset(jl_method_instance_type->types, 4, jl_code_instance_type); + jl_svecset(jl_code_instance_type->types, 15, jl_voidpointer_type); + jl_svecset(jl_code_instance_type->types, 16, jl_voidpointer_type); + jl_svecset(jl_binding_type->types, 0, jl_globalref_type); + jl_svecset(jl_binding_partition_type->types, 3, jl_binding_partition_type); jl_compute_field_offsets(jl_datatype_type); jl_compute_field_offsets(jl_typename_type); jl_compute_field_offsets(jl_uniontype_type); jl_compute_field_offsets(jl_tvar_type); jl_compute_field_offsets(jl_methtable_type); - jl_compute_field_offsets(jl_module_type); jl_compute_field_offsets(jl_method_instance_type); jl_compute_field_offsets(jl_code_instance_type); jl_compute_field_offsets(jl_unionall_type); jl_compute_field_offsets(jl_simplevector_type); jl_compute_field_offsets(jl_symbol_type); + jl_compute_field_offsets(jl_binding_partition_type); // override ismutationfree for builtin types that are mutable for identity jl_string_type->ismutationfree = jl_string_type->isidentityfree = 1; jl_symbol_type->ismutationfree = jl_symbol_type->isidentityfree = 1; jl_simplevector_type->ismutationfree = jl_simplevector_type->isidentityfree = 1; jl_datatype_type->ismutationfree = 1; + assert(((jl_datatype_t*)jl_array_any_type)->ismutationfree == 0); + assert(((jl_datatype_t*)jl_array_uint8_type)->ismutationfree == 0); // Technically not ismutationfree, but there's a separate system to deal // with mutations for global state. @@ -3346,17 +3884,7 @@ void jl_init_types(void) JL_GC_DISABLED // Module object identity is determined by its name and parent name. 
jl_module_type->isidentityfree = 1; - // Array's mutable data is hidden, so we need to override it - ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->ismutationfree = 0; - ((jl_datatype_t*)jl_array_any_type)->ismutationfree = 0; - ((jl_datatype_t*)jl_array_symbol_type)->ismutationfree = 0; - ((jl_datatype_t*)jl_array_uint8_type)->ismutationfree = 0; - ((jl_datatype_t*)jl_array_int32_type)->ismutationfree = 0; - ((jl_datatype_t*)jl_array_uint64_type)->ismutationfree = 0; - - // override the preferred layout for a couple types - jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen - export_small_typeof(); + export_jl_small_typeof(); } static jl_value_t *core(const char *name) @@ -3364,6 +3892,8 @@ static jl_value_t *core(const char *name) return jl_get_global(jl_core_module, jl_symbol(name)); } +jl_debuginfo_t *jl_nulldebuginfo; + // fetch references to things defined in boot.jl void post_boot_hooks(void) { @@ -3379,6 +3909,8 @@ void post_boot_hooks(void) //XX(float32); jl_float64_type = (jl_datatype_t*)core("Float64"); //XX(float64); + jl_bfloat16_type = (jl_datatype_t*)core("BFloat16"); + //XX(bfloat16); jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat"); jl_number_type = (jl_datatype_t*)core("Number"); jl_signed_type = (jl_datatype_t*)core("Signed"); @@ -3398,6 +3930,7 @@ void post_boot_hooks(void) jl_diverror_exception = jl_new_struct_uninit((jl_datatype_t*)core("DivideError")); jl_undefref_exception = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError")); jl_undefvarerror_type = (jl_datatype_t*)core("UndefVarError"); + jl_fielderror_type = (jl_datatype_t*)core("FieldError"); jl_atomicerror_type = (jl_datatype_t*)core("ConcurrencyViolationError"); jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException")); jl_boundserror_type = (jl_datatype_t*)core("BoundsError"); @@ -3408,6 +3941,8 @@ void post_boot_hooks(void) jl_methoderror_type = (jl_datatype_t*)core("MethodError"); jl_loaderror_type = (jl_datatype_t*)core("LoadError"); jl_initerror_type = (jl_datatype_t*)core("InitError"); + jl_missingcodeerror_type = (jl_datatype_t*)core("MissingCodeError"); + jl_precompilable_error = jl_new_struct_uninit((jl_datatype_t*)core("PrecompilableError")); jl_pair_type = core("Pair"); jl_kwcall_func = core("kwcall"); jl_kwcall_mt = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt; @@ -3415,6 +3950,8 @@ void post_boot_hooks(void) jl_weakref_type = (jl_datatype_t*)core("WeakRef"); jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name; + jl_nulldebuginfo = (jl_debuginfo_t*)core("NullDebugInfo"); + jl_abioverride_type = (jl_datatype_t*)core("ABIOverride"); jl_init_box_caches(); @@ -3424,7 +3961,7 @@ void post_boot_hooks(void) for (size_t i = 0; i < jl_svec_len(bindings); i++) { if (table[i] != jl_nothing) { jl_binding_t *b = (jl_binding_t*)table[i]; - jl_value_t *v = jl_atomic_load_relaxed(&b->value); + jl_value_t *v = jl_get_binding_value(b); if (v) { if (jl_is_unionall(v)) v = jl_unwrap_unionall(v); @@ -3437,7 +3974,24 @@ void post_boot_hooks(void) } } } - export_small_typeof(); + + export_jl_small_typeof(); +} + +void post_image_load_hooks(void) { + // Ensure that `Base` has been loaded. 
+ assert(jl_base_module != NULL); + + jl_module_t *libc_module = (jl_module_t *)jl_get_global(jl_base_module, jl_symbol("Libc")); + if (libc_module) { + jl_libdl_module = (jl_module_t *)jl_get_global(libc_module, jl_symbol("Libdl")); + } + if (jl_libdl_module) { + jl_libdl_dlopen_func = jl_get_global( + jl_libdl_module, + jl_symbol("dlopen") + ); + } } #undef XX diff --git a/src/jsvm-emscripten/asyncify_setup.js b/src/jsvm-emscripten/asyncify_setup.js deleted file mode 100644 index 6783206602fd0..0000000000000 --- a/src/jsvm-emscripten/asyncify_setup.js +++ /dev/null @@ -1,144 +0,0 @@ -Module.preRun.push(function() { - if (typeof Asyncify !== "undefined") { - Asyncify.instrumentWasmExports = function (exports) { return exports; }; - Asyncify.handleSleep = function (startAsync) { - if (ABORT) return; - Module['noExitRuntime'] = true; - if (Asyncify.state === Asyncify.State.Normal) { - // Prepare to sleep. Call startAsync, and see what happens: - // if the code decided to call our callback synchronously, - // then no async operation was in fact begun, and we don't - // need to do anything. - var reachedCallback = false; - var reachedAfterCallback = false; - var task = get_current_task(); - startAsync(function(returnValue) { - assert(!returnValue || typeof returnValue === 'number'); // old emterpretify API supported other stuff - if (ABORT) return; - Asyncify.returnValue = returnValue || 0; - reachedCallback = true; - if (!reachedAfterCallback) { - // We are happening synchronously, so no need for async. - return; - } - schedule_and_wait(task); - }); - reachedAfterCallback = true; - if (!reachedCallback) { - Module['_jl_task_wait'](); - } - } else if (Asyncify.state === Asyncify.State.Rewinding) { - // Stop a resume. - finish_schedule_task(); - } else { - abort('invalid state: ' + Asyncify.state); - } - return Asyncify.returnValue; - }; - } -}); - -function get_current_task() { - return Module['_jl_get_current_task'](); -} - -function get_root_task() { - return Module['_jl_get_root_task'](); -} - -function task_ctx_ptr(task) { - return Module["_task_ctx_ptr"](task); -} - -function ctx_save(ctx) { - var stackPtr = stackSave(); - - // Save the bottom of the C stack in the task context. It simultaneously - // serves as the top of the asyncify stack. - HEAP32[ctx + 4 >> 2] = stackPtr; - - Asyncify.state = Asyncify.State.Unwinding; - Module['_asyncify_start_unwind'](ctx); - if (Browser.mainLoop.func) { - Browser.mainLoop.pause(); - } -} - -function do_start_task(old_stack) -{ - try { - // start_task is always the entry point for any task - Module['_start_task'](); - } catch(e) { - stackRestore(old_stack) - if (e !== e+0 && e !== 'killed') throw e; - maybe_schedule_next(); - return; - } - // Either unwind or normal exit. In either case, we're back at the main task - if (Asyncify.state === Asyncify.State.Unwinding) { - // We just finished unwinding for a sleep. 
- Asyncify.state = Asyncify.State.Normal; - Module['_asyncify_stop_unwind'](); - } - stackRestore(old_stack); - maybe_schedule_next(); -} - -function schedule_and_wait(task) { - Module['_jl_schedule_task'](task); - Module['_jl_task_wait'](); -} - -function finish_schedule_task() { - Asyncify.state = Asyncify.State.Normal; - Module['_asyncify_stop_rewind'](); -} - -next_ctx = 0; -next_need_start = true; -function set_next_ctx(ctx, needs_start) { - next_ctx = ctx; - next_need_start = needs_start; -} - -function root_ctx() { - return task_ctx_ptr(get_root_task()) -} - -function ctx_switch(lastt_ctx) { - if (lastt_ctx == root_ctx()) { - // If we're in the root context, switch to - // the new ctx now, else we'll get there after - // unwinding. - return schedule_next() - } else if (lastt_ctx == 0) { - throw 'killed'; - } else { - return ctx_save(lastt_ctx); - } -} - -function schedule_next() -{ - old_stack = stackSave(); - var next_task_stack = HEAP32[next_ctx + 4 >> 2]; - if (!next_need_start) { - Asyncify.state = Asyncify.State.Rewinding; - Module['_asyncify_start_rewind'](next_ctx); - if (Browser.mainLoop.func) { - Browser.mainLoop.resume(); - } - } - next_ctx = -1; - stackRestore(next_task_stack); - do_start_task(old_stack) -} - -function maybe_schedule_next() { - assert(next_ctx != -1); - if (next_ctx == root_ctx() || next_ctx == 0) { - return; - } - schedule_next() -} diff --git a/src/jsvm-emscripten/task.js b/src/jsvm-emscripten/task.js deleted file mode 100644 index ba695a5a40052..0000000000000 --- a/src/jsvm-emscripten/task.js +++ /dev/null @@ -1,15 +0,0 @@ -mergeInto(LibraryManager.library, { - jl_set_fiber: function(ctx) { - set_next_ctx(ctx, false); - return ctx_switch(0) - }, - jl_swap_fiber: function(lastt_ctx, ctx) { - set_next_ctx(ctx, false); - return ctx_switch(lastt_ctx) - }, - jl_start_fiber: function(lastt_ctx, ctx) { - set_next_ctx(ctx, true); - return ctx_switch(lastt_ctx) - } -}); - diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 210ba8f0ae07b..891a26bb0ea49 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -716,7 +716,7 @@ ;; ";" at the top level produces a sequence of top level expressions (define (parse-stmts s) - (let ((ex (parse-Nary s (lambda (s) (parse-docstring s parse-eq)) + (let ((ex (parse-Nary s (lambda (s) (parse-public s parse-eq)) '(#\;) 'toplevel (lambda (x) (eqv? x #\newline)) #f))) ;; check for unparsed junk after an expression (let ((t (peek-token s))) @@ -1608,18 +1608,18 @@ ((module baremodule) (let* ((name (parse-unary-prefix s)) (loc (line-number-node s)) - (body (parse-block s (lambda (s) (parse-docstring s parse-eq))))) + (body (parse-block s (lambda (s) (parse-public s parse-eq))))) (if (reserved-word? name) (error (string "invalid module name \"" name "\""))) (expect-end s word) (list 'module (if (eq? word 'module) '(true) '(false)) name `(block ,loc ,@(cdr body))))) - ((export) + ((export public) (let ((es (map macrocall-to-atsym (parse-comma-separated s parse-unary-prefix)))) (if (not (every symbol-or-interpolate? es)) - (error "invalid \"export\" statement")) - `(export ,@es))) + (error (string "invalid \"" word "\" statement"))) + `(,word ,@es))) ((import using) (parse-imports s word)) ((do) @@ -2610,15 +2610,23 @@ (define (valid-modref? e) (and (length= e 3) (eq? (car e) '|.|) (pair? (caddr e)) - (eq? (car (caddr e)) 'quote) (symbol? (cadr (caddr e))) + (or (eq? (car (caddr e)) 'quote) + (eq? (car (caddr e)) 'inert)) + (symbol? (cadr (caddr e))) (or (symbol? (cadr e)) (valid-modref? 
(cadr e))))) (define (macroify-name e . suffixes) (cond ((symbol? e) (symbol (apply string #\@ e suffixes))) + ((and (pair? e) (eq? (car e) 'quote)) + `(quote ,(apply macroify-name (cadr e) suffixes))) + ((and (pair? e) (eq? (car e) 'inert)) + `(inert ,(apply macroify-name (cadr e) suffixes))) + ((globalref? e) + `(globalref ,(cadr e) ,(apply macroify-name (caddr e) suffixes))) ((valid-modref? e) `(|.| ,(cadr e) - (quote ,(apply macroify-name (cadr (caddr e)) suffixes)))) + ,(apply macroify-name (caddr e) suffixes))) (else (error (string "invalid macro usage \"@(" (deparse e) ")\"" ))))) (define (macroify-call s call startloc) @@ -2664,6 +2672,17 @@ ;; string interpolation (eq? (car e) 'string)))) +(define (parse-public s production) + (if (eq? (peek-token s) 'public) + (let ((spc (ts:space? s))) + (take-token s) + (if (memv (peek-token s) '(#\( = #\[)) + (begin ;; TODO: deprecation warning here + (ts:put-back! s 'public spc) + (parse-docstring s production)) + (parse-resword s 'public))) + (parse-docstring s production))) + (define (parse-docstring s production) (let ((startloc (line-number-node s)) ; be sure to use the line number from the head of the docstring (ex (production s))) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index cd11f46b1eb38..7fd2dc7409c0e 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -225,13 +225,23 @@ (if lb (list lb ub) (list ub)) (if lb (list lb '(core Any)) '()))))) +(define (is-method? x) + (if (and (pair? x) (eq? (car x) 'method)) + (let ((name (cadr x))) + (if (and (pair? name) (eq? (car name) 'globalref)) + (let ((name (caddr name))) + (if (symbol? name) + #t + #f)) + (if (symbol? name) + #t + #f))) + #f)) + (define (method-expr-name m) (let ((name (cadr m))) - (let ((name (if (or (length= m 2) (not (pair? name)) (not (quoted? name))) name (cadr name)))) - (cond ((not (pair? name)) name) - ((eq? (car name) 'outerref) (cadr name)) - ;((eq? (car name) 'globalref) (caddr name)) - (else name))))) + (cond ((globalref? name) (caddr name)) + (else name)))) ;; extract static parameter names from a (method ...) expression (define (method-expr-static-parameters m) @@ -248,8 +258,7 @@ (define (nodot-sym-ref? e) (or (symbol? e) - (and (length= e 3) (eq? (car e) 'globalref)) - (and (length= e 2) (eq? (car e) 'outerref)))) + (and (length= e 3) (eq? (car e) 'globalref)))) ;; expressions of the form a.b.c... where everything is a symbol (define (sym-ref? e) @@ -296,7 +305,8 @@ (if (eq? n '|#self#|) (gensy) n)) arg-names)))) (let ((body (insert-after-meta body ;; don't specialize on generator arguments - `((meta nospecialize ,@arg-names))))) + ;; arg-names slots start at 2 (after name) + `((meta nospecialize ,@(map (lambda (idx) `(slot ,(+ idx 2))) (iota (length arg-names)))))))) `(block (global ,name) (function (call ,name ,@arg-names) ,body))))) @@ -375,7 +385,7 @@ (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x)))) (let* ((gen (generated-version body)) (nongen (non-generated-version body)) - (gname (symbol (string (gensy) "#" (current-julia-module-counter)))) + (gname (symbol (string (gensy) "#" (current-julia-module-counter '())))) (gf (make-generator-function gname names anames gen))) (set! body (insert-after-meta nongen @@ -508,14 +518,14 @@ sparams)) (kw (gensy)) (kwdecl `(|::| ,kw (core NamedTuple))) - (rkw (if (null? restkw) (make-ssavalue) (symbol (string (car restkw) "...")))) + (rkw (if (null? 
restkw) '() (symbol (string (car restkw) "...")))) (restkw (map (lambda (v) `(|::| ,v (call (top pairs) (core NamedTuple)))) restkw)) (mangled (let ((und (and name (undot-name name)))) (symbol (string (if (and name (= (string.char (string name) 0) #\#)) "" "#") (or und '_) "#" - (string (current-julia-module-counter))))))) + (string (current-julia-module-counter '()))))))) ;; this is a hack: nest these statements inside a call so they get closure ;; converted together, allowing all needed types to be defined before any methods. `(call (core ifelse) (false) (false) (block @@ -555,7 +565,7 @@ name positional-sparams `((|::| ;; if there are optional positional args, we need to be able to reference the function name - ,(if (any kwarg? pargl) (gensy) UNUSED) + ,(if (any kwarg? `(,@pargl ,@vararg)) (gensy) UNUSED) (call (core kwftype) ,ftype)) ,kwdecl ,@pargl ,@vararg) `(block ;; propagate method metadata to keyword sorter @@ -607,16 +617,18 @@ ,tempslot))) vars vals) `(block - (= ,rkw (call (top pairs) - ,(if (null? keynames) - kw - `(call (top structdiff) ,kw (curly (core NamedTuple) - (tuple ,@(map quotify keynames))))))) - ,@(if (null? restkw) - `((if (call (top isempty) ,rkw) + ,(if (null? restkw) + `(if (call (top isempty) + (call (top diff_names) + (call (top keys) ,kw) + (tuple ,@(map quotify keynames)))) (null) - (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg))) - '()) + (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg)) + `(= ,rkw (call (top pairs) + ,(if (null? keynames) + kw + `(call (top structdiff) ,kw (curly (core NamedTuple) + (tuple ,@(map quotify keynames)))))))) (return (call ,mangled ;; finally, call the core function ,@keyvars ,@(if (null? restkw) '() (list rkw)) @@ -662,7 +674,10 @@ (vals (list-tail dfl n)) (absent (list-tail opt n)) ;; absent arguments (body - (if (any (lambda (defaultv) + (if (any vararg? (butlast vals)) + ;; Forbid splat in all but the final default value + (error "invalid \"...\" in non-final positional argument default value") + (if (any (lambda (defaultv) ;; does any default val expression... (contains (lambda (e) ;; contain "e" such that... @@ -681,7 +696,7 @@ ;; otherwise add all `(block ,@prologue - (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,@vals))))) + (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,@vals)))))) (method-def-expr- name sp passed body))) (iota (length opt))) ,(method-def-expr- name sparams overall-argl body rett)))) @@ -758,7 +773,7 @@ ,@(map make-decl field-names field-types)) (block ,@locs - (new (outerref ,name) ,@field-names))) + (new (globalref (thismodule) ,name) ,@field-names))) #f)) (any-ctor (if (or (not all-ctor) (any (lambda (t) (not (equal? t '(core Any)))) field-types)) @@ -810,10 +825,10 @@ (if (> nnv (length params)) (error "too many type parameters specified in \"new{...}\""))) (let* ((Texpr (if (null? type-params) - `(outerref ,Tname) + `(globalref (thismodule) ,Tname) (if selftype? '|#ctor-self#| - `(curly (outerref ,Tname) + `(curly (globalref (thismodule) ,Tname) ,@type-params)))) (tn (if (symbol? Texpr) Texpr (make-ssavalue))) (field-convert (lambda (fld fty val) @@ -948,6 +963,19 @@ (ctors-min-initialized (car expr)) (ctors-min-initialized (cdr expr))))) +(define (insert-struct-shim field-types name) + (map (lambda (x) + (expr-replace (lambda (y) + (and (length= y 3) (eq? (car y) '|.|) + (or (equal? (caddr y) `(quote ,name)) + (equal? 
(caddr y) `(inert ,name))))) + x + (lambda (y) + `(call (core struct_name_shim) + ,(cadr y) ,(caddr y) + (thismodule) ,name)))) + field-types)) + (define (struct-def-expr- name params bounds super fields0 mut) (receive (fields defs) (separate eventually-decl? fields0) @@ -981,22 +1009,22 @@ (error (string "field name \"" (deparse v) "\" is not a symbol")))) field-names) `(block - (global ,name) (const ,name) + (global ,name) (scope-block (block (hardscope) (local-def ,name) ,@(map (lambda (v) `(local ,v)) params) ,@(map (lambda (n v) (make-assignment n (bounds-to-TypeVar v #t))) params bounds) - (toplevel-only struct (outerref ,name)) + (toplevel-only struct (globalref (thismodule) ,name)) (= ,name (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@params) (call (core svec) ,@(map quotify field-names)) (call (core svec) ,@attrs) ,mut ,min-initialized)) (call (core _setsuper!) ,name ,super) - (if (isdefined (outerref ,name)) + (if (call (core isdefinedglobal) (thismodule) (inert ,name) (false)) (block - (= ,prev (outerref ,name)) + (= ,prev (globalref (thismodule) ,name)) (if (call (core _equiv_typedef) ,prev ,name) ;; if this is compatible with an old definition, use the existing type object ;; and its parameters @@ -1007,11 +1035,10 @@ prev params) (quote parameters)))) - '())) - ;; otherwise do an assignment to trigger an error - (= (outerref ,name) ,name))) - (= (outerref ,name) ,name)) - (call (core _typebody!) ,name (call (core svec) ,@field-types)) + '()))))) + (call (core _typebody!) ,name (call (core svec) ,@(insert-struct-shim field-types name))) + (const (globalref (thismodule) ,name) ,name) + (latestworld) (null))) ;; "inner" constructors (scope-block @@ -1047,7 +1074,7 @@ (receive (params bounds) (sparam-name-bounds params) `(block - (global ,name) (const ,name) + (global ,name) (scope-block (block (local-def ,name) @@ -1057,17 +1084,18 @@ (= ,name (call (core _abstracttype) (thismodule) (inert ,name) (call (core svec) ,@params))) (call (core _setsuper!) ,name ,super) (call (core _typebody!) ,name) - (if (&& (isdefined (outerref ,name)) - (call (core _equiv_typedef) (outerref ,name) ,name)) + (if (&& (call (core isdefinedglobal) (thismodule) (inert ,name) (false)) + (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name)) (null) - (= (outerref ,name) ,name)) + (const (globalref (thismodule) ,name) ,name)) + (latestworld) (null)))))) (define (primitive-type-def-expr n name params super) (receive (params bounds) (sparam-name-bounds params) `(block - (global ,name) (const ,name) + (global ,name) (scope-block (block (local-def ,name) @@ -1077,10 +1105,11 @@ (= ,name (call (core _primitivetype) (thismodule) (inert ,name) (call (core svec) ,@params) ,n)) (call (core _setsuper!) ,name ,super) (call (core _typebody!) ,name) - (if (&& (isdefined (outerref ,name)) - (call (core _equiv_typedef) (outerref ,name) ,name)) + (if (&& (call (core isdefinedglobal) (thismodule) (inert ,name) (false)) + (call (core _equiv_typedef) (globalref (thismodule) ,name) ,name)) (null) - (= (outerref ,name) ,name)) + (const (globalref (thismodule) ,name) ,name)) + (latestworld) (null)))))) ;; take apart a type signature, e.g. T{X} <: S{Y} @@ -1249,7 +1278,7 @@ (list a))) ;; TODO: always use a specific special name like #anon# or _, then ignore ;; this as a local variable name. 
- (name (symbol (string "#" (current-julia-module-counter))))) + (name (symbol (string "#" (current-julia-module-counter '()))))) (expand-forms `(block (local ,name) (function @@ -1341,15 +1370,18 @@ (else (error "invalid let syntax")))) (else (error "invalid let syntax"))))))))) +(define (valid-macro-def-name? e) + (or (symbol? e) (valid-modref? e) (globalref? e))) + (define (expand-macro-def e) (cond ((and (pair? (cadr e)) (eq? (car (cadr e)) 'call) - (symbol? (cadr (cadr e)))) + (valid-macro-def-name? (cadr (cadr e)))) (let ((anames (remove-empty-parameters (cddr (cadr e))))) (if (has-parameters? anames) (error "macros cannot accept keyword arguments")) (expand-forms - `(function (call ,(symbol (string #\@ (cadr (cadr e)))) + `(function (call ,(macroify-name (cadr (cadr e))) (|::| __source__ (core LineNumberNode)) (|::| __module__ (core Module)) ,@(map (lambda (v) @@ -1358,8 +1390,8 @@ v)) anames)) ,@(cddr e))))) - ((and (length= e 2) (symbol? (cadr e))) - (expand-forms `(function ,(symbol (string #\@ (cadr e)))))) + ((and (length= e 2) (valid-macro-def-name? (cadr e))) + (expand-forms `(function ,(macroify-name (cadr e))))) (else (error "invalid macro definition")))) @@ -1420,7 +1452,7 @@ (scope-block ,finalb))))) ((length> e 3) (and (length> e 6) (error "invalid \"try\" form")) - (let ((elseb (if (length= e 6) (cdddddr e) '()))) + (let ((elseb (if (length= e 6) `((scope-block ,@(cdddddr e))) '()))) (expand-forms `(,(if (null? elseb) 'trycatch 'trycatchelse) (scope-block ,tryb) @@ -1434,34 +1466,57 @@ (else (error "invalid \"try\" form"))))) -(define (expand-unionall-def name type-ex) +(define (expand-unionall-def name type-ex (allow-local #t)) (if (and (pair? name) (eq? (car name) 'curly)) (let ((name (cadr name)) - (params (cddr name))) + (params (cddr name)) + (rr (make-ssavalue))) (if (null? params) (error (string "empty type parameter list in \"" (deparse `(= (curly ,name) ,type-ex)) "\""))) - `(block - (const-if-global ,name) - ,(expand-forms - `(= ,name (where ,type-ex ,@params))))) + (expand-forms + `(block + (= ,rr (where ,type-ex ,@params)) + (,(if allow-local 'assign-const-if-global 'const) ,name ,rr) + ,rr))) (expand-forms `(const (= ,name ,type-ex))))) -;; take apart e.g. `const a::Int = 0` into `const a; a::Int = 0` -(define (expand-const-decl e) - (let ((arg (cadr e))) - (if (atom? arg) - e - (case (car arg) - ((global local local-def) - (for-each (lambda (b) (if (not (assignment? b)) - (error "expected assignment after \"const\""))) - (cdr arg)) - (expand-forms (expand-decls (car arg) (cdr arg) #t))) - ((= |::|) - (expand-forms (expand-decls 'const (cdr e) #f))) - (else (error "expected assignment after \"const\"")))))) +(define (filter-not-underscore syms) + (filter (lambda (x) (not (underscore-symbol? x))) syms)) + +;; Expand `[global] const a::T = val` +(define (expand-const-decl e (mustassgn #f)) + (if (length= e 3) e + (let ((arg (cadr e))) + (if (atom? arg) + (if mustassgn + (error "expected assignment after \"const\"") + e) + (case (car arg) + ((global) + (expand-const-decl `(const ,(cadr arg)) #t)) + ((=) + (cond + ;; `const f() = ...` - The `const` here is inoperative, but the syntax happened to work in earlier versions, so simply strip `const`. + ;; TODO: Consider whether to keep this in 2.0. + ((eventually-call? (cadr arg)) + (expand-forms arg)) + ((and (pair? (cadr arg)) (eq? (caadr arg) 'curly)) + (expand-unionall-def (cadr arg) (caddr arg))) + ((and (pair? (cadr arg)) (eq? (caadr arg) 'tuple) (not (has-parameters? 
(cdr (cadr arg))))) + ;; We need this case because `(f(), g()) = (1, 2)` goes through here, which cannot go via the `local` lowering below, + ;; because the symbols come out wrong. Sigh... So much effort for such a syntax corner case. + (expand-tuple-destruct (cdr (cadr arg)) (caddr arg) (lambda (assgn) `(,(car e) ,assgn)))) + (else + (let ((rr (make-ssavalue))) + (expand-forms `(block + (= ,rr ,(caddr arg)) + (scope-block (block (hardscope) + (local (= ,(cadr arg) ,rr)) + ,.(map (lambda (v) `(,(car e) (globalref (thismodule) ,v) ,v)) (filter-not-underscore (lhs-vars (cadr arg)))) + ,rr)))))))) + (else (error "expected assignment after \"const\""))))))) (define (expand-atomic-decl e) (error "unimplemented or unsupported atomic declaration")) @@ -1469,7 +1524,7 @@ (define (expand-local-or-global-decl e) (if (and (symbol? (cadr e)) (length= e 2)) e - (expand-forms (expand-decls (car e) (cdr e) #f)))) + (expand-forms (expand-decls (car e) (cdr e))))) ;; given a complex assignment LHS, return the symbol that will ultimately be assigned to (define (assigned-name e) @@ -1479,37 +1534,36 @@ (assigned-name (cadr e))) (else e))) -;; local x, y=2, z => local x;local y;local z;y = 2 -(define (expand-decls what binds const?) +;; local x, (y=2), z => local x;local y;local z;y = 2 +(define (expand-decls what binds) (if (not (list? binds)) (error (string "invalid \"" what "\" declaration"))) (let loop ((b binds) - (vars '()) + (decls '()) (assigns '())) (if (null? b) `(block - ,.(if const? - (map (lambda (x) `(const ,x)) vars) - '()) - ,.(map (lambda (x) `(,what ,x)) vars) - ,.(reverse assigns)) + ,.(reverse decls) + ,.(reverse assigns) + ,.(if (null? assigns) `((null)) '())) (let ((x (car b))) (cond ((or (assignment-like? x) (function-def? x)) - (loop (cdr b) - (append (lhs-decls (assigned-name (cadr x))) vars) + (let ((new-vars (lhs-decls (assigned-name (cadr x))))) + (loop (cdr b) + (append (map (lambda (x) `(,what ,x)) new-vars) decls) (cons `(,(car x) ,(all-decl-vars (cadr x)) ,(caddr x)) - assigns))) + assigns)))) ((and (pair? x) (eq? (car x) '|::|)) (loop (cdr b) - (cons (decl-var x) vars) - (cons `(decl ,@(cdr x)) assigns))) + (cons `(decl ,@(cdr x)) (cons `(,what ,(decl-var x)) decls)) + assigns)) ((symbol? x) - (loop (cdr b) (cons x vars) assigns)) + (loop (cdr b) (cons `(,what, x) decls) assigns)) (else (error (string "invalid syntax in \"" what "\" declaration")))))))) ;; convert (lhss...) = (tuple ...) to assignments, eliminating the tuple -(define (tuple-to-assignments lhss0 x) +(define (tuple-to-assignments lhss0 x wrap) (let loop ((lhss lhss0) (assigned lhss0) (rhss (cdr x)) @@ -1531,7 +1585,7 @@ (loop (cdr lhss) (cons L assigned) (cdr rhss) - (cons (make-assignment L R) stmts) + (cons (wrap (make-assignment L R)) stmts) after (cons R elts))) ((vararg? L) @@ -1542,7 +1596,7 @@ `(block ,@(reverse stmts) (= ,temp (tuple ,@rhss)) ,@(reverse after) - (= ,(cadr L) ,temp) + ,(wrap `(= ,(cadr L) ,temp)) (unnecessary (tuple ,@(reverse elts) (... ,temp))))) (let ((lhss- (reverse lhss)) (rhss- (reverse rhss)) @@ -1574,13 +1628,13 @@ (assigns (if (and (length= lhss- 1) (vararg? (car lhss-))) (begin (set-car! end - (cons `(= ,(cadar lhss-) ,temp) (car end))) + (cons (wrap `(= ,(cadar lhss-) ,temp)) (car end))) assigns) (append (if (> n 0) `(,@assigns (local ,st)) assigns) (destructure- 1 (reverse lhss-) temp - n st end))))) + n st end wrap))))) (loop lhs-tail (append (map (lambda (x) (if (vararg? 
x) (cadr x) x)) lhss-) assigned) rhs-tail @@ -1593,7 +1647,7 @@ `(block ,@(reverse stmts) ,(make-assignment temp (cadr R)) ,@(reverse after) - (= (tuple ,@lhss) ,temp) + ,(wrap `(= (tuple ,@lhss) ,temp)) (unnecessary (tuple ,@(reverse elts) (... ,temp)))))) (else (let ((temp (if (eventually-call? L) (gensy) (make-ssavalue)))) @@ -1603,11 +1657,11 @@ (if (symbol? temp) (list* (make-assignment temp R) `(local-def ,temp) stmts) (cons (make-assignment temp R) stmts)) - (cons (make-assignment L temp) after) + (cons (wrap (make-assignment L temp)) after) (cons temp elts))))))))) ;; convert (lhss...) = x to tuple indexing -(define (lower-tuple-assignment lhss x) +(define (lower-tuple-assignment lhss x (wrap (lambda (x i) x))) (let ((t (make-ssavalue))) `(block (= ,t ,x) @@ -1622,9 +1676,10 @@ `(block (local-def ,temp) (= ,temp (call (core getfield) ,t ,i)) - (= ,(car lhs) ,temp))) - `(= ,(car lhs) - (call (core getfield) ,t ,i))) + ,(wrap `(= ,(car lhs) ,temp) i))) + (wrap + `(= ,(car lhs) + (call (core getfield) ,t ,i)) i)) (loop (cdr lhs) (+ i 1))))) ,t))) @@ -1647,8 +1702,10 @@ (let ((g (make-ssavalue))) (begin (set! a (cons `(= ,g ,x) a)) g))))) - (cons (cons (car e) (map arg-to-temp (cdr e))) - (reverse a))))) + (if (eq? (car e) 'let) + (cons (arg-to-temp e) (reverse a)) + (cons (cons (car e) (map arg-to-temp (cdr e))) + (reverse a)))))) (define (lower-kw-call f args) (let* ((para (if (has-parameters? args) (cdar args) '())) @@ -1785,7 +1842,7 @@ (let ((copied-vars ;; variables not declared `outer` are copied in the innermost loop ;; TODO: maybe filter these to remove vars not assigned in the loop (delete-duplicates - (filter (lambda (x) (not (underscore-symbol? x))) + (filter-not-underscore (apply append (map lhs-vars (filter (lambda (x) (not (outer? x))) (butlast lhss)))))))) @@ -1859,8 +1916,7 @@ ((and flat (pair? expr) (eq? (car expr) 'flatten)) (expand-generator (cadr expr) #t (delete-duplicates (append outervars myvars)))) ((pair? outervars) - `(let (block ,@(map (lambda (v) `(= ,v ,v)) (filter (lambda (x) (not (underscore-symbol? x))) - outervars))) + `(let (block ,@(map (lambda (v) `(= ,v ,v)) (filter-not-underscore outervars))) ,expr)) (else expr)))) `(-> ,argname (block ,@splat ,expr))))))) @@ -2270,10 +2326,14 @@ ;; `end`: car collects statements to be executed afterwards. ;; In general, actual assignments should only happen after ;; the whole iterator is desctructured (https://github.com/JuliaLang/julia/issues/40574) -(define (destructure- i lhss xx n st end) +;; +;; The `wrap` argument is a callback that will be called on all assignments to +;; symbols `lhss`, e.g. to insert a `const` declaration. +(define (destructure- i lhss xx n st end wrap) (if (null? lhss) '() (let* ((lhs (car lhss)) + (wrapfirst (lambda (x i) (if (= i 1) (wrap x) x))) (lhs- (cond ((or (symbol? lhs) (ssavalue? lhs)) lhs) ((vararg? lhs) @@ -2290,30 +2350,30 @@ (error "multiple \"...\" on lhs of assignment")) (if (not (eq? lhs lhs-)) (if (vararg? lhs) - (set-car! end (cons (expand-forms `(= ,(cadr lhs) ,(cadr lhs-))) (car end))) - (set-car! end (cons (expand-forms `(= ,lhs ,lhs-)) (car end))))) + (set-car! end (cons (expand-forms (wrap `(= ,(cadr lhs) ,(cadr lhs-)))) (car end))) + (set-car! end (cons (expand-forms (wrap `(= ,lhs ,lhs-))) (car end))))) (if (vararg? lhs-) (if (= i n) (if (underscore-symbol? (cadr lhs-)) '() (list (expand-forms - `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 1) '() `(,st))))))) + (wrap `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? 
i 1) '() `(,st)))))))) (let ((tail (if (eventually-call? lhs) (gensy) (make-ssavalue)))) (cons (expand-forms (lower-tuple-assignment (list (cadr lhs-) tail) - `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st))))) - (destructure- 1 (cdr lhss) tail (- n i) st end)))) + `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st))) wrapfirst)) + (destructure- 1 (cdr lhss) tail (- n i) st end wrap)))) (cons (expand-forms (lower-tuple-assignment (if (= i n) (list lhs-) (list lhs- st)) `(call (top indexed_iterate) - ,xx ,i ,@(if (eq? i 1) '() `(,st))))) - (destructure- (+ i 1) (cdr lhss) xx n st end)))))) + ,xx ,i ,@(if (eq? i 1) '() `(,st))) wrapfirst)) + (destructure- (+ i 1) (cdr lhss) xx n st end wrap)))))) -(define (expand-tuple-destruct lhss x) +(define (expand-tuple-destruct lhss x (wrap identity)) (define (sides-match? l r) ;; l and r either have equal lengths, or r has a trailing ... (cond ((null? l) (null? r)) @@ -2326,7 +2386,7 @@ (sides-match? lhss (cdr x))) ;; (a, b, ...) = (x, y, ...) (expand-forms - (tuple-to-assignments lhss x)) + (tuple-to-assignments lhss x wrap)) ;; (a, b, ...) = other (begin ;; like memq, but if lhs is (... sym), check against sym instead @@ -2347,7 +2407,7 @@ `(block ,@(if (> n 0) `((local ,st)) '()) ,@ini - ,@(destructure- 1 lhss xx n st end) + ,@(destructure- 1 lhss xx n st end wrap) ,@(reverse (car end)) (unnecessary ,xx)))))) @@ -2360,7 +2420,7 @@ `(= ,lhs ,rhs))) (define (expand-forms e) - (if (or (atom? e) (memq (car e) '(quote inert top core globalref outerref module toplevel ssavalue null true false meta using import export thismodule toplevel-only))) + (if (or (atom? e) (memq (car e) '(quote inert top core globalref module toplevel ssavalue null true false meta using import export public thismodule toplevel-only))) e (let ((ex (get expand-table (car e) #f))) (if ex @@ -2368,6 +2428,18 @@ (cons (car e) (map expand-forms (cdr e))))))) +(define (find pred e) + (let loop ((xs e)) + (if (null? xs) + #f + (let ((elt (car xs))) + (if (pred elt) + elt + (loop (cdr xs))))))) + +(define (something e) + (find (lambda (x) (not (equal? x '(null)))) e)) + ;; table mapping expression head to a function expanding that form (define expand-table (table @@ -2387,13 +2459,16 @@ 'opaque_closure (lambda (e) - (let* ((ty (and (length> e 2) (expand-forms (cadr e)))) - (F (if (length> e 2) (caddr e) (cadr e))) + (let* ((argt (something (list (expand-forms (cadr e)) #f))) + (rt_lb (something (list (expand-forms (caddr e)) #f))) + (rt_ub (something (list (expand-forms (cadddr e)) #f))) + (allow-partial (caddddr e)) + (F (cadddddr e)) (isva (let* ((arglist (function-arglist F)) (lastarg (and (pair? arglist) (last arglist)))) - (if (and ty (any (lambda (arg) + (if (and argt (any (lambda (arg) (let ((arg (if (vararg? arg) (cadr arg) arg))) - (not (equal? (arg-type arg) '(core Any))))) + (not (symbol? arg)))) arglist)) (error "Opaque closure argument type may not be specified both in the method signature and separately")) (if (or (varargexpr? lastarg) (vararg? lastarg)) @@ -2413,7 +2488,7 @@ (let* ((argtype (foldl (lambda (var ex) `(call (core UnionAll) ,var ,ex)) (expand-forms `(curly (core Tuple) ,@argtypes)) (reverse tvars)))) - `(_opaque_closure ,(or ty argtype) ,isva ,(length argtypes) ,functionloc ,lam)))) + `(_opaque_closure ,(or argt argtype) ,rt_lb ,rt_ub ,isva ,(length argtypes) ,allow-partial ,functionloc ,lam)))) 'block (lambda (e) @@ -2491,7 +2566,7 @@ lhss) (unnecessary ,rr))))))) ((or (and (symbol-like? lhs) (valid-name? lhs)) - (globalref? 
lhs) (outerref? lhs)) + (globalref? lhs)) (sink-assignment lhs (expand-forms (caddr e)))) ((atom? lhs) (error (string "invalid assignment location \"" (deparse lhs) "\""))) @@ -2672,6 +2747,9 @@ ((and (eq? (identifier-name f) '^) (length= e 4) (integer? (cadddr e))) (expand-forms `(call (top literal_pow) ,f ,(caddr e) (call (call (core apply_type) (top Val) ,(cadddr e)))))) + ((eq? f 'include) + (let ((r (make-ssavalue))) + `(block (= ,r ,(map expand-forms e)) (latestworld-if-toplevel) ,r))) (else (map expand-forms e)))) (map expand-forms e))) @@ -2954,10 +3032,14 @@ (set! vars (cons v vars))) (if (not (length= e 2)) (find-assigned-vars- (caddr e))))) + ((assign-const-if-global) + ;; like v = val, except that if `v` turns out global(either + ;; implicitly or by explicit `global`), it gains an implicit `const` + (set! vars (cons (cadr e) vars))) ((=) (let ((v (decl-var (cadr e)))) (find-assigned-vars- (caddr e)) - (if (or (ssavalue? v) (globalref? v) (outerref? v) (underscore-symbol? v)) + (if (or (ssavalue? v) (globalref? v) (underscore-symbol? v)) '() (set! vars (cons v vars))))) (else @@ -3005,7 +3087,7 @@ (for-each (lambda (v) (push-var! tab v v)) sp) (for-each (lambda (v) (push-var! tab v v)) locals) (for-each (lambda (pair) (push-var! tab (car pair) (cdr pair))) renames) - (for-each (lambda (v) (push-var! tab v `(outerref ,v))) globals) + (for-each (lambda (v) (push-var! tab v `(globalref (thismodule) ,v))) globals) (for-each (lambda (v) (push-var! tab v v)) args) (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars tab))) @@ -3035,13 +3117,13 @@ (or (and (memq var (scope:args scope)) 'argument) (and (memq var (scope:locals scope)) 'local) (and (memq var (scope:globals scope)) - (if (and exclude-top-level-globals + (if (and exclude-top-level-globals (null? (lam:args (scope:lam scope))) ;; don't inherit global decls from the outermost scope block ;; in a top-level expression. (or (not (scope:prev scope)) (not (scope:prev (scope:prev scope))))) - 'none 'global)) + 'none 'global)) (and (memq var (scope:sp scope)) 'static-parameter) (var-kind var (scope:prev scope) exclude-top-level-globals)) 'none)) @@ -3069,16 +3151,23 @@ (let ((val (and scope (get (scope:table scope) e #f)))) (cond (val (car val)) ((underscore-symbol? e) e) - (else `(outerref ,e))))) + (else `(globalref (thismodule) ,e))))) ((or (not (pair? e)) (quoted? e) (memq (car e) '(toplevel symbolicgoto symboliclabel toplevel-only))) e) ((eq? (car e) 'global) (check-valid-name (cadr e)) e) + ((eq? (car e) 'assign-const-if-global) + (if (eq? (var-kind (cadr e) scope) 'local) + (if (length= e 2) (null) `(= ,@(cdr e))) + `(const ,@(cdr e)))) ((memq (car e) '(local local-def)) (check-valid-name (cadr e)) ;; remove local decls '(null)) + ((memq (car e) '(using import export public)) + ;; no scope resolution - identifiers remain raw symbols + e) ((eq? (car e) 'require-existing-local) (if (not (in-scope? (cadr e) scope)) (error "no outer local variable declaration exists for \"for outer\"")) @@ -3167,7 +3256,6 @@ vars) t) #f))))) - (for-each (lambda (v) (if (or (memq v locals-def) (memq v local-decls)) (error (string "variable \"" v "\" declared both local and global"))) @@ -3230,11 +3318,9 @@ (warn-var?! (cadr e) scope) (= *scopewarn-opt* 1)) (let* ((v (cadr e)) - (loc (extract-line-file loc)) - (line (if (= (car loc) 0) (julia-current-line) (car loc))) - (file (if (eq? 
(cadr loc) 'none) (julia-current-file) (cadr loc)))) + (loc (extract-line-file loc))) (lowering-warning - 1000 'warn (symbol (string file line)) file line + 1000 'warn (cadr loc) (car loc) (string "Assignment to `" v "` in soft scope is ambiguous " "because a global variable by the same name exists: " "`" v "` will be treated as a new local. " @@ -3259,7 +3345,7 @@ (define (free-vars- e tab) (cond ((or (eq? e UNUSED) (underscore-symbol? e)) tab) ((symbol? e) (put! tab e #t)) - ((and (pair? e) (eq? (car e) 'outerref)) tab) + ((and (pair? e) (eq? (car e) 'globalref)) tab) ((and (pair? e) (eq? (car e) 'break-block)) (free-vars- (caddr e) tab)) ((and (pair? e) (eq? (car e) 'with-static-parameters)) (free-vars- (cadr e) tab)) ((or (atom? e) (quoted? e)) tab) @@ -3422,34 +3508,33 @@ f(x) = yt(x) (define (type-for-closure-parameterized name P names fields types super) (let ((n (length P)) (s (make-ssavalue))) - `((thunk - (lambda () + `((thunk ,(linearize `(lambda () (() () 0 ()) - (block (global ,name) (const ,name) + (block (global ,name) ,@(map (lambda (p n) `(= ,p (call (core TypeVar) ',n (core Any)))) P names) (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@P) (call (core svec) ,@(map quotify fields)) (call (core svec)) (false) ,(length fields))) (call (core _setsuper!) ,s ,super) - (= (outerref ,name) ,s) + (const (globalref (thismodule) ,name) ,s) (call (core _typebody!) ,s (call (core svec) ,@types)) - (return (null)))))))) + (return (null))))))))) (define (type-for-closure name fields super) (let ((s (make-ssavalue))) - `((thunk (lambda () - (() () 0 ()) - (block (global ,name) (const ,name) - (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec)) - (call (core svec) ,@(map quotify fields)) - (call (core svec)) - (false) ,(length fields))) - (call (core _setsuper!) ,s ,super) - (= (outerref ,name) ,s) - (call (core _typebody!) ,s - (call (core svec) ,@(map (lambda (v) '(core Box)) fields))) - (return (null)))))))) + `((thunk ,(linearize `(lambda () + (() () 0 ()) + (block (global ,name) + (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec)) + (call (core svec) ,@(map quotify fields)) + (call (core svec)) + (false) ,(length fields))) + (call (core _setsuper!) ,s ,super) + (const (globalref (thismodule) ,name) ,s) + (call (core _typebody!) ,s + (call (core svec) ,@(map (lambda (v) '(core Box)) fields))) + (return (null))))))))) ;; better versions of above, but they get handled wrong in many places ;; need to fix that in order to handle #265 fully (and use the definitions) @@ -3477,14 +3562,14 @@ f(x) = yt(x) (define (vinfo:not-capt vi) - (list (car vi) (cadr vi) (logand (caddr vi) (lognot 5)))) + (list (car vi) (cadr vi) (logand (caddr vi) (lognot 1)))) (define (clear-capture-bits vinfos) (map vinfo:not-capt vinfos)) -(define (convert-lambda lam fname interp capt-sp opaq) +(define (convert-lambda lam fname interp capt-sp opaq parsed-method-stack) (let ((body (add-box-inits-to-body - lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam))))))) + lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam))))))) `(lambda ,(lam:args lam) (,(clear-capture-bits (car (lam:vinfo lam))) () @@ -3559,7 +3644,7 @@ f(x) = yt(x) ;; declared types. ;; when doing this, the original value needs to be preserved, to ;; ensure the expression `a=b` always returns exactly `b`. 
-(define (convert-assignment var rhs0 fname lam interp opaq globals locals) +(define (convert-assignment var rhs0 fname lam interp opaq parsed-method-stack globals locals) (cond ((symbol? var) (let* ((vi (get locals var #f)) @@ -3577,7 +3662,7 @@ f(x) = yt(x) (equal? rhs0 '(the_exception))) rhs0 (make-ssavalue))) - (rhs (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam)) + (rhs (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq parsed-method-stack (table) locals) #t lam)) (ex (cond (closed `(call (core setfield!) ,(if interp `($ ,var) @@ -3591,18 +3676,24 @@ f(x) = yt(x) `(block (= ,rhs1 ,rhs0) ,ex ,rhs1)))))) - ((or (outerref? var) (globalref? var)) + ((globalref? var) (convert-global-assignment var rhs0 globals lam)) ((ssavalue? var) `(= ,var ,rhs0)) (else (error (string "invalid assignment location \"" (deparse var) "\""))))) +(define (sig-type-expr namemap name expr) + (let ((newname (get namemap name expr))) + (if (symbol? newname) + `(globalref (thismodule) ,newname) + newname))) + (define (rename-sig-types ex namemap) (pattern-replace (pattern-set (pattern-lambda (call (core (-/ Typeof)) name) - (get namemap name __))) + (sig-type-expr namemap name __))) ex)) ;; replace leading (function) argument type with `typ` @@ -3702,9 +3793,9 @@ f(x) = yt(x) (Set '(quote top core lineinfo line inert local-def unnecessary copyast meta inbounds boundscheck loopinfo decl aliasscope popaliasscope thunk with-static-parameters toplevel-only - global globalref outerref const-if-global thismodule + global globalref assign-const-if-global thismodule const atomic null true false ssavalue isdefined toplevel module lambda - error gc_preserve_begin gc_preserve_end import using export inline noinline))) + error gc_preserve_begin gc_preserve_end import using export public inline noinline purity))) (define (local-in? s lam (tab #f)) (or (and tab (has? tab s)) @@ -3808,8 +3899,6 @@ f(x) = yt(x) (let ((prev (table.clone live)) (decl- (table.clone decl))) (let ((result (eager-any visit (cdr e)))) - (if (eq? (car e) '_while) - (kill)) ;; body might not have run (leave-loop! decl-) (if result #t @@ -3863,17 +3952,17 @@ f(x) = yt(x) (define (toplevel-preserving? e) (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse)))) -(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table))) +(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table))) (if toplevel (map (lambda (x) (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined (and toplevel (toplevel-preserving? x)) - interp opaq globals locals)))) + interp opaq parsed-method-stack globals locals)))) (if (null? (cdr tl)) (car tl) `(block ,@(cdr tl) ,(car tl))))) exprs) - (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs))) + (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq parsed-method-stack globals locals)) exprs))) (define (prepare-lambda! lam) ;; mark all non-arguments as assigned, since locals that are never assigned @@ -3882,11 +3971,17 @@ f(x) = yt(x) (list-tail (car (lam:vinfo lam)) (length (lam:args lam)))) (lambda-optimize-vars! lam)) -(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table))) +;; must start with a hash and second character must be numeric +(define (anon-function-name? 
str) + (and (>= (string-length str) 2) + (char=? (string.char str 0) #\#) + (char-numeric? (string.char str 1)))) + +(define (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table))) (if (and (not lam) (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure))))) (if (atom? e) e - (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))) + (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) (cond ((symbol? e) (define (new-undef-var name) @@ -3905,7 +4000,10 @@ f(x) = yt(x) (val (if (equal? typ '(core Any)) val `(call (core typeassert) ,val - ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals))))) + ,(let ((convt (cl-convert typ fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) + (if (or (symbol-like? convt) (quoted? convt)) + convt + (renumber-assigned-ssavalues convt))))))) `(block ,@(if (eq? box access) '() `((= ,access ,box))) ,undefcheck @@ -3929,7 +4027,7 @@ f(x) = yt(x) ((atom? e) e) (else (case (car e) - ((quote top core globalref outerref thismodule lineinfo line break inert module toplevel null true false meta) e) + ((quote top core globalref thismodule lineinfo line break inert module toplevel null true false meta) e) ((toplevel-only) ;; hack to avoid generating a (method x) expr for struct types (if (eq? (cadr e) 'struct) @@ -3937,8 +4035,8 @@ f(x) = yt(x) e) ((=) (let ((var (cadr e)) - (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals))) - (convert-assignment var rhs fname lam interp opaq globals locals))) + (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) + (convert-assignment var rhs fname lam interp opaq parsed-method-stack globals locals))) ((local-def) ;; make new Box for local declaration of defined variable (let ((vi (get locals (cadr e) #f))) (if (and vi (vinfo:asgn vi) (vinfo:capt vi)) @@ -3955,10 +4053,6 @@ f(x) = yt(x) (put! globals (binding-to-globalref (cadr e)) #f) e) ((atomic) e) - ((const-if-global) - (if (local-in? (cadr e) lam locals) - '(null) - `(const ,(cadr e)))) ((isdefined) ;; convert isdefined expr to function for closure converted variables (let* ((sym (cadr e)) (vi (and (symbol? sym) (get locals sym #f))) @@ -3976,11 +4070,14 @@ f(x) = yt(x) (if (and (vinfo:asgn vi) (vinfo:capt vi)) `(call (core isdefined) ,sym (inert contents)) e)) - (else e)))) + (else (if (globalref? 
sym) + `(call (core isdefinedglobal) ,(cadr sym) (inert ,(caddr sym))) + e))))) ((_opaque_closure) - (let* ((isva (caddr e)) - (nargs (cadddr e)) - (functionloc (caddddr e)) + (let* ((isva (car (cddddr e))) + (nargs (cadr (cddddr e))) + (allow-partial (caddr (cddddr e))) + (functionloc (cadddr (cddddr e))) (lam2 (last e)) (vis (lam:vinfo lam2)) (cvs (map car (cadr vis)))) @@ -3992,8 +4089,8 @@ f(x) = yt(x) v))) cvs))) `(new_opaque_closure - ,(cadr e) (call (core apply_type) (core Union)) (core Any) - (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs))) + ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial + (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) parsed-method-stack)) ,@var-exprs)))) ((method) (let* ((name (method-expr-name e)) @@ -4002,12 +4099,12 @@ f(x) = yt(x) (vis (if short '(() () ()) (lam:vinfo lam2))) (cvs (map car (cadr vis))) (local? (lambda (s) (and lam (symbol? s) (local-in? s lam locals)))) - (local (and (not (outerref? (cadr e))) (local? name))) + (local (and (not (globalref? (cadr e))) (local? name))) (sig (and (not short) (caddr e))) (sp-inits (if (or short (not (eq? (car sig) 'block))) '() (map-cl-convert (butlast (cdr sig)) - fname lam namemap defined toplevel interp opaq globals locals))) + fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) (sig (and sig (if (eq? (car sig) 'block) (last sig) sig)))) @@ -4034,23 +4131,25 @@ f(x) = yt(x) ;; anonymous functions with keyword args generate global ;; functions that refer to the type of a local function (rename-sig-types sig namemap) - fname lam namemap defined toplevel interp opaq globals locals) + fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals) ,(let ((body (add-box-inits-to-body lam2 - (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table) + (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam2))))))) `(lambda ,(cadr lam2) (,(clear-capture-bits (car vis)) ,@(cdr vis)) - ,body))))) + ,body))) + (latestworld))) (else - (let* ((exprs (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f))) + (let* ((exprs (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f parsed-method-stack))) (top-stmts (cdr exprs)) (newlam (compact-and-renumber (linearize (car exprs)) 'none 0))) `(toplevel-butfirst (block ,@sp-inits - (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals) - ,(julia-bq-macro newlam))) + (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals) + ,(julia-bq-macro newlam)) + (latestworld)) ,@top-stmts)))) ;; local case - lift to a new type at top level @@ -4058,9 +4157,11 @@ f(x) = yt(x) (type-name (or (get namemap name #f) (and name (symbol (string (if (= (string.char (string name) 0) #\#) - "" - "#") - name "#" (current-julia-module-counter)))))) + (if (anon-function-name? (string name)) + (string "#" (current-julia-module-counter parsed-method-stack)) + name) + (string "#" name)) + "#" (current-julia-module-counter parsed-method-stack)))))) (alldefs (expr-find-all (lambda (ex) (and (length> ex 2) (eq? (car ex) 'method) (not (eq? ex e)) @@ -4147,17 +4248,17 @@ f(x) = yt(x) (contains (lambda (x) (eq? 
x 'kwftype)) sig)) (renamemap (map cons closure-param-names closure-param-syms)) (arg-defs (replace-vars - (fix-function-arg-type sig type-name iskw namemap closure-param-syms) + (fix-function-arg-type sig `(globalref (thismodule) ,type-name) iskw namemap closure-param-syms) renamemap))) (append (map (lambda (gs tvar) (make-assignment gs `(call (core TypeVar) ',tvar (core Any)))) closure-param-syms closure-param-names) - `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals) + `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals) ,(convert-lambda lam2 (if iskw (caddr (lam:args lam2)) (car (lam:args lam2))) - #f closure-param-names #f))))))) + #f closure-param-names #f parsed-method-stack))))))) (mk-closure ;; expression to make the closure (let* ((var-exprs (map (lambda (v) (let ((cv (assq v (cadr (lam:vinfo lam))))) @@ -4172,11 +4273,11 @@ f(x) = yt(x) (filter identity (map (lambda (v ve) (if (is-var-boxed? v lam) #f - `(call (core typeof) ,ve))) + `(call (core _typeof_captured_variable) ,ve))) capt-vars var-exprs))))) `(new ,(if (null? P) - type-name - `(call (core apply_type) ,type-name ,@P)) + `(globalref (thismodule) ,type-name) + `(call (core apply_type) (globalref (thismodule) ,type-name) ,@P)) ,@var-exprs)))) (if (pair? moved-vars) (set-car! (lam:vinfo lam) @@ -4186,16 +4287,19 @@ f(x) = yt(x) (if (or exists (and short (pair? alldefs))) `(toplevel-butfirst (null) + ,@(map (lambda (v) `(moved-local ,v)) moved-vars) ,@sp-inits - ,@mk-method) + ,@mk-method + (latestworld)) (begin (put! defined name #t) `(toplevel-butfirst - ,(convert-assignment name mk-closure fname lam interp opaq globals locals) + ,(convert-assignment name mk-closure fname lam interp opaq parsed-method-stack globals locals) ,@typedef ,@(map (lambda (v) `(moved-local ,v)) moved-vars) ,@sp-inits - ,@mk-method)))))))) + ,@mk-method + (latestworld))))))))) ((lambda) ;; happens inside (thunk ...) and generated function bodies (for-each (lambda (vi) (vinfo:set-asgn! vi #t)) (list-tail (car (lam:vinfo e)) (length (lam:args e)))) @@ -4205,14 +4309,14 @@ f(x) = yt(x) (table) (table) (null? (cadr e)) ;; only toplevel thunks have 0 args - interp opaq globals (vinfo-to-table (car (lam:vinfo e)))))) + interp opaq parsed-method-stack globals (vinfo-to-table (car (lam:vinfo e)))))) `(lambda ,(cadr e) (,(clear-capture-bits (car (lam:vinfo e))) () ,@(cddr (lam:vinfo e))) (block ,@body)))) ;; remaining `::` expressions are type assertions ((|::|) - (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals)) + (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)) ;; remaining `decl` expressions are only type assertions if the ;; argument is global or a non-symbol. ((decl) @@ -4227,15 +4331,23 @@ f(x) = yt(x) (put! globals ref #t) `(block (toplevel-only set_binding_type! ,(cadr e)) - (call (core set_binding_type!) 
,(cadr ref) (inert ,(caddr ref)) ,(caddr e)))) + (globaldecl ,ref ,(caddr e)) + (null))) `(call (core typeassert) ,@(cdr e)))) - fname lam namemap defined toplevel interp opaq globals locals)))) + fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))) ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars ((with-static-parameters) - (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals)) + (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)) (else (cons (car e) - (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))))))) + (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))))))) + +;; wrapper for `cl-convert-` +(define (cl-convert e fname lam namemap defined toplevel interp opaq (parsed-method-stack '()) (globals (table)) (locals (table))) + (if (is-method? e) + (let ((name (method-expr-name e))) + (cl-convert- e fname lam namemap defined toplevel interp opaq (cons name parsed-method-stack) globals locals)) + (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) (define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f)) @@ -4251,21 +4363,32 @@ f(x) = yt(x) e) (define (valid-ir-argument? e) - (or (simple-atom? e) (symbol? e) + (or (simple-atom? e) (and (pair? e) - (memq (car e) '(quote inert top core globalref outerref - slot static_parameter boundscheck))))) + (memq (car e) '(quote inert top core + slot static_parameter))))) (define (valid-ir-rvalue? lhs e) (or (ssavalue? lhs) (valid-ir-argument? e) (and (symbol? lhs) (pair? e) - (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure))))) + (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure globalref))))) (define (valid-ir-return? e) ;; returning lambda directly is needed for @generated (or (valid-ir-argument? e) (and (pair? e) (memq (car e) '(lambda))))) +(define (code-trivially-effect-free? e) + ;; determine whether the execution of this code can be observed. + ;; If not it may be deleted. In general, the only thing we can detect here + ;; is empty blocks that only have metadata in them. + (if (pair? e) + (case (car e) + ((block) (every code-trivially-effect-free? (cdr e))) + ((line null) #t) + (else #f)) + #t)) + ;; this pass behaves like an interpreter on the given code. ;; to perform stateful operations, it calls `emit` to record that something ;; needs to be done. in value position, it returns an expression computing @@ -4289,7 +4412,7 @@ f(x) = yt(x) ;; be emitted at the exit of the block. Code ;; should enter the finally block via `enter-finally-block`. 
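          ;; Illustrative sketch (comments only, with hypothetical label/token names):
          ;; for a plain `trycatch`, this pass emits roughly
          ;;     (= tok (enter L))   ; push a handler whose catch code starts at label L
          ;;     ...try body...
          ;;     (leave tok)         ; pop the handler on the normal exit path
          ;;     (goto end)
          ;;  L: ...catch body...
          ;;     (pop_exception tok) ; restore the exception stack for this catch
          ;; end:
          ;; For `tryfinally`, the mutable tag variable tracked in `finally-handler`
          ;; starts at -1; each `return`/`break` that has to pass through the finally
          ;; body is recorded under its own tag, and after the finally code a chain of
          ;; (call (core ===) tag k) / gotoifnot tests replays the recorded action.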
(handler-goto-fixups '()) ;; `goto`s that might need `leave` exprs added - (handler-level 0) ;; exception handler nesting depth + (handler-token-stack '()) ;; tokens identifying handler stack while active (catch-token-stack '())) ;; tokens identifying handler enter for current catch blocks (define (emit c) (or c (raise "missing value in IR")) @@ -4320,12 +4443,13 @@ f(x) = yt(x) (emit `(= ,(car finally-handler) ,tag)) (if need-goto (let ((label (cadr finally-handler)) - (dest-handler-level (cadddr finally-handler)) - (dest-tokens (caddddr finally-handler))) + (dest-handler-tokens (cadddr finally-handler)) + (dest-catch-tokens (caddddr finally-handler))) ;; Leave current exception handling scope and jump to finally block - (let ((pexc (pop-exc-expr catch-token-stack dest-tokens))) + (let ((pexc (pop-exc-expr catch-token-stack dest-catch-tokens))) (if pexc (emit pexc))) - (emit `(leave ,(+ 1 (- handler-level dest-handler-level)))) + (let ((plist (pop-handler-list handler-token-stack (cdr dest-handler-tokens) '()))) + (emit `(leave ,@plist))) (emit `(goto ,label)))) tag)) (define (pop-exc-expr src-tokens dest-tokens) @@ -4338,7 +4462,20 @@ f(x) = yt(x) (car s) (loop (cdr s)))))) `(pop_exception ,restore-token)))) - (define (emit-return x) + (define (pop-handler-list src-tokens dest-tokens lab) + (if (eq? src-tokens dest-tokens) + #f + (reverse + (let loop ((s src-tokens) + (l '())) + (if (not (pair? s)) + (if (null? lab) + (error "Attempt to jump into catch block") + (error (string "cannot goto label \"" lab "\" inside try/catch block")))) + (if (eq? (cdr s) dest-tokens) + (cons (car s) l) + (loop (cdr s) (cons (car s) l))))))) + (define (emit-return tail x) (define (emit- x) (let* ((tmp (if ((if (null? catch-token-stack) valid-ir-return? simple-atom?) x) #f @@ -4347,35 +4484,39 @@ f(x) = yt(x) (begin (emit `(= ,tmp ,x)) tmp) x))) (define (actually-return x) - (let* ((x (if rett - (compile (convert-for-type-decl (emit- x) rett #t lam) '() #t #f) + (let* ((x (begin0 (emit- x) + ;; if we are adding an implicit return then mark it as having no location + (if (not (eq? tail 'explicit)) + (emit '(line #f))))) + (x (if rett + (compile (convert-for-type-decl x rett #t lam) '() #t #f) x)) (x (emit- x))) (let ((pexc (pop-exc-expr catch-token-stack '()))) (if pexc (emit pexc))) (emit `(return ,x)))) (if x - (if (> handler-level 0) + (if (null? handler-token-stack) + (actually-return x) (let ((tmp (cond ((and (simple-atom? x) (or (not (ssavalue? x)) (not finally-handler))) #f) (finally-handler (new-mutable-var)) (else (make-ssavalue))))) (if tmp (emit `(= ,tmp ,x))) (if finally-handler (enter-finally-block `(return ,(or tmp x))) - (begin (emit `(leave ,handler-level)) + (begin (emit `(leave ,@handler-token-stack)) (actually-return (or tmp x)))) - (or tmp x)) - (actually-return x)))) + (or tmp x))))) (define (emit-break labl) - (let ((lvl (caddr labl)) + (let ((dest-handler-tokens (caddr labl)) (dest-tokens (cadddr labl))) - (if (and finally-handler (> (cadddr finally-handler) lvl)) + (if (and finally-handler (> (length (cadddr finally-handler)) (length dest-handler-tokens))) (enter-finally-block `(break ,labl)) (begin (let ((pexc (pop-exc-expr catch-token-stack dest-tokens))) (if pexc (emit pexc))) - (if (> handler-level lvl) - (emit `(leave ,(- handler-level lvl)))) + (let ((plist (pop-handler-list handler-token-stack dest-handler-tokens '()))) + (if plist (emit `(leave ,@plist)))) (emit `(goto ,(cadr labl))))))) (define (new-mutable-var . name) (let ((g (if (null? 
name) (gensy) (named-gensy (car name))))) @@ -4390,51 +4531,63 @@ f(x) = yt(x) ((struct_type) "\"struct\" expression") ((method) "method definition") ((set_binding_type!) (string "type declaration for global \"" (deparse (cadr e)) "\"")) + ((latestworld) "World age increment") (else (string "\"" h "\" expression")))) (if (not (null? (cadr lam))) (error (string (head-to-text (car e)) " not at top level")))) + (define (valid-body-ir-argument? aval) + (or (valid-ir-argument? aval) + (and (symbol? aval) ; Arguments are always defined slots + (or (memq aval (lam:args lam)) + (let ((vi (get vinfo-table aval #f))) + (and vi (vinfo:never-undef vi))))))) + (define (single-assign-var? aval) + (and (symbol? aval) ; Arguments are always sa + (or (memq aval (lam:args lam)) + (let ((vi (get vinfo-table aval #f))) + (and vi (vinfo:sa vi)))))) + ;; TODO: We could also allow const globals here + (define (const-read-arg? x) + ;; Even if we have side effects, we know that singly-assigned + ;; locals cannot be affected by them, so we can inline them anyway. + (or (simple-atom? x) (single-assign-var? x) + (and (pair? x) + (memq (car x) '(quote inert top core))))) ;; evaluate the arguments of a call, creating temporary locations as needed (define (compile-args lst break-labels) (if (null? lst) '() - (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x) - (and (pair? x) - (memq (car x) '(quote inert top core globalref outerref boundscheck))))) - lst))) - (let loop ((lst lst) - (vals '())) - (if (null? lst) - (reverse! vals) - (let* ((arg (car lst)) - (aval (or (compile arg break-labels #t #f) - ;; TODO: argument exprs that don't yield a value? - '(null)))) - (loop (cdr lst) - (cons (if (and (not simple?) - (not (simple-atom? arg)) - (not (simple-atom? aval)) - (not (and (pair? arg) - (memq (car arg) '(quote inert top core boundscheck)))) - (not (and (symbol? aval) ;; function args are immutable and always assigned - (memq aval (lam:args lam)))) - (not (and (or (symbol? arg) - (and (pair? arg) - (memq (car arg) '(globalref outerref)))) - (or (null? (cdr lst)) - (null? vals))))) - (let ((tmp (make-ssavalue))) - (emit `(= ,tmp ,aval)) - tmp) - aval) - vals)))))))) + ;; First check if all the arguments are simple (and therefore side-effect free). + ;; Otherwise, we need to use ssa values for all arguments to ensure proper + ;; left-to-right evaluation semantics. + (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x) + (and (pair? x) + (memq (car x) '(quote inert top core globalref))))) + lst))) + (let loop ((lst lst) + (vals '())) + (if (null? lst) + (reverse! vals) + (let* ((arg (car lst)) + (aval (or (compile arg break-labels #t #f) + ;; TODO: argument exprs that don't yield a value? + '(null)))) + (loop (cdr lst) + (cons (if (and + (or simple? (const-read-arg? aval)) + (valid-body-ir-argument? aval)) + aval + (let ((tmp (make-ssavalue))) + (emit `(= ,tmp ,aval)) + tmp)) + vals)))))))) (define (compile-cond ex break-labels) (let ((cnd (or (compile ex break-labels #t #f) ;; TODO: condition exprs that don't yield a value? '(null)))) - (if (not (valid-ir-argument? cnd)) + (if (valid-body-ir-argument? cnd) cnd (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,cnd)) - tmp) - cnd))) + tmp)))) (define (emit-cond cnd break-labels endl) (let* ((cnd (if (and (pair? cnd) (eq? (car cnd) 'block)) (flatten-ex 'block cnd) @@ -4461,14 +4614,20 @@ f(x) = yt(x) (cdr cnd) (list cnd)))))) tests)) + (define (emit-assignment-or-setglobal lhs rhs) + (if (globalref? 
lhs) + (begin + (emit `(global ,lhs)) + (emit `(call (top setglobal!) ,(cadr lhs) (inert ,(caddr lhs)) ,rhs))) + (emit `(= ,lhs ,rhs)))) (define (emit-assignment lhs rhs) (if rhs (if (valid-ir-rvalue? lhs rhs) - (emit `(= ,lhs ,rhs)) + (emit-assignment-or-setglobal lhs rhs) (let ((rr (make-ssavalue))) (emit `(= ,rr ,rhs)) - (emit `(= ,lhs ,rr)))) - (emit `(= ,lhs (null)))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved + (emit-assignment-or-setglobal lhs rr))) + (emit-assignment-or-setglobal lhs `(null))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved #f) ;; the interpreter loop. `break-labels` keeps track of the labels to jump to ;; for all currently closing break-blocks. @@ -4478,19 +4637,17 @@ f(x) = yt(x) ;; from the current function. (define (compile e break-labels value tail) (if (or (not (pair? e)) (memq (car e) '(null true false ssavalue quote inert top core copyast the_exception $ - globalref outerref thismodule cdecl stdcall fastcall thiscall llvmcall))) + globalref thismodule cdecl stdcall fastcall thiscall llvmcall))) (let ((e1 (if (and arg-map (symbol? e)) (get arg-map e e) e))) (if (and value (or (underscore-symbol? e) - (and (pair? e) (or (eq? (car e) 'outerref) - (eq? (car e) 'globalref)) + (and (pair? e) (eq? (car e) 'globalref) (underscore-symbol? (cadr e))))) - (error (string "all-underscore identifier used as rvalue" (format-loc current-loc)))) - (cond (tail (emit-return e1)) + (error (string "all-underscore identifiers are write-only and their values cannot be used in expressions" (format-loc current-loc)))) + (cond (tail (emit-return tail e1)) (value e1) ((symbol? e1) (emit e1) #f) ;; keep symbols for undefined-var checking - ((and (pair? e1) (eq? (car e1) 'outerref)) (emit e1) #f) ;; keep globals for undefined-var checking ((and (pair? e1) (eq? (car e1) 'globalref)) (emit e1) #f) ;; keep globals for undefined-var checking (else #f))) (case (car e) @@ -4514,17 +4671,17 @@ f(x) = yt(x) (cons (cadr e) (cons fptr (cdddr e))))) ;; Leave a literal lambda in place for later global expansion ((eq? (car e) 'new_opaque_closure) - (let* ((oc_method (car (list-tail (cdr e) 3))) ;; opaque_closure_method + (let* ((oc_method (car (list-tail (cdr e) 4))) ;; opaque_closure_method (lambda (list-ref oc_method 5)) (lambda (linearize lambda))) (append - (compile-args (list-head (cdr e) 3) break-labels) + (compile-args (list-head (cdr e) 4) break-labels) (list (append (butlast oc_method) (list lambda))) - (compile-args (list-tail (cdr e) 4) break-labels)))) + (compile-args (list-tail (cdr e) 5) break-labels)))) ;; NOTE: 1st argument to cglobal treated same as for ccall ((and (length> e 2) (or (eq? (cadr e) 'cglobal) - (equal? (cadr e) '(outerref cglobal)))) + (equal? (cadr e) '(globalref (thismodule) cglobal)))) (append (list (cadr e)) (if (atom-or-not-tuple-call? (caddr e)) (compile-args (list (caddr e)) break-labels) @@ -4533,7 +4690,7 @@ f(x) = yt(x) (else (compile-args (cdr e) break-labels)))) (callex (cons (car e) args))) - (cond (tail (emit-return callex)) + (cond (tail (emit-return tail callex)) (value callex) (else (emit callex))))) ((=) @@ -4549,8 +4706,8 @@ f(x) = yt(x) rhs (make-ssavalue)))) (if (not (eq? 
rr rhs)) (emit `(= ,rr ,rhs))) - (emit `(= ,lhs ,rr)) - (if tail (emit-return rr)) + (emit-assignment-or-setglobal lhs rr) + (if tail (emit-return tail rr)) rr) (emit-assignment lhs rhs)))))) ((block) @@ -4603,7 +4760,7 @@ f(x) = yt(x) (if file-diff (set! filename last-fname)) v))) ((return) - (compile (cadr e) break-labels #t #t) + (compile (cadr e) break-labels #t 'explicit) #f) ((unnecessary) ;; `unnecessary` marks expressions generated by lowering that @@ -4618,7 +4775,8 @@ f(x) = yt(x) (let ((v1 (compile (caddr e) break-labels value tail))) (if val (emit-assignment val v1)) (if (and (not tail) (or (length> e 3) val)) - (emit end-jump)) + (begin (emit `(line #f)) + (emit end-jump))) (let ((elselabel (make&mark-label))) (for-each (lambda (test) (set-car! (cddr test) elselabel)) @@ -4630,7 +4788,7 @@ f(x) = yt(x) (if (not tail) (set-car! (cdr end-jump) (make&mark-label)) (if (length= e 3) - (emit-return v2))) + (emit-return tail v2))) val)))) ((_while) (let* ((endl (make-label)) @@ -4652,7 +4810,7 @@ f(x) = yt(x) ((break-block) (let ((endl (make-label))) (compile (caddr e) - (cons (list (cadr e) endl handler-level catch-token-stack) + (cons (list (cadr e) endl handler-token-stack catch-token-stack) break-labels) #f #f) (mark-label endl)) @@ -4666,13 +4824,13 @@ f(x) = yt(x) (if (eq? (car e) 'symboliclabel) (if (has? label-nesting (cadr e)) (error (string "label \"" (cadr e) "\" defined multiple times")) - (put! label-nesting (cadr e) (list handler-level catch-token-stack)))) + (put! label-nesting (cadr e) (list handler-token-stack catch-token-stack)))) (let ((m (get label-map (cadr e) #f))) (if m (emit `(label ,m)) (put! label-map (cadr e) (make&mark-label))) (if tail - (emit-return '(null)) + (emit-return tail '(null)) (if value (error "misplaced label"))))) ((symbolicgoto) (let* ((m (get label-map (cadr e) #f)) @@ -4682,28 +4840,34 @@ f(x) = yt(x) (emit `(null)) ;; save space for `leave` that might be needed (emit `(goto ,m)) (set! handler-goto-fixups - (cons (list code handler-level catch-token-stack (cadr e)) handler-goto-fixups)) + (cons (list code handler-token-stack catch-token-stack (cadr e)) handler-goto-fixups)) #f)) ;; exception handlers are lowered using - ;; (= tok (enter L)) - push handler with catch block at label L, yielding token + ;; (= tok (enter L scope)) + ;; push handler with catch block at label L and scope `scope`, yielding token + ;; `scope` is only recognized for tryfinally and may be omitted in the lowering ;; (leave n) - pop N exception handlers ;; (pop_exception tok) - pop exception stack back to state of associated enter ((trycatch tryfinally trycatchelse) (let ((handler-token (make-ssavalue)) (catch (make-label)) + (catchcode (if (eq? (car e) 'tryfinally) '(call (top rethrow)) (caddr e))) (els (and (eq? (car e) 'trycatchelse) (make-label))) (endl (make-label)) (last-finally-handler finally-handler) - (finally (if (eq? (car e) 'tryfinally) (new-mutable-var) #f)) + ;; Special case optimization: If the finally block is trivially empty, don't perform finally + ;; lowering, just lower this as a try/catch block with rethrow and scope handling. + (finally (if (and (eq? (car e) 'tryfinally) (not (code-trivially-effect-free? (caddr e)))) (new-mutable-var) #f)) + (scope (if (eq? (car e) 'tryfinally) (cdddr e) '())) + (my-finally-handler #f)) ;; handler block entry - (emit `(= ,handler-token (enter ,catch))) - (set! handler-level (+ handler-level 1)) - (if finally (begin (set! 
my-finally-handler (list finally endl '() handler-level catch-token-stack)) + (emit `(= ,handler-token (enter ,catch ,@(compile-args scope break-labels)))) + (set! handler-token-stack (cons handler-token handler-token-stack)) + (if finally (begin (set! my-finally-handler (list finally endl '() handler-token-stack catch-token-stack)) (set! finally-handler my-finally-handler) (emit `(= ,finally -1)))) - (let* ((v1 (compile (cadr e) break-labels value #f)) ;; emit try block code + (let* ((v1 (compile (cadr e) break-labels value #f)) ;; emit try block code (val (if (and value (not tail)) (new-mutable-var) #f))) ;; handler block postfix @@ -4711,26 +4875,30 @@ f(x) = yt(x) (if tail (begin (if els (begin (if (and (not val) v1) (emit v1)) - (emit '(leave 1))) - (if v1 (emit-return v1))) + (emit `(leave ,handler-token))) + (if v1 (emit-return tail v1))) (if (not finally) (set! endl #f))) - (begin (emit '(leave 1)) + (begin (emit `(leave ,handler-token)) (emit `(goto ,(or els endl))))) - (set! handler-level (- handler-level 1)) + (set! handler-token-stack (cdr handler-token-stack)) ;; emit else block (if els (begin (mark-label els) (let ((v3 (compile (cadddr e) break-labels value tail))) ;; emit else block code (if val (emit-assignment val v3))) (if endl (emit `(goto ,endl))))) - ;; emit either catch or finally block + ;; emit either catch or finally block. A combined try/catch/finally block was split into + ;; separate trycatch and tryfinally blocks earlier. (mark-label catch) - (emit `(leave 1)) (if finally - (begin (enter-finally-block '(call (top rethrow)) #f) ;; enter block via exception + (begin (set! finally-handler last-finally-handler) + (set! catch-token-stack (cons handler-token catch-token-stack)) + (compile (caddr e) break-labels #f #f) ;; enter block via exception + (emit '(call (top rethrow))) + (emit-return tail '(null)) ; unreachable + (set! catch-token-stack (cdr catch-token-stack)) (mark-label endl) ;; non-exceptional control flow enters here - (set! finally-handler last-finally-handler) - (compile (caddr e) break-labels #f #f) + (compile (renumber-assigned-ssavalues (caddr e)) break-labels #f #f) ;; emit actions to be taken at exit of finally ;; block, depending on the tag variable `finally` (let loop ((actions (caddr my-finally-handler))) @@ -4744,14 +4912,14 @@ f(x) = yt(x) (emit `(= ,tmp (call (core ===) ,finally ,(caar actions)))) (emit `(gotoifnot ,tmp ,skip)))) (let ((ac (cdar actions))) - (cond ((eq? (car ac) 'return) (emit-return (cadr ac))) + (cond ((eq? (car ac) 'return) (emit-return tail (cadr ac))) ((eq? (car ac) 'break) (emit-break (cadr ac))) (else ;; assumed to be a rethrow (emit ac)))) (if skip (mark-label skip)) (loop (cdr actions)))))) (begin (set! catch-token-stack (cons handler-token catch-token-stack)) - (let ((v2 (compile (caddr e) break-labels value tail))) + (let ((v2 (compile catchcode break-labels value tail))) (if val (emit-assignment val v2)) (if (not tail) (emit `(pop_exception ,handler-token))) ;; else done in emit-return from compile @@ -4769,6 +4937,12 @@ f(x) = yt(x) ((global) ; keep global declarations as statements (if value (error "misplaced \"global\" declaration")) (emit e)) + ((globaldecl) + (if value (error "misplaced \"global\" declaration")) + (if (atom? (caddr e)) (emit e) + (let ((rr (make-ssavalue))) + (emit `(= ,rr ,(caddr e))) + (emit `(globaldecl ,(cadr e) ,rr))))) ((local-def) #f) ((local) #f) ((moved-local) @@ -4783,8 +4957,8 @@ f(x) = yt(x) (set! 
global-const-error current-loc)) (emit e)))) ((atomic) (error "misplaced atomic declaration")) - ((isdefined) (if tail (emit-return e) e)) - ((boundscheck) (if tail (emit-return e) e)) + ((isdefined throw_undef_if_not) (if tail (emit-return tail e) e)) + ((boundscheck) (if tail (emit-return tail e) e)) ((method) (if (not (null? (cadr lam))) @@ -4805,20 +4979,25 @@ f(x) = yt(x) l)))) (emit `(method ,(or (cadr e) '(false)) ,sig ,lam)) (if value (compile '(null) break-labels value tail))) - (cond (tail (emit-return e)) + (cond (tail (emit-return tail e)) (value e) (else (emit e))))) ((lambda) (let ((temp (linearize e))) - (cond (tail (emit-return temp)) + (cond (tail (emit-return tail temp)) (value temp) (else (emit temp))))) ;; top level expressions - ((thunk module) + ((thunk) (check-top-level e) (emit e) - (if tail (emit-return '(null))) + (if tail (emit-return tail '(null))) + '(null)) + ((module) + (check-top-level e) + (emit e) + (if tail (emit-return tail '(null))) '(null)) ((toplevel-only) (check-top-level (cdr e)) @@ -4828,16 +5007,23 @@ f(x) = yt(x) (check-top-level e) (let ((val (make-ssavalue))) (emit `(= ,val ,e)) - (if tail (emit-return val)) + (if tail (emit-return tail val)) val)) + ((latestworld-if-toplevel) + (if (null? (cadr lam)) + (emit `(latestworld))) + '(null)) + ;; other top level expressions - ((import using export) + ((import using export public latestworld) (check-top-level e) - (emit e) + (if (not (eq? (car e) 'latestworld)) + (emit e)) + (emit `(latestworld)) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (if (and tail (not have-ret?)) - (emit-return '(null)))) + (emit-return tail '(null)))) '(null)) ((gc_preserve_begin) @@ -4845,7 +5031,7 @@ f(x) = yt(x) (cons (car e) args))) ;; metadata expressions - ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) + ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline purity) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (cond ((eq? (car e) 'line) (set! current-loc e) @@ -4861,13 +5047,17 @@ f(x) = yt(x) (else (emit e))) (if (and tail (not have-ret?)) - (emit-return '(null))) + (emit-return tail '(null))) '(null))) ;; unsupported assignment operators ((≔ ⩴ ≕ :=) (error (string "unsupported assignment operator \"" (deparse (car e)) "\""))) + ;; bare :escape + ((escape) + (error (string "\"esc(...)\" used outside of macro expansion"))) + ((error) (error (cadr e))) (else @@ -4883,21 +5073,18 @@ f(x) = yt(x) (compile e '() #t #t) (for-each (lambda (x) (let ((point (car x)) - (hl (cadr x)) - (src-tokens (caddr x)) + (src-handler-tokens (cadr x)) + (src-catch-tokens (caddr x)) (lab (cadddr x))) (let ((target-nesting (get label-nesting lab #f))) (if (not target-nesting) (error (string "label \"" lab "\" referenced but not defined"))) - (let ((target-level (car target-nesting))) - (cond ((> target-level hl) - (error (string "cannot goto label \"" lab "\" inside try/catch block"))) - ((= target-level hl) - (set-cdr! point (cddr point))) ;; remove empty slot - (else - (set-car! (cdr point) `(leave ,(- hl target-level)))))) - (let ((pexc (pop-exc-expr src-tokens (cadr target-nesting)))) - (if pexc (set-cdr! point (cons pexc (cdr point)))))))) + (let ((target-handler-tokens (car target-nesting)) + (target-catch-tokens (cadr target-nesting))) + (let ((plist (pop-handler-list src-handler-tokens target-handler-tokens lab))) + (if plist (set-car! 
(cdr point) `(leave ,@plist)))) + (let ((pexc (pop-exc-expr src-catch-tokens target-catch-tokens))) + (if pexc (set-cdr! point (cons pexc (cdr point))))))))) handler-goto-fixups) (if global-const-error (error (string "`global const` declaration not allowed inside function" (format-loc global-const-error)))) @@ -4966,19 +5153,21 @@ f(x) = yt(x) (list ,@(cadr vi)) ,(caddr vi) (list ,@(cadddr vi))) ,@(cdddr lam)))) -(define (make-lineinfo name file line (inlined-at #f)) - `(lineinfo (thismodule) ,(if inlined-at '|macro expansion| name) ,file ,line ,(or inlined-at 0))) +(define (make-lineinfo file line (inlined-at #f)) + `(lineinfo ,file ,line ,(or inlined-at 0))) (define (set-lineno! lineinfo num) - (set-car! (cddddr lineinfo) num)) + (set-car! (cddr lineinfo) num)) -(define (compact-ir body name file line) +(define (compact-ir body file line) (let ((code '(block)) (locs '(list)) (linetable '(list)) + (linetablelen 0) (labltable (table)) (ssavtable (table)) (current-loc 0) + (nowhere #f) (current-file file) (current-line line) (locstack '()) @@ -4987,35 +5176,45 @@ f(x) = yt(x) (or e (raise "missing value in IR")) (if (and (null? (cdr linetable)) (not (and (pair? e) (eq? (car e) 'meta)))) - (begin (set! linetable (cons (make-lineinfo name file line) linetable)) + (begin (set! linetable (cons (make-lineinfo file line) linetable)) + (set! linetablelen (+ linetablelen 1)) (set! current-loc 1))) (set! code (cons e code)) (set! i (+ i 1)) - (set! locs (cons current-loc locs))) + (set! locs (cons (if nowhere 0 current-loc) locs)) + (set! nowhere #f)) (let loop ((stmts (cdr body))) (if (pair? stmts) (let ((e (car stmts))) (cond ((atom? e) (emit e)) ((eq? (car e) 'line) - (if (and (= current-line 0) (length= e 2) (pair? linetable)) - ;; (line n) after push_loc just updates the line for the new file - (begin (set-lineno! (car linetable) (cadr e)) - (set! current-line (cadr e))) - (begin - (set! current-line (cadr e)) - (if (pair? (cddr e)) - (set! current-file (caddr e))) - (set! linetable (cons (if (null? locstack) - (make-lineinfo name current-file current-line) - (make-lineinfo name current-file current-line (caar locstack))) - linetable)) - (set! current-loc (- (length linetable) 1))))) + (cond ((and (length= e 2) (not (cadr e))) + ;; (line #f) marks that we are entering a generated statement + ;; that should not be counted as belonging to the previous marked location, + ;; for example `return` after a not-executed `if` arm in tail position. + (set! nowhere #t)) + ((and (= current-line 0) (length= e 2) (pair? linetable)) + ;; (line n) after push_loc just updates the line for the new file + (begin (set-lineno! (car linetable) (cadr e)) + (set! current-line (cadr e)))) + (else + (begin + (set! current-line (cadr e)) + (if (pair? (cddr e)) + (set! current-file (caddr e))) + (set! linetable (cons (if (null? locstack) + (make-lineinfo current-file current-line) + (make-lineinfo current-file current-line (caar locstack))) + linetable)) + (set! linetablelen (+ linetablelen 1)) + (set! current-loc linetablelen))))) ((and (length> e 2) (eq? (car e) 'meta) (eq? (cadr e) 'push_loc)) (set! locstack (cons (list current-loc current-line current-file) locstack)) (set! current-file (caddr e)) (set! current-line 0) - (set! linetable (cons (make-lineinfo name current-file current-line current-loc) linetable)) - (set! current-loc (- (length linetable) 1))) + (set! linetable (cons (make-lineinfo current-file current-line current-loc) linetable)) + (set! linetablelen (+ linetablelen 1)) + (set! 
current-loc linetablelen)) ((and (length= e 2) (eq? (car e) 'meta) (eq? (cadr e) 'pop_loc)) (let ((l (car locstack))) (set! locstack (cdr locstack)) @@ -5039,7 +5238,6 @@ f(x) = yt(x) (define (renumber-lambda lam file line) (let* ((stuff (compact-ir (lam:body lam) - (if (null? (cadr lam)) '|top-level scope| 'none) file line)) (code (aref stuff 0)) (locs (aref stuff 1)) @@ -5051,7 +5249,8 @@ f(x) = yt(x) (define slot-table (symbol-to-idx-map (map car (car (lam:vinfo lam))))) (define sp-table (symbol-to-idx-map (lam:sp lam))) (define (renumber-stuff e) - (cond ((symbol? e) + (cond ((eq? e UNUSED) (error "Attempted to use slot marked unused")) + ((symbol? e) (let ((idx (get slot-table e #f))) (if idx `(slot ,idx) @@ -5059,19 +5258,19 @@ f(x) = yt(x) (if idx `(static_parameter ,idx) e))))) - ((and (pair? e) (eq? (car e) 'outerref)) - (cadr e)) ((nospecialize-meta? e) ;; convert nospecialize vars to slot numbers `(meta ,(cadr e) ,@(map renumber-stuff (cddr e)))) - ((or (atom? e) (quoted? e) (eq? (car e) 'global)) + ((or (atom? e) (quoted? e) (memq (car e) '(using import export public global toplevel))) e) ((ssavalue? e) (let ((idx (get ssavalue-table (cadr e) #f))) (if (not idx) (begin (prn e) (prn lam) (error "ssavalue with no def"))) `(ssavalue ,idx))) - ((memq (car e) '(goto enter)) - (list* (car e) (get label-table (cadr e)) (cddr e))) + ((eq? (car e) 'goto) + `(goto ,(get label-table (cadr e)))) + ((eq? (car e) 'enter) + `(enter ,(get label-table (cadr e)) ,@(map renumber-stuff (cddr e)))) ((eq? (car e) 'gotoifnot) `(gotoifnot ,(renumber-stuff (cadr e)) ,(get label-table (caddr e)))) ((eq? (car e) 'lambda) diff --git a/src/julia.expmap.in b/src/julia.expmap.in index 484c83a4b16b2..b28a714e75f69 100644 --- a/src/julia.expmap.in +++ b/src/julia.expmap.in @@ -1,43 +1,42 @@ @JULIA_SHLIB_SYMBOL_VERSION@ { global: pthread*; - __stack_chk_guard; - asprintf; + __stack_chk_*; + asprintf*; bitvector_*; ios_*; - arraylist_grow; - small_arraylist_grow; - small_typeof; + arraylist_*; + small_arraylist_*; jl_*; ijl_*; _jl_mutex_*; - rec_backtrace; + rec_backtrace*; julia_*; - libsupport_init; - localtime_r; - memhash; - memhash32; - memhash32_seed; - memhash_seed; - restore_signals; + libsupport_init*; + localtime_r*; + memhash*; + memhash32*; + memhash32_seed*; + memhash_seed*; + restore_signals*; u8_*; uv_*; - add_library_mapping; + add_library_mapping*; utf8proc_*; - jlbacktrace; - jlbacktracet; - _IO_stdin_used; - _Z24jl_coverage_data_pointerN4llvm9StringRefEi; - _Z22jl_coverage_alloc_lineN4llvm9StringRefEi; - _Z22jl_malloc_data_pointerN4llvm9StringRefEi; + jlbacktrace*; + jlbacktracet*; + _IO_stdin_used*; /* glibc expects this to be exported to detect which version of glibc is being used, see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=634261#109 for further details */ + _Z24jl_coverage_data_pointerN4llvm9StringRefEi*; + _Z22jl_coverage_alloc_lineN4llvm9StringRefEi*; + _Z22jl_malloc_data_pointerN4llvm9StringRefEi*; _jl_timing_*; LLVMExtra*; JLJIT*; - llvmGetPassPluginInfo; + llvmGetPassPluginInfo*; /* freebsd */ - environ; - __progname; + environ*; + __progname*; local: *; diff --git a/src/julia.h b/src/julia.h index d2eb9a98a4a42..b5416568b7ae9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -13,6 +13,7 @@ #undef jl_setjmp #undef jl_longjmp #undef jl_egal +#undef jl_genericmemory_owner #endif #include "julia_fasttls.h" @@ -26,30 +27,23 @@ #include #ifndef _OS_WINDOWS_ -# define jl_jmp_buf sigjmp_buf -# if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_) -# define MAX_ALIGN 8 -# 
elif defined(_CPU_AARCH64_) -// int128 is 16 bytes aligned on aarch64 -# define MAX_ALIGN 16 -# elif defined(_P64) -// Generically we assume MAX_ALIGN is sizeof(void*) -# define MAX_ALIGN 8 -# else -# define MAX_ALIGN 4 -# endif + #define jl_jmp_buf sigjmp_buf #else -# include "win32_ucontext.h" -# define jl_jmp_buf jmp_buf -# define MAX_ALIGN 8 + #include "win32_ucontext.h" + #define jl_jmp_buf jmp_buf #endif // Define the largest size (bytes) of a properly aligned object that the -// processor family and compiler typically supports without a lock -// (assumed to be at least a pointer size). Since C is bad at handling 16-byte -// types, we currently use 8 here as the default. +// processor family (MAX_ATOMIC_SIZE) and compiler (MAX_POINTERATOMIC_SIZE) +// typically supports without a lock (assumed to be at least a pointer size) +// with MAX_POINTERATOMIC_SIZE >= MAX_ATOMIC_SIZE. +#ifdef _P64 +#define MAX_ATOMIC_SIZE 16 +#define MAX_POINTERATOMIC_SIZE 16 +#else #define MAX_ATOMIC_SIZE 8 #define MAX_POINTERATOMIC_SIZE 8 +#endif #ifdef _P64 #define NWORDS(sz) (((sz)+7)>>3) @@ -76,6 +70,7 @@ typedef struct _jl_tls_states_t *jl_ptls_t; #ifdef JL_LIBRARY_EXPORTS #include "uv.h" #endif +#include "gc-interface.h" #include "julia_atomics.h" #include "julia_threads.h" #include "julia_assert.h" @@ -120,7 +115,8 @@ JL_DLLEXPORT jl_taggedvalue_t *_jl_astaggedvalue(jl_value_t *v JL_PROPAGATES_ROO jl_value_t *_jl_valueof(jl_taggedvalue_t *tv JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; #define jl_valueof(v) _jl_valueof((jl_taggedvalue_t*)(v)) JL_DLLEXPORT jl_value_t *_jl_typeof(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; -#define jl_typeof(v) _jl_typeof((jl_value_t*)(v)) +#define jl_typeof(v) (_jl_typeof((jl_value_t*)(v))) +#define jl_typetagof(v) ((uintptr_t)_jl_typeof((jl_value_t*)(v))) #else #define jl_astaggedvalue(v) \ ((jl_taggedvalue_t*)((char*)(v) - sizeof(jl_taggedvalue_t))) @@ -128,6 +124,8 @@ JL_DLLEXPORT jl_value_t *_jl_typeof(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFE ((jl_value_t*)((char*)(v) + sizeof(jl_taggedvalue_t))) #define jl_typeof(v) \ jl_to_typeof(jl_typetagof(v)) +#define jl_typetagof(v) \ + ((jl_astaggedvalue(v)->header) & ~(uintptr_t)15) #endif static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT { @@ -135,8 +133,6 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT jl_taggedvalue_t *tag = jl_astaggedvalue(v); jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t); } -#define jl_typetagof(v) \ - ((jl_astaggedvalue(v)->header) & ~(uintptr_t)15) #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t)) #define jl_typetagis(v,t) (jl_typetagof(v)==(uintptr_t)(t)) #define jl_set_typetagof(v,t,gc) (jl_set_typeof((v), (void*)(((uintptr_t)(t) << 4) | (gc)))) @@ -167,47 +163,36 @@ typedef struct { // jl_value_t *data[]; } jl_svec_t; -typedef struct { - /* - how - allocation style - 0 = data is inlined, or a foreign pointer we don't manage - 1 = julia-allocated buffer that needs to be marked - 2 = malloc-allocated pointer this array object manages - 3 = has a pointer to the object that owns the data - */ - uint16_t how:2; - uint16_t ndims:9; - uint16_t pooled:1; - uint16_t ptrarray:1; // representation is pointer array - uint16_t hasptr:1; // representation has embedded pointers - uint16_t isshared:1; // data is shared by multiple Arrays - uint16_t isaligned:1; // data allocated with memalign -} jl_array_flags_t; - JL_EXTENSION typedef struct { JL_DATA_TYPE - void *data; size_t length; - jl_array_flags_t flags; - uint16_t elsize; // 
element size including alignment (dim 1 memory stride) - uint32_t offset; // for 1-d only. does not need to get big. - size_t nrows; - union { - // 1d - size_t maxsize; - // Nd - size_t ncols; - }; - // other dim sizes go here for ndims > 2 + void *ptr; + // followed by padding and inline data, or owner pointer +#ifdef _P64 + // union { + // jl_value_t *owner; + // T inl[]; + // }; +#else + // + // jl_value_t *owner; + // size_t padding[1]; + // T inl[]; +#endif +} jl_genericmemory_t; + +JL_EXTENSION typedef struct { + JL_DATA_TYPE + void *ptr_or_offset; + jl_genericmemory_t *mem; +} jl_genericmemoryref_t; - // followed by alignment padding and inline data, or owner pointer +JL_EXTENSION typedef struct { + JL_DATA_TYPE + jl_genericmemoryref_t ref; + size_t dimsize[]; // length for 1-D, otherwise length is mem->length } jl_array_t; -// compute # of extra words needed to store dimensions -STATIC_INLINE int jl_array_ndimwords(uint32_t ndims) JL_NOTSAFEPOINT -{ - return (ndims < 3 ? 0 : ndims-2); -} typedef struct _jl_datatype_t jl_tupletype_t; struct _jl_code_instance_t; @@ -246,7 +231,10 @@ JL_DLLEXPORT extern const jl_callptr_t jl_fptr_interpret_call_addr; JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr; +JL_DLLEXPORT extern const jl_callptr_t jl_fptr_wait_for_compiled_addr; + typedef struct _jl_line_info_node_t { + JL_DATA_TYPE struct _jl_module_t *module; jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t jl_sym_t *file; @@ -254,55 +242,79 @@ typedef struct _jl_line_info_node_t { int32_t inlined_at; } jl_line_info_node_t; -// the following mirrors `struct EffectsOverride` in `base/compiler/effects.jl` +struct jl_codeloc_t { + int32_t line; + int32_t to; + int32_t pc; +}; + +typedef struct _jl_debuginfo_t { + jl_value_t *def; + struct _jl_debuginfo_t *linetable; // or nothing + jl_svec_t *edges; // Memory{DebugInfo} + jl_value_t *codelocs; // String // Memory{UInt8} // compressed info +} jl_debuginfo_t; + +// the following mirrors `struct EffectsOverride` in `base/expr.jl` typedef union __jl_purity_overrides_t { struct { - uint8_t ipo_consistent : 1; - uint8_t ipo_effect_free : 1; - uint8_t ipo_nothrow : 1; - uint8_t ipo_terminates_globally : 1; + uint16_t ipo_consistent : 1; + uint16_t ipo_effect_free : 1; + uint16_t ipo_nothrow : 1; + uint16_t ipo_terminates_globally : 1; // Weaker form of `terminates` that asserts // that any control flow syntactically in the method // is guaranteed to terminate, but does not make // assertions about any called functions. 
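        // (For example, a method like `f(g) = g()` can carry this flag even if the
        // call to `g` never returns; only the control flow written in the method
        // body itself is covered.)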
- uint8_t ipo_terminates_locally : 1; - uint8_t ipo_notaskstate : 1; - uint8_t ipo_inaccessiblememonly : 1; + uint16_t ipo_terminates_locally : 1; + uint16_t ipo_notaskstate : 1; + uint16_t ipo_inaccessiblememonly : 1; + uint16_t ipo_noub : 1; + uint16_t ipo_noub_if_noinbounds : 1; + uint16_t ipo_consistent_overlay : 1; + uint16_t ipo_nortcall : 1; } overrides; - uint8_t bits; + uint16_t bits; } _jl_purity_overrides_t; +#define NUM_EFFECTS_OVERRIDES 11 +#define NUM_IR_FLAGS 3 + // This type describes a single function body typedef struct _jl_code_info_t { + JL_DATA_TYPE // ssavalue-indexed arrays of properties: jl_array_t *code; // Any array of statements - jl_value_t *codelocs; // Int32 array of indices into the line table + jl_debuginfo_t *debuginfo; // Table of edge data for each statement jl_value_t *ssavaluetypes; // types of ssa values (or count of them) - jl_array_t *ssaflags; // flags associated with each statement: - // 0 = inbounds - // 1 = inline - // 2 = noinline - // 3 = strict-ieee (strictfp) - // 4 = effect-free (may be deleted if unused) - // 5-6 = - // 7 = has out-of-band info + jl_array_t *ssaflags; // 32 bits flags associated with each statement: + // 1 << 0 = inbounds region + // 1 << 1 = callsite inline region + // 1 << 2 = callsite noinline region + // 1 << 3-14 = purity + // 1 << 16+ = reserved for inference // miscellaneous data: - jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference - jl_value_t *linetable; // Table of locations [TODO: make this volatile like slotnames] jl_array_t *slotnames; // names of local variables jl_array_t *slotflags; // local var bit flags - // the following are optional transient properties (not preserved by compression--as they typically get stored elsewhere): + // the following is a deprecated property (not preserved by compression) jl_value_t *slottypes; // inferred types of slots - jl_value_t *rettype; - jl_method_instance_t *parent; // context (optionally, if available, otherwise nothing) - jl_value_t *edges; // forward edges to method instances that must be invalidated + // more inferred data: + jl_value_t *rettype; // return type relevant for fptr + jl_method_instance_t *parent; // context (after inference, otherwise nothing) + // the following are required to cache the method correctly + jl_value_t *edges; // forward edge info (svec preferred, but tolerates Array{Any} and nothing token) size_t min_world; size_t max_world; + + // These may be used by generated functions to further constrain the resulting inputs. + jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference + size_t nargs; + // various boolean properties: - uint8_t inferred; uint8_t propagate_inbounds; uint8_t has_fcall; uint8_t nospecializeinfer; + uint8_t isva; // uint8 settings uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none @@ -319,19 +331,20 @@ typedef struct _jl_method_t { struct _jl_module_t *module; jl_sym_t *file; int32_t line; - size_t primary_world; - size_t deleted_world; + _Atomic(size_t) primary_world; + _Atomic(size_t) deleted_world; // method's type signature. 
redundant with TypeMapEntry->specTypes jl_value_t *sig; // table of all jl_method_instance_t specializations we have _Atomic(jl_value_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....], or a single item - _Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations + _Atomic(jl_genericmemory_t*) speckeyset; // index lookup by hash into specializations jl_value_t *slot_syms; // compacted list of slot names (String) jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table jl_value_t *source; // original code template (jl_code_info_t, but may be compressed), null for builtins + jl_debuginfo_t *debuginfo; // fixed linetable from the source argument, null if not available _Atomic(jl_method_instance_t*) unspecialized; // unspecialized executable method instance, or null jl_value_t *generator; // executable code-generating function if available jl_array_t *roots; // pointers in generated code (shared to reduce memory), or null @@ -386,15 +399,18 @@ struct _jl_method_instance_t { jl_method_t *method; // method this is specialized from } def; // pointer back to the context for this code jl_value_t *specTypes; // argument types this was specialized for - jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sparam_syms - _Atomic(jl_value_t*) uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter - jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs - jl_array_t *callbacks; // list of callback functions to inform external caches about invalidations + jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sig + jl_array_t *backedges; // list of code-instances which call this method-instance; `invoke` records (invokesig, caller) pairs _Atomic(struct _jl_code_instance_t*) cache; - uint8_t inInference; // flags to tell if inference is running on this object uint8_t cache_with_orig; // !cache_with_specTypes - _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call + + // flags for this method instance + // bit 0: generated by an explicit `precompile(...)` + // bit 1: dispatched + _Atomic(uint8_t) flags; }; +#define JL_MI_FLAGS_MASK_PRECOMPILED 0x01 +#define JL_MI_FLAGS_MASK_DISPATCHED 0x02 // OpaqueClosure typedef struct _jl_opaque_closure_t { @@ -402,56 +418,56 @@ typedef struct _jl_opaque_closure_t { jl_value_t *captures; size_t world; jl_method_t *source; - jl_fptr_args_t invoke; - void *specptr; + jl_fptr_args_t invoke; // n.b. despite the similar name, this is not an invoke ABI (jl_call_t / julia.call2), but rather the fptr1 (jl_fptr_args_t / julia.call) ABI + void *specptr; // n.b. 
despite the similarity in field name, this is not arbitrary private data for jlcall, but rather the codegen ABI for specsig, and is mandatory if specsig is valid } jl_opaque_closure_t; // This type represents an executable operation typedef struct _jl_code_instance_t { JL_DATA_TYPE - jl_method_instance_t *def; // method this is specialized from + jl_value_t *def; // MethodInstance or ABIOverride + jl_value_t *owner; // Compiler token this belongs to, `jl_nothing` is reserved for native _Atomic(struct _jl_code_instance_t*) next; // pointer to the next cache entry // world range for which this object is valid to use - size_t min_world; - size_t max_world; + _Atomic(size_t) min_world; + _Atomic(size_t) max_world; // inference state cache jl_value_t *rettype; // return type for fptr + jl_value_t *exctype; // thrown type for fptr jl_value_t *rettype_const; // inferred constant return value, or null - _Atomic(jl_value_t *) inferred; // inferred jl_code_info_t (may be compressed), or jl_nothing, or null - //TODO: jl_array_t *edges; // stored information about edges from this object + + // Inferred result. When part of the runtime cache, either + // - A jl_code_info_t (may be compressed) containing the inferred IR + // - jl_nothing, indicating that inference was completed, but the result was + // deleted to save space. + // - null, indicating that inference was not yet completed or did not succeed + _Atomic(jl_value_t *) inferred; + _Atomic(jl_debuginfo_t *) debuginfo; // stored information about edges from this object (set once, with a happens-before both source and invoke) + _Atomic(jl_svec_t *) edges; // forward edge info //TODO: uint8_t absolute_max; // whether true max world is unknown // purity results // see also encode_effects() and decode_effects() in `base/compiler/effects.jl`, - uint32_t ipo_purity_bits; - // ipo_purity_flags: - // uint8_t ipo_consistent : 2; - // uint8_t ipo_effect_free : 2; - // uint8_t ipo_nothrow : 2; - // uint8_t ipo_terminates : 2; - // uint8_t ipo_nonoverlayed : 1; - // uint8_t ipo_notaskstate : 2; - // uint8_t ipo_inaccessiblememonly : 2; - _Atomic(uint32_t) purity_bits; + _Atomic(uint32_t) ipo_purity_bits; // purity_flags: - // uint8_t consistent : 2; + // uint8_t consistent : 3; // uint8_t effect_free : 2; - // uint8_t nothrow : 2; - // uint8_t terminates : 2; - // uint8_t nonoverlayed : 1; - // uint8_t notaskstate : 2; + // uint8_t nothrow : 1; + // uint8_t terminates : 1; + // uint8_t notaskstate : 1; // uint8_t inaccessiblememonly : 2; - jl_value_t *argescapes; // escape information of call arguments + // uint8_t noub : 2; + // uint8_t nonoverlayed : 2; + jl_value_t *analysis_results; // Analysis results about this code (IPO-safe) // compilation state cache _Atomic(uint8_t) specsigflags; // & 0b001 == specptr is a specialized function signature for specTypes->rettype // & 0b010 == invokeptr matches specptr // & 0b100 == From image _Atomic(uint8_t) precompile; // if set, this will be added to the output system image - uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key - _Atomic(jl_callptr_t) invoke; // jlcall entry point + _Atomic(jl_callptr_t) invoke; // jlcall entry point usually, but if this codeinst belongs to an OC Method, then this is an jl_fptr_args_t fptr1 instead, unless it is not, because it is a special token object instead union _jl_generic_specptr_t { _Atomic(void*) fptr; _Atomic(jl_fptr_args_t) fptr1; @@ -461,6 +477,13 @@ typedef struct _jl_code_instance_t { } specptr; // private data for `jlcall entry 
point } jl_code_instance_t; +// May be used as the ->def field of a CodeInstance to override the ABI +typedef struct _jl_abi_override_t { + JL_DATA_TYPE + jl_value_t *abi; + jl_method_instance_t *def; +} jl_abi_override_t; + // all values are callable as Functions typedef jl_value_t jl_function_t; @@ -505,6 +528,7 @@ typedef struct { uint8_t mayinlinealloc:1; uint8_t _reserved:5; uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation) + uint8_t constprop_heustic; // override for inference's constprop heuristic } jl_typename_t; typedef struct { @@ -538,9 +562,17 @@ typedef struct { uint32_t npointers; // number of pointers embedded inside int32_t first_ptr; // index of the first pointer (or -1) uint16_t alignment; // strictest alignment over all fields - uint16_t haspadding : 1; // has internal undefined bytes - uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type - uint16_t padding : 13; + struct { // combine these fields into a struct so that we can take addressof them + uint16_t haspadding : 1; // has internal undefined bytes + uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type + // metadata bit only for GenericMemory eltype layout + uint16_t arrayelem_isboxed : 1; + uint16_t arrayelem_isunion : 1; + // If set, this type's egality can be determined entirely by comparing + // the non-padding bits of this datatype. + uint16_t isbitsegal : 1; + uint16_t padding : 10; + } flags; // union { // jl_fielddesc8_t field8[nfields]; // jl_fielddesc16_t field16[nfields]; @@ -587,18 +619,89 @@ typedef struct _jl_weakref_t { jl_value_t *value; } jl_weakref_t; +// N.B: Needs to be synced with runtime_internals.jl +enum jl_partition_kind { + // Constant: This binding partition is a constant declared using `const _ = ...` + // ->restriction holds the constant value + BINDING_KIND_CONST = 0x0, + // Import Constant: This binding partition is a constant declared using `import A` + // ->restriction holds the constant value + BINDING_KIND_CONST_IMPORT = 0x1, + // Global: This binding partition is a global variable. + // -> restriction holds the type restriction + BINDING_KIND_GLOBAL = 0x2, + // Implicit: The binding was implicitly imported from a `using`'d module. + // ->restriction holds the imported binding + BINDING_KIND_IMPLICIT = 0x3, + // Explicit: The binding was explicitly `using`'d by name + // ->restriction holds the imported binding + BINDING_KIND_EXPLICIT = 0x4, + // Imported: The binding was explicitly `import`'d by name + // ->restriction holds the imported binding + BINDING_KIND_IMPORTED = 0x5, + // Failed: We attempted to import the binding, but the import was ambiguous + // ->restriction is NULL. + BINDING_KIND_FAILED = 0x6, + // Declared: The binding was declared using `global` or similar + // ->restriction is NULL. + BINDING_KIND_DECLARED = 0x7, + // Guard: The binding was looked at, but no global or import was resolved at the time + // ->restriction is NULL. + BINDING_KIND_GUARD = 0x8, + // Undef Constant: This binding partition is a constant declared using `const`, but + // without a value. + // ->restriction is NULL + BINDING_KIND_UNDEF_CONST = 0x9 +}; + +#ifdef _P64 +// Union of a ptr and a 3 bit field. 
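// Illustrative only (hypothetical helpers, not the runtime's real accessors): since
// jl_value_t pointers are at least 8-byte aligned, the low 3 bits of a uintptr_t are
// free to carry a small kind tag alongside the pointer, e.g.
//   static inline uintptr_t example_pack(void *p, unsigned kind) { return (uintptr_t)p | (kind & 0x7); }
//   static inline void     *example_ptr(uintptr_t u)  { return (void*)(u & ~(uintptr_t)0x7); }
//   static inline unsigned  example_kind(uintptr_t u) { return (unsigned)(u & 0x7); }
// How kinds whose restriction pointer is NULL are encoded is not shown by this sketch.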
+typedef uintptr_t jl_ptr_kind_union_t; +#else +typedef struct __attribute__((aligned(8))) { jl_value_t *val; size_t kind; } jl_ptr_kind_union_t; +#endif +typedef struct __attribute__((aligned(8))) _jl_binding_partition_t { + JL_DATA_TYPE + /* union { + * // For ->kind == BINDING_KIND_GLOBAL + * jl_value_t *type_restriction; + * // For ->kind == BINDING_KIND_CONST(_IMPORT) + * jl_value_t *constval; + * // For ->kind in (BINDING_KIND_IMPLICIT, BINDING_KIND_EXPLICIT, BINDING_KIND_IMPORT) + * jl_binding_t *imported; + * } restriction; + * + * Currently: Low 3 bits hold ->kind on _P64 to avoid needing >8 byte atomics + * + * This field is updated atomically with both kind and restriction. The following + * transitions are allowed and modeled by the system: + * + * GUARD -> any + * (DECLARED, FAILED) -> any non-GUARD + * IMPLICIT -> {EXPLICIT, IMPORTED} (->restriction unchanged only) + * + * In addition, we permit (with warning about undefined behavior) changing the restriction + * pointer for CONST(_IMPORT). + * + * All other kind or restriction transitions are disallowed. + */ + _Atomic(jl_ptr_kind_union_t) restriction; + size_t min_world; + _Atomic(size_t) max_world; + _Atomic(struct _jl_binding_partition_t *) next; + size_t reserved; // Reserved for ->kind. Currently this holds the low bits of ->restriction during serialization +} jl_binding_partition_t; + typedef struct _jl_binding_t { JL_DATA_TYPE - _Atomic(jl_value_t*) value; jl_globalref_t *globalref; // cached GlobalRef for this binding - _Atomic(struct _jl_binding_t*) owner; // for individual imported bindings (NULL until 'resolved') - _Atomic(jl_value_t*) ty; // binding type - uint8_t constp:1; - uint8_t exportp:1; - uint8_t imported:1; - uint8_t usingfailed:1; + _Atomic(jl_value_t*) value; + _Atomic(jl_binding_partition_t*) partitions; + uint8_t declared:1; + uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp` + uint8_t publicp:1; // exportp without publicp is not allowed. 
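    // For orientation only (this informally restates the jl_partition_kind comments
    // above; the mapping is approximate): at the Julia level,
    //   `const x = 1`   -> BINDING_KIND_CONST     (restriction holds the value)
    //   `global x::Int` -> BINDING_KIND_GLOBAL    (restriction holds the declared type)
    //   `global x`      -> BINDING_KIND_DECLARED
    //   `import A: f`   -> BINDING_KIND_IMPORTED  (restriction holds the binding for A.f)
    //   `using A: f`    -> BINDING_KIND_EXPLICIT
    //   a name picked up from plain `using A`     -> BINDING_KIND_IMPLICIT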
uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package - uint8_t padding:2; + uint8_t padding:3; } jl_binding_t; typedef struct { @@ -611,12 +714,13 @@ typedef struct _jl_module_t { jl_sym_t *name; struct _jl_module_t *parent; _Atomic(jl_svec_t*) bindings; - _Atomic(jl_array_t*) bindingkeyset; // index lookup by name into bindings + _Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings + jl_sym_t *file; + int32_t line; // hidden fields: - arraylist_t usings; // modules with all bindings potentially imported + arraylist_t usings; /* arraylist of struct jl_module_using */ // modules with all bindings potentially imported jl_uuid_t build_id; jl_uuid_t uuid; - size_t primary_world; _Atomic(uint32_t) counter; int32_t nospecialize; // global bit flags: initialization for new methods int8_t optlevel; @@ -628,11 +732,18 @@ typedef struct _jl_module_t { intptr_t hash; } jl_module_t; -typedef struct _jl_globalref_t { +struct _jl_module_using { + jl_module_t *mod; + size_t min_world; + size_t max_world; +}; + +struct _jl_globalref_t { + JL_DATA_TYPE jl_module_t *mod; jl_sym_t *name; jl_binding_t *binding; -} jl_globalref_t; +}; // one Type-to-Value entry typedef struct _jl_typemap_entry_t { @@ -641,8 +752,8 @@ typedef struct _jl_typemap_entry_t { jl_tupletype_t *sig; // the type signature for this entry jl_tupletype_t *simplesig; // a simple signature for fast rejection jl_svec_t *guardsigs; - size_t min_world; - size_t max_world; + _Atomic(size_t) min_world; + _Atomic(size_t) max_world; union { jl_value_t *value; // generic accessor jl_method_instance_t *linfo; // [nullable] for guard entries @@ -662,10 +773,10 @@ typedef struct _jl_typemap_level_t { // next split may be on Type{T} as LeafTypes then TypeName's parents up to Any // next split may be on LeafType // next split may be on TypeName - _Atomic(jl_array_t*) arg1; // contains LeafType (in a map of non-abstract TypeName) - _Atomic(jl_array_t*) targ; // contains Type{LeafType} (in a map of non-abstract TypeName) - _Atomic(jl_array_t*) name1; // a map for a map for TypeName, for parents up to (excluding) Any - _Atomic(jl_array_t*) tname; // a map for Type{TypeName}, for parents up to (including) Any + _Atomic(jl_genericmemory_t*) arg1; // contains LeafType (in a map of non-abstract TypeName) + _Atomic(jl_genericmemory_t*) targ; // contains Type{LeafType} (in a map of non-abstract TypeName) + _Atomic(jl_genericmemory_t*) name1; // a map for a map for TypeName, for parents up to (excluding) Any + _Atomic(jl_genericmemory_t*) tname; // a map for Type{TypeName}, for parents up to (including) Any // next a linear list of things too complicated at this level for analysis (no more levels) _Atomic(jl_typemap_entry_t*) linear; // finally, start a new level if the type at offs is Any @@ -677,11 +788,11 @@ typedef struct _jl_methtable_t { JL_DATA_TYPE jl_sym_t *name; // sometimes used for debug printing _Atomic(jl_typemap_t*) defs; - _Atomic(jl_array_t*) leafcache; + _Atomic(jl_genericmemory_t*) leafcache; _Atomic(jl_typemap_t*) cache; _Atomic(intptr_t) max_args; // max # of non-vararg arguments in a signature jl_module_t *module; // sometimes used for debug printing - jl_array_t *backedges; // (sig, caller::MethodInstance) pairs + jl_array_t *backedges; // (sig, caller::CodeInstance) pairs jl_mutex_t writelock; uint8_t offs; // 0, or 1 to skip splitting typemap on first (function) argument uint8_t frozen; // whether this accepts adding new methods @@ -740,7 +851,7 @@ typedef struct { /* XX(slotnumber) 
*/ \ /* XX(ssavalue) */ \ /* end of JL_SMALL_TYPEOF */ -enum jlsmall_typeof_tags { +enum jl_small_typeof_tags { jl_null_tag = 0, #define XX(name) jl_##name##_tag, JL_SMALL_TYPEOF(XX) @@ -749,13 +860,23 @@ enum jlsmall_typeof_tags { jl_bitstags_first = jl_char_tag, // n.b. bool is not considered a bitstype, since it can be compared by pointer jl_max_tags = 64 }; -extern jl_datatype_t *small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLIMPORT jl_datatype_t *jl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +#ifndef JL_LIBRARY_EXPORTS_INTERNAL static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) - return (jl_value_t*)small_typeof[t / sizeof(*small_typeof)]; + return (jl_value_t*)jl_small_typeof[t / sizeof(*jl_small_typeof)]; return (jl_value_t*)t; } +#else +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +static inline jl_value_t *jl_to_typeof(uintptr_t t) +{ + if (t < (jl_max_tags << 4)) + return (jl_value_t*)ijl_small_typeof[t / sizeof(*ijl_small_typeof)]; + return (jl_value_t*)t; +} +#endif // kinds @@ -795,12 +916,20 @@ extern JL_DLLIMPORT jl_value_t *jl_bottom_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_method_instance_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_code_instance_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_code_info_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_debuginfo_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_method_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_module_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_unionall_t *jl_addrspace_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_typename_t *jl_addrspace_typename JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_addrspacecore_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_unionall_t *jl_abstractarray_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_unionall_t *jl_densearray_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_unionall_t *jl_array_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_typename_t *jl_array_typename JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_unionall_t *jl_genericmemory_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_typename_t *jl_genericmemory_typename JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_unionall_t *jl_genericmemoryref_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_typename_t *jl_genericmemoryref_typename JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_weakref_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_abstractstring_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_string_type JL_GLOBALLY_ROOTED; @@ -811,16 +940,21 @@ extern JL_DLLIMPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_fielderror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_missingcodeerror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_abioverride_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_memory_exception 
JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_readonlymemory_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_diverror_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_undefref_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_interrupt_exception JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_precompilable_error JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_boundserror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_an_empty_vec_any JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_an_empty_memory_any JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_an_empty_string JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_bool_type JL_GLOBALLY_ROOTED; @@ -836,6 +970,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_uint64_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_float16_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_float32_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_float64_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_bfloat16_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_floatingpoint_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_number_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_void_type JL_GLOBALLY_ROOTED; // deprecated @@ -857,13 +992,23 @@ extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_symbol_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_int32_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_array_uint32_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_uint64_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint8_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint16_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint32_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint64_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_any_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memoryref_uint8_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memoryref_any_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_binding_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_binding_partition_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_gotoifnot_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_enternode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_returnnode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_phinode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_pinode_type JL_GLOBALLY_ROOTED; @@ -883,6 +1028,8 @@ extern JL_DLLIMPORT jl_value_t *jl_false JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_kwcall_func JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_libdl_dlopen_func JL_GLOBALLY_ROOTED; + // gc ------------------------------------------------------------------------- struct _jl_gcframe_t { @@ -914,8 
+1061,10 @@ extern void JL_GC_PUSH2(void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH3(void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH4(void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH5(void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; +extern void JL_GC_PUSH6(void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH7(void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH8(void *, void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; +extern void JL_GC_PUSH9(void *, void *, void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT; // This is necessary, because otherwise the analyzer considers this undefined // behavior and terminates the exploration @@ -955,10 +1104,15 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT; #define JL_GC_PUSH7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(7), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; + #define JL_GC_PUSH8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(8), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; +#define JL_GC_PUSH9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(9), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9}; \ + jl_pgcstack = (jl_gcframe_t*)__gc_stkf; + #define JL_GC_PUSHARGS(rts_var,n) \ rts_var = ((jl_value_t**)alloca(((n)+2)*sizeof(jl_value_t*)))+2; \ @@ -971,58 +1125,39 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT; #endif -JL_DLLEXPORT int jl_gc_enable(int on); -JL_DLLEXPORT int jl_gc_is_enabled(void); - -typedef enum { - JL_GC_AUTO = 0, // use heuristics to determine the collection type - JL_GC_FULL = 1, // force a full collection - JL_GC_INCREMENTAL = 2, // force an incremental collection -} jl_gc_collection_t; - -JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t); - JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_finalize(jl_value_t *o); -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value); -JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void); -JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void); -JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void); -JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void); -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz); JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, struct _jl_task_t *owner) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz); -JL_DLLEXPORT void jl_gc_use(jl_value_t *a); -// Set GC memory trigger in bytes for greedy memory collecting -JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); -JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void); -JL_DLLEXPORT void jl_clear_malloc_data(void); +// Allocates a new weak-reference, assigns its value and increments Julia allocation +// counters. If thread-local allocators are used, then this function should allocate in the +// thread-local allocator of the current thread. 
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value); // GC write barriers -JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { // parent and ptr isa jl_value_t* - if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset - (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young + if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset + (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young jl_gc_queue_root((jl_value_t*)parent); } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { // if ptr is old - if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) { + if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3 /* GC_OLD_MARKED */)) { jl_gc_queue_root((jl_value_t*)ptr); } } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { + // 3 == GC_OLD_MARKED // ptr is an immutable object if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) return; // parent is young or in remset @@ -1031,13 +1166,19 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); const jl_datatype_layout_t *ly = dt->layout; if (ly->npointers) - jl_gc_queue_multiroot((jl_value_t*)parent, ptr); + jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt); } -JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); -JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner); JL_DLLEXPORT void jl_gc_safepoint(void); +JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate); +JL_DLLEXPORT void jl_safepoint_suspend_all_threads(struct _jl_task_t *ct); +JL_DLLEXPORT void jl_safepoint_resume_all_threads(struct _jl_task_t *ct); +JL_DLLEXPORT int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT; + +void *mtarraylist_get(small_arraylist_t *_a, size_t idx) JL_NOTSAFEPOINT; +size_t mtarraylist_length(small_arraylist_t *_a) JL_NOTSAFEPOINT; +void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) JL_NOTSAFEPOINT; +void mtarraylist_push(small_arraylist_t *_a, void *elt) JL_NOTSAFEPOINT; // object accessors ----------------------------------------------------------- @@ -1074,16 +1215,97 @@ STATIC_INLINE jl_value_t *jl_svecset( } #endif -#define jl_array_len(a) (((jl_array_t*)(a))->length) -#define jl_array_data(a) ((void*)((jl_array_t*)(a))->data) -#define jl_array_dim(a,i) ((&((jl_array_t*)(a))->nrows)[i]) -#define jl_array_dim0(a) (((jl_array_t*)(a))->nrows) -#define jl_array_nrows(a) (((jl_array_t*)(a))->nrows) -#define jl_array_ndims(a) ((int32_t)(((jl_array_t*)a)->flags.ndims)) -#define jl_array_data_owner_offset(ndims) (offsetof(jl_array_t,ncols) + sizeof(size_t)*(1+jl_array_ndimwords(ndims))) // in bytes -#define jl_array_data_owner(a) (*((jl_value_t**)((char*)a + jl_array_data_owner_offset(jl_array_ndims(a))))) +#define jl_genericmemory_data_owner_field(a) (*(jl_value_t**)((jl_genericmemory_t*)(a) + 1)) + +#define jl_nparams(t) jl_svec_len(((jl_datatype_t*)(t))->parameters) +#define jl_tparam0(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 0) +#define jl_tparam1(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 1) +#define jl_tparam2(t) 
jl_svecref(((jl_datatype_t*)(t))->parameters, 2) +#define jl_tparam(t,i) jl_svecref(((jl_datatype_t*)(t))->parameters, i) +#define jl_array_data(a,t) ((t*)((jl_array_t*)(a))->ref.ptr_or_offset) +#define jl_array_data_(a) ((void*)((jl_array_t*)(a))->ref.ptr_or_offset) +#define jl_array_dim(a,i) (((jl_array_t*)(a))->dimsize[i]) +#define jl_array_dim0(a) (((jl_array_t*)(a))->dimsize[0]) +#define jl_array_nrows(a) (((jl_array_t*)(a))->dimsize[0]) +#define jl_array_ndims(a) (*(size_t*)jl_tparam1(jl_typetagof(a))) +#define jl_array_maxsize(a) (((jl_array_t*)(a))->ref.mem->length) +#define jl_array_len(a) (jl_array_ndims(a) == 1 ? jl_array_nrows(a) : jl_array_maxsize(a)) + +/* + how - allocation style + 0 = data is inlined + 1 = owns the gc-managed data, exclusively (will free it) + 2 = malloc-allocated pointer (does not own it) + 3 = has a pointer to the String object that owns the data pointer (m must be isbits) +*/ +STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + if (m->ptr == (void*)((char*)m + 16)) // JL_SMALL_BYTE_ALIGNMENT (from julia_internal.h) + return 0; + jl_value_t *owner = jl_genericmemory_data_owner_field(m); + if (owner == (jl_value_t*)m) + return 1; + if (owner == NULL) + return 2; + return 3; +} + +STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + return (jl_value_t*)m; +} + +JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAFEPOINT; + +#ifdef __clang_gcanalyzer__ +jl_value_t **jl_genericmemory_ptr_data(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_ref(void *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set( + void *m JL_ROOTING_ARGUMENT, size_t i, + void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT; +#else +#define jl_genericmemory_ptr_data(a) ((jl_value_t**)((jl_genericmemory_t*)(a))->ptr) +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_ref(void *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(((jl_datatype_t*)jl_typetagof(m_))->layout->flags.arrayelem_isboxed); + assert(i < m_->length); + return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(m_->ptr)) + i); +} +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set( + void *m JL_ROOTING_ARGUMENT, size_t i, + void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(((jl_datatype_t*)jl_typetagof(m_))->layout->flags.arrayelem_isboxed); + assert(i < m_->length); + jl_atomic_store_release(((_Atomic(jl_value_t*)*)(m_->ptr)) + i, (jl_value_t*)x); + if (x) { + jl_gc_wb(m, x); + } + return (jl_value_t*)x; +} +#endif + +STATIC_INLINE uint8_t jl_memory_uint8_ref(void *m, size_t i) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(jl_typetagis(m_, jl_memory_uint8_type)); + assert(i < m_->length); + return ((uint8_t*)m_->ptr)[i]; +} +STATIC_INLINE void jl_memory_uint8_set(void *m, size_t i, uint8_t x) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(jl_typetagis(m_, jl_memory_uint8_type)); + assert(i < m_->length); + ((uint8_t*)m_->ptr)[i] = x; +} -JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT; +STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + return jl_genericmemory_owner(a->ref.mem); +} #ifdef __clang_gcanalyzer__ jl_value_t **jl_array_ptr_data(jl_array_t *a 
JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; @@ -1092,25 +1314,22 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( void *a JL_ROOTING_ARGUMENT, size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT; #else -#define jl_array_ptr_data(a) ((jl_value_t**)((jl_array_t*)(a))->data) +#define jl_array_ptr_data(a) (jl_array_data(a, jl_value_t*)) STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { - assert(((jl_array_t*)a)->flags.ptrarray); + assert(((jl_datatype_t*)jl_typetagof(((jl_array_t*)a)->ref.mem))->layout->flags.arrayelem_isboxed); assert(i < jl_array_len(a)); - return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i); + return jl_atomic_load_relaxed(jl_array_data(a, _Atomic(jl_value_t*)) + i); } STATIC_INLINE jl_value_t *jl_array_ptr_set( void *a JL_ROOTING_ARGUMENT, size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT { - assert(((jl_array_t*)a)->flags.ptrarray); + assert(((jl_datatype_t*)jl_typetagof(((jl_array_t*)a)->ref.mem))->layout->flags.arrayelem_isboxed); assert(i < jl_array_len(a)); - jl_atomic_store_release(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i, (jl_value_t*)x); + jl_atomic_store_release(jl_array_data(a, _Atomic(jl_value_t*)) + i, (jl_value_t*)x); if (x) { - if (((jl_array_t*)a)->flags.how == 3) { - a = jl_array_data_owner(a); - } - jl_gc_wb(a, x); + jl_gc_wb(jl_array_owner((jl_array_t*)a), x); } return (jl_value_t*)x; } @@ -1118,20 +1337,26 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( STATIC_INLINE uint8_t jl_array_uint8_ref(void *a, size_t i) JL_NOTSAFEPOINT { - assert(i < jl_array_len(a)); assert(jl_typetagis(a, jl_array_uint8_type)); - return ((uint8_t*)(jl_array_data(a)))[i]; + assert(i < jl_array_len(a)); + return jl_array_data(a, uint8_t)[i]; } STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPOINT { - assert(i < jl_array_len(a)); assert(jl_typetagis(a, jl_array_uint8_type)); - ((uint8_t*)(jl_array_data(a)))[i] = x; + assert(i < jl_array_len(a)); + jl_array_data(a, uint8_t)[i] = x; +} +STATIC_INLINE void jl_array_uint32_set(void *a, size_t i, uint32_t x) JL_NOTSAFEPOINT +{ + assert(i < jl_array_len(a)); + assert(jl_typetagis(a, jl_array_uint32_type) || jl_typetagis(a, jl_array_int32_type)); + jl_array_data(a, uint32_t)[i] = x; } #define jl_exprarg(e,n) jl_array_ptr_ref(((jl_expr_t*)(e))->args, n) #define jl_exprargset(e, n, v) jl_array_ptr_set(((jl_expr_t*)(e))->args, n, v) -#define jl_expr_nargs(e) jl_array_len(((jl_expr_t*)(e))->args) +#define jl_expr_nargs(e) jl_array_nrows(((jl_expr_t*)(e))->args) #define jl_fieldref(s,i) jl_get_nth_field(((jl_value_t*)(s)),i) #define jl_fieldref_noalloc(s,i) jl_get_nth_field_noalloc(((jl_value_t*)(s)),i) @@ -1145,28 +1370,26 @@ STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPO #define jl_gotonode_label(x) (((intptr_t*)(x))[0]) #define jl_gotoifnot_cond(x) (((jl_value_t**)(x))[0]) #define jl_gotoifnot_label(x) (((intptr_t*)(x))[1]) +#define jl_enternode_catch_dest(x) (((intptr_t*)(x))[0]) +#define jl_enternode_scope(x) (((jl_value_t**)(x))[1]) #define jl_globalref_mod(s) (*(jl_module_t**)(s)) #define jl_globalref_name(s) (((jl_sym_t**)(s))[1]) #define jl_quotenode_value(x) (((jl_value_t**)x)[0]) #define jl_returnnode_value(x) (((jl_value_t**)x)[0]) -#define jl_nparams(t) jl_svec_len(((jl_datatype_t*)(t))->parameters) -#define jl_tparam0(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 0) -#define jl_tparam1(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 1) -#define jl_tparam(t,i) 
jl_svecref(((jl_datatype_t*)(t))->parameters, i) - // get a pointer to the data in a datatype #define jl_data_ptr(v) ((jl_value_t**)v) #define jl_string_data(s) ((char*)s + sizeof(void*)) #define jl_string_len(s) (*(size_t*)s) -#define jl_gf_mtable(f) (((jl_datatype_t*)jl_typeof(f))->name->mt) +#define jl_gf_ft_mtable(ft) (((jl_datatype_t*)ft)->name->mt) +#define jl_gf_mtable(f) (jl_gf_ft_mtable(jl_typeof(f))) #define jl_gf_name(f) (jl_gf_mtable(f)->name) // struct type info -JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack); -#define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL)) +JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack, int cacheable); +#define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL, 0)) STATIC_INLINE jl_svec_t *jl_field_names(jl_datatype_t *st) JL_NOTSAFEPOINT { return st->name->names; @@ -1181,10 +1404,24 @@ STATIC_INLINE jl_value_t *jl_field_type_concrete(jl_datatype_t *st JL_PROPAGATES return jl_svecref(st->types, i); } -#define jl_datatype_size(t) (((jl_datatype_t*)t)->layout->size) -#define jl_datatype_align(t) (((jl_datatype_t*)t)->layout->alignment) -#define jl_datatype_nbits(t) ((((jl_datatype_t*)t)->layout->size)*8) -#define jl_datatype_nfields(t) (((jl_datatype_t*)(t))->layout->nfields) +STATIC_INLINE int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT +{ + return l->nfields == 0 && l->npointers > 0; +} + +JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; + +#define jl_inlinedatatype_layout(t) (((jl_datatype_t*)t)->layout) +STATIC_INLINE const jl_datatype_layout_t *jl_datatype_layout(jl_datatype_t *t) JL_NOTSAFEPOINT +{ + if (jl_is_layout_opaque(t->layout)) // e.g. 
GenericMemory + t = (jl_datatype_t*)jl_unwrap_unionall(t->name->wrapper); + return t->layout; +} +#define jl_datatype_size(t) (jl_datatype_layout((jl_datatype_t*)(t))->size) +#define jl_datatype_align(t) (jl_datatype_layout((jl_datatype_t*)(t))->alignment) +#define jl_datatype_nbits(t) ((jl_datatype_layout((jl_datatype_t*)(t))->size)*8) +#define jl_datatype_nfields(t) (jl_datatype_layout((jl_datatype_t*)(t))->nfields) JL_DLLEXPORT void *jl_symbol_name(jl_sym_t *s); // inline version with strong type check to detect typos in a `->name` chain @@ -1212,23 +1449,23 @@ static inline uint32_t jl_fielddesc_size(int8_t fielddesc_type) JL_NOTSAFEPOINT #define jl_dt_layout_fields(d) ((const char*)(d) + sizeof(jl_datatype_layout_t)) static inline const char *jl_dt_layout_ptrs(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT { - return jl_dt_layout_fields(l) + jl_fielddesc_size(l->fielddesc_type) * l->nfields; + return jl_dt_layout_fields(l) + jl_fielddesc_size(l->flags.fielddesc_type) * l->nfields; } #define DEFINE_FIELD_ACCESSORS(f) \ static inline uint32_t jl_field_##f(jl_datatype_t *st, \ int i) JL_NOTSAFEPOINT \ { \ - const jl_datatype_layout_t *ly = st->layout; \ + const jl_datatype_layout_t *ly = jl_datatype_layout(st); \ assert(i >= 0 && (size_t)i < ly->nfields); \ - if (ly->fielddesc_type == 0) { \ + if (ly->flags.fielddesc_type == 0) { \ return ((const jl_fielddesc8_t*)jl_dt_layout_fields(ly))[i].f; \ } \ - else if (ly->fielddesc_type == 1) { \ + else if (ly->flags.fielddesc_type == 1) { \ return ((const jl_fielddesc16_t*)jl_dt_layout_fields(ly))[i].f; \ } \ else { \ - assert(ly->fielddesc_type == 2); \ + assert(ly->flags.fielddesc_type == 2); \ return ((const jl_fielddesc32_t*)jl_dt_layout_fields(ly))[i].f; \ } \ } \ @@ -1239,24 +1476,24 @@ DEFINE_FIELD_ACCESSORS(size) static inline int jl_field_isptr(jl_datatype_t *st, int i) JL_NOTSAFEPOINT { - const jl_datatype_layout_t *ly = st->layout; + const jl_datatype_layout_t *ly = jl_datatype_layout(st); assert(i >= 0 && (size_t)i < ly->nfields); - return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->fielddesc_type) * i))->isptr; + return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->flags.fielddesc_type) * i))->isptr; } static inline uint32_t jl_ptr_offset(jl_datatype_t *st, int i) JL_NOTSAFEPOINT { - const jl_datatype_layout_t *ly = st->layout; + const jl_datatype_layout_t *ly = st->layout; // NOT jl_datatype_layout(st) assert(i >= 0 && (size_t)i < ly->npointers); const void *ptrs = jl_dt_layout_ptrs(ly); - if (ly->fielddesc_type == 0) { + if (ly->flags.fielddesc_type == 0) { return ((const uint8_t*)ptrs)[i]; } - else if (ly->fielddesc_type == 1) { + else if (ly->flags.fielddesc_type == 1) { return ((const uint16_t*)ptrs)[i]; } else { - assert(ly->fielddesc_type == 2); + assert(ly->flags.fielddesc_type == 2); return ((const uint32_t*)ptrs)[i]; } } @@ -1285,11 +1522,6 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT } -static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT -{ - return l->nfields == 0 && l->npointers > 0; -} - // basic predicates ----------------------------------------------------------- #define jl_is_nothing(v) (((jl_value_t*)(v)) == ((jl_value_t*)jl_nothing)) #define jl_is_tuple(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_tuple_typename) @@ -1320,10 +1552,12 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP #define jl_is_slotnumber(v) jl_typetagis(v,jl_slotnumber_type) #define 
jl_is_expr(v) jl_typetagis(v,jl_expr_type) #define jl_is_binding(v) jl_typetagis(v,jl_binding_type) +#define jl_is_binding_partition(v) jl_typetagis(v,jl_binding_partition_type) #define jl_is_globalref(v) jl_typetagis(v,jl_globalref_type) #define jl_is_gotonode(v) jl_typetagis(v,jl_gotonode_type) #define jl_is_gotoifnot(v) jl_typetagis(v,jl_gotoifnot_type) #define jl_is_returnnode(v) jl_typetagis(v,jl_returnnode_type) +#define jl_is_enternode(v) jl_typetagis(v,jl_enternode_type) #define jl_is_argument(v) jl_typetagis(v,jl_argument_type) #define jl_is_pinode(v) jl_typetagis(v,jl_pinode_type) #define jl_is_phinode(v) jl_typetagis(v,jl_phinode_type) @@ -1345,10 +1579,15 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP #define jl_is_uint8pointer(v)jl_typetagis(v,jl_uint8pointer_type) #define jl_is_llvmpointer(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_llvmpointer_typename) #define jl_is_intrinsic(v) jl_typetagis(v,jl_intrinsic_type) -#define jl_array_isbitsunion(a) (!(((jl_array_t*)(a))->flags.ptrarray) && jl_is_uniontype(jl_tparam0(jl_typeof(a)))) +#define jl_is_addrspacecore(v) jl_typetagis(v,jl_addrspacecore_type) +#define jl_is_abioverride(v) jl_typetagis(v,jl_abioverride_type) +#define jl_genericmemory_isbitsunion(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isunion) +#define jl_is_array_any(v) jl_typetagis(v,jl_array_any_type) JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b); +int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT; + STATIC_INLINE int jl_is_kind(jl_value_t *v) JL_NOTSAFEPOINT { return (v==(jl_value_t*)jl_uniontype_type || v==(jl_value_t*)jl_datatype_type || @@ -1381,23 +1620,23 @@ STATIC_INLINE int jl_is_structtype(void *v) JL_NOTSAFEPOINT STATIC_INLINE int jl_isbits(void *t) JL_NOTSAFEPOINT // corresponding to isbitstype() in julia { - return (jl_is_datatype(t) && ((jl_datatype_t*)t)->isbitstype); + return jl_is_datatype(t) && ((jl_datatype_t*)t)->isbitstype; } STATIC_INLINE int jl_is_datatype_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT { - return (d->instance != NULL); + return d->instance != NULL && d->layout->size == 0 && d->layout->npointers == 0; } STATIC_INLINE int jl_is_abstracttype(void *v) JL_NOTSAFEPOINT { - return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract); + return jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract; } STATIC_INLINE int jl_is_array_type(void *t) JL_NOTSAFEPOINT { - return (jl_is_datatype(t) && - ((jl_datatype_t*)(t))->name == jl_array_typename); + return jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_array_typename; } STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT @@ -1406,6 +1645,42 @@ STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT return jl_is_array_type(t); } +STATIC_INLINE int jl_is_genericmemory_type(void *t) JL_NOTSAFEPOINT +{ + return (jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_genericmemory_typename); +} + +STATIC_INLINE int jl_is_genericmemory(void *v) JL_NOTSAFEPOINT +{ + jl_value_t *t = jl_typeof(v); + return jl_is_genericmemory_type(t); +} + +STATIC_INLINE int jl_is_genericmemoryref_type(void *t) JL_NOTSAFEPOINT +{ + return (jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_genericmemoryref_typename); +} + +STATIC_INLINE int jl_is_genericmemoryref(void *v) JL_NOTSAFEPOINT +{ + jl_value_t *t = jl_typeof(v); + return jl_is_genericmemoryref_type(t); +} + +STATIC_INLINE int jl_is_addrspace_type(void *t) JL_NOTSAFEPOINT +{ + return (jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == 
jl_addrspace_typename); +} + +STATIC_INLINE int jl_is_addrspace(void *v) JL_NOTSAFEPOINT +{ + jl_value_t *t = jl_typeof(v); + return jl_is_addrspace_type(t); +} + STATIC_INLINE int jl_is_opaque_closure_type(void *t) JL_NOTSAFEPOINT { @@ -1461,12 +1736,9 @@ STATIC_INLINE int jl_is_type_type(jl_value_t *v) JL_NOTSAFEPOINT ((jl_datatype_t*)(v))->name == ((jl_datatype_t*)jl_type_type->body)->name); } -STATIC_INLINE int jl_is_array_zeroinit(jl_array_t *a) JL_NOTSAFEPOINT +STATIC_INLINE int jl_is_genericmemory_zeroinit(jl_genericmemory_t *m) JL_NOTSAFEPOINT { - if (a->flags.ptrarray || a->flags.hasptr) - return 1; - jl_value_t *elty = jl_tparam0(jl_typeof(a)); - return jl_is_datatype(elty) && ((jl_datatype_t*)elty)->zeroinit; + return ((jl_datatype_t*)jl_typeof(m))->zeroinit; } // object identity @@ -1515,6 +1787,7 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body); JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT int jl_method_morespecific(jl_method_t *ma, jl_method_t *mb); STATIC_INLINE int jl_is_dispatch_tupletype(jl_value_t *v) JL_NOTSAFEPOINT { @@ -1535,9 +1808,10 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n); JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1); JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2); +JL_DLLEXPORT jl_value_t *jl_apply_type3(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2, jl_value_t *p3); JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt); JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt); -JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params); +JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check); // if uncertain, set check=1 JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np); JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name, jl_module_t *module, @@ -1556,10 +1830,11 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, // constructors JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *bt, const void *src); JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *src); -JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb); +JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb); -JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb); -JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettype, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb); +JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_atomic_storeonce_bits(jl_datatype_t *dt, char *dst, const jl_value_t *src, int nb) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...); JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, 
jl_value_t **args, uint32_t na); JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup); @@ -1578,14 +1853,16 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_gensym(void); JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len); JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void); -JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, - jl_module_t *module, - _Atomic(jl_value_t*) *bp, - jl_binding_t *bnd); +JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved_and_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name); JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module); -JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world); +JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache); JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src); JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_box_int8(int8_t x) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_box_uint8(uint8_t x) JL_NOTSAFEPOINT; @@ -1653,44 +1930,52 @@ int jl_uniontype_size(jl_value_t *ty, size_t *sz); JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al); // arrays -JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *dims); -JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, - jl_value_t *dims); JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, int own_buffer); JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_value_t *dims, int own_buffer); JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr); -JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, - size_t nc); -JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, - size_t nc, size_t z); +JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc); +JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z); +JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len); JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len); JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str); JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len); JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a); JL_DLLEXPORT jl_array_t *jl_alloc_vec_any(size_t n); -JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i); // 0-indexed -JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; // 0-indexed -JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i); // 0-indexed 
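
Editorial aside, not part of this patch: with jl_arrayref/jl_arrayset removed, element access from C goes through the accessors declared earlier in this header (the two-argument jl_array_data(a, T) macro and jl_array_ptr_ref/jl_array_ptr_set) together with the retained allocators. A minimal sketch, assuming an already-initialized embedded runtime and the usual julia.h include; the function name is illustrative only.

#include <julia.h>

// Allocate a Vector{Float64} and a Vector{Any}, touching both through the
// GenericMemory-backed accessors while keeping everything GC-rooted.
static void array_access_sketch(void)
{
    jl_value_t *f64vec_t = NULL;
    jl_array_t *xs = NULL, *ys = NULL;
    JL_GC_PUSH3(&f64vec_t, &xs, &ys);           // rooting macros from this header

    f64vec_t = jl_apply_array_type((jl_value_t*)jl_float64_type, 1);
    xs = jl_alloc_array_1d(f64vec_t, 8);
    double *xdata = jl_array_data(xs, double);  // typed data pointer (new macro form)
    for (size_t i = 0; i < jl_array_nrows(xs); i++)
        xdata[i] = (double)i;

    ys = jl_alloc_vec_any(1);                   // Vector{Any} of length 1
    jl_array_ptr_set(ys, 0, jl_box_float64(xdata[3])); // boxed store, emits write barrier
    jl_value_t *elt = jl_array_ptr_ref(ys, 0);  // boxed load, 0-indexed
    (void)elt;

    JL_GC_POP();
}
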
-JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i); // 0-indexed -JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i); // 0-indexed JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc); JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec); -JL_DLLEXPORT void jl_array_grow_beg(jl_array_t *a, size_t inc); -JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec); -JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz); JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item); JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2); JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim); -JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz); // property access JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a); JL_DLLEXPORT void *jl_array_eltype(jl_value_t *a); JL_DLLEXPORT int jl_array_rank(jl_value_t *a); -JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d); + +// genericmemory +JL_DLLEXPORT jl_genericmemory_t *jl_new_genericmemory(jl_value_t *mtype, jl_value_t *dim); +JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void *data, + size_t nel, int own_buffer); +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel); +JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_memory(const char *str, size_t len); +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype); +JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len); +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n); +JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *m, size_t i); // 0-indexed + +JL_DLLEXPORT jl_genericmemoryref_t *jl_new_memoryref(jl_value_t *typ, jl_genericmemory_t *mem, void *data); +JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, int isatomic); +JL_DLLEXPORT jl_value_t *jl_ptrmemoryrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic) JL_GLOBALLY_ROOTED; +JL_DLLEXPORT jl_genericmemoryref_t jl_memoryrefindex(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, size_t idx) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic); +JL_DLLEXPORT jl_value_t *jl_memoryrefswap(jl_genericmemoryref_t m, jl_value_t *v, int isatomic); +JL_DLLEXPORT jl_value_t *jl_memoryrefmodify(jl_genericmemoryref_t m, jl_value_t *op, jl_value_t *v, int isatomic); +JL_DLLEXPORT jl_value_t *jl_memoryrefreplace(jl_genericmemoryref_t m, jl_value_t *expected, jl_value_t *v, int isatomic); +JL_DLLEXPORT jl_value_t *jl_memoryrefsetonce(jl_genericmemoryref_t m, jl_value_t *v, int isatomic); // strings JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s); @@ -1700,6 +1985,7 @@ extern JL_DLLIMPORT jl_module_t *jl_main_module JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_module_t *jl_core_module JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_module_t *jl_base_module JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_module_t *jl_top_module JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_module_t *jl_libdl_module JL_GLOBALLY_ROOTED; JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent); JL_DLLEXPORT void jl_set_module_nospecialize(jl_module_t *self, int on); JL_DLLEXPORT void 
jl_set_module_optlevel(jl_module_t *self, int lvl); @@ -1713,13 +1999,12 @@ JL_DLLEXPORT int jl_get_module_max_methods(jl_module_t *m); // get binding for reading JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var); -JL_DLLEXPORT jl_binding_t *jl_get_binding_if_bound(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var); // get binding for assignment -JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); +JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc); JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); -JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var); +JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import); JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var); @@ -1730,13 +2015,18 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT); JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT); JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED); -JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var); +JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs); +JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs); +JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind); JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from); JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s); JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname); JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t *s); JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname); -JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s); +JL_DLLEXPORT void jl_module_public(jl_module_t *from, jl_sym_t *s, int exported); JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s); JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m); @@ -1746,10 
+2036,10 @@ STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name) } // eq hash tables -JL_DLLEXPORT jl_array_t *jl_eqtable_put(jl_array_t *h JL_ROOTING_ARGUMENT, jl_value_t *key, jl_value_t *val JL_ROOTED_ARGUMENT, int *inserted); -JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found); -jl_value_t *jl_eqtable_getkey(jl_array_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_genericmemory_t *jl_eqtable_put(jl_genericmemory_t *h JL_ROOTING_ARGUMENT, jl_value_t *key, jl_value_t *val JL_ROOTED_ARGUMENT, int *inserted); +JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_genericmemory_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt, int *found); +jl_value_t *jl_eqtable_getkey(jl_genericmemory_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; // system information JL_DLLEXPORT int jl_errno(void) JL_NOTSAFEPOINT; @@ -1789,7 +2079,9 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname, const char *context, jl_value_t *ty JL_MAYBE_UNROOTED, jl_value_t *got JL_MAYBE_UNROOTED); -JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var); +JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope JL_MAYBE_UNROOTED); +JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var); +JL_DLLEXPORT void JL_NORETURN jl_argument_error(char *str); JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str); JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED, jl_value_t *t JL_MAYBE_UNROOTED); @@ -1802,18 +2094,6 @@ JL_DLLEXPORT void JL_NORETURN jl_bounds_error_tuple_int(jl_value_t **v, JL_DLLEXPORT void JL_NORETURN jl_bounds_error_unboxed_int(void *v, jl_value_t *vt, size_t i); JL_DLLEXPORT void JL_NORETURN jl_bounds_error_ints(jl_value_t *v JL_MAYBE_UNROOTED, size_t *idxs, size_t nidxs); -JL_DLLEXPORT void JL_NORETURN jl_eof_error(void); - -// Return the exception currently being handled, or `jl_nothing`. -// -// The catch scope is determined dynamically so this works in functions called -// from a catch block. The returned value is gc rooted until we exit the -// enclosing JL_CATCH. -// FIXME: Teach the static analyzer about this rather than using -// JL_GLOBALLY_ROOTED which is far too optimistic. 
-JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_exception_occurred(void); -JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT; #define JL_NARGS(fname, min, max) \ if (nargs < min) jl_too_few_args(#fname, min); \ @@ -1835,7 +2115,7 @@ JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT; typedef enum { JL_IMAGE_CWD = 0, JL_IMAGE_JULIA_HOME = 1, - //JL_IMAGE_LIBJULIA = 2, + JL_IMAGE_IN_MEMORY = 2 } JL_IMAGE_SEARCH; JL_DLLIMPORT const char *jl_get_libdir(void); @@ -1860,6 +2140,7 @@ JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t e JL_DLLEXPORT void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage); +JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred); JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci); @@ -1927,7 +2208,6 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr JL_MAYBE_UNROOTED); // IR representation JL_DLLEXPORT jl_value_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code); JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_value_t *data); -JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_value_t *data) JL_NOTSAFEPOINT; JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_value_t *data) JL_NOTSAFEPOINT; JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_value_t *data) JL_NOTSAFEPOINT; JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data) JL_NOTSAFEPOINT; @@ -1936,13 +2216,15 @@ JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_value_t *data, size_t i) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms); JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms); JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i); +JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_value_t *cl, size_t pc) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts); +JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_value_t *cl, size_t nstmts); - -JL_DLLEXPORT int jl_is_operator(char *sym); -JL_DLLEXPORT int jl_is_unary_operator(char *sym); -JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym); -JL_DLLEXPORT int jl_is_syntactic_operator(char *sym); -JL_DLLEXPORT int jl_operator_precedence(char *sym); +JL_DLLEXPORT int jl_is_operator(const char *sym); +JL_DLLEXPORT int jl_is_unary_operator(const char *sym); +JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym); +JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym); +JL_DLLEXPORT int jl_operator_precedence(const char *sym); STATIC_INLINE int jl_vinfo_sa(uint8_t vi) { @@ -1958,6 +2240,7 @@ STATIC_INLINE int jl_vinfo_usedundef(uint8_t vi) JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs); JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *meth); +JL_DLLEXPORT jl_value_t *jl_invoke_oc(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *meth); JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *linfo); STATIC_INLINE jl_value_t *jl_apply(jl_value_t **args, uint32_t nargs) @@ -1972,9 +2255,6 @@ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f 
JL_MAYBE_UNROOTED, jl_value_t JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED, jl_value_t *c JL_MAYBE_UNROOTED); -// interfacing with Task runtime -JL_DLLEXPORT void jl_yield(void); - // async signal handling ------------------------------------------------------ JL_DLLEXPORT void jl_install_sigint_handler(void); @@ -1991,6 +2271,7 @@ typedef struct _jl_excstack_t jl_excstack_t; typedef struct _jl_handler_t { jl_jmp_buf eh_ctx; jl_gcframe_t *gcstack; + jl_value_t *scope; struct _jl_handler_t *prev; int8_t gc_state; size_t locks_len; @@ -2008,24 +2289,30 @@ typedef struct _jl_task_t { jl_value_t *tls; jl_value_t *donenotify; jl_value_t *result; - jl_value_t *logstate; + jl_value_t *scope; jl_function_t *start; - // 4 byte padding on 32-bit systems - // uint32_t padding0; - uint64_t rngState[JL_RNG_SIZE]; _Atomic(uint8_t) _state; uint8_t sticky; // record whether this Task can be migrated to a new thread - _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with - // 1 byte padding - // uint8_t padding1; - // multiqueue priority uint16_t priority; + _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with + uint8_t pad0[3]; + // === 64 bytes (cache line) + uint64_t rngState[JL_RNG_SIZE]; + // flag indicating whether or not to record timing metrics for this task + uint8_t metrics_enabled; + uint8_t pad1[3]; + // timestamp this task first entered the run queue + _Atomic(uint64_t) first_enqueued_at; + // timestamp this task was most recently scheduled to run + _Atomic(uint64_t) last_started_running_at; + // time this task has spent running; updated when it yields or finishes. + _Atomic(uint64_t) running_time_ns; + // === 64 bytes (cache line) + // timestamp this task finished (i.e. entered state DONE or FAILED). 
+ _Atomic(uint64_t) finished_at; // hidden state: -#ifdef USE_TRACY - const char *name; -#endif // id of owning thread - does not need to be defined until the task runs _Atomic(int16_t) tid; // threadpool id @@ -2043,16 +2330,15 @@ typedef struct _jl_task_t { size_t world_age; // quick lookup for current ptls jl_ptls_t ptls; // == jl_all_tls_states[tid] +#ifdef USE_TRACY + const char *name; +#endif // saved exception stack jl_excstack_t *excstack; // current exception handler jl_handler_t *eh; // saved thread state - jl_ucontext_t ctx; - void *stkbuf; // malloc'd memory (either copybuf or stack) - size_t bufsz; // actual sizeof stkbuf - unsigned int copy_stack:31; // sizeof stack for copybuf - unsigned int started:1; + jl_ucontext_t ctx; // pointer into stkbuf, if suspended } jl_task_t; #define JL_TASK_STATE_RUNNABLE 0 @@ -2065,7 +2351,6 @@ JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT; JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_rethrow(void); -JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void); JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct); JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; @@ -2074,13 +2359,32 @@ JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_RO extern JL_DLLIMPORT int jl_task_gcstack_offset; extern JL_DLLIMPORT int jl_task_ptls_offset; +#ifdef __cplusplus +} +#endif #include "julia_locks.h" // requires jl_task_t definition +#ifdef __cplusplus +extern "C" { +#endif -JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh); -JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh); -JL_DLLEXPORT void jl_pop_handler(int n); -JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT; +// Return the exception currently being handled, or `jl_nothing`. +// +// The catch scope is determined dynamically so this works in functions called +// from a catch block. The returned value is gc rooted until we exit the +// enclosing JL_CATCH. +// FIXME: Teach the static analyzer about this rather than using +// JL_GLOBALLY_ROOTED which is far too optimistic. +JL_DLLEXPORT jl_value_t *jl_current_exception(jl_task_t *ct) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_exception_occurred(void); +JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT; + +JL_DLLEXPORT void jl_enter_handler(jl_task_t *ct, jl_handler_t *eh) JL_NOTSAFEPOINT ; +JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh); +JL_DLLEXPORT void jl_eh_restore_state_noexcept(jl_task_t *ct, jl_handler_t *eh) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_pop_handler(jl_task_t *ct, int n) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_pop_handler_noexcept(jl_task_t *ct, int n) JL_NOTSAFEPOINT; +JL_DLLEXPORT size_t jl_excstack_state(jl_task_t *ct) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_restore_excstack(jl_task_t *ct, size_t state) JL_NOTSAFEPOINT; #if defined(_OS_WINDOWS_) #if defined(_COMPILER_GCC_) @@ -2120,10 +2424,9 @@ void (ijl_longjmp)(jmp_buf _Buf, int _Value); #define jl_setjmp_name "sigsetjmp" #endif #define jl_setjmp(a,b) sigsetjmp(a,b) -#if defined(_COMPILER_ASAN_ENABLED_) && __GLIBC__ -// Bypass the ASAN longjmp wrapper - we're unpoisoning the stack ourselves. 
-JL_DLLIMPORT int __attribute__ ((nothrow)) (__libc_siglongjmp)(jl_jmp_buf buf, int val); -#define jl_longjmp(a,b) __libc_siglongjmp(a,b) +#if defined(_COMPILER_ASAN_ENABLED_) && defined(__GLIBC__) +extern void (*real_siglongjmp)(jmp_buf _Buf, int _Value); +#define jl_longjmp(a,b) real_siglongjmp(a,b) #else #define jl_longjmp(a,b) siglongjmp(a,b) #endif @@ -2132,24 +2435,37 @@ JL_DLLIMPORT int __attribute__ ((nothrow)) (__libc_siglongjmp)(jl_jmp_buf buf, i #ifdef __clang_gcanalyzer__ -// This is hard. Ideally we'd teach the static analyzer about the extra control -// flow edges. But for now, just hide this as best we can extern int had_exception; -#define JL_TRY if (1) -#define JL_CATCH if (had_exception) + +// The analyzer assumes that the TRY block always executes to completion. +// This can lead to both false positives and false negatives, since it doesn't model the fact that throwing always leaves the try block early. +#define JL_TRY \ + int i__try, i__catch; jl_handler_t __eh; jl_task_t *__eh_ct; \ + __eh_ct = jl_current_task; \ + size_t __excstack_state = jl_excstack_state(__eh_ct); \ + jl_enter_handler(__eh_ct, &__eh); \ + __eh_ct->eh = &__eh; \ + for (i__try=1; i__try; i__try=0) + +#define JL_CATCH \ + if (!had_exception) \ + jl_eh_restore_state_noexcept(__eh_ct, &__eh); \ + else \ + for (i__catch=1, jl_eh_restore_state(__eh_ct, &__eh); i__catch; i__catch=0, /* CATCH BLOCK; */ jl_restore_excstack(__eh_ct, __excstack_state)) #else -#define JL_TRY \ - int i__tr, i__ca; jl_handler_t __eh; \ - size_t __excstack_state = jl_excstack_state(); \ - jl_enter_handler(&__eh); \ - if (!jl_setjmp(__eh.eh_ctx,0)) \ - for (i__tr=1; i__tr; i__tr=0, jl_eh_restore_state(&__eh)) +#define JL_TRY \ + int i__try, i__catch; jl_handler_t __eh; jl_task_t *__eh_ct; \ + __eh_ct = jl_current_task; \ + size_t __excstack_state = jl_excstack_state(__eh_ct); \ + jl_enter_handler(__eh_ct, &__eh); \ + if (!jl_setjmp(__eh.eh_ctx, 0)) \ + for (i__try=1, __eh_ct->eh = &__eh; i__try; i__try=0, /* TRY BLOCK; */ jl_eh_restore_state_noexcept(__eh_ct, &__eh)) -#define JL_CATCH \ - else \ - for (i__ca=1, jl_eh_restore_state(&__eh); i__ca; i__ca=0, jl_restore_excstack(__excstack_state)) +#define JL_CATCH \ + else \ + for (i__catch=1, jl_eh_restore_state(__eh_ct, &__eh); i__catch; i__catch=0, /* CATCH BLOCK; */ jl_restore_excstack(__eh_ct, __excstack_state)) #endif @@ -2184,19 +2500,24 @@ typedef struct { } jl_uv_file_t; #ifdef __GNUC__ -#define _JL_FORMAT_ATTR(type, str, arg) \ - __attribute__((format(type, str, arg))) +# ifdef __MINGW32__ +#define _JL_FORMAT_ATTR(str, arg) \ + __attribute__((format(__MINGW_PRINTF_FORMAT, str, arg))) +# else +#define _JL_FORMAT_ATTR(str, arg) \ + __attribute__((format(printf, str, arg))) +# endif #else -#define _JL_FORMAT_ATTR(type, str, arg) +#define _JL_FORMAT_ATTR(str, arg) #endif JL_DLLEXPORT void jl_uv_puts(struct uv_stream_s *stream, const char *str, size_t n); JL_DLLEXPORT int jl_printf(struct uv_stream_s *s, const char *format, ...) - _JL_FORMAT_ATTR(printf, 2, 3); + _JL_FORMAT_ATTR(2, 3); JL_DLLEXPORT int jl_vprintf(struct uv_stream_s *s, const char *format, va_list args) - _JL_FORMAT_ATTR(printf, 2, 0); + _JL_FORMAT_ATTR(2, 0); JL_DLLEXPORT void jl_safe_printf(const char *str, ...) 
JL_NOTSAFEPOINT - _JL_FORMAT_ATTR(printf, 1, 2); + _JL_FORMAT_ATTR(1, 2); extern JL_DLLEXPORT JL_STREAM *JL_STDIN; extern JL_DLLEXPORT JL_STREAM *JL_STDOUT; @@ -2209,7 +2530,6 @@ JL_DLLEXPORT int jl_termios_size(void); // showing and std streams JL_DLLEXPORT void jl_flush_cstdio(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT; JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT; @@ -2231,6 +2551,8 @@ JL_DLLEXPORT ssize_t jl_sizeof_jl_options(void); JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp); JL_DLLEXPORT char *jl_format_filename(const char *output_pattern); +uint64_t parse_heap_size_hint(const char *optarg, const char *option_name); + // Set julia-level ARGS array according to the arguments provided in // argc/argv JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv); @@ -2291,15 +2613,29 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_OPTIONS_HANDLE_SIGNALS_ON 1 #define JL_OPTIONS_HANDLE_SIGNALS_OFF 0 +#define JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_YES 1 +#define JL_OPTIONS_USE_EXPERIMENTAL_FEATURES_NO 0 + #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES 1 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO 0 +#define JL_OPTIONS_USE_COMPILED_MODULES_STRICT 3 +#define JL_OPTIONS_USE_COMPILED_MODULES_EXISTING 2 #define JL_OPTIONS_USE_COMPILED_MODULES_YES 1 #define JL_OPTIONS_USE_COMPILED_MODULES_NO 0 +#define JL_OPTIONS_USE_PKGIMAGES_EXISTING 2 #define JL_OPTIONS_USE_PKGIMAGES_YES 1 #define JL_OPTIONS_USE_PKGIMAGES_NO 0 +#define JL_TRIM_NO 0 +#define JL_TRIM_SAFE 1 +#define JL_TRIM_UNSAFE 2 +#define JL_TRIM_UNSAFE_WARN 3 + +#define JL_OPTIONS_TASK_METRICS_OFF 0 +#define JL_OPTIONS_TASK_METRICS_ON 1 + // Version information #include // Generated file @@ -2308,8 +2644,6 @@ JL_DLLEXPORT extern int jl_ver_minor(void); JL_DLLEXPORT extern int jl_ver_patch(void); JL_DLLEXPORT extern int jl_ver_is_release(void); JL_DLLEXPORT extern const char *jl_ver_string(void); -JL_DLLEXPORT const char *jl_git_branch(void); -JL_DLLEXPORT const char *jl_git_commit(void); // nullable struct representations typedef struct { @@ -2333,8 +2667,6 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *) JL_NOTSAFEPOINT; // codegen interface ---------------------------------------------------------- // The root propagation here doesn't have to be literal, but callers should // ensure that the return value outlives the MethodInstance -typedef jl_value_t *(*jl_codeinstance_lookup_t)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, - size_t min_world, size_t max_world); typedef struct { int track_allocations; // can we track allocations? int code_coverage; // can we measure coverage? @@ -2344,18 +2676,19 @@ typedef struct { int gnu_pubnames; // can we emit the gnu pubnames debuginfo int debug_info_kind; // Enum for line-table-only, line-directives-only, // limited, standalone - + int debug_info_level; // equivalent to the -g level from the cli int safepoint_on_entry; // Emit a safepoint on entry to each function int gcstack_arg; // Pass the ptls value as an argument with swiftself - // Cache access. Default: jl_rettype_inferred. - jl_codeinstance_lookup_t lookup; - - // If not `nothing`, rewrite all generic calls to call - // generic_context(f, args...) instead of f(args...). 
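For readers unfamiliar with the JL_TRY/JL_CATCH shape shown above, here is a stripped-down, self-contained sketch of the same setjmp-plus-one-shot-for pattern. It is deliberately not the real implementation: a single global jmp_buf, no task pointer, no nesting and no exception stack; the for-loops' increment clauses merely stand in for jl_eh_restore_state_noexcept / jl_restore_excstack.

#include <setjmp.h>
#include <stdio.h>

static jmp_buf toy_handler;

/* The else binds to the if, so longjmp'ing back into setjmp selects the
 * catch branch; each one-shot for runs its "increment" clause as the
 * cleanup step when its block is exited normally. */
#define TOY_TRY \
    if (!setjmp(toy_handler)) \
        for (int t__ = 1; t__; t__ = 0, puts("cleanup: no exception"))
#define TOY_CATCH \
    else \
        for (int c__ = 1; c__; c__ = 0, puts("cleanup: drop exception state"))
#define TOY_THROW() longjmp(toy_handler, 1)

int main(void)
{
    TOY_TRY {
        puts("in try");
        TOY_THROW();
        puts("not reached");
    }
    TOY_CATCH {
        puts("in catch");
    }
    return 0;
}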
- jl_value_t *generic_context; + int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly + int trim; // can we emit dynamic dispatches? } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; +extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams; + +typedef struct { + int emit_metadata; +} jl_emission_params_t; #ifdef __cplusplus } diff --git a/src/julia_assert.h b/src/julia_assert.h index 4b120fd9e845b..13cbdbcd36f5b 100644 --- a/src/julia_assert.h +++ b/src/julia_assert.h @@ -10,6 +10,7 @@ // Files that need `assert` should include this file after all other includes. // All files should also check `JL_NDEBUG` instead of `NDEBUG`. +#pragma GCC visibility push(default) #ifdef NDEBUG # ifndef JL_NDEBUG # undef NDEBUG @@ -28,3 +29,4 @@ # include # endif #endif +#pragma GCC visibility pop diff --git a/src/julia_atomics.h b/src/julia_atomics.h index c4488f774c987..d05f0fafab28f 100644 --- a/src/julia_atomics.h +++ b/src/julia_atomics.h @@ -56,6 +56,15 @@ enum jl_memory_order { jl_memory_order_seq_cst }; +/** + * Cache line size +*/ +#if (defined(_CPU_AARCH64_) && defined(_OS_DARWIN_)) || defined(_CPU_PPC64_) // Apple silicon and PPC7+ have 128 byte cache lines +#define JL_CACHE_BYTE_ALIGNMENT 128 +#else +#define JL_CACHE_BYTE_ALIGNMENT 64 +#endif + /** * Thread synchronization primitives: * @@ -94,12 +103,12 @@ enum jl_memory_order { // this wrong thus we include the correct definitions here (with implicit // conversion), instead of using the macro version template -T jl_atomic_load(std::atomic *ptr) +T jl_atomic_load(const std::atomic *ptr) { return std::atomic_load(ptr); } template -T jl_atomic_load_explicit(std::atomic *ptr, std::memory_order order) +T jl_atomic_load_explicit(const std::atomic *ptr, std::memory_order order) { return std::atomic_load_explicit(ptr, order); } @@ -165,6 +174,11 @@ bool jl_atomic_cmpswap_acqrel(std::atomic *ptr, T *expected, S val) { return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, memory_order_acq_rel, memory_order_acquire); } +template +bool jl_atomic_cmpswap_release(std::atomic *ptr, T *expected, S val) +{ + return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, memory_order_release, memory_order_relaxed); +} #define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed) template T jl_atomic_exchange(std::atomic *ptr, S desired) @@ -176,6 +190,7 @@ T jl_atomic_exchange_explicit(std::atomic *ptr, S desired, std::memory_order { return std::atomic_exchange_explicit(ptr, desired, order); } +#define jl_atomic_exchange_release(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_release) #define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed) extern "C" { #else @@ -196,11 +211,15 @@ extern "C" { atomic_compare_exchange_strong(obj, expected, desired) # define jl_atomic_cmpswap_relaxed(obj, expected, desired) \ atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed) -#define jl_atomic_cmpswap_acqrel(obj, expected, desired) \ +# define jl_atomic_cmpswap_release(obj, expected, desired) \ + atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_release, memory_order_relaxed) +# define jl_atomic_cmpswap_acqrel(obj, expected, desired) \ atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_acq_rel, memory_order_acquire) // TODO: Maybe add jl_atomic_cmpswap_weak for spin lock # define
jl_atomic_exchange(obj, desired) \ atomic_exchange(obj, desired) +# define jl_atomic_exchange_release(obj, desired) \ + atomic_exchange_explicit(obj, desired, memory_order_release) # define jl_atomic_exchange_relaxed(obj, desired) \ atomic_exchange_explicit(obj, desired, memory_order_relaxed) # define jl_atomic_store(obj, val) \ @@ -247,6 +266,7 @@ extern "C" { #define _Atomic(T) T #undef jl_atomic_exchange +#undef jl_atomic_exchange_release #undef jl_atomic_exchange_relaxed #define jl_atomic_exchange(obj, desired) \ (__extension__({ \ @@ -255,10 +275,12 @@ extern "C" { *p__analyzer__ = (desired); \ temp__analyzer__; \ })) +#define jl_atomic_exchange_release jl_atomic_exchange #define jl_atomic_exchange_relaxed jl_atomic_exchange #undef jl_atomic_cmpswap #undef jl_atomic_cmpswap_acqrel +#undef jl_atomic_cmpswap_release #undef jl_atomic_cmpswap_relaxed #define jl_atomic_cmpswap(obj, expected, desired) \ (__extension__({ \ @@ -273,6 +295,7 @@ extern "C" { eq__analyzer__; \ })) #define jl_atomic_cmpswap_acqrel jl_atomic_cmpswap +#define jl_atomic_cmpswap_release jl_atomic_cmpswap #define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap #undef jl_atomic_store diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h index 1c0929717b293..1f35d3693fefd 100644 --- a/src/julia_fasttls.h +++ b/src/julia_fasttls.h @@ -22,14 +22,9 @@ extern "C" { typedef struct _jl_gcframe_t jl_gcframe_t; -#if defined(_OS_DARWIN_) -#include -typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific) -#else typedef jl_gcframe_t **(jl_get_pgcstack_func)(void); -#endif -#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) +#if !defined(_OS_WINDOWS_) #define JULIA_DEFINE_FAST_TLS \ static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec; \ JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore; \ diff --git a/src/julia_gcext.h b/src/julia_gcext.h index 27f0a6b5ec11c..05140e4b09ace 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -34,9 +34,13 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_al JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable); +// Memory pressure callback +typedef void (*jl_gc_cb_notify_gc_pressure_t)(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable); + // Types for custom mark and sweep functions. -typedef uintptr_t (*jl_markfunc_t)(jl_ptls_t, jl_value_t *obj); -typedef void (*jl_sweepfunc_t)(jl_value_t *obj); +typedef uintptr_t (*jl_markfunc_t)(jl_ptls_t, jl_value_t *obj) JL_NOTSAFEPOINT; +typedef void (*jl_sweepfunc_t)(jl_value_t *obj) JL_NOTSAFEPOINT; // Function to create a new foreign type with custom // mark and sweep functions. @@ -56,10 +60,10 @@ JL_DLLEXPORT int jl_reinit_foreign_type( jl_markfunc_t markfunc, jl_sweepfunc_t sweepfunc); -JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt); +JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) JL_NOTSAFEPOINT; -JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void); -JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void); +JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) JL_NOTSAFEPOINT; // Field layout descriptor for custom types that do // not fit Julia layout conventions. 
This is associated with @@ -76,9 +80,9 @@ JL_DLLEXPORT void *jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty); // Queue an object or array of objects for scanning by the garbage collector. // These functions must only be called from within a root scanner callback // or from within a custom mark function. -JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj); +JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, - jl_value_t **objs, size_t nobjs); + jl_value_t **objs, size_t nobjs) JL_NOTSAFEPOINT; // Sweep functions will not automatically be called for objects of // foreign types, as that may not always be desired. Only calling @@ -129,7 +133,7 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void); // jl_typeof(obj) is an actual type object. // // NOTE: Only valid to call from within a GC context. -JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p); +JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT; // Return a non-null pointer to the start of the stack area if the task // has an associated stack buffer. In that case, *size will also contain @@ -146,7 +150,7 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid) // and may not be tight. JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, - char **total_start, char **total_end); + char **total_start, char **total_end) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/julia_internal.h b/src/julia_internal.h index cf65521770681..00d603f26c7f2 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -13,9 +13,13 @@ #include "support/strtod.h" #include "gc-alloc-profiler.h" #include "support/rle.h" +#include +#include #include #include #include +#include + #if !defined(_WIN32) #include #else @@ -62,7 +66,8 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf) that we're resetting to. The idea is to remove the poison from the frames that we're skipping over, since they won't be unwound. */ uintptr_t top = jmpbuf_sp(buf); - uintptr_t bottom = (uintptr_t)ct->stkbuf; + uintptr_t bottom = (uintptr_t)(ct->ctx.copy_stack ? (char*)ct->ptls->stackbase - ct->ptls->stacksize : (char*)ct->ctx.stkbuf); + //uintptr_t bottom = (uintptr_t)⊤ __asan_unpoison_stack_memory(bottom, top - bottom); } static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) { @@ -96,22 +101,25 @@ JL_DLLIMPORT void *__tsan_get_current_fiber(void); JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber); JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags); #endif -#ifdef __cplusplus -} -#endif -// Remove when C11 is required for C code. -#ifndef static_assert -# ifndef __cplusplus -// C11 should already have `static_assert` from `` so there's no need -// to check C version. -# ifdef __GNUC__ -# define static_assert _Static_assert -# else -# define static_assert(...) -# endif -# endif -// For C++, C++11 or MSVC is required. Both provide `static_assert`. 
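The block removed above was a pre-C11 compatibility shim; with C11 assumed, static_assert comes directly from <assert.h>. A trivial, self-contained illustration in the style of the size-class assertions later in this header (the concrete checks are examples only, not taken from the source):

#include <assert.h>   /* C11: defines the static_assert macro */
#include <stdint.h>

static_assert(sizeof(uint64_t) == 8, "uint64_t must be 8 bytes");
static_assert(sizeof(void *) == 4 || sizeof(void *) == 8,
              "expected a 32- or 64-bit pointer size");

int main(void) { return 0; }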
+#ifndef _OS_WINDOWS_ + #if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_) + #define MAX_ALIGN 8 + #elif defined(_CPU_AARCH64_) || defined(_CPU_RISCV64_) || (JL_LLVM_VERSION >= 180000 && (defined(_CPU_X86_64_) || defined(_CPU_X86_))) + // int128 is 16 bytes aligned on aarch64 and riscv, and on x86 with LLVM >= 18 + #define MAX_ALIGN 16 + #elif defined(_P64) + // Generically we assume MAX_ALIGN is sizeof(void*) + #define MAX_ALIGN 8 + #else + #define MAX_ALIGN 4 + #endif +#else + #if JL_LLVM_VERSION >= 180000 + #define MAX_ALIGN 16 + #else + #define MAX_ALIGN 8 + #endif #endif #ifndef alignof @@ -181,10 +189,8 @@ extern jl_mutex_t jl_uv_mutex; extern _Atomic(int) jl_uv_n_waiters; void JL_UV_LOCK(void); #define JL_UV_UNLOCK() JL_UNLOCK(&jl_uv_mutex) - -#ifdef __cplusplus -extern "C" { -#endif +extern _Atomic(unsigned) _threadedregion; +extern _Atomic(uint16_t) io_loop_tid; int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT; @@ -193,7 +199,6 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT; // Returns time in nanosec JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_set_peek_cond(uintptr_t); JL_DLLEXPORT double jl_get_profile_peek_duration(void); JL_DLLEXPORT void jl_set_profile_peek_duration(double); @@ -203,13 +208,52 @@ JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; +void jl_with_stackwalk_lock(void (*f)(void*) JL_NOTSAFEPOINT, void *ctx) JL_NOTSAFEPOINT; + +arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT; +typedef struct { + size_t bt_size; + int tid; +} jl_record_backtrace_result_t; +JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, struct _jl_bt_element_t *bt_data, + size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT; +extern volatile struct _jl_bt_element_t *profile_bt_data_prof; +extern volatile size_t profile_bt_size_max; +extern volatile size_t profile_bt_size_cur; +extern volatile int profile_running; +extern volatile int profile_all_tasks; +// Ensures that we can safely read the `live_tasks`field of every TLS when profiling. +// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks +// the `live_tasks` array while we are reading it or frees tasks that are being profiled. +// Because of that, this lock must be held in `jl_profile_task` and `jl_gc_sweep_stack_pools_and_mtarraylist_buffers`. +extern uv_mutex_t live_tasks_lock; +// Ensures that we can safely write to `profile_bt_data_prof` and `profile_bt_size_cur`. +// We want to avoid the case that: +// - We start to profile a task very close to the profiling time window end. +// - The profiling time window ends and we start to read the profile data in a compute thread. +// - We write to the profile in a profiler thread while the compute thread is reading it. +// Locking discipline: `bt_data_prof_lock` must be held inside the scope of `live_tasks_lock`. 
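A minimal sketch of the lock ordering documented above, using pthread mutexes as stand-ins for the two uv_mutex_t locks; the function name and body are illustrative placeholders, not the real profiler code.

#include <pthread.h>

static pthread_mutex_t live_tasks_lock_stub   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bt_data_prof_lock_stub = PTHREAD_MUTEX_INITIALIZER;

static void profile_one_task_sketch(void)
{
    /* bt_data_prof_lock is only taken while live_tasks_lock is already
     * held, and released first, as the comment above requires. */
    pthread_mutex_lock(&live_tasks_lock_stub);
    pthread_mutex_lock(&bt_data_prof_lock_stub);
    /* ... record one backtrace into the profile buffer ... */
    pthread_mutex_unlock(&bt_data_prof_lock_stub);
    pthread_mutex_unlock(&live_tasks_lock_stub);
}

int main(void)
{
    profile_one_task_sketch();
    return 0;
}

Taking the two locks in one fixed order everywhere is what keeps the profiler thread and the GC sweep from deadlocking against each other.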
+extern uv_mutex_t bt_data_prof_lock; +#define PROFILE_STATE_THREAD_NOT_SLEEPING (1) +#define PROFILE_STATE_THREAD_SLEEPING (2) +#define PROFILE_STATE_WALL_TIME_PROFILING (3) +void jl_profile_task(void); // number of cycles since power-on static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT { #if defined(_CPU_X86_64_) + // This is nopl 0(%rax, %rax, 1), but assembler are inconsistent about whether + // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use + // the 5 byte one. +#define NOP5_OVERRIDE_NOP ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" uint64_t low, high; - __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + // This instruction sequence is promised by rr to be patchable. rr can usually + // also patch `rdtsc` in regular code, but without the preceding nop, there could + // be an interfering branch into the middle of rr's patch region. Using this + // sequence prevents a massive rr-induced slowdown if the compiler happens to emit + // an unlucky pattern. See https://github.com/rr-debugger/rr/pull/3580. + __asm__ volatile(NOP5_OVERRIDE_NOP "rdtsc" : "=a"(low), "=d"(high)); return (high << 32) | low; #elif defined(_CPU_X86_) int64_t ret; @@ -243,6 +287,11 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT struct timeval tv; gettimeofday(&tv, NULL); return (int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(_CPU_RISCV64_) + // taken from https://github.com/google/benchmark/blob/3b3de69400164013199ea448f051d94d7fc7d81f/src/cycleclock.h#L190 + uint64_t ret; + __asm__ volatile("rdcycle %0" : "=r"(ret)); + return ret; #elif defined(_CPU_PPC64_) // This returns a time-base, which is not always precisely a cycle-count. // https://reviews.llvm.org/D78084 @@ -258,11 +307,17 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT #include "timing.h" +extern JL_DLLEXPORT uint64_t jl_typeinf_timing_begin(void) JL_NOTSAFEPOINT; +extern JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start, int is_recompile) JL_NOTSAFEPOINT; + // Global *atomic* integers controlling *process-wide* measurement of compilation time. extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time; +// Global *atomic* integer controlling *process-wide* task timing. 
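cycleclock() above reads the x86 time-stamp counter, prefixing rdtsc with a 5-byte NOP so rr can patch the instruction. A stand-alone sketch of the plain counter read and a typical coarse measurement; it assumes x86-64 with GCC/Clang inline assembly and omits the NOP prefix and the non-x86 branches.

#include <stdint.h>
#include <stdio.h>

static inline uint64_t rdtsc_sketch(void)
{
    uint64_t lo, hi;
    __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
    return (hi << 32) | lo;   /* EDX:EAX combined into one 64-bit count */
}

int main(void)
{
    uint64_t t0 = rdtsc_sketch();
    volatile uint64_t acc = 0;
    for (int i = 0; i < 1000000; i++)
        acc += (uint64_t)i;
    printf("busy loop: ~%llu reference cycles\n",
           (unsigned long long)(rdtsc_sketch() - t0));
    return 0;
}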
+extern JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled; + #define jl_return_address() ((uintptr_t)__builtin_return_address(0)) STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a) @@ -282,24 +337,41 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a) // without risk of creating pointers out of thin air // TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32 // aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit) -static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT +static inline void memmove_refs(_Atomic(void*) *dstp, _Atomic(void*) *srcp, size_t n) JL_NOTSAFEPOINT { size_t i; - _Atomic(void*) *srcpa = (_Atomic(void*)*)srcp; - _Atomic(void*) *dstpa = (_Atomic(void*)*)dstp; if (dstp < srcp || dstp > srcp + n) { for (i = 0; i < n; i++) { - jl_atomic_store_release(dstpa + i, jl_atomic_load_relaxed(srcpa + i)); + jl_atomic_store_release(dstp + i, jl_atomic_load_relaxed(srcp + i)); } } else { for (i = 0; i < n; i++) { - jl_atomic_store_release(dstpa + n - i - 1, jl_atomic_load_relaxed(srcpa + n - i - 1)); + jl_atomic_store_release(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1)); } } } -// -- gc.c -- // +static inline void memassign_safe(int hasptr, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT +{ + assert(nb == jl_datatype_size(jl_typeof(src))); + if (hasptr) { + size_t nptr = nb / sizeof(void*); + memmove_refs((_Atomic(void*)*)dst, (_Atomic(void*)*)src, nptr); + nb -= nptr * sizeof(void*); + if (__likely(nb == 0)) + return; + src = (jl_value_t*)((char*)src + nptr * sizeof(void*)); + dst = dst + nptr * sizeof(void*); + } + else if (nb >= 16) { + memcpy(dst, jl_assume_aligned(src, 16), nb); + return; + } + memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb); +} + +// -- GC -- // #define GC_CLEAN 0 // freshly allocated #define GC_MARKED 1 // reachable and young @@ -313,6 +385,7 @@ extern JL_DLLIMPORT jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED; extern jl_methtable_t *jl_kwcall_mt JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_method_t *jl_opaque_closure_method JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter; +extern jl_debuginfo_t *jl_nulldebuginfo JL_GLOBALLY_ROOTED; typedef void (*tracer_cb)(jl_value_t *tracee); extern tracer_cb jl_newmeth_tracer; @@ -321,27 +394,24 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages +JL_DLLEXPORT extern arraylist_t jl_top_mods; // external linkage: sysimg/pkgimages extern arraylist_t eytzinger_image_tree; extern arraylist_t eytzinger_idxs; extern JL_DLLEXPORT size_t jl_page_size; -extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; +extern JL_DLLEXPORT jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; -extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; + +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; -jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, +jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); JL_DLLEXPORT int 
jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; -extern uv_mutex_t gc_perm_lock; -void *jl_gc_perm_alloc_nolock(size_t sz, int zero, - unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void *jl_gc_perm_alloc(size_t sz, int zero, - unsigned align, unsigned offset) JL_NOTSAFEPOINT; void gc_sweep_sysimg(void); @@ -364,24 +434,48 @@ static const int jl_gc_sizeclasses[] = { 144, 160, 176, 192, 208, 224, 240, 256, // the following tables are computed for maximum packing efficiency via the formula: - // pg = 2^14 + // pg = GC_SMALL_PAGE ? 2^12 : 2^14 // sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)' +#ifdef GC_SMALL_PAGE + // rng = 15:-1:2 (14 pools) + 272, 288, 304, 336, 368, 400, 448, 496, 576, 672, 816, 1008, 1360, 2032 +// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, /pool +// 16, 64, 144, 64, 48, 96, 64, 128, 64, 64, 16, 64, 16, 32, bytes lost +#else // rng = 60:-4:32 (8 pools) 272, 288, 304, 336, 368, 400, 448, 496, -// 60, 56, 53, 48, 44, 40, 36, 33, /pool -// 64, 256, 272, 256, 192, 384, 256, 16, bytes lost +// 60, 56, 53, 48, 44, 40, 36, 33, /pool +// 64, 256, 272, 256, 192, 384, 256, 16, bytes lost // rng = 30:-2:16 (8 pools) 544, 576, 624, 672, 736, 816, 896, 1008, -// 30, 28, 26, 24, 22, 20, 18, 16, /pool -// 64, 256, 160, 256, 192, 64, 256, 256, bytes lost +// 30, 28, 26, 24, 22, 20, 18, 16, /pool +// 64, 256, 160, 256, 192, 64, 256, 256, bytes lost // rng = 15:-1:8 (8 pools) 1088, 1168, 1248, 1360, 1488, 1632, 1808, 2032 -// 15, 14, 13, 12, 11, 10, 9, 8, /pool -// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost +// 15, 14, 13, 12, 11, 10, 9, 8, /pool +// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost +#endif }; +#ifdef GC_SMALL_PAGE +#ifdef _P64 +# define JL_GC_N_POOLS 39 +#elif MAX_ALIGN > 4 +# define JL_GC_N_POOLS 40 +#else +# define JL_GC_N_POOLS 41 +#endif +#else +#ifdef _P64 +# define JL_GC_N_POOLS 49 +#elif MAX_ALIGN > 4 +# define JL_GC_N_POOLS 50 +#else +# define JL_GC_N_POOLS 51 +#endif +#endif static_assert(sizeof(jl_gc_sizeclasses) / sizeof(jl_gc_sizeclasses[0]) == JL_GC_N_POOLS, ""); STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT @@ -391,7 +485,7 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT #ifdef _P64 (void)sz; return 16; -#elif MAX_ALIGN == 8 +#elif MAX_ALIGN > 4 return sz <= 4 ? 
8 : 16; #else // szclass 8 @@ -408,7 +502,12 @@ JL_DLLEXPORT int jl_alignment(size_t sz) JL_NOTSAFEPOINT; // the following table is computed as: // [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]] -static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48}; +static const uint8_t szclass_table[] = +#ifdef GC_SMALL_PAGE + {0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,33,33,34,34,34,34,34,34,35,35,35,35,35,35,35,35,35,36,36,36,36,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38}; +#else + {0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,34,34,35,35,35,36,36,36,37,37,37,37,38,38,38,38,38,39,39,39,39,39,40,40,40,40,40,40,40,41,41,41,41,41,42,42,42,42,42,43,43,43,43,43,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,46,46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,47,47,47,48,48,48,48,48,48,48,48,48,48,48,48,48,48}; +#endif static_assert(sizeof(szclass_table) == 128, ""); STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) JL_NOTSAFEPOINT @@ -418,7 +517,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) JL_NOTSAFEPOINT if (sz <= 8) return 0; const int N = 0; -#elif MAX_ALIGN == 8 +#elif MAX_ALIGN > 4 if (sz <= 8) return (sz >= 4 ? 1 : 0); const int N = 1; @@ -436,7 +535,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE if (sz >= 16 && sz <= 152) { #ifdef _P64 const int N = 0; -#elif MAX_ALIGN == 8 +#elif MAX_ALIGN > 4 const int N = 1; #else const int N = 2; @@ -447,34 +546,11 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE } #define JL_SMALL_BYTE_ALIGNMENT 16 -#define JL_CACHE_BYTE_ALIGNMENT 64 // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide #define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
- v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -496,7 +572,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); // defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it const extern uint64_t _jl_buff_tag[3]; #define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16)) -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void); +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT; typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) @@ -504,17 +580,6 @@ STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag); } -STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT -{ - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ? - sizeof(void*) * 2 : 16)); - jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align, - sizeof(void*) % align); - uintptr_t tag = (uintptr_t)ty; - o->header = tag | GC_OLD_MARKED; - return jl_valueof(o); -} jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x); jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x); jl_svec_t *jl_perm_symsvec(size_t n, ...); @@ -550,36 +615,14 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...); #endif #endif -jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz); -JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz); - -JL_DLLEXPORT void JL_NORETURN jl_throw_out_of_memory_error(void); - - -JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT; -void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT; +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT; +size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT; +size_t memory_block_usable_size(void *mem, int isaligned) JL_NOTSAFEPOINT; void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT; void jl_gc_run_all_finalizers(jl_task_t *ct); void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task); void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; -void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; - -STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* -{ - jl_gc_wb(bnd, val); -} - -STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* -{ - // if parent is marked and buf is not - if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { - jl_task_t *ct = jl_current_task; - gc_setmark_buf(ct->ptls, bufptr, 3, minsz); - } -} - void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_debug_critical_error(void) 
JL_NOTSAFEPOINT; void jl_print_gc_stats(JL_STREAM *s); @@ -603,41 +646,83 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO // -- helper types -- // typedef struct { - uint8_t inferred:1; - uint8_t propagate_inbounds:1; - uint8_t has_fcall:1; - uint8_t nospecializeinfer:1; - uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none - uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none + uint16_t propagate_inbounds:1; + uint16_t has_fcall:1; + uint16_t nospecializeinfer:1; + uint16_t isva:1; + uint16_t nargsmatchesmethod:1; + uint16_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none + uint16_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none + uint16_t has_ssaflags:1; } jl_code_info_flags_bitfield_t; typedef union { jl_code_info_flags_bitfield_t bits; - uint8_t packed; + uint16_t packed; } jl_code_info_flags_t; // -- functions -- // -JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force); +// Also defined in typeinfer.jl - See documentation there. +#define SOURCE_MODE_NOT_REQUIRED 0x0 +#define SOURCE_MODE_ABI 0x1 + +JL_DLLEXPORT jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner); +JL_DLLEXPORT void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src); +void jl_engine_sweep(jl_ptls_t *gc_all_tls_states) JL_NOTSAFEPOINT; +int jl_engine_hasreserved(jl_method_instance_t *m, jl_value_t *owner) JL_NOTSAFEPOINT; + +JL_DLLEXPORT jl_code_instance_t *jl_type_infer(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t world, uint8_t source_mode); +JL_DLLEXPORT jl_code_info_t *jl_gdbcodetyped1(jl_method_instance_t *mi, size_t world); JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world); JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, - size_t min_world, size_t max_world); -jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT); -jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT); + size_t min_world, size_t max_world, jl_debuginfo_t *di, jl_svec_t *edges); +JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT); +JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile); +JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache); +JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src); + +JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner); +JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( + jl_method_instance_t *mi, jl_value_t *owner, + jl_value_t *rettype, jl_value_t *exctype, + jl_value_t *inferred_const, jl_value_t *inferred, + int32_t const_flags, size_t min_world, size_t max_world, + uint32_t effects, jl_value_t *analysis_results, + jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/); +JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, int compiled) JL_NOTSAFEPOINT; + +STATIC_INLINE jl_method_instance_t *jl_get_ci_mi(jl_code_instance_t *ci JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + jl_value_t *def = ci->def; + if (jl_is_abioverride(def)) + return ((jl_abi_override_t*)def)->def; + 
assert(jl_is_method_instance(def)); + return (jl_method_instance_t*)def; +} + +JL_DLLEXPORT const char *jl_debuginfo_file(jl_debuginfo_t *debuginfo) JL_NOTSAFEPOINT; +JL_DLLEXPORT const char *jl_debuginfo_file1(jl_debuginfo_t *debuginfo) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_module_t *jl_debuginfo_module1(jl_value_t *debuginfo_def) JL_NOTSAFEPOINT; +JL_DLLEXPORT const char *jl_debuginfo_name(jl_value_t *func) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_is_compiled_codeinst(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world); +JL_DLLEXPORT void jl_compile_method_sig(jl_method_t *m, jl_value_t *types, jl_svec_t *sparams, size_t world); JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types); +JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types); jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world); +jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world); int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile); jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ast); JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void); -JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, - int binding_effects); +JL_DLLEXPORT void jl_resolve_definition_effects_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, + int binding_effects); -int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT; -int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller); -void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *caller); +int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_code_instance_t **caller) JL_NOTSAFEPOINT; +int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller); +void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_code_instance_t *caller); JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root); void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots); @@ -649,7 +734,7 @@ int jl_valid_type_param(jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs); -void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world); +void JL_NORETURN jl_method_error(jl_value_t *F, jl_value_t **args, size_t na, size_t world); JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...); JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t); @@ -657,6 +742,7 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t); #define JL_CALLABLE(name) \ JL_DLLEXPORT jl_value_t *name(jl_value_t *F, jl_value_t **args, uint32_t nargs) +JL_CALLABLE(jl_f_svec); JL_CALLABLE(jl_f_tuple); JL_CALLABLE(jl_f_intrinsic_call); JL_CALLABLE(jl_f_opaque_closure_call); @@ -664,7 +750,7 @@ void jl_install_default_signal_handlers(void); void restore_signals(void); void jl_install_thread_signal_handler(jl_ptls_t ptls); -JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b); +JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_datatype_t *dt); extern uv_loop_t *jl_io_loop; JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream); @@ -678,14 +764,23 @@ typedef struct jl_typeenv_t { int jl_tuple_isa(jl_value_t 
**child, size_t cl, jl_datatype_t *pdt); int jl_tuple1_isa(jl_value_t *child1, jl_value_t **child, size_t cl, jl_datatype_t *pdt); +enum atomic_kind { + isatomic_none = 0, + isatomic_object = 1, + isatomic_field = 2 +}; + JL_DLLEXPORT int jl_has_intersect_type_not_kind(jl_value_t *t); int jl_subtype_invariant(jl_value_t *a, jl_value_t *b, int ta); int jl_has_concrete_subtype(jl_value_t *typ); jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf); jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1 JL_PROPAGATES_ROOT, jl_value_t **args, size_t nargs, int leaf); JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype); +void jl_method_table_activate(jl_methtable_t *mt, jl_typemap_entry_t *newentry); +jl_typemap_entry_t *jl_method_table_add(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype); jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED; int jl_obviously_unequal(jl_value_t *a, jl_value_t *b); +int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v); int jl_has_fixed_layout(jl_datatype_t *t); JL_DLLEXPORT int jl_struct_try_layout(jl_datatype_t *dt); @@ -694,16 +789,18 @@ jl_svec_t *jl_outer_unionall_vars(jl_value_t *u); jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty); jl_value_t *jl_type_intersection_env(jl_value_t *a, jl_value_t *b, jl_svec_t **penv); int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv); -JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT; // specificity comparison assuming !(a <: b) and !(b <: a) JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b); jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n); JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals); jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val); +jl_value_t *jl_substitute_var_nothrow(jl_value_t *t, jl_tvar_t *var, jl_value_t *val, int nothrow); jl_unionall_t *jl_rename_unionall(jl_unionall_t *u); JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u); JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u); +jl_value_t* jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y); int jl_count_union_components(jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT; int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT; @@ -712,7 +809,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_datatype_t *jl_new_uninitialized_datatype(void); void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable); JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t); // x -> Type{x} -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check); +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow); void jl_reinstantiate_inner_types(jl_datatype_t *t); jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type); void jl_cache_type_(jl_datatype_t *type); @@ 
-721,6 +818,13 @@ void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic); jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic); jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic); +int set_nth_fieldonce(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic); +jl_value_t *swap_bits(jl_value_t *ty, char *v, uint8_t *psel, jl_value_t *parent, jl_value_t *rhs, enum atomic_kind isatomic); +jl_value_t *replace_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, int isatomic, jl_module_t *mod, jl_sym_t *name); +jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *expected, jl_value_t *rhs, enum atomic_kind isatomic); +jl_value_t *modify_value(jl_value_t *ty, _Atomic(jl_value_t*) *p, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, int isatomic, jl_module_t *mod, jl_sym_t *name); +jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_t *parent, jl_value_t *op, jl_value_t *rhs, enum atomic_kind isatomic); +int setonce_bits(jl_datatype_t *rty, char *p, jl_value_t *owner, jl_value_t *rhs, enum atomic_kind isatomic); jl_expr_t *jl_exprn(jl_sym_t *head, size_t n); jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module); jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st); @@ -732,8 +836,33 @@ jl_array_t *jl_get_loaded_modules(void); JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree); int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t *val); +void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty); void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type); -jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded); +JL_DLLEXPORT void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type); +JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno); + +STATIC_INLINE struct _jl_module_using *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; +STATIC_INLINE jl_module_t *module_usings_getmod(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; + +#ifndef __clang_gcanalyzer__ +// The analyzer doesn't like looking through the arraylist, so just model the +// access for it using this function +STATIC_INLINE struct _jl_module_using *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { + return (struct _jl_module_using *)&(m->usings.items[3*i]); +} +STATIC_INLINE jl_module_t *module_usings_getmod(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { + return module_usings_getidx(m, i)->mod; +} +#endif + +STATIC_INLINE size_t module_usings_length(jl_module_t *m) JL_NOTSAFEPOINT { + return m->usings.len/3; +} + +STATIC_INLINE size_t module_usings_max(jl_module_t *m) JL_NOTSAFEPOINT { + return m->usings.max/3; +} jl_value_t *jl_eval_global_var(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *e); jl_value_t 
*jl_interpret_opaque_closure(jl_opaque_closure_t *clos, jl_value_t **args, size_t nargs); @@ -745,7 +874,8 @@ JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT; jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, jl_module_t *inmodule, const char *file, int line); -jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world); +JL_DLLEXPORT jl_value_t *jl_method_lookup_by_tt(jl_tupletype_t *tt, size_t world, jl_value_t *_mt); +JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world); jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs); jl_value_t *jl_gf_invoke(jl_value_t *types, jl_value_t *f, jl_value_t **args, size_t nargs); @@ -767,22 +897,157 @@ JL_DLLEXPORT jl_methtable_t *jl_method_get_table( JL_DLLEXPORT int jl_pointer_egal(jl_value_t *t); JL_DLLEXPORT jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; void jl_compute_field_offsets(jl_datatype_t *st); -jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz); void jl_module_run_initializer(jl_module_t *m); JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc); JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *sym, jl_binding_t *b); extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED; extern htable_t jl_current_modules JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED; -extern jl_array_t *jl_global_roots_table JL_GLOBALLY_ROOTED; +extern jl_genericmemory_t *jl_global_roots_list JL_GLOBALLY_ROOTED; +extern jl_genericmemory_t *jl_global_roots_keyset JL_GLOBALLY_ROOTED; +extern arraylist_t *jl_entrypoint_mis; JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBALLY_ROOTED; +extern jl_svec_t *precompile_field_replace JL_GLOBALLY_ROOTED; +JL_DLLEXPORT void jl_set_precompile_field_replace(jl_value_t *val, jl_value_t *field, jl_value_t *newval) JL_GLOBALLY_ROOTED; jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, jl_value_t *source, jl_value_t **env, size_t nenv, int do_compile); +jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, + int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred); JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source); +EXTERN_INLINE_DECLARE enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT +{ +#ifdef _P64 + uint8_t bits = (pku & 0x7); + jl_value_t *val = (jl_value_t*)(pku & ~0x7); + + if (val == NULL) { + if (bits == BINDING_KIND_IMPLICIT) { + return BINDING_KIND_GUARD; + } + if (bits == BINDING_KIND_CONST) { + return BINDING_KIND_UNDEF_CONST; + } + } + + return (enum jl_partition_kind)bits; +#else + return (enum jl_partition_kind)pku.kind; +#endif +} + +STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t JL_PROPAGATES_ROOT pku) JL_NOTSAFEPOINT +{ +#ifdef _P64 + jl_value_t *val = (jl_value_t*)(pku & ~0x7); + return val; +#else + return pku.val; +#endif +} + +STATIC_INLINE jl_ptr_kind_union_t 
encode_restriction(jl_value_t *val, enum jl_partition_kind kind) JL_NOTSAFEPOINT +{ +#ifdef _P64 + if (kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED || kind == BINDING_KIND_FAILED || kind == BINDING_KIND_UNDEF_CONST) + assert(val == NULL); + else if (kind == BINDING_KIND_IMPLICIT || kind == BINDING_KIND_CONST) + assert(val != NULL); + if (kind == BINDING_KIND_GUARD) + kind = BINDING_KIND_IMPLICIT; + else if (kind == BINDING_KIND_UNDEF_CONST) + kind = BINDING_KIND_CONST; + assert((((uintptr_t)val) & 0x7) == 0); + return ((jl_ptr_kind_union_t)val) | kind; +#else + jl_ptr_kind_union_t ret = { val, kind }; + return ret; +#endif +} + +STATIC_INLINE int jl_bkind_is_some_import(enum jl_partition_kind kind) JL_NOTSAFEPOINT { + return kind == BINDING_KIND_IMPLICIT || kind == BINDING_KIND_EXPLICIT || kind == BINDING_KIND_IMPORTED; +} + +STATIC_INLINE int jl_bkind_is_some_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT { + return kind == BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT || kind == BINDING_KIND_UNDEF_CONST; +} + +STATIC_INLINE int jl_bkind_is_defined_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT { + return kind == BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT; +} + +STATIC_INLINE int jl_bkind_is_some_guard(enum jl_partition_kind kind) JL_NOTSAFEPOINT { + return kind == BINDING_KIND_FAILED || kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED; +} + +JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b JL_PROPAGATES_ROOT, size_t world); +JL_DLLEXPORT jl_binding_partition_t *jl_get_binding_partition_all(jl_binding_t *b JL_PROPAGATES_ROOT, size_t min_world, size_t max_world); + +EXTERN_INLINE_DECLARE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT { + return decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)); +} + +STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT; +STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace_all(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t min_world, size_t max_world) JL_NOTSAFEPOINT; + +#ifndef __clang_analyzer__ +STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT +{ + while (1) { + if (!*bpart) + return encode_restriction(NULL, BINDING_KIND_GUARD); + jl_ptr_kind_union_t pku = jl_atomic_load_acquire(&(*bpart)->restriction); + if (!jl_bkind_is_some_import(decode_restriction_kind(pku))) + return pku; + *bnd = (jl_binding_t*)decode_restriction_value(pku); + *bpart = jl_get_binding_partition(*bnd, world); + } +} + +STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace_all(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t min_world, size_t max_world) JL_NOTSAFEPOINT +{ + while (1) { + if (!*bpart) + return encode_restriction(NULL, BINDING_KIND_GUARD); + jl_ptr_kind_union_t pku = jl_atomic_load_acquire(&(*bpart)->restriction); + if (!jl_bkind_is_some_import(decode_restriction_kind(pku))) + return pku; + *bnd = (jl_binding_t*)decode_restriction_value(pku); + *bpart = jl_get_binding_partition_all(*bnd, min_world, max_world); + } +} +#endif + +STATIC_INLINE int is10digit(char c) JL_NOTSAFEPOINT +{ + return (c >= '0' && c <= '9'); +} + +STATIC_INLINE int is_anonfn_typename(char *name) +{ + if (name[0] != '#' || name[1] == '#') + return 0; + char *other = strrchr(name, '#'); + return other > &name[1] && is10digit(other[1]); +} + +// Returns true 
for typenames of anonymous functions that have been canonicalized (i.e. +// we mangled the name of the outermost enclosing function in their name). +STATIC_INLINE int is_canonicalized_anonfn_typename(char *name) JL_NOTSAFEPOINT +{ + char *delim = strchr(&name[1], '#'); + if (delim == NULL) + return 0; + if (delim[1] != '#') + return 0; + if (!is10digit(delim[2])) + return 0; + return 1; +} + // Each tuple can exist in one of 4 Vararg states: // NONE: no vararg Tuple{Int,Float32} // INT: vararg with integer length Tuple{Int,Vararg{Float32,2}} @@ -862,9 +1127,7 @@ void jl_init_tasks(void) JL_GC_DISABLED; void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo) JL_NOTSAFEPOINT; jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi); void jl_init_serializer(void); -void jl_gc_init(void); void jl_init_uv(void); -void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT; void jl_init_int32_int64_cache(void); JL_DLLEXPORT void jl_init_options(void); @@ -876,11 +1139,12 @@ void jl_init_threading(void); void jl_start_threads(void); // Whether the GC is running +extern uv_mutex_t safepoint_lock; extern char *jl_safepoint_pages; STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) { uintptr_t safepoint_addr = (uintptr_t)jl_safepoint_pages; - return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3; + return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; extern _Atomic(uint32_t) jl_gc_disable_counter; @@ -896,7 +1160,7 @@ void jl_safepoint_init(void); // before calling this function. If the calling thread is to run the GC, // it should also wait for the mutator threads to hit a safepoint **AFTER** // this function returns -int jl_safepoint_start_gc(void); +int jl_safepoint_start_gc(jl_task_t *ct); // Can only be called by the thread that has got a `1` return value from // `jl_safepoint_start_gc()`. This disables the safepoint (for GC, // the `mprotect` may not be removed if there's pending SIGINT) and wake @@ -906,8 +1170,9 @@ void jl_safepoint_end_gc(void); // Wait for the GC to finish // This function does **NOT** modify the `gc_state` to inform the GC thread // The caller should set it **BEFORE** calling this function. -void jl_safepoint_wait_gc(void); - +void jl_safepoint_wait_gc(jl_task_t *ct) JL_NOTSAFEPOINT; +void jl_safepoint_wait_thread_resume(jl_task_t *ct) JL_NOTSAFEPOINT; +int8_t jl_safepoint_take_sleep_lock(jl_ptls_t ptls) JL_NOTSAFEPOINT_ENTER; // Set pending sigint and enable the mechanisms to deliver the sigint. void jl_safepoint_enable_sigint(void); // If the safepoint is enabled to deliver sigint, disable it @@ -921,9 +1186,7 @@ int jl_safepoint_consume_sigint(void); void jl_wake_libuv(void) JL_NOTSAFEPOINT; void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT; -#if defined(_OS_DARWIN_) -typedef pthread_key_t jl_pgcstack_key_t; -#elif defined(_OS_WINDOWS_) +#if defined(_OS_WINDOWS_) typedef DWORD jl_pgcstack_key_t; #else typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT; @@ -934,18 +1197,7 @@ JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t extern pthread_mutex_t in_signal_lock; #endif -#if !defined(__clang_gcanalyzer__) && !defined(_OS_DARWIN_) -static inline void jl_set_gc_and_wait(void) -{ - jl_task_t *ct = jl_current_task; - // reading own gc state doesn't need atomic ops since no one else - // should store to it.
- int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); - jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); - jl_safepoint_wait_gc(); - jl_atomic_store_release(&ct->ptls->gc_state, state); -} -#endif +void jl_set_gc_and_wait(jl_task_t *ct); // n.b. not used on _OS_DARWIN_ // Query if a Julia object is if a permalloc region (due to part of a sys- pkg-image) STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT @@ -955,30 +1207,44 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT; -uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT; +// Query if this object is perm-allocated in an image. +JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT; // the first argument to jl_idtable_rehash is used to return a value // make sure it is rooted if it is used after the function returns -JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz); -_Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz); +_Atomic(jl_value_t*) *jl_table_peek_bp(jl_genericmemory_t *a, jl_value_t *key) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*); JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module); -JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache); +JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, int mt_cache); jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp); -JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world); +JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_value_t *owner, jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world); +JL_DLLEXPORT jl_value_t *jl_rettype_inferred_native(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_value_t *type, size_t world); JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo( jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams); jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins); -JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller); -JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller); +JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_code_instance_t *caller); +JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_code_instance_t *caller); JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT int jl_mi_try_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, + jl_code_instance_t *expected_ci, + jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_code_instance_t *jl_cached_uninferred(jl_code_instance_t *codeinst, size_t world); 
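The encode_restriction/decode_restriction helpers added earlier in this hunk pack a small kind enum into the low three bits of an 8-byte-aligned pointer on 64-bit builds. A generic, self-contained sketch of that low-bit tagging technique (the names and payload here are invented for illustration; this is not the binding-partition code itself):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uintptr_t tagged_ptr_t;

/* Valid only because the pointer is at least 8-byte aligned, so its
 * low three bits are guaranteed to be zero. */
static tagged_ptr_t tag_ptr(void *p, unsigned kind)
{
    assert(kind < 8 && ((uintptr_t)p & 0x7) == 0);
    return (uintptr_t)p | kind;
}

static unsigned tag_kind(tagged_ptr_t t) { return (unsigned)(t & 0x7); }
static void *tag_value(tagged_ptr_t t)   { return (void *)(t & ~(uintptr_t)0x7); }

int main(void)
{
    int *x = malloc(sizeof *x);
    *x = 42;
    tagged_ptr_t t = tag_ptr(x, 3);
    printf("kind=%u value=%d\n", tag_kind(t), *(int *)tag_value(t));
    free(x);
    return 0;
}

The same alignment guarantee is why encode_restriction asserts (val & 0x7) == 0 before or-ing in the kind.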
+JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, jl_code_instance_t *checked, size_t world, jl_code_instance_t *newci JL_MAYBE_UNROOTED); +JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src); JL_DLLEXPORT extern jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_force_trace_compile_timing_enable(void); +JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void); + +JL_DLLEXPORT void jl_force_trace_dispatch_enable(void); +JL_DLLEXPORT void jl_force_trace_dispatch_disable(void); + uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT; jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs); @@ -1097,7 +1363,7 @@ typedef struct { char *func_name; char *file_name; int line; - jl_method_instance_t *linfo; + jl_code_instance_t *ci; int fromC; int inlined; } jl_frame_t; @@ -1153,6 +1419,9 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_data) JL_NOTSAFEPOINT; #ifdef _OS_WINDOWS_ JL_DLLEXPORT void jl_refresh_dbg_module_list(void); #endif +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) JL_NOTSAFEPOINT; +void jl_thread_resume(int tid) JL_NOTSAFEPOINT; + // *to is NULL or malloc'd pointer, from is allowed to be NULL STATIC_INLINE char *jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT { @@ -1209,23 +1478,47 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA return itr-2 - jl_excstack_bt_size(stack, itr); } // Exception stack manipulation -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size); +// System util to get maximum RSS +JL_DLLEXPORT size_t jl_maxrss(void); + //-------------------------------------------------- // congruential random number generator // for a small amount of thread-local randomness -STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) JL_NOTSAFEPOINT -{ - *unbias = UINT64_MAX - ((UINT64_MAX % max) + 1); -} -STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) JL_NOTSAFEPOINT + +//TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99 +// for better performance, it does however require making users expect a 32bit random number. 
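// --- Illustrative aside (not part of the patch) ------------------------------
// The cong() rewrite just below avoids the modulo bias of `seed % max` by
// masking each LCG step down to the smallest power-of-two range covering max
// and rejecting out-of-range draws (reusing leftover high bits before stepping
// again). A hedged sketch of the simpler rejection loop that the masked
// version refines; only the LCG constants are taken from the patch, and
// __builtin_clzll assumes a GCC/Clang-compatible compiler:
#include <cstdint>

static uint64_t lcg_next(uint64_t *seed)
{
    *seed = 69069 * (*seed) + 362437;
    return *seed;
}

static uint64_t bounded_rand(uint64_t max, uint64_t *seed) // uniform on [0, max)
{
    if (max < 2)
        return 0;
    int zeros = __builtin_clzll(max);
    uint64_t mask = ~(uint64_t)0 >> zeros;  // smallest mask of the form 2^k - 1 that is >= max
    uint64_t x;
    do {
        x = lcg_next(seed) & mask;          // rejection keeps every surviving value equally likely
    } while (x >= max);
    return x;
}
// --- (end of illustrative aside) ----------------------------------------------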
+ +STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT // Open interval [0, max) { - while ((*seed = 69069 * (*seed) + 362437) > unbias) - ; - return *seed % max; + if (max < 2) + return 0; + uint64_t mask = ~(uint64_t)0; + int zeros = __builtin_clzll(max); + int bits = CHAR_BIT * sizeof(uint64_t) - zeros; + mask = mask >> zeros; + do { + uint64_t value = 69069 * (*seed) + 362437; + *seed = value; + uint64_t x = value & mask; + if (x < max) { + return x; + } + int bits_left = zeros; + while (bits_left >= bits) { + value >>= bits; + x = value & mask; + if (x < max) { + return x; + } + bits_left -= bits; + } + } while (1); } + JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_init_rand(void); @@ -1270,7 +1563,7 @@ JL_DLLEXPORT unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT; STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety) { - return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout)); + return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout) && !jl_is_array_type(ety)); } JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align); @@ -1301,6 +1594,8 @@ JL_DLLEXPORT jl_value_t *jl_add_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_sub_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_mul_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_div_float(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT jl_value_t *jl_min_float(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT jl_value_t *jl_max_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_fma_float(jl_value_t *a, jl_value_t *b, jl_value_t *c); JL_DLLEXPORT jl_value_t *jl_muladd_float(jl_value_t *a, jl_value_t *b, jl_value_t *c); @@ -1361,31 +1656,38 @@ JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b); -JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a); JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type); JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a); -JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i); JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary); -JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT; // -- synchronization utilities -- // extern jl_mutex_t typecache_lock; -extern JL_DLLEXPORT jl_mutex_t jl_codegen_lock; #if defined(__APPLE__) -void jl_mach_gc_end(void); +void jl_mach_gc_end(void) JL_NOTSAFEPOINT; +void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2) JL_NOTSAFEPOINT; #endif // -- smallintset.c -- // -typedef uint_t (*smallintset_hash)(size_t val, jl_svec_t *data); -typedef int (*smallintset_eq)(size_t val, const void *key, jl_svec_t *data, uint_t hv); -ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv); -void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data); +typedef 
uint_t (*smallintset_hash)(size_t val, jl_value_t *data); +typedef int (*smallintset_eq)(size_t val, const void *key, jl_value_t *data, uint_t hv); +ssize_t jl_smallintset_lookup(jl_genericmemory_t *cache, smallintset_eq eq, const void *key, jl_value_t *data, uint_t hv, int pop); +void jl_smallintset_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_value_t *data); +jl_genericmemory_t* smallintset_rehash(jl_genericmemory_t* a, smallintset_hash hash, jl_value_t *data, size_t newsz, size_t np); +void smallintset_empty(const jl_genericmemory_t *a) JL_NOTSAFEPOINT; + +JL_DLLEXPORT jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz); +JL_DLLEXPORT ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_idset_get(jl_genericmemory_t *keys JL_PROPAGATES_ROOT, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx); +JL_DLLEXPORT jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx); +JL_DLLEXPORT ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT; // -- typemap.c -- // @@ -1401,8 +1703,6 @@ struct jl_typemap_assoc { size_t const world; // outputs jl_svec_t *env; // subtype env (initialize to null to perform intersection without an environment) - size_t min_valid; - size_t max_valid; }; jl_typemap_entry_t *jl_typemap_assoc_by_type( @@ -1450,23 +1750,20 @@ void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_en // -- simplevector.c -- // -// For codegen only. -JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i); - // check whether the specified number of arguments is compatible with the // specified number of parameters of the tuple type JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0); JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0); +JL_DLLEXPORT jl_value_t *jl_argtype_without_function(jl_value_t *ftypes); JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type); -void register_eh_frames(uint8_t *Addr, size_t Size); -void deregister_eh_frames(uint8_t *Addr, size_t Size); +void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; +void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; -STATIC_INLINE void *jl_get_frame_addr(void) +STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT { #ifdef __GNUC__ return __builtin_frame_address(0); @@ -1478,8 +1775,6 @@ STATIC_INLINE void *jl_get_frame_addr(void) #endif } -JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a); - // Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the // julia side. If any of module, group, id, file or line are NULL, these will // be passed to the julia side as `nothing`. 
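// --- Illustrative aside (not part of the patch) ------------------------------
// The jl_idset_* declarations above split an identity set into two memories:
// `keys` stores the elements densely, and `idxs` is an open-addressed table of
// offsets into `keys`, probed by object identity. A hedged toy sketch of that
// layout (fixed capacity, no growth, made-up hash) just to show the lookup:
#include <cstddef>
#include <cstdint>

enum { NSLOTS = 16 };                        // toy power-of-two capacity

struct idset {
    void      *keys[NSLOTS];                 // dense storage of inserted objects
    ptrdiff_t  idxs[NSLOTS];                 // hash slots: -1 = empty, else index into keys
    size_t     nkeys;
};

static ptrdiff_t idset_peek(const idset *s, void *key)
{
    size_t h = ((uintptr_t)key >> 4) & (NSLOTS - 1);    // identity (pointer) hash
    for (size_t probe = 0; probe < NSLOTS; probe++) {
        ptrdiff_t idx = s->idxs[(h + probe) & (NSLOTS - 1)];
        if (idx == -1)
            return -1;                       // empty slot terminates the probe: not present
        if (s->keys[idx] == key)
            return idx;                      // found by identity, analogous to jl_idset_peek_bp
    }
    return -1;
}
// --- (end of illustrative aside) ----------------------------------------------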
If `kwargs` is NULL an empty set @@ -1498,6 +1793,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_top_sym; extern JL_DLLEXPORT jl_sym_t *jl_module_sym; extern JL_DLLEXPORT jl_sym_t *jl_slot_sym; extern JL_DLLEXPORT jl_sym_t *jl_export_sym; +extern JL_DLLEXPORT jl_sym_t *jl_public_sym; extern JL_DLLEXPORT jl_sym_t *jl_import_sym; extern JL_DLLEXPORT jl_sym_t *jl_toplevel_sym; extern JL_DLLEXPORT jl_sym_t *jl_quote_sym; @@ -1530,6 +1826,8 @@ extern JL_DLLEXPORT jl_sym_t *jl_thunk_sym; extern JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym; extern JL_DLLEXPORT jl_sym_t *jl_as_sym; extern JL_DLLEXPORT jl_sym_t *jl_global_sym; +extern JL_DLLEXPORT jl_sym_t *jl_globaldecl_sym; +extern JL_DLLEXPORT jl_sym_t *jl_local_sym; extern JL_DLLEXPORT jl_sym_t *jl_list_sym; extern JL_DLLEXPORT jl_sym_t *jl_dot_sym; extern JL_DLLEXPORT jl_sym_t *jl_newvar_sym; @@ -1568,6 +1866,8 @@ extern JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym; extern JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym; extern JL_DLLEXPORT jl_sym_t *jl_optlevel_sym; extern JL_DLLEXPORT jl_sym_t *jl_thismodule_sym; +extern JL_DLLEXPORT jl_sym_t *jl_eval_sym; +extern JL_DLLEXPORT jl_sym_t *jl_include_sym; extern JL_DLLEXPORT jl_sym_t *jl_atom_sym; extern JL_DLLEXPORT jl_sym_t *jl_statement_sym; extern JL_DLLEXPORT jl_sym_t *jl_all_sym; @@ -1583,13 +1883,15 @@ extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym; extern JL_DLLEXPORT jl_sym_t *jl_release_sym; extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym; extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym; +extern JL_DLLEXPORT jl_sym_t *jl_uninferred_sym; +extern JL_DLLEXPORT jl_sym_t *jl_latestworld_sym; JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing); JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing); struct _jl_image_fptrs_t; -void jl_write_coverage_data(const char*); +JL_DLLEXPORT void jl_write_coverage_data(const char*); void jl_write_malloc_log(void); #if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_) @@ -1598,6 +1900,7 @@ void jl_write_malloc_log(void); # define jl_unreachable() ((void)jl_assume(0)) #endif +extern uv_mutex_t symtab_lock; jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; // Tools for locally disabling spurious compiler warnings @@ -1630,27 +1933,34 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; #define JL_GC_ASSERT_LIVE(x) (void)(x) #endif -JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT; -JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; -JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT; -//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT; -//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT; +#ifdef _OS_WINDOWS_ +// On Windows, weak symbols do not default to 0 due to a GCC bug +// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90826), use symbol +// aliases with a known value instead. 
+#define JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(sym) __attribute__((weak,alias(#sym))) +#define JL_WEAK_SYMBOL_DEFAULT(sym) &sym +#else +#define JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(sym) __attribute__((weak)) +#define JL_WEAK_SYMBOL_DEFAULT(sym) NULL +#endif + +//JL_DLLEXPORT float julia__gnu_h2f_ieee(half param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT half julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT half julia__truncdfhf2(double param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT float julia__truncsfbf2(float param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT float julia__truncdfbf2(double param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT double julia__extendhfdf2(half n) JL_NOTSAFEPOINT; JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len); // -- exports from codegen -- // -JL_DLLIMPORT jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world); +#define IR_FLAG_INBOUNDS 0x01 + JL_DLLIMPORT void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec); -JL_DLLIMPORT void jl_generate_fptr_for_oc_wrapper(jl_code_instance_t *unspec); +JL_DLLIMPORT int jl_compile_codeinst(jl_code_instance_t *unspec); JL_DLLIMPORT int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt); +JL_DLLIMPORT void jl_emit_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src); typedef struct { LLVMOrcThreadSafeModuleRef TSM; @@ -1659,22 +1969,26 @@ typedef struct { JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); -JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params); +JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params); JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary); JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw); -JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world); +typedef jl_value_t *(*jl_codeinstance_lookup_t)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world); +JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, int trim, int cache, size_t world); +JL_DLLIMPORT void *jl_emit_native(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _external_linkage); JL_DLLIMPORT void jl_dump_native(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - ios_t *z, ios_t *s); -JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); -JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs); + ios_t *z, ios_t *s, jl_emission_params_t *params); +JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, size_t *num_els, void **gvs); +JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, size_t *num_els, + 
jl_code_instance_t *gvs); JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx); JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n); - +JL_DLLIMPORT void jl_get_llvm_mis(void *native_code, size_t *num_els, + jl_method_instance_t *MIs); JL_DLLIMPORT void jl_init_codegen(void); JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT; JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT; diff --git a/src/julia_locks.h b/src/julia_locks.h index 47e258f69aab2..92d67b34b1692 100644 --- a/src/julia_locks.h +++ b/src/julia_locks.h @@ -96,8 +96,41 @@ static inline void jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEP #define JL_LOCK_NOGC(m) jl_mutex_lock_nogc(m) #define JL_UNLOCK_NOGC(m) jl_mutex_unlock_nogc(m) +JL_DLLEXPORT void jl_lock_value(jl_mutex_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_unlock_value(jl_mutex_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_lock_field(jl_mutex_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_unlock_field(jl_mutex_t *v) JL_NOTSAFEPOINT; + #ifdef __cplusplus } + +#include +#include +// simple C++ shim around a std::unique_lock + gc-safe + disabled finalizers region +// since we nearly always want that combination together +class jl_unique_gcsafe_lock { +public: + int8_t gc_state; + std::unique_lock native; + explicit jl_unique_gcsafe_lock(std::mutex &native) JL_NOTSAFEPOINT_ENTER + { + jl_task_t *ct = jl_current_task; + gc_state = jl_gc_safe_enter(ct->ptls); // contains jl_gc_safepoint after enter + this->native = std::unique_lock(native); + ct->ptls->engine_nqueued++; // disables finalizers until inference is finished on this method graph + } + jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &&native) = delete; + jl_unique_gcsafe_lock(jl_unique_gcsafe_lock &native) = delete; + ~jl_unique_gcsafe_lock() JL_NOTSAFEPOINT_LEAVE { + jl_task_t *ct = jl_current_task; + native.unlock(); + jl_gc_safe_leave(ct->ptls, gc_state); // contains jl_gc_safepoint after leave + ct->ptls->engine_nqueued--; // enable finalizers (but don't run them until the next gc) + } + void wait(std::condition_variable& cond) JL_NOTSAFEPOINT { + cond.wait(native); + } +}; #endif #endif diff --git a/src/julia_threads.h b/src/julia_threads.h index f4c235243e684..b6ef65dc7fe52 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -4,7 +4,12 @@ #ifndef JL_THREADS_H #define JL_THREADS_H -#include "work-stealing-queue.h" +#ifndef MMTK_GC +#include "gc-tls-stock.h" +#else +#include "gc-tls-mmtk.h" +#endif +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -18,6 +23,8 @@ extern "C" { JL_DLLEXPORT int16_t jl_threadid(void); JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT; // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user @@ -30,25 +37,31 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT; // JL_HAVE_ASM -- mostly setjmp // JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based // JL_HAVE_UNW_CONTEXT -- libunwind-based -// JL_HAVE_ASYNCIFY -- task switching based on the binary asyncify transform // JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume -// JL_HAVE_SIGALTSTACK -- requires several syscall for start, 
setjmp for resume #ifdef _OS_WINDOWS_ #define JL_HAVE_UCONTEXT typedef win32_ucontext_t jl_stack_context_t; typedef jl_stack_context_t _jl_ucontext_t; + +#elif defined(_OS_OPENBSD_) +#define JL_HAVE_UNW_CONTEXT +#define UNW_LOCAL_ONLY +#include +typedef unw_context_t _jl_ucontext_t; +typedef struct { + jl_jmp_buf uc_mcontext; +} jl_stack_context_t; + #else typedef struct { jl_jmp_buf uc_mcontext; } jl_stack_context_t; #if !defined(JL_HAVE_UCONTEXT) && \ !defined(JL_HAVE_ASM) && \ - !defined(JL_HAVE_UNW_CONTEXT) && \ - !defined(JL_HAVE_SIGALTSTACK) && \ - !defined(JL_HAVE_ASYNCIFY) + !defined(JL_HAVE_UNW_CONTEXT) #if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) || \ - defined(_CPU_ARM_) || defined(_CPU_PPC64_)) + defined(_CPU_ARM_) || defined(_CPU_PPC64_) || defined(_CPU_RISCV64_)) #define JL_HAVE_ASM #endif #if 0 @@ -57,29 +70,14 @@ typedef struct { //#define JL_HAVE_UNW_CONTEXT //#elif defined(_OS_LINUX_) //#define JL_HAVE_UNW_CONTEXT -#elif defined(_OS_EMSCRIPTEN_) -#define JL_HAVE_ASYNCIFY #elif !defined(JL_HAVE_ASM) #define JL_HAVE_UNW_CONTEXT // optimistically? #endif #endif -#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK) +#if !defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM) typedef jl_stack_context_t _jl_ucontext_t; #endif -#if defined(JL_HAVE_ASYNCIFY) -#if defined(_COMPILER_TSAN_ENABLED_) -#error TSAN not currently supported with asyncify -#endif -typedef struct { - // This is the extent of the asyncify stack, but because the top of the - // asyncify stack (stacktop) is also the bottom of the C stack, we can - // reuse stacktop for both. N.B.: This matches the layout of the - // __asyncify_data struct. - void *stackbottom; - void *stacktop; -} _jl_ucontext_t; -#endif #pragma GCC visibility push(default) #if defined(JL_HAVE_UNW_CONTEXT) #define UNW_LOCAL_ONLY @@ -95,9 +93,13 @@ typedef ucontext_t _jl_ucontext_t; typedef struct { union { - _jl_ucontext_t ctx; - jl_stack_context_t copy_ctx; + _jl_ucontext_t *ctx; + jl_stack_context_t *copy_ctx; }; + void *stkbuf; // malloc'd memory (either copybuf or stack) + size_t bufsz; // actual sizeof stkbuf + unsigned int copy_stack:31; // sizeof stack for copybuf + unsigned int started:1; #if defined(_COMPILER_TSAN_ENABLED_) void *tsan_state; #endif @@ -109,7 +111,7 @@ typedef struct { // handle to reference an OS thread #ifdef _OS_WINDOWS_ -typedef DWORD jl_thread_t; +typedef HANDLE jl_thread_t; #else typedef pthread_t jl_thread_t; #endif @@ -122,83 +124,7 @@ typedef struct { uint32_t count; } jl_mutex_t; -typedef struct { - jl_taggedvalue_t *freelist; // root of list of free objects - jl_taggedvalue_t *newpages; // root of list of chunks of free objects - uint16_t osize; // size of objects in this pool -} jl_gc_pool_t; - -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) freed; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(uint64_t) freecall; -} jl_thread_gc_num_t; - -typedef struct { - // variable for tracking weak references - arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocarray_t *mallocarrays; - struct _mallocarray_t *mafreelist; - - // variables for tracking big objects - struct _bigval_t *big_objects; - - // variables for tracking "remembered set" - arraylist_t _remset[2]; // contains jl_value_t* - // lower bound of the 
number of pointers inside remembered values - int remset_nptr; - arraylist_t *remset; - arraylist_t *last_remset; - - // variables for allocating objects from pools -#ifdef _P64 -# define JL_GC_N_POOLS 49 -#elif MAX_ALIGN == 8 -# define JL_GC_N_POOLS 50 -#else -# define JL_GC_N_POOLS 51 -#endif - jl_gc_pool_t norm_pools[JL_GC_N_POOLS]; - -#define JL_N_STACK_POOLS 16 - arraylist_t free_stacks[JL_N_STACK_POOLS]; -} jl_thread_heap_t; - -typedef struct { - ws_queue_t chunk_queue; - ws_queue_t ptr_queue; - arraylist_t reclaim_set; -} jl_gc_markqueue_t; - -typedef struct { - // thread local increment of `perm_scanned_bytes` - size_t perm_scanned_bytes; - // thread local increment of `scanned_bytes` - size_t scanned_bytes; - // Number of queued big objects (<= 1024) - size_t nbig_obj; - // Array of queued big objects to be moved between the young list - // and the old list. - // A set low bit means that the object should be moved from the old list - // to the young list (`mark_reset_age`). - // Objects can only be put into this list when the mark bit is flipped to - // `1` (atomically). Combining with the sync after marking, - // this makes sure that a single objects can only appear once in - // the lists (the mark bit cannot be flipped to `0` without sweeping) - void *big_obj[1024]; -} jl_gc_mark_cache_t; - struct _jl_bt_element_t; -struct _jl_gc_pagemeta_t; // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. @@ -207,40 +133,44 @@ typedef struct _jl_tls_states_t { int16_t tid; int8_t threadpoolid; uint64_t rngseed; - volatile size_t *safepoint; + _Atomic(volatile size_t *) safepoint; // may be changed to the suspend page by any thread _Atomic(int8_t) sleep_check_state; // read/write from foreign threads // Whether it is safe to execute GC at the same time. +#define JL_GC_STATE_UNSAFE 0 + // gc_state = 0 means the thread is running Julia code and is not + // safe to run concurrently to the GC #define JL_GC_STATE_WAITING 1 // gc_state = 1 means the thread is doing GC or is waiting for the GC to // finish. #define JL_GC_STATE_SAFE 2 // gc_state = 2 means the thread is running unmanaged code that can be // execute at the same time with the GC. +#define JL_GC_PARALLEL_COLLECTOR_THREAD 3 + // gc_state = 3 means the thread is a parallel collector thread (i.e. 
never runs Julia code) +#define JL_GC_CONCURRENT_COLLECTOR_THREAD 4 + // gc_state = 4 means the thread is a concurrent collector thread (background sweeper thread that never runs Julia code) _Atomic(int8_t) gc_state; // read from foreign threads // execution of certain certain impure // statements is prohibited from certain // callbacks (such as generated functions) // as it may make compilation undecidable - int8_t in_pure_callback; - int8_t in_finalizer; - int8_t disable_gc; + int16_t in_pure_callback; + int16_t in_finalizer; + int16_t disable_gc; // Counter to disable finalizer **on the current thread** int finalizers_inhibited; - jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen - jl_thread_gc_num_t gc_num; + jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs + small_arraylist_t lazily_freed_mtarraylist_buffers; volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; struct _jl_task_t *previous_task; struct _jl_task_t *root_task; struct _jl_timing_block_t *timing_stack; + // This is the location of our copy_stack void *stackbase; size_t stacksize; - union { - _jl_ucontext_t base_ctx; // base context of stack - // This hack is needed to support always_copy_stacks: - jl_stack_context_t copy_stack_ctx; - }; // Temp storage for exception thrown in signal handler. Not rooted. struct _jl_value_t *sig_exception; // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0. @@ -258,20 +188,21 @@ typedef struct _jl_tls_states_t { int needs_resetstkoflw; #else void *signal_stack; + size_t signal_stack_size; #endif jl_thread_t system_id; + _Atomic(int16_t) suspend_count; arraylist_t finalizers; - struct _jl_gc_pagemeta_t *page_metadata_allocd; - struct _jl_gc_pagemeta_t *page_metadata_lazily_freed; - jl_gc_markqueue_t mark_queue; - jl_gc_mark_cache_t gc_cache; - arraylist_t sweep_objs; // Saved exception for previous *external* API call or NULL if cleared. // Access via jl_exception_occurred(). 
struct _jl_value_t *previous_exception; +#ifdef _OS_DARWIN_ + jl_jmp_buf *volatile safe_restore; +#endif // currently-held locks, to be released when an exception is thrown small_arraylist_t locks; + size_t engine_nqueued; JULIA_DEBUG_SLEEPWAKE( uint64_t uv_run_enter; @@ -287,10 +218,7 @@ typedef struct _jl_tls_states_t { #endif } jl_tls_states_t; -#ifndef JL_LIBRARY_EXPORTS -// deprecated (only for external consumers) JL_DLLEXPORT void *jl_get_ptls_states(void); -#endif // Update codegen version in `ccall.cpp` after changing either `pause` or `wake` #ifdef __MIC__ @@ -328,26 +256,26 @@ void jl_sigint_safepoint(jl_ptls_t tls); // This triggers a SegFault when we are in GC // Assign it to a variable to make sure the compiler emit the load // and to avoid Clang warning for -Wunused-volatile-lvalue -#define jl_gc_safepoint_(ptls) do { \ - jl_signal_fence(); \ - size_t safepoint_load = *ptls->safepoint; \ - jl_signal_fence(); \ - (void)safepoint_load; \ +#define jl_gc_safepoint_(ptls) do { \ + jl_signal_fence(); \ + size_t safepoint_load = jl_atomic_load_relaxed(&ptls->safepoint)[0]; \ + jl_signal_fence(); \ + (void)safepoint_load; \ } while (0) -#define jl_sigint_safepoint(ptls) do { \ - jl_signal_fence(); \ - size_t safepoint_load = ptls->safepoint[-1]; \ - jl_signal_fence(); \ - (void)safepoint_load; \ +#define jl_sigint_safepoint(ptls) do { \ + jl_signal_fence(); \ + size_t safepoint_load = jl_atomic_load_relaxed(&ptls->safepoint)[-1]; \ + jl_signal_fence(); \ + (void)safepoint_load; \ } while (0) #endif STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state, int8_t old_state) { + assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD); + assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD); jl_atomic_store_release(&ptls->gc_state, state); - // A safe point is required if we transition from GC-safe region to - // non GC-safe region. 
- if (old_state && !state) + if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE) jl_gc_safepoint_(ptls); return old_state; } @@ -357,26 +285,31 @@ STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls, return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state)); } #ifdef __clang_gcanalyzer__ -int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; // this could be a safepoint, but we will assume it is not -void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; -int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; -void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE; // this might not be a safepoint, but we have to assume it could be (statically) +// these might not be a safepoint (if they are no-op safe=>safe transitions), but we have to assume it could be (statically) +// however mark a delineated region in which safepoints would be not permissible +int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT_LEAVE; +void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_ENTER; +int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT_ENTER; +void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE; #else -#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, 0) -#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), 0)) +#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_UNSAFE) +#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_UNSAFE)) #define jl_gc_safe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_SAFE) #define jl_gc_safe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_SAFE)) #endif JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on); -JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void); +JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void); JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct); extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers; -JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void); +JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_wakeup_thread(int16_t tid); +JL_DLLEXPORT int jl_getaffinity(int16_t tid, char *mask, int cpumasksize); +JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize); + #ifdef __cplusplus } #endif diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index d24c08b4b4930..194c6837860ca 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -88,6 +88,8 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset, memop.isaggr = isa(elty) || isa(elty) || isa(elty); memop.isobjref = hasObjref(elty); auto &field = getField(offset, size, elty); + field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); + if (field.second.hasobjref != memop.isobjref) field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits if (!isstore) @@ -125,13 +127,23 @@ JL_USED_FUNC void AllocUseInfo::dump(llvm::raw_ostream &OS) OS << "hastypeof: " << hastypeof << '\n'; OS << "refload: " << refload << '\n'; OS << "refstore: " << refstore << '\n'; + OS << "allockind:"; + if ((allockind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + OS << " uninitialized"; + if 
((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + OS << " zeroed"; + OS << '\n'; OS << "Uses: " << uses.size() << '\n'; - for (auto inst: uses) + for (auto inst: uses) { inst->print(OS); + OS << '\n'; + } if (!preserves.empty()) { OS << "Preserves: " << preserves.size() << '\n'; - for (auto inst: preserves) + for (auto inst: preserves) { inst->print(OS); + OS << '\n'; + } } OS << "MemOps: " << memops.size() << '\n'; for (auto &field: memops) { @@ -164,8 +176,11 @@ JL_USED_FUNC void AllocUseInfo::dump() #define REMARK(remark) #endif -void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) { +void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) { required.use_info.reset(); + Attribute allockind = I->getFnAttr(Attribute::AllocKind); + if (allockind.isValid()) + required.use_info.allockind = allockind.getAllocKind(); if (I->use_empty()) return; CheckInst::Frame cur{I, 0, I->use_begin(), I->use_end()}; @@ -189,6 +204,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg auto elty = inst->getType(); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } else if (!required.use_info.addMemOp(inst, 0, cur.offset, inst->getType(), @@ -233,6 +249,11 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg required.use_info.addrescaped = true; return true; } + if (required.pass.gc_loaded_func == callee) { + // TODO add manual load->store forwarding + push_inst(inst); + return true; + } if (required.pass.typeof_func == callee) { required.use_info.hastypeof = true; assert(use->get() == I); @@ -251,9 +272,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg } LLVM_DEBUG(dbgs() << "Unknown call, marking escape\n"); REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + inst->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownCall", inst) - << "Unknown call, marking escape (" << ore::NV("Call", inst) << ")"; + << "Unknown call, marking escape (" << ore::NV("Call", StringRef(str)) << ")"; }); required.use_info.escaped = true; return false; @@ -267,9 +291,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) { LLVM_DEBUG(dbgs() << "Object address is stored somewhere, marking escape\n"); REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + inst->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr", inst) - << "Object address is stored somewhere, marking escape (" << ore::NV("Store", inst) << ")"; + << "Object address is stored somewhere, marking escape (" << ore::NV("Store", StringRef(str)) << ")"; }); required.use_info.escaped = true; return false; @@ -280,6 +307,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg auto elty = storev->getType(); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } else if (!required.use_info.addMemOp(inst, 
use->getOperandNo(), cur.offset, storev->getType(), @@ -292,19 +320,26 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg if (use->getOperandNo() != isa(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex()) { LLVM_DEBUG(dbgs() << "Object address is cmpxchg/rmw-ed somewhere, marking escape\n"); REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + inst->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr", inst) - << "Object address is cmpxchg/rmw-ed somewhere, marking escape (" << ore::NV("Store", inst) << ")"; + << "Object address is cmpxchg/rmw-ed somewhere, marking escape (" << ore::NV("Store", StringRef(str)) << ")"; }); required.use_info.escaped = true; return false; } required.use_info.hasload = true; auto storev = isa(inst) ? cast(inst)->getNewValOperand() : cast(inst)->getValOperand(); + Type *elty = storev->getType(); if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(), - cur.offset, storev->getType(), + cur.offset, elty, true, required.DL)) { LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n"); + required.use_info.has_unknown_objref |= hasObjref(elty); + required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } required.use_info.refload = true; @@ -325,7 +360,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg else { next_offset = apoffset.getLimitedValue(); if (next_offset > UINT32_MAX) { - LLVM_DEBUG(dbgs() << "GEP inst exceeeds 32-bit offset\n"); + LLVM_DEBUG(dbgs() << "GEP inst exceeds 32-bit offset\n"); next_offset = UINT32_MAX; } } @@ -341,9 +376,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg } LLVM_DEBUG(dbgs() << "Unknown instruction, marking escape\n"); REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + inst->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownInst", inst) - << "Unknown instruction, marking escape (" << ore::NV("Inst", inst) << ")"; + << "Unknown instruction, marking escape (" << ore::NV("Inst", StringRef(str)) << ")"; }); required.use_info.escaped = true; return false; diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h index 3bd80704a0888..20e9132d10b4c 100644 --- a/src/llvm-alloc-helpers.h +++ b/src/llvm-alloc-helpers.h @@ -46,6 +46,8 @@ namespace jl_alloc { bool hasaggr:1; bool multiloc:1; bool hasload:1; + // The alloc has a unboxed object at this offset. + bool hasunboxed:1; llvm::Type *elty; llvm::SmallVector accesses; Field(uint32_t size, llvm::Type *elty) @@ -54,6 +56,7 @@ namespace jl_alloc { hasaggr(false), multiloc(false), hasload(false), + hasunboxed(false), elty(elty) { } @@ -87,12 +90,17 @@ namespace jl_alloc { bool returned:1; // The object is used in an error function bool haserror:1; + // For checking attributes of "uninitialized" or "zeroed" or unknown + llvm::AllocFnKind allockind; // The alloc has a Julia object reference not in an explicit field. bool has_unknown_objref:1; // The alloc has an aggregate Julia object reference not in an explicit field. bool has_unknown_objrefaggr:1; + // The alloc has an unboxed object at an unknown offset. 
+ bool has_unknown_unboxed:1; + void reset() { escaped = false; @@ -105,8 +113,10 @@ namespace jl_alloc { hasunknownmem = false; returned = false; haserror = false; + allockind = llvm::AllocFnKind::Unknown; has_unknown_objref = false; has_unknown_objrefaggr = false; + has_unknown_unboxed = false; uses.clear(); preserves.clear(); memops.clear(); @@ -153,7 +163,7 @@ namespace jl_alloc { } }; - void runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs()); + void runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs()); } diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index b87a5a6799b0b..7dd794a4d8847 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -80,6 +79,7 @@ static void removeGCPreserve(CallInst *call, Instruction *val) * * * load * * `pointer_from_objref` + * * `gc_loaded` * * Any real llvm intrinsics * * gc preserve intrinsics * * `ccall` gcroot array (`jl_roots` operand bundle) @@ -95,7 +95,6 @@ static void removeGCPreserve(CallInst *call, Instruction *val) * TODO: * * Return twice * * Handle phi node. - * * Look through `pointer_from_objref`. * * Handle jl_box* */ @@ -136,12 +135,13 @@ struct Optimizer { // insert llvm.lifetime.* calls for `ptr` with size `sz` based on the use of `orig`. void insertLifetime(Value *ptr, Constant *sz, Instruction *orig); - void checkInst(Instruction *I); + void checkInst(CallInst *I); void replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, Instruction *orig_i, Instruction *new_i); void removeAlloc(CallInst *orig_inst); - void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref); + void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocFnKind allockind); + void initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, AllocFnKind allockind); void splitOnStack(CallInst *orig_inst); void optimizeTag(CallInst *orig_inst); @@ -224,8 +224,11 @@ void Optimizer::optimizeAll() checkInst(orig); if (use_info.escaped) { REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) - << "GC allocation escaped " << ore::NV("GC Allocation", orig); + << "GC allocation escaped " << ore::NV("GC Allocation", StringRef(str)); }); if (use_info.hastypeof) optimizeTag(orig); @@ -233,8 +236,11 @@ void Optimizer::optimizeAll() } if (use_info.haserror || use_info.returned) { REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) - << "GC allocation has error or was returned " << ore::NV("GC Allocation", orig); + << "GC allocation has error or was returned " << ore::NV("GC Allocation", StringRef(str)); }); if (use_info.hastypeof) optimizeTag(orig); @@ -243,8 +249,11 @@ void Optimizer::optimizeAll() if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve || !use_info.refstore)) { REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); return OptimizationRemark(DEBUG_TYPE, "Dead Allocation", orig) - << "GC allocation removed " << ore::NV("GC Allocation", orig); + << "GC allocation removed " << ore::NV("GC Allocation", StringRef(str)); }); // No one took the address, no one reads anything and there's no meaningful // preserve of 
fields (either no preserve/ccall or no object reference fields) @@ -252,10 +261,12 @@ void Optimizer::optimizeAll() removeAlloc(orig); continue; } + bool has_unboxed = use_info.has_unknown_unboxed; bool has_ref = use_info.has_unknown_objref; bool has_refaggr = use_info.has_unknown_objrefaggr; for (auto memop: use_info.memops) { auto &field = memop.second; + has_unboxed |= field.hasunboxed; if (field.hasobjref) { has_ref = true; // This can be relaxed a little based on hasload @@ -268,8 +279,11 @@ void Optimizer::optimizeAll() } if (has_refaggr) { REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) - << "GC allocation has unusual object reference, unable to move to stack " << ore::NV("GC Allocation", orig); + << "GC allocation has unusual object reference, unable to move to stack " << ore::NV("GC Allocation", StringRef(str)); }); if (use_info.hastypeof) optimizeTag(orig); @@ -277,19 +291,41 @@ void Optimizer::optimizeAll() } if (!use_info.hasunknownmem && !use_info.addrescaped) { REMARK([&](){ + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); return OptimizationRemark(DEBUG_TYPE, "Stack Split Allocation", orig) - << "GC allocation split on stack " << ore::NV("GC Allocation", orig); + << "GC allocation split on stack " << ore::NV("GC Allocation", StringRef(str)); }); // No one actually care about the memory layout of this object, split it. splitOnStack(orig); continue; } + // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine + // if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }), + // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }. 
+ // This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value + if (has_unboxed && has_ref) { + REMARK([&]() { + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); + return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) + << "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", StringRef(str)); + }); + if (use_info.hastypeof) + optimizeTag(orig); + continue; + } REMARK([&](){ + std::string str; + llvm::raw_string_ostream rso(str); + orig->print(rso); return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig) - << "GC allocation moved to stack " << ore::NV("GC Allocation", orig); + << "GC allocation moved to stack " << ore::NV("GC Allocation", StringRef(str)); }); // The object has no fields with mix reference access - moveToStack(orig, sz, has_ref); + moveToStack(orig, sz, has_ref, use_info.allockind); } } @@ -311,7 +347,9 @@ bool Optimizer::isSafepoint(Instruction *inst) return false; if (auto callee = call->getCalledFunction()) { // Known functions emitted in codegen that are not safepoints - if (callee == pass.pointer_from_objref_func || callee->getName() == "memcmp") { + if (callee == pass.pointer_from_objref_func + || callee == pass.gc_loaded_func + || callee->getName() == "memcmp") { return false; } } @@ -354,7 +392,7 @@ ssize_t Optimizer::getGCAllocSize(Instruction *I) return -1; } -void Optimizer::checkInst(Instruction *I) +void Optimizer::checkInst(CallInst *I) { LLVM_DEBUG(dbgs() << "Running escape analysis on " << *I << "\n"); jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, pass, *pass.DL}; @@ -363,7 +401,10 @@ void Optimizer::checkInst(Instruction *I) std::string suse_info; llvm::raw_string_ostream osuse_info(suse_info); use_info.dump(osuse_info); - return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", I) << "escape analysis for " << ore::NV("GC Allocation", I) << "\n" << ore::NV("UseInfo", osuse_info.str()); + std::string str; + llvm::raw_string_ostream rso(str); + I->print(rso); + return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", I) << "escape analysis for " << ore::NV("GC Allocation", StringRef(str)) << "\n" << ore::NV("UseInfo", osuse_info.str()); }); } @@ -400,6 +441,8 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig) auto bb = use->getParent(); if (!bbs.insert(bb).second) continue; + if (pred_empty(bb)) + continue; // No predecessors so the block is dead assert(lifetime_stack.empty()); Lifetime::Frame cur{bb}; while (true) { @@ -476,7 +519,7 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig) // within the BB. // If some successors are live and others are dead, it's the first instruction in // the successors that are dead. 
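// --- Illustrative aside (not part of the patch) ------------------------------
// What insertLifetime ultimately emits: llvm.lifetime.start/end markers that
// tell later passes (e.g. stack coloring) when the new alloca's memory is
// meaningful. A hedged, self-contained sketch using the plain IRBuilder
// helpers; the pass above additionally computes liveness across basic blocks
// to place the end markers precisely, which this sketch does not attempt.
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/IRBuilder.h>

using namespace llvm;

static void emit_scoped_alloca(Function &F, uint64_t sz)
{
    LLVMContext &C = F.getContext();
    IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
    AllocaInst *buf = B.CreateAlloca(ArrayType::get(Type::getInt8Ty(C), sz));
    B.CreateLifetimeStart(buf, B.getInt64(sz));   // memory becomes meaningful here
    // ... uses of buf would be generated here ...
    B.CreateLifetimeEnd(buf, B.getInt64(sz));     // and is dead again after this point
}
// --- (end of illustrative aside) ----------------------------------------------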
- std::vector first_dead; + SmallVector first_dead; for (auto bb: bbs) { bool has_use = false; for (auto succ: successors(bb)) { @@ -567,7 +610,7 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, auto oldfType = call->getFunctionType(); auto newfType = FunctionType::get( oldfType->getReturnType(), - makeArrayRef(argTys).slice(0, oldfType->getNumParams()), + ArrayRef(argTys).slice(0, oldfType->getNumParams()), oldfType->isVarArg()); // Accumulate an array of overloaded types for the given intrinsic @@ -597,9 +640,20 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, call->eraseFromParent(); } +void Optimizer::initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, AllocFnKind allockind) +{ + if ((allockind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + return; + assert(!buff->isArrayAllocation()); + Type *T = buff->getAllocatedType(); + const DataLayout &DL = F.getParent()->getDataLayout(); + prolog_builder.CreateMemSet(buff, ConstantInt::get(Type::getInt8Ty(prolog_builder.getContext()), 0), DL.getTypeAllocSize(T), buff->getAlign()); + +} + // This function should not erase any safepoint so that the lifetime marker can find and cache // all the original safepoints. -void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) +void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocFnKind allockind) { ++RemovedAllocs; ++StackAllocs; @@ -629,7 +683,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) auto asize = ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz / DL.getTypeAllocSize(pass.T_prjlvalue)); buff = prolog_builder.CreateAlloca(pass.T_prjlvalue, asize); buff->setAlignment(Align(align)); - ptr = cast(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext()))); + ptr = cast(buff); } else { Type *buffty; @@ -639,12 +693,14 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) buffty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), sz); buff = prolog_builder.CreateAlloca(buffty); buff->setAlignment(Align(align)); - ptr = cast(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace()))); + ptr = cast(buff); } insertLifetime(ptr, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz), orig_inst); - Instruction *new_inst = cast(prolog_builder.CreateBitCast(ptr, JuliaType::get_pjlvalue_ty(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace()))); - if (orig_inst->getModule()->getDataLayout().getAllocaAddrSpace() != 0) - new_inst = cast(prolog_builder.CreateAddrSpaceCast(new_inst, JuliaType::get_pjlvalue_ty(prolog_builder.getContext(), orig_inst->getType()->getPointerAddressSpace()))); + if (sz != 0 && !has_ref) { // TODO: fix has_ref case too + IRBuilder<> builder(orig_inst); + initializeAlloca(builder, buff, allockind); + } + Instruction *new_inst = cast(ptr); new_inst->takeName(orig_inst); auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) { @@ -692,10 +748,15 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) else if (auto call = dyn_cast(user)) { auto callee = call->getCalledOperand(); if (pass.pointer_from_objref_func == callee) { - call->replaceAllUsesWith(new_i); + call->replaceAllUsesWith(prolog_builder.CreateAddrSpaceCast(new_i, call->getCalledFunction()->getReturnType())); call->eraseFromParent(); return; } + if 
(pass.gc_loaded_func == callee) { + // TODO: handle data pointer forwarding, length forwarding, and fence removal + user->replaceUsesOfWith(orig_i, Constant::getNullValue(orig_i->getType())); + return; + } if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag); @@ -728,17 +789,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) user->replaceUsesOfWith(orig_i, replace); } else if (isa(user) || isa(user)) { - auto cast_t = PointerType::getWithSamePointeeType(cast(user->getType()), new_i->getType()->getPointerAddressSpace()); - auto replace_i = new_i; - Type *new_t = new_i->getType(); - if (cast_t != new_t) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(cast_t->getContext().supportsTypedPointers()); - replace_i = new BitCastInst(replace_i, cast_t, "", user); - replace_i->setDebugLoc(user->getDebugLoc()); - replace_i->takeName(user); - } - push_frame(user, replace_i); + push_frame(user, new_i); } else if (auto gep = dyn_cast(user)) { SmallVector IdxOperands(gep->idx_begin(), gep->idx_end()); @@ -873,8 +924,11 @@ void Optimizer::optimizeTag(CallInst *orig_inst) if (pass.typeof_func == callee) { ++RemovedTypeofs; REMARK([&](){ + std::string str; + llvm::raw_string_ostream rso(str); + orig_inst->print(rso); return OptimizationRemark(DEBUG_TYPE, "typeof", call) - << "removed typeof call for GC allocation " << ore::NV("Alloc", orig_inst); + << "removed typeof call for GC allocation " << ore::NV("Alloc", StringRef(str)); }); call->replaceAllUsesWith(tag); // Push to the removed instructions to trigger `finalize` to @@ -923,8 +977,9 @@ void Optimizer::splitOnStack(CallInst *orig_inst) allocty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), field.size); } slot.slot = prolog_builder.CreateAlloca(allocty); - insertLifetime(prolog_builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(prolog_builder.getContext())), - ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst); + IRBuilder<> builder(orig_inst); + insertLifetime(slot.slot, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst); + initializeAlloca(builder, slot.slot, use_info.allockind); slots.push_back(std::move(slot)); } const auto nslots = slots.size(); @@ -976,15 +1031,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst) auto size = pass.DL->getTypeAllocSize(elty); Value *addr; if (offset % size == 0) { - addr = builder.CreateBitCast(slot.slot, elty->getPointerTo()); + addr = slot.slot; if (offset != 0) { addr = builder.CreateConstInBoundsGEP1_32(elty, addr, offset / size); } } else { - addr = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext())); + addr = slot.slot; addr = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), addr, offset); - addr = builder.CreateBitCast(addr, elty->getPointerTo()); } return addr; }; @@ -1004,7 +1058,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) assert(slot.offset == offset); newload = builder.CreateLoad(pass.T_prjlvalue, slot.slot); // Assume the addrspace is correct. 
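// --- Illustrative aside (not part of the patch) ------------------------------
// Many of the CreateBitCast calls removed in this hunk are casualties of
// opaque pointers: a pointer value no longer carries a pointee type, so a GEP
// over i8 can address any slot directly and only address-space casts remain
// meaningful. A hedged sketch of the byte-offset slot addressing used above;
// the helper name is made up for illustration.
#include <llvm/IR/IRBuilder.h>

using namespace llvm;

static Value *slot_addr(IRBuilder<> &B, AllocaInst *slot, unsigned byte_offset)
{
    if (byte_offset == 0)
        return slot;                                   // no cast needed with opaque pointers
    // Offset in raw bytes; the load/store that follows supplies the value type.
    return B.CreateConstInBoundsGEP1_32(B.getInt8Ty(), slot, byte_offset);
}
// --- (end of illustrative aside) ----------------------------------------------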
- val = builder.CreateBitCast(newload, load_ty); + val = newload; } else { newload = builder.CreateLoad(load_ty, slot_gep(slot, offset, load_ty, builder)); @@ -1040,10 +1094,9 @@ void Optimizer::splitOnStack(CallInst *orig_inst) store_ty = T_pjlvalue; } else { - store_ty = PointerType::getWithSamePointeeType(T_pjlvalue, cast(store_ty)->getAddressSpace()); - store_val = builder.CreateBitCast(store_val, store_ty); + store_ty = PointerType::get(T_pjlvalue->getContext(), store_ty->getPointerAddressSpace()); } - if (cast(store_ty)->getAddressSpace() != AddressSpace::Tracked) + if (store_ty->getPointerAddressSpace() != AddressSpace::Tracked) store_val = builder.CreateAddrSpaceCast(store_val, pass.T_prjlvalue); newstore = builder.CreateStore(store_val, slot.slot); } @@ -1106,14 +1159,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst) store->setOrdering(AtomicOrdering::NotAtomic); continue; } - auto ptr8 = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext())); + Value *ptr_slot = slot.slot; if (offset > slot.offset) - ptr8 = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptr8, + ptr_slot = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), slot.slot, offset - slot.offset); auto sub_size = std::min(slot.offset + slot.size, offset + size) - std::max(offset, slot.offset); // TODO: alignment computation - builder.CreateMemSet(ptr8, val_arg, sub_size, MaybeAlign(0)); + builder.CreateMemSet(ptr_slot, val_arg, sub_size, MaybeAlign(0)); } call->eraseFromParent(); return; @@ -1167,7 +1220,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) for (auto &bundle: bundles) { if (bundle.getTag() != "jl_roots") continue; - std::vector operands; + SmallVector operands; for (auto op: bundle.inputs()) { if (op == orig_i || isa(op)) continue; @@ -1230,8 +1283,8 @@ bool AllocOpt::doInitialization(Module &M) DL = &M.getDataLayout(); - lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) }); - lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) }); + lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) }); + lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { PointerType::get(M.getContext(), DL->getAllocaAddrSpace()) }); return true; } @@ -1247,44 +1300,13 @@ bool AllocOpt::runOnFunction(Function &F, function_ref GetDT) optimizer.optimizeAll(); bool modified = optimizer.finalize(); #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); + assert(!verifyLLVMIR(F)); #endif return modified; } -struct AllocOptLegacy : public FunctionPass { - static char ID; - AllocOpt opt; - AllocOptLegacy() : FunctionPass(ID) { - llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); - } - bool doInitialization(Module &m) override { - return opt.doInitialization(m); - } - bool runOnFunction(Function &F) override { - return opt.runOnFunction(F, [this]() -> DominatorTree & {return getAnalysis().getDomTree();}); - } - void getAnalysisUsage(AnalysisUsage &AU) const override - { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } -}; - -char AllocOptLegacy::ID = 0; -static RegisterPass X("AllocOpt", "Promote heap allocation to stack", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -} - -Pass 
*createAllocOptPass() -{ - return new AllocOptLegacy(); -} +} // anonymous namespace PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) { AllocOpt opt; bool modified = opt.doInitialization(*F.getParent()); @@ -1299,9 +1321,3 @@ PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) { return PreservedAnalyses::all(); } } - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createAllocOptPass()); -} diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index 0ab140b42b8b7..d9551e0552f9c 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -1,17 +1,23 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#include #include #include +#include #include #include #include #include #include +#include + #include "julia.h" #define STR(csym) #csym #define XSTR(csym) STR(csym) +static constexpr std::nullopt_t None = std::nullopt; + enum AddressSpace { Generic = 0, Tracked = 10, @@ -59,9 +65,19 @@ namespace JuliaType { return llvm::FunctionType::get(T_prjlvalue, { T_prjlvalue, // function T_pprjlvalue, // args[] - llvm::Type::getInt32Ty(C), - T_prjlvalue, // linfo - }, // nargs + llvm::Type::getInt32Ty(C), // nargs + T_prjlvalue}, // linfo + false); + } + + static inline auto get_jlfunc3_ty(llvm::LLVMContext &C) { + auto T_prjlvalue = get_prjlvalue_ty(C); + auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); + auto T = get_pjlvalue_ty(C, Derived); + return llvm::FunctionType::get(T_prjlvalue, { + T, // function + T_pprjlvalue, // args[] + llvm::Type::getInt32Ty(C)}, // nargs false); } @@ -92,11 +108,10 @@ struct CountTrackedPointers { unsigned count = 0; bool all = true; bool derived = false; - CountTrackedPointers(llvm::Type *T); + CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false); }; -unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder); -std::vector ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef perm_offsets={}); +llvm::SmallVector ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef perm_offsets={}); static inline void llvm_dump(llvm::Value *v) { @@ -147,72 +162,49 @@ static inline llvm::MDNode *get_tbaa_const(llvm::LLVMContext &ctxt) { } static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instruction *inst) -{ - inst->setMetadata(llvm::LLVMContext::MD_tbaa, md); - if (llvm::isa(inst) && md && md == get_tbaa_const(md->getContext())) - inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), llvm::None)); - return inst; -} - -// bitcast a value, but preserve its address space when dealing with pointer types -static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value) { using namespace llvm; - if (isa(jl_value) && - v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) { - // Cast to the proper address space - Type *jl_value_addr = PointerType::getWithSamePointeeType(cast(jl_value), v->getType()->getPointerAddressSpace()); - return builder.CreateBitCast(v, jl_value_addr); - } - else { - return builder.CreateBitCast(v, jl_value); + inst->setMetadata(llvm::LLVMContext::MD_tbaa, md); + if (llvm::isa(inst) && md && md == get_tbaa_const(md->getContext())) { + 
inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), std::nullopt)); } + return inst; } // Get PTLS through current task. -static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack) +static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack) { using namespace llvm; - auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); - auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + auto i8 = builder.getInt8Ty(); const int pgcstack_offset = offsetof(jl_task_t, gcstack); - return builder.CreateInBoundsGEP( - T_pjlvalue, emit_bitcast_with_builder(builder, pgcstack, T_ppjlvalue), - ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))), - "current_task"); + return builder.CreateConstInBoundsGEP1_32(i8, pgcstack, -pgcstack_offset, "current_task"); } // Get PTLS through current task. -static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa) +static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa) { using namespace llvm; - auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); - auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + auto i8 = builder.getInt8Ty(); + auto T_ptr = builder.getPtrTy(); const int ptls_offset = offsetof(jl_task_t, ptls); - llvm::Value *pptls = builder.CreateInBoundsGEP( - T_pjlvalue, current_task, - ConstantInt::get(T_size, ptls_offset / sizeof(void *)), - "ptls_field"); - LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue, - emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load"); + llvm::Value *pptls = builder.CreateConstInBoundsGEP1_32(i8, current_task, ptls_offset, "ptls_field"); + LoadInst *ptls_load = builder.CreateAlignedLoad(T_ptr, pptls, Align(sizeof(void *)), "ptls_load"); // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c. tbaa_decorate(tbaa, ptls_load); - return builder.CreateBitCast(ptls_load, T_ppjlvalue, "ptls"); + return ptls_load; } // Get signal page through current task. 
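The two task helpers above now address fields by raw byte offset instead of treating the task as an array of pointer-sized slots. A sketch of the same pattern against a hypothetical stand-in struct (FakeTask and the helper name are illustrative; the real code uses offsetof on jl_task_t):

#include <cstddef>
#include <llvm/IR/IRBuilder.h>
#include <llvm/Support/Alignment.h>

using namespace llvm;

struct FakeTask { void *gcstack; void *ptls; }; // hypothetical layout

// Address a field as base + offsetof(...) bytes via an i8 GEP, then load a
// plain opaque `ptr`; no typed-pointer bitcasts are needed anymore.
static Value *loadPtlsField(IRBuilder<> &B, Value *TaskPtr) {
    Value *Addr = B.CreateConstInBoundsGEP1_32(
        B.getInt8Ty(), TaskPtr, offsetof(FakeTask, ptls), "ptls_field");
    return B.CreateAlignedLoad(B.getPtrTy(), Addr, Align(sizeof(void *)), "ptls_load");
}

Because the load already yields an opaque `ptr`, the trailing CreateBitCast in the old version had nothing left to do, which is why it disappears in this hunk.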
-static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa) +static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa) { using namespace llvm; // return builder.CreateCall(prepare_call(reuse_signal_page_func)); - auto T_psize = T_size->getPointerTo(); - auto T_ppsize = T_psize->getPointerTo(); - int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); - ptls = emit_bitcast_with_builder(builder, ptls, T_ppsize); - llvm::Value *psafepoint = builder.CreateInBoundsGEP( - T_psize, ptls, ConstantInt::get(T_size, nthfield)); + auto T_ptr = builder.getPtrTy(); + auto i8 = builder.getInt8Ty(); + int nthfield = offsetof(jl_tls_states_t, safepoint); + llvm::Value *psafepoint = builder.CreateConstInBoundsGEP1_32(i8, ptls, nthfield); LoadInst *ptls_load = builder.CreateAlignedLoad( - T_psize, psafepoint, Align(sizeof(void *)), "safepoint"); + T_ptr, psafepoint, Align(sizeof(void *)), "safepoint"); tbaa_decorate(tbaa, ptls_load); return ptls_load; } @@ -226,7 +218,7 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder) static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false) { using namespace llvm; - llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa); + llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa); emit_signal_fence(builder); Module *M = builder.GetInsertBlock()->getModule(); LLVMContext &C = builder.getContext(); @@ -237,10 +229,9 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s else { Function *F = M->getFunction("julia.safepoint"); if (!F) { - auto T_psize = T_size->getPointerTo(); - FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false); + FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false); F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); - F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); } builder.CreateCall(F, {signal_page}); } @@ -251,9 +242,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T { using namespace llvm; Type *T_int8 = state->getType(); - llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy()); - Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state)); - Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef(offset), "gc_state"); + unsigned offset = offsetof(jl_tls_states_t, gc_state); + Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state"); if (old_state == nullptr) { old_state = builder.CreateLoad(T_int8, gc_state); cast(old_state)->setOrdering(AtomicOrdering::Monotonic); @@ -268,8 +258,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T BasicBlock *passBB = BasicBlock::Create(builder.getContext(), "safepoint", builder.GetInsertBlock()->getParent()); BasicBlock *exitBB = BasicBlock::Create(builder.getContext(), "after_safepoint", builder.GetInsertBlock()->getParent()); Constant *zero8 = ConstantInt::get(T_int8, 0); - builder.CreateCondBr(builder.CreateAnd(builder.CreateICmpNE(old_state, zero8), // if (old_state && !state) - builder.CreateICmpEQ(state, zero8)), + 
builder.CreateCondBr(builder.CreateOr(builder.CreateICmpEQ(old_state, zero8), // if (!old_state || !state) + builder.CreateICmpEQ(state, zero8)), passBB, exitBB); builder.SetInsertPoint(passBB); MDNode *tbaa = get_tbaa_const(builder.getContext()); @@ -289,7 +279,7 @@ static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) { using namespace llvm; - Value *old_state = builder.getInt8(0); + Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE); return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); } diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 77f1baf6237c4..a6e963664b0f3 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -38,20 +37,20 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false"); extern JuliaOJIT *jl_ExecutionEngine; // whether this platform unconditionally (i.e. without needing multiversioning) supports FMA -Optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { +std::optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { if (TT.isAArch64()) { auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); return typ == "f32" || typ == "f64"; } else { - return {}; + return None; } } static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTSAFEPOINT { auto unconditional = always_have_fma(intr, TT); - if (unconditional.hasValue()) - return unconditional.getValue(); + if (unconditional) + return *unconditional; auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); @@ -60,7 +59,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS StringRef FS = FSAttr.isValid() ? 
FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); - SmallVector Features; + SmallVector Features; FS.split(Features, ','); for (StringRef Feature : Features) if (TT.isARM()) { @@ -68,7 +67,7 @@ static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTS return typ == "f32" || typ == "f64"; else if (Feature == "+vfp4sp") return typ == "f32"; - } else { + } else if (TT.isX86()) { if (Feature == "+fma" || Feature == "+fma4") return typ == "f32" || typ == "f64"; } @@ -95,7 +94,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT for (auto &F: M.functions()) { auto FN = F.getName(); - if (FN.startswith("julia.cpu.have_fma.")) { + if (FN.starts_with("julia.cpu.have_fma.")) { for (Use &U: F.uses()) { User *RU = U.getUser(); CallInst *I = cast(RU); @@ -110,7 +109,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT I->eraseFromParent(); } #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(M)); #endif return true; } else { @@ -125,33 +124,3 @@ PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM) } return PreservedAnalyses::all(); } - -namespace { -struct CPUFeaturesLegacy : public ModulePass { - static char ID; - CPUFeaturesLegacy() JL_NOTSAFEPOINT : ModulePass(ID) {}; - - bool runOnModule(Module &M) - { - return lowerCPUFeatures(M); - } -}; - -char CPUFeaturesLegacy::ID = 0; -static RegisterPass - Y("CPUFeatures", - "Lower calls to CPU feature testing intrinsics.", - false, - false); -} - -Pass *createCPUFeaturesPass() -{ - return new CPUFeaturesLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createCPUFeaturesPass()); -} diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index b2428860c2882..7f1b076897fc8 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -1,8 +1,9 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -// This pass finds floating-point operations on 16-bit (half precision) values, and replaces -// them by equivalent operations on 32-bit (single precision) values surrounded by a fpext -// and fptrunc. This ensures that the exact semantics of IEEE floating-point are preserved. +// This pass finds floating-point operations on 16-bit values (half precision and bfloat), +// and replaces them by equivalent operations on 32-bit (single precision) values surrounded +// by a fpext and fptrunc. This ensures that the exact semantics of IEEE floating-point are +// preserved. // // Without this pass, back-ends that do not natively support half-precision (e.g. 
x86_64) // similarly pattern-match half-precision operations with single-precision equivalents, but @@ -20,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -49,33 +49,36 @@ extern JuliaOJIT *jl_ExecutionEngine; namespace { -static bool have_fp16(Function &caller, const Triple &TT) { - Attribute FSAttr = caller.getFnAttribute("target-features"); - StringRef FS = ""; - if (FSAttr.isValid()) - FS = FSAttr.getValueAsString(); - else if (jl_ExecutionEngine) - FS = jl_ExecutionEngine->getTargetFeatureString(); - // else probably called from opt, just do nothing - if (TT.isAArch64()) { - if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){ - return true; - } - } else if (TT.getArch() == Triple::x86_64) { - if (FS.find("+avx512fp16") != llvm::StringRef::npos){ - return true; - } - } - if (caller.hasFnAttribute("julia.hasfp16")) { - return true; - } - return false; +static bool have_fp16(Function &F, const Triple &TT) { + // for testing purposes + Attribute Attr = F.getFnAttribute("julia.hasfp16"); + if (Attr.isValid()) + return Attr.getValueAsBool(); + + // llvm/llvm-project#97975: on some platforms, `half` uses excessive precision + if (TT.isPPC()) + return false; + + return true; +} + +static bool have_bf16(Function &F, const Triple &TT) { + // for testing purposes + Attribute Attr = F.getFnAttribute("julia.hasbf16"); + if (Attr.isValid()) + return Attr.getValueAsBool(); + + // https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218770199: + // on current versions of LLVM, bf16 always uses TypeSoftPromoteHalf + return true; } static bool demoteFloat16(Function &F) { auto TT = Triple(F.getParent()->getTargetTriple()); - if (have_fp16(F, TT)) + auto has_fp16 = have_fp16(F, TT); + auto has_bf16 = have_bf16(F, TT); + if (has_fp16 && has_bf16) return false; auto &ctx = F.getContext(); @@ -83,14 +86,17 @@ static bool demoteFloat16(Function &F) SmallVector erase; for (auto &BB : F) { for (auto &I : BB) { - // extend Float16 operands to Float32 + // check whether there's any 16-bit floating point operands to extend bool Float16 = I.getType()->getScalarType()->isHalfTy(); - for (size_t i = 0; !Float16 && i < I.getNumOperands(); i++) { + bool BFloat16 = I.getType()->getScalarType()->isBFloatTy(); + for (size_t i = 0; !BFloat16 && !Float16 && i < I.getNumOperands(); i++) { Value *Op = I.getOperand(i); - if (Op->getType()->getScalarType()->isHalfTy()) + if (!has_fp16 && Op->getType()->getScalarType()->isHalfTy()) Float16 = true; + else if (!has_bf16 && Op->getType()->getScalarType()->isBFloatTy()) + BFloat16 = true; } - if (!Float16) + if (!Float16 && !BFloat16) continue; switch (I.getOpcode()) { @@ -114,11 +120,16 @@ static bool demoteFloat16(Function &F) IRBuilder<> builder(&I); - // extend Float16 operands to Float32 + // extend 16-bit floating point operands SmallVector Operands(I.getNumOperands()); for (size_t i = 0; i < I.getNumOperands(); i++) { Value *Op = I.getOperand(i); - if (Op->getType()->getScalarType()->isHalfTy()) { + if (!has_fp16 && Op->getType()->getScalarType()->isHalfTy()) { + // extend Float16 to Float32 + ++TotalExt; + Op = builder.CreateFPExt(Op, Op->getType()->getWithNewType(T_float32)); + } else if (!has_bf16 && Op->getType()->getScalarType()->isBFloatTy()) { + // extend BFloat16 to Float32 ++TotalExt; Op = builder.CreateFPExt(Op, Op->getType()->getWithNewType(T_float32)); } @@ -126,7 +137,7 @@ static bool demoteFloat16(Function &F) } // recreate the instruction if any operands changed, - // 
truncating the result back to Float16 + // truncating the result back to the original type Value *NewI; ++TotalChanged; switch (I.getOpcode()) { @@ -184,7 +195,7 @@ static bool demoteFloat16(Function &F) for (auto V : erase) V->eraseFromParent(); #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); + assert(!verifyLLVMIR(F)); #endif return true; } @@ -201,34 +212,3 @@ PreservedAnalyses DemoteFloat16Pass::run(Function &F, FunctionAnalysisManager &A } return PreservedAnalyses::all(); } - -namespace { - -struct DemoteFloat16Legacy : public FunctionPass { - static char ID; - DemoteFloat16Legacy() : FunctionPass(ID){}; - -private: - bool runOnFunction(Function &F) override { - return demoteFloat16(F); - } -}; - -char DemoteFloat16Legacy::ID = 0; -static RegisterPass - Y("DemoteFloat16", - "Demote Float16 operations to Float32 equivalents.", - false, - false); -} // end anonymous namespac - -Pass *createDemoteFloat16Pass() -{ - return new DemoteFloat16Legacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createDemoteFloat16Pass()); -} diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index e31bcb21199f5..76dcd944890ab 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -1,23 +1,6 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#include "llvm-version.h" -#include "passes.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "llvm-codegen-shared.h" -#include "julia.h" -#include "julia_internal.h" -#include "llvm-pass-helpers.h" +#include "llvm-gc-interface-passes.h" #define DEBUG_TYPE "final_gc_lowering" STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics"); @@ -28,60 +11,14 @@ STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics"); STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics"); STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics"); -using namespace llvm; - -// The final GC lowering pass. This pass lowers platform-agnostic GC -// intrinsics to platform-dependent instruction sequences. The -// intrinsics it targets are those produced by the late GC frame -// lowering pass. -// -// This pass targets typical back-ends for which the standard Julia -// runtime library is available. Atypical back-ends should supply -// their own lowering pass. - -struct FinalLowerGC: private JuliaPassContext { - bool runOnFunction(Function &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); - -private: - Function *queueRootFunc; - Function *poolAllocFunc; - Function *bigAllocFunc; - Function *allocTypedFunc; - Instruction *pgcstack; - Type *T_size; - - // Lowers a `julia.new_gc_frame` intrinsic. - Value *lowerNewGCFrame(CallInst *target, Function &F); - - // Lowers a `julia.push_gc_frame` intrinsic. - void lowerPushGCFrame(CallInst *target, Function &F); - - // Lowers a `julia.pop_gc_frame` intrinsic. - void lowerPopGCFrame(CallInst *target, Function &F); - - // Lowers a `julia.get_gc_frame_slot` intrinsic. - Value *lowerGetGCFrameSlot(CallInst *target, Function &F); - - // Lowers a `julia.gc_alloc_bytes` intrinsic. - Value *lowerGCAllocBytes(CallInst *target, Function &F); - - // Lowers a `julia.queue_gc_root` intrinsic. - Value *lowerQueueGCRoot(CallInst *target, Function &F); - - // Lowers a `julia.safepoint` intrinsic. 
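The llvm-demote-float16.cpp comment above describes the rewrite as extend, operate in single precision, truncate. A minimal sketch of that rewrite for one binary op, assuming scalar half operands and an IRBuilder positioned at the original instruction (helper name illustrative; the pass itself also handles vectors and the other listed opcodes):

#include <llvm/IR/IRBuilder.h>

using namespace llvm;

// `%r = fadd half %a, %b` becomes fpext -> fadd float -> fptrunc back to half.
// For the basic arithmetic operations, float32 is wide enough that rounding the
// widened result back to half matches the natively computed half result.
static Value *demoteHalfFAdd(IRBuilder<> &B, Value *LHS, Value *RHS) {
    Type *F32 = B.getFloatTy();
    Value *Wide = B.CreateFAdd(B.CreateFPExt(LHS, F32), B.CreateFPExt(RHS, F32));
    return B.CreateFPTrunc(Wide, LHS->getType());
}

With this patch the same shape is applied to bfloat operands when the target lacks native bf16 support, except that the truncation goes back to the original 16-bit type rather than specifically to half.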
- Value *lowerSafepoint(CallInst *target, Function &F); -}; - -Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) +void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) { ++NewGCFrameCount; assert(target->arg_size() == 1); unsigned nRoots = cast(target->getArgOperand(0))->getLimitedValue(INT_MAX); // Create the GC frame. - IRBuilder<> builder(target->getNextNode()); + IRBuilder<> builder(target); auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2)); gcframe_alloca->setAlignment(Align(16)); // addrspacecast as needed for non-0 alloca addrspace @@ -92,7 +29,8 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) auto ptrsize = F.getParent()->getDataLayout().getPointerSize(); builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe); - return gcframe; + target->replaceAllUsesWith(gcframe); + target->eraseFromParent(); } void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) @@ -102,27 +40,25 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) auto gcframe = target->getArgOperand(0); unsigned nRoots = cast(target->getArgOperand(1))->getLimitedValue(INT_MAX); - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(&*(++BasicBlock::iterator(target))); + IRBuilder<> builder(target); StoreInst *inst = builder.CreateAlignedStore( ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)), - builder.CreateBitCast( - builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0), - T_size->getPointerTo()), + builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"),// GEP of 0 becomes a noop and eats the name Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(F.getContext()); inst = builder.CreateAlignedStore( - builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*))), + builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*)), "task.gcstack"), builder.CreatePointerCast( - builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1), + builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1, "frame.prev"), PointerType::get(T_ppjlvalue, 0)), Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - inst = builder.CreateAlignedStore( + builder.CreateAlignedStore( gcframe, - builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)), + pgcstack, Align(sizeof(void*))); + target->eraseFromParent(); } void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) @@ -131,21 +67,20 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) assert(target->arg_size() == 1); auto gcframe = target->getArgOperand(0); - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(target); + IRBuilder<> builder(target); Instruction *gcpop = cast(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1)); - Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*))); + Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)), "frame.prev"); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); inst = builder.CreateAlignedStore( inst, - builder.CreateBitCast(pgcstack, - PointerType::get(T_prjlvalue, 0)), + pgcstack, Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + target->eraseFromParent(); } -Value 
*FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) +void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) { ++GetGCFrameSlotCount; assert(target->arg_size() == 2); @@ -153,8 +88,7 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) auto index = target->getArgOperand(1); // Initialize an IR builder. - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(target); + IRBuilder<> builder(target); // The first two slots are reserved, so we'll add two to the index. index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2)); @@ -162,39 +96,37 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) // Lower the intrinsic as a GEP. auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index); gep->takeName(target); - return gep; + target->replaceAllUsesWith(gep); + target->eraseFromParent(); } -Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) +void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) { ++QueueGCRootCount; assert(target->arg_size() == 1); target->setCalledFunction(queueRootFunc); - return target; } -Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) +void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) { ++SafepointCount; assert(target->arg_size() == 1); - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(target); + IRBuilder<> builder(target); Value* signal_page = target->getOperand(0); - Value* load = builder.CreateLoad(T_size, signal_page, true); - return load; + builder.CreateLoad(T_size, signal_page, true); + target->eraseFromParent(); } -Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) +void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { ++GCAllocBytesCount; - assert(target->arg_size() == 2); + assert(target->arg_size() == 3); CallInst *newI; IRBuilder<> builder(target); - builder.SetCurrentDebugLocation(target->getDebugLoc()); auto ptls = target->getArgOperand(0); - Attribute derefAttr; - + auto type = target->getArgOperand(2); + uint64_t derefBytes = 0; if (auto CI = dyn_cast(target->getArgOperand(1))) { size_t sz = (size_t)CI->getZExtValue(); // This is strongly architecture and OS dependent @@ -203,104 +135,36 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) if (offset < 0) { newI = builder.CreateCall( bigAllocFunc, - { ptls, ConstantInt::get(T_size, sz + sizeof(void*)) }); - derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*)); + { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type }); + if (sz > 0) + derefBytes = sz; } else { auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); - newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); - derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); + newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type }); + if (sz > 0) + derefBytes = sz; } } else { auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size); - size = builder.CreateAdd(size, ConstantInt::get(T_size, sizeof(void*))); - newI = builder.CreateCall(allocTypedFunc, { ptls, size, ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext())) }); - derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sizeof(void*)); + // allocTypedFunc does not include the type tag in the allocation size! 
+ newI = builder.CreateCall(allocTypedFunc, { ptls, size, type }); + derefBytes = sizeof(void*); } newI->setAttributes(newI->getCalledFunction()->getAttributes()); - newI->addRetAttr(derefAttr); + unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*)); + newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align))); + if (derefBytes > 0) + newI->addDereferenceableRetAttr(derefBytes); newI->takeName(target); - return newI; -} - -bool FinalLowerGC::doInitialization(Module &M) { - // Initialize platform-agnostic references. - initAll(M); - - // Initialize platform-specific references. - queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); - poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc); - bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); - allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); - T_size = M.getDataLayout().getIntPtrType(M.getContext()); - - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; - unsigned j = 0; - for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) { - if (!functionList[i]) - continue; - if (i != j) - functionList[j] = functionList[i]; - j++; - } - if (j != 0) - appendToCompilerUsed(M, ArrayRef(functionList, j)); - return true; -} - -bool FinalLowerGC::doFinalization(Module &M) -{ - GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc}; - queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr; - auto used = M.getGlobalVariable("llvm.compiler.used"); - if (!used) - return false; - SmallPtrSet InitAsSet( - functionList, - functionList + sizeof(functionList) / sizeof(void*)); - bool changed = false; - SmallVector init; - ConstantArray *CA = cast(used->getInitializer()); - for (auto &Op : CA->operands()) { - Constant *C = cast_or_null(Op); - if (InitAsSet.count(C->stripPointerCasts())) { - changed = true; - continue; - } - init.push_back(C); - } - if (!changed) - return false; - used->eraseFromParent(); - if (init.empty()) - return true; - ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size()); - used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, - ConstantArray::get(ATy, init), "llvm.compiler.used"); - used->setSection("llvm.metadata"); - return true; -} - -template -static void replaceInstruction( - Instruction *oldInstruction, - Value *newInstruction, - TIterator &it) -{ - if (newInstruction != oldInstruction) { - oldInstruction->replaceAllUsesWith(newInstruction); - it = oldInstruction->eraseFromParent(); - } - else { - ++it; - } + target->replaceAllUsesWith(newI); + target->eraseFromParent(); } bool FinalLowerGC::runOnFunction(Function &F) { - // Check availability of functions again since they might have been deleted. - initFunctions(*F.getParent()); + initAll(*F.getParent()); if (!pgcstack_getter && !adoptthread_func) { LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); return false; @@ -313,125 +177,52 @@ bool FinalLowerGC::runOnFunction(Function &F) return false; } LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); - - // Acquire intrinsic functions. 
- auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame); - auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame); - auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame); - auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot); - auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes); - auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot); - auto safepointFunc = getOrNull(jl_intrinsics::safepoint); + queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); + smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); + bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); + allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); + T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); // Lower all calls to supported intrinsics. - for (BasicBlock &BB : F) { - for (auto it = BB.begin(); it != BB.end();) { - auto *CI = dyn_cast(&*it); - if (!CI) { - ++it; + for (auto &BB : F) { + for (auto &I : make_early_inc_range(BB)) { + auto *CI = dyn_cast(&I); + if (!CI) continue; - } Value *callee = CI->getCalledOperand(); assert(callee); - if (callee == newGCFrameFunc) { - replaceInstruction(CI, lowerNewGCFrame(CI, F), it); - } - else if (callee == pushGCFrameFunc) { - lowerPushGCFrame(CI, F); - it = CI->eraseFromParent(); - } - else if (callee == popGCFrameFunc) { - lowerPopGCFrame(CI, F); - it = CI->eraseFromParent(); - } - else if (callee == getGCFrameSlotFunc) { - replaceInstruction(CI, lowerGetGCFrameSlot(CI, F), it); - } - else if (callee == GCAllocBytesFunc) { - replaceInstruction(CI, lowerGCAllocBytes(CI, F), it); - } - else if (callee == queueGCRootFunc) { - replaceInstruction(CI, lowerQueueGCRoot(CI, F), it); - } - else if (callee == safepointFunc) { - lowerSafepoint(CI, F); - it = CI->eraseFromParent(); - } - else { - ++it; - } +#define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \ + do { \ + auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \ + if (intrinsic == callee) { \ + LOWER_INTRINSIC_FUNC(CI, F); \ + } \ + } while (0) + + LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame); + LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot); + LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame); + LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame); + LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes); + LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot); + LOWER_INTRINSIC(safepoint, lowerSafepoint); + +#undef LOWER_INTRINSIC } } return true; } -struct FinalLowerGCLegacy: public FunctionPass { - static char ID; - FinalLowerGCLegacy() : FunctionPass(ID), finalLowerGC(FinalLowerGC()) {} - -protected: - void getAnalysisUsage(AnalysisUsage &AU) const override { - FunctionPass::getAnalysisUsage(AU); - } - -private: - bool runOnFunction(Function &F) override; - bool doInitialization(Module &M) override; - bool doFinalization(Module &M) override; - - FinalLowerGC finalLowerGC; -}; - -bool FinalLowerGCLegacy::runOnFunction(Function &F) { - return finalLowerGC.runOnFunction(F); -} - -bool FinalLowerGCLegacy::doInitialization(Module &M) { - return finalLowerGC.doInitialization(M); -} - -bool FinalLowerGCLegacy::doFinalization(Module &M) { - auto ret = finalLowerGC.doFinalization(M); -#ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); -#endif - return ret; -} - - -PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) { - auto finalLowerGC = FinalLowerGC(); - bool modified = false; - modified |= finalLowerGC.doInitialization(M); - for (auto 
&F : M.functions()) { - if (F.isDeclaration()) - continue; - modified |= finalLowerGC.runOnFunction(F); - } - modified |= finalLowerGC.doFinalization(M); + if (FinalLowerGC().runOnFunction(F)) { #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(F)); #endif - if (modified) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - -char FinalLowerGCLegacy::ID = 0; -static RegisterPass X("FinalLowerGC", "Final GC intrinsic lowering pass", false, false); - -Pass *createFinalLowerGCPass() -{ - return new FinalLowerGCLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createFinalLowerGCPass()); -} diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h new file mode 100644 index 0000000000000..7b2a4bb033203 --- /dev/null +++ b/src/llvm-gc-interface-passes.h @@ -0,0 +1,416 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +/* + LLVM passes that may be partially modified by a third-party GC implementation. +*/ + +#include "llvm-version.h" +#include "passes.h" + +#include "llvm/IR/DerivedTypes.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "llvm-codegen-shared.h" +#include "julia.h" +#include "julia_internal.h" +#include "julia_assert.h" +#include "llvm-pass-helpers.h" +#include +#include + +#ifndef LLVM_GC_PASSES_H +#define LLVM_GC_PASSES_H + +using namespace llvm; + +/* Julia GC Root Placement pass. For a general overview of the design of GC + root lowering, see the devdocs. This file is the actual implementation. + + The actual algorithm is fairly straightforward. First recall the goal of this + pass: + + Minimize the number of needed gc roots/stores to them subject to the constraint + that at every safepoint, any live gc-tracked pointer (i.e. for which there is + a path after this point that contains a use of this pointer) is in some gc slot. + + In particular, in order to understand this algorithm, it is important to + realize that the only places where rootedness matters is at safepoints. + + Now, the primary phases of the algorithm are: + + 1. Local Scan + + During this step, each Basic Block is inspected and analyzed for local + properties. In particular, we want to determine the ordering of any of + the following activities: + + - Any Def of a gc-tracked pointer. In general Defs are the results of + calls or loads from appropriate memory locations. Phi nodes and + selects do complicate this story slightly as described below. + - Any use of a gc-tracked or derived pointer. As described in the + devdocs, a use is in general one of + a) a load from a tracked/derived value + b) a store to a tracked/derived value + c) a store OF a tracked/derived value + d) a use of a value as a call operand (including operand bundles) + - Any safepoint + + Crucially, we also perform pointer numbering during the local scan, + assigning every Def a unique integer and caching the integer for each + derived pointer. This allows us to operate only on the set of Defs ( + represented by these integers) for the rest of the algorithm. We also + maintain some local utility information that is needed by later passes + (see the BBState struct for details). + + 2. 
Dataflow Computation + + This computation operates entirely over the function's control flow graph + and does not look into a basic block. The algorithm is essentially + textbook iterative data flow for liveness computation. However, the + data flow equations are slightly more complicated because we also + forward propagate rootedness information in addition to backpropagating + liveness. + + 3. Live Set Computation + + With the liveness information from the previous step, we can now compute, + for every safepoint, the set of values live at that particular safepoint. + There are three pieces of information being combined here: + i. Values that needed to be live due to local analysis (e.g. there + was a def, then a safepoint, then a use). This was computed during + local analysis. + ii. Values that are live across the basic block (i.e. they are live + at every safepoint within the basic block). This relies entirely + on the liveness information. + iii. Values that are now live-out from the basic block (i.e. they are + live at every safepoint following their def). During local + analysis, we keep, for every safepoint, those values that would + be live if they were live out. Here we can check if they are + actually live-out and make the appropriate additions to the live + set. + + Lastly, we also explicitly compute, for each value, the list of values + that are simultaneously live at some safepoint. This is known as an + "interference graph" and is the input to the next step. + + 4. GC Root coloring + + Two values which are not simultaneously live at a safepoint can share the + same slot. This is an important optimization, because otherwise long + functions would have exceptionally large GC slots, reducing performance + and bloating the size of the stack. Assigning values to these slots is + equivalent to doing graph coloring on the interference graph - the graph + where nodes are values and two values have an edge if they are + simultaneously live at a safepoint - which we computed in the previous + step. Now graph coloring in general is a hard problem. However, for SSA + form programs, (and most programs in general, by virtue of their + structure), the resulting interference graphs are chordal and can be + colored optimally in linear time by performing greedy coloring in a + perfect elimination order. Now, our interference graphs are likely not + entirely chordal due to some non-SSA corner cases. However, using the same + algorithm should still give a very good coloring while having sufficiently + low runtime. + + 5. JLCall frame optimizations + + Unlike earlier iterations of the gc root placement logic, jlcall frames + are no longer treated as a special case and need not necessarily be sunk + into the gc frame. Additionally, we now emit lifetime + intrinsics, so regular stack slot coloring will merge any jlcall frames + not sunk into the gc frame. Nevertheless performing such sinking can still + be profitable. Since all arguments to a jlcall are guaranteed to be live + at that call in some gc slot, we can attempt to rearrange the slots within + the gc-frame, or reuse slots not assigned at that particular location + for the gcframe. However, even without this optimization, stack frames + are at most two times larger than optimal (because regular stack coloring + can merge the jlcall allocas). + + N.B.: This step is not yet implemented. + + 6. 
Root placement + + This performs the actual insertion of the GCFrame pushes/pops, zeros out + the gc frame and creates the stores to the gc frame according to the + stack slot assignment computed in the previous step. GC frames stores + are generally sunk right before the first safe point that use them + (this is beneficial for code where the primary path does not have + safepoints, but some other path - e.g. the error path does). However, + if the first safepoint is not dominated by the definition (this can + happen due to the non-ssa corner cases), the store is inserted right after + the definition. + + 7. Cleanup + + This step performs necessary cleanup before passing the IR to codegen. In + particular, it removes any calls to julia_from_objref intrinsics and + removes the extra operand bundles from ccalls. In the future it could + also strip the addrspace information from all values as this + information is no longer needed. + + + There are a couple important special cases that deserve special attention: + + A. PHIs and Selects + + In general PHIs and selects are treated as separate defs for the purposes + of the algorithm and their operands as uses of those values. It is + important to consider however WHERE the uses of PHI's operands are + located. It is neither at the start of the basic block, because the values + do not dominate the block (so can't really consider them live-in), nor + at the end of the predecessor (because they are actually live out). + Instead it is best to think of those uses as living on the edge between + the appropriate predecessor and the block containing the PHI. + + Another concern is PHIs of derived values. Since we cannot simply root + these values by storing them to a GC slot, we need to insert a new, + artificial PHI that tracks the base pointers for the derived values. E.g. + in: + + A: + %Abase = load addrspace(10) *... + %Aderived = addrspacecast %Abase to addrspace(11) + B: + %Bbase = load addrspace(10) *... + %Bderived = addrspacecast %Bbase to addrspace(11) + C: + %phi = phi [%Aderived, %A + %Bderived, %B] + + we will insert another phi in C to track the relevant base pointers: + + %philift = phi [%Abase, %A + %Bbase, %B] + + We then pretend, for the purposes of numbering that %phi was derived from + %philift. Note that in order to be able to do this, we need to be able to + perform this lifting either during numbering or instruction scanning. + + B. Vectors of pointers/Union representations + + Since this pass runs very late in the pass pipeline, it runs after the + various vectorization passes. As a result, we have to potentially deal + with vectors of gc-tracked pointers. For the purposes of most of the + algorithm, we simply assign every element of the vector a separate number + and no changes are needed. However, those parts of the algorithm that + look at IR need to be aware of the possibility of encountering vectors of + pointers. + + Similarly, unions (e.g. in call returns) are represented as a struct of + a gc-tracked value and an argument selector. We simply assign a single + number to this struct and proceed as if it was a single pointer. However, + this again requires care at the IR level. + + C. Non mem2reg'd allocas + + Under some circumstances, allocas will still be present in the IR when + we get to this pass. We don't try very hard to handle this case, and + simply sink the alloca into the GCFrame. +*/ + +// 4096 bits == 64 words (64 bit words). 
Larger bit numbers are faster and doing something +// substantially smaller here doesn't actually save much memory because of malloc overhead. +// Too large is bad also though - 4096 was found to be a reasonable middle ground. +using LargeSparseBitVector = SparseBitVector<4096>; + +struct BBState { + // Uses in this BB + // These do not get updated after local analysis + LargeSparseBitVector Defs; + LargeSparseBitVector PhiOuts; + LargeSparseBitVector UpExposedUses; + // These get updated during dataflow + LargeSparseBitVector LiveIn; + LargeSparseBitVector LiveOut; + SmallVector Safepoints; + int TopmostSafepoint = -1; + bool HasSafepoint = false; + // Have we gone through this basic block in our local scan yet? + bool Done = false; +}; + +struct State { + Function *const F; + DominatorTree *DT; + + // The maximum assigned value number + int MaxPtrNumber; + // The maximum assigned safepoint number + int MaxSafepointNumber; + // Cache of numbers assigned to IR values. This includes caching of numbers + // for derived values + std::map AllPtrNumbering; + std::map> AllCompositeNumbering; + // The reverse of the previous maps + std::map ReversePtrNumbering; + // Neighbors in the coloring interference graph. I.e. for each value, the + // indices of other values that are used simultaneously at some safe point. + SmallVector Neighbors; + // The result of the local analysis + std::map BBStates; + + // Refinement map. If all of the values are rooted + // (-1 means an externally rooted value and -2 means a globally/permanently rooted value), + // the key is already rooted (but not the other way around). + // A value that can be refined to -2 never need any rooting or write barrier. + // A value that can be refined to -1 don't need local root but still need write barrier. + // At the end of `LocalScan` this map has a few properties + // 1. Values are either < 0 or dominates the key + // 2. Therefore this is a DAG + std::map> Refinements; + + // GC preserves map. All safepoints dominated by the map key, but not any + // of its uses need to preserve the values listed in the map value. + std::map> GCPreserves; + + // The assignment of numbers to safepoints. The indices in the map + // are indices into the next three maps which store safepoint properties + std::map SafepointNumbering; + + // Reverse mapping index -> safepoint + SmallVector ReverseSafepointNumbering; + + // Instructions that can return twice. For now, all values live at these + // instructions will get their own, dedicated GC frame slots, because they + // have unobservable control flow, so we can't be sure where they're + // actually live. All of these are also considered safepoints. + SmallVector ReturnsTwice; + + // The set of values live at a particular safepoint + SmallVector< LargeSparseBitVector , 0> LiveSets; + // Those values that - if live out from our parent basic block - are live + // at this safepoint. + SmallVector> LiveIfLiveOut; + // The set of values that are kept alive by the callee. + SmallVector> CalleeRoots; + // We don't bother doing liveness on Allocas that were not mem2reg'ed. + // they just get directly sunk into the root array. 
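The design comment above calls the dataflow step textbook backward liveness. A sketch of the per-block transfer it implies, over the same LargeSparseBitVector type, assuming the block-local sets produced by the local scan (this deliberately omits the extra rootedness forward-propagation the comment mentions, and the helper name is illustrative):

#include <llvm/ADT/SparseBitVector.h>

using LargeSparseBitVector = llvm::SparseBitVector<4096>;

// LiveIn = UpExposedUses | (LiveOut \ Defs); returns whether LiveIn changed,
// so a caller can iterate the blocks to a fixed point over the CFG.
static bool updateLiveIn(LargeSparseBitVector &LiveIn,
                         const LargeSparseBitVector &UpExposedUses,
                         const LargeSparseBitVector &LiveOut,
                         const LargeSparseBitVector &Defs) {
    LargeSparseBitVector New = LiveOut;
    New.intersectWithComplement(Defs);
    New |= UpExposedUses;
    if (New == LiveIn)
        return false;
    LiveIn = New;
    return true;
}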
+ DenseMap ArrayAllocas; + DenseMap ShadowAllocas; + SmallVector, 0> TrackedStores; + State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {} +}; + + +struct LateLowerGCFrame: private JuliaPassContext { + function_ref GetDT; + LateLowerGCFrame(function_ref GetDT) : GetDT(GetDT) {} + +public: + bool runOnFunction(Function &F, bool *CFGModified = nullptr); + +private: + CallInst *pgcstack; + Function *smallAllocFunc; + + void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef &SafepointsSoFar, + SmallVector &&RefinedPtr = SmallVector()); + void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F); + void NoteUse(State &S, BBState &BBS, Value *V, Function &F) { + NoteUse(S, BBS, V, BBS.UpExposedUses, F); + } + + void LiftPhi(State &S, PHINode *Phi); + void LiftSelect(State &S, SelectInst *SI); + Value *MaybeExtractScalar(State &S, std::pair ValExpr, Instruction *InsertBefore); + SmallVector MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore); + Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore); + + int Number(State &S, Value *V); + int NumberBase(State &S, Value *Base); + SmallVector NumberAll(State &S, Value *V); + SmallVector NumberAllBase(State &S, Value *Base); + + void NoteOperandUses(State &S, BBState &BBS, Instruction &UI); + void MaybeTrackDst(State &S, MemTransferInst *MI); + void MaybeTrackStore(State &S, StoreInst *I); + State LocalScan(Function &F); + void ComputeLiveness(State &S); + void ComputeLiveSets(State &S); + std::pair, int> ColorRoots(const State &S); + void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef Colors, Value *GCFrame, Instruction *InsertBefore); + void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef Colors, int PreAssignedColors, Value *GCFrame); + void PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot, ArrayRef Colors, Value *GCFrame, Instruction *InsertBefore); + void PlaceRootsAndUpdateCalls(ArrayRef Colors, int PreAssignedColors, State &S, std::map>); + void CleanupWriteBarriers(Function &F, State *S, const SmallVector &WriteBarriers, bool *CFGModified); + bool CleanupIR(Function &F, State *S, bool *CFGModified); + void NoteUseChain(State &S, BBState &BBS, User *TheUser); + SmallVector GetPHIRefinements(PHINode *phi, State &S); + void FixUpRefinements(ArrayRef PHINumbers, State &S); + void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef CalleeRoots); + Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V); + Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V); + Value* lowerGCAllocBytesLate(CallInst *target, Function &F); +}; + +// The final GC lowering pass. This pass lowers platform-agnostic GC +// intrinsics to platform-dependent instruction sequences. The +// intrinsics it targets are those produced by the late GC frame +// lowering pass. +// +// This pass targets typical back-ends for which the standard Julia +// runtime library is available. Atypical back-ends should supply +// their own lowering pass. + +struct FinalLowerGC: private JuliaPassContext { + bool runOnFunction(Function &F); + +private: + Function *queueRootFunc; + Function *smallAllocFunc; + Function *bigAllocFunc; + Function *allocTypedFunc; + Instruction *pgcstack; + Type *T_size; + + // Lowers a `julia.new_gc_frame` intrinsic. + void lowerNewGCFrame(CallInst *target, Function &F); + + // Lowers a `julia.push_gc_frame` intrinsic. 
+ void lowerPushGCFrame(CallInst *target, Function &F); + + // Lowers a `julia.pop_gc_frame` intrinsic. + void lowerPopGCFrame(CallInst *target, Function &F); + + // Lowers a `julia.get_gc_frame_slot` intrinsic. + void lowerGetGCFrameSlot(CallInst *target, Function &F); + + // Lowers a `julia.gc_alloc_bytes` intrinsic. + void lowerGCAllocBytes(CallInst *target, Function &F); + + // Lowers a `julia.queue_gc_root` intrinsic. + void lowerQueueGCRoot(CallInst *target, Function &F); + + // Lowers a `julia.safepoint` intrinsic. + void lowerSafepoint(CallInst *target, Function &F); +}; + +#endif // LLVM_GC_PASSES_H diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp index 26288dc09379d..5badbca807569 100644 --- a/src/llvm-gc-invariant-verifier.cpp +++ b/src/llvm-gc-invariant-verifier.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -63,8 +62,8 @@ struct GCInvariantVerifier : public InstVisitor { }; void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { - unsigned FromAS = cast(I.getSrcTy())->getAddressSpace(); - unsigned ToAS = cast(I.getDestTy())->getAddressSpace(); + unsigned FromAS = I.getSrcTy()->getPointerAddressSpace(); + unsigned ToAS = I.getDestTy()->getPointerAddressSpace(); if (FromAS == 0) return; Check(ToAS != AddressSpace::Loaded && FromAS != AddressSpace::Loaded, @@ -79,10 +78,10 @@ void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { } void GCInvariantVerifier::checkStoreInst(Type *VTy, unsigned AS, Value &SI) { - if (VTy->isPointerTy()) { + if (VTy->isPtrOrPtrVectorTy()) { /* We currently don't obey this for arguments. That's ok - they're externally rooted. */ - unsigned AS = cast(VTy)->getAddressSpace(); + unsigned AS = VTy->getPointerAddressSpace(); Check(AS != AddressSpace::CalleeRooted && AS != AddressSpace::Derived, "Illegal store of decayed value", &SI); @@ -108,15 +107,15 @@ void GCInvariantVerifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) { void GCInvariantVerifier::visitLoadInst(LoadInst &LI) { Type *Ty = LI.getType(); - if (Ty->isPointerTy()) { - unsigned AS = cast(Ty)->getAddressSpace(); + if (Ty->isPtrOrPtrVectorTy()) { + unsigned AS = Ty->getPointerAddressSpace(); Check(AS != AddressSpace::CalleeRooted && AS != AddressSpace::Derived, "Illegal load of gc relevant value", &LI); } Ty = LI.getPointerOperand()->getType(); - if (Ty->isPointerTy()) { - unsigned AS = cast(Ty)->getAddressSpace(); + if (Ty->isPtrOrPtrVectorTy()) { + unsigned AS = Ty->getPointerAddressSpace(); Check(AS != AddressSpace::CalleeRooted, "Illegal load of callee rooted value", &LI); } @@ -130,18 +129,18 @@ void GCInvariantVerifier::visitReturnInst(ReturnInst &RI) { if (!RI.getReturnValue()) return; Type *RTy = RI.getReturnValue()->getType(); - if (!RTy->isPointerTy()) + if (!RTy->isPtrOrPtrVectorTy()) return; - unsigned AS = cast(RTy)->getAddressSpace(); + unsigned AS = RTy->getPointerAddressSpace(); Check(!isSpecialAS(AS) || AS == AddressSpace::Tracked, "Only gc tracked values may be directly returned", &RI); } void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Type *Ty = GEP.getType(); - if (!Ty->isPointerTy()) + if (!Ty->isPtrOrPtrVectorTy()) return; - unsigned AS = cast(Ty)->getAddressSpace(); + unsigned AS = Ty->getPointerAddressSpace(); if (!isSpecialAS(AS)) return; /* We're actually ok with GEPs here, as long as they don't feed into any @@ -162,13 +161,18 @@ void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { void 
GCInvariantVerifier::visitCallInst(CallInst &CI) { Function *Callee = CI.getCalledFunction(); if (Callee && (Callee->getName() == "julia.call" || - Callee->getName() == "julia.call2")) { - bool First = true; + Callee->getName() == "julia.call2" || + Callee->getName() == "julia.call3")) { + unsigned Fixed = CI.getFunctionType()->getNumParams(); for (Value *Arg : CI.args()) { + if (Fixed) { + Fixed--; + continue; + } Type *Ty = Arg->getType(); - Check(Ty->isPointerTy() && cast(Ty)->getAddressSpace() == (First ? 0 : AddressSpace::Tracked), - "Invalid derived pointer in jlcall", &CI); - First = false; + Check(Ty->isPtrOrPtrVectorTy() && + Ty->getPointerAddressSpace() == AddressSpace::Tracked, + "Invalid derived pointer in jlcall", &CI); } } } @@ -193,37 +197,3 @@ PreservedAnalyses GCInvariantVerifierPass::run(Function &F, FunctionAnalysisMana } return PreservedAnalyses::all(); } - -struct GCInvariantVerifierLegacy : public FunctionPass { - static char ID; - bool Strong; - GCInvariantVerifierLegacy(bool Strong=false) : FunctionPass(ID), Strong(Strong) {} - -public: - void getAnalysisUsage(AnalysisUsage &AU) const override { - FunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); - } - - bool runOnFunction(Function &F) override { - GCInvariantVerifier GIV(Strong); - GIV.visit(F); - if (GIV.Broken) { - abort(); - } - return false; - } -}; - -char GCInvariantVerifierLegacy::ID = 0; -static RegisterPass X("GCInvariantVerifier", "GC Invariant Verification Pass", false, false); - -Pass *createGCInvariantVerifierPass(bool Strong) { - return new GCInvariantVerifierLegacy(Strong); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong) -{ - unwrap(PM)->add(createGCInvariantVerifierPass(Strong)); -} diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index 8e03fe434a79c..baf844dffa89c 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -123,17 +122,6 @@ static bool makeLoopInvariant(Loop *L, Value *V, bool &Changed, Instruction *Ins return true; // All non-instructions are loop-invariant. 
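The llvm-gc-invariant-verifier.cpp hunks above switch from casting to PointerType to querying the type directly, which also lets the checks cover vectors of pointers. A minimal sketch of that predicate, assuming the Tracked address space number (10) from the AddressSpace enum earlier in this patch; the function name is illustrative:

#include <llvm/IR/Type.h>

using namespace llvm;

// Works for both `ptr addrspace(10)` and `<N x ptr addrspace(10)>`;
// cast<PointerType> would reject the vector case outright.
static bool isTrackedValue(Type *Ty, unsigned TrackedAS = 10) {
    return Ty->isPtrOrPtrVectorTy() && Ty->getPointerAddressSpace() == TrackedAS;
}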
} -struct JuliaLICMPassLegacy : public LoopPass { - static char ID; - JuliaLICMPassLegacy() : LoopPass(ID) {}; - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - - protected: - void getAnalysisUsage(AnalysisUsage &AU) const override { - getLoopAnalysisUsage(AU); - } -}; struct JuliaLICM : public JuliaPassContext { function_ref GetDT; function_ref GetLI; @@ -342,10 +330,9 @@ struct JuliaLICM : public JuliaPassContext { moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE); IRBuilder<> builder(preheader->getTerminator()); builder.SetCurrentDebugLocation(call->getDebugLoc()); - auto obj_i8 = builder.CreateBitCast(call, Type::getInt8PtrTy(call->getContext(), call->getType()->getPointerAddressSpace())); // Note that this alignment is assuming the GC allocates at least pointer-aligned memory auto align = Align(DL.getPointerSize(0)); - auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align); + auto clear_obj = builder.CreateMemSet(call, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align); if (MSSAU.getMemorySSA()) { auto clear_mdef = MSSAU.createMemoryAccessInBB(clear_obj, nullptr, clear_obj->getParent(), MemorySSA::BeforeTerminator); MSSAU.insertDef(cast(clear_mdef), true); @@ -355,37 +342,15 @@ struct JuliaLICM : public JuliaPassContext { } } if (changed && SE) { - SE->forgetLoopDispositions(L); + SE->forgetLoopDispositions(); } #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(*L->getHeader()->getParent(), &errs())); + assert(!verifyLLVMIR(*L)); #endif return changed; } }; -bool JuliaLICMPassLegacy::runOnLoop(Loop *L, LPPassManager &LPM) { - OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - auto GetDT = [this]() -> DominatorTree & { - return getAnalysis().getDomTree(); - }; - auto GetLI = [this]() -> LoopInfo & { - return getAnalysis().getLoopInfo(); - }; - auto GetMSSA = []() { - return nullptr; - }; - auto GetSE = []() { - return nullptr; - }; - auto juliaLICM = JuliaLICM(GetDT, GetLI, GetMSSA, GetSE); - return juliaLICM.runOnLoop(L, ORE); -} - -char JuliaLICMPassLegacy::ID = 0; -static RegisterPass - Y("JuliaLICM", "LICM for julia specific intrinsics.", - false, false); } //namespace PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM, @@ -417,14 +382,3 @@ PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM, } return PreservedAnalyses::all(); } - -Pass *createJuliaLICMPass() -{ - return new JuliaLICMPassLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createJuliaLICMPass()); -} diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index 39030d60a44fc..523c9fbcd3402 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -1,27 +1,26 @@ //Module passes #ifdef MODULE_PASS -MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) -MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) -MODULE_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) -MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) -MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) -MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) -MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass()) +MODULE_PASS("CPUFeatures", CPUFeaturesPass()) +MODULE_PASS("RemoveNI", 
RemoveNIPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) #endif //Function passes #ifdef FUNCTION_PASS -FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass()) -FUNCTION_PASS("CombineMulAdd", CombineMulAddPass, CombineMulAddPass()) -FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass()) -FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass()) -FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass()) -FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, LowerExcHandlersPass()) -FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass()) +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass()) +FUNCTION_PASS("AllocOpt", AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass()) #endif //Loop passes #ifdef LOOP_PASS -LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) +LOOP_PASS("JuliaLICM", JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass()) #endif diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-late-gc-lowering-mmtk.cpp new file mode 100644 index 0000000000000..5539c8dbcf153 --- /dev/null +++ b/src/llvm-late-gc-lowering-mmtk.cpp @@ -0,0 +1,96 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "llvm-gc-interface-passes.h" + +Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) +{ + assert(target->arg_size() == 3); + + IRBuilder<> builder(target); + auto ptls = target->getArgOperand(0); + auto type = target->getArgOperand(2); + if (auto CI = dyn_cast(target->getArgOperand(1))) { + size_t sz = (size_t)CI->getZExtValue(); + // This is strongly architecture and OS dependent + int osize; + int offset = jl_gc_classify_pools(sz, &osize); + if (offset >= 0) { + // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc + // We do a slowpath/fastpath check and lower it only on the slowpath, returning + // the cursor and updating it in the fastpath. + auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); + auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); + + // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. + // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. + const bool INLINE_FASTPATH_ALLOCATION = true; + + if (INLINE_FASTPATH_ALLOCATION) { + // Assuming we use the first immix allocator. + // FIXME: We should get the allocator index and type from MMTk. 
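The pass table in `llvm-julia-passes.inc` above now pairs just a name with a constructor expression. A table like this is usually consumed x-macro style, by defining `MODULE_PASS`/`FUNCTION_PASS`/`LOOP_PASS` before including the file; the sketch below shows that pattern for function passes (the surrounding parser function is hypothetical, not Julia's actual pipeline code):

    // Hypothetical consumer of the two-argument FUNCTION_PASS entries above.
    #include <llvm/ADT/StringRef.h>
    #include <llvm/IR/PassManager.h>
    #include "passes.h" // pass class declarations (Julia tree)

    bool tryAddJuliaFunctionPass(llvm::StringRef Name, llvm::FunctionPassManager &FPM) {
    #define FUNCTION_PASS(NAME, CREATE_PASS) \
        if (Name == NAME) {                  \
            FPM.addPass(CREATE_PASS);        \
            return true;                     \
        }
    #include "llvm-julia-passes.inc"
    #undef FUNCTION_PASS
        return false;
    }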
+ auto allocator_offset = offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); + + auto cursor_ptr = builder.CreateInBoundsGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); + auto cursor = builder.CreateAlignedLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, Align(sizeof(void *)), "cursor"); + + // offset = 8 + auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); + auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); + auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) + auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); + auto result = builder.CreateNSWAdd(cursor, delta, "result"); + + auto new_cursor = builder.CreateNSWAdd(result, pool_osize); + + auto limit_ptr = builder.CreateInBoundsGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); + auto limit = builder.CreateAlignedLoad(Type::getInt64Ty(target->getContext()), limit_ptr, Align(sizeof(void *)), "limit"); + + auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); + + auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); + auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction()); + + auto next_instr = target->getNextNode(); + SmallVector Weights{1, 9}; + + MDBuilder MDB(F.getContext()); + SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights)); + + builder.SetInsertPoint(next_instr); + auto phiNode = builder.CreatePHI(target->getCalledFunction()->getReturnType(), 2, "phi_fast_slow"); + + // slowpath + builder.SetInsertPoint(slowpath); + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + auto new_call = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize_i32, type }); + new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); + builder.CreateBr(next_instr->getParent()); + + // fastpath + builder.SetInsertPoint(fastpath); + builder.CreateStore(new_cursor, cursor_ptr); + + // ptls->gc_tls.gc_num.allocd += osize; + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls_common) + offsetof(jl_gc_tls_states_common_t, gc_num)); + auto pool_alloc_tls = builder.CreateInBoundsGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + auto pool_allocd = builder.CreateAlignedLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls, Align(sizeof(void *))); + auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + builder.CreateStore(pool_allocd_total, pool_alloc_tls); + + auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); + auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType()); + builder.CreateBr(next_instr->getParent()); + + phiNode->addIncoming(new_call, slowpath); + phiNode->addIncoming(v_as_ptr, fastpath); + 
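In ordinary C++ terms, the IR built above implements the following bump-pointer fastpath (a conceptual model assuming a 64-bit target and an 8-byte `jl_taggedvalue_t` header; the real pass emits this inline against the MMTk mutator's TLS and uses a signed cursor/limit comparison):

    // Conceptual model of the inlined MMTk allocation fastpath above.
    #include <cstddef>
    #include <cstdint>

    struct BumpAllocator { std::uintptr_t cursor, limit; };

    // Returns the object (payload) address, or 0 when the slow path
    // (the out-of-line small-alloc call) must be taken instead.
    std::uintptr_t tryFastAlloc(BumpAllocator &a, std::size_t osize) {
        const std::uintptr_t tag = 8;                       // header before the payload
        std::uintptr_t delta  = (0 - tag - a.cursor) & 15;  // make the payload 16-byte aligned
        std::uintptr_t result = a.cursor + delta;           // address of the tag word
        std::uintptr_t new_cursor = result + osize;
        if (new_cursor > a.limit)
            return 0;                                       // not enough room in this block
        a.cursor = new_cursor;                              // bump the thread-local cursor
        return result + tag;                                // the jl_value_t* handed back
    }

On the fast path the pass additionally bumps `gc_num.allocd` by `osize`, which the model above omits.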
phiNode->takeName(target); + return phiNode; + } + } + } + return target; +} diff --git a/src/llvm-late-gc-lowering-stock.cpp b/src/llvm-late-gc-lowering-stock.cpp new file mode 100644 index 0000000000000..2a11487773396 --- /dev/null +++ b/src/llvm-late-gc-lowering-stock.cpp @@ -0,0 +1,9 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "llvm-gc-interface-passes.h" + +Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) +{ + // Do nothing for the stock GC + return target; +} diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 6d87abd68d7c2..7d6fba65a79e7 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1,380 +1,9 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#include "llvm-version.h" -#include "passes.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include "llvm/Analysis/CFG.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "llvm-codegen-shared.h" -#include "julia.h" -#include "julia_internal.h" -#include "julia_assert.h" -#include "llvm-pass-helpers.h" -#include +#include "llvm-gc-interface-passes.h" #define DEBUG_TYPE "late_lower_gcroot" -using namespace llvm; - -/* Julia GC Root Placement pass. For a general overview of the design of GC - root lowering, see the devdocs. This file is the actual implementation. - - The actual algorithm is fairly straightforward. First recall the goal of this - pass: - - Minimize the number of needed gc roots/stores to them subject to the constraint - that at every safepoint, any live gc-tracked pointer (i.e. for which there is - a path after this point that contains a use of this pointer) is in some gc slot. - - In particular, in order to understand this algorithm, it is important to - realize that the only places where rootedness matters is at safepoints. - - Now, the primary phases of the algorithm are: - - 1. Local Scan - - During this step, each Basic Block is inspected and analyzed for local - properties. In particular, we want to determine the ordering of any of - the following activities: - - - Any Def of a gc-tracked pointer. In general Defs are the results of - calls or loads from appropriate memory locations. Phi nodes and - selects do complicate this story slightly as described below. - - Any use of a gc-tracked or derived pointer. As described in the - devdocs, a use is in general one of - a) a load from a tracked/derived value - b) a store to a tracked/derived value - c) a store OF a tracked/derived value - d) a use of a value as a call operand (including operand bundles) - - Any safepoint - - Crucially, we also perform pointer numbering during the local scan, - assigning every Def a unique integer and caching the integer for each - derived pointer. This allows us to operate only on the set of Defs ( - represented by these integers) for the rest of the algorithm. We also - maintain some local utility information that is needed by later passes - (see the BBState struct for details). - - 2. Dataflow Computation - - This computation operates entirely over the function's control flow graph - and does not look into a basic block. The algorithm is essentially - textbook iterative data flow for liveness computation. 
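Step 2 of the pass description being removed above is textbook backward liveness; before the extra forward rootedness propagation the comment goes on to mention, the core fixed-point iteration looks like this toy over `std::set` (the pass itself uses `LargeSparseBitVector`; illustration only):

    // Toy version of the step-2 liveness fixed point described above.
    #include <set>
    #include <vector>

    struct Block {
        std::set<int> Defs, UpExposedUses;   // filled in by the local scan
        std::set<int> LiveIn, LiveOut;       // computed here
        std::vector<int> Succs;              // indices of successor blocks
    };

    void computeLiveness(std::vector<Block> &blocks) {
        bool changed = true;
        while (changed) {                    // iterate to a fixed point
            changed = false;
            for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
                Block &b = *it;
                std::set<int> out;
                for (int s : b.Succs)        // LiveOut = union of successors' LiveIn
                    out.insert(blocks[s].LiveIn.begin(), blocks[s].LiveIn.end());
                std::set<int> in = b.UpExposedUses;
                for (int v : out)            // LiveIn = UpExposedUses + (LiveOut minus Defs)
                    if (!b.Defs.count(v))
                        in.insert(v);
                if (in != b.LiveIn || out != b.LiveOut) {
                    b.LiveIn = std::move(in);
                    b.LiveOut = std::move(out);
                    changed = true;
                }
            }
        }
    }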
However, the - data flow equations are slightly more complicated because we also - forward propagate rootedness information in addition to backpropagating - liveness. - - 3. Live Set Computation - - With the liveness information from the previous step, we can now compute, - for every safepoint, the set of values live at that particular safepoint. - There are three pieces of information being combined here: - i. Values that needed to be live due to local analysis (e.g. there - was a def, then a safepoint, then a use). This was computed during - local analysis. - ii. Values that are live across the basic block (i.e. they are live - at every safepoint within the basic block). This relies entirely - on the liveness information. - iii. Values that are now live-out from the basic block (i.e. they are - live at every safepoint following their def). During local - analysis, we keep, for every safepoint, those values that would - be live if they were live out. Here we can check if they are - actually live-out and make the appropriate additions to the live - set. - - Lastly, we also explicitly compute, for each value, the list of values - that are simultaneously live at some safepoint. This is known as an - "interference graph" and is the input to the next step. - - 4. GC Root coloring - - Two values which are not simultaneously live at a safepoint can share the - same slot. This is an important optimization, because otherwise long - functions would have exceptionally large GC slots, reducing performance - and bloating the size of the stack. Assigning values to these slots is - equivalent to doing graph coloring on the interference graph - the graph - where nodes are values and two values have an edge if they are - simultaneously live at a safepoint - which we computed in the previous - step. Now graph coloring in general is a hard problem. However, for SSA - form programs, (and most programs in general, by virtue of their - structure), the resulting interference graphs are chordal and can be - colored optimally in linear time by performing greedy coloring in a - perfect elimination order. Now, our interference graphs are likely not - entirely chordal due to some non-SSA corner cases. However, using the same - algorithm should still give a very good coloring while having sufficiently - low runtime. - - 5. JLCall frame optimizations - - Unlike earlier iterations of the gc root placement logic, jlcall frames - are no longer treated as a special case and need not necessarily be sunk - into the gc frame. Additionally, we now emit lifetime - intrinsics, so regular stack slot coloring will merge any jlcall frames - not sunk into the gc frame. Nevertheless performing such sinking can still - be profitable. Since all arguments to a jlcall are guaranteed to be live - at that call in some gc slot, we can attempt to rearrange the slots within - the gc-frame, or re-use slots not assigned at that particular location - for the gcframe. However, even without this optimization, stack frames - are at most two times larger than optimal (because regular stack coloring - can merge the jlcall allocas). - - N.B.: This step is not yet implemented. - - 6. Root placement - - This performs the actual insertion of the GCFrame pushes/pops, zeros out - the gc frame and creates the stores to the gc frame according to the - stack slot assignment computed in the previous step. 
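Step 4 above relies on the interference graph being (nearly) chordal, so greedy coloring along a perfect elimination order is optimal. A toy version of that coloring step (the pass's `ColorRoots` additionally respects pre-assigned colors and derives the order from the PEO iterator shown later in this file):

    // Greedy coloring in a given elimination order; optimal for chordal graphs.
    #include <vector>

    std::vector<int> greedyColor(const std::vector<std::vector<int>> &neighbors,
                                 const std::vector<int> &order) {
        std::vector<int> color(neighbors.size(), -1);
        for (int v : order) {
            std::vector<bool> used(neighbors.size(), false);
            for (int n : neighbors[v])         // colors already taken by neighbors
                if (color[n] >= 0)
                    used[color[n]] = true;
            int c = 0;
            while (used[c]) ++c;               // first free color = this value's GC slot
            color[v] = c;
        }
        return color;
    }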
GC frames stores - are generally sunk right before the first safe point that use them - (this is beneficial for code where the primary path does not have - safepoints, but some other path - e.g. the error path does). However, - if the first safepoint is not dominated by the definition (this can - happen due to the non-ssa corner cases), the store is inserted right after - the definition. - - 7. Cleanup - - This step performs necessary cleanup before passing the IR to codegen. In - particular, it removes any calls to julia_from_objref intrinsics and - removes the extra operand bundles from ccalls. In the future it could - also strip the addrspace information from all values as this - information is no longer needed. - - - There are a couple important special cases that deserve special attention: - - A. PHIs and Selects - - In general PHIs and selects are treated as separate defs for the purposes - of the algorithm and their operands as uses of those values. It is - important to consider however WHERE the uses of PHI's operands are - located. It is neither at the start of the basic block, because the values - do not dominate the block (so can't really consider them live-in), nor - at the end of the predecessor (because they are actually live out). - Instead it is best to think of those uses as living on the edge between - the appropriate predecessor and the block containing the PHI. - - Another concern is PHIs of derived values. Since we cannot simply root - these values by storing them to a GC slot, we need to insert a new, - artificial PHI that tracks the base pointers for the derived values. E.g. - in: - - A: - %Abase = load addrspace(10) *... - %Aderived = addrspacecast %Abase to addrspace(11) - B: - %Bbase = load addrspace(10) *... - %Bderived = addrspacecast %Bbase to addrspace(11) - C: - %phi = phi [%Aderived, %A - %Bderived, %B] - - we will insert another phi in C to track the relevant base pointers: - - %philift = phi [%Abase, %A - %Bbase, %B] - - We then pretend, for the purposes of numbering that %phi was derived from - %philift. Note that in order to be able to do this, we need to be able to - perform this lifting either during numbering or instruction scanning. - - B. Vectors of pointers/Union representations - - Since this pass runs very late in the pass pipeline, it runs after the - various vectorization passes. As a result, we have to potentially deal - with vectors of gc-tracked pointers. For the purposes of most of the - algorithm, we simply assign every element of the vector a separate number - and no changes are needed. However, those parts of the algorithm that - look at IR need to be aware of the possibility of encountering vectors of - pointers. - - Similarly, unions (e.g. in call returns) are represented as a struct of - a gc-tracked value and an argument selector. We simply assign a single - number to this struct and proceed as if it was a single pointer. However, - this again requires care at the IR level. - - C. Non mem2reg'd allocas - - Under some circumstances, allocas will still be present in the IR when - we get to this pass. We don't try very hard to handle this case, and - simply sink the alloca into the GCFrame. -*/ - -// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something -// substantially smaller here doesn't actually save much memory because of malloc overhead. -// Too large is bad also though - 4096 was found to be a reasonable middle ground. 
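Special case A above introduces an artificial phi over base pointers for phis of derived values. A minimal sketch of that lifting against the LLVM C++ API (the `baseOf` callback and `T_prjlvalue` are assumed inputs; this is an illustration, not the pass's `LiftPhi`):

    #include <llvm/ADT/STLFunctionalExtras.h>
    #include <llvm/IR/Instructions.h>
    using namespace llvm;

    // Given a phi over *derived* pointers, build a parallel phi over their
    // *tracked* base pointers so the bases (not the derived values) get rooted.
    PHINode *liftDerivedPhi(PHINode *derivedPhi, Type *T_prjlvalue,
                            function_ref<Value *(Value *, Instruction *)> baseOf) {
        PHINode *lift = PHINode::Create(T_prjlvalue, derivedPhi->getNumIncomingValues(),
                                        "gclift", derivedPhi);
        for (unsigned i = 0; i < derivedPhi->getNumIncomingValues(); ++i) {
            BasicBlock *pred = derivedPhi->getIncomingBlock(i);
            // The incoming base is materialized at the end of the predecessor,
            // matching the "use lives on the edge" rule described above.
            Value *base = baseOf(derivedPhi->getIncomingValue(i), pred->getTerminator());
            lift->addIncoming(base, pred);
        }
        return lift; // number the original phi as "derived from" this lifted phi
    }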
-using LargeSparseBitVector = SparseBitVector<4096>; - -struct BBState { - // Uses in this BB - // These do not get updated after local analysis - LargeSparseBitVector Defs; - LargeSparseBitVector PhiOuts; - LargeSparseBitVector UpExposedUses; - // These get updated during dataflow - LargeSparseBitVector LiveIn; - LargeSparseBitVector LiveOut; - std::vector Safepoints; - int TopmostSafepoint = -1; - bool HasSafepoint = false; - // Have we gone through this basic block in our local scan yet? - bool Done = false; -}; - -struct State { - Function *const F; - DominatorTree *DT; - - // The maximum assigned value number - int MaxPtrNumber; - // The maximum assigned safepoint number - int MaxSafepointNumber; - // Cache of numbers assigned to IR values. This includes caching of numbers - // for derived values - std::map AllPtrNumbering; - std::map> AllCompositeNumbering; - // The reverse of the previous maps - std::map ReversePtrNumbering; - // Neighbors in the coloring interference graph. I.e. for each value, the - // indices of other values that are used simultaneously at some safe point. - std::vector Neighbors; - // The result of the local analysis - std::map BBStates; - - // Refinement map. If all of the values are rooted - // (-1 means an externally rooted value and -2 means a globally/permanently rooted value), - // the key is already rooted (but not the other way around). - // A value that can be refined to -2 never need any rooting or write barrier. - // A value that can be refined to -1 don't need local root but still need write barrier. - // At the end of `LocalScan` this map has a few properties - // 1. Values are either < 0 or dominates the key - // 2. Therefore this is a DAG - std::map> Refinements; - - // GC preserves map. All safepoints dominated by the map key, but not any - // of its uses need to preserve the values listed in the map value. - std::map> GCPreserves; - - // The assignment of numbers to safepoints. The indices in the map - // are indices into the next three maps which store safepoint properties - std::map SafepointNumbering; - - // Reverse mapping index -> safepoint - std::vector ReverseSafepointNumbering; - - // Instructions that can return twice. For now, all values live at these - // instructions will get their own, dedicated GC frame slots, because they - // have unobservable control flow, so we can't be sure where they're - // actually live. All of these are also considered safepoints. - std::vector ReturnsTwice; - - // The set of values live at a particular safepoint - std::vector< LargeSparseBitVector > LiveSets; - // Those values that - if live out from our parent basic block - are live - // at this safepoint. - std::vector> LiveIfLiveOut; - // The set of values that are kept alive by the callee. - std::vector> CalleeRoots; - // We don't bother doing liveness on Allocas that were not mem2reg'ed. - // they just get directly sunk into the root array. 
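The `Refinements` map documented above is a DAG whose leaves are `-1` (externally rooted: no local root needed, but a write barrier still is) and `-2` (globally/permanently rooted: needs neither). A simplified chase of that DAG, just to illustrate the invariant (the real pass works on refinement sets built during `LocalScan` and `FixUpRefinements`):

    // Sketch: classify a pointer number by following its refinement DAG.
    #include <map>
    #include <vector>

    enum class Rooting { NeedsLocalRoot, ExternallyRooted, PermanentlyRooted };

    Rooting classify(int num, const std::map<int, std::vector<int>> &refinements) {
        auto it = refinements.find(num);
        if (it == refinements.end() || it->second.empty())
            return Rooting::NeedsLocalRoot;        // not refined: must get a GC slot
        Rooting best = Rooting::PermanentlyRooted; // weakest conclusion wins below
        for (int r : it->second) {
            Rooting rr = r == -2 ? Rooting::PermanentlyRooted
                       : r == -1 ? Rooting::ExternallyRooted
                       : classify(r, refinements); // refined by another value: recurse (DAG)
            if (rr == Rooting::NeedsLocalRoot)
                return Rooting::NeedsLocalRoot;    // any unrooted refinement spoils it
            if (rr == Rooting::ExternallyRooted)
                best = Rooting::ExternallyRooted;  // still needs a write barrier
        }
        return best;
    }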
- std::vector Allocas; - DenseMap ArrayAllocas; - DenseMap ShadowAllocas; - std::vector> TrackedStores; - State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {} -}; - - - -struct LateLowerGCFrameLegacy: public FunctionPass { - static char ID; - LateLowerGCFrameLegacy() : FunctionPass(ID) {} - -protected: - void getAnalysisUsage(AnalysisUsage &AU) const override { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } - -private: - bool runOnFunction(Function &F) override; -}; - -struct LateLowerGCFrame: private JuliaPassContext { - function_ref GetDT; - LateLowerGCFrame(function_ref GetDT) : GetDT(GetDT) {} - -public: - bool runOnFunction(Function &F, bool *CFGModified = nullptr); - -private: - CallInst *pgcstack; - - void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar, SmallVector &&RefinedPtr = SmallVector()); - void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses); - void NoteUse(State &S, BBState &BBS, Value *V) { - NoteUse(S, BBS, V, BBS.UpExposedUses); - } - - void LiftPhi(State &S, PHINode *Phi); - void LiftSelect(State &S, SelectInst *SI); - Value *MaybeExtractScalar(State &S, std::pair ValExpr, Instruction *InsertBefore); - std::vector MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore); - Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore); - - int Number(State &S, Value *V); - int NumberBase(State &S, Value *Base); - std::vector NumberAll(State &S, Value *V); - std::vector NumberAllBase(State &S, Value *Base); - - void NoteOperandUses(State &S, BBState &BBS, User &UI); - void MaybeTrackDst(State &S, MemTransferInst *MI); - void MaybeTrackStore(State &S, StoreInst *I); - State LocalScan(Function &F); - void ComputeLiveness(State &S); - void ComputeLiveSets(State &S); - std::vector ColorRoots(const State &S); - void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector &Colors, Value *GCFrame, Instruction *InsertBefore); - void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector &Colors, Value *GCFrame); - void PlaceRootsAndUpdateCalls(std::vector &Colors, State &S, std::map>); - bool CleanupIR(Function &F, State *S, bool *CFGModified); - void NoteUseChain(State &S, BBState &BBS, User *TheUser); - SmallVector GetPHIRefinements(PHINode *phi, State &S); - void FixUpRefinements(ArrayRef PHINumbers, State &S); - void RefineLiveSet(LargeSparseBitVector &LS, State &S, const std::vector &CalleeRoots); - Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V); - Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V); -}; - static unsigned getValueAddrSpace(Value *V) { return V->getType()->getPointerAddressSpace(); } @@ -394,16 +23,18 @@ static bool isSpecialPtr(Type *Ty) { // return how many Special pointers are in T (count > 0), // and if there is anything else in T (all == false) -CountTrackedPointers::CountTrackedPointers(Type *T) { +CountTrackedPointers::CountTrackedPointers(Type *T, bool ignore_loaded) { if (isa(T)) { if (isSpecialPtr(T)) { + if (ignore_loaded && T->getPointerAddressSpace() == AddressSpace::Loaded) + return; count++; if (T->getPointerAddressSpace() != AddressSpace::Tracked) derived = true; } } else if (isa(T) || isa(T) || isa(T)) { for (Type *ElT : T->subtypes()) { - auto sub = CountTrackedPointers(ElT); + auto sub = CountTrackedPointers(ElT, ignore_loaded); count += sub.count; all &= sub.all; derived |= sub.derived; 
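A usage-style sketch of the `CountTrackedPointers` change above: with `ignore_loaded` set, pointers in the Loaded address space no longer contribute to the count. The address-space numbers (10 = Tracked, 13 = Loaded) are Julia's convention and assumed here; the header name is the one introduced by this PR:

    #include <llvm/IR/DerivedTypes.h>
    #include <llvm/IR/LLVMContext.h>
    #include "llvm-gc-interface-passes.h" // declares CountTrackedPointers (Julia tree)
    using namespace llvm;

    void countExample(LLVMContext &C) {
        Type *tracked = PointerType::get(C, 10); // assumed AddressSpace::Tracked
        Type *loaded  = PointerType::get(C, 13); // assumed AddressSpace::Loaded
        Type *i64     = Type::getInt64Ty(C);
        StructType *ST = StructType::get(C, {tracked, i64, loaded});

        CountTrackedPointers all(ST);        // count == 2, derived == true (Loaded != Tracked)
        CountTrackedPointers own(ST, true);  // ignore_loaded: count == 1, only the tracked field
        (void)all; (void)own;
    }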
@@ -419,6 +50,20 @@ CountTrackedPointers::CountTrackedPointers(Type *T) { all = false; } +bool hasLoadedTy(Type *T) { + if (isa(T)) { + if (T->getPointerAddressSpace() == AddressSpace::Loaded) + return true; + } else if (isa(T) || isa(T) || isa(T)) { + for (Type *ElT : T->subtypes()) { + if (hasLoadedTy(ElT)) + return true; + } + } + return false; +} + + unsigned getCompositeNumElements(Type *T) { if (auto *ST = dyn_cast(T)) return ST->getNumElements(); @@ -431,7 +76,7 @@ unsigned getCompositeNumElements(Type *T) { } // Walk through a Type, and record the element path to every tracked value inside -void TrackCompositeType(Type *T, std::vector &Idxs, std::vector> &Numberings) { +void TrackCompositeType(Type *T, SmallVector &Idxs, SmallVector, 0> &Numberings) { if (isa(T)) { if (isSpecialPtr(T)) Numberings.push_back(Idxs); @@ -447,15 +92,14 @@ void TrackCompositeType(Type *T, std::vector &Idxs, std::vector> TrackCompositeType(Type *T) { - std::vector Idxs; - std::vector> Numberings; +SmallVector, 0> TrackCompositeType(Type *T) { + SmallVector Idxs; + SmallVector, 0> Numberings; TrackCompositeType(T, Idxs, Numberings); return Numberings; } - // Walk through simple expressions to until we hit something that requires root numbering // If the input value is a scalar (pointer), we may return a composite value as base // in which case the second member of the pair is the index of the value in the vector. @@ -500,18 +144,19 @@ static std::pair FindBaseValue(const State &S, Value *V, bool UseCac CurrentV = EEI->getVectorOperand(); } else if (auto LI = dyn_cast(CurrentV)) { - if (auto PtrT = dyn_cast(LI->getType()->getScalarType())) { - if (PtrT->getAddressSpace() == AddressSpace::Loaded) { - CurrentV = LI->getPointerOperand(); - fld_idx = -1; - if (!isSpecialPtr(CurrentV->getType())) { - // This could really be anything, but it's not loaded - // from a tracked pointer, so it doesn't matter what - // it is--just pick something simple. - CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); - } - continue; + if (hasLoadedTy(LI->getType())) { + // This is the old (now deprecated) implementation for loaded. + // New code should use the gc_loaded intrinsic to ensure that + // the load is paired with the correct Tracked value. + CurrentV = LI->getPointerOperand(); + fld_idx = -1; + if (!isSpecialPtr(CurrentV->getType())) { + // This could really be anything, but it's not loaded + // from a tracked pointer, so it doesn't matter what + // it is--just pick something simple. 
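The `julia.gc_loaded` case added to the base-value walk above pairs an interior pointer with the tracked object that owns it, so rooting decisions follow the owner. A stripped-down model of that walk (illustration only, not `FindBaseValue`, which also handles loads, phis, selects, and vector lanes):

    #include <llvm/IR/Function.h>
    #include <llvm/IR/Instructions.h>
    using namespace llvm;

    // Strip derived-pointer steps until reaching the value whose rooting matters.
    Value *walkToBase(Value *V) {
        while (true) {
            if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
                V = GEP->getPointerOperand();          // interior pointer: same object
            } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
                V = ASC->getPointerOperand();          // Tracked -> Derived decay
            } else if (auto *CI = dyn_cast<CallInst>(V)) {
                Function *F = CI->getCalledFunction();
                if (F && F->getName() == "julia.gc_loaded")
                    V = CI->getArgOperand(0);          // the owner keeps the load alive
                else
                    break;                             // other calls are defs
            } else {
                break;                                 // arguments, loads, phis, ... are defs
            }
        }
        return V;
    }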
+ CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0)); } + continue; } // In general a load terminates a walk break; @@ -533,36 +178,42 @@ static std::pair FindBaseValue(const State &S, Value *V, bool UseCac if (II->getIntrinsicID() == Intrinsic::masked_load || II->getIntrinsicID() == Intrinsic::masked_gather) { if (auto VTy = dyn_cast(II->getType())) { - if (auto PtrT = dyn_cast(VTy->getElementType())) { - if (PtrT->getAddressSpace() == AddressSpace::Loaded) { - Value *Mask = II->getOperand(2); - Value *Passthrough = II->getOperand(3); - if (!isa(Mask) || !cast(Mask)->isAllOnesValue()) { - assert(isa(Passthrough) && "unimplemented"); - (void)Passthrough; + if (hasLoadedTy(VTy->getElementType())) { + Value *Mask = II->getOperand(2); + Value *Passthrough = II->getOperand(3); + if (!isa(Mask) || !cast(Mask)->isAllOnesValue()) { + assert(isa(Passthrough) && "unimplemented"); + (void)Passthrough; + } + CurrentV = II->getOperand(0); + if (II->getIntrinsicID() == Intrinsic::masked_load) { + fld_idx = -1; + if (!isSpecialPtr(CurrentV->getType())) { + CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0)); } - CurrentV = II->getOperand(0); - if (II->getIntrinsicID() == Intrinsic::masked_load) { - fld_idx = -1; - if (!isSpecialPtr(CurrentV->getType())) { - CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); - } - } else { - if (auto VTy2 = dyn_cast(CurrentV->getType())) { - if (!isSpecialPtr(VTy2->getElementType())) { - CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); - fld_idx = -1; - } + } else { + if (auto VTy2 = dyn_cast(CurrentV->getType())) { + if (!isSpecialPtr(VTy2->getElementType())) { + CurrentV = ConstantPointerNull::get(PointerType::get(V->getContext(), 0)); + fld_idx = -1; } } - continue; } + continue; } } // In general a load terminates a walk break; } } + else if (auto CI = dyn_cast(CurrentV)) { + auto callee = CI->getCalledFunction(); + if (callee && callee->getName() == "julia.gc_loaded") { + CurrentV = CI->getArgOperand(0); + continue; + } + break; + } else { break; } @@ -593,7 +244,7 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair ValE } else if (ValExpr.second != -1) { auto Tracked = TrackCompositeType(V->getType()); - auto Idxs = makeArrayRef(Tracked.at(ValExpr.second)); + auto Idxs = ArrayRef(Tracked[ValExpr.second]); auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1); Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec); bool IsVector = isa(FinalT); @@ -602,26 +253,26 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair ValE if (T->getAddressSpace() != AddressSpace::Tracked) { // if V isn't tracked, get the shadow def auto Numbers = NumberAllBase(S, V); - int BaseNumber = Numbers.at(ValExpr.second); + int BaseNumber = Numbers[ValExpr.second]; if (BaseNumber >= 0) V = GetPtrForNumber(S, BaseNumber, InsertBefore); else V = ConstantPointerNull::get(cast(T_prjlvalue)); return V; } + IRBuilder foldbuilder(InsertBefore->getContext(), InstSimplifyFolder(InsertBefore->getModule()->getDataLayout())); + foldbuilder.SetInsertPoint(InsertBefore); if (Idxs.size() > IsVector) - V = ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs, "", InsertBefore); + V = foldbuilder.CreateExtractValue(V, IsVector ? 
IdxsNotVec : Idxs); if (IsVector) - V = ExtractElementInst::Create(V, - ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()), - "", InsertBefore); + V = foldbuilder.CreateExtractElement(V, ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back())); } return V; } -std::vector LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) { +SmallVector LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) { auto Numbers = NumberAllBase(S, BaseVec); - std::vector V{Numbers.size()}; + SmallVector V{Numbers.size()}; Value *V_rnull = ConstantPointerNull::get(cast(T_prjlvalue)); for (unsigned i = 0; i < V.size(); ++i) { if (Numbers[i] >= 0) // ignores undef and poison values @@ -637,7 +288,7 @@ Value *LateLowerGCFrame::GetPtrForNumber(State &S, unsigned Num, Instruction *In Value *Val = S.ReversePtrNumbering[Num]; unsigned Idx = -1; if (!isa(Val->getType())) { - const std::vector &AllNums = S.AllCompositeNumbering[Val]; + const SmallVector &AllNums = S.AllCompositeNumbering[Val]; for (Idx = 0; Idx < AllNums.size(); ++Idx) { if ((unsigned)AllNums[Idx] == Num) break; @@ -654,20 +305,17 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { // already visited here--nothing to do return; } - std::vector Numbers; - unsigned NumRoots = 1; - if (auto VTy = dyn_cast(SI->getType())) { - ElementCount EC = VTy->getElementCount(); - Numbers.resize(EC.getKnownMinValue(), -1); - } - else - assert(isa(SI->getType()) && "unimplemented"); assert(!isTrackedValue(SI)); + SmallVector Numbers; + unsigned NumRoots = 1; + Type *STy = SI->getType(); + if (!isa(STy)) + Numbers.resize(CountTrackedPointers(STy).count, -1); // find the base root for the arguments Value *TrueBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getTrueValue(), false), SI); Value *FalseBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getFalseValue(), false), SI); - std::vector TrueBases; - std::vector FalseBases; + SmallVector TrueBases; + SmallVector FalseBases; if (!isa(TrueBase->getType())) { TrueBases = MaybeExtractVector(S, TrueBase, SI); assert(TrueBases.size() == Numbers.size()); @@ -702,11 +350,7 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i), "", SI); } - if (FalseElem->getType() != TrueElem->getType()) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(FalseElem->getContext().supportsTypedPointers()); - FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI); - } + assert(FalseElem->getType() == TrueElem->getType()); SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[SelectBase] = Number; @@ -737,22 +381,19 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { return; // need to handle each element (may just be one scalar) SmallVector lifted; - std::vector Numbers; + SmallVector Numbers; unsigned NumRoots = 1; - if (auto VTy = dyn_cast(Phi->getType())) { - NumRoots = VTy->getNumElements(); + Type *PTy = Phi->getType(); + if (!isa(PTy)) { + NumRoots = CountTrackedPointers(PTy).count; Numbers.resize(NumRoots); } - else { - // TODO: SVE - assert(isa(Phi->getType()) && "unimplemented"); - } for (unsigned i = 0; i < NumRoots; ++i) { PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[lift] = Number; S.ReversePtrNumbering[Number] = lift; - if 
(!isa(Phi->getType())) + if (isa(PTy)) S.AllPtrNumbering[Phi] = Number; else Numbers[i] = Number; @@ -766,7 +407,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { BasicBlock *IncomingBB = Phi->getIncomingBlock(i); Instruction *Terminator = IncomingBB->getTerminator(); Value *Base = MaybeExtractScalar(S, FindBaseValue(S, Incoming, false), Terminator); - std::vector IncomingBases; + SmallVector IncomingBases; if (!isa(Base->getType())) { IncomingBases = MaybeExtractVector(S, Base, Terminator); assert(IncomingBases.size() == NumRoots); @@ -778,29 +419,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { BaseElem = Base; else BaseElem = IncomingBases[i]; - if (BaseElem->getType() != T_prjlvalue) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(BaseElem->getContext().supportsTypedPointers()); - auto &remap = CastedRoots[i][BaseElem]; - if (!remap) { - if (auto constant = dyn_cast(BaseElem)) { - remap = ConstantExpr::getBitCast(constant, T_prjlvalue, ""); - } else { - Instruction *InsertBefore; - if (auto arg = dyn_cast(BaseElem)) { - InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt(); - } else { - assert(isa(BaseElem) && "Unknown value type detected!"); - InsertBefore = cast(BaseElem)->getNextNonDebugInstruction(); - } - while (isa(InsertBefore)) { - InsertBefore = InsertBefore->getNextNonDebugInstruction(); - } - remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore); - } - } - BaseElem = remap; - } + assert(BaseElem->getType() == T_prjlvalue); lift->addIncoming(BaseElem, IncomingBB); } } @@ -826,11 +445,11 @@ int LateLowerGCFrame::NumberBase(State &S, Value *CurrentV) Number = -1; } else if (isa(CurrentV) && !isTrackedValue(CurrentV)) { LiftSelect(S, cast(CurrentV)); - Number = S.AllPtrNumbering.at(CurrentV); + Number = S.AllPtrNumbering[CurrentV]; return Number; } else if (isa(CurrentV) && !isTrackedValue(CurrentV)) { LiftPhi(S, cast(CurrentV)); - Number = S.AllPtrNumbering.at(CurrentV); + Number = S.AllPtrNumbering[CurrentV]; return Number; } else if (isa(CurrentV)) { auto Numbers = NumberAllBase(S, CurrentV); @@ -853,7 +472,7 @@ int LateLowerGCFrame::Number(State &S, Value *V) { Number = NumberBase(S, CurrentV.first); } else { auto Numbers = NumberAllBase(S, CurrentV.first); - Number = Numbers.at(CurrentV.second); + Number = Numbers[CurrentV.second]; } if (V != CurrentV.first) S.AllPtrNumbering[V] = Number; @@ -861,18 +480,18 @@ int LateLowerGCFrame::Number(State &S, Value *V) { } // assign pointer numbers to a def instruction -std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { +SmallVector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { if (isa(CurrentV->getType())) { auto it = S.AllPtrNumbering.find(CurrentV); if (it != S.AllPtrNumbering.end()) - return std::vector({it->second}); + return SmallVector({it->second}); } else { auto it = S.AllCompositeNumbering.find(CurrentV); if (it != S.AllCompositeNumbering.end()) return it->second; } - std::vector Numbers; + SmallVector Numbers; auto tracked = CountTrackedPointers(CurrentV->getType()); if (tracked.count == 0) return Numbers; @@ -881,16 +500,16 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { Numbers.resize(tracked.count, -1); } else if (auto *SVI = dyn_cast(CurrentV)) { - std::vector Numbers1 = NumberAll(S, SVI->getOperand(0)); - std::vector Numbers2 = NumberAll(S, SVI->getOperand(1)); + SmallVector Numbers1 = NumberAll(S, SVI->getOperand(0)); + SmallVector Numbers2 = NumberAll(S, 
SVI->getOperand(1)); auto Mask = SVI->getShuffleMask(); for (auto idx : Mask) { if (idx == -1) { Numbers.push_back(-1); } else if ((unsigned)idx < Numbers1.size()) { - Numbers.push_back(Numbers1.at(idx)); + Numbers.push_back(Numbers1[idx]); } else { - Numbers.push_back(Numbers2.at(idx - Numbers1.size())); + Numbers.push_back(Numbers2[idx - Numbers1.size()]); } } } else if (auto *IEI = dyn_cast(CurrentV)) { @@ -903,11 +522,11 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { Numbers = NumberAll(S, IVI->getAggregateOperand()); auto Tracked = TrackCompositeType(IVI->getType()); assert(Tracked.size() == Numbers.size()); - std::vector InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand()); + SmallVector InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand()); auto Idxs = IVI->getIndices(); unsigned j = 0; for (unsigned i = 0; i < Tracked.size(); ++i) { - auto Elem = makeArrayRef(Tracked[i]); + auto Elem = ArrayRef(Tracked[i]); if (Elem.size() < Idxs.size()) continue; if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs) @@ -920,7 +539,7 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { assert(Tracked.size() == BaseNumbers.size()); auto Idxs = EVI->getIndices(); for (unsigned i = 0; i < Tracked.size(); ++i) { - auto Elem = makeArrayRef(Tracked[i]); + auto Elem = ArrayRef(Tracked[i]); if (Elem.size() < Idxs.size()) continue; if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs) @@ -938,10 +557,10 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { llvm_unreachable("Unexpected generating operation for derived values"); } if (isa(CurrentV->getType())) { - auto Number = S.AllPtrNumbering.at(CurrentV); + auto Number = S.AllPtrNumbering[CurrentV]; Numbers.resize(1, Number); } else { - Numbers = S.AllCompositeNumbering.at(CurrentV); + Numbers = S.AllCompositeNumbering[CurrentV]; } } else { assert((isa(CurrentV) || isa(CurrentV) || isa(CurrentV) || isa(CurrentV) || @@ -964,17 +583,17 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { } // gets the pointer number for every gc tracked value inside V -std::vector LateLowerGCFrame::NumberAll(State &S, Value *V) { +SmallVector LateLowerGCFrame::NumberAll(State &S, Value *V) { if (isa(V->getType())) { auto it = S.AllPtrNumbering.find(V); if (it != S.AllPtrNumbering.end()) - return std::vector({it->second}); + return SmallVector({it->second}); } else { auto it = S.AllCompositeNumbering.find(V); if (it != S.AllCompositeNumbering.end()) return it->second; } - std::vector Numbers; + SmallVector Numbers; auto tracked = CountTrackedPointers(V->getType()); if (tracked.count == 0) return Numbers; @@ -1025,7 +644,7 @@ static bool HasBitSet(const BitVector &BV, unsigned Bit) { return Bit < BV.size() && BV[Bit]; } -static void NoteDef(State &S, BBState &BBS, int Num, const std::vector &SafepointsSoFar) { +static void NoteDef(State &S, BBState &BBS, int Num, const ArrayRef &SafepointsSoFar) { assert(Num >= 0); MaybeResize(BBS, Num); assert(!BBS.Defs.test(Num) && "SSA Violation or misnumbering?"); @@ -1039,7 +658,9 @@ static void NoteDef(State &S, BBState &BBS, int Num, const std::vector &Saf } } -void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar, SmallVector &&RefinedPtr) { +void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, + const ArrayRef &SafepointsSoFar, + SmallVector &&RefinedPtr) { Type *RT = Def->getType(); if (isa(RT)) { if 
(!isSpecialPtr(RT)) @@ -1051,7 +672,7 @@ void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const st S.Refinements[Num] = std::move(RefinedPtr); } else { - std::vector Nums = NumberAll(S, Def); + SmallVector Nums = NumberAll(S, Def); for (int Num : Nums) { NoteDef(S, BBS, Num, SafepointsSoFar); if (!RefinedPtr.empty()) @@ -1060,7 +681,7 @@ void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const st } } -static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector CalleeRoots) { +static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, SmallVectorImpl &CalleeRoots) { int Number = ++S.MaxSafepointNumber; S.SafepointNumbering[CI] = Number; S.ReverseSafepointNumbering.push_back(CI); @@ -1069,13 +690,20 @@ static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector // in this BB (i.e. even when they don't participate in the dataflow // computation) S.LiveSets.push_back(BBS.UpExposedUses); - S.LiveIfLiveOut.push_back(std::vector{}); + S.LiveIfLiveOut.push_back(SmallVector{}); S.CalleeRoots.push_back(std::move(CalleeRoots)); return Number; } -void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses) { +void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F) { // Short circuit to avoid having to deal with vectors of constants, etc. +//#ifndef NDEBUG +// if (isa(V->getType())) { +// if (isSpecialPtr(V->getType())) +// if (isa(V) && !isa(V)) +// F.dump(); +// } +//#endif if (isa(V)) return; if (isa(V->getType())) { @@ -1087,7 +715,7 @@ void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitV Uses.set(Num); } } else { - std::vector Nums = NumberAll(S, V); + SmallVector Nums = NumberAll(S, V); for (int Num : Nums) { if (Num < 0) continue; @@ -1097,9 +725,9 @@ void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitV } } -void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI) { +void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, Instruction &UI) { for (Use &U : UI.operands()) { - NoteUse(S, BBS, U); + NoteUse(S, BBS, U, *UI.getFunction()); } } @@ -1190,7 +818,7 @@ static bool isLoadFromImmut(LoadInst *LI) if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa); - if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) + if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"})) return true; return false; } @@ -1245,6 +873,10 @@ static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr task_local = true; return true; } + if (callee && callee->getName() == "julia.gc_loaded") { + return isLoadFromConstGV(call->getArgOperand(0), task_local, seen) && + isLoadFromConstGV(call->getArgOperand(1), task_local, seen); + } } if (isa(v)) { task_local = true; @@ -1269,8 +901,7 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen) auto load_base = LI->getPointerOperand()->stripInBoundsOffsets(); assert(load_base); // Static analyzer auto gv = dyn_cast(load_base); - if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa), - {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) { + if (isLoadFromImmut(LI)) { if (gv) return true; return isLoadFromConstGV(load_base, task_local, seen); @@ -1505,20 +1136,18 @@ State LateLowerGCFrame::LocalScan(Function &F) { if (II->getIntrinsicID() == Intrinsic::masked_load || 
II->getIntrinsicID() == Intrinsic::masked_gather) { if (auto VTy = dyn_cast(II->getType())) { - if (auto PtrT = dyn_cast(VTy->getElementType())) { - if (isSpecialPtr(PtrT)) { - // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space. - // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now. - Value *passthru = II->getArgOperand(3); - if (isa(passthru)) { - II->setArgOperand(3, Constant::getNullValue(passthru->getType())); - } - if (PtrT->getAddressSpace() == AddressSpace::Loaded) { - // These are not real defs - continue; - } + if (CountTrackedPointers(VTy->getElementType()).count) { + // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space. + // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now. + Value *passthru = II->getArgOperand(3); + if (isa(passthru)) { + II->setArgOperand(3, Constant::getNullValue(passthru->getType())); } } + if (hasLoadedTy(VTy->getElementType())) { + // These are not real defs + continue; + } } } } @@ -1526,13 +1155,15 @@ State LateLowerGCFrame::LocalScan(Function &F) { if (callee && callee == typeof_func) { MaybeNoteDef(S, BBS, CI, BBS.Safepoints, SmallVector{-2}); } + else if (callee && callee->getName() == "julia.gc_loaded") { + continue; + } else { MaybeNoteDef(S, BBS, CI, BBS.Safepoints); } if (CI->hasStructRetAttr()) { Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType(); - assert(cast(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType())); - auto tracked = CountTrackedPointers(ElT); + auto tracked = CountTrackedPointers(ElT, true); if (tracked.count) { AllocaInst *SRet = dyn_cast((CI->arg_begin()[0])->stripInBoundsOffsets()); assert(SRet); @@ -1585,7 +1216,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { } if (callee) { if (callee == gc_preserve_begin_func) { - std::vector args; + SmallVector args; for (Use &U : CI->args()) { Value *V = U; if (isa(V)) @@ -1597,7 +1228,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { args.push_back(Num); } } else { - std::vector Nums = NumberAll(S, V); + SmallVector Nums = NumberAll(S, V); for (int Num : Nums) { if (Num < 0) continue; @@ -1613,25 +1244,26 @@ State LateLowerGCFrame::LocalScan(Function &F) { callee == gc_preserve_end_func || callee == typeof_func || callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) || callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) || - callee == write_barrier_func || + callee->getName() == XSTR(jl_lock_field) || callee->getName() == XSTR(jl_unlock_field) || + callee == write_barrier_func || callee == gc_loaded_func || callee == pop_handler_noexcept_func || callee->getName() == "memcmp") { continue; } - if (callee->hasFnAttribute(Attribute::ReadNone) || - callee->hasFnAttribute(Attribute::ReadOnly) || - callee->hasFnAttribute(Attribute::ArgMemOnly)) { + if (callee->getMemoryEffects().onlyReadsMemory() || + callee->getMemoryEffects().onlyAccessesArgPointees()) { continue; } if (MemTransferInst *MI = dyn_cast(CI)) { MaybeTrackDst(S, MI); } } - if (isa(CI) || CI->hasFnAttr(Attribute::ArgMemOnly) || - CI->hasFnAttr(Attribute::ReadNone) || CI->hasFnAttr(Attribute::ReadOnly)) { + if (isa(CI) || + CI->getMemoryEffects().onlyAccessesArgPointees() || + 
CI->getMemoryEffects().onlyReadsMemory()) { // Intrinsics are never safepoints. continue; } - std::vector CalleeRoots; + SmallVector CalleeRoots; for (Use &U : CI->args()) { // Find all callee rooted arguments. // Record them instead of simply remove them from live values here @@ -1649,7 +1281,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { continue; CalleeRoots.push_back(Num); } - int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots)); + int SafepointNumber = NoteSafepoint(S, BBS, CI, CalleeRoots); BBS.HasSafepoint = true; BBS.TopmostSafepoint = SafepointNumber; BBS.Safepoints.push_back(SafepointNumber); @@ -1677,9 +1309,8 @@ State LateLowerGCFrame::LocalScan(Function &F) { // task but we do need to issue write barriers for when the current task dies. RefinedPtr.push_back(task_local ? -1 : -2); } - if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) { + if (!hasLoadedTy(Ty)) MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr)); - } NoteOperandUses(S, BBS, I); } else if (auto *LI = dyn_cast(&I)) { Type *Ty = LI->getNewValOperand()->getType()->getScalarType(); @@ -1725,14 +1356,14 @@ State LateLowerGCFrame::LocalScan(Function &F) { if (isa(Phi->getType())) { PHINumbers.push_back(Number(S, Phi)); } else { - std::vector Nums = NumberAll(S, Phi); + SmallVector Nums = NumberAll(S, Phi); for (int Num : Nums) PHINumbers.push_back(Num); } unsigned nIncoming = Phi->getNumIncomingValues(); for (unsigned i = 0; i < nIncoming; ++i) { BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)]; - NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts); + NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts, F); } } else if (tracked.count) { // We need to insert extra phis for the GC roots @@ -1758,7 +1389,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { } else if (auto *AI = dyn_cast(&I)) { Type *ElT = AI->getAllocatedType(); if (AI->isStaticAlloca() && isa(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked) { - S.Allocas.push_back(AI); + S.ArrayAllocas[AI] = cast(AI->getArraySize())->getZExtValue(); } } } @@ -1773,7 +1404,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef Idxs, IRBuilder<> &irbuilder) { Type *T_int32 = Type::getInt32Ty(V->getContext()); if (isptr) { - std::vector IdxList{Idxs.size() + 1}; + SmallVector IdxList{Idxs.size() + 1}; IdxList[0] = ConstantInt::get(T_int32, 0); for (unsigned j = 0; j < Idxs.size(); ++j) { IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]); @@ -1792,11 +1423,13 @@ static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1); Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec); bool IsVector = isa(FinalT); + IRBuilder foldbuilder(irbuilder.getContext(), InstSimplifyFolder(irbuilder.GetInsertBlock()->getModule()->getDataLayout())); + foldbuilder.restoreIP(irbuilder.saveIP()); + foldbuilder.SetCurrentDebugLocation(irbuilder.getCurrentDebugLocation()); if (Idxs.size() > IsVector) - V = irbuilder.Insert(ExtractValueInst::Create(V, IsVector ? IdxsNotVec : Idxs)); + V = foldbuilder.CreateExtractValue(V, IsVector ? 
IdxsNotVec : Idxs); if (IsVector) - V = irbuilder.Insert(ExtractElementInst::Create(V, - ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back()))); + V = foldbuilder.CreateExtractElement(V, ConstantInt::get(Type::getInt32Ty(V->getContext()), Idxs.back())); } return V; } @@ -1813,9 +1446,9 @@ static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef perm_offsets) { +SmallVector ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef perm_offsets) { auto Tracked = TrackCompositeType(STy); - std::vector Ptrs; + SmallVector Ptrs; unsigned perm_idx = 0; auto ignore_field = [&] (ArrayRef Idxs) { if (perm_idx >= perm_offsets.size()) @@ -1837,28 +1470,27 @@ std::vector ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBu return false; }; for (unsigned i = 0; i < Tracked.size(); ++i) { - auto Idxs = makeArrayRef(Tracked[i]); + auto Idxs = ArrayRef(Tracked[i]); if (ignore_field(Idxs)) continue; Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder); - Ptrs.push_back(Elem); + if (isTrackedValue(Elem)) // ignore addrspace Loaded when it appears + Ptrs.push_back(Elem); } return Ptrs; } -unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) { - auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder); - for (unsigned i = 0; i < Ptrs.size(); ++i) { - Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*` - Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i); - assert(cast(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy)); - StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*))); - shadowStore->setOrdering(AtomicOrdering::NotAtomic); - // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - } - return Ptrs.size(); -} - +//static unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) { +// auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder); +// for (unsigned i = 0; i < Ptrs.size(); ++i) { +// Value *Elem = Ptrs[i]; +// Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*)); +// StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*))); +// shadowStore->setOrdering(AtomicOrdering::NotAtomic); +// // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); +// } +// return Ptrs.size(); +//} // turn a memcpy into a set of loads void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) { @@ -2004,7 +1636,7 @@ static bool IsIndirectlyRooted(const State &S, LargeSparseBitVector &Visited, La return rooted; } -void LateLowerGCFrame::RefineLiveSet(LargeSparseBitVector &LS, State &S, const std::vector &CalleeRoots) +void LateLowerGCFrame::RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef CalleeRoots) { // It is possible that a value is not directly rooted by the refinements in the live set, but rather // indirectly by following the edges of the refinement graph to all the values that root it. 
@@ -2104,12 +1736,12 @@ struct PEOIterator { unsigned weight; unsigned pos; }; - std::vector Elements; - std::vector> Levels; - const std::vector &Neighbors; - PEOIterator(const std::vector &Neighbors) : Neighbors(Neighbors) { + SmallVector Elements; + SmallVector> Levels; + const SmallVector &Neighbors; + PEOIterator(const SmallVector &Neighbors) : Neighbors(Neighbors) { // Initialize State - std::vector FirstLevel; + SmallVector FirstLevel; for (unsigned i = 0; i < Neighbors.size(); ++i) { FirstLevel.push_back(i); Element E{0, i}; @@ -2121,7 +1753,7 @@ struct PEOIterator { // Find the element in the highest bucket int NextElement = -1; while (NextElement == -1 && !Levels.empty()) { - std::vector &LastLevel = Levels.back(); + SmallVector &LastLevel = Levels.back(); while (NextElement == -1 && !LastLevel.empty()) { NextElement = LastLevel.back(); LastLevel.pop_back(); @@ -2131,7 +1763,7 @@ struct PEOIterator { } if (NextElement == -1) return NextElement; - // Make sure not to try to re-use this later. + // Make sure not to try to reuse this later. Elements[NextElement].weight = (unsigned)-1; // Raise neighbors for (int Neighbor : Neighbors[NextElement]) { @@ -2146,7 +1778,7 @@ struct PEOIterator { // Raise the neighbor to the next level. NElement.weight += 1; if (NElement.weight >= Levels.size()) - Levels.push_back(std::vector{}); + Levels.push_back(SmallVector{}); Levels[NElement.weight].push_back(Neighbor); NElement.pos = Levels[NElement.weight].size()-1; } @@ -2156,7 +1788,7 @@ struct PEOIterator { } }; -JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector &Colors) +JL_USED_FUNC static void dumpColorAssignments(const State &S, const ArrayRef &Colors) { for (unsigned i = 0; i < Colors.size(); ++i) { if (Colors[i] == -1) @@ -2167,8 +1799,8 @@ JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector & } } -std::vector LateLowerGCFrame::ColorRoots(const State &S) { - std::vector Colors; +std::pair, int> LateLowerGCFrame::ColorRoots(const State &S) { + SmallVector Colors; Colors.resize(S.MaxPtrNumber + 1, -1); PEOIterator Ordering(S.Neighbors); int PreAssignedColors = 0; @@ -2209,23 +1841,21 @@ std::vector LateLowerGCFrame::ColorRoots(const State &S) { NewColor += PreAssignedColors; Colors[ActiveElement] = NewColor; } - return Colors; + return {Colors, PreAssignedColors}; } // Size of T is assumed to be `sizeof(void*)` Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V) { assert(T == T_size || isa(T)); - auto TV = cast(V->getType()); - auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace())); - return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1)); + return builder.CreateInBoundsGEP(T, V, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr"); } Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V) { auto addr = EmitTagPtr(builder, T_size, T_size, V); auto &M = *builder.GetInsertBlock()->getModule(); - LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0)); + LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag"); load->setOrdering(AtomicOrdering::Unordered); load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); MDBuilder MDB(load->getContext()); @@ -2281,6 +1911,50 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) { return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag); } +void 
LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector &WriteBarriers, bool *CFGModified) { + auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); + for (auto CI : WriteBarriers) { + auto parent = CI->getArgOperand(0); + if (std::all_of(CI->op_begin() + 1, CI->op_end(), + [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) { + CI->eraseFromParent(); + continue; + } + if (CFGModified) { + *CFGModified = true; + } + + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits"); + auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked"); + auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); + builder.SetInsertPoint(mayTrigTerm); + mayTrigTerm->getParent()->setName("may_trigger_wb"); + Value *anyChldNotMarked = NULL; + for (unsigned i = 1; i < CI->arg_size(); i++) { + Value *child = CI->getArgOperand(i); + Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit"); + Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked"); + anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; + } + assert(anyChldNotMarked); // handled by all_of test above + MDBuilder MDB(parent->getContext()); + SmallVector Weights{1, 9}; + auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, + MDB.createBranchWeights(Weights)); + trigTerm->getParent()->setName("trigger_wb"); + builder.SetInsertPoint(trigTerm); + if (CI->getCalledOperand() == write_barrier_func) { + builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); + } + else { + assert(false); + } + CI->eraseFromParent(); + } +} + bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { auto T_int32 = Type::getInt32Ty(F.getContext()); auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); @@ -2296,9 +1970,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (T_prjlvalue) { T_pprjlvalue = T_prjlvalue->getPointerTo(); Frame = new AllocaInst(T_prjlvalue, allocaAddressSpace, - ConstantInt::get(T_int32, maxframeargs), "", StartOff); + ConstantInt::get(T_int32, maxframeargs), "jlcallframe", StartOff); } - std::vector write_barriers; + SmallVector write_barriers; for (BasicBlock &BB : F) { for (auto it = BB.begin(); it != BB.end();) { Instruction *I = &*it; @@ -2308,7 +1982,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (I->getMetadata(LLVMContext::MD_invariant_load)) I->setMetadata(LLVMContext::MD_invariant_load, NULL); if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) { - if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const"})) { + if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const", "jtbaa_memoryptr", "jtbaa_memorylen", "tbaa_memoryown"})) { MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA); if (MutableTBAA != TBAA) I->setMetadata(LLVMContext::MD_tbaa, MutableTBAA); @@ -2337,7 +2011,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { /* No replacement */ } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); - auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), 
"", CI); + auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI); + ASCI->takeName(CI); + CI->replaceAllUsesWith(ASCI); + UpdatePtrNumbering(CI, ASCI, S); + } else if (gc_loaded_func != nullptr && callee == gc_loaded_func) { + auto *obj = CI->getOperand(1); + auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI); ASCI->takeName(CI); CI->replaceAllUsesWith(ASCI); UpdatePtrNumbering(CI, ASCI, S); @@ -2348,22 +2028,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); - // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like - // `julia.gc_alloc_obj` except it doesn't set the tag. - auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes); - auto ptlsLoad = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe); - auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext())); - auto newI = builder.CreateCall( - allocBytesIntrinsic, - { - ptls, - builder.CreateIntCast( - CI->getArgOperand(1), - allocBytesIntrinsic->getFunctionType()->getParamType(1), - false) - }); - newI->takeName(CI); - // LLVM alignment/bit check is not happy about addrspacecast and refuse // to remove write barrier because of it. // We pretty much only load using `T_size` so try our best to strip @@ -2385,8 +2049,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (isLoadFromConstGV(LI, task_local) && getLoadValueAlign(LI) < 16) { Type *T_int64 = Type::getInt64Ty(LI->getContext()); auto op = ConstantAsMetadata::get(ConstantInt::get(T_int64, 16)); - LI->setMetadata(LLVMContext::MD_align, - MDNode::get(LI->getContext(), { op })); + LI->setMetadata(LLVMContext::MD_align, MDNode::get(LI->getContext(), { op })); } } // As a last resort, if we didn't manage to strip down the tag @@ -2402,7 +2065,35 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { builder.CreateAlignmentAssumption(DL, tag, 16); } } - // Set the tag. + + // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like + // `julia.gc_alloc_obj` except it specializes the call based on the constant + // size of the object to allocate, to save one indirection, and doesn't set + // the type tag. (Note that if the size is not a constant, it will call + // gc_alloc_obj, and will redundantly set the tag.) + auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes); + auto ptls = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe); + auto newI = builder.CreateCall( + allocBytesIntrinsic, + { + ptls, + builder.CreateIntCast( + CI->getArgOperand(1), + allocBytesIntrinsic->getFunctionType()->getParamType(1), + false), + builder.CreatePtrToInt(tag, T_size), + }); + newI->setAttributes(allocBytesIntrinsic->getAttributes()); + newI->addDereferenceableRetAttr(CI->getRetDereferenceableBytes()); + newI->takeName(CI); + // Now, finally, set the tag. We do this in IR instead of in the C alloc + // function, to provide possible optimization opportunities. (I think? TBH + // the most recent editor of this code is not entirely clear on why we + // prefer to set the tag in the generated code. Providing optimization + // opportunities is the most likely reason; the tradeoff is slightly + // larger code size and increased compilation time, compiling this + // instruction at every allocation site, rather than once in the C alloc + // function.) 
auto &M = *builder.GetInsertBlock()->getModule(); StoreInst *store = builder.CreateAlignedStore( tag, EmitTagPtr(builder, tag_type, T_size, newI), M.getDataLayout().getPointerABIAlignment(0)); @@ -2435,14 +2126,15 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { ++it; continue; } else if ((call_func && callee == call_func) || - (call2_func && callee == call2_func)) { + (call2_func && callee == call2_func) || + (call3_func && callee == call3_func)) { assert(T_prjlvalue); size_t nargs = CI->arg_size(); size_t nframeargs = nargs-1; - if (callee == call_func) - nframeargs -= 1; - else if (callee == call2_func) + if (callee == call2_func) nframeargs -= 2; + else + nframeargs -= 1; SmallVector ReplacementArgs; auto arg_it = CI->arg_begin(); assert(arg_it != CI->arg_end()); @@ -2461,13 +2153,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { // the julia.call signature is varargs, the optimizer is allowed // to rewrite pointee types. It'll go away with opaque pointer // types anyway. - Builder.CreateAlignedStore(Builder.CreateBitCast(*arg_it, T_prjlvalue), + Builder.CreateAlignedStore(*arg_it, Builder.CreateInBoundsGEP(T_prjlvalue, Frame, ConstantInt::get(T_int32, slot++)), Align(sizeof(void*))); } ReplacementArgs.push_back(nframeargs == 0 ? (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) : - (allocaAddressSpace ? Builder.CreateAddrSpaceCast(Frame, T_prjlvalue->getPointerTo(0)) : Frame)); + Builder.CreateAddrSpaceCast(Frame, T_prjlvalue->getPointerTo(0))); ReplacementArgs.push_back(ConstantInt::get(T_int32, nframeargs)); if (callee == call2_func) { // move trailing arg to the end now @@ -2475,7 +2167,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { ReplacementArgs.erase(ReplacementArgs.begin()); ReplacementArgs.push_back(front); } - FunctionType *FTy = callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) : JuliaType::get_jlfunc_ty(CI->getContext()); + FunctionType *FTy = callee == call3_func ? JuliaType::get_jlfunc3_ty(CI->getContext()) : + callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) : + JuliaType::get_jlfunc_ty(CI->getContext()); CallInst *NewCall = CallInst::Create(FTy, new_callee, ReplacementArgs, "", CI); NewCall->setTailCallKind(CI->getTailCallKind()); auto callattrs = CI->getAttributes(); @@ -2506,43 +2200,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { ChangesMade = true; } } - for (auto CI : write_barriers) { - auto parent = CI->getArgOperand(0); - if (std::all_of(CI->op_begin() + 1, CI->op_end(), - [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) { - CI->eraseFromParent(); - continue; - } - if (CFGModified) { - *CFGModified = true; - } - IRBuilder<> builder(CI); - builder.SetCurrentDebugLocation(CI->getDebugLoc()); - auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), 3); - auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); - auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); - builder.SetInsertPoint(mayTrigTerm); - Value *anyChldNotMarked = NULL; - for (unsigned i = 1; i < CI->arg_size(); i++) { - Value *child = CI->getArgOperand(i); - Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), 1); - Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); - anyChldNotMarked = anyChldNotMarked ? 
builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; - } - assert(anyChldNotMarked); // handled by all_of test above - MDBuilder MDB(parent->getContext()); - SmallVector Weights{1, 9}; - auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, - MDB.createBranchWeights(Weights)); - builder.SetInsertPoint(trigTerm); - if (CI->getCalledOperand() == write_barrier_func) { - builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); - } - else { - assert(false); - } - CI->eraseFromParent(); - } + CleanupWriteBarriers(F, S, write_barriers, CFGModified); if (maxframeargs == 0 && Frame) { Frame->eraseFromParent(); } @@ -2556,7 +2214,7 @@ static void AddInPredLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, Stat { bool First = true; std::set Visited; - std::vector WorkList; + SmallVector WorkList; WorkList.push_back(BB); while (!WorkList.empty()) { BB = &*WorkList.back(); @@ -2588,28 +2246,37 @@ static void AddInPredLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, Stat } void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, - const std::vector &Colors, Value *GCFrame, + ArrayRef Colors, Value *GCFrame, Instruction *InsertBefore) { // Get the slot address. auto slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)}, - "", InsertBefore); + "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore); Value *Val = GetPtrForNumber(S, R, InsertBefore); // Pointee types don't have semantics, so the optimizer is // free to rewrite them if convenient. We need to change // it back here for the store. - if (Val->getType() != T_prjlvalue) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(Val->getContext().supportsTypedPointers()); - Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore); - } + assert(Val->getType() == T_prjlvalue); + new StoreInst(Val, slotAddress, InsertBefore); +} + +void LateLowerGCFrame::PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot, + ArrayRef Colors, Value *GCFrame, + Instruction *InsertBefore) { + // Get the slot address. + auto slotAddress = CallInst::Create( + getOrDeclare(jl_intrinsics::getGCFrameSlot), + {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)}, + "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore); + // Reset the slot to NULL. 
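// Without this store, a stale pointer could linger in the slot and keep an
// otherwise-dead object reachable at every later safepoint, so slots whose
// root is no longer live (see the reset loop in PlaceGCFrameStores below)
// are overwritten with NULL.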
+ Value *Val = ConstantPointerNull::get(T_prjlvalue); new StoreInst(Val, slotAddress, InsertBefore); } void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot, - const std::vector &Colors, Value *GCFrame) + ArrayRef Colors, int PreAssignedColors, Value *GCFrame) { for (auto &BB : *S.F) { const BBState &BBS = S.BBStates[&BB]; @@ -2622,6 +2289,15 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot, for(auto rit = BBS.Safepoints.rbegin(); rit != BBS.Safepoints.rend(); ++rit ) { const LargeSparseBitVector &NowLive = S.LiveSets[*rit]; + // reset slots which are no longer alive + for (int Idx : *LastLive) { + if (Idx >= PreAssignedColors && !HasBitSet(NowLive, Idx)) { + PlaceGCFrameReset(S, Idx, MinColorRoot, Colors, GCFrame, + S.ReverseSafepointNumbering[*rit]); + } + } + // store values which are alive in this safepoint but + // haven't been stored in the GC frame before for (int Idx : NowLive) { if (!HasBitSet(*LastLive, Idx)) { PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame, @@ -2633,7 +2309,8 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot, } } -void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State &S, std::map>) { +void LateLowerGCFrame::PlaceRootsAndUpdateCalls(ArrayRef Colors, int PreAssignedColors, State &S, + std::map>) { auto F = S.F; auto T_int32 = Type::getInt32Ty(F->getContext()); int MaxColor = -1; @@ -2642,7 +2319,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State MaxColor = C; // Insert instructions for the actual gc frame - if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) { + if (MaxColor != -1 || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) { // Create and push a GC frame. 
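// Frame layout note: the frame is sized as 2 + <root slots>; the first two
// words are metadata (the root count and the link to the previously pushed
// frame, as in jl_gcframe_t), which is why AllocaSlot below starts at 2 and
// every getGCFrameSlot index is offset by -2.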
auto gcframe = CallInst::Create( getOrDeclare(jl_intrinsics::newGCFrame), @@ -2655,6 +2332,43 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State {gcframe, ConstantInt::get(T_int32, 0)}); pushGcframe->insertAfter(pgcstack); + // we don't run memsetopt after this, so run a basic approximation of it + // that removes any redundant memset calls in the prologue since getGCFrameSlot already includes the null store + Instruction *toerase = nullptr; + for (auto &I : F->getEntryBlock()) { + if (toerase) + toerase->eraseFromParent(); + toerase = nullptr; + Value *ptr; + Value *value; + bool isvolatile; + if (auto *SI = dyn_cast(&I)) { + ptr = SI->getPointerOperand(); + value = SI->getValueOperand(); + isvolatile = SI->isVolatile(); + } + else if (auto *MSI = dyn_cast(&I)) { + ptr = MSI->getDest(); + value = MSI->getValue(); + isvolatile = MSI->isVolatile(); + } + else { + continue; + } + ptr = ptr->stripInBoundsOffsets(); + AllocaInst *AI = dyn_cast(ptr); + if (isa(ptr)) + break; + if (!S.ArrayAllocas.count(AI)) + continue; + if (isvolatile || !isa(value) || !cast(value)->isNullValue()) + break; // stop once we reach a pointer operation that couldn't be analyzed or isn't a null store + toerase = &I; + } + if (toerase) + toerase->eraseFromParent(); + toerase = nullptr; + // Replace Allocas unsigned AllocaSlot = 2; // first two words are metadata auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) { @@ -2666,13 +2380,13 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State AllocaSlot = LLT_ALIGN(AllocaSlot, align); Instruction *slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), - {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}); + {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}, "gc_slot_addr" + StringRef(std::to_string(AllocaSlot - 2))); slotAddress->insertAfter(gcframe); slotAddress->takeName(AI); // Check for lifetime intrinsics on this alloca, we can't keep them // because we're changing the semantics - std::vector ToDelete; + SmallVector ToDelete; RecursivelyVisit([&](Use &VU) { IntrinsicInst *II = cast(VU.getUser()); if ((II->getIntrinsicID() != Intrinsic::lifetime_start && @@ -2683,23 +2397,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State for (CallInst *II : ToDelete) { II->eraseFromParent(); } - if (slotAddress->getType() != AI->getType()) { - // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(slotAddress->getContext().supportsTypedPointers()); - auto BCI = new BitCastInst(slotAddress, AI->getType()); - BCI->insertAfter(slotAddress); - slotAddress = BCI; - } + assert(slotAddress->getType() == AI->getType()); AI->replaceAllUsesWith(slotAddress); AI->eraseFromParent(); AI = NULL; }; - for (AllocaInst *AI : S.Allocas) { - auto ns = cast(AI->getArraySize())->getZExtValue(); - replace_alloca(AI); - AllocaSlot += ns; - } for (auto AI : S.ArrayAllocas) { replace_alloca(AI.first); AllocaSlot += AI.second; @@ -2711,16 +2413,12 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State for (unsigned i = 0; i < Store.second; ++i) { auto slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), - {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}); + {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}, "gc_slot_addr" + StringRef(std::to_string(AllocaSlot - 2))); slotAddress->insertAfter(gcframe); 
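// For each tracked store, the GC-managed components of the stored value are
// extracted and mirrored into dedicated frame slots right before the original
// store, so they remain visible to the collector as roots.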
auto ValExpr = std::make_pair(Base, isa(Base->getType()) ? -1 : i); auto Elem = MaybeExtractScalar(S, ValExpr, SI); - if (Elem->getType() != T_prjlvalue) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(Elem->getContext().supportsTypedPointers()); - Elem = new BitCastInst(Elem, T_prjlvalue, "", SI); - } - //auto Idxs = makeArrayRef(Tracked[i]); + assert(Elem->getType() == T_prjlvalue); + //auto Idxs = ArrayRef(Tracked[i]); //Value *Elem = ExtractScalar(Base, true, Idxs, SI); Value *shadowStore = new StoreInst(Elem, slotAddress, SI); (void)shadowStore; @@ -2733,7 +2431,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State pushGcframe->setArgOperand(1, NRoots); // Insert GC frame stores - PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe); + PlaceGCFrameStores(S, AllocaSlot - 2, Colors, PreAssignedColors, gcframe); // Insert GCFrame pops for (auto &BB : *F) { if (isa(BB.getTerminator())) { @@ -2748,6 +2446,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { initAll(*F.getParent()); + smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); if (!pgcstack_getter && !adoptthread_func) return CleanupIR(F, nullptr, CFGModified); @@ -2758,23 +2457,36 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { State S = LocalScan(F); ComputeLiveness(S); - std::vector Colors = ColorRoots(S); + auto Colors = ColorRoots(S); std::map> CallFrames; // = OptimizeCallFrames(S, Ordering); - PlaceRootsAndUpdateCalls(Colors, S, CallFrames); + PlaceRootsAndUpdateCalls(Colors.first, Colors.second, S, CallFrames); CleanupIR(F, &S, CFGModified); - return true; -} -bool LateLowerGCFrameLegacy::runOnFunction(Function &F) { - auto GetDT = [this]() -> DominatorTree & { - return getAnalysis().getDomTree(); - }; - auto lateLowerGCFrame = LateLowerGCFrame(GetDT); - bool modified = lateLowerGCFrame.runOnFunction(F); -#ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); -#endif - return modified; + + // We lower the julia.gc_alloc_bytes intrinsic in this pass to insert slowpath/fastpath blocks for MMTk + // For now, we do nothing for the Stock GC + auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes); + + if (GCAllocBytes) { + for (auto it = GCAllocBytes->user_begin(); it != GCAllocBytes->user_end(); ) { + if (auto *CI = dyn_cast(*it)) { + *CFGModified = true; + + assert(CI->getCalledOperand() == GCAllocBytes); + + auto newI = lowerGCAllocBytesLate(CI, F); + if (newI != CI) { + ++it; + CI->replaceAllUsesWith(newI); + CI->eraseFromParent(); + continue; + } + } + ++it; + } + } + + return true; } PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) @@ -2786,7 +2498,7 @@ PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) bool CFGModified = false; bool modified = lateLowerGCFrame.runOnFunction(F, &CFGModified); #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); + assert(!verifyLLVMIR(F)); #endif if (modified) { if (CFGModified) { @@ -2797,17 +2509,3 @@ PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) } return PreservedAnalyses::all(); } - - -char LateLowerGCFrameLegacy::ID = 0; -static RegisterPass X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false); - -Pass *createLateLowerGCFramePass() { - return new 
LateLowerGCFrameLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createLateLowerGCFramePass()); -} diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp index 146c0fe701e9b..c359bf6c117ce 100644 --- a/src/llvm-lower-handlers.cpp +++ b/src/llvm-lower-handlers.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -38,13 +37,14 @@ using namespace llvm; /* Lowers Julia Exception Handlers and colors EH frames. * * Our task is to lower: - * call void @julia.except_enter() + * call {i32, ptr} @julia.except_enter(ct) * <...> * call void jl_pop_handler(1) * * to * - * call void @jl_enter_handler(jl_handler *%buff) + * call void @jl_enter_handler(ct, jl_handler *%buff) + * call i32 @jl_setjmp(jmpbuf[] %buff, 0) * <...> * call void jl_pop_handler(1) * @@ -61,7 +61,7 @@ using namespace llvm; * \ / * br i1 %cond, %left2, %right2 * / \ - * jl_pop_hander ret + * jl_pop_handler ret * ret * * The frontend doesn't emit structures like this. However, the optimizer @@ -82,25 +82,18 @@ namespace { * If the module doesn't have declarations for the jl_enter_handler and setjmp * functions, insert them. */ -static void ensure_enter_function(Module &M, const Triple &TT) +static void ensure_enter_function(Module &M, Type *T_pjlvalue, const Triple &TT) { - auto T_int8 = Type::getInt8Ty(M.getContext()); - auto T_pint8 = PointerType::get(T_int8, 0); + auto T_ptr = PointerType::get(M.getContext(), 0); auto T_void = Type::getVoidTy(M.getContext()); auto T_int32 = Type::getInt32Ty(M.getContext()); if (!M.getNamedValue(XSTR(jl_enter_handler))) { - std::vector ehargs(0); - ehargs.push_back(T_pint8); - Function::Create(FunctionType::get(T_void, ehargs, false), + Function::Create(FunctionType::get(T_void, {T_pjlvalue, T_ptr}, false), Function::ExternalLinkage, XSTR(jl_enter_handler), &M); } if (!M.getNamedValue(jl_setjmp_name)) { - std::vector args2(0); - args2.push_back(T_pint8); - if (!TT.isOSWindows()) { - args2.push_back(T_int32); - } - Function::Create(FunctionType::get(T_int32, args2, false), + Type *args2[] = {T_ptr, T_int32}; + Function::Create(FunctionType::get(T_int32, ArrayRef(args2, TT.isOSWindows() ? 
1 : 2), false), Function::ExternalLinkage, jl_setjmp_name, &M) ->addFnAttr(Attribute::ReturnsTwice); } @@ -112,14 +105,14 @@ static bool lowerExcHandlers(Function &F) { Function *except_enter_func = M.getFunction("julia.except_enter"); if (!except_enter_func) return false; // No EH frames in this module - ensure_enter_function(M, TT); + ensure_enter_function(M, except_enter_func->getFunctionType()->getParamType(0), TT); Function *leave_func = M.getFunction(XSTR(jl_pop_handler)); + Function *leave_noexcept_func = M.getFunction(XSTR(jl_pop_handler_noexcept)); Function *jlenter_func = M.getFunction(XSTR(jl_enter_handler)); Function *setjmp_func = M.getFunction(jl_setjmp_name); - - auto T_pint8 = Type::getInt8PtrTy(M.getContext(), 0); - Function *lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 }); - Function *lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 }); + auto T_ptr = PointerType::get(M.getContext(), 0); + Function *lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_ptr }); + Function *lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_ptr }); /* Step 1: EH Depth Numbering */ std::map EnterDepth; @@ -151,9 +144,9 @@ static bool lowerExcHandlers(Function &F) { continue; if (Callee == except_enter_func) EnterDepth[CI] = Depth++; - else if (Callee == leave_func) { + else if (Callee == leave_func || Callee == leave_noexcept_func) { LeaveDepth[CI] = Depth; - Depth -= cast(CI->getArgOperand(0))->getLimitedValue(); + Depth -= cast(CI->getArgOperand(1))->getLimitedValue(); } assert(Depth >= 0); if (Depth > MaxDepth) @@ -174,13 +167,13 @@ static bool lowerExcHandlers(Function &F) { Value *handler_sz64 = ConstantInt::get(Type::getInt64Ty(F.getContext()), sizeof(jl_handler_t)); Instruction *firstInst = &F.getEntryBlock().front(); - std::vector buffs; + SmallVector buffs; unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace(); for (int i = 0; i < MaxDepth; ++i) { auto *buff = new AllocaInst(Type::getInt8Ty(F.getContext()), allocaAddressSpace, handler_sz, Align(16), "", firstInst); if (allocaAddressSpace) { - AddrSpaceCastInst *buff_casted = new AddrSpaceCastInst(buff, Type::getInt8PtrTy(F.getContext(), AddressSpace::Generic)); + AddrSpaceCastInst *buff_casted = new AddrSpaceCastInst(buff, PointerType::get(F.getContext(), AddressSpace::Generic)); buff_casted->insertAfter(buff); buffs.push_back(buff_casted); } else { @@ -193,7 +186,7 @@ static bool lowerExcHandlers(Function &F) { assert(it.second >= 0); Instruction *buff = buffs[it.second]; CallInst *enter = it.first; - auto new_enter = CallInst::Create(jlenter_func, buff, "", enter); + auto new_enter = CallInst::Create(jlenter_func, {enter->getArgOperand(0), buff}, "", enter); Value *lifetime_args[] = { handler_sz64, buff @@ -201,10 +194,7 @@ static bool lowerExcHandlers(Function &F) { CallInst::Create(lifetime_start, lifetime_args, "", new_enter); CallInst *sj; if (!TT.isOSWindows()) { - // For LLVM 3.3 compatibility - Value *args[] = {buff, - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; - sj = CallInst::Create(setjmp_func, args, "", enter); + sj = CallInst::Create(setjmp_func, {buff, ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}, "", enter); } else { sj = CallInst::Create(setjmp_func, buff, "", enter); } @@ -214,13 +204,31 @@ static bool lowerExcHandlers(Function &F) { new_enter->setMetadata(LLVMContext::MD_dbg, dbg); sj->setMetadata(LLVMContext::MD_dbg, dbg); } - 
enter->replaceAllUsesWith(sj); + SmallVector ToErase; + for (auto *U : enter->users()) { + if (auto *EEI = dyn_cast(U)) { + if (EEI->getNumIndices() == 1) { + if (EEI->getIndices()[0] == 0) + EEI->replaceAllUsesWith(sj); + else + EEI->replaceAllUsesWith(buff); + ToErase.push_back(EEI); + } + } + } + for (auto *EEI : ToErase) + EEI->eraseFromParent(); + if (!enter->use_empty()) { + Value *agg = InsertValueInst::Create(UndefValue::get(enter->getType()), sj, ArrayRef(0), "", enter); + agg = InsertValueInst::Create(agg, buff, ArrayRef(1), "", enter); + enter->replaceAllUsesWith(agg); + } enter->eraseFromParent(); } // Insert lifetime end intrinsics after every leave. for (auto it : LeaveDepth) { int StartDepth = it.second - 1; - int npops = cast(it.first->getArgOperand(0))->getLimitedValue(); + int npops = cast(it.first->getArgOperand(1))->getLimitedValue(); for (int i = 0; i < npops; ++i) { assert(StartDepth-i >= 0); Value *lifetime_args[] = { @@ -240,40 +248,10 @@ PreservedAnalyses LowerExcHandlersPass::run(Function &F, FunctionAnalysisManager { bool modified = lowerExcHandlers(F); #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); + assert(!verifyLLVMIR(F)); #endif if (modified) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - - -struct LowerExcHandlersLegacy : public FunctionPass { - static char ID; - LowerExcHandlersLegacy() : FunctionPass(ID) - {} - bool runOnFunction(Function &F) { - bool modified = lowerExcHandlers(F); -#ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); -#endif - return modified; - } -}; - -char LowerExcHandlersLegacy::ID = 0; -static RegisterPass X("LowerExcHandlers", "Lower Julia Exception Handlers", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -Pass *createLowerExcHandlersPass() -{ - return new LowerExcHandlersLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createLowerExcHandlersPass()); -} diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp deleted file mode 100644 index 29c0f7e2b10d6..0000000000000 --- a/src/llvm-muladd.cpp +++ /dev/null @@ -1,146 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -#include "llvm-version.h" -#include "passes.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "julia.h" -#include "julia_assert.h" - -#define DEBUG_TYPE "combine-muladd" -#undef DEBUG - -using namespace llvm; -STATISTIC(TotalContracted, "Total number of multiplies marked for FMA"); - -#ifndef __clang_gcanalyzer__ -#define REMARK(remark) ORE.emit(remark) -#else -#define REMARK(remark) (void) 0; -#endif - -/** - * Combine - * ``` - * %v0 = fmul ... %a, %b - * %v = fadd contract ... %v0, %c - * ``` - * to - * `%v = call contract @llvm.fmuladd.<...>(... %a, ... %b, ... 
%c)` - * when `%v0` has no other use - */ - -// Return true if we changed the mulOp -static bool checkCombine(Value *maybeMul, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT -{ - auto mulOp = dyn_cast(maybeMul); - if (!mulOp || mulOp->getOpcode() != Instruction::FMul) - return false; - if (!mulOp->hasOneUse()) { - LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n"); - REMARK([&](){ - return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp) - << "fmul had multiple uses " << ore::NV("fmul", mulOp); - }); - return false; - } - // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us. - auto fmf = mulOp->getFastMathFlags(); - if (!fmf.allowContract()) { - LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n"); - REMARK([&](){ - return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp) - << "marked for fma " << ore::NV("fmul", mulOp); - }); - ++TotalContracted; - fmf.setAllowContract(true); - mulOp->copyFastMathFlags(fmf); - return true; - } - return false; -} - -static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT -{ - OptimizationRemarkEmitter ORE(&F); - bool modified = false; - for (auto &BB: F) { - for (auto it = BB.begin(); it != BB.end();) { - auto &I = *it; - it++; - switch (I.getOpcode()) { - case Instruction::FAdd: { - if (!I.hasAllowContract()) - continue; - modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); - break; - } - case Instruction::FSub: { - if (!I.hasAllowContract()) - continue; - modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); - break; - } - default: - break; - } - } - } -#ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); -#endif - return modified; -} - -PreservedAnalyses CombineMulAddPass::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT -{ - if (combineMulAdd(F)) { - return PreservedAnalyses::allInSet(); - } - return PreservedAnalyses::all(); -} - - -struct CombineMulAddLegacy : public FunctionPass { - static char ID; - CombineMulAddLegacy() : FunctionPass(ID) - {} - -private: - bool runOnFunction(Function &F) override { - return combineMulAdd(F); - } -}; - -char CombineMulAddLegacy::ID = 0; -static RegisterPass X("CombineMulAdd", "Combine mul and add to muladd", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -Pass *createCombineMulAddPass() -{ - return new CombineMulAddLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createCombineMulAddPass()); -} diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index e4ebbe9d3838a..a76d076ebd6f3 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -12,11 +12,11 @@ #include #include +#include #include #include -#include +#include #include -#include #include #include #include @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -48,11 +47,11 @@ using namespace llvm; -extern Optional always_have_fma(Function&, const Triple &TT); +extern std::optional always_have_fma(Function&, const Triple &TT); namespace { constexpr uint32_t clone_mask = - JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16; + JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16 | JL_TARGET_CLONE_BFLOAT16; // Treat identical mapping as missing and return `def` in that case. 
// We mainly need this to identify cloned function using value map after LLVM cloning @@ -98,14 +97,15 @@ static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_vecca } if (auto callee = call->getCalledFunction()) { auto name = callee->getName(); - if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) { + if (name.starts_with("llvm.muladd.") || name.starts_with("llvm.fma.")) { flag |= JL_TARGET_CLONE_MATH; } - else if (name.startswith("julia.cpu.")) { - if (name.startswith("julia.cpu.have_fma.")) { + else if (name.starts_with("julia.cpu.")) { + if (name.starts_with("julia.cpu.have_fma.")) { // for some platforms we know they always do (or don't) support // FMA. in those cases we don't need to clone the function. - if (!always_have_fma(*callee, TT).hasValue()) + // always_have_fma returns an optional + if (!always_have_fma(*callee, TT)) flag |= JL_TARGET_CLONE_CPU; } else { flag |= JL_TARGET_CLONE_CPU; @@ -128,12 +128,14 @@ static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_vecca } for (size_t i = 0; i < I.getNumOperands(); i++) { - if(I.getOperand(i)->getType()->isHalfTy()){ + if(I.getOperand(i)->getType()->isHalfTy()) { flag |= JL_TARGET_CLONE_FLOAT16; } - // Check for BFloat16 when they are added to julia can be done here + if(I.getOperand(i)->getType()->isBFloatTy()) { + flag |= JL_TARGET_CLONE_BFLOAT16; + } } - uint32_t veccall_flags = JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16; + uint32_t veccall_flags = JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16 | JL_TARGET_CLONE_BFLOAT16; if (has_veccall && (flag & veccall_flags) == veccall_flags) { return flag; } @@ -179,12 +181,12 @@ struct TargetSpec { } }; -static Optional> get_target_specs(Module &M) { +static std::optional> get_target_specs(Module &M) { auto md = M.getModuleFlag("julia.mv.specs"); if (!md) return None; auto tup = cast(md); - std::vector out(tup->getNumOperands()); + SmallVector out(tup->getNumOperands()); for (unsigned i = 0; i < tup->getNumOperands(); i++) { out[i] = TargetSpec::fromMD(cast(tup->getOperand(i).get())); } @@ -192,7 +194,7 @@ static Optional> get_target_specs(Module &M) { } static void set_target_specs(Module &M, ArrayRef specs) { - std::vector md; + SmallVector md; md.reserve(specs.size()); for (auto &spec: specs) { md.push_back(spec.toMD(M.getContext())); @@ -203,14 +205,14 @@ static void set_target_specs(Module &M, ArrayRef specs) { static void annotate_module_clones(Module &M) { auto TT = Triple(M.getTargetTriple()); CallGraph CG(M); - std::vector orig_funcs; + SmallVector orig_funcs; for (auto &F: M) { if (F.isDeclaration()) continue; orig_funcs.push_back(&F); } bool has_veccall = false; - std::vector specs; + SmallVector specs; if (auto maybe_specs = get_target_specs(M)) { specs = std::move(*maybe_specs); } else { @@ -221,10 +223,10 @@ static void annotate_module_clones(Module &M) { } set_target_specs(M, specs); } - std::vector clones(orig_funcs.size(), APInt(specs.size(), 0)); + SmallVector clones(orig_funcs.size(), APInt(specs.size(), 0)); BitVector subtarget_cloned(orig_funcs.size()); - std::vector func_infos(orig_funcs.size()); + SmallVector func_infos(orig_funcs.size()); for (unsigned i = 0; i < orig_funcs.size(); i++) { func_infos[i] = collect_func_info(*orig_funcs[i], TT, has_veccall); } @@ -340,7 +342,7 @@ struct CloneCtx { } }; struct Group : Target { - std::vector clones; + SmallVector clones; explicit Group(int base) : Target(base), clones{} 
@@ -379,20 +381,19 @@ struct CloneCtx { void rewrite_alias(GlobalAlias *alias, Function* F); MDNode *tbaa_const; - std::vector specs; - std::vector groups{}; - std::vector linearized; - std::vector fvars; - std::vector gvars; + SmallVector specs; + SmallVector groups{}; + SmallVector linearized; + SmallVector fvars; Module &M; Type *T_size; Triple TT; // Map from original function to one based index in `fvars` std::map func_ids{}; - std::vector orig_funcs{}; + SmallVector orig_funcs{}; // GV addresses and their corresponding function id (i.e. 0-based index in `fvars`) - std::vector> gv_relocs{}; + SmallVector, 0> gv_relocs{}; // Mapping from function id (i.e. 0-based index in `fvars`) to GVs to be initialized. std::map const_relocs; std::map extern_relocs; @@ -400,7 +401,7 @@ struct CloneCtx { }; template -static inline std::vector consume_gv(Module &M, const char *name, bool allow_bad_fvars) +static inline SmallVector consume_gv(Module &M, const char *name, bool allow_bad_fvars) { // Get information about sysimg export functions from the two global variables. // Strip them from the Module so that it's easier to handle the uses. @@ -408,7 +409,7 @@ static inline std::vector consume_gv(Module &M, const char *name, bool allow assert(gv && gv->hasInitializer()); ArrayType *Ty = cast(gv->getInitializer()->getType()); unsigned nele = Ty->getArrayNumElements(); - std::vector res(nele); + SmallVector res(nele); ConstantArray *ary = nullptr; if (gv->getInitializer()->isNullValue()) { for (unsigned i = 0; i < nele; ++i) @@ -440,7 +441,6 @@ CloneCtx::CloneCtx(Module &M, bool allow_bad_fvars) : tbaa_const(tbaa_make_child_with_context(M.getContext(), "jtbaa_const", nullptr, true).first), specs(*get_target_specs(M)), fvars(consume_gv(M, "jl_fvars", allow_bad_fvars)), - gvars(consume_gv(M, "jl_gvars", false)), M(M), T_size(M.getDataLayout().getIntPtrType(M.getContext())), TT(M.getTargetTriple()), @@ -449,7 +449,7 @@ CloneCtx::CloneCtx(Module &M, bool allow_bad_fvars) groups.emplace_back(0); linearized.resize(specs.size()); linearized[0] = &groups[0]; - std::vector group_ids(specs.size(), 0); + SmallVector group_ids(specs.size(), 0); uint32_t ntargets = specs.size(); for (uint32_t i = 1; i < ntargets; i++) { auto &spec = specs[i]; @@ -514,7 +514,7 @@ void CloneCtx::prepare_slots() void CloneCtx::clone_decls() { - std::vector suffixes(specs.size()); + SmallVector suffixes(specs.size()); for (unsigned i = 1; i < specs.size(); i++) { suffixes[i] = "." 
+ std::to_string(i); } @@ -531,7 +531,7 @@ void CloneCtx::clone_decls() new_F->setVisibility(F->getVisibility()); new_F->setDSOLocal(true); auto base_func = F; - if (specs[i].flags & JL_TARGET_CLONE_ALL) + if (!(specs[i].flags & JL_TARGET_CLONE_ALL)) base_func = static_cast(linearized[specs[i].base])->base_func(F); (*linearized[i]->vmap)[base_func] = new_F; } @@ -586,7 +586,7 @@ void CloneCtx::clone_bodies() } for (auto &target : groups[i].clones) { prepare_vmap(*target.vmap); - auto target_F = cast_or_null(map_get(*target.vmap, F)); + auto target_F = cast_or_null(map_get(*target.vmap, group_F)); if (target_F) { if (!F->isDeclaration()) { clone_function(group_F, target_F, *target.vmap); @@ -674,6 +674,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F) trampoline->removeFnAttr("julia.mv.reloc"); trampoline->removeFnAttr("julia.mv.clones"); trampoline->addFnAttr("julia.mv.alias"); + trampoline->setDLLStorageClass(alias->getDLLStorageClass()); alias->eraseFromParent(); uint32_t id; @@ -687,10 +688,10 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F) ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None)); - std::vector Args; + SmallVector Args; for (auto &arg : trampoline->args()) Args.push_back(&arg); - auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, makeArrayRef(Args)); + auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, ArrayRef(Args)); if (F->isVarArg()) { assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!"); call->setTailCallKind(CallInst::TCK_MustTail); @@ -755,7 +756,7 @@ std::pair CloneCtx::get_reloc_slot(Function *F) const if (F->isDeclaration()) { auto extern_decl = extern_relocs.find(F); assert(extern_decl != extern_relocs.end() && "Missing extern relocation slot!"); - return {(uint32_t)-1, extern_decl->second}; + return {UINT32_MAX, extern_decl->second}; } else { auto id = get_func_id(F); @@ -875,46 +876,28 @@ static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base) if (ptr->getType()->isPointerTy()) ptr = ConstantExpr::getPtrToInt(ptr, T_size); auto ptrdiff = ConstantExpr::getSub(ptr, base); - return sizeof(void*) == 8 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff; + return T_size->getPrimitiveSizeInBits() > 32 ? 
ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff; } -template -static Constant *emit_offset_table(Module &M, Type *T_size, const std::vector &vars, StringRef name, StringRef suffix) +static void emit_table(Module &M, Type *T_size, ArrayRef vars, StringRef name, StringRef suffix) { - auto T_int32 = Type::getInt32Ty(M.getContext()); uint32_t nvars = vars.size(); - Constant *base = nullptr; - if (nvars > 0) { - base = ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()); - auto ga = GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, - name + "_base" + suffix, - base, &M); - ga->setVisibility(GlobalValue::HiddenVisibility); - ga->setDSOLocal(true); - } else { - auto gv = new GlobalVariable(M, T_size, true, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), name + "_base" + suffix); - gv->setVisibility(GlobalValue::HiddenVisibility); - gv->setDSOLocal(true); - base = gv; - } - auto vbase = ConstantExpr::getPtrToInt(base, T_size); - std::vector offsets(nvars + 1); - offsets[0] = ConstantInt::get(T_int32, nvars); - if (nvars > 0) { - offsets[1] = ConstantInt::get(T_int32, 0); - for (uint32_t i = 1; i < nvars; i++) - offsets[i + 1] = get_ptrdiff32(T_size, vars[i], vbase); - } - ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); - auto gv = new GlobalVariable(M, vars_type, true, - GlobalVariable::ExternalLinkage, - ConstantArray::get(vars_type, offsets), - name + "_offsets" + suffix); + SmallVector castvars(nvars); + for (size_t i = 0; i < nvars; i++) + castvars[i] = ConstantExpr::getBitCast(vars[i], T_size->getPointerTo()); + auto gv = new GlobalVariable(M, T_size, true, GlobalValue::ExternalLinkage, ConstantInt::get(T_size, nvars), name + "_count" + suffix); + gv->setVisibility(GlobalValue::HiddenVisibility); + gv->setDSOLocal(true); + ArrayType *vars_type = ArrayType::get(T_size->getPointerTo(), nvars); + gv = new GlobalVariable(M, vars_type, false, + GlobalVariable::ExternalLinkage, + ConstantArray::get(vars_type, castvars), + name + "_ptrs" + suffix); gv->setVisibility(GlobalValue::HiddenVisibility); gv->setDSOLocal(true); - return vbase; } + void CloneCtx::emit_metadata() { uint32_t nfvars = fvars.size(); @@ -929,11 +912,8 @@ void CloneCtx::emit_metadata() } // Store back the information about exported functions. 
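// The exported-function table is now emitted as a count plus an array of
// absolute pointers (jl_fvar_count<suffix> / jl_fvar_ptrs<suffix>) via
// emit_table, replacing the old base-symbol-plus-32-bit-offset encoding from
// emit_offset_table (jl_fvar_base / jl_fvar_offsets); the gvar table is no
// longer produced by this pass.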
- auto fbase = emit_offset_table(M, T_size, fvars, "jl_fvar", suffix); - auto gbase = emit_offset_table(M, T_size, gvars, "jl_gvar", suffix); - + emit_table(M, T_size, ArrayRef((Constant* const*)fvars.data(), fvars.size()), "jl_fvar", suffix); M.getGlobalVariable("jl_fvar_idxs")->setName("jl_fvar_idxs" + suffix); - M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs" + suffix); uint32_t ntargets = specs.size(); @@ -942,11 +922,11 @@ void CloneCtx::emit_metadata() { auto T_int32 = Type::getInt32Ty(M.getContext()); std::sort(gv_relocs.begin(), gv_relocs.end(), - [] (const std::pair &lhs, - const std::pair &rhs) { + [] (const std::pair &lhs, + const std::pair &rhs) { return lhs.second < rhs.second; }); - std::vector values{nullptr}; + SmallVector values{nullptr}; uint32_t gv_reloc_idx = 0; uint32_t ngv_relocs = gv_relocs.size(); for (uint32_t id = 0; id < nfvars; id++) { @@ -958,28 +938,31 @@ void CloneCtx::emit_metadata() gv_reloc_idx++) { shared_relocs.insert(id); values.push_back(id_v); - values.push_back(get_ptrdiff32(T_size, gv_relocs[gv_reloc_idx].first, gbase)); + values.push_back(gv_relocs[gv_reloc_idx].first); } auto it = const_relocs.find(id); if (it != const_relocs.end()) { shared_relocs.insert(id); values.push_back(id_v); - values.push_back(get_ptrdiff32(T_size, it->second, gbase)); + values.push_back(it->second); } } values[0] = ConstantInt::get(T_int32, values.size() / 2); ArrayType *vars_type = ArrayType::get(T_int32, values.size()); - auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, - ConstantArray::get(vars_type, values), - "jl_clone_slots" + suffix); + auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, nullptr, "jl_clone_slots" + suffix); + auto gbase = ConstantExpr::getPtrToInt(gv, T_size); + for (size_t i = 2; i < values.size(); i += 2) + values[i] = get_ptrdiff32(T_size, values[i], gbase); + gv->setInitializer(ConstantArray::get(vars_type, values)); gv->setVisibility(GlobalValue::HiddenVisibility); gv->setDSOLocal(true); } - // Generate `jl_dispatch_fvars_idxs` and `jl_dispatch_fvars_offsets` + // Generate `jl_dispatch_fvars_idxs` and `jl_dispatch_fvars` { - std::vector idxs; - std::vector offsets; + SmallVector idxs; + SmallVector fptrs; + Type *Tfptr = T_size->getPointerTo(); for (uint32_t i = 0; i < ntargets; i++) { auto tgt = linearized[i]; auto &spec = specs[i]; @@ -995,7 +978,7 @@ void CloneCtx::emit_metadata() idxs.push_back(j); } if (i != 0) { - offsets.push_back(get_ptrdiff32(T_size, grp->base_func(fvars[j]), fbase)); + fptrs.push_back(grp->base_func(fvars[j])); } } } @@ -1009,12 +992,12 @@ void CloneCtx::emit_metadata() count++; idxs.push_back(jl_sysimg_tag_mask | j); auto f = map_get(*tgt->vmap, base_f, base_f); - offsets.push_back(get_ptrdiff32(T_size, cast(f), fbase)); + fptrs.push_back(cast(f)); } else if (auto f = map_get(*tgt->vmap, base_f)) { count++; idxs.push_back(j); - offsets.push_back(get_ptrdiff32(T_size, cast(f), fbase)); + fptrs.push_back(cast(f)); } } } @@ -1026,11 +1009,13 @@ void CloneCtx::emit_metadata() idxval, "jl_clone_idxs" + suffix); gv1->setVisibility(GlobalValue::HiddenVisibility); gv1->setDSOLocal(true); - ArrayType *offsets_type = ArrayType::get(Type::getInt32Ty(M.getContext()), offsets.size()); + for (size_t i = 0; i < fptrs.size(); i++) + fptrs[i] = ConstantExpr::getBitCast(fptrs[i], Tfptr); + ArrayType *offsets_type = ArrayType::get(Tfptr, fptrs.size()); auto gv2 = new GlobalVariable(M, offsets_type, true, GlobalVariable::ExternalLinkage, - 
ConstantArray::get(offsets_type, offsets), - "jl_clone_offsets" + suffix); + ConstantArray::get(offsets_type, fptrs), + "jl_clone_ptrs" + suffix); gv2->setVisibility(GlobalValue::HiddenVisibility); gv2->setDSOLocal(true); } @@ -1063,9 +1048,7 @@ static bool runMultiVersioning(Module &M, bool allow_bad_fvars) } GlobalVariable *fvars = M.getGlobalVariable("jl_fvars"); - GlobalVariable *gvars = M.getGlobalVariable("jl_gvars"); - if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa(fvars->getInitializer()) || - !gvars || !gvars->hasInitializer() || !isa(gvars->getInitializer()))) + if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa(fvars->getInitializer()))) return false; CloneCtx clone(M, allow_bad_fvars); @@ -1104,34 +1087,12 @@ static bool runMultiVersioning(Module &M, bool allow_bad_fvars) // and collected all the shared/target-specific relocations. clone.emit_metadata(); #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(M)); #endif return true; } -struct MultiVersioningLegacy: public ModulePass { - static char ID; - MultiVersioningLegacy(bool allow_bad_fvars=false) - : ModulePass(ID), allow_bad_fvars(allow_bad_fvars) - {} - -private: - bool runOnModule(Module &M) override; - bool allow_bad_fvars; -}; - -bool MultiVersioningLegacy::runOnModule(Module &M) -{ - return runMultiVersioning(M, allow_bad_fvars); -} - - -char MultiVersioningLegacy::ID = 0; -static RegisterPass X("JuliaMultiVersioning", "JuliaMultiVersioning Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - } // anonymous namespace void multiversioning_preannotate(Module &M) @@ -1149,14 +1110,3 @@ PreservedAnalyses MultiVersioningPass::run(Module &M, ModuleAnalysisManager &AM) } return PreservedAnalyses::all(); } - -Pass *createMultiVersioningPass(bool allow_bad_fvars) -{ - return new MultiVersioningLegacy(allow_bad_fvars); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createMultiVersioningPass(false)); -} diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index b006f191937f5..ca25251040fb2 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -7,6 +7,8 @@ #include "llvm-version.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DerivedTypes.h" #include #include #include @@ -25,9 +27,9 @@ JuliaPassContext::JuliaPassContext() pgcstack_getter(nullptr), adoptthread_func(nullptr), gc_flush_func(nullptr), gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr), - pointer_from_objref_func(nullptr), alloc_obj_func(nullptr), - typeof_func(nullptr), write_barrier_func(nullptr), - call_func(nullptr), call2_func(nullptr), module(nullptr) + pointer_from_objref_func(nullptr), gc_loaded_func(nullptr), alloc_obj_func(nullptr), + typeof_func(nullptr), write_barrier_func(nullptr), pop_handler_noexcept_func(nullptr), + call_func(nullptr), call2_func(nullptr), call3_func(nullptr), module(nullptr) { } @@ -48,11 +50,14 @@ void JuliaPassContext::initFunctions(Module &M) gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin"); gc_preserve_end_func = M.getFunction("llvm.julia.gc_preserve_end"); pointer_from_objref_func = M.getFunction("julia.pointer_from_objref"); + gc_loaded_func = M.getFunction("julia.gc_loaded"); typeof_func = M.getFunction("julia.typeof"); write_barrier_func = M.getFunction("julia.write_barrier"); alloc_obj_func = M.getFunction("julia.gc_alloc_obj"); + pop_handler_noexcept_func = 
M.getFunction(XSTR(jl_pop_handler_noexcept)); call_func = M.getFunction("julia.call"); call2_func = M.getFunction("julia.call2"); + call3_func = M.getFunction("julia.call3"); } void JuliaPassContext::initAll(Module &M) @@ -121,9 +126,14 @@ namespace jl_intrinsics { // Annotates a function with attributes suitable for GC allocation // functions. Specifically, the return value is marked noalias and nonnull. - // The allocation size is set to the first argument. static Function *addGCAllocAttributes(Function *target) { + auto FnAttrs = AttrBuilder(target->getContext()); + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); + FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + target->addFnAttrs(FnAttrs); addRetAttr(target, Attribute::NoAlias); addRetAttr(target, Attribute::NonNull); return target; @@ -151,7 +161,9 @@ namespace jl_intrinsics { auto intrinsic = Function::Create( FunctionType::get( T_prjlvalue, - { Type::getInt8PtrTy(ctx), T_size }, + { PointerType::get(ctx, 0), + T_size, + T_size }, // type false), Function::ExternalLinkage, GC_ALLOC_BYTES_NAME); @@ -214,7 +226,7 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, QUEUE_GC_ROOT_NAME); - intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); return intrinsic; }); @@ -230,14 +242,14 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, SAFEPOINT_NAME); - intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); return intrinsic; }); } namespace jl_well_known { static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); - static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); + static const char *GC_SMALL_ALLOC_NAME = XSTR(jl_gc_small_alloc); static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed); @@ -251,7 +263,7 @@ namespace jl_well_known { auto bigAllocFunc = Function::Create( FunctionType::get( T_prjlvalue, - { Type::getInt8PtrTy(ctx), T_size }, + { PointerType::get(ctx, 0), T_size , T_size}, false), Function::ExternalLinkage, GC_BIG_ALLOC_NAME); @@ -259,20 +271,20 @@ namespace jl_well_known { return addGCAllocAttributes(bigAllocFunc); }); - const WellKnownFunctionDescription GCPoolAlloc( - GC_POOL_ALLOC_NAME, + const WellKnownFunctionDescription GCSmallAlloc( + GC_SMALL_ALLOC_NAME, [](Type *T_size) { auto &ctx = T_size->getContext(); auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); - auto poolAllocFunc = Function::Create( + auto smallAllocFunc = Function::Create( FunctionType::get( T_prjlvalue, - { Type::getInt8PtrTy(ctx), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx) }, + { PointerType::get(ctx, 0), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx), T_size }, false), Function::ExternalLinkage, - GC_POOL_ALLOC_NAME); - poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None)); - return addGCAllocAttributes(poolAllocFunc); + GC_SMALL_ALLOC_NAME); + smallAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None)); + return addGCAllocAttributes(smallAllocFunc); }); const WellKnownFunctionDescription GCQueueRoot( @@ -287,7 +299,7 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_QUEUE_ROOT_NAME); - func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + 
func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); return func; }); @@ -299,9 +311,9 @@ namespace jl_well_known { auto allocTypedFunc = Function::Create( FunctionType::get( T_prjlvalue, - { Type::getInt8PtrTy(ctx), + { PointerType::get(ctx, 0), T_size, - Type::getInt8PtrTy(ctx) }, + T_size }, // type false), Function::ExternalLinkage, GC_ALLOC_TYPED_NAME); diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 727f463dc50ef..d46f1f46634e6 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -56,11 +56,14 @@ struct JuliaPassContext { llvm::Function *gc_preserve_begin_func; llvm::Function *gc_preserve_end_func; llvm::Function *pointer_from_objref_func; + llvm::Function *gc_loaded_func; llvm::Function *alloc_obj_func; llvm::Function *typeof_func; llvm::Function *write_barrier_func; + llvm::Function *pop_handler_noexcept_func; llvm::Function *call_func; llvm::Function *call2_func; + llvm::Function *call3_func; // Creates a pass context. Type and function pointers // are set to `nullptr`. Metadata nodes are initialized. @@ -144,8 +147,8 @@ namespace jl_well_known { // `jl_gc_big_alloc`: allocates bytes. extern const WellKnownFunctionDescription GCBigAlloc; - // `jl_gc_pool_alloc`: allocates bytes. - extern const WellKnownFunctionDescription GCPoolAlloc; + // `jl_gc_small_alloc`: allocates bytes. + extern const WellKnownFunctionDescription GCSmallAlloc; // `jl_gc_queue_root`: queues a GC root. extern const WellKnownFunctionDescription GCQueueRoot; @@ -154,4 +157,6 @@ namespace jl_well_known { extern const WellKnownFunctionDescription GCAllocTyped; } +void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info); + #endif diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index 9f6cfa1beb38e..06a52ad3dcb43 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -43,8 +42,8 @@ using namespace llvm; struct PropagateJuliaAddrspacesVisitor : public InstVisitor { DenseMap LiftingMap; SmallPtrSet Visited; - std::vector ToDelete; - std::vector> ToInsert; + SmallVector ToDelete; + SmallVector, 0> ToInsert; public: Value *LiftPointer(Module *M, Value *V, Instruction *InsertPt=nullptr); @@ -57,18 +56,18 @@ struct PropagateJuliaAddrspacesVisitor : public InstVisitor &Worklist); + void PoisonValues(SmallVectorImpl &Worklist); }; static unsigned getValueAddrSpace(Value *V) { - return cast(V->getType())->getAddressSpace(); + return V->getType()->getPointerAddressSpace(); } static bool isSpecialAS(unsigned AS) { return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; } -void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector &Worklist) { +void PropagateJuliaAddrspacesVisitor::PoisonValues(SmallVectorImpl &Worklist) { while (!Worklist.empty()) { Value *CurrentV = Worklist.back(); Worklist.pop_back(); @@ -83,7 +82,7 @@ void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector &Worklis Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruction *InsertPt) { SmallVector Stack; - std::vector Worklist; + SmallVector Worklist; std::set LocalVisited; unsigned allocaAddressSpace = M->getDataLayout().getAllocaAddrSpace(); Worklist.push_back(V); @@ -106,7 +105,6 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc } else if (auto *GEP = dyn_cast(CurrentV)) { if (LiftingMap.count(GEP)) { - CurrentV = LiftingMap[GEP]; break; } else if 
(Visited.count(GEP)) { return nullptr; @@ -141,7 +139,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc break; } else { // Ok, we've reached a leaf - check if it is eligible for lifting - if (!CurrentV->getType()->isPointerTy() || + if (!CurrentV->getType()->isPtrOrPtrVectorTy() || isSpecialAS(getValueAddrSpace(CurrentV))) { // If not, poison all (recursive) users of this value, to prevent // looking at them again in future iterations. @@ -157,7 +155,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc } // Go through and insert lifted versions of all instructions on the list. - std::vector ToRevisit; + SmallVector ToRevisit; for (Value *V : Stack) { if (LiftingMap.count(V)) continue; @@ -165,14 +163,14 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc Instruction *InstV = cast(V); Instruction *NewV = InstV->clone(); ToInsert.push_back(std::make_pair(NewV, InstV)); - Type *NewRetTy = PointerType::getWithSamePointeeType(cast(InstV->getType()), allocaAddressSpace); + Type *NewRetTy = PointerType::get(InstV->getType(), allocaAddressSpace); NewV->mutateType(NewRetTy); LiftingMap[InstV] = NewV; ToRevisit.push_back(NewV); } } auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * { - PointerType *TargetType = PointerType::getWithSamePointeeType(cast(CurrentV->getType()), allocaAddressSpace); + PointerType *TargetType = PointerType::get(CurrentV->getType(), allocaAddressSpace); while (!LiftingMap.count(CurrentV)) { if (isa(CurrentV)) CurrentV = cast(CurrentV)->getOperand(0); @@ -186,13 +184,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc } if (LiftingMap.count(CurrentV)) CurrentV = LiftingMap[CurrentV]; - if (CurrentV->getType() != TargetType) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(CurrentV->getContext().supportsTypedPointers()); - auto *BCI = new BitCastInst(CurrentV, TargetType); - ToInsert.push_back(std::make_pair(BCI, InsertPt)); - CurrentV = BCI; - } + assert(CurrentV->getType() == TargetType); return CurrentV; }; @@ -298,32 +290,11 @@ bool propagateJuliaAddrspaces(Function &F) { visitor.Visited.clear(); return true; } - -struct PropagateJuliaAddrspacesLegacy : FunctionPass { - static char ID; - - PropagateJuliaAddrspacesLegacy() : FunctionPass(ID) {} - bool runOnFunction(Function &F) override { - bool modified = propagateJuliaAddrspaces(F); -#ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); -#endif - return modified; - } -}; - -char PropagateJuliaAddrspacesLegacy::ID = 0; -static RegisterPass X("PropagateJuliaAddrspaces", "Propagate (non-)rootedness information", false, false); - -Pass *createPropagateJuliaAddrspaces() { - return new PropagateJuliaAddrspacesLegacy(); -} - PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysisManager &AM) { bool modified = propagateJuliaAddrspaces(F); #ifdef JL_VERIFY_PASSES - assert(!verifyFunction(F, &errs())); + assert(!verifyLLVMIR(F)); #endif if (modified) { return PreservedAnalyses::allInSet(); @@ -331,9 +302,3 @@ PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysi return PreservedAnalyses::all(); } } - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createPropagateJuliaAddrspaces()); -} diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 840efaebee032..e36136859517a 100644 --- 
a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -9,9 +9,8 @@ #include #include -#include +#include #include -#include #include #include #include @@ -64,7 +63,7 @@ struct LowerPTLS { void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const { - addFnAttr(pgcstack, Attribute::ReadNone); + pgcstack->addFnAttr(Attribute::getWithMemoryEffects(pgcstack->getContext(), MemoryEffects::none())); addFnAttr(pgcstack, Attribute::NoUnwind); } @@ -86,14 +85,14 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor // The add instruction clobbers flags if (offset) { - std::vector args(0); + SmallVector args(0); args.push_back(offset->getType()); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false), + auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), args, false), dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false); tls = builder.CreateCall(tp, {offset}, "pgcstack"); } else { - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), + auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false), const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}", false); tls = builder.CreateCall(tp, {}, "tls_pgcstack"); @@ -110,6 +109,8 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor asm_str = "mrs $0, tpidr_el0"; } else if (TargetTriple.isARM()) { asm_str = "mrc p15, 0, $0, c13, c0, 3"; + } else if (TargetTriple.isRISCV()) { + asm_str = "mv $0, tp"; } else if (TargetTriple.getArch() == Triple::x86_64) { asm_str = "movq %fs:0, $0"; } else if (TargetTriple.getArch() == Triple::x86) { @@ -119,11 +120,10 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor } if (!offset) offset = ConstantInt::getSigned(T_size, jl_tls_offset); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false); + auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false), asm_str, "=r", false); tls = builder.CreateCall(tp, {}, "thread_ptr"); - tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack"); + tls = builder.CreateInBoundsGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack"); } - tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo()); return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack"); } @@ -185,7 +185,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, builder.SetInsertPoint(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; - Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true); + Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true); builder.Insert(fastTerm); phi->addIncoming(pgcstack, fastTerm->getParent()); // emit pre-return cleanup @@ -197,7 +197,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { builder.SetInsertPoint(BB.getTerminator()); - emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true); + 
emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true); } } } @@ -329,6 +329,7 @@ bool LowerPTLS::run(bool *CFGModified) } } if (pgcstack) { + pgcstack->takeName(call); call->replaceAllUsesWith(pgcstack); call->eraseFromParent(); continue; @@ -342,31 +343,6 @@ bool LowerPTLS::run(bool *CFGModified) }; return runOnGetter(false) + runOnGetter(true); } - -struct LowerPTLSLegacy: public ModulePass { - static char ID; - LowerPTLSLegacy(bool imaging_mode=false) - : ModulePass(ID), - imaging_mode(imaging_mode) - {} - - bool imaging_mode; - bool runOnModule(Module &M) override { - LowerPTLS lower(M, imaging_mode); - bool modified = lower.run(nullptr); -#ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); -#endif - return modified; - } -}; - -char LowerPTLSLegacy::ID = 0; - -static RegisterPass X("LowerPTLS", "LowerPTLS Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - } // anonymous namespace PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) { @@ -374,7 +350,7 @@ PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) { bool CFGModified = false; bool modified = lower.run(&CFGModified); #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(M)); #endif if (modified) { if (CFGModified) { @@ -385,14 +361,3 @@ PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) { } return PreservedAnalyses::all(); } - -Pass *createLowerPTLSPass(bool imaging_mode) -{ - return new LowerPTLSLegacy(imaging_mode); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode) -{ - unwrap(PM)->add(createLowerPTLSPass(imaging_mode)); -} diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp index b964c20e3353e..bb492f467e74c 100644 --- a/src/llvm-remove-addrspaces.cpp +++ b/src/llvm-remove-addrspaces.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -45,15 +44,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper { DstTy = SrcTy; if (auto Ty = dyn_cast(SrcTy)) { - if (Ty->isOpaque()) { - DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace())); - } - else { - //Remove once opaque pointer transition is complete - DstTy = PointerType::get( - remapType(Ty->getNonOpaquePointerElementType()), - ASRemapper(Ty->getAddressSpace())); - } + DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace())); } else if (auto Ty = dyn_cast(SrcTy)) { SmallVector Params; @@ -154,18 +145,7 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer { Ops.push_back(NewOp ? cast(NewOp) : Op); } - if (CE->getOpcode() == Instruction::GetElementPtr) { - // GEP const exprs need to know the type of the source. - // asserts remapType(typeof arg0) == typeof mapValue(arg0). 
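An illustrative aside on the llvm-ptls.cpp hunks above: the lowered pgcstack access now reads the OS thread pointer through target-specific inline assembly returning an opaque pointer, with "mv $0, tp" newly covering RISC-V. Below is a minimal sketch of that pattern against plain LLVM 17+; emitThreadPointer is an illustrative name, not a Julia API, and only a few targets are spelled out.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/TargetParser/Triple.h"

using namespace llvm;

// Read the thread pointer for the current target and hand it back as an
// opaque pointer in address space 0 (the same shape llvm-ptls.cpp produces).
static Value *emitThreadPointer(IRBuilder<> &Builder, const Triple &TT)
{
    const char *AsmStr;
    if (TT.isAArch64())
        AsmStr = "mrs $0, tpidr_el0";
    else if (TT.isRISCV())
        AsmStr = "mv $0, tp";            // the case added in this pass
    else if (TT.getArch() == Triple::x86_64)
        AsmStr = "movq %fs:0, $0";
    else
        return nullptr;                  // other targets omitted from the sketch
    auto *FT = FunctionType::get(PointerType::get(Builder.getContext(), 0),
                                 /*isVarArg=*/false);
    auto *TP = InlineAsm::get(FT, AsmStr, /*Constraints=*/"=r",
                              /*hasSideEffects=*/false);
    return Builder.CreateCall(TP, {}, "thread_ptr");
}

The per-thread pgcstack slot is then reached with an inbounds i8 GEP at the TLS offset, as in the hunk above, with no bitcast now that the pointee type is opaque.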
- Constant *Src = CE->getOperand(0); - auto ptrty = cast(Src->getType()->getScalarType()); - //Remove once opaque pointer transition is complete - if (!ptrty->isOpaque()) { - Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType()); - DstV = CE->getWithOperands(Ops, Ty, false, SrcTy); - } - } - else + if (CE->getOpcode() != Instruction::GetElementPtr) DstV = CE->getWithOperands(Ops, Ty); } } @@ -209,7 +189,12 @@ bool RemoveNoopAddrSpaceCasts(Function *F) LLVM_DEBUG( dbgs() << "Removing noop address space cast:\n" << I << "\n"); - ASC->replaceAllUsesWith(ASC->getOperand(0)); + if (ASC->getType() == ASC->getOperand(0)->getType()) { + ASC->replaceAllUsesWith(ASC->getOperand(0)); + } else { + // uncanonicalized addrspacecast; just use the value + ASC->replaceAllUsesWith(ASC->getOperand(0)); + } NoopCasts.push_back(ASC); } } @@ -336,7 +321,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) GlobalVariable *NGV = cast(VMap[GV]); if (GV->hasInitializer()) - NGV->setInitializer(MapValue(GV->getInitializer(), VMap)); + NGV->setInitializer(MapValue(GV->getInitializer(), VMap, RF_None, &TypeRemapper, &Materializer)); SmallVector, 1> MDs; GV->getAllMetadata(MDs); @@ -401,7 +386,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) for (GlobalAlias *GA : Aliases) { GlobalAlias *NGA = cast(VMap[GA]); if (const Constant *C = GA->getAliasee()) - NGA->setAliasee(MapValue(C, VMap)); + NGA->setAliasee(MapValue(C, VMap, RF_None, &TypeRemapper, &Materializer)); GA->setAliasee(nullptr); } @@ -424,7 +409,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE;) { Function *F = &*FI++; if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) { - F->replaceAllUsesWith(Remangled.getValue()); + F->replaceAllUsesWith(*Remangled); F->eraseFromParent(); } } @@ -433,42 +418,12 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) } -struct RemoveAddrspacesPassLegacy : public ModulePass { - static char ID; - AddrspaceRemapFunction ASRemapper; - RemoveAddrspacesPassLegacy( - AddrspaceRemapFunction ASRemapper = removeAllAddrspaces) - : ModulePass(ID), ASRemapper(ASRemapper){}; - -public: - bool runOnModule(Module &M) override { - bool modified = removeAddrspaces(M, ASRemapper); -#ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); -#endif - return modified; - } -}; - -char RemoveAddrspacesPassLegacy::ID = 0; -static RegisterPass - X("RemoveAddrspaces", - "Remove IR address space information.", - false, - false); - -Pass *createRemoveAddrspacesPass( - AddrspaceRemapFunction ASRemapper = removeAllAddrspaces) -{ - return new RemoveAddrspacesPassLegacy(ASRemapper); -} - RemoveAddrspacesPass::RemoveAddrspacesPass() : RemoveAddrspacesPass(removeAllAddrspaces) {} PreservedAnalyses RemoveAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) { bool modified = removeAddrspaces(M, ASRemapper); #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(M)); #endif if (modified) { return PreservedAnalyses::allInSet(); @@ -490,32 +445,7 @@ unsigned removeJuliaAddrspaces(unsigned AS) return AS; } -struct RemoveJuliaAddrspacesPassLegacy : public ModulePass { - static char ID; - RemoveAddrspacesPassLegacy Pass; - RemoveJuliaAddrspacesPassLegacy() : ModulePass(ID), Pass(removeJuliaAddrspaces){}; - - bool runOnModule(Module &M) override { return Pass.runOnModule(M); } -}; - -char RemoveJuliaAddrspacesPassLegacy::ID = 0; -static RegisterPass - 
Y("RemoveJuliaAddrspaces", - "Remove IR address space information.", - false, - false); - -Pass *createRemoveJuliaAddrspacesPass() -{ - return new RemoveJuliaAddrspacesPassLegacy(); -} PreservedAnalyses RemoveJuliaAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) { return RemoveAddrspacesPass(removeJuliaAddrspaces).run(M, AM); } - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createRemoveJuliaAddrspacesPass()); -} diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp index 5e8f54b98e417..85275dddb101e 100644 --- a/src/llvm-remove-ni.cpp +++ b/src/llvm-remove-ni.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "julia.h" @@ -43,33 +42,3 @@ PreservedAnalyses RemoveNIPass::run(Module &M, ModuleAnalysisManager &AM) } return PreservedAnalyses::all(); } - -namespace { -struct RemoveNILegacy : public ModulePass { - static char ID; - RemoveNILegacy() : ModulePass(ID) {}; - - bool runOnModule(Module &M) - { - return removeNI(M); - } -}; - -char RemoveNILegacy::ID = 0; -static RegisterPass - Y("RemoveNI", - "Remove non-integral address space.", - false, - false); -} - -Pass *createRemoveNIPass() -{ - return new RemoveNILegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createRemoveNIPass()); -} diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 21e2ec574d650..3faa9d9728e67 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -11,7 +11,6 @@ // as independent of each other. // // The pass hinges on a call to a marker function that has metadata attached to it. -// To construct the pass call `createLowerSimdLoopPass`. #include "support/dtypes.h" @@ -21,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -42,6 +41,7 @@ STATISTIC(ReductionChainLength, "Total sum of instructions folded from reduction STATISTIC(MaxChainLength, "Max length of reduction chain"); STATISTIC(AddChains, "Addition reduction chains"); STATISTIC(MulChains, "Multiply reduction chains"); +STATISTIC(TotalContracted, "Total number of multiplies marked for FMA"); #ifndef __clang_gcanalyzer__ #define REMARK(remark) ORE.emit(remark) @@ -50,6 +50,49 @@ STATISTIC(MulChains, "Multiply reduction chains"); #endif namespace { +/** + * Combine + * ``` + * %v0 = fmul ... %a, %b + * %v = fadd contract ... %v0, %c + * ``` + * to + * %v0 = fmul contract ... %a, %b + * %v = fadd contract ... %v0, %c + * when `%v0` has no other use + */ + +static bool checkCombine(Value *maybeMul, Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +{ + auto mulOp = dyn_cast(maybeMul); + if (!mulOp || mulOp->getOpcode() != Instruction::FMul) + return false; + if (!L.contains(mulOp)) + return false; + if (!mulOp->hasOneUse()) { + LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n"); + REMARK([&](){ + return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp) + << "fmul had multiple uses " << ore::NV("fmul", mulOp); + }); + return false; + } + // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us. 
+ auto fmf = mulOp->getFastMathFlags(); + if (!fmf.allowContract()) { + LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n"); + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp) + << "marked for fma " << ore::NV("fmul", mulOp); + }); + ++TotalContracted; + fmf.setAllowContract(true); + mulOp->copyFastMathFlags(fmf); + return true; + } + return false; +} + static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFEPOINT { switch (J->getOpcode()) { @@ -73,7 +116,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv, /// mark the ops as permitting reassociation/commuting. /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer -static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { typedef SmallVector chainVector; chainVector chain; @@ -84,7 +127,7 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe // Find the user of instruction I that is within loop L. for (User *UI : I->users()) { /*}*/ Instruction *U = cast(UI); - if (L->contains(U)) { + if (L.contains(U)) { if (J) { LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n"); REMARK([&]() { @@ -151,128 +194,116 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe }); (*K)->setHasAllowReassoc(true); (*K)->setHasAllowContract(true); + switch ((*K)->getOpcode()) { + case Instruction::FAdd: { + if (!(*K)->hasAllowContract()) + continue; + // (*K)->getOperand(0)->print(dbgs()); + // (*K)->getOperand(1)->print(dbgs()); + checkCombine((*K)->getOperand(0), L, ORE); + checkCombine((*K)->getOperand(1), L, ORE); + break; + } + case Instruction::FSub: { + if (!(*K)->hasAllowContract()) + continue; + // (*K)->getOperand(0)->print(dbgs()); + // (*K)->getOperand(1)->print(dbgs()); + checkCombine((*K)->getOperand(0), L, ORE); + checkCombine((*K)->getOperand(1), L, ORE); + break; + } + default: + break; + } + if (SE) + SE->forgetValue(*K); ++length; } ReductionChainLength += length; MaxChainLength.updateMax(length); } -static bool markLoopInfo(Module &M, Function *marker, function_ref GetLI) JL_NOTSAFEPOINT +static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { - bool Changed = false; - std::vector ToDelete; - for (User *U : marker->users()) { - ++TotalMarkedLoops; - Instruction *I = cast(U); - ToDelete.push_back(I); - - BasicBlock *B = I->getParent(); - OptimizationRemarkEmitter ORE(B->getParent()); - LoopInfo &LI = GetLI(*B->getParent()); - Loop *L = LI.getLoopFor(B); - if (!L) { - I->removeFromParent(); - continue; - } - - LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n"); - bool simd = false; - bool ivdep = false; - SmallVector MDs; - - BasicBlock *Lh = L->getHeader(); - LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - - // Reserve first location for self reference to the LoopID metadata node. 
- TempMDTuple TempNode = MDNode::getTemporary(Lh->getContext(), None); - MDs.push_back(TempNode.get()); - - // Walk `julia.loopinfo` metadata and filter out `julia.simdloop` and `julia.ivdep` - if (I->hasMetadataOtherThanDebugLoc()) { - MDNode *JLMD= I->getMetadata("julia.loopinfo"); - if (JLMD) { - LLVM_DEBUG(dbgs() << "LSL: has julia.loopinfo metadata with " << JLMD->getNumOperands() <<" operands\n"); - for (unsigned i = 0, ie = JLMD->getNumOperands(); i < ie; ++i) { - Metadata *Op = JLMD->getOperand(i); - const MDString *S = dyn_cast(Op); - if (S) { - LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); - if (S->getString().startswith("julia")) { - if (S->getString().equals("julia.simdloop")) - simd = true; - if (S->getString().equals("julia.ivdep")) - ivdep = true; - continue; - } - } - MDs.push_back(Op); - } - } - } - - LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); - - REMARK([=]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", I->getDebugLoc(), B) - << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; - }); - - MDNode *n = L->getLoopID(); - if (n) { - // Loop already has a LoopID so copy over Metadata - // original loop id is operand 0 - for (unsigned i = 1, ie = n->getNumOperands(); i < ie; ++i) { - Metadata *Op = n->getOperand(i); - MDs.push_back(Op); + MDNode *LoopID = L.getLoopID(); + if (!LoopID) + return false; + bool simd = false; + bool ivdep = false; + + BasicBlock *Lh = L.getHeader(); + LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); + + SmallVector MDs(1); + // First Operand is self-reference + // Drop `julia.` prefixes + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + Metadata *Op = LoopID->getOperand(i); + const MDString *S = dyn_cast(Op); + if (S) { + LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); + if (S->getString().starts_with("julia")) { + if (S->getString() == "julia.simdloop") + simd = true; + if (S->getString() == "julia.ivdep") + ivdep = true; + continue; } } - MDNode *LoopID = MDNode::getDistinct(Lh->getContext(), MDs); - // Replace the temporary node with a self-reference. - LoopID->replaceOperandWith(0, LoopID); - L->setLoopID(LoopID); - assert(L->getLoopID()); + MDs.push_back(Op); + } + LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); + if (!simd && !ivdep) + return false; + ++TotalMarkedLoops; + LLVMContext &Context = L.getHeader()->getContext(); + LoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself + LoopID->replaceOperandWith(0, LoopID); + L.setLoopID(LoopID); + + REMARK([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader()) + << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; + }); + + // If ivdep is true we assume that there is no memory dependency between loop iterations + // This is a fairly strong assumption and does often not hold true for generic code. + if (ivdep) { + ++IVDepLoops; MDNode *m = MDNode::get(Lh->getContext(), ArrayRef(LoopID)); - - // If ivdep is true we assume that there is no memory dependency between loop iterations - // This is a fairly strong assumption and does often not hold true for generic code. 
- if (ivdep) { - ++IVDepLoops; - // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (I.mayReadOrWriteMemory()) { - ++IVDepInstructions; - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); - } - } + // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayReadOrWriteMemory()) { + ++IVDepInstructions; + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); + } } - assert(L->isAnnotatedParallel()); } + assert(L.isAnnotatedParallel()); + } - if (simd) { - ++SimdLoops; - // Mark floating-point reductions as okay to reassociate/commute. - for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { - if (PHINode *Phi = dyn_cast(I)) - enableUnsafeAlgebraIfReduction(Phi, L, ORE); - else - break; - } + if (simd) { + ++SimdLoops; + // Mark floating-point reductions as okay to reassociate/commute. + for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { + if (PHINode *Phi = dyn_cast(I)) + enableUnsafeAlgebraIfReduction(Phi, L, ORE, SE); + else + break; } - I->removeFromParent(); - - Changed = true; + if (SE) + SE->forgetLoopDispositions(); } - for (Instruction *I : ToDelete) - I->deleteValue(); - marker->eraseFromParent(); #ifdef JL_VERIFY_PASSES - assert(!verifyModule(M, &errs())); + assert(!verifyLLVMIR(L)); #endif - return Changed; + return true; } } // end anonymous namespace @@ -283,78 +314,21 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref(M).getManager(); +PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) - auto GetLI = [&FAM](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - - if (markLoopInfo(M, loopinfo_marker, GetLI)) { - auto preserved = PreservedAnalyses::allInSet(); - preserved.preserve(); +{ + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); + if (processLoop(L, ORE, &AR.SE)) { +#ifdef JL_DEBUG_BUILD + if (AR.MSSA) + AR.MSSA->verifyMemorySSA(); +#endif + auto preserved = getLoopPassPreservedAnalyses(); + preserved.preserveSet(); + preserved.preserve(); return preserved; } return PreservedAnalyses::all(); } - -namespace { -class LowerSIMDLoopLegacy : public ModulePass { - //LowerSIMDLoop Impl; - -public: - static char ID; - - LowerSIMDLoopLegacy() : ModulePass(ID) { - } - - bool runOnModule(Module &M) override { - bool Changed = false; - - Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker"); - - auto GetLI = [this](Function &F) JL_NOTSAFEPOINT -> LoopInfo & { - return getAnalysis(F).getLoopInfo(); - }; - - if (loopinfo_marker) - Changed |= markLoopInfo(M, loopinfo_marker, GetLI); - - return Changed; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override - { - ModulePass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } -}; - -} // end anonymous namespace - -char LowerSIMDLoopLegacy::ID = 0; - -static RegisterPass X("LowerSIMDLoop", "LowerSIMDLoop Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -Pass *createLowerSimdLoopPass() -{ - return new LowerSIMDLoopLegacy(); -} - -extern "C" JL_DLLEXPORT_CODEGEN -void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createLowerSimdLoopPass()); -} diff --git a/src/llvm-version.h b/src/llvm-version.h index 01638b8d44a6e..061d80deb02f9 100644 
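The rewrite above turns LowerSIMDLoop from a module-level walk over the loopinfo marker into a loop pass under the new pass manager that inspects the loop-ID metadata directly. As a point of reference, here is a minimal sketch of that pass shape against plain LLVM; ExampleLoopAnnotationPass and the "example.annotation" string are illustrative stand-ins, not Julia's names.

#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"

using namespace llvm;

// Minimal new-PM loop pass: find a custom string operand in the loop-ID
// metadata, drop it, and re-attach a fresh self-referential loop ID.
struct ExampleLoopAnnotationPass : PassInfoMixin<ExampleLoopAnnotationPass> {
    PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                          LoopStandardAnalysisResults &AR, LPMUpdater &U) {
        MDNode *LoopID = L.getLoopID();
        if (!LoopID)
            return PreservedAnalyses::all();
        bool Found = false;
        SmallVector<Metadata *, 4> MDs(1); // slot 0 is reserved for the self reference
        for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
            Metadata *Op = LoopID->getOperand(i);
            const MDString *S = dyn_cast<MDString>(Op);
            if (S && S->getString() == "example.annotation") {
                Found = true;   // consume the marker instead of copying it over
                continue;
            }
            MDs.push_back(Op);
        }
        if (!Found)
            return PreservedAnalyses::all();
        MDNode *NewID = MDNode::get(L.getHeader()->getContext(), MDs);
        NewID->replaceOperandWith(0, NewID); // loop IDs refer to themselves
        L.setLoopID(NewID);
        // ... act on the annotation here, e.g. tag memory accesses or set FMF bits ...
        return getLoopPassPreservedAnalyses();
    }
};

Such a pass is added to a function pipeline via createFunctionToLoopPassAdaptor(ExampleLoopAnnotationPass()), which is presumably the hook the updated pipeline uses in place of the deleted legacy registration.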
--- a/src/llvm-version.h +++ b/src/llvm-version.h @@ -10,21 +10,12 @@ #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \ + LLVM_VERSION_PATCH) -#if JL_LLVM_VERSION < 140000 - #error Only LLVM versions >= 14.0.0 are supported by Julia +#if JL_LLVM_VERSION < 170000 + #error Only LLVM versions >= 17.0.0 are supported by Julia #endif -#if JL_LLVM_VERSION >= 160000 -#define JL_LLVM_OPAQUE_POINTERS 1 -#endif - -// Pre GCC 12 libgcc defined the ABI for Float16->Float32 -// to take an i16. GCC 12 silently changed the ABI to now pass -// Float16 in Float32 registers. -#if JL_LLVM_VERSION < 150000 || defined(_CPU_PPC64_) || defined(_CPU_PPC_) -#define JULIA_FLOAT16_ABI 1 -#else -#define JULIA_FLOAT16_ABI 2 +#if JL_LLVM_VERSION < 19000 && defined(_CPU_RISCV64_) + #error Only LLVM versions >= 19.0.0 are supported by Julia on RISC-V #endif #ifdef __cplusplus diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp index d56fb3a0497fa..8c48b5661f984 100644 --- a/src/llvm_api.cpp +++ b/src/llvm_api.cpp @@ -10,7 +10,6 @@ #endif #include "jitlayers.h" -#include "passes.h" #include #include @@ -21,6 +20,7 @@ #include #include +#if JL_LLVM_VERSION < 180000 namespace llvm { namespace orc { class OrcV2CAPIHelper { @@ -38,7 +38,7 @@ class OrcV2CAPIHelper { }; } // namespace orc } // namespace llvm - +#endif typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef; typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef; @@ -46,20 +46,17 @@ typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef) +#if JL_LLVM_VERSION >= 180000 +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::SymbolStringPoolEntryUnsafe::PoolEntry, + LLVMOrcSymbolStringPoolEntryRef) +#else DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry, LLVMOrcSymbolStringPoolEntryRef) +#endif DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, LLVMOrcMaterializationResponsibilityRef) -typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef; -typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef; -typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef; - -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef) -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef) -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef) - extern "C" { JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) @@ -113,7 +110,11 @@ JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef JLJITMangleAndIntern_impl(JuliaOJITRef JIT, const char *Name) { +#if JL_LLVM_VERSION >= 180000 + return wrap(orc::SymbolStringPoolEntryUnsafe::take(unwrap(JIT)->mangle(Name)).rawPtr()); +#else return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name))); +#endif } JL_DLLEXPORT_CODEGEN const char * @@ -140,27 +141,4 @@ JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) return wrap(&unwrap(JIT)->getIRCompileLayer()); } -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void 
LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ - JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \ - { \ - unwrap(PM)->addPass(CREATE_PASS); \ - } - -#include "llvm-julia-passes.inc" - -#undef MODULE_PASS -#undef CGSCC_PASS -#undef FUNCTION_PASS -#undef LOOP_PASS - } // extern "C" diff --git a/src/llvmcalltest.cpp b/src/llvmcalltest.cpp index 93c442445d79a..2ab16f3ac6d67 100644 --- a/src/llvmcalltest.cpp +++ b/src/llvmcalltest.cpp @@ -6,6 +6,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" #include #include "julia.h" diff --git a/src/mach_excServer.c b/src/mach_excServer.c index 7e99331fa8554..669fd0e2313aa 100644 --- a/src/mach_excServer.c +++ b/src/mach_excServer.c @@ -20,7 +20,7 @@ * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * https://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER diff --git a/src/macroexpand.scm b/src/macroexpand.scm index e0e809eee08f1..f67145317dc7a 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -126,6 +126,16 @@ (else '()))) (else '())))))) + ;; for/generator + (pattern-lambda (for assgn body) + (if (eq? (car assgn) 'block) + `(varlist ,@(map cadr (cdr assgn))) + (cons 'varlist (cadr assgn)))) + (pattern-lambda (generator body (filter filt . assgn)) + (cons 'varlist (map (lambda (x) (cadr x)) assgn))) + (pattern-lambda (generator body . assgn) + (cons 'varlist (map (lambda (x) (cadr x)) assgn))) + ;; macro definition (pattern-lambda (macro (call name . argl) body) `(-> (tuple ,@argl) ,body)) @@ -184,18 +194,18 @@ (unescape (cadr e)) e)) -(define (unescape-global-lhs e env m parent-scope inarg) +(define (unescape-global-lhs e env m lno parent-scope inarg) (cond ((not (pair? e)) e) - ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m parent-scope inarg)) + ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m lno parent-scope inarg)) ((memq (car e) '(parameters tuple)) (list* (car e) (map (lambda (e) - (unescape-global-lhs e env m parent-scope inarg)) + (unescape-global-lhs e env m lno parent-scope inarg)) (cdr e)))) ((and (memq (car e) '(|::| kw)) (length= e 3)) - (list (car e) (unescape-global-lhs (cadr e) env m parent-scope inarg) - (resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) + (list (car e) (unescape-global-lhs (cadr e) env m lno parent-scope inarg) + (resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg))) (else - (resolve-expansion-vars-with-new-env e env m parent-scope inarg)))) + (resolve-expansion-vars-with-new-env e env m lno parent-scope inarg)))) (define (typedef-expr-name e) (cond ((atom? e) e) @@ -219,30 +229,26 @@ lst))) ;; get the name from a function formal argument expression, allowing `(escape x)` -(define (try-arg-name v) - (cond ((symbol? v) (list v)) +(define (try-arg-name v (escaped #f)) + (cond ((symbol? v) (if escaped '() (list v))) ((atom? v) '()) (else (case (car v) - ((|::|) (if (length= v 2) '() (try-arg-name (cadr v)))) - ((... 
kw =) (try-arg-name (cadr v))) - ((escape) (list v)) - ((hygienic-scope) (try-arg-name (cadr v))) + ((|::|) (if (length= v 2) '() (try-arg-name (cadr v) escaped))) + ((... kw =) (try-arg-name (cadr v) escaped)) + ((escape) (if escaped (list (cadr v)) '())) + ((hygienic-scope) (try-arg-name (cadr v) escaped)) + ((tuple) (apply nconc (map (lambda (e) (try-arg-name e escaped)) (cdr v)))) ((meta) ;; allow certain per-argument annotations (if (nospecialize-meta? v #t) - (try-arg-name (caddr v)) + (try-arg-name (caddr v) escaped) '())) (else '()))))) ;; get names from a formal argument list, specifying whether to include escaped ones (define (safe-arg-names lst (escaped #f)) (apply nconc - (map (lambda (v) - (let ((vv (try-arg-name v))) - (if (eq? escaped (and (pair? vv) (pair? (car vv)) (eq? (caar vv) 'escape))) - (if escaped (list (cadar vv)) vv) - '()))) - lst))) + (map (lambda (v) (try-arg-name v escaped)) lst))) ;; arg names, looking only at positional args (define (safe-llist-positional-args lst (escaped #f)) @@ -280,18 +286,18 @@ ;; resolve-expansion-vars-with-new-env, but turn on `inarg` if we get inside ;; a formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, ;; and we want `inarg` to be true for the `(x)` part. -(define (resolve-in-lhs e env m parent-scope inarg) - (define (recur x) (resolve-in-lhs x env m parent-scope inarg)) - (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) +(define (resolve-in-lhs e env m lno parent-scope inarg) + (define (recur x) (resolve-in-lhs x env m lno parent-scope inarg)) + (define (other x) (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)) (case (and (pair? e) (car e)) ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e)))) ((|::|) `(|::| ,(recur (cadr e)) ,(other (caddr e)))) ((call) `(call ,(other (cadr e)) ,@(map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope #t)) + (resolve-expansion-vars-with-new-env x env m lno parent-scope #t)) (cddr e)))) ((tuple) `(tuple ,@(map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope #t)) + (resolve-expansion-vars-with-new-env x env m lno parent-scope #t)) (cdr e)))) (else (other e)))) @@ -328,7 +334,7 @@ (keywords-introduced-by x)) env))))))) -(define (resolve-expansion-vars-with-new-env x env m parent-scope inarg (outermost #f)) +(define (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg (outermost #f)) (resolve-expansion-vars- x (if (and (pair? x) (eq? (car x) 'let)) @@ -336,14 +342,50 @@ ;; the same expression env (new-expansion-env-for x env outermost)) - m parent-scope inarg)) + m lno parent-scope inarg)) (define (reescape ux x) (if (and (pair? x) (eq? (car x) 'escape)) - (reescape '(escape ,ux) (cadr x))) - ux) - -(define (resolve-expansion-vars- e env m parent-scope inarg) + (reescape `(escape ,ux) (cadr x)) + ux)) + +;; type has special behavior: identifiers inside are +;; field names, not expressions. +(define (resolve-struct-field-expansion x env m lno parent-scope inarg) + (let ((ux (unescape x))) + (cond + ((atom? ux) ux) + ((and (pair? ux) (eq? (car ux) '|::|)) + `(|::| ,(unescape (cadr ux)) + ,(resolve-expansion-vars- (reescape (caddr ux) x) env m lno parent-scope inarg))) + ((and (pair? 
ux) (memq (car ux) '(const atomic))) + `(,(car ux) ,(resolve-struct-field-expansion (reescape (cadr ux) x) env m lno parent-scope inarg))) + (else + (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))))) + +(define (resolve-letlike-assign bind env newenv m lno parent-scope inarg) + (if (assignment? bind) + (make-assignment + ;; expand binds in newenv with dummy RHS + (cadr (resolve-expansion-vars- (make-assignment (cadr bind) 0) + newenv m lno parent-scope inarg)) + ;; expand initial values in old env + (resolve-expansion-vars- (caddr bind) env m lno parent-scope inarg)) + ;; Just expand everything else that's not an assignment. N.B.: This includes + ;; assignments inside escapes, which probably need special handling (TODO). + (resolve-expansion-vars- bind newenv m lno parent-scope inarg))) + +(define (for-ranges-list ranges) + (if (eq? (car ranges) 'escape) + (map (lambda (range) `(escape ,range)) (for-ranges-list (cadr ranges))) + (if (eq? (car ranges) 'block) + (cdr ranges) + (list ranges)))) + +(define (just-line? ex) + (and (pair? ex) (eq? (car ex) 'line) (atom? (cadr ex)) (or (atom? (caddr ex)) (nothing? (caddr ex))))) + +(define (resolve-expansion-vars- e env m lno parent-scope inarg) (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e)) e) ((symbol? e) @@ -362,31 +404,35 @@ (env (car scope)) (m (cadr scope)) (parent-scope (cdr parent-scope))) - (resolve-expansion-vars-with-new-env (cadr e) env m parent-scope inarg)))) + (resolve-expansion-vars-with-new-env (cadr e) env m lno parent-scope inarg)))) ((global) `(global ,@(map (lambda (arg) (if (assignment? arg) - `(= ,(unescape-global-lhs (cadr arg) env m parent-scope inarg) - ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)) - (unescape-global-lhs arg env m parent-scope inarg))) + `(= ,(unescape-global-lhs (cadr arg) env m lno parent-scope inarg) + ,(resolve-expansion-vars-with-new-env (caddr arg) env m lno parent-scope inarg)) + (unescape-global-lhs arg env m lno parent-scope inarg))) (cdr e)))) - ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e)) + ((toplevel) ; re-wrap Expr(:toplevel) in the current hygienic-scope(s) + `(toplevel + ,@(map (lambda (arg) + ;; Minor optimization: A lot of toplevel exprs have just bare line numbers in them. + ;; don't bother with the full rewrapping in that case (even though + ;; this would be semantically legal) - lowering won't touch them anyways. + (if (just-line? arg) arg + (let loop ((parent-scope parent-scope) (m m) (lno lno) (arg arg)) + (let ((wrapped `(hygienic-scope ,arg ,m ,@lno))) + (if (null? parent-scope) wrapped + (loop (cdr parent-scope) (cadar parent-scope) (caddar parent-scope) wrapped)))))) + (cdr e)))) + ((using import export meta line inbounds boundscheck loopinfo inline noinline purity) (map unescape e)) ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted. ((symboliclabel) e) ((symbolicgoto) e) ((struct) - `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m parent-scope inarg) - ;; type has special behavior: identifiers inside are - ;; field names, not expressions. + `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m lno parent-scope inarg) ,(map (lambda (x) - (let ((ux (unescape x))) - (cond ((atom? ux) ux) - ((and (pair? ux) (eq? 
(car ux) '|::|)) - `(|::| ,(unescape (cadr ux)) - ,(resolve-expansion-vars- (reescape (caddr ux) x) env m parent-scope inarg))) - (else - (resolve-expansion-vars-with-new-env x env m parent-scope inarg))))) + (resolve-struct-field-expansion x env m lno parent-scope inarg)) (cadddr e)))) ((parameters) @@ -397,17 +443,17 @@ (x (if (and (not inarg) (symbol? ux)) `(kw ,ux ,x) x))) - (resolve-expansion-vars- x env m parent-scope #f))) + (resolve-expansion-vars- x env m lno parent-scope #f))) (cdr e)))) ((->) - `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) - ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) + `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m lno parent-scope inarg) + ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg))) ((= function) - `(,(car e) ,(resolve-in-lhs (cadr e) env m parent-scope inarg) + `(,(car e) ,(resolve-in-lhs (cadr e) env m lno parent-scope inarg) ,@(map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) + (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)) (cddr e)))) ((kw) @@ -421,55 +467,67 @@ `(kw (|::| ,@(if argname (list (if inarg - (resolve-expansion-vars- argname env m parent-scope inarg) + (resolve-expansion-vars- argname env m lno parent-scope inarg) ;; in keyword arg A=B, don't transform "A" (unescape argname))) '()) - ,(resolve-expansion-vars- type env m parent-scope inarg)) - ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))) + ,(resolve-expansion-vars- type env m lno parent-scope inarg)) + ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg)))) (else `(kw ,(if inarg - (resolve-expansion-vars- (cadr e) env m parent-scope inarg) + (resolve-expansion-vars- (cadr e) env m lno parent-scope inarg) (unescape (cadr e))) - ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))))) + ,(resolve-expansion-vars-with-new-env (caddr e) env m lno parent-scope inarg))))) ((let) (let* ((newenv (new-expansion-env-for e env)) - (body (resolve-expansion-vars- (caddr e) newenv m parent-scope inarg)) + (body (resolve-expansion-vars- (caddr e) newenv m lno parent-scope inarg)) (binds (let-binds e))) `(let (block ,@(map (lambda (bind) - (if (assignment? bind) - (make-assignment - ;; expand binds in old env with dummy RHS - (cadr (resolve-expansion-vars- (make-assignment (cadr bind) 0) - newenv m parent-scope inarg)) - ;; expand initial values in old env - (resolve-expansion-vars- (caddr bind) env m parent-scope inarg)) - (resolve-expansion-vars- bind newenv m parent-scope inarg))) + (resolve-letlike-assign bind env newenv m lno parent-scope inarg)) binds)) ,body))) + ((for) + (let* ((newenv (new-expansion-env-for e env)) + (body (resolve-expansion-vars- (caddr e) newenv m lno parent-scope inarg)) + (expanded-ranges (map (lambda (range) + (resolve-letlike-assign range env newenv m lno parent-scope inarg)) (for-ranges-list (cadr e))))) + (if (length= expanded-ranges 1) + `(for ,@expanded-ranges ,body)) + `(for (block ,@expanded-ranges) ,body))) + ((generator) + (let* ((newenv (new-expansion-env-for e env)) + (body (resolve-expansion-vars- (cadr e) newenv m lno parent-scope inarg)) + (filt? (eq? (car (caddr e)) 'filter)) + (range-exprs (if filt? (cddr (caddr e)) (cddr e))) + (filt (if filt? 
(resolve-expansion-vars- (cadr (caddr e)) newenv m lno parent-scope inarg))) + (expanded-ranges (map (lambda (range) + (resolve-letlike-assign range env newenv m lno parent-scope inarg)) range-exprs))) + (if filt? + `(generator ,body (filter ,filt ,@expanded-ranges)) + `(generator ,body ,@expanded-ranges)))) ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly - (let ((parent-scope (cons (list env m) parent-scope)) + (let ((parent-scope (cons (list env m lno) parent-scope)) (body (cadr e)) (m (caddr e)) (lno (cdddr e))) - (resolve-expansion-vars-with-new-env body env m parent-scope inarg #t))) + (resolve-expansion-vars-with-new-env body env m lno parent-scope inarg #t))) ((tuple) (cons (car e) (map (lambda (x) (if (assignment? x) `(= ,(unescape (cadr x)) - ,(resolve-expansion-vars-with-new-env (caddr x) env m parent-scope inarg)) - (resolve-expansion-vars-with-new-env x env m parent-scope inarg))) + ,(resolve-expansion-vars-with-new-env (caddr x) env m lno parent-scope inarg)) + (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg))) (cdr e)))) ;; todo: trycatch (else (cons (car e) (map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) + (resolve-expansion-vars-with-new-env x env m lno parent-scope inarg)) (cdr e)))))))) ;; decl-var that also identifies f in f()=... @@ -570,11 +628,11 @@ (cdr v) '()))) -(define (resolve-expansion-vars e m) +(define (resolve-expansion-vars e m lno) ;; expand binding form patterns ;; keep track of environment, rename locals to gensyms ;; and wrap globals in (globalref module var) for macro's home module - (resolve-expansion-vars-with-new-env e '() m '() #f #t)) + (resolve-expansion-vars-with-new-env e '() m lno '() #f #t)) (define (julia-expand-quotes e) (cond ((not (pair? e)) e) @@ -590,11 +648,12 @@ (cond ((not (pair? e)) e) ((eq? (car e) 'inert) e) ((eq? (car e) 'module) e) + ((eq? (car e) 'toplevel) e) ((eq? (car e) 'hygienic-scope) (let ((form (cadr e)) ;; form is the expression returned from expand-macros (modu (caddr e)) ;; m is the macro's def module (lno (cdddr e))) ;; lno is (optionally) the line number node - (resolve-expansion-vars form modu))) + (resolve-expansion-vars form modu lno))) (else (map julia-expand-macroscopes- e)))) diff --git a/src/method.c b/src/method.c index 06a05361a927d..0a58f0d5c482c 100644 --- a/src/method.c +++ b/src/method.c @@ -20,9 +20,6 @@ extern jl_value_t *jl_builtin_tuple; jl_methtable_t *jl_kwcall_mt; jl_method_t *jl_opaque_closure_method; -jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, - int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva); - static void check_c_types(const char *where, jl_value_t *rt, jl_value_t *at) { if (jl_is_svec(rt)) @@ -44,218 +41,160 @@ static void check_c_types(const char *where, jl_value_t *rt, jl_value_t *at) // Resolve references to non-locally-defined variables to become references to global // variables in `module` (unless the rvalue is one of the type parameters in `sparam_vals`). -static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals, +static jl_value_t *resolve_definition_effects(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals, int binding_effects, int eager_resolve) { if (jl_is_symbol(expr)) { - if (module == NULL) - return expr; - return jl_module_globalref(module, (jl_sym_t*)expr); + jl_error("Found raw symbol in code returned from lowering. 
Expected all symbols to have been resolved to GlobalRef or slots."); } - else if (jl_is_returnnode(expr)) { - jl_value_t *retval = jl_returnnode_value(expr); - if (retval) { - jl_value_t *val = resolve_globals(retval, module, sparam_vals, binding_effects, eager_resolve); - if (val != retval) { - JL_GC_PUSH1(&val); - expr = jl_new_struct(jl_returnnode_type, val); - JL_GC_POP(); - } - } + if (!jl_is_expr(expr)) { return expr; } - else if (jl_is_gotoifnot(expr)) { - jl_value_t *cond = resolve_globals(jl_gotoifnot_cond(expr), module, sparam_vals, binding_effects, eager_resolve); - if (cond != jl_gotoifnot_cond(expr)) { - intptr_t label = jl_gotoifnot_label(expr); - JL_GC_PUSH1(&cond); - expr = jl_new_struct_uninit(jl_gotoifnot_type); - set_nth_field(jl_gotoifnot_type, expr, 0, cond, 0); - jl_gotoifnot_label(expr) = label; - JL_GC_POP(); - } + + jl_expr_t *e = (jl_expr_t*)expr; + if (e->head == jl_global_sym && binding_effects) { + // execute the side-effects of "global x" decl immediately: + // creates uninitialized mutable binding in module for each global + jl_eval_global_expr(module, e, 1); + return jl_nothing; + } + // These exprs are not fully linearized + if (e->head == jl_assign_sym) { + jl_exprargset(e, 1, resolve_definition_effects(jl_exprarg(e, 1), module, sparam_vals, binding_effects, eager_resolve)); + return expr; + } else if (e->head == jl_new_opaque_closure_sym) { + jl_exprargset(e, 4, resolve_definition_effects(jl_exprarg(e, 4), module, sparam_vals, binding_effects, eager_resolve)); return expr; } - else if (jl_is_expr(expr)) { - jl_expr_t *e = (jl_expr_t*)expr; - if (e->head == jl_global_sym && binding_effects) { - // execute the side-effects of "global x" decl immediately: - // creates uninitialized mutable binding in module for each global - jl_eval_global_expr(module, e, 1); - expr = jl_nothing; + size_t nargs = jl_array_nrows(e->args); + if (e->head == jl_opaque_closure_method_sym) { + if (nargs != 5) { + jl_error("opaque_closure_method: invalid syntax"); } - if (jl_is_toplevel_only_expr(expr) || e->head == jl_const_sym || - e->head == jl_coverageeffect_sym || e->head == jl_copyast_sym || - e->head == jl_quote_sym || e->head == jl_inert_sym || - e->head == jl_meta_sym || e->head == jl_inbounds_sym || - e->head == jl_boundscheck_sym || e->head == jl_loopinfo_sym || - e->head == jl_aliasscope_sym || e->head == jl_popaliasscope_sym || - e->head == jl_inline_sym || e->head == jl_noinline_sym) { - // ignore these + jl_value_t *name = jl_exprarg(e, 0); + jl_value_t *oc_nargs = jl_exprarg(e, 1); + int isva = jl_exprarg(e, 2) == jl_true; + jl_value_t *functionloc = jl_exprarg(e, 3); + jl_value_t *ci = jl_exprarg(e, 4); + if (!jl_is_code_info(ci)) { + jl_error("opaque_closure_method: lambda should be a CodeInfo"); + } else if (!jl_is_long(oc_nargs)) { + jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs); } - else { - size_t i = 0, nargs = jl_array_len(e->args); - if (e->head == jl_opaque_closure_method_sym) { - if (nargs != 5) { - jl_error("opaque_closure_method: invalid syntax"); - } - jl_value_t *name = jl_exprarg(e, 0); - jl_value_t *oc_nargs = jl_exprarg(e, 1); - int isva = jl_exprarg(e, 2) == jl_true; - jl_value_t *functionloc = jl_exprarg(e, 3); - jl_value_t *ci = jl_exprarg(e, 4); - if (!jl_is_code_info(ci)) { - jl_error("opaque_closure_method: lambda should be a CodeInfo"); - } else if (!jl_is_long(oc_nargs)) { - jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs); - } - jl_method_t *m = jl_make_opaque_closure_method(module, 
name, jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva); - return (jl_value_t*)m; + jl_method_t *m = jl_make_opaque_closure_method(module, name, + jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva, /*isinferred*/0); + return (jl_value_t*)m; + } + if (e->head == jl_cfunction_sym) { + JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc) + jl_task_t *ct = jl_current_task; + jl_value_t *typ = jl_exprarg(e, 0); + if (!jl_is_type(typ)) + jl_error("first parameter to :cfunction must be a type"); + if (typ == (jl_value_t*)jl_voidpointer_type) { + jl_value_t *a = jl_exprarg(e, 1); + JL_TYPECHK(cfunction method definition, quotenode, a); + *(jl_value_t**)a = jl_toplevel_eval(module, *(jl_value_t**)a); + jl_gc_wb(a, *(jl_value_t**)a); + } + jl_value_t *rt = jl_exprarg(e, 2); + jl_value_t *at = jl_exprarg(e, 3); + if (!jl_is_type(rt)) { + JL_TRY { + rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); } - if (e->head == jl_cfunction_sym) { - JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc) - jl_value_t *typ = jl_exprarg(e, 0); - if (!jl_is_type(typ)) - jl_error("first parameter to :cfunction must be a type"); - if (typ == (jl_value_t*)jl_voidpointer_type) { - jl_value_t *a = jl_exprarg(e, 1); - JL_TYPECHK(cfunction method definition, quotenode, a); - *(jl_value_t**)a = jl_toplevel_eval(module, *(jl_value_t**)a); - jl_gc_wb(a, *(jl_value_t**)a); - } - jl_value_t *rt = jl_exprarg(e, 2); - jl_value_t *at = jl_exprarg(e, 3); - if (!jl_is_type(rt)) { - JL_TRY { - rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); - } - JL_CATCH { - if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) - jl_error("could not evaluate cfunction return type (it might depend on a local variable)"); - else - jl_rethrow(); - } - jl_exprargset(e, 2, rt); - } - if (!jl_is_svec(at)) { - JL_TRY { - at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); - } - JL_CATCH { - if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) - jl_error("could not evaluate cfunction argument type (it might depend on a local variable)"); - else - jl_rethrow(); - } - jl_exprargset(e, 3, at); - } - check_c_types("cfunction method definition", rt, at); - JL_TYPECHK(cfunction method definition, quotenode, jl_exprarg(e, 4)); - JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4)); - return expr; + JL_CATCH { + if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type)) + jl_error("could not evaluate cfunction return type (it might depend on a local variable)"); + else + jl_rethrow(); } - if (e->head == jl_foreigncall_sym) { - JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, nreq, (cc, effects)) - jl_value_t *rt = jl_exprarg(e, 1); - jl_value_t *at = jl_exprarg(e, 2); - if (!jl_is_type(rt)) { - JL_TRY { - rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); - } - JL_CATCH { - if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) - jl_error("could not evaluate ccall return type (it might depend on a local variable)"); - else - jl_rethrow(); - } - jl_exprargset(e, 1, rt); - } - if (!jl_is_svec(at)) { - JL_TRY { - at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); - } - JL_CATCH { - if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) - jl_error("could not evaluate ccall argument type (it might depend on a local variable)"); - else - jl_rethrow(); - } - jl_exprargset(e, 2, at); - } - check_c_types("ccall method definition", rt, 
at); - JL_TYPECHK(ccall method definition, long, jl_exprarg(e, 3)); - JL_TYPECHK(ccall method definition, quotenode, jl_exprarg(e, 4)); - jl_value_t *cc = jl_quotenode_value(jl_exprarg(e, 4)); - if (!jl_is_symbol(cc)) { - JL_TYPECHK(ccall method definition, tuple, cc); - if (jl_nfields(cc) != 2) { - jl_error("In ccall calling convention, expected two argument tuple or symbol."); - } - JL_TYPECHK(ccall method definition, symbol, jl_get_nth_field(cc, 0)); - JL_TYPECHK(ccall method definition, uint8, jl_get_nth_field(cc, 1)); - } - jl_exprargset(e, 0, resolve_globals(jl_exprarg(e, 0), module, sparam_vals, binding_effects, 1)); - i++; + jl_exprargset(e, 2, rt); + } + if (!jl_is_svec(at)) { + JL_TRY { + at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); + } + JL_CATCH { + if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type)) + jl_error("could not evaluate cfunction argument type (it might depend on a local variable)"); + else + jl_rethrow(); + } + jl_exprargset(e, 3, at); + } + check_c_types("cfunction method definition", rt, at); + JL_TYPECHK(cfunction method definition, quotenode, jl_exprarg(e, 4)); + JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4)); + return expr; + } + if (e->head == jl_foreigncall_sym) { + JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, nreq, (cc, effects)) + jl_task_t *ct = jl_current_task; + jl_value_t *rt = jl_exprarg(e, 1); + jl_value_t *at = jl_exprarg(e, 2); + if (!jl_is_type(rt)) { + JL_TRY { + rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); } - if (e->head == jl_method_sym || e->head == jl_module_sym) { - i++; + JL_CATCH { + if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type)) + jl_error("could not evaluate ccall return type (it might depend on a local variable)"); + else + jl_rethrow(); } - for (; i < nargs; i++) { - // TODO: this should be making a copy, not mutating the source - jl_exprargset(e, i, resolve_globals(jl_exprarg(e, i), module, sparam_vals, binding_effects, eager_resolve)); + jl_exprargset(e, 1, rt); + } + if (!jl_is_svec(at)) { + JL_TRY { + at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); } - if (e->head == jl_call_sym && jl_expr_nargs(e) == 3 && - jl_is_globalref(jl_exprarg(e, 0)) && - jl_is_globalref(jl_exprarg(e, 1)) && - jl_is_quotenode(jl_exprarg(e, 2))) { - // replace module_expr.sym with GlobalRef(module, sym) - // for expressions pattern-matching to `getproperty(module_expr, :sym)` in a top-module - // (this is expected to help inference performance) - // TODO: this was broken by linear-IR - jl_value_t *s = jl_fieldref(jl_exprarg(e, 2), 0); - jl_value_t *me = jl_exprarg(e, 1); - jl_value_t *fe = jl_exprarg(e, 0); - jl_module_t *fe_mod = jl_globalref_mod(fe); - jl_sym_t *fe_sym = jl_globalref_name(fe); - jl_module_t *me_mod = jl_globalref_mod(me); - jl_sym_t *me_sym = jl_globalref_name(me); - if (fe_mod->istopmod && !strcmp(jl_symbol_name(fe_sym), "getproperty") && jl_is_symbol(s)) { - if (eager_resolve || jl_binding_resolved_p(me_mod, me_sym)) { - jl_binding_t *b = jl_get_binding(me_mod, me_sym); - if (b && b->constp) { - jl_value_t *v = jl_atomic_load_relaxed(&b->value); - if (v && jl_is_module(v)) - return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s); - } - } - } + JL_CATCH { + if (jl_typetagis(jl_current_exception(ct), jl_errorexception_type)) + jl_error("could not evaluate ccall argument type (it might depend on a local variable)"); + else + jl_rethrow(); } - if (e->head == jl_call_sym && nargs > 0 && - 
jl_is_globalref(jl_exprarg(e, 0))) { - // TODO: this hack should be deleted once llvmcall is fixed - jl_value_t *fe = jl_exprarg(e, 0); - jl_module_t *fe_mod = jl_globalref_mod(fe); - jl_sym_t *fe_sym = jl_globalref_name(fe); - if (jl_binding_resolved_p(fe_mod, fe_sym)) { - // look at some known called functions - jl_binding_t *b = jl_get_binding(fe_mod, fe_sym); - if (b && b->constp && jl_atomic_load_relaxed(&b->value) == jl_builtin_tuple) { - size_t j; - for (j = 1; j < nargs; j++) { - if (!jl_is_quotenode(jl_exprarg(e, j))) - break; - } - if (j == nargs) { - jl_value_t *val = NULL; - JL_TRY { - val = jl_interpret_toplevel_expr_in(module, (jl_value_t*)e, NULL, sparam_vals); - } - JL_CATCH { - } - if (val) - return val; - } + jl_exprargset(e, 2, at); + } + check_c_types("ccall method definition", rt, at); + JL_TYPECHK(ccall method definition, long, jl_exprarg(e, 3)); + JL_TYPECHK(ccall method definition, quotenode, jl_exprarg(e, 4)); + jl_value_t *cc = jl_quotenode_value(jl_exprarg(e, 4)); + if (!jl_is_symbol(cc)) { + JL_TYPECHK(ccall method definition, tuple, cc); + if (jl_nfields(cc) != 2) { + jl_error("In ccall calling convention, expected two argument tuple or symbol."); + } + JL_TYPECHK(ccall method definition, symbol, jl_get_nth_field(cc, 0)); + JL_TYPECHK(ccall method definition, uint16, jl_get_nth_field(cc, 1)); + } + } + if (e->head == jl_call_sym && nargs > 0 && + jl_is_globalref(jl_exprarg(e, 0))) { + // TODO: this hack should be deleted once llvmcall is fixed + jl_value_t *fe = jl_exprarg(e, 0); + jl_module_t *fe_mod = jl_globalref_mod(fe); + jl_sym_t *fe_sym = jl_globalref_name(fe); + if (jl_binding_resolved_p(fe_mod, fe_sym)) { + // look at some known called functions + jl_binding_t *b = jl_get_binding(fe_mod, fe_sym); + if (jl_get_binding_value_if_const(b) == jl_builtin_tuple) { + size_t j; + for (j = 1; j < nargs; j++) { + if (!jl_is_quotenode(jl_exprarg(e, j))) + break; + } + if (j == nargs) { + jl_value_t *val = NULL; + JL_TRY { + val = jl_interpret_toplevel_expr_in(module, (jl_value_t*)e, NULL, sparam_vals); } + JL_CATCH { + val = NULL; // To make the analyzer happy see #define JL_TRY + } + if (val) + return val; } } } @@ -263,13 +202,13 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve return expr; } -JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, +JL_DLLEXPORT void jl_resolve_definition_effects_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, int binding_effects) { - size_t i, l = jl_array_len(stmts); + size_t i, l = jl_array_nrows(stmts); for (i = 0; i < l; i++) { jl_value_t *stmt = jl_array_ptr_ref(stmts, i); - jl_array_ptr_set(stmts, i, resolve_globals(stmt, m, sparam_vals, binding_effects, 0)); + jl_array_ptr_set(stmts, i, resolve_definition_effects(stmt, m, sparam_vals, binding_effects, 0)); } } @@ -278,34 +217,159 @@ jl_value_t *expr_arg1(jl_value_t *expr) { return jl_array_ptr_ref(args, 0); } +static jl_value_t *alloc_edges(arraylist_t *edges_list) +{ + jl_value_t *jledges = (jl_value_t*)jl_alloc_svec(edges_list->len); + jl_value_t *jledges2 = NULL; + jl_value_t *codelocs = NULL; + JL_GC_PUSH3(&jledges, &jledges2, &codelocs); + size_t i; + for (i = 0; i < edges_list->len; i++) { + arraylist_t *edge = (arraylist_t*)edges_list->items[i]; + jl_value_t *file = (jl_value_t*)edge->items[0]; + int32_t line = 0; // not preserved by lowering (and probably lost even before that) + arraylist_t *edges_list2 = (arraylist_t*)edge->items[1]; + size_t j, nlocs = (edge->len 
- 2) / 3; + codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs * 3); + for (j = 0; j < nlocs; j++) { + jl_array_data(codelocs,int32_t)[3 * j + 0] = (intptr_t)edge->items[3 * j + 0 + 2]; + jl_array_data(codelocs,int32_t)[3 * j + 1] = (intptr_t)edge->items[3 * j + 1 + 2]; + jl_array_data(codelocs,int32_t)[3 * j + 2] = (intptr_t)edge->items[3 * j + 2 + 2]; + } + codelocs = (jl_value_t*)jl_compress_codelocs(line, codelocs, nlocs); + jledges2 = alloc_edges(edges_list2); + jl_value_t *debuginfo = jl_new_struct(jl_debuginfo_type, file, jl_nothing, jledges2, codelocs); + jledges2 = NULL; + jl_svecset(jledges, i, debuginfo); + free(edges_list2); + free(edge); + } + JL_GC_POP(); + return jledges; +} + +static void add_edge(arraylist_t *edges_list, arraylist_t *inlinestack, int32_t *p_to, int32_t *p_pc) +{ + jl_value_t *locinfo = (jl_value_t*)arraylist_pop(inlinestack); + jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 0); + int32_t line = jl_unbox_int32(jl_fieldref(locinfo, 1)); + size_t i; + arraylist_t *edge = NULL; + for (i = 0; i < edges_list->len; i++) { + edge = (arraylist_t*)edges_list->items[i]; + if (edge->items[0] == filesym) + break; + } + if (i == edges_list->len) { + edge = (arraylist_t*)malloc(sizeof(arraylist_t)); + arraylist_t *edge_list2 = (arraylist_t*)malloc(sizeof(arraylist_t)); + arraylist_new(edge, 0); + arraylist_new(edge_list2, 0); + arraylist_push(edge, (void*)filesym); + arraylist_push(edge, (void*)edge_list2); + arraylist_push(edges_list, (void*)edge); + } + *p_to = i + 1; + int32_t to = 0, pc = 0; + if (inlinestack->len) { + arraylist_t *edge_list2 = (arraylist_t*)edge->items[1]; + add_edge(edge_list2, inlinestack, &to, &pc); + } + for (i = 2; i < edge->len; i += 3) { + if ((intptr_t)edge->items[i + 0] == line && + (intptr_t)edge->items[i + 1] == to && + (intptr_t)edge->items[i + 2] == pc) { + break; + } + } + if (i == edge->len) { + arraylist_push(edge, (void*)(intptr_t)line); + arraylist_push(edge, (void*)(intptr_t)to); + arraylist_push(edge, (void*)(intptr_t)pc); + } + *p_pc = (i - 2) / 3 + 1; +} + +jl_debuginfo_t *jl_linetable_to_debuginfo(jl_array_t *codelocs_any, jl_array_t *linetable) +{ + size_t nlocs = jl_array_nrows(codelocs_any); + jl_value_t *toplocinfo = jl_array_ptr_ref(linetable, 0); + jl_sym_t *topfile = (jl_sym_t*)jl_fieldref_noalloc(toplocinfo, 0); + int32_t topline = jl_unbox_int32(jl_fieldref(toplocinfo, 1)); + arraylist_t inlinestack; + arraylist_new(&inlinestack, 0); + arraylist_t edges_list; + arraylist_new(&edges_list, 0); + jl_value_t *jledges = NULL; + jl_value_t *codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs * 3); + jl_debuginfo_t *debuginfo = NULL; + JL_GC_PUSH3(&jledges, &codelocs, &debuginfo); + int32_t *codelocs32 = jl_array_data(codelocs,int32_t); + size_t j; + for (j = 0; j < nlocs; j++) { + size_t lineidx = jl_unbox_long(jl_array_ptr_ref((jl_array_t*)codelocs_any, j)); // 1 indexed! 
+ while (lineidx != 0) { + jl_value_t *locinfo = jl_array_ptr_ref(linetable, lineidx - 1); + lineidx = jl_unbox_int32(jl_fieldref(locinfo, 2)); + arraylist_push(&inlinestack, locinfo); + } + int32_t line = 0, to = 0, pc = 0; + if (inlinestack.len) { + jl_value_t *locinfo = (jl_value_t*)arraylist_pop(&inlinestack); + jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 0); + if (filesym == topfile) + line = jl_unbox_int32(jl_fieldref(locinfo, 1)); + else + arraylist_push(&inlinestack, locinfo); + if (inlinestack.len) { + add_edge(&edges_list, &inlinestack, &to, &pc); + } + } + codelocs32[j * 3 + 0] = line; + codelocs32[j * 3 + 1] = to; + codelocs32[j * 3 + 2] = pc; + } + codelocs = (jl_value_t*)jl_compress_codelocs(topline, codelocs, nlocs); + jledges = alloc_edges(&edges_list); + debuginfo = (jl_debuginfo_t*)jl_new_struct(jl_debuginfo_type, topfile, jl_nothing, jledges, codelocs); + JL_GC_POP(); + return debuginfo; +} + // copy a :lambda Expr into its CodeInfo representation, // including popping of known meta nodes -static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) +jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir) { + jl_code_info_t *li = NULL; + JL_GC_PUSH1(&li); + li = jl_new_code_info_uninit(); + + jl_expr_t *arglist = (jl_expr_t*)jl_exprarg(ir, 0); + li->nargs = jl_array_len(arglist); + assert(jl_is_expr(ir)); jl_expr_t *bodyex = (jl_expr_t*)jl_exprarg(ir, 2); - jl_value_t *codelocs = jl_exprarg(ir, 3); - li->linetable = jl_exprarg(ir, 4); - size_t nlocs = jl_array_len(codelocs); - li->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs); - size_t j; - for (j = 0; j < nlocs; j++) { - jl_arrayset((jl_array_t*)li->codelocs, jl_box_int32(jl_unbox_long(jl_arrayref((jl_array_t*)codelocs, j))), - j); - } + + jl_array_t *codelocs_any = (jl_array_t*)jl_exprarg(ir, 3); + jl_array_t *linetable = (jl_array_t*)jl_exprarg(ir, 4); + li->debuginfo = jl_linetable_to_debuginfo(codelocs_any, linetable); + jl_gc_wb(li, li->debuginfo); + assert(jl_is_expr(bodyex)); jl_array_t *body = bodyex->args; li->code = body; jl_gc_wb(li, li->code); - size_t n = jl_array_len(body); + size_t n = jl_array_nrows(body); jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code); - li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n); + li->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, n); jl_gc_wb(li, li->ssaflags); int inbounds_depth = 0; // number of stacked inbounds - // isempty(inline_flags): no user annotation - // last(inline_flags) == 1: inline region - // last(inline_flags) == 0: noinline region + // isempty(inline_flags): no user callsite inline annotation + // last(inline_flags) == 1: callsite inline region + // last(inline_flags) == 0: callsite noinline region arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); + arraylist_t *purity_exprs = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); + size_t j; for (j = 0; j < n; j++) { jl_value_t *st = bd[j]; int is_flag_stmt = 0; @@ -328,14 +392,32 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) else if (ma == (jl_value_t*)jl_no_constprop_sym) li->constprop = 2; else if (jl_is_expr(ma) && ((jl_expr_t*)ma)->head == jl_purity_sym) { - if (jl_expr_nargs(ma) == 7) { - li->purity.overrides.ipo_consistent = jl_unbox_bool(jl_exprarg(ma, 0)); - li->purity.overrides.ipo_effect_free = jl_unbox_bool(jl_exprarg(ma, 1)); - li->purity.overrides.ipo_nothrow = jl_unbox_bool(jl_exprarg(ma, 2)); - li->purity.overrides.ipo_terminates_globally 
= jl_unbox_bool(jl_exprarg(ma, 3)); - li->purity.overrides.ipo_terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4)); - li->purity.overrides.ipo_notaskstate = jl_unbox_bool(jl_exprarg(ma, 5)); - li->purity.overrides.ipo_inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6)); + if (jl_expr_nargs(ma) == NUM_EFFECTS_OVERRIDES) { + // N.B. this code allows multiple :purity expressions to be present in a single `:meta` node + int8_t consistent = jl_unbox_bool(jl_exprarg(ma, 0)); + if (consistent) li->purity.overrides.ipo_consistent = consistent; + int8_t effect_free = jl_unbox_bool(jl_exprarg(ma, 1)); + if (effect_free) li->purity.overrides.ipo_effect_free = effect_free; + int8_t nothrow = jl_unbox_bool(jl_exprarg(ma, 2)); + if (nothrow) li->purity.overrides.ipo_nothrow = nothrow; + int8_t terminates_globally = jl_unbox_bool(jl_exprarg(ma, 3)); + if (terminates_globally) li->purity.overrides.ipo_terminates_globally = terminates_globally; + int8_t terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4)); + if (terminates_locally) li->purity.overrides.ipo_terminates_locally = terminates_locally; + int8_t notaskstate = jl_unbox_bool(jl_exprarg(ma, 5)); + if (notaskstate) li->purity.overrides.ipo_notaskstate = notaskstate; + int8_t inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6)); + if (inaccessiblememonly) li->purity.overrides.ipo_inaccessiblememonly = inaccessiblememonly; + int8_t noub = jl_unbox_bool(jl_exprarg(ma, 7)); + if (noub) li->purity.overrides.ipo_noub = noub; + int8_t noub_if_noinbounds = jl_unbox_bool(jl_exprarg(ma, 8)); + if (noub_if_noinbounds) li->purity.overrides.ipo_noub_if_noinbounds = noub_if_noinbounds; + int8_t consistent_overlay = jl_unbox_bool(jl_exprarg(ma, 9)); + if (consistent_overlay) li->purity.overrides.ipo_consistent_overlay = consistent_overlay; + int8_t nortcall = jl_unbox_bool(jl_exprarg(ma, 10)); + if (nortcall) li->purity.overrides.ipo_nortcall = nortcall; + } else { + assert(jl_expr_nargs(ma) == 0); } } else @@ -380,31 +462,53 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) } bd[j] = jl_nothing; } - else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) { - jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_purity_sym) { + is_flag_stmt = 1; + size_t na = jl_expr_nargs(st); + if (na == NUM_EFFECTS_OVERRIDES) + arraylist_push(purity_exprs, (void*)st); + else { + assert(na == 0); + arraylist_pop(purity_exprs); + } + bd[j] = jl_nothing; } - else if (jl_is_expr(st) && (((jl_expr_t*)st)->head == jl_foreigncall_sym || ((jl_expr_t*)st)->head == jl_cfunction_sym)) { + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_boundscheck_sym) + // Don't set IR_FLAG_INBOUNDS on boundscheck at the same level + is_flag_stmt = 1; + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) + jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); + else if (jl_is_expr(st) && (((jl_expr_t*)st)->head == jl_foreigncall_sym || ((jl_expr_t*)st)->head == jl_cfunction_sym)) li->has_fcall = 1; - } if (is_flag_stmt) - jl_array_uint8_set(li->ssaflags, j, 0); + jl_array_uint32_set(li->ssaflags, j, 0); else { - uint8_t flag = 0; + uint32_t flag = 0; if (inbounds_depth > 0) - flag |= 1 << 0; + flag |= IR_FLAG_INBOUNDS; if (inline_flags->len > 0) { - void* inline_flag = inline_flags->items[inline_flags->len - 1]; + void* inline_flag = inline_flags->items[inline_flags->len-1]; flag |= 1 << (inline_flag ? 
1 : 2); } - jl_array_uint8_set(li->ssaflags, j, flag); + int n_purity_exprs = purity_exprs->len; + if (n_purity_exprs > 0) { + // apply all purity overrides + for (int i = 0; i < n_purity_exprs; i++) { + void* purity_expr = purity_exprs->items[i]; + for (int j = 0; j < NUM_EFFECTS_OVERRIDES; j++) { + flag |= jl_unbox_bool(jl_exprarg((jl_value_t*)purity_expr, j)) ? (1 << (NUM_IR_FLAGS+j)) : 0; + } + } + } + jl_array_uint32_set(li->ssaflags, j, flag); } } - assert(inline_flags->len == 0); // malformed otherwise - arraylist_free(inline_flags); - free(inline_flags); + assert(inline_flags->len == 0 && purity_exprs->len == 0); // malformed otherwise + arraylist_free(inline_flags); arraylist_free(purity_exprs); + free(inline_flags); free(purity_exprs); jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1); jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0); - size_t nslots = jl_array_len(vis); + size_t nslots = jl_array_nrows(vis); jl_value_t *ssavalue_types = jl_array_ptr_ref(vinfo, 2); assert(jl_is_long(ssavalue_types)); size_t nssavalue = jl_unbox_long(ssavalue_types); @@ -436,6 +540,8 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_array_ptr_set(li->slotnames, i, name); jl_array_uint8_set(li->slotflags, i, vinfo_mask & jl_unbox_long(jl_array_ptr_ref(vi, 2))); } + JL_GC_POP(); + return li; } JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) @@ -447,13 +553,10 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) mi->def.value = NULL; mi->specTypes = NULL; mi->sparam_vals = jl_emptysvec; - jl_atomic_store_relaxed(&mi->uninferred, NULL); mi->backedges = NULL; - mi->callbacks = NULL; jl_atomic_store_relaxed(&mi->cache, NULL); - mi->inInference = 0; mi->cache_with_orig = 0; - jl_atomic_store_relaxed(&mi->precompiled, 0); + jl_atomic_store_relaxed(&mi->flags, 0); return mi; } @@ -462,66 +565,32 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) jl_task_t *ct = jl_current_task; jl_code_info_t *src = (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t), - jl_code_info_type); + jl_code_info_type); src->code = NULL; - src->codelocs = NULL; + src->debuginfo = NULL; src->ssavaluetypes = NULL; src->ssaflags = NULL; src->method_for_inference_limit_heuristics = jl_nothing; - src->linetable = jl_nothing; src->slotflags = NULL; src->slotnames = NULL; src->slottypes = jl_nothing; - src->parent = (jl_method_instance_t*)jl_nothing; src->rettype = (jl_value_t*)jl_any_type; + src->edges = (jl_value_t*)jl_emptysvec; + src->parent = (jl_method_instance_t*)jl_nothing; src->min_world = 1; src->max_world = ~(size_t)0; - src->inferred = 0; src->propagate_inbounds = 0; src->has_fcall = 0; src->nospecializeinfer = 0; - src->edges = jl_nothing; src->constprop = 0; src->inlining = 0; src->purity.bits = 0; + src->nargs = 0; + src->isva = 0; src->inlining_cost = UINT16_MAX; return src; } -jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir) -{ - jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - src = jl_new_code_info_uninit(); - jl_code_info_set_ir(src, ir); - JL_GC_POP(); - return src; -} - -void jl_add_function_to_lineinfo(jl_code_info_t *ci, jl_value_t *func) -{ - // func may contain jl_symbol (function name), jl_method_t, or jl_method_instance_t - jl_array_t *li = (jl_array_t*)ci->linetable; - size_t i, n = jl_array_len(li); - jl_value_t *rt = NULL, *lno = NULL, *inl = NULL; - JL_GC_PUSH3(&rt, &lno, &inl); - for (i = 0; i < n; i++) { - jl_value_t *ln = jl_array_ptr_ref(li, i); - assert(jl_typetagis(ln, jl_lineinfonode_type)); - 
jl_value_t *mod = jl_fieldref_noalloc(ln, 0); - jl_value_t *file = jl_fieldref_noalloc(ln, 2); - lno = jl_fieldref(ln, 3); - inl = jl_fieldref(ln, 4); - // respect a given linetable if available - jl_value_t *ln_func = jl_fieldref_noalloc(ln, 1); - if (jl_is_symbol(ln_func) && (jl_sym_t*)ln_func == jl_symbol("none") && jl_is_int32(inl) && jl_unbox_int32(inl) == 0) - ln_func = func; - rt = jl_new_struct(jl_lineinfonode_type, mod, ln_func, file, lno, inl); - jl_array_ptr_set(li, i, rt); - } - JL_GC_POP(); -} - // invoke (compiling if necessary) the jlcall function pointer for a method template static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator, size_t world, jl_svec_t *sparam_vals, jl_value_t **args, uint32_t nargs) @@ -553,32 +622,67 @@ JL_DLLEXPORT jl_code_info_t *jl_expand_and_resolve(jl_value_t *ex, jl_module_t * JL_GC_PUSH1(&func); if (jl_is_code_info(func)) { jl_array_t *stmts = (jl_array_t*)func->code; - jl_resolve_globals_in_ir(stmts, module, sparam_vals, 1); + jl_resolve_definition_effects_in_ir(stmts, module, sparam_vals, 1); } JL_GC_POP(); return func; } +JL_DLLEXPORT jl_code_instance_t *jl_cached_uninferred(jl_code_instance_t *codeinst, size_t world) +{ + for (; codeinst; codeinst = jl_atomic_load_relaxed(&codeinst->next)) { + if (codeinst->owner != (void*)jl_uninferred_sym) + continue; + if (jl_atomic_load_relaxed(&codeinst->min_world) <= world && world <= jl_atomic_load_relaxed(&codeinst->max_world)) { + return codeinst; + } + } + return NULL; +} + +JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, jl_code_instance_t *checked, size_t world, jl_code_instance_t *newci) +{ + while (!jl_mi_try_insert(mi, checked, newci)) { + jl_code_instance_t *new_checked = jl_atomic_load_relaxed(&mi->cache); + // Check if another thread inserted a CodeInstance that covers this world + jl_code_instance_t *other = jl_cached_uninferred(new_checked, world); + if (other) + return other; + checked = new_checked; + } + // Successfully inserted + return newci; +} + // Return a newly allocated CodeInfo for the function signature // effectively described by the tuple (specTypes, env, Method) inside linfo -JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world) +JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *mi, size_t world, jl_code_instance_t **cache) { - jl_value_t *uninferred = jl_atomic_load_relaxed(&linfo->uninferred); - if (uninferred) { - assert(jl_is_code_info(uninferred)); // make sure this did not get `nothing` put here - return (jl_code_info_t*)jl_copy_ast((jl_value_t*)uninferred); + jl_code_instance_t *cache_ci = jl_atomic_load_relaxed(&mi->cache); + jl_code_instance_t *uninferred_ci = jl_cached_uninferred(cache_ci, world); + if (uninferred_ci) { + // The uninferred code is in `inferred`, but that is a bit of a misnomer here. + // This is the cached output the generated function (or top-level thunk). + // This cache has a non-standard owner (indicated by `->owner === :uninferred`), + // so it doesn't get confused for inference results. 
+ jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&uninferred_ci->inferred); + assert(jl_is_code_info(src)); // make sure this did not get `nothing` put here + return (jl_code_info_t*)jl_copy_ast((jl_value_t*)src); } JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION); - jl_value_t *tt = linfo->specTypes; - jl_method_t *def = linfo->def.method; - jl_timing_show_method_instance(linfo, JL_TIMING_DEFAULT_BLOCK); + jl_value_t *tt = mi->specTypes; + jl_method_t *def = mi->def.method; + jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK); jl_value_t *generator = def->generator; assert(generator != NULL); assert(jl_is_method(def)); jl_code_info_t *func = NULL; jl_value_t *ex = NULL; - JL_GC_PUSH2(&ex, &func); + jl_value_t *kind = NULL; + jl_code_info_t *uninferred = NULL; + jl_code_instance_t *ci = NULL; + JL_GC_PUSH5(&ex, &func, &uninferred, &ci, &kind); jl_task_t *ct = jl_current_task; int last_lineno = jl_lineno; int last_in = ct->ptls->in_pure_callback; @@ -586,21 +690,23 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, siz JL_TRY { ct->ptls->in_pure_callback = 1; - ct->world_age = def->primary_world; + ct->world_age = jl_atomic_load_relaxed(&def->primary_world); + if (ct->world_age > jl_atomic_load_acquire(&jl_world_counter) || jl_atomic_load_relaxed(&def->deleted_world) < ct->world_age) + jl_error("The generator method cannot run until it is added to a method table."); // invoke code generator jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt); - ex = jl_call_staged(def, generator, world, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt)); + ex = jl_call_staged(def, generator, world, mi->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt)); // do some post-processing if (jl_is_code_info(ex)) { func = (jl_code_info_t*)ex; jl_array_t *stmts = (jl_array_t*)func->code; - jl_resolve_globals_in_ir(stmts, def->module, linfo->sparam_vals, 1); + jl_resolve_definition_effects_in_ir(stmts, def->module, mi->sparam_vals, 1); } else { // Lower the user's expression and resolve references to the type parameters - func = jl_expand_and_resolve(ex, def->module, linfo->sparam_vals); + func = jl_expand_and_resolve(ex, def->module, mi->sparam_vals); if (!jl_is_code_info(func)) { if (jl_is_expr(func) && ((jl_expr_t*)func)->head == jl_error_sym) { ct->ptls->in_pure_callback = 0; @@ -608,27 +714,80 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, siz } jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator."); } + // TODO: This should ideally be in the lambda expression, + // but currently our isva determination is non-syntactic + func->isva = def->isva; } - jl_add_function_to_lineinfo(func, (jl_value_t*)def->name); + ex = NULL; // If this generated function has an opaque closure, cache it for - // correctness of method identity - for (int i = 0; i < jl_array_len(func->code); ++i) { + // correctness of method identity. In particular, other methods that call + // this method may end up referencing it in a PartialOpaque lattice element + // type. If the method identity were to change (for the same world age) + // in between invocations of this method, that return type inference would + // no longer be correct. 
+ int needs_cache_for_correctness = 0; + for (int i = 0; i < jl_array_nrows(func->code); ++i) { jl_value_t *stmt = jl_array_ptr_ref(func->code, i); if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) { + if (jl_expr_nargs(stmt) >= 4 && jl_is_bool(jl_exprarg(stmt, 3)) && !jl_unbox_bool(jl_exprarg(stmt, 3))) { + // If this new_opaque_closure is prohibited from sourcing PartialOpaque, + // there is no problem + continue; + } if (jl_options.incremental && jl_generating_output()) jl_error("Impossible to correctly handle OpaqueClosure inside @generated returned during precompile process."); - jl_value_t *uninferred = jl_copy_ast((jl_value_t*)func); - jl_value_t *old = NULL; - if (jl_atomic_cmpswap(&linfo->uninferred, &old, uninferred)) { - jl_gc_wb(linfo, uninferred); + needs_cache_for_correctness = 1; + break; + } + } + + if ((func->edges == jl_nothing || func->edges == (jl_value_t*)jl_emptysvec) && func->max_world == ~(size_t)0) { + if (func->min_world != 1) { + jl_error("Generated function result with `edges == nothing` and `max_world == typemax(UInt)` must have `min_world == 1`"); + } + } + + if (cache || needs_cache_for_correctness) { + uninferred = (jl_code_info_t*)jl_copy_ast((jl_value_t*)func); + ci = jl_new_codeinst_for_uninferred(mi, uninferred); + jl_code_instance_t *cached_ci = jl_cache_uninferred(mi, cache_ci, world, ci); + if (cached_ci != ci) { + func = (jl_code_info_t*)jl_copy_ast(jl_atomic_load_relaxed(&cached_ci->inferred)); + assert(jl_is_code_info(func)); + } + else if (uninferred->edges != jl_nothing) { + // N.B.: This needs to match `store_backedges` on the julia side + jl_value_t *edges = uninferred->edges; + size_t l; + jl_value_t **data; + if (jl_is_svec(edges)) { + l = jl_svec_len(edges); + data = jl_svec_data(edges); } else { - assert(jl_is_code_info(old)); - func = (jl_code_info_t*)old; + l = jl_array_dim0(edges); + data = jl_array_data(edges, jl_value_t*); + } + for (size_t i = 0; i < l; ) { + kind = data[i++]; + if (jl_is_method_instance(kind)) { + jl_method_instance_add_backedge((jl_method_instance_t*)kind, jl_nothing, ci); + } + else if (jl_is_mtable(kind)) { + assert(i < l); + ex = data[i++]; + jl_method_table_add_backedge((jl_methtable_t*)kind, ex, ci); + } + else { + assert(i < l); + ex = data[i++]; + jl_method_instance_add_backedge((jl_method_instance_t*)ex, kind, ci); + } } - break; } + if (cache) + *cache = cached_ci; } ct->ptls->in_pure_callback = last_in; @@ -688,14 +847,13 @@ JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) m->nospecializeinfer = src->nospecializeinfer; m->constprop = src->constprop; m->purity.bits = src->purity.bits; - jl_add_function_to_lineinfo(src, (jl_value_t*)m->name); jl_array_t *copy = NULL; jl_svec_t *sparam_vars = jl_outer_unionall_vars(m->sig); JL_GC_PUSH3(©, &sparam_vars, &src); assert(jl_typetagis(src->code, jl_array_any_type)); jl_array_t *stmts = (jl_array_t*)src->code; - size_t i, n = jl_array_len(stmts); + size_t i, n = jl_array_nrows(stmts); copy = jl_alloc_vec_any(n); for (i = 0; i < n; i++) { jl_value_t *st = jl_array_ptr_ref(stmts, i); @@ -767,20 +925,28 @@ JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) } } else { - st = resolve_globals(st, m->module, sparam_vars, 1, 0); + st = resolve_definition_effects(st, m->module, sparam_vars, 1, 0); } jl_array_ptr_set(copy, i, st); } src = jl_copy_code_info(src); + src->isva = m->isva; // TODO: It would be nice to reverse this + assert(m->nargs == src->nargs); src->code = copy; jl_gc_wb(src, 
copy); m->slot_syms = jl_compress_argnames(src->slotnames); jl_gc_wb(m, m->slot_syms); - if (gen_only) + if (gen_only) { m->source = NULL; - else - m->source = (jl_value_t*)jl_compress_ir(m, src); - jl_gc_wb(m, m->source); + } + else { + m->debuginfo = src->debuginfo; + jl_gc_wb(m, m->debuginfo); + m->source = (jl_value_t*)src; + jl_gc_wb(m, m->source); + m->source = (jl_value_t*)jl_compress_ir(m, NULL); + jl_gc_wb(m, m->source); + } JL_GC_POP(); } @@ -790,7 +956,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) jl_method_t *m = (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type); jl_atomic_store_relaxed(&m->specializations, (jl_value_t*)jl_emptysvec); - jl_atomic_store_relaxed(&m->speckeyset, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&m->speckeyset, (jl_genericmemory_t*)jl_an_empty_memory_any); m->sig = NULL; m->slot_syms = NULL; m->roots = NULL; @@ -800,6 +966,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->module = module; m->external_mt = NULL; m->source = NULL; + m->debuginfo = NULL; jl_atomic_store_relaxed(&m->unspecialized, NULL); m->generator = NULL; m->name = NULL; @@ -812,8 +979,8 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->recursion_relation = NULL; m->isva = 0; m->nargs = 0; - m->primary_world = 1; - m->deleted_world = ~(size_t)0; + jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0); + jl_atomic_store_relaxed(&m->deleted_world, 1); m->is_for_opaque_closure = 0; m->nospecializeinfer = 0; m->constprop = 0; @@ -830,27 +997,27 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) // it will be the signature supplied in an `invoke` call. // If you don't need `invokesig`, you can set it to NULL on input. // Initialize iteration with `i = 0`. Returns `i` for the next backedge to be extracted. 
-int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT +int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_code_instance_t **caller) JL_NOTSAFEPOINT { jl_value_t *item = jl_array_ptr_ref(list, i); - if (jl_is_method_instance(item)) { - // Not an `invoke` call, it's just the MethodInstance + if (jl_is_code_instance(item)) { + // Not an `invoke` call, it's just the CodeInstance if (invokesig != NULL) *invokesig = NULL; - *caller = (jl_method_instance_t*)item; + *caller = (jl_code_instance_t*)item; return i + 1; } assert(jl_is_type(item)); // An `invoke` call, it's a (sig, MethodInstance) pair if (invokesig != NULL) *invokesig = item; - *caller = (jl_method_instance_t*)jl_array_ptr_ref(list, i + 1); + *caller = (jl_code_instance_t*)jl_array_ptr_ref(list, i + 1); if (*caller) - assert(jl_is_method_instance(*caller)); + assert(jl_is_code_instance(*caller)); return i + 2; } -int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller) +int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller) { if (invokesig) jl_array_ptr_set(list, i++, invokesig); @@ -858,7 +1025,7 @@ int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_inst return i; } -void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *caller) +void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_code_instance_t *caller) { if (invokesig) jl_array_ptr_1d_push(list, invokesig); @@ -869,7 +1036,7 @@ void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *ca // method definition ---------------------------------------------------------- jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, - int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva) + int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred) { jl_method_t *m = jl_new_method_uninit(module); JL_GC_PUSH1(&m); @@ -888,34 +1055,34 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name jl_value_t *file = jl_linenode_file(functionloc); m->file = jl_is_symbol(file) ? 
(jl_sym_t*)file : jl_empty_sym; m->line = jl_linenode_line(functionloc); - jl_method_set_source(m, ci); + if (isinferred) { + m->slot_syms = jl_compress_argnames(ci->slotnames); + jl_gc_wb(m, m->slot_syms); + } else { + jl_method_set_source(m, ci); + } JL_GC_POP(); return m; } -// empty generic function def -JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, - jl_module_t *module, - _Atomic(jl_value_t*) *bp, - jl_binding_t *bnd) +JL_DLLEXPORT void jl_check_gf(jl_value_t *gf, jl_sym_t *name) { - jl_value_t *gf = NULL; - - assert(name && bp); - if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp) + if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf)) jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name)); - gf = jl_atomic_load_relaxed(bp); - if (gf != NULL) { - if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf)) - jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name)); - } - if (bnd) - bnd->constp = 1; // XXX: use jl_declare_constant and jl_checked_assignment - if (gf == NULL) { - gf = (jl_value_t*)jl_new_generic_function(name, module); - jl_atomic_store(bp, gf); // TODO: fix constp assignment data race - if (bnd) jl_gc_wb(bnd, gf); +} + +JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name) +{ + jl_value_t *gf = jl_get_binding_value_if_const(b); + if (gf) { + jl_check_gf(gf, b->globalref->name); + return gf; } + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (!jl_bkind_is_some_guard(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)))) + jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name)); + gf = (jl_value_t*)jl_new_generic_function(name, mod); + jl_declare_constant_val(b, mod, name, gf); return gf; } @@ -969,8 +1136,6 @@ JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGAT return method->external_mt ? 
(jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig); } -jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; - JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, @@ -993,7 +1158,7 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, JL_GC_PUSH3(&f, &m, &argtype); size_t i, na = jl_svec_len(atypes); - argtype = jl_apply_tuple_type(atypes); + argtype = jl_apply_tuple_type(atypes, 1); if (!jl_is_datatype(argtype)) jl_error("invalid type in method definition (Union{})"); @@ -1039,9 +1204,16 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, for (i = 0; i < na; i++) { jl_value_t *elt = jl_svecref(atypes, i); - int isvalid = jl_is_type(elt) || jl_is_typevar(elt) || jl_is_vararg(elt); - if (elt == jl_bottom_type || (jl_is_vararg(elt) && jl_unwrap_vararg(elt) == jl_bottom_type)) - isvalid = 0; + if (jl_is_vararg(elt)) { + if (i < na-1) + jl_exceptionf(jl_argumenterror_type, + "Vararg on non-final argument in method definition for %s at %s:%d", + jl_symbol_name(name), + jl_symbol_name(file), + line); + elt = jl_unwrap_vararg(elt); + } + int isvalid = (jl_is_type(elt) || jl_is_typevar(elt) || jl_is_vararg(elt)) && elt != jl_bottom_type; if (!isvalid) { jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(f->slotnames, i); if (argname == jl_unused_sym) @@ -1059,12 +1231,6 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, jl_symbol_name(file), line); } - if (jl_is_vararg(elt) && i < na-1) - jl_exceptionf(jl_argumenterror_type, - "Vararg on non-final argument in method definition for %s at %s:%d", - jl_symbol_name(name), - jl_symbol_name(file), - line); } for (i = jl_svec_len(tvars); i > 0; i--) { jl_value_t *tv = jl_svecref(tvars, i - 1); @@ -1099,16 +1265,6 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, m->line = line; jl_method_set_source(m, f); -#ifdef RECORD_METHOD_ORDER - if (jl_all_methods == NULL) - jl_all_methods = jl_alloc_vec_any(0); -#endif - if (jl_all_methods != NULL) { - while (jl_array_len(jl_all_methods) < m->primary_world) - jl_array_ptr_1d_push(jl_all_methods, NULL); - jl_array_ptr_1d_push(jl_all_methods, (jl_value_t*)m); - } - jl_method_table_insert(mt, m, NULL); if (jl_newmeth_tracer) jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)m); @@ -1153,10 +1309,6 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, // at the time of writing the system image (such occur first in the list of // roots). These are the cases with `key = 0` that do not prevent // serialization. -// - CodeInstances have a `relocatability` field which when 1 indicates that -// every root is "safe," meaning it was either added at sysimg creation or is -// tagged with a non-zero `key`. Even a single unsafe root will cause this to -// have value 0. 
// Get the key of the current (final) block of roots static uint64_t current_root_id(jl_array_t *root_blocks) @@ -1164,10 +1316,10 @@ static uint64_t current_root_id(jl_array_t *root_blocks) if (!root_blocks) return 0; assert(jl_is_array(root_blocks)); - size_t nx2 = jl_array_len(root_blocks); + size_t nx2 = jl_array_nrows(root_blocks); if (nx2 == 0) return 0; - uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks); + uint64_t *blocks = jl_array_data(root_blocks, uint64_t); return blocks[nx2-2]; } @@ -1176,8 +1328,8 @@ static void add_root_block(jl_array_t *root_blocks, uint64_t modid, size_t len) { assert(jl_is_array(root_blocks)); jl_array_grow_end(root_blocks, 2); - uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks); - int nx2 = jl_array_len(root_blocks); + uint64_t *blocks = jl_array_data(root_blocks, uint64_t); + int nx2 = jl_array_nrows(root_blocks); blocks[nx2-2] = modid; blocks[nx2-1] = len; } @@ -1207,7 +1359,7 @@ JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_ assert(jl_is_method(m)); prepare_method_for_roots(m, modid); if (current_root_id(m->root_blocks) != modid) - add_root_block(m->root_blocks, modid, jl_array_len(m->roots)); + add_root_block(m->root_blocks, modid, jl_array_nrows(m->roots)); jl_array_ptr_1d_push(m->roots, root); JL_GC_POP(); } @@ -1219,7 +1371,7 @@ void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots) assert(jl_is_method(m)); assert(jl_is_array(roots)); prepare_method_for_roots(m, modid); - add_root_block(m->root_blocks, modid, jl_array_len(m->roots)); + add_root_block(m->root_blocks, modid, jl_array_nrows(m->roots)); jl_array_ptr_1d_append(m->roots, roots); JL_GC_POP(); } @@ -1233,7 +1385,7 @@ int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i) rr->index = i; return i < m->nroots_sysimg; } - rle_index_to_reference(rr, i, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0); + rle_index_to_reference(rr, i, jl_array_data(m->root_blocks, uint64_t), jl_array_nrows(m->root_blocks), 0); if (rr->key) return 1; return i < m->nroots_sysimg; @@ -1248,7 +1400,7 @@ jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index) return jl_array_ptr_ref(m->roots, index); } rle_reference rr = {key, index}; - size_t i = rle_reference_to_index(&rr, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0); + size_t i = rle_reference_to_index(&rr, jl_array_data(m->root_blocks, uint64_t), jl_array_nrows(m->root_blocks), 0); return jl_array_ptr_ref(m->roots, i); } @@ -1257,11 +1409,11 @@ int nroots_with_key(jl_method_t *m, uint64_t key) { size_t nroots = 0; if (m->roots) - nroots = jl_array_len(m->roots); + nroots = jl_array_nrows(m->roots); if (!m->root_blocks) return key == 0 ? nroots : 0; - uint64_t *rletable = (uint64_t*)jl_array_data(m->root_blocks); - size_t j, nblocks2 = jl_array_len(m->root_blocks); + uint64_t *rletable = jl_array_data(m->root_blocks, uint64_t); + size_t j, nblocks2 = jl_array_nrows(m->root_blocks); int nwithkey = 0; for (j = 0; j < nblocks2; j+=2) { if (rletable[j] == key) diff --git a/src/module.c b/src/module.c index 89c4c6cdb674e..66049031f8790 100644 --- a/src/module.c +++ b/src/module.c @@ -1,4 +1,5 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license +// /* modules and top-level bindings @@ -11,6 +12,65 @@ extern "C" { #endif +// In this translation unit and this translation unit only emit this symbol `extern` for use by julia +EXTERN_INLINE_DEFINE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT; +extern inline enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT; + +static jl_binding_partition_t *new_binding_partition(void) +{ + jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type); + jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD)); + bpart->min_world = 0; + jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1); + jl_atomic_store_relaxed(&bpart->next, NULL); +#ifdef _P64 + bpart->reserved = 0; +#endif + return bpart; +} + +jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) { + if (!b) + return NULL; + assert(jl_is_binding(b)); + jl_value_t *parent = (jl_value_t*)b; + _Atomic(jl_binding_partition_t *)*insert = &b->partitions; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(insert); + size_t max_world = (size_t)-1; + jl_binding_partition_t *new_bpart = NULL; + while (1) { + while (bpart && world < bpart->min_world) { + insert = &bpart->next; + max_world = bpart->min_world - 1; + parent = (jl_value_t *)bpart; + bpart = jl_atomic_load_relaxed(&bpart->next); + } + if (bpart && world <= jl_atomic_load_relaxed(&bpart->max_world)) + return bpart; + if (!new_bpart) + new_bpart = new_binding_partition(); + jl_atomic_store_relaxed(&new_bpart->next, bpart); + jl_gc_wb_fresh(new_bpart, bpart); + new_bpart->min_world = bpart ? jl_atomic_load_relaxed(&bpart->max_world) + 1 : 0; + jl_atomic_store_relaxed(&new_bpart->max_world, max_world); + if (jl_atomic_cmpswap(insert, &bpart, new_bpart)) { + jl_gc_wb(parent, new_bpart); + return new_bpart; + } + } +} + +jl_binding_partition_t *jl_get_binding_partition_all(jl_binding_t *b, size_t min_world, size_t max_world) { + if (!b) + return NULL; + jl_binding_partition_t *bpart = jl_get_binding_partition(b, min_world); + if (!bpart) + return NULL; + if (jl_atomic_load_relaxed(&bpart->max_world) < max_world) + return NULL; + return bpart; +} + JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_names) { jl_task_t *ct = jl_current_task; @@ -28,29 +88,28 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui if (!m->build_id.lo) m->build_id.lo++; // build id 0 is invalid m->build_id.hi = ~(uint64_t)0; - m->primary_world = 0; jl_atomic_store_relaxed(&m->counter, 1); m->nospecialize = 0; m->optlevel = -1; m->compile = -1; m->infer = -1; m->max_methods = -1; + m->file = name; // Using the name as a placeholder is better than nothing + m->line = 0; m->hash = parent == NULL ? 
bitmix(name->hash, jl_module_type->hash) : bitmix(name->hash, parent->hash); JL_MUTEX_INIT(&m->lock, "module->lock"); jl_atomic_store_relaxed(&m->bindings, jl_emptysvec); - jl_atomic_store_relaxed(&m->bindingkeyset, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&m->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any); arraylist_new(&m->usings, 0); - JL_GC_PUSH1(&m); if (jl_core_module && default_names) { + JL_GC_PUSH1(&m); jl_module_using(m, jl_core_module); - } - // export own name, so "using Foo" makes "Foo" itself visible - if (default_names) { + // export own name, so "using Foo" makes "Foo" itself visible jl_set_const(m, name, (jl_value_t*)m); + jl_module_public(m, name, 1); + JL_GC_POP(); } - jl_module_export(m, name); - JL_GC_POP(); return m; } @@ -61,7 +120,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent) uint32_t jl_module_next_counter(jl_module_t *m) { - return jl_atomic_fetch_add(&m->counter, 1); + return jl_atomic_fetch_add_relaxed(&m->counter, 1); } JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports, uint8_t default_names) @@ -163,9 +222,11 @@ static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_bin jl_task_t *ct = jl_current_task; jl_globalref_t *g = (jl_globalref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_globalref_t), jl_globalref_type); g->mod = mod; - jl_gc_wb(g, g->mod); + jl_gc_wb_fresh(g, g->mod); g->name = name; + jl_gc_wb_fresh(g, g->name); g->binding = b; + jl_gc_wb_fresh(g, g->binding); return g; } @@ -175,31 +236,71 @@ static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name) assert(jl_is_module(mod) && jl_is_symbol(name)); jl_binding_t *b = (jl_binding_t*)jl_gc_alloc(ct->ptls, sizeof(jl_binding_t), jl_binding_type); jl_atomic_store_relaxed(&b->value, NULL); - jl_atomic_store_relaxed(&b->owner, NULL); - jl_atomic_store_relaxed(&b->ty, NULL); + jl_atomic_store_relaxed(&b->partitions, NULL); b->globalref = NULL; - b->constp = 0; b->exportp = 0; - b->imported = 0; + b->publicp = 0; b->deprecated = 0; - b->usingfailed = 0; - b->padding = 0; JL_GC_PUSH1(&b); b->globalref = jl_new_globalref(mod, name, b); + jl_gc_wb(b, b->globalref); + jl_atomic_store_relaxed(&b->partitions, NULL); JL_GC_POP(); return b; } +extern jl_mutex_t jl_modules_mutex; + +extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var) +{ + if (jl_current_task->ptls->in_pure_callback) + jl_errorf("new globals cannot be created in a generated function"); + if (jl_options.incremental && jl_generating_output()) { + JL_LOCK(&jl_modules_mutex); + int open = ptrhash_has(&jl_current_modules, (void*)m); + if (!open && jl_module_init_order != NULL) { + size_t i, l = jl_array_len(jl_module_init_order); + for (i = 0; i < l; i++) { + if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) { + open = 1; + break; + } + } + } + JL_UNLOCK(&jl_modules_mutex); + if (!open) { + jl_errorf("Creating a new global in closed module `%s` (`%s`) breaks incremental compilation " + "because the side effects will not be permanent.", + jl_symbol_name(m->name), jl_symbol_name(var)); + } + } +} + static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) JL_GLOBALLY_ROOTED; // get binding for assignment -JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) +JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc) { jl_binding_t *b = jl_get_module_binding(m, var, 1); - - if (b) { - jl_binding_t 
*b2 = NULL; - if (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b) { + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); +retry: + if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) { + if (decode_restriction_kind(pku) != BINDING_KIND_DECLARED) { + check_safe_newbinding(m, var); + if (!alloc) + jl_errorf("Global %s.%s does not exist and cannot be assigned.\n" + "Note: Julia 1.9 and 1.10 inadvertently omitted this error check (#56933).\n" + "Hint: Declare it using `global %s` inside `%s` before attempting assignment.", + jl_symbol_name(m->name), jl_symbol_name(var), + jl_symbol_name(var), jl_symbol_name(m->name)); + } + jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)jl_any_type, BINDING_KIND_GLOBAL); + if (!jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku)) + goto retry; + jl_gc_wb_knownold(bpart, jl_any_type); + } else { jl_module_t *from = jl_binding_dbgmodule(b, m, var); if (from == m) jl_errorf("cannot assign a value to imported variable %s.%s", @@ -209,7 +310,6 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_symbol_name(from->name), jl_symbol_name(var), jl_symbol_name(m->name)); } } - return b; } @@ -222,75 +322,168 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var return b->globalref->mod; // TODO: deprecate this? } +JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b) +{ + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return decode_restriction_value(pku); + return jl_atomic_load_relaxed(&b->value); +} + +JL_DLLEXPORT jl_value_t *jl_get_binding_value_seqcst(jl_binding_t *b) +{ + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return decode_restriction_value(pku); + return jl_atomic_load(&b->value); +} + +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b) +{ + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return NULL; + return decode_restriction_value(pku); +} + +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved_and_const(jl_binding_t *b) +{ + // Unlike jl_get_binding_value_if_const this doesn't try to allocate new binding partitions if they + // don't already exist, making this JL_NOTSAFEPOINT. 
+ if (!b) + return NULL; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); + if (!bpart) + return NULL; + size_t max_world = jl_atomic_load_relaxed(&bpart->max_world); + if (bpart->min_world > jl_current_task->world_age || jl_current_task->world_age > max_world) + return NULL; + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return NULL; + return decode_restriction_value(pku); +} + +JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_resolved(jl_binding_t *b) +{ + // Unlike jl_get_binding_value this doesn't try to allocate new binding partitions if they + // don't already exist, making this JL_NOTSAFEPOINT. + if (!b) + return NULL; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); + if (!bpart) + return NULL; + size_t max_world = jl_atomic_load_relaxed(&bpart->max_world); + if (bpart->min_world > jl_current_task->world_age || jl_current_task->world_age > max_world) + return NULL; + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) + return NULL; + if (jl_bkind_is_some_import(decode_restriction_kind(pku))) + return NULL; + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return decode_restriction_value(pku); + return jl_atomic_load_relaxed(&b->value); +} + +JL_DLLEXPORT jl_value_t *jl_bpart_get_restriction_value(jl_binding_partition_t *bpart) +{ + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + jl_value_t *v = decode_restriction_value(pku); + if (!v) + jl_throw(jl_undefref_exception); + return v; +} + +typedef struct _modstack_t { + jl_module_t *m; + jl_sym_t *var; + struct _modstack_t *prev; +} modstack_t; +static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st); + +JL_DLLEXPORT jl_value_t *jl_reresolve_binding_value_seqcst(jl_binding_t *b) +{ + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (jl_bkind_is_some_guard(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)))) { + jl_resolve_owner(b, b->globalref->mod, b->globalref->name, NULL); + } + return jl_get_binding_value_seqcst(b); +} + // get binding for adding a method -// like jl_get_binding_wr, but has different error paths +// like jl_get_binding_wr, but has different error paths and messages JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 1); - - jl_binding_t *b2 = NULL; - if (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b) { - jl_value_t *f = jl_atomic_load_relaxed(&b2->value); - jl_module_t *from = jl_binding_dbgmodule(b, m, var); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) { + if (decode_restriction_kind(pku) != BINDING_KIND_DECLARED) { + check_safe_newbinding(m, var); + } + return b; + } + jl_value_t *f = jl_get_binding_value_if_const(b); if (f == NULL) { + jl_module_t *from = jl_binding_dbgmodule(b, m, var); // we must have implicitly imported this with using, so call 
jl_binding_dbgmodule to try to get the name of the module we got this from jl_errorf("invalid method definition in %s: exported function %s.%s does not exist", - jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); + jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); } // TODO: we might want to require explicitly importing types to add constructors // or we might want to drop this error entirely - if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { + if (decode_restriction_kind(pku) != BINDING_KIND_IMPORTED && !(f && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { + jl_module_t *from = jl_binding_dbgmodule(b, m, var); jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended", - jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); + jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); } - return b2; + return b; } - return b; } -typedef struct _modstack_t { - jl_module_t *m; - jl_sym_t *var; - struct _modstack_t *prev; -} modstack_t; - -static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st); - -static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; - -#ifndef __clang_gcanalyzer__ -// The analyzer doesn't like looking through the arraylist, so just model the -// access for it using this function -static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { - return (jl_module_t*)m->usings.items[i]; -} -#endif - -static int eq_bindings(jl_binding_t *owner, jl_binding_t *alias) +static int eq_bindings(jl_binding_partition_t *owner, jl_binding_t *alias, size_t world) { - assert(owner == jl_atomic_load_relaxed(&owner->owner)); - if (owner == alias) - return 1; - alias = jl_atomic_load_relaxed(&alias->owner); - if (owner == alias) + jl_ptr_kind_union_t owner_pku = jl_atomic_load_relaxed(&owner->restriction); + assert(decode_restriction_kind(owner_pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(owner_pku) == BINDING_KIND_DECLARED || + jl_bkind_is_some_constant(decode_restriction_kind(owner_pku))); + jl_binding_partition_t *alias_bpart = jl_get_binding_partition(alias, world); + if (owner == alias_bpart) return 1; - if (owner->constp && alias->constp && jl_atomic_load_relaxed(&owner->value) && jl_atomic_load_relaxed(&alias->value) == jl_atomic_load_relaxed(&owner->value)) + jl_ptr_kind_union_t alias_pku = jl_walk_binding_inplace(&alias, &alias_bpart, world); + if (jl_bkind_is_some_constant(decode_restriction_kind(owner_pku)) && + jl_bkind_is_some_constant(decode_restriction_kind(alias_pku)) && + decode_restriction_value(owner_pku) && + decode_restriction_value(alias_pku) == decode_restriction_value(owner_pku)) return 1; - return 0; + return owner == alias_bpart; } // find a binding from a module's `usings` list static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, jl_module_t **from, modstack_t *st, int warn) { jl_binding_t *b = NULL; + jl_binding_partition_t *bpart = NULL; jl_module_t *owner = NULL; JL_LOCK(&m->lock); - int i = (int)m->usings.len - 1; + int i = (int)module_usings_length(m) - 1; JL_UNLOCK(&m->lock); for (; i >= 0; --i) { JL_LOCK(&m->lock); - jl_module_t *imp = module_usings_getidx(m, i); + jl_module_t *imp = module_usings_getmod(m, i); JL_UNLOCK(&m->lock); jl_binding_t 
*tempb = jl_get_module_binding(imp, var, 0); if (tempb != NULL && tempb->exportp) { @@ -298,24 +491,24 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl if (tempb == NULL) // couldn't resolve; try next using (see issue #6105) continue; - assert(jl_atomic_load_relaxed(&tempb->owner) == tempb); - if (b != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempb, b)) { + jl_binding_partition_t *tempbpart = jl_get_binding_partition(tempb, jl_current_task->world_age); + jl_ptr_kind_union_t tempb_pku = jl_atomic_load_relaxed(&tempbpart->restriction); + assert(decode_restriction_kind(tempb_pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(tempb_pku) == BINDING_KIND_DECLARED || jl_bkind_is_some_constant(decode_restriction_kind(tempb_pku))); + (void)tempb_pku; + if (bpart != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempbpart, b, jl_current_task->world_age)) { if (warn) { // set usingfailed=1 to avoid repeating this warning // the owner will still be NULL, so it can be later imported or defined tempb = jl_get_module_binding(m, var, 1); - tempb->usingfailed = 1; - jl_printf(JL_STDERR, - "WARNING: both %s and %s export \"%s\"; uses of it in module %s must be qualified\n", - jl_symbol_name(owner->name), - jl_symbol_name(imp->name), jl_symbol_name(var), - jl_symbol_name(m->name)); + tempbpart = jl_get_binding_partition(tempb, jl_current_task->world_age); + jl_atomic_store_release(&tempbpart->restriction, encode_restriction(NULL, BINDING_KIND_FAILED)); } return NULL; } if (owner == NULL || !tempb->deprecated) { owner = imp; b = tempb; + bpart = tempbpart; } } } @@ -327,13 +520,14 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl // this might not be the same as the owner of the binding, since the binding itself may itself have been imported from elsewhere static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) { - jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); - if (b2 != b && !b->imported) { + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) != BINDING_KIND_GLOBAL) { // for implicitly imported globals, try to re-resolve it to find the module we got it from most directly jl_module_t *from = NULL; - b = using_resolve_binding(m, var, &from, NULL, 0); - if (b) { - if (b2 == NULL || jl_atomic_load_relaxed(&b->owner) == jl_atomic_load_relaxed(&b2->owner)) + jl_binding_t *b2 = using_resolve_binding(m, var, &from, NULL, 0); + if (b2) { + jl_binding_partition_t *b2part = jl_get_binding_partition(b2, jl_current_task->world_age); + if (eq_bindings(b2part, b, jl_current_task->world_age)) return from; // if we did not find it (or accidentally found a different one), ignore this } @@ -348,10 +542,16 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t * { if (b == NULL) b = jl_get_module_binding(m, var, 1); - jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); - if (b2 == NULL) { - if (b->usingfailed) - return NULL; + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); +retry: + if (decode_restriction_kind(pku) == BINDING_KIND_FAILED) + return NULL; + if (decode_restriction_kind(pku) == BINDING_KIND_DECLARED) { + return b; + } + if (decode_restriction_kind(pku) == BINDING_KIND_GUARD) { + jl_binding_t *b2 = NULL; modstack_t 
top = { m, var, st }; modstack_t *tmp = st; for (; tmp != NULL; tmp = tmp->prev) { @@ -366,19 +566,18 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t * return NULL; assert(from); JL_GC_PROMISE_ROOTED(from); // gc-analysis does not understand output parameters + JL_GC_PROMISE_ROOTED(b2); if (b2->deprecated) { - if (jl_atomic_load_relaxed(&b2->value) == jl_nothing) { + if (jl_get_binding_value(b2) == jl_nothing) { // silently skip importing deprecated values assigned to nothing (to allow later mutation) return NULL; } } // do a full import to prevent the result of this lookup from // changing, for example if this var is assigned to later. - jl_binding_t *owner = NULL; - if (!jl_atomic_cmpswap(&b->owner, &owner, b2)) { - // concurrent import - return owner; - } + if (!jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction((jl_value_t*)b2, BINDING_KIND_IMPLICIT))) + goto retry; + jl_gc_wb(bpart, b2); if (b2->deprecated) { b->deprecated = 1; // we will warn about this below, but we might want to warn at the use sites too if (m != jl_main_module && m != jl_base_module && @@ -393,27 +592,26 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t * jl_binding_dep_message(from, var, b2); } } + return b2; } - assert(jl_atomic_load_relaxed(&b2->owner) == b2); - return b2; -} - -JL_DLLEXPORT jl_binding_t *jl_get_binding_if_bound(jl_module_t *m, jl_sym_t *var) -{ - jl_binding_t *b = jl_get_module_binding(m, var, 0); - return b == NULL ? NULL : jl_atomic_load_relaxed(&b->owner); + jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + return b; } - // get the current likely owner of binding when accessing m.var, without resolving the binding (it may change later) JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var) { - jl_binding_t *b = jl_get_module_binding(m, var, 0); + jl_binding_t *b = jl_get_module_binding(m, var, 1); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); jl_module_t *from = m; - if (b == NULL || (!b->usingfailed && jl_atomic_load_relaxed(&b->owner) == NULL)) + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (decode_restriction_kind(pku) == BINDING_KIND_GUARD) { b = using_resolve_binding(m, var, &from, NULL, 0); - else - b = jl_atomic_load_relaxed(&b->owner); + bpart = jl_get_binding_partition(b, jl_current_task->world_age); + } + pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) + return NULL; return b; } @@ -421,13 +619,20 @@ JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var) JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 0); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); if (b == NULL) return jl_nothing; - b = jl_atomic_load_relaxed(&b->owner); - if (b == NULL) + jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) return jl_nothing; - jl_value_t *ty = jl_atomic_load_relaxed(&b->ty); - return ty ? 
ty : jl_nothing; + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + // TODO: We would like to return the type of the constant, but + // currently code relies on this returning any to bypass conversion + // before an attempted assignment to a constant. + // return jl_typeof(jl_atomic_load_relaxed(&bpart->restriction)); + return (jl_value_t*)jl_any_type; + } + return decode_restriction_value(pku); } JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m, jl_sym_t *var) @@ -439,7 +644,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var { jl_binding_t *b = jl_get_binding(m, var); if (b == NULL) - jl_undefined_var_error(var); + jl_undefined_var_error(var, (jl_value_t*)m); // XXX: this only considers if the original is deprecated, not the binding in m if (b->deprecated) jl_binding_deprecation_warning(m, var, b); @@ -458,7 +663,8 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var) JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 0); - return b && b->imported; + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + return b && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_IMPORTED; } extern const char *jl_filename; @@ -477,7 +683,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t jl_binding_t *dep_message_binding = jl_get_binding(m, jl_symbol(dep_binding_name)); jl_value_t *dep_message = NULL; if (dep_message_binding != NULL) - dep_message = jl_atomic_load_relaxed(&dep_message_binding->value); + dep_message = jl_get_binding_value(dep_message_binding); JL_GC_PUSH1(&dep_message); if (dep_message != NULL) { if (jl_is_string(dep_message)) { @@ -488,7 +694,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t } } else { - jl_value_t *v = jl_atomic_load_relaxed(&b->value); + jl_value_t *v = jl_get_binding_value(b); dep_message = v; // use as gc-root if (v) { if (jl_is_type(v) || jl_is_module(v)) { @@ -525,9 +731,12 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname, jl_symbol_name(to->name)); } else { - assert(jl_atomic_load_relaxed(&b->owner) == b); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + assert(decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(pku) == BINDING_KIND_DECLARED || jl_bkind_is_some_constant(decode_restriction_kind(pku))); + (void)pku; if (b->deprecated) { - if (jl_atomic_load_relaxed(&b->value) == jl_nothing) { + if (jl_get_binding_value(b) == jl_nothing) { // silently skip importing deprecated values assigned to nothing (to allow later mutation) return; } @@ -551,17 +760,30 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname, // importing a binding on top of itself. harmless. 
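The accessors above all follow one read pattern: fetch the binding partition for the current world, load its restriction word once, decode the kind, and only then interpret the payload. A minimal sketch of that idiom, reusing the same accessors; the helper name example_constant_or_null is hypothetical and not part of this patch, and the world-range check done by the real accessors is omitted:

static jl_value_t *example_constant_or_null(jl_binding_t *b, size_t world)
{
    // Hypothetical helper: return the constant bound to `b` in `world`, or NULL.
    if (b == NULL)
        return NULL;
    jl_binding_partition_t *bpart = jl_get_binding_partition(b, world);
    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
    enum jl_partition_kind kind = decode_restriction_kind(pku);
    if (jl_bkind_is_some_guard(kind) || jl_bkind_is_some_import(kind))
        return NULL;                          // unresolved, or the payload is another binding
    if (jl_bkind_is_some_constant(kind))
        return decode_restriction_value(pku); // payload is the constant value itself
    return NULL;                              // otherwise the payload is a type restriction
}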
return; } - jl_binding_t *ownerto = NULL; - if (jl_atomic_cmpswap(&bto->owner, &ownerto, b)) { - bto->imported |= (explici != 0); + jl_binding_partition_t *btopart = jl_get_binding_partition(bto, jl_current_task->world_age); + jl_ptr_kind_union_t bto_pku = jl_atomic_load_relaxed(&btopart->restriction); +retry: + if (decode_restriction_kind(bto_pku) == BINDING_KIND_GUARD || + decode_restriction_kind(bto_pku) == BINDING_KIND_IMPLICIT || + decode_restriction_kind(bto_pku) == BINDING_KIND_FAILED) { + + jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)b, (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT); + if (!jl_atomic_cmpswap(&btopart->restriction, &bto_pku, new_pku)) + goto retry; + jl_gc_wb(btopart, b); bto->deprecated |= b->deprecated; // we already warned about this above, but we might want to warn at the use sites too } else { - if (eq_bindings(b, bto)) { - // already imported - bto->imported |= (explici != 0); + if (eq_bindings(bpart, bto, jl_current_task->world_age)) { + // already imported - potentially upgrade to _IMPORTED or _EXPLICIT + if (jl_bkind_is_some_import(decode_restriction_kind(bto_pku))) { + jl_ptr_kind_union_t new_pku = encode_restriction(decode_restriction_value(bto_pku), (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT); + if (!jl_atomic_cmpswap(&btopart->restriction, &bto_pku, new_pku)) + goto retry; + // No wb, because the value is unchanged + } } - else if (ownerto != bto) { + else if (jl_bkind_is_some_import(decode_restriction_kind(bto_pku))) { // already imported from somewhere else jl_printf(JL_STDERR, "WARNING: ignoring conflicting import of %s.%s into %s\n", @@ -599,19 +821,24 @@ JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t module_import_(to, from, asname, s, 0); } - JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from) { if (to == from) return; JL_LOCK(&to->lock); - for (size_t i = 0; i < to->usings.len; i++) { - if (from == to->usings.items[i]) { + for (size_t i = 0; i < module_usings_length(to); i++) { + if (from == module_usings_getmod(to, i)) { JL_UNLOCK(&to->lock); return; } } - arraylist_push(&to->usings, from); + struct _jl_module_using new_item = { + .mod = from, + .min_world = 0, + .max_world = (size_t)-1 + }; + arraylist_grow(&to->usings, sizeof(struct _jl_module_using)/sizeof(void*)); + memcpy(&to->usings.items[to->usings.len-3], &new_item, sizeof(struct _jl_module_using)); jl_gc_wb(to, from); JL_UNLOCK(&to->lock); @@ -620,43 +847,68 @@ JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from) // silently override a "using" name. see issue #2054. jl_svec_t *table = jl_atomic_load_relaxed(&from->bindings); for (size_t i = 0; i < jl_svec_len(table); i++) { - jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i); + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); if ((void*)b == jl_nothing) break; - if (b->exportp && (jl_atomic_load_relaxed(&b->owner) == b || b->imported)) { + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (b->exportp && (decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(pku) == BINDING_KIND_IMPORTED)) { jl_sym_t *var = b->globalref->name; jl_binding_t *tob = jl_get_module_binding(to, var, 0); - if (tob && jl_atomic_load_relaxed(&tob->owner) != NULL && - // don't warn for conflicts with the module name itself. 
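Both module_import_ here and jl_resolve_owner earlier rely on the same lock-free upgrade idiom: read the restriction word, build the desired replacement with encode_restriction, and retry jl_atomic_cmpswap until it either lands or a conflicting kind appears. A condensed sketch of just that loop; example_try_upgrade_import is a hypothetical name, and the warnings and error reporting of the real code are omitted:

static int example_try_upgrade_import(jl_binding_partition_t *bpart, jl_binding_t *from, int explici)
{
    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
    while (decode_restriction_kind(pku) == BINDING_KIND_GUARD ||
           decode_restriction_kind(pku) == BINDING_KIND_IMPLICIT ||
           decode_restriction_kind(pku) == BINDING_KIND_FAILED) {
        jl_ptr_kind_union_t desired = encode_restriction((jl_value_t*)from,
            (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT);
        if (jl_atomic_cmpswap(&bpart->restriction, &pku, desired)) {
            jl_gc_wb(bpart, from); // write barrier, as in module_import_
            return 1;              // upgrade installed
        }
        // a failed cmpswap reloads pku with the competing value; re-check its kind
    }
    return 0; // a conflicting kind is already present; the caller decides how to report it
}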
- // see issue #4715 - var != to->name && - !eq_bindings(jl_atomic_load_relaxed(&tob->owner), b)) { - jl_printf(JL_STDERR, - "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n", - jl_symbol_name(from->name), jl_symbol_name(var), - jl_symbol_name(to->name)); + if (tob) { + jl_binding_partition_t *tobpart = jl_get_binding_partition(tob, jl_current_task->world_age); + jl_ptr_kind_union_t tobpku = jl_walk_binding_inplace(&tob, &tobpart, jl_current_task->world_age); + if (tob && decode_restriction_kind(tobpku) != BINDING_KIND_GUARD && + // don't warn for conflicts with the module name itself. + // see issue #4715 + var != to->name && + !eq_bindings(tobpart, b, jl_current_task->world_age)) { + jl_printf(JL_STDERR, + "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n", + jl_symbol_name(from->name), jl_symbol_name(var), + jl_symbol_name(to->name)); + } } } table = jl_atomic_load_relaxed(&from->bindings); } } -JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s) +JL_DLLEXPORT void jl_module_public(jl_module_t *from, jl_sym_t *s, int exported) { jl_binding_t *b = jl_get_module_binding(from, s, 1); - b->exportp = 1; + if (b->publicp) { + // check for conflicting declarations + if (b->exportp && !exported) + jl_errorf("cannot declare %s.%s public; it is already declared exported", + jl_symbol_name(from->name), jl_symbol_name(s)); + if (!b->exportp && exported) + jl_errorf("cannot declare %s.%s exported; it is already declared public", + jl_symbol_name(from->name), jl_symbol_name(s)); + } + b->publicp = 1; + b->exportp |= exported; } -JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var) +JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import) // unlike most queries here, this is currently seq_cst { - jl_binding_t *b = jl_get_binding(m, var); - return b && (jl_atomic_load_relaxed(&b->value) != NULL); + jl_binding_t *b = jl_get_module_binding(m, var, allow_import); + if (!b) + return 0; + if (!allow_import) { + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (!bpart || jl_bkind_is_some_import(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)))) + return 0; + return jl_get_binding_value(b) != NULL; + } + return jl_reresolve_binding_value_seqcst(b) != NULL; } JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 0); - return b && (b->exportp || jl_atomic_load_relaxed(&b->owner) == b); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + return b && (b->exportp || decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL); } JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) @@ -665,21 +917,33 @@ JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) return b && b->exportp; } +JL_DLLEXPORT int jl_module_public_p(jl_module_t *m, jl_sym_t *var) +{ + jl_binding_t *b = jl_get_module_binding(m, var, 0); + return b && b->publicp; +} + JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 0); - return b && jl_atomic_load_relaxed(&b->owner) != NULL; + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (!bpart) + return 0; + enum jl_partition_kind kind = decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)); + return kind == BINDING_KIND_DECLARED || 
!jl_bkind_is_some_guard(kind);
 }
-static uint_t bindingkey_hash(size_t idx, jl_svec_t *data)
+uint_t bindingkey_hash(size_t idx, jl_value_t *data)
 {
-    jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx);
+    jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); // This must always happen inside the lock
     jl_sym_t *var = b->globalref->name;
     return var->hash;
 }
-static int bindingkey_eq(size_t idx, const void *var, jl_svec_t *data, uint_t hv)
+static int bindingkey_eq(size_t idx, const void *var, jl_value_t *data, uint_t hv)
 {
+    if (idx >= jl_svec_len(data))
+        return 0; // We got an OOB access, probably due to a data race
     jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx);
     jl_sym_t *name = b->globalref->name;
     return var == name;
@@ -689,11 +953,12 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
 {
     uint_t hv = var->hash;
     for (int locked = 0; ; locked++) {
-        jl_array_t *bindingkeyset = jl_atomic_load_acquire(&m->bindingkeyset);
+        jl_genericmemory_t *bindingkeyset = jl_atomic_load_acquire(&m->bindingkeyset);
         jl_svec_t *bindings = jl_atomic_load_relaxed(&m->bindings);
-        ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, bindings, hv); // acquire
+        ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, (jl_value_t*)bindings, hv, 0); // acquire
         if (idx != -1) {
             jl_binding_t *b = (jl_binding_t*)jl_svecref(bindings, idx); // relaxed
+            JL_GC_PROMISE_ROOTED(b);
             if (locked)
                 JL_UNLOCK(&m->lock);
             return b;
@@ -725,7 +990,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
         jl_binding_t *b = new_binding(m, var);
         assert(jl_svecref(bindings, i) == jl_nothing);
         jl_svecset(bindings, i, b); // relaxed
-        jl_smallintset_insert(&m->bindingkeyset, (jl_value_t*)m, bindingkey_hash, i, bindings); // release
+        jl_smallintset_insert(&m->bindingkeyset, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings); // release
         JL_UNLOCK(&m->lock);
         return b;
     }
@@ -738,7 +1003,7 @@ JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr)
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
     // ignores b->deprecated
-    return b == NULL ? NULL : jl_atomic_load_relaxed(&b->value);
+    return b == NULL ?
NULL : jl_get_binding_value(b); } JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var) @@ -749,53 +1014,85 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var) // XXX: this only considers if the original is deprecated, not the binding in m if (b->deprecated) jl_binding_deprecation_warning(m, var, b); - return jl_atomic_load_relaxed(&b->value); + return jl_get_binding_value(b); } JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT) { - jl_binding_t *bp = jl_get_binding_wr(m, var); + jl_binding_t *bp = jl_get_binding_wr(m, var, 0); jl_checked_assignment(bp, m, var, val); } JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT) { // this function is mostly only used during initialization, so the data races here are not too important to us - jl_binding_t *bp = jl_get_binding_wr(m, var); - if (jl_atomic_load_relaxed(&bp->value) == NULL) { - jl_value_t *old_ty = NULL; - jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type); - uint8_t constp = 0; - // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) { - if (constp = bp->constp, bp->constp = 1, constp == 0) { - jl_value_t *old = NULL; - if (jl_atomic_cmpswap(&bp->value, &old, val)) { - jl_gc_wb_binding(bp, val); - return; - } - } + jl_binding_t *bp = jl_get_module_binding(m, var, 1); + jl_binding_partition_t *bpart = jl_get_binding_partition(bp, jl_current_task->world_age); + jl_atomic_store_release(&bpart->restriction, encode_restriction(val, BINDING_KIND_CONST)); + jl_gc_wb(bpart, val); +} + +void jl_invalidate_binding_refs(jl_globalref_t *ref, jl_binding_partition_t *invalidated_bpart, size_t new_world) +{ + static jl_value_t *invalidate_code_for_globalref = NULL; + if (invalidate_code_for_globalref == NULL && jl_base_module != NULL) + invalidate_code_for_globalref = jl_get_global(jl_base_module, jl_symbol("invalidate_code_for_globalref!")); + if (!invalidate_code_for_globalref) + jl_error("Binding invalidation is not permitted during bootstrap."); + if (jl_generating_output()) + jl_error("Binding invalidation is not permitted during image generation."); + jl_value_t *boxed_world = jl_box_ulong(new_world); + JL_GC_PUSH1(&boxed_world); + jl_call3((jl_function_t*)invalidate_code_for_globalref, (jl_value_t*)ref, (jl_value_t*)invalidated_bpart, boxed_world); + JL_GC_POP(); +} + +extern jl_mutex_t world_counter_lock; +JL_DLLEXPORT void jl_disable_binding(jl_globalref_t *gr) +{ + jl_binding_t *b = gr->binding; + b = jl_resolve_owner(b, gr->mod, gr->name, NULL); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + + if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GUARD) { + // Already guard + return; } - jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var)); + + JL_LOCK(&world_counter_lock); + jl_task_t *ct = jl_current_task; + size_t last_world = ct->world_age; + size_t new_max_world = jl_atomic_load_acquire(&jl_world_counter); + jl_atomic_store_release(&bpart->max_world, new_max_world); + ct->world_age = jl_typeinf_world; + jl_invalidate_binding_refs(gr, bpart, new_max_world); + ct->world_age = last_world; + jl_atomic_store_release(&jl_world_counter, new_max_world + 1); + JL_UNLOCK(&world_counter_lock); } JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr) { jl_binding_t *b = gr->binding; b = jl_resolve_owner(b, gr->mod, gr->name, NULL); - return b && b->constp; + 
jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (!bpart) + return 0; + return jl_bkind_is_some_constant(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction))); } JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr) { jl_binding_t *b = gr->binding; b = jl_resolve_owner(b, gr->mod, gr->name, NULL); - return b && jl_atomic_load_relaxed(&b->value) != NULL; + return b && jl_get_binding_value(b) != NULL; } JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_binding(m, var); - return b && b->constp; + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + return b && jl_bkind_is_some_constant(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction))); } // set the deprecated flag for a binding: @@ -825,7 +1122,6 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b if (b->deprecated == 1 && jl_options.depwarn) { if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR) jl_printf(JL_STDERR, "WARNING: "); - assert(jl_atomic_load_relaxed(&b->owner) == b); jl_printf(JL_STDERR, "%s.%s is deprecated", jl_symbol_name(m->name), jl_symbol_name(s)); jl_binding_dep_message(m, s, b); @@ -844,57 +1140,92 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b } } -JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs) +jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign) { - jl_value_t *old_ty = NULL; - if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type)) { - if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) { - JL_GC_PUSH1(&rhs); // callee-rooted - if (!jl_isa(rhs, old_ty)) - jl_errorf("cannot assign an incompatible value to the global %s.%s.", - jl_symbol_name(mod->name), jl_symbol_name(var)); + JL_GC_PUSH1(&rhs); // callee-rooted + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku))); + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + jl_value_t *old = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old); + if (jl_egal(rhs, old)) { JL_GC_POP(); + return NULL; } + if (jl_typeof(rhs) == jl_typeof(old)) + jl_errorf("invalid redefinition of constant %s.%s. 
This redefinition may be permitted using the `const` keyword.", + jl_symbol_name(mod->name), jl_symbol_name(var)); + else + jl_errorf("invalid redefinition of constant %s.%s.", + jl_symbol_name(mod->name), jl_symbol_name(var)); } - if (b->constp) { - jl_value_t *old = NULL; - if (jl_atomic_cmpswap(&b->value, &old, rhs)) { - jl_gc_wb_binding(b, rhs); - return; - } - if (jl_egal(rhs, old)) - return; - if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs)) { - jl_errorf("invalid redefinition of constant %s.%s", - jl_symbol_name(mod->name), jl_symbol_name(var)); + jl_value_t *old_ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old_ty); + if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) { + if (!jl_isa(rhs, old_ty)) + jl_errorf("cannot assign an incompatible value to the global %s.%s.", + jl_symbol_name(mod->name), jl_symbol_name(var)); + } + JL_GC_POP(); + return old_ty; +} - } - jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n", - jl_symbol_name(mod->name), jl_symbol_name(var)); +JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs) +{ + if (jl_check_binding_wr(b, mod, var, rhs, 1) != NULL) { + jl_atomic_store_release(&b->value, rhs); + jl_gc_wb(b, rhs); } - jl_atomic_store_release(&b->value, rhs); - jl_gc_wb_binding(b, rhs); } -JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var) +JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs) +{ + jl_check_binding_wr(b, mod, var, rhs, 0); + jl_value_t *old = jl_atomic_exchange(&b->value, rhs); + jl_gc_wb(b, rhs); + if (__unlikely(old == NULL)) + jl_undefined_var_error(var, (jl_value_t*)mod); + return old; +} + +JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs) { - // n.b. 
jl_get_binding_wr should have ensured b->owner == b as mod.var - if (jl_atomic_load_relaxed(&b->owner) != b || (jl_atomic_load_relaxed(&b->value) != NULL && !b->constp)) { - jl_errorf("cannot declare %s.%s constant; it already has a value", + jl_value_t *ty = jl_check_binding_wr(b, mod, var, rhs, 0); + return replace_value(ty, &b->value, (jl_value_t*)b, expected, rhs, 1, mod, var); +} + +JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs) +{ + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku))); + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) + jl_errorf("invalid redefinition of constant %s.%s", jl_symbol_name(mod->name), jl_symbol_name(var)); - } - b->constp = 1; + jl_value_t *ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(ty); + return modify_value(ty, &b->value, (jl_value_t*)b, op, rhs, 1, mod, var); +} + +JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs ) +{ + jl_check_binding_wr(b, mod, var, rhs, 0); + jl_value_t *old = NULL; + if (jl_atomic_cmpswap(&b->value, &old, rhs)) + jl_gc_wb(b, rhs); + return old; } JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m) { JL_LOCK(&m->lock); - int j = m->usings.len; + int j = module_usings_length(m); jl_array_t *a = jl_alloc_array_1d(jl_array_any_type, j); JL_GC_PUSH1(&a); for (int i = 0; j > 0; i++) { j--; - jl_module_t *imp = (jl_module_t*)m->usings.items[i]; + jl_module_t *imp = module_usings_getmod(m, i); jl_array_ptr_set(a, j, (jl_value_t*)imp); } JL_UNLOCK(&m->lock); // may gc @@ -902,26 +1233,55 @@ JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m) return (jl_value_t*)a; } -JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported) +void _append_symbol_to_bindings_array(jl_array_t* a, jl_sym_t *name) { + jl_array_grow_end(a, 1); + //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes: + jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)name); +} + +void append_module_names(jl_array_t* a, jl_module_t *m, int all, int imported, int usings) { - jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0); - JL_GC_PUSH1(&a); jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); for (size_t i = 0; i < jl_svec_len(table); i++) { - jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i); + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); if ((void*)b == jl_nothing) break; jl_sym_t *asname = b->globalref->name; int hidden = jl_symbol_name(asname)[0]=='#'; - if ((b->exportp || - (imported && b->imported) || - (jl_atomic_load_relaxed(&b->owner) == b && !b->imported && (all || m == jl_main_module))) && - (all || (!b->deprecated && !hidden))) { - jl_array_grow_end(a, 1); - // n.b. 
change to jl_arrayset if array storage allocation for Array{Symbols,1} changes: - jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)asname); - } - table = jl_atomic_load_relaxed(&m->bindings); + int main_public = (m == jl_main_module && !(asname == jl_eval_sym || asname == jl_include_sym)); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + enum jl_partition_kind kind = decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)); + if (((b->publicp) || + (imported && (kind == BINDING_KIND_CONST_IMPORT || kind == BINDING_KIND_IMPORTED)) || + (usings && kind == BINDING_KIND_EXPLICIT) || + ((kind == BINDING_KIND_GLOBAL || kind == BINDING_KIND_CONST || kind == BINDING_KIND_DECLARED) && (all || main_public))) && + (all || (!b->deprecated && !hidden))) + _append_symbol_to_bindings_array(a, asname); + } +} + +void append_exported_names(jl_array_t* a, jl_module_t *m, int all) +{ + jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b == jl_nothing) + break; + if (b->exportp && (all || !b->deprecated)) + _append_symbol_to_bindings_array(a, b->globalref->name); + } +} + +JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported, int usings) +{ + jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0); + JL_GC_PUSH1(&a); + append_module_names(a, m, all, imported, usings); + if (usings) { + // If `usings` is specified, traverse the list of `using`-ed modules and incorporate + // the names exported by those modules into the list. + for (int i = module_usings_length(m)-1; i >= 0; i--) + append_exported_names(a, module_usings_getmod(m, i), all); } JL_GC_POP(); return (jl_value_t*)a; @@ -938,11 +1298,20 @@ jl_module_t *jl_module_root(jl_module_t *m) } } +JL_DLLEXPORT jl_sym_t *jl_module_getloc(jl_module_t *m, int32_t *line) +{ + if (line) { + *line = m->line; + } + return m->file; +} + JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; } JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; } // TODO: make this part of the module constructor and read-only? 
JL_DLLEXPORT void jl_set_module_uuid(jl_module_t *m, jl_uuid_t uuid) { m->uuid = uuid; } +JL_DLLEXPORT void jl_set_module_parent(jl_module_t *m, jl_module_t *parent) { m->parent = parent; } int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT { @@ -964,24 +1333,36 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m) JL_LOCK(&m->lock); jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); for (size_t i = 0; i < jl_svec_len(table); i++) { - jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i); + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); if ((void*)b == jl_nothing) break; - if (jl_atomic_load_relaxed(&b->owner) && jl_atomic_load_relaxed(&b->owner) != b && !b->imported) - jl_atomic_store_relaxed(&b->owner, NULL); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_IMPLICIT) { + jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD)); + } } JL_UNLOCK(&m->lock); } +JL_DLLEXPORT void jl_add_to_module_init_list(jl_value_t *mod) +{ + if (jl_module_init_order == NULL) + jl_module_init_order = jl_alloc_vec_any(0); + jl_array_ptr_1d_push(jl_module_init_order, mod); +} + +JL_DLLEXPORT jl_svec_t *jl_module_get_bindings(jl_module_t *m) +{ + return jl_atomic_load_relaxed(&m->bindings); +} + JL_DLLEXPORT void jl_init_restored_module(jl_value_t *mod) { if (!jl_generating_output() || jl_options.incremental) { jl_module_run_initializer((jl_module_t*)mod); } else { - if (jl_module_init_order == NULL) - jl_module_init_order = jl_alloc_vec_any(0); - jl_array_ptr_1d_push(jl_module_init_order, mod); + jl_add_to_module_init_list(mod); } } diff --git a/src/mtarraylist.c b/src/mtarraylist.c new file mode 100644 index 0000000000000..0a0f3fe867e39 --- /dev/null +++ b/src/mtarraylist.c @@ -0,0 +1,81 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include "julia.h" +#include "julia_internal.h" +#include "julia_assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// this file provides some alternate API functions for small_arraylist (push and add) +// which can be safely observed from other threads concurrently +// there is only permitted to be a single writer thread (or a mutex) +// but there can be any number of observers + +typedef struct { + _Atomic(size_t) len; + size_t max; + _Atomic(_Atomic(void*)*) items; + _Atomic(void*) _space[SMALL_AL_N_INLINE]; +} small_mtarraylist_t; + +// change capacity to at least newlen +static void mtarraylist_resizeto(small_mtarraylist_t *a, size_t len, size_t newlen) JL_NOTSAFEPOINT +{ + size_t max = a->max; + if (newlen > max) { + size_t nm = max * 2; + if (nm == 0) + nm = 1; + while (newlen > nm) + nm *= 2; + void *olditems = (void*)jl_atomic_load_relaxed(&a->items); + void *p = calloc_s(nm * sizeof(void*)); + memcpy(p, olditems, len * sizeof(void*)); + jl_atomic_store_release(&a->items, (_Atomic(void*)*)p); + a->max = nm; + if (olditems != (void*)&a->_space[0]) { + jl_task_t *ct = jl_current_task; + small_arraylist_push(&ct->ptls->lazily_freed_mtarraylist_buffers, olditems); + } + } +} + +// single-threaded +void mtarraylist_push(small_arraylist_t *_a, void *elt) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + size_t len = jl_atomic_load_relaxed(&a->len); + mtarraylist_resizeto(a, len, len + 1); + jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[len], elt); + jl_atomic_store_release(&a->len, len + 1); +} + +// single-threaded +void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + size_t len = jl_atomic_load_relaxed(&a->len); + mtarraylist_resizeto(a, len, idx + 1); + jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[idx], elt); + if (jl_atomic_load_relaxed(&a->len) < idx + 1) + jl_atomic_store_release(&a->len, idx + 1); +} + +// concurrent-safe +size_t mtarraylist_length(small_arraylist_t *_a) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + return jl_atomic_load_relaxed(&a->len); +} + +// concurrent-safe +void *mtarraylist_get(small_arraylist_t *_a, size_t idx) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + size_t len = jl_atomic_load_acquire(&a->len); + if (idx >= len) + return NULL; + return jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&a->items)[idx]); +} diff --git a/src/opaque_closure.c b/src/opaque_closure.c index d73beff0f8587..a10b5c617753c 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -50,7 +50,16 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t JL_GC_PUSH2(&sigtype, &selected_rt); sigtype = jl_argtype_with_function(captures, (jl_value_t*)argt); - jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec); + jl_method_instance_t *mi = NULL; + if (source->source) { + mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec); + } + else { + mi = (jl_method_instance_t *)jl_atomic_load_relaxed(&source->specializations); + if (!jl_subtype(sigtype, mi->specTypes)) { + jl_error("sigtype mismatch in optimized opaque closure"); + } + } jl_task_t *ct = jl_current_task; size_t world = ct->world_age; jl_code_instance_t *ci = NULL; @@ -58,12 +67,14 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t ci = jl_compile_method_internal(mi, world); } - jl_fptr_args_t invoke = 
(jl_fptr_args_t)jl_interpret_opaque_closure;
+    jl_fptr_args_t callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
     void *specptr = NULL;
     if (ci) {
-        invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke);
-        specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
+        uint8_t specsigflags;
+        jl_callptr_t invoke;
+        jl_read_codeinst_invoke(ci, &specsigflags, &invoke, &specptr, 1);
+        callptr = (jl_fptr_args_t)invoke; // codegen puts the object (or a jl_fptr_interpret_call token) here for us, even though it was the wrong type to put here
         selected_rt = ci->rettype;
         // If we're not allowed to generate a specsig with this, rt, fall
@@ -72,25 +83,27 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
         if (!jl_subtype(rt_lb, selected_rt)) {
             // TODO: It would be better to try to get a specialization with the
             // correct rt check here (or we could codegen a wrapper).
-            specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+            specptr = NULL; // this will force codegen of the unspecialized version
+            callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
             jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype};
             selected_rt = jl_type_union(ts, 2);
         }
         if (!jl_subtype(ci->rettype, rt_ub)) {
             // TODO: It would be better to try to get a specialization with the
             // correct rt check here (or we could codegen a wrapper).
-            specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+            specptr = NULL; // this will force codegen of the unspecialized version
+            callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
             selected_rt = jl_type_intersection(rt_ub, selected_rt);
         }
-        if (invoke == (jl_fptr_args_t) jl_fptr_interpret_call) {
-            invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+        if (callptr == (jl_fptr_args_t)jl_fptr_interpret_call) {
+            callptr = (jl_fptr_args_t)jl_interpret_opaque_closure;
         }
-        else if (invoke == (jl_fptr_args_t)jl_fptr_args && specptr) {
-            invoke = (jl_fptr_args_t)specptr;
+        else if (callptr == (jl_fptr_args_t)jl_fptr_args && specptr != NULL) {
+            callptr = (jl_fptr_args_t)specptr;
         }
-        else if (invoke == (jl_fptr_args_t)jl_fptr_const_return) {
-            invoke = jl_isa(ci->rettype_const, selected_rt) ?
+        else if (callptr == (jl_fptr_args_t)jl_fptr_const_return) {
+            callptr = jl_isa(ci->rettype_const, selected_rt) ?
                 (jl_fptr_args_t)jl_fptr_const_opaque_closure :
                 (jl_fptr_args_t)jl_fptr_const_opaque_closure_typeerror;
             captures = ci->rettype_const;
@@ -100,21 +113,22 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt);
     JL_GC_PROMISE_ROOTED(oc_type);
-    if (!specptr) {
-        sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt);
+    if (specptr == NULL) {
         jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec);
-        // OC wrapper methods are not world dependent
-        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0);
-        if (!jl_atomic_load_acquire(&ci->invoke))
-            jl_generate_fptr_for_oc_wrapper(ci);
+        // OC wrapper methods are not world dependent and have no edges or other info
+        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0, NULL, NULL);
+        if (!jl_atomic_load_acquire(&ci->invoke)) {
+            jl_emit_codeinst_to_jit(ci, NULL); // confusingly, this actually calls jl_emit_oc_wrapper and never actually compiles ci (which would be impossible, since it cannot have source)
+            jl_compile_codeinst(ci);
+        }
         specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
     }
     jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type);
     oc->source = source;
     oc->captures = captures;
     oc->world = world;
-    oc->invoke = invoke;
+    oc->invoke = callptr;
     oc->specptr = specptr;
     JL_GC_POP();
@@ -131,35 +145,33 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_
     return oc;
 }
-jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
-    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva);
-
-JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
-        jl_method_instance_t *mi, jl_value_t *rettype,
-        jl_value_t *inferred_const, jl_value_t *inferred,
-        int32_t const_flags, size_t min_world, size_t max_world,
-        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
-        uint8_t relocatability);
-
 JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
-    jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile)
+    jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int do_compile, int isinferred)
 {
-    if (!ci->inferred)
-        jl_error("CodeInfo must already be inferred");
     jl_value_t *root = NULL, *sigtype = NULL;
     jl_code_instance_t *inst = NULL;
-    JL_GC_PUSH3(&root, &sigtype, &inst);
+    jl_svec_t *edges = NULL;
+    JL_GC_PUSH4(&root, &sigtype, &inst, &edges);
     root = jl_box_long(lineno);
     root = jl_new_struct(jl_linenumbernode_type, root, file);
-    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva);
+    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva, isinferred);
     root = (jl_value_t*)meth;
-    meth->primary_world = jl_current_task->world_age;
-
-    sigtype = jl_argtype_with_function(env, (jl_value_t*)argt);
-    jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec);
-    inst = jl_new_codeinst(mi, rt_ub, NULL, (jl_value_t*)ci,
-        0, meth->primary_world, -1, 0, 0, jl_nothing, 0);
-    jl_mi_cache_insert(mi, inst);
+    size_t world = jl_current_task->world_age;
+    // these are only legal in the current
world since they are not in any tables + jl_atomic_store_release(&meth->primary_world, world); + jl_atomic_store_release(&meth->deleted_world, world); + + if (isinferred) { + jl_value_t *argslotty = jl_array_ptr_ref(ci->slottypes, 0); + sigtype = jl_argtype_with_function_type(argslotty, (jl_value_t*)argt); + jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec); + edges = (jl_svec_t*)ci->edges; + if (!jl_is_svec(edges)) + edges = jl_emptysvec; // OC doesn't really have edges, so just drop them for now + inst = jl_new_codeinst(mi, jl_nothing, rt_ub, (jl_value_t*)jl_any_type, NULL, (jl_value_t*)ci, + 0, world, world, 0, jl_nothing, ci->debuginfo, edges); + jl_mi_cache_insert(mi, inst); + } jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile); JL_GC_POP(); @@ -168,10 +180,10 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet JL_CALLABLE(jl_new_opaque_closure_jlcall) { - if (nargs < 4) + if (nargs < 5) jl_error("new_opaque_closure: Not enough arguments"); return (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)args[0], - args[1], args[2], args[3], &args[4], nargs-4, 1); + args[1], args[2], args[4], &args[5], nargs-5, 1); } // check whether the specified number of arguments is compatible with the @@ -195,7 +207,7 @@ int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_CALLABLE(jl_f_opaque_closure_call) { - jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F; + jl_opaque_closure_t *oc = (jl_opaque_closure_t*)F; jl_value_t *argt = jl_tparam0(jl_typeof(oc)); if (!jl_tupletype_length_compat(argt, nargs)) jl_method_error(F, args, nargs + 1, oc->world); diff --git a/src/options.h b/src/options.h index b535d5ad4566f..0715069faab32 100644 --- a/src/options.h +++ b/src/options.h @@ -33,11 +33,6 @@ // delete julia IR for non-inlineable functions after they're codegen'd #define JL_DELETE_NON_INLINEABLE 1 -// fill in the jl_all_methods in world-counter order -// so that it is possible to map (in a debugger) from -// an inferred world validity range back to the offending definition -// #define RECORD_METHOD_ORDER - // GC options ----------------------------------------------------------------- // debugging options @@ -64,19 +59,25 @@ #endif #endif +// GC_ASSERT_PARENT_VALIDITY will check whether an object is valid when **pushing** +// it to the mark queue +// #define GC_ASSERT_PARENT_VALIDITY + // profiling options // GC_FINAL_STATS prints total GC stats at exit // #define GC_FINAL_STATS -// MEMPROFILE prints pool summary statistics after every GC -//#define MEMPROFILE +// MEMPROFILE prints pool and large objects summary statistics after every GC +// #define MEMPROFILE // GC_TIME prints time taken by each phase of GC // #define GC_TIME -// OBJPROFILE counts objects by type -// #define OBJPROFILE +// pool allocator configuration options + +// GC_SMALL_PAGE allocates objects in 4k pages +// #define GC_SMALL_PAGE // method dispatch profiling -------------------------------------------------- @@ -105,13 +106,15 @@ // When not using COPY_STACKS the task-system is less memory efficient so // you probably want to choose a smaller default stack size (factor of 8-10) +#if !defined(JL_STACK_SIZE) #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) #define JL_STACK_SIZE (64*1024*1024) #elif defined(_P64) -#define JL_STACK_SIZE (4*1024*1024) +#define JL_STACK_SIZE (8*1024*1024) #else #define JL_STACK_SIZE (2*1024*1024) #endif +#endif // allow a suspended Task to restart on a 
different thread #define MIGRATE_TASKS @@ -134,30 +137,13 @@ // GC threads #define NUM_GC_THREADS_NAME "JULIA_NUM_GC_THREADS" +// heap size hint +#define HEAP_SIZE_HINT "JULIA_HEAP_SIZE_HINT" + // affinitization behavior #define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE" #define DEFAULT_MACHINE_EXCLUSIVE 0 -// partr -- parallel tasks runtime options ------------------------------------ - -// multiq - // number of heaps = MULTIQ_HEAP_C * nthreads -#define MULTIQ_HEAP_C 4 - // how many in each heap -#define MULTIQ_TASKS_PER_HEAP 129 - -// parfor - // tasks = niters / (GRAIN_K * nthreads) -#define GRAIN_K 4 - -// synchronization - // narrivers = ((GRAIN_K * nthreads) ^ ARRIVERS_P) + 1 - // limit for number of recursive parfors -#define ARRIVERS_P 2 - // nreducers = narrivers * REDUCERS_FRAC -#define REDUCERS_FRAC 1 - - // sanitizer defaults --------------------------------------------------------- // Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers diff --git a/src/partr.c b/src/partr.c deleted file mode 100644 index 428389db7f218..0000000000000 --- a/src/partr.c +++ /dev/null @@ -1,531 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -#include -#include -#include -#include - -#include "julia.h" -#include "julia_internal.h" -#include "gc.h" -#include "threading.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -// thread sleep state - -// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD -uint64_t sleep_threshold; - -// thread should not be sleeping--it might need to do work. -static const int16_t not_sleeping = 0; - -// it is acceptable for the thread to be sleeping. -static const int16_t sleeping = 1; - -// this thread is dead. -static const int16_t sleeping_like_the_dead JL_UNUSED = 2; - -// invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending). -// invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping. -// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it. -// information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue. -// information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons. -// information: These observations require sequentially-consistent fences to be inserted between each of those operational phases. -// [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where -// * Dequeuer: -// * 1: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)` -// * Enqueuer: -// * 2: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping` -// i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition. 
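The footnote above is the heart of this (now-removed) sleep/wake protocol: each side must publish its own store before reading the other side's, with a seq-cst fence in between, or both can observe stale values out of their store buffers. A condensed sketch of the two sides; the placeholder_queue_* helpers are placeholders standing in for check_empty and the task queues, and the sketch_* names are hypothetical:

extern int  placeholder_queue_empty(void);  // stands in for check_empty(checkempty)
extern void placeholder_queue_push(void);   // stands in for enqueueing a task

static int sketch_dequeuer_may_block(jl_ptls_t ptls)
{
    jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping); // 1: publish intent to sleep
    jl_fence();                                                  // [^store_buffering_1]
    if (!placeholder_queue_empty()) {                            // 2: must observe any prior push
        jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping);
        return 0;                                                // work arrived; do not block
    }
    return 1;                                                    // safe to block
}

static int sketch_enqueuer_must_wake(jl_ptls_t ptls)
{
    placeholder_queue_push();                                    // 1: publish the work
    jl_fence();                                                  // pairs with the fence above
    return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping; // 2: wake if sleeping
}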
-// [^store_buffering_2]: and also -// * Enqueuer: -// * 1a: `jl_atomic_store_relaxed(jl_uv_n_waiters, 1)` in `JL_UV_LOCK` -// * 1b: "cheap read" of `handle->pending` in `uv_async_send` (via `JL_UV_LOCK`) loads `0` -// * Dequeuer: -// * 2a: store `2` to `handle->pending` in `uv_async_send` (via `JL_UV_LOCK` in `jl_task_get_next`) -// * 2b: `jl_atomic_load_relaxed(jl_uv_n_waiters)` in `jl_task_get_next` returns `0` -// i.e., the dequeuer misses the `n_waiters` is set and enqueuer misses the `uv_stop` flag (in `signal_async`) transition to cleared - -JULIA_DEBUG_SLEEPWAKE( -uint64_t wakeup_enter; -uint64_t wakeup_leave; -uint64_t io_wakeup_enter; -uint64_t io_wakeup_leave; -); - -JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT -{ - // Try to acquire the lock on this task. - int16_t was = jl_atomic_load_relaxed(&task->tid); - if (was == tid) - return 1; - if (was == -1) - return jl_atomic_cmpswap(&task->tid, &was, tid); - return 0; -} - -JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT -{ - if (tpid < 0 || tpid >= jl_n_threadpools) - return 0; - task->threadpoolid = tpid; - return 1; -} - -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max, uint32_t unbias) -{ - jl_ptls_t ptls = jl_current_task->ptls; - // one-extend unbias back to 64-bits - return cong(max, -(uint64_t)-unbias, &ptls->rngseed); -} - -// initialize the threading infrastructure -// (called only by the main thread) -void jl_init_threadinginfra(void) -{ - /* initialize the synchronization trees pool */ - sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD; - char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME); - if (cp) { - if (!strncasecmp(cp, "infinite", 8)) - sleep_threshold = UINT64_MAX; - else - sleep_threshold = (uint64_t)strtol(cp, NULL, 10); - } -} - - -void JL_NORETURN jl_finish_task(jl_task_t *t); - - -static inline int may_mark(void) JL_NOTSAFEPOINT -{ - return (jl_atomic_load(&gc_n_threads_marking) > 0); -} - -// gc thread mark function -void jl_gc_mark_threadfun(void *arg) -{ - jl_threadarg_t *targ = (jl_threadarg_t*)arg; - - // initialize this thread (set tid and create heap) - jl_ptls_t ptls = jl_init_threadtls(targ->tid); - - // wait for all threads - jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); - uv_barrier_wait(targ->barrier); - - // free the thread argument here - free(targ); - - while (1) { - uv_mutex_lock(&gc_threads_lock); - while (!may_mark()) { - uv_cond_wait(&gc_threads_cond, &gc_threads_lock); - } - uv_mutex_unlock(&gc_threads_lock); - gc_mark_loop_parallel(ptls, 0); - } -} - -// gc thread sweep function -void jl_gc_sweep_threadfun(void *arg) -{ - jl_threadarg_t *targ = (jl_threadarg_t*)arg; - - // initialize this thread (set tid and create heap) - jl_ptls_t ptls = jl_init_threadtls(targ->tid); - - // wait for all threads - jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); - uv_barrier_wait(targ->barrier); - - // free the thread argument here - free(targ); - - while (1) { - uv_sem_wait(&gc_sweep_assists_needed); - while (1) { - jl_gc_pagemeta_t *pg = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); - if (pg == NULL) { - break; - } - jl_gc_free_page(pg); - push_lf_page_metadata_back(&global_page_pool_freed, pg); - } - } -} - -// thread function: used by all mutator threads except the main thread -void jl_threadfun(void *arg) -{ - jl_threadarg_t *targ 
= (jl_threadarg_t*)arg; - - // initialize this thread (set tid, create heap, set up root task) - jl_ptls_t ptls = jl_init_threadtls(targ->tid); - void *stack_lo, *stack_hi; - jl_init_stack_limits(0, &stack_lo, &stack_hi); - // warning: this changes `jl_current_task`, so be careful not to call that from this function - jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); - JL_GC_PROMISE_ROOTED(ct); - - // wait for all threads - jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0); - uv_barrier_wait(targ->barrier); - - // free the thread argument here - free(targ); - - (void)jl_gc_unsafe_enter(ptls); - jl_finish_task(ct); // noreturn -} - - -int jl_running_under_rr(int recheck) -{ -#ifdef _OS_LINUX_ -#define RR_CALL_BASE 1000 -#define SYS_rrcall_check_presence (RR_CALL_BASE + 8) - static _Atomic(int) is_running_under_rr = 0; - int rr = jl_atomic_load_relaxed(&is_running_under_rr); - if (rr == 0 || recheck) { - int ret = syscall(SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0); - if (ret == -1) - // Should always be ENOSYS, but who knows what people do for - // unknown syscalls with their seccomp filters, so just say - // that we don't have rr. - rr = 2; - else - rr = 1; - jl_atomic_store_relaxed(&is_running_under_rr, rr); - } - return rr == 1; -#else - return 0; -#endif -} - - -// sleep_check_after_threshold() -- if sleep_threshold ns have passed, return 1 -static int sleep_check_after_threshold(uint64_t *start_cycles) -{ - JULIA_DEBUG_SLEEPWAKE( return 1 ); // hammer on the sleep/wake logic much harder - /** - * This wait loop is a bit of a worst case for rr - it needs timer access, - * which are slow and it busy loops in user space, which prevents the - * scheduling logic from switching to other threads. Just don't bother - * trying to wait here - */ - if (jl_running_under_rr(0)) - return 1; - if (!(*start_cycles)) { - *start_cycles = jl_hrtime(); - return 0; - } - uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles); - if (elapsed_cycles >= sleep_threshold) { - *start_cycles = 0; - return 1; - } - return 0; -} - - -static int wake_thread(int16_t tid) JL_NOTSAFEPOINT -{ - jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; - int8_t state = sleeping; - - if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) { - if (jl_atomic_cmpswap_relaxed(&other->sleep_check_state, &state, not_sleeping)) { - JL_PROBE_RT_SLEEP_CHECK_WAKE(other, state); - uv_mutex_lock(&other->sleep_lock); - uv_cond_signal(&other->wake_signal); - uv_mutex_unlock(&other->sleep_lock); - return 1; - } - } - return 0; -} - - -static void wake_libuv(void) JL_NOTSAFEPOINT -{ - JULIA_DEBUG_SLEEPWAKE( io_wakeup_enter = cycleclock() ); - jl_wake_libuv(); - JULIA_DEBUG_SLEEPWAKE( io_wakeup_leave = cycleclock() ); -} - -/* ensure thread tid is awake if necessary */ -JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT -{ - jl_task_t *ct = jl_current_task; - int16_t self = jl_atomic_load_relaxed(&ct->tid); - if (tid != self) - jl_fence(); // [^store_buffering_1] - jl_task_t *uvlock = jl_atomic_load_relaxed(&jl_uv_mutex.owner); - JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() ); - if (tid == self || tid == -1) { - // we're already awake, but make sure we'll exit uv_run - jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); - JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls); - } - if (uvlock == ct) - uv_stop(jl_global_event_loop()); - } - else { - // something added to the sticky-queue: notify that 
thread - if (wake_thread(tid) && uvlock != ct) { - // check if we need to notify uv_run too - jl_fence(); - jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; - jl_task_t *tid_task = jl_atomic_load_relaxed(&other->current_task); - // now that we have changed the thread to not-sleeping, ensure that - // either it has not yet acquired the libuv lock, or that it will - // observe the change of state to not_sleeping - if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task) - wake_libuv(); - } - } - // check if the other threads might be sleeping - if (tid == -1) { - // something added to the multi-queue: notify all threads - // in the future, we might want to instead wake some fraction of threads, - // and let each of those wake additional threads if they find work - int anysleep = 0; - int nthreads = jl_atomic_load_acquire(&jl_n_threads); - for (tid = 0; tid < nthreads; tid++) { - if (tid != self) - anysleep |= wake_thread(tid); - } - // check if we need to notify uv_run too - if (uvlock != ct && anysleep) { - jl_fence(); - if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) != NULL) - wake_libuv(); - } - } - JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() ); -} - - -// get the next runnable task -static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q) -{ - jl_gc_safepoint(); - jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1); - if (jl_is_task(task)) { - int self = jl_atomic_load_relaxed(&jl_current_task->tid); - jl_set_task_tid(task, self); - return task; - } - return NULL; -} - -static int check_empty(jl_value_t *checkempty) -{ - return jl_apply_generic(checkempty, NULL, 0) == jl_true; -} - -jl_task_t *wait_empty JL_GLOBALLY_ROOTED; -void jl_wait_empty_begin(void); -void jl_wait_empty_end(void); - -void jl_task_wait_empty(void) -{ - jl_task_t *ct = jl_current_task; - if (jl_atomic_load_relaxed(&ct->tid) == 0 && jl_base_module) { - jl_wait_empty_begin(); - jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("wait")); - wait_empty = ct; - size_t lastage = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - if (f) - jl_apply_generic(f, NULL, 0); - ct->world_age = lastage; - wait_empty = NULL; - jl_wait_empty_end(); - } -} - -static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT -{ - // sleep_check_state is only transitioned from not_sleeping to sleeping - // by the thread itself. As a result, if this returns false, it will - // continue returning false. If it returns true, we know the total - // modification order of the fences. 
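The discipline described in the comment above is the classic store-buffering pattern that the [^store_buffering_1] annotations refer to: each side writes its own flag, issues a full fence, and only then reads the other side's flag, so at least one of the two reads is guaranteed to observe the other's store. The following standalone C++ sketch illustrates the pattern only; it is not Julia's actual code, the names queue_nonempty, enqueue_and_check_sleeper and announce_sleep_and_recheck are invented, and the real wake/sleep machinery is elided.

#include <atomic>

std::atomic<int> queue_nonempty{0};     // stand-in for "work was enqueued"
std::atomic<int> sleep_check_state{0};  // 0 = not_sleeping, 1 = sleeping

// Enqueuer side (cf. jl_wakeup_thread): publish the work, fence, then check the flag.
bool enqueue_and_check_sleeper() {
    queue_nonempty.store(1, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);         // jl_fence()
    return sleep_check_state.load(std::memory_order_relaxed) == 1; // if true, a wake is required
}

// Dequeuer side (cf. may_sleep): announce the intent to sleep, fence, then recheck the queue.
bool announce_sleep_and_recheck() {
    sleep_check_state.store(1, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);         // jl_fence()
    if (queue_nonempty.load(std::memory_order_relaxed) == 1) {
        sleep_check_state.store(0, std::memory_order_relaxed);   // saw the work; stay awake
        return false;
    }
    return true; // safe to sleep: any later enqueuer's fenced load will see the sleeping flag
}

Without the two seq_cst fences, both relaxed loads may return the stale zero (the store-buffering outcome), so the dequeuer parks while work sits in the queue and the enqueuer sees nobody to wake.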
- jl_fence(); // [^store_buffering_1] [^store_buffering_2] - return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping; -} - -extern _Atomic(unsigned) _threadedregion; - -JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_value_t *checkempty) -{ - jl_task_t *ct = jl_current_task; - uint64_t start_cycles = 0; - - while (1) { - jl_task_t *task = get_next_task(trypoptask, q); - if (task) - return task; - - // quick, race-y check to see if there seems to be any stuff in there - jl_cpu_pause(); - if (!check_empty(checkempty)) { - start_cycles = 0; - continue; - } - - jl_cpu_pause(); - jl_ptls_t ptls = ct->ptls; - if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == 0 && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) { - // acquire sleep-check lock - jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping); - jl_fence(); // [^store_buffering_1] - JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls); - if (!check_empty(checkempty)) { // uses relaxed loads - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us - JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls); - } - continue; - } - task = get_next_task(trypoptask, q); // note: this should not yield - if (ptls != ct->ptls) { - // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over - ptls = ct->ptls; - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us - JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); - } - if (task) - return task; - continue; - } - if (task) { - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us - JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); - } - return task; - } - - - // IO is always permitted, but outside a threaded region, only - // thread 0 will process messages. - // Inside a threaded region, any thread can listen for IO messages, - // and one thread should win this race and watch the event loop, - // but we bias away from idle threads getting parked here. - // - // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]: - // - After decrementing _threadedregion, the thread is required to - // call jl_wakeup_thread(0), that will kick out any thread who is - // already there, and then eventually thread 0 will get here. - // - Inside a _threadedregion, there must exist at least one - // thread that has a happens-before relationship on the libuv lock - // before reaching this decision point in the code who will see - // the lock as unlocked and thus must win this race here. - int uvlock = 0; - if (jl_atomic_load_relaxed(&_threadedregion)) { - uvlock = jl_mutex_trylock(&jl_uv_mutex); - } - else if (ptls->tid == 0) { - uvlock = 1; - JL_UV_LOCK(); - } - else { - // Since we might have started some IO work, we might need - // to ensure tid = 0 will go watch that new event source. - // If trylock would have succeeded, that may have been our - // responsibility, so need to make sure thread 0 will take care - // of us. 
- if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock - jl_wakeup_thread(0); - } - if (uvlock) { - int enter_eventloop = may_sleep(ptls); - int active = 0; - if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0) - // if we won the race against someone who actually needs - // the lock to do real work, we need to let them have it instead - enter_eventloop = 0; - if (enter_eventloop) { - uv_loop_t *loop = jl_global_event_loop(); - loop->stop_flag = 0; - JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() ); - active = uv_run(loop, UV_RUN_ONCE); - JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() ); - jl_gc_safepoint(); - } - JL_UV_UNLOCK(); - // optimization: check again first if we may have work to do. - // Otherwise we got a spurious wakeup since some other thread - // that just wanted to steal libuv from us. We will just go - // right back to sleep on the individual wake signal to let - // them take it from us without conflict. - if (active || !may_sleep(ptls)) { - start_cycles = 0; - continue; - } - if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0) { - // thread 0 is the only thread permitted to run the event loop - // so it needs to stay alive, just spin-looping if necessary - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us - JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls); - } - start_cycles = 0; - continue; - } - } - - // the other threads will just wait for an individual wake signal to resume - JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); - int8_t gc_state = jl_gc_safe_enter(ptls); - uv_mutex_lock(&ptls->sleep_lock); - while (may_sleep(ptls)) { - if (ptls->tid == 0 && wait_empty) { - task = wait_empty; - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us - JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); - } - break; - } - uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); - } - assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); - uv_mutex_unlock(&ptls->sleep_lock); - JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); - jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint - start_cycles = 0; - if (task) { - assert(task == wait_empty); - wait_empty = NULL; - return task; - } - } - else { - // maybe check the kernel for new messages too - jl_process_events(); - } - } -} - -#ifdef __cplusplus -} -#endif diff --git a/src/passes.h b/src/passes.h index 2bb33d6eec60d..4c9cba164d049 100644 --- a/src/passes.h +++ b/src/passes.h @@ -15,10 +15,6 @@ struct DemoteFloat16Pass : PassInfoMixin { static bool isRequired() { return true; } }; -struct CombineMulAddPass : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; -}; - struct LateLowerGCPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } @@ -46,30 +42,25 @@ struct GCInvariantVerifierPass : PassInfoMixin { static bool isRequired() { return true; } }; -// Module Passes -struct CPUFeaturesPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; - static bool isRequired() { return true; } -}; - -struct RemoveNIPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager 
&AM) JL_NOTSAFEPOINT; +struct FinalLowerGCPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct LowerSIMDLoopPass : PassInfoMixin { +// Module Passes +struct CPUFeaturesPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct FinalLowerGCPass : PassInfoMixin { +struct RemoveNIPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; struct MultiVersioningPass : PassInfoMixin { bool external_use; - MultiVersioningPass(bool external_use = false) : external_use(external_use) {} + MultiVersioningPass(bool external_use = false) JL_NOTSAFEPOINT : external_use(external_use) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -103,6 +94,11 @@ struct JuliaLICMPass : PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +struct LowerSIMDLoopPass : PassInfoMixin { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; +}; + #define MODULE_MARKER_PASS(NAME) \ struct NAME##MarkerPass : PassInfoMixin { \ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ @@ -146,4 +142,8 @@ MODULE_MARKER_PASS(BeforeCleanup) MODULE_MARKER_PASS(AfterCleanup) MODULE_MARKER_PASS(AfterOptimization) +bool verifyLLVMIR(const Module &M) JL_NOTSAFEPOINT; +bool verifyLLVMIR(const Function &F) JL_NOTSAFEPOINT; +bool verifyLLVMIR(const Loop &L) JL_NOTSAFEPOINT; + #endif diff --git a/src/pipeline.cpp b/src/pipeline.cpp index ca5992b6f3135..39f896ba656d2 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -12,23 +12,14 @@ // analysis passes #include #include +#include #include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include @@ -37,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -73,10 +66,11 @@ #include #include #include +#include +#include #include #include #include - #ifdef _COMPILER_GCC_ #pragma GCC diagnostic pop #endif @@ -89,7 +83,6 @@ #include "julia_assert.h" #include "passes.h" - using namespace llvm; namespace { @@ -156,10 +149,10 @@ namespace { // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); // MPM.addPass(RequireAnalysisPass()); - // MPM.addPass(ModuleAddressSanitizerPass( - // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); //Let's assume the defaults are actually fine for our purposes - MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions())); + // MPM.addPass(AddressSanitizerPass( + // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); + MPM.addPass(AddressSanitizerPass(AddressSanitizerOptions(), true, false)); // } }; ASanPass(/*SanitizerKind::Address, */false); @@ -182,10 +175,11 @@ namespace { // } } -#ifdef JL_DEBUG_BUILD +#ifdef JL_VERIFY_PASSES static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT { - if (!llvm_only) - MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass())); + if (!llvm_only){ + 
MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass(true))); + } MPM.addPass(VerifierPass()); } #endif @@ -208,9 +202,6 @@ namespace { // .sinkCommonInsts(true) ; } -#if JL_LLVM_VERSION < 150000 -#define LICMOptions() -#endif // At any given time exactly one of each pair of overloads is strictly unused #ifdef _COMPILER_GCC_ @@ -328,168 +319,192 @@ namespace { static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeEarlySimplificationMarkerPass()); -#ifdef JL_DEBUG_BUILD +#ifdef JL_VERIFY_PASSES addVerificationPasses(MPM, options.llvm_only); #endif - // Place after verification in case we want to force it anyways - MPM.addPass(ForceFunctionAttrsPass()); - invokePipelineStartCallbacks(MPM, PB, O); - MPM.addPass(Annotation2MetadataPass()); - MPM.addPass(ConstantMergePass()); - { - FunctionPassManager FPM; - FPM.addPass(LowerExpectIntrinsicPass()); - if (O.getSpeedupLevel() >= 2) { - JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); - } - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - if (O.getSpeedupLevel() >= 1) { - FPM.addPass(DCEPass()); - FPM.addPass(SROAPass()); - } - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + if (options.enable_early_simplifications) { + // Place after verification in case we want to force it anyways + MPM.addPass(ForceFunctionAttrsPass()); + invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(Annotation2MetadataPass()); + MPM.addPass(InferFunctionAttrsPass()); + MPM.addPass(ConstantMergePass()); + { + FunctionPassManager FPM; + FPM.addPass(LowerExpectIntrinsicPass()); + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); + } + // DCE must come before simplifycfg + // codegen can generate unused statements when generating builtin calls, + // and those dead statements can alter how simplifycfg optimizes the CFG + FPM.addPass(DCEPass()); + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + if (O.getSpeedupLevel() >= 1) { + // TODO check the LLVM 15 default. 
+ FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeEarlySimplificationCallbacks(MPM, PB, O); } - invokeEarlySimplificationCallbacks(MPM, PB, O); MPM.addPass(AfterEarlySimplificationMarkerPass()); } static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeEarlyOptimizationMarkerPass()); - invokeOptimizerEarlyCallbacks(MPM, PB, O); - { - CGSCCPassManager CGPM; - invokeCGSCCCallbacks(CGPM, PB, O); - if (O.getSpeedupLevel() >= 2) { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(AllocOptPass())); - FPM.addPass(Float2IntPass()); - FPM.addPass(LowerConstantIntrinsicsPass()); - CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); - } - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); - } - if (options.dump_native) { - JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); - } - JULIA_PASS(MPM.addPass(CPUFeaturesPass())); - if (O.getSpeedupLevel() >= 1) { - FunctionPassManager FPM; - if (O.getSpeedupLevel() >= 2) { - FPM.addPass(SROAPass()); - // SROA can duplicate PHI nodes which can block LowerSIMD - FPM.addPass(InstCombinePass()); - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(ReassociatePass()); - FPM.addPass(EarlyCSEPass()); - JULIA_PASS(FPM.addPass(AllocOptPass())); - } else { // if (O.getSpeedupLevel() >= 1) (exactly) - FPM.addPass(InstCombinePass()); - FPM.addPass(EarlyCSEPass()); - } - invokePeepholeEPCallbacks(FPM, PB, O); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + if (options.enable_early_optimizations) { + invokeOptimizerEarlyCallbacks(MPM, PB, O); + { + CGSCCPassManager CGPM; + invokeCGSCCCallbacks(CGPM, PB, O); + if (O.getSpeedupLevel() >= 2) { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(AllocOptPass())); + FPM.addPass(Float2IntPass()); + FPM.addPass(LowerConstantIntrinsicsPass()); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + } + if (O.getSpeedupLevel() >= 2) { + MPM.addPass(RequireAnalysisPass()); + } + // MPM.addPass(createModuleToFunctionPassAdaptor(InvalidateAnalysisPass())); + if (options.dump_native) { + MPM.addPass(StripDeadPrototypesPass()); + JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); + } + JULIA_PASS(MPM.addPass(CPUFeaturesPass())); + if (O.getSpeedupLevel() >= 1) { + FunctionPassManager FPM; + if (O.getSpeedupLevel() >= 2) { + // TODO check the LLVM 15 default. 
+ FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + // SROA can duplicate PHI nodes which can block LowerSIMD + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(ReassociatePass()); + FPM.addPass(EarlyCSEPass()); + JULIA_PASS(FPM.addPass(AllocOptPass())); + } else { // if (O.getSpeedupLevel() >= 1) (exactly) + FPM.addPass(InstCombinePass()); + FPM.addPass(EarlyCSEPass()); + } + invokePeepholeEPCallbacks(FPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(GlobalDCEPass()); } MPM.addPass(AfterEarlyOptimizationMarkerPass()); } static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeLoopOptimizationMarkerPass()); - { - LoopPassManager LPM; + if (options.enable_loop_optimizations) { + { + LoopPassManager LPM; + LPM.addPass(LowerSIMDLoopPass()); + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopRotatePass()); + } + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } if (O.getSpeedupLevel() >= 2) { - LPM.addPass(LoopRotatePass()); + LoopPassManager LPM; + LPM.addPass(BeforeLICMMarkerPass()); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(AfterLICMMarkerPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); } - invokeLateLoopOptimizationCallbacks(LPM, PB, O); - //We don't know if the loop callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); - } - if (O.getSpeedupLevel() >= 2) { - LoopPassManager LPM; - LPM.addPass(BeforeLICMMarkerPass()); - LPM.addPass(LICMPass(LICMOptions())); - LPM.addPass(JuliaLICMPass()); - LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); - LPM.addPass(LICMPass(LICMOptions())); - LPM.addPass(JuliaLICMPass()); - LPM.addPass(AfterLICMMarkerPass()); - //LICM needs MemorySSA now, so we must use it - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); - } - if (O.getSpeedupLevel() >= 2) { - FPM.addPass(IRCEPass()); - } - { - LoopPassManager LPM; - LPM.addPass(BeforeLoopSimplificationMarkerPass()); if (O.getSpeedupLevel() >= 2) { - LPM.addPass(LoopInstSimplifyPass()); - LPM.addPass(LoopIdiomRecognizePass()); - LPM.addPass(IndVarSimplifyPass()); - LPM.addPass(LoopDeletionPass()); - // This unroll will only unroll loops when the trip count is known and small, - // so that no loop remains - LPM.addPass(LoopFullUnrollPass()); + FPM.addPass(IRCEPass()); + } + { + LoopPassManager LPM; + LPM.addPass(BeforeLoopSimplificationMarkerPass()); + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopIdiomRecognizePass()); + LPM.addPass(IndVarSimplifyPass()); + LPM.addPass(LoopDeletionPass()); + // This unroll will only unroll loops when the trip count is known and small, + // so that no loop remains + LPM.addPass(LoopFullUnrollPass()); + } + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + LPM.addPass(AfterLoopSimplificationMarkerPass()); + //We don't know if the loop end callbacks support MSSA + 
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } - invokeLoopOptimizerEndCallbacks(LPM, PB, O); - LPM.addPass(AfterLoopSimplificationMarkerPass()); - //We don't know if the loop end callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } FPM.addPass(AfterLoopOptimizationMarkerPass()); } static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeScalarOptimizationMarkerPass()); - if (O.getSpeedupLevel() >= 2) { - JULIA_PASS(FPM.addPass(AllocOptPass())); - FPM.addPass(SROAPass()); - FPM.addPass(InstSimplifyPass()); - FPM.addPass(GVNPass()); - FPM.addPass(MemCpyOptPass()); - FPM.addPass(SCCPPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DCEPass()); - FPM.addPass(IRCEPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(JumpThreadingPass()); - } - if (O.getSpeedupLevel() >= 3) { - FPM.addPass(GVNPass()); - } - if (O.getSpeedupLevel() >= 2) { - FPM.addPass(DSEPass()); - invokePeepholeEPCallbacks(FPM, PB, O); - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); - JULIA_PASS(FPM.addPass(AllocOptPass())); - { - LoopPassManager LPM; - LPM.addPass(LoopDeletionPass()); - LPM.addPass(LoopInstSimplifyPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + if (options.enable_scalar_optimizations) { + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(AllocOptPass())); + // TODO check the LLVM 15 default. + FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + FPM.addPass(InstSimplifyPass()); + FPM.addPass(GVNPass()); + FPM.addPass(MemCpyOptPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DCEPass()); + FPM.addPass(IRCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + } + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); } - FPM.addPass(LoopDistributePass()); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(DSEPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + JULIA_PASS(FPM.addPass(AllocOptPass())); + { + LoopPassManager LPM; + LPM.addPass(LoopDeletionPass()); + LPM.addPass(LoopInstSimplifyPass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + FPM.addPass(LoopDistributePass()); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); } - invokeScalarOptimizerCallbacks(FPM, PB, O); FPM.addPass(AfterScalarOptimizationMarkerPass()); } static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeVectorizationMarkerPass()); - //TODO look into loop vectorize options - FPM.addPass(InjectTLIMappings()); - FPM.addPass(LoopVectorizePass()); - FPM.addPass(LoopLoadEliminationPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); - FPM.addPass(SLPVectorizerPass()); - invokeVectorizerCallbacks(FPM, PB, O); - FPM.addPass(VectorCombinePass()); - FPM.addPass(ADCEPass()); - //TODO add BDCEPass here? 
- // This unroll will unroll vectorized loops - // as well as loops that we tried but failed to vectorize - FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + if (options.enable_vector_pipeline) { + //TODO look into loop vectorize options + FPM.addPass(InjectTLIMappings()); + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(VectorCombinePass()); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + // This unroll will unroll vectorized loops + // as well as loops that we tried but failed to vectorize + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + } FPM.addPass(AfterVectorizationMarkerPass()); } @@ -505,24 +520,29 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * } // Needed **before** LateLowerGCFrame on LLVM < 12 // due to bug in `CreateAlignmentAssumption`. + assert(options.remove_ni); JULIA_PASS(MPM.addPass(RemoveNIPass())); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass()))); - JULIA_PASS(MPM.addPass(FinalLowerGCPass())); - if (O.getSpeedupLevel() >= 2) { + { FunctionPassManager FPM; - FPM.addPass(GVNPass()); - FPM.addPass(SCCPPass()); - FPM.addPass(DCEPass()); + JULIA_PASS(FPM.addPass(LateLowerGCPass())); + JULIA_PASS(FPM.addPass(FinalLowerGCPass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(DSEPass()); + FPM.addPass(GVNPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(DCEPass()); + } MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); + MPM.addPass(RemoveJuliaAddrspacesPass()); //TODO: Make this conditional on arches (GlobalISel doesn't like our addrsspaces) if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; FPM.addPass(InstCombinePass()); FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - } else { + } else if (!options.remove_ni) { JULIA_PASS(MPM.addPass(RemoveNIPass())); } MPM.addPass(AfterIntrinsicLoweringMarkerPass()); @@ -530,22 +550,23 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeCleanupMarkerPass()); - if (O.getSpeedupLevel() >= 2) { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(CombineMulAddPass())); - FPM.addPass(DivRemPairsPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - invokeOptimizerLastCallbacks(MPM, PB, O); - MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); - addSanitizerPasses(MPM, O); - { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); + if (options.cleanup) { if (O.getSpeedupLevel() >= 2) { - FPM.addPass(GVNPass()); + FunctionPassManager FPM; + FPM.addPass(DivRemPairsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeOptimizerLastCallbacks(MPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); + addSanitizerPasses(MPM, O); + { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); + if 
(O.getSpeedupLevel() >= 2) { + FPM.addPass(GVNPass()); + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } MPM.addPass(AfterCleanupMarkerPass()); } @@ -553,9 +574,9 @@ static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimi static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeOptimizationMarkerPass()); buildEarlySimplificationPipeline(MPM, PB, O, options); - MPM.addPass(AlwaysInlinerPass()); + if (options.always_inline) + MPM.addPass(AlwaysInlinerPass()); buildEarlyOptimizerPipeline(MPM, PB, O, options); - MPM.addPass(LowerSIMDLoopPass()); { FunctionPassManager FPM; buildLoopOptimizerPipeline(FPM, PB, O, options); @@ -563,7 +584,8 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL if (O.getSpeedupLevel() >= 2) { buildVectorPipeline(FPM, PB, O, options); } - FPM.addPass(WarnMissedTransformationsPass()); + if (options.warn_missed_transformations) + FPM.addPass(WarnMissedTransformationsPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } buildIntrinsicLoweringPipeline(MPM, PB, O, options); @@ -571,36 +593,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL MPM.addPass(AfterOptimizationMarkerPass()); } -extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size, - int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT -{ - OptimizationLevel O; - switch (Size) { - case 1: - O = OptimizationLevel::Os; - break; - default: - O = OptimizationLevel::Oz; - break; - case 0: - switch (Speedup) { - case 0: - O = OptimizationLevel::O0; - break; - case 1: - O = OptimizationLevel::O1; - break; - case 2: - O = OptimizationLevel::O2; - break; - default: - O = OptimizationLevel::O3; - break; - } - } - buildPipeline(*reinterpret_cast(MPM), reinterpret_cast(PB), O, - OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only}); -} #undef JULIA_PASS @@ -608,29 +600,29 @@ namespace { void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT { //Borrowed from LLVM PassBuilder.cpp:386 -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ +#define MODULE_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define MODULE_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ +#define FUNCTION_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ +#define LOOP_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define 
LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ +#define CGSCC_PASS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +#define CGSCC_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER, PARAMS) \ PIC.addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); @@ -673,13 +665,6 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization"); } - auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { - auto PIC = std::make_unique(); - adjustPIC(*PIC); - SI.registerCallbacks(*PIC); - return PIC; - } - FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT { FunctionAnalysisManager FAM; @@ -708,9 +693,8 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); } NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options) : - TM(std::move(TM)), SI(false), PIC(createPIC(SI)), - PB(this->TM.get(), PipelineTuningOptions(), None, PIC.get()), - MPM(createMPM(PB, O, options)), O(O) {} + TM(std::move(TM)), O(O), options(options), TimePasses() {} + NewPM::~NewPM() = default; @@ -736,14 +720,30 @@ void NewPM::run(Module &M) { //We must recreate the analysis managers every time //so that analyses from previous runs of the pass manager //do not hang around for the next run - AnalysisManagers AM{*TM, PB, O}; + StandardInstrumentations SI(M.getContext(),false); + PassInstrumentationCallbacks PIC; + adjustPIC(PIC); + TimePasses.registerCallbacks(PIC); + FunctionAnalysisManager FAM(createFAM(O, *TM.get())); + LoopAnalysisManager LAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + SI.registerCallbacks(PIC, &MAM); + SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this + PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC); + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + ModulePassManager MPM = createMPM(PB, O, options); #ifndef __clang_gcanalyzer__ /* the analyzer cannot prove we have not added instrumentation callbacks with safepoints */ - MPM.run(M, AM.MAM); + MPM.run(M, MAM); #endif } void NewPM::printTimers() { - SI.getTimePasses().print(); + TimePasses.print(); } OptimizationLevel getOptLevel(int optlevel) { @@ -761,7 +761,7 @@ OptimizationLevel getOptLevel(int optlevel) { } //This part is also basically stolen from LLVM's PassBuilder.cpp file -static llvm::Optional> parseJuliaPipelineOptions(StringRef name) { +static std::optional> parseJuliaPipelineOptions(StringRef name) { if (name.consume_front("julia")) { auto O = OptimizationLevel::O2; auto options = OptimizationOptions::defaults(); @@ -773,7 +773,16 @@ static llvm::Optional> parseJu OPTION(lower_intrinsics), OPTION(dump_native), OPTION(external_use), - OPTION(llvm_only) + OPTION(llvm_only), + OPTION(always_inline), + OPTION(enable_early_simplifications), + OPTION(enable_early_optimizations), + OPTION(enable_scalar_optimizations), + OPTION(enable_loop_optimizations), + OPTION(enable_vector_pipeline), + OPTION(remove_ni), + OPTION(cleanup), + 
OPTION(warn_missed_transformations) #undef OPTION }; while (!name.empty()) { @@ -811,7 +820,37 @@ static llvm::Optional> parseJu } return {{O, options}}; } - return {}; + return None; +} + +bool verifyLLVMIR(const Module &M) JL_NOTSAFEPOINT { + JL_TIMING(VERIFY_IR, VERIFY_Module); + if (verifyModule(M, &errs())) { + errs() << "Failed to verify module '" << M.getModuleIdentifier() << "', dumping entire module!\n\n"; + errs() << M << "\n"; + return true; + } + return false; +} + +bool verifyLLVMIR(const Function &F) JL_NOTSAFEPOINT { + JL_TIMING(VERIFY_IR, VERIFY_Function); + if (verifyFunction(F, &errs())) { + errs() << "Failed to verify function '" << F.getName() << "', dumping entire module!\n\n"; + errs() << *F.getParent() << "\n"; + return true; + } + return false; +} + +bool verifyLLVMIR(const Loop &L) JL_NOTSAFEPOINT { + JL_TIMING(VERIFY_IR, VERIFY_Loop); + if (verifyFunction(*L.getHeader()->getParent(), &errs())) { + errs() << "Failed to verify loop '" << L << "', dumping entire module!\n\n"; + errs() << *L.getHeader()->getModule() << "\n"; + return true; + } + return false; } // new pass manager plugin @@ -827,7 +866,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { -#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef FUNCTION_PASS if (Name.consume_front("GCInvariantVerifier")) { @@ -849,7 +888,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, ArrayRef InnerPipeline) { -#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef MODULE_PASS if (Name.consume_front("LowerPTLSPass")) { @@ -892,7 +931,7 @@ static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { -#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef LOOP_PASS return false; @@ -908,3 +947,9 @@ extern "C" JL_DLLEXPORT_CODEGEN ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT { return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; } + +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) +{ + PM->add(new TargetLibraryInfoWrapperPass(triple)); + PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); +} diff --git a/src/precompile.c b/src/precompile.c index a7174492cf0e1..c21cf5367fba6 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -35,27 +35,53 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { // uint64: length of src text // char*: src text // At the end we write int32(0) as a terminal sentinel. 
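For illustration, the record layout described above can be read back with a short sketch like the following. This is hypothetical standalone code using stdio rather than Julia's ios_t, and SrcTextEntry and read_srctext are invented names: each record is an int32 path length, the path bytes, a uint64 source-text length, and the source bytes, and an int32 of 0 terminates the list.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct SrcTextEntry {
    std::string path;   // file path as stored (depot-relative after replace_depot_path)
    std::string src;    // cached source text
};

// Read the source-text section written by write_srctext (illustrative sketch only).
static std::vector<SrcTextEntry> read_srctext(std::FILE *f)
{
    std::vector<SrcTextEntry> entries;
    for (;;) {
        int32_t pathlen = 0;
        if (std::fread(&pathlen, sizeof pathlen, 1, f) != 1 || pathlen <= 0)
            break;                                      // int32(0) is the terminal sentinel
        SrcTextEntry e;
        e.path.resize((size_t)pathlen);
        if (std::fread(&e.path[0], 1, (size_t)pathlen, f) != (size_t)pathlen)
            break;
        uint64_t srclen = 0;                            // uint64: length of src text
        if (std::fread(&srclen, sizeof srclen, 1, f) != 1)
            break;
        e.src.resize((size_t)srclen);
        if (srclen && std::fread(&e.src[0], 1, (size_t)srclen, f) != (size_t)srclen)
            break;                                      // char*: src text
        entries.push_back(std::move(e));
    }
    return entries;
}

On the writer side the uint64 length is not known up front, which is why the code below writes a placeholder first and patches in the real byte count after copying the file contents.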
- size_t len = jl_array_len(udeps); + size_t len = jl_array_nrows(udeps); + static jl_value_t *replace_depot_func = NULL; + if (!replace_depot_func) + replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + static jl_value_t *normalize_depots_func = NULL; + if (!normalize_depots_func) + normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation")); ios_t srctext; + jl_value_t *deptuple = NULL, *depots = NULL; + JL_GC_PUSH3(&deptuple, &udeps, &depots); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + depots = jl_apply(&normalize_depots_func, 1); + ct->world_age = last_age; for (size_t i = 0; i < len; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + deptuple = jl_array_ptr_ref(udeps, i); jl_value_t *depmod = jl_fieldref(deptuple, 0); // module // Dependencies declared with `include_dependency` are excluded // because these may not be Julia code (and could be huge) if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - const char *depstr = jl_string_data(dep); - if (!depstr[0]) + jl_value_t *abspath = jl_fieldref(deptuple, 1); // file abspath + const char *abspathstr = jl_string_data(abspath); + if (!abspathstr[0]) continue; - ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); + ios_t *srctp = ios_file(&srctext, abspathstr, 1, 0, 0, 0); if (!srctp) { jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - jl_string_data(dep)); + abspathstr); continue; } - size_t slen = jl_string_len(dep); + + jl_value_t **replace_depot_args; + JL_GC_PUSHARGS(replace_depot_args, 3); + replace_depot_args[0] = replace_depot_func; + replace_depot_args[1] = abspath; + replace_depot_args[2] = depots; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3); + ct->world_age = last_age; + JL_GC_POP(); + + size_t slen = jl_string_len(depalias); write_int32(f, slen); - ios_write(f, depstr, slen); + ios_write(f, jl_string_data(depalias), slen); posfile = ios_pos(f); write_uint64(f, 0); // placeholder for length of this file in bytes uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); @@ -65,6 +91,7 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { ios_seek_end(f); } } + JL_GC_POP(); } write_int32(f, 0); // mark the end of the source text } @@ -75,7 +102,12 @@ JL_DLLEXPORT void jl_write_compiler_output(void) return; } - jl_task_wait_empty(); + jl_task_wait_empty(); // wait for most work to finish (except possibly finalizers) + jl_gc_collect(JL_GC_FULL); + jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers + jl_task_t *ct = jl_current_task; + jl_gc_enable_finalizers(ct, 0); // now disable finalizers, as they could schedule more work or make other unexpected changes to reachability + jl_task_wait_empty(); // then make sure we are the only thread alive that could be running user code past here if (!jl_module_init_order) { jl_printf(JL_STDERR, "WARNING: --output requested, but no modules defined during run\n"); @@ -86,21 +118,23 @@ JL_DLLEXPORT void jl_write_compiler_output(void) jl_array_t *udeps = NULL; JL_GC_PUSH2(&worklist, &udeps); jl_module_init_order = jl_alloc_vec_any(0); - int i, l = jl_array_len(worklist); + int i, l = jl_array_nrows(worklist); for (i = 0; i < l; i++) { - jl_value_t *m = 
jl_ptrarrayref(worklist, i); + jl_value_t *m = jl_array_ptr_ref(worklist, i); jl_value_t *f = jl_get_global((jl_module_t*)m, jl_symbol("__init__")); if (f) { jl_array_ptr_1d_push(jl_module_init_order, m); int setting = jl_get_module_compile((jl_module_t*)m); - if (setting != JL_OPTIONS_COMPILE_OFF && - setting != JL_OPTIONS_COMPILE_MIN) { + if ((setting != JL_OPTIONS_COMPILE_OFF && (jl_options.trim || + (setting != JL_OPTIONS_COMPILE_MIN)))) { // TODO: this would be better handled if moved entirely to jl_precompile // since it's a slightly duplication of effort jl_value_t *tt = jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f); JL_GC_PUSH1(&tt); tt = jl_apply_tuple_type_v(&tt, 1); jl_compile_hint((jl_tupletype_t*)tt); + if (jl_options.trim) + jl_add_entrypoint((jl_tupletype_t*)tt); JL_GC_POP(); } } @@ -147,7 +181,7 @@ JL_DLLEXPORT void jl_write_compiler_output(void) jl_options.outputunoptbc, jl_options.outputo, jl_options.outputasm, - z, targets); + z, targets, NULL); jl_postoutput_hook(); } @@ -165,7 +199,12 @@ JL_DLLEXPORT void jl_write_compiler_output(void) jl_printf(JL_STDERR, "\n ** incremental compilation may be broken for this module **\n\n"); } } + if (jl_options.trim) { + exit(0); // Some finalizers need to run and we've blown up the bindings table + // TODO: Is this still needed + } JL_GC_POP(); + jl_gc_enable_finalizers(ct, 1); } #ifdef __cplusplus diff --git a/src/precompile_utils.c b/src/precompile_utils.c index 055ec4b3330f1..8906b3eb586d3 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -1,6 +1,8 @@ -// f{<:Union{...}}(...) is a common pattern -// and expanding the Union may give a leaf function -static void _compile_all_tvar_union(jl_value_t *methsig) +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// f(...) where {T<:Union{...}} is a common pattern +// and expanding the Union may give some leaf functions +static int _compile_all_tvar_union(jl_value_t *methsig) { int tvarslen = jl_subtype_env_size(methsig); jl_value_t *sigbody = methsig; @@ -13,79 +15,86 @@ static void _compile_all_tvar_union(jl_value_t *methsig) assert(jl_is_unionall(sigbody)); idx[i] = 0; env[2 * i] = (jl_value_t*)((jl_unionall_t*)sigbody)->var; - env[2 * i + 1] = jl_bottom_type; // initialize the list with Union{}, since T<:Union{} is always a valid option + jl_value_t *tv = env[2 * i]; + while (jl_is_typevar(tv)) + tv = ((jl_tvar_t*)tv)->ub; + if (jl_is_abstracttype(tv) && !jl_is_type_type(tv)) { + JL_GC_POP(); + return 0; // Any as TypeVar is common and not useful here to try to analyze further + } + env[2 * i + 1] = tv; sigbody = ((jl_unionall_t*)sigbody)->body; } - for (i = 0; i < tvarslen; /* incremented by inner loop */) { - jl_value_t **sig = &roots[0]; + int all = 1; + int incr = 0; + while (!incr) { + for (i = 0, incr = 1; i < tvarslen; i++) { + jl_value_t *tv = env[2 * i]; + while (jl_is_typevar(tv)) + tv = ((jl_tvar_t*)tv)->ub; + if (jl_is_uniontype(tv)) { + size_t l = jl_count_union_components(tv); + size_t j = idx[i]; + env[2 * i + 1] = jl_nth_union_component(tv, j); + ++j; + if (incr) { + if (j == l) { + idx[i] = 0; + } + else { + idx[i] = j; + incr = 0; + } + } + } + } + jl_value_t *sig = NULL; JL_TRY { // TODO: wrap in UnionAll for each tvar in env[2*i + 1] ? 
// currently doesn't matter much, since jl_compile_hint doesn't work on abstract types - *sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen); + sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen); } JL_CATCH { - goto getnext; // sigh, we found an invalid type signature. should we warn the user? + sig = NULL; } - if (!jl_has_concrete_subtype(*sig)) - goto getnext; // signature wouldn't be callable / is invalid -- skip it - if (jl_is_concrete_type(*sig)) { - if (jl_compile_hint((jl_tupletype_t *)*sig)) - goto getnext; // success - } - - getnext: - for (i = 0; i < tvarslen; i++) { - jl_tvar_t *tv = (jl_tvar_t*)env[2 * i]; - if (jl_is_uniontype(tv->ub)) { - size_t l = jl_count_union_components(tv->ub); - size_t j = idx[i]; - if (j == l) { - env[2 * i + 1] = jl_bottom_type; - idx[i] = 0; - } - else { - jl_value_t *ty = jl_nth_union_component(tv->ub, j); - if (!jl_is_concrete_type(ty)) - ty = (jl_value_t*)jl_new_typevar(tv->name, tv->lb, ty); - env[2 * i + 1] = ty; - idx[i] = j + 1; - break; - } - } - else { - env[2 * i + 1] = (jl_value_t*)tv; - } + if (sig) { + roots[0] = sig; + if (jl_is_datatype(sig) && jl_has_concrete_subtype(sig)) + all = all && jl_compile_hint((jl_tupletype_t*)sig); + else + all = 0; } } JL_GC_POP(); + return all; } // f(::Union{...}, ...) is a common pattern // and expanding the Union may give a leaf function -static void _compile_all_union(jl_value_t *sig) +static int _compile_all_union(jl_value_t *sig) { jl_tupletype_t *sigbody = (jl_tupletype_t*)jl_unwrap_unionall(sig); size_t count_unions = 0; + size_t union_size = 1; size_t i, l = jl_svec_len(sigbody->parameters); jl_svec_t *p = NULL; jl_value_t *methsig = NULL; for (i = 0; i < l; i++) { jl_value_t *ty = jl_svecref(sigbody->parameters, i); - if (jl_is_uniontype(ty)) - ++count_unions; - else if (ty == jl_bottom_type) - return; // why does this method exist? 
- else if (jl_is_datatype(ty) && !jl_has_free_typevars(ty) && - ((!jl_is_kind(ty) && ((jl_datatype_t*)ty)->isconcretetype) || - ((jl_datatype_t*)ty)->name == jl_type_typename)) - return; // no amount of union splitting will make this a leaftype signature + if (jl_is_uniontype(ty)) { + count_unions += 1; + union_size *= jl_count_union_components(ty); + } + else if (jl_is_datatype(ty) && + ((!((jl_datatype_t*)ty)->isconcretetype || jl_is_kind(ty)) && + ((jl_datatype_t*)ty)->name != jl_type_typename)) + return 0; // no amount of union splitting will make this a dispatch signature } - if (count_unions == 0 || count_unions >= 6) { - _compile_all_tvar_union(sig); - return; + if (union_size <= 1 || union_size > 8) { + return _compile_all_tvar_union(sig); } int *idx = (int*)alloca(sizeof(int) * count_unions); @@ -93,6 +102,7 @@ static void _compile_all_union(jl_value_t *sig) idx[i] = 0; } + int all = 1; JL_GC_PUSH2(&p, &methsig); int idx_ctr = 0, incr = 0; while (!incr) { @@ -120,12 +130,14 @@ static void _compile_all_union(jl_value_t *sig) jl_svecset(p, i, ty); } } - methsig = jl_apply_tuple_type(p); + methsig = jl_apply_tuple_type(p, 1); methsig = jl_rewrap_unionall(methsig, sig); - _compile_all_tvar_union(methsig); + if (!_compile_all_tvar_union(methsig)) + all = 0; } JL_GC_POP(); + return all; } static int compile_all_collect__(jl_typemap_entry_t *ml, void *env) @@ -147,29 +159,32 @@ static int compile_all_collect_(jl_methtable_t *mt, void *env) return 1; } -static void jl_compile_all_defs(jl_array_t *mis) +static void jl_compile_all_defs(jl_array_t *mis, int all) { jl_array_t *allmeths = jl_alloc_vec_any(0); JL_GC_PUSH1(&allmeths); jl_foreach_reachable_mtable(compile_all_collect_, allmeths); - size_t i, l = jl_array_len(allmeths); + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t i, l = jl_array_nrows(allmeths); for (i = 0; i < l; i++) { jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { // method has a single compilable specialization, e.g. its definition // signature is concrete. in this case we can just hint it. 
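The union-splitting helpers above (_compile_all_tvar_union and _compile_all_union) both walk every combination of union components with a small odometer over an idx[] array: bump the lowest digit, carry when it wraps, and stop once every digit has wrapped. A minimal standalone sketch of that counter follows; for_each_combination is an invented name and this is not the code in this file, only the enumeration idea.

#include <cstddef>
#include <vector>

// Visit every element of the cartesian product radices[0] x ... x radices[n-1]
// using the same odometer idea as the union-splitting loops. All radices must be >= 1.
static void for_each_combination(const std::vector<size_t> &radices,
                                 void (*visit)(const std::vector<size_t> &digits))
{
    std::vector<size_t> idx(radices.size(), 0);
    int incr = 0;
    while (!incr) {
        visit(idx);                    // e.g. pick the idx[i]-th union component for slot i
        incr = 1;
        for (size_t i = 0; i < radices.size(); i++) {
            if (++idx[i] == radices[i]) {
                idx[i] = 0;            // this digit wrapped; carry into the next one
            }
            else {
                incr = 0;              // no carry needed; a fresh combination is ready
                break;
            }
        }
    }
}

The actual loops additionally rebuild the method signature for each combination and skip ones that fail to instantiate (the JL_TRY/JL_CATCH above), but the termination condition is the same: incr stays set only once every digit has wrapped.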
- jl_compile_hint((jl_tupletype_t*)m->sig); + jl_compile_method_sig(m, m->sig, jl_emptysvec, world); } else { // first try to create leaf signatures from the signature declaration and compile those _compile_all_union(m->sig); - // finally, compile a fully generic fallback that can work for all arguments - jl_method_instance_t *unspec = jl_get_unspecialized(m); - if (unspec) - jl_array_ptr_1d_push(mis, (jl_value_t*)unspec); + if (all) { + // finally, compile a fully generic fallback that can work for all arguments (even invoke) + jl_method_instance_t *unspec = jl_get_unspecialized(m); + if (unspec) + jl_array_ptr_1d_push(mis, (jl_value_t*)unspec); + } } } @@ -182,12 +197,14 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); while (codeinst) { int do_compile = 0; - if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) { + if (codeinst->owner != jl_nothing) { + // TODO(vchuravy) native code caching for foreign interpreters + } + else if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) { jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); if (inferred && - inferred != jl_nothing && - jl_ir_flag_inferred(inferred) && - (jl_ir_inlining_cost(inferred) == UINT16_MAX)) { + (jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL || inferred == jl_nothing || + ((jl_is_string(inferred) || jl_is_code_info(inferred)) && jl_ir_inlining_cost(inferred) == UINT16_MAX))) { do_compile = 1; } else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { @@ -243,14 +260,12 @@ static void *jl_precompile_(jl_array_t *m, int external_linkage) jl_method_instance_t *mi = NULL; JL_GC_PUSH2(&m2, &mi); m2 = jl_alloc_vec_any(0); - for (size_t i = 0; i < jl_array_len(m); i++) { + for (size_t i = 0; i < jl_array_nrows(m); i++) { jl_value_t *item = jl_array_ptr_ref(m, i); if (jl_is_method_instance(item)) { mi = (jl_method_instance_t*)item; - size_t min_world = 0; - size_t max_world = ~(size_t)0; if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->sparam_vals, mi->def.method)) - mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0); + mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), 0); if (mi) jl_array_ptr_1d_push(m2, (jl_value_t*)mi); } @@ -260,8 +275,7 @@ static void *jl_precompile_(jl_array_t *m, int external_linkage) jl_array_ptr_1d_push(m2, item); } } - void *native_code = jl_create_native(m2, NULL, NULL, 0, 1, external_linkage, - jl_atomic_load_acquire(&jl_world_counter)); + void *native_code = jl_create_native(m2, NULL, 0, external_linkage, jl_atomic_load_acquire(&jl_world_counter)); JL_GC_POP(); return native_code; } @@ -271,15 +285,20 @@ static void *jl_precompile(int all) // array of MethodInstances and ccallable aliases to include in the output jl_array_t *m = jl_alloc_vec_any(0); JL_GC_PUSH1(&m); - if (all) - jl_compile_all_defs(m); + jl_compile_all_defs(m, all); jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); void *native_code = jl_precompile_(m, 0); JL_GC_POP(); return native_code; } -static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_specializations) +static int suppress_precompile = 0; +JL_DLLEXPORT void jl_suppress_precompile(int 
suppress) +{ + suppress_precompile = suppress; +} + +static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_ext_cis) { if (!worklist) return NULL; @@ -287,35 +306,115 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met // type signatures that were inferred but haven't been compiled jl_array_t *m = jl_alloc_vec_any(0); JL_GC_PUSH1(&m); - size_t i, n = jl_array_len(worklist); - for (i = 0; i < n; i++) { - jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); - assert(jl_is_module(mod)); - foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); - } - n = jl_array_len(extext_methods); - for (i = 0; i < n; i++) { - jl_method_t *method = (jl_method_t*)jl_array_ptr_ref(extext_methods, i); - assert(jl_is_method(method)); - jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations); - if (!jl_is_svec(specializations)) { - precompile_enq_specialization_((jl_method_instance_t*)specializations, m); + if (!suppress_precompile) { + size_t i, n = jl_array_nrows(worklist); + for (i = 0; i < n; i++) { + jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + assert(jl_is_module(mod)); + foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); } - else { - size_t j, l = jl_svec_len(specializations); - for (j = 0; j < l; j++) { - jl_value_t *mi = jl_svecref(specializations, j); - if (mi != jl_nothing) - precompile_enq_specialization_((jl_method_instance_t*)mi, m); + n = jl_array_nrows(extext_methods); + for (i = 0; i < n; i++) { + jl_method_t *method = (jl_method_t*)jl_array_ptr_ref(extext_methods, i); + assert(jl_is_method(method)); + jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations); + if (!jl_is_svec(specializations)) { + precompile_enq_specialization_((jl_method_instance_t*)specializations, m); + } + else { + size_t j, l = jl_svec_len(specializations); + for (j = 0; j < l; j++) { + jl_value_t *mi = jl_svecref(specializations, j); + if (mi != jl_nothing) + precompile_enq_specialization_((jl_method_instance_t*)mi, m); + } + } + } + if (new_ext_cis) { + n = jl_array_nrows(new_ext_cis); + for (i = 0; i < n; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i); + precompile_enq_specialization_(jl_get_ci_mi(ci), m); } } - } - n = jl_array_len(new_specializations); - for (i = 0; i < n; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i); - precompile_enq_specialization_(ci->def, m); } void *native_code = jl_precompile_(m, 1); JL_GC_POP(); return native_code; } + +static int enq_ccallable_entrypoints_(jl_typemap_entry_t *def, void *closure) +{ + jl_method_t *m = def->func.method; + if (m->external_mt) + return 1; + if (m->ccallable) + jl_add_entrypoint((jl_tupletype_t*)jl_svecref(m->ccallable, 1)); + return 1; +} + +static int enq_ccallable_entrypoints(jl_methtable_t *mt, void *env) +{ + return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), enq_ccallable_entrypoints_, env); +} + +JL_DLLEXPORT void jl_add_ccallable_entrypoints(void) +{ + jl_foreach_reachable_mtable(enq_ccallable_entrypoints, NULL); +} + +static void *jl_precompile_trimmed(size_t world) +{ + // array of MethodInstances and ccallable aliases to include in the output + jl_array_t *m = jl_alloc_vec_any(0); + jl_value_t *ccallable = NULL; + JL_GC_PUSH2(&m, &ccallable); + jl_method_instance_t *mi; + while (1) { + mi = (jl_method_instance_t*)arraylist_pop(jl_entrypoint_mis); + if (mi == NULL) + 
break; + assert(jl_is_method_instance(mi)); + + jl_array_ptr_1d_push(m, (jl_value_t*)mi); + ccallable = (jl_value_t *)mi->def.method->ccallable; + if (ccallable) + jl_array_ptr_1d_push(m, ccallable); + } + + void *native_code = jl_create_native(m, NULL, jl_options.trim, 0, world); + JL_GC_POP(); + return native_code; +} + +static void jl_rebuild_methtables(arraylist_t* MIs, htable_t* mtables) +{ + size_t i; + for (i = 0; i < MIs->len; i++) { + jl_method_instance_t *mi = (jl_method_instance_t*)MIs->items[i]; + jl_method_t *m = mi->def.method; + jl_methtable_t *old_mt = jl_method_get_table(m); + if ((jl_value_t *)old_mt == jl_nothing) + continue; + jl_sym_t *name = old_mt->name; + if (!ptrhash_has(mtables, old_mt)) + ptrhash_put(mtables, old_mt, jl_new_method_table(name, m->module)); + jl_methtable_t *mt = (jl_methtable_t*)ptrhash_get(mtables, old_mt); + size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t * lookup = jl_methtable_lookup(mt, m->sig, world); + // Check if the method is already in the new table, if not then insert it there + if (lookup == jl_nothing || (jl_method_t*)lookup != m) { + //TODO: should this be a function like unsafe_insert_method? + size_t min_world = jl_atomic_load_relaxed(&m->primary_world); + size_t max_world = jl_atomic_load_relaxed(&m->deleted_world); + jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0); + jl_atomic_store_relaxed(&m->deleted_world, 1); + jl_typemap_entry_t *newentry = jl_method_table_add(mt, m, NULL); + jl_atomic_store_relaxed(&m->primary_world, min_world); + jl_atomic_store_relaxed(&m->deleted_world, max_world); + jl_atomic_store_relaxed(&newentry->min_world, min_world); + jl_atomic_store_relaxed(&newentry->max_world, max_world); + } + } + +} diff --git a/src/processor.cpp b/src/processor.cpp index 24a434af91ad3..3edebcc2f3ae6 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -4,6 +4,10 @@ #include "llvm-version.h" #include +#include +#include +#include +#include #include #include @@ -107,13 +111,13 @@ static inline bool test_nbit(const T1 &bits, T2 _bitidx) } template -static inline void unset_bits(T &bits) +static inline void unset_bits(T &bits) JL_NOTSAFEPOINT { (void)bits; } template -static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) +static inline void unset_bits(T &bits, T1 _bitidx, Rest... 
rest) JL_NOTSAFEPOINT { auto bitidx = static_cast(_bitidx); auto u32idx = bitidx / 32; @@ -142,7 +146,7 @@ static inline void set_bit(T &bits, T1 _bitidx, bool val) template struct FeatureList { uint32_t eles[n]; - uint32_t &operator[](size_t pos) + uint32_t &operator[](size_t pos) JL_NOTSAFEPOINT { return eles[pos]; } @@ -154,7 +158,7 @@ struct FeatureList { { int cnt = 0; for (size_t i = 0; i < n; i++) - cnt += llvm::countPopulation(eles[i]); + cnt += llvm::popcount(eles[i]); return cnt; } inline bool empty() const @@ -255,7 +259,7 @@ static inline void mask_features(const FeatureList masks, uint32_t *features) } // Turn feature list to a string the LLVM accept -static inline std::string join_feature_strs(const std::vector &strs) +static inline std::string join_feature_strs(const llvm::ArrayRef &strs) { size_t nstr = strs.size(); if (!nstr) @@ -275,7 +279,7 @@ static inline void append_ext_features(std::string &features, const std::string features.append(ext_features); } -static inline void append_ext_features(std::vector &features, +static inline void append_ext_features(llvm::SmallVectorImpl &features, const std::string &ext_features) { if (ext_features.empty()) @@ -297,12 +301,6 @@ static inline void append_ext_features(std::vector &features, * Target specific type/constant definitions, always enable. */ -struct FeatureName { - const char *name; - uint32_t bit; // bit index into a `uint32_t` array; - uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support -}; - template struct CPUSpec { const char *name; @@ -391,7 +389,7 @@ JL_UNUSED static uint32_t find_feature_bit(const FeatureName *features, size_t n return feature.bit; } } - return (uint32_t)-1; + return UINT32_MAX; } // This is how we save the target identification. @@ -399,13 +397,11 @@ JL_UNUSED static uint32_t find_feature_bit(const FeatureName *features, size_t n // 1. CPU ID is less stable (they are not bound to hardware/OS API) // 2. We need to support CPU names that are not recognized by us and therefore doesn't have an ID // 3. CPU name is trivial to parse -static inline std::vector serialize_target_data(llvm::StringRef name, - uint32_t nfeature, - const uint32_t *features_en, - const uint32_t *features_dis, - llvm::StringRef ext_features) +static inline llvm::SmallVector +serialize_target_data(llvm::StringRef name, uint32_t nfeature, const uint32_t *features_en, + const uint32_t *features_dis, llvm::StringRef ext_features) { - std::vector res; + llvm::SmallVector res; auto add_data = [&] (const void *data, size_t sz) { if (sz == 0) return; @@ -426,10 +422,9 @@ static inline std::vector serialize_target_data(llvm::StringRef name, } template -static inline std::vector serialize_target_data(llvm::StringRef name, - const FeatureList &features_en, - const FeatureList &features_dis, - llvm::StringRef ext_features) +static inline llvm::SmallVector +serialize_target_data(llvm::StringRef name, const FeatureList &features_en, + const FeatureList &features_dis, llvm::StringRef ext_features) { return serialize_target_data(name, n, &features_en[0], &features_dis[0], ext_features); } @@ -448,7 +443,7 @@ struct TargetData { // In addition to the serialized data, the first `uint32_t` gives the number of targets saved // and each target has a `uint32_t` flag before the serialized target data. 
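Note on the serialized target blob handled below: per the comment just above, the image stores a leading `uint32_t` target count, and each target is preceded by a `uint32_t` flag word before its serialized data. A minimal C sketch of reading just those two documented fields follows; `read_u32`, the sample `blob`, and its contents are hypothetical, and the per-target payload produced by `serialize_target_data` is treated as opaque here.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Unaligned-safe load, mirroring the memcpy-based loads in deserialize_target_data. */
static uint32_t read_u32(const uint8_t **p)
{
    uint32_t v;
    memcpy(&v, *p, sizeof(v));
    *p += sizeof(v);
    return v;
}

int main(void)
{
    /* Hypothetical blob: one target whose flag word is 0; the serialized
     * payload that would normally follow the flag is omitted. */
    const uint8_t blob[8] = {1, 0, 0, 0, 0, 0, 0, 0};
    const uint8_t *p = blob;
    uint32_t ntargets = read_u32(&p);      /* leading target count */
    if (ntargets > 0) {
        uint32_t flags = read_u32(&p);     /* per-target flag word */
        printf("targets = %u, first flag = 0x%x\n", ntargets, flags);
    }
    return 0;
}
```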
template -static inline std::vector> deserialize_target_data(const uint8_t *data) +static inline llvm::SmallVector, 0> deserialize_target_data(const uint8_t *data) { auto load_data = [&] (void *dest, size_t sz) { memcpy(dest, data, sz); @@ -463,7 +458,7 @@ static inline std::vector> deserialize_target_data(const uint8_t * }; uint32_t ntarget; load_data(&ntarget, 4); - std::vector> res(ntarget); + llvm::SmallVector, 0> res(ntarget); for (uint32_t i = 0; i < ntarget; i++) { auto &target = res[i]; load_data(&target.en.flags, 4); @@ -505,12 +500,12 @@ static inline int get_clone_base(const char *start, const char *end) // Parse cmdline string. This handles `clone_all` and `base` special features. // Other feature names will be passed to `feature_cb` for target dependent parsing. template -static inline std::vector> +static inline llvm::SmallVector, 0> parse_cmdline(const char *option, F &&feature_cb) { if (!option) option = "native"; - std::vector> res; + llvm::SmallVector, 0> res; TargetData arg{}; auto reset_arg = [&] { res.push_back(arg); @@ -617,13 +612,18 @@ parse_cmdline(const char *option, F &&feature_cb) // Cached version of command line parsing template -static inline std::vector> &get_cmdline_targets(F &&feature_cb) +static inline llvm::SmallVector, 0> &get_cmdline_targets(F &&feature_cb) { - static std::vector> targets = + static llvm::SmallVector, 0> targets = parse_cmdline(jl_options.cpu_target, std::forward(feature_cb)); return targets; } +extern "C" { +void *image_pointers_unavailable; +extern void * JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(image_pointers_unavailable) jl_image_pointers; +} + // Load sysimg, use the `callback` for dispatch and perform all relocations // for the selected target. template @@ -633,57 +633,60 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) jl_image_t res{}; const jl_image_pointers_t *pointers; - jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1); + if (hdl == jl_exe_handle && &jl_image_pointers != JL_WEAK_SYMBOL_DEFAULT(image_pointers_unavailable)) + pointers = (const jl_image_pointers_t *)&jl_image_pointers; + else + jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1); const void *ids = pointers->target_data; - uint32_t target_idx = callback(ids); + jl_value_t* rejection_reason = nullptr; + JL_GC_PUSH1(&rejection_reason); + uint32_t target_idx = callback(ids, &rejection_reason); + if (target_idx == UINT32_MAX) { + jl_error(jl_string_ptr(rejection_reason)); + } + JL_GC_POP(); if (pointers->header->version != 1) { jl_error("Image file is not compatible with this version of Julia"); } - std::vector fvars(pointers->header->nfvars); - std::vector gvars(pointers->header->ngvars); + llvm::SmallVector fvars(pointers->header->nfvars); + llvm::SmallVector gvars(pointers->header->ngvars); - std::vector> clones; + llvm::SmallVector, 0> clones; for (unsigned i = 0; i < pointers->header->nshards; i++) { auto shard = pointers->shards[i]; - // .data base - char *data_base = (char *)shard.gvar_base; - - // .text base - const char *text_base = shard.fvar_base; - - const int32_t *offsets = shard.fvar_offsets; - uint32_t nfunc = offsets[0]; + void **fvar_shard = shard.fvar_ptrs; + uintptr_t nfunc = *shard.fvar_count; assert(nfunc <= pointers->header->nfvars); - offsets++; const int32_t *reloc_slots = shard.clone_slots; const uint32_t nreloc = reloc_slots[0]; - reloc_slots += 1; + reloc_slots++; const uint32_t *clone_idxs = shard.clone_idxs; - const int32_t *clone_offsets = shard.clone_offsets; + void **clone_ptrs = shard.clone_ptrs; uint32_t tag_len 
= clone_idxs[0]; - clone_idxs += 1; + clone_idxs++; assert(tag_len & jl_sysimg_tag_mask); - std::vector base_offsets = {offsets}; + llvm::SmallVector base_ptrs(0); + base_ptrs.push_back(fvar_shard); // Find target - for (uint32_t i = 0;i < target_idx;i++) { + for (uint32_t i = 0; i < target_idx; i++) { uint32_t len = jl_sysimg_val_mask & tag_len; if (jl_sysimg_tag_mask & tag_len) { - if (i != 0) - clone_offsets += nfunc; clone_idxs += len + 1; + if (i != 0) + clone_ptrs += nfunc; } else { - clone_offsets += len; + clone_ptrs += len; clone_idxs += len + 2; } tag_len = clone_idxs[-1]; - base_offsets.push_back(tag_len & jl_sysimg_tag_mask ? clone_offsets : nullptr); + base_ptrs.push_back(tag_len & jl_sysimg_tag_mask ? clone_ptrs : nullptr); } bool clone_all = (tag_len & jl_sysimg_tag_mask) != 0; @@ -691,22 +694,22 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) if (clone_all) { // clone_all if (target_idx != 0) { - offsets = clone_offsets; + fvar_shard = clone_ptrs; } } else { uint32_t base_idx = clone_idxs[0]; assert(base_idx < target_idx); if (target_idx != 0) { - offsets = base_offsets[base_idx]; - assert(offsets); + fvar_shard = base_ptrs[base_idx]; + assert(fvar_shard); } clone_idxs++; unsigned start = clones.size(); clones.resize(start + tag_len); auto idxs = shard.fvar_idxs; for (unsigned i = 0; i < tag_len; i++) { - clones[start + i] = {(clone_idxs[i] & ~jl_sysimg_val_mask) | idxs[clone_idxs[i] & jl_sysimg_val_mask], clone_offsets[i] + text_base}; + clones[start + i] = {(clone_idxs[i] & ~jl_sysimg_val_mask) | idxs[clone_idxs[i] & jl_sysimg_val_mask], clone_ptrs[i]}; } } // Do relocation @@ -714,13 +717,13 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) uint32_t len = jl_sysimg_val_mask & tag_len; for (uint32_t i = 0; i < len; i++) { uint32_t idx = clone_idxs[i]; - int32_t offset; + void *fptr; if (clone_all) { - offset = offsets[idx]; + fptr = fvar_shard[idx]; } else if (idx & jl_sysimg_tag_mask) { idx = idx & jl_sysimg_val_mask; - offset = clone_offsets[i]; + fptr = clone_ptrs[i]; } else { continue; @@ -730,9 +733,10 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) auto reloc_idx = ((const uint32_t*)reloc_slots)[reloc_i * 2]; if (reloc_idx == idx) { found = true; + const char *data_base = (const char*)shard.clone_slots; auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]); assert(slot); - *slot = offset + text_base; + *slot = fptr; } else if (reloc_idx > idx) { break; @@ -744,34 +748,35 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) auto fidxs = shard.fvar_idxs; for (uint32_t i = 0; i < nfunc; i++) { - fvars[fidxs[i]] = text_base + offsets[i]; + fvars[fidxs[i]] = fvar_shard[i]; } + // .data base auto gidxs = shard.gvar_idxs; unsigned ngvars = shard.gvar_offsets[0]; assert(ngvars <= pointers->header->ngvars); + char *data_base = (char*)shard.gvar_offsets; for (uint32_t i = 0; i < ngvars; i++) { gvars[gidxs[i]] = data_base + shard.gvar_offsets[i+1]; } } if (!fvars.empty()) { - auto offsets = (int32_t *) malloc(sizeof(int32_t) * fvars.size()); - res.fptrs.base = fvars[0]; + auto ptrs = (void**) malloc(sizeof(void*) * fvars.size()); for (size_t i = 0; i < fvars.size(); i++) { assert(fvars[i] && "Missing function pointer!"); - offsets[i] = fvars[i] - res.fptrs.base; + ptrs[i] = fvars[i]; } - res.fptrs.offsets = offsets; - res.fptrs.noffsets = fvars.size(); + res.fptrs.ptrs = ptrs; + res.fptrs.nptrs = fvars.size(); } if (!gvars.empty()) { - auto offsets = (int32_t *) malloc(sizeof(int32_t) * 
gvars.size()); - res.gvars_base = (uintptr_t *)gvars[0]; + auto offsets = (int32_t*)malloc(sizeof(int32_t) * gvars.size()); + res.gvars_base = (const char*)pointers->header; for (size_t i = 0; i < gvars.size(); i++) { assert(gvars[i] && "Missing global variable pointer!"); - offsets[i] = gvars[i] - (const char *)res.gvars_base; + offsets[i] = gvars[i] - res.gvars_base; } res.gvars_offsets = offsets; res.ngvars = gvars.size(); @@ -779,15 +784,18 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) if (!clones.empty()) { assert(!fvars.empty()); - std::sort(clones.begin(), clones.end()); - auto clone_offsets = (int32_t *) malloc(sizeof(int32_t) * clones.size()); + std::sort(clones.begin(), clones.end(), + [](const std::pair &a, const std::pair &b) { + return (a.first & jl_sysimg_val_mask) < (b.first & jl_sysimg_val_mask); + }); + auto clone_ptrs = (void**) malloc(sizeof(void*) * clones.size()); auto clone_idxs = (uint32_t *) malloc(sizeof(uint32_t) * clones.size()); for (size_t i = 0; i < clones.size(); i++) { clone_idxs[i] = clones[i].first; - clone_offsets[i] = clones[i].second - res.fptrs.base; + clone_ptrs[i] = clones[i].second; } res.fptrs.clone_idxs = clone_idxs; - res.fptrs.clone_offsets = clone_offsets; + res.fptrs.clone_ptrs = clone_ptrs; res.fptrs.nclones = clones.size(); } @@ -812,7 +820,7 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); } - res.small_typeof = pointers->small_typeof; + res.jl_small_typeof = pointers->jl_small_typeof; return res; } @@ -848,24 +856,27 @@ static inline void check_cmdline(T &&cmdline, bool imaging) } struct SysimgMatch { - uint32_t best_idx{(uint32_t)-1}; + uint32_t best_idx{UINT32_MAX}; int vreg_size{0}; }; // Find the best match in the sysimg. // Select the best one based on the largest vector register and largest compatible feature set. template -static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size) +static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size, jl_value_t **rejection_reason) { SysimgMatch match; bool match_name = false; int feature_size = 0; + llvm::SmallVector rejection_reasons; + rejection_reasons.reserve(sysimg.size()); for (uint32_t i = 0; i < sysimg.size(); i++) { auto &imgt = sysimg[i]; if (!(imgt.en.features & target.dis.features).empty()) { // Check sysimg enabled features against runtime disabled features // This is valid (and all what we can do) // even if one or both of the targets are unknown. 
+ rejection_reasons.push_back("Rejecting this target due to use of runtime-disabled features\n"); continue; } if (imgt.name == target.name) { @@ -876,25 +887,44 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v } } else if (match_name) { + rejection_reasons.push_back("Rejecting this target since another target has a cpu name match\n"); continue; } int new_vsz = max_vector_size(imgt.en.features); - if (match.vreg_size > new_vsz) + if (match.vreg_size > new_vsz) { + rejection_reasons.push_back("Rejecting this target since another target has a larger vector register size\n"); continue; + } int new_feature_size = imgt.en.features.nbits(); if (match.vreg_size < new_vsz) { match.best_idx = i; match.vreg_size = new_vsz; feature_size = new_feature_size; + rejection_reasons.push_back("Updating best match to this target due to larger vector register size\n"); continue; } - if (new_feature_size < feature_size) + if (new_feature_size < feature_size) { + rejection_reasons.push_back("Rejecting this target since another target has a larger feature set\n"); continue; + } match.best_idx = i; feature_size = new_feature_size; + rejection_reasons.push_back("Updating best match to this target\n"); + } + if (match.best_idx == UINT32_MAX) { + // Construct a nice error message for debugging purposes + std::string error_msg = "Unable to find compatible target in cached code image.\n"; + for (size_t i = 0; i < rejection_reasons.size(); i++) { + error_msg += "Target "; + error_msg += std::to_string(i); + error_msg += " ("; + error_msg += sysimg[i].name; + error_msg += "): "; + error_msg += rejection_reasons[i]; + } + if (rejection_reason) + *rejection_reason = jl_pchar_to_string(error_msg.data(), error_msg.size()); } - if (match.best_idx == (uint32_t)-1) - jl_error("Unable to find compatible target in system image."); return match; } @@ -933,6 +963,47 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList &features, } +static std::string jl_get_cpu_name_llvm(void) +{ + return llvm::sys::getHostCPUName().str(); +} + +static std::string jl_get_cpu_features_llvm(void) +{ +#if JL_LLVM_VERSION >= 190000 + auto HostFeatures = llvm::sys::getHostCPUFeatures(); +#else + llvm::StringMap HostFeatures; + llvm::sys::getHostCPUFeatures(HostFeatures); +#endif + std::string attr; + for (auto &ele: HostFeatures) { + if (ele.getValue()) { + if (!attr.empty()) { + attr.append(",+"); + } + else { + attr.append("+"); + } + attr.append(ele.getKey().str()); + } + } + // Explicitly disabled features need to be added at the end so that + // they are not re-enabled by other features that implies them by default. 
+ for (auto &ele: HostFeatures) { + if (!ele.getValue()) { + if (!attr.empty()) { + attr.append(",-"); + } + else { + attr.append("-"); + } + attr.append(ele.getKey().str()); + } + } + return attr; +} + #if defined(_CPU_X86_) || defined(_CPU_X86_64_) #include "processor_x86.cpp" @@ -946,3 +1017,40 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList &features, #include "processor_fallback.cpp" #endif + +JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) +{ + return jl_cstr_to_string(host_cpu_name().c_str()); +} + +JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) +{ + return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); +} + +extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets() { + auto specs = jl_get_llvm_clone_targets(); + const uint32_t base_flags = 0; + llvm::SmallVector data; + auto push_i32 = [&] (uint32_t v) { + uint8_t buff[4]; + memcpy(buff, &v, 4); + data.insert(data.end(), buff, buff + 4); + }; + push_i32(specs.size()); + for (uint32_t i = 0; i < specs.size(); i++) { + push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); + auto &specdata = specs[i].data; + data.insert(data.end(), specdata.begin(), specdata.end()); + } + + jl_value_t *arr = (jl_value_t*)jl_alloc_array_1d(jl_array_uint8_type, data.size()); + uint8_t *out = jl_array_data(arr, uint8_t); + memcpy(out, data.data(), data.size()); + return arr; +} + +extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **fnames, size_t *nf) { + *fnames = feature_names; + *nf = nfeature_names; +} diff --git a/src/processor.h b/src/processor.h index 3e83bbb2247d6..82a1121aaf7c4 100644 --- a/src/processor.h +++ b/src/processor.h @@ -41,6 +41,8 @@ enum { JL_TARGET_CLONE_CPU = 1 << 8, // Clone when the function uses fp16 JL_TARGET_CLONE_FLOAT16 = 1 << 9, + // Clone when the function uses bf16 + JL_TARGET_CLONE_BFLOAT16 = 1 << 10, }; #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) JL_FEATURE_DEF(name, bit, llvmver) @@ -63,32 +65,30 @@ static const uint32_t jl_sysimg_tag_mask = 0x80000000u; static const uint32_t jl_sysimg_val_mask = ~((uint32_t)0x80000000u); typedef struct _jl_image_fptrs_t { - // base function pointer - const char *base; // number of functions - uint32_t noffsets; - // function pointer offsets - const int32_t *offsets; + uint32_t nptrs; + // function pointers + void **ptrs; // Following fields contains the information about the selected target. // All of these fields are 0 if the selected targets have all the functions cloned. - // Instead the offsets are stored in `noffsets` and `offsets`. + // Instead the offsets are stored in `nptrs` and `ptrs`. // number of cloned functions uint32_t nclones; - // function pointer offsets of cloned functions - const int32_t *clone_offsets; + // function pointer of cloned functions + void **clone_ptrs; // sorted indices of the cloned functions (including the tag bit) const uint32_t *clone_idxs; } jl_image_fptrs_t; typedef struct { uint64_t base; - uintptr_t *gvars_base; + const char *gvars_base; const int32_t *gvars_offsets; uint32_t ngvars; jl_image_fptrs_t fptrs; - void **small_typeof; + void **jl_small_typeof; } jl_image_t; // The header for each image @@ -107,31 +107,25 @@ typedef struct { // Per-shard data for image shards. Each image contains header->nshards of these. typedef struct { - - // This is the base function pointer - // (all other function pointers are stored as offsets to this address) - const char *fvar_base; - // The array of function pointer offsets (`int32_t`) from the base pointer. 
+ // The array of function pointers (`void*`). // This includes all julia functions in sysimg as well as all other functions that are cloned. // The default function pointer is used if the function is cloned. - // The first element is the size of the array, which should **NOT** be used as the number + // The first element is the size of the array, which should **NOT** be used is the number // of julia functions in the sysimg. // Each entry in this array uniquely identifies a function we are interested in // (the function may have multiple function pointers corresponding to different versions). - // In other sysimg info, all references to functions are stored as their `uint32_t` index - // in this array. - const int32_t *fvar_offsets; + const uintptr_t *fvar_count; + void **fvar_ptrs; // This is the mapping of shard function index -> global function index // staticdata.c relies on the same order of functions in the global function array being // the same as what it saw when serializing the global function array. However, partitioning // into multiple shards will cause functions to be reordered. This array is used to map // back to the original function array for loading. const uint32_t *fvar_idxs; - // This is the base data pointer - // (all other data pointers in this shard are stored as offsets to this address) - uintptr_t *gvar_base; // This array of global variable offsets (`int32_t`) from the base pointer. // Similar to fvar_offsets, but for gvars + // This is also the base data pointer + // (all data pointers in this shard are stored as offsets to this address) const int32_t *gvar_offsets; // This is the mapping of shard global variable index -> global global variable index // Similar to fvar_idxs, but for gvars @@ -159,14 +153,12 @@ typedef struct { // this array as the original/base function offsets. // For other targets, this variable contains an offset array with the length defined in // `jl_dispatch_fvars_idxs`. Tagged indices need relocations. - const int32_t *clone_offsets; + void **clone_ptrs; // Target-specific function indices. // For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index // of the base target followed by an array of tagged function indices. // The base target index is required to be smaller than the index of the current target // and must be the default (`0`) or a `clone_all` target. - // If it's not `0`, the function pointer array for the `clone_all` target will be used as - // the base function pointer offsets instead. // The tag bits for both the length and the indices are the top bit. // A tagged length indicates that all of the functions are cloned and the indices follows // are the ones that requires relocation. The base target index is omitted in this case. @@ -175,10 +167,8 @@ typedef struct { // all other cloned functions that requires relocation. // A tagged index means that the function pointer should be filled into the GOT slots // identified by `jl_dispatch_reloc_slots`. There could be more than one slot per function. - // (Note that a tagged index could corresponds to a functions pointer that's the same as + // (Note that a tagged index could corresponds to a function's pointer that's the same as // the base one since this is the only way we currently represent relocations.) - // A tagged length implicitly tags all the indices and the indices will not have the tag bit - // set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`. 
const uint32_t *clone_idxs; } jl_image_shard_t; @@ -197,8 +187,8 @@ typedef struct { const jl_image_shard_t *shards; // points to header->nshards length array // The TLS data pointer const jl_image_ptls_t *ptls; - // A copy of small_typeof[] - void **small_typeof; + // A copy of jl_small_typeof[] + void **jl_small_typeof; // serialized target data // This contains the number of targets @@ -221,10 +211,14 @@ jl_image_t jl_init_processor_pkgimg(void *hdl); // Return the name of the host CPU as a julia string. JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); +// Return the features of the host CPU as a julia string. +JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void); // Dump the name and feature set of the host CPU +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits); +// Check if the CPU has native FMA instructions; // For debugging only JL_DLLEXPORT void jl_dump_host_cpu(void); -JL_DLLEXPORT void jl_check_pkgimage_clones(char* data); +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data); JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero); JL_DLLEXPORT int32_t jl_get_zero_subnormals(void); @@ -238,14 +232,14 @@ JL_DLLEXPORT int32_t jl_get_default_nans(void); #include extern JL_DLLEXPORT bool jl_processor_print_help; - +// NOLINTBEGIN(clang-diagnostic-return-type-c-linkage) /** * Returns the CPU name and feature string to be used by LLVM JIT. * * If the detected/specified CPU name is not available on the LLVM version specified, * a fallback CPU name will be used. Unsupported features will be ignored. */ -extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) JL_NOTSAFEPOINT; +extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) JL_NOTSAFEPOINT; /** * Returns the CPU name and feature string to be used by LLVM disassembler. @@ -260,7 +254,7 @@ struct jl_target_spec_t { // LLVM feature string std::string cpu_features; // serialized identification data - std::vector data; + llvm::SmallVector data; // Clone condition. uint32_t flags; // Base target index. 
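For reference, a hedged sketch of exercising the C entry points declared in this header hunk (`jl_get_cpu_name`, `jl_get_cpu_features`, `jl_cpu_has_fma`). The prototypes are copied from the declarations above; whether an embedding program can actually link against them is an assumption, since they are exported from the internal runtime library rather than the public embedding API. `jl_check_pkgimage_clones` now returns the rejection reason as a Julia string (or `nothing`) instead of throwing, but it is not shown because it needs a package image's target-data blob.

```c
#include <julia.h>
#include <stdio.h>

/* Copied from the declarations in this hunk; availability to an embedder is an assumption. */
JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void);
JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits);

int main(void)
{
    jl_init();
    printf("cpu name    : %s\n", jl_string_ptr(jl_get_cpu_name()));
    printf("cpu features: %s\n", jl_string_ptr(jl_get_cpu_features()));
    /* jl_cpu_has_fma returns jl_true/jl_false rather than a C int */
    printf("64-bit FMA  : %s\n", jl_cpu_has_fma(64) == jl_true ? "yes" : "no");
    jl_atexit_hook(0);
    return 0;
}
```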
@@ -269,9 +263,16 @@ struct jl_target_spec_t { /** * Return the list of targets to clone */ -extern "C" JL_DLLEXPORT std::vector jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT; -std::string jl_get_cpu_name_llvm(void) JL_NOTSAFEPOINT; -std::string jl_get_cpu_features_llvm(void) JL_NOTSAFEPOINT; +extern "C" JL_DLLEXPORT llvm::SmallVector jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT; +// NOLINTEND(clang-diagnostic-return-type-c-linkage) +struct FeatureName { + const char *name; + uint32_t bit; // bit index into a `uint32_t` array; + uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support +}; + +extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets(); +extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **feature_names, size_t *nfeatures); #endif #endif diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 0797fa4381f9d..d28e527ed44e8 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -11,7 +11,7 @@ // This nesting is required to allow compilation on musl #define USE_DYN_GETAUXVAL -#if defined(_OS_LINUX_) && defined(_CPU_AARCH64_) +#if (defined(_OS_LINUX_) || defined(_OS_FREEBSD_)) && defined(_CPU_AARCH64_) # undef USE_DYN_GETAUXVAL # include #elif defined(__GLIBC_PREREQ) @@ -164,7 +164,11 @@ enum class CPU : uint32_t { apple_a12, apple_a13, apple_a14, + apple_a15, + apple_a16, apple_m1, + apple_m2, + apple_m3, apple_s4, apple_s5, @@ -203,7 +207,7 @@ static constexpr auto feature_masks = get_feature_masks( #undef JL_FEATURE_DEF -1); static const auto real_feature_masks = - feature_masks & FeatureList{{(uint32_t)-1, (uint32_t)-1, 0}}; + feature_masks & FeatureList{{UINT32_MAX, UINT32_MAX, 0}}; namespace Feature { enum : uint32_t { @@ -349,7 +353,11 @@ constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +constexpr auto apple_a15 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16); +constexpr auto apple_a16 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16); constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +constexpr auto apple_m2 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16); +constexpr auto apple_m3 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3, i8mm, bf16); // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def // and sysctl -a hw.optional constexpr auto apple_s4 = apple_a12; @@ -431,7 +439,11 @@ static constexpr CPUSpec cpus[] = { {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12}, {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13}, {"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14}, + {"apple-a15", CPU::apple_a15, CPU::apple_a14, 160000, Feature::apple_a15}, + {"apple-a16", CPU::apple_a16, CPU::apple_a14, 160000, Feature::apple_a16}, {"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1}, + {"apple-m2", CPU::apple_m2, CPU::apple_m1, 160000, Feature::apple_m2}, + {"apple-m3", CPU::apple_m3, CPU::apple_m2, 180000, Feature::apple_m3}, {"apple-s4", CPU::apple_s4, CPU::generic, 100000, 
Feature::apple_s4}, {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5}, {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000, @@ -461,7 +473,7 @@ static constexpr auto feature_masks = get_feature_masks( #undef JL_FEATURE_DEF -1); static const auto real_feature_masks = - feature_masks & FeatureList{{(uint32_t)-1, (uint32_t)-1, 0}}; + feature_masks & FeatureList{{UINT32_MAX, UINT32_MAX, 0}}; namespace Feature { enum : uint32_t { @@ -699,16 +711,17 @@ static inline const char *find_cpu_name(uint32_t cpu) static NOINLINE std::pair> _get_host_cpu() { + using namespace llvm; char buffer[128]; size_t bufferlen = 128; sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); - - if(strcmp(buffer,"Apple M1") == 0) - return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); - else if(strcmp(buffer,"Apple M1 Max") == 0) - return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); - else if(strcmp(buffer,"Apple M1 Pro") == 0) + StringRef cpu_name(buffer); + if (cpu_name.find("M1") != StringRef ::npos) return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); + else if (cpu_name.find("M2") != StringRef ::npos) + return std::make_pair((uint32_t)CPU::apple_m2, Feature::apple_m2); + else if (cpu_name.find("M3") != StringRef ::npos) + return std::make_pair((uint32_t)CPU::apple_m3, Feature::apple_m3); else return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); } @@ -724,7 +737,16 @@ static NOINLINE std::pair> _get_host_cpu() # define AT_HWCAP2 26 #endif -#if defined(USE_DYN_GETAUXVAL) +#if defined(_OS_FREEBSD_) +static inline unsigned long jl_getauxval(unsigned long type) +{ + unsigned long val; + if (elf_aux_info((int)type, &val, sizeof(val)) != 0) { + return 0; + } + return val; +} +#elif defined(USE_DYN_GETAUXVAL) static unsigned long getauxval_procfs(unsigned long type) { int fd = open("/proc/self/auxv", O_RDONLY); @@ -817,7 +839,7 @@ template static inline bool try_read_procfs_line(llvm::StringRef line, const char *prefix, T &out, bool &flag, F &&reset) { - if (!line.startswith(prefix)) + if (!line.starts_with(prefix)) return false; if (flag) reset(); @@ -1053,7 +1075,23 @@ static CPU get_cpu_name(CPUID cpuid) return CPU::apple_a14; case 0x22: // Icestorm m1 case 0x23: // Firestorm m1 + case 0x24: + case 0x25: // From https://github.com/AsahiLinux/m1n1/blob/3b9a71422e45209ef57c563e418f877bf54358be/src/chickens.c#L9 + case 0x28: + case 0x29: return CPU::apple_m1; + case 0x30: // Blizzard m2 + case 0x31: // Avalanche m2 + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x38: + case 0x39: + return CPU::apple_m2; + case 0x49: // Everest m3 + case 0x48: // Sawtooth m3 + return CPU::apple_m3; default: return CPU::generic; } case 0x68: // 'h': Huaxintong Semiconductor @@ -1195,7 +1233,7 @@ static bool check_cpu_arch_ver(uint32_t cpu, arm_arch arch) return true; } -static void shrink_big_little(std::vector> &list, +static void shrink_big_little(llvm::SmallVectorImpl> &list, const CPU *cpus, uint32_t ncpu) { auto find = [&] (uint32_t name) { @@ -1260,7 +1298,7 @@ static NOINLINE std::pair> _get_host_cpu() #endif std::set cpus; - std::vector> list; + llvm::SmallVector, 0> list; // Ideally the feature detection above should be enough. 
// However depending on the kernel version not all features are available // and it's also impossible to detect the ISA version which contains @@ -1481,7 +1519,7 @@ static inline void disable_depends(FeatureList &features) ::disable_depends(features, Feature::deps, sizeof(Feature::deps) / sizeof(FeatureDep)); } -static const std::vector> &get_cmdline_targets(void) +static const llvm::SmallVector, 0> &get_cmdline_targets(void) { auto feature_cb = [] (const char *str, size_t len, FeatureList &list) { #ifdef _CPU_AARCH64_ @@ -1493,7 +1531,7 @@ static const std::vector> &get_cmdline_targets(void) } #endif auto fbit = find_feature_bit(feature_names, nfeature_names, str, len); - if (fbit == (uint32_t)-1) + if (fbit == UINT32_MAX) return false; set_bit(list, fbit, true); return true; @@ -1507,7 +1545,7 @@ static const std::vector> &get_cmdline_targets(void) return targets; } -static std::vector> jit_targets; +static llvm::SmallVector, 0> jit_targets; static TargetData arg_target_data(const TargetData &arg, bool require_host) { @@ -1561,7 +1599,7 @@ static int max_vector_size(const FeatureList &features) #endif } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -1573,7 +1611,9 @@ static uint32_t sysimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(sysimg, target, max_vector_size); + auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason); + if (match.best_idx == UINT32_MAX) + return match.best_idx; // Now we've decided on which sysimg version to use. // Make sure the JIT target is compatible with it and save the JIT target. if (match.vreg_size != max_vector_size(target.en.features) && @@ -1586,7 +1626,7 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } -static uint32_t pkgimg_init_cb(const void *id) +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason JL_REQUIRE_ROOTED_SLOT) { TargetData target = jit_targets.front(); auto pkgimg = deserialize_target_data((const uint8_t*)id); @@ -1595,8 +1635,7 @@ static uint32_t pkgimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(pkgimg, target, max_vector_size); - + auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason); return match.best_idx; } @@ -1647,7 +1686,7 @@ static void ensure_jit_target(bool imaging) } } -static std::pair> +static std::pair> get_llvm_target_noext(const TargetData &data) { std::string name = data.name; @@ -1670,7 +1709,7 @@ get_llvm_target_noext(const TargetData &data) if (name == "apple-a7") name = "cyclone"; #endif - std::vector feature_strs; + llvm::SmallVector feature_strs; for (auto &fename: feature_names) { if (fename.llvmver > JL_LLVM_VERSION) continue; @@ -1738,7 +1777,7 @@ get_llvm_target_noext(const TargetData &data) return std::make_pair(std::move(name), std::move(feature_strs)); } -static std::pair> +static std::pair> get_llvm_target_vec(const TargetData &data) { auto res0 = get_llvm_target_noext(data); @@ -1797,9 +1836,20 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) cpus, ncpu_names); } -JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) { - return jl_cstr_to_string(host_cpu_name().c_str()); +#ifdef _CPU_AARCH64_ + return jl_true; +#else + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if (bits 
== 32 && test_nbit(features, Feature::vfp4sp)) + return jl_true; + else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4)) + return jl_true; + else + return jl_false; +#endif } jl_image_t jl_init_processor_sysimg(void *hdl) @@ -1818,12 +1868,18 @@ jl_image_t jl_init_processor_pkgimg(void *hdl) return parse_sysimg(hdl, pkgimg_init_cb); } -JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) { - pkgimg_init_cb(data); + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == UINT32_MAX) + return rejection_reason; + return jl_nothing; } -std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) +std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); flags = jit_targets[0].en.flags; @@ -1843,12 +1899,56 @@ const std::pair &jl_get_llvm_disasm_target(void) return res; } -std::vector jl_get_llvm_clone_targets(void) +#ifndef __clang_gcanalyzer__ +llvm::SmallVector jl_get_llvm_clone_targets(void) { - if (jit_targets.empty()) - jl_error("JIT targets not initialized"); - std::vector res; - for (auto &target: jit_targets) { + + auto &cmdline = get_cmdline_targets(); + check_cmdline(cmdline, true); + llvm::SmallVector, 0> image_targets; + for (auto &arg: cmdline) { + auto data = arg_target_data(arg, image_targets.empty()); + image_targets.push_back(std::move(data)); + } + auto ntargets = image_targets.size(); + if (image_targets.empty()) + jl_error("No targets specified"); + llvm::SmallVector res; + // Now decide the clone condition. + for (size_t i = 1; i < ntargets; i++) { + auto &t = image_targets[i]; + if (t.en.flags & JL_TARGET_CLONE_ALL) + continue; + auto &features0 = image_targets[t.base].en.features; + // Always clone when code checks CPU features + t.en.flags |= JL_TARGET_CLONE_CPU; + static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16}; + for (auto fe: clone_fp16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_FLOAT16; + break; + } + } + // The most useful one in general... 
+ t.en.flags |= JL_TARGET_CLONE_LOOP; +#ifdef _CPU_ARM_ + static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon}; + for (auto fe: clone_math) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_MATH; + break; + } + } + static constexpr uint32_t clone_simd[] = {Feature::neon}; + for (auto fe: clone_simd) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_SIMD; + break; + } + } +#endif + } + for (auto &target: image_targets) { auto features_en = target.en.features; auto features_dis = target.dis.features; for (auto &fename: feature_names) { @@ -1869,6 +1969,8 @@ std::vector jl_get_llvm_clone_targets(void) return res; } +#endif + extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature) { if (feature >= 32 * feature_sz) diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 1aebde6dab90a..f8d9eb9fd9e73 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -2,6 +2,9 @@ // Fallback processor detection and dispatch +static constexpr FeatureName *feature_names = nullptr; +static constexpr uint32_t nfeature_names = 0; + namespace Fallback { static inline const std::string &host_cpu_name() @@ -10,7 +13,7 @@ static inline const std::string &host_cpu_name() return name; } -static const std::vector> &get_cmdline_targets(void) +static const llvm::SmallVector, 0> &get_cmdline_targets(void) { auto feature_cb = [] (const char*, size_t, FeatureList<1>&) { return false; @@ -18,7 +21,7 @@ static const std::vector> &get_cmdline_targets(void) return ::get_cmdline_targets<1>(feature_cb); } -static std::vector> jit_targets; +static llvm::SmallVector, 0> jit_targets; static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host) { @@ -33,7 +36,7 @@ static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host return res; } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -51,7 +54,7 @@ static uint32_t sysimg_init_cb(const void *id) return best_idx; } -static uint32_t pkgimg_init_cb(const void *id) +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) { TargetData<1> target = jit_targets.front(); // Find the last name match or use the default one. 
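A hedged sketch of the new image-selection callback contract used above: `sysimg_init_cb`/`pkgimg_init_cb` now take a rooted `jl_value_t **` slot and return `UINT32_MAX` when no target matches, leaving a human-readable reason in the slot, which is how `jl_check_pkgimage_clones` turns a mismatch into a returned string instead of an error. The callback typedef and the helper name below are illustrative, not declarations from the patch.

```c
#include <julia.h>
#include <stdint.h>

/* Illustrative callback type; the real callbacks are file-local C++ functions. */
typedef uint32_t (*image_cb_t)(const void *id, jl_value_t **rejection_reason);

static jl_value_t *select_or_reason(image_cb_t cb, const void *target_data)
{
    jl_value_t *reason = NULL;
    JL_GC_PUSH1(&reason);                 /* keep the reason string rooted across the call */
    uint32_t idx = cb(target_data, &reason);
    JL_GC_POP();
    if (idx == UINT32_MAX)
        return reason;                    /* caller reports why nothing matched */
    return jl_nothing;                    /* a compatible target was selected */
}
```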
@@ -85,13 +88,13 @@ static void ensure_jit_target(bool imaging) } } -static std::pair> +static std::pair> get_llvm_target_noext(const TargetData<1> &data) { - return std::make_pair(data.name, std::vector{}); + return std::make_pair(data.name, llvm::SmallVector{}); } -static std::pair> +static std::pair> get_llvm_target_vec(const TargetData<1> &data) { auto res0 = get_llvm_target_noext(data); @@ -128,7 +131,7 @@ jl_image_t jl_init_processor_pkgimg(void *hdl) return parse_sysimg(hdl, pkgimg_init_cb); } -std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) +std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); flags = jit_targets[0].en.flags; @@ -141,13 +144,27 @@ const std::pair &jl_get_llvm_disasm_target(void) jl_get_cpu_features_llvm(), {{}, 0}, {{}, 0}, 0}); return res; } - -extern "C" std::vector jl_get_llvm_clone_targets(void) +#ifndef __clang_gcanalyzer__ +llvm::SmallVector jl_get_llvm_clone_targets(void) { - if (jit_targets.empty()) - jl_error("JIT targets not initialized"); - std::vector res; - for (auto &target: jit_targets) { + + auto &cmdline = get_cmdline_targets(); + check_cmdline(cmdline, true); + llvm::SmallVector, 0> image_targets; + for (auto &arg: cmdline) { + auto data = arg_target_data(arg, image_targets.empty()); + image_targets.push_back(std::move(data)); + } + auto ntargets = image_targets.size(); + // Now decide the clone condition. + for (size_t i = 1; i < ntargets; i++) { + auto &t = image_targets[i]; + t.en.flags |= JL_TARGET_CLONE_ALL; + } + if (image_targets.empty()) + jl_error("No image targets found"); + llvm::SmallVector res; + for (auto &target: image_targets) { jl_target_spec_t ele; std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target); ele.data = serialize_target_data(target.name, target.en.features, @@ -158,10 +175,11 @@ extern "C" std::vector jl_get_llvm_clone_targets(void) } return res; } +#endif -JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) { - return jl_cstr_to_string(host_cpu_name().c_str()); + return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false) } JL_DLLEXPORT void jl_dump_host_cpu(void) @@ -170,9 +188,15 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) jl_safe_printf("Features: %s\n", jl_get_cpu_features_llvm().c_str()); } -JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) { - pkgimg_init_cb(data); + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == UINT32_MAX) + return rejection_reason; + return jl_nothing; } extern "C" int jl_test_cpu_feature(jl_cpu_feature_t) diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index e129b1239c7df..bf765be160ed2 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -4,6 +4,7 @@ // CPUID +#include "julia.h" extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType) { asm volatile ( @@ -94,9 +95,11 @@ enum class CPU : uint32_t { amd_znver1, amd_znver2, amd_znver3, + amd_znver4, + amd_znver5, }; -static constexpr size_t feature_sz = 11; +static constexpr size_t feature_sz = 12; static constexpr FeatureName feature_names[] = { #define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver}, #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver}, @@ -139,11 +142,13 @@ static constexpr FeatureDep deps[] = { {vpclmulqdq, avx}, 
{vpclmulqdq, pclmul}, {avxvnni, avx2}, + {avxvnniint8, avx2}, + {avxvnniint16, avx2}, + {avxifma, avx2}, + {avxneconvert, avx2}, {avx512f, avx2}, {avx512dq, avx512f}, {avx512ifma, avx512f}, - {avx512pf, avx512f}, - {avx512er, avx512f}, {avx512cd, avx512f}, {avx512bw, avx512f}, {avx512bf16, avx512bw}, @@ -159,6 +164,8 @@ static constexpr FeatureDep deps[] = { {avx512fp16, avx512vl}, {amx_int8, amx_tile}, {amx_bf16, amx_tile}, + {amx_fp16, amx_tile}, + {amx_complex, amx_tile}, {sse4a, sse3}, {xop, fma4}, {fma4, avx}, @@ -166,6 +173,9 @@ static constexpr FeatureDep deps[] = { {xsaveopt, xsave}, {xsavec, xsave}, {xsaves, xsave}, + {sha512, avx2}, + {sm3, avx}, + {sm4, avx2}, }; // We require cx16 on 64bit by default. This can be overwritten with `-cx16` @@ -181,7 +191,7 @@ constexpr auto tremont = goldmont_plus | get_feature_masks(clwb, gfni); constexpr auto knl = get_feature_masks(sse3, ssse3, sse41, sse42, cx16, sahf, popcnt, aes, pclmul, avx, xsave, xsaveopt, rdrnd, f16c, fsgsbase, avx2, bmi, bmi2, fma, lzcnt, movbe, adx, rdseed, prfchw, - avx512f, avx512er, avx512cd, avx512pf, prefetchwt1); + avx512f, avx512cd); constexpr auto knm = knl | get_feature_masks(avx512vpopcntdq); constexpr auto yonah = get_feature_masks(sse3); constexpr auto prescott = yonah; @@ -234,6 +244,9 @@ constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero rdseed, sha, sse4a, xsavec); constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd); constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq); +constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi, + avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves); +constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi, avxvnni); } @@ -295,6 +308,8 @@ static constexpr CPUSpec cpus[] = { {"znver1", CPU::amd_znver1, CPU::generic, 0, Feature::znver1}, {"znver2", CPU::amd_znver2, CPU::generic, 0, Feature::znver2}, {"znver3", CPU::amd_znver3, CPU::amd_znver2, 120000, Feature::znver3}, + {"znver4", CPU::amd_znver4, CPU::amd_znver3, 160000, Feature::znver4}, + {"znver5", CPU::amd_znver5, CPU::amd_znver4, 190000, Feature::znver5}, }; static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]); @@ -562,10 +577,19 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_ if (model >= 0x30) return CPU::amd_znver2; return CPU::amd_znver1; - case 0x19: // AMD Family 19h - if (model <= 0x0f || model == 0x21) + case 25: // AMD Family 19h + if (model <= 0x0f || (model >= 0x20 && model <= 0x5f)) return CPU::amd_znver3; // 00h-0Fh, 21h: Zen3 + if ((model >= 0x10 && model <= 0x1f) || + (model >= 0x60 && model <= 0x74) || + (model >= 0x78 && model <= 0x7b) || + (model >= 0xA0 && model <= 0xAf)) { + return CPU::amd_znver4; + } return CPU::amd_znver3; // fallback + case 26: + // if (model <= 0x77) + return CPU::amd_znver5; } } @@ -573,7 +597,7 @@ template static inline void features_disable_avx512(T &features) { using namespace Feature; - unset_bits(features, avx512f, avx512dq, avx512ifma, avx512pf, avx512er, avx512cd, + unset_bits(features, avx512f, avx512dq, avx512ifma, avx512cd, avx512bw, avx512vl, avx512vbmi, avx512vpopcntdq, avx512vbmi2, avx512vnni, avx512bitalg, avx512vp2intersect, avx512bf16); } @@ -651,11 +675,12 @@ static NOINLINE std::pair> _get_host_cpu(void) int32_t info7[4]; jl_cpuidex(info7, 7, 1); features[9] = info7[0]; + 
features[10] = info7[1]; } if (maxleaf >= 0x14) { int32_t info14[4]; jl_cpuidex(info14, 0x14, 0); - features[10] = info14[1]; + features[11] = info14[1]; } // Fix up AVX bits to account for OS support and match LLVM model @@ -696,7 +721,20 @@ static NOINLINE std::pair> _get_host_cpu(void) else { cpu = uint32_t(CPU::generic); } - + /* Feature bits to register map + feature[0] = ecx + feature[1] = edx + feature[2] = leaf 7 ebx + feature[3] = leaf 7 ecx + feature[4] = leaf 7 edx + feature[5] = leaf 0x80000001 ecx + feature[6] = leaf 0x80000001 edx + feature[7] = leaf 0xd subleaf 1 eax + feature[8] = leaf 0x80000008 ebx + feature[9] = leaf 7 ebx subleaf 1 eax + feature[10] = leaf 7 ebx subleaf 1 ebx + feature[11] = leaf 0x14 ebx + */ return std::make_pair(cpu, features); } @@ -771,11 +809,11 @@ static inline void disable_depends(FeatureList &features) ::disable_depends(features, Feature::deps, sizeof(Feature::deps) / sizeof(FeatureDep)); } -static const std::vector> &get_cmdline_targets(void) +static const llvm::SmallVector, 0> &get_cmdline_targets(void) { auto feature_cb = [] (const char *str, size_t len, FeatureList &list) { auto fbit = find_feature_bit(feature_names, nfeature_names, str, len); - if (fbit == (uint32_t)-1) + if (fbit == UINT32_MAX) return false; set_bit(list, fbit, true); return true; @@ -789,7 +827,7 @@ static const std::vector> &get_cmdline_targets(void) return targets; } -static std::vector> jit_targets; +static llvm::SmallVector, 0> jit_targets; static TargetData arg_target_data(const TargetData &arg, bool require_host) { @@ -840,7 +878,7 @@ static int max_vector_size(const FeatureList &features) return 16; } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t** rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -868,7 +906,9 @@ static uint32_t sysimg_init_cb(const void *id) "virtualized environment. Please read " "https://docs.julialang.org/en/v1/devdocs/sysimg/ for more."); } - auto match = match_sysimg_targets(sysimg, target, max_vector_size); + auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason); + if (match.best_idx == UINT32_MAX) + return match.best_idx; // Now we've decided on which sysimg version to use. // Make sure the JIT target is compatible with it and save the JIT target. if (match.vreg_size != max_vector_size(target.en.features) && @@ -884,7 +924,7 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } -static uint32_t pkgimg_init_cb(const void *id) +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) { TargetData target = jit_targets.front(); auto pkgimg = deserialize_target_data((const uint8_t*)id); @@ -893,10 +933,12 @@ static uint32_t pkgimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(pkgimg, target, max_vector_size); + auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason); return match.best_idx; } +//This function serves as a fallback during bootstrapping, at that point we don't have a sysimage with native code +// so we won't call sysimg_init_cb, else this function shouldn't do anything. 
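Returning to the feature-bits-to-register map documented earlier in this hunk: the sketch below dumps a few of the same CPUID leaves using GCC/Clang's `<cpuid.h>` instead of the runtime's `jl_cpuid`/`jl_cpuidex` helpers (an assumption about the toolchain; x86 only). Only leaves 1 and 7 are shown; leaves 0x80000001, 0xd, 0x80000008, and 0x14 (`features[5..8]` and `[11]`) are omitted for brevity, and no claim is made about which bit maps to which feature name.

```c
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned a, b, c, d;
    uint32_t features[12] = {0};
    if (__get_cpuid_count(1, 0, &a, &b, &c, &d)) {
        features[0] = c;                    /* leaf 1 ECX */
        features[1] = d;                    /* leaf 1 EDX */
    }
    if (__get_cpuid_count(7, 0, &a, &b, &c, &d)) {
        features[2] = b;                    /* leaf 7 EBX */
        features[3] = c;                    /* leaf 7 ECX */
        features[4] = d;                    /* leaf 7 EDX */
    }
    if (__get_cpuid_count(7, 1, &a, &b, &c, &d)) {
        features[9]  = a;                   /* leaf 7 subleaf 1 EAX */
        features[10] = b;                   /* leaf 7 subleaf 1 EBX (newly stored by this patch) */
    }
    for (int i = 0; i < 12; i++)
        printf("features[%d] = 0x%08x\n", i, features[i]);
    return 0;
}
```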
static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -933,7 +975,6 @@ static void ensure_jit_target(bool imaging) Feature::vaes, Feature::vpclmulqdq, Feature::sse4a, Feature::avx512f, Feature::avx512dq, Feature::avx512ifma, - Feature::avx512pf, Feature::avx512er, Feature::avx512cd, Feature::avx512bw, Feature::avx512vl, Feature::avx512vbmi, Feature::avx512vpopcntdq, Feature::avxvnni, @@ -959,10 +1000,17 @@ static void ensure_jit_target(bool imaging) break; } } + static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16}; + for (auto fe: clone_bf16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_BFLOAT16; + break; + } + } } } -static std::pair> +static std::pair> get_llvm_target_noext(const TargetData &data) { std::string name = data.name; @@ -981,7 +1029,7 @@ get_llvm_target_noext(const TargetData &data) name = "x86-64"; #endif } - std::vector features; + llvm::SmallVector features; for (auto &fename: feature_names) { if (fename.llvmver > JL_LLVM_VERSION) continue; @@ -1005,7 +1053,7 @@ get_llvm_target_noext(const TargetData &data) return std::make_pair(std::move(name), std::move(features)); } -static std::pair> +static std::pair> get_llvm_target_vec(const TargetData &data) { auto res0 = get_llvm_target_noext(data); @@ -1032,14 +1080,25 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) cpus, ncpu_names); } -JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) { - pkgimg_init_cb(data); + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == UINT32_MAX) + return rejection_reason; + return jl_nothing; } -JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) { - return jl_cstr_to_string(host_cpu_name().c_str()); + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4))) + return jl_true; + else + return jl_false; } jl_image_t jl_init_processor_sysimg(void *hdl) @@ -1058,26 +1117,94 @@ jl_image_t jl_init_processor_pkgimg(void *hdl) return parse_sysimg(hdl, pkgimg_init_cb); } -extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) +std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); flags = jit_targets[0].en.flags; return get_llvm_target_vec(jit_targets[0]); } -extern "C" JL_DLLEXPORT const std::pair &jl_get_llvm_disasm_target(void) +const std::pair &jl_get_llvm_disasm_target(void) { static const auto res = get_llvm_target_str(TargetData{"generic", "", {feature_masks, 0}, {{}, 0}, 0}); return res; } - -extern "C" JL_DLLEXPORT std::vector jl_get_llvm_clone_targets(void) +//This function parses the -C command line to figure out which targets to multiversion to. +#ifndef __clang_gcanalyzer__ +llvm::SmallVector jl_get_llvm_clone_targets(void) { - if (jit_targets.empty()) - jl_error("JIT targets not initialized"); - std::vector res; - for (auto &target: jit_targets) { + auto &cmdline = get_cmdline_targets(); + check_cmdline(cmdline, true); + llvm::SmallVector, 0> image_targets; + for (auto &arg: cmdline) { + auto data = arg_target_data(arg, image_targets.empty()); + image_targets.push_back(std::move(data)); + } + + auto ntargets = image_targets.size(); + // Now decide the clone condition. 
+ for (size_t i = 1; i < ntargets; i++) { + auto &t = image_targets[i]; + if (t.en.flags & JL_TARGET_CLONE_ALL) + continue; + // Always clone when code checks CPU features + t.en.flags |= JL_TARGET_CLONE_CPU; + // The most useful one in general... + t.en.flags |= JL_TARGET_CLONE_LOOP; + auto &features0 = image_targets[t.base].en.features; + // Special case for KNL/KNM since they're so different + if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) { + if ((t.name == "knl" || t.name == "knm") && + image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") { + t.en.flags |= JL_TARGET_CLONE_ALL; + break; + } + } + static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4}; + static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3, + Feature::sse41, Feature::sse42, + Feature::avx, Feature::avx2, + Feature::vaes, Feature::vpclmulqdq, + Feature::sse4a, Feature::avx512f, + Feature::avx512dq, Feature::avx512ifma, + Feature::avx512cd, Feature::avx512bw, + Feature::avx512vl, Feature::avx512vbmi, + Feature::avx512vpopcntdq, Feature::avxvnni, + Feature::avx512vbmi2, Feature::avx512vnni, + Feature::avx512bitalg, Feature::avx512bf16, + Feature::avx512vp2intersect, Feature::avx512fp16}; + for (auto fe: clone_math) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_MATH; + break; + } + } + for (auto fe: clone_simd) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_SIMD; + break; + } + } + static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16}; + for (auto fe: clone_fp16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_FLOAT16; + break; + } + } + static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16}; + for (auto fe: clone_bf16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_BFLOAT16; + break; + } + } + } + if (image_targets.empty()) + jl_error("No targets specified"); + llvm::SmallVector res; + for (auto &target: image_targets) { auto features_en = target.en.features; auto features_dis = target.dis.features; for (auto &fename: feature_names) { @@ -1097,6 +1224,7 @@ extern "C" JL_DLLEXPORT std::vector jl_get_llvm_clone_targets( } return res; } +#endif extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature) { diff --git a/src/rtutils.c b/src/rtutils.c index 01ea11014a6db..00a5b639d8683 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -127,11 +127,41 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error(const char *fname, jl_type_error_rt(fname, "", expected, got); } -JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var) +JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope) { - if (!jl_undefvarerror_type) - jl_errorf("UndefVarError(%s)", jl_symbol_name(var)); - jl_throw(jl_new_struct(jl_undefvarerror_type, var)); + if (!jl_undefvarerror_type) { + const char *s1 = ""; + const char *s2 = ""; + if (scope) { + if (jl_is_symbol(scope)) { + s1 = ", :"; + s2 = jl_symbol_name((jl_sym_t*)scope); + } + else if (jl_is_module(scope)) { + s1 = ", module "; + s2 = jl_symbol_name(((jl_module_t*)scope)->name); + } + else { + s1 = ", "; + s2 = "unknown scope"; + } + } + jl_errorf("UndefVarError(%s%s%s)", jl_symbol_name(var), s1, s2); + } + JL_GC_PUSH1(&scope); + jl_throw(jl_new_struct(jl_undefvarerror_type, var, scope)); +} + +JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var) +{ + 
jl_throw(jl_new_struct(jl_fielderror_type, t, var)); +} + +JL_DLLEXPORT void JL_NORETURN jl_argument_error(char *str) // == jl_exceptionf(jl_argumenterror_type, "%s", str) +{ + jl_value_t *msg = jl_pchar_to_string((char*)str, strlen(str)); + JL_GC_PUSH1(&msg); + jl_throw(jl_new_struct(jl_argumenterror_type, msg)); } JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str) @@ -196,14 +226,6 @@ JL_DLLEXPORT void JL_NORETURN jl_bounds_error_ints(jl_value_t *v JL_MAYBE_UNROOT jl_throw(jl_new_struct((jl_datatype_t*)jl_boundserror_type, v, t)); } -JL_DLLEXPORT void JL_NORETURN jl_eof_error(void) -{ - jl_datatype_t *eof_error = - (jl_datatype_t*)jl_get_global(jl_base_module, jl_symbol("EOFError")); - assert(eof_error != NULL); - jl_throw(jl_new_struct(eof_error)); -} - JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t) { if (!jl_isa(x,t)) @@ -224,17 +246,16 @@ JL_DLLEXPORT void __stack_chk_fail(void) // exceptions ----------------------------------------------------------------- -JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) +JL_DLLEXPORT void jl_enter_handler(jl_task_t *ct, jl_handler_t *eh) { - jl_task_t *ct = jl_current_task; // Must have no safepoint eh->prev = ct->eh; eh->gcstack = ct->gcstack; + eh->scope = ct->scope; eh->gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state); eh->locks_len = ct->ptls->locks.len; eh->defer_signal = ct->ptls->defer_signal; eh->world_age = ct->world_age; - ct->eh = eh; #ifdef ENABLE_TIMINGS eh->timing_stack = ct->ptls->timing_stack; #endif @@ -245,9 +266,8 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) // * We leave a try block through normal control flow // * An exception causes a nonlocal jump to the catch block. In this case // there's additional cleanup required, eg pushing the exception stack. -JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) +JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh) { - jl_task_t *ct = jl_current_task; #ifdef _OS_WINDOWS_ if (ct->ptls->needs_resetstkoflw) { _resetstkoflw(); @@ -257,11 +277,12 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) // `eh` may be not equal to `ct->eh`. See `jl_pop_handler` // This function should **NOT** have any safepoint before the ones at the // end. 
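/*
 * [Editorial note, not part of the patch] This is the restore half of the pair
 * started by jl_enter_handler above: everything snapshotted at try-entry
 * (enclosing handler, gcstack, scope, lock count, world_age, defer_signal,
 * gc_state) is copied back from `eh`, and only afterwards are the deferred
 * checks run (GC safepoint if gc_state changed, pending signal exception,
 * SIGINT safepoint, pending finalizers), which is why no safepoint may be hit
 * before that point.
 */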
- sig_atomic_t old_defer_signal = ct->ptls->defer_signal; - int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + jl_ptls_t ptls = ct->ptls; + sig_atomic_t old_defer_signal = ptls->defer_signal; ct->eh = eh->prev; ct->gcstack = eh->gcstack; - small_arraylist_t *locks = &ct->ptls->locks; + ct->scope = eh->scope; + small_arraylist_t *locks = &ptls->locks; int unlocks = locks->len > eh->locks_len; if (unlocks) { for (size_t i = locks->len; i > eh->locks_len; i--) @@ -269,43 +290,69 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) locks->len = eh->locks_len; } ct->world_age = eh->world_age; - ct->ptls->defer_signal = eh->defer_signal; - if (old_gc_state != eh->gc_state) { - jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state); - if (old_gc_state) { - jl_gc_safepoint_(ct->ptls); - } - } - if (old_defer_signal && !eh->defer_signal) { - jl_sigint_safepoint(ct->ptls); - } + ptls->defer_signal = eh->defer_signal; + int8_t old_gc_state = jl_atomic_load_relaxed(&ptls->gc_state); + if (old_gc_state != eh->gc_state) + jl_atomic_store_release(&ptls->gc_state, eh->gc_state); + if (!old_gc_state || !eh->gc_state) // it was or is unsafe now + jl_gc_safepoint_(ptls); + jl_value_t *exception = ptls->sig_exception; + JL_GC_PROMISE_ROOTED(exception); + if (exception) { + int8_t oldstate = jl_gc_unsafe_enter(ptls); + /* The temporary ptls->bt_data is rooted by special purpose code in the + GC. This exists only for the purpose of preserving bt_data until we + set ptls->bt_size=0 below. */ + jl_push_excstack(ct, &ct->excstack, exception, + ptls->bt_data, ptls->bt_size); + ptls->bt_size = 0; + ptls->sig_exception = NULL; + jl_gc_unsafe_leave(ptls, oldstate); + } + if (old_defer_signal && !eh->defer_signal) + jl_sigint_safepoint(ptls); if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) && unlocks && eh->locks_len == 0) { jl_gc_run_pending_finalizers(ct); } } -JL_DLLEXPORT void jl_pop_handler(int n) +JL_DLLEXPORT void jl_eh_restore_state_noexcept(jl_task_t *ct, jl_handler_t *eh) +{ + assert(ct->gcstack == eh->gcstack && "Incorrect GC usage under try catch"); + ct->scope = eh->scope; + ct->eh = eh->prev; + ct->ptls->defer_signal = eh->defer_signal; // optional, but certain try-finally (in stream.jl) may be slightly harder to write without this +} + +JL_DLLEXPORT void jl_pop_handler(jl_task_t *ct, int n) { - jl_task_t *ct = jl_current_task; if (__unlikely(n <= 0)) return; jl_handler_t *eh = ct->eh; while (--n > 0) eh = eh->prev; - jl_eh_restore_state(eh); + jl_eh_restore_state(ct, eh); } -JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT +JL_DLLEXPORT void jl_pop_handler_noexcept(jl_task_t *ct, int n) +{ + if (__unlikely(n <= 0)) + return; + jl_handler_t *eh = ct->eh; + while (--n > 0) + eh = eh->prev; + jl_eh_restore_state_noexcept(ct, eh); +} + +JL_DLLEXPORT size_t jl_excstack_state(jl_task_t *ct) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; jl_excstack_t *s = ct->excstack; return s ? 
s->top : 0; } -JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT +JL_DLLEXPORT void jl_restore_excstack(jl_task_t *ct, size_t state) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; jl_excstack_t *s = ct->excstack; if (s) { assert(s->top >= state); @@ -320,27 +367,27 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE dest->top = src->top; } -static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, +static void jl_reserve_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, size_t reserved_size) { jl_excstack_t *s = *stack; if (s && s->reserved_size >= reserved_size) return; size_t bufsz = sizeof(jl_excstack_t) + sizeof(uintptr_t)*reserved_size; - jl_task_t *ct = jl_current_task; jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(ct->ptls, bufsz); new_s->top = 0; new_s->reserved_size = reserved_size; if (s) jl_copy_excstack(new_s, s); *stack = new_s; + jl_gc_wb(ct, new_s); } -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size) { - jl_reserve_excstack(stack, (*stack ? (*stack)->top : 0) + bt_size + 2); + jl_reserve_excstack(ct, stack, (*stack ? (*stack)->top : 0) + bt_size + 2); jl_excstack_t *s = *stack; jl_bt_element_t *rawstack = jl_excstack_raw(s); memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size); @@ -359,7 +406,10 @@ JL_DLLEXPORT void *(jl_symbol_name)(jl_sym_t *s) // WARNING: THIS FUNCTION IS NEVER CALLED BUT INLINE BY CCALL JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a) { - return a->data; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; + if (layout->flags.arrayelem_isunion || layout->size == 0) + return (char*)a->ref.mem->ptr + (size_t)jl_array_data_(a); + return jl_array_data_(a); } JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a) { @@ -522,20 +572,12 @@ JL_DLLEXPORT void jl_flush_cstdio(void) JL_NOTSAFEPOINT fflush(stderr); } -JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT -{ - if (jl_base_module == NULL) - return NULL; - jl_binding_t *stdout_obj = jl_get_module_binding(jl_base_module, jl_symbol("stdout"), 0); - return stdout_obj ? jl_atomic_load_relaxed(&stdout_obj->value) : NULL; -} - JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT { if (jl_base_module == NULL) return NULL; jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr"), 0); - return stderr_obj ? jl_atomic_load_relaxed(&stderr_obj->value) : NULL; + return stderr_obj ? jl_get_binding_value_if_resolved(stderr_obj) : NULL; } // toys for debugging --------------------------------------------------------- @@ -567,7 +609,7 @@ static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const JL_DLLEXPORT int jl_id_start_char(uint32_t wc) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_id_char(uint32_t wc) JL_NOTSAFEPOINT; -JL_DLLEXPORT int jl_is_identifier(char *str) JL_NOTSAFEPOINT +JL_DLLEXPORT int jl_is_identifier(const char *str) JL_NOTSAFEPOINT { size_t i = 0; uint32_t wc = u8_nextchar(str, &i); @@ -630,12 +672,10 @@ static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT jl_sym_t *globname = dv->name->mt != NULL ? 
dv->name->mt->name : NULL; if (globname && dv->name->module) { jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0); - if (b && jl_atomic_load_relaxed(&b->owner) && b->constp) { - jl_value_t *bv = jl_atomic_load_relaxed(&b->value); - // The `||` makes this function work for both function instances and function types. - if (bv == v || jl_typeof(bv) == v) - return 1; - } + jl_value_t *bv = jl_get_binding_value_if_resolved_and_const(b); + // The `||` makes this function work for both function instances and function types. + if (bv && (bv == v || jl_typeof(bv) == v)) + return 1; } return 0; } @@ -650,22 +690,64 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname return 0; } -static size_t jl_static_show_x_sym_escaped(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT +static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT { size_t n = 0; - - char *sn = jl_symbol_name(name); - int hidden = 0; - if (!(jl_is_identifier(sn) || jl_is_operator(sn))) { - hidden = 1; + if (wrap) + n += jl_printf(out, "\""); + if (!u8_isvalid(str, len)) { + // alternate print algorithm that preserves data if it's not UTF-8 + static const char hexdig[] = "0123456789abcdef"; + for (size_t i = 0; i < len; i++) { + uint8_t c = str[i]; + if (c == '\\' || c == '"' || c == '$') + n += jl_printf(out, "\\%c", c); + else if (c >= 32 && c < 0x7f) + n += jl_printf(out, "%c", c); + else + n += jl_printf(out, "\\x%c%c", hexdig[c>>4], hexdig[c&0xf]); + } } - - if (hidden) { - n += jl_printf(out, "var\""); + else { + int special = 0; + for (size_t i = 0; i < len; i++) { + uint8_t c = str[i]; + if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') { + special = 1; + break; + } + } + if (!special) { + jl_uv_puts(out, str, len); + n += len; + } + else { + char buf[512]; + size_t i = 0; + while (i < len) { + size_t r = u8_escape(buf, sizeof(buf), str, &i, len, "\"$", 0); + jl_uv_puts(out, buf, r - 1); + n += r - 1; + } + } } - n += jl_printf(out, "%s", sn); - if (hidden) { + if (wrap) n += jl_printf(out, "\""); + return n; +} + +static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT +{ + size_t n = 0; + const char *sn = jl_symbol_name(name); + int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn); + if (quoted) { + n += jl_printf(out, "var"); + // TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules + n += jl_static_show_string(out, sn, strlen(sn), 1); + } + else { + n += jl_printf(out, "%s", sn); } return n; } @@ -751,7 +833,8 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt else { n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth, ctx); n += jl_printf(out, ". -> "); - n += jl_static_show_x(out, jl_atomic_load_relaxed(&li->uninferred), depth, ctx); + n += jl_static_show_x(out, jl_atomic_load_relaxed(&jl_cached_uninferred( + jl_atomic_load_relaxed(&li->cache), 1)->inferred), depth, ctx); } } else if (vt == jl_typename_type) { @@ -783,11 +866,6 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt // Types are printed as a fully qualified name, with parameters, e.g. // `Base.Set{Int}`, and function types are printed as e.g. `typeof(Main.f)` jl_datatype_t *dv = (jl_datatype_t*)v; - jl_sym_t *globname; - int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname); - jl_sym_t *sym = globfunc ? 
globname : dv->name->name; - char *sn = jl_symbol_name(sym); - size_t quote = 0; if (dv->name == jl_tuple_typename) { if (dv == jl_tuple_type) return jl_printf(out, "Tuple"); @@ -819,9 +897,25 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } return n; } + if (jl_genericmemory_type && dv->name == jl_genericmemory_typename) { + jl_value_t *isatomic = jl_tparam0(dv); + jl_value_t *el_type = jl_tparam1(dv); + jl_value_t *addrspace = jl_tparam2(dv); + if (isatomic == (jl_value_t*)jl_not_atomic_sym && addrspace && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) { + n += jl_printf(out, "Memory{"); + n += jl_static_show_x(out, el_type, depth, ctx); + n += jl_printf(out, "}"); + return n; + } + } if (ctx.quiet) { - return jl_printf(out, "%s", jl_symbol_name(dv->name->name)); + return jl_static_show_symbol(out, dv->name->name); } + jl_sym_t *globname; + int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname); + jl_sym_t *sym = globfunc ? globname : dv->name->name; + char *sn = jl_symbol_name(sym); + size_t quote = 0; if (globfunc) { n += jl_printf(out, "typeof("); } @@ -834,7 +928,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt quote = 1; } } - n += jl_static_show_x_sym_escaped(out, sym); + n += jl_static_show_symbol(out, sym); if (globfunc) { n += jl_printf(out, ")"); if (quote) { @@ -903,9 +997,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, "nothing"); } else if (vt == jl_string_type) { - n += jl_printf(out, "\""); - jl_uv_puts(out, jl_string_data(v), jl_string_len(v)); n += jl_string_len(v); - n += jl_printf(out, "\""); + n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1); } else if (v == jl_bottom_type) { n += jl_printf(out, "Union{}"); @@ -954,7 +1046,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, ")"); n += jl_printf(out, "<:"); } - n += jl_static_show_x_sym_escaped(out, var->name); + n += jl_static_show_symbol(out, var->name); if (showbounds && (ub != (jl_value_t*)jl_any_type || lb != jl_bottom_type)) { // show type-var upper bound if it is defined, or if we showed the lower bound int ua = jl_is_unionall(ub); @@ -972,18 +1064,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_static_show_x(out, (jl_value_t*)m->parent, depth, ctx); n += jl_printf(out, "."); } - n += jl_printf(out, "%s", jl_symbol_name(m->name)); + n += jl_static_show_symbol(out, m->name); } else if (vt == jl_symbol_type) { - char *sn = jl_symbol_name((jl_sym_t*)v); - int quoted = !jl_is_identifier(sn) && jl_operator_precedence(sn) == 0; - if (quoted) - n += jl_printf(out, "Symbol(\""); - else - n += jl_printf(out, ":"); - n += jl_printf(out, "%s", sn); - if (quoted) - n += jl_printf(out, "\")"); + n += jl_printf(out, ":"); + n += jl_static_show_symbol(out, (jl_sym_t*)v); } else if (vt == jl_ssavalue_type) { n += jl_printf(out, "SSAValue(%" PRIuPTR ")", @@ -991,8 +1076,12 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } else if (vt == jl_globalref_type) { n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth, ctx); - char *name = jl_symbol_name(jl_globalref_name(v)); - n += jl_printf(out, jl_is_identifier(name) ? 
".%s" : ".:(%s)", name); + jl_sym_t *name = jl_globalref_name(v); + n += jl_printf(out, "."); + if (jl_is_operator(jl_symbol_name(name))) + n += jl_printf(out, ":(%s)", jl_symbol_name(name)); + else + n += jl_static_show_symbol(out, name); } else if (vt == jl_gotonode_type) { n += jl_printf(out, "goto %" PRIuPTR, jl_gotonode_label(v)); @@ -1025,42 +1114,70 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } else if (vt == jl_expr_type) { jl_expr_t *e = (jl_expr_t*)v; - if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) { - n += jl_static_show_x(out, jl_exprarg(e,0), depth, ctx); + if (e->head == jl_assign_sym && jl_array_nrows(e->args) == 2) { + n += jl_static_show_x(out, jl_exprarg(e, 0), depth, ctx); n += jl_printf(out, " = "); - n += jl_static_show_x(out, jl_exprarg(e,1), depth, ctx); + n += jl_static_show_x(out, jl_exprarg(e, 1), depth, ctx); } else { - char sep = ' '; - n += jl_printf(out, "Expr(:%s", jl_symbol_name(e->head)); - size_t i, len = jl_array_len(e->args); + n += jl_printf(out, "Expr("); + n += jl_static_show_x(out, (jl_value_t*)e->head, depth, ctx); + size_t i, len = jl_array_nrows(e->args); for (i = 0; i < len; i++) { - n += jl_printf(out, ",%c", sep); - n += jl_static_show_x(out, jl_exprarg(e,i), depth, ctx); + n += jl_printf(out, ", "); + n += jl_static_show_x(out, jl_exprarg(e, i), depth, ctx); } n += jl_printf(out, ")"); } } else if (jl_array_type && jl_is_array_type(vt)) { n += jl_printf(out, "Array{"); - n += jl_static_show_x(out, (jl_value_t*)jl_tparam0(vt), depth, ctx); - n += jl_printf(out, ", ("); + jl_value_t *el_type = jl_tparam0(vt); + n += jl_static_show_x(out, el_type, depth, ctx); + jl_array_t *av = (jl_array_t*)v; size_t i, ndims = jl_array_ndims(v); + n += jl_printf(out, ", %" PRIdPTR "}(dims=(", ndims); if (ndims == 1) n += jl_printf(out, "%" PRIdPTR ",", jl_array_dim0(v)); else for (i = 0; i < ndims; i++) n += jl_printf(out, (i > 0 ? ", %" PRIdPTR : "%" PRIdPTR), jl_array_dim(v, i)); - n += jl_printf(out, ")}["); - size_t j, tlen = jl_array_len(v); - jl_array_t *av = (jl_array_t*)v; - jl_value_t *el_type = jl_tparam0(vt); - char *typetagdata = (!av->flags.ptrarray && jl_is_uniontype(el_type)) ? 
jl_array_typetagdata(av) : NULL; + n += jl_printf(out, "), mem="); + n += jl_static_show_x(out, (jl_value_t*)av->ref.mem, depth, ctx); + n += jl_printf(out, ")"); + } + else if (jl_genericmemoryref_type && jl_is_genericmemoryref_type(vt)) { + jl_genericmemoryref_t *ref = (jl_genericmemoryref_t*)v; + n += jl_printf(out, "GenericMemoryRef(offset="); + size_t offset = (size_t)ref->ptr_or_offset; + if (ref->mem) { + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typeof(ref->mem))->layout; + if (layout->size != 0 && !layout->flags.arrayelem_isunion) + offset = ((char*)offset - (char*)ref->mem->ptr) / layout->size; + } + n += jl_printf(out, "%" PRIdPTR, offset); + n += jl_printf(out, ", ptr_or_offset=%p, mem=", ref->ptr_or_offset); + n += jl_static_show_x(out, (jl_value_t*)ref->mem, depth, ctx); + } + else if (jl_genericmemory_type && jl_is_genericmemory_type(vt)) { + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + //jl_value_t *isatomic = jl_tparam0(vt); + jl_value_t *el_type = jl_tparam1(vt); + jl_value_t *addrspace = jl_tparam2(vt); + n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx); + size_t j, tlen = m->length; + n += jl_printf(out, "(%" PRIdPTR ", %p)[", tlen, m->ptr); + if (!(addrspace && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0)) { + n += jl_printf(out, "...]"); + return n; + } + const char *typetagdata = NULL; + const jl_datatype_layout_t *layout = vt->layout; int nlsep = 0; - if (av->flags.ptrarray) { + if (layout->flags.arrayelem_isboxed) { // print arrays with newlines, unless the elements are probably small for (j = 0; j < tlen; j++) { - jl_value_t **ptr = ((jl_value_t**)av->data) + j; + jl_value_t **ptr = ((jl_value_t**)m->ptr) + j; jl_value_t *p = *ptr; if (p != NULL && (uintptr_t)p >= 4096U) { jl_value_t *p_ty = jl_typeof(p); @@ -1073,21 +1190,30 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } } } - if (nlsep && tlen > 1) - n += jl_printf(out, "\n "); - for (j = 0; j < tlen; j++) { - if (av->flags.ptrarray) { - jl_value_t **ptr = ((jl_value_t**)av->data) + j; - n += jl_static_show_x(out, *ptr, depth, ctx); - } - else { - char *ptr = ((char*)av->data) + j * av->elsize; - n += jl_static_show_x_(out, (jl_value_t*)ptr, - typetagdata ? (jl_datatype_t*)jl_nth_union_component(el_type, typetagdata[j]) : (jl_datatype_t*)el_type, - depth, ctx); + else if (layout->flags.arrayelem_isunion) { + typetagdata = jl_genericmemory_typetagdata(m); + } + if (layout->size == 0 && tlen >= 3) { + n += jl_static_show_x_(out, (jl_value_t*)m->ptr, (jl_datatype_t*)el_type, depth, ctx); + n += jl_printf(out, ", ..."); + } + else { + if (nlsep && tlen > 1) + n += jl_printf(out, "\n "); + for (size_t j = 0; j < tlen; j++) { + if (layout->flags.arrayelem_isboxed) { + jl_value_t **ptr = ((jl_value_t**)m->ptr) + j; + n += jl_static_show_x(out, *ptr, depth, ctx); + } + else { + char *ptr = ((char*)m->ptr) + j * layout->size; + n += jl_static_show_x_(out, (jl_value_t*)ptr, + (jl_datatype_t*)(typetagdata ? jl_nth_union_component(el_type, typetagdata[j]) : el_type), + depth, ctx); + } + if (j != tlen - 1) + n += jl_printf(out, nlsep ? ",\n " : ", "); } - if (j != tlen - 1) - n += jl_printf(out, nlsep ? 
",\n " : ", "); } n += jl_printf(out, "]"); } @@ -1123,7 +1249,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } } - n += jl_static_show_x_sym_escaped(out, sym); + n += jl_static_show_symbol(out, sym); if (globfunc) { if (quote) { @@ -1159,8 +1285,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt jl_value_t *names = isnamedtuple ? jl_tparam0(vt) : (jl_value_t*)jl_field_names(vt); for (; i < tlen; i++) { if (!istuple) { - jl_value_t *fname = isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i); - n += jl_printf(out, "%s=", jl_symbol_name((jl_sym_t*)fname)); + jl_sym_t *fname = (jl_sym_t*)(isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i)); + if (fname == NULL || !jl_is_symbol(fname)) + n += jl_static_show_x(out, (jl_value_t*)fname, depth, ctx); + else if (jl_is_operator(jl_symbol_name(fname))) + n += jl_printf(out, "(%s)", jl_symbol_name(fname)); + else + n += jl_static_show_symbol(out, fname); + n += jl_printf(out, "="); } size_t offs = jl_field_offset(vt, i); char *fld_ptr = (char*)v + offs; @@ -1293,9 +1425,10 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c return n; } if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) && + ((jl_datatype_t*)ftype)->name->mt && ((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt && ((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) { - n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name)); + n += jl_static_show_symbol(s, ((jl_datatype_t*)ftype)->name->mt->name); } else { n += jl_printf(s, "(::"); @@ -1394,10 +1527,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id, } jl_printf(str, "\n@ "); if (jl_is_string(file)) { - jl_uv_puts(str, jl_string_data(file), jl_string_len(file)); + jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0); } else if (jl_is_symbol(file)) { - jl_printf(str, "%s", jl_symbol_name((jl_sym_t*)file)); + jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0); } jl_printf(str, ":"); jl_static_show(str, line); diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 23793254c205d..2a6cb00961594 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include "julia.h" @@ -66,56 +66,24 @@ void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) * extern "C" JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name) { - char *f_lib; + void *lib_ptr; if (jl_is_symbol(lib_val)) - f_lib = jl_symbol_name((jl_sym_t*)lib_val); + lib_ptr = jl_get_library(jl_symbol_name((jl_sym_t*)lib_val)); else if (jl_is_string(lib_val)) - f_lib = jl_string_data(lib_val); - else + lib_ptr = jl_get_library(jl_string_data(lib_val)); + else if (jl_libdl_dlopen_func != NULL) { + // Call `dlopen(lib_val)`; this is the correct path for the `LazyLibrary` case, + // but it also takes any other value, and so we define `dlopen(x::Any) = throw(TypeError(...))`. 
+ lib_ptr = jl_unbox_voidpointer(jl_apply_generic(jl_libdl_dlopen_func, &lib_val, 1)); + } else jl_type_error("ccall", (jl_value_t*)jl_symbol_type, lib_val); void *ptr; - jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1); + jl_dlsym(lib_ptr, f_name, &ptr, 1); return ptr; } // miscellany -std::string jl_get_cpu_name_llvm(void) -{ - return llvm::sys::getHostCPUName().str(); -} - -std::string jl_get_cpu_features_llvm(void) -{ - StringMap HostFeatures; - llvm::sys::getHostCPUFeatures(HostFeatures); - std::string attr; - for (auto &ele: HostFeatures) { - if (ele.getValue()) { - if (!attr.empty()) { - attr.append(",+"); - } - else { - attr.append("+"); - } - attr.append(ele.getKey().str()); - } - } - // Explicitly disabled features need to be added at the end so that - // they are not re-enabled by other features that implies them by default. - for (auto &ele: HostFeatures) { - if (!ele.getValue()) { - if (!attr.empty()) { - attr.append(",-"); - } - else { - attr.append("-"); - } - attr.append(ele.getKey().str()); - } - } - return attr; -} extern "C" JL_DLLEXPORT jl_value_t *jl_get_JIT(void) @@ -351,7 +319,7 @@ jl_value_t *jl_get_cfunction_trampoline( uv_mutex_lock(&trampoline_lock); tramp = trampoline_alloc(); ((void**)result)[0] = tramp; - tramp = init_trampoline(tramp, nval); + init_trampoline(tramp, nval); ptrhash_put(cache, (void*)fobj, result); uv_mutex_unlock(&trampoline_lock); return result; diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index ed320aa9a6c35..1790b9bd8d106 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -5,16 +5,15 @@ // // this file assumes a little-endian processor, although that isn't too hard to fix // it also assumes two's complement negative numbers, which might be a bit harder to fix -// -// TODO: add half-float support #include "APInt-C.h" #include "julia.h" #include "julia_internal.h" +#include "llvm-version.h" const unsigned int host_char_bit = 8; -// float16 intrinsics +// float16 conversion helpers static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT { @@ -185,56 +184,208 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT return h; } -JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) +static inline uint16_t double_to_half(double param) JL_NOTSAFEPOINT { + float temp = (float)param; + uint32_t tempi; + memcpy(&tempi, &temp, sizeof(temp)); + + // if Float16(res) is subnormal + if ((tempi&0x7fffffffu) < 0x38800000u) { + // shift so that the mantissa lines up where it would for normal Float16 + uint32_t shift = 113u-((tempi & 0x7f800000u)>>23u); + if (shift<23u) { + tempi |= 0x00800000; // set implicit bit + tempi >>= shift; + } + } + + // if we are halfway between 2 Float16 values + if ((tempi & 0x1fffu) == 0x1000u) { + memcpy(&tempi, &temp, sizeof(temp)); + // adjust the value by 1 ULP in the direction that will make Float16(temp) give the right answer + tempi += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp)); + memcpy(&temp, &tempi, sizeof(temp)); + } + + return float_to_half(temp); +} + +// x86-specific helpers for emulating the (B)Float16 ABI +#if defined(_CPU_X86_) || defined(_CPU_X86_64_) +#include +__attribute__((unused)) static inline __m128 return_in_xmm(uint16_t input) JL_NOTSAFEPOINT { + __m128 xmm_output; + asm ( + "movd %[input], %%xmm0\n\t" + "movss %%xmm0, %[xmm_output]\n\t" + : [xmm_output] "=x" (xmm_output) + : [input] "r" ((uint32_t)input) + : "xmm0" + ); + return xmm_output; +} +__attribute__((unused)) static inline uint16_t take_from_xmm(__m128 xmm_input) 
JL_NOTSAFEPOINT { + uint32_t output; + asm ( + "movss %[xmm_input], %%xmm0\n\t" + "movd %%xmm0, %[output]\n\t" + : [output] "=r" (output) + : [xmm_input] "x" (xmm_input) + : "xmm0" + ); + return (uint16_t)output; +} +#endif + +// float16 conversion API + +// for use in APInt and other soft-float ABIs (i.e. without the ABI shenanigans from below) +JL_DLLEXPORT uint16_t julia_float_to_half(float param) { + return float_to_half(param); +} +JL_DLLEXPORT uint16_t julia_double_to_half(double param) { + return double_to_half(param); +} +JL_DLLEXPORT float julia_half_to_float(uint16_t param) { return half_to_float(param); } -JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) +// starting with GCC 12 and Clang 15, we have _Float16 on most platforms +// (but not on Windows; this may be a bug in the MSYS2 GCC compilers) +#if ((defined(__GNUC__) && __GNUC__ > 11) || \ + (defined(__clang__) && __clang_major__ > 14)) && \ + !defined(_CPU_PPC64_) && !defined(_CPU_PPC_) && \ + !defined(_OS_WINDOWS_) && !defined(_CPU_RISCV64_) + #define FLOAT16_TYPE _Float16 + #define FLOAT16_TO_UINT16(x) (*(uint16_t*)&(x)) + #define FLOAT16_FROM_UINT16(x) (*(_Float16*)&(x)) +// on older compilers, we need to emulate the platform-specific ABI +#elif defined(_CPU_X86_) || (defined(_CPU_X86_64_) && !defined(_OS_WINDOWS_)) + // on x86, we can use __m128; except on Windows where x64 calling + // conventions expect to pass __m128 by reference. + #define FLOAT16_TYPE __m128 + #define FLOAT16_TO_UINT16(x) take_from_xmm(x) + #define FLOAT16_FROM_UINT16(x) return_in_xmm(x) +#elif defined(_CPU_PPC64_) || defined(_CPU_PPC_) + // on PPC, pass Float16 as if it were an integer, similar to the old x86 ABI + // before _Float16 + #define FLOAT16_TYPE uint16_t + #define FLOAT16_TO_UINT16(x) (x) + #define FLOAT16_FROM_UINT16(x) (x) +#else + // otherwise, pass using floating-point calling conventions + #define FLOAT16_TYPE float + #define FLOAT16_TO_UINT16(x) ((uint16_t)*(uint32_t*)&(x)) + #define FLOAT16_FROM_UINT16(x) ({ uint32_t tmp = (uint32_t)(x); *(float*)&tmp; }) +#endif + +JL_DLLEXPORT float julia__gnu_h2f_ieee(FLOAT16_TYPE param) { - return float_to_half(param); + uint16_t param16 = FLOAT16_TO_UINT16(param); + return half_to_float(param16); } -JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) +JL_DLLEXPORT FLOAT16_TYPE julia__gnu_f2h_ieee(float param) { - float res = (float)param; - uint32_t resi; - memcpy(&resi, &res, sizeof(res)); - if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal - // shift so that the mantissa lines up where it would for normal Float16 - uint32_t shift = 113u-((resi & 0x7f800000u)>>23u); - if (shift<23u) { - resi |= 0x00800000; // set implicit bit - resi >>= shift; - } - } - if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values - memcpy(&resi, &res, sizeof(res)); - // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer - resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res)); - memcpy(&res, &resi, sizeof(res)); + uint16_t res = float_to_half(param); + return FLOAT16_FROM_UINT16(res); +} + +JL_DLLEXPORT FLOAT16_TYPE julia__truncdfhf2(double param) +{ + uint16_t res = double_to_half(param); + return FLOAT16_FROM_UINT16(res); +} + + +// bfloat16 conversion helpers + +static inline uint16_t float_to_bfloat(float param) JL_NOTSAFEPOINT +{ + if (isnan(param)) + return 0x7fc0; + + uint32_t bits = *((uint32_t*) ¶m); + + // round to nearest even + bits += 0x7fff + ((bits >> 16) & 1); + return (uint16_t)(bits >> 16); +} 
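/*
 * [Editorial sketch, not part of the patch] The `bits += 0x7fff + ((bits >> 16) & 1)`
 * step above truncates a float32 to its top 16 bits with round-to-nearest-even.
 * A standalone copy with a few worked values (NaN handling omitted here; the real
 * helper returns 0x7fc0 for NaN):
 *
 *   0x3f800000 (1.0f)        -> 0x3f80  (exact, unchanged)
 *   0x3f808000 (1.00390625f) -> 0x3f80  (tie, rounds down to the even neighbor)
 *   0x3f818000 (1.01171875f) -> 0x3f82  (tie, rounds up to the even neighbor)
 */
#include <stdint.h>
#include <string.h>

static inline uint16_t demo_float_to_bfloat_rne(float x)
{
    uint32_t bits;
    memcpy(&bits, &x, sizeof(bits));      // reinterpret the float as raw bits
    bits += 0x7fff + ((bits >> 16) & 1);  // round to nearest, ties to even
    return (uint16_t)(bits >> 16);        // keep sign, exponent and top 7 mantissa bits
}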
+ +static inline uint16_t double_to_bfloat(double param) JL_NOTSAFEPOINT +{ + float temp = (float)param; + uint32_t tempi; + memcpy(&tempi, &temp, sizeof(temp)); + + // bfloat16 uses the same exponent as float32, so we don't need special handling + // for subnormals when truncating float64 to bfloat16. + + // if we are halfway between 2 bfloat16 values + if ((tempi & 0x1ffu) == 0x100u) { + // adjust the value by 1 ULP in the direction that will make bfloat16(temp) give the right answer + tempi += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp)); + memcpy(&temp, &tempi, sizeof(temp)); } - return float_to_half(res); -} - -//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) { return (double)julia__gnu_h2f_ieee(n); } -//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) { return (int32_t)julia__gnu_h2f_ieee(n); } -//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) { return (int64_t)julia__gnu_h2f_ieee(n); } -//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) { return (uint32_t)julia__gnu_h2f_ieee(n); } -//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) { return (uint64_t)julia__gnu_h2f_ieee(n); } -//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) { return julia__gnu_f2h_ieee((float)n); } -//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) { return julia__gnu_f2h_ieee((float)n); } -//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) { return julia__gnu_f2h_ieee((float)n); } -//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) { return julia__gnu_f2h_ieee((float)n); } -//HANDLE_LIBCALL(F16, F128, __extendhftf2) -//HANDLE_LIBCALL(F16, F80, __extendhfxf2) -//HANDLE_LIBCALL(F80, F16, __truncxfhf2) -//HANDLE_LIBCALL(F128, F16, __trunctfhf2) -//HANDLE_LIBCALL(PPCF128, F16, __trunctfhf2) -//HANDLE_LIBCALL(F16, I128, __fixhfti) -//HANDLE_LIBCALL(F16, I128, __fixunshfti) -//HANDLE_LIBCALL(I128, F16, __floattihf) -//HANDLE_LIBCALL(I128, F16, __floatuntihf) + + return float_to_bfloat(temp); +} + +static inline float bfloat_to_float(uint16_t param) JL_NOTSAFEPOINT +{ + uint32_t bits = ((uint32_t)param) << 16; + float result; + memcpy(&result, &bits, sizeof(result)); + return result; +} + +// bfloat16 conversion API + +// for use in APInt (without the ABI shenanigans from below) +uint16_t julia_float_to_bfloat(float param) { + return float_to_bfloat(param); +} +float julia_bfloat_to_float(uint16_t param) { + return bfloat_to_float(param); +} + +// starting with GCC 13 and Clang 17, we have __bf16 on most platforms +// (but not on Windows; this may be a bug in the MSYS2 GCC compilers) +#if ((defined(__GNUC__) && __GNUC__ > 12) || \ + (defined(__clang__) && __clang_major__ > 16)) && \ + !defined(_CPU_PPC64_) && !defined(_CPU_PPC_) && \ + !defined(_OS_WINDOWS_) && !defined(_CPU_RISCV64_) + #define BFLOAT16_TYPE __bf16 + #define BFLOAT16_TO_UINT16(x) (*(uint16_t*)&(x)) + #define BFLOAT16_FROM_UINT16(x) (*(__bf16*)&(x)) +// on older compilers, we need to emulate the platform-specific ABI. +// for more details, see similar code above that deals with Float16. 
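/*
 * [Editorial note, not part of the patch] Whichever branch of this #if chain is
 * selected, the contract is the same: BFLOAT16_TYPE is the type used at the C ABI
 * level for a bfloat16 argument or return value, BFLOAT16_TO_UINT16() extracts the
 * raw 16-bit payload from such a value, and BFLOAT16_FROM_UINT16() rebuilds one
 * from raw bits. julia__truncsfbf2/julia__truncdfbf2 below only move bits through
 * these macros; the arithmetic itself is always done on the uint16_t payload via
 * float_to_bfloat()/bfloat_to_float().
 */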
+#elif defined(_CPU_X86_) || (defined(_CPU_X86_64_) && !defined(_OS_WINDOWS_)) + #define BFLOAT16_TYPE __m128 + #define BFLOAT16_TO_UINT16(x) take_from_xmm(x) + #define BFLOAT16_FROM_UINT16(x) return_in_xmm(x) +#elif defined(_CPU_PPC64_) || defined(_CPU_PPC_) + #define BFLOAT16_TYPE uint16_t + #define BFLOAT16_TO_UINT16(x) (x) + #define BFLOAT16_FROM_UINT16(x) (x) +#else + #define BFLOAT16_TYPE float + #define BFLOAT16_TO_UINT16(x) ((uint16_t)*(uint32_t*)&(x)) + #define BFLOAT16_FROM_UINT16(x) ({ uint32_t tmp = (uint32_t)(x); *(float*)&tmp; }) +#endif + +JL_DLLEXPORT BFLOAT16_TYPE julia__truncsfbf2(float param) JL_NOTSAFEPOINT +{ + uint16_t res = float_to_bfloat(param); + return BFLOAT16_FROM_UINT16(res); +} + +JL_DLLEXPORT BFLOAT16_TYPE julia__truncdfbf2(double param) JL_NOTSAFEPOINT +{ + uint16_t res = double_to_bfloat(param); + return BFLOAT16_FROM_UINT16(res); +} // run time version of bitcast intrinsic @@ -434,9 +585,9 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp char *pp = (char*)jl_unbox_long(p); jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety); JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) + jl_value_t *result = NULL; + JL_GC_PUSH1(&result); if (ety == (jl_value_t*)jl_any_type) { - jl_value_t *result; - JL_GC_PUSH1(&result); result = expected; int success; while (1) { @@ -445,8 +596,6 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp break; } result = jl_new_struct(rettyp, result, success ? jl_true : jl_false); - JL_GC_POP(); - return result; } else { if (jl_typeof(x) != ety) @@ -454,8 +603,20 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp size_t nb = jl_datatype_size(ety); if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) jl_error("atomic_pointerreplace: invalid pointer for atomic operation"); - return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, rettyp, pp, expected, x, nb); + int isptr = jl_field_isptr(rettyp, 0); + jl_task_t *ct = jl_current_task; + result = jl_gc_alloc(ct->ptls, isptr ? nb : jl_datatype_size(rettyp), isptr ? ety : (jl_value_t*)rettyp); + int success = jl_atomic_cmpswap_bits((jl_datatype_t*)ety, result, pp, expected, x, nb); + if (isptr) { + jl_value_t *z = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp); + *(jl_value_t**)z = result; + result = z; + nb = sizeof(jl_value_t*); + } + *((uint8_t*)result + nb) = success ? 
1 : 0; } + JL_GC_POP(); + return result; } JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order_sym) @@ -513,8 +674,7 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty) void *ptr; jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1); - jl_value_t *jv = jl_gc_alloc_1w(); - jl_set_typeof(jv, rt); + jl_value_t *jv = jl_gc_alloc(jl_current_task->ptls, sizeof(void*), rt); *(void**)jl_data_ptr(jv) = ptr; JL_GC_POP(); return jv; @@ -595,25 +755,39 @@ static inline unsigned jl_##name##nbits(unsigned runtime_nbits, void *pa) JL_NOT // nbits::number of bits in the *input* // c_type::c_type corresponding to nbits #define un_fintrinsic_ctype(OP, name, c_type) \ -static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ +static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ { \ c_type a = *(c_type*)pa; \ - OP((c_type*)pr, a); \ + OP(ty, (c_type*)pr, a); \ } #define un_fintrinsic_half(OP, name) \ -static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ +static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ { \ uint16_t a = *(uint16_t*)pa; \ - float A = julia__gnu_h2f_ieee(a); \ + float A = half_to_float(a); \ if (osize == 16) { \ float R; \ - OP(&R, A); \ - *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \ + OP(ty, &R, A); \ + *(uint16_t*)pr = float_to_half(R); \ } else { \ - OP((uint16_t*)pr, A); \ + OP(ty, (uint16_t*)pr, A); \ } \ - } +} + +#define un_fintrinsic_bfloat(OP, name) \ +static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + float A = bfloat_to_float(a); \ + if (osize == 16) { \ + float R; \ + OP(ty, &R, A); \ + *(uint16_t*)pr = float_to_bfloat(R); \ + } else { \ + OP(ty, (uint16_t*)pr, A); \ + } \ +} // float or integer inputs // OP::Function macro(inputa, inputb) @@ -633,11 +807,24 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr) { \ uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ - float A = julia__gnu_h2f_ieee(a); \ - float B = julia__gnu_h2f_ieee(b); \ + float A = half_to_float(a); \ + float B = half_to_float(b); \ runtime_nbits = 16; \ float R = OP(A, B); \ - *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \ + *(uint16_t*)pr = float_to_half(R); \ + *(uint16_t*)pr = float_to_half(R); \ +} + +#define bi_intrinsic_bfloat(OP, name) \ +static void jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + float A = bfloat_to_float(a); \ + float B = bfloat_to_float(b); \ + runtime_nbits = 16; \ + float R = OP(A, B); \ + *(uint16_t*)pr = float_to_bfloat(R); \ } // float or integer inputs, bool output @@ -658,8 +845,19 @@ static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEP { \ uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ - float A = julia__gnu_h2f_ieee(a); \ - float B = julia__gnu_h2f_ieee(b); \ + float A = half_to_float(a); \ + float B = half_to_float(b); \ + runtime_nbits = 16; \ + return OP(A, B); \ +} + +#define bool_intrinsic_bfloat(OP, name) \ +static int jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + float A = bfloat_to_float(a); \ + float B = bfloat_to_float(b); \ runtime_nbits = 16; \ return OP(A, B); \ } @@ -699,12 +897,27 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, 
void *pb, void *pc, uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ uint16_t c = *(uint16_t*)pc; \ - float A = julia__gnu_h2f_ieee(a); \ - float B = julia__gnu_h2f_ieee(b); \ - float C = julia__gnu_h2f_ieee(c); \ + float A = half_to_float(a); \ + float B = half_to_float(b); \ + float C = half_to_float(c); \ runtime_nbits = 16; \ float R = OP(A, B, C); \ - *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \ + *(uint16_t*)pr = float_to_half(R); \ + *(uint16_t*)pr = float_to_half(R); \ +} + +#define ter_intrinsic_bfloat(OP, name) \ +static void jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb, void *pc, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + uint16_t c = *(uint16_t*)pc; \ + float A = bfloat_to_float(a); \ + float B = bfloat_to_float(b); \ + float C = bfloat_to_float(c); \ + runtime_nbits = 16; \ + float R = OP(A, B, C); \ + *(uint16_t*)pr = float_to_bfloat(R); \ } @@ -820,7 +1033,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign // conversion operator -typedef void (*intrinsic_cvt_t)(unsigned, void*, unsigned, void*); +typedef void (*intrinsic_cvt_t)(jl_datatype_t*, void*, jl_datatype_t*, void*); typedef unsigned (*intrinsic_cvt_check_t)(unsigned, unsigned, void*); #define cvt_iintrinsic(LLVMOP, name) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ @@ -837,24 +1050,22 @@ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const if (!jl_is_primitivetype(aty)) jl_errorf("%s: value is not a primitive type", name); void *pa = jl_data_ptr(a); - unsigned isize = jl_datatype_size(aty); unsigned osize = jl_datatype_size(ty); void *pr = alloca(osize); - unsigned isize_bits = isize * host_char_bit; - unsigned osize_bits = osize * host_char_bit; - op(isize_bits, pa, osize_bits, pr); + op((jl_datatype_t*)aty, pa, (jl_datatype_t*)ty, pr); return jl_new_bits(ty, pr); } // floating point #define un_fintrinsic_withtype(OP, name) \ +un_fintrinsic_bfloat(OP, jl_##name##bf16) \ un_fintrinsic_half(OP, jl_##name##16) \ un_fintrinsic_ctype(OP, jl_##name##32, float) \ un_fintrinsic_ctype(OP, jl_##name##64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ { \ - return jl_fintrinsic_1(ty, a, #name, jl_##name##16, jl_##name##32, jl_##name##64); \ + return jl_fintrinsic_1(ty, a, #name, jl_##name##bf16, jl_##name##16, jl_##name##32, jl_##name##64); \ } #define un_fintrinsic(OP, name) \ @@ -864,9 +1075,9 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \ return jl_##name##_withtype(jl_typeof(a), a); \ } -typedef void (fintrinsic_op1)(unsigned, void*, void*); +typedef void (fintrinsic_op1)(unsigned, jl_value_t*, void*, void*); -static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) +static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *bfloatop, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) { jl_task_t *ct = jl_current_task; if (!jl_is_primitivetype(jl_typeof(a))) @@ -880,13 +1091,16 @@ static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const c switch (sz) { /* choose the right size c-type operation based on the input */ case 2: - halfop(sz2 * host_char_bit, pa, pr); + if (jl_typeof(a) == (jl_value_t*)jl_float16_type) + halfop(sz2 * host_char_bit, ty, pa, pr); + else /*if (jl_typeof(a) == (jl_value_t*)jl_bfloat16_type)*/ + 
bfloatop(sz2 * host_char_bit, ty, pa, pr); break; case 4: - floatop(sz2 * host_char_bit, pa, pr); + floatop(sz2 * host_char_bit, ty, pa, pr); break; case 8: - doubleop(sz2 * host_char_bit, pa, pr); + doubleop(sz2 * host_char_bit, ty, pa, pr); break; default: jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64", name); @@ -1058,6 +1272,7 @@ static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa // floating point #define bi_fintrinsic(OP, name) \ + bi_intrinsic_bfloat(OP, name) \ bi_intrinsic_half(OP, name) \ bi_intrinsic_ctype(OP, name, 32, float) \ bi_intrinsic_ctype(OP, name, 64, double) \ @@ -1075,7 +1290,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ switch (sz) { \ /* choose the right size c-type operation */ \ case 2: \ - jl_##name##16(16, pa, pb, pr); \ + if ((jl_datatype_t*)ty == jl_float16_type) \ + jl_##name##16(16, pa, pb, pr); \ + else /*if ((jl_datatype_t*)ty == jl_bfloat16_type)*/ \ + jl_##name##bf16(16, pa, pb, pr); \ break; \ case 4: \ jl_##name##32(32, pa, pb, pr); \ @@ -1090,6 +1308,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ } #define bool_fintrinsic(OP, name) \ + bool_intrinsic_bfloat(OP, name) \ bool_intrinsic_half(OP, name) \ bool_intrinsic_ctype(OP, name, 32, float) \ bool_intrinsic_ctype(OP, name, 64, double) \ @@ -1106,7 +1325,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ switch (sz) { \ /* choose the right size c-type operation */ \ case 2: \ - cmp = jl_##name##16(16, pa, pb); \ + if ((jl_datatype_t*)ty == jl_float16_type) \ + cmp = jl_##name##16(16, pa, pb); \ + else /*if ((jl_datatype_t*)ty == jl_bfloat16_type)*/ \ + cmp = jl_##name##bf16(16, pa, pb); \ break; \ case 4: \ cmp = jl_##name##32(32, pa, pb); \ @@ -1121,6 +1343,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ } #define ter_fintrinsic(OP, name) \ + ter_intrinsic_bfloat(OP, name) \ ter_intrinsic_half(OP, name) \ ter_intrinsic_ctype(OP, name, 32, float) \ ter_intrinsic_ctype(OP, name, 64, double) \ @@ -1138,7 +1361,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) switch (sz) { \ /* choose the right size c-type operation */ \ case 2: \ - jl_##name##16(16, pa, pb, pc, pr); \ + if ((jl_datatype_t*)ty == jl_float16_type) \ + jl_##name##16(16, pa, pb, pc, pr); \ + else /*if ((jl_datatype_t*)ty == jl_bfloat16_type)*/ \ + jl_##name##bf16(16, pa, pb, pc, pr); \ break; \ case 4: \ jl_##name##32(32, pa, pb, pc, pr); \ @@ -1154,14 +1380,12 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) // arithmetic #define neg(a) -a -#define neg_float(pr, a) *pr = -a +#define neg_float(ty, pr, a) *pr = -a un_iintrinsic_fast(LLVMNeg, neg, neg_int, u) #define add(a,b) a + b bi_iintrinsic_fast(LLVMAdd, add, add_int, u) -bi_iintrinsic_fast(LLVMAdd, add, add_ptr, u) #define sub(a,b) a - b bi_iintrinsic_fast(LLVMSub, sub, sub_int, u) -bi_iintrinsic_fast(LLVMSub, sub, sub_ptr, u) #define mul(a,b) a * b bi_iintrinsic_fast(LLVMMul, mul, mul_int, u) #define div(a,b) a / b @@ -1174,13 +1398,50 @@ bi_iintrinsic_fast(LLVMURem, rem, urem_int, u) bi_iintrinsic_fast(jl_LLVMSMod, smod, smod_int, ) #define frem(a, b) \ fp_select2(a, b, fmod) - un_fintrinsic(neg_float,neg_float) bi_fintrinsic(add,add_float) bi_fintrinsic(sub,sub_float) bi_fintrinsic(mul,mul_float) bi_fintrinsic(div,div_float) +float min_float(float x, float y) JL_NOTSAFEPOINT +{ + float diff = x - y; + float argmin = 
signbit(diff) ? x : y; + int is_nan = isnan(x) || isnan(y); + return is_nan ? diff : argmin; +} + +double min_double(double x, double y) JL_NOTSAFEPOINT +{ + double diff = x - y; + double argmin = signbit(diff) ? x : y; + int is_nan = isnan(x) || isnan(y); + return is_nan ? diff : argmin; +} + +#define _min(a, b) sizeof(a) == sizeof(float) ? min_float(a, b) : min_double(a, b) +bi_fintrinsic(_min, min_float) + +float max_float(float x, float y) JL_NOTSAFEPOINT +{ + float diff = x - y; + float argmax = signbit(diff) ? y : x; + int is_nan = isnan(x) || isnan(y); + return is_nan ? diff : argmax; +} + +double max_double(double x, double y) JL_NOTSAFEPOINT +{ + double diff = x - y; + double argmax = signbit(diff) ? y : x; + int is_nan = isnan(x) || isnan(y); + return is_nan ? diff : argmax; +} + +#define _max(a, b) sizeof(a) == sizeof(float) ? max_float(a, b) : max_double(a, b) +bi_fintrinsic(_max, max_float) + // ternary operators // // runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference. #if defined(_OS_WINDOWS_) @@ -1350,14 +1611,14 @@ bi_iintrinsic_cnvtb_fast(LLVMAShr, ashr_op, ashr_int, , 1) //un_iintrinsic_fast(LLVMByteSwap, bswap_op, bswap_int, u) un_iintrinsic_slow(LLVMByteSwap, bswap_int, u) //#define ctpop_op(a) __builtin_ctpop(a) -//uu_iintrinsic_fast(LLVMCountPopulation, ctpop_op, ctpop_int, u) -uu_iintrinsic_slow(LLVMCountPopulation, ctpop_int, u) +//uu_iintrinsic_fast(LLVMPopcount, ctpop_op, ctpop_int, u) +uu_iintrinsic_slow(LLVMPopcount, ctpop_int, u) //#define ctlz_op(a) __builtin_ctlz(a) -//uu_iintrinsic_fast(LLVMCountLeadingZeros, ctlz_op, ctlz_int, u) -uu_iintrinsic_slow(LLVMCountLeadingZeros, ctlz_int, u) +//uu_iintrinsic_fast(LLVMCountl_zero, ctlz_op, ctlz_int, u) +uu_iintrinsic_slow(LLVMCountl_zero, ctlz_int, u) //#define cttz_op(a) __builtin_cttz(a) -//uu_iintrinsic_fast(LLVMCountTrailingZeros, cttz_op, cttz_int, u) -uu_iintrinsic_slow(LLVMCountTrailingZeros, cttz_int, u) +//uu_iintrinsic_fast(LLVMCountr_zero, cttz_op, cttz_int, u) +uu_iintrinsic_slow(LLVMCountr_zero, cttz_int, u) #define not_op(a) ~a un_iintrinsic_fast(LLVMFlipAllBits, not_op, not_int, u) @@ -1370,18 +1631,22 @@ cvt_iintrinsic(LLVMUItoFP, uitofp) cvt_iintrinsic(LLVMFPtoSI, fptosi) cvt_iintrinsic(LLVMFPtoUI, fptoui) -#define fptrunc(pr, a) \ +#define fptrunc(tr, pr, a) \ if (!(osize < 8 * sizeof(a))) \ jl_error("fptrunc: output bitsize must be < input bitsize"); \ - else if (osize == 16) \ - *(uint16_t*)pr = julia__gnu_f2h_ieee(a); \ + else if (osize == 16) { \ + if ((jl_datatype_t*)tr == jl_float16_type) \ + *(uint16_t*)pr = float_to_half(a); \ + else /*if ((jl_datatype_t*)tr == jl_bfloat16_type)*/ \ + *(uint16_t*)pr = float_to_bfloat(a); \ + } \ else if (osize == 32) \ *(float*)pr = a; \ else if (osize == 64) \ *(double*)pr = a; \ else \ jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); -#define fpext(pr, a) \ +#define fpext(tr, pr, a) \ if (!(osize >= 8 * sizeof(a))) \ jl_error("fpext: output bitsize must be >= input bitsize"); \ if (osize == 32) \ @@ -1438,12 +1703,12 @@ checked_iintrinsic_div(LLVMRem_uov, checked_urem_int, u) #define flipsign(a, b) \ (b >= 0) ?
a : -a bi_iintrinsic_fast(jl_LLVMFlipSign, flipsign, flipsign_int, ) -#define abs_float(pr, a) *pr = fp_select(a, fabs) -#define ceil_float(pr, a) *pr = fp_select(a, ceil) -#define floor_float(pr, a) *pr = fp_select(a, floor) -#define trunc_float(pr, a) *pr = fp_select(a, trunc) -#define rint_float(pr, a) *pr = fp_select(a, rint) -#define sqrt_float(pr, a) *pr = fp_select(a, sqrt) +#define abs_float(ty, pr, a) *pr = fp_select(a, fabs) +#define ceil_float(ty, pr, a) *pr = fp_select(a, ceil) +#define floor_float(ty, pr, a) *pr = fp_select(a, floor) +#define trunc_float(ty, pr, a) *pr = fp_select(a, trunc) +#define rint_float(ty, pr, a) *pr = fp_select(a, rint) +#define sqrt_float(ty, pr, a) *pr = fp_select(a, sqrt) #define copysign_float(a, b) fp_select2(a, b, copysign) un_fintrinsic(abs_float,abs_float) @@ -1454,16 +1719,31 @@ un_fintrinsic(trunc_float,trunc_llvm) un_fintrinsic(rint_float,rint_llvm) un_fintrinsic(sqrt_float,sqrt_llvm) un_fintrinsic(sqrt_float,sqrt_llvm_fast) +jl_value_t *jl_cpu_has_fma(int bits); -JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a) +JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ) { - JL_TYPECHK(arraylen, array, a); - return jl_box_long(jl_array_len((jl_array_t*)a)); + JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16? + if (typ == (jl_value_t*)jl_float32_type) + return jl_cpu_has_fma(32); + else if (typ == (jl_value_t*)jl_float64_type) + return jl_cpu_has_fma(64); + else + return jl_false; } -JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ) +JL_DLLEXPORT jl_value_t *jl_add_ptr(jl_value_t *ptr, jl_value_t *offset) +{ + JL_TYPECHK(add_ptr, pointer, ptr); + JL_TYPECHK(add_ptr, ulong, offset); + char *ptrval = (char*)jl_unbox_long(ptr) + jl_unbox_ulong(offset); + return jl_new_bits(jl_typeof(ptr), &ptrval); +} + +JL_DLLEXPORT jl_value_t *jl_sub_ptr(jl_value_t *ptr, jl_value_t *offset) { - JL_TYPECHK(have_fma, datatype, typ); - // TODO: run-time feature check? - return jl_false; + JL_TYPECHK(sub_ptr, pointer, ptr); + JL_TYPECHK(sub_ptr, ulong, offset); + char *ptrval = (char*)jl_unbox_long(ptr) - jl_unbox_ulong(offset); + return jl_new_bits(jl_typeof(ptr), &ptrval); } diff --git a/src/safepoint.c b/src/safepoint.c index c6f9a42059d1a..7eab653edd089 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -30,7 +30,8 @@ char *jl_safepoint_pages = NULL; // so that both safepoint load and pending signal load falls in this page. // The initialization of the `safepoint` pointer is done `ti_initthread` // in `threading.c`. -uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0}; +// The fourth page is the count of suspended threads +uint16_t jl_safepoint_enable_cnt[4] = {0, 0, 0, 0}; // This lock should be acquired before enabling/disabling the safepoint // or accessing one of the following variables: @@ -43,17 +44,18 @@ uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0}; // load/store so that threads waiting for the GC doesn't have to also // fight on the safepoint lock... uv_mutex_t safepoint_lock; -uv_cond_t safepoint_cond; +uv_cond_t safepoint_cond_begin; +uv_cond_t safepoint_cond_end; static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT { // safepoint_lock should be held - assert(0 <= idx && idx < 3); + assert(0 <= idx && idx <= 3); if (jl_safepoint_enable_cnt[idx]++ != 0) { // We expect this to be enabled at most twice // one for the GC, one for SIGINT. // Update this if this is not the case anymore in the future. - assert(jl_safepoint_enable_cnt[idx] <= 2); + assert(jl_safepoint_enable_cnt[idx] <= (idx == 3 ? 
INT16_MAX : 2)); return; } // Now that we are requested to mprotect the page and it wasn't already. @@ -62,14 +64,15 @@ static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT DWORD old_prot; VirtualProtect(pageaddr, jl_page_size, PAGE_NOACCESS, &old_prot); #else - mprotect(pageaddr, jl_page_size, PROT_NONE); + int r = mprotect(pageaddr, jl_page_size, PROT_NONE); + (void)r; //if (r) perror("mprotect"); #endif } static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT { // safepoint_lock should be held - assert(0 <= idx && idx < 3); + assert(0 <= idx && idx <= 3); if (--jl_safepoint_enable_cnt[idx] != 0) { assert(jl_safepoint_enable_cnt[idx] > 0); return; @@ -81,20 +84,22 @@ static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT DWORD old_prot; VirtualProtect(pageaddr, jl_page_size, PAGE_READONLY, &old_prot); #else - mprotect(pageaddr, jl_page_size, PROT_READ); + int r = mprotect(pageaddr, jl_page_size, PROT_READ); + (void)r; //if (r) perror("mprotect"); #endif } void jl_safepoint_init(void) { uv_mutex_init(&safepoint_lock); - uv_cond_init(&safepoint_cond); + uv_cond_init(&safepoint_cond_begin); + uv_cond_init(&safepoint_cond_end); // jl_page_size isn't available yet. size_t pgsz = jl_getpagesize(); #ifdef _OS_WINDOWS_ - char *addr = (char*)VirtualAlloc(NULL, pgsz * 3, MEM_COMMIT, PAGE_READONLY); + char *addr = (char*)VirtualAlloc(NULL, pgsz * 4, MEM_COMMIT, PAGE_READONLY); #else - char *addr = (char*)mmap(0, pgsz * 3, PROT_READ, + char *addr = (char*)mmap(0, pgsz * 4, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) addr = NULL; @@ -104,16 +109,67 @@ void jl_safepoint_init(void) jl_gc_debug_critical_error(); abort(); } +// // If we able to skip past the faulting safepoint instruction conditionally, +// // then we can make this safepoint page unconditional. But otherwise we +// // only enable this page when required, though it gives us less +// // fine-grained control over individual resume. +// char *pageaddr = addr + pgsz * 3; +//#ifdef _OS_WINDOWS_ +// DWORD old_prot; +// VirtualProtect(pageaddr, pgsz, PAGE_NOACCESS, &old_prot); +//#else +// int r = mprotect(pageaddr, pgsz, PROT_NONE); +// (void)r; //if (r) perror("mprotect"); +//#endif // The signal page is for the gc safepoint. // The page before it is the sigint pending flag. jl_safepoint_pages = addr; } -int jl_safepoint_start_gc(void) +void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) { - // The thread should have set this already - assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING); + JL_TIMING(GC, GC_Stop); +#ifdef USE_TRACY + TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx; + TracyCZoneColor(ctx, 0x696969); +#endif + assert(gc_n_threads); + if (gc_n_threads > 1) + jl_wake_libuv(); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) { + // This acquire load pairs with the release stores + // in the signal handler of safepoint so we are sure that + // all the stores on those threads are visible. + // We're currently also using atomic store release in mutator threads + // (in jl_gc_state_set), but we may want to use signals to flush the + // memory operations on those threads lazily instead. + while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) { + // Use system mutexes rather than spin locking to minimize wasted CPU time + // while we wait for other threads reach a safepoint. + // This is particularly important when run under rr. 
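/*
 * [Editorial note, not part of the patch] This is the usual
 * check / lock / re-check / wait shape: the relaxed load outside the lock keeps
 * the common case cheap once the thread has already published a non-zero
 * gc_state, while the re-check under safepoint_lock, paired with the
 * uv_cond_broadcast(&safepoint_cond_begin) calls made by threads as they change
 * state, ensures a wakeup cannot be lost between the unlocked check and
 * uv_cond_wait.
 */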
+ uv_mutex_lock(&safepoint_lock); + if (!jl_atomic_load_relaxed(&ptls2->gc_state)) + uv_cond_wait(&safepoint_cond_begin, &safepoint_lock); + uv_mutex_unlock(&safepoint_lock); + } + } + } +} + +int jl_safepoint_start_gc(jl_task_t *ct) +{ + // The thread should have just set this before entry + assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING); uv_mutex_lock(&safepoint_lock); + uv_cond_broadcast(&safepoint_cond_begin); + // make sure we are permitted to run GC now (we might be required to stop instead) + while (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) { + uv_mutex_unlock(&safepoint_lock); + jl_safepoint_wait_thread_resume(ct); + uv_mutex_lock(&safepoint_lock); + } // In case multiple threads enter the GC at the same time, only allow // one of them to actually run the collection. We can't just let the // master thread do the GC since it might be running unmanaged code @@ -121,7 +177,7 @@ int jl_safepoint_start_gc(void) uint32_t running = 0; if (!jl_atomic_cmpswap(&jl_gc_running, &running, 1)) { uv_mutex_unlock(&safepoint_lock); - jl_safepoint_wait_gc(); + jl_safepoint_wait_gc(ct); return 0; } // Foreign thread adoption disables the GC and waits for it to finish, however, that may @@ -148,20 +204,36 @@ void jl_safepoint_end_gc(void) jl_safepoint_disable(2); jl_safepoint_disable(1); jl_atomic_store_release(&jl_gc_running, 0); -# ifdef __APPLE__ +# ifdef _OS_DARWIN_ // This wakes up other threads on mac. jl_mach_gc_end(); # endif uv_mutex_unlock(&safepoint_lock); - uv_cond_broadcast(&safepoint_cond); + uv_cond_broadcast(&safepoint_cond_end); } -void jl_safepoint_wait_gc(void) +void jl_set_gc_and_wait(jl_task_t *ct) // n.b. not used on _OS_DARWIN_ { - jl_task_t *ct = jl_current_task; (void)ct; - JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); - // The thread should have set this is already - assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0); + // reading own gc state doesn't need atomic ops since no one else + // should store to it. + int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); + uv_mutex_lock(&safepoint_lock); + uv_cond_broadcast(&safepoint_cond_begin); + uv_mutex_unlock(&safepoint_lock); + jl_safepoint_wait_gc(ct); + jl_atomic_store_release(&ct->ptls->gc_state, state); + jl_safepoint_wait_thread_resume(ct); // block in thread-suspend now if requested, after clearing the gc_state +} + +// this is the core of jl_set_gc_and_wait +void jl_safepoint_wait_gc(jl_task_t *ct) JL_NOTSAFEPOINT +{ + if (ct) { + JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); + // The thread should have set this is already + assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != JL_GC_STATE_UNSAFE); + } // Use normal volatile load in the loop for speed until GC finishes. // Then use an acquire load to make sure the GC result is visible on this thread. while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) { @@ -170,9 +242,145 @@ void jl_safepoint_wait_gc(void) // This is particularly important when run under rr. uv_mutex_lock(&safepoint_lock); if (jl_atomic_load_relaxed(&jl_gc_running)) - uv_cond_wait(&safepoint_cond, &safepoint_lock); + uv_cond_wait(&safepoint_cond_end, &safepoint_lock); + uv_mutex_unlock(&safepoint_lock); + } +} + +// equivalent to jl_set_gc_and_wait, but waiting on resume-thread lock instead +void jl_safepoint_wait_thread_resume(jl_task_t *ct) +{ + // n.b. 
we do not permit a fast-path here that skips the lock acquire since + // we otherwise have no synchronization point to ensure that this thread + // will observe the change to the safepoint, even though the other thread + // might have already observed our gc_state. + // if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return; + int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); + uv_mutex_lock(&ct->ptls->sleep_lock); + if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) { + // defer this broadcast until we determine whether uv_cond_wait is really going to be needed + uv_mutex_unlock(&ct->ptls->sleep_lock); + uv_mutex_lock(&safepoint_lock); + uv_cond_broadcast(&safepoint_cond_begin); + uv_mutex_unlock(&safepoint_lock); + uv_mutex_lock(&ct->ptls->sleep_lock); + while (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) + uv_cond_wait(&ct->ptls->wake_signal, &ct->ptls->sleep_lock); + } + // must exit gc while still holding the mutex_unlock, so we know other + // threads in jl_safepoint_suspend_thread will observe this thread in the + // correct GC state, and not still stuck in JL_GC_STATE_WAITING + jl_atomic_store_release(&ct->ptls->gc_state, state); + uv_mutex_unlock(&ct->ptls->sleep_lock); +} +// This takes the sleep lock and puts the thread in GC_SAFE +int8_t jl_safepoint_take_sleep_lock(jl_ptls_t ptls) +{ + int8_t gc_state = jl_gc_safe_enter(ptls); + uv_mutex_lock(&ptls->sleep_lock); + if (jl_atomic_load_relaxed(&ptls->suspend_count)) { + // This dance with the locks is because we are not allowed to hold both these locks at the same time + // This avoids a situation where jl_safepoint_suspend_thread loads our GC state and sees GC_UNSAFE + // But we are in the process of becoming GC_SAFE, and also trigger the old safepoint, this causes us + // to go sleep in scheduler and the suspender thread to go to sleep in safepoint_cond_begin meaning we hang + // To avoid this we do the broadcast below to force it to observe the new gc_state + uv_mutex_unlock(&ptls->sleep_lock); + uv_mutex_lock(&safepoint_lock); + uv_cond_broadcast(&safepoint_cond_begin); uv_mutex_unlock(&safepoint_lock); + uv_mutex_lock(&ptls->sleep_lock); + } + return gc_state; +} + +// n.b. suspended threads may still run in the GC or GC safe regions +// but shouldn't be observable, depending on which enum the user picks (only 1 and 2 are typically recommended here) +// waitstate = 0 : do not wait for suspend to finish +// waitstate = 1 : wait for gc_state != 0 (JL_GC_STATE_WAITING or JL_GC_STATE_SAFE) +// waitstate = 2 : wait for gc_state != 0 (JL_GC_STATE_WAITING or JL_GC_STATE_SAFE) and that GC is not running on that thread +// waitstate = 3 : wait for full suspend (gc_state == JL_GC_STATE_WAITING) -- this may never happen if thread is sleeping currently +// if another thread comes along and calls jl_safepoint_resume, we also return early +// return new suspend count on success, 0 on failure +int jl_safepoint_suspend_thread(int tid, int waitstate) +{ + if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads)) + return 0; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + jl_task_t *ct2 = ptls2 ? 
jl_atomic_load_relaxed(&ptls2->current_task) : NULL; + if (ct2 == NULL) { + // this thread is not alive yet or already dead + return 0; + } + uv_mutex_lock(&safepoint_lock); + uv_mutex_lock(&ptls2->sleep_lock); + int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count) + 1; + jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count); + if (suspend_count == 1) { // first to suspend + jl_safepoint_enable(3); + jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 3 + sizeof(void*))); + if (jl_atomic_load(&_threadedregion) != 0 || tid == jl_atomic_load_relaxed(&io_loop_tid)) + jl_wake_libuv(); // our integration with libuv right now doesn't handle except by waking it + } + uv_mutex_unlock(&ptls2->sleep_lock); + if (waitstate) { + // wait for suspend (or another thread to call resume) + if (waitstate >= 2) { + // We currently cannot distinguish if a thread is helping run GC or + // not, so assume it is running GC and wait for GC to finish first. + // It will be unable to reenter helping with GC because we have + // changed its safepoint page. + uv_mutex_unlock(&safepoint_lock); + jl_set_gc_and_wait(jl_current_task); + uv_mutex_lock(&safepoint_lock); + } + while (jl_atomic_load_acquire(&ptls2->suspend_count) != 0) { + int8_t state2 = jl_atomic_load_acquire(&ptls2->gc_state); + if (waitstate <= 2 && state2 != JL_GC_STATE_UNSAFE) + break; + if (waitstate == 3 && state2 == JL_GC_STATE_WAITING) + break; + uv_cond_wait(&safepoint_cond_begin, &safepoint_lock); + } } + uv_mutex_unlock(&safepoint_lock); + return suspend_count; +} + +// return old suspend count on success, 0 on failure +// n.b. threads often do not resume until after all suspended threads have been resumed! +int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT +{ + if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads)) + return 0; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL; + if (ct2 == NULL) { + // this thread is not alive yet or already dead + return 0; + } + uv_mutex_lock(&safepoint_lock); + uv_mutex_lock(&ptls2->sleep_lock); + int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count); + if (suspend_count == 1) { // last to unsuspend + if (tid == 0) + jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size)); + else + jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 2 + sizeof(void*))); + uv_cond_signal(&ptls2->wake_signal); +#ifdef _OS_DARWIN_ + jl_safepoint_resume_thread_mach(ptls2, tid); +#endif + uv_cond_broadcast(&safepoint_cond_begin); + } + if (suspend_count != 0) { + jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count - 1); + if (suspend_count == 1) + jl_safepoint_disable(3); + } + uv_mutex_unlock(&ptls2->sleep_lock); + uv_mutex_unlock(&safepoint_lock); + return suspend_count; } void jl_safepoint_enable_sigint(void) diff --git a/src/scheduler.c b/src/scheduler.c new file mode 100644 index 0000000000000..731a0c5146605 --- /dev/null +++ b/src/scheduler.c @@ -0,0 +1,574 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include +#include +#include +#include + +#include "julia.h" +#include "julia_internal.h" +#include "threading.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// thread sleep state + +// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD +uint64_t sleep_threshold; + +// thread should not be sleeping--it might need to do work. +static const int16_t not_sleeping = 0; + +// it is acceptable for the thread to be sleeping. +static const int16_t sleeping = 1; + +// this thread is dead. +static const int16_t sleeping_like_the_dead JL_UNUSED = 2; + +// a running count of how many threads are currently not_sleeping +// plus a running count of the number of in-flight wake-ups +// n.b. this may temporarily exceed jl_n_threads +_Atomic(int) n_threads_running = 0; + +// invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending). +// invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping. +// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it. +// information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue. +// information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons. +// information: These observations require sequentially-consistent fences to be inserted between each of those operational phases. +// [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where +// * Dequeuer: +// * 1: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)` +// * Enqueuer: +// * 2: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping` +// i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition. +// [^store_buffering_2]: and also +// * Enqueuer: +// * 1a: `jl_atomic_store_relaxed(jl_uv_n_waiters, 1)` in `JL_UV_LOCK` +// * 1b: "cheap read" of `handle->pending` in `uv_async_send` (via `JL_UV_LOCK`) loads `0` +// * Dequeuer: +// * 2a: store `2` to `handle->pending` in `uv_async_send` (via `JL_UV_LOCK` in `jl_task_get_next`) +// * 2b: `jl_atomic_load_relaxed(jl_uv_n_waiters)` in `jl_task_get_next` returns `0` +// i.e., the dequeuer misses the `n_waiters` is set and enqueuer misses the `uv_stop` flag (in `signal_async`) transition to cleared + +JULIA_DEBUG_SLEEPWAKE( +uint64_t wakeup_enter; +uint64_t wakeup_leave; +uint64_t io_wakeup_enter; +uint64_t io_wakeup_leave; +); + +JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT +{ + // Try to acquire the lock on this task. 
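The [^store_buffering_1] and [^store_buffering_2] notes above describe the classic store-buffering (Dekker) hazard. A hedged standalone sketch of the store/fence/load discipline they call for follows, with C11 atomics standing in for jl_atomic_* and jl_fence; the names (queue_nonempty, sleep_state, dequeuer_may_sleep, enqueuer_needs_wakeup) are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int queue_nonempty;   /* "is there work?"                  */
static atomic_int sleep_state;      /* 0 = not sleeping, 1 = sleeping    */

/* Dequeuer: advertise intent to sleep, then re-check for work. */
bool dequeuer_may_sleep(void)
{
    atomic_store_explicit(&sleep_state, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);
    /* if this still reads 0 it is safe to sleep: any enqueue ordered before
     * our fence is visible here, and any later enqueue will see
     * sleep_state == 1 and wake us */
    return atomic_load_explicit(&queue_nonempty, memory_order_relaxed) == 0;
}

/* Enqueuer: publish work, then check whether the dequeuer needs a wakeup. */
bool enqueuer_needs_wakeup(void)
{
    atomic_store_explicit(&queue_nonempty, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);
    /* if this reads 1, the dequeuer may already be (or be about to go)
     * asleep and must be notified */
    return atomic_load_explicit(&sleep_state, memory_order_relaxed) == 1;
}

With the two seq_cst fences in place, the forbidden interleaving in the note (dequeuer misses the enqueue while the enqueuer misses the sleep transition) cannot happen: at least one side is guaranteed to observe the other's store.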
+ int16_t was = jl_atomic_load_relaxed(&task->tid); + if (was == tid) + return 1; + if (was == -1) + return jl_atomic_cmpswap(&task->tid, &was, tid) || was == tid; + return 0; +} + +JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT +{ + if (tpid < -1 || tpid >= jl_n_threadpools) + return 0; + task->threadpoolid = tpid; + return 1; +} + +// initialize the threading infrastructure +// (called only by the main thread) +void jl_init_threadinginfra(void) +{ + /* initialize the synchronization trees pool */ + sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD; + char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME); + if (cp) { + if (!strncasecmp(cp, "infinite", 8)) + sleep_threshold = UINT64_MAX; + else + sleep_threshold = (uint64_t)strtol(cp, NULL, 10); + } +} + + +void JL_NORETURN jl_finish_task(jl_task_t *ct); + +// thread function: used by all mutator threads except the main thread +void jl_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid, create heap, set up root task) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + // warning: this changes `jl_current_task`, so be careful not to call that from this function + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + JL_GC_PROMISE_ROOTED(ct); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_SAFE, JL_GC_STATE_UNSAFE); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + (void)jl_gc_unsafe_enter(ptls); + jl_finish_task(ct); // noreturn +} + + + +void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + uv_mutex_init(&ptls->sleep_lock); + uv_cond_init(&ptls->wake_signal); + // record that there is now another thread that may be used to schedule work + // we will decrement this again in scheduler_delete_thread, only slightly + // in advance of pthread_join (which hopefully itself also had been + // adopted by now and is included in n_threads_running too) + (void)jl_atomic_fetch_add_relaxed(&n_threads_running, 1); + // n.b. this is the only point in the code where we ignore the invariants on the ordering of n_threads_running + // since we are being initialized from foreign code, we could not necessarily have expected or predicted that to happen +} + +int jl_running_under_rr(int recheck) +{ +#ifdef _OS_LINUX_ +#define RR_CALL_BASE 1000 +#define SYS_rrcall_check_presence (RR_CALL_BASE + 8) + static _Atomic(int) is_running_under_rr = 0; + int rr = jl_atomic_load_relaxed(&is_running_under_rr); + if (rr == 0 || recheck) { + int ret = syscall(SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0); + if (ret == -1) + // Should always be ENOSYS, but who knows what people do for + // unknown syscalls with their seccomp filters, so just say + // that we don't have rr. + rr = 2; + else + rr = 1; + jl_atomic_store_relaxed(&is_running_under_rr, rr); + } + return rr == 1; +#else + return 0; +#endif +} + + +// sleep_check_after_threshold() -- if sleep_threshold ns have passed, return 1 +static int sleep_check_after_threshold(uint64_t *start_cycles) JL_NOTSAFEPOINT +{ + JULIA_DEBUG_SLEEPWAKE( return 1 ); // hammer on the sleep/wake logic much harder + /** + * This wait loop is a bit of a worst case for rr - it needs timer access, + * which are slow and it busy loops in user space, which prevents the + * scheduling logic from switching to other threads. 
Just don't bother + * trying to wait here + */ + if (jl_running_under_rr(0)) + return 1; + if (!(*start_cycles)) { + *start_cycles = jl_hrtime(); + return 0; + } + uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles); + if (elapsed_cycles >= sleep_threshold) { + *start_cycles = 0; + return 1; + } + return 0; +} + +void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + // equivalent to wake_thread, without the assert on wasrunning + int8_t state = jl_atomic_load_relaxed(&ptls->sleep_check_state); + if (state == sleeping) { + if (jl_atomic_cmpswap_relaxed(&ptls->sleep_check_state, &state, not_sleeping)) { + // this notification will never be consumed, so we may have now + // introduced some inaccuracy into the count, but that is + // unavoidable with any asynchronous interruption + jl_atomic_fetch_add_relaxed(&n_threads_running, 1); + } + } +} + + +static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { + if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) { + return 1; + } + } + int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1); // consume in-flight wakeup + assert(wasrunning > 1); (void)wasrunning; + return 0; +} + +static int wake_thread(int16_t tid) JL_NOTSAFEPOINT +{ + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + + if (jl_atomic_load_relaxed(&ptls2->sleep_check_state) != not_sleeping) { + int8_t state = sleeping; + if (jl_atomic_cmpswap_relaxed(&ptls2->sleep_check_state, &state, not_sleeping)) { + int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1); // increment in-flight wakeup count + assert(wasrunning); (void)wasrunning; + JL_PROBE_RT_SLEEP_CHECK_WAKE(ptls2, state); + uv_mutex_lock(&ptls2->sleep_lock); + uv_cond_signal(&ptls2->wake_signal); + uv_mutex_unlock(&ptls2->sleep_lock); + return 1; + } + } + return 0; +} + + +static void wake_libuv(void) JL_NOTSAFEPOINT +{ + JULIA_DEBUG_SLEEPWAKE( io_wakeup_enter = cycleclock() ); + jl_wake_libuv(); + JULIA_DEBUG_SLEEPWAKE( io_wakeup_leave = cycleclock() ); +} + +void wakeup_thread(jl_task_t *ct, int16_t tid) JL_NOTSAFEPOINT { // Pass in ptls when we have it already available to save a lookup + int16_t self = jl_atomic_load_relaxed(&ct->tid); + if (tid != self) + jl_fence(); // [^store_buffering_1] + jl_task_t *uvlock = jl_atomic_load_relaxed(&jl_uv_mutex.owner); + JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() ); + if (tid == self || tid == -1) { + // we're already awake, but make sure we'll exit uv_run + // and that n_threads_running is updated if this is now considered in-flight + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { + if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) { + int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1); + assert(wasrunning); (void)wasrunning; + JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls); + } + } + if (uvlock == ct) + uv_stop(jl_global_event_loop()); + } + else { + // something added to the sticky-queue: notify that thread + if (wake_thread(tid) && uvlock != ct) { + // check if we need to notify uv_run too + jl_fence(); + jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + jl_task_t *tid_task = jl_atomic_load_relaxed(&other->current_task); + // now that we have changed the thread to not-sleeping, ensure that + // either it has not yet acquired the libuv lock, or that it will + // observe the change of state to 
not_sleeping + if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task) + wake_libuv(); + } + } + // check if the other threads might be sleeping + if (tid == -1) { + // something added to the multi-queue: notify all threads + // in the future, we might want to instead wake some fraction of threads, + // and let each of those wake additional threads if they find work + int anysleep = 0; + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + for (tid = 0; tid < nthreads; tid++) { + if (tid != self) + anysleep |= wake_thread(tid); + } + // check if we need to notify uv_run too + if (uvlock != ct && anysleep) { + jl_fence(); + if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) != NULL) + wake_libuv(); + } + } + JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() ); +} + +/* ensure thread tid is awake if necessary */ +JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + wakeup_thread(ct, tid); +} + +// get the next runnable task +static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q) +{ + jl_gc_safepoint(); + jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1); + if (jl_is_task(task)) { + int self = jl_atomic_load_relaxed(&jl_current_task->tid); + jl_set_task_tid(task, self); + return task; + } + return NULL; +} + +static int check_empty(jl_value_t *checkempty) +{ + return jl_apply_generic(checkempty, NULL, 0) == jl_true; +} + +jl_task_t *wait_empty JL_GLOBALLY_ROOTED; +void jl_wait_empty_begin(void); +void jl_wait_empty_end(void); + +void jl_task_wait_empty(void) +{ + jl_task_t *ct = jl_current_task; + if (jl_atomic_load_relaxed(&ct->tid) == 0 && jl_base_module) { + jl_wait_empty_begin(); + jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("wait")); + wait_empty = ct; + size_t lastage = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + if (f) + jl_apply_generic(f, NULL, 0); + // we are back from jl_task_get_next now + ct->world_age = lastage; + wait_empty = NULL; + // TODO: move this lock acquire to before the wait_empty return and the + // unlock to the caller, so that we ensure new work (from uv_unref + // objects) didn't unexpectedly get scheduled and start running behind + // our back during the function return + JL_UV_LOCK(); + jl_wait_empty_end(); + JL_UV_UNLOCK(); + } +} + +static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + // sleep_check_state is only transitioned from not_sleeping to sleeping + // by the thread itself. As a result, if this returns false, it will + // continue returning false. If it returns true, we know the total + // modification order of the fences. 
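wake_thread above pairs a compare-and-swap on sleep_check_state with an increment of n_threads_running so each in-flight wakeup is counted exactly once. A standalone sketch of that bookkeeping, with C11 atomics in place of jl_atomic_* and purely illustrative names, is shown here.

#include <stdatomic.h>
#include <stdbool.h>

enum { NOT_SLEEPING = 0, SLEEPING = 1 };

typedef struct {
    atomic_int sleep_check_state;
} thread_state_t;

static atomic_int running_count;   /* stands in for n_threads_running */

/* Returns true if the caller won the race and must signal the thread's condvar. */
bool try_wake(thread_state_t *t)
{
    int expected = SLEEPING;
    if (atomic_load_explicit(&t->sleep_check_state, memory_order_relaxed) != NOT_SLEEPING &&
        atomic_compare_exchange_strong_explicit(&t->sleep_check_state, &expected, NOT_SLEEPING,
                                                memory_order_relaxed, memory_order_relaxed)) {
        /* exactly one waker per sleep episode reaches this point, so the
         * counter stays balanced against the sleeper's own decrement made
         * when it went to sleep */
        atomic_fetch_add_explicit(&running_count, 1, memory_order_relaxed);
        return true;
    }
    return false;
}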
+ jl_fence(); // [^store_buffering_1] [^store_buffering_2] + return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping; +} + + +JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_value_t *checkempty) +{ + jl_task_t *ct = jl_current_task; + uint64_t start_cycles = 0; + + while (1) { + jl_task_t *task = get_next_task(trypoptask, q); + if (task) + return task; + + // quick, race-y check to see if there seems to be any stuff in there + jl_cpu_pause(); + if (!check_empty(checkempty)) { + start_cycles = 0; + continue; + } + + jl_cpu_pause(); + jl_ptls_t ptls = ct->ptls; + if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid) && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) { + // acquire sleep-check lock + assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); + jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping); + jl_fence(); // [^store_buffering_1] + JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls); + if (!check_empty(checkempty)) { // uses relaxed loads + if (set_not_sleeping(ptls)) { + JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls); + } + continue; + } + volatile int isrunning = 1; + JL_TRY { + task = get_next_task(trypoptask, q); // note: this should not yield + if (ptls != ct->ptls) { + // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over + ptls = ct->ptls; + if (set_not_sleeping(ptls)) { + JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); + } + continue; // jump to JL_CATCH + } + if (task) { + if (set_not_sleeping(ptls)) { + JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); + } + continue; // jump to JL_CATCH + } + + // IO is always permitted, but outside a threaded region, only + // thread 0 will process messages. + // Inside a threaded region, any thread can listen for IO messages, + // and one thread should win this race and watch the event loop, + // but we bias away from idle threads getting parked here. + // + // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]: + // - After decrementing _threadedregion, the thread is required to + // call jl_wakeup_thread(0), that will kick out any thread who is + // already there, and then eventually thread 0 will get here. + // - Inside a _threadedregion, there must exist at least one + // thread that has a happens-before relationship on the libuv lock + // before reaching this decision point in the code who will see + // the lock as unlocked and thus must win this race here. + int uvlock = 0; + if (jl_atomic_load_relaxed(&_threadedregion)) { + uvlock = jl_mutex_trylock(&jl_uv_mutex); + } + else if (ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) { + uvlock = 1; + JL_UV_LOCK(); + } + else { + // Since we might have started some IO work, we might need + // to ensure tid = 0 will go watch that new event source. + // If trylock would have succeeded, that may have been our + // responsibility, so need to make sure thread 0 will take care + // of us. 
+ if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock + jl_wakeup_thread(jl_atomic_load_relaxed(&io_loop_tid)); + + } + if (uvlock) { + int enter_eventloop = may_sleep(ptls); + int active = 0; + if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0) + // if we won the race against someone who actually needs + // the lock to do real work, we need to let them have it instead + enter_eventloop = 0; + if (enter_eventloop) { + uv_loop_t *loop = jl_global_event_loop(); + loop->stop_flag = 0; + JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() ); + active = uv_run(loop, UV_RUN_ONCE); + JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() ); + jl_gc_safepoint(); + } + JL_UV_UNLOCK(); + // optimization: check again first if we may have work to do. + // Otherwise we got a spurious wakeup since some other thread + // that just wanted to steal libuv from us. We will just go + // right back to sleep on the individual wake signal to let + // them take it from us without conflict. + if (active || !may_sleep(ptls)) { + if (set_not_sleeping(ptls)) { + JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls); + } + start_cycles = 0; + continue; // jump to JL_CATCH + } + if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == jl_atomic_load_relaxed(&io_loop_tid)) { + // thread 0 is the only thread permitted to run the event loop + // so it needs to stay alive, just spin-looping if necessary + if (set_not_sleeping(ptls)) { + JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls); + } + start_cycles = 0; + continue; // jump to JL_CATCH + } + } + + // any thread which wants us running again will have to observe + // sleep_check_state==sleeping and increment n_threads_running for us + int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1); + assert(wasrunning); + isrunning = 0; + if (wasrunning == 1) { + // This was the last running thread, and there is no thread with !may_sleep + // so make sure io_loop_tid is notified to check wait_empty + // TODO: this also might be a good time to check again that + // libuv's queue is truly empty, instead of during delete_thread + int16_t tid2 = 0; + if (ptls->tid != tid2) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid2]; + uv_mutex_lock(&ptls2->sleep_lock); + uv_cond_signal(&ptls2->wake_signal); + uv_mutex_unlock(&ptls2->sleep_lock); + } + } + + // the other threads will just wait for an individual wake signal to resume + JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); + int8_t gc_state = jl_safepoint_take_sleep_lock(ptls); // This puts the thread in GC_SAFE and takes the sleep lock + while (may_sleep(ptls)) { + if (ptls->tid == 0) { + task = wait_empty; + if (task && jl_atomic_load_relaxed(&n_threads_running) == 0) { + wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1); + assert(!wasrunning); + wasrunning = !set_not_sleeping(ptls); + assert(!wasrunning); + JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); + if (!ptls->finalizers_inhibited) + ptls->finalizers_inhibited++; // this annoyingly is rather sticky (we should like to reset it at the end of jl_task_wait_empty) + break; + } + task = NULL; + } + // else should we warn the user of certain deadlock here if tid == 0 && n_threads_running == 0? 
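The sleep sequence here enters a GC-safe state, takes the per-thread sleep lock, and blocks on the wake condvar for as long as may_sleep holds. A minimal standalone sketch of that shape follows, assuming pthreads and plain atomics in place of uv_mutex_t/uv_cond_t and Julia's gc_state machinery; all names and the state values are illustrative.

#include <pthread.h>
#include <stdatomic.h>

enum { GC_SAFE = 2 };   /* "GC may proceed without waiting for this thread" */

typedef struct {
    pthread_mutex_t sleep_lock;
    pthread_cond_t  wake_signal;
    atomic_int      gc_state;          /* previous state restored on wakeup */
    atomic_int      sleep_check_state; /* 0 = not sleeping, 1 = sleeping    */
} tls_t;

static int may_sleep_sketch(tls_t *t)
{
    return atomic_load_explicit(&t->sleep_check_state, memory_order_relaxed) == 1;
}

void sleep_until_woken(tls_t *t)
{
    /* mark the thread GC-safe first, so a concurrent collection never has
     * to wait for a sleeping thread */
    int old = atomic_exchange_explicit(&t->gc_state, GC_SAFE, memory_order_release);
    pthread_mutex_lock(&t->sleep_lock);
    while (may_sleep_sketch(t))                  /* wakers flip sleep_check_state */
        pthread_cond_wait(&t->wake_signal, &t->sleep_lock);
    pthread_mutex_unlock(&t->sleep_lock);
    atomic_store_explicit(&t->gc_state, old, memory_order_release);
}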
+ uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); + } + assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); + assert(jl_atomic_load_relaxed(&n_threads_running)); + start_cycles = 0; + uv_mutex_unlock(&ptls->sleep_lock); + JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); + jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint + if (task) { + assert(task == wait_empty); + wait_empty = NULL; + continue; + } + } + JL_CATCH { + // probably SIGINT, but possibly a user mistake in trypoptask + if (!isrunning) + jl_atomic_fetch_add_relaxed(&n_threads_running, 1); + set_not_sleeping(ptls); + jl_rethrow(); + } + if (task) + return task; + } + else { + // maybe check the kernel for new messages too + jl_process_events(); + } + } +} + +void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + int notsleeping = jl_atomic_exchange_relaxed(&ptls->sleep_check_state, sleeping_like_the_dead) == not_sleeping; + jl_fence(); + if (notsleeping) { + if (jl_atomic_load_relaxed(&n_threads_running) == 1) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[jl_atomic_load_relaxed(&io_loop_tid)]; + // This was the last running thread, and there is no thread with !may_sleep + // so make sure tid 0 is notified to check wait_empty + uv_mutex_lock(&ptls2->sleep_lock); + uv_cond_signal(&ptls2->wake_signal); + uv_mutex_unlock(&ptls2->sleep_lock); + } + } + else { + jl_atomic_fetch_add_relaxed(&n_threads_running, 1); + } + wakeup_thread(jl_atomic_load_relaxed(&ptls->current_task), 0); // force thread 0 to see that we do not have the IO lock (and am dead) + jl_atomic_fetch_add_relaxed(&n_threads_running, -1); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/serialize.h b/src/serialize.h index afcdcc31d66c4..549c1588073ff 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -7,66 +7,6 @@ extern "C" { #endif -#define TAG_SYMBOL 2 -#define TAG_SSAVALUE 3 -#define TAG_DATATYPE 4 -#define TAG_SLOTNUMBER 5 -#define TAG_SVEC 6 -#define TAG_ARRAY 7 -#define TAG_NULL 8 -#define TAG_EXPR 9 -#define TAG_PHINODE 10 -#define TAG_PHICNODE 11 -#define TAG_LONG_SYMBOL 12 -#define TAG_LONG_SVEC 13 -#define TAG_LONG_EXPR 14 -#define TAG_LONG_PHINODE 15 -#define TAG_LONG_PHICNODE 16 -#define TAG_METHODROOT 17 -#define TAG_STRING 18 -#define TAG_SHORT_INT64 19 -#define TAG_SHORT_GENERAL 20 -#define TAG_CNULL 21 -#define TAG_ARRAY1D 22 -#define TAG_SINGLETON 23 -#define TAG_MODULE 24 -#define TAG_TVAR 25 -#define TAG_METHOD_INSTANCE 26 -#define TAG_METHOD 27 -#define TAG_CODE_INSTANCE 28 -#define TAG_COMMONSYM 29 -#define TAG_NEARBYGLOBAL 30 -#define TAG_GLOBALREF 31 -#define TAG_CORE 32 -#define TAG_BASE 33 -#define TAG_BITYPENAME 34 -#define TAG_NEARBYMODULE 35 -#define TAG_INT32 36 -#define TAG_INT64 37 -#define TAG_UINT8 38 -#define TAG_VECTORTY 39 -#define TAG_PTRTY 40 -#define TAG_LONG_SSAVALUE 41 -#define TAG_LONG_METHODROOT 42 -#define TAG_SHORTER_INT64 43 -#define TAG_SHORT_INT32 44 -#define TAG_CALL1 45 -#define TAG_CALL2 46 -#define TAG_LINEINFO 47 -#define TAG_SHORT_BACKREF 48 -#define TAG_BACKREF 49 -#define TAG_UNIONALL 50 -#define TAG_GOTONODE 51 -#define TAG_QUOTENODE 52 -#define TAG_GENERAL 53 -#define TAG_GOTOIFNOT 54 -#define TAG_RETURNNODE 55 -#define TAG_ARGUMENT 56 -#define TAG_RELOC_METHODROOT 57 -#define TAG_BINDING 58 - -#define LAST_TAG 58 - #define write_uint8(s, n) ios_putc((n), (s)) #define read_uint8(s) ((uint8_t)ios_getc((s))) #define write_int8(s, n) write_uint8((s), (n)) @@ -134,12 +74,6 @@ static inline uint32_t read_uint32(ios_t *s) 
JL_NOTSAFEPOINT #define read_uint(s) read_uint32(s) #endif - -void *jl_lookup_ser_tag(jl_value_t *v); -void *jl_lookup_common_symbol(jl_value_t *v); -jl_value_t *jl_deser_tag(uint8_t tag); -jl_value_t *jl_deser_symbol(uint8_t tag); - #ifdef __cplusplus } #endif diff --git a/src/signal-handling.c b/src/signal-handling.c index e241fd22ecb18..ff073cc82a0a5 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -18,46 +18,48 @@ extern "C" { #include // Profiler control variables -// Note: these "static" variables are also used in "signals-*.c" -static volatile jl_bt_element_t *bt_data_prof = NULL; -static volatile size_t bt_size_max = 0; -static volatile size_t bt_size_cur = 0; +uv_mutex_t live_tasks_lock; +uv_mutex_t bt_data_prof_lock; +volatile jl_bt_element_t *profile_bt_data_prof = NULL; +volatile size_t profile_bt_size_max = 0; +volatile size_t profile_bt_size_cur = 0; static volatile uint64_t nsecprof = 0; -static volatile int running = 0; -static const uint64_t GIGA = 1000000000ULL; +volatile int profile_running = 0; +volatile int profile_all_tasks = 0; +static const uint64_t GIGA = 1000000000ULL; // Timers to take samples at intervals JL_DLLEXPORT void jl_profile_stop_timer(void); -JL_DLLEXPORT int jl_profile_start_timer(void); +JL_DLLEXPORT int jl_profile_start_timer(uint8_t); /////////////////////// // Utility functions // /////////////////////// JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec) { - bt_size_max = maxsize; + profile_bt_size_max = maxsize; nsecprof = delay_nsec; - if (bt_data_prof != NULL) - free((void*)bt_data_prof); - bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t)); - if (bt_data_prof == NULL && maxsize > 0) + if (profile_bt_data_prof != NULL) + free((void*)profile_bt_data_prof); + profile_bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t)); + if (profile_bt_data_prof == NULL && maxsize > 0) return -1; - bt_size_cur = 0; + profile_bt_size_cur = 0; return 0; } JL_DLLEXPORT uint8_t *jl_profile_get_data(void) { - return (uint8_t*) bt_data_prof; + return (uint8_t*) profile_bt_data_prof; } JL_DLLEXPORT size_t jl_profile_len_data(void) { - return bt_size_cur; + return profile_bt_size_cur; } JL_DLLEXPORT size_t jl_profile_maxlen_data(void) { - return bt_size_max; + return profile_bt_size_max; } JL_DLLEXPORT uint64_t jl_profile_delay_nsec(void) @@ -67,12 +69,12 @@ JL_DLLEXPORT uint64_t jl_profile_delay_nsec(void) JL_DLLEXPORT void jl_profile_clear_data(void) { - bt_size_cur = 0; + profile_bt_size_cur = 0; } JL_DLLEXPORT int jl_profile_is_running(void) { - return running; + return profile_running; } // Any function that acquires this lock must be either a unmanaged thread @@ -155,8 +157,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed) // The "modern Fisher–Yates shuffle" - O(n) algorithm // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm for (int i = size; i-- > 1; ) { - uint64_t unbias = UINT64_MAX; // slightly biased, but i is very small - size_t j = cong(i, unbias, seed); + size_t j = cong(i + 1, seed); // cong is an open interval so we add 1 uint64_t tmp = carray[j]; carray[j] = carray[i]; carray[i] = tmp; @@ -185,7 +186,102 @@ JL_DLLEXPORT int jl_profile_is_buffer_full(void) // Declare buffer full if there isn't enough room to sample even just the // thread metadata and one max-sized frame. The `+ 6` is for the two block // terminator `0`'s plus the 4 metadata entries. 
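The shuffle above is the standard in-place ("modern") Fisher–Yates algorithm, with the upper bound bumped to i + 1 because the generator returns values in [0, n). A standalone sketch follows; rand_below is only a stand-in for Julia's internal cong helper (assumed here to return a uniform value below its first argument), and the xorshift64* generator is just an illustrative choice.

#include <stddef.h>
#include <stdint.h>

/* returns a value in [0, n); seed must be nonzero (xorshift64*), and the
 * modulo bias is ignored for the purposes of this sketch */
static uint64_t rand_below(uint64_t n, uint64_t *seed)
{
    *seed ^= *seed >> 12;
    *seed ^= *seed << 25;
    *seed ^= *seed >> 27;
    return (*seed * UINT64_C(0x2545F4914F6CDD1D)) % n;
}

void shuffle_ints(int *a, int size, uint64_t *seed)
{
    /* walk backwards, swapping a[i] with a uniformly chosen a[j], j in [0, i] */
    for (int i = size - 1; i > 0; i--) {
        size_t j = (size_t)rand_below((uint64_t)i + 1, seed);
        int tmp = a[j];
        a[j] = a[i];
        a[i] = tmp;
    }
}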
- return bt_size_cur + ((JL_BT_MAX_ENTRY_SIZE + 1) + 6) > bt_size_max; + return profile_bt_size_cur + ((JL_BT_MAX_ENTRY_SIZE + 1) + 6) > profile_bt_size_max; +} + +NOINLINE int failed_to_sample_task_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT; +NOINLINE int failed_to_stop_thread_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT; + +#define PROFILE_TASK_DEBUG_FORCE_SAMPLING_FAILURE (0) +#define PROFILE_TASK_DEBUG_FORCE_STOP_THREAD_FAILURE (0) + +void jl_profile_task(void) +{ + if (jl_profile_is_buffer_full()) { + // Buffer full: Delete the timer + jl_profile_stop_timer(); + return; + } + + jl_task_t *t = NULL; + int got_mutex = 0; + if (uv_mutex_trylock(&live_tasks_lock) != 0) { + goto collect_backtrace; + } + got_mutex = 1; + + arraylist_t *tasks = jl_get_all_tasks_arraylist(); + uint64_t seed = jl_rand(); + const int n_max_random_attempts = 4; + // randomly select a task that is not done + for (int i = 0; i < n_max_random_attempts; i++) { + t = (jl_task_t*)tasks->items[cong(tasks->len, &seed)]; + assert(t == NULL || jl_is_task(t)); + if (t == NULL) { + continue; + } + int t_state = jl_atomic_load_relaxed(&t->_state); + if (t_state == JL_TASK_STATE_DONE) { + continue; + } + break; + } + arraylist_free(tasks); + free(tasks); + +collect_backtrace: + + uv_mutex_lock(&bt_data_prof_lock); + if (profile_running == 0) { + uv_mutex_unlock(&bt_data_prof_lock); + if (got_mutex) { + uv_mutex_unlock(&live_tasks_lock); + } + return; + } + + jl_record_backtrace_result_t r = {0, INT16_MAX}; + jl_bt_element_t *bt_data_prof = (jl_bt_element_t*)(profile_bt_data_prof + profile_bt_size_cur); + size_t bt_size_max = profile_bt_size_max - profile_bt_size_cur - 1; + if (t == NULL || PROFILE_TASK_DEBUG_FORCE_SAMPLING_FAILURE) { + // failed to find a task + r.bt_size = failed_to_sample_task_fun(bt_data_prof, bt_size_max, 0); + } + else { + if (!PROFILE_TASK_DEBUG_FORCE_STOP_THREAD_FAILURE) { + r = jl_record_backtrace(t, bt_data_prof, bt_size_max, 1); + } + // we failed to get a backtrace + if (r.bt_size == 0) { + r.bt_size = failed_to_stop_thread_fun(bt_data_prof, bt_size_max, 0); + } + } + + // update the profile buffer size + profile_bt_size_cur += r.bt_size; + + // store threadid but add 1 as 0 is preserved to indicate end of block + profile_bt_data_prof[profile_bt_size_cur++].uintptr = (uintptr_t)r.tid + 1; + + // store task id (never null) + profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)t; + + // store cpu cycle clock + profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock(); + + // the thread profiler uses this block to record whether the thread is not sleeping (1) or sleeping (2) + // let's use a dummy value which is not 1 or 2 to + // indicate that we are profiling a task, and therefore, this block is not about the thread state + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 3; + + // Mark the end of this block with two 0's + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + + uv_mutex_unlock(&bt_data_prof_lock); + if (got_mutex) { + uv_mutex_unlock(&live_tasks_lock); + } } static uint64_t jl_last_sigint_trigger = 0; @@ -257,7 +353,8 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx); void jl_show_sigill(void *_ctx); #if defined(_CPU_X86_64_) || defined(_CPU_X86_) \ || (defined(_OS_LINUX_) && defined(_CPU_AARCH64_)) \ - || (defined(_OS_LINUX_) && defined(_CPU_ARM_)) + || (defined(_OS_LINUX_) && defined(_CPU_ARM_)) \ + || (defined(_OS_LINUX_) && 
defined(_CPU_RISCV64_)) static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len) { jl_jmp_buf *old_buf = jl_get_safe_restore(); @@ -286,24 +383,35 @@ void jl_set_profile_peek_duration(double t) profile_peek_duration = t; } -uintptr_t profile_show_peek_cond_loc; -JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond) +jl_mutex_t profile_show_peek_cond_lock; +static uv_async_t *profile_show_peek_cond_loc; +JL_DLLEXPORT void jl_set_peek_cond(uv_async_t *cond) { + JL_LOCK_NOGC(&profile_show_peek_cond_lock); profile_show_peek_cond_loc = cond; + JL_UNLOCK_NOGC(&profile_show_peek_cond_lock); } static void jl_check_profile_autostop(void) { - if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) { + if (profile_show_peek_cond_loc != NULL && profile_autostop_time != -1.0 && jl_hrtime() > profile_autostop_time) { profile_autostop_time = -1.0; jl_profile_stop_timer(); jl_safe_printf("\n==============================================================\n"); jl_safe_printf("Profile collected. A report will print at the next yield point\n"); jl_safe_printf("==============================================================\n\n"); - uv_async_send((uv_async_t*)profile_show_peek_cond_loc); + JL_LOCK_NOGC(&profile_show_peek_cond_lock); + if (profile_show_peek_cond_loc != NULL) + uv_async_send(profile_show_peek_cond_loc); + JL_UNLOCK_NOGC(&profile_show_peek_cond_lock); } } +static void stack_overflow_warning(void) +{ + jl_safe_printf("Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable.\n"); +} + #if defined(_WIN32) #include "signals-win.c" #else @@ -330,8 +438,12 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx) return ((CONTEXT*)_ctx)->Rip; #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_) return ((ucontext_t*)_ctx)->uc_mcontext.pc; +#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_) + return ((ucontext_t*)_ctx)->uc_mcontext.mc_gpregs.gp_elr; #elif defined(_OS_LINUX_) && defined(_CPU_ARM_) return ((ucontext_t*)_ctx)->uc_mcontext.arm_pc; +#elif defined(_OS_LINUX_) && defined(_CPU_RISCV64_) + return ((ucontext_t*)_ctx)->uc_mcontext.__gregs[REG_PC]; #else // TODO for PPC return 0; @@ -409,12 +521,28 @@ void jl_show_sigill(void *_ctx) jl_safe_printf("Invalid ARM instruction at %p: 0x%08" PRIx32 "\n", (void*)pc, inst); } } +#elif defined(_OS_LINUX_) && defined(_CPU_RISCV64_) + uint32_t inst = 0; + size_t len = jl_safe_read_mem(pc, (char*)&inst, 4); + if (len < 2) + jl_safe_printf("Fault when reading instruction: %d bytes read\n", (int)len); + if (inst == 0x00100073 || // ebreak + inst == 0xc0001073 || // unimp (pseudo-instruction for illegal `csrrw x0, cycle, x0`) + (inst & ((1 << 16) - 1)) == 0x0000) { // c.unimp (compressed form) + // The signal might actually be SIGTRAP instead, doesn't hurt to handle it here though. 
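jl_set_peek_cond and jl_check_profile_autostop above now guard the async handle with a lock and a NULL check before signaling it. Below is a standalone sketch of that guarded-handle pattern, assuming a pthread mutex in place of jl_mutex_t/JL_LOCK_NOGC; the names set_peek_handle and notify_peek are illustrative, while uv_async_t and uv_async_send are libuv's real API.

#include <pthread.h>
#include <stddef.h>
#include <uv.h>

static pthread_mutex_t peek_lock = PTHREAD_MUTEX_INITIALIZER;
static uv_async_t *peek_handle = NULL;   /* registered by the loop's owner */

void set_peek_handle(uv_async_t *h)
{
    pthread_mutex_lock(&peek_lock);
    peek_handle = h;                     /* NULL unregisters the handle */
    pthread_mutex_unlock(&peek_lock);
}

void notify_peek(void)
{
    pthread_mutex_lock(&peek_lock);
    if (peek_handle != NULL)
        uv_async_send(peek_handle);      /* libuv documents this as callable
                                          * from any thread */
    pthread_mutex_unlock(&peek_lock);
}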
+ jl_safe_printf("Unreachable reached at %p\n", pc); + } + else { + jl_safe_printf("Invalid instruction at %p: 0x%08" PRIx32 "\n", pc, inst); + } #else // TODO for PPC (void)_ctx; #endif } +void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT; + // make it invalid for a task to return from this point to its stack // this is generally quite an foolish operation, but does free you up to do // arbitrary things on this stack now without worrying about corrupt state that @@ -426,11 +554,20 @@ void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT ct->gcstack = NULL; ct->eh = NULL; ct->world_age = 1; - ct->ptls->locks.len = 0; - ct->ptls->in_pure_callback = 0; - ct->ptls->in_finalizer = 0; - ct->ptls->defer_signal = 0; - jl_atomic_store_release(&ct->ptls->gc_state, 0); // forceably exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint + // Force all locks to drop. Is this a good idea? Of course not. But the alternative would probably deadlock instead of crashing. + jl_ptls_t ptls = ct->ptls; + small_arraylist_t *locks = &ptls->locks; + for (size_t i = locks->len; i > 0; i--) + jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]); + locks->len = 0; + ptls->in_pure_callback = 0; + ptls->in_finalizer = 0; + ptls->defer_signal = 0; + // forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint + jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_UNSAFE); + surprise_wakeup(ptls); + // allow continuing to use a Task that should have already died--unsafe necromancy! + jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE); } } @@ -442,6 +579,7 @@ void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *c size_t i, n = ct ? *bt_size : 0; if (sig) { // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit) + // and also resets the state of ct and ptls so that some code can run on this task again jl_task_frame_noreturn(ct); #ifndef _OS_WINDOWS_ sigset_t sset; @@ -464,9 +602,9 @@ void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *c pthread_sigmask(SIG_UNBLOCK, &sset, NULL); #endif if (si_code) - jl_safe_printf("\n[%d] signal (%d.%d): %s\n", getpid(), sig, si_code, strsignal(sig)); + jl_safe_printf("\n[%d] signal %d (%d): %s\n", getpid(), sig, si_code, strsignal(sig)); else - jl_safe_printf("\n[%d] signal (%d): %s\n", getpid(), sig, strsignal(sig)); + jl_safe_printf("\n[%d] signal %d: %s\n", getpid(), sig, strsignal(sig)); } jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno); if (context && ct) { diff --git a/src/signals-mach.c b/src/signals-mach.c index 073ab2ebc33a6..1c4af2cf9d033 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "mach_excServer.c" #ifdef MAC_OS_X_VERSION_10_9 @@ -44,51 +45,103 @@ static void attach_exception_port(thread_port_t thread, int segv_only); // low 16 bits are the thread id, the next 8 bits are the original gc_state static arraylist_t suspended_threads; -extern uv_mutex_t safepoint_lock; -extern uv_cond_t safepoint_cond; -void jl_mach_gc_end(void) +extern uv_cond_t safepoint_cond_begin; + +#define GC_STATE_SHIFT 8*sizeof(int16_t) +static inline int8_t decode_gc_state(uintptr_t item) { - // Requires the safepoint lock to be held + return (int8_t)(item >> GC_STATE_SHIFT); +} + +static inline int16_t decode_tid(uintptr_t item) +{ + return (int16_t)item; +} + +static inline uintptr_t encode_item(int16_t tid, int8_t 
gc_state) +{ + return (uintptr_t)tid | ((uintptr_t)gc_state << GC_STATE_SHIFT); +} + +// see jl_safepoint_wait_thread_resume +void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2) +{ + // must be called with uv_mutex_lock(&safepoint_lock) and uv_mutex_lock(&ptls2->sleep_lock) held (in that order) for (size_t i = 0; i < suspended_threads.len; i++) { uintptr_t item = (uintptr_t)suspended_threads.items[i]; - int16_t tid = (int16_t)item; - int8_t gc_state = (int8_t)(item >> 8); - jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + + int16_t tid = decode_tid(item); + int8_t gc_state = decode_gc_state(item); + if (tid != tid2) + continue; jl_atomic_store_release(&ptls2->gc_state, gc_state); thread_resume(pthread_mach_thread_np(ptls2->system_id)); + suspended_threads.items[i] = suspended_threads.items[--suspended_threads.len]; + break; } - suspended_threads.len = 0; + // thread hadn't actually reached a jl_mach_gc_wait call where we suspended it } -// Suspend the thread and return `1` if the GC is running. -// Otherwise return `0` -static int jl_mach_gc_wait(jl_ptls_t ptls2, - mach_port_t thread, int16_t tid) +void jl_mach_gc_end(void) +{ + // must be called with uv_mutex_lock(&safepoint_lock) held + size_t j = 0; + for (size_t i = 0; i < suspended_threads.len; i++) { + uintptr_t item = (uintptr_t)suspended_threads.items[i]; + int16_t tid = decode_tid(item); + int8_t gc_state = decode_gc_state(item); + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + uv_mutex_lock(&ptls2->sleep_lock); + if (jl_atomic_load_relaxed(&ptls2->suspend_count) == 0) { + jl_atomic_store_release(&ptls2->gc_state, gc_state); + thread_resume(pthread_mach_thread_np(ptls2->system_id)); + } + else { + // this is the check for jl_safepoint_wait_thread_resume + suspended_threads.items[j++] = (void*)item; + } + uv_mutex_unlock(&ptls2->sleep_lock); + } + suspended_threads.len = j; +} + +// implement jl_set_gc_and_wait from a different thread +static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid) { + // relaxed, since we don't mind missing one--we will hit another soon (immediately probably) uv_mutex_lock(&safepoint_lock); - if (!jl_atomic_load_relaxed(&jl_gc_running)) { - // relaxed, since gets set to zero only while the safepoint_lock was held - // this means we can tell if GC is done before we got the message or - // the safepoint was enabled for SIGINT. - uv_mutex_unlock(&safepoint_lock); - return 0; + // Since this gets set to zero only while the safepoint_lock was held this + // means we can tell for sure if GC is done before we got the message or + // the safepoint was enabled for SIGINT instead. + int doing_gc = jl_atomic_load_relaxed(&jl_gc_running); + int do_suspend = doing_gc; + int relaxed_suspend_count = !doing_gc && jl_atomic_load_relaxed(&ptls2->suspend_count) != 0; + if (relaxed_suspend_count) { + uv_mutex_lock(&ptls2->sleep_lock); + do_suspend = jl_atomic_load_relaxed(&ptls2->suspend_count) != 0; + // only do_suspend while holding the sleep_lock, otherwise we might miss a resume + } + if (do_suspend) { + // Set the gc state of the thread, suspend and record it + // + // TODO: TSAN will complain that it never saw the faulting task do an + // atomic release (it was in the kernel). And our attempt here does + // nothing, since we are a different thread, and it is not transitive). + // + // This also means we are not making this thread available for GC work. 
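The Mach handler above packs a thread id and its saved gc_state into one uintptr_t: the low 16 bits carry the tid and the next 8 bits the gc_state. A small standalone round-trip sketch mirroring encode_item/decode_tid/decode_gc_state is shown below; the extra unsigned casts only keep a hypothetical negative value from smearing into the high bits, and all names here are illustrative.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GC_STATE_SHIFT (8 * sizeof(int16_t))   /* 16 */

static uintptr_t encode(int16_t tid, int8_t gc_state)
{
    return (uintptr_t)(uint16_t)tid | ((uintptr_t)(uint8_t)gc_state << GC_STATE_SHIFT);
}

static int16_t decode_tid_sketch(uintptr_t item)      { return (int16_t)item; }
static int8_t  decode_gc_state_sketch(uintptr_t item) { return (int8_t)(item >> GC_STATE_SHIFT); }

int main(void)
{
    uintptr_t item = encode(/*tid=*/7, /*gc_state=*/2);
    assert(decode_tid_sketch(item) == 7);
    assert(decode_gc_state_sketch(item) == 2);
    printf("item=0x%llx tid=%d gc_state=%d\n",
           (unsigned long long)item, decode_tid_sketch(item), decode_gc_state_sketch(item));
    return 0;
}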
+ // Eventually, we should probably release this signal to the original + // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it + // triggers a SIGSEGV and gets handled by the usual codepath for unix. + int8_t gc_state = jl_atomic_load_acquire(&ptls2->gc_state); + jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING); + uintptr_t item = encode_item(tid, gc_state); + arraylist_push(&suspended_threads, (void*)item); + thread_suspend(thread); } - // Otherwise, set the gc state of the thread, suspend and record it - // TODO: TSAN will complain that it never saw the faulting task do an - // atomic release (it was in the kernel). And our attempt here does - // nothing, since we are a different thread, and it is not transitive). - // - // This also means we are not making this thread available for GC work. - // Eventually, we should probably release this signal to the original - // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it - // triggers a SIGSEGV and gets handled by the usual codepath for unix. - int8_t gc_state = ptls2->gc_state; - jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING); - uintptr_t item = tid | (((uintptr_t)gc_state) << 16); - arraylist_push(&suspended_threads, (void*)item); - thread_suspend(thread); + if (relaxed_suspend_count) + uv_mutex_unlock(&ptls2->sleep_lock); + uv_cond_broadcast(&safepoint_cond_begin); uv_mutex_unlock(&safepoint_lock); - return 1; } static mach_port_t segv_port = 0; @@ -169,37 +222,92 @@ typedef arm_exception_state64_t host_exception_state_t; #define HOST_EXCEPTION_STATE_COUNT ARM_EXCEPTION_STATE64_COUNT #endif -static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state, - void (*fptr)(void)) +// create a fake function that describes the variable manipulations in jl_call_in_state +__attribute__((naked)) static void fake_stack_pop(void) { #ifdef _CPU_X86_64_ - uintptr_t rsp = state->__rsp; + __asm__ volatile ( + " .cfi_signal_frame\n" + " .cfi_def_cfa %rsp, 0\n" // CFA here uses %rsp directly + " .cfi_offset %rip, 0\n" // previous value of %rip at CFA + " .cfi_offset %rsp, 8\n" // previous value of %rsp at CFA + " nop\n" + ); #elif defined(_CPU_AARCH64_) - uintptr_t rsp = state->__sp; + __asm__ volatile ( + " .cfi_signal_frame\n" + " .cfi_def_cfa sp, 0\n" // use sp as fp here + " .cfi_offset lr, 0\n" + " .cfi_offset sp, 8\n" + // Anything else got smashed, since we didn't explicitly copy all of the + // state object to the stack (to build a real sigreturn frame). 
+ // This is also not quite valid, since the AArch64 DWARF spec lacks the ability to define how to restore the LR register correctly, + // so normally libunwind implementations on linux detect this function specially and hack around the invalid info: + // https://github.com/llvm/llvm-project/commit/c82deed6764cbc63966374baf9721331901ca958 + " nop\n" + ); #else -#error "julia: throw-in-context not supported on this platform" +CFI_NORETURN #endif - if (ptls2 == NULL || ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) { - rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment - } - else { - rsp = (uintptr_t)ptls2->signal_stack + sig_stack_size; - } - assert(rsp % 16 == 0); +} +static void jl_call_in_state(host_thread_state_t *state, void (*fptr)(void)) +{ +#ifdef _CPU_X86_64_ + uintptr_t sp = state->__rsp; +#elif defined(_CPU_AARCH64_) + uintptr_t sp = state->__sp; +#endif + sp = (sp - 256) & ~(uintptr_t)15; // redzone and re-alignment + assert(sp % 16 == 0); + sp -= 16; #ifdef _CPU_X86_64_ - rsp -= sizeof(void*); - state->__rsp = rsp; // set stack pointer + // set return address to NULL + *(uintptr_t*)sp = 0; + // pushq %sp + sp -= sizeof(void*); + *(uintptr_t*)sp = state->__rsp; + // pushq %rip + sp -= sizeof(void*); + *(uintptr_t*)sp = state->__rip; + // pushq .fake_stack_pop + 1; aka call from fake_stack_pop + sp -= sizeof(void*); + *(uintptr_t*)sp = (uintptr_t)&fake_stack_pop + 1; + state->__rsp = sp; // set stack pointer state->__rip = (uint64_t)fptr; // "call" the function #elif defined(_CPU_AARCH64_) - state->__sp = rsp; - state->__pc = (uint64_t)fptr; - state->__lr = 0; + // push {%sp, %pc + 4} + sp -= sizeof(void*); + *(uintptr_t*)sp = state->__sp; + sp -= sizeof(void*); + *(uintptr_t*)sp = (uintptr_t)state->__pc; + state->__sp = sp; // x31 + state->__pc = (uint64_t)fptr; // pc + state->__lr = (uintptr_t)&fake_stack_pop + 4; // x30 #else #error "julia: throw-in-context not supported on this platform" #endif } +static void jl_longjmp_in_state(host_thread_state_t *state, jl_jmp_buf jmpbuf) +{ + + if (!jl_simulate_longjmp(jmpbuf, (bt_context_t*)state)) { + // for sanitizer builds, fallback to calling longjmp on the original stack + // (this will fail for stack overflow, but that is hardly sanitizer-legal anyways) +#ifdef _CPU_X86_64_ + state->__rdi = (uintptr_t)jmpbuf; + state->__rsi = 1; +#elif defined(_CPU_AARCH64_) + state->__x[0] = (uintptr_t)jmpbuf; + state->__x[1] = 1; +#else +#error "julia: jl_longjmp_in_state not supported on this platform" +#endif + jl_call_in_state(state, (void (*)(void))longjmp); + } +} + #ifdef _CPU_X86_64_ int is_write_fault(host_exception_state_t exc_state) { return exc_reg_is_write_fault(exc_state.__err); @@ -221,14 +329,26 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t * host_thread_state_t state; kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count); HANDLE_MACH_ERROR("thread_get_state", ret); - if (1) { // XXX: !jl_has_safe_restore(ptls2) + if (ptls2->safe_restore) { + jl_longjmp_in_state(&state, *ptls2->safe_restore); + } + else { assert(exception); ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state, - NULL /*current_task?*/); + NULL /*current_task?*/); ptls2->sig_exception = exception; + ptls2->io_wait = 0; + jl_task_t *ct = ptls2->current_task; + jl_handler_t *eh = ct->eh; + if (eh != NULL) { + asan_unpoison_task_stack(ct, &eh->eh_ctx); + jl_longjmp_in_state(&state, eh->eh_ctx); + } + else { + 
jl_no_exc_handler(exception, ct); + } } - jl_call_in_state(ptls2, &state, &jl_sig_throw); ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); } @@ -236,14 +356,18 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t * static void segv_handler(int sig, siginfo_t *info, void *context) { assert(sig == SIGSEGV || sig == SIGBUS); - if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn - jl_task_t *ct = jl_get_current_task(); - jl_ptls_t ptls = ct == NULL ? NULL : ct->ptls; - jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw); + jl_jmp_buf *saferestore = jl_get_safe_restore(); + if (saferestore) { // restarting jl_ or jl_unwind_stepn + jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(context), *saferestore); + return; } - else { - sigdie_handler(sig, info, context); + jl_task_t *ct = jl_get_current_task(); + if ((sig != SIGBUS || info->si_code == BUS_ADRERR) && + !(ct == NULL || ct->ptls == NULL || jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING || ct->eh == NULL) + && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error) + stack_overflow_warning(); } + sigdie_handler(sig, info, context); } // n.b. mach_exc_server expects us to define this symbol locally @@ -279,25 +403,27 @@ kern_return_t catch_mach_exception_raise( int nthreads = jl_atomic_load_acquire(&jl_n_threads); for (tid = 0; tid < nthreads; tid++) { jl_ptls_t _ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (jl_atomic_load_relaxed(&_ptls2->current_task) == NULL) { + // this thread is dead + continue; + } if (pthread_mach_thread_np(_ptls2->system_id) == thread) { ptls2 = _ptls2; break; } } - if (!ptls2 || ptls2->current_task == NULL) { + if (!ptls2) { // We don't know about this thread, let the kernel try another handler // instead. This shouldn't actually happen since we only register the // handler for the threads we know about. 
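The segv_handler change above classifies a fault as a likely stack overflow when the faulting address lies on the current task's stack. The sketch below shows only the shape of that range check; the struct fields and guard size are illustrative assumptions, not Julia's actual task layout or its is_addr_on_stack helper.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef struct {
    char  *stack_lo;    /* lowest usable address of the stack        */
    char  *stack_hi;    /* one past the highest usable address       */
    size_t guard_size;  /* protected guard region below stack_lo     */
} task_stack_t;

bool addr_is_on_task_stack(const task_stack_t *t, const void *addr)
{
    uintptr_t p  = (uintptr_t)addr;
    uintptr_t lo = (uintptr_t)t->stack_lo;
    uintptr_t hi = (uintptr_t)t->stack_hi;
    /* count the guard region as "on the stack": faults there are exactly the
     * overflow case worth reporting before process state degrades further */
    return p >= lo - t->guard_size && p < hi;
}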
jl_safe_printf("ERROR: Exception handler triggered on unmanaged thread.\n"); return KERN_INVALID_ARGUMENT; } - // XXX: jl_throw_in_thread or segv_handler will eventually check this, but - // we would like to avoid some of this work if we could detect this earlier - // if (jl_has_safe_restore(ptls2)) { - // jl_throw_in_thread(ptls2, thread, jl_stackovf_exception); - // return KERN_SUCCESS; - // } - if (ptls2->gc_state == JL_GC_STATE_WAITING) + if (ptls2->safe_restore) { + jl_throw_in_thread(ptls2, thread, NULL); + return KERN_SUCCESS; + } + if (jl_atomic_load_acquire(&ptls2->gc_state) == JL_GC_STATE_WAITING) return KERN_FAILURE; if (exception == EXC_ARITHMETIC) { jl_throw_in_thread(ptls2, thread, jl_diverror_exception); @@ -310,8 +436,7 @@ kern_return_t catch_mach_exception_raise( kern_return_t ret = thread_get_state(thread, HOST_EXCEPTION_STATE, (thread_state_t)&exc_state, &exc_count); HANDLE_MACH_ERROR("thread_get_state", ret); if (jl_addr_is_safepoint(fault_addr) && !is_write_fault(exc_state)) { - if (jl_mach_gc_wait(ptls2, thread, tid)) - return KERN_SUCCESS; + jl_mach_gc_wait(ptls2, thread, tid); if (ptls2->tid != 0) return KERN_SUCCESS; if (ptls2->defer_signal) { @@ -323,10 +448,11 @@ kern_return_t catch_mach_exception_raise( } return KERN_SUCCESS; } - if (ptls2->current_task->eh == NULL) + if (jl_atomic_load_relaxed(&ptls2->current_task)->eh == NULL) return KERN_FAILURE; jl_value_t *excpt; if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) { + stack_overflow_warning(); excpt = jl_stackovf_exception; } else if (is_write_fault(exc_state)) // false for alignment errors @@ -372,7 +498,7 @@ kern_return_t catch_mach_exception_raise_state_identity( static void attach_exception_port(thread_port_t thread, int segv_only) { kern_return_t ret; - // http://www.opensource.apple.com/source/xnu/xnu-2782.1.97/osfmk/man/thread_set_exception_ports.html + // https://www.opensource.apple.com/source/xnu/xnu-2782.1.97/osfmk/man/thread_set_exception_ports.html exception_mask_t mask = EXC_MASK_BAD_ACCESS; if (!segv_only) mask |= EXC_MASK_ARITHMETIC; @@ -380,12 +506,12 @@ static void attach_exception_port(thread_port_t thread, int segv_only) HANDLE_MACH_ERROR("thread_set_exception_ports", ret); } -static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) +static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) JL_NOTSAFEPOINT { jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; if (ptls2 == NULL) // this thread is not alive return 0; - jl_task_t *ct2 = ptls2 ? 
jl_atomic_load_relaxed(&ptls2->current_task) : NULL; + jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task); if (ct2 == NULL) // this thread is already dead return 0; @@ -403,18 +529,18 @@ static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) return 1; } -static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx) +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) { (void)timeout; - static host_thread_state_t state; + host_thread_state_t state; if (!jl_thread_suspend_and_get_state2(tid, &state)) { - *ctx = NULL; - return; + return 0; } - *ctx = (unw_context_t*)&state; + *ctx = *(unw_context_t*)&state; + return 1; } -static void jl_thread_resume(int tid, int sig) +void jl_thread_resume(int tid) { jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); @@ -455,7 +581,6 @@ static void jl_try_deliver_sigint(void) static void JL_NORETURN jl_exit_thread0_cb(int signo) { -CFI_NORETURN jl_critical_error(signo, 0, NULL, jl_current_task); jl_atexit_hook(128); jl_raise(signo); @@ -487,7 +612,7 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size) #else #error Fill in first integer argument here #endif - jl_call_in_state(ptls2, &state, (void (*)(void))&jl_exit_thread0_cb); + jl_call_in_state(&state, (void (*)(void))&jl_exit_thread0_cb); unsigned int count = MACH_THREAD_STATE_COUNT; ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); @@ -589,8 +714,90 @@ static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked) jl_unlock_profile(); } -#define jl_lock_profile() int keymgr_locked = jl_lock_profile_mach(1) -#define jl_unlock_profile() jl_unlock_profile_mach(1, keymgr_locked) +void jl_with_stackwalk_lock(void (*f)(void*), void *ctx) +{ + int lockret = jl_lock_profile_mach(1); + f(ctx); + jl_unlock_profile_mach(1, lockret); +} + +// assumes holding `jl_lock_profile_mach` +void jl_profile_thread_mach(int tid) +{ + // if there is no space left, return early + if (jl_profile_is_buffer_full()) { + jl_profile_stop_timer(); + return; + } + if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) + _dyld_dlopen_atfork_prepare(); + if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) + _dyld_atfork_prepare(); // briefly acquire the dlsym lock + host_thread_state_t state; + int valid_thread = jl_thread_suspend_and_get_state2(tid, &state); + unw_context_t *uc = (unw_context_t*)&state; + if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) + _dyld_atfork_parent(); // quickly release the dlsym lock + if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) + _dyld_dlopen_atfork_parent(); + if (!valid_thread) + return; + if (profile_running) { +#ifdef LLVMLIBUNWIND + /* + * Unfortunately compact unwind info is incorrectly generated for quite a number of + * libraries by quite a large number of compilers. We can fall back to DWARF unwind info + * in some cases, but in quite a number of cases (especially libraries not compiled in debug + * mode, only the compact unwind info may be available). Even more unfortunately, there is no + * way to detect such bogus compact unwind info (other than noticing the resulting segfault). + * What we do here is ugly, but necessary until the compact unwind info situation improves. 
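/* --- Illustrative sketch (not part of the diff): the callback-under-lock shape
 *     of jl_with_stackwalk_lock above. A plain pthread mutex stands in for the
 *     profile/keymgr locking done by jl_lock_profile_mach; the point is only
 *     that the lock is held for exactly the duration of the supplied callback. --- */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t walk_lock = PTHREAD_MUTEX_INITIALIZER;

static void with_walk_lock(void (*f)(void*), void *ctx)
{
    pthread_mutex_lock(&walk_lock);
    f(ctx);                        /* caller's work runs with the lock held */
    pthread_mutex_unlock(&walk_lock);
}

static void sample_one_thread(void *ctx)
{
    printf("sampling thread %d under the stackwalk lock\n", *(int*)ctx);
}

int main(void)
{
    int tid = 0;
    with_walk_lock(sample_one_thread, &tid);
    return 0;
}
/* --- end sketch --- */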
+ * We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info. + * Note that in a small number of cases this may result in bogus stack traces, but at least the topmost + * entry will always be correct, and the number of cases in which this is an issue is rather small. + * Other than that, this implementation is not incorrect as the other thread is paused while we are profiling + * and during stack unwinding we only ever read memory, but never write it. + */ + + forceDwarf = 0; + unw_getcontext(&profiler_uc); // will resume from this point if the next lines segfault at any point + + if (forceDwarf == 0) { + // Save the backtrace + profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, profile_bt_size_max - profile_bt_size_cur - 1, uc, NULL); + } + else if (forceDwarf == 1) { + profile_bt_size_cur += rec_backtrace_ctx_dwarf((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, profile_bt_size_max - profile_bt_size_cur - 1, uc, NULL); + } + else if (forceDwarf == -1) { + jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); + } + + forceDwarf = -2; +#else + profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, profile_bt_size_max - profile_bt_size_cur - 1, uc, NULL); +#endif + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + + // store threadid but add 1 as 0 is preserved to indicate end of block + profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls->tid + 1; + + // store task id (never null) + profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); + + // store cpu cycle clock + profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock(); + + // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block) + int state = jl_atomic_load_relaxed(&ptls->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = state; + + // Mark the end of this block with two 0's + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + } + // We're done! Resume the thread. + jl_thread_resume(tid); +} void *mach_profile_listener(void *arg) { @@ -609,88 +816,21 @@ void *mach_profile_listener(void *arg) // sample each thread, round-robin style in reverse order // (so that thread zero gets notified last) int keymgr_locked = jl_lock_profile_mach(0); - int nthreads = jl_atomic_load_acquire(&jl_n_threads); - int *randperm = profile_get_randperm(nthreads); - for (int idx = nthreads; idx-- > 0; ) { - // Stop the threads in the random or reverse round-robin order. 
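/* --- Illustrative sketch (not part of the diff): the metadata block that the
 *     profiler appends after each backtrace, as stored above: thread id plus
 *     one (so 0 can terminate a block), the task pointer, a cycle-clock
 *     sample, a sleep-state code that is never 0, then two zero words ending
 *     the block. The buffer, the decode in main(), and the STATE_* constants
 *     here are stand-ins for the runtime's own buffer and PROFILE_STATE_*
 *     values. --- */
#include <stdint.h>
#include <stdio.h>

enum { STATE_NOT_SLEEPING = 1, STATE_SLEEPING = 2 };

static size_t append_sample(uintptr_t *buf, size_t cur, int tid,
                            uintptr_t task, uintptr_t clock, int sleeping)
{
    buf[cur++] = (uintptr_t)tid + 1;   /* 0 is reserved for "end of block" */
    buf[cur++] = task;                 /* never NULL in the real buffer */
    buf[cur++] = clock;
    buf[cur++] = sleeping ? STATE_SLEEPING : STATE_NOT_SLEEPING;
    buf[cur++] = 0;                    /* two zero words close the block */
    buf[cur++] = 0;
    return cur;
}

int main(void)
{
    uintptr_t buf[16];
    size_t n = append_sample(buf, 0, 3, (uintptr_t)0xdeadbeef, 123456, 0);
    printf("%zu words: tid=%lu state=%lu\n", n,
           (unsigned long)(buf[0] - 1), (unsigned long)buf[3]);
    return 0;
}
/* --- end sketch --- */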
- int i = randperm[idx]; - // if there is no space left, break early - if (jl_profile_is_buffer_full()) { - jl_profile_stop_timer(); - break; - } - - if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) - _dyld_dlopen_atfork_prepare(); - if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) - _dyld_atfork_prepare(); // briefly acquire the dlsym lock - host_thread_state_t state; - int valid_thread = jl_thread_suspend_and_get_state2(i, &state); - unw_context_t *uc = (unw_context_t*)&state; - if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) - _dyld_atfork_parent(); // quickly release the dlsym lock - if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) - _dyld_dlopen_atfork_parent(); - if (!valid_thread) - continue; - if (running) { -#ifdef LLVMLIBUNWIND - /* - * Unfortunately compact unwind info is incorrectly generated for quite a number of - * libraries by quite a large number of compilers. We can fall back to DWARF unwind info - * in some cases, but in quite a number of cases (especially libraries not compiled in debug - * mode, only the compact unwind info may be available). Even more unfortunately, there is no - * way to detect such bogus compact unwind info (other than noticing the resulting segfault). - * What we do here is ugly, but necessary until the compact unwind info situation improves. - * We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info. - * Note that in a small number of cases this may result in bogus stack traces, but at least the topmost - * entry will always be correct, and the number of cases in which this is an issue is rather small. - * Other than that, this implementation is not incorrect as the other thread is paused while we are profiling - * and during stack unwinding we only ever read memory, but never write it. 
- */ - - forceDwarf = 0; - unw_getcontext(&profiler_uc); // will resume from this point if the next lines segfault at any point - - if (forceDwarf == 0) { - // Save the backtrace - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); - } - else if (forceDwarf == 1) { - bt_size_cur += rec_backtrace_ctx_dwarf((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); - } - else if (forceDwarf == -1) { - jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); - } - - forceDwarf = -2; -#else - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); -#endif - jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[i]; - - // store threadid but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; - - // store task id (never null) - bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); - - // store cpu cycle clock - bt_data_prof[bt_size_cur++].uintptr = cycleclock(); - - // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1; - - // Mark the end of this block with two 0's - bt_data_prof[bt_size_cur++].uintptr = 0; - bt_data_prof[bt_size_cur++].uintptr = 0; + if (profile_all_tasks) { + // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace` + jl_profile_task(); + } + else { + int *randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; ) { + // Stop the threads in random order. + int i = randperm[idx]; + jl_profile_thread_mach(i); } - // We're done! Resume the thread. 
- jl_thread_resume(i, 0); } jl_unlock_profile_mach(0, keymgr_locked); - if (running) { + if (profile_running) { jl_check_profile_autostop(); // Reset the alarm kern_return_t ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port); @@ -699,7 +839,8 @@ void *mach_profile_listener(void *arg) } } -JL_DLLEXPORT int jl_profile_start_timer(void) + +JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks) { kern_return_t ret; if (!profile_started) { @@ -728,7 +869,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void) timerprof.tv_sec = nsecprof/GIGA; timerprof.tv_nsec = nsecprof%GIGA; - running = 1; + profile_running = 1; + profile_all_tasks = all_tasks; // ensure the alarm is running ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port); HANDLE_MACH_ERROR("clock_alarm", ret); @@ -738,5 +880,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void) JL_DLLEXPORT void jl_profile_stop_timer(void) { - running = 0; + uv_mutex_lock(&bt_data_prof_lock); + profile_running = 0; + profile_all_tasks = 0; + uv_mutex_unlock(&bt_data_prof_lock); } diff --git a/src/signals-unix.c b/src/signals-unix.c index 4c21d25d3622c..91d3378068f84 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -9,6 +9,10 @@ #include #include #include + +#include "julia.h" +#include "julia_internal.h" + #if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON #endif @@ -24,10 +28,12 @@ #endif // Figure out the best signals/timers to use for this platform -#ifdef __APPLE__ // Darwin's mach ports allow signal-free thread management +#if defined(__APPLE__) // Darwin's mach ports allow signal-free thread management #define HAVE_MACH #define HAVE_KEVENT -#else // generic Linux or BSD +#elif defined(__OpenBSD__) +#define HAVE_KEVENT +#else // generic Linux or FreeBSD #define HAVE_TIMER #endif @@ -35,16 +41,14 @@ #include #endif -// 8M signal stack, same as default stack size and enough -// for reasonable finalizers. 
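/* --- Illustrative sketch (not part of the diff): producing the random thread
 *     visiting order used by the sampler above. profile_get_randperm is the
 *     runtime's helper; a standard Fisher-Yates shuffle is enough to build
 *     such a permutation, as shown here. --- */
#include <stdio.h>
#include <stdlib.h>

static void random_permutation(int *perm, int n)
{
    for (int i = 0; i < n; i++)
        perm[i] = i;
    for (int i = n - 1; i > 0; i--) {
        int j = rand() % (i + 1);      /* fine for a sampling order */
        int tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
    }
}

int main(void)
{
    int perm[8];
    random_permutation(perm, 8);
    for (int i = 0; i < 8; i++)
        printf("%d ", perm[i]);
    printf("\n");
    return 0;
}
/* --- end sketch --- */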
-// Should also be enough for parallel GC when we have it =) -#define sig_stack_size (8 * 1024 * 1024) +// 8M signal stack, same as default stack size (though we barely use this) +static const size_t sig_stack_size = 8 * 1024 * 1024; #include "julia_assert.h" // helper function for returning the unw_context_t inside a ucontext_t // (also used by stackwalk.c) -bt_context_t *jl_to_bt_context(void *sigctx) +bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT { #ifdef __APPLE__ return (bt_context_t*)&((ucontext64_t*)sigctx)->uc_mcontext64->__ss; @@ -62,7 +66,11 @@ bt_context_t *jl_to_bt_context(void *sigctx) static int thread0_exit_count = 0; static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size); -static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx) +int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT; +static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf); + +#if !defined(_OS_DARWIN_) +static inline uintptr_t jl_get_rsp_from_ctx(const void *_ctx) { #if defined(_OS_LINUX_) && defined(_CPU_X86_64_) const ucontext_t *ctx = (const ucontext_t*)_ctx; @@ -76,32 +84,34 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void * #elif defined(_OS_LINUX_) && defined(_CPU_ARM_) const ucontext_t *ctx = (const ucontext_t*)_ctx; return ctx->uc_mcontext.arm_sp; -#elif defined(_OS_DARWIN_) && defined(_CPU_X86_64_) - const ucontext64_t *ctx = (const ucontext64_t*)_ctx; - return ctx->uc_mcontext64->__ss.__rsp; -#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) - const ucontext64_t *ctx = (const ucontext64_t*)_ctx; - return ctx->uc_mcontext64->__ss.__sp; +#elif defined(_OS_LINUX_) && (defined(_CPU_RISCV64_)) + const ucontext_t *ctx = (const ucontext_t*)_ctx; + return ctx->uc_mcontext.__gregs[REG_SP]; #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_) const ucontext_t *ctx = (const ucontext_t*)_ctx; return ctx->uc_mcontext.mc_rsp; +#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_) + const ucontext_t *ctx = (const ucontext_t*)_ctx; + return ctx->uc_mcontext.mc_gpregs.gp_sp; +#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_) + const struct sigcontext *ctx = (const struct sigcontext *)_ctx; + return ctx->sc_rsp; #else // TODO Add support for PowerPC(64)? return 0; #endif } -static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) +static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) JL_NOTSAFEPOINT { // One guard page for signal_stack. - return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || - (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); + return ptls->signal_stack == NULL || + ((char*)ptr >= (char*)ptls->signal_stack - jl_page_size && + (char*)ptr <= (char*)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size)); } -// Modify signal context `_ctx` so that `fptr` will execute when the signal -// returns. `fptr` will execute on the signal stack, and must not return. -// jl_call_in_ctx is also currently executing on that signal stack, -// so be careful not to smash it +// Modify signal context `_ctx` so that `fptr` will execute when the signal returns +// The function `fptr` itself must not return. 
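/* --- Illustrative sketch (not part of the diff): the signal-stack membership
 *     test performed by is_addr_on_sigstack above. An address counts as "on
 *     the signal stack" if it lies between one guard page below the stack base
 *     and the top of the stack; a missing signal stack is treated as a match,
 *     as in the runtime. The sizes below are illustrative (the runtime default
 *     is 8 MiB with the system page size). --- */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096u
#define STACK_SIZE (64u * 1024)

static int on_sigstack(uintptr_t stack_lo, size_t size, uintptr_t ptr)
{
    if (stack_lo == 0)
        return 1;                              /* no dedicated stack installed */
    return ptr >= stack_lo - PAGE_SIZE &&      /* one guard page of slack below */
           ptr <= stack_lo + size;
}

int main(void)
{
    static char stack[STACK_SIZE];
    uintptr_t lo = (uintptr_t)stack;
    printf("%d %d\n", on_sigstack(lo, STACK_SIZE, lo + 100),
                      on_sigstack(lo, STACK_SIZE, lo + STACK_SIZE + 2 * PAGE_SIZE));
    return 0;
}
/* --- end sketch --- */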
JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx) { // Modifying the ucontext should work but there is concern that @@ -111,47 +121,48 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si // checks that the syscall is made in the signal handler and that // the ucontext address is valid. Hopefully the value of the ucontext // will not be part of the validation... - if (!ptls || !ptls->signal_stack) { - sigset_t sset; - sigemptyset(&sset); - sigaddset(&sset, sig); - pthread_sigmask(SIG_UNBLOCK, &sset, NULL); - fptr(); - return; - } uintptr_t rsp = jl_get_rsp_from_ctx(_ctx); - if (is_addr_on_sigstack(ptls, (void*)rsp)) { - rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment - } - else { - rsp = (uintptr_t)ptls->signal_stack + sig_stack_size; - } - assert(rsp % 16 == 0); + rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment #if defined(_OS_LINUX_) && defined(_CPU_X86_64_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); + *(uintptr_t*)rsp = 0; ctx->uc_mcontext.gregs[REG_RSP] = rsp; ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr; #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); + *(uintptr_t*)rsp = 0; ctx->uc_mcontext.mc_rsp = rsp; ctx->uc_mcontext.mc_rip = (uintptr_t)fptr; #elif defined(_OS_LINUX_) && defined(_CPU_X86_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); + *(uintptr_t*)rsp = 0; ctx->uc_mcontext.gregs[REG_ESP] = rsp; ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr; #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); + *(uintptr_t*)rsp = 0; ctx->uc_mcontext.mc_esp = rsp; ctx->uc_mcontext.mc_eip = (uintptr_t)fptr; +#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_) + struct sigcontext *ctx = (struct sigcontext *)_ctx; + rsp -= sizeof(void*); + *(uintptr_t*)rsp = 0; + ctx->sc_rsp = rsp; + ctx->sc_rip = fptr; #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_) ucontext_t *ctx = (ucontext_t*)_ctx; ctx->uc_mcontext.sp = rsp; ctx->uc_mcontext.regs[29] = 0; // Clear link register (x29) ctx->uc_mcontext.pc = (uintptr_t)fptr; +#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_) + ucontext_t *ctx = (ucontext_t*)_ctx; + ctx->uc_mcontext.mc_gpregs.gp_sp = rsp; + ctx->uc_mcontext.mc_gpregs.gp_x[29] = 0; // Clear link register (x29) + ctx->uc_mcontext.mc_gpregs.gp_elr = (uintptr_t)fptr; #elif defined(_OS_LINUX_) && defined(_CPU_ARM_) ucontext_t *ctx = (ucontext_t*)_ctx; uintptr_t target = (uintptr_t)fptr; @@ -171,22 +182,11 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si ctx->uc_mcontext.arm_sp = rsp; ctx->uc_mcontext.arm_lr = 0; // Clear link register ctx->uc_mcontext.arm_pc = target; -#elif defined(_OS_DARWIN_) && (defined(_CPU_X86_64_) || defined(_CPU_AARCH64_)) - // Only used for SIGFPE. - // This doesn't seems to be reliable when the SIGFPE is generated - // from a divide-by-zero exception, which is now handled by - // `catch_exception_raise`. It works fine when a signal is received - // due to `kill`/`raise` though. 
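/* --- Illustrative sketch (not part of the diff): jl_call_in_ctx above rewrites
 *     the kernel-provided ucontext so that `fptr` runs once the signal handler
 *     returns. As a portable, non-signal illustration of redirecting execution
 *     onto a prepared stack, here is the obsolescent POSIX <ucontext.h> API;
 *     the real handlers edit the saved machine registers directly instead of
 *     using makecontext/swapcontext. --- */
#define _XOPEN_SOURCE 700   /* needed for the ucontext API on some platforms */
#include <stdio.h>
#include <ucontext.h>

static ucontext_t main_ctx, func_ctx;

static void on_alternate_stack(void)
{
    puts("running on the prepared stack");
    /* returning resumes main_ctx because of uc_link */
}

int main(void)
{
    static char stack[64 * 1024];
    if (getcontext(&func_ctx) != 0)
        return 1;
    func_ctx.uc_stack.ss_sp = stack;
    func_ctx.uc_stack.ss_size = sizeof(stack);
    func_ctx.uc_link = &main_ctx;            /* where control goes on return */
    makecontext(&func_ctx, on_alternate_stack, 0);
    swapcontext(&main_ctx, &func_ctx);       /* transfer, then come back */
    puts("back on the original stack");
    return 0;
}
/* --- end sketch --- */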
- ucontext64_t *ctx = (ucontext64_t*)_ctx; -#if defined(_CPU_X86_64_) - rsp -= sizeof(void*); - ctx->uc_mcontext64->__ss.__rsp = rsp; - ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr; -#else - ctx->uc_mcontext64->__ss.__sp = rsp; - ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr; - ctx->uc_mcontext64->__ss.__lr = 0; -#endif +#elif defined(_OS_LINUX_) && (defined(_CPU_RISCV64_)) + ucontext_t *ctx = (ucontext_t*)_ctx; + ctx->uc_mcontext.__gregs[REG_SP] = rsp; + ctx->uc_mcontext.__gregs[REG_RA] = 0; // Clear return address address (ra) + ctx->uc_mcontext.__gregs[REG_PC] = (uintptr_t)fptr; #else #pragma message("julia: throw-in-context not supported on this platform") // TODO Add support for PowerPC(64)? @@ -197,30 +197,38 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si fptr(); #endif } +#endif static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx) { jl_ptls_t ptls = ct->ptls; - if (!jl_get_safe_restore()) { - ptls->bt_size = - rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx), - ct->gcstack); - ptls->sig_exception = e; + assert(!jl_get_safe_restore()); + ptls->bt_size = + rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx), + ct->gcstack); + ptls->sig_exception = e; + ptls->io_wait = 0; + jl_handler_t *eh = ct->eh; + if (eh != NULL) { + asan_unpoison_task_stack(ct, &eh->eh_ctx); + jl_longjmp_in_ctx(sig, sigctx, eh->eh_ctx); + } + else { + jl_no_exc_handler(e, ct); } - jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx); } static pthread_t signals_thread; -static int is_addr_on_stack(jl_task_t *ct, void *addr) +static int is_addr_on_stack(jl_task_t *ct, void *addr) JL_NOTSAFEPOINT { - if (ct->copy_stack) { + if (ct->ctx.copy_stack) { jl_ptls_t ptls = ct->ptls; return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize && (char*)addr < (char*)ptls->stackbase); } - return ((char*)addr > (char*)ct->stkbuf && - (char*)addr < (char*)ct->stkbuf + ct->bufsz); + return ((char*)addr > (char*)ct->ctx.stkbuf && + (char*)addr < (char*)ct->ctx.stkbuf + ct->ctx.bufsz); } static void sigdie_handler(int sig, siginfo_t *info, void *context) @@ -229,15 +237,22 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context) uv_tty_reset_mode(); if (sig == SIGILL) jl_show_sigill(context); - jl_critical_error(sig, info->si_code, jl_to_bt_context(context), jl_get_current_task()); + jl_task_t *ct = jl_get_current_task(); + jl_critical_error(sig, info->si_code, jl_to_bt_context(context), ct); + if (ct) + jl_atomic_store_relaxed(&ct->ptls->safepoint, (size_t*)NULL + 1); if (info->si_code == 0 || info->si_code == SI_USER || #ifdef SI_KERNEL info->si_code == SI_KERNEL || #endif info->si_code == SI_QUEUE || +#ifdef SI_MESGQ info->si_code == SI_MESGQ || +#endif +#ifdef SI_ASYNCIO info->si_code == SI_ASYNCIO || +#endif #ifdef SI_SIGIO info->si_code == SI_SIGIO || #endif @@ -252,7 +267,8 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context) sig != SIGFPE && sig != SIGTRAP) raise(sig); - // fall-through return to re-execute faulting statement (but without the error handler) + // fall-through return to re-execute faulting statement (but without the + // error handler and the pgcstack having been destroyed) } #if defined(_CPU_X86_64_) || defined(_CPU_X86_) @@ -290,7 +306,28 @@ int exc_reg_is_write_fault(uintptr_t esr) { #if defined(HAVE_MACH) #include "signals-mach.c" #else +#include +#include +#include + +typedef struct { + void (*f)(void*) JL_NOTSAFEPOINT; + void *ctx; +} callback_t; +static 
int with_dl_iterate_phdr_lock(struct dl_phdr_info *info, size_t size, void *data) +{ + jl_lock_profile(); + callback_t *callback = (callback_t*)data; + callback->f(callback->ctx); + jl_unlock_profile(); + return 1; // only call this once +} +void jl_with_stackwalk_lock(void (*f)(void*), void *ctx) +{ + callback_t callback = {f, ctx}; + dl_iterate_phdr(with_dl_iterate_phdr_lock, &callback); +} #if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_)) int is_write_fault(void *context) { @@ -322,6 +359,18 @@ int is_write_fault(void *context) { ucontext_t *ctx = (ucontext_t*)context; return exc_reg_is_write_fault(ctx->uc_mcontext.mc_err); } +#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_) +// FreeBSD seems not to expose a means of accessing ESR via `ucontext_t` on AArch64. +// TODO: Is there an alternative approach that can be taken? ESR may become accessible +// in a future release though. +int is_write_fault(void *context) { + return 0; +} +#elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_) +int is_write_fault(void *context) { + struct sigcontext *ctx = (struct sigcontext *)context; + return exc_reg_is_write_fault(ctx->sc_err); +} #else #pragma message("Implement this query for consistent PROT_NONE handling") int is_write_fault(void *context) { @@ -329,17 +378,19 @@ int is_write_fault(void *context) { } #endif -static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) +static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) JL_NOTSAFEPOINT { - return (is_addr_on_sigstack(ptls, ptr) && + return (ptls->signal_stack != NULL && + is_addr_on_sigstack(ptls, ptr) && is_addr_on_sigstack(ptls, (void*)jl_get_rsp_from_ctx(context))); } JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) { assert(sig == SIGSEGV || sig == SIGBUS); - if (jl_get_safe_restore()) { // restarting jl_ or profile - jl_call_in_ctx(NULL, &jl_sig_throw, sig, context); + jl_jmp_buf *saferestore = jl_get_safe_restore(); + if (saferestore) { // restarting jl_ or profile + jl_longjmp_in_ctx(sig, context, *saferestore); return; } jl_task_t *ct = jl_get_current_task(); @@ -348,10 +399,16 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) return; } if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && jl_addr_is_safepoint((uintptr_t)info->si_addr) && !is_write_fault(context)) { - jl_set_gc_and_wait(); + jl_set_gc_and_wait(ct); // Do not raise sigint on worker thread if (jl_atomic_load_relaxed(&ct->tid) != 0) return; + // n.b. if the user might have seen that we were in a state where it + // was safe to run GC concurrently, we might briefly enter a state + // where our execution is not consistent with the gc_state of this + // thread. That will quickly be rectified when we rerun the faulting + // instruction and end up right back here, or we start to run the + // exception handler and immediately hit the safepoint there. 
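/* --- Illustrative sketch (not part of the diff): the dl_iterate_phdr trick
 *     used by the non-Mach jl_with_stackwalk_lock above. glibc serializes
 *     dl_iterate_phdr callbacks against dlopen/dlclose, so doing the work
 *     inside a callback that returns nonzero after its first invocation runs
 *     it exactly once while the loader is quiescent. Linux/glibc specific;
 *     _GNU_SOURCE is required for the declaration. --- */
#define _GNU_SOURCE
#include <link.h>
#include <stdio.h>

static int do_work_once(struct dl_phdr_info *info, size_t size, void *data)
{
    (void)size;
    printf("loader lock held; first object: %s; user data: %s\n",
           info->dlpi_name[0] ? info->dlpi_name : "(main executable)",
           (const char*)data);
    return 1;   /* nonzero stops the iteration after this one call */
}

int main(void)
{
    dl_iterate_phdr(do_work_once, (void*)"hello");
    return 0;
}
/* --- end sketch --- */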
if (ct->ptls->defer_signal) { jl_safepoint_defer_sigint(); } @@ -364,6 +421,7 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) if (ct->eh == NULL) sigdie_handler(sig, info, context); if ((sig != SIGBUS || info->si_code == BUS_ADRERR) && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error) + stack_overflow_warning(); jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context); } else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) { @@ -383,68 +441,91 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) } } -#if !defined(JL_DISABLE_LIBUNWIND) -static unw_context_t *signal_context; -pthread_mutex_t in_signal_lock; -static pthread_cond_t exit_signal_cond; -static pthread_cond_t signal_caught_cond; +pthread_mutex_t in_signal_lock; // shared with jl_delete_thread +static bt_context_t *usr2_signal_context; // protected by in_signal_lock +static int exit_signal_cond = -1; +static int signal_caught_cond = -1; +static int signals_inflight = 0; -static void jl_thread_suspend_and_get_state(int tid, int timeout, unw_context_t **ctx) +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) { - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - ts.tv_sec += timeout; + int err; pthread_mutex_lock(&in_signal_lock); jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL; if (ct2 == NULL) { // this thread is not alive or already dead - *ctx = NULL; pthread_mutex_unlock(&in_signal_lock); - return; + return 0; } - jl_atomic_store_release(&ptls2->signal_request, 1); - pthread_kill(ptls2->system_id, SIGUSR2); - // wait for thread to acknowledge - int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts); - if (err == ETIMEDOUT) { - sig_atomic_t request = 1; - if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { - *ctx = NULL; + while (signals_inflight) { + // something is wrong, or there is already a usr2 in flight elsewhere + // try to wait for it to finish or wait for timeout + struct pollfd event = {signal_caught_cond, POLLIN, 0}; + do { + err = poll(&event, 1, timeout * 1000); + } while (err == -1 && errno == EINTR); + if (err == -1 || (event.revents & POLLIN) == 0) { + // not ready after timeout: cancel this request pthread_mutex_unlock(&in_signal_lock); - return; + return 0; } - // Request is either now 0 (meaning the other thread is waiting for - // exit_signal_cond already), - // Or it is now -1 (meaning the other thread - // is waiting for in_signal_lock, and we need to release that lock - // here for a bit, until the other thread has a chance to get to the - // exit_signal_cond) - if (request == -1) { - err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock); - assert(!err); + // consume it before continuing + eventfd_t got; + do { + err = read(signal_caught_cond, &got, sizeof(eventfd_t)); + } while (err == -1 && errno == EINTR); + if (err != sizeof(eventfd_t)) abort(); + assert(signals_inflight >= got); + signals_inflight -= got; + } + signals_inflight++; + sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1); + assert(request == 0 || request == -1); + request = 1; + err = pthread_kill(ptls2->system_id, SIGUSR2); + if (err == 0) { + // wait for thread to acknowledge or timeout + struct pollfd event = {signal_caught_cond, POLLIN, 0}; + do { + err = poll(&event, 1, timeout * 1000); + } while (err == -1 && errno == EINTR); + if (err 
!= 1 || (event.revents & POLLIN) == 0) + err = -1; + } + if (err == -1) { + // not ready after timeout: try to cancel this request + if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { + signals_inflight--; + pthread_mutex_unlock(&in_signal_lock); + return 0; } } + eventfd_t got; + do { + err = read(signal_caught_cond, &got, sizeof(eventfd_t)); + } while (err == -1 && errno == EINTR); + if (err != sizeof(eventfd_t)) abort(); + assert(signals_inflight >= got); + signals_inflight -= got; + signals_inflight++; // Now the other thread is waiting on exit_signal_cond (verify that here by // checking it is 0, and add an acquire barrier for good measure) - int request = jl_atomic_load_acquire(&ptls2->signal_request); - assert(request == 0); (void) request; - *ctx = signal_context; + request = jl_atomic_load_acquire(&ptls2->signal_request); + assert(request == 0 || request == -1); (void) request; + jl_atomic_store_release(&ptls2->signal_request, 4); // prepare to resume normally, but later code may change this + *ctx = *usr2_signal_context; + return 1; } -static void jl_thread_resume(int tid, int sig) +void jl_thread_resume(int tid) { - jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; - jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 3 : 1); - pthread_cond_broadcast(&exit_signal_cond); - pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge - // The other thread is waiting to leave exit_signal_cond (verify that here by - // checking it is 0, and add an acquire barrier for good measure) - int request = jl_atomic_load_acquire(&ptls2->signal_request); - assert(request == 0); (void) request; + int err; + eventfd_t got = 1; + err = write(exit_signal_cond, &got, sizeof(eventfd_t)); + if (err != sizeof(eventfd_t)) abort(); pthread_mutex_unlock(&in_signal_lock); } -#endif // Throw jl_interrupt_exception if the master thread is in a signal async region // or if SIGINT happens too often. @@ -453,9 +534,12 @@ static void jl_try_deliver_sigint(void) jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; jl_safepoint_enable_sigint(); jl_wake_libuv(); + pthread_mutex_lock(&in_signal_lock); + signals_inflight++; jl_atomic_store_release(&ptls2->signal_request, 2); // This also makes sure `sleep` is aborted. pthread_kill(ptls2->system_id, SIGUSR2); + pthread_mutex_unlock(&in_signal_lock); } // Write only by signal handling thread, read only by main thread @@ -472,14 +556,14 @@ CFI_NORETURN static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size) { jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; - unw_context_t *signal_context; + bt_context_t signal_context; // This also makes sure `sleep` is aborted. - jl_thread_suspend_and_get_state(0, 30, &signal_context); - if (signal_context != NULL) { + if (jl_thread_suspend_and_get_state(0, 30, &signal_context)) { thread0_exit_signo = signo; ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); - jl_thread_resume(0, -1); // resume with message 3 (call jl_exit_thread0_cb) + jl_atomic_store_release(&ptls2->signal_request, 3); + jl_thread_resume(0); // resume with message 3 (call jl_exit_thread0_cb) } else { // thread 0 is gone? 
just do the exit ourself @@ -488,12 +572,13 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size) } // request: -// -1: beginning processing [invalid outside here] +// -1: processing // 0: nothing [not from here] -// 1: get state +// 1: get state & wait for request // 2: throw sigint if `!defer_signal && io_wait` or if force throw threshold // is reached // 3: raise `thread0_exit_signo` and try to exit +// 4: no-op void usr2_handler(int sig, siginfo_t *info, void *ctx) { jl_task_t *ct = jl_get_current_task(); @@ -503,26 +588,36 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) if (ptls == NULL) return; int errno_save = errno; - // acknowledge that we saw the signal_request - sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, -1); -#if !defined(JL_DISABLE_LIBUNWIND) + sig_atomic_t request = jl_atomic_load(&ptls->signal_request); + if (request == 0) + return; + if (!jl_atomic_cmpswap(&ptls->signal_request, &request, -1)) + return; if (request == 1) { - pthread_mutex_lock(&in_signal_lock); - signal_context = jl_to_bt_context(ctx); - // acknowledge that we set the signal_caught_cond broadcast - request = jl_atomic_exchange(&ptls->signal_request, 0); - assert(request == -1); (void) request; - pthread_cond_broadcast(&signal_caught_cond); - pthread_cond_wait(&exit_signal_cond, &in_signal_lock); - request = jl_atomic_exchange(&ptls->signal_request, 0); - assert(request == 1 || request == 3); - // acknowledge that we got the resume signal - pthread_cond_broadcast(&signal_caught_cond); - pthread_mutex_unlock(&in_signal_lock); + usr2_signal_context = jl_to_bt_context(ctx); + // acknowledge that we saw the signal_request and set usr2_signal_context + int err; + eventfd_t got = 1; + err = write(signal_caught_cond, &got, sizeof(eventfd_t)); + if (err != sizeof(eventfd_t)) abort(); + sig_atomic_t processing = -1; + jl_atomic_cmpswap(&ptls->signal_request, &processing, 0); + // wait for exit signal + do { + err = read(exit_signal_cond, &got, sizeof(eventfd_t)); + } while (err == -1 && errno == EINTR); + if (err != sizeof(eventfd_t)) abort(); + assert(got == 1); + request = jl_atomic_exchange(&ptls->signal_request, -1); + usr2_signal_context = NULL; + assert(request == 2 || request == 3 || request == 4); } - else -#endif - jl_atomic_exchange(&ptls->signal_request, 0); // returns -1 + int err; + eventfd_t got = 1; + err = write(signal_caught_cond, &got, sizeof(eventfd_t)); + if (err != sizeof(eventfd_t)) abort(); + sig_atomic_t processing = -1; + jl_atomic_cmpswap(&ptls->signal_request, &processing, 0); if (request == 2) { int force = jl_check_force_sigint(); if (force || (!ptls->defer_signal && ptls->io_wait)) { @@ -531,7 +626,11 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) jl_safe_printf("WARNING: Force throwing a SIGINT\n"); // Force a throw jl_clear_force_sigint(); - jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx); + jl_jmp_buf *saferestore = jl_get_safe_restore(); + if (saferestore) // restarting jl_ or profile + jl_longjmp_in_ctx(sig, ctx, *saferestore); + else + jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx); } } else if (request == 3) { @@ -558,7 +657,7 @@ int timer_graceperiod_elapsed(void) static timer_t timerprof; static struct itimerspec itsprof; -JL_DLLEXPORT int jl_profile_start_timer(void) +JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks) { struct sigevent sigprof; @@ -567,10 +666,12 @@ JL_DLLEXPORT int jl_profile_start_timer(void) sigprof.sigev_notify = SIGEV_SIGNAL; sigprof.sigev_signo = SIGUSR1; 
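/* --- Illustrative sketch (not part of the diff): the eventfd/poll handshake
 *     that replaces the pthread condition variables above. One thread
 *     acknowledges a request by writing a counter to an eventfd; the requester
 *     waits for it with poll() so it can bail out after a timeout, then reads
 *     the counter to consume the acknowledgement. Linux-specific; the real
 *     code layers the signal_request state machine and SIGUSR2 delivery on
 *     top of this. --- */
#include <poll.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <unistd.h>

static int caught_fd;

static void *acknowledger(void *arg)
{
    (void)arg;
    eventfd_t one = 1;
    if (write(caught_fd, &one, sizeof(one)) != sizeof(one))   /* acknowledge */
        abort();
    return NULL;
}

int main(void)
{
    caught_fd = eventfd(0, EFD_CLOEXEC);
    if (caught_fd == -1)
        return 1;
    pthread_t th;
    pthread_create(&th, NULL, acknowledger, NULL);
    struct pollfd ev = {caught_fd, POLLIN, 0};
    if (poll(&ev, 1, 1000) == 1 && (ev.revents & POLLIN)) {   /* 1 second timeout */
        eventfd_t got;
        if (read(caught_fd, &got, sizeof(got)) != sizeof(got))
            abort();
        printf("acknowledged %llu time(s)\n", (unsigned long long)got);
    }
    else {
        puts("timed out waiting for acknowledgement");
    }
    pthread_join(th, NULL);
    return 0;
}
/* --- end sketch --- */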
sigprof.sigev_value.sival_ptr = &timerprof; - // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer - running = 1; + // Because SIGUSR1 is multipurpose, set `profile_running` before so that we know that the first SIGUSR1 came from the timer + profile_running = 1; + profile_all_tasks = all_tasks; if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) { - running = 0; + profile_running = 0; + profile_all_tasks = 0; return -2; } @@ -580,7 +681,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void) itsprof.it_value.tv_sec = nsecprof / GIGA; itsprof.it_value.tv_nsec = nsecprof % GIGA; if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) { - running = 0; + profile_running = 0; + profile_all_tasks = 0; return -3; } return 0; @@ -588,11 +690,24 @@ JL_DLLEXPORT int jl_profile_start_timer(void) JL_DLLEXPORT void jl_profile_stop_timer(void) { - if (running) { + uv_mutex_lock(&bt_data_prof_lock); + if (profile_running) { timer_delete(timerprof); last_timer_delete_time = jl_hrtime(); - running = 0; + profile_running = 0; } + uv_mutex_unlock(&bt_data_prof_lock); +} + +#elif defined(__OpenBSD__) + +JL_DLLEXPORT int jl_profile_start_timer(void) +{ + return -1; +} + +JL_DLLEXPORT void jl_profile_stop_timer(void) +{ } #else @@ -618,30 +733,41 @@ static void allocate_segv_handler(void) } } -static void *alloc_sigstack(size_t *ssize) -{ - void *stk = jl_malloc_stack(ssize, NULL); - if (stk == NULL) - jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno)); - return stk; -} - void jl_install_thread_signal_handler(jl_ptls_t ptls) { - size_t ssize = sig_stack_size; - void *signal_stack = alloc_sigstack(&ssize); - ptls->signal_stack = signal_stack; +#ifdef HAVE_MACH + attach_exception_port(pthread_mach_thread_np(ptls->system_id), 0); +#endif stack_t ss; - ss.ss_flags = 0; - ss.ss_size = ssize - 16; - ss.ss_sp = signal_stack; - if (sigaltstack(&ss, NULL) < 0) { + if (sigaltstack(NULL, &ss) < 0) jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); + if ((ss.ss_flags & SS_DISABLE) != SS_DISABLE) + return; // someone else appears to have already set this up, so just use that + size_t ssize = sig_stack_size; + void *signal_stack = jl_malloc_stack(&ssize, NULL); + ss.ss_flags = 0; + ss.ss_size = ssize; + assert(ssize != 0); + +#ifndef _OS_OPENBSD_ + /* fallback to malloc(), but it isn't possible on OpenBSD */ + if (signal_stack == NULL) { + signal_stack = malloc(ssize); + ssize = 0; + if (signal_stack == NULL) + jl_safe_printf("\nwarning: julia signal alt stack could not be allocated (StackOverflowError will be fatal on this thread).\n"); + else + jl_safe_printf("\nwarning: julia signal stack allocated without guard page (launch foreign threads earlier to avoid this warning).\n"); } - -#ifdef HAVE_MACH - attach_exception_port(pthread_mach_thread_np(ptls->system_id), 0); #endif + + if (signal_stack != NULL) { + ss.ss_sp = signal_stack; + if (sigaltstack(&ss, NULL) < 0) + jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); + ptls->signal_stack = signal_stack; + ptls->signal_stack_size = ssize; + } } const static int sigwait_sigs[] = { @@ -687,7 +813,7 @@ void trigger_profile_peek(void) jl_safe_printf("\n======================================================================================\n"); jl_safe_printf("Information request received. 
A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration); jl_safe_printf("======================================================================================\n"); - if (bt_size_max == 0){ + if (profile_bt_size_max == 0) { // If the buffer hasn't been initialized, initialize with default size // Keep these values synchronized with Profile.default_init() if (jl_profile_init(10000000, 1000000) == -1) { @@ -695,17 +821,100 @@ void trigger_profile_peek(void) return; } } - bt_size_cur = 0; // clear profile buffer - if (jl_profile_start_timer() < 0) + profile_bt_size_cur = 0; // clear profile buffer + if (jl_profile_start_timer(0) < 0) jl_safe_printf("ERROR: Could not start profile timer\n"); else profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9); } +#if !defined(JL_DISABLE_LIBUNWIND) + +static jl_bt_element_t signal_bt_data[JL_MAX_BT_SIZE + 1]; +static size_t signal_bt_size = 0; +static void do_critical_profile(void *ctx) +{ + bt_context_t signal_context; + // sample each thread, round-robin style in reverse order + // (so that thread zero gets notified last) + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + for (int i = nthreads; i-- > 0; ) { + // notify thread to stop + if (!jl_thread_suspend_and_get_state(i, 1, &signal_context)) + continue; + + // do backtrace on thread contexts for critical signals + // this part must be signal-handler safe + signal_bt_size += rec_backtrace_ctx(signal_bt_data + signal_bt_size, + JL_MAX_BT_SIZE / nthreads - 1, + &signal_context, NULL); + signal_bt_data[signal_bt_size++].uintptr = 0; + jl_thread_resume(i); + } +} + +static void do_profile(void *ctx) +{ + bt_context_t signal_context; + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + int *randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; ) { + // Stop the threads in the random order. + int tid = randperm[idx]; + // do backtrace for profiler + if (!profile_running) + return; + if (jl_profile_is_buffer_full()) { + // Buffer full: Delete the timer + jl_profile_stop_timer(); + return; + } + // notify thread to stop + if (!jl_thread_suspend_and_get_state(tid, 1, &signal_context)) + return; + // unwinding can fail, so keep track of the current state + // and restore from the SEGV handler if anything happens. + jl_jmp_buf *old_buf = jl_get_safe_restore(); + jl_jmp_buf buf; + + jl_set_safe_restore(&buf); + if (jl_setjmp(buf, 0)) { + jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); + } + else { + // Get backtrace data + profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, + profile_bt_size_max - profile_bt_size_cur - 1, &signal_context, NULL); + } + jl_set_safe_restore(old_buf); + + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + + // store threadid but add 1 as 0 is preserved to indicate end of block + profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls2->tid + 1; + + // store task id (never null) + profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task); + + // store cpu cycle clock + profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock(); + + // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block) + int state = jl_atomic_load_relaxed(&ptls2->sleep_check_state) == 0 ? 
PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = state; + + // Mark the end of this block with two 0's + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + + // notify thread to resume + jl_thread_resume(tid); + } +} +#endif + static void *signal_listener(void *arg) { - static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1]; - static size_t bt_size = 0; sigset_t sset; int sig, critical, profile; jl_sigsetset(&sset); @@ -801,13 +1010,13 @@ static void *signal_listener(void *arg) int doexit = critical; #ifdef SIGINFO if (sig == SIGINFO) { - if (running != 1) + if (profile_running != 1) trigger_profile_peek(); doexit = 0; } #else if (sig == SIGUSR1) { - if (running != 1 && timer_graceperiod_elapsed()) + if (profile_running != 1 && timer_graceperiod_elapsed()) trigger_profile_peek(); doexit = 0; } @@ -837,83 +1046,22 @@ static void *signal_listener(void *arg) } } - int nthreads = jl_atomic_load_acquire(&jl_n_threads); - bt_size = 0; + signal_bt_size = 0; #if !defined(JL_DISABLE_LIBUNWIND) - unw_context_t *signal_context; - // sample each thread, round-robin style in reverse order - // (so that thread zero gets notified last) - if (critical || profile) { - jl_lock_profile(); - int *randperm; - if (profile) - randperm = profile_get_randperm(nthreads); - for (int idx = nthreads; idx-- > 0; ) { - // Stop the threads in the random or reverse round-robin order. - int i = profile ? randperm[idx] : idx; - // notify thread to stop - jl_thread_suspend_and_get_state(i, 1, &signal_context); - if (signal_context == NULL) - continue; - - // do backtrace on thread contexts for critical signals - // this part must be signal-handler safe - if (critical) { - bt_size += rec_backtrace_ctx(bt_data + bt_size, - JL_MAX_BT_SIZE / nthreads - 1, - signal_context, NULL); - bt_data[bt_size++].uintptr = 0; - } - - // do backtrace for profiler - if (profile && running) { - if (jl_profile_is_buffer_full()) { - // Buffer full: Delete the timer - jl_profile_stop_timer(); - } - else { - // unwinding can fail, so keep track of the current state - // and restore from the SEGV handler if anything happens. 
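/* --- Illustrative sketch (not part of the diff): the safe-restore guard that
 *     do_profile above wraps around stack unwinding. A jmp_buf is published
 *     before walking another thread's stack, the fault handler would longjmp
 *     back to it instead of crashing, and the previous buffer is restored
 *     afterwards. The fault here is only simulated by calling the recovery
 *     routine directly. --- */
#include <setjmp.h>
#include <stdio.h>

static jmp_buf *safe_restore;

static void simulated_fault(void)
{
    if (safe_restore)
        longjmp(*safe_restore, 1);   /* what the SEGV handler does on our behalf */
}

static int walk_foreign_stack(int should_fault)
{
    if (should_fault)
        simulated_fault();           /* stands in for touching unmapped memory */
    return 42;                       /* frames collected */
}

int main(void)
{
    jmp_buf buf, *old = safe_restore;
    safe_restore = &buf;
    if (setjmp(buf) == 0)
        printf("collected %d frames\n", walk_foreign_stack(1));
    else
        puts("WARNING: unwinder touched invalid memory; sample skipped");
    safe_restore = old;              /* always restore the previous guard */
    return 0;
}
/* --- end sketch --- */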
- jl_jmp_buf *old_buf = jl_get_safe_restore(); - jl_jmp_buf buf; - - jl_set_safe_restore(&buf); - if (jl_setjmp(buf, 0)) { - jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); - } else { - // Get backtrace data - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, signal_context, NULL); - } - jl_set_safe_restore(old_buf); - - jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[i]; - - // store threadid but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1; - - // store task id (never null) - bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task); - - // store cpu cycle clock - bt_data_prof[bt_size_cur++].uintptr = cycleclock(); - - // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls2->sleep_check_state) + 1; - - // Mark the end of this block with two 0's - bt_data_prof[bt_size_cur++].uintptr = 0; - bt_data_prof[bt_size_cur++].uintptr = 0; - } - } - - // notify thread to resume - jl_thread_resume(i, sig); + if (critical) { + jl_with_stackwalk_lock(do_critical_profile, NULL); + } + else if (profile) { + if (profile_all_tasks) { + // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace` + jl_profile_task(); + } + else { + jl_with_stackwalk_lock(do_profile, NULL); } - jl_unlock_profile(); } #ifndef HAVE_MACH - if (profile && running) { + if (profile_running) { jl_check_profile_autostop(); #if defined(HAVE_TIMER) timer_settime(timerprof, 0, &itsprof, NULL); @@ -930,23 +1078,24 @@ static void *signal_listener(void *arg) //#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L && !HAVE_KEVENT // si_code = info.si_code; //#endif - jl_exit_thread0(sig, bt_data, bt_size); + jl_exit_thread0(sig, signal_bt_data, signal_bt_size); } else if (critical) { // critical in this case actually means SIGINFO request #ifndef SIGINFO // SIGINFO already prints something similar automatically - int nrunning = 0; + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + int n_threads_running = 0; for (int idx = nthreads; idx-- > 0; ) { jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx]; - nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state); + n_threads_running += !jl_atomic_load_relaxed(&ptls2->sleep_check_state); } - jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, nthreads); + jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? 
jl_options.julia_bin : "julia", uv_os_getpid(), n_threads_running, nthreads); #endif jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); size_t i; - for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { - jl_print_bt_entry_codeloc(bt_data + i); + for (i = 0; i < signal_bt_size; i += jl_bt_entry_size(signal_bt_data + i)) { + jl_print_bt_entry_codeloc(signal_bt_data + i); } } } @@ -962,10 +1111,12 @@ void restore_signals(void) jl_sigsetset(&sset); pthread_sigmask(SIG_SETMASK, &sset, 0); -#if !defined(HAVE_MACH) && !defined(JL_DISABLE_LIBUNWIND) +#if !defined(HAVE_MACH) + exit_signal_cond = eventfd(0, EFD_CLOEXEC); + signal_caught_cond = eventfd(0, EFD_CLOEXEC); if (pthread_mutex_init(&in_signal_lock, NULL) != 0 || - pthread_cond_init(&exit_signal_cond, NULL) != 0 || - pthread_cond_init(&signal_caught_cond, NULL) != 0) { + exit_signal_cond == -1 || + signal_caught_cond == -1) { jl_error("SIGUSR pthread init failed"); } #endif @@ -978,8 +1129,9 @@ void restore_signals(void) static void fpe_handler(int sig, siginfo_t *info, void *context) { (void)info; - if (jl_get_safe_restore()) { // restarting jl_ or profile - jl_call_in_ctx(NULL, &jl_sig_throw, sig, context); + jl_jmp_buf *saferestore = jl_get_safe_restore(); + if (saferestore) { // restarting jl_ or profile + jl_longjmp_in_ctx(sig, context, *saferestore); return; } jl_task_t *ct = jl_get_current_task(); @@ -989,21 +1141,63 @@ static void fpe_handler(int sig, siginfo_t *info, void *context) jl_throw_in_ctx(ct, jl_diverror_exception, sig, context); } +static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf) +{ +#if defined(_OS_DARWIN_) + jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(_ctx), jmpbuf); +#else + if (jl_simulate_longjmp(jmpbuf, jl_to_bt_context(_ctx))) + return; + sigset_t sset; + sigemptyset(&sset); + sigaddset(&sset, sig); + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); + jl_longjmp(jmpbuf, 1); +#endif +} + static void sigint_handler(int sig) { jl_sigint_passed = 1; } +#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) +static void sigtrap_handler(int sig, siginfo_t *info, void *context) +{ + uintptr_t pc = ((ucontext_t*)context)->uc_mcontext->__ss.__pc; // TODO: Do this in linux as well + uint32_t* code = (uint32_t*)(pc); // https://gcc.gnu.org/legacy-ml/gcc-patches/2013-11/msg02228.html + if (*code == 0xd4200020) { // brk #0x1 which is what LLVM defines as trap + signal(sig, SIG_DFL); + sig = SIGILL; // redefine this as as an "unreachable reached" error message + sigdie_handler(sig, info, context); + } +} +#endif + void jl_install_default_signal_handlers(void) { struct sigaction actf; memset(&actf, 0, sizeof(struct sigaction)); sigemptyset(&actf.sa_mask); actf.sa_sigaction = fpe_handler; - actf.sa_flags = SA_ONSTACK | SA_SIGINFO; + actf.sa_flags = SA_SIGINFO; if (sigaction(SIGFPE, &actf, NULL) < 0) { jl_errorf("fatal error: sigaction: %s", strerror(errno)); } +#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) + struct sigaction acttrap; + memset(&acttrap, 0, sizeof(struct sigaction)); + sigemptyset(&acttrap.sa_mask); + acttrap.sa_sigaction = sigtrap_handler; + acttrap.sa_flags = SA_SIGINFO; + if (sigaction(SIGTRAP, &acttrap, NULL) < 0) { + jl_errorf("fatal error: sigaction: %s", strerror(errno)); + } +#else + if (signal(SIGTRAP, SIG_IGN) == SIG_ERR) { + jl_error("fatal error: Couldn't set SIGTRAP"); + } +#endif struct sigaction actint; memset(&actint, 0, sizeof(struct sigaction)); sigemptyset(&actint.sa_mask); @@ -1015,9 +1209,6 @@ void jl_install_default_signal_handlers(void) if 
(signal(SIGPIPE, SIG_IGN) == SIG_ERR) { jl_error("fatal error: Couldn't set SIGPIPE"); } - if (signal(SIGTRAP, SIG_IGN) == SIG_ERR) { - jl_error("fatal error: Couldn't set SIGTRAP"); - } #if defined(HAVE_MACH) allocate_mach_handler(); diff --git a/src/signals-win.c b/src/signals-win.c index 5dd6b34558ca6..c8ae74f52dba4 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -4,7 +4,7 @@ // Note that this file is `#include`d by "signal-handling.c" #include // hidden by LEAN_AND_MEAN -#define sig_stack_size 131072 // 128k reserved for SEGV handling +static const size_t sig_stack_size = 131072; // 128k reserved for backtrace_fiber for stack overflow handling // Copied from MINGW_FLOAT_H which may not be found due to a collision with the builtin gcc float.h // eventually we can probably integrate this into OpenLibm. @@ -86,9 +86,13 @@ void __cdecl crt_sig_handler(int sig, int num) } break; default: // SIGSEGV, SIGTERM, SIGILL, SIGABRT - if (sig == SIGSEGV && jl_get_safe_restore()) { - signal(sig, (void (__cdecl *)(int))crt_sig_handler); - jl_sig_throw(); + if (sig == SIGSEGV) { // restarting jl_ or profile + jl_jmp_buf *saferestore = jl_get_safe_restore(); + if (saferestore) { + signal(sig, (void (__cdecl *)(int))crt_sig_handler); + jl_longjmp(*saferestore, 1); + return; + } } memset(&Context, 0, sizeof(Context)); RtlCaptureContext(&Context); @@ -109,6 +113,8 @@ static jl_ptls_t stkerror_ptls; static int have_backtrace_fiber; static void JL_NORETURN start_backtrace_fiber(void) { + // print the warning (this mysteriously needs a lot of stack for the WriteFile syscall) + stack_overflow_warning(); // collect the backtrace stkerror_ptls->bt_size = rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx, @@ -124,42 +130,41 @@ void restore_signals(void) SetConsoleCtrlHandler(NULL, 0); } -void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread) +int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c); + +static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread) { - jl_task_t *ct = jl_current_task; + jl_jmp_buf *saferestore = jl_get_safe_restore(); + if (saferestore) { // restarting jl_ or profile + if (!jl_simulate_longjmp(*saferestore, ctxThread)) + abort(); + return; + } + assert(ct && excpt); jl_ptls_t ptls = ct->ptls; -#if defined(_CPU_X86_64_) - DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8; -#elif defined(_CPU_X86_) - DWORD32 Esp = (ctxThread->Esp & (DWORD32)-16) - 4; -#else -#error WIN16 not supported :P -#endif - if (!jl_get_safe_restore()) { - assert(excpt != NULL); - ptls->bt_size = 0; - if (excpt != jl_stackovf_exception) { - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, - ct->gcstack); - } - else if (have_backtrace_fiber) { - uv_mutex_lock(&backtrace_lock); - stkerror_ctx = ctxThread; - stkerror_ptls = ptls; - jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber); - uv_mutex_unlock(&backtrace_lock); - } - ptls->sig_exception = excpt; + ptls->bt_size = 0; + if (excpt != jl_stackovf_exception) { + ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, + ct->gcstack); + } + else if (have_backtrace_fiber) { + uv_mutex_lock(&backtrace_lock); + stkerror_ctx = ctxThread; + stkerror_ptls = ptls; + jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber); + uv_mutex_unlock(&backtrace_lock); + } + ptls->sig_exception = excpt; + ptls->io_wait = 0; + jl_handler_t *eh = ct->eh; + if (eh != NULL) { + asan_unpoison_task_stack(ct, &eh->eh_ctx); + if (!jl_simulate_longjmp(eh->eh_ctx, 
ctxThread)) + abort(); + } + else { + jl_no_exc_handler(excpt, ct); } -#if defined(_CPU_X86_64_) - *(DWORD64*)Rsp = 0; - ctxThread->Rsp = Rsp; - ctxThread->Rip = (DWORD64)&jl_sig_throw; -#elif defined(_CPU_X86_) - *(DWORD32*)Esp = 0; - ctxThread->Esp = Esp; - ctxThread->Eip = (DWORD)&jl_sig_throw; -#endif } HANDLE hMainThread = INVALID_HANDLE_VALUE; @@ -193,7 +198,8 @@ static void jl_try_deliver_sigint(void) jl_safe_printf("error: GetThreadContext failed\n"); return; } - jl_throw_in_ctx(jl_interrupt_exception, &ctxThread); + jl_task_t *ct = jl_atomic_load_relaxed(&ptls2->current_task); + jl_throw_in_ctx(ct, jl_interrupt_exception, &ctxThread); ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; if (!SetThreadContext(hMainThread, &ctxThread)) { jl_safe_printf("error: SetThreadContext failed\n"); @@ -237,20 +243,20 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) case EXCEPTION_INT_DIVIDE_BY_ZERO: if (ct->eh != NULL) { fpreset(); - jl_throw_in_ctx(jl_diverror_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_diverror_exception, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; } break; case EXCEPTION_STACK_OVERFLOW: if (ct->eh != NULL) { ptls->needs_resetstkoflw = 1; - jl_throw_in_ctx(jl_stackovf_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_stackovf_exception, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; } break; case EXCEPTION_ACCESS_VIOLATION: if (jl_addr_is_safepoint(ExceptionInfo->ExceptionRecord->ExceptionInformation[1])) { - jl_set_gc_and_wait(); + jl_set_gc_and_wait(ct); // Do not raise sigint on worker thread if (ptls->tid != 0) return EXCEPTION_CONTINUE_EXECUTION; @@ -259,17 +265,17 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) } else if (jl_safepoint_consume_sigint()) { jl_clear_force_sigint(); - jl_throw_in_ctx(jl_interrupt_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_interrupt_exception, ExceptionInfo->ContextRecord); } return EXCEPTION_CONTINUE_EXECUTION; } if (jl_get_safe_restore()) { - jl_throw_in_ctx(NULL, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(NULL, NULL, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; } if (ct->eh != NULL) { if (ExceptionInfo->ExceptionRecord->ExceptionInformation[0] == 1) { // writing to read-only memory (e.g. 
mmap) - jl_throw_in_ctx(jl_readonlymemory_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_readonlymemory_exception, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; } } @@ -326,7 +332,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) default: jl_safe_printf("UNKNOWN"); break; } - jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); + jl_safe_printf(" at 0x%zx -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); jl_critical_error(0, 0, ExceptionInfo->ContextRecord, ct); @@ -344,77 +350,122 @@ JL_DLLEXPORT void jl_install_sigint_handler(void) static volatile HANDLE hBtThread = 0; +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) +{ + (void)timeout; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (ptls2 == NULL) // this thread is not alive + return 0; + jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task); + if (ct2 == NULL) // this thread is already dead + return 0; + HANDLE hThread = ptls2->system_id; + if ((DWORD)-1 == SuspendThread(hThread)) + return 0; + assert(sizeof(*ctx) == sizeof(CONTEXT)); + memset(ctx, 0, sizeof(CONTEXT)); + ctx->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + if (!GetThreadContext(hThread, ctx)) { + if ((DWORD)-1 == ResumeThread(hThread)) + abort(); + return 0; + } + return 1; +} + +void jl_thread_resume(int tid) +{ + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + HANDLE hThread = ptls2->system_id; + if ((DWORD)-1 == ResumeThread(hThread)) { + fputs("failed to resume main thread! aborting.", stderr); + abort(); + } +} + +void jl_lock_stackwalk(void) +{ + uv_mutex_lock(&jl_in_stackwalk); + jl_lock_profile(); +} + +void jl_unlock_stackwalk(void) +{ + jl_unlock_profile(); + uv_mutex_unlock(&jl_in_stackwalk); +} + +void jl_with_stackwalk_lock(void (*f)(void*), void *ctx) +{ + jl_lock_stackwalk(); + f(ctx); + jl_unlock_stackwalk(); +} + + static DWORD WINAPI profile_bt( LPVOID lparam ) { // Note: illegal to use jl_* functions from this thread except for profiling-specific functions while (1) { DWORD timeout_ms = nsecprof / (GIGA / 1000); Sleep(timeout_ms > 0 ? timeout_ms : 1); - if (running) { + if (profile_running) { if (jl_profile_is_buffer_full()) { jl_profile_stop_timer(); // does not change the thread state SuspendThread(GetCurrentThread()); continue; } + else if (profile_all_tasks) { + // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace` + jl_profile_task(); + } else { - uv_mutex_lock(&jl_in_stackwalk); - jl_lock_profile(); - if ((DWORD)-1 == SuspendThread(hMainThread)) { - fputs("failed to suspend main thread. aborting profiling.", stderr); - break; - } + // TODO: bring this up to parity with other OS by adding loop over tid here + jl_lock_stackwalk(); CONTEXT ctxThread; - memset(&ctxThread, 0, sizeof(CONTEXT)); - ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; - if (!GetThreadContext(hMainThread, &ctxThread)) { - fputs("failed to get context from main thread. aborting profiling.", stderr); + if (!jl_thread_suspend_and_get_state(0, 0, &ctxThread)) { + jl_unlock_stackwalk(); + fputs("failed to suspend main thread. 
aborting profiling.", stderr); jl_profile_stop_timer(); + break; } - else { - // Get backtrace data - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, &ctxThread, NULL); + // Get backtrace data + profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, + profile_bt_size_max - profile_bt_size_cur - 1, &ctxThread, NULL); - jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread - // store threadid but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; + // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block + profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls->tid + 1; - // store task id (never null) - bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); + // META_OFFSET_TASKID store task id (never null) + profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); - // store cpu cycle clock - bt_data_prof[bt_size_cur++].uintptr = cycleclock(); + // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock + profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock(); - // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1; + // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block) + int state = jl_atomic_load_relaxed(&ptls->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = state; - // Mark the end of this block with two 0's - bt_data_prof[bt_size_cur++].uintptr = 0; - bt_data_prof[bt_size_cur++].uintptr = 0; - } - jl_unlock_profile(); - uv_mutex_unlock(&jl_in_stackwalk); - if ((DWORD)-1 == ResumeThread(hMainThread)) { - jl_profile_stop_timer(); - fputs("failed to resume main thread! aborting.", stderr); - jl_gc_debug_critical_error(); - abort(); - } + // Mark the end of this block with two 0's + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + jl_unlock_stackwalk(); + jl_thread_resume(0); jl_check_profile_autostop(); } } } - jl_unlock_profile(); uv_mutex_unlock(&jl_in_stackwalk); jl_profile_stop_timer(); - hBtThread = 0; + hBtThread = NULL; return 0; } static volatile TIMECAPS timecaps; -JL_DLLEXPORT int jl_profile_start_timer(void) +JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks) { if (hBtThread == NULL) { @@ -442,20 +493,24 @@ JL_DLLEXPORT int jl_profile_start_timer(void) return -2; } } - if (running == 0) { + if (profile_running == 0) { // Failure to change the timer resolution is not fatal. However, it is important to // ensure that the timeBeginPeriod/timeEndPeriod is paired. 
if (TIMERR_NOERROR != timeBeginPeriod(timecaps.wPeriodMin)) timecaps.wPeriodMin = 0; } - running = 1; // set `running` finally + profile_all_tasks = all_tasks; + profile_running = 1; // set `profile_running` finally return 0; } JL_DLLEXPORT void jl_profile_stop_timer(void) { - if (running && timecaps.wPeriodMin) + uv_mutex_lock(&bt_data_prof_lock); + if (profile_running && timecaps.wPeriodMin) timeEndPeriod(timecaps.wPeriodMin); - running = 0; + profile_running = 0; + profile_all_tasks = 0; + uv_mutex_unlock(&bt_data_prof_lock); } void jl_install_default_signal_handlers(void) diff --git a/src/simplevector.c b/src/simplevector.c index 65217715ae55f..5f1fd744abd0c 100644 --- a/src/simplevector.c +++ b/src/simplevector.c @@ -79,7 +79,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a) { size_t n = jl_svec_len(a); jl_svec_t *c = jl_alloc_svec_uninit(n); - memmove_refs((void**)jl_svec_data(c), (void**)jl_svec_data(a), n); + memmove_refs((_Atomic(void*)*)jl_svec_data(c), (_Atomic(void*)*)jl_svec_data(a), n); return c; } @@ -96,10 +96,3 @@ JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT { return jl_svec_len(t); } - -JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i) -{ - jl_value_t *v = jl_svecref(t, (size_t)i); - assert(v != NULL); - return v; -} diff --git a/src/smallintset.c b/src/smallintset.c index fa647b57e7d3e..a80a18009c9db 100644 --- a/src/smallintset.c +++ b/src/smallintset.c @@ -24,87 +24,103 @@ extern "C" { #endif -static inline size_t jl_intref(const jl_array_t *arr, size_t idx) JL_NOTSAFEPOINT +static inline size_t ignore_tombstone(size_t val, size_t tombstone) JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) - return jl_atomic_load_relaxed(&((_Atomic(uint8_t)*)jl_array_data(arr))[idx]); - else if (el == (jl_value_t*)jl_uint16_type) - return jl_atomic_load_relaxed(&((_Atomic(uint16_t)*)jl_array_data(arr))[idx]); - else if (el == (jl_value_t*)jl_uint32_type) - return jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)jl_array_data(arr))[idx]); + return val == tombstone ? 0 : val; +} +static inline size_t jl_intref(const jl_genericmemory_t *arr, size_t idx) JL_NOTSAFEPOINT +{ + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) + return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint8_t)*)arr->ptr)[idx]), (uint8_t)-1); + else if (el == jl_memory_uint16_type) + return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1); + else if (el == jl_memory_uint32_type) + return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)arr->ptr)[idx]), UINT32_MAX); else abort(); } -static inline size_t jl_intref_acquire(const jl_array_t *arr, size_t idx) JL_NOTSAFEPOINT +static inline size_t acquire_tombstone(size_t val, size_t tombstone) JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) - return jl_atomic_load_acquire(&((_Atomic(uint8_t)*)jl_array_data(arr))[idx]); - else if (el == (jl_value_t*)jl_uint16_type) - return jl_atomic_load_acquire(&((_Atomic(uint16_t)*)jl_array_data(arr))[idx]); - else if (el == (jl_value_t*)jl_uint32_type) - return jl_atomic_load_acquire(&((_Atomic(uint32_t)*)jl_array_data(arr))[idx]); + return val == tombstone ? 
(size_t)-1 : val; +} +static inline size_t jl_intref_acquire(const jl_genericmemory_t *arr, size_t idx) JL_NOTSAFEPOINT +{ + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) + return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint8_t)*)arr->ptr)[idx]), (uint8_t)-1); + else if (el == jl_memory_uint16_type) + return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1); + else if (el == jl_memory_uint32_type) + return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint32_t)*)arr->ptr)[idx]), UINT32_MAX); else abort(); } -static inline void jl_intset_release(const jl_array_t *arr, size_t idx, size_t val) JL_NOTSAFEPOINT +static inline void jl_intset_release(const jl_genericmemory_t *arr, size_t idx, size_t val) JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) - jl_atomic_store_release(&((_Atomic(uint8_t)*)jl_array_data(arr))[idx], val); - else if (el == (jl_value_t*)jl_uint16_type) - jl_atomic_store_release(&((_Atomic(uint16_t)*)jl_array_data(arr))[idx], val); - else if (el == (jl_value_t*)jl_uint32_type) - jl_atomic_store_release(&((_Atomic(uint32_t)*)jl_array_data(arr))[idx], val); + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) + jl_atomic_store_release(&((_Atomic(uint8_t)*)arr->ptr)[idx], val); + else if (el == jl_memory_uint16_type) + jl_atomic_store_release(&((_Atomic(uint16_t)*)arr->ptr)[idx], val); + else if (el == jl_memory_uint32_type) + jl_atomic_store_release(&((_Atomic(uint32_t)*)arr->ptr)[idx], val); else abort(); } -static inline size_t jl_max_int(const jl_array_t *arr) +static inline size_t jl_max_int(const jl_genericmemory_t *arr) JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) return 0xFF; - else if (el == (jl_value_t*)jl_uint16_type) + else if (el == jl_memory_uint16_type) return 0xFFFF; - else if (el == (jl_value_t*)jl_uint32_type) + else if (el == jl_memory_uint32_type) return 0xFFFFFFFF; - else if (el == (jl_value_t*)jl_any_type) + else if (el == jl_memory_any_type) return 0; else abort(); } -static jl_array_t *jl_alloc_int_1d(size_t np, size_t len) +void smallintset_empty(const jl_genericmemory_t *a) JL_NOTSAFEPOINT +{ + size_t elsize; + jl_value_t *el = (jl_value_t*)jl_typetagof(a); + if (el == jl_memory_uint8_type) + elsize = sizeof(uint8_t); + else if (el == jl_memory_uint16_type) + elsize = sizeof(uint16_t); + else if (el == jl_memory_uint32_type) + elsize = sizeof(uint32_t); + else if (el == jl_memory_any_type) + elsize = 0; + else + abort(); + memset(a->ptr, 0, a->length * elsize); +} + +static jl_genericmemory_t *jl_alloc_int_1d(size_t np, size_t len) { jl_value_t *ty; - if (np < 0xFF) { - ty = jl_array_uint8_type; - } - else if (np < 0xFFFF) { - static jl_value_t *int16 JL_ALWAYS_LEAFTYPE = NULL; - if (int16 == NULL) - int16 = jl_apply_array_type((jl_value_t*)jl_uint16_type, 1); - ty = int16; - } - else { - assert(np < 0x7FFFFFFF); - static jl_value_t *int32 JL_ALWAYS_LEAFTYPE = NULL; - if (int32 == NULL) - int32 = jl_apply_array_type((jl_value_t*)jl_uint32_type, 1); - ty = int32; - } - jl_array_t *a = jl_alloc_array_1d(ty, len); - memset(a->data, 0, len * a->elsize); + if (np < 0xFF) + ty = jl_memory_uint8_type; + else if (np < 0xFFFF) + ty = jl_memory_uint16_type; + else + ty = jl_memory_uint32_type; + assert(np < 0x7FFFFFFF); + 
jl_genericmemory_t *a = jl_alloc_genericmemory(ty, len); + smallintset_empty(a); return a; } -ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv) +ssize_t jl_smallintset_lookup(jl_genericmemory_t *cache, smallintset_eq eq, const void *key, jl_value_t *data, uint_t hv, int pop) { - size_t sz = jl_array_len(cache); + size_t sz = cache->length; if (sz == 0) return -1; JL_GC_PUSH1(&cache); @@ -118,8 +134,10 @@ ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void * JL_GC_POP(); return -1; } - if (eq(val1 - 1, key, data, hv)) { + if (val1 != -1 && eq(val1 - 1, key, data, hv)) { JL_GC_POP(); + if (pop) + jl_intset_release(cache, index, (size_t)-1); // replace with tombstone return val1 - 1; } index = (index + 1) & (sz - 1); @@ -129,9 +147,9 @@ ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void * return -1; } -static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1) +static int smallintset_insert_(jl_genericmemory_t *a, uint_t hv, size_t val1) JL_NOTSAFEPOINT { - size_t sz = jl_array_len(a); + size_t sz = a->length; if (sz <= 1) return 0; size_t orig, index, iter; @@ -149,16 +167,17 @@ static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1) } while (iter <= maxprobe && index != orig); return 0; } +//} -static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np); - -void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data) +void jl_smallintset_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_value_t *data) { - jl_array_t *a = jl_atomic_load_relaxed(pcache); - if (val + 1 > jl_max_int(a)) - smallintset_rehash(pcache, parent, hash, data, jl_array_len(a), val + 1); + jl_genericmemory_t *a = jl_atomic_load_relaxed(pcache); + if (val + 1 >= jl_max_int(a)) { + a = smallintset_rehash(a, hash, data, a->length, val + 1); + jl_atomic_store_release(pcache, a); + if (parent) jl_gc_wb(parent, a); + } while (1) { - a = jl_atomic_load_relaxed(pcache); if (smallintset_insert_(a, hash(val, data), val + 1)) return; @@ -168,21 +187,22 @@ void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, sma /* lots of time rehashing all the keys over and over. 
*/ size_t newsz; a = jl_atomic_load_relaxed(pcache); - size_t sz = jl_array_len(a); + size_t sz = a->length; if (sz < HT_N_INLINE) newsz = HT_N_INLINE; else if (sz >= (1 << 19) || (sz <= (1 << 8))) newsz = sz << 1; else newsz = sz << 2; - smallintset_rehash(pcache, parent, hash, data, newsz, 0); + a = smallintset_rehash(a, hash, data, newsz, 0); + jl_atomic_store_release(pcache, a); + if (parent) jl_gc_wb(parent, a); } } -static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np) +jl_genericmemory_t* smallintset_rehash(jl_genericmemory_t* a, smallintset_hash hash, jl_value_t *data, size_t newsz, size_t np) { - jl_array_t *a = jl_atomic_load_relaxed(pcache); - size_t sz = jl_array_len(a); + size_t sz = a->length; size_t i; for (i = 0; i < sz; i += 1) { size_t val = jl_intref(a, i); @@ -190,7 +210,7 @@ static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, np = val; } while (1) { - jl_array_t *newa = jl_alloc_int_1d(np, newsz); + jl_genericmemory_t *newa = jl_alloc_int_1d(np + 1, newsz); JL_GC_PUSH1(&newa); for (i = 0; i < sz; i += 1) { size_t val1 = jl_intref(a, i); @@ -201,16 +221,12 @@ static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, } } JL_GC_POP(); - if (i == sz) { - jl_atomic_store_release(pcache, newa); - jl_gc_wb(parent, newa); - return; - } + if (i == sz) + return newa; newsz <<= 1; } } - #ifdef __cplusplus } #endif diff --git a/src/stackwalk.c b/src/stackwalk.c index 18bf4b2126938..f1d807908cf42 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,6 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -82,7 +83,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b skip--; } #endif -#if !defined(_OS_WINDOWS_) +#if !defined(_OS_WINDOWS_) // no point on windows, since RtlVirtualUnwind won't give us a second chance if the segfault happens in ntdll jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; jl_set_safe_restore(&buf); @@ -207,7 +208,7 @@ NOINLINE size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, // // The first `skip` frames are omitted, in addition to omitting the frame from // `rec_backtrace` itself. 
-NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip) +NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT { bt_context_t context; memset(&context, 0, sizeof(context)); @@ -223,6 +224,24 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip return bt_size; } +NOINLINE int failed_to_sample_task_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT +{ + if (maxsize < 1) { + return 0; + } + bt_data[0].uintptr = (uintptr_t) &failed_to_sample_task_fun; + return 1; +} + +NOINLINE int failed_to_stop_thread_fun(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT +{ + if (maxsize < 1) { + return 0; + } + bt_data[0].uintptr = (uintptr_t) &failed_to_stop_thread_fun; + return 1; +} + static jl_value_t *array_ptr_void_type JL_ALWAYS_LEAFTYPE = NULL; // Return backtrace information as an svec of (bt1, bt2, [sp]) // @@ -260,21 +279,21 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip) uintptr_t *sp_ptr = NULL; if (returnsp) { jl_array_grow_end(sp, maxincr); - sp_ptr = (uintptr_t*)jl_array_data(sp) + offset; + sp_ptr = jl_array_data(sp, uintptr_t) + offset; } size_t size_incr = 0; - have_more_frames = jl_unw_stepn(&cursor, (jl_bt_element_t*)jl_array_data(ip) + offset, + have_more_frames = jl_unw_stepn(&cursor, jl_array_data(ip, jl_bt_element_t) + offset, &size_incr, sp_ptr, maxincr, skip, &pgcstack, 0); skip = 0; offset += size_incr; } - jl_array_del_end(ip, jl_array_len(ip) - offset); + jl_array_del_end(ip, jl_array_nrows(ip) - offset); if (returnsp) - jl_array_del_end(sp, jl_array_len(sp) - offset); + jl_array_del_end(sp, jl_array_nrows(sp) - offset); size_t n = 0; - jl_bt_element_t *bt_data = (jl_bt_element_t*)jl_array_data(ip); - while (n < jl_array_len(ip)) { + jl_bt_element_t *bt_data = jl_array_data(ip, jl_bt_element_t); + while (n < jl_array_nrows(ip)) { jl_bt_element_t *bt_entry = bt_data + n; if (!jl_bt_is_native(bt_entry)) { size_t njlvals = jl_bt_num_jlvals(bt_entry); @@ -303,7 +322,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size, bt = *btout = jl_alloc_array_1d(array_ptr_void_type, bt_size); static_assert(sizeof(jl_bt_element_t) == sizeof(void*), "jl_bt_element_t is presented as Ptr{Cvoid} on julia side"); - memcpy(bt->data, bt_data, bt_size * sizeof(jl_bt_element_t)); + memcpy(jl_array_data(bt, jl_bt_element_t), bt_data, bt_size * sizeof(jl_bt_element_t)); bt2 = *bt2out = jl_alloc_array_1d(jl_array_any_type, 0); // Scan the backtrace buffer for any gc-managed values for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { @@ -605,7 +624,7 @@ JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC) jl_svecset(r, 1, jl_empty_sym); free(frame.file_name); jl_svecset(r, 2, jl_box_long(frame.line)); - jl_svecset(r, 3, frame.linfo != NULL ? (jl_value_t*)frame.linfo : jl_nothing); + jl_svecset(r, 3, frame.ci != NULL ? (jl_value_t*)frame.ci : jl_nothing); jl_svecset(r, 4, jl_box_bool(frame.fromC)); jl_svecset(r, 5, jl_box_bool(frame.inlined)); } @@ -641,17 +660,91 @@ void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT for (i = 0; i < n; i++) { jl_frame_t frame = frames[i]; if (!frame.func_name) { - jl_safe_printf("unknown function (ip: %p)\n", (void*)ip); + jl_safe_printf("unknown function (ip: %p) at %s\n", (void*)ip, frame.file_name ? 
frame.file_name : "(unknown file)"); } else { jl_safe_print_codeloc(frame.func_name, frame.file_name, frame.line, frame.inlined); free(frame.func_name); - free(frame.file_name); } + free(frame.file_name); } free(frames); } +const char *jl_debuginfo_file1(jl_debuginfo_t *debuginfo) +{ + jl_value_t *def = debuginfo->def; + if (jl_is_method_instance(def)) + def = ((jl_method_instance_t*)def)->def.value; + if (jl_is_method(def)) + def = (jl_value_t*)((jl_method_t*)def)->file; + if (jl_is_symbol(def)) + return jl_symbol_name((jl_sym_t*)def); + return ""; +} + +const char *jl_debuginfo_file(jl_debuginfo_t *debuginfo) +{ + jl_debuginfo_t *linetable = debuginfo->linetable; + while ((jl_value_t*)linetable != jl_nothing) { + debuginfo = linetable; + linetable = debuginfo->linetable; + } + return jl_debuginfo_file1(debuginfo); +} + +jl_module_t *jl_debuginfo_module1(jl_value_t *debuginfo_def) +{ + if (jl_is_method_instance(debuginfo_def)) + debuginfo_def = ((jl_method_instance_t*)debuginfo_def)->def.value; + if (jl_is_method(debuginfo_def)) + debuginfo_def = (jl_value_t*)((jl_method_t*)debuginfo_def)->module; + if (jl_is_module(debuginfo_def)) + return (jl_module_t*)debuginfo_def; + return NULL; +} + +const char *jl_debuginfo_name(jl_value_t *func) +{ + if (func == NULL) + return "macro expansion"; + if (jl_is_method_instance(func)) + func = ((jl_method_instance_t*)func)->def.value; + if (jl_is_method(func)) + func = (jl_value_t*)((jl_method_t*)func)->name; + if (jl_is_symbol(func)) + return jl_symbol_name((jl_sym_t*)func); + if (jl_is_module(func)) + return "top-level scope"; + return ""; +} + +// func == module : top-level +// func == NULL : macro expansion +static void jl_print_debugloc(jl_debuginfo_t *debuginfo, jl_value_t *func, size_t ip, int inlined) JL_NOTSAFEPOINT +{ + if (!jl_is_symbol(debuginfo->def)) // this is a path or + func = debuginfo->def; // this is inlined code + struct jl_codeloc_t stmt = jl_uncompress1_codeloc(debuginfo->codelocs, ip); + intptr_t edges_idx = stmt.to; + if (edges_idx) { + jl_debuginfo_t *edge = (jl_debuginfo_t*)jl_svecref(debuginfo->edges, edges_idx - 1); + assert(jl_typetagis(edge, jl_debuginfo_type)); + jl_print_debugloc(edge, NULL, stmt.pc, 1); + } + intptr_t ip2 = stmt.line; + if (ip2 >= 0 && ip > 0 && (jl_value_t*)debuginfo->linetable != jl_nothing) { + jl_print_debugloc(debuginfo->linetable, func, ip2, 0); + } + else { + if (ip2 < 0) // set broken debug info to ignored + ip2 = 0; + const char *func_name = jl_debuginfo_name(func); + const char *file = jl_debuginfo_file(debuginfo); + jl_safe_print_codeloc(func_name, file, ip2, inlined); + } +} + // Print code location for backtrace buffer entry at *bt_entry void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT { @@ -659,33 +752,23 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT jl_print_native_codeloc(bt_entry[0].uintptr); } else if (jl_bt_entry_tag(bt_entry) == JL_BT_INTERP_FRAME_TAG) { - size_t ip = jl_bt_entry_header(bt_entry); + size_t ip = jl_bt_entry_header(bt_entry); // zero-indexed jl_value_t *code = jl_bt_entry_jlvalue(bt_entry, 0); - if (jl_is_method_instance(code)) { + jl_value_t *def = (jl_value_t*)jl_core_module; // just used as a token here that isa Module + if (jl_is_code_instance(code)) { + jl_code_instance_t *ci = (jl_code_instance_t*)code; + def = (jl_value_t*)ci->def; + code = jl_atomic_load_relaxed(&ci->inferred); + } else if (jl_is_method_instance(code)) { + jl_method_instance_t *mi = (jl_method_instance_t*)code; + def = code; // When 
interpreting a method instance, need to unwrap to find the code info - code = jl_atomic_load_relaxed(&((jl_method_instance_t*)code)->uninferred); + code = mi->def.method->source; } if (jl_is_code_info(code)) { jl_code_info_t *src = (jl_code_info_t*)code; // See also the debug info handling in codegen.cpp. - // NB: debuginfoloc is 1-based! - intptr_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[ip]; - while (debuginfoloc != 0) { - jl_line_info_node_t *locinfo = (jl_line_info_node_t*) - jl_array_ptr_ref(src->linetable, debuginfoloc - 1); - assert(jl_typetagis(locinfo, jl_lineinfonode_type)); - const char *func_name = "Unknown"; - jl_value_t *method = locinfo->method; - if (jl_is_method_instance(method)) - method = ((jl_method_instance_t*)method)->def.value; - if (jl_is_method(method)) - method = (jl_value_t*)((jl_method_t*)method)->name; - if (jl_is_symbol(method)) - func_name = jl_symbol_name((jl_sym_t*)method); - jl_safe_print_codeloc(func_name, jl_symbol_name(locinfo->file), - locinfo->line, locinfo->inlined_at); - debuginfoloc = locinfo->inlined_at; - } + jl_print_debugloc(src->debuginfo, def, ip + 1, 0); } else { // If we're using this function something bad has already happened; @@ -791,7 +874,7 @@ _os_tsd_get_direct(unsigned long slot) // Unconditionally defined ptrauth_strip (instead of using the ptrauth.h header) // since libsystem will likely be compiled with -mbranch-protection, and we currently are not. // code from https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h -static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) { +static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) JL_NOTSAFEPOINT { // On the stack the link register is protected with Pointer // Authentication Code when compiled with -mbranch-protection. // Let's strip the PAC unconditionally because xpaclri is in the NOP space, @@ -809,7 +892,7 @@ static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) { __attribute__((always_inline, pure)) static __inline__ void** -_os_tsd_get_base(void) +_os_tsd_get_base(void) JL_NOTSAFEPOINT { #if defined(__arm__) uintptr_t tsd; @@ -831,7 +914,7 @@ _os_tsd_get_base(void) #ifdef _os_tsd_get_base __attribute__((always_inline)) static __inline__ void* -_os_tsd_get_direct(unsigned long slot) +_os_tsd_get_direct(unsigned long slot) JL_NOTSAFEPOINT { return _os_tsd_get_base()[slot]; } @@ -839,14 +922,14 @@ _os_tsd_get_direct(unsigned long slot) __attribute__((always_inline, pure)) static __inline__ uintptr_t -_os_ptr_munge_token(void) +_os_ptr_munge_token(void) JL_NOTSAFEPOINT { return (uintptr_t)_os_tsd_get_direct(__TSD_PTR_MUNGE); } __attribute__((always_inline, pure)) JL_UNUSED static __inline__ uintptr_t -_os_ptr_munge(uintptr_t ptr) +_os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT { return ptr ^ _os_ptr_munge_token(); } @@ -854,149 +937,207 @@ _os_ptr_munge(uintptr_t ptr) #endif -extern bt_context_t *jl_to_bt_context(void *sigctx); +extern bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT; -void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT +// Some notes: this simulates a longjmp call occurring in context `c`, as if the +// user was to set the PC in `c` to call longjmp and the PC in the longjmp to +// return here. This helps work around many cases where siglongjmp out of a +// signal handler is not supported (e.g. missing a _sigunaltstack call). 
+// Additionally note that this doesn't restore the MXCSR or FP control word +// (which some, but not most longjmp implementations do). It also doesn't +// support shadow stacks, so if those are in use, you might need to use a direct +// jl_longjmp instead to leave the signal frame instead of relying on simulating +// it and attempting to return normally. +int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - ptls->bt_size = 0; - if (t == ct) { - ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); - return; - } - if (t->copy_stack || !t->started || t->stkbuf == NULL) - return; - int16_t old = -1; - if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) - return; - bt_context_t *context = NULL; -#if defined(_OS_WINDOWS_) - bt_context_t c; - memset(&c, 0, sizeof(c)); - _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext; -#if defined(_CPU_X86_64_) - c.Rbx = mctx->Rbx; - c.Rsp = mctx->Rsp; - c.Rbp = mctx->Rbp; - c.Rsi = mctx->Rsi; - c.Rdi = mctx->Rdi; - c.R12 = mctx->R12; - c.R13 = mctx->R13; - c.R14 = mctx->R14; - c.R15 = mctx->R15; - c.Rip = mctx->Rip; - memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15 -#else - c.Eip = mctx->Eip; - c.Esp = mctx->Esp; - c.Ebp = mctx->Ebp; -#endif - context = &c; -#elif defined(JL_HAVE_UNW_CONTEXT) - context = &t->ctx.ctx; -#elif defined(JL_HAVE_UCONTEXT) - context = jl_to_bt_context(&t->ctx.ctx); -#elif defined(JL_HAVE_ASM) - bt_context_t c; - memset(&c, 0, sizeof(c)); - #if defined(_OS_LINUX_) && defined(__GLIBC__) - __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf; - mcontext_t *mc = &c.uc_mcontext; - #if defined(_CPU_X86_) +#if (defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)) + // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/hwasan/hwasan_interceptors.cpp + return 0; +#elif defined(_OS_WINDOWS_) + _JUMP_BUFFER* _ctx = (_JUMP_BUFFER*)mctx; + #if defined(_CPU_X86_64_) + c->Rbx = _ctx->Rbx; + c->Rsp = _ctx->Rsp; + c->Rbp = _ctx->Rbp; + c->Rsi = _ctx->Rsi; + c->Rdi = _ctx->Rdi; + c->R12 = _ctx->R12; + c->R13 = _ctx->R13; + c->R14 = _ctx->R14; + c->R15 = _ctx->R15; + c->Rip = _ctx->Rip; + memcpy(&c->Xmm6, &_ctx->Xmm6, 10 * sizeof(_ctx->Xmm6)); // Xmm6-Xmm15 + // c->MxCsr = _ctx->MxCsr; + // c->FloatSave.ControlWord = _ctx->FpCsr; + // c->SegGS[0] = _ctx->Frame; + c->Rax = 1; + c->Rsp += sizeof(void*); + assert(c->Rsp % 16 == 0); + return 1; + #elif defined(_CPU_X86_) + c->Ebp = _ctx->Ebp; + c->Ebx = _ctx->Ebx; + c->Edi = _ctx->Edi; + c->Esi = _ctx->Esi; + c->Esp = _ctx->Esp; + c->Eip = _ctx->Eip; + // c->SegFS[0] = _ctx->Registration; + // c->FloatSave.ControlWord = _ctx->FpCsr; + c->Eax = 1; + c->Esp += sizeof(void*); + assert(c->Esp % 16 == 0); + return 1; + #else + #error Windows is currently only supported on x86 and x86_64 + #endif +#elif defined(_OS_LINUX_) && defined(__GLIBC__) + __jmp_buf *_ctx = &mctx->__jmpbuf; + #if defined(_CPU_AARCH64_) + // Only on aarch64-linux libunwind uses a different struct than system's one: + // . 
+ struct unw_sigcontext *mc = &c->uc_mcontext; + #else + mcontext_t *mc = &c->uc_mcontext; + #endif + #if defined(_CPU_X86_) // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s - mc->gregs[REG_EBX] = (*mctx)[0]; - mc->gregs[REG_ESI] = (*mctx)[1]; - mc->gregs[REG_EDI] = (*mctx)[2]; - mc->gregs[REG_EBP] = (*mctx)[3]; - mc->gregs[REG_ESP] = (*mctx)[4]; - mc->gregs[REG_EIP] = (*mctx)[5]; + mc->gregs[REG_EBX] = (*_ctx)[0]; + mc->gregs[REG_ESI] = (*_ctx)[1]; + mc->gregs[REG_EDI] = (*_ctx)[2]; + mc->gregs[REG_EBP] = (*_ctx)[3]; + mc->gregs[REG_ESP] = (*_ctx)[4]; + mc->gregs[REG_EIP] = (*_ctx)[5]; // ifdef PTR_DEMANGLE ? mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]); mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]); - context = &c; - #elif defined(_CPU_X86_64_) + mc->gregs[REG_EAX] = 1; + assert(mc->gregs[REG_ESP] % 16 == 0); + return 1; + #elif defined(_CPU_X86_64_) // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s - mc->gregs[REG_RBX] = (*mctx)[0]; - mc->gregs[REG_RBP] = (*mctx)[1]; - mc->gregs[REG_R12] = (*mctx)[2]; - mc->gregs[REG_R13] = (*mctx)[3]; - mc->gregs[REG_R14] = (*mctx)[4]; - mc->gregs[REG_R15] = (*mctx)[5]; - mc->gregs[REG_RSP] = (*mctx)[6]; - mc->gregs[REG_RIP] = (*mctx)[7]; + mc->gregs[REG_RBX] = (*_ctx)[0]; + mc->gregs[REG_RBP] = (*_ctx)[1]; + mc->gregs[REG_R12] = (*_ctx)[2]; + mc->gregs[REG_R13] = (*_ctx)[3]; + mc->gregs[REG_R14] = (*_ctx)[4]; + mc->gregs[REG_R15] = (*_ctx)[5]; + mc->gregs[REG_RSP] = (*_ctx)[6]; + mc->gregs[REG_RIP] = (*_ctx)[7]; // ifdef PTR_DEMANGLE ? mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]); mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]); mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]); - context = &c; - #elif defined(_CPU_ARM_) + mc->gregs[REG_RAX] = 1; + assert(mc->gregs[REG_RSP] % 16 == 0); + return 1; + #elif defined(_CPU_ARM_) // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S - mc->arm_sp = (*mctx)[0]; - mc->arm_lr = (*mctx)[1]; - mc->arm_r4 = (*mctx)[2]; // aka v1 - mc->arm_r5 = (*mctx)[3]; // aka v2 - mc->arm_r6 = (*mctx)[4]; // aka v3 - mc->arm_r7 = (*mctx)[5]; // aka v4 - mc->arm_r8 = (*mctx)[6]; // aka v5 - mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb - mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl - mc->arm_fp = (*mctx)[10]; // aka v8 aka r11 + mc->arm_sp = (*_ctx)[0]; + mc->arm_lr = (*_ctx)[1]; + mc->arm_r4 = (*_ctx)[2]; // aka v1 + mc->arm_r5 = (*_ctx)[3]; // aka v2 + mc->arm_r6 = (*_ctx)[4]; // aka v3 + mc->arm_r7 = (*_ctx)[5]; // aka v4 + mc->arm_r8 = (*_ctx)[6]; // aka v5 + mc->arm_r9 = (*_ctx)[7]; // aka v6 aka sb + mc->arm_r10 = (*_ctx)[8]; // aka v7 aka sl + mc->arm_fp = (*_ctx)[10]; // aka v8 aka r11 // ifdef PTR_DEMANGLE ? 
mc->arm_sp = ptr_demangle(mc->arm_sp); mc->arm_lr = ptr_demangle(mc->arm_lr); mc->arm_pc = mc->arm_lr; - context = &c; - #elif defined(_CPU_AARCH64_) + mc->arm_r0 = 1; + assert(mc->arm_sp % 16 == 0); + return 1; + #elif defined(_CPU_AARCH64_) // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62 unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved; - mc->regs[19] = (*mctx)[0]; - mc->regs[20] = (*mctx)[1]; - mc->regs[21] = (*mctx)[2]; - mc->regs[22] = (*mctx)[3]; - mc->regs[23] = (*mctx)[4]; - mc->regs[24] = (*mctx)[5]; - mc->regs[25] = (*mctx)[6]; - mc->regs[26] = (*mctx)[7]; - mc->regs[27] = (*mctx)[8]; - mc->regs[28] = (*mctx)[9]; - mc->regs[29] = (*mctx)[10]; // aka fp - mc->regs[30] = (*mctx)[11]; // aka lr - // Yes, they did skip 12 why writing the code originally; and, no, I do not know why. - mc->sp = (*mctx)[13]; - mcfp->vregs[7] = (*mctx)[14]; // aka d8 - mcfp->vregs[8] = (*mctx)[15]; // aka d9 - mcfp->vregs[9] = (*mctx)[16]; // aka d10 - mcfp->vregs[10] = (*mctx)[17]; // aka d11 - mcfp->vregs[11] = (*mctx)[18]; // aka d12 - mcfp->vregs[12] = (*mctx)[19]; // aka d13 - mcfp->vregs[13] = (*mctx)[20]; // aka d14 - mcfp->vregs[14] = (*mctx)[21]; // aka d15 + mc->regs[19] = (*_ctx)[0]; + mc->regs[20] = (*_ctx)[1]; + mc->regs[21] = (*_ctx)[2]; + mc->regs[22] = (*_ctx)[3]; + mc->regs[23] = (*_ctx)[4]; + mc->regs[24] = (*_ctx)[5]; + mc->regs[25] = (*_ctx)[6]; + mc->regs[26] = (*_ctx)[7]; + mc->regs[27] = (*_ctx)[8]; + mc->regs[28] = (*_ctx)[9]; + mc->regs[29] = (*_ctx)[10]; // aka fp + mc->regs[30] = (*_ctx)[11]; // aka lr + // Yes, they did skip 12 when writing the code originally; and, no, I do not know why. + mc->sp = (*_ctx)[13]; + mcfp->vregs[7] = (*_ctx)[14]; // aka d8 + mcfp->vregs[8] = (*_ctx)[15]; // aka d9 + mcfp->vregs[9] = (*_ctx)[16]; // aka d10 + mcfp->vregs[10] = (*_ctx)[17]; // aka d11 + mcfp->vregs[11] = (*_ctx)[18]; // aka d12 + mcfp->vregs[12] = (*_ctx)[19]; // aka d13 + mcfp->vregs[13] = (*_ctx)[20]; // aka d14 + mcfp->vregs[14] = (*_ctx)[21]; // aka d15 // ifdef PTR_DEMANGLE ? 
mc->sp = ptr_demangle(mc->sp); mc->regs[30] = ptr_demangle(mc->regs[30]); mc->pc = mc->regs[30]; - context = &c; - #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux") - (void)mc; - (void)c; - (void)mctx; - #endif - #elif defined(_OS_DARWIN_) - sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; - #if defined(_CPU_X86_64_) + mc->regs[0] = 1; + assert(mc->sp % 16 == 0); + return 1; + #elif defined(_CPU_RISCV64_) + // https://github.com/bminor/glibc/blob/master/sysdeps/riscv/bits/setjmp.h + // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_riscv + mc->__gregs[1] = (*_ctx)->__pc; // ra + mc->__gregs[8] = (*_ctx)->__regs[0]; // s0 + mc->__gregs[9] = (*_ctx)->__regs[1]; // s1 + mc->__gregs[18] = (*_ctx)->__regs[2]; // s2 + mc->__gregs[19] = (*_ctx)->__regs[3]; // s3 + mc->__gregs[20] = (*_ctx)->__regs[4]; // s4 + mc->__gregs[21] = (*_ctx)->__regs[5]; // s5 + mc->__gregs[22] = (*_ctx)->__regs[6]; // s6 + mc->__gregs[23] = (*_ctx)->__regs[7]; // s7 + mc->__gregs[24] = (*_ctx)->__regs[8]; // s8 + mc->__gregs[25] = (*_ctx)->__regs[9]; // s9 + mc->__gregs[26] = (*_ctx)->__regs[10]; // s10 + mc->__gregs[27] = (*_ctx)->__regs[11]; // s11 + mc->__gregs[2] = (*_ctx)->__sp; // sp + #ifndef __riscv_float_abi_soft + mc->__fpregs.__d.__f[8] = (unsigned long long) (*_ctx)->__fpregs[0]; // fs0 + mc->__fpregs.__d.__f[9] = (unsigned long long) (*_ctx)->__fpregs[1]; // fs1 + mc->__fpregs.__d.__f[18] = (unsigned long long) (*_ctx)->__fpregs[2]; // fs2 + mc->__fpregs.__d.__f[19] = (unsigned long long) (*_ctx)->__fpregs[3]; // fs3 + mc->__fpregs.__d.__f[20] = (unsigned long long) (*_ctx)->__fpregs[4]; // fs4 + mc->__fpregs.__d.__f[21] = (unsigned long long) (*_ctx)->__fpregs[5]; // fs5 + mc->__fpregs.__d.__f[22] = (unsigned long long) (*_ctx)->__fpregs[6]; // fs6 + mc->__fpregs.__d.__f[23] = (unsigned long long) (*_ctx)->__fpregs[7]; // fs7 + mc->__fpregs.__d.__f[24] = (unsigned long long) (*_ctx)->__fpregs[8]; // fs8 + mc->__fpregs.__d.__f[25] = (unsigned long long) (*_ctx)->__fpregs[9]; // fs9 + mc->__fpregs.__d.__f[26] = (unsigned long long) (*_ctx)->__fpregs[10]; // fs10 + mc->__fpregs.__d.__f[27] = (unsigned long long) (*_ctx)->__fpregs[11]; // fs11 + #endif + // ifdef PTR_DEMANGLE ? 
+ mc->__gregs[REG_SP] = ptr_demangle(mc->__gregs[REG_SP]); + mc->__gregs[REG_RA] = ptr_demangle(mc->__gregs[REG_RA]); + mc->__gregs[REG_PC] = mc->__gregs[REG_RA]; + mc->__gregs[REG_A0] = 1; + assert(mc->__gregs[REG_SP] % 16 == 0); + return 1; + #else + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown linux") + (void)mc; + (void)mctx; + return 0; + #endif +#elif defined(_OS_DARWIN_) + #if defined(_CPU_X86_64_) // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s - x86_thread_state64_t *mc = (x86_thread_state64_t*)&c; + x86_thread_state64_t *mc = (x86_thread_state64_t*)c; mc->__rbx = ((uint64_t*)mctx)[0]; mc->__rbp = ((uint64_t*)mctx)[1]; mc->__rsp = ((uint64_t*)mctx)[2]; @@ -1005,18 +1146,20 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT mc->__r14 = ((uint64_t*)mctx)[5]; mc->__r15 = ((uint64_t*)mctx)[6]; mc->__rip = ((uint64_t*)mctx)[7]; - // added in libsystem_plaform 177.200.16 (macOS Mojave 10.14.3) + // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3) // prior to that _os_ptr_munge_token was (hopefully) typically 0, // so x ^ 0 == x and this is a no-op mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp); mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp); mc->__rip = _OS_PTR_UNMUNGE(mc->__rip); - context = &c; - #elif defined(_CPU_AARCH64_) + mc->__rax = 1; + assert(mc->__rsp % 16 == 0); + return 1; + #elif defined(_CPU_AARCH64_) // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64 - arm_thread_state64_t *mc = (arm_thread_state64_t*)&c; + arm_thread_state64_t *mc = (arm_thread_state64_t*)c; mc->__x[19] = ((uint64_t*)mctx)[0]; mc->__x[20] = ((uint64_t*)mctx)[1]; mc->__x[21] = ((uint64_t*)mctx)[2]; @@ -1047,15 +1190,17 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either mc->__pc = ptrauth_strip(mc->__lr, 0); mc->__pad = 0; // aka __ra_sign_state = not signed - context = &c; - #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin") + mc->__x[0] = 1; + assert(mc->__sp % 16 == 0); + return 1; + #else + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown darwin") (void)mctx; - (void)c; - #endif - #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_) - sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; - mcontext_t *mc = &c.uc_mcontext; + return 0; +#endif +#elif defined(_OS_FREEBSD_) + mcontext_t *mc = &c->uc_mcontext; + #if defined(_CPU_X86_64_) // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S mc->mc_rip = ((long*)mctx)[0]; mc->mc_rbx = ((long*)mctx)[1]; @@ -1065,22 +1210,127 @@ void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT mc->mc_r13 = ((long*)mctx)[5]; mc->mc_r14 = ((long*)mctx)[6]; mc->mc_r15 = ((long*)mctx)[7]; - context = &c; - #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system") - (void)c; - #endif -#elif defined(JL_HAVE_ASYNCIFY) - #pragma message("jl_rec_backtrace not defined for ASYNCIFY") -#elif defined(JL_HAVE_SIGALTSTACK) - #pragma message("jl_rec_backtrace not defined for SIGALTSTACK") + mc->mc_rax = 1; + mc->mc_rsp += sizeof(void*); + assert(mc->mc_rsp % 16 == 0); + return 1; + #elif defined(_CPU_AARCH64_) + mc->mc_gpregs.gp_x[19] = 
((long*)mctx)[0]; + mc->mc_gpregs.gp_x[20] = ((long*)mctx)[1]; + mc->mc_gpregs.gp_x[21] = ((long*)mctx)[2]; + mc->mc_gpregs.gp_x[22] = ((long*)mctx)[3]; + mc->mc_gpregs.gp_x[23] = ((long*)mctx)[4]; + mc->mc_gpregs.gp_x[24] = ((long*)mctx)[5]; + mc->mc_gpregs.gp_x[25] = ((long*)mctx)[6]; + mc->mc_gpregs.gp_x[26] = ((long*)mctx)[7]; + mc->mc_gpregs.gp_x[27] = ((long*)mctx)[8]; + mc->mc_gpregs.gp_x[28] = ((long*)mctx)[9]; + mc->mc_gpregs.gp_x[29] = ((long*)mctx)[10]; + mc->mc_gpregs.gp_lr = ((long*)mctx)[11]; + mc->mc_gpregs.gp_sp = ((long*)mctx)[12]; + mc->mc_fpregs.fp_q[7] = ((long*)mctx)[13]; + mc->mc_fpregs.fp_q[8] = ((long*)mctx)[14]; + mc->mc_fpregs.fp_q[9] = ((long*)mctx)[15]; + mc->mc_fpregs.fp_q[10] = ((long*)mctx)[16]; + mc->mc_fpregs.fp_q[11] = ((long*)mctx)[17]; + mc->mc_fpregs.fp_q[12] = ((long*)mctx)[18]; + mc->mc_fpregs.fp_q[13] = ((long*)mctx)[19]; + mc->mc_fpregs.fp_q[14] = ((long*)mctx)[20]; + mc->mc_gpregs.gp_x[0] = 1; + assert(mc->mc_gpregs.gp_sp % 16 == 0); + return 1; + #else + #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown freebsd") + (void)mctx; + return 0; + #endif #else - #pragma message("jl_rec_backtrace not defined for unknown task system") +return 0; #endif - if (context) - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack); +} + +typedef struct { + int16_t old; + bt_context_t *c; + int success; +} suspend_t; +static void suspend(void *ctx) +{ + suspend_t *suspenddata = (suspend_t*)ctx; + suspenddata->success = jl_thread_suspend_and_get_state(suspenddata->old, 1, suspenddata->c); +} + +JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT +{ + int16_t tid = INT16_MAX; + jl_record_backtrace_result_t result = {0, tid}; + jl_task_t *ct = NULL; + jl_ptls_t ptls = NULL; + if (!all_tasks_profiler) { + ct = jl_current_task; + ptls = ct->ptls; + ptls->bt_size = 0; + tid = ptls->tid; + } + if (t == ct) { + result.bt_size = rec_backtrace(bt_data, max_bt_size, 0); + result.tid = tid; + return result; + } + bt_context_t *context = NULL; + bt_context_t c; + int16_t old; + for (old = -1; !jl_atomic_cmpswap(&t->tid, &old, tid) && old != tid; old = -1) { + // if this task is already running somewhere, we need to stop the thread it is running on and query its state + suspend_t suspenddata = {old, &c}; + jl_with_stackwalk_lock(suspend, &suspenddata); + if (!suspenddata.success) { + if (jl_atomic_load_relaxed(&t->tid) != old) + continue; + return result; + } + if (jl_atomic_load_relaxed(&t->tid) == old) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[old]; + if (ptls2->previous_task == t || // we might print the wrong stack here, since we can't know whether we executed the swapcontext yet or not, but it at least avoids trying to access the state inside uc_mcontext which might not be set yet + (ptls2->previous_task == NULL && jl_atomic_load_relaxed(&ptls2->current_task) == t)) { // this case should be always accurate + // use the thread context for the unwind state + context = &c; + } + break; + } + // got the wrong thread stopped, try again + jl_thread_resume(old); + } + if (context == NULL && (!t->ctx.copy_stack && t->ctx.started && t->ctx.ctx != NULL)) { + // need to read the context from the task stored state + jl_jmp_buf *mctx = &t->ctx.ctx->uc_mcontext; +#if defined(_OS_WINDOWS_) + memset(&c, 0, sizeof(c)); + if (jl_simulate_longjmp(*mctx, &c)) + context = &c; +#elif 
defined(JL_HAVE_UNW_CONTEXT) + context = t->ctx.ctx; +#elif defined(JL_HAVE_UCONTEXT) + context = jl_to_bt_context(t->ctx.ctx); +#elif defined(JL_HAVE_ASM) + memset(&c, 0, sizeof(c)); + if (jl_simulate_longjmp(*mctx, &c)) + context = &c; +#else + #pragma message("jl_record_backtrace not defined for unknown task system") +#endif + } + size_t bt_size = 0; + if (context) { + bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, all_tasks_profiler ? NULL : t->gcstack); + } if (old == -1) jl_atomic_store_relaxed(&t->tid, old); + else if (old != tid) + jl_thread_resume(old); + result.bt_size = bt_size; + result.tid = old; + return result; } //-------------------------------------------------- @@ -1107,17 +1357,21 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT } } -// Print backtrace for specified task +// Print backtrace for specified task to jl_safe_printf stderr JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - jl_rec_backtrace(t); - size_t i, bt_size = ptls->bt_size; + ptls->bt_size = 0; jl_bt_element_t *bt_data = ptls->bt_data; + jl_record_backtrace_result_t r = jl_record_backtrace(t, bt_data, JL_MAX_BT_SIZE, 0); + size_t bt_size = r.bt_size; + size_t i; for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { jl_print_bt_entry_codeloc(bt_data + i); } + if (bt_size == 0) + jl_safe_printf(" no backtrace recorded\n"); } JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT @@ -1125,41 +1379,52 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT jlbacktrace(); } -// Print backtraces for all live tasks, for all threads. -// WARNING: this is dangerous and can crash if used outside of gdb, if -// all of Julia's threads are not stopped! +// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT { size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - arraylist_t *live_tasks = &ptls2->heap.live_tasks; - size_t n = live_tasks->len; + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); + continue; + } + if (ptls2 == NULL) { + continue; + } + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + int t_state = JL_TASK_STATE_DONE; + jl_task_t *t = ptls2->root_task; + if (t != NULL) + t_state = jl_atomic_load_relaxed(&t->_state); jl_safe_printf("==== Thread %d created %zu live tasks\n", - ptls2->tid + 1, n + 1); - jl_safe_printf(" ---- Root task (%p)\n", ptls2->root_task); - jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", - ptls2->root_task->sticky, ptls2->root_task->started, - jl_atomic_load_relaxed(&ptls2->root_task->_state), - jl_atomic_load_relaxed(&ptls2->root_task->tid) + 1); - jlbacktracet(ptls2->root_task); - - void **lst = live_tasks->items; - for (size_t j = 0; j < live_tasks->len; j++) { - jl_task_t *t = (jl_task_t *)lst[j]; + ptls2->tid + 1, n + (t_state != JL_TASK_STATE_DONE)); + if (show_done || t_state != JL_TASK_STATE_DONE) { + jl_safe_printf(" ---- Root task (%p)\n", ptls2->root_task); + if (t != NULL) { + jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", + t->sticky, t->ctx.started, t_state, + jl_atomic_load_relaxed(&t->tid) + 1); + jlbacktracet(t); + } + jl_safe_printf(" 
---- End root task\n"); + } + + for (size_t j = 0; j < n; j++) { + jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, j); + if (t == NULL) + continue; int t_state = jl_atomic_load_relaxed(&t->_state); - if (!show_done && t_state == JL_TASK_STATE_DONE) { + if (!show_done && t_state == JL_TASK_STATE_DONE) continue; - } jl_safe_printf(" ---- Task %zu (%p)\n", j + 1, t); + // n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc. jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", - t->sticky, t->started, t_state, + t->sticky, t->ctx.started, t_state, jl_atomic_load_relaxed(&t->tid) + 1); - if (t->stkbuf != NULL) - jlbacktracet(t); - else - jl_safe_printf(" no stack\n"); + jlbacktracet(t); jl_safe_printf(" ---- End task %zu\n", j + 1); } jl_safe_printf("==== End thread %d\n", ptls2->tid + 1); diff --git a/src/staticdata.c b/src/staticdata.c index df080bc68c88f..7fad87652b26a 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -71,7 +71,6 @@ External links: */ #include #include -#include #include // printf #include // PRIxPTR @@ -89,6 +88,9 @@ External links: #include "valgrind.h" #include "julia_assert.h" +static const size_t WORLD_AGE_REVALIDATION_SENTINEL = 0x1; +JL_DLLEXPORT size_t jl_require_world = ~(size_t)0; + #include "staticdata_utils.c" #include "precompile_utils.c" @@ -99,7 +101,7 @@ extern "C" { // TODO: put WeakRefs on the weak_refs list during deserialization // TODO: handle finalizers -#define NUM_TAGS 158 +#define NUM_TAGS 196 // An array of references that need to be restored from the sysimg // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C. @@ -121,6 +123,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_array_type); INSERT_TAG(jl_expr_type); INSERT_TAG(jl_binding_type); + INSERT_TAG(jl_binding_partition_type); INSERT_TAG(jl_globalref_type); INSERT_TAG(jl_string_type); INSERT_TAG(jl_module_type); @@ -133,6 +136,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_gotonode_type); INSERT_TAG(jl_quotenode_type); INSERT_TAG(jl_gotoifnot_type); + INSERT_TAG(jl_enternode_type); INSERT_TAG(jl_argument_type); INSERT_TAG(jl_returnnode_type); INSERT_TAG(jl_const_type); @@ -177,6 +181,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_emptytuple_type); INSERT_TAG(jl_array_symbol_type); INSERT_TAG(jl_array_uint8_type); + INSERT_TAG(jl_array_uint32_type); INSERT_TAG(jl_array_int32_type); INSERT_TAG(jl_array_uint64_type); INSERT_TAG(jl_int32_type); @@ -193,10 +198,25 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_float16_type); INSERT_TAG(jl_float32_type); INSERT_TAG(jl_float64_type); + INSERT_TAG(jl_bfloat16_type); INSERT_TAG(jl_floatingpoint_type); INSERT_TAG(jl_number_type); INSERT_TAG(jl_signed_type); INSERT_TAG(jl_pair_type); + INSERT_TAG(jl_genericmemory_type); + INSERT_TAG(jl_memory_any_type); + INSERT_TAG(jl_memory_uint8_type); + INSERT_TAG(jl_memory_uint16_type); + INSERT_TAG(jl_memory_uint32_type); + INSERT_TAG(jl_memory_uint64_type); + INSERT_TAG(jl_genericmemoryref_type); + INSERT_TAG(jl_memoryref_any_type); + INSERT_TAG(jl_memoryref_uint8_type); + INSERT_TAG(jl_addrspace_type); + INSERT_TAG(jl_addrspace_typename); + INSERT_TAG(jl_addrspacecore_type); + INSERT_TAG(jl_debuginfo_type); + INSERT_TAG(jl_abioverride_type); // special typenames INSERT_TAG(jl_tuple_typename); @@ -207,6 +227,8 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_namedtuple_typename); 
INSERT_TAG(jl_vecelement_typename); INSERT_TAG(jl_opaque_closure_typename); + INSERT_TAG(jl_genericmemory_typename); + INSERT_TAG(jl_genericmemoryref_typename); // special exceptions INSERT_TAG(jl_errorexception_type); @@ -216,6 +238,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_loaderror_type); INSERT_TAG(jl_initerror_type); INSERT_TAG(jl_undefvarerror_type); + INSERT_TAG(jl_fielderror_type); INSERT_TAG(jl_stackovf_exception); INSERT_TAG(jl_diverror_exception); INSERT_TAG(jl_interrupt_exception); @@ -224,6 +247,8 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_undefref_exception); INSERT_TAG(jl_readonlymemory_exception); INSERT_TAG(jl_atomicerror_type); + INSERT_TAG(jl_missingcodeerror_type); + INSERT_TAG(jl_precompilable_error); // other special values INSERT_TAG(jl_emptysvec); @@ -232,6 +257,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_true); INSERT_TAG(jl_an_empty_string); INSERT_TAG(jl_an_empty_vec_any); + INSERT_TAG(jl_an_empty_memory_any); INSERT_TAG(jl_module_init_order); INSERT_TAG(jl_core_module); INSERT_TAG(jl_base_module); @@ -243,6 +269,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_kwcall_mt); INSERT_TAG(jl_kwcall_func); INSERT_TAG(jl_opaque_closure_method); + INSERT_TAG(jl_nulldebuginfo); // some Core.Builtin Functions that we want to be able to reference: INSERT_TAG(jl_builtin_throw); @@ -262,11 +289,18 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin_swapfield); INSERT_TAG(jl_builtin_modifyfield); INSERT_TAG(jl_builtin_replacefield); + INSERT_TAG(jl_builtin_setfieldonce); INSERT_TAG(jl_builtin_fieldtype); - INSERT_TAG(jl_builtin_arrayref); - INSERT_TAG(jl_builtin_const_arrayref); - INSERT_TAG(jl_builtin_arrayset); - INSERT_TAG(jl_builtin_arraysize); + INSERT_TAG(jl_builtin_memorynew); + INSERT_TAG(jl_builtin_memoryref); + INSERT_TAG(jl_builtin_memoryrefoffset); + INSERT_TAG(jl_builtin_memoryrefget); + INSERT_TAG(jl_builtin_memoryrefset); + INSERT_TAG(jl_builtin_memoryref_isassigned); + INSERT_TAG(jl_builtin_memoryrefswap); + INSERT_TAG(jl_builtin_memoryrefmodify); + INSERT_TAG(jl_builtin_memoryrefreplace); + INSERT_TAG(jl_builtin_memoryrefsetonce); INSERT_TAG(jl_builtin_apply_type); INSERT_TAG(jl_builtin_applicable); INSERT_TAG(jl_builtin_invoke); @@ -277,6 +311,12 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin_compilerbarrier); INSERT_TAG(jl_builtin_getglobal); INSERT_TAG(jl_builtin_setglobal); + INSERT_TAG(jl_builtin_isdefinedglobal); + INSERT_TAG(jl_builtin_swapglobal); + INSERT_TAG(jl_builtin_modifyglobal); + INSERT_TAG(jl_builtin_replaceglobal); + INSERT_TAG(jl_builtin_setglobalonce); + INSERT_TAG(jl_builtin_current_scope); // n.b. must update NUM_TAGS when you add something here #undef INSERT_TAG assert(i == NUM_TAGS - 1); @@ -290,11 +330,8 @@ static uintptr_t nsym_tag; // array of definitions for the predefined tagged object types // (reverse of symbol_table) static arraylist_t deser_sym; -// Predefined tags that do not have special handling in `externally_linked` -static htable_t external_objects; static htable_t serialization_order; // to break cycles, mark all objects that are serialized -static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them. static htable_t nullptrs; // FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization // must be "toplevel" in this queue. 
For types, parameters and field types must appear @@ -308,6 +345,8 @@ static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_ // jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i] (0-offset indexing) arraylist_t jl_linkage_blobs; arraylist_t jl_image_relocs; +// Keep track of which image corresponds to which top module. +arraylist_t jl_top_mods; // Eytzinger tree of images. Used for very fast jl_object_in_image queries // See https://algorithmica.org/en/eytzinger @@ -316,6 +355,21 @@ arraylist_t eytzinger_idxs; static uintptr_t img_min; static uintptr_t img_max; +// HT_NOTFOUND is a valid integer ID, so we store the integer ids mangled. +// This pair of functions mangles/demanges +static size_t from_seroder_entry(void *entry) +{ + return (size_t)((char*)entry - (char*)HT_NOTFOUND - 1); +} + +static void *to_seroder_entry(size_t idx) +{ + return (void*)((char*)HT_NOTFOUND + 1 + idx); +} + +static htable_t new_methtables; +static size_t precompilation_world; + static int ptr_cmp(const void *l, const void *r) { uintptr_t left = *(const uintptr_t*)l; @@ -422,34 +476,50 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT return idx; } -uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT +JL_DLLEXPORT uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT { return eyt_obj_in_img(obj); } +// Map an object to it's "owning" top module +JL_DLLEXPORT jl_value_t *jl_object_top_module(jl_value_t* v) JL_NOTSAFEPOINT +{ + size_t idx = external_blob_index(v); + size_t lbids = n_linkage_blobs(); + if (idx < lbids) { + return (jl_value_t*)jl_top_mods.items[idx]; + } + // The object is runtime allocated + return (jl_value_t*)jl_nothing; +} + // hash of definitions for predefined function pointers static htable_t fptr_to_id; void *native_functions; // opaque jl_native_code_desc_t blob used for fetching data from LLVM // table of struct field addresses to rewrite during saving static htable_t field_replace; +static htable_t bits_replace; // array of definitions for the predefined function pointers // (reverse of fptr_to_id) // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C. 
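/* --- Editorial sketch (not part of the patch) ----------------------------------------
   The to_seroder_entry/from_seroder_entry pair above exists because HT_NOTFOUND is a
   valid-looking pointer value, so a raw index of 0 stored in serialization_order could
   collide with "not found". Biasing every index by HT_NOTFOUND + 1 keeps index 0
   distinguishable. A minimal stand-alone illustration of the same round trip, using a
   local sentinel in place of the internal HT_NOTFOUND (names below are hypothetical):

       #include <assert.h>
       #include <stddef.h>

       static char sketch_notfound;                          // stand-in for HT_NOTFOUND
       #define SKETCH_NOTFOUND ((void*)&sketch_notfound)

       static void  *sketch_encode(size_t idx)  { return (void*)((char*)SKETCH_NOTFOUND + 1 + idx); }
       static size_t sketch_decode(void *entry) { return (size_t)((char*)entry - (char*)SKETCH_NOTFOUND - 1); }

       static void sketch_roundtrip(void)
       {
           for (size_t i = 0; i < 4; i++) {
               void *e = sketch_encode(i);
               assert(e != SKETCH_NOTFOUND);                 // even index 0 is not "not found"
               assert(sketch_decode(e) == i);                // the mangling is lossless
           }
       }
   ------------------------------------------------------------------------------------- */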
static const jl_fptr_args_t id_to_fptrs[] = { - &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, + &jl_f_throw, &jl_f_throw_methoderror, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure, - &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined, + &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined, &jl_f_isdefinedglobal, &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, - &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield, - &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, - &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type, + &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield, &jl_f_setfieldonce, + &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, &jl_f_apply_type, &jl_f_memorynew, + &jl_f_memoryref, &jl_f_memoryrefoffset, &jl_f_memoryrefget, &jl_f_memoryref_isassigned, + &jl_f_memoryrefset, &jl_f_memoryrefswap, &jl_f_memoryrefmodify, &jl_f_memoryrefreplace, &jl_f_memoryrefsetonce, &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar, &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype, &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type, - &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier, - &jl_f_getglobal, &jl_f_setglobal, &jl_f_finalizer, &jl_f__compute_sparams, &jl_f__svec_ref, + &jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier, + &jl_f_getglobal, &jl_f_setglobal, &jl_f_swapglobal, &jl_f_modifyglobal, &jl_f_replaceglobal, &jl_f_setglobalonce, + &jl_f_finalizer, &jl_f__compute_sparams, &jl_f__svec_ref, + &jl_f_current_scope, NULL }; typedef struct { @@ -459,9 +529,12 @@ typedef struct { ios_t *relocs; // for (de)serializing relocs_list and gctags_list ios_t *gvar_record; // serialized array mapping gvid => spos ios_t *fptr_record; // serialized array mapping fptrid => spos + arraylist_t memowner_list; // a list of memory locations that have shared owners + arraylist_t memref_list; // a list of memoryref locations arraylist_t relocs_list; // a list of (location, target) pairs, see description at top arraylist_t gctags_list; // " arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated + arraylist_t uniquing_super; // a list of datatypes, used in super fields, that need to be marked in uniquing_types once they are reached, for handling unique-ing of them on deserialization arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated arraylist_t fixup_types; // a list of locations of types requiring (re)caching arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching @@ -478,13 +551,10 @@ typedef struct { jl_array_t *link_ids_gvars; jl_array_t *link_ids_external_fnvars; jl_ptls_t ptls; - htable_t callers_with_edges; jl_image_t *image; int8_t incremental; } jl_serializer_state; -static jl_value_t *jl_idtable_type = NULL; -static jl_typename_t *jl_idtable_typename = NULL; static jl_value_t *jl_bigint_type = NULL; static int gmp_limb_size = 0; static jl_sym_t *jl_docmeta_sym = NULL; @@ -511,6 +581,8 @@ enum RefTags { ExternalLinkage // reference to some other pkgimage }; +#define SYS_EXTERNAL_LINK_UNIT sizeof(void*) + // calling conventions for internal entry points. 
// this is used to set the method-instance->invoke field typedef enum { @@ -518,6 +590,7 @@ typedef enum { JL_API_BOXED, JL_API_CONST, JL_API_WITH_PARAMETERS, + JL_API_OC_CALL, JL_API_INTERPRETED, JL_API_BUILTIN, JL_API_MAX @@ -558,9 +631,9 @@ typedef struct { static void *jl_sysimg_handle = NULL; static jl_image_t sysimage; -static inline uintptr_t *sysimg_gvars(uintptr_t *base, const int32_t *offsets, size_t idx) +static inline uintptr_t *sysimg_gvars(const char *base, const int32_t *offsets, size_t idx) { - return base + offsets[idx] / sizeof(base[0]); + return (uintptr_t*)(base + offsets[idx]); } JL_DLLEXPORT int jl_running_on_valgrind(void) @@ -568,20 +641,25 @@ JL_DLLEXPORT int jl_running_on_valgrind(void) return RUNNING_ON_VALGRIND; } +void *system_image_data_unavailable; +extern void * JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(system_image_data_unavailable) jl_system_image_data; +extern void * JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(system_image_data_unavailable) jl_system_image_size; static void jl_load_sysimg_so(void) { - int imaging_mode = jl_generating_output() && !jl_options.incremental; - // in --build mode only use sysimg data, not precompiled native code - if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) { - assert(sysimage.fptrs.base); - } - else { - memset(&sysimage.fptrs, 0, sizeof(sysimage.fptrs)); - } const char *sysimg_data; - jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); + assert(sysimage.fptrs.ptrs); // jl_init_processor_sysimg should already be run + if (jl_sysimg_handle == jl_exe_handle && + &jl_system_image_data != JL_WEAK_SYMBOL_DEFAULT(system_image_data_unavailable)) + sysimg_data = (const char*)&jl_system_image_data; + else + jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); size_t *plen; - jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + if (jl_sysimg_handle == jl_exe_handle && + &jl_system_image_size != JL_WEAK_SYMBOL_DEFAULT(system_image_data_unavailable)) + plen = (size_t *)&jl_system_image_size; + else + jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -612,7 +690,7 @@ static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) JL_NOTS else if (jl_typetagis(v, jl_uint8_tag << 4)) { return 0; } - else if (jl_typetagis(v, jl_task_tag << 4)) { + else if (v == (jl_value_t*)s->ptls->root_task) { return 0; } @@ -628,6 +706,14 @@ static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT if (jl_is_method(m) && jl_object_in_image(m)) return 1 + type_in_worklist(mi->specTypes); } + if (jl_is_binding(v)) { + jl_globalref_t *gr = ((jl_binding_t*)v)->globalref; + if (!gr) + return 0; + if (!jl_object_in_image((jl_value_t*)gr->mod)) + return 0; + return 1; + } if (jl_is_datatype(v)) { jl_datatype_t *dt = (jl_datatype_t*)v; if (jl_is_tuple_type(dt) ? 
!dt->isconcretetype : dt->hasfreetypevars) @@ -655,7 +741,8 @@ static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT { - ptrhash_put(&field_replace, (void*)addr, newval); + if (*addr != newval) + ptrhash_put(&field_replace, (void*)addr, newval); } static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED @@ -685,31 +772,60 @@ static uintptr_t jl_fptr_id(void *fptr) return *(uintptr_t*)pbp; } +static int effects_foldable(uint32_t effects) +{ + // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) + return ((effects & 0x7) == 0) && // is_consistent(effects) + (((effects >> 10) & 0x03) == 0) && // is_noub(effects) + (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) + ((effects >> 6) & 0x01); // is_terminates(effects) +} + + // `jl_queue_for_serialization` adds items to `serialization_order` #define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0) static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED; - static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED { jl_queue_for_serialization(s, m->name); jl_queue_for_serialization(s, m->parent); - jl_queue_for_serialization(s, m->bindings); - jl_queue_for_serialization(s, m->bindingkeyset); - if (jl_options.strip_metadata) { + if (jl_options.trim) { + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->bindings), 0, 1); + } else { + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings)); + } + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindingkeyset)); + if (jl_options.strip_metadata || jl_options.trim) { jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); for (size_t i = 0; i < jl_svec_len(table); i++) { - jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i); + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); if ((void*)b == jl_nothing) break; - jl_sym_t *name = b->globalref->name; - if (name == jl_docmeta_sym && jl_atomic_load_relaxed(&b->value)) - record_field_change((jl_value_t**)&b->value, jl_nothing); + if (jl_options.strip_metadata) { + jl_sym_t *name = b->globalref->name; + if (name == jl_docmeta_sym && jl_get_binding_value(b)) + record_field_change((jl_value_t**)&b->value, jl_nothing); + } + if (jl_options.trim) { + jl_value_t *val = jl_get_binding_value(b); + // keep binding objects that are defined and ... + if (val && + // ... point to modules ... + (jl_is_module(val) || + // ... or point to __init__ methods ... + !strcmp(jl_symbol_name(b->globalref->name), "__init__") || + // ... 
or point to Base functions accessed by the runtime + (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") || + !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook"))))) { + jl_queue_for_serialization(s, b); + } + } } } - for (size_t i = 0; i < m->usings.len; i++) { - jl_queue_for_serialization(s, (jl_value_t*)m->usings.items[i]); + for (size_t i = 0; i < module_usings_length(m); i++) { + jl_queue_for_serialization(s, module_usings_getmod(m, i)); } } @@ -725,24 +841,22 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ { jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate); + const jl_datatype_layout_t *layout = t->layout; if (!recursive) goto done_fields; if (s->incremental && jl_is_datatype(v) && immediate) { jl_datatype_t *dt = (jl_datatype_t*)v; - // ensure super is queued (though possibly not yet handled, since it may have cycles) - jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1); // ensure all type parameters are recached jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); - jl_value_t *singleton = dt->instance; - if (singleton && needs_uniquing(singleton)) { - assert(jl_needs_serialization(s, singleton)); // should be true, since we visited dt + if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance)) { + assert(jl_needs_serialization(s, dt->instance)); // should be true, since we visited dt // do not visit dt->instance for our template object as it leads to unwanted cycles here // (it may get serialized from elsewhere though) record_field_change(&dt->instance, jl_nothing); } - immediate = 0; // do not handle remaining fields immediately (just field types remains) + goto done_fields; // for now } if (s->incremental && jl_is_method_instance(v)) { jl_method_instance_t *mi = (jl_method_instance_t*)v; @@ -752,16 +866,13 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ jl_queue_for_serialization(s, mi->def.value); jl_queue_for_serialization(s, mi->specTypes); jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals); - recursive = 0; goto done_fields; } else if (jl_is_method(def) && jl_object_in_image(def)) { // we only need 3 specific fields of this (the rest are restored afterward, if valid) // in particular, cache is repopulated by jl_mi_cache_insert for all foreign function, // so must not be present here - record_field_change((jl_value_t**)&mi->uninferred, NULL); record_field_change((jl_value_t**)&mi->backedges, NULL); - record_field_change((jl_value_t**)&mi->callbacks, NULL); record_field_change((jl_value_t**)&mi->cache, NULL); } else { @@ -774,6 +885,14 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ // prevent this from happening, so we do not need to detect that user // error now. 
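/* --- Editorial sketch (not part of the patch) ----------------------------------------
   record_field_change / get_replaceable_field implement a "virtual write": rather than
   mutating the live object, the serializer records "when this field address is read
   while writing, pretend it holds newval" (used above, e.g., to drop backedges and
   caches of method instances whose method lives in another image). A minimal stand-in
   using a fixed-size table instead of the internal field_replace ptrhash (all names
   below are hypothetical):

       #include <stddef.h>

       typedef struct { void **addr; void *val; } sketch_replace_t;
       static sketch_replace_t sketch_tab[64];               // toy table, not the real ptrhash
       static size_t sketch_len = 0;

       static void sketch_record_field_change(void **addr, void *newval)
       {
           if (*addr == newval)
               return;                                       // same value: nothing to override
           sketch_tab[sketch_len++] = (sketch_replace_t){ addr, newval };
       }

       static void *sketch_get_replaceable_field(void **addr)
       {
           for (size_t i = 0; i < sketch_len; i++)
               if (sketch_tab[i].addr == addr)
                   return sketch_tab[i].val;                 // serialized image sees the override
           return *addr;                                     // otherwise the real field value
       }
   ------------------------------------------------------------------------------------- */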
} + if (s->incremental && jl_is_binding(v)) { + if (needs_uniquing(v)) { + jl_binding_t *b = (jl_binding_t*)v; + jl_queue_for_serialization(s, b->globalref->mod); + jl_queue_for_serialization(s, b->globalref->name); + goto done_fields; + } + } if (s->incremental && jl_is_globalref(v)) { jl_globalref_t *gr = (jl_globalref_t*)v; if (jl_object_in_image((jl_value_t*)gr->mod)) { @@ -783,8 +902,8 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ if (jl_is_typename(v)) { jl_typename_t *tn = (jl_typename_t*)v; // don't recurse into several fields (yet) - jl_queue_for_serialization_(s, (jl_value_t*)tn->cache, 0, 1); - jl_queue_for_serialization_(s, (jl_value_t*)tn->linearcache, 0, 1); + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&tn->cache), 0, 1); + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&tn->linearcache), 0, 1); if (s->incremental) { assert(!jl_object_in_image((jl_value_t*)tn->module)); assert(!jl_object_in_image((jl_value_t*)tn->wrapper)); @@ -792,18 +911,46 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ } if (s->incremental && jl_is_code_instance(v)) { jl_code_instance_t *ci = (jl_code_instance_t*)v; + jl_method_instance_t *mi = jl_get_ci_mi(ci); // make sure we don't serialize other reachable cache entries of foreign methods - if (jl_object_in_image((jl_value_t*)ci->def->def.value)) { + // Should this now be: + // if (ci !in ci->defs->cache) + // record_field_change((jl_value_t**)&ci->next, NULL); + // Why are we checking that the method/module this originates from is in_image? + // and then disconnect this CI? + if (jl_object_in_image((jl_value_t*)mi->def.value)) { // TODO: if (ci in ci->defs->cache) record_field_change((jl_value_t**)&ci->next, NULL); } + jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred); + if (inferred && inferred != jl_nothing) { // disregard if there is nothing here to delete (e.g. 
builtins, unspecialized) + jl_method_t *def = mi->def.method; + if (jl_is_method(def)) { // don't delete toplevel code + int is_relocatable = jl_is_code_info(inferred) || + (jl_is_string(inferred) && jl_string_len(inferred) > 0 && jl_string_data(inferred)[jl_string_len(inferred) - 1]); + if (!is_relocatable) { + record_field_change((jl_value_t**)&ci->inferred, jl_nothing); + } + else if (def->source == NULL) { + // don't delete code from optimized opaque closures that can't be reconstructed (and builtins) + } + else if (jl_atomic_load_relaxed(&ci->max_world) != ~(size_t)0 || // delete all code that cannot run + jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { // delete all code that just returns a constant + record_field_change((jl_value_t**)&ci->inferred, jl_nothing); + } + else if (native_functions && // don't delete any code if making a ji file + !effects_foldable(jl_atomic_load_relaxed(&ci->ipo_purity_bits)) && // don't delete code we may want for irinterp + jl_ir_inlining_cost(inferred) == UINT16_MAX) { // don't delete inlineable code + // delete the code now: if we thought it was worth keeping, it would have been converted to object code + record_field_change((jl_value_t**)&ci->inferred, jl_nothing); + } + } + } } - if (immediate) // must be things that can be recursively handled, and valid as type parameters assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v)); - const jl_datatype_layout_t *layout = t->layout; if (layout->npointers == 0) { // bitstypes do not require recursion } @@ -816,22 +963,29 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ } else if (jl_is_array(v)) { jl_array_t *ar = (jl_array_t*)v; - const char *data = (const char*)jl_array_data(ar); - if (ar->flags.ptrarray) { - size_t i, l = jl_array_len(ar); + jl_value_t *mem = get_replaceable_field((jl_value_t**)&ar->ref.mem, 1); + jl_queue_for_serialization_(s, mem, 1, immediate); + } + else if (jl_is_genericmemory(v)) { + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + const char *data = (const char*)m->ptr; + if (jl_genericmemory_how(m) == 3) { + assert(jl_is_string(jl_genericmemory_data_owner_field(m))); + } + else if (layout->flags.arrayelem_isboxed) { + size_t i, l = m->length; for (i = 0; i < l; i++) { jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[i], 1); jl_queue_for_serialization_(s, fld, 1, immediate); } } - else if (ar->flags.hasptr) { - uint16_t elsz = ar->elsize; - size_t i, l = jl_array_len(ar); - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(ar)); - size_t j, np = et->layout->npointers; + else if (layout->first_ptr >= 0) { + uint16_t elsz = layout->size; + size_t i, l = m->length; + size_t j, np = layout->npointers; for (i = 0; i < l; i++) { for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); + uint32_t ptr = jl_ptr_offset(t, j); jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], 1); jl_queue_for_serialization_(s, fld, 1, immediate); } @@ -842,14 +996,34 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ else if (jl_typetagis(v, jl_module_tag << 4)) { jl_queue_module_for_serialization(s, (jl_module_t*)v); } + else if (jl_is_binding_partition(v)) { + jl_binding_partition_t *bpart = (jl_binding_partition_t*)v; + jl_queue_for_serialization_(s, decode_restriction_value(jl_atomic_load_relaxed(&bpart->restriction)), 1, immediate); + jl_queue_for_serialization_(s, get_replaceable_field((jl_value_t**)&bpart->next, 0), 1, immediate); + } else if 
(layout->nfields > 0) { + if (jl_options.trim) { + if (jl_is_method(v)) { + jl_method_t *m = (jl_method_t *)v; + if (jl_is_svec(jl_atomic_load_relaxed(&m->specializations))) + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->specializations), 0, 1); + } + else if (jl_typetagis(v, jl_typename_type)) { + jl_typename_t *tn = (jl_typename_t*)v; + if (tn->mt != NULL && !tn->mt->frozen) { + jl_methtable_t * new_methtable = (jl_methtable_t *)ptrhash_get(&new_methtables, tn->mt); + if (new_methtable != HT_NOTFOUND) + record_field_change((jl_value_t **)&tn->mt, (jl_value_t*)new_methtable); + else + record_field_change((jl_value_t **)&tn->mt, NULL); + } + } + } char *data = (char*)jl_data_ptr(v); size_t i, np = layout->npointers; for (i = 0; i < np; i++) { uint32_t ptr = jl_ptr_offset(t, i); int mutabl = t->name->mutabl; - if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field - mutabl = 0; jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl); jl_queue_for_serialization_(s, fld, 1, immediate); } @@ -859,24 +1033,38 @@ done_fields: ; // We've encountered an item we need to cache void **bp = ptrhash_bp(&serialization_order, v); - assert(*bp != (void*)(uintptr_t)-1); - if (s->incremental) { - void **bp2 = ptrhash_bp(&unique_ready, v); - if (*bp2 == HT_NOTFOUND) - assert(*bp == (void*)(uintptr_t)-2); - else if (*bp != (void*)(uintptr_t)-2) - return; - } - else { - assert(*bp == (void*)(uintptr_t)-2); - } + assert(*bp == (void*)(uintptr_t)-2); arraylist_push(&serialization_queue, (void*) v); size_t idx = serialization_queue.len - 1; assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); + *bp = to_seroder_entry(idx); - *bp = (void*)((char*)HT_NOTFOUND + 1 + idx); + // DataType is very unusual, in that some of the fields need to be pre-order, and some + // (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise + // try to promote itself to be immediate) + if (s->incremental && jl_is_datatype(v) && immediate && recursive) { + jl_datatype_t *dt = (jl_datatype_t*)v; + void **bp = ptrhash_bp(&serialization_order, (void*)dt->super); + if (*bp != (void*)-2) { + // if super is already on the stack of things to handle when this returns, do + // not try to handle it now + jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, immediate); + } + immediate = 0; + char *data = (char*)jl_data_ptr(v); + size_t i, np = layout->npointers; + for (i = 0; i < np; i++) { + uint32_t ptr = jl_ptr_offset(t, i); + if (ptr * sizeof(jl_value_t*) == offsetof(jl_datatype_t, super)) + continue; // skip the super field, since it might not be quite validly ordered + int mutabl = 1; + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl); + jl_queue_for_serialization_(s, fld, 1, immediate); + } + } } + static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED { if (!jl_needs_serialization(s, v)) @@ -893,28 +1081,19 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i } void **bp = ptrhash_bp(&serialization_order, v); - if (*bp == HT_NOTFOUND) { - *bp = (void*)(uintptr_t)(immediate ? 
-2 : -1); - } - else { - if (!s->incremental || !immediate || !recursive) - return; - void **bp2 = ptrhash_bp(&unique_ready, v); - if (*bp2 == HT_NOTFOUND) - *bp2 = v; // now is unique_ready - else { - assert(*bp != (void*)(uintptr_t)-1); - return; // already was unique_ready - } - assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then - if (*bp == (void*)(uintptr_t)-1) - *bp = (void*)(uintptr_t)-2; // now immediate - } + assert(!immediate || *bp != (void*)(uintptr_t)-2); + if (*bp == HT_NOTFOUND) + *bp = (void*)(uintptr_t)-1; // now enqueued + else if (!s->incremental || !immediate || !recursive || *bp != (void*)(uintptr_t)-1) + return; - if (immediate) + if (immediate) { + *bp = (void*)(uintptr_t)-2; // now immediate jl_insert_into_serialization_queue(s, v, recursive, immediate); - else + } + else { arraylist_push(&object_worklist, (void*)v); + } } // Do a pre-order traversal of the to-serialize worklist, in the identical order @@ -980,10 +1159,10 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_ // We found the sysimg/pkg that this item links against // Compute the relocation code size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i]; - offset /= sizeof(void*); - assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large"); - assert(n_linkage_blobs() == jl_array_len(s->buildid_depmods_idxs)); - size_t depsidx = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[i]; // map from build_id_idx -> deps_idx + assert((offset % SYS_EXTERNAL_LINK_UNIT) == 0); + offset /= SYS_EXTERNAL_LINK_UNIT; + assert(n_linkage_blobs() == jl_array_nrows(s->buildid_depmods_idxs)); + size_t depsidx = jl_array_data(s->buildid_depmods_idxs, uint32_t)[i]; // map from build_id_idx -> deps_idx assert(depsidx < INT32_MAX); if (depsidx < ((uintptr_t)1 << (RELOC_TAG_OFFSET - DEPS_IDX_OFFSET)) && offset < ((uintptr_t)1 << DEPS_IDX_OFFSET)) // if it fits in a SysimageLinkage type, use that representation @@ -991,8 +1170,9 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_ // otherwise, we store the image key in `link_ids` assert(link_ids && jl_is_array(link_ids)); jl_array_grow_end(link_ids, 1); - uint32_t *link_id_data = (uint32_t*)jl_array_data(link_ids); // wait until after the `grow` - link_id_data[jl_array_len(link_ids) - 1] = depsidx; + uint32_t *link_id_data = jl_array_data(link_ids, uint32_t); // wait until after the `grow` + link_id_data[jl_array_nrows(link_ids) - 1] = depsidx; + assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large"); return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset; } return 0; @@ -1005,19 +1185,19 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT { assert(v != NULL && "cannot get backref to NULL object"); - void *idx = HT_NOTFOUND; if (jl_is_symbol(v)) { void **pidx = ptrhash_bp(&symbol_table, v); - idx = *pidx; + void *idx = *pidx; if (idx == HT_NOTFOUND) { size_t l = strlen(jl_symbol_name((jl_sym_t*)v)); write_uint32(s->symbols, l); ios_write(s->symbols, jl_symbol_name((jl_sym_t*)v), l + 1); size_t offset = ++nsym_tag; assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many symbols"); - idx = (void*)((char*)HT_NOTFOUND + ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + offset); + idx = to_seroder_entry(offset - 1); *pidx = idx; } + return ((uintptr_t)SymbolRef << 
RELOC_TAG_OFFSET) + from_seroder_entry(idx); } else if (v == (jl_value_t*)s->ptls->root_task) { return (uintptr_t)TagRef << RELOC_TAG_OFFSET; @@ -1045,17 +1225,15 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t * assert(item && "no external linkage identified"); return item; } + void *idx = ptrhash_get(&serialization_order, v); if (idx == HT_NOTFOUND) { - idx = ptrhash_get(&serialization_order, v); - if (idx == HT_NOTFOUND) { - jl_(jl_typeof(v)); - jl_(v); - } - assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass"); - assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass"); - assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass"); + jl_(jl_typeof(v)); + jl_(v); } - return (char*)idx - 1 - (char*)HT_NOTFOUND; + assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass"); + assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass"); + assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass"); + return ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx); } @@ -1064,8 +1242,10 @@ static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t o if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); - else + else if (jl_is_method_instance(fld) || jl_is_binding(fld)) arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset); + else + assert(0 && "unknown object type with needs_uniquing set"); } } @@ -1109,43 +1289,92 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t newm->parent = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent))); arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent, s->link_ids_relocs)); - newm->bindings = NULL; + jl_atomic_store_relaxed(&newm->bindings, NULL); arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindings))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->bindings, s->link_ids_relocs)); - newm->bindingkeyset = NULL; + arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindings), s->link_ids_relocs)); + jl_atomic_store_relaxed(&newm->bindingkeyset, NULL); arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindingkeyset))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->bindingkeyset, s->link_ids_relocs)); - newm->primary_world = ~(size_t)0; + arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindingkeyset), s->link_ids_relocs)); + newm->file = NULL; + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, file))); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->file, s->link_ids_relocs)); // write out the usings list memset(&newm->usings._space, 0, sizeof(newm->usings._space)); if (m->usings.items == &m->usings._space[0]) { - newm->usings.items = (void**)offsetof(jl_module_t, usings._space); + // Push these relocations here, to keep them in order. This pairs with the `newm->usings.items = ` below. 
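/* --- Editorial sketch (not part of the patch) ----------------------------------------
   Every (location, target) pair pushed onto relocs_list encodes the target as a tagged
   word: a RefTags value (DataRef, SymbolRef, ConstDataRef, ...) in the high bits and a
   table index or byte offset in the low bits, exactly as in the backref_id expressions
   above. Assuming a hypothetical tag position (the real RELOC_TAG_OFFSET is defined
   earlier in staticdata.c), the pack/unpack round trip looks like:

       #include <assert.h>
       #include <stdint.h>

       #define SKETCH_TAG_OFFSET 61u                         // assumed width, for illustration only
       enum sketch_tag { SKETCH_DATAREF = 1, SKETCH_SYMBOLREF = 4 };   // hypothetical tag values

       static uint64_t sketch_pack(unsigned tag, uint64_t offset)
       {
           assert(offset < ((uint64_t)1 << SKETCH_TAG_OFFSET));   // offset must fit below the tag
           return ((uint64_t)tag << SKETCH_TAG_OFFSET) + offset;
       }
       static unsigned sketch_tag_of(uint64_t r)    { return (unsigned)(r >> SKETCH_TAG_OFFSET); }
       static uint64_t sketch_offset_of(uint64_t r) { return r & (((uint64_t)1 << SKETCH_TAG_OFFSET) - 1); }

       // sketch_pack(SKETCH_DATAREF, 42) round-trips to tag SKETCH_DATAREF, offset 42.
   ------------------------------------------------------------------------------------- */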
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings.items))); arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); size_t i; - for (i = 0; i < m->usings.len; i++) { - arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[i]))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i], s->link_ids_relocs)); + for (i = 0; i < module_usings_length(m); i++) { + struct _jl_module_using *newm_data = module_usings_getidx(newm, i); + struct _jl_module_using *data = module_usings_getidx(m, i); + // TODO: Remove dead entries + newm_data->min_world = data->min_world; + newm_data->max_world = data->max_world; + if (s->incremental) { + if (data->max_world != (size_t)-1) + newm_data->max_world = 0; + newm_data->min_world = 0; + } + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[3*i]))); + arraylist_push(&s->relocs_list, (void*)backref_id(s, data->mod, s->link_ids_relocs)); } + newm->usings.items = (void**)offsetof(jl_module_t, usings._space); } else { newm->usings.items = (void**)tot; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings.items))); arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); size_t i; - for (i = 0; i < m->usings.len; i++) { - write_pointerfield(s, (jl_value_t*)m->usings.items[i]); - tot += sizeof(void*); + for (i = 0; i < module_usings_length(m); i++) { + struct _jl_module_using *data = module_usings_getidx(m, i); + write_pointerfield(s, (jl_value_t*)data->mod); + write_uint(s->s, data->min_world); + write_uint(s->s, data->max_world); + static_assert(sizeof(struct _jl_module_using) == 3*sizeof(void*), "_jl_module_using mismatch"); + tot += sizeof(struct _jl_module_using); } - for (; i < m->usings.max; i++) { + for (; i < module_usings_max(m); i++) { write_pointer(s->s); - tot += sizeof(void*); + write_uint(s->s, 0); + write_uint(s->s, 0); + tot += sizeof(struct _jl_module_using); } } assert(ios_pos(s->s) - reloc_offset == tot); } +static void record_memoryref(jl_serializer_state *s, size_t reloc_offset, jl_genericmemoryref_t ref) { + ios_t *f = s->s; + // make some header modifications in-place + jl_genericmemoryref_t *newref = (jl_genericmemoryref_t*)&f->buf[reloc_offset]; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ref.mem))->layout; + if (!layout->flags.arrayelem_isunion && layout->size != 0) { + newref->ptr_or_offset = (void*)((char*)ref.ptr_or_offset - (char*)ref.mem->ptr); // relocation offset (bytes) + arraylist_push(&s->memref_list, (void*)reloc_offset); // relocation location + arraylist_push(&s->memref_list, NULL); // relocation target (ignored) + } +} + +static void record_memoryrefs_inside(jl_serializer_state *s, jl_datatype_t *t, size_t reloc_offset, const char *data) +{ + assert(jl_is_datatype(t)); + size_t i, nf = jl_datatype_nfields(t); + for (i = 0; i < nf; i++) { + size_t offset = jl_field_offset(t, i); + if (jl_field_isptr(t, i)) + continue; + jl_value_t *ft = jl_field_type_concrete(t, i); + if (jl_is_uniontype(ft)) + continue; + if (jl_is_genericmemoryref_type(ft)) + record_memoryref(s, reloc_offset + offset, *(jl_genericmemoryref_t*)(data + offset)); + else + record_memoryrefs_inside(s, (jl_datatype_t*)ft, reloc_offset + offset, data + offset); + } +} + static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT { for (size_t i = 0; i < globals->len; i++) @@ -1188,25 
+1417,36 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED JL_GC_PROMISE_ROOTED(v); assert(!(s->incremental && jl_object_in_image(v))); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption"); + assert((!jl_is_datatype_singleton(t) || t->instance == v) && "detected singleton construction corruption"); + int mutabl = t->name->mutabl; ios_t *f = s->s; if (t->smalltag) { if (t->layout->npointers == 0 || t == jl_string_type) { - if (jl_datatype_nfields(t) == 0 || t->name->mutabl == 0 || t == jl_string_type) { + if (jl_datatype_nfields(t) == 0 || mutabl == 0 || t == jl_string_type) { f = s->const_data; } } } - // realign stream to expected gc alignment (16 bytes) + // realign stream to expected gc alignment (16 bytes) after tag uintptr_t skip_header_pos = ios_pos(f) + sizeof(jl_taggedvalue_t); + uintptr_t object_id_expected = mutabl && + t != jl_datatype_type && + t != jl_typename_type && + t != jl_string_type && + t != jl_simplevector_type && + t != jl_module_type; + if (object_id_expected) + skip_header_pos += sizeof(size_t); write_padding(f, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos); // write header + if (object_id_expected) + write_uint(f, jl_object_id(v)); if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t)) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1)); if (f == s->const_data) - write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED); + write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED | GC_IN_IMAGE); else write_gctaggedfield(s, t); size_t reloc_offset = ios_pos(f); @@ -1215,7 +1455,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED if (s->incremental) { if (needs_uniquing(v)) { - if (jl_is_method_instance(v)) { + if (jl_typetagis(v, jl_binding_type)) { + jl_binding_t *b = (jl_binding_t*)v; + if (b->globalref == NULL) + jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity + write_pointerfield(s, (jl_value_t*)b->globalref->mod); + write_pointerfield(s, (jl_value_t*)b->globalref->name); + continue; + } + else if (jl_is_method_instance(v)) { assert(f == s->s); jl_method_instance_t *mi = (jl_method_instance_t*)v; write_pointerfield(s, mi->def.value); @@ -1223,120 +1471,149 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED write_pointerfield(s, (jl_value_t*)mi->sparam_vals); continue; } - else if (!jl_is_datatype(v)) { + else if (jl_is_datatype(v)) { + for (size_t i = 0; i < s->uniquing_super.len; i++) { + if (s->uniquing_super.items[i] == (void*)v) { + s->uniquing_super.items[i] = arraylist_pop(&s->uniquing_super); + arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(reloc_offset|3)); + } + } + } + else { assert(jl_is_datatype_singleton(t) && "unreachable"); } } else if (needs_recaching(v)) { arraylist_push(jl_is_datatype(v) ? 
&s->fixup_types : &s->fixup_objs, (void*)reloc_offset); } - else if (jl_typetagis(v, jl_binding_type)) { - jl_binding_t *b = (jl_binding_t*)v; - if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod)) - jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity - } } // write data if (jl_is_array(v)) { assert(f == s->s); // Internal data for types in julia.h with `jl_array_t` field(s) -#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(v)); - size_t alen = jl_array_len(ar); - size_t datasize = alen * ar->elsize; - size_t tot = datasize; - int isbitsunion = jl_array_isbitsunion(ar); - if (isbitsunion) - tot += alen; - else if (ar->elsize == 1) - tot += 1; - int ndimwords = jl_array_ndimwords(ar->flags.ndims); - size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t); // copy header + size_t headersize = sizeof(jl_array_t) + jl_array_ndims(ar)*sizeof(size_t); ios_write(f, (char*)v, headersize); - size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT; - if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) - alignment_amt = JL_CACHE_BYTE_ALIGNMENT; // make some header modifications in-place jl_array_t *newa = (jl_array_t*)&f->buf[reloc_offset]; - if (newa->flags.ndims == 1) - newa->maxsize = alen; - newa->offset = 0; - newa->flags.how = 0; - newa->flags.pooled = 0; - newa->flags.isshared = 0; - - // write data - if (!ar->flags.ptrarray && !ar->flags.hasptr) { - // Non-pointer eltypes get encoded in the const_data section - uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt); - write_padding(s->const_data, data - ios_pos(s->const_data)); - // write data and relocations - newa->data = NULL; // relocation offset - data /= sizeof(void*); - assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large"); - arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target - if (jl_is_cpointer_type(et)) { - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - const intptr_t *data = (const intptr_t*)jl_array_data(ar); - size_t i; - for (i = 0; i < alen; i++) { - if (data[i] != -1) - write_pointer(s->const_data); - else - ios_write(s->const_data, (char*)&data[i], sizeof(data[i])); - } - } - else { - if (isbitsunion) { - ios_write(s->const_data, (char*)jl_array_data(ar), datasize); - ios_write(s->const_data, jl_array_typetagdata(ar), alen); + newa->ref.mem = NULL; // relocation offset + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, ref.mem))); // relocation location + jl_value_t *mem = get_replaceable_field((jl_value_t**)&ar->ref.mem, 1); + arraylist_push(&s->relocs_list, (void*)backref_id(s, mem, s->link_ids_relocs)); // relocation target + record_memoryref(s, reloc_offset + offsetof(jl_array_t, ref), ar->ref); + } + else if (jl_is_genericmemory(v)) { + assert(f == s->s); + // Internal data for types in julia.h with `jl_genericmemory_t` field(s) + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + const jl_datatype_layout_t *layout = t->layout; + size_t len = m->length; + // if (jl_genericmemory_how(m) == 3) { + // jl_value_t *owner = jl_genericmemory_data_owner_field(m); + // write_uint(f, len); + // write_pointerfield(s, owner); + // write_pointerfield(s, owner); + // jl_genericmemory_t *new_mem = 
(jl_genericmemory_t*)&f->buf[reloc_offset]; + // assert(new_mem->ptr == NULL); + // new_mem->ptr = (void*)((char*)m->ptr - (char*)owner); // relocation offset + // } + // else + { + size_t datasize = len * layout->size; + size_t tot = datasize; + int isbitsunion = layout->flags.arrayelem_isunion; + if (isbitsunion) + tot += len; + size_t headersize = sizeof(jl_genericmemory_t); + // copy header + ios_write(f, (char*)v, headersize); + // write data + if (!layout->flags.arrayelem_isboxed && layout->first_ptr < 0) { + // set owner to NULL + write_pointer(f); + // Non-pointer eltypes get encoded in the const_data section + size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT; + if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) + alignment_amt = JL_CACHE_BYTE_ALIGNMENT; + uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt); + write_padding(s->const_data, data - ios_pos(s->const_data)); + // write data and relocations + jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset]; + new_mem->ptr = NULL; // relocation offset + data /= sizeof(void*); + assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large"); + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target + jl_value_t *et = jl_tparam1(t); + if (jl_is_cpointer_type(et)) { + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) + const intptr_t *data = (const intptr_t*)m->ptr; + size_t i; + for (i = 0; i < len; i++) { + if (data[i] != -1) + write_pointer(s->const_data); + else + ios_write(s->const_data, (char*)&data[i], sizeof(data[i])); + } } else { - ios_write(s->const_data, (char*)jl_array_data(ar), tot); + if (isbitsunion) { + ios_write(s->const_data, (char*)m->ptr, datasize); + ios_write(s->const_data, jl_genericmemory_typetagdata(m), len); + } + else { + ios_write(s->const_data, (char*)m->ptr, tot); + } + } + if (len == 0) { // TODO: should we have a zero-page, instead of writing each type's fragment separately? + write_padding(s->const_data, layout->size ? 
layout->size : isbitsunion); + } + else if (jl_genericmemory_how(m) == 3) { + assert(jl_is_string(jl_genericmemory_data_owner_field(m))); + write_padding(s->const_data, 1); } } - } - else { - // Pointer eltypes are encoded in the mutable data section - size_t data = LLT_ALIGN(ios_pos(f), alignment_amt); - size_t padding_amt = data - ios_pos(f); - headersize += padding_amt; - newa->data = (void*)headersize; // relocation offset - arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target - write_padding(f, padding_amt); - if (ar->flags.hasptr) { - // copy all of the data first - const char *data = (const char*)jl_array_data(ar); - ios_write(f, data, datasize); - // the rewrite all of the embedded pointers to null+relocation - uint16_t elsz = ar->elsize; - size_t j, np = ((jl_datatype_t*)et)->layout->npointers; - size_t i; - for (i = 0; i < alen; i++) { - for (j = 0; j < np; j++) { - size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*); - jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1); - size_t fld_pos = reloc_offset + headersize + offset; - if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target - record_uniquing(s, fld, fld_pos); + else { + // Pointer eltypes are encoded in the mutable data section + headersize = LLT_ALIGN(headersize, JL_SMALL_BYTE_ALIGNMENT); + size_t data = LLT_ALIGN(ios_pos(f), JL_SMALL_BYTE_ALIGNMENT); + write_padding(f, data - ios_pos(f)); + assert(reloc_offset + headersize == ios_pos(f)); + jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset]; + new_mem->ptr = (void*)headersize; // relocation offset + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target + if (!layout->flags.arrayelem_isboxed) { + // copy all of the data first + const char *data = (const char*)m->ptr; + ios_write(f, data, datasize); + // the rewrite all of the embedded pointers to null+relocation + uint16_t elsz = layout->size; + size_t j, np = layout->first_ptr < 0 ? 
0 : layout->npointers; + size_t i; + for (i = 0; i < len; i++) { + for (j = 0; j < np; j++) { + size_t offset = i * elsz + jl_ptr_offset(t, j) * sizeof(jl_value_t*); + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1); + size_t fld_pos = reloc_offset + headersize + offset; + if (fld != NULL) { + arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); + } + memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } - memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } } - } - else { - jl_value_t **data = (jl_value_t**)jl_array_data(ar); - size_t i; - for (i = 0; i < alen; i++) { - jl_value_t *e = get_replaceable_field(&data[i], 1); - write_pointerfield(s, e); + else { + jl_value_t **data = (jl_value_t**)m->ptr; + size_t i; + for (i = 0; i < len; i++) { + jl_value_t *e = get_replaceable_field(&data[i], 1); + write_pointerfield(s, e); + } } } } @@ -1366,7 +1643,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED } else if (jl_datatype_nfields(t) == 0) { // The object has no fields, so we just snapshot its byte representation - assert(!t->layout->npointers); assert(t->layout->npointers == 0); ios_write(f, (char*)v, jl_datatype_size(t)); } @@ -1389,6 +1665,26 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED ios_write(s->const_data, (char*)pdata, nb); write_pointer(f); } + else if (jl_is_binding_partition(v)) { + jl_binding_partition_t *bpart = (jl_binding_partition_t*)v; + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + jl_value_t *restriction_val = decode_restriction_value(pku); + static_assert(offsetof(jl_binding_partition_t, restriction) == 0, "BindingPartition layout mismatch"); + write_pointerfield(s, restriction_val); +#ifndef _P64 + write_uint(f, decode_restriction_kind(pku)); +#endif + write_uint(f, bpart->min_world); + write_uint(f, jl_atomic_load_relaxed(&bpart->max_world)); + write_pointerfield(s, (jl_value_t*)jl_atomic_load_relaxed(&bpart->next)); +#ifdef _P64 + write_uint(f, decode_restriction_kind(pku)); // This will be moved back into place during deserialization (if necessary) + static_assert(sizeof(jl_binding_partition_t) == 5*sizeof(void*), "BindingPartition layout mismatch"); +#else + write_uint(f, 0); + static_assert(sizeof(jl_binding_partition_t) == 6*sizeof(void*), "BindingPartition layout mismatch"); +#endif + } else { // Generic object::DataType serialization by field const char *data = (const char*)v; @@ -1400,7 +1696,23 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED write_padding(f, offset - tot); tot = offset; size_t fsz = jl_field_size(t, i); - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(intptr_t*)slot != -1) { + jl_value_t *replace = (jl_value_t*)ptrhash_get(&bits_replace, (void*)slot); + if (replace != HT_NOTFOUND) { + assert(t->name->mutabl && !jl_field_isptr(t, i)); + jl_value_t *rty = jl_typeof(replace); + size_t sz = jl_datatype_size(rty); + ios_write(f, (const char*)replace, sz); + jl_value_t *ft = jl_field_type_concrete(t, i); + int isunion = jl_is_uniontype(ft); + unsigned nth = 0; + if (!jl_find_union_component(ft, rty, &nth)) + assert(0 && "invalid field assignment to isbits union"); + assert(sz <= fsz - isunion); + write_padding(f, fsz - sz - isunion); + if (isunion) + write_uint8(f, nth); + } + else if (t->name->mutabl && 
jl_is_cpointer_type(jl_field_type_concrete(t, i)) && *(intptr_t*)slot != -1) { // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) assert(!jl_field_isptr(t, i)); write_pointer(f); @@ -1415,8 +1727,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED for (i = 0; i < np; i++) { size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*); int mutabl = t->name->mutabl; - if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field - mutabl = 0; jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], mutabl); size_t fld_pos = offset + reloc_offset; if (fld != NULL) { @@ -1427,21 +1737,24 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } - // A few objects need additional handling beyond the generic serialization above + // Need do a tricky fieldtype walk an record all memoryref we find inlined in this value + record_memoryrefs_inside(s, t, reloc_offset, data); + // A few objects need additional handling beyond the generic serialization above if (s->incremental && jl_typetagis(v, jl_typemap_entry_type)) { assert(f == s->s); jl_typemap_entry_t *newentry = (jl_typemap_entry_t*)&s->s->buf[reloc_offset]; - if (newentry->max_world == ~(size_t)0) { - if (newentry->min_world > 1) { - newentry->min_world = ~(size_t)0; + if (jl_atomic_load_relaxed(&newentry->max_world) == ~(size_t)0) { + if (jl_atomic_load_relaxed(&newentry->min_world) > 1) { + jl_atomic_store_release(&newentry->min_world, ~(size_t)0); + jl_atomic_store_release(&newentry->max_world, WORLD_AGE_REVALIDATION_SENTINEL); arraylist_push(&s->fixup_objs, (void*)reloc_offset); } } else { // garbage newentry - delete it :( - newentry->min_world = 1; - newentry->max_world = 0; + jl_atomic_store_release(&newentry->min_world, 1); + jl_atomic_store_release(&newentry->max_world, 0); } } else if (jl_is_method(v)) { @@ -1450,12 +1763,19 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED jl_method_t *m = (jl_method_t*)v; jl_method_t *newm = (jl_method_t*)&f->buf[reloc_offset]; if (s->incremental) { - if (newm->deleted_world != ~(size_t)0) - newm->deleted_world = 1; - else - arraylist_push(&s->fixup_objs, (void*)reloc_offset); - newm->primary_world = ~(size_t)0; - } else { + if (jl_atomic_load_relaxed(&newm->deleted_world) == ~(size_t)0) { + if (jl_atomic_load_relaxed(&newm->primary_world) > 1) { + jl_atomic_store_relaxed(&newm->primary_world, ~(size_t)0); // min-world + jl_atomic_store_relaxed(&newm->deleted_world, 1); // max_world + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } + } + else { + jl_atomic_store_relaxed(&newm->primary_world, 1); + jl_atomic_store_relaxed(&newm->deleted_world, 0); + } + } + else { newm->nroots_sysimg = m->roots ? 
jl_array_len(m->roots) : 0; } if (m->ccallable) @@ -1464,42 +1784,41 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED else if (jl_is_method_instance(v)) { assert(f == s->s); jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset]; - jl_atomic_store_relaxed(&newmi->precompiled, 0); + jl_atomic_store_relaxed(&newmi->flags, 0); } else if (jl_is_code_instance(v)) { assert(f == s->s); + // Handle the native-code pointers - assert(f == s->s); - jl_code_instance_t *m = (jl_code_instance_t*)v; - jl_code_instance_t *newm = (jl_code_instance_t*)&f->buf[reloc_offset]; + jl_code_instance_t *ci = (jl_code_instance_t*)v; + jl_code_instance_t *newci = (jl_code_instance_t*)&f->buf[reloc_offset]; if (s->incremental) { - arraylist_push(&s->fixup_objs, (void*)reloc_offset); - if (m->min_world > 1) - newm->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization - if (m->max_world != ~(size_t)0) - newm->max_world = 0; - else { - if (m->inferred && ptrhash_has(&s->callers_with_edges, m->def)) - newm->max_world = 1; // sentinel value indicating this will need validation - if (m->min_world > 0 && m->inferred) { - // TODO: also check if this object is part of the codeinst cache - // will check on deserialize if this cache entry is still valid + if (jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) { + if (jl_atomic_load_relaxed(&newci->min_world) > 1) { + //assert(jl_atomic_load_relaxed(&ci->edges) != jl_emptysvec); // some code (such as !==) might add a method lookup restriction but not keep the edges + jl_atomic_store_release(&newci->min_world, ~(size_t)0); + jl_atomic_store_release(&newci->max_world, WORLD_AGE_REVALIDATION_SENTINEL); + arraylist_push(&s->fixup_objs, (void*)reloc_offset); } } + else { + // garbage object - delete it :( + jl_atomic_store_release(&newci->min_world, 1); + jl_atomic_store_release(&newci->max_world, 0); + } } - - newm->invoke = NULL; - newm->specsigflags = 0; - newm->specptr.fptr = NULL; + jl_atomic_store_relaxed(&newci->invoke, NULL); + jl_atomic_store_relaxed(&newci->specsigflags, 0); + jl_atomic_store_relaxed(&newci->specptr.fptr, NULL); int8_t fptr_id = JL_API_NULL; int8_t builtin_id = 0; - if (m->invoke == jl_fptr_const_return) { + if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { fptr_id = JL_API_CONST; } else { - if (jl_is_method(m->def->def.method)) { - builtin_id = jl_fptr_id(m->specptr.fptr); + if (jl_is_method(jl_get_ci_mi(ci)->def.method)) { + builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&ci->specptr.fptr)); if (builtin_id) { // found in the table of builtins assert(builtin_id >= 2); fptr_id = JL_API_BUILTIN; @@ -1507,7 +1826,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED else { int32_t invokeptr_id = 0; int32_t specfptr_id = 0; - jl_get_function_id(native_functions, m, &invokeptr_id, &specfptr_id); // see if we generated code for it + jl_get_function_id(native_functions, ci, &invokeptr_id, &specfptr_id); // see if we generated code for it if (invokeptr_id) { if (invokeptr_id == -1) { fptr_id = JL_API_BOXED; @@ -1515,6 +1834,12 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED else if (invokeptr_id == -2) { fptr_id = JL_API_WITH_PARAMETERS; } + else if (invokeptr_id == -3) { + abort(); + } + else if (invokeptr_id == -4) { + fptr_id = JL_API_OC_CALL; + } else { assert(invokeptr_id > 0); ios_ensureroom(s->fptr_record, invokeptr_id * sizeof(void*)); @@ -1539,7 +1864,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED } } } - 
newm->invoke = NULL; // relocation offset + jl_atomic_store_relaxed(&newci->invoke, NULL); // relocation offset if (fptr_id != JL_API_NULL) { assert(fptr_id < BuiltinFunctionTag && "too many functions to serialize"); arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, invoke))); // relocation location @@ -1559,14 +1884,14 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED size_t nf = dt->layout->nfields; size_t np = dt->layout->npointers; size_t fieldsize = 0; - uint8_t is_foreign_type = dt->layout->fielddesc_type == 3; + uint8_t is_foreign_type = dt->layout->flags.fielddesc_type == 3; if (!is_foreign_type) { - fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); + fieldsize = jl_fielddesc_size(dt->layout->flags.fielddesc_type); } char *flddesc = (char*)dt->layout; size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize; if (!is_foreign_type && dt->layout->first_ptr != -1) - fldsize += np << dt->layout->fielddesc_type; + fldsize += np << dt->layout->flags.fielddesc_type; uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream newdt->layout = NULL; // relocation offset @@ -1582,6 +1907,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t)); } } + void *superidx = ptrhash_get(&serialization_order, dt->super); + if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super)) + arraylist_push(&s->uniquing_super, dt->super); } else if (jl_is_typename(v)) { assert(f == s->s); @@ -1616,16 +1944,16 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED arraylist_push(&s->fixup_objs, (void*)reloc_offset); } } - else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { + else if (jl_is_genericmemoryref(v)) { assert(f == s->s); - // will need to rehash this, later (after types are fully constructed) - arraylist_push(&s->fixup_objs, (void*)reloc_offset); + record_memoryref(s, reloc_offset, *(jl_genericmemoryref_t*)v); } else { write_padding(f, jl_datatype_size(t) - tot); } } } + assert(s->uniquing_super.len == 0); } // In deserialization, create Symbols and set up the @@ -1746,13 +2074,17 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas } switch ((jl_callingconv_t)offset) { case JL_API_BOXED: - if (s->image->fptrs.base) + if (s->image->fptrs.nptrs) return (uintptr_t)jl_fptr_args; - JL_FALLTHROUGH; + return (uintptr_t)NULL; case JL_API_WITH_PARAMETERS: - if (s->image->fptrs.base) + if (s->image->fptrs.nptrs) return (uintptr_t)jl_fptr_sparam; return (uintptr_t)NULL; + case JL_API_OC_CALL: + if (s->image->fptrs.nptrs) + return (uintptr_t)jl_f_opaque_closure_call; + return (uintptr_t)NULL; case JL_API_CONST: return (uintptr_t)jl_fptr_const_return; case JL_API_INTERPRETED: @@ -1772,20 +2104,20 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas size_t depsidx = 0; #endif assert(s->buildid_depmods_idxs && depsidx < jl_array_len(s->buildid_depmods_idxs)); - size_t i = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[depsidx]; + size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx]; assert(2*i < jl_linkage_blobs.len); - return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*); + return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT; } case ExternalLinkage: { 
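/* --- Editorial sketch (not part of the patch) ----------------------------------------
   Both SysimageLinkage and ExternalLinkage targets are resolved the same way: look up
   the destination image's base address in jl_linkage_blobs and rebase the stored offset,
   which add_external_linkage divided by SYS_EXTERNAL_LINK_UNIT (sizeof(void*)) so that
   it stays small. A stand-alone approximation (names hypothetical):

       #include <stdint.h>
       #include <stddef.h>

       typedef struct { char *base; } sketch_image_t;

       static uintptr_t sketch_resolve(const sketch_image_t *images, size_t image_idx,
                                       uintptr_t scaled_offset)
       {
           // multiply back by the unit the serializer divided by
           return (uintptr_t)(images[image_idx].base + scaled_offset * sizeof(void*));
       }
   ------------------------------------------------------------------------------------- */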
assert(link_ids); assert(link_index); assert(0 <= *link_index && *link_index < jl_array_len(link_ids)); - uint32_t depsidx = ((uint32_t*)jl_array_data(link_ids))[*link_index]; + uint32_t depsidx = jl_array_data(link_ids, uint32_t)[*link_index]; *link_index += 1; assert(depsidx < jl_array_len(s->buildid_depmods_idxs)); - size_t i = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[depsidx]; + size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx]; assert(2*i < jl_linkage_blobs.len); - return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*); + return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT; } } abort(); @@ -1872,6 +2204,37 @@ static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint assert(!link_ids || link_index == jl_array_len(link_ids)); } +static void jl_read_memreflist(jl_serializer_state *s) +{ + uintptr_t base = (uintptr_t)s->s->buf; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos); + while (1) { + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + assert(s->relocs->bpos <= s->relocs->size); + assert((char *)current <= (char *)(s->relocs->buf + s->relocs->size)); + int8_t c = *current++; + s->relocs->bpos += 1; + + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) + break; + + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + jl_genericmemoryref_t *pv = (jl_genericmemoryref_t*)(base + pos); + size_t offset = (size_t)pv->ptr_or_offset; + pv->ptr_or_offset = (void*)((char*)pv->mem->ptr + offset); + } +} + + static void jl_read_arraylist(ios_t *s, arraylist_t *list) { size_t list_len = read_uint(s); @@ -1974,11 +2337,10 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) jl_image_fptrs_t fvars = image->fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later image->gvars_base = NULL; - image->fptrs.base = NULL; - if (fvars.base == NULL) + if (fvars.nptrs == 0) return; - memcpy(image->small_typeof, &small_typeof, sizeof(small_typeof)); + memcpy(image->jl_small_typeof, &jl_small_typeof, sizeof(jl_small_typeof)); int img_fvars_max = s->fptr_record->size / sizeof(void*); size_t i; @@ -1998,26 +2360,24 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) offset = ~offset; } jl_code_instance_t *codeinst = (jl_code_instance_t*)(base + offset); - uintptr_t base = (uintptr_t)fvars.base; - assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return); - assert(specfunc ? codeinst->invoke != NULL : codeinst->invoke == NULL); - linfos[i] = codeinst->def; // now it's a MethodInstance - int32_t offset = fvars.offsets[i]; + assert(jl_is_method(jl_get_ci_mi(codeinst)->def.method) && jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return); + assert(specfunc ? 
jl_atomic_load_relaxed(&codeinst->invoke) != NULL : jl_atomic_load_relaxed(&codeinst->invoke) == NULL); + linfos[i] = jl_get_ci_mi(codeinst); // now it's a MethodInstance + void *fptr = fvars.ptrs[i]; for (; clone_idx < fvars.nclones; clone_idx++) { uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask; if (idx < i) continue; if (idx == i) - offset = fvars.clone_offsets[clone_idx]; + fptr = fvars.clone_ptrs[clone_idx]; break; } - void *fptr = (void*)(base + offset); if (specfunc) { - codeinst->specptr.fptr = fptr; - codeinst->specsigflags = 0b111; // TODO: set only if confirmed to be true + jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr); + jl_atomic_store_relaxed(&codeinst->specsigflags, 0b111); // TODO: set only if confirmed to be true } else { - codeinst->invoke = (jl_callptr_t)fptr; + jl_atomic_store_relaxed(&codeinst->invoke,(jl_callptr_t)fptr); } } } @@ -2085,11 +2445,12 @@ static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_ uintptr_t v = *gv; if (i < external_fns_begin) { if (!jl_is_binding(v)) - v = (uintptr_t)jl_as_global_root((jl_value_t*)v); - } else { + v = (uintptr_t)jl_as_global_root((jl_value_t*)v, 1); + } + else { jl_code_instance_t *codeinst = (jl_code_instance_t*) v; - assert(codeinst && (codeinst->specsigflags & 0b01) && codeinst->specptr.fptr); - v = (uintptr_t)codeinst->specptr.fptr; + assert(codeinst && (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b01) && jl_atomic_load_relaxed(&codeinst->specptr.fptr)); + v = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); } *gv = v; } @@ -2123,6 +2484,7 @@ static void jl_reinit_ccallable(arraylist_t *ccallable_list, char *base, void *s static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED { size_t l = jl_svec_len(cache), i; + size_t sz = 0; if (l == 0) return cache; for (i = 0; i < l; i++) { @@ -2131,14 +2493,19 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED continue; if (ptrhash_get(&serialization_order, ti) == HT_NOTFOUND) jl_svecset(cache, i, jl_nothing); + else + sz += 1; } + if (sz < HT_N_INLINE) + sz = HT_N_INLINE; + void *idx = ptrhash_get(&serialization_order, cache); assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1); - assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache); - cache = cache_rehash_set(cache, l); + assert(serialization_queue.items[from_seroder_entry(idx)] == cache); + cache = cache_rehash_set(cache, sz); // redirect all references to the old cache to relocate to the new cache object ptrhash_put(&serialization_order, cache, idx); - serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache; + serialization_queue.items[from_seroder_entry(idx)] = cache; return cache; } @@ -2156,35 +2523,80 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache) jl_svecset(cache, ins++, jl_nothing); } -static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig) +uint_t bindingkey_hash(size_t idx, jl_value_t *data); + +static void jl_prune_module_bindings(jl_module_t * m) JL_GC_DISABLED +{ + jl_svec_t * bindings = jl_atomic_load_relaxed(&m->bindings); + size_t l = jl_svec_len(bindings), i; + arraylist_t bindings_list; + arraylist_new(&bindings_list, 0); + if (l == 0) + return; + for (i = 0; i < l; i++) { + jl_value_t *ti = jl_svecref(bindings, i); + if (ti == jl_nothing) + continue; + jl_binding_t *ref = ((jl_binding_t*)ti); + if (!((ptrhash_get(&serialization_order, ref) == HT_NOTFOUND) && + (ptrhash_get(&serialization_order, ref->globalref) 
== HT_NOTFOUND))) { + jl_svecset(bindings, i, jl_nothing); + arraylist_push(&bindings_list, ref); + } + } + jl_genericmemory_t* bindingkeyset = jl_atomic_load_relaxed(&m->bindingkeyset); + _Atomic(jl_genericmemory_t*)bindingkeyset2; + jl_atomic_store_relaxed(&bindingkeyset2,(jl_genericmemory_t*)jl_an_empty_memory_any); + jl_svec_t *bindings2 = jl_alloc_svec_uninit(bindings_list.len); + for (i = 0; i < bindings_list.len; i++) { + jl_binding_t *ref = (jl_binding_t*)bindings_list.items[i]; + jl_svecset(bindings2, i, ref); + jl_smallintset_insert(&bindingkeyset2, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings2); + } + void *idx = ptrhash_get(&serialization_order, bindings); + assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindings); + ptrhash_put(&serialization_order, bindings2, idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = bindings2; + + idx = ptrhash_get(&serialization_order, bindingkeyset); + assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindingkeyset); + ptrhash_put(&serialization_order, jl_atomic_load_relaxed(&bindingkeyset2), idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = jl_atomic_load_relaxed(&bindingkeyset2); + jl_atomic_store_relaxed(&m->bindings, bindings2); + jl_atomic_store_relaxed(&m->bindingkeyset, jl_atomic_load_relaxed(&bindingkeyset2)); + jl_gc_wb(m, bindings2); + jl_gc_wb(m, jl_atomic_load_relaxed(&bindingkeyset2)); +} + +static void strip_slotnames(jl_array_t *slotnames) +{ + // replace slot names with `?`, except unused_sym since the compiler looks at it + jl_sym_t *questionsym = jl_symbol("?"); + int i, l = jl_array_len(slotnames); + for (i = 0; i < l; i++) { + jl_value_t *s = jl_array_ptr_ref(slotnames, i); + if (s != (jl_value_t*)jl_unused_sym) + jl_array_ptr_set(slotnames, i, questionsym); + } +} + +static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, jl_code_instance_t *codeinst) { jl_code_info_t *ci = NULL; JL_GC_PUSH1(&ci); int compressed = 0; if (!jl_is_code_info(ci_)) { compressed = 1; - ci = jl_uncompress_ir(m, NULL, (jl_value_t*)ci_); + ci = jl_uncompress_ir(m, codeinst, (jl_value_t*)ci_); } else { ci = (jl_code_info_t*)ci_; } - // leave codelocs length the same so the compiler can assume that; just zero it - memset(jl_array_data(ci->codelocs), 0, jl_array_len(ci->codelocs)*sizeof(int32_t)); - // empty linetable - if (jl_is_array(ci->linetable)) - jl_array_del_end((jl_array_t*)ci->linetable, jl_array_len(ci->linetable)); - // replace slot names with `?`, except unused_sym since the compiler looks at it - jl_sym_t *questionsym = jl_symbol("?"); - int i, l = jl_array_len(ci->slotnames); - for (i = 0; i < l; i++) { - jl_value_t *s = jl_array_ptr_ref(ci->slotnames, i); - if (s != (jl_value_t*)jl_unused_sym) - jl_array_ptr_set(ci->slotnames, i, questionsym); - } - if (orig) { - m->slot_syms = jl_compress_argnames(ci->slotnames); - jl_gc_wb(m, m->slot_syms); - } + strip_slotnames(ci->slotnames); + ci->debuginfo = jl_nulldebuginfo; + jl_gc_wb(ci, ci->debuginfo); jl_value_t *ret = (jl_value_t*)ci; if (compressed) ret = (jl_value_t*)jl_compress_ir(m, ci); @@ -2201,20 +2613,21 @@ static void strip_specializations_(jl_method_instance_t *mi) if (inferred && inferred != jl_nothing) { if (jl_options.strip_ir) { record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing); + 
record_field_change((jl_value_t**)&codeinst->edges, (jl_value_t*)jl_emptysvec); } else if (jl_options.strip_metadata) { - jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, 0); + jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, codeinst); if (jl_atomic_cmpswap_relaxed(&codeinst->inferred, &inferred, stripped)) { jl_gc_wb(codeinst, stripped); } } } + if (jl_options.strip_metadata) + record_field_change((jl_value_t**)&codeinst->debuginfo, (jl_value_t*)jl_nulldebuginfo); codeinst = jl_atomic_load_relaxed(&codeinst->next); } if (jl_options.strip_ir) { - record_field_change((jl_value_t**)&mi->uninferred, NULL); record_field_change((jl_value_t**)&mi->backedges, NULL); - record_field_change((jl_value_t**)&mi->callbacks, NULL); } } @@ -2224,29 +2637,48 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) if (m->source) { int stripped_ir = 0; if (jl_options.strip_ir) { - if (m->unspecialized) { - jl_code_instance_t *unspec = jl_atomic_load_relaxed(&m->unspecialized->cache); - if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) { - // we have a generic compiled version, so can remove the IR - record_field_change(&m->source, jl_nothing); - stripped_ir = 1; + int should_strip_ir = jl_options.trim; + if (!should_strip_ir) { + if (jl_atomic_load_relaxed(&m->unspecialized)) { + jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache); + if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) { + // we have a generic compiled version, so can remove the IR + should_strip_ir = 1; + } } } - if (!stripped_ir) { + if (!should_strip_ir) { int mod_setting = jl_get_module_compile(m->module); - // if the method is declared not to be compiled, keep IR for interpreter if (!(mod_setting == JL_OPTIONS_COMPILE_OFF || mod_setting == JL_OPTIONS_COMPILE_MIN)) { - record_field_change(&m->source, jl_nothing); - stripped_ir = 1; + // if the method is declared not to be compiled, keep IR for interpreter + should_strip_ir = 1; } } + if (should_strip_ir) { + record_field_change(&m->source, jl_nothing); + record_field_change((jl_value_t**)&m->roots, NULL); + stripped_ir = 1; + } } - if (jl_options.strip_metadata && !stripped_ir) { - m->source = strip_codeinfo_meta(m, m->source, 1); - jl_gc_wb(m, m->source); + if (jl_options.strip_metadata) { + if (!stripped_ir) { + m->source = strip_codeinfo_meta(m, m->source, NULL); + jl_gc_wb(m, m->source); + } + jl_array_t *slotnames = jl_uncompress_argnames(m->slot_syms); + JL_GC_PUSH1(&slotnames); + strip_slotnames(slotnames); + m->slot_syms = jl_compress_argnames(slotnames); + jl_gc_wb(m, m->slot_syms); + JL_GC_POP(); } } - jl_value_t *specializations = m->specializations; + if (jl_options.strip_metadata) { + record_field_change((jl_value_t**)&m->file, (jl_value_t*)jl_empty_sym); + m->line = 0; + record_field_change((jl_value_t**)&m->debuginfo, (jl_value_t*)jl_nulldebuginfo); + } + jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations); if (!jl_is_svec(specializations)) { strip_specializations_((jl_method_instance_t*)specializations); } @@ -2258,8 +2690,8 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) strip_specializations_((jl_method_instance_t*)mi); } } - if (m->unspecialized) - strip_specializations_(m->unspecialized); + if (jl_atomic_load_relaxed(&m->unspecialized)) + strip_specializations_(jl_atomic_load_relaxed(&m->unspecialized)); if (jl_options.strip_ir && m->root_blocks) record_field_change((jl_value_t**)&m->root_blocks, NULL); 
return 1; @@ -2269,7 +2701,7 @@ static int strip_all_codeinfos_(jl_methtable_t *mt, void *_env) { if (jl_options.strip_ir && mt->backedges) record_field_change((jl_value_t**)&mt->backedges, NULL); - return jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL); + return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), strip_all_codeinfos__, NULL); } static void jl_strip_all_codeinfos(void) @@ -2279,12 +2711,79 @@ static void jl_strip_all_codeinfos(void) // --- entry points --- -jl_array_t *jl_global_roots_table; +jl_genericmemory_t *jl_global_roots_list; +jl_genericmemory_t *jl_global_roots_keyset; jl_mutex_t global_roots_lock; +extern jl_mutex_t world_counter_lock; + +jl_mutex_t precompile_field_replace_lock; +jl_svec_t *precompile_field_replace JL_GLOBALLY_ROOTED; + +static inline jl_value_t *get_checked_fieldindex(const char *name, jl_datatype_t *st, jl_value_t *v, jl_value_t *arg, int mutabl) +{ + if (mutabl) { + if (st == jl_module_type) + jl_error("cannot assign variables in other modules"); + if (!st->name->mutabl) + jl_errorf("%s: immutable struct of type %s cannot be changed", name, jl_symbol_name(st->name->name)); + } + size_t idx; + if (jl_is_long(arg)) { + idx = jl_unbox_long(arg) - 1; + if (idx >= jl_datatype_nfields(st)) + jl_bounds_error(v, arg); + } + else if (jl_is_symbol(arg)) { + idx = jl_field_index(st, (jl_sym_t*)arg, 1); + arg = jl_box_long(idx); + } + else { + jl_value_t *ts[2] = {(jl_value_t*)jl_long_type, (jl_value_t*)jl_symbol_type}; + jl_value_t *t = jl_type_union(ts, 2); + jl_type_error(name, t, arg); + } + if (mutabl && jl_field_isconst(st, idx)) { + jl_errorf("%s: const field .%s of type %s cannot be changed", name, + jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(st), idx)), jl_symbol_name(st->name->name)); + } + return arg; +} + +JL_DLLEXPORT void jl_set_precompile_field_replace(jl_value_t *val, jl_value_t *field, jl_value_t *newval) +{ + if (!jl_generating_output()) + return; + jl_datatype_t *st = (jl_datatype_t*)jl_typeof(val); + jl_value_t *idx = get_checked_fieldindex("setfield!", st, val, field, 1); + JL_GC_PUSH1(&idx); + size_t idxval = jl_unbox_long(idx); + jl_value_t *ft = jl_field_type_concrete(st, idxval); + if (!jl_isa(newval, ft)) + jl_type_error("setfield!", ft, newval); + JL_LOCK(&precompile_field_replace_lock); + if (precompile_field_replace == NULL) { + precompile_field_replace = jl_alloc_svec(3); + jl_svecset(precompile_field_replace, 0, jl_alloc_vec_any(0)); + jl_svecset(precompile_field_replace, 1, jl_alloc_vec_any(0)); + jl_svecset(precompile_field_replace, 2, jl_alloc_vec_any(0)); + } + jl_array_ptr_1d_push((jl_array_t*)jl_svecref(precompile_field_replace, 0), val); + jl_array_ptr_1d_push((jl_array_t*)jl_svecref(precompile_field_replace, 1), idx); + jl_array_ptr_1d_push((jl_array_t*)jl_svecref(precompile_field_replace, 2), newval); + JL_GC_POP(); + JL_UNLOCK(&precompile_field_replace_lock); +} + JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT { - if (jl_is_concrete_type(val) || jl_is_bool(val) || jl_is_symbol(val) || + if (jl_is_datatype(val)) { + jl_datatype_t *dt = (jl_datatype_t*)val; + if (jl_unwrap_unionall(dt->name->wrapper) == val) + return 1; + return (jl_is_tuple_type(val) ? 
dt->isconcretetype : !dt->hasfreetypevars); // aka is_cacheable from jltypes.c + } + if (jl_is_bool(val) || jl_is_symbol(val) || val == (jl_value_t*)jl_any_type || val == (jl_value_t*)jl_bottom_type || val == (jl_value_t*)jl_core_module) return 1; if (val == ((jl_datatype_t*)jl_typeof(val))->instance) @@ -2292,10 +2791,21 @@ JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOT return 0; } -JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED) +static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED +{ + t = jl_unwrap_unionall(t); + if (jl_is_datatype(t)) + return ((jl_datatype_t*)t)->name->wrapper; + return NULL; +} + +JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) { if (jl_is_globally_rooted(val)) return val; + jl_value_t *tw = extract_wrapper(val); + if (tw && (val == tw || jl_types_egal(val, tw))) + return tw; if (jl_is_uint8(val)) return jl_box_uint8(jl_unbox_uint8(val)); if (jl_is_int32(val)) { @@ -2308,40 +2818,43 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED) if ((uint64_t)(n+512) < 1024) return jl_box_int64(n); } - JL_GC_PUSH1(&val); + // check table before acquiring lock to reduce writer contention + jl_value_t *rval = jl_idset_get(jl_global_roots_list, jl_global_roots_keyset, val); + if (rval) + return rval; JL_LOCK(&global_roots_lock); - jl_value_t *rval = jl_eqtable_getkey(jl_global_roots_table, val, NULL); + rval = jl_idset_get(jl_global_roots_list, jl_global_roots_keyset, val); if (rval) { val = rval; } + else if (insert) { + ssize_t idx; + jl_global_roots_list = jl_idset_put_key(jl_global_roots_list, val, &idx); + jl_global_roots_keyset = jl_idset_put_idx(jl_global_roots_list, jl_global_roots_keyset, idx); + } else { - jl_global_roots_table = jl_eqtable_put(jl_global_roots_table, val, jl_nothing, NULL); + val = NULL; } JL_UNLOCK(&global_roots_lock); - JL_GC_POP(); return val; } static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key, - /* outputs */ jl_array_t **extext_methods, jl_array_t **new_specializations, - jl_array_t **method_roots_list, jl_array_t **ext_targets, jl_array_t **edges) + /* outputs */ jl_array_t **extext_methods JL_REQUIRE_ROOTED_SLOT, + jl_array_t **new_ext_cis JL_REQUIRE_ROOTED_SLOT, + jl_array_t **method_roots_list JL_REQUIRE_ROOTED_SLOT, + jl_array_t **edges JL_REQUIRE_ROOTED_SLOT) { // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist - // ext_targets: [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods - // ordinary dispatch: invokesig=NULL, callee is MethodInstance - // `invoke` dispatch: invokesig is signature, callee is MethodInstance - // abstract call: callee is signature - // edges: [caller1, ext_targets_indexes1, ...] for worklist-owned methods calling external methods - assert(edges_map == NULL); + // edges: [caller1, ext_targets, ...] 
for worklist-owned methods calling external methods // Save the inferred code from newly inferred, external methods - *new_specializations = queue_external_cis(newly_inferred); + *new_ext_cis = queue_external_cis(newly_inferred); // Collect method extensions and edges data - JL_GC_PUSH1(&edges_map); - if (edges) - edges_map = jl_alloc_vec_any(0); *extext_methods = jl_alloc_vec_any(0); + internal_methods = jl_alloc_vec_any(0); + JL_GC_PUSH1(&internal_methods); jl_collect_methtable_from_mod(jl_type_type_mt, *extext_methods); jl_collect_methtable_from_mod(jl_nonfunction_mt, *extext_methods); size_t i, len = jl_array_len(mod_array); @@ -2354,18 +2867,14 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new if (edges) { size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_collect_missing_backedges(jl_type_type_mt); - jl_collect_missing_backedges(jl_nonfunction_mt); - // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in callers_with_edges. - // Process this to extract `edges` and `ext_targets`. - *ext_targets = jl_alloc_vec_any(0); - *edges = jl_alloc_vec_any(0); + // Extract `new_ext_cis` and `edges` now (from info prepared by jl_collect_methcache_from_mod) *method_roots_list = jl_alloc_vec_any(0); - // Collect the new method roots - jl_collect_new_roots(*method_roots_list, *new_specializations, worklist_key); - jl_collect_edges(*edges, *ext_targets, *new_specializations, world); + // Collect the new method roots for external specializations + jl_collect_new_roots(*method_roots_list, *new_ext_cis, worklist_key); + *edges = jl_alloc_vec_any(0); + jl_collect_internal_cis(*edges, world); } - assert(edges_map == NULL); // jl_collect_edges clears this when done + internal_methods = NULL; JL_GC_POP(); } @@ -2373,15 +2882,95 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new // In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_array_t *worklist, jl_array_t *extext_methods, - jl_array_t *new_specializations, jl_array_t *method_roots_list, - jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED + jl_array_t *new_ext_cis, jl_array_t *method_roots_list, + jl_array_t *edges) JL_GC_DISABLED { htable_new(&field_replace, 0); + htable_new(&bits_replace, 0); // strip metadata and IR when requested if (jl_options.strip_metadata || jl_options.strip_ir) jl_strip_all_codeinfos(); + // collect needed methods and replace method tables that are in the tags array + htable_new(&new_methtables, 0); + arraylist_t MIs; + arraylist_new(&MIs, 0); + arraylist_t gvars; + arraylist_new(&gvars, 0); + arraylist_t external_fns; + arraylist_new(&external_fns, 0); + // prepare hash table with any fields the user wanted us to rewrite during serialization + if (precompile_field_replace) { + jl_array_t *vals = (jl_array_t*)jl_svecref(precompile_field_replace, 0); + jl_array_t *fields = (jl_array_t*)jl_svecref(precompile_field_replace, 1); + jl_array_t *newvals = (jl_array_t*)jl_svecref(precompile_field_replace, 2); + size_t i, l = jl_array_nrows(vals); + assert(jl_array_nrows(fields) == l && jl_array_nrows(newvals) == l); + for (i = 0; i < l; i++) { + jl_value_t *val = jl_array_ptr_ref(vals, i); + size_t field = jl_unbox_long(jl_array_ptr_ref(fields, i)); + jl_value_t *newval = jl_array_ptr_ref(newvals, i); + jl_datatype_t *st = (jl_datatype_t*)jl_typeof(val); + size_t 
offs = jl_field_offset(st, field); + char *fldaddr = (char*)val + offs; + if (jl_field_isptr(st, field)) { + record_field_change((jl_value_t**)fldaddr, newval); + } + else { + // replace the bits + ptrhash_put(&bits_replace, (void*)fldaddr, newval); + // and any pointers inside + jl_datatype_t *rty = (jl_datatype_t*)jl_typeof(newval); + const jl_datatype_layout_t *layout = rty->layout; + size_t j, np = layout->npointers; + for (j = 0; j < np; j++) { + uint32_t ptr = jl_ptr_offset(rty, j); + record_field_change((jl_value_t**)fldaddr + ptr, *(((jl_value_t**)newval) + ptr)); + } + } + } + } int en = jl_gc_enable(0); + if (native_functions) { + size_t num_gvars, num_external_fns; + jl_get_llvm_gvs(native_functions, &num_gvars, NULL); + arraylist_grow(&gvars, num_gvars); + jl_get_llvm_gvs(native_functions, &num_gvars, gvars.items); + jl_get_llvm_external_fns(native_functions, &num_external_fns, NULL); + arraylist_grow(&external_fns, num_external_fns); + jl_get_llvm_external_fns(native_functions, &num_external_fns, + (jl_code_instance_t *)external_fns.items); + if (jl_options.trim) { + size_t num_mis; + jl_get_llvm_mis(native_functions, &num_mis, NULL); + arraylist_grow(&MIs, num_mis); + jl_get_llvm_mis(native_functions, &num_mis, (jl_method_instance_t *)MIs.items); + } + } + if (jl_options.trim) { + jl_rebuild_methtables(&MIs, &new_methtables); + jl_methtable_t *mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_type_type_mt); + JL_GC_PROMISE_ROOTED(mt); + if (mt != HT_NOTFOUND) + jl_type_type_mt = mt; + else + jl_type_type_mt = jl_new_method_table(jl_type_type_mt->name, jl_type_type_mt->module); + + mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_kwcall_mt); + JL_GC_PROMISE_ROOTED(mt); + if (mt != HT_NOTFOUND) + jl_kwcall_mt = mt; + else + jl_kwcall_mt = jl_new_method_table(jl_kwcall_mt->name, jl_kwcall_mt->module); + + mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_nonfunction_mt); + JL_GC_PROMISE_ROOTED(mt); + if (mt != HT_NOTFOUND) + jl_nonfunction_mt = mt; + else + jl_nonfunction_mt = jl_new_method_table(jl_nonfunction_mt->name, jl_nonfunction_mt->module); + } + nsym_tag = 0; htable_new(&symbol_table, 0); htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); @@ -2390,7 +2979,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); } htable_new(&serialization_order, 25000); - htable_new(&unique_ready, 0); htable_new(&nullptrs, 0); arraylist_new(&object_worklist, 0); arraylist_new(&serialization_queue, 0); @@ -2410,9 +2998,12 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, s.gvar_record = &gvar_record; s.fptr_record = &fptr_record; s.ptls = jl_current_task->ptls; + arraylist_new(&s.memowner_list, 0); + arraylist_new(&s.memref_list, 0); arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_super, 0); arraylist_new(&s.uniquing_objs, 0); arraylist_new(&s.fixup_types, 0); arraylist_new(&s.fixup_objs, 0); @@ -2422,17 +3013,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, 0); s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, 0); s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, 0); - htable_new(&s.callers_with_edges, 0); jl_value_t **const*const tags = get_tags(); // worklist == NULL ? 
get_tags() : NULL; - arraylist_t gvars; - arraylist_t external_fns; - arraylist_new(&gvars, 0); - arraylist_new(&external_fns, 0); - if (native_functions) { - jl_get_llvm_gvs(native_functions, &gvars); - jl_get_llvm_external_fns(native_functions, &external_fns); - } if (worklist == NULL) { // empty!(Core.ARGS) @@ -2443,8 +3025,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, } } } - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; - jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; jl_bigint_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; if (jl_bigint_type) { gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), @@ -2456,6 +3036,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META")); } } + jl_genericmemory_t *global_roots_list = NULL; + jl_genericmemory_t *global_roots_keyset = NULL; { // step 1: record values (recursively) that need to go in the image size_t i; @@ -2464,50 +3046,79 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_value_t *tag = *tags[i]; jl_queue_for_serialization(&s, tag); } - jl_queue_for_serialization(&s, jl_global_roots_table); jl_queue_for_serialization(&s, s.ptls->root_task->tls); } else { - // To ensure we don't have to manually update the list, go through all tags and queue any that are not otherwise - // judged to be externally-linked - htable_new(&external_objects, NUM_TAGS); - for (size_t i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - ptrhash_put(&external_objects, tag, tag); - } // Queue the worklist itself as the first item we serialize jl_queue_for_serialization(&s, worklist); jl_queue_for_serialization(&s, jl_module_init_order); - // Classify the CodeInstances with respect to their need for validation - classify_callers(&s.callers_with_edges, edges); } // step 1.1: as needed, serialize the data needed for insertion into the running system if (extext_methods) { - assert(ext_targets); assert(edges); // Queue method extensions jl_queue_for_serialization(&s, extext_methods); // Queue the new specializations - jl_queue_for_serialization(&s, new_specializations); + jl_queue_for_serialization(&s, new_ext_cis); // Queue the new roots jl_queue_for_serialization(&s, method_roots_list); // Queue the edges - jl_queue_for_serialization(&s, ext_targets); jl_queue_for_serialization(&s, edges); } jl_serialize_reachable(&s); // step 1.2: ensure all gvars are part of the sysimage too record_gvars(&s, &gvars); record_external_fns(&s, &external_fns); + if (jl_options.trim) + record_gvars(&s, &MIs); jl_serialize_reachable(&s); - // step 1.3: prune (garbage collect) some special weak references from - // built-in type caches + // step 1.3: prune (garbage collect) special weak references from the jl_global_roots_list + if (worklist == NULL) { + global_roots_list = jl_alloc_memory_any(0); + global_roots_keyset = jl_alloc_memory_any(0); + for (size_t i = 0; i < jl_global_roots_list->length; i++) { + jl_value_t *val = jl_genericmemory_ptr_ref(jl_global_roots_list, i); + if (val && ptrhash_get(&serialization_order, val) != HT_NOTFOUND) { + ssize_t idx; + global_roots_list = jl_idset_put_key(global_roots_list, val, &idx); + global_roots_keyset = jl_idset_put_idx(global_roots_list, 
global_roots_keyset, idx); + } + } + jl_queue_for_serialization(&s, global_roots_list); + jl_queue_for_serialization(&s, global_roots_keyset); + jl_serialize_reachable(&s); + } + // step 1.4: prune (garbage collect) some special weak references from + // built-in type caches too for (i = 0; i < serialization_queue.len; i++) { - jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i]; - if (jl_is_typename(tn)) { - tn->cache = jl_prune_type_cache_hash(tn->cache); - jl_gc_wb(tn, tn->cache); - jl_prune_type_cache_linear(tn->linearcache); + jl_value_t *v = (jl_value_t*)serialization_queue.items[i]; + if (jl_options.trim) { + if (jl_is_method(v)){ + jl_method_t *m = (jl_method_t*)v; + jl_value_t *specializations_ = jl_atomic_load_relaxed(&m->specializations); + if (!jl_is_svec(specializations_)) + continue; + + jl_svec_t *specializations = (jl_svec_t *)specializations_; + size_t l = jl_svec_len(specializations), i; + for (i = 0; i < l; i++) { + jl_value_t *mi = jl_svecref(specializations, i); + if (mi == jl_nothing) + continue; + if (ptrhash_get(&serialization_order, mi) == HT_NOTFOUND) + jl_svecset(specializations, i, jl_nothing); + } + } else if (jl_is_module(v)) { + jl_prune_module_bindings((jl_module_t*)v); + } + } + // Not else + if (jl_is_typename(v)) { + jl_typename_t *tn = (jl_typename_t*)v; + jl_atomic_store_relaxed(&tn->cache, + jl_prune_type_cache_hash(jl_atomic_load_relaxed(&tn->cache))); + jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache)); + jl_prune_type_cache_linear(jl_atomic_load_relaxed(&tn->linearcache)); } } } @@ -2527,8 +3138,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) { jl_printf( JL_STDERR, - "ERROR: system image too large: sysimg.size is %jd but the limit is %" PRIxPTR "\n", - (intmax_t)sysimg.size, + "ERROR: system image too large: sysimg.size is 0x%" PRIxPTR " but the limit is 0x%" PRIxPTR "\n", + (uintptr_t)sysimg.size, ((uintptr_t)1 << RELOC_TAG_OFFSET) ); jl_exit(1); @@ -2536,13 +3147,12 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, if (const_data.size / sizeof(void*) > ((uintptr_t)1 << RELOC_TAG_OFFSET)) { jl_printf( JL_STDERR, - "ERROR: system image too large: const_data.size is %jd but the limit is %" PRIxPTR "\n", - (intmax_t)const_data.size, + "ERROR: system image too large: const_data.size is 0x%" PRIxPTR " but the limit is 0x%" PRIxPTR "\n", + (uintptr_t)const_data.size, ((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*) ); jl_exit(1); } - htable_free(&s.callers_with_edges); // step 3: combine all of the sections into one file assert(ios_pos(f) % JL_CACHE_BYTE_ALIGNMENT == 0); @@ -2573,6 +3183,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_finish_relocs(base + sysimg_offset, sysimg_size, &s.relocs_list); jl_write_offsetlist(s.relocs, sysimg_size, &s.gctags_list); jl_write_offsetlist(s.relocs, sysimg_size, &s.relocs_list); + jl_write_offsetlist(s.relocs, sysimg_size, &s.memowner_list); + jl_write_offsetlist(s.relocs, sysimg_size, &s.memref_list); if (s.incremental) { jl_write_arraylist(s.relocs, &s.uniquing_types); jl_write_arraylist(s.relocs, &s.uniquing_objs); @@ -2606,10 +3218,13 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_value_t *tag = *tags[i]; jl_write_value(&s, tag); } - jl_write_value(&s, jl_global_roots_table); + jl_write_value(&s, global_roots_list); + jl_write_value(&s, global_roots_keyset); jl_write_value(&s, s.ptls->root_task->tls); 
write_uint32(f, jl_get_gs_ctr()); - write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); + size_t world = jl_atomic_load_acquire(&jl_world_counter); + // assert(world == precompilation_world); // This triggers on a normal build of julia + write_uint(f, world); write_uint(f, jl_typeinf_world); } else { @@ -2624,19 +3239,18 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, } jl_write_value(&s, jl_module_init_order); jl_write_value(&s, extext_methods); - jl_write_value(&s, new_specializations); + jl_write_value(&s, new_ext_cis); jl_write_value(&s, method_roots_list); - jl_write_value(&s, ext_targets); jl_write_value(&s, edges); } write_uint32(f, jl_array_len(s.link_ids_gctags)); - ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags) * sizeof(uint32_t)); + ios_write(f, (char*)jl_array_data(s.link_ids_gctags, uint32_t), jl_array_len(s.link_ids_gctags) * sizeof(uint32_t)); write_uint32(f, jl_array_len(s.link_ids_relocs)); - ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs) * sizeof(uint32_t)); + ios_write(f, (char*)jl_array_data(s.link_ids_relocs, uint32_t), jl_array_len(s.link_ids_relocs) * sizeof(uint32_t)); write_uint32(f, jl_array_len(s.link_ids_gvars)); - ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars) * sizeof(uint32_t)); + ios_write(f, (char*)jl_array_data(s.link_ids_gvars, uint32_t), jl_array_len(s.link_ids_gvars) * sizeof(uint32_t)); write_uint32(f, jl_array_len(s.link_ids_external_fnvars)); - ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars) * sizeof(uint32_t)); + ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars, uint32_t), jl_array_len(s.link_ids_external_fnvars) * sizeof(uint32_t)); write_uint32(f, external_fns_begin); jl_write_arraylist(s.s, &s.ccallable_list); } @@ -2645,19 +3259,25 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, arraylist_free(&object_worklist); arraylist_free(&serialization_queue); arraylist_free(&layout_table); + arraylist_free(&s.uniquing_types); + arraylist_free(&s.uniquing_super); + arraylist_free(&s.uniquing_objs); + arraylist_free(&s.fixup_types); + arraylist_free(&s.fixup_objs); arraylist_free(&s.ccallable_list); + arraylist_free(&s.memowner_list); + arraylist_free(&s.memref_list); arraylist_free(&s.relocs_list); arraylist_free(&s.gctags_list); arraylist_free(&gvars); arraylist_free(&external_fns); htable_free(&field_replace); - if (worklist) - htable_free(&external_objects); + htable_free(&bits_replace); htable_free(&serialization_order); - htable_free(&unique_ready); htable_free(&nullptrs); htable_free(&symbol_table); htable_free(&fptr_to_id); + htable_free(&new_methtables); nsym_tag = 0; jl_gc_enable(en); @@ -2672,8 +3292,9 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); - // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist - // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. + // Determine unique (module, abspath, fsize, hash, mtime) dependencies for the files defining modules in the worklist + // (see Base._require_dependencies). 
These get stored in `udeps` and written to the ji-file header + (abspath will be converted to a relocatable @depot path before writing, cf. Base.replace_depot_path). // Also write Preferences. // last word of the dependency list is the end of the data / start of the srctextpos *srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later) @@ -2686,6 +3307,10 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos) { + if (jl_options.strip_ir || jl_options.trim) { + // make sure this is precompiled for jl_foreach_reachable_mtable + jl_get_loaded_modules(); + } jl_gc_collect(JL_GC_FULL); jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP); @@ -2705,24 +3330,24 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli ff = f; } - jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_specializations = NULL; - jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_ext_cis = NULL; + jl_array_t *method_roots_list = NULL, *edges = NULL; int64_t checksumpos = 0; int64_t checksumpos_ff = 0; int64_t datastartpos = 0; - JL_GC_PUSH6(&mod_array, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + JL_GC_PUSH5(&mod_array, &extext_methods, &new_ext_cis, &method_roots_list, &edges); if (worklist) { mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) // Generate `_native_data` if (_native_data != NULL) { jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), - &extext_methods, &new_specializations, NULL, NULL, NULL); + &extext_methods, &new_ext_cis, NULL, NULL); jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); - *_native_data = jl_precompile_worklist(worklist, extext_methods, new_specializations); + *_native_data = jl_precompile_worklist(worklist, extext_methods, new_ext_cis); jl_precompile_toplevel_module = NULL; extext_methods = NULL; - new_specializations = NULL; + new_ext_cis = NULL; } jl_write_header_for_incremental(f, worklist, mod_array, udeps, srctextpos, &checksumpos); if (emit_split) { @@ -2735,7 +3360,11 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } } else if (_native_data != NULL) { - *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); + precompilation_world = jl_atomic_load_acquire(&jl_world_counter); + if (jl_options.trim) + *_native_data = jl_precompile_trimmed(precompilation_world); + else + *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); } // Make sure we don't run any Julia code concurrently after this point @@ -2745,7 +3374,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli ct->reentrant_timing |= 0b1000; if (worklist) { jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), - &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + &extext_methods, &new_ext_cis, &method_roots_list, &edges); if (!emit_split) { write_int32(f, 0); // No clone_targets write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) -
ios_pos(f)); @@ -2757,7 +3386,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } if (_native_data != NULL) native_functions = *_native_data; - jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges); + jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, method_roots_list, edges); if (_native_data != NULL) native_functions = NULL; // make sure we don't run any Julia code concurrently before this point @@ -2812,10 +3441,12 @@ JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname) // Allow passing in a module handle directly, rather than a path JL_DLLEXPORT void jl_set_sysimg_so(void *handle) { - void* *jl_RTLD_DEFAULT_handle_pointer; - int symbol_found = jl_dlsym(handle, "jl_RTLD_DEFAULT_handle_pointer", (void **)&jl_RTLD_DEFAULT_handle_pointer, 0); - if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != *jl_RTLD_DEFAULT_handle_pointer) - jl_error("System image file failed consistency check: maybe opened the wrong version?"); + void** (*get_jl_RTLD_DEFAULT_handle_addr)(void) = NULL; + if (handle != jl_RTLD_DEFAULT_handle) { + int symbol_found = jl_dlsym(handle, "get_jl_RTLD_DEFAULT_handle_addr", (void **)&get_jl_RTLD_DEFAULT_handle_addr, 0); + if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != (get_jl_RTLD_DEFAULT_handle_addr())) + jl_error("System image file failed consistency check: maybe opened the wrong version?"); + } if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; jl_sysimg_handle = handle; @@ -2832,15 +3463,21 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle) #endif extern void rebuild_image_blob_tree(void); -extern void export_small_typeof(void); +extern void export_jl_small_typeof(void); + +// When an image is loaded with ignore_native, all subsequent image loads must ignore +// native code in the cache-file since we can't guarantee that there are no call edges +// into the native code of the image. See https://github.com/JuliaLang/julia/pull/52123#issuecomment-1959965395.
+int IMAGE_NATIVE_CODE_TAINTED = 0; static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum, /* outputs */ jl_array_t **restored, jl_array_t **init_order, - jl_array_t **extext_methods, - jl_array_t **new_specializations, jl_array_t **method_roots_list, - jl_array_t **ext_targets, jl_array_t **edges, + jl_array_t **extext_methods, jl_array_t **internal_methods, + jl_array_t **new_ext_cis, jl_array_t **method_roots_list, + jl_array_t **edges, char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED { + jl_task_t *ct = jl_current_task; int en = jl_gc_enable(0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; jl_serializer_state s = {0}; @@ -2852,12 +3489,20 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl s.relocs = &relocs; s.gvar_record = &gvar_record; s.fptr_record = &fptr_record; - s.ptls = jl_current_task->ptls; + s.ptls = ct->ptls; jl_value_t **const*const tags = get_tags(); htable_t new_dt_objs; htable_new(&new_dt_objs, 0); arraylist_new(&deser_sym, 0); + // in --build mode only use sysimg data, not precompiled native code + int imaging_mode = jl_generating_output() && !jl_options.incremental; + if (imaging_mode || jl_options.use_sysimage_native_code != JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES || IMAGE_NATIVE_CODE_TAINTED) { + memset(&image->fptrs, 0, sizeof(image->fptrs)); + image->gvars_base = NULL; + IMAGE_NATIVE_CODE_TAINTED = 1; + } + // step 1: read section map assert(ios_pos(f) == 0 && f->bm == bm_mem); size_t sizeof_sysdata = read_uint(f); @@ -2899,8 +3544,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); assert(!ios_eof(f)); s.s = f; - uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0; - uintptr_t offset_ext_targets = 0, offset_edges = 0; + uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_ext_cis = 0, offset_method_roots_list = 0; + uintptr_t offset_edges = 0; if (!s.incremental) { size_t i; for (i = 0; tags[i] != NULL; i++) { @@ -2908,11 +3553,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl *tag = jl_read_value(&s); } #define XX(name) \ - small_typeof[(jl_##name##_tag << 4) / sizeof(*small_typeof)] = jl_##name##_type; + ijl_small_typeof[(jl_##name##_tag << 4) / sizeof(*ijl_small_typeof)] = jl_##name##_type; JL_SMALL_TYPEOF(XX) #undef XX - export_small_typeof(); - jl_global_roots_table = (jl_array_t*)jl_read_value(&s); + export_jl_small_typeof(); + jl_global_roots_list = (jl_genericmemory_t*)jl_read_value(&s); + jl_global_roots_keyset = (jl_genericmemory_t*)jl_read_value(&s); // set typeof extra-special values now that we have the type set by tags above jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; s.ptls->root_task->tls = jl_read_value(&s); @@ -2921,54 +3567,51 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl jl_init_box_caches(); uint32_t gs_ctr = read_uint32(f); - jl_atomic_store_release(&jl_world_counter, read_uint(f)); + jl_require_world = read_uint(f); + jl_atomic_store_release(&jl_world_counter, jl_require_world); jl_typeinf_world = read_uint(f); jl_set_gs_ctr(gs_ctr); } else { - jl_atomic_fetch_add(&jl_world_counter, 1); offset_restored = jl_read_offset(&s); offset_init_order = 
jl_read_offset(&s); offset_extext_methods = jl_read_offset(&s); - offset_new_specializations = jl_read_offset(&s); + offset_new_ext_cis = jl_read_offset(&s); offset_method_roots_list = jl_read_offset(&s); - offset_ext_targets = jl_read_offset(&s); offset_edges = jl_read_offset(&s); } s.buildid_depmods_idxs = depmod_to_imageidx(depmods); size_t nlinks_gctags = read_uint32(f); if (nlinks_gctags > 0) { s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, nlinks_gctags); - ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint32_t)); + ios_read(f, (char*)jl_array_data(s.link_ids_gctags, uint32_t), nlinks_gctags * sizeof(uint32_t)); } size_t nlinks_relocs = read_uint32(f); if (nlinks_relocs > 0) { s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, nlinks_relocs); - ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint32_t)); + ios_read(f, (char*)jl_array_data(s.link_ids_relocs, uint32_t), nlinks_relocs * sizeof(uint32_t)); } size_t nlinks_gvars = read_uint32(f); if (nlinks_gvars > 0) { s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_gvars); - ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint32_t)); + ios_read(f, (char*)jl_array_data(s.link_ids_gvars, uint32_t), nlinks_gvars * sizeof(uint32_t)); } size_t nlinks_external_fnvars = read_uint32(f); if (nlinks_external_fnvars > 0) { s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_external_fnvars); - ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint32_t)); + ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars, uint32_t), nlinks_external_fnvars * sizeof(uint32_t)); } uint32_t external_fns_begin = read_uint32(f); jl_read_arraylist(s.s, ccallable_list ? 
ccallable_list : &s.ccallable_list); if (s.incremental) { - assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges); + assert(restored && init_order && extext_methods && internal_methods && new_ext_cis && method_roots_list && edges); *restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored); *init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order); *extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods); - *new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations); + *new_ext_cis = (jl_array_t*)jl_delayed_reloc(&s, offset_new_ext_cis); *method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list); - *ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets); *edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges); - if (!*new_specializations) - *new_specializations = jl_alloc_vec_any(0); + *internal_methods = jl_alloc_vec_any(0); } s.s = NULL; @@ -2987,6 +3630,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl size_t sizeof_tags = ios_pos(&relocs); (void)sizeof_tags; jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs + jl_read_memreflist(&s); // memowner_list relocs (must come before memref_list reads the pointers and after general relocs computes the pointers) + jl_read_memreflist(&s); // memref_list relocs // s.link_ids_gvars will be processed in `jl_update_all_gvars` // s.link_ids_external_fns will be processed in `jl_update_all_gvars` jl_update_all_gvars(&s, image, external_fns_begin); // gvars relocs @@ -3015,35 +3660,48 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl arraylist_new(&cleanup_list, 0); arraylist_t delay_list; arraylist_new(&delay_list, 0); + JL_LOCK(&typecache_lock); // Might GC--prevent other threads from changing any type caches while we inspect them all for (size_t i = 0; i < s.uniquing_types.len; i++) { uintptr_t item = (uintptr_t)s.uniquing_types.items[i]; // check whether we are operating on the typetag // (needing to ignore GC bits) or a regular field - int tag = (item & 1) == 1; - // check whether this is a gvar index - int gvar = (item & 2) == 2; + // and check whether this is a gvar index + int tag = (item & 3); item &= ~(uintptr_t)3; uintptr_t *pfld; jl_value_t **obj, *newobj; - if (gvar) { + if (tag == 3) { + obj = (jl_value_t**)(image_base + item); + pfld = NULL; + for (size_t i = 0; i < delay_list.len; i += 2) { + if (obj == (jl_value_t **)delay_list.items[i + 0]) { + pfld = (uintptr_t*)delay_list.items[i + 1]; + delay_list.items[i + 1] = arraylist_pop(&delay_list); + delay_list.items[i + 0] = arraylist_pop(&delay_list); + break; + } + } + assert(pfld); + } + else if (tag == 2) { if (image->gvars_base == NULL) continue; item >>= 2; assert(item < s.gvar_record->size / sizeof(reloc_t)); pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); obj = *(jl_value_t***)pfld; - assert(tag == 0); } else { pfld = (uintptr_t*)(image_base + item); - if (tag) + if (tag == 1) obj = (jl_value_t**)jl_typeof(jl_valueof(pfld)); else obj = *(jl_value_t***)pfld; if ((char*)obj > (char*)pfld) { + // this must be the super field assert(tag == 0); - arraylist_push(&delay_list, pfld); arraylist_push(&delay_list, obj); + arraylist_push(&delay_list, pfld); ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid *pfld = (uintptr_t)NULL; continue; @@ -3072,8 +3730,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, 
jl_image_t *image, jl // leave most fields undefined for now, but we may need instance later, // and we overwrite the name field (field 0) now so preserve it too if (dt->instance) { - assert(dt->instance == jl_nothing); - newdt->instance = dt->instance = jl_gc_permobj(0, newdt); + if (dt->instance == jl_nothing) + dt->instance = jl_gc_permobj(0, newdt); + newdt->instance = dt->instance; } static_assert(offsetof(jl_datatype_t, name) == 0, ""); newdt->name = dt->name; @@ -3088,30 +3747,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl } else { assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg)); - assert(jl_is_datatype_singleton((jl_datatype_t*)otyp) && "unreachable"); newobj = ((jl_datatype_t*)otyp)->instance; - assert(newobj != jl_nothing); + assert(newobj && newobj != jl_nothing); arraylist_push(&cleanup_list, (void*)obj); } - if (tag) + if (tag == 1) *pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE; else *pfld = (uintptr_t)newobj; assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); assert(jl_typetagis(obj, otyp)); } - // A few fields (reached via super) might be self-recursive. This is rare, but handle them now. - // They cannot be instances though, since the type must fully exist before the singleton field can be allocated - for (size_t i = 0; i < delay_list.len; ) { - uintptr_t *pfld = (uintptr_t*)delay_list.items[i++]; - jl_value_t **obj = (jl_value_t **)delay_list.items[i++]; - assert(jl_is_datatype(obj)); - jl_datatype_t *dt = (jl_datatype_t*)obj[0]; - assert(jl_is_datatype(dt)); - jl_value_t *newobj = (jl_value_t*)dt; - *pfld = (uintptr_t)newobj; - assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); - } + assert(delay_list.len == 0); arraylist_free(&delay_list); // now that all the fields of dt are assigned and unique, copy them into // their final newdt memory location: this ensures we do not accidentally @@ -3141,6 +3788,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl } arraylist_grow(&cleanup_list, -cleanup_list.len); // finally cache all our new types now + jl_safepoint_suspend_all_threads(ct); // past this point, it is now not safe to observe the intermediate states on other threads via reflection, so temporarily pause those for (size_t i = 0; i < new_dt_objs.size; i += 2) { void *dt = table[i + 1]; if (dt != HT_NOTFOUND) { @@ -3154,16 +3802,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl assert(jl_is_datatype(obj)); jl_cache_type_((jl_datatype_t*)obj); } + JL_UNLOCK(&typecache_lock); // Might GC + jl_safepoint_resume_all_threads(ct); // TODO: move this later to also protect MethodInstance allocations, but we would need to acquire all jl_specializations_get_linfo and jl_module_globalref locks, which is hard // Perform fixups: things like updating world ages, inserting methods & specializations, etc. 
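The uniquing entries decoded in this hunk carry a 2-bit tag in their low bits: 0 marks an ordinary field offset, 1 a type tag, 2 a gvar slot, 3 a delayed `super` field. A minimal standalone sketch of this kind of low-bit tagging, using hypothetical helper names rather than the serializer's own API, could look like:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch: entries are recorded with two spare low bits, so a small
   tag can ride along with each one, mirroring the
   `int tag = (item & 3); item &= ~(uintptr_t)3;` decoding in these loops.
   Tag convention assumed here, following the hunk above:
   0 = ordinary field, 1 = type tag, 2 = gvar slot, 3 = delayed super field. */
static uintptr_t tag_offset(uintptr_t offset, unsigned tag)
{
    assert((offset & 3) == 0 && tag < 4); /* low two bits must be free */
    return offset | tag;
}

static uintptr_t untag_offset(uintptr_t item, unsigned *tag)
{
    *tag = (unsigned)(item & 3);
    return item & ~(uintptr_t)3;
}

int main(void)
{
    unsigned tag;
    uintptr_t item = tag_offset(0x1000, 2);             /* e.g. a gvar entry */
    uintptr_t off = untag_offset(item, &tag);
    printf("offset=0x%zx tag=%u\n", (size_t)off, tag);  /* prints: offset=0x1000 tag=2 */
    return 0;
}

Field offsets are pointer-aligned, so stealing the two low bits loses no information; the gvar case instead stores an index shifted left by two, which the surrounding loops recover with `item >>= 2`.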
- size_t world = jl_atomic_load_acquire(&jl_world_counter); for (size_t i = 0; i < s.uniquing_objs.len; i++) { uintptr_t item = (uintptr_t)s.uniquing_objs.items[i]; // check whether this is a gvar index - int gvar = (item & 2) == 2; + int tag = (item & 3); + assert(tag == 0 || tag == 2); item &= ~(uintptr_t)3; uintptr_t *pfld; jl_value_t **obj, *newobj; - if (gvar) { + if (tag == 2) { if (image->gvars_base == NULL) continue; item >>= 2; @@ -3190,6 +3840,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl obj[0] = newobj; } } + else if (otyp == (uintptr_t)jl_binding_type) { + jl_value_t *m = obj[0]; + if (jl_is_binding(m)) { + newobj = m; // already done + } + else { + arraylist_push(&cleanup_list, (void*)obj); + jl_value_t *name = obj[1]; + newobj = (jl_value_t*)jl_get_module_binding((jl_module_t*)m, (jl_sym_t*)name, 1); + obj[0] = newobj; + } + } else { abort(); // should be unreachable } @@ -3205,69 +3867,62 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl jl_value_t *t = jl_typeof(item); if (t == (jl_value_t*)jl_method_instance_type) memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored + else if (t == (jl_value_t*)jl_binding_type) + memset(o, 0xba, sizeof(jl_value_t*) * 3); // stored as mod/name o->bits.in_image = 1; } arraylist_free(&cleanup_list); for (size_t i = 0; i < s.fixup_objs.len; i++) { uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; jl_value_t *obj = (jl_value_t*)(image_base + item); - if (jl_typetagis(obj, jl_typemap_entry_type)) { - jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj; - entry->min_world = world; - } - else if (jl_is_method(obj)) { - jl_method_t *m = (jl_method_t*)obj; - m->primary_world = world; + if (jl_typetagis(obj, jl_typemap_entry_type) || jl_is_method(obj) || jl_is_code_instance(obj)) { + jl_array_ptr_1d_push(*internal_methods, obj); + assert(s.incremental); } else if (jl_is_method_instance(obj)) { jl_method_instance_t *newobj = jl_specializations_get_or_insert((jl_method_instance_t*)obj); assert(newobj == (jl_method_instance_t*)obj); // strict insertion expected (void)newobj; } - else if (jl_is_code_instance(obj)) { - jl_code_instance_t *ci = (jl_code_instance_t*)obj; - assert(s.incremental); - ci->min_world = world; - if (ci->max_world != 0) - jl_array_ptr_1d_push(*new_specializations, (jl_value_t*)ci); - } else if (jl_is_globalref(obj)) { - continue; // wait until all the module binding tables have been initialized + jl_globalref_t *r = (jl_globalref_t*)obj; + if (r->binding == NULL) { + jl_globalref_t *gr = (jl_globalref_t*)jl_module_globalref(r->mod, r->name); + r->binding = gr->binding; + jl_gc_wb(r, gr->binding); + } } else if (jl_is_module(obj)) { - // rebuild the binding table for module v + // rebuild the usings table for module v // TODO: maybe want to hold the lock on `v`, but that only strongly matters for async / thread safety // and we are already bad at that jl_module_t *mod = (jl_module_t*)obj; mod->build_id.hi = checksum; - mod->primary_world = world; if (mod->usings.items != &mod->usings._space[0]) { // arraylist_t assumes we called malloc to get this memory, so make that true now void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); mod->usings.items = newitems; } + // Move the binding bits back to their correct place +#ifdef _P64 + jl_svec_t *table = jl_atomic_load_relaxed(&mod->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + 
jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((jl_value_t*)b == jl_nothing) + continue; + jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions); + while (bpart) { + jl_atomic_store_relaxed(&bpart->restriction, + encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved)); + bpart->reserved = 0; + bpart = jl_atomic_load_relaxed(&bpart->next); + } + } +#endif } else { - // rehash IdDict - //assert(((jl_datatype_t*)(jl_typeof(obj)))->name == jl_idtable_typename); - jl_array_t **a = (jl_array_t**)obj; - assert(jl_typetagis(*a, jl_array_any_type)); - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(obj, *a); - } - } - // Now pick up the globalref binding pointer field - for (size_t i = 0; i < s.fixup_objs.len; i++) { - uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; - jl_value_t *obj = (jl_value_t*)(image_base + item); - if (jl_is_globalref(obj)) { - jl_globalref_t *r = (jl_globalref_t*)obj; - if (r->binding == NULL) { - jl_globalref_t *gr = (jl_globalref_t*)jl_module_globalref(r->mod, r->name); - r->binding = gr->binding; - jl_gc_wb(r, gr->binding); - } + abort(); } } arraylist_free(&s.fixup_types); @@ -3331,11 +3986,26 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl arraylist_push(&jl_linkage_blobs, (void*)image_base); arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg)); arraylist_push(&jl_image_relocs, (void*)relocs_base); + if (restored == NULL) { + arraylist_push(&jl_top_mods, (void*)jl_top_module); + } else { + size_t len = jl_array_nrows(*restored); + assert(len > 0); + jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(*restored, len-1); + // Ordinarily set during deserialization, but our compiler stub image, + // just returns a reference to the sysimage version, so we set it here. 
+ topmod->build_id.hi = checksum; + assert(jl_is_module(topmod)); + arraylist_push(&jl_top_mods, (void*)topmod); + } jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t)); rebuild_image_blob_tree(); // jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1); jl_gc_enable(en); + + if (s.incremental) + jl_add_methods(*extext_methods); } static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos, int64_t *datastartpos) @@ -3346,7 +4016,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ "Precompile file header verification checks failed."); } uint8_t flags = read_uint8(f); - if (pkgimage && !jl_match_cache_flags(flags)) { + if (pkgimage && !jl_match_cache_flags_current(flags)) { return jl_get_exceptionf(jl_errorexception_type, "Pkgimage flags mismatch"); } if (!pkgimage) { @@ -3365,7 +4035,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ } // TODO?: refactor to make it easier to create the "package inspector" -static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) +static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc) { JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname); @@ -3379,19 +4049,20 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im assert(datastartpos > 0 && datastartpos < dataendpos); needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc; - jl_value_t *restored = NULL; - jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; - jl_svec_t *cachesizes_sv = NULL; char *base; arraylist_t ccallable_list; - JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv); + + jl_value_t *restored = NULL; + jl_array_t *init_order = NULL, *extext_methods = NULL, *internal_methods = NULL, *new_ext_cis = NULL, *method_roots_list = NULL, *edges = NULL; + jl_svec_t *cachesizes_sv = NULL; + JL_GC_PUSH8(&restored, &init_order, &extext_methods, &internal_methods, &new_ext_cis, &method_roots_list, &edges, &cachesizes_sv); { // make a permanent in-memory copy of f (excluding the header) ios_bufmode(f, bm_none); JL_SIGATOMIC_BEGIN(); size_t len = dataendpos - datastartpos; char *sysimg; - bool success = !needs_permalloc; + int success = !needs_permalloc; ios_seek(f, datastartpos); if (needs_permalloc) sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); @@ -3408,21 +4079,44 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im ios_close(f); ios_static_buffer(f, sysimg, len); pkgcachesizes cachesizes; - jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes); + jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &internal_methods, &new_ext_cis, &method_roots_list, + &edges, &base, &ccallable_list, &cachesizes); JL_SIGATOMIC_END(); - // Insert method extensions - 
jl_insert_methods(extext_methods); - // No special processing of `new_specializations` is required because recaching handled it + // No special processing of `new_ext_cis` is required because recaching handled it // Add roots to methods jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored)); - // Handle edges - size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations, world); // restore external backedges (needs to be last) + // Insert method extensions and handle edges + int new_methods = jl_array_nrows(extext_methods) > 0; + if (!new_methods) { + size_t i, l = jl_array_nrows(internal_methods); + for (i = 0; i < l; i++) { + jl_value_t *obj = jl_array_ptr_ref(internal_methods, i); + if (jl_is_method(obj)) { + new_methods = 1; + break; + } + } + } + JL_LOCK(&world_counter_lock); + // allocate a world for the new methods, and insert them there, invalidating content as needed + size_t world = jl_atomic_load_relaxed(&jl_world_counter); + if (new_methods) + world += 1; + jl_activate_methods(extext_methods, internal_methods, world, pkgname); + // TODO: inject new_ext_cis into caches here, so the system can see them immediately as potential candidates (before validation) + // allow users to start running in this updated world + if (new_methods) + jl_atomic_store_release(&jl_world_counter, world); + // now permit more methods to be added again + JL_UNLOCK(&world_counter_lock); + // reinit ccallables - jl_reinit_ccallable(&ccallable_list, base, NULL); + jl_reinit_ccallable(&ccallable_list, base, pkgimage_handle); arraylist_free(&ccallable_list); + jl_value_t *ext_edges = new_ext_cis ? (jl_value_t*)new_ext_cis : jl_nothing; + if (completeinfo) { cachesizes_sv = jl_alloc_svec(7); jl_svecset(cachesizes_sv, 0, jl_box_long(cachesizes.sysdata)); @@ -3432,11 +4126,11 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im jl_svecset(cachesizes_sv, 4, jl_box_long(cachesizes.reloclist)); jl_svecset(cachesizes_sv, 5, jl_box_long(cachesizes.gvarlist)); jl_svecset(cachesizes_sv, 6, jl_box_long(cachesizes.fptrlist)); - restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list, - ext_targets, edges, cachesizes_sv); + restored = (jl_value_t*)jl_svec(7, restored, init_order, edges, ext_edges, + extext_methods, method_roots_list, cachesizes_sv); } else { - restored = (jl_value_t*)jl_svec(2, restored, init_order); + restored = (jl_value_t*)jl_svec(4, restored, init_order, edges, ext_edges); } } } @@ -3451,11 +4145,11 @@ static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uin jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); } -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(void* pkgimage_handle, const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc) { ios_t f; ios_static_buffer(&f, (char*)buf, sz); - jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname, needs_permalloc); + jl_value_t *ret = jl_restore_package_image_from_stream(pkgimage_handle, &f, image, depmods, completeinfo, 
pkgname, needs_permalloc); ios_close(&f); return ret; } @@ -3468,7 +4162,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d "Cache file \"%s\" not found.\n", fname); } jl_image_t pkgimage = {}; - jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname, true); + jl_value_t *ret = jl_restore_package_image_from_stream(NULL, &f, &pkgimage, depmods, completeinfo, pkgname, 1); ios_close(&f); return ret; } @@ -3518,7 +4212,7 @@ JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len) JL_SIGATOMIC_END(); } -JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname) +JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname, int ignore_native) { void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY); if (!pkgimg_handle) { @@ -3536,10 +4230,18 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); - jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, false); + if (ignore_native) { + // Must disable using native code in possible downstream users of this code: + // https://github.com/JuliaLang/julia/pull/52123#issuecomment-1959965395. + // The easiest way to do that is to disable it in all of them. + IMAGE_NATIVE_CODE_TAINTED = 1; + } + + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, 0); return mod; } diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index bf1a830b608de..1985357321a3a 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -1,5 +1,5 @@ // inverse of backedges graph (caller=>callees hash) -jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this +jl_array_t *internal_methods JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT { @@ -45,16 +45,15 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo jl_datatype_t *dt = (jl_datatype_t*)t; assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?"); jl_datatype_t *super = dt->super; - // check if super is news, since then we must be new also - // (it is also possible that super is indeterminate now, wait for `t` - // to be resolved, then will be determined later and fixed up by the - // delay_list, for this and any other references to it). - while (super != jl_any_type) { - assert(super); + // fast-path: check if super is in news, since then we must be new also + // (it is also possible that super is indeterminate or NULL right now, + // waiting for `t` to be resolved, then will be determined later as + // soon as possible afterwards). 
+ while (super != NULL && super != jl_any_type) { if (ptrhash_has(news, (void*)super)) return 1; if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg)) - break; // fast-path for rejection of super + break; // the rest must all be non-new // otherwise super might be something that was not cached even though a later supertype might be // for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}` super = super->super; @@ -74,7 +73,7 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT { assert(jl_is_array(worklist)); - size_t len = jl_array_len(worklist); + size_t len = jl_array_nrows(worklist); if (len > 0) { jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1); assert(jl_is_module(topmod)); @@ -86,21 +85,26 @@ static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; // Mutex for newly_inferred jl_mutex_t newly_inferred_mutex; +extern jl_mutex_t world_counter_lock; // Register array of newly-inferred MethodInstances // This gets called as the first step of Base.include_package_for_output JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) { - assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); + assert(_newly_inferred == NULL || _newly_inferred == jl_nothing || jl_is_array(_newly_inferred)); + if (_newly_inferred == jl_nothing) + _newly_inferred = NULL; newly_inferred = (jl_array_t*) _newly_inferred; } JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci) { + if (!newly_inferred) + return; JL_LOCK(&newly_inferred_mutex); - size_t end = jl_array_len(newly_inferred); + size_t end = jl_array_nrows(newly_inferred); jl_array_grow_end(newly_inferred, 1); - jl_arrayset(newly_inferred, ci, end); + jl_array_ptr_set(newly_inferred, end, ci); JL_UNLOCK(&newly_inferred_mutex); } @@ -159,7 +163,8 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, if (jl_is_method(mod)) mod = ((jl_method_t*)mod)->module; assert(jl_is_module(mod)); - if (mi->precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { + uint8_t is_precompiled = jl_atomic_load_relaxed(&mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED; + if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { return 1; } if (!mi->backedges) { @@ -169,7 +174,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, // HT_NOTFOUND: not yet analyzed // HT_NOTFOUND + 1: no link back // HT_NOTFOUND + 2: does link back - // HT_NOTFOUND + 3: does link back, and included in new_specializations already + // HT_NOTFOUND + 3: does link back, and included in new_ext_cis already // HT_NOTFOUND + 4 + depth: in-progress int found = (char*)*bp - (char*)HT_NOTFOUND; if (found) @@ -177,12 +182,13 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_push(stack, (void*)mi); int depth = stack->len; *bp = (void*)((char*)HT_NOTFOUND + 4 + depth); // preliminarily mark as in-progress - size_t i = 0, n = jl_array_len(mi->backedges); + size_t i = 0, n = jl_array_nrows(mi->backedges); int cycle = depth; while (i < n) { - jl_method_instance_t *be; + jl_code_instance_t *be; i = get_next_edge(mi->backedges, i, NULL, &be); - int child_found = has_backedge_to_worklist(be, visited, stack); + JL_GC_PROMISE_ROOTED(be); // get_next_edge 
propagates the edge for us here + int child_found = has_backedge_to_worklist(jl_get_ci_mi(be), visited, stack); if (child_found == 1 || child_found == 2) { // found what we were looking for, so terminate early found = 1; @@ -222,55 +228,49 @@ static jl_array_t *queue_external_cis(jl_array_t *list) htable_t visited; arraylist_t stack; assert(jl_is_array(list)); - size_t n0 = jl_array_len(list); + size_t n0 = jl_array_nrows(list); htable_new(&visited, n0); arraylist_new(&stack, 0); - jl_array_t *new_specializations = jl_alloc_vec_any(0); - JL_GC_PUSH1(&new_specializations); + jl_array_t *new_ext_cis = jl_alloc_vec_any(0); + JL_GC_PUSH1(&new_ext_cis); for (i = n0; i-- > 0; ) { jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(list, i); assert(jl_is_code_instance(ci)); - if (!ci->relocatability) - continue; - jl_method_instance_t *mi = ci->def; + jl_method_instance_t *mi = jl_get_ci_mi(ci); jl_method_t *m = mi->def.method; - if (ci->inferred && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) { + if (ci->owner == jl_nothing && jl_atomic_load_relaxed(&ci->inferred) && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) { int found = has_backedge_to_worklist(mi, &visited, &stack); assert(found == 0 || found == 1 || found == 2); assert(stack.len == 0); - if (found == 1 && ci->max_world == ~(size_t)0) { - void **bp = ptrhash_bp(&visited, mi); - if (*bp != (void*)((char*)HT_NOTFOUND + 3)) { - *bp = (void*)((char*)HT_NOTFOUND + 3); - jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci); - } + if (found == 1 && jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) { + jl_array_ptr_1d_push(new_ext_cis, (jl_value_t*)ci); } } } htable_free(&visited); arraylist_free(&stack); JL_GC_POP(); - // reverse new_specializations - n0 = jl_array_len(new_specializations); - jl_value_t **news = (jl_value_t**)jl_array_data(new_specializations); + // reverse new_ext_cis + n0 = jl_array_nrows(new_ext_cis); + jl_value_t **news = jl_array_data(new_ext_cis, jl_value_t*); for (i = 0; i < n0; i++) { jl_value_t *temp = news[i]; news[i] = news[n0 - i - 1]; news[n0 - i - 1] = temp; } - return new_specializations; + return new_ext_cis; } // New roots for external methods -static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializations, uint64_t key) +static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_ext_cis, uint64_t key) { htable_t mset; htable_new(&mset, 0); - size_t l = new_specializations ? jl_array_len(new_specializations) : 0; + size_t l = new_ext_cis ? 
jl_array_nrows(new_ext_cis) : 0; for (size_t i = 0; i < l; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i); + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i); assert(jl_is_code_instance(ci)); - jl_method_t *m = ci->def->def.method; + jl_method_t *m = jl_get_ci_mi(ci)->def.method; assert(jl_is_method(m)); ptrhash_put(&mset, (void*)m, (void*)m); } @@ -289,10 +289,10 @@ static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializati jl_array_ptr_1d_push(roots, (jl_value_t*)newroots); rle_iter_state rootiter = rle_iter_init(0); uint64_t *rletable = NULL; - size_t nblocks2 = 0, nroots = jl_array_len(m->roots), k = 0; + size_t nblocks2 = 0, nroots = jl_array_nrows(m->roots), k = 0; if (m->root_blocks) { - rletable = (uint64_t*)jl_array_data(m->root_blocks); - nblocks2 = jl_array_len(m->root_blocks); + rletable = jl_array_data(m->root_blocks, uint64_t); + nblocks2 = jl_array_nrows(m->root_blocks); } while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) if (rootiter.key == key) @@ -305,82 +305,19 @@ static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializati htable_free(&mset); } -// Create the forward-edge map (caller => callees) -// the intent of these functions is to invert the backedges tree -// for anything that points to a method not part of the worklist -// -// from MethodTables -static void jl_collect_missing_backedges(jl_methtable_t *mt) -{ - jl_array_t *backedges = mt->backedges; - if (backedges) { - size_t i, l = jl_array_len(backedges); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); - jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); // signature of abstract callee - jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); - if (edges == NULL) { - edges = jl_alloc_vec_any(0); - JL_GC_PUSH1(&edges); - edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); - JL_GC_POP(); - } - jl_array_ptr_1d_push(edges, NULL); - jl_array_ptr_1d_push(edges, missing_callee); - } - } -} - -// from MethodInstances -static void collect_backedges(jl_method_instance_t *callee, int internal) -{ - jl_array_t *backedges = callee->backedges; - if (backedges) { - size_t i = 0, l = jl_array_len(backedges); - while (i < l) { - jl_value_t *invokeTypes; - jl_method_instance_t *caller; - i = get_next_edge(backedges, i, &invokeTypes, &caller); - jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); - if (edges == NULL) { - edges = jl_alloc_vec_any(0); - JL_GC_PUSH1(&edges); - edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); - JL_GC_POP(); - } - jl_array_ptr_1d_push(edges, invokeTypes); - jl_array_ptr_1d_push(edges, (jl_value_t*)callee); - } - } -} - - -// For functions owned by modules not on the worklist, call this on each method. +// For every method: // - if the method is owned by a worklist module, add it to the list of things to be -// fully serialized -// - Collect all backedges (may be needed later when we invert this list). 
+// verified on reloading +// - if the method is extext, record that it needs to be reinserted later in the method table static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) { jl_array_t *s = (jl_array_t*)closure; jl_method_t *m = ml->func.method; - if (s && !jl_object_in_image((jl_value_t*)m->module)) { - jl_array_ptr_1d_push(s, (jl_value_t*)m); - } - if (edges_map == NULL) - return 1; - jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations); - if (!jl_is_svec(specializations)) { - jl_method_instance_t *callee = (jl_method_instance_t*)specializations; - collect_backedges(callee, !s); - } - else { - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); - if ((jl_value_t*)callee != jl_nothing) - collect_backedges(callee, !s); - } + if (!jl_object_in_image((jl_value_t*)m->module)) { + jl_array_ptr_1d_push(internal_methods, (jl_value_t*)m); + if (s) + jl_array_ptr_1d_push(s, (jl_value_t*)m); // extext } return 1; } @@ -388,10 +325,8 @@ static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) static int jl_collect_methtable_from_mod(jl_methtable_t *mt, void *env) { if (!jl_object_in_image((jl_value_t*)mt)) - env = NULL; // do not collect any methods from here + env = NULL; // mark internal, not extext jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), jl_collect_methcache_from_mod, env); - if (env && edges_map) - jl_collect_missing_backedges(mt); return 1; } @@ -403,171 +338,38 @@ static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) foreach_mtable_in_module(m, jl_collect_methtable_from_mod, s); } -static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) +static void jl_record_edges(jl_method_instance_t *caller, jl_array_t *edges) { - jl_array_t *callees = NULL; - JL_GC_PUSH2(&caller, &callees); - callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL); - if (callees != NULL) { - jl_array_ptr_1d_push(edges, (jl_value_t*)caller); - jl_array_ptr_1d_push(edges, (jl_value_t*)callees); - size_t i, l = jl_array_len(callees); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i); - if (c && jl_is_method_instance(c)) { - arraylist_push(wq, c); - } - } + jl_code_instance_t *ci = jl_atomic_load_relaxed(&caller->cache); + while (ci != NULL) { + if (jl_atomic_load_relaxed(&ci->edges) && + jl_atomic_load_relaxed(&ci->edges) != jl_emptysvec && + jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) + jl_array_ptr_1d_push(edges, (jl_value_t*)ci); + ci = jl_atomic_load_relaxed(&ci->next); } - JL_GC_POP(); } - // Extract `edges` and `ext_targets` from `edges_map` -// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges -// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target -static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *external_cis, size_t world) +// `edges` = [caller1, ...], the list of codeinstances internal to methods +static void jl_collect_internal_cis(jl_array_t *edges, size_t world) { - htable_t external_mis; - htable_new(&external_mis, 0); - if (external_cis) { - for (size_t i = 0; i < jl_array_len(external_cis); i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(external_cis, i); - jl_method_instance_t *mi = ci->def; - ptrhash_put(&external_mis, (void*)mi, (void*)mi); - 
} - } - arraylist_t wq; - arraylist_new(&wq, 0); - void **table = (void**)jl_array_data(edges_map); // edges_map is caller => callees - size_t table_size = jl_array_len(edges_map); - for (size_t i = 0; i < table_size; i += 2) { - assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) && - "edges_map changed during iteration"); - jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; - jl_array_t *callees = (jl_array_t*)table[i + 1]; - if (callees == NULL) - continue; - assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - if (!jl_object_in_image((jl_value_t*)caller->def.method->module) || - ptrhash_get(&external_mis, caller) != HT_NOTFOUND) { - jl_record_edges(caller, &wq, edges); - } - } - htable_free(&external_mis); - while (wq.len) { - jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); - jl_record_edges(caller, &wq, edges); - } - arraylist_free(&wq); - edges_map = NULL; - htable_t edges_map2; - htable_new(&edges_map2, 0); - htable_t edges_ids; - size_t l = edges ? jl_array_len(edges) : 0; - htable_new(&edges_ids, l); - for (size_t i = 0; i < l / 2; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); - void *target = (void*)((char*)HT_NOTFOUND + i + 1); - ptrhash_put(&edges_ids, (void*)caller, target); - } - // process target list to turn it into a memoized validity table - // and compute the old methods list, ready for serialization - jl_value_t *matches = NULL; - jl_array_t *callee_ids = NULL; - jl_value_t *sig = NULL; - JL_GC_PUSH3(&matches, &callee_ids, &sig); - for (size_t i = 0; i < l; i += 2) { - jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); - size_t l = jl_array_len(callees); - callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - idxs[0] = 0; - size_t nt = 0; - for (size_t j = 0; j < l; j += 2) { - jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); - jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); - assert(callee && "unsupported edge"); - - if (jl_is_method_instance(callee)) { - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if (!jl_object_in_image((jl_value_t*)mt)) - continue; - } - - // (nullptr, c) => call - // (invokeTypes, c) => invoke - // (nullptr, invokeTypes) => missing call - // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) - void *target = ptrhash_get(&edges_map2, invokeTypes ? 
(void*)invokeTypes : (void*)callee); - if (target == HT_NOTFOUND) { - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - if (invokeTypes) { - assert(jl_is_method_instance(callee)); - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if ((jl_value_t*)mt == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - else { - matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid); - if (matches == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - } - } - else { - if (jl_is_method_instance(callee)) { - jl_method_instance_t *mi = (jl_method_instance_t*)callee; - sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes); - } - else { - sig = callee; - } - int ambig = 0; - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, - INT32_MAX, 0, world, &min_valid, &max_valid, &ambig); - sig = NULL; - if (matches == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - size_t k; - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_array_ptr_set(matches, k, match->method); - } - } - jl_array_ptr_1d_push(ext_targets, invokeTypes); - jl_array_ptr_1d_push(ext_targets, callee); - jl_array_ptr_1d_push(ext_targets, matches); - target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3); - ptrhash_put(&edges_map2, (void*)callee, target); - } - idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + for (size_t i = 0; i < jl_array_nrows(internal_methods); i++) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(internal_methods, i); + jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations); + if (!jl_is_svec(specializations)) { + jl_method_instance_t *mi = (jl_method_instance_t*)specializations; + jl_record_edges(mi, edges); } - jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids - if (!callee_ids) - continue; - idxs[0] = nt; - // record place of every method in edges - // add method edges to the callee_ids list - for (size_t j = 0; j < l; j += 2) { - jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); - if (callee && jl_is_method_instance(callee)) { - void *target = ptrhash_get(&edges_ids, (void*)callee); - if (target != HT_NOTFOUND) { - idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; - } + else { + size_t j, l = jl_svec_len(specializations); + for (j = 0; j < l; j++) { + jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, j); + if ((jl_value_t*)mi != jl_nothing) + jl_record_edges(mi, edges); } } - jl_array_del_end(callee_ids, l - nt); } - JL_GC_POP(); - htable_free(&edges_map2); } // Headers @@ -576,7 +378,7 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_arra static void write_mod_list(ios_t *s, jl_array_t *a) { size_t i; - size_t len = jl_array_len(a); + size_t len = jl_array_nrows(a); for (i = 0; i < len; i++) { jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i); assert(jl_is_module(m)); @@ -594,43 +396,77 @@ static void write_mod_list(ios_t *s, jl_array_t *a) write_int32(s, 0); } -// OPT_LEVEL should always be the upper bits #define OPT_LEVEL 6 +#define DEBUG_LEVEL 1 JL_DLLEXPORT uint8_t jl_cache_flags(void) { // OOICCDDP uint8_t flags = 0; flags |= (jl_options.use_pkgimages & 1); // 0-bit - flags |= (jl_options.debug_level & 3) << 1; // 1-2 bit + flags |= (jl_options.debug_level & 3) << DEBUG_LEVEL; // 1-2 bit flags |= 
(jl_options.check_bounds & 3) << 3; // 3-4 bit flags |= (jl_options.can_inline & 1) << 5; // 5-bit flags |= (jl_options.opt_level & 3) << OPT_LEVEL; // 6-7 bit return flags; } -JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t flags) + +JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t requested_flags, uint8_t actual_flags) { - // 1. Check which flags are relevant - uint8_t current_flags = jl_cache_flags(); - uint8_t supports_pkgimage = (current_flags & 1); - uint8_t is_pkgimage = (flags & 1); + uint8_t supports_pkgimage = (requested_flags & 1); + uint8_t is_pkgimage = (actual_flags & 1); // For .ji packages ignore other flags if (!supports_pkgimage && !is_pkgimage) { return 1; } - // 2. Check all flags, execept opt level must be exact - uint8_t mask = (1 << OPT_LEVEL)-1; - if ((flags & mask) != (current_flags & mask)) + // If package images are optional, ignore that bit (it will be unset in requested_flags) + if (jl_options.use_pkgimages == JL_OPTIONS_USE_PKGIMAGES_EXISTING) { + actual_flags &= ~1; + } + + // 2. Check all flags, except opt level and debug level must be exact + uint8_t mask = (~(3u << OPT_LEVEL) & ~(3u << DEBUG_LEVEL)) & 0x7f; + if ((actual_flags & mask) != (requested_flags & mask)) return 0; - // 3. allow for higher optimization flags in cache - flags >>= OPT_LEVEL; - current_flags >>= OPT_LEVEL; - return flags >= current_flags; + // 3. allow for higher optimization and debug level flags in cache to minimize required compile option combinations + return ((actual_flags >> OPT_LEVEL) & 3) >= ((requested_flags >> OPT_LEVEL) & 3) && + ((actual_flags >> DEBUG_LEVEL) & 3) >= ((requested_flags >> DEBUG_LEVEL) & 3); +} + +JL_DLLEXPORT uint8_t jl_match_cache_flags_current(uint8_t flags) +{ + return jl_match_cache_flags(jl_cache_flags(), flags); +} + +// return char* from String field in Base.GIT_VERSION_INFO +static const char *git_info_string(const char *fld) +{ + static jl_value_t *GIT_VERSION_INFO = NULL; + if (!GIT_VERSION_INFO) + GIT_VERSION_INFO = jl_get_global(jl_base_module, jl_symbol("GIT_VERSION_INFO")); + jl_value_t *f = jl_get_field(GIT_VERSION_INFO, fld); + assert(jl_is_string(f)); + return jl_string_data(f); +} + +static const char *jl_git_branch(void) +{ + static const char *branch = NULL; + if (!branch) branch = git_info_string("branch"); + return branch; } +static const char *jl_git_commit(void) +{ + static const char *commit = NULL; + if (!commit) commit = git_info_string("commit"); + return commit; +} + + // "magic" string and version header of .ji file static const int JI_FORMAT_VERSION = 12; static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature @@ -655,13 +491,18 @@ static int64_t write_header(ios_t *s, uint8_t pkgimage) return checksumpos; } +static int is_serialization_root_module(jl_module_t *mod) JL_NOTSAFEPOINT +{ + return mod->parent == jl_main_module || mod->parent == jl_base_module || mod->parent == mod; +} + // serialize information about the result of deserializing this file static void write_worklist_for_header(ios_t *s, jl_array_t *worklist) { - int i, l = jl_array_len(worklist); + int i, l = jl_array_nrows(worklist); for (i = 0; i < l; i++) { jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, i); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { + if (is_serialization_root_module(workmod)) { size_t l = strlen(jl_symbol_name(workmod->name)); write_int32(s, l); ios_write(s, jl_symbol_name(workmod->name), l); @@ -675,7 +516,7 @@ static void write_worklist_for_header(ios_t *s, jl_array_t 
*worklist) static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT { - if (depmod->parent == jl_main_module || depmod->parent == depmod) + if (is_serialization_root_module(depmod)) return; const char *mname = jl_symbol_name(depmod->name); size_t slen = strlen(mname); @@ -708,27 +549,58 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); ct->world_age = last_age; + static jl_value_t *replace_depot_func = NULL; + if (!replace_depot_func) + replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + static jl_value_t *normalize_depots_func = NULL; + if (!normalize_depots_func) + normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation")); + + jl_value_t *depots = NULL, *prefs_hash = NULL, *prefs_list = NULL; + JL_GC_PUSH2(&depots, &prefs_list); + last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + depots = jl_apply(&normalize_depots_func, 1); + ct->world_age = last_age; + // write a placeholder for total size so that we can quickly seek past all of the // dependencies if we don't need them initial_pos = ios_pos(s); write_uint64(s, 0); - size_t i, l = udeps ? jl_array_len(udeps) : 0; + size_t i, l = udeps ? jl_array_nrows(udeps) : 0; for (i = 0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - size_t slen = jl_string_len(dep); + jl_value_t *deppath = jl_fieldref(deptuple, 1); + + if (replace_depot_func) { + jl_value_t **replace_depot_args; + JL_GC_PUSHARGS(replace_depot_args, 3); + replace_depot_args[0] = replace_depot_func; + replace_depot_args[1] = deppath; + replace_depot_args[2] = depots; + ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + deppath = (jl_value_t*)jl_apply(replace_depot_args, 3); + ct->world_age = last_age; + JL_GC_POP(); + } + + size_t slen = jl_string_len(deppath); write_int32(s, slen); - ios_write(s, jl_string_data(dep), slen); - write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime + ios_write(s, jl_string_data(deppath), slen); + write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 2))); // fsize + write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 3))); // hash + write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 4))); // mtime jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module jl_module_t *depmod_top = depmod; - while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) + while (!is_serialization_root_module(depmod_top)) depmod_top = depmod_top->parent; unsigned provides = 0; - size_t j, lj = jl_array_len(worklist); + size_t j, lj = jl_array_nrows(worklist); for (j = 0; j < lj; j++) { jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, j); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { + if (is_serialization_root_module(workmod)) { ++provides; if (workmod == depmod_top) { write_int32(s, provides); @@ -742,9 +614,6 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t write_int32(s, 0); // terminator, for ease of reading // Calculate Preferences hash for current package. 
- jl_value_t *prefs_hash = NULL; - jl_value_t *prefs_list = NULL; - JL_GC_PUSH1(&prefs_list); if (jl_base_module) { // Toplevel module is the module we're currently compiling, use it to get our preferences hash jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); @@ -772,7 +641,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. if (prefs_hash != NULL && prefs_list != NULL) { - size_t i, l = jl_array_len(prefs_list); + size_t i, l = jl_array_nrows(prefs_list); for (i = 0; i < l; i++) { jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); size_t slen = jl_string_len(pref_name); @@ -791,7 +660,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t write_int32(s, 0); write_uint64(s, 0); } - JL_GC_POP(); // for prefs_list + JL_GC_POP(); // for depots, prefs_list // write a dummy file position to indicate the beginning of the source-text pos = ios_pos(s); @@ -806,357 +675,79 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t // Deserialization // Add methods to external (non-worklist-owned) functions -static void jl_insert_methods(jl_array_t *list) +// mutating external to point at the new methodtable entry instead of the new method +static void jl_add_methods(jl_array_t *external) { - size_t i, l = jl_array_len(list); + size_t i, l = jl_array_nrows(external); for (i = 0; i < l; i++) { - jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); + jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(external, i); assert(jl_is_method(meth)); assert(!meth->is_for_opaque_closure); jl_methtable_t *mt = jl_method_get_table(meth); assert((jl_value_t*)mt != jl_nothing); - jl_method_table_insert(mt, meth, NULL); - } -} - -static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key) -{ - size_t i, l = jl_array_len(method_roots_list); - for (i = 0; i < l; i+=2) { - jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i); - jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1); - if (roots) { - assert(jl_is_array(roots)); - jl_append_method_roots(m, key, roots); - } + jl_typemap_entry_t *entry = jl_method_table_add(mt, meth, NULL); + jl_array_ptr_set(external, i, entry); } } - -// verify that these edges intersect with the same methods as before -static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld) +extern _Atomic(int) allow_new_worlds; +static void jl_activate_methods(jl_array_t *external, jl_array_t *internal, size_t world, const char *pkgname) { - JL_TIMING(VERIFY_IMAGE, VERIFY_Edges); - size_t i, l = jl_array_len(targets) / 3; - static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL; - if (ulong_array == NULL) - ulong_array = jl_apply_array_type((jl_value_t*)jl_ulong_type, 1); - jl_array_t *maxvalids = jl_alloc_array_1d(ulong_array, l); - memset(jl_array_data(maxvalids), 0, l * sizeof(size_t)); - jl_value_t *loctag = NULL; - jl_value_t *matches = NULL; - jl_value_t *sig = NULL; - JL_GC_PUSH4(&maxvalids, &matches, &sig, &loctag); + size_t i, l = jl_array_nrows(internal); for (i = 0; i < l; i++) { - jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); - jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); - jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - if (invokesig) { - assert(callee && "unsupported edge"); - 
jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if ((jl_value_t*)mt == jl_nothing) { - max_valid = 0; - } - else { - matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, minworld, &min_valid, &max_valid); - if (matches == jl_nothing) { - max_valid = 0; - } - else { - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - if (matches != expected) { - max_valid = 0; - } - } - } + // allow_new_worlds doesn't matter here, since we aren't actually changing anything external + jl_value_t *obj = jl_array_ptr_ref(internal, i); + if (jl_typetagis(obj, jl_typemap_entry_type)) { + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj; + assert(jl_atomic_load_relaxed(&entry->min_world) == ~(size_t)0); + assert(jl_atomic_load_relaxed(&entry->max_world) == WORLD_AGE_REVALIDATION_SENTINEL); + jl_atomic_store_release(&entry->min_world, world); + jl_atomic_store_release(&entry->max_world, ~(size_t)0); + } + else if (jl_is_method(obj)) { + jl_method_t *m = (jl_method_t*)obj; + assert(jl_atomic_load_relaxed(&m->primary_world) == ~(size_t)0); + assert(jl_atomic_load_relaxed(&m->deleted_world) == WORLD_AGE_REVALIDATION_SENTINEL); + jl_atomic_store_release(&m->primary_world, world); + jl_atomic_store_release(&m->deleted_world, ~(size_t)0); + } + else if (jl_is_code_instance(obj)) { + jl_code_instance_t *ci = (jl_code_instance_t*)obj; + assert(jl_atomic_load_relaxed(&ci->min_world) == ~(size_t)0); + assert(jl_atomic_load_relaxed(&ci->max_world) == WORLD_AGE_REVALIDATION_SENTINEL); + jl_atomic_store_relaxed(&ci->min_world, world); + // n.b. ci->max_world is not updated until edges are verified } else { - if (jl_is_method_instance(callee)) { - jl_method_instance_t *mi = (jl_method_instance_t*)callee; - sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes); - } - else { - sig = callee; - } - assert(jl_is_array(expected)); - int ambig = 0; - // TODO: possibly need to included ambiguities too (for the optimizer correctness)? - // len + 1 is to allow us to log causes of invalidation (SnoopCompile's @snoopr) - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, - _jl_debug_method_invalidation ? INT32_MAX : jl_array_len(expected), - 0, minworld, &min_valid, &max_valid, &ambig); - sig = NULL; - if (matches == jl_nothing) { - max_valid = 0; - } - else { - // setdiff!(matches, expected) - size_t j, k, ins = 0; - if (jl_array_len(matches) != jl_array_len(expected)) { - max_valid = 0; - } - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; - size_t l = jl_array_len(expected); - for (j = 0; j < l; j++) - if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) - break; - if (j == l) { - // intersection has a new method or a method was - // deleted--this is now probably no good, just invalidate - // everything about it now - max_valid = 0; - if (!_jl_debug_method_invalidation) - break; - jl_array_ptr_set(matches, ins++, match); - } - } - if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) - jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); - } + abort(); } - ((size_t*)(jl_array_data(maxvalids)))[i] = max_valid; - if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? 
(jl_value_t*)invokesig : callee); - loctag = jl_cstr_to_string("insert_backedges_callee"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_int32((int32_t)i); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); - } - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig); - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee); - //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr); } - JL_GC_POP(); - return maxvalids; -} - -// Combine all edges relevant to a method to initialize the maxvalids list -static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids) -{ - JL_TIMING(VERIFY_IMAGE, VERIFY_Methods); - jl_value_t *loctag = NULL; - jl_array_t *maxvalids2 = NULL; - JL_GC_PUSH2(&loctag, &maxvalids2); - size_t i, l = jl_array_len(edges) / 2; - maxvalids2 = jl_alloc_array_1d(jl_typeof(maxvalids), l); - size_t *maxvalids2_data = (size_t*)jl_array_data(maxvalids2); - memset(maxvalids2_data, 0, l * sizeof(size_t)); - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); - assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type)); - if (callee_ids == NULL) { - // serializing the edges had failed - maxvalids2_data[i] = 0; + l = jl_array_nrows(external); + if (l) { + if (!jl_atomic_load_relaxed(&allow_new_worlds)) { + jl_printf(JL_STDERR, "WARNING: Method changes for %s have been disabled via a call to disable_new_worlds.\n", pkgname); + return; } - else { - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - size_t j; - maxvalids2_data[i] = ~(size_t)0; - for (j = 0; j < idxs[0]; j++) { - int32_t idx = idxs[j + 1]; - size_t max_valid = ((size_t*)(jl_array_data(maxvalids)))[idx]; - if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_int32((int32_t)idx); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - } - if (max_valid < maxvalids2_data[i]) - maxvalids2_data[i] = max_valid; - if (max_valid == 0) - break; - } - } - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); - //ios_puts(maxvalid2_data[i] == ~(size_t)0 ? 
"valid\n" : "INVALID\n", ios_stderr); - } - JL_GC_POP(); - return maxvalids2; -} - - -// Visit the entire call graph, starting from edges[idx] to determine if that method is valid -// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -// and slightly modified with an early termination option once the computation reaches its minimum -static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size_t idx, arraylist_t *visited, arraylist_t *stack) -{ - if (maxvalids2_data[idx] == 0) { - visited->items[idx] = (void*)1; - return 0; - } - size_t cycle = (size_t)visited->items[idx]; - if (cycle != 0) - return cycle - 1; // depth remaining - jl_value_t *cause = NULL; - arraylist_push(stack, (void*)idx); - size_t depth = stack->len; - visited->items[idx] = (void*)(1 + depth); - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); - assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - size_t i, n = jl_array_len(callee_ids); - cycle = depth; - for (i = idxs[0] + 1; i < n; i++) { - int32_t childidx = idxs[i]; - int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, childidx, visited, stack); - size_t child_max_valid = maxvalids2_data[childidx]; - if (child_max_valid < maxvalids2_data[idx]) { - maxvalids2_data[idx] = child_max_valid; - cause = jl_array_ptr_ref(edges, childidx * 2); - } - if (child_max_valid == 0) { - // found what we were looking for, so terminate early - break; - } - else if (child_cycle && child_cycle < cycle) { - // record the cycle will resolve at depth "cycle" - cycle = child_cycle; - } - } - size_t max_valid = maxvalids2_data[idx]; - if (max_valid != 0 && cycle != depth) - return cycle; - // If we are the top of the current cycle, now mark all other parts of - // our cycle with what we found. - // Or if we found a failed edge, also mark all of the other parts of the - // cycle as also having an failed edge. 
- while (stack->len >= depth) { - size_t childidx = (size_t)arraylist_pop(stack); - assert(visited->items[childidx] == (void*)(2 + stack->len)); - if (idx != childidx) { - if (max_valid < maxvalids2_data[childidx]) - maxvalids2_data[childidx] = max_valid; - } - visited->items[childidx] = (void*)1; - if (_jl_debug_method_invalidation && max_valid != ~(size_t)0) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(edges, childidx * 2); - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)cause); - JL_GC_POP(); + for (i = 0; i < l; i++) { + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_array_ptr_ref(external, i); + jl_methtable_t *mt = jl_method_get_table(entry->func.method); + assert((jl_value_t*)mt != jl_nothing); + jl_method_table_activate(mt, entry); } } - return 0; -} - -// Visit all entries in edges, verify if they are valid -static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2) -{ - JL_TIMING(VERIFY_IMAGE, VERIFY_Graph); - arraylist_t stack, visited; - arraylist_new(&stack, 0); - size_t i, n = jl_array_len(edges) / 2; - arraylist_new(&visited, n); - memset(visited.items, 0, n * sizeof(size_t)); - size_t *maxvalids2_data = (size_t*)jl_array_data(maxvalids2); - for (i = 0; i < n; i++) { - assert(visited.items[i] == (void*)0 || visited.items[i] == (void*)1); - int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, i, &visited, &stack); - assert(child_cycle == 0); (void)child_cycle; - assert(stack.len == 0); - assert(visited.items[i] == (void*)1); - } - arraylist_free(&stack); - arraylist_free(&visited); } -// Restore backedges to external targets -// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. -// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. 
-static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list, size_t minworld) +static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key) { - // determine which CodeInstance objects are still valid in our image - jl_array_t *valids = jl_verify_edges(ext_targets, minworld); - JL_GC_PUSH1(&valids); - valids = jl_verify_methods(edges, valids); // consumes edges valids, initializes methods valids - jl_verify_graph(edges, valids); // propagates methods valids for each edge - size_t i, l; - - // next build a map from external MethodInstances to their CodeInstance for insertion - l = jl_array_len(ci_list); - htable_t visited; - htable_new(&visited, l); - for (i = 0; i < l; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i); - assert(ci->min_world == minworld); - if (ci->max_world == 1) { // sentinel value: has edges to external callables - ptrhash_put(&visited, (void*)ci->def, (void*)ci); - } - else { - assert(ci->max_world == ~(size_t)0); - jl_method_instance_t *caller = ci->def; - if (ci->inferred && jl_rettype_inferred(caller, minworld, ~(size_t)0) == jl_nothing) { - jl_mi_cache_insert(caller, ci); - } - //jl_static_show((jl_stream*)ios_stderr, (jl_value_t*)caller); - //ios_puts("free\n", ios_stderr); - } - } - - // next enable any applicable new codes - l = jl_array_len(edges) / 2; - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - size_t maxvalid = ((size_t*)(jl_array_data(valids)))[i]; - if (maxvalid == ~(size_t)0) { - // if this callee is still valid, add all the backedges - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - for (size_t j = 0; j < idxs[0]; j++) { - int32_t idx = idxs[j + 1]; - jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); - jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); - if (callee && jl_is_method_instance(callee)) { - jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); - } - else { - jl_value_t *sig = callee == NULL ? invokesig : callee; - jl_methtable_t *mt = jl_method_table_for(sig); - // FIXME: rarely, `callee` has an unexpected `Union` signature, - // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 - // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` - // This workaround exposes us to (rare) 265-violations. 
- if ((jl_value_t*)mt != jl_nothing) - jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); - } - } - } - // then enable any methods associated with it - void *ci = ptrhash_get(&visited, (void*)caller); - //assert(ci != HT_NOTFOUND); - if (ci != HT_NOTFOUND) { - // have some new external code to use - assert(jl_is_code_instance(ci)); - jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; - assert(codeinst->min_world == minworld && codeinst->inferred); - codeinst->max_world = maxvalid; - if (jl_rettype_inferred(caller, minworld, maxvalid) == jl_nothing) { - jl_mi_cache_insert(caller, codeinst); - } + size_t i, l = jl_array_nrows(method_roots_list); + for (i = 0; i < l; i+=2) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i); + jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1); + if (roots) { + assert(jl_is_array(roots)); + jl_append_method_roots(m, key, roots); } } - - htable_free(&visited); - JL_GC_POP(); -} - -static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges) -{ - size_t l = edges ? jl_array_len(edges) / 2 : 0; - for (size_t i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - ptrhash_put(callers_with_edges, (void*)caller, (void*)caller); - } } static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods) @@ -1165,7 +756,7 @@ static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods) return jl_get_exceptionf(jl_errorexception_type, "Main module uuid state is invalid for module deserialization."); } - size_t i, l = jl_array_len(depmods); + size_t i, l = jl_array_nrows(depmods); for (i = 0; ; i++) { size_t len = read_int32(s); if (len == 0 && i == l) @@ -1227,11 +818,11 @@ static jl_array_t *image_to_depmodidx(jl_array_t *depmods) { if (!depmods) return NULL; - assert(jl_array_len(depmods) < INT32_MAX && "too many dependencies to serialize"); + assert(jl_array_nrows(depmods) < INT32_MAX && "too many dependencies to serialize"); size_t lbids = n_linkage_blobs(); - size_t ldeps = jl_array_len(depmods); + size_t ldeps = jl_array_nrows(depmods); jl_array_t *depmodidxs = jl_alloc_array_1d(jl_array_int32_type, lbids); - int32_t *dmidxs = (int32_t*)jl_array_data(depmodidxs); + int32_t *dmidxs = jl_array_data(depmodidxs, int32_t); memset(dmidxs, -1, lbids * sizeof(int32_t)); dmidxs[0] = 0; // the sysimg can also be found at idx 0, by construction for (size_t i = 0, j = 0; i < ldeps; i++) { @@ -1251,9 +842,9 @@ static jl_array_t *depmod_to_imageidx(jl_array_t *depmods) { if (!depmods) return NULL; - size_t ldeps = jl_array_len(depmods); + size_t ldeps = jl_array_nrows(depmods); jl_array_t *imageidxs = jl_alloc_array_1d(jl_array_int32_type, ldeps + 1); - int32_t *imgidxs = (int32_t*)jl_array_data(imageidxs); + int32_t *imgidxs = jl_array_data(imageidxs, int32_t); imgidxs[0] = 0; for (size_t i = 0; i < ldeps; i++) { jl_value_t *depmod = jl_array_ptr_ref(depmods, i); diff --git a/src/subtype.c b/src/subtype.c index 5b05bb288ffc4..a0b7bff4006ce 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -39,20 +39,24 @@ extern "C" { // Union type decision points are discovered while the algorithm works. // If a new Union decision is encountered, the `more` flag is set to tell // the forall/exists loop to grow the stack. 
-// TODO: the stack probably needs to be artificially large because of some -// deeper problem (see #21191) and could be shrunk once that is fixed + +typedef struct jl_bits_stack_t { + uint32_t data[16]; + struct jl_bits_stack_t *next; +} jl_bits_stack_t; + typedef struct { int16_t depth; int16_t more; int16_t used; - uint32_t stack[100]; // stack of bits represented as a bit vector + jl_bits_stack_t stack; } jl_unionstate_t; typedef struct { int16_t depth; int16_t more; int16_t used; - void *stack; + uint8_t *stack; } jl_saved_unionstate_t; // Linked list storing the type variable environment. A new jl_varbinding_t @@ -65,10 +69,11 @@ typedef struct jl_varbinding_t { jl_value_t *lb; jl_value_t *ub; int8_t right; // whether this variable came from the right side of `A <: B` - int8_t occurs; // occurs in any position int8_t occurs_inv; // occurs in invariant position int8_t occurs_cov; // # of occurrences in covariant position int8_t concrete; // 1 if another variable has a constraint forcing this one to be concrete + int8_t max_offset; // record the maximum positive offset of the variable (up to 32) + // max_offset < 0 if this variable occurs outside VarargNum. // constraintkind: in covariant position, we try three different ways to compute var ∩ type: // let ub = var.ub ∩ type // 0 - var.ub <: type ? var : ub @@ -77,6 +82,7 @@ typedef struct jl_varbinding_t { int8_t constraintkind; int8_t intvalued; // intvalued: must be integer-valued; i.e. occurs as N in Vararg{_,N} int8_t limited; + int8_t intersected; // whether this variable has been intersected int16_t depth0; // # of invariant constructors nested around the UnionAll type for this var // array of typevars that our bounds depend on, whose UnionAlls need to be // moved outside ours. @@ -84,6 +90,14 @@ typedef struct jl_varbinding_t { struct jl_varbinding_t *prev; } jl_varbinding_t; +typedef struct jl_ivarbinding_t { + jl_tvar_t **var; + jl_value_t **lb; + jl_value_t **ub; + jl_varbinding_t *root; + struct jl_ivarbinding_t *next; +} jl_ivarbinding_t; + // subtype algorithm state typedef struct jl_stenv_t { // N.B.: varbindings are created on the stack and rooted there @@ -121,37 +135,111 @@ static jl_varbinding_t *lookup(jl_stenv_t *e, jl_tvar_t *v) JL_GLOBALLY_ROOTED J } #endif +// union-stack tools + static int statestack_get(jl_unionstate_t *st, int i) JL_NOTSAFEPOINT { - assert(i >= 0 && i < sizeof(st->stack) * 8); + assert(i >= 0 && i <= 32767); // limited by the depth bit. // get the `i`th bit in an array of 32-bit words - return (st->stack[i>>5] & (1u<<(i&31))) != 0; + jl_bits_stack_t *stack = &st->stack; + while (i >= sizeof(stack->data) * 8) { + // We should have set this bit. + assert(stack->next); + stack = stack->next; + i -= sizeof(stack->data) * 8; + } + return (stack->data[i>>5] & (1u<<(i&31))) != 0; } static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT { - assert(i >= 0 && i < sizeof(st->stack) * 8); + assert(i >= 0 && i <= 32767); // limited by the depth bit. + jl_bits_stack_t *stack = &st->stack; + while (i >= sizeof(stack->data) * 8) { + if (__unlikely(stack->next == NULL)) { + stack->next = (jl_bits_stack_t *)malloc(sizeof(jl_bits_stack_t)); + stack->next->next = NULL; + } + stack = stack->next; + i -= sizeof(stack->data) * 8; + } if (val) - st->stack[i>>5] |= (1u<<(i&31)); + stack->data[i>>5] |= (1u<<(i&31)); else - st->stack[i>>5] &= ~(1u<<(i&31)); + stack->data[i>>5] &= ~(1u<<(i&31)); +} + +#define has_next_union_state(e, R) ((((R) ? 
&(e)->Runions : &(e)->Lunions)->more) != 0) + +static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT +{ + jl_unionstate_t *state = R ? &e->Runions : &e->Lunions; + if (state->more == 0) + return 0; + // reset `used` and let `pick_union_decision` clean the stack. + state->used = state->more; + statestack_set(state, state->used - 1, 1); + return 1; } -#define push_unionstate(saved, src) \ - do { \ - (saved)->depth = (src)->depth; \ - (saved)->more = (src)->more; \ - (saved)->used = (src)->used; \ - (saved)->stack = alloca(((src)->used+7)/8); \ - memcpy((saved)->stack, &(src)->stack, ((src)->used+7)/8); \ +static int pick_union_decision(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT +{ + jl_unionstate_t *state = R ? &e->Runions : &e->Lunions; + if (state->depth >= state->used) { + statestack_set(state, state->used, 0); + state->used++; + } + int ui = statestack_get(state, state->depth); + state->depth++; + if (ui == 0) + state->more = state->depth; // memorize that this was the deepest available choice + return ui; +} + +static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT +{ + do { + if (pick_union_decision(e, R)) + u = ((jl_uniontype_t*)u)->b; + else + u = ((jl_uniontype_t*)u)->a; + } while (jl_is_uniontype(u)); + return u; +} + +#define push_unionstate(saved, src) \ + do { \ + (saved)->depth = (src)->depth; \ + (saved)->more = (src)->more; \ + (saved)->used = (src)->used; \ + jl_bits_stack_t *srcstack = &(src)->stack; \ + int pushbits = ((saved)->used+7)/8; \ + (saved)->stack = (uint8_t *)alloca(pushbits); \ + for (int n = 0; n < pushbits; n += sizeof(srcstack->data)) { \ + assert(srcstack != NULL); \ + int rest = pushbits - n; \ + if (rest > sizeof(srcstack->data)) \ + rest = sizeof(srcstack->data); \ + memcpy(&(saved)->stack[n], &srcstack->data, rest); \ + srcstack = srcstack->next; \ + } \ } while (0); -#define pop_unionstate(dst, saved) \ - do { \ - (dst)->depth = (saved)->depth; \ - (dst)->more = (saved)->more; \ - (dst)->used = (saved)->used; \ - memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8); \ +#define pop_unionstate(dst, saved) \ + do { \ + (dst)->depth = (saved)->depth; \ + (dst)->more = (saved)->more; \ + (dst)->used = (saved)->used; \ + jl_bits_stack_t *dststack = &(dst)->stack; \ + int popbits = ((saved)->used+7)/8; \ + for (int n = 0; n < popbits; n += sizeof(dststack->data)) { \ + assert(dststack != NULL); \ + int rest = popbits - n; \ + if (rest > sizeof(dststack->data)) \ + rest = sizeof(dststack->data); \ + memcpy(&dststack->data, &(saved)->stack[n], rest); \ + dststack = dststack->next; \ + } \ } while (0); static int current_env_length(jl_stenv_t *e) @@ -170,7 +258,7 @@ typedef struct { int rdepth; int8_t _space[24]; // == 8 * 3 jl_gcframe_t gcframe; - jl_value_t *roots[24]; + jl_value_t *roots[24]; // == 8 * 3 } jl_savedenv_t; static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root) @@ -197,9 +285,9 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root) roots[i++] = v->ub; roots[i++] = (jl_value_t*)v->innervars; } - se->buf[j++] = v->occurs; se->buf[j++] = v->occurs_inv; se->buf[j++] = v->occurs_cov; + se->buf[j++] = v->max_offset; v = v->prev; } assert(i == nroots); (void)nroots; @@ -254,6 +342,18 @@ static void free_env(jl_savedenv_t *se) JL_NOTSAFEPOINT se->buf = NULL; } +static void free_stenv(jl_stenv_t *e) JL_NOTSAFEPOINT +{ + for (int R = 0; R < 2; R++) { + jl_bits_stack_t *temp = R ? 
e->Runions.stack.next : e->Lunions.stack.next; + while (temp != NULL) { + jl_bits_stack_t *next = temp->next; + free(temp); + temp = next; + } + } +} + static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPOINT { jl_value_t **roots = NULL; @@ -278,9 +378,9 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO v->ub = roots[i++]; v->innervars = (jl_array_t*)roots[i++]; } - v->occurs = se->buf[j++]; v->occurs_inv = se->buf[j++]; v->occurs_cov = se->buf[j++]; + v->max_offset = se->buf[j++]; v = v->prev; } assert(i == nroots); (void)nroots; @@ -289,15 +389,6 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO memset(&e->envout[e->envidx], 0, (e->envsz - e->envidx)*sizeof(void*)); } -static void clean_occurs(jl_stenv_t *e) -{ - jl_varbinding_t *v = e->vars; - while (v) { - v->occurs = 0; - v = v->prev; - } -} - #define flip_offset(e) ((e)->Loffset *= -1) // type utilities @@ -586,42 +677,6 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi) static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param); -static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT -{ - jl_unionstate_t *state = R ? &e->Runions : &e->Lunions; - if (state->more == 0) - return 0; - // reset `used` and let `pick_union_decision` clean the stack. - state->used = state->more; - statestack_set(state, state->used - 1, 1); - return 1; -} - -static int pick_union_decision(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT -{ - jl_unionstate_t *state = R ? &e->Runions : &e->Lunions; - if (state->depth >= state->used) { - statestack_set(state, state->used, 0); - state->used++; - } - int ui = statestack_get(state, state->depth); - state->depth++; - if (ui == 0) - state->more = state->depth; // memorize that this was the deepest available choice - return ui; -} - -static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT -{ - do { - if (pick_union_decision(e, R)) - u = ((jl_uniontype_t*)u)->b; - else - u = ((jl_uniontype_t*)u)->a; - } while (jl_is_uniontype(u)); - return u; -} - static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow); // subtype for variable bounds consistency check. needs its own forall/exists environment. @@ -666,8 +721,6 @@ static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int par // of determining whether the variable is concrete. static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param) JL_NOTSAFEPOINT { - if (vb != NULL) - vb->occurs = 1; if (vb != NULL && param) { // saturate counters at 2; we don't need values bigger than that if (param == 2 && e->invdepth > vb->depth0) { @@ -677,6 +730,10 @@ static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param) else if (vb->occurs_cov < 2) { vb->occurs_cov++; } + // Always set `max_offset` to `-1` during the 1st round intersection. + // Would be recovered in `intersect_varargs`/`subtype_tuple_varargs` if needed. + if (!vb->intersected) + vb->max_offset = -1; } } @@ -796,7 +853,7 @@ static int subtype_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int R, int pa // check that a type is concrete or quasi-concrete (Type{T}). // this is used to check concrete typevars: // issubtype is false if the lower bound of a concrete type var is not concrete. 
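Regarding the jl_bits_stack_t change a little above (the fixed 100-word decision array becoming a chain of 16-word chunks that statestack_set grows on demand and free_stenv releases), this is a self-contained sketch of the same grow/lookup/free pattern; the names are toy ones and calloc stands in for the real code's malloc-plus-lazy-initialization:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct chunk { uint32_t data[16]; struct chunk *next; } chunk_t;  /* 512 bits per chunk */

static void bits_set(chunk_t *c, int i, int val) {
    while (i >= (int)(sizeof(c->data) * 8)) {   /* walk the chain, extending it on demand */
        if (c->next == NULL) {
            c->next = (chunk_t *)calloc(1, sizeof(chunk_t));
            assert(c->next != NULL);
        }
        c = c->next;
        i -= (int)(sizeof(c->data) * 8);
    }
    if (val) c->data[i >> 5] |= 1u << (i & 31);
    else     c->data[i >> 5] &= ~(1u << (i & 31));
}

static int bits_get(chunk_t *c, int i) {
    while (i >= (int)(sizeof(c->data) * 8)) {
        assert(c->next != NULL);                /* a bit this deep must have been set already */
        c = c->next;
        i -= (int)(sizeof(c->data) * 8);
    }
    return (c->data[i >> 5] >> (i & 31)) & 1;
}

static void bits_free(chunk_t *head) {          /* like free_stenv: the head chunk lives inline */
    chunk_t *p = head->next;
    while (p != NULL) {
        chunk_t *next = p->next;
        free(p);
        p = next;
    }
    head->next = NULL;
}

int main(void) {
    chunk_t head = {{0}, NULL};
    bits_set(&head, 5, 1);
    bits_set(&head, 1000, 1);                   /* bit 1000 forces a second chunk */
    assert(bits_get(&head, 5) == 1);
    assert(bits_get(&head, 1000) == 1);
    assert(bits_get(&head, 6) == 0);
    bits_free(&head);
    return 0;
}
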
-static int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT +int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT { if (v == jl_bottom_type) return 1; @@ -846,7 +903,7 @@ static jl_value_t *fix_inferred_var_bound(jl_tvar_t *var, jl_value_t *ty JL_MAYB JL_GC_PUSH2(&ans, &vs); vs = jl_find_free_typevars(ty); int i; - for (i = 0; i < jl_array_len(vs); i++) { + for (i = 0; i < jl_array_nrows(vs); i++) { ans = jl_type_unionall((jl_tvar_t*)jl_array_ptr_ref(vs, i), ans); } ans = (jl_value_t*)jl_new_typevar(var->name, jl_bottom_type, ans); @@ -872,10 +929,20 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e) // in the environment, rename to get a fresh var. JL_GC_PUSH1(&u); while (btemp != NULL) { - if (btemp->var == u->var || - // outer var can only refer to inner var if bounds changed + int aliased = btemp->var == u->var || + // outer var can only refer to inner var if bounds changed (mainly for subtyping path) (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) || - (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) { + (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var)); + if (!aliased && btemp->innervars != NULL) { + for (size_t i = 0; i < jl_array_len(btemp->innervars); i++) { + jl_tvar_t *ivar = (jl_tvar_t*)jl_array_ptr_ref(btemp->innervars, i); + if (ivar == u->var) { + aliased = 1; + break; + } + } + } + if (aliased) { u = jl_rename_unionall(u); break; } @@ -888,7 +955,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e) static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param) { u = unalias_unionall(u, e); - jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, + jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, e->invdepth, NULL, e->vars }; JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars); e->vars = &vb; @@ -941,8 +1008,8 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 jl_value_t *vl = btemp->lb; // TODO: this takes a significant amount of time if (btemp->depth0 != vb.depth0 && - ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_invariant(vu, vb.var)) || - (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_invariant(vl, vb.var)))) { + ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_inside(vu, vb.var, 0, 0)) || + (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_inside(vl, vb.var, 0, 0)))) { ans = 0; break; } btemp = btemp->prev; @@ -953,7 +1020,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 if (R && ans && e->envidx < e->envsz) { jl_value_t *val; if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type) - val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0); // special token result that represents N::Int in the envout + val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0, 0); // special token result that represents N::Int in the envout else if (!vb.occurs_inv && vb.lb != jl_bottom_type) val = is_leaf_bound(vb.lb) ? 
vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb); else if (vb.lb == vb.ub) @@ -1008,39 +1075,30 @@ static int subtype_tuple_varargs( jl_value_t *xp0 = jl_unwrap_vararg(vtx); jl_value_t *xp1 = jl_unwrap_vararg_num(vtx); jl_value_t *yp0 = jl_unwrap_vararg(vty); jl_value_t *yp1 = jl_unwrap_vararg_num(vty); + jl_varbinding_t *xlv = NULL, *ylv = NULL; + if (xp1 && jl_is_typevar(xp1)) + xlv = lookup(e, (jl_tvar_t*)xp1); + if (yp1 && jl_is_typevar(yp1)) + ylv = lookup(e, (jl_tvar_t*)yp1); + + int8_t max_offsetx = xlv ? xlv->max_offset : 0; + int8_t max_offsety = ylv ? ylv->max_offset : 0; + + jl_value_t *xl = xlv ? xlv->lb : xp1; + jl_value_t *yl = ylv ? ylv->lb : yp1; + if (!xp1) { - jl_value_t *yl = yp1; - if (yl) { - // Unconstrained on the left, constrained on the right - if (jl_is_typevar(yl)) { - jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl); - if (ylv) - yl = ylv->lb; - } - if (jl_is_long(yl)) { - return 0; - } - } + // Unconstrained on the left, constrained on the right + if (yl && jl_is_long(yl)) + return 0; } else { - jl_value_t *xl = jl_unwrap_vararg_num(vtx); - if (jl_is_typevar(xl)) { - jl_varbinding_t *xlv = lookup(e, (jl_tvar_t*)xl); - if (xlv) - xl = xlv->lb; - } if (jl_is_long(xl)) { if (jl_unbox_long(xl) + 1 == vx) { // LHS is exhausted. We're a subtype if the RHS is either // exhausted as well or unbounded (in which case we need to // set it to 0). - jl_value_t *yl = jl_unwrap_vararg_num(vty); if (yl) { - if (jl_is_typevar(yl)) { - jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl); - if (ylv) - yl = ylv->lb; - } if (jl_is_long(yl)) { return jl_unbox_long(yl) + 1 == vy; } @@ -1090,6 +1148,8 @@ static int subtype_tuple_varargs( // appropriately. e->invdepth++; int ans = subtype((jl_value_t*)jl_any_type, yp1, e, 2); + if (ylv && !ylv->intersected) + ylv->max_offset = max_offsety; e->invdepth--; return ans; } @@ -1130,6 +1190,10 @@ static int subtype_tuple_varargs( e->Loffset = 0; } JL_GC_POP(); + if (ylv && !ylv->intersected) + ylv->max_offset = max_offsety; + if (xlv && !xlv->intersected) + xlv->max_offset = max_offsetx; e->invdepth--; return ans; } @@ -1291,6 +1355,9 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in return ans; } +static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e); +static int has_exists_typevar(jl_value_t *x, jl_stenv_t *e) JL_NOTSAFEPOINT; + // `param` means we are currently looking at a parameter of a type constructor // (as opposed to being outside any type constructor, or comparing variable bounds). // this is used to record the positions where type variables occur for the @@ -1298,7 +1365,31 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) { if (jl_is_uniontype(x)) { - if (x == y) return 1; + if (obviously_egal(x, y)) + return 1; + if (e->Runions.depth == 0 && jl_is_typevar(y) && !jl_has_free_typevars(x)) { + // Similar to fast path for repeated elements: if there have been no outer + // unions on the right, and the right side is a typevar, then we can handle the + // typevar first before picking a union element, under the theory that it may + // be easy to match or reject this whole union in comparing and setting the lb + // and ub of the variable binding, without needing to examine each element. 
+ // However, if x contains any free typevars, then each element with a free + // typevar must be handled separately from the union of all elements without + // free typevars, since the typevars presence might lead to those elements + // getting eliminated (omit_bad_union) or degenerate (Union{Ptr{T}, Ptr}) or + // combined (Union{T, S} where {T, S <: T}). + jl_tvar_t *yvar = (jl_tvar_t *)y; + jl_varbinding_t *yb = lookup(e, yvar); + while (e->intersection && yb != NULL && yb->lb == yb->ub && jl_is_typevar(yb->lb)) { + yvar = (jl_tvar_t *)yb->lb; + yb = lookup(e, yvar); + } + // Note: `x <: ∃y` performs a local ∀-∃ check between `x` and `yb->ub`. + // We need to ensure that there's no ∃ typevar as otherwise that check + // might cause false alarm due to the accumulated env change. + if (yb == NULL || yb->right == 0 || !has_exists_typevar(yb->ub, e)) + return subtype_var(yvar, x, e, 1, param); + } x = pick_union_element(x, e, 0); } if (jl_is_uniontype(y)) { @@ -1341,7 +1432,8 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) if (yy) record_var_occurrence(yy, e, param); if (yr) { record_var_occurrence(xx, e, param); - return subtype(xx->lb, yy->ub, e, 0); + int trysub = e->intersection ? try_subtype_by_bounds(xx->lb, yy->ub, e) : 0; + return trysub || subtype(xx->lb, yy->ub, e, 0); } return var_lt((jl_tvar_t*)x, y, e, param); } @@ -1473,37 +1565,29 @@ static int is_definite_length_tuple_type(jl_value_t *x) return k == JL_VARARG_NONE || k == JL_VARARG_INT; } -static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore); - -static int may_contain_union_decision(jl_value_t *x, jl_stenv_t *e, jl_typeenv_t *log) JL_NOTSAFEPOINT +static int is_exists_typevar(jl_value_t *x, jl_stenv_t *e) { - if (x == NULL || x == (jl_value_t*)jl_any_type || x == jl_bottom_type) + if (!jl_is_typevar(x)) return 0; - if (jl_is_unionall(x)) - return may_contain_union_decision(((jl_unionall_t *)x)->body, e, log); - if (jl_is_datatype(x)) { - jl_datatype_t *xd = (jl_datatype_t *)x; - for (int i = 0; i < jl_nparams(xd); i++) { - jl_value_t *param = jl_tparam(xd, i); - if (jl_is_vararg(param)) - param = jl_unwrap_vararg(param); - if (may_contain_union_decision(param, e, log)) - return 1; + jl_varbinding_t *vb = lookup(e, (jl_tvar_t *)x); + return vb && vb->right; +} + +static int has_exists_typevar(jl_value_t *x, jl_stenv_t *e) JL_NOTSAFEPOINT +{ + jl_typeenv_t *env = NULL; + jl_varbinding_t *v = e->vars; + while (v != NULL) { + if (v->right) { + jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t)); + newenv->var = v->var; + newenv->val = NULL; + newenv->prev = env; + env = newenv; } - return 0; - } - if (!jl_is_typevar(x)) - return jl_is_type(x); - jl_typeenv_t *t = log; - while (t != NULL) { - if (x == (jl_value_t *)t->var) - return 1; - t = t->prev; + v = v->prev; } - jl_typeenv_t newlog = { (jl_tvar_t*)x, NULL, log }; - jl_varbinding_t *xb = lookup(e, (jl_tvar_t *)x); - return may_contain_union_decision(xb ? xb->lb : ((jl_tvar_t *)x)->lb, e, &newlog) || - may_contain_union_decision(xb ? 
xb->ub : ((jl_tvar_t *)x)->ub, e, &newlog); + return env != NULL && jl_has_bound_typevars(x, env); } static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow) @@ -1517,25 +1601,9 @@ static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t int kindy = !jl_has_free_typevars(y); if (kindx && kindy) return jl_subtype(x, y); - if (may_contain_union_decision(y, e, NULL) && pick_union_decision(e, 1) == 0) { - jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions); - e->Lunions.used = e->Runions.used = 0; - e->Lunions.depth = e->Runions.depth = 0; - e->Lunions.more = e->Runions.more = 0; - int count = 0, noRmore = 0; - sub = _forall_exists_subtype(x, y, e, param, &count, &noRmore); - pop_unionstate(&e->Runions, &oldRunions); - // we should not try the slow path if `forall_exists_subtype` has tested all cases; - // Once limit_slow == 1, also skip it if - // 1) `forall_exists_subtype` return false - // 2) the left `Union` looks big - if (limit_slow == -1) - limit_slow = kindx || kindy; - if (noRmore || (limit_slow && (count > 3 || !sub))) - e->Runions.more = oldRmore; - } - else { - // slow path + int has_exists = (!kindx && has_exists_typevar(x, e)) || + (!kindy && has_exists_typevar(y, e)); + if (has_exists && (is_exists_typevar(x, e) != is_exists_typevar(y, e))) { e->Lunions.used = 0; while (1) { e->Lunions.more = 0; @@ -1544,10 +1612,90 @@ static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t if (!sub || !next_union_state(e, 0)) break; } + return sub; } + if (limit_slow == -1) + limit_slow = kindx || kindy; + jl_savedenv_t se; + save_env(e, &se, has_exists); + int count, limited = 0, ini_count = 0; + jl_saved_unionstate_t latestLunions = {0, 0, 0, NULL}; + while (1) { + count = ini_count; + if (ini_count == 0) + e->Lunions.used = 0; + else + pop_unionstate(&e->Lunions, &latestLunions); + while (1) { + e->Lunions.more = 0; + e->Lunions.depth = 0; + if (count < 4) count++; + sub = subtype(x, y, e, param); + if (limit_slow && count == 4) + limited = 1; + if (!sub || !next_union_state(e, 0)) + break; + if (limited || !has_exists || e->Runions.more == oldRmore) { + // re-save env and freeze the ∃decision for previous ∀Union + // Note: We could ignore the rest `∃Union` decisions if `x` and `y` + // contain no ∃ typevar, as they have no effect on env. + ini_count = count; + push_unionstate(&latestLunions, &e->Lunions); + re_save_env(e, &se, has_exists); + e->Runions.more = oldRmore; + } + } + if (sub || e->Runions.more == oldRmore) + break; + assert(e->Runions.more > oldRmore); + next_union_state(e, 1); + restore_env(e, &se, has_exists); // also restore Rdepth here + e->Runions.more = oldRmore; + } + if (!sub) + assert(e->Runions.more == oldRmore); + else if (limited || !has_exists) + e->Runions.more = oldRmore; + free_env(&se); return sub; } +static int equal_var(jl_tvar_t *v, jl_value_t *x, jl_stenv_t *e) +{ + assert(e->Loffset == 0); + // Theoretically bounds change would be merged for union inputs. + // But intersection is not happy as splitting helps to avoid circular env. 
+ assert(!e->intersection || !jl_is_uniontype(x)); + jl_varbinding_t *vb = lookup(e, v); + if (e->intersection && vb != NULL && vb->lb == vb->ub && jl_is_typevar(vb->lb)) + return equal_var((jl_tvar_t *)vb->lb, x, e); + record_var_occurrence(vb, e, 2); + if (vb == NULL) + return e->ignore_free || ( + local_forall_exists_subtype(x, v->lb, e, 2, !jl_has_free_typevars(x)) && + local_forall_exists_subtype(v->ub, x, e, 0, 0)); + if (!vb->right) + return local_forall_exists_subtype(x, vb->lb, e, 2, !jl_has_free_typevars(x)) && + local_forall_exists_subtype(vb->ub, x, e, 0, 0); + if (vb->lb == x) + return var_lt(v, x, e, 0); + if (!subtype_ccheck(x, vb->ub, e)) + return 0; + jl_value_t *lb = simple_join(vb->lb, x); + JL_GC_PUSH1(&lb); + if (!e->intersection || !jl_is_typevar(lb) || !reachable_var(lb, v, e)) + vb->lb = lb; + JL_GC_POP(); + if (vb->ub == x) + return 1; + if (!subtype_ccheck(vb->lb, x, e)) + return 0; + // skip `simple_meet` here as we have proven `x <: vb->ub` + if (!e->intersection || !reachable_var(x, v, e)) + vb->ub = x; + return 1; +} + static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) { if (obviously_egal(x, y)) return 1; @@ -1578,6 +1726,12 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) } } + if (e->Loffset == 0 && jl_is_typevar(y) && jl_is_type(x) && (!e->intersection || !jl_is_uniontype(x))) { + // Fastpath for Type == TypeVar. + // Avoid duplicated `<:` check between adjacent `var_gt` and `var_lt` + return equal_var((jl_tvar_t *)y, x, e); + } + jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions); int sub = local_forall_exists_subtype(x, y, e, 2, -1); @@ -1614,7 +1768,7 @@ static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_savede } } -static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore) +static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) { // The depth recursion has the following shape, after simplification: // ∀₁ @@ -1626,12 +1780,8 @@ static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, i e->Lunions.used = 0; int sub; - if (count) *count = 0; - if (noRmore) *noRmore = 1; while (1) { sub = exists_subtype(x, y, e, &se, param); - if (count) *count = (*count < 4) ? 
*count + 1 : 4; - if (noRmore) *noRmore = *noRmore && e->Runions.more == 0; if (!sub || !next_union_state(e, 0)) break; re_save_env(e, &se, 1); @@ -1641,11 +1791,6 @@ static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, i return sub; } -static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) -{ - return _forall_exists_subtype(x, y, e, param, NULL, NULL); -} - static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz) { e->vars = NULL; @@ -1665,6 +1810,8 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz) e->Lunions.depth = 0; e->Runions.depth = 0; e->Lunions.more = 0; e->Runions.more = 0; e->Lunions.used = 0; e->Runions.used = 0; + e->Lunions.stack.next = NULL; + e->Runions.stack.next = NULL; } // subtyping entry points @@ -1991,7 +2138,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su if (var_occurs_invariant(body, (jl_tvar_t*)b)) return 0; } - if (nparams_expanded_x > npy && jl_is_typevar(b) && concrete_min(a1) > 1) { + if (nparams_expanded_x > npy && jl_is_typevar(b) && is_leaf_typevar((jl_tvar_t *)b) && concrete_min(a1) > 1) { // diagonal rule for 2 or more elements: they must all be concrete on the LHS *subtype = 0; return 1; @@ -2002,7 +2149,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su } for (; i < nparams_expanded_x; i++) { jl_value_t *a = (vx != JL_VARARG_NONE && i >= npx - 1) ? vxt : jl_tparam(x, i); - if (i > npy && jl_is_typevar(b)) { // i == npy implies a == a1 + if (i > npy && jl_is_typevar(b) && is_leaf_typevar((jl_tvar_t *)b)) { // i == npy implies a == a1 // diagonal rule: all the later parameters are also constrained to be type-equal to the first jl_value_t *a2 = a; jl_value_t *au = jl_unwrap_unionall(a); @@ -2094,6 +2241,7 @@ JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env, } init_stenv(&e, env, envsz); int subtype = forall_exists_subtype(x, y, &e, 0); + free_stenv(&e); assert(obvious_subtype == 3 || obvious_subtype == subtype || jl_has_free_typevars(x) || jl_has_free_typevars(y)); #ifndef NDEBUG if (obvious_subtype == 0 || (obvious_subtype == 1 && envsz == 0)) @@ -2186,6 +2334,7 @@ JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b) { init_stenv(&e, NULL, 0); int subtype = forall_exists_subtype(a, b, &e, 0); + free_stenv(&e); assert(subtype_ab == 3 || subtype_ab == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b)); #ifndef NDEBUG if (subtype_ab != 0 && subtype_ab != 1) // ensures that running in a debugger doesn't change the result @@ -2202,6 +2351,7 @@ JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b) { init_stenv(&e, NULL, 0); int subtype = forall_exists_subtype(b, a, &e, 0); + free_stenv(&e); assert(subtype_ba == 3 || subtype_ba == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b)); #ifndef NDEBUG if (subtype_ba != 0 && subtype_ba != 1) // ensures that running in a debugger doesn't change the result @@ -2356,28 +2506,56 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, return y; if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x)) return x; - // band-aid for #46736 - if (obviously_egal(x, y)) + // band-aid for #46736 #56040 + if (obviously_in_union(x, y)) + return y; + if (obviously_in_union(y, x)) return x; + jl_varbinding_t *vars = NULL; + jl_varbinding_t *bbprev = NULL; + jl_varbinding_t *xb = jl_is_typevar(x) ? lookup(e, (jl_tvar_t *)x) : NULL; + jl_varbinding_t *yb = jl_is_typevar(y) ? 
lookup(e, (jl_tvar_t *)y) : NULL; + int simple_x = !jl_has_free_typevars(!jl_is_typevar(x) ? x : xb ? xb->ub : ((jl_tvar_t *)x)->ub); + int simple_y = !jl_has_free_typevars(!jl_is_typevar(y) ? y : yb ? yb->ub : ((jl_tvar_t *)y)->ub); + if (simple_x && simple_y && !(xb && yb)) { + vars = e->vars; + e->vars = xb ? xb : yb; + if (e->vars != NULL) { + bbprev = e->vars->prev; + e->vars->prev = NULL; + } + } jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions); int savedepth = e->invdepth; e->invdepth = depth; jl_value_t *res = intersect_all(x, y, e); e->invdepth = savedepth; pop_unionstate(&e->Runions, &oldRunions); + if (bbprev) e->vars->prev = bbprev; + if (vars) e->vars = vars; return res; } static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t *e, int8_t R, int param) { - if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) { + // band-aid for #56040 + if (!jl_is_uniontype(x) && obviously_in_union((jl_value_t *)u, x)) + return x; + int no_free = !jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u); + if (param == 2 || no_free) { jl_value_t *a=NULL, *b=NULL; JL_GC_PUSH2(&a, &b); + jl_varbinding_t *vars = NULL; + if (no_free) { + vars = e->vars; + e->vars = NULL; + } jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions); a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e); b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e); pop_unionstate(&e->Runions, &oldRunions); + if (vars) e->vars = vars; jl_value_t *i = simple_join(a,b); JL_GC_POP(); return i; @@ -2450,7 +2628,7 @@ static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_ static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT; -// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circulation constraints. +// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circular constraints. static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e) { if (jl_is_uniontype(a)) @@ -2459,22 +2637,21 @@ static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e) else if (jl_is_uniontype(b)) return try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->a, e) || try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->b, e); - else if (jl_egal(a, b)) + else if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || obviously_egal(a, b)) return 1; else if (!jl_is_typevar(b)) return 0; - jl_varbinding_t *vb = e->vars; - while (vb != NULL) { - if (subtype_by_bounds(b, (jl_value_t *)vb->var, e) && obviously_in_union(a, vb->ub)) - return 1; - vb = vb->prev; - } - return 0; + else if (jl_is_typevar(a) && subtype_by_bounds(a, b, e)) + return 1; + // check if `Union{a, ...} <: b`. + jl_varbinding_t *vb = lookup(e, (jl_tvar_t *)b); + jl_value_t *blb = vb ? 
vb->lb : ((jl_tvar_t *)b)->lb; + return obviously_in_union(a, blb); } static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e) { - if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || try_subtype_by_bounds(a, b, e)) + if (try_subtype_by_bounds(a, b, e)) return 1; jl_savedenv_t se; save_env(e, &se, 1); @@ -2721,32 +2898,30 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t) jl_tvar_t *var = ((jl_unionall_t *)u)->var; jl_value_t *ub = var->ub, *body = ((jl_unionall_t *)u)->body; assert(var != t); - if (!jl_has_typevar(var->lb, t)) { - JL_GC_PUSH3(&ub, &body, &var); - body = omit_bad_union(body, t); - if (!jl_has_typevar(body, var)) { - res = body; + JL_GC_PUSH3(&ub, &body, &var); + body = omit_bad_union(body, t); + if (!jl_has_typevar(body, var)) { + res = body; + } + else if (jl_has_typevar(var->lb, t)) { + res = jl_bottom_type; + } + else { + ub = omit_bad_union(ub, t); + if (ub == jl_bottom_type && var->lb != ub) { + res = jl_bottom_type; } - else { - ub = omit_bad_union(ub, t); - if (ub == jl_bottom_type && var->lb != ub) { + else if (obviously_egal(var->lb, ub)) { + res = jl_substitute_var_nothrow(body, var, ub, 2); + if (res == NULL) res = jl_bottom_type; + } + else { + if (ub != var->ub) { + var = jl_new_typevar(var->name, var->lb, ub); + body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var); } - else if (obviously_egal(var->lb, ub)) { - JL_TRY { - res = jl_substitute_var(body, var, ub); - } - JL_CATCH { - res = jl_bottom_type; - } - } - else { - if (ub != var->ub) { - var = jl_new_typevar(var->name, var->lb, ub); - body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var); - } - res = jl_new_struct(jl_unionall_type, var, body); - } + res = jl_new_struct(jl_unionall_type, var, body); } } JL_GC_POP(); @@ -2767,12 +2942,56 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t) return res; } +// TODO: fuse with reachable_var? 
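The has_typevar_via_flatten_env helper introduced just below walks a typevar's bounds through the flattened environment, marking each variable in a `checked` array so that circular bounds terminate instead of recursing forever. The same pattern in miniature, over a toy "variable mentions variable" table (assumed data for illustration, not taken from subtype.c):

#include <assert.h>
#include <string.h>

#define NVARS 3

/* bounds[i][j] != 0 means "the bound of var i mentions var j" */
static const int bounds[NVARS][NVARS] = {
    {0, 1, 0},   /* var 0 mentions var 1 */
    {0, 0, 1},   /* var 1 mentions var 2 */
    {0, 1, 0},   /* var 2 mentions var 1 (a cycle with var 1) */
};

static int mentions(int from, int target, char *checked) {
    if (from == target)
        return 1;
    if (checked[from])
        return 0;        /* already visited: break the cycle */
    checked[from] = 1;
    for (int j = 0; j < NVARS; j++)
        if (bounds[from][j] && mentions(j, target, checked))
            return 1;
    return 0;
}

int main(void) {
    char checked[NVARS];
    memset(checked, 0, sizeof(checked));
    assert(mentions(0, 2, checked));    /* reachable: 0 -> 1 -> 2 */
    memset(checked, 0, sizeof(checked));
    assert(!mentions(1, 0, checked));   /* the 1 <-> 2 cycle never reaches 0, and terminates */
    return 0;
}
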
+static int has_typevar_via_flatten_env(jl_value_t *x, jl_tvar_t *t, jl_ivarbinding_t *allvars, int8_t *checked) { + if (jl_is_unionall(x)) { + jl_tvar_t *var = ((jl_unionall_t *)x)->var; + if (has_typevar_via_flatten_env(var->lb, t, allvars, checked) || + has_typevar_via_flatten_env(var->ub, t, allvars, checked)) + return 1; + return has_typevar_via_flatten_env(((jl_unionall_t *)x)->body, t, allvars, checked); + } + else if (jl_is_uniontype(x)) { + return has_typevar_via_flatten_env(((jl_uniontype_t *)x)->a, t, allvars, checked) || + has_typevar_via_flatten_env(((jl_uniontype_t *)x)->b, t, allvars, checked); + } + else if (jl_is_vararg(x)) { + jl_vararg_t *v = (jl_vararg_t *)x; + return (v->T && has_typevar_via_flatten_env(v->T, t, allvars, checked)) || + (v->N && has_typevar_via_flatten_env(v->N, t, allvars, checked)); + } + else if (jl_is_datatype(x)) { + for (size_t i = 0; i < jl_nparams(x); i++) { + if (has_typevar_via_flatten_env(jl_tparam(x, i), t, allvars, checked)) + return 1; + } + return 0; + } + else if (jl_is_typevar(x)) { + if (t == (jl_tvar_t *)x) + return 1; + size_t ind = 0; + jl_ivarbinding_t *itemp = allvars; + while (itemp && *itemp->var != (jl_tvar_t *)x) + { + ind++; + itemp = itemp->next; + } + if (itemp == NULL || checked[ind]) + return 0; + checked[ind] = 1; + return has_typevar_via_flatten_env(*itemp->lb, t, allvars, checked) || + has_typevar_via_flatten_env(*itemp->ub, t, allvars, checked); + } + return 0; +} + // Caller might not have rooted `res` static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e) { - jl_value_t *varval = NULL; - jl_tvar_t *newvar = vb->var; - JL_GC_PUSH2(&res, &newvar); + jl_value_t *varval = NULL, *ilb = NULL, *iub = NULL, *nivar = NULL; + jl_tvar_t *newvar = vb->var, *ivar = NULL; + JL_GC_PUSH6(&res, &newvar, &ivar, &nivar, &ilb, &iub); // try to reduce var to a single value if (jl_is_long(vb->ub) && jl_is_typevar(vb->lb)) { varval = vb->ub; @@ -2805,126 +3024,304 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub)) newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub); - // remove/replace/rewrap free occurrences of this var in the environment - jl_varbinding_t *wrap = NULL; + // flatten all innervar into a (reversed) list + size_t icount = 0; + if (vb->innervars) + icount += jl_array_nrows(vb->innervars); + for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) { + if (btemp->innervars != NULL) + icount += jl_array_nrows(btemp->innervars); + } + jl_svec_t *p = NULL; + jl_value_t **iparams; + jl_value_t **roots; + JL_GC_PUSHARGS(roots, icount < 22 ? 
3*icount : 1); + if (icount < 22) { + iparams = roots; + } + else { + p = jl_alloc_svec(3*icount); + roots[0] = (jl_value_t*)p; + iparams = jl_svec_data(p); + } + jl_ivarbinding_t *allvars = NULL; + size_t niparams = 0; + if (vb->innervars) { + for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) { + jl_tvar_t *ivar = (jl_tvar_t *)jl_array_ptr_ref(vb->innervars, i); + jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t)); + inew->var = (jl_tvar_t **)&iparams[niparams++]; *inew->var = ivar; + inew->lb = &iparams[niparams++]; *inew->lb = ivar->lb; + inew->ub = &iparams[niparams++]; *inew->ub = ivar->ub; + inew->root = vb; + inew->next = allvars; + allvars = inew; + } + } for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) { - if (jl_has_typevar(btemp->lb, vb->var)) { - if (vb->lb == (jl_value_t*)btemp->var) { + jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t)); + inew->var = &btemp->var; + inew->lb = &btemp->lb; + inew->ub = &btemp->ub; + inew->root = btemp; + inew->next = allvars; + allvars = inew; + if (btemp->innervars) { + for (size_t i = 0; i < jl_array_nrows(btemp->innervars); i++) { + jl_tvar_t *ivar = (jl_tvar_t *)jl_array_ptr_ref(btemp->innervars, i); + jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t)); + inew->var = (jl_tvar_t **)&iparams[niparams++]; *inew->var = ivar; + inew->lb = &iparams[niparams++]; *inew->lb = ivar->lb; + inew->ub = &iparams[niparams++]; *inew->ub = ivar->ub; + inew->root = btemp; + inew->next = allvars; + allvars = inew; + } + } + } + + // remove/replace/rewrap free occurrences of this var in the environment + int wrapped = 0; + jl_ivarbinding_t *pwrap = NULL; + int vcount = icount + current_env_length(e); + int8_t *checked = (int8_t *)alloca(vcount); + for (jl_ivarbinding_t *btemp = allvars, *pbtemp = NULL; btemp != NULL; btemp = btemp->next) { + int bdepth0 = btemp->root->depth0; + int innerflag = 0; + ivar = *btemp->var; + ilb = *btemp->lb; + iub = *btemp->ub; + if (jl_has_typevar(ilb, vb->var)) { + assert(btemp->root->var == ivar || bdepth0 == vb->depth0); + if (vb->lb == (jl_value_t*)ivar) { + JL_GC_POP(); JL_GC_POP(); return jl_bottom_type; } if (varval) { JL_TRY { - btemp->lb = jl_substitute_var(btemp->lb, vb->var, varval); + *btemp->lb = jl_substitute_var(ilb, vb->var, varval); } JL_CATCH { res = jl_bottom_type; } } - else if (btemp->lb == (jl_value_t*)vb->var) { - btemp->lb = vb->lb; - } - else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) { - // if our variable is T, and some outer variable has constraint S = Ref{T}, - // move the `where T` outside `where S` instead of putting it here. issue #21243. 
- if (newvar != vb->var) - btemp->lb = jl_substitute_var(btemp->lb, vb->var, (jl_value_t*)newvar); - wrap = btemp; + else if (ilb == (jl_value_t*)vb->var) { + *btemp->lb = vb->lb; } else { - btemp->lb = jl_new_struct(jl_unionall_type, vb->var, btemp->lb); + innerflag |= 1; } - assert((jl_value_t*)btemp->var != btemp->lb); } - if (jl_has_typevar(btemp->ub, vb->var)) { - if (vb->ub == (jl_value_t*)btemp->var) { - btemp->ub = omit_bad_union(btemp->ub, vb->var); - if (btemp->ub == jl_bottom_type && btemp->ub != btemp->lb) { + if (jl_has_typevar(iub, vb->var)) { + assert(btemp->root->var == ivar || bdepth0 == vb->depth0); + if (vb->ub == (jl_value_t*)ivar) { + *btemp->ub = omit_bad_union(iub, vb->var); + if (*btemp->ub == jl_bottom_type && *btemp->ub != *btemp->lb) { + JL_GC_POP(); JL_GC_POP(); return jl_bottom_type; } } if (varval) { - JL_TRY { - btemp->ub = jl_substitute_var(btemp->ub, vb->var, varval); - } - JL_CATCH { + iub = jl_substitute_var_nothrow(iub, vb->var, varval, 2); + if (iub == NULL) res = jl_bottom_type; - } + else + *btemp->ub = iub; } - else if (btemp->ub == (jl_value_t*)vb->var) { + else if (iub == (jl_value_t*)vb->var) { // TODO: this loses some constraints, such as in this test, where we replace T4<:S3 (e.g. T4==S3 since T4 only appears covariantly once) with T4<:Any // a = Tuple{Float64,T3,T4} where T4 where T3 // b = Tuple{S2,Tuple{S3},S3} where S2 where S3 // Tuple{Float64, T3, T4} where {S3, T3<:Tuple{S3}, T4<:S3} - btemp->ub = vb->ub; + *btemp->ub = vb->ub; } - else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) { - if (newvar != vb->var) - btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar); - wrap = btemp; + else { + innerflag |= 2; } - else - btemp->ub = jl_new_struct(jl_unionall_type, vb->var, btemp->ub); - assert((jl_value_t*)btemp->var != btemp->ub); + if (innerflag) { + memset(checked, 0, vcount); + if (bdepth0 != vb->depth0 || + has_typevar_via_flatten_env(vb->lb, ivar, allvars, checked) || + has_typevar_via_flatten_env(vb->ub, ivar, allvars, checked)) { + if (innerflag & 1) + *btemp->lb = jl_new_struct(jl_unionall_type, vb->var, ilb); + if (innerflag & 2) + *btemp->ub = jl_new_struct(jl_unionall_type, vb->var, iub); + } + else { + assert(btemp->root != vb); + // if our variable is T, and some outer variable has constraint S = Ref{T}, + // move the `where T` outside `where S` instead of putting it here. issue #21243. + if (newvar != vb->var) { + if (innerflag & 1) + *btemp->lb = jl_substitute_var(ilb, vb->var, (jl_value_t*)newvar); + if (innerflag & 2) + *btemp->ub = jl_substitute_var(iub, vb->var, (jl_value_t*)newvar); + } + if (!wrapped) + pwrap = pbtemp; + wrapped = 1; + } + } + assert((jl_value_t*)ivar != *btemp->lb); + assert((jl_value_t*)ivar != *btemp->ub); + } + pbtemp = btemp; + } + + // Insert the newvar into the (reversed) var list if needed. + if (wrapped) { + jl_ivarbinding_t *wrap = pwrap == NULL ? allvars : pwrap->next; + jl_ivarbinding_t *inew = (jl_ivarbinding_t *)alloca(sizeof(jl_ivarbinding_t)); + inew->var = &newvar; + inew->lb = &newvar->lb; + inew->ub = &newvar->ub;; + inew->root = wrap->root; + inew->next = wrap; + if (pwrap != NULL) + pwrap->next = inew; + else + allvars = inew; + vcount++; + } + + // Re-sort the innervar inside the (reversed) var list. + // `jl_has_typevar` is used as the partial-ordering predicate. + // If this is slow, we could possibly switch to a simpler graph sort, such as Tarjan's SCC. 
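The re-sorting loop right below this comment orders the flattened innervar list by a partial order ("is mentioned in the bounds of"), hoisting a later variable in front of any earlier variable whose bounds still refer to it, with a quadratic assert bounding the rescans. A toy version of the same hoist-and-rescan scheme over a linked list, with an explicit budget standing in for that assert; the node type and dependency encoding are illustrative only:

#include <stdio.h>

typedef struct node { int id; unsigned deps; struct node *next; } node_t;

/* "a depends on b" == the bounds of a mention b */
static int depends_on(const node_t *a, const node_t *b) { return (a->deps >> b->id) & 1u; }

static node_t *partial_order_sort(node_t *head, int n) {
    int budget = n * (n + 1) / 2;       /* mirrors the sort_count assert in the real code */
    node_t *prev1 = NULL;
    while (1) {
        node_t *n1 = prev1 ? prev1->next : head;
        if (n1 == NULL)
            break;
        if (--budget < 0)
            break;                      /* give up rather than loop on a bad (cyclic) order */
        int moved = 0;
        node_t *prev2 = n1;
        for (node_t *n2 = n1->next; n2 != NULL; prev2 = n2, n2 = n2->next) {
            if (depends_on(n1, n2)) {   /* n2 must end up before n1: hoist it */
                prev2->next = n2->next;
                n2->next = n1;
                if (prev1) prev1->next = n2; else head = n2;
                moved = 1;
                break;
            }
        }
        if (!moved)
            prev1 = n1;                 /* n1 no longer depends on anything after it */
    }
    return head;
}

int main(void) {
    /* b depends on c, c depends on a; start with b -> c -> a */
    node_t a = {0, 0u, NULL}, b = {1, 1u << 2, NULL}, c = {2, 1u << 0, NULL};
    b.next = &c; c.next = &a;
    node_t *h = partial_order_sort(&b, 3);
    for (node_t *p = h; p != NULL; p = p->next)
        printf("%c ", "abc"[p->id]);    /* prints: a c b */
    printf("\n");
    return 0;
}
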
+ if (icount > 0) { + jl_ivarbinding_t *pib1 = NULL; +#ifndef NDEBUG + size_t sort_count = 0; +#endif + while (1) { + jl_ivarbinding_t *ib1 = pib1 == NULL ? allvars : pib1->next; + if (ib1 == NULL) break; + assert((++sort_count) <= (vcount * (vcount + 1)) >> 1); + int lbfree = jl_has_free_typevars(*ib1->lb); + int ubfree = jl_has_free_typevars(*ib1->ub); + if (lbfree || ubfree) { + int changed = 0; + jl_ivarbinding_t *pib2 = ib1, *ib2 = ib1->next; + while (ib2 != NULL) { + int isinnervar = ib2->root->var != *ib2->var; + if (isinnervar && ib1->root->depth0 == ib2->root->depth0 && + ((lbfree && jl_has_typevar(*ib1->lb, *ib2->var)) || + (ubfree && jl_has_typevar(*ib1->ub, *ib2->var)))) { + pib2->next = ib2->next; + ib2->next = ib1; + ib2->root = ib1->root; + if (pib1) + pib1->next = ib2; + else + allvars = ib2; + changed = 1; + break; + } + pib2 = ib2; + ib2 = ib2->next; + } + if (changed) continue; + } + pib1 = ib1; + } + } + + // Freeze the innervars' lb/ub and perform substitution if needed. + for (jl_ivarbinding_t *btemp1 = allvars; btemp1 != NULL; btemp1 = btemp1->next) { + ivar = *btemp1->var; + ilb = *btemp1->lb; + iub = *btemp1->ub; + int isinnervar = btemp1->root->var != ivar; + if (isinnervar && (ivar->lb != ilb || ivar->ub != iub)) { + nivar = (jl_value_t *)jl_new_typevar(ivar->name, ilb, iub); + if (jl_has_typevar(res, ivar)) + res = jl_substitute_var(res, ivar, nivar); + for (jl_ivarbinding_t *btemp2 = btemp1->next; btemp2 != NULL; btemp2 = btemp2->next) { + ilb = *btemp2->lb; + iub = *btemp2->ub; + if (jl_has_typevar(ilb, ivar)) + *btemp2->lb = jl_substitute_var(ilb, ivar, nivar); + if (jl_has_typevar(iub, ivar)) + *btemp2->ub = jl_substitute_var(iub, ivar, nivar); + } + if (!wrapped && !varval) { + // newvar also needs bounds substitution. + if (jl_has_typevar(vb->lb, ivar)) + vb->lb = jl_substitute_var(vb->lb, ivar, nivar); + if (jl_has_typevar(vb->ub, ivar)) + vb->ub = jl_substitute_var(vb->ub, ivar, nivar); + } + *btemp1->var = (jl_tvar_t *)nivar; } } - if (wrap) { - // We only assign the newvar with the outmost var. - // This make sure we never create a UnionAll with 2 identical vars. - if (wrap->innervars == NULL) - wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0); - jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)newvar); - // TODO: should we move all the innervars here too? + // Switch back the innervars' storage. + while (1) { + jl_ivarbinding_t *btemp = allvars; + jl_varbinding_t *root = btemp ? btemp->root : vb; + size_t icount = 0; + while (btemp && btemp->root == root) { + btemp = btemp->next; + icount++; + } + if (root != vb) icount--; + if (root->innervars != NULL) { + jl_array_t *rinnervars = root->innervars; + JL_GC_PROMISE_ROOTED(rinnervars); + size_t len = jl_array_nrows(rinnervars); + if (icount > len) + jl_array_grow_end(rinnervars, icount - len); + if (icount < len) + jl_array_del_end(rinnervars, len - icount); + } + else if (icount > 0) { + root->innervars = jl_alloc_array_1d(jl_array_any_type, icount); + } + btemp = allvars; + for (size_t i = icount; i > 0; i--) { + jl_array_ptr_set(root->innervars, i - 1, (jl_value_t*)*btemp->var); + btemp = btemp->next; + } + if (root == vb) break; + assert(*btemp->var == root->var); + allvars = btemp->next; + assert(allvars == NULL || allvars->root != root); } + JL_GC_POP(); // if `v` still occurs, re-wrap body in `UnionAll v` or eliminate the UnionAll if (jl_has_typevar(res, vb->var)) { if (varval) { - JL_TRY { - // you can construct `T{x} where x` even if T's parameter is actually - // limited. 
in that case we might get an invalid instantiation here. - res = jl_substitute_var(res, vb->var, varval); - // simplify chains of UnionAlls where bounds become equal - while (jl_is_unionall(res) && obviously_egal(((jl_unionall_t*)res)->var->lb, - ((jl_unionall_t*)res)->var->ub)) - res = jl_instantiate_unionall((jl_unionall_t*)res, ((jl_unionall_t*)res)->var->lb); + // you can construct `T{x} where x` even if T's parameter is actually + // limited. in that case we might get an invalid instantiation here. + res = jl_substitute_var_nothrow(res, vb->var, varval, 2); + // simplify chains of UnionAlls where bounds become equal + while (res != NULL && jl_is_unionall(res) && obviously_egal(((jl_unionall_t*)res)->var->lb, + ((jl_unionall_t*)res)->var->ub)) { + jl_unionall_t * ures = (jl_unionall_t *)res; + res = jl_substitute_var_nothrow(ures->body, ures->var, ures->var->lb, 2); } - JL_CATCH { + if (res == NULL) res = jl_bottom_type; - } } else { + // re-fresh newvar if bounds changed. + if (vb->lb != newvar->lb || vb->ub != newvar->ub) + newvar = jl_new_typevar(newvar->name, vb->lb, vb->ub); if (newvar != vb->var) res = jl_substitute_var(res, vb->var, (jl_value_t*)newvar); varval = (jl_value_t*)newvar; - if (!wrap) + if (!wrapped) res = jl_type_unionall((jl_tvar_t*)newvar, res); } } if (vb->innervars != NULL) { - for (size_t i = 0; i < jl_array_len(vb->innervars); i++) { + for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) { jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb->innervars, i); - // the `btemp->prev` walk is only giving a sort of post-order guarantee (since we are - // iterating 2 trees at once), so once we set `wrap`, there might remain other branches - // of the type walk that now still may have incomplete bounds: finish those now too - jl_varbinding_t *wrap = NULL; - for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) { - if (btemp->depth0 == vb->depth0 && (jl_has_typevar(btemp->lb, var) || jl_has_typevar(btemp->ub, var))) { - wrap = btemp; - } - } - if (wrap) { - if (wrap->innervars == NULL) - wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0); - jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)var); - } - else if (res != jl_bottom_type) { - if (jl_has_typevar(res, var)) - res = jl_type_unionall((jl_tvar_t*)var, res); - } + res = jl_type_unionall(var, res); } } @@ -2943,9 +3340,6 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param, jl_varbinding_t *vb) { jl_varbinding_t *btemp = e->vars; - // if the var for this unionall (based on identity) already appears somewhere - // in the environment, rename to get a fresh var. 
- // TODO: might need to look inside types in btemp->lb and btemp->ub int envsize = 0; while (btemp != NULL) { envsize++; @@ -2953,13 +3347,9 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv vb->limited = 1; return t; } - if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var || - btemp->ub == (jl_value_t*)u->var) { - u = jl_rename_unionall(u); - break; - } btemp = btemp->prev; } + u = unalias_unionall(u, e); JL_GC_PUSH1(&u); vb->var = u->var; e->vars = vb; @@ -3050,7 +3440,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ { jl_value_t *res = NULL; jl_savedenv_t se; - jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, + jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, e->invdepth, NULL, e->vars }; JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars); save_env(e, &se, 1); @@ -3058,6 +3448,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ if (is_leaf_typevar(u->var) && noinv && always_occurs_cov(u->body, u->var, param)) vb.constraintkind = 1; res = intersect_unionall_(t, u, e, R, param, &vb); + vb.intersected = 1; if (vb.limited) { // if the environment got too big, avoid tree recursion and propagate the flag if (e->vars) @@ -3078,7 +3469,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ vb.ub = vb.var->ub; } restore_env(e, &se, vb.constraintkind == 1 ? 1 : 0); - vb.occurs = vb.occurs_cov = vb.occurs_inv = 0; + vb.occurs_cov = vb.occurs_inv = 0; res = intersect_unionall_(t, u, e, R, param, &vb); } } @@ -3125,7 +3516,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t ii = (jl_value_t*)vmy; else { JL_GC_PUSH1(&ii); - ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1); + ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1, 0); JL_GC_POP(); } return ii; @@ -3134,10 +3525,12 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t assert(e->Loffset == 0); e->Loffset = offset; jl_varbinding_t *xb = NULL, *yb = NULL; + int8_t max_offsetx = 0, max_offsety = 0; if (xp2) { assert(jl_is_typevar(xp2)); xb = lookup(e, (jl_tvar_t*)xp2); if (xb) xb->intvalued = 1; + if (xb) max_offsetx = xb->max_offset; if (!yp2) i2 = bound_var_below((jl_tvar_t*)xp2, xb, e, 0); } @@ -3145,6 +3538,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t assert(jl_is_typevar(yp2)); yb = lookup(e, (jl_tvar_t*)yp2); if (yb) yb->intvalued = 1; + if (yb) max_offsety = yb->max_offset; if (!xp2) i2 = bound_var_below((jl_tvar_t*)yp2, yb, e, 1); } @@ -3159,14 +3553,27 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t } assert(e->Loffset == offset); e->Loffset = 0; - if (i2 == jl_bottom_type) + if (i2 == jl_bottom_type) { ii = (jl_value_t*)jl_bottom_type; - else if (xp2 && obviously_egal(xp1, ii) && obviously_egal(xp2, i2)) - ii = (jl_value_t*)vmx; - else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2)) - ii = (jl_value_t*)vmy; - else - ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1); + } + else { + if (xb && !xb->intersected) { + xb->max_offset = max_offsetx; + if (offset > xb->max_offset && xb->max_offset >= 0) + xb->max_offset = offset > 32 ? 32 : offset; + } + if (yb && !yb->intersected) { + yb->max_offset = max_offsety; + if (-offset > yb->max_offset && yb->max_offset >= 0) + yb->max_offset = -offset > 32 ? 
32 : -offset; + } + if (xp2 && obviously_egal(xp1, ii) && obviously_egal(xp2, i2)) + ii = (jl_value_t*)vmx; + else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2)) + ii = (jl_value_t*)vmy; + else + ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1, 0); + } JL_GC_POP(); return ii; } @@ -3185,6 +3592,24 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten llx += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(xd, lx-1))) - 1; if (vvy == JL_VARARG_INT) lly += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(yd, ly-1))) - 1; + if (vvx == JL_VARARG_BOUND && (vvy == JL_VARARG_BOUND || vvy == JL_VARARG_UNBOUND)) { + jl_value_t *xlen = jl_unwrap_vararg_num((jl_vararg_t*)jl_tparam(xd, lx-1)); + assert(xlen && jl_is_typevar(xlen)); + jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)xlen); + if (xb && xb->intersected && xb->max_offset > 0) { + assert(xb->max_offset <= 32); + llx += xb->max_offset; + } + } + if (vvy == JL_VARARG_BOUND && (vvx == JL_VARARG_BOUND || vvx == JL_VARARG_UNBOUND)) { + jl_value_t *ylen = jl_unwrap_vararg_num((jl_vararg_t*)jl_tparam(yd, ly-1)); + assert(ylen && jl_is_typevar(ylen)); + jl_varbinding_t *yb = lookup(e, (jl_tvar_t*)ylen); + if (yb && yb->intersected && yb->max_offset > 0) { + assert(yb->max_offset <= 32); + lly += yb->max_offset; + } + } if ((vvx == JL_VARARG_NONE || vvx == JL_VARARG_INT) && (vvy == JL_VARARG_NONE || vvy == JL_VARARG_INT)) { @@ -3217,8 +3642,8 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten assert(i == j && i == np); break; } - if (xi && jl_is_vararg(xi)) vx = vvx != JL_VARARG_INT; - if (yi && jl_is_vararg(yi)) vy = vvy != JL_VARARG_INT; + if (xi && jl_is_vararg(xi)) vx = vvx == JL_VARARG_UNBOUND || (vvx == JL_VARARG_BOUND && i == llx - 1); + if (yi && jl_is_vararg(yi)) vy = vvy == JL_VARARG_UNBOUND || (vvy == JL_VARARG_BOUND && j == lly - 1); if (xi == NULL || yi == NULL) { if (vx && intersect_vararg_length(xi, lly+1-llx, e, 0)) { np = j; @@ -3309,7 +3734,7 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten else if (isy) res = (jl_value_t*)yd; else if (p) - res = jl_apply_tuple_type(p); + res = jl_apply_tuple_type(p, 1); else res = jl_apply_tuple_type_v(params, np); } @@ -3553,7 +3978,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa if (xlb == xub && ylb == yub && jl_has_typevar(xlb, (jl_tvar_t *)y) && jl_has_typevar(ylb, (jl_tvar_t *)x)) { - // specical case for e.g. + // special case for e.g. // 1) Val{Y}<:X<:Val{Y} && Val{X}<:Y<:Val{X} // 2) Y<:X<:Y && Val{X}<:Y<:Val{X} => Val{Y}<:Y<:Val{Y} ccheck = 0; @@ -3745,73 +4170,12 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa return jl_bottom_type; } -static int merge_env(jl_stenv_t *e, jl_savedenv_t *se, int count) +static int merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se, int count) { - if (count == 0) - alloc_env(e, se, 1); - jl_value_t **roots = NULL; - int nroots = 0; - if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) { - jl_svec_t *sv = (jl_svec_t*)se->roots[0]; - assert(jl_is_svec(sv)); - roots = jl_svec_data(sv); - nroots = jl_svec_len(sv); - } - else { - roots = se->roots; - nroots = se->gcframe.nroots >> 2; - } - int n = 0; - jl_varbinding_t *v = e->vars; - v = e->vars; - while (v != NULL) { - if (count == 0) { - // need to initialize this - se->buf[n] = 0; - se->buf[n+1] = 0; - se->buf[n+2] = 0; - } - if (v->occurs) { - // only merge lb/ub/innervars if this var occurs. 
- jl_value_t *b1, *b2; - b1 = roots[n]; - JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame - b2 = v->lb; - JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots - roots[n] = b1 ? simple_meet(b1, b2, 0) : b2; - b1 = roots[n+1]; - JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame - b2 = v->ub; - JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots - roots[n+1] = b1 ? simple_join(b1, b2) : b2; - b1 = roots[n+2]; - JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame - b2 = (jl_value_t*)v->innervars; - JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots - if (b2 && b1 != b2) { - if (b1) - jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2); - else - roots[n+2] = b2; - } - // record the meeted vars. - se->buf[n] = 1; - } - // always merge occurs_inv/cov by max (never decrease) - if (v->occurs_inv > se->buf[n+1]) - se->buf[n+1] = v->occurs_inv; - if (v->occurs_cov > se->buf[n+2]) - se->buf[n+2] = v->occurs_cov; - n = n + 3; - v = v->prev; + if (count == 0) { + save_env(e, me, 1); + return 1; } - assert(n == nroots); (void)nroots; - return count + 1; -} - -// merge untouched vars' info. -static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se) -{ jl_value_t **merged = NULL; jl_value_t **saved = NULL; int nroots = 0; @@ -3833,47 +4197,49 @@ static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se) } assert(nroots == current_env_length(e) * 3); assert(nroots % 3 == 0); - for (int n = 0; n < nroots; n = n + 3) { - if (merged[n] == NULL) - merged[n] = saved[n]; - if (merged[n+1] == NULL) - merged[n+1] = saved[n+1]; - jl_value_t *b1, *b2; + int m = 0, n = 0; + jl_varbinding_t *v = e->vars; + while (v != NULL) { + jl_value_t *b0, *b1, *b2; + // merge `lb` + b0 = saved[n]; + b1 = merged[n]; + JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame + b2 = v->lb; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots + merged[n] = (b1 == b0 || b2 == b0) ? b0 : simple_meet(b1, b2, 0); + // merge `ub` + b0 = saved[n+1]; + b1 = merged[n+1]; + JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame + b2 = v->ub; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots + merged[n+1] = (b1 == b0 || b2 == b0) ? b0 : simple_join(b1, b2); + // merge `innervars` b1 = merged[n+2]; JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame - b2 = saved[n+2]; - JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know this came from our GC frame + b2 = (jl_value_t*)v->innervars; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots if (b2 && b1 != b2) { if (b1) jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2); else merged[n+2] = b2; } - me->buf[n] |= se->buf[n]; - } -} - -static void expand_local_env(jl_stenv_t *e, jl_value_t *res) -{ - jl_varbinding_t *v = e->vars; - // Here we pull in some typevar missed in fastpath. 
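The rewritten merge_env folds each Union branch's bindings into a running merged copy: a bound stays at its originally saved value if either side still equals that saved value; otherwise lower bounds combine with simple_meet and upper bounds with simple_join, so the merged binding covers every branch. A numeric analogy of that lb/ub rule, with min/max standing in for meet/join (purely illustrative, not code from subtype.c):

#include <stdio.h>

typedef struct { int lb, ub; } bounds_t;

static int meet(int a, int b) { return a < b ? a : b; }   /* stands in for simple_meet */
static int join(int a, int b) { return a > b ? a : b; }   /* stands in for simple_join */

/* keep the pre-intersection value if either side left it untouched,
 * otherwise widen so the result covers both branches */
static bounds_t merge_bounds(bounds_t saved, bounds_t merged, bounds_t branch) {
    bounds_t out;
    out.lb = (merged.lb == saved.lb || branch.lb == saved.lb) ? saved.lb : meet(merged.lb, branch.lb);
    out.ub = (merged.ub == saved.ub || branch.ub == saved.ub) ? saved.ub : join(merged.ub, branch.ub);
    return out;
}

int main(void) {
    bounds_t saved  = {0, 100};   /* binding before intersect_all visited this Union */
    bounds_t merged = {10, 40};   /* accumulated result after branch 1 */
    bounds_t branch = {25, 60};   /* result of branch 2 */
    bounds_t out = merge_bounds(saved, merged, branch);
    printf("[%d, %d]\n", out.lb, out.ub);   /* prints [10, 60]: covers both branches */
    return 0;
}
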
- while (v != NULL) { - v->occurs = v->occurs || jl_has_typevar(res, v->var); - assert(v->occurs == 0 || v->occurs == 1); - v = v->prev; - } - v = e->vars; - while (v != NULL) { - if (v->occurs == 1) { - jl_varbinding_t *v2 = e->vars; - while (v2 != NULL) { - if (v2 != v && v2->occurs == 0) - v2->occurs = -(jl_has_typevar(v->lb, v2->var) || jl_has_typevar(v->ub, v2->var)); - v2 = v2->prev; - } - } + // merge occurs_inv/cov by max (never decrease) + if (v->occurs_inv > me->buf[m]) + me->buf[m] = v->occurs_inv; + if (v->occurs_cov > me->buf[m+1]) + me->buf[m+1] = v->occurs_cov; + // merge max_offset by min + if (!v->intersected && v->max_offset < me->buf[m+2]) + me->buf[m+2] = v->max_offset; + m = m + 3; + n = n + 3; v = v->prev; } + assert(n == nroots); (void)nroots; + return count + 1; } static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) @@ -3886,26 +4252,31 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) jl_savedenv_t se, me; save_env(e, &se, 1); int niter = 0, total_iter = 0; - clean_occurs(e); is[0] = intersect(x, y, e, 0); // root - if (is[0] != jl_bottom_type) { - expand_local_env(e, is[0]); - niter = merge_env(e, &me, niter); + if (is[0] == jl_bottom_type) { + restore_env(e, &se, 1); + } + else if (!e->emptiness_only && has_next_union_state(e, 1)) { + niter = merge_env(e, &me, &se, niter); + restore_env(e, &se, 1); } - restore_env(e, &se, 1); while (next_union_state(e, 1)) { if (e->emptiness_only && is[0] != jl_bottom_type) break; e->Runions.depth = 0; e->Runions.more = 0; - clean_occurs(e); is[1] = intersect(x, y, e, 0); - if (is[1] != jl_bottom_type) { - expand_local_env(e, is[1]); - niter = merge_env(e, &me, niter); + if (is[1] == jl_bottom_type) { + restore_env(e, &se, 1); + } + else if (niter > 0 || (!e->emptiness_only && has_next_union_state(e, 1))) { + niter = merge_env(e, &me, &se, niter); + restore_env(e, &se, 1); + } + else { + assert(is[0] == jl_bottom_type); } - restore_env(e, &se, 1); if (is[0] == jl_bottom_type) is[0] = is[1]; else if (is[1] != jl_bottom_type) { @@ -3913,13 +4284,18 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) is[0] = jl_type_union(is, 2); } total_iter++; - if (niter > 4 || total_iter > 400000) { + if (has_next_union_state(e, 1) && (niter > 4 || total_iter > 400000)) { is[0] = y; + // we give up precise intersection here, just restore the saved env + restore_env(e, &se, 1); + if (niter > 0) { + free_env(&me); + niter = 0; + } break; } } if (niter) { - final_merge_env(e, &me, &se); restore_env(e, &me, 1); free_env(&me); } @@ -3946,7 +4322,9 @@ static jl_value_t *intersect_types(jl_value_t *x, jl_value_t *y, int emptiness_o init_stenv(&e, NULL, 0); e.intersection = e.ignore_free = 1; e.emptiness_only = emptiness_only; - return intersect_all(x, y, &e); + jl_value_t *ans = intersect_all(x, y, &e); + free_stenv(&e); + return ans; } JL_DLLEXPORT jl_value_t *jl_intersect_types(jl_value_t *x, jl_value_t *y) @@ -4046,7 +4424,7 @@ static jl_value_t *switch_union_tuple(jl_value_t *a, jl_value_t *b) ts[1] = jl_tparam(b, i); jl_svecset(vec, i, jl_type_union(ts, 2)); } - jl_value_t *ans = jl_apply_tuple_type(vec); + jl_value_t *ans = jl_apply_tuple_type(vec, 1); JL_GC_POP(); return ans; } @@ -4123,6 +4501,7 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t * memset(env, 0, szb*sizeof(void*)); e.envsz = szb; *ans = intersect_all(a, b, &e); + free_stenv(&e); if (*ans == jl_bottom_type) goto bot; // TODO: code dealing with method signatures is 
not able to handle unions, so if // `a` and `b` are both tuples, we need to be careful and may not return a union, @@ -4220,8 +4599,263 @@ int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv) return sub; } +// type utils +static void check_diagonal(jl_value_t *t, jl_varbinding_t *troot, int param) +{ + if (jl_is_uniontype(t)) { + int i, len = 0; + jl_varbinding_t *v; + for (v = troot; v != NULL; v = v->prev) + len++; + int8_t *occurs = (int8_t *)alloca(len); + for (v = troot, i = 0; v != NULL; v = v->prev, i++) + occurs[i] = v->occurs_inv | (v->occurs_cov << 2); + check_diagonal(((jl_uniontype_t *)t)->a, troot, param); + for (v = troot, i = 0; v != NULL; v = v->prev, i++) { + int8_t occurs_inv = occurs[i] & 3; + int8_t occurs_cov = occurs[i] >> 2; + occurs[i] = v->occurs_inv | (v->occurs_cov << 2); + v->occurs_inv = occurs_inv; + v->occurs_cov = occurs_cov; + } + check_diagonal(((jl_uniontype_t *)t)->b, troot, param); + for (v = troot, i = 0; v != NULL; v = v->prev, i++) { + if (v->occurs_inv < (occurs[i] & 3)) + v->occurs_inv = occurs[i] & 3; + if (v->occurs_cov < (occurs[i] >> 2)) + v->occurs_cov = occurs[i] >> 2; + } + } + else if (jl_is_unionall(t)) { + assert(troot != NULL); + jl_varbinding_t *v1 = troot, *v2 = troot->prev; + while (v2 != NULL) { + if (v2->var == ((jl_unionall_t *)t)->var) { + v1->prev = v2->prev; + break; + } + v1 = v2; + v2 = v2->prev; + } + check_diagonal(((jl_unionall_t *)t)->body, troot, param); + v1->prev = v2; + } + else if (jl_is_datatype(t)) { + int nparam = jl_is_tuple_type(t) ? 1 : 2; + if (nparam < param) nparam = param; + for (size_t i = 0; i < jl_nparams(t); i++) { + check_diagonal(jl_tparam(t, i), troot, nparam); + } + } + else if (jl_is_vararg(t)) { + jl_value_t *T = jl_unwrap_vararg(t); + jl_value_t *N = jl_unwrap_vararg_num(t); + int n = (N && jl_is_long(N)) ? jl_unbox_long(N) : 2; + if (T && n > 0) check_diagonal(T, troot, param); + if (T && n > 1) check_diagonal(T, troot, param); + if (N) check_diagonal(N, troot, 2); + } + else if (jl_is_typevar(t)) { + jl_varbinding_t *v = troot; + for (; v != NULL; v = v->prev) { + if (v->var == (jl_tvar_t *)t) { + if (param == 1 && v->occurs_cov < 2) v->occurs_cov++; + if (param == 2 && v->occurs_inv < 2) v->occurs_inv++; + break; + } + } + if (v == NULL) + check_diagonal(((jl_tvar_t *)t)->ub, troot, 0); + } +} + +static jl_value_t *insert_nondiagonal(jl_value_t *type, jl_varbinding_t *troot, int widen2ub) +{ + if (jl_is_typevar(type)) { + int concretekind = widen2ub > 1 ? 
0 : 1; + jl_varbinding_t *v = troot; + for (; v != NULL; v = v->prev) { + if (v->occurs_inv == 0 && + v->occurs_cov > concretekind && + v->var == (jl_tvar_t *)type) + break; + } + if (v != NULL) { + if (widen2ub) { + type = insert_nondiagonal(((jl_tvar_t *)type)->ub, troot, 2); + } + else { + // we must replace each covariant occurrence of newvar with a different newvar2<:newvar (diagonal rule) + if (v->innervars == NULL) + v->innervars = jl_alloc_array_1d(jl_array_any_type, 0); + jl_value_t *newvar = NULL, *lb = v->var->lb, *ub = (jl_value_t *)v->var; + jl_array_t *innervars = v->innervars; + JL_GC_PUSH4(&newvar, &lb, &ub, &innervars); + newvar = (jl_value_t *)jl_new_typevar(v->var->name, lb, ub); + jl_array_ptr_1d_push(innervars, newvar); + JL_GC_POP(); + type = newvar; + } + } + } + else if (jl_is_unionall(type)) { + jl_value_t *body = ((jl_unionall_t*)type)->body; + jl_tvar_t *var = ((jl_unionall_t*)type)->var; + jl_varbinding_t *v = troot; + for (; v != NULL; v = v->prev) { + if (v->var == var) + break; + } + if (v) v->var = NULL; // Temporarily remove `type->var` from binding list. + jl_value_t *newbody = insert_nondiagonal(body, troot, widen2ub); + if (v) v->var = var; // And restore it after inner insertion. + jl_value_t *newvar = NULL; + JL_GC_PUSH3(&newbody, &newvar, &type); + if (body == newbody || jl_has_typevar(newbody, var)) { + if (body != newbody) + type = jl_new_struct(jl_unionall_type, var, newbody); + // n.b. we do not widen lb, since that would be the wrong direction + newvar = insert_nondiagonal(var->ub, troot, widen2ub); + if (newvar != var->ub) { + newvar = (jl_value_t*)jl_new_typevar(var->name, var->lb, newvar); + newbody = jl_apply_type1(type, newvar); + type = jl_type_unionall((jl_tvar_t*)newvar, newbody); + } + } + JL_GC_POP(); + } + else if (jl_is_uniontype(type)) { + jl_value_t *a = ((jl_uniontype_t*)type)->a; + jl_value_t *b = ((jl_uniontype_t*)type)->b; + jl_value_t *newa = NULL; + jl_value_t *newb = NULL; + JL_GC_PUSH2(&newa, &newb); + newa = insert_nondiagonal(a, troot, widen2ub); + newb = insert_nondiagonal(b, troot, widen2ub); + if (newa != a || newb != b) + type = simple_union(newa, newb); + JL_GC_POP(); + } + else if (jl_is_vararg(type)) { + // As for Vararg, we'd better widen its var to ub as otherwise they are still diagonal + jl_value_t *t = jl_unwrap_vararg(type); + jl_value_t *n = jl_unwrap_vararg_num(type); + if (widen2ub == 0) + widen2ub = !(n && jl_is_long(n)) || jl_unbox_long(n) > 1; + jl_value_t *newt = insert_nondiagonal(t, troot, widen2ub); + if (t != newt) { + JL_GC_PUSH1(&newt); + type = (jl_value_t *)jl_wrap_vararg(newt, n, 0, 0); + JL_GC_POP(); + } + } + else if (jl_is_datatype(type)) { + if (jl_is_tuple_type(type)) { + jl_svec_t *newparams = NULL; + jl_value_t *newelt = NULL; + JL_GC_PUSH2(&newparams, &newelt); + for (size_t i = 0; i < jl_nparams(type); i++) { + jl_value_t *elt = jl_tparam(type, i); + newelt = insert_nondiagonal(elt, troot, widen2ub); + if (elt != newelt) { + if (!newparams) + newparams = jl_svec_copy(((jl_datatype_t*)type)->parameters); + jl_svecset(newparams, i, newelt); + } + } + if (newparams) + type = (jl_value_t*)jl_apply_tuple_type(newparams, 1); + JL_GC_POP(); + } + } + return type; +} + +static jl_value_t *_widen_diagonal(jl_value_t *t, jl_varbinding_t *troot) { + check_diagonal(t, troot, 0); + int any_concrete = 0; + for (jl_varbinding_t *v = troot; v != NULL; v = v->prev) + any_concrete |= v->occurs_cov > 1 && v->occurs_inv == 0; + if (!any_concrete) + return t; // no diagonal + return insert_nondiagonal(t,
troot, 0); +} + +static jl_value_t *widen_diagonal(jl_value_t *t, jl_unionall_t *u, jl_varbinding_t *troot) +{ + jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot }; + jl_value_t *nt = NULL; + JL_GC_PUSH2(&vb.innervars, &nt); + if (jl_is_unionall(u->body)) + nt = widen_diagonal(t, (jl_unionall_t *)u->body, &vb); + else + nt = _widen_diagonal(t, &vb); + if (vb.innervars != NULL) { + for (size_t i = 0; i < jl_array_nrows(vb.innervars); i++) { + jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb.innervars, i); + nt = jl_type_unionall(var, nt); + } + } + JL_GC_POP(); + return nt; +} + +JL_DLLEXPORT jl_value_t *jl_widen_diagonal(jl_value_t *t, jl_unionall_t *ua) +{ + return widen_diagonal(t, ua, NULL); +} // specificity comparison +static int count_missing_wrap(jl_value_t *x, jl_typeenv_t *env) +{ + if (!jl_has_free_typevars(x)) + return 0; + jl_typeenv_t *wrapped = NULL; + int count = 0; + for (jl_typeenv_t *env2 = env; env2 != NULL; env2 = env2->prev) { + int need_wrap = 0; + for (jl_typeenv_t *env3 = wrapped; env3 != NULL && need_wrap == 0; env3 = env3->prev) { + if (env3->var == env2->var) + need_wrap = -1; + else if (jl_has_typevar(env3->var->lb, env2->var) || jl_has_typevar(env3->var->ub, env2->var)) + need_wrap = 1; + } + need_wrap = need_wrap == 0 ? jl_has_typevar(x, env2->var) : + need_wrap == -1 ? 0 : 1; + if (need_wrap) { + count++; + jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t)); + newenv->var = env2->var; + newenv->val = NULL; + newenv->prev = wrapped; + wrapped = newenv; + } + } + return count; +} + +static int obvious_subtype_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *subtype, int wrapx, int wrapy) +{ + if (wrapx != 0 || wrapy != 0) { + int wrap_count = wrapx - wrapy; + while (wrap_count > 0 && jl_is_unionall(y)) + { + y = ((jl_unionall_t*)y)->body; + wrap_count--; + } + while (wrap_count < 0 && jl_is_unionall(x)) + { + x = ((jl_unionall_t*)x)->body; + wrap_count++; + } + if (wrap_count > 0) { + if (obvious_subtype(jl_unwrap_unionall(x), y, y0, subtype) && !*subtype) + return 1; + return 0; + } + } + return obvious_subtype(x, y, y0, subtype); +} static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, jl_typeenv_t *env) { @@ -4244,12 +4878,14 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, a = b; b = temp; } + int wrapa = count_missing_wrap(a, env); + int wrapb = count_missing_wrap(b, env); // first check if a <: b has an obvious answer int subtype_ab = 2; if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) { subtype_ab = 1; } - else if (obvious_subtype(a, b, b0, &subtype_ab)) { + else if (obvious_subtype_msp(a, b, b0, &subtype_ab, wrapa, wrapb)) { #ifdef NDEBUG if (subtype_ab == 0) return 0; @@ -4263,7 +4899,7 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) { subtype_ba = 1; } - else if (obvious_subtype(b, a, a0, &subtype_ba)) { + else if (obvious_subtype_msp(b, a, a0, &subtype_ba, wrapb, wrapa)) { #ifdef NDEBUG if (subtype_ba == 0) return 0; @@ -4328,7 +4964,9 @@ static int sub_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, jl_typeenv_t *e return 1; } int obvious_sub = 2; - if (obvious_subtype(x, y, y0, &obvious_sub)) { + int wrapx = count_missing_wrap(x, env); + int wrapy = count_missing_wrap(y, env); + if (obvious_subtype_msp(x, y, y0, &obvious_sub, wrapx, wrapy)) { #ifdef NDEBUG return obvious_sub; #endif @@ -4793,6 +5431,26 @@ JL_DLLEXPORT int 
jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b) return type_morespecific_(a, b, a, b, 0, NULL); } +// Equivalent to `jl_type_morespecific` of the signatures, except that more recent +// methods are more specific, iff the method signatures are type-equal +JL_DLLEXPORT int jl_method_morespecific(jl_method_t *ma, jl_method_t *mb) +{ + jl_value_t *a = (jl_value_t*)ma->sig; + jl_value_t *b = (jl_value_t*)mb->sig; + if (obviously_disjoint(a, b, 1)) + return 0; + if (jl_has_free_typevars(a) || jl_has_free_typevars(b)) + return 0; + if (jl_subtype(b, a)) { + if (jl_types_equal(a, b)) + return jl_atomic_load_relaxed(&ma->primary_world) > jl_atomic_load_relaxed(&mb->primary_world); + return 0; + } + if (jl_subtype(a, b)) + return 1; + return type_morespecific_(a, b, a, b, 0, NULL); +} + #ifdef __cplusplus } #endif diff --git a/src/support/arraylist.h b/src/support/arraylist.h index 6ad2f0e2f28c9..8d4ef61ba251c 100644 --- a/src/support/arraylist.h +++ b/src/support/arraylist.h @@ -5,7 +5,7 @@ #define AL_N_INLINE 29 -#define SMALL_AL_N_INLINE 6 +#define SMALL_AL_N_INLINE 5 #ifdef __cplusplus extern "C" { @@ -13,32 +13,33 @@ extern "C" { #include "analyzer_annotations.h" -typedef struct { +typedef struct { // 32 words size_t len; size_t max; void **items; void *_space[AL_N_INLINE]; } arraylist_t; -arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT; -void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; +JL_DLLEXPORT arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; -void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; -void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; +JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; -typedef struct { - uint32_t len; - uint32_t max; +typedef struct { // 8 words + size_t len; + size_t max; void **items; void *_space[SMALL_AL_N_INLINE]; } small_arraylist_t; -small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT; -void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT; -void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT; -void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT; +JL_DLLEXPORT small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT; +JL_DLLEXPORT void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT; + +JL_DLLEXPORT void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT; +JL_DLLEXPORT void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT; #ifdef __cplusplus diff --git a/src/support/dtypes.h b/src/support/dtypes.h index da570921c101c..6513370da4dae 100644 --- a/src/support/dtypes.h +++ b/src/support/dtypes.h @@ -96,27 +96,23 @@ typedef intptr_t ssize_t; #include #define LITTLE_ENDIAN __LITTLE_ENDIAN #define BIG_ENDIAN __BIG_ENDIAN -#define PDP_ENDIAN __PDP_ENDIAN #define BYTE_ORDER __BYTE_ORDER #endif -#if defined(__APPLE__) || defined(__FreeBSD__) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) #include #define __LITTLE_ENDIAN LITTLE_ENDIAN #define __BIG_ENDIAN BIG_ENDIAN -#define __PDP_ENDIAN PDP_ENDIAN #define __BYTE_ORDER BYTE_ORDER #endif #ifdef _OS_WINDOWS_ #define __LITTLE_ENDIAN
1234 #define __BIG_ENDIAN 4321 -#define __PDP_ENDIAN 3412 #define __BYTE_ORDER __LITTLE_ENDIAN #define __FLOAT_WORD_ORDER __LITTLE_ENDIAN #define LITTLE_ENDIAN __LITTLE_ENDIAN #define BIG_ENDIAN __BIG_ENDIAN -#define PDP_ENDIAN __PDP_ENDIAN #define BYTE_ORDER __BYTE_ORDER #endif @@ -127,6 +123,13 @@ typedef intptr_t ssize_t; #define STATIC_INLINE static inline #define FORCE_INLINE static inline __attribute__((always_inline)) +#ifdef _OS_WINDOWS_ +#define EXTERN_INLINE_DECLARE inline +#else +#define EXTERN_INLINE_DECLARE inline __attribute__ ((visibility("default"))) +#endif +#define EXTERN_INLINE_DEFINE extern inline JL_DLLEXPORT + #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_) # define NOINLINE __declspec(noinline) # define NOINLINE_DECL(f) __declspec(noinline) f diff --git a/src/support/ios.c b/src/support/ios.c index b5a168f705603..7f70112c82cc0 100644 --- a/src/support/ios.c +++ b/src/support/ios.c @@ -210,8 +210,10 @@ static char *_buf_realloc(ios_t *s, size_t sz) if (temp == NULL) return NULL; s->ownbuf = 1; - if (s->size > 0) + if (s->size > 0) { + assert(s->buf != NULL); memcpy(temp, s->buf, (size_t)s->size); + } } s->buf = temp; @@ -600,12 +602,12 @@ int ios_eof(ios_t *s) { if (s->state == bst_rd && s->bpos < s->size) return 0; + if (s->_eof) + return 1; if (s->bm == bm_mem) - return (s->_eof ? 1 : 0); + return 0; if (s->fd == -1) return 1; - if (s->_eof) - return 1; return 0; /* if (_fd_available(s->fd)) @@ -615,6 +617,12 @@ int ios_eof(ios_t *s) */ } +void ios_reseteof(ios_t *s) +{ + if (s->bm != bm_mem && s->fd != -1) + s->_eof = 0; +} + int ios_eof_blocking(ios_t *s) { if (s->state == bst_rd && s->bpos < s->size) @@ -721,8 +729,10 @@ char *ios_take_buffer(ios_t *s, size_t *psize) buf = (char*)LLT_ALLOC((size_t)s->size + 1); if (buf == NULL) return NULL; - if (s->size) + if (s->size) { + assert(s->buf != NULL); memcpy(buf, s->buf, (size_t)s->size); + } } else if (s->size == s->maxsize) { buf = (char*)LLT_REALLOC(s->buf, (size_t)s->size + 1); @@ -832,7 +842,7 @@ size_t ios_copyall(ios_t *to, ios_t *from) #define LINE_CHUNK_SIZE 160 -size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) +size_t ios_copyuntil(ios_t *to, ios_t *from, char delim, int keep) { size_t total = 0, avail = (size_t)(from->size - from->bpos); while (!ios_eof(from)) { @@ -850,9 +860,9 @@ size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) avail = 0; } else { - size_t ntowrite = pd - (from->buf+from->bpos) + 1; + size_t ntowrite = pd - (from->buf+from->bpos) + (keep != 0); written = ios_write(to, from->buf+from->bpos, ntowrite); - from->bpos += ntowrite; + from->bpos += ntowrite + (keep == 0); total += written; return total; } @@ -1217,9 +1227,11 @@ char *ios_readline(ios_t *s) { ios_t dest; ios_mem(&dest, 0); - ios_copyuntil(&dest, s, '\n'); + ios_copyuntil(&dest, s, '\n', 1); size_t n; - return ios_take_buffer(&dest, &n); + char * ret = ios_take_buffer(&dest, &n); + ios_close(&dest); + return ret; } extern int vasprintf(char **strp, const char *fmt, va_list ap); diff --git a/src/support/ios.h b/src/support/ios.h index 2547555b5585d..6eab9e21c45b6 100644 --- a/src/support/ios.h +++ b/src/support/ios.h @@ -108,7 +108,7 @@ JL_DLLEXPORT int ios_get_writable(ios_t *s); JL_DLLEXPORT void ios_set_readonly(ios_t *s); JL_DLLEXPORT size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes); JL_DLLEXPORT size_t ios_copyall(ios_t *to, ios_t *from); -JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) JL_NOTSAFEPOINT; +JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim, int 
keep) JL_NOTSAFEPOINT; JL_DLLEXPORT size_t ios_nchomp(ios_t *from, size_t ntowrite); // ensure at least n bytes are buffered if possible. returns # available. JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n); diff --git a/src/support/platform.h b/src/support/platform.h index 56f8cafbc89fa..816e2090b5a08 100644 --- a/src/support/platform.h +++ b/src/support/platform.h @@ -8,7 +8,7 @@ * based of compiler-specific pre-defined macros. It is based on the * information that can be found at the following address: * - * http://sourceforge.net/p/predef/wiki/Home/ + * https://sourceforge.net/p/predef/wiki/Home/ * * Possible values include: * Compiler: @@ -16,6 +16,7 @@ * _COMPILER_GCC_ * OS: * _OS_FREEBSD_ + * _OS_OPENBSD_ * _OS_LINUX_ * _OS_WINDOWS_ * _OS_DARWIN_ @@ -26,6 +27,7 @@ * _CPU_X86_64_ * _CPU_AARCH64_ * _CPU_ARM_ + * _CPU_RISCV64_ * _CPU_WASM_ */ @@ -81,6 +83,8 @@ #if defined(__FreeBSD__) #define _OS_FREEBSD_ +#elif defined(__OpenBSD__) +#define _OS_OPENBSD_ #elif defined(__linux__) #define _OS_LINUX_ #elif defined(_WIN32) || defined(_WIN64) @@ -103,6 +107,8 @@ #define _CPU_AARCH64_ #elif defined(__arm__) || defined(_M_ARM) #define _CPU_ARM_ +#elif defined(__riscv) && __riscv_xlen == 64 +#define _CPU_RISCV64_ #elif defined(__PPC64__) #define _CPU_PPC64_ #elif defined(_ARCH_PPC) diff --git a/src/support/strptime.c b/src/support/strptime.c index ab75ee05ee8db..27c86c9e4f2b8 100644 --- a/src/support/strptime.c +++ b/src/support/strptime.c @@ -134,7 +134,7 @@ static const char * const nadt[5] = { /* * Table to determine the ordinal date for the start of a month. - * Ref: http://en.wikipedia.org/wiki/ISO_week_date + * Ref: https://en.wikipedia.org/wiki/ISO_week_date */ static const int start_of_month[2][13] = { /* non-leap year */ @@ -147,7 +147,7 @@ static const int start_of_month[2][13] = { * Calculate the week day of the first day of a year. Valid for * the Gregorian calendar, which began Sept 14, 1752 in the UK * and its colonies. 
Ref: - * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week + * https://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week */ static int diff --git a/src/support/strtod.c b/src/support/strtod.c index 24f556d0c086b..e0ad1bf33435a 100644 --- a/src/support/strtod.c +++ b/src/support/strtod.c @@ -11,7 +11,7 @@ extern "C" { #endif -#if !defined(_OS_WINDOWS_) +#if !defined(_OS_WINDOWS_) && !defined(__OpenBSD__) // This code path should be used for systems that support the strtod_l function // Cache locale object diff --git a/src/support/utf8.c b/src/support/utf8.c index 42a420fb0c499..46a6515e9b753 100644 --- a/src/support/utf8.c +++ b/src/support/utf8.c @@ -27,11 +27,10 @@ #ifdef _OS_WINDOWS_ #include -#define snprintf _snprintf #else -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) #include -#endif /* __FreeBSD__ */ +#endif /* !__FreeBSD__ && !__OpenBSD__ */ #endif #include @@ -410,7 +409,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch) } size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end, - int escape_quotes, int ascii) + const char *escapes, int ascii) { size_t i = *pi, i0; uint32_t ch; @@ -420,12 +419,9 @@ size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end, while (i 0x9f) return 0; - // Check for overlong encoding - if (byt == 0xe0 && *pnt < 0xa0) return 0; + // Check for overlong encoding + if (byt == 0xe0 && *pnt < 0xa0) return 0; pnt += 2; } else { // 4-byte sequence // Must have 3 valid continuation characters diff --git a/src/support/utf8.h b/src/support/utf8.h index 1d8e31c043838..eab86f602ee61 100644 --- a/src/support/utf8.h +++ b/src/support/utf8.h @@ -12,7 +12,7 @@ extern "C" { /* is c the start of a utf8 sequence? */ #define isutf(c) (((c)&0xC0)!=0x80) -#define UEOF ((uint32_t)-1) +#define UEOF (UINT32_MAX) /* convert UTF-8 data to wide character */ size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz); @@ -63,7 +63,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch); sz is buf size in bytes. must be at least 12. - if escape_quotes is nonzero, quote characters will be escaped. + if escapes is given, given characters will also be escaped (in addition to \\). if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives. @@ -75,7 +75,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch); returns number of bytes placed in buf, including a NUL terminator. */ size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end, - int escape_quotes, int ascii); + const char *escapes, int ascii); /* utility predicates used by the above */ int octal_digit(char c); diff --git a/src/support/win32-clang-ABI-bug/optional b/src/support/win32-clang-ABI-bug/optional new file mode 100644 index 0000000000000..fd2f7646e1766 --- /dev/null +++ b/src/support/win32-clang-ABI-bug/optional @@ -0,0 +1,532 @@ +//===- optional.h - Simple variant for passing optional values --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides optional, a template class modeled in the spirit of +/// OCaml's 'opt' variant. The idea is to strongly type whether or not +/// a value can be optional. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef JL_OPTIONAL_H +#define JL_OPTIONAL_H + +//#include "llvm/ADT/STLForwardCompat.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/type_traits.h" +#include +#include +#include +#include +#include "llvm-version.h" + +namespace std { + +//#include "llvm/ADT/None.h" +/// A simple null object to allow implicit construction of optional +/// and similar types without having to spell out the specialization's name. +// (constant value 1 in an attempt to workaround MSVC build issue... ) +enum class nullopt_t { nullopt = 1 }; +const nullopt_t nullopt = nullopt_t::nullopt; + +class raw_ostream; + +namespace optional_detail { + +/// Storage for any type. +// +// The specialization condition intentionally uses +// llvm::is_trivially_{copy/move}_constructible instead of +// std::is_trivially_{copy/move}_constructible. GCC versions prior to 7.4 may +// instantiate the copy/move constructor of `T` when +// std::is_trivially_{copy/move}_constructible is instantiated. This causes +// compilation to fail if we query the trivially copy/move constructible +// property of a class which is not copy/move constructible. +// +// The current implementation of OptionalStorage insists that in order to use +// the trivial specialization, the value_type must be trivially copy +// constructible and trivially copy assignable due to =default implementations +// of the copy/move constructor/assignment. It does not follow that this is +// necessarily the case std::is_trivially_copyable is true (hence the expanded +// specialization condition). +// +// The move constructible / assignable conditions emulate the remaining behavior +// of std::is_trivially_copyable. +template ::value && + std::is_trivially_copy_assignable::value && + (std::is_trivially_move_constructible::value || + !std::is_move_constructible::value) && + (std::is_trivially_move_assignable::value || + !std::is_move_assignable::value))> +class OptionalStorage { + union { + char empty; + T val; + }; + bool hasVal = false; + +public: + ~OptionalStorage() { reset(); } + + constexpr OptionalStorage() noexcept : empty() {} + + constexpr OptionalStorage(OptionalStorage const &other) : OptionalStorage() { + if (other.has_value()) { + emplace(other.val); + } + } + constexpr OptionalStorage(OptionalStorage &&other) : OptionalStorage() { + if (other.has_value()) { + emplace(std::move(other.val)); + } + } + + template + constexpr explicit OptionalStorage(in_place_t, Args &&...args) + : val(std::forward(args)...), hasVal(true) {} + + void reset() noexcept { + if (hasVal) { + val.~T(); + hasVal = false; + } + } + + constexpr bool has_value() const noexcept { return hasVal; } + constexpr bool hasValue() const noexcept { return hasVal; } + + T &value() &noexcept { + assert(hasVal); + return val; + } + T &getValue() &noexcept { + assert(hasVal); + return val; + } + constexpr T const &value() const &noexcept { + assert(hasVal); + return val; + } + constexpr T const &getValue() const &noexcept { + assert(hasVal); + return val; + } + T &&value() &&noexcept { + assert(hasVal); + return std::move(val); + } + T &&getValue() &&noexcept { + assert(hasVal); + return std::move(val); + } + + template void emplace(Args &&...args) { + reset(); + ::new ((void *)std::addressof(val)) T(std::forward(args)...); + hasVal = true; + } + + OptionalStorage &operator=(T const &y) { + if (has_value()) { + val = y; + } else { + ::new ((void *)std::addressof(val)) T(y); + hasVal = true; + } + 
return *this; + } + OptionalStorage &operator=(T &&y) { + if (has_value()) { + val = std::move(y); + } else { + ::new ((void *)std::addressof(val)) T(std::move(y)); + hasVal = true; + } + return *this; + } + + OptionalStorage &operator=(OptionalStorage const &other) { + if (other.has_value()) { + if (has_value()) { + val = other.val; + } else { + ::new ((void *)std::addressof(val)) T(other.val); + hasVal = true; + } + } else { + reset(); + } + return *this; + } + + OptionalStorage &operator=(OptionalStorage &&other) { + if (other.has_value()) { + if (has_value()) { + val = std::move(other.val); + } else { + ::new ((void *)std::addressof(val)) T(std::move(other.val)); + hasVal = true; + } + } else { + reset(); + } + return *this; + } +}; + +template class OptionalStorage { + union { + char empty; + T val; + }; + bool hasVal = false; + +public: + ~OptionalStorage() = default; + + constexpr OptionalStorage() noexcept : empty{} {} + + constexpr OptionalStorage(OptionalStorage const &other) = default; + constexpr OptionalStorage(OptionalStorage &&other) = default; + + OptionalStorage &operator=(OptionalStorage const &other) = default; + OptionalStorage &operator=(OptionalStorage &&other) = default; + + template + constexpr explicit OptionalStorage(in_place_t, Args &&...args) + : val(std::forward(args)...), hasVal(true) {} + + void reset() noexcept { + if (hasVal) { + val.~T(); + hasVal = false; + } + } + + constexpr bool has_value() const noexcept { return hasVal; } + constexpr bool hasValue() const noexcept { return hasVal; } + + T &value() &noexcept { + assert(hasVal); + return val; + } + T &getValue() &noexcept { + assert(hasVal); + return val; + } + constexpr T const &value() const &noexcept { + assert(hasVal); + return val; + } + constexpr T const &getValue() const &noexcept { + assert(hasVal); + return val; + } + T &&value() &&noexcept { + assert(hasVal); + return std::move(val); + } + T &&getValue() &&noexcept { + assert(hasVal); + return std::move(val); + } + + template void emplace(Args &&...args) { + reset(); + ::new ((void *)std::addressof(val)) T(std::forward(args)...); + hasVal = true; + } + + OptionalStorage &operator=(T const &y) { + if (has_value()) { + val = y; + } else { + ::new ((void *)std::addressof(val)) T(y); + hasVal = true; + } + return *this; + } + OptionalStorage &operator=(T &&y) { + if (has_value()) { + val = std::move(y); + } else { + ::new ((void *)std::addressof(val)) T(std::move(y)); + hasVal = true; + } + return *this; + } +}; + +} // namespace optional_detail + +template class optional { + optional_detail::OptionalStorage Storage; + +public: + using value_type = T; + + constexpr optional() = default; + constexpr optional(nullopt_t) {} + + constexpr optional(const T &y) : Storage(in_place, y) {} + constexpr optional(const optional &O) = default; + + constexpr optional(T &&y) : Storage(in_place, std::move(y)) {} + constexpr optional(optional &&O) = default; + + template + constexpr optional(in_place_t, ArgTypes &&...Args) + : Storage(in_place, std::forward(Args)...) {} + + optional &operator=(T &&y) { + Storage = std::move(y); + return *this; + } + optional &operator=(optional &&O) = default; + + /// Create a new object by constructing it in place with the given arguments. + template void emplace(ArgTypes &&... Args) { + Storage.emplace(std::forward(Args)...); + } + + static constexpr optional create(const T *y) { + return y ? 
optional(*y) : optional(); + } + + optional &operator=(const T &y) { + Storage = y; + return *this; + } + optional &operator=(const optional &O) = default; + + void reset() { Storage.reset(); } + + constexpr const T *getPointer() const { return &Storage.value(); } + T *getPointer() { return &Storage.value(); } + constexpr const T &value() const & { return Storage.value(); } + constexpr const T &getValue() const & { return Storage.value(); } + T &value() & { return Storage.value(); } + T &getValue() & { return Storage.value(); } + + constexpr explicit operator bool() const { return has_value(); } + constexpr bool has_value() const { return Storage.has_value(); } + constexpr bool hasValue() const { return Storage.has_value(); } + constexpr const T *operator->() const { return getPointer(); } + T *operator->() { return getPointer(); } + constexpr const T &operator*() const & { return value(); } + T &operator*() & { return value(); } + + template constexpr T value_or(U &&alt) const & { + return has_value() ? value() : std::forward(alt); + } + template + [[deprecated("Use value_or instead.")]] constexpr T + getValueOr(U &&alt) const & { + return has_value() ? value() : std::forward(alt); + } + + /// Apply a function to the value if present; otherwise return nullopt. + template + auto map(const Function &F) const & -> optional { + if (*this) + return F(value()); + return nullopt; + } + + T &&value() && { return std::move(Storage.value()); } + T &&getValue() && { return std::move(Storage.value()); } + T &&operator*() && { return std::move(Storage.value()); } + + template T value_or(U &&alt) && { + return has_value() ? std::move(value()) : std::forward(alt); + } + template + [[deprecated("Use value_or instead.")]] T getValueOr(U &&alt) && { + return has_value() ? std::move(value()) : std::forward(alt); + } + + /// Apply a function to the value if present; otherwise return nullopt. + template + auto map(const Function &F) + && -> optional { + if (*this) + return F(std::move(*this).value()); + return nullopt; + } +}; + +//template llvm::hash_code hash_value(const optional &O) { +// return O ? 
hash_combine(true, *O) : hash_value(false); +//} + +template +constexpr bool operator==(const optional &X, const optional &Y) { + if (X && Y) + return *X == *Y; + return X.has_value() == Y.has_value(); +} + +template +constexpr bool operator!=(const optional &X, const optional &Y) { + return !(X == Y); +} + +template +constexpr bool operator<(const optional &X, const optional &Y) { + if (X && Y) + return *X < *Y; + return X.has_value() < Y.has_value(); +} + +template +constexpr bool operator<=(const optional &X, const optional &Y) { + return !(Y < X); +} + +template +constexpr bool operator>(const optional &X, const optional &Y) { + return Y < X; +} + +template +constexpr bool operator>=(const optional &X, const optional &Y) { + return !(X < Y); +} + +template +constexpr bool operator==(const optional &X, nullopt_t) { + return !X; +} + +template +constexpr bool operator==(nullopt_t, const optional &X) { + return X == nullopt; +} + +template +constexpr bool operator!=(const optional &X, nullopt_t) { + return !(X == nullopt); +} + +template +constexpr bool operator!=(nullopt_t, const optional &X) { + return X != nullopt; +} + +template constexpr bool operator<(const optional &, nullopt_t) { + return false; +} + +template constexpr bool operator<(nullopt_t, const optional &X) { + return X.has_value(); +} + +template +constexpr bool operator<=(const optional &X, nullopt_t) { + return !(nullopt < X); +} + +template +constexpr bool operator<=(nullopt_t, const optional &X) { + return !(X < nullopt); +} + +template constexpr bool operator>(const optional &X, nullopt_t) { + return nullopt < X; +} + +template constexpr bool operator>(nullopt_t, const optional &X) { + return X < nullopt; +} + +template +constexpr bool operator>=(const optional &X, nullopt_t) { + return nullopt <= X; +} + +template +constexpr bool operator>=(nullopt_t, const optional &X) { + return X <= nullopt; +} + +template +constexpr bool operator==(const optional &X, const T &Y) { + return X && *X == Y; +} + +template +constexpr bool operator==(const T &X, const optional &Y) { + return Y && X == *Y; +} + +template +constexpr bool operator!=(const optional &X, const T &Y) { + return !(X == Y); +} + +template +constexpr bool operator!=(const T &X, const optional &Y) { + return !(X == Y); +} + +template +constexpr bool operator<(const optional &X, const T &Y) { + return !X || *X < Y; +} + +template +constexpr bool operator<(const T &X, const optional &Y) { + return Y && X < *Y; +} + +template +constexpr bool operator<=(const optional &X, const T &Y) { + return !(Y < X); +} + +template +constexpr bool operator<=(const T &X, const optional &Y) { + return !(Y < X); +} + +template +constexpr bool operator>(const optional &X, const T &Y) { + return Y < X; +} + +template +constexpr bool operator>(const T &X, const optional &Y) { + return Y < X; +} + +template +constexpr bool operator>=(const optional &X, const T &Y) { + return !(X < Y); +} + +template +constexpr bool operator>=(const T &X, const optional &Y) { + return !(X < Y); +} + +raw_ostream &operator<<(raw_ostream &OS, nullopt_t); + +template () + << std::declval())> +raw_ostream &operator<<(raw_ostream &OS, const optional &O) { + if (O) + OS << *O; + else + OS << nullopt; + return OS; +} + +} // end namespace + +#endif // JL_OPTIONAL_H diff --git a/src/symbol.c b/src/symbol.c index c9c0c0e533924..ef2c11e0842e8 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -15,6 +15,7 @@ extern "C" { #endif +uv_mutex_t symtab_lock; static _Atomic(jl_sym_t*) symtab = NULL; #define MAX_SYM_LEN 
((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1) @@ -35,7 +36,7 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT { jl_sym_t *sym; size_t nb = symbol_nbytes(len); - jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(nb, 0, sizeof(void*), 0); + jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(nb, 0, sizeof(void*), 0); sym = (jl_sym_t*)jl_valueof(tag); // set to old marked so that we won't look at it in the GC or write barrier. jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); @@ -86,15 +87,15 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw) _Atomic(jl_sym_t*) *slot; jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot); if (node == NULL) { - uv_mutex_lock(&gc_perm_lock); + uv_mutex_lock(&symtab_lock); // Someone might have updated it, check and look up again if (jl_atomic_load_relaxed(slot) != NULL && (node = symtab_lookup(slot, str, len, &slot))) { - uv_mutex_unlock(&gc_perm_lock); + uv_mutex_unlock(&symtab_lock); return node; } node = mk_symbol(str, len); jl_atomic_store_release(slot, node); - uv_mutex_unlock(&gc_perm_lock); + uv_mutex_unlock(&symtab_lock); } return node; } @@ -129,7 +130,7 @@ JL_DLLEXPORT jl_sym_t *jl_gensym(void) { char name[16]; char *n; - uint32_t ctr = jl_atomic_fetch_add(&gs_ctr, 1); + uint32_t ctr = jl_atomic_fetch_add_relaxed(&gs_ctr, 1); n = uint2str(&name[2], sizeof(name)-2, ctr, 10); *(--n) = '#'; *(--n) = '#'; return jl_symbol(n); @@ -153,7 +154,7 @@ JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len) name[1] = '#'; name[2 + len] = '#'; memcpy(name + 2, str, len); - uint32_t ctr = jl_atomic_fetch_add(&gs_ctr, 1); + uint32_t ctr = jl_atomic_fetch_add_relaxed(&gs_ctr, 1); n = uint2str(gs_name, sizeof(gs_name), ctr, 10); memcpy(name + 3 + len, n, sizeof(gs_name) - (n - gs_name)); jl_sym_t *sym = _jl_symbol(name, alloc_len - (n - gs_name)- 1); diff --git a/src/sys.c b/src/sys.c index 2de4bc61a20b8..fa9054bb93e9a 100644 --- a/src/sys.c +++ b/src/sys.c @@ -102,7 +102,6 @@ JL_DLLEXPORT int32_t jl_nb_available(ios_t *s) // --- dir/file stuff --- -JL_DLLEXPORT int jl_sizeof_uv_fs_t(void) { return sizeof(uv_fs_t); } JL_DLLEXPORT char *jl_uv_fs_t_ptr(uv_fs_t *req) { return (char*)req->ptr; } JL_DLLEXPORT char *jl_uv_fs_t_path(uv_fs_t *req) { return (char*)req->path; } @@ -280,15 +279,16 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint return str; } a = jl_alloc_array_1d(jl_array_uint8_type, n - nchomp); - memcpy(jl_array_data(a), s->buf + s->bpos, n - nchomp); + memcpy(jl_array_data(a, uint8_t), s->buf + s->bpos, n - nchomp); s->bpos += n; } else { a = jl_alloc_array_1d(jl_array_uint8_type, 80); ios_t dest; ios_mem(&dest, 0); - ios_setbuf(&dest, (char*)a->data, 80, 0); - size_t n = ios_copyuntil(&dest, s, delim); + char *mem = jl_array_data(a, char); + ios_setbuf(&dest, (char*)mem, 80, 0); + size_t n = ios_copyuntil(&dest, s, delim, 1); if (chomp && n > 0 && dest.buf[n - 1] == delim) { n--; if (chomp == 2 && n > 0 && dest.buf[n - 1] == '\r') { @@ -298,13 +298,11 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint assert(truncret == 0); (void)truncret; // ensure the variable is used to avoid warnings } - if (dest.buf != a->data) { + if (dest.buf != mem) { a = jl_take_buffer(&dest); } else { - a->length = n; - a->nrows = n; - ((char*)a->data)[n] = '\0'; + a->dimsize[0] = n; } if (str) { JL_GC_PUSH1(&a); @@ -316,6 +314,50 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t 
delim, uint8_t str, uint return (jl_value_t*)a; } +// read up to buflen bytes, including delim, into buf. returns number of bytes read. +JL_DLLEXPORT size_t jl_readuntil_buf(ios_t *s, uint8_t delim, uint8_t *buf, size_t buflen) +{ + // manually inlined common case + size_t avail = (size_t)(s->size - s->bpos); + if (avail > buflen) avail = buflen; + char *pd = (char*)memchr(s->buf + s->bpos, delim, avail); + if (pd) { + size_t n = pd - (s->buf + s->bpos) + 1; + memcpy(buf, s->buf + s->bpos, n); + s->bpos += n; + return n; + } + else { + size_t total = avail; + memcpy(buf, s->buf + s->bpos, avail); + s->bpos += avail; + if (avail == buflen) return total; + + // code derived from ios_copyuntil + while (!ios_eof(s)) { + avail = ios_readprep(s, 160); // read LINE_CHUNK_SIZE + if (avail == 0) break; + if (total+avail > buflen) avail = buflen-total; + char *pd = (char*)memchr(s->buf+s->bpos, delim, avail); + if (pd == NULL) { + memcpy(buf+total, s->buf+s->bpos, avail); + s->bpos += avail; + total += avail; + if (buflen == total) return total; + } + else { + size_t ntowrite = pd - (s->buf+s->bpos) + 1; + memcpy(buf+total, s->buf+s->bpos, ntowrite); + s->bpos += ntowrite; + total += ntowrite; + return total; + } + } + s->_eof = 1; + return total; + } +} + JL_DLLEXPORT int jl_ios_buffer_n(ios_t *s, const size_t n) { size_t space, ret; @@ -435,25 +477,10 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT { - int cpu = jl_cpu_threads(); - int masksize = uv_cpumask_size(); - if (masksize < 0 || jl_running_under_rr(0)) - return cpu; - uv_thread_t tid = uv_thread_self(); - char *cpumask = (char *)calloc(masksize, sizeof(char)); - int err = uv_thread_getaffinity(&tid, cpumask, masksize); - if (err) { - free(cpumask); - jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err), - err); - return cpu; - } - int n = 0; - for (size_t i = 0; i < masksize; i++) { - n += cpumask[i]; - } - free(cpumask); - return n < cpu ? n : cpu; + // We want the more conservative estimate of the two. + int cpu_threads = jl_cpu_threads(); + int available_parallelism = uv_available_parallelism(); + return available_parallelism < cpu_threads ? 
available_parallelism : cpu_threads; } @@ -588,10 +615,42 @@ JL_DLLEXPORT long jl_SC_CLK_TCK(void) #ifndef _OS_WINDOWS_ return sysconf(_SC_CLK_TCK); #else - return 0; + return 1000; /* uv_cpu_info returns times in ms on Windows */ #endif } +#ifdef _OS_OPENBSD_ +// Helper for jl_pathname_for_handle() +struct dlinfo_data { + void *searched; + const char *result; +}; + +static int dlinfo_helper(struct dl_phdr_info *info, size_t size, void *vdata) +{ + struct dlinfo_data *data = (struct dlinfo_data *)vdata; + void *handle; + + /* ensure dl_phdr_info at compile-time to be compatible with the one at runtime */ + if (sizeof(*info) < size) + return -1; + + /* dlopen the name */ + handle = dlopen(info->dlpi_name, RTLD_LAZY | RTLD_NOLOAD); + if (handle == NULL) + return 0; + + /* check if the opened library is the same as the searched handle */ + if (data->searched == handle) + data->result = info->dlpi_name; + + dlclose(handle); + + /* continue if still not found */ + return (data->result != NULL); +} +#endif + // Takes a handle (as returned from dlopen()) and returns the absolute path to the image loaded JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle) { @@ -634,6 +693,14 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle) free(pth16); return filepath; +#elif defined(_OS_OPENBSD_) + struct dlinfo_data data = { + .searched = handle, + .result = NULL, + }; + dl_iterate_phdr(&dlinfo_helper, &data); + return data.result; + #else // Linux, FreeBSD, ... struct link_map *map; @@ -704,26 +771,11 @@ JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT JL_DLLEXPORT size_t jl_maxrss(void) { -#if defined(_OS_WINDOWS_) - PROCESS_MEMORY_COUNTERS counter; - GetProcessMemoryInfo( GetCurrentProcess( ), &counter, sizeof(counter) ); - return (size_t)counter.PeakWorkingSetSize; - -// FIXME: `rusage` is available on OpenBSD, DragonFlyBSD and NetBSD as well. -// All of them return `ru_maxrss` in kilobytes. -#elif defined(_OS_LINUX_) || defined(_OS_DARWIN_) || defined (_OS_FREEBSD_) - struct rusage rusage; - getrusage( RUSAGE_SELF, &rusage ); - -#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) - return (size_t)(rusage.ru_maxrss * 1024); -#else - return (size_t)rusage.ru_maxrss; -#endif - -#else - return (size_t)0; -#endif + uv_rusage_t rusage; + if (uv_getrusage(&rusage) == 0) { + return rusage.ru_maxrss * 1024; + } + return 0; } // Simple `rand()` like function, with global seed and added thread-safety @@ -732,13 +784,12 @@ static _Atomic(uint64_t) g_rngseed; JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT { uint64_t max = UINT64_MAX; - uint64_t unbias = UINT64_MAX; uint64_t rngseed0 = jl_atomic_load_relaxed(&g_rngseed); uint64_t rngseed; uint64_t rnd; do { rngseed = rngseed0; - rnd = cong(max, unbias, &rngseed); + rnd = cong(max, &rngseed); } while (!jl_atomic_cmpswap_relaxed(&g_rngseed, &rngseed0, rngseed)); return rnd; } diff --git a/src/task.c b/src/task.c index 1dab8688cb079..d56d60eb58cb5 100644 --- a/src/task.c +++ b/src/task.c @@ -42,27 +42,34 @@ extern "C" { #endif #if defined(_COMPILER_ASAN_ENABLED_) -static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) { +#if __GLIBC__ +#include +// Bypass the ASAN longjmp wrapper - we are unpoisoning the stack ourselves, +// since ASAN normally unpoisons far too much. +// c.f. 
interceptor in jl_dlopen as well +void (*real_siglongjmp)(jmp_buf _Buf, int _Value) = NULL; +#endif +static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) { if (to->copy_stack) - __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize); + __sanitizer_start_switch_fiber(&from->asan_fake_stack, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize); else - __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, to->stkbuf, to->bufsz); + __sanitizer_start_switch_fiber(&from->asan_fake_stack, to->stkbuf, to->bufsz); } -static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) { +static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) { if (to->copy_stack) - __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize); + __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize); else __sanitizer_start_switch_fiber(NULL, to->stkbuf, to->bufsz); } -static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) { - __sanitizer_finish_switch_fiber(current->ctx.asan_fake_stack, NULL, NULL); +static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) { + __sanitizer_finish_switch_fiber(current->asan_fake_stack, NULL, NULL); //(const void**)&last->stkbuf, //&last->bufsz); } #else -static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT {} -static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) JL_NOTSAFEPOINT {} -static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) JL_NOTSAFEPOINT {} +static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) JL_NOTSAFEPOINT {} +static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) JL_NOTSAFEPOINT {} +static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) JL_NOTSAFEPOINT {} #endif #if defined(_COMPILER_TSAN_ENABLED_) @@ -78,19 +85,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur jl_ucontext_t *_tsan_macro_ctx = (_ctx); \ __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \ } while (0) -#ifdef COPY_STACKS -#define tsan_destroy_copyctx(_ptls, _ctx) do { \ - jl_ucontext_t *_tsan_macro_ctx = (_ctx); \ - if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \ - __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \ - } \ - _tsan_macro_ctx->tsan_state = NULL; \ - } while (0) -#define tsan_switch_to_copyctx(_ctx) do { \ - struct jl_stack_context_t *_tsan_macro_ctx = (_ctx); \ - __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \ - } while (0) -#endif #else // just do minimal type-checking on the arguments #define tsan_destroy_ctx(_ptls, _ctx) do { \ @@ -101,16 +95,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur jl_ucontext_t *_tsan_macro_ctx = (_ctx); \ (void)_tsan_macro_ctx; \ } while (0) -#ifdef COPY_STACKS -#define tsan_destroy_copyctx(_ptls, _ctx) do { \ - jl_ucontext_t *_tsan_macro_ctx = (_ctx); \ - (void)_tsan_macro_ctx; \ - } while (0) -#define tsan_switch_to_copyctx(_ctx) do { \ - jl_ucontext_t *_tsan_macro_ctx = (_ctx); \ - (void)_tsan_macro_ctx; \ - } while (0) -#endif #endif // empirically, jl_finish_task needs about 64k stack space to infer/run @@ 
-127,18 +111,10 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur #define ROOT_TASK_STACK_ADJUSTMENT 3000000 #endif -#ifdef JL_HAVE_ASYNCIFY -// Switching logic is implemented in JavaScript -#define STATIC_OR_JS JL_DLLEXPORT -#else -#define STATIC_OR_JS static -#endif - -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT; -STATIC_OR_JS void jl_set_fiber(jl_ucontext_t *t); -STATIC_OR_JS void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); -STATIC_OR_JS void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t); -STATIC_OR_JS void jl_start_fiber_set(jl_ucontext_t *t); +static void jl_set_fiber(jl_ucontext_t *t); +static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t); +static void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t); +static void jl_start_fiber_set(jl_ucontext_t *t); #ifdef ALWAYS_COPY_STACKS # ifndef COPY_STACKS @@ -197,7 +173,7 @@ static void JL_NO_ASAN JL_NO_MSAN memcpy_stack_a16(uint64_t *to, uint64_t *from, memcpy_noasan((char*)to_addr, (char*)from_addr, shadow_nb); memcpy_a16_noasan(jl_assume_aligned(to, 16), jl_assume_aligned(from, 16), nb); #elif defined(_COMPILER_MSAN_ENABLED_) -# warning This function is imcompletely implemented for MSAN (TODO). +# warning This function is incompletely implemented for MSAN (TODO). memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb); #else memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb); @@ -214,17 +190,17 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt assert(stackbase > frame_addr); size_t nb = stackbase - frame_addr; void *buf; - if (lastt->bufsz < nb) { - asan_free_copy_stack(lastt->stkbuf, lastt->bufsz); + if (lastt->ctx.bufsz < nb) { + asan_free_copy_stack(lastt->ctx.stkbuf, lastt->ctx.bufsz); buf = (void*)jl_gc_alloc_buf(ptls, nb); - lastt->stkbuf = buf; - lastt->bufsz = nb; + lastt->ctx.stkbuf = buf; + lastt->ctx.bufsz = nb; } else { - buf = lastt->stkbuf; + buf = lastt->ctx.stkbuf; } *pt = NULL; // clear the gc-root for the target task before copying the stack for saving - lastt->copy_stack = nb; + lastt->ctx.copy_stack = nb; lastt->sticky = 1; memcpy_stack_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb); // this task's stack could have been modified after @@ -233,75 +209,124 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt jl_gc_wb_back(lastt); } -JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p) +JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_ucontext_t *t, jl_ptls_t ptls, char *p) { size_t nb = t->copy_stack; char *_x = (char*)ptls->stackbase - nb; if (!p) { // switch to a stackframe that's beyond the bounds of the last switch - p = _x; - if ((char*)&_x > _x) { - p = (char*)alloca((char*)&_x - _x); + p = _x - 4096; + if ((char*)&_x > p) { + p = (char*)alloca((char*)&_x - p); } restore_stack(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca } void *_y = t->stkbuf; assert(_x != NULL && _y != NULL); +#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround +#if defined(_CPU_X86_) || defined(_CPU_X86_64_) + void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1; + assert(*return_address == __builtin_return_address(0)); + *return_address = NULL; +#else +#pragma 
message("warning: CFI_NORETURN not implemented for this platform, so profiling of copy_stacks may segfault in this build") +#endif +#else +CFI_NORETURN +#endif memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe #if defined(_OS_WINDOWS_) - jl_setcontext(&t->ctx.copy_ctx); + jl_setcontext(t->copy_ctx); #else - jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1); + jl_longjmp(t->copy_ctx->uc_mcontext, 1); #endif abort(); // unreachable } -JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt) +JL_NO_ASAN static void restore_stack2(jl_ucontext_t *t, jl_ptls_t ptls, jl_ucontext_t *lastt) { assert(t->copy_stack && !lastt->copy_stack); size_t nb = t->copy_stack; - char *_x = (char*)ptls->stackbase - nb; - void *_y = t->stkbuf; - assert(_x != NULL && _y != NULL); - memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe + if (nb > 1) { + char *_x = (char*)ptls->stackbase - nb; + void *_y = t->stkbuf; + assert(_x != NULL && _y != NULL); + memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); + } +#if defined(_OS_WINDOWS_) + // jl_swapcontext and setjmp are the same on Windows, so we can just use swapcontext directly + tsan_switch_to_ctx(t); + jl_swapcontext(lastt->ctx, t->copy_ctx); +#else #if defined(JL_HAVE_UNW_CONTEXT) volatile int returns = 0; - int r = unw_getcontext(&lastt->ctx.ctx); + int r = unw_getcontext(lastt->ctx); if (++returns == 2) // r is garbage after the first return return; if (r != 0 || returns != 1) abort(); -#elif defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK) || defined(_OS_WINDOWS_) - if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) +#elif defined(JL_HAVE_ASM) + if (jl_setjmp(lastt->ctx->uc_mcontext, 0)) return; #else #error COPY_STACKS is incompatible with this platform #endif - tsan_switch_to_copyctx(&t->ctx); -#if defined(_OS_WINDOWS_) - jl_setcontext(&t->ctx.copy_ctx); + tsan_switch_to_ctx(t); + jl_longjmp(t->copy_ctx->uc_mcontext, 1); +#endif +} + +JL_NO_ASAN static void NOINLINE restore_stack3(jl_ucontext_t *t, jl_ptls_t ptls, char *p) +{ +#if !defined(JL_HAVE_ASM) + char *_x = (char*)ptls->stackbase; + if (!p) { + // switch to a stackframe that's well beyond the bounds of the next switch + p = _x - 4096; + if ((char*)&_x > p) { + p = (char*)alloca((char*)&_x - p); + } + restore_stack3(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca + } +#endif +#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround +#if defined(_CPU_X86_) || defined(_CPU_X86_64_) + void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1; + assert(*return_address == __builtin_return_address(0)); + *return_address = NULL; +#endif #else - jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1); +CFI_NORETURN #endif + tsan_switch_to_ctx(t); + jl_start_fiber_set(t); // (doesn't return) + abort(); } + #endif /* Rooted by the base module */ static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL; -void JL_NORETURN jl_finish_task(jl_task_t *t) +void JL_NORETURN jl_finish_task(jl_task_t *ct) { - jl_task_t *ct = jl_current_task; JL_PROBE_RT_FINISH_TASK(ct); JL_SIGATOMIC_BEGIN(); - if (jl_atomic_load_relaxed(&t->_isexception)) - jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED); + if (ct->metrics_enabled) { + // [task] user_time -finished-> wait_time + assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0); + uint64_t now = 
jl_hrtime(); + jl_atomic_store_relaxed(&ct->finished_at, now); + jl_atomic_fetch_add_relaxed(&ct->running_time_ns, now - jl_atomic_load_relaxed(&ct->last_started_running_at)); + } + if (jl_atomic_load_relaxed(&ct->_isexception)) + jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED); else - jl_atomic_store_release(&t->_state, JL_TASK_STATE_DONE); - if (t->copy_stack) { // early free of stkbuf - asan_free_copy_stack(t->stkbuf, t->bufsz); - t->stkbuf = NULL; + jl_atomic_store_release(&ct->_state, JL_TASK_STATE_DONE); + if (ct->ctx.copy_stack) { // early free of stkbuf + asan_free_copy_stack(ct->ctx.stkbuf, ct->ctx.bufsz); + ct->ctx.stkbuf = NULL; } // ensure that state is cleared ct->ptls->in_finalizer = 0; @@ -315,12 +340,12 @@ void JL_NORETURN jl_finish_task(jl_task_t *t) jl_atomic_store_release(&task_done_hook_func, done); } if (done != NULL) { - jl_value_t *args[2] = {done, (jl_value_t*)t}; + jl_value_t *args[2] = {done, (jl_value_t*)ct}; JL_TRY { jl_apply(args, 2); } JL_CATCH { - jl_no_exc_handler(jl_current_exception(), ct); + jl_no_exc_handler(jl_current_exception(ct), ct); } } jl_gc_debug_critical_error(); @@ -345,33 +370,33 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid if (ptls2) { *ptid = jl_atomic_load_relaxed(&task->tid); #ifdef COPY_STACKS - if (task->copy_stack) { + if (task->ctx.copy_stack) { *size = ptls2->stacksize; return (char *)ptls2->stackbase - *size; } #endif } - *size = task->bufsz - off; - return (void *)((char *)task->stkbuf + off); + *size = task->ctx.bufsz - off; + return (void *)((char *)task->ctx.stkbuf + off); } JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) { - if (!task->started) { + if (!task->ctx.started) { *total_start = *active_start = 0; *total_end = *active_end = 0; return; } jl_ptls_t ptls2 = task->ptls; - if (task->copy_stack && ptls2) { + if (task->ctx.copy_stack && ptls2) { *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize; *total_end = *active_end = (char*)ptls2->stackbase; } - else if (task->stkbuf) { - *total_start = *active_start = (char*)task->stkbuf; + else if (task->ctx.stkbuf) { + *total_start = *active_start = (char*)task->ctx.stkbuf; #ifndef _OS_WINDOWS_ jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; if (ptls0->root_task == task) { @@ -384,12 +409,12 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, } #endif - *total_end = *active_end = (char*)task->stkbuf + task->bufsz; + *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; #ifdef COPY_STACKS // save_stack stores the stack of an inactive task in stkbuf, and the // actual number of used bytes in copy_stack. 
- if (task->copy_stack > 1) - *active_end = (char*)task->stkbuf + task->copy_stack; + if (task->ctx.copy_stack > 1) + *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack; #endif } else { @@ -450,20 +475,16 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) #endif int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE; - if (!t->started && !t->copy_stack) { + if (!t->ctx.started && !t->ctx.copy_stack) { // may need to allocate the stack - if (t->stkbuf == NULL) { - t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t); - if (t->stkbuf == NULL) { + if (t->ctx.stkbuf == NULL) { + t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t); + if (t->ctx.stkbuf == NULL) { #ifdef COPY_STACKS // fall back to stack copying if mmap fails - t->copy_stack = 1; + t->ctx.copy_stack = 1; + t->ctx.bufsz = 0; t->sticky = 1; - t->bufsz = 0; - if (always_copy_stacks) - memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx)); - else - memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx)); #else jl_throw(jl_memory_exception); #endif @@ -471,28 +492,45 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } } + union { + _jl_ucontext_t ctx; + jl_stack_context_t copy_ctx; + } lasttstate; + if (killed) { *pt = NULL; // can't fail after here: clear the gc-root for the target task now lastt->gcstack = NULL; lastt->eh = NULL; - if (!lastt->copy_stack && lastt->stkbuf) { + if (!lastt->ctx.copy_stack && lastt->ctx.stkbuf) { // early free of stkbuf back to the pool jl_release_task_stack(ptls, lastt); } } else { + if (lastt->ctx.copy_stack) { // save the old copy-stack +#ifdef _OS_WINDOWS_ + lasttstate.copy_ctx.uc_stack.ss_sp = (char*)ptls->stackbase - ptls->stacksize; + lasttstate.copy_ctx.uc_stack.ss_size = ptls->stacksize; +#endif #ifdef COPY_STACKS - if (lastt->copy_stack) { // save the old copy-stack - save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail) - if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) { - sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task)); - // TODO: mutex unlock the thread we just switched from + if (jl_setjmp(lasttstate.copy_ctx.uc_mcontext, 0)) { +#ifdef MIGRATE_TASKS + ptls = lastt->ptls; +#endif + lastt->ctx.copy_ctx = NULL; + sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx); return; } - } - else + save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail) + lastt->ctx.copy_ctx = &lasttstate.copy_ctx; +#else + abort(); #endif - *pt = NULL; // can't fail after here: clear the gc-root for the target task now + } + else { + *pt = NULL; // can't fail after here: clear the gc-root for the target task now + lastt->ctx.ctx = &lasttstate.ctx; + } } // set up global state for new task and clear global state for old task @@ -507,41 +545,44 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) ptls->previous_task = lastt; #endif - if (t->started) { + if (t->ctx.started) { + if (t->ctx.copy_stack) { #ifdef COPY_STACKS - if (t->copy_stack) { - if (lastt->copy_stack) { + if (lastt->ctx.copy_stack) { // Switching from copystack to copystack. Clear any shadow stack // memory above the saved shadow stack. 
- uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->copy_stack; + uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->ctx.copy_stack; uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); if (stackbottom < stacktop) - asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom); + } + if (!killed && !lastt->ctx.copy_stack) { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + restore_stack2(&t->ctx, ptls, &lastt->ctx); // half jl_swap_fiber and half restore_stack } - if (!killed && !lastt->copy_stack) { - sanitizer_start_switch_fiber(ptls, lastt, t); - restore_stack2(t, ptls, lastt); - } else { - tsan_switch_to_copyctx(&t->ctx); + else { + tsan_switch_to_ctx(&t->ctx); if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); - tsan_destroy_copyctx(ptls, &lastt->ctx); - } else { - sanitizer_start_switch_fiber(ptls, lastt, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); + tsan_destroy_ctx(ptls, &lastt->ctx); + } + else { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); } - if (lastt->copy_stack) { - restore_stack(t, ptls, NULL); // (doesn't return) + if (lastt->ctx.copy_stack) { + restore_stack(&t->ctx, ptls, NULL); // (doesn't return) + abort(); } else { - restore_stack(t, ptls, (char*)1); // (doesn't return) + restore_stack(&t->ctx, ptls, (char*)1); // (doesn't return) + abort(); } } - } - else #endif - { - if (lastt->copy_stack) { + } + else { + if (lastt->ctx.copy_stack) { // Switching away from a copystack to a non-copystack. Clear // the whole shadow stack now, because otherwise we won't know // how much stack memory to clear the next time we switch to @@ -550,22 +591,23 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); // We're not restoring the stack, but we still need to unpoison the // stack, so it starts with a pristine stack. - asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom); } if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_switch_to_ctx(&t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); jl_set_fiber(&t->ctx); // (doesn't return) abort(); // unreachable } else { - sanitizer_start_switch_fiber(ptls, lastt, t); - if (lastt->copy_stack) { + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + if (lastt->ctx.copy_stack) { // Resume at the jl_setjmp earlier in this function, // don't do a full task swap tsan_switch_to_ctx(&t->ctx); jl_set_fiber(&t->ctx); // (doesn't return) + abort(); } else { jl_swap_fiber(&lastt->ctx, &t->ctx); @@ -574,41 +616,58 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } } else { - if (lastt->copy_stack) { +#ifdef _COMPILER_TSAN_ENABLED_ + t->ctx.tsan_state = __tsan_create_fiber(0); +#endif + if (lastt->ctx.copy_stack) { uintptr_t stacktop = (uintptr_t)ptls->stackbase; uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); // We're not restoring the stack, but we still need to unpoison the // stack, so it starts with a pristine stack. 
- asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom); } - if (t->copy_stack && always_copy_stacks) { + if (t->ctx.copy_stack) { +#ifdef COPY_STACKS tsan_switch_to_ctx(&t->ctx); + // create a temporary non-copy_stack context for starting this fiber + jl_ucontext_t ctx = t->ctx; + ctx.ctx = NULL; + ctx.stkbuf = (char*)ptls->stackbase - ptls->stacksize; + ctx.bufsz = ptls->stacksize; + ctx.copy_stack = 0; + ctx.started = 0; if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); - } else { - sanitizer_start_switch_fiber(ptls, lastt, t); + if (lastt->ctx.copy_stack) + restore_stack3(&ctx, ptls, NULL); // (doesn't return) + else + jl_start_fiber_set(&ctx); + abort(); + } + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + if (lastt->ctx.copy_stack) { + restore_stack3(&ctx, ptls, NULL); // (doesn't return) + abort(); + } + else { + jl_start_fiber_swap(&lastt->ctx, &ctx); } -#ifdef COPY_STACKS -#if defined(_OS_WINDOWS_) - jl_setcontext(&t->ctx.copy_ctx); #else - jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1); + abort(); #endif -#endif - abort(); // unreachable } else { if (killed) { - sanitizer_start_switch_fiber_killed(ptls, t); + sanitizer_start_switch_fiber_killed(ptls, &t->ctx); tsan_switch_to_ctx(&t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); jl_start_fiber_set(&t->ctx); // (doesn't return) abort(); } - sanitizer_start_switch_fiber(ptls, lastt, t); - if (lastt->copy_stack) { - // Resume at the jl_setjmp earlier in this function + sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx); + if (lastt->ctx.copy_stack) { + // copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here tsan_switch_to_ctx(&t->ctx); jl_start_fiber_set(&t->ctx); // (doesn't return) abort(); @@ -618,7 +677,14 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } } } - sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task)); + +#ifdef MIGRATE_TASKS + ptls = lastt->ptls; +#endif + assert(ptls); + assert(lastt == jl_atomic_load_relaxed(&ptls->current_task)); + lastt->ctx.ctx = NULL; + sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx); } JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER @@ -630,7 +696,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER return; } int8_t gc_state = jl_gc_unsafe_enter(ptls); - if (t->started && t->stkbuf == NULL) + if (t->ctx.started && t->ctx.stkbuf == NULL) jl_error("attempt to switch to exited task"); if (ptls->in_finalizer) jl_error("task switch not allowed from inside gc finalizer"); @@ -655,7 +721,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER ptls->previous_task = NULL; assert(t != ct); assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid); - if (!t->sticky && !t->copy_stack) + if (!t->sticky && !t->ctx.copy_stack) jl_atomic_store_release(&t->tid, -1); #else assert(ptls == ct->ptls); @@ -688,7 +754,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) // NULL exception objects are used when rethrowing. we don't have a handler to process // the exception stack, so at least report the exception at the top of the stack. 
if (!e) - e = jl_current_exception(); + e = jl_current_exception(ct); jl_printf((JL_STREAM*)STDERR_FILENO, "fatal: error thrown and no exception handler available.\n"); jl_static_show((JL_STREAM*)STDERR_FILENO, e); @@ -712,48 +778,31 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) #define pop_timings_stack() /* Nothing */ #endif -#define throw_internal_body(altstack) \ - assert(!jl_get_safe_restore()); \ - jl_ptls_t ptls = ct->ptls; \ - ptls->io_wait = 0; \ - jl_gc_unsafe_enter(ptls); \ - if (exception) { \ - /* The temporary ptls->bt_data is rooted by special purpose code in the\ - GC. This exists only for the purpose of preserving bt_data until we \ - set ptls->bt_size=0 below. */ \ - jl_push_excstack(&ct->excstack, exception, \ - ptls->bt_data, ptls->bt_size); \ - ptls->bt_size = 0; \ - } \ - assert(ct->excstack && ct->excstack->top); \ - jl_handler_t *eh = ct->eh; \ - if (eh != NULL) { \ - if (altstack) ptls->sig_exception = NULL; \ - pop_timings_stack() \ - asan_unpoison_task_stack(ct, &eh->eh_ctx); \ - jl_longjmp(eh->eh_ctx, 1); \ - } \ - else { \ - jl_no_exc_handler(exception, ct); \ - } \ - assert(0); - static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED) { -CFI_NORETURN JL_GC_PUSH1(&exception); - throw_internal_body(0); - jl_unreachable(); -} - -/* On the signal stack, we don't want to create any asan frames, but we do on the - normal, stack, so we split this function in two, depending on which context - we're calling it in. This also lets us avoid making a GC frame on the altstack, - which might end up getting corrupted if we recur here through another signal. */ -JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception) -{ -CFI_NORETURN - throw_internal_body(1); + jl_ptls_t ptls = ct->ptls; + ptls->io_wait = 0; + jl_gc_unsafe_enter(ptls); + if (exception) { + /* The temporary ptls->bt_data is rooted by special purpose code in the\ + GC. This exists only for the purpose of preserving bt_data until we + set ptls->bt_size=0 below. */ + jl_push_excstack(ct, &ct->excstack, exception, + ptls->bt_data, ptls->bt_size); + ptls->bt_size = 0; + } + assert(ct->excstack && ct->excstack->top); + jl_handler_t *eh = ct->eh; + if (eh != NULL) { + pop_timings_stack() + asan_unpoison_task_stack(ct, &eh->eh_ctx); + jl_longjmp(eh->eh_ctx, 1); + } + else { + jl_no_exc_handler(exception, ct); + } + assert(0); jl_unreachable(); } @@ -783,24 +832,6 @@ JL_DLLEXPORT void jl_rethrow(void) throw_internal(ct, NULL); } -// Special case throw for errors detected inside signal handlers. This is not -// (cannot be) called directly in the signal handler itself, but is returned to -// after the signal handler exits. -JL_DLLEXPORT JL_NO_ASAN void JL_NORETURN jl_sig_throw(void) -{ -CFI_NORETURN - jl_jmp_buf *safe_restore = jl_get_safe_restore(); - jl_task_t *ct = jl_current_task; - if (safe_restore) { - asan_unpoison_task_stack(ct, safe_restore); - jl_longjmp(*safe_restore, 1); - } - jl_ptls_t ptls = ct->ptls; - jl_value_t *e = ptls->sig_exception; - JL_GC_PROMISE_ROOTED(e); - throw_internal_altstack(ct, e); -} - JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) { // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now @@ -855,153 +886,182 @@ The jl_rng_split function forks a task's RNG state in a way that is essentially guaranteed to avoid collisions between the RNG streams of all tasks. The main RNG is the xoshiro256++ RNG whose state is stored in rngState[0..3]. 
There is also a small internal RNG used for task forking stored in rngState[4]. This -state is used to iterate a LCG (linear congruential generator), which is then -put through four different variations of the strongest PCG output function, -referred to as PCG-RXS-M-XS-64 [1]. This output function is invertible: it maps -a 64-bit state to 64-bit output; which is one of the reasons it's not -recommended for general purpose RNGs unless space is at a premium, but in our -usage invertibility is actually a benefit, as is explained below. +state is used to iterate a linear congruential generator (LCG), which is then +combined with xoshiro256's state and put through four different variations of +the strongest PCG output function, referred to as PCG-RXS-M-XS-64 [1]. The goal of jl_rng_split is to perturb the state of each child task's RNG in -such a way each that for an entire tree of tasks spawned starting with a given -state in a root task, no two tasks have the same RNG state. Moreover, we want to -do this in a way that is deterministic and repeatable based on (1) the root -task's seed, (2) how many random numbers are generated, and (3) the task tree -structure. The RNG state of a parent task is allowed to affect the initial RNG -state of a child task, but the mere fact that a child was spawned should not -alter the RNG output of the parent. This second requirement rules out using the -main RNG to seed children -- some separate state must be maintained and changed -upon forking a child task while leaving the main RNG state unchanged. - -The basic approach is that used by the DotMix [2] and SplitMix [3] RNG systems: -each task is uniquely identified by a sequence of "pedigree" numbers, indicating -where in the task tree it was spawned. This vector of pedigree coordinates is -then reduced to a single value by computing a dot product with a common vector -of random weights. The DotMix paper provides a proof that this dot product hash -value (referred to as a "compression function") is collision resistant in the -sense the the pairwise collision probability of two distinct tasks is 1/N where -N is the number of possible weight values. Both DotMix and SplitMix use a prime -value of N because the proof requires that the difference between two distinct -pedigree coordinates must be invertible, which is guaranteed by N being prime. -We take a different approach: we instead limit pedigree coordinates to being -binary instead -- when a task spawns a child, both tasks share the same pedigree -prefix, with the parent appending a zero and the child appending a one. This way -a binary pedigree vector uniquely identifies each task. Moreover, since the -coordinates are binary, the difference between coordinates is always one which -is its own inverse regardless of whether N is prime or not. This allows us to -compute the dot product modulo 2^64 using native machine arithmetic, which is -considerably more efficient and simpler to implement than arithmetic in a prime -modulus. It also means that when accumulating the dot product incrementally, as -described in SplitMix, we don't need to multiply weights by anything, we simply -add the random weight for the current task tree depth to the parent's dot -product to derive the child's dot product. - -We use the LCG in rngState[4] to derive generate pseudorandom weights for the -dot product. Each time a child is forked, we update the LCG in both parent and -child tasks. 
In the parent, that's all we have to do -- the main RNG state -remains unchanged (recall that spawning a child should *not* affect subsequence -RNG draws in the parent). The next time the parent forks a child, the dot -product weight used will be different, corresponding to being a level deeper in -the binary task tree. In the child, we use the LCG state to generate four -pseudorandom 64-bit weights (more below) and add each weight to one of the -xoshiro256 state registers, rngState[0..3]. If we assume the main RNG remains -unused in all tasks, then each register rngState[0..3] accumulates a different -Dot/SplitMix dot product hash as additional child tasks are spawned. Each one is -collision resistant with a pairwise collision chance of only 1/2^64. Assuming -that the four pseudorandom 64-bit weight streams are sufficiently independent, -the pairwise collision probability for distinct tasks is 1/2^256. If we somehow -managed to spawn a trillion tasks, the probability of a collision would be on -the order of 1/10^54. Practically impossible. Put another way, this is the same -as the probability of two SHA256 hash values accidentally colliding, which we -generally consider so unlikely as not to be worth worrying about. - -What about the random "junk" that's in the xoshiro256 state registers from -normal use of the RNG? For a tree of tasks spawned with no intervening samples -taken from the main RNG, all tasks start with the same junk which doesn't affect -the chance of collision. The Dot/SplitMix papers even suggest adding a random -base value to the dot product, so we can consider whatever happens to be in the -xoshiro256 registers to be that. What if the main RNG gets used between task -forks? In that case, the initial state registers will be different. The DotMix -collision resistance proof doesn't apply without modification, but we can -generalize the setup by adding a different base constant to each compression -function and observe that we still have a 1/N chance of the weight value -matching that exact difference. This proves collision resistance even between -tasks whose dot product hashes are computed with arbitrary offsets. We can -conclude that this scheme provides collision resistance even in the face of -different starting states of the main RNG. Does this seem too good to be true? -Perhaps another way of thinking about it will help. Suppose we seeded each task -completely randomly. Then there would also be a 1/2^256 chance of collision, -just as the DotMix proof gives. Essentially what the proof is telling us is that -if the weights are chosen uniformly and uncorrelated with the rest of the -compression function, then the dot product construction is a good enough way to -pseudorandomly seed each task. From that perspective, it's easier to believe -that adding an arbitrary constant to each seed doesn't worsen its randomness. - -This leaves us with the question of how to generate four pseudorandom weights to -add to the rngState[0..3] registers at each depth of the task tree. The scheme -used here is that a single 64-bit LCG state is iterated in both parent and child -at each task fork, and four different variations of the PCG-RXS-M-XS-64 output -function are applied to that state to generate four different pseudorandom -weights. Another obvious way to generate four weights would be to iterate the -LCG four times per task split. There are two main reasons we've chosen to use -four output variants instead: - -1. 
Advancing four times per fork reduces the set of possible weights that each - register can be perturbed by from 2^64 to 2^60. Since collision resistance is - proportional to the number of possible weight values, that would reduce - collision resistance. - -2. It's easier to compute four PCG output variants in parallel. Iterating the - LCG is inherently sequential. Each PCG variant can be computed independently - from the LCG state. All four can even be computed at once with SIMD vector - instructions, but the compiler doesn't currently choose to do that. - -A key question is whether the approach of using four variations of PCG-RXS-M-XS -is sufficiently random both within and between streams to provide the collision -resistance we expect. We obviously can't test that with 256 bits, but we have -tested it with a reduced state analogue using four PCG-RXS-M-XS-8 output -variations applied to a common 8-bit LCG. Test results do indicate sufficient -independence: a single register has collisions at 2^5 while four registers only -start having collisions at 2^20, which is actually better scaling of collision -resistance than we expect in theory. In theory, with one byte of resistance we -have a 50% chance of some collision at 20, which matches, but four bytes gives a -50% chance of collision at 2^17 and our (reduced size analogue) construction is -still collision free at 2^19. This may be due to the next observation, which guarantees collision avoidance for certain shapes of task trees as a result of using an -invertible RNG to generate weights. - -In the specific case where a parent task spawns a sequence of child tasks with -no intervening usage of its main RNG, the parent and child tasks are actually -_guaranteed_ to have different RNG states. This is true because the four PCG -streams each produce every possible 2^64 bit output exactly once in the full -2^64 period of the LCG generator. This is considered a weakness of PCG-RXS-M-XS -when used as a general purpose RNG, but is quite beneficial in this application. -Since each of up to 2^64 children will be perturbed by different weights, they -cannot have hash collisions. What about parent colliding with child? That can -only happen if all four main RNG registers are perturbed by exactly zero. This -seems unlikely, but could it occur? Consider this part of each output function: - - p ^= p >> ((p >> 59) + 5); - p *= m[i]; - p ^= p >> 43 - -It's easy to check that this maps zero to zero. An unchanged parent RNG can only -happen if all four `p` values are zero at the end of this, which implies that -they were all zero at the beginning. However, that is impossible since the four -`p` values differ from `x` by different additive constants, so they cannot all -be zero. Stated more generally, this non-collision property: assuming the main -RNG isn't used between task forks, sibling and parent tasks cannot have RNG -collisions. If the task tree structure is more deeply nested or if there are -intervening uses of the main RNG, we're back to relying on "merely" 256 bits of -collision resistance, but it's nice to know that in what is likely the most -common case, RNG collisions are actually impossible. This fact may also explain -better-than-theoretical collision resistance observed in our experiment with a -reduced size analogue of our hashing system. +such a way that for an entire tree of tasks spawned starting with a given root +task state, no two tasks have the same RNG state. 
Moreover, we want to do this +in a way that is deterministic and repeatable based on (1) the root task's seed, +(2) how many random numbers are generated, and (3) the task tree structure. The +RNG state of a parent task is allowed to affect the initial RNG state of a child +task, but the mere fact that a child was spawned should not alter the RNG output +of the parent. This second requirement rules out using the main RNG to seed +children: if we use the main RNG, we either advance it, which affects the +parent's RNG stream or, if we don't advance it, then every child would have an +identical RNG stream. Therefore some separate state must be maintained and +changed upon forking a child task while leaving the main RNG state unchanged. + +The basic approach is a generalization and simplification of that used in the +DotMix [2] and SplitMix [3] RNG systems: each task is uniquely identified by a +sequence of "pedigree" numbers, indicating where in the task tree it was +spawned. This vector of pedigree coordinates is then reduced to a single value +by computing a "dot product" with a shared vector of random weights. I write +"dot product" in quotes because what we use is not an actual dot product. The +linear dot product construction used in both DotMix and SplitMix was found by +@foobar_iv2 [4] to allow easy construction of linear relationships between the +main RNG states of tasks, which was in turn reflected in observable linear +relationships between the outputs of their RNGs. This relationship was between a +minimum of four tasks, so doesn't constitute a collision, per se, but is clearly +undesirable and highlights a hazard of the plain dot product construction. + +As in DotMix and SplitMix, each task is assigned unique task "pedigree" +coordinates. Our pedigree construction is a bit different and uses only binary +coordinates rather than arbitrary integers. Each pedigree is an infinite +sequence of ones and zeros with only finitely many ones. Each task has a "fork +index": the root task has index 0; the fork index of the jth child task of a +parent task with fork index i is i+j. The root task's coordinates are all zeros; +each child task's coordinates are the same as its parents except at its fork +index, where the parent has a zero while the child has a one; each task's +coordinates after its fork index are all zeros. The last common ancestor of two +tasks has coordinates that are the longest common prefix of their coordinates. + +Also as in DotMix and SplitMix, we generate a sequence of pseudorandom "weights" +to combine with the coordinates of each task. This sequence is common across all +tasks, and different mix values for tasks stem entirely from task coordinates +being different. In DotMix and SplitMix the mix function is a literal dot +product: the pseudorandom weights are multiplied by corresponding task +coordinate and summed. While this does provably make collisions as unlikely as +random seeding, this linear construction can be used to create linearly +correlated states between more than two tasks. However, it turns out that the +compression mixing construction need not be linear, nor commutative, nor +associative. In fact, the mixing function need only be bijective in both +arguments. This allows us to use a much more non-trivial mixing function and +avoid any linear or other obvious correlations between related sets of tasks. + +We maintain an LCG in rngState[4] to generate pseudorandom weights. 
An LCG by +itself is a very bad RNG, but we combine this one with xoshiro256 state +registers in a non-trivial way and then apply the PCG-RXS-M-XS-64 output +function to that. Even if the xoshiro256 states are all zeros, which they should +never be, the output would be the same as PCG-RXS-M-XS-64, which is a solid +statistical RNG. Each time a child is forked, we update the LCG in both parent +and child tasks, corresponding to increasing the fork index. In the parent, +that's all we have to do -- the main RNG state remains unchanged. Recall that +spawning a child should not affect subsequent RNG draws in the parent. The next +time the parent forks a child, the mixing weight used will be different. In the +child, we use the LCG state to perturb the child's main RNG state registers, +rngState[0..3]. + +To generalize SplitMix's optimized dot product construction, we also compute +each task's compression function value incrementally by combining the parent's +compression value with pseudorandom weight corresponding with the child's fork +index. Formally, if the parent's compression value is c then we can compute the +child's compression value as c′ = f(c, wᵢ) where w is the vector of pseudorandom +weights. What is f? It can be any function that is bijective in each argument +for all values of the other argument: + + * For all c: w ↦ f(c, w) is bijective + * For all w: c ↦ f(c, w) is bijective + +The proof that these requirements are sufficient to ensure collision resistance +is in the linked discussion [4]. DotMix/SplitMix are a special case where f is +just addition. Instead we use a much less simple mixing function: + + 1. We use (2c+1)(2w+1)÷2 % 2^64 to mix the bits of c and w + 2. We then apply the PCG-RXS-M-XS-64 output function + +The first step thoroughly mixes the bits of the previous compression value and +the pseudorandom weight value using multiplication, which is non-commutative +with xoshiro's operations (xor, shift, rotate). This mixing function is a +bijection on each argument witnessed by these inverses: + + * c′ ↦ (2c′+1)(2w+1)⁻¹÷2 % 2^64 + * w′ ↦ (2c+1)⁻¹(2w′+1)÷2 % 2^64 + +Here (2w+1)⁻¹ is the modular inverse of (2w+1) mod 2^64, guaranteed to exist +since 2w+1 is odd. The second PCG output step is a bijection and designed to be +significantly non-linear -- non-linear enough to mask the linearity of the LCG +that drives the PCG-RXS-M-XS-64 RNG and allows it to pass statistical RNG test +suites despite having the same size state and output. In particular, since this +mixing function is highly non-associative and non-linear, we (hopefully) don't +have any discernible relationship between these values: + + * c₀₀ = c + * c₁₀ = f(c, wᵢ) + * c₀₁ = f(c, wⱼ) + * c₁₁ = f(f(c, wᵢ), wⱼ) + +When f is simply `+` then these have a very obvious linear relationship: + + c₀₀ + c₁₁ == c₁₀ + c₀₁ + +This relationship holds regardless of what wᵢ and wⱼ are and allows easy +creation of correlated tasks with the way we were previously using the +DotMix/SplitMix construction. SplitMix itself does not output the raw dot +product, probably because the authors were aware of this linearity issue; +instead: they apply the MurmurHash3 finalizer to the dot-product to get an +output that masks linear relationships. I had failed to understand the +importance of that finalizer. One possible fix for our task splitting +correlation issue would have been to also apply a non-linear finalizer +(MurmurHash3 is one of the best) to our dot product before using it to perturb +the xoshiro256 state. 
There are two problems with that fix, however: + +1. It requires accumulating the dot product somewhere. The old approach + accumulates dot products directly in the xoshiro registers; if we were to + accumulate and then finalize, the dot product has to be stored somewhere + in each task. We want our tasks to be as small as possible, so adding + another 64-bit field that we never change would be unfortunate. + +2. We still need to apply the PCG finalizer to the internal LCG in order to + generate dot product weights. SplitMix uses a shared static array of + 1024 pre-generated random weights; we could do the same, but that limits + the number of task splits to a max of 1024 before weights have to be + reused. We can't use the LCG directly because it's highly linear and we + need four variations of the internal RNG stream for the four xoshiro256 + registers. That means we'd have to apply the PCG finalizer, add it to + our dot product accumulator field in the child task, then apply the + MurmurHash3 finalizer to that dot product and use the result to perturb + the main RNG state. + +We avoid both problems by recognizing that the mixing function can be much less +simple while still allowing the essential collision resistance proof to go +through. We replace addition with a highly non-linear, non-associative mixing +function that includes the PCG output function. This allows us to continue to use +the xoshiro state registers for mixing function accumulation as well as for its +primary purpose. It also obviates the need for double finalization: it would +have been disastrous to use LCG state directly as weights for a linear +construction like SplitMix, but using it as the input to a non-linear mixer that +includes the strongest PCG output function is reasonable (and precisely what +PCG-RXS-M-XS-64 does). Since the output of the mixing function is already +non-linearly finalized, there's no need to apply yet another finalizer. + +Since there are four xoshiro256 registers that we want to behave independently +as mix accumulators, we use four different variations on the mixing function, +keyed by register index (0-3). Each variation first xors the LCG state with a +different random constant before combining that value above with the old +register state via multiplication. The PCG-RXS-M-XS-64 output function is then +applied to that mixed state, with a different multiplier constant for each +variation / register index. Xor is used in the first step since we multiply the +result with the state immediately after and multiplication distributes over `+` +and commutes with `*`, making both suspect options. Multiplication doesn't +distribute over or commute with xor. We also use a different odd multiplier in +PCG-RXS-M-XS-64 for each RNG register. These four sources of variation +(different initial state, different xor constants, different xoshiro256 state, +different PCG multipliers) are hopefully sufficient for each of the four outputs +to behave statistically independently, in the sense that even if two different +tasks happen to have a state collision in one 64-bit register, it is highly +improbable that all four registers collide at the same time, giving an actual +main RNG state collision.
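Reviewer's aside, not part of the patch: the standalone C sketch below mirrors the per-register mix that jl_rng_split implements further down (using the register-0 multiplier m[0] = 0xaef17502108ef2d9 from this diff) and makes the non-linearity claim concrete. With the old construction f(c, w) = c + w the two printed sums are always equal; with the new mix they differ for essentially all inputs. The values chosen for c, wi, and wj are arbitrary and only for illustration; in the real code the weight is the advanced LCG state xored with a[i].

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    // One register's worth of the forking mix (register index 0). `w` stands
    // for the already-xored weight x ^ a[0] used in jl_rng_split.
    static uint64_t mix(uint64_t c, uint64_t w)
    {
        c += w * (2 * c + 1);       // c = (2c+1)(2w+1) div 2 mod 2^64, bijective in each argument
        c ^= c >> ((c >> 59) + 5);  // PCG-RXS-M-XS-64 output function...
        c *= 0xaef17502108ef2d9;    // ...with the standard PCG multiplier m[0]
        c ^= c >> 43;
        return c;
    }

    int main(void)
    {
        uint64_t c   = 0x0123456789abcdefULL; // arbitrary parent compression value
        uint64_t wi  = 0x9e3779b97f4a7c15ULL; // arbitrary weight for fork index i
        uint64_t wj  = 0xbf58476d1ce4e5b9ULL; // arbitrary weight for fork index j
        uint64_t c00 = c;                     // parent
        uint64_t c10 = mix(c, wi);            // child forked at index i
        uint64_t c01 = mix(c, wj);            // child forked at index j
        uint64_t c11 = mix(mix(c, wi), wj);   // grandchild forked at i, then j
        // A linear mix (plain addition) would force c00 + c11 == c10 + c01;
        // the non-linear mix breaks that relationship.
        printf("c00 + c11 = %016" PRIx64 "\n", c00 + c11);
        printf("c10 + c01 = %016" PRIx64 "\n", c10 + c01);
        return 0;
    }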
[1]: https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf [2]: http://supertech.csail.mit.edu/papers/dprng.pdf [3]: https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf + +[4]: +https://discourse.julialang.org/t/linear-relationship-between-xoshiro-tasks/110454 */ void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT { @@ -1010,26 +1070,30 @@ void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSA src[4] = dst[4] = x * 0xd1342543de82ef95 + 1; // high spectrum multiplier from https://arxiv.org/abs/2001.05304 + // random xor constants static const uint64_t a[4] = { - 0xe5f8fa077b92a8a8, // random additive offsets... - 0x7a0cd918958c124d, - 0x86222f7d388588d4, - 0xd30cbd35f2b64f52 + 0x214c146c88e47cb7, + 0xa66d8cc21285aafa, + 0x68c7ef2d7b1a54d4, + 0xb053a7d7aa238c61 }; + // random odd multipliers static const uint64_t m[4] = { 0xaef17502108ef2d9, // standard PCG multiplier - 0xf34026eeb86766af, // random odd multipliers... + 0xf34026eeb86766af, 0x38fd70ad58dd9fbb, 0x6677f9b93ab0c04d }; - // PCG-RXS-M-XS output with four variants + // PCG-RXS-M-XS-64 output with four variants for (int i = 0; i < 4; i++) { - uint64_t p = x + a[i]; - p ^= p >> ((p >> 59) + 5); - p *= m[i]; - p ^= p >> 43; - dst[i] = src[i] + p; // SplitMix dot product + uint64_t c = src[i]; + uint64_t w = x ^ a[i]; + c += w*(2*c + 1); // c = (2c+1)(2w+1)÷2 % 2^64 (double bijection) + c ^= c >> ((c >> 59) + 5); + c *= m[i]; + c ^= c >> 43; + dst[i] = c; } } @@ -1039,26 +1103,28 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(t, jl_task_tag, 0); JL_PROBE_RT_NEW_TASK(ct, t); - t->copy_stack = 0; + t->ctx.copy_stack = 0; if (ssize == 0) { // stack size unspecified; use default if (always_copy_stacks) { - t->copy_stack = 1; - t->bufsz = 0; + t->ctx.copy_stack = 1; + t->ctx.bufsz = 0; } else { - t->bufsz = JL_STACK_SIZE; + t->ctx.bufsz = JL_STACK_SIZE; } - t->stkbuf = NULL; + t->ctx.stkbuf = NULL; } else { // user requested dedicated stack of a certain size if (ssize < MINSTKSZ) ssize = MINSTKSZ; - t->bufsz = ssize; - t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t); - if (t->stkbuf == NULL) + t->ctx.bufsz = ssize; + t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t); + if (t->ctx.stkbuf == NULL) { + t->ctx.bufsz = 0; jl_throw(jl_memory_exception); + } } t->next = jl_nothing; t->queue = jl_nothing; @@ -1068,8 +1134,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->result = jl_nothing; t->donenotify = completion_future; jl_atomic_store_relaxed(&t->_isexception, 0); - // Inherit logger state from parent task - t->logstate = ct->logstate; + // Inherit scope from parent task + t->scope = ct->scope; // Fork task-local random state from parent jl_rng_split(t->rngState, ct->rngState); // there is no active exception handler available on this stack yet @@ -1077,30 +1143,26 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->sticky = 1; t->gcstack = NULL; t->excstack = NULL; - t->started = 0; + t->ctx.started = 0; t->priority = 0; - jl_atomic_store_relaxed(&t->tid, t->copy_stack ? 
jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved + jl_atomic_store_relaxed(&t->tid, -1); t->threadpoolid = ct->threadpoolid; t->ptls = NULL; t->world_age = ct->world_age; t->reentrant_timing = 0; + t->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0; + jl_atomic_store_relaxed(&t->first_enqueued_at, 0); + jl_atomic_store_relaxed(&t->last_started_running_at, 0); + jl_atomic_store_relaxed(&t->running_time_ns, 0); + jl_atomic_store_relaxed(&t->finished_at, 0); jl_timing_task_init(t); -#ifdef COPY_STACKS - if (!t->copy_stack) { -#if defined(JL_DEBUG_BUILD) - memset(&t->ctx, 0, sizeof(t->ctx)); -#endif - } - else { - if (always_copy_stacks) - memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx)); - else - memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx)); - } -#endif + if (t->ctx.copy_stack) + t->ctx.copy_ctx = NULL; + else + t->ctx.ctx = NULL; #ifdef _COMPILER_TSAN_ENABLED_ - t->ctx.tsan_state = __tsan_create_fiber(0); + t->ctx.tsan_state = NULL; #endif #ifdef _COMPILER_ASAN_ENABLED_ t->ctx.asan_fake_stack = NULL; @@ -1115,47 +1177,6 @@ JL_DLLEXPORT jl_task_t *jl_get_current_task(void) return pgcstack == NULL ? NULL : container_of(pgcstack, jl_task_t, gcstack); } - -#ifdef JL_HAVE_ASYNCIFY -JL_DLLEXPORT jl_ucontext_t *task_ctx_ptr(jl_task_t *t) -{ - return &t->ctx.ctx; -} - -JL_DLLEXPORT jl_value_t *jl_get_root_task(void) -{ - jl_task_t *ct = jl_current_task; - return (jl_value_t*)ct->ptls->root_task; -} - -JL_DLLEXPORT void jl_task_wait() -{ - static jl_function_t *wait_func = NULL; - if (!wait_func) { - wait_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("wait")); - } - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; - ct->world_age = jl_get_world_counter(); - jl_apply(&wait_func, 1); - ct->world_age = last_age; -} - -JL_DLLEXPORT void jl_schedule_task(jl_task_t *task) -{ - static jl_function_t *sched_func = NULL; - if (!sched_func) { - sched_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("schedule")); - } - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; - ct->world_age = jl_get_world_counter(); - jl_value_t *args[] = {(jl_value_t*)sched_func, (jl_value_t*)task}; - jl_apply(args, 2); - ct->world_age = last_age; -} -#endif - // Do one-time initializations for task system void jl_init_tasks(void) JL_GC_DISABLED { @@ -1176,13 +1197,24 @@ void jl_init_tasks(void) JL_GC_DISABLED exit(1); } #endif +#if defined(_COMPILER_ASAN_ENABLED_) && __GLIBC__ + void *libc_handle = dlopen("libc.so.6", RTLD_NOW | RTLD_NOLOAD); + if (libc_handle) { + *(void**)&real_siglongjmp = dlsym(libc_handle, "siglongjmp"); + dlclose(libc_handle); + } + if (real_siglongjmp == NULL) { + jl_safe_printf("failed to get real siglongjmp\n"); + exit(1); + } +#endif } #if defined(_COMPILER_ASAN_ENABLED_) -STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void); +static void NOINLINE JL_NORETURN _start_task(void); #endif -STATIC_OR_JS void NOINLINE JL_NORETURN JL_NO_ASAN start_task(void) +static void NOINLINE JL_NORETURN JL_NO_ASAN start_task(void) { CFI_NORETURN #if defined(_COMPILER_ASAN_ENABLED_) @@ -1194,11 +1226,11 @@ CFI_NORETURN jl_task_t *ct = jl_current_task; #endif jl_ptls_t ptls = ct->ptls; - sanitizer_finish_switch_fiber(ptls->previous_task, ct); + sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &ct->ctx); _start_task(); } -STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void) +static void NOINLINE JL_NORETURN _start_task(void) { 
CFI_NORETURN #endif @@ -1208,6 +1240,7 @@ CFI_NORETURN #else jl_task_t *ct = jl_current_task; #endif + ct->ctx.ctx = NULL; jl_ptls_t ptls = ct->ptls; jl_value_t *res; assert(ptls->finalizers_inhibited == 0); @@ -1215,16 +1248,22 @@ CFI_NORETURN #ifdef MIGRATE_TASKS jl_task_t *pt = ptls->previous_task; ptls->previous_task = NULL; - if (!pt->sticky && !pt->copy_stack) + if (!pt->sticky && !pt->ctx.copy_stack) jl_atomic_store_release(&pt->tid, -1); #endif - ct->started = 1; + ct->ctx.started = 1; + if (ct->metrics_enabled) { + // [task] wait_time -started-> user_time + assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0); + assert(jl_atomic_load_relaxed(&ct->last_started_running_at) == 0); + jl_atomic_store_relaxed(&ct->last_started_running_at, jl_hrtime()); + } JL_PROBE_RT_START_TASK(ct); jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { record_backtrace(ptls, 0); - jl_push_excstack(&ct->excstack, ct->result, + jl_push_excstack(ct, &ct->excstack, ct->result, ptls->bt_data, ptls->bt_size); res = ct->result; } @@ -1238,7 +1277,7 @@ CFI_NORETURN res = jl_apply(&ct->start, 1); } JL_CATCH { - res = jl_current_exception(); + res = jl_current_exception(ct); jl_atomic_store_relaxed(&ct->_isexception, 1); goto skip_pop_exception; } @@ -1256,64 +1295,52 @@ skip_pop_exception:; #ifdef _OS_WINDOWS_ #define setcontext jl_setcontext #define swapcontext jl_swapcontext -#define makecontext jl_makecontext #endif -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT +static int make_fiber(jl_ucontext_t *t, _jl_ucontext_t *ctx) { #ifndef _OS_WINDOWS_ - int r = getcontext(t); - if (r != 0) - jl_error("getcontext failed"); + int r = getcontext(ctx); + if (r != 0) abort(); #endif - void *stk = jl_malloc_stack(ssize, owner); - if (stk == NULL) - return NULL; - t->uc_stack.ss_sp = stk; - t->uc_stack.ss_size = *ssize; + ctx->uc_stack.ss_sp = (char*)t->stkbuf; + ctx->uc_stack.ss_size = t->bufsz; #ifdef _OS_WINDOWS_ - makecontext(t, &start_task); + jl_makecontext(ctx, &start_task); #else - t->uc_link = NULL; - makecontext(t, &start_task, 0); + ctx->uc_link = NULL; + makecontext(ctx, &start_task, 0); #endif - return (char*)stk; + return 1; } static void jl_start_fiber_set(jl_ucontext_t *t) { - setcontext(&t->ctx); + _jl_ucontext_t ctx; + make_fiber(t, &ctx); + setcontext(&ctx); } static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) { + _jl_ucontext_t ctx; + make_fiber(t, &ctx); assert(lastt); tsan_switch_to_ctx(t); - swapcontext(&lastt->ctx, &t->ctx); + swapcontext(lastt->ctx, &ctx); } static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { tsan_switch_to_ctx(t); - swapcontext(&lastt->ctx, &t->ctx); + swapcontext(lastt->ctx, t->ctx); } static void jl_set_fiber(jl_ucontext_t *t) { - setcontext(&t->ctx); -} -#endif - -#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM) -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) -{ - char *stkbuf = (char*)jl_malloc_stack(ssize, owner); - if (stkbuf == NULL) - return NULL; -#ifndef __clang_gcanalyzer__ - ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber - ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber -#endif - return stkbuf; + setcontext(t->ctx); } #endif #if defined(JL_HAVE_UNW_CONTEXT) +#ifdef _OS_WINDOWS_ +#error unw_context_t not defined in Windows +#endif static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c) { volatile int returns = 0; @@ 
-1327,15 +1354,15 @@ static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c) static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { unw_cursor_t c; - int r = unw_init_local(&c, &t->ctx); + int r = unw_init_local(&c, t->ctx); if (r < 0) abort(); - jl_unw_swapcontext(&lastt->ctx, &c); + jl_unw_swapcontext(lastt->ctx, &c); } static void jl_set_fiber(jl_ucontext_t *t) { unw_cursor_t c; - int r = unw_init_local(&c, &t->ctx); + int r = unw_init_local(&c, t->ctx); if (r < 0) abort(); unw_resume(&c); @@ -1343,14 +1370,14 @@ static void jl_set_fiber(jl_ucontext_t *t) #elif defined(JL_HAVE_ASM) static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) { - if (jl_setjmp(lastt->ctx.uc_mcontext, 0)) + if (jl_setjmp(lastt->ctx->uc_mcontext, 0)) return; tsan_switch_to_ctx(t); jl_set_fiber(t); // doesn't return } static void jl_set_fiber(jl_ucontext_t *t) { - jl_longjmp(t->ctx.uc_mcontext, 1); + jl_longjmp(t->ctx->uc_mcontext, 1); } #endif @@ -1371,14 +1398,14 @@ static void jl_set_fiber(jl_ucontext_t *t) static void jl_start_fiber_set(jl_ucontext_t *t) { unw_cursor_t c; - char *stk = ((char**)&t->ctx)[0]; - size_t ssize = ((size_t*)&t->ctx)[1]; + char *stk = (char*)t->stkbuf; + size_t ssize = t->bufsz; uintptr_t fn = (uintptr_t)&start_task; stk += ssize; - int r = unw_getcontext(&t->ctx); + int r = unw_getcontext(t->ctx); if (r) abort(); - if (unw_init_local(&c, &t->ctx)) + if (unw_init_local(&c, t->ctx)) abort(); PUSH_RET(&c, stk); #if defined __linux__ @@ -1394,43 +1421,46 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) { assert(lastt); unw_cursor_t c; - char *stk = ((char**)&t->ctx)[0]; - size_t ssize = ((size_t*)&t->ctx)[1]; + char *stk = (char*)t->stkbuf; + size_t ssize = t->bufsz; uintptr_t fn = (uintptr_t)&start_task; stk += ssize; volatile int returns = 0; - int r = unw_getcontext(&lastt->ctx); + int r = unw_getcontext(lastt->ctx); if (++returns == 2) // r is garbage after the first return return; if (r != 0 || returns != 1) abort(); - r = unw_getcontext(&t->ctx); + r = unw_getcontext(t->ctx); if (r != 0) abort(); - if (unw_init_local(&c, &t->ctx)) + if (unw_init_local(&c, t->ctx)) abort(); PUSH_RET(&c, stk); if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk)) abort(); if (unw_set_reg(&c, UNW_REG_IP, fn)) abort(); - jl_unw_swapcontext(&lastt->ctx, &c); + jl_unw_swapcontext(lastt->ctx, &c); } #endif #if defined(JL_HAVE_ASM) +#ifdef _OS_WINDOWS_ +#error JL_HAVE_ASM not defined in Windows +#endif JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) { assert(lastt); #ifdef JL_HAVE_UNW_CONTEXT volatile int returns = 0; - int r = unw_getcontext(&lastt->ctx); + int r = unw_getcontext(lastt->ctx); if (++returns == 2) // r is garbage after the first return return; if (r != 0 || returns != 1) abort(); #else - if (jl_setjmp(lastt->ctx.uc_mcontext, 0)) + if (jl_setjmp(lastt->ctx->uc_mcontext, 0)) return; #endif tsan_switch_to_ctx(t); @@ -1438,8 +1468,9 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t * } JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t) { - char *stk = ((char**)&t->ctx)[0]; - size_t ssize = ((size_t*)&t->ctx)[1]; +CFI_NORETURN + char *stk = (char*)t->stkbuf; + size_t ssize = t->bufsz; uintptr_t fn = (uintptr_t)&start_task; stk += ssize; #ifdef _CPU_X86_64_ @@ -1478,6 +1509,14 @@ JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t) // because all our addresses are word-aligned. 
" udf #0" // abort : : "r" (stk), "r"(fn) : "memory" ); +#elif defined(_CPU_RISCV64_) + asm volatile( + " mv sp, %0;\n" + " mv ra, zero;\n" // Clear return address register + " mv fp, zero;\n" // Clear frame pointer + " jr %1;\n" // call `fn` with fake stack frame + " ebreak" // abort + : : "r"(stk), "r"(fn) : "memory" ); #elif defined(_CPU_PPC64_) // N.B.: There is two iterations of the PPC64 ABI. // v2 is current and used here. Make sure you have the @@ -1508,115 +1547,6 @@ JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t) } #endif -#if defined(JL_HAVE_SIGALTSTACK) -#if defined(_COMPILER_TSAN_ENABLED_) -#error TSAN support not currently implemented for this tasking model -#endif - -static void start_basefiber(int sig) -{ - jl_ptls_t ptls = jl_current_task->ptls; - if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0)) - start_task(); // sanitizer_finish_switch_fiber is part of start_task -} -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) -{ - stack_t uc_stack, osigstk; - struct sigaction sa, osa; - sigset_t set, oset; - void *stk = jl_malloc_stack(ssize, owner); - if (stk == NULL) - return NULL; - // setup - jl_ptls_t ptls = jl_current_task->ptls; - _jl_ucontext_t base_ctx; - memcpy(&base_ctx, &ptls->base_ctx, sizeof(base_ctx)); - sigfillset(&set); - if (pthread_sigmask(SIG_BLOCK, &set, &oset) != 0) { - jl_free_stack(stk, *ssize); - jl_error("pthread_sigmask failed"); - } - uc_stack.ss_sp = stk; - uc_stack.ss_size = *ssize; - uc_stack.ss_flags = 0; - if (sigaltstack(&uc_stack, &osigstk) != 0) { - jl_free_stack(stk, *ssize); - jl_error("sigaltstack failed"); - } - memset(&sa, 0, sizeof(sa)); - sigemptyset(&sa.sa_mask); - sa.sa_handler = start_basefiber; - sa.sa_flags = SA_ONSTACK; - if (sigaction(SIGUSR2, &sa, &osa) != 0) { - jl_free_stack(stk, *ssize); - jl_error("sigaction failed"); - } - // emit signal - pthread_kill(pthread_self(), SIGUSR2); // initializes jl_basectx - sigdelset(&set, SIGUSR2); - sigsuspend(&set); - // cleanup - if (sigaction(SIGUSR2, &osa, NULL) != 0) { - jl_free_stack(stk, *ssize); - jl_error("sigaction failed"); - } - if (osigstk.ss_size < MINSTKSZ && (osigstk.ss_flags | SS_DISABLE)) - osigstk.ss_size = MINSTKSZ; - if (sigaltstack(&osigstk, NULL) != 0) { - jl_free_stack(stk, *ssize); - jl_error("sigaltstack failed"); - } - if (pthread_sigmask(SIG_SETMASK, &oset, NULL) != 0) { - jl_free_stack(stk, *ssize); - jl_error("pthread_sigmask failed"); - } - if (&ptls->base_ctx != t) { - memcpy(&t, &ptls->base_ctx, sizeof(base_ctx)); - memcpy(&ptls->base_ctx, &base_ctx, sizeof(base_ctx)); // restore COPY_STACKS context - } - return (char*)stk; -} -static void jl_start_fiber_set(jl_ucontext_t *t) { - jl_longjmp(t->ctx.uc_mcontext, 1); // (doesn't return) -} -static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) -{ - assert(lastt); - if (lastt && jl_setjmp(lastt->ctx.uc_mcontext, 0)) - return; - tsan_switch_to_ctx(t); - jl_start_fiber_set(t); -} -static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t) -{ - if (jl_setjmp(lastt->ctx.uc_mcontext, 0)) - return; - tsan_switch_to_ctx(t); - jl_start_fiber_set(t); // doesn't return -} -static void jl_set_fiber(jl_ucontext_t *t) -{ - jl_longjmp(t->ctx.uc_mcontext, 1); -} -#endif - -#if defined(JL_HAVE_ASYNCIFY) -#if defined(_COMPILER_TSAN_ENABLED_) -#error TSAN support not currently implemented for this tasking model -#endif - -static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT -{ - void *stk = jl_malloc_stack(ssize, owner); - if 
(stk == NULL) - return NULL; - t->stackbottom = stk; - t->stacktop = ((char*)stk) + *ssize; - return (char*)stk; -} -// jl_*_fiber implemented in js -#endif - // Initialize a root task using the given stack. jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) { @@ -1646,14 +1576,14 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) } #endif if (always_copy_stacks) { - ct->copy_stack = 1; - ct->stkbuf = NULL; - ct->bufsz = 0; + ct->ctx.copy_stack = 1; + ct->ctx.stkbuf = NULL; + ct->ctx.bufsz = 0; } else { - ct->copy_stack = 0; - ct->stkbuf = stack; - ct->bufsz = ssize; + ct->ctx.copy_stack = 0; + ct->ctx.stkbuf = stack; + ct->ctx.bufsz = ssize; } #ifdef USE_TRACY @@ -1661,7 +1591,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) strcpy(unique_string, "Root"); ct->name = unique_string; #endif - ct->started = 1; + ct->ctx.started = 1; ct->next = jl_nothing; ct->queue = jl_nothing; ct->tls = jl_nothing; @@ -1670,7 +1600,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->result = jl_nothing; ct->donenotify = jl_nothing; jl_atomic_store_relaxed(&ct->_isexception, 0); - ct->logstate = jl_nothing; + ct->scope = jl_nothing; ct->eh = NULL; ct->gcstack = NULL; ct->excstack = NULL; @@ -1680,11 +1610,25 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->ptls = ptls; ct->world_age = 1; // OK to run Julia code on this task ct->reentrant_timing = 0; + jl_atomic_store_relaxed(&ct->running_time_ns, 0); + jl_atomic_store_relaxed(&ct->finished_at, 0); + ct->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0; + if (ct->metrics_enabled) { + // [task] created -started-> user_time + uint64_t now = jl_hrtime(); + jl_atomic_store_relaxed(&ct->first_enqueued_at, now); + jl_atomic_store_relaxed(&ct->last_started_running_at, now); + } + else { + jl_atomic_store_relaxed(&ct->first_enqueued_at, 0); + jl_atomic_store_relaxed(&ct->last_started_running_at, 0); + } ptls->root_task = ct; jl_atomic_store_relaxed(&ptls->current_task, ct); JL_GC_PROMISE_ROOTED(ct); jl_set_pgcstack(&ct->gcstack); assert(jl_current_task == ct); + assert(jl_current_task->ptls == ptls); #ifdef _COMPILER_TSAN_ENABLED_ ct->ctx.tsan_state = __tsan_get_current_fiber(); @@ -1700,21 +1644,18 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) if (always_copy_stacks) { // when this is set, we will attempt to corrupt the process stack to switch tasks, // although this is unreliable, and thus not recommended - ptls->stackbase = stack_hi; - ptls->stacksize = ssize; -#ifdef _OS_WINDOWS_ - ptls->copy_stack_ctx.uc_stack.ss_sp = stack_hi; - ptls->copy_stack_ctx.uc_stack.ss_size = ssize; -#endif - if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0)) - start_task(); // sanitizer_finish_switch_fiber is part of start_task + ptls->stackbase = jl_get_frame_addr(); + ptls->stacksize = (char*)ptls->stackbase - (char*)stack_lo; } else { - ssize = JL_STACK_SIZE; - char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL); + size_t bufsz = JL_STACK_SIZE; + void *stkbuf = jl_malloc_stack(&bufsz, NULL); if (stkbuf != NULL) { - ptls->stackbase = stkbuf + ssize; - ptls->stacksize = ssize; + ptls->stackbase = (char*)stkbuf + bufsz; + ptls->stacksize = bufsz; + } + else { + ptls->stacksize = 0; } } #endif @@ -1727,7 +1668,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT { - return 
t->started; + return t->ctx.started; } JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT diff --git a/src/threading.c b/src/threading.c index e2eb686e3061a..77956786af3f4 100644 --- a/src/threading.c +++ b/src/threading.c @@ -18,7 +18,7 @@ // For variant 1 JL_ELF_TLS_INIT_SIZE is the size of the thread control block (TCB) // For variant 2 JL_ELF_TLS_INIT_SIZE is 0 #if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) -# if defined(_CPU_X86_64_) || defined(_CPU_X86_) +# if defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_RISCV64_) # define JL_ELF_TLS_VARIANT 2 # define JL_ELF_TLS_INIT_SIZE 0 # elif defined(_CPU_AARCH64_) @@ -49,6 +49,8 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0; +JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled = 0; + JL_DLLEXPORT void *jl_get_ptls_states(void) { // mostly deprecated: use current_task instead @@ -74,6 +76,16 @@ JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr) { +#ifdef _OS_DARWIN_ + jl_task_t *ct = jl_get_current_task(); + if (ct != NULL && ct->ptls) { + if (sr == NULL) + pthread_setspecific(jl_safe_restore_key, (void*)sr); + ct->ptls->safe_restore = sr; + if (sr == NULL) + return; + } +#endif pthread_setspecific(jl_safe_restore_key, (void*)sr); } #endif @@ -82,51 +94,17 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr) // The tls_states buffer: // // On platforms that do not use ELF (i.e. where `__thread` is emulated with -// lower level API) (Mac, Windows), we use the platform runtime API to create +// lower level API) (Windows), we use the platform runtime API to create // TLS variable directly. // This is functionally equivalent to using `__thread` but can be // more efficient since we can have better control over the creation and // initialization of the TLS buffer. // -// On platforms that use ELF (Linux, FreeBSD), we use a `__thread` variable +// On platforms that support native TLS (ELF platforms + Macos) we use a `__thread` variable // as the fallback in the shared object. For better efficiency, we also // create a `__thread` variable in the main executable using a static TLS // model. -#if defined(_OS_DARWIN_) -// Mac doesn't seem to have static TLS model so the runtime TLS getter -// registration will only add overhead to TLS access. The `__thread` variables -// are emulated with `pthread_key_t` so it is actually faster to use it directly. -static pthread_key_t jl_pgcstack_key; - -__attribute__((constructor)) void jl_init_tls(void) -{ - pthread_key_create(&jl_pgcstack_key, NULL); -} - -JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT -{ - return (jl_gcframe_t**)pthread_getspecific(jl_pgcstack_key); -} - -void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT -{ - pthread_setspecific(jl_pgcstack_key, (void*)pgcstack); -} - -void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k) -{ - // for codegen - *f = pthread_getspecific; - *k = jl_pgcstack_key; -} - - -JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k) -{ - jl_safe_printf("ERROR: Attempt to change TLS address.\n"); -} - -#elif defined(_OS_WINDOWS_) +#if defined(_OS_WINDOWS_) // Apparently windows doesn't have a static TLS model (or one that can be // reliably used from a shared library) either..... Use `TLSAlloc` instead. 
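The hunk above drops the Darwin-only pthread-key implementation of the pgcstack accessors: the rewritten comment treats macOS like the ELF platforms and relies on a native `__thread` variable, while Windows keeps its TlsAlloc-based path. As a rough, standalone illustration of the two access patterns (not part of the patch; the `demo_*` names are hypothetical stand-ins for the real jl_get_pgcstack/jl_set_pgcstack machinery), a `__thread` access is a plain thread-pointer-relative load or store, whereas the key-based form pays a library call on every access:

#include <pthread.h>
#include <stddef.h>

/* New-style: native TLS. Once the static TLS offset is resolved, reads and
 * writes are ordinary loads/stores relative to the thread pointer. */
static __thread void *demo_pgcstack = NULL;

static void *demo_get_native(void)      { return demo_pgcstack; }
static void  demo_set_native(void *pgc) { demo_pgcstack = pgc; }

/* Old-style (what the removed Darwin branch did): emulate TLS with a pthread
 * key, going through pthread_getspecific/pthread_setspecific per access. */
static pthread_key_t demo_key;

static void  demo_init_key(void)     { pthread_key_create(&demo_key, NULL); }
static void *demo_get_key(void)      { return pthread_getspecific(demo_key); }
static void  demo_set_key(void *pgc) { pthread_setspecific(demo_key, pgc); }

This is also the trade-off the removed comment described: without a usable static TLS model, an emulated `__thread` variable would itself bounce through the same pthread machinery, which is why the key-based form used to be the faster choice on macOS.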
@@ -314,6 +292,9 @@ static uv_mutex_t tls_lock; // controls write-access to these variables: _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; int jl_all_tls_states_size; static uv_cond_t cond; +// concurrent reads are permitted, using the same pattern as mtsmall_arraylist +// it is implemented separately because the API of direct jl_all_tls_states use is already widely prevalent +void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT; // return calling thread's ID JL_DLLEXPORT int16_t jl_threadid(void) @@ -332,7 +313,19 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT if (tid < n) return (int8_t)i; } - return 0; // everything else uses threadpool 0 (though does not become part of any threadpool) + return -1; // everything else uses threadpool -1 (does not belong to any threadpool) +} + +// get thread local rng +JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT +{ + return jl_current_task->ptls->rngseed; +} + +// get thread local rng +JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT +{ + jl_current_task->ptls->rngseed = new_seed; } jl_ptls_t jl_init_threadtls(int16_t tid) @@ -347,7 +340,7 @@ jl_ptls_t jl_init_threadtls(int16_t tid) #ifndef _OS_WINDOWS_ pthread_setspecific(jl_task_exit_key, (void*)ptls); #endif - ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self(); + ptls->system_id = uv_thread_self(); ptls->rngseed = jl_rand(); if (tid == 0) ptls->disable_gc = 1; @@ -361,15 +354,15 @@ jl_ptls_t jl_init_threadtls(int16_t tid) } } #endif - jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe + jl_atomic_store_relaxed(&ptls->gc_state, JL_GC_STATE_UNSAFE); // GC unsafe // Conditionally initialize the safepoint address. See comment in // `safepoint.c` if (tid == 0) { - ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size); + jl_atomic_store_relaxed(&ptls->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size)); } else { - ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size * 2 + - sizeof(size_t)); + jl_atomic_store_relaxed(&ptls->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 2 + + sizeof(size_t))); } jl_bt_element_t *bt_data = (jl_bt_element_t*) malloc_s(sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1)); @@ -377,15 +370,13 @@ jl_ptls_t jl_init_threadtls(int16_t tid) ptls->bt_data = bt_data; small_arraylist_new(&ptls->locks, 0); jl_init_thread_heap(ptls); - - uv_mutex_init(&ptls->sleep_lock); - uv_cond_init(&ptls->wake_signal); + jl_init_thread_scheduler(ptls); uv_mutex_lock(&tls_lock); - jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); if (tid == -1) tid = jl_atomic_load_relaxed(&jl_n_threads); ptls->tid = tid; + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); if (jl_all_tls_states_size <= tid) { int i, newsize = jl_all_tls_states_size + tid + 2; jl_ptls_t *newpptls = (jl_ptls_t*)calloc(newsize, sizeof(jl_ptls_t)); @@ -403,18 +394,44 @@ jl_ptls_t jl_init_threadtls(int16_t tid) jl_fence(); uv_mutex_unlock(&tls_lock); +#if !defined(_OS_WINDOWS_) && !defined(JL_DISABLE_LIBUNWIND) && !defined(LLVMLIBUNWIND) + // ensures libunwind TLS space for this thread is allocated eagerly + // to make unwinding async-signal-safe even when using thread local caches. 
+ unw_ensure_tls(); +#endif + return ptls; } +static _Atomic(jl_function_t*) init_task_lock_func JL_GLOBALLY_ROOTED = NULL; + +static void jl_init_task_lock(jl_task_t *ct) +{ + jl_function_t *done = jl_atomic_load_relaxed(&init_task_lock_func); + if (done == NULL) { + done = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("init_task_lock")); + if (done != NULL) + jl_atomic_store_release(&init_task_lock_func, done); + } + if (done != NULL) { + jl_value_t *args[2] = {done, (jl_value_t*)ct}; + JL_TRY { + jl_apply(args, 2); + } + JL_CATCH { + jl_no_exc_handler(jl_current_exception(ct), ct); + } + } +} + + JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) { // `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running. // we can't use a normal safepoint because we don't have signal handlers yet. - // we also can't use jl_safepoint_wait_gc because that assumes we're in a task. jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - while (jl_atomic_load_acquire(&jl_gc_running)) { - jl_cpu_pause(); - } + // pass NULL as a special token to indicate we are running on an unmanaged task + jl_safepoint_wait_gc(NULL); // this check is coupled with the one in `jl_safepoint_wait_gc`, where we observe if a // foreign thread has asked to disable the GC, guaranteeing the order of events. @@ -428,10 +445,38 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) JL_GC_PROMISE_ROOTED(ct); uv_random(NULL, NULL, &ct->rngState, sizeof(ct->rngState), 0, NULL); jl_atomic_fetch_add(&jl_gc_disable_counter, -1); + ct->world_age = jl_get_world_counter(); // root_task sets world_age to 1 + jl_init_task_lock(ct); return &ct->gcstack; } + +void jl_safepoint_suspend_all_threads(jl_task_t *ct) +{ + // TODO: prevent jl_n_threads changing or jl_safepoint_resume_thread calls on another thread + //uv_mutex_lock(&tls_lock); + //disallow_resume = ct->tid; + //uv_mutex_unlock(&tls_lock); + for (int16_t tid = 0; tid < jl_atomic_load_relaxed(&jl_n_threads); tid++) { + if (tid != jl_atomic_load_relaxed(&ct->tid)) + jl_safepoint_suspend_thread(tid, 1); + }; +} + +void jl_safepoint_resume_all_threads(jl_task_t *ct) +{ + //uv_mutex_lock(&tls_lock); + //if (disallow_resume != ct->tid) return; + //uv_mutex_unlock(&tls_lock); + for (int16_t tid = 0; tid < jl_atomic_load_relaxed(&jl_n_threads); tid++) { + if (tid != jl_atomic_load_relaxed(&ct->tid)) + jl_safepoint_resume_thread(tid); + }; +} + void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT; +void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT; +void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER { @@ -442,9 +487,55 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER // safepoint until GC exit, in case GC was running concurrently while in // prior unsafe-region (before we let it release the stack memory) (void)jl_gc_unsafe_enter(ptls); - jl_atomic_store_relaxed(&ptls->sleep_check_state, 2); // dead, interpreted as sleeping and unwakeable - jl_fence(); - jl_wakeup_thread(0); // force thread 0 to see that we do not have the IO lock (and am dead) + scheduler_delete_thread(ptls); + // need to clear pgcstack and eh, but we can clear everything now too + jl_task_t *ct = jl_atomic_load_relaxed(&ptls->current_task); + jl_task_frame_noreturn(ct); + if (jl_set_task_tid(ptls->root_task, ptls->tid)) { + // the system will probably free this stack memory soon + // so prevent any other thread from accessing it later + if (ct != ptls->root_task) + 
jl_task_frame_noreturn(ptls->root_task); + } + else { + // Uh oh. The user cleared the sticky bit so it started running + // elsewhere, then called pthread_exit on this thread from another + // Task, which will free the stack memory of that root task soon. This + // is not recoverable. Though we could just hang here, a fatal message + // is likely better. + jl_safe_printf("fatal: thread exited from wrong Task.\n"); + abort(); + } + ptls->previous_exception = NULL; + // allow the page root_task is on to be freed + ptls->root_task = NULL; + jl_free_thread_gc_state(ptls); + // park in safe-region from here on (this may run GC again) + (void)jl_gc_safe_enter(ptls); + // try to free some state we do not need anymore +#ifndef _OS_WINDOWS_ + void *signal_stack = ptls->signal_stack; + size_t signal_stack_size = ptls->signal_stack_size; + if (signal_stack != NULL) { + stack_t ss; + if (sigaltstack(NULL, &ss)) + jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); + if (ss.ss_sp == signal_stack) { + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, NULL) != 0) { + jl_errorf("warning: sigaltstack: %s (will leak this memory)", strerror(errno)); + signal_stack = NULL; + } + } + if (signal_stack != NULL) { + if (signal_stack_size) + _jl_free_stack(ptls ,signal_stack, signal_stack_size); + else + free(signal_stack); + } + ptls->signal_stack = NULL; + } +#endif // Acquire the profile write lock, to ensure we are not racing with the `kill` // call in the profile code which will also try to look at this thread. // We have no control over when the user calls pthread_join, so we must do @@ -459,21 +550,7 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER #else pthread_mutex_lock(&in_signal_lock); #endif - // need to clear pgcstack and eh, but we can clear everything now too - jl_task_frame_noreturn(jl_atomic_load_relaxed(&ptls->current_task)); - if (jl_set_task_tid(ptls->root_task, ptls->tid)) { - // the system will probably free this stack memory soon - // so prevent any other thread from accessing it later - jl_task_frame_noreturn(ptls->root_task); - } - else { - // Uh oh. The user cleared the sticky bit so it started running - // elsewhere, then called pthread_exit on this thread. This is not - // recoverable. Though we could just hang here, a fatal message is better. 
- jl_safe_printf("fatal: thread exited from wrong Task.\n"); - abort(); - } - jl_atomic_store_relaxed(&ptls->current_task, NULL); // dead + jl_atomic_store_relaxed(&ptls->current_task, NULL); // indicate dead // finally, release all of the locks we had grabbed #ifdef _OS_WINDOWS_ jl_unlock_profile_wr(); @@ -484,8 +561,8 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER #else pthread_mutex_unlock(&in_signal_lock); #endif - // then park in safe-region - (void)jl_gc_safe_enter(ptls); + free(ptls->bt_data); + small_arraylist_free(&ptls->locks); } //// debugging hack: if we are exiting too fast for error message printing on threads, @@ -493,7 +570,6 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER //// the other threads time to fail and emit their failure message //__attribute__((destructor)) static void _waitthreaddeath(void) { sleep(1); } -JL_DLLEXPORT jl_mutex_t jl_codegen_lock; jl_mutex_t typecache_lock; JL_DLLEXPORT ssize_t jl_tls_offset = -1; @@ -586,6 +662,8 @@ static void jl_check_tls(void) asm("mrs %0, tpidr_el0" : "=r"(tp)); #elif defined(__ARM_ARCH) && __ARM_ARCH >= 7 asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(tp)); +#elif defined(_CPU_RISCV64_) + asm("mv %0, tp" : "=r"(tp)); #else # error "Cannot emit thread pointer for this architecture." #endif @@ -655,6 +733,7 @@ void jl_init_threading(void) } } + int cpu = jl_cpu_threads(); jl_n_markthreads = jl_options.nmarkthreads - 1; jl_n_sweepthreads = jl_options.nsweepthreads; if (jl_n_markthreads == -1) { // --gcthreads not specified @@ -675,30 +754,41 @@ void jl_init_threading(void) } else { // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified, - // set the number of mark threads to half of compute threads + // set the number of mark threads to the number of compute threads // and number of sweep threads to 0 - if (nthreads <= 1) { - jl_n_markthreads = 0; - } - else { - jl_n_markthreads = (nthreads / 2) - 1; + jl_n_markthreads = nthreads - 1; // -1 for the master (mutator) thread which may also do marking + // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified, + // cap the number of threads that may run the mark phase to + // the number of CPU cores + if (jl_n_markthreads + 1 >= cpu) { + jl_n_markthreads = cpu - 1; } } } + // warn the user if they try to run with a number + // of GC threads which is larger than the number + // of physical cores + if (jl_n_markthreads + 1 > cpu) { + jl_safe_printf("WARNING: running Julia with %d GC threads on %d CPU cores\n", jl_n_markthreads + 1, cpu); + } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; + if (strstr(jl_gc_active_impl(), "MMTk")) { + ngcthreads = 0; + } + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; jl_n_threads_per_pool[1] = nthreads; - + assert(jl_all_tls_states_size > 0); jl_atomic_store_release(&jl_all_tls_states, (jl_ptls_t*)calloc(jl_all_tls_states_size, sizeof(jl_ptls_t))); jl_atomic_store_release(&jl_n_threads, jl_all_tls_states_size); jl_n_gcthreads = ngcthreads; - gc_first_tid = nthreads; + gc_first_tid = nthreads + nthreadsi; } -static uv_barrier_t thread_init_done; +uv_barrier_t thread_init_done; void jl_start_threads(void) { @@ -737,33 +827,24 @@ void jl_start_threads(void) uv_barrier_init(&thread_init_done, nthreads); // GC/System threads need to be after the worker threads. 
- int nworker_threads = nthreads - ngcthreads; + int nmutator_threads = nthreads - ngcthreads; - for (i = 1; i < nthreads; ++i) { + for (i = 1; i < nmutator_threads; ++i) { jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread t->tid = i; t->barrier = &thread_init_done; - if (i < nworker_threads) { - uv_thread_create(&uvtid, jl_threadfun, t); - if (exclusive) { - mask[i] = 1; - uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize); - mask[i] = 0; - } - } - else if (i == nthreads - 1 && jl_n_sweepthreads == 1) { - uv_thread_create(&uvtid, jl_gc_sweep_threadfun, t); - } - else { - uv_thread_create(&uvtid, jl_gc_mark_threadfun, t); + uv_thread_create(&uvtid, jl_threadfun, t); + if (exclusive) { + mask[i] = 1; + uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize); + mask[i] = 0; } uv_thread_detach(&uvtid); } - - uv_barrier_wait(&thread_init_done); } -_Atomic(unsigned) _threadedregion; // HACK: keep track of whether to prioritize IO or threading +_Atomic(unsigned) _threadedregion; // keep track of whether to prioritize IO or threading +_Atomic(uint16_t) io_loop_tid; // mark which thread is assigned to run the uv_loop JL_DLLEXPORT int jl_in_threaded_region(void) { @@ -784,7 +865,27 @@ JL_DLLEXPORT void jl_exit_threaded_region(void) JL_UV_UNLOCK(); // make sure thread 0 is not using the sleep_lock // so that it may enter the libuv event loop instead - jl_wakeup_thread(0); + jl_fence(); + jl_wakeup_thread(jl_atomic_load_relaxed(&io_loop_tid)); + } +} + +JL_DLLEXPORT void jl_set_io_loop_tid(int16_t tid) +{ + if (tid < 0 || tid >= jl_atomic_load_relaxed(&jl_n_threads)) { + // TODO: do we care if this thread has exited or not started yet, + // since ptls2 might not be defined yet and visible on all threads yet + return; + } + jl_atomic_store_relaxed(&io_loop_tid, tid); + jl_fence(); + if (jl_atomic_load_relaxed(&_threadedregion) == 0) { + // make sure the previous io_loop_tid leaves the libuv event loop + JL_UV_LOCK(); + JL_UV_UNLOCK(); + // make sure thread io_loop_tid is not using the sleep_lock + // so that it may enter the libuv event loop instead + jl_wakeup_thread(tid); } } @@ -818,15 +919,20 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) jl_profile_lock_acquired(lock); return; } - if (safepoint) { - jl_gc_safepoint_(self->ptls); - } if (jl_running_under_rr(0)) { // when running under `rr`, use system mutexes rather than spin locking + int8_t gc_state; + if (safepoint) + gc_state = jl_gc_safe_enter(self->ptls); uv_mutex_lock(&tls_lock); if (jl_atomic_load_relaxed(&lock->owner)) uv_cond_wait(&cond, &tls_lock); uv_mutex_unlock(&tls_lock); + if (safepoint) + jl_gc_safe_leave(self->ptls, gc_state); + } + else if (safepoint) { + jl_gc_safepoint_(self->ptls); } jl_cpu_suspend(); owner = jl_atomic_load_relaxed(&lock->owner); @@ -923,6 +1029,52 @@ JL_DLLEXPORT int jl_alignment(size_t sz) return jl_gc_alignment(sz); } +// Return values: +// 0 == success +// 1 == invalid thread id provided +// 2 == ptls2 was NULL +// <0 == uv_thread_getaffinity exit code +JL_DLLEXPORT int jl_getaffinity(int16_t tid, char *mask, int cpumasksize) { + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + if (tid < 0 || tid >= nthreads) + return 1; + + // TODO: use correct lock. system_id is only legal if the thread is alive. 
+ jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (ptls2 == NULL) + return 2; + uv_thread_t uvtid = ptls2->system_id; + + int ret_uv = uv_thread_getaffinity(&uvtid, mask, cpumasksize); + if (ret_uv != 0) + return ret_uv; + + return 0; // success +} + +// Return values: +// 0 == success +// 1 == invalid thread id provided +// 2 == ptls2 was NULL +// <0 == uv_thread_getaffinity exit code +JL_DLLEXPORT int jl_setaffinity(int16_t tid, char *mask, int cpumasksize) { + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + if (tid < 0 || tid >= nthreads) + return 1; + + // TODO: use correct lock. system_id is only legal if the thread is alive. + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (ptls2 == NULL) + return 2; + uv_thread_t uvtid = ptls2->system_id; + + int ret_uv = uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize); + if (ret_uv != 0) + return ret_uv; + + return 0; // success +} + #ifdef __cplusplus } #endif diff --git a/src/threading.h b/src/threading.h index 73d2cd73fb70d..cb26537699713 100644 --- a/src/threading.h +++ b/src/threading.h @@ -12,6 +12,8 @@ extern "C" { #define PROFILE_JL_THREADING 0 +extern uv_barrier_t thread_init_done; + extern _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */ typedef struct _jl_threadarg_t { @@ -25,8 +27,8 @@ jl_ptls_t jl_init_threadtls(int16_t tid) JL_NOTSAFEPOINT; // provided by a threading infrastructure void jl_init_threadinginfra(void); -void jl_gc_mark_threadfun(void *arg); -void jl_gc_sweep_threadfun(void *arg); +void jl_parallel_gc_threadfun(void *arg); +void jl_concurrent_gc_threadfun(void *arg); void jl_threadfun(void *arg); #ifdef __cplusplus diff --git a/src/timing.c b/src/timing.c index d933f082c816e..265e50ad3dd74 100644 --- a/src/timing.c +++ b/src/timing.c @@ -6,7 +6,7 @@ #include "options.h" #include "stdio.h" -#if defined(USE_TRACY) || defined(USE_ITTAPI) +#if defined(USE_TRACY) || defined(USE_ITTAPI) || defined(USE_NVTX) #define DISABLE_FREQUENT_EVENTS #endif @@ -49,6 +49,10 @@ static arraylist_t jl_timing_ittapi_events; static jl_mutex_t jl_timing_ittapi_events_lock; #endif //USE_ITTAPI +#ifdef USE_NVTX +static nvtxDomainHandle_t jl_timing_nvtx_domain; +#endif + #ifdef USE_TIMING_COUNTS static int cmp_counts_events(const void *a, const void *b) { jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a; @@ -139,6 +143,13 @@ void jl_init_timing(void) qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, sizeof(const char *), indirect_strcmp); +#ifdef USE_NVTX + jl_timing_nvtx_domain = nvtxDomainCreateA("julia"); + for (int i = 0; i < JL_TIMING_SUBSYSTEM_LAST; i++) { + nvtxDomainNameCategoryA(jl_timing_nvtx_domain, i + 1, jl_timing_subsystems[i]); + } +#endif + int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI i = 0; @@ -317,6 +328,25 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c event->ittapi_event = _jl_timing_ittapi_event_create(name); #endif // USE_ITTAPI +#ifdef USE_NVTX + nvtxEventAttributes_t nvtx_attrs = {0}; + nvtx_attrs.version = NVTX_VERSION; + nvtx_attrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + + nvtxStringHandle_t nvtx_message = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, name); + nvtx_attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED; + nvtx_attrs.message.registered = nvtx_message; + + // 0 is the default (unnamed) category + nvtx_attrs.category = maybe_subsystem == JL_TIMING_SUBSYSTEM_LAST ? 
0 : maybe_subsystem+1; + + // simple Knuth hash to get nice colors + nvtx_attrs.colorType = NVTX_COLOR_ARGB; + nvtx_attrs.color = (nvtx_attrs.category * 2654435769) >> 8; + + event->nvtx_attrs = nvtx_attrs; +#endif // USE_NVTX + #ifdef USE_TRACY event->tracy_srcloc.name = name; event->tracy_srcloc.function = function; @@ -342,10 +372,12 @@ JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_ JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { assert(!block->is_running); if (!_jl_timing_enabled(block->event->subsystem)) return; + if (jl_get_pgcstack() == NULL) return; // not setup on this thread uint64_t t = cycleclock(); (void)t; _COUNTS_START(&block->counts_ctx, t); _ITTAPI_START(block); + _NVTX_START(block); _TRACY_START(block); jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; @@ -361,6 +393,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { if (block->is_running) { uint64_t t = cycleclock(); (void)t; _ITTAPI_STOP(block); + _NVTX_STOP(block); _TRACY_STOP(block->tracy_ctx); _COUNTS_STOP(block, t); diff --git a/src/timing.h b/src/timing.h index 30f6ad0ab3b5c..61118cc3b41ab 100644 --- a/src/timing.h +++ b/src/timing.h @@ -66,7 +66,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block); #define HAVE_TIMING_SUPPORT #endif -#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_TIMING_COUNTS ) +#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_NVTX ) || defined( USE_TIMING_COUNTS ) #define ENABLE_TIMINGS #endif @@ -115,6 +115,12 @@ typedef struct ___tracy_source_location_data TracySrcLocData; #include #endif +#ifdef USE_NVTX +#pragma GCC visibility push(default) +#include +#pragma GCC visibility pop +#endif + #ifdef __cplusplus extern "C" { #endif @@ -160,8 +166,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(METHOD_LOOKUP_SLOW) \ X(METHOD_LOOKUP_FAST) \ X(CODEINST_COMPILE) \ - X(LLVM_OPT) \ - X(LLVM_ORC) \ + X(LLVM_JIT) \ X(METHOD_MATCH) \ X(TYPE_CACHE_LOOKUP) \ X(TYPE_CACHE_INSERT) \ @@ -175,6 +180,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(LOAD_MODULE) \ X(LOAD_IMAGE) \ X(VERIFY_IMAGE) \ + X(VERIFY_IR) \ X(SAVE_MODULE) \ X(INIT_MODULE) \ X(LOCK_SPIN) \ @@ -276,6 +282,20 @@ typedef struct _jl_timing_counts_t { #define _ITTAPI_STOP(block) #endif + +#ifdef USE_NVTX +#define _NVTX_EVENT_MEMBER nvtxEventAttributes_t nvtx_attrs; +#define _NVTX_BLOCK_MEMBER nvtxRangeId_t nvtx_rangeid; +#define _NVTX_START(block) (block)->nvtx_rangeid = nvtxDomainRangeStartEx(jl_timing_nvtx_domain, &(block)->event->nvtx_attrs) +#define _NVTX_STOP(block) nvtxDomainRangeEnd(jl_timing_nvtx_domain, (block)->nvtx_rangeid) +#else +#define _NVTX_EVENT_MEMBER +#define _NVTX_BLOCK_MEMBER +#define _NVTX_START(block) +#define _NVTX_STOP(block) +#endif + + /** * Top-level jl_timing implementation **/ @@ -292,6 +312,7 @@ extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST]; struct _jl_timing_event_t { // typedef in julia.h _TRACY_EVENT_MEMBER _ITTAPI_EVENT_MEMBER + _NVTX_EVENT_MEMBER _COUNTS_EVENT_MEMBER int subsystem; @@ -310,6 +331,7 @@ struct _jl_timing_block_t { // typedef in julia.h _TRACY_BLOCK_MEMBER _ITTAPI_BLOCK_MEMBER + _NVTX_BLOCK_MEMBER _COUNTS_BLOCK_MEMBER uint8_t is_running; @@ -362,6 +384,12 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N #define _ITTAPI_COUNTER_MEMBER #endif +#ifdef USE_NVTX +#define _NVTX_COUNTER_MEMBER void * __nvtx_null; +#else 
+#define _NVTX_COUNTER_MEMBER +#endif + #ifdef USE_TRACY # define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter; # else @@ -376,6 +404,7 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N typedef struct { _ITTAPI_COUNTER_MEMBER + _NVTX_COUNTER_MEMBER _TRACY_COUNTER_MEMBER _COUNTS_MEMBER } jl_timing_counter_t; diff --git a/src/toplevel.c b/src/toplevel.c index 51ff93488426f..fb217ec7cb52e 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -82,7 +82,7 @@ void jl_module_run_initializer(jl_module_t *m) } else { jl_rethrow_other(jl_new_struct(jl_initerror_type, m->name, - jl_current_exception())); + jl_current_exception(ct))); } } } @@ -121,7 +121,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex { jl_task_t *ct = jl_current_task; assert(ex->head == jl_module_sym); - if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) { + if (jl_array_nrows(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) { jl_error("syntax: malformed module expression"); } @@ -155,25 +155,31 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex } } else { - jl_binding_t *b = jl_get_binding_wr(parent_module, name); - jl_declare_constant(b, parent_module, name); - jl_value_t *old = NULL; - if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) { - if (!jl_is_module(old)) { - jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name)); + jl_binding_t *b = jl_get_module_binding(parent_module, name, 1); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, ct->world_age); + jl_ptr_kind_union_t pku = encode_restriction(NULL, BINDING_KIND_UNDEF_CONST); + jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)newm, BINDING_KIND_CONST); + if (!jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku)) { + if (decode_restriction_kind(pku) != BINDING_KIND_CONST) { + jl_declare_constant_val(b, parent_module, name, (jl_value_t*)newm); + } else { + // As a special exception allow binding replacement of modules + if (!jl_is_module(decode_restriction_value(pku))) { + jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name)); + } + if (jl_generating_output()) + jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name)); + jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name)); + pku = jl_atomic_exchange(&bpart->restriction, new_pku); + } + jl_gc_wb(bpart, newm); + if (decode_restriction_value(pku) != NULL && jl_is_module(decode_restriction_value(pku))) { + // create a hidden gc root for the old module + JL_LOCK(&jl_modules_mutex); + uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, decode_restriction_value(pku)); + *refcnt += 1; + JL_UNLOCK(&jl_modules_mutex); } - if (jl_generating_output()) - jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name)); - jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name)); - old = jl_atomic_exchange(&b->value, (jl_value_t*)newm); - } - jl_gc_wb_binding(b, newm); - if (old != NULL) { - // create a hidden gc root for the old module - JL_LOCK(&jl_modules_mutex); - uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)old); - *refcnt += 1; - JL_UNLOCK(&jl_modules_mutex); } } @@ -188,7 +194,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; int lineno = 0; const char *filename = "none"; - if (jl_array_len(exprs) > 0) { + if 
(jl_array_nrows(exprs) > 0) { jl_value_t *lineex = jl_array_ptr_ref(exprs, 0); if (jl_is_linenode(lineex)) { lineno = jl_linenode_line(lineex); @@ -200,44 +206,32 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex if (std_imports) { if (jl_base_module != NULL) { jl_add_standard_imports(newm); + jl_datatype_t *include_into = (jl_datatype_t *)jl_get_global(jl_base_module, jl_symbol("IncludeInto")); + if (include_into) { + form = jl_new_struct(include_into, newm); + jl_set_const(newm, jl_symbol("include"), form); + } + } + jl_datatype_t *eval_into = (jl_datatype_t *)jl_get_global(jl_core_module, jl_symbol("EvalInto")); + if (eval_into) { + form = jl_new_struct(eval_into, newm); + jl_set_const(newm, jl_symbol("eval"), form); } - // add `eval` function - form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno); - jl_toplevel_eval_flex(newm, form, 0, 1); - form = NULL; } - for (int i = 0; i < jl_array_len(exprs); i++) { + newm->file = jl_symbol(filename); + jl_gc_wb_knownold(newm, newm->file); + newm->line = lineno; + + for (int i = 0; i < jl_array_nrows(exprs); i++) { // process toplevel form ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno); + form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, filename, lineno); ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - (void)jl_toplevel_eval_flex(newm, form, 1, 1); + (void)jl_toplevel_eval_flex(newm, form, 1, 1, &filename, &lineno); } - newm->primary_world = jl_atomic_load_acquire(&jl_world_counter); ct->world_age = last_age; -#if 0 - // some optional post-processing steps - size_t i; - jl_svec_t *table = jl_atomic_load_relaxed(&newm->bindings); - for (size_t i = 0; i < jl_svec_len(table); i++) { - jl_binding_t *b = (jl_binding_t*)jl_svec_ref(table, i); - if ((void*)b != jl_nothing) { - // remove non-exported macros - if (jl_symbol_name(b->name)[0]=='@' && - !b->exportp && b->owner == b) - b->value = NULL; - // error for unassigned exports - /* - if (b->exportp && b->owner==b && b->value==NULL) - jl_errorf("identifier %s exported from %s is not initialized", - jl_symbol_name(b->name), jl_symbol_name(newm->name)); - */ - } - } -#endif - JL_LOCK(&jl_modules_mutex); uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)newm); assert(*refcnt > (uintptr_t)HT_NOTFOUND); @@ -254,7 +248,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex form = NULL; if (!jl_generating_output()) { if (!ptrhash_has(&jl_current_modules, (void*)newm->parent)) { - size_t i, l = jl_array_len(jl_module_init_order); + size_t i, l = jl_array_nrows(jl_module_init_order); size_t ns = 0; form = (jl_value_t*)jl_alloc_vec_any(0); for (i = 0; i < l; i++) { @@ -273,7 +267,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex JL_UNLOCK(&jl_modules_mutex); if (form) { - size_t i, l = jl_array_len(form); + size_t i, l = jl_array_nrows(form); for (i = 0; i < l; i++) { jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(form, i); JL_GC_PROMISE_ROOTED(m); @@ -287,13 +281,13 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex return (jl_value_t*)newm; } -static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast) +static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast, const char **toplevel_filename, int *toplevel_lineno) { 
jl_task_t *ct = jl_current_task; jl_value_t **args; JL_GC_PUSHARGS(args, 3); - args[1] = jl_toplevel_eval_flex(m, x, fast, 0); - args[2] = jl_toplevel_eval_flex(m, f, fast, 0); + args[1] = jl_toplevel_eval_flex(m, x, fast, 0, toplevel_filename, toplevel_lineno); + args[2] = jl_toplevel_eval_flex(m, f, fast, 0, toplevel_filename, toplevel_lineno); if (jl_is_module(args[1])) { JL_TYPECHK(getglobal, symbol, args[2]); args[0] = jl_eval_global_var((jl_module_t*)args[1], (jl_sym_t*)args[2]); @@ -309,30 +303,72 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f return args[0]; } -void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) { - // create uninitialized mutable binding for "global x" decl sometimes or probably - size_t i, l = jl_array_len(ex->args); - for (i = 0; i < l; i++) { - jl_value_t *arg = jl_exprarg(ex, i); - jl_module_t *gm; - jl_sym_t *gs; - if (jl_is_globalref(arg)) { - gm = jl_globalref_mod(arg); - gs = jl_globalref_name(arg); +void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty) +{ + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + jl_ptr_kind_union_t new_pku = encode_restriction(ty, BINDING_KIND_GLOBAL); + while (1) { + if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL) { + if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) { + if (jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku)) + break; + continue; + } else { + jl_errorf("cannot set type for imported global %s.%s.", + jl_symbol_name(mod->name), jl_symbol_name(sym)); + } } - else { - assert(jl_is_symbol(arg)); - gm = m; - gs = (jl_sym_t*)arg; + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + jl_errorf("cannot set type for imported constant %s.%s.", + jl_symbol_name(mod->name), jl_symbol_name(sym)); } - if (!jl_binding_resolved_p(gm, gs)) { - jl_binding_t *b = jl_get_binding_wr(gm, gs); - if (set_type) { - jl_value_t *old_ty = NULL; - // maybe set the type too, perhaps - jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type); - } + jl_value_t *old_ty = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old_ty); + if (!jl_types_equal(ty, old_ty)) { + jl_errorf("cannot set type for global %s.%s. 
It already has a value or is already set to a different type.", + jl_symbol_name(mod->name), jl_symbol_name(sym)); } + if (jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku)) + break; + } + jl_gc_wb(bpart, ty); +} + +extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var); +void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type) { + // create uninitialized mutable binding for "global x" decl sometimes or probably + jl_module_t *gm; + jl_sym_t *gs; + assert(!jl_is_expr(arg)); // Should have been resolved before this + if (jl_is_globalref(arg)) { + gm = jl_globalref_mod(arg); + gs = jl_globalref_name(arg); + } + else { + assert(jl_is_symbol(arg)); + gm = m; + gs = (jl_sym_t*)arg; + } + jl_binding_t *b = jl_get_module_binding(gm, gs, 1); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + while (decode_restriction_kind(pku) == BINDING_KIND_GUARD || decode_restriction_kind(pku) == BINDING_KIND_FAILED) { + check_safe_newbinding(gm, gs); + if (jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction(NULL, BINDING_KIND_DECLARED))) + break; + } + if (set_type) { + jl_binding_set_type(b, gm, gs, set_type); + } +} + +void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) +{ + size_t i, l = jl_array_nrows(ex->args); + for (i = 0; i < l; i++) { + jl_value_t *arg = jl_exprarg(ex, i); + jl_declare_global(m, arg, NULL); } } @@ -352,7 +388,7 @@ JL_DLLEXPORT jl_module_t *jl_base_relative_to(jl_module_t *m) return jl_top_module; } -static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *has_opaque) +static void expr_attributes(jl_value_t *v, jl_array_t *body, int *has_ccall, int *has_defs, int *has_opaque) { if (!jl_is_expr(v)) return; @@ -390,14 +426,15 @@ static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *h else if (head == jl_call_sym && jl_expr_nargs(e) > 0) { jl_value_t *called = NULL; jl_value_t *f = jl_exprarg(e, 0); + if (jl_is_ssavalue(f)) { + f = jl_array_ptr_ref(body, ((jl_ssavalue_t*)f)->id - 1); + } if (jl_is_globalref(f)) { jl_module_t *mod = jl_globalref_mod(f); jl_sym_t *name = jl_globalref_name(f); if (jl_binding_resolved_p(mod, name)) { jl_binding_t *b = jl_get_binding(mod, name); - if (b && b->constp) { - called = jl_atomic_load_relaxed(&b->value); - } + called = jl_get_binding_value_if_const(b); } } else if (jl_is_quotenode(f)) { @@ -414,10 +451,10 @@ static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *h return; } int i; - for (i = 0; i < jl_array_len(e->args); i++) { + for (i = 0; i < jl_array_nrows(e->args); i++) { jl_value_t *a = jl_exprarg(e, i); if (jl_is_expr(a)) - expr_attributes(a, has_ccall, has_defs, has_opaque); + expr_attributes(a, body, has_ccall, has_defs, has_opaque); } } @@ -429,9 +466,9 @@ int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile) int has_ccall = 0, has_defs = 0, has_opaque = 0; if (include_force_compile && jl_has_meta(body, jl_force_compile_sym)) return 1; - for(i=0; i < jl_array_len(body); i++) { + for(i=0; i < jl_array_nrows(body); i++) { jl_value_t *stmt = jl_array_ptr_ref(body,i); - expr_attributes(stmt, &has_ccall, &has_defs, &has_opaque); + expr_attributes(stmt, body, &has_ccall, &has_defs, &has_opaque); if (has_ccall) return 1; } @@ -442,7 +479,7 @@ static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int { size_t i; *has_loops = 0; - for(i=0; i < 
jl_array_len(body); i++) { + for(i=0; i < jl_array_nrows(body); i++) { jl_value_t *stmt = jl_array_ptr_ref(body,i); if (!*has_loops) { if (jl_is_gotonode(stmt)) { @@ -454,26 +491,29 @@ static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int *has_loops = 1; } } - expr_attributes(stmt, has_ccall, has_defs, has_opaque); + expr_attributes(stmt, body, has_ccall, has_defs, has_opaque); } *forced_compile = jl_has_meta(body, jl_force_compile_sym); } +extern size_t jl_require_world; static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED { JL_TIMING(LOAD_IMAGE, LOAD_Require); jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "%s", jl_symbol_name(var)); - static jl_value_t *require_func = NULL; - int build_mode = jl_generating_output(); + int build_mode = jl_options.incremental && jl_generating_output(); jl_module_t *m = NULL; jl_task_t *ct = jl_current_task; + static jl_value_t *require_func = NULL; if (require_func == NULL && jl_base_module != NULL) { require_func = jl_get_global(jl_base_module, jl_symbol("require")); } if (require_func != NULL) { size_t last_age = ct->world_age; - ct->world_age = (build_mode ? jl_base_module->primary_world : jl_atomic_load_acquire(&jl_world_counter)); + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + if (build_mode && jl_require_world < ct->world_age) + ct->world_age = jl_require_world; jl_value_t *reqargs[3]; reqargs[0] = require_func; reqargs[1] = (jl_value_t*)mod; @@ -493,7 +533,7 @@ static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_RO static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PROPAGATES_ROOT, jl_array_t *args, jl_sym_t **name, const char *keyword) JL_GLOBALLY_ROOTED { - if (jl_array_len(args) == 0) + if (jl_array_nrows(args) == 0) jl_errorf("malformed \"%s\" statement", keyword); jl_sym_t *var = (jl_sym_t*)jl_array_ptr_ref(args, 0); size_t i = 1; @@ -517,14 +557,14 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR else { m = call_require(where, var); } - if (i == jl_array_len(args)) + if (i == jl_array_nrows(args)) return m; } else { // `.A.B.C`: strip off leading dots by following parent links m = where; while (1) { - if (i >= jl_array_len(args)) + if (i >= jl_array_nrows(args)) jl_error("invalid module path"); var = (jl_sym_t*)jl_array_ptr_ref(args, i); if (var != jl_dot_sym) @@ -541,7 +581,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR jl_type_error(keyword, (jl_value_t*)jl_symbol_type, (jl_value_t*)var); if (var == jl_dot_sym) jl_errorf("invalid %s path: \".\" in identifier path", keyword); - if (i == jl_array_len(args)-1) + if (i == jl_array_nrows(args)-1) break; m = (jl_module_t*)jl_eval_global_var(m, var); JL_GC_PROMISE_ROOTED(m); @@ -560,8 +600,10 @@ int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT ((jl_expr_t*)e)->head == jl_import_sym || ((jl_expr_t*)e)->head == jl_using_sym || ((jl_expr_t*)e)->head == jl_export_sym || + ((jl_expr_t*)e)->head == jl_public_sym || ((jl_expr_t*)e)->head == jl_thunk_sym || ((jl_expr_t*)e)->head == jl_global_sym || + ((jl_expr_t*)e)->head == jl_globaldecl_sym || ((jl_expr_t*)e)->head == jl_const_sym || ((jl_expr_t*)e)->head == jl_toplevel_sym || ((jl_expr_t*)e)->head == jl_error_sym || @@ -575,12 +617,13 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT jl_expr_t *ex = (jl_expr_t*)e; jl_sym_t *head = ex->head; if (head == jl_module_sym || head == jl_import_sym || head == jl_using_sym || - head == jl_export_sym 
|| head == jl_thunk_sym || head == jl_toplevel_sym || - head == jl_error_sym || head == jl_incomplete_sym || head == jl_method_sym) { + head == jl_export_sym || head == jl_public_sym || head == jl_thunk_sym || + head == jl_toplevel_sym || head == jl_error_sym || head == jl_incomplete_sym || + head == jl_method_sym) { return 0; } if (head == jl_global_sym || head == jl_const_sym) { - size_t i, l = jl_array_len(ex->args); + size_t i, l = jl_array_nrows(ex->args); for (i = 0; i < l; i++) { jl_value_t *a = jl_exprarg(ex, i); if (!jl_is_symbol(a) && !jl_is_globalref(a)) @@ -591,13 +634,29 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT return 1; } -static jl_method_instance_t *method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module) +JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src) +{ + // Do not compress this, we expect it to be shortlived. + jl_code_instance_t *ci = jl_new_codeinst(mi, (jl_value_t*)jl_uninferred_sym, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing, + (jl_value_t*)src, 0, src->min_world, src->max_world, + 0, NULL, NULL, NULL); + return ci; +} + +JL_DLLEXPORT jl_method_instance_t *jl_method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module) { - jl_method_instance_t *li = jl_new_method_instance_uninit(); - jl_atomic_store_relaxed(&li->uninferred, (jl_value_t*)src); - li->specTypes = (jl_value_t*)jl_emptytuple_type; - li->def.module = module; - return li; + jl_method_instance_t *mi = jl_new_method_instance_uninit(); + mi->specTypes = (jl_value_t*)jl_emptytuple_type; + mi->def.module = module; + JL_GC_PUSH1(&mi); + + jl_code_instance_t *ci = jl_new_codeinst_for_uninferred(mi, src); + jl_atomic_store_relaxed(&mi->cache, ci); + jl_gc_wb(mi, ci); + + JL_GC_POP(); + return mi; } static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym_t *asname) @@ -605,21 +664,21 @@ static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym assert(m); jl_sym_t *name = asname ? asname : import->name; // TODO: this is a bit race-y with what error message we might print - jl_binding_t *b = jl_get_module_binding(m, name, 0); - jl_binding_t *b2; - if (b != NULL && (b2 = jl_atomic_load_relaxed(&b->owner)) != NULL) { - if (b2->constp && jl_atomic_load_relaxed(&b2->value) == (jl_value_t*)import) - return; - if (b2 != b) - jl_errorf("importing %s into %s conflicts with an existing global", - jl_symbol_name(name), jl_symbol_name(m->name)); - } - else { - b = jl_get_binding_wr(m, name); + jl_binding_t *b = jl_get_module_binding(m, name, 1); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + if (decode_restriction_kind(pku) != BINDING_KIND_GUARD && decode_restriction_kind(pku) != BINDING_KIND_FAILED) { + // Unlike regular constant declaration, we allow this as long as we eventually end up at a constant. + pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); + if (decode_restriction_kind(pku) == BINDING_KIND_CONST || decode_restriction_kind(pku) == BINDING_KIND_CONST_IMPORT) { + // Already declared (e.g. on another thread) or imported. 
+ if (decode_restriction_value(pku) == (jl_value_t*)import) + return; + } + jl_errorf("importing %s into %s conflicts with an existing global", + jl_symbol_name(name), jl_symbol_name(m->name)); } - jl_declare_constant(b, m, name); - jl_checked_assignment(b, m, name, (jl_value_t*)import); - b->imported = 1; + jl_declare_constant_val2(b, m, name, (jl_value_t*)import, BINDING_KIND_CONST_IMPORT); } // in `import A.B: x, y, ...`, evaluate the `A.B` part if it exists @@ -635,7 +694,7 @@ static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_ jl_module_t *from = eval_import_path(m, NULL, path->args, &name, keyword); if (name != NULL) { from = (jl_module_t*)jl_eval_global_var(from, name); - if (!jl_is_module(from)) + if (!from || !jl_is_module(from)) jl_errorf("invalid %s path: \"%s\" does not name a module", keyword, jl_symbol_name(name)); } return from; @@ -659,46 +718,115 @@ static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword // Eval `throw(ErrorException(msg)))` in module `m`. // Used in `jl_toplevel_eval_flex` instead of `jl_throw` so that the error // location in julia code gets into the backtrace. -static void jl_eval_throw(jl_module_t *m, jl_value_t *exc) +static void jl_eval_throw(jl_module_t *m, jl_value_t *exc, const char *filename, int lineno) { jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2); JL_GC_PUSH1(&throw_ex); jl_exprargset(throw_ex, 0, jl_builtin_throw); jl_exprargset(throw_ex, 1, exc); - jl_toplevel_eval_flex(m, throw_ex, 0, 0); + jl_toplevel_eval_flex(m, throw_ex, 0, 0, &filename, &lineno); JL_GC_POP(); } // Format error message and call jl_eval -static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) +static void jl_eval_errorf(jl_module_t *m, const char *filename, int lineno, const char* fmt, ...) { va_list args; va_start(args, fmt); jl_value_t *exc = jl_vexceptionf(jl_errorexception_type, fmt, args); va_end(args); JL_GC_PUSH1(&exc); - jl_eval_throw(m, exc); + jl_eval_throw(m, exc, filename, lineno); + JL_GC_POP(); +} + +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val, enum jl_partition_kind constant_kind) +{ + JL_GC_PUSH1(&val); + jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); + jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction); + int did_warn = 0; + while (1) { + if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) { + if (!val) { + break; + } + jl_value_t *old = decode_restriction_value(pku); + JL_GC_PROMISE_ROOTED(old); + if (jl_egal(val, old)) + break; + if (!did_warn) { + if (jl_typeof(val) != jl_typeof(old) || jl_is_type(val) || jl_is_module(val)) + jl_errorf("invalid redefinition of constant %s.%s", + jl_symbol_name(mod->name), + jl_symbol_name(var)); + else + jl_safe_printf("WARNING: redefinition of constant %s.%s. 
This may fail, cause incorrect answers, or produce other errors.\n", + jl_symbol_name(mod->name), + jl_symbol_name(var)); + did_warn = 1; + } + } else if (!jl_bkind_is_some_guard(decode_restriction_kind(pku))) { + if (jl_bkind_is_some_import(decode_restriction_kind(pku))) { + jl_errorf("cannot declare %s.%s constant; it was already declared as an import", + jl_symbol_name(mod->name), jl_symbol_name(var)); + } else { + jl_errorf("cannot declare %s.%s constant; it was already declared global", + jl_symbol_name(mod->name), jl_symbol_name(var)); + } + } + if (jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction(val, constant_kind))) { + jl_gc_wb(bpart, val); + break; + } + } JL_GC_POP(); + return bpart; } -jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded) +JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val) +{ + return jl_declare_constant_val2(b, mod, var, val, val ? BINDING_KIND_CONST : BINDING_KIND_UNDEF_CONST); +} + +JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t *val) +{ + jl_module_t *gm; + jl_sym_t *gs; + if (jl_is_globalref(arg)) { + gm = jl_globalref_mod(arg); + gs = jl_globalref_name(arg); + } + else { + assert(jl_is_symbol(arg)); + gm = m; + gs = (jl_sym_t*)arg; + } + jl_binding_t *b = jl_get_module_binding(gm, gs, 1); + jl_declare_constant_val(b, gm, gs, val); +} + +JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno) { jl_task_t *ct = jl_current_task; if (!jl_is_expr(e)) { if (jl_is_linenode(e)) { - jl_lineno = jl_linenode_line(e); + *toplevel_lineno = jl_linenode_line(e); jl_value_t *file = jl_linenode_file(e); if (file != jl_nothing) { assert(jl_is_symbol(file)); - jl_filename = jl_symbol_name((jl_sym_t*)file); + *toplevel_filename = jl_symbol_name((jl_sym_t*)file); } + // Not thread safe. For debugging and last resort error messages (jl_critical_error) only. 
+ jl_filename = *toplevel_filename; + jl_lineno = *toplevel_lineno; return jl_nothing; } if (jl_is_symbol(e)) { char *n = jl_symbol_name((jl_sym_t*)e), *n0 = n; while (*n == '_') ++n; if (*n == 0 && n > n0) - jl_eval_errorf(m, "all-underscore identifier used as rvalue"); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, "all-underscore identifiers are write-only and their values cannot be used in expressions"); } return jl_interpret_toplevel_expr_in(m, e, NULL, NULL); } @@ -707,12 +835,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int if (ex->head == jl_dot_sym && jl_expr_nargs(ex) != 1) { if (jl_expr_nargs(ex) != 2) - jl_eval_errorf(m, "syntax: malformed \".\" expression"); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, "syntax: malformed \".\" expression"); jl_value_t *lhs = jl_exprarg(ex, 0); jl_value_t *rhs = jl_exprarg(ex, 1); // only handle `a.b` syntax here, so qualified names can be eval'd in pure contexts if (jl_is_quotenode(rhs) && jl_is_symbol(jl_fieldref(rhs, 0))) { - return jl_eval_dot_expr(m, lhs, rhs, fast); + return jl_eval_dot_expr(m, lhs, rhs, fast, toplevel_filename, toplevel_lineno); } } @@ -722,12 +850,13 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int jl_method_instance_t *mfunc = NULL; jl_code_info_t *thk = NULL; - JL_GC_PUSH3(&mfunc, &thk, &ex); + jl_value_t *root = NULL; + JL_GC_PUSH4(&mfunc, &thk, &ex, &root); size_t last_age = ct->world_age; if (!expanded && jl_needs_lowering(e)) { ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno); + ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, *toplevel_filename, *toplevel_lineno); ct->world_age = last_age; } jl_sym_t *head = jl_is_expr(ex) ? 
ex->head : NULL; @@ -750,18 +879,19 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) { name = NULL; jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "using"); - jl_module_t *u = import; - if (name != NULL) - u = (jl_module_t*)jl_eval_global_var(import, name); if (from) { - // `using A: B` syntax + // `using A: B` and `using A: B.c` syntax jl_module_use(m, import, name); } else { + jl_module_t *u = import; + if (name != NULL) + u = (jl_module_t*)jl_eval_global_var(import, name); if (!jl_is_module(u)) - jl_eval_errorf(m, "invalid using path: \"%s\" does not name a module", + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + "invalid using path: \"%s\" does not name a module", jl_symbol_name(name)); - // `using A.B` syntax + // `using A` and `using A.B` syntax jl_module_using(m, u); if (m == jl_main_module && name == NULL) { // TODO: for now, `using A` in Main also creates an explicit binding for `A` @@ -785,7 +915,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int continue; } } - jl_eval_errorf(m, "syntax: malformed \"using\" statement"); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + "syntax: malformed \"using\" statement"); } JL_GC_POP(); return jl_nothing; @@ -832,59 +963,63 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int continue; } } - jl_eval_errorf(m, "syntax: malformed \"import\" statement"); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + "syntax: malformed \"import\" statement"); } JL_GC_POP(); return jl_nothing; } - else if (head == jl_export_sym) { - for (size_t i = 0; i < jl_array_len(ex->args); i++) { + else if (head == jl_export_sym || head == jl_public_sym) { + int exp = (head == jl_export_sym); + for (size_t i = 0; i < jl_array_nrows(ex->args); i++) { jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i); if (!jl_is_symbol(name)) - jl_eval_errorf(m, "syntax: malformed \"export\" statement"); - jl_module_export(m, name); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + exp ? 
"syntax: malformed \"export\" statement" : + "syntax: malformed \"public\" statement"); + jl_module_public(m, name, exp); } JL_GC_POP(); return jl_nothing; } else if (head == jl_global_sym) { - jl_eval_global_expr(m, ex, 0); + size_t i, l = jl_array_nrows(ex->args); + for (i = 0; i < l; i++) { + jl_value_t *arg = jl_exprarg(ex, i); + jl_declare_global(m, arg, NULL); + } JL_GC_POP(); return jl_nothing; } else if (head == jl_const_sym) { - jl_sym_t *arg = (jl_sym_t*)jl_exprarg(ex, 0); - jl_module_t *gm; - jl_sym_t *gs; - if (jl_is_globalref(arg)) { - gm = jl_globalref_mod(arg); - gs = jl_globalref_name(arg); - } - else { - assert(jl_is_symbol(arg)); - gm = m; - gs = (jl_sym_t*)arg; - } - jl_binding_t *b = jl_get_binding_wr(gm, gs); - jl_declare_constant(b, gm, gs); + jl_eval_const_decl(m, jl_exprarg(ex, 0), NULL); JL_GC_POP(); return jl_nothing; } else if (head == jl_toplevel_sym) { jl_value_t *res = jl_nothing; int i; - for (i = 0; i < jl_array_len(ex->args); i++) { - res = jl_toplevel_eval_flex(m, jl_array_ptr_ref(ex->args, i), fast, 0); + for (i = 0; i < jl_array_nrows(ex->args); i++) { + root = jl_array_ptr_ref(ex->args, i); + if (jl_needs_lowering(root)) { + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + root = jl_expand_with_loc_warn(root, m, *toplevel_filename, *toplevel_lineno); + } + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + res = jl_toplevel_eval_flex(m, root, fast, 1, toplevel_filename, toplevel_lineno); } + ct->world_age = last_age; JL_GC_POP(); return res; } else if (head == jl_error_sym || head == jl_incomplete_sym) { if (jl_expr_nargs(ex) == 0) - jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head)); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + "malformed \"%s\" expression", jl_symbol_name(head)); if (jl_is_string(jl_exprarg(ex, 0))) - jl_eval_errorf(m, "syntax: %s", jl_string_data(jl_exprarg(ex, 0))); - jl_eval_throw(m, jl_exprarg(ex, 0)); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + "syntax: %s", jl_string_data(jl_exprarg(ex, 0))); + jl_eval_throw(m, jl_exprarg(ex, 0), *toplevel_filename, *toplevel_lineno); } else if (jl_is_symbol(ex)) { JL_GC_POP(); @@ -899,7 +1034,8 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int assert(head == jl_thunk_sym); thk = (jl_code_info_t*)jl_exprarg(ex, 0); if (!jl_is_code_info(thk) || !jl_typetagis(thk->code, jl_array_any_type)) { - jl_eval_errorf(m, "malformed \"thunk\" statement"); + jl_eval_errorf(m, *toplevel_filename, *toplevel_lineno, + "malformed \"thunk\" statement"); } body_attributes((jl_array_t*)thk->code, &has_ccall, &has_defs, &has_loops, &has_opaque, &forced_compile); @@ -911,16 +1047,13 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF && jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) { // use codegen - mfunc = method_instance_for_thunk(thk, m); - jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0); - // Don't infer blocks containing e.g. method definitions, since it's probably not - // worthwhile and also unsound (see #24316). - // TODO: This is still not correct since an `eval` can happen elsewhere, but it - // helps in common cases. + mfunc = jl_method_instance_for_thunk(thk, m); + jl_resolve_definition_effects_in_ir((jl_array_t*)thk->code, m, NULL, 0); + // Don't infer blocks containing e.g. method definitions, since it's probably not worthwhile. 
size_t world = jl_atomic_load_acquire(&jl_world_counter); ct->world_age = world; if (!has_defs && jl_get_module_infer(m) != 0) { - (void)jl_type_infer(mfunc, world, 0); + (void)jl_type_infer(mfunc, world, SOURCE_MODE_ABI); } result = jl_invoke(/*func*/NULL, /*args*/NULL, /*nargs*/0, mfunc); ct->world_age = last_age; @@ -929,9 +1062,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int // use interpreter assert(thk); if (has_opaque) { - jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0); + jl_resolve_definition_effects_in_ir((jl_array_t*)thk->code, m, NULL, 0); } + size_t world = jl_atomic_load_acquire(&jl_world_counter); + ct->world_age = world; result = jl_interpret_toplevel_thunk(m, thk); + ct->world_age = last_age; } JL_GC_POP(); @@ -940,18 +1076,22 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v) { - return jl_toplevel_eval_flex(m, v, 1, 0); + const char *filename = jl_filename; + int lineno = jl_lineno; + return jl_toplevel_eval_flex(m, v, 1, 0, &filename, &lineno); } // Check module `m` is open for `eval/include`, or throw an error. -static void jl_check_open_for(jl_module_t *m, const char* funcname) +JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname) { + if (jl_current_task->ptls->in_pure_callback) + jl_errorf("%s cannot be used in a generated function", fname); if (jl_options.incremental && jl_generating_output()) { if (m != jl_main_module) { // TODO: this was grand-fathered in JL_LOCK(&jl_modules_mutex); int open = ptrhash_has(&jl_current_modules, (void*)m); if (!open && jl_module_init_order != NULL) { - size_t i, l = jl_array_len(jl_module_init_order); + size_t i, l = jl_array_nrows(jl_module_init_order); for (i = 0; i < l; i++) { if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) { open = 1; @@ -965,31 +1105,24 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname) jl_errorf("Evaluation into the closed module `%s` breaks incremental compilation " "because the side effects will not be permanent. 
" "This is likely due to some other module mutating `%s` with `%s` during " - "precompilation - don't do this.", name, name, funcname); + "precompilation - don't do this.", name, name, fname); } } } } -JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname) -{ - if (jl_current_task->ptls->in_pure_callback) - jl_errorf("%s cannot be used in a generated function", fname); - jl_check_open_for(m, fname); -} - JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex) { - jl_task_t *ct = jl_current_task; - if (ct->ptls->in_pure_callback) - jl_error("eval cannot be used in a generated function"); - jl_check_open_for(m, "eval"); + jl_check_top_level_effect(m, "eval"); jl_value_t *v = NULL; int last_lineno = jl_lineno; const char *last_filename = jl_filename; + jl_task_t *ct = jl_current_task; jl_lineno = 1; jl_filename = "none"; + size_t last_age = ct->world_age; JL_TRY { + ct->world_age = jl_atomic_load_relaxed(&jl_world_counter); v = jl_toplevel_eval(m, ex); } JL_CATCH { @@ -999,24 +1132,11 @@ JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex) } jl_lineno = last_lineno; jl_filename = last_filename; + ct->world_age = last_age; assert(v); return v; } -JL_DLLEXPORT jl_value_t *jl_infer_thunk(jl_code_info_t *thk, jl_module_t *m) -{ - jl_method_instance_t *li = method_instance_for_thunk(thk, m); - JL_GC_PUSH1(&li); - jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0); - jl_task_t *ct = jl_current_task; - jl_code_info_t *src = jl_type_infer(li, ct->world_age, 0); - JL_GC_POP(); - if (src) - return src->rettype; - return (jl_value_t*)jl_any_type; -} - - //------------------------------------------------------------------------------ // Code loading: combined parse+eval for include() @@ -1029,10 +1149,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, if (!jl_is_string(text) || !jl_is_string(filename)) { jl_errorf("Expected `String`s for `text` and `filename`"); } - jl_task_t *ct = jl_current_task; - if (ct->ptls->in_pure_callback) - jl_error("cannot use include inside a generated function"); - jl_check_open_for(module, "include"); + jl_check_top_level_effect(module, "include"); jl_value_t *result = jl_nothing; jl_value_t *ast = NULL; @@ -1045,15 +1162,17 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, jl_errorf("jl_parse_all() must generate a top level expression"); } + jl_task_t *ct = jl_current_task; int last_lineno = jl_lineno; const char *last_filename = jl_filename; - size_t last_age = ct->world_age; int lineno = 0; jl_lineno = 0; - jl_filename = jl_string_data(filename); - int err = 0; + const char *filename_str = jl_string_data(filename); + jl_filename = filename_str; JL_TRY { + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); for (size_t i = 0; i < jl_expr_nargs(ast); i++) { expression = jl_exprarg(ast, i); if (jl_is_linenode(expression)) { @@ -1062,28 +1181,26 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, jl_lineno = lineno; continue; } + ct->world_age = jl_atomic_load_relaxed(&jl_world_counter); expression = jl_expand_with_loc_warn(expression, module, jl_string_data(filename), lineno); - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - result = jl_toplevel_eval_flex(module, expression, 1, 1); + ct->world_age = jl_atomic_load_relaxed(&jl_world_counter); + result = jl_toplevel_eval_flex(module, expression, 1, 1, &filename_str, &lineno); } + ct->world_age = last_age; } 
JL_CATCH { - result = jl_box_long(jl_lineno); // (ab)use result to root error line - err = 1; - goto finally; // skip jl_restore_excstack - } -finally: - ct->world_age = last_age; - jl_lineno = last_lineno; - jl_filename = last_filename; - if (err) { + result = jl_box_long(lineno); // (ab)use result to root error line + jl_lineno = last_lineno; + jl_filename = last_filename; if (jl_loaderror_type == NULL) jl_rethrow(); else jl_rethrow_other(jl_new_struct(jl_loaderror_type, filename, result, - jl_current_exception())); + jl_current_exception(ct))); } + jl_lineno = last_lineno; + jl_filename = last_filename; JL_GC_POP(); return result; } @@ -1166,6 +1283,21 @@ JL_DLLEXPORT jl_value_t *jl_prepend_cwd(jl_value_t *str) return jl_cstr_to_string(path); } +JL_DLLEXPORT jl_value_t *jl_prepend_string(jl_value_t *prefix, jl_value_t *str) +{ + char path[1024]; + const char *pstr = (const char*)jl_string_data(prefix); + size_t sz = strlen(pstr); + const char *fstr = (const char*)jl_string_data(str); + if (strlen(fstr) + sz >= sizeof(path)) { + jl_errorf("use a bigger buffer for jl_fullpath"); + } + strcpy(path, pstr); + strcpy(path + sz, fstr); + return jl_cstr_to_string(path); +} + + #ifdef __cplusplus } #endif diff --git a/src/typemap.c b/src/typemap.c index 1bdbe52a974dd..b8b699e101fe5 100644 --- a/src/typemap.c +++ b/src/typemap.c @@ -277,21 +277,21 @@ static int is_cache_leaf(jl_value_t *ty, int tparam) return (jl_is_concrete_type(ty) && (tparam || !jl_is_kind(ty))); } -static _Atomic(jl_value_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT +static _Atomic(jl_value_t*) *mtcache_hash_lookup_bp(jl_genericmemory_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT { - if (cache == (jl_array_t*)jl_an_empty_vec_any) + if (cache == (jl_genericmemory_t*)jl_an_empty_memory_any) return NULL; _Atomic(jl_value_t*) *pml = jl_table_peek_bp(cache, ty); JL_GC_PROMISE_ROOTED(pml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim return pml; } -static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val) +static void mtcache_hash_insert(_Atomic(jl_genericmemory_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val) { int inserted = 0; - jl_array_t *a = jl_atomic_load_relaxed(cache); - if (a == (jl_array_t*)jl_an_empty_vec_any) { - a = jl_alloc_vec_any(16); + jl_genericmemory_t *a = jl_atomic_load_relaxed(cache); + if (a == (jl_genericmemory_t*)jl_an_empty_memory_any) { + a = jl_alloc_memory_any(16); jl_atomic_store_release(cache, a); if (parent) jl_gc_wb(parent, a); @@ -305,9 +305,9 @@ static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent, } } -static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT +static jl_typemap_t *mtcache_hash_lookup(jl_genericmemory_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT { - if (cache == (jl_array_t*)jl_an_empty_vec_any) + if (cache == (jl_genericmemory_t*)jl_an_empty_memory_any) return (jl_typemap_t*)jl_nothing; jl_typemap_t *ml = (jl_typemap_t*)jl_eqtable_get(cache, ty, jl_nothing); return ml; @@ -315,17 +315,17 @@ static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, j // ----- Sorted Type Signature Lookup Matching ----- // -static int jl_typemap_array_visitor(jl_array_t *a, jl_typemap_visitor_fptr fptr, void *closure) +static int jl_typemap_memory_visitor(jl_genericmemory_t *a, jl_typemap_visitor_fptr fptr, void *closure) { - 
size_t i, l = jl_array_len(a); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a); + size_t i, l = a->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) a->ptr; for (i = 1; i < l; i += 2) { jl_value_t *d = jl_atomic_load_relaxed(&data[i]); JL_GC_PROMISE_ROOTED(d); if (d == NULL) continue; - if (jl_is_array(d)) { - if (!jl_typemap_array_visitor((jl_array_t*)d, fptr, closure)) + if (jl_is_genericmemory(d)) { + if (!jl_typemap_memory_visitor((jl_genericmemory_t*)d, fptr, closure)) return 0; } else { @@ -352,23 +352,23 @@ int jl_typemap_visitor(jl_typemap_t *cache, jl_typemap_visitor_fptr fptr, void * { if (jl_typeof(cache) == (jl_value_t*)jl_typemap_level_type) { jl_typemap_level_t *node = (jl_typemap_level_t*)cache; - jl_array_t *a; + jl_genericmemory_t *a; JL_GC_PUSH1(&a); a = jl_atomic_load_relaxed(&node->targ); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; a = jl_atomic_load_relaxed(&node->arg1); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; a = jl_atomic_load_relaxed(&node->tname); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; a = jl_atomic_load_relaxed(&node->name1); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; if (!jl_typemap_node_visitor(jl_atomic_load_relaxed(&node->linear), fptr, closure)) goto exit; @@ -451,12 +451,12 @@ static int concrete_intersects(jl_value_t *t, jl_value_t *ty, int8_t tparam) // tparam bit 0 is ::Type{T} (vs. T) // tparam bit 1 is typename(T) (vs. 
T) -static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, int8_t tparam, +static int jl_typemap_intersection_memory_visitor(jl_genericmemory_t *a, jl_value_t *ty, int8_t tparam, int8_t offs, struct typemap_intersection_env *closure) { JL_GC_PUSH1(&a); - size_t i, l = jl_array_len(a); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a); + size_t i, l = a->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) a->ptr; unsigned height = 0; jl_datatype_t *tydt = jl_any_type; if (tparam & 2) { @@ -492,8 +492,8 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, tname_intersection_dt(tydt, (jl_typename_t*)t, height)) { if ((tparam & 1) && t == (jl_value_t*)jl_typeofbottom_type->name) // skip Type{Union{}} and Type{typeof(Union{})}, since the caller should have already handled those continue; - if (jl_is_array(ml)) { - if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, tparam & ~2, offs, closure)) + if (jl_is_genericmemory(ml)) { + if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, tparam & ~2, offs, closure)) goto exit; } else { @@ -531,9 +531,9 @@ static int jl_typemap_intersection_node_visitor(jl_typemap_entry_t *ml, struct t // that can be absolutely critical for speed register jl_typemap_intersection_visitor_fptr fptr = closure->fptr; for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) { - if (closure->max_valid < ml->min_world) + if (closure->max_valid < jl_atomic_load_relaxed(&ml->min_world)) continue; - if (closure->min_valid > ml->max_world) + if (closure->min_valid > jl_atomic_load_relaxed(&ml->max_world)) continue; jl_svec_t **penv = NULL; if (closure->env) { @@ -627,15 +627,15 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, if (jl_has_free_typevars(ty)) ty = jl_rewrap_unionall(ty, closure->type); JL_GC_PUSH1(&ty); - jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ); - jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname); + jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ); + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); int maybe_type = 0; int maybe_kind = 0; int exclude_typeofbottom = 0; jl_value_t *typetype = NULL; jl_value_t *name = NULL; // pre-check: optimized pre-intersection test to see if `ty` could intersect with any Type or Kind - if (targ != (jl_array_t*)jl_an_empty_vec_any || tname != (jl_array_t*)jl_an_empty_vec_any) { + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any || tname != (jl_genericmemory_t*)jl_an_empty_memory_any) { maybe_kind = jl_has_intersect_kind_not_type(ty); maybe_type = maybe_kind || jl_has_intersect_type_not_kind(ty); if (maybe_type && !maybe_kind) { @@ -651,7 +651,7 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, } } // First check for intersections with methods defined on Type{T}, where T was a concrete type - if (targ != (jl_array_t*)jl_an_empty_vec_any && maybe_type && + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any && maybe_type && (!typetype || jl_has_free_typevars(typetype) || is_cache_leaf(typetype, 1))) { // otherwise cannot contain this particular kind, so don't bother with checking if (!exclude_typeofbottom) { // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here @@ -680,18 +680,18 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, // attempt semi-direct lookup of types via their names // consider the type name first jl_value_t *ml = 
mtcache_hash_lookup(targ, (jl_value_t*)name); - if (jl_is_array(ml)) { + if (jl_is_genericmemory(ml)) { if (typetype && !jl_has_free_typevars(typetype)) { // direct lookup of leaf types if (is_cache_leaf(typetype, 1)) { - ml = mtcache_hash_lookup((jl_array_t*)ml, typetype); + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, typetype); if (ml != jl_nothing) { if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; } } } } else { - if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, 1, offs, closure)) { JL_GC_POP(); return 0; } + if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, 1, offs, closure)) { JL_GC_POP(); return 0; } } } else if (ml != jl_nothing) { @@ -699,19 +699,19 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, } } else { - // else an array scan is required to consider all the possible subtypes - if (!jl_typemap_intersection_array_visitor(targ, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; } + // else a scan is required to consider all the possible subtypes + if (!jl_typemap_intersection_memory_visitor(targ, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; } } } } - jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); - if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); + if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { if (is_cache_leaf(ty, 0)) { jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name; // direct lookup of leaf types jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name); - if (jl_is_array(ml)) - ml = mtcache_hash_lookup((jl_array_t*)ml, ty); + if (jl_is_genericmemory(ml)) + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, ty); if (ml != jl_nothing) { if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; } } @@ -721,21 +721,21 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, if (name && jl_type_extract_name_precise(ty, 0)) { // direct lookup of leaf types jl_value_t *ml = mtcache_hash_lookup(cachearg1, name); - if (jl_is_array(ml)) { - if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, 0, offs, closure)) { JL_GC_POP(); return 0; } + if (jl_is_genericmemory(ml)) { + if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, 0, offs, closure)) { JL_GC_POP(); return 0; } } else { if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; } } } else { - // else an array scan is required to check subtypes - if (!jl_typemap_intersection_array_visitor(cachearg1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; } + // else a scan is required to check subtypes + if (!jl_typemap_intersection_memory_visitor(cachearg1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; } } } } // Next check for intersections with methods defined on Type{T}, where T was not concrete (it might even have been a TypeVar), but had an extractable TypeName - if (tname != (jl_array_t*)jl_an_empty_vec_any && maybe_type) { + if (tname != (jl_genericmemory_t*)jl_an_empty_memory_any && maybe_type) { if (!exclude_typeofbottom || (!typetype && jl_isa((jl_value_t*)jl_typeofbottom_type, ty))) { // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here // otherwise the possibility of 
encountering `Type{Union{}}` in this intersection may @@ -775,13 +775,13 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, } } else { - // else an array scan is required to check subtypes of typetype too + // else a scan is required to check subtypes of typetype too tname = jl_atomic_load_relaxed(&cache->tname); // may be GC'd earlier - if (!jl_typemap_intersection_array_visitor(tname, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; } + if (!jl_typemap_intersection_memory_visitor(tname, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; } } } - jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1); - if (name1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1); + if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_value_t *name = jl_type_extract_name(ty); if (name && jl_type_extract_name_precise(ty, 0)) { jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper); @@ -798,8 +798,8 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, } } else { - // else an array scan is required to check subtypes - if (!jl_typemap_intersection_array_visitor(name1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; } + // else a scan is required to check subtypes + if (!jl_typemap_intersection_memory_visitor(name1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; } } } JL_GC_POP(); @@ -836,9 +836,7 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type( size_t n = jl_nparams(unw); int typesisva = n == 0 ? 0 : jl_is_vararg(jl_tparam(unw, n-1)); for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) { - if (search->max_valid < ml->min_world) - continue; - if (search->min_valid > ml->max_world) + if (search->world < jl_atomic_load_relaxed(&ml->min_world) || search->world > jl_atomic_load_relaxed(&ml->max_world)) continue; size_t lensig = jl_nparams(jl_unwrap_unionall((jl_value_t*)ml->sig)); if (lensig == n || (ml->va && lensig <= n+1)) { @@ -877,24 +875,7 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type( } } if (ismatch) { - if (search->world < ml->min_world) { - // ignore method table entries that are part of a later world - if (search->max_valid >= ml->min_world) - search->max_valid = ml->min_world - 1; - } - else if (search->world > ml->max_world) { - // ignore method table entries that have been replaced in the current world - if (search->min_valid <= ml->max_world) - search->min_valid = ml->max_world + 1; - } - else { - // intersect the env valid range with method's valid range - if (search->min_valid < ml->min_world) - search->min_valid = ml->min_world; - if (search->max_valid > ml->max_world) - search->max_valid = ml->max_world; - return ml; - } + return ml; } } if (resetenv) @@ -908,7 +889,7 @@ static jl_typemap_entry_t *jl_typemap_entry_lookup_by_type( jl_typemap_entry_t *ml, struct jl_typemap_assoc *search) { for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) { - if (search->world < ml->min_world || search->world > ml->max_world) + if (search->world < jl_atomic_load_relaxed(&ml->min_world) || search->world > jl_atomic_load_relaxed(&ml->max_world)) continue; // unroll the first few cases here, to the extent that is possible to do fast and easily jl_value_t *types = search->types; @@ -989,12 +970,12 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( if (jl_is_type_type(ty)) { jl_value_t *a0 = 
jl_tparam0(ty); if (is_cache_leaf(a0, 1)) { - jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ); - if (targ != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ); + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name; jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name); - if (jl_is_array(ml)) - ml = mtcache_hash_lookup((jl_array_t*)ml, a0); + if (jl_is_genericmemory(ml)) + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, a0); if (ml != jl_nothing) { jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype); if (li) return li; @@ -1004,12 +985,12 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } } if (is_cache_leaf(ty, 0)) { - jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); - if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); + if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name; jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name); - if (jl_is_array(ml)) - ml = mtcache_hash_lookup((jl_array_t*)ml, ty); + if (jl_is_genericmemory(ml)) + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, ty); if (ml != jl_nothing) { jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype); if (li) return li; @@ -1020,8 +1001,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } if (ty || subtype) { // now look at the optimized TypeName caches - jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname); - if (tname != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); + if (tname != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_value_t *a0 = ty && jl_is_type_type(ty) ? 
jl_type_extract_name(jl_tparam0(ty)) : NULL; if (a0) { // TODO: if we start analyzing Union types in jl_type_extract_name, then a0 might be over-approximated here, leading us to miss possible subtypes jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper); @@ -1039,9 +1020,10 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } else { if (!ty || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) { + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); // reload after type-intersect // couldn't figure out unique `a0` initial point, so scan all for matches - size_t i, l = jl_array_len(tname); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname); + size_t i, l = tname->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname); JL_GC_PUSH1(&tname); for (i = 1; i < l; i += 2) { jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]); @@ -1057,8 +1039,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } } } - jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1); - if (name1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1); + if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { if (ty) { jl_value_t *a0 = jl_type_extract_name(ty); if (a0) { // TODO: if we start analyzing Union types in jl_type_extract_name, then a0 might be over-approximated here, leading us to miss possible subtypes @@ -1079,8 +1061,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } else { // doing subtype, but couldn't figure out unique `ty`, so scan all for supertypes - size_t i, l = jl_array_len(name1); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(name1); + size_t i, l = name1->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(name1); JL_GC_PUSH1(&name1); for (i = 1; i < l; i += 2) { jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]); @@ -1119,7 +1101,7 @@ jl_typemap_entry_t *jl_typemap_entry_assoc_exact(jl_typemap_entry_t *ml, jl_valu // some manually-unrolled common special cases while (ml->simplesig == (void*)jl_nothing && ml->guardsigs == jl_emptysvec && ml->isleafsig) { // use a tight loop for as long as possible - if (world >= ml->min_world && world <= ml->max_world) { + if (world >= jl_atomic_load_relaxed(&ml->min_world) && world <= jl_atomic_load_relaxed(&ml->max_world)) { if (n == jl_nparams(ml->sig) && jl_typeof(arg1) == jl_tparam(ml->sig, 0)) { if (n == 1) return ml; @@ -1144,7 +1126,7 @@ jl_typemap_entry_t *jl_typemap_entry_assoc_exact(jl_typemap_entry_t *ml, jl_valu } for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) { - if (world < ml->min_world || world > ml->max_world) + if (world < jl_atomic_load_relaxed(&ml->min_world) || world > jl_atomic_load_relaxed(&ml->max_world)) continue; // ignore replaced methods size_t lensig = jl_nparams(ml->sig); if (lensig == n || (ml->va && lensig <= n+1)) { @@ -1198,26 +1180,26 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v jl_value_t *a1 = (offs == 0 ? 
arg1 : args[offs - 1]); jl_value_t *ty = jl_typeof(a1); assert(jl_is_datatype(ty)); - jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ); - if (targ != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(a1, 1)) { + jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ); + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any && is_cache_leaf(a1, 1)) { jl_typename_t *name = a1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a1)->name; jl_value_t *ml_or_cache = mtcache_hash_lookup(targ, (jl_value_t*)name); - if (jl_is_array(ml_or_cache)) - ml_or_cache = mtcache_hash_lookup((jl_array_t*)ml_or_cache, a1); + if (jl_is_genericmemory(ml_or_cache)) + ml_or_cache = mtcache_hash_lookup((jl_genericmemory_t*)ml_or_cache, a1); jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs+1, world); if (ml) return ml; } - jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); - if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(ty, 0)) { + jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); + if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any && is_cache_leaf(ty, 0)) { jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name; jl_value_t *ml_or_cache = mtcache_hash_lookup(cachearg1, (jl_value_t*)name); - if (jl_is_array(ml_or_cache)) - ml_or_cache = mtcache_hash_lookup((jl_array_t*)ml_or_cache, ty); + if (jl_is_genericmemory(ml_or_cache)) + ml_or_cache = mtcache_hash_lookup((jl_genericmemory_t*)ml_or_cache, ty); jl_typemap_entry_t *ml = jl_typemap_assoc_exact((jl_typemap_t*)ml_or_cache, arg1, args, n, offs+1, world); if (ml) return ml; } - jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname); - if (jl_is_kind(ty) && tname != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); + if (jl_is_kind(ty) && tname != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_value_t *name = jl_type_extract_name(a1); if (name) { if (ty != (jl_value_t*)jl_datatype_type) @@ -1235,8 +1217,8 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v } else { // couldn't figure out unique `name` initial point, so must scan all for matches - size_t i, l = jl_array_len(tname); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname); + size_t i, l = tname->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname); JL_GC_PUSH1(&tname); for (i = 1; i < l; i += 2) { jl_typemap_t *ml_or_cache = jl_atomic_load_relaxed(&data[i]); @@ -1251,8 +1233,8 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v JL_GC_POP(); } } - jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1); - if (name1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1); + if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { while (1) { name1 = jl_atomic_load_relaxed(&cache->name1); // reload after tree descent (which may hit safepoints) jl_typemap_t *ml_or_cache = mtcache_hash_lookup( @@ -1297,23 +1279,23 @@ static jl_typemap_level_t *jl_new_typemap_level(void) jl_typemap_level_t *cache = (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t), jl_typemap_level_type); - jl_atomic_store_relaxed(&cache->arg1, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&cache->targ, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&cache->name1, 
(jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&cache->tname, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&cache->arg1, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&cache->targ, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&cache->name1, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&cache->tname, (jl_genericmemory_t*)jl_an_empty_memory_any); jl_atomic_store_relaxed(&cache->linear, (jl_typemap_entry_t*)jl_nothing); jl_atomic_store_relaxed(&cache->any, jl_nothing); return cache; } -static void jl_typemap_array_insert_( - jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, +static void jl_typemap_memory_insert_( + jl_typemap_t *map, _Atomic(jl_genericmemory_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit); static jl_value_t *jl_method_convert_list_to_cache( jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t tparam, int8_t offs, int8_t doublesplit) { - jl_value_t *cache = doublesplit ? jl_an_empty_vec_any : (jl_value_t*)jl_new_typemap_level(); + jl_value_t *cache = doublesplit ? jl_an_empty_memory_any : (jl_value_t*)jl_new_typemap_level(); jl_typemap_entry_t *next = NULL; JL_GC_PUSH3(&cache, &next, &ml); while (ml != (void*)jl_nothing) { @@ -1336,7 +1318,7 @@ static jl_value_t *jl_method_convert_list_to_cache( assert(jl_is_type_type(key)); key = jl_tparam0(key); } - jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)&cache, key, ml, NULL, 0, offs, NULL); + jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)&cache, key, ml, NULL, 0, offs, NULL); } else jl_typemap_level_insert_(map, (jl_typemap_level_t*)cache, ml, offs); @@ -1371,9 +1353,9 @@ static void jl_typemap_insert_generic( jl_typemap_entry_t *newrec, int8_t tparam, int8_t offs, jl_value_t *doublesplit) { jl_value_t *ml = jl_atomic_load_relaxed(pml); - if (jl_is_array(ml)) { + if (jl_is_genericmemory(ml)) { assert(doublesplit); - jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL); + jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL); return; } if (jl_typeof(ml) == (jl_value_t*)jl_typemap_level_type) { @@ -1389,7 +1371,7 @@ static void jl_typemap_insert_generic( jl_atomic_store_release(pml, ml); jl_gc_wb(parent, ml); if (doublesplit) - jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL); + jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL); else jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs); return; @@ -1399,16 +1381,16 @@ static void jl_typemap_insert_generic( parent, newrec); } -static void jl_typemap_array_insert_( - jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, +static void jl_typemap_memory_insert_( + jl_typemap_t *map, _Atomic(jl_genericmemory_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit) { - jl_array_t *cache = jl_atomic_load_relaxed(pcache); + jl_genericmemory_t *cache = jl_atomic_load_relaxed(pcache); _Atomic(jl_value_t*) *pml = mtcache_hash_lookup_bp(cache, key); if (pml == NULL) mtcache_hash_insert(pcache, parent, key, (jl_typemap_t*)newrec); else - jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, 
tparam, offs + (doublesplit ? 0 : 1), doublesplit); + jl_typemap_insert_generic(map, pml, (jl_value_t*) cache, newrec, tparam, offs + (doublesplit ? 0 : 1), doublesplit); } static void jl_typemap_level_insert_( @@ -1451,13 +1433,13 @@ static void jl_typemap_level_insert_( jl_value_t *a0 = jl_tparam0(t1); if (is_cache_leaf(a0, 1)) { jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name; - jl_typemap_array_insert_(map, &cache->targ, (jl_value_t*)name, newrec, (jl_value_t*)cache, 1, offs, jl_is_datatype(name->wrapper) ? NULL : a0); + jl_typemap_memory_insert_(map, &cache->targ, (jl_value_t*)name, newrec, (jl_value_t*)cache, 1, offs, jl_is_datatype(name->wrapper) ? NULL : a0); return; } } if (is_cache_leaf(t1, 0)) { jl_typename_t *name = t1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)t1)->name; - jl_typemap_array_insert_(map, &cache->arg1, (jl_value_t*)name, newrec, (jl_value_t*)cache, 0, offs, jl_is_datatype(name->wrapper) ? NULL : t1); + jl_typemap_memory_insert_(map, &cache->arg1, (jl_value_t*)name, newrec, (jl_value_t*)cache, 0, offs, jl_is_datatype(name->wrapper) ? NULL : t1); return; } @@ -1467,12 +1449,12 @@ static void jl_typemap_level_insert_( if (jl_is_type_type(t1)) { a0 = jl_type_extract_name(jl_tparam0(t1)); jl_datatype_t *super = a0 ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper) : jl_any_type; - jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, 1, offs, NULL); + jl_typemap_memory_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, 1, offs, NULL); return; } a0 = jl_type_extract_name(t1); if (a0 && a0 != (jl_value_t*)jl_any_type->name) { - jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, 0, offs, NULL); + jl_typemap_memory_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, 0, offs, NULL); return; } } @@ -1516,8 +1498,8 @@ jl_typemap_entry_t *jl_typemap_alloc( newrec->func.value = newvalue; newrec->guardsigs = guardsigs; jl_atomic_store_relaxed(&newrec->next, (jl_typemap_entry_t*)jl_nothing); - newrec->min_world = min_world; - newrec->max_world = max_world; + jl_atomic_store_relaxed(&newrec->min_world, min_world); + jl_atomic_store_relaxed(&newrec->max_world, max_world); newrec->va = isva; newrec->issimplesig = issimplesig; newrec->isleafsig = isleafsig; diff --git a/src/utils.scm b/src/utils.scm index 97464b9a14e5a..79e3a280b9886 100644 --- a/src/utils.scm +++ b/src/utils.scm @@ -48,6 +48,13 @@ (any (lambda (y) (expr-contains-p p y filt)) (cdr expr)))))) +(define (expr-replace p expr repl) + (cond ((p expr) (repl expr)) + ((and (pair? expr) (not (quoted? 
expr))) + (cons (car expr) + (map (lambda (x) (expr-replace p x repl)) (cdr expr)))) + (else expr))) + ;; find all subexprs satisfying `p`, applying `key` to each one (define (expr-find-all p expr key (filt (lambda (x) #t))) (if (filt expr) diff --git a/src/win32_ucontext.c b/src/win32_ucontext.c index c6d4372308004..ca54877f97728 100644 --- a/src/win32_ucontext.c +++ b/src/win32_ucontext.c @@ -62,6 +62,8 @@ void jl_makecontext(win32_ucontext_t *ucp, void (*func)(void)) Registration[0].Handler = &__julia_personality; Registration[1].Next = (PEXCEPTION_REGISTRATION_RECORD)0xFFFFFFFF; Registration[1].Handler = UnHandler; +#else +#error jl_makecontext not defined for CPU type #endif stack_top -= sizeof(void*); *(void**)stack_top = 0; // push rta diff --git a/src/work-stealing-queue.h b/src/work-stealing-queue.h index 38429e02886e9..9ec283b610e62 100644 --- a/src/work-stealing-queue.h +++ b/src/work-stealing-queue.h @@ -3,6 +3,8 @@ #ifndef WORK_STEALING_QUEUE_H #define WORK_STEALING_QUEUE_H +#include + #include "julia_atomics.h" #include "assert.h" @@ -34,10 +36,17 @@ static inline ws_array_t *create_ws_array(size_t capacity, int32_t eltsz) JL_NOT return a; } +static inline void free_ws_array(ws_array_t *a) +{ + free(a->buffer); + free(a); +} + typedef struct { - _Atomic(int64_t) top; - _Atomic(int64_t) bottom; - _Atomic(ws_array_t *) array; + // align to JL_CACHE_BYTE_ALIGNMENT + alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(int64_t) top; + alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(int64_t) bottom; + alignas(JL_CACHE_BYTE_ALIGNMENT) _Atomic(ws_array_t *) array; } ws_queue_t; static inline ws_array_t *ws_queue_push(ws_queue_t *q, void *elt, int32_t eltsz) JL_NOTSAFEPOINT diff --git a/stdlib/.gitignore b/stdlib/.gitignore index dec1745520d4c..5996091c5a0ef 100644 --- a/stdlib/.gitignore +++ b/stdlib/.gitignore @@ -21,6 +21,16 @@ /SparseArrays /SHA-* /SHA +/LazyArtifacts-* +/LazyArtifacts +/Distributed-* +/Distributed +/StyledStrings-* +/StyledStrings +/JuliaSyntaxHighlighting-* +/JuliaSyntaxHighlighting +/LinearAlgebra-* +/LinearAlgebra /*_jll/StdlibArtifacts.toml /*/Manifest.toml /*.image diff --git a/stdlib/ArgTools.version b/stdlib/ArgTools.version index 0ae273bb18db6..914746c1a6900 100644 --- a/stdlib/ArgTools.version +++ b/stdlib/ArgTools.version @@ -1,4 +1,4 @@ ARGTOOLS_BRANCH = master -ARGTOOLS_SHA1 = 08b11b2707593d4d7f92e5f1b9dba7668285ff82 +ARGTOOLS_SHA1 = 1314758ad02ff5e9e5ca718920c6c633b467a84a ARGTOOLS_GIT_URL := https://github.com/JuliaIO/ArgTools.jl.git ARGTOOLS_TAR_URL = https://api.github.com/repos/JuliaIO/ArgTools.jl/tarball/$1 diff --git a/stdlib/Artifacts/Project.toml b/stdlib/Artifacts/Project.toml index 7251b79cea8c1..c4e5cc031375c 100644 --- a/stdlib/Artifacts/Project.toml +++ b/stdlib/Artifacts/Project.toml @@ -1,5 +1,6 @@ name = "Artifacts" uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Artifacts/docs/src/index.md b/stdlib/Artifacts/docs/src/index.md index 80f4c62cbf77f..1bd75832fb8d3 100644 --- a/stdlib/Artifacts/docs/src/index.md +++ b/stdlib/Artifacts/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Artifacts/docs/src/index.md" +``` + # Artifacts ```@meta @@ -18,4 +22,7 @@ Artifacts.artifact_meta Artifacts.artifact_hash Artifacts.find_artifacts_toml Artifacts.@artifact_str +Artifacts.artifact_exists +Artifacts.artifact_path +Artifacts.select_downloadable_artifacts ``` diff --git a/stdlib/Artifacts/src/Artifacts.jl 
b/stdlib/Artifacts/src/Artifacts.jl index 70593bfadae05..e21db58b9445e 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -1,9 +1,17 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +""" +Artifacts.jl is a Julia module that is used for managing and accessing +artifacts in Julia packages. Artifacts are containers for +platform-specific binaries, datasets, text, or any other kind of data +that would be convenient to place within an immutable, life-cycled datastore. +""" module Artifacts import Base: get, SHA1 -using Base.BinaryPlatforms, Base.TOML +using Base.BinaryPlatforms: AbstractPlatform, Platform, HostPlatform +using Base.BinaryPlatforms: tags, triplet, select_platform +using Base.TOML: TOML export artifact_exists, artifact_path, artifact_meta, artifact_hash, select_downloadable_artifacts, find_artifacts_toml, @artifact_str @@ -18,7 +26,7 @@ function parse_toml(path::String) Base.parsed_toml(path) end -# keep in sync with Base.project_names and Base.manifest_names +# keep in sync with Base.project_names const artifact_names = ("JuliaArtifacts.toml", "Artifacts.toml") const ARTIFACTS_DIR_OVERRIDE = Ref{Union{String,Nothing}}(nothing) @@ -67,8 +75,8 @@ function parse_mapping(mapping::String, name::String, override_file::String) end return mapping end -function parse_mapping(mapping::Dict, name::String, override_file::String) - return Dict(k => parse_mapping(v, name, override_file) for (k, v) in mapping) +function parse_mapping(mapping::Dict{String, Any}, name::String, override_file::String) + return Dict{String, Any}(k => parse_mapping(v, name, override_file) for (k, v) in mapping) end # Fallthrough for invalid Overrides.toml files parse_mapping(mapping, name::String, override_file::String) = nothing @@ -96,7 +104,7 @@ overriding to another artifact by its content-hash. const ARTIFACT_OVERRIDES = Ref{Union{Dict{Symbol,Any},Nothing}}(nothing) function load_overrides(;force::Bool = false)::Dict{Symbol, Any} if ARTIFACT_OVERRIDES[] !== nothing && !force - return ARTIFACT_OVERRIDES[] + return ARTIFACT_OVERRIDES[]::Dict{Symbol,Any} end # We organize our artifact location overrides into two camps: @@ -106,13 +114,8 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} # Overrides per UUID/bound name are intercepted upon Artifacts.toml load, and new # entries within the "hash" overrides are generated on-the-fly. Thus, all redirects # mechanistically happen through the "hash" overrides. - overrides = Dict{Symbol,Any}( - # Overrides by UUID - :UUID => Dict{Base.UUID,Dict{String,Union{String,SHA1}}}(), - - # Overrides by hash - :hash => Dict{SHA1,Union{String,SHA1}}(), - ) + overrides_uuid = Dict{Base.UUID,Dict{String,Union{String,SHA1}}}() + overrides_hash = Dict{SHA1,Union{String,SHA1}}() for override_file in reverse(artifacts_dirs("Overrides.toml")) !isfile(override_file) && continue @@ -131,7 +134,6 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} # Next, determine if this is a hash override or a UUID/name override if isa(mapping, String) || isa(mapping, SHA1) # if this mapping is a direct mapping (e.g. 
a String), store it as a hash override - local hash_str hash = tryparse(Base.SHA1, k) if hash === nothing @error("Invalid override in '$(override_file)': Invalid SHA1 hash '$(k)'") @@ -139,12 +141,12 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} end # If this mapping is the empty string, un-override it - if mapping == "" - delete!(overrides[:hash], hash) + if mapping isa String && isempty(mapping) + delete!(overrides_hash, hash) else - overrides[:hash][hash] = mapping + overrides_hash[hash] = mapping end - elseif isa(mapping, Dict) + elseif isa(mapping, Dict{String, Any}) # Convert `k` into a uuid uuid = tryparse(Base.UUID, k) if uuid === nothing @@ -153,19 +155,18 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} end # If this mapping is itself a dict, store it as a set of UUID/artifact name overrides - ovruuid = overrides[:UUID]::Dict{Base.UUID,Dict{String,Union{String,SHA1}}} - if !haskey(ovruuid, uuid) - ovruuid[uuid] = Dict{String,Union{String,SHA1}}() + if !haskey(overrides_uuid, uuid) + overrides_uuid[uuid] = Dict{String,Union{String,SHA1}}() end # For each name in the mapping, update appropriately for (name, override_value) in mapping # If the mapping for this name is the empty string, un-override it - if override_value == "" - delete!(ovruuid[uuid], name) + if override_value isa String && isempty(override_value) + delete!(overrides_uuid[uuid], name) else # Otherwise, store it! - ovruuid[uuid][name] = override_value + overrides_uuid[uuid][name] = override_value::Union{Base.SHA1, String} end end else @@ -174,6 +175,12 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} end end + overrides = Dict{Symbol,Any}() + # Overrides by UUID + overrides[:UUID] = overrides_uuid + # Overrides by hash + overrides[:hash] = overrides_hash + ARTIFACT_OVERRIDES[] = overrides return overrides end @@ -190,11 +197,13 @@ Query the loaded `/artifacts/Overrides.toml` settings for artifacts that redirected to a particular path or another content-hash. """ function query_override(hash::SHA1; overrides::Dict{Symbol,Any} = load_overrides()) - return map_override_path(get(overrides[:hash], hash, nothing)) + overrides_hash = overrides[:hash]::Dict{SHA1,Union{String,SHA1}} + return map_override_path(get(overrides_hash, hash, nothing)) end function query_override(pkg::Base.UUID, artifact_name::String; overrides::Dict{Symbol,Any} = load_overrides()) - if haskey(overrides[:UUID], pkg) - return map_override_path(get(overrides[:UUID][pkg], artifact_name, nothing)) + overrides_uuid = overrides[:UUID]::Dict{Base.UUID,Dict{String,Union{String,SHA1}}} + if haskey(overrides_uuid, pkg) + return map_override_path(get(overrides_uuid[pkg], artifact_name, nothing)) end return nothing end @@ -284,7 +293,7 @@ function unpack_platform(entry::Dict{String,Any}, name::String, delete!(tags, "os") delete!(tags, "arch") delete!(tags, "git-tree-sha1") - return Platform(entry["arch"], entry["os"], tags) + return Platform(entry["arch"]::String, entry["os"]::String, tags) end function pack_platform!(meta::Dict, p::AbstractPlatform) @@ -326,8 +335,11 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID) # Insert just-in-time hash overrides by looking up the names of anything we need to # override for this UUID, and inserting new overrides for those hashes. 
overrides = load_overrides() - if haskey(overrides[:UUID], pkg_uuid) - pkg_overrides = overrides[:UUID][pkg_uuid]::Dict{String, <:Any} + overrides_uuid = overrides[:UUID]::Dict{Base.UUID,Dict{String,Union{String,SHA1}}} + overrides_hash = overrides[:hash]::Dict{SHA1,Union{String,SHA1}} + + if haskey(overrides_uuid, pkg_uuid) + pkg_overrides = overrides_uuid[pkg_uuid]::Dict{String, <:Any} for name in keys(artifact_dict) # Skip names that we're not overriding @@ -336,14 +348,16 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID) end # If we've got a platform-specific friend, override all hashes: - if isa(artifact_dict[name], Array) - for entry in artifact_dict[name] - hash = SHA1(entry["git-tree-sha1"]) - overrides[:hash][hash] = overrides[:UUID][pkg_uuid][name] + artifact_dict_name = artifact_dict[name] + if isa(artifact_dict_name, Vector{Any}) + for entry in artifact_dict_name + entry = entry::Dict{String,Any} + hash = SHA1(entry["git-tree-sha1"]::String) + overrides_hash[hash] = overrides_uuid[pkg_uuid][name] end - elseif isa(artifact_dict[name], Dict) - hash = SHA1(artifact_dict[name]["git-tree-sha1"]) - overrides[:hash][hash] = overrides[:UUID][pkg_uuid][name] + elseif isa(artifact_dict_name, Dict{String, Any}) + hash = SHA1(artifact_dict_name["git-tree-sha1"]::String) + overrides_hash[hash] = overrides_uuid[pkg_uuid][name] end end end @@ -386,9 +400,9 @@ function artifact_meta(name::String, artifact_dict::Dict, artifacts_toml::String # If it's an array, find the entry that best matches our current platform if isa(meta, Vector) - dl_dict = Dict{AbstractPlatform,Dict{String,Any}}() + dl_dict = Dict{Platform,Dict{String,Any}}() for x in meta - x::Dict{String} + x = x::Dict{String, Any} dl_dict[unpack_platform(x, name, artifacts_toml)] = x end meta = select_platform(dl_dict, platform) @@ -399,9 +413,12 @@ function artifact_meta(name::String, artifact_dict::Dict, artifacts_toml::String end # This is such a no-no, we are going to call it out right here, right now. - if meta !== nothing && !haskey(meta, "git-tree-sha1") - @error("Invalid artifacts file at $(artifacts_toml): artifact '$name' contains no `git-tree-sha1`!") - return nothing + if meta !== nothing + meta = meta::Dict{String, Any} + if !haskey(meta, "git-tree-sha1") + @error("Invalid artifacts file at $(artifacts_toml): artifact '$name' contains no `git-tree-sha1`!") + return nothing + end end # Return the full meta-dict. 
@@ -426,7 +443,7 @@ function artifact_hash(name::String, artifacts_toml::String; return nothing end - return SHA1(meta["git-tree-sha1"]) + return SHA1(meta["git-tree-sha1"]::String) end function select_downloadable_artifacts(artifact_dict::Dict, artifacts_toml::String; @@ -525,11 +542,11 @@ function jointail(dir, tail) end end -function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, @nospecialize(lazyartifacts)) - moduleroot = Base.moduleroot(__module__) - if haskey(Base.module_keys, moduleroot) +function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, ::Val{LazyArtifacts}) where LazyArtifacts + pkg = Base.PkgId(__module__) + if pkg.uuid !== nothing # Process overrides for this UUID, if we know what it is - process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid) + process_overrides(artifact_dict, pkg.uuid) end # If the artifact exists, we're in the happy path and we can immediately @@ -544,11 +561,11 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic # If not, try determining what went wrong: meta = artifact_meta(name, artifact_dict, artifacts_toml; platform) if meta !== nothing && get(meta, "lazy", false) - if lazyartifacts isa Module && isdefined(lazyartifacts, :ensure_artifact_installed) - if nameof(lazyartifacts) in (:Pkg, :Artifacts) + if LazyArtifacts isa Module && isdefined(LazyArtifacts, :ensure_artifact_installed) + if nameof(LazyArtifacts) in (:Pkg, :Artifacts) Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true) end - return jointail(lazyartifacts.ensure_artifact_installed(string(name), artifacts_toml; platform), path_tail) + return jointail(LazyArtifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail) end error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.") end @@ -625,10 +642,9 @@ function artifact_slash_lookup(name::String, artifact_dict::Dict, if meta === nothing error("Cannot locate artifact '$(name)' for $(triplet(platform)) in '$(artifacts_toml)'") end - hash = SHA1(meta["git-tree-sha1"]) + hash = SHA1(meta["git-tree-sha1"]::String) return artifact_name, artifact_path_tail, hash end - """ macro artifact_str(name) @@ -676,35 +692,34 @@ macro artifact_str(name, platform=nothing) local artifact_dict = load_artifacts_toml(artifacts_toml) # Invalidate calling .ji file if Artifacts.toml file changes - Base.include_dependency(artifacts_toml) + Base.include_dependency(artifacts_toml, track_content = true) # Check if the user has provided `LazyArtifacts`, and thus supports lazy artifacts # If not, check to see if `Pkg` or `Pkg.Artifacts` has been imported. - lazyartifacts = nothing + LazyArtifacts = nothing for module_name in (:LazyArtifacts, :Pkg, :Artifacts) if isdefined(__module__, module_name) - lazyartifacts = GlobalRef(__module__, module_name) + LazyArtifacts = GlobalRef(__module__, module_name) break end end # If `name` is a constant, (and we're using the default `Platform`) we can actually load # and parse the `Artifacts.toml` file now, saving the work from runtime. 
- if isa(name, AbstractString) && platform === nothing - # To support slash-indexing, we need to split the artifact name from the path tail: + if platform === nothing platform = HostPlatform() + end + if isa(name, AbstractString) && isa(platform, AbstractPlatform) + # To support slash-indexing, we need to split the artifact name from the path tail: artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform) return quote - Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), $(lazyartifacts))::String + Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), Val($(LazyArtifacts)))::String end else - if platform === nothing - platform = :($(HostPlatform)()) - end return quote local platform = $(esc(platform)) local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml), platform) - Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, $(lazyartifacts))::String + Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, Val($(LazyArtifacts)))::String end end end @@ -742,6 +757,6 @@ precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},)) precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String)) precompile(parse_mapping, (String, String, String)) precompile(parse_mapping, (Dict{String, Any}, String, String)) - +precompile(Tuple{typeof(Artifacts._artifact_str), Module, String, Base.SubString{String}, String, Base.Dict{String, Any}, Base.SHA1, Base.BinaryPlatforms.Platform, Any}) end # module Artifacts diff --git a/stdlib/Artifacts/test/Artifacts.toml b/stdlib/Artifacts/test/Artifacts.toml index 4b715b74c128b..5faf1012dec54 100644 --- a/stdlib/Artifacts/test/Artifacts.toml +++ b/stdlib/Artifacts/test/Artifacts.toml @@ -1,146 +1,163 @@ [[HelloWorldC]] arch = "aarch64" -git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499" +git-tree-sha1 = "0835a23111b12d2aa5e1f7a852ed71e0b92e3425" os = "macos" [[HelloWorldC.download]] - sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz" + sha256 = "4406a35689feaf532ff0347a11896449571e8a1c919e5550b01dfe10f2e64822" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-apple-darwin.tar.gz" [[HelloWorldC]] arch = "aarch64" -git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9" +git-tree-sha1 = "c82465bd6d0aa1369ff2fd961b73884d1f5de49a" libc = "glibc" os = "linux" [[HelloWorldC.download]] - sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz" + sha256 = "5bfa84332c7ee485ca8e2eee216ad9fa77b2c43d5f261baa823e301b7c789ec4" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-linux-gnu.tar.gz" [[HelloWorldC]] arch = "aarch64" -git-tree-sha1 = 
"dc43ab874611cfc26641741c31b8230276d7d664" +git-tree-sha1 = "cb4b8c88778c6cd93b6df38ec5b95a2678434f5d" libc = "musl" os = "linux" [[HelloWorldC.download]] - sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz" + sha256 = "924df1c2a386f79a2727a2f989393102649a24863214f2e88cb4a677d3d22e14" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-linux-musl.tar.gz" +[[HelloWorldC]] +arch = "aarch64" +git-tree-sha1 = "7db155cf8485fbeb23d30a305f76ece191db9dc4" +os = "freebsd" + + [[HelloWorldC.download]] + sha256 = "d86d992f428df1264d55d7ac886ccd0a0539fda82363bf5dda872d12ea742528" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.aarch64-unknown-freebsd.tar.gz" [[HelloWorldC]] arch = "armv6l" call_abi = "eabihf" -git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9" +git-tree-sha1 = "20a32b71145b67e708f63fb5880a7243727aec0f" libc = "glibc" os = "linux" [[HelloWorldC.download]] - sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz" + sha256 = "6f0997b0aad387ba6e2402530642bb4ded85b0243460d2e4b13d94f2c8340a44" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv6l-linux-gnueabihf.tar.gz" [[HelloWorldC]] arch = "armv6l" call_abi = "eabihf" -git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed" +git-tree-sha1 = "c1179604ea37fa66ee6d5d592c7bbfd1f20292c3" libc = "musl" os = "linux" [[HelloWorldC.download]] - sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz" + sha256 = "0aca47bce6f09c38a7939277a593deb988123fe59f7992225a1ede8e174f1b06" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv6l-linux-musleabihf.tar.gz" [[HelloWorldC]] arch = "armv7l" call_abi = "eabihf" -git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8" +git-tree-sha1 = "0a8e7b523ef6be31311aefe9983a488616e58201" libc = "glibc" os = "linux" [[HelloWorldC.download]] - sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz" + sha256 = "f29f4da556d2b4ee9eaff7740aa0f9436406b75b0f1ec428e881a47ab7b7477b" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv7l-linux-gnueabihf.tar.gz" [[HelloWorldC]] arch = "armv7l" call_abi = "eabihf" -git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f" +git-tree-sha1 = "ca94b4d87f1a276066a2994733142e35046c41dd" libc = "musl" os = "linux" [[HelloWorldC.download]] - sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz" + sha256 = 
"5fb4019d6d797e5e3860cfec90cab12f6865fa624e87b51c20220a44bb94846a" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.armv7l-linux-musleabihf.tar.gz" [[HelloWorldC]] arch = "i686" -git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f" +git-tree-sha1 = "91376c8b0bc90c47076cab4e55bf77e86bb59076" libc = "glibc" os = "linux" [[HelloWorldC.download]] - sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz" + sha256 = "b775c985231cd0626afd0111902a764c75c9a8a123b12e1f386a1c2af3cef799" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.i686-linux-gnu.tar.gz" [[HelloWorldC]] arch = "i686" -git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e" +git-tree-sha1 = "b50220be02e9c839749f91a70694ae68c2712c8e" libc = "musl" os = "linux" [[HelloWorldC.download]] - sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz" + sha256 = "6aecc06cf803ad16703744610deb243a21b39e19ae1951a38977610881698f9e" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.i686-linux-musl.tar.gz" [[HelloWorldC]] arch = "i686" -git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff" +git-tree-sha1 = "cc9cfa3272d4d3844d6fcf8b6b971bd68dbc792f" os = "windows" [[HelloWorldC.download]] - sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz" + sha256 = "bbf3276bcfc8223061c3b1cf8725425bfc33ac2929214ba57eecfd170d30f096" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.i686-w64-mingw32.tar.gz" [[HelloWorldC]] arch = "powerpc64le" -git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b" +git-tree-sha1 = "5e9c87fc4e3372c27a77061a49d97fa5002df0e4" +libc = "glibc" +os = "linux" + + [[HelloWorldC.download]] + sha256 = "e2a728b29124fc7408d6e47cc6fc943d0336d1386e56a3775a0665b34528881b" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.powerpc64le-linux-gnu.tar.gz" +[[HelloWorldC]] +arch = "riscv64" +git-tree-sha1 = "3c9b23e46b82ab59141bbbc042158af4037d846d" libc = "glibc" os = "linux" [[HelloWorldC.download]] - sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz" + sha256 = "59e2250eab04924eb7167d3232e4b0176c53097e4b21f2f3e3621f1e39f43107" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.riscv64-linux-gnu.tar.gz" [[HelloWorldC]] arch = "x86_64" -git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc" +git-tree-sha1 = "2e1742c9c0addd693b0b025f7a1e7aa4c50a0e6c" os = "macos" [[HelloWorldC.download]] - sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3" - url = 
"https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz" + sha256 = "c4f0c83ae4f72a039c33beb26ebb1d4c0fb739f34360102be79909a0dc17f47f" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-apple-darwin.tar.gz" [[HelloWorldC]] arch = "x86_64" -git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf" +git-tree-sha1 = "8c8251b0c21615bce0701995eded26ac7697b5cc" libc = "glibc" os = "linux" [[HelloWorldC.download]] - sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz" + sha256 = "974f7e1d1cdbebad149e51fed4f1b7c6a0b5ccfa350f7d252dfcf66c2dbf9f63" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-linux-gnu.tar.gz" [[HelloWorldC]] arch = "x86_64" -git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385" +git-tree-sha1 = "cfaaf0517421585561e3b30dd6f53f6c14b2835f" libc = "musl" os = "linux" [[HelloWorldC.download]] - sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz" + sha256 = "25d3d6ecc753f4dbbcaab0db7b6c20b29b0a79b0c31f7a26a0cf18c365d27809" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-linux-musl.tar.gz" [[HelloWorldC]] arch = "x86_64" -git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1" +git-tree-sha1 = "8e8a17876a9c1147bae6a53a175344b805ee72d4" os = "freebsd" [[HelloWorldC.download]] - sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz" + sha256 = "61a3f945941adbf75c87c1c28f05e95b187959fedf29ecaa36519c5d1941bf23" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-unknown-freebsd.tar.gz" [[HelloWorldC]] arch = "x86_64" -git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b" +git-tree-sha1 = "6e1eb164b0651aa44621eac4dfa340d6e60295ef" os = "windows" [[HelloWorldC.download]] - sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz" + sha256 = "1f10e46f7b073136f7f668de89096d631ae8bb8903547d588f6817f0b780b2fc" + url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.4.0+0/HelloWorldC.v1.4.0.x86_64-w64-mingw32.tar.gz" [socrates] git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239" diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl index 67117217be549..cb81c16347abf 100644 --- a/stdlib/Artifacts/test/runtests.jl +++ b/stdlib/Artifacts/test/runtests.jl @@ -1,12 +1,92 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +import Base: SHA1 using Artifacts, Test, Base.BinaryPlatforms -using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform +using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform, load_overrides +using TOML # prepare for the package tests by ensuring the required artifacts are downloaded now artifacts_dir = mktempdir() run(addenv(`$(Base.julia_cmd()) --color=no $(joinpath(@__DIR__, "refresh_artifacts.jl")) $(artifacts_dir)`, "TERM"=>"dumb")) +@testset "Load Overrides" begin + """ + create_test_overrides_toml(temp_dir::String) + + Create "Overrides.toml" in the given `temp_dir`. + """ + function create_test_overrides_toml(temp_dir::String) + # Define the overrides + overrides = Dict( + "78f35e74ff113f02274ce60dab6e92b4546ef806" => "/path/to/replacement", + "c76f8cda85f83a06d17de6c57aabf9e294eb2537" => "fb886e813a4aed4147d5979fcdf27457d20aa35d", + "d57dbccd-ca19-4d82-b9b8-9d660942965b" => Dict( + "c_simple" => "/path/to/c_simple_dir", + "libfoo" => "fb886e813a4aed4147d5979fcdf27457d20aa35d" + ) + ) + + # Get the artifacts directory + artifacts_dir = joinpath(temp_dir, "artifacts") + + # Ensure the artifacts directory exists + isdir(artifacts_dir) || mkdir(artifacts_dir) + + # Get the path to the Overrides.toml file + overrides_path = joinpath(artifacts_dir, "Overrides.toml") + + # Create the Overrides.toml file + open(overrides_path, "w") do io + TOML.print(io, overrides) + end + end + + # Specify the expected test result when depot path does not exist or no overriding happened + empty_output = Dict{Symbol, Any}( + :UUID => Dict{Base.UUID, Dict{String, Union{SHA1, String}}}(), + :hash => Dict{SHA1, Union{SHA1, String}}() + ) + + # Specify the expected test result when overriding happened + expected_output = Dict{Symbol, Any}( + :UUID => Dict{Base.UUID, Dict{String, Union{SHA1, String}}}(Base.UUID("d57dbccd-ca19-4d82-b9b8-9d660942965b") => Dict("c_simple" => "/path/to/c_simple_dir", "libfoo" => SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d"))), + :hash => Dict{SHA1, Union{SHA1, String}}(SHA1("78f35e74ff113f02274ce60dab6e92b4546ef806") => "/path/to/replacement", SHA1("c76f8cda85f83a06d17de6c57aabf9e294eb2537") => SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d")) + ) + + # Test `load_overrides()` works with *no* "Overrides.toml" file + @test load_overrides() == empty_output + + # Create a temporary directory + mktempdir() do temp_dir + # Back up the old `DEPOT_PATH`` + old_depot_path = copy(Base.DEPOT_PATH) + + # Set `DEPOT_PATH` to that directory + empty!(Base.DEPOT_PATH) + push!(Base.DEPOT_PATH, temp_dir) + + try + # Create "Overrides.toml" for the test + create_test_overrides_toml(temp_dir) + + # Test `load_overrides()` works *with* "Overrides.toml" file but non-nothing ARTIFACT_OVERRIDES[] + @test load_overrides() == empty_output + + # Test `load_overrides()` works *with* "Overrides.toml" file with force parameter, which overrides even when `ARTIFACT_OVERRIDES[] !== nothing`` + @test load_overrides(force=true) == expected_output + finally # Make sure `DEPOT_PATH` will be restored to the status quo in the event of a bug + # Restore the old `DEPOT_PATH` to avoid messing with any other code + empty!(Base.DEPOT_PATH) + append!(Base.DEPOT_PATH, old_depot_path) + end + end + # Temporary directory and test "Overrides.toml" file will be automatically deleted when out of scope + # This means after this block, the system *should* behave like this test never happened. 
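    # A hedged, self-contained sketch (not part of the test above) of the two override
    # shapes that `expected_output` encodes, reusing only the `TOML` and `SHA1` bindings
    # imported at the top of this file: a 40-character hexadecimal value redirects an
    # artifact to another content tree hash, while any other string redirects it to a
    # directory on disk.
    example_entry = TOML.parse("""
    [d57dbccd-ca19-4d82-b9b8-9d660942965b]
    libfoo = "fb886e813a4aed4147d5979fcdf27457d20aa35d"
    c_simple = "/path/to/c_simple_dir"
    """)["d57dbccd-ca19-4d82-b9b8-9d660942965b"]
    SHA1(example_entry["libfoo"])   # parses as a SHA1, so it is treated as a hash-style override
    example_entry["c_simple"]       # not a hash, so it is treated as a path-style override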
+ + # Test the "Overrides.toml" file is cleared back to the status quo + @test load_overrides(force=true) == empty_output +end + @testset "Artifact Paths" begin mktempdir() do tempdir with_artifacts_directory(tempdir) do @@ -115,20 +195,37 @@ end with_artifacts_directory(artifacts_dir) do win64 = Platform("x86_64", "windows") mac64 = Platform("x86_64", "macos") - @test basename(@artifact_str("HelloWorldC", win64)) == "2f1a6d4f82cd1eea785a5141b992423c09491f1b" - @test basename(@artifact_str("HelloWorldC", mac64)) == "f8ab5a03697f9afc82210d8a2be1d94509aea8bc" + @test basename(@artifact_str("HelloWorldC", win64)) == "6e1eb164b0651aa44621eac4dfa340d6e60295ef" + @test basename(@artifact_str("HelloWorldC", mac64)) == "2e1742c9c0addd693b0b025f7a1e7aa4c50a0e6c" end end +@testset "artifact_hash()" begin + # Use the Linus OS on an ARMv7L architecture for the tests to make tests reproducible + armv7l_linux = Platform("armv7l", "linux") + + # Check the first key in Artifacts.toml is hashed correctly + @test artifact_hash("HelloWorldC", joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) == + SHA1("0a8e7b523ef6be31311aefe9983a488616e58201") + + # Check the second key in Artifacts.toml is hashed correctly + @test artifact_hash("socrates", joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) == + SHA1("43563e7631a7eafae1f9f8d9d332e3de44ad7239") + + # Check artifact_hash() works for any AbstractString + @test artifact_hash(SubString("HelloWorldC0", 1, 11), joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) == + SHA1("0a8e7b523ef6be31311aefe9983a488616e58201") +end + @testset "select_downloadable_artifacts()" begin armv7l_linux = Platform("armv7l", "linux") artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) @test length(keys(artifacts)) == 1 - @test artifacts["HelloWorldC"]["git-tree-sha1"] == "5a8288c8a30578c0d0f24a9cded29579517ce7a8" + @test artifacts["HelloWorldC"]["git-tree-sha1"] == "0a8e7b523ef6be31311aefe9983a488616e58201" artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux, include_lazy=true) @test length(keys(artifacts)) == 2 - @test artifacts["HelloWorldC"]["git-tree-sha1"] == "5a8288c8a30578c0d0f24a9cded29579517ce7a8" + @test artifacts["HelloWorldC"]["git-tree-sha1"] == "0a8e7b523ef6be31311aefe9983a488616e58201" @test artifacts["socrates"]["git-tree-sha1"] == "43563e7631a7eafae1f9f8d9d332e3de44ad7239" end @@ -161,6 +258,10 @@ end @testset "`Artifacts.artifact_names` and friends" begin n = length(Artifacts.artifact_names) @test length(Base.project_names) == n - @test length(Base.manifest_names) == n + @test length(Base.manifest_names) == 2n # there are two manifest names per project name @test length(Base.preferences_names) == n end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Artifacts)) +end diff --git a/stdlib/Base64/Project.toml b/stdlib/Base64/Project.toml index 68d63837fc385..14796beb7e21a 100644 --- a/stdlib/Base64/Project.toml +++ b/stdlib/Base64/Project.toml @@ -1,5 +1,6 @@ name = "Base64" uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Base64/docs/src/index.md b/stdlib/Base64/docs/src/index.md index 6bc647f8a2e67..26e9d70f2ff9f 100644 --- a/stdlib/Base64/docs/src/index.md +++ b/stdlib/Base64/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Base64/docs/src/index.md" +``` + # Base64 
```@docs diff --git a/stdlib/Base64/src/buffer.jl b/stdlib/Base64/src/buffer.jl index 44a9c0931ac95..009a6d56cfde8 100644 --- a/stdlib/Base64/src/buffer.jl +++ b/stdlib/Base64/src/buffer.jl @@ -2,37 +2,37 @@ # Data buffer for pipes. mutable struct Buffer - data::Vector{UInt8} - ptr::Ptr{UInt8} + const data::Memory{UInt8} + offset::Int size::Int function Buffer(bufsize) - data = Vector{UInt8}(undef, bufsize) - return new(data, pointer(data), 0) + data = Memory{UInt8}(undef, bufsize) + return new(data, 0, 0) end end Base.empty!(buffer::Buffer) = buffer.size = 0 -Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i) -Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i) +Base.getindex(buffer::Buffer, i::Integer) = buffer.data[buffer.offset + i] +Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = buffer.data[buffer.offset + i] = v Base.firstindex(buffer::Buffer) = 1 Base.lastindex(buffer::Buffer) = buffer.size -Base.pointer(buffer::Buffer) = buffer.ptr -capacity(buffer::Buffer) = Int(pointer(buffer.data, lastindex(buffer.data) + 1) - buffer.ptr) +Base.pointer(buffer::Buffer) = pointer(buffer.data) + buffer.offset +capacity(buffer::Buffer) = length(buffer.data) - buffer.offset function consumed!(buffer::Buffer, n::Integer) @assert n ≤ buffer.size - buffer.ptr += n + buffer.offset += n buffer.size -= n end function read_to_buffer(io::IO, buffer::Buffer) - offset = buffer.ptr - pointer(buffer.data) + offset = buffer.offset copyto!(buffer.data, 1, buffer.data, offset + 1, buffer.size) - buffer.ptr = pointer(buffer.data) + buffer.offset = 0 if !eof(io) n = min(bytesavailable(io), capacity(buffer) - buffer.size) - unsafe_read(io, buffer.ptr + buffer.size, n) + unsafe_read(io, pointer(buffer) + buffer.size, n) buffer.size += n end return diff --git a/stdlib/Base64/test/runtests.jl b/stdlib/Base64/test/runtests.jl index 11d0a3cca4348..145576f6ea3f4 100644 --- a/stdlib/Base64/test/runtests.jl +++ b/stdlib/Base64/test/runtests.jl @@ -1,7 +1,8 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license using Test, Random -import Base64: +using Base64: + Base64, Base64EncodePipe, base64encode, Base64DecodePipe, @@ -142,3 +143,7 @@ end @test String(base64decode(splace(longEncodedText))) == longDecodedText end end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Base64)) +end diff --git a/stdlib/CRC32c/Project.toml b/stdlib/CRC32c/Project.toml index c1de88cbc7c52..d3ab5ff019503 100644 --- a/stdlib/CRC32c/Project.toml +++ b/stdlib/CRC32c/Project.toml @@ -1,5 +1,6 @@ name = "CRC32c" uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/CRC32c/docs/src/index.md b/stdlib/CRC32c/docs/src/index.md index 24a073d1e3938..c00a792232c70 100644 --- a/stdlib/CRC32c/docs/src/index.md +++ b/stdlib/CRC32c/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/CRC32c/docs/src/index.md" +``` + # CRC32c Standard library module for computing the CRC-32c checksum. diff --git a/stdlib/CRC32c/src/CRC32c.jl b/stdlib/CRC32c/src/CRC32c.jl index 35d2d4cb339d6..923f7333b4c17 100644 --- a/stdlib/CRC32c/src/CRC32c.jl +++ b/stdlib/CRC32c/src/CRC32c.jl @@ -7,7 +7,7 @@ See [`CRC32c.crc32c`](@ref) for more information. 
""" module CRC32c -import Base.FastContiguousSubArray +import Base: DenseBytes export crc32c @@ -15,9 +15,9 @@ export crc32c crc32c(data, crc::UInt32=0x00000000) Compute the CRC-32c checksum of the given `data`, which can be -an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass -a starting `crc` integer to be mixed in with the checksum. The `crc` parameter -can be used to compute a checksum on data divided into chunks: performing +an `Array{UInt8}`, a contiguous subarray thereof, an `AbstractVector{UInt8}`, or a `String`. +Optionally, you can pass a starting `crc` integer to be mixed in with the checksum. +The `crc` parameter can be used to compute a checksum on data divided into chunks: performing `crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`. (Technically, a little-endian checksum is computed.) @@ -29,13 +29,31 @@ calling [`take!`](@ref). For a `String`, note that the result is specific to the UTF-8 encoding (a different checksum would be obtained from a different Unicode encoding). -To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`, +To checksum an `a::AbstractArray` of some other bitstype without padding, +you can do `crc32c(vec(reinterpret(UInt8,a)))`, but note that the result may be endian-dependent. """ function crc32c end +function crc32c(a::AbstractVector{UInt8}, crc::UInt32=0x00000000) + # use block size 24576=8192*3, since that is the threshold for + # 3-way parallel SIMD code in the underlying jl_crc32c C function. + last = lastindex(a) + nb = length(a) + buf = Memory{UInt8}(undef, Int(min(nb, 24576))) + while nb > 0 + n = min(nb, 24576) + copyto!(buf, 1, a, last - nb + 1, n) + crc = Base.unsafe_crc32c(buf, n % Csize_t, crc) + nb -= n + end + return crc +end + +function crc32c(a::DenseBytes, crc::UInt32=0x00000000) + Base._crc32c(a, crc) +end -crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = Base._crc32c(a, crc) crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) = Base._crc32c(s, crc) """ @@ -47,6 +65,5 @@ mixed with a starting `crc` integer. 
If `nb` is not supplied, then """ crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) = Base._crc32c(io, nb, crc) crc32c(io::IO, crc::UInt32=0x00000000) = Base._crc32c(io, crc) -crc32c(io::IOStream, crc::UInt32=0x00000000) = Base._crc32c(io, crc) end diff --git a/stdlib/CRC32c/test/runtests.jl b/stdlib/CRC32c/test/runtests.jl index e9e933ee2451c..37b447e6d999a 100644 --- a/stdlib/CRC32c/test/runtests.jl +++ b/stdlib/CRC32c/test/runtests.jl @@ -3,12 +3,23 @@ using Test, Random using CRC32c +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) +using .Main.OffsetArrays: Origin + +isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) +using .Main.FillArrays: Fill + function test_crc32c(crc32c) # CRC32c checksum (test data generated from @andrewcooke's CRC.jl package) for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)] s = String(UInt8[1:n;]) ss = SubString(String(UInt8[0:(n+1);]), 2:(n+1)) @test crc32c(UInt8[1:n;]) == crc == crc32c(s) == crc32c(ss) + @test crc == crc32c(UInt8(1):UInt8(n)) + m = Memory{UInt8}(undef, n) + m .= 1:n + @test crc == crc32c(m) end # test that crc parameter is equivalent to checksum of concatenated data, @@ -45,6 +56,30 @@ function test_crc32c(crc32c) rm(f, force=true) end end + + # test longer arrays to cover all the code paths in crc32c.c + LONG = 8192 # from crc32c.c + SHORT = 256 # from crc32c.c + n = LONG*3+SHORT*3+SHORT*2+64+7 + bigg = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;]) + for (offset,crc) in [(0, 0x13a5ecd5), (1, 0xecf34b7e), (2, 0xfa71b596), (3, 0xbfd24745), (4, 0xf0cb3370), (5, 0xb0ec88b5), (6, 0x258c20a8), (7, 0xa9bd638d)] + @test crc == crc32c(@view bigg[1+offset:end]) + end + + # test crc of AbstractVector{UInt8} + @test crc32c(Origin(0)(b"hello")) == crc32c(b"hello") + weird_vectors = [ + view(rand(UInt8, 300000), 1:2:300000), + vec(reinterpret(UInt8, collect(Int64(1):Int64(4)))), + vec(reinterpret(UInt8, Int64(1):Int64(4))), + view([0x01, 0x02], UInt(1):UInt(2)), + Fill(0x00, UInt(100)), + Fill(0x00, big(100)), + reinterpret(UInt8, BitVector((true, false, true, false))), + ] + for a in weird_vectors + @test crc32c(a) == crc32c(collect(a)) + end end unsafe_crc32c_sw(a, n, crc) = ccall(:jl_crc32c_sw, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n) @@ -55,6 +90,8 @@ function crc32c_sw(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) 
unsafe_crc32c_sw(s, sizeof(s), crc) end +crc32c_sw(a::AbstractVector{UInt8}, crc::UInt32=0x00000000) = + crc32c_sw(copyto!(Vector{UInt8}(undef, length(a)), a)) function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000) nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0")) buf = Vector{UInt8}(undef, min(nb, 24576)) @@ -68,3 +105,7 @@ end crc32c_sw(io::IO, crc::UInt32=0x00000000) = crc32c_sw(io, typemax(Int64), crc) test_crc32c(crc32c) test_crc32c(crc32c_sw) + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(CRC32c)) +end diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml index 4c7aa35a99730..2f8143a77d740 100644 --- a/stdlib/CompilerSupportLibraries_jll/Project.toml +++ b/stdlib/CompilerSupportLibraries_jll/Project.toml @@ -2,9 +2,9 @@ name = "CompilerSupportLibraries_jll" uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" # NOTE: When updating this, also make sure to update the value -# `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable +# `CSL_NEXT_GLIBCXX_VERSION` in `Make.inc`, to properly disable # automatic usage of BB-built CSLs on extremely up-to-date systems! -version = "1.0.5+0" +version = "1.3.0+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl index bd7a0571f9d5a..b4df77c5167da 100644 --- a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl +++ b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl @@ -4,7 +4,6 @@ baremodule CompilerSupportLibraries_jll using Base, Libdl, Base.BinaryPlatforms -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] @@ -15,6 +14,8 @@ export libgfortran, libstdcxx, libgomp const PATH = Ref("") const LIBPATH = Ref("") artifact_dir::String = "" +libgcc_s_handle::Ptr{Cvoid} = C_NULL +libgcc_s_path::String = "" libgfortran_handle::Ptr{Cvoid} = C_NULL libgfortran_path::String = "" libstdcxx_handle::Ptr{Cvoid} = C_NULL diff --git a/stdlib/Dates/Project.toml b/stdlib/Dates/Project.toml index fe225055bad98..45da6ad1a0152 100644 --- a/stdlib/Dates/Project.toml +++ b/stdlib/Dates/Project.toml @@ -1,5 +1,6 @@ name = "Dates" uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" [deps] Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md index aa46f7b827f10..38b4f7ae86d29 100644 --- a/stdlib/Dates/docs/src/index.md +++ b/stdlib/Dates/docs/src/index.md @@ -1,7 +1,3 @@ -```@meta -EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/docs/src/index.md" -``` - # Dates ```@meta @@ -22,7 +18,7 @@ represents a continuously increasing machine timeline based on the UT second [^1 [`DateTime`](@ref) type is not aware of time zones (*naive*, in Python parlance), analogous to a *LocalDateTime* in Java 8. Additional time zone functionality can be added through the [TimeZones.jl package](https://github.com/JuliaTime/TimeZones.jl/), which -compiles the [IANA time zone database](http://www.iana.org/time-zones). Both [`Date`](@ref) and +compiles the [IANA time zone database](https://www.iana.org/time-zones). Both [`Date`](@ref) and [`DateTime`](@ref) are based on the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) standard, which follows the proleptic Gregorian calendar. 
One note is that the ISO 8601 standard is particular about BC/BCE dates. In general, the last day of the BC/BCE era, 1-12-31 BC/BCE, was followed by 1-1-1 AD/CE, thus no year zero exists. @@ -97,7 +93,7 @@ parser know which periods to parse in each slot. As in the case of constructors above such as `Date(2013)`, delimited `DateFormat`s allow for missing parts of dates and times so long as the preceding parts are given. The other parts are given the usual -default values. For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst +default values. For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst `Date("31/12", dateformat"d/m/y")` gives `0001-12-31`. (Note that the default year is 1 AD/CE.) An empty string, however, always throws an `ArgumentError`. @@ -343,12 +339,12 @@ First the mapping is loaded into the `LOCALES` variable: julia> french_months = ["janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"]; -julia> french_monts_abbrev = ["janv","févr","mars","avril","mai","juin", +julia> french_months_abbrev = ["janv","févr","mars","avril","mai","juin", "juil","août","sept","oct","nov","déc"]; julia> french_days = ["lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche"]; -julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_monts_abbrev, french_days, [""]); +julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_months_abbrev, french_days, [""]); ``` The above mentioned functions can then be used to perform the queries: @@ -549,7 +545,7 @@ it could represent, in days, a value of 28, 29, 30, or 31 depending on the year Or a year could represent 365 or 366 days in the case of a leap year. [`Period`](@ref) types are simple [`Int64`](@ref) wrappers and are constructed by wrapping any `Int64` convertible type, i.e. `Year(1)` or `Month(3.0)`. Arithmetic between [`Period`](@ref) of the same type behave like integers, and -limited `Period-Real` arithmetic is available. You can extract the underlying integer with +limited `Period-Real` arithmetic is available. You can extract the underlying integer with [`Dates.value`](@ref). ```jldoctest @@ -688,9 +684,9 @@ value in the days field is uncertain. See the [API reference](@ref stdlib-dates-api) for additional information on methods exported from the `Dates` module. 
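
As a quick, hedged illustration of the `Period` wrapper behaviour described earlier (a plain `julia` sketch rather than a doctest, using only the constructors and `Dates.value` shown above):

```julia
using Dates

Year(1) + Year(2)      # Year(3): same-type arithmetic behaves like the wrapped Int64s
Month(2) * 3           # Month(6): the limited Period-Real arithmetic mentioned above
Dates.value(Month(6))  # 6: extract the underlying integer
Year(1) + Month(2)     # mixed types collect into a Dates.CompoundPeriod ("1 year, 2 months")
```
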
-# [API reference](@id stdlib-dates-api) +## [API reference](@id stdlib-dates-api) -## Dates and Time Types +### Dates and Time Types ```@docs Dates.Period @@ -705,7 +701,7 @@ Dates.TimeZone Dates.UTC ``` -## Dates Functions +### Dates Functions ```@docs Dates.DateTime(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64) @@ -734,7 +730,7 @@ Dates.now(::Type{Dates.UTC}) Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType}) ``` -### Accessor Functions +#### Accessor Functions ```@docs Dates.year @@ -762,7 +758,7 @@ Dates.monthday Dates.yearmonthday ``` -### Query Functions +#### Query Functions ```@docs Dates.dayname @@ -781,7 +777,7 @@ Dates.quarterofyear Dates.dayofquarter ``` -### Adjuster Functions +#### Adjuster Functions ```@docs Base.trunc(::Dates.TimeType, ::Type{Dates.Period}) @@ -801,7 +797,7 @@ Dates.tonext(::Function, ::Dates.TimeType) Dates.toprev(::Function, ::Dates.TimeType) ``` -### Periods +#### Periods ```@docs Dates.Period(::Any) @@ -812,7 +808,7 @@ Dates.default Dates.periods ``` -### Rounding Functions +#### Rounding Functions `Date` and `DateTime` values can be rounded to a specified resolution (e.g., 1 month or 15 minutes) with `floor`, `ceil`, or `round`. @@ -841,7 +837,7 @@ Dates.date2epochdays Dates.datetime2epochms ``` -### Conversion Functions +#### Conversion Functions ```@docs Dates.today diff --git a/stdlib/Dates/src/Dates.jl b/stdlib/Dates/src/Dates.jl index a111ea24089c4..a4600a5f82043 100644 --- a/stdlib/Dates/src/Dates.jl +++ b/stdlib/Dates/src/Dates.jl @@ -32,7 +32,7 @@ for more information. """ module Dates -import Base: ==, isless, div, fld, mod, rem, gcd, lcm, +, -, *, /, %, broadcast +import Base: ==, isless, div, fld, mod, rem, gcd, lcm, +, -, *, /, % using Printf: @sprintf using Base.Iterators @@ -77,7 +77,7 @@ export Period, DatePeriod, TimePeriod, firstdayofmonth, lastdayofmonth, firstdayofyear, lastdayofyear, firstdayofquarter, lastdayofquarter, - adjust, tonext, toprev, tofirst, tolast, + tonext, toprev, tofirst, tolast, # io.jl ISODateTimeFormat, ISODateFormat, ISOTimeFormat, DateFormat, RFC1123Format, @dateformat_str diff --git a/stdlib/Dates/src/accessors.jl b/stdlib/Dates/src/accessors.jl index 05e9017303ef1..211b5678c90d8 100644 --- a/stdlib/Dates/src/accessors.jl +++ b/stdlib/Dates/src/accessors.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license # Convert # of Rata Die days to proleptic Gregorian calendar y,m,d,w -# Reference: http://mysite.verizon.net/aesir_research/date/date0.htm +# Reference: https://www.researchgate.net/profile/Peter-Baum/publication/316558298_Date_Algorithms/links/5f90c3f992851c14bcdb0da6/Date-Algorithms.pdf function yearmonthday(days) z = days + 306; h = 100z - 25; a = fld(h, 3652425); b = a - fld(a, 4) y = fld(100b + h, 36525); c = b + z - 365y - fld(y, 4); m = div(5c + 456, 153) diff --git a/stdlib/Dates/src/adjusters.jl b/stdlib/Dates/src/adjusters.jl index 245e2678a9d77..0d6cea5dc3e6b 100644 --- a/stdlib/Dates/src/adjusters.jl +++ b/stdlib/Dates/src/adjusters.jl @@ -204,6 +204,41 @@ function adjust(df::DateFunction, start, step, limit) throw(ArgumentError("Adjustment limit reached: $limit iterations")) end +""" + adjust(df, start[, step, limit]) -> TimeType + adjust(df, start) -> TimeType + +Adjusts the date in `start` until the `f::Function` passed using `df` returns `true`. +The optional `step` parameter dictates the change in `start` on every iteration. +If `limit` iterations occur, then an [`ArgumentError`](@ref) is thrown. 
+ +The default values for parameters `start` and `limit` are 1 Day and 10,000 respectively. + +# Examples +```jldoctest +julia> Dates.adjust(date -> month(date) == 10, Date(2022, 1, 1), step=Month(3), limit=10) +2022-10-01 + +julia> Dates.adjust(date -> year(date) == 2025, Date(2022, 1, 1), step=Year(1), limit=4) +2025-01-01 + +julia> Dates.adjust(date -> day(date) == 15, Date(2022, 1, 1), step=Year(1), limit=3) +ERROR: ArgumentError: Adjustment limit reached: 3 iterations +Stacktrace: +[...] + +julia> Dates.adjust(date -> month(date) == 10, Date(2022, 1, 1)) +2022-10-01 + +julia> Dates.adjust(date -> year(date) == 2025, Date(2022, 1, 1)) +2025-01-01 + +julia> Dates.adjust(date -> year(date) == 2224, Date(2022, 1, 1)) +ERROR: ArgumentError: Adjustment limit reached: 10000 iterations +Stacktrace: +[...] +``` +""" function adjust(func::Function, start; step::Period=Day(1), limit::Int=10000) return adjust(DateFunction(func, start), start, step, limit) end diff --git a/stdlib/Dates/src/arithmetic.jl b/stdlib/Dates/src/arithmetic.jl index a847f749d0154..83a2873b43409 100644 --- a/stdlib/Dates/src/arithmetic.jl +++ b/stdlib/Dates/src/arithmetic.jl @@ -7,7 +7,8 @@ # TimeType arithmetic (+)(x::TimeType) = x (-)(x::T, y::T) where {T<:TimeType} = x.instant - y.instant -(-)(x::TimeType, y::TimeType) = -(promote(x, y)...) +(-)(x::T, y::T) where {T<:AbstractDateTime} = x.instant - y.instant +(-)(x::AbstractDateTime, y::AbstractDateTime) = -(promote(x, y)...) # Date-Time arithmetic """ diff --git a/stdlib/Dates/src/conversions.jl b/stdlib/Dates/src/conversions.jl index 30f1f2581d1fa..0d413d2cf53a1 100644 --- a/stdlib/Dates/src/conversions.jl +++ b/stdlib/Dates/src/conversions.jl @@ -84,7 +84,7 @@ today() = Date(now()) Return a `DateTime` corresponding to the user's system time as UTC/GMT. For other time zones, see the TimeZones.jl package. -# Example +# Examples ```julia julia> now(UTC) 2023-01-04T10:52:24.864 diff --git a/stdlib/Dates/src/io.jl b/stdlib/Dates/src/io.jl index 257e86064c2fb..aa7019566093c 100644 --- a/stdlib/Dates/src/io.jl +++ b/stdlib/Dates/src/io.jl @@ -111,7 +111,25 @@ end ### Parse tokens -for c in "yYmdHIMS" +for c in "yY" + @eval begin + @inline function tryparsenext(d::DatePart{$c}, str, i, len) + val = tryparsenext_sign(str, i, len) + if val !== nothing + coefficient, i = val + else + coefficient = 1 + end + # The sign character does not affect fixed length `DatePart`s + val = tryparsenext_base10(str, i, len, min_width(d), max_width(d)) + val === nothing && return nothing + y, ii = val + return y * coefficient, ii + end + end +end + +for c in "mdHIMS" @eval begin @inline function tryparsenext(d::DatePart{$c}, str, i, len) return tryparsenext_base10(str, i, len, min_width(d), max_width(d)) @@ -472,7 +490,7 @@ end Describes the ISO8601 formatting for a date and time. This is the default value for `Dates.format` of a `DateTime`. -# Example +# Examples ```jldoctest julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), ISODateTimeFormat) "2018-08-08T12:00:43.001" @@ -486,7 +504,7 @@ default_format(::Type{DateTime}) = ISODateTimeFormat Describes the ISO8601 formatting for a date. This is the default value for `Dates.format` of a `Date`. -# Example +# Examples ```jldoctest julia> Dates.format(Date(2018, 8, 8), ISODateFormat) "2018-08-08" @@ -500,7 +518,7 @@ default_format(::Type{Date}) = ISODateFormat Describes the ISO8601 formatting for a time. This is the default value for `Dates.format` of a `Time`. 
-# Example +# Examples ```jldoctest julia> Dates.format(Time(12, 0, 43, 1), ISOTimeFormat) "12:00:43.001" @@ -514,7 +532,7 @@ default_format(::Type{Time}) = ISOTimeFormat Describes the RFC1123 formatting for a date and time. -# Example +# Examples ```jldoctest julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), RFC1123Format) "Wed, 08 Aug 2018 12:00:43" @@ -538,7 +556,7 @@ pattern given in the `format` string (see [`DateFormat`](@ref) for syntax). that you create a [`DateFormat`](@ref) object instead and use that as the second argument to avoid performance loss when using the same format repeatedly. -# Example +# Examples ```jldoctest julia> DateTime("2020-01-01", "yyyy-mm-dd") 2020-01-01T00:00:00 @@ -578,7 +596,7 @@ in the `format` string (see [`DateFormat`](@ref) for syntax). that you create a [`DateFormat`](@ref) object instead and use that as the second argument to avoid performance loss when using the same format repeatedly. -# Example +# Examples ```jldoctest julia> Date("2020-01-01", "yyyy-mm-dd") 2020-01-01 @@ -618,7 +636,7 @@ in the `format` string (see [`DateFormat`](@ref) for syntax). that you create a [`DateFormat`](@ref) object instead and use that as the second argument to avoid performance loss when using the same format repeatedly. -# Example +# Examples ```jldoctest julia> Time("12:34pm", "HH:MMp") 12:34:00 @@ -695,7 +713,7 @@ except that it does not truncate values longer than the width. When creating a `format` you can use any non-code characters as a separator. For example to generate the string "1996-01-15T00:00:00" you could use `format`: "yyyy-mm-ddTHH:MM:SS". Note that if you need to use a code character as a literal you can use the escape character -backslash. The string "1996y01m" can be produced with the format "yyyy\\ymm\\m". +backslash. The string "1996y01m" can be produced with the format raw"yyyy\\ymm\\m". """ function format(dt::TimeType, f::AbstractString; locale::Locale=ENGLISH) format(dt, DateFormat(f, locale)) diff --git a/stdlib/Dates/src/parse.jl b/stdlib/Dates/src/parse.jl index 62d44177de877..e8624cf9243c5 100644 --- a/stdlib/Dates/src/parse.jl +++ b/stdlib/Dates/src/parse.jl @@ -156,6 +156,18 @@ If successful, returns a 2-element tuple `(values, pos)`: end end +@inline function tryparsenext_sign(str::AbstractString, i::Int, len::Int) + i > len && return nothing + c, ii = iterate(str, i)::Tuple{Char, Int} + if c == '+' + return 1, ii + elseif c == '-' + return -1, ii + else + return nothing + end +end + @inline function tryparsenext_base10(str::AbstractString, i::Int, len::Int, min_width::Int=1, max_width::Int=0) i > len && return nothing min_pos = min_width <= 0 ? 
i : i + min_width - 1 @@ -200,14 +212,22 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF i, end_pos = firstindex(s), lastindex(s) i > end_pos && throw(ArgumentError("Cannot parse an empty string as a DateTime")) + coefficient = 1 local dy dm = dd = Int64(1) th = tm = ts = tms = Int64(0) + # Optional sign + let val = tryparsenext_sign(s, i, end_pos) + if val !== nothing + coefficient, i = val + end + end + let val = tryparsenext_base10(s, i, end_pos, 1) val === nothing && @goto error dy, i = val - i > end_pos && @goto error + i > end_pos && @goto done end c, i = iterate(s, i)::Tuple{Char, Int} @@ -272,7 +292,7 @@ function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeF end @label done - return DateTime(dy, dm, dd, th, tm, ts, tms) + return DateTime(dy * coefficient, dm, dd, th, tm, ts, tms) @label error throw(ArgumentError("Invalid DateTime string")) diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl index 9b7e29496e642..8f28f95d4a90e 100644 --- a/stdlib/Dates/src/periods.jl +++ b/stdlib/Dates/src/periods.jl @@ -102,6 +102,7 @@ div(x::Period, y::Period, r::RoundingMode) = div(promote(x, y)..., r) Base.gcdx(a::T, b::T) where {T<:Period} = ((g, x, y) = gcdx(value(a), value(b)); return T(g), x, y) Base.abs(a::T) where {T<:Period} = T(abs(value(a))) Base.sign(x::Period) = sign(value(x)) +Base.signbit(x::Period) = signbit(value(x)) # return (next coarser period, conversion factor): coarserperiod(::Type{P}) where {P<:Period} = (P, 1) @@ -325,7 +326,7 @@ end Base.show(io::IO,x::CompoundPeriod) = print(io, string(x)) Base.convert(::Type{T}, x::CompoundPeriod) where T<:Period = - isconcretetype(T) ? sum(T, x.periods) : throw(MethodError(convert,(T,x))) + isconcretetype(T) ? sum(T, x.periods; init = zero(T)) : throw(MethodError(convert,(T,x))) # E.g. Year(1) + Day(1) (+)(x::Period,y::Period) = CompoundPeriod(Period[x, y]) @@ -443,18 +444,18 @@ Base.isless(x::CompoundPeriod, y::Period) = x < CompoundPeriod(y) Base.isless(x::CompoundPeriod, y::CompoundPeriod) = tons(x) < tons(y) # truncating conversions to milliseconds, nanoseconds and days: # overflow can happen for periods longer than ~300,000 years -toms(c::Nanosecond) = div(value(c), 1000000) -toms(c::Microsecond) = div(value(c), 1000) +toms(c::Nanosecond) = div(value(c), 1000000, RoundNearest) +toms(c::Microsecond) = div(value(c), 1000, RoundNearest) toms(c::Millisecond) = value(c) toms(c::Second) = 1000 * value(c) toms(c::Minute) = 60000 * value(c) toms(c::Hour) = 3600000 * value(c) toms(c::Period) = 86400000 * days(c) -toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(toms, c.periods)) +toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, toms(p))::Float64, c.periods) tons(x) = toms(x) * 1000000 tons(x::Microsecond) = value(x) * 1000 tons(x::Nanosecond) = value(x) -tons(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(tons, c.periods)) +tons(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : sum(p -> convert(Float64, tons(p))::Float64, c.periods) days(c::Millisecond) = div(value(c), 86400000) days(c::Second) = div(value(c), 86400) days(c::Minute) = div(value(c), 1440) @@ -464,4 +465,8 @@ days(c::Week) = 7 * value(c) days(c::Year) = 365.2425 * value(c) days(c::Quarter) = 91.310625 * value(c) days(c::Month) = 30.436875 * value(c) -days(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(days, c.periods)) +days(c::CompoundPeriod) = isempty(c.periods) ? 
0.0 : sum(p -> convert(Float64, days(p))::Float64, c.periods) +seconds(x::Nanosecond) = value(x) / 1000000000 +seconds(x::Microsecond) = value(x) / 1000000 +seconds(x::Millisecond) = value(x) / 1000 +seconds(x::Period) = value(Second(x)) diff --git a/stdlib/Dates/src/rounding.jl b/stdlib/Dates/src/rounding.jl index b5b6e52decba8..08a8218365d2c 100644 --- a/stdlib/Dates/src/rounding.jl +++ b/stdlib/Dates/src/rounding.jl @@ -84,6 +84,12 @@ function Base.floor(dt::DateTime, p::TimePeriod) return epochms2datetime(milliseconds - mod(milliseconds, value(Millisecond(p)))) end +function Base.floor(t::Time, p::TimePeriod) + value(p) < 1 && throw(DomainError(p)) + nanoseconds = value(t) + return Time(Nanosecond(nanoseconds - mod(nanoseconds, value(Nanosecond(p))))) +end + """ floor(x::Period, precision::T) where T <: Union{TimePeriod, Week, Day} -> T diff --git a/stdlib/Dates/src/types.jl b/stdlib/Dates/src/types.jl index 1d9769a05bd3d..1978864b92554 100644 --- a/stdlib/Dates/src/types.jl +++ b/stdlib/Dates/src/types.jl @@ -142,8 +142,28 @@ abstract type AbstractDateTime <: TimeType end """ DateTime -`DateTime` wraps a `UTInstant{Millisecond}` and interprets it according to the proleptic -Gregorian calendar. +`DateTime` represents a point in time according to the proleptic Gregorian calendar. +The finest resolution of the time is millisecond (i.e., microseconds or +nanoseconds cannot be represented by this type). The type supports fixed-point +arithmetic, and thus is prone to underflowing (and overflowing). A notable +consequence is rounding when adding a `Microsecond` or a `Nanosecond`: + +```jldoctest +julia> dt = DateTime(2023, 8, 19, 17, 45, 32, 900) +2023-08-19T17:45:32.900 + +julia> dt + Millisecond(1) +2023-08-19T17:45:32.901 + +julia> dt + Microsecond(1000) # 1000us == 1ms +2023-08-19T17:45:32.901 + +julia> dt + Microsecond(999) # 999us rounded to 1000us +2023-08-19T17:45:32.901 + +julia> dt + Microsecond(1499) # 1499 rounded to 1000us +2023-08-19T17:45:32.901 +``` """ struct DateTime <: AbstractDateTime instant::UTInstant{Millisecond} @@ -183,7 +203,7 @@ function totaldays(y, m, d) end # If the year is divisible by 4, except for every 100 years, except for every 400 years -isleapyear(y) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0)) +isleapyear(y::Integer) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0)) # Number of days in month const DAYSINMONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) @@ -460,15 +480,20 @@ Base.hash(x::Time, h::UInt) = hash(hour(x), hash(minute(x), hash(second(x), hash(millisecond(x), hash(microsecond(x), hash(nanosecond(x), h)))))) -Base.sleep(duration::Period) = sleep(toms(duration) / 1000) +Base.sleep(duration::Period) = sleep(seconds(duration)) function Base.Timer(delay::Period; interval::Period=Second(0)) - Timer(toms(delay) / 1000, interval=toms(interval) / 1000) + Timer(seconds(delay), interval=seconds(interval)) end function Base.timedwait(testcb, timeout::Period; pollint::Period=Millisecond(100)) - timedwait(testcb, toms(timeout) / 1000, pollint=toms(pollint) / 1000) + timedwait(testcb, seconds(timeout), pollint=seconds(pollint)) end Base.OrderStyle(::Type{<:AbstractTime}) = Base.Ordered() Base.ArithmeticStyle(::Type{<:AbstractTime}) = Base.ArithmeticWraps() + +# minimal Base.TOML support +Date(d::Base.TOML.Date) = Date(d.year, d.month, d.day) +Time(t::Base.TOML.Time) = Time(t.hour, t.minute, t.second, t.ms) +DateTime(dt::Base.TOML.DateTime) = DateTime(Date(dt.date), Time(dt.time)) diff --git a/stdlib/Dates/test/accessors.jl 
b/stdlib/Dates/test/accessors.jl index b690a81d70e49..240de42eaa1dc 100644 --- a/stdlib/Dates/test/accessors.jl +++ b/stdlib/Dates/test/accessors.jl @@ -153,7 +153,7 @@ end @test Dates.week(Dates.Date(2010, 1, 1)) == 53 @test Dates.week(Dates.Date(2010, 1, 2)) == 53 @test Dates.week(Dates.Date(2010, 1, 2)) == 53 - # Tests from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=1999 + # Tests from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=1999 dt = Dates.DateTime(1999, 12, 27) dt1 = Dates.Date(1999, 12, 27) check = (52, 52, 52, 52, 52, 52, 52, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2) @@ -163,7 +163,7 @@ end dt = dt + Dates.Day(1) dt1 = dt1 + Dates.Day(1) end - # Tests from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2000 + # Tests from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2000 dt = Dates.DateTime(2000, 12, 25) dt1 = Dates.Date(2000, 12, 25) for i = 1:21 @@ -172,7 +172,7 @@ end dt = dt + Dates.Day(1) dt1 = dt1 + Dates.Day(1) end - # Test from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2030 + # Test from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2030 dt = Dates.DateTime(2030, 12, 23) dt1 = Dates.Date(2030, 12, 23) for i = 1:21 @@ -181,7 +181,7 @@ end dt = dt + Dates.Day(1) dt1 = dt1 + Dates.Day(1) end - # Tests from http://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2004 + # Tests from https://www.epochconverter.com/date-and-time/weeknumbers-by-year.php?year=2004 dt = Dates.DateTime(2004, 12, 20) dt1 = Dates.Date(2004, 12, 20) check = (52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 1, 1, 1, 1, 1, 1, 1) diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl index 2e684815a3c86..333ba3a7c0088 100644 --- a/stdlib/Dates/test/arithmetic.jl +++ b/stdlib/Dates/test/arithmetic.jl @@ -11,10 +11,18 @@ using Dates @test Dates.CompoundPeriod(a - b) == Dates.Hour(12) end +struct MonthlyDate <: TimeType + instant::Dates.UTInstant{Month} +end +struct OtherTime <: Dates.AbstractDateTime + instant::Dates.UTInstant{Nanosecond} +end @testset "TimeType arithmetic" begin - a = Date(2023, 5, 1) - b = DateTime(2023, 5, 2) - @test b - a == Day(1) + @test_throws MethodError DateTime(2023, 5, 2) - Date(2023, 5, 1) + # check that - between two same-type TimeTypes works by default + @test MonthlyDate(Dates.UTInstant(Month(10))) - MonthlyDate(Dates.UTInstant(Month(1))) == Month(9) + # ... 
and between two same-type AbstractDateTimes + @test OtherTime(Dates.UTInstant(Nanosecond(2))) - OtherTime(Dates.UTInstant(Nanosecond(1))) == Nanosecond(1) end @testset "Wrapping arithmetic for Months" begin @@ -263,6 +271,24 @@ end @test dt - Dates.Millisecond(1) == Dates.DateTime(1972, 6, 30, 23, 59, 58, 999) @test dt + Dates.Millisecond(-1) == Dates.DateTime(1972, 6, 30, 23, 59, 58, 999) end + @testset "DateTime-Microsecond arithmetic" begin + dt = Dates.DateTime(1999, 12, 27) + @test dt + Dates.Microsecond(1) == dt + @test dt + Dates.Microsecond(501) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt + Dates.Microsecond(1499) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt - Dates.Microsecond(1) == dt + @test dt - Dates.Microsecond(501) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + @test dt - Dates.Microsecond(1499) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + end + @testset "DateTime-Nanosecond arithmetic" begin + dt = Dates.DateTime(1999, 12, 27) + @test dt + Dates.Nanosecond(1) == dt + @test dt + Dates.Nanosecond(500_001) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt + Dates.Nanosecond(1_499_999) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt - Dates.Nanosecond(1) == dt + @test dt - Dates.Nanosecond(500_001) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + @test dt - Dates.Nanosecond(1_499_999) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + end end @testset "Date arithmetic" begin @testset "Date-Year arithmetic" begin diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl index 2c99ac45d0c58..98bc610784477 100644 --- a/stdlib/Dates/test/io.jl +++ b/stdlib/Dates/test/io.jl @@ -47,7 +47,7 @@ end end @testset "DateTime parsing" begin - # Useful reference for different locales: http://library.princeton.edu/departments/tsd/katmandu/reference/months.html + # Useful reference for different locales: https://library.princeton.edu/departments/tsd/katmandu/reference/months.html # Allow parsing of strings which are not representable as a TimeType str = "02/15/1996 25:00" @@ -325,6 +325,23 @@ end # From Matt Bauman f = "yyyy-mm-ddTHH:MM:SS" @test Dates.DateTime("2014-05-28T16:46:04", f) == Dates.DateTime(2014, 5, 28, 16, 46, 04) + + f = "yyyymmdd" + @test Dates.DateTime("20240521", f) == Dates.DateTime(2024, 5, 21) + @test Dates.DateTime("-20240521", f) == Dates.DateTime(-2024, 5, 21) + @test Dates.DateTime("+20240521", f) == Dates.DateTime(2024, 5, 21) + f = "YYYYmmdd" + @test Dates.DateTime("20240521", f) == Dates.DateTime(2024, 5, 21) + @test Dates.DateTime("-20240521", f) == Dates.DateTime(-2024, 5, 21) + @test Dates.DateTime("+20240521", f) == Dates.DateTime(2024, 5, 21) + f = "-yyyymmdd" + @test Dates.DateTime("-20240521", f) == Dates.DateTime(2024, 5, 21) + @test_throws ArgumentError Dates.DateTime("+20240521", f) + @test_throws ArgumentError Dates.DateTime("20240521", f) + f = "-YYYYmmdd" + @test Dates.DateTime("-20240521", f) == Dates.DateTime(2024, 5, 21) + @test_throws ArgumentError Dates.DateTime("+20240521", f) + @test_throws ArgumentError Dates.DateTime("20240521", f) end @testset "Error handling" begin @@ -403,6 +420,17 @@ end @test_throws ArgumentError parse(Date, "Foo, 12 Nov 2016 07:45:36", Dates.RFC1123Format) end +@testset "ISODateTimeFormat" begin + dt = Dates.DateTime(2024, 5, 21, 10, 57, 22) + neg_dt = Dates.DateTime(-2024, 5, 21, 10, 57, 22) + @test parse(Dates.DateTime, "2024-05-21T10:57:22", Dates.ISODateTimeFormat) == dt + @test parse(Dates.DateTime, "+2024-05-21T10:57:22", Dates.ISODateTimeFormat) == dt + @test 
parse(Dates.DateTime, "-2024-05-21T10:57:22", Dates.ISODateTimeFormat) == neg_dt + + @test_throws ArgumentError parse(Dates.DateTime, "-", Dates.ISODateTimeFormat) + @test_throws ArgumentError parse(Dates.DateTime, "+", Dates.ISODateTimeFormat) +end + @testset "Issue 15195" begin f = "YY" @test Dates.format(Dates.Date(1999), f) == "1999" @@ -470,6 +498,9 @@ end # Issue #44003 @test tryparse(Dates.Date, "2017", Dates.DateFormat(".s")) === nothing +# Issue #52989 +@test Dates.DateTime("2000") == Dates.DateTime(2000) + @testset "parse milliseconds, Issue #22100" begin @test Dates.DateTime("2017-Mar-17 00:00:00.0000", "y-u-d H:M:S.s") == Dates.DateTime(2017, 3, 17) @test Dates.parse_components(".1", Dates.DateFormat(".s")) == [Dates.Millisecond(100)] @@ -618,4 +649,9 @@ end end end +@testset "Issue #50328: parsing negative years" begin + @test Date("-2013-10-10") == Date(-2013, 10, 10) + @test Date("-2013") == Date(-2013, 01, 01) +end + end diff --git a/stdlib/Dates/test/periods.jl b/stdlib/Dates/test/periods.jl index 7b23ffcb5d4e1..9c7d0deef8a11 100644 --- a/stdlib/Dates/test/periods.jl +++ b/stdlib/Dates/test/periods.jl @@ -30,6 +30,9 @@ using Test @test sign(t) == sign(t2) == 1 @test sign(-t) == sign(-t2) == -1 @test sign(Dates.Year(0)) == 0 + @test signbit(t) == signbit(t2) == false + @test signbit(-t) == signbit(-t2) == true + @test signbit(Dates.Year(0)) == false end @testset "div/mod/gcd/lcm/rem" begin @test Dates.Year(10) % Dates.Year(4) == Dates.Year(2) @@ -329,6 +332,14 @@ end @test Dates.default(Dates.Nanosecond) == zero(Dates.Nanosecond) end @testset "Conversions" begin + @test Dates.toms(1499 * us) == 1 + @test Dates.toms(501 * us) == 1 + @test Dates.toms(us) == 0 + + @test Dates.toms(1_499_999 * ns) == 1 + @test Dates.toms(500_001 * ns) == 1 + @test Dates.toms(ns) == 0 + @test Dates.toms(ms) == Dates.value(Dates.Millisecond(ms)) == 1 @test Dates.toms(s) == Dates.value(Dates.Millisecond(s)) == 1000 @test Dates.toms(mi) == Dates.value(Dates.Millisecond(mi)) == 60000 @@ -343,6 +354,15 @@ end @test Dates.days(Dates.Hour(24)) == 1 @test Dates.days(d) == 1 @test Dates.days(w) == 7 + + @test Dates.seconds(ns) == 0.000000001 + @test Dates.seconds(us) == 0.000001 + @test Dates.seconds(ms) == 0.001 + @test Dates.seconds(s) == 1 + @test Dates.seconds(mi) == 60 + @test Dates.seconds(h) == 3600 + @test Dates.seconds(d) == 86400 + @test Dates.seconds(w) == 604800 end @testset "issue #9214" begin @test 2s + (7ms + 1ms) == (2s + 7ms) + 1ms == 1ms + (2s + 7ms) == 1ms + (1s + 7ms) + 1s == 1ms + (2s + 3d + 7ms) + (-3d) == (1ms + (2s + 3d)) + (7ms - 3d) == (1ms + (2s + 3d)) - (3d - 7ms) @@ -523,6 +543,7 @@ end @test convert(Second, Minute(1) + Second(30)) === Second(90) @test convert(Minute, Minute(1) + Second(60)) === Minute(2) @test convert(Millisecond, Minute(1) + Second(30)) === Millisecond(90_000) + @test convert(Millisecond, Dates.CompoundPeriod()) === Millisecond(0) @test_throws InexactError convert(Minute, Minute(1) + Second(30)) @test_throws MethodError convert(Month, Minute(1) + Second(30)) @test_throws MethodError convert(Second, Month(1) + Second(30)) diff --git a/stdlib/Dates/test/rounding.jl b/stdlib/Dates/test/rounding.jl index 85c90981423d3..03c57c7a5bce3 100644 --- a/stdlib/Dates/test/rounding.jl +++ b/stdlib/Dates/test/rounding.jl @@ -188,7 +188,27 @@ end @test round(x, Dates.Microsecond) == Dates.Microsecond(2001000) @test round(x, Dates.Nanosecond) == x end - +@testset "Rounding Time" begin + x = Time(9, 25, 45, 25, 650, 500) + @test floor(x, Dates.Hour) == Time(9) + @test 
floor(x, Dates.Minute) == Time(9, 25) + @test floor(x, Dates.Second) == Time(9, 25, 45) + @test floor(x, Dates.Millisecond) == Time(9, 25, 45, 25) + @test floor(x, Dates.Microsecond) == Time(9, 25, 45, 25, 650) + @test floor(x, Dates.Nanosecond) == x + @test ceil(x, Dates.Hour) == Time(10) + @test ceil(x, Dates.Minute) == Time(9, 26) + @test ceil(x, Dates.Second) == Time(9, 25, 46) + @test ceil(x, Dates.Millisecond) == Time(9, 25, 45, 26) + @test ceil(x, Dates.Microsecond) == Time(9, 25, 45, 25, 651) + @test ceil(x, Dates.Nanosecond) == x + @test round(x, Dates.Hour) == Time(9) + @test round(x, Dates.Minute) == Time(9, 26) + @test round(x, Dates.Second) == Time(9, 25, 45) + @test round(x, Dates.Millisecond) == Time(9, 25, 45, 26) + @test round(x, Dates.Microsecond) == Time(9, 25, 45, 25, 651) + @test round(x, Dates.Nanosecond) == x +end @testset "Rounding DateTime to Date" begin now_ = DateTime(2020, 9, 1, 13) for p in (Year, Month, Day) diff --git a/stdlib/Dates/test/runtests.jl b/stdlib/Dates/test/runtests.jl index de063135427a9..ad2ee43cedfb1 100644 --- a/stdlib/Dates/test/runtests.jl +++ b/stdlib/Dates/test/runtests.jl @@ -2,8 +2,14 @@ module DateTests +using Test, Dates + for file in readlines(joinpath(@__DIR__, "testgroups")) include(file * ".jl") end +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Dates)) +end + end diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl index 8823e56e41a2f..29395ccf3a271 100644 --- a/stdlib/Dates/test/types.jl +++ b/stdlib/Dates/test/types.jl @@ -41,6 +41,7 @@ end @test Dates.isleapyear(-1) == false @test Dates.isleapyear(4) == true @test Dates.isleapyear(-4) == true + @test_throws MethodError Dates.isleapyear(Dates.Year(1992)) end # Create "test" check manually y = Dates.Year(1) @@ -74,6 +75,12 @@ ms = Dates.Millisecond(1) Dates.Hour(4), Dates.Second(10)) == Dates.DateTime(1, 2, 1, 4, 0, 10) end +@testset "DateTime construction from Date and Time" begin + @test Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12)) == Dates.DateTime(2023, 08, 07, 12, 0, 0, 0) + @test_throws InexactError Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12, 0, 0, 0, 42)) + @test_throws InexactError Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12, 0, 0, 0, 0, 42)) +end + @testset "Date construction by parts" begin test = Dates.Date(Dates.UTD(734869)) @test Dates.Date(2013) == test @@ -256,7 +263,11 @@ end end @testset "issue #31524" begin - dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z") + # Ensure the result doesn't depend on local timezone, especially on macOS + # where an extra internal call to `mktime` is affected by timezone settings. 
+ dt1 = withenv("TZ" => "UTC") do + Libc.strptime("%Y-%m-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z") + end dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0) time = Time(dt1) @@ -273,6 +284,11 @@ end end +@testset "timer" begin + @test hasmethod(Timer, (Period,)) + @test hasmethod(Timer, (Function, Period)) +end + @testset "timedwait" begin @test timedwait(() -> false, Second(0); pollint=Millisecond(1)) === :timed_out end diff --git a/stdlib/Distributed.version b/stdlib/Distributed.version new file mode 100644 index 0000000000000..4a7ab49defed2 --- /dev/null +++ b/stdlib/Distributed.version @@ -0,0 +1,4 @@ +DISTRIBUTED_BRANCH = master +DISTRIBUTED_SHA1 = c6136853451677f1957bec20ecce13419cde3a12 +DISTRIBUTED_GIT_URL := https://github.com/JuliaLang/Distributed.jl +DISTRIBUTED_TAR_URL = https://api.github.com/repos/JuliaLang/Distributed.jl/tarball/$1 diff --git a/stdlib/Distributed/Project.toml b/stdlib/Distributed/Project.toml deleted file mode 100644 index ecec870290041..0000000000000 --- a/stdlib/Distributed/Project.toml +++ /dev/null @@ -1,14 +0,0 @@ -name = "Distributed" -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[deps] -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" - -[extras] -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["LinearAlgebra", "Test"] diff --git a/stdlib/Distributed/docs/src/index.md b/stdlib/Distributed/docs/src/index.md deleted file mode 100644 index 00b40de49b396..0000000000000 --- a/stdlib/Distributed/docs/src/index.md +++ /dev/null @@ -1,71 +0,0 @@ -# [Distributed Computing](@id man-distributed) -Tools for distributed parallel processing. - -```@docs -Distributed.addprocs -Distributed.nprocs -Distributed.nworkers -Distributed.procs() -Distributed.procs(::Integer) -Distributed.workers -Distributed.rmprocs -Distributed.interrupt -Distributed.myid -Distributed.pmap -Distributed.RemoteException -Distributed.ProcessExitedException -Distributed.Future -Distributed.RemoteChannel -Distributed.fetch(::Distributed.Future) -Distributed.fetch(::RemoteChannel) -Distributed.remotecall(::Any, ::Integer, ::Any...) -Distributed.remotecall_wait(::Any, ::Integer, ::Any...) -Distributed.remotecall_fetch(::Any, ::Integer, ::Any...) -Distributed.remote_do(::Any, ::Integer, ::Any...) -Distributed.put!(::RemoteChannel, ::Any...) -Distributed.put!(::Distributed.Future, ::Any) -Distributed.take!(::RemoteChannel, ::Any...) -Distributed.isready(::RemoteChannel, ::Any...) -Distributed.isready(::Distributed.Future) -Distributed.AbstractWorkerPool -Distributed.WorkerPool -Distributed.CachingPool -Distributed.default_worker_pool -Distributed.clear!(::CachingPool) -Distributed.remote -Distributed.remotecall(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.remotecall_wait(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.remotecall_fetch(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.remote_do(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.@spawnat -Distributed.@fetch -Distributed.@fetchfrom -Distributed.@distributed -Distributed.@everywhere -Distributed.clear!(::Any, ::Any; ::Any) -Distributed.remoteref_id -Distributed.channel_from_id -Distributed.worker_id_from_socket -Distributed.cluster_cookie() -Distributed.cluster_cookie(::Any) -``` - -## Cluster Manager Interface - -This interface provides a mechanism to launch and manage Julia workers on different cluster environments. 
-There are two types of managers present in Base: `LocalManager`, for launching additional workers on the -same host, and `SSHManager`, for launching on remote hosts via `ssh`. TCP/IP sockets are used to connect -and transport messages between processes. It is possible for Cluster Managers to provide a different transport. - -```@docs -Distributed.ClusterManager -Distributed.WorkerConfig -Distributed.launch -Distributed.manage -Distributed.kill(::ClusterManager, ::Int, ::WorkerConfig) -Distributed.connect(::ClusterManager, ::Int, ::WorkerConfig) -Distributed.init_worker -Distributed.start_worker -Distributed.process_messages -Distributed.default_addprocs_params -``` diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl deleted file mode 100644 index a7c5b1778b144..0000000000000 --- a/stdlib/Distributed/src/Distributed.jl +++ /dev/null @@ -1,119 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" -Tools for distributed parallel processing. -""" -module Distributed - -# imports for extension -import Base: getindex, wait, put!, take!, fetch, isready, push!, length, - hash, ==, kill, close, isopen, showerror, iterate, IteratorSize - -# imports for use -using Base: Process, Semaphore, JLOptions, buffer_writes, @async_unwrap, - VERSION_STRING, binding_module, atexit, julia_exename, - julia_cmd, AsyncGenerator, acquire, release, invokelatest, - shell_escape_posixly, shell_escape_csh, - shell_escape_wincmd, escape_microsoft_c_args, - uv_error, something, notnothing, isbuffered, mapany -using Base.Threads: Event - -using Serialization, Sockets -import Serialization: serialize, deserialize -import Sockets: connect, wait_connected - -# NOTE: clusterserialize.jl imports additional symbols from Serialization for use - -export - @spawn, - @spawnat, - @fetch, - @fetchfrom, - @everywhere, - @distributed, - - AbstractWorkerPool, - addprocs, - CachingPool, - clear!, - ClusterManager, - default_worker_pool, - init_worker, - interrupt, - launch, - manage, - myid, - nprocs, - nworkers, - pmap, - procs, - remote, - remotecall, - remotecall_fetch, - remotecall_wait, - remote_do, - rmprocs, - workers, - WorkerPool, - RemoteChannel, - Future, - WorkerConfig, - RemoteException, - ProcessExitedException, - - process_messages, - remoteref_id, - channel_from_id, - worker_id_from_socket, - cluster_cookie, - start_worker, - -# Used only by shared arrays. - check_same_host - -function _require_callback(mod::Base.PkgId) - if Base.toplevel_load[] && myid() == 1 && nprocs() > 1 - # broadcast top-level (e.g. from Main) import/using from node 1 (only) - @sync for p in procs() - p == 1 && continue - # Extensions are already loaded on workers by their triggers being loaded - # so no need to fire the callback upon extension being loaded on master. 
- Base.loading_extension && continue - @async_unwrap remotecall_wait(p) do - Base.require(mod) - nothing - end - end - end -end - -const REF_ID = Threads.Atomic{Int}(1) -next_ref_id() = Threads.atomic_add!(REF_ID, 1) - -struct RRID - whence::Int - id::Int - - RRID() = RRID(myid(), next_ref_id()) - RRID(whence, id) = new(whence, id) -end - -hash(r::RRID, h::UInt) = hash(r.whence, hash(r.id, h)) -==(r::RRID, s::RRID) = (r.whence==s.whence && r.id==s.id) - -include("clusterserialize.jl") -include("cluster.jl") # cluster setup and management, addprocs -include("messages.jl") -include("process_messages.jl") # process incoming messages -include("remotecall.jl") # the remotecall* api -include("macros.jl") # @spawn and friends -include("workerpool.jl") -include("pmap.jl") -include("managers.jl") # LocalManager and SSHManager -include("precompile.jl") - -function __init__() - init_parallel() -end - -end diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl deleted file mode 100644 index d8cc052967d50..0000000000000 --- a/stdlib/Distributed/src/cluster.jl +++ /dev/null @@ -1,1388 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - ClusterManager - -Supertype for cluster managers, which control workers processes as a cluster. -Cluster managers implement how workers can be added, removed and communicated with. -`SSHManager` and `LocalManager` are subtypes of this. -""" -abstract type ClusterManager end - -""" - WorkerConfig - -Type used by [`ClusterManager`](@ref)s to control workers added to their clusters. Some fields -are used by all cluster managers to access a host: - * `io` -- the connection used to access the worker (a subtype of `IO` or `Nothing`) - * `host` -- the host address (either a `String` or `Nothing`) - * `port` -- the port on the host used to connect to the worker (either an `Int` or `Nothing`) - -Some are used by the cluster manager to add workers to an already-initialized host: - * `count` -- the number of workers to be launched on the host - * `exename` -- the path to the Julia executable on the host, defaults to `"\$(Sys.BINDIR)/julia"` or - `"\$(Sys.BINDIR)/julia-debug"` - * `exeflags` -- flags to use when launching Julia remotely - -The `userdata` field is used to store information for each worker by external managers. 
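As a sketch of how the `WorkerConfig` fields documented above get populated in practice, here is a minimal custom manager's `launch` method, loosely modeled on the built-in `LocalManager`. `ToyManager`, its `userdata` payload, and the omission of all error handling are assumptions of this illustration, not part of the patch:

```julia
using Distributed
import Distributed: launch, manage

struct ToyManager <: ClusterManager
    np::Int
end

function launch(mgr::ToyManager, params::Dict, launched::Array, c::Condition)
    exename, exeflags = params[:exename], params[:exeflags]
    for _ in 1:mgr.np
        io = open(detach(`$exename $exeflags --worker`), "r+")
        Distributed.write_cookie(io)      # worker reads the cluster cookie from its stdin

        wconfig = WorkerConfig()
        wconfig.io = io.out               # master reads the `julia_worker:` header from here
        wconfig.userdata = Dict(:launched_at => time())   # hypothetical per-worker bookkeeping
        push!(launched, wconfig)
        notify(c)                         # let addprocs start connecting to this worker
    end
end

# No extra bookkeeping for :register / :interrupt / :deregister / :finalize events.
manage(::ToyManager, id::Integer, config::WorkerConfig, op::Symbol) = nothing
```

With those two methods defined, `addprocs(ToyManager(2))` goes through the same connection setup as the built-in managers.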
- -Some fields are used by `SSHManager` and similar managers: - * `tunnel` -- `true` (use tunneling), `false` (do not use tunneling), or [`nothing`](@ref) (use default for the manager) - * `multiplex` -- `true` (use SSH multiplexing for tunneling) or `false` - * `forward` -- the forwarding option used for `-L` option of ssh - * `bind_addr` -- the address on the remote host to bind to - * `sshflags` -- flags to use in establishing the SSH connection - * `max_parallel` -- the maximum number of workers to connect to in parallel on the host - -Some fields are used by both `LocalManager`s and `SSHManager`s: - * `connect_at` -- determines whether this is a worker-to-worker or driver-to-worker setup call - * `process` -- the process which will be connected (usually the manager will assign this during [`addprocs`](@ref)) - * `ospid` -- the process ID according to the host OS, used to interrupt worker processes - * `environ` -- private dictionary used to store temporary information by Local/SSH managers - * `ident` -- worker as identified by the [`ClusterManager`](@ref) - * `connect_idents` -- list of worker ids the worker must connect to if using a custom topology - * `enable_threaded_blas` -- `true`, `false`, or `nothing`, whether to use threaded BLAS or not on the workers -""" -mutable struct WorkerConfig - # Common fields relevant to all cluster managers - io::Union{IO, Nothing} - host::Union{String, Nothing} - port::Union{Int, Nothing} - - # Used when launching additional workers at a host - count::Union{Int, Symbol, Nothing} - exename::Union{String, Cmd, Nothing} - exeflags::Union{Cmd, Nothing} - - # External cluster managers can use this to store information at a per-worker level - # Can be a dict if multiple fields need to be stored. - userdata::Any - - # SSHManager / SSH tunnel connections to workers - tunnel::Union{Bool, Nothing} - multiplex::Union{Bool, Nothing} - forward::Union{String, Nothing} - bind_addr::Union{String, Nothing} - sshflags::Union{Cmd, Nothing} - max_parallel::Union{Int, Nothing} - - # Used by Local/SSH managers - connect_at::Any - - process::Union{Process, Nothing} - ospid::Union{Int, Nothing} - - # Private dictionary used to store temporary information by Local/SSH managers. - environ::Union{Dict, Nothing} - - # Connections to be setup depending on the network topology requested - ident::Any # Worker as identified by the Cluster Manager. - # List of other worker idents this worker must connect with. Used with topology T_CUSTOM. - connect_idents::Union{Array, Nothing} - - # Run multithreaded blas on worker - enable_threaded_blas::Union{Bool, Nothing} - - function WorkerConfig() - wc = new() - for n in 1:fieldcount(WorkerConfig) - setfield!(wc, n, nothing) - end - wc - end -end - -@enum WorkerState W_CREATED W_CONNECTED W_TERMINATING W_TERMINATED -mutable struct Worker - id::Int - msg_lock::Threads.ReentrantLock # Lock for del_msgs, add_msgs, and gcflag - del_msgs::Array{Any,1} # XXX: Could del_msgs and add_msgs be Channels? 
- add_msgs::Array{Any,1} - @atomic gcflag::Bool - state::WorkerState - c_state::Condition # wait for state changes - ct_time::Float64 # creation time - conn_func::Any # used to setup connections lazily - - r_stream::IO - w_stream::IO - w_serializer::ClusterSerializer # writes can happen from any task hence store the - # serializer as part of the Worker object - manager::ClusterManager - config::WorkerConfig - version::Union{VersionNumber, Nothing} # Julia version of the remote process - initialized::Event - - function Worker(id::Int, r_stream::IO, w_stream::IO, manager::ClusterManager; - version::Union{VersionNumber, Nothing}=nothing, - config::WorkerConfig=WorkerConfig()) - w = Worker(id) - w.r_stream = r_stream - w.w_stream = buffer_writes(w_stream) - w.w_serializer = ClusterSerializer(w.w_stream) - w.manager = manager - w.config = config - w.version = version - set_worker_state(w, W_CONNECTED) - register_worker_streams(w) - w - end - - Worker(id::Int) = Worker(id, nothing) - function Worker(id::Int, conn_func) - @assert id > 0 - if haskey(map_pid_wrkr, id) - return map_pid_wrkr[id] - end - w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Condition(), time(), conn_func) - w.initialized = Event() - register_worker(w) - w - end - - Worker() = Worker(get_next_pid()) -end - -function set_worker_state(w, state) - w.state = state - notify(w.c_state; all=true) -end - -function check_worker_state(w::Worker) - if w.state === W_CREATED - if !isclusterlazy() - if PGRP.topology === :all_to_all - # Since higher pids connect with lower pids, the remote worker - # may not have connected to us yet. Wait for some time. - wait_for_conn(w) - else - error("peer $(w.id) is not connected to $(myid()). Topology : " * string(PGRP.topology)) - end - else - w.ct_time = time() - if myid() > w.id - t = @async exec_conn_func(w) - else - # route request via node 1 - t = @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid()) - end - errormonitor(t) - wait_for_conn(w) - end - end -end - -exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id)::Worker) -function exec_conn_func(w::Worker) - try - f = notnothing(w.conn_func) - # Will be called if some other task tries to connect at the same time. - w.conn_func = () -> wait_for_conn(w) - f() - catch e - w.conn_func = () -> throw(e) - rethrow() - end - nothing -end - -function wait_for_conn(w) - if w.state === W_CREATED - timeout = worker_timeout() - (time() - w.ct_time) - timeout <= 0 && error("peer $(w.id) has not connected to $(myid())") - - @async (sleep(timeout); notify(w.c_state; all=true)) - wait(w.c_state) - w.state === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds") - end - nothing -end - -## process group creation ## - -mutable struct LocalProcess - id::Int - bind_addr::String - bind_port::UInt16 - cookie::String - LocalProcess() = new(1) -end - -worker_timeout() = parse(Float64, get(ENV, "JULIA_WORKER_TIMEOUT", "60.0")) - - -## worker creation and setup ## -""" - start_worker([out::IO=stdout], cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true) - -`start_worker` is an internal function which is the default entry point for -worker processes connecting via TCP/IP. It sets up the process as a Julia cluster -worker. - -host:port information is written to stream `out` (defaults to stdout). 
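The header written to `out` has the fixed shape `julia_worker:<port>#<bind_addr>`, which the master later picks apart with `parse_connection_info`. A small stand-alone illustration with made-up values:

```julia
# Hypothetical header line printed by a freshly started worker:
header = "julia_worker:9157#192.168.1.12"

m = match(r"^julia_worker:(\d+)#(.*)", header)   # the same pattern parse_connection_info uses
port      = parse(UInt16, m.captures[1])         # 0x23c5 (9157)
bind_addr = String(m.captures[2])                # "192.168.1.12"
```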
- -The function reads the cookie from stdin if required, and listens on a free port -(or if specified, the port in the `--bind-to` command line option) and schedules -tasks to process incoming TCP connections and requests. It also (optionally) -closes stdin and redirects stderr to stdout. - -It does not return. -""" -start_worker(cookie::AbstractString=readline(stdin); kwargs...) = start_worker(stdout, cookie; kwargs...) -function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true) - init_multi() - - if close_stdin # workers will not use it - redirect_stdin(devnull) - close(stdin) - end - stderr_to_stdout && redirect_stderr(stdout) - - init_worker(cookie) - interface = IPv4(LPROC.bind_addr) - if LPROC.bind_port == 0 - port_hint = 9000 + (getpid() % 1000) - (port, sock) = listenany(interface, UInt16(port_hint)) - LPROC.bind_port = port - else - sock = listen(interface, LPROC.bind_port) - end - errormonitor(@async while isopen(sock) - client = accept(sock) - process_messages(client, client, true) - end) - print(out, "julia_worker:") # print header - print(out, "$(string(LPROC.bind_port))#") # print port - print(out, LPROC.bind_addr) - print(out, '\n') - flush(out) - - Sockets.nagle(sock, false) - Sockets.quickack(sock, true) - - if ccall(:jl_running_on_valgrind,Cint,()) != 0 - println(out, "PID = $(getpid())") - end - - try - # To prevent hanging processes on remote machines, newly launched workers exit if the - # master process does not connect in time. - check_master_connect() - while true; wait(); end - catch err - print(stderr, "unhandled exception on $(myid()): $(err)\nexiting.\n") - end - - close(sock) - exit(0) -end - - -function redirect_worker_output(ident, stream) - t = @async while !eof(stream) - line = readline(stream) - if startswith(line, " From worker ") - # stdout's of "additional" workers started from an initial worker on a host are not available - # on the master directly - they are routed via the initial worker's stdout. - println(line) - else - println(" From worker $(ident):\t$line") - end - end - errormonitor(t) -end - -struct LaunchWorkerError <: Exception - msg::String -end - -Base.showerror(io::IO, e::LaunchWorkerError) = print(io, e.msg) - -# The default TCP transport relies on the worker listening on a free -# port available and printing its bind address and port. -# The master process uses this to connect to the worker and subsequently -# setup a all-to-all network. -function read_worker_host_port(io::IO) - t0 = time_ns() - - # Wait at most for JULIA_WORKER_TIMEOUT seconds to read host:port - # info from the worker - timeout = worker_timeout() * 1e9 - # We expect the first line to contain the host:port string. However, as - # the worker may be launched via ssh or a cluster manager like SLURM, - # ignore any informational / warning lines printed by the launch command. - # If we do not find the host:port string in the first 1000 lines, treat it - # as an error. - - ntries = 1000 - leader = String[] - try - while ntries > 0 - readtask = @async readline(io) - yield() - while !istaskdone(readtask) && ((time_ns() - t0) < timeout) - sleep(0.05) - end - !istaskdone(readtask) && break - - conninfo = fetch(readtask) - if isempty(conninfo) && !isopen(io) - throw(LaunchWorkerError("Unable to read host:port string from worker. 
Launch command exited with error?")) - end - - ntries -= 1 - bind_addr, port = parse_connection_info(conninfo) - if !isempty(bind_addr) - return bind_addr, port - end - - # collect unmatched lines - push!(leader, conninfo) - end - close(io) - if ntries > 0 - throw(LaunchWorkerError("Timed out waiting to read host:port string from worker.")) - else - throw(LaunchWorkerError("Unexpected output from worker launch command. Host:port string not found.")) - end - finally - for line in leader - println("\tFrom worker startup:\t", line) - end - end -end - -function parse_connection_info(str) - m = match(r"^julia_worker:(\d+)#(.*)", str) - if m !== nothing - (String(m.captures[2]), parse(UInt16, m.captures[1])) - else - ("", UInt16(0)) - end -end - -""" - init_worker(cookie::AbstractString, manager::ClusterManager=DefaultClusterManager()) - -Called by cluster managers implementing custom transports. It initializes a newly launched -process as a worker. Command line argument `--worker[=]` has the effect of initializing a -process as a worker using TCP/IP sockets for transport. -`cookie` is a [`cluster_cookie`](@ref). -""" -function init_worker(cookie::AbstractString, manager::ClusterManager=DefaultClusterManager()) - myrole!(:worker) - - # On workers, the default cluster manager connects via TCP sockets. Custom - # transports will need to call this function with their own manager. - global cluster_manager - cluster_manager = manager - - # Since our pid has yet to be set, ensure no RemoteChannel / Future have been created or addprocs() called. - @assert nprocs() <= 1 - @assert isempty(PGRP.refs) - @assert isempty(client_refs) - - # System is started in head node mode, cleanup related entries - empty!(PGRP.workers) - empty!(map_pid_wrkr) - - cluster_cookie(cookie) - nothing -end - - -# The main function for adding worker processes. -# `manager` is of type ClusterManager. The respective managers are responsible -# for launching the workers. All keyword arguments (plus a few default values) -# are available as a dictionary to the `launch` methods -# -# Only one addprocs can be in progress at any time -# -const worker_lock = ReentrantLock() - -""" - addprocs(manager::ClusterManager; kwargs...) -> List of process identifiers - -Launches worker processes via the specified cluster manager. - -For example, Beowulf clusters are supported via a custom cluster manager implemented in -the package `ClusterManagers.jl`. - -The number of seconds a newly launched worker waits for connection establishment from the -master can be specified via variable `JULIA_WORKER_TIMEOUT` in the worker process's -environment. Relevant only when using TCP/IP as transport. - -To launch workers without blocking the REPL, or the containing function -if launching workers programmatically, execute `addprocs` in its own task. - -# Examples - -```julia -# On busy clusters, call `addprocs` asynchronously -t = @async addprocs(...) -``` - -```julia -# Utilize workers as and when they come online -if nprocs() > 1 # Ensure at least one new worker is available - .... # perform distributed execution -end -``` - -```julia -# Retrieve newly launched worker IDs, or any error messages -if istaskdone(t) # Check if `addprocs` has completed to ensure `fetch` doesn't block - if nworkers() == N - new_pids = fetch(t) - else - fetch(t) - end -end -``` -""" -function addprocs(manager::ClusterManager; kwargs...) - init_multi() - - cluster_mgmt_from_master_check() - - lock(worker_lock) - try - addprocs_locked(manager::ClusterManager; kwargs...) 
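Both ends of the handshake consult `worker_timeout()` defined earlier in this file: the master while waiting for the `julia_worker:` header, and the worker while waiting for the master to connect back. The value below is illustrative; for remote workers the variable has to be present in the worker's own environment:

```julia
Distributed.worker_timeout()            # 60.0 by default

ENV["JULIA_WORKER_TIMEOUT"] = "120.0"   # re-read from the environment on every call
Distributed.worker_timeout()            # 120.0
```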
- finally - unlock(worker_lock) - end -end - -function addprocs_locked(manager::ClusterManager; kwargs...) - params = merge(default_addprocs_params(manager), Dict{Symbol,Any}(kwargs)) - topology(Symbol(params[:topology])) - - if PGRP.topology !== :all_to_all - params[:lazy] = false - end - - if PGRP.lazy === nothing || nprocs() == 1 - PGRP.lazy = params[:lazy] - elseif isclusterlazy() != params[:lazy] - throw(ArgumentError(string("Active workers with lazy=", isclusterlazy(), - ". Cannot set lazy=", params[:lazy]))) - end - - # References to launched workers, filled when each worker is fully initialized and - # has connected to all nodes. - launched_q = Int[] # Asynchronously filled by the launch method - - # The `launch` method should add an object of type WorkerConfig for every - # worker launched. It provides information required on how to connect - # to it. - - # FIXME: launched should be a Channel, launch_ntfy should be a Threads.Condition - # but both are part of the public interface. This means we currently can't use - # `Threads.@spawn` in the code below. - launched = WorkerConfig[] - launch_ntfy = Condition() - - # call manager's `launch` is a separate task. This allows the master - # process initiate the connection setup process as and when workers come - # online - t_launch = @async launch(manager, params, launched, launch_ntfy) - - @sync begin - while true - if isempty(launched) - istaskdone(t_launch) && break - @async (sleep(1); notify(launch_ntfy)) - wait(launch_ntfy) - end - - if !isempty(launched) - wconfig = popfirst!(launched) - let wconfig=wconfig - @async setup_launched_worker(manager, wconfig, launched_q) - end - end - end - end - - Base.wait(t_launch) # catches any thrown errors from the launch task - - # Since all worker-to-worker setups may not have completed by the time this - # function returns to the caller, send the complete list to all workers. - # Useful for nprocs(), nworkers(), etc to return valid values on the workers. - all_w = workers() - for pid in all_w - remote_do(set_valid_processes, pid, all_w) - end - - sort!(launched_q) -end - -function set_valid_processes(plist::Array{Int}) - for pid in setdiff(plist, workers()) - myid() != pid && Worker(pid) - end -end - -""" - default_addprocs_params(mgr::ClusterManager) -> Dict{Symbol, Any} - -Implemented by cluster managers. The default keyword parameters passed when calling -`addprocs(mgr)`. The minimal set of options is available by calling -`default_addprocs_params()` -""" -default_addprocs_params(::ClusterManager) = default_addprocs_params() -default_addprocs_params() = Dict{Symbol,Any}( - :topology => :all_to_all, - :dir => pwd(), - :exename => joinpath(Sys.BINDIR, julia_exename()), - :exeflags => ``, - :env => [], - :enable_threaded_blas => false, - :lazy => true) - - -function setup_launched_worker(manager, wconfig, launched_q) - pid = create_worker(manager, wconfig) - push!(launched_q, pid) - - # When starting workers on remote multi-core hosts, `launch` can (optionally) start only one - # process on the remote machine, with a request to start additional workers of the - # same type. This is done by setting an appropriate value to `WorkerConfig.cnt`. 
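Any key returned by `default_addprocs_params()` can be overridden as a keyword to `addprocs`. A sketch with illustrative values; note that `addprocs_locked` above forces `lazy = false` for any topology other than `:all_to_all`:

```julia
using Distributed

pids = addprocs(4;
                topology = :master_worker,   # workers connect only to process 1
                exeflags = `--threads=2`,    # appended to each worker's command line
                enable_threaded_blas = true,
                dir = tempdir())             # working directory for the new workers
```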
- cnt = something(wconfig.count, 1) - if cnt === :auto - cnt = wconfig.environ[:cpu_threads] - end - cnt = cnt - 1 # Removing self from the requested number - - if cnt > 0 - launch_n_additional_processes(manager, pid, wconfig, cnt, launched_q) - end -end - - -function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launched_q) - @sync begin - exename = notnothing(fromconfig.exename) - exeflags = something(fromconfig.exeflags, ``) - cmd = `$exename $exeflags` - - new_addresses = remotecall_fetch(launch_additional, frompid, cnt, cmd) - for address in new_addresses - (bind_addr, port) = address - - wconfig = WorkerConfig() - for x in [:host, :tunnel, :multiplex, :sshflags, :exeflags, :exename, :enable_threaded_blas] - Base.setproperty!(wconfig, x, Base.getproperty(fromconfig, x)) - end - wconfig.bind_addr = bind_addr - wconfig.port = port - - let wconfig=wconfig - @async begin - pid = create_worker(manager, wconfig) - remote_do(redirect_output_from_additional_worker, frompid, pid, port) - push!(launched_q, pid) - end - end - end - end -end - -function create_worker(manager, wconfig) - # only node 1 can add new nodes, since nobody else has the full list of address:port - @assert LPROC.id == 1 - timeout = worker_timeout() - - # initiate a connect. Does not wait for connection completion in case of TCP. - w = Worker() - local r_s, w_s - try - (r_s, w_s) = connect(manager, w.id, wconfig) - catch ex - try - deregister_worker(w.id) - kill(manager, w.id, wconfig) - finally - rethrow(ex) - end - end - - w = Worker(w.id, r_s, w_s, manager; config=wconfig) - # install a finalizer to perform cleanup if necessary - finalizer(w) do w - if myid() == 1 - manage(w.manager, w.id, w.config, :finalize) - end - end - - # set when the new worker has finished connections with all other workers - ntfy_oid = RRID() - rr_ntfy_join = lookup_ref(ntfy_oid) - rr_ntfy_join.waitingfor = myid() - - # Start a new task to handle inbound messages from connected worker in master. - # Also calls `wait_connected` on TCP streams. - process_messages(w.r_stream, w.w_stream, false) - - # send address information of all workers to the new worker. - # Cluster managers set the address of each worker in `WorkerConfig.connect_at`. - # A new worker uses this to setup an all-to-all network if topology :all_to_all is specified. - # Workers with higher pids connect to workers with lower pids. Except process 1 (master) which - # initiates connections to all workers. - - # Connection Setup Protocol: - # - Master sends 16-byte cookie followed by 16-byte version string and a JoinPGRP message to all workers - # - On each worker - # - Worker responds with a 16-byte version followed by a JoinCompleteMsg - # - Connects to all workers less than its pid. 
Sends the cookie, version and an IdentifySocket message - # - Workers with incoming connection requests write back their Version and an IdentifySocketAckMsg message - # - On master, receiving a JoinCompleteMsg triggers rr_ntfy_join (signifies that worker setup is complete) - - join_list = [] - if PGRP.topology === :all_to_all - # need to wait for lower worker pids to have completed connecting, since the numerical value - # of pids is relevant to the connection process, i.e., higher pids connect to lower pids and they - # require the value of config.connect_at which is set only upon connection completion - for jw in PGRP.workers - if (jw.id != 1) && (jw.id < w.id) - (jw.state === W_CREATED) && wait(jw.c_state) - push!(join_list, jw) - end - end - - elseif PGRP.topology === :custom - # wait for requested workers to be up before connecting to them. - filterfunc(x) = (x.id != 1) && isdefined(x, :config) && - (notnothing(x.config.ident) in something(wconfig.connect_idents, [])) - - wlist = filter(filterfunc, PGRP.workers) - waittime = 0 - while wconfig.connect_idents !== nothing && - length(wlist) < length(wconfig.connect_idents) - if waittime >= timeout - error("peer workers did not connect within $timeout seconds") - end - sleep(1.0) - waittime += 1 - wlist = filter(filterfunc, PGRP.workers) - end - - for wl in wlist - (wl.state === W_CREATED) && wait(wl.c_state) - push!(join_list, wl) - end - end - - all_locs = mapany(x -> isa(x, Worker) ? - (something(x.config.connect_at, ()), x.id) : - ((), x.id, true), - join_list) - send_connection_hdr(w, true) - enable_threaded_blas = something(wconfig.enable_threaded_blas, false) - join_message = JoinPGRPMsg(w.id, all_locs, PGRP.topology, enable_threaded_blas, isclusterlazy()) - send_msg_now(w, MsgHeader(RRID(0,0), ntfy_oid), join_message) - - @async manage(w.manager, w.id, w.config, :register) - # wait for rr_ntfy_join with timeout - timedout = false - @async (sleep($timeout); timedout = true; put!(rr_ntfy_join, 1)) - wait(rr_ntfy_join) - if timedout - error("worker did not connect within $timeout seconds") - end - lock(client_refs) do - delete!(PGRP.refs, ntfy_oid) - end - - return w.id -end - - -# Called on the first worker on a remote host. Used to optimize launching -# of multiple workers on a remote host (to leverage multi-core) - -additional_io_objs=Dict() -function launch_additional(np::Integer, cmd::Cmd) - io_objs = Vector{Any}(undef, np) - addresses = Vector{Any}(undef, np) - - for i in 1:np - io = open(detach(cmd), "r+") - write_cookie(io) - io_objs[i] = io.out - end - - for (i,io) in enumerate(io_objs) - (host, port) = read_worker_host_port(io) - addresses[i] = (host, port) - additional_io_objs[port] = io - end - - return addresses -end - -function redirect_output_from_additional_worker(pid, port) - io = additional_io_objs[port] - redirect_worker_output("$pid", io) - delete!(additional_io_objs, port) - nothing -end - -function check_master_connect() - timeout = worker_timeout() * 1e9 - # If we do not have at least process 1 connect to us within timeout - # we log an error and exit, unless we're running on valgrind - if ccall(:jl_running_on_valgrind,Cint,()) != 0 - return - end - @async begin - start = time_ns() - while !haskey(map_pid_wrkr, 1) && (time_ns() - start) < timeout - sleep(1.0) - end - - if !haskey(map_pid_wrkr, 1) - print(stderr, "Master process (id 1) could not connect within $(timeout/1e9) seconds.\nexiting.\n") - exit(1) - end - end -end - - -""" - cluster_cookie() -> cookie - -Return the cluster cookie. 
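For reference, the cookie is a plain ASCII secret of at most `HDR_COOKIE_LEN` (16) characters; the setter shown just below right-pads shorter values with spaces. `"mysecret"` is a made-up value:

```julia
using Distributed

cluster_cookie("mysecret")   # set; must be ASCII with length <= 16
cluster_cookie()             # "mysecret        " (right-padded to 16 characters)

# A worker brought up over a custom transport must present the same cookie,
# e.g. via init_worker("mysecret") or by piping it to `julia --worker` on stdin.
```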
-""" -cluster_cookie() = (init_multi(); LPROC.cookie) - -""" - cluster_cookie(cookie) -> cookie - -Set the passed cookie as the cluster cookie, then returns it. -""" -function cluster_cookie(cookie) - init_multi() - # The cookie must be an ASCII string with length <= HDR_COOKIE_LEN - @assert isascii(cookie) - @assert length(cookie) <= HDR_COOKIE_LEN - - cookie = rpad(cookie, HDR_COOKIE_LEN) - - LPROC.cookie = cookie - cookie -end - - -let next_pid = 2 # 1 is reserved for the client (always) - global get_next_pid - function get_next_pid() - retval = next_pid - next_pid += 1 - retval - end -end - -mutable struct ProcessGroup - name::String - workers::Array{Any,1} - refs::Dict{RRID,Any} # global references - topology::Symbol - lazy::Union{Bool, Nothing} - - ProcessGroup(w::Array{Any,1}) = new("pg-default", w, Dict(), :all_to_all, nothing) -end -const PGRP = ProcessGroup([]) - -function topology(t) - @assert t in [:all_to_all, :master_worker, :custom] - if (PGRP.topology==t) || ((myid()==1) && (nprocs()==1)) || (myid() > 1) - PGRP.topology = t - else - error("Workers with Topology $(PGRP.topology) already exist. Requested Topology $(t) cannot be set.") - end - t -end - -isclusterlazy() = something(PGRP.lazy, false) - -get_bind_addr(pid::Integer) = get_bind_addr(worker_from_id(pid)) -get_bind_addr(w::LocalProcess) = LPROC.bind_addr -function get_bind_addr(w::Worker) - if w.config.bind_addr === nothing - if w.id != myid() - w.config.bind_addr = remotecall_fetch(get_bind_addr, w.id, w.id) - end - end - w.config.bind_addr -end - -# globals -const LPROC = LocalProcess() -const LPROCROLE = Ref{Symbol}(:master) -const HDR_VERSION_LEN=16 -const HDR_COOKIE_LEN=16 -const map_pid_wrkr = Dict{Int, Union{Worker, LocalProcess}}() -const map_sock_wrkr = IdDict() -const map_del_wrkr = Set{Int}() - -# whether process is a master or worker in a distributed setup -myrole() = LPROCROLE[] -function myrole!(proctype::Symbol) - LPROCROLE[] = proctype -end - -# cluster management related API -""" - myid() - -Get the id of the current process. - -# Examples -```julia-repl -julia> myid() -1 - -julia> remotecall_fetch(() -> myid(), 4) -4 -``` -""" -myid() = LPROC.id - -""" - nprocs() - -Get the number of available processes. - -# Examples -```julia-repl -julia> nprocs() -3 - -julia> workers() -2-element Array{Int64,1}: - 2 - 3 -``` -""" -function nprocs() - if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) - n = length(PGRP.workers) - # filter out workers in the process of being setup/shutdown. - for jw in PGRP.workers - if !isa(jw, LocalProcess) && (jw.state !== W_CONNECTED) - n = n - 1 - end - end - return n - else - return length(PGRP.workers) - end -end - -""" - nworkers() - -Get the number of available worker processes. This is one less than [`nprocs()`](@ref). Equal to -`nprocs()` if `nprocs() == 1`. - -# Examples -```julia-repl -\$ julia -p 2 - -julia> nprocs() -3 - -julia> nworkers() -2 -``` -""" -function nworkers() - n = nprocs() - n == 1 ? 1 : n-1 -end - -""" - procs() - -Return a list of all process identifiers, including pid 1 (which is not included by [`workers()`](@ref)). - -# Examples -```julia-repl -\$ julia -p 2 - -julia> procs() -3-element Array{Int64,1}: - 1 - 2 - 3 -``` -""" -function procs() - if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) - # filter out workers in the process of being setup/shutdown. 
- return Int[x.id for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)] - else - return Int[x.id for x in PGRP.workers] - end -end - -function id_in_procs(id) # faster version of `id in procs()` - if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) - for x in PGRP.workers - if (x.id::Int) == id && (isa(x, LocalProcess) || (x::Worker).state === W_CONNECTED) - return true - end - end - else - for x in PGRP.workers - if (x.id::Int) == id - return true - end - end - end - return false -end - -""" - procs(pid::Integer) - -Return a list of all process identifiers on the same physical node. -Specifically all workers bound to the same ip-address as `pid` are returned. -""" -function procs(pid::Integer) - if myid() == 1 - all_workers = [x for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)] - if (pid == 1) || (isa(map_pid_wrkr[pid].manager, LocalManager)) - Int[x.id for x in filter(w -> (w.id==1) || (isa(w.manager, LocalManager)), all_workers)] - else - ipatpid = get_bind_addr(pid) - Int[x.id for x in filter(w -> get_bind_addr(w) == ipatpid, all_workers)] - end - else - remotecall_fetch(procs, 1, pid) - end -end - -""" - workers() - -Return a list of all worker process identifiers. - -# Examples -```julia-repl -\$ julia -p 2 - -julia> workers() -2-element Array{Int64,1}: - 2 - 3 -``` -""" -function workers() - allp = procs() - if length(allp) == 1 - allp - else - filter(x -> x != 1, allp) - end -end - -function cluster_mgmt_from_master_check() - if myid() != 1 - throw(ErrorException("Only process 1 can add and remove workers")) - end -end - -""" - rmprocs(pids...; waitfor=typemax(Int)) - -Remove the specified workers. Note that only process 1 can add or remove -workers. - -Argument `waitfor` specifies how long to wait for the workers to shut down: - - If unspecified, `rmprocs` will wait until all requested `pids` are removed. - - An [`ErrorException`](@ref) is raised if all workers cannot be terminated before - the requested `waitfor` seconds. - - With a `waitfor` value of 0, the call returns immediately with the workers - scheduled for removal in a different task. The scheduled [`Task`](@ref) object is - returned. The user should call [`wait`](@ref) on the task before invoking any other - parallel calls. - -# Examples -```julia-repl -\$ julia -p 5 - -julia> t = rmprocs(2, 3, waitfor=0) -Task (runnable) @0x0000000107c718d0 - -julia> wait(t) - -julia> workers() -3-element Array{Int64,1}: - 4 - 5 - 6 -``` -""" -function rmprocs(pids...; waitfor=typemax(Int)) - cluster_mgmt_from_master_check() - - pids = vcat(pids...) - if waitfor == 0 - t = @async _rmprocs(pids, typemax(Int)) - yield() - return t - else - _rmprocs(pids, waitfor) - # return a dummy task object that user code can wait on. 
- return @async nothing - end -end - -function _rmprocs(pids, waitfor) - lock(worker_lock) - try - rmprocset = Union{LocalProcess, Worker}[] - for p in pids - if p == 1 - @warn "rmprocs: process 1 not removed" - else - if haskey(map_pid_wrkr, p) - w = map_pid_wrkr[p] - set_worker_state(w, W_TERMINATING) - kill(w.manager, p, w.config) - push!(rmprocset, w) - end - end - end - - start = time_ns() - while (time_ns() - start) < waitfor*1e9 - all(w -> w.state === W_TERMINATED, rmprocset) && break - sleep(min(0.1, waitfor - (time_ns() - start)/1e9)) - end - - unremoved = [wrkr.id for wrkr in filter(w -> w.state !== W_TERMINATED, rmprocset)] - if length(unremoved) > 0 - estr = string("rmprocs: pids ", unremoved, " not terminated after ", waitfor, " seconds.") - throw(ErrorException(estr)) - end - finally - unlock(worker_lock) - end -end - - -""" - ProcessExitedException(worker_id::Int) - -After a client Julia process has exited, further attempts to reference the dead child will -throw this exception. -""" -struct ProcessExitedException <: Exception - worker_id::Int -end - -# No-arg constructor added for compatibility with Julia 1.0 & 1.1, should be deprecated in the future -ProcessExitedException() = ProcessExitedException(-1) - -worker_from_id(i) = worker_from_id(PGRP, i) -function worker_from_id(pg::ProcessGroup, i) - if !isempty(map_del_wrkr) && in(i, map_del_wrkr) - throw(ProcessExitedException(i)) - end - w = get(map_pid_wrkr, i, nothing) - if w === nothing - if myid() == 1 - error("no process with id $i exists") - end - w = Worker(i) - map_pid_wrkr[i] = w - else - w = w::Union{Worker, LocalProcess} - end - w -end - -""" - worker_id_from_socket(s) -> pid - -A low-level API which, given a `IO` connection or a `Worker`, -returns the `pid` of the worker it is connected to. -This is useful when writing custom [`serialize`](@ref) methods for a type, -which optimizes the data written out depending on the receiving process id. 
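A sketch of the pattern that docstring describes: a custom `serialize` method which drops a bulky, recomputable field when the destination is a remote worker. `CachedResult` and its fields are hypothetical:

```julia
using Distributed, Serialization

struct CachedResult
    meta::String
    cache::Vector{Float64}   # cheap to recompute, expensive to ship
end

function Serialization.serialize(s::Distributed.ClusterSerializer, x::CachedResult)
    Serialization.serialize_type(s, CachedResult)
    serialize(s, x.meta)
    # worker_id_from_socket tells us which process this stream feeds; only keep
    # the cache when we are effectively serializing to ourselves (a local buffer).
    if worker_id_from_socket(s.io) == myid()
        serialize(s, x.cache)
    else
        serialize(s, Float64[])
    end
end
```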
-""" -function worker_id_from_socket(s) - w = get(map_sock_wrkr, s, nothing) - if isa(w,Worker) - if s === w.r_stream || s === w.w_stream - return w.id - end - end - if isa(s,IOStream) && fd(s)==-1 - # serializing to a local buffer - return myid() - end - return -1 -end - - -register_worker(w) = register_worker(PGRP, w) -function register_worker(pg, w) - push!(pg.workers, w) - map_pid_wrkr[w.id] = w -end - -function register_worker_streams(w) - map_sock_wrkr[w.r_stream] = w - map_sock_wrkr[w.w_stream] = w -end - -deregister_worker(pid) = deregister_worker(PGRP, pid) -function deregister_worker(pg, pid) - pg.workers = filter(x -> !(x.id == pid), pg.workers) - w = pop!(map_pid_wrkr, pid, nothing) - if isa(w, Worker) - if isdefined(w, :r_stream) - pop!(map_sock_wrkr, w.r_stream, nothing) - if w.r_stream != w.w_stream - pop!(map_sock_wrkr, w.w_stream, nothing) - end - end - - if myid() == 1 && (myrole() === :master) && isdefined(w, :config) - # Notify the cluster manager of this workers death - manage(w.manager, w.id, w.config, :deregister) - if PGRP.topology !== :all_to_all || isclusterlazy() - for rpid in workers() - try - remote_do(deregister_worker, rpid, pid) - catch - end - end - end - end - end - push!(map_del_wrkr, pid) - - # delete this worker from our remote reference client sets - ids = [] - tonotify = [] - lock(client_refs) do - for (id, rv) in pg.refs - if in(pid, rv.clientset) - push!(ids, id) - end - if rv.waitingfor == pid - push!(tonotify, (id, rv)) - end - end - for id in ids - del_client(pg, id, pid) - end - - # throw exception to tasks waiting for this pid - for (id, rv) in tonotify - close(rv.c, ProcessExitedException(pid)) - delete!(pg.refs, id) - end - end - return -end - - -function interrupt(pid::Integer) - @assert myid() == 1 - w = map_pid_wrkr[pid] - if isa(w, Worker) - manage(w.manager, w.id, w.config, :interrupt) - end - return -end - -""" - interrupt(pids::Integer...) - -Interrupt the current executing task on the specified workers. This is equivalent to -pressing Ctrl-C on the local machine. If no arguments are given, all workers are interrupted. -""" -interrupt(pids::Integer...) = interrupt([pids...]) - -""" - interrupt(pids::AbstractVector=workers()) - -Interrupt the current executing task on the specified workers. This is equivalent to -pressing Ctrl-C on the local machine. If no arguments are given, all workers are interrupted. -""" -function interrupt(pids::AbstractVector=workers()) - @assert myid() == 1 - @sync begin - for pid in pids - @async interrupt(pid) - end - end -end - -wp_bind_addr(p::LocalProcess) = p.bind_addr -wp_bind_addr(p) = p.config.bind_addr - -function check_same_host(pids) - if myid() != 1 - return remotecall_fetch(check_same_host, 1, pids) - else - # We checkfirst if all test pids have been started using the local manager, - # else we check for the same bind_to addr. 
This handles the special case - # where the local ip address may change - as during a system sleep/awake - if all(p -> (p==1) || (isa(map_pid_wrkr[p].manager, LocalManager)), pids) - return true - else - first_bind_addr = notnothing(wp_bind_addr(map_pid_wrkr[pids[1]])) - return all(p -> notnothing(wp_bind_addr(map_pid_wrkr[p])) == first_bind_addr, pids[2:end]) - end - end -end - -function terminate_all_workers() - myid() != 1 && return - - if nprocs() > 1 - try - rmprocs(workers(); waitfor=5.0) - catch _ex - @warn "Forcibly interrupting busy workers" exception=_ex - # Might be computation bound, interrupt them and try again - interrupt(workers()) - try - rmprocs(workers(); waitfor=5.0) - catch _ex2 - @error "Unable to terminate all workers" exception=_ex2,catch_backtrace() - end - end - end -end - -# initialize the local proc network address / port -function init_bind_addr() - opts = JLOptions() - if opts.bindto != C_NULL - bind_to = split(unsafe_string(opts.bindto), ":") - bind_addr = string(parse(IPAddr, bind_to[1])) - if length(bind_to) > 1 - bind_port = parse(Int,bind_to[2]) - else - bind_port = 0 - end - else - bind_port = 0 - try - bind_addr = string(getipaddr()) - catch - # All networking is unavailable, initialize bind_addr to the loopback address - # Will cause an exception to be raised only when used. - bind_addr = "127.0.0.1" - end - end - global LPROC - LPROC.bind_addr = bind_addr - LPROC.bind_port = UInt16(bind_port) -end - -using Random: randstring - -let inited = false - # do initialization that's only needed when there is more than 1 processor - global function init_multi() - if !inited - inited = true - push!(Base.package_callbacks, _require_callback) - atexit(terminate_all_workers) - init_bind_addr() - cluster_cookie(randstring(HDR_COOKIE_LEN)) - end - return nothing - end -end - -function init_parallel() - start_gc_msgs_task() - - # start in "head node" mode, if worker, will override later. - global PGRP - global LPROC - LPROC.id = 1 - @assert isempty(PGRP.workers) - register_worker(LPROC) -end - -write_cookie(io::IO) = print(io.in, string(cluster_cookie(), "\n")) - -function get_threads_spec(opts) - if opts.nthreads > 0 - @assert opts.nthreadpools >= 1 - @assert opts.nthreads_per_pool != C_NULL - thr = "$(unsafe_load(opts.nthreads_per_pool))" - if opts.nthreadpools == 2 - thr = "$(thr),$(unsafe_load(opts.nthreads_per_pool, 2))" - end - `--threads=$(thr)` - else - `` - end -end - -function get_gcthreads_spec(opts) - if opts.nmarkthreads > 0 || opts.nsweepthreads > 0 - `--gcthreads=$(opts.nmarkthreads),$(opts.nsweepthreads)` - else - `` - end -end - -# Starts workers specified by (-n|--procs) and --machine-file command line options -function process_opts(opts) - # startup worker. - # opts.startupfile, opts.load, etc should should not be processed for workers. 
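The `--machine-file` format consumed by `load_machine_file` further down is one host per line, optionally prefixed with a worker count as `count*host`. A self-contained sketch of that per-line parse, with made-up host names:

```julia
# Mirrors the splitting done in load_machine_file below.
function parse_machine_line(line::AbstractString)
    s = strip.(split(line, '*'; keepempty=false))
    if length(s) > 1
        cnt = all(isdigit, s[1]) ? parse(Int, s[1]) : Symbol(s[1])
        return (String(s[2]), cnt)
    end
    return String(line)
end

parse_machine_line("4*node1.example.com")   # ("node1.example.com", 4)
parse_machine_line("node2.example.com")     # "node2.example.com"
```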
- if opts.worker == 1 - # does not return - if opts.cookie != C_NULL - start_worker(unsafe_string(opts.cookie)) - else - start_worker() - end - end - - # Propagate --threads to workers - threads = get_threads_spec(opts) - # Propagate --gcthreads to workers - gcthreads = get_gcthreads_spec(opts) - - exeflags = `$threads $gcthreads` - - # add processors - if opts.nprocs > 0 - addprocs(opts.nprocs; exeflags=exeflags) - end - - # load processes from machine file - if opts.machine_file != C_NULL - addprocs(load_machine_file(unsafe_string(opts.machine_file)); exeflags=exeflags) - end - return nothing -end - - -function load_machine_file(path::AbstractString) - machines = [] - for line in split(read(path, String),'\n'; keepempty=false) - s = split(line, '*'; keepempty=false) - map!(strip, s, s) - if length(s) > 1 - cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1]) - push!(machines,(s[2], cnt)) - else - push!(machines,line) - end - end - return machines -end diff --git a/stdlib/Distributed/src/clusterserialize.jl b/stdlib/Distributed/src/clusterserialize.jl deleted file mode 100644 index 0acd4ce68c45b..0000000000000 --- a/stdlib/Distributed/src/clusterserialize.jl +++ /dev/null @@ -1,254 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Serialization: serialize_cycle, deserialize_cycle, writetag, - serialize_typename, deserialize_typename, - TYPENAME_TAG, TASK_TAG, reset_state, serialize_type -using Serialization.__deserialized_types__ - -import Serialization: object_number, lookup_object_number, remember_object - -mutable struct ClusterSerializer{I<:IO} <: AbstractSerializer - io::I - counter::Int - table::IdDict{Any,Any} - pending_refs::Vector{Int} - - pid::Int # Worker we are connected to. - tn_obj_sent::Set{UInt64} # TypeName objects sent - glbs_sent::Dict{Symbol, Tuple{UInt64, UInt64}} # (key,value) -> (symbol, (hash_value, objectid)) - glbs_in_tnobj::Dict{UInt64, Vector{Symbol}} # Track globals referenced in - # anonymous functions. - anonfunc_id::UInt64 - - function ClusterSerializer{I}(io::I) where I<:IO - new(io, 0, IdDict(), Int[], worker_id_from_socket(io), - Set{UInt64}(), Dict{UInt64, UInt64}(), Dict{UInt64, Vector{Symbol}}(), 0) - end -end -ClusterSerializer(io::IO) = ClusterSerializer{typeof(io)}(io) - -const object_numbers = WeakKeyDict() -const obj_number_salt = Ref(0) -function object_number(s::ClusterSerializer, @nospecialize(l)) - global obj_number_salt, object_numbers - if haskey(object_numbers, l) - return object_numbers[l] - end - # a hash function that always gives the same number to the same - # object on the same machine, and is unique over all machines. 
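The numbering scheme implemented right below packs the sender's pid into the high bits, so object numbers minted on different processes cannot collide. A quick illustration with arbitrary pids and salts:

```julia
object_number_for(salt::UInt64, pid::Int) = salt + (UInt64(pid) << 44)

object_number_for(UInt64(0), 1)   # 0x0000100000000000
object_number_for(UInt64(0), 2)   # 0x0000200000000000
object_number_for(UInt64(5), 2)   # 0x0000200000000005
```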
- ln = obj_number_salt[]+(UInt64(myid())<<44) - obj_number_salt[] += 1 - object_numbers[l] = ln - return ln::UInt64 -end - -const known_object_data = Dict{UInt64,Any}() - -function lookup_object_number(s::ClusterSerializer, n::UInt64) - return get(known_object_data, n, nothing) -end - -function remember_object(s::ClusterSerializer, @nospecialize(o), n::UInt64) - known_object_data[n] = o - if isa(o, Core.TypeName) && !haskey(object_numbers, o) - # set up reverse mapping for serialize - object_numbers[o] = n - end - return nothing -end - -function deserialize(s::ClusterSerializer, ::Type{Core.TypeName}) - full_body_sent = deserialize(s) - number = read(s.io, UInt64) - if !full_body_sent - tn = lookup_object_number(s, number)::Core.TypeName - remember_object(s, tn, number) - deserialize_cycle(s, tn) - else - tn = deserialize_typename(s, number) - end - - # retrieve arrays of global syms sent if any and deserialize them all. - foreach(sym->deserialize_global_from_main(s, sym), deserialize(s)) - return tn -end - -function serialize(s::ClusterSerializer, t::Core.TypeName) - serialize_cycle(s, t) && return - writetag(s.io, TYPENAME_TAG) - - identifier = object_number(s, t) - send_whole = !(identifier in s.tn_obj_sent) - serialize(s, send_whole) - write(s.io, identifier) - if send_whole - # Track globals referenced in this anonymous function. - # This information is used to resend modified globals when we - # only send the identifier. - prev = s.anonfunc_id - s.anonfunc_id = identifier - serialize_typename(s, t) - s.anonfunc_id = prev - push!(s.tn_obj_sent, identifier) - finalizer(t) do x - cleanup_tname_glbs(s, identifier) - end - end - - # Send global refs if required. - syms = syms_2b_sent(s, identifier) - serialize(s, syms) - foreach(sym->serialize_global_from_main(s, sym), syms) - nothing -end - -function serialize(s::ClusterSerializer, g::GlobalRef) - # Record if required and then invoke the default GlobalRef serializer. - sym = g.name - if g.mod === Main && isdefined(g.mod, sym) - if (binding_module(Main, sym) === Main) && (s.anonfunc_id != 0) && - !startswith(string(sym), "#") # Anonymous functions are handled via FULL_GLOBALREF_TAG - - push!(get!(s.glbs_in_tnobj, s.anonfunc_id, []), sym) - end - end - - invoke(serialize, Tuple{AbstractSerializer, GlobalRef}, s, g) -end - -# Send/resend a global binding if -# a) has not been sent previously, i.e., we are seeing this binding for the first time, or, -# b) hash value has changed or -# c) hash value is same but of a different object, i.e. objectid has changed or -# d) is a bits type -function syms_2b_sent(s::ClusterSerializer, identifier) - lst = Symbol[] - check_syms = get(s.glbs_in_tnobj, identifier, Symbol[]) - for sym in check_syms - v = getfield(Main, sym) - - if isbits(v) - push!(lst, sym) - else - if haskey(s.glbs_sent, sym) - # We have sent this binding before, see if it has changed. 
- hval, oid = s.glbs_sent[sym] - if hval != hash(sym, hash(v)) || oid != objectid(v) - push!(lst, sym) - end - else - push!(lst, sym) - end - end - end - return unique(lst) -end - -function serialize_global_from_main(s::ClusterSerializer, sym) - v = getfield(Main, sym) - - if !isbits(v) - s.glbs_sent[sym] = (hash(sym, hash(v)), objectid(v)) - end - - serialize(s, isconst(Main, sym)) - serialize(s, v) -end - -function deserialize_global_from_main(s::ClusterSerializer, sym) - sym_isconst = deserialize(s) - v = deserialize(s) - if isdefined(Main, sym) && (sym_isconst || isconst(Main, sym)) - if isequal(getfield(Main, sym), v) - # same value; ok - return nothing - else - @warn "Cannot transfer global variable $sym; it already has a value." - return nothing - end - end - if sym_isconst - ccall(:jl_set_const, Cvoid, (Any, Any, Any), Main, sym, v) - else - setglobal!(Main, sym, v) - end - return nothing -end - -function cleanup_tname_glbs(s::ClusterSerializer, identifier) - delete!(s.glbs_in_tnobj, identifier) -end - -# TODO: cleanup from s.tn_obj_sent - - -# Specialized serialize-deserialize implementations for CapturedException to partially -# recover from any deserialization errors in `CapturedException.ex` - -function serialize(s::ClusterSerializer, ex::CapturedException) - serialize_type(s, typeof(ex)) - serialize(s, string(typeof(ex.ex))) # String type should not result in a deser error - serialize(s, ex.processed_bt) # Currently should not result in a deser error - serialize(s, ex.ex) # can result in a UndefVarError on the remote node - # if a type used in ex.ex is undefined on the remote node. -end - -function original_ex(s::ClusterSerializer, ex_str, remote_stktrace) - local pid_str = "" - try - pid_str = string(" from worker ", worker_id_from_socket(s.io)) - catch - end - - stk_str = remote_stktrace ? "Remote" : "Local" - ErrorException(string("Error deserializing a remote exception", pid_str, "\n", - "Remote(original) exception of type ", ex_str, "\n", - stk_str, " stacktrace : ")) -end - -function deserialize(s::ClusterSerializer, t::Type{<:CapturedException}) - ex_str = deserialize(s) - local bt - local capex - try - bt = deserialize(s) - catch e - throw(CompositeException([ - original_ex(s, ex_str, false), - CapturedException(e, catch_backtrace()) - ])) - end - - try - capex = deserialize(s) - catch e - throw(CompositeException([ - CapturedException(original_ex(s, ex_str, true), bt), - CapturedException(e, catch_backtrace()) - ])) - end - - return CapturedException(capex, bt) -end - -""" - clear!(syms, pids=workers(); mod=Main) - -Clears global bindings in modules by initializing them to `nothing`. -`syms` should be of type [`Symbol`](@ref) or a collection of `Symbol`s . `pids` and `mod` -identify the processes and the module in which global variables are to be -reinitialized. Only those names found to be defined under `mod` are cleared. - -An exception is raised if a global constant is requested to be cleared. 
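A short usage sketch for `clear!`: free a large global that was previously broadcast to the workers. The name `bigdata` and its size are illustrative:

```julia
using Distributed
addprocs(2)

@everywhere bigdata = rand(10^6)   # defines Main.bigdata on every process

clear!(:bigdata, workers())        # rebinds it to `nothing` on the workers only

remotecall_fetch(getfield, 2, Main, :bigdata) === nothing   # true; the master keeps its copy
```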
-""" -function clear!(syms, pids=workers(); mod=Main) - @sync for p in pids - @async_unwrap remotecall_wait(clear_impl!, p, syms, mod) - end -end -clear!(sym::Symbol, pid::Int; mod=Main) = clear!([sym], [pid]; mod=mod) -clear!(sym::Symbol, pids=workers(); mod=Main) = clear!([sym], pids; mod=mod) -clear!(syms, pid::Int; mod=Main) = clear!(syms, [pid]; mod=mod) - -clear_impl!(syms, mod::Module) = foreach(x->clear_impl!(x,mod), syms) -clear_impl!(sym::Symbol, mod::Module) = isdefined(mod, sym) && @eval(mod, global $sym = nothing) diff --git a/stdlib/Distributed/src/macros.jl b/stdlib/Distributed/src/macros.jl deleted file mode 100644 index a767c7a40d9c9..0000000000000 --- a/stdlib/Distributed/src/macros.jl +++ /dev/null @@ -1,361 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -let nextidx = Threads.Atomic{Int}(0) - global nextproc - function nextproc() - idx = Threads.atomic_add!(nextidx, 1) - return workers()[(idx % nworkers()) + 1] - end -end - -spawnat(p, thunk) = remotecall(thunk, p) - -spawn_somewhere(thunk) = spawnat(nextproc(),thunk) - -""" - @spawn expr - -Create a closure around an expression and run it on an automatically-chosen process, -returning a [`Future`](@ref) to the result. -This macro is deprecated; `@spawnat :any expr` should be used instead. - -# Examples -```julia-repl -julia> addprocs(3); - -julia> f = @spawn myid() -Future(2, 1, 5, nothing) - -julia> fetch(f) -2 - -julia> f = @spawn myid() -Future(3, 1, 7, nothing) - -julia> fetch(f) -3 -``` - -!!! compat "Julia 1.3" - As of Julia 1.3 this macro is deprecated. Use `@spawnat :any` instead. -""" -macro spawn(expr) - thunk = esc(:(()->($expr))) - var = esc(Base.sync_varname) - quote - local ref = spawn_somewhere($thunk) - if $(Expr(:islocal, var)) - put!($var, ref) - end - ref - end -end - -""" - @spawnat p expr - -Create a closure around an expression and run the closure -asynchronously on process `p`. Return a [`Future`](@ref) to the result. -If `p` is the quoted literal symbol `:any`, then the system will pick a -processor to use automatically. - -# Examples -```julia-repl -julia> addprocs(3); - -julia> f = @spawnat 2 myid() -Future(2, 1, 3, nothing) - -julia> fetch(f) -2 - -julia> f = @spawnat :any myid() -Future(3, 1, 7, nothing) - -julia> fetch(f) -3 -``` - -!!! compat "Julia 1.3" - The `:any` argument is available as of Julia 1.3. -""" -macro spawnat(p, expr) - thunk = esc(:(()->($expr))) - var = esc(Base.sync_varname) - if p === QuoteNode(:any) - spawncall = :(spawn_somewhere($thunk)) - else - spawncall = :(spawnat($(esc(p)), $thunk)) - end - quote - local ref = $spawncall - if $(Expr(:islocal, var)) - put!($var, ref) - end - ref - end -end - -""" - @fetch expr - -Equivalent to `fetch(@spawnat :any expr)`. -See [`fetch`](@ref) and [`@spawnat`](@ref). - -# Examples -```julia-repl -julia> addprocs(3); - -julia> @fetch myid() -2 - -julia> @fetch myid() -3 - -julia> @fetch myid() -4 - -julia> @fetch myid() -2 -``` -""" -macro fetch(expr) - thunk = esc(:(()->($expr))) - :(remotecall_fetch($thunk, nextproc())) -end - -""" - @fetchfrom - -Equivalent to `fetch(@spawnat p expr)`. -See [`fetch`](@ref) and [`@spawnat`](@ref). 
- -# Examples -```julia-repl -julia> addprocs(3); - -julia> @fetchfrom 2 myid() -2 - -julia> @fetchfrom 4 myid() -4 -``` -""" -macro fetchfrom(p, expr) - thunk = esc(:(()->($expr))) - :(remotecall_fetch($thunk, $(esc(p)))) -end - -# extract a list of modules to import from an expression -extract_imports!(imports, x) = imports -function extract_imports!(imports, ex::Expr) - if Meta.isexpr(ex, (:import, :using)) - push!(imports, ex) - elseif Meta.isexpr(ex, :let) - extract_imports!(imports, ex.args[2]) - elseif Meta.isexpr(ex, (:toplevel, :block)) - for arg in ex.args - extract_imports!(imports, arg) - end - end - return imports -end -extract_imports(x) = extract_imports!(Any[], x) - -""" - @everywhere [procs()] expr - -Execute an expression under `Main` on all `procs`. -Errors on any of the processes are collected into a -[`CompositeException`](@ref) and thrown. For example: - - @everywhere bar = 1 - -will define `Main.bar` on all current processes. Any processes added later -(say with [`addprocs()`](@ref)) will not have the expression defined. - -Unlike [`@spawnat`](@ref), `@everywhere` does not capture any local variables. -Instead, local variables can be broadcast using interpolation: - - foo = 1 - @everywhere bar = \$foo - -The optional argument `procs` allows specifying a subset of all -processes to have execute the expression. - -Similar to calling `remotecall_eval(Main, procs, expr)`, but with two extra features: - - - `using` and `import` statements run on the calling process first, to ensure - packages are precompiled. - - The current source file path used by `include` is propagated to other processes. -""" -macro everywhere(ex) - procs = GlobalRef(@__MODULE__, :procs) - return esc(:($(Distributed).@everywhere $procs() $ex)) -end - -macro everywhere(procs, ex) - imps = extract_imports(ex) - return quote - $(isempty(imps) ? nothing : Expr(:toplevel, imps...)) # run imports locally first - let ex = Expr(:toplevel, :(task_local_storage()[:SOURCE_PATH] = $(get(task_local_storage(), :SOURCE_PATH, nothing))), $(esc(Expr(:quote, ex)))), - procs = $(esc(procs)) - remotecall_eval(Main, procs, ex) - end - end -end - -""" - remotecall_eval(m::Module, procs, expression) - -Execute an expression under module `m` on the processes -specified in `procs`. -Errors on any of the processes are collected into a -[`CompositeException`](@ref) and thrown. - -See also [`@everywhere`](@ref). -""" -function remotecall_eval(m::Module, procs, ex) - @sync begin - run_locally = 0 - for pid in procs - if pid == myid() - run_locally += 1 - else - @async_unwrap remotecall_wait(Core.eval, pid, m, ex) - end - end - yield() # ensure that the remotecalls have had a chance to start - - # execute locally last as we do not want local execution to block serialization - # of the request to remote nodes. - for _ in 1:run_locally - @async Core.eval(m, ex) - end - end - nothing -end - -# optimized version of remotecall_eval for a single pid -# and which also fetches the return value -function remotecall_eval(m::Module, pid::Int, ex) - return remotecall_fetch(Core.eval, pid, m, ex) -end - - -# Statically split range [firstIndex,lastIndex] into equal sized chunks for np processors -function splitrange(firstIndex::Int, lastIndex::Int, np::Int) - each, extras = divrem(lastIndex-firstIndex+1, np) - nchunks = each > 0 ? 
np : extras - chunks = Vector{UnitRange{Int}}(undef, nchunks) - lo = firstIndex - for i in 1:nchunks - hi = lo + each - 1 - if extras > 0 - hi += 1 - extras -= 1 - end - chunks[i] = lo:hi - lo = hi+1 - end - return chunks -end - -function preduce(reducer, f, R) - chunks = splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers()) - all_w = workers()[1:length(chunks)] - - w_exec = Task[] - for (idx,pid) in enumerate(all_w) - t = Task(()->remotecall_fetch(f, pid, reducer, R, first(chunks[idx]), last(chunks[idx]))) - schedule(t) - push!(w_exec, t) - end - reduce(reducer, Any[fetch(t) for t in w_exec]) -end - -function pfor(f, R) - t = @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers()) - @spawnat :any f(R, first(c), last(c)) - end - errormonitor(t) -end - -function make_preduce_body(var, body) - quote - function (reducer, R, lo::Int, hi::Int) - $(esc(var)) = R[lo] - ac = $(esc(body)) - if lo != hi - for $(esc(var)) in R[(lo+1):hi] - ac = reducer(ac, $(esc(body))) - end - end - ac - end - end -end - -function make_pfor_body(var, body) - quote - function (R, lo::Int, hi::Int) - for $(esc(var)) in R[lo:hi] - $(esc(body)) - end - end - end -end - -""" - @distributed - -A distributed memory, parallel for loop of the form : - - @distributed [reducer] for var = range - body - end - -The specified range is partitioned and locally executed across all workers. In case an -optional reducer function is specified, `@distributed` performs local reductions on each worker -with a final reduction on the calling process. - -Note that without a reducer function, `@distributed` executes asynchronously, i.e. it spawns -independent tasks on all available workers and returns immediately without waiting for -completion. To wait for completion, prefix the call with [`@sync`](@ref), like : - - @sync @distributed for var = range - body - end -""" -macro distributed(args...) - na = length(args) - if na==1 - loop = args[1] - elseif na==2 - reducer = args[1] - loop = args[2] - else - throw(ArgumentError("wrong number of arguments to @distributed")) - end - if !isa(loop,Expr) || loop.head !== :for - error("malformed @distributed loop") - end - var = loop.args[1].args[1] - r = loop.args[1].args[2] - body = loop.args[2] - if Meta.isexpr(body, :block) && body.args[end] isa LineNumberNode - resize!(body.args, length(body.args) - 1) - end - if na==1 - syncvar = esc(Base.sync_varname) - return quote - local ref = pfor($(make_pfor_body(var, body)), $(esc(r))) - if $(Expr(:islocal, syncvar)) - put!($syncvar, ref) - end - ref - end - else - return :(preduce($(esc(reducer)), $(make_preduce_body(var, body)), $(esc(r)))) - end -end diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl deleted file mode 100644 index 57f58598e85dc..0000000000000 --- a/stdlib/Distributed/src/managers.jl +++ /dev/null @@ -1,757 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -# Built-in SSH and Local Managers - -struct SSHManager <: ClusterManager - machines::Dict - - function SSHManager(machines) - # machines => array of machine elements - # machine => address or (address, cnt) - # address => string of form `[user@]host[:port] bind_addr[:bind_port]` - # cnt => :auto or number - # :auto launches NUM_CORES number of workers at address - # number launches the specified number of workers at address - mhist = Dict() - for m in machines - if isa(m, Tuple) - host=m[1] - cnt=m[2] - else - host=m - cnt=1 - end - current_cnt = get(mhist, host, 0) - - if isa(cnt, Number) - mhist[host] = isa(current_cnt, Number) ? current_cnt + Int(cnt) : Int(cnt) - else - mhist[host] = cnt - end - end - new(mhist) - end -end - - -function check_addprocs_args(manager, kwargs) - valid_kw_names = keys(default_addprocs_params(manager)) - for keyname in keys(kwargs) - !(keyname in valid_kw_names) && throw(ArgumentError("Invalid keyword argument $(keyname)")) - end -end - -# SSHManager - -# start and connect to processes via SSH, optionally through an SSH tunnel. -# the tunnel is only used from the head (process 1); the nodes are assumed -# to be mutually reachable without a tunnel, as is often the case in a cluster. -# Default value of kw arg max_parallel is the default value of MaxStartups in sshd_config -# A machine is either a or a tuple of (, count) -""" - addprocs(machines; tunnel=false, sshflags=\`\`, max_parallel=10, kwargs...) -> List of process identifiers - -Add worker processes on remote machines via SSH. Configuration is done with keyword -arguments (see below). In particular, the `exename` keyword can be used to specify -the path to the `julia` binary on the remote machine(s). - -`machines` is a vector of "machine specifications" which are given as strings of -the form `[user@]host[:port] [bind_addr[:port]]`. `user` defaults to current user and `port` -to the standard SSH port. If `[bind_addr[:port]]` is specified, other workers will connect -to this worker at the specified `bind_addr` and `port`. - -It is possible to launch multiple processes on a remote host by using a tuple in the -`machines` vector or the form `(machine_spec, count)`, where `count` is the number of -workers to be launched on the specified host. Passing `:auto` as the worker count will -launch as many workers as the number of CPU threads on the remote host. - -**Examples**: -```julia -addprocs([ - "remote1", # one worker on 'remote1' logging in with the current username - "user@remote2", # one worker on 'remote2' logging in with the 'user' username - "user@remote3:2222", # specifying SSH port to '2222' for 'remote3' - ("user@remote4", 4), # launch 4 workers on 'remote4' - ("user@remote5", :auto), # launch as many workers as CPU threads on 'remote5' -]) -``` - -**Keyword arguments**: - -* `tunnel`: if `true` then SSH tunneling will be used to connect to the worker from the - master process. Default is `false`. - -* `multiplex`: if `true` then SSH multiplexing is used for SSH tunneling. Default is `false`. - -* `ssh`: the name or path of the SSH client executable used to start the workers. - Default is `"ssh"`. - -* `sshflags`: specifies additional ssh options, e.g. ``` sshflags=\`-i /home/foo/bar.pem\` ``` - -* `max_parallel`: specifies the maximum number of workers connected to in parallel at a - host. Defaults to 10. - -* `shell`: specifies the type of shell to which ssh connects on the workers. 
- - + `shell=:posix`: a POSIX-compatible Unix/Linux shell - (sh, ksh, bash, dash, zsh, etc.). The default. - - + `shell=:csh`: a Unix C shell (csh, tcsh). - - + `shell=:wincmd`: Microsoft Windows `cmd.exe`. - -* `dir`: specifies the working directory on the workers. Defaults to the host's current - directory (as found by `pwd()`) - -* `enable_threaded_blas`: if `true` then BLAS will run on multiple threads in added - processes. Default is `false`. - -* `exename`: name of the `julia` executable. Defaults to `"\$(Sys.BINDIR)/julia"` or - `"\$(Sys.BINDIR)/julia-debug"` as the case may be. It is recommended that a common Julia - version is used on all remote machines because serialization and code distribution might - fail otherwise. - -* `exeflags`: additional flags passed to the worker processes. - -* `topology`: Specifies how the workers connect to each other. Sending a message between - unconnected workers results in an error. - - + `topology=:all_to_all`: All processes are connected to each other. The default. - - + `topology=:master_worker`: Only the driver process, i.e. `pid` 1 connects to the - workers. The workers do not connect to each other. - - + `topology=:custom`: The `launch` method of the cluster manager specifies the - connection topology via fields `ident` and `connect_idents` in `WorkerConfig`. - A worker with a cluster manager identity `ident` will connect to all workers specified - in `connect_idents`. - -* `lazy`: Applicable only with `topology=:all_to_all`. If `true`, worker-worker connections - are setup lazily, i.e. they are setup at the first instance of a remote call between - workers. Default is true. - -* `env`: provide an array of string pairs such as - `env=["JULIA_DEPOT_PATH"=>"/depot"]` to request that environment variables - are set on the remote machine. By default only the environment variable - `JULIA_WORKER_TIMEOUT` is passed automatically from the local to the remote - environment. - -* `cmdline_cookie`: pass the authentication cookie via the `--worker` commandline - option. The (more secure) default behaviour of passing the cookie via ssh stdio - may hang with Windows workers that use older (pre-ConPTY) Julia or Windows versions, - in which case `cmdline_cookie=true` offers a work-around. - -!!! compat "Julia 1.6" - The keyword arguments `ssh`, `shell`, `env` and `cmdline_cookie` - were added in Julia 1.6. - -Environment variables: - -If the master process fails to establish a connection with a newly launched worker within -60.0 seconds, the worker treats it as a fatal situation and terminates. -This timeout can be controlled via environment variable `JULIA_WORKER_TIMEOUT`. -The value of `JULIA_WORKER_TIMEOUT` on the master process specifies the number of seconds a -newly launched worker waits for connection establishment. -""" -function addprocs(machines::AbstractVector; kwargs...) - manager = SSHManager(machines) - check_addprocs_args(manager, kwargs) - addprocs(manager; kwargs...) -end - -default_addprocs_params(::SSHManager) = - merge(default_addprocs_params(), - Dict{Symbol,Any}( - :ssh => "ssh", - :sshflags => ``, - :shell => :posix, - :cmdline_cookie => false, - :env => [], - :tunnel => false, - :multiplex => false, - :max_parallel => 10)) - -function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy::Condition) - # Launch one worker on each unique host in parallel. Additional workers are launched later. - # Wait for all launches to complete. 
- @sync for (i, (machine, cnt)) in enumerate(manager.machines) - let machine=machine, cnt=cnt - @async try - launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy) - catch e - print(stderr, "exception launching on machine $(machine) : $(e)\n") - end - end - end - notify(launch_ntfy) -end - - -Base.show(io::IO, manager::SSHManager) = print(io, "SSHManager(machines=", manager.machines, ")") - - -function parse_machine(machine::AbstractString) - hoststr = "" - portnum = nothing - - if machine[begin] == '[' # ipv6 bracket notation (RFC 2732) - ipv6_end = findlast(']', machine) - if ipv6_end === nothing - throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine\"")) - end - hoststr = machine[begin+1 : prevind(machine,ipv6_end)] - machine_def = split(machine[ipv6_end : end] , ':') - else # ipv4 - machine_def = split(machine, ':') - hoststr = machine_def[1] - end - - if length(machine_def) > 2 - throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine_def\"")) - end - - if length(machine_def) == 2 - portstr = machine_def[2] - - portnum = tryparse(Int, portstr) - if portnum === nothing - msg = "invalid machine definition format string: invalid port format \"$machine_def\"" - throw(ArgumentError(msg)) - end - - if portnum < 1 || portnum > 65535 - msg = "invalid machine definition format string: invalid port number \"$machine_def\"" - throw(ArgumentError(msg)) - end - end - (hoststr, portnum) -end - -function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, params::Dict, launched::Array, launch_ntfy::Condition) - shell = params[:shell] - ssh = params[:ssh] - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - tunnel = params[:tunnel] - multiplex = params[:multiplex] - cmdline_cookie = params[:cmdline_cookie] - env = Dict{String,String}(params[:env]) - - # machine could be of the format [user@]host[:port] bind_addr[:bind_port] - # machine format string is split on whitespace - machine_bind = split(machine) - if isempty(machine_bind) - throw(ArgumentError("invalid machine definition format string: \"$machine\$")) - end - if length(machine_bind) > 1 - exeflags = `--bind-to $(machine_bind[2]) $exeflags` - end - if cmdline_cookie - exeflags = `$exeflags --worker=$(cluster_cookie())` - else - exeflags = `$exeflags --worker` - end - - host, portnum = parse_machine(machine_bind[1]) - portopt = portnum === nothing ? `` : `-p $portnum` - sshflags = `$(params[:sshflags]) $portopt` - - if tunnel - # First it checks if ssh multiplexing has been already enabled and the master process is running. - # If it's already running, later ssh sessions also use the same ssh multiplexing session even if - # `multiplex` is not explicitly specified; otherwise the tunneling session launched later won't - # go to background and hang. This is because of OpenSSH implementation. 
- if success(`$ssh $sshflags -O check $host`) - multiplex = true - elseif multiplex - # automatically create an SSH multiplexing session at the next SSH connection - controlpath = "~/.ssh/julia-%r@%h:%p" - sshflags = `$sshflags -o ControlMaster=auto -o ControlPath=$controlpath -o ControlPersist=no` - end - end - - # Build up the ssh command - - # pass on some environment variables by default - for var in ["JULIA_WORKER_TIMEOUT"] - if !haskey(env, var) && haskey(ENV, var) - env[var] = ENV[var] - end - end - - # Julia process with passed in command line flag arguments - if shell === :posix - # ssh connects to a POSIX shell - - cmds = "exec $(shell_escape_posixly(exename)) $(shell_escape_posixly(exeflags))" - # set environment variables - for (var, val) in env - occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) || - throw(ArgumentError("invalid env key $var")) - cmds = "export $(var)=$(shell_escape_posixly(val))\n$cmds" - end - # change working directory - cmds = "cd -- $(shell_escape_posixly(dir))\n$cmds" - - # shell login (-l) with string command (-c) to launch julia process - remotecmd = shell_escape_posixly(`sh -l -c $cmds`) - - elseif shell === :csh - # ssh connects to (t)csh - - remotecmd = "exec $(shell_escape_csh(exename)) $(shell_escape_csh(exeflags))" - - # set environment variables - for (var, val) in env - occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) || - throw(ArgumentError("invalid env key $var")) - remotecmd = "setenv $(var) $(shell_escape_csh(val))\n$remotecmd" - end - # change working directory - if dir !== nothing && dir != "" - remotecmd = "cd $(shell_escape_csh(dir))\n$remotecmd" - end - - elseif shell === :wincmd - # ssh connects to Windows cmd.exe - - any(c -> c == '"', exename) && throw(ArgumentError("invalid exename")) - - remotecmd = shell_escape_wincmd(escape_microsoft_c_args(exename, exeflags...)) - # change working directory - if dir !== nothing && dir != "" - any(c -> c == '"', dir) && throw(ArgumentError("invalid dir")) - remotecmd = "pushd \"$(dir)\" && $remotecmd" - end - # set environment variables - for (var, val) in env - occursin(r"^[a-zA-Z0-9_()[\]{}\$\\/#',;\.@!?*+-]+\z", var) || throw(ArgumentError("invalid env key $var")) - remotecmd = "set $(var)=$(shell_escape_wincmd(val))&& $remotecmd" - end - - else - throw(ArgumentError("invalid shell")) - end - - # remote launch with ssh with given ssh flags / host / port information - # -T → disable pseudo-terminal allocation - # -a → disable forwarding of auth agent connection - # -x → disable X11 forwarding - # -o ClearAllForwardings → option if forwarding connections and - # forwarded connections are causing collisions - cmd = `$ssh -T -a -x -o ClearAllForwardings=yes $sshflags $host $remotecmd` - - # launch the remote Julia process - - # detach launches the command in a new process group, allowing it to outlive - # the initial julia process (Ctrl-C and teardown methods are handled through messages) - # for the launched processes. 
- io = open(detach(cmd), "r+") - cmdline_cookie || write_cookie(io) - - wconfig = WorkerConfig() - wconfig.io = io.out - wconfig.host = host - wconfig.tunnel = tunnel - wconfig.multiplex = multiplex - wconfig.sshflags = sshflags - wconfig.exeflags = exeflags - wconfig.exename = exename - wconfig.count = cnt - wconfig.max_parallel = params[:max_parallel] - wconfig.enable_threaded_blas = params[:enable_threaded_blas] - - - push!(launched, wconfig) - notify(launch_ntfy) -end - - -function manage(manager::SSHManager, id::Integer, config::WorkerConfig, op::Symbol) - id = Int(id) - if op === :interrupt - ospid = config.ospid - if ospid !== nothing - host = notnothing(config.host) - sshflags = notnothing(config.sshflags) - if !success(`ssh -T -a -x -o ClearAllForwardings=yes -n $sshflags $host "kill -2 $ospid"`) - @error "Error sending a Ctrl-C to julia worker $id on $host" - end - else - # This state can happen immediately after an addprocs - @error "Worker $id cannot be presently interrupted." - end - end -end - -let tunnel_port = 9201 - global next_tunnel_port - function next_tunnel_port() - retval = tunnel_port - if tunnel_port > 32000 - tunnel_port = 9201 - else - tunnel_port += 1 - end - retval - end -end - - -""" - ssh_tunnel(user, host, bind_addr, port, sshflags, multiplex) -> localport - -Establish an SSH tunnel to a remote worker. -Return a port number `localport` such that `localhost:localport` connects to `host:port`. -""" -function ssh_tunnel(user, host, bind_addr, port, sshflags, multiplex) - port = Int(port) - cnt = ntries = 100 - - # the connection is forwarded to `port` on the remote server over the local port `localport` - while cnt > 0 - localport = next_tunnel_port() - if multiplex - # It assumes that an ssh multiplexing session has been already started by the remote worker. - cmd = `ssh $sshflags -O forward -L $localport:$bind_addr:$port $user@$host` - else - # if we cannot do port forwarding, fail immediately - # the -f option backgrounds the ssh session - # `sleep 60` command specifies that an allotted time of 60 seconds is allowed to start the - # remote julia process and establish the network connections specified by the process topology. - # If no connections are made within 60 seconds, ssh will exit and an error will be printed on the - # process that launched the remote process. - ssh = `ssh -T -a -x -o ExitOnForwardFailure=yes` - cmd = detach(`$ssh -f $sshflags $user@$host -L $localport:$bind_addr:$port sleep 60`) - end - if success(cmd) - return localport - end - cnt -= 1 - end - - throw(ErrorException( - string("unable to create SSH tunnel after ", ntries, " tries. No free port?"))) -end - - -# LocalManager -struct LocalManager <: ClusterManager - np::Int - restrict::Bool # Restrict binding to 127.0.0.1 only -end - -""" - addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...) -> List of process identifiers - -Launch `np` workers on the local host using the in-built `LocalManager`. - -Local workers inherit the current package environment (i.e., active project, -[`LOAD_PATH`](@ref), and [`DEPOT_PATH`](@ref)) from the main process. - -**Keyword arguments**: - - `restrict::Bool`: if `true` (default) binding is restricted to `127.0.0.1`. - - `dir`, `exename`, `exeflags`, `env`, `topology`, `lazy`, `enable_threaded_blas`: same effect - as for `SSHManager`, see documentation for [`addprocs(machines::AbstractVector)`](@ref). - -!!! compat "Julia 1.9" - The inheriting of the package environment and the `env` keyword argument were - added in Julia 1.9. 
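For example (a sketch; the flags shown are ordinary `julia` command-line options):

```julia
addprocs(4)                                        # four local workers
addprocs(2; exeflags=`--threads=2`)                # two workers, each started with two Julia threads
addprocs(2; env=["JULIA_DEPOT_PATH" => "/depot"])  # override the depot path on the workers
```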
-""" -function addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...) - manager = LocalManager(np, restrict) - check_addprocs_args(manager, kwargs) - addprocs(manager; kwargs...) -end - -Base.show(io::IO, manager::LocalManager) = print(io, "LocalManager()") - -function launch(manager::LocalManager, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - bind_to = manager.restrict ? `127.0.0.1` : `$(LPROC.bind_addr)` - env = Dict{String,String}(params[:env]) - - # TODO: Maybe this belongs in base/initdefs.jl as a package_environment() function - # together with load_path() etc. Might be useful to have when spawning julia - # processes outside of Distributed.jl too. - # JULIA_(LOAD|DEPOT)_PATH are used to populate (LOAD|DEPOT)_PATH on startup, - # but since (LOAD|DEPOT)_PATH might have changed they are re-serialized here. - # Users can opt-out of this by passing `env = ...` to addprocs(...). - pathsep = Sys.iswindows() ? ";" : ":" - if get(env, "JULIA_LOAD_PATH", nothing) === nothing - env["JULIA_LOAD_PATH"] = join(LOAD_PATH, pathsep) - end - if get(env, "JULIA_DEPOT_PATH", nothing) === nothing - env["JULIA_DEPOT_PATH"] = join(DEPOT_PATH, pathsep) - end - - # If we haven't explicitly asked for threaded BLAS, prevent OpenBLAS from starting - # up with multiple threads, thereby sucking up a bunch of wasted memory on Windows. - if !params[:enable_threaded_blas] && - get(env, "OPENBLAS_NUM_THREADS", nothing) === nothing - env["OPENBLAS_NUM_THREADS"] = "1" - end - # Set the active project on workers using JULIA_PROJECT. - # Users can opt-out of this by (i) passing `env = ...` or (ii) passing - # `--project=...` as `exeflags` to addprocs(...). - project = Base.ACTIVE_PROJECT[] - if project !== nothing && get(env, "JULIA_PROJECT", nothing) === nothing - env["JULIA_PROJECT"] = project - end - - for i in 1:manager.np - cmd = `$(julia_cmd(exename)) $exeflags --bind-to $bind_to --worker` - io = open(detach(setenv(addenv(cmd, env), dir=dir)), "r+") - write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - wconfig.enable_threaded_blas = params[:enable_threaded_blas] - push!(launched, wconfig) - end - - notify(c) -end - -function manage(manager::LocalManager, id::Integer, config::WorkerConfig, op::Symbol) - if op === :interrupt - kill(config.process, 2) - end -end - -""" - launch(manager::ClusterManager, params::Dict, launched::Array, launch_ntfy::Condition) - -Implemented by cluster managers. For every Julia worker launched by this function, it should -append a `WorkerConfig` entry to `launched` and notify `launch_ntfy`. The function MUST exit -once all workers, requested by `manager` have been launched. `params` is a dictionary of all -keyword arguments [`addprocs`](@ref) was called with. -""" -launch - -""" - manage(manager::ClusterManager, id::Integer, config::WorkerConfig. op::Symbol) - -Implemented by cluster managers. It is called on the master process, during a worker's -lifetime, with appropriate `op` values: - -- with `:register`/`:deregister` when a worker is added / removed from the Julia worker pool. -- with `:interrupt` when `interrupt(workers)` is called. The `ClusterManager` - should signal the appropriate worker with an interrupt signal. -- with `:finalize` for cleanup purposes. 
-""" -manage - -# DefaultClusterManager for the default TCP transport - used by both SSHManager and LocalManager - -struct DefaultClusterManager <: ClusterManager -end - -const tunnel_hosts_map = Dict{String, Semaphore}() - -""" - connect(manager::ClusterManager, pid::Int, config::WorkerConfig) -> (instrm::IO, outstrm::IO) - -Implemented by cluster managers using custom transports. It should establish a logical -connection to worker with id `pid`, specified by `config` and return a pair of `IO` -objects. Messages from `pid` to current process will be read off `instrm`, while messages to -be sent to `pid` will be written to `outstrm`. The custom transport implementation must -ensure that messages are delivered and received completely and in order. -`connect(manager::ClusterManager.....)` sets up TCP/IP socket connections in-between -workers. -""" -function connect(manager::ClusterManager, pid::Int, config::WorkerConfig) - if config.connect_at !== nothing - # this is a worker-to-worker setup call. - return connect_w2w(pid, config) - end - - # master connecting to workers - if config.io !== nothing - (bind_addr, port::Int) = read_worker_host_port(config.io) - pubhost = something(config.host, bind_addr) - config.host = pubhost - config.port = port - else - pubhost = notnothing(config.host) - port = notnothing(config.port) - bind_addr = something(config.bind_addr, pubhost) - end - - tunnel = something(config.tunnel, false) - - s = split(pubhost,'@') - user = "" - if length(s) > 1 - user = s[1] - pubhost = s[2] - else - if haskey(ENV, "USER") - user = ENV["USER"] - elseif tunnel - error("USER must be specified either in the environment ", - "or as part of the hostname when tunnel option is used") - end - end - - if tunnel - if !haskey(tunnel_hosts_map, pubhost) - tunnel_hosts_map[pubhost] = Semaphore(something(config.max_parallel, typemax(Int))) - end - sem = tunnel_hosts_map[pubhost] - - sshflags = notnothing(config.sshflags) - multiplex = something(config.multiplex, false) - acquire(sem) - try - (s, bind_addr, forward) = connect_to_worker_with_tunnel(pubhost, bind_addr, port, user, sshflags, multiplex) - config.forward = forward - finally - release(sem) - end - else - (s, bind_addr) = connect_to_worker(bind_addr, port) - end - - config.bind_addr = bind_addr - - # write out a subset of the connect_at required for further worker-worker connection setups - config.connect_at = (bind_addr, port) - - if config.io !== nothing - let pid = pid - redirect_worker_output(pid, notnothing(config.io)) - end - end - - (s, s) -end - -function connect_w2w(pid::Int, config::WorkerConfig) - (rhost, rport) = notnothing(config.connect_at)::Tuple{String, Int} - config.host = rhost - config.port = rport - (s, bind_addr) = connect_to_worker(rhost, rport) - (s,s) -end - -const client_port = Ref{UInt16}(0) - -function socket_reuse_port(iptype) - if ccall(:jl_has_so_reuseport, Int32, ()) == 1 - sock = TCPSocket(delay = false) - - # Some systems (e.g. 
Linux) require the port to be bound before setting REUSEPORT - bind_early = Sys.islinux() - - bind_early && bind_client_port(sock, iptype) - rc = ccall(:jl_tcp_reuseport, Int32, (Ptr{Cvoid},), sock.handle) - if rc < 0 - close(sock) - - # This is an issue only on systems with lots of client connections, hence delay the warning - nworkers() > 128 && @warn "Error trying to reuse client port number, falling back to regular socket" maxlog=1 - - # provide a clean new socket - return TCPSocket() - end - bind_early || bind_client_port(sock, iptype) - return sock - else - return TCPSocket() - end -end - -function bind_client_port(sock::TCPSocket, iptype) - bind_host = iptype(0) - if Sockets.bind(sock, bind_host, client_port[]) - _addr, port = getsockname(sock) - client_port[] = port - end - return sock -end - -function connect_to_worker(host::AbstractString, port::Integer) - # Avoid calling getaddrinfo if possible - involves a DNS lookup - # host may be a stringified ipv4 / ipv6 address or a dns name - bind_addr = nothing - try - bind_addr = parse(IPAddr,host) - catch - bind_addr = getaddrinfo(host) - end - - iptype = typeof(bind_addr) - sock = socket_reuse_port(iptype) - connect(sock, bind_addr, UInt16(port)) - - (sock, string(bind_addr)) -end - - -function connect_to_worker_with_tunnel(host::AbstractString, bind_addr::AbstractString, port::Integer, tunnel_user::AbstractString, sshflags, multiplex) - localport = ssh_tunnel(tunnel_user, host, bind_addr, UInt16(port), sshflags, multiplex) - s = connect("localhost", localport) - forward = "$localport:$bind_addr:$port" - (s, bind_addr, forward) -end - - -function cancel_ssh_tunnel(config::WorkerConfig) - host = notnothing(config.host) - sshflags = notnothing(config.sshflags) - tunnel = something(config.tunnel, false) - multiplex = something(config.multiplex, false) - if tunnel && multiplex - forward = notnothing(config.forward) - run(`ssh $sshflags -O cancel -L $forward $host`) - end -end - - -""" - kill(manager::ClusterManager, pid::Int, config::WorkerConfig) - -Implemented by cluster managers. -It is called on the master process, by [`rmprocs`](@ref). -It should cause the remote worker specified by `pid` to exit. -`kill(manager::ClusterManager.....)` executes a remote `exit()` -on `pid`. -""" -function kill(manager::ClusterManager, pid::Int, config::WorkerConfig) - remote_do(exit, pid) - nothing -end - -function kill(manager::SSHManager, pid::Int, config::WorkerConfig) - remote_do(exit, pid) - cancel_ssh_tunnel(config) - nothing -end - -function kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeout = 15, term_timeout = 15) - # First, try sending `exit()` to the remote over the usual control channels - remote_do(exit, pid) - - timer_task = @async begin - sleep(exit_timeout) - - # Check to see if our child exited, and if not, send an actual kill signal - if !process_exited(config.process) - @warn("Failed to gracefully kill worker $(pid), sending SIGTERM") - kill(config.process, Base.SIGTERM) - - sleep(term_timeout) - if !process_exited(config.process) - @warn("Worker $(pid) ignored SIGTERM, sending SIGKILL") - kill(config.process, Base.SIGKILL) - end - end - end - errormonitor(timer_task) - return nothing -end diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl deleted file mode 100644 index fe3e5ab90b028..0000000000000 --- a/stdlib/Distributed/src/messages.jl +++ /dev/null @@ -1,215 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -abstract type AbstractMsg end - - -## Wire format description -# -# Each message has three parts, which are written in order to the worker's stream. -# 1) A header of type MsgHeader is serialized to the stream (via `serialize`). -# 2) A message of type AbstractMsg is then serialized. -# 3) Finally, a fixed boundary of 10 bytes is written. - -# Message header stored separately from body to be able to send back errors if -# a deserialization error occurs when reading the message body. -struct MsgHeader - response_oid::RRID - notify_oid::RRID - MsgHeader(respond_oid=RRID(0,0), notify_oid=RRID(0,0)) = - new(respond_oid, notify_oid) -end - -# Special oid (0,0) uses to indicate a null ID. -# Used instead of Union{Int, Nothing} to decrease wire size of header. -null_id(id) = id == RRID(0, 0) - -struct CallMsg{Mode} <: AbstractMsg - f::Any - args::Tuple - kwargs -end -struct CallWaitMsg <: AbstractMsg - f::Any - args::Tuple - kwargs -end -struct RemoteDoMsg <: AbstractMsg - f::Any - args::Tuple - kwargs -end -struct ResultMsg <: AbstractMsg - value::Any -end - - -# Worker initialization messages -struct IdentifySocketMsg <: AbstractMsg - from_pid::Int -end - -struct IdentifySocketAckMsg <: AbstractMsg -end - -struct JoinPGRPMsg <: AbstractMsg - self_pid::Int - other_workers::Array - topology::Symbol - enable_threaded_blas::Bool - lazy::Bool -end -struct JoinCompleteMsg <: AbstractMsg - cpu_threads::Int - ospid::Int -end - -# Avoiding serializing AbstractMsg containers results in a speedup -# of approximately 10%. Can be removed once module Serialization -# has been suitably improved. - -const msgtypes = Any[CallWaitMsg, IdentifySocketAckMsg, IdentifySocketMsg, - JoinCompleteMsg, JoinPGRPMsg, RemoteDoMsg, ResultMsg, - CallMsg{:call}, CallMsg{:call_fetch}] - -for (idx, tname) in enumerate(msgtypes) - exprs = Any[ :(serialize(s, o.$fld)) for fld in fieldnames(tname) ] - @eval function serialize_msg(s::AbstractSerializer, o::$tname) - write(s.io, UInt8($idx)) - $(exprs...) 
- return nothing - end -end - -let msg_cases = :(@assert false "Message type index ($idx) expected to be between 1:$($(length(msgtypes)))") - for i = length(msgtypes):-1:1 - mti = msgtypes[i] - msg_cases = :(if idx == $i - $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...)) - else - $msg_cases - end) - end - @eval function deserialize_msg(s::AbstractSerializer) - idx = read(s.io, UInt8) - return $msg_cases - end -end - -function send_msg_unknown(s::IO, header, msg) - error("attempt to send to unknown socket") -end - -function send_msg(s::IO, header, msg) - id = worker_id_from_socket(s) - if id > -1 - return send_msg(worker_from_id(id), header, msg) - end - send_msg_unknown(s, header, msg) -end - -function send_msg_now(s::IO, header, msg::AbstractMsg) - id = worker_id_from_socket(s) - if id > -1 - return send_msg_now(worker_from_id(id), header, msg) - end - send_msg_unknown(s, header, msg) -end -function send_msg_now(w::Worker, header, msg) - send_msg_(w, header, msg, true) -end - -function send_msg(w::Worker, header, msg) - send_msg_(w, header, msg, false) -end - -function flush_gc_msgs(w::Worker) - if !isdefined(w, :w_stream) - return - end - add_msgs = nothing - del_msgs = nothing - @lock w.msg_lock begin - if !w.gcflag # No work needed for this worker - return - end - @atomic w.gcflag = false - if !isempty(w.add_msgs) - add_msgs = w.add_msgs - w.add_msgs = Any[] - end - - if !isempty(w.del_msgs) - del_msgs = w.del_msgs - w.del_msgs = Any[] - end - end - if add_msgs !== nothing - remote_do(add_clients, w, add_msgs) - end - if del_msgs !== nothing - remote_do(del_clients, w, del_msgs) - end - return -end - -# Boundary inserted between messages on the wire, used for recovering -# from deserialization errors. Picked arbitrarily. -# A size of 10 bytes indicates ~ ~1e24 possible boundaries, so chance of collision -# with message contents is negligible. -const MSG_BOUNDARY = UInt8[0x79, 0x8e, 0x8e, 0xf5, 0x6e, 0x9b, 0x2e, 0x97, 0xd5, 0x7d] - -# Faster serialization/deserialization of MsgHeader and RRID -function serialize_hdr_raw(io, hdr) - write(io, hdr.response_oid.whence, hdr.response_oid.id, hdr.notify_oid.whence, hdr.notify_oid.id) -end - -function deserialize_hdr_raw(io) - data = read!(io, Ref{NTuple{4,Int}}())[] - return MsgHeader(RRID(data[1], data[2]), RRID(data[3], data[4])) -end - -function send_msg_(w::Worker, header, msg, now::Bool) - check_worker_state(w) - if myid() != 1 && !isa(msg, IdentifySocketMsg) && !isa(msg, IdentifySocketAckMsg) - wait(w.initialized) - end - io = w.w_stream - lock(io) - try - reset_state(w.w_serializer) - serialize_hdr_raw(io, header) - invokelatest(serialize_msg, w.w_serializer, msg) # io is wrapped in w_serializer - write(io, MSG_BOUNDARY) - - if !now && w.gcflag - flush_gc_msgs(w) - else - flush(io) - end - finally - unlock(io) - end -end - -function flush_gc_msgs() - try - for w in (PGRP::ProcessGroup).workers - if isa(w,Worker) && (w.state == W_CONNECTED) && w.gcflag - flush_gc_msgs(w) - end - end - catch e - bt = catch_backtrace() - @async showerror(stderr, e, bt) - end -end - -function send_connection_hdr(w::Worker, cookie=true) - # For a connection initiated from the remote side to us, we only send the version, - # else when we initiate a connection we first send the cookie followed by our version. - # The remote side validates the cookie. 
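    # Handshake layout as read back by `process_hdr` (sketch):
    #   [cluster cookie, HDR_COOKIE_LEN bytes][version string, right-padded/truncated to HDR_VERSION_LEN bytes]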
- if cookie - write(w.w_stream, LPROC.cookie) - end - write(w.w_stream, rpad(VERSION_STRING, HDR_VERSION_LEN)[1:HDR_VERSION_LEN]) -end diff --git a/stdlib/Distributed/src/pmap.jl b/stdlib/Distributed/src/pmap.jl deleted file mode 100644 index f884d47fff98e..0000000000000 --- a/stdlib/Distributed/src/pmap.jl +++ /dev/null @@ -1,300 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -struct BatchProcessingError <: Exception - data - ex -end - -""" - pgenerate([::AbstractWorkerPool], f, c...) -> iterator - -Apply `f` to each element of `c` in parallel using available workers and tasks. - -For multiple collection arguments, apply `f` elementwise. - -Results are returned in order as they become available. - -Note that `f` must be made available to all worker processes; see -[Code Availability and Loading Packages](@ref code-availability) -for details. -""" -function pgenerate(p::AbstractWorkerPool, f, c) - if length(p) == 0 - return AsyncGenerator(f, c; ntasks=()->nworkers(p)) - end - batches = batchsplit(c, min_batch_count = length(p) * 3) - return Iterators.flatten(AsyncGenerator(remote(p, b -> asyncmap(f, b)), batches)) -end -pgenerate(p::AbstractWorkerPool, f, c1, c...) = pgenerate(p, a->f(a...), zip(c1, c...)) -pgenerate(f, c) = pgenerate(default_worker_pool(), f, c) -pgenerate(f, c1, c...) = pgenerate(a->f(a...), zip(c1, c...)) - -""" - pmap(f, [::AbstractWorkerPool], c...; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) -> collection - -Transform collection `c` by applying `f` to each element using available -workers and tasks. - -For multiple collection arguments, apply `f` elementwise. - -Note that `f` must be made available to all worker processes; see -[Code Availability and Loading Packages](@ref code-availability) for details. - -If a worker pool is not specified, all available workers, i.e., the default worker pool -is used. - -By default, `pmap` distributes the computation over all specified workers. To use only the -local process and distribute over tasks, specify `distributed=false`. -This is equivalent to using [`asyncmap`](@ref). For example, -`pmap(f, c; distributed=false)` is equivalent to `asyncmap(f,c; ntasks=()->nworkers())` - -`pmap` can also use a mix of processes and tasks via the `batch_size` argument. For batch sizes -greater than 1, the collection is processed in multiple batches, each of length `batch_size` or less. -A batch is sent as a single request to a free worker, where a local [`asyncmap`](@ref) processes -elements from the batch using multiple concurrent tasks. - -Any error stops `pmap` from processing the remainder of the collection. To override this behavior -you can specify an error handling function via argument `on_error` which takes in a single argument, i.e., -the exception. The function can stop the processing by rethrowing the error, or, to continue, return any value -which is then returned inline with the results to the caller. - -Consider the following two examples. The first one returns the exception object inline, -the second a 0 in place of any exception: -```julia-repl -julia> pmap(x->iseven(x) ? error("foo") : x, 1:4; on_error=identity) -4-element Array{Any,1}: - 1 - ErrorException("foo") - 3 - ErrorException("foo") - -julia> pmap(x->iseven(x) ? error("foo") : x, 1:4; on_error=ex->0) -4-element Array{Int64,1}: - 1 - 0 - 3 - 0 -``` - -Errors can also be handled by retrying failed computations. 
Keyword arguments `retry_delays` and -`retry_check` are passed through to [`retry`](@ref) as keyword arguments `delays` and `check` -respectively. If batching is specified, and an entire batch fails, all items in -the batch are retried. - -Note that if both `on_error` and `retry_delays` are specified, the `on_error` hook is called -before retrying. If `on_error` does not throw (or rethrow) an exception, the element will not -be retried. - -Example: On errors, retry `f` on an element a maximum of 3 times without any delay between retries. -```julia -pmap(f, c; retry_delays = zeros(3)) -``` - -Example: Retry `f` only if the exception is not of type [`InexactError`](@ref), with exponentially increasing -delays up to 3 times. Return a `NaN` in place for all `InexactError` occurrences. -```julia -pmap(f, c; on_error = e->(isa(e, InexactError) ? NaN : rethrow()), retry_delays = ExponentialBackOff(n = 3)) -``` -""" -function pmap(f, p::AbstractWorkerPool, c; distributed=true, batch_size=1, on_error=nothing, - retry_delays=[], retry_check=nothing) - f_orig = f - # Don't do remote calls if there are no workers. - if (length(p) == 0) || (length(p) == 1 && fetch(p.channel) == myid()) - distributed = false - end - - # Don't do batching if not doing remote calls. - if !distributed - batch_size = 1 - end - - # If not batching, do simple remote call. - if batch_size == 1 - if on_error !== nothing - f = wrap_on_error(f, on_error) - end - - if distributed - f = remote(p, f) - end - - if length(retry_delays) > 0 - f = wrap_retry(f, retry_delays, retry_check) - end - - return asyncmap(f, c; ntasks=()->nworkers(p)) - else - # During batch processing, We need to ensure that if on_error is set, it is called - # for each element in error, and that we return as many elements as the original list. - # retry, if set, has to be called element wise and we will do a best-effort - # to ensure that we do not call mapped function on the same element more than length(retry_delays). - # This guarantee is not possible in case of worker death / network errors, wherein - # we will retry the entire batch on a new worker. - - handle_errors = ((on_error !== nothing) || (length(retry_delays) > 0)) - - # Unlike the non-batch case, in batch mode, we trap all errors and the on_error hook (if present) - # is processed later in non-batch mode. - if handle_errors - f = wrap_on_error(f, (x,e)->BatchProcessingError(x,e); capture_data=true) - end - - f = wrap_batch(f, p, handle_errors) - results = asyncmap(f, c; ntasks=()->nworkers(p), batch_size=batch_size) - - # process errors if any. - if handle_errors - process_batch_errors!(p, f_orig, results, on_error, retry_delays, retry_check) - end - - return results - end -end - -pmap(f, p::AbstractWorkerPool, c1, c...; kwargs...) = pmap(a->f(a...), p, zip(c1, c...); kwargs...) -pmap(f, c; kwargs...) = pmap(f, default_worker_pool(), c; kwargs...) -pmap(f, c1, c...; kwargs...) = pmap(a->f(a...), zip(c1, c...); kwargs...) 
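# Usage sketch of the options handled above (`f` stands for any user-supplied function;
# workers are assumed to have been added already):
#
#   pmap(x -> x == 3 ? error("boom") : x^2, 1:5; on_error = e -> -1)   # → [1, 4, -1, 16, 25]
#   pmap(f, 1:1000; batch_size = 10, retry_delays = ExponentialBackOff(n = 3))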
- -function wrap_on_error(f, on_error; capture_data=false) - return x -> begin - try - f(x) - catch e - if capture_data - on_error(x, e) - else - on_error(e) - end - end - end -end - -function wrap_retry(f, retry_delays, retry_check) - retry(delays=retry_delays, check=retry_check) do x - try - f(x) - catch e - rethrow(extract_exception(e)) - end - end -end - -function wrap_batch(f, p, handle_errors) - f = asyncmap_batch(f) - return batch -> begin - try - remotecall_fetch(f, p, batch) - catch e - if handle_errors - return Any[BatchProcessingError(b, e) for b in batch] - else - rethrow() - end - end - end -end - -asyncmap_batch(f) = batch -> asyncmap(x->f(x...), batch) -extract_exception(e) = isa(e, RemoteException) ? e.captured.ex : e - - -function process_batch_errors!(p, f, results, on_error, retry_delays, retry_check) - # Handle all the ones in error in another pmap, with batch size set to 1 - reprocess = Tuple{Int,BatchProcessingError}[] - for (idx, v) in enumerate(results) - if isa(v, BatchProcessingError) - push!(reprocess, (idx,v)) - end - end - - if length(reprocess) > 0 - errors = [x[2] for x in reprocess] - exceptions = Any[x.ex for x in errors] - state = iterate(retry_delays) - state !== nothing && (state = state[2]) - error_processed = let state=state - if (length(retry_delays)::Int > 0) && - (retry_check === nothing || all([retry_check(state,ex)[2] for ex in exceptions])) - # BatchProcessingError.data is a tuple of original args - pmap(x->f(x...), p, Any[x.data for x in errors]; - on_error = on_error, retry_delays = collect(retry_delays)[2:end::Int], retry_check = retry_check) - elseif on_error !== nothing - map(on_error, exceptions) - else - throw(CompositeException(exceptions)) - end - end - - for (idx, v) in enumerate(error_processed) - results[reprocess[idx][1]] = v - end - end - nothing -end - -""" - head_and_tail(c, n) -> head, tail - -Return `head`: the first `n` elements of `c`; -and `tail`: an iterator over the remaining elements. - -```jldoctest -julia> b, c = Distributed.head_and_tail(1:10, 3) -([1, 2, 3], Base.Iterators.Rest{UnitRange{Int64}, Int64}(1:10, 3)) - -julia> collect(c) -7-element Vector{Int64}: - 4 - 5 - 6 - 7 - 8 - 9 - 10 -``` -""" -function head_and_tail(c, n) - head = Vector{eltype(c)}(undef, n) - n == 0 && return (head, c) - i = 1 - y = iterate(c) - y === nothing && return (resize!(head, 0), ()) - head[i] = y[1] - while i < n - y = iterate(c, y[2]) - y === nothing && return (resize!(head, i), ()) - i += 1 - head[i] = y[1] - end - return head, Iterators.rest(c, y[2]) -end - -""" - batchsplit(c; min_batch_count=1, max_batch_size=100) -> iterator - -Split a collection into at least `min_batch_count` batches. - -Equivalent to `partition(c, max_batch_size)` when `length(c) >> max_batch_size`. 
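A usage sketch (collecting the batches for readability):

```julia
[collect(b) for b in batchsplit(1:10; min_batch_count = 3, max_batch_size = 4)]
# → three batches covering 1:4, 5:8 and 9:10
```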
-""" -function batchsplit(c; min_batch_count=1, max_batch_size=100) - if min_batch_count < 1 - throw(ArgumentError("min_batch_count must be ≥ 1, got $min_batch_count")) - end - - if max_batch_size < 1 - throw(ArgumentError("max_batch_size must be ≥ 1, got $max_batch_size")) - end - - # Split collection into batches, then peek at the first few batches - batches = Iterators.partition(c, max_batch_size) - head, tail = head_and_tail(batches, min_batch_count) - - # If there are not enough batches, use a smaller batch size - if length(head) < min_batch_count - batch_size = max(1, div(sum(length, head), min_batch_count)) - return Iterators.partition(collect(Iterators.flatten(head)), batch_size) - end - - return Iterators.flatten((head, tail)) -end diff --git a/stdlib/Distributed/src/precompile.jl b/stdlib/Distributed/src/precompile.jl deleted file mode 100644 index 87380f627db7a..0000000000000 --- a/stdlib/Distributed/src/precompile.jl +++ /dev/null @@ -1,14 +0,0 @@ -precompile(Tuple{typeof(Distributed.remotecall),Function,Int,Module,Vararg{Any, 100}}) -precompile(Tuple{typeof(Distributed.procs)}) -precompile(Tuple{typeof(Distributed.finalize_ref), Distributed.Future}) -# This is disabled because it doesn't give much benefit -# and the code in Distributed is poorly typed causing many invalidations -# TODO: Maybe reenable now that Distributed is not in sysimage. -#= - precompile_script *= """ - using Distributed - addprocs(2) - pmap(x->iseven(x) ? 1 : 0, 1:4) - @distributed (+) for i = 1:100 Int(rand(Bool)) end - """ -=# diff --git a/stdlib/Distributed/src/process_messages.jl b/stdlib/Distributed/src/process_messages.jl deleted file mode 100644 index 7bbf7cfde943b..0000000000000 --- a/stdlib/Distributed/src/process_messages.jl +++ /dev/null @@ -1,386 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# data stored by the owner of a remote reference -def_rv_channel() = Channel(1) -mutable struct RemoteValue - c::AbstractChannel - clientset::BitSet # Set of workerids that have a reference to this channel. - # Keeping ids instead of a count aids in cleaning up upon - # a worker exit. - - waitingfor::Int # processor we need to hear from to fill this, or 0 - - synctake::Union{ReentrantLock, Nothing} # A lock used to synchronize the - # specific case of a local put! / remote take! on an - # unbuffered store. github issue #29932 - - function RemoteValue(c) - c_is_buffered = false - try - c_is_buffered = isbuffered(c) - catch - end - - if c_is_buffered - return new(c, BitSet(), 0, nothing) - else - return new(c, BitSet(), 0, ReentrantLock()) - end - end -end - -wait(rv::RemoteValue) = wait(rv.c) - -# A wrapper type to handle issue #29932 which requires locking / unlocking of -# RemoteValue.synctake outside of lexical scope. -struct SyncTake - v::Any - rv::RemoteValue -end - -## core messages: do, call, fetch, wait, ref, put! ## -struct RemoteException <: Exception - pid::Int - captured::CapturedException -end - -""" - capture_exception(ex::RemoteException, bt) - -Returns `ex::RemoteException` which has already captured a backtrace (via it's [`CapturedException`](@ref) field `captured`). -""" -Base.capture_exception(ex::RemoteException, bt) = ex - -""" - RemoteException(captured) - -Exceptions on remote computations are captured and rethrown locally. A `RemoteException` -wraps the `pid` of the worker and a captured exception. A `CapturedException` captures the -remote exception and a serializable form of the call stack when the exception was raised. 
-""" -RemoteException(captured) = RemoteException(myid(), captured) -function showerror(io::IO, re::RemoteException) - (re.pid != myid()) && print(io, "On worker ", re.pid, ":\n") - showerror(io, re.captured) -end - -function run_work_thunk(thunk::Function, print_error::Bool) - local result - try - result = thunk() - catch err - ce = CapturedException(err, catch_backtrace()) - result = RemoteException(ce) - print_error && showerror(stderr, ce) - end - return result -end -function run_work_thunk(rv::RemoteValue, thunk) - put!(rv, run_work_thunk(thunk, false)) - nothing -end - -function schedule_call(rid, thunk) - return lock(client_refs) do - rv = RemoteValue(def_rv_channel()) - (PGRP::ProcessGroup).refs[rid] = rv - push!(rv.clientset, rid.whence) - errormonitor(@async run_work_thunk(rv, thunk)) - return rv - end -end - - -function deliver_result(sock::IO, msg, oid, value) - #print("$(myid()) sending result $oid\n") - if msg === :call_fetch || isa(value, RemoteException) - val = value - else - val = :OK - end - try - send_msg_now(sock, MsgHeader(oid), ResultMsg(val)) - catch e - # terminate connection in case of serialization error - # otherwise the reading end would hang - @error "Fatal error on process $(myid())" exception=e,catch_backtrace() - wid = worker_id_from_socket(sock) - close(sock) - if myid()==1 - rmprocs(wid) - elseif wid == 1 - exit(1) - else - remote_do(rmprocs, 1, wid) - end - end -end - -## message event handlers ## -function process_messages(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool=true) - errormonitor(@async process_tcp_streams(r_stream, w_stream, incoming)) -end - -function process_tcp_streams(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool) - Sockets.nagle(r_stream, false) - Sockets.quickack(r_stream, true) - wait_connected(r_stream) - if r_stream != w_stream - Sockets.nagle(w_stream, false) - Sockets.quickack(w_stream, true) - wait_connected(w_stream) - end - message_handler_loop(r_stream, w_stream, incoming) -end - -""" - process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true) - -Called by cluster managers using custom transports. It should be called when the custom -transport implementation receives the first message from a remote worker. The custom -transport must manage a logical connection to the remote worker and provide two -`IO` objects, one for incoming messages and the other for messages addressed to the -remote worker. -If `incoming` is `true`, the remote peer initiated the connection. -Whichever of the pair initiates the connection sends the cluster cookie and its -Julia version number to perform the authentication handshake. - -See also [`cluster_cookie`](@ref). -""" -function process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true) - errormonitor(@async message_handler_loop(r_stream, w_stream, incoming)) -end - -function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool) - wpid=0 # the worker r_stream is connected to. 
- boundary = similar(MSG_BOUNDARY) - try - version = process_hdr(r_stream, incoming) - serializer = ClusterSerializer(r_stream) - - # The first message will associate wpid with r_stream - header = deserialize_hdr_raw(r_stream) - msg = deserialize_msg(serializer) - handle_msg(msg, header, r_stream, w_stream, version) - wpid = worker_id_from_socket(r_stream) - @assert wpid > 0 - - readbytes!(r_stream, boundary, length(MSG_BOUNDARY)) - - while true - reset_state(serializer) - header = deserialize_hdr_raw(r_stream) - # println("header: ", header) - - try - msg = invokelatest(deserialize_msg, serializer) - catch e - # Deserialization error; discard bytes in stream until boundary found - boundary_idx = 1 - while true - # This may throw an EOF error if the terminal boundary was not written - # correctly, triggering the higher-scoped catch block below - byte = read(r_stream, UInt8) - if byte == MSG_BOUNDARY[boundary_idx] - boundary_idx += 1 - if boundary_idx > length(MSG_BOUNDARY) - break - end - else - boundary_idx = 1 - end - end - - # remotecalls only rethrow RemoteExceptions. Any other exception is treated as - # data to be returned. Wrap this exception in a RemoteException. - remote_err = RemoteException(myid(), CapturedException(e, catch_backtrace())) - # println("Deserialization error. ", remote_err) - if !null_id(header.response_oid) - ref = lookup_ref(header.response_oid) - put!(ref, remote_err) - end - if !null_id(header.notify_oid) - deliver_result(w_stream, :call_fetch, header.notify_oid, remote_err) - end - continue - end - readbytes!(r_stream, boundary, length(MSG_BOUNDARY)) - - # println("got msg: ", typeof(msg)) - handle_msg(msg, header, r_stream, w_stream, version) - end - catch e - # Check again as it may have been set in a message handler but not propagated to the calling block above - if wpid < 1 - wpid = worker_id_from_socket(r_stream) - end - - if wpid < 1 - println(stderr, e, CapturedException(e, catch_backtrace())) - println(stderr, "Process($(myid())) - Unknown remote, closing connection.") - elseif !(wpid in map_del_wrkr) - werr = worker_from_id(wpid) - oldstate = werr.state - set_worker_state(werr, W_TERMINATED) - - # If unhandleable error occurred talking to pid 1, exit - if wpid == 1 - if isopen(w_stream) - @error "Fatal error on process $(myid())" exception=e,catch_backtrace() - end - exit(1) - end - - # Will treat any exception as death of node and cleanup - # since currently we do not have a mechanism for workers to reconnect - # to each other on unhandled errors - deregister_worker(wpid) - end - - close(r_stream) - close(w_stream) - - if (myid() == 1) && (wpid > 1) - if oldstate != W_TERMINATING - println(stderr, "Worker $wpid terminated.") - rethrow() - end - end - - return nothing - end -end - -function process_hdr(s, validate_cookie) - if validate_cookie - cookie = read(s, HDR_COOKIE_LEN) - if length(cookie) < HDR_COOKIE_LEN - error("Cookie read failed. Connection closed by peer.") - end - - self_cookie = cluster_cookie() - for i in 1:HDR_COOKIE_LEN - if UInt8(self_cookie[i]) != cookie[i] - error("Process($(myid())) - Invalid connection credentials sent by remote.") - end - end - end - - # When we have incompatible julia versions trying to connect to each other, - # and can be detected, raise an appropriate error. - # For now, just return the version. - version = read(s, HDR_VERSION_LEN) - if length(version) < HDR_VERSION_LEN - error("Version read failed. 
Connection closed by peer.") - end - - return VersionNumber(strip(String(version))) -end - -function handle_msg(msg::CallMsg{:call}, header, r_stream, w_stream, version) - schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...)) -end -function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, version) - errormonitor(@async begin - v = run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), false) - if isa(v, SyncTake) - try - deliver_result(w_stream, :call_fetch, header.notify_oid, v.v) - finally - unlock(v.rv.synctake) - end - else - deliver_result(w_stream, :call_fetch, header.notify_oid, v) - end - nothing - end) -end - -function handle_msg(msg::CallWaitMsg, header, r_stream, w_stream, version) - errormonitor(@async begin - rv = schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...)) - deliver_result(w_stream, :call_wait, header.notify_oid, fetch(rv.c)) - nothing - end) -end - -function handle_msg(msg::RemoteDoMsg, header, r_stream, w_stream, version) - errormonitor(@async run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), true)) -end - -function handle_msg(msg::ResultMsg, header, r_stream, w_stream, version) - put!(lookup_ref(header.response_oid), msg.value) -end - -function handle_msg(msg::IdentifySocketMsg, header, r_stream, w_stream, version) - # register a new peer worker connection - w = Worker(msg.from_pid, r_stream, w_stream, cluster_manager; version=version) - send_connection_hdr(w, false) - send_msg_now(w, MsgHeader(), IdentifySocketAckMsg()) - notify(w.initialized) -end - -function handle_msg(msg::IdentifySocketAckMsg, header, r_stream, w_stream, version) - w = map_sock_wrkr[r_stream] - w.version = version -end - -function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version) - LPROC.id = msg.self_pid - controller = Worker(1, r_stream, w_stream, cluster_manager; version=version) - notify(controller.initialized) - register_worker(LPROC) - topology(msg.topology) - - if !msg.enable_threaded_blas - Base.disable_library_threading() - end - - lazy = msg.lazy - PGRP.lazy = lazy - - @sync for (connect_at, rpid) in msg.other_workers - wconfig = WorkerConfig() - wconfig.connect_at = connect_at - - let rpid=rpid, wconfig=wconfig - if lazy - # The constructor registers the object with a global registry. 
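The `JoinPGRPMsg` handler above sets up peer connections either eagerly (`@async connect_to_peer(...)`) or lazily (a `Worker` holding a connect thunk), driven by `msg.lazy`. On the user side this corresponds to the `lazy` keyword of `addprocs` under the default all-to-all topology; a sketch assuming the standard `addprocs` keywords `topology` and `lazy`:

```julia
using Distributed

# Eager: all worker-to-worker connections are established at startup.
addprocs(2; topology = :all_to_all, lazy = false)

# Lazy (the default): peer connections are established on first use.
addprocs(2; topology = :all_to_all, lazy = true)
```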
- Worker(rpid, ()->connect_to_peer(cluster_manager, rpid, wconfig)) - else - @async connect_to_peer(cluster_manager, rpid, wconfig) - end - end - end - - send_connection_hdr(controller, false) - send_msg_now(controller, MsgHeader(RRID(0,0), header.notify_oid), JoinCompleteMsg(Sys.CPU_THREADS, getpid())) -end - -function connect_to_peer(manager::ClusterManager, rpid::Int, wconfig::WorkerConfig) - try - (r_s, w_s) = connect(manager, rpid, wconfig) - w = Worker(rpid, r_s, w_s, manager; config=wconfig) - process_messages(w.r_stream, w.w_stream, false) - send_connection_hdr(w, true) - send_msg_now(w, MsgHeader(), IdentifySocketMsg(myid())) - notify(w.initialized) - catch e - @error "Error on $(myid()) while connecting to peer $rpid, exiting" exception=e,catch_backtrace() - exit(1) - end -end - -function handle_msg(msg::JoinCompleteMsg, header, r_stream, w_stream, version) - w = map_sock_wrkr[r_stream] - environ = something(w.config.environ, Dict()) - environ[:cpu_threads] = msg.cpu_threads - w.config.environ = environ - w.config.ospid = msg.ospid - w.version = version - - ntfy_channel = lookup_ref(header.notify_oid) - put!(ntfy_channel, w.id) - - push!(default_worker_pool(), w.id) -end diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl deleted file mode 100644 index 0b1143d855510..0000000000000 --- a/stdlib/Distributed/src/remotecall.jl +++ /dev/null @@ -1,800 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -import Base: eltype - -abstract type AbstractRemoteRef end - -""" - client_refs - -Tracks whether a particular `AbstractRemoteRef` -(identified by its RRID) exists on this worker. - -The `client_refs` lock is also used to synchronize access to `.refs` and associated `clientset` state. -""" -const client_refs = WeakKeyDict{AbstractRemoteRef, Nothing}() # used as a WeakKeySet - -""" - Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) - -A `Future` is a placeholder for a single computation -of unknown termination status and time. -For multiple potential computations, see `RemoteChannel`. -See `remoteref_id` for identifying an `AbstractRemoteRef`. -""" -mutable struct Future <: AbstractRemoteRef - where::Int - whence::Int - id::Int - lock::ReentrantLock - @atomic v::Union{Some{Any}, Nothing} - - Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) = - (r = new(w,rrid.whence,rrid.id,ReentrantLock(),v); return test_existing_ref(r)) - - Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],ReentrantLock(),t[4]) # Useful for creating dummy, zeroed-out instances -end - -""" - RemoteChannel(pid::Integer=myid()) - -Make a reference to a `Channel{Any}(1)` on process `pid`. -The default `pid` is the current process. - - RemoteChannel(f::Function, pid::Integer=myid()) - -Create references to remote channels of a specific size and type. `f` is a function that -when executed on `pid` must return an implementation of an `AbstractChannel`. - -For example, `RemoteChannel(()->Channel{Int}(10), pid)`, will return a reference to a -channel of type `Int` and size 10 on `pid`. - -The default `pid` is the current process. 
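A short usage sketch of the `RemoteChannel(f::Function, pid)` form described above, assuming at least one worker has been added:

```julia
using Distributed
addprocs(1)
pid = first(workers())

# A Channel{Int} of size 10, created on and owned by process `pid`.
rc = RemoteChannel(() -> Channel{Int}(10), pid)

put!(rc, 42)            # forwarded to the owner and stored there
@assert take!(rc) == 42
```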
-""" -mutable struct RemoteChannel{T<:AbstractChannel} <: AbstractRemoteRef - where::Int - whence::Int - id::Int - - function RemoteChannel{T}(w::Int, rrid::RRID) where T<:AbstractChannel - r = new(w, rrid.whence, rrid.id) - return test_existing_ref(r) - end - - function RemoteChannel{T}(t::Tuple) where T<:AbstractChannel - return new(t[1],t[2],t[3]) - end -end - -function test_existing_ref(r::AbstractRemoteRef) - found = getkey(client_refs, r, nothing) - if found !== nothing - @assert r.where > 0 - if isa(r, Future) - # this is only for copying the reference from Future to RemoteRef (just created) - fv_cache = @atomic :acquire found.v - rv_cache = @atomic :monotonic r.v - if fv_cache === nothing && rv_cache !== nothing - # we have recd the value from another source, probably a deserialized ref, send a del_client message - send_del_client(r) - @lock found.lock begin - @atomicreplace found.v nothing => rv_cache - end - end - end - return found::typeof(r) - end - - client_refs[r] = nothing - finalizer(finalize_ref, r) - return r -end - -function finalize_ref(r::AbstractRemoteRef) - if r.where > 0 # Handle the case of the finalizer having been called manually - if trylock(client_refs.lock) # trylock doesn't call wait which causes yields - try - delete!(client_refs.ht, r) # direct removal avoiding locks - if isa(r, RemoteChannel) - send_del_client_no_lock(r) - else - # send_del_client only if the reference has not been set - v_cache = @atomic :monotonic r.v - v_cache === nothing && send_del_client_no_lock(r) - @atomic :monotonic r.v = nothing - end - r.where = 0 - finally - unlock(client_refs.lock) - end - else - finalizer(finalize_ref, r) - return nothing - end - end - nothing -end - -""" - Future(pid::Integer=myid()) - -Create a `Future` on process `pid`. -The default `pid` is the current process. -""" -Future(pid::Integer=myid()) = Future(pid, RRID()) -Future(w::LocalProcess) = Future(w.id) -Future(w::Worker) = Future(w.id) - -RemoteChannel(pid::Integer=myid()) = RemoteChannel{Channel{Any}}(pid, RRID()) - -function RemoteChannel(f::Function, pid::Integer=myid()) - remotecall_fetch(pid, f, RRID()) do f, rrid - rv=lookup_ref(rrid, f) - RemoteChannel{typeof(rv.c)}(myid(), rrid) - end -end - -Base.eltype(::Type{RemoteChannel{T}}) where {T} = eltype(T) - -hash(r::AbstractRemoteRef, h::UInt) = hash(r.whence, hash(r.id, h)) -==(r::AbstractRemoteRef, s::AbstractRemoteRef) = (r.whence==s.whence && r.id==s.id) - -""" - remoteref_id(r::AbstractRemoteRef) -> RRID - -`Future`s and `RemoteChannel`s are identified by fields: - -* `where` - refers to the node where the underlying object/storage - referred to by the reference actually exists. - -* `whence` - refers to the node the remote reference was created from. - Note that this is different from the node where the underlying object - referred to actually exists. For example calling `RemoteChannel(2)` - from the master process would result in a `where` value of 2 and - a `whence` value of 1. - -* `id` is unique across all references created from the worker specified by `whence`. - -Taken together, `whence` and `id` uniquely identify a reference across all workers. - -`remoteref_id` is a low-level API which returns a `RRID` -object that wraps `whence` and `id` values of a remote reference. -""" -remoteref_id(r::AbstractRemoteRef) = RRID(r.whence, r.id) - -""" - channel_from_id(id) -> c - -A low-level API which returns the backing `AbstractChannel` for an `id` returned by -[`remoteref_id`](@ref). 
-The call is valid only on the node where the backing channel exists. -""" -function channel_from_id(id) - rv = lock(client_refs) do - return get(PGRP.refs, id, false) - end - if rv === false - throw(ErrorException("Local instance of remote reference not found")) - end - return rv.c -end - -lookup_ref(rrid::RRID, f=def_rv_channel) = lookup_ref(PGRP, rrid, f) -function lookup_ref(pg, rrid, f) - return lock(client_refs) do - rv = get(pg.refs, rrid, false) - if rv === false - # first we've heard of this ref - rv = RemoteValue(invokelatest(f)) - pg.refs[rrid] = rv - push!(rv.clientset, rrid.whence) - end - return rv - end::RemoteValue -end - -""" - isready(rr::Future) - -Determine whether a [`Future`](@ref) has a value stored to it. - -If the argument `Future` is owned by a different node, this call will block to wait for the answer. -It is recommended to wait for `rr` in a separate task instead -or to use a local [`Channel`](@ref) as a proxy: - -```julia -p = 1 -f = Future(p) -errormonitor(@async put!(f, remotecall_fetch(long_computation, p))) -isready(f) # will not block -``` -""" -function isready(rr::Future) - v_cache = @atomic rr.v - v_cache === nothing || return true - - rid = remoteref_id(rr) - return if rr.where == myid() - isready(lookup_ref(rid).c) - else - remotecall_fetch(rid->isready(lookup_ref(rid).c), rr.where, rid) - end -end - -""" - isready(rr::RemoteChannel, args...) - -Determine whether a [`RemoteChannel`](@ref) has a value stored to it. -Note that this function can cause race conditions, since by the -time you receive its result it may no longer be true. However, -it can be safely used on a [`Future`](@ref) since they are assigned only once. -""" -function isready(rr::RemoteChannel, args...) - rid = remoteref_id(rr) - return if rr.where == myid() - isready(lookup_ref(rid).c, args...) - else - remotecall_fetch(rid->isready(lookup_ref(rid).c, args...), rr.where, rid) - end -end - -del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid()) - -del_client(id, client) = del_client(PGRP, id, client) -function del_client(pg, id, client) - lock(client_refs) do - _del_client(pg, id, client) - end - nothing -end - -function _del_client(pg, id, client) - rv = get(pg.refs, id, false) - if rv !== false - delete!(rv.clientset, client) - if isempty(rv.clientset) - delete!(pg.refs, id) - #print("$(myid()) collected $id\n") - end - end - nothing -end - -function del_clients(pairs::Vector) - for p in pairs - del_client(p[1], p[2]) - end -end - -# The task below is coalescing the `flush_gc_msgs` call -# across multiple producers, see `send_del_client`, -# and `send_add_client`. -# XXX: Is this worth the additional complexity? -# `flush_gc_msgs` has to iterate over all connected workers. -const any_gc_flag = Threads.Condition() -function start_gc_msgs_task() - errormonitor( - Threads.@spawn begin - while true - lock(any_gc_flag) do - # this might miss events - wait(any_gc_flag) - end - # Use invokelatest() so that custom message transport streams - # for workers can be defined in a newer world age than the Task - # which runs the loop here. 
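`remoteref_id` and `channel_from_id`, documented above, expose the low-level identity of a reference and, on the owning node, its backing channel. A sketch run entirely on the owner process:

```julia
using Distributed

rc  = RemoteChannel(() -> Channel{Any}(1))  # backing store lives on this node
rid = remoteref_id(rc)                      # RRID wrapping (whence, id)

# Valid only on the node where the backing channel exists (here: this process).
c = Distributed.channel_from_id(rid)
put!(c, :hello)
@assert take!(rc) === :hello                # same underlying channel
```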
- invokelatest(flush_gc_msgs) # handles throws internally - end - end - ) -end - -# Function can be called within a finalizer -function send_del_client(rr) - if rr.where == myid() - del_client(rr) - elseif id_in_procs(rr.where) # process only if a valid worker - process_worker(rr) - end -end - -function send_del_client_no_lock(rr) - # for gc context to avoid yields - if rr.where == myid() - _del_client(PGRP, remoteref_id(rr), myid()) - elseif id_in_procs(rr.where) # process only if a valid worker - process_worker(rr) - end -end - -function publish_del_msg!(w::Worker, msg) - lock(w.msg_lock) do - push!(w.del_msgs, msg) - @atomic w.gcflag = true - end - lock(any_gc_flag) do - notify(any_gc_flag) - end -end - -function process_worker(rr) - w = worker_from_id(rr.where)::Worker - msg = (remoteref_id(rr), myid()) - - # Needs to acquire a lock on the del_msg queue - T = Threads.@spawn begin - publish_del_msg!($w, $msg) - end - Base.errormonitor(T) - - return -end - -function add_client(id, client) - lock(client_refs) do - rv = lookup_ref(id) - push!(rv.clientset, client) - end - nothing -end - -function add_clients(pairs::Vector) - for p in pairs - add_client(p[1], p[2]...) - end -end - -function send_add_client(rr::AbstractRemoteRef, i) - if rr.where == myid() - add_client(remoteref_id(rr), i) - elseif (i != rr.where) && id_in_procs(rr.where) - # don't need to send add_client if the message is already going - # to the processor that owns the remote ref. it will add_client - # itself inside deserialize(). - w = worker_from_id(rr.where) - lock(w.msg_lock) do - push!(w.add_msgs, (remoteref_id(rr), i)) - @atomic w.gcflag = true - end - lock(any_gc_flag) do - notify(any_gc_flag) - end - end -end - -channel_type(rr::RemoteChannel{T}) where {T} = T - -function serialize(s::ClusterSerializer, f::Future) - v_cache = @atomic f.v - if v_cache === nothing - p = worker_id_from_socket(s.io) - (p !== f.where) && send_add_client(f, p) - end - invoke(serialize, Tuple{ClusterSerializer, Any}, s, f) -end - -function serialize(s::ClusterSerializer, rr::RemoteChannel) - p = worker_id_from_socket(s.io) - (p !== rr.where) && send_add_client(rr, p) - invoke(serialize, Tuple{ClusterSerializer, Any}, s, rr) -end - -function deserialize(s::ClusterSerializer, t::Type{<:Future}) - fc = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) # deserialized copy - f2 = Future(fc.where, RRID(fc.whence, fc.id), fc.v) # ctor adds to client_refs table - - # 1) send_add_client() is not executed when the ref is being serialized - # to where it exists, hence do it here. - # 2) If we have received a 'fetch'ed Future or if the Future ctor found an - # already 'fetch'ed instance in client_refs (Issue #25847), we should not - # track it in the backing RemoteValue store. - f2v_cache = @atomic f2.v - if f2.where == myid() && f2v_cache === nothing - add_client(remoteref_id(f2), myid()) - end - f2 -end - -function deserialize(s::ClusterSerializer, t::Type{<:RemoteChannel}) - rr = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) - if rr.where == myid() - # send_add_client() is not executed when the ref is being - # serialized to where it exists - add_client(remoteref_id(rr), myid()) - end - # call ctor to make sure this rr gets added to the client_refs table - RemoteChannel{channel_type(rr)}(rr.where, RRID(rr.whence, rr.id)) -end - -# Future and RemoteChannel are serializable only in a running cluster. 
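The `send_add_client`/`send_del_client` machinery above reference-counts the remote store: serializing a reference to another worker adds a client, and finalization (or, for a `Future`, a completed fetch) removes one. Explicitly finalizing a reference releases the remote store without waiting for garbage collection; a sketch:

```julia
using Distributed
addprocs(1)

rc = RemoteChannel(() -> Channel{Int}(1), first(workers()))
put!(rc, 1)

# Dropping the last client: a del_client message is sent (asynchronously) to the
# owner, which can then remove the backing store from its registry.
finalize(rc)
```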
-# Serialize zeroed-out values to non ClusterSerializer objects -function serialize(s::AbstractSerializer, ::Future) - zero_fut = Future((0,0,0,nothing)) - invoke(serialize, Tuple{AbstractSerializer, Any}, s, zero_fut) -end - -function serialize(s::AbstractSerializer, ::RemoteChannel) - zero_rc = RemoteChannel{Channel{Any}}((0,0,0)) - invoke(serialize, Tuple{AbstractSerializer, Any}, s, zero_rc) -end - - -# make a thunk to call f on args in a way that simulates what would happen if -# the function were sent elsewhere -function local_remotecall_thunk(f, args, kwargs) - return ()->invokelatest(f, args...; kwargs...) -end - -function remotecall(f, w::LocalProcess, args...; kwargs...) - rr = Future(w) - schedule_call(remoteref_id(rr), local_remotecall_thunk(f, args, kwargs)) - return rr -end - -function remotecall(f, w::Worker, args...; kwargs...) - rr = Future(w) - send_msg(w, MsgHeader(remoteref_id(rr)), CallMsg{:call}(f, args, kwargs)) - return rr -end - -""" - remotecall(f, id::Integer, args...; kwargs...) -> Future - -Call a function `f` asynchronously on the given arguments on the specified process. -Return a [`Future`](@ref). -Keyword arguments, if any, are passed through to `f`. -""" -remotecall(f, id::Integer, args...; kwargs...) = remotecall(f, worker_from_id(id), args...; kwargs...) - -function remotecall_fetch(f, w::LocalProcess, args...; kwargs...) - v=run_work_thunk(local_remotecall_thunk(f,args, kwargs), false) - return isa(v, RemoteException) ? throw(v) : v -end - -function remotecall_fetch(f, w::Worker, args...; kwargs...) - # can be weak, because the program will have no way to refer to the Ref - # itself, it only gets the result. - oid = RRID() - rv = lookup_ref(oid) - rv.waitingfor = w.id - send_msg(w, MsgHeader(RRID(0,0), oid), CallMsg{:call_fetch}(f, args, kwargs)) - v = take!(rv) - lock(client_refs) do - delete!(PGRP.refs, oid) - end - return isa(v, RemoteException) ? throw(v) : v -end - -""" - remotecall_fetch(f, id::Integer, args...; kwargs...) - -Perform `fetch(remotecall(...))` in one message. -Keyword arguments, if any, are passed through to `f`. -Any remote exceptions are captured in a -[`RemoteException`](@ref) and thrown. - -See also [`fetch`](@ref) and [`remotecall`](@ref). - -# Examples -```julia-repl -\$ julia -p 2 - -julia> remotecall_fetch(sqrt, 2, 4) -2.0 - -julia> remotecall_fetch(sqrt, 2, -4) -ERROR: On worker 2: -DomainError with -4.0: -sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). -... -``` -""" -remotecall_fetch(f, id::Integer, args...; kwargs...) = - remotecall_fetch(f, worker_from_id(id), args...; kwargs...) - -remotecall_wait(f, w::LocalProcess, args...; kwargs...) = wait(remotecall(f, w, args...; kwargs...)) - -function remotecall_wait(f, w::Worker, args...; kwargs...) - prid = RRID() - rv = lookup_ref(prid) - rv.waitingfor = w.id - rr = Future(w) - send_msg(w, MsgHeader(remoteref_id(rr), prid), CallWaitMsg(f, args, kwargs)) - v = fetch(rv.c) - lock(client_refs) do - delete!(PGRP.refs, prid) - end - isa(v, RemoteException) && throw(v) - return rr -end - -""" - remotecall_wait(f, id::Integer, args...; kwargs...) - -Perform a faster `wait(remotecall(...))` in one message on the `Worker` specified by worker id `id`. -Keyword arguments, if any, are passed through to `f`. - -See also [`wait`](@ref) and [`remotecall`](@ref). -""" -remotecall_wait(f, id::Integer, args...; kwargs...) = - remotecall_wait(f, worker_from_id(id), args...; kwargs...) 
- -function remote_do(f, w::LocalProcess, args...; kwargs...) - # the LocalProcess version just performs in local memory what a worker - # does when it gets a :do message. - # same for other messages on LocalProcess. - thk = local_remotecall_thunk(f, args, kwargs) - schedule(Task(thk)) - nothing -end - -function remote_do(f, w::Worker, args...; kwargs...) - send_msg(w, MsgHeader(), RemoteDoMsg(f, args, kwargs)) - nothing -end - - -""" - remote_do(f, id::Integer, args...; kwargs...) -> nothing - -Executes `f` on worker `id` asynchronously. -Unlike [`remotecall`](@ref), it does not store the -result of computation, nor is there a way to wait for its completion. - -A successful invocation indicates that the request has been accepted for execution on -the remote node. - -While consecutive `remotecall`s to the same worker are serialized in the order they are -invoked, the order of executions on the remote worker is undetermined. For example, -`remote_do(f1, 2); remotecall(f2, 2); remote_do(f3, 2)` will serialize the call -to `f1`, followed by `f2` and `f3` in that order. However, it is not guaranteed that `f1` -is executed before `f3` on worker 2. - -Any exceptions thrown by `f` are printed to [`stderr`](@ref) on the remote worker. - -Keyword arguments, if any, are passed through to `f`. -""" -remote_do(f, id::Integer, args...; kwargs...) = remote_do(f, worker_from_id(id), args...; kwargs...) - -# have the owner of rr call f on it -function call_on_owner(f, rr::AbstractRemoteRef, args...) - rid = remoteref_id(rr) - if rr.where == myid() - f(rid, args...) - else - remotecall_fetch(f, rr.where, rid, args...) - end -end - -function wait_ref(rid, caller, args...) - v = fetch_ref(rid, args...) - if isa(v, RemoteException) - if myid() == caller - throw(v) - else - return v - end - end - nothing -end - -""" - wait(r::Future) - -Wait for a value to become available for the specified [`Future`](@ref). -""" -wait(r::Future) = (v_cache = @atomic r.v; v_cache !== nothing && return r; call_on_owner(wait_ref, r, myid()); r) - -""" - wait(r::RemoteChannel, args...) - -Wait for a value to become available on the specified [`RemoteChannel`](@ref). -""" -wait(r::RemoteChannel, args...) = (call_on_owner(wait_ref, r, myid(), args...); r) - -""" - fetch(x::Future) - -Wait for and get the value of a [`Future`](@ref). The fetched value is cached locally. -Further calls to `fetch` on the same reference return the cached value. If the remote value -is an exception, throws a [`RemoteException`](@ref) which captures the remote exception and backtrace. -""" -function fetch(r::Future) - v_cache = @atomic r.v - v_cache !== nothing && return something(v_cache) - - if r.where == myid() - rv, v_cache = @lock r.lock begin - v_cache = @atomic :monotonic r.v - rv = v_cache === nothing ? lookup_ref(remoteref_id(r)) : nothing - rv, v_cache - end - - if v_cache !== nothing - return something(v_cache) - else - v_local = fetch(rv.c) - end - else - v_local = call_on_owner(fetch_ref, r) - end - - v_cache = @atomic r.v - - if v_cache === nothing # call_on_owner case - v_old, status = @lock r.lock begin - @atomicreplace r.v nothing => Some(v_local) - end - # status == true - when value obtained through call_on_owner - # status == false - any other situation: atomicreplace fails, because by the time the lock is obtained cache will be populated - # why? local put! performs caching and putting into channel under r.lock - - # for local put! 
use the cached value, for call_on_owner cases just take the v_local as it was just cached in r.v - - # remote calls getting the value from `call_on_owner` used to return the value directly without wrapping it in `Some(x)` - # so we're doing the same thing here - if status - send_del_client(r) - return v_local - else # this `v_cache` is returned at the end of the function - v_cache = v_old - end - end - - send_del_client(r) - something(v_cache) -end - -fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...) - -""" - fetch(c::RemoteChannel) - -Wait for and get a value from a [`RemoteChannel`](@ref). Exceptions raised are the -same as for a [`Future`](@ref). Does not remove the item fetched. -""" -fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)::eltype(r) - -isready(rv::RemoteValue, args...) = isready(rv.c, args...) - -""" - put!(rr::Future, v) - -Store a value to a [`Future`](@ref) `rr`. -`Future`s are write-once remote references. -A `put!` on an already set `Future` throws an `Exception`. -All asynchronous remote calls return `Future`s and set the -value to the return value of the call upon completion. -""" -function put!(r::Future, v) - if r.where == myid() - rid = remoteref_id(r) - rv = lookup_ref(rid) - isready(rv) && error("Future can be set only once") - @lock r.lock begin - put!(rv, v) # this notifies the tasks waiting on the channel in fetch - set_future_cache(r, v) # set the cache before leaving the lock, so that the notified tasks already see it cached - end - del_client(rid, myid()) - else - @lock r.lock begin # same idea as above if there were any local tasks fetching on this Future - call_on_owner(put_future, r, v, myid()) - set_future_cache(r, v) - end - end - r -end - -function set_future_cache(r::Future, v) - _, ok = @atomicreplace r.v nothing => Some(v) - ok || error("internal consistency error detected for Future") -end - -function put_future(rid, v, caller) - rv = lookup_ref(rid) - isready(rv) && error("Future can be set only once") - put!(rv, v) - # The caller has the value and hence can be removed from the remote store. - del_client(rid, caller) - nothing -end - - -put!(rv::RemoteValue, args...) = put!(rv.c, args...) -function put_ref(rid, caller, args...) - rv = lookup_ref(rid) - put!(rv, args...) - if myid() == caller && rv.synctake !== nothing - # Wait till a "taken" value is serialized out - github issue #29932 - lock(rv.synctake) - unlock(rv.synctake) - end - nothing -end - -""" - put!(rr::RemoteChannel, args...) - -Store a set of values to the [`RemoteChannel`](@ref). -If the channel is full, blocks until space is available. -Return the first argument. -""" -put!(rr::RemoteChannel, args...) = (call_on_owner(put_ref, rr, myid(), args...); rr) - -# take! is not supported on Future - -take!(rv::RemoteValue, args...) = take!(rv.c, args...) -function take_ref(rid, caller, args...) - rv = lookup_ref(rid) - synctake = false - if myid() != caller && rv.synctake !== nothing - # special handling for local put! / remote take! on unbuffered channel - # github issue #29932 - synctake = true - lock(rv.synctake) - end - - v = try - take!(rv, args...) - catch e - # avoid unmatched unlock when exception occurs - # github issue #33972 - synctake && unlock(rv.synctake) - rethrow(e) - end - - isa(v, RemoteException) && (myid() == caller) && throw(v) - - if synctake - return SyncTake(v, rv) - else - return v - end -end - -""" - take!(rr::RemoteChannel, args...) - -Fetch value(s) from a [`RemoteChannel`](@ref) `rr`, -removing the value(s) in the process. 
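Putting the pieces above together: a `Future` is write-once and caches its value locally on `fetch`, while a `RemoteChannel` supports repeated `put!`/`take!` plus a non-destructive `fetch`. A sketch assuming one worker:

```julia
using Distributed
addprocs(1)
p = first(workers())

f = remotecall(+, p, 1, 2)   # returns a Future immediately
@assert fetch(f) == 3        # waits for the result, then caches it locally
@assert fetch(f) == 3        # served from the local cache

g = Future(p)
put!(g, :done)               # Futures are write-once; a second put! throws
@assert fetch(g) === :done

rc = RemoteChannel(() -> Channel{Int}(2), p)
put!(rc, 10); put!(rc, 20)
@assert fetch(rc) == 10      # peeks, does not remove
@assert take!(rc) == 10      # removes
@assert take!(rc) == 20
```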
-""" -take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)::eltype(rr) - -# close and isopen are not supported on Future - -close_ref(rid) = (close(lookup_ref(rid).c); nothing) -close(rr::RemoteChannel) = call_on_owner(close_ref, rr) - -isopen_ref(rid) = isopen(lookup_ref(rid).c) -isopen(rr::RemoteChannel) = call_on_owner(isopen_ref, rr) - -getindex(r::RemoteChannel) = fetch(r) -getindex(r::Future) = fetch(r) - -getindex(r::Future, args...) = getindex(fetch(r), args...) -function getindex(r::RemoteChannel, args...) - if r.where == myid() - return getindex(fetch(r), args...) - end - return remotecall_fetch(getindex, r.where, r, args...) -end - -function iterate(c::RemoteChannel, state=nothing) - if isopen(c) || isready(c) - try - return (take!(c), nothing) - catch e - if isa(e, InvalidStateException) || - (isa(e, RemoteException) && - isa(e.captured.ex, InvalidStateException) && - e.captured.ex.state === :closed) - return nothing - end - rethrow() - end - else - return nothing - end -end - -IteratorSize(::Type{<:RemoteChannel}) = SizeUnknown() diff --git a/stdlib/Distributed/src/workerpool.jl b/stdlib/Distributed/src/workerpool.jl deleted file mode 100644 index 5dd1c07044e09..0000000000000 --- a/stdlib/Distributed/src/workerpool.jl +++ /dev/null @@ -1,370 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - AbstractWorkerPool - -Supertype for worker pools such as [`WorkerPool`](@ref) and [`CachingPool`](@ref). -An `AbstractWorkerPool` should implement: - - [`push!`](@ref) - add a new worker to the overall pool (available + busy) - - [`put!`](@ref) - put back a worker to the available pool - - [`take!`](@ref) - take a worker from the available pool (to be used for remote function execution) - - [`length`](@ref) - number of workers available in the overall pool - - [`isready`](@ref) - return false if a `take!` on the pool would block, else true - -The default implementations of the above (on a `AbstractWorkerPool`) require fields - - `channel::Channel{Int}` - - `workers::Set{Int}` -where `channel` contains free worker pids and `workers` is the set of all workers associated with this pool. -""" -abstract type AbstractWorkerPool end - -mutable struct WorkerPool <: AbstractWorkerPool - channel::Channel{Int} - workers::Set{Int} - ref::RemoteChannel - - WorkerPool(c::Channel, ref::RemoteChannel) = new(c, Set{Int}(), ref) -end - -function WorkerPool() - wp = WorkerPool(Channel{Int}(typemax(Int)), RemoteChannel()) - put!(wp.ref, WeakRef(wp)) - wp -end - -""" - WorkerPool(workers::Union{Vector{Int},AbstractRange{Int}}) - -Create a `WorkerPool` from a vector or range of worker ids. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> WorkerPool([2, 3]) -WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:2), Set([2, 3]), RemoteChannel{Channel{Any}}(1, 1, 6)) - -julia> WorkerPool(2:4) -WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:2), Set([4, 2, 3]), RemoteChannel{Channel{Any}}(1, 1, 7)) -``` -""" -function WorkerPool(workers::Union{Vector{Int},AbstractRange{Int}}) - pool = WorkerPool() - foreach(w->push!(pool, w), workers) - return pool -end - -# On workers where this pool has been serialized to, instantiate with a dummy local channel. -WorkerPool(ref::RemoteChannel) = WorkerPool(Channel{Int}(1), ref) - -function serialize(S::AbstractSerializer, pool::WorkerPool) - # Allow accessing a worker pool from other processors. When serialized, - # initialize the `ref` to point to self and only send the ref. 
- # Other workers will forward all put!, take!, calls to the process owning - # the ref (and hence the pool). - Serialization.serialize_type(S, typeof(pool)) - serialize(S, pool.ref) -end - -deserialize(S::AbstractSerializer, t::Type{T}) where {T<:WorkerPool} = T(deserialize(S)) - -wp_local_push!(pool::AbstractWorkerPool, w::Int) = (push!(pool.workers, w); put!(pool.channel, w); pool) -wp_local_length(pool::AbstractWorkerPool) = length(pool.workers) -wp_local_isready(pool::AbstractWorkerPool) = isready(pool.channel) - -function wp_local_put!(pool::AbstractWorkerPool, w::Int) - # In case of default_worker_pool, the master is implicitly considered a worker, i.e., - # it is not present in pool.workers. - # Confirm the that the worker is part of a pool before making it available. - w in pool.workers && put!(pool.channel, w) - w -end - -function wp_local_workers(pool::AbstractWorkerPool) - if length(pool) == 0 && pool === default_worker_pool() - return [1] - else - return collect(pool.workers) - end -end - -function wp_local_nworkers(pool::AbstractWorkerPool) - if length(pool) == 0 && pool === default_worker_pool() - return 1 - else - return length(pool.workers) - end -end - -function wp_local_take!(pool::AbstractWorkerPool) - # Find an active worker - worker = 0 - while true - if length(pool) == 0 - if pool === default_worker_pool() - # No workers, the master process is used as a worker - worker = 1 - break - else - throw(ErrorException("No active worker available in pool")) - end - end - - worker = take!(pool.channel) - if id_in_procs(worker) - break - else - delete!(pool.workers, worker) # Remove invalid worker from pool - end - end - return worker -end - -function remotecall_pool(rc_f, f, pool::AbstractWorkerPool, args...; kwargs...) - worker = take!(pool) - try - rc_f(f, worker, args...; kwargs...) - finally - put!(pool, worker) - end -end - -# Check if pool is local or remote and forward calls if required. -# NOTE: remotecall_fetch does it automatically, but this will be more efficient as -# it avoids the overhead associated with a local remotecall. - -for (func, rt) = ((:length, Int), (:isready, Bool), (:workers, Vector{Int}), (:nworkers, Int), (:take!, Int)) - func_local = Symbol(string("wp_local_", func)) - @eval begin - function ($func)(pool::WorkerPool) - if pool.ref.where != myid() - return remotecall_fetch(ref->($func_local)(fetch(ref).value), pool.ref.where, pool.ref)::$rt - else - return ($func_local)(pool) - end - end - - # default impl - ($func)(pool::AbstractWorkerPool) = ($func_local)(pool) - end -end - -for func = (:push!, :put!) - func_local = Symbol(string("wp_local_", func)) - @eval begin - function ($func)(pool::WorkerPool, w::Int) - if pool.ref.where != myid() - return remotecall_fetch((ref, w)->($func_local)(fetch(ref).value, w), pool.ref.where, pool.ref, w) - else - return ($func_local)(pool, w) - end - end - - # default impl - ($func)(pool::AbstractWorkerPool, w::Int) = ($func_local)(pool, w) - end -end - - -""" - remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) -> Future - -[`WorkerPool`](@ref) variant of `remotecall(f, pid, ....)`. Wait for and take a free worker from `pool` and perform a `remotecall` on it. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> wp = WorkerPool([2, 3]); - -julia> A = rand(3000); - -julia> f = remotecall(maximum, wp, A) -Future(2, 1, 6, nothing) -``` -In this example, the task ran on pid 2, called from pid 1. -""" -remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) 
= remotecall_pool(remotecall, f, pool, args...; kwargs...) - - -""" - remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) -> Future - -[`WorkerPool`](@ref) variant of `remotecall_wait(f, pid, ....)`. Wait for and take a free worker from `pool` and -perform a `remotecall_wait` on it. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> wp = WorkerPool([2, 3]); - -julia> A = rand(3000); - -julia> f = remotecall_wait(maximum, wp, A) -Future(3, 1, 9, nothing) - -julia> fetch(f) -0.9995177101692958 -``` -""" -remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_wait, f, pool, args...; kwargs...) - - -""" - remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) -> result - -[`WorkerPool`](@ref) variant of `remotecall_fetch(f, pid, ....)`. Waits for and takes a free worker from `pool` and -performs a `remotecall_fetch` on it. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> wp = WorkerPool([2, 3]); - -julia> A = rand(3000); - -julia> remotecall_fetch(maximum, wp, A) -0.9995177101692958 -``` -""" -remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_fetch, f, pool, args...; kwargs...) - -""" - remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) -> nothing - -[`WorkerPool`](@ref) variant of `remote_do(f, pid, ....)`. Wait for and take a free worker from `pool` and -perform a `remote_do` on it. -""" -remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remote_do, f, pool, args...; kwargs...) - -const _default_worker_pool = Ref{Union{AbstractWorkerPool, Nothing}}(nothing) - -""" - default_worker_pool() - -[`AbstractWorkerPool`](@ref) containing idle [`workers`](@ref) - used by `remote(f)` and [`pmap`](@ref) -(by default). Unless one is explicitly set via `default_worker_pool!(pool)`, the default worker pool is -initialized to a [`WorkerPool`](@ref). - -# Examples -```julia-repl -\$ julia -p 3 - -julia> default_worker_pool() -WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:3), Set([4, 2, 3]), RemoteChannel{Channel{Any}}(1, 1, 4)) -``` -""" -function default_worker_pool() - # On workers retrieve the default worker pool from the master when accessed - # for the first time - if _default_worker_pool[] === nothing - if myid() == 1 - _default_worker_pool[] = WorkerPool() - else - _default_worker_pool[] = remotecall_fetch(()->default_worker_pool(), 1) - end - end - return _default_worker_pool[] -end - -""" - default_worker_pool!(pool::AbstractWorkerPool) - -Set a [`AbstractWorkerPool`](@ref) to be used by `remote(f)` and [`pmap`](@ref) (by default). -""" -function default_worker_pool!(pool::AbstractWorkerPool) - _default_worker_pool[] = pool -end - -""" - remote([p::AbstractWorkerPool], f) -> Function - -Return an anonymous function that executes function `f` on an available worker -(drawn from [`WorkerPool`](@ref) `p` if provided) using [`remotecall_fetch`](@ref). -""" -remote(f) = (args...; kwargs...)->remotecall_fetch(f, default_worker_pool(), args...; kwargs...) -remote(p::AbstractWorkerPool, f) = (args...; kwargs...)->remotecall_fetch(f, p, args...; kwargs...) 
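A usage sketch of the pool-aware calls and `remote` defined above, assuming two workers:

```julia
using Distributed
addprocs(2)

wp = WorkerPool(workers())

# Takes a free worker from the pool, runs the call, and returns the worker.
@assert remotecall_fetch(+, wp, 1, 2) == 3

# `remote` wraps a function so each call goes through remotecall_fetch on a pool.
rmax = remote(wp, maximum)
@assert rmax([1, 5, 3]) == 5

# Without an explicit pool, remote(f) draws from default_worker_pool().
@assert remote(myid)() in workers()
```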
- -mutable struct CachingPool <: AbstractWorkerPool - channel::Channel{Int} - workers::Set{Int} - - # Mapping between a tuple (worker_id, f) and a RemoteChannel - map_obj2ref::IdDict{Tuple{Int, Function}, RemoteChannel} - - function CachingPool() - wp = new(Channel{Int}(typemax(Int)), Set{Int}(), IdDict{Tuple{Int, Function}, RemoteChannel}()) - finalizer(clear!, wp) - wp - end -end - -serialize(s::AbstractSerializer, cp::CachingPool) = throw(ErrorException("CachingPool objects are not serializable.")) - -""" - CachingPool(workers::Vector{Int}) - -An implementation of an `AbstractWorkerPool`. -[`remote`](@ref), [`remotecall_fetch`](@ref), -[`pmap`](@ref) (and other remote calls which execute functions remotely) -benefit from caching the serialized/deserialized functions on the worker nodes, -especially closures (which may capture large amounts of data). - -The remote cache is maintained for the lifetime of the returned `CachingPool` object. -To clear the cache earlier, use `clear!(pool)`. - -For global variables, only the bindings are captured in a closure, not the data. -`let` blocks can be used to capture global data. - -# Examples -```julia -const foo = rand(10^8); -wp = CachingPool(workers()) -let foo = foo - pmap(i -> sum(foo) + i, wp, 1:100); -end -``` - -The above would transfer `foo` only once to each worker. - -""" -function CachingPool(workers::Vector{Int}) - pool = CachingPool() - for w in workers - push!(pool, w) - end - return pool -end - -""" - clear!(pool::CachingPool) -> pool - -Removes all cached functions from all participating workers. -""" -function clear!(pool::CachingPool) - for (_,rr) in pool.map_obj2ref - finalize(rr) - end - empty!(pool.map_obj2ref) - pool -end - -exec_from_cache(rr::RemoteChannel, args...; kwargs...) = fetch(rr)(args...; kwargs...) -function exec_from_cache(f_ref::Tuple{Function, RemoteChannel}, args...; kwargs...) - put!(f_ref[2], f_ref[1]) # Cache locally - f_ref[1](args...; kwargs...) -end - -function remotecall_pool(rc_f, f, pool::CachingPool, args...; kwargs...) - worker = take!(pool) - f_ref = get(pool.map_obj2ref, (worker, f), (f, RemoteChannel(worker))) - isa(f_ref, Tuple) && (pool.map_obj2ref[(worker, f)] = f_ref[2]) # Add to tracker - - try - rc_f(exec_from_cache, worker, f_ref, args...; kwargs...) - finally - put!(pool, worker) - end -end diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl deleted file mode 100644 index 43e02c92b5a81..0000000000000 --- a/stdlib/Distributed/test/distributed_exec.jl +++ /dev/null @@ -1,1921 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Test, Distributed, Random, Serialization, Sockets -import Distributed: launch, manage - -@test cluster_cookie() isa String - -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) - -@test Distributed.extract_imports(:(begin; import Foo, Bar; let; using Baz; end; end)) == - Any[:(import Foo, Bar), :(using Baz)] - -# Test a few "remote" invocations when no workers are present -@test remote(myid)() == 1 -@test pmap(identity, 1:100) == [1:100...] 
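As the `CachingPool` docstring earlier notes, the benefit is that a closure, together with any large data it captures, is shipped to each worker only once and reused for every element of the `pmap`; a sketch along the lines of that docstring's example:

```julia
using Distributed
addprocs(2)

const foo = rand(10^6)
wp = CachingPool(workers())

# Capture the global in a `let` so its contents travel with the closure; the
# cached closure is then reused on each worker for all 100 elements.
let foo = foo
    pmap(i -> sum(foo) + i, wp, 1:100)
end

clear!(wp)   # release the cached functions on the workers
```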
-@test 100 == @distributed (+) for i in 1:100 - 1 - end - -addprocs_with_testenv(4) -@test nprocs() == 5 - -# distributed loading of packages - -# setup -@everywhere begin - old_act_proj = Base.ACTIVE_PROJECT[] - pushfirst!(Base.LOAD_PATH, "@") - Base.ACTIVE_PROJECT[] = joinpath(Sys.BINDIR, "..", "share", "julia", "test", "TestPkg") -end - -# cause precompilation of TestPkg to avoid race condition -Base.compilecache(Base.identify_package("TestPkg")) - -@everywhere using TestPkg -@everywhere using TestPkg - -@everywhere begin - Base.ACTIVE_PROJECT[] = old_act_proj - popfirst!(Base.LOAD_PATH) -end - -@everywhere using Test, Random, LinearAlgebra - -id_me = myid() -id_other = filter(x -> x != id_me, procs())[rand(1:(nprocs()-1))] - -# Test role -@everywhere using Distributed -@test Distributed.myrole() === :master -for wid = workers() - wrole = remotecall_fetch(wid) do - Distributed.myrole() - end - @test wrole === :worker -end - -# Test remote() -let - pool = default_worker_pool() - - count = 0 - count_condition = Condition() - - function remote_wait(c) - @async_logerr begin - count += 1 - remote(take!)(c) - count -= 1 - notify(count_condition) - end - yield() - end - - testchannels = [RemoteChannel() for i in 1:nworkers()] - testcount = 0 - @test isready(pool) == true - for c in testchannels - @test count == testcount - remote_wait(c) - testcount += 1 - end - @test count == testcount - @test isready(pool) == false - - for c in testchannels - @test count == testcount - put!(c, "foo") - testcount -= 1 - (count == testcount) || wait(count_condition) - @test count == testcount - @test isready(pool) == true - end - - @test count == 0 - - for c in testchannels - @test count == testcount - remote_wait(c) - testcount += 1 - end - @test count == testcount - @test isready(pool) == false - - for c in reverse(testchannels) - @test count == testcount - put!(c, "foo") - testcount -= 1 - (count == testcount) || wait(count_condition) - @test count == testcount - @test isready(pool) == true - end - - @test count == 0 -end - -# Test Futures -function testf(id) - f=Future(id) - @test isready(f) == false - @test f.v === nothing - put!(f, :OK) - @test isready(f) == true - @test f.v !== nothing - - @test_throws ErrorException put!(f, :OK) # Cannot put! to a already set future - @test_throws MethodError take!(f) # take! 
is unsupported on a Future - - @test fetch(f) === :OK -end - -testf(id_me) -testf(id_other) - -function poll_while(f::Function; timeout_seconds::Integer = 120) - start_time = time_ns() - while f() - sleep(1) - if ( ( time_ns() - start_time )/1e9 ) > timeout_seconds - @error "Timed out" timeout_seconds - return false - end - end - return true -end - -function _getenv_include_thread_unsafe() - environment_variable_name = "JULIA_TEST_INCLUDE_THREAD_UNSAFE" - default_value = "false" - environment_variable_value = strip(get(ENV, environment_variable_name, default_value)) - b = parse(Bool, environment_variable_value)::Bool - return b -end -const _env_include_thread_unsafe = _getenv_include_thread_unsafe() -function include_thread_unsafe_tests() - if Threads.maxthreadid() > 1 - if _env_include_thread_unsafe - return true - end - msg = "Skipping a thread-unsafe test because `Threads.maxthreadid() > 1`" - @warn msg Threads.maxthreadid() - Test.@test_broken false - return false - end - return true -end - -# Distributed GC tests for Futures -function test_futures_dgc(id) - f = remotecall(myid, id) - fid = remoteref_id(f) - - # remote value should be deleted after a fetch - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == true - @test f.v === nothing - @test fetch(f) == id - @test f.v !== nothing - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid)) - - # if unfetched, it should be deleted after a finalize - f = remotecall(myid, id) - fid = remoteref_id(f) - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == true - @test f.v === nothing - finalize(f) - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid)) -end - -test_futures_dgc(id_me) -test_futures_dgc(id_other) - -# if sent to another worker, it should not be deleted till all references are fetched. -wid1 = workers()[1] -wid2 = workers()[2] -f = remotecall(myid, wid1) -fid = remoteref_id(f) - -fstore = RemoteChannel(wid2) -put!(fstore, f) - -@test fetch(f) == wid1 -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true -remotecall_fetch(r->(fetch(fetch(r)); yield()), wid2, fstore) -sleep(0.5) # to ensure that wid2 gc messages have been executed on wid1 -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false - -# put! should release remote reference since it would have been cached locally -f = Future(wid1) -fid = remoteref_id(f) - -# should not be created remotely till accessed -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false -# create it remotely -isready(f) - -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true -put!(f, :OK) -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false -@test fetch(f) === :OK - -# RemoteException should be thrown on a put! 
when another process has set the value -f = Future(wid1) -fid = remoteref_id(f) - -fstore = RemoteChannel(wid2) -put!(fstore, f) # send f to wid2 -put!(f, :OK) # set value from master - -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true - -testval = remotecall_fetch(wid2, fstore) do x - try - put!(fetch(x), :OK) - return 0 - catch e - if isa(e, RemoteException) - return 1 - else - return 2 - end - end -end -@test testval == 1 - -# Issue number #25847 -@everywhere function f25847(ref) - fetch(ref) - return true -end - -f = remotecall_wait(identity, id_other, ones(10)) -rrid = Distributed.RRID(f.whence, f.id) -remotecall_fetch(f25847, id_other, f) -@test BitSet([id_me]) == remotecall_fetch(()->Distributed.PGRP.refs[rrid].clientset, id_other) - -remotecall_fetch(f25847, id_other, f) -@test BitSet([id_me]) == remotecall_fetch(()->Distributed.PGRP.refs[rrid].clientset, id_other) - -finalize(f) -yield() # flush gc msgs -@test poll_while(() -> remotecall_fetch(chk_rrid->(yield(); haskey(Distributed.PGRP.refs, chk_rrid)), id_other, rrid)) - -# Distributed GC tests for RemoteChannels -function test_remoteref_dgc(id) - rr = RemoteChannel(id) - put!(rr, :OK) - rrid = remoteref_id(rr) - - # remote value should be deleted after finalizing the ref - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true - @test fetch(rr) === :OK - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true - finalize(rr) - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid)) -end -test_remoteref_dgc(id_me) -test_remoteref_dgc(id_other) - -# if sent to another worker, it should not be deleted till the other worker has also finalized. -let wid1 = workers()[1], - wid2 = workers()[2], - rr = RemoteChannel(wid1), - rrid = remoteref_id(rr), - fstore = RemoteChannel(wid2) - - put!(fstore, rr) - if include_thread_unsafe_tests() - @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true - end - finalize(rr) # finalize locally - yield() # flush gc msgs - if include_thread_unsafe_tests() - @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true - end - remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely - sleep(0.5) # to ensure that wid2 messages have been executed on wid1 - @test poll_while(() -> remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid)) -end - -# Tests for issue #23109 - should not hang. -f = @spawnat :any rand(1, 1) -Base.Experimental.@sync begin - for _ in 1:10 - @async fetch(f) - end -end - -wid1, wid2 = workers()[1:2] -f = @spawnat wid1 rand(1,1) -Base.Experimental.@sync begin - @async fetch(f) - @async remotecall_fetch(()->fetch(f), wid2) -end - - -@test fetch(@spawnat id_other myid()) == id_other -@test (@fetchfrom id_other myid()) == id_other - -pids=[] -for i in 1:nworkers() - push!(pids, @fetch myid()) -end -@test sort(pids) == sort(workers()) - - -# test getindex on Futures and RemoteChannels -function test_indexing(rr) - a = rand(5,5) - put!(rr, a) - @test rr[2,3] == a[2,3] - @test rr[] == a -end - -test_indexing(Future()) -test_indexing(Future(id_other)) -test_indexing(RemoteChannel()) -test_indexing(RemoteChannel(id_other)) - -# Test ser/deser to non-ClusterSerializer objects. 
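The test that follows checks the behaviour documented earlier: outside a `ClusterSerializer`, remote references serialize as zeroed-out placeholders. A standalone sketch of that behaviour:

```julia
using Distributed, Serialization

io = IOBuffer()
serialize(io, Future())      # plain serializer, not a ClusterSerializer
seekstart(io)
f2 = deserialize(io)

# All identifying fields come back zeroed; the reference is not usable remotely.
@assert f2.where == 0 && f2.whence == 0 && f2.id == 0 && f2.v === nothing
```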
-function test_regular_io_ser(ref::Distributed.AbstractRemoteRef) - io = IOBuffer() - serialize(io, ref) - seekstart(io) - ref2 = deserialize(io) - for fld in fieldnames(typeof(ref)) - v = getfield(ref2, fld) - if isa(v, Number) - @test v === zero(typeof(v)) - elseif fld === :lock - @test v isa ReentrantLock - @test !islocked(v) - elseif v !== nothing - error(string("Add test for field ", fld)) - end - end -end - -test_regular_io_ser(Future()) -test_regular_io_ser(RemoteChannel()) - -# Test @distributed load balancing - all processors should get either M or M+1 -# iterations out of the loop range for some M. -ids = @distributed((a,b)->[a;b], for i=1:7; myid(); end) -workloads = Int[sum(ids .== i) for i in 2:nprocs()] -@test maximum(workloads) - minimum(workloads) <= 1 - -# @distributed reduction should work even with very short ranges -@test @distributed(+, for i=1:2; i; end) == 3 - -@test_throws ArgumentError sleep(-1) -@test_throws ArgumentError timedwait(()->false, 0.1, pollint=-0.5) - -# specify pids for pmap -@test sort(workers()[1:2]) == sort(unique(pmap(x->(sleep(0.1);myid()), WorkerPool(workers()[1:2]), 1:10))) - -# Testing buffered and unbuffered reads -# This large array should write directly to the socket -a = fill(1, 10^6) -@test a == remotecall_fetch((x)->x, id_other, a) - -# Not a bitstype, should be buffered -s = [randstring() for x in 1:10^5] -@test s == remotecall_fetch((x)->x, id_other, s) - -#large number of small requests -num_small_requests = 10000 -@test fill(id_other, num_small_requests) == [remotecall_fetch(myid, id_other) for i in 1:num_small_requests] - -# test parallel sends of large arrays from multiple tasks to the same remote worker -ntasks = 10 -rr_list = [Channel(1) for x in 1:ntasks] - -for rr in rr_list - local rr - let rr = rr - @async try - for i in 1:10 - a = rand(2*10^5) - @test a == remotecall_fetch(x->x, id_other, a) - yield() - end - put!(rr, :OK) - catch - put!(rr, :ERROR) - end - end -end - -@test [fetch(rr) for rr in rr_list] == [:OK for x in 1:ntasks] - -function test_channel(c) - @test isopen(c) == true - put!(c, 1) - put!(c, "Hello") - put!(c, 5.0) - - @test isready(c) == true - @test isopen(c) == true - @test fetch(c) == 1 - @test fetch(c) == 1 # Should not have been popped previously - @test take!(c) == 1 - @test take!(c) == "Hello" - @test fetch(c) == 5.0 - @test take!(c) == 5.0 - @test isready(c) == false - @test isopen(c) == true - close(c) - @test isopen(c) == false -end - -test_channel(Channel(10)) -test_channel(RemoteChannel(()->Channel(10))) - -c=Channel{Int}(1) -@test_throws MethodError put!(c, "Hello") - -# test channel iterations -function test_iteration(in_c, out_c) - t=@async for v in in_c - put!(out_c, v) - end - - @test isopen(in_c) == true - put!(in_c, 1) - @test take!(out_c) == 1 - put!(in_c, "Hello") - close(in_c) - @test take!(out_c) == "Hello" - @test isopen(in_c) == false - @test_throws InvalidStateException put!(in_c, :foo) - yield() - @test istaskdone(t) == true -end - -test_iteration(Channel(10), Channel(10)) -test_iteration(RemoteChannel(() -> Channel(10)), RemoteChannel(() -> Channel(10))) - -@everywhere function test_iteration_take(ch) - count = 0 - for x in ch - count += 1 - end - return count -end - -@everywhere function test_iteration_put(ch, total) - for i in 1:total - put!(ch, i) - end - close(ch) -end - -let ch = RemoteChannel(() -> Channel(1)) - @async test_iteration_put(ch, 10) - @test 10 == @fetchfrom id_other test_iteration_take(ch) - # now reverse - ch = RemoteChannel(() -> Channel(1)) - @spawnat id_other 
test_iteration_put(ch, 10) - @test 10 == test_iteration_take(ch) -end - -# make sure exceptions propagate when waiting on Tasks -@test_throws CompositeException (@sync (@async error("oops"))) -try - @sync begin - for i in 1:5 - @async error(i) - end - end - error("unexpected") -catch ex - @test typeof(ex) == CompositeException - @test length(ex) == 5 - @test typeof(ex.exceptions[1]) == TaskFailedException - @test typeof(ex.exceptions[1].task.exception) == ErrorException - # test start, next, and done - for (i, i_ex) in enumerate(ex) - @test i == parse(Int, i_ex.task.exception.msg) - end - # test showerror - err_str = sprint(showerror, ex) - err_one_str = sprint(showerror, ex.exceptions[1]) - @test err_str == err_one_str * "\n\n...and 4 more exceptions.\n" -end -@test sprint(showerror, CompositeException()) == "CompositeException()\n" - -function test_remoteexception_thrown(expr) - try - expr() - error("unexpected") - catch ex - @test typeof(ex) == RemoteException - @test typeof(ex.captured) == CapturedException - @test typeof(ex.captured.ex) == ErrorException - @test ex.captured.ex.msg == "foobar" - end -end - -for id in [id_other, id_me] - local id - test_remoteexception_thrown() do - remotecall_fetch(id) do - throw(ErrorException("foobar")) - end - end - test_remoteexception_thrown() do - remotecall_wait(id) do - throw(ErrorException("foobar")) - end - end - test_remoteexception_thrown() do - wait(remotecall(id) do - throw(ErrorException("foobar")) - end) - end -end - -# make sure the stackframe from the remote error can be serialized -let ex - try - remotecall_fetch(id_other) do - @eval module AModuleLocalToOther - foo() = throw(ErrorException("A.error")) - foo() - end - end - catch ex - end - @test (ex::RemoteException).pid == id_other - @test ((ex.captured::CapturedException).ex::ErrorException).msg == "A.error" - bt = ex.captured.processed_bt::Array{Any,1} - @test length(bt) > 1 - frame, repeated = bt[1]::Tuple{Base.StackTraces.StackFrame, Int} - @test frame.func === :foo - @test frame.linfo === nothing - @test repeated == 1 -end - -# pmap tests. Needs at least 4 processors dedicated to the below tests. Which we currently have -# since the distributed tests are now spawned as a separate set. - -# Test all combinations of pmap keyword args. -pmap_args = [ - (:distributed, [:default, false]), - (:batch_size, [:default,2]), - (:on_error, [:default, e -> (e.msg == "foobar" ? true : rethrow())]), - (:retry_delays, [:default, fill(0.001, 1000)]), - (:retry_check, [:default, (s,e) -> (s,endswith(e.msg,"foobar"))]), - ] - -kwdict = Dict() -function walk_args(i) - if i > length(pmap_args) - kwargs = [] - for (k,v) in kwdict - if v !== :default - push!(kwargs, (k,v)) - end - end - - data = 1:100 - - testw = kwdict[:distributed] === false ? [1] : workers() - - if kwdict[:retry_delays] !== :default - mapf = x -> iseven(myid()) ? error("notfoobar") : (x*2, myid()) - results_test = pmap_res -> begin - results = [x[1] for x in pmap_res] - pids = [x[2] for x in pmap_res] - @test results == [2:2:200...] - for p in testw - if isodd(p) - @test p in pids - else - @test !(p in pids) - end - end - end - elseif kwdict[:on_error] === :default - mapf = x -> (x*2, myid()) - results_test = pmap_res -> begin - results = [x[1] for x in pmap_res] - pids = [x[2] for x in pmap_res] - @test results == [2:2:200...] - for p in testw - @test p in pids - end - end - else - mapf = x -> iseven(x) ? 
error("foobar") : (x*2, myid()) - results_test = pmap_res -> begin - w = testw - for (idx,x) in enumerate(data) - if iseven(x) - @test pmap_res[idx] == true - else - @test pmap_res[idx][1] == x*2 - @test pmap_res[idx][2] in w - end - end - end - end - - try - results_test(pmap(mapf, data; kwargs...)) - catch - println("pmap executing with args : ", kwargs) - rethrow() - end - - return - end - - kwdict[pmap_args[i][1]] = pmap_args[i][2][1] - walk_args(i+1) - - kwdict[pmap_args[i][1]] = pmap_args[i][2][2] - walk_args(i+1) -end - -# Start test for various kw arg combinations -walk_args(1) - -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "generic_map_tests.jl")) -empty_pool = WorkerPool([myid()]) -pmap_fallback = (f, c...) -> pmap(f, empty_pool, c...) -generic_map_tests(pmap_fallback) - -# pmap with various types. Test for equivalence with map -run_map_equivalence_tests(pmap) -@test pmap(uppercase, "Hello World!") == map(uppercase, "Hello World!") - - -# Simple test for pmap throws error -let error_thrown = false - try - pmap(x -> x == 50 ? error("foobar") : x, 1:100) - catch e - @test e.captured.ex.msg == "foobar" - error_thrown = true - end - @test error_thrown -end - -# Test pmap with a generator type iterator -@test [1:100...] == pmap(x->x, Base.Generator(x->(sleep(0.0001); x), 1:100)) - -# Test pgenerate -n = 10 -as = [rand(4,4) for i in 1:n] -bs = deepcopy(as) -cs = collect(Distributed.pgenerate(x->(sleep(rand()*0.1); svd(x)), bs)) -svdas = map(svd, as) -for i in 1:n - @test cs[i].U ≈ svdas[i].U - @test cs[i].S ≈ svdas[i].S - @test cs[i].V ≈ svdas[i].V -end - -# Test that the default worker pool cycles through all workers -pmap(_->myid(), 1:nworkers()) # priming run -@test nworkers() == length(unique(pmap(_->myid(), 1:100))) - -# Test same behaviour when executed on a worker -@test nworkers() == length(unique(remotecall_fetch(()->pmap(_->myid(), 1:100), id_other))) - -# Same tests with custom worker pools. -wp = WorkerPool(workers()) -@test nworkers() == length(unique(pmap(_->myid(), wp, 1:100))) -@test nworkers() == length(unique(remotecall_fetch(wp->pmap(_->myid(), wp, 1:100), id_other, wp))) -wp = WorkerPool(2:3) -@test sort(unique(pmap(_->myid(), wp, 1:100))) == [2,3] - -# CachingPool tests -wp = CachingPool(workers()) -@test [1:100...] == pmap(x->x, wp, 1:100) - -clear!(wp) -@test length(wp.map_obj2ref) == 0 - -# default_worker_pool! tests -wp_default = Distributed.default_worker_pool() -try - local wp = CachingPool(workers()) - Distributed.default_worker_pool!(wp) - @test [1:100...] == pmap(x->x, wp, 1:100) - @test !isempty(wp.map_obj2ref) - clear!(wp) - @test isempty(wp.map_obj2ref) -finally - Distributed.default_worker_pool!(wp_default) -end - -# The below block of tests are usually run only on local development systems, since: -# - tests which print errors -# - addprocs tests are memory intensive -# - ssh addprocs requires sshd to be running locally with passwordless login enabled. -# The test block is enabled by defining env JULIA_TESTFULL=1 - -DoFullTest = Base.get_bool_env("JULIA_TESTFULL", false) - -if DoFullTest - println("Testing exception printing on remote worker from a `remote_do` call") - println("Please ensure the remote error and backtrace is displayed on screen") - - remote_do(id_other) do - throw(ErrorException("TESTING EXCEPTION ON REMOTE DO. 
PLEASE IGNORE")) - end - sleep(0.5) # Give some time for the above error to be printed - - println("\n\nThe following 'invalid connection credentials' error messages are to be ignored.") - all_w = workers() - # Test sending fake data to workers. The worker processes will print an - # error message but should not terminate. - for w in Distributed.PGRP.workers - if isa(w, Distributed.Worker) - local s = connect(w.config.host, w.config.port) - write(s, randstring(32)) - end - end - @test workers() == all_w - @test all([p == remotecall_fetch(myid, p) for p in all_w]) - -if Sys.isunix() # aka have ssh - function test_n_remove_pids(new_pids) - for p in new_pids - w_in_remote = sort(remotecall_fetch(workers, p)) - try - @test intersect(new_pids, w_in_remote) == new_pids - catch - print("p : $p\n") - print("newpids : $new_pids\n") - print("w_in_remote : $w_in_remote\n") - print("intersect : $(intersect(new_pids, w_in_remote))\n\n\n") - rethrow() - end - end - - remotecall_fetch(rmprocs, 1, new_pids) - end - - print("\n\nTesting SSHManager. A minimum of 4GB of RAM is recommended.\n") - print("Please ensure: \n") - print("1) sshd is running locally with passwordless login enabled.\n") - print("2) Env variable USER is defined and is the ssh user.\n") - print("3) Port 9300 is not in use.\n") - - sshflags = `-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=ERROR ` - #Issue #9951 - hosts=[] - localhost_aliases = ["localhost", string(getipaddr()), "127.0.0.1"] - num_workers = parse(Int,(get(ENV, "JULIA_ADDPROCS_NUM", "9"))) - - for i in 1:(num_workers/length(localhost_aliases)) - append!(hosts, localhost_aliases) - end - - print("\nTesting SSH addprocs with $(length(hosts)) workers...\n") - new_pids = addprocs_with_testenv(hosts; sshflags=sshflags) - @test length(new_pids) == length(hosts) - test_n_remove_pids(new_pids) - - print("\nMixed ssh addprocs with :auto\n") - new_pids = addprocs_with_testenv(["localhost", ("127.0.0.1", :auto), "localhost"]; sshflags=sshflags) - @test length(new_pids) == (2 + Sys.CPU_THREADS) - test_n_remove_pids(new_pids) - - print("\nMixed ssh addprocs with numeric counts\n") - new_pids = addprocs_with_testenv([("localhost", 2), ("127.0.0.1", 2), "localhost"]; sshflags=sshflags) - @test length(new_pids) == 5 - test_n_remove_pids(new_pids) - - print("\nssh addprocs with tunnel\n") - new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, sshflags=sshflags) - @test length(new_pids) == num_workers - test_n_remove_pids(new_pids) - - print("\nssh addprocs with tunnel (SSH multiplexing)\n") - new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, multiplex=true, sshflags=sshflags) - @test length(new_pids) == num_workers - controlpath = joinpath(homedir(), ".ssh", "julia-$(ENV["USER"])@localhost:22") - @test issocket(controlpath) - test_n_remove_pids(new_pids) - @test :ok == timedwait(()->!issocket(controlpath), 10.0; pollint=0.5) - - print("\nAll supported formats for hostname\n") - h1 = "localhost" - user = ENV["USER"] - h2 = "$user@$h1" - h3 = "$h2:22" - h4 = "$h3 $(string(getipaddr()))" - h5 = "$h4:9300" - - new_pids = addprocs_with_testenv([h1, h2, h3, h4, h5]; sshflags=sshflags) - @test length(new_pids) == 5 - test_n_remove_pids(new_pids) - - print("\nkeyword arg exename\n") - for exename in [`$(joinpath(Sys.BINDIR, Base.julia_exename()))`, "$(joinpath(Sys.BINDIR, Base.julia_exename()))"] - for addp_func in [()->addprocs_with_testenv(["localhost"]; exename=exename, exeflags=test_exeflags, sshflags=sshflags), - 
()->addprocs_with_testenv(1; exename=exename, exeflags=test_exeflags)] - - local new_pids = addp_func() - @test length(new_pids) == 1 - test_n_remove_pids(new_pids) - end - end - -end # unix-only -end # full-test - -let t = @task 42 - schedule(t, ErrorException(""), error=true) - @test_throws TaskFailedException(t) Base.wait(t) -end - -# issue #8207 -let A = Any[] - @distributed (+) for i in (push!(A,1); 1:2) - i - end - @test length(A) == 1 -end - -# issue #13168 -function f13168(n) - val = 0 - for i = 1:n - val += sum(rand(n, n)^2) - end - return val -end -let t = schedule(@task f13168(100)) - @test t.state === :runnable - @test t.queue !== nothing - @test_throws ErrorException schedule(t) - yield() - @test t.state === :done - @test t.queue === nothing - @test_throws ErrorException schedule(t) - @test isa(fetch(t), Float64) -end - -# issue #13122 -@test remotecall_fetch(identity, workers()[1], C_NULL) === C_NULL - -# issue #11062 -function t11062() - @async v11062 = 1 - v11062 = 2 -end - -@test t11062() == 2 - -# issue #15406 -v15406 = remotecall_wait(() -> 1, id_other) -fetch(v15406) -remotecall_wait(fetch, id_other, v15406) - - -# issue #43396 -# Covers the remote fetch where the value returned is `nothing` -# May be caused by attempting to unwrap a non-`Some` type with `something` -# `call_on_owner` ref fetches return values not wrapped in `Some` -# and have to be returned directly -@test nothing === fetch(remotecall(() -> nothing, workers()[1])) -@test 10 === fetch(remotecall(() -> 10, workers()[1])) - - -# Test various forms of remotecall* invocations - -@everywhere f_args(v1, v2=0; kw1=0, kw2=0) = v1+v2+kw1+kw2 - -function test_f_args(result, args...; kwargs...) - @test fetch(remotecall(args...; kwargs...)) == result - @test fetch(remotecall_wait(args...; kwargs...)) == result - @test remotecall_fetch(args...; kwargs...) == result - - # A visual test - remote_do should NOT print any errors - remote_do(args...; kwargs...) -end - -for tid in [id_other, id_me, default_worker_pool()] - test_f_args(1, f_args, tid, 1) - test_f_args(3, f_args, tid, 1, 2) - test_f_args(5, f_args, tid, 1; kw1=4) - test_f_args(13, f_args, tid, 1; kw1=4, kw2=8) - test_f_args(15, f_args, tid, 1, 2; kw1=4, kw2=8) -end - -# Test remote_do -f=Future(id_me) -remote_do(fut->put!(fut, myid()), id_me, f) -@test fetch(f) == id_me - -f=Future(id_other) -remote_do(fut->put!(fut, myid()), id_other, f) -@test fetch(f) == id_other - -# Github issue #29932 -rc_unbuffered = RemoteChannel(()->Channel{Vector{Float64}}(0)) -@test eltype(rc_unbuffered) == Vector{Float64} - -@async begin - # Trigger direct write (no buffering) of largish array - array_sz = Int(Base.SZ_UNBUFFERED_IO/8) + 1 - largev = zeros(array_sz) - for i in 1:10 - largev[1] = float(i) - put!(rc_unbuffered, largev) - end -end - -@test remotecall_fetch(rc -> begin - for i in 1:10 - take!(rc)[1] != float(i) && error("Failed") - end - return :OK - end, id_other, rc_unbuffered) === :OK - -# github issue 33972 -rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other) -close(rc_unbuffered_other) -try; take!(rc_unbuffered_other); catch; end -@test !remotecall_fetch(rc -> islocked(Distributed.lookup_ref(remoteref_id(rc)).synctake), - id_other, rc_unbuffered_other) - -# github PR #14456 -n = DoFullTest ? 
6 : 5 -for i = 1:10^n - fetch(@spawnat myid() myid()) -end - -# issue #15451 -@test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7 - -# issue #16091 -mutable struct T16091 end -wid = workers()[1] -@test try - remotecall_fetch(()->T16091, wid) - false -catch ex - ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091) -end -@test try - remotecall_fetch(identity, wid, T16091) - false -catch ex - ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091) -end - -f16091a() = 1 -remotecall_fetch(()->eval(:(f16091a() = 2)), wid) -@test remotecall_fetch(f16091a, wid) === 2 -@test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1 - -# these will only heisen-fail, since it depends on the gensym counter collisions: -f16091b = () -> 1 -remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid) -@test remotecall_fetch(f16091b, 2) === 1 -# Global anonymous functions are over-written... -@test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 1 - -# ...while local anonymous functions are by definition, local. -let - f16091c = () -> 1 - @test remotecall_fetch(f16091c, 2) === 1 - @test remotecall_fetch( - myid -> begin - let - f16091c = () -> 2 - remotecall_fetch(f16091c, myid) - end - end, wid, myid()) === 2 -end - -# issue #16451 -rng=RandomDevice() -retval = @distributed (+) for _ in 1:10 - rand(rng) -end -@test retval > 0.0 && retval < 10.0 - -rand(rng) -retval = @distributed (+) for _ in 1:10 - rand(rng) -end -@test retval > 0.0 && retval < 10.0 - -# serialization tests -wrkr1 = workers()[1] -wrkr2 = workers()[end] - -@test remotecall_fetch(p->remotecall_fetch(myid, p), wrkr1, wrkr2) == wrkr2 - -# Send f to wrkr1 and wrkr2. Then try calling f on wrkr2 from wrkr1 -f_myid = ()->myid() -@test wrkr1 == remotecall_fetch(f_myid, wrkr1) -@test wrkr2 == remotecall_fetch(f_myid, wrkr2) -@test wrkr2 == remotecall_fetch((f, p)->remotecall_fetch(f, p), wrkr1, f_myid, wrkr2) - -# Deserialization error recovery test -# locally defined module, but unavailable on workers -module LocalFoo - global foo=1 -end - -let - @test_throws RemoteException remotecall_fetch(()->LocalFoo.foo, 2) - - bad_thunk = ()->NonexistentModule.f() - @test_throws RemoteException remotecall_fetch(bad_thunk, 2) - - # Test that the stream is still usable - @test remotecall_fetch(()->:test,2) === :test - ref = remotecall(bad_thunk, 2) - @test_throws RemoteException fetch(ref) -end - -# Test calling @everywhere from a module not defined on the workers -module LocalBar - using Distributed - bar() = @everywhere new_bar()=myid() -end -LocalBar.bar() -for p in procs() - @test p == remotecall_fetch(new_bar, p) -end - -# @everywhere (remotecall_eval) behaviors (#22589) -let (p, p2) = filter!(p -> p != myid(), procs()) - @test (myid() + 1) == @everywhere myid() (myid() + 1) - @test (p * 2) == @everywhere p (myid() * 2) - @test 1 == @everywhere p defined_on_p = 1 - @test !@isdefined defined_on_p - @test !isdefined(Main, :defined_on_p) - @test remotecall_fetch(isdefined, p, Main, :defined_on_p) - @test !remotecall_fetch(isdefined, p2, Main, :defined_on_p) - @test nothing === @everywhere [p, p] defined_on_p += 1 - @test 3 === @everywhere p defined_on_p - let ref = Ref(0) - @test nothing === - @everywhere [myid(), p, myid(), myid(), p] begin - Test.@test Main === @__MODULE__ - $ref[] += 1 - end - @test ref[] == 3 - end - function test_throw_on(procs, msg) - try - @everywhere procs error($msg) - error("test failed to throw") - catch excpt - if procs isa 
Int - ex = Any[excpt] - else - ex = (excpt::CompositeException).exceptions - end - for (p, ex) in zip(procs, ex) - local p - if procs isa Int || p != myid() - @test (ex::RemoteException).pid == p - ex = ((ex::RemoteException).captured::CapturedException).ex - else - ex = (ex::TaskFailedException).task.exception - end - @test (ex::ErrorException).msg == msg - end - end - end - test_throw_on(p, "everywhere on p") - test_throw_on(myid(), "everywhere on myid") - test_throw_on([p, myid()], "everywhere on myid and p") - test_throw_on([p2, p], "everywhere on p and p2") -end - -# Test addprocs enable_threaded_blas parameter - -function get_remote_num_threads(processes_added) - return [remotecall_fetch(BLAS.get_num_threads, proc_id) for proc_id in processes_added] -end - -function test_blas_config(pid, expected) - for worker in Distributed.PGRP.workers - if worker.id == pid - @test worker.config.enable_threaded_blas == expected - return - end - end -end - -function test_add_procs_threaded_blas() - master_blas_thread_count = BLAS.get_num_threads() - if master_blas_thread_count === nothing - @warn "Skipping blas num threads tests due to unsupported blas version" - return - end - - # Test with default enable_threaded_blas false - processes_added = addprocs_with_testenv(2) - for proc_id in processes_added - test_blas_config(proc_id, false) - end - - # Master thread should not have changed - @test BLAS.get_num_threads() == master_blas_thread_count - - # Threading disabled in children by default - thread_counts_by_process = get_remote_num_threads(processes_added) - for thread_count in thread_counts_by_process - @test thread_count == 1 - end - rmprocs(processes_added) - - processes_added = addprocs_with_testenv(2, enable_threaded_blas=true) - for proc_id in processes_added - test_blas_config(proc_id, true) - end - - @test BLAS.get_num_threads() == master_blas_thread_count - - # BLAS.set_num_threads(`num`) doesn't cause BLAS.get_num_threads to return `num` - # depending on the machine, the BLAS version, and BLAS configuration, so - # we need a very lenient test. - thread_counts_by_process = get_remote_num_threads(processes_added) - for thread_count in thread_counts_by_process - @test thread_count >= 1 - end - rmprocs(processes_added) -end -test_add_procs_threaded_blas() - -#19687 -if false ### TODO: The logic that is supposed to implement this is racy - Disabled for now -# ensure no race conditions between rmprocs and addprocs -for i in 1:5 - p = addprocs_with_testenv(1)[1] - @spawnat p sleep(5) - rmprocs(p; waitfor=0) -end - -# Test if a wait has been called on rmprocs(...;waitfor=0), further remotecalls -# don't throw errors. -for i in 1:5 - p = addprocs_with_testenv(1)[1] - np = nprocs() - @spawnat p sleep(5) - Base.wait(rmprocs(p; waitfor=0)) - for pid in procs() - @test pid == remotecall_fetch(myid, pid) - end - @test nprocs() == np - 1 -end - -# Test that an exception is thrown if workers are unable to be removed within requested time. 
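# Illustrative aside (hypothetical snippet, not part of the original test file): the
# `on_error` and `retry_delays` keywords exercised by the `walk_args` driver earlier
# in this file, shown in isolation. Assumes at least one worker is available.
using Distributed, Test
res = pmap(x -> iseven(x) ? error("foobar") : x, 1:4;
           on_error = e -> e.msg == "foobar" ? true : rethrow())
@test res[1] == 1 && res[2] === true   # failed elements are replaced by the handler's value
flaky = x -> rand() < 0.5 ? error("flaky") : x   # fails roughly half the time
@test pmap(flaky, 1:10; retry_delays = fill(0.01, 100)) == collect(1:10)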
-if DoFullTest - pids=addprocs_with_testenv(4); - @test_throws ErrorException rmprocs(pids; waitfor=0.001); - # wait for workers to be removed - while any(in(procs()), pids) - sleep(0.1) - end -end -end - -# Test addprocs/rmprocs from master node only -for f in [ ()->addprocs(1; exeflags=test_exeflags), ()->rmprocs(workers()) ] - local f - try - remotecall_fetch(f, id_other) - error("Unexpected") - catch ex - @test isa(ex, RemoteException) - @test ex.captured.ex.msg == "Only process 1 can add and remove workers" - end -end - -# Test the following addprocs error conditions -# - invalid host name - github issue #20372 -# - julia exe exiting with an error -# - timeout reading host:port from worker stdout -# - host:port not found in worker stdout in the first 1000 lines - -struct ErrorSimulator <: ClusterManager - mode -end - -function launch(manager::ErrorSimulator, params::Dict, launched::Array, c::Condition) - exename = params[:exename] - dir = params[:dir] - - cmd = `$(Base.julia_cmd(exename)) --startup-file=no` - if manager.mode === :timeout - cmd = `$cmd -e "sleep(10)"` - elseif manager.mode === :ntries - cmd = `$cmd -e "[println(x) for x in 1:1001]"` - elseif manager.mode === :exit - cmd = `$cmd -e "exit(-1)"` - else - error("Unknown mode") - end - io = open(detach(setenv(cmd, dir=dir))) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - push!(launched, wconfig) - notify(c) -end - -testruns = Any[] - -if DoFullTest - append!(testruns, [(()->addprocs_with_testenv(["errorhost20372"]), "Unable to read host:port string from worker. Launch command exited with error?", ())]) -end - -append!(testruns, [ - (()->addprocs_with_testenv(ErrorSimulator(:exit)), "Unable to read host:port string from worker. Launch command exited with error?", ()), - (()->addprocs_with_testenv(ErrorSimulator(:ntries)), "Unexpected output from worker launch command. Host:port string not found.", ()), - (()->addprocs_with_testenv(ErrorSimulator(:timeout)), "Timed out waiting to read host:port string from worker.", ("JULIA_WORKER_TIMEOUT"=>"1",)) -]) - -for (addp_testf, expected_errstr, env) in testruns - old_stdout = stdout - stdout_out, stdout_in = redirect_stdout() - stdout_txt = @async filter!(readlines(stdout_out)) do s - return !startswith(s, "\tFrom worker startup:\t") - end - try - withenv(env...) do - addp_testf() - end - error("Unexpected") - catch ex - redirect_stdout(old_stdout) - close(stdout_in) - @test isempty(fetch(stdout_txt)) - @test isa(ex, CompositeException) - @test ex.exceptions[1].task.exception.msg == expected_errstr - end -end - - -# Auto serialization of globals from Main. 
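# Illustrative aside (hypothetical snippet, not part of the original test file): the
# master-only restriction verified just above, written as a stand-alone check.
# Assumes a local worker can be started.
using Distributed, Test
w = only(addprocs(1))
ex = try
    remotecall_fetch(() -> addprocs(1), w)   # workers may not add workers themselves
    nothing
catch err
    err
end
@test ex isa RemoteException
@test occursin("Only process 1 can add and remove workers", sprint(showerror, ex))
rmprocs(w)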
-# bitstypes -global v1 = 1 -@test remotecall_fetch(()->v1, id_other) == v1 -@test remotecall_fetch(()->isdefined(Main, :v1), id_other) -for i in 2:5 - global v1 = i - @test remotecall_fetch(()->v1, id_other) == i -end - -# non-bitstypes -global v2 = zeros(10) -for i in 1:5 - v2[i] = i - @test remotecall_fetch(()->v2, id_other) == v2 -end - -# Different global bindings to the same object -global v3 = fill(1., 10) -global v4 = v3 -@test remotecall_fetch(()->v3, id_other) == remotecall_fetch(()->v4, id_other) -@test remotecall_fetch(()->isdefined(Main, :v3), id_other) -@test remotecall_fetch(()->isdefined(Main, :v4), id_other) - -# Global references to Types and Modules should work if they are locally defined -global v5 = Int -global v6 = Distributed -@test remotecall_fetch(()->v5, id_other) === Int -@test remotecall_fetch(()->v6, id_other) === Distributed - -struct FooStructLocal end -module FooModLocal end -v5 = FooStructLocal -v6 = FooModLocal -@test_throws RemoteException remotecall_fetch(()->v5, id_other) -@test_throws RemoteException remotecall_fetch(()->v6, id_other) - -@everywhere struct FooStructEverywhere end -@everywhere module FooModEverywhere end -v5 = FooStructEverywhere -v6 = FooModEverywhere -@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere -@test remotecall_fetch(()->v6, id_other) === FooModEverywhere - -# hash value same but different object instance -v7 = ones(10) -oid1 = objectid(v7) -hval1 = hash(v7) -@test v7 == @fetchfrom id_other v7 -remote_oid1 = @fetchfrom id_other objectid(v7) - -v7 = ones(10) -@test oid1 != objectid(v7) -@test hval1 == hash(v7) -@test remote_oid1 != @fetchfrom id_other objectid(v7) - - -# Github issue #31252 -v31252 = :a -@test :a == @fetchfrom id_other v31252 - -v31252 = :b -@test :b == @fetchfrom id_other v31252 - -v31252 = :a -@test :a == @fetchfrom id_other v31252 - - -# Test that a global is not being repeatedly serialized when -# a) referenced multiple times in the closure -# b) hash value has not changed. - -@everywhere begin - using Serialization - global testsercnt_d = Dict() - mutable struct TestSerCnt - v - end - import Base.hash, Base.== - hash(x::TestSerCnt, h::UInt) = hash(hash(x.v), h) - ==(x1::TestSerCnt, x2::TestSerCnt) = (x1.v == x2.v) - - function Serialization.serialize(s::AbstractSerializer, t::TestSerCnt) - Serialization.serialize_type(s, TestSerCnt) - serialize(s, t.v) - global testsercnt_d - cnt = get!(testsercnt_d, objectid(t), 0) - testsercnt_d[objectid(t)] = cnt+1 - end - - Serialization.deserialize(s::AbstractSerializer, ::Type{TestSerCnt}) = TestSerCnt(deserialize(s)) -end - -# hash value of tsc is not changed -global tsc = TestSerCnt(zeros(10)) -for i in 1:5 - remotecall_fetch(()->tsc, id_other) -end -# should have been serialized only once -@test testsercnt_d[objectid(tsc)] == 1 - -# hash values are changed -n=5 -testsercnt_d[objectid(tsc)] = 0 -for i in 1:n - tsc.v[i] = i - remotecall_fetch(()->tsc, id_other) -end -# should have been serialized as many times as the loop -@test testsercnt_d[objectid(tsc)] == n - -# Multiple references in a closure should be serialized only once. 
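# Illustrative aside (hypothetical snippet, not part of the original test file):
# globals referenced from a closure are shipped to the worker automatically and
# re-sent only when their value changes, which is what the TestSerCnt counters
# above verify. `g_example` is a name made up for this sketch.
using Distributed, Test
p = only(addprocs(1))
global g_example = 42
@test remotecall_fetch(() -> g_example, p) == 42
global g_example = 43                    # a changed value is re-serialized on next use
@test remotecall_fetch(() -> g_example, p) == 43
rmprocs(p)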
-global mrefs = TestSerCnt(fill(1.,10)) -@test remotecall_fetch(()->(mrefs.v, 2*mrefs.v, 3*mrefs.v), id_other) == (fill(1.,10), fill(2.,10), fill(3.,10)) -@test testsercnt_d[objectid(mrefs)] == 1 - - -# nested anon functions -global f1 = x->x -global f2 = x->f1(x) -v = rand() -@test remotecall_fetch(f2, id_other, v) == v -@test remotecall_fetch(x->f2(x), id_other, v) == v - -# consts -const c1 = fill(1., 10) -@test remotecall_fetch(()->c1, id_other) == c1 -@test remotecall_fetch(()->isconst(Main, :c1), id_other) - -# Test same calls with local vars -function wrapped_var_ser_tests() - # bitstypes - local lv1 = 1 - @test remotecall_fetch(()->lv1, id_other) == lv1 - @test !remotecall_fetch(()->isdefined(Main, :lv1), id_other) - for i in 2:5 - lv1 = i - @test remotecall_fetch(()->lv1, id_other) == i - end - - # non-bitstypes - local lv2 = zeros(10) - for i in 1:5 - lv2[i] = i - @test remotecall_fetch(()->lv2, id_other) == lv2 - end - - # nested anon functions - local lf1 = x->x - local lf2 = x->lf1(x) - v = rand() - @test remotecall_fetch(lf2, id_other, v) == v - @test remotecall_fetch(x->lf2(x), id_other, v) == v -end - -wrapped_var_ser_tests() - -# Test internal data structures being cleaned up upon gc. -global ids_cleanup = fill(1., 6) -global ids_func = ()->ids_cleanup - -clust_ser = (Distributed.worker_from_id(id_other)).w_serializer -@test remotecall_fetch(ids_func, id_other) == ids_cleanup - -# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj` - -# reported github issues - Mostly tests with globals and various distributed macros -#2669, #5390 -v2669=10 -@test fetch(@spawnat :any (1+v2669)) == 11 - -#12367 -refs = [] -if true - n = 10 - for p in procs() - push!(refs, @spawnat p begin - @sync for i in 1:n - nothing - end - end) - end -end -foreach(wait, refs) - -#6760 -if true - a = 2 - x = @distributed (vcat) for k=1:2 - sin(a) - end -end -@test x == map(_->sin(2), 1:2) - -let thrown = false - try - remotecall_fetch(sqrt, 2, -1) - catch e - thrown = true - local b = IOBuffer() - showerror(b, e) - @test occursin("sqrt was called with a negative real argument", String(take!(b))) - end - @test thrown -end - -# issue #34333 -let - @test fetch(remotecall(Float64, id_other, 1)) == Float64(1) - @test fetch(remotecall_wait(Float64, id_other, 1)) == Float64(1) - @test remotecall_fetch(Float64, id_other, 1) == Float64(1) -end - -#19463 -function foo19463() - w1 = workers()[1] - w2 = workers()[2] - w3 = workers()[3] - - b1 = () -> 1 - b2 = () -> fetch(@spawnat w1 b1()) + 1 - b3 = () -> fetch(@spawnat w2 b2()) + 1 - b4 = () -> fetch(@spawnat w3 b3()) + 1 - b4() -end -@test foo19463() == 4 - -# Testing clear! 
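# Illustrative aside (hypothetical snippet, not part of the original test file):
# what `clear!` does, as checked exhaustively below. After clearing, the binding
# still exists on the worker but is set to `nothing`. `g_clr` is a made-up name.
using Distributed, Test
p = only(addprocs(1))
global g_clr = 1.0
@test remotecall_fetch(() -> g_clr, p) == 1.0          # shipped automatically
clear!(:g_clr, p)
@test remotecall_fetch(() -> getfield(Main, :g_clr), p) === nothing
rmprocs(p)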
-function setup_syms(n, pids) - syms = [] - for i in 1:n - symstr = string("clrtest", randstring()) - sym = Symbol(symstr) - eval(:(global $sym = rand())) - for p in pids - eval(:(@test $sym == remotecall_fetch(()->$sym, $p))) - eval(:(@test remotecall_fetch(isdefined, $p, Main, Symbol($symstr)))) - end - push!(syms, sym) - end - syms -end - -function test_clear(syms, pids) - for p in pids - for sym in syms - remote_val = remotecall_fetch(()->getfield(Main, sym), p) - @test remote_val === nothing - @test remote_val != getfield(Main, sym) - end - end -end - -syms = setup_syms(1, [id_other]) -clear!(syms[1], id_other) -test_clear(syms, [id_other]) - -syms = setup_syms(1, workers()) -clear!(syms[1], workers()) -test_clear(syms, workers()) - -syms = setup_syms(3, [id_other]) -clear!(syms, id_other) -test_clear(syms, [id_other]) - -syms = setup_syms(3, workers()) -clear!(syms, workers()) -test_clear(syms, workers()) - -# Test partial recovery from a deserialization error in CapturedException -try - expr = quote - mutable struct DontExistOn1 - x - end - throw(BoundsError(DontExistOn1(1), 1)) - end - - remotecall_fetch(()->eval(expr), id_other) - error("unexpected") -catch ex - @test isa(ex.captured.ex.exceptions[1].ex, ErrorException) - @test occursin("BoundsError", ex.captured.ex.exceptions[1].ex.msg) - @test ex.captured.ex.exceptions[2].ex == UndefVarError(:DontExistOn1) -end - -let - # creates a new worker in a different folder and tries to include file - tmp_dir = mktempdir() - tmp_dir2 = joinpath(tmp_dir, "2") - tmp_file = joinpath(tmp_dir2, "testfile") - tmp_file2 = joinpath(tmp_dir2, "testfile2") - proc = addprocs_with_testenv(1, dir=tmp_dir) - try - mkdir(tmp_dir2) - write(tmp_file, "23.32 + 32 + myid() + include(\"testfile2\")") - write(tmp_file2, "myid() * 2") - function test_include_fails_to_open_file(fname) - try - include(fname) - catch exc - path = joinpath(@__DIR__, fname) - @test exc isa SystemError - @test exc.prefix == "opening file $(repr(path))" - end - end - test_include_fails_to_open_file("testfile") - test_include_fails_to_open_file("testfile2") - test_include_fails_to_open_file(joinpath("2", "testfile2")) - @test include(tmp_file) == 58.32 - @test remotecall_fetch(include, proc[1], joinpath("2", "testfile")) == 55.32 + proc[1] * 3 - finally - rmprocs(proc) - rm(tmp_file, force=true) - rm(tmp_file2, force=true) - rm(tmp_dir2, force=true) - #rm(tmp_dir, force=true) - end -end -# cookie and command line option `--worker` tests. 
remove workers, set cookie and test -struct WorkerArgTester <: ClusterManager - worker_opt - write_cookie -end - -function launch(manager::WorkerArgTester, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - cmd = `$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) $(manager.worker_opt)` - cmd = pipeline(detach(setenv(cmd, dir=dir))) - io = open(cmd, "r+") - manager.write_cookie && Distributed.write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - push!(launched, wconfig) - - notify(c) -end -manage(::WorkerArgTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing - -nprocs()>1 && rmprocs(workers()) - -npids = addprocs_with_testenv(WorkerArgTester(`--worker`, true)) -@test remotecall_fetch(myid, npids[1]) == npids[1] -rmprocs(npids) - -cluster_cookie("") # An empty string is a valid cookie -npids = addprocs_with_testenv(WorkerArgTester(`--worker=`, false)) -@test remotecall_fetch(myid, npids[1]) == npids[1] -rmprocs(npids) - -cluster_cookie("foobar") # custom cookie -npids = addprocs_with_testenv(WorkerArgTester(`--worker=foobar`, false)) -@test remotecall_fetch(myid, npids[1]) == npids[1] - -# tests for start_worker options to retain stdio (issue #31035) -struct RetainStdioTester <: ClusterManager - close_stdin::Bool - stderr_to_stdout::Bool -end - -function launch(manager::RetainStdioTester, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - jlcmd = "using Distributed; start_worker(\"\"; close_stdin=$(manager.close_stdin), stderr_to_stdout=$(manager.stderr_to_stdout));" - cmd = detach(setenv(`$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) -e $jlcmd`, dir=dir)) - proc = open(cmd, "r+") - - wconfig = WorkerConfig() - wconfig.process = proc - wconfig.io = proc.out - push!(launched, wconfig) - - notify(c) -end -manage(::RetainStdioTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing - - -nprocs()>1 && rmprocs(workers()) -cluster_cookie("") - -for close_stdin in (true, false), stderr_to_stdout in (true, false) - local npids = addprocs_with_testenv(RetainStdioTester(close_stdin,stderr_to_stdout)) - @test remotecall_fetch(myid, npids[1]) == npids[1] - if close_stdin - @test remotecall_fetch(()->stdin === devnull && !isreadable(stdin), npids[1]) - else - @test remotecall_fetch(()->stdin !== devnull && isopen(stdin) && isreadable(stdin), npids[1]) - end - @test stderr_to_stdout == remotecall_fetch(()->(stderr === stdout), npids[1]) - rmprocs(npids) -end - -# Issue # 22865 -# Must be run on a new cluster, i.e., all workers must be in the same state. -@assert nprocs() == 1 -p1,p2 = addprocs_with_testenv(2) -@everywhere f22865(p) = remotecall_fetch(x->x.*2, p, fill(1.,2)) -@test fill(2.,2) == remotecall_fetch(f22865, p1, p2) -rmprocs(p1, p2) - -function reuseport_tests() - # Run the test on all processes. 
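# Illustrative aside (hypothetical snippet, not part of the original test file): the
# shared-secret cookie that the WorkerArgTester variants above pass (or deliberately
# omit) on the `--worker` command line. Every process in a cluster must present the
# same cookie when it connects.
using Distributed, Test
old = cluster_cookie()
cluster_cookie("sesame")          # set a custom cookie on the master
@test cluster_cookie() == "sesame"
cluster_cookie(old)               # restore so that later addprocs calls still succeed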
- results = asyncmap(procs()) do p - remotecall_fetch(p) do - ports_lower = [] # ports of pids lower than myid() - ports_higher = [] # ports of pids higher than myid() - for w in Distributed.PGRP.workers - w.id == myid() && continue - port = Sockets._sockname(w.r_stream, true)[2] - if (w.id == 1) - # master connects to workers - push!(ports_higher, port) - elseif w.id < myid() - push!(ports_lower, port) - elseif w.id > myid() - push!(ports_higher, port) - end - end - @assert (length(ports_lower) + length(ports_higher)) == nworkers() - for portset in [ports_lower, ports_higher] - if (length(portset) > 0) && (length(unique(portset)) != 1) - @warn "SO_REUSEPORT TESTS FAILED. UNSUPPORTED/OLDER UNIX VERSION?" - return 0 - end - end - return myid() - end - end - - # Ensure that the code has indeed been successfully executed everywhere - @test all(in(results), procs()) -end - -# Test that the client port is reused. SO_REUSEPORT may not be supported on -# all UNIX platforms, Linux kernels prior to 3.9 and older versions of OSX -@assert nprocs() == 1 -addprocs_with_testenv(4; lazy=false) -if ccall(:jl_has_so_reuseport, Int32, ()) == 1 - reuseport_tests() -else - @info "SO_REUSEPORT is unsupported, skipping reuseport tests" -end - -# issue #27933 -a27933 = :_not_defined_27933 -@test remotecall_fetch(()->a27933, first(workers())) === a27933 - -# PR #28651 -for T in (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64) - local n = @distributed (+) for i in Base.OneTo(T(10)) - i - end - @test n == 55 -end - -# issue #28966 -let code = """ - import Distributed - Distributed.addprocs(1) - Distributed.@everywhere f() = myid() - for w in Distributed.workers() - @assert Distributed.remotecall_fetch(f, w) == w - end - """ - @test success(`$(Base.julia_cmd()) --startup-file=no -e $code`) -end - -# PR 32431: tests for internal Distributed.head_and_tail -let (h, t) = Distributed.head_and_tail(1:10, 3) - @test h == 1:3 - @test collect(t) == 4:10 -end -let (h, t) = Distributed.head_and_tail(1:10, 0) - @test h == [] - @test collect(t) == 1:10 -end -let (h, t) = Distributed.head_and_tail(1:3, 5) - @test h == 1:3 - @test collect(t) == [] -end -let (h, t) = Distributed.head_and_tail(1:3, 3) - @test h == 1:3 - @test collect(t) == [] -end -let (h, t) = Distributed.head_and_tail(Int[], 3) - @test h == [] - @test collect(t) == [] -end -let (h, t) = Distributed.head_and_tail(Int[], 0) - @test h == [] - @test collect(t) == [] -end - -# issue #35937 -let e = @test_throws RemoteException pmap(1) do _ - wait(@async error(42)) - end - # check that the inner TaskFailedException is correctly formed & can be printed - es = sprint(showerror, e.value) - @test contains(es, ":\nTaskFailedException\nStacktrace:\n") - @test contains(es, "\n\n nested task error:") - @test contains(es, "\n\n nested task error: 42\n") -end - -# issue #27429, propagate relative `include` path to workers -@everywhere include("includefile.jl") -for p in procs() - @test @fetchfrom(p, i27429) == 27429 -end - -# Propagation of package environments for local workers (#28781) -let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp - project = mkdir(joinpath(tmp, "project")) - depots = [mkdir(joinpath(tmp, "depot1")), mkdir(joinpath(tmp, "depot2"))] - load_path = [mkdir(joinpath(tmp, "load_path")), "@stdlib", "@"] - pathsep = Sys.iswindows() ? 
";" : ":" - env = Dict( - "JULIA_DEPOT_PATH" => join(depots, pathsep), - "JULIA_LOAD_PATH" => join(load_path, pathsep), - # Explicitly propagate `TMPDIR`, in the event that we're running on a - # CI system where `TMPDIR` is special. - "TMPDIR" => dirname(tmp), - ) - setupcode = """ - using Distributed, Test - @everywhere begin - depot_path() = DEPOT_PATH - load_path() = LOAD_PATH - active_project() = Base.ACTIVE_PROJECT[] - end - """ - testcode = setupcode * """ - for w in workers() - @test remotecall_fetch(depot_path, w) == DEPOT_PATH - @test remotecall_fetch(load_path, w) == LOAD_PATH - @test remotecall_fetch(Base.load_path, w) == Base.load_path() - @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] - @test remotecall_fetch(Base.active_project, w) == Base.active_project() - end - """ - # No active project - extracode = """ - for w in workers() - @test remotecall_fetch(active_project, w) === Base.ACTIVE_PROJECT[] === nothing - end - """ - cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`, env) - @test success(cmd) - # --project - extracode = """ - for w in workers() - @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] == - $(repr(project)) - end - """ - cmd = setenv(`$(julia) --project=$(project) -p1 -e $(testcode * extracode)`, env) - @test success(cmd) - # JULIA_PROJECT - cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`, - (env["JULIA_PROJECT"] = project; env)) - @test success(cmd) - # Pkg.activate(...) - activateish = """ - Base.ACTIVE_PROJECT[] = $(repr(project)) - using Distributed - addprocs(1) - """ - cmd = setenv(`$(julia) -e $(activateish * testcode * extracode)`, env) - @test success(cmd) - # JULIA_(LOAD|DEPOT)_PATH - shufflecode = """ - d = reverse(DEPOT_PATH) - append!(empty!(DEPOT_PATH), d) - l = reverse(LOAD_PATH) - append!(empty!(LOAD_PATH), l) - """ - addcode = """ - using Distributed - addprocs(1) # after shuffling - """ - extracode = """ - for w in workers() - @test remotecall_fetch(load_path, w) == $(repr(reverse(load_path))) - @test remotecall_fetch(depot_path, w) == $(repr(reverse(depots))) - end - """ - cmd = setenv(`$(julia) -e $(shufflecode * addcode * testcode * extracode)`, env) - @test success(cmd) - # Mismatch when shuffling after proc addition - failcode = shufflecode * setupcode * """ - for w in workers() - @test remotecall_fetch(load_path, w) == reverse(LOAD_PATH) == $(repr(load_path)) - @test remotecall_fetch(depot_path, w) == reverse(DEPOT_PATH) == $(repr(depots)) - end - """ - cmd = setenv(`$(julia) -p1 -e $(failcode)`, env) - @test success(cmd) - # Passing env or exeflags to addprocs(...) 
to override defaults - envcode = """ - using Distributed - project = mktempdir() - env = Dict( - "JULIA_LOAD_PATH" => string(LOAD_PATH[1], $(repr(pathsep)), "@stdlib"), - "JULIA_DEPOT_PATH" => DEPOT_PATH[1], - "TMPDIR" => ENV["TMPDIR"], - ) - addprocs(1; env = env, exeflags = `--project=\$(project)`) - env["JULIA_PROJECT"] = project - addprocs(1; env = env) - """ * setupcode * """ - for w in workers() - @test remotecall_fetch(depot_path, w) == [DEPOT_PATH[1]] - @test remotecall_fetch(load_path, w) == [LOAD_PATH[1], "@stdlib"] - @test remotecall_fetch(active_project, w) == project - @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml") - end - """ - cmd = setenv(`$(julia) -e $(envcode)`, env) - @test success(cmd) -end end - -include("splitrange.jl") - -# Clear all workers for timeout tests (issue #45785) -rmprocs(workers()) -begin - # First, assert that we get no messages when we close a cooperative worker - w = only(addprocs(1)) - @test_nowarn begin - wait(rmprocs([w])) - end - - # Next, ensure we get a log message when a worker does not cleanly exit - w = only(addprocs(1)) - @test_logs (:warn, r"sending SIGTERM") begin - remote_do(w) do - # Cause the 'exit()' message that `rmprocs()` sends to do nothing - Core.eval(Base, :(exit() = nothing)) - end - wait(rmprocs([w])) - end -end - -# Run topology tests last after removing all workers, since a given -# cluster at any time only supports a single topology. -rmprocs(workers()) -include("topology.jl") diff --git a/stdlib/Distributed/test/includefile.jl b/stdlib/Distributed/test/includefile.jl deleted file mode 100644 index faea6c11aaf6a..0000000000000 --- a/stdlib/Distributed/test/includefile.jl +++ /dev/null @@ -1,5 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# this is used to test that relative include paths work on other processes - -i27429 = 27429 diff --git a/stdlib/Distributed/test/managers.jl b/stdlib/Distributed/test/managers.jl deleted file mode 100644 index 7971222c7511a..0000000000000 --- a/stdlib/Distributed/test/managers.jl +++ /dev/null @@ -1,26 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -using Test -using Distributed -using Sockets -using Distributed: parse_machine, SSHManager, LocalManager - -@test parse_machine("127.0.0.1") == ("127.0.0.1", nothing) -@test parse_machine("127.0.0.1:80") == ("127.0.0.1", 80) -@test parse_machine("[2001:db8::1]") == ("2001:db8::1", nothing) -@test parse_machine("[2001:db8::1]:443") == ("2001:db8::1", 443) - -@test parse_machine("127.0.0.1:90") == ("127.0.0.1", 90) -@test parse_machine("127.0.0.1:1") == ("127.0.0.1", 1) -@test parse_machine("127.0.0.1:65535") == ("127.0.0.1", 65535) - -@test_throws ArgumentError parse_machine("127.0.0.1:-1") -@test_throws ArgumentError parse_machine("127.0.0.1:0") -@test_throws ArgumentError parse_machine("127.0.0.1:65536") -@test_throws ArgumentError parse_machine("[2001:db8::1]:443:888") -@test_throws ArgumentError parse_machine("[2001:db8::1") -@test_throws ArgumentError parse_machine("[2001:db8::1]:aaa") - -@test occursin(r"^SSHManager\(machines=.*\)$", - sprint((t,x) -> show(t, "text/plain", x), SSHManager("127.0.0.1"))) -@test sprint((t,x) -> show(t, "text/plain", x), LocalManager(1, true)) == "LocalManager()" diff --git a/stdlib/Distributed/test/runtests.jl b/stdlib/Distributed/test/runtests.jl deleted file mode 100644 index d34d07cc48a21..0000000000000 --- a/stdlib/Distributed/test/runtests.jl +++ /dev/null @@ -1,14 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Run the distributed test outside of the main driver since it needs its own -# set of dedicated workers. -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) -disttestfile = joinpath(@__DIR__, "distributed_exec.jl") - -cmd = `$test_exename $test_exeflags $disttestfile` - -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) && ccall(:jl_running_on_valgrind,Cint,()) == 0 - error("Distributed test failed, cmd : $cmd") -end - -include("managers.jl") diff --git a/stdlib/Distributed/test/splitrange.jl b/stdlib/Distributed/test/splitrange.jl deleted file mode 100644 index 1cb12e1952b7d..0000000000000 --- a/stdlib/Distributed/test/splitrange.jl +++ /dev/null @@ -1,35 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -using Test -using Distributed -using Distributed: splitrange - -@test splitrange(1, 11, 1) == Array{UnitRange{Int64},1}([1:11]) -@test splitrange(0, 10, 1) == Array{UnitRange{Int64},1}([0:10]) -@test splitrange(-1, 9, 1) == Array{UnitRange{Int64},1}([-1:9]) - -@test splitrange(1, 11, 2) == Array{UnitRange{Int64},1}([1:6,7:11]) -@test splitrange(0, 10, 2) == Array{UnitRange{Int64},1}([0:5,6:10]) -@test splitrange(-1, 9, 2) == Array{UnitRange{Int64},1}([-1:4,5:9]) - -@test splitrange(1, 11, 3) == Array{UnitRange{Int64},1}([1:4,5:8,9:11]) -@test splitrange(0, 10, 3) == Array{UnitRange{Int64},1}([0:3,4:7,8:10]) -@test splitrange(-1, 9, 3) == Array{UnitRange{Int64},1}([-1:2,3:6,7:9]) - -@test splitrange(1, 3, 3) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) -@test splitrange(1, 3, 4) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) -@test splitrange(0, 2, 3) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) -@test splitrange(0, 2, 4) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) -@test splitrange(-1, 1, 3) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) -@test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays - -oa = OffsetArray([123, -345], (-2,)) - -@everywhere using Test -@sync @distributed for i in eachindex(oa) - @test i ∈ (-1, 0) -end diff --git a/stdlib/Distributed/test/topology.jl b/stdlib/Distributed/test/topology.jl deleted file mode 100644 index fc969323bc587..0000000000000 --- a/stdlib/Distributed/test/topology.jl +++ /dev/null @@ -1,143 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Random - -pids = addprocs_with_testenv(4; topology="master_worker") - -let p1 = pids[1], p2 = pids[2] - @test_throws RemoteException remotecall_fetch(()->remotecall_fetch(myid, p2), p1) -end - -function test_worker_counts() - # check if the nprocs/nworkers/workers are the same on the remaining workers - np=nprocs() - nw=nworkers() - ws=sort(workers()) - - for p in workers() - @test (true, true, true) == remotecall_fetch(p, np, nw, ws) do x,y,z - (x==nprocs(), y==nworkers(), z==sort(workers())) - end - end -end - -function remove_workers_and_test() - while nworkers() > 0 - rmprocs(workers()[1]) - test_worker_counts() - if nworkers() == nprocs() - break - end - end -end - -remove_workers_and_test() - -# connect even pids to other even pids, odd to odd. 
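# Illustrative aside (hypothetical snippet, not part of the original test file): the
# `master_worker` topology checked at the top of this file. Workers connect only to
# process 1, so a worker-to-worker remote call fails; the custom topology below then
# wires even pids to even and odd pids to odd instead.
using Distributed, Test
p1, p2 = addprocs(2; topology = :master_worker)
@test_throws RemoteException remotecall_fetch(() -> remotecall_fetch(myid, p2), p1)
rmprocs(p1, p2)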
-mutable struct TopoTestManager <: ClusterManager - np::Integer -end - -function launch(manager::TopoTestManager, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - cmd = `$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) --worker` - cmd = pipeline(detach(setenv(cmd, dir=dir))) - for i in 1:manager.np - io = open(cmd, "r+") - Distributed.write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - wconfig.ident = i - wconfig.connect_idents = Vector(i+2:2:manager.np) - push!(launched, wconfig) - end - - notify(c) -end - -const map_pid_ident=Dict() -function manage(manager::TopoTestManager, id::Integer, config::WorkerConfig, op::Symbol) - if op === :register - map_pid_ident[id] = config.ident - elseif op === :interrupt - kill(config.process, 2) - end -end - -addprocs_with_testenv(TopoTestManager(8); topology="custom") - -while true - if any(x->get(map_pid_ident, x, 0)==0, workers()) - yield() - else - break - end -end - -let p1, p2 -for p1 in workers() - for p2 in workers() - i1 = map_pid_ident[p1] - i2 = map_pid_ident[p2] - if (iseven(i1) && iseven(i2)) || (isodd(i1) && isodd(i2)) - @test p2 == remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) - else - @test_throws RemoteException remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) - end - end -end -end - -remove_workers_and_test() - -# test `lazy` connection setup -function def_count_conn() - @everywhere function count_connected_workers() - count(x -> isa(x, Distributed.Worker) && isdefined(x, :r_stream) && isopen(x.r_stream), - Distributed.PGRP.workers) - end -end - -addprocs_with_testenv(8) -def_count_conn() - -# Test for 10 random combinations -wl = workers() -combinations = [] -while length(combinations) < 10 - from = rand(wl) - to = rand(wl) - if from == to || ((from,to) in combinations) || ((to,from) in combinations) - continue - else - push!(combinations, (from,to)) - end -end - -# Initially only master-worker connections ought to be setup -expected_num_conns = 8 -let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) - @test num_conns == expected_num_conns -end - -for (i, (from,to)) in enumerate(combinations) - remotecall_wait(topid->remotecall_fetch(myid, topid), from, to) - global expected_num_conns += 2 # one connection endpoint on both from and to - let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) - @test num_conns == expected_num_conns - end -end - -# With lazy=false, all connections ought to be setup during `addprocs` -rmprocs(workers()) -addprocs_with_testenv(8; lazy=false) -def_count_conn() -@test sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) == 64 - -# Cannot add more workers with a different `lazy` value -@test_throws ArgumentError addprocs_with_testenv(1; lazy=true) diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version index c6db08779e947..40004d8337091 100644 --- a/stdlib/Downloads.version +++ b/stdlib/Downloads.version @@ -1,4 +1,4 @@ DOWNLOADS_BRANCH = master -DOWNLOADS_SHA1 = f97c72fbd726e208a04c53791b35cc34c747569f +DOWNLOADS_SHA1 = e692e77fb5427bf3c6e81514b323c39a88217eec DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1 diff --git a/stdlib/FileWatching/Project.toml b/stdlib/FileWatching/Project.toml index 1da637fd4259d..5edcfdadd085d 100644 --- a/stdlib/FileWatching/Project.toml 
+++ b/stdlib/FileWatching/Project.toml @@ -1,5 +1,6 @@ name = "FileWatching" uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md index a420d49232345..15d4e39a45117 100644 --- a/stdlib/FileWatching/docs/src/index.md +++ b/stdlib/FileWatching/docs/src/index.md @@ -1,11 +1,21 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/FileWatching/docs/src/index.md" +``` + # [File Events](@id lib-filewatching) ```@docs -FileWatching.poll_fd -FileWatching.poll_file -FileWatching.watch_file -FileWatching.watch_folder -FileWatching.unwatch_folder +poll_fd +poll_file +watch_file +watch_folder +unwatch_folder +``` +```@docs +FileMonitor +FolderMonitor +PollingFileWatcher +FDWatcher ``` # Pidfile diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 2a654547ae6e3..7c743ce634193 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -6,7 +6,7 @@ Utilities for monitoring files and file descriptors for events. module FileWatching export - # one-shot API (returns results): + # one-shot API (returns results, race-y): watch_file, # efficient for small numbers of files watch_folder, # efficient for large numbers of files unwatch_folder, @@ -22,11 +22,11 @@ export trymkpidlock import Base: @handle_as, wait, close, eventloop, notify_error, IOError, - _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, - iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct, - preserve_handle, unpreserve_handle, isreadable, iswritable, isopen, - |, getproperty, propertynames -import Base.Filesystem.StatStruct + uv_req_data, uv_req_set_data, associate_julia_struct, disassociate_julia_struct, + _sizeof_uv_poll, _sizeof_uv_fs, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, + iolock_begin, iolock_end, preserve_handle, unpreserve_handle, + isreadable, iswritable, isopen, |, getproperty, propertynames +import Base.Filesystem: StatStruct, uv_fs_req_cleanup if Sys.iswindows() import Base.WindowsRawSocket end @@ -38,13 +38,13 @@ const UV_CHANGE = Int32(2) struct FileEvent renamed::Bool changed::Bool - timedout::Bool + timedout::Bool # aka canceled FileEvent(r::Bool, c::Bool, t::Bool) = new(r, c, t) end FileEvent() = FileEvent(false, false, true) FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0, (flags & UV_CHANGE) != 0, - false) + iszero(flags)) |(a::FileEvent, b::FileEvent) = FileEvent(a.renamed | b.renamed, a.changed | b.changed, @@ -78,34 +78,183 @@ isreadable(f::FDEvent) = f.readable iswritable(f::FDEvent) = f.writable |(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events)) +# Callback functions + +function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) + t = @handle_as handle FileMonitor + lock(t.notify) + try + if status != 0 + t.ioerrno = status + notify_error(t.notify, _UVError("FileMonitor", status)) + uvfinalize(t) + elseif events != t.events + events = t.events |= events + notify(t.notify, all=false) + end + finally + unlock(t.notify) + end + nothing +end + +function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) + t = @handle_as handle FolderMonitor + lock(t.notify) + try + if status != 0 + notify_error(t.notify, _UVError("FolderMonitor", status)) + else + fname = 
(filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename)) + push!(t.channel, fname => FileEvent(events)) + notify(t.notify) + end + finally + unlock(t.notify) + end + nothing +end + +function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32) + t = @handle_as handle _FDWatcher + lock(t.notify) + try + if status != 0 + notify_error(t.notify, _UVError("FDWatcher", status)) + else + t.events |= events + if t.active[1] || t.active[2] + if isempty(t.notify) + # if we keep hearing about events when nobody appears to be listening, + # stop the poll to save cycles + t.active = (false, false) + ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle) + end + end + notify(t.notify, events) + end + finally + unlock(t.notify) + end + nothing +end + +function uv_fspollcb(req::Ptr{Cvoid}) + pfw = unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher + pfw.active = false + unpreserve_handle(pfw) + @assert pointer(pfw.stat_req) == req + r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req)) + statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req) + curr_stat = StatStruct(pfw.file, statbuf, r) + uv_fs_req_cleanup(req) + lock(pfw.notify) + try + if !isempty(pfw.notify) # must discard the update if nobody watching + if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat) + if r == 0 + pfw.prev_stat = curr_stat + end + pfw.ioerrno = r + notify(pfw.notify, true) + end + pfw.timer = Timer(pfw.interval) do t + # async task + iolock_begin() + lock(pfw.notify) + try + if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock + pfw.timer = nothing + @assert !pfw.active + if isopen(pfw) && !isempty(pfw.notify) + preserve_handle(pfw) + uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},)) + err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), + eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid}) + err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", err), error=true) # likely just ENOMEM + pfw.active = true + end + end + finally + unlock(pfw.notify) + end + iolock_end() + nothing + end + end + finally + unlock(pfw.notify) + end + nothing +end + +# Types + +""" + FileMonitor(path::AbstractString) + +Watch file or directory `path` (which must exist) for changes until a change occurs. This +function does not poll the file system and instead uses platform-specific functionality to +receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS +documentation linked below for details. + +`fm = FileMonitor(path)` acts like an auto-reset Event, so `wait(fm)` blocks until there has +been at least one event in the file originally at the given path and then returns an object +with boolean fields `renamed`, `changed`, `timedout` summarizing all changes that have +occurred since the last call to `wait` returned. + +This behavior of this function varies slightly across platforms. See + for more detailed information. 
+""" mutable struct FileMonitor @atomic handle::Ptr{Cvoid} - file::String - notify::Base.ThreadSynchronizer - events::Int32 - active::Bool + const file::String + const notify::Base.ThreadSynchronizer + events::Int32 # accumulator for events that occurred since the last wait call, similar to Event with autoreset + ioerrno::Int32 # record the error, if any occurs (unlikely) FileMonitor(file::AbstractString) = FileMonitor(String(file)) function FileMonitor(file::String) handle = Libc.malloc(_sizeof_uv_fs_event) - this = new(handle, file, Base.ThreadSynchronizer(), 0, false) + this = new(handle, file, Base.ThreadSynchronizer(), 0, 0) associate_julia_struct(handle, this) iolock_begin() err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle) if err != 0 Libc.free(handle) - throw(_UVError("FileMonitor", err)) + uv_error("FileMonitor", err) end - iolock_end() finalizer(uvfinalize, this) + uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) + uv_error("FileMonitor (start)", + ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), + this.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, file, 0)) + iolock_end() return this end end + +""" + FolderMonitor(folder::AbstractString) + +Watch a file or directory `path` for changes until a change has occurred. This function does +not poll the file system and instead uses platform-specific functionality to receive +notifications from the operating system (e.g. via inotify on Linux). See the NodeJS +documentation linked below for details. + +This acts similar to a Channel, so calling `take!` (or `wait`) blocks until some change has +occurred. The `wait` function will return a pair where the first field is the name of the +changed file (if available) and the second field is an object with boolean fields `renamed` +and `changed`, giving the event that occurred on it. + +This behavior of this function varies slightly across platforms. See + for more detailed information. +""" mutable struct FolderMonitor @atomic handle::Ptr{Cvoid} # notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError} - notify::Base.ThreadSynchronizer - channel::Vector{Any} # eltype = Pair{String, FileEvent} + const notify::Base.ThreadSynchronizer + const channel::Vector{Any} # eltype = Pair{String, FileEvent} FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder)) function FolderMonitor(folder::String) handle = Libc.malloc(_sizeof_uv_fs_event) @@ -118,6 +267,7 @@ mutable struct FolderMonitor throw(_UVError("FolderMonitor", err)) end finalizer(uvfinalize, this) + uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) uv_error("FolderMonitor (start)", ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), handle, uv_jl_fseventscb_folder::Ptr{Cvoid}, folder, 0)) @@ -126,36 +276,55 @@ mutable struct FolderMonitor end end +# this is similar to uv_fs_poll, but strives to avoid the design mistakes that make it unsuitable for any usable purpose +# https://github.com/libuv/libuv/issues/4543 +""" + PollingFileWatcher(path::AbstractString, interval_s::Real=5.007) + +Monitor a file for changes by polling `stat` every `interval_s` seconds until a change +occurs or `timeout_s` seconds have elapsed. The `interval_s` should be a long period; the +default is 5.007 seconds. Call `stat` on it to get the most recent, but old, result. 
+ +This acts like an auto-reset Event, so calling `wait` blocks until the `stat` result has +changed since the previous value captured upon entry to the `wait` call. The `wait` function +will return a pair of status objects `(previous, current)` once any `stat` change is +detected since the previous time that `wait` was called. The `previous` status is always a +`StatStruct`, but it may have all of the fields zeroed (indicating the file didn't +previously exist, or wasn't previously accessible). + +The `current` status object may be a `StatStruct`, an `EOFError` (if the wait is canceled by +closing this object), or some other `Exception` subtype (if the `stat` operation failed: for +example, if the path is removed). Note that `stat` value may be outdated if the file has +changed again multiple times. + +Using [`FileMonitor`](@ref) for this operation is preferred, since it is more reliable and +efficient, although in some situations it may not be available. +""" mutable struct PollingFileWatcher - @atomic handle::Ptr{Cvoid} file::String - interval::UInt32 - notify::Base.ThreadSynchronizer - active::Bool - curr_error::Int32 - curr_stat::StatStruct + interval::Float64 + const notify::Base.ThreadSynchronizer # lock protects all fields which can be changed (including interval and file, if you really must) + timer::Union{Nothing,Timer} + const stat_req::Memory{UInt8} + active::Bool # whether there is already an uv_fspollcb in-flight, so to speak + closed::Bool # whether the user has explicitly destroyed this + ioerrno::Int32 # the stat errno as of the last result + prev_stat::StatStruct # the stat as of the last successful result PollingFileWatcher(file::AbstractString, interval::Float64=5.007) = PollingFileWatcher(String(file), interval) function PollingFileWatcher(file::String, interval::Float64=5.007) # same default as nodejs - handle = Libc.malloc(_sizeof_uv_fs_poll) - this = new(handle, file, round(UInt32, interval * 1000), Base.ThreadSynchronizer(), false, 0, StatStruct()) - associate_julia_struct(handle, this) - iolock_begin() - err = ccall(:uv_fs_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle) - if err != 0 - Libc.free(handle) - throw(_UVError("PollingFileWatcher", err)) - end - finalizer(uvfinalize, this) - iolock_end() + stat_req = Memory{UInt8}(undef, Int(_sizeof_uv_fs)) + this = new(file, interval, Base.ThreadSynchronizer(), nothing, stat_req, false, false, 0, StatStruct()) + uv_req_set_data(stat_req, this) + wait(this) # initialize with the current stat before return return this end end mutable struct _FDWatcher @atomic handle::Ptr{Cvoid} - fdnum::Int # this is NOT the file descriptor + const fdnum::Int # this is NOT the file descriptor refcount::Tuple{Int, Int} - notify::Base.ThreadSynchronizer + const notify::Base.ThreadSynchronizer events::Int32 active::Tuple{Bool, Bool} @@ -164,10 +333,13 @@ mutable struct _FDWatcher @static if Sys.isunix() _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable) function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool) - if !readable && !writable + fdnum = Core.Intrinsics.bitcast(Int32, fd) + 1 + if fdnum <= 0 + throw(ArgumentError("Passed file descriptor fd=$(fd) is not a valid file descriptor")) + elseif !readable && !writable throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher")) end - fdnum = Core.Intrinsics.bitcast(Int32, fd) + 1 + iolock_begin() if fdnum > length(FDWatchers) old_len = length(FDWatchers) @@ -232,12 +404,19 @@ mutable struct _FDWatcher 
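# Illustrative usage sketch (an editor's aside, not part of this patch): the one-shot
# `watch_file` helper built on the FileMonitor/PollingFileWatcher types documented
# above. Assumes a writable temporary directory; the timeout keeps it from hanging.
using FileWatching, Test
path, io = mktemp()
watcher = @async watch_file(path, 10)   # returns a FileEvent, or times out after 10 s
sleep(0.5)                              # give the OS watcher time to be installed
println(io, "hello"); flush(io)
ev = fetch(watcher)
@test ev.changed || ev.renamed || ev.timedout
close(io)
rm(path)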
@static if Sys.iswindows() _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable) function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool) + fdnum = Core.Intrinsics.bitcast(Int32, fd) + 1 + if fdnum <= 0 + throw(ArgumentError("Passed file descriptor fd=$(fd) is not a valid file descriptor")) + end + handle = Libc._get_osfhandle(fd) return _FDWatcher(handle, readable, writable) end _FDWatcher(fd::WindowsRawSocket, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable) function _FDWatcher(fd::WindowsRawSocket, readable::Bool, writable::Bool) - if !readable && !writable + if fd == Base.INVALID_OS_HANDLE + throw(ArgumentError("Passed file descriptor fd=$(fd) is not a valid file descriptor")) + elseif !readable && !writable throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher")) end @@ -264,9 +443,28 @@ mutable struct _FDWatcher end end +""" + FDWatcher(fd::Union{RawFD,WindowsRawSocket}, readable::Bool, writable::Bool) + +Monitor a file descriptor `fd` for changes in the read or write availability. + +The keyword arguments determine which of read and/or write status should be monitored; at +least one of them must be set to `true`. + +The returned value is an object with boolean fields `readable`, `writable`, and `timedout`, +giving the result of the polling. + +This acts like a level-set event, so calling `wait` blocks until one of those conditions is +met, but then continues to return without blocking until the condition is cleared (either +there is no more to read, or no more space in the write buffer, or both). + +!!! warning + You must call `close` manually, when finished with this object, before the fd + argument is closed. Failure to do so risks serious crashes. +""" mutable struct FDWatcher # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd` - watcher::_FDWatcher + const watcher::_FDWatcher mask::FDEvent function FDWatcher(fd::RawFD, readable::Bool, writable::Bool) return FDWatcher(fd, FDEvent(readable, writable, false, false)) @@ -317,7 +515,7 @@ function close(t::FDWatcher) close(t.watcher, mask) end -function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) +function uvfinalize(uv::Union{FileMonitor, FolderMonitor}) iolock_begin() if uv.handle != C_NULL disassociate_julia_struct(uv) # close (and free) without notify @@ -326,7 +524,7 @@ function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) iolock_end() end -function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) +function close(t::Union{FileMonitor, FolderMonitor}) iolock_begin() if t.handle != C_NULL ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) @@ -334,6 +532,21 @@ function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) iolock_end() end +function close(pfw::PollingFileWatcher) + timer = nothing + lock(pfw.notify) + try + pfw.closed = true + notify(pfw.notify, false) + timer = pfw.timer + pfw.timer = nothing + finally + unlock(pfw.notify) + end + timer === nothing || close(timer) + nothing +end + function _uv_hook_close(uv::_FDWatcher) # fyi: jl_atexit_hook can cause this to get called too Libc.free(@atomicswap :monotonic uv.handle = C_NULL) @@ -341,24 +554,11 @@ function _uv_hook_close(uv::_FDWatcher) nothing end -function _uv_hook_close(uv::PollingFileWatcher) - lock(uv.notify) - try - uv.active = false - Libc.free(@atomicswap :monotonic uv.handle = C_NULL) - notify(uv.notify, StatStruct()) - finally - 
unlock(uv.notify) - end - nothing -end - function _uv_hook_close(uv::FileMonitor) lock(uv.notify) try - uv.active = false Libc.free(@atomicswap :monotonic uv.handle = C_NULL) - notify(uv.notify, FileEvent()) + notify(uv.notify) finally unlock(uv.notify) end @@ -378,174 +578,11 @@ end isopen(fm::FileMonitor) = fm.handle != C_NULL isopen(fm::FolderMonitor) = fm.handle != C_NULL -isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL +isopen(pfw::PollingFileWatcher) = !pfw.closed isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0) isopen(pfw::FDWatcher) = !pfw.mask.timedout -function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) - t = @handle_as handle FileMonitor - lock(t.notify) - try - if status != 0 - notify_error(t.notify, _UVError("FileMonitor", status)) - else - t.events |= events - notify(t.notify, FileEvent(events)) - end - finally - unlock(t.notify) - end - nothing -end - -function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) - t = @handle_as handle FolderMonitor - lock(t.notify) - try - if status != 0 - notify_error(t.notify, _UVError("FolderMonitor", status)) - else - fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename)) - push!(t.channel, fname => FileEvent(events)) - notify(t.notify) - end - finally - unlock(t.notify) - end - nothing -end - -function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32) - t = @handle_as handle _FDWatcher - lock(t.notify) - try - if status != 0 - notify_error(t.notify, _UVError("FDWatcher", status)) - else - t.events |= events - if t.active[1] || t.active[2] - if isempty(t.notify) - # if we keep hearing about events when nobody appears to be listening, - # stop the poll to save cycles - t.active = (false, false) - ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle) - end - end - notify(t.notify, events) - end - finally - unlock(t.notify) - end - nothing -end - -function uv_fspollcb(handle::Ptr{Cvoid}, status::Int32, prev::Ptr, curr::Ptr) - t = @handle_as handle PollingFileWatcher - old_status = t.curr_error - t.curr_error = status - if status == 0 - t.curr_stat = StatStruct(convert(Ptr{UInt8}, curr)) - end - if status == 0 || status != old_status - prev_stat = StatStruct(convert(Ptr{UInt8}, prev)) - lock(t.notify) - try - notify(t.notify, prev_stat) - finally - unlock(t.notify) - end - end - nothing -end - -function __init__() - global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint)) - global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid})) - global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) - global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) - - Base.mkpidlock_hook = mkpidlock - Base.trymkpidlock_hook = trymkpidlock - Base.parse_pidfile_hook = Pidfile.parse_pidfile - - nothing -end - -function start_watching(t::_FDWatcher) - iolock_begin() - t.handle == C_NULL && throw(ArgumentError("FDWatcher is closed")) - readable = t.refcount[1] > 0 - writable = t.refcount[2] > 0 - if t.active[1] != readable || t.active[2] != writable - # make sure the READABLE / WRITEABLE state is updated - uv_error("FDWatcher (start)", - ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}), - t.handle, - (readable ? UV_READABLE : 0) | (writable ? 
UV_WRITABLE : 0), - uv_jl_pollcb::Ptr{Cvoid})) - t.active = (readable, writable) - end - iolock_end() - nothing -end - -function start_watching(t::PollingFileWatcher) - iolock_begin() - t.handle == C_NULL && throw(ArgumentError("PollingFileWatcher is closed")) - if !t.active - uv_error("PollingFileWatcher (start)", - ccall(:uv_fs_poll_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, UInt32), - t.handle, uv_jl_fspollcb::Ptr{Cvoid}, t.file, t.interval)) - t.active = true - end - iolock_end() - nothing -end - -function stop_watching(t::PollingFileWatcher) - iolock_begin() - lock(t.notify) - try - if t.active && isempty(t.notify) - t.active = false - uv_error("PollingFileWatcher (stop)", - ccall(:uv_fs_poll_stop, Int32, (Ptr{Cvoid},), t.handle)) - end - finally - unlock(t.notify) - end - iolock_end() - nothing -end - -function start_watching(t::FileMonitor) - iolock_begin() - t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed")) - if !t.active - uv_error("FileMonitor (start)", - ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), - t.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, t.file, 0)) - t.active = true - end - iolock_end() - nothing -end - -function stop_watching(t::FileMonitor) - iolock_begin() - lock(t.notify) - try - if t.active && isempty(t.notify) - t.active = false - uv_error("FileMonitor (stop)", - ccall(:uv_fs_event_stop, Int32, (Ptr{Cvoid},), t.handle)) - end - finally - unlock(t.notify) - end - iolock_end() - nothing -end +Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat) # n.b. this _wait may return spuriously early with a timedout event function _wait(fdw::_FDWatcher, mask::FDEvent) @@ -557,7 +594,20 @@ function _wait(fdw::_FDWatcher, mask::FDEvent) if !isopen(fdw) # !open throw(EOFError()) elseif events.timedout - start_watching(fdw) # make sure the poll is active + fdw.handle == C_NULL && throw(ArgumentError("FDWatcher is closed")) + # start_watching to make sure the poll is active + readable = fdw.refcount[1] > 0 + writable = fdw.refcount[2] > 0 + if fdw.active[1] != readable || fdw.active[2] != writable + # make sure the READABLE / WRITEABLE state is updated + uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint)) + uv_error("FDWatcher (start)", + ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}), + fdw.handle, + (readable ? UV_READABLE : 0) | (writable ? 
UV_WRITABLE : 0), + uv_jl_pollcb::Ptr{Cvoid})) + fdw.active = (readable, writable) + end iolock_end() return FDEvent(wait(fdw.notify)::Int32) else @@ -625,52 +675,88 @@ end function wait(pfw::PollingFileWatcher) iolock_begin() - preserve_handle(pfw) lock(pfw.notify) - local prevstat + prevstat = pfw.prev_stat + havechange = false + timer = nothing try - start_watching(pfw) + # we aren't too strict about the first interval after `wait`, but rather always + # check right away to see if it had immediately changed again, and then repeatedly + # after interval again until success + pfw.closed && throw(ArgumentError("PollingFileWatcher is closed")) + timer = pfw.timer + pfw.timer = nothing # disable Timer callback + # start_watching + if !pfw.active + preserve_handle(pfw) + uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},)) + err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), + eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid}) + err == 0 || uv_error("PollingFileWatcher (start)", err) # likely just ENOMEM + pfw.active = true + end iolock_end() - prevstat = wait(pfw.notify)::StatStruct + havechange = wait(pfw.notify)::Bool unlock(pfw.notify) iolock_begin() - lock(pfw.notify) - finally - unlock(pfw.notify) - unpreserve_handle(pfw) + catch + # stop_watching: cleanup any timers from before or after starting this wait before it failed, if there are no other watchers + latetimer = nothing + try + if isempty(pfw.notify) + latetimer = pfw.timer + pfw.timer = nothing + end + finally + unlock(pfw.notify) + end + if timer !== nothing || latetimer !== nothing + iolock_end() + timer === nothing || close(timer) + latetimer === nothing || close(latetimer) + iolock_begin() + end + rethrow() end - stop_watching(pfw) iolock_end() - if pfw.handle == C_NULL + timer === nothing || close(timer) # cleanup resources so we don't hang on exit + if !havechange # user canceled by calling close return prevstat, EOFError() - elseif pfw.curr_error != 0 - return prevstat, _UVError("PollingFileWatcher", pfw.curr_error) + end + # grab the most up-to-date stat result as of this time, even if it was a bit newer than + # the notify call (unlikely, as there would need to be a concurrent call to wait) + lock(pfw.notify) + currstat = pfw.prev_stat + ioerrno = pfw.ioerrno + unlock(pfw.notify) + if ioerrno == 0 + @assert currstat.ioerrno == 0 + return prevstat, currstat + elseif ioerrno in (Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) + return prevstat, StatStruct(pfw.file, Ptr{UInt8}(0), ioerrno) else - return prevstat, pfw.curr_stat + return prevstat, _UVError("PollingFileWatcher", ioerrno) end end function wait(m::FileMonitor) - iolock_begin() + m.handle == C_NULL && throw(EOFError()) preserve_handle(m) lock(m.notify) - local events try - start_watching(m) - iolock_end() - events = wait(m.notify)::FileEvent - events |= FileEvent(m.events) - m.events = 0 - unlock(m.notify) - iolock_begin() - lock(m.notify) + while true + m.handle == C_NULL && throw(EOFError()) + events = @atomicswap :not_atomic m.events = 0 + events == 0 || return FileEvent(events) + if m.ioerrno != 0 + uv_error("FileMonitor", m.ioerrno) + end + wait(m.notify) + end finally unlock(m.notify) unpreserve_handle(m) end - stop_watching(m) - iolock_end() - return events end function wait(m::FolderMonitor) @@ -689,6 +775,7 @@ function wait(m::FolderMonitor) end return evt::Pair{String, FileEvent} end +Base.take!(m::FolderMonitor) = wait(m) # Channel-like API """ @@ -702,6 +789,10 @@ least one of them must be set to 
`true`. The returned value is an object with boolean fields `readable`, `writable`, and `timedout`, giving the result of the polling. + +This is a thin wrapper over calling `wait` on a [`FDWatcher`](@ref), which implements the +functionality but requires the user to call `close` manually when finished with it, or risk +serious crashes. """ function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false) mask = FDEvent(readable, writable, false, false) @@ -727,7 +818,7 @@ function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, end end catch ex - ex isa EOFError() || rethrow() + ex isa EOFError || rethrow() return FDEvent() end else @@ -759,6 +850,15 @@ giving the result of watching the file. This behavior of this function varies slightly across platforms. See for more detailed information. + +This is a thin wrapper over calling `wait` on a [`FileMonitor`](@ref). This function has a +small race window between consecutive calls to `watch_file` where the file might change +without being detected. To avoid this race, use + + fm = FileMonitor(path) + wait(fm) + +directly, re-using the same `fm` each time you `wait`. """ function watch_file(s::String, timeout_s::Float64=-1.0) fm = FileMonitor(s) @@ -769,7 +869,12 @@ function watch_file(s::String, timeout_s::Float64=-1.0) close(fm) end end - return wait(fm) + try + return wait(fm) + catch ex + ex isa EOFError && return FileEvent() + rethrow() + end finally close(fm) @isdefined(timer) && close(timer) @@ -780,7 +885,7 @@ watch_file(s::AbstractString, timeout_s::Real=-1) = watch_file(String(s), Float6 """ watch_folder(path::AbstractString, timeout_s::Real=-1) -Watches a file or directory `path` for changes until a change has occurred or `timeout_s` +Watch a file or directory `path` for changes until a change has occurred or `timeout_s` seconds have elapsed. This function does not poll the file system and instead uses platform-specific functionality to receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS documentation linked below for details. @@ -794,10 +899,12 @@ giving the event. This behavior of this function varies slightly across platforms. See for more detailed information. + +This function is a thin wrapper over calling `wait` on a [`FolderMonitor`](@ref), with added timeout support. """ watch_folder(s::AbstractString, timeout_s::Real=-1) = watch_folder(String(s), timeout_s) function watch_folder(s::String, timeout_s::Real=-1) - fm = get!(watched_folders, s) do + fm = @lock watched_folders get!(watched_folders[], s) do return FolderMonitor(s) end local timer @@ -844,12 +951,12 @@ It is not recommended to do this while another task is waiting for """ unwatch_folder(s::AbstractString) = unwatch_folder(String(s)) function unwatch_folder(s::String) - fm = pop!(watched_folders, s, nothing) + fm = @lock watched_folders pop!(watched_folders[], s, nothing) fm === nothing || close(fm) nothing end -const watched_folders = Dict{String, FolderMonitor}() +const watched_folders = Lockable(Dict{String, FolderMonitor}()) """ poll_file(path::AbstractString, interval_s::Real=5.007, timeout_s::Real=-1) -> (previous::StatStruct, current) @@ -863,11 +970,15 @@ The `previous` status is always a `StatStruct`, but it may have all of the field (indicating the file didn't previously exist, or wasn't previously accessible). 
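For illustration only (an editorial sketch, not part of this patch; the path below is hypothetical), the polling API described here is typically used like this:

    using FileWatching
    using Base.Filesystem: StatStruct

    # Poll a (hypothetical) file every 2 seconds, giving up after 10 seconds.
    previous, current = poll_file("/tmp/watched.txt", 2, 10)
    if current isa EOFError
        @info "timed out with no change"
    elseif !(current isa StatStruct)
        @warn "stat failed" current          # e.g. a permission error
    elseif previous != current
        @info "file changed (mtime or inode differs)"
    end

    # To avoid the race between consecutive poll_file calls, keep one watcher alive:
    pfw = PollingFileWatcher("/tmp/watched.txt", 2.0)
    previous, current = wait(pfw)
    close(pfw)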
The `current` status object may be a `StatStruct`, an `EOFError` (indicating the timeout elapsed), -or some other `Exception` subtype (if the `stat` operation failed - for example, if the path does not exist). +or some other `Exception` subtype (if the `stat` operation failed: for example, if the path does not exist). -To determine when a file was modified, compare `current isa StatStruct && mtime(prev) != mtime(current)` to detect -notification of changes. However, using [`watch_file`](@ref) for this operation is preferred, since -it is more reliable and efficient, although in some situations it may not be available. +To determine when a file was modified, compare `!(current isa StatStruct && prev == current)` to detect +notification of changes to the mtime or inode. However, using [`watch_file`](@ref) for this operation +is preferred, since it is more reliable and efficient, although in some situations it may not be available. + +This is a thin wrapper over calling `wait` on a [`PollingFileWatcher`](@ref), which implements +the functionality, but this function has a small race window between consecutive calls to +`poll_file` where the file might change without being detected. """ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::Real=-1) pfw = PollingFileWatcher(s, Float64(interval_seconds)) @@ -878,12 +989,7 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R close(pfw) end end - statdiff = wait(pfw) - if isa(statdiff[2], IOError) - # file didn't initially exist, continue watching for it to be created (or the error to change) - statdiff = wait(pfw) - end - return statdiff + return wait(pfw) finally close(pfw) @isdefined(timer) && close(timer) @@ -893,4 +999,11 @@ end include("pidfile.jl") import .Pidfile: mkpidlock, trymkpidlock +function __init__() + Base.mkpidlock_hook = mkpidlock + Base.trymkpidlock_hook = trymkpidlock + Base.parse_pidfile_hook = Pidfile.parse_pidfile + nothing +end + end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 6d40414e20db2..6862aaa9f8453 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -4,20 +4,19 @@ module Pidfile export mkpidlock, trymkpidlock using Base: - IOError, UV_EEXIST, UV_ESRCH, + IOError, UV_EEXIST, UV_ESRCH, UV_ENOENT, Process -using Base.Libc: rand - using Base.Filesystem: File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL, rename, samefile, path_separator -using ..FileWatching: watch_file +using ..FileWatching: FileMonitor using Base.Sys: iswindows """ - mkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...) + mkpidlock([f::Function], at::String, [pid::Cint]; kwopts...) + mkpidlock(at::String, proc::Process; kwopts...) Create a pidfile lock for the path "at" for the current process or the process identified by pid or proc. Can take a function to execute once locked, @@ -32,7 +31,8 @@ Optional keyword arguments: - `mode`: file access mode (modified by the process umask). Defaults to world-readable. - `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work) - `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime. - The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid. + The file won't be deleted until 5x longer than this if the pid in the file appears that it may be valid. 
+ Or 25x longer if `refresh` is overridden to 0 to disable lock refreshing. By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an estimated normal completion time. - `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes. @@ -42,13 +42,13 @@ Optional keyword arguments: function mkpidlock end """ - trymkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...) + trymkpidlock([f::Function], at::String, [pid::Cint]; kwopts...) + trymkpidlock(at::String, proc::Process; kwopts...) Like `mkpidlock` except returns `false` instead of waiting if the file is already locked. !!! compat "Julia 1.10" This function requires at least Julia 1.10. - """ function trymkpidlock end @@ -63,7 +63,7 @@ mutable struct LockMonitor atdir, atname = splitdir(at) isempty(atdir) && (atdir = pwd()) at = realpath(atdir) * path_separator * atname - fd = open_exclusive(at; stale_age=stale_age, kwopts...) + fd = open_exclusive(at; stale_age, refresh, kwopts...) update = nothing try write_pidfile(fd, pid) @@ -75,6 +75,7 @@ mutable struct LockMonitor lock = new(at, fd, update) finalizer(close, lock) catch ex + update === nothing || close(update) tryrmopenfile(at) close(fd) rethrow(ex) @@ -98,10 +99,13 @@ end function mkpidlock(at::String, proc::Process; kwopts...) lock = mkpidlock(at, getpid(proc); kwopts...) closer = @async begin - wait(proc) - close(lock) + try + wait(proc) + finally + close(lock) + end end - isdefined(Base, :errormonitor) && Base.errormonitor(closer) + Base.errormonitor(closer) return lock end @@ -184,15 +188,16 @@ function isvalidpid(hostname::AbstractString, pid::Cuint) end """ - stale_pidfile(path::String, stale_age::Real) :: Bool + stale_pidfile(path::String, stale_age::Real, refresh::Real) :: Bool Helper function for `open_exclusive` for deciding if a pidfile is stale. """ -function stale_pidfile(path::String, stale_age::Real) +function stale_pidfile(path::String, stale_age::Real, refresh::Real) pid, hostname, age = parse_pidfile(path) age < -stale_age && @warn "filesystem time skew detected" path=path + longer_factor = refresh == 0 ? 25 : 5 if age > stale_age - if (age > stale_age * 25) || !isvalidpid(hostname, pid) + if (age > stale_age * longer_factor) || !isvalidpid(hostname, pid) return true end end @@ -219,7 +224,7 @@ struct PidlockedError <: Exception end """ - open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File + open_exclusive(path::String; mode, poll_interval, wait, stale_age, refresh) :: File Create a new a file for read-write advisory-exclusive access. 
If `wait` is `false` then error out if the lock files exist @@ -231,13 +236,14 @@ function open_exclusive(path::String; mode::Integer = 0o444 #= read-only =#, poll_interval::Real = 10 #= seconds =#, wait::Bool = true #= return on failure if false =#, - stale_age::Real = 0 #= disabled =#) + stale_age::Real = 0 #= disabled =#, + refresh::Real = stale_age/2) # fast-path: just try to open it file = tryopen_exclusive(path, mode) file === nothing || return file if !wait if file === nothing && stale_age > 0 - if stale_age > 0 && stale_pidfile(path, stale_age) + if stale_age > 0 && stale_pidfile(path, stale_age, refresh) @warn "attempting to remove probably stale pidfile" path=path tryrmopenfile(path) end @@ -250,20 +256,44 @@ function open_exclusive(path::String; end end # fall-back: wait for the lock - + watch = Lockable(Core.Box(nothing)) while true - # start the file-watcher prior to checking for the pidfile existence - t = @async try - watch_file(path, poll_interval) + # now try again to create it + # try to start the file-watcher prior to checking for the pidfile existence + watch = try + FileMonitor(path) catch ex isa(ex, IOError) || rethrow(ex) - sleep(poll_interval) # if the watch failed, convert to just doing a sleep + ex.code != UV_ENOENT # if the file was deleted in the meantime, don't sleep at all, even if the lock fails end - # now try again to create it - file = tryopen_exclusive(path, mode) - file === nothing || return file - Base.wait(t) # sleep for a bit before trying again - if stale_age > 0 && stale_pidfile(path, stale_age) + timeout = nothing + if watch isa FileMonitor && stale_age > 0 + let watch = watch + timeout = Timer(stale_age) do t + close(watch) + end + end + end + try + file = tryopen_exclusive(path, mode) + file === nothing || return file + if watch isa FileMonitor + try + Base.wait(watch) # will time-out after stale_age passes + catch ex + isa(ex, EOFError) || isa(ex, IOError) || rethrow(ex) + end + end + if watch === true # if the watch failed, convert to just doing a sleep + sleep(poll_interval) + end + finally + # something changed about the path, so watch is now possibly monitoring the wrong file handle + # it will need to be recreated just before the next tryopen_exclusive attempt + timeout isa Timer && close(timeout) + watch isa FileMonitor && close(watch) + end + if stale_age > 0 && stale_pidfile(path, stale_age, refresh) # if the file seems stale, try to remove it before attempting again # set stale_age to zero so we won't attempt again, even if the attempt fails stale_age -= stale_age diff --git a/stdlib/FileWatching/test/pidfile.jl b/stdlib/FileWatching/test/pidfile.jl index c2cb0c88a1b1e..3464a24175632 100644 --- a/stdlib/FileWatching/test/pidfile.jl +++ b/stdlib/FileWatching/test/pidfile.jl @@ -203,18 +203,33 @@ end @assert !ispath("pidfile") @testset "open_exclusive: break lock" begin - # test for stale_age - t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File - try - write_pidfile(f, getpid()) - finally + @testset "using stale_age without lock refreshing" begin + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10, refresh=0)::File + try + write_pidfile(f, getpid()) + finally + close(f) + end + @test t < 2 + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1, refresh=0)::File close(f) + @test 20 < t < 50 + rm("pidfile") + end + + @testset "using stale_age with lock refreshing on (default)" begin + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File + try + 
write_pidfile(f, getpid()) + finally + close(f) + end + @test t < 2 + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=5)::File + close(f) + @test 20 < t < 50 + rm("pidfile") end - @test t < 2 - t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File - close(f) - @test 20 < t < 50 - rm("pidfile") t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File close(f) diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index 75b17b5f0e511..def555154264d 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ b/stdlib/FileWatching/test/runtests.jl @@ -2,6 +2,7 @@ using Test, FileWatching using Base: uv_error, Experimental +using Base.Filesystem: StatStruct @testset "FileWatching" begin @@ -24,7 +25,7 @@ for i in 1:n uv_error("pipe", ccall(:uv_pipe, Cint, (Ptr{NTuple{2, Base.OS_HANDLE}}, Cint, Cint), Ref(pipe_fds, i), 0, 0)) end Ctype = Sys.iswindows() ? Ptr{Cvoid} : Cint - FDmax = Sys.iswindows() ? 0x7fff : (n + 60 + (isdefined(Main, :Revise) * 30)) # expectations on reasonable values + FDmax = Sys.iswindows() ? typemax(Int32) : (n + 60 + (isdefined(Main, :Revise) * 30)) # expectations on reasonable values fd_in_limits = 0 <= Int(Base.cconvert(Ctype, pipe_fds[i][1])) <= FDmax && 0 <= Int(Base.cconvert(Ctype, pipe_fds[i][2])) <= FDmax @@ -161,19 +162,20 @@ test2_12992() ####################################################################### # This section tests file watchers. # ####################################################################### -F_GETPATH = Sys.islinux() || Sys.iswindows() || Sys.isapple() # platforms where F_GETPATH is available +F_GETPATH = Sys.islinux() || Sys.iswindows() || Sys.isapple() # platforms where F_GETPATH is available F_PATH = F_GETPATH ? 
"afile.txt" : "" dir = mktempdir() file = joinpath(dir, "afile.txt") # initialize a watch_folder instance and create afile.txt function test_init_afile() - @test isempty(FileWatching.watched_folders) + watched_folders = FileWatching.watched_folders + @test @lock watched_folders isempty(watched_folders[]) @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent())) @test @elapsed(@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))) <= 0.5 - @test length(FileWatching.watched_folders) == 1 + @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1 @test unwatch_folder(dir) === nothing - @test isempty(FileWatching.watched_folders) + @test @lock watched_folders isempty(watched_folders[]) @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) <= 0.5 @test unwatch_folder(dir) === nothing @@ -203,7 +205,7 @@ function test_init_afile() @test unwatch_folder(dir) === nothing @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent())) @test 0.9 <= @elapsed(@test(watch_folder(dir, 1) == ("" => FileWatching.FileEvent()))) - @test length(FileWatching.watched_folders) == 1 + @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1 nothing end @@ -218,7 +220,7 @@ function test_timeout(tval) @async test_file_poll(channel, 10, tval) tr = take!(channel) end - @test tr[1] === Base.Filesystem.StatStruct() && tr[2] === EOFError() + @test ispath(tr[1]::StatStruct) && tr[2] === EOFError() @test tval <= t_elapsed end @@ -231,7 +233,7 @@ function test_touch(slval) write(f, "Hello World\n") close(f) tr = take!(channel) - @test ispath(tr[1]) && ispath(tr[2]) + @test ispath(tr[1]::StatStruct) && ispath(tr[2]::StatStruct) fetch(t) end @@ -276,7 +278,7 @@ function test_dirmonitor_wait(tval) end end fname, events = wait(fm)::Pair - @test fname == F_PATH + @test fname == basename(file) @test events.changed && !events.timedout && !events.renamed close(fm) end @@ -435,16 +437,21 @@ end @test_throws(Base._UVError("FolderMonitor (start)", Base.UV_ENOENT), watch_folder("____nonexistent_file", 10)) @test(@elapsed( - @test(poll_file("____nonexistent_file", 1, 3.1) === - (Base.Filesystem.StatStruct(), EOFError()))) > 3) + @test(poll_file("____nonexistent_file", 1, 3.1) == + (StatStruct(), EOFError()))) > 3) unwatch_folder(dir) -@test isempty(FileWatching.watched_folders) +@test @lock FileWatching.watched_folders isempty(FileWatching.watched_folders[]) rm(file) rm(dir) +# Test that creating a FDWatcher with a (probably) negative FD fails +@test_throws ArgumentError FDWatcher(RawFD(-1), true, true) + @testset "Pidfile" begin include("pidfile.jl") end +@test isempty(Docs.undocumented_names(FileWatching)) + end # testset diff --git a/stdlib/Future/Project.toml b/stdlib/Future/Project.toml index ffdbaf94b9853..c09489812ce01 100644 --- a/stdlib/Future/Project.toml +++ b/stdlib/Future/Project.toml @@ -1,5 +1,6 @@ name = "Future" uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/stdlib/Future/docs/src/index.md b/stdlib/Future/docs/src/index.md index dcb1a36541b6e..99250296f2c7d 100644 --- a/stdlib/Future/docs/src/index.md +++ b/stdlib/Future/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Future/docs/src/index.md" +``` + # Future The `Future` module implements future behavior of already existing functions, diff 
--git a/stdlib/Future/test/runtests.jl b/stdlib/Future/test/runtests.jl index 6deffe74d891c..6e02f17358ab3 100644 --- a/stdlib/Future/test/runtests.jl +++ b/stdlib/Future/test/runtests.jl @@ -2,3 +2,7 @@ using Test using Future + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Future)) +end diff --git a/stdlib/GMP_jll/Project.toml b/stdlib/GMP_jll/Project.toml index 510b6f6a49c60..a31688d0a9c07 100644 --- a/stdlib/GMP_jll/Project.toml +++ b/stdlib/GMP_jll/Project.toml @@ -1,6 +1,6 @@ name = "GMP_jll" uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d" -version = "6.2.1+2" +version = "6.3.0+2" [deps] Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" diff --git a/stdlib/GMP_jll/src/GMP_jll.jl b/stdlib/GMP_jll/src/GMP_jll.jl index fde2fc15acf90..ae8b3c0b3e7d5 100644 --- a/stdlib/GMP_jll/src/GMP_jll.jl +++ b/stdlib/GMP_jll/src/GMP_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/GMP_jll.jl baremodule GMP_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/GMP_jll/test/runtests.jl b/stdlib/GMP_jll/test/runtests.jl index 7c0d877945231..b2b35b98cbe17 100644 --- a/stdlib/GMP_jll/test/runtests.jl +++ b/stdlib/GMP_jll/test/runtests.jl @@ -4,5 +4,5 @@ using Test, Libdl, GMP_jll @testset "GMP_jll" begin vn = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar})))) - @test vn == v"6.2.1" + @test vn == v"6.3.0" end diff --git a/stdlib/InteractiveUtils/Project.toml b/stdlib/InteractiveUtils/Project.toml index e13902375e005..53cc9218eff5d 100644 --- a/stdlib/InteractiveUtils/Project.toml +++ b/stdlib/InteractiveUtils/Project.toml @@ -1,5 +1,6 @@ name = "InteractiveUtils" uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" [deps] Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" diff --git a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md index 5ee8e57adc848..69b68a27e4e81 100644 --- a/stdlib/InteractiveUtils/docs/src/index.md +++ b/stdlib/InteractiveUtils/docs/src/index.md @@ -1,6 +1,12 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/InteractiveUtils/docs/src/index.md" +``` + # [Interactive Utilities](@id man-interactive-utils) -This module is intended for interactive work. It is loaded automatically in [interactive mode](@ref command-line-interface). +The `InteractiveUtils` module provides utilities for interactive use of Julia, +such as code introspection and clipboard access. +It is intended for interactive work and is loaded automatically in [interactive mode](@ref command-line-interface). ```@docs InteractiveUtils.apropos @@ -27,5 +33,7 @@ InteractiveUtils.@code_llvm InteractiveUtils.code_native InteractiveUtils.@code_native InteractiveUtils.@time_imports +InteractiveUtils.@trace_compile +InteractiveUtils.@trace_dispatch InteractiveUtils.clipboard ``` diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index 25f06250c3f8f..4a320282610cd 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -1,17 +1,23 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +""" +The `InteractiveUtils` module provides utilities for interactive use of Julia, +such as code introspection and clipboard access. +It is intended for interactive work and is loaded automatically in interactive mode. 
+""" module InteractiveUtils Base.Experimental.@optlevel 1 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo, versioninfo, subtypes, supertypes, @which, @edit, @less, @functionloc, @code_warntype, - @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard + @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard, @trace_compile, @trace_dispatch, + @activate import Base.Docs.apropos -using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, show_unquoted, summarysize, - signature_type, format_bytes +using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, summarysize, + signature_type, format_bytes, isbindingresolved using Base.Libc using Markdown @@ -23,12 +29,12 @@ include("clipboard.jl") """ varinfo(m::Module=Main, pattern::Regex=r""; all=false, imported=false, recursive=false, sortby::Symbol=:name, minsize::Int=0) -Return a markdown table giving information about exported global variables in a module, optionally restricted +Return a markdown table giving information about public global variables in a module, optionally restricted to those matching `pattern`. The memory consumption estimate is an approximate lower bound on the size of the internal structure of the object. -- `all` : also list non-exported objects defined in the module, deprecated objects, and compiler-generated objects. +- `all` : also list non-public objects defined in the module, deprecated objects, and compiler-generated objects. - `imported` : also list objects explicitly imported from other modules. - `recursive` : recursively include objects in sub-modules, observing the same settings in each. - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`. @@ -99,8 +105,25 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) if !isempty(Base.GIT_VERSION_INFO.commit_short) println(io, "Commit $(Base.GIT_VERSION_INFO.commit_short) ($(Base.GIT_VERSION_INFO.date_string))") end - if Base.isdebugbuild() - println(io, "DEBUG build") + official_release = Base.TAGGED_RELEASE_BANNER == "Official https://julialang.org release" + if Base.isdebugbuild() || !isempty(Base.TAGGED_RELEASE_BANNER) || (Base.GIT_VERSION_INFO.tagged_commit && !official_release) + println(io, "Build Info:") + if Base.isdebugbuild() + println(io, " DEBUG build") + end + if !isempty(Base.TAGGED_RELEASE_BANNER) + println(io, " ", Base.TAGGED_RELEASE_BANNER) + end + if Base.GIT_VERSION_INFO.tagged_commit && !official_release + println(io, + """ + + Note: This is an unofficial build, please report bugs to the project + responsible for this build and not to the Julia project unless you can + reproduce the issue using official builds available at https://julialang.org/downloads + """ + ) + end end println(io, "Platform Info:") println(io, " OS: ", Sys.iswindows() ? "Windows" : Sys.isapple() ? 
@@ -142,9 +165,10 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) println(io) end println(io, " WORD_SIZE: ", Sys.WORD_SIZE) - println(io, " LIBM: ",Base.libm_name) println(io, " LLVM: libLLVM-",Base.libllvm_version," (", Sys.JIT, ", ", Sys.CPU_NAME, ")") - println(io, " Threads: ", Threads.maxthreadid(), " on ", Sys.CPU_THREADS, " virtual cores") + println(io, " GC: ", unsafe_string(ccall(:jl_gc_active_impl, Ptr{UInt8}, ()))) + println(io, """Threads: $(Threads.nthreads(:default)) default, $(Threads.nthreads(:interactive)) interactive, \ + $(Threads.ngcthreads()) GC (on $(Sys.CPU_THREADS) virtual cores)""") function is_nonverbose_env(k::String) return occursin(r"^JULIA_|^DYLD_|^LD_", k) @@ -185,6 +209,8 @@ The optional second argument restricts the search to a particular module or func If keyword `supertypes` is `true`, also return arguments with a parent type of `typ`, excluding type `Any`. + +See also: [`methods`](@ref). """ function methodswith(@nospecialize(t::Type), @nospecialize(f::Base.Callable), meths = Method[]; supertypes::Bool=false) for d in methods(f) @@ -238,7 +264,7 @@ function _subtypes_in!(mods::Array, x::Type) m = pop!(mods) xt = xt::DataType for s in names(m, all = true) - if isdefined(m, s) && !isdeprecated(m, s) + if isbindingresolved(m, s) && !isdeprecated(m, s) && isdefined(m, s) t = getfield(m, s) dt = isa(t, UnionAll) ? unwrap_unionall(t) : t if isa(dt, DataType) @@ -314,7 +340,7 @@ export peakflops function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) # Base.depwarn("`peakflops` has moved to the LinearAlgebra module, " * # "add `using LinearAlgebra` to your imports.", :peakflops) - let LinearAlgebra = Base.require(Base.PkgId( + let LinearAlgebra = Base.require_stdlib(Base.PkgId( Base.UUID((0x37e2e46d_f89d_539d,0xb4ee_838fcccc9c8e)), "LinearAlgebra")) return LinearAlgebra.peakflops(n, eltype=eltype, ntrials=ntrials, parallel=parallel) end @@ -329,14 +355,15 @@ function report_bug(kind) if Base.locate_package(BugReportingId) === nothing @info "Package `BugReporting` not found - attempting temporary installation" # Create a temporary environment and add BugReporting - let Pkg = Base.require(Base.PkgId( + let Pkg = Base.require_stdlib(Base.PkgId( Base.UUID((0x44cfe95a_1eb2_52ea,0xb672_e2afdf69b78f)), "Pkg")) mktempdir() do tmp old_load_path = copy(LOAD_PATH) push!(empty!(LOAD_PATH), joinpath(tmp, "Project.toml")) old_active_project = Base.ACTIVE_PROJECT[] Base.ACTIVE_PROJECT[] = nothing - Pkg.add(Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid)) + pkgspec = @invokelatest Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid) + @invokelatest Pkg.add(pkgspec) BugReporting = Base.require(BugReportingId) append!(empty!(LOAD_PATH), old_load_path) Base.ACTIVE_PROJECT[] = old_active_project @@ -345,7 +372,7 @@ function report_bug(kind) else BugReporting = Base.require(BugReportingId) end - return Base.invokelatest(BugReporting.make_interactive_report, kind, ARGS) + return @invokelatest BugReporting.make_interactive_report(kind, ARGS) end end diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl index c2abda9a60cc3..6bcd61584a2b8 100644 --- a/stdlib/InteractiveUtils/src/clipboard.jl +++ b/stdlib/InteractiveUtils/src/clipboard.jl @@ -100,7 +100,7 @@ elseif Sys.iswindows() pdata == C_NULL && return cleanup(:GlobalAlloc) plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata) plock == C_NULL && return cleanup(:GlobalLock) - 
GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, x_u16), sizeof(x_u16)) + GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, Base.cconvert(Ptr{UInt16}, x_u16)), sizeof(x_u16)) unlock = ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata) (unlock == 0 && Libc.GetLastError() == 0) || return cleanup(:GlobalUnlock) # this should never fail pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) # CF_UNICODETEXT diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 646028575d052..1aa83a19285ff 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ b/stdlib/InteractiveUtils/src/codeview.jl @@ -54,15 +54,84 @@ function is_expected_union(u::Union) return true end +function print_warntype_codeinfo(io::IO, src::Core.CodeInfo, @nospecialize(rettype), nargs::Int; lineprinter, label_dynamic_calls) + if src.slotnames !== nothing + slotnames = Base.sourceinfo_slotnames(src) + io = IOContext(io, :SOURCE_SLOTNAMES => slotnames) + slottypes = src.slottypes + nargs > 0 && println(io, "Arguments") + for i = 1:length(slotnames) + if i == nargs + 1 + println(io, "Locals") + end + print(io, " ", slotnames[i]) + if isa(slottypes, Vector{Any}) + warntype_type_printer(io; type=slottypes[i], used=true) + end + println(io) + end + end + print(io, "Body") + warntype_type_printer(io; type=rettype, used=true) + println(io) + irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer; label_dynamic_calls) + Base.IRShow.show_ir(io, src, irshow_config) + println(io) +end + +function print_warntype_mi(io::IO, mi::Core.MethodInstance) + println(io, mi) + print(io, " from ") + println(io, mi.def) + if !isempty(mi.sparam_vals) + println(io, "Static Parameters") + sig = mi.def.sig + warn_color = Base.warn_color() # more mild user notification + for i = 1:length(mi.sparam_vals) + sig = sig::UnionAll + name = sig.var.name + val = mi.sparam_vals[i] + print_highlighted(io::IO, v::String, color::Symbol) = + if highlighting[:warntype] + Base.printstyled(io, v; color) + else + Base.print(io, v) + end + if val isa TypeVar + if val.lb === Union{} + print(io, " ", name, " <: ") + print_highlighted(io, "$(val.ub)", warn_color) + elseif val.ub === Any + print(io, " ", sig.var.name, " >: ") + print_highlighted(io, "$(val.lb)", warn_color) + else + print(io, " ") + print_highlighted(io, "$(val.lb)", warn_color) + print(io, " <: ", sig.var.name, " <: ") + print_highlighted(io, "$(val.ub)", warn_color) + end + elseif val isa typeof(Vararg) + print(io, " ", name, "::") + print_highlighted(io, "Int", warn_color) + else + print(io, " ", sig.var.name, " = ") + print_highlighted(io, "$(val)", :cyan) # show the "good" type + end + println(io) + sig = sig.body + end + end +end + """ code_warntype([io::IO], f, types; debuginfo=:default) Prints lowered and type-inferred ASTs for the methods matching the given generic function and type signature to `io` which defaults to `stdout`. The ASTs are annotated in such a way -as to cause "non-leaf" types which may be problematic for performance to be emphasized +as to cause non-concrete types which may be problematic for performance to be emphasized (if color is available, displayed in red). This serves as a warning of potential type instability. 
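As a quick illustration of the behavior described in this docstring (an editorial sketch, not part of this patch):

    using InteractiveUtils

    unstable(flag) = flag ? 1 : 1.0    # inferred return type is Union{Float64, Int64}

    # The Body::Union{Float64, Int64} annotation is emphasized in the output;
    # small unions of concrete types like this one get the milder highlight
    # mentioned below rather than red.
    code_warntype(unstable, (Bool,))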
-Not all non-leaf types are particularly problematic for performance, and the performance +Not all non-concrete types are particularly problematic for performance, and the performance characteristics of a particular type is an implementation detail of the compiler. `code_warntype` will err on the side of coloring types red if they might be a performance concern, so some types may be colored red even if they do not impact performance. @@ -70,94 +139,46 @@ Small unions of concrete types are usually not a concern, so these are highlight Keyword argument `debuginfo` may be one of `:source` or `:none` (default), to specify the verbosity of code comments. -See [`@code_warntype`](@ref man-code-warntype) for more information. +See the [`@code_warntype`](@ref man-code-warntype) section in the Performance Tips page of the manual for more information. + +See also: [`@code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref), [`code_native`](@ref). """ -function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt(f)); +function code_warntype(io::IO, @nospecialize(f), @nospecialize(tt=Base.default_tt(f)); + world=Base.get_world_counter(), + interp::Base.Compiler.AbstractInterpreter=Base.Compiler.NativeInterpreter(world), debuginfo::Symbol=:default, optimize::Bool=false, kwargs...) + (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && + error("code reflection cannot be used from generated functions") debuginfo = Base.IRShow.debuginfo(debuginfo) lineprinter = Base.IRShow.__debuginfo[debuginfo] - for (src, rettype) in code_typed(f, t; optimize, kwargs...) - if !(src isa Core.CodeInfo) - println(io, src) - println(io, " failed to infer") - continue - end - lambda_io::IOContext = io - p = src.parent - nargs::Int = 0 - if p isa Core.MethodInstance - println(io, p) - print(io, " from ") - println(io, p.def) - p.def isa Method && (nargs = p.def.nargs) - if !isempty(p.sparam_vals) - println(io, "Static Parameters") - sig = p.def.sig - warn_color = Base.warn_color() # more mild user notification - for i = 1:length(p.sparam_vals) - sig = sig::UnionAll - name = sig.var.name - val = p.sparam_vals[i] - print_highlighted(io::IO, v::String, color::Symbol) = - if highlighting[:warntype] - Base.printstyled(io, v; color) - else - Base.print(io, v) - end - if val isa TypeVar - if val.lb === Union{} - print(io, " ", name, " <: ") - print_highlighted(io, "$(val.ub)", warn_color) - elseif val.ub === Any - print(io, " ", sig.var.name, " >: ") - print_highlighted(io, "$(val.lb)", warn_color) - else - print(io, " ") - print_highlighted(io, "$(val.lb)", warn_color) - print(io, " <: ", sig.var.name, " <: ") - print_highlighted(io, "$(val.ub)", warn_color) - end - elseif val isa typeof(Vararg) - print(io, " ", name, "::") - print_highlighted(io, "Int", warn_color) - else - print(io, " ", sig.var.name, " = ") - print_highlighted(io, "$(val)", :cyan) # show the "good" type - end - println(io) - sig = sig.body - end - end - end - if src.slotnames !== nothing - slotnames = Base.sourceinfo_slotnames(src) - lambda_io = IOContext(lambda_io, :SOURCE_SLOTNAMES => slotnames) - slottypes = src.slottypes - nargs > 0 && println(io, "Arguments") - for i = 1:length(slotnames) - if i == nargs + 1 - println(io, "Locals") - end - print(io, " ", slotnames[i]) - if isa(slottypes, Vector{Any}) - warntype_type_printer(io; type=slottypes[i], used=true) - end - println(io) - end + nargs::Int = 0 + if isa(f, Core.OpaqueClosure) + isa(f.source, Method) && (nargs = f.source.nargs) + 
print_warntype_codeinfo(io, Base.code_typed_opaque_closure(f, tt)[1]..., nargs; + lineprinter, label_dynamic_calls = optimize) + return nothing + end + tt = Base.signature_type(f, tt) + matches = findall(tt, Base.Compiler.method_table(interp)) + matches === nothing && Base.raise_match_failure(:code_warntype, tt) + for match in matches.matches + match = match::Core.MethodMatch + src = Base.Compiler.typeinf_code(interp, match, optimize) + mi = Base.Compiler.specialize_method(match) + mi.def isa Method && (nargs = (mi.def::Method).nargs) + print_warntype_mi(io, mi) + if src isa Core.CodeInfo + print_warntype_codeinfo(io, src, src.rettype, nargs; + lineprinter, label_dynamic_calls = optimize) + else + println(io, " inference not successful") end - print(io, "Body") - warntype_type_printer(io; type=rettype, used=true) - println(io) - irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer) - Base.IRShow.show_ir(lambda_io, src, irshow_config) - println(io) end nothing end -code_warntype(@nospecialize(f), @nospecialize(t=Base.default_tt(f)); kwargs...) = - code_warntype(stdout, f, t; kwargs...) +code_warntype(args...; kwargs...) = (@nospecialize; code_warntype(stdout, args...; kwargs...)) -import Base.CodegenParams +using Base: CodegenParams const GENERIC_SIG_WARNING = "; WARNING: This code may not match what actually runs.\n" const OC_MISMATCH_WARNING = @@ -170,15 +191,8 @@ const OC_MISMATCH_WARNING = function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, raw::Bool, dump_module::Bool, syntax::Symbol, - optimize::Bool, debuginfo::Symbol, binary::Bool) - params = CodegenParams(debug_info_kind=Cint(0), - safepoint_on_entry=raw, gcstack_arg=raw) - _dump_function(f, t, native, wrapper, raw, dump_module, syntax, - optimize, debuginfo, binary, params) -end -function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, - raw::Bool, dump_module::Bool, syntax::Symbol, - optimize::Bool, debuginfo::Symbol, binary::Bool, params::CodegenParams) + optimize::Bool, debuginfo::Symbol, binary::Bool, + params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw)) ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") if isa(f, Core.Builtin) throw(ArgumentError("argument is not a generic function")) @@ -188,20 +202,20 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe if !isa(f, Core.OpaqueClosure) world = Base.get_world_counter() match = Base._which(signature_type(f, t); world) - mi = Core.Compiler.specialize_method(match) + mi = Base.specialize_method(match) # TODO: use jl_is_cacheable_sig instead of isdispatchtuple isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) else world = UInt64(f.world) - if Core.Compiler.is_source_inferred(f.source.source) + tt = Base.to_tuple_type(t) + if !isdefined(f.source, :source) # OC was constructed from inferred source. There's only one # specialization and we can't infer anything more precise either. 
world = f.source.primary_world mi = f.source.specializations::Core.MethodInstance - Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING) + Base.hasintersect(typeof(f).parameters[1], tt) || (warning = OC_MISMATCH_WARNING) else - mi = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec()) - actual = isdispatchtuple(mi.specTypes) + mi = Base.specialize_method(f.source, Tuple{typeof(f.captures), tt.parameters...}, Core.svec()) isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) end end @@ -215,15 +229,29 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe if syntax !== :att && syntax !== :intel throw(ArgumentError("'syntax' must be either :intel or :att")) end - if dump_module - # we want module metadata, so use LLVM to generate assembly output - str = _dump_function_native_assembly(mi, world, wrapper, syntax, debuginfo, binary, raw, params) - else - # if we don't want the module metadata, just disassemble what our JIT has + str = "" + if !dump_module + # if we don't want the module metadata, attempt to disassemble what our JIT has str = _dump_function_native_disassembly(mi, world, wrapper, syntax, debuginfo, binary) end + if isempty(str) + # if that failed (or we want metadata), use LLVM to generate more accurate assembly output + if !isa(f, Core.OpaqueClosure) + src = Base.Compiler.typeinf_code(Base.Compiler.NativeInterpreter(world), mi, true) + else + src, rt = Base.get_oc_code_rt(nothing, f, tt, true) + end + src isa Core.CodeInfo || error("failed to infer source for $mi") + str = _dump_function_native_assembly(mi, src, wrapper, syntax, debuginfo, binary, raw, params) + end else - str = _dump_function_llvm(mi, world, wrapper, !raw, dump_module, optimize, debuginfo, params) + if !isa(f, Core.OpaqueClosure) + src = Base.Compiler.typeinf_code(Base.Compiler.NativeInterpreter(world), mi, true) + else + src, rt = Base.get_oc_code_rt(nothing, f, tt, true) + end + src isa Core.CodeInfo || error("failed to infer source for $mi") + str = _dump_function_llvm(mi, src, wrapper, !raw, dump_module, optimize, debuginfo, params) end str = warning * str return str @@ -243,11 +271,11 @@ struct LLVMFDump f::Ptr{Cvoid} # opaque end -function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt, +function _dump_function_native_assembly(mi::Core.MethodInstance, src::Core.CodeInfo, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, raw::Bool, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump},mi::Any, world::UInt, wrapper::Bool, + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, src::Any, wrapper::Bool, true::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") str = @ccall jl_dump_function_asm(llvmf_dump::Ptr{LLVMFDump}, false::Bool, @@ -257,12 +285,12 @@ function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt, end function _dump_function_llvm( - mi::Core.MethodInstance, world::UInt, wrapper::Bool, + mi::Core.MethodInstance, src::Core.CodeInfo, wrapper::Bool, strip_ir_metadata::Bool, dump_module::Bool, optimize::Bool, debuginfo::Symbol, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, world::UInt, + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, src::Any, wrapper::Bool, optimize::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == 
C_NULL && error("could not compile the specified method") str = @ccall jl_dump_function_ir(llvmf_dump::Ptr{LLVMFDump}, strip_ir_metadata::Bool, @@ -280,20 +308,20 @@ If the `optimize` keyword is unset, the code will be shown before LLVM optimizat All metadata and dbg.* calls are removed from the printed bitcode. For the full IR, set the `raw` keyword to true. To dump the entire module that encapsulates the function (with declarations), set the `dump_module` keyword to true. Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments. + +See also: [`@code_llvm`](@ref), [`code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_native`](@ref). """ -function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool, - dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) - d = _dump_function(f, types, false, false, raw, dump_module, :intel, optimize, debuginfo, false) +function code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); + raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default, + params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw)) + d = _dump_function(f, types, false, false, raw, dump_module, :intel, optimize, debuginfo, false, params) if highlighting[:llvm] && get(io, :color, false)::Bool print_llvm(io, d) else print(io, d) end end -code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) = - code_llvm(io, f, types, raw, dump_module, optimize, debuginfo) -code_llvm(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) = - code_llvm(stdout, f, types; raw, dump_module, optimize, debuginfo) +code_llvm(args...; kwargs...) = (@nospecialize; code_llvm(stdout, args...; kwargs...)) """ code_native([io=stdout,], f, types; syntax=:intel, debuginfo=:default, binary=false, dump_module=true) @@ -307,21 +335,20 @@ generic function and type signature to `io`. * If `dump_module` is `false`, do not print metadata such as rodata or directives. * If `raw` is `false`, uninteresting instructions (like the safepoint function prologue) are elided. -See also: [`@code_native`](@ref), [`code_llvm`](@ref), [`code_typed`](@ref) and [`code_lowered`](@ref) +See also: [`@code_native`](@ref), [`code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref). 
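For illustration (an editorial sketch, not part of this patch; the exact output depends on the CPU and the Julia build):

    using InteractiveUtils

    # Dump only the instructions for integer addition, skipping module-level
    # metadata and using AT&T syntax instead of the default Intel syntax.
    code_native(+, (Int, Int); dump_module=false, syntax=:att)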
""" function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, - debuginfo::Symbol=:default, binary::Bool=false) - d = _dump_function(f, types, true, false, raw, dump_module, syntax, true, debuginfo, binary) + debuginfo::Symbol=:default, binary::Bool=false, + params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw)) + d = _dump_function(f, types, true, false, raw, dump_module, syntax, true, debuginfo, binary, params) if highlighting[:native] && get(io, :color, false)::Bool print_native(io, d) else print(io, d) end end -code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, debuginfo::Symbol=:default, binary::Bool=false) = - code_native(stdout, f, types; dump_module, syntax, raw, debuginfo, binary) -code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call +code_native(args...; kwargs...) = (@nospecialize; code_native(stdout, args...; kwargs...)) ## colorized IR and assembly printing @@ -345,7 +372,7 @@ const llvm_types = const llvm_cond = r"^(?:[ou]?eq|[ou]?ne|[uso][gl][te]|ord|uno)$" # true|false function print_llvm_tokens(io, tokens) - m = match(r"^((?:[^\s:]+:)?)(\s*)(.*)", tokens) + m = match(r"^((?:[^\"\s:]+:|\"[^\"]*\":)?)(\s*)(.*)", tokens) if m !== nothing label, spaces, tokens = m.captures printstyled_ll(io, label, :label, spaces) diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl index 539e9b12f4071..6d1d75f1072ea 100644 --- a/stdlib/InteractiveUtils/src/editless.jl +++ b/stdlib/InteractiveUtils/src/editless.jl @@ -77,7 +77,7 @@ already work: - pycharm - bbedit -# Example: +# Examples The following defines the usage of terminal-based `emacs`: @@ -223,7 +223,10 @@ Edit a file or directory optionally providing a line number to edit the file at. Return to the `julia` prompt when you quit the editor. The editor can be changed by setting `JULIA_EDITOR`, `VISUAL` or `EDITOR` as an environment variable. -See also [`define_editor`](@ref). +!!! compat "Julia 1.9" + The `column` argument requires at least Julia 1.9. + +See also [`InteractiveUtils.define_editor`](@ref). """ function edit(path::AbstractString, line::Integer=0, column::Integer=0) path isa String || (path = convert(String, path)) @@ -255,7 +258,7 @@ method to edit. For modules, open the main source file. The module needs to be l `edit` on modules requires at least Julia 1.1. To ensure that the file can be opened at the given line, you may need to call -`define_editor` first. +`InteractiveUtils.define_editor` first. """ function edit(@nospecialize f) ms = methods(f).ms diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl index 53242a422140b..68afc40976275 100644 --- a/stdlib/InteractiveUtils/src/macros.jl +++ b/stdlib/InteractiveUtils/src/macros.jl @@ -2,7 +2,12 @@ # macro wrappers for various reflection functions -import Base: typesof, insert!, replace_ref_begin_end!, infer_effects +using Base: typesof, insert!, replace_ref_begin_end!, + infer_return_type, infer_exception_type, infer_effects, code_ircode + +# defined in Base so it's possible to time all imports, including InteractiveUtils and its deps +# via. `Base.@time_imports` etc. +import Base: @time_imports, @trace_compile, @trace_dispatch separate_kwargs(args...; kwargs...) 
= (args, values(kwargs)) @@ -36,6 +41,10 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) if Meta.isexpr(ex0, :ref) ex0 = replace_ref_begin_end!(ex0) end + # assignments get bypassed: @edit a = f(x) <=> @edit f(x) + if isa(ex0, Expr) && ex0.head == :(=) && isa(ex0.args[1], Symbol) && isempty(kws) + return gen_call_with_extracted_types(__module__, fcn, ex0.args[2]) + end if isa(ex0, Expr) if ex0.head === :do && Meta.isexpr(get(ex0.args, 1, nothing), :call) if length(ex0.args) != 2 @@ -102,6 +111,11 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) $(kws...)) end elseif ex0.head === :call + if ex0.args[1] === :^ && length(ex0.args) >= 3 && isa(ex0.args[3], Int) + return Expr(:call, fcn, :(Base.literal_pow), + Expr(:call, typesof, esc(ex0.args[1]), esc(ex0.args[2]), + esc(Val(ex0.args[3])))) + end return Expr(:call, fcn, esc(ex0.args[1]), Expr(:call, typesof, map(esc, ex0.args[2:end])...), kws...) @@ -212,37 +226,19 @@ macro which(ex0::Symbol) return :(which($__module__, $ex0)) end -for fname in [:code_warntype, :code_llvm, :code_native, :infer_effects] - @eval begin - macro ($fname)(ex0...) - gen_call_with_extracted_types_and_kwargs(__module__, $(Expr(:quote, fname)), ex0) - end - end -end - -macro code_typed(ex0...) - thecall = gen_call_with_extracted_types_and_kwargs(__module__, :code_typed, ex0) - quote - local results = $thecall - length(results) == 1 ? results[1] : results - end -end - -macro code_lowered(ex0...) - thecall = gen_call_with_extracted_types_and_kwargs(__module__, :code_lowered, ex0) - quote - local results = $thecall - length(results) == 1 ? results[1] : results +for fname in [:code_warntype, :code_llvm, :code_native, + :infer_return_type, :infer_effects, :infer_exception_type] + @eval macro ($fname)(ex0...) + gen_call_with_extracted_types_and_kwargs(__module__, $(QuoteNode(fname)), ex0) end end -macro time_imports(ex) - quote - try - Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1) - $(esc(ex)) - finally - Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1) +for fname in [:code_typed, :code_lowered, :code_ircode] + @eval macro ($fname)(ex0...) + thecall = gen_call_with_extracted_types_and_kwargs(__module__, $(QuoteNode(fname)), ex0) + quote + local results = $thecall + length(results) == 1 ? results[1] : results end end end @@ -297,6 +293,8 @@ Evaluates the arguments to the function or macro call, determines their types, a @code_typed optimize=true foo(x) to control whether additional optimizations, such as inlining, are also applied. + +See also: [`code_typed`](@ref), [`@code_warntype`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref). """ :@code_typed @@ -305,6 +303,8 @@ to control whether additional optimizations, such as inlining, are also applied. Evaluates the arguments to the function or macro call, determines their types, and calls [`code_lowered`](@ref) on the resulting expression. + +See also: [`code_lowered`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref). """ :@code_lowered @@ -313,6 +313,8 @@ Evaluates the arguments to the function or macro call, determines their types, a Evaluates the arguments to the function or macro call, determines their types, and calls [`code_warntype`](@ref) on the resulting expression. + +See also: [`code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref). 
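A brief usage sketch for the loop-generated reflection macros and the new literal-exponent handling in this macros.jl hunk, grounded in the behavior exercised by the tests further down:

```julia
using InteractiveUtils

# With a single matching method, the generated macro unwraps the one-element
# result vector into a CodeInfo => return-type pair.
ci_rt = @code_typed sum(1:10)

# The new integer-literal exponent handling reflects on what the lowered code
# actually calls for 2^3, namely Base.literal_pow.
m = @which 2^3
m.name === :literal_pow  # true
```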
""" :@code_warntype @@ -331,6 +333,8 @@ by putting them and their value before the function call, like this: `raw` makes all metadata and dbg.* calls visible. `debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments. `dump_module` prints the entire module that encapsulates the function. + +See also: [`code_llvm`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_native`](@ref). """ :@code_llvm @@ -350,7 +354,7 @@ by putting it before the function call, like this: * If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address. * If `dump_module` is `false`, do not print metadata such as rodata or directives. -See also: [`code_native`](@ref), [`@code_llvm`](@ref), [`@code_typed`](@ref) and [`@code_lowered`](@ref) +See also: [`code_native`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref). """ :@code_native @@ -392,3 +396,97 @@ julia> @time_imports using CSV """ :@time_imports + +""" + @trace_compile + +A macro to execute an expression and show any methods that were compiled (or recompiled in yellow), +like the julia args `--trace-compile=stderr --trace-compile-timing` but specifically for a call. + +```julia-repl +julia> @trace_compile rand(2,2) * rand(2,2) +#= 39.1 ms =# precompile(Tuple{typeof(Base.rand), Int64, Int64}) +#= 102.0 ms =# precompile(Tuple{typeof(Base.:(*)), Array{Float64, 2}, Array{Float64, 2}}) +2×2 Matrix{Float64}: + 0.421704 0.864841 + 0.211262 0.444366 +``` + +!!! compat "Julia 1.12" + This macro requires at least Julia 1.12 + +""" +:@trace_compile + +""" + @trace_dispatch + +A macro to execute an expression and report methods that were compiled via dynamic dispatch, +like the julia arg `--trace-dispatch=stderr` but specifically for a call. + +!!! compat "Julia 1.12" + This macro requires at least Julia 1.12 + +""" +:@trace_dispatch + +""" + @activate Component + +Activate a newly loaded copy of an otherwise builtin component. The `Component` +to be activated will be resolved using the ordinary rules of module resolution +in the current environment. + +When using `@activate`, additional options for a component may be specified in +square brackets `@activate Compiler[:option1, :option]` + +Currently `@activate Compiler` is the only available component that may be +activatived. + +For `@activate Compiler`, the following options are available: +1. `:reflection` - Activate the compiler for reflection purposes only. + The ordinary reflection functionality in `Base` and `InteractiveUtils`. + Will use the newly loaded compiler. Note however, that these reflection + functions will still interact with the ordinary native cache (both loading + and storing). An incorrect compiler implementation may thus corrupt runtime + state if reflection is used. Use external packages like `Cthulhu.jl` + introspecting compiler behavior with a separated cache partition. + +2. `:codegen` - Activate the compiler for internal codegen purposes. The new compiler + will be invoked whenever the runtime requests compilation. + +`@activate Compiler` without options is equivalent to `@activate Compiler[:reflection]`. 
+ +""" +macro activate(what) + options = Symbol[] + if Meta.isexpr(what, :ref) + Component = what.args[1] + for i = 2:length(what.args) + arg = what.args[i] + if !isa(arg, QuoteNode) || !isa(arg.value, Symbol) + error("Usage Error: Option $arg is not a symbol") + end + push!(options, arg.value) + end + else + Component = what + end + if !isa(Component, Symbol) + error("Usage Error: Component $Component is not a symbol") + end + allowed_components = (:Compiler,) + if !(Component in allowed_components) + error("Usage Error: Component $Component is not recognized. Expected one of $allowed_components") + end + s = gensym() + if Component === :Compiler && isempty(options) + push!(options, :reflection) + end + options = map(options) do opt + Expr(:kw, opt, true) + end + Expr(:toplevel, + esc(:(import $Component as $s)), + esc(:($s.activate!(;$(options...))))) +end diff --git a/stdlib/InteractiveUtils/test/highlighting.jl b/stdlib/InteractiveUtils/test/highlighting.jl index bac52e2945b5e..f49464557f926 100644 --- a/stdlib/InteractiveUtils/test/highlighting.jl +++ b/stdlib/InteractiveUtils/test/highlighting.jl @@ -34,7 +34,10 @@ end c = Base.text_colors[Base.warn_color()] InteractiveUtils.highlighting[:warntype] = false code_warntype(IOContext(io, :color => true), f, Tuple{Int64}) - @test !occursin(c, String(take!(io))) + @test !any([ + occursin("Body", line) && occursin(c, line) + for line in split(String(take!(io)), "\n") + ]) InteractiveUtils.highlighting[:warntype] = true code_warntype(IOContext(io, :color => true), f, Tuple{Int64}) @test occursin(c, String(take!(io))) @@ -72,7 +75,7 @@ end @test occursin("\e", String(take!(io))) end -function hilight_llvm(s) +function highlight_llvm(s) io = IOBuffer() InteractiveUtils.print_llvm(IOContext(io, :color=>true), s) r = String(take!(io)) @@ -82,7 +85,7 @@ function hilight_llvm(s) flush(stdout) r end -function hilight_native(s, arch) +function highlight_native(s, arch) io = IOBuffer() InteractiveUtils.print_native(IOContext(io, :color=>true), s, arch) r = String(take!(io)) @@ -92,8 +95,8 @@ function hilight_native(s, arch) flush(stdout) r end -hilight_x86(s) = hilight_native(s, :x86) -hilight_arm(s) = hilight_native(s, :arm) +highlight_x86(s) = highlight_native(s, :x86) +highlight_arm(s) = highlight_native(s, :arm) function esc_code(s) io = IOBuffer() @@ -124,41 +127,48 @@ const XU = B * "}" * XB @testset "LLVM IR" begin @testset "comment" begin - @test hilight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" + @test highlight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" end - @testset "lavel" begin - @test hilight_llvm("top:") == "$(L)top:$(XL)\n" + @testset "label" begin + @test highlight_llvm("top:") == "$(L)top:$(XL)\n" - @test hilight_llvm("L7:\t\t; preds = %top") == + @test highlight_llvm("L7:\t\t; preds = %top") == "$(L)L7:$(XL)\t\t$(C); preds = %top$(XC)\n" + + @test highlight_llvm(" %\"box::GenericMemoryRef13\" = add i64 0, 0") == + " $(V)%\"box::GenericMemoryRef13\"$(XV) $EQU " * + "$(I)add$(XI) $(T)i64$(XT) $(N)0$(XN)$COM $(N)0$(XN)\n" + + @test highlight_llvm(" \"label-as-string\":\t\t; preds = %top") == + " $(L)\"label-as-string\":$(XL)\t\t$(C); preds = %top$(XC)\n" end @testset "define" begin - @test hilight_llvm("define double @julia_func_1234(float) {") == + @test highlight_llvm("define double @julia_func_1234(float) {") == "$(K)define$(XK) $(T)double$(XT) " * "$(F)@julia_func_1234$(XF)$P$(T)float$(XT)$XP $U\n" - @test hilight_llvm("}") == "$XU\n" + @test highlight_llvm("}") == "$XU\n" end @testset "declare" 
begin - @test hilight_llvm("declare i32 @jl_setjmp(i8*) #2") == + @test highlight_llvm("declare i32 @jl_setjmp(i8*) #2") == "$(K)declare$(XK) $(T)i32$(XT) " * "$(F)@jl_setjmp$(XF)$P$(T)i8$(XT)$(D)*$(XD)$XP $(D)#2$(XD)\n" end @testset "type" begin - @test hilight_llvm("%jl_value_t = type opaque") == + @test highlight_llvm("%jl_value_t = type opaque") == "$(V)%jl_value_t$(XV) $EQU $(K)type$(XK) $(T)opaque$(XT)\n" end @testset "target" begin datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" - @test hilight_llvm("target datalayout = \"$datalayout\"") == + @test highlight_llvm("target datalayout = \"$datalayout\"") == "$(K)target$(XK) $(K)datalayout$(XK) $EQU $(V)\"$datalayout\"$(XV)\n" end @testset "attributes" begin - @test hilight_llvm( + @test highlight_llvm( """attributes #1 = { uwtable "frame-pointer"="all" }""") == "$(K)attributes$(XK) $(D)#1$(XD) $EQU " * "$U $(K)uwtable$(XK) $(V)\"frame-pointer\"$(XV)$EQU" * @@ -166,57 +176,57 @@ const XU = B * "}" * XB end @testset "terminator" begin - @test hilight_llvm(" ret i8 %12") == + @test highlight_llvm(" ret i8 %12") == " $(I)ret$(XI) $(T)i8$(XT) $(V)%12$(XV)\n" - @test hilight_llvm(" br i1 %2, label %L6, label %L4") == + @test highlight_llvm(" br i1 %2, label %L6, label %L4") == " $(I)br$(XI) $(T)i1$(XT) $(V)%2$(XV)$COM " * "$(T)label$(XT) $(L)%L6$(XL)$COM $(T)label$(XT) $(L)%L4$(XL)\n" - @test hilight_llvm(" br label %L5") == + @test highlight_llvm(" br label %L5") == " $(I)br$(XI) $(T)label$(XT) $(L)%L5$(XL)\n" - @test hilight_llvm(" unreachable") == " $(I)unreachable$(XI)\n" + @test highlight_llvm(" unreachable") == " $(I)unreachable$(XI)\n" end @testset "arithmetic" begin - @test hilight_llvm(" %11 = add nuw nsw i64 %value_phi10, 1") == + @test highlight_llvm(" %11 = add nuw nsw i64 %value_phi10, 1") == " $(V)%11$(XV) $EQU $(I)add$(XI) $(K)nuw$(XK) $(K)nsw$(XK) " * "$(T)i64$(XT) $(V)%value_phi10$(XV)$COM $(N)1$(XN)\n" - @test hilight_llvm(" %13 = fadd double %12, -2.000000e+00") == + @test highlight_llvm(" %13 = fadd double %12, -2.000000e+00") == " $(V)%13$(XV) $EQU $(I)fadd$(XI) " * "$(T)double$(XT) $(V)%12$(XV)$COM $(N)-2.000000e+00$(XN)\n" - @test hilight_llvm(" %21 = fmul contract double %20, 0x0123456789ABCDEF") == + @test highlight_llvm(" %21 = fmul contract double %20, 0x0123456789ABCDEF") == " $(V)%21$(XV) $EQU $(I)fmul$(XI) $(K)contract$(XK) " * "$(T)double$(XT) $(V)%20$(XV)$COM $(N)0x0123456789ABCDEF$(XN)\n" end @testset "bitwise" begin - @test hilight_llvm(" %31 = shl i64 %value_phi4, 52") == + @test highlight_llvm(" %31 = shl i64 %value_phi4, 52") == " $(V)%31$(XV) $EQU " * "$(I)shl$(XI) $(T)i64$(XT) $(V)%value_phi4$(XV)$COM $(N)52$(XN)\n" end @testset "aggregate" begin - @test hilight_llvm(" %4 = extractvalue { i64, i1 } %1, 0") == + @test highlight_llvm(" %4 = extractvalue { i64, i1 } %1, 0") == " $(V)%4$(XV) $EQU $(I)extractvalue$(XI) " * "$U $(T)i64$(XT)$COM $(T)i1$(XT) $XU $(V)%1$(XV)$COM $(N)0$(XN)\n" end @testset "memory access" begin - @test hilight_llvm(" %dims = alloca [1 x i64], align 8") == + @test highlight_llvm(" %dims = alloca [1 x i64], align 8") == " $(V)%dims$(XV) $EQU $(I)alloca$(XI) " * "$S$(N)1$(XN) $(D)x$(XD) $(T)i64$(XT)$XS$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" %51 = load i32," * + @test highlight_llvm(" %51 = load i32," * " i32* inttoptr (i64 226995504 to i32*), align 16") == " $(V)%51$(XV) $EQU $(I)load$(XI) $(T)i32$(XT)$COM " * "$(T)i32$(XT)$(D)*$(XD) $(K)inttoptr$(XK) $P$(T)i64$(XT) $(N)226995504$(XN) " * "$(K)to$(XK) $(T)i32$(XT)$(D)*$(XD)$XP$COM $(K)align$(XK) 
$(N)16$(XN)\n" - @test hilight_llvm(" %53 = load %jl_value_t addrspace(10)*, " * + @test highlight_llvm(" %53 = load %jl_value_t addrspace(10)*, " * "%jl_value_t addrspace(10)* addrspace(11)* %52, align 8") == " $(V)%53$(XV) $EQU $(I)load$(XI) $(V)%jl_value_t$(XV) " * "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD)$COM " * @@ -224,37 +234,37 @@ const XU = B * "}" * XB "$(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " * "$(V)%52$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" store i64 %61, i64 addrspace(11)* %60, align 8") == + @test highlight_llvm(" store i64 %61, i64 addrspace(11)* %60, align 8") == " $(I)store$(XI) $(T)i64$(XT) $(V)%61$(XV)$COM " * "$(T)i64$(XT) $(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " * "$(V)%60$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" store volatile %jl_value_t addrspace(10)** %62, " * + @test highlight_llvm(" store volatile %jl_value_t addrspace(10)** %62, " * "%jl_value_t addrspace(10)*** %63, align 8") == " $(I)store$(XI) $(K)volatile$(XK) $(V)%jl_value_t$(XV) " * "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)**$(XD) $(V)%62$(XV)$COM " * "$(V)%jl_value_t$(XV) $(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)***$(XD) " * "$(V)%63$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" %71 = getelementptr i8, i8* %70, i64 8") == + @test highlight_llvm(" %71 = getelementptr i8, i8* %70, i64 8") == " $(V)%71$(XV) $EQU $(I)getelementptr$(XI) $(T)i8$(XT)$COM " * "$(T)i8$(XT)$(D)*$(XD) $(V)%70$(XV)$COM $(T)i64$(XT) $(N)8$(XN)\n" end @testset "conversion" begin - @test hilight_llvm(" %22 = zext i1 %21 to i8") == + @test highlight_llvm(" %22 = zext i1 %21 to i8") == " $(V)%22$(XV) $EQU $(I)zext$(XI) $(T)i1$(XT) $(V)%21$(XV) " * "$(K)to$(XK) $(T)i8$(XT)\n" - @test hilight_llvm(" %24 = sitofp i64 %23 to double") == + @test highlight_llvm(" %24 = sitofp i64 %23 to double") == " $(V)%24$(XV) $EQU $(I)sitofp$(XI) $(T)i64$(XT) $(V)%23$(XV) " * "$(K)to$(XK) $(T)double$(XT)\n" - @test hilight_llvm(" %26 = ptrtoint i8* %25 to i64") == + @test highlight_llvm(" %26 = ptrtoint i8* %25 to i64") == " $(V)%26$(XV) $EQU $(I)ptrtoint$(XI) $(T)i8$(XT)$(D)*$(XD) " * "$(V)%25$(XV) $(K)to$(XK) $(T)i64$(XT)\n" - @test hilight_llvm(" %28 = bitcast %jl_value_t addrspace(10)* %27 " * + @test highlight_llvm(" %28 = bitcast %jl_value_t addrspace(10)* %27 " * "to [2 x i16] addrspace(10)*") == " $(V)%28$(XV) $EQU $(I)bitcast$(XI) $(V)%jl_value_t$(XV) " * "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD) $(V)%27$(XV) " * @@ -263,20 +273,20 @@ const XU = B * "}" * XB end @testset "other" begin - @test hilight_llvm(" %31 = icmp slt i64 %30, 0") == + @test highlight_llvm(" %31 = icmp slt i64 %30, 0") == " $(V)%31$(XV) $EQU $(I)icmp$(XI) $(I)slt$(XI) " * "$(T)i64$(XT) $(V)%30$(XV)$COM $(N)0$(XN)\n" - @test hilight_llvm(" %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") == + @test highlight_llvm(" %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") == " $(V)%value_phi34$(XV) $EQU $(I)phi$(XI) $(T)double$(XT) " * "$S $(V)%33$(XV)$COM $(L)%L50$(XL) $XS$COM " * "$S $(V)%32$(XV)$COM $(L)%L60$(XL) $XS\n" - @test hilight_llvm(" %.v = select i1 %35, i64 %36, i64 63") == + @test highlight_llvm(" %.v = select i1 %35, i64 %36, i64 63") == " $(V)%.v$(XV) $EQU $(I)select$(XI) $(T)i1$(XT) $(V)%35$(XV)$COM " * "$(T)i64$(XT) $(V)%36$(XV)$COM $(T)i64$(XT) $(N)63$(XN)\n" - @test hilight_llvm(" %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") == + @test highlight_llvm(" %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") == " $(V)%38$(XV) $EQU $(I)call$(XI) $(T)i64$(XT) " * 
"$(F)@llvm.cttz.i64$(XF)$P$(T)i64$(XT) $(V)%37$(XV)$COM " * "$(T)i1$(XT) $(K)false$(XK)$XP\n" @@ -285,133 +295,133 @@ end @testset "x86 ASM" begin @testset "comment" begin - @test hilight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" + @test highlight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" end @testset "label" begin - @test hilight_x86("L123:") == "$(L)L123:$(XL)\n" + @test highlight_x86("L123:") == "$(L)L123:$(XL)\n" end @testset "directive" begin - @test hilight_x86("\t.text") == "\t$(D).text$(XD)\n" + @test highlight_x86("\t.text") == "\t$(D).text$(XD)\n" end @testset "0-operand" begin # AT&T - @test hilight_x86("\tretq") == "\t$(I)retq$(XI)\n" + @test highlight_x86("\tretq") == "\t$(I)retq$(XI)\n" # Intel - @test hilight_x86("\tret") == "\t$(I)ret$(XI)\n" + @test highlight_x86("\tret") == "\t$(I)ret$(XI)\n" end @testset "1-operand" begin # AT&T - @test hilight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n" + @test highlight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n" - @test hilight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") == + @test highlight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") == "\t$(I)pushl$(XI)\t$(N)\$4294967295$(XN)\t$(C)# imm = 0xFFFFFFFF$(XC)\n" - @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" + @test highlight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" - @test hilight_x86("\tnopw\t%cs:(%rax,%rax)") == + @test highlight_x86("\tnopw\t%cs:(%rax,%rax)") == "\t$(I)nopw$(XI)\t$(V)%cs$(XV)$COL$P$(V)%rax$(XV)$COM$(V)%rax$(XV)$XP\n" # Intel - @test hilight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n" + @test highlight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n" - @test hilight_x86("\tpush\t4294967295") == + @test highlight_x86("\tpush\t4294967295") == "\t$(I)push$(XI)\t$(N)4294967295$(XN)\n" - @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" + @test highlight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" - @test hilight_x86("\tnop\tword ptr cs:[rax + rax]") == + @test highlight_x86("\tnop\tword ptr cs:[rax + rax]") == "\t$(I)nop$(XI)\t$(K)word$(XK) $(K)ptr$(XK) " * "$(V)cs$(XV)$COL$S$(V)rax$(XV) $(D)+$(XD) $(V)rax$(XV)$XS\n" end @testset "2-operand" begin # AT&T - @test hilight_x86("\tshrq\t\$63, %rcx") == + @test highlight_x86("\tshrq\t\$63, %rcx") == "\t$(I)shrq$(XI)\t$(N)\$63$(XN)$COM $(V)%rcx$(XV)\n" - @test hilight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") == + @test highlight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") == "\t$(I)vmovsd$(XI)\t$P$(V)%rsi$(XV)$COM$(V)%rdx$(XV)$COM$(N)8$(XN)$XP" * "$COM $(V)%xmm1$(XV)\t$(C)# xmm1 = mem[0],zero$(XC)\n" - @test hilight_x86("\tmovabsq\t\$\"#string#338\", %rax") == + @test highlight_x86("\tmovabsq\t\$\"#string#338\", %rax") == "\t$(I)movabsq$(XI)\t$(F)\$\"#string#338\"$(XF)$COM $(V)%rax$(XV)\n" # Intel - @test hilight_x86("\tshr\trcx, 63") == + @test highlight_x86("\tshr\trcx, 63") == "\t$(I)shr$(XI)\t$(V)rcx$(XV)$COM $(N)63$(XN)\n" - @test hilight_x86( + @test highlight_x86( "\tvmovsd\txmm1, dword ptr [rsi + 8*rdx]\t# xmm1 = mem[0],zero") == "\t$(I)vmovsd$(XI)\t$(V)xmm1$(XV)$COM $(K)dword$(XK) $(K)ptr$(XK) " * "$S$(V)rsi$(XV) $(D)+$(XD) $(N)8$(XN)$(D)*$(XD)$(V)rdx$(XV)$XS" * "\t$(C)# xmm1 = mem[0],zero$(XC)\n" - @test hilight_x86("\tmovabs\trax, offset \"#string#338\"") == + @test highlight_x86("\tmovabs\trax, offset \"#string#338\"") == "\t$(I)movabs$(XI)\t$(V)rax$(XV)$COM " * "$(K)offset$(XK) $(F)\"#string#338\"$(XF)\n" end @testset "3-operand" 
begin # AT&T - @test hilight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") == + @test highlight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") == "\t$(I)vaddsd$(XI)\t$P$(V)%rax$(XV)$XP$COM " * "$(V)%xmm0$(XV)$COM $(V)%xmm0$(XV)\n" # Intel - @test hilight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") == + @test highlight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") == "\t$(I)vaddsd$(XI)\t$(V)xmm0$(XV)$COM $(V)xmm0$(XV)$COM " * "$(K)qword$(XK) $(K)ptr$(XK) $S$(V)rax$(XV)$XS\n" end @testset "4-operand" begin # AT&T - @test hilight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") == + @test highlight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") == "\t$(I)vroundsd$(XI)\t$(N)\$4$(XN)$COM " * "$(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)\n" # Intel - @test hilight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") == + @test highlight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") == "\t$(I)vroundsd$(XI)\t" * "$(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(N)4$(XN)\n" end @testset "AVX-512" begin # AT&T - @test hilight_x86("\tvmovaps\t(%eax), %zmm0") == + @test highlight_x86("\tvmovaps\t(%eax), %zmm0") == "\t$(I)vmovaps$(XI)\t$P$(V)%eax$(XV)$XP$COM $(V)%zmm0$(XV)\n" - @test hilight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") == + @test highlight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") == "\t$(I)vpaddd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " * "$(V)%zmm1$(XV) $U$(V)%k1$(XV)$XU\n" - @test hilight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") == + @test highlight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") == "\t$(I)vdivpd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " * "$(V)%zmm0$(XV) $U$(V)%k1$(XV)$XU $U$(K)z$(XK)$XU\n" - @test hilight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") == + @test highlight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") == "\t$(I)vdivps$(XI)\t$P$(V)%ebx$(XV)$XP$U$(K)1to16$(XK)$XU$COM " * "$(V)%zmm5$(XV)$COM $(V)%zmm4$(XV)\n" - @test hilight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") == + @test highlight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") == "\t$(I)vcvtsd2si$(XI)\t$U$(K)rn-sae$(XK)$XU$COM " * "$(V)%xmm0$(XV)$COM $(V)%eax$(XV)\n" # Intel - @test hilight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") == + @test highlight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") == "\t$(I)vmovaps$(XI)\t$(V)zmm0$(XV)$COM " * "$(K)zmmword$(XK) $(K)ptr$(XK) $S$(V)eax$(XV)$XS\n" - @test hilight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") == + @test highlight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") == "\t$(I)vpaddd$(XI)\t$(V)zmm1$(XV) $U$(V)k1$(XV)$XU$COM " * "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n" - @test hilight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") == + @test highlight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") == "\t$(I)vdivpd$(XI)\t$(V)zmm0$(XV) $U$(V)k1$(XV)$XU $U$(K)z$(XK)$XU$COM " * "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n" - @test hilight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") == + @test highlight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") == "\t$(I)vdivps$(XI)\t$(V)zmm4$(XV)$COM $(V)zmm5$(XV)$COM " * "$(K)dword$(XK) $(K)ptr$(XK) $S$(V)ebx$(XV)$XS$U$(K)1to16$(XK)$XU\n" - @test hilight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") == + @test highlight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") == "\t$(I)vcvtsd2si$(XI)\t$(V)eax$(XV)$COM " * "$(V)xmm0$(XV)$COM $U$(K)rn-sae$(XK)$XU\n" end @@ -419,74 +429,74 @@ end @testset "ARM ASM" begin @testset "comment" begin - @test hilight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" + @test highlight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" end @testset "label" begin - @test hilight_arm("L45:") == "$(L)L45:$(XL)\n" + @test 
highlight_arm("L45:") == "$(L)L45:$(XL)\n" end @testset "directive" begin - @test hilight_arm("\t.text") == "\t$(D).text$(XD)\n" + @test highlight_arm("\t.text") == "\t$(D).text$(XD)\n" end @testset "0-operand" begin - @test hilight_arm("\tret") == "\t$(I)ret$(XI)\n" + @test highlight_arm("\tret") == "\t$(I)ret$(XI)\n" end @testset "1-operand" begin - @test hilight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n" + @test highlight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n" - @test hilight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n" + @test highlight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n" - @test hilight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n" + @test highlight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n" - @test hilight_arm("\tpop\t{r11, pc}") == + @test highlight_arm("\tpop\t{r11, pc}") == "\t$(I)pop$(XI)\t$U$(V)r11$(XV)$COM $(V)pc$(XV)$XU\n" end @testset "2-operand" begin - @test hilight_arm("\tcmp\tx10, #2047\t// =2047") == + @test highlight_arm("\tcmp\tx10, #2047\t// =2047") == "\t$(I)cmp$(XI)\t$(V)x10$(XV)$COM $(N)#2047$(XN)\t$(C)// =2047$(XC)\n" - @test hilight_arm("\tldr\td1, [x10]") == + @test highlight_arm("\tldr\td1, [x10]") == "\t$(I)ldr$(XI)\t$(V)d1$(XV)$COM $S$(V)x10$(XV)$XS\n" - @test hilight_arm("\tstr\tx30, [sp, #-16]!") == + @test highlight_arm("\tstr\tx30, [sp, #-16]!") == "\t$(I)str$(XI)\t$(V)x30$(XV)$COM " * "$S$(V)sp$(XV)$COM $(N)#-16$(XN)$XS$(K)!$(XK)\n" - @test hilight_arm("\tmov\tv0.16b, v1.16b") == + @test highlight_arm("\tmov\tv0.16b, v1.16b") == "\t$(I)mov$(XI)\t$(V)v0.16b$(XV)$COM $(V)v1.16b$(XV)\n" end @testset "3-operand" begin - @test hilight_arm("\tfmul\td2, d0, d2") == + @test highlight_arm("\tfmul\td2, d0, d2") == "\t$(I)fmul$(XI)\t$(V)d2$(XV)$COM $(V)d0$(XV)$COM $(V)d2$(XV)\n" - @test hilight_arm("\tmovk\tx10, #65535, lsl #32") == + @test highlight_arm("\tmovk\tx10, #65535, lsl #32") == "\t$(I)movk$(XI)\t$(V)x10$COM $(N)#65535$(XN)$COM $(K)lsl$(XK) $(N)#32$(XN)\n" - @test hilight_arm("\tcneg\tx8, x8, ge") == + @test highlight_arm("\tcneg\tx8, x8, ge") == "\t$(I)cneg$(XI)\t$(V)x8$(XV)$COM $(V)x8$(XV)$COM $(K)ge$(XK)\n" end @testset "4-operand" begin - @test hilight_arm("\tadd\tx8, x9, x8, lsl #52") == + @test highlight_arm("\tadd\tx8, x9, x8, lsl #52") == "\t$(I)add$(XI)\t$(V)x8$(XV)$COM $(V)x9$(XV)$COM $(V)x8$(XV)$COM " * "$(K)lsl$(XK) $(N)#52$(XN)\n" - @test hilight_arm("\tfcsel\td1, d0, d1, eq") == + @test highlight_arm("\tfcsel\td1, d0, d1, eq") == "\t$(I)fcsel$(XI)\t" * "$(V)d1$(XV)$COM $(V)d0$(XV)$COM $(V)d1$(XV)$COM $(K)eq$(XK)\n" end @testset "NEON" begin - hilight_arm("\tvmul.f32\tq8, q9, q8") == + highlight_arm("\tvmul.f32\tq8, q9, q8") == "\t$(I)vmul.f32$(XI)\t$(V)q8$(XV)$COM $(V)q9$(XV)$COM $(V)q8$(XV)\n" - hilight_arm("\tvcvt.s32.f64\ts2, d20") == + highlight_arm("\tvcvt.s32.f64\ts2, d20") == "\t$(I)vcvt.s32.f64$(XI)\t$(V)s2$(XV)$COM $(V)d20$(XV)\n" - hilight_arm("\tvld1.32\t{d18, d19}, [r1]") == + highlight_arm("\tvld1.32\t{d18, d19}, [r1]") == "\t$(I)vld1.32$(XI)\t$U$(V)d18$(XV)$COM $(V)d19$(XV)$XU$COM $S$(V)r1$(XV)$XS\n" end @testset "SVE" begin - hilight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") == + highlight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") == "\t$(I)ld1d$(XI)\t$(V)z1.d$(XV)$COM " * "$(V)p0$(XV)$(K)/z$(XK)$COM " * "$S$(V)x0$(XV)$COM $(V)x4$(XV)$COM $(K)lsl$(XK) $(N)#3$(XN)$XS\n" - hilight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)" + highlight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)" end end diff --git 
a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl index 5f90491fd8151..0de67fea69dea 100644 --- a/stdlib/InteractiveUtils/test/runtests.jl +++ b/stdlib/InteractiveUtils/test/runtests.jl @@ -138,6 +138,11 @@ tag = "ANY" @test !warntype_hastag(ImportIntrinsics15819.sqrt15819, Tuple{Float64}, tag) @test !warntype_hastag(ImportIntrinsics15819.sqrt15819, Tuple{Float32}, tag) +@testset "code_warntype OpaqueClosure" begin + g = Base.Experimental.@opaque Tuple{Float64}->_ x -> 0.0 + @test warntype_hastag(g, Tuple{Float64}, "::Float64") +end + end # module WarnType # Adds test for PR #17636 @@ -229,7 +234,7 @@ module Tmp14173 end varinfo(Tmp14173) # warm up const MEMDEBUG = ccall(:jl_is_memdebug, Bool, ()) -@test @allocated(varinfo(Tmp14173)) < (MEMDEBUG ? 300000 : 100000) +@test @allocated(varinfo(Tmp14173)) < (MEMDEBUG ? 300000 : 125000) # PR #24997: test that `varinfo` doesn't fail when encountering `missing` module A @@ -279,14 +284,50 @@ let x..y = 0 @test (@which 1..2).name === :.. end +# issue #53691 +let a = -1 + @test (@which 2^a).name === :^ + @test (@which 2^0x1).name === :^ +end + +let w = Vector{Any}(undef, 9) + @testset "@which x^literal" begin + w[1] = @which 2^0 + w[2] = @which 2^1 + w[3] = @which 2^2 + w[4] = @which 2^3 + w[5] = @which 2^-1 + w[6] = @which 2^-2 + w[7] = @which 2^10 + w[8] = @which big(2.0)^1 + w[9] = @which big(2.0)^-1 + @test all(getproperty.(w, :name) .=== :literal_pow) + @test length(Set(w)) == length(w) # all methods distinct + end +end + +# PR 53713 +if Int === Int64 + # literal_pow only for exponents x: -2^63 <= x < 2^63 #53860 (all Int) + @test (@which 2^-9223372036854775809).name === :^ + @test (@which 2^-9223372036854775808).name === :literal_pow + @test (@which 2^9223372036854775807).name === :literal_pow + @test (@which 2^9223372036854775808).name === :^ +elseif Int === Int32 + # literal_pow only for exponents x: -2^31 <= x < 2^31 #53860 (all Int) + @test (@which 2^-2147483649).name === :^ + @test (@which 2^-2147483648).name === :literal_pow + @test (@which 2^2147483647).name === :literal_pow + @test (@which 2^2147483648).name === :^ +end + # issue #13464 try @which x = 1 error("unexpected") catch err13464 - @test startswith(err13464.msg, "expression is not a function call, or is too complex") + @test startswith(err13464.msg, "expression is not a function call") end - module MacroTest export @macrotest macro macrotest(x::Int, y::Symbol) end @@ -330,7 +371,9 @@ let _true = Ref(true), f, g, h end # manually generate a broken function, which will break codegen -# and make sure Julia doesn't crash +# and make sure Julia doesn't crash (when using a non-asserts build) +is_asserts() = ccall(:jl_is_assertsbuild, Cint, ()) == 1 +if !is_asserts() @eval @noinline Base.@constprop :none f_broken_code() = 0 let m = which(f_broken_code, ()) let src = Base.uncompressed_ast(m) @@ -345,39 +388,59 @@ _true = true # and show that we can still work around it @noinline g_broken_code() = _true ? 
0 : h_broken_code() @noinline h_broken_code() = (g_broken_code(); f_broken_code()) -let err = tempname(), +let errf = tempname(), old_stderr = stderr, - new_stderr = open(err, "w") + new_stderr = open(errf, "w") try redirect_stderr(new_stderr) + @test occursin("f_broken_code", sprint(code_native, h_broken_code, ())) + Libc.flush_cstdio() println(new_stderr, "start") flush(new_stderr) - @test occursin("h_broken_code", sprint(code_native, h_broken_code, ())) + @test_throws "could not compile the specified method" sprint(io -> code_native(io, f_broken_code, (), dump_module=true)) + Libc.flush_cstdio() + println(new_stderr, "middle") + flush(new_stderr) + @test !isempty(sprint(io -> code_native(io, f_broken_code, (), dump_module=false))) + Libc.flush_cstdio() + println(new_stderr, "later") + flush(new_stderr) + @test invokelatest(g_broken_code) == 0 Libc.flush_cstdio() println(new_stderr, "end") flush(new_stderr) - @eval @test g_broken_code() == 0 finally + Libc.flush_cstdio() redirect_stderr(old_stderr) close(new_stderr) - let errstr = read(err, String) + let errstr = read(errf, String) @test startswith(errstr, """start - end Internal error: encountered unexpected error during compilation of f_broken_code: - ErrorException(\"unsupported or misplaced expression \"invalid\" in function f_broken_code\") + ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\") """) || errstr - @test !endswith(errstr, "\nend\n") || errstr + @test occursin("""\nmiddle + Internal error: encountered unexpected error during compilation of f_broken_code: + ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\") + """, errstr) || errstr + @test occursin("""\nlater + Internal error: encountered unexpected error during compilation of f_broken_code: + ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\") + """, errstr) || errstr + @test endswith(errstr, "\nend\n") || errstr end - rm(err) + rm(errf) end end +end # Issue #33163 A33163(x; y) = x + y B33163(x) = x -@test (@code_typed A33163(1, y=2))[1].inferred -@test !(@code_typed optimize=false A33163(1, y=2))[1].inferred -@test !(@code_typed optimize=false B33163(1))[1].inferred +let + (@code_typed A33163(1, y=2))[1] + (@code_typed optimize=false A33163(1, y=2))[1] + (@code_typed optimize=false B33163(1))[1] +end @test_throws MethodError (@code_lowered wrongkeyword=true 3 + 4) @@ -411,10 +474,11 @@ a14637 = A14637(0) @test (@code_typed max.(1 .+ 3, 5 - 7))[2] == Int f36261(x,y) = 3x + 4y A36261 = Float64[1.0, 2.0, 3.0] -@test (@code_typed f36261.(A36261, pi))[1].inferred -@test (@code_typed f36261.(A36261, 1 .+ pi))[1].inferred -@test (@code_typed f36261.(A36261, 1 + pi))[1].inferred - +let + @code_typed f36261.(A36261, pi)[1] + @code_typed f36261.(A36261, 1 .+ pi)[1] + @code_typed f36261.(A36261, 1 + pi)[1] +end module ReflectionTest using Test, Random, InteractiveUtils @@ -483,9 +547,9 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH)) output = replace(String(take!(buf)), r"#[^\r\n]+" => "") @test !occursin(rgx, output) - code_native(buf, linear_foo, ()) - output = String(take!(buf)) - @test occursin(rgx, output) + code_native(buf, linear_foo, (), debuginfo = :none) + output = replace(String(take!(buf)), r"#[^\r\n]+" => "") + @test !occursin(rgx, output) @testset "binary" begin # check the RET instruction (opcode: C3) @@ -640,10 +704,11 @@ end # macro options should accept both literals and variables let opt = false - @test 
!(first(@code_typed optimize=opt sum(1:10)).inferred) + @test length(first(@code_typed optimize=opt sum(1:10)).code) == + length((@code_lowered sum(1:10)).code) end -@testset "@time_imports" begin +@testset "@time_imports, @trace_compile, @trace_dispatch" begin mktempdir() do dir cd(dir) do try @@ -652,7 +717,16 @@ end write(foo_file, """ module Foo3242 - foo() = 1 + function foo() + Base.Experimental.@force_compile + foo(1) + end + foo(x) = x + function bar() + Base.Experimental.@force_compile + bar(1) + end + bar(x) = x end """) @@ -669,6 +743,27 @@ end @test occursin("ms Foo3242", String(buf)) + fname = tempname() + f = open(fname, "w") + redirect_stderr(f) do + @trace_compile @eval Foo3242.foo() + end + close(f) + buf = read(fname) + rm(fname) + + @test occursin("ms =# precompile(", String(buf)) + + fname = tempname() + f = open(fname, "w") + redirect_stderr(f) do + @trace_dispatch @eval Foo3242.bar() + end + close(f) + buf = read(fname) + rm(fname) + + @test occursin("precompile(", String(buf)) finally filter!((≠)(dir), LOAD_PATH) end @@ -700,6 +795,9 @@ end @testset "code_llvm on opaque_closure" begin let ci = code_typed(+, (Int, Int))[1][1] ir = Core.Compiler.inflate_ir(ci) + ir.argtypes[1] = Tuple{} + @test ir.debuginfo.def === nothing + ir.debuginfo.def = Symbol(@__FILE__) oc = Core.OpaqueClosure(ir) @test (code_llvm(devnull, oc, Tuple{Int, Int}); true) let io = IOBuffer() @@ -721,3 +819,30 @@ end end @test Base.infer_effects(sin, (Int,)) == InteractiveUtils.@infer_effects sin(42) +@test Base.infer_return_type(sin, (Int,)) == InteractiveUtils.@infer_return_type sin(42) +@test Base.infer_exception_type(sin, (Int,)) == InteractiveUtils.@infer_exception_type sin(42) +@test first(InteractiveUtils.@code_ircode sin(42)) isa Core.Compiler.IRCode +@test first(InteractiveUtils.@code_ircode optimize_until="Inlining" sin(42)) isa Core.Compiler.IRCode + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(InteractiveUtils)) +end + +# issue https://github.com/JuliaIO/ImageMagick.jl/issues/235 +module OuterModule + module InternalModule + struct MyType + x::Int + end + + Base.@deprecate_binding MyOldType MyType + + export MyType + end + using .InternalModule + export MyType, MyOldType +end # module +@testset "Subtypes and deprecations" begin + using .OuterModule + @test_nowarn subtypes(Integer); +end diff --git a/stdlib/JuliaSyntaxHighlighting.version b/stdlib/JuliaSyntaxHighlighting.version new file mode 100644 index 0000000000000..2a409c721d32b --- /dev/null +++ b/stdlib/JuliaSyntaxHighlighting.version @@ -0,0 +1,4 @@ +JULIASYNTAXHIGHLIGHTING_BRANCH = main +JULIASYNTAXHIGHLIGHTING_SHA1 = 19bd57b89c648592155156049addf67e0638eab1 +JULIASYNTAXHIGHLIGHTING_GIT_URL := https://github.com/julialang/JuliaSyntaxHighlighting.jl.git +JULIASYNTAXHIGHLIGHTING_TAR_URL = https://api.github.com/repos/julialang/JuliaSyntaxHighlighting.jl/tarball/$1 diff --git a/stdlib/LLD_jll/Project.toml b/stdlib/LLD_jll/Project.toml index 90d867ca0f7da..1aafd275d99b7 100644 --- a/stdlib/LLD_jll/Project.toml +++ b/stdlib/LLD_jll/Project.toml @@ -1,6 +1,6 @@ name = "LLD_jll" uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109" -version = "15.0.7+5" +version = "18.1.7+3" [deps] Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a" @@ -9,8 +9,8 @@ libLLVM_jll = "8f36deef-c2a5-5394-99ed-8e07531fb29a" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.9" -libLLVM_jll = "15.0.7" +julia = "1.11" +libLLVM_jll = "18.1.7" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git 
a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl index 55ccec9cc4005..9b8365dddcf0b 100644 --- a/stdlib/LLD_jll/src/LLD_jll.jl +++ b/stdlib/LLD_jll/src/LLD_jll.jl @@ -4,7 +4,6 @@ baremodule LLD_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml index 36c24111d4d31..e102af311abec 100644 --- a/stdlib/LLVMLibUnwind_jll/Project.toml +++ b/stdlib/LLVMLibUnwind_jll/Project.toml @@ -1,6 +1,6 @@ name = "LLVMLibUnwind_jll" uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9" -version = "12.0.1+0" +version = "19.1.4+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl index 5c4026291a673..429e35b91d3f2 100644 --- a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl +++ b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl @@ -4,7 +4,6 @@ baremodule LLVMLibUnwind_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/LazyArtifacts.version b/stdlib/LazyArtifacts.version new file mode 100644 index 0000000000000..8988e27bcb4ac --- /dev/null +++ b/stdlib/LazyArtifacts.version @@ -0,0 +1,4 @@ +LAZYARTIFACTS_BRANCH = main +LAZYARTIFACTS_SHA1 = e4cfc39598c238f75bdfdbdb3f82c9329a5af59c +LAZYARTIFACTS_GIT_URL := https://github.com/JuliaPackaging/LazyArtifacts.jl.git +LAZYARTIFACTS_TAR_URL = https://api.github.com/repos/JuliaPackaging/LazyArtifacts.jl/tarball/$1 diff --git a/stdlib/LazyArtifacts/Project.toml b/stdlib/LazyArtifacts/Project.toml deleted file mode 100644 index ea9afc9d12dba..0000000000000 --- a/stdlib/LazyArtifacts/Project.toml +++ /dev/null @@ -1,12 +0,0 @@ -name = "LazyArtifacts" -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[deps] -Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test"] diff --git a/stdlib/LazyArtifacts/docs/src/index.md b/stdlib/LazyArtifacts/docs/src/index.md deleted file mode 100644 index 9de6b219c6988..0000000000000 --- a/stdlib/LazyArtifacts/docs/src/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Lazy Artifacts - -```@meta -DocTestSetup = :(using LazyArtifacts) -``` - -In order for a package to download artifacts lazily, `LazyArtifacts` must be -explicitly listed as a dependency of that package. - -For further information on artifacts, see [Artifacts](@ref). diff --git a/stdlib/LazyArtifacts/src/LazyArtifacts.jl b/stdlib/LazyArtifacts/src/LazyArtifacts.jl deleted file mode 100644 index b783276ac6081..0000000000000 --- a/stdlib/LazyArtifacts/src/LazyArtifacts.jl +++ /dev/null @@ -1,15 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module LazyArtifacts - -# reexport the Artifacts API -using Artifacts: Artifacts, - artifact_exists, artifact_path, artifact_meta, artifact_hash, - select_downloadable_artifacts, find_artifacts_toml, @artifact_str -export artifact_exists, artifact_path, artifact_meta, artifact_hash, - select_downloadable_artifacts, find_artifacts_toml, @artifact_str - -# define a function for satisfying lazy Artifact downloads -using Pkg.Artifacts: ensure_artifact_installed - -end diff --git a/stdlib/LazyArtifacts/test/Artifacts.toml b/stdlib/LazyArtifacts/test/Artifacts.toml deleted file mode 100644 index 4b715b74c128b..0000000000000 --- a/stdlib/LazyArtifacts/test/Artifacts.toml +++ /dev/null @@ -1,155 +0,0 @@ -[[HelloWorldC]] -arch = "aarch64" -git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499" -os = "macos" - - [[HelloWorldC.download]] - sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz" -[[HelloWorldC]] -arch = "aarch64" -git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "aarch64" -git-tree-sha1 = "dc43ab874611cfc26641741c31b8230276d7d664" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz" -[[HelloWorldC]] -arch = "armv6l" -call_abi = "eabihf" -git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz" -[[HelloWorldC]] -arch = "armv6l" -call_abi = "eabihf" -git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz" -[[HelloWorldC]] -arch = "armv7l" -call_abi = "eabihf" -git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz" -[[HelloWorldC]] -arch = "armv7l" -call_abi = "eabihf" -git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz" -[[HelloWorldC]] -arch = "i686" 
-git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "i686" -git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz" -[[HelloWorldC]] -arch = "i686" -git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff" -os = "windows" - - [[HelloWorldC.download]] - sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz" -[[HelloWorldC]] -arch = "powerpc64le" -git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc" -os = "macos" - - [[HelloWorldC.download]] - sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1" -os = "freebsd" - - [[HelloWorldC.download]] - sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b" -os = "windows" - - [[HelloWorldC.download]] - sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz" - -[socrates] -git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239" -lazy = true - - [[socrates.download]] - url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz" - sha256 = 
"e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58" - - [[socrates.download]] - url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2" - sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76" diff --git a/stdlib/LazyArtifacts/test/runtests.jl b/stdlib/LazyArtifacts/test/runtests.jl deleted file mode 100644 index 1c8bbee269144..0000000000000 --- a/stdlib/LazyArtifacts/test/runtests.jl +++ /dev/null @@ -1,31 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using LazyArtifacts -using Test - -mktempdir() do tempdir - LazyArtifacts.Artifacts.with_artifacts_directory(tempdir) do - redirect_stderr(devnull) do - socrates_dir = artifact"socrates" - @test isdir(socrates_dir) - end - ex = @test_throws ErrorException artifact"HelloWorldC" - @test startswith(ex.value.msg, "Artifact \"HelloWorldC\" was not found") - end -end - -# Need to set depwarn flag before testing deprecations -@test success(run(setenv(`$(Base.julia_cmd()) --depwarn=no --startup-file=no -e ' - using Artifacts, Pkg - using Test - mktempdir() do tempdir - Artifacts.with_artifacts_directory(tempdir) do - redirect_stderr(devnull) do - socrates_dir = @test_logs( - (:warn, "using Pkg instead of using LazyArtifacts is deprecated"), - artifact"socrates") - @test isdir(socrates_dir) - end - end - end'`, - dir=@__DIR__))) diff --git a/stdlib/LibCURL_jll/Project.toml b/stdlib/LibCURL_jll/Project.toml index 0ef46598b3118..61d44beac14e1 100644 --- a/stdlib/LibCURL_jll/Project.toml +++ b/stdlib/LibCURL_jll/Project.toml @@ -1,11 +1,11 @@ name = "LibCURL_jll" uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "8.0.1+0" +version = "8.11.1+1" [deps] LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8" nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d" -MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95" Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -17,4 +17,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" test = ["Test"] [compat] -julia = "1.8" +julia = "1.11" diff --git a/stdlib/LibCURL_jll/src/LibCURL_jll.jl b/stdlib/LibCURL_jll/src/LibCURL_jll.jl index cd67bfac0006a..5c1c2aa14b23a 100644 --- a/stdlib/LibCURL_jll/src/LibCURL_jll.jl +++ b/stdlib/LibCURL_jll/src/LibCURL_jll.jl @@ -3,8 +3,11 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/LibCURL_jll.jl baremodule LibCURL_jll -using Base, Libdl, nghttp2_jll -Base.Experimental.@compiler_options compile=min optimize=0 infer=false +using Base, Libdl, nghttp2_jll, LibSSH2_jll, Zlib_jll +if !(Sys.iswindows() || Sys.isapple()) + # On Windows and macOS we use system SSL/crypto libraries + using OpenSSL_jll +end const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/LibGit2/Project.toml b/stdlib/LibGit2/Project.toml index da78f70fa1005..8432a32cd240b 100644 --- a/stdlib/LibGit2/Project.toml +++ b/stdlib/LibGit2/Project.toml @@ -1,8 +1,9 @@ name = "LibGit2" uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" [deps] -Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +LibGit2_jll = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" diff --git a/stdlib/LibGit2/docs/src/index.md b/stdlib/LibGit2/docs/src/index.md index 3205c4c5d6987..aa4ebf2e784b6 100644 --- 
a/stdlib/LibGit2/docs/src/index.md +++ b/stdlib/LibGit2/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/LibGit2/docs/src/index.md" +``` + # LibGit2 The LibGit2 module provides bindings to [libgit2](https://libgit2.org/), a portable C library that diff --git a/stdlib/LibGit2/src/LibGit2.jl b/stdlib/LibGit2/src/LibGit2.jl index 6a797937ccf0b..04435dd577c19 100644 --- a/stdlib/LibGit2/src/LibGit2.jl +++ b/stdlib/LibGit2/src/LibGit2.jl @@ -6,14 +6,15 @@ Interface to [libgit2](https://libgit2.org/). module LibGit2 import Base: == -using Base: something, notnothing -using Base64: base64decode +using Base: something using NetworkOptions using Printf: @printf using SHA: sha1, sha256 export with, GitRepo, GitConfig +using LibGit2_jll + const GITHUB_REGEX = r"^(?:(?:ssh://)?git@|git://|https://(?:[\w\.\+\-]+@)?)github.com[:/](([^/].+)/(.+?))(?:\.git)?$"i @@ -594,6 +595,44 @@ function clone(repo_url::AbstractString, repo_path::AbstractString; return repo end +""" + connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION; kwargs...) + +Open a connection to a remote. `direction` can be either `DIRECTION_FETCH` +or `DIRECTION_PUSH`. + +The keyword arguments are: + * `credentials::Creds=nothing`: provides credentials and/or settings when authenticating + against a private repository. + * `callbacks::Callbacks=Callbacks()`: user provided callbacks and payloads. +""" +function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION; + credentials::Creds=nothing, + callbacks::Callbacks=Callbacks()) + cred_payload = reset!(CredentialPayload(credentials)) + if !haskey(callbacks, :credentials) + callbacks[:credentials] = (credentials_cb(), cred_payload) + elseif haskey(callbacks, :credentials) && credentials !== nothing + throw(ArgumentError(string( + "Unable to both use the provided `credentials` as a payload when the ", + "`callbacks` also contain a credentials payload."))) + end + + remote_callbacks = RemoteCallbacks(callbacks) + try + connect(rmt, direction, remote_callbacks) + catch err + if isa(err, GitError) && err.code === Error.EAUTH + reject(cred_payload) + else + Base.shred!(cred_payload) + end + rethrow() + end + approve(cred_payload) + return rmt +end + """ git reset [] [--] ... """ function reset!(repo::GitRepo, committish::AbstractString, pathspecs::AbstractString...) obj = GitObject(repo, isempty(committish) ? Consts.HEAD_FILE : committish) @@ -983,7 +1022,7 @@ function ensure_initialized() end @noinline function initialize() - @check ccall((:git_libgit2_init, :libgit2), Cint, ()) + @check ccall((:git_libgit2_init, libgit2), Cint, ()) cert_loc = NetworkOptions.ca_roots() cert_loc !== nothing && set_ssl_cert_locations(cert_loc) @@ -991,7 +1030,7 @@ end atexit() do # refcount zero, no objects to be finalized if Threads.atomic_sub!(REFCOUNT, 1) == 1 - ccall((:git_libgit2_shutdown, :libgit2), Cint, ()) + ccall((:git_libgit2_shutdown, libgit2), Cint, ()) end end end @@ -1003,7 +1042,7 @@ function set_ssl_cert_locations(cert_loc) else # files, /dev/null, non-existent paths, etc. cert_file = cert_loc end - ret = @ccall "libgit2".git_libgit2_opts( + ret = @ccall libgit2.git_libgit2_opts( Consts.SET_SSL_CERT_LOCATIONS::Cint; cert_file::Cstring, cert_dir::Cstring)::Cint @@ -1029,7 +1068,7 @@ end Sets the system tracing configuration to the specified level. 
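For the new keyword `connect` method added in the LibGit2.jl hunk above, a hedged usage sketch; the repository path and the `lookup_remote` call are assumptions made for illustration, only the `connect` call itself comes from this diff:

```julia
using LibGit2

repo   = LibGit2.GitRepo("/path/to/clone")      # hypothetical local clone
remote = LibGit2.lookup_remote(repo, "origin")  # assumes the remote exists

# The keyword method wires up a credentials callback (unless one is supplied via
# `callbacks`) and approves or rejects the credential payload depending on whether
# the connection succeeded.
LibGit2.connect(remote, LibGit2.Consts.DIRECTION_FETCH)
```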
""" function trace_set(level::Union{Integer,Consts.GIT_TRACE_LEVEL}, cb=trace_cb()) - @check @ccall "libgit2".git_trace_set(level::Cint, cb::Ptr{Cvoid})::Cint + @check @ccall libgit2.git_trace_set(level::Cint, cb::Ptr{Cvoid})::Cint end end # module diff --git a/stdlib/LibGit2/src/blame.jl b/stdlib/LibGit2/src/blame.jl index 3aa94e30200b4..e441189bdd423 100644 --- a/stdlib/LibGit2/src/blame.jl +++ b/stdlib/LibGit2/src/blame.jl @@ -11,9 +11,9 @@ which commits to probe - see [`BlameOptions`](@ref) for more information. function GitBlame(repo::GitRepo, path::AbstractString; options::BlameOptions=BlameOptions()) ensure_initialized() blame_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_blame_file, :libgit2), Cint, + @check ccall((:git_blame_file, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{BlameOptions}), - blame_ptr_ptr, repo.ptr, path, Ref(options)) + blame_ptr_ptr, repo, path, Ref(options)) return GitBlame(repo, blame_ptr_ptr[]) end @@ -27,7 +27,7 @@ that function later. """ function counthunks(blame::GitBlame) ensure_initialized() - return ccall((:git_blame_get_hunk_count, :libgit2), Int32, (Ptr{Cvoid},), blame.ptr) + return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame) end function Base.getindex(blame::GitBlame, i::Integer) @@ -36,7 +36,7 @@ function Base.getindex(blame::GitBlame, i::Integer) end ensure_initialized() GC.@preserve blame begin - hunk_ptr = ccall((:git_blame_get_hunk_byindex, :libgit2), + hunk_ptr = ccall((:git_blame_get_hunk_byindex, libgit2), Ptr{BlameHunk}, (Ptr{Cvoid}, Csize_t), blame.ptr, i-1) elem = unsafe_load(hunk_ptr) diff --git a/stdlib/LibGit2/src/blob.jl b/stdlib/LibGit2/src/blob.jl index efd7a14c9c6f7..af1a16574b51e 100644 --- a/stdlib/LibGit2/src/blob.jl +++ b/stdlib/LibGit2/src/blob.jl @@ -2,7 +2,7 @@ function Base.length(blob::GitBlob) ensure_initialized() - return ccall((:git_blob_rawsize, :libgit2), Int64, (Ptr{Cvoid},), blob.ptr) + return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob) end """ @@ -20,7 +20,7 @@ is binary and not valid Unicode. """ function rawcontent(blob::GitBlob) ensure_initialized() - ptr = ccall((:git_blob_rawcontent, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob.ptr) + ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob) copy(unsafe_wrap(Array, ptr, (length(blob),), own = false)) end @@ -47,7 +47,7 @@ the first 8000 bytes. 
""" function isbinary(blob::GitBlob) ensure_initialized() - bin_flag = ccall((:git_blob_is_binary, :libgit2), Cint, (Ptr{Cvoid},), blob.ptr) + bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob) return bin_flag == 1 end @@ -67,9 +67,9 @@ id = LibGit2.addblob!(repo, blob_file) function addblob!(repo::GitRepo, path::AbstractString) ensure_initialized() id_ref = Ref{GitHash}() - @check ccall((:git_blob_create_from_disk, :libgit2), Cint, + @check ccall((:git_blob_create_from_disk, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Cstring), - id_ref, repo.ptr, path) + id_ref, repo, path) return id_ref[] end diff --git a/stdlib/LibGit2/src/callbacks.jl b/stdlib/LibGit2/src/callbacks.jl index 3bc6463140d5f..c4156d4a44c71 100644 --- a/stdlib/LibGit2/src/callbacks.jl +++ b/stdlib/LibGit2/src/callbacks.jl @@ -9,7 +9,7 @@ function mirror_callback(remote::Ptr{Ptr{Cvoid}}, repo_ptr::Ptr{Cvoid}, ensure_initialized() # Create the remote with a mirroring url fetch_spec = "+refs/*:refs/*" - err = ccall((:git_remote_create_with_fetchspec, :libgit2), Cint, + err = ccall((:git_remote_create_with_fetchspec, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring), remote, repo_ptr, name, url, fetch_spec) err != 0 && return Cint(err) @@ -43,7 +43,7 @@ end function user_abort() ensure_initialized() # Note: Potentially it could be better to just throw a Julia error. - ccall((:giterr_set_str, :libgit2), Cvoid, + ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "Aborting, user cancelled credential request.") return Cint(Error.EUSER) @@ -51,7 +51,7 @@ end function prompt_limit() ensure_initialized() - ccall((:giterr_set_str, :libgit2), Cvoid, + ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "Aborting, maximum number of prompts reached.") return Cint(Error.EAUTH) @@ -59,7 +59,7 @@ end function exhausted_abort() ensure_initialized() - ccall((:giterr_set_str, :libgit2), Cvoid, + ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "All authentication methods have failed.") return Cint(Error.EAUTH) @@ -79,7 +79,7 @@ function authenticate_ssh(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPayload, # first try ssh-agent if credentials support its usage if p.use_ssh_agent && username_ptr != Cstring(C_NULL) && (!revised || !isfilled(cred)) - err = ccall((:git_cred_ssh_key_from_agent, :libgit2), Cint, + err = ccall((:git_cred_ssh_key_from_agent, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring), libgit2credptr, username_ptr) p.use_ssh_agent = false # use ssh-agent only one time @@ -175,7 +175,7 @@ function authenticate_ssh(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPayload, if !revised return exhausted_abort() end - return ccall((:git_cred_ssh_key_new, :libgit2), Cint, + return ccall((:git_cred_ssh_key_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Cstring, Cstring), libgit2credptr, cred.user, cred.pubkey, cred.prvkey, cred.pass) end @@ -195,9 +195,9 @@ function authenticate_userpass(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPay if p.use_git_helpers && (!revised || !isfilled(cred)) git_cred = GitCredential(p.config, p.url) - # Use `deepcopy` to ensure shredding the `git_cred` does not shred the `cred`s copy + # Use `copy` to ensure shredding the `git_cred` does not shred the `cred`s copy cred.user = something(git_cred.username, "") - cred.pass = deepcopy(something(git_cred.password, "")) + cred.pass = git_cred.password !== nothing ? 
copy(git_cred.password) : "" Base.shred!(git_cred) revised = true @@ -235,7 +235,7 @@ function authenticate_userpass(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPay return exhausted_abort() end - return ccall((:git_cred_userpass_plaintext_new, :libgit2), Cint, + return ccall((:git_cred_userpass_plaintext_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring), libgit2credptr, cred.user, cred.pass) end @@ -292,7 +292,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring, cred = explicit # Copy explicit credentials to avoid mutating approved credentials. - # invalidation fix from cred being non-inferrable + # invalidation fix from cred being non-inferable p.credential = Base.invokelatest(deepcopy, cred) if isa(cred, SSHCredential) @@ -307,7 +307,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring, # Perform a deepcopy as we do not want to mutate approved cached credentials if haskey(cache, cred_id) - # invalidation fix from cache[cred_id] being non-inferrable + # invalidation fix from cache[cred_id] being non-inferable p.credential = Base.invokelatest(deepcopy, cache[cred_id]) end end @@ -339,7 +339,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring, if err == 0 if p.explicit !== nothing ensure_initialized() - ccall((:giterr_set_str, :libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), + ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "The explicitly provided credential is incompatible with the requested " * "authentication methods.") end diff --git a/stdlib/LibGit2/src/commit.jl b/stdlib/LibGit2/src/commit.jl index 5d3c666af4bbb..d76a31791e4c4 100644 --- a/stdlib/LibGit2/src/commit.jl +++ b/stdlib/LibGit2/src/commit.jl @@ -14,8 +14,8 @@ function message(c::GitCommit, raw::Bool=false) ensure_initialized() GC.@preserve c begin local msg_ptr::Cstring - msg_ptr = raw ? ccall((:git_commit_message_raw, :libgit2), Cstring, (Ptr{Cvoid},), c.ptr) : - ccall((:git_commit_message, :libgit2), Cstring, (Ptr{Cvoid},), c.ptr) + msg_ptr = raw ? ccall((:git_commit_message_raw, libgit2), Cstring, (Ptr{Cvoid},), c.ptr) : + ccall((:git_commit_message, libgit2), Cstring, (Ptr{Cvoid},), c.ptr) if msg_ptr == C_NULL return nothing end @@ -33,7 +33,7 @@ the person who made changes to the relevant file(s). See also [`committer`](@ref function author(c::GitCommit) ensure_initialized() GC.@preserve c begin - ptr = ccall((:git_commit_author, :libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) + ptr = ccall((:git_commit_author, libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) @assert ptr != C_NULL sig = Signature(ptr) end @@ -51,7 +51,7 @@ a `committer` who committed it. function committer(c::GitCommit) ensure_initialized() GC.@preserve c begin - ptr = ccall((:git_commit_committer, :libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) + ptr = ccall((:git_commit_committer, libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) sig = Signature(ptr) end return sig @@ -73,16 +73,18 @@ function commit(repo::GitRepo, ensure_initialized() commit_id_ptr = Ref(GitHash()) nparents = length(parents) - parentptrs = Ptr{Cvoid}[c.ptr for c in parents] - @check ccall((:git_commit_create, :libgit2), Cint, - (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8}, - Ptr{SignatureStruct}, Ptr{SignatureStruct}, - Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, - Csize_t, Ptr{Ptr{Cvoid}}), - commit_id_ptr, repo.ptr, isempty(refname) ? 
C_NULL : refname, - author.ptr, committer.ptr, - C_NULL, msg, tree.ptr, - nparents, nparents > 0 ? parentptrs : C_NULL) + GC.@preserve parents begin + parentptrs = Ptr{Cvoid}[c.ptr for c in parents] + @check ccall((:git_commit_create, libgit2), Cint, + (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8}, + Ptr{SignatureStruct}, Ptr{SignatureStruct}, + Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, + Csize_t, Ptr{Ptr{Cvoid}}), + commit_id_ptr, repo, isempty(refname) ? C_NULL : refname, + author, committer, + C_NULL, msg, tree, + nparents, nparents > 0 ? parentptrs : C_NULL) + end return commit_id_ptr[] end @@ -147,3 +149,45 @@ function commit(repo::GitRepo, msg::AbstractString; end return commit_id end + +""" + parentcount(c::GitCommit) + +Get the number of parents of this commit. + +See also [`parent`](@ref), [`parent_id`](@ref). +""" +parentcount(c::GitCommit) = + Int(ccall((:git_commit_parentcount, libgit2), Cuint, (Ptr{Cvoid},), c)) + +""" + parent(c::GitCommit, n) + +Get the `n`-th (1-based) parent of the commit. + +See also [`parentcount`](@ref), [`parent_id`](@ref). +""" +function parent(c::GitCommit, n) + ptr_ref = Ref{Ptr{Cvoid}}() + @check ccall((:git_commit_parent, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cuint), ptr_ref, c, n - 1) + return GitCommit(c.owner, ptr_ref[]) +end + +""" + parent_id(c::GitCommit, n) + +Get the oid of the `n`-th (1-based) parent for a commit. + +See also [`parentcount`](@ref), [`parent`](@ref). +""" +function parent_id(c::GitCommit, n) + oid_ptr = ccall((:git_commit_parent_id, libgit2), Ptr{GitHash}, + (Ptr{Cvoid}, Cuint), c, n - 1) + if oid_ptr == C_NULL + # 0-based indexing mimicking the error message from libgit2 + throw(GitError(Error.Invalid, Error.ENOTFOUND, + "parent $(n - 1) does not exist")) + end + return unsafe_load(oid_ptr) +end diff --git a/stdlib/LibGit2/src/config.jl b/stdlib/LibGit2/src/config.jl index a54cd352aa063..0bee705259ca6 100644 --- a/stdlib/LibGit2/src/config.jl +++ b/stdlib/LibGit2/src/config.jl @@ -13,7 +13,7 @@ function GitConfig(path::AbstractString, ensure_initialized() # create new config object cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_new, :libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr) + @check ccall((:git_config_new, libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr) cfg = GitConfig(cfg_ptr_ptr[]) try addfile(cfg, path, level, repo, force) @@ -34,8 +34,8 @@ used. function GitConfig(repo::GitRepo) ensure_initialized() cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_config, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo.ptr) + @check ccall((:git_repository_config, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo) return GitConfig(repo, cfg_ptr_ptr[]) end @@ -49,16 +49,16 @@ options outside a specific git repository. 
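[Editor's aside] The config accessors touched in this hunk keep their Julia-level signatures; only the library handle and argument passing change. A minimal sketch against a throw-away config file (the path is hypothetical):

```julia
using LibGit2

# Hypothetical sketch of the GitConfig get/set wrappers.
cfg_path = joinpath(mktempdir(), "config")
touch(cfg_path)
cfg = LibGit2.GitConfig(cfg_path)
LibGit2.set!(cfg, "user.name", "Jane Doe")    # git_config_set_string
LibGit2.set!(cfg, "core.autocrlf", false)     # git_config_set_bool
LibGit2.get(String, cfg, "user.name")         # git_config_get_string_buf -> "Jane Doe"
close(cfg)
```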
function GitConfig(level::Consts.GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT) ensure_initialized() cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_open_default, :libgit2), Cint, + @check ccall((:git_config_open_default, libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr) cfg = GitConfig(cfg_ptr_ptr[]) if level != Consts.CONFIG_LEVEL_DEFAULT glb_cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) tmpcfg = cfg try - @check ccall((:git_config_open_level, :libgit2), Cint, + @check ccall((:git_config_open_level, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), - glb_cfg_ptr_ptr, cfg.ptr, Cint(level)) + glb_cfg_ptr_ptr, cfg, Cint(level)) cfg = GitConfig(glb_cfg_ptr_ptr[]) finally close(tmpcfg) @@ -90,22 +90,22 @@ function addfile(cfg::GitConfig, path::AbstractString, force::Bool=false) ensure_initialized() @static if LibGit2.VERSION >= v"0.27.0" - @check ccall((:git_config_add_file_ondisk, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Cstring, Cint, Ptr{Cvoid}, Cint), - cfg.ptr, path, Cint(level), isa(repo, GitRepo) ? repo.ptr : C_NULL, Cint(force)) + @check ccall((:git_config_add_file_ondisk, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cint, Ptr{Cvoid}, Cint), + cfg, path, Cint(level), isa(repo, GitRepo) ? repo : C_NULL, Cint(force)) else repo === nothing || error("repo argument is not supported in this version of LibGit2") - @check ccall((:git_config_add_file_ondisk, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Cstring, Cint, Cint), - cfg.ptr, path, Cint(level), Cint(force)) + @check ccall((:git_config_add_file_ondisk, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cint, Cint), + cfg, path, Cint(level), Cint(force)) end end function get(::Type{<:AbstractString}, c::GitConfig, name::AbstractString) ensure_initialized() buf_ref = Ref(Buffer()) - @check ccall((:git_config_get_string_buf, :libgit2), Cint, - (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c.ptr, name) + @check ccall((:git_config_get_string_buf, libgit2), Cint, + (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c, name) buf = buf_ref[] str = unsafe_string(buf.ptr, buf.size) free(buf_ref) @@ -115,24 +115,24 @@ end function get(::Type{Bool}, c::GitConfig, name::AbstractString) ensure_initialized() val_ptr = Ref(Cint(0)) - @check ccall((:git_config_get_bool, :libgit2), Cint, - (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name) + @check ccall((:git_config_get_bool, libgit2), Cint, + (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name) return Bool(val_ptr[]) end function get(::Type{Int32}, c::GitConfig, name::AbstractString) ensure_initialized() val_ptr = Ref(Cint(0)) - @check ccall((:git_config_get_int32, :libgit2), Cint, - (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name) + @check ccall((:git_config_get_int32, libgit2), Cint, + (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name) return val_ptr[] end function get(::Type{Int64}, c::GitConfig, name::AbstractString) ensure_initialized() val_ptr = Ref(Cintmax_t(0)) - @check ccall((:git_config_get_int64, :libgit2), Cint, - (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name) + @check ccall((:git_config_get_int64, libgit2), Cint, + (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c, name) return val_ptr[] end @@ -164,69 +164,69 @@ end function set!(c::GitConfig, name::AbstractString, value::AbstractString) ensure_initialized() - @check ccall((:git_config_set_string, :libgit2), Cint, - (Ptr{Cvoid}, Cstring, Cstring), c.ptr, name, value) + @check ccall((:git_config_set_string, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cstring), c, name, value) end function set!(c::GitConfig, name::AbstractString, value::Bool) 
ensure_initialized() bval = Int32(value) - @check ccall((:git_config_set_bool, :libgit2), Cint, - (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, bval) + @check ccall((:git_config_set_bool, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cint), c, name, bval) end function set!(c::GitConfig, name::AbstractString, value::Int32) ensure_initialized() - @check ccall((:git_config_set_int32, :libgit2), Cint, - (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, value) + @check ccall((:git_config_set_int32, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cint), c, name, value) end function set!(c::GitConfig, name::AbstractString, value::Int64) ensure_initialized() - @check ccall((:git_config_set_int64, :libgit2), Cint, - (Ptr{Cvoid}, Cstring, Cintmax_t), c.ptr, name, value) + @check ccall((:git_config_set_int64, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cintmax_t), c, name, value) end function GitConfigIter(cfg::GitConfig) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_iterator_new, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg.ptr) + @check ccall((:git_config_iterator_new, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg) return GitConfigIter(ci_ptr[]) end function GitConfigIter(cfg::GitConfig, name::AbstractString) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_multivar_iterator_new, :libgit2), Cint, + @check ccall((:git_config_multivar_iterator_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring), - ci_ptr, cfg.ptr, name, C_NULL) + ci_ptr, cfg, name, C_NULL) return GitConfigIter(ci_ptr[]) end function GitConfigIter(cfg::GitConfig, name::AbstractString, value::Regex) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_multivar_iterator_new, :libgit2), Cint, + @check ccall((:git_config_multivar_iterator_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring), - ci_ptr, cfg.ptr, name, value.pattern) + ci_ptr, cfg, name, value.pattern) return GitConfigIter(ci_ptr[]) end function GitConfigIter(cfg::GitConfig, name::Regex) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_iterator_glob_new, :libgit2), Cint, + @check ccall((:git_config_iterator_glob_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), - ci_ptr, cfg.ptr, name.pattern) + ci_ptr, cfg, name.pattern) return GitConfigIter(ci_ptr[]) end function Base.iterate(ci::GitConfigIter, state=nothing) ensure_initialized() entry_ptr_ptr = Ref{Ptr{ConfigEntry}}(C_NULL) - err = ccall((:git_config_next, :libgit2), Cint, - (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci.ptr) + err = ccall((:git_config_next, libgit2), Cint, + (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci) if err == Cint(Error.GIT_OK) return (unsafe_load(entry_ptr_ptr[]), nothing) elseif err == Cint(Error.ITEROVER) diff --git a/stdlib/LibGit2/src/consts.jl b/stdlib/LibGit2/src/consts.jl index f3a460108db6b..1a523b381982b 100644 --- a/stdlib/LibGit2/src/consts.jl +++ b/stdlib/LibGit2/src/consts.jl @@ -2,7 +2,7 @@ module Consts -import ..LibGit2: version, ensure_initialized +import ..LibGit2: version const HEAD_FILE = "HEAD" const FETCH_HEAD = "FETCH_HEAD" @@ -10,12 +10,15 @@ const REMOTE_ORIGIN = "origin" # objs @enum(OBJECT, - OBJ_ANY = -2, - OBJ_BAD = -1, - OBJ_COMMIT = 1, - OBJ_TREE = 2, - OBJ_BLOB = 3, - OBJ_TAG = 4) + OBJ_ANY = -2, + OBJ_BAD = -1, + OBJ_COMMIT = 1, + OBJ_TREE = 2, + OBJ_BLOB = 3, + OBJ_TAG = 4, + OBJ_OFS_DELTA = 6, + OBJ_REF_DELTA = 7) +const OBJ_INVALID = OBJ_BAD #revwalk const SORT_NONE = 
Cint(0) @@ -26,8 +29,10 @@ const SORT_REVERSE = Cint(1 << 2) # refs const REF_INVALID = Cint(0) const REF_OID = Cint(1) +const REF_DIRECT = REF_OID const REF_SYMBOLIC = Cint(2) const REF_LISTALL = REF_OID | REF_SYMBOLIC +const REF_ALL = REF_LISTALL # blame const BLAME_NORMAL = Cuint(0) @@ -36,10 +41,11 @@ const BLAME_TRACK_COPIES_SAME_COMMIT_MOVES = Cuint(1 << 1) const BLAME_TRACK_COPIES_SAME_COMMIT_COPIES = Cuint(1 << 2) const BLAME_TRACK_COPIES_ANY_COMMIT_COPIES = Cuint(1 << 3) const BLAME_FIRST_PARENT = Cuint(1 << 4) +const BLAME_USE_MAILMAP = Cuint(1 << 5) +const BLAME_IGNORE_WHITESPACE = Cuint(1 << 6) # checkout -const CHECKOUT_NONE = Cuint(0) -const CHECKOUT_SAFE = Cuint(1 << 0) +const CHECKOUT_SAFE = Cuint(0) const CHECKOUT_FORCE = Cuint(1 << 1) const CHECKOUT_RECREATE_MISSING = Cuint(1 << 2) const CHECKOUT_ALLOW_CONFLICTS = Cuint(1 << 4) @@ -57,6 +63,10 @@ const CHECKOUT_DONT_OVERWRITE_IGNORED = Cuint(1 << 19) const CHECKOUT_CONFLICT_STYLE_MERGE = Cuint(1 << 20) const CHECKOUT_CONFLICT_STYLE_DIFF3 = Cuint(1 << 21) const CHECKOUT_DONT_REMOVE_EXISTING = Cuint(1 << 22) +const CHECKOUT_DONT_WRITE_INDEX = Cuint(1 << 23) +const CHECKOUT_DRY_RUN = Cuint(1 << 24) +const CHECKOUT_CONFLICT_STYLE_ZDIFF3 = Cuint(1 << 25) +const CHECKOUT_NONE = Cuint(1 << 30) const CHECKOUT_UPDATE_SUBMODULES = Cuint(1 << 16) const CHECKOUT_UPDATE_SUBMODULES_IF_CHANGED = Cuint(1 << 17) @@ -87,6 +97,11 @@ const DIFF_IGNORE_CASE = Cuint(1 << 10) const DIFF_DISABLE_PATHSPEC_MATCH = Cuint(1 << 12) const DIFF_SKIP_BINARY_CHECK = Cuint(1 << 13) const DIFF_ENABLE_FAST_UNTRACKED_DIRS = Cuint(1 << 14) +const DIFF_UPDATE_INDEX = Cuint(1 << 15) +const DIFF_INCLUDE_UNREADABLE = Cuint(1 << 16) +const DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED = Cuint(1 << 17) +const DIFF_INDENT_HEURISTIC = Cuint(1 << 18) +const DIFF_IGNORE_BLANK_LINES = Cuint(1 << 19) const DIFF_FORCE_TEXT = Cuint(1 << 20) const DIFF_FORCE_BINARY = Cuint(1 << 21) @@ -97,16 +112,20 @@ const DIFF_SHOW_UNTRACKED_CONTENT = Cuint(1 << 25) const DIFF_SHOW_UNMODIFIED = Cuint(1 << 26) const DIFF_PATIENCE = Cuint(1 << 28) const DIFF_MINIMAL = Cuint(1 << 29) +const DIFF_SHOW_BINARY = Cuint(1 << 30) const DIFF_FLAG_BINARY = Cuint(1 << 0) const DIFF_FLAG_NOT_BINARY = Cuint(1 << 1) const DIFF_FLAG_VALID_OID = Cuint(1 << 2) +const DIFF_FLAG_EXISTS = Cuint(1 << 3) +const DIFF_FLAG_VALID_SIZE = Cuint(1 << 4) const DIFF_FORMAT_PATCH = Cuint(1) const DIFF_FORMAT_PATCH_HEADER = Cuint(2) const DIFF_FORMAT_RAW = Cuint(3) const DIFF_FORMAT_NAME_ONLY = Cuint(4) const DIFF_FORMAT_NAME_STATUS = Cuint(5) +const DIFF_FORMAT_PATCH_ID = Cuint(6) @enum(DELTA_STATUS, DELTA_UNMODIFIED = Cint(0), DELTA_ADDED = Cint(1), @@ -116,7 +135,9 @@ const DIFF_FORMAT_NAME_STATUS = Cuint(5) DELTA_COPIED = Cint(5), DELTA_IGNORED = Cint(6), DELTA_UNTRACKED = Cint(7), - DELTA_TYPECHANGE = Cint(8)) + DELTA_TYPECHANGE = Cint(8), + DELTA_UNREADABLE = Cint(9), + DELTA_CONFLICTED = Cint(10)) # index const IDXENTRY_NAMEMASK = (0x0fff) @@ -165,7 +186,8 @@ const INDEX_STAGE_ANY = Cint(-1) @enum(GIT_MERGE, MERGE_FIND_RENAMES = 1 << 0, MERGE_FAIL_ON_CONFLICT = 1 << 1, MERGE_SKIP_REUC = 1 << 2, - MERGE_NO_RECURSIVE = 1 << 3) + MERGE_NO_RECURSIVE = 1 << 3, + MERGE_VIRTUAL_BASE = 1 << 4) @enum(GIT_MERGE_FILE, MERGE_FILE_DEFAULT = 0, # Defaults MERGE_FILE_STYLE_MERGE = 1 << 0, # Create standard conflicted merge files @@ -175,7 +197,13 @@ const INDEX_STAGE_ANY = Cint(-1) MERGE_FILE_IGNORE_WHITESPACE_CHANGE = 1 << 4, # Ignore changes in amount of whitespace MERGE_FILE_IGNORE_WHITESPACE_EOL = 1 << 5, # Ignore whitespace at end 
of line MERGE_FILE_DIFF_PATIENCE = 1 << 6, # Use the "patience diff" algorithm - MERGE_FILE_DIFF_MINIMAL = 1 << 7) # Take extra time to find minimal diff + MERGE_FILE_DIFF_MINIMAL = 1 << 7, # Take extra time to find minimal diff + MERGE_FILE_STYLE_ZDIFF3 = 1 << 8, # Create zdiff3 ("zealous diff3")-style files + + # Do not produce file conflicts when common regions have + # changed; keep the conflict markers in the file and accept + # that as the merge result. + MERGE_FILE_ACCEPT_CONFLICTS = 1 << 9) """ Option flags for git merge file favoritism. * `MERGE_FILE_FAVOR_NORMAL`: if both sides of the merge have changes to a section, make a note of the conflict in the index which `git checkout` will use to create @@ -316,6 +344,7 @@ const STATUS_OPT_INCLUDE_UNREADABLE_AS_UNTRACKED = Cuint(1 << 15) # certificate types from `enum git_cert_t` in `cert.h`. const CERT_TYPE_TLS = 1 # GIT_CERT_X509 const CERT_TYPE_SSH = 2 # GIT_CERT_HOSTKEY_LIBSSH2 +const CERT_TYPE_STRARRAY = 3 # GIT_CERT_STRARRAY # certificate callback return values const PASSTHROUGH = -30 @@ -326,6 +355,7 @@ const CERT_ACCEPT = 0 const CERT_SSH_MD5 = 1 << 0 const CERT_SSH_SHA1 = 1 << 1 const CERT_SSH_SHA256 = 1 << 2 +const CERT_SSH_RAW = 1 << 3 # libssh2 known host constants const LIBSSH2_KNOWNHOST_TYPE_PLAIN = 1 @@ -341,6 +371,10 @@ const LIBSSH2_KNOWNHOST_CHECK_MISMATCH = 1 const LIBSSH2_KNOWNHOST_CHECK_NOTFOUND = 2 const LIBSSH2_KNOWNHOST_CHECK_FAILURE = 3 +# Constants for fetch depth (shallowness of fetch). +const FETCH_DEPTH_FULL = 0 +const FETCH_DEPTH_UNSHALLOW = 2147483647 + @enum(GIT_SUBMODULE_IGNORE, SUBMODULE_IGNORE_UNSPECIFIED = -1, # use the submodule's configuration SUBMODULE_IGNORE_NONE = 1, # any change or untracked == dirty SUBMODULE_IGNORE_UNTRACKED = 2, # dirty if tracked files change @@ -357,9 +391,11 @@ Option flags for `GitRepo`. @enum(GIT_REPOSITORY_OPEN, REPOSITORY_OPEN_DEFAULT = 0, REPOSITORY_OPEN_NO_SEARCH = 1<<0, REPOSITORY_OPEN_CROSS_FS = 1<<1, - REPOSITORY_OPEN_BARE = 1<<2) + REPOSITORY_OPEN_BARE = 1<<2, + REPOSITORY_OPEN_NO_DOTGIT = 1<<3, + REPOSITORY_OPEN_FROM_ENV = 1<<4) -@enum(GIT_BRANCH, BRANCH_LOCAL = 1, BRANCH_REMOTE = 2) +@enum(GIT_BRANCH, BRANCH_LOCAL = 1, BRANCH_REMOTE = 2, BRANCH_ALL = 1 | 2) @enum(GIT_FILEMODE, FILEMODE_UNREADABLE = 0o000000, FILEMODE_TREE = 0o040000, @@ -381,7 +417,32 @@ Option flags for `GitRepo`. FEATURE_SSH = Cuint(1 << 2), FEATURE_NSEC = Cuint(1 << 3)) -if version() >= v"0.24.0" +if version() >= v"1.8.0" + @doc """ + Priority level of a config file. + + These priority levels correspond to the natural escalation logic (from higher to lower) when searching for config entries in git. + + * `CONFIG_LEVEL_DEFAULT` - Open the global, XDG and system configuration files if any available. + * `CONFIG_LEVEL_PROGRAMDATA` - System-wide on Windows, for compatibility with portable git + * `CONFIG_LEVEL_SYSTEM` - System-wide configuration file; `/etc/gitconfig` on Linux systems + * `CONFIG_LEVEL_XDG` - XDG compatible configuration file; typically `~/.config/git/config` + * `CONFIG_LEVEL_GLOBAL` - User-specific configuration file (also called Global configuration file); typically `~/.gitconfig` + * `CONFIG_LEVEL_LOCAL` - Repository specific configuration file; `\$WORK_DIR/.git/config` on non-bare repos + * `CONFIG_LEVEL_WORKTREE` - Worktree specific configuration file; `\$GIT_DIR/config.worktree` + * `CONFIG_LEVEL_APP` - Application specific configuration file; freely defined by applications + * `CONFIG_HIGHEST_LEVEL` - Represents the highest level available config file (i.e. 
the most specific config file available that actually is loaded) + """ + @enum(GIT_CONFIG, CONFIG_LEVEL_DEFAULT = 0, + CONFIG_LEVEL_PROGRAMDATA = 1, + CONFIG_LEVEL_SYSTEM = 2, + CONFIG_LEVEL_XDG = 3, + CONFIG_LEVEL_GLOBAL = 4, + CONFIG_LEVEL_LOCAL = 5, + CONFIG_LEVEL_WORKTREE = 6, + CONFIG_LEVEL_APP = 7, + CONFIG_HIGHEST_LEVEL =-1) +elseif version() >= v"0.24.0" @doc """ Priority level of a config file. @@ -432,19 +493,49 @@ Global library options. These are used to select which global option to set or get and are used in `git_libgit2_opts()`. """ -@enum(GIT_OPT, GET_MWINDOW_SIZE = 0, - SET_MWINDOW_SIZE = 1, - GET_MWINDOW_MAPPED_LIMIT = 2, - SET_MWINDOW_MAPPED_LIMIT = 3, - GET_SEARCH_PATH = 4, - SET_SEARCH_PATH = 5, - SET_CACHE_OBJECT_LIMIT = 6, - SET_CACHE_MAX_SIZE = 7, - ENABLE_CACHING = 8, - GET_CACHED_MEMORY = 9, - GET_TEMPLATE_PATH = 10, - SET_TEMPLATE_PATH = 11, - SET_SSL_CERT_LOCATIONS = 12) +@enum(GIT_OPT, GET_MWINDOW_SIZE = 0, + SET_MWINDOW_SIZE, + GET_MWINDOW_MAPPED_LIMIT, + SET_MWINDOW_MAPPED_LIMIT, + GET_SEARCH_PATH, + SET_SEARCH_PATH, + SET_CACHE_OBJECT_LIMIT, + SET_CACHE_MAX_SIZE, + ENABLE_CACHING, + GET_CACHED_MEMORY, + GET_TEMPLATE_PATH, + SET_TEMPLATE_PATH, + SET_SSL_CERT_LOCATIONS, + SET_USER_AGENT, + ENABLE_STRICT_OBJECT_CREATION, + ENABLE_STRICT_SYMBOLIC_REF_CREATION, + SET_SSL_CIPHERS, + GET_USER_AGENT, + ENABLE_OFS_DELTA, + ENABLE_FSYNC_GITDIR, + GET_WINDOWS_SHAREMODE, + SET_WINDOWS_SHAREMODE, + ENABLE_STRICT_HASH_VERIFICATION, + SET_ALLOCATOR, + ENABLE_UNSAVED_INDEX_SAFETY, + GET_PACK_MAX_OBJECTS, + SET_PACK_MAX_OBJECTS, + DISABLE_PACK_KEEP_FILE_CHECKS, + ENABLE_HTTP_EXPECT_CONTINUE, + GET_MWINDOW_FILE_LIMIT, + SET_MWINDOW_FILE_LIMIT, + SET_ODB_PACKED_PRIORITY, + SET_ODB_LOOSE_PRIORITY, + GET_EXTENSIONS, + SET_EXTENSIONS, + GET_OWNER_VALIDATION, + SET_OWNER_VALIDATION, + GET_HOMEDIR, + SET_HOMEDIR, + SET_SERVER_CONNECT_TIMEOUT, + GET_SERVER_CONNECT_TIMEOUT, + SET_SERVER_TIMEOUT, + GET_SERVER_TIMEOUT) """ Option flags for `GitProxy`. @@ -468,4 +559,14 @@ Option flags for `GitProxy`. TRACE_TRACE end +# The type of object id +@enum(GIT_OID_TYPE, + OID_DEFAULT = 0, + OID_SHA1 = 1) + +# Direction of the connection. +@enum(GIT_DIRECTION, + DIRECTION_FETCH = 0, + DIRECTION_PUSH = 1) + end diff --git a/stdlib/LibGit2/src/diff.jl b/stdlib/LibGit2/src/diff.jl index f2aa2feb2c2e9..a3f2cafe62e96 100644 --- a/stdlib/LibGit2/src/diff.jl +++ b/stdlib/LibGit2/src/diff.jl @@ -27,13 +27,13 @@ function diff_tree(repo::GitRepo, tree::GitTree, pathspecs::AbstractString=""; c ensure_initialized() diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) if cached - @check ccall((:git_diff_tree_to_index, :libgit2), Cint, + @check ccall((:git_diff_tree_to_index, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}), - diff_ptr_ptr, repo.ptr, tree.ptr, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs) + diff_ptr_ptr, repo, tree, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs) else - @check ccall((:git_diff_tree_to_workdir_with_index, :libgit2), Cint, + @check ccall((:git_diff_tree_to_workdir_with_index, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}), - diff_ptr_ptr, repo.ptr, tree.ptr, isempty(pathspecs) ? C_NULL : pathspecs) + diff_ptr_ptr, repo, tree, isempty(pathspecs) ? 
C_NULL : pathspecs) end return GitDiff(repo, diff_ptr_ptr[]) end @@ -51,9 +51,9 @@ to compare a commit on another branch with the current latest commit on `master` function diff_tree(repo::GitRepo, oldtree::GitTree, newtree::GitTree) ensure_initialized() diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_diff_tree_to_tree, :libgit2), Cint, + @check ccall((:git_diff_tree_to_tree, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}), - diff_ptr_ptr, repo.ptr, oldtree.ptr, newtree.ptr, C_NULL) + diff_ptr_ptr, repo, oldtree, newtree, C_NULL) return GitDiff(repo, diff_ptr_ptr[]) end @@ -67,9 +67,9 @@ files were changed, how many insertions were made, and how many deletions were m function GitDiffStats(diff::GitDiff) ensure_initialized() diff_stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_diff_get_stats, :libgit2), Cint, + @check ccall((:git_diff_get_stats, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), - diff_stat_ptr_ptr, diff.ptr) + diff_stat_ptr_ptr, diff) return GitDiffStats(diff.owner, diff_stat_ptr_ptr[]) end @@ -83,7 +83,7 @@ are to be included or not). """ function files_changed(diff_stat::GitDiffStats) ensure_initialized() - return ccall((:git_diff_stats_files_changed, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) + return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat) end """ @@ -96,7 +96,7 @@ are to be included or not). """ function insertions(diff_stat::GitDiffStats) ensure_initialized() - return ccall((:git_diff_stats_insertions, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) + return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat) end """ @@ -109,12 +109,12 @@ are to be included or not). """ function deletions(diff_stat::GitDiffStats) ensure_initialized() - return ccall((:git_diff_stats_deletions, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) + return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat) end function count(diff::GitDiff) ensure_initialized() - return ccall((:git_diff_num_deltas, :libgit2), Cint, (Ptr{Cvoid},), diff.ptr) + return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff) end function Base.getindex(diff::GitDiff, i::Integer) @@ -122,10 +122,12 @@ function Base.getindex(diff::GitDiff, i::Integer) throw(BoundsError(diff, (i,))) end ensure_initialized() - delta_ptr = ccall((:git_diff_get_delta, :libgit2), - Ptr{DiffDelta}, - (Ptr{Cvoid}, Csize_t), diff.ptr, i-1) - return unsafe_load(delta_ptr) + GC.@preserve diff begin # preserve `diff` object until return of `unsafe_load` + delta_ptr = ccall((:git_diff_get_delta, libgit2), + Ptr{DiffDelta}, + (Ptr{Cvoid}, Csize_t), diff, i-1) + return unsafe_load(delta_ptr) + end end function Base.show(io::IO, diff_stat::GitDiffStats) diff --git a/stdlib/LibGit2/src/error.jl b/stdlib/LibGit2/src/error.jl index 219b8cdf88e69..6647d803d3193 100644 --- a/stdlib/LibGit2/src/error.jl +++ b/stdlib/LibGit2/src/error.jl @@ -3,6 +3,7 @@ module Error import ..LibGit2: ensure_initialized +using LibGit2_jll export GitError @@ -18,7 +19,7 @@ export GitError EUNMERGED = Cint(-10), # merge in progress prevented op ENONFASTFORWARD = Cint(-11), # ref not fast-forwardable EINVALIDSPEC = Cint(-12), # name / ref not in valid format - EMERGECONFLICT = Cint(-13), # merge conflict prevented op + ECONFLICT = Cint(-13), # Checkout conflicts prevented operation ELOCKED = Cint(-14), # lock file prevented op EMODIFIED = Cint(-15), # ref value does not match expected EAUTH = 
Cint(-16), # authentication error @@ -26,13 +27,23 @@ export GitError EAPPLIED = Cint(-18), # patch/merge has already been applied EPEEL = Cint(-19), # the requested peel operation is not possible EEOF = Cint(-20), # unexpected EOF + EINVALID = Cint(-21), # Invalid operation or input + EUNCOMMITTED = Cint(-22), # Uncommitted changes in index prevented operation + EDIRECTORY = Cint(-23), # The operation is not valid for a directory + EMERGECONFLICT = Cint(-24), # A merge conflict exists and cannot continue + PASSTHROUGH = Cint(-30), # internal only ITEROVER = Cint(-31), # signals end of iteration RETRY = Cint(-32), # internal only EMISMATCH = Cint(-33), # hashsum mismatch in object EINDEXDIRTY = Cint(-34), # unsaved changes in the index would be overwritten EAPPLYFAIL = Cint(-35), # patch application failed - EOWNER = Cint(-36)) # the object is not owned by the current user + EOWNER = Cint(-36), # the object is not owned by the current user + TIMEOUT = Cint(-37), # The operation timed out + EUNCHANGED = Cint(-38), # There were no changes + ENOTSUPPORTED = Cint(-39), # An option is not supported + EREADONLY = Cint(-40), # The subject is read-only +) @enum(Class, None, NoMemory, @@ -68,7 +79,9 @@ export GitError Patch, WorkTree, SHA1, - HTTP) + HTTP, + Internal, + Grafts) struct ErrorStruct message::Ptr{UInt8} @@ -84,7 +97,7 @@ Base.show(io::IO, err::GitError) = print(io, "GitError(Code:$(err.code), Class:$ function last_error() ensure_initialized() - err = ccall((:giterr_last, :libgit2), Ptr{ErrorStruct}, ()) + err = ccall((:git_error_last, libgit2), Ptr{ErrorStruct}, ()) if err != C_NULL err_obj = unsafe_load(err) err_class = Class(err_obj.class) diff --git a/stdlib/LibGit2/src/gitcredential.jl b/stdlib/LibGit2/src/gitcredential.jl index 7ff20ca1fdf2c..ea97d87d444ae 100644 --- a/stdlib/LibGit2/src/gitcredential.jl +++ b/stdlib/LibGit2/src/gitcredential.jl @@ -183,16 +183,16 @@ end function run!(helper::GitCredentialHelper, operation::AbstractString, cred::GitCredential) cmd = `$(helper.cmd) $operation` - p = open(cmd, "r+") - - # Provide the helper with the credential information we know - write(p, cred) - write(p, "\n") - t = @async close(p.in) - - # Process the response from the helper - Base.read!(p, cred) - wait(p) + open(cmd, "r+") do p + # Provide the helper with the credential information we know + write(p, cred) + write(p, "\n") + t = @async close(p.in) + + # Process the response from the helper + Base.read!(p, cred) + wait(t) + end return cred end diff --git a/stdlib/LibGit2/src/index.jl b/stdlib/LibGit2/src/index.jl index b8baf624540b0..81e8e75d59585 100644 --- a/stdlib/LibGit2/src/index.jl +++ b/stdlib/LibGit2/src/index.jl @@ -8,8 +8,8 @@ Load the index file for the repository `repo`. function GitIndex(repo::GitRepo) ensure_initialized() idx_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_index, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo.ptr) + @check ccall((:git_repository_index, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo) return GitIndex(repo, idx_ptr_ptr[]) end @@ -25,7 +25,7 @@ has changed since the last time it was loaded into `idx`. """ function read!(idx::GitIndex, force::Bool = false) ensure_initialized() - @check ccall((:git_index_read, :libgit2), Cint, (Ptr{Cvoid}, Cint), idx.ptr, Cint(force)) + @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx, Cint(force)) return idx end @@ -36,7 +36,7 @@ Write the state of index `idx` to disk using a file lock. 
""" function write!(idx::GitIndex) ensure_initialized() - @check ccall((:git_index_write, :libgit2), Cint, (Ptr{Cvoid},), idx.ptr) + @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx) return idx end @@ -51,8 +51,8 @@ repository cannot be bare. `idx` must not contain any files with conflicts. function write_tree!(idx::GitIndex) ensure_initialized() oid_ptr = Ref(GitHash()) - @check ccall((:git_index_write_tree, :libgit2), Cint, - (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx.ptr) + @check ccall((:git_index_write_tree, libgit2), Cint, + (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx) return oid_ptr[] end @@ -73,8 +73,8 @@ Read the tree `tree` (or the tree pointed to by `treehash` in the repository own """ function read_tree!(idx::GitIndex, tree::GitTree) ensure_initialized() - @check ccall((:git_index_read_tree, :libgit2), Cint, - (Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, tree.ptr) + @check ccall((:git_index_read_tree, libgit2), Cint, + (Ptr{Cvoid}, Ptr{Cvoid}), idx, tree) end read_tree!(idx::GitIndex, hash::AbstractGitHash) = read_tree!(idx, GitTree(repository(idx), hash)) @@ -104,9 +104,9 @@ with respect to ignored files: function add!(idx::GitIndex, files::AbstractString...; flags::Cuint = Consts.INDEX_ADD_DEFAULT) ensure_initialized() - @check ccall((:git_index_add_all, :libgit2), Cint, + @check ccall((:git_index_add_all, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Cuint, Ptr{Cvoid}, Ptr{Cvoid}), - idx.ptr, collect(files), flags, C_NULL, C_NULL) + idx, collect(files), flags, C_NULL, C_NULL) end """ @@ -120,9 +120,9 @@ database. """ function update!(idx::GitIndex, files::AbstractString...) ensure_initialized() - @check ccall((:git_index_update_all, :libgit2), Cint, + @check ccall((:git_index_update_all, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}), - idx.ptr, collect(files), C_NULL, C_NULL) + idx, collect(files), C_NULL, C_NULL) end """ @@ -134,9 +134,9 @@ of the `repo`). """ function remove!(idx::GitIndex, files::AbstractString...) ensure_initialized() - @check ccall((:git_index_remove_all, :libgit2), Cint, + @check ccall((:git_index_remove_all, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}), - idx.ptr, collect(files), C_NULL, C_NULL) + idx, collect(files), C_NULL, C_NULL) end function add!(repo::GitRepo, files::AbstractString...; @@ -173,13 +173,13 @@ end function count(idx::GitIndex) ensure_initialized() - return ccall((:git_index_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), idx.ptr) + return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx) end function Base.getindex(idx::GitIndex, i::Integer) ensure_initialized() GC.@preserve idx begin - ie_ptr = ccall((:git_index_get_byindex, :libgit2), + ie_ptr = ccall((:git_index_get_byindex, libgit2), Ptr{IndexEntry}, (Ptr{Cvoid}, Csize_t), idx.ptr, i-1) ie_ptr == C_NULL && return nothing @@ -191,8 +191,8 @@ end function Base.findall(path::String, idx::GitIndex) ensure_initialized() pos_ref = Ref{Csize_t}(0) - ret = ccall((:git_index_find, :libgit2), Cint, - (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx.ptr, path) + ret = ccall((:git_index_find, libgit2), Cint, + (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx, path) ret == Cint(Error.ENOTFOUND) && return nothing return pos_ref[]+1 end @@ -210,7 +210,7 @@ of a multi-branch "octopus" merge, stages `2`, `3`, and `4` might be used). 
""" function stage(ie::IndexEntry) ensure_initialized() - return ccall((:git_index_entry_stage, :libgit2), Cint, (Ptr{IndexEntry},), Ref(ie)) + return ccall((:git_index_entry_stage, libgit2), Cint, (Ptr{IndexEntry},), Ref(ie)) end function Base.show(io::IO, idx::GitIndex) diff --git a/stdlib/LibGit2/src/merge.jl b/stdlib/LibGit2/src/merge.jl index 0b2ddab1e8512..8bd8d1e4b64e9 100644 --- a/stdlib/LibGit2/src/merge.jl +++ b/stdlib/LibGit2/src/merge.jl @@ -16,27 +16,27 @@ branch head described using `GitReference`. function GitAnnotated(repo::GitRepo, commit_id::GitHash) ensure_initialized() ann_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_annotated_commit_lookup, :libgit2), Cint, + @check ccall((:git_annotated_commit_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}), - ann_ptr_ptr, repo.ptr, Ref(commit_id)) + ann_ptr_ptr, repo, Ref(commit_id)) return GitAnnotated(repo, ann_ptr_ptr[]) end function GitAnnotated(repo::GitRepo, ref::GitReference) ensure_initialized() ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_annotated_commit_from_ref, :libgit2), Cint, + @check ccall((:git_annotated_commit_from_ref, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}), - ann_ref_ref, repo.ptr, ref.ptr) + ann_ref_ref, repo, ref) return GitAnnotated(repo, ann_ref_ref[]) end function GitAnnotated(repo::GitRepo, fh::FetchHead) ensure_initialized() ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_annotated_commit_from_fetchhead, :libgit2), Cint, + @check ccall((:git_annotated_commit_from_fetchhead, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Ptr{GitHash}), - ann_ref_ref, repo.ptr, fh.name, fh.url, Ref(fh.oid)) + ann_ref_ref, repo, fh.name, fh.url, Ref(fh.oid)) return GitAnnotated(repo, ann_ref_ref[]) end @@ -49,7 +49,7 @@ end function GitHash(ann::GitAnnotated) ensure_initialized() GC.@preserve ann begin - oid = unsafe_load(ccall((:git_annotated_commit_id, :libgit2), Ptr{GitHash}, (Ptr{Cvoid},), ann.ptr)) + oid = unsafe_load(ccall((:git_annotated_commit_id, libgit2), Ptr{GitHash}, (Ptr{Cvoid},), ann.ptr)) end return oid end @@ -88,9 +88,11 @@ function merge_analysis(repo::GitRepo, anns::Vector{GitAnnotated}) preference = Ref{Cint}(0) anns_ref = Ref(Base.map(a->a.ptr, anns), 1) anns_size = Csize_t(length(anns)) - @check ccall((:git_merge_analysis, :libgit2), Cint, - (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t), - analysis, preference, repo.ptr, anns_ref, anns_size) + GC.@preserve anns begin + @check ccall((:git_merge_analysis, libgit2), Cint, + (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t), + analysis, preference, repo, anns_ref, anns_size) + end return analysis[], preference[] end @@ -147,11 +149,13 @@ function merge!(repo::GitRepo, anns::Vector{GitAnnotated}; checkout_opts::CheckoutOptions = CheckoutOptions()) ensure_initialized() anns_size = Csize_t(length(anns)) - @check ccall((:git_merge, :libgit2), Cint, - (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t, - Ptr{MergeOptions}, Ptr{CheckoutOptions}), - repo.ptr, Base.map(x->x.ptr, anns), anns_size, - Ref(merge_opts), Ref(checkout_opts)) + GC.@preserve anns begin + @check ccall((:git_merge, libgit2), Cint, + (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t, + Ptr{MergeOptions}, Ptr{CheckoutOptions}), + repo, Base.map(x->x.ptr, anns), anns_size, + Ref(merge_opts), Ref(checkout_opts)) + end @info "Review and commit merged changes" return true end @@ -261,9 +265,9 @@ function merge_base(repo::GitRepo, one::AbstractString, two::AbstractString) oid2_ptr = Ref(GitHash(two)) moid_ptr = 
Ref(GitHash()) moid = try - @check ccall((:git_merge_base, :libgit2), Cint, + @check ccall((:git_merge_base, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Ptr{GitHash}, Ptr{GitHash}), - moid_ptr, repo.ptr, oid1_ptr, oid2_ptr) + moid_ptr, repo, oid1_ptr, oid2_ptr) moid_ptr[] catch e GitHash() diff --git a/stdlib/LibGit2/src/oid.jl b/stdlib/LibGit2/src/oid.jl index 937684439419f..fae0d3737a429 100644 --- a/stdlib/LibGit2/src/oid.jl +++ b/stdlib/LibGit2/src/oid.jl @@ -13,7 +13,7 @@ function GitHash(ptr::Ptr{UInt8}) end ensure_initialized() oid_ptr = Ref(GitHash()) - @check ccall((:git_oid_fromraw, :libgit2), Cint, + @check ccall((:git_oid_fromraw, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}), oid_ptr, ptr) return oid_ptr[] end @@ -43,7 +43,7 @@ function GitHash(id::AbstractString) end ensure_initialized() oid_ptr = Ref{GitHash}() - @check ccall((:git_oid_fromstrn, :libgit2), Cint, + @check ccall((:git_oid_fromstrn, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, bstr, len) return oid_ptr[] end @@ -56,7 +56,7 @@ Construct a `GitShortHash` from the data stored in the given [`Buffer`](@ref). function GitShortHash(buf::Buffer) ensure_initialized() oid_ptr = Ref{GitHash}() - @check ccall((:git_oid_fromstrn, :libgit2), Cint, + @check ccall((:git_oid_fromstrn, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, buf.ptr, buf.size) GitShortHash(oid_ptr[], buf.size) end @@ -71,7 +71,7 @@ function GitShortHash(id::AbstractString) bstr = String(id) len = sizeof(bstr) oid_ptr = Ref{GitHash}() - @check ccall((:git_oid_fromstrn, :libgit2), Cint, + @check ccall((:git_oid_fromstrn, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, bstr, len) GitShortHash(oid_ptr[], len) end @@ -113,7 +113,7 @@ function GitHash(ref::GitReference) reftype(ref) != Consts.REF_OID && return GitHash() ensure_initialized() GC.@preserve ref begin - oid_ptr = ccall((:git_reference_target, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), ref.ptr) + oid_ptr = ccall((:git_reference_target, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), ref.ptr) oid_ptr == C_NULL && return GitHash() oid = GitHash(oid_ptr) end @@ -131,9 +131,9 @@ function GitHash(repo::GitRepo, ref_name::AbstractString) isempty(repo) && return GitHash() ensure_initialized() oid_ptr = Ref(GitHash()) - @check ccall((:git_reference_name_to_id, :libgit2), Cint, + @check ccall((:git_reference_name_to_id, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Cstring), - oid_ptr, repo.ptr, ref_name) + oid_ptr, repo, ref_name) return oid_ptr[] end @@ -144,7 +144,7 @@ Get the identifier (`GitHash`) of `obj`. """ function GitHash(obj::GitObject) ensure_initialized() - GitHash(ccall((:git_object_id, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj.ptr)) + GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj)) end ==(obj1::GitObject, obj2::GitObject) = GitHash(obj1) == GitHash(obj2) @@ -159,8 +159,8 @@ unambiguously identify the object in the repository. 
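[Editor's aside] The oid constructors and comparisons above are unchanged at the Julia level; a small sketch of how they behave (purely illustrative values):

```julia
using LibGit2

# Hypothetical sketch of GitHash/GitShortHash construction and comparison.
a = LibGit2.GitHash("0"^40)                # full 40-character hex id (the null id)
b = LibGit2.GitHash(string("ff", "0"^38))  # another full-length id
a == b                                     # false; equality goes through git_oid_cmp
LibGit2.GitShortHash("abc123")             # abbreviated id, remembers its length
```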
function GitShortHash(obj::GitObject) ensure_initialized() buf_ref = Ref(Buffer()) - @check ccall((:git_object_short_id, :libgit2), Cint, - (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj.ptr) + @check ccall((:git_object_short_id, libgit2), Cint, + (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj) sid = GitShortHash(buf_ref[]) free(buf_ref) return sid @@ -187,7 +187,7 @@ Base.hash(id::GitHash, h::UInt) = hash(id.val, h) function Base.cmp(id1::GitHash, id2::GitHash) ensure_initialized() - Int(ccall((:git_oid_cmp, :libgit2), Cint, + Int(ccall((:git_oid_cmp, libgit2), Cint, (Ptr{GitHash}, Ptr{GitHash}), Ref(id1), Ref(id2))) end @@ -195,7 +195,7 @@ function Base.cmp(id1::GitShortHash, id2::GitShortHash) ensure_initialized() # shortened hashes appear at the beginning of the order, i.e. # 000 < 01 < 010 < 011 < 0112 - c = Int(ccall((:git_oid_ncmp, :libgit2), Cint, + c = Int(ccall((:git_oid_ncmp, libgit2), Cint, (Ptr{GitHash}, Ptr{GitHash}, Csize_t), Ref(id1.hash), Ref(id2.hash), min(id1.len, id2.len))) return c == 0 ? cmp(id1.len, id2.len) : c diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl index 51b52ef006c38..e4abf5a85cc92 100644 --- a/stdlib/LibGit2/src/rebase.jl +++ b/stdlib/LibGit2/src/rebase.jl @@ -5,17 +5,17 @@ function GitRebase(repo::GitRepo, branch::GitAnnotated, upstream::GitAnnotated; opts::RebaseOptions = RebaseOptions()) ensure_initialized() rebase_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_rebase_init, :libgit2), Cint, + @check ccall((:git_rebase_init, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{RebaseOptions}), - rebase_ptr_ptr, repo.ptr, branch.ptr, upstream.ptr, - onto === nothing ? C_NULL : onto.ptr, Ref(opts)) + rebase_ptr_ptr, repo, branch, upstream, + onto === nothing ? C_NULL : onto, Ref(opts)) return GitRebase(repo, rebase_ptr_ptr[]) end function count(rb::GitRebase) ensure_initialized() - return ccall((:git_rebase_operation_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) + return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb) end """ @@ -28,7 +28,7 @@ has not yet been called or iteration over `rb` has not yet begun), return """ function current(rb::GitRebase) ensure_initialized() - return ccall((:git_rebase_operation_current, :libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) + return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb) end function Base.getindex(rb::GitRebase, i::Integer) @@ -37,7 +37,7 @@ function Base.getindex(rb::GitRebase, i::Integer) end ensure_initialized() GC.@preserve rb begin - rb_op_ptr = ccall((:git_rebase_operation_byindex, :libgit2), + rb_op_ptr = ccall((:git_rebase_operation_byindex, libgit2), Ptr{RebaseOperation}, (Ptr{Cvoid}, Csize_t), rb.ptr, i-1) rb_op = unsafe_load(rb_op_ptr) @@ -49,7 +49,7 @@ function Base.iterate(rb::GitRebase, state=nothing) ensure_initialized() rb_op_ptr_ptr = Ref{Ptr{RebaseOperation}}(C_NULL) GC.@preserve rb begin - err = ccall((:git_rebase_next, :libgit2), Cint, + err = ccall((:git_rebase_next, libgit2), Cint, (Ptr{Ptr{RebaseOperation}}, Ptr{Cvoid}), rb_op_ptr_ptr, rb.ptr) if err == Cint(Error.GIT_OK) @@ -78,9 +78,9 @@ function commit(rb::GitRebase, sig::GitSignature) ensure_initialized() oid_ptr = Ref(GitHash()) try - @check ccall((:git_rebase_commit, :libgit2), Error.Code, + @check ccall((:git_rebase_commit, libgit2), Error.Code, (Ptr{GitHash}, Ptr{Cvoid}, Ptr{SignatureStruct}, Ptr{SignatureStruct}, Ptr{UInt8}, Ptr{UInt8}), - oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL) + oid_ptr, rb, C_NULL, 
sig, C_NULL, C_NULL) catch err # TODO: return current HEAD instead err isa GitError && err.code === Error.EAPPLIED && return nothing @@ -100,8 +100,8 @@ rebase had completed), and `-1` for other errors. """ function abort(rb::GitRebase) ensure_initialized() - return ccall((:git_rebase_abort, :libgit2), Csize_t, - (Ptr{Cvoid},), rb.ptr) + return ccall((:git_rebase_abort, libgit2), Csize_t, + (Ptr{Cvoid},), rb) end """ @@ -113,7 +113,7 @@ rebase finishes successfully, `-1` if there is an error. """ function finish(rb::GitRebase, sig::GitSignature) ensure_initialized() - return ccall((:git_rebase_finish, :libgit2), Csize_t, + return ccall((:git_rebase_finish, libgit2), Csize_t, (Ptr{Cvoid}, Ptr{SignatureStruct}), - rb.ptr, sig.ptr) + rb, sig) end diff --git a/stdlib/LibGit2/src/reference.jl b/stdlib/LibGit2/src/reference.jl index c05b09ddfc518..de6be0dbe9543 100644 --- a/stdlib/LibGit2/src/reference.jl +++ b/stdlib/LibGit2/src/reference.jl @@ -3,9 +3,9 @@ function GitReference(repo::GitRepo, refname::AbstractString) ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_lookup, :libgit2), Cint, + @check ccall((:git_reference_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), - ref_ptr_ptr, repo.ptr, refname) + ref_ptr_ptr, repo, refname) return GitReference(repo, ref_ptr_ptr[]) end @@ -13,9 +13,9 @@ function GitReference(repo::GitRepo, obj_oid::GitHash, refname::AbstractString = force::Bool=false, msg::AbstractString="") ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_create, :libgit2), Cint, + @check ccall((:git_reference_create, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{GitHash}, Cint, Cstring), - ref_ptr_ptr, repo.ptr, refname, Ref(obj_oid), Cint(force), + ref_ptr_ptr, repo, refname, Ref(obj_oid), Cint(force), isempty(msg) ? C_NULL : msg) return GitReference(repo, ref_ptr_ptr[]) end @@ -28,8 +28,8 @@ to this branch will have no parents. """ function isorphan(repo::GitRepo) ensure_initialized() - r = @check ccall((:git_repository_head_unborn, :libgit2), Cint, - (Ptr{Cvoid},), repo.ptr) + r = @check ccall((:git_repository_head_unborn, libgit2), Cint, + (Ptr{Cvoid},), repo) r != 0 end @@ -41,8 +41,8 @@ Return a `GitReference` to the current HEAD of `repo`. 
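[Editor's aside] A hypothetical sketch of the reference wrappers in this hunk; `repo_path` is a placeholder for a repository that already has at least one commit:

```julia
using LibGit2

# Hypothetical sketch: inspect HEAD through the wrappers above.
repo = LibGit2.GitRepo(repo_path)
ref  = LibGit2.head(repo)        # GitReference for HEAD
LibGit2.shortname(ref)           # e.g. "master"
LibGit2.GitHash(ref)             # the commit id HEAD resolves to
close(ref); close(repo)
```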
function head(repo::GitRepo) ensure_initialized() head_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_head, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo.ptr) + @check ccall((:git_repository_head, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo) return GitReference(repo, head_ptr_ptr[]) end @@ -68,7 +68,7 @@ function shortname(ref::GitReference) isempty(ref) && return "" ensure_initialized() GC.@preserve ref begin - name_ptr = ccall((:git_reference_shorthand, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) + name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref) name_ptr == C_NULL && return "" name = unsafe_string(name_ptr) end @@ -85,7 +85,7 @@ Return a `Cint` corresponding to the type of `ref`: """ function reftype(ref::GitReference) ensure_initialized() - return ccall((:git_reference_type, :libgit2), Cint, (Ptr{Cvoid},), ref.ptr) + return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref) end """ @@ -100,7 +100,7 @@ function fullname(ref::GitReference) reftype(ref) == Consts.REF_OID && return "" ensure_initialized() GC.@preserve ref begin - rname = ccall((:git_reference_symbolic_target, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) + rname = ccall((:git_reference_symbolic_target, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) rname == C_NULL && return "" name = unsafe_string(rname) end @@ -116,7 +116,7 @@ function name(ref::GitReference) isempty(ref) && return "" ensure_initialized() GC.@preserve ref begin - name_ptr = ccall((:git_reference_name, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) + name_ptr = ccall((:git_reference_name, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) name_ptr == C_NULL && return "" name = unsafe_string(name_ptr) end @@ -128,7 +128,7 @@ function branch(ref::GitReference) ensure_initialized() str_ptr_ptr = Ref{Cstring}() GC.@preserve ref begin - @check ccall((:git_branch_name, :libgit2), Cint, + @check ccall((:git_branch_name, libgit2), Cint, (Ptr{Cstring}, Ptr{Cvoid},), str_ptr_ptr, ref.ptr) str = unsafe_string(str_ptr_ptr[]) end @@ -138,32 +138,32 @@ end function ishead(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_branch_is_head, :libgit2), Cint, - (Ptr{Cvoid},), ref.ptr) + err = ccall((:git_branch_is_head, libgit2), Cint, + (Ptr{Cvoid},), ref) return err == 1 end function isbranch(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_reference_is_branch, :libgit2), Cint, - (Ptr{Cvoid},), ref.ptr) + err = ccall((:git_reference_is_branch, libgit2), Cint, + (Ptr{Cvoid},), ref) return err == 1 end function istag(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_reference_is_tag, :libgit2), Cint, - (Ptr{Cvoid},), ref.ptr) + err = ccall((:git_reference_is_tag, libgit2), Cint, + (Ptr{Cvoid},), ref) return err == 1 end function isremote(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_reference_is_remote, :libgit2), Cint, - (Ptr{Cvoid},), ref.ptr) + err = ccall((:git_reference_is_remote, libgit2), Cint, + (Ptr{Cvoid},), ref) return err == 1 end @@ -199,8 +199,8 @@ then `ref` will be peeled until an object other than a [`GitTag`](@ref) is obtai function peel(::Type{T}, ref::GitReference) where T<:GitObject ensure_initialized() obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_peel, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref.ptr, Consts.OBJECT(T)) + @check 
ccall((:git_reference_peel, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref, Consts.OBJECT(T)) return T(ref.owner, obj_ptr_ptr[]) end peel(ref::GitReference) = peel(GitObject, ref) @@ -213,9 +213,9 @@ Get a list of all reference names in the `repo` repository. function ref_list(repo::GitRepo) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_reference_list, :libgit2), Cint, - (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr) - res = convert(Vector{String}, sa_ref[]) + @check ccall((:git_reference_list, libgit2), Cint, + (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo) + res = collect(sa_ref[]) free(sa_ref) res end @@ -235,9 +235,9 @@ function create_branch(repo::GitRepo, force::Bool=false) ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_branch_create, :libgit2), Cint, + @check ccall((:git_branch_create, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Cint), - ref_ptr_ptr, repo.ptr, bname, commit_obj.ptr, Cint(force)) + ref_ptr_ptr, repo, bname, commit_obj, Cint(force)) return GitReference(repo, ref_ptr_ptr[]) end @@ -248,7 +248,7 @@ Delete the branch pointed to by `branch`. """ function delete_branch(branch::GitReference) ensure_initialized() - @check ccall((:git_branch_delete, :libgit2), Cint, (Ptr{Cvoid},), branch.ptr) + @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch) end """ @@ -259,8 +259,8 @@ Set the HEAD of `repo` to the object pointed to by `ref`. function head!(repo::GitRepo, ref::GitReference) ensure_initialized() ref_name = name(ref) - @check ccall((:git_repository_set_head, :libgit2), Cint, - (Ptr{Cvoid}, Cstring), repo.ptr, ref_name) + @check ccall((:git_repository_set_head, libgit2), Cint, + (Ptr{Cvoid}, Cstring), repo, ref_name) return ref end @@ -280,9 +280,9 @@ function lookup_branch(repo::GitRepo, ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) branch_type = remote ? Consts.BRANCH_REMOTE : Consts.BRANCH_LOCAL - err = ccall((:git_branch_lookup, :libgit2), Cint, + err = ccall((:git_branch_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Cint), - ref_ptr_ptr, repo.ptr, branch_name, branch_type) + ref_ptr_ptr, repo, branch_name, branch_type) if err != Int(Error.GIT_OK) if err == Int(Error.ENOTFOUND) return nothing @@ -307,8 +307,8 @@ function upstream(ref::GitReference) isempty(ref) && return nothing ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - err = ccall((:git_branch_upstream, :libgit2), Cint, - (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref.ptr) + err = ccall((:git_branch_upstream, libgit2), Cint, + (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref) if err != Int(Error.GIT_OK) if err == Int(Error.ENOTFOUND) return nothing @@ -326,17 +326,17 @@ repository(ref::GitReference) = ref.owner function target!(ref::GitReference, new_oid::GitHash; msg::AbstractString="") ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_set_target, :libgit2), Cint, + @check ccall((:git_reference_set_target, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Cstring), - ref_ptr_ptr, ref.ptr, Ref(new_oid), isempty(msg) ? C_NULL : msg) + ref_ptr_ptr, ref, Ref(new_oid), isempty(msg) ? 
C_NULL : msg) return GitReference(ref.owner, ref_ptr_ptr[]) end function GitBranchIter(repo::GitRepo, flags::Cint=Cint(Consts.BRANCH_LOCAL)) ensure_initialized() bi_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_branch_iterator_new, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo.ptr, flags) + @check ccall((:git_branch_iterator_new, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo, flags) return GitBranchIter(repo, bi_ptr[]) end @@ -344,9 +344,9 @@ function Base.iterate(bi::GitBranchIter, state=nothing) ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) btype = Ref{Cint}() - err = ccall((:git_branch_next, :libgit2), Cint, + err = ccall((:git_branch_next, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cint}, Ptr{Cvoid}), - ref_ptr_ptr, btype, bi.ptr) + ref_ptr_ptr, btype, bi) if err == Cint(Error.GIT_OK) return ((GitReference(bi.owner, ref_ptr_ptr[]), btype[]), nothing) elseif err == Cint(Error.ITEROVER) diff --git a/stdlib/LibGit2/src/remote.jl b/stdlib/LibGit2/src/remote.jl index 384a3b21bdbfa..5b815f946fb17 100644 --- a/stdlib/LibGit2/src/remote.jl +++ b/stdlib/LibGit2/src/remote.jl @@ -14,9 +14,9 @@ remote = LibGit2.GitRemote(repo, "upstream", repo_url) function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_create, :libgit2), Cint, + @check ccall((:git_remote_create, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring), - rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url) + rmt_ptr_ptr, repo, rmt_name, rmt_url) return GitRemote(repo, rmt_ptr_ptr[]) end @@ -37,9 +37,9 @@ remote = LibGit2.GitRemote(repo, "upstream", repo_url, refspec) function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString, fetch_spec::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_create_with_fetchspec, :libgit2), Cint, + @check ccall((:git_remote_create_with_fetchspec, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring), - rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url, fetch_spec) + rmt_ptr_ptr, repo, rmt_name, rmt_url, fetch_spec) return GitRemote(repo, rmt_ptr_ptr[]) end @@ -57,12 +57,25 @@ remote = LibGit2.GitRemoteAnon(repo, repo_url) function GitRemoteAnon(repo::GitRepo, url::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_create_anonymous, :libgit2), Cint, + @check ccall((:git_remote_create_anonymous, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), - rmt_ptr_ptr, repo.ptr, url) + rmt_ptr_ptr, repo, url) return GitRemote(repo, rmt_ptr_ptr[]) end +""" + GitRemoteDetached(url::AbstractString) -> GitRemote + +Create a remote without a connected local repo. 
+""" +function GitRemoteDetached(url::AbstractString) + ensure_initialized() + rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) + @check ccall((:git_remote_create_detached, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Cstring), rmt_ptr_ptr, url) + return GitRemote(rmt_ptr_ptr[]) +end + """ lookup_remote(repo::GitRepo, remote_name::AbstractString) -> Union{GitRemote, Nothing} @@ -80,9 +93,9 @@ LibGit2.lookup_remote(repo, remote_name) # will return nothing function lookup_remote(repo::GitRepo, remote_name::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - err = ccall((:git_remote_lookup, :libgit2), Cint, + err = ccall((:git_remote_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), - rmt_ptr_ptr, repo.ptr, remote_name) + rmt_ptr_ptr, repo, remote_name) if err == Int(Error.GIT_OK) return GitRemote(repo, rmt_ptr_ptr[]) elseif err == Int(Error.ENOTFOUND) @@ -95,9 +108,9 @@ end function get(::Type{GitRemote}, repo::GitRepo, rmt_name::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_lookup, :libgit2), Cint, + @check ccall((:git_remote_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), - rmt_ptr_ptr, repo.ptr, rmt_name) + rmt_ptr_ptr, repo, rmt_name) return GitRemote(repo, rmt_ptr_ptr[]) end @@ -120,9 +133,11 @@ julia> LibGit2.url(remote) """ function url(rmt::GitRemote) ensure_initialized() - url_ptr = ccall((:git_remote_url, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) - url_ptr == C_NULL && return "" - return unsafe_string(url_ptr) + GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string` + url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt) + url_ptr == C_NULL && return "" + return unsafe_string(url_ptr) + end end """ @@ -144,9 +159,11 @@ julia> LibGit2.push_url(LibGit2.get(LibGit2.GitRemote, repo, "origin")) """ function push_url(rmt::GitRemote) ensure_initialized() - url_ptr = ccall((:git_remote_pushurl, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) - url_ptr == C_NULL && return "" - return unsafe_string(url_ptr) + GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string` + url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt) + url_ptr == C_NULL && return "" + return unsafe_string(url_ptr) + end end """ @@ -170,9 +187,11 @@ julia> name(remote) """ function name(rmt::GitRemote) ensure_initialized() - name_ptr = ccall((:git_remote_name, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) - name_ptr == C_NULL && return "" - return unsafe_string(name_ptr) + GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string` + name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt) + name_ptr == C_NULL && return "" + return unsafe_string(name_ptr) + end end """ @@ -194,9 +213,9 @@ String["+refs/heads/*:refs/remotes/upstream/*"] function fetch_refspecs(rmt::GitRemote) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_remote_get_fetch_refspecs, :libgit2), Cint, - (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr) - res = convert(Vector{String}, sa_ref[]) + @check ccall((:git_remote_get_fetch_refspecs, libgit2), Cint, + (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt) + res = collect(sa_ref[]) free(sa_ref) res end @@ -224,9 +243,9 @@ String["refs/heads/master"] function push_refspecs(rmt::GitRemote) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_remote_get_push_refspecs, :libgit2), Cint, - (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr) - res = 
convert(Vector{String}, sa_ref[]) + @check ccall((:git_remote_get_push_refspecs, libgit2), Cint, + (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt) + res = collect(sa_ref[]) free(sa_ref) res end @@ -247,8 +266,8 @@ String["+refs/heads/*:refs/remotes/upstream/*"] """ function add_fetch!(repo::GitRepo, rmt::GitRemote, fetch_spec::String) ensure_initialized() - @check ccall((:git_remote_add_fetch, :libgit2), Cint, - (Ptr{Cvoid}, Cstring, Cstring), repo.ptr, + @check ccall((:git_remote_add_fetch, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cstring), repo, name(rmt), fetch_spec) end @@ -276,8 +295,8 @@ String["refs/heads/master"] """ function add_push!(repo::GitRepo, rmt::GitRemote, push_spec::String) ensure_initialized() - @check ccall((:git_remote_add_push, :libgit2), Cint, - (Ptr{Cvoid}, Cstring, Cstring), repo.ptr, + @check ccall((:git_remote_add_push, libgit2), Cint, + (Ptr{Cvoid}, Cstring, Cstring), repo, name(rmt), push_spec) end @@ -296,9 +315,9 @@ function fetch(rmt::GitRemote, refspecs::Vector{<:AbstractString}; msg::AbstractString="") ensure_initialized() msg = "libgit2.fetch: $msg" - @check ccall((:git_remote_fetch, :libgit2), Cint, + @check ccall((:git_remote_fetch, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{FetchOptions}, Cstring), - rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg) + rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg) end """ @@ -321,9 +340,9 @@ The keyword arguments are: function push(rmt::GitRemote, refspecs::Vector{<:AbstractString}; force::Bool = false, options::PushOptions = PushOptions()) ensure_initialized() - @check ccall((:git_remote_push, :libgit2), Cint, + @check ccall((:git_remote_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{PushOptions}), - rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options)) + rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options)) end """ @@ -333,9 +352,9 @@ Delete the `remote_name` from the git `repo`. 
""" function remote_delete(repo::GitRepo, remote_name::AbstractString) ensure_initialized() - @check ccall((:git_remote_delete, :libgit2), Cint, + @check ccall((:git_remote_delete, libgit2), Cint, (Ptr{Cvoid}, Cstring), - repo.ptr, remote_name) + repo, remote_name) end Base.show(io::IO, rmt::GitRemote) = print(io, "GitRemote:\nRemote name: ", name(rmt), " url: ", url(rmt)) @@ -352,9 +371,9 @@ function set_remote_fetch_url end function set_remote_fetch_url(repo::GitRepo, remote_name::AbstractString, url::AbstractString) ensure_initialized() - @check ccall((:git_remote_set_url, :libgit2), Cint, + @check ccall((:git_remote_set_url, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cstring), - repo.ptr, remote_name, url) + repo, remote_name, url) end function set_remote_fetch_url(path::AbstractString, remote_name::AbstractString, url::AbstractString) @@ -375,9 +394,9 @@ function set_remote_push_url end function set_remote_push_url(repo::GitRepo, remote_name::AbstractString, url::AbstractString) ensure_initialized() - @check ccall((:git_remote_set_pushurl, :libgit2), Cint, + @check ccall((:git_remote_set_pushurl, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cstring), - repo.ptr, remote_name, url) + repo, remote_name, url) end function set_remote_push_url(path::AbstractString, remote_name::AbstractString, url::AbstractString) @@ -414,3 +433,65 @@ function set_remote_url(path::AbstractString, remote_name::AbstractString, url:: set_remote_url(repo, remote_name, url) end end + +function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION, + callbacks::RemoteCallbacks) + @check ccall((:git_remote_connect, libgit2), + Cint, (Ptr{Cvoid}, Cint, Ref{RemoteCallbacks}, Ptr{Cvoid}, Ptr{Cvoid}), + rmt, direction, callbacks, C_NULL, C_NULL) + return rmt +end + +""" + connected(rmt::GitRemote) + +Check whether the remote is connected +""" +function connected(rmt::GitRemote) + return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt) != 0 +end + +""" + disconnect(rmt::GitRemote) + +Close the connection to the remote. +""" +function disconnect(rmt::GitRemote) + @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt) + return +end + +""" + default_branch(rmt::GitRemote) + +Retrieve the name of the remote's default branch. + +This function must only be called after connecting (See [`connect`](@ref)). +""" +function default_branch(rmt::GitRemote) + buf_ref = Ref(Buffer()) + @check ccall((:git_remote_default_branch, libgit2), Cint, + (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt) + buf = buf_ref[] + str = unsafe_string(buf.ptr, buf.size) + free(buf_ref) + return str +end + +""" + ls(rmt::GitRemote) -> Vector{GitRemoteHead} + +Get the remote repository's reference advertisement list. + +This function must only be called after connecting (See [`connect`](@ref)). +""" +function ls(rmt::GitRemote) + nheads = Ref{Csize_t}() + head_refs = Ref{Ptr{Ptr{_GitRemoteHead}}}() + @check ccall((:git_remote_ls, libgit2), Cint, + (Ptr{Ptr{Ptr{_GitRemoteHead}}}, Ptr{Csize_t}, Ptr{Cvoid}), + head_refs, nheads, rmt) + head_ptr = head_refs[] + return [GitRemoteHead(unsafe_load(unsafe_load(head_ptr, i))) + for i in 1:nheads[]] +end diff --git a/stdlib/LibGit2/src/repository.jl b/stdlib/LibGit2/src/repository.jl index 994d0a9f32875..9c8d379578b96 100644 --- a/stdlib/LibGit2/src/repository.jl +++ b/stdlib/LibGit2/src/repository.jl @@ -8,7 +8,7 @@ Open a git repository at `path`. 
function GitRepo(path::AbstractString) ensure_initialized() repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_open, :libgit2), Cint, + @check ccall((:git_repository_open, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring), repo_ptr_ptr, path) return GitRepo(repo_ptr_ptr[]) end @@ -23,7 +23,7 @@ function GitRepoExt(path::AbstractString, flags::Cuint = Cuint(Consts.REPOSITORY ensure_initialized() separator = @static Sys.iswindows() ? ";" : ":" repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_open_ext, :libgit2), Cint, + @check ccall((:git_repository_open_ext, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cuint, Cstring), repo_ptr_ptr, path, flags, separator) return GitRepo(repo_ptr_ptr[]) @@ -32,7 +32,7 @@ end function cleanup(r::GitRepo) if r.ptr != C_NULL ensure_initialized() - @check ccall((:git_repository__cleanup, :libgit2), Cint, (Ptr{Cvoid},), r.ptr) + @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r) end end @@ -46,7 +46,7 @@ is `true`, no working directory will be created. function init(path::AbstractString, bare::Bool=false) ensure_initialized() repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_init, :libgit2), Cint, + @check ccall((:git_repository_init, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cuint), repo_ptr_ptr, path, bare) return GitRepo(repo_ptr_ptr[]) end @@ -97,7 +97,7 @@ tree, and no tracking information for remote branches or configurations is prese function isbare(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - return ccall((:git_repository_is_bare, :libgit2), Cint, (Ptr{Cvoid},), repo.ptr) == 1 + return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo) == 1 end """ @@ -109,7 +109,7 @@ Determine if `repo` is detached - that is, whether its HEAD points to a commit function isattached(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - ccall((:git_repository_head_detached, :libgit2), Cint, (Ptr{Cvoid},), repo.ptr) != 1 + ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo) != 1 end @doc """ @@ -139,14 +139,21 @@ function (::Type{T})(repo::GitRepo, spec::AbstractString) where T<:GitObject ensure_initialized() obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_revparse_single, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo.ptr, spec) + @check ccall((:git_revparse_single, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo, spec) + obj_ptr = obj_ptr_ptr[] # check object is of correct type if T != GitObject && T != GitUnknownObject - t = Consts.OBJECT(obj_ptr_ptr[]) - t == Consts.OBJECT(T) || throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))")) + t = Consts.OBJECT(obj_ptr) + if t != Consts.OBJECT(T) + if obj_ptr != C_NULL + # free result + ccall((:git_object_free, libgit2), Cvoid, (Ptr{Cvoid},), obj_ptr) + end + throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))")) + end end - return T(repo, obj_ptr_ptr[]) + return T(repo, obj_ptr) end function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject @@ -155,9 +162,9 @@ function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_object_lookup, :libgit2), Cint, + @check ccall((:git_object_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Consts.OBJECT), - 
obj_ptr_ptr, repo.ptr, oid_ptr, Consts.OBJECT(T)) + obj_ptr_ptr, repo, oid_ptr, Consts.OBJECT(T)) return T(repo, obj_ptr_ptr[]) end @@ -167,9 +174,9 @@ function (::Type{T})(repo::GitRepo, oid::GitShortHash) where T<:GitObject obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_object_lookup_prefix, :libgit2), Cint, + @check ccall((:git_object_lookup_prefix, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Csize_t, Consts.OBJECT), - obj_ptr_ptr, repo.ptr, oid_ptr, oid.len, Consts.OBJECT(T)) + obj_ptr_ptr, repo, oid_ptr, oid.len, Consts.OBJECT(T)) return T(repo, obj_ptr_ptr[]) end @@ -190,8 +197,10 @@ See also [`workdir`](@ref), [`path`](@ref). function gitdir(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - return unsafe_string(ccall((:git_repository_path, :libgit2), Cstring, - (Ptr{Cvoid},), repo.ptr)) + GC.@preserve repo begin + return unsafe_string(ccall((:git_repository_path, libgit2), Cstring, + (Ptr{Cvoid},), repo)) + end end """ @@ -211,10 +220,12 @@ See also [`gitdir`](@ref), [`path`](@ref). function workdir(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - sptr = ccall((:git_repository_workdir, :libgit2), Cstring, - (Ptr{Cvoid},), repo.ptr) - sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found.")) - return unsafe_string(sptr) + GC.@preserve repo begin + sptr = ccall((:git_repository_workdir, libgit2), Cstring, + (Ptr{Cvoid},), repo) + sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found.")) + return unsafe_string(sptr) + end end """ @@ -255,8 +266,8 @@ function peel(::Type{T}, obj::GitObject) where T<:GitObject ensure_initialized() new_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_object_peel, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj.ptr, Consts.OBJECT(T)) + @check ccall((:git_object_peel, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj, Consts.OBJECT(T)) return T(obj.owner, new_ptr_ptr[]) end @@ -285,9 +296,9 @@ function GitDescribeResult(committish::GitObject; options::DescribeOptions=DescribeOptions()) ensure_initialized() result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_describe_commit, :libgit2), Cint, + @check ccall((:git_describe_commit, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}), - result_ptr_ptr, committish.ptr, Ref(options)) + result_ptr_ptr, committish, Ref(options)) return GitDescribeResult(committish.owner, result_ptr_ptr[]) end @@ -312,9 +323,9 @@ function GitDescribeResult(repo::GitRepo; options::DescribeOptions=DescribeOptio ensure_initialized() result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_describe_workdir, :libgit2), Cint, + @check ccall((:git_describe_workdir, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}), - result_ptr_ptr, repo.ptr, Ref(options)) + result_ptr_ptr, repo, Ref(options)) return GitDescribeResult(repo, result_ptr_ptr[]) end @@ -329,9 +340,9 @@ Formatting options are controlled by the keyword argument: function format(result::GitDescribeResult; options::DescribeFormatOptions=DescribeFormatOptions()) ensure_initialized() buf_ref = Ref(Buffer()) - @check ccall((:git_describe_format, :libgit2), Cint, + @check ccall((:git_describe_format, libgit2), Cint, (Ptr{Buffer}, Ptr{Cvoid}, Ptr{DescribeFormatOptions}), - buf_ref, result.ptr, Ref(options)) + buf_ref, result, Ref(options)) buf = buf_ref[] str = unsafe_string(buf.ptr, buf.size) free(buf_ref) 
@@ -355,9 +366,9 @@ function checkout_tree(repo::GitRepo, obj::GitObject; options::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_checkout_tree, :libgit2), Cint, + @check ccall((:git_checkout_tree, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}), - repo.ptr, obj.ptr, Ref(options)) + repo, obj, Ref(options)) end """ @@ -371,10 +382,10 @@ function checkout_index(repo::GitRepo, idx::Union{GitIndex, Nothing} = nothing; options::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_checkout_index, :libgit2), Cint, + @check ccall((:git_checkout_index, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}), - repo.ptr, - idx === nothing ? C_NULL : idx.ptr, + repo, + idx === nothing ? C_NULL : idx, Ref(options)) end @@ -391,9 +402,9 @@ Update the index and working tree of `repo` to match the commit pointed to by HE function checkout_head(repo::GitRepo; options::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_checkout_head, :libgit2), Cint, + @check ccall((:git_checkout_head, libgit2), Cint, (Ptr{Cvoid}, Ptr{CheckoutOptions}), - repo.ptr, Ref(options)) + repo, Ref(options)) end """ @@ -410,19 +421,19 @@ The keyword argument `options` sets checkout and merge options for the cherrypic function cherrypick(repo::GitRepo, commit::GitCommit; options::CherrypickOptions = CherrypickOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_cherrypick, :libgit2), Cint, + @check ccall((:git_cherrypick, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CherrypickOptions}), - repo.ptr, commit.ptr, Ref(options)) + repo, commit, Ref(options)) end """Updates some entries, determined by the `pathspecs`, in the index from the target commit tree.""" function reset!(repo::GitRepo, obj::Union{GitObject, Nothing}, pathspecs::AbstractString...) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_reset_default, :libgit2), Cint, + @check ccall((:git_reset_default, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{StrArrayStruct}), - repo.ptr, - obj === nothing ? C_NULL : obj.ptr, + repo, + obj === nothing ? 
C_NULL : obj, collect(pathspecs)) return head_oid(repo) end @@ -432,9 +443,9 @@ function reset!(repo::GitRepo, obj::GitObject, mode::Cint; checkout_opts::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_reset, :libgit2), Cint, + @check ccall((:git_reset, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cint, Ptr{CheckoutOptions}), - repo.ptr, obj.ptr, mode, Ref(checkout_opts)) + repo, obj, mode, Ref(checkout_opts)) return head_oid(repo) end @@ -456,7 +467,7 @@ function clone(repo_url::AbstractString, repo_path::AbstractString, ensure_initialized() clone_opts_ref = Ref(clone_opts) repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_clone, :libgit2), Cint, + @check ccall((:git_clone, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Ref{CloneOptions}), repo_ptr_ptr, repo_url, repo_path, clone_opts_ref) return GitRepo(repo_ptr_ptr[]) @@ -490,9 +501,9 @@ function fetchheads(repo::GitRepo) fh = FetchHead[] ffcb = fetchhead_foreach_cb() @assert repo.ptr != C_NULL - @check ccall((:git_repository_fetchhead_foreach, :libgit2), Cint, + @check ccall((:git_repository_fetchhead_foreach, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Any), - repo.ptr, ffcb, fh) + repo, ffcb, fh) return fh end @@ -505,9 +516,9 @@ function remotes(repo::GitRepo) ensure_initialized() sa_ref = Ref(StrArrayStruct()) @assert repo.ptr != C_NULL - @check ccall((:git_remote_list, :libgit2), Cint, - (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr) - res = convert(Vector{String}, sa_ref[]) + @check ccall((:git_remote_list, libgit2), Cint, + (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo) + res = collect(sa_ref[]) free(sa_ref) return res end diff --git a/stdlib/LibGit2/src/signature.jl b/stdlib/LibGit2/src/signature.jl index 9c13bc2256ef5..17013121db9ad 100644 --- a/stdlib/LibGit2/src/signature.jl +++ b/stdlib/LibGit2/src/signature.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license function Signature(ptr::Ptr{SignatureStruct}) + @assert ptr != C_NULL sig = unsafe_load(ptr)::SignatureStruct name = unsafe_string(sig.name) email = unsafe_string(sig.email) @@ -13,7 +14,7 @@ Signature(sig::GitSignature) = Signature(sig.ptr) function Signature(name::AbstractString, email::AbstractString) ensure_initialized() sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL) - @check ccall((:git_signature_now, :libgit2), Cint, + @check ccall((:git_signature_now, libgit2), Cint, (Ptr{Ptr{SignatureStruct}}, Cstring, Cstring), sig_ptr_ptr, name, email) sig = GitSignature(sig_ptr_ptr[]) s = Signature(sig.ptr) @@ -31,7 +32,7 @@ end function Base.convert(::Type{GitSignature}, sig::Signature) ensure_initialized() sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL) - @check ccall((:git_signature_new, :libgit2), Cint, + @check ccall((:git_signature_new, libgit2), Cint, (Ptr{Ptr{SignatureStruct}}, Cstring, Cstring, Int64, Cint), sig_ptr_ptr, sig.name, sig.email, sig.time, sig.time_offset) return GitSignature(sig_ptr_ptr[]) @@ -66,7 +67,7 @@ end function default_signature(repo::GitRepo) ensure_initialized() sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL) - @check ccall((:git_signature_default, :libgit2), Cint, - (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo.ptr) + @check ccall((:git_signature_default, libgit2), Cint, + (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo) return GitSignature(sig_ptr_ptr[]) end diff --git a/stdlib/LibGit2/src/status.jl b/stdlib/LibGit2/src/status.jl index cd871681e4ae9..c048e68c2b2bc 100644 --- a/stdlib/LibGit2/src/status.jl +++ b/stdlib/LibGit2/src/status.jl @@ -12,23 +12,23 @@ submodules or not. See [`StatusOptions`](@ref) for more information. function GitStatus(repo::GitRepo; status_opts=StatusOptions()) ensure_initialized() stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_status_list_new, :libgit2), Cint, + @check ccall((:git_status_list_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{StatusOptions}), - stat_ptr_ptr, repo.ptr, Ref(status_opts)) + stat_ptr_ptr, repo, Ref(status_opts)) return GitStatus(repo, stat_ptr_ptr[]) end function Base.length(status::GitStatus) ensure_initialized() - return Int(ccall((:git_status_list_entrycount, :libgit2), Csize_t, - (Ptr{Ptr{Cvoid}},), status.ptr)) + return Int(ccall((:git_status_list_entrycount, libgit2), Csize_t, + (Ptr{Cvoid},), status)) end function Base.getindex(status::GitStatus, i::Integer) 1 <= i <= length(status) || throw(BoundsError()) ensure_initialized() GC.@preserve status begin - entry_ptr = ccall((:git_status_byindex, :libgit2), + entry_ptr = ccall((:git_status_byindex, libgit2), Ptr{StatusEntry}, (Ptr{Cvoid}, Csize_t), status.ptr, i-1) @@ -49,9 +49,9 @@ and needs to be staged and committed. function status(repo::GitRepo, path::String) ensure_initialized() status_ptr = Ref{Cuint}(0) - ret = ccall((:git_status_file, :libgit2), Cint, + ret = ccall((:git_status_file, libgit2), Cint, (Ref{Cuint}, Ptr{Cvoid}, Cstring), - status_ptr, repo.ptr, path) + status_ptr, repo, path) (ret == Cint(Error.ENOTFOUND) || ret == Cint(Error.EAMBIGUOUS)) && return nothing return status_ptr[] end diff --git a/stdlib/LibGit2/src/strarray.jl b/stdlib/LibGit2/src/strarray.jl index db0803680f72b..78e38a9502128 100644 --- a/stdlib/LibGit2/src/strarray.jl +++ b/stdlib/LibGit2/src/strarray.jl @@ -1,6 +1,5 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license - function Base.cconvert(::Type{Ptr{StrArrayStruct}}, x::Vector) str_ref = Base.cconvert(Ref{Cstring}, x) sa_ref = Ref(StrArrayStruct(Base.unsafe_convert(Ref{Cstring}, str_ref), length(x))) @@ -10,6 +9,8 @@ function Base.unsafe_convert(::Type{Ptr{StrArrayStruct}}, rr::Tuple{Ref{StrArray Base.unsafe_convert(Ptr{StrArrayStruct}, first(rr)) end -function Base.convert(::Type{Vector{String}}, sa::StrArrayStruct) - [unsafe_string(unsafe_load(sa.strings, i)) for i = 1:sa.count] +Base.length(sa::StrArrayStruct) = sa.count +function Base.iterate(sa::StrArrayStruct, state=1) + state > sa.count && return nothing + (unsafe_string(unsafe_load(sa.strings, state)), state+1) end diff --git a/stdlib/LibGit2/src/tag.jl b/stdlib/LibGit2/src/tag.jl index 4209a4e2f917d..73f010590e9c1 100644 --- a/stdlib/LibGit2/src/tag.jl +++ b/stdlib/LibGit2/src/tag.jl @@ -8,9 +8,9 @@ Get a list of all tags in the git repository `repo`. function tag_list(repo::GitRepo) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_tag_list, :libgit2), Cint, - (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr) - res = convert(Vector{String}, sa_ref[]) + @check ccall((:git_tag_list, libgit2), Cint, + (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo) + res = collect(sa_ref[]) free(sa_ref) res end @@ -22,8 +22,8 @@ Remove the git tag `tag` from the repository `repo`. """ function tag_delete(repo::GitRepo, tag::AbstractString) ensure_initialized() - @check ccall((:git_tag_delete, :libgit2), Cint, - (Ptr{Cvoid}, Cstring), repo.ptr, tag) + @check ccall((:git_tag_delete, libgit2), Cint, + (Ptr{Cvoid}, Cstring), repo, tag) end """ @@ -46,9 +46,9 @@ function tag_create(repo::GitRepo, tag::AbstractString, commit::Union{AbstractSt commit_obj === nothing && return oid_ptr[] # return empty oid with(convert(GitSignature, sig)) do git_sig ensure_initialized() - @check ccall((:git_tag_create, :libgit2), Cint, + @check ccall((:git_tag_create, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Ptr{SignatureStruct}, Cstring, Cint), - oid_ptr, repo.ptr, tag, commit_obj.ptr, git_sig.ptr, msg, Cint(force)) + oid_ptr, repo, tag, commit_obj, git_sig, msg, Cint(force)) end end return oid_ptr[] @@ -62,7 +62,7 @@ The name of `tag` (e.g. `"v0.5"`). function name(tag::GitTag) ensure_initialized() GC.@preserve tag begin - str_ptr = ccall((:git_tag_name, :libgit2), Cstring, (Ptr{Cvoid},), tag.ptr) + str_ptr = ccall((:git_tag_name, libgit2), Cstring, (Ptr{Cvoid},), tag.ptr) str_ptr == C_NULL && throw(Error.GitError(Error.ERROR)) str = unsafe_string(str_ptr) end @@ -78,7 +78,7 @@ The `GitHash` of the target object of `tag`. 
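For context, a sketch of how these tag helpers compose. The repository path and tag name are placeholders; `msg` and `force` are the keyword names visible in the `tag_create` body above, and the tagger signature defaults to the repository's configured `user.name`/`user.email`.

```julia
using LibGit2

repo = LibGit2.GitRepo("/path/to/repo")     # placeholder path
oid = LibGit2.tag_create(repo, "v0.1.0", "HEAD", msg="first tag")
@show LibGit2.tag_list(repo)                # now includes "v0.1.0"
LibGit2.tag_delete(repo, "v0.1.0")
close(repo)
```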
function target(tag::GitTag) ensure_initialized() GC.@preserve tag begin - oid_ptr = ccall((:git_tag_target_id, :libgit2), Ptr{GitHash}, (Ptr{Cvoid},), tag.ptr) + oid_ptr = ccall((:git_tag_target_id, libgit2), Ptr{GitHash}, (Ptr{Cvoid},), tag.ptr) oid_ptr == C_NULL && throw(Error.GitError(Error.ERROR)) str = unsafe_load(oid_ptr) end diff --git a/stdlib/LibGit2/src/tree.jl b/stdlib/LibGit2/src/tree.jl index 1ef8a2eb75003..4c507aaba8e48 100644 --- a/stdlib/LibGit2/src/tree.jl +++ b/stdlib/LibGit2/src/tree.jl @@ -2,7 +2,7 @@ function GitTree(c::GitCommit) tree_out = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_commit_tree, :libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), tree_out, c) + @check ccall((:git_commit_tree, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), tree_out, c) GitTree(repository(c), tree_out[]) end @@ -35,9 +35,9 @@ function treewalk(f, tree::GitTree, post::Bool = false) entry = GitTreeEntry(tree, entry_ptr, false) return f(root, entry) end, Cint, (Cstring, Ptr{Cvoid}, Ref{Vector{Any}})) - err = ccall((:git_tree_walk, :libgit2), Cint, + err = ccall((:git_tree_walk, libgit2), Cint, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Any), - tree.ptr, post, cbf, payload) + tree, post, cbf, payload) if err < 0 err_class, _ = Error.last_error() if err_class != Error.Callback @@ -58,8 +58,10 @@ Return the filename of the object on disk to which `te` refers. """ function filename(te::GitTreeEntry) ensure_initialized() - str = ccall((:git_tree_entry_name, :libgit2), Cstring, (Ptr{Cvoid},), te.ptr) - str != C_NULL && return unsafe_string(str) + GC.@preserve te begin + str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr) + str != C_NULL && return unsafe_string(str) + end return nothing end @@ -70,7 +72,7 @@ Return the UNIX filemode of the object on disk to which `te` refers as an intege """ function filemode(te::GitTreeEntry) ensure_initialized() - return ccall((:git_tree_entry_filemode, :libgit2), Cint, (Ptr{Cvoid},), te.ptr) + return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te) end """ @@ -81,7 +83,7 @@ one of the types which [`objtype`](@ref) returns, e.g. a `GitTree` or `GitBlob`. """ function entrytype(te::GitTreeEntry) ensure_initialized() - otype = ccall((:git_tree_entry_type, :libgit2), Cint, (Ptr{Cvoid},), te.ptr) + otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te) return objtype(Consts.OBJECT(otype)) end @@ -93,7 +95,7 @@ Return the [`GitHash`](@ref) of the object to which `te` refers. 
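Since `treewalk` takes the callback as its first argument, Julia's `do`-block syntax fits naturally; the callback receives the current root path and a `GitTreeEntry`, and its integer return value controls the walk (zero continues, a negative value aborts). A sketch using the tree-entry accessors in this file; the repository path is a placeholder.

```julia
using LibGit2

repo = LibGit2.GitRepo("/path/to/repo")       # placeholder path
tree = LibGit2.GitTree(repo, "HEAD^{tree}")   # revparse the tree at HEAD
LibGit2.treewalk(tree) do root, entry
    println(root, LibGit2.filename(entry), "  mode=", LibGit2.filemode(entry))
    return 0    # keep walking; return a negative value to stop early
end
close(repo)
```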
function entryid(te::GitTreeEntry) ensure_initialized() GC.@preserve te begin - oid_ptr = ccall((:git_tree_entry_id, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), te.ptr) + oid_ptr = ccall((:git_tree_entry_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), te.ptr) oid = GitHash(oid_ptr) end return oid @@ -101,7 +103,7 @@ end function count(tree::GitTree) ensure_initialized() - return ccall((:git_tree_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), tree.ptr) + return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree) end function Base.getindex(tree::GitTree, i::Integer) @@ -109,9 +111,9 @@ function Base.getindex(tree::GitTree, i::Integer) throw(BoundsError(tree, i)) end ensure_initialized() - te_ptr = ccall((:git_tree_entry_byindex, :libgit2), + te_ptr = ccall((:git_tree_entry_byindex, libgit2), Ptr{Cvoid}, - (Ptr{Cvoid}, Csize_t), tree.ptr, i-1) + (Ptr{Cvoid}, Csize_t), tree, i-1) return GitTreeEntry(tree, te_ptr, false) end @@ -133,7 +135,7 @@ function (::Type{T})(te::GitTreeEntry) where T<:GitObject ensure_initialized() repo = repository(te) obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_tree_entry_to_object, :libgit2), Cint, + @check ccall((:git_tree_entry_to_object, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}), obj_ptr_ptr, repo, te) return T(repo, obj_ptr_ptr[]) @@ -162,7 +164,7 @@ function _getindex(tree::GitTree, target::AbstractString) end entry = Ref{Ptr{Cvoid}}(C_NULL) - err = ccall((:git_tree_entry_bypath, :libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), entry, tree, target) + err = ccall((:git_tree_entry_bypath, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), entry, tree, target) err == Int(Error.ENOTFOUND) && return nothing err < 0 && throw(Error.GitError(err)) entry = GitTreeEntry(tree, entry[], true #= N.B.: Most other lookups need false here =#) diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl index 0b653f9b6ad21..181baa4991a9b 100644 --- a/stdlib/LibGit2/src/types.jl +++ b/stdlib/LibGit2/src/types.jl @@ -2,7 +2,7 @@ using Base: something import Base.@kwdef -import .Consts: GIT_SUBMODULE_IGNORE, GIT_MERGE_FILE_FAVOR, GIT_MERGE_FILE, GIT_CONFIG +import .Consts: GIT_SUBMODULE_IGNORE, GIT_MERGE_FILE_FAVOR, GIT_MERGE_FILE, GIT_CONFIG, GIT_OID_TYPE const OID_RAWSZ = 20 const OID_HEXSZ = OID_RAWSZ * 2 @@ -78,7 +78,7 @@ When fetching data from LibGit2, a typical usage would look like: ```julia sa_ref = Ref(StrArrayStruct()) @check ccall(..., (Ptr{StrArrayStruct},), sa_ref) -res = convert(Vector{String}, sa_ref[]) +res = collect(sa_ref[]) free(sa_ref) ``` In particular, note that `LibGit2.free` should be called afterward on the `Ref` object. 
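With `Base.length` and `Base.iterate` now defined for `StrArrayStruct` (see strarray.jl above), the struct behaves as an ordinary iterator of `String`s, which is what makes the `collect(sa_ref[])` pattern shown here work. A small sketch mirroring the updated round-trip test elsewhere in this patch; the literal strings are arbitrary, and `GC.@preserve` stands in for the test's `gcuse` helper.

```julia
using LibGit2

strs = ["refs/heads/master", "refs/tags/v1.0"]
arr  = Base.cconvert(Ptr{LibGit2.StrArrayStruct}, strs)
GC.@preserve arr begin
    sa = unsafe_load(Base.unsafe_convert(Ptr{LibGit2.StrArrayStruct}, arr))
    @assert length(sa) == 2
    @assert collect(sa) == strs   # iteration copies each string back out
    for s in sa                   # ...or consume it lazily
        println(s)
    end
end
```

Replacing the `convert(Vector{String}, ...)` method with the iteration protocol presumably lets any iterator consumer (`collect`, `for` loops, comprehensions) work on the struct without a dedicated conversion method.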
@@ -99,7 +99,7 @@ StrArrayStruct() = StrArrayStruct(C_NULL, 0) function free(sa_ref::Base.Ref{StrArrayStruct}) ensure_initialized() - ccall((:git_strarray_free, :libgit2), Cvoid, (Ptr{StrArrayStruct},), sa_ref) + ccall((:git_strarray_free, libgit2), Cvoid, (Ptr{StrArrayStruct},), sa_ref) end """ @@ -126,7 +126,7 @@ Buffer() = Buffer(C_NULL, 0, 0) function free(buf_ref::Base.Ref{Buffer}) ensure_initialized() - ccall((:git_buf_free, :libgit2), Cvoid, (Ptr{Buffer},), buf_ref) + ccall((:git_buf_free, libgit2), Cvoid, (Ptr{Buffer},), buf_ref) end """ @@ -237,6 +237,9 @@ Matches the [`git_remote_callbacks`](https://libgit2.org/libgit2/#HEAD/type/git_ @static if LibGit2.VERSION >= v"0.99.0" resolve_url::Ptr{Cvoid} = C_NULL end + @static if LibGit2.VERSION >= v"1.9.0" + update_refs::Ptr{Cvoid} = C_NULL + end end @assert Base.allocatedinline(RemoteCallbacks) @@ -346,6 +349,9 @@ The fields represent: @static if LibGit2.VERSION >= v"0.25.0" proxy_opts::ProxyOptions = ProxyOptions() end + @static if LibGit2.VERSION >= v"1.7.0" + depth::Cuint = Cuint(Consts.FETCH_DEPTH_FULL) + end @static if LibGit2.VERSION >= v"1.4.0" follow_redirects::Cuint = Cuint(0) end @@ -439,6 +445,9 @@ The fields represent: # options controlling how the diff text is generated context_lines::UInt32 = UInt32(3) interhunk_lines::UInt32 = UInt32(0) + @static if LibGit2.VERSION >= v"1.7.0" + oid_type::GIT_OID_TYPE = Consts.OID_DEFAULT + end id_abbrev::UInt16 = UInt16(7) max_size::Int64 = Int64(512*1024*1024) #512Mb old_prefix::Cstring = Cstring(C_NULL) @@ -672,6 +681,8 @@ The fields represent: for more information. * `custom_headers`: only relevant if the LibGit2 version is greater than or equal to `0.24.0`. Extra headers needed for the push operation. + * `remote_push_options`: only relevant if the LibGit2 version is greater than or equal to `1.8.0`. + "Push options" to deliver to the remote. """ @kwdef struct PushOptions version::Cuint = Cuint(1) @@ -686,6 +697,9 @@ The fields represent: @static if LibGit2.VERSION >= v"0.24.0" custom_headers::StrArrayStruct = StrArrayStruct() end + @static if LibGit2.VERSION >= v"1.8.0" + remote_push_options::StrArrayStruct = StrArrayStruct() + end end @assert Base.allocatedinline(PushOptions) @@ -907,10 +921,19 @@ Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_conf struct ConfigEntry name::Cstring value::Cstring + @static if LibGit2.VERSION >= v"1.8.0" + backend_type::Cstring + origin_path::Cstring + end include_depth::Cuint level::GIT_CONFIG - free::Ptr{Cvoid} - payload::Ptr{Cvoid} # User is not permitted to read or write this field + @static if LibGit2.VERSION < v"1.9.0" + free::Ptr{Cvoid} + end + @static if LibGit2.VERSION < v"1.8.0" + # In 1.8.0, the unused payload value has been removed + payload::Ptr{Cvoid} + end end @assert Base.allocatedinline(ConfigEntry) @@ -918,6 +941,17 @@ function Base.show(io::IO, ce::ConfigEntry) print(io, "ConfigEntry(\"", unsafe_string(ce.name), "\", \"", unsafe_string(ce.value), "\")") end +""" + LibGit2.ConfigBackendEntry + +Matches the [`git_config_backend_entry`](https://libgit2.org/libgit2/#HEAD/type/git_config_backend_entry) struct. 
+""" +struct ConfigBackendEntry + entry::ConfigEntry + free::Ptr{Cvoid} +end +@assert Base.allocatedinline(ConfigBackendEntry) + """ LibGit2.split_cfg_entry(ce::LibGit2.ConfigEntry) -> Tuple{String,String,String,String} @@ -1004,7 +1038,7 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[ (:GitRepo, nothing, :AbstractGitObject, :git_repository), (:GitConfig, :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_config), (:GitIndex, :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_index), - (:GitRemote, :GitRepo, :AbstractGitObject, :git_remote), + (:GitRemote, :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_remote), (:GitRevWalker, :GitRepo, :AbstractGitObject, :git_revwalk), (:GitReference, :GitRepo, :AbstractGitObject, :git_reference), (:GitDescribeResult, :GitRepo, :AbstractGitObject, :git_describe_result), @@ -1039,7 +1073,6 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[ return obj end end - @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr else @eval mutable struct $typ <: $sup owner::$owntyp @@ -1054,21 +1087,21 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[ return obj end end - @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr if isa(owntyp, Expr) && owntyp.args[1] === :Union && owntyp.args[3] === :Nothing @eval begin $typ(ptr::Ptr{Cvoid}, fin::Bool=true) = $typ(nothing, ptr, fin) end end end + @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, obj::$typ) = obj.ptr @eval function Base.close(obj::$typ) if obj.ptr != C_NULL ensure_initialized() - ccall(($(string(cname, :_free)), :libgit2), Cvoid, (Ptr{Cvoid},), obj.ptr) + ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj) obj.ptr = C_NULL if Threads.atomic_sub!(REFCOUNT, 1) == 1 # will the last finalizer please turn out the lights? - ccall((:git_libgit2_shutdown, :libgit2), Cint, ()) + ccall((:git_libgit2_shutdown, libgit2), Cint, ()) end end end @@ -1098,10 +1131,11 @@ end function Base.close(obj::GitSignature) if obj.ptr != C_NULL ensure_initialized() - ccall((:git_signature_free, :libgit2), Cvoid, (Ptr{SignatureStruct},), obj.ptr) + ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj) obj.ptr = C_NULL end end +Base.unsafe_convert(::Type{Ptr{SignatureStruct}}, obj::GitSignature) = obj.ptr # Structure has the same layout as SignatureStruct mutable struct Signature @@ -1120,15 +1154,20 @@ The fields represent: * `final_commit_id`: the [`GitHash`](@ref) of the commit where this section was last changed. * `final_start_line_number`: the *one based* line number in the file where the hunk starts, in the *final* version of the file. - * `final_signature`: the signature of the person who last modified this hunk. You will + * `final_signature`: the signature of the author of `final_commit_id`. You will + need to pass this to `Signature` to access its fields. + * `final_committer`: the signature of the committer of `final_commit_id`. You will need to pass this to `Signature` to access its fields. * `orig_commit_id`: the [`GitHash`](@ref) of the commit where this hunk was first found. * `orig_path`: the path to the file where the hunk originated. This may be different than the current/final path, for instance if the file has been moved. * `orig_start_line_number`: the *one based* line number in the file where the hunk starts, in the *original* version of the file at `orig_path`. - * `orig_signature`: the signature of the person who introduced this hunk. 
You will + * `orig_signature`: the signature of the author who introduced this hunk. You will + need to pass this to `Signature` to access its fields. + * `orig_committer`: the signature of the committer who introduced this hunk. You will need to pass this to `Signature` to access its fields. + * `summary`: a string summary. * `boundary`: `'1'` if the original commit is a "boundary" commit (for instance, if it's equal to an oldest commit set in `options`). """ @@ -1138,12 +1177,21 @@ The fields represent: final_commit_id::GitHash = GitHash() final_start_line_number::Csize_t = Csize_t(0) final_signature::Ptr{SignatureStruct} = Ptr{SignatureStruct}(C_NULL) + @static if LibGit2.VERSION >= v"1.9.0" + final_committer::Ptr{SignatureStruct} = Ptr{SignatureStruct}(C_NULL) + end orig_commit_id::GitHash = GitHash() orig_path::Cstring = Cstring(C_NULL) orig_start_line_number::Csize_t = Csize_t(0) orig_signature::Ptr{SignatureStruct} = Ptr{SignatureStruct}(C_NULL) + @static if LibGit2.VERSION >= v"1.9.0" + orig_committer::Ptr{SignatureStruct} = Ptr{SignatureStruct}(C_NULL) + end + @static if LibGit2.VERSION >= v"1.9.0" + summary::Cstring = Cstring(C_NULL) + end boundary::Char = '\0' end @assert Base.allocatedinline(BlameHunk) @@ -1197,7 +1245,7 @@ Consts.OBJECT(::Type{GitObject}) = Consts.OBJ_ANY function Consts.OBJECT(ptr::Ptr{Cvoid}) ensure_initialized() - ccall((:git_object_type, :libgit2), Consts.OBJECT, (Ptr{Cvoid},), ptr) + ccall((:git_object_type, libgit2), Consts.OBJECT, (Ptr{Cvoid},), ptr) end """ @@ -1480,3 +1528,26 @@ end # Useful for functions which can handle various kinds of credentials const Creds = Union{CredentialPayload, AbstractCredential, CachedCredentials, Nothing} + +struct _GitRemoteHead + available_local::Cint + oid::GitHash + loid::GitHash + name::Cstring + symref_target::Cstring +end + +struct GitRemoteHead + available_local::Bool + oid::GitHash + loid::GitHash + name::String + symref_target::Union{Nothing,String} + function GitRemoteHead(head::_GitRemoteHead) + name = unsafe_string(head.name) + symref_target = (head.symref_target != C_NULL ? + unsafe_string(head.symref_target) : nothing) + return new(head.available_local != 0, + head.oid, head.loid, name, symref_target) + end +end diff --git a/stdlib/LibGit2/src/utils.jl b/stdlib/LibGit2/src/utils.jl index 5234e9b6fc291..f62663a6ea4ca 100644 --- a/stdlib/LibGit2/src/utils.jl +++ b/stdlib/LibGit2/src/utils.jl @@ -37,7 +37,7 @@ function version() major = Ref{Cint}(0) minor = Ref{Cint}(0) patch = Ref{Cint}(0) - @check ccall((:git_libgit2_version, :libgit2), Cint, + @check ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) return VersionNumber(major[], minor[], patch[]) end @@ -72,7 +72,7 @@ Return a list of git features the current version of libgit2 supports, such as threading or using HTTPS or SSH. """ function features() - feat = ccall((:git_libgit2_features, :libgit2), Cint, ()) + feat = ccall((:git_libgit2_features, libgit2), Cint, ()) res = Consts.GIT_FEATURE[] for f in instances(Consts.GIT_FEATURE) isset(feat, Cuint(f)) && Base.push!(res, f) diff --git a/stdlib/LibGit2/src/walker.jl b/stdlib/LibGit2/src/walker.jl index 468e6899a7aa8..239009a014c1e 100644 --- a/stdlib/LibGit2/src/walker.jl +++ b/stdlib/LibGit2/src/walker.jl @@ -21,16 +21,16 @@ Since the `GitHash` is unique to a commit, `cnt` will be `1`. 
function GitRevWalker(repo::GitRepo) ensure_initialized() w_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_revwalk_new, :libgit2), Cint, - (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo.ptr) + @check ccall((:git_revwalk_new, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo) return GitRevWalker(repo, w_ptr[]) end function Base.iterate(w::GitRevWalker, state=nothing) ensure_initialized() id_ptr = Ref(GitHash()) - err = ccall((:git_revwalk_next, :libgit2), Cint, - (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w.ptr) + err = ccall((:git_revwalk_next, libgit2), Cint, + (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w) if err == Cint(Error.GIT_OK) return (id_ptr[], nothing) elseif err == Cint(Error.ITEROVER) @@ -51,7 +51,7 @@ during the walk. """ function push_head!(w::GitRevWalker) ensure_initialized() - @check ccall((:git_revwalk_push_head, :libgit2), Cint, (Ptr{Cvoid},), w.ptr) + @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w) return w end @@ -64,20 +64,20 @@ of that year as `cid` and then passing the resulting `w` to [`LibGit2.map`](@ref """ function push!(w::GitRevWalker, cid::GitHash) ensure_initialized() - @check ccall((:git_revwalk_push, :libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w.ptr, Ref(cid)) + @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w, Ref(cid)) return w end function push!(w::GitRevWalker, range::AbstractString) ensure_initialized() - @check ccall((:git_revwalk_push_range, :libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w.ptr, range) + @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w, range) return w end function Base.sort!(w::GitRevWalker; by::Cint = Consts.SORT_NONE, rev::Bool=false) ensure_initialized() rev && (by |= Consts.SORT_REVERSE) - @check ccall((:git_revwalk_sorting, :libgit2), Cint, (Ptr{Cvoid}, Cint), w.ptr, by) + @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w, by) return w end diff --git a/stdlib/LibGit2/test/libgit2-tests.jl b/stdlib/LibGit2/test/libgit2-tests.jl index 7dbbd10af6f67..b186f67c65cf1 100644 --- a/stdlib/LibGit2/test/libgit2-tests.jl +++ b/stdlib/LibGit2/test/libgit2-tests.jl @@ -3,117 +3,16 @@ module LibGit2Tests import LibGit2 +using LibGit2_jll using Test using Random, Serialization, Sockets const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl")) -import .Main.FakePTYs: with_fake_pty - -const timeout = 60 - -function challenge_prompt(code::Expr, challenges) - input_code = tempname() - open(input_code, "w") do fp - serialize(fp, code) - end - output_file = tempname() - torun = """ - import LibGit2 - using Serialization - result = open($(repr(input_code))) do fp - eval(deserialize(fp)) - end - open($(repr(output_file)), "w") do fp - serialize(fp, result) - end""" - cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun` - try - challenge_prompt(cmd, challenges) - return open(output_file, "r") do fp - deserialize(fp) - end - finally - isfile(output_file) && rm(output_file) - isfile(input_code) && rm(input_code) - end - return nothing -end - -function challenge_prompt(cmd::Cmd, challenges) - function format_output(output) - str = read(seekstart(output), String) - isempty(str) && return "" - return "Process output found:\n\"\"\"\n$str\n\"\"\"" - end - out = IOBuffer() - with_fake_pty() do pts, ptm - p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default - Base.close_stdio(pts) - - # Kill the 
process if it takes too long. Typically occurs when process is waiting - # for input. - timer = Channel{Symbol}(1) - watcher = @async begin - waited = 0 - while waited < timeout && process_running(p) - sleep(1) - waited += 1 - end - - if process_running(p) - kill(p) - put!(timer, :timeout) - elseif success(p) - put!(timer, :success) - else - put!(timer, :failure) - end - - # SIGKILL stubborn processes - if process_running(p) - sleep(3) - process_running(p) && kill(p, Base.SIGKILL) - end - wait(p) - end +isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl")) +using .Main.ChallengePrompts: challenge_prompt as basic_challenge_prompt - wroteall = false - try - for (challenge, response) in challenges - write(out, readuntil(ptm, challenge, keep=true)) - if !isopen(ptm) - error("Could not locate challenge: \"$challenge\". ", - format_output(out)) - end - write(ptm, response) - end - wroteall = true - - # Capture output from process until `pts` is closed - write(out, ptm) - catch ex - if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO) - # ignore EIO from `ptm` after `pts` dies - error("Process failed possibly waiting for a response. ", - format_output(out)) - end - end - - status = fetch(timer) - close(ptm) - if status !== :success - if status === :timeout - error("Process timed out possibly waiting for a response. ", - format_output(out)) - else - error("Failed process. ", format_output(out), "\n", p) - end - end - wait(watcher) - end - nothing -end +challenge_prompt(code::Expr, challenges) = basic_challenge_prompt(code, challenges; pkgs=["LibGit2"]) +challenge_prompt(cmd::Cmd, challenges) = basic_challenge_prompt(cmd, challenges) const LIBGIT2_MIN_VER = v"1.0.0" const LIBGIT2_HELPER_PATH = joinpath(@__DIR__, "libgit2-helpers.jl") @@ -129,7 +28,7 @@ end function get_global_dir() buf = Ref(LibGit2.Buffer()) - LibGit2.@check @ccall "libgit2".git_libgit2_opts( + LibGit2.@check @ccall libgit2.git_libgit2_opts( LibGit2.Consts.GET_SEARCH_PATH::Cint; LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint, buf::Ptr{LibGit2.Buffer})::Cint @@ -139,7 +38,7 @@ function get_global_dir() end function set_global_dir(dir) - LibGit2.@check @ccall "libgit2".git_libgit2_opts( + LibGit2.@check @ccall libgit2.git_libgit2_opts( LibGit2.Consts.SET_SEARCH_PATH::Cint; LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint, dir::Cstring)::Cint @@ -196,7 +95,7 @@ end p = ["XXX","YYY"] a = Base.cconvert(Ptr{LibGit2.StrArrayStruct}, p) b = Base.unsafe_convert(Ptr{LibGit2.StrArrayStruct}, a) - @test p == convert(Vector{String}, unsafe_load(b)) + @test p == collect(unsafe_load(b)) @noinline gcuse(a) = a gcuse(a) end @@ -928,6 +827,14 @@ mktempdir() do dir @test cmtr.email == test_sig.email @test LibGit2.message(cmt) == commit_msg1 + # test that the parent is correct + @test LibGit2.parentcount(cmt) == 0 + LibGit2.with(LibGit2.GitCommit(repo, commit_oid3)) do cmt3 + @test LibGit2.parentcount(cmt3) == 1 + @test LibGit2.parent_id(cmt3, 1) == commit_oid1 + @test LibGit2.GitHash(LibGit2.parent(cmt3, 1)) == commit_oid1 + end + # test showing the commit showstr = split(sprint(show, cmt), "\n") # the time of the commit will vary so just test the first two parts @@ -1163,7 +1070,7 @@ mktempdir() do dir # test workaround for git_tree_walk issue # https://github.com/libgit2/libgit2/issues/4693 - ccall((:giterr_set_str, :libgit2), Cvoid, (Cint, Cstring), + ccall((:git_error_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(LibGit2.Error.Invalid), "previous error") try # file needs to 
exist in tree in order to trigger the stop walk condition @@ -1245,6 +1152,7 @@ mktempdir() do dir function setup_clone_repo(cache_repo::AbstractString, path::AbstractString; name="AAAA", email="BBBB@BBBB.COM") repo = LibGit2.clone(cache_repo, path) + LibGit2.fetch(repo) # need to set this for merges to succeed cfg = LibGit2.GitConfig(repo) LibGit2.set!(cfg, "user.name", name) diff --git a/stdlib/LibGit2/test/online-tests.jl b/stdlib/LibGit2/test/online-tests.jl index 96b6bf5b22371..4c5f346894b3d 100644 --- a/stdlib/LibGit2/test/online-tests.jl +++ b/stdlib/LibGit2/test/online-tests.jl @@ -90,6 +90,23 @@ mktempdir() do dir end end +@testset "Remote" begin + repo_url = "https://github.com/JuliaLang/Example.jl" + LibGit2.with(LibGit2.GitRemoteDetached(repo_url)) do remote + @test !LibGit2.connected(remote) + c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false) + LibGit2.connect(remote, LibGit2.Consts.DIRECTION_FETCH, credentials=c) + @test LibGit2.connected(remote) + remote_heads = LibGit2.ls(remote) + default_branch = LibGit2.default_branch(remote) + @test !isempty(remote_heads) + @test startswith(default_branch, "refs/heads/") + @test any(head.name == default_branch for head in remote_heads) + LibGit2.disconnect(remote) + @test !LibGit2.connected(remote) + end +end + # needs to be run in separate process so it can re-initialize libgit2 # with a useless self-signed certificate authority root certificate file = joinpath(@__DIR__, "bad_ca_roots.jl") diff --git a/stdlib/LibGit2/test/runtests.jl b/stdlib/LibGit2/test/runtests.jl index 88aea77f25671..4d2f4f9104c4e 100644 --- a/stdlib/LibGit2/test/runtests.jl +++ b/stdlib/LibGit2/test/runtests.jl @@ -1,6 +1,11 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Test +using Test, LibGit2 + @testset verbose=true "LibGit2 $test" for test in eachline(joinpath(@__DIR__, "testgroups")) include("$test.jl") end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(LibGit2)) +end diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml index 4c16c1fb72e42..216fe9c3c6b41 100644 --- a/stdlib/LibGit2_jll/Project.toml +++ b/stdlib/LibGit2_jll/Project.toml @@ -1,9 +1,9 @@ name = "LibGit2_jll" uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" -version = "1.6.1+0" +version = "1.9.0+0" [deps] -MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95" LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" diff --git a/stdlib/LibGit2_jll/src/LibGit2_jll.jl b/stdlib/LibGit2_jll/src/LibGit2_jll.jl index f8e814f1f7c30..c69deb4a9d932 100644 --- a/stdlib/LibGit2_jll/src/LibGit2_jll.jl +++ b/stdlib/LibGit2_jll/src/LibGit2_jll.jl @@ -3,8 +3,11 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/LibGit2_jll.jl baremodule LibGit2_jll -using Base, Libdl, MbedTLS_jll, LibSSH2_jll -Base.Experimental.@compiler_options compile=min optimize=0 infer=false +using Base, Libdl, LibSSH2_jll +if !(Sys.iswindows() || Sys.isapple()) + # On Windows and macOS we use system SSL/crypto libraries + using OpenSSL_jll +end const PATH_list = String[] const LIBPATH_list = String[] @@ -21,9 +24,9 @@ libgit2_path::String = "" if Sys.iswindows() const libgit2 = "libgit2.dll" elseif Sys.isapple() - const libgit2 = "@rpath/libgit2.1.6.dylib" + const libgit2 = "@rpath/libgit2.1.9.dylib" else - const libgit2 = "libgit2.so.1.6" + const libgit2 = "libgit2.so.1.9" 
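As a quick sanity check that the bumped JLL is what actually gets loaded, the wrapper's `version()` (a thin wrapper over `git_libgit2_version`, see utils.jl above) can be compared against the expected series; a sketch, assuming the 1.9 line shipped by this change.

```julia
using LibGit2, LibGit2_jll

@assert LibGit2.version() >= v"1.9.0"              # wraps git_libgit2_version
println("libgit2 soname: ", LibGit2_jll.libgit2)   # e.g. "libgit2.so.1.9" on Linux
```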
end function __init__() diff --git a/stdlib/LibGit2_jll/test/runtests.jl b/stdlib/LibGit2_jll/test/runtests.jl index 32ada173f01a0..06edefe335a2f 100644 --- a/stdlib/LibGit2_jll/test/runtests.jl +++ b/stdlib/LibGit2_jll/test/runtests.jl @@ -7,5 +7,5 @@ using Test, Libdl, LibGit2_jll minor = Ref{Cint}(0) patch = Ref{Cint}(0) @test ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) == 0 - @test VersionNumber(major[], minor[], patch[]) == v"1.6.1" + @test VersionNumber(major[], minor[], patch[]) == v"1.9.0" end diff --git a/stdlib/LibSSH2_jll/Project.toml b/stdlib/LibSSH2_jll/Project.toml index 8334a86d1c23a..09f07b559344c 100644 --- a/stdlib/LibSSH2_jll/Project.toml +++ b/stdlib/LibSSH2_jll/Project.toml @@ -1,9 +1,9 @@ name = "LibSSH2_jll" uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" +version = "1.11.3+1" [deps] -MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" diff --git a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl index a809f7a912d6b..e9392fe34a918 100644 --- a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl +++ b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl @@ -3,8 +3,11 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/LibSSH2_jll.jl baremodule LibSSH2_jll -using Base, Libdl, MbedTLS_jll -Base.Experimental.@compiler_options compile=min optimize=0 infer=false +using Base, Libdl +if !Sys.iswindows() + # On Windows we use system SSL/crypto libraries + using OpenSSL_jll +end const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/LibSSH2_jll/test/runtests.jl b/stdlib/LibSSH2_jll/test/runtests.jl index 58cfd9ac024cc..9a05270317752 100644 --- a/stdlib/LibSSH2_jll/test/runtests.jl +++ b/stdlib/LibSSH2_jll/test/runtests.jl @@ -3,6 +3,9 @@ using Test, Libdl, LibSSH2_jll @testset "LibSSH2_jll" begin - # We use a `startswith()` here because when built from source, this returns "1.9.0_DEV" - vn = startswith(unsafe_string(ccall((:libssh2_version, libssh2), Cstring, (Cint,), 0)), "1.9.0") + vn = unsafe_string(ccall((:libssh2_version, libssh2), Cstring, (Cint,), 0)) + # Depending on how LibSSH2_jll was installed (downloaded from + # BinaryBuilder or built from source here), the version number is + # either "1.11.1" or "1.11.1_DEV", respectively. 
+ @test startswith(vn, "1.11.1") end diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml index 2954809921440..c6ec3ae228647 100644 --- a/stdlib/LibUV_jll/Project.toml +++ b/stdlib/LibUV_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUV_jll" uuid = "183b4373-6708-53ba-ad28-60e28bb38547" -version = "2.0.1+13" +version = "2.0.1+20" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LibUV_jll/src/LibUV_jll.jl b/stdlib/LibUV_jll/src/LibUV_jll.jl index f6714fae536e9..febc47f168ab9 100644 --- a/stdlib/LibUV_jll/src/LibUV_jll.jl +++ b/stdlib/LibUV_jll/src/LibUV_jll.jl @@ -4,43 +4,7 @@ baremodule LibUV_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false -const PATH_list = String[] -const LIBPATH_list = String[] - -export libuv - -# These get calculated in __init__() -const PATH = Ref("") -const LIBPATH = Ref("") -artifact_dir::String = "" -libuv_handle::Ptr{Cvoid} = C_NULL -libuv_path::String = "" - -if Sys.iswindows() - const libuv = "libuv-2.dll" -elseif Sys.isapple() - const libuv = "@rpath/libuv.2.dylib" -else - const libuv = "libuv.so.2" -end - -function __init__() - global libuv_handle = dlopen(libuv) - global libuv_path = dlpath(libuv_handle) - global artifact_dir = dirname(Sys.BINDIR) - LIBPATH[] = dirname(libuv_path) - push!(LIBPATH_list, LIBPATH[]) -end - -# JLLWrappers API compatibility shims. Note that not all of these will really make sense. -# For instance, `find_artifact_dir()` won't actually be the artifact directory, because -# there isn't one. It instead returns the overall Julia prefix. -is_available() = true -find_artifact_dir() = artifact_dir -dev_jll() = error("stdlib JLLs cannot be dev'ed") -best_wrapper = nothing -get_libuv_path() = libuv_path +# NOTE: This file is currently empty, as we link libuv statically for now. end # module LibUV_jll diff --git a/stdlib/LibUV_jll/test/runtests.jl b/stdlib/LibUV_jll/test/runtests.jl index 26c50b92c0c2d..0615edebaa070 100644 --- a/stdlib/LibUV_jll/test/runtests.jl +++ b/stdlib/LibUV_jll/test/runtests.jl @@ -1,8 +1,3 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license using Test, Libdl, LibUV_jll - -@testset "LibUV_jll" begin - vn = VersionNumber(unsafe_string(ccall((:uv_version_string, libuv), Cstring, ()))) - @test vn == v"2.0.0-dev" -end diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml index 1f5f695a26ba4..b43f1c537ce5a 100644 --- a/stdlib/LibUnwind_jll/Project.toml +++ b/stdlib/LibUnwind_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUnwind_jll" uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3" -version = "1.5.0+4" +version = "1.8.1+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl index 12abeaf598151..f97b18443b6fd 100644 --- a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl +++ b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl @@ -4,7 +4,6 @@ baremodule LibUnwind_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/Libdl/Project.toml b/stdlib/Libdl/Project.toml index 26e5bf0cdefd7..7fab4b9334260 100644 --- a/stdlib/Libdl/Project.toml +++ b/stdlib/Libdl/Project.toml @@ -1,5 +1,6 @@ name = "Libdl" uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Libdl/docs/src/index.md b/stdlib/Libdl/docs/src/index.md index 62f9837831d55..2d7ef2fffc41a 100644 --- a/stdlib/Libdl/docs/src/index.md +++ b/stdlib/Libdl/docs/src/index.md @@ -1,3 +1,11 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Libdl/docs/src/index.md" +``` + +```@docs +Libdl +``` + # Dynamic Linker ```@docs diff --git a/stdlib/Libdl/src/Libdl.jl b/stdlib/Libdl/src/Libdl.jl index df3f62c807fed..2a8f800c69194 100644 --- a/stdlib/Libdl/src/Libdl.jl +++ b/stdlib/Libdl/src/Libdl.jl @@ -1,13 +1,19 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license - +""" +The Libdl module in Julia provides specialized and lower-level facilities for dynamic linking with shared libraries. While Julia +inherently supports linking to runtime shared libraries through the `ccall` intrinsic, `Libdl` extends this capability by offering additional, more +granular control. It enables users to search for shared libraries both in memory and the filesystem, manually load them with specific runtime linker options, and look up +library symbols as low-level pointers. +""" module Libdl # Just re-export Base.Libc.Libdl: export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e, - dlpath, find_library, dlext, dllist + dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, BundledLazyLibraryPath import Base.Libc.Libdl: DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e, - dlpath, find_library, dlext, dllist + dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, + BundledLazyLibraryPath, default_rtld_flags, add_dependency! end # module diff --git a/stdlib/Libdl/test/runtests.jl b/stdlib/Libdl/test/runtests.jl index 6863e28959b5e..ef7b8abf83337 100644 --- a/stdlib/Libdl/test/runtests.jl +++ b/stdlib/Libdl/test/runtests.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license using Test -import Libdl +using Libdl # these could fail on an embedded installation # but for now, we don't handle that case @@ -27,8 +27,6 @@ end @test_throws ArgumentError Libdl.dlsym(C_NULL, :foo) @test_throws ArgumentError Libdl.dlsym_e(C_NULL, :foo) -cd(@__DIR__) do - # Find the library directory by finding the path of libjulia-internal (or libjulia-internal-debug, # as the case may be) to get the private library directory private_libdir = if Base.DARWIN_FRAMEWORK @@ -267,4 +265,71 @@ mktempdir() do dir end end +## Tests for LazyLibrary +@testset "LazyLibrary" begin; mktempdir() do dir + lclf_path = joinpath(private_libdir, "libccalllazyfoo.$(Libdl.dlext)") + lclb_path = joinpath(private_libdir, "libccalllazybar.$(Libdl.dlext)") + + # Ensure that our modified copy of `libccalltest` is not currently loaded + @test !any(contains.(dllist(), lclf_path)) + @test !any(contains.(dllist(), lclb_path)) + + # Create a `LazyLibrary` structure that loads `libccalllazybar` + global lclf_loaded = false + global lclb_loaded = false + + # We don't provide `dlclose()` on `LazyLibrary`'s, you have to manage it yourself: + function close_libs() + global lclf_loaded = false + global lclb_loaded = false + if libccalllazybar.handle != C_NULL + dlclose(libccalllazybar.handle) + end + if libccalllazyfoo.handle != C_NULL + dlclose(libccalllazyfoo.handle) + end + @atomic libccalllazyfoo.handle = C_NULL + @atomic libccalllazybar.handle = C_NULL + @test !any(contains.(dllist(), lclf_path)) + @test !any(contains.(dllist(), lclb_path)) + end + + global libccalllazyfoo = LazyLibrary(lclf_path; on_load_callback=() -> global lclf_loaded = true) + global libccalllazybar = LazyLibrary(lclb_path; dependencies=[libccalllazyfoo], on_load_callback=() -> global lclb_loaded = true) + + # Creating `LazyLibrary` doesn't actually load anything + @test !lclf_loaded + @test !lclb_loaded + + # Explicitly calling `dlopen()` does: + dlopen(libccalllazybar) + @test lclf_loaded + @test lclb_loaded + close_libs() + + # Test that the library gets loaded when you use `ccall()` + @test ccall((:bar, libccalllazybar), Cint, (Cint,), 2) == 6 + @test lclf_loaded + @test lclb_loaded + close_libs() + + # Test that `@ccall` works: + @test @ccall(libccalllazybar.bar(2::Cint)::Cint) == 6 + @test lclf_loaded + @test lclb_loaded + close_libs() + + # Test that `dlpath()` works + @test dlpath(libccalllazybar) == realpath(string(libccalllazybar.path)) + @test lclf_loaded + close_libs() + + # Test that we can use lazily-evaluated library names: + libname = LazyLibraryPath(private_libdir, "libccalllazyfoo.$(Libdl.dlext)") + lazy_name_lazy_lib = LazyLibrary(libname) + @test dlpath(lazy_name_lazy_lib) == realpath(string(libname)) +end; end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Libdl)) end diff --git a/stdlib/LinearAlgebra.version b/stdlib/LinearAlgebra.version new file mode 100644 index 0000000000000..3dac27119172a --- /dev/null +++ b/stdlib/LinearAlgebra.version @@ -0,0 +1,4 @@ +LINEARALGEBRA_BRANCH = master +LINEARALGEBRA_SHA1 = 1137b4c7fa8297cef17c4ae0982d7d89d4ab7dd8 +LINEARALGEBRA_GIT_URL := https://github.com/JuliaLang/LinearAlgebra.jl.git +LINEARALGEBRA_TAR_URL = https://api.github.com/repos/JuliaLang/LinearAlgebra.jl/tarball/$1 diff --git a/stdlib/LinearAlgebra/Project.toml b/stdlib/LinearAlgebra/Project.toml deleted file mode 100644 index 46653aa795209..0000000000000 --- a/stdlib/LinearAlgebra/Project.toml +++ /dev/null @@ -1,14 +0,0 @@ -name = "LinearAlgebra" -uuid = 
"37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[deps] -Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" -libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93" -OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[targets] -test = ["Test", "Random"] diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md deleted file mode 100644 index 00ce21ed6fcae..0000000000000 --- a/stdlib/LinearAlgebra/docs/src/index.md +++ /dev/null @@ -1,860 +0,0 @@ -# [Linear Algebra](@id man-linalg) - -```@meta -DocTestSetup = :(using LinearAlgebra) -``` - -In addition to (and as part of) its support for multi-dimensional arrays, Julia provides native implementations -of many common and useful linear algebra operations which can be loaded with `using LinearAlgebra`. Basic operations, such as [`tr`](@ref), [`det`](@ref), -and [`inv`](@ref) are all supported: - -```jldoctest -julia> A = [1 2 3; 4 1 6; 7 8 1] -3×3 Matrix{Int64}: - 1 2 3 - 4 1 6 - 7 8 1 - -julia> tr(A) -3 - -julia> det(A) -104.0 - -julia> inv(A) -3×3 Matrix{Float64}: - -0.451923 0.211538 0.0865385 - 0.365385 -0.192308 0.0576923 - 0.240385 0.0576923 -0.0673077 -``` - -As well as other useful operations, such as finding eigenvalues or eigenvectors: - -```jldoctest -julia> A = [-4. -17.; 2. 2.] -2×2 Matrix{Float64}: - -4.0 -17.0 - 2.0 2.0 - -julia> eigvals(A) -2-element Vector{ComplexF64}: - -1.0 - 5.0im - -1.0 + 5.0im - -julia> eigvecs(A) -2×2 Matrix{ComplexF64}: - 0.945905-0.0im 0.945905+0.0im - -0.166924+0.278207im -0.166924-0.278207im -``` - -In addition, Julia provides many [factorizations](@ref man-linalg-factorizations) which can be used to -speed up problems such as linear solve or matrix exponentiation by pre-factorizing a matrix into a form -more amenable (for performance or memory reasons) to the problem. See the documentation on [`factorize`](@ref) -for more information. As an example: - -```jldoctest -julia> A = [1.5 2 -4; 3 -1 -6; -10 2.3 4] -3×3 Matrix{Float64}: - 1.5 2.0 -4.0 - 3.0 -1.0 -6.0 - -10.0 2.3 4.0 - -julia> factorize(A) -LU{Float64, Matrix{Float64}, Vector{Int64}} -L factor: -3×3 Matrix{Float64}: - 1.0 0.0 0.0 - -0.15 1.0 0.0 - -0.3 -0.132196 1.0 -U factor: -3×3 Matrix{Float64}: - -10.0 2.3 4.0 - 0.0 2.345 -3.4 - 0.0 0.0 -5.24947 -``` - -Since `A` is not Hermitian, symmetric, triangular, tridiagonal, or bidiagonal, an LU factorization may be the -best we can do. Compare with: - -```jldoctest -julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5] -3×3 Matrix{Float64}: - 1.5 2.0 -4.0 - 2.0 -1.0 -3.0 - -4.0 -3.0 5.0 - -julia> factorize(B) -BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}} -D factor: -3×3 Tridiagonal{Float64, Vector{Float64}}: - -1.64286 0.0 ⋅ - 0.0 -2.8 0.0 - ⋅ 0.0 5.0 -U factor: -3×3 UnitUpperTriangular{Float64, Matrix{Float64}}: - 1.0 0.142857 -0.8 - ⋅ 1.0 -0.6 - ⋅ ⋅ 1.0 -permutation: -3-element Vector{Int64}: - 1 - 2 - 3 -``` - -Here, Julia was able to detect that `B` is in fact symmetric, and used a more appropriate factorization. -Often it's possible to write more efficient code for a matrix that is known to have certain properties e.g. -it is symmetric, or tridiagonal. Julia provides some special types so that you can "tag" matrices as having -these properties. 
For instance: - -```jldoctest -julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5] -3×3 Matrix{Float64}: - 1.5 2.0 -4.0 - 2.0 -1.0 -3.0 - -4.0 -3.0 5.0 - -julia> sB = Symmetric(B) -3×3 Symmetric{Float64, Matrix{Float64}}: - 1.5 2.0 -4.0 - 2.0 -1.0 -3.0 - -4.0 -3.0 5.0 -``` - -`sB` has been tagged as a matrix that's (real) symmetric, so for later operations we might perform on it, -such as eigenfactorization or computing matrix-vector products, efficiencies can be found by only referencing -half of it. For example: - -```jldoctest -julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5] -3×3 Matrix{Float64}: - 1.5 2.0 -4.0 - 2.0 -1.0 -3.0 - -4.0 -3.0 5.0 - -julia> sB = Symmetric(B) -3×3 Symmetric{Float64, Matrix{Float64}}: - 1.5 2.0 -4.0 - 2.0 -1.0 -3.0 - -4.0 -3.0 5.0 - -julia> x = [1; 2; 3] -3-element Vector{Int64}: - 1 - 2 - 3 - -julia> sB\x -3-element Vector{Float64}: - -1.7391304347826084 - -1.1086956521739126 - -1.4565217391304346 -``` - -The `\` operation here performs the linear solution. The left-division operator is pretty -powerful and it's easy to write compact, readable code that is flexible enough to solve all -sorts of systems of linear equations. - -## Special matrices - -[Matrices with special symmetries and structures](http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274) -arise often in linear algebra and are frequently associated with various matrix factorizations. -Julia features a rich collection of special matrix types, which allow for fast computation with -specialized routines that are specially developed for particular matrix types. - -The following tables summarize the types of special matrices that have been implemented in Julia, -as well as whether hooks to various optimized methods for them in LAPACK are available. - -| Type | Description | -|:----------------------------- |:--------------------------------------------------------------------------------------------- | -| [`Symmetric`](@ref) | [Symmetric matrix](https://en.wikipedia.org/wiki/Symmetric_matrix) | -| [`Hermitian`](@ref) | [Hermitian matrix](https://en.wikipedia.org/wiki/Hermitian_matrix) | -| [`UpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | -| [`UnitUpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal | -| [`LowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | | -| [`UnitLowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal | -| [`UpperHessenberg`](@ref) | Upper [Hessenberg matrix](https://en.wikipedia.org/wiki/Hessenberg_matrix) -| [`Tridiagonal`](@ref) | [Tridiagonal matrix](https://en.wikipedia.org/wiki/Tridiagonal_matrix) | -| [`SymTridiagonal`](@ref) | Symmetric tridiagonal matrix | -| [`Bidiagonal`](@ref) | Upper/lower [bidiagonal matrix](https://en.wikipedia.org/wiki/Bidiagonal_matrix) | -| [`Diagonal`](@ref) | [Diagonal matrix](https://en.wikipedia.org/wiki/Diagonal_matrix) | -| [`UniformScaling`](@ref) | [Uniform scaling operator](https://en.wikipedia.org/wiki/Uniform_scaling) | - -### Elementary operations - -| Matrix type | `+` | `-` | `*` | `\` | Other functions with optimized methods | -|:----------------------------- |:--- |:--- |:--- |:--- |:----------------------------------------------------------- | -| [`Symmetric`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) | -| [`Hermitian`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), 
[`exp`](@ref) | -| [`UpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | -| [`UnitUpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | -| [`LowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | -| [`UnitLowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | -| [`UpperHessenberg`](@ref) | | | | MM | [`inv`](@ref), [`det`](@ref) | -| [`SymTridiagonal`](@ref) | M | M | MS | MV | [`eigmax`](@ref), [`eigmin`](@ref) | -| [`Tridiagonal`](@ref) | M | M | MS | MV | | -| [`Bidiagonal`](@ref) | M | M | MS | MV | | -| [`Diagonal`](@ref) | M | M | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref), [`/`](@ref) | -| [`UniformScaling`](@ref) | M | M | MVS | MVS | [`/`](@ref) | - -Legend: - -| Key | Description | -|:---------- |:------------------------------------------------------------- | -| M (matrix) | An optimized method for matrix-matrix operations is available | -| V (vector) | An optimized method for matrix-vector operations is available | -| S (scalar) | An optimized method for matrix-scalar operations is available | - -### Matrix factorizations - -| Matrix type | LAPACK | [`eigen`](@ref) | [`eigvals`](@ref) | [`eigvecs`](@ref) | [`svd`](@ref) | [`svdvals`](@ref) | -|:----------------------------- |:------ |:------------- |:----------------- |:----------------- |:------------- |:----------------- | -| [`Symmetric`](@ref) | SY | | ARI | | | | -| [`Hermitian`](@ref) | HE | | ARI | | | | -| [`UpperTriangular`](@ref) | TR | A | A | A | | | -| [`UnitUpperTriangular`](@ref) | TR | A | A | A | | | -| [`LowerTriangular`](@ref) | TR | A | A | A | | | -| [`UnitLowerTriangular`](@ref) | TR | A | A | A | | | -| [`SymTridiagonal`](@ref) | ST | A | ARI | AV | | | -| [`Tridiagonal`](@ref) | GT | | | | | | -| [`Bidiagonal`](@ref) | BD | | | | A | A | -| [`Diagonal`](@ref) | DI | | A | | | | - -Legend: - -| Key | Description | Example | -|:------------ |:------------------------------------------------------------------------------------------------------------------------------- |:-------------------- | -| A (all) | An optimized method to find all the characteristic values and/or vectors is available | e.g. `eigvals(M)` | -| R (range) | An optimized method to find the `il`th through the `ih`th characteristic values are available | `eigvals(M, il, ih)` | -| I (interval) | An optimized method to find the characteristic values in the interval [`vl`, `vh`] is available | `eigvals(M, vl, vh)` | -| V (vectors) | An optimized method to find the characteristic vectors corresponding to the characteristic values `x=[x1, x2,...]` is available | `eigvecs(M, x)` | - -### The uniform scaling operator - -A [`UniformScaling`](@ref) operator represents a scalar times the identity operator, `λ*I`. The identity -operator `I` is defined as a constant and is an instance of `UniformScaling`. The size of these -operators are generic and match the other matrix in the binary operations [`+`](@ref), [`-`](@ref), -[`*`](@ref) and [`\`](@ref). For `A+I` and `A-I` this means that `A` must be square. Multiplication -with the identity operator `I` is a noop (except for checking that the scaling factor is one) -and therefore almost without overhead. 
- -To see the `UniformScaling` operator in action: - -```jldoctest -julia> U = UniformScaling(2); - -julia> a = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> a + U -2×2 Matrix{Int64}: - 3 2 - 3 6 - -julia> a * U -2×2 Matrix{Int64}: - 2 4 - 6 8 - -julia> [a U] -2×4 Matrix{Int64}: - 1 2 2 0 - 3 4 0 2 - -julia> b = [1 2 3; 4 5 6] -2×3 Matrix{Int64}: - 1 2 3 - 4 5 6 - -julia> b - U -ERROR: DimensionMismatch: matrix is not square: dimensions are (2, 3) -Stacktrace: -[...] -``` - -If you need to solve many systems of the form `(A+μI)x = b` for the same `A` and different `μ`, it might be beneficial -to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function. -Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I)x \ b`) and related -operations like determinants. - -## [Matrix factorizations](@id man-linalg-factorizations) - -[Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition) -compute the factorization of a matrix into a product of matrices, and are one of the central concepts -in (numerical) linear algebra. - -The following table summarizes the types of matrix factorizations that have been implemented in -Julia. Details of their associated methods can be found in the [Standard functions](@ref) section -of the Linear Algebra documentation. - -| Type | Description | -|:------------------ |:-------------------------------------------------------------------------------------------------------------- | -| `BunchKaufman` | Bunch-Kaufman factorization | -| `Cholesky` | [Cholesky factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition) | -| `CholeskyPivoted` | [Pivoted](https://en.wikipedia.org/wiki/Pivot_element) Cholesky factorization | -| `LDLt` | [LDL(T) factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition#LDL_decomposition) | -| `LU` | [LU factorization](https://en.wikipedia.org/wiki/LU_decomposition) | -| `QR` | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) | -| `QRCompactWY` | Compact WY form of the QR factorization | -| `QRPivoted` | Pivoted [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) | -| `LQ` | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) of `transpose(A)` | -| `Hessenberg` | [Hessenberg decomposition](http://mathworld.wolfram.com/HessenbergDecomposition.html) | -| `Eigen` | [Spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix) | -| `GeneralizedEigen` | [Generalized spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix#Generalized_eigenvalue_problem) | -| `SVD` | [Singular value decomposition](https://en.wikipedia.org/wiki/Singular_value_decomposition) | -| `GeneralizedSVD` | [Generalized SVD](https://en.wikipedia.org/wiki/Generalized_singular_value_decomposition#Higher_order_version) | -| `Schur` | [Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition) | -| `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) | - -Adjoints and transposes of [`Factorization`](@ref) objects are lazily wrapped in -`AdjointFactorization` and `TransposeFactorization` objects, respectively. Generically, -transpose of real `Factorization`s are wrapped as `AdjointFactorization`. - -## [Orthogonal matrices (`AbstractQ`)](@id man-linalg-abstractq) - -Some matrix factorizations generate orthogonal/unitary "matrix" factors. 
These -factorizations include QR-related factorizations obtained from calls to [`qr`](@ref), i.e., -`QR`, `QRCompactWY` and `QRPivoted`, the Hessenberg factorization obtained from calls to -[`hessenberg`](@ref), and the LQ factorization obtained from [`lq`](@ref). While these -orthogonal/unitary factors admit a matrix representation, their internal representation -is, for performance and memory reasons, different. Hence, they should be rather viewed as -matrix-backed, function-based linear operators. In particular, reading, for instance, a -column of its matrix representation requires running "matrix"-vector multiplication code, -rather than simply reading out data from memory (possibly filling parts of the vector with -structural zeros). Another clear distinction from other, non-triangular matrix types is -that the underlying multiplication code allows for in-place modification during multiplication. -Furthermore, objects of specific `AbstractQ` subtypes as those created via [`qr`](@ref), -[`hessenberg`](@ref) and [`lq`](@ref) can behave like a square or a rectangular matrix -depending on context: - -```julia -julia> using LinearAlgebra - -julia> Q = qr(rand(3,2)).Q -3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} - -julia> Matrix(Q) -3×2 Matrix{Float64}: - -0.320597 0.865734 - -0.765834 -0.475694 - -0.557419 0.155628 - -julia> Q*I -3×3 Matrix{Float64}: - -0.320597 0.865734 -0.384346 - -0.765834 -0.475694 -0.432683 - -0.557419 0.155628 0.815514 - -julia> Q*ones(2) -3-element Vector{Float64}: - 0.5451367118802273 - -1.241527373086654 - -0.40179067589600226 - -julia> Q*ones(3) -3-element Vector{Float64}: - 0.16079054743832022 - -1.674209978965636 - 0.41372375588835797 - -julia> ones(1,2) * Q' -1×3 Matrix{Float64}: - 0.545137 -1.24153 -0.401791 - -julia> ones(1,3) * Q' -1×3 Matrix{Float64}: - 0.160791 -1.67421 0.413724 -``` - -Due to this distinction from dense or structured matrices, the abstract `AbstractQ` type -does not subtype `AbstractMatrix`, but instead has its own type hierarchy. Custom types -that subtype `AbstractQ` can rely on generic fallbacks if the following interface is satisfied. -For example, for - -```julia -struct MyQ{T} <: LinearAlgebra.AbstractQ{T} - # required fields -end -``` - -provide overloads for - -```julia -Base.size(Q::MyQ) # size of corresponding square matrix representation -Base.convert(::Type{AbstractQ{T}}, Q::MyQ) # eltype promotion [optional] -LinearAlgebra.lmul!(Q::MyQ, x::AbstractVecOrMat) # left-multiplication -LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) # right-multiplication -``` - -If `eltype` promotion is not of interest, the `convert` method is unnecessary, since by -default `convert(::Type{AbstractQ{T}}, Q::AbstractQ{T})` returns `Q` itself. -Adjoints of `AbstractQ`-typed objects are lazily wrapped in an `AdjointQ` wrapper type, -which requires its own `LinearAlgebra.lmul!` and `LinearAlgebra.rmul!` methods. Given this -set of methods, any `Q::MyQ` can be used like a matrix, preferably in a multiplicative -context: multiplication via `*` with scalars, vectors and matrices from left and right, -obtaining a matrix representation of `Q` via `Matrix(Q)` (or `Q*I`) and indexing into the -matrix representation all work. In contrast, addition and subtraction as well as more -generally broadcasting over elements in the matrix representation fail because that would -be highly inefficient. For such use cases, consider computing the matrix representation -up front and cache it for future reuse. 
- -## Standard functions - -Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/). -Sparse matrix factorizations call functions from [SuiteSparse](http://suitesparse.com). -Other sparse solvers are available as Julia packages. - -```@docs -Base.:*(::AbstractMatrix, ::AbstractMatrix) -Base.:\(::AbstractMatrix, ::AbstractVecOrMat) -Base.:/(::AbstractVecOrMat, ::AbstractVecOrMat) -LinearAlgebra.SingularException -LinearAlgebra.PosDefException -LinearAlgebra.ZeroPivotException -LinearAlgebra.dot -LinearAlgebra.dot(::Any, ::Any, ::Any) -LinearAlgebra.cross -LinearAlgebra.axpy! -LinearAlgebra.axpby! -LinearAlgebra.rotate! -LinearAlgebra.reflect! -LinearAlgebra.factorize -LinearAlgebra.Diagonal -LinearAlgebra.Bidiagonal -LinearAlgebra.SymTridiagonal -LinearAlgebra.Tridiagonal -LinearAlgebra.Symmetric -LinearAlgebra.Hermitian -LinearAlgebra.LowerTriangular -LinearAlgebra.UpperTriangular -LinearAlgebra.UnitLowerTriangular -LinearAlgebra.UnitUpperTriangular -LinearAlgebra.UpperHessenberg -LinearAlgebra.UniformScaling -LinearAlgebra.I -LinearAlgebra.UniformScaling(::Integer) -LinearAlgebra.Factorization -LinearAlgebra.LU -LinearAlgebra.lu -LinearAlgebra.lu! -LinearAlgebra.Cholesky -LinearAlgebra.CholeskyPivoted -LinearAlgebra.cholesky -LinearAlgebra.cholesky! -LinearAlgebra.lowrankupdate -LinearAlgebra.lowrankdowndate -LinearAlgebra.lowrankupdate! -LinearAlgebra.lowrankdowndate! -LinearAlgebra.LDLt -LinearAlgebra.ldlt -LinearAlgebra.ldlt! -LinearAlgebra.QR -LinearAlgebra.QRCompactWY -LinearAlgebra.QRPivoted -LinearAlgebra.qr -LinearAlgebra.qr! -LinearAlgebra.LQ -LinearAlgebra.lq -LinearAlgebra.lq! -LinearAlgebra.BunchKaufman -LinearAlgebra.bunchkaufman -LinearAlgebra.bunchkaufman! -LinearAlgebra.Eigen -LinearAlgebra.GeneralizedEigen -LinearAlgebra.eigvals -LinearAlgebra.eigvals! -LinearAlgebra.eigmax -LinearAlgebra.eigmin -LinearAlgebra.eigvecs -LinearAlgebra.eigen -LinearAlgebra.eigen! -LinearAlgebra.Hessenberg -LinearAlgebra.hessenberg -LinearAlgebra.hessenberg! -LinearAlgebra.Schur -LinearAlgebra.GeneralizedSchur -LinearAlgebra.schur -LinearAlgebra.schur! -LinearAlgebra.ordschur -LinearAlgebra.ordschur! -LinearAlgebra.SVD -LinearAlgebra.GeneralizedSVD -LinearAlgebra.svd -LinearAlgebra.svd! -LinearAlgebra.svdvals -LinearAlgebra.svdvals! -LinearAlgebra.Givens -LinearAlgebra.givens -LinearAlgebra.triu -LinearAlgebra.triu! -LinearAlgebra.tril -LinearAlgebra.tril! -LinearAlgebra.diagind -LinearAlgebra.diag -LinearAlgebra.diagm -LinearAlgebra.rank -LinearAlgebra.norm -LinearAlgebra.opnorm -LinearAlgebra.normalize! -LinearAlgebra.normalize -LinearAlgebra.cond -LinearAlgebra.condskeel -LinearAlgebra.tr -LinearAlgebra.det -LinearAlgebra.logdet -LinearAlgebra.logabsdet -Base.inv(::AbstractMatrix) -LinearAlgebra.pinv -LinearAlgebra.nullspace -Base.kron -Base.kron! 
-LinearAlgebra.exp(::StridedMatrix{<:LinearAlgebra.BlasFloat}) -Base.cis(::AbstractMatrix) -Base.:^(::AbstractMatrix, ::Number) -Base.:^(::Number, ::AbstractMatrix) -LinearAlgebra.log(::StridedMatrix) -LinearAlgebra.sqrt(::StridedMatrix) -LinearAlgebra.cos(::StridedMatrix{<:Real}) -LinearAlgebra.sin(::StridedMatrix{<:Real}) -LinearAlgebra.sincos(::StridedMatrix{<:Real}) -LinearAlgebra.tan(::StridedMatrix{<:Real}) -LinearAlgebra.sec(::StridedMatrix) -LinearAlgebra.csc(::StridedMatrix) -LinearAlgebra.cot(::StridedMatrix) -LinearAlgebra.cosh(::StridedMatrix) -LinearAlgebra.sinh(::StridedMatrix) -LinearAlgebra.tanh(::StridedMatrix) -LinearAlgebra.sech(::StridedMatrix) -LinearAlgebra.csch(::StridedMatrix) -LinearAlgebra.coth(::StridedMatrix) -LinearAlgebra.acos(::StridedMatrix) -LinearAlgebra.asin(::StridedMatrix) -LinearAlgebra.atan(::StridedMatrix) -LinearAlgebra.asec(::StridedMatrix) -LinearAlgebra.acsc(::StridedMatrix) -LinearAlgebra.acot(::StridedMatrix) -LinearAlgebra.acosh(::StridedMatrix) -LinearAlgebra.asinh(::StridedMatrix) -LinearAlgebra.atanh(::StridedMatrix) -LinearAlgebra.asech(::StridedMatrix) -LinearAlgebra.acsch(::StridedMatrix) -LinearAlgebra.acoth(::StridedMatrix) -LinearAlgebra.lyap -LinearAlgebra.sylvester -LinearAlgebra.issuccess -LinearAlgebra.issymmetric -LinearAlgebra.isposdef -LinearAlgebra.isposdef! -LinearAlgebra.istril -LinearAlgebra.istriu -LinearAlgebra.isdiag -LinearAlgebra.ishermitian -Base.transpose -LinearAlgebra.transpose! -LinearAlgebra.Transpose -LinearAlgebra.TransposeFactorization -Base.adjoint -LinearAlgebra.adjoint! -LinearAlgebra.Adjoint -LinearAlgebra.AdjointFactorization -Base.copy(::Union{Transpose,Adjoint}) -LinearAlgebra.stride1 -LinearAlgebra.checksquare -LinearAlgebra.peakflops -LinearAlgebra.hermitianpart -LinearAlgebra.hermitianpart! -``` - -## Low-level matrix operations - -In many cases there are in-place versions of matrix operations that allow you to supply -a pre-allocated output vector or matrix. This is useful when optimizing critical code in order -to avoid the overhead of repeated allocations. These in-place operations are suffixed with `!` -below (e.g. `mul!`) according to the usual Julia convention. - -```@docs -LinearAlgebra.mul! -LinearAlgebra.lmul! -LinearAlgebra.rmul! -LinearAlgebra.ldiv! -LinearAlgebra.rdiv! -``` - -## BLAS functions - -In Julia (as in much of scientific computation), dense linear-algebra operations are based on -the [LAPACK library](http://www.netlib.org/lapack/), which in turn is built on top of basic linear-algebra -building-blocks known as the [BLAS](http://www.netlib.org/blas/). There are highly optimized -implementations of BLAS available for every computer architecture, and sometimes in high-performance -linear algebra routines it is useful to call the BLAS functions directly. - -`LinearAlgebra.BLAS` provides wrappers for some of the BLAS functions. Those BLAS functions -that overwrite one of the input arrays have names ending in `'!'`. Usually, a BLAS function has -four methods defined, for [`Float32`](@ref), [`Float64`](@ref), [`ComplexF32`](@ref Complex), -and [`ComplexF64`](@ref Complex) arrays. - -### [BLAS character arguments](@id stdlib-blas-chars) - -Many BLAS functions accept arguments that determine whether to transpose an argument (`trans`), -which triangle of a matrix to reference (`uplo` or `ul`), -whether the diagonal of a triangular matrix can be assumed to -be all ones (`dA`) or which side of a matrix multiplication -the input argument belongs on (`side`). 
The possibilities are: - -#### [Multiplication order](@id stdlib-blas-side) - -| `side` | Meaning | -|:-------|:--------------------------------------------------------------------| -| `'L'` | The argument goes on the *left* side of a matrix-matrix operation. | -| `'R'` | The argument goes on the *right* side of a matrix-matrix operation. | - -#### [Triangle referencing](@id stdlib-blas-uplo) - -| `uplo`/`ul` | Meaning | -|:------------|:------------------------------------------------------| -| `'U'` | Only the *upper* triangle of the matrix will be used. | -| `'L'` | Only the *lower* triangle of the matrix will be used. | - -#### [Transposition operation](@id stdlib-blas-trans) - -| `trans`/`tX` | Meaning | -|:-------------|:--------------------------------------------------------| -| `'N'` | The input matrix `X` is not transposed or conjugated. | -| `'T'` | The input matrix `X` will be transposed. | -| `'C'` | The input matrix `X` will be conjugated and transposed. | - -#### [Unit diagonal](@id stdlib-blas-diag) - -| `diag`/`dX` | Meaning | -|:------------|:----------------------------------------------------------| -| `'N'` | The diagonal values of the matrix `X` will be read. | -| `'U'` | The diagonal of the matrix `X` is assumed to be all ones. | - -```@docs -LinearAlgebra.BLAS -LinearAlgebra.BLAS.set_num_threads -LinearAlgebra.BLAS.get_num_threads -``` - -BLAS functions can be divided into three groups, also called three levels, -depending on when they were first proposed, the type of input parameters, -and the complexity of the operation. - -### Level 1 BLAS functions - -The level 1 BLAS functions were first proposed in [(Lawson, 1979)][Lawson-1979] and -define operations between scalars and vectors. - -[Lawson-1979]: https://dl.acm.org/doi/10.1145/355841.355847 - -```@docs -# xROTG -# xROTMG -LinearAlgebra.BLAS.rot! -# xROTM -# xSWAP -LinearAlgebra.BLAS.scal! -LinearAlgebra.BLAS.scal -LinearAlgebra.BLAS.blascopy! -# xAXPY! -# xAXPBY! -LinearAlgebra.BLAS.dot -LinearAlgebra.BLAS.dotu -LinearAlgebra.BLAS.dotc -# xxDOT -LinearAlgebra.BLAS.nrm2 -LinearAlgebra.BLAS.asum -LinearAlgebra.BLAS.iamax -``` - -### Level 2 BLAS functions - -The level 2 BLAS functions were published in [(Dongarra, 1988)][Dongarra-1988], -and define matrix-vector operations. - -[Dongarra-1988]: https://dl.acm.org/doi/10.1145/42288.42291 - -**return a vector** - -```@docs -LinearAlgebra.BLAS.gemv! -LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any) -LinearAlgebra.BLAS.gbmv! -LinearAlgebra.BLAS.gbmv -LinearAlgebra.BLAS.hemv! -LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any) -# hbmv!, hbmv -LinearAlgebra.BLAS.hpmv! -LinearAlgebra.BLAS.symv! -LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any) -LinearAlgebra.BLAS.sbmv! -LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.spmv! -LinearAlgebra.BLAS.trmv! -LinearAlgebra.BLAS.trmv -# xTBMV -# xTPMV -LinearAlgebra.BLAS.trsv! -LinearAlgebra.BLAS.trsv -# xTBSV -# xTPSV -``` - -**return a matrix** - -```@docs -LinearAlgebra.BLAS.ger! -# xGERU -# xGERC -LinearAlgebra.BLAS.her! -# xHPR -# xHER2 -# xHPR2 -LinearAlgebra.BLAS.syr! -LinearAlgebra.BLAS.spr! -# xSYR2 -# xSPR2 -``` - -### Level 3 BLAS functions - -The level 3 BLAS functions were published in [(Dongarra, 1990)][Dongarra-1990], -and define matrix-matrix operations. 
- -[Dongarra-1990]: https://dl.acm.org/doi/10.1145/77626.79170 - -```@docs -LinearAlgebra.BLAS.gemm! -LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.symm! -LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.hemm! -LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.syrk! -LinearAlgebra.BLAS.syrk -LinearAlgebra.BLAS.herk! -LinearAlgebra.BLAS.herk -LinearAlgebra.BLAS.syr2k! -LinearAlgebra.BLAS.syr2k -LinearAlgebra.BLAS.her2k! -LinearAlgebra.BLAS.her2k -LinearAlgebra.BLAS.trmm! -LinearAlgebra.BLAS.trmm -LinearAlgebra.BLAS.trsm! -LinearAlgebra.BLAS.trsm -``` - -## LAPACK functions - -`LinearAlgebra.LAPACK` provides wrappers for some of the LAPACK functions for linear algebra. - Those functions that overwrite one of the input arrays have names ending in `'!'`. - -Usually a function has 4 methods defined, one each for [`Float64`](@ref), [`Float32`](@ref), -`ComplexF64` and `ComplexF32` arrays. - -Note that the LAPACK API provided by Julia can and will change in the future. Since this API is -not user-facing, there is no commitment to support/deprecate this specific set of functions in -future releases. - -```@docs -LinearAlgebra.LAPACK -LinearAlgebra.LAPACK.gbtrf! -LinearAlgebra.LAPACK.gbtrs! -LinearAlgebra.LAPACK.gebal! -LinearAlgebra.LAPACK.gebak! -LinearAlgebra.LAPACK.gebrd! -LinearAlgebra.LAPACK.gelqf! -LinearAlgebra.LAPACK.geqlf! -LinearAlgebra.LAPACK.geqrf! -LinearAlgebra.LAPACK.geqp3! -LinearAlgebra.LAPACK.gerqf! -LinearAlgebra.LAPACK.geqrt! -LinearAlgebra.LAPACK.geqrt3! -LinearAlgebra.LAPACK.getrf! -LinearAlgebra.LAPACK.tzrzf! -LinearAlgebra.LAPACK.ormrz! -LinearAlgebra.LAPACK.gels! -LinearAlgebra.LAPACK.gesv! -LinearAlgebra.LAPACK.getrs! -LinearAlgebra.LAPACK.getri! -LinearAlgebra.LAPACK.gesvx! -LinearAlgebra.LAPACK.gelsd! -LinearAlgebra.LAPACK.gelsy! -LinearAlgebra.LAPACK.gglse! -LinearAlgebra.LAPACK.geev! -LinearAlgebra.LAPACK.gesdd! -LinearAlgebra.LAPACK.gesvd! -LinearAlgebra.LAPACK.ggsvd! -LinearAlgebra.LAPACK.ggsvd3! -LinearAlgebra.LAPACK.geevx! -LinearAlgebra.LAPACK.ggev! -LinearAlgebra.LAPACK.ggev3! -LinearAlgebra.LAPACK.gtsv! -LinearAlgebra.LAPACK.gttrf! -LinearAlgebra.LAPACK.gttrs! -LinearAlgebra.LAPACK.orglq! -LinearAlgebra.LAPACK.orgqr! -LinearAlgebra.LAPACK.orgql! -LinearAlgebra.LAPACK.orgrq! -LinearAlgebra.LAPACK.ormlq! -LinearAlgebra.LAPACK.ormqr! -LinearAlgebra.LAPACK.ormql! -LinearAlgebra.LAPACK.ormrq! -LinearAlgebra.LAPACK.gemqrt! -LinearAlgebra.LAPACK.posv! -LinearAlgebra.LAPACK.potrf! -LinearAlgebra.LAPACK.potri! -LinearAlgebra.LAPACK.potrs! -LinearAlgebra.LAPACK.pstrf! -LinearAlgebra.LAPACK.ptsv! -LinearAlgebra.LAPACK.pttrf! -LinearAlgebra.LAPACK.pttrs! -LinearAlgebra.LAPACK.trtri! -LinearAlgebra.LAPACK.trtrs! -LinearAlgebra.LAPACK.trcon! -LinearAlgebra.LAPACK.trevc! -LinearAlgebra.LAPACK.trrfs! -LinearAlgebra.LAPACK.stev! -LinearAlgebra.LAPACK.stebz! -LinearAlgebra.LAPACK.stegr! -LinearAlgebra.LAPACK.stein! -LinearAlgebra.LAPACK.syconv! -LinearAlgebra.LAPACK.sysv! -LinearAlgebra.LAPACK.sytrf! -LinearAlgebra.LAPACK.sytri! -LinearAlgebra.LAPACK.sytrs! -LinearAlgebra.LAPACK.hesv! -LinearAlgebra.LAPACK.hetrf! -LinearAlgebra.LAPACK.hetri! -LinearAlgebra.LAPACK.hetrs! -LinearAlgebra.LAPACK.syev! -LinearAlgebra.LAPACK.syevr! -LinearAlgebra.LAPACK.syevd! -LinearAlgebra.LAPACK.sygvd! -LinearAlgebra.LAPACK.bdsqr! 
-LinearAlgebra.LAPACK.bdsdc! -LinearAlgebra.LAPACK.gecon! -LinearAlgebra.LAPACK.gehrd! -LinearAlgebra.LAPACK.orghr! -LinearAlgebra.LAPACK.gees! -LinearAlgebra.LAPACK.gges! -LinearAlgebra.LAPACK.gges3! -LinearAlgebra.LAPACK.trexc! -LinearAlgebra.LAPACK.trsen! -LinearAlgebra.LAPACK.tgsen! -LinearAlgebra.LAPACK.trsyl! -LinearAlgebra.LAPACK.hseqr! -``` - -```@meta -DocTestSetup = nothing -``` diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl deleted file mode 100644 index 386de771d666f..0000000000000 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ /dev/null @@ -1,699 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" -Linear algebra module. Provides array arithmetic, -matrix factorizations and other linear algebra related -functionality. -""" -module LinearAlgebra - -import Base: \, /, *, ^, +, -, == -import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech, - asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, collect, conj, convert, copy, - copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, - getindex, hcat, getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, - kron, kron!, length, log, map, ndims, one, oneunit, parent, permutedims, - power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin, - sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat, - vec, view, zero -using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix, - @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing, - splat -using Base.Broadcast: Broadcasted, broadcasted -using Base.PermutedDimsArrays: CommutativeOps -using OpenBLAS_jll -using libblastrampoline_jll -import Libdl - -export -# Modules - LAPACK, - BLAS, - -# Types - Adjoint, - Transpose, - SymTridiagonal, - Tridiagonal, - Bidiagonal, - Factorization, - BunchKaufman, - Cholesky, - CholeskyPivoted, - ColumnNorm, - Eigen, - GeneralizedEigen, - GeneralizedSVD, - GeneralizedSchur, - Hessenberg, - LU, - LDLt, - NoPivot, - RowNonZero, - QR, - QRPivoted, - LQ, - Schur, - SVD, - Hermitian, - RowMaximum, - Symmetric, - LowerTriangular, - UpperTriangular, - UnitLowerTriangular, - UnitUpperTriangular, - UpperHessenberg, - Diagonal, - UniformScaling, - -# Functions - axpy!, - axpby!, - bunchkaufman, - bunchkaufman!, - cholesky, - cholesky!, - cond, - condskeel, - copyto!, - copy_transpose!, - cross, - adjoint, - adjoint!, - det, - diag, - diagind, - diagm, - dot, - eigen, - eigen!, - eigmax, - eigmin, - eigvals, - eigvals!, - eigvecs, - factorize, - givens, - hermitianpart, - hermitianpart!, - hessenberg, - hessenberg!, - isdiag, - ishermitian, - isposdef, - isposdef!, - issuccess, - issymmetric, - istril, - istriu, - kron, - kron!, - ldiv!, - ldlt!, - ldlt, - logabsdet, - logdet, - lowrankdowndate, - lowrankdowndate!, - lowrankupdate, - lowrankupdate!, - lu, - lu!, - lyap, - mul!, - lmul!, - rmul!, - norm, - normalize, - normalize!, - nullspace, - ordschur!, - ordschur, - pinv, - qr, - qr!, - lq, - lq!, - opnorm, - rank, - rdiv!, - reflect!, - rotate!, - schur, - schur!, - svd, - svd!, - svdvals!, - svdvals, - sylvester, - tr, - transpose, - transpose!, - tril, - triu, - tril!, - triu!, - -# Operators - \, - /, - -# Constants - I - -const BlasFloat = Union{Float64,Float32,ComplexF64,ComplexF32} -const BlasReal = Union{Float64,Float32} -const BlasComplex = Union{ComplexF64,ComplexF32} - -if 
USE_BLAS64 - const BlasInt = Int64 -else - const BlasInt = Int32 -end - - -abstract type Algorithm end -struct DivideAndConquer <: Algorithm end -struct QRIteration <: Algorithm end - -abstract type PivotingStrategy end -struct NoPivot <: PivotingStrategy end -struct RowNonZero <: PivotingStrategy end -struct RowMaximum <: PivotingStrategy end -struct ColumnNorm <: PivotingStrategy end - -# Check that stride of matrix/vector is 1 -# Writing like this to avoid splatting penalty when called with multiple arguments, -# see PR 16416 -""" - stride1(A) -> Int - -Return the distance between successive array elements -in dimension 1 in units of element size. - -# Examples -```jldoctest -julia> A = [1,2,3,4] -4-element Vector{Int64}: - 1 - 2 - 3 - 4 - -julia> LinearAlgebra.stride1(A) -1 - -julia> B = view(A, 2:2:4) -2-element view(::Vector{Int64}, 2:2:4) with eltype Int64: - 2 - 4 - -julia> LinearAlgebra.stride1(B) -2 -``` -""" -stride1(x) = stride(x,1) -stride1(x::Array) = 1 -stride1(x::DenseArray) = stride(x, 1)::Int - -@inline chkstride1(A...) = _chkstride1(true, A...) -@noinline _chkstride1(ok::Bool) = ok || error("matrix does not have contiguous columns") -@inline _chkstride1(ok::Bool, A, B...) = _chkstride1(ok & (stride1(A) == 1), B...) - -""" - LinearAlgebra.checksquare(A) - -Check that a matrix is square, then return its common dimension. -For multiple arguments, return a vector. - -# Examples -```jldoctest -julia> A = fill(1, (4,4)); B = fill(1, (5,5)); - -julia> LinearAlgebra.checksquare(A, B) -2-element Vector{Int64}: - 4 - 5 -``` -""" -function checksquare(A) - m,n = size(A) - m == n || throw(DimensionMismatch("matrix is not square: dimensions are $(size(A))")) - m -end - -function checksquare(A...) - sizes = Int[] - for a in A - size(a,1)==size(a,2) || throw(DimensionMismatch("matrix is not square: dimensions are $(size(a))")) - push!(sizes, size(a,1)) - end - return sizes -end - -function char_uplo(uplo::Symbol) - if uplo === :U - return 'U' - elseif uplo === :L - return 'L' - else - throw_uplo() - end -end - -function sym_uplo(uplo::Char) - if uplo == 'U' - return :U - elseif uplo == 'L' - return :L - else - throw_uplo() - end -end - -@noinline throw_uplo() = throw(ArgumentError("uplo argument must be either :U (upper) or :L (lower)")) - -""" - ldiv!(Y, A, B) -> Y - -Compute `A \\ B` in-place and store the result in `Y`, returning the result. - -The argument `A` should *not* be a matrix. Rather, instead of matrices it should be a -factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)). -The reason for this is that factorization itself is both expensive and typically allocates memory -(although it can also be done in-place via, e.g., [`lu!`](@ref)), -and performance-critical situations requiring `ldiv!` usually also require fine-grained -control over the factorization of `A`. - -!!! note - Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as - these are already in a factorized form - -# Examples -```jldoctest -julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2]; - -julia> X = [1; 2.5; 3]; - -julia> Y = zero(X); - -julia> ldiv!(Y, qr(A), X); - -julia> Y -3-element Vector{Float64}: - 0.7128099173553719 - -0.051652892561983674 - 0.10020661157024757 - -julia> A\\X -3-element Vector{Float64}: - 0.7128099173553719 - -0.05165289256198333 - 0.10020661157024785 -``` -""" -ldiv!(Y, A, B) - -""" - ldiv!(A, B) - -Compute `A \\ B` in-place and overwriting `B` to store the result. - -The argument `A` should *not* be a matrix. 
Rather, instead of matrices it should be a -factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)). -The reason for this is that factorization itself is both expensive and typically allocates memory -(although it can also be done in-place via, e.g., [`lu!`](@ref)), -and performance-critical situations requiring `ldiv!` usually also require fine-grained -control over the factorization of `A`. - -!!! note - Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as - these are already in a factorized form - -# Examples -```jldoctest -julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2]; - -julia> X = [1; 2.5; 3]; - -julia> Y = copy(X); - -julia> ldiv!(qr(A), X); - -julia> X -3-element Vector{Float64}: - 0.7128099173553719 - -0.051652892561983674 - 0.10020661157024757 - -julia> A\\Y -3-element Vector{Float64}: - 0.7128099173553719 - -0.05165289256198333 - 0.10020661157024785 -``` -""" -ldiv!(A, B) - - -""" - rdiv!(A, B) - -Compute `A / B` in-place and overwriting `A` to store the result. - -The argument `B` should *not* be a matrix. Rather, instead of matrices it should be a -factorization object (e.g. produced by [`factorize`](@ref) or [`cholesky`](@ref)). -The reason for this is that factorization itself is both expensive and typically allocates memory -(although it can also be done in-place via, e.g., [`lu!`](@ref)), -and performance-critical situations requiring `rdiv!` usually also require fine-grained -control over the factorization of `B`. - -!!! note - Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as - these are already in a factorized form -""" -rdiv!(A, B) - -""" - copy_oftype(A, T) - -Creates a copy of `A` with eltype `T`. No assertions about mutability of the result are -made. When `eltype(A) == T`, then this calls `copy(A)` which may be overloaded for custom -array types. Otherwise, this calls `convert(AbstractArray{T}, A)`. -""" -copy_oftype(A::AbstractArray{T}, ::Type{T}) where {T} = copy(A) -copy_oftype(A::AbstractArray{T,N}, ::Type{S}) where {T,N,S} = convert(AbstractArray{S,N}, A) - -""" - copymutable_oftype(A, T) - -Copy `A` to a mutable array with eltype `T` based on `similar(A, T)`. - -The resulting matrix typically has similar algebraic structure as `A`. For -example, supplying a tridiagonal matrix results in another tridiagonal matrix. -In general, the type of the output corresponds to that of `similar(A, T)`. - -In LinearAlgebra, mutable copies (of some desired eltype) are created to be passed -to in-place algorithms (such as `ldiv!`, `rdiv!`, `lu!` and so on). If the specific -algorithm is known to preserve the algebraic structure, use `copymutable_oftype`. -If the algorithm is known to return a dense matrix (or some wrapper backed by a dense -matrix), then use `copy_similar`. - -See also: `Base.copymutable`, `copy_similar`. -""" -copymutable_oftype(A::AbstractArray, ::Type{S}) where {S} = copyto!(similar(A, S), A) - -""" - copy_similar(A, T) - -Copy `A` to a mutable array with eltype `T` based on `similar(A, T, size(A))`. - -Compared to `copymutable_oftype`, the result can be more flexible. In general, the type -of the output corresponds to that of the three-argument method `similar(A, T, size(A))`. - -See also: `copymutable_oftype`. 
-""" -copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A) - - -include("adjtrans.jl") -include("transpose.jl") - -include("exceptions.jl") -include("generic.jl") - -include("blas.jl") -include("matmul.jl") -include("lapack.jl") - -include("dense.jl") -include("tridiag.jl") -include("triangular.jl") - -include("factorization.jl") -include("eigen.jl") -include("svd.jl") -include("symmetric.jl") -include("cholesky.jl") -include("lu.jl") -include("bunchkaufman.jl") -include("diagonal.jl") -include("symmetriceigen.jl") -include("bidiag.jl") -include("uniformscaling.jl") -include("qr.jl") -include("lq.jl") -include("hessenberg.jl") -include("abstractq.jl") -include("givens.jl") -include("special.jl") -include("bitarray.jl") -include("ldlt.jl") -include("schur.jl") -include("structuredbroadcast.jl") -include("deprecated.jl") - -const ⋅ = dot -const × = cross -export ⋅, × - -wrapper_char(::AbstractArray) = 'N' -wrapper_char(::Adjoint) = 'C' -wrapper_char(::Adjoint{<:Real}) = 'T' -wrapper_char(::Transpose) = 'T' -wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h' -wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's' -wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's' - -function wrap(A::AbstractVecOrMat, tA::AbstractChar) - if tA == 'N' - return A - elseif tA == 'T' - return transpose(A) - elseif tA == 'C' - return adjoint(A) - elseif tA == 'H' - return Hermitian(A, :U) - elseif tA == 'h' - return Hermitian(A, :L) - elseif tA == 'S' - return Symmetric(A, :U) - else # tA == 's' - return Symmetric(A, :L) - end -end - -_unwrap(A::AbstractVecOrMat) = A - -## convenience methods -## return only the solution of a least squares problem while avoiding promoting -## vectors to matrices. -_cut_B(x::AbstractVector, r::UnitRange) = length(x) > length(r) ? x[r] : x -_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X - -# SymTridiagonal ev can be the same length as dv, but the last element is -# ignored. However, some methods can fail if they read the entire ev -# rather than just the meaningful elements. This is a helper function -# for getting only the meaningful elements of ev. See #41089 -_evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2] - -## append right hand side with zeros if necessary -_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n)) -_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2)) - -# convert to Vector, if necessary -_makevector(x::Vector) = x -_makevector(x::AbstractVector) = Vector(x) - -# append a zero element / drop the last element -_pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B) -_droplast!(A) = deleteat!(A, lastindex(A)) - -# some trait like this would be cool -# onedefined(::Type{T}) where {T} = hasmethod(one, (T,)) -# but we are actually asking for oneunit(T), that is, however, defined for generic T as -# `T(one(T))`, so the question is equivalent for whether one(T) is defined -onedefined(::Type) = false -onedefined(::Type{<:Number}) = true - -# initialize return array for op(A, B) -_init_eltype(::typeof(*), ::Type{TA}, ::Type{TB}) where {TA,TB} = - (onedefined(TA) && onedefined(TB)) ? - typeof(matprod(oneunit(TA), oneunit(TB))) : - promote_op(matprod, TA, TB) -_init_eltype(op, ::Type{TA}, ::Type{TB}) where {TA,TB} = - (onedefined(TA) && onedefined(TB)) ? 
- typeof(op(oneunit(TA), oneunit(TB))) : - promote_op(op, TA, TB) -_initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} = - similar(C, _init_eltype(op, TA, TB), size(C)) - -# General fallback definition for handling under- and overdetermined system as well as square problems -# While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs -# which is required by LAPACK but not SuiteSparse which allows real-complex solves in some cases. Hence, -# we restrict this method to only the LAPACK factorizations in LinearAlgebra. -# The definition is put here since it explicitly references all the Factorization structs so it has -# to be located after all the files that define the structs. -const LAPACKFactorizations{T,S} = Union{ - BunchKaufman{T,S}, - Cholesky{T,S}, - LQ{T,S}, - LU{T,S}, - QR{T,S}, - QRCompactWY{T,S}, - QRPivoted{T,S}, - SVD{T,<:Real,S}} - -(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B) -(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B) -(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B) - -function ldiv(F::Factorization, B::AbstractVecOrMat) - require_one_based_indexing(B) - m, n = size(F) - if m != size(B, 1) - throw(DimensionMismatch("arguments must have the same number of rows")) - end - - TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) - FF = Factorization{TFB}(F) - - # For wide problem we (often) compute a minimum norm solution. The solution - # is larger than the right hand side so we use size(F, 2). - BB = _zeros(TFB, B, n) - - if n > size(B, 1) - # Underdetermined - copyto!(view(BB, 1:m, :), B) - else - copyto!(BB, B) - end - - ldiv!(FF, BB) - - # For tall problems, we compute a least squares solution so only part - # of the rhs should be returned from \ while ldiv! uses (and returns) - # the complete rhs - return _cut_B(BB, 1:n) -end -# disambiguate -(\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = - @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}}) -(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = - ldiv(F, B) -(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = - ldiv(F, B) - -""" - LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) - -`peakflops` computes the peak flop rate of the computer by using double precision -[`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it -multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using -multiple threads, higher flop rates are realized. The number of BLAS threads can be set with -[`BLAS.set_num_threads(n)`](@ref). - -If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements -of type `eltype` for calculating the peak flop rate. - -By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument -is provided, `peakflops` will use those many trials for picking the best timing. - -If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all -the worker processors. The flop rate of the entire parallel computer is returned. When -running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size -of the problem that is solved on each processor. - -!!! compat "Julia 1.1" - This function requires at least Julia 1.1. 
In Julia 1.0 it is available from - the standard library `InteractiveUtils`. -""" -function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) - t = zeros(Float64, ntrials) - for i=1:ntrials - a = ones(eltype,n,n) - t[i] = @elapsed a2 = a*a - @assert a2[1,1] == n - end - - if parallel - let Distributed = Base.require(Base.PkgId( - Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) - return sum(Distributed.pmap(peakflops, fill(n, Distributed.nworkers()))) - end - else - return 2*Float64(n)^3 / minimum(t) - end -end - - -function versioninfo(io::IO=stdout) - indent = " " - config = BLAS.get_config() - build_flags = join(string.(config.build_flags), ", ") - println(io, "BLAS: ", BLAS.libblastrampoline, " (", build_flags, ")") - for lib in config.loaded_libs - interface = uppercase(string(lib.interface)) - println(io, indent, "--> ", lib.libname, " (", interface, ")") - end - println(io, "Threading:") - println(io, indent, "Threads.threadpoolsize() = ", Threads.threadpoolsize()) - println(io, indent, "Threads.maxthreadid() = ", Base.Threads.maxthreadid()) - println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads()) - println(io, "Relevant environment variables:") - env_var_names = [ - "JULIA_NUM_THREADS", - "MKL_DYNAMIC", - "MKL_NUM_THREADS", - # OpenBLAS has a hierarchy of environment variables for setting the - # number of threads, see - # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables - ("OPENBLAS_NUM_THREADS", "GOTO_NUM_THREADS", "OMP_NUM_THREADS"), - ] - printed_at_least_one_env_var = false - print_var(io, indent, name) = println(io, indent, name, " = ", ENV[name]) - for name in env_var_names - if name isa Tuple - # If `name` is a Tuple, then find the first environment which is - # defined, and disregard the following ones. - for nm in name - if haskey(ENV, nm) - print_var(io, indent, nm) - printed_at_least_one_env_var = true - break - end - end - else - if haskey(ENV, name) - print_var(io, indent, name) - printed_at_least_one_env_var = true - end - end - end - if !printed_at_least_one_env_var - println(io, indent, "[none]") - end - return nothing -end - -function __init__() - try - BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true) - BLAS.check() - catch ex - Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra") - end - # register a hook to disable BLAS threading - Base.at_disable_library_threading(() -> BLAS.set_num_threads(1)) - - # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables - if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS") - @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64" - BLAS.set_num_threads(max(1, Sys.CPU_THREADS)) - else - BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2)) - end - end -end - -end # module LinearAlgebra diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl deleted file mode 100644 index 93358d052d50b..0000000000000 --- a/stdlib/LinearAlgebra/src/abstractq.jl +++ /dev/null @@ -1,575 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -abstract type AbstractQ{T} end - -struct AdjointQ{T,S<:AbstractQ{T}} <: AbstractQ{T} - Q::S -end - -parent(adjQ::AdjointQ) = adjQ.Q -eltype(::Type{<:AbstractQ{T}}) where {T} = T -ndims(::AbstractQ) = 2 - -# inversion/adjoint/transpose -inv(Q::AbstractQ) = Q' -adjoint(Q::AbstractQ) = AdjointQ(Q) -transpose(Q::AbstractQ{<:Real}) = AdjointQ(Q) -transpose(Q::AbstractQ) = error("transpose not implemented for $(typeof(Q)). Consider using adjoint instead of transpose.") -adjoint(adjQ::AdjointQ) = adjQ.Q - -# promotion with AbstractMatrix, at least for equal eltypes -promote_rule(::Type{<:AbstractMatrix{T}}, ::Type{<:AbstractQ{T}}) where {T} = - (@inline; Union{AbstractMatrix{T},AbstractQ{T}}) - -# conversion -# the following eltype promotion should be defined for each subtype `QType` -# convert(::Type{AbstractQ{T}}, Q::QType) where {T} = QType{T}(Q) -# and then care has to be taken that -# QType{T}(Q::QType{T}) where T = ... -# is implemented as a no-op - -# the following conversion method ensures functionality when the above method is not defined -# (as for HessenbergQ), but no eltype conversion is required either (say, in multiplication) -convert(::Type{AbstractQ{T}}, Q::AbstractQ{T}) where {T} = Q -convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ -convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)' - -# ... to matrix -collect(Q::AbstractQ) = copyto!(Matrix{eltype(Q)}(undef, size(Q)), Q) -Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix -Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ)))) -Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q) -Array{T}(Q::AbstractQ) where {T} = Matrix{T}(Q) -Array(Q::AbstractQ) = Matrix(Q) -convert(::Type{T}, Q::AbstractQ) where {T<:AbstractArray} = T(Q) -# legacy -@deprecate(convert(::Type{AbstractMatrix{T}}, Q::AbstractQ) where {T}, - convert(LinearAlgebra.AbstractQ{T}, Q)) - -function size(Q::AbstractQ, dim::Integer) - if dim < 1 - throw(BoundsError()) - elseif dim <= 2 # && 1 <= dim - return size(Q)[dim] - else # 2 < dim - return 1 - end -end -size(adjQ::AdjointQ) = reverse(size(adjQ.Q)) - -# comparison -(==)(Q::AbstractQ, A::AbstractMatrix) = lmul!(Q, Matrix{eltype(Q)}(I, size(A))) == A -(==)(A::AbstractMatrix, Q::AbstractQ) = Q == A -(==)(Q::AbstractQ, P::AbstractQ) = Matrix(Q) == Matrix(P) -isapprox(Q::AbstractQ, A::AbstractMatrix; kwargs...) = - isapprox(lmul!(Q, Matrix{eltype(Q)}(I, size(A))), A, kwargs...) -isapprox(A::AbstractMatrix, Q::AbstractQ; kwargs...) = isapprox(Q, A, kwargs...) -isapprox(Q::AbstractQ, P::AbstractQ; kwargs...) = isapprox(Matrix(Q), Matrix(P), kwargs...) - -# pseudo-array behaviour, required for indexing with `begin` or `end` -axes(Q::AbstractQ) = map(Base.oneto, size(Q)) -axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? axes(Q)[d] : Base.OneTo(1) - -copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q))) -copy(Q::AbstractQ) = copymutable(Q) - -# getindex -@inline function getindex(Q::AbstractQ, inds...) - @boundscheck Base.checkbounds_indices(Bool, axes(Q), inds) || Base.throw_boundserror(Q, inds) - return _getindex(Q, inds...) -end -@inline getindex(Q::AbstractQ, ::Colon) = copymutable(Q)[:] -@inline getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q) - -@inline _getindex(Q::AbstractQ, inds...) = @inbounds copymutable(Q)[inds...] 
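The `_getindex` fallback above materializes `Q` via `copymutable` and then indexes, whereas the specialized methods that follow build only the requested columns by applying `Q` to unit vectors with `lmul!`. A minimal, self-contained sketch of that column-extraction idea (the `extract_column` helper is hypothetical and only illustrates the pattern; it is not part of this file):

```julia
using LinearAlgebra

# Illustrative only: extract column j of an implicit Q (e.g. from qr) without
# materializing the full matrix, mirroring the unit-vector + lmul! pattern above.
function extract_column(Q::LinearAlgebra.AbstractQ, j::Integer)
    y = zeros(eltype(Q), size(Q, 2))
    y[j] = oneunit(eltype(Q))
    return lmul!(Q, y)              # overwrites y with Q * e_j
end

F = qr([1.0 2.0; 3.0 4.0; 5.0 6.0])
extract_column(F.Q, 1) ≈ (F.Q * I)[:, 1]    # true
F.Q[:, 1] ≈ extract_column(F.Q, 1)          # true: what the Colon/Int method above does
```

The same trick is what keeps `Q[:, j]` cheap for tall factorizations: one application of the compact representation, never the full dense matrix.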
-@inline function _getindex(Q::AbstractQ, ::Colon, J::AbstractVector{<:Integer}) - Y = zeros(eltype(Q), size(Q, 2), length(J)) - @inbounds for (i,j) in enumerate(J) - Y[j,i] = oneunit(eltype(Q)) - end - lmul!(Q, Y) -end -@inline _getindex(Q::AbstractQ, I::AbstractVector{Int}, J::AbstractVector{Int}) = @inbounds Q[:,J][I,:] -@inline function _getindex(Q::AbstractQ, ::Colon, j::Int) - y = zeros(eltype(Q), size(Q, 2)) - y[j] = oneunit(eltype(Q)) - lmul!(Q, y) -end -@inline _getindex(Q::AbstractQ, i::Int, j::Int) = @inbounds Q[:,j][i] - -# needed because AbstractQ does not subtype AbstractMatrix -qr(Q::AbstractQ{T}, arg...; kwargs...) where {T} = qr!(Matrix{_qreltype(T)}(Q), arg...; kwargs...) -lq(Q::AbstractQ{T}, arg...; kwargs...) where {T} = lq!(Matrix{lq_eltype(T)}(Q), arg...; kwargs...) -hessenberg(Q::AbstractQ{T}) where {T} = hessenberg!(Matrix{eigtype(T)}(Q)) - -# needed when used interchangeably with AbstractMatrix (analogous to views of ranges) -view(A::AbstractQ, I...) = getindex(A, I...) - -# specialization avoiding the fallback using slow `getindex` -function copyto!(dest::AbstractMatrix, src::AbstractQ) - copyto!(dest, I) - lmul!(src, dest) -end -# needed to resolve method ambiguities -function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm} - if perm == (1, 2) - copyto!(parent(dest), src) - else - @assert perm == (2, 1) # there are no other permutations of two indices - if T <: Real - copyto!(parent(dest), I) - lmul!(src', parent(dest)) - else - # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q - tmp = similar(parent(dest)) - copyto!(tmp, I) - rmul!(tmp, src) - permutedims!(parent(dest), tmp, (2, 1)) - end - end - return dest -end - -function show(io::IO, ::MIME{Symbol("text/plain")}, Q::AbstractQ) - print(io, Base.dims2string(size(Q)), ' ', summary(Q)) -end - -# multiplication -# generically, treat AbstractQ like a matrix with its definite size -qsize_check(Q::AbstractQ, B::AbstractVecOrMat) = - size(Q, 2) == size(B, 1) || - throw(DimensionMismatch("second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))")) -qsize_check(A::AbstractVecOrMat, Q::AbstractQ) = - size(A, 2) == size(Q, 1) || - throw(DimensionMismatch("second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))")) -qsize_check(Q::AbstractQ, P::AbstractQ) = - size(Q, 2) == size(P, 1) || - throw(DimensionMismatch("second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))")) - -(*)(Q::AbstractQ, J::UniformScaling) = Q*J.λ -function (*)(Q::AbstractQ, b::Number) - T = promote_type(eltype(Q), typeof(b)) - lmul!(convert(AbstractQ{T}, Q), Matrix{T}(b*I, size(Q))) -end -function (*)(Q::AbstractQ, B::AbstractVector) - T = promote_type(eltype(Q), eltype(B)) - qsize_check(Q, B) - mul!(similar(B, T, size(Q, 1)), convert(AbstractQ{T}, Q), B) -end -function (*)(Q::AbstractQ, B::AbstractMatrix) - T = promote_type(eltype(Q), eltype(B)) - qsize_check(Q, B) - mul!(similar(B, T, (size(Q, 1), size(B, 2))), convert(AbstractQ{T}, Q), B) -end - -(*)(J::UniformScaling, Q::AbstractQ) = J.λ*Q -function (*)(a::Number, Q::AbstractQ) - T = promote_type(typeof(a), eltype(Q)) - rmul!(Matrix{T}(a*I, size(Q)), convert(AbstractQ{T}, Q)) -end -function (*)(A::AbstractVector, Q::AbstractQ) - T = promote_type(eltype(A), eltype(Q)) - qsize_check(A, Q) - return mul!(similar(A, T, length(A)), A, convert(AbstractQ{T}, Q)) -end -function (*)(A::AbstractMatrix, Q::AbstractQ) - T = promote_type(eltype(A), eltype(Q)) - 
qsize_check(A, Q) - return mul!(similar(A, T, (size(A, 1), size(Q, 2))), A, convert(AbstractQ{T}, Q)) -end -(*)(u::AdjointAbsVec, Q::AbstractQ) = (Q'u')' - -### Q*Q (including adjoints) -(*)(Q::AbstractQ, P::AbstractQ) = Q * (P*I) - -### mul! -function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat,AbstractQ}) where {T} - require_one_based_indexing(C, B) - mB, nB = size(B, 1), size(B, 2) - mC, nC = size(C, 1), size(C, 2) - qsize_check(Q, B) - nB != nC && throw(DimensionMismatch()) - if mB < mC - inds = CartesianIndices(axes(B)) - copyto!(view(C, inds), B) - C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T) - return lmul!(Q, C) - else - return lmul!(Q, copyto!(C, B)) - end -end -function mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat, Q::AbstractQ{T}) where {T} - require_one_based_indexing(C, A) - mA, nA = size(A, 1), size(A, 2) - mC, nC = size(C, 1), size(C, 2) - mA != mC && throw(DimensionMismatch()) - qsize_check(A, Q) - if nA < nC - inds = CartesianIndices(axes(A)) - copyto!(view(C, inds), A) - C[CartesianIndices((axes(C, 1), nA+1:nC))] .= zero(T) - return rmul!(C, Q) - else - return rmul!(copyto!(C, A), Q) - end -end - -### division -\(Q::AbstractQ, A::AbstractVecOrMat) = Q'*A -/(A::AbstractVecOrMat, Q::AbstractQ) = A*Q' -ldiv!(Q::AbstractQ, A::AbstractVecOrMat) = lmul!(Q', A) -ldiv!(C::AbstractVecOrMat, Q::AbstractQ, A::AbstractVecOrMat) = mul!(C, Q', A) -rdiv!(A::AbstractVecOrMat, Q::AbstractQ) = rmul!(A, Q') - -logabsdet(Q::AbstractQ) = (d = det(Q); return log(abs(d)), sign(d)) -function logdet(A::AbstractQ) - d, s = logabsdet(A) - return d + log(s) -end - -########################################################### -################ Q from QR decompositions ################# -########################################################### - -""" - QRPackedQ <: LinearAlgebra.AbstractQ - -The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or -[`QRPivoted`](@ref) format. -""" -struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T} - factors::S - τ::C - - function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}} - require_one_based_indexing(factors, τ) - new{T,S,C}(factors, τ) - end -end -QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} = - QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ) -QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = - QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, - QRPackedQ{T,S,typeof(τ)}(factors, τ), false) - -""" - QRCompactWYQ <: LinearAlgebra.AbstractQ - -The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref) -format. 
-""" -struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S} - factors::M - T::C - - function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} - require_one_based_indexing(factors, T) - new{S,M,C}(factors, T) - end -end -QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} = - QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T) -QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} = - QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M}, - QRCompactWYQ{S,M,typeof(T)}(factors, T), false) - -QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ)) -QRCompactWYQ{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ(convert(AbstractMatrix{S}, Q.factors), convert(AbstractMatrix{S}, Q.T)) - -# override generic square fallback -Matrix{T}(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {T,S} = - convert(Matrix{T}, lmul!(Q, Matrix{S}(I, size(Q, 1), min(size(Q.factors)...)))) -Matrix(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {S} = Matrix{S}(Q) - -convert(::Type{AbstractQ{T}}, Q::QRPackedQ) where {T} = QRPackedQ{T}(Q) -convert(::Type{AbstractQ{T}}, Q::QRCompactWYQ) where {T} = QRCompactWYQ{T}(Q) - -size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) = - size(Q.factors, dim == 2 ? 1 : dim) -size(Q::Union{QRCompactWYQ,QRPackedQ}) = (n = size(Q.factors, 1); (n, n)) - -## Multiplication -### QB -lmul!(A::QRCompactWYQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.gemqrt!('L', 'N', A.factors, A.T, B) -lmul!(A::QRPackedQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.ormqr!('L', 'N', A.factors, A.τ, B) -function lmul!(A::QRPackedQ, B::AbstractVecOrMat) - require_one_based_indexing(B) - mA, nA = size(A.factors) - mB, nB = size(B,1), size(B,2) - if mA != mB - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)")) - end - Afactors = A.factors - @inbounds begin - for k = min(mA,nA):-1:1 - for j = 1:nB - vBj = B[k,j] - for i = k+1:mB - vBj += conj(Afactors[i,k])*B[i,j] - end - vBj = A.τ[k]*vBj - B[k,j] -= vBj - for i = k+1:mB - B[i,j] -= Afactors[i,k]*vBj - end - end - end - end - B -end - -### QcB -lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - (Q = adjQ.Q; LAPACK.gemqrt!('L', 'T', Q.factors, Q.T, B)) -lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (Q = adjQ.Q; LAPACK.gemqrt!('L', 'C', Q.factors, Q.T, B)) -lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - (Q = adjQ.Q; LAPACK.ormqr!('L', 'T', Q.factors, Q.τ, B)) -lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (Q = adjQ.Q; LAPACK.ormqr!('L', 'C', Q.factors, Q.τ, B)) -function lmul!(adjA::AdjointQ{<:Any,<:QRPackedQ}, B::AbstractVecOrMat) - require_one_based_indexing(B) - A = adjA.Q - mA, nA = size(A.factors) - mB, nB = size(B,1), size(B,2) - if mA != mB - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)")) - end - Afactors = A.factors - @inbounds begin - for k = 1:min(mA,nA) - for j = 1:nB - vBj = B[k,j] - for i = k+1:mB - vBj += 
conj(Afactors[i,k])*B[i,j] - end - vBj = conj(A.τ[k])*vBj - B[k,j] -= vBj - for i = k+1:mB - B[i,j] -= Afactors[i,k]*vBj - end - end - end - end - B -end - -### AQ -rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,<:StridedMatrix}) where {T<:BlasFloat} = - LAPACK.gemqrt!('R', 'N', B.factors, B.T, A) -rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,<:StridedMatrix}) where {T<:BlasFloat} = - LAPACK.ormqr!('R', 'N', B.factors, B.τ, A) -function rmul!(A::AbstractVecOrMat, Q::QRPackedQ) - require_one_based_indexing(A) - mQ, nQ = size(Q.factors) - mA, nA = size(A,1), size(A,2) - if nA != mQ - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)")) - end - Qfactors = Q.factors - @inbounds begin - for k = 1:min(mQ,nQ) - for i = 1:mA - vAi = A[i,k] - for j = k+1:mQ - vAi += A[i,j]*Qfactors[j,k] - end - vAi = vAi*Q.τ[k] - A[i,k] -= vAi - for j = k+1:nA - A[i,j] -= vAi*conj(Qfactors[j,k]) - end - end - end - end - A -end - -### AQc -rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} = - (Q = adjQ.Q; LAPACK.gemqrt!('R', 'T', Q.factors, Q.T, A)) -rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} = - (Q = adjQ.Q; LAPACK.gemqrt!('R', 'C', Q.factors, Q.T, A)) -rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} = - (Q = adjQ.Q; LAPACK.ormqr!('R', 'T', Q.factors, Q.τ, A)) -rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} = - (Q = adjQ.Q; LAPACK.ormqr!('R', 'C', Q.factors, Q.τ, A)) -function rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:QRPackedQ}) - require_one_based_indexing(A) - Q = adjQ.Q - mQ, nQ = size(Q.factors) - mA, nA = size(A,1), size(A,2) - if nA != mQ - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)")) - end - Qfactors = Q.factors - @inbounds begin - for k = min(mQ,nQ):-1:1 - for i = 1:mA - vAi = A[i,k] - for j = k+1:mQ - vAi += A[i,j]*Qfactors[j,k] - end - vAi = vAi*conj(Q.τ[k]) - A[i,k] -= vAi - for j = k+1:nA - A[i,j] -= vAi*conj(Qfactors[j,k]) - end - end - end - end - A -end - -det(Q::QRPackedQ) = _det_tau(Q.τ) -det(Q::QRCompactWYQ) = - prod(i -> _det_tau(_diagview(Q.T[:, i:min(i + size(Q.T, 1), size(Q.T, 2))])), - 1:size(Q.T, 1):size(Q.T, 2)) - -_diagview(A) = @view A[diagind(A)] - -# Compute `det` from the number of Householder reflections. Handle -# the case `Q.τ` contains zeros. -_det_tau(τs::AbstractVector{<:Real}) = - isodd(count(!iszero, τs)) ? -one(eltype(τs)) : one(eltype(τs)) - -# In complex case, we need to compute the non-unit eigenvalue `λ = 1 - c*τ` -# (where `c = v'v`) of each Householder reflector. As we know that the -# reflector must have the determinant of 1, it must satisfy `abs2(λ) == 1`. -# Combining this with the constraint `c > 0`, it turns out that the eigenvalue -# (hence the determinant) can be computed as `λ = -sign(τ)^2`. -# See: https://github.com/JuliaLang/julia/pull/32887#issuecomment-521935716 -_det_tau(τs) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, τs) - -########################################################### -######## Q from Hessenberg decomposition ################## -########################################################### - -""" - HessenbergQ <: AbstractQ - -Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns -a `HessenbergQ` object, which is an implicit representation of the unitary -matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`. 
-This `F.Q` object can be efficiently multiplied by matrices or vectors, -and can be converted to an ordinary matrix type with `Matrix(F.Q)`. -""" -struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T} - uplo::Char - factors::S - τ::W - function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym} - new(uplo, factors, τ) - end -end -HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ) -HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ) - -size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim) -size(Q::HessenbergQ) = size(Q, 1), size(Q, 2) - -# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra) -const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym} - -## reconstruct the original matrix -Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) -Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ)) - -lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} = - LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - (Q = adjQ.Q; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) -rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} = - (Q = adjQ.Q; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) - -lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X) -rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} = - LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X) -lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - (Q = adjQ.Q; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) -rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} = - (Q = adjQ.Q; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) - -lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')' -rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')' -lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', adjQ')' -rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')' - -# flexible left-multiplication (and adjoint right-multiplication) -qsize_check(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat) = - size(B, 1) in size(Q.factors) || - throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))")) -qsize_check(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = - (Q = adjQ.Q; size(A, 2) in size(Q.factors) || - 
throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))) - -det(Q::HessenbergQ) = _det_tau(Q.τ) - -########################################################### -################ Q from LQ decomposition ################## -########################################################### - -struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T} - factors::S - τ::C -end - -LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ)) -@deprecate(AbstractMatrix{T}(Q::LQPackedQ) where {T}, - convert(AbstractQ{T}, Q), - false) -Matrix{T}(A::LQPackedQ) where {T} = convert(Matrix{T}, LAPACK.orglq!(copy(A.factors), A.τ)) -convert(::Type{AbstractQ{T}}, Q::LQPackedQ) where {T} = LQPackedQ{T}(Q) - -# size(Q::LQPackedQ) yields the shape of Q's square form -size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n) - -## Multiplication -# out-of-place right application of LQPackedQs -# -# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension -# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), -# and if so effectively apply Q's square form to A without additional shenanigans; and -# (2) if the preceding dimensions do not match, check whether the appropriate dimension of -# A instead matches the number of rows of the matrix of which Q is a factor (i.e. -# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending -# A as necessary for check (1) to pass (if possible) and then applying Q's square form - -qsize_check(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat) = - size(B, 1) in size(adjQ.Q.factors) || - throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))")) -qsize_check(A::AbstractVecOrMat, Q::LQPackedQ) = - size(A, 2) in size(Q.factors) || - throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))")) - -# in-place right-application of LQPackedQs -# these methods require that the applied-to matrix's (A's) number of columns -# match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place -# operation, and the underlying LAPACK routine (ormlq) treats the implicit Q -# as its (nQ-by-nQ) square form) -rmul!(A::StridedVecOrMat{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = - LAPACK.ormlq!('R', 'N', B.factors, B.τ, A) -rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = - (B = adjB.Q; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A)) -rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = - (B = adjB.Q; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A)) - -### QB / QcB -lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) -lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) -lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) - -# In LQ factorization, `Q` is expressed as the product of the adjoint of the -# reflectors. Thus, `det` has to be conjugated. 
-det(Q::LQPackedQ) = conj(_det_tau(Q.τ)) diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl deleted file mode 100644 index 875e8cefcb66e..0000000000000 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ /dev/null @@ -1,512 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar) - -# note that Adjoint and Transpose must be able to wrap not only vectors and matrices -# but also factorizations, rotations, and other linear algebra objects, including -# user-defined such objects. so do not restrict the wrapped type. -""" - Adjoint - -Lazy wrapper type for an adjoint view of the underlying linear algebra object, -usually an `AbstractVector`/`AbstractMatrix`. -Usually, the `Adjoint` constructor should not be called directly, use [`adjoint`](@ref) -instead. To materialize the view use [`copy`](@ref). - -This type is intended for linear algebra usage - for general data manipulation see -[`permutedims`](@ref Base.permutedims). - -# Examples -```jldoctest -julia> A = [3+2im 9+2im; 0 0] -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 0+0im 0+0im - -julia> Adjoint(A) -2×2 adjoint(::Matrix{Complex{Int64}}) with eltype Complex{Int64}: - 3-2im 0+0im - 9-2im 0+0im -``` -""" -struct Adjoint{T,S} <: AbstractMatrix{T} - parent::S -end -""" - Transpose - -Lazy wrapper type for a transpose view of the underlying linear algebra object, -usually an `AbstractVector`/`AbstractMatrix`. -Usually, the `Transpose` constructor should not be called directly, use [`transpose`](@ref) -instead. To materialize the view use [`copy`](@ref). - -This type is intended for linear algebra usage - for general data manipulation see -[`permutedims`](@ref Base.permutedims). - -# Examples -```jldoctest -julia> A = [2 3; 0 0] -2×2 Matrix{Int64}: - 2 3 - 0 0 - -julia> Transpose(A) -2×2 transpose(::Matrix{Int64}) with eltype Int64: - 2 0 - 3 0 -``` -""" -struct Transpose{T,S} <: AbstractMatrix{T} - parent::S -end - -# basic outer constructors -Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A) -Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A) - -""" - adj_or_trans(::AbstractArray) -> adjoint|transpose|identity - adj_or_trans(::Type{<:AbstractArray}) -> adjoint|transpose|identity - -Return [`adjoint`](@ref) from an `Adjoint` type or object and -[`transpose`](@ref) from a `Transpose` type or object. Otherwise, -return [`identity`](@ref). Note that `Adjoint` and `Transpose` have -to be the outer-most wrapper object for a non-`identity` function to be -returned. -""" -adj_or_trans(::T) where {T<:AbstractArray} = adj_or_trans(T) -adj_or_trans(::Type{<:AbstractArray}) = identity -adj_or_trans(::Type{<:Adjoint}) = adjoint -adj_or_trans(::Type{<:Transpose}) = transpose - -""" - inplace_adj_or_trans(::AbstractArray) -> adjoint!|transpose!|copyto! - inplace_adj_or_trans(::Type{<:AbstractArray}) -> adjoint!|transpose!|copyto! - -Return [`adjoint!`](@ref) from an `Adjoint` type or object and -[`transpose!`](@ref) from a `Transpose` type or object. Otherwise, -return [`copyto!`](@ref). Note that `Adjoint` and `Transpose` have -to be the outer-most wrapper object for a non-`identity` function to be -returned. -""" -inplace_adj_or_trans(::T) where {T <: AbstractArray} = inplace_adj_or_trans(T) -inplace_adj_or_trans(::Type{<:AbstractArray}) = copyto! -inplace_adj_or_trans(::Type{<:Adjoint}) = adjoint! 
-inplace_adj_or_trans(::Type{<:Transpose}) = transpose! - -_unwrap(A::Adjoint) = parent(A) -_unwrap(A::Transpose) = parent(A) - -Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) -Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent)) - -# wrapping lowercase quasi-constructors -""" - A' - adjoint(A) - -Lazy adjoint (conjugate transposition). Note that `adjoint` is applied recursively to -elements. - -For number types, `adjoint` returns the complex conjugate, and therefore it is equivalent to -the identity function for real numbers. - -This operation is intended for linear algebra usage - for general data manipulation see -[`permutedims`](@ref Base.permutedims). - -# Examples -```jldoctest -julia> A = [3+2im 9+2im; 0 0] -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 0+0im 0+0im - -julia> B = A' # equivalently adjoint(A) -2×2 adjoint(::Matrix{Complex{Int64}}) with eltype Complex{Int64}: - 3-2im 0+0im - 9-2im 0+0im - -julia> B isa Adjoint -true - -julia> adjoint(B) === A # the adjoint of an adjoint unwraps the parent -true - -julia> Adjoint(B) # however, the constructor always wraps its argument -2×2 adjoint(adjoint(::Matrix{Complex{Int64}})) with eltype Complex{Int64}: - 3+2im 9+2im - 0+0im 0+0im - -julia> B[1,2] = 4 + 5im; # modifying B will modify A automatically - -julia> A -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 4-5im 0+0im -``` - -For real matrices, the `adjoint` operation is equivalent to a `transpose`. - -```jldoctest -julia> A = reshape([x for x in 1:4], 2, 2) -2×2 Matrix{Int64}: - 1 3 - 2 4 - -julia> A' -2×2 adjoint(::Matrix{Int64}) with eltype Int64: - 1 2 - 3 4 - -julia> adjoint(A) == transpose(A) -true -``` - -The adjoint of an `AbstractVector` is a row-vector: -```jldoctest -julia> x = [3, 4im] -2-element Vector{Complex{Int64}}: - 3 + 0im - 0 + 4im - -julia> x' -1×2 adjoint(::Vector{Complex{Int64}}) with eltype Complex{Int64}: - 3+0im 0-4im - -julia> x'x # compute the dot product, equivalently x' * x -25 + 0im -``` - -For a matrix of matrices, the individual blocks are recursively operated on: -```jldoctest -julia> A = reshape([x + im*x for x in 1:4], 2, 2) -2×2 Matrix{Complex{Int64}}: - 1+1im 3+3im - 2+2im 4+4im - -julia> C = reshape([A, 2A, 3A, 4A], 2, 2) -2×2 Matrix{Matrix{Complex{Int64}}}: - [1+1im 3+3im; 2+2im 4+4im] [3+3im 9+9im; 6+6im 12+12im] - [2+2im 6+6im; 4+4im 8+8im] [4+4im 12+12im; 8+8im 16+16im] - -julia> C' -2×2 adjoint(::Matrix{Matrix{Complex{Int64}}}) with eltype Adjoint{Complex{Int64}, Matrix{Complex{Int64}}}: - [1-1im 2-2im; 3-3im 4-4im] [2-2im 4-4im; 6-6im 8-8im] - [3-3im 6-6im; 9-9im 12-12im] [4-4im 8-8im; 12-12im 16-16im] -``` -""" -adjoint(A::AbstractVecOrMat) = Adjoint(A) - -""" - transpose(A) - -Lazy transpose. Mutating the returned object should appropriately mutate `A`. Often, -but not always, yields `Transpose(A)`, where `Transpose` is a lazy transpose wrapper. Note -that this operation is recursive. - -This operation is intended for linear algebra usage - for general data manipulation see -[`permutedims`](@ref Base.permutedims), which is non-recursive. 
- -# Examples -```jldoctest -julia> A = [3 2; 0 0] -2×2 Matrix{Int64}: - 3 2 - 0 0 - -julia> B = transpose(A) -2×2 transpose(::Matrix{Int64}) with eltype Int64: - 3 0 - 2 0 - -julia> B isa Transpose -true - -julia> transpose(B) === A # the transpose of a transpose unwraps the parent -true - -julia> Transpose(B) # however, the constructor always wraps its argument -2×2 transpose(transpose(::Matrix{Int64})) with eltype Int64: - 3 2 - 0 0 - -julia> B[1,2] = 4; # modifying B will modify A automatically - -julia> A -2×2 Matrix{Int64}: - 3 2 - 4 0 -``` - -For complex matrices, the `adjoint` operation is equivalent to a conjugate-transpose. -```jldoctest -julia> A = reshape([Complex(x, x) for x in 1:4], 2, 2) -2×2 Matrix{Complex{Int64}}: - 1+1im 3+3im - 2+2im 4+4im - -julia> adjoint(A) == conj(transpose(A)) -true -``` - -The `transpose` of an `AbstractVector` is a row-vector: -```jldoctest -julia> v = [1,2,3] -3-element Vector{Int64}: - 1 - 2 - 3 - -julia> transpose(v) # returns a row-vector -1×3 transpose(::Vector{Int64}) with eltype Int64: - 1 2 3 - -julia> transpose(v) * v # compute the dot product -14 -``` - -For a matrix of matrices, the individual blocks are recursively operated on: -```jldoctest -julia> C = [1 3; 2 4] -2×2 Matrix{Int64}: - 1 3 - 2 4 - -julia> D = reshape([C, 2C, 3C, 4C], 2, 2) # construct a block matrix -2×2 Matrix{Matrix{Int64}}: - [1 3; 2 4] [3 9; 6 12] - [2 6; 4 8] [4 12; 8 16] - -julia> transpose(D) # blocks are recursively transposed -2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}: - [1 2; 3 4] [2 4; 6 8] - [3 6; 9 12] [4 8; 12 16] -``` -""" -transpose(A::AbstractVecOrMat) = Transpose(A) - -# unwrapping lowercase quasi-constructors -adjoint(A::Adjoint) = A.parent -transpose(A::Transpose) = A.parent -adjoint(A::Transpose{<:Real}) = A.parent -transpose(A::Adjoint{<:Real}) = A.parent - -# printing -function Base.showarg(io::IO, v::Adjoint, toplevel) - print(io, "adjoint(") - Base.showarg(io, parent(v), false) - print(io, ')') - toplevel && print(io, " with eltype ", eltype(v)) - return nothing -end -function Base.showarg(io::IO, v::Transpose, toplevel) - print(io, "transpose(") - Base.showarg(io, parent(v), false) - print(io, ')') - toplevel && print(io, " with eltype ", eltype(v)) - return nothing -end - -# some aliases for internal convenience use -const AdjOrTrans{T,S} = Union{Adjoint{T,S},Transpose{T,S}} where {T,S} -const AdjointAbsVec{T} = Adjoint{T,<:AbstractVector} -const AdjointAbsMat{T} = Adjoint{T,<:AbstractMatrix} -const TransposeAbsVec{T} = Transpose{T,<:AbstractVector} -const TransposeAbsMat{T} = Transpose{T,<:AbstractMatrix} -const AdjOrTransAbsVec{T} = AdjOrTrans{T,<:AbstractVector} -const AdjOrTransAbsMat{T} = AdjOrTrans{T,<:AbstractMatrix} - -# for internal use below -wrapperop(_) = identity -wrapperop(::Adjoint) = adjoint -wrapperop(::Transpose) = transpose - -# the following fallbacks can be removed if Adjoint/Transpose are restricted to AbstractVecOrMat -size(A::AdjOrTrans) = reverse(size(A.parent)) -axes(A::AdjOrTrans) = reverse(axes(A.parent)) -# AbstractArray interface, basic definitions -length(A::AdjOrTrans) = length(A.parent) -size(v::AdjOrTransAbsVec) = (1, length(v.parent)) -size(A::AdjOrTransAbsMat) = reverse(size(A.parent)) -axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...) 
-axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent)) -IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear() -IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian() -@propagate_inbounds Base.isassigned(v::AdjOrTransAbsVec, i::Int) = isassigned(v.parent, i-1+first(axes(v.parent)[1])) -@propagate_inbounds Base.isassigned(v::AdjOrTransAbsMat, i::Int, j::Int) = isassigned(v.parent, j, i) -@propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T -@propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T -@propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v) -@propagate_inbounds setindex!(A::AdjOrTransAbsMat, x, i::Int, j::Int) = (setindex!(A.parent, wrapperop(A)(x), j, i); A) -# AbstractArray interface, additional definitions to retain wrapper over vectors where appropriate -@propagate_inbounds getindex(v::AdjOrTransAbsVec, ::Colon, is::AbstractArray{Int}) = wrapperop(v)(v.parent[is]) -@propagate_inbounds getindex(v::AdjOrTransAbsVec, ::Colon, ::Colon) = wrapperop(v)(v.parent[:]) - -# conversion of underlying storage -convert(::Type{Adjoint{T,S}}, A::Adjoint) where {T,S} = Adjoint{T,S}(convert(S, A.parent))::Adjoint{T,S} -convert(::Type{Transpose{T,S}}, A::Transpose) where {T,S} = Transpose{T,S}(convert(S, A.parent))::Transpose{T,S} - -# Strides and pointer for transposed strided arrays — but only if the elements are actually stored in memory -Base.strides(A::Adjoint{<:Real, <:AbstractVector}) = (stride(A.parent, 2), stride(A.parent, 1)) -Base.strides(A::Transpose{<:Any, <:AbstractVector}) = (stride(A.parent, 2), stride(A.parent, 1)) -# For matrices it's slightly faster to use reverse and avoid calling stride twice -Base.strides(A::Adjoint{<:Real, <:AbstractMatrix}) = reverse(strides(A.parent)) -Base.strides(A::Transpose{<:Any, <:AbstractMatrix}) = reverse(strides(A.parent)) - -Base.unsafe_convert(::Type{Ptr{T}}, A::Adjoint{<:Real, <:AbstractVecOrMat}) where {T} = Base.unsafe_convert(Ptr{T}, A.parent) -Base.unsafe_convert(::Type{Ptr{T}}, A::Transpose{<:Any, <:AbstractVecOrMat}) where {T} = Base.unsafe_convert(Ptr{T}, A.parent) - -Base.elsize(::Type{<:Adjoint{<:Real, P}}) where {P<:AbstractVecOrMat} = Base.elsize(P) -Base.elsize(::Type{<:Transpose{<:Any, P}}) where {P<:AbstractVecOrMat} = Base.elsize(P) - -# for vectors, the semantics of the wrapped and unwrapped types differ -# so attempt to maintain both the parent and wrapper type insofar as possible -similar(A::AdjOrTransAbsVec) = wrapperop(A)(similar(A.parent)) -similar(A::AdjOrTransAbsVec, ::Type{T}) where {T} = wrapperop(A)(similar(A.parent, Base.promote_op(wrapperop(A), T))) -# for matrices, the semantics of the wrapped and unwrapped types are generally the same -# and as you are allocating with similar anyway, you might as well get something unwrapped -similar(A::AdjOrTrans) = similar(A.parent, eltype(A), axes(A)) -similar(A::AdjOrTrans, ::Type{T}) where {T} = similar(A.parent, T, axes(A)) -similar(A::AdjOrTrans, ::Type{T}, dims::Dims{N}) where {T,N} = similar(A.parent, T, dims) - -# AbstractMatrix{T} constructor for adjtrans vector: preserve wrapped type -AbstractMatrix{T}(A::AdjOrTransAbsVec) where {T} = wrapperop(A)(AbstractVector{T}(A.parent)) - -# sundry basic definitions -parent(A::AdjOrTrans) = A.parent -vec(v::TransposeAbsVec{<:Number}) = parent(v) -vec(v::AdjointAbsVec{<:Real}) = parent(v) - -### concatenation -# 
preserve Adjoint/Transpose wrapper around vectors -# to retain the associated semantics post-concatenation -hcat(avs::Union{Number,AdjointAbsVec}...) = _adjoint_hcat(avs...) -hcat(tvs::Union{Number,TransposeAbsVec}...) = _transpose_hcat(tvs...) -_adjoint_hcat(avs::Union{Number,AdjointAbsVec}...) = adjoint(vcat(map(adjoint, avs)...)) -_transpose_hcat(tvs::Union{Number,TransposeAbsVec}...) = transpose(vcat(map(transpose, tvs)...)) -typed_hcat(::Type{T}, avs::Union{Number,AdjointAbsVec}...) where {T} = adjoint(typed_vcat(T, map(adjoint, avs)...)) -typed_hcat(::Type{T}, tvs::Union{Number,TransposeAbsVec}...) where {T} = transpose(typed_vcat(T, map(transpose, tvs)...)) -# otherwise-redundant definitions necessary to prevent hitting the concat methods in LinearAlgebra/special.jl -hcat(avs::Adjoint{<:Any,<:Vector}...) = _adjoint_hcat(avs...) -hcat(tvs::Transpose{<:Any,<:Vector}...) = _transpose_hcat(tvs...) -hcat(avs::Adjoint{T,Vector{T}}...) where {T} = _adjoint_hcat(avs...) -hcat(tvs::Transpose{T,Vector{T}}...) where {T} = _transpose_hcat(tvs...) -# TODO unify and allow mixed combinations - - -### higher order functions -# preserve Adjoint/Transpose wrapper around vectors -# to retain the associated semantics post-map/broadcast -# -# note that the caller's operation f operates in the domain of the wrapped vectors' entries. -# hence the adjoint->f->adjoint shenanigans applied to the parent vectors' entries. -map(f, avs::AdjointAbsVec...) = adjoint(map((xs...) -> adjoint(f(adjoint.(xs)...)), parent.(avs)...)) -map(f, tvs::TransposeAbsVec...) = transpose(map((xs...) -> transpose(f(transpose.(xs)...)), parent.(tvs)...)) -quasiparentt(x) = parent(x); quasiparentt(x::Number) = x # to handle numbers in the defs below -quasiparenta(x) = parent(x); quasiparenta(x::Number) = conj(x) # to handle numbers in the defs below -broadcast(f, avs::Union{Number,AdjointAbsVec}...) = adjoint(broadcast((xs...) -> adjoint(f(adjoint.(xs)...)), quasiparenta.(avs)...)) -broadcast(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...)) -# Hack to preserve behavior after #32122; this needs to be done with a broadcast style instead to support dotted fusion -Broadcast.broadcast_preserving_zero_d(f, avs::Union{Number,AdjointAbsVec}...) = adjoint(broadcast((xs...) -> adjoint(f(adjoint.(xs)...)), quasiparenta.(avs)...)) -Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) 
-> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...)) -# TODO unify and allow mixed combinations with a broadcast style - - -### reductions -# faster to sum the Array than to work through the wrapper (but only in commutative reduction ops as in Base/permuteddimsarray.jl) -Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::Transpose, dims::Colon) = - Base._mapreduce_dim(f∘transpose, op, init, parent(A), dims) -Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::Adjoint, dims::Colon) = - Base._mapreduce_dim(f∘adjoint, op, init, parent(A), dims) -# in prod, use fast path only in the commutative case to avoid surprises -Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, init::Base._InitialValue, A::Transpose{<:Union{Real,Complex}}, dims::Colon) = - Base._mapreduce_dim(f∘transpose, op, init, parent(A), dims) -Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, init::Base._InitialValue, A::Adjoint{<:Union{Real,Complex}}, dims::Colon) = - Base._mapreduce_dim(f∘adjoint, op, init, parent(A), dims) -# count allows for optimization only if the parent array has Bool eltype -Base._count(::typeof(identity), A::Transpose{Bool}, ::Colon, init) = Base._count(identity, parent(A), :, init) -Base._count(::typeof(identity), A::Adjoint{Bool}, ::Colon, init) = Base._count(identity, parent(A), :, init) -Base._any(f, A::Transpose, ::Colon) = Base._any(f∘transpose, parent(A), :) -Base._any(f, A::Adjoint, ::Colon) = Base._any(f∘adjoint, parent(A), :) -Base._all(f, A::Transpose, ::Colon) = Base._all(f∘transpose, parent(A), :) -Base._all(f, A::Adjoint, ::Colon) = Base._all(f∘adjoint, parent(A), :) -# sum(A'; dims) -Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray, A::TransposeAbsMat) = - (Base.mapreducedim!(f∘transpose, op, switch_dim12(B), parent(A)); B) -Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray, A::AdjointAbsMat) = - (Base.mapreducedim!(f∘adjoint, op, switch_dim12(B), parent(A)); B) -Base.mapreducedim!(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, B::AbstractArray, A::TransposeAbsMat{<:Union{Real,Complex}}) = - (Base.mapreducedim!(f∘transpose, op, switch_dim12(B), parent(A)); B) -Base.mapreducedim!(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, B::AbstractArray, A::AdjointAbsMat{<:Union{Real,Complex}}) = - (Base.mapreducedim!(f∘adjoint, op, switch_dim12(B), parent(A)); B) - -switch_dim12(B::AbstractVector) = permutedims(B) -switch_dim12(B::AbstractVector{<:Number}) = transpose(B) # avoid allocs due to permutedims -switch_dim12(B::AbstractArray{<:Any,0}) = B -switch_dim12(B::AbstractArray) = PermutedDimsArray(B, (2, 1, ntuple(Base.Fix1(+,2), ndims(B) - 2)...)) - -### linear algebra - -(-)(A::Adjoint) = Adjoint( -A.parent) -(-)(A::Transpose) = Transpose(-A.parent) - -tr(A::Adjoint) = adjoint(tr(parent(A))) -tr(A::Transpose) = transpose(tr(parent(A))) - -## multiplication * - -function _dot_nonrecursive(u, v) - lu = length(u) - if lu != length(v) - throw(DimensionMismatch("first array has length $(lu) which does not match the length of the second, $(length(v)).")) - end - if lu == 0 - zero(eltype(u)) * zero(eltype(v)) - else - sum(uu*vv for (uu, vv) in zip(u, v)) - end -end - -# Adjoint/Transpose-vector * vector -*(u::AdjointAbsVec{<:Number}, v::AbstractVector{<:Number}) = dot(u.parent, v) -*(u::TransposeAbsVec{T}, v::AbstractVector{T}) where {T<:Real} = dot(u.parent, v) -*(u::AdjOrTransAbsVec, v::AbstractVector) = _dot_nonrecursive(u, v) - - 
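As a quick check of the row-vector methods just above (an illustrative REPL-style snippet, not taken from the deleted source): `u' * v` dispatches to `dot(u, v)` and yields a scalar, while the non-conjugating `transpose(u) * v` with complex elements falls back to `_dot_nonrecursive`, i.e. a plain sum of products.

```julia
using LinearAlgebra

u = [1 + 2im, 3 + 0im]
v = [4 + 0im, 5 - 1im]

u' * v == dot(u, v)               # true: a scalar (19 - 11im), not a 1×1 matrix
transpose(u) * v == sum(u .* v)   # true: transpose variant does not conjugate
```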
-# vector * Adjoint/Transpose-vector -*(u::AbstractVector, v::AdjOrTransAbsVec) = broadcast(*, u, v) -# Adjoint/Transpose-vector * Adjoint/Transpose-vector -# (necessary for disambiguation with fallback methods in linalg/matmul) -*(u::AdjointAbsVec, v::AdjointAbsVec) = throw(MethodError(*, (u, v))) -*(u::TransposeAbsVec, v::TransposeAbsVec) = throw(MethodError(*, (u, v))) - -# AdjOrTransAbsVec{<:Any,<:AdjOrTransAbsVec} is a lazy conj vectors -# We need to expand the combinations to avoid ambiguities -(*)(u::TransposeAbsVec, v::AdjointAbsVec{<:Any,<:TransposeAbsVec}) = _dot_nonrecursive(u, v) -(*)(u::AdjointAbsVec, v::AdjointAbsVec{<:Any,<:TransposeAbsVec}) = _dot_nonrecursive(u, v) -(*)(u::TransposeAbsVec, v::TransposeAbsVec{<:Any,<:AdjointAbsVec}) = _dot_nonrecursive(u, v) -(*)(u::AdjointAbsVec, v::TransposeAbsVec{<:Any,<:AdjointAbsVec}) = _dot_nonrecursive(u, v) - -## pseudoinversion -pinv(v::AdjointAbsVec, tol::Real = 0) = pinv(v.parent, tol).parent -pinv(v::TransposeAbsVec, tol::Real = 0) = pinv(conj(v.parent)).parent - - -## left-division \ -\(u::AdjOrTransAbsVec, v::AdjOrTransAbsVec) = pinv(u) * v - - -## right-division / -/(u::AdjointAbsVec, A::AbstractMatrix) = adjoint(adjoint(A) \ u.parent) -/(u::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A) \ u.parent) -/(u::AdjointAbsVec, A::TransposeAbsMat) = adjoint(conj(A.parent) \ u.parent) # technically should be adjoint(copy(adjoint(copy(A))) \ u.parent) -/(u::TransposeAbsVec, A::AdjointAbsMat) = transpose(conj(A.parent) \ u.parent) # technically should be transpose(copy(transpose(copy(A))) \ u.parent) - -## complex conjugate -conj(A::Transpose) = adjoint(A.parent) -conj(A::Adjoint) = transpose(A.parent) - -## structured matrix methods ## -function Base.replace_in_print_matrix(A::AdjOrTrans,i::Integer,j::Integer,s::AbstractString) - Base.replace_in_print_matrix(parent(A), j, i, s) -end diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl deleted file mode 100644 index 192272cc61e98..0000000000000 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ /dev/null @@ -1,961 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Bidiagonal matrices -struct Bidiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T} - dv::V # diagonal - ev::V # sub/super diagonal - uplo::Char # upper bidiagonal ('U') or lower ('L') - function Bidiagonal{T,V}(dv, ev, uplo::AbstractChar) where {T,V<:AbstractVector{T}} - require_one_based_indexing(dv, ev) - if length(ev) != max(length(dv)-1, 0) - throw(DimensionMismatch("length of diagonal vector is $(length(dv)), length of off-diagonal vector is $(length(ev))")) - end - (uplo != 'U' && uplo != 'L') && throw_uplo() - new{T,V}(dv, ev, uplo) - end -end -function Bidiagonal{T,V}(dv, ev, uplo::Symbol) where {T,V<:AbstractVector{T}} - Bidiagonal{T,V}(dv, ev, char_uplo(uplo)) -end -function Bidiagonal{T}(dv::AbstractVector, ev::AbstractVector, uplo::Union{Symbol,AbstractChar}) where {T} - Bidiagonal(convert(AbstractVector{T}, dv)::AbstractVector{T}, - convert(AbstractVector{T}, ev)::AbstractVector{T}, - uplo) -end -function Bidiagonal{T,V}(A::Bidiagonal) where {T,V<:AbstractVector{T}} - Bidiagonal{T,V}(A.dv, A.ev, A.uplo) -end - -""" - Bidiagonal(dv::V, ev::V, uplo::Symbol) where V <: AbstractVector - -Constructs an upper (`uplo=:U`) or lower (`uplo=:L`) bidiagonal matrix using the -given diagonal (`dv`) and off-diagonal (`ev`) vectors. 
The result is of type `Bidiagonal` -and provides efficient specialized linear solvers, but may be converted into a regular -matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). The length of `ev` -must be one less than the length of `dv`. - -# Examples -```jldoctest -julia> dv = [1, 2, 3, 4] -4-element Vector{Int64}: - 1 - 2 - 3 - 4 - -julia> ev = [7, 8, 9] -3-element Vector{Int64}: - 7 - 8 - 9 - -julia> Bu = Bidiagonal(dv, ev, :U) # ev is on the first superdiagonal -4×4 Bidiagonal{Int64, Vector{Int64}}: - 1 7 ⋅ ⋅ - ⋅ 2 8 ⋅ - ⋅ ⋅ 3 9 - ⋅ ⋅ ⋅ 4 - -julia> Bl = Bidiagonal(dv, ev, :L) # ev is on the first subdiagonal -4×4 Bidiagonal{Int64, Vector{Int64}}: - 1 ⋅ ⋅ ⋅ - 7 2 ⋅ ⋅ - ⋅ 8 3 ⋅ - ⋅ ⋅ 9 4 -``` -""" -function Bidiagonal(dv::V, ev::V, uplo::Symbol) where {T,V<:AbstractVector{T}} - Bidiagonal{T,V}(dv, ev, uplo) -end -function Bidiagonal(dv::V, ev::V, uplo::AbstractChar) where {T,V<:AbstractVector{T}} - Bidiagonal{T,V}(dv, ev, uplo) -end - -#To allow Bidiagonal's where the "dv" is Vector{T} and "ev" Vector{S}, -#where T and S can be promoted -function Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S} - TS = promote_type(T,S) - return Bidiagonal{TS,Vector{TS}}(dv, ev, uplo) -end - -""" - Bidiagonal(A, uplo::Symbol) - -Construct a `Bidiagonal` matrix from the main diagonal of `A` and -its first super- (if `uplo=:U`) or sub-diagonal (if `uplo=:L`). - -# Examples -```jldoctest -julia> A = [1 1 1 1; 2 2 2 2; 3 3 3 3; 4 4 4 4] -4×4 Matrix{Int64}: - 1 1 1 1 - 2 2 2 2 - 3 3 3 3 - 4 4 4 4 - -julia> Bidiagonal(A, :U) # contains the main diagonal and first superdiagonal of A -4×4 Bidiagonal{Int64, Vector{Int64}}: - 1 1 ⋅ ⋅ - ⋅ 2 2 ⋅ - ⋅ ⋅ 3 3 - ⋅ ⋅ ⋅ 4 - -julia> Bidiagonal(A, :L) # contains the main diagonal and first subdiagonal of A -4×4 Bidiagonal{Int64, Vector{Int64}}: - 1 ⋅ ⋅ ⋅ - 2 2 ⋅ ⋅ - ⋅ 3 3 ⋅ - ⋅ ⋅ 4 4 -``` -""" -function Bidiagonal(A::AbstractMatrix, uplo::Symbol) - Bidiagonal(diag(A, 0), diag(A, uplo === :U ? 
1 : -1), uplo) -end - - -Bidiagonal(A::Bidiagonal) = A -Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A -Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo) - -bidiagzero(::Bidiagonal{T}, i, j) where {T} = zero(T) -function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j) - Tel = eltype(eltype(A.dv)) - if i < j && A.uplo == 'U' #= top right zeros =# - return zeros(Tel, size(A.ev[i], 1), size(A.ev[j-1], 2)) - elseif j < i && A.uplo == 'L' #= bottom left zeros =# - return zeros(Tel, size(A.ev[i-1], 1), size(A.ev[j], 2)) - else - return zeros(Tel, size(A.dv[i], 1), size(A.dv[j], 2)) - end -end - -@inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int) - @boundscheck checkbounds(Bool, A, i, j) || return false - if i == j - return @inbounds isassigned(A.dv, i) - elseif A.uplo == 'U' && (i == j - 1) - return @inbounds isassigned(A.ev, i) - elseif A.uplo == 'L' && (i == j + 1) - return @inbounds isassigned(A.ev, j) - else - return true - end -end - -@inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T - @boundscheck checkbounds(A, i, j) - if i == j - return @inbounds A.dv[i] - elseif A.uplo == 'U' && (i == j - 1) - return @inbounds A.ev[i] - elseif A.uplo == 'L' && (i == j + 1) - return @inbounds A.ev[j] - else - return bidiagzero(A, i, j) - end -end - -@inline function setindex!(A::Bidiagonal, x, i::Integer, j::Integer) - @boundscheck checkbounds(A, i, j) - if i == j - @inbounds A.dv[i] = x - elseif A.uplo == 'U' && (i == j - 1) - @inbounds A.ev[i] = x - elseif A.uplo == 'L' && (i == j + 1) - @inbounds A.ev[j] = x - elseif !iszero(x) - throw(ArgumentError(string("cannot set entry ($i, $j) off the ", - "$(istriu(A) ? "upper" : "lower") bidiagonal band to a nonzero value ($x)"))) - end - return x -end - -## structured matrix methods ## -function Base.replace_in_print_matrix(A::Bidiagonal,i::Integer,j::Integer,s::AbstractString) - if A.uplo == 'U' - i==j || i==j-1 ? s : Base.replace_with_centered_mark(s) - else - i==j || i==j+1 ? s : Base.replace_with_centered_mark(s) - end -end - -#Converting from Bidiagonal to dense Matrix -function Matrix{T}(A::Bidiagonal) where T - n = size(A, 1) - B = Matrix{T}(undef, n, n) - n == 0 && return B - n > 1 && fill!(B, zero(T)) - @inbounds for i = 1:n - 1 - B[i,i] = A.dv[i] - if A.uplo == 'U' - B[i,i+1] = A.ev[i] - else - B[i+1,i] = A.ev[i] - end - end - B[n,n] = A.dv[n] - return B -end -Matrix(A::Bidiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(A) -Array(A::Bidiagonal) = Matrix(A) -promote_rule(::Type{Matrix{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} = - @isdefined(T) && @isdefined(S) ? Matrix{promote_type(T,S)} : Matrix -promote_rule(::Type{Matrix}, ::Type{<:Bidiagonal}) = Matrix - -#Converting from Bidiagonal to Tridiagonal -function Tridiagonal{T}(A::Bidiagonal) where T - dv = convert(AbstractVector{T}, A.dv) - ev = convert(AbstractVector{T}, A.ev) - z = fill!(similar(ev), zero(T)) - A.uplo == 'U' ? Tridiagonal(z, dv, ev) : Tridiagonal(ev, dv, z) -end -promote_rule(::Type{<:Tridiagonal{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} = - @isdefined(T) && @isdefined(S) ? Tridiagonal{promote_type(T,S)} : Tridiagonal -promote_rule(::Type{<:Tridiagonal}, ::Type{<:Bidiagonal}) = Tridiagonal - -# When asked to convert Bidiagonal to AbstractMatrix{T}, preserve structure by converting to Bidiagonal{T} <: AbstractMatrix{T} -AbstractMatrix{T}(A::Bidiagonal) where {T} = convert(Bidiagonal{T}, A) - -convert(::Type{T}, m::AbstractMatrix) where {T<:Bidiagonal} = m isa T ? 
m : T(m)::T - -similar(B::Bidiagonal, ::Type{T}) where {T} = Bidiagonal(similar(B.dv, T), similar(B.ev, T), B.uplo) -similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(B.dv, T, dims) - -tr(B::Bidiagonal) = sum(B.dv) - -function kron(A::Diagonal, B::Bidiagonal) - # `_droplast!` is only guaranteed to work with `Vector` - kdv = _makevector(kron(diag(A), B.dv)) - kev = _droplast!(_makevector(kron(diag(A), _pushzero(B.ev)))) - Bidiagonal(kdv, kev, B.uplo) -end - -################### -# LAPACK routines # -################### - -#Singular values -svdvals!(M::Bidiagonal{<:BlasReal}) = LAPACK.bdsdc!(M.uplo, 'N', M.dv, M.ev)[1] -function svd!(M::Bidiagonal{<:BlasReal}; full::Bool = false) - d, e, U, Vt, Q, iQ = LAPACK.bdsdc!(M.uplo, 'I', M.dv, M.ev) - SVD(U, d, Vt) -end -function svd(M::Bidiagonal; kw...) - svd!(copy(M), kw...) -end - -#################### -# Generic routines # -#################### - -function show(io::IO, M::Bidiagonal) - # TODO: make this readable and one-line - summary(io, M) - print(io, ":\n diag:") - print_matrix(io, (M.dv)') - print(io, M.uplo == 'U' ? "\n super:" : "\n sub:") - print_matrix(io, (M.ev)') -end - -size(M::Bidiagonal) = (length(M.dv), length(M.dv)) -function size(M::Bidiagonal, d::Integer) - if d < 1 - throw(ArgumentError("dimension must be ≥ 1, got $d")) - elseif d <= 2 - return length(M.dv) - else - return 1 - end -end - -#Elementary operations -for func in (:conj, :copy, :real, :imag) - @eval ($func)(M::Bidiagonal) = Bidiagonal(($func)(M.dv), ($func)(M.ev), M.uplo) -end - -adjoint(B::Bidiagonal) = Adjoint(B) -transpose(B::Bidiagonal) = Transpose(B) -adjoint(B::Bidiagonal{<:Number}) = Bidiagonal(conj(B.dv), conj(B.ev), B.uplo == 'U' ? :L : :U) -transpose(B::Bidiagonal{<:Number}) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? :L : :U) -permutedims(B::Bidiagonal) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? 'L' : 'U') -function permutedims(B::Bidiagonal, perm) - Base.checkdims_perm(B, B, perm) - NTuple{2}(perm) == (2, 1) ? permutedims(B) : B -end -function Base.copy(aB::Adjoint{<:Any,<:Bidiagonal}) - B = aB.parent - return Bidiagonal(map(x -> copy.(adjoint.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? :L : :U) -end -function Base.copy(tB::Transpose{<:Any,<:Bidiagonal}) - B = tB.parent - return Bidiagonal(map(x -> copy.(transpose.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? 
:L : :U) -end - -iszero(M::Bidiagonal) = iszero(M.dv) && iszero(M.ev) -isone(M::Bidiagonal) = all(isone, M.dv) && iszero(M.ev) -function istriu(M::Bidiagonal, k::Integer=0) - if M.uplo == 'U' - if k <= 0 - return true - elseif k == 1 - return iszero(M.dv) - else # k >= 2 - return iszero(M.dv) && iszero(M.ev) - end - else # M.uplo == 'L' - if k <= -1 - return true - elseif k == 0 - return iszero(M.ev) - else # k >= 1 - return iszero(M.ev) && iszero(M.dv) - end - end -end -function istril(M::Bidiagonal, k::Integer=0) - if M.uplo == 'U' - if k >= 1 - return true - elseif k == 0 - return iszero(M.ev) - else # k <= -1 - return iszero(M.ev) && iszero(M.dv) - end - else # M.uplo == 'L' - if k >= 0 - return true - elseif k == -1 - return iszero(M.dv) - else # k <= -2 - return iszero(M.dv) && iszero(M.ev) - end - end -end -isdiag(M::Bidiagonal) = iszero(M.ev) - -function tril!(M::Bidiagonal{T}, k::Integer=0) where T - n = length(M.dv) - if !(-n - 1 <= k <= n - 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix"))) - elseif M.uplo == 'U' && k < 0 - fill!(M.dv, zero(T)) - fill!(M.ev, zero(T)) - elseif k < -1 - fill!(M.dv, zero(T)) - fill!(M.ev, zero(T)) - elseif M.uplo == 'U' && k == 0 - fill!(M.ev, zero(T)) - elseif M.uplo == 'L' && k == -1 - fill!(M.dv, zero(T)) - end - return M -end - -function triu!(M::Bidiagonal{T}, k::Integer=0) where T - n = length(M.dv) - if !(-n + 1 <= k <= n + 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least", - "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix"))) - elseif M.uplo == 'L' && k > 0 - fill!(M.dv, zero(T)) - fill!(M.ev, zero(T)) - elseif k > 1 - fill!(M.dv, zero(T)) - fill!(M.ev, zero(T)) - elseif M.uplo == 'L' && k == 0 - fill!(M.ev, zero(T)) - elseif M.uplo == 'U' && k == 1 - fill!(M.dv, zero(T)) - end - return M -end - -function diag(M::Bidiagonal{T}, n::Integer=0) where T - # every branch call similar(..., ::Int) to make sure the - # same vector type is returned independent of n - if n == 0 - return copyto!(similar(M.dv, length(M.dv)), M.dv) - elseif (n == 1 && M.uplo == 'U') || (n == -1 && M.uplo == 'L') - return copyto!(similar(M.ev, length(M.ev)), M.ev) - elseif -size(M,1) <= n <= size(M,1) - return fill!(similar(M.dv, size(M,1)-abs(n)), zero(T)) - else - throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ", - "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix"))) - end -end - -function +(A::Bidiagonal, B::Bidiagonal) - if A.uplo == B.uplo || length(A.dv) == 0 - Bidiagonal(A.dv+B.dv, A.ev+B.ev, A.uplo) - else - newdv = A.dv+B.dv - Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(B.ev), newdv, typeof(newdv)(A.ev)) : (typeof(newdv)(A.ev), newdv, typeof(newdv)(B.ev)))...) - end -end - -function -(A::Bidiagonal, B::Bidiagonal) - if A.uplo == B.uplo || length(A.dv) == 0 - Bidiagonal(A.dv-B.dv, A.ev-B.ev, A.uplo) - else - newdv = A.dv-B.dv - Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.ev), newdv, typeof(newdv)(A.ev)) : (typeof(newdv)(A.ev), newdv, typeof(newdv)(-B.ev)))...) 
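        # (The branch above returns a Tridiagonal rather than a Bidiagonal because
        # A and B store their off-diagonals on opposite sides, so the difference
        # has both a sub- and a super-diagonal.)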
- end -end - --(A::Bidiagonal)=Bidiagonal(-A.dv,-A.ev,A.uplo) -*(A::Bidiagonal, B::Number) = Bidiagonal(A.dv*B, A.ev*B, A.uplo) -*(B::Number, A::Bidiagonal) = Bidiagonal(B*A.dv, B*A.ev, A.uplo) -/(A::Bidiagonal, B::Number) = Bidiagonal(A.dv/B, A.ev/B, A.uplo) -\(B::Number, A::Bidiagonal) = Bidiagonal(B\A.dv, B\A.ev, A.uplo) - -function ==(A::Bidiagonal, B::Bidiagonal) - if A.uplo == B.uplo - return A.dv == B.dv && A.ev == B.ev - else - return iszero(A.ev) && iszero(B.ev) && A.dv == B.dv - end -end - -const BandedMatrix = Union{Bidiagonal,Diagonal,Tridiagonal,SymTridiagonal} # or BiDiTriSym -const BiTriSym = Union{Bidiagonal,Tridiagonal,SymTridiagonal} -const BiTri = Union{Bidiagonal,Tridiagonal} -@inline mul!(C::AbstractVector, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::AbstractMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) - -function check_A_mul_B!_sizes(C, A, B) - mA, nA = size(A) - mB, nB = size(B) - mC, nC = size(C) - if mA != mC - throw(DimensionMismatch("first dimension of A, $mA, and first dimension of output C, $mC, must match")) - elseif nA != mB - throw(DimensionMismatch("second dimension of A, $nA, and first dimension of B, $mB, must match")) - elseif nB != nC - throw(DimensionMismatch("second dimension of output C, $nC, and second dimension of B, $nB, must match")) - end -end - -# function to get the internally stored vectors for Bidiagonal and [Sym]Tridiagonal -# to avoid allocations in _mul! below (#24324, #24578) -_diag(A::Tridiagonal, k) = k == -1 ? A.dl : k == 0 ? A.d : A.du -_diag(A::SymTridiagonal, k) = k == 0 ? A.dv : A.ev -function _diag(A::Bidiagonal, k) - if k == 0 - return A.dv - elseif (A.uplo == 'L' && k == -1) || (A.uplo == 'U' && k == 1) - return A.ev - else - return diag(A, k) - end -end - -function _mul!(C::AbstractMatrix, A::BiTriSym, B::BiTriSym, _add::MulAddMul = MulAddMul()) - check_A_mul_B!_sizes(C, A, B) - n = size(A,1) - n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) - # We use `_rmul_or_fill!` instead of `_modify!` here since using - # `_modify!` in the following loop will not update the - # off-diagonal elements for non-zero beta. 
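    # A and B each have bandwidth at most 1, so C = A*B has bandwidth at most 2:
    # row j of C can only touch C[j, j-2:j+2]. The first two and last two rows are
    # written out explicitly below; rows 3:n-2 are handled in the j-loop using the
    # band vectors Al/Ad/Au and Bl/Bd/Bu obtained from `_diag`.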
- _rmul_or_fill!(C, _add.beta) - iszero(_add.alpha) && return C - Al = _diag(A, -1) - Ad = _diag(A, 0) - Au = _diag(A, 1) - Bl = _diag(B, -1) - Bd = _diag(B, 0) - Bu = _diag(B, 1) - @inbounds begin - # first row of C - C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2, 1]) - C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2]) - C[1,3] += _add(A[1,2]*B[2,3]) - # second row of C - C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1]) - C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2]) - C[2,3] += _add(A[2,2]*B[2,3] + A[2,3]*B[3,3]) - C[2,4] += _add(A[2,3]*B[3,4]) - for j in 3:n-2 - Ajj₋1 = Al[j-1] - Ajj = Ad[j] - Ajj₊1 = Au[j] - Bj₋1j₋2 = Bl[j-2] - Bj₋1j₋1 = Bd[j-1] - Bj₋1j = Bu[j-1] - Bjj₋1 = Bl[j-1] - Bjj = Bd[j] - Bjj₊1 = Bu[j] - Bj₊1j = Bl[j] - Bj₊1j₊1 = Bd[j+1] - Bj₊1j₊2 = Bu[j+1] - C[j,j-2] += _add( Ajj₋1*Bj₋1j₋2) - C[j, j-1] += _add(Ajj₋1*Bj₋1j₋1 + Ajj*Bjj₋1) - C[j, j ] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj + Ajj₊1*Bj₊1j) - C[j, j+1] += _add(Ajj *Bjj₊1 + Ajj₊1*Bj₊1j₊1) - C[j, j+2] += _add(Ajj₊1*Bj₊1j₊2) - end - # row before last of C - C[n-1,n-3] += _add(A[n-1,n-2]*B[n-2,n-3]) - C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2] + A[n-1,n-2]*B[n-2,n-2]) - C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1]) - C[n-1,n ] += _add(A[n-1,n-1]*B[n-1,n ] + A[n-1, n]*B[n ,n ]) - # last row of C - C[n,n-2] += _add(A[n,n-1]*B[n-1,n-2]) - C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1]) - C[n,n ] += _add(A[n,n-1]*B[n-1,n ] + A[n,n]*B[n,n ]) - end # inbounds - C -end - -function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C) - check_A_mul_B!_sizes(C, A, B) - n = size(A,1) - iszero(n) && return C - n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) - _rmul_or_fill!(C, _add.beta) # see the same use above - iszero(_add.alpha) && return C - Al = _diag(A, -1) - Ad = _diag(A, 0) - Au = _diag(A, 1) - Bd = B.diag - @inbounds begin - # first row of C - C[1,1] += _add(A[1,1]*B[1,1]) - C[1,2] += _add(A[1,2]*B[2,2]) - # second row of C - C[2,1] += _add(A[2,1]*B[1,1]) - C[2,2] += _add(A[2,2]*B[2,2]) - C[2,3] += _add(A[2,3]*B[3,3]) - for j in 3:n-2 - C[j, j-1] += _add(Al[j-1]*Bd[j-1]) - C[j, j ] += _add(Ad[j ]*Bd[j ]) - C[j, j+1] += _add(Au[j ]*Bd[j+1]) - end - # row before last of C - C[n-1,n-2] += _add(A[n-1,n-2]*B[n-2,n-2]) - C[n-1,n-1] += _add(A[n-1,n-1]*B[n-1,n-1]) - C[n-1,n ] += _add(A[n-1, n]*B[n ,n ]) - # last row of C - C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1]) - C[n,n ] += _add(A[n,n ]*B[n, n ]) - end # inbounds - C -end - -function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, B) - nA = size(A,1) - nB = size(B,2) - if !(size(C,1) == size(B,1) == nA) - throw(DimensionMismatch("A has first dimension $nA, B has $(size(B,1)), C has $(size(C,1)) but all must match")) - end - if size(C,2) != nB - throw(DimensionMismatch("A has second dimension $nA, B has $(size(B,2)), C has $(size(C,2)) but all must match")) - end - iszero(nA) && return C - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) - l = _diag(A, -1) - d = _diag(A, 0) - u = _diag(A, 1) - @inbounds begin - for j = 1:nB - b₀, b₊ = B[1, j], B[2, j] - _modify!(_add, d[1]*b₀ + u[1]*b₊, C, (1, j)) - for i = 2:nA - 1 - b₋, b₀, b₊ = b₀, b₊, B[i + 1, j] - _modify!(_add, l[i - 1]*b₋ + d[i]*b₀ + u[i]*b₊, C, (i, j)) - end - _modify!(_add, l[nA - 1]*b₀ + d[nA]*b₊, C, (nA, j)) - end - end - C -end - -function 
_mul!(C::AbstractMatrix, A::AbstractMatrix, B::BiTriSym, _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, A) - check_A_mul_B!_sizes(C, A, B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - n = size(A,1) - m = size(B,2) - if n <= 3 || m <= 1 - return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) - end - Bl = _diag(B, -1) - Bd = _diag(B, 0) - Bu = _diag(B, 1) - @inbounds begin - # first and last column of C - B11 = Bd[1] - B21 = Bl[1] - Bmm = Bd[m] - Bm₋1m = Bu[m-1] - for i in 1:n - _modify!(_add, A[i,1] * B11 + A[i, 2] * B21, C, (i, 1)) - _modify!(_add, A[i, m-1] * Bm₋1m + A[i, m] * Bmm, C, (i, m)) - end - # middle columns of C - for j = 2:m-1 - Bj₋1j = Bu[j-1] - Bjj = Bd[j] - Bj₊1j = Bl[j] - for i = 1:n - _modify!(_add, A[i, j-1] * Bj₋1j + A[i, j]*Bjj + A[i, j+1] * Bj₊1j, C, (i, j)) - end - end - end # inbounds - C -end - -function _mul!(C::AbstractMatrix, A::Diagonal, B::BiTriSym, _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C) - check_A_mul_B!_sizes(C, A, B) - n = size(A,1) - n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) - _rmul_or_fill!(C, _add.beta) # see the same use above - iszero(_add.alpha) && return C - Ad = A.diag - Bl = _diag(B, -1) - Bd = _diag(B, 0) - Bu = _diag(B, 1) - @inbounds begin - # first row of C - C[1,1] += _add(A[1,1]*B[1,1]) - C[1,2] += _add(A[1,1]*B[1,2]) - # second row of C - C[2,1] += _add(A[2,2]*B[2,1]) - C[2,2] += _add(A[2,2]*B[2,2]) - C[2,3] += _add(A[2,2]*B[2,3]) - for j in 3:n-2 - Ajj = Ad[j] - C[j, j-1] += _add(Ajj*Bl[j-1]) - C[j, j ] += _add(Ajj*Bd[j]) - C[j, j+1] += _add(Ajj*Bu[j]) - end - # row before last of C - C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2]) - C[n-1,n-1] += _add(A[n-1,n-1]*B[n-1,n-1]) - C[n-1,n ] += _add(A[n-1,n-1]*B[n-1,n ]) - # last row of C - C[n,n-1] += _add(A[n,n]*B[n,n-1]) - C[n,n ] += _add(A[n,n]*B[n,n ]) - end # inbounds - C -end - -function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal) - TS = promote_op(matprod, eltype(A), eltype(B)) - C = mul!(similar(A, TS, size(A)), A, B) - return B.uplo == 'U' ? UpperTriangular(C) : C -end - -function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal) - TS = promote_op(matprod, eltype(A), eltype(B)) - C = mul!(similar(A, TS, size(A)), A, B) - return B.uplo == 'L' ? LowerTriangular(C) : C -end - -function *(A::Bidiagonal, B::UpperOrUnitUpperTriangular) - TS = promote_op(matprod, eltype(A), eltype(B)) - C = mul!(similar(B, TS, size(B)), A, B) - return A.uplo == 'U' ? UpperTriangular(C) : C -end - -function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular) - TS = promote_op(matprod, eltype(A), eltype(B)) - C = mul!(similar(B, TS, size(B)), A, B) - return A.uplo == 'L' ? 
LowerTriangular(C) : C -end - -function *(A::Diagonal, B::SymTridiagonal) - TS = promote_op(*, eltype(A), eltype(B)) - out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1)) - mul!(out, A, B) -end - -function *(A::SymTridiagonal, B::Diagonal) - TS = promote_op(*, eltype(A), eltype(B)) - out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1)) - mul!(out, A, B) -end - -function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector) - require_one_based_indexing(x, y) - nx, ny = length(x), length(y) - (nx == size(B, 1) == ny) || throw(DimensionMismatch()) - if nx ≤ 1 - nx == 0 && return dot(zero(eltype(x)), zero(eltype(B)), zero(eltype(y))) - return dot(x[1], B.dv[1], y[1]) - end - ev, dv = B.ev, B.dv - @inbounds if B.uplo == 'U' - x₀ = x[1] - r = dot(x[1], dv[1], y[1]) - for j in 2:nx-1 - x₋, x₀ = x₀, x[j] - r += dot(adjoint(ev[j-1])*x₋ + adjoint(dv[j])*x₀, y[j]) - end - r += dot(adjoint(ev[nx-1])*x₀ + adjoint(dv[nx])*x[nx], y[nx]) - return r - else # B.uplo == 'L' - x₀ = x[1] - x₊ = x[2] - r = dot(adjoint(dv[1])*x₀ + adjoint(ev[1])*x₊, y[1]) - for j in 2:nx-1 - x₀, x₊ = x₊, x[j+1] - r += dot(adjoint(dv[j])*x₀ + adjoint(ev[j])*x₊, y[j]) - end - r += dot(x₊, dv[nx], y[nx]) - return r - end -end - -#Linear solvers -#Generic solver using naive substitution -ldiv!(A::Bidiagonal, b::AbstractVecOrMat) = @inline ldiv!(b, A, b) -function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat) - require_one_based_indexing(c, A, b) - N = size(A, 2) - mb, nb = size(b, 1), size(b, 2) - if N != mb - throw(DimensionMismatch("second dimension of A, $N, does not match first dimension of b, $mb")) - end - mc, nc = size(c, 1), size(c, 2) - if mc != mb || nc != nb - throw(DimensionMismatch("size of result, ($mc, $nc), does not match the size of b, ($mb, $nb)")) - end - - if N == 0 - return copyto!(c, b) - end - - zi = findfirst(iszero, A.dv) - isnothing(zi) || throw(SingularException(zi)) - - @inbounds for j in 1:nb - if A.uplo == 'L' #do colwise forward substitution - c[1,j] = bi1 = A.dv[1] \ b[1,j] - for i in 2:N - c[i,j] = bi1 = A.dv[i] \ (b[i,j] - A.ev[i - 1] * bi1) - end - else #do colwise backward substitution - c[N,j] = bi1 = A.dv[N] \ b[N,j] - for i in (N - 1):-1:1 - c[i,j] = bi1 = A.dv[i] \ (b[i,j] - A.ev[i] * bi1) - end - end - end - return c -end -ldiv!(A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b) -ldiv!(c::AbstractVecOrMat, A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = - (t = adj_or_trans(A); _rdiv!(t(c), t(b), t(A)); return c) - -### Generic promotion methods and fallbacks -\(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(_initarray(\, eltype(A), eltype(B), B), A, B) -\(xA::AdjOrTrans{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(xA) \ B - -### Triangular specializations -for tri in (:UpperTriangular, :UnitUpperTriangular) - @eval function \(B::Bidiagonal, U::$tri) - A = ldiv!(_initarray(\, eltype(B), eltype(U), U), B, U) - return B.uplo == 'U' ? UpperTriangular(A) : A - end - @eval function \(U::$tri, B::Bidiagonal) - A = ldiv!(_initarray(\, eltype(U), eltype(B), U), U, B) - return B.uplo == 'U' ? UpperTriangular(A) : A - end -end -for tri in (:LowerTriangular, :UnitLowerTriangular) - @eval function \(B::Bidiagonal, L::$tri) - A = ldiv!(_initarray(\, eltype(B), eltype(L), L), B, L) - return B.uplo == 'L' ? 
LowerTriangular(A) : A - end - @eval function \(L::$tri, B::Bidiagonal) - A = ldiv!(_initarray(\, eltype(L), eltype(B), L), L, B) - return B.uplo == 'L' ? LowerTriangular(A) : A - end -end - -### Diagonal specialization -function \(B::Bidiagonal, D::Diagonal) - A = ldiv!(_initarray(\, eltype(B), eltype(D), D), B, D) - return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A) -end - -function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal) - require_one_based_indexing(C, A, B) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - mc, nc = size(C) - if mc != m || nc != n - throw(DimensionMismatch("expect output to have size ($m, $n), but got ($mc, $nc)")) - end - - zi = findfirst(iszero, B.dv) - isnothing(zi) || throw(SingularException(zi)) - - if B.uplo == 'L' - diagB = B.dv[n] - for i in 1:m - C[i,n] = A[i,n] / diagB - end - for j in n-1:-1:1 - diagB = B.dv[j] - offdiagB = B.ev[j] - for i in 1:m - C[i,j] = (A[i,j] - C[i,j+1]*offdiagB)/diagB - end - end - else - diagB = B.dv[1] - for i in 1:m - C[i,1] = A[i,1] / diagB - end - for j in 2:n - diagB = B.dv[j] - offdiagB = B.ev[j-1] - for i = 1:m - C[i,j] = (A[i,j] - C[i,j-1]*offdiagB)/diagB - end - end - end - C -end -rdiv!(A::AbstractMatrix, B::Bidiagonal) = @inline _rdiv!(A, A, B) -rdiv!(A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B) -_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) = - (t = adj_or_trans(B); ldiv!(t(C), t(B), t(A)); return C) - -/(A::AbstractMatrix, B::Bidiagonal) = _rdiv!(_initarray(/, eltype(A), eltype(B), A), A, B) - -### Triangular specializations -for tri in (:UpperTriangular, :UnitUpperTriangular) - @eval function /(U::$tri, B::Bidiagonal) - A = _rdiv!(_initarray(/, eltype(U), eltype(B), U), U, B) - return B.uplo == 'U' ? UpperTriangular(A) : A - end - @eval function /(B::Bidiagonal, U::$tri) - A = _rdiv!(_initarray(/, eltype(B), eltype(U), U), B, U) - return B.uplo == 'U' ? UpperTriangular(A) : A - end -end -for tri in (:LowerTriangular, :UnitLowerTriangular) - @eval function /(L::$tri, B::Bidiagonal) - A = _rdiv!(_initarray(/, eltype(L), eltype(B), L), L, B) - return B.uplo == 'L' ? LowerTriangular(A) : A - end - @eval function /(B::Bidiagonal, L::$tri) - A = _rdiv!(_initarray(/, eltype(B), eltype(L), L), B, L) - return B.uplo == 'L' ? LowerTriangular(A) : A - end -end - -### Diagonal specialization -function /(D::Diagonal, B::Bidiagonal) - A = _rdiv!(_initarray(/, eltype(D), eltype(B), D), D, B) - return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A) -end - -/(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = A / copy(B) -/(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = A / copy(B) -# disambiguation -/(A::AdjointAbsVec, B::Bidiagonal) = adjoint(adjoint(B) \ parent(A)) -/(A::TransposeAbsVec, B::Bidiagonal) = transpose(transpose(B) \ parent(A)) -/(A::AdjointAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A)) -/(A::TransposeAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = transpose(transpose(B) \ parent(A)) -/(A::AdjointAbsVec, B::Adjoint{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A)) -/(A::TransposeAbsVec, B::Adjoint{<:Any,<:Bidiagonal}) = transpose(transpose(B) \ parent(A)) - -factorize(A::Bidiagonal) = A -function inv(B::Bidiagonal{T}) where T - n = size(B, 1) - dest = zeros(typeof(inv(oneunit(T))), (n, n)) - ldiv!(dest, B, Diagonal{typeof(one(T)/one(T))}(I, n)) - return B.uplo == 'U' ? 
UpperTriangular(dest) : LowerTriangular(dest) -end - -# Eigensystems -eigvals(M::Bidiagonal) = copy(M.dv) -function eigvecs(M::Bidiagonal{T}) where T - n = length(M.dv) - Q = Matrix{T}(undef, n,n) - blks = [0; findall(iszero, M.ev); n] - v = zeros(T, n) - if M.uplo == 'U' - for idx_block = 1:length(blks) - 1, i = blks[idx_block] + 1:blks[idx_block + 1] #index of eigenvector - fill!(v, zero(T)) - v[blks[idx_block] + 1] = one(T) - for j = blks[idx_block] + 1:i - 1 #Starting from j=i, eigenvector elements will be 0 - v[j+1] = (M.dv[i] - M.dv[j])/M.ev[j] * v[j] - end - c = norm(v) - for j = 1:n - Q[j, i] = v[j] / c - end - end - else - for idx_block = 1:length(blks) - 1, i = blks[idx_block + 1]:-1:blks[idx_block] + 1 #index of eigenvector - fill!(v, zero(T)) - v[blks[idx_block+1]] = one(T) - for j = (blks[idx_block+1] - 1):-1:max(1, (i - 1)) #Starting from j=i, eigenvector elements will be 0 - v[j] = (M.dv[i] - M.dv[j+1])/M.ev[j] * v[j+1] - end - c = norm(v) - for j = 1:n - Q[j, i] = v[j] / c - end - end - end - Q #Actually Triangular -end -eigen(M::Bidiagonal) = Eigen(eigvals(M), eigvecs(M)) - -Base._sum(A::Bidiagonal, ::Colon) = sum(A.dv) + sum(A.ev) -function Base._sum(A::Bidiagonal, dims::Integer) - res = Base.reducedim_initarray(A, dims, zero(eltype(A))) - n = length(A.dv) - if n == 0 - # Just to be sure. This shouldn't happen since there is a check whether - # length(A.dv) == length(A.ev) + 1 in the constructor. - return res - elseif n == 1 - res[1] = A.dv[1] - return res - end - @inbounds begin - if (dims == 1 && A.uplo == 'U') || (dims == 2 && A.uplo == 'L') - res[1] = A.dv[1] - for i = 2:length(A.dv) - res[i] = A.ev[i-1] + A.dv[i] - end - elseif (dims == 1 && A.uplo == 'L') || (dims == 2 && A.uplo == 'U') - for i = 1:length(A.dv)-1 - res[i] = A.ev[i] + A.dv[i] - end - res[end] = A.dv[end] - elseif dims >= 3 - if A.uplo == 'U' - for i = 1:length(A.dv)-1 - res[i,i] = A.dv[i] - res[i,i+1] = A.ev[i] - end - else - for i = 1:length(A.dv)-1 - res[i,i] = A.dv[i] - res[i+1,i] = A.ev[i] - end - end - res[end,end] = A.dv[end] - end - end - res -end diff --git a/stdlib/LinearAlgebra/src/bitarray.jl b/stdlib/LinearAlgebra/src/bitarray.jl deleted file mode 100644 index d1857c3c38659..0000000000000 --- a/stdlib/LinearAlgebra/src/bitarray.jl +++ /dev/null @@ -1,272 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -function dot(x::BitVector, y::BitVector) - # simplest way to mimic Array dot behavior - length(x) == length(y) || throw(DimensionMismatch()) - s = 0 - xc = x.chunks - yc = y.chunks - @inbounds for i = 1:length(xc) - s += count_ones(xc[i] & yc[i]) - end - s -end - -## slower than the unpacked version, which is MUCH slower -# than blas'd (this one saves storage though, keeping it commented -# just in case) -#function aTb(A::BitMatrix, B::BitMatrix) - #(mA, nA) = size(A) - #(mB, nB) = size(B) - #C = falses(nA, nB) - #if mA != mB; throw(DimensionMismatch()) end - #if mA == 0; return C; end - #col_ch = num_bit_chunks(mA) - ## TODO: avoid using aux chunks and copy (?) 
- #aux_chunksA = zeros(UInt64, col_ch) - #aux_chunksB = [zeros(UInt64, col_ch) for j=1:nB] - #for j = 1:nB - #Base.copy_chunks!(aux_chunksB[j], 1, B.chunks, (j-1)*mA+1, mA) - #end - #for i = 1:nA - #Base.copy_chunks!(aux_chunksA, 1, A.chunks, (i-1)*mA+1, mA) - #for j = 1:nB - #for k = 1:col_ch - ## TODO: improve - #C[i, j] += count_ones(aux_chunksA[k] & aux_chunksB[j][k]) - #end - #end - #end - #C -#end - -#aCb(A::BitMatrix{T}, B::BitMatrix{S}) where {T,S} = aTb(A, B) - -function triu(B::BitMatrix, k::Integer=0) - m,n = size(B) - if !(-m + 1 <= k <= n + 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least", - "$(-m + 1) and at most $(n + 1) in an $m-by-$n matrix"))) - end - A = falses(m,n) - Ac = A.chunks - Bc = B.chunks - for i = max(k+1,1):n - j = clamp((i - 1) * m + 1, 1, i * m) - Base.copy_chunks!(Ac, j, Bc, j, min(i-k, m)) - end - A -end - -function tril(B::BitMatrix, k::Integer=0) - m,n = size(B) - if !(-m - 1 <= k <= n - 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-m - 1) and at most $(n - 1) in an $m-by-$n matrix"))) - end - A = falses(m, n) - Ac = A.chunks - Bc = B.chunks - for i = 1:min(n, m+k) - j = clamp((i - 1) * m + i - k, 1, i * m) - Base.copy_chunks!(Ac, j, Bc, j, max(m-i+k+1, 0)) - end - A -end - -## diag - -function diag(B::BitMatrix) - n = minimum(size(B)) - v = similar(B, n) - for i = 1:n - v[i] = B[i,i] - end - v -end - -## norm and rank - -svd(A::BitMatrix) = svd(float(A)) -qr(A::BitMatrix) = qr(float(A)) - -## kron - -@inline function kron!(R::BitVector, a::BitVector, b::BitVector) - m = length(a) - n = length(b) - @boundscheck length(R) == n*m || throw(DimensionMismatch()) - Rc = R.chunks - bc = b.chunks - for j = 1:m - a[j] && Base.copy_chunks!(Rc, (j-1)*n+1, bc, 1, n) - end - return R -end - -function kron(a::BitVector, b::BitVector) - m = length(a) - n = length(b) - R = falses(n * m) - return @inbounds kron!(R, a, b) -end - -function kron!(R::BitMatrix, a::BitMatrix, b::BitMatrix) - mA,nA = size(a) - mB,nB = size(b) - @boundscheck size(R) == (mA*mB, nA*nB) || throw(DimensionMismatch()) - - for i = 1:mA - ri = (1:mB) .+ ((i-1)*mB) - for j = 1:nA - if a[i,j] - rj = (1:nB) .+ ((j-1)*nB) - R[ri,rj] = b - end - end - end - return R -end - -function kron(a::BitMatrix, b::BitMatrix) - mA,nA = size(a) - mB,nB = size(b) - R = falses(mA*mB, nA*nB) - return @inbounds kron!(R, a, b) -end - -## Structure query functions - -issymmetric(A::BitMatrix) = size(A, 1)==size(A, 2) && count(!iszero, A - copy(A'))==0 -ishermitian(A::BitMatrix) = issymmetric(A) - -function nonzero_chunks(chunks::Vector{UInt64}, pos0::Int, pos1::Int) - k0, l0 = Base.get_chunks_id(pos0) - k1, l1 = Base.get_chunks_id(pos1) - - delta_k = k1 - k0 - - z = UInt64(0) - u = ~z - if delta_k == 0 - msk_0 = (u << l0) & ~(u << l1 << 1) - else - msk_0 = (u << l0) - msk_1 = ~(u << l1 << 1) - end - - @inbounds begin - (chunks[k0] & msk_0) == z || return true - delta_k == 0 && return false - for i = k0 + 1 : k1 - 1 - chunks[i] == z || return true - end - (chunks[k1] & msk_1)==z || return true - end - return false -end - -function istriu(A::BitMatrix) - m, n = size(A) - for j = 1:min(n,m-1) - stride = (j-1) * m - nonzero_chunks(A.chunks, stride+j+1, stride+m) && return false - end - return true -end - -function istril(A::BitMatrix) - m, n = size(A) - (m == 0 || n == 0) && return true - for j = 2:n - stride = (j-1) * m - nonzero_chunks(A.chunks, stride+1, stride+min(j-1,m)) && return false - end - return true -end - -# fast 8x8 bit transpose from Henry S. 
Warrens's "Hacker's Delight" -# http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt -function transpose8x8(x::UInt64) - y = x - t = xor(y, y >>> 7) & 0x00aa00aa00aa00aa - y = xor(y, t, t << 7) - t = xor(y, y >>> 14) & 0x0000cccc0000cccc - y = xor(y, t, t << 14) - t = xor(y, y >>> 28) & 0x00000000f0f0f0f0 - return xor(y, t, t << 28) -end - -function form_8x8_chunk(Bc::Vector{UInt64}, i1::Int, i2::Int, m::Int, cgap::Int, cinc::Int, nc::Int, msk8::UInt64) - x = UInt64(0) - - k, l = Base.get_chunks_id(i1 + (i2 - 1) * m) - r = 0 - for j = 1:8 - k > nc && break - x |= ((Bc[k] >>> l) & msk8) << r - if l + 8 >= 64 && nc > k - r0 = 8 - Base._mod64(l + 8) - x |= (Bc[k + 1] & (msk8 >>> r0)) << (r + r0) - end - k += cgap + (l + cinc >= 64 ? 1 : 0) - l = Base._mod64(l + cinc) - r += 8 - end - return x -end - -# note: assumes B is filled with 0's -function put_8x8_chunk(Bc::Vector{UInt64}, i1::Int, i2::Int, x::UInt64, m::Int, cgap::Int, cinc::Int, nc::Int, msk8::UInt64) - k, l = Base.get_chunks_id(i1 + (i2 - 1) * m) - r = 0 - for j = 1:8 - k > nc && break - Bc[k] |= ((x >>> r) & msk8) << l - if l + 8 >= 64 && nc > k - r0 = 8 - Base._mod64(l + 8) - Bc[k + 1] |= ((x >>> (r + r0)) & (msk8 >>> r0)) - end - k += cgap + (l + cinc >= 64 ? 1 : 0) - l = Base._mod64(l + cinc) - r += 8 - end - return -end - -adjoint(B::Union{BitVector,BitMatrix}) = Adjoint(B) -transpose(B::Union{BitVector,BitMatrix}) = Transpose(B) -Base.copy(B::Adjoint{Bool,BitMatrix}) = transpose!(falses(size(B)), B.parent) -Base.copy(B::Transpose{Bool,BitMatrix}) = transpose!(falses(size(B)), B.parent) -function transpose!(C::BitMatrix, B::BitMatrix) - @boundscheck size(C) == reverse(size(B)) || throw(DimensionMismatch()) - l1, l2 = size(B) - - cgap1, cinc1 = Base._div64(l1), Base._mod64(l1) - cgap2, cinc2 = Base._div64(l2), Base._mod64(l2) - - Bc = B.chunks - Cc = C.chunks - - nc = length(Bc) - - for i = 1:8:l1 - msk8_1 = UInt64(0xff) - if (l1 < i + 7) - msk8_1 >>>= i + 7 - l1 - end - - for j = 1:8:l2 - x = form_8x8_chunk(Bc, i, j, l1, cgap1, cinc1, nc, msk8_1) - x = transpose8x8(x) - - msk8_2 = UInt64(0xff) - if (l2 < j + 7) - msk8_2 >>>= j + 7 - l2 - end - - put_8x8_chunk(Cc, j, i, x, l2, cgap2, cinc2, nc, msk8_2) - end - end - return C -end diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl deleted file mode 100644 index 8da19baee5045..0000000000000 --- a/stdlib/LinearAlgebra/src/blas.jl +++ /dev/null @@ -1,2140 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" -Interface to BLAS subroutines. -""" -module BLAS - -import Base: copyto! 
-using Base: require_one_based_indexing, USE_BLAS64 - -export -# Note: `xFUNC_NAME` is a placeholder for not exported BLAS functions -# ref: http://www.netlib.org/blas/blasqr.pdf -# Level 1 - # xROTG - # xROTMG - rot!, - # xROTM - # xSWAP - scal!, - scal, - blascopy!, - # xAXPY!, - # xAXPBY!, - # xDOT - dotc, - dotu, - # xxDOT - nrm2, - asum, - iamax, -# Level 2 - gemv!, - gemv, - gbmv!, - gbmv, - hemv!, - hemv, - # xHBMV - hpmv!, - symv!, - symv, - sbmv!, - sbmv, - spmv!, - trmv!, - trmv, - # xTBMV - # xTPMV - trsv!, - trsv, - # xTBSV - # xTPSV - ger!, - # xGERU - # xGERC - her!, - # xHPR - # xHER2 - # xHPR2 - syr!, - spr!, - # xSYR2 - # xSPR2 -# Level 3 - gemm!, - gemm, - symm!, - symm, - hemm!, - hemm, - syrk!, - syrk, - herk!, - herk, - syr2k!, - syr2k, - her2k!, - her2k, - trmm!, - trmm, - trsm!, - trsm - -using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1 - -include("lbt.jl") - -# Legacy bindings that some packages (such as NNlib.jl) use. -# We maintain these for backwards-compatibility but new packages -# should not look at these, instead preferring to parse the output -# of BLAS.get_config() -const libblas = libblastrampoline -const liblapack = libblastrampoline - -vendor() = :lbt - -""" - get_config() - -Return an object representing the current `libblastrampoline` configuration. - -!!! compat "Julia 1.7" - `get_config()` requires at least Julia 1.7. -""" -get_config() = lbt_get_config() - -if USE_BLAS64 - macro blasfunc(x) - return Expr(:quote, Symbol(x, "64_")) - end -else - macro blasfunc(x) - return Expr(:quote, x) - end -end - -_tryparse_env_int(key) = tryparse(Int, get(ENV, key, "")) - - -""" - set_num_threads(n::Integer) - set_num_threads(::Nothing) - -Set the number of threads the BLAS library should use equal to `n::Integer`. - -Also accepts `nothing`, in which case julia tries to guess the default number of threads. -Passing `nothing` is discouraged and mainly exists for historical reasons. -""" -set_num_threads(nt::Integer)::Nothing = lbt_set_num_threads(Int32(nt)) -function set_num_threads(::Nothing) - nt = something( - _tryparse_env_int("OPENBLAS_NUM_THREADS"), - _tryparse_env_int("OMP_NUM_THREADS"), - _tryparse_env_int("VECLIB_MAXIMUM_THREADS"), - max(1, Sys.CPU_THREADS ÷ 2), - ) - return set_num_threads(nt) -end - -""" - get_num_threads() - -Get the number of threads the BLAS library is using. - -!!! compat "Julia 1.6" - `get_num_threads` requires at least Julia 1.6. -""" -get_num_threads()::Int = lbt_get_num_threads() - -function check() - # TODO: once we have bitfields of the BLAS functions that are actually forwarded, - # ensure that we have a complete set here (warning on an incomplete BLAS implementation) - config = get_config() - - # Ensure that one of our loaded libraries satisfies our interface requirement - interface = USE_BLAS64 ? :ilp64 : :lp64 - if !any(lib.interface == interface for lib in config.loaded_libs) - interfacestr = uppercase(string(interface)) - @error("No loaded BLAS libraries were built with $(interfacestr) support") - println("Quitting.") - exit() - end -end - -"Check that upper/lower (for special matrices) is correctly specified" -function chkuplo(uplo::AbstractChar) - if !(uplo == 'U' || uplo == 'L') - throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo")) - end - uplo -end - -# Level 1 -# A help function to pick the pointer and inc for 1d like inputs. 
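# `vec_pointer_stride` returns a (pointer, stride) pair for a vector-like input:
# contiguous arrays short-circuit to (pointer(x), 1); otherwise the effective
# stride is computed (erroring for inputs that are not vector-like), the optional
# `stride0check` exception is thrown for zero strides, and for a negative stride
# the base pointer is shifted down to the lowest-addressed element, matching the
# BLAS convention for negative increments.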
-@inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing) - Base._checkcontiguous(Bool, x) && return pointer(x), 1 # simplify runtime check when possible - st, ptr = checkedstride(x), pointer(x) - isnothing(stride0check) || (st == 0 && throw(stride0check)) - ptr += min(st, 0) * sizeof(eltype(x)) * (length(x) - 1) - ptr, st -end -function checkedstride(x::AbstractArray) - szs::Dims = size(x) - sts::Dims = strides(x) - _, st, n = Base.merge_adjacent_dim(szs, sts) - n === ndims(x) && return st - throw(ArgumentError("only support vector like inputs")) -end -## copy - -""" - blascopy!(n, X, incx, Y, incy) - -Copy `n` elements of array `X` with stride `incx` to array `Y` with stride `incy`. Returns `Y`. -""" -function blascopy! end - -for (fname, elty) in ((:dcopy_,:Float64), - (:scopy_,:Float32), - (:zcopy_,:ComplexF64), - (:ccopy_,:ComplexF32)) - @eval begin - # SUBROUTINE DCOPY(N,DX,INCX,DY,INCY) - function blascopy!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - n, DX, incx, DY, incy) - DY - end - end -end - - -## rot - -""" - rot!(n, X, incx, Y, incy, c, s) - -Overwrite `X` with `c*X + s*Y` and `Y` with `-conj(s)*X + c*Y` for the first `n` elements of array `X` with stride `incx` and -first `n` elements of array `Y` with stride `incy`. Returns `X` and `Y`. - -!!! compat "Julia 1.5" - `rot!` requires at least Julia 1.5. -""" -function rot! end - -for (fname, elty, cty, sty, lib) in ((:drot_, :Float64, :Float64, :Float64, libblastrampoline), - (:srot_, :Float32, :Float32, :Float32, libblastrampoline), - (:zdrot_, :ComplexF64, :Float64, :Float64, libblastrampoline), - (:csrot_, :ComplexF32, :Float32, :Float32, libblastrampoline), - (:zrot_, :ComplexF64, :Float64, :ComplexF64, libblastrampoline), - (:crot_, :ComplexF32, :Float32, :ComplexF32, libblastrampoline)) - @eval begin - # SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S) - function rot!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer, C::$cty, S::$sty) - ccall((@blasfunc($fname), $lib), Cvoid, - (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$cty}, Ref{$sty}), - n, DX, incx, DY, incy, C, S) - DX, DY - end - end -end - -## scal - -""" - scal!(n, a, X, incx) - scal!(a, X) - -Overwrite `X` with `a*X` for the first `n` elements of array `X` with stride `incx`. Returns `X`. - -If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used. -""" -function scal! end - -""" - scal(n, a, X, incx) - scal(a, X) - -Return `X` scaled by `a` for the first `n` elements of array `X` with stride `incx`. - -If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used. 
-""" -function scal end - -for (fname, elty) in ((:dscal_,:Float64), - (:sscal_,:Float32), - (:zscal_,:ComplexF64), - (:cscal_,:ComplexF32)) - @eval begin - # SUBROUTINE DSCAL(N,DA,DX,INCX) - function scal!(n::Integer, DA::$elty, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}), - n, DA, DX, incx) - DX - end - - function scal!(DA::$elty, DX::AbstractArray{$elty}) - p, st = vec_pointer_stride(DX, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve DX scal!(length(DX), DA, p, abs(st)) - DX - end - end -end -scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx) -scal(DA, DX) = scal!(DA, copy(DX)) - -## dot - -""" - dot(n, X, incx, Y, incy) - -Dot product of two vectors consisting of `n` elements of array `X` with stride `incx` and -`n` elements of array `Y` with stride `incy`. - -# Examples -```jldoctest -julia> BLAS.dot(10, fill(1.0, 10), 1, fill(1.0, 20), 2) -10.0 -``` -""" -function dot end - -""" - dotc(n, X, incx, U, incy) - -Dot function for two complex vectors, consisting of `n` elements of array `X` -with stride `incx` and `n` elements of array `U` with stride `incy`, -conjugating the first vector. - -# Examples -```jldoctest -julia> BLAS.dotc(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2) -10.0 - 10.0im -``` -""" -function dotc end - -""" - dotu(n, X, incx, Y, incy) - -Dot function for two complex vectors consisting of `n` elements of array `X` -with stride `incx` and `n` elements of array `Y` with stride `incy`. - -# Examples -```jldoctest -julia> BLAS.dotu(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2) --10.0 + 10.0im -``` -""" -function dotu end - -for (fname, elty) in ((:cblas_ddot,:Float64), - (:cblas_sdot,:Float32)) - @eval begin - # DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) - # * .. Scalar Arguments .. - # INTEGER INCX,INCY,N - # * .. - # * .. Array Arguments .. - # DOUBLE PRECISION DX(*),DY(*) - function dot(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) - ccall((@blasfunc($fname), libblastrampoline), $elty, - (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt), - n, DX, incx, DY, incy) - end - end -end -for (fname, elty) in ((:cblas_zdotc_sub,:ComplexF64), - (:cblas_cdotc_sub,:ComplexF32)) - @eval begin - # DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) - # * .. Scalar Arguments .. - # INTEGER INCX,INCY,N - # * .. - # * .. Array Arguments .. - # DOUBLE PRECISION DX(*),DY(*) - function dotc(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) - result = Ref{$elty}() - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}), - n, DX, incx, DY, incy, result) - result[] - end - end -end -for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64), - (:cblas_cdotu_sub,:ComplexF32)) - @eval begin - # DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) - # * .. Scalar Arguments .. - # INTEGER INCX,INCY,N - # * .. - # * .. Array Arguments .. 
- # DOUBLE PRECISION DX(*),DY(*) - function dotu(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer) - result = Ref{$elty}() - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}), - n, DX, incx, DY, incy, result) - result[] - end - end -end - -for (elty, f) in ((Float32, :dot), (Float64, :dot), - (ComplexF32, :dotc), (ComplexF64, :dotc), - (ComplexF32, :dotu), (ComplexF64, :dotu)) - @eval begin - function $f(x::AbstractArray{$elty}, y::AbstractArray{$elty}) - n, m = length(x), length(y) - n == m || throw(DimensionMismatch(lazy"dot product arguments have lengths $n and $m")) - GC.@preserve x y $f(n, vec_pointer_stride(x)..., vec_pointer_stride(y)...) - end - end -end - -## nrm2 - -""" - nrm2(n, X, incx) - -2-norm of a vector consisting of `n` elements of array `X` with stride `incx`. - -# Examples -```jldoctest -julia> BLAS.nrm2(4, fill(1.0, 8), 2) -2.0 - -julia> BLAS.nrm2(1, fill(1.0, 8), 2) -1.0 -``` -""" -function nrm2 end - -for (fname, elty, ret_type) in ((:dnrm2_,:Float64,:Float64), - (:snrm2_,:Float32,:Float32), - (:dznrm2_,:ComplexF64,:Float64), - (:scnrm2_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE DNRM2(N,X,INCX) - function nrm2(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer) - ccall((@blasfunc($fname), libblastrampoline), $ret_type, - (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - n, X, incx) - end - end -end -# openblas returns 0 for negative stride -function nrm2(x::AbstractArray) - p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x nrm2(length(x), p, abs(st)) -end - -## asum - -""" - asum(n, X, incx) - -Sum of the magnitudes of the first `n` elements of array `X` with stride `incx`. - -For a real array, the magnitude is the absolute value. For a complex array, the -magnitude is the sum of the absolute value of the real part and the absolute value -of the imaginary part. - -# Examples -```jldoctest -julia> BLAS.asum(5, fill(1.0im, 10), 2) -5.0 - -julia> BLAS.asum(2, fill(1.0im, 10), 5) -2.0 -``` -""" -function asum end - -for (fname, elty, ret_type) in ((:dasum_,:Float64,:Float64), - (:sasum_,:Float32,:Float32), - (:dzasum_,:ComplexF64,:Float64), - (:scasum_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ASUM(N, X, INCX) - function asum(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer) - ccall((@blasfunc($fname), libblastrampoline), $ret_type, - (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - n, X, incx) - end - end -end -function asum(x::AbstractArray) - p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x asum(length(x), p, abs(st)) -end - -## axpy - -""" - axpy!(a, X, Y) - -Overwrite `Y` with `X*a + Y`, where `a` is a scalar. Return `Y`. - -# Examples -```jldoctest -julia> x = [1.; 2; 3]; - -julia> y = [4. ;; 5 ;; 6]; - -julia> BLAS.axpy!(2, x, y) -1×3 Matrix{Float64}: - 6.0 9.0 12.0 -``` -""" -function axpy! end - -for (fname, elty) in ((:daxpy_,:Float64), - (:saxpy_,:Float32), - (:zaxpy_,:ComplexF64), - (:caxpy_,:ComplexF32)) - @eval begin - # SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY) - # DY <- DA*DX + DY - #* .. Scalar Arguments .. - # DOUBLE PRECISION DA - # INTEGER INCX,INCY,N - #* .. Array Arguments .. 
- # DOUBLE PRECISION DX(*),DY(*) - function axpy!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer, dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - n, alpha, dx, incx, dy, incy) - dy - end - end -end - -function axpy!(alpha::Number, x::AbstractArray{T}, y::AbstractArray{T}) where T<:BlasFloat - if length(x) != length(y) - throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))")) - end - GC.@preserve x y axpy!(length(x), T(alpha), vec_pointer_stride(x)..., - vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) - y -end - -function axpy!(alpha::Number, x::Array{T}, rx::AbstractRange{Ti}, - y::Array{T}, ry::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer} - if length(rx) != length(ry) - throw(DimensionMismatch("ranges of differing lengths")) - end - if minimum(rx) < 1 || maximum(rx) > length(x) - throw(ArgumentError(lazy"range out of bounds for x, of length $(length(x))")) - end - if minimum(ry) < 1 || maximum(ry) > length(y) - throw(ArgumentError(lazy"range out of bounds for y, of length $(length(y))")) - end - GC.@preserve x y axpy!( - length(rx), - T(alpha), - pointer(x, minimum(rx)), - step(rx), - pointer(y, minimum(ry)), - step(ry)) - - return y -end - -""" - axpby!(a, X, b, Y) - -Overwrite `Y` with `X*a + Y*b`, where `a` and `b` are scalars. Return `Y`. - -# Examples -```jldoctest -julia> x = [1., 2, 3]; - -julia> y = [4., 5, 6]; - -julia> BLAS.axpby!(2., x, 3., y) -3-element Vector{Float64}: - 14.0 - 19.0 - 24.0 -``` -""" -function axpby! end - -for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32), - (:zaxpby_,:ComplexF64), (:caxpby_,:ComplexF32)) - @eval begin - # SUBROUTINE DAXPBY(N,DA,DX,INCX,DB,DY,INCY) - # DY <- DA*DX + DB*DY - #* .. Scalar Arguments .. - # DOUBLE PRECISION DA,DB - # INTEGER INCX,INCY,N - #* .. Array Arguments .. - # DOUBLE PRECISION DX(*),DY(*) - function axpby!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty}, - AbstractArray{$elty}}, incx::Integer, beta::($elty), - dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}), - n, alpha, dx, incx, beta, dy, incy) - dy - end - end -end - -function axpby!(alpha::Number, x::AbstractArray{T}, beta::Number, y::AbstractArray{T}) where T<:BlasFloat - require_one_based_indexing(x, y) - if length(x) != length(y) - throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))")) - end - GC.@preserve x y axpby!(length(x), T(alpha), vec_pointer_stride(x)..., T(beta), - vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...) - y -end - -## iamax -for (fname, elty) in ((:idamax_,:Float64), - (:isamax_,:Float32), - (:izamax_,:ComplexF64), - (:icamax_,:ComplexF32)) - @eval begin - function iamax(n::Integer, dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer) - ccall((@blasfunc($fname), libblastrampoline),BlasInt, - (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - n, dx, incx) - end - end -end -function iamax(dx::AbstractArray) - p, st = vec_pointer_stride(dx) - st <= 0 && return BlasInt(0) - iamax(length(dx), p, st) -end - -""" - iamax(n, dx, incx) - iamax(dx) - -Find the index of the element of `dx` with the maximum absolute value. 
`n` is the length of `dx`, and `incx` is the -stride. If `n` and `incx` are not provided, they assume default values of `n=length(dx)` and `incx=stride1(dx)`. -""" -iamax - -# Level 2 -## mv -### gemv -for (fname, elty) in ((:dgemv_,:Float64), - (:sgemv_,:Float32), - (:zgemv_,:ComplexF64), - (:cgemv_,:ComplexF32)) - @eval begin - #SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) - #* .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,LDA,M,N - # CHARACTER TRANS - #* .. Array Arguments .. - # DOUBLE PRECISION A(LDA,*),X(*),Y(*) - function gemv!(trans::AbstractChar, alpha::Union{($elty), Bool}, - A::AbstractVecOrMat{$elty}, X::AbstractVector{$elty}, - beta::Union{($elty), Bool}, Y::AbstractVector{$elty}) - require_one_based_indexing(A, X, Y) - m,n = size(A,1),size(A,2) - if trans == 'N' && (length(X) != n || length(Y) != m) - throw(DimensionMismatch(lazy"A has dimensions $(size(A)), X has length $(length(X)) and Y has length $(length(Y))")) - elseif trans == 'C' && (length(X) != m || length(Y) != n) - throw(DimensionMismatch(lazy"the adjoint of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))")) - elseif trans == 'T' && (length(X) != m || length(Y) != n) - throw(DimensionMismatch(lazy"the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))")) - end - chkstride1(A) - lda = stride(A,2) - pX, sX = vec_pointer_stride(X, ArgumentError("input vector with 0 stride is not allowed")) - pY, sY = vec_pointer_stride(Y, ArgumentError("dest vector with 0 stride is not allowed")) - pA = pointer(A) - if lda < 0 - pA += (size(A, 2) - 1) * lda * sizeof($elty) - lda = -lda - trans == 'N' ? (sX = -sX) : (sY = -sY) - end - lda >= size(A,1) || size(A,2) <= 1 || error("when `size(A,2) > 1`, `abs(stride(A,2))` must be at least `size(A,1)`") - lda = max(1, size(A,1), lda) - GC.@preserve A X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), - trans, size(A,1), size(A,2), alpha, - pA, lda, pX, sX, - beta, pY, sY, 1) - Y - end - function gemv(trans::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, X::AbstractVector{$elty}) - gemv!(trans, alpha, A, X, zero($elty), similar(X, $elty, size(A, (trans == 'N' ? 1 : 2)))) - end - function gemv(trans::AbstractChar, A::AbstractMatrix{$elty}, X::AbstractVector{$elty}) - gemv!(trans, one($elty), A, X, zero($elty), similar(X, $elty, size(A, (trans == 'N' ? 1 : 2)))) - end - end -end - -""" - gemv!(tA, alpha, A, x, beta, y) - -Update the vector `y` as `alpha*A*x + beta*y` or `alpha*A'x + beta*y` -according to [`tA`](@ref stdlib-blas-trans). -`alpha` and `beta` are scalars. Return the updated `y`. -""" -gemv! - -""" - gemv(tA, alpha, A, x) - -Return `alpha*A*x` or `alpha*A'x` according to [`tA`](@ref stdlib-blas-trans). -`alpha` is a scalar. -""" -gemv(tA, alpha, A, x) - -""" - gemv(tA, A, x) - -Return `A*x` or `A'x` according to [`tA`](@ref stdlib-blas-trans). -""" -gemv(tA, A, x) - -### (GB) general banded matrix-vector multiplication - -""" - gbmv!(trans, m, kl, ku, alpha, A, x, beta, y) - -Update vector `y` as `alpha*A*x + beta*y` or `alpha*A'*x + beta*y` according to [`trans`](@ref stdlib-blas-trans). -The matrix `A` is a general band matrix of dimension `m` by `size(A,2)` with `kl` -sub-diagonals and `ku` super-diagonals. `alpha` and `beta` are scalars. Return the updated `y`. -""" -function gbmv! 
end - -""" - gbmv(trans, m, kl, ku, alpha, A, x) - -Return `alpha*A*x` or `alpha*A'*x` according to [`trans`](@ref stdlib-blas-trans). -The matrix `A` is a general band matrix of dimension `m` by `size(A,2)` with `kl` sub-diagonals and `ku` -super-diagonals, and `alpha` is a scalar. -""" -function gbmv end - -for (fname, elty) in ((:dgbmv_,:Float64), - (:sgbmv_,:Float32), - (:zgbmv_,:ComplexF64), - (:cgbmv_,:ComplexF32)) - @eval begin - # SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,KL,KU,LDA,M,N - # CHARACTER TRANS - # * .. Array Arguments .. - # DOUBLE PRECISION A(LDA,*),X(*),Y(*) - function gbmv!(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, - alpha::Union{($elty), Bool}, A::AbstractMatrix{$elty}, - x::AbstractVector{$elty}, beta::Union{($elty), Bool}, - y::AbstractVector{$elty}) - require_one_based_indexing(A, x, y) - chkstride1(A) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Clong), - trans, m, size(A,2), kl, - ku, alpha, A, max(1,stride(A,2)), - px, stx, beta, py, sty, 1) - y - end - function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - n = size(A,2) - leny = trans == 'N' ? m : n - gbmv!(trans, m, kl, ku, alpha, A, x, zero($elty), similar(x, $elty, leny)) - end - function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - gbmv(trans, m, kl, ku, one($elty), A, x) - end - end -end - -### symv - -""" - symv!(ul, alpha, A, x, beta, y) - -Update the vector `y` as `alpha*A*x + beta*y`. `A` is assumed to be symmetric. -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -`alpha` and `beta` are scalars. Return the updated `y`. -""" -function symv! end - -for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline), - (:ssymv_,:Float32,libblastrampoline), - (:zsymv_,:ComplexF64,libblastrampoline), - (:csymv_,:ComplexF32,libblastrampoline)) - # Note that the complex symv are not BLAS but auiliary functions in LAPACK - @eval begin - # SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) - # .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,LDA,N - # CHARACTER UPLO - # .. Array Arguments .. 
- # DOUBLE PRECISION A(LDA,*),X(*),Y(*) - function symv!(uplo::AbstractChar, alpha::Union{($elty), Bool}, - A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, - beta::Union{($elty), Bool}, y::AbstractVector{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x, y) - m, n = size(A) - if m != n - throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square")) - end - if n != length(x) - throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))")) - end - if m != length(y) - throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))")) - end - chkstride1(A) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y ccall((@blasfunc($fname), $lib), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Clong), - uplo, n, alpha, A, - max(1,stride(A,2)), px, stx, beta, - py, sty, 1) - y - end - function symv(uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - symv!(uplo, alpha, A, x, zero($elty), similar(x)) - end - function symv(uplo::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - symv(uplo, one($elty), A, x) - end - end -end - -""" - symv(ul, alpha, A, x) - -Return `alpha*A*x`. `A` is assumed to be symmetric. -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -`alpha` is a scalar. -""" -symv(ul, alpha, A, x) - -""" - symv(ul, A, x) - -Return `A*x`. `A` is assumed to be symmetric. -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -""" -symv(ul, A, x) - -### hemv -""" - hemv!(ul, alpha, A, x, beta, y) - -Update the vector `y` as `alpha*A*x + beta*y`. `A` is assumed to be Hermitian. -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -`alpha` and `beta` are scalars. Return the updated `y`. -""" -function hemv! end - -for (fname, elty) in ((:zhemv_,:ComplexF64), - (:chemv_,:ComplexF32)) - @eval begin - function hemv!(uplo::AbstractChar, α::Union{$elty, Bool}, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, β::Union{$elty, Bool}, y::AbstractVector{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x, y) - m, n = size(A) - if m != n - throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square")) - end - if n != length(x) - throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))")) - end - if m != length(y) - throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))")) - end - chkstride1(A) - lda = max(1, stride(A, 2)) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Clong), - uplo, n, α, A, - lda, px, stx, β, - py, sty, 1) - y - end - function hemv(uplo::AbstractChar, α::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - hemv!(uplo, α, A, x, zero($elty), similar(x)) - end - function hemv(uplo::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - hemv(uplo, one($elty), A, x) - end - end -end - -""" - hemv(ul, alpha, A, x) - -Return `alpha*A*x`. `A` is assumed to be Hermitian. 
-Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -`alpha` is a scalar. -""" -hemv(ul, alpha, A, x) - -""" - hemv(ul, A, x) - -Return `A*x`. `A` is assumed to be Hermitian. -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -""" -hemv(ul, A, x) - -### hpmv!, (HP) Hermitian packed matrix-vector operation defined as y := alpha*A*x + beta*y. -for (fname, elty) in ((:zhpmv_, :ComplexF64), - (:chpmv_, :ComplexF32)) - @eval begin - # SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) - # Y <- ALPHA*AP*X + BETA*Y - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,N - # CHARACTER UPLO - # * .. Array Arguments .. - # DOUBLE PRECISION A(N,N),X(N),Y(N) - function hpmv!(uplo::AbstractChar, - n::Integer, - α::$elty, - AP::Union{Ptr{$elty}, AbstractArray{$elty}}, - x::Union{Ptr{$elty}, AbstractArray{$elty}}, - incx::Integer, - β::$elty, - y::Union{Ptr{$elty}, AbstractArray{$elty}}, - incy::Integer) - - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, # uplo, - Ref{BlasInt}, # n, - Ref{$elty}, # α, - Ptr{$elty}, # AP, - Ptr{$elty}, # x, - Ref{BlasInt}, # incx, - Ref{$elty}, # β, - Ptr{$elty}, # y, output - Ref{BlasInt}, # incy - Clong), # length of uplo - uplo, - n, - α, - AP, - x, - incx, - β, - y, - incy, - 1) - return y - end - end -end - -function hpmv!(uplo::AbstractChar, - α::Number, AP::AbstractArray{T}, x::AbstractArray{T}, - β::Number, y::AbstractArray{T}) where {T <: BlasComplex} - require_one_based_indexing(AP, x, y) - N = length(x) - if N != length(y) - throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))")) - end - if 2*length(AP) < N*(N + 1) - throw(DimensionMismatch(lazy"Packed hermitian matrix A has size smaller than length(x) = $(N).")) - end - chkstride1(AP) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y hpmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty) - y -end - -""" - hpmv!(uplo, α, AP, x, β, y) - -Update vector `y` as `α*A*x + β*y`, where `A` is a Hermitian matrix provided -in packed format `AP`. - -With `uplo = 'U'`, the array AP must contain the upper triangular part of the -Hermitian matrix packed sequentially, column by column, so that `AP[1]` -contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]` -respectively, and so on. - -With `uplo = 'L'`, the array AP must contain the lower triangular part of the -Hermitian matrix packed sequentially, column by column, so that `AP[1]` -contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]` -respectively, and so on. - -The scalar inputs `α` and `β` must be complex or real numbers. - -The array inputs `x`, `y` and `AP` must all be of `ComplexF32` or `ComplexF64` type. - -Return the updated `y`. - -!!! compat "Julia 1.5" - `hpmv!` requires at least Julia 1.5. -""" -hpmv! - -### sbmv, (SB) symmetric banded matrix-vector multiplication -for (fname, elty) in ((:dsbmv_,:Float64), - (:ssbmv_,:Float32)) - @eval begin - # SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,K,LDA,N - # CHARACTER UPLO - # * .. Array Arguments .. 
- # DOUBLE PRECISION A(LDA,*),X(*),Y(*) - function sbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x, y) - chkstride1(A) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), - uplo, size(A,2), k, alpha, - A, max(1,stride(A,2)), px, stx, - beta, py, sty, 1) - y - end - function sbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - n = size(A,2) - sbmv!(uplo, k, alpha, A, x, zero($elty), similar(x, $elty, n)) - end - function sbmv(uplo::AbstractChar, k::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - sbmv(uplo, k, one($elty), A, x) - end - end -end - -""" - sbmv(uplo, k, alpha, A, x) - -Return `alpha*A*x` where `A` is a symmetric band matrix of order `size(A,2)` with `k` -super-diagonals stored in the argument `A`. -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used. -""" -sbmv(uplo, k, alpha, A, x) - -""" - sbmv(uplo, k, A, x) - -Return `A*x` where `A` is a symmetric band matrix of order `size(A,2)` with `k` -super-diagonals stored in the argument `A`. -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used. -""" -sbmv(uplo, k, A, x) - -""" - sbmv!(uplo, k, alpha, A, x, beta, y) - -Update vector `y` as `alpha*A*x + beta*y` where `A` is a symmetric band matrix of order -`size(A,2)` with `k` super-diagonals stored in the argument `A`. The storage layout for `A` -is described the reference BLAS module, level-2 BLAS at -. -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `A` is used. - -Return the updated `y`. -""" -sbmv! - -### spmv!, (SP) symmetric packed matrix-vector operation defined as y := alpha*A*x + beta*y. -for (fname, elty) in ((:dspmv_, :Float64), - (:sspmv_, :Float32)) - @eval begin - # SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) - # Y <- ALPHA*AP*X + BETA*Y - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,N - # CHARACTER UPLO - # * .. Array Arguments .. 
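For `sbmv`/`sbmv!`, the band storage follows the reference-BLAS layout: with `uplo = 'U'` the `(k+1) × n` array holds the main diagonal in row `k+1` and each super-diagonal above it, left-padded with unused entries. A hedged sketch for the tridiagonal case (`k = 1`), using made-up values:

```julia
using LinearAlgebra
using LinearAlgebra: BLAS

# Symmetric tridiagonal test matrix: diagonal d, super/sub-diagonal e.
d = [4.0, 5.0, 6.0, 7.0]
e = [1.0, 2.0, 3.0]
S = SymTridiagonal(d, e)

# Band storage for uplo = 'U', k = 1: row 1 holds the super-diagonal
# (first entry unused), row 2 holds the main diagonal.
AB = [0.0 1.0 2.0 3.0;
      4.0 5.0 6.0 7.0]

x = [1.0, 2.0, 3.0, 4.0]
BLAS.sbmv('U', 1, AB, x) ≈ S * x         # expected: true

# Updating form: y := 2*S*x + 3*y
y = ones(4)
BLAS.sbmv!('U', 1, 2.0, AB, x, 3.0, y)
y ≈ 2.0 .* (S * x) .+ 3.0                # expected: true
```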
- # DOUBLE PRECISION A(N,N),X(N),Y(N) - function spmv!(uplo::AbstractChar, - n::Integer, - α::$elty, - AP::Union{Ptr{$elty}, AbstractArray{$elty}}, - x::Union{Ptr{$elty}, AbstractArray{$elty}}, - incx::Integer, - β::$elty, - y::Union{Ptr{$elty}, AbstractArray{$elty}}, - incy::Integer) - - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, # uplo, - Ref{BlasInt}, # n, - Ref{$elty}, # α, - Ptr{$elty}, # AP, - Ptr{$elty}, # x, - Ref{BlasInt}, # incx, - Ref{$elty}, # β, - Ptr{$elty}, # y, out - Ref{BlasInt}, # incy - Clong), # length of uplo - uplo, - n, - α, - AP, - x, - incx, - β, - y, - incy, - 1) - return y - end - end -end - -function spmv!(uplo::AbstractChar, - α::Real, AP::AbstractArray{T}, x::AbstractArray{T}, - β::Real, y::AbstractArray{T}) where {T <: BlasReal} - require_one_based_indexing(AP, x, y) - N = length(x) - if N != length(y) - throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))")) - end - if 2*length(AP) < N*(N + 1) - throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N).")) - end - chkstride1(AP) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y spmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty) - y -end - -""" - spmv!(uplo, α, AP, x, β, y) - -Update vector `y` as `α*A*x + β*y`, where `A` is a symmetric matrix provided -in packed format `AP`. - -With `uplo = 'U'`, the array AP must contain the upper triangular part of the -symmetric matrix packed sequentially, column by column, so that `AP[1]` -contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]` -respectively, and so on. - -With `uplo = 'L'`, the array AP must contain the lower triangular part of the -symmetric matrix packed sequentially, column by column, so that `AP[1]` -contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]` -respectively, and so on. - -The scalar inputs `α` and `β` must be real. - -The array inputs `x`, `y` and `AP` must all be of `Float32` or `Float64` type. - -Return the updated `y`. - -!!! compat "Julia 1.5" - `spmv!` requires at least Julia 1.5. -""" -spmv! - -### spr!, (SP) symmetric packed matrix-vector operation defined as A := alpha*x*x' + A -for (fname, elty) in ((:dspr_, :Float64), - (:sspr_, :Float32)) - @eval begin - function spr!(uplo::AbstractChar, - n::Integer, - α::$elty, - x::Union{Ptr{$elty}, AbstractArray{$elty}}, - incx::Integer, - AP::Union{Ptr{$elty}, AbstractArray{$elty}}) - - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, # uplo, - Ref{BlasInt}, # n, - Ref{$elty}, # α, - Ptr{$elty}, # x, - Ref{BlasInt}, # incx, - Ptr{$elty}, # AP, - Clong), # length of uplo - uplo, - n, - α, - x, - incx, - AP, - 1) - return AP - end - end -end - -function spr!(uplo::AbstractChar, - α::Real, x::AbstractArray{T}, - AP::AbstractArray{T}) where {T <: BlasReal} - chkuplo(uplo) - require_one_based_indexing(AP, x) - N = length(x) - if 2*length(AP) < N*(N + 1) - throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N).")) - end - chkstride1(AP) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - return GC.@preserve x spr!(uplo, N, T(α), px, stx , AP) -end - -""" - spr!(uplo, α, x, AP) - -Update matrix `A` as `A+α*x*x'`, where `A` is a symmetric matrix provided -in packed format `AP` and `x` is a vector. 
- -With `uplo = 'U'`, the array AP must contain the upper triangular part of the -symmetric matrix packed sequentially, column by column, so that `AP[1]` -contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]` -respectively, and so on. - -With `uplo = 'L'`, the array AP must contain the lower triangular part of the -symmetric matrix packed sequentially, column by column, so that `AP[1]` -contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]` -respectively, and so on. - -The scalar input `α` must be real. - -The array inputs `x` and `AP` must all be of `Float32` or `Float64` type. -Return the updated `AP`. - -!!! compat "Julia 1.8" - `spr!` requires at least Julia 1.8. -""" -spr! - -### hbmv, (HB) Hermitian banded matrix-vector multiplication -for (fname, elty) in ((:zhbmv_,:ComplexF64), - (:chbmv_,:ComplexF32)) - @eval begin - # SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER INCX,INCY,K,LDA,N - # CHARACTER UPLO - # * .. Array Arguments .. - # DOUBLE PRECISION A(LDA,*),X(*),Y(*) - function hbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x, y) - chkstride1(A) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed")) - GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong), - uplo, size(A,2), k, alpha, - A, max(1,stride(A,2)), px, stx, - beta, py, sty, 1) - y - end - function hbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - n = size(A,2) - hbmv!(uplo, k, alpha, A, x, zero($elty), similar(x, $elty, n)) - end - function hbmv(uplo::AbstractChar, k::Integer, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - hbmv(uplo, k, one($elty), A, x) - end - end -end - -### trmv, Triangular matrix-vector multiplication - -""" - trmv(ul, tA, dA, A, b) - -Return `op(A)*b`, where `op` is determined by [`tA`](@ref stdlib-blas-trans). -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -""" -function trmv end - -""" - trmv!(ul, tA, dA, A, b) - -Return `op(A)*b`, where `op` is determined by [`tA`](@ref stdlib-blas-trans). -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -The multiplication occurs in-place on `b`. -""" -function trmv! end - -for (fname, elty) in ((:dtrmv_,:Float64), - (:strmv_,:Float32), - (:ztrmv_,:ComplexF64), - (:ctrmv_,:ComplexF32)) - @eval begin - # SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) - # * .. Scalar Arguments .. - # INTEGER INCX,LDA,N - # CHARACTER DIAG,TRANS,UPLO - # * .. Array Arguments .. 
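A short illustrative sketch of `trmv`/`trmv!` with arbitrary values: only the selected triangle of `A` is read, so the opposite triangle may contain anything.

```julia
using LinearAlgebra
using LinearAlgebra: BLAS

A = [2.0 1.0 0.5;
     9.0 3.0 1.0;   # entries below the diagonal are ignored for uplo = 'U'
     9.0 9.0 4.0]
x = [1.0, 2.0, 3.0]

# 'U' = upper triangle, 'N' = no transpose, 'N' = non-unit diagonal
BLAS.trmv('U', 'N', 'N', A, x) ≈ UpperTriangular(A) * x    # expected: true

# In-place variant: overwrite y with transpose(A)*y, upper triangle only
y = copy(x)
BLAS.trmv!('U', 'T', 'N', A, y)
y ≈ transpose(UpperTriangular(A)) * x                      # expected: true
```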
- # DOUBLE PRECISION A(LDA,*),X(*) - function trmv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x) - n = checksquare(A) - if n != length(x) - throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) - end - chkstride1(A) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Clong, Clong, Clong), - uplo, trans, diag, n, - A, max(1,stride(A,2)), px, stx, 1, 1, 1) - x - end - function trmv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - trmv!(uplo, trans, diag, A, copy(x)) - end - end -end - -### trsv, Triangular matrix-vector solve - -""" - trsv!(ul, tA, dA, A, b) - -Overwrite `b` with the solution to `A*x = b` or one of the other two variants determined by -[`tA`](@ref stdlib-blas-trans) and [`ul`](@ref stdlib-blas-uplo). -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -Return the updated `b`. -""" -function trsv! end - -""" - trsv(ul, tA, dA, A, b) - -Return the solution to `A*x = b` or one of the other two variants determined by -[`tA`](@ref stdlib-blas-trans) and [`ul`](@ref stdlib-blas-uplo). -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -""" -function trsv end - -for (fname, elty) in ((:dtrsv_,:Float64), - (:strsv_,:Float32), - (:ztrsv_,:ComplexF64), - (:ctrsv_,:ComplexF32)) - @eval begin - # SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) - # .. Scalar Arguments .. - # INTEGER INCX,LDA,N - # CHARACTER DIAG,TRANS,UPLO - # .. Array Arguments .. - # DOUBLE PRECISION A(LDA,*),X(*) - function trsv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x) - n = checksquare(A) - if n != length(x) - throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))")) - end - chkstride1(A) - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Clong, Clong, Clong), - uplo, trans, diag, n, - A, max(1,stride(A,2)), px, stx, 1, 1, 1) - x - end - function trsv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}) - trsv!(uplo, trans, diag, A, copy(x)) - end - end -end - -### ger - -""" - ger!(alpha, x, y, A) - -Rank-1 update of the matrix `A` with vectors `x` and `y` as `alpha*x*y' + A`. -""" -function ger! 
end - -for (fname, elty) in ((:dger_,:Float64), - (:sger_,:Float32), - (:zgerc_,:ComplexF64), - (:cgerc_,:ComplexF32)) - @eval begin - function ger!(α::$elty, x::AbstractVector{$elty}, y::AbstractVector{$elty}, A::AbstractMatrix{$elty}) - require_one_based_indexing(A, x, y) - m, n = size(A) - if m != length(x) || n != length(y) - throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))")) - end - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}), - m, n, α, px, stx, py, sty, A, max(1,stride(A,2))) - A - end - end -end - -### syr - -""" - syr!(uplo, alpha, x, A) - -Rank-1 update of the symmetric matrix `A` with vector `x` as `alpha*x*transpose(x) + A`. -[`uplo`](@ref stdlib-blas-uplo) controls which triangle of `A` is updated. Returns `A`. -""" -function syr! end - -for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline), - (:ssyr_,:Float32,libblastrampoline), - (:zsyr_,:ComplexF64,libblastrampoline), - (:csyr_,:ComplexF32,libblastrampoline)) - @eval begin - function syr!(uplo::AbstractChar, α::$elty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x) - n = checksquare(A) - if length(x) != n - throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) - end - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x ccall((@blasfunc($fname), $lib), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}), - uplo, n, α, px, stx, A, max(1,stride(A, 2))) - A - end - end -end - -### her - -""" - her!(uplo, alpha, x, A) - -Methods for complex arrays only. Rank-1 update of the Hermitian matrix `A` with vector `x` -as `alpha*x*x' + A`. -[`uplo`](@ref stdlib-blas-uplo) controls which triangle of `A` is updated. Returns `A`. -""" -function her! end - -for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64), - (:cher_,:ComplexF32, :Float32)) - @eval begin - function her!(uplo::AbstractChar, α::$relty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, x) - n = checksquare(A) - if length(x) != n - throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))")) - end - px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) - GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong), - uplo, n, α, px, stx, A, max(1,stride(A,2)), 1) - A - end - end -end - -# Level 3 -## (GE) general matrix-matrix multiplication - -""" - gemm!(tA, tB, alpha, A, B, beta, C) - -Update `C` as `alpha*A*B + beta*C` or the other three variants according to -[`tA`](@ref stdlib-blas-trans) and `tB`. Return the updated `C`. -""" -function gemm! end - -for (gemm, elty) in - ((:dgemm_,:Float64), - (:sgemm_,:Float32), - (:zgemm_,:ComplexF64), - (:cgemm_,:ComplexF32)) - @eval begin - # SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER K,LDA,LDB,LDC,M,N - # CHARACTER TRANSA,TRANSB - # * .. 
Array Arguments .. - # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) - function gemm!(transA::AbstractChar, transB::AbstractChar, - alpha::Union{($elty), Bool}, - A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty}, - beta::Union{($elty), Bool}, - C::AbstractVecOrMat{$elty}) -# if any([stride(A,1), stride(B,1), stride(C,1)] .!= 1) -# error("gemm!: BLAS module requires contiguous matrix columns") -# end # should this be checked on every call? - require_one_based_indexing(A, B, C) - m = size(A, transA == 'N' ? 1 : 2) - ka = size(A, transA == 'N' ? 2 : 1) - kb = size(B, transB == 'N' ? 1 : 2) - n = size(B, transB == 'N' ? 2 : 1) - if ka != kb || m != size(C,1) || n != size(C,2) - throw(DimensionMismatch(lazy"A has size ($m,$ka), B has size ($kb,$n), C has size $(size(C))")) - end - chkstride1(A) - chkstride1(B) - chkstride1(C) - ccall((@blasfunc($gemm), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Clong, Clong), - transA, transB, m, n, - ka, alpha, A, max(1,stride(A,2)), - B, max(1,stride(B,2)), beta, C, - max(1,stride(C,2)), 1, 1) - C - end - function gemm(transA::AbstractChar, transB::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - gemm!(transA, transB, alpha, A, B, zero($elty), similar(B, $elty, (size(A, transA == 'N' ? 1 : 2), size(B, transB == 'N' ? 2 : 1)))) - end - function gemm(transA::AbstractChar, transB::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - gemm(transA, transB, one($elty), A, B) - end - end -end - -""" - gemm(tA, tB, alpha, A, B) - -Return `alpha*A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. -""" -gemm(tA, tB, alpha, A, B) - -""" - gemm(tA, tB, A, B) - -Return `A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. -""" -gemm(tA, tB, A, B) - - -## (SY) symmetric matrix-matrix and matrix-vector multiplication -for (mfname, elty) in ((:dsymm_,:Float64), - (:ssymm_,:Float32), - (:zsymm_,:ComplexF64), - (:csymm_,:ComplexF32)) - @eval begin - # SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) - # .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER LDA,LDB,LDC,M,N - # CHARACTER SIDE,UPLO - # .. Array Arguments .. 
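As a usage sketch for `gemm!`/`gemm` with arbitrary data: note that with `tA = 'T'` the row count of `C` follows the transposed `A`.

```julia
using LinearAlgebra
using LinearAlgebra: BLAS

A = rand(3, 4)          # Aᵀ is 4×3
B = rand(3, 5)
C = rand(4, 5)
α, β = 2.0, 0.5

Cref = α * transpose(A) * B + β * C
BLAS.gemm!('T', 'N', α, A, B, β, C)     # C := α*Aᵀ*B + β*C, in place
C ≈ Cref                                # expected: true

# Non-mutating form allocates and returns the product (α = 1)
D = rand(4, 2)
BLAS.gemm('N', 'N', A, D) ≈ A * D       # expected: true
```

The mutating form is the allocation-free path; `gemm` simply allocates the output and forwards to `gemm!`.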
- # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) - function symm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool}, - A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}, - beta::Union{($elty), Bool}, C::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, B, C) - m, n = size(C) - j = checksquare(A) - M, N = size(B) - if side == 'L' - if j != m - throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m")) - end - if N != n - throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n")) - end - if j != M - throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M")) - end - else - if j != n - throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n")) - end - if N != j - throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j")) - end - if M != m - throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m")) - end - end - chkstride1(A) - chkstride1(B) - chkstride1(C) - ccall((@blasfunc($mfname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, - Clong, Clong), - side, uplo, m, n, - alpha, A, max(1,stride(A,2)), B, - max(1,stride(B,2)), beta, C, max(1,stride(C,2)), - 1, 1) - C - end - function symm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - symm!(side, uplo, alpha, A, B, zero($elty), similar(B)) - end - function symm(side::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - symm(side, uplo, one($elty), A, B) - end - end -end - -""" - symm(side, ul, alpha, A, B) - -Return `alpha*A*B` or `alpha*B*A` according to [`side`](@ref stdlib-blas-side). -`A` is assumed to be symmetric. Only -the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -""" -symm(side, ul, alpha, A, B) - -""" - symm(side, ul, A, B) - -Return `A*B` or `B*A` according to [`side`](@ref stdlib-blas-side). -`A` is assumed to be symmetric. Only the [`ul`](@ref stdlib-blas-uplo) -triangle of `A` is used. -""" -symm(side, ul, A, B) - -""" - symm!(side, ul, alpha, A, B, beta, C) - -Update `C` as `alpha*A*B + beta*C` or `alpha*B*A + beta*C` according to [`side`](@ref stdlib-blas-side). -`A` is assumed to be symmetric. Only the [`ul`](@ref stdlib-blas-uplo) triangle of -`A` is used. Return the updated `C`. -""" -symm! - -## (HE) Hermitian matrix-matrix and matrix-vector multiplication -for (mfname, elty) in ((:zhemm_,:ComplexF64), - (:chemm_,:ComplexF32)) - @eval begin - # SUBROUTINE DHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) - # .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA,BETA - # INTEGER LDA,LDB,LDC,M,N - # CHARACTER SIDE,UPLO - # .. Array Arguments .. 
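A brief sketch of `symm`/`symm!` with illustrative values; only the `uplo` triangle of `A` is referenced, so `Symmetric(S, :U)` serves as the dense reference.

```julia
using LinearAlgebra
using LinearAlgebra: BLAS

S = [4.0 1.0 2.0;
     0.0 5.0 3.0;    # lower triangle is never read when uplo = 'U'
     0.0 0.0 6.0]
B = rand(3, 2)

# side = 'L': multiply from the left, i.e. alpha*A*B
BLAS.symm('L', 'U', 2.0, S, B) ≈ 2.0 * Symmetric(S, :U) * B    # expected: true

# In-place form accumulates into an existing C
C = rand(3, 2)
Cref = 1.5 * Symmetric(S, :U) * B + 0.5 * C
BLAS.symm!('L', 'U', 1.5, S, B, 0.5, C)
C ≈ Cref                                                       # expected: true
```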
- # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) - function hemm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool}, - A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}, - beta::Union{($elty), Bool}, C::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, B, C) - m, n = size(C) - j = checksquare(A) - M, N = size(B) - if side == 'L' - if j != m - throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m")) - end - if N != n - throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n")) - end - if j != M - throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M")) - end - else - if j != n - throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n")) - end - if N != j - throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j")) - end - if M != m - throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m")) - end - end - chkstride1(A) - chkstride1(B) - chkstride1(C) - ccall((@blasfunc($mfname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, - Clong, Clong), - side, uplo, m, n, - alpha, A, max(1,stride(A,2)), B, - max(1,stride(B,2)), beta, C, max(1,stride(C,2)), - 1, 1) - C - end - function hemm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - hemm!(side, uplo, alpha, A, B, zero($elty), similar(B)) - end - function hemm(side::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - hemm(side, uplo, one($elty), A, B) - end - end -end - -""" - hemm(side, ul, alpha, A, B) - -Return `alpha*A*B` or `alpha*B*A` according to [`side`](@ref stdlib-blas-side). -`A` is assumed to be Hermitian. Only the [`ul`](@ref stdlib-blas-uplo) triangle -of `A` is used. -""" -hemm(side, ul, alpha, A, B) - -""" - hemm(side, ul, A, B) - -Return `A*B` or `B*A` according to [`side`](@ref stdlib-blas-side). `A` is assumed -to be Hermitian. Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -""" -hemm(side, ul, A, B) - -""" - hemm!(side, ul, alpha, A, B, beta, C) - -Update `C` as `alpha*A*B + beta*C` or `alpha*B*A + beta*C` according to -[`side`](@ref stdlib-blas-side). `A` is assumed to be Hermitian. Only the -[`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. Return the updated `C`. -""" -hemm! - -## syrk - -""" - syrk!(uplo, trans, alpha, A, beta, C) - -Rank-k update of the symmetric matrix `C` as `alpha*A*transpose(A) + beta*C` or -`alpha*transpose(A)*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. -""" -function syrk! end - -""" - syrk(uplo, trans, alpha, A) - -Return either the upper triangle or the lower triangle of `A`, -according to [`uplo`](@ref stdlib-blas-uplo), -of `alpha*A*transpose(A)` or `alpha*transpose(A)*A`, -according to [`trans`](@ref stdlib-blas-trans). -""" -function syrk end - -for (fname, elty) in ((:dsyrk_,:Float64), - (:ssyrk_,:Float32), - (:zsyrk_,:ComplexF64), - (:csyrk_,:ComplexF32)) - @eval begin - # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) - # * .. Scalar Arguments .. - # REAL ALPHA,BETA - # INTEGER K,LDA,LDC,N - # CHARACTER TRANS,UPLO - # * .. 
Array Arguments .. - # REAL A(LDA,*),C(LDC,*) - function syrk!(uplo::AbstractChar, trans::AbstractChar, - alpha::Union{($elty), Bool}, A::AbstractVecOrMat{$elty}, - beta::Union{($elty), Bool}, C::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, C) - n = checksquare(C) - nn = size(A, trans == 'N' ? 1 : 2) - if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end - k = size(A, trans == 'N' ? 2 : 1) - chkstride1(A) - chkstride1(C) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Clong, Clong), - uplo, trans, n, k, - alpha, A, max(1,stride(A,2)), beta, - C, max(1,stride(C,2)), 1, 1) - C - end - end -end -function syrk(uplo::AbstractChar, trans::AbstractChar, alpha::Number, A::AbstractVecOrMat) - T = eltype(A) - n = size(A, trans == 'N' ? 1 : 2) - syrk!(uplo, trans, convert(T,alpha), A, zero(T), similar(A, T, (n, n))) -end -syrk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat) = syrk(uplo, trans, one(eltype(A)), A) - -""" - herk!(uplo, trans, alpha, A, beta, C) - -Methods for complex arrays only. Rank-k update of the Hermitian matrix `C` as -`alpha*A*A' + beta*C` or `alpha*A'*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is updated. Returns `C`. -""" -function herk! end - -""" - herk(uplo, trans, alpha, A) - -Methods for complex arrays only. Returns the [`uplo`](@ref stdlib-blas-uplo) -triangle of `alpha*A*A'` or `alpha*A'*A`, according to [`trans`](@ref stdlib-blas-trans). -""" -function herk end - -for (fname, elty, relty) in ((:zherk_, :ComplexF64, :Float64), - (:cherk_, :ComplexF32, :Float32)) - @eval begin - # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) - # * .. Scalar Arguments .. - # REAL ALPHA,BETA - # INTEGER K,LDA,LDC,N - # CHARACTER TRANS,UPLO - # * .. - # * .. Array Arguments .. - # COMPLEX A(LDA,*),C(LDC,*) - function herk!(uplo::AbstractChar, trans::AbstractChar, - α::Union{$relty, Bool}, A::AbstractVecOrMat{$elty}, - β::Union{$relty, Bool}, C::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, C) - n = checksquare(C) - nn = size(A, trans == 'N' ? 1 : 2) - if nn != n - throw(DimensionMismatch(lazy"the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))")) - end - chkstride1(A) - chkstride1(C) - k = size(A, trans == 'N' ? 2 : 1) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, - Ptr{$elty}, Ref{BlasInt}, Clong, Clong), - uplo, trans, n, k, - α, A, max(1,stride(A,2)), β, - C, max(1,stride(C,2)), 1, 1) - C - end - function herk(uplo::AbstractChar, trans::AbstractChar, α::$relty, A::AbstractVecOrMat{$elty}) - n = size(A, trans == 'N' ? 1 : 2) - herk!(uplo, trans, α, A, zero($relty), similar(A, (n,n))) - end - herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A) - end -end - -## syr2k -for (fname, elty) in ((:dsyr2k_,:Float64), - (:ssyr2k_,:Float32), - (:zsyr2k_,:ComplexF64), - (:csyr2k_,:ComplexF32)) - @eval begin - # SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) - # - # .. Scalar Arguments .. - # REAL PRECISION ALPHA,BETA - # INTEGER K,LDA,LDB,LDC,N - # CHARACTER TRANS,UPLO - # .. - # .. Array Arguments .. 
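One point worth illustrating for `syrk`/`syrk!` and `herk`: only the requested triangle of the result is written, so comparisons should be made on that triangle (the other half of a freshly allocated result is unspecified). A hedged sketch with arbitrary data:

```julia
using LinearAlgebra
using LinearAlgebra: BLAS

A = rand(4, 3)

# Only the 'U' triangle of the 4×4 result is defined; compare triangles, not full matrices.
U = BLAS.syrk('U', 'N', 2.0, A)
UpperTriangular(U) ≈ UpperTriangular(2.0 * A * transpose(A))   # expected: true

# syrk! writes one triangle of C in place (here the lower triangle of AᵀA)
C = zeros(3, 3)
BLAS.syrk!('L', 'T', 1.0, A, 0.0, C)
LowerTriangular(C) ≈ LowerTriangular(transpose(A) * A)         # expected: true

# herk is the Hermitian analogue for complex data; alpha and beta are real scalars
Z = rand(ComplexF64, 4, 3)
H = BLAS.herk('U', 'N', 1.0, Z)
UpperTriangular(H) ≈ UpperTriangular(Z * Z')                   # expected: true
```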
- # REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*) - function syr2k!(uplo::AbstractChar, trans::AbstractChar, - alpha::($elty), A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty}, - beta::($elty), C::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, B, C) - n = checksquare(C) - nn = size(A, trans == 'N' ? 1 : 2) - if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end - k = size(A, trans == 'N' ? 2 : 1) - chkstride1(A) - chkstride1(B) - chkstride1(C) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Clong, Clong), - uplo, trans, n, k, - alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), beta, - C, max(1,stride(C,2)), 1, 1) - C - end - end -end - -""" - syr2k!(uplo, trans, alpha, A, B, beta, C) - -Rank-2k update of the symmetric matrix `C` as -`alpha*A*transpose(B) + alpha*B*transpose(A) + beta*C` or -`alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C` -according to [`trans`](@ref stdlib-blas-trans). -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`. -""" -function syr2k! end - -""" - syr2k(uplo, trans, alpha, A, B) - -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of -`alpha*A*transpose(B) + alpha*B*transpose(A)` or -`alpha*transpose(A)*B + alpha*transpose(B)*A`, -according to [`trans`](@ref stdlib-blas-trans). -""" -function syr2k(uplo::AbstractChar, trans::AbstractChar, alpha::Number, A::AbstractVecOrMat, B::AbstractVecOrMat) - T = eltype(A) - n = size(A, trans == 'N' ? 1 : 2) - syr2k!(uplo, trans, convert(T,alpha), A, B, zero(T), similar(A, T, (n, n))) -end -""" - syr2k(uplo, trans, A, B) - -Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)` -or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-trans). -""" -syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B) - -for (fname, elty1, elty2) in ((:zher2k_,:ComplexF64,:Float64), (:cher2k_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) - # - # .. Scalar Arguments .. - # COMPLEX ALPHA - # REAL BETA - # INTEGER K,LDA,LDB,LDC,N - # CHARACTER TRANS,UPLO - # .. - # .. Array Arguments .. - # COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) - function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), - A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}, - beta::($elty2), C::AbstractMatrix{$elty1}) - chkuplo(uplo) - require_one_based_indexing(A, B, C) - n = checksquare(C) - nn = size(A, trans == 'N' ? 1 : 2) - if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end - chkstride1(A) - chkstride1(B) - chkstride1(C) - k = size(A, trans == 'N' ? 2 : 1) - ccall((@blasfunc($fname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt}, - Ref{$elty2}, Ptr{$elty1}, Ref{BlasInt}, Clong, Clong), - uplo, trans, n, k, - alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), - beta, C, max(1,stride(C,2)), 1, 1) - C - end - function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) - n = size(A, trans == 'N' ? 
1 : 2) - her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n))) - end - her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) = - her2k(uplo, trans, one($elty1), A, B) - end -end - -""" - her2k!(uplo, trans, alpha, A, B, beta, C) - -Rank-2k update of the Hermitian matrix `C` as -`alpha*A*B' + alpha*B*A' + beta*C` or `alpha*A'*B + alpha*B'*A + beta*C` -according to [`trans`](@ref stdlib-blas-trans). The scalar `beta` has to be real. -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. -""" -function her2k! end - -""" - her2k(uplo, trans, alpha, A, B) - -Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` -or `alpha*A'*B + alpha*B'*A`, according to [`trans`](@ref stdlib-blas-trans). -""" -her2k(uplo, trans, alpha, A, B) - -""" - her2k(uplo, trans, A, B) - -Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'` -or `A'*B + B'*A`, according to [`trans`](@ref stdlib-blas-trans). -""" -her2k(uplo, trans, A, B) - -## (TR) Triangular matrix and vector multiplication and solution - -""" - trmm!(side, ul, tA, dA, alpha, A, B) - -Update `B` as `alpha*A*B` or one of the other three variants determined by -[`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -Return the updated `B`. -""" -function trmm! end - -""" - trmm(side, ul, tA, dA, alpha, A, B) - -Return `alpha*A*B` or one of the other three variants determined by -[`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -""" -function trmm end - -""" - trsm!(side, ul, tA, dA, alpha, A, B) - -Overwrite `B` with the solution to `A*X = alpha*B` or one of the other three variants -determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -Returns the updated `B`. -""" -function trsm! end - -""" - trsm(side, ul, tA, dA, alpha, A, B) - -Return the solution to `A*X = alpha*B` or one of the other three variants determined by -determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). -Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. -[`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or -are assumed to be all ones. -""" -function trsm end - -for (mmname, smname, elty) in - ((:dtrmm_,:dtrsm_,:Float64), - (:strmm_,:strsm_,:Float32), - (:ztrmm_,:ztrsm_,:ComplexF64), - (:ctrmm_,:ctrsm_,:ComplexF32)) - @eval begin - # SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA - # INTEGER LDA,LDB,M,N - # CHARACTER DIAG,SIDE,TRANSA,UPLO - # * .. Array Arguments .. - # DOUBLE PRECISION A(LDA,*),B(LDB,*) - function trmm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::Number, - A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, B) - m, n = size(B) - nA = checksquare(A) - if nA != (side == 'L' ? 
m : n) - throw(DimensionMismatch(lazy"size of A, $(size(A)), doesn't match $side size of B with dims, $(size(B))")) - end - chkstride1(A) - chkstride1(B) - ccall((@blasfunc($mmname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Clong, Clong, Clong, Clong), - side, uplo, transa, diag, m, n, - alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), - 1, 1, 1, 1) - B - end - function trmm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, - alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - trmm!(side, uplo, transa, diag, alpha, A, copy(B)) - end - # SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) - # * .. Scalar Arguments .. - # DOUBLE PRECISION ALPHA - # INTEGER LDA,LDB,M,N - # CHARACTER DIAG,SIDE,TRANSA,UPLO - # * .. Array Arguments .. - # DOUBLE PRECISION A(LDA,*),B(LDB,*) - function trsm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, - alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkuplo(uplo) - require_one_based_indexing(A, B) - m, n = size(B) - k = checksquare(A) - if k != (side == 'L' ? m : n) - throw(DimensionMismatch(lazy"size of A is ($k,$k), size of B is ($m,$n), side is $side, and transa='$transa'")) - end - chkstride1(A) - chkstride1(B) - ccall((@blasfunc($smname), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, - Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Clong, Clong, Clong, Clong), - side, uplo, transa, diag, - m, n, alpha, A, - max(1,stride(A,2)), B, max(1,stride(B,2)), - 1, 1, 1, 1) - B - end - function trsm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - trsm!(side, uplo, transa, diag, alpha, A, copy(B)) - end - end -end - -end # module - -function copyto!(dest::Array{T}, rdest::AbstractRange{Ti}, - src::Array{T}, rsrc::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer} - if minimum(rdest) < 1 || maximum(rdest) > length(dest) - throw(ArgumentError(lazy"range out of bounds for dest, of length $(length(dest))")) - end - if minimum(rsrc) < 1 || maximum(rsrc) > length(src) - throw(ArgumentError(lazy"range out of bounds for src, of length $(length(src))")) - end - if length(rdest) != length(rsrc) - throw(DimensionMismatch(lazy"ranges must be of the same length")) - end - GC.@preserve src dest BLAS.blascopy!( - length(rsrc), - pointer(src, minimum(rsrc)), - step(rsrc), - pointer(dest, minimum(rdest)), - step(rdest)) - - return dest -end diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl deleted file mode 100644 index d1019a1a4ea5a..0000000000000 --- a/stdlib/LinearAlgebra/src/bunchkaufman.jl +++ /dev/null @@ -1,392 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## Create an extractor that extracts the modified original matrix, e.g. -## LD for BunchKaufman, UL for CholeskyDense, LU for LUDense and -## define size methods for Factorization types using it. - -""" - BunchKaufman <: Factorization - -Matrix factorization type of the Bunch-Kaufman factorization of a symmetric or -Hermitian matrix `A` as `P'UDU'P` or `P'LDL'P`, depending on whether the upper -(the default) or the lower triangle is stored in `A`. 
If `A` is complex symmetric -then `U'` and `L'` denote the unconjugated transposes, i.e. `transpose(U)` and -`transpose(L)`, respectively. This is the return type of [`bunchkaufman`](@ref), -the corresponding matrix factorization function. - -If `S::BunchKaufman` is the factorization object, the components can be obtained -via `S.D`, `S.U` or `S.L` as appropriate given `S.uplo`, and `S.p`. - -Iterating the decomposition produces the components `S.D`, `S.U` or `S.L` -as appropriate given `S.uplo`, and `S.p`. - -# Examples -```jldoctest -julia> A = [1 2; 2 3] -2×2 Matrix{Int64}: - 1 2 - 2 3 - -julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A) -BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}} -D factor: -2×2 Tridiagonal{Float64, Vector{Float64}}: - -0.333333 0.0 - 0.0 3.0 -U factor: -2×2 UnitUpperTriangular{Float64, Matrix{Float64}}: - 1.0 0.666667 - ⋅ 1.0 -permutation: -2-element Vector{Int64}: - 1 - 2 - -julia> d, u, p = S; # destructuring via iteration - -julia> d == S.D && u == S.U && p == S.p -true - -julia> S = bunchkaufman(Symmetric(A, :L)) -BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}} -D factor: -2×2 Tridiagonal{Float64, Vector{Float64}}: - 3.0 0.0 - 0.0 -0.333333 -L factor: -2×2 UnitLowerTriangular{Float64, Matrix{Float64}}: - 1.0 ⋅ - 0.666667 1.0 -permutation: -2-element Vector{Int64}: - 2 - 1 -``` -""" -struct BunchKaufman{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T} - LD::S - ipiv::P - uplo::Char - symmetric::Bool - rook::Bool - info::BlasInt - - function BunchKaufman{T,S,P}(LD, ipiv, uplo, symmetric, rook, info) where {T,S<:AbstractMatrix,P<:AbstractVector} - require_one_based_indexing(LD) - new{T,S,P}(LD, ipiv, uplo, symmetric, rook, info) - end -end -BunchKaufman(A::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, uplo::AbstractChar, - symmetric::Bool, rook::Bool, info::BlasInt) where {T} = - BunchKaufman{T,typeof(A),typeof(ipiv)}(A, ipiv, uplo, symmetric, rook, info) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(BunchKaufman{T,S}(LD, ipiv, uplo, symmetric, rook, info) where {T,S}, - BunchKaufman{T,S,typeof(ipiv)}(LD, ipiv, uplo, symmetric, rook, info), false) - -# iteration for destructuring into components -Base.iterate(S::BunchKaufman) = (S.D, Val(:UL)) -Base.iterate(S::BunchKaufman, ::Val{:UL}) = (S.uplo == 'L' ? S.L : S.U, Val(:p)) -Base.iterate(S::BunchKaufman, ::Val{:p}) = (S.p, Val(:done)) -Base.iterate(S::BunchKaufman, ::Val{:done}) = nothing - - -""" - bunchkaufman!(A, rook::Bool=false; check = true) -> BunchKaufman - -`bunchkaufman!` is the same as [`bunchkaufman`](@ref), but saves space by overwriting the -input `A`, instead of creating a copy. -""" -function bunchkaufman!(A::RealHermSymComplexSym{<:BlasReal,<:StridedMatrix}, - rook::Bool = false; check::Bool = true) - LD, ipiv, info = rook ? LAPACK.sytrf_rook!(A.uplo, A.data) : LAPACK.sytrf!(A.uplo, A.data) - check && checknonsingular(info) - BunchKaufman(LD, ipiv, A.uplo, true, rook, info) -end -function bunchkaufman!(A::Hermitian{<:BlasComplex,<:StridedMatrix}, - rook::Bool = false; check::Bool = true) - LD, ipiv, info = rook ? 
LAPACK.hetrf_rook!(A.uplo, A.data) : LAPACK.hetrf!(A.uplo, A.data) - check && checknonsingular(info) - BunchKaufman(LD, ipiv, A.uplo, false, rook, info) -end -function bunchkaufman!(A::StridedMatrix{<:BlasFloat}, rook::Bool = false; check::Bool = true) - if ishermitian(A) - return bunchkaufman!(Hermitian(A), rook; check = check) - elseif issymmetric(A) - return bunchkaufman!(Symmetric(A), rook; check = check) - else - throw(ArgumentError("Bunch-Kaufman decomposition is only valid for symmetric or Hermitian matrices")) - end -end - -""" - bunchkaufman(A, rook::Bool=false; check = true) -> S::BunchKaufman - -Compute the Bunch-Kaufman [^Bunch1977] factorization of a symmetric or -Hermitian matrix `A` as `P'*U*D*U'*P` or `P'*L*D*L'*P`, depending on -which triangle is stored in `A`, and return a [`BunchKaufman`](@ref) object. -Note that if `A` is complex symmetric then `U'` and `L'` denote -the unconjugated transposes, i.e. `transpose(U)` and `transpose(L)`. - -Iterating the decomposition produces the components `S.D`, `S.U` or `S.L` -as appropriate given `S.uplo`, and `S.p`. - -If `rook` is `true`, rook pivoting is used. If `rook` is false, -rook pivoting is not used. - -When `check = true`, an error is thrown if the decomposition fails. -When `check = false`, responsibility for checking the decomposition's -validity (via [`issuccess`](@ref)) lies with the user. - -The following functions are available for `BunchKaufman` objects: -[`size`](@ref), `\\`, [`inv`](@ref), [`issymmetric`](@ref), -[`ishermitian`](@ref), [`getindex`](@ref). - -[^Bunch1977]: J R Bunch and L Kaufman, Some stable methods for calculating inertia and solving symmetric linear systems, Mathematics of Computation 31:137 (1977), 163-179. [url](http://www.ams.org/journals/mcom/1977-31-137/S0025-5718-1977-0428694-0/). 
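The docstring lists `\` and `inv` among the operations supported by a `BunchKaufman` object, while the examples that follow focus on the factors, so here is a small supplementary sketch (arbitrary values) of reusing the factorization to solve a linear system:

```julia
using LinearAlgebra

A = Symmetric([4.0 1.0 2.0;
               1.0 3.0 0.5;
               2.0 0.5 5.0])
b = [1.0, 2.0, 3.0]

F = bunchkaufman(A)   # P'*U*D*U'*P factorization of the symmetric matrix
x = F \ b             # solve A*x = b by reusing the factorization
A * x ≈ b             # expected: true

issuccess(F)          # true when the factorization did not break down
```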
- -# Examples -```jldoctest -julia> A = [1 2; 2 3] -2×2 Matrix{Int64}: - 1 2 - 2 3 - -julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A) -BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}} -D factor: -2×2 Tridiagonal{Float64, Vector{Float64}}: - -0.333333 0.0 - 0.0 3.0 -U factor: -2×2 UnitUpperTriangular{Float64, Matrix{Float64}}: - 1.0 0.666667 - ⋅ 1.0 -permutation: -2-element Vector{Int64}: - 1 - 2 - -julia> d, u, p = S; # destructuring via iteration - -julia> d == S.D && u == S.U && p == S.p -true - -julia> S.U*S.D*S.U' - S.P*A*S.P' -2×2 Matrix{Float64}: - 0.0 0.0 - 0.0 0.0 - -julia> S = bunchkaufman(Symmetric(A, :L)) -BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}} -D factor: -2×2 Tridiagonal{Float64, Vector{Float64}}: - 3.0 0.0 - 0.0 -0.333333 -L factor: -2×2 UnitLowerTriangular{Float64, Matrix{Float64}}: - 1.0 ⋅ - 0.666667 1.0 -permutation: -2-element Vector{Int64}: - 2 - 1 - -julia> S.L*S.D*S.L' - A[S.p, S.p] -2×2 Matrix{Float64}: - 0.0 0.0 - 0.0 0.0 -``` -""" -bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} = - bunchkaufman!(eigencopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check) - -BunchKaufman{T}(B::BunchKaufman) where {T} = - BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info) -Factorization{T}(B::BunchKaufman) where {T} = BunchKaufman{T}(B) - -size(B::BunchKaufman) = size(getfield(B, :LD)) -size(B::BunchKaufman, d::Integer) = size(getfield(B, :LD), d) -issymmetric(B::BunchKaufman) = B.symmetric -ishermitian(B::BunchKaufman{T}) where T = T<:Real || !B.symmetric - -function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar, rook::Bool) where T - require_one_based_indexing(v) - p = T[1:maxi;] - uploL = uplo == 'L' - i = uploL ? 1 : maxi - # if uplo == 'U' we construct the permutation backwards - @inbounds while 1 <= i <= length(v) - vi = v[i] - if vi > 0 # the 1x1 blocks - p[i], p[vi] = p[vi], p[i] - i += uploL ? 1 : -1 - else # the 2x2 blocks - if rook - p[i], p[-vi] = p[-vi], p[i] - end - if uploL - vp = rook ? -v[i+1] : -vi - p[i + 1], p[vp] = p[vp], p[i + 1] - i += 2 - else # 'U' - vp = rook ? -v[i-1] : -vi - p[i - 1], p[vp] = p[vp], p[i - 1] - i -= 2 - end - end - end - return p -end - -function getproperty(B::BunchKaufman{T,<:StridedMatrix}, d::Symbol) where {T<:BlasFloat} - n = size(B, 1) - if d === :p - return _ipiv2perm_bk(getfield(B, :ipiv), n, getfield(B, :uplo), B.rook) - elseif d === :P - return Matrix{T}(I, n, n)[:,invperm(B.p)] - elseif d === :L || d === :U || d === :D - if getfield(B, :rook) - LUD, od = LAPACK.syconvf_rook!(getfield(B, :uplo), 'C', copy(getfield(B, :LD)), getfield(B, :ipiv)) - else - LUD, od = LAPACK.syconv!(getfield(B, :uplo), copy(getfield(B, :LD)), getfield(B, :ipiv)) - end - if d === :D - if getfield(B, :uplo) == 'L' - odl = od[1:n - 1] - return Tridiagonal(odl, diag(LUD), getfield(B, :symmetric) ? odl : conj.(odl)) - else # 'U' - odu = od[2:n] - return Tridiagonal(getfield(B, :symmetric) ? odu : conj.(odu), diag(LUD), odu) - end - elseif d === :L - if getfield(B, :uplo) == 'L' - return UnitLowerTriangular(LUD) - else - throw(ArgumentError("factorization is U*D*transpose(U) but you requested L")) - end - else # :U - if B.uplo == 'U' - return UnitUpperTriangular(LUD) - else - throw(ArgumentError("factorization is L*D*transpose(L) but you requested U")) - end - end - else - getfield(B, d) - end -end - -Base.propertynames(B::BunchKaufman, private::Bool=false) = - (:p, :P, :L, :U, :D, (private ? 
fieldnames(typeof(B)) : ())...) - -issuccess(B::BunchKaufman) = B.info == 0 - -function adjoint(B::BunchKaufman) - if ishermitian(B) - return B - else - throw(ArgumentError("adjoint not implemented for complex symmetric matrices")) - end -end - -function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, B::BunchKaufman) - if issuccess(B) - summary(io, B); println(io) - println(io, "D factor:") - show(io, mime, B.D) - println(io, "\n$(B.uplo) factor:") - show(io, mime, B.uplo == 'L' ? B.L : B.U) - println(io, "\npermutation:") - show(io, mime, B.p) - else - print(io, "Failed factorization of type $(typeof(B))") - end -end - -function inv(B::BunchKaufman{<:BlasReal,<:StridedMatrix}) - if B.rook - copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true) - else - copytri!(LAPACK.sytri!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true) - end -end - -function inv(B::BunchKaufman{<:BlasComplex,<:StridedMatrix}) - if issymmetric(B) - if B.rook - copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo) - else - copytri!(LAPACK.sytri!(B.uplo, copy(B.LD), B.ipiv), B.uplo) - end - else - if B.rook - copytri!(LAPACK.hetri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true) - else - copytri!(LAPACK.hetri!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true) - end - end -end - -function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasReal} - if B.rook - LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R) - else - LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R) - end -end -function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasComplex} - if B.rook - if issymmetric(B) - LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R) - else - LAPACK.hetrs_rook!(B.uplo, B.LD, B.ipiv, R) - end - else - if issymmetric(B) - LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R) - else - LAPACK.hetrs!(B.uplo, B.LD, B.ipiv, R) - end - end -end - -function logabsdet(F::BunchKaufman) - M = F.LD - p = F.ipiv - n = size(F.LD, 1) - - if !issuccess(F) - return eltype(F)(-Inf), zero(eltype(F)) - end - s = one(real(eltype(F))) - i = 1 - abs_det = zero(real(eltype(F))) - while i <= n - if p[i] > 0 - elm = M[i,i] - s *= sign(elm) - abs_det += log(abs(elm)) - i += 1 - else - # 2x2 pivot case. Make sure not to square before the subtraction by scaling - # with the off-diagonal element. This is safe because the off diagonal is - # always large for 2x2 pivots. - if F.uplo == 'U' - elm = M[i, i + 1]*(M[i,i]/M[i, i + 1]*M[i + 1, i + 1] - - (issymmetric(F) ? M[i, i + 1] : conj(M[i, i + 1]))) - s *= sign(elm) - abs_det += log(abs(elm)) - else - elm = M[i + 1,i]*(M[i, i]/M[i + 1, i]*M[i + 1, i + 1] - - (issymmetric(F) ? M[i + 1, i] : conj(M[i + 1, i]))) - s *= sign(elm) - abs_det += log(abs(elm)) - end - i += 2 - end - end - return abs_det, s -end - -## reconstruct the original matrix -## TODO: understand the procedure described at -## http://www.nag.com/numeric/FL/nagdoc_fl22/pdf/F07/f07mdf.pdf diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl deleted file mode 100644 index 82f138db7d7b9..0000000000000 --- a/stdlib/LinearAlgebra/src/cholesky.jl +++ /dev/null @@ -1,835 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -########################## -# Cholesky Factorization # -########################## - -# The dispatch structure in the cholesky, and cholesky! methods is a bit -# complicated and some explanation is therefore provided in the following -# -# In the methods below, LAPACK is called when possible, i.e. 
StridedMatrices with Float32, -# Float64, ComplexF32, and ComplexF64 element types. For other element or -# matrix types, the unblocked Julia implementation in _chol! is used. For cholesky -# and cholesky! pivoting is supported through a RowMaximum() argument. A type argument is -# necessary for type stability since the output of cholesky and cholesky! is either -# Cholesky or CholeskyPivoted. The latter is only -# supported for the four LAPACK element types. For other types, e.g. BigFloats RowMaximum() will -# give an error. It is required that the input is Hermitian (including real symmetric) either -# through the Hermitian and Symmetric views or exact symmetric or Hermitian elements which -# is checked for and an error is thrown if the check fails. - -# The internal structure is as follows -# - _chol! returns the factor and info without checking positive definiteness -# - cholesky/cholesky! returns Cholesky without checking positive definiteness - -# FixMe? The dispatch below seems overly complicated. One simplification could be to -# merge the two Cholesky types into one. It would remove the need for Val completely but -# the cost would be extra unnecessary/unused fields for the unpivoted Cholesky and runtime -# checks of those fields before calls to LAPACK to check which version of the Cholesky -# factorization the type represents. -""" - Cholesky <: Factorization - -Matrix factorization type of the Cholesky factorization of a dense symmetric/Hermitian -positive definite matrix `A`. This is the return type of [`cholesky`](@ref), -the corresponding matrix factorization function. - -The triangular Cholesky factor can be obtained from the factorization `F::Cholesky` -via `F.L` and `F.U`, where `A ≈ F.U' * F.U ≈ F.L * F.L'`. - -The following functions are available for `Cholesky` objects: [`size`](@ref), [`\\`](@ref), -[`inv`](@ref), [`det`](@ref), [`logdet`](@ref) and [`isposdef`](@ref). - -Iterating the decomposition produces the components `L` and `U`. - -# Examples -```jldoctest -julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.] 
-3×3 Matrix{Float64}: - 4.0 12.0 -16.0 - 12.0 37.0 -43.0 - -16.0 -43.0 98.0 - -julia> C = cholesky(A) -Cholesky{Float64, Matrix{Float64}} -U factor: -3×3 UpperTriangular{Float64, Matrix{Float64}}: - 2.0 6.0 -8.0 - ⋅ 1.0 5.0 - ⋅ ⋅ 3.0 - -julia> C.U -3×3 UpperTriangular{Float64, Matrix{Float64}}: - 2.0 6.0 -8.0 - ⋅ 1.0 5.0 - ⋅ ⋅ 3.0 - -julia> C.L -3×3 LowerTriangular{Float64, Matrix{Float64}}: - 2.0 ⋅ ⋅ - 6.0 1.0 ⋅ - -8.0 5.0 3.0 - -julia> C.L * C.U == A -true - -julia> l, u = C; # destructuring via iteration - -julia> l == C.L && u == C.U -true -``` -""" -struct Cholesky{T,S<:AbstractMatrix} <: Factorization{T} - factors::S - uplo::Char - info::BlasInt - - function Cholesky{T,S}(factors, uplo, info) where {T,S<:AbstractMatrix} - require_one_based_indexing(factors) - new(factors, uplo, info) - end -end -Cholesky(A::AbstractMatrix{T}, uplo::Symbol, info::Integer) where {T} = - Cholesky{T,typeof(A)}(A, char_uplo(uplo), info) -Cholesky(A::AbstractMatrix{T}, uplo::AbstractChar, info::Integer) where {T} = - Cholesky{T,typeof(A)}(A, uplo, info) -Cholesky(U::UpperTriangular{T}) where {T} = Cholesky{T,typeof(U.data)}(U.data, 'U', 0) -Cholesky(L::LowerTriangular{T}) where {T} = Cholesky{T,typeof(L.data)}(L.data, 'L', 0) - -# iteration for destructuring into components -Base.iterate(C::Cholesky) = (C.L, Val(:U)) -Base.iterate(C::Cholesky, ::Val{:U}) = (C.U, Val(:done)) -Base.iterate(C::Cholesky, ::Val{:done}) = nothing - - -""" - CholeskyPivoted - -Matrix factorization type of the pivoted Cholesky factorization of a dense symmetric/Hermitian -positive semi-definite matrix `A`. This is the return type of [`cholesky(_, ::RowMaximum)`](@ref), -the corresponding matrix factorization function. - -The triangular Cholesky factor can be obtained from the factorization `F::CholeskyPivoted` -via `F.L` and `F.U`, and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'` -with `Ur = F.U[1:F.rank, :]` and `Lr = F.L[:, 1:F.rank]`, or alternatively -`A ≈ Up' * Up ≈ Lp * Lp'` with `Up = F.U[1:F.rank, invperm(F.p)]` and -`Lp = F.L[invperm(F.p), 1:F.rank]`. - -The following functions are available for `CholeskyPivoted` objects: -[`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref). - -Iterating the decomposition produces the components `L` and `U`. 
- -# Examples -```jldoctest -julia> X = [1.0, 2.0, 3.0, 4.0]; - -julia> A = X * X'; - -julia> C = cholesky(A, RowMaximum(), check = false) -CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}} -U factor with rank 1: -4×4 UpperTriangular{Float64, Matrix{Float64}}: - 4.0 2.0 3.0 1.0 - ⋅ 0.0 6.0 2.0 - ⋅ ⋅ 9.0 3.0 - ⋅ ⋅ ⋅ 1.0 -permutation: -4-element Vector{Int64}: - 4 - 2 - 3 - 1 - -julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p] -true - -julia> l, u = C; # destructuring via iteration - -julia> l == C.L && u == C.U -true -``` -""" -struct CholeskyPivoted{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T} - factors::S - uplo::Char - piv::P - rank::BlasInt - tol::Real - info::BlasInt - - function CholeskyPivoted{T,S,P}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix,P<:AbstractVector} - require_one_based_indexing(factors) - new{T,S,P}(factors, uplo, piv, rank, tol, info) - end -end -CholeskyPivoted(A::AbstractMatrix{T}, uplo::AbstractChar, piv::AbstractVector{<:Integer}, - rank::Integer, tol::Real, info::Integer) where T = - CholeskyPivoted{T,typeof(A),typeof(piv)}(A, uplo, piv, rank, tol, info) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(CholeskyPivoted{T,S}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix}, - CholeskyPivoted{T,S,typeof(piv)}(factors, uplo, piv, rank, tol, info), false) - - -# iteration for destructuring into components -Base.iterate(C::CholeskyPivoted) = (C.L, Val(:U)) -Base.iterate(C::CholeskyPivoted, ::Val{:U}) = (C.U, Val(:done)) -Base.iterate(C::CholeskyPivoted, ::Val{:done}) = nothing - - -# make a copy that allow inplace Cholesky factorization -choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32) -cholcopy(A::AbstractMatrix) = eigencopy_oftype(A, choltype(A)) - -# _chol!. Internal methods for calling unpivoted Cholesky -## BLAS/LAPACK element types -function _chol!(A::StridedMatrix{<:BlasFloat}, ::Type{UpperTriangular}) - C, info = LAPACK.potrf!('U', A) - return UpperTriangular(C), info -end -function _chol!(A::StridedMatrix{<:BlasFloat}, ::Type{LowerTriangular}) - C, info = LAPACK.potrf!('L', A) - return LowerTriangular(C), info -end - -## Non BLAS/LAPACK element types (generic) -function _chol!(A::AbstractMatrix, ::Type{UpperTriangular}) - require_one_based_indexing(A) - n = checksquare(A) - realdiag = eltype(A) <: Complex - @inbounds begin - for k = 1:n - Akk = realdiag ? real(A[k,k]) : A[k,k] - for i = 1:k - 1 - Akk -= realdiag ? abs2(A[i,k]) : A[i,k]'A[i,k] - end - A[k,k] = Akk - Akk, info = _chol!(Akk, UpperTriangular) - if info != 0 - return UpperTriangular(A), convert(BlasInt, k) - end - A[k,k] = Akk - AkkInv = inv(copy(Akk')) - for j = k + 1:n - for i = 1:k - 1 - A[k,j] -= A[i,k]'A[i,j] - end - A[k,j] = AkkInv*A[k,j] - end - end - end - return UpperTriangular(A), convert(BlasInt, 0) -end -function _chol!(A::AbstractMatrix, ::Type{LowerTriangular}) - require_one_based_indexing(A) - n = checksquare(A) - realdiag = eltype(A) <: Complex - @inbounds begin - for k = 1:n - Akk = realdiag ? real(A[k,k]) : A[k,k] - for i = 1:k - 1 - Akk -= realdiag ? 
abs2(A[k,i]) : A[k,i]*A[k,i]' - end - A[k,k] = Akk - Akk, info = _chol!(Akk, LowerTriangular) - if info != 0 - return LowerTriangular(A), convert(BlasInt, k) - end - A[k,k] = Akk - AkkInv = inv(Akk) - for j = 1:k - 1 - @simd for i = k + 1:n - A[i,k] -= A[i,j]*A[k,j]' - end - end - for i = k + 1:n - A[i,k] *= AkkInv' - end - end - end - return LowerTriangular(A), convert(BlasInt, 0) -end - -## Numbers -function _chol!(x::Number, _) - rx = real(x) - iszero(rx) && return (rx, convert(BlasInt, 1)) - rxr = sqrt(abs(rx)) - rval = convert(promote_type(typeof(x), typeof(rxr)), rxr) - return (rval, convert(BlasInt, rx != abs(x))) -end - -## for StridedMatrices, check that matrix is symmetric/Hermitian - -# cholesky!. Destructive methods for computing Cholesky factorization of real symmetric -# or Hermitian matrix -## No pivoting (default) -function cholesky!(A::RealHermSymComplexHerm, ::NoPivot = NoPivot(); check::Bool = true) - C, info = _chol!(A.data, A.uplo == 'U' ? UpperTriangular : LowerTriangular) - check && checkpositivedefinite(info) - return Cholesky(C.data, A.uplo, info) -end - -### for AbstractMatrix, check that matrix is symmetric/Hermitian -""" - cholesky!(A::AbstractMatrix, NoPivot(); check = true) -> Cholesky - -The same as [`cholesky`](@ref), but saves space by overwriting the input `A`, -instead of creating a copy. An [`InexactError`](@ref) exception is thrown if -the factorization produces a number not representable by the element type of -`A`, e.g. for integer types. - -# Examples -```jldoctest -julia> A = [1 2; 2 50] -2×2 Matrix{Int64}: - 1 2 - 2 50 - -julia> cholesky!(A) -ERROR: InexactError: Int64(6.782329983125268) -Stacktrace: -[...] -``` -""" -function cholesky!(A::AbstractMatrix, ::NoPivot = NoPivot(); check::Bool = true) - checksquare(A) - if !ishermitian(A) # return with info = -1 if not Hermitian - check && checkpositivedefinite(-1) - return Cholesky(A, 'U', convert(BlasInt, -1)) - else - return cholesky!(Hermitian(A), NoPivot(); check = check) - end -end -@deprecate cholesky!(A::StridedMatrix, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false -@deprecate cholesky!(A::RealHermSymComplexHerm, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false - -## With pivoting -### BLAS/LAPACK element types -function cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, - ::RowMaximum; tol = 0.0, check::Bool = true) - AA, piv, rank, info = LAPACK.pstrf!(A.uplo, A.data, tol) - C = CholeskyPivoted{eltype(AA),typeof(AA),typeof(piv)}(AA, A.uplo, piv, rank, tol, info) - check && chkfullrank(C) - return C -end -@deprecate cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false - -### Non BLAS/LAPACK element types (generic). Since generic fallback for pivoted Cholesky -### is not implemented yet we throw an error -cholesky!(A::RealHermSymComplexHerm{<:Real}, ::RowMaximum; tol = 0.0, check::Bool = true) = - throw(ArgumentError("generic pivoted Cholesky factorization is not implemented yet")) -@deprecate cholesky!(A::RealHermSymComplexHerm{<:Real}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false - -### for AbstractMatrix, check that matrix is symmetric/Hermitian -""" - cholesky!(A::AbstractMatrix, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted - -The same as [`cholesky`](@ref), but saves space by overwriting the input `A`, -instead of creating a copy. 
An [`InexactError`](@ref) exception is thrown if the -factorization produces a number not representable by the element type of `A`, -e.g. for integer types. -""" -function cholesky!(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true) - checksquare(A) - if !ishermitian(A) - C = CholeskyPivoted(A, 'U', Vector{BlasInt}(),convert(BlasInt, 1), - tol, convert(BlasInt, -1)) - check && chkfullrank(C) - return C - else - return cholesky!(Hermitian(A), RowMaximum(); tol = tol, check = check) - end -end -@deprecate cholesky!(A::StridedMatrix, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false - -# cholesky. Non-destructive methods for computing Cholesky factorization of real symmetric -# or Hermitian matrix -## No pivoting (default) -""" - cholesky(A, NoPivot(); check = true) -> Cholesky - -Compute the Cholesky factorization of a dense symmetric positive definite matrix `A` -and return a [`Cholesky`](@ref) factorization. The matrix `A` can either be a [`Symmetric`](@ref) or [`Hermitian`](@ref) -[`AbstractMatrix`](@ref) or a *perfectly* symmetric or Hermitian `AbstractMatrix`. - -The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`, -where `A ≈ F.U' * F.U ≈ F.L * F.L'`. - -The following functions are available for `Cholesky` objects: [`size`](@ref), [`\\`](@ref), -[`inv`](@ref), [`det`](@ref), [`logdet`](@ref) and [`isposdef`](@ref). - -If you have a matrix `A` that is slightly non-Hermitian due to roundoff errors in its construction, -wrap it in `Hermitian(A)` before passing it to `cholesky` in order to treat it as perfectly Hermitian. - -When `check = true`, an error is thrown if the decomposition fails. -When `check = false`, responsibility for checking the decomposition's -validity (via [`issuccess`](@ref)) lies with the user. - -# Examples -```jldoctest -julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.] -3×3 Matrix{Float64}: - 4.0 12.0 -16.0 - 12.0 37.0 -43.0 - -16.0 -43.0 98.0 - -julia> C = cholesky(A) -Cholesky{Float64, Matrix{Float64}} -U factor: -3×3 UpperTriangular{Float64, Matrix{Float64}}: - 2.0 6.0 -8.0 - ⋅ 1.0 5.0 - ⋅ ⋅ 3.0 - -julia> C.U -3×3 UpperTriangular{Float64, Matrix{Float64}}: - 2.0 6.0 -8.0 - ⋅ 1.0 5.0 - ⋅ ⋅ 3.0 - -julia> C.L -3×3 LowerTriangular{Float64, Matrix{Float64}}: - 2.0 ⋅ ⋅ - 6.0 1.0 ⋅ - -8.0 5.0 3.0 - -julia> C.L * C.U == A -true -``` -""" -cholesky(A::AbstractMatrix, ::NoPivot=NoPivot(); check::Bool = true) = - cholesky!(cholcopy(A); check) -@deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false - -function cholesky(A::AbstractMatrix{Float16}, ::NoPivot=NoPivot(); check::Bool = true) - X = cholesky!(cholcopy(A); check = check) - return Cholesky{Float16}(X) -end -@deprecate cholesky(A::Union{StridedMatrix{Float16},RealHermSymComplexHerm{Float16,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false - -## With pivoting -""" - cholesky(A, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted - -Compute the pivoted Cholesky factorization of a dense symmetric positive semi-definite matrix `A` -and return a [`CholeskyPivoted`](@ref) factorization. The matrix `A` can either be a [`Symmetric`](@ref) -or [`Hermitian`](@ref) [`AbstractMatrix`](@ref) or a *perfectly* symmetric or Hermitian `AbstractMatrix`. 
- -The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`, -and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'` with `Ur = F.U[1:F.rank, :]` -and `Lr = F.L[:, 1:F.rank]`, or alternatively `A ≈ Up' * Up ≈ Lp * Lp'` with -`Up = F.U[1:F.rank, invperm(F.p)]` and `Lp = F.L[invperm(F.p), 1:F.rank]`. - -The following functions are available for `CholeskyPivoted` objects: -[`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref). - -The argument `tol` determines the tolerance for determining the rank. -For negative values, the tolerance is the machine precision. - -If you have a matrix `A` that is slightly non-Hermitian due to roundoff errors in its construction, -wrap it in `Hermitian(A)` before passing it to `cholesky` in order to treat it as perfectly Hermitian. - -When `check = true`, an error is thrown if the decomposition fails. -When `check = false`, responsibility for checking the decomposition's -validity (via [`issuccess`](@ref)) lies with the user. - -# Examples -```jldoctest -julia> X = [1.0, 2.0, 3.0, 4.0]; - -julia> A = X * X'; - -julia> C = cholesky(A, RowMaximum(), check = false) -CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}} -U factor with rank 1: -4×4 UpperTriangular{Float64, Matrix{Float64}}: - 4.0 2.0 3.0 1.0 - ⋅ 0.0 6.0 2.0 - ⋅ ⋅ 9.0 3.0 - ⋅ ⋅ ⋅ 1.0 -permutation: -4-element Vector{Int64}: - 4 - 2 - 3 - 1 - -julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p] -true - -julia> l, u = C; # destructuring via iteration - -julia> l == C.L && u == C.U -true -``` -""" -cholesky(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true) = - cholesky!(cholcopy(A), RowMaximum(); tol, check) -@deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{true}; tol = 0.0, check::Bool = true) cholesky(A, RowMaximum(); tol, check) false - -function cholesky(A::AbstractMatrix{Float16}, ::RowMaximum; tol = 0.0, check::Bool = true) - X = cholesky!(cholcopy(A), RowMaximum(); tol, check) - return CholeskyPivoted{Float16}(X) -end - -## Number -function cholesky(x::Number, uplo::Symbol=:U) - C, info = _chol!(x, uplo) - xf = fill(C, 1, 1) - Cholesky(xf, uplo, info) -end - - -function Cholesky{T}(C::Cholesky) where T - Cnew = convert(AbstractMatrix{T}, C.factors) - Cholesky{T, typeof(Cnew)}(Cnew, C.uplo, C.info) -end -Factorization{T}(C::Cholesky{T}) where {T} = C -Factorization{T}(C::Cholesky) where {T} = Cholesky{T}(C) -CholeskyPivoted{T}(C::CholeskyPivoted{T}) where {T} = C -CholeskyPivoted{T}(C::CholeskyPivoted) where {T} = - CholeskyPivoted(AbstractMatrix{T}(C.factors),C.uplo,C.piv,C.rank,C.tol,C.info) -Factorization{T}(C::CholeskyPivoted{T}) where {T} = C -Factorization{T}(C::CholeskyPivoted) where {T} = CholeskyPivoted{T}(C) - -AbstractMatrix(C::Cholesky) = C.uplo == 'U' ? 
C.U'C.U : C.L*C.L' -AbstractArray(C::Cholesky) = AbstractMatrix(C) -Matrix(C::Cholesky) = Array(AbstractArray(C)) -Array(C::Cholesky) = Matrix(C) - -function AbstractMatrix(F::CholeskyPivoted) - ip = invperm(F.p) - U = F.U[1:F.rank,ip] - U'U -end -AbstractArray(F::CholeskyPivoted) = AbstractMatrix(F) -Matrix(F::CholeskyPivoted) = Array(AbstractArray(F)) -Array(F::CholeskyPivoted) = Matrix(F) - -copy(C::Cholesky) = Cholesky(copy(C.factors), C.uplo, C.info) -copy(C::CholeskyPivoted) = CholeskyPivoted(copy(C.factors), C.uplo, C.piv, C.rank, C.tol, C.info) - -size(C::Union{Cholesky, CholeskyPivoted}) = size(C.factors) -size(C::Union{Cholesky, CholeskyPivoted}, d::Integer) = size(C.factors, d) - -function getproperty(C::Cholesky, d::Symbol) - Cfactors = getfield(C, :factors) - Cuplo = getfield(C, :uplo) - if d === :U - return UpperTriangular(Cuplo === char_uplo(d) ? Cfactors : copy(Cfactors')) - elseif d === :L - return LowerTriangular(Cuplo === char_uplo(d) ? Cfactors : copy(Cfactors')) - elseif d === :UL - return (Cuplo === 'U' ? UpperTriangular(Cfactors) : LowerTriangular(Cfactors)) - else - return getfield(C, d) - end -end -Base.propertynames(F::Cholesky, private::Bool=false) = - (:U, :L, :UL, (private ? fieldnames(typeof(F)) : ())...) - -function getproperty(C::CholeskyPivoted{T}, d::Symbol) where {T} - Cfactors = getfield(C, :factors) - Cuplo = getfield(C, :uplo) - if d === :U - return UpperTriangular(sym_uplo(Cuplo) == d ? Cfactors : copy(Cfactors')) - elseif d === :L - return LowerTriangular(sym_uplo(Cuplo) == d ? Cfactors : copy(Cfactors')) - elseif d === :p - return getfield(C, :piv) - elseif d === :P - n = size(C, 1) - P = zeros(T, n, n) - for i = 1:n - P[getfield(C, :piv)[i], i] = one(T) - end - return P - else - return getfield(C, d) - end -end -Base.propertynames(F::CholeskyPivoted, private::Bool=false) = - (:U, :L, :p, :P, (private ? fieldnames(typeof(F)) : ())...) - -issuccess(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0 - -adjoint(C::Union{Cholesky,CholeskyPivoted}) = C - -function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky) - if issuccess(C) - summary(io, C); println(io) - println(io, "$(C.uplo) factor:") - show(io, mime, C.UL) - else - print(io, "Failed factorization of type $(typeof(C))") - end -end - -function show(io::IO, mime::MIME{Symbol("text/plain")}, C::CholeskyPivoted) - summary(io, C); println(io) - println(io, "$(C.uplo) factor with rank $(rank(C)):") - show(io, mime, C.uplo == 'U' ? 
C.U : C.L) - println(io, "\npermutation:") - show(io, mime, C.p) -end - -ldiv!(C::Cholesky{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.potrs!(C.uplo, C.factors, B) - -function ldiv!(C::Cholesky, B::AbstractVecOrMat) - if C.uplo == 'L' - return ldiv!(adjoint(LowerTriangular(C.factors)), ldiv!(LowerTriangular(C.factors), B)) - else - return ldiv!(UpperTriangular(C.factors), ldiv!(adjoint(UpperTriangular(C.factors)), B)) - end -end - -function ldiv!(C::CholeskyPivoted{T,<:StridedMatrix}, B::StridedVector{T}) where T<:BlasFloat - invpermute!(LAPACK.potrs!(C.uplo, C.factors, permute!(B, C.piv)), C.piv) -end -function ldiv!(C::CholeskyPivoted{T,<:StridedMatrix}, B::StridedMatrix{T}) where T<:BlasFloat - n = size(C, 1) - for i=1:size(B, 2) - permute!(view(B, 1:n, i), C.piv) - end - LAPACK.potrs!(C.uplo, C.factors, B) - for i=1:size(B, 2) - invpermute!(view(B, 1:n, i), C.piv) - end - B -end - -function ldiv!(C::CholeskyPivoted, B::AbstractVector) - if C.uplo == 'L' - ldiv!(adjoint(LowerTriangular(C.factors)), - ldiv!(LowerTriangular(C.factors), permute!(B, C.piv))) - else - ldiv!(UpperTriangular(C.factors), - ldiv!(adjoint(UpperTriangular(C.factors)), permute!(B, C.piv))) - end - invpermute!(B, C.piv) -end - -function ldiv!(C::CholeskyPivoted, B::AbstractMatrix) - n = size(C, 1) - for i in 1:size(B, 2) - permute!(view(B, 1:n, i), C.piv) - end - if C.uplo == 'L' - ldiv!(adjoint(LowerTriangular(C.factors)), - ldiv!(LowerTriangular(C.factors), B)) - else - ldiv!(UpperTriangular(C.factors), - ldiv!(adjoint(UpperTriangular(C.factors)), B)) - end - for i in 1:size(B, 2) - invpermute!(view(B, 1:n, i), C.piv) - end - B -end - -function rdiv!(B::AbstractMatrix, C::Cholesky) - if C.uplo == 'L' - return rdiv!(rdiv!(B, adjoint(LowerTriangular(C.factors))), LowerTriangular(C.factors)) - else - return rdiv!(rdiv!(B, UpperTriangular(C.factors)), adjoint(UpperTriangular(C.factors))) - end -end - -function LinearAlgebra.rdiv!(B::AbstractMatrix, C::CholeskyPivoted) - n = size(C, 2) - for i in 1:size(B, 1) - permute!(view(B, i, 1:n), C.piv) - end - if C.uplo == 'L' - rdiv!(rdiv!(B, adjoint(LowerTriangular(C.factors))), - LowerTriangular(C.factors)) - else - rdiv!(rdiv!(B, UpperTriangular(C.factors)), - adjoint(UpperTriangular(C.factors))) - end - for i in 1:size(B, 1) - invpermute!(view(B, i, 1:n), C.piv) - end - B -end - -isposdef(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0 - -function det(C::Cholesky) - dd = one(real(eltype(C))) - @inbounds for i in 1:size(C.factors,1) - dd *= real(C.factors[i,i])^2 - end - return dd -end - -function logdet(C::Cholesky) - dd = zero(real(eltype(C))) - @inbounds for i in 1:size(C.factors,1) - dd += log(real(C.factors[i,i])) - end - dd + dd # instead of 2.0dd which can change the type -end - -function det(C::CholeskyPivoted) - if C.rank < size(C.factors, 1) - return zero(real(eltype(C))) - else - dd = one(real(eltype(C))) - for i in 1:size(C.factors,1) - dd *= real(C.factors[i,i])^2 - end - return dd - end -end - -function logdet(C::CholeskyPivoted) - if C.rank < size(C.factors, 1) - return real(eltype(C))(-Inf) - else - dd = zero(real(eltype(C))) - for i in 1:size(C.factors,1) - dd += log(real(C.factors[i,i])) - end - return dd + dd # instead of 2.0dd which can change the type - end -end - -logabsdet(C::Union{Cholesky, CholeskyPivoted}) = logdet(C), one(eltype(C)) # since C is p.s.d. 
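A minimal sanity-check sketch for the solve and determinant methods above (matrix and right-hand side chosen arbitrarily):

```julia
using LinearAlgebra

A = [4.0 2.0; 2.0 3.0]        # symmetric positive definite
b = [1.0, 2.0]
F = cholesky(Symmetric(A))

x = F \ b                     # two triangular solves: U' \ (U \ b) when F.uplo == 'U'
@assert A * x ≈ b

@assert det(F) ≈ det(A)                       # product of squared diagonal entries of the factor
@assert logdet(F) ≈ 2 * sum(log, diag(F.U))   # sums logs of the factor's diagonal, then doubles
```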
- -inv!(C::Cholesky{<:BlasFloat,<:StridedMatrix}) = - copytri!(LAPACK.potri!(C.uplo, C.factors), C.uplo, true) - -inv(C::Cholesky{<:BlasFloat,<:StridedMatrix}) = inv!(copy(C)) - -function inv(C::CholeskyPivoted{<:BlasFloat,<:StridedMatrix}) - ipiv = invperm(C.piv) - copytri!(LAPACK.potri!(C.uplo, copy(C.factors)), C.uplo, true)[ipiv, ipiv] -end - -function chkfullrank(C::CholeskyPivoted) - if C.rank < size(C.factors, 1) - throw(RankDeficientException(C.info)) - end -end - -rank(C::CholeskyPivoted) = C.rank - -""" - lowrankupdate!(C::Cholesky, v::AbstractVector) -> CC::Cholesky - -Update a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` then -`CC = cholesky(C.U'C.U + v*v')` but the computation of `CC` only uses `O(n^2)` -operations. The input factorization `C` is updated in place such that on exit `C == CC`. -The vector `v` is destroyed during the computation. -""" -function lowrankupdate!(C::Cholesky, v::AbstractVector) - A = C.factors - n = length(v) - if size(C, 1) != n - throw(DimensionMismatch("updating vector must fit size of factorization")) - end - if C.uplo == 'U' - conj!(v) - end - - for i = 1:n - - # Compute Givens rotation - c, s, r = givensAlgorithm(A[i,i], v[i]) - - # Store new diagonal element - A[i,i] = r - - # Update remaining elements in row/column - if C.uplo == 'U' - for j = i + 1:n - Aij = A[i,j] - vj = v[j] - A[i,j] = c*Aij + s*vj - v[j] = -s'*Aij + c*vj - end - else - for j = i + 1:n - Aji = A[j,i] - vj = v[j] - A[j,i] = c*Aji + s*vj - v[j] = -s'*Aji + c*vj - end - end - end - return C -end - -""" - lowrankdowndate!(C::Cholesky, v::AbstractVector) -> CC::Cholesky - -Downdate a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` then -`CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses `O(n^2)` -operations. The input factorization `C` is updated in place such that on exit `C == CC`. -The vector `v` is destroyed during the computation. -""" -function lowrankdowndate!(C::Cholesky, v::AbstractVector) - A = C.factors - n = length(v) - if size(C, 1) != n - throw(DimensionMismatch("updating vector must fit size of factorization")) - end - if C.uplo == 'U' - conj!(v) - end - - for i = 1:n - - Aii = A[i,i] - - # Compute Givens rotation - s = conj(v[i]/Aii) - s2 = abs2(s) - if s2 > 1 - throw(LinearAlgebra.PosDefException(i)) - end - c = sqrt(1 - abs2(s)) - - # Store new diagonal element - A[i,i] = c*Aii - - # Update remaining elements in row/column - if C.uplo == 'U' - for j = i + 1:n - vj = v[j] - Aij = (A[i,j] - s*vj)/c - A[i,j] = Aij - v[j] = -s'*Aij + c*vj - end - else - for j = i + 1:n - vj = v[j] - Aji = (A[j,i] - s*vj)/c - A[j,i] = Aji - v[j] = -s'*Aji + c*vj - end - end - end - return C -end - -""" - lowrankupdate(C::Cholesky, v::AbstractVector) -> CC::Cholesky - -Update a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` -then `CC = cholesky(C.U'C.U + v*v')` but the computation of `CC` only uses -`O(n^2)` operations. -""" -lowrankupdate(C::Cholesky, v::AbstractVector) = lowrankupdate!(copy(C), copy(v)) - -""" - lowrankdowndate(C::Cholesky, v::AbstractVector) -> CC::Cholesky - -Downdate a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` -then `CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses -`O(n^2)` operations. 
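For instance, the downdated factor should agree, up to roundoff, with refactorizing the
downdated matrix from scratch; a small illustrative sketch (values chosen so that `A - v*v'`
stays positive definite):

```julia
using LinearAlgebra

A = [6.0 2.0; 2.0 5.0]
v = [1.0, 1.0]
C  = cholesky(A)
CC = lowrankdowndate(C, v)    # O(n^2), reuses the existing factor of A
@assert CC.U ≈ cholesky(A - v * v').U
```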
-""" -lowrankdowndate(C::Cholesky, v::AbstractVector) = lowrankdowndate!(copy(C), copy(v)) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl deleted file mode 100644 index b8a44159de8bd..0000000000000 --- a/stdlib/LinearAlgebra/src/dense.jl +++ /dev/null @@ -1,1695 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Linear algebra functions for dense matrices in column major format - -## BLAS cutoff threshold constants - -#TODO const DOT_CUTOFF = 128 -const ASUM_CUTOFF = 32 -const NRM2_CUTOFF = 32 - -# Generic cross-over constant based on benchmarking on a single thread with an i7 CPU @ 2.5GHz -# L1 cache: 32K, L2 cache: 256K, L3 cache: 6144K -# This constant should ideally be determined by the actual CPU cache size -const ISONE_CUTOFF = 2^21 # 2M - -function isone(A::AbstractMatrix) - m, n = size(A) - m != n && return false # only square matrices can satisfy x == one(x) - if sizeof(A) < ISONE_CUTOFF - _isone_triacheck(A, m) - else - _isone_cachefriendly(A, m) - end -end - -@inline function _isone_triacheck(A::AbstractMatrix, m::Int) - @inbounds for i in 1:m, j in i:m - if i == j - isone(A[i,i]) || return false - else - iszero(A[i,j]) && iszero(A[j,i]) || return false - end - end - return true -end - -# Inner loop over rows to be friendly to the CPU cache -@inline function _isone_cachefriendly(A::AbstractMatrix, m::Int) - @inbounds for i in 1:m, j in 1:m - if i == j - isone(A[i,i]) || return false - else - iszero(A[j,i]) || return false - end - end - return true -end - - -""" - isposdef!(A) -> Bool - -Test whether a matrix is positive definite (and Hermitian) by trying to perform a -Cholesky factorization of `A`, overwriting `A` in the process. -See also [`isposdef`](@ref). - -# Examples -```jldoctest -julia> A = [1. 2.; 2. 50.]; - -julia> isposdef!(A) -true - -julia> A -2×2 Matrix{Float64}: - 1.0 2.0 - 2.0 6.78233 -``` -""" -isposdef!(A::AbstractMatrix) = - ishermitian(A) && isposdef(cholesky!(Hermitian(A); check = false)) - -""" - isposdef(A) -> Bool - -Test whether a matrix is positive definite (and Hermitian) by trying to perform a -Cholesky factorization of `A`. - -See also [`isposdef!`](@ref), [`cholesky`](@ref). - -# Examples -```jldoctest -julia> A = [1 2; 2 50] -2×2 Matrix{Int64}: - 1 2 - 2 50 - -julia> isposdef(A) -true -``` -""" -isposdef(A::AbstractMatrix) = - ishermitian(A) && isposdef(cholesky(Hermitian(A); check = false)) -isposdef(x::Number) = imag(x)==0 && real(x) > 0 - -function norm(x::StridedVector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasFloat,TI<:Integer} - if minimum(rx) < 1 || maximum(rx) > length(x) - throw(BoundsError(x, rx)) - end - GC.@preserve x BLAS.nrm2(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx)) -end - -norm1(x::Union{Array{T},StridedVector{T}}) where {T<:BlasReal} = - length(x) < ASUM_CUTOFF ? generic_norm1(x) : BLAS.asum(x) - -norm2(x::Union{Array{T},StridedVector{T}}) where {T<:BlasFloat} = - length(x) < NRM2_CUTOFF ? generic_norm2(x) : BLAS.nrm2(x) - -""" - triu!(M, k::Integer) - -Return the upper triangle of `M` starting from the `k`th superdiagonal, -overwriting `M` in the process. 
- -# Examples -```jldoctest -julia> M = [1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5] -5×5 Matrix{Int64}: - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 - -julia> triu!(M, 1) -5×5 Matrix{Int64}: - 0 2 3 4 5 - 0 0 3 4 5 - 0 0 0 4 5 - 0 0 0 0 5 - 0 0 0 0 0 -``` -""" -function triu!(M::AbstractMatrix, k::Integer) - require_one_based_indexing(M) - m, n = size(M) - for j in 1:min(n, m + k) - for i in max(1, j - k + 1):m - M[i,j] = zero(M[i,j]) - end - end - M -end - -triu(M::Matrix, k::Integer) = triu!(copy(M), k) - -""" - tril!(M, k::Integer) - -Return the lower triangle of `M` starting from the `k`th superdiagonal, overwriting `M` in -the process. - -# Examples -```jldoctest -julia> M = [1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5; 1 2 3 4 5] -5×5 Matrix{Int64}: - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 - -julia> tril!(M, 2) -5×5 Matrix{Int64}: - 1 2 3 0 0 - 1 2 3 4 0 - 1 2 3 4 5 - 1 2 3 4 5 - 1 2 3 4 5 -``` -""" -function tril!(M::AbstractMatrix, k::Integer) - require_one_based_indexing(M) - m, n = size(M) - for j in max(1, k + 1):n - @inbounds for i in 1:min(j - k - 1, m) - M[i,j] = zero(M[i,j]) - end - end - M -end -tril(M::Matrix, k::Integer) = tril!(copy(M), k) - -""" - fillband!(A::AbstractMatrix, x, l, u) - -Fill the band between diagonals `l` and `u` with the value `x`. -""" -function fillband!(A::AbstractMatrix{T}, x, l, u) where T - require_one_based_indexing(A) - m, n = size(A) - xT = convert(T, x) - for j in 1:n - for i in max(1,j-u):min(m,j-l) - @inbounds A[i, j] = xT - end - end - return A -end - -diagind(m::Integer, n::Integer, k::Integer=0) = - k <= 0 ? range(1-k, step=m+1, length=min(m+k, n)) : range(k*m+1, step=m+1, length=min(m, n-k)) - -""" - diagind(M, k::Integer=0) - -An `AbstractRange` giving the indices of the `k`th diagonal of the matrix `M`. - -See also: [`diag`](@ref), [`diagm`](@ref), [`Diagonal`](@ref). - -# Examples -```jldoctest -julia> A = [1 2 3; 4 5 6; 7 8 9] -3×3 Matrix{Int64}: - 1 2 3 - 4 5 6 - 7 8 9 - -julia> diagind(A,-1) -2:4:6 -``` -""" -function diagind(A::AbstractMatrix, k::Integer=0) - require_one_based_indexing(A) - diagind(size(A,1), size(A,2), k) -end - -""" - diag(M, k::Integer=0) - -The `k`th diagonal of a matrix, as a vector. - -See also [`diagm`](@ref), [`diagind`](@ref), [`Diagonal`](@ref), [`isdiag`](@ref). - -# Examples -```jldoctest -julia> A = [1 2 3; 4 5 6; 7 8 9] -3×3 Matrix{Int64}: - 1 2 3 - 4 5 6 - 7 8 9 - -julia> diag(A,1) -2-element Vector{Int64}: - 2 - 6 -``` -""" -diag(A::AbstractMatrix, k::Integer=0) = A[diagind(A,k)] - -""" - diagm(kv::Pair{<:Integer,<:AbstractVector}...) - diagm(m::Integer, n::Integer, kv::Pair{<:Integer,<:AbstractVector}...) - -Construct a matrix from `Pair`s of diagonals and vectors. -Vector `kv.second` will be placed on the `kv.first` diagonal. -By default the matrix is square and its size is inferred -from `kv`, but a non-square size `m`×`n` (padded with zeros as needed) -can be specified by passing `m,n` as the first arguments. -For repeated diagonal indices `kv.first` the values in the corresponding -vectors `kv.second` will be added. - -`diagm` constructs a full matrix; if you want storage-efficient -versions with fast arithmetic, see [`Diagonal`](@ref), [`Bidiagonal`](@ref) -[`Tridiagonal`](@ref) and [`SymTridiagonal`](@ref). 
- -# Examples -```jldoctest -julia> diagm(1 => [1,2,3]) -4×4 Matrix{Int64}: - 0 1 0 0 - 0 0 2 0 - 0 0 0 3 - 0 0 0 0 - -julia> diagm(1 => [1,2,3], -1 => [4,5]) -4×4 Matrix{Int64}: - 0 1 0 0 - 4 0 2 0 - 0 5 0 3 - 0 0 0 0 - -julia> diagm(1 => [1,2,3], 1 => [1,2,3]) -4×4 Matrix{Int64}: - 0 2 0 0 - 0 0 4 0 - 0 0 0 6 - 0 0 0 0 -``` -""" -diagm(kv::Pair{<:Integer,<:AbstractVector}...) = _diagm(nothing, kv...) -diagm(m::Integer, n::Integer, kv::Pair{<:Integer,<:AbstractVector}...) = _diagm((Int(m),Int(n)), kv...) -function _diagm(size, kv::Pair{<:Integer,<:AbstractVector}...) - A = diagm_container(size, kv...) - for p in kv - inds = diagind(A, p.first) - for (i, val) in enumerate(p.second) - A[inds[i]] += val - end - end - return A -end -function diagm_size(size::Nothing, kv::Pair{<:Integer,<:AbstractVector}...) - mnmax = mapreduce(x -> length(x.second) + abs(Int(x.first)), max, kv; init=0) - return mnmax, mnmax -end -function diagm_size(size::Tuple{Int,Int}, kv::Pair{<:Integer,<:AbstractVector}...) - mmax = mapreduce(x -> length(x.second) - min(0,Int(x.first)), max, kv; init=0) - nmax = mapreduce(x -> length(x.second) + max(0,Int(x.first)), max, kv; init=0) - m, n = size - (m ≥ mmax && n ≥ nmax) || throw(DimensionMismatch("invalid size=$size")) - return m, n -end -function diagm_container(size, kv::Pair{<:Integer,<:AbstractVector}...) - T = promote_type(map(x -> eltype(x.second), kv)...) - # For some type `T`, `zero(T)` is not a `T` and `zeros(T, ...)` fails. - U = promote_type(T, typeof(zero(T))) - return zeros(U, diagm_size(size, kv...)...) -end -diagm_container(size, kv::Pair{<:Integer,<:BitVector}...) = - falses(diagm_size(size, kv...)...) - -""" - diagm(v::AbstractVector) - diagm(m::Integer, n::Integer, v::AbstractVector) - -Construct a matrix with elements of the vector as diagonal elements. -By default, the matrix is square and its size is given by -`length(v)`, but a non-square size `m`×`n` can be specified -by passing `m,n` as the first arguments. - -# Examples -```jldoctest -julia> diagm([1,2,3]) -3×3 Matrix{Int64}: - 1 0 0 - 0 2 0 - 0 0 3 -``` -""" -diagm(v::AbstractVector) = diagm(0 => v) -diagm(m::Integer, n::Integer, v::AbstractVector) = diagm(m, n, 0 => v) - -function tr(A::Matrix{T}) where T - n = checksquare(A) - t = zero(T) - @inbounds @simd for i in 1:n - t += A[i,i] - end - t -end - -_kronsize(A::AbstractMatrix, B::AbstractMatrix) = map(*, size(A), size(B)) -_kronsize(A::AbstractMatrix, B::AbstractVector) = (size(A, 1)*length(B), size(A, 2)) -_kronsize(A::AbstractVector, B::AbstractMatrix) = (length(A)*size(B, 1), size(B, 2)) - -""" - kron!(C, A, B) - -Computes the Kronecker product of `A` and `B` and stores the result in `C`, -overwriting the existing content of `C`. This is the in-place version of [`kron`](@ref). - -!!! compat "Julia 1.6" - This function requires Julia 1.6 or later. 
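As a rough usage sketch (shapes chosen to satisfy the size check), the destination must already
have size `(size(A,1)*size(B,1), size(A,2)*size(B,2))`:

```julia
using LinearAlgebra

A = [1 2; 3 4]
B = [0 1; 1 0]
C = Matrix{Int}(undef, 4, 4)   # 2*2 × 2*2 preallocated output
kron!(C, A, B)
@assert C == kron(A, B)
```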
-""" -function kron!(C::AbstractVecOrMat, A::AbstractVecOrMat, B::AbstractVecOrMat) - size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!")) - _kron!(C, A, B) -end -function kron!(c::AbstractVector, a::AbstractVector, b::AbstractVector) - length(c) == length(a) * length(b) || throw(DimensionMismatch("kron!")) - m = firstindex(c) - @inbounds for i in eachindex(a) - ai = a[i] - for k in eachindex(b) - c[m] = ai*b[k] - m += 1 - end - end - return c -end -kron!(c::AbstractVecOrMat, a::AbstractVecOrMat, b::Number) = mul!(c, a, b) -kron!(c::AbstractVecOrMat, a::Number, b::AbstractVecOrMat) = mul!(c, a, b) - -function _kron!(C, A::AbstractMatrix, B::AbstractMatrix) - m = firstindex(C) - @inbounds for j in axes(A,2), l in axes(B,2), i in axes(A,1) - Aij = A[i,j] - for k in axes(B,1) - C[m] = Aij*B[k,l] - m += 1 - end - end - return C -end -function _kron!(C, A::AbstractMatrix, b::AbstractVector) - m = firstindex(C) - @inbounds for j in axes(A,2), i in axes(A,1) - Aij = A[i,j] - for k in eachindex(b) - C[m] = Aij*b[k] - m += 1 - end - end - return C -end -function _kron!(C, a::AbstractVector, B::AbstractMatrix) - m = firstindex(C) - @inbounds for l in axes(B,2), i in eachindex(a) - ai = a[i] - for k in axes(B,1) - C[m] = ai*B[k,l] - m += 1 - end - end - return C -end - -""" - kron(A, B) - -Computes the Kronecker product of two vectors, matrices or numbers. - -For real vectors `v` and `w`, the Kronecker product is related to the outer product by -`kron(v,w) == vec(w * transpose(v))` or -`w * transpose(v) == reshape(kron(v,w), (length(w), length(v)))`. -Note how the ordering of `v` and `w` differs on the left and right -of these expressions (due to column-major storage). -For complex vectors, the outer product `w * v'` also differs by conjugation of `v`. - -# Examples -```jldoctest -julia> A = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> B = [im 1; 1 -im] -2×2 Matrix{Complex{Int64}}: - 0+1im 1+0im - 1+0im 0-1im - -julia> kron(A, B) -4×4 Matrix{Complex{Int64}}: - 0+1im 1+0im 0+2im 2+0im - 1+0im 0-1im 2+0im 0-2im - 0+3im 3+0im 0+4im 4+0im - 3+0im 0-3im 4+0im 0-4im - -julia> v = [1, 2]; w = [3, 4, 5]; - -julia> w*transpose(v) -3×2 Matrix{Int64}: - 3 6 - 4 8 - 5 10 - -julia> reshape(kron(v,w), (length(w), length(v))) -3×2 Matrix{Int64}: - 3 6 - 4 8 - 5 10 -``` -""" -function kron(A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}) where {T,S} - R = Matrix{promote_op(*,T,S)}(undef, _kronsize(A, B)) - return kron!(R, A, B) -end -function kron(a::AbstractVector{T}, b::AbstractVector{S}) where {T,S} - c = Vector{promote_op(*,T,S)}(undef, length(a)*length(b)) - return kron!(c, a, b) -end -kron(a::Number, b::Union{Number, AbstractVecOrMat}) = a * b -kron(a::AbstractVecOrMat, b::Number) = a * b -kron(a::AdjointAbsVec, b::AdjointAbsVec) = adjoint(kron(adjoint(a), adjoint(b))) -kron(a::AdjOrTransAbsVec, b::AdjOrTransAbsVec) = transpose(kron(transpose(a), transpose(b))) - -# Matrix power -(^)(A::AbstractMatrix, p::Integer) = p < 0 ? power_by_squaring(inv(A), -p) : power_by_squaring(A, p) -function (^)(A::AbstractMatrix{T}, p::Integer) where T<:Integer - # make sure that e.g. [1 1;1 0]^big(3) - # gets promotes in a similar way as 2^big(3) - TT = promote_op(^, T, typeof(p)) - return power_by_squaring(convert(AbstractMatrix{TT}, A), p) -end -function integerpow(A::AbstractMatrix{T}, p) where T - TT = promote_op(^, T, typeof(p)) - return (TT == T ? 
A : convert(AbstractMatrix{TT}, A))^Integer(p) -end -function schurpow(A::AbstractMatrix, p) - if istriu(A) - # Integer part - retmat = A ^ floor(p) - # Real part - if p - floor(p) == 0.5 - # special case: A^0.5 === sqrt(A) - retmat = retmat * sqrt(A) - else - retmat = retmat * powm!(UpperTriangular(float.(A)), real(p - floor(p))) - end - else - S,Q,d = Schur{Complex}(schur(A)) - # Integer part - R = S ^ floor(p) - # Real part - if p - floor(p) == 0.5 - # special case: A^0.5 === sqrt(A) - R = R * sqrt(S) - else - R = R * powm!(UpperTriangular(float.(S)), real(p - floor(p))) - end - retmat = Q * R * Q' - end - - # if A has nonpositive real eigenvalues, retmat is a nonprincipal matrix power. - if isreal(retmat) - return real(retmat) - else - return retmat - end -end -function (^)(A::AbstractMatrix{T}, p::Real) where T - n = checksquare(A) - - # Quicker return if A is diagonal - if isdiag(A) - TT = promote_op(^, T, typeof(p)) - retmat = copymutable_oftype(A, TT) - for i in 1:n - retmat[i, i] = retmat[i, i] ^ p - end - return retmat - end - - # For integer powers, use power_by_squaring - isinteger(p) && return integerpow(A, p) - - # If possible, use diagonalization - if issymmetric(A) - return (Symmetric(A)^p) - end - if ishermitian(A) - return (Hermitian(A)^p) - end - - # Otherwise, use Schur decomposition - return schurpow(A, p) -end - -""" - ^(A::AbstractMatrix, p::Number) - -Matrix power, equivalent to ``\\exp(p\\log(A))`` - -# Examples -```jldoctest -julia> [1 2; 0 3]^3 -2×2 Matrix{Int64}: - 1 26 - 0 27 -``` -""" -(^)(A::AbstractMatrix, p::Number) = exp(p*log(A)) - -# Matrix exponential - -""" - exp(A::AbstractMatrix) - -Compute the matrix exponential of `A`, defined by - -```math -e^A = \\sum_{n=0}^{\\infty} \\frac{A^n}{n!}. -``` - -For symmetric or Hermitian `A`, an eigendecomposition ([`eigen`](@ref)) is -used, otherwise the scaling and squaring algorithm (see [^H05]) is chosen. - -[^H05]: Nicholas J. Higham, "The squaring and scaling method for the matrix exponential revisited", SIAM Journal on Matrix Analysis and Applications, 26(4), 2005, 1179-1193. [doi:10.1137/090768539](https://doi.org/10.1137/090768539) - -# Examples -```jldoctest -julia> A = Matrix(1.0I, 2, 2) -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 1.0 - -julia> exp(A) -2×2 Matrix{Float64}: - 2.71828 0.0 - 0.0 2.71828 -``` -""" -exp(A::AbstractMatrix) = exp!(copy_similar(A, eigtype(eltype(A)))) -exp(A::AdjointAbsMat) = adjoint(exp(parent(A))) -exp(A::TransposeAbsMat) = transpose(exp(parent(A))) - -""" - cis(A::AbstractMatrix) - -More efficient method for `exp(im*A)` of square matrix `A` -(especially if `A` is `Hermitian` or real-`Symmetric`). - -See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref). - -!!! compat "Julia 1.7" - Support for using `cis` with matrices was added in Julia 1.7. - -# Examples -```jldoctest -julia> cis([π 0; 0 π]) ≈ -I -true -``` -""" -cis(A::AbstractMatrix) = exp(im * A) # fallback -cis(A::AbstractMatrix{<:Base.HWNumber}) = exp_maybe_inplace(float.(im .* A)) - -exp_maybe_inplace(A::StridedMatrix{<:Union{ComplexF32, ComplexF64}}) = exp!(A) -exp_maybe_inplace(A) = exp(A) - -""" - ^(b::Number, A::AbstractMatrix) - -Matrix exponential, equivalent to ``\\exp(\\log(b)A)``. - -!!! compat "Julia 1.1" - Support for raising `Irrational` numbers (like `ℯ`) - to a matrix was added in Julia 1.1. 
- -# Examples -```jldoctest -julia> 2^[1 2; 0 3] -2×2 Matrix{Float64}: - 2.0 6.0 - 0.0 8.0 - -julia> ℯ^[1 2; 0 3] -2×2 Matrix{Float64}: - 2.71828 17.3673 - 0.0 20.0855 -``` -""" -Base.:^(b::Number, A::AbstractMatrix) = exp!(log(b)*A) -# method for ℯ to explicitly elide the log(b) multiplication -Base.:^(::Irrational{:ℯ}, A::AbstractMatrix) = exp(A) - -## Destructive matrix exponential using algorithm from Higham, 2008, -## "Functions of Matrices: Theory and Computation", SIAM -function exp!(A::StridedMatrix{T}) where T<:BlasFloat - n = checksquare(A) - if ishermitian(A) - return copytri!(parent(exp(Hermitian(A))), 'U', true) - end - ilo, ihi, scale = LAPACK.gebal!('B', A) # modifies A - nA = opnorm(A, 1) - ## For sufficiently small nA, use lower order Padé-Approximations - if (nA <= 2.1) - if nA > 0.95 - C = T[17643225600.,8821612800.,2075673600.,302702400., - 30270240., 2162160., 110880., 3960., - 90., 1.] - elseif nA > 0.25 - C = T[17297280.,8648640.,1995840.,277200., - 25200., 1512., 56., 1.] - elseif nA > 0.015 - C = T[30240.,15120.,3360., - 420., 30., 1.] - else - C = T[120.,60.,12.,1.] - end - A2 = A * A - # Compute U and V: Even/odd terms in Padé numerator & denom - # Expansion of k=1 in for loop - P = A2 - U = mul!(C[4]*P, true, C[2]*I, true, true) #U = C[2]*I + C[4]*P - V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P - for k in 2:(div(length(C), 2) - 1) - P *= A2 - mul!(U, C[2k + 2], P, true, true) # U += C[2k+2]*P - mul!(V, C[2k + 1], P, true, true) # V += C[2k+1]*P - end - - U = A * U - - # Padé approximant: (V-U)\(V+U) - tmp1, tmp2 = A, A2 # Reuse already allocated arrays - tmp1 .= V .- U - tmp2 .= V .+ U - X = LAPACK.gesv!(tmp1, tmp2)[1] - else - s = log2(nA/5.4) # power of 2 later reversed by squaring - if s > 0 - si = ceil(Int,s) - A ./= convert(T,2^si) - end - CC = T[64764752532480000.,32382376266240000.,7771770303897600., - 1187353796428800., 129060195264000., 10559470521600., - 670442572800., 33522128640., 1323241920., - 40840800., 960960., 16380., - 182., 1.] 
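    # CC[k] holds b_{k-1}: the coefficients b_0, ..., b_13 of the polynomial
    # p_13(x) = b_0 + b_1*x + ... + b_13*x^13, the numerator of the diagonal
    # [13/13] Padé approximant to exp (Higham 2005). Below, V accumulates the
    # even-power terms and U the odd-power terms times A, so (V - U) \ (V + U)
    # gives r_13 of the (already rescaled) A; the scaling by 2^si applied above
    # is then undone by si repeated squarings of the result.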
- A2 = A * A - A4 = A2 * A2 - A6 = A2 * A4 - tmp1, tmp2 = similar(A6), similar(A6) - - # Allocation economical version of: - # U = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+ - # CC[8].*A6 .+ CC[6].*A4 .+ CC[4]*A2+CC[2]*I) - tmp1 .= CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2 - tmp2 .= CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2 - mul!(tmp2, true,CC[2]*I, true, true) # tmp2 .+= CC[2]*I - U = mul!(tmp2, A6, tmp1, true, true) - U, tmp1 = mul!(tmp1, A, U), A # U = A * U0 - - # Allocation economical version of: - # V = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+ - # CC[7].*A6 .+ CC[5].*A4 .+ CC[3]*A2 .+ CC[1]*I - tmp1 .= CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2 - tmp2 .= CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2 - mul!(tmp2, true, CC[1]*I, true, true) # tmp2 .+= CC[1]*I - V = mul!(tmp2, A6, tmp1, true, true) - - tmp1 .= V .+ U - tmp2 .= V .- U # tmp2 already contained V but this seems more readable - X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008 - - if s > 0 - # Repeated squaring to compute X = r_13^(2^si) - for t=1:si - mul!(tmp2, X, X) - X, tmp2 = tmp2, X - end - end - end - - # Undo the balancing - for j = ilo:ihi - scj = scale[j] - for i = 1:n - X[j,i] *= scj - end - for i = 1:n - X[i,j] /= scj - end - end - - if ilo > 1 # apply lower permutations in reverse order - for j in (ilo-1):-1:1; rcswap!(j, Int(scale[j]), X) end - end - if ihi < n # apply upper permutations in forward order - for j in (ihi+1):n; rcswap!(j, Int(scale[j]), X) end - end - X -end - -## Swap rows i and j and columns i and j in X -function rcswap!(i::Integer, j::Integer, X::AbstractMatrix{<:Number}) - for k = 1:size(X,1) - X[k,i], X[k,j] = X[k,j], X[k,i] - end - for k = 1:size(X,2) - X[i,k], X[j,k] = X[j,k], X[i,k] - end -end - -""" - log(A::AbstractMatrix) - -If `A` has no negative real eigenvalue, compute the principal matrix logarithm of `A`, i.e. -the unique matrix ``X`` such that ``e^X = A`` and ``-\\pi < Im(\\lambda) < \\pi`` for all -the eigenvalues ``\\lambda`` of ``X``. If `A` has nonpositive eigenvalues, a nonprincipal -matrix function is returned whenever possible. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is -used, if `A` is triangular an improved version of the inverse scaling and squaring method is -employed (see [^AH12] and [^AHR13]). If `A` is real with no negative eigenvalues, then -the real Schur form is computed. Otherwise, the complex Schur form is computed. Then -the upper (quasi-)triangular algorithm in [^AHR13] is used on the upper (quasi-)triangular -factor. - -[^AH12]: Awad H. Al-Mohy and Nicholas J. Higham, "Improved inverse scaling and squaring algorithms for the matrix logarithm", SIAM Journal on Scientific Computing, 34(4), 2012, C153-C169. [doi:10.1137/110852553](https://doi.org/10.1137/110852553) - -[^AHR13]: Awad H. Al-Mohy, Nicholas J. Higham and Samuel D. Relton, "Computing the Fréchet derivative of the matrix logarithm and estimating the condition number", SIAM Journal on Scientific Computing, 35(4), 2013, C394-C410. [doi:10.1137/120885991](https://doi.org/10.1137/120885991) - -# Examples -```jldoctest -julia> A = Matrix(2.7182818*I, 2, 2) -2×2 Matrix{Float64}: - 2.71828 0.0 - 0.0 2.71828 - -julia> log(A) -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 1.0 -``` -""" -function log(A::AbstractMatrix) - # If possible, use diagonalization - if ishermitian(A) - logHermA = log(Hermitian(A)) - return ishermitian(logHermA) ? 
copytri!(parent(logHermA), 'U', true) : parent(logHermA) - elseif istriu(A) - return triu!(parent(log(UpperTriangular(A)))) - elseif isreal(A) - SchurF = schur(real(A)) - if istriu(SchurF.T) - logA = SchurF.Z * log(UpperTriangular(SchurF.T)) * SchurF.Z' - else - # real log exists whenever all eigenvalues are positive - is_log_real = !any(x -> isreal(x) && real(x) ≤ 0, SchurF.values) - if is_log_real - logA = SchurF.Z * log_quasitriu(SchurF.T) * SchurF.Z' - else - SchurS = Schur{Complex}(SchurF) - logA = SchurS.Z * log(UpperTriangular(SchurS.T)) * SchurS.Z' - end - end - return eltype(A) <: Complex ? complex(logA) : logA - else - SchurF = schur(A) - return SchurF.vectors * log(UpperTriangular(SchurF.T)) * SchurF.vectors' - end -end - -log(A::AdjointAbsMat) = adjoint(log(parent(A))) -log(A::TransposeAbsMat) = transpose(log(parent(A))) - -""" - sqrt(A::AbstractMatrix) - -If `A` has no negative real eigenvalues, compute the principal matrix square root of `A`, -that is the unique matrix ``X`` with eigenvalues having positive real part such that -``X^2 = A``. Otherwise, a nonprincipal square root is returned. - -If `A` is real-symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is -used to compute the square root. For such matrices, eigenvalues λ that -appear to be slightly negative due to roundoff errors are treated as if they were zero. -More precisely, matrices with all eigenvalues `≥ -rtol*(max |λ|)` are treated as semidefinite -(yielding a Hermitian square root), with negative eigenvalues taken to be zero. -`rtol` is a keyword argument to `sqrt` (in the Hermitian/real-symmetric case only) that -defaults to machine precision scaled by `size(A,1)`. - -Otherwise, the square root is determined by means of the -Björck-Hammarling method [^BH83], which computes the complex Schur form ([`schur`](@ref)) -and then the complex square root of the triangular factor. -If a real square root exists, then an extension of this method [^H87] that computes the real -Schur form and then the real square root of the quasi-triangular factor is instead used. - -[^BH83]: - - Åke Björck and Sven Hammarling, "A Schur method for the square root of a matrix", - Linear Algebra and its Applications, 52-53, 1983, 127-140. - [doi:10.1016/0024-3795(83)80010-X](https://doi.org/10.1016/0024-3795(83)80010-X) - -[^H87]: - - Nicholas J. Higham, "Computing real square roots of a real matrix", - Linear Algebra and its Applications, 88-89, 1987, 405-430. - [doi:10.1016/0024-3795(87)90118-2](https://doi.org/10.1016/0024-3795(87)90118-2) - -# Examples -```jldoctest -julia> A = [4 0; 0 4] -2×2 Matrix{Int64}: - 4 0 - 0 4 - -julia> sqrt(A) -2×2 Matrix{Float64}: - 2.0 0.0 - 0.0 2.0 -``` -""" -sqrt(::AbstractMatrix) - -function sqrt(A::AbstractMatrix{T}) where {T<:Union{Real,Complex}} - if checksquare(A) == 0 - return copy(A) - elseif ishermitian(A) - sqrtHermA = sqrt(Hermitian(A)) - return ishermitian(sqrtHermA) ? 
copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA) - elseif istriu(A) - return triu!(parent(sqrt(UpperTriangular(A)))) - elseif isreal(A) - SchurF = schur(real(A)) - if istriu(SchurF.T) - sqrtA = SchurF.Z * sqrt(UpperTriangular(SchurF.T)) * SchurF.Z' - else - # real sqrt exists whenever no eigenvalues are negative - is_sqrt_real = !any(x -> isreal(x) && real(x) < 0, SchurF.values) - # sqrt_quasitriu uses LAPACK functions for non-triu inputs - if typeof(sqrt(zero(T))) <: BlasFloat && is_sqrt_real - sqrtA = SchurF.Z * sqrt_quasitriu(SchurF.T) * SchurF.Z' - else - SchurS = Schur{Complex}(SchurF) - sqrtA = SchurS.Z * sqrt(UpperTriangular(SchurS.T)) * SchurS.Z' - end - end - return eltype(A) <: Complex ? complex(sqrtA) : sqrtA - else - SchurF = schur(A) - return SchurF.vectors * sqrt(UpperTriangular(SchurF.T)) * SchurF.vectors' - end -end - -sqrt(A::AdjointAbsMat) = adjoint(sqrt(parent(A))) -sqrt(A::TransposeAbsMat) = transpose(sqrt(parent(A))) - -function inv(A::StridedMatrix{T}) where T - checksquare(A) - if istriu(A) - Ai = triu!(parent(inv(UpperTriangular(A)))) - elseif istril(A) - Ai = tril!(parent(inv(LowerTriangular(A)))) - else - Ai = inv!(lu(A)) - Ai = convert(typeof(parent(Ai)), Ai) - end - return Ai -end - -""" - cos(A::AbstractMatrix) - -Compute the matrix cosine of a square matrix `A`. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to -compute the cosine. Otherwise, the cosine is determined by calling [`exp`](@ref). - -# Examples -```jldoctest -julia> cos(fill(1.0, (2,2))) -2×2 Matrix{Float64}: - 0.291927 -0.708073 - -0.708073 0.291927 -``` -""" -function cos(A::AbstractMatrix{<:Real}) - if issymmetric(A) - return copytri!(parent(cos(Symmetric(A))), 'U') - end - T = complex(float(eltype(A))) - return real(exp!(T.(im .* A))) -end -function cos(A::AbstractMatrix{<:Complex}) - if ishermitian(A) - return copytri!(parent(cos(Hermitian(A))), 'U', true) - end - T = complex(float(eltype(A))) - X = exp!(T.(im .* A)) - @. X = (X + $exp!(T(-im*A))) / 2 - return X -end - -""" - sin(A::AbstractMatrix) - -Compute the matrix sine of a square matrix `A`. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to -compute the sine. Otherwise, the sine is determined by calling [`exp`](@ref). - -# Examples -```jldoctest -julia> sin(fill(1.0, (2,2))) -2×2 Matrix{Float64}: - 0.454649 0.454649 - 0.454649 0.454649 -``` -""" -function sin(A::AbstractMatrix{<:Real}) - if issymmetric(A) - return copytri!(parent(sin(Symmetric(A))), 'U') - end - T = complex(float(eltype(A))) - return imag(exp!(T.(im .* A))) -end -function sin(A::AbstractMatrix{<:Complex}) - if ishermitian(A) - return copytri!(parent(sin(Hermitian(A))), 'U', true) - end - T = complex(float(eltype(A))) - X = exp!(T.(im .* A)) - Y = exp!(T.(.-im .* A)) - @inbounds for i in eachindex(X) - x, y = X[i]/2, Y[i]/2 - X[i] = Complex(imag(x)-imag(y), real(y)-real(x)) - end - return X -end - -""" - sincos(A::AbstractMatrix) - -Compute the matrix sine and cosine of a square matrix `A`. 
- -# Examples -```jldoctest -julia> S, C = sincos(fill(1.0, (2,2))); - -julia> S -2×2 Matrix{Float64}: - 0.454649 0.454649 - 0.454649 0.454649 - -julia> C -2×2 Matrix{Float64}: - 0.291927 -0.708073 - -0.708073 0.291927 -``` -""" -function sincos(A::AbstractMatrix{<:Real}) - if issymmetric(A) - symsinA, symcosA = sincos(Symmetric(A)) - sinA = copytri!(parent(symsinA), 'U') - cosA = copytri!(parent(symcosA), 'U') - return sinA, cosA - end - T = complex(float(eltype(A))) - c, s = reim(exp!(T.(im .* A))) - return s, c -end -function sincos(A::AbstractMatrix{<:Complex}) - if ishermitian(A) - hermsinA, hermcosA = sincos(Hermitian(A)) - sinA = copytri!(parent(hermsinA), 'U', true) - cosA = copytri!(parent(hermcosA), 'U', true) - return sinA, cosA - end - T = complex(float(eltype(A))) - X = exp!(T.(im .* A)) - Y = exp!(T.(.-im .* A)) - @inbounds for i in eachindex(X) - x, y = X[i]/2, Y[i]/2 - X[i] = Complex(imag(x)-imag(y), real(y)-real(x)) - Y[i] = x+y - end - return X, Y -end - -""" - tan(A::AbstractMatrix) - -Compute the matrix tangent of a square matrix `A`. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to -compute the tangent. Otherwise, the tangent is determined by calling [`exp`](@ref). - -# Examples -```jldoctest -julia> tan(fill(1.0, (2,2))) -2×2 Matrix{Float64}: - -1.09252 -1.09252 - -1.09252 -1.09252 -``` -""" -function tan(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(tan(Hermitian(A))), 'U', true) - end - S, C = sincos(A) - S /= C - return S -end - -""" - cosh(A::AbstractMatrix) - -Compute the matrix hyperbolic cosine of a square matrix `A`. -""" -function cosh(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(cosh(Hermitian(A))), 'U', true) - end - X = exp(A) - @. X = (X + $exp!(float(-A))) / 2 - return X -end - -""" - sinh(A::AbstractMatrix) - -Compute the matrix hyperbolic sine of a square matrix `A`. -""" -function sinh(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(sinh(Hermitian(A))), 'U', true) - end - X = exp(A) - @. X = (X - $exp!(float(-A))) / 2 - return X -end - -""" - tanh(A::AbstractMatrix) - -Compute the matrix hyperbolic tangent of a square matrix `A`. -""" -function tanh(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(tanh(Hermitian(A))), 'U', true) - end - X = exp(A) - Y = exp!(float.(.-A)) - @inbounds for i in eachindex(X) - x, y = X[i], Y[i] - X[i] = x - y - Y[i] = x + y - end - X /= Y - return X -end - -""" - acos(A::AbstractMatrix) - -Compute the inverse matrix cosine of a square matrix `A`. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to -compute the inverse cosine. Otherwise, the inverse cosine is determined by using -[`log`](@ref) and [`sqrt`](@ref). For the theory and logarithmic formulas used to compute -this function, see [^AH16_1]. - -[^AH16_1]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577) - -# Examples -```julia-repl -julia> acos(cos([0.5 0.1; -0.2 0.3])) -2×2 Matrix{ComplexF64}: - 0.5-8.32667e-17im 0.1+0.0im - -0.2+2.63678e-16im 0.3-3.46945e-16im -``` -""" -function acos(A::AbstractMatrix) - if ishermitian(A) - acosHermA = acos(Hermitian(A)) - return isa(acosHermA, Hermitian) ? 
copytri!(parent(acosHermA), 'U', true) : parent(acosHermA) - end - SchurF = Schur{Complex}(schur(A)) - U = UpperTriangular(SchurF.T) - R = triu!(parent(-im * log(U + im * sqrt(I - U^2)))) - return SchurF.Z * R * SchurF.Z' -end - -""" - asin(A::AbstractMatrix) - -Compute the inverse matrix sine of a square matrix `A`. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to -compute the inverse sine. Otherwise, the inverse sine is determined by using [`log`](@ref) -and [`sqrt`](@ref). For the theory and logarithmic formulas used to compute this function, -see [^AH16_2]. - -[^AH16_2]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577) - -# Examples -```julia-repl -julia> asin(sin([0.5 0.1; -0.2 0.3])) -2×2 Matrix{ComplexF64}: - 0.5-4.16334e-17im 0.1-5.55112e-17im - -0.2+9.71445e-17im 0.3-1.249e-16im -``` -""" -function asin(A::AbstractMatrix) - if ishermitian(A) - asinHermA = asin(Hermitian(A)) - return isa(asinHermA, Hermitian) ? copytri!(parent(asinHermA), 'U', true) : parent(asinHermA) - end - SchurF = Schur{Complex}(schur(A)) - U = UpperTriangular(SchurF.T) - R = triu!(parent(-im * log(im * U + sqrt(I - U^2)))) - return SchurF.Z * R * SchurF.Z' -end - -""" - atan(A::AbstractMatrix) - -Compute the inverse matrix tangent of a square matrix `A`. - -If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is used to -compute the inverse tangent. Otherwise, the inverse tangent is determined by using -[`log`](@ref). For the theory and logarithmic formulas used to compute this function, see -[^AH16_3]. - -[^AH16_3]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577) - -# Examples -```julia-repl -julia> atan(tan([0.5 0.1; -0.2 0.3])) -2×2 Matrix{ComplexF64}: - 0.5+1.38778e-17im 0.1-2.77556e-17im - -0.2+6.93889e-17im 0.3-4.16334e-17im -``` -""" -function atan(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(atan(Hermitian(A))), 'U', true) - end - SchurF = Schur{Complex}(schur(A)) - U = im * UpperTriangular(SchurF.T) - R = triu!(parent(log((I + U) / (I - U)) / 2im)) - return SchurF.Z * R * SchurF.Z' -end - -""" - acosh(A::AbstractMatrix) - -Compute the inverse hyperbolic matrix cosine of a square matrix `A`. For the theory and -logarithmic formulas used to compute this function, see [^AH16_4]. - -[^AH16_4]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577) -""" -function acosh(A::AbstractMatrix) - if ishermitian(A) - acoshHermA = acosh(Hermitian(A)) - return isa(acoshHermA, Hermitian) ? copytri!(parent(acoshHermA), 'U', true) : parent(acoshHermA) - end - SchurF = Schur{Complex}(schur(A)) - U = UpperTriangular(SchurF.T) - R = triu!(parent(log(U + sqrt(U - I) * sqrt(U + I)))) - return SchurF.Z * R * SchurF.Z' -end - -""" - asinh(A::AbstractMatrix) - -Compute the inverse hyperbolic matrix sine of a square matrix `A`. For the theory and -logarithmic formulas used to compute this function, see [^AH16_5]. - -[^AH16_5]: Mary Aprahamian and Nicholas J. 
Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577) -""" -function asinh(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(asinh(Hermitian(A))), 'U', true) - end - SchurF = Schur{Complex}(schur(A)) - U = UpperTriangular(SchurF.T) - R = triu!(parent(log(U + sqrt(I + U^2)))) - return SchurF.Z * R * SchurF.Z' -end - -""" - atanh(A::AbstractMatrix) - -Compute the inverse hyperbolic matrix tangent of a square matrix `A`. For the theory and -logarithmic formulas used to compute this function, see [^AH16_6]. - -[^AH16_6]: Mary Aprahamian and Nicholas J. Higham, "Matrix Inverse Trigonometric and Inverse Hyperbolic Functions: Theory and Algorithms", MIMS EPrint: 2016.4. [https://doi.org/10.1137/16M1057577](https://doi.org/10.1137/16M1057577) -""" -function atanh(A::AbstractMatrix) - if ishermitian(A) - return copytri!(parent(atanh(Hermitian(A))), 'U', true) - end - SchurF = Schur{Complex}(schur(A)) - U = UpperTriangular(SchurF.T) - R = triu!(parent(log((I + U) / (I - U)) / 2)) - return SchurF.Z * R * SchurF.Z' -end - -for (finv, f, finvh, fh, fn) in ((:sec, :cos, :sech, :cosh, "secant"), - (:csc, :sin, :csch, :sinh, "cosecant"), - (:cot, :tan, :coth, :tanh, "cotangent")) - name = string(finv) - hname = string(finvh) - @eval begin - @doc """ - $($name)(A::AbstractMatrix) - - Compute the matrix $($fn) of a square matrix `A`. - """ ($finv)(A::AbstractMatrix{T}) where {T} = inv(($f)(A)) - @doc """ - $($hname)(A::AbstractMatrix) - - Compute the matrix hyperbolic $($fn) of square matrix `A`. - """ ($finvh)(A::AbstractMatrix{T}) where {T} = inv(($fh)(A)) - end -end - -for (tfa, tfainv, hfa, hfainv, fn) in ((:asec, :acos, :asech, :acosh, "secant"), - (:acsc, :asin, :acsch, :asinh, "cosecant"), - (:acot, :atan, :acoth, :atanh, "cotangent")) - tname = string(tfa) - hname = string(hfa) - @eval begin - @doc """ - $($tname)(A::AbstractMatrix) - Compute the inverse matrix $($fn) of `A`. """ ($tfa)(A::AbstractMatrix{T}) where {T} = ($tfainv)(inv(A)) - @doc """ - $($hname)(A::AbstractMatrix) - Compute the inverse matrix hyperbolic $($fn) of `A`. """ ($hfa)(A::AbstractMatrix{T}) where {T} = ($hfainv)(inv(A)) - end -end - -""" - factorize(A) - -Compute a convenient factorization of `A`, based upon the type of the input matrix. -`factorize` checks `A` to see if it is symmetric/triangular/etc. if `A` is passed -as a generic matrix. `factorize` checks every element of `A` to verify/rule out -each property. It will short-circuit as soon as it can rule out symmetry/triangular -structure. The return value can be reused for efficient solving of multiple -systems. For example: `A=factorize(A); x=A\\b; y=A\\C`. - -| Properties of `A` | type of factorization | -|:---------------------------|:-----------------------------------------------| -| Positive-definite | Cholesky (see [`cholesky`](@ref)) | -| Dense Symmetric/Hermitian | Bunch-Kaufman (see [`bunchkaufman`](@ref)) | -| Sparse Symmetric/Hermitian | LDLt (see [`ldlt`](@ref)) | -| Triangular | Triangular | -| Diagonal | Diagonal | -| Bidiagonal | Bidiagonal | -| Tridiagonal | LU (see [`lu`](@ref)) | -| Symmetric real tridiagonal | LDLt (see [`ldlt`](@ref)) | -| General square | LU (see [`lu`](@ref)) | -| General non-square | QR (see [`qr`](@ref)) | - -If `factorize` is called on a Hermitian positive-definite matrix, for instance, then `factorize` -will return a Cholesky factorization. 
- -# Examples -```jldoctest -julia> A = Array(Bidiagonal(fill(1.0, (5, 5)), :U)) -5×5 Matrix{Float64}: - 1.0 1.0 0.0 0.0 0.0 - 0.0 1.0 1.0 0.0 0.0 - 0.0 0.0 1.0 1.0 0.0 - 0.0 0.0 0.0 1.0 1.0 - 0.0 0.0 0.0 0.0 1.0 - -julia> factorize(A) # factorize will check to see that A is already factorized -5×5 Bidiagonal{Float64, Vector{Float64}}: - 1.0 1.0 ⋅ ⋅ ⋅ - ⋅ 1.0 1.0 ⋅ ⋅ - ⋅ ⋅ 1.0 1.0 ⋅ - ⋅ ⋅ ⋅ 1.0 1.0 - ⋅ ⋅ ⋅ ⋅ 1.0 -``` -This returns a `5×5 Bidiagonal{Float64}`, which can now be passed to other linear algebra functions -(e.g. eigensolvers) which will use specialized methods for `Bidiagonal` types. -""" -function factorize(A::AbstractMatrix{T}) where T - m, n = size(A) - if m == n - if m == 1 return A[1] end - utri = true - utri1 = true - herm = true - sym = true - for j = 1:n-1, i = j+1:m - if utri1 - if A[i,j] != 0 - utri1 = i == j + 1 - utri = false - end - end - if sym - sym &= A[i,j] == A[j,i] - end - if herm - herm &= A[i,j] == conj(A[j,i]) - end - if !(utri1|herm|sym) break end - end - ltri = true - ltri1 = true - for j = 3:n, i = 1:j-2 - ltri1 &= A[i,j] == 0 - if !ltri1 break end - end - if ltri1 - for i = 1:n-1 - if A[i,i+1] != 0 - ltri &= false - break - end - end - if ltri - if utri - return Diagonal(A) - end - if utri1 - return Bidiagonal(diag(A), diag(A, -1), :L) - end - return LowerTriangular(A) - end - if utri - return Bidiagonal(diag(A), diag(A, 1), :U) - end - if utri1 - # TODO: enable once a specialized, non-dense bunchkaufman method exists - # if (herm & (T <: Complex)) | sym - # return bunchkaufman(SymTridiagonal(diag(A), diag(A, -1))) - # end - return lu(Tridiagonal(diag(A, -1), diag(A), diag(A, 1))) - end - end - if utri - return UpperTriangular(A) - end - if herm - cf = cholesky(A; check = false) - if cf.info == 0 - return cf - else - return factorize(Hermitian(A)) - end - end - if sym - return factorize(Symmetric(A)) - end - return lu(A) - end - qr(A, ColumnNorm()) -end -factorize(A::Adjoint) = adjoint(factorize(parent(A))) -factorize(A::Transpose) = transpose(factorize(parent(A))) -factorize(a::Number) = a # same as how factorize behaves on Diagonal types - -## Moore-Penrose pseudoinverse - -""" - pinv(M; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ) - pinv(M, rtol::Real) = pinv(M; rtol=rtol) # to be deprecated in Julia 2.0 - -Computes the Moore-Penrose pseudoinverse. - -For matrices `M` with floating point elements, it is convenient to compute -the pseudoinverse by inverting only singular values greater than -`max(atol, rtol*σ₁)` where `σ₁` is the largest singular value of `M`. - -The optimal choice of absolute (`atol`) and relative tolerance (`rtol`) varies -both with the value of `M` and the intended application of the pseudoinverse. -The default relative tolerance is `n*ϵ`, where `n` is the size of the smallest -dimension of `M`, and `ϵ` is the [`eps`](@ref) of the element type of `M`. - -For inverting dense ill-conditioned matrices in a least-squares sense, -`rtol = sqrt(eps(real(float(oneunit(eltype(M))))))` is recommended. - -For more information, see [^issue8859], [^B96], [^S84], [^KY88]. 
- -# Examples -```jldoctest -julia> M = [1.5 1.3; 1.2 1.9] -2×2 Matrix{Float64}: - 1.5 1.3 - 1.2 1.9 - -julia> N = pinv(M) -2×2 Matrix{Float64}: - 1.47287 -1.00775 - -0.930233 1.16279 - -julia> M * N -2×2 Matrix{Float64}: - 1.0 -2.22045e-16 - 4.44089e-16 1.0 -``` - -[^issue8859]: Issue 8859, "Fix least squares", [https://github.com/JuliaLang/julia/pull/8859](https://github.com/JuliaLang/julia/pull/8859) - -[^B96]: Åke Björck, "Numerical Methods for Least Squares Problems", SIAM Press, Philadelphia, 1996, "Other Titles in Applied Mathematics", Vol. 51. [doi:10.1137/1.9781611971484](http://epubs.siam.org/doi/book/10.1137/1.9781611971484) - -[^S84]: G. W. Stewart, "Rank Degeneracy", SIAM Journal on Scientific and Statistical Computing, 5(2), 1984, 403-413. [doi:10.1137/0905030](http://epubs.siam.org/doi/abs/10.1137/0905030) - -[^KY88]: Konstantinos Konstantinides and Kung Yao, "Statistical analysis of effective singular values in matrix rank determination", IEEE Transactions on Acoustics, Speech and Signal Processing, 36(5), 1988, 757-763. [doi:10.1109/29.1585](https://doi.org/10.1109/29.1585) -""" -function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(float(oneunit(T))))*min(size(A)...))*iszero(atol)) where T - m, n = size(A) - Tout = typeof(zero(T)/sqrt(oneunit(T) + oneunit(T))) - if m == 0 || n == 0 - return similar(A, Tout, (n, m)) - end - if isdiag(A) - indA = diagind(A) - dA = view(A, indA) - maxabsA = maximum(abs, dA) - tol = max(rtol * maxabsA, atol) - B = fill!(similar(A, Tout, (n, m)), 0) - indB = diagind(B) - B[indB] .= (x -> abs(x) > tol ? pinv(x) : zero(x)).(dA) - return B - end - SVD = svd(A) - tol = max(rtol*maximum(SVD.S), atol) - Stype = eltype(SVD.S) - Sinv = fill!(similar(A, Stype, length(SVD.S)), 0) - index = SVD.S .> tol - Sinv[index] .= pinv.(view(SVD.S, index)) - return SVD.Vt' * (Diagonal(Sinv) * SVD.U') -end -function pinv(x::Number) - xi = inv(x) - return ifelse(isfinite(xi), xi, zero(xi)) -end - -## Basis for null space - -""" - nullspace(M; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ) - nullspace(M, rtol::Real) = nullspace(M; rtol=rtol) # to be deprecated in Julia 2.0 - -Computes a basis for the nullspace of `M` by including the singular -vectors of `M` whose singular values have magnitudes smaller than `max(atol, rtol*σ₁)`, -where `σ₁` is `M`'s largest singular value. - -By default, the relative tolerance `rtol` is `n*ϵ`, where `n` -is the size of the smallest dimension of `M`, and `ϵ` is the [`eps`](@ref) of -the element type of `M`. - -# Examples -```jldoctest -julia> M = [1 0 0; 0 1 0; 0 0 0] -3×3 Matrix{Int64}: - 1 0 0 - 0 1 0 - 0 0 0 - -julia> nullspace(M) -3×1 Matrix{Float64}: - 0.0 - 0.0 - 1.0 - -julia> nullspace(M, rtol=3) -3×3 Matrix{Float64}: - 0.0 1.0 0.0 - 1.0 0.0 0.0 - 0.0 0.0 1.0 - -julia> nullspace(M, atol=0.95) -3×1 Matrix{Float64}: - 0.0 - 0.0 - 1.0 -``` -""" -function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size(A, 1), size(A, 2))*eps(real(float(oneunit(eltype(A))))))*iszero(atol)) - m, n = size(A, 1), size(A, 2) - (m == 0 || n == 0) && return Matrix{eigtype(eltype(A))}(I, n, n) - SVD = svd(A; full=true) - tol = max(atol, SVD.S[1]*rtol) - indstart = sum(s -> s .> tol, SVD.S) + 1 - return copy((@view SVD.Vt[indstart:end,:])') -end - -""" - cond(M, p::Real=2) - -Condition number of the matrix `M`, computed using the operator `p`-norm. Valid values for -`p` are `1`, `2` (default), or `Inf`. 
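A minimal sketch, with an illustrative matrix, of what these norms amount to: the `p = 2` case is the ratio of the extreme singular values, while `p = 1` and `p = Inf` combine `opnorm` of the matrix and of its inverse:

```julia
using LinearAlgebra

A = [1.0 0.0; 0.0 1.0e-3]
s = svdvals(A)
maximum(s) / minimum(s) ≈ cond(A)                    # 2-norm (default): σ_max / σ_min
opnorm(A, 1) * opnorm(inv(A), 1) ≈ cond(A, 1)        # 1-norm condition number
opnorm(A, Inf) * opnorm(inv(A), Inf) ≈ cond(A, Inf)  # Inf-norm condition number
```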
-""" -function cond(A::AbstractMatrix, p::Real=2) - if p == 2 - v = svdvals(A) - maxv = maximum(v) - return iszero(maxv) ? oftype(real(maxv), Inf) : maxv / minimum(v) - elseif p == 1 || p == Inf - checksquare(A) - try - Ainv = inv(A) - return opnorm(A, p)*opnorm(Ainv, p) - catch e - if isa(e, LAPACKException) || isa(e, SingularException) - return convert(float(real(eltype(A))), Inf) - else - rethrow() - end - end - end - throw(ArgumentError("p-norm must be 1, 2 or Inf, got $p")) -end - -## Lyapunov and Sylvester equation - -# AX + XB + C = 0 - -""" - sylvester(A, B, C) - -Computes the solution `X` to the Sylvester equation `AX + XB + C = 0`, where `A`, `B` and -`C` have compatible dimensions and `A` and `-B` have no eigenvalues with equal real part. - -# Examples -```jldoctest -julia> A = [3. 4.; 5. 6] -2×2 Matrix{Float64}: - 3.0 4.0 - 5.0 6.0 - -julia> B = [1. 1.; 1. 2.] -2×2 Matrix{Float64}: - 1.0 1.0 - 1.0 2.0 - -julia> C = [1. 2.; -2. 1] -2×2 Matrix{Float64}: - 1.0 2.0 - -2.0 1.0 - -julia> X = sylvester(A, B, C) -2×2 Matrix{Float64}: - -4.46667 1.93333 - 3.73333 -1.8 - -julia> A*X + X*B ≈ -C -true -``` -""" -function sylvester(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix) - T = promote_type(float(eltype(A)), float(eltype(B)), float(eltype(C))) - return sylvester(copy_similar(A, T), copy_similar(B, T), copy_similar(C, T)) -end -function sylvester(A::AbstractMatrix{T}, B::AbstractMatrix{T}, C::AbstractMatrix{T}) where {T<:BlasFloat} - RA, QA = schur(A) - RB, QB = schur(B) - D = QA' * C * QB - D .= .-D - Y, scale = LAPACK.trsyl!('N', 'N', RA, RB, D) - rmul!(QA * Y * QB', inv(scale)) -end - -Base.@propagate_inbounds function _sylvester_2x1!(A, B, C) - b = B[1] - a21, a12 = A[2, 1], A[1, 2] - m11 = b + A[1, 1] - m22 = b + A[2, 2] - d = m11 * m22 - a12 * a21 - c1, c2 = C - C[1] = (a12 * c2 - m22 * c1) / d - C[2] = (a21 * c1 - m11 * c2) / d - return C -end -Base.@propagate_inbounds function _sylvester_1x2!(A, B, C) - a = A[1] - b21, b12 = B[2, 1], B[1, 2] - m11 = a + B[1, 1] - m22 = a + B[2, 2] - d = m11 * m22 - b21 * b12 - c1, c2 = C - C[1] = (b21 * c2 - m22 * c1) / d - C[2] = (b12 * c1 - m11 * c2) / d - return C -end -function _sylvester_2x2!(A, B, C) - _, scale = LAPACK.trsyl!('N', 'N', A, B, C) - rmul!(C, -inv(scale)) - return C -end - -sylvester(a::Union{Real,Complex}, b::Union{Real,Complex}, c::Union{Real,Complex}) = -c / (a + b) - -# AX + XA' + C = 0 - -""" - lyap(A, C) - -Computes the solution `X` to the continuous Lyapunov equation `AX + XA' + C = 0`, where no -eigenvalue of `A` has a zero real part and no two eigenvalues are negative complex -conjugates of each other. - -# Examples -```jldoctest -julia> A = [3. 4.; 5. 6] -2×2 Matrix{Float64}: - 3.0 4.0 - 5.0 6.0 - -julia> B = [1. 1.; 1. 2.] -2×2 Matrix{Float64}: - 1.0 1.0 - 1.0 2.0 - -julia> X = lyap(A, B) -2×2 Matrix{Float64}: - 0.5 -0.5 - -0.5 0.25 - -julia> A*X + X*A' ≈ -B -true -``` -""" -function lyap(A::AbstractMatrix, C::AbstractMatrix) - T = promote_type(float(eltype(A)), float(eltype(C))) - return lyap(copy_similar(A, T), copy_similar(C, T)) -end -function lyap(A::AbstractMatrix{T}, C::AbstractMatrix{T}) where {T<:BlasFloat} - R, Q = schur(A) - D = Q' * C * Q - D .= .-D - Y, scale = LAPACK.trsyl!('N', T <: Complex ? 
'C' : 'T', R, R, D) - rmul!(Q * Y * Q', inv(scale)) -end -lyap(a::Union{Real,Complex}, c::Union{Real,Complex}) = -c/(2real(a)) diff --git a/stdlib/LinearAlgebra/src/deprecated.jl b/stdlib/LinearAlgebra/src/deprecated.jl deleted file mode 100644 index 28c090634a2d8..0000000000000 --- a/stdlib/LinearAlgebra/src/deprecated.jl +++ /dev/null @@ -1,7 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# To be deprecated in 2.0 -rank(A::AbstractMatrix, tol::Real) = rank(A,rtol=tol) -nullspace(A::AbstractVector, tol::Real) = nullspace(reshape(A, length(A), 1), rtol= tol) -nullspace(A::AbstractMatrix, tol::Real) = nullspace(A, rtol=tol) -pinv(A::AbstractMatrix{T}, tol::Real) where T = pinv(A, rtol=tol) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl deleted file mode 100644 index 29c190e87df72..0000000000000 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ /dev/null @@ -1,911 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## Diagonal matrices - -struct Diagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T} - diag::V - - function Diagonal{T,V}(diag) where {T,V<:AbstractVector{T}} - require_one_based_indexing(diag) - new{T,V}(diag) - end -end -Diagonal{T,V}(d::Diagonal) where {T,V<:AbstractVector{T}} = Diagonal{T,V}(d.diag) -Diagonal(v::AbstractVector{T}) where {T} = Diagonal{T,typeof(v)}(v) -Diagonal{T}(v::AbstractVector) where {T} = Diagonal(convert(AbstractVector{T}, v)::AbstractVector{T}) - -function Base.promote_rule(A::Type{<:Diagonal{<:Any,V}}, B::Type{<:Diagonal{<:Any,W}}) where {V,W} - X = promote_type(V, W) - T = eltype(X) - isconcretetype(T) && return Diagonal{T,X} - return typejoin(A, B) -end - -""" - Diagonal(V::AbstractVector) - -Construct a lazy matrix with `V` as its diagonal. - -See also [`UniformScaling`](@ref) for the lazy identity matrix `I`, -[`diagm`](@ref) to make a dense matrix, and [`diag`](@ref) to extract diagonal elements. - -# Examples -```jldoctest -julia> d = Diagonal([1, 10, 100]) -3×3 Diagonal{$Int, Vector{$Int}}: - 1 ⋅ ⋅ - ⋅ 10 ⋅ - ⋅ ⋅ 100 - -julia> diagm([7, 13]) -2×2 Matrix{$Int}: - 7 0 - 0 13 - -julia> ans + I -2×2 Matrix{Int64}: - 8 0 - 0 14 - -julia> I(2) -2×2 Diagonal{Bool, Vector{Bool}}: - 1 ⋅ - ⋅ 1 -``` - -Note that a one-column matrix is not treated like a vector, but instead calls the -method `Diagonal(A::AbstractMatrix)` which extracts 1-element `diag(A)`: - -```jldoctest -julia> A = transpose([7.0 13.0]) -2×1 transpose(::Matrix{Float64}) with eltype Float64: - 7.0 - 13.0 - -julia> Diagonal(A) -1×1 Diagonal{Float64, Vector{Float64}}: - 7.0 -``` -""" -Diagonal(V::AbstractVector) - -""" - Diagonal(A::AbstractMatrix) - -Construct a matrix from the diagonal of `A`. - -# Examples -```jldoctest -julia> A = permutedims(reshape(1:15, 5, 3)) -3×5 Matrix{Int64}: - 1 2 3 4 5 - 6 7 8 9 10 - 11 12 13 14 15 - -julia> Diagonal(A) -3×3 Diagonal{$Int, Vector{$Int}}: - 1 ⋅ ⋅ - ⋅ 7 ⋅ - ⋅ ⋅ 13 - -julia> diag(A, 2) -3-element Vector{$Int}: - 3 - 9 - 15 -``` -""" -Diagonal(A::AbstractMatrix) = Diagonal(diag(A)) -Diagonal{T}(A::AbstractMatrix) where T = Diagonal{T}(diag(A)) -function convert(::Type{T}, A::AbstractMatrix) where T<:Diagonal - checksquare(A) - isdiag(A) ? 
T(A) : throw(InexactError(:convert, T, A)) -end - -Diagonal(D::Diagonal) = D -Diagonal{T}(D::Diagonal{T}) where {T} = D -Diagonal{T}(D::Diagonal) where {T} = Diagonal{T}(D.diag) - -AbstractMatrix{T}(D::Diagonal) where {T} = Diagonal{T}(D) -Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D) -Array(D::Diagonal{T}) where {T} = Matrix(D) -function Matrix{T}(D::Diagonal) where {T} - n = size(D, 1) - B = Matrix{T}(undef, n, n) - n > 1 && fill!(B, zero(T)) - @inbounds for i in 1:n - B[i,i] = D.diag[i] - end - return B -end - -""" - Diagonal{T}(undef, n) - -Construct an uninitialized `Diagonal{T}` of length `n`. See `undef`. -""" -Diagonal{T}(::UndefInitializer, n::Integer) where T = Diagonal(Vector{T}(undef, n)) - -similar(D::Diagonal, ::Type{T}) where {T} = Diagonal(similar(D.diag, T)) -similar(D::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(D.diag, T, dims) - -copyto!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1) - -size(D::Diagonal) = (n = length(D.diag); (n,n)) - -function size(D::Diagonal,d::Integer) - if d<1 - throw(ArgumentError("dimension must be ≥ 1, got $d")) - end - return d<=2 ? length(D.diag) : 1 -end - -@inline function Base.isassigned(D::Diagonal, i::Int, j::Int) - @boundscheck checkbounds(Bool, D, i, j) || return false - if i == j - @inbounds r = isassigned(D.diag, i) - else - r = true - end - r -end - -@inline function getindex(D::Diagonal, i::Int, j::Int) - @boundscheck checkbounds(D, i, j) - if i == j - @inbounds r = D.diag[i] - else - r = diagzero(D, i, j) - end - r -end -diagzero(::Diagonal{T}, i, j) where {T} = zero(T) -diagzero(D::Diagonal{<:AbstractMatrix{T}}, i, j) where {T} = zeros(T, size(D.diag[i], 1), size(D.diag[j], 2)) - -function setindex!(D::Diagonal, v, i::Int, j::Int) - @boundscheck checkbounds(D, i, j) - if i == j - @inbounds D.diag[i] = v - elseif !iszero(v) - throw(ArgumentError("cannot set off-diagonal entry ($i, $j) to a nonzero value ($v)")) - end - return v -end - - -## structured matrix methods ## -function Base.replace_in_print_matrix(A::Diagonal,i::Integer,j::Integer,s::AbstractString) - i==j ? s : Base.replace_with_centered_mark(s) -end - -parent(D::Diagonal) = D.diag - -ishermitian(D::Diagonal{<:Real}) = true -ishermitian(D::Diagonal{<:Number}) = isreal(D.diag) -ishermitian(D::Diagonal) = all(ishermitian, D.diag) -issymmetric(D::Diagonal{<:Number}) = true -issymmetric(D::Diagonal) = all(issymmetric, D.diag) -isposdef(D::Diagonal) = all(isposdef, D.diag) - -factorize(D::Diagonal) = D - -real(D::Diagonal) = Diagonal(real(D.diag)) -imag(D::Diagonal) = Diagonal(imag(D.diag)) - -iszero(D::Diagonal) = all(iszero, D.diag) -isone(D::Diagonal) = all(isone, D.diag) -isdiag(D::Diagonal) = all(isdiag, D.diag) -isdiag(D::Diagonal{<:Number}) = true -istriu(D::Diagonal, k::Integer=0) = k <= 0 || iszero(D.diag) ? true : false -istril(D::Diagonal, k::Integer=0) = k >= 0 || iszero(D.diag) ? 
true : false -function triu!(D::Diagonal{T}, k::Integer=0) where T - n = size(D,1) - if !(-n + 1 <= k <= n + 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix"))) - elseif k > 0 - fill!(D.diag, zero(T)) - end - return D -end - -function tril!(D::Diagonal{T}, k::Integer=0) where T - n = size(D,1) - if !(-n - 1 <= k <= n - 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix"))) - elseif k < 0 - fill!(D.diag, zero(T)) - end - return D -end - -(==)(Da::Diagonal, Db::Diagonal) = Da.diag == Db.diag -(-)(A::Diagonal) = Diagonal(-A.diag) -(+)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag + Db.diag) -(-)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag - Db.diag) - -for f in (:+, :-) - @eval function $f(D::Diagonal, S::Symmetric) - return Symmetric($f(D, S.data), sym_uplo(S.uplo)) - end - @eval function $f(S::Symmetric, D::Diagonal) - return Symmetric($f(S.data, D), sym_uplo(S.uplo)) - end - @eval function $f(D::Diagonal{<:Real}, H::Hermitian) - return Hermitian($f(D, H.data), sym_uplo(H.uplo)) - end - @eval function $f(H::Hermitian, D::Diagonal{<:Real}) - return Hermitian($f(H.data, D), sym_uplo(H.uplo)) - end -end - -(*)(x::Number, D::Diagonal) = Diagonal(x * D.diag) -(*)(D::Diagonal, x::Number) = Diagonal(D.diag * x) -(/)(D::Diagonal, x::Number) = Diagonal(D.diag / x) -(\)(x::Number, D::Diagonal) = Diagonal(x \ D.diag) -(^)(D::Diagonal, a::Number) = Diagonal(D.diag .^ a) -(^)(D::Diagonal, a::Real) = Diagonal(D.diag .^ a) # for disambiguation -(^)(D::Diagonal, a::Integer) = Diagonal(D.diag .^ a) # for disambiguation -Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) = - Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed -Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation - -function _muldiag_size_check(A, B) - nA = size(A, 2) - mB = size(B, 1) - @noinline throw_dimerr(::AbstractMatrix, nA, mB) = throw(DimensionMismatch("second dimension of A, $nA, does not match first dimension of B, $mB")) - @noinline throw_dimerr(::AbstractVector, nA, mB) = throw(DimensionMismatch("second dimension of D, $nA, does not match length of V, $mB")) - nA == mB || throw_dimerr(B, nA, mB) - return nothing -end -# the output matrix should have the same size as the non-diagonal input matrix or vector -@noinline throw_dimerr(szC, szA) = throw(DimensionMismatch("output matrix has size: $szC, but should have size $szA")) -_size_check_out(C, ::Diagonal, A) = _size_check_out(C, A) -_size_check_out(C, A, ::Diagonal) = _size_check_out(C, A) -_size_check_out(C, A::Diagonal, ::Diagonal) = _size_check_out(C, A) -function _size_check_out(C, A) - szA = size(A) - szC = size(C) - szA == szC || throw_dimerr(szC, szA) - return nothing -end -function _muldiag_size_check(C, A, B) - _muldiag_size_check(A, B) - _size_check_out(C, A, B) -end - -function (*)(Da::Diagonal, Db::Diagonal) - _muldiag_size_check(Da, Db) - return Diagonal(Da.diag .* Db.diag) -end - -function (*)(D::Diagonal, V::AbstractVector) - _muldiag_size_check(D, V) - return D.diag .* V -end - -(*)(A::AbstractMatrix, D::Diagonal) = - mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A, D) -(*)(A::HermOrSym, D::Diagonal) = - mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A, D) -(*)(D::Diagonal, A::AbstractMatrix) = - mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), D, A) -(*)(D::Diagonal, A::HermOrSym) = - mul!(similar(A, 
promote_op(*, eltype(A), eltype(D.diag)), size(A)), D, A) - -rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D) -lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B) - -function (*)(A::AdjOrTransAbsMat, D::Diagonal) - Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))) - rmul!(Ac, D) -end -function (*)(D::Diagonal, A::AdjOrTransAbsMat) - Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))) - lmul!(D, Ac) -end - -function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - require_one_based_indexing(out, B) - alpha, beta = _add.alpha, _add.beta - if iszero(alpha) - _rmul_or_fill!(out, beta) - else - if bis0 - @inbounds for j in axes(B, 2) - @simd for i in axes(B, 1) - out[i,j] = D.diag[i] * B[i,j] * alpha - end - end - else - @inbounds for j in axes(B, 2) - @simd for i in axes(B, 1) - out[i,j] = D.diag[i] * B[i,j] * alpha + out[i,j] * beta - end - end - end - end - return out -end -function __muldiag!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - require_one_based_indexing(out, A) - alpha, beta = _add.alpha, _add.beta - if iszero(alpha) - _rmul_or_fill!(out, beta) - else - if bis0 - @inbounds for j in axes(A, 2) - dja = D.diag[j] * alpha - @simd for i in axes(A, 1) - out[i,j] = A[i,j] * dja - end - end - else - @inbounds for j in axes(A, 2) - dja = D.diag[j] * alpha - @simd for i in axes(A, 1) - out[i,j] = A[i,j] * dja + out[i,j] * beta - end - end - end - end - return out -end -function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - d1 = D1.diag - d2 = D2.diag - alpha, beta = _add.alpha, _add.beta - if iszero(alpha) - _rmul_or_fill!(out.diag, beta) - else - if bis0 - @inbounds @simd for i in eachindex(out.diag) - out.diag[i] = d1[i] * d2[i] * alpha - end - else - @inbounds @simd for i in eachindex(out.diag) - out.diag[i] = d1[i] * d2[i] * alpha + out.diag[i] * beta - end - end - end - return out -end -function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - require_one_based_indexing(out) - alpha, beta = _add.alpha, _add.beta - mA = size(D1, 1) - d1 = D1.diag - d2 = D2.diag - _rmul_or_fill!(out, beta) - if !iszero(alpha) - @inbounds @simd for i in 1:mA - out[i,i] += d1[i] * d2[i] * alpha - end - end - return out -end - -function _mul_diag!(out, A, B, _add) - _muldiag_size_check(out, A, B) - __muldiag!(out, A, B, _add) - return out -end - -_mul!(out::AbstractVecOrMat, D::Diagonal, V::AbstractVector, _add) = - _mul_diag!(out, D, V, _add) -_mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, _add) = - _mul_diag!(out, D, B, _add) -_mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, _add) = - _mul_diag!(out, A, D, _add) -_mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, _add) = - _mul_diag!(C, Da, Db, _add) -_mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, _add) = - _mul_diag!(C, Da, Db, _add) - -function (*)(Da::Diagonal, A::AbstractMatrix, Db::Diagonal) - _muldiag_size_check(Da, A) - _muldiag_size_check(A, Db) - return broadcast(*, Da.diag, A, permutedims(Db.diag)) -end - -function (*)(Da::Diagonal, Db::Diagonal, Dc::Diagonal) - _muldiag_size_check(Da, Db) - _muldiag_size_check(Db, Dc) - return Diagonal(Da.diag .* Db.diag .* Dc.diag) -end - -/(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D))), A, D) -/(A::HermOrSym, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D)), size(A)), A, D) - -rdiv!(A::AbstractVecOrMat, D::Diagonal) = 
@inline _rdiv!(A, A, D) -# avoid copy when possible via internal 3-arg backend -function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal) - require_one_based_indexing(A) - dd = D.diag - m, n = size(A, 1), size(A, 2) - if (k = length(dd)) != n - throw(DimensionMismatch("left hand side has $n columns but D is $k by $k")) - end - @inbounds for j in 1:n - ddj = dd[j] - iszero(ddj) && throw(SingularException(j)) - for i in 1:m - B[i, j] = A[i, j] / ddj - end - end - B -end - -function \(D::Diagonal, B::AbstractVector) - j = findfirst(iszero, D.diag) - isnothing(j) || throw(SingularException(j)) - return D.diag .\ B -end -\(D::Diagonal, B::AbstractMatrix) = ldiv!(similar(B, _init_eltype(\, eltype(D), eltype(B))), D, B) -\(D::Diagonal, B::HermOrSym) = ldiv!(similar(B, _init_eltype(\, eltype(D), eltype(B)), size(B)), D, B) - -ldiv!(D::Diagonal, B::AbstractVecOrMat) = @inline ldiv!(B, D, B) -function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat) - require_one_based_indexing(A, B) - dd = D.diag - d = length(dd) - m, n = size(A, 1), size(A, 2) - m′, n′ = size(B, 1), size(B, 2) - m == d || throw(DimensionMismatch("right hand side has $m rows but D is $d by $d")) - (m, n) == (m′, n′) || throw(DimensionMismatch("expect output to be $m by $n, but got $m′ by $n′")) - j = findfirst(iszero, D.diag) - isnothing(j) || throw(SingularException(j)) - @inbounds for j = 1:n, i = 1:m - B[i, j] = dd[i] \ A[i, j] - end - B -end - -# Optimizations for \, / between Diagonals -\(D::Diagonal, B::Diagonal) = ldiv!(similar(B, promote_op(\, eltype(D), eltype(B))), D, B) -/(A::Diagonal, D::Diagonal) = _rdiv!(similar(A, promote_op(/, eltype(A), eltype(D))), A, D) -function _rdiv!(Dc::Diagonal, Db::Diagonal, Da::Diagonal) - n, k = length(Db.diag), length(Da.diag) - n == k || throw(DimensionMismatch("left hand side has $n columns but D is $k by $k")) - j = findfirst(iszero, Da.diag) - isnothing(j) || throw(SingularException(j)) - Dc.diag .= Db.diag ./ Da.diag - Dc -end -ldiv!(Dc::Diagonal, Da::Diagonal, Db::Diagonal) = Diagonal(ldiv!(Dc.diag, Da, Db.diag)) - -# optimizations for (Sym)Tridiagonal and Diagonal -@propagate_inbounds _getudiag(T::Tridiagonal, i) = T.du[i] -@propagate_inbounds _getudiag(S::SymTridiagonal, i) = S.ev[i] -@propagate_inbounds _getdiag(T::Tridiagonal, i) = T.d[i] -@propagate_inbounds _getdiag(S::SymTridiagonal, i) = symmetric(S.dv[i], :U)::symmetric_type(eltype(S.dv)) -@propagate_inbounds _getldiag(T::Tridiagonal, i) = T.dl[i] -@propagate_inbounds _getldiag(S::SymTridiagonal, i) = transpose(S.ev[i]) - -function (\)(D::Diagonal, S::SymTridiagonal) - T = promote_op(\, eltype(D), eltype(S)) - du = similar(S.ev, T, max(length(S.dv)-1, 0)) - d = similar(S.dv, T, length(S.dv)) - dl = similar(S.ev, T, max(length(S.dv)-1, 0)) - ldiv!(Tridiagonal(dl, d, du), D, S) -end -(\)(D::Diagonal, T::Tridiagonal) = ldiv!(similar(T, promote_op(\, eltype(D), eltype(T))), D, T) -function ldiv!(T::Tridiagonal, D::Diagonal, S::Union{SymTridiagonal,Tridiagonal}) - m = size(S, 1) - dd = D.diag - if (k = length(dd)) != m - throw(DimensionMismatch("diagonal matrix is $k by $k but right hand side has $m rows")) - end - if length(T.d) != m - throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))")) - end - m == 0 && return T - j = findfirst(iszero, dd) - isnothing(j) || throw(SingularException(j)) - ddj = dd[1] - T.d[1] = ddj \ _getdiag(S, 1) - @inbounds if m > 1 - T.du[1] = ddj \ _getudiag(S, 1) - for j in 2:m-1 - ddj = dd[j] - T.dl[j-1] = ddj \ _getldiag(S, j-1) 
- T.d[j] = ddj \ _getdiag(S, j) - T.du[j] = ddj \ _getudiag(S, j) - end - ddj = dd[m] - T.dl[m-1] = ddj \ _getldiag(S, m-1) - T.d[m] = ddj \ _getdiag(S, m) - end - return T -end - -function (/)(S::SymTridiagonal, D::Diagonal) - T = promote_op(\, eltype(D), eltype(S)) - du = similar(S.ev, T, max(length(S.dv)-1, 0)) - d = similar(S.dv, T, length(S.dv)) - dl = similar(S.ev, T, max(length(S.dv)-1, 0)) - _rdiv!(Tridiagonal(dl, d, du), S, D) -end -(/)(T::Tridiagonal, D::Diagonal) = _rdiv!(similar(T, promote_op(/, eltype(T), eltype(D))), T, D) -function _rdiv!(T::Tridiagonal, S::Union{SymTridiagonal,Tridiagonal}, D::Diagonal) - n = size(S, 2) - dd = D.diag - if (k = length(dd)) != n - throw(DimensionMismatch("left hand side has $n columns but D is $k by $k")) - end - if length(T.d) != n - throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))")) - end - n == 0 && return T - j = findfirst(iszero, dd) - isnothing(j) || throw(SingularException(j)) - ddj = dd[1] - T.d[1] = _getdiag(S, 1) / ddj - @inbounds if n > 1 - T.dl[1] = _getldiag(S, 1) / ddj - for j in 2:n-1 - ddj = dd[j] - T.dl[j] = _getldiag(S, j) / ddj - T.d[j] = _getdiag(S, j) / ddj - T.du[j-1] = _getudiag(S, j-1) / ddj - end - ddj = dd[n] - T.d[n] = _getdiag(S, n) / ddj - T.du[n-1] = _getudiag(S, n-1) / ddj - end - return T -end - -# Optimizations for [l/r]mul!, l/rdiv!, *, / and \ between Triangular and Diagonal. -# These functions are generally more efficient if we calculate the whole data field. -# The following code implements them in a unified pattern to avoid missing. -@inline function _setdiag!(data, f, diag, diag′ = nothing) - @inbounds for i in 1:length(diag) - data[i,i] = isnothing(diag′) ? f(diag[i]) : f(diag[i],diag′[i]) - end - data -end -for Tri in (:UpperTriangular, :LowerTriangular) - UTri = Symbol(:Unit, Tri) - # 2 args - for (fun, f) in zip((:*, :rmul!, :rdiv!, :/), (:identity, :identity, :inv, :inv)) - @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D)) - @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag)) - end - for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv)) - @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data)) - @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag)) - end - # 3-arg ldiv! - @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data)) - @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag)) - # 3-arg mul! is disambiguated in special.jl - # 5-arg mul! - @eval _mul!(C::$Tri, D::Diagonal, A::$Tri, _add) = $Tri(mul!(C.data, D, A.data, _add.alpha, _add.beta)) - @eval function _mul!(C::$Tri, D::Diagonal, A::$UTri, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - α, β = _add.alpha, _add.beta - iszero(α) && return _rmul_or_fill!(C, β) - diag′ = bis0 ? nothing : diag(C) - data = mul!(C.data, D, A.data, α, β) - $Tri(_setdiag!(data, _add, D.diag, diag′)) - end - @eval _mul!(C::$Tri, A::$Tri, D::Diagonal, _add) = $Tri(mul!(C.data, A.data, D, _add.alpha, _add.beta)) - @eval function _mul!(C::$Tri, A::$UTri, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} - α, β = _add.alpha, _add.beta - iszero(α) && return _rmul_or_fill!(C, β) - diag′ = bis0 ? 
nothing : diag(C) - data = mul!(C.data, A.data, D, α, β) - $Tri(_setdiag!(data, _add, D.diag, diag′)) - end -end - -@inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal) - valA = A.diag; nA = length(valA) - valB = B.diag; nB = length(valB) - nC = checksquare(C) - @boundscheck nC == nA*nB || - throw(DimensionMismatch("expect C to be a $(nA*nB)x$(nA*nB) matrix, got size $(nC)x$(nC)")) - isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1])) - @inbounds for i = 1:nA, j = 1:nB - idx = (i-1)*nB+j - C[idx, idx] = valA[i] * valB[j] - end - return C -end - -kron(A::Diagonal, B::Diagonal) = Diagonal(kron(A.diag, B.diag)) - -function kron(A::Diagonal, B::SymTridiagonal) - kdv = kron(diag(A), B.dv) - # We don't need to drop the last element - kev = kron(diag(A), _pushzero(_evview(B))) - SymTridiagonal(kdv, kev) -end -function kron(A::Diagonal, B::Tridiagonal) - # `_droplast!` is only guaranteed to work with `Vector` - kd = _makevector(kron(diag(A), B.d)) - kdl = _droplast!(_makevector(kron(diag(A), _pushzero(B.dl)))) - kdu = _droplast!(_makevector(kron(diag(A), _pushzero(B.du)))) - Tridiagonal(kdl, kd, kdu) -end - -@inline function kron!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix) - require_one_based_indexing(B) - (mA, nA) = size(A) - (mB, nB) = size(B) - (mC, nC) = size(C) - @boundscheck (mC, nC) == (mA * mB, nA * nB) || - throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)")) - isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1])) - m = 1 - @inbounds for j = 1:nA - A_jj = A[j,j] - for k = 1:nB - for l = 1:mB - C[m] = A_jj * B[l,k] - m += 1 - end - m += (nA - 1) * mB - end - m += mB - end - return C -end - -@inline function kron!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal) - require_one_based_indexing(A) - (mA, nA) = size(A) - (mB, nB) = size(B) - (mC, nC) = size(C) - @boundscheck (mC, nC) == (mA * mB, nA * nB) || - throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)")) - isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1])) - m = 1 - @inbounds for j = 1:nA - for l = 1:mB - Bll = B[l,l] - for k = 1:mA - C[m] = A[k,j] * Bll - m += nB - end - m += 1 - end - m -= nB - end - return C -end - -conj(D::Diagonal) = Diagonal(conj(D.diag)) -transpose(D::Diagonal{<:Number}) = D -transpose(D::Diagonal) = Diagonal(transpose.(D.diag)) -adjoint(D::Diagonal{<:Number}) = Diagonal(vec(adjoint(D.diag))) -adjoint(D::Diagonal{<:Number,<:Base.ReshapedArray{<:Number,1,<:Adjoint}}) = Diagonal(adjoint(parent(D.diag))) -adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag)) -permutedims(D::Diagonal) = D -permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D) - -function diag(D::Diagonal{T}, k::Integer=0) where T - # every branch call similar(..., ::Int) to make sure the - # same vector type is returned independent of k - if k == 0 - return copyto!(similar(D.diag, length(D.diag)), D.diag) - elseif -size(D,1) <= k <= size(D,1) - return fill!(similar(D.diag, size(D,1)-abs(k)), zero(T)) - else - throw(ArgumentError(string("requested diagonal, $k, must be at least $(-size(D, 1)) ", - "and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix"))) - end -end -tr(D::Diagonal) = sum(tr, D.diag) -det(D::Diagonal) = prod(det, D.diag) -function logdet(D::Diagonal{<:Complex}) # make sure branch cut is correct - z = sum(log, D.diag) - complex(real(z), rem2pi(imag(z), RoundNearest)) -end - -# Matrix functions -for f in (:exp, :cis, :log, :sqrt, - :cos, :sin, :tan, :csc, :sec, :cot, - :cosh, 
:sinh, :tanh, :csch, :sech, :coth, - :acos, :asin, :atan, :acsc, :asec, :acot, - :acosh, :asinh, :atanh, :acsch, :asech, :acoth) - @eval $f(D::Diagonal) = Diagonal($f.(D.diag)) -end - -function inv(D::Diagonal{T}) where T - Di = similar(D.diag, typeof(inv(oneunit(T)))) - for i = 1:length(D.diag) - if iszero(D.diag[i]) - throw(SingularException(i)) - end - Di[i] = inv(D.diag[i]) - end - Diagonal(Di) -end - -function pinv(D::Diagonal{T}) where T - Di = similar(D.diag, typeof(inv(oneunit(T)))) - for i = 1:length(D.diag) - if !iszero(D.diag[i]) - invD = inv(D.diag[i]) - if isfinite(invD) - Di[i] = invD - continue - end - end - # fallback - Di[i] = zero(T) - end - Diagonal(Di) -end -function pinv(D::Diagonal{T}, tol::Real) where T - Di = similar(D.diag, typeof(inv(oneunit(T)))) - if !isempty(D.diag) - maxabsD = maximum(abs, D.diag) - for i = 1:length(D.diag) - if abs(D.diag[i]) > tol*maxabsD - invD = inv(D.diag[i]) - if isfinite(invD) - Di[i] = invD - continue - end - end - # fallback - Di[i] = zero(T) - end - end - Diagonal(Di) -end - -#Eigensystem -eigvals(D::Diagonal{<:Number}; permute::Bool=true, scale::Bool=true) = copy(D.diag) -eigvals(D::Diagonal; permute::Bool=true, scale::Bool=true) = - [eigvals(x) for x in D.diag] #For block matrices, etc. -eigvecs(D::Diagonal) = Matrix{eltype(D)}(I, size(D)) -function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=nothing) - if any(!isfinite, D.diag) - throw(ArgumentError("matrix contains Infs or NaNs")) - end - Td = Base.promote_op(/, eltype(D), eltype(D)) - λ = eigvals(D) - if !isnothing(sortby) - p = sortperm(λ; alg=QuickSort, by=sortby) - λ = λ[p] - evecs = zeros(Td, size(D)) - @inbounds for i in eachindex(p) - evecs[p[i],i] = one(Td) - end - else - evecs = Matrix{Td}(I, size(D)) - end - Eigen(λ, evecs) -end -function eigen(Da::Diagonal, Db::Diagonal; sortby::Union{Function,Nothing}=nothing) - if any(!isfinite, Da.diag) || any(!isfinite, Db.diag) - throw(ArgumentError("matrices contain Infs or NaNs")) - end - if any(iszero, Db.diag) - throw(ArgumentError("right-hand side diagonal matrix is singular")) - end - return GeneralizedEigen(eigen(Db \ Da; sortby)...) 
-end -function eigen(A::AbstractMatrix, D::Diagonal; sortby::Union{Function,Nothing}=nothing) - if any(iszero, D.diag) - throw(ArgumentError("right-hand side diagonal matrix is singular")) - end - if size(A, 1) == size(A, 2) && isdiag(A) - return eigen(Diagonal(A), D; sortby) - elseif all(isposdef, D.diag) - S = promote_type(eigtype(eltype(A)), eltype(D)) - return eigen(A, cholesky(Diagonal{S}(D)); sortby) - else - return eigen!(D \ A; sortby) - end -end - -#Singular system -svdvals(D::Diagonal{<:Number}) = sort!(abs.(D.diag), rev = true) -svdvals(D::Diagonal) = [svdvals(v) for v in D.diag] -function svd(D::Diagonal{T}) where {T<:Number} - d = D.diag - s = abs.(d) - piv = sortperm(s, rev = true) - S = s[piv] - Td = typeof(oneunit(T)/oneunit(T)) - U = zeros(Td, size(D)) - Vt = copy(U) - for i in 1:length(d) - j = piv[i] - U[j,i] = d[j] / S[i] - Vt[i,j] = one(Td) - end - return SVD(U, S, Vt) -end - -# disambiguation methods: * and / of Diagonal and Adj/Trans AbsVec -*(u::AdjointAbsVec, D::Diagonal) = (D'u')' -*(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) * transpose(u)) -*(x::AdjointAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y) -*(x::TransposeAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y) -/(u::AdjointAbsVec, D::Diagonal) = (D' \ u')' -/(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) \ transpose(u)) -# disambiguation methods: Call unoptimized version for user defined AbstractTriangular. -*(A::AbstractTriangular, D::Diagonal) = @invoke *(A::AbstractMatrix, D::Diagonal) -*(D::Diagonal, A::AbstractTriangular) = @invoke *(D::Diagonal, A::AbstractMatrix) - -dot(x::AbstractVector, D::Diagonal, y::AbstractVector) = _mapreduce_prod(dot, x, D, y) - -dot(A::Diagonal, B::Diagonal) = dot(A.diag, B.diag) -function dot(D::Diagonal, B::AbstractMatrix) - size(D) == size(B) || throw(DimensionMismatch("Matrix sizes $(size(D)) and $(size(B)) differ")) - return dot(D.diag, view(B, diagind(B))) -end - -dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A)) - -function _mapreduce_prod(f, x, D::Diagonal, y) - if !(length(x) == length(D.diag) == length(y)) - throw(DimensionMismatch("x has length $(length(x)), D has size $(size(D)), and y has $(length(y))")) - end - if isempty(x) && isempty(D) && isempty(y) - return zero(promote_op(f, eltype(x), eltype(D), eltype(y))) - else - return mapreduce(t -> f(t[1], t[2], t[3]), +, zip(x, D.diag, y)) - end -end - -function cholesky!(A::Diagonal, ::NoPivot = NoPivot(); check::Bool = true) - info = 0 - for (i, di) in enumerate(A.diag) - if isreal(di) && real(di) > 0 - A.diag[i] = √di - elseif check - throw(PosDefException(i)) - else - info = i - break - end - end - Cholesky(A, 'U', convert(BlasInt, info)) -end -@deprecate cholesky!(A::Diagonal, ::Val{false}; check::Bool = true) cholesky!(A::Diagonal, NoPivot(); check) false -@deprecate cholesky(A::Diagonal, ::Val{false}; check::Bool = true) cholesky(A::Diagonal, NoPivot(); check) false - -inv(C::Cholesky{<:Any,<:Diagonal}) = Diagonal(map(inv∘abs2, C.factors.diag)) - -cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A)) -cholcopy(A::RealHermSymComplexHerm{<:Any,<:Diagonal}) = Diagonal(copy_similar(diag(A), choltype(A))) - -function getproperty(C::Cholesky{<:Any,<:Diagonal}, d::Symbol) - Cfactors = getfield(C, :factors) - if d in (:U, :L, :UL) - return Cfactors - else - return getfield(C, d) - end -end - -Base._sum(A::Diagonal, ::Colon) = sum(A.diag) -function Base._sum(A::Diagonal, dims::Integer) - res = Base.reducedim_initarray(A, dims, 
zero(eltype(A))) - if dims <= 2 - for i = 1:length(A.diag) - @inbounds res[i] = A.diag[i] - end - else - for i = 1:length(A.diag) - @inbounds res[i,i] = A.diag[i] - end - end - res -end - -function logabsdet(A::Diagonal) - mapreduce(x -> (log(abs(x)), sign(x)), ((d1, s1), (d2, s2)) -> (d1 + d2, s1 * s2), - A.diag) -end - -function Base.muladd(A::Diagonal, B::Diagonal, z::Diagonal) - Diagonal(A.diag .* B.diag .+ z.diag) -end diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl deleted file mode 100644 index 489bfa4665c7a..0000000000000 --- a/stdlib/LinearAlgebra/src/eigen.jl +++ /dev/null @@ -1,675 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Eigendecomposition -""" - Eigen <: Factorization - -Matrix factorization type of the eigenvalue/spectral decomposition of a square -matrix `A`. This is the return type of [`eigen`](@ref), the corresponding matrix -factorization function. - -If `F::Eigen` is the factorization object, the eigenvalues can be obtained via -`F.values` and the eigenvectors as the columns of the matrix `F.vectors`. -(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) - -Iterating the decomposition produces the components `F.values` and `F.vectors`. - -# Examples -```jldoctest -julia> F = eigen([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0]) -Eigen{Float64, Float64, Matrix{Float64}, Vector{Float64}} -values: -3-element Vector{Float64}: - 1.0 - 3.0 - 18.0 -vectors: -3×3 Matrix{Float64}: - 1.0 0.0 0.0 - 0.0 1.0 0.0 - 0.0 0.0 1.0 - -julia> F.values -3-element Vector{Float64}: - 1.0 - 3.0 - 18.0 - -julia> F.vectors -3×3 Matrix{Float64}: - 1.0 0.0 0.0 - 0.0 1.0 0.0 - 0.0 0.0 1.0 - -julia> vals, vecs = F; # destructuring via iteration - -julia> vals == F.values && vecs == F.vectors -true -``` -""" -struct Eigen{T,V,S<:AbstractMatrix,U<:AbstractVector} <: Factorization{T} - values::U - vectors::S - Eigen{T,V,S,U}(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V,S,U} = - new(values, vectors) -end -Eigen(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V} = - Eigen{T,V,typeof(vectors),typeof(values)}(values, vectors) - -# Generalized eigenvalue problem. -""" - GeneralizedEigen <: Factorization - -Matrix factorization type of the generalized eigenvalue/spectral decomposition of -`A` and `B`. This is the return type of [`eigen`](@ref), the corresponding -matrix factorization function, when called with two matrix arguments. - -If `F::GeneralizedEigen` is the factorization object, the eigenvalues can be obtained via -`F.values` and the eigenvectors as the columns of the matrix `F.vectors`. -(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) - -Iterating the decomposition produces the components `F.values` and `F.vectors`. 
- -# Examples -```jldoctest -julia> A = [1 0; 0 -1] -2×2 Matrix{Int64}: - 1 0 - 0 -1 - -julia> B = [0 1; 1 0] -2×2 Matrix{Int64}: - 0 1 - 1 0 - -julia> F = eigen(A, B) -GeneralizedEigen{ComplexF64, ComplexF64, Matrix{ComplexF64}, Vector{ComplexF64}} -values: -2-element Vector{ComplexF64}: - 0.0 - 1.0im - 0.0 + 1.0im -vectors: -2×2 Matrix{ComplexF64}: - 0.0+1.0im 0.0-1.0im - -1.0+0.0im -1.0-0.0im - -julia> F.values -2-element Vector{ComplexF64}: - 0.0 - 1.0im - 0.0 + 1.0im - -julia> F.vectors -2×2 Matrix{ComplexF64}: - 0.0+1.0im 0.0-1.0im - -1.0+0.0im -1.0-0.0im - -julia> vals, vecs = F; # destructuring via iteration - -julia> vals == F.values && vecs == F.vectors -true -``` -""" -struct GeneralizedEigen{T,V,S<:AbstractMatrix,U<:AbstractVector} <: Factorization{T} - values::U - vectors::S - GeneralizedEigen{T,V,S,U}(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V,S,U} = - new(values, vectors) -end -GeneralizedEigen(values::AbstractVector{V}, vectors::AbstractMatrix{T}) where {T,V} = - GeneralizedEigen{T,V,typeof(vectors),typeof(values)}(values, vectors) - -# iteration for destructuring into components -Base.iterate(S::Union{Eigen,GeneralizedEigen}) = (S.values, Val(:vectors)) -Base.iterate(S::Union{Eigen,GeneralizedEigen}, ::Val{:vectors}) = (S.vectors, Val(:done)) -Base.iterate(S::Union{Eigen,GeneralizedEigen}, ::Val{:done}) = nothing - -isposdef(A::Union{Eigen,GeneralizedEigen}) = isreal(A.values) && all(x -> x > 0, A.values) - -# pick a canonical ordering to avoid returning eigenvalues in "random" order -# as is the LAPACK default (for complex λ — LAPACK sorts by λ for the Hermitian/Symmetric case) -eigsortby(λ::Real) = λ -eigsortby(λ::Complex) = (real(λ),imag(λ)) -function sorteig!(λ::AbstractVector, X::AbstractMatrix, sortby::Union{Function,Nothing}=eigsortby) - if sortby !== nothing && !issorted(λ, by=sortby) - p = sortperm(λ; alg=QuickSort, by=sortby) - permute!(λ, p) - Base.permutecols!!(X, p) - end - return λ, X -end -sorteig!(λ::AbstractVector, sortby::Union{Function,Nothing}=eigsortby) = sortby === nothing ? λ : sort!(λ, by=sortby) - -""" - eigen!(A; permute, scale, sortby) - eigen!(A, B; sortby) - -Same as [`eigen`](@ref), but saves space by overwriting the input `A` (and -`B`), instead of creating a copy. -""" -function eigen!(A::StridedMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal - n = size(A, 2) - n == 0 && return Eigen(zeros(T, 0), zeros(T, 0, 0)) - issymmetric(A) && return eigen!(Symmetric(A), sortby=sortby) - A, WR, WI, VL, VR, _ = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'V', 'N', A) - iszero(WI) && return Eigen(sorteig!(WR, VR, sortby)...) - evec = zeros(Complex{T}, n, n) - j = 1 - while j <= n - if WI[j] == 0 - evec[:,j] = view(VR, :, j) - else - for i = 1:n - evec[i,j] = VR[i,j] + im*VR[i,j+1] - evec[i,j+1] = VR[i,j] - im*VR[i,j+1] - end - j += 1 - end - j += 1 - end - return Eigen(sorteig!(complex.(WR, WI), evec, sortby)...) -end - -function eigen!(A::StridedMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex - n = size(A, 2) - n == 0 && return Eigen(zeros(T, 0), zeros(T, 0, 0)) - ishermitian(A) && return eigen!(Hermitian(A), sortby=sortby) - eval, evec = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'V', 'N', A)[[2,4]] - return Eigen(sorteig!(eval, evec, sortby)...) 
-end - -""" - eigen(A; permute::Bool=true, scale::Bool=true, sortby) -> Eigen - -Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F` -which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the -matrix `F.vectors`. This corresponds to solving an eigenvalue problem of the form -`Ax = λx`, where `A` is a matrix, `x` is an eigenvector, and `λ` is an eigenvalue. -(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) - -Iterating the decomposition produces the components `F.values` and `F.vectors`. - -The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref). - -For general nonsymmetric matrices it is possible to specify how the matrix is balanced -before the eigenvector calculation. The option `permute=true` permutes the matrix to become -closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to -make rows and columns more equal in norm. The default is `true` for both options. - -By default, the eigenvalues and vectors are sorted lexicographically by `(real(λ),imag(λ))`. -A different comparison function `by(λ)` can be passed to `sortby`, or you can pass -`sortby=nothing` to leave the eigenvalues in an arbitrary order. Some special matrix types -(e.g. [`Diagonal`](@ref) or [`SymTridiagonal`](@ref)) may implement their own sorting convention and not -accept a `sortby` keyword. - -# Examples -```jldoctest -julia> F = eigen([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0]) -Eigen{Float64, Float64, Matrix{Float64}, Vector{Float64}} -values: -3-element Vector{Float64}: - 1.0 - 3.0 - 18.0 -vectors: -3×3 Matrix{Float64}: - 1.0 0.0 0.0 - 0.0 1.0 0.0 - 0.0 0.0 1.0 - -julia> F.values -3-element Vector{Float64}: - 1.0 - 3.0 - 18.0 - -julia> F.vectors -3×3 Matrix{Float64}: - 1.0 0.0 0.0 - 0.0 1.0 0.0 - 0.0 0.0 1.0 - -julia> vals, vecs = F; # destructuring via iteration - -julia> vals == F.values && vecs == F.vectors -true -``` -""" -function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T - isdiag(A) && return eigen(Diagonal{eigtype(T)}(diag(A)); sortby) - ishermitian(A) && return eigen!(eigencopy_oftype(Hermitian(A), eigtype(T)); sortby) - AA = eigencopy_oftype(A, eigtype(T)) - return eigen!(AA; permute, scale, sortby) -end -function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where {T <: Union{Float16,Complex{Float16}}} - isdiag(A) && return eigen(Diagonal{eigtype(T)}(diag(A)); sortby) - E = if ishermitian(A) - eigen!(eigencopy_oftype(Hermitian(A), eigtype(T)); sortby) - else - eigen!(eigencopy_oftype(A, eigtype(T)); permute, scale, sortby) - end - values = convert(AbstractVector{isreal(E.values) ? Float16 : Complex{Float16}}, E.values) - vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors) - return Eigen(values, vectors) -end -eigen(x::Number) = Eigen([x], fill(one(x), 1, 1)) - -""" - eigvecs(A; permute::Bool=true, scale::Bool=true, `sortby`) -> Matrix - -Return a matrix `M` whose columns are the eigenvectors of `A`. (The `k`th eigenvector can -be obtained from the slice `M[:, k]`.) The `permute`, `scale`, and `sortby` keywords are the same as -for [`eigen`](@ref). 
- -# Examples -```jldoctest -julia> eigvecs([1.0 0.0 0.0; 0.0 3.0 0.0; 0.0 0.0 18.0]) -3×3 Matrix{Float64}: - 1.0 0.0 0.0 - 0.0 1.0 0.0 - 0.0 0.0 1.0 -``` -""" -eigvecs(A::Union{Number, AbstractMatrix}; kws...) = - eigvecs(eigen(A; kws...)) -eigvecs(F::Union{Eigen, GeneralizedEigen}) = F.vectors - -eigvals(F::Union{Eigen, GeneralizedEigen}) = F.values - -""" - eigvals!(A; permute::Bool=true, scale::Bool=true, sortby) -> values - -Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy. -The `permute`, `scale`, and `sortby` keywords are the same as for [`eigen`](@ref). - -!!! note - The input matrix `A` will not contain its eigenvalues after `eigvals!` is - called on it - `A` is used as a workspace. - -# Examples -```jldoctest -julia> A = [1. 2.; 3. 4.] -2×2 Matrix{Float64}: - 1.0 2.0 - 3.0 4.0 - -julia> eigvals!(A) -2-element Vector{Float64}: - -0.3722813232690143 - 5.372281323269014 - -julia> A -2×2 Matrix{Float64}: - -0.372281 -1.0 - 0.0 5.37228 -``` -""" -function eigvals!(A::StridedMatrix{<:BlasReal}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) - issymmetric(A) && return sorteig!(eigvals!(Symmetric(A)), sortby) - _, valsre, valsim, _ = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'N', 'N', A) - return sorteig!(iszero(valsim) ? valsre : complex.(valsre, valsim), sortby) -end -function eigvals!(A::StridedMatrix{<:BlasComplex}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) - ishermitian(A) && return sorteig!(eigvals(Hermitian(A)), sortby) - return sorteig!(LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'N', 'N', A)[2], sortby) -end - -# promotion type to use for eigenvalues of a Matrix{T} -eigtype(T) = promote_type(Float32, typeof(zero(T)/sqrt(abs2(one(T))))) - -""" - eigvals(A; permute::Bool=true, scale::Bool=true, sortby) -> values - -Return the eigenvalues of `A`. - -For general non-symmetric matrices it is possible to specify how the matrix is balanced -before the eigenvalue calculation. The `permute`, `scale`, and `sortby` keywords are -the same as for [`eigen`](@ref). - -# Examples -```jldoctest -julia> diag_matrix = [1 0; 0 4] -2×2 Matrix{Int64}: - 1 0 - 0 4 - -julia> eigvals(diag_matrix) -2-element Vector{Float64}: - 1.0 - 4.0 -``` -""" -eigvals(A::AbstractMatrix{T}; kws...) where T = - eigvals!(eigencopy_oftype(A, eigtype(T)); kws...) - -""" -For a scalar input, `eigvals` will return a scalar. - -# Example -```jldoctest -julia> eigvals(-2) --2 -``` -""" -eigvals(x::Number; kwargs...) = imag(x) == 0 ? real(x) : x - -""" - eigmax(A; permute::Bool=true, scale::Bool=true) - -Return the largest eigenvalue of `A`. -The option `permute=true` permutes the matrix to become -closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to -make rows and columns more equal in norm. -Note that if the eigenvalues of `A` are complex, -this method will fail, since complex numbers cannot -be sorted. - -# Examples -```jldoctest -julia> A = [0 im; -im 0] -2×2 Matrix{Complex{Int64}}: - 0+0im 0+1im - 0-1im 0+0im - -julia> eigmax(A) -1.0 - -julia> A = [0 im; -1 0] -2×2 Matrix{Complex{Int64}}: - 0+0im 0+1im - -1+0im 0+0im - -julia> eigmax(A) -ERROR: DomainError with Complex{Int64}[0+0im 0+1im; -1+0im 0+0im]: -`A` cannot have complex eigenvalues. -Stacktrace: -[...] 
-``` -""" -function eigmax(A::Union{Number, AbstractMatrix}; permute::Bool=true, scale::Bool=true) - v = eigvals(A, permute = permute, scale = scale) - if eltype(v)<:Complex - throw(DomainError(A, "`A` cannot have complex eigenvalues.")) - end - maximum(v) -end - -""" - eigmin(A; permute::Bool=true, scale::Bool=true) - -Return the smallest eigenvalue of `A`. -The option `permute=true` permutes the matrix to become -closer to upper triangular, and `scale=true` scales the matrix by its diagonal elements to -make rows and columns more equal in norm. -Note that if the eigenvalues of `A` are complex, -this method will fail, since complex numbers cannot -be sorted. - -# Examples -```jldoctest -julia> A = [0 im; -im 0] -2×2 Matrix{Complex{Int64}}: - 0+0im 0+1im - 0-1im 0+0im - -julia> eigmin(A) --1.0 - -julia> A = [0 im; -1 0] -2×2 Matrix{Complex{Int64}}: - 0+0im 0+1im - -1+0im 0+0im - -julia> eigmin(A) -ERROR: DomainError with Complex{Int64}[0+0im 0+1im; -1+0im 0+0im]: -`A` cannot have complex eigenvalues. -Stacktrace: -[...] -``` -""" -function eigmin(A::Union{Number, AbstractMatrix}; - permute::Bool=true, scale::Bool=true) - v = eigvals(A, permute = permute, scale = scale) - if eltype(v)<:Complex - throw(DomainError(A, "`A` cannot have complex eigenvalues.")) - end - minimum(v) -end - -inv(A::Eigen) = A.vectors * inv(Diagonal(A.values)) / A.vectors -det(A::Eigen) = prod(A.values) - -# Generalized eigenproblem -function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal - issymmetric(A) && isposdef(B) && return eigen!(Symmetric(A), Symmetric(B), sortby=sortby) - n = size(A, 1) - if LAPACK.version() < v"3.6.0" - alphar, alphai, beta, _, vr = LAPACK.ggev!('N', 'V', A, B) - else - alphar, alphai, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B) - end - iszero(alphai) && return GeneralizedEigen(sorteig!(alphar ./ beta, vr, sortby)...) - - vecs = zeros(Complex{T}, n, n) - j = 1 - while j <= n - if alphai[j] == 0 - vecs[:,j] = view(vr, :, j) - else - for i = 1:n - vecs[i,j ] = vr[i,j] + im*vr[i,j+1] - vecs[i,j+1] = vr[i,j] - im*vr[i,j+1] - end - j += 1 - end - j += 1 - end - return GeneralizedEigen(sorteig!(complex.(alphar, alphai)./beta, vecs, sortby)...) -end - -function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex - ishermitian(A) && isposdef(B) && return eigen!(Hermitian(A), Hermitian(B), sortby=sortby) - if LAPACK.version() < v"3.6.0" - alpha, beta, _, vr = LAPACK.ggev!('N', 'V', A, B) - else - alpha, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B) - end - return GeneralizedEigen(sorteig!(alpha./beta, vr, sortby)...) -end - -""" - eigen(A, B; sortby) -> GeneralizedEigen - -Compute the generalized eigenvalue decomposition of `A` and `B`, returning a -[`GeneralizedEigen`](@ref) factorization object `F` which contains the generalized eigenvalues in -`F.values` and the generalized eigenvectors in the columns of the matrix `F.vectors`. -This corresponds to solving a generalized eigenvalue problem of the form -`Ax = λBx`, where `A, B` are matrices, `x` is an eigenvector, and `λ` is an eigenvalue. -(The `k`th generalized eigenvector can be obtained from the slice `F.vectors[:, k]`.) - -Iterating the decomposition produces the components `F.values` and `F.vectors`. - -By default, the eigenvalues and vectors are sorted lexicographically by `(real(λ),imag(λ))`. 
-A different comparison function `by(λ)` can be passed to `sortby`, or you can pass -`sortby=nothing` to leave the eigenvalues in an arbitrary order. - -# Examples -```jldoctest -julia> A = [1 0; 0 -1] -2×2 Matrix{Int64}: - 1 0 - 0 -1 - -julia> B = [0 1; 1 0] -2×2 Matrix{Int64}: - 0 1 - 1 0 - -julia> F = eigen(A, B); - -julia> F.values -2-element Vector{ComplexF64}: - 0.0 - 1.0im - 0.0 + 1.0im - -julia> F.vectors -2×2 Matrix{ComplexF64}: - 0.0+1.0im 0.0-1.0im - -1.0+0.0im -1.0-0.0im - -julia> vals, vecs = F; # destructuring via iteration - -julia> vals == F.values && vecs == F.vectors -true -``` -""" -function eigen(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} - S = promote_type(eigtype(TA), TB) - eigen!(copy_similar(A, S), copy_similar(B, S); kws...) -end -eigen(A::Number, B::Number) = eigen(fill(A,1,1), fill(B,1,1)) - -""" - LinearAlgebra.eigencopy_oftype(A::AbstractMatrix, ::Type{S}) - -Creates a dense copy of `A` with eltype `S` by calling `copy_similar(A, S)`. -In the case of `Hermitian` or `Symmetric` matrices additionally retains the wrapper, -together with the `uplo` field. -""" -eigencopy_oftype(A, S) = copy_similar(A, S) - -""" - eigvals!(A, B; sortby) -> values - -Same as [`eigvals`](@ref), but saves space by overwriting the input `A` (and `B`), -instead of creating copies. - -!!! note - The input matrices `A` and `B` will not contain their eigenvalues after - `eigvals!` is called. They are used as workspaces. - -# Examples -```jldoctest -julia> A = [1. 0.; 0. -1.] -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 -1.0 - -julia> B = [0. 1.; 1. 0.] -2×2 Matrix{Float64}: - 0.0 1.0 - 1.0 0.0 - -julia> eigvals!(A, B) -2-element Vector{ComplexF64}: - 0.0 - 1.0im - 0.0 + 1.0im - -julia> A -2×2 Matrix{Float64}: - -0.0 -1.0 - 1.0 -0.0 - -julia> B -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 1.0 -``` -""" -function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal - issymmetric(A) && isposdef(B) && return sorteig!(eigvals!(Symmetric(A), Symmetric(B)), sortby) - if LAPACK.version() < v"3.6.0" - alphar, alphai, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B) - else - alphar, alphai, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B) - end - return sorteig!((iszero(alphai) ? alphar : complex.(alphar, alphai))./beta, sortby) -end -function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex - ishermitian(A) && isposdef(B) && return sorteig!(eigvals!(Hermitian(A), Hermitian(B)), sortby) - if LAPACK.version() < v"3.6.0" - alpha, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B) - else - alpha, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B) - end - return sorteig!(alpha./beta, sortby) -end - -""" - eigvals(A, B) -> values - -Compute the generalized eigenvalues of `A` and `B`. - -# Examples -```jldoctest -julia> A = [1 0; 0 -1] -2×2 Matrix{Int64}: - 1 0 - 0 -1 - -julia> B = [0 1; 1 0] -2×2 Matrix{Int64}: - 0 1 - 1 0 - -julia> eigvals(A,B) -2-element Vector{ComplexF64}: - 0.0 - 1.0im - 0.0 + 1.0im -``` -""" -function eigvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} - S = promote_type(eigtype(TA), TB) - return eigvals!(copy_similar(A, S), copy_similar(B, S); kws...) -end - -""" - eigvecs(A, B) -> Matrix - -Return a matrix `M` whose columns are the generalized eigenvectors of `A` and `B`. (The `k`th eigenvector can -be obtained from the slice `M[:, k]`.) 
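A minimal sketch showing that `eigvals(A, B)` and `eigvecs(A, B)` are simply the components of the corresponding [`eigen`](@ref) factorization (same example matrices as below; `≈` allows for floating-point comparison):

```julia
using LinearAlgebra

A = [1 0; 0 -1]
B = [0 1; 1 0]
F = eigen(A, B)

eigvals(A, B) ≈ F.values     # true
eigvecs(A, B) ≈ F.vectors    # true
```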
- -# Examples -```jldoctest -julia> A = [1 0; 0 -1] -2×2 Matrix{Int64}: - 1 0 - 0 -1 - -julia> B = [0 1; 1 0] -2×2 Matrix{Int64}: - 0 1 - 1 0 - -julia> eigvecs(A, B) -2×2 Matrix{ComplexF64}: - 0.0+1.0im 0.0-1.0im - -1.0+0.0im -1.0-0.0im -``` -""" -eigvecs(A::AbstractMatrix, B::AbstractMatrix; kws...) = eigvecs(eigen(A, B; kws...)) - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{Eigen,GeneralizedEigen}) - summary(io, F); println(io) - println(io, "values:") - show(io, mime, F.values) - println(io, "\nvectors:") - show(io, mime, F.vectors) -end - -function Base.hash(F::Eigen, h::UInt) - return hash(F.values, hash(F.vectors, hash(Eigen, h))) -end -function Base.:(==)(A::Eigen, B::Eigen) - return A.values == B.values && A.vectors == B.vectors -end -function Base.isequal(A::Eigen, B::Eigen) - return isequal(A.values, B.values) && isequal(A.vectors, B.vectors) -end - -# Conversion methods - -## Can we determine the source/result is Real? This is not stored in the type Eigen -AbstractMatrix(F::Eigen) = F.vectors * Diagonal(F.values) / F.vectors -AbstractArray(F::Eigen) = AbstractMatrix(F) -Matrix(F::Eigen) = Array(AbstractArray(F)) -Array(F::Eigen) = Matrix(F) diff --git a/stdlib/LinearAlgebra/src/exceptions.jl b/stdlib/LinearAlgebra/src/exceptions.jl deleted file mode 100644 index a8d81aad3e067..0000000000000 --- a/stdlib/LinearAlgebra/src/exceptions.jl +++ /dev/null @@ -1,62 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -export LAPACKException, - SingularException, - PosDefException, - RankDeficientException, - ZeroPivotException - -struct LAPACKException <: Exception - info::BlasInt -end - -""" - SingularException - -Exception thrown when the input matrix has one or more zero-valued eigenvalues, and is not invertible. -A linear solve involving such a matrix cannot be computed. -The `info` field indicates the location of (one of) the singular value(s). -""" -struct SingularException <: Exception - info::BlasInt -end - -""" - PosDefException - -Exception thrown when the input matrix was not [positive definite](https://en.wikipedia.org/wiki/Definiteness_of_a_matrix). -Some linear algebra functions and factorizations are only applicable to positive definite matrices. -The `info` field indicates the location of (one of) the eigenvalue(s) which is (are) less than/equal to 0. -""" -struct PosDefException <: Exception - info::BlasInt -end -function Base.showerror(io::IO, ex::PosDefException) - print(io, "PosDefException: matrix is not ") - if ex.info == -1 - print(io, "Hermitian") - else - print(io, "positive definite") - end - print(io, "; Cholesky factorization failed.") -end - -struct RankDeficientException <: Exception - info::BlasInt -end - -""" - ZeroPivotException <: Exception - -Exception thrown when a matrix factorization/solve encounters a zero in a pivot (diagonal) -position and cannot proceed. This may *not* mean that the matrix is singular: -it may be fruitful to switch to a different factorization such as pivoted LU -that can re-order variables to eliminate spurious zero pivots. -The `info` field indicates the location of (one of) the zero pivot(s). -""" -struct ZeroPivotException <: Exception - info::BlasInt -end -function Base.showerror(io::IO, ex::ZeroPivotException) - print(io, "ZeroPivotException: factorization encountered one or more zero pivots. 
Consider switching to a pivoted LU factorization.") -end diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl deleted file mode 100644 index 8c35a23e6b6d5..0000000000000 --- a/stdlib/LinearAlgebra/src/factorization.jl +++ /dev/null @@ -1,202 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## Matrix factorizations and decompositions -""" - LinearAlgebra.Factorization - -Abstract type for [matrix factorizations](https://en.wikipedia.org/wiki/Matrix_decomposition) -a.k.a. matrix decompositions. -See [online documentation](@ref man-linalg-factorizations) for a list of available -matrix factorizations. -""" -abstract type Factorization{T} end - -""" - AdjointFactorization - -Lazy wrapper type for the adjoint of the underlying `Factorization` object. Usually, the -`AdjointFactorization` constructor should not be called directly, use -[`adjoint(:: Factorization)`](@ref) instead. -""" -struct AdjointFactorization{T,S<:Factorization} <: Factorization{T} - parent::S -end -AdjointFactorization(F::Factorization) = - AdjointFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F) - -""" - TransposeFactorization - -Lazy wrapper type for the transpose of the underlying `Factorization` object. Usually, the -`TransposeFactorization` constructor should not be called directly, use -[`transpose(:: Factorization)`](@ref) instead. -""" -struct TransposeFactorization{T,S<:Factorization} <: Factorization{T} - parent::S -end -TransposeFactorization(F::Factorization) = - TransposeFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F) - -eltype(::Type{<:Factorization{T}}) where {T} = T -size(F::AdjointFactorization) = reverse(size(parent(F))) -size(F::TransposeFactorization) = reverse(size(parent(F))) -size(F::Union{AdjointFactorization,TransposeFactorization}, d::Integer) = d in (1, 2) ? size(F)[d] : 1 -parent(F::Union{AdjointFactorization,TransposeFactorization}) = F.parent - -""" - adjoint(F::Factorization) - -Lazy adjoint of the factorization `F`. By default, returns an -[`AdjointFactorization`](@ref) wrapper. -""" -adjoint(F::Factorization) = AdjointFactorization(F) -""" - transpose(F::Factorization) - -Lazy transpose of the factorization `F`. By default, returns a [`TransposeFactorization`](@ref), -except for `Factorization`s with real `eltype`, in which case returns an [`AdjointFactorization`](@ref). -""" -transpose(F::Factorization) = TransposeFactorization(F) -transpose(F::Factorization{<:Real}) = AdjointFactorization(F) -adjoint(F::AdjointFactorization) = F.parent -transpose(F::TransposeFactorization) = F.parent -transpose(F::AdjointFactorization{<:Real}) = F.parent -conj(A::TransposeFactorization) = adjoint(A.parent) -conj(A::AdjointFactorization) = transpose(A.parent) - -checkpositivedefinite(info) = info == 0 || throw(PosDefException(info)) -checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info)) -checknonsingular(info, ::RowNonZero) = info == 0 || throw(SingularException(info)) -checknonsingular(info, ::NoPivot) = info == 0 || throw(ZeroPivotException(info)) -checknonsingular(info) = checknonsingular(info, RowMaximum()) - -""" - issuccess(F::Factorization) - -Test that a factorization of a matrix succeeded. - -!!! compat "Julia 1.6" - `issuccess(::CholeskyPivoted)` requires Julia 1.6 or later. 
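Illustrative aside (not from the patch): the `check = false` plus `issuccess` combination shown in the examples that follow is typically used to branch on a failed factorization instead of catching an exception. A minimal sketch; the `pinv` fallback is just one possible recovery strategy, not something this code prescribes:

```julia
using LinearAlgebra

A = [1.0 2.0; 2.0 4.0]            # rank-deficient, so plain LU fails
b = [1.0, 2.0]
F = lu(A; check = false)          # do not throw on failure
x = issuccess(F) ? F \ b : pinv(A) * b   # fall back to a least-squares solution
@assert A * x ≈ b                 # b happens to lie in the column space of A
```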
- -```jldoctest -julia> F = cholesky([1 0; 0 1]); - -julia> issuccess(F) -true - -julia> F = lu([1 0; 0 0]; check = false); - -julia> issuccess(F) -false -``` -""" -issuccess(F::Factorization) - -function logdet(F::Factorization) - d, s = logabsdet(F) - return d + log(s) -end - -function det(F::Factorization) - d, s = logabsdet(F) - return exp(d)*s -end - -convert(::Type{T}, f::T) where {T<:Factorization} = f -convert(::Type{T}, f::Factorization) where {T<:Factorization} = T(f)::T - -convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)::T - -### General promotion rules -Factorization{T}(F::Factorization{T}) where {T} = F -# This no longer looks odd since the return _is_ a Factorization! -Factorization{T}(A::AdjointFactorization) where {T} = - adjoint(Factorization{T}(parent(A))) -Factorization{T}(A::TransposeFactorization) where {T} = - transpose(Factorization{T}(parent(A))) -inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n))) - -Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h) -Base.:(==)( F::T, G::T) where {T<:Factorization} = all(f -> getfield(F, f) == getfield(G, f), 1:nfields(F)) -Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F, f), getfield(G, f)), 1:nfields(F))::Bool - -function Base.show(io::IO, x::AdjointFactorization) - print(io, "adjoint of ") - show(io, parent(x)) -end -function Base.show(io::IO, x::TransposeFactorization) - print(io, "transpose of ") - show(io, parent(x)) -end -function Base.show(io::IO, ::MIME"text/plain", x::AdjointFactorization) - print(io, "adjoint of ") - show(io, MIME"text/plain"(), parent(x)) -end -function Base.show(io::IO, ::MIME"text/plain", x::TransposeFactorization) - print(io, "transpose of ") - show(io, MIME"text/plain"(), parent(x)) -end - -# With a real lhs and complex rhs with the same precision, we can reinterpret -# the complex rhs as a real rhs with twice the number of columns or rows -function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} - require_one_based_indexing(B) - c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2)) - x = ldiv!(F, c2r) - return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(x, div(length(x), 2), 2))))), _ret_size(F, B)) -end -# don't do the reinterpretation for [Adjoint/Transpose]Factorization -(\)(F::TransposeFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = - conj!(adjoint(parent(F)) \ conj.(B)) -(\)(F::AdjointFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = - @invoke \(F::typeof(F), B::VecOrMat) - -function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where {T<:BlasReal} - require_one_based_indexing(B) - x = rdiv!(copy(reinterpret(T, B)), F) - return copy(reinterpret(Complex{T}, x)) -end -# don't do the reinterpretation for [Adjoint/Transpose]Factorization -(/)(B::VecOrMat{Complex{T}}, F::TransposeFactorization{T}) where {T<:BlasReal} = - conj!(adjoint(parent(F)) \ conj.(B)) -(/)(B::VecOrMat{Complex{T}}, F::AdjointFactorization{T}) where {T<:BlasReal} = - @invoke /(B::VecOrMat{Complex{T}}, F::Factorization{T}) - -function (\)(F::Factorization, B::AbstractVecOrMat) - require_one_based_indexing(B) - TFB = typeof(oneunit(eltype(F)) \ oneunit(eltype(B))) - ldiv!(F, copy_similar(B, TFB)) -end -(\)(F::TransposeFactorization, B::AbstractVecOrMat) = conj!(adjoint(F.parent) \ conj.(B)) - -function (/)(B::AbstractMatrix, F::Factorization) - require_one_based_indexing(B) 
- TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) - rdiv!(copy_similar(B, TFB), F) -end -(/)(A::AbstractMatrix, F::AdjointFactorization) = adjoint(adjoint(F) \ adjoint(A)) -(/)(A::AbstractMatrix, F::TransposeFactorization) = transpose(transpose(F) \ transpose(A)) - -function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector) - require_one_based_indexing(Y, B) - m, n = size(A) - if m > n - Bc = copy(B) - ldiv!(A, Bc) - return copyto!(Y, 1, Bc, 1, n) - else - return ldiv!(A, copyto!(Y, B)) - end -end -function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix) - require_one_based_indexing(Y, B) - m, n = size(A) - if m > n - Bc = copy(B) - ldiv!(A, Bc) - return copyto!(Y, view(Bc, 1:n, :)) - else - copyto!(view(Y, 1:m, :), view(B, 1:m, :)) - return ldiv!(A, Y) - end -end diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl deleted file mode 100644 index 9cbe3f76ccfb9..0000000000000 --- a/stdlib/LinearAlgebra/src/generic.jl +++ /dev/null @@ -1,1890 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## linalg.jl: Some generic Linear Algebra definitions - -# Elements of `out` may not be defined (e.g., for `BigFloat`). To make -# `mul!(out, A, B)` work for such cases, `out .*ₛ beta` short-circuits -# `out * beta`. Using `broadcasted` to avoid the multiplication -# inside this function. -function *ₛ end -Broadcast.broadcasted(::typeof(*ₛ), out, beta) = - iszero(beta::Number) ? false : broadcasted(*, out, beta) - -""" - MulAddMul(alpha, beta) - -A callable for operating short-circuiting version of `x * alpha + y * beta`. - -# Examples -```jldoctest -julia> using LinearAlgebra: MulAddMul - -julia> _add = MulAddMul(1, 0); - -julia> _add(123, nothing) -123 - -julia> MulAddMul(12, 34)(56, 78) == 56 * 12 + 78 * 34 -true -``` -""" -struct MulAddMul{ais1, bis0, TA, TB} - alpha::TA - beta::TB -end - -@inline function MulAddMul(alpha::TA, beta::TB) where {TA,TB} - if isone(alpha) - if iszero(beta) - return MulAddMul{true,true,TA,TB}(alpha, beta) - else - return MulAddMul{true,false,TA,TB}(alpha, beta) - end - else - if iszero(beta) - return MulAddMul{false,true,TA,TB}(alpha, beta) - else - return MulAddMul{false,false,TA,TB}(alpha, beta) - end - end -end - -MulAddMul() = MulAddMul{true,true,Bool,Bool}(true, false) - -@inline (::MulAddMul{true})(x) = x -@inline (p::MulAddMul{false})(x) = x * p.alpha -@inline (::MulAddMul{true, true})(x, _) = x -@inline (p::MulAddMul{false, true})(x, _) = x * p.alpha -@inline (p::MulAddMul{true, false})(x, y) = x + y * p.beta -@inline (p::MulAddMul{false, false})(x, y) = x * p.alpha + y * p.beta - -""" - _modify!(_add::MulAddMul, x, C, idx) - -Short-circuiting version of `C[idx] = _add(x, C[idx])`. - -Short-circuiting the indexing `C[idx]` is necessary for avoiding `UndefRefError` -when mutating an array of non-primitive numbers such as `BigFloat`. - -# Examples -```jldoctest -julia> using LinearAlgebra: MulAddMul, _modify! - -julia> _add = MulAddMul(1, 0); - C = Vector{BigFloat}(undef, 1); - -julia> _modify!(_add, 123, C, 1) - -julia> C -1-element Vector{BigFloat}: - 123.0 -``` -""" -@inline @propagate_inbounds function _modify!(p::MulAddMul{ais1, bis0}, - x, C, idx′) where {ais1, bis0} - # `idx′` may be an integer, a tuple of integer, or a `CartesianIndex`. - # Let `CartesianIndex` constructor normalize them so that it can be - # used uniformly. 
It also acts as a workaround for performance penalty - # of splatting a number (#29114): - idx = CartesianIndex(idx′) - if bis0 - C[idx] = p(x) - else - C[idx] = p(x, C[idx]) - end - return -end - -@inline function _rmul_or_fill!(C::AbstractArray, beta::Number) - if isempty(C) - return C - end - if iszero(beta) - fill!(C, zero(eltype(C))) - else - rmul!(C, beta) - end - return C -end - - -function generic_mul!(C::AbstractArray, X::AbstractArray, s::Number, _add::MulAddMul) - if length(C) != length(X) - throw(DimensionMismatch("first array has length $(length(C)) which does not match the length of the second, $(length(X)).")) - end - for (IC, IX) in zip(eachindex(C), eachindex(X)) - @inbounds _modify!(_add, X[IX] * s, C, IC) - end - C -end - -function generic_mul!(C::AbstractArray, s::Number, X::AbstractArray, _add::MulAddMul) - if length(C) != length(X) - throw(DimensionMismatch("first array has length $(length(C)) which does not -match the length of the second, $(length(X)).")) - end - for (IC, IX) in zip(eachindex(C), eachindex(X)) - @inbounds _modify!(_add, s * X[IX], C, IC) - end - C -end - -@inline function mul!(C::AbstractArray, s::Number, X::AbstractArray, alpha::Number, beta::Number) - if axes(C) == axes(X) - C .= (s .* X) .*ₛ alpha .+ C .*ₛ beta - else - generic_mul!(C, s, X, MulAddMul(alpha, beta)) - end - return C -end -@inline function mul!(C::AbstractArray, X::AbstractArray, s::Number, alpha::Number, beta::Number) - if axes(C) == axes(X) - C .= (X .* s) .*ₛ alpha .+ C .*ₛ beta - else - generic_mul!(C, X, s, MulAddMul(alpha, beta)) - end - return C -end - -# For better performance when input and output are the same array -# See https://github.com/JuliaLang/julia/issues/8415#issuecomment-56608729 -""" - rmul!(A::AbstractArray, b::Number) - -Scale an array `A` by a scalar `b` overwriting `A` in-place. Use -[`lmul!`](@ref) to multiply scalar from left. The scaling operation -respects the semantics of the multiplication [`*`](@ref) between an -element of `A` and `b`. In particular, this also applies to -multiplication involving non-finite numbers such as `NaN` and `±Inf`. - -!!! compat "Julia 1.1" - Prior to Julia 1.1, `NaN` and `±Inf` entries in `A` were treated - inconsistently. - -# Examples -```jldoctest -julia> A = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> rmul!(A, 2) -2×2 Matrix{Int64}: - 2 4 - 6 8 - -julia> rmul!([NaN], 0.0) -1-element Vector{Float64}: - NaN -``` -""" -function rmul!(X::AbstractArray, s::Number) - @simd for I in eachindex(X) - @inbounds X[I] *= s - end - X -end - - -""" - lmul!(a::Number, B::AbstractArray) - -Scale an array `B` by a scalar `a` overwriting `B` in-place. Use -[`rmul!`](@ref) to multiply scalar from right. The scaling operation -respects the semantics of the multiplication [`*`](@ref) between `a` -and an element of `B`. In particular, this also applies to -multiplication involving non-finite numbers such as `NaN` and `±Inf`. - -!!! compat "Julia 1.1" - Prior to Julia 1.1, `NaN` and `±Inf` entries in `B` were treated - inconsistently. - -# Examples -```jldoctest -julia> B = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> lmul!(2, B) -2×2 Matrix{Int64}: - 2 4 - 6 8 - -julia> lmul!(0.0, [Inf]) -1-element Vector{Float64}: - NaN -``` -""" -function lmul!(s::Number, X::AbstractArray) - @simd for I in eachindex(X) - @inbounds X[I] = s*X[I] - end - X -end - -""" - rdiv!(A::AbstractArray, b::Number) - -Divide each entry in an array `A` by a scalar `b` overwriting `A` -in-place. Use [`ldiv!`](@ref) to divide scalar from left. 
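Aside for context (not part of the diff): the scalar five-argument `mul!` methods defined just above compute `C = (s*X)*α + C*β` in place, with `MulAddMul` short-circuiting the degenerate `α`/`β` cases. A small sketch of that contract:

```julia
using LinearAlgebra

# In-place scaled update: C ← (s*X)*α + C*β.
X = [1.0 2.0; 3.0 4.0]
C = ones(2, 2)
mul!(C, 2.0, X, 3.0, 10.0)        # C ← 2X*3 + C*10
@assert C == 6 .* X .+ 10
```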
- -# Examples -```jldoctest -julia> A = [1.0 2.0; 3.0 4.0] -2×2 Matrix{Float64}: - 1.0 2.0 - 3.0 4.0 - -julia> rdiv!(A, 2.0) -2×2 Matrix{Float64}: - 0.5 1.0 - 1.5 2.0 -``` -""" -function rdiv!(X::AbstractArray, s::Number) - @simd for I in eachindex(X) - @inbounds X[I] /= s - end - X -end - -""" - ldiv!(a::Number, B::AbstractArray) - -Divide each entry in an array `B` by a scalar `a` overwriting `B` -in-place. Use [`rdiv!`](@ref) to divide scalar from right. - -# Examples -```jldoctest -julia> B = [1.0 2.0; 3.0 4.0] -2×2 Matrix{Float64}: - 1.0 2.0 - 3.0 4.0 - -julia> ldiv!(2.0, B) -2×2 Matrix{Float64}: - 0.5 1.0 - 1.5 2.0 -``` -""" -function ldiv!(s::Number, X::AbstractArray) - @simd for I in eachindex(X) - @inbounds X[I] = s\X[I] - end - X -end -ldiv!(Y::AbstractArray, s::Number, X::AbstractArray) = Y .= s .\ X - -# Generic fallback. This assumes that B and Y have the same sizes. -ldiv!(Y::AbstractArray, A::AbstractMatrix, B::AbstractArray) = ldiv!(A, copyto!(Y, B)) - - -""" - cross(x, y) - ×(x,y) - -Compute the cross product of two 3-vectors. - -# Examples -```jldoctest -julia> a = [0;1;0] -3-element Vector{Int64}: - 0 - 1 - 0 - -julia> b = [0;0;1] -3-element Vector{Int64}: - 0 - 0 - 1 - -julia> cross(a,b) -3-element Vector{Int64}: - 1 - 0 - 0 -``` -""" -function cross(a::AbstractVector, b::AbstractVector) - if !(length(a) == length(b) == 3) - throw(DimensionMismatch("cross product is only defined for vectors of length 3")) - end - a1, a2, a3 = a - b1, b2, b3 = b - [a2*b3-a3*b2, a3*b1-a1*b3, a1*b2-a2*b1] -end - -""" - triu(M) - -Upper triangle of a matrix. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> triu(a) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 0.0 1.0 1.0 1.0 - 0.0 0.0 1.0 1.0 - 0.0 0.0 0.0 1.0 -``` -""" -triu(M::AbstractMatrix) = triu!(copy(M)) - -""" - tril(M) - -Lower triangle of a matrix. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> tril(a) -4×4 Matrix{Float64}: - 1.0 0.0 0.0 0.0 - 1.0 1.0 0.0 0.0 - 1.0 1.0 1.0 0.0 - 1.0 1.0 1.0 1.0 -``` -""" -tril(M::AbstractMatrix) = tril!(copy(M)) - -""" - triu(M, k::Integer) - -Return the upper triangle of `M` starting from the `k`th superdiagonal. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> triu(a,3) -4×4 Matrix{Float64}: - 0.0 0.0 0.0 1.0 - 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 - -julia> triu(a,-3) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 -``` -""" -triu(M::AbstractMatrix,k::Integer) = triu!(copy(M),k) - -""" - tril(M, k::Integer) - -Return the lower triangle of `M` starting from the `k`th superdiagonal. - -# Examples -```jldoctest -julia> a = fill(1.0, (4,4)) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> tril(a,3) -4×4 Matrix{Float64}: - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 - -julia> tril(a,-3) -4×4 Matrix{Float64}: - 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 - 1.0 0.0 0.0 0.0 -``` -""" -tril(M::AbstractMatrix,k::Integer) = tril!(copy(M),k) - -""" - triu!(M) - -Upper triangle of a matrix, overwriting `M` in the process. -See also [`triu`](@ref). 
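Not in the patch, just a contrast of the copying and in-place variants defined above: `triu(M)` is documented as `triu!(copy(M))`, so the original matrix is left untouched unless the bang form is used.

```julia
using LinearAlgebra

A = ones(3, 3)
U = triu(A)                 # copying form
@assert A == ones(3, 3)     # A is unchanged
triu!(A)                    # in-place form
@assert A == U
```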
-""" -triu!(M::AbstractMatrix) = triu!(M,0) - -""" - tril!(M) - -Lower triangle of a matrix, overwriting `M` in the process. -See also [`tril`](@ref). -""" -tril!(M::AbstractMatrix) = tril!(M,0) - -diag(A::AbstractVector) = throw(ArgumentError("use diagm instead of diag to construct a diagonal matrix")) - -########################################################################################### -# Dot products and norms - -# special cases of norm; note that they don't need to handle isempty(x) -generic_normMinusInf(x) = float(mapreduce(norm, min, x)) - -generic_normInf(x) = float(mapreduce(norm, max, x)) - -generic_norm1(x) = mapreduce(float ∘ norm, +, x) - -# faster computation of norm(x)^2, avoiding overflow for integers -norm_sqr(x) = norm(x)^2 -norm_sqr(x::Number) = abs2(x) -norm_sqr(x::Union{T,Complex{T},Rational{T}}) where {T<:Integer} = abs2(float(x)) - -function generic_norm2(x) - maxabs = normInf(x) - (ismissing(maxabs) || iszero(maxabs) || isinf(maxabs)) && return maxabs - (v, s) = iterate(x)::Tuple - T = typeof(maxabs) - if isfinite(length(x)*maxabs*maxabs) && !iszero(maxabs*maxabs) # Scaling not necessary - sum::promote_type(Float64, T) = norm_sqr(v) - while true - y = iterate(x, s) - y === nothing && break - (v, s) = y - sum += norm_sqr(v) - end - ismissing(sum) && return missing - return convert(T, sqrt(sum)) - else - sum = abs2(norm(v)/maxabs) - while true - y = iterate(x, s) - y === nothing && break - (v, s) = y - sum += (norm(v)/maxabs)^2 - end - ismissing(sum) && return missing - return convert(T, maxabs*sqrt(sum)) - end -end - -# Compute L_p norm ‖x‖ₚ = sum(abs(x).^p)^(1/p) -# (Not technically a "norm" for p < 1.) -function generic_normp(x, p) - (v, s) = iterate(x)::Tuple - if p > 1 || p < -1 # might need to rescale to avoid overflow - maxabs = p > 1 ? normInf(x) : normMinusInf(x) - (ismissing(maxabs) || iszero(maxabs) || isinf(maxabs)) && return maxabs - T = typeof(maxabs) - else - T = typeof(float(norm(v))) - end - spp::promote_type(Float64, T) = p - if -1 <= p <= 1 || (isfinite(length(x)*maxabs^spp) && !iszero(maxabs^spp)) # scaling not necessary - sum::promote_type(Float64, T) = norm(v)^spp - while true - y = iterate(x, s) - y === nothing && break - (v, s) = y - ismissing(v) && return missing - sum += norm(v)^spp - end - return convert(T, sum^inv(spp)) - else # rescaling - sum = (norm(v)/maxabs)^spp - ismissing(sum) && return missing - while true - y = iterate(x, s) - y === nothing && break - (v, s) = y - ismissing(v) && return missing - sum += (norm(v)/maxabs)^spp - end - return convert(T, maxabs*sum^inv(spp)) - end -end - -normMinusInf(x) = generic_normMinusInf(x) -normInf(x) = generic_normInf(x) -norm1(x) = generic_norm1(x) -norm2(x) = generic_norm2(x) -normp(x, p) = generic_normp(x, p) - - -""" - norm(A, p::Real=2) - -For any iterable container `A` (including arrays of any dimension) of numbers (or any -element type for which `norm` is defined), compute the `p`-norm (defaulting to `p=2`) as if -`A` were a vector of the corresponding length. - -The `p`-norm is defined as -```math -\\|A\\|_p = \\left( \\sum_{i=1}^n | a_i | ^p \\right)^{1/p} -``` -with ``a_i`` the entries of ``A``, ``| a_i |`` the [`norm`](@ref) of ``a_i``, and -``n`` the length of ``A``. Since the `p`-norm is computed using the [`norm`](@ref)s -of the entries of `A`, the `p`-norm of a vector of vectors is not compatible with -the interpretation of it as a block vector in general if `p != 2`. 
- -`p` can assume any numeric value (even though not all values produce a -mathematically valid vector norm). In particular, `norm(A, Inf)` returns the largest value -in `abs.(A)`, whereas `norm(A, -Inf)` returns the smallest. If `A` is a matrix and `p=2`, -then this is equivalent to the Frobenius norm. - -The second argument `p` is not necessarily a part of the interface for `norm`, i.e. a custom -type may only implement `norm(A)` without second argument. - -Use [`opnorm`](@ref) to compute the operator norm of a matrix. - -# Examples -```jldoctest -julia> v = [3, -2, 6] -3-element Vector{Int64}: - 3 - -2 - 6 - -julia> norm(v) -7.0 - -julia> norm(v, 1) -11.0 - -julia> norm(v, Inf) -6.0 - -julia> norm([1 2 3; 4 5 6; 7 8 9]) -16.881943016134134 - -julia> norm([1 2 3 4 5 6 7 8 9]) -16.881943016134134 - -julia> norm(1:9) -16.881943016134134 - -julia> norm(hcat(v,v), 1) == norm(vcat(v,v), 1) != norm([v,v], 1) -true - -julia> norm(hcat(v,v), 2) == norm(vcat(v,v), 2) == norm([v,v], 2) -true - -julia> norm(hcat(v,v), Inf) == norm(vcat(v,v), Inf) != norm([v,v], Inf) -true -``` -""" -function norm(itr, p::Real=2) - isempty(itr) && return float(norm(zero(eltype(itr)))) - if p == 2 - return norm2(itr) - elseif p == 1 - return norm1(itr) - elseif p == Inf - return normInf(itr) - elseif p == 0 - return typeof(float(norm(first(itr))))(count(!iszero, itr)) - elseif p == -Inf - return normMinusInf(itr) - else - normp(itr, p) - end -end - -""" - norm(x::Number, p::Real=2) - -For numbers, return ``\\left( |x|^p \\right)^{1/p}``. - -# Examples -```jldoctest -julia> norm(2, 1) -2.0 - -julia> norm(-2, 1) -2.0 - -julia> norm(2, 2) -2.0 - -julia> norm(-2, 2) -2.0 - -julia> norm(2, Inf) -2.0 - -julia> norm(-2, Inf) -2.0 -``` -""" -@inline function norm(x::Number, p::Real=2) - afx = abs(float(x)) - if p == 0 - if iszero(x) - return zero(afx) - elseif !isnan(x) - return oneunit(afx) - else - return afx - end - else - return afx - end -end -norm(::Missing, p::Real=2) = missing - -# special cases of opnorm -function opnorm1(A::AbstractMatrix{T}) where T - require_one_based_indexing(A) - m, n = size(A) - Tnorm = typeof(float(real(zero(T)))) - Tsum = promote_type(Float64, Tnorm) - nrm::Tsum = 0 - @inbounds begin - for j = 1:n - nrmj::Tsum = 0 - for i = 1:m - nrmj += norm(A[i,j]) - end - nrm = max(nrm,nrmj) - end - end - return convert(Tnorm, nrm) -end - -function opnorm2(A::AbstractMatrix{T}) where T - require_one_based_indexing(A) - m,n = size(A) - Tnorm = typeof(float(real(zero(T)))) - if m == 0 || n == 0 return zero(Tnorm) end - if m == 1 || n == 1 return norm2(A) end - return svdvals(A)[1] -end - -function opnormInf(A::AbstractMatrix{T}) where T - require_one_based_indexing(A) - m,n = size(A) - Tnorm = typeof(float(real(zero(T)))) - Tsum = promote_type(Float64, Tnorm) - nrm::Tsum = 0 - @inbounds begin - for i = 1:m - nrmi::Tsum = 0 - for j = 1:n - nrmi += norm(A[i,j]) - end - nrm = max(nrm,nrmi) - end - end - return convert(Tnorm, nrm) -end - - -""" - opnorm(A::AbstractMatrix, p::Real=2) - -Compute the operator norm (or matrix norm) induced by the vector `p`-norm, -where valid values of `p` are `1`, `2`, or `Inf`. (Note that for sparse matrices, -`p=2` is currently not implemented.) Use [`norm`](@ref) to compute the Frobenius -norm. - -When `p=1`, the operator norm is the maximum absolute column sum of `A`: -```math -\\|A\\|_1 = \\max_{1 ≤ j ≤ n} \\sum_{i=1}^m | a_{ij} | -``` -with ``a_{ij}`` the entries of ``A``, and ``m`` and ``n`` its dimensions. 
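Illustrative aside (not from the patch), contrasting the induced operator norms described above with the entrywise Frobenius norm computed by `norm`:

```julia
using LinearAlgebra

A = [1.0 2.0; 3.0 4.0]
@assert opnorm(A, 1) == 6.0                  # largest absolute column sum
@assert opnorm(A, Inf) == 7.0                # largest absolute row sum
@assert opnorm(A, 2) ≈ maximum(svdvals(A))   # spectral norm
@assert norm(A) ≈ sqrt(1 + 4 + 9 + 16)       # Frobenius norm treats A as a vector
```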
- -When `p=2`, the operator norm is the spectral norm, equal to the largest -singular value of `A`. - -When `p=Inf`, the operator norm is the maximum absolute row sum of `A`: -```math -\\|A\\|_\\infty = \\max_{1 ≤ i ≤ m} \\sum _{j=1}^n | a_{ij} | -``` - -# Examples -```jldoctest -julia> A = [1 -2 -3; 2 3 -1] -2×3 Matrix{Int64}: - 1 -2 -3 - 2 3 -1 - -julia> opnorm(A, Inf) -6.0 - -julia> opnorm(A, 1) -5.0 -``` -""" -function opnorm(A::AbstractMatrix, p::Real=2) - if p == 2 - return opnorm2(A) - elseif p == 1 - return opnorm1(A) - elseif p == Inf - return opnormInf(A) - else - throw(ArgumentError("invalid p-norm p=$p. Valid: 1, 2, Inf")) - end -end - -""" - opnorm(x::Number, p::Real=2) - -For numbers, return ``\\left( |x|^p \\right)^{1/p}``. -This is equivalent to [`norm`](@ref). -""" -@inline opnorm(x::Number, p::Real=2) = norm(x, p) - -""" - opnorm(A::Adjoint{<:Any,<:AbstracVector}, q::Real=2) - opnorm(A::Transpose{<:Any,<:AbstracVector}, q::Real=2) - -For Adjoint/Transpose-wrapped vectors, return the operator ``q``-norm of `A`, which is -equivalent to the `p`-norm with value `p = q/(q-1)`. They coincide at `p = q = 2`. -Use [`norm`](@ref) to compute the `p` norm of `A` as a vector. - -The difference in norm between a vector space and its dual arises to preserve -the relationship between duality and the dot product, and the result is -consistent with the operator `p`-norm of a `1 × n` matrix. - -# Examples -```jldoctest -julia> v = [1; im]; - -julia> vc = v'; - -julia> opnorm(vc, 1) -1.0 - -julia> norm(vc, 1) -2.0 - -julia> norm(v, 1) -2.0 - -julia> opnorm(vc, 2) -1.4142135623730951 - -julia> norm(vc, 2) -1.4142135623730951 - -julia> norm(v, 2) -1.4142135623730951 - -julia> opnorm(vc, Inf) -2.0 - -julia> norm(vc, Inf) -1.0 - -julia> norm(v, Inf) -1.0 -``` -""" -opnorm(v::TransposeAbsVec, q::Real) = q == Inf ? norm(v.parent, 1) : norm(v.parent, q/(q-1)) -opnorm(v::AdjointAbsVec, q::Real) = q == Inf ? norm(conj(v.parent), 1) : norm(conj(v.parent), q/(q-1)) -opnorm(v::AdjointAbsVec) = norm(conj(v.parent)) -opnorm(v::TransposeAbsVec) = norm(v.parent) - -norm(v::AdjOrTrans, p::Real) = norm(v.parent, p) - -""" - dot(x, y) - x ⋅ y - -Compute the dot product between two vectors. For complex vectors, the first -vector is conjugated. - -`dot` also works on arbitrary iterable objects, including arrays of any dimension, -as long as `dot` is defined on the elements. - -`dot` is semantically equivalent to `sum(dot(vx,vy) for (vx,vy) in zip(x, y))`, -with the added restriction that the arguments must have equal lengths. - -`x ⋅ y` (where `⋅` can be typed by tab-completing `\\cdot` in the REPL) is a synonym for -`dot(x, y)`. 
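Aside, not part of the diff: two small consequences of the `dot` contract described above, namely conjugation of the first argument and support for arbitrary iterables.

```julia
using LinearAlgebra

x = [1 + 2im, 3im]
@assert dot(x, x) == x' * x == 14 + 0im                    # conjugated first argument
@assert dot(1:3, (i^2 for i in 1:3)) == 1 + 8 + 27 == 36   # works on generic iterables
```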
- -# Examples -```jldoctest -julia> dot([1; 1], [2; 3]) -5 - -julia> dot([im; im], [1; 1]) -0 - 2im - -julia> dot(1:5, 2:6) -70 - -julia> x = fill(2., (5,5)); - -julia> y = fill(3., (5,5)); - -julia> dot(x, y) -150.0 -``` -""" -function dot end - -function dot(x, y) # arbitrary iterables - ix = iterate(x) - iy = iterate(y) - if ix === nothing - if iy !== nothing - throw(DimensionMismatch("x and y are of different lengths!")) - end - return dot(zero(eltype(x)), zero(eltype(y))) - end - if iy === nothing - throw(DimensionMismatch("x and y are of different lengths!")) - end - (vx, xs) = ix - (vy, ys) = iy - s = dot(vx, vy) - while true - ix = iterate(x, xs) - iy = iterate(y, ys) - ix === nothing && break - iy === nothing && break - (vx, xs), (vy, ys) = ix, iy - s += dot(vx, vy) - end - if !(iy === nothing && ix === nothing) - throw(DimensionMismatch("x and y are of different lengths!")) - end - return s -end - -dot(x::Number, y::Number) = conj(x) * y - -function dot(x::AbstractArray, y::AbstractArray) - lx = length(x) - if lx != length(y) - throw(DimensionMismatch("first array has length $(lx) which does not match the length of the second, $(length(y)).")) - end - if lx == 0 - return dot(zero(eltype(x)), zero(eltype(y))) - end - s = zero(dot(first(x), first(y))) - for (Ix, Iy) in zip(eachindex(x), eachindex(y)) - @inbounds s += dot(x[Ix], y[Iy]) - end - s -end - -function dot(x::Adjoint{<:Union{Real,Complex}}, y::Adjoint{<:Union{Real,Complex}}) - return conj(dot(parent(x), parent(y))) -end -dot(x::Transpose, y::Transpose) = dot(parent(x), parent(y)) - -""" - dot(x, A, y) - -Compute the generalized dot product `dot(x, A*y)` between two vectors `x` and `y`, -without storing the intermediate result of `A*y`. As for the two-argument -[`dot(_,_)`](@ref), this acts recursively. Moreover, for complex vectors, the -first vector is conjugated. - -!!! compat "Julia 1.4" - Three-argument `dot` requires at least Julia 1.4. - -# Examples -```jldoctest -julia> dot([1; 1], [1 2; 3 4], [2; 3]) -26 - -julia> dot(1:5, reshape(1:25, 5, 5), 2:6) -4850 - -julia> ⋅(1:5, reshape(1:25, 5, 5), 2:6) == dot(1:5, reshape(1:25, 5, 5), 2:6) -true -``` -""" -dot(x, A, y) = dot(x, A*y) # generic fallback for cases that are not covered by specialized methods - -function dot(x::AbstractVector, A::AbstractMatrix, y::AbstractVector) - (axes(x)..., axes(y)...) == axes(A) || throw(DimensionMismatch()) - T = typeof(dot(first(x), first(A), first(y))) - s = zero(T) - i₁ = first(eachindex(x)) - x₁ = first(x) - @inbounds for j in eachindex(y) - yj = y[j] - if !iszero(yj) - temp = zero(adjoint(A[i₁,j]) * x₁) - @simd for i in eachindex(x) - temp += adjoint(A[i,j]) * x[i] - end - s += dot(temp, yj) - end - end - return s -end -dot(x::AbstractVector, adjA::Adjoint, y::AbstractVector) = adjoint(dot(y, adjA.parent, x)) -dot(x::AbstractVector, transA::Transpose{<:Real}, y::AbstractVector) = adjoint(dot(y, transA.parent, x)) - -########################################################################################### - -""" - rank(A::AbstractMatrix; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ) - rank(A::AbstractMatrix, rtol::Real) - -Compute the numerical rank of a matrix by counting how many outputs of -`svdvals(A)` are greater than `max(atol, rtol*σ₁)` where `σ₁` is `A`'s largest -calculated singular value. `atol` and `rtol` are the absolute and relative -tolerances, respectively. The default relative tolerance is `n*ϵ`, where `n` -is the size of the smallest dimension of `A`, and `ϵ` is the [`eps`](@ref) of -the element type of `A`. - -!!! 
note - Numerical rank can be a sensitive and imprecise characterization of - ill-conditioned matrices with singular values that are close to the threshold - tolerance `max(atol, rtol*σ₁)`. In such cases, slight perturbations to the - singular-value computation or to the matrix can change the result of `rank` - by pushing one or more singular values across the threshold. These variations - can even occur due to changes in floating-point errors between different Julia - versions, architectures, compilers, or operating systems. - -!!! compat "Julia 1.1" - The `atol` and `rtol` keyword arguments requires at least Julia 1.1. - In Julia 1.0 `rtol` is available as a positional argument, but this - will be deprecated in Julia 2.0. - -# Examples -```jldoctest -julia> rank(Matrix(I, 3, 3)) -3 - -julia> rank(diagm(0 => [1, 0, 2])) -2 - -julia> rank(diagm(0 => [1, 0.001, 2]), rtol=0.1) -2 - -julia> rank(diagm(0 => [1, 0.001, 2]), rtol=0.00001) -3 - -julia> rank(diagm(0 => [1, 0.001, 2]), atol=1.5) -1 -``` -""" -function rank(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...)*eps(real(float(one(eltype(A))))))*iszero(atol)) - isempty(A) && return 0 # 0-dimensional case - s = svdvals(A) - tol = max(atol, rtol*s[1]) - count(>(tol), s) -end -rank(x::Union{Number,AbstractVector}) = iszero(x) ? 0 : 1 - -""" - tr(M) - -Matrix trace. Sums the diagonal elements of `M`. - -# Examples -```jldoctest -julia> A = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> tr(A) -5 -``` -""" -function tr(A::AbstractMatrix) - checksquare(A) - sum(diag(A)) -end -tr(x::Number) = x - -#kron(a::AbstractVector, b::AbstractVector) -#kron(a::AbstractMatrix{T}, b::AbstractMatrix{S}) where {T,S} - -#det(a::AbstractMatrix) - -""" - inv(M) - -Matrix inverse. Computes matrix `N` such that -`M * N = I`, where `I` is the identity matrix. -Computed by solving the left-division -`N = M \\ I`. - -# Examples -```jldoctest -julia> M = [2 5; 1 3] -2×2 Matrix{Int64}: - 2 5 - 1 3 - -julia> N = inv(M) -2×2 Matrix{Float64}: - 3.0 -5.0 - -1.0 2.0 - -julia> M*N == N*M == Matrix(I, 2, 2) -true -``` -""" -function inv(A::AbstractMatrix{T}) where T - n = checksquare(A) - S = typeof(zero(T)/one(T)) # dimensionful - S0 = typeof(zero(T)/oneunit(T)) # dimensionless - dest = Matrix{S0}(I, n, n) - ldiv!(factorize(convert(AbstractMatrix{S}, A)), dest) -end -inv(A::Adjoint) = adjoint(inv(parent(A))) -inv(A::Transpose) = transpose(inv(parent(A))) - -pinv(v::AbstractVector{T}, tol::Real = real(zero(T))) where {T<:Real} = _vectorpinv(transpose, v, tol) -pinv(v::AbstractVector{T}, tol::Real = real(zero(T))) where {T<:Complex} = _vectorpinv(adjoint, v, tol) -pinv(v::AbstractVector{T}, tol::Real = real(zero(T))) where {T} = _vectorpinv(adjoint, v, tol) -function _vectorpinv(dualfn::Tf, v::AbstractVector{Tv}, tol) where {Tv,Tf} - res = dualfn(similar(v, typeof(zero(Tv) / (abs2(one(Tv)) + abs2(one(Tv)))))) - den = sum(abs2, v) - # as tol is the threshold relative to the maximum singular value, for a vector with - # single singular value σ=√den, σ ≦ tol*σ is equivalent to den=0 ∨ tol≥1 - if iszero(den) || tol >= one(tol) - fill!(res, zero(eltype(res))) - else - res .= dualfn(v) ./ den - end - return res -end - -# this method is just an optimization: literal negative powers of A are -# already turned by literal_pow into powers of inv(A), but for A^-1 this -# would turn into inv(A)^1 = copy(inv(A)), which makes an extra copy. 
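Context note (not in the diff): the comment above refers to the `Base.literal_pow` lowering of literal negative exponents; the visible effect is that `A^-1` computes the same thing as `inv(A)`, and `A^-2` likewise goes through `inv(A)`. A tiny check, assuming an invertible `A`:

```julia
using LinearAlgebra

A = [2.0 1.0; 1.0 2.0]
@assert A^-1 ≈ inv(A)
@assert A^-2 ≈ inv(A) * inv(A)
@assert A * A^-1 ≈ I          # isapprox against UniformScaling is supported
```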
-@inline Base.literal_pow(::typeof(^), A::AbstractMatrix, ::Val{-1}) = inv(A) - -""" - \\(A, B) - -Matrix division using a polyalgorithm. For input matrices `A` and `B`, the result `X` is -such that `A*X == B` when `A` is square. The solver that is used depends upon the structure -of `A`. If `A` is upper or lower triangular (or diagonal), no factorization of `A` is -required and the system is solved with either forward or backward substitution. -For non-triangular square matrices, an LU factorization is used. - -For rectangular `A` the result is the minimum-norm least squares solution computed by a -pivoted QR factorization of `A` and a rank estimate of `A` based on the R factor. - -When `A` is sparse, a similar polyalgorithm is used. For indefinite matrices, the `LDLt` -factorization does not use pivoting during the numerical factorization and therefore the -procedure can fail even for invertible matrices. - -See also: [`factorize`](@ref), [`pinv`](@ref). - -# Examples -```jldoctest -julia> A = [1 0; 1 -2]; B = [32; -4]; - -julia> X = A \\ B -2-element Vector{Float64}: - 32.0 - 18.0 - -julia> A * X == B -true -``` -""" -function (\)(A::AbstractMatrix, B::AbstractVecOrMat) - require_one_based_indexing(A, B) - m, n = size(A) - if m == n - if istril(A) - if istriu(A) - return Diagonal(A) \ B - else - return LowerTriangular(A) \ B - end - end - if istriu(A) - return UpperTriangular(A) \ B - end - return lu(A) \ B - end - return qr(A, ColumnNorm()) \ B -end - -(\)(a::AbstractVector, b::AbstractArray) = pinv(a) * b -""" - A / B - -Matrix right-division: `A / B` is equivalent to `(B' \\ A')'` where [`\\`](@ref) is the left-division operator. -For square matrices, the result `X` is such that `A == X*B`. - -See also: [`rdiv!`](@ref). - -# Examples -```jldoctest -julia> A = Float64[1 4 5; 3 9 2]; B = Float64[1 4 2; 3 4 2; 8 7 1]; - -julia> X = A / B -2×3 Matrix{Float64}: - -0.65 3.75 -1.2 - 3.25 -2.75 1.0 - -julia> isapprox(A, X*B) -true - -julia> isapprox(X, A*pinv(B)) -true -``` -""" -function (/)(A::AbstractVecOrMat, B::AbstractVecOrMat) - size(A,2) != size(B,2) && throw(DimensionMismatch("Both inputs should have the same number of columns")) - return copy(adjoint(adjoint(B) \ adjoint(A))) -end - -cond(x::Number) = iszero(x) ? Inf : 1.0 -cond(x::Number, p) = cond(x) - -#Skeel condition numbers -condskeel(A::AbstractMatrix, p::Real=Inf) = opnorm(abs.(inv(A))*abs.(A), p) - -""" - condskeel(M, [x, p::Real=Inf]) - -```math -\\kappa_S(M, p) = \\left\\Vert \\left\\vert M \\right\\vert \\left\\vert M^{-1} \\right\\vert \\right\\Vert_p \\\\ -\\kappa_S(M, x, p) = \\frac{\\left\\Vert \\left\\vert M \\right\\vert \\left\\vert M^{-1} \\right\\vert \\left\\vert x \\right\\vert \\right\\Vert_p}{\\left \\Vert x \\right \\Vert_p} -``` - -Skeel condition number ``\\kappa_S`` of the matrix `M`, optionally with respect to the -vector `x`, as computed using the operator `p`-norm. ``\\left\\vert M \\right\\vert`` -denotes the matrix of (entry wise) absolute values of ``M``; -``\\left\\vert M \\right\\vert_{ij} = \\left\\vert M_{ij} \\right\\vert``. -Valid values for `p` are `1`, `2` and `Inf` (default). - -This quantity is also known in the literature as the Bauer condition number, relative -condition number, or componentwise relative condition number. -""" -function condskeel(A::AbstractMatrix, x::AbstractVector, p::Real=Inf) - norm(abs.(inv(A))*(abs.(A)*abs.(x)), p) / norm(x, p) -end - -issymmetric(A::AbstractMatrix{<:Real}) = ishermitian(A) - -""" - issymmetric(A) -> Bool - -Test whether a matrix is symmetric. 
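Illustrative aside (not part of the diff): the rectangular branch of the `\` polyalgorithm documented above returns a least-squares solution via pivoted QR; for a full-column-rank system it coincides with the normal-equations minimizer, which is used here purely as an independent check:

```julia
using LinearAlgebra

A = [1.0 1.0; 1.0 2.0; 1.0 3.0]      # 3×2, full column rank
b = [1.0, 2.0, 2.0]
x = A \ b                            # least-squares solution via pivoted QR
@assert x ≈ (A' * A) \ (A' * b)      # matches the normal-equations solution
@assert isapprox(A' * (A * x - b), zeros(2); atol = 1e-10)  # residual ⟂ range(A)
```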
- -# Examples -```jldoctest -julia> a = [1 2; 2 -1] -2×2 Matrix{Int64}: - 1 2 - 2 -1 - -julia> issymmetric(a) -true - -julia> b = [1 im; -im 1] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+1im - 0-1im 1+0im - -julia> issymmetric(b) -false -``` -""" -function issymmetric(A::AbstractMatrix) - indsm, indsn = axes(A) - if indsm != indsn - return false - end - for i = first(indsn):last(indsn), j = (i):last(indsn) - if A[i,j] != transpose(A[j,i]) - return false - end - end - return true -end - -issymmetric(x::Number) = x == x - -""" - ishermitian(A) -> Bool - -Test whether a matrix is Hermitian. - -# Examples -```jldoctest -julia> a = [1 2; 2 -1] -2×2 Matrix{Int64}: - 1 2 - 2 -1 - -julia> ishermitian(a) -true - -julia> b = [1 im; -im 1] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+1im - 0-1im 1+0im - -julia> ishermitian(b) -true -``` -""" -function ishermitian(A::AbstractMatrix) - indsm, indsn = axes(A) - if indsm != indsn - return false - end - for i = indsn, j = i:last(indsn) - if A[i,j] != adjoint(A[j,i]) - return false - end - end - return true -end - -ishermitian(x::Number) = (x == conj(x)) - -""" - istriu(A::AbstractMatrix, k::Integer = 0) -> Bool - -Test whether `A` is upper triangular starting from the `k`th superdiagonal. - -# Examples -```jldoctest -julia> a = [1 2; 2 -1] -2×2 Matrix{Int64}: - 1 2 - 2 -1 - -julia> istriu(a) -false - -julia> istriu(a, -1) -true - -julia> b = [1 im; 0 -1] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+1im - 0+0im -1+0im - -julia> istriu(b) -true - -julia> istriu(b, 1) -false -``` -""" -function istriu(A::AbstractMatrix, k::Integer = 0) - require_one_based_indexing(A) - return _istriu(A, k) -end -istriu(x::Number) = true - -@inline function _istriu(A::AbstractMatrix, k) - m, n = size(A) - for j in 1:min(n, m + k - 1) - all(iszero, view(A, max(1, j - k + 1):m, j)) || return false - end - return true -end - -""" - istril(A::AbstractMatrix, k::Integer = 0) -> Bool - -Test whether `A` is lower triangular starting from the `k`th superdiagonal. - -# Examples -```jldoctest -julia> a = [1 2; 2 -1] -2×2 Matrix{Int64}: - 1 2 - 2 -1 - -julia> istril(a) -false - -julia> istril(a, 1) -true - -julia> b = [1 0; -im -1] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+0im - 0-1im -1+0im - -julia> istril(b) -true - -julia> istril(b, -1) -false -``` -""" -function istril(A::AbstractMatrix, k::Integer = 0) - require_one_based_indexing(A) - return _istril(A, k) -end -istril(x::Number) = true - -@inline function _istril(A::AbstractMatrix, k) - m, n = size(A) - for j in max(1, k + 2):n - all(iszero, view(A, 1:min(j - k - 1, m), j)) || return false - end - return true -end - -""" - isbanded(A::AbstractMatrix, kl::Integer, ku::Integer) -> Bool - -Test whether `A` is banded with lower bandwidth starting from the `kl`th superdiagonal -and upper bandwidth extending through the `ku`th superdiagonal. - -# Examples -```jldoctest -julia> a = [1 2; 2 -1] -2×2 Matrix{Int64}: - 1 2 - 2 -1 - -julia> LinearAlgebra.isbanded(a, 0, 0) -false - -julia> LinearAlgebra.isbanded(a, -1, 1) -true - -julia> b = [1 0; -im -1] # lower bidiagonal -2×2 Matrix{Complex{Int64}}: - 1+0im 0+0im - 0-1im -1+0im - -julia> LinearAlgebra.isbanded(b, 0, 0) -false - -julia> LinearAlgebra.isbanded(b, -1, 0) -true -``` -""" -isbanded(A::AbstractMatrix, kl::Integer, ku::Integer) = istriu(A, kl) && istril(A, ku) - -""" - isdiag(A) -> Bool - -Test whether a matrix is diagonal in the sense that `iszero(A[i,j])` is true unless `i == j`. 
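Aside (not in the patch): `isbanded` above is defined as the conjunction `istriu(A, kl) && istril(A, ku)`; a tridiagonal matrix is the standard example, with bandwidths `(-1, 1)`. `Tridiagonal` is used here only to build a test matrix and is not part of this hunk.

```julia
using LinearAlgebra

T = Matrix(Tridiagonal([1, 2], [3, 4, 5], [6, 7]))
@assert LinearAlgebra.isbanded(T, -1, 1)
@assert istriu(T, -1) && istril(T, 1)        # exactly the definition above
@assert !LinearAlgebra.isbanded(T, 0, 0)     # not diagonal
@assert !isdiag(T)
```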
-Note that it is not necessary for `A` to be square; -if you would also like to check that, you need to check that `size(A, 1) == size(A, 2)`. - -# Examples -```jldoctest -julia> a = [1 2; 2 -1] -2×2 Matrix{Int64}: - 1 2 - 2 -1 - -julia> isdiag(a) -false - -julia> b = [im 0; 0 -im] -2×2 Matrix{Complex{Int64}}: - 0+1im 0+0im - 0+0im 0-1im - -julia> isdiag(b) -true - -julia> c = [1 0 0; 0 2 0] -2×3 Matrix{Int64}: - 1 0 0 - 0 2 0 - -julia> isdiag(c) -true - -julia> d = [1 0 0; 0 2 3] -2×3 Matrix{Int64}: - 1 0 0 - 0 2 3 - -julia> isdiag(d) -false -``` -""" -isdiag(A::AbstractMatrix) = isbanded(A, 0, 0) -isdiag(x::Number) = true - -""" - axpy!(α, x::AbstractArray, y::AbstractArray) - -Overwrite `y` with `x * α + y` and return `y`. -If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a`. - -# Examples -```jldoctest -julia> x = [1; 2; 3]; - -julia> y = [4; 5; 6]; - -julia> axpy!(2, x, y) -3-element Vector{Int64}: - 6 - 9 - 12 -``` -""" -function axpy!(α, x::AbstractArray, y::AbstractArray) - n = length(x) - if n != length(y) - throw(DimensionMismatch("x has length $n, but y has length $(length(y))")) - end - iszero(α) && return y - for (IY, IX) in zip(eachindex(y), eachindex(x)) - @inbounds y[IY] += x[IX]*α - end - return y -end - -function axpy!(α, x::AbstractArray, rx::AbstractArray{<:Integer}, y::AbstractArray, ry::AbstractArray{<:Integer}) - if length(rx) != length(ry) - throw(DimensionMismatch("rx has length $(length(rx)), but ry has length $(length(ry))")) - elseif !checkindex(Bool, eachindex(IndexLinear(), x), rx) - throw(BoundsError(x, rx)) - elseif !checkindex(Bool, eachindex(IndexLinear(), y), ry) - throw(BoundsError(y, ry)) - end - iszero(α) && return y - for (IY, IX) in zip(eachindex(ry), eachindex(rx)) - @inbounds y[ry[IY]] += x[rx[IX]]*α - end - return y -end - -""" - axpby!(α, x::AbstractArray, β, y::AbstractArray) - -Overwrite `y` with `x * α + y * β` and return `y`. -If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β`. - -# Examples -```jldoctest -julia> x = [1; 2; 3]; - -julia> y = [4; 5; 6]; - -julia> axpby!(2, x, 2, y) -3-element Vector{Int64}: - 10 - 14 - 18 -``` -""" -function axpby!(α, x::AbstractArray, β, y::AbstractArray) - if length(x) != length(y) - throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))")) - end - iszero(α) && isone(β) && return y - for (IX, IY) in zip(eachindex(x), eachindex(y)) - @inbounds y[IY] = x[IX]*α + y[IY]*β - end - y -end - -DenseLike{T} = Union{DenseArray{T}, Base.StridedReshapedArray{T}, Base.StridedReinterpretArray{T}} -StridedVecLike{T} = Union{DenseLike{T}, Base.FastSubArray{T,<:Any,<:DenseLike{T}}} -axpy!(α::Number, x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasFloat} = BLAS.axpy!(α, x, y) -axpby!(α::Number, x::StridedVecLike{T}, β::Number, y::StridedVecLike{T}) where {T<:BlasFloat} = BLAS.axpby!(α, x, β, y) -function axpy!(α::Number, - x::StridedVecLike{T}, rx::AbstractRange{<:Integer}, - y::StridedVecLike{T}, ry::AbstractRange{<:Integer}, -) where {T<:BlasFloat} - if Base.has_offset_axes(rx, ry) - return @invoke axpy!(α, - x::AbstractArray, rx::AbstractArray{<:Integer}, - y::AbstractArray, ry::AbstractArray{<:Integer}, - ) - end - @views BLAS.axpy!(α, x[rx], y[ry]) - return y -end - -""" - rotate!(x, y, c, s) - -Overwrite `x` with `c*x + s*y` and `y` with `-conj(s)*x + c*y`. -Returns `x` and `y`. - -!!! compat "Julia 1.5" - `rotate!` requires at least Julia 1.5. 
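Editorial aside (not part of the diff): a minimal check of the `rotate!` contract stated above, `x ← c*x + s*y`, `y ← -conj(s)*x + c*y`, applied to the canonical basis so the result can be read off directly.

```julia
using LinearAlgebra

c, s = cos(0.3), sin(0.3)
x = [1.0, 0.0]
y = [0.0, 1.0]
rotate!(x, y, c, s)
@assert x ≈ [c, s]
@assert y ≈ [-s, c]
@assert hypot(x[1], y[1]) ≈ 1      # the update is orthogonal
```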
-""" -function rotate!(x::AbstractVector, y::AbstractVector, c, s) - require_one_based_indexing(x, y) - n = length(x) - if n != length(y) - throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))")) - end - @inbounds for i = 1:n - xi, yi = x[i], y[i] - x[i] = c *xi + s*yi - y[i] = -conj(s)*xi + c*yi - end - return x, y -end - -""" - reflect!(x, y, c, s) - -Overwrite `x` with `c*x + s*y` and `y` with `conj(s)*x - c*y`. -Returns `x` and `y`. - -!!! compat "Julia 1.5" - `reflect!` requires at least Julia 1.5. -""" -function reflect!(x::AbstractVector, y::AbstractVector, c, s) - require_one_based_indexing(x, y) - n = length(x) - if n != length(y) - throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))")) - end - @inbounds for i = 1:n - xi, yi = x[i], y[i] - x[i] = c *xi + s*yi - y[i] = conj(s)*xi - c*yi - end - return x, y -end - -# Elementary reflection similar to LAPACK. The reflector is not Hermitian but -# ensures that tridiagonalization of Hermitian matrices become real. See lawn72 -@inline function reflector!(x::AbstractVector{T}) where {T} - require_one_based_indexing(x) - n = length(x) - n == 0 && return zero(eltype(x)) - @inbounds begin - ξ1 = x[1] - normu = norm(x) - if iszero(normu) - return zero(ξ1/normu) - end - ν = T(copysign(normu, real(ξ1))) - ξ1 += ν - x[1] = -ν - for i = 2:n - x[i] /= ξ1 - end - end - ξ1/ν -end - -""" - reflectorApply!(x, τ, A) - -Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`. -""" -@inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat) - require_one_based_indexing(x) - m, n = size(A, 1), size(A, 2) - if length(x) != m - throw(DimensionMismatch("reflector has length $(length(x)), which must match the first dimension of matrix A, $m")) - end - m == 0 && return A - @inbounds for j = 1:n - Aj, xj = view(A, 2:m, j), view(x, 2:m) - vAj = conj(τ)*(A[1, j] + dot(xj, Aj)) - A[1, j] -= vAj - axpy!(-vAj, xj, Aj) - end - return A -end - -""" - det(M) - -Matrix determinant. - -See also: [`logdet`](@ref) and [`logabsdet`](@ref). - -# Examples -```jldoctest -julia> M = [1 0; 2 2] -2×2 Matrix{Int64}: - 1 0 - 2 2 - -julia> det(M) -2.0 -``` -""" -function det(A::AbstractMatrix{T}) where {T} - if istriu(A) || istril(A) - S = promote_type(T, typeof((one(T)*zero(T) + zero(T))/one(T))) - return convert(S, det(UpperTriangular(A))) - end - return det(lu(A; check = false)) -end -det(x::Number) = x - -# Resolve Issue #40128 -det(A::AbstractMatrix{BigInt}) = det_bareiss(A) - -""" - logabsdet(M) - -Log of absolute value of matrix determinant. Equivalent to -`(log(abs(det(M))), sign(det(M)))`, but may provide increased accuracy and/or speed. - -# Examples -```jldoctest -julia> A = [-1. 0.; 0. 1.] -2×2 Matrix{Float64}: - -1.0 0.0 - 0.0 1.0 - -julia> det(A) --1.0 - -julia> logabsdet(A) -(0.0, -1.0) - -julia> B = [2. 0.; 0. 1.] -2×2 Matrix{Float64}: - 2.0 0.0 - 0.0 1.0 - -julia> det(B) -2.0 - -julia> logabsdet(B) -(0.6931471805599453, 1.0) -``` -""" -logabsdet(A::AbstractMatrix) = logabsdet(lu(A, check=false)) - -logabsdet(a::Number) = log(abs(a)), sign(a) - -""" - logdet(M) - -Log of matrix determinant. Equivalent to `log(det(M))`, but may provide -increased accuracy and/or speed. 
- -# Examples -```jldoctest -julia> M = [1 0; 2 2] -2×2 Matrix{Int64}: - 1 0 - 2 2 - -julia> logdet(M) -0.6931471805599453 - -julia> logdet(Matrix(I, 3, 3)) -0.0 -``` -""" -function logdet(A::AbstractMatrix) - d,s = logabsdet(A) - return d + log(s) -end - -logdet(A) = log(det(A)) - -const NumberArray{T<:Number} = AbstractArray{T} - -exactdiv(a, b) = a/b -exactdiv(a::Integer, b::Integer) = div(a, b) - -""" - det_bareiss!(M) - -Calculates the determinant of a matrix using the -[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm) using -inplace operations. - -# Examples -```jldoctest -julia> M = [1 0; 2 2] -2×2 Matrix{Int64}: - 1 0 - 2 2 - -julia> LinearAlgebra.det_bareiss!(M) -2 -``` -""" -function det_bareiss!(M) - n = checksquare(M) - sign, prev = Int8(1), one(eltype(M)) - for i in 1:n-1 - if iszero(M[i,i]) # swap with another col to make nonzero - swapto = findfirst(!iszero, @view M[i,i+1:end]) - isnothing(swapto) && return zero(prev) - sign = -sign - Base.swapcols!(M, i, i + swapto) - end - for k in i+1:n, j in i+1:n - M[j,k] = exactdiv(M[j,k]*M[i,i] - M[j,i]*M[i,k], prev) - end - prev = M[i,i] - end - return sign * M[end,end] -end -""" - LinearAlgebra.det_bareiss(M) - -Calculates the determinant of a matrix using the -[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm). -Also refer to [`det_bareiss!`](@ref). -""" -det_bareiss(M) = det_bareiss!(copy(M)) - - - -""" - promote_leaf_eltypes(itr) - -For an (possibly nested) iterable object `itr`, promote the types of leaf -elements. Equivalent to `promote_type(typeof(leaf1), typeof(leaf2), ...)`. -Currently supports only numeric leaf elements. - -# Examples -```jldoctest -julia> a = [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]] -3-element Vector{Any}: - Any[1, 2, [3, 4]] - 5.0 - Any[0 + 6im, [7.0, 8.0]] - -julia> LinearAlgebra.promote_leaf_eltypes(a) -ComplexF64 (alias for Complex{Float64}) -``` -""" -promote_leaf_eltypes(x::Union{AbstractArray{T},Tuple{T,Vararg{T}}}) where {T<:Number} = T -promote_leaf_eltypes(x::Union{AbstractArray{T},Tuple{T,Vararg{T}}}) where {T<:NumberArray} = eltype(T) -promote_leaf_eltypes(x::T) where {T} = T -promote_leaf_eltypes(x::Union{AbstractArray,Tuple}) = mapreduce(promote_leaf_eltypes, promote_type, x; init=Bool) - -# isapprox: approximate equality of arrays [like isapprox(Number,Number)] -# Supports nested arrays; e.g., for `a = [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]` -# `a ≈ a` is `true`. -function isapprox(x::AbstractArray, y::AbstractArray; - atol::Real=0, - rtol::Real=Base.rtoldefault(promote_leaf_eltypes(x),promote_leaf_eltypes(y),atol), - nans::Bool=false, norm::Function=norm) - d = norm(x - y) - if isfinite(d) - return iszero(rtol) ? d <= atol : d <= max(atol, rtol*max(norm(x), norm(y))) - else - # Fall back to a component-wise approximate comparison - # (mapreduce instead of all for greater generality [#44893]) - return mapreduce((a, b) -> isapprox(a, b; rtol=rtol, atol=atol, nans=nans), &, x, y) - end -end - -""" - normalize!(a::AbstractArray, p::Real=2) - -Normalize the array `a` in-place so that its `p`-norm equals unity, -i.e. `norm(a, p) == 1`. -See also [`normalize`](@ref) and [`norm`](@ref). 
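Context aside (not in the diff): `det_bareiss` above performs fraction-free elimination, so for an integer matrix the determinant is computed in exact integer arithmetic, unlike the LU-based `det`, which rounds through floating point.

```julia
using LinearAlgebra

M = [2 3 1; 4 7 5; 6 2 9]
@assert LinearAlgebra.det_bareiss(M) == 54   # exact integer result
@assert det(M) isa Float64                   # LU-based det goes through Float64
@assert det(M) ≈ 54
```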
-""" -function normalize!(a::AbstractArray, p::Real=2) - nrm = norm(a, p) - __normalize!(a, nrm) -end - -@inline function __normalize!(a::AbstractArray, nrm) - # The largest positive floating point number whose inverse is less than infinity - δ = inv(prevfloat(typemax(nrm))) - if nrm ≥ δ # Safe to multiply with inverse - invnrm = inv(nrm) - rmul!(a, invnrm) - else # scale elements to avoid overflow - εδ = eps(one(nrm))/δ - rmul!(a, εδ) - rmul!(a, inv(nrm*εδ)) - end - return a -end - -""" - normalize(a, p::Real=2) - -Normalize `a` so that its `p`-norm equals unity, -i.e. `norm(a, p) == 1`. For scalars, this is similar to sign(a), -except normalize(0) = NaN. -See also [`normalize!`](@ref), [`norm`](@ref), and [`sign`](@ref). - -# Examples -```jldoctest -julia> a = [1,2,4]; - -julia> b = normalize(a) -3-element Vector{Float64}: - 0.2182178902359924 - 0.4364357804719848 - 0.8728715609439696 - -julia> norm(b) -1.0 - -julia> c = normalize(a, 1) -3-element Vector{Float64}: - 0.14285714285714285 - 0.2857142857142857 - 0.5714285714285714 - -julia> norm(c, 1) -1.0 - -julia> a = [1 2 4 ; 1 2 4] -2×3 Matrix{Int64}: - 1 2 4 - 1 2 4 - -julia> norm(a) -6.48074069840786 - -julia> normalize(a) -2×3 Matrix{Float64}: - 0.154303 0.308607 0.617213 - 0.154303 0.308607 0.617213 - -julia> normalize(3, 1) -1.0 - -julia> normalize(-8, 1) --1.0 - -julia> normalize(0, 1) -NaN -``` -""" -function normalize(a::AbstractArray, p::Real = 2) - nrm = norm(a, p) - if !isempty(a) - aa = copymutable_oftype(a, typeof(first(a)/nrm)) - return __normalize!(aa, nrm) - else - T = typeof(zero(eltype(a))/nrm) - return T[] - end -end - -normalize(x) = x / norm(x) -normalize(x, p::Real) = x / norm(x, p) diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl deleted file mode 100644 index c37df41f9567c..0000000000000 --- a/stdlib/LinearAlgebra/src/givens.jl +++ /dev/null @@ -1,428 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# givensAlgorithm functions are derived from LAPACK, see below - -abstract type AbstractRotation{T} end -struct AdjointRotation{T,S<:AbstractRotation{T}} <: AbstractRotation{T} - R::S -end - -transpose(R::AbstractRotation) = error("transpose not implemented for $(typeof(R)). Consider using adjoint instead of transpose.") - -(*)(R::AbstractRotation, A::AbstractVector) = _rot_mul_vecormat(R, A) -(*)(R::AbstractRotation, A::AbstractMatrix) = _rot_mul_vecormat(R, A) -function _rot_mul_vecormat(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S} - TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) - lmul!(convert(AbstractRotation{TS}, R), copy_similar(A, TS)) -end - -(*)(A::AbstractVector, R::AbstractRotation) = _vecormat_mul_rot(A, R) -(*)(A::AbstractMatrix, R::AbstractRotation) = _vecormat_mul_rot(A, R) -function _vecormat_mul_rot(A::AbstractVecOrMat{T}, R::AbstractRotation{S}) where {T,S} - TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) - rmul!(copy_similar(A, TS), convert(AbstractRotation{TS}, R)) -end - -""" - LinearAlgebra.Givens(i1,i2,c,s) -> G - -A Givens rotation linear operator. The fields `c` and `s` represent the cosine and sine of -the rotation angle, respectively. The `Givens` type supports left multiplication `G*A` and -conjugated transpose right multiplication `A*G'`. The type doesn't have a `size` and can -therefore be multiplied with matrices of arbitrary size as long as `i2<=size(A,2)` for -`G*A` or `i2<=size(A,1)` for `A*G'`. - -See also [`givens`](@ref). 
-""" -struct Givens{T} <: AbstractRotation{T} - i1::Int - i2::Int - c::T - s::T -end -struct Rotation{T} <: AbstractRotation{T} - rotations::Vector{Givens{T}} -end - -convert(::Type{T}, r::T) where {T<:AbstractRotation} = r -convert(::Type{T}, r::AbstractRotation) where {T<:AbstractRotation} = T(r)::T -convert(::Type{AbstractRotation{T}}, r::AdjointRotation) where {T} = convert(AbstractRotation{T}, r.R)' -convert(::Type{AbstractRotation{T}}, r::AdjointRotation{T}) where {T} = r - -Givens(i1, i2, c, s) = Givens(i1, i2, promote(c, s)...) -Givens{T}(G::Givens{T}) where {T} = G -Givens{T}(G::Givens) where {T} = Givens(G.i1, G.i2, convert(T, G.c), convert(T, G.s)) -Rotation{T}(R::Rotation{T}) where {T} = R -Rotation{T}(R::Rotation) where {T} = Rotation{T}([Givens{T}(g) for g in R.rotations]) -AbstractRotation{T}(G::Givens) where {T} = Givens{T}(G) -AbstractRotation{T}(R::Rotation) where {T} = Rotation{T}(R) - -adjoint(G::Givens) = Givens(G.i1, G.i2, G.c', -G.s) -adjoint(R::AbstractRotation) = AdjointRotation(R) -adjoint(adjR::AdjointRotation) = adjR.R - -Base.copy(aR::AdjointRotation{T,Rotation{T}}) where {T} = - Rotation{T}([r' for r in Iterators.reverse(aR.R.rotations)]) - -floatmin2(::Type{Float32}) = reinterpret(Float32, 0x26000000) -floatmin2(::Type{Float64}) = reinterpret(Float64, 0x21a0000000000000) -floatmin2(::Type{T}) where {T} = (twopar = 2one(T); twopar^trunc(Integer,log(floatmin(T)/eps(T))/log(twopar)/twopar)) - -# derived from LAPACK's dlartg -# Copyright: -# Univ. of Tennessee -# Univ. of California Berkeley -# Univ. of Colorado Denver -# NAG Ltd. -function givensAlgorithm(f::T, g::T) where T<:AbstractFloat - onepar = one(T) - twopar = 2one(T) - T0 = typeof(onepar) # dimensionless - zeropar = T0(zero(T)) # must be dimensionless - - # need both dimensionful and dimensionless versions of these: - safmn2 = floatmin2(T0) - safmn2u = floatmin2(T) - safmx2 = one(T)/safmn2 - safmx2u = oneunit(T)/safmn2 - - if g == 0 - cs = onepar - sn = zeropar - r = f - elseif f == 0 - cs = zeropar - sn = onepar - r = g - else - f1 = f - g1 = g - scalepar = max(abs(f1), abs(g1)) - if scalepar >= safmx2u - count = 0 - while true - count += 1 - f1 *= safmn2 - g1 *= safmn2 - scalepar = max(abs(f1), abs(g1)) - if scalepar < safmx2u break end - end - r = sqrt(f1*f1 + g1*g1) - cs = f1/r - sn = g1/r - for i = 1:count - r *= safmx2 - end - elseif scalepar <= safmn2u - count = 0 - while true - count += 1 - f1 *= safmx2 - g1 *= safmx2 - scalepar = max(abs(f1), abs(g1)) - if scalepar > safmn2u break end - end - r = sqrt(f1*f1 + g1*g1) - cs = f1/r - sn = g1/r - for i = 1:count - r *= safmn2 - end - else - r = sqrt(f1*f1 + g1*g1) - cs = f1/r - sn = g1/r - end - if abs(f) > abs(g) && cs < 0 - cs = -cs - sn = -sn - r = -r - end - end - return cs, sn, r -end - -# derived from LAPACK's zlartg -# Copyright: -# Univ. of Tennessee -# Univ. of California Berkeley -# Univ. of Colorado Denver -# NAG Ltd. 
-function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat - twopar, onepar = 2one(T), one(T) - T0 = typeof(onepar) # dimensionless - zeropar = T0(zero(T)) # must be dimensionless - czero = complex(zeropar) - - abs1(ff) = max(abs(real(ff)), abs(imag(ff))) - safmin = floatmin(T0) - safmn2 = floatmin2(T0) - safmn2u = floatmin2(T) - safmx2 = one(T)/safmn2 - safmx2u = oneunit(T)/safmn2 - scalepar = max(abs1(f), abs1(g)) - fs = f - gs = g - count = 0 - if scalepar >= safmx2u - while true - count += 1 - fs *= safmn2 - gs *= safmn2 - scalepar *= safmn2 - if scalepar < safmx2u break end - end - elseif scalepar <= safmn2u - if g == 0 - cs = onepar - sn = czero - r = f - return cs, sn, r - end - while true - count -= 1 - fs *= safmx2 - gs *= safmx2 - scalepar *= safmx2 - if scalepar > safmn2u break end - end - end - f2 = abs2(fs) - g2 = abs2(gs) - if f2 <= max(g2, oneunit(T))*safmin - # This is a rare case: F is very small. - if f == 0 - cs = zero(T) - r = complex(hypot(real(g), imag(g))) - # do complex/real division explicitly with two real divisions - d = hypot(real(gs), imag(gs)) - sn = complex(real(gs)/d, -imag(gs)/d) - return cs, sn, r - end - f2s = hypot(real(fs), imag(fs)) - # g2 and g2s are accurate - # g2 is at least safmin, and g2s is at least safmn2 - g2s = sqrt(g2) - # error in cs from underflow in f2s is at most - # unfl / safmn2 .lt. sqrt(unfl*eps) .lt. eps - # if max(g2,one)=g2, then f2 .lt. g2*safmin, - # and so cs .lt. sqrt(safmin) - # if max(g2,one)=one, then f2 .lt. safmin - # and so cs .lt. sqrt(safmin)/safmn2 = sqrt(eps) - # therefore, cs = f2s/g2s / sqrt( 1 + (f2s/g2s)**2 ) = f2s/g2s - cs = f2s/g2s - # make sure abs(ff) = 1 - # do complex/real division explicitly with 2 real divisions - if abs1(f) > 1 - d = hypot(real(f), imag(f)) - ff = complex(real(f)/d, imag(f)/d) - else - dr = safmx2*real(f) - di = safmx2*imag(f) - d = hypot(dr, di) - ff = complex(dr/d, di/d) - end - sn = ff*complex(real(gs)/g2s, -imag(gs)/g2s) - r = cs*f + sn*g - else - # This is the most common case. - # Neither F2 nor F2/G2 are less than SAFMIN - # F2S cannot overflow, and it is accurate - f2s = sqrt(onepar + g2/f2) - # do the f2s(real)*fs(complex) multiply with two real multiplies - r = complex(f2s*real(fs), f2s*imag(fs)) - cs = onepar/f2s - d = f2 + g2 - # do complex/real division explicitly with two real divisions - sn = complex(real(r)/d, imag(r)/d) - sn *= conj(gs) - if count != 0 - if count > 0 - for i = 1:count - r *= safmx2 - end - else - for i = 1:-count - r *= safmn2 - end - end - end - end - return cs, sn, r -end - -# enable for unitful quantities -function givensAlgorithm(f::T, g::T) where T - fs = f / oneunit(T) - gs = g / oneunit(T) - typeof(fs) === T && typeof(gs) === T && - !isa(fs, Union{AbstractFloat,Complex{<:AbstractFloat}}) && - throw(MethodError(givensAlgorithm, (fs, gs))) - - c, s, r = givensAlgorithm(fs, gs) - return c, s, r * oneunit(T) -end - -givensAlgorithm(f, g) = givensAlgorithm(promote(float(f), float(g))...) - -""" - - givens(f::T, g::T, i1::Integer, i2::Integer) where {T} -> (G::Givens, r::T) - -Computes the Givens rotation `G` and scalar `r` such that for any vector `x` where -``` -x[i1] = f -x[i2] = g -``` -the result of the multiplication -``` -y = G*x -``` -has the property that -``` -y[i1] = r -y[i2] = 0 -``` - -See also [`LinearAlgebra.Givens`](@ref). 
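-
-For example (illustrative sketch, not a doctest):
-
-```julia
-using LinearAlgebra
-
-G, r = givens(3.0, 4.0, 1, 2)   # r ≈ hypot(3.0, 4.0) == 5.0
-G * [3.0, 4.0]                  # ≈ [5.0, 0.0]
-```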
-""" -function givens(f::T, g::T, i1::Integer, i2::Integer) where T - if i1 == i2 - throw(ArgumentError("Indices must be distinct.")) - end - c, s, r = givensAlgorithm(f, g) - if i1 > i2 - s = -conj(s) - i1, i2 = i2, i1 - end - Givens(i1, i2, c, s), r -end -""" - givens(A::AbstractArray, i1::Integer, i2::Integer, j::Integer) -> (G::Givens, r) - -Computes the Givens rotation `G` and scalar `r` such that the result of the multiplication -``` -B = G*A -``` -has the property that -``` -B[i1,j] = r -B[i2,j] = 0 -``` - -See also [`LinearAlgebra.Givens`](@ref). -""" -givens(A::AbstractMatrix, i1::Integer, i2::Integer, j::Integer) = - givens(A[i1,j], A[i2,j], i1, i2) - - -""" - givens(x::AbstractVector, i1::Integer, i2::Integer) -> (G::Givens, r) - -Computes the Givens rotation `G` and scalar `r` such that the result of the multiplication -``` -B = G*x -``` -has the property that -``` -B[i1] = r -B[i2] = 0 -``` - -See also [`LinearAlgebra.Givens`](@ref). -""" -givens(x::AbstractVector, i1::Integer, i2::Integer) = givens(x[i1], x[i2], i1, i2) - -function getindex(G::Givens, i::Integer, j::Integer) - if i == j - if i == G.i1 || i == G.i2 - G.c - else - oneunit(G.c) - end - elseif i == G.i1 && j == G.i2 - G.s - elseif i == G.i2 && j == G.i1 - -conj(G.s) - else - zero(G.s) - end -end - -@inline function lmul!(G::Givens, A::AbstractVecOrMat) - require_one_based_indexing(A) - m, n = size(A, 1), size(A, 2) - if G.i2 > m - throw(DimensionMismatch("column indices for rotation are outside the matrix")) - end - @inbounds for i = 1:n - a1, a2 = A[G.i1,i], A[G.i2,i] - A[G.i1,i] = G.c *a1 + G.s*a2 - A[G.i2,i] = -conj(G.s)*a1 + G.c*a2 - end - return A -end -@inline function rmul!(A::AbstractMatrix, G::Givens) - require_one_based_indexing(A) - m, n = size(A, 1), size(A, 2) - if G.i2 > n - throw(DimensionMismatch("column indices for rotation are outside the matrix")) - end - @inbounds for i = 1:m - a1, a2 = A[i,G.i1], A[i,G.i2] - A[i,G.i1] = a1*G.c - a2*G.s' - A[i,G.i2] = a1*G.s + a2*G.c - end - return A -end - -function lmul!(G::Givens, R::Rotation) - push!(R.rotations, G) - return R -end -function rmul!(R::Rotation, G::Givens) - pushfirst!(R.rotations, G) - return R -end - -function lmul!(R::Rotation, A::AbstractVecOrMat) - @inbounds for i in eachindex(R.rotations) - lmul!(R.rotations[i], A) - end - return A -end -function rmul!(A::AbstractMatrix, R::Rotation) - @inbounds for i in eachindex(R.rotations) - rmul!(A, R.rotations[i]) - end - return A -end - -function lmul!(adjR::AdjointRotation{<:Any,<:Rotation}, A::AbstractVecOrMat) - R = adjR.R - @inbounds for i in eachindex(R.rotations) - lmul!(adjoint(R.rotations[i]), A) - end - return A -end -function rmul!(A::AbstractMatrix, adjR::AdjointRotation{<:Any,<:Rotation}) - R = adjR.R - @inbounds for i in eachindex(R.rotations) - rmul!(A, adjoint(R.rotations[i])) - end - return A -end - -function *(G1::Givens{S}, G2::Givens{T}) where {S,T} - TS = promote_type(T, S) - Rotation{TS}([convert(AbstractRotation{TS}, G2), convert(AbstractRotation{TS}, G1)]) -end -*(G::Givens{T}...) 
where {T} = Rotation([reverse(G)...]) -function *(G::Givens{S}, R::Rotation{T}) where {S,T} - TS = promote_type(T, S) - Rotation(vcat(convert(AbstractRotation{TS}, R).rotations, convert(AbstractRotation{TS}, G))) -end -function *(R::Rotation{S}, G::Givens{T}) where {S,T} - TS = promote_type(T, S) - Rotation(vcat(convert(AbstractRotation{TS}, G), convert(AbstractRotation{TS}, R).rotations)) -end diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl deleted file mode 100644 index 179f93f2cd6f2..0000000000000 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ /dev/null @@ -1,610 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -###################################################################################### -# Upper-Hessenberg matrices H+μI, analogous to the UpperTriangular type - -""" - UpperHessenberg(A::AbstractMatrix) - -Construct an `UpperHessenberg` view of the matrix `A`. -Entries of `A` below the first subdiagonal are ignored. - -!!! compat "Julia 1.3" - This type was added in Julia 1.3. - -Efficient algorithms are implemented for `H \\ b`, `det(H)`, and similar. - -See also the [`hessenberg`](@ref) function to factor any matrix into a similar -upper-Hessenberg matrix. - -If `F::Hessenberg` is the factorization object, the unitary matrix can be accessed -with `F.Q` and the Hessenberg matrix with `F.H`. When `Q` is extracted, the resulting -type is the `HessenbergQ` object, and may be converted to a regular matrix with -[`convert(Array, _)`](@ref) (or `Array(_)` for short). - -Iterating the decomposition produces the factors `F.Q` and `F.H`. - -# Examples -```jldoctest -julia> A = [1 2 3 4; 5 6 7 8; 9 10 11 12; 13 14 15 16] -4×4 Matrix{Int64}: - 1 2 3 4 - 5 6 7 8 - 9 10 11 12 - 13 14 15 16 - -julia> UpperHessenberg(A) -4×4 UpperHessenberg{Int64, Matrix{Int64}}: - 1 2 3 4 - 5 6 7 8 - ⋅ 10 11 12 - ⋅ ⋅ 15 16 -``` -""" -struct UpperHessenberg{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T} - data::S - - function UpperHessenberg{T,S}(data) where {T,S<:AbstractMatrix{T}} - require_one_based_indexing(data) - new{T,S}(data) - end -end -UpperHessenberg(H::UpperHessenberg) = H -UpperHessenberg{T}(A::AbstractMatrix) where {T} = UpperHessenberg(convert(AbstractMatrix{T}, A)) -UpperHessenberg{T}(H::UpperHessenberg) where {T} = UpperHessenberg{T}(H.data) -UpperHessenberg(A::AbstractMatrix) = UpperHessenberg{eltype(A),typeof(A)}(A) -Matrix(H::UpperHessenberg{T}) where {T} = Matrix{T}(H) -Array(H::UpperHessenberg) = Matrix(H) -size(H::UpperHessenberg, d) = size(H.data, d) -size(H::UpperHessenberg) = size(H.data) -parent(H::UpperHessenberg) = H.data - -# similar behaves like UpperTriangular -similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T)) -similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims) - -AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg(AbstractMatrix{T}(H.data)) - -copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data)) -real(H::UpperHessenberg{<:Real}) = H -real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1)) -imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1)) - -function istriu(A::UpperHessenberg, k::Integer=0) - k <= -1 && return true - return _istriu(A, k) -end - -function Matrix{T}(H::UpperHessenberg) where T - m,n = size(H) - return triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) -end - -Base.isassigned(H::UpperHessenberg, i::Int, j::Int) = - i <= j+1 ? 
isassigned(H.data, i, j) : true - -getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = - i <= j+1 ? convert(T, H.data[i,j]) : zero(T) - -function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer) - if i > j+1 - x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " * - "($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)")) - else - A.data[i,j] = x - end - return A -end - -function Base.replace_in_print_matrix(A::UpperHessenberg, i::Integer, j::Integer, s::AbstractString) - return i <= j+1 ? s : Base.replace_with_centered_mark(s) -end - -Base.copy(A::Adjoint{<:Any,<:UpperHessenberg}) = tril!(adjoint!(similar(A.parent.data), A.parent.data), 1) -Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = tril!(transpose!(similar(A.parent.data), A.parent.data), 1) - --(A::UpperHessenberg) = UpperHessenberg(-A.data) -rmul!(H::UpperHessenberg, x::Number) = (rmul!(H.data, x); H) -lmul!(x::Number, H::UpperHessenberg) = (lmul!(x, H.data); H) - -fillstored!(H::UpperHessenberg, x) = (fillband!(H.data, x, -1, size(H,2)-1); H) - -+(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data) --(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data) - -for T = (:UniformScaling, :Diagonal, :Bidiagonal, :Tridiagonal, :SymTridiagonal, - :UpperTriangular, :UnitUpperTriangular) - for op = (:+, :-) - @eval begin - $op(H::UpperHessenberg, x::$T) = UpperHessenberg($op(H.data, x)) - $op(x::$T, H::UpperHessenberg) = UpperHessenberg($op(x, H.data)) - end - end -end - -for T = (:Number, :UniformScaling, :Diagonal) - @eval begin - *(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data * x) - *(x::$T, H::UpperHessenberg) = UpperHessenberg(x * H.data) - /(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data / x) - \(x::$T, H::UpperHessenberg) = UpperHessenberg(x \ H.data) - end -end - -function *(H::UpperHessenberg, U::UpperOrUnitUpperTriangular) - HH = _mulmattri!(_initarray(*, eltype(H), eltype(U), H), H, U) - UpperHessenberg(HH) -end -function *(U::UpperOrUnitUpperTriangular, H::UpperHessenberg) - HH = _multrimat!(_initarray(*, eltype(U), eltype(H), H), U, H) - UpperHessenberg(HH) -end - -function /(H::UpperHessenberg, U::UpperTriangular) - HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U) - UpperHessenberg(HH) -end -function /(H::UpperHessenberg, U::UnitUpperTriangular) - HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U) - UpperHessenberg(HH) -end - -function \(U::UpperTriangular, H::UpperHessenberg) - HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H) - UpperHessenberg(HH) -end -function \(U::UnitUpperTriangular, H::UpperHessenberg) - HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H) - UpperHessenberg(HH) -end - -# Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory -# (in-place in x) by the RQ algorithm from: -# -# G. Henry, "The shifted Hessenberg system solve computation," Tech. Rep. 94–163, -# Center for Appl. Math., Cornell University (1994). -# -# as reviewed in -# -# C. Beattie et al., "A note on shifted Hessenberg systems and frequency -# response computation," ACM Trans. Math. Soft. 38, pp. 12:6–12:16 (2011) -# -# (Note, however, that there is apparently a typo in Algorithm 1 of the -# Beattie paper: the Givens rotation uses u(k), not H(k,k) - σ.) -# -# Essentially, it works by doing a Givens RQ factorization of H+µI from -# right to left, and doing backsubstitution *simultaneously*. 
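-#
-# Comment-only usage sketch of the solver below (illustrative; values are arbitrary):
-#
-#     H = UpperHessenberg(rand(4, 4))
-#     b = rand(4)
-#     x = ldiv!(H, copy(b); shift=2.0)   # solves (H + 2.0*I) * x = b in O(m²)
-#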
- -# solve (H+μI)X = B, storing result in B -function ldiv!(F::UpperHessenberg, B::AbstractVecOrMat; shift::Number=false) - checksquare(F) - m = size(F,1) - m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m")) - require_one_based_indexing(B) - n = size(B,2) - H = F.data - μ = shift - u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated col of H-μI - copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m] - u[m] += μ - X = B # not a copy, just rename to match paper - cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations - @inbounds for k = m:-1:2 - c, s, ρ = givensAlgorithm(u[k], H[k,k-1]) - cs[k] = (c, s) - for i = 1:n - X[k,i] /= ρ - t₁ = s * X[k,i]; t₂ = c * X[k,i] - @simd for j = 1:k-2 - X[j,i] -= u[j]*t₂ + H[j,k-1]*t₁ - end - X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] + μ) * t₁ - end - @simd for j = 1:k-2 - u[j] = H[j,k-1]*c - u[j]*s' - end - u[k-1] = (H[k-1,k-1] + μ) * c - u[k-1]*s' - end - for i = 1:n - τ₁ = X[1,i] / u[1] - @inbounds for j = 2:m - τ₂ = X[j,i] - c, s = cs[j] - X[j-1,i] = c*τ₁ + s*τ₂ - τ₁ = c*τ₂ - s'τ₁ - end - X[m,i] = τ₁ - end - return X -end - -# solve X(H+μI) = B, storing result in B -# -# Note: this can be derived from the Henry (1994) algorithm -# by transformation to F(Hᵀ+µI)F FXᵀ = FBᵀ, where -# F is the permutation matrix that reverses the order -# of rows/cols. Essentially, we take the ldiv! algorithm, -# swap indices of H and X to transpose, and reverse the -# order of the H indices (or the order of the loops). -function rdiv!(B::AbstractMatrix, F::UpperHessenberg; shift::Number=false) - checksquare(F) - m = size(F,1) - m != size(B,2) && throw(DimensionMismatch("wrong right-hand-side # cols != $m")) - require_one_based_indexing(B) - n = size(B,1) - H = F.data - μ = shift - u = Vector{typeof(zero(eltype(H))+μ)}(undef, m) # for last rotated row of H-μI - u .= @view H[1,:] - u[1] += μ - X = B # not a copy, just rename to match paper - cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations - @inbounds for k = 1:m-1 - c, s, ρ = givensAlgorithm(u[k], H[k+1,k]) - cs[k] = (c, s) - for i = 1:n - X[i,k] /= ρ - t₁ = s * X[i,k]; t₂ = c * X[i,k] - @simd for j = k+2:m - X[i,j] -= u[j]*t₂ + H[k+1,j]*t₁ - end - X[i,k+1] -= u[k+1]*t₂ + (H[k+1,k+1] + μ) * t₁ - end - @simd for j = k+2:m - u[j] = H[k+1,j]*c - u[j]*s' - end - u[k+1] = (H[k+1,k+1] + μ) * c - u[k+1]*s' - end - for i = 1:n - τ₁ = X[i,m] / u[m] - @inbounds for j = m-1:-1:1 - τ₂ = X[i,j] - c, s = cs[j] - X[i,j+1] = c*τ₁ + s*τ₂ - τ₁ = c*τ₂ - s'τ₁ - end - X[i,1] = τ₁ - end - return X -end - -# Hessenberg-matrix determinant formula for H+μI based on: -# -# N. D. Cahill, J. R. D’Errico, D. A. Narayan, and J. Y. Narayan, "Fibonacci determinants," -# College Math. J. 33, pp. 221-225 (2003). -# -# as reviewed in Theorem 2.1 of: -# -# K. Kaygisiz and A. Sahin, "Determinant and permanent of Hessenberg matrix and generalized Lucas polynomials," -# arXiv:1111.4067 (2011). -# -# Cost is O(m²) with O(m) storage. 
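-#
-# Comment-only sketch of the shifted determinant this enables (illustrative):
-#
-#     H = UpperHessenberg(rand(5, 5))
-#     det(H)               # O(m²) via the recurrence below
-#     det(H; shift=2.0)    # det(H + 2.0*I) without materializing the shift
-#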
-function det(F::UpperHessenberg; shift::Number=false) - checksquare(F) - H = F.data - m = size(H,1) - μ = shift - m == 0 && return one(zero(eltype(H)) + μ) - determinant = H[1,1] + μ - prevdeterminant = one(determinant) - m == 1 && return determinant - prods = Vector{typeof(determinant)}(undef, m-1) # temporary storage for partial products - @inbounds for n = 2:m - prods[n-1] = prevdeterminant - prevdeterminant = determinant - determinant *= H[n,n] + μ - h = H[n,n-1] - @simd for r = n-1:-2:2 - determinant -= H[r,n] * (prods[r] *= h) - H[r-1,n] * (prods[r-1] *= h) - end - if iseven(n) - determinant -= H[1,n] * (prods[1] *= h) - end - end - return determinant -end - -# O(m²) log-determinant based on first doing Givens RQ to put H+μI into upper-triangular form and then -# taking the product of the diagonal entries. The trick is that we only need O(m) temporary storage, -# because we don't need to store the whole Givens-rotated matrix, only the most recent column. -# We do RQ (column rotations) rather than QR (row rotations) for more consecutive memory access. -# (We could also use it for det instead of the Cahill algorithm above. Cahill is slightly faster -# for very small matrices where you are likely to use det, and also uses only ± and * so it can -# be applied to Hessenberg matrices over other number fields.) -function logabsdet(F::UpperHessenberg; shift::Number=false) - checksquare(F) - H = F.data - m = size(H,1) - μ = shift - P = one(zero(eltype(H)) + μ) - logdeterminant = zero(real(P)) - m == 0 && return (logdeterminant, P) - g = Vector{typeof(P)}(undef, m) # below, g is the k-th col of Givens-rotated H+μI matrix - copyto!(g, 1, H, m*(m-1)+1, m) # g .= H[:,m] - g[m] += μ - @inbounds for k = m:-1:2 - c, s, ρ = givensAlgorithm(g[k], H[k,k-1]) - logdeterminant += log(abs(ρ)) - P *= sign(ρ) - g[k-1] = c*(H[k-1,k-1] + μ) - s'*g[k-1] - @simd for j = 1:k-2 - g[j] = c*H[j,k-1] - s'*g[j] - end - end - logdeterminant += log(abs(g[1])) - P *= sign(g[1]) - return (logdeterminant, P) -end - -function dot(x::AbstractVector, H::UpperHessenberg, y::AbstractVector) - require_one_based_indexing(x, y) - m = size(H, 1) - (length(x) == m == length(y)) || throw(DimensionMismatch()) - if iszero(m) - return dot(zero(eltype(x)), zero(eltype(H)), zero(eltype(y))) - end - x₁ = x[1] - r = dot(x₁, H[1,1], y[1]) - r += dot(x[2], H[2,1], y[1]) - @inbounds for j in 2:m-1 - yj = y[j] - if !iszero(yj) - temp = adjoint(H[1,j]) * x₁ - @simd for i in 2:j+1 - temp += adjoint(H[i,j]) * x[i] - end - r += dot(temp, yj) - end - end - ym = y[m] - if !iszero(ym) - temp = adjoint(H[1,m]) * x₁ - @simd for i in 2:m - temp += adjoint(H[i,m]) * x[i] - end - r += dot(temp, ym) - end - return r -end - -###################################################################################### -# Hessenberg factorizations Q(H+μI)Q' of A+μI: - -""" - Hessenberg <: Factorization - -A `Hessenberg` object represents the Hessenberg factorization `QHQ'` of a square -matrix, or a shift `Q(H+μI)Q'` thereof, which is produced by the [`hessenberg`](@ref) function. 
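-
-For a square matrix `A`, shift `μ`, and vector `b`, a typical pattern is
-(illustrative sketch; see [`hessenberg`](@ref) for doctested examples):
-
-```julia
-F = hessenberg(A)      # A ≈ F.Q * F.H * F.Q'
-x = (F + μ*I) \ b      # shifted solve of (A + μ*I) x = b, reusing F
-```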
-""" -struct Hessenberg{T,SH<:AbstractMatrix,S<:AbstractMatrix,W<:AbstractVector,V<:Number} <: Factorization{T} - H::SH # UpperHessenberg or SymTridiagonal - uplo::Char - factors::S # reflector data in uplo triangle, may share data with H - τ::W # more Q (reflector) data - μ::V # diagonal shift for copy-free (F+μI) \ b solves and similar -end -Hessenberg(factors::AbstractMatrix, τ::AbstractVector, H::AbstractMatrix=UpperHessenberg(factors), uplo::AbstractChar='L'; μ::Number=false) = - Hessenberg{typeof(zero(eltype(factors))+μ),typeof(H),typeof(factors),typeof(τ),typeof(μ)}(H, uplo, factors, τ, μ) -Hessenberg(F::Hessenberg) = F -Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo; μ=μ) - -copy(F::Hessenberg{<:Any,<:UpperHessenberg}) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ) -copy(F::Hessenberg{<:Any,<:SymTridiagonal}) = Hessenberg(copy(F.factors), copy(F.τ), copy(F.H), F.uplo; μ=F.μ) -size(F::Hessenberg, d::Integer) = size(F.H, d) -size(F::Hessenberg) = size(F.H) - -transpose(F::Hessenberg{<:Real}) = F' -transpose(::Hessenberg) = - throw(ArgumentError("transpose of Hessenberg decomposition is not supported, consider using adjoint")) - -# iteration for destructuring into components -Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) -Base.iterate(S::Hessenberg, ::Val{:H}) = (S.H, Val(:μ)) -Base.iterate(S::Hessenberg, ::Val{:μ}) = (S.μ, Val(:done)) -Base.iterate(S::Hessenberg, ::Val{:done}) = nothing - -hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(LAPACK.gehrd!(A)...) - -function hessenberg!(A::Union{Symmetric{<:BlasReal,<:StridedMatrix},Hermitian{<:BlasFloat,<:StridedMatrix}}) - factors, τ, d, e = LAPACK.hetrd!(A.uplo, A.data) - return Hessenberg(factors, τ, SymTridiagonal(d, e), A.uplo) -end - -""" - hessenberg!(A) -> Hessenberg - -`hessenberg!` is the same as [`hessenberg`](@ref), but saves space by overwriting -the input `A`, instead of creating a copy. -""" -hessenberg!(A::AbstractMatrix) - -""" - hessenberg(A) -> Hessenberg - -Compute the Hessenberg decomposition of `A` and return a `Hessenberg` object. If `F` is the -factorization object, the unitary matrix can be accessed with `F.Q` (of type `LinearAlgebra.HessenbergQ`) -and the Hessenberg matrix with `F.H` (of type [`UpperHessenberg`](@ref)), either of -which may be converted to a regular matrix with `Matrix(F.H)` or `Matrix(F.Q)`. - -If `A` is [`Hermitian`](@ref) or real-[`Symmetric`](@ref), then the Hessenberg -decomposition produces a real-symmetric tridiagonal matrix and `F.H` is of type -[`SymTridiagonal`](@ref). - -Note that the shifted factorization `A+μI = Q (H+μI) Q'` can be -constructed efficiently by `F + μ*I` using the [`UniformScaling`](@ref) -object [`I`](@ref), which creates a new `Hessenberg` object with shared storage -and a modified shift. The shift of a given `F` is obtained by `F.μ`. -This is useful because multiple shifted solves `(F + μ*I) \\ b` -(for different `μ` and/or `b`) can be performed efficiently once `F` is created. - -Iterating the decomposition produces the factors `F.Q, F.H, F.μ`. - -# Examples -```jldoctest -julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.] 
-3×3 Matrix{Float64}: - 4.0 9.0 7.0 - 4.0 4.0 1.0 - 4.0 3.0 2.0 - -julia> F = hessenberg(A) -Hessenberg{Float64, UpperHessenberg{Float64, Matrix{Float64}}, Matrix{Float64}, Vector{Float64}, Bool} -Q factor: 3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false} -H factor: -3×3 UpperHessenberg{Float64, Matrix{Float64}}: - 4.0 -11.3137 -1.41421 - -5.65685 5.0 2.0 - ⋅ -8.88178e-16 1.0 - -julia> F.Q * F.H * F.Q' -3×3 Matrix{Float64}: - 4.0 9.0 7.0 - 4.0 4.0 1.0 - 4.0 3.0 2.0 - -julia> q, h = F; # destructuring via iteration - -julia> q == F.Q && h == F.H -true -``` -""" -hessenberg(A::AbstractMatrix{T}) where T = - hessenberg!(eigencopy_oftype(A, eigtype(T))) - -function show(io::IO, mime::MIME"text/plain", F::Hessenberg) - summary(io, F) - if !iszero(F.μ) - print("\nwith shift μI for μ = ", F.μ) - end - print(io, "\nQ factor: ") - show(io, mime, F.Q) - println(io, "\nH factor:") - show(io, mime, F.H) -end - -function getproperty(F::Hessenberg, d::Symbol) - d === :Q && return HessenbergQ(F) - return getfield(F, d) -end - -Base.propertynames(F::Hessenberg, private::Bool=false) = - (:Q, :H, :μ, (private ? (:τ, :factors, :uplo) : ())...) - -AbstractArray(F::Hessenberg) = AbstractMatrix(F) -Matrix(F::Hessenberg) = Array(AbstractArray(F)) -Array(F::Hessenberg) = Matrix(F) -function AbstractMatrix(F::Hessenberg) - Q = F.Q - A = rmul!(lmul!(Q, Matrix{eltype(Q)}(F.H)), Q') - μ = F.μ - if iszero(μ) - return A - elseif typeof(zero(eltype(A))+μ) <: eltype(A) # can shift A in-place - for i = 1:size(A,1) - @inbounds A[i,i] += μ - end - return A - else - return A + μ*I # allocate another matrix, e.g. if A is real and μ is complex - end -end - -# multiply x by the entries of M in the upper-k triangle, which contains -# the entries of the upper-Hessenberg matrix H for k=-1 -function rmul_triu!(M::AbstractMatrix, x, k::Integer=0) - require_one_based_indexing(M) - m, n = size(M) - for j = 1:n, i = 1:min(j-k,m) - @inbounds M[i,j] *= x - end - return M -end -function lmul_triu!(x, M::AbstractMatrix, k::Integer=0) - require_one_based_indexing(M) - m, n = size(M) - for j = 1:n, i = 1:min(j-k,m) - @inbounds M[i,j] = x * M[i,j] - end - return M -end - -# when H is UpperHessenberg, it shares data with F.factors -# multiply Hessenberg by scalar (but don't modify lower triangle of F.H.data) -rmul!(F::Hessenberg{<:Any,<:UpperHessenberg{T}}, x::T) where {T<:Number} = Hessenberg(rmul_triu!(F.factors, x, -1), F.τ; μ=F.μ*x) -lmul!(x::T, F::Hessenberg{<:Any,<:UpperHessenberg{T}}) where {T<:Number} = Hessenberg(lmul_triu!(x, F.factors, -1), F.τ; μ=x*F.μ) - -rmul!(F::Hessenberg{<:Any,<:SymTridiagonal{T}}, x::T) where {T<:Number} = Hessenberg(F.factors, F.τ, SymTridiagonal(F.H.dv*x, F.H.ev*x), F.uplo; μ=F.μ*x) -lmul!(x::T, F::Hessenberg{<:Any,<:SymTridiagonal{T}}) where {T<:Number} = Hessenberg(F.factors, F.τ, SymTridiagonal(x*F.H.dv, x*F.H.ev), F.uplo; μ=x*F.μ) - -# Promote F * x or x * F. In general, we don't know how to do promotions -# that would change the element type of F.H, however. 
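-#
-# For example (comment-only illustration):
-#
-#     F = hessenberg(rand(3, 3))   # F.H has Float64 entries
-#     2 * F                        # ok: Float64 * Int promotes back to Float64
-#     (1 + 2im) * F                # MethodError: would require a ComplexF64 F.H
-#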
-function (*)(F::Hessenberg{<:Any,<:AbstractMatrix{T}}, x::S) where {T,S<:Number} - TS = typeof(zero(T) * x) - if TS === T - return rmul!(copy(F), convert(T, x)) - else - throw(MethodError(*, (F, x))) - end -end -function (*)(x::S, F::Hessenberg{<:Any,<:AbstractMatrix{T}}) where {T,S<:Number} - TS = typeof(zero(T) * x) - if TS === T - return lmul!(convert(T, x), copy(F)) - else - throw(MethodError(*, (x, F))) - end -end --(F::Hessenberg) = F * -one(eltype(F.H)) - -# shift Hessenberg by λI -+(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ + J.λ) -+(J::UniformScaling, F::Hessenberg) = Hessenberg(F, J.λ + F.μ) --(F::Hessenberg, J::UniformScaling) = Hessenberg(F, F.μ - J.λ) --(J::UniformScaling, F::Hessenberg) = Hessenberg(-F, J.λ - F.μ) - -function ldiv!(F::Hessenberg, B::AbstractVecOrMat) - Q = F.Q - if iszero(F.μ) - return lmul!(Q, ldiv!(F.H, lmul!(Q', B))) - else - return lmul!(Q, ldiv!(F.H, lmul!(Q', B); shift=F.μ)) - end -end - -function rdiv!(B::AbstractMatrix, F::Hessenberg) - Q = F.Q - return rmul!(rdiv!(rmul!(B, Q), F.H; shift=F.μ), Q') -end - -# handle case of real H and complex μ — we need to work around the -# fact that we can't multiple a real F.Q by a complex matrix directly in LAPACK -function ldiv!(F::Hessenberg{<:Complex,<:Any,<:AbstractMatrix{<:Real}}, B::AbstractVecOrMat{<:Complex}) - Q = F.Q - Br = lmul!(Q', real(B)) - Bi = lmul!(Q', imag(B)) - ldiv!(F.H, B .= Complex.(Br,Bi); shift=F.μ) - Br .= real.(B); Bi .= imag.(B) - Br = lmul!(Q, Br) - Bi = lmul!(Q, Bi) - return B .= Complex.(Br,Bi) -end -function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Any,<:AbstractMatrix{<:Real}}) - Q = F.Q - Br = rmul!(real(B), Q) - Bi = rmul!(imag(B), Q) - rdiv!(B .= Complex.(Br,Bi), F.H; shift=F.μ) - Br .= real.(B); Bi .= imag.(B) - Br = rmul!(Br, Q') - Bi = rmul!(Bi, Q') - return B .= Complex.(Br,Bi) -end - -ldiv!(F::AdjointFactorization{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' -rdiv!(B::AbstractMatrix, F::AdjointFactorization{<:Any,<:Hessenberg}) = ldiv!(F', B')' - -det(F::Hessenberg) = det(F.H; shift=F.μ) -logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ) -function logdet(F::Hessenberg) - d,s = logabsdet(F) - return d + log(s) -end diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl deleted file mode 100644 index 6353f9fa8d266..0000000000000 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ /dev/null @@ -1,6944 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module LAPACK -@doc """ -Interfaces to LAPACK subroutines. -""" LAPACK - -using ..LinearAlgebra.BLAS: @blasfunc, chkuplo - -using ..LinearAlgebra: libblastrampoline, BlasFloat, BlasInt, LAPACKException, DimensionMismatch, - SingularException, PosDefException, chkstride1, checksquare,triu, tril, dot - -using Base: iszero, require_one_based_indexing - - -# Legacy binding maintained for backwards-compatibility but new packages -# should not look at this, instead preferring to parse the output -# of BLAS.get_config() -const liblapack = libblastrampoline - -#Generic LAPACK error handlers -""" -Handle only negative LAPACK error codes - -*NOTE* use only if the positive error code is useful. 
-""" -function chkargsok(ret::BlasInt) - if ret < 0 - throw(ArgumentError("invalid argument #$(-ret) to LAPACK call")) - end -end - -"Handle all nonzero info codes" -function chklapackerror(ret::BlasInt) - if ret == 0 - return - elseif ret < 0 - throw(ArgumentError("invalid argument #$(-ret) to LAPACK call")) - else # ret > 0 - throw(LAPACKException(ret)) - end -end - -function chknonsingular(ret::BlasInt) - if ret > 0 - throw(SingularException(ret)) - end -end - -function chkposdef(ret::BlasInt) - if ret > 0 - throw(PosDefException(ret)) - end -end - -"Check that {c}transpose is correctly specified" -function chktrans(trans::AbstractChar) - if !(trans == 'N' || trans == 'C' || trans == 'T') - throw(ArgumentError("trans argument must be 'N' (no transpose), 'T' (transpose), or 'C' (conjugate transpose), got $trans")) - end - trans -end - -"Check that left/right hand side multiply is correctly specified" -function chkside(side::AbstractChar) - if !(side == 'L' || side == 'R') - throw(ArgumentError("side argument must be 'L' (left hand multiply) or 'R' (right hand multiply), got $side")) - end - side -end - -"Check that unit diagonal flag is correctly specified" -function chkdiag(diag::AbstractChar) - if !(diag == 'U' || diag =='N') - throw(ArgumentError("diag argument must be 'U' (unit diagonal) or 'N' (non-unit diagonal), got $diag")) - end - diag -end - -subsetrows(X::AbstractVector, Y::AbstractArray, k) = Y[1:k] -subsetrows(X::AbstractMatrix, Y::AbstractArray, k) = Y[1:k, :] - -function chkfinite(A::AbstractMatrix) - for a in A - if !isfinite(a) - throw(ArgumentError("matrix contains Infs or NaNs")) - end - end - return true -end - -function chkuplofinite(A::AbstractMatrix, uplo::AbstractChar) - require_one_based_indexing(A) - m, n = size(A) - if uplo == 'U' - @inbounds for j in 1:n, i in 1:j - if !isfinite(A[i,j]) - throw(ArgumentError("matrix contains Infs or NaNs")) - end - end - else - @inbounds for j in 1:n, i in j:m - if !isfinite(A[i,j]) - throw(ArgumentError("matrix contains Infs or NaNs")) - end - end - end -end - -# LAPACK version number -function version() - major = Ref{BlasInt}(0) - minor = Ref{BlasInt}(0) - patch = Ref{BlasInt}(0) - ccall((@blasfunc(ilaver_), libblastrampoline), Cvoid, - (Ptr{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), - major, minor, patch) - return VersionNumber(major[], minor[], patch[]) -end - -# (GB) general banded matrices, LU decomposition and solver -for (gbtrf, gbtrs, elty) in - ((:dgbtrf_,:dgbtrs_,:Float64), - (:sgbtrf_,:sgbtrs_,:Float32), - (:zgbtrf_,:zgbtrs_,:ComplexF64), - (:cgbtrf_,:cgbtrs_,:ComplexF32)) - @eval begin - # SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, KL, KU, LDAB, M, N - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION AB( LDAB, * ) - function gbtrf!(kl::Integer, ku::Integer, m::Integer, AB::AbstractMatrix{$elty}) - require_one_based_indexing(AB) - chkstride1(AB) - n = size(AB, 2) - mnmn = min(m, n) - ipiv = similar(AB, BlasInt, mnmn) - info = Ref{BlasInt}() - ccall((@blasfunc($gbtrf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), - m, n, kl, ku, AB, max(1,stride(AB,2)), ipiv, info) - chklapackerror(info[]) - AB, ipiv - end - - # SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO) - # * .. Scalar Arguments .. - # CHARACTER TRANS - # INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS - # * .. Array Arguments .. 
- # INTEGER IPIV( * ) - # DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) - function gbtrs!(trans::AbstractChar, kl::Integer, ku::Integer, m::Integer, - AB::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, - B::AbstractVecOrMat{$elty}) - require_one_based_indexing(AB, B) - chkstride1(AB, B, ipiv) - chktrans(trans) - info = Ref{BlasInt}() - n = size(AB,2) - if m != n || m != size(B,1) - throw(DimensionMismatch("matrix AB has dimensions $(size(AB)), but right hand side matrix B has dimensions $(size(B))")) - end - ccall((@blasfunc($gbtrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong), - trans, n, kl, ku, size(B,2), AB, max(1,stride(AB,2)), ipiv, - B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -""" - gbtrf!(kl, ku, m, AB) -> (AB, ipiv) - -Compute the LU factorization of a banded matrix `AB`. `kl` is the first -subdiagonal containing a nonzero band, `ku` is the last superdiagonal -containing one, and `m` is the first dimension of the matrix `AB`. Returns -the LU factorization in-place and `ipiv`, the vector of pivots used. -""" -gbtrf!(kl::Integer, ku::Integer, m::Integer, AB::AbstractMatrix) - -""" - gbtrs!(trans, kl, ku, m, AB, ipiv, B) - -Solve the equation `AB * X = B`. `trans` determines the orientation of `AB`. It may -be `N` (no transpose), `T` (transpose), or `C` (conjugate transpose). `kl` is the -first subdiagonal containing a nonzero band, `ku` is the last superdiagonal -containing one, and `m` is the first dimension of the matrix `AB`. `ipiv` is the vector -of pivots returned from `gbtrf!`. Returns the vector or matrix `X`, overwriting `B` in-place. -""" -gbtrs!(trans::AbstractChar, kl::Integer, ku::Integer, m::Integer, AB::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat) - -## (GE) general matrices: balancing and back-transforming -for (gebal, gebak, elty, relty) in - ((:dgebal_, :dgebak_, :Float64, :Float64), - (:sgebal_, :sgebak_, :Float32, :Float32), - (:zgebal_, :zgebak_, :ComplexF64, :Float64), - (:cgebal_, :cgebak_, :ComplexF32, :Float32)) - @eval begin - # SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) - #* .. Scalar Arguments .. - # CHARACTER JOB - # INTEGER IHI, ILP, INFO, LDA, N - # .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), SCALE( * ) - function gebal!(job::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkfinite(A) # balancing routines don't support NaNs and Infs - ihi = Ref{BlasInt}() - ilo = Ref{BlasInt}() - scale = similar(A, $relty, n) - info = Ref{BlasInt}() - ccall((@blasfunc($gebal), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong), - job, n, A, max(1,stride(A,2)), ilo, ihi, scale, info, 1) - chklapackerror(info[]) - ilo[], ihi[], scale - end - - # SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, INFO ) - #* .. Scalar Arguments .. - # CHARACTER JOB, SIDE - # INTEGER IHI, ILP, INFO, LDV, M, N - # .. Array Arguments .. 
- # DOUBLE PRECISION SCALE( * ), V( LDV, * ) - function gebak!(job::AbstractChar, side::AbstractChar, - ilo::BlasInt, ihi::BlasInt, scale::AbstractVector{$relty}, - V::AbstractMatrix{$elty}) - require_one_based_indexing(scale, V) - chkstride1(scale, V) - chkside(side) - chkfinite(V) # balancing routines don't support NaNs and Infs - n = checksquare(V) - info = Ref{BlasInt}() - ccall((@blasfunc($gebak), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Clong, Clong), - job, side, size(V,1), ilo, ihi, scale, n, V, max(1,stride(V,2)), info, - 1, 1) - chklapackerror(info[]) - V - end - end -end - -""" - gebal!(job, A) -> (ilo, ihi, scale) - -Balance the matrix `A` before computing its eigensystem or Schur factorization. -`job` can be one of `N` (`A` will not be permuted or scaled), `P` (`A` will only -be permuted), `S` (`A` will only be scaled), or `B` (`A` will be both permuted -and scaled). Modifies `A` in-place and returns `ilo`, `ihi`, and `scale`. If -permuting was turned on, `A[i,j] = 0` if `j > i` and `1 < j < ilo` or `j > ihi`. -`scale` contains information about the scaling/permutations performed. -""" -gebal!(job::AbstractChar, A::AbstractMatrix) - -""" - gebak!(job, side, ilo, ihi, scale, V) - -Transform the eigenvectors `V` of a matrix balanced using `gebal!` to -the unscaled/unpermuted eigenvectors of the original matrix. Modifies `V` -in-place. `side` can be `L` (left eigenvectors are transformed) or `R` -(right eigenvectors are transformed). -""" -gebak!(job::AbstractChar, side::AbstractChar, ilo::BlasInt, ihi::BlasInt, scale::AbstractVector, V::AbstractMatrix) - -# (GE) general matrices, direct decompositions -# -# These mutating functions take as arguments all the values they -# return, even if the value of the function does not depend on them -# (e.g. the tau argument). This is so that a factorization can be -# updated in place. The condensed mutating functions, usually a -# function of A only, are defined after this block. -for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty) in - ((:dgebrd_,:dgelqf_,:dgeqlf_,:dgeqrf_,:dgeqp3_,:dgeqrt_,:dgeqrt3_,:dgerqf_,:dgetrf_,:Float64,:Float64), - (:sgebrd_,:sgelqf_,:sgeqlf_,:sgeqrf_,:sgeqp3_,:sgeqrt_,:sgeqrt3_,:sgerqf_,:sgetrf_,:Float32,:Float32), - (:zgebrd_,:zgelqf_,:zgeqlf_,:zgeqrf_,:zgeqp3_,:zgeqrt_,:zgeqrt3_,:zgerqf_,:zgetrf_,:ComplexF64,:Float64), - (:cgebrd_,:cgelqf_,:cgeqlf_,:cgeqrf_,:cgeqp3_,:cgeqrt_,:cgeqrt3_,:cgerqf_,:cgetrf_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, - # INFO ) - # .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), - # TAUQ( * ), WORK( * ) - function gebrd!(A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - m, n = size(A) - k = min(m, n) - d = similar(A, $relty, k) - e = similar(A, $relty, k) - tauq = similar(A, $elty, k) - taup = similar(A, $elty, k) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gebrd), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, max(1,stride(A,2)), - d, e, tauq, taup, - work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, d, e, tauq, taup - end - - # SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function gelqf!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m = BlasInt(size(A, 1)) - n = BlasInt(size(A, 2)) - lda = BlasInt(max(1,stride(A, 2))) - if length(tau) != min(m,n) - throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))")) - end - lwork = BlasInt(-1) - work = Vector{$elty}(undef, 1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gelqf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, lda, tau, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, tau - end - - # SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function geqlf!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m = BlasInt(size(A, 1)) - n = BlasInt(size(A, 2)) - lda = BlasInt(max(1,stride(A, 2))) - if length(tau) != min(m,n) - throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))")) - end - lwork = BlasInt(-1) - work = Vector{$elty}(undef, 1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($geqlf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, lda, tau, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, tau - end - - # SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # * .. Array Arguments .. 
- # INTEGER JPVT( * ) - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function geqp3!(A::AbstractMatrix{$elty}, jpvt::AbstractVector{BlasInt}, tau::AbstractVector{$elty}) - require_one_based_indexing(A, jpvt, tau) - chkstride1(A,jpvt,tau) - m,n = size(A) - if length(tau) != min(m,n) - throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))")) - end - if length(jpvt) != n - throw(DimensionMismatch("jpvt has length $(length(jpvt)), but needs length $n")) - end - lda = stride(A,2) - if lda == 0 - return A, tau, jpvt - end # Early exit - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - cmplx = eltype(A)<:Complex - if cmplx - rwork = Vector{$relty}(undef, 2n) - end - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - if cmplx - ccall((@blasfunc($geqp3), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{BlasInt}), - m, n, A, lda, - jpvt, tau, work, lwork, - rwork, info) - else - ccall((@blasfunc($geqp3), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - m, n, A, lda, - jpvt, tau, work, - lwork, info) - end - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - return A, tau, jpvt - end - - function geqrt!(A::AbstractMatrix{$elty}, T::AbstractMatrix{$elty}) - require_one_based_indexing(A, T) - chkstride1(A) - m, n = size(A) - minmn = min(m, n) - nb = size(T, 1) - if nb > minmn - throw(ArgumentError("block size $nb > $minmn too large")) - end - lda = max(1, stride(A,2)) - work = Vector{$elty}(undef, nb*n) - if n > 0 - info = Ref{BlasInt}() - ccall((@blasfunc($geqrt), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{BlasInt}), - m, n, nb, A, - lda, T, max(1,stride(T,2)), work, - info) - chklapackerror(info[]) - end - A, T - end - - function geqrt3!(A::AbstractMatrix{$elty}, T::AbstractMatrix{$elty}) - require_one_based_indexing(A, T) - chkstride1(A) - chkstride1(T) - m, n = size(A) - p, q = size(T) - if m < n - throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but should have more rows than columns")) - end - if p != n || q != n - throw(DimensionMismatch("block reflector T has dimensions ($p,$q), but should have dimensions ($n,$n)")) - end - if n > 0 - info = Ref{BlasInt}() - ccall((@blasfunc($geqrt3), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, max(1, stride(A, 2)), - T, max(1,stride(T,2)), info) - chklapackerror(info[]) - end - A, T - end - - ## geqrfp! - positive elements on diagonal of R - not defined yet - # SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # * .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function geqrf!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m, n = size(A) - if length(tau) != min(m,n) - throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($geqrf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, max(1,stride(A,2)), tau, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = max(BlasInt(1),BlasInt(real(work[1]))) - resize!(work, lwork) - end - end - A, tau - end - - # SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function gerqf!(A::AbstractMatrix{$elty},tau::AbstractVector{$elty}) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m, n = size(A) - if length(tau) != min(m,n) - throw(DimensionMismatch("tau has length $(length(tau)), but needs length $(min(m,n))")) - end - lwork = BlasInt(-1) - work = Vector{$elty}(undef, 1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gerqf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, max(1,stride(A,2)), tau, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = max(BlasInt(m), BlasInt(real(work[1]))) - resize!(work, lwork) - end - end - A, tau - end - - # SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, M, N - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ) - function getrf!(A::AbstractMatrix{$elty}; check = true) - require_one_based_indexing(A) - check && chkfinite(A) - chkstride1(A) - m, n = size(A) - lda = max(1,stride(A, 2)) - ipiv = similar(A, BlasInt, min(m,n)) - info = Ref{BlasInt}() - ccall((@blasfunc($getrf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), - m, n, A, lda, ipiv, info) - chkargsok(info[]) - A, ipiv, info[] #Error code is stored in LU factorization type - end - end -end - -""" - gebrd!(A) -> (A, d, e, tauq, taup) - -Reduce `A` in-place to bidiagonal form `A = QBP'`. Returns `A`, containing the -bidiagonal matrix `B`; `d`, containing the diagonal elements of `B`; `e`, -containing the off-diagonal elements of `B`; `tauq`, containing the -elementary reflectors representing `Q`; and `taup`, containing the -elementary reflectors representing `P`. -""" -gebrd!(A::AbstractMatrix) - -""" - gelqf!(A, tau) - -Compute the `LQ` factorization of `A`, `A = LQ`. `tau` contains scalars -which parameterize the elementary reflectors of the factorization. `tau` -must have length greater than or equal to the smallest dimension of `A`. - -Returns -`A` and `tau` modified in-place. -""" -gelqf!(A::AbstractMatrix, tau::AbstractVector) - -""" - geqlf!(A, tau) - -Compute the `QL` factorization of `A`, `A = QL`. `tau` contains scalars -which parameterize the elementary reflectors of the factorization. `tau` -must have length greater than or equal to the smallest dimension of `A`. - -Returns `A` and `tau` modified in-place. 
-""" -geqlf!(A::AbstractMatrix, tau::AbstractVector) - -""" - geqp3!(A, [jpvt, tau]) -> (A, tau, jpvt) - -Compute the pivoted `QR` factorization of `A`, `AP = QR` using BLAS level 3. -`P` is a pivoting matrix, represented by `jpvt`. `tau` stores the elementary -reflectors. The arguments `jpvt` and `tau` are optional and allow -for passing preallocated arrays. When passed, `jpvt` must have length greater -than or equal to `n` if `A` is an `(m x n)` matrix and `tau` must have length -greater than or equal to the smallest dimension of `A`. - -`A`, `jpvt`, and `tau` are modified in-place. -""" -geqp3!(A::AbstractMatrix, jpvt::AbstractVector{BlasInt}, tau::AbstractVector) - -function geqp3!(A::AbstractMatrix{<:BlasFloat}, jpvt::AbstractVector{BlasInt}) - m, n = size(A) - geqp3!(A, jpvt, similar(A, min(m, n))) -end - -function geqp3!(A::AbstractMatrix{<:BlasFloat}) - m, n = size(A) - geqp3!(A, zeros(BlasInt, n), similar(A, min(m, n))) -end - -""" - geqrt!(A, T) - -Compute the blocked `QR` factorization of `A`, `A = QR`. `T` contains upper -triangular block reflectors which parameterize the elementary reflectors of -the factorization. The first dimension of `T` sets the block size and it must -be between 1 and `n`. The second dimension of `T` must equal the smallest -dimension of `A`. - -Returns `A` and `T` modified in-place. -""" -geqrt!(A::AbstractMatrix, T::AbstractMatrix) - -""" - geqrt3!(A, T) - -Recursively computes the blocked `QR` factorization of `A`, `A = QR`. `T` -contains upper triangular block reflectors which parameterize the -elementary reflectors of the factorization. The first dimension of `T` sets the -block size and it must be between 1 and `n`. The second dimension of `T` must -equal the smallest dimension of `A`. - -Returns `A` and `T` modified in-place. -""" -geqrt3!(A::AbstractMatrix, T::AbstractMatrix) - -""" - geqrf!(A, tau) - -Compute the `QR` factorization of `A`, `A = QR`. `tau` contains scalars -which parameterize the elementary reflectors of the factorization. `tau` -must have length greater than or equal to the smallest dimension of `A`. - -Returns `A` and `tau` modified in-place. -""" -geqrf!(A::AbstractMatrix, tau::AbstractVector) - -""" - gerqf!(A, tau) - -Compute the `RQ` factorization of `A`, `A = RQ`. `tau` contains scalars -which parameterize the elementary reflectors of the factorization. `tau` -must have length greater than or equal to the smallest dimension of `A`. - -Returns `A` and `tau` modified in-place. -""" -gerqf!(A::AbstractMatrix, tau::AbstractVector) - -""" - getrf!(A) -> (A, ipiv, info) - -Compute the pivoted `LU` factorization of `A`, `A = LU`. - -Returns `A`, modified in-place, `ipiv`, the pivoting information, and an `info` -code which indicates success (`info = 0`), a singular value in `U` -(`info = i`, in which case `U[i,i]` is singular), or an error code (`info < 0`). -""" -getrf!(A::AbstractMatrix, tau::AbstractVector) - -""" - gelqf!(A) -> (A, tau) - -Compute the `LQ` factorization of `A`, `A = LQ`. - -Returns `A`, modified in-place, and `tau`, which contains scalars -which parameterize the elementary reflectors of the factorization. -""" -gelqf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); gelqf!(A, similar(A, min(m, n)))) - -""" - geqlf!(A) -> (A, tau) - -Compute the `QL` factorization of `A`, `A = QL`. - -Returns `A`, modified in-place, and `tau`, which contains scalars -which parameterize the elementary reflectors of the factorization. 
-""" -geqlf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); geqlf!(A, similar(A, min(m, n)))) - -""" - geqrt!(A, nb) -> (A, T) - -Compute the blocked `QR` factorization of `A`, `A = QR`. `nb` sets the block size -and it must be between 1 and `n`, the second dimension of `A`. - -Returns `A`, modified in-place, and `T`, which contains upper -triangular block reflectors which parameterize the elementary reflectors of -the factorization. -""" -geqrt!(A::AbstractMatrix{<:BlasFloat}, nb::Integer) = geqrt!(A, similar(A, nb, minimum(size(A)))) - -""" - geqrt3!(A) -> (A, T) - -Recursively computes the blocked `QR` factorization of `A`, `A = QR`. - -Returns `A`, modified in-place, and `T`, which contains upper triangular block -reflectors which parameterize the elementary reflectors of the factorization. -""" -geqrt3!(A::AbstractMatrix{<:BlasFloat}) = (n = size(A, 2); geqrt3!(A, similar(A, n, n))) - -""" - geqrf!(A) -> (A, tau) - -Compute the `QR` factorization of `A`, `A = QR`. - -Returns `A`, modified in-place, and `tau`, which contains scalars -which parameterize the elementary reflectors of the factorization. -""" -geqrf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); geqrf!(A, similar(A, min(m, n)))) - -""" - gerqf!(A) -> (A, tau) - -Compute the `RQ` factorization of `A`, `A = RQ`. - -Returns `A`, modified in-place, and `tau`, which contains scalars -which parameterize the elementary reflectors of the factorization. -""" -gerqf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); gerqf!(A, similar(A, min(m, n)))) - -## Tools to compute and apply elementary reflectors -for (larfg, elty) in - ((:dlarfg_, Float64), - (:slarfg_, Float32), - (:zlarfg_, ComplexF64), - (:clarfg_, ComplexF32)) - @eval begin - # .. Scalar Arguments .. - # INTEGER incx, n - # DOUBLE PRECISION alpha, tau - # .. - # .. Array Arguments .. - # DOUBLE PRECISION x( * ) - function larfg!(x::AbstractVector{$elty}) - N = BlasInt(length(x)) - α = Ref{$elty}(x[1]) - incx = BlasInt(1) - τ = Ref{$elty}(0) - ccall((@blasfunc($larfg), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}), - N, α, pointer(x, 2), incx, τ) - @inbounds x[1] = one($elty) - return τ[] - end - end -end - -for (larf, elty) in - ((:dlarf_, Float64), - (:slarf_, Float32), - (:zlarf_, ComplexF64), - (:clarf_, ComplexF32)) - @eval begin - # .. Scalar Arguments .. - # CHARACTER side - # INTEGER incv, ldc, m, n - # DOUBLE PRECISION tau - # .. - # .. Array Arguments .. - # DOUBLE PRECISION c( ldc, * ), v( * ), work( * ) - function larf!(side::AbstractChar, v::AbstractVector{$elty}, - τ::$elty, C::AbstractMatrix{$elty}, work::AbstractVector{$elty}) - m, n = size(C) - chkside(side) - ldc = max(1, stride(C, 2)) - l = side == 'L' ? n : m - incv = BlasInt(1) - ccall((@blasfunc($larf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Clong), - side, m, n, v, incv, - τ, C, ldc, work, 1) - return C - end - - function larf!(side::AbstractChar, v::AbstractVector{$elty}, - τ::$elty, C::AbstractMatrix{$elty}) - m, n = size(C) - chkside(side) - lwork = side == 'L' ? n : m - return larf!(side, v, τ, C, Vector{$elty}(undef,lwork)) - end - end -end - -## Complete orthogonaliztion tools -for (tzrzf, ormrz, elty) in - ((:dtzrzf_,:dormrz_,:Float64), - (:stzrzf_,:sormrz_,:Float32), - (:ztzrzf_,:zunmrz_,:ComplexF64), - (:ctzrzf_,:cunmrz_,:ComplexF32)) - @eval begin - # SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) - # - # .. 
Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, M, N - # .. - # .. Array Arguments .. - # COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) - function tzrzf!(A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - m, n = size(A) - if n < m - throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows")) - end - lda = max(1, stride(A,2)) - tau = similar(A, $elty, m) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($tzrzf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, A, lda, - tau, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, tau - end - - # SUBROUTINE ZUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, - # WORK, LWORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER SIDE, TRANS - # INTEGER INFO, K, L, LDA, LDC, LWORK, M, N - # .. - # .. Array Arguments .. - # COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) - function ormrz!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractMatrix{$elty}) - require_one_based_indexing(A, tau, C) - chktrans(trans) - chkside(side) - chkstride1(A, tau, C) - m, n = size(C) - k = length(tau) - l = size(A, 2) - size(A, 1) - lda = max(1, stride(A,2)) - ldc = max(1, stride(C,2)) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormrz), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - side, trans, m, n, - k, l, A, lda, - tau, C, ldc, work, - lwork, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - end -end - -""" - ormrz!(side, trans, A, tau, C) - -Multiplies the matrix `C` by `Q` from the transformation supplied by -`tzrzf!`. Depending on `side` or `trans` the multiplication can be -left-sided (`side = L, Q*C`) or right-sided (`side = R, C*Q`) and `Q` -can be unmodified (`trans = N`), transposed (`trans = T`), or conjugate -transposed (`trans = C`). Returns matrix `C` which is modified in-place -with the result of the multiplication. -""" -ormrz!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractMatrix) - -""" - tzrzf!(A) -> (A, tau) - -Transforms the upper trapezoidal matrix `A` to upper triangular form in-place. -Returns `A` and `tau`, the scalar parameters for the elementary reflectors -of the transformation. -""" -tzrzf!(A::AbstractMatrix) - -## (GE) general matrices, solvers with factorization, solver and inverse -for (gels, gesv, getrs, getri, elty) in - ((:dgels_,:dgesv_,:dgetrs_,:dgetri_,:Float64), - (:sgels_,:sgesv_,:sgetrs_,:sgetri_,:Float32), - (:zgels_,:zgesv_,:zgetrs_,:zgetri_,:ComplexF64), - (:cgels_,:cgesv_,:cgetrs_,:cgetri_,:ComplexF32)) - @eval begin - # SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK,INFO) - # * .. Scalar Arguments .. 
- # CHARACTER TRANS - # INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS - function gels!(trans::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chktrans(trans) - chkstride1(A, B) - btrn = trans == 'T' - m, n = size(A) - if size(B,1) != (btrn ? n : m) - throw(DimensionMismatch("matrix A has dimensions ($m,$n), transposed: $btrn, but leading dimension of B is $(size(B,1))")) - end - info = Ref{BlasInt}() - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gels), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - (btrn ? 'T' : 'N'), m, n, size(B,2), A, max(1,stride(A,2)), - B, max(1,stride(B,2)), work, lwork, info, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - k = min(m, n) - F = m < n ? tril(A[1:k, 1:k]) : triu(A[1:k, 1:k]) - ssr = Vector{$elty}(undef, size(B, 2)) - for i = 1:size(B,2) - x = zero($elty) - for j = k+1:size(B,1) - x += abs2(B[j,i]) - end - ssr[i] = x - end - F, subsetrows(B, B, k), ssr - end - - # SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LDB, N, NRHS - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function gesv!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - n = checksquare(A) - if size(B,1) != n - throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - info = Ref{BlasInt}() - ccall((@blasfunc($gesv), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info) - chklapackerror(info[]) - B, A, ipiv - end - - # SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - #* .. Scalar Arguments .. - # CHARACTER TRANS - # INTEGER INFO, LDA, LDB, N, NRHS - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function getrs!(trans::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chktrans(trans) - chkstride1(A, B, ipiv) - n = checksquare(A) - if n != size(B, 1) - throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) - end - if n != length(ipiv) - throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n")) - end - nrhs = size(B, 2) - info = Ref{BlasInt}() - ccall((@blasfunc($getrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - - # SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) - #* .. Scalar Arguments .. - # INTEGER INFO, LDA, LWORK, N - #* .. Array Arguments .. 
- # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function getri!(A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - require_one_based_indexing(A, ipiv) - chkstride1(A, ipiv) - n = checksquare(A) - if n != length(ipiv) - throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs $n")) - end - lda = max(1,stride(A, 2)) - lwork = BlasInt(-1) - work = Vector{$elty}(undef, 1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($getri), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - n, A, lda, ipiv, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A - end - end -end - -""" - gels!(trans, A, B) -> (F, B, ssr) - -Solves the linear equation `A * X = B`, `transpose(A) * X = B`, or `adjoint(A) * X = B` using -a QR or LQ factorization. Modifies the matrix/vector `B` in place with the -solution. `A` is overwritten with its `QR` or `LQ` factorization. `trans` -may be one of `N` (no modification), `T` (transpose), or `C` (conjugate -transpose). `gels!` searches for the minimum norm/least squares solution. -`A` may be under or over determined. The solution is returned in `B`. -""" -gels!(trans::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat) - -""" - gesv!(A, B) -> (B, A, ipiv) - -Solves the linear equation `A * X = B` where `A` is a square matrix using -the `LU` factorization of `A`. `A` is overwritten with its `LU` -factorization and `B` is overwritten with the solution `X`. `ipiv` contains the -pivoting information for the `LU` factorization of `A`. -""" -gesv!(A::AbstractMatrix, B::AbstractVecOrMat) - -""" - getrs!(trans, A, ipiv, B) - -Solves the linear equation `A * X = B`, `transpose(A) * X = B`, or `adjoint(A) * X = B` for -square `A`. Modifies the matrix/vector `B` in place with the solution. `A` -is the `LU` factorization from `getrf!`, with `ipiv` the pivoting -information. `trans` may be one of `N` (no modification), `T` (transpose), -or `C` (conjugate transpose). -""" -getrs!(trans::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat) - -""" - getri!(A, ipiv) - -Computes the inverse of `A`, using its `LU` factorization found by -`getrf!`. `ipiv` is the pivot information output and `A` -contains the `LU` factorization of `getrf!`. `A` is overwritten with -its inverse. -""" -getri!(A::AbstractMatrix, ipiv::AbstractVector{BlasInt}) - -for (gesvx, elty) in - ((:dgesvx_,:Float64), - (:sgesvx_,:Float32)) - @eval begin - # SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, - # EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, - # WORK, IWORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER EQUED, FACT, TRANS - # INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS - # DOUBLE PRECISION RCOND - # .. - # .. Array Arguments .. 
- # INTEGER IPIV( * ), IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), - # $ BERR( * ), C( * ), FERR( * ), R( * ), - # $ WORK( * ), X( LDX, * - # - function gesvx!(fact::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - AF::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, equed::AbstractChar, - R::AbstractVector{$elty}, C::AbstractVector{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, AF, ipiv, R, C, B) - chktrans(trans) - chkstride1(ipiv, R, C, B) - n = checksquare(A) - lda = stride(A,2) - n = checksquare(AF) - ldaf = stride(AF,2) - nrhs = size(B,2) - ldb = stride(B,2) - rcond = Ref{$elty}() - ferr = similar(A, $elty, nrhs) - berr = similar(A, $elty, nrhs) - work = Vector{$elty}(undef, 4n) - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - X = similar(A, $elty, n, nrhs) - ccall((@blasfunc($gesvx), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{UInt8}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong), - fact, trans, n, nrhs, A, lda, AF, ldaf, ipiv, equed, R, C, B, - ldb, X, n, rcond, ferr, berr, work, iwork, info, 1, 1, 1) - chklapackerror(info[]) - if info[] == n + 1 - @warn "Matrix is singular to working precision" - else - chknonsingular(info[]) - end - #WORK(1) contains the reciprocal pivot growth factor norm(A)/norm(U) - X, equed, R, C, B, rcond[], ferr, berr, work[1] - end - - function gesvx!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - n = size(A,1) - X, equed, R, C, B, rcond, ferr, berr, rpgf = - gesvx!('N', 'N', A, - similar(A, $elty, n, n), - similar(A, BlasInt, n), - 'N', - similar(A, $elty, n), - similar(A, $elty, n), - B) - X, rcond, ferr, berr, rpgf - end - end -end -for (gesvx, elty, relty) in - ((:zgesvx_,:ComplexF64,:Float64), - (:cgesvx_,:ComplexF32 ,:Float32)) - @eval begin - # SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, - # EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, - # WORK, RWORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER EQUED, FACT, TRANS - # INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS - # DOUBLE PRECISION RCOND - # .. - # .. Array Arguments .. 
- # INTEGER IPIV( * ) - # DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), - # $ RWORK( * ) - # COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), - # $ WORK( * ), X( LDX, * ) - function gesvx!(fact::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - AF::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, equed::AbstractChar, - R::AbstractVector{$relty}, C::AbstractVector{$relty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, AF, ipiv, R, C, B) - chktrans(trans) - chkstride1(A, AF, ipiv, R, C, B) - n = checksquare(A) - lda = stride(A,2) - n = checksquare(AF) - ldaf = stride(AF,2) - nrhs = size(B,2) - ldb = stride(B,2) - rcond = Ref{$relty}() - ferr = similar(A, $relty, nrhs) - berr = similar(A, $relty, nrhs) - work = Vector{$elty}(undef, 2n) - rwork = Vector{$relty}(undef, 2n) - info = Ref{BlasInt}() - X = similar(A, $elty, n, nrhs) - ccall((@blasfunc($gesvx), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{UInt8}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$relty}, - Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong), - fact, trans, n, nrhs, A, lda, AF, ldaf, ipiv, equed, R, C, B, - ldb, X, n, rcond, ferr, berr, work, rwork, info, 1, 1, 1) - chklapackerror(info[]) - if info[] == n + 1 - @warn "Matrix is singular to working precision" - else - chknonsingular(info[]) - end - #RWORK(1) contains the reciprocal pivot growth factor norm(A)/norm(U) - X, equed, R, C, B, rcond[], ferr, berr, rwork[1] - end - - #Wrapper for the no-equilibration, no-transpose calculation - function gesvx!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - n = size(A,1) - X, equed, R, C, B, rcond, ferr, berr, rpgf = - gesvx!('N', 'N', A, - similar(A, $elty, n, n), - similar(A, BlasInt, n), - 'N', - similar(A, $relty, n), - similar(A, $relty, n), - B) - X, rcond, ferr, berr, rpgf - end - end -end - -""" - gesvx!(fact, trans, A, AF, ipiv, equed, R, C, B) -> (X, equed, R, C, B, rcond, ferr, berr, work) - -Solves the linear equation `A * X = B` (`trans = N`), `transpose(A) * X = B` -(`trans = T`), or `adjoint(A) * X = B` (`trans = C`) using the `LU` factorization -of `A`. `fact` may be `E`, in which case `A` will be equilibrated and copied -to `AF`; `F`, in which case `AF` and `ipiv` from a previous `LU` factorization -are inputs; or `N`, in which case `A` will be copied to `AF` and then -factored. If `fact = F`, `equed` may be `N`, meaning `A` has not been -equilibrated; `R`, meaning `A` was multiplied by `Diagonal(R)` from the left; -`C`, meaning `A` was multiplied by `Diagonal(C)` from the right; or `B`, meaning -`A` was multiplied by `Diagonal(R)` from the left and `Diagonal(C)` from the right. -If `fact = F` and `equed = R` or `B` the elements of `R` must all be positive. -If `fact = F` and `equed = C` or `B` the elements of `C` must all be positive. 
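For orientation, a minimal sketch of exercising this expert driver through the simplified no-equilibration method `gesvx!(A, B)` defined above (and documented further below); the 2×2 system is arbitrary, and copies are passed since LAPACK drivers may overwrite their arguments:

```julia
using LinearAlgebra

A = [4.0 1.0; 1.0 3.0]
B = [1.0 0.0; 0.0 1.0]
# X solves A*X = B; rcond is a reciprocal condition-number estimate.
X, rcond, ferr, berr, rpgf = LinearAlgebra.LAPACK.gesvx!(copy(A), copy(B))
X ≈ A \ B
```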
- -Returns the solution `X`; `equed`, which is an output if `fact` is not `N`, -and describes the equilibration that was performed; `R`, the row equilibration -diagonal; `C`, the column equilibration diagonal; `B`, which may be overwritten -with its equilibrated form `Diagonal(R)*B` (if `trans = N` and `equed = R,B`) or -`Diagonal(C)*B` (if `trans = T,C` and `equed = C,B`); `rcond`, the reciprocal -condition number of `A` after equilbrating; `ferr`, the forward error bound for -each solution vector in `X`; `berr`, the forward error bound for each solution -vector in `X`; and `work`, the reciprocal pivot growth factor. -""" -gesvx!(fact::AbstractChar, trans::AbstractChar, A::AbstractMatrix, AF::AbstractMatrix, - ipiv::AbstractVector{BlasInt}, equed::AbstractChar, R::AbstractVector, C::AbstractVector, B::AbstractVecOrMat) - -""" - gesvx!(A, B) - -The no-equilibration, no-transpose simplification of `gesvx!`. -""" -gesvx!(A::AbstractMatrix, B::AbstractVecOrMat) - -for (gelsd, gelsy, elty) in - ((:dgelsd_,:dgelsy_,:Float64), - (:sgelsd_,:sgelsy_,:Float32)) - @eval begin - # SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, - # $ WORK, LWORK, IWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK - # DOUBLE PRECISION RCOND - # * .. - # * .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) - function gelsd!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=-one($elty)) - require_one_based_indexing(A, B) - chkstride1(A, B) - m, n = size(A) - if size(B, 1) != m - throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m")) - end - newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))] - s = similar(A, $elty, min(m, n)) - rnk = Ref{BlasInt}() - info = Ref{BlasInt}() - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - for i = 1:2 # first call returns lwork as work[1] and iwork length as iwork[1] - ccall((@blasfunc($gelsd), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}), - m, n, size(B,2), - A, max(1,stride(A,2)), newB, max(1,stride(B,2),n), - s, $elty(rcond), rnk, work, - lwork, iwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - resize!(iwork, iwork[1]) - end - end - subsetrows(B, newB, n), rnk[] - end - - # SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, - # $ WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK - # DOUBLE PRECISION RCOND - # * .. - # * .. Array Arguments .. 
- # INTEGER JPVT( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) - function gelsy!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=eps($elty)) - require_one_based_indexing(A, B) - chkstride1(A) - m = size(A, 1) - n = size(A, 2) - nrhs = size(B, 2) - if size(B, 1) != m - throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m")) - end - newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))] - lda = max(1, stride(A,2)) - ldb = max(1, stride(newB,2)) - jpvt = zeros(BlasInt, n) - rnk = Ref{BlasInt}() - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gelsy), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - m, n, nrhs, A, - lda, newB, ldb, jpvt, - $elty(rcond), rnk, work, lwork, - info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - subsetrows(B, newB, n), rnk[] - end - end -end - -for (gelsd, gelsy, elty, relty) in - ((:zgelsd_,:zgelsy_,:ComplexF64,:Float64), - (:cgelsd_,:cgelsy_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, - # $ WORK, LWORK, RWORK, IWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK - # DOUBLE PRECISION RCOND - # * .. - # * .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION RWORK( * ), S( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) - function gelsd!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=-one($relty)) - require_one_based_indexing(A, B) - chkstride1(A, B) - m, n = size(A) - if size(B, 1) != m - throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m")) - end - newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))] - s = similar(A, $relty, min(m, n)) - rnk = Ref{BlasInt}() - info = Ref{BlasInt}() - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 1) - iwork = Vector{BlasInt}(undef, 1) - for i = 1:2 # first call returns lwork as work[1], rwork length as rwork[1] and iwork length as iwork[1] - ccall((@blasfunc($gelsd), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, - Ref{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ref{BlasInt}, Ref{BlasInt}), - m, n, size(B,2), A, - max(1,stride(A,2)), newB, max(1,stride(B,2),n), s, - $relty(rcond), rnk, work, lwork, - rwork, iwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - resize!(rwork, BlasInt(rwork[1])) - resize!(iwork, iwork[1]) - end - end - subsetrows(B, newB, n), rnk[] - end - - # SUBROUTINE ZGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, - # $ WORK, LWORK, RWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK - # DOUBLE PRECISION RCOND - # * .. - # * .. Array Arguments .. 
- # INTEGER JPVT( * ) - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) - function gelsy!(A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, rcond::Real=eps($relty)) - require_one_based_indexing(A, B) - chkstride1(A, B) - m, n = size(A) - nrhs = size(B, 2) - if size(B, 1) != m - throw(DimensionMismatch("B has leading dimension $(size(B,1)) but needs $m")) - end - newB = [B; zeros($elty, max(0, n - size(B, 1)), size(B, 2))] - lda = max(1, m) - ldb = max(1, m, n) - jpvt = zeros(BlasInt, n) - rnk = Ref{BlasInt}(1) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 2n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gelsy), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{BlasInt}), - m, n, nrhs, A, - lda, newB, ldb, jpvt, - $relty(rcond), rnk, work, lwork, - rwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - subsetrows(B, newB, n), rnk[] - end - end -end - -""" - gelsd!(A, B, rcond) -> (B, rnk) - -Computes the least norm solution of `A * X = B` by finding the `SVD` -factorization of `A`, then dividing-and-conquering the problem. `B` -is overwritten with the solution `X`. Singular values below `rcond` -will be treated as zero. Returns the solution in `B` and the effective rank -of `A` in `rnk`. -""" -gelsd!(A::AbstractMatrix, B::AbstractVecOrMat, rcond::Real) - -""" - gelsy!(A, B, rcond) -> (B, rnk) - -Computes the least norm solution of `A * X = B` by finding the full `QR` -factorization of `A`, then dividing-and-conquering the problem. `B` -is overwritten with the solution `X`. Singular values below `rcond` -will be treated as zero. Returns the solution in `B` and the effective rank -of `A` in `rnk`. -""" -gelsy!(A::AbstractMatrix, B::AbstractVecOrMat, rcond::Real) - -for (gglse, elty) in ((:dgglse_, :Float64), - (:sgglse_, :Float32), - (:zgglse_, :ComplexF64), - (:cgglse_, :ComplexF32)) - @eval begin - # SUBROUTINE DGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, - # $ INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, LDA, LDB, LWORK, M, N, P - # * .. - # * .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( * ), D( * ), - # $ WORK( * ), X( * ) - function gglse!(A::AbstractMatrix{$elty}, c::AbstractVector{$elty}, - B::AbstractMatrix{$elty}, d::AbstractVector{$elty}) - require_one_based_indexing(A, c, B, d) - chkstride1(A, c, B, d) - m, n = size(A) - p = size(B, 1) - if size(B, 2) != n - throw(DimensionMismatch("B has second dimension $(size(B,2)), needs $n")) - end - if length(c) != m - throw(DimensionMismatch("c has length $(length(c)), needs $m")) - end - if length(d) != p - throw(DimensionMismatch("d has length $(length(d)), needs $p")) - end - X = zeros($elty, n) - info = Ref{BlasInt}() - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gglse), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - m, n, p, A, max(1,stride(A,2)), B, max(1,stride(B,2)), c, d, X, - work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - X, dot(view(c, n - p + 1:m), view(c, n - p + 1:m)) - end - end -end - -""" - gglse!(A, c, B, d) -> (X,res) - -Solves the equation `A * x = c` where `x` is subject to the equality -constraint `B * x = d`. Uses the formula `||c - A*x||^2 = 0` to solve. -Returns `X` and the residual sum-of-squares. -""" -gglse!(A::AbstractMatrix, c::AbstractVector, B::AbstractMatrix, d::AbstractVector) - -# (GE) general matrices eigenvalue-eigenvector and singular value decompositions -for (geev, gesvd, gesdd, ggsvd, elty, relty) in - ((:dgeev_,:dgesvd_,:dgesdd_,:dggsvd_,:Float64,:Float64), - (:sgeev_,:sgesvd_,:sgesdd_,:sggsvd_,:Float32,:Float32), - (:zgeev_,:zgesvd_,:zgesdd_,:zggsvd_,:ComplexF64,:Float64), - (:cgeev_,:cgesvd_,:cgesdd_,:cggsvd_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, - # $ LDVR, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBVL, JOBVR - # INTEGER INFO, LDA, LDVL, LDVR, LWORK, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), - # $ WI( * ), WORK( * ), WR( * ) - function geev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkfinite(A) # balancing routines don't support NaNs and Infs - lvecs = jobvl == 'V' - rvecs = jobvr == 'V' - VL = similar(A, $elty, (n, lvecs ? n : 0)) - VR = similar(A, $elty, (n, rvecs ? 
n : 0)) - cmplx = eltype(A) <: Complex - if cmplx - W = similar(A, $elty, n) - rwork = similar(A, $relty, 2n) - else - WR = similar(A, $elty, n) - WI = similar(A, $elty, n) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - if cmplx - ccall((@blasfunc($geev), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{BlasInt}, Clong, Clong), - jobvl, jobvr, n, A, max(1,stride(A,2)), W, VL, n, VR, n, - work, lwork, rwork, info, 1, 1) - else - ccall((@blasfunc($geev), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - jobvl, jobvr, n, A, max(1,stride(A,2)), WR, WI, VL, n, - VR, n, work, lwork, info, 1, 1) - end - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - cmplx ? (W, VL, VR) : (WR, WI, VL, VR) - end - - # SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, - # LWORK, IWORK, INFO ) - #* .. Scalar Arguments .. - # CHARACTER JOBZ - # INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N - #* .. - #* .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), - # VT( LDVT, * ), WORK( * ) - function gesdd!(job::AbstractChar, A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - m, n = size(A) - minmn = min(m, n) - if job == 'A' - U = similar(A, $elty, (m, m)) - VT = similar(A, $elty, (n, n)) - elseif job == 'S' - U = similar(A, $elty, (m, minmn)) - VT = similar(A, $elty, (minmn, n)) - elseif job == 'O' - U = similar(A, $elty, (m, m >= n ? 0 : m)) - VT = similar(A, $elty, (n, m >= n ? n : 0)) - else - U = similar(A, $elty, (m, 0)) - VT = similar(A, $elty, (n, 0)) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - S = similar(A, $relty, minmn) - cmplx = eltype(A)<:Complex - if cmplx - rwork = Vector{$relty}(undef, job == 'N' ? 7*minmn : minmn*max(5*minmn+7, 2*max(m,n)+2*minmn+1)) - end - iwork = Vector{BlasInt}(undef, 8*minmn) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - if cmplx - ccall((@blasfunc($gesdd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong), - job, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)), - work, lwork, rwork, iwork, info, 1) - else - ccall((@blasfunc($gesdd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{BlasInt}, Clong), - job, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)), - work, lwork, iwork, info, 1) - end - chklapackerror(info[]) - if i == 1 - # Work around issue with truncated Float32 representation of lwork in - # sgesdd by using nextfloat. 
See - # http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=13&t=4587&p=11036&hilit=sgesdd#p11036 - # and - # https://github.com/scipy/scipy/issues/5401 - lwork = round(BlasInt, nextfloat(real(work[1]))) - resize!(work, lwork) - end - end - if job == 'O' - if m >= n - return (A, S, VT) - else - # ()__ - # ||::Z__ - # ||::|:::Z____ - # ||::|:::|====| - # ||==|===|====| - # ||""|===|====| - # || `"""|====| - # || `""""` - return (U, S, A) - end - end - return (U, S, VT) - end - - # SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBU, JOBVT - # INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), - # $ VT( LDVT, * ), WORK( * ) - function gesvd!(jobu::AbstractChar, jobvt::AbstractChar, A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - m, n = size(A) - minmn = min(m, n) - S = similar(A, $relty, minmn) - U = similar(A, $elty, jobu == 'A' ? (m, m) : (jobu == 'S' ? (m, minmn) : (m, 0))) - VT = similar(A, $elty, jobvt == 'A' ? (n, n) : (jobvt == 'S' ? (minmn, n) : (n, 0))) - work = Vector{$elty}(undef, 1) - cmplx = eltype(A) <: Complex - if cmplx - rwork = Vector{$relty}(undef, 5minmn) - end - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i in 1:2 # first call returns lwork as work[1] - if cmplx - ccall((@blasfunc($gesvd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong), - jobu, jobvt, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)), - work, lwork, rwork, info, 1, 1) - else - ccall((@blasfunc($gesvd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - jobu, jobvt, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)), - work, lwork, info, 1, 1) - end - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - if jobu == 'O' - return (A, S, VT) - elseif jobvt == 'O' - # =============|===========|() - # # # #:::::: - # # # #:::::: - # # # #:::::: - # # # #:::::: - # # # # # # # - # # # # # # # - # # # # # # # - return (U, S, A) # # # # # # # - else # # # # # # # - return (U, S, VT) # # # # # # # - - end - end - - # SUBROUTINE ZGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, - # $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, - # $ RWORK, IWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBQ, JOBU, JOBV - # INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P - # * .. - # * .. Array Arguments .. 
- # INTEGER IWORK( * ) - # DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), - # $ U( LDU, * ), V( LDV, * ), WORK( * ) - function ggsvd!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - m, n = size(A) - if size(B, 2) != n - throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n")) - end - p = size(B, 1) - k = Vector{BlasInt}(undef, 1) - l = Vector{BlasInt}(undef, 1) - lda = max(1,stride(A, 2)) - ldb = max(1,stride(B, 2)) - alpha = similar(A, $relty, n) - beta = similar(A, $relty, n) - ldu = max(1, m) - U = jobu == 'U' ? similar(A, $elty, ldu, m) : similar(A, $elty, 0) - ldv = max(1, p) - V = jobv == 'V' ? similar(A, $elty, ldv, p) : similar(A, $elty, 0) - ldq = max(1, n) - Q = jobq == 'Q' ? similar(A, $elty, ldq, n) : similar(A, $elty, 0) - work = Vector{$elty}(undef, max(3n, m, p) + n) - cmplx = eltype(A) <: Complex - if cmplx - rwork = Vector{$relty}(undef, 2n) - end - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - if cmplx - ccall((@blasfunc($ggsvd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt}, - Clong, Clong, Clong), - jobu, jobv, jobq, m, - n, p, k, l, - A, lda, B, ldb, - alpha, beta, U, ldu, - V, ldv, Q, ldq, - work, rwork, iwork, info, - 1, 1, 1) - else - ccall((@blasfunc($ggsvd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Clong, Clong, Clong), - jobu, jobv, jobq, m, - n, p, k, l, - A, lda, B, ldb, - alpha, beta, U, ldu, - V, ldv, Q, ldq, - work, iwork, info, - 1, 1, 1) - end - chklapackerror(info[]) - if m - k[1] - l[1] >= 0 - R = triu(A[1:k[1] + l[1],n - k[1] - l[1] + 1:n]) - else - R = triu([A[1:m, n - k[1] - l[1] + 1:n]; B[m - k[1] + 1:l[1], n - k[1] - l[1] + 1:n]]) - end - U, V, Q, alpha, beta, k[1], l[1], R - end - end -end - -""" - geev!(jobvl, jobvr, A) -> (W, VL, VR) - -Finds the eigensystem of `A`. If `jobvl = N`, the left eigenvectors of -`A` aren't computed. If `jobvr = N`, the right eigenvectors of `A` -aren't computed. If `jobvl = V` or `jobvr = V`, the corresponding -eigenvectors are computed. Returns the eigenvalues in `W`, the right -eigenvectors in `VR`, and the left eigenvectors in `VL`. -""" -geev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix) - -""" - gesdd!(job, A) -> (U, S, VT) - -Finds the singular value decomposition of `A`, `A = U * S * V'`, -using a divide and conquer approach. If `job = A`, all the columns of `U` and -the rows of `V'` are computed. If `job = N`, no columns of `U` or rows of `V'` -are computed. If `job = O`, `A` is overwritten with the columns of (thin) `U` -and the rows of (thin) `V'`. If `job = S`, the columns of (thin) `U` and the -rows of (thin) `V'` are computed and returned separately. 
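A minimal usage sketch (the 4×3 input is arbitrary; the argument is overwritten, so a copy is passed):

```julia
using LinearAlgebra

A = randn(4, 3)
# job = 'S': thin SVD, so U is 4×3 and VT is 3×3.
U, S, VT = LinearAlgebra.LAPACK.gesdd!('S', copy(A))
U * Diagonal(S) * VT ≈ A   # reconstructs A up to roundoff
```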
-""" -gesdd!(job::AbstractChar, A::AbstractMatrix) - -""" - gesvd!(jobu, jobvt, A) -> (U, S, VT) - -Finds the singular value decomposition of `A`, `A = U * S * V'`. -If `jobu = A`, all the columns of `U` are computed. If `jobvt = A` all the rows -of `V'` are computed. If `jobu = N`, no columns of `U` are computed. If -`jobvt = N` no rows of `V'` are computed. If `jobu = O`, `A` is overwritten with -the columns of (thin) `U`. If `jobvt = O`, `A` is overwritten with the rows -of (thin) `V'`. If `jobu = S`, the columns of (thin) `U` are computed -and returned separately. If `jobvt = S` the rows of (thin) `V'` are -computed and returned separately. `jobu` and `jobvt` can't both be `O`. - -Returns `U`, `S`, and `Vt`, where `S` are the singular values of `A`. -""" -gesvd!(jobu::AbstractChar, jobvt::AbstractChar, A::AbstractMatrix) - -""" - ggsvd!(jobu, jobv, jobq, A, B) -> (U, V, Q, alpha, beta, k, l, R) - -Finds the generalized singular value decomposition of `A` and `B`, `U'*A*Q = D1*R` -and `V'*B*Q = D2*R`. `D1` has `alpha` on its diagonal and `D2` has `beta` on its -diagonal. If `jobu = U`, the orthogonal/unitary matrix `U` is computed. If -`jobv = V` the orthogonal/unitary matrix `V` is computed. If `jobq = Q`, -the orthogonal/unitary matrix `Q` is computed. If `jobu`, `jobv` or `jobq` is -`N`, that matrix is not computed. This function is only available in LAPACK -versions prior to 3.6.0. -""" -ggsvd!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) - - -for (f, elty) in ((:dggsvd3_, :Float64), - (:sggsvd3_, :Float32)) - @eval begin - function ggsvd3!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - m, n = size(A) - if size(B, 2) != n - throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n")) - end - p = size(B, 1) - k = Ref{BlasInt}() - l = Ref{BlasInt}() - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - alpha = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldu = max(1, m) - U = jobu == 'U' ? similar(A, $elty, ldu, m) : similar(A, $elty, 0) - ldv = max(1, p) - V = jobv == 'V' ? similar(A, $elty, ldv, p) : similar(A, $elty, 0) - ldq = max(1, n) - Q = jobq == 'Q' ? 
similar(A, $elty, ldq, n) : similar(A, $elty, 0) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($f), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Clong, Clong, Clong), - jobu, jobv, jobq, m, - n, p, k, l, - A, lda, B, ldb, - alpha, beta, U, ldu, - V, ldv, Q, ldq, - work, lwork, iwork, info, - 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - if m - k[] - l[] >= 0 - R = triu(A[1:k[] + l[],n - k[] - l[] + 1:n]) - else - R = triu([A[1:m, n - k[] - l[] + 1:n]; B[m - k[] + 1:l[], n - k[] - l[] + 1:n]]) - end - return U, V, Q, alpha, beta, k[], l[], R - end - end -end - -for (f, elty, relty) in ((:zggsvd3_, :ComplexF64, :Float64), - (:cggsvd3_, :ComplexF32, :Float32)) - @eval begin - function ggsvd3!(jobu::AbstractChar, jobv::AbstractChar, jobq::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - m, n = size(A) - if size(B, 2) != n - throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs $n")) - end - p = size(B, 1) - k = Vector{BlasInt}(undef, 1) - l = Vector{BlasInt}(undef, 1) - lda = max(1,stride(A, 2)) - ldb = max(1,stride(B, 2)) - alpha = similar(A, $relty, n) - beta = similar(A, $relty, n) - ldu = max(1, m) - U = jobu == 'U' ? similar(A, $elty, ldu, m) : similar(A, $elty, 0) - ldv = max(1, p) - V = jobv == 'V' ? similar(A, $elty, ldv, p) : similar(A, $elty, 0) - ldq = max(1, n) - Q = jobq == 'Q' ? similar(A, $elty, ldq, n) : similar(A, $elty, 0) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 2n) - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($f), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, - Ptr{BlasInt}, Clong, Clong, Clong), - jobu, jobv, jobq, m, - n, p, k, l, - A, lda, B, ldb, - alpha, beta, U, ldu, - V, ldv, Q, ldq, - work, lwork, rwork, iwork, - info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - if m - k[1] - l[1] >= 0 - R = triu(A[1:k[1] + l[1],n - k[1] - l[1] + 1:n]) - else - R = triu([A[1:m, n - k[1] - l[1] + 1:n]; B[m - k[1] + 1:l[1], n - k[1] - l[1] + 1:n]]) - end - return U, V, Q, alpha, beta, k[1], l[1], R - end - end -end - -""" - ggsvd3!(jobu, jobv, jobq, A, B) -> (U, V, Q, alpha, beta, k, l, R) - -Finds the generalized singular value decomposition of `A` and `B`, `U'*A*Q = D1*R` -and `V'*B*Q = D2*R`. `D1` has `alpha` on its diagonal and `D2` has `beta` on its -diagonal. If `jobu = U`, the orthogonal/unitary matrix `U` is computed. If -`jobv = V` the orthogonal/unitary matrix `V` is computed. If `jobq = Q`, -the orthogonal/unitary matrix `Q` is computed. 
If `jobu`, `jobv`, or `jobq` is -`N`, that matrix is not computed. This function requires LAPACK 3.6.0. -""" -ggsvd3! - -## Expert driver and generalized eigenvalue problem -for (geevx, ggev, ggev3, elty) in - ((:dgeevx_,:dggev_,:dggev3_,:Float64), - (:sgeevx_,:sggev_,:sggev3_,:Float32)) - @eval begin - # SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, - # VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, - # RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER BALANC, JOBVL, JOBVR, SENSE - # INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N - # DOUBLE PRECISION ABNRM - # .. - # .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), RCONDE( * ), RCONDV( * ), - # $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), - # $ WI( * ), WORK( * ), WR( * ) - function geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix{$elty}) - n = checksquare(A) - chkfinite(A) # balancing routines don't support NaNs and Infs - lda = max(1,stride(A,2)) - wr = similar(A, $elty, n) - wi = similar(A, $elty, n) - if balanc ∉ ['N', 'P', 'S', 'B'] - throw(ArgumentError("balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed")) - end - ldvl = 0 - if jobvl == 'V' - ldvl = n - elseif jobvl == 'N' - ldvl = 0 - else - throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) - end - VL = similar(A, $elty, ldvl, n) - ldvr = 0 - if jobvr == 'V' - ldvr = n - elseif jobvr == 'N' - ldvr = 0 - else - throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) - end - VR = similar(A, $elty, ldvr, n) - ilo = Ref{BlasInt}() - ihi = Ref{BlasInt}() - scale = similar(A, $elty, n) - abnrm = Ref{$elty}() - rconde = similar(A, $elty, n) - rcondv = similar(A, $elty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iworksize = 0 - if sense == 'N' || sense == 'E' - iworksize = 0 - elseif sense == 'V' || sense == 'B' - iworksize = 2*n - 2 - else - throw(ArgumentError("sense must be 'N', 'E', 'V' or 'B', but $sense was passed")) - end - iwork = Vector{BlasInt}(undef, iworksize) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($geevx), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Clong, Clong, Clong, Clong), - balanc, jobvl, jobvr, sense, - n, A, lda, wr, - wi, VL, max(1,ldvl), VR, - max(1,ldvr), ilo, ihi, scale, - abnrm, rconde, rcondv, work, - lwork, iwork, info, - 1, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - A, wr, wi, VL, VR, ilo[], ihi[], scale, abnrm[], rconde, rcondv - end - - # SUBROUTINE DGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, - # $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBVL, JOBVR - # INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N - # * .. - # * .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), - # $ B( LDB, * ), BETA( * ), VL( LDVL, * ), - # $ VR( LDVR, * ), WORK( * ) - function ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n, m = checksquare(A,B) - if n != m - throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size")) - end - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - alphar = similar(A, $elty, n) - alphai = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvl = 0 - if jobvl == 'V' - ldvl = n - elseif jobvl == 'N' - ldvl = 1 - else - throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) - end - vl = similar(A, $elty, ldvl, n) - ldvr = 0 - if jobvr == 'V' - ldvr = n - elseif jobvr == 'N' - ldvr = 1 - else - throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) - end - vr = similar(A, $elty, ldvr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ggev), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Clong, Clong), - jobvl, jobvr, n, A, - lda, B, ldb, alphar, - alphai, beta, vl, ldvl, - vr, ldvr, work, lwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - alphar, alphai, beta, vl, vr - end - - # SUBROUTINE DGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, - # $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBVL, JOBVR - # INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N - # * .. - # * .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), - # $ B( LDB, * ), BETA( * ), VL( LDVL, * ), - # $ VR( LDVR, * ), WORK( * ) - function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n, m = checksquare(A,B) - if n != m - throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size")) - end - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - alphar = similar(A, $elty, n) - alphai = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvl = 0 - if jobvl == 'V' - ldvl = n - elseif jobvl == 'N' - ldvl = 1 - else - throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) - end - vl = similar(A, $elty, ldvl, n) - ldvr = 0 - if jobvr == 'V' - ldvr = n - elseif jobvr == 'N' - ldvr = 1 - else - throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) - end - vr = similar(A, $elty, ldvr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ggev3), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Clong, Clong), - jobvl, jobvr, n, A, - lda, B, ldb, alphar, - alphai, beta, vl, ldvl, - vr, ldvr, work, lwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - alphar, alphai, beta, vl, vr - end - end -end - -for (geevx, ggev, ggev3, elty, relty) in - ((:zgeevx_,:zggev_,:zggev3_,:ComplexF64,:Float64), - (:cgeevx_,:cggev_,:cggev3_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, - # LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, - # RCONDV, WORK, LWORK, RWORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER BALANC, JOBVL, JOBVR, SENSE - # INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N - # DOUBLE PRECISION ABNRM - # .. - # .. Array Arguments .. 
- # DOUBLE PRECISION RCONDE( * ), RCONDV( * ), RWORK( * ), - # $ SCALE( * ) - # COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), - # $ W( * ), WORK( * ) - function geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix{$elty}) - n = checksquare(A) - chkfinite(A) # balancing routines don't support NaNs and Infs - lda = max(1,stride(A,2)) - w = similar(A, $elty, n) - if balanc ∉ ['N', 'P', 'S', 'B'] - throw(ArgumentError("balanc must be 'N', 'P', 'S', or 'B', but $balanc was passed")) - end - ldvl = 0 - if jobvl == 'V' - ldvl = n - elseif jobvl == 'N' - ldvl = 0 - else - throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) - end - VL = similar(A, $elty, ldvl, n) - ldvr = 0 - if jobvr == 'V' - ldvr = n - elseif jobvr == 'N' - ldvr = 0 - else - throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) - end - if sense ∉ ['N','E','V','B'] - throw(ArgumentError("sense must be 'N', 'E', 'V' or 'B', but $sense was passed")) - end - VR = similar(A, $elty, ldvr, n) - ilo = Ref{BlasInt}() - ihi = Ref{BlasInt}() - scale = similar(A, $relty, n) - abnrm = Ref{$relty}() - rconde = similar(A, $relty, n) - rcondv = similar(A, $relty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 2n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($geevx), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{$relty}, - Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ref{BlasInt}, Clong, Clong, Clong, Clong), - balanc, jobvl, jobvr, sense, - n, A, lda, w, - VL, max(1,ldvl), VR, max(1,ldvr), - ilo, ihi, scale, abnrm, - rconde, rcondv, work, lwork, - rwork, info, 1, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - A, w, VL, VR, ilo[], ihi[], scale, abnrm[], rconde, rcondv - end - - # SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, - # $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBVL, JOBVR - # INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N - # * .. - # * .. Array Arguments .. 
- # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), - # $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), - # $ WORK( * ) - function ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size")) - end - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - alpha = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvl = 0 - if jobvl == 'V' - ldvl = n - elseif jobvl == 'N' - ldvl = 1 - else - throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) - end - vl = similar(A, $elty, ldvl, n) - ldvr = 0 - if jobvr == 'V' - ldvr = n - elseif jobvr == 'N' - ldvr = 1 - else - throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) - end - vr = similar(A, $elty, ldvr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 8n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ggev), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, - Ref{BlasInt}, Clong, Clong), - jobvl, jobvr, n, A, - lda, B, ldb, alpha, - beta, vl, ldvl, vr, - ldvr, work, lwork, rwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - alpha, beta, vl, vr - end - - # SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, - # $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBVL, JOBVR - # INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N - # * .. - # * .. Array Arguments .. 
- # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), - # $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), - # $ WORK( * ) - function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size")) - end - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - alpha = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvl = 0 - if jobvl == 'V' - ldvl = n - elseif jobvl == 'N' - ldvl = 1 - else - throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) - end - vl = similar(A, $elty, ldvl, n) - ldvr = 0 - if jobvr == 'V' - ldvr = n - elseif jobvr == 'N' - ldvr = 1 - else - throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) - end - vr = similar(A, $elty, ldvr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 8n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ggev3), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, - Ref{BlasInt}, Clong, Clong), - jobvl, jobvr, n, A, - lda, B, ldb, alpha, - beta, vl, ldvl, vr, - ldvr, work, lwork, rwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - end - end - alpha, beta, vl, vr - end - end -end - -""" - geevx!(balanc, jobvl, jobvr, sense, A) -> (A, w, VL, VR, ilo, ihi, scale, abnrm, rconde, rcondv) - -Finds the eigensystem of `A` with matrix balancing. If `jobvl = N`, the -left eigenvectors of `A` aren't computed. If `jobvr = N`, the right -eigenvectors of `A` aren't computed. If `jobvl = V` or `jobvr = V`, the -corresponding eigenvectors are computed. If `balanc = N`, no balancing is -performed. If `balanc = P`, `A` is permuted but not scaled. If -`balanc = S`, `A` is scaled but not permuted. If `balanc = B`, `A` is -permuted and scaled. If `sense = N`, no reciprocal condition numbers are -computed. If `sense = E`, reciprocal condition numbers are computed for -the eigenvalues only. If `sense = V`, reciprocal condition numbers are -computed for the right eigenvectors only. If `sense = B`, reciprocal -condition numbers are computed for the right eigenvectors and the -eigenvectors. If `sense = E,B`, the right and left eigenvectors must be -computed. -""" -geevx!(balanc::AbstractChar, jobvl::AbstractChar, jobvr::AbstractChar, sense::AbstractChar, A::AbstractMatrix) - -""" - ggev!(jobvl, jobvr, A, B) -> (alpha, beta, vl, vr) - -Finds the generalized eigendecomposition of `A` and `B`. If `jobvl = N`, -the left eigenvectors aren't computed. If `jobvr = N`, the right -eigenvectors aren't computed. If `jobvl = V` or `jobvr = V`, the -corresponding eigenvectors are computed. -""" -ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) - -""" - ggev3!(jobvl, jobvr, A, B) -> (alpha, beta, vl, vr) - -Finds the generalized eigendecomposition of `A` and `B` using a blocked -algorithm. If `jobvl = N`, the left eigenvectors aren't computed. If -`jobvr = N`, the right eigenvectors aren't computed. If `jobvl = V` or -`jobvr = V`, the corresponding eigenvectors are computed. 
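Note that for real element types the methods above return the numerators of the generalized eigenvalues split into real and imaginary parts (`alphar`, `alphai`) rather than a single `alpha`. A minimal sketch with an arbitrary 2×2 pencil (the inputs are overwritten, so copies are passed):

```julia
using LinearAlgebra

A = [1.0 2.0; 3.0 4.0]
B = [2.0 0.0; 0.0 1.0]
alphar, alphai, beta, vl, vr = LinearAlgebra.LAPACK.ggev3!('N', 'V', copy(A), copy(B))
λ = complex.(alphar, alphai) ./ beta   # generalized eigenvalues, cf. eigvals(A, B)
```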
This function -requires LAPACK 3.6.0. -""" -ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) - -# One step incremental condition estimation of max/min singular values -for (laic1, elty) in - ((:dlaic1_,:Float64), - (:slaic1_,:Float32)) - @eval begin - # SUBROUTINE DLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) - # - # .. Scalar Arguments .. - # INTEGER J, JOB - # DOUBLE PRECISION C, GAMMA, S, SEST, SESTPR - # .. - # .. Array Arguments .. - # DOUBLE PRECISION W( J ), X( J ) - function laic1!(job::Integer, x::AbstractVector{$elty}, - sest::$elty, w::AbstractVector{$elty}, gamma::$elty) - require_one_based_indexing(x, w) - j = length(x) - if j != length(w) - throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))")) - end - sestpr = Vector{$elty}(undef, 1) - s = Vector{$elty}(undef, 1) - c = Vector{$elty}(undef, 1) - ccall((@blasfunc($laic1), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$elty}, - Ptr{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}), - job, j, x, sest, - w, gamma, sestpr, s, - c) - sestpr[1], s[1], c[1] - end - end -end -for (laic1, elty, relty) in - ((:zlaic1_,:ComplexF64,:Float64), - (:claic1_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ZLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) - # - # .. Scalar Arguments .. - # INTEGER J, JOB - # DOUBLE PRECISION SEST, SESTPR - # COMPLEX*16 C, GAMMA, S - # .. - # .. Array Arguments .. - # COMPLEX*16 W( J ), X( J ) - function laic1!(job::Integer, x::AbstractVector{$elty}, - sest::$relty, w::AbstractVector{$elty}, gamma::$elty) - require_one_based_indexing(x, w) - j = length(x) - if j != length(w) - throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))")) - end - sestpr = Vector{$relty}(undef, 1) - s = Vector{$elty}(undef, 1) - c = Vector{$elty}(undef, 1) - ccall((@blasfunc($laic1), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$relty}, - Ptr{$elty}, Ref{$elty}, Ptr{$relty}, Ptr{$elty}, - Ptr{$elty}), - job, j, x, sest, - w, gamma, sestpr, s, - c) - sestpr[1], s[1], c[1] - end - end -end - -# (GT) General tridiagonal, decomposition, solver and direct solver -for (gtsv, gttrf, gttrs, elty) in - ((:dgtsv_,:dgttrf_,:dgttrs_,:Float64), - (:sgtsv_,:sgttrf_,:sgttrs_,:Float32), - (:zgtsv_,:zgttrf_,:zgttrs_,:ComplexF64), - (:cgtsv_,:cgttrf_,:cgttrs_,:ComplexF32)) - @eval begin - # SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) - # .. Scalar Arguments .. - # INTEGER INFO, LDB, N, NRHS - # .. Array Arguments .. 
- # DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ) - function gtsv!(dl::AbstractVector{$elty}, d::AbstractVector{$elty}, du::AbstractVector{$elty}, - B::AbstractVecOrMat{$elty}) - require_one_based_indexing(dl, d, du, B) - chkstride1(B, dl, d, du) - n = length(d) - if !(n >= length(dl) >= n - 1) - throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $n or $(n - 1)")) - end - if !(n >= length(du) >= n - 1) - throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $n or $(n - 1)")) - end - if n != size(B,1) - throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n")) - end - if n == 0 - return B # Early exit if possible - end - info = Ref{BlasInt}() - ccall((@blasfunc($gtsv), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - n, size(B,2), dl, d, du, B, max(1,stride(B,2)), info) - chklapackerror(info[]) - B - end - - # SUBROUTINE DGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) - # .. Scalar Arguments .. - # INTEGER INFO, N - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ) - function gttrf!(dl::AbstractVector{$elty}, d::AbstractVector{$elty}, du::AbstractVector{$elty}) - require_one_based_indexing(dl, d, du) - chkstride1(dl,d,du) - n = length(d) - if length(dl) != n - 1 - throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $(n - 1)")) - end - if length(du) != n - 1 - throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $(n - 1)")) - end - du2 = similar(d, $elty, n-2) - ipiv = similar(d, BlasInt, n) - info = Ref{BlasInt}() - ccall((@blasfunc($gttrf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{BlasInt}, Ptr{BlasInt}), - n, dl, d, du, du2, ipiv, info) - chklapackerror(info[]) - dl, d, du, du2, ipiv - end - - # SUBROUTINE DGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, INFO ) - # .. Scalar Arguments .. - # CHARACTER TRANS - # INTEGER INFO, LDB, N, NRHS - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) - function gttrs!(trans::AbstractChar, dl::AbstractVector{$elty}, d::AbstractVector{$elty}, - du::AbstractVector{$elty}, du2::AbstractVector{$elty}, ipiv::AbstractVector{BlasInt}, - B::AbstractVecOrMat{$elty}) - require_one_based_indexing(dl, d, du, du2, ipiv, B) - chktrans(trans) - chkstride1(B, ipiv, dl, d, du, du2) - n = length(d) - if length(dl) != n - 1 - throw(DimensionMismatch("subdiagonal has length $(length(dl)), but should be $(n - 1)")) - end - if length(du) != n - 1 - throw(DimensionMismatch("superdiagonal has length $(length(du)), but should be $(n - 1)")) - end - if n != size(B,1) - throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($gttrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - trans, n, size(B,2), dl, d, du, du2, ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -""" - gtsv!(dl, d, du, B) - -Solves the equation `A * X = B` where `A` is a tridiagonal matrix with -`dl` on the subdiagonal, `d` on the diagonal, and `du` on the -superdiagonal. - -Overwrites `B` with the solution `X` and returns it. 
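A minimal sketch (the 4×4 tridiagonal system is arbitrary; the diagonals and right-hand side are overwritten, so copies are passed):

```julia
using LinearAlgebra

dl = [1.0, 1.0, 1.0]          # subdiagonal
d  = [4.0, 4.0, 4.0, 4.0]     # diagonal
du = [2.0, 2.0, 2.0]          # superdiagonal
b  = [1.0, 2.0, 3.0, 4.0]
x  = LinearAlgebra.LAPACK.gtsv!(copy(dl), copy(d), copy(du), copy(b))
Tridiagonal(dl, d, du) * x ≈ b
```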
-""" -gtsv!(dl::AbstractVector, d::AbstractVector, du::AbstractVector, B::AbstractVecOrMat) - -""" - gttrf!(dl, d, du) -> (dl, d, du, du2, ipiv) - -Finds the `LU` factorization of a tridiagonal matrix with `dl` on the -subdiagonal, `d` on the diagonal, and `du` on the superdiagonal. - -Modifies `dl`, `d`, and `du` in-place and returns them and the second -superdiagonal `du2` and the pivoting vector `ipiv`. -""" -gttrf!(dl::AbstractVector, d::AbstractVector, du::AbstractVector) - -""" - gttrs!(trans, dl, d, du, du2, ipiv, B) - -Solves the equation `A * X = B` (`trans = N`), `transpose(A) * X = B` (`trans = T`), -or `adjoint(A) * X = B` (`trans = C`) using the `LU` factorization computed by -`gttrf!`. `B` is overwritten with the solution `X`. -""" -gttrs!(trans::AbstractChar, dl::AbstractVector, d::AbstractVector, du::AbstractVector, du2::AbstractVector, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat) - -## (OR) orthogonal (or UN, unitary) matrices, extractors and multiplication -for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in - ((:dorglq_,:dorgqr_,:dorgql_,:dorgrq_,:dormlq_,:dormqr_,:dormql_,:dormrq_,:dgemqrt_,:Float64), - (:sorglq_,:sorgqr_,:sorgql_,:sorgrq_,:sormlq_,:sormqr_,:sormql_,:sormrq_,:sgemqrt_,:Float32), - (:zunglq_,:zungqr_,:zungql_,:zungrq_,:zunmlq_,:zunmqr_,:zunmql_,:zunmrq_,:zgemqrt_,:ComplexF64), - (:cunglq_,:cungqr_,:cungql_,:cungrq_,:cunmlq_,:cunmqr_,:cunmql_,:cunmrq_,:cgemqrt_,:ComplexF32)) - @eval begin - # SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, K, LDA, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function orglq!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau)) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - n = size(A, 2) - m = min(n, size(A, 1)) - if k > m - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($orglq), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, k, A, max(1,stride(A,2)), tau, work, lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - if m < size(A,1) - A[1:m,:] - else - A - end - end - - # SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, K, LDA, LWORK, M, N - # * .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function orgqr!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau)) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m = size(A, 1) - n = min(m, size(A, 2)) - if k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($orgqr), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, k, A, - max(1,stride(A,2)), tau, work, lwork, - info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - if n < size(A,2) - A[:,1:n] - else - A - end - end - - # SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, K, LDA, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function orgql!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau)) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m = size(A, 1) - n = min(m, size(A, 2)) - if k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($orgql), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, k, A, - max(1,stride(A,2)), tau, work, lwork, - info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - if n < size(A,2) - A[:,1:n] - else - A - end - end - - # SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # INTEGER INFO, K, LDA, LWORK, M, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function orgrq!(A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}, k::Integer = length(tau)) - require_one_based_indexing(A, tau) - chkstride1(A,tau) - m, n = size(A) - if n < m - throw(DimensionMismatch("input matrix A has dimensions ($m,$n), but cannot have fewer columns than rows")) - end - if k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($orgrq), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - m, n, k, A, - max(1,stride(A,2)), tau, work, lwork, - info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A - end - - # SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, - # WORK, LWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER SIDE, TRANS - # INTEGER INFO, K, LDA, LDC, LWORK, M, N - # .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) - function ormlq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, tau, C) - chktrans(trans) - chkside(side) - chkstride1(A, C, tau) - m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1) - nA = size(A, 2) - k = length(tau) - if side == 'L' && m != nA - throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA")) - end - if side == 'R' && n != nA - throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $n, must equal the second dimension of A, $nA")) - end - if side == 'L' && k > m - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m")) - end - if side == 'R' && k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormlq), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - side, trans, m, n, k, A, max(1,stride(A,2)), tau, - C, max(1,stride(C,2)), work, lwork, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - - # SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, - # WORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER SIDE, TRANS - # INTEGER INFO, K, LDA, LDC, M, N - # .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) - function ormqr!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, tau, C) - chktrans(trans) - chkside(side) - chkstride1(A, C, tau) - m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1) - mA = size(A, 1) - k = length(tau) - if side == 'L' && m != mA - throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA")) - end - if side == 'R' && n != mA - throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA")) - end - if side == 'L' && k > m - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m")) - end - if side == 'R' && k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormqr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), - side, trans, m, n, - k, A, max(1,stride(A,2)), tau, - C, max(1, stride(C,2)), work, lwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - - # SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, - # WORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER SIDE, TRANS - # INTEGER INFO, K, LDA, LDC, M, N - # .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) - function ormql!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, tau, C) - chktrans(trans) - chkside(side) - chkstride1(A, C, tau) - m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1) - mA = size(A, 1) - k = length(tau) - if side == 'L' && m != mA - throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $mA")) - end - if side == 'R' && n != mA - throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $mA")) - end - if side == 'L' && k > m - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m")) - end - if side == 'R' && k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormql), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), - side, trans, m, n, - k, A, max(1,stride(A,2)), tau, - C, max(1, stride(C,2)), work, lwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - - # SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, - # WORK, LWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER SIDE, TRANS - # INTEGER INFO, K, LDA, LDC, LWORK, M, N - # .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) - function ormrq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, tau, C) - chktrans(trans) - chkside(side) - chkstride1(A, C, tau) - m,n = ndims(C) == 2 ? 
size(C) : (size(C, 1), 1) - nA = size(A, 2) - k = length(tau) - if side == 'L' && m != nA - throw(DimensionMismatch("for a left-sided multiplication, the first dimension of C, $m, must equal the second dimension of A, $nA")) - end - if side == 'R' && n != nA - throw(DimensionMismatch("for a right-sided multiplication, the second dimension of C, $m, must equal the second dimension of A, $nA")) - end - if side == 'L' && k > m - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= m = $m")) - end - if side == 'R' && k > n - throw(DimensionMismatch("invalid number of reflectors: k = $k should be <= n = $n")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormrq), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - side, trans, m, n, k, A, max(1,stride(A,2)), tau, - C, max(1,stride(C,2)), work, lwork, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - - function gemqrt!(side::AbstractChar, trans::AbstractChar, V::AbstractMatrix{$elty}, T::AbstractMatrix{$elty}, C::AbstractVecOrMat{$elty}) - require_one_based_indexing(V, T, C) - chktrans(trans) - chkside(side) - chkstride1(V, T, C) - m,n = ndims(C) == 2 ? size(C) : (size(C, 1), 1) - nb, k = size(T) - if k == 0 - return C - end - if side == 'L' - if !(0 <= k <= m) - throw(DimensionMismatch("wrong value for k = $k: must be between 0 and $m")) - end - if m != size(V,1) - throw(DimensionMismatch("first dimensions of C, $m, and V, $(size(V,1)) must match")) - end - ldv = stride(V,2) - if ldv < max(1, m) - throw(DimensionMismatch("Q and C don't fit! The stride of V, $ldv, is too small")) - end - wss = n*k - elseif side == 'R' - if !(0 <= k <= n) - throw(DimensionMismatch("wrong value for k = $k: must be between 0 and $n")) - end - if n != size(V,1) - throw(DimensionMismatch("second dimension of C, $n, and first dimension of V, $(size(V,1)) must match")) - end - ldv = stride(V,2) - if ldv < max(1, n) - throw(DimensionMismatch("Q and C don't fit! The stride of V, $ldv, is too small")) - end - wss = m*k - end - if !(1 <= nb <= k) - throw(DimensionMismatch("wrong value for nb = $nb, which must be between 1 and $k")) - end - ldc = stride(C, 2) - work = Vector{$elty}(undef, wss) - info = Ref{BlasInt}() - ccall((@blasfunc($gemqrt), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Clong, Clong), - side, trans, m, n, - k, nb, V, ldv, - T, max(1,stride(T,2)), C, max(1,ldc), - work, info, 1, 1) - chklapackerror(info[]) - return C - end - end -end - -""" - orglq!(A, tau, k = length(tau)) - -Explicitly finds the matrix `Q` of a `LQ` factorization after calling -`gelqf!` on `A`. Uses the output of `gelqf!`. `A` is overwritten by `Q`. -""" -orglq!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau)) - -""" - orgqr!(A, tau, k = length(tau)) - -Explicitly finds the matrix `Q` of a `QR` factorization after calling -`geqrf!` on `A`. Uses the output of `geqrf!`. `A` is overwritten by `Q`. 
-""" -orgqr!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau)) - -""" - orgql!(A, tau, k = length(tau)) - -Explicitly finds the matrix `Q` of a `QL` factorization after calling -`geqlf!` on `A`. Uses the output of `geqlf!`. `A` is overwritten by `Q`. -""" -orgql!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau)) - -""" - orgrq!(A, tau, k = length(tau)) - -Explicitly finds the matrix `Q` of a `RQ` factorization after calling -`gerqf!` on `A`. Uses the output of `gerqf!`. `A` is overwritten by `Q`. -""" -orgrq!(A::AbstractMatrix, tau::AbstractVector, k::Integer = length(tau)) - -""" - ormlq!(side, trans, A, tau, C) - -Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C` -(`trans = C`) for `side = L` or the equivalent right-sided multiplication -for `side = R` using `Q` from a `LQ` factorization of `A` computed using -`gelqf!`. `C` is overwritten. -""" -ormlq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat) - -""" - ormqr!(side, trans, A, tau, C) - -Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C` -(`trans = C`) for `side = L` or the equivalent right-sided multiplication -for `side = R` using `Q` from a `QR` factorization of `A` computed using -`geqrf!`. `C` is overwritten. -""" -ormqr!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat) - -""" - ormql!(side, trans, A, tau, C) - -Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C` -(`trans = C`) for `side = L` or the equivalent right-sided multiplication -for `side = R` using `Q` from a `QL` factorization of `A` computed using -`geqlf!`. `C` is overwritten. -""" -ormql!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat) - -""" - ormrq!(side, trans, A, tau, C) - -Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C` -(`trans = C`) for `side = L` or the equivalent right-sided multiplication -for `side = R` using `Q` from a `RQ` factorization of `A` computed using -`gerqf!`. `C` is overwritten. -""" -ormrq!(side::AbstractChar, trans::AbstractChar, A::AbstractMatrix, tau::AbstractVector, C::AbstractVecOrMat) - -""" - gemqrt!(side, trans, V, T, C) - -Computes `Q * C` (`trans = N`), `transpose(Q) * C` (`trans = T`), `adjoint(Q) * C` -(`trans = C`) for `side = L` or the equivalent right-sided multiplication -for `side = R` using `Q` from a `QR` factorization of `A` computed using -`geqrt!`. `C` is overwritten. -""" -gemqrt!(side::AbstractChar, trans::AbstractChar, V::AbstractMatrix, T::AbstractMatrix, C::AbstractVecOrMat) - -# (PO) positive-definite symmetric matrices, -for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in - ((:dposv_,:dpotrf_,:dpotri_,:dpotrs_,:dpstrf_,:Float64,:Float64), - (:sposv_,:spotrf_,:spotri_,:spotrs_,:spstrf_,:Float32,:Float32), - (:zposv_,:zpotrf_,:zpotri_,:zpotrs_,:zpstrf_,:ComplexF64,:Float64), - (:cposv_,:cpotrf_,:cpotri_,:cpotrs_,:cpstrf_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) - #* .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function posv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - n = checksquare(A) - chkuplo(uplo) - if size(B,1) != n - throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($posv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), B, max(1,stride(B,2)), info, 1) - chkargsok(info[]) - chkposdef(info[]) - A, B - end - - # SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ) - function potrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - checksquare(A) - chkuplo(uplo) - lda = max(1,stride(A,2)) - if lda == 0 - return A, 0 - end - info = Ref{BlasInt}() - ccall((@blasfunc($potrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, size(A,1), A, lda, info, 1) - chkargsok(info[]) - #info[] > 0 means the leading minor of order info[] is not positive definite - #ordinarily, throw Exception here, but return error code here - #this simplifies isposdef! and factorize - return A, info[] # info stored in Cholesky - end - - # SUBROUTINE DPOTRI( UPLO, N, A, LDA, INFO ) - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ) - function potri!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - chkuplo(uplo) - info = Ref{BlasInt}() - ccall((@blasfunc($potri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, size(A,1), A, max(1,stride(A,2)), info, 1) - chkargsok(info[]) - chknonsingular(info[]) - A - end - - # SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function potrs!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A, B) - n = checksquare(A) - chkuplo(uplo) - nrhs = size(B,2) - if size(B,1) != n - throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!")) - end - lda = max(1,stride(A,2)) - if lda == 0 || nrhs == 0 - return B - end - ldb = max(1,stride(B,2)) - info = Ref{BlasInt}() - ccall((@blasfunc($potrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, nrhs, A, - lda, B, ldb, info, 1) - chklapackerror(info[]) - return B - end - - # SUBROUTINE DPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) - # .. Scalar Arguments .. - # DOUBLE PRECISION TOL - # INTEGER INFO, LDA, N, RANK - # CHARACTER UPLO - # .. Array Arguments .. 
- # DOUBLE PRECISION A( LDA, * ), WORK( 2*N ) - # INTEGER PIV( N ) - function pstrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, tol::Real) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - piv = similar(A, BlasInt, n) - rank = Vector{BlasInt}(undef, 1) - work = Vector{$rtyp}(undef, 2n) - info = Ref{BlasInt}() - ccall((@blasfunc($pstrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{BlasInt}, Ref{$rtyp}, Ptr{$rtyp}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), piv, rank, tol, work, info, 1) - chkargsok(info[]) - A, piv, rank[1], info[] #Stored in CholeskyPivoted - end - end -end - -""" - posv!(uplo, A, B) -> (A, B) - -Finds the solution to `A * X = B` where `A` is a symmetric or Hermitian -positive definite matrix. If `uplo = U` the upper Cholesky decomposition -of `A` is computed. If `uplo = L` the lower Cholesky decomposition of `A` -is computed. `A` is overwritten by its Cholesky decomposition. `B` is -overwritten with the solution `X`. -""" -posv!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat) - -""" - potrf!(uplo, A) - -Computes the Cholesky (upper if `uplo = U`, lower if `uplo = L`) -decomposition of positive-definite matrix `A`. `A` is overwritten and -returned with an info code. -""" -potrf!(uplo::AbstractChar, A::AbstractMatrix) - -""" - potri!(uplo, A) - -Computes the inverse of positive-definite matrix `A` after calling -`potrf!` to find its (upper if `uplo = U`, lower if `uplo = L`) Cholesky -decomposition. - -`A` is overwritten by its inverse and returned. -""" -potri!(uplo::AbstractChar, A::AbstractMatrix) - -""" - potrs!(uplo, A, B) - -Finds the solution to `A * X = B` where `A` is a symmetric or Hermitian -positive definite matrix whose Cholesky decomposition was computed by -`potrf!`. If `uplo = U` the upper Cholesky decomposition of `A` was -computed. If `uplo = L` the lower Cholesky decomposition of `A` was -computed. `B` is overwritten with the solution `X`. -""" -potrs!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat) - -""" - pstrf!(uplo, A, tol) -> (A, piv, rank, info) - -Computes the (upper if `uplo = U`, lower if `uplo = L`) pivoted Cholesky -decomposition of positive-definite matrix `A` with a user-set tolerance -`tol`. `A` is overwritten by its Cholesky decomposition. - -Returns `A`, the pivots `piv`, the rank of `A`, and an `info` code. If `info = 0`, -the factorization succeeded. If `info = i > 0 `, then `A` is indefinite or -rank-deficient. -""" -pstrf!(uplo::AbstractChar, A::AbstractMatrix, tol::Real) - -# (PT) positive-definite, symmetric, tri-diagonal matrices -# Direct solvers for general tridiagonal and symmetric positive-definite tridiagonal -for (ptsv, pttrf, elty, relty) in - ((:dptsv_,:dpttrf_,:Float64,:Float64), - (:sptsv_,:spttrf_,:Float32,:Float32), - (:zptsv_,:zpttrf_,:ComplexF64,:Float64), - (:cptsv_,:cpttrf_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE DPTSV( N, NRHS, D, E, B, LDB, INFO ) - # .. Scalar Arguments .. - # INTEGER INFO, LDB, N, NRHS - # .. Array Arguments .. 
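A short sketch of the Cholesky path documented above (`potrf!` to factor once, `potrs!` to solve), assuming `LinearAlgebra.LAPACK`; the one-shot driver `posv!` combines both steps:

    using LinearAlgebra: LAPACK, I
    A = rand(5, 5); A = A' * A + 5I          # symmetric positive definite
    B = rand(5, 2)
    AF, info = LAPACK.potrf!('U', copy(A))   # info == 0 signals success
    X = LAPACK.potrs!('U', AF, copy(B))      # reuse the factor for these right-hand sides
    A * X ≈ B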
- # DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) - function ptsv!(D::AbstractVector{$relty}, E::AbstractVector{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(D, E, B) - chkstride1(B, D, E) - n = length(D) - if length(E) != n - 1 - throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)")) - end - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($ptsv), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - n, size(B,2), D, E, B, max(1,stride(B,2)), info) - chklapackerror(info[]) - B - end - - # SUBROUTINE DPTTRF( N, D, E, INFO ) - # .. Scalar Arguments .. - # INTEGER INFO, N - # .. Array Arguments .. - # DOUBLE PRECISION D( * ), E( * ) - function pttrf!(D::AbstractVector{$relty}, E::AbstractVector{$elty}) - require_one_based_indexing(D, E) - chkstride1(D, E) - n = length(D) - if length(E) != n - 1 - throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($pttrf), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, Ptr{BlasInt}), - n, D, E, info) - chklapackerror(info[]) - D, E - end - end -end - -""" - ptsv!(D, E, B) - -Solves `A * X = B` for positive-definite tridiagonal `A`. `D` is the -diagonal of `A` and `E` is the off-diagonal. `B` is overwritten with the -solution `X` and returned. -""" -ptsv!(D::AbstractVector, E::AbstractVector, B::AbstractVecOrMat) - -""" - pttrf!(D, E) - -Computes the LDLt factorization of a positive-definite tridiagonal matrix -with `D` as diagonal and `E` as off-diagonal. `D` and `E` are overwritten -and returned. -""" -pttrf!(D::AbstractVector, E::AbstractVector) - -for (pttrs, elty, relty) in - ((:dpttrs_,:Float64,:Float64), - (:spttrs_,:Float32,:Float32)) - @eval begin - # SUBROUTINE DPTTRS( N, NRHS, D, E, B, LDB, INFO ) - # .. Scalar Arguments .. - # INTEGER INFO, LDB, N, NRHS - # .. Array Arguments .. - # DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) - function pttrs!(D::AbstractVector{$relty}, E::AbstractVector{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(D, E, B) - chkstride1(B, D, E) - n = length(D) - if length(E) != n - 1 - throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)")) - end - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($pttrs), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}), - n, size(B,2), D, E, B, max(1,stride(B,2)), info) - chklapackerror(info[]) - B - end - end -end - -for (pttrs, elty, relty) in - ((:zpttrs_,:ComplexF64,:Float64), - (:cpttrs_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ZPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDB, N, NRHS - # * .. - # * .. Array Arguments .. 
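A sketch of `ptsv!` for a symmetric positive-definite tridiagonal system, assuming `LinearAlgebra.LAPACK`; `SymTridiagonal` is used only for the check:

    using LinearAlgebra: LAPACK, SymTridiagonal
    n = 5
    D = fill(2.0, n); E = fill(-1.0, n - 1)       # 1-D discrete Laplacian, positive definite
    b = rand(n)
    x = LAPACK.ptsv!(copy(D), copy(E), copy(b))   # ptsv! overwrites D, E and the right-hand side
    SymTridiagonal(D, E) * x ≈ b

For several right-hand sides, `pttrf!` followed by `pttrs!` (documented below) factors once and reuses the LDLt factorization.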
- # DOUBLE PRECISION D( * ) - # COMPLEX*16 B( LDB, * ), E( * ) - function pttrs!(uplo::AbstractChar, D::AbstractVector{$relty}, E::AbstractVector{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(D, E, B) - chkstride1(B, D, E) - chkuplo(uplo) - n = length(D) - if length(E) != n - 1 - throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)")) - end - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($pttrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), D, E, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -""" - pttrs!(D, E, B) - -Solves `A * X = B` for positive-definite tridiagonal `A` with diagonal -`D` and off-diagonal `E` after computing `A`'s LDLt factorization using -`pttrf!`. `B` is overwritten with the solution `X`. -""" -pttrs!(D::AbstractVector, E::AbstractVector, B::AbstractVecOrMat) - -## (TR) triangular matrices: solver and inverse -for (trtri, trtrs, elty) in - ((:dtrtri_,:dtrtrs_,:Float64), - (:strtri_,:strtrs_,:Float32), - (:ztrtri_,:ztrtrs_,:ComplexF64), - (:ctrtri_,:ctrtrs_,:ComplexF32)) - @eval begin - # SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) - #* .. Scalar Arguments .. - # CHARACTER DIAG, UPLO - # INTEGER INFO, LDA, N - # .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ) - function trtri!(uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - chkdiag(diag) - lda = max(1,stride(A, 2)) - info = Ref{BlasInt}() - ccall((@blasfunc($trtri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), - uplo, diag, n, A, lda, info, 1, 1) - chklapackerror(info[]) - A - end - - # SUBROUTINE DTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, INFO ) - # * .. Scalar Arguments .. - # CHARACTER DIAG, TRANS, UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function trtrs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, - A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chktrans(trans) - chkdiag(diag) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($trtrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Clong, Clong, Clong), - uplo, trans, diag, n, size(B,2), A, max(1,stride(A,2)), - B, max(1,stride(B,2)), info, - 1, 1, 1) - chklapackerror(info[]) - B - end - end -end - -""" - trtri!(uplo, diag, A) - -Finds the inverse of (upper if `uplo = U`, lower if `uplo = L`) -triangular matrix `A`. If `diag = N`, `A` has non-unit diagonal elements. -If `diag = U`, all diagonal elements of `A` are one. `A` is overwritten -with its inverse. -""" -trtri!(uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix) - -""" - trtrs!(uplo, trans, diag, A, B) - -Solves `A * X = B` (`trans = N`), `transpose(A) * X = B` (`trans = T`), or -`adjoint(A) * X = B` (`trans = C`) for (upper if `uplo = U`, lower if `uplo = L`) -triangular matrix `A`. If `diag = N`, `A` has non-unit diagonal elements. 
-If `diag = U`, all diagonal elements of `A` are one. `B` is overwritten -with the solution `X`. -""" -trtrs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat) - -#Eigenvector computation and condition number estimation -for (trcon, trevc, trrfs, elty) in - ((:dtrcon_,:dtrevc_,:dtrrfs_,:Float64), - (:strcon_,:strevc_,:strrfs_,:Float32)) - @eval begin - # SUBROUTINE DTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, - # IWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER DIAG, NORM, UPLO - # INTEGER INFO, LDA, N - # DOUBLE PRECISION RCOND - # .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - chkdiag(diag) - n = checksquare(A) - chkuplo(uplo) - rcond = Ref{$elty}() - work = Vector{$elty}(undef, 3n) - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($trcon), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Clong, Clong, Clong), - norm, uplo, diag, n, - A, max(1,stride(A,2)), rcond, work, iwork, info, - 1, 1, 1) - chklapackerror(info[]) - rcond[] - end - - # SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, - # LDVR, MM, M, WORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER HOWMNY, SIDE - # INTEGER INFO, LDT, LDVL, LDVR, M, MM, N - # .. - # .. Array Arguments .. - # LOGICAL SELECT( * ) - # DOUBLE PRECISION T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), - #$ WORK( * ) - function trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, - VL::AbstractMatrix{$elty} = similar(T), - VR::AbstractMatrix{$elty} = similar(T)) - require_one_based_indexing(select, T, VL, VR) - # Extract - if side ∉ ['L','R','B'] - throw(ArgumentError("side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side")) - end - n, mm = checksquare(T), size(VL, 2) - ldt, ldvl, ldvr = stride(T, 2), stride(VL, 2), stride(VR, 2) - - # Check - chkstride1(T, select, VL, VR) - - # Allocate - m = Ref{BlasInt}() - work = Vector{$elty}(undef, 3n) - info = Ref{BlasInt}() - - ccall((@blasfunc($trevc), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Clong, Clong), - side, howmny, select, n, - T, ldt, VL, ldvl, - VR, ldvr, mm, m, - work, info, 1, 1) - chklapackerror(info[]) - - #Decide what exactly to return - if howmny == 'S' #compute selected eigenvectors - if side == 'L' #left eigenvectors only - return select, VL[:,1:m[]] - elseif side == 'R' #right eigenvectors only - return select, VR[:,1:m[]] - else #side == 'B' #both eigenvectors - return select, VL[:,1:m[]], VR[:,1:m[]] - end - else #compute all eigenvectors - if side == 'L' #left eigenvectors only - return VL[:,1:m[]] - elseif side == 'R' #right eigenvectors only - return VR[:,1:m[]] - else #side == 'B' #both eigenvectors - return VL[:,1:m[]], VR[:,1:m[]] - end - end - end - - # SUBROUTINE DTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, - # LDX, FERR, BERR, WORK, IWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER DIAG, TRANS, UPLO - # INTEGER INFO, LDA, LDB, LDX, N, NRHS - # .. Array Arguments .. 
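A sketch of the triangular routines documented above, assuming `LinearAlgebra.LAPACK`; `trtrs!` overwrites only the right-hand side, while `trtri!` inverts in place:

    using LinearAlgebra: LAPACK, UpperTriangular, I
    R = [2.0 1.0 0.5; 0.0 3.0 1.0; 0.0 0.0 4.0]
    b = rand(3)
    x = LAPACK.trtrs!('U', 'N', 'N', R, copy(b))   # solve R * x = b; 'T'/'C' use the (conjugate) transpose
    UpperTriangular(R) * x ≈ b
    Rinv = LAPACK.trtri!('U', 'N', copy(R))        # triangular inverse, computed in place
    R * Rinv ≈ Matrix(I, 3, 3)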
- # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), - #$ WORK( * ), X( LDX, * ) - function trrfs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, - A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, X::AbstractVecOrMat{$elty}, - Ferr::AbstractVector{$elty} = similar(B, $elty, size(B,2)), - Berr::AbstractVector{$elty} = similar(B, $elty, size(B,2))) - require_one_based_indexing(A, B, X, Ferr, Berr) - chkstride1(A, B, X, Ferr, Berr) - chktrans(trans) - chkuplo(uplo) - chkdiag(diag) - n = size(A,2) - nrhs = size(B,2) - if nrhs != size(X,2) - throw(DimensionMismatch("second dimensions of B, $nrhs, and X, $(size(X,2)), must match")) - end - work = Vector{$elty}(undef, 3n) - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($trrfs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong), - uplo, trans, diag, n, - nrhs, A, max(1,stride(A,2)), B, max(1,stride(B,2)), X, max(1,stride(X,2)), - Ferr, Berr, work, iwork, info, 1, 1, 1) - chklapackerror(info[]) - Ferr, Berr - end - end -end - -for (trcon, trevc, trrfs, elty, relty) in - ((:ztrcon_,:ztrevc_,:ztrrfs_,:ComplexF64,:Float64), - (:ctrcon_,:ctrevc_,:ctrrfs_,:ComplexF32, :Float32)) - @eval begin - # SUBROUTINE ZTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, - # RWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER DIAG, NORM, UPLO - # INTEGER INFO, LDA, N - # DOUBLE PRECISION RCOND - # .. Array Arguments .. - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - chkdiag(diag) - rcond = Ref{$relty}(1) - work = Vector{$elty}(undef, 2n) - rwork = Vector{$relty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($trcon), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, - Clong, Clong, Clong), - norm, uplo, diag, n, - A, max(1,stride(A,2)), rcond, work, rwork, info, - 1, 1, 1) - chklapackerror(info[]) - rcond[] - end - - # SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, - # LDVR, MM, M, WORK, RWORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER HOWMNY, SIDE - # INTEGER INFO, LDT, LDVL, LDVR, M, MM, N - # .. - # .. Array Arguments .. 
- # LOGICAL SELECT( * ) - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), - #$ WORK( * ) - function trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, - VL::AbstractMatrix{$elty} = similar(T), - VR::AbstractMatrix{$elty} = similar(T)) - require_one_based_indexing(select, T, VL, VR) - # Extract - n, mm = checksquare(T), size(VL, 2) - ldt, ldvl, ldvr = stride(T, 2), stride(VL, 2), stride(VR, 2) - - # Check - chkstride1(T, select, VL, VR) - if side ∉ ['L','R','B'] - throw(ArgumentError("side argument must be 'L' (left eigenvectors), 'R' (right eigenvectors), or 'B' (both), got $side")) - end - - # Allocate - m = Ref{BlasInt}() - work = Vector{$elty}(undef, 2n) - rwork = Vector{$relty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($trevc), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong), - side, howmny, select, n, - T, ldt, VL, ldvl, - VR, ldvr, mm, m, - work, rwork, info, 1, 1) - chklapackerror(info[]) - - #Decide what exactly to return - if howmny == 'S' #compute selected eigenvectors - if side == 'L' #left eigenvectors only - return select, VL[:,1:m[]] - elseif side == 'R' #right eigenvectors only - return select, VR[:,1:m[]] - else #side=='B' #both eigenvectors - return select, VL[:,1:m[]], VR[:,1:m[]] - end - else #compute all eigenvectors - if side == 'L' #left eigenvectors only - return VL[:,1:m[]] - elseif side == 'R' #right eigenvectors only - return VR[:,1:m[]] - else #side=='B' #both eigenvectors - return VL[:,1:m[]], VR[:,1:m[]] - end - end - end - - # SUBROUTINE ZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, - # LDX, FERR, BERR, WORK, IWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER DIAG, TRANS, UPLO - # INTEGER INFO, LDA, LDB, LDX, N, NRHS - # .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), - #$ WORK( * ), X( LDX, * ) - function trrfs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, - A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}, X::AbstractVecOrMat{$elty}, - Ferr::AbstractVector{$relty} = similar(B, $relty, size(B,2)), - Berr::AbstractVector{$relty} = similar(B, $relty, size(B,2))) - require_one_based_indexing(A, B, X, Ferr, Berr) - chkstride1(A, B, X, Ferr, Berr) - chktrans(trans) - chkuplo(uplo) - chkdiag(diag) - n = size(A,2) - nrhs = size(B,2) - if nrhs != size(X,2) - throw(DimensionMismatch("second dimensions of B, $nrhs, and X, $(size(X,2)), must match")) - end - work = Vector{$elty}(undef, 2n) - rwork = Vector{$relty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($trrfs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong), - uplo, trans, diag, n, - nrhs, A, max(1,stride(A,2)), B, max(1,stride(B,2)), X, max(1,stride(X,2)), - Ferr, Berr, work, rwork, info, 1, 1, 1) - chklapackerror(info[]) - Ferr, Berr - end - end -end - -""" - trcon!(norm, uplo, diag, A) - -Finds the reciprocal condition number of (upper if `uplo = U`, lower if -`uplo = L`) triangular matrix `A`. If `diag = N`, `A` has non-unit -diagonal elements. 
If `diag = U`, all diagonal elements of `A` are one. -If `norm = I`, the condition number is found in the infinity norm. If -`norm = O` or `1`, the condition number is found in the one norm. -""" -trcon!(norm::AbstractChar, uplo::AbstractChar, diag::AbstractChar, A::AbstractMatrix) - -""" - trevc!(side, howmny, select, T, VL = similar(T), VR = similar(T)) - -Finds the eigensystem of an upper triangular matrix `T`. If `side = R`, -the right eigenvectors are computed. If `side = L`, the left -eigenvectors are computed. If `side = B`, both sets are computed. If -`howmny = A`, all eigenvectors are found. If `howmny = B`, all -eigenvectors are found and backtransformed using `VL` and `VR`. If -`howmny = S`, only the eigenvectors corresponding to the values in -`select` are computed. -""" -trevc!(side::AbstractChar, howmny::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix, - VL::AbstractMatrix = similar(T), VR::AbstractMatrix = similar(T)) - -""" - trrfs!(uplo, trans, diag, A, B, X, Ferr, Berr) -> (Ferr, Berr) - -Estimates the error in the solution to `A * X = B` (`trans = N`), -`transpose(A) * X = B` (`trans = T`), `adjoint(A) * X = B` (`trans = C`) for `side = L`, -or the equivalent equations a right-handed `side = R` `X * A` after -computing `X` using `trtrs!`. If `uplo = U`, `A` is upper triangular. -If `uplo = L`, `A` is lower triangular. If `diag = N`, `A` has non-unit -diagonal elements. If `diag = U`, all diagonal elements of `A` are one. -`Ferr` and `Berr` are optional inputs. `Ferr` is the forward error and -`Berr` is the backward error, each component-wise. -""" -trrfs!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat, - X::AbstractVecOrMat, Ferr::AbstractVector, Berr::AbstractVector) - -## (ST) Symmetric tridiagonal - eigendecomposition -for (stev, stebz, stegr, stein, elty) in - ((:dstev_,:dstebz_,:dstegr_,:dstein_,:Float64), - (:sstev_,:sstebz_,:sstegr_,:sstein_,:Float32) -# , (:zstev_,:ComplexF64) Need to rewrite for ZHEEV, rwork, etc. -# , (:cstev_,:ComplexF32) - ) - @eval begin - function stev!(job::AbstractChar, dv::AbstractVector{$elty}, ev::AbstractVector{$elty}) - require_one_based_indexing(dv, ev) - chkstride1(dv, ev) - n = length(dv) - if length(ev) != n - 1 && length(ev) != n - throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than or equal to dv's length, $n)")) - end - Zmat = similar(dv, $elty, (n, job != 'N' ? n : 0)) - work = Vector{$elty}(undef, max(1, 2n-2)) - info = Ref{BlasInt}() - ccall((@blasfunc($stev), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - job, n, dv, ev, Zmat, n, work, info, 1) - chklapackerror(info[]) - dv, Zmat - end - - #* DSTEBZ computes the eigenvalues of a symmetric tridiagonal - #* matrix T. The user may ask for all eigenvalues, all eigenvalues - #* in the half-open interval (VL, VU], or the IL-th through IU-th - #* eigenvalues. 
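A sketch of `trcon!` for the condition-number estimate described above, assuming `LinearAlgebra.LAPACK`; note that the routine returns an estimate of the reciprocal condition number, not an exact value:

    using LinearAlgebra: LAPACK, UpperTriangular, cond, triu, I
    R = triu(rand(4, 4) + 4I)
    rc = LAPACK.trcon!('O', 'U', 'N', R)     # estimate of 1 / cond(UpperTriangular(R), 1)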
- function stebz!(range::AbstractChar, order::AbstractChar, vl::$elty, vu::$elty, il::Integer, iu::Integer, abstol::Real, dv::AbstractVector{$elty}, ev::AbstractVector{$elty}) - require_one_based_indexing(dv, ev) - chkstride1(dv, ev) - n = length(dv) - if length(ev) != n - 1 - throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than dv's length, $n)")) - end - m = Ref{BlasInt}() - nsplit = Vector{BlasInt}(undef, 1) - w = similar(dv, $elty, n) - tmp = 0.0 - iblock = similar(dv, BlasInt,n) - isplit = similar(dv, BlasInt,n) - work = Vector{$elty}(undef, 4*n) - iwork = Vector{BlasInt}(undef, 3*n) - info = Ref{BlasInt}() - ccall((@blasfunc($stebz), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, - Ref{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, - Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, - Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong), - range, order, n, vl, - vu, il, iu, abstol, - dv, ev, m, nsplit, - w, iblock, isplit, work, - iwork, info, 1, 1) - chklapackerror(info[]) - w[1:m[]], iblock[1:m[]], isplit[1:nsplit[1]] - end - - function stegr!(jobz::AbstractChar, range::AbstractChar, dv::AbstractVector{$elty}, ev::AbstractVector{$elty}, vl::Real, vu::Real, il::Integer, iu::Integer) - require_one_based_indexing(dv, ev) - chkstride1(dv, ev) - n = length(dv) - ne = length(ev) - if ne == n - 1 - eev = [ev; zero($elty)] - elseif ne == n - eev = copy(ev) - eev[n] = zero($elty) - else - throw(DimensionMismatch("ev has length $ne but needs one less than or equal to dv's length, $n)")) - end - - abstol = Vector{$elty}(undef, 1) - m = Ref{BlasInt}() - w = similar(dv, $elty, n) - ldz = jobz == 'N' ? 1 : n - Z = similar(dv, $elty, ldz, range == 'I' ? iu-il+1 : n) - isuppz = similar(dv, BlasInt, 2*size(Z, 2)) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($stegr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{$elty}, Ref{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, - Clong, Clong), - jobz, range, n, dv, - eev, vl, vu, il, - iu, abstol, m, w, - Z, ldz, isuppz, work, - lwork, iwork, liwork, info, - 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - liwork = iwork[1] - resize!(iwork, liwork) - end - end - m[] == length(w) ? w : w[1:m[]], m[] == size(Z, 2) ? 
Z : Z[:,1:m[]] - end - - function stein!(dv::AbstractVector{$elty}, ev_in::AbstractVector{$elty}, w_in::AbstractVector{$elty}, iblock_in::AbstractVector{BlasInt}, isplit_in::AbstractVector{BlasInt}) - require_one_based_indexing(dv, ev_in, w_in, iblock_in, isplit_in) - chkstride1(dv, ev_in, w_in, iblock_in, isplit_in) - n = length(dv) - ne = length(ev_in) - if ne == n - 1 - ev = [ev_in; zero($elty)] - elseif ne == n - ev = copy(ev_in) - ev[n] = zero($elty) - else - throw(DimensionMismatch("ev_in has length $ne but needs one less than or equal to dv's length, $n)")) - end - ldz = n #Leading dimension - #Number of eigenvalues to find - if !(1 <= length(w_in) <= n) - throw(DimensionMismatch("w_in has length $(length(w_in)), but needs to be between 1 and $n")) - end - m = length(w_in) - #If iblock and isplit are invalid input, assume worst-case block partitioning, - # i.e. set the block scheme to be the entire matrix - iblock = similar(dv, BlasInt,n) - isplit = similar(dv, BlasInt,n) - w = similar(dv, $elty,n) - if length(iblock_in) < m #Not enough block specifications - iblock[1:m] = fill(BlasInt(1), m) - w[1:m] = sort(w_in) - else - iblock[1:m] = iblock_in - w[1:m] = w_in #Assume user has sorted the eigenvalues properly - end - if length(isplit_in) < 1 #Not enough block specifications - isplit[1] = n - else - isplit[1:length(isplit_in)] = isplit_in - end - z = similar(dv, $elty,(n,m)) - work = Vector{$elty}(undef, 5*n) - iwork = Vector{BlasInt}(undef, n) - ifail = Vector{BlasInt}(undef, m) - info = Ref{BlasInt}() - ccall((@blasfunc($stein), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Ptr{BlasInt}), - n, dv, ev, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info) - chklapackerror(info[]) - if any(ifail .!= 0) - # TODO: better error message / type - error("failed to converge eigenvectors:\n$(findall(!iszero, ifail))") - end - z - end - end -end -stegr!(jobz::AbstractChar, dv::AbstractVector, ev::AbstractVector) = stegr!(jobz, 'A', dv, ev, 0.0, 0.0, 0, 0) - -# Allow user to skip specification of iblock and isplit -stein!(dv::AbstractVector, ev::AbstractVector, w_in::AbstractVector) = stein!(dv, ev, w_in, zeros(BlasInt,0), zeros(BlasInt,0)) -# Allow user to specify just one eigenvector to get in stein! -stein!(dv::AbstractVector, ev::AbstractVector, eval::Real) = stein!(dv, ev, [eval], zeros(BlasInt,0), zeros(BlasInt,0)) - -""" - stev!(job, dv, ev) -> (dv, Zmat) - -Computes the eigensystem for a symmetric tridiagonal matrix with `dv` as -diagonal and `ev` as off-diagonal. If `job = N` only the eigenvalues are -found and returned in `dv`. If `job = V` then the eigenvectors are also found -and returned in `Zmat`. -""" -stev!(job::AbstractChar, dv::AbstractVector, ev::AbstractVector) - -""" - stebz!(range, order, vl, vu, il, iu, abstol, dv, ev) -> (dv, iblock, isplit) - -Computes the eigenvalues for a symmetric tridiagonal matrix with `dv` as -diagonal and `ev` as off-diagonal. If `range = A`, all the eigenvalues -are found. If `range = V`, the eigenvalues in the half-open interval -`(vl, vu]` are found. If `range = I`, the eigenvalues with indices between -`il` and `iu` are found. If `order = B`, eigvalues are ordered within a -block. If `order = E`, they are ordered across all the blocks. -`abstol` can be set as a tolerance for convergence. 
-""" -stebz!(range::AbstractChar, order::AbstractChar, vl, vu, il::Integer, iu::Integer, abstol::Real, dv::AbstractVector, ev::AbstractVector) - -""" - stegr!(jobz, range, dv, ev, vl, vu, il, iu) -> (w, Z) - -Computes the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors -(`jobz = V`) for a symmetric tridiagonal matrix with `dv` as diagonal -and `ev` as off-diagonal. If `range = A`, all the eigenvalues -are found. If `range = V`, the eigenvalues in the half-open interval -`(vl, vu]` are found. If `range = I`, the eigenvalues with indices between -`il` and `iu` are found. The eigenvalues are returned in `w` and the eigenvectors -in `Z`. -""" -stegr!(jobz::AbstractChar, range::AbstractChar, dv::AbstractVector, ev::AbstractVector, vl::Real, vu::Real, il::Integer, iu::Integer) - -""" - stein!(dv, ev_in, w_in, iblock_in, isplit_in) - -Computes the eigenvectors for a symmetric tridiagonal matrix with `dv` -as diagonal and `ev_in` as off-diagonal. `w_in` specifies the input -eigenvalues for which to find corresponding eigenvectors. `iblock_in` -specifies the submatrices corresponding to the eigenvalues in `w_in`. -`isplit_in` specifies the splitting points between the submatrix blocks. -""" -stein!(dv::AbstractVector, ev_in::AbstractVector, w_in::AbstractVector, iblock_in::AbstractVector{BlasInt}, isplit_in::AbstractVector{BlasInt}) - -## (SY) symmetric real matrices - Bunch-Kaufman decomposition, -## solvers (direct and factored) and inverse. -for (syconv, sysv, sytrf, sytri, sytrs, elty) in - ((:dsyconv_,:dsysv_,:dsytrf_,:dsytri_,:dsytrs_,:Float64), - (:ssyconv_,:ssysv_,:ssytrf_,:ssytri_,:ssytrs_,:Float32)) - @eval begin - # SUBROUTINE DSYCONV( UPLO, WAY, N, A, LDA, IPIV, WORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO, WAY - # INTEGER INFO, LDA, N - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function syconv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A, ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($syconv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong, Clong), - uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info, 1, 1) - chklapackerror(info[]) - A, work - end - - # SUBROUTINE DSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, - # LWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, LWORK, N, NRHS - # .. Array Arguments .. 
- # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) - function sysv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sysv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), - work, lwork, info, 1) - chkargsok(info[]) - chknonsingular(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - B, A, ipiv - end - - # SUBROUTINE DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - ipiv = similar(A, BlasInt, n) - if n == 0 - return A, ipiv, zero(BlasInt) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sytrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, stride(A,2), ipiv, work, lwork, info, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - return A, ipiv, info[] - end - - # SUBROUTINE DSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) -# function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::Vector{BlasInt}) -# chkstride1(A) -# n = checksquare(A) -# chkuplo(uplo) -# work = Vector{$elty}(undef, 1) -# lwork = BlasInt(-1) -# info = Ref{BlasInt}() -# for i in 1:2 -# ccall((@blasfunc($sytri), libblastrampoline), Cvoid, -# (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, -# Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong), -# &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1) -# @assertargsok -# chknonsingular(info[]) -# if lwork < 0 -# lwork = BlasInt(real(work[1])) -# work = Vector{$elty}(undef, lwork) -# end -# end -# A -# end - - # SUBROUTINE DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # .. Array Arguments .. 
- # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A, ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($sytri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1) - chkargsok(info[]) - chknonsingular(info[]) - A - end - - # SUBROUTINE DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function sytrs!(uplo::AbstractChar, A::AbstractMatrix{$elty}, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chkstride1(A,B,ipiv) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($sytrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -# Rook-pivoting variants of symmetric-matrix algorithms -for (sysv, sytrf, sytri, sytrs, syconvf, elty) in - ((:dsysv_rook_,:dsytrf_rook_,:dsytri_rook_,:dsytrs_rook_,:dsyconvf_rook_,:Float64), - (:ssysv_rook_,:ssytrf_rook_,:ssytri_rook_,:ssytrs_rook_,:ssyconvf_rook_,:Float32)) - @eval begin - # SUBROUTINE DSYSV_ROOK(UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, - # LWORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, LWORK, N, NRHS - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) - function sysv_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sysv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), - work, lwork, info, 1) - chkargsok(info[]) - chknonsingular(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - B, A, ipiv - end - - # SUBROUTINE DSYTRF_ROOK(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. Array Arguments .. 
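The symmetric (SY) solvers in this block follow the same factor-then-solve pattern via the Bunch-Kaufman decomposition; a sketch assuming `LinearAlgebra.LAPACK`:

    using LinearAlgebra: LAPACK
    A = rand(4, 4); A = A + A'                     # symmetric, generically nonsingular
    b = rand(4)
    AF, ipiv, info = LAPACK.sytrf!('U', copy(A))   # Bunch-Kaufman factorization of the upper triangle
    x = LAPACK.sytrs!('U', AF, ipiv, copy(b))
    A * x ≈ b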
- # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function sytrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - ipiv = similar(A, BlasInt, n) - if n == 0 - return A, ipiv, zero(BlasInt) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sytrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, stride(A,2), ipiv, work, lwork, info, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - return A, ipiv, info[] - end - - # SUBROUTINE DSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function sytri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A, ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($sytri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1) - chkargsok(info[]) - chknonsingular(info[]) - A - end - - # SUBROUTINE DSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # .. Array Arguments .. - # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) - function sytrs_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chkstride1(A,B,ipiv) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($sytrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - - # SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER UPLO, WAY - # INTEGER INFO, LDA, N - # .. - # .. Array Arguments .. 
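The rook-pivoting variants follow the same calling sequence as the standard Bunch-Kaufman routines, trading some extra pivot searching for better-bounded entries in the factors. A sketch of factoring once with rook pivoting and reusing the factorization for a solve (assuming `using LinearAlgebra: LAPACK`; values are illustrative):

    A = [4.0 2.0; 2.0 -1.0]                            # symmetric indefinite
    b = [1.0, 1.0]
    AF, ipiv, info = LAPACK.sytrf_rook!('U', copy(A))  # rook-pivoted Bunch-Kaufman factorization
    x = LAPACK.sytrs_rook!('U', AF, ipiv, copy(b))     # solve A * x = b with the factors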
- # INTEGER IPIV( * ) - # DOUBLE PRECISION A( LDA, * ), E( * ) - function syconvf_rook!(uplo::AbstractChar, way::AbstractChar, - A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, - e::AbstractVector{$elty} = Vector{$elty}(undef, length(ipiv))) - require_one_based_indexing(A, ipiv, e) - # extract - n = checksquare(A) - lda = max(1, stride(A, 2)) - - # check - chkuplo(uplo) - if way != 'C' && way != 'R' - throw(ArgumentError("way must be C or R")) - end - if length(ipiv) != n - throw(ArgumentError("length of pivot vector was $(length(ipiv)) but should have been $n")) - end - if length(e) != n - throw(ArgumentError("length of e vector was $(length(e)) but should have been $n")) - end - - # allocate - info = Ref{BlasInt}() - - ccall((@blasfunc($syconvf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Clong, Clong), - uplo, way, n, A, - lda, e, ipiv, info, - 1, 1) - - chklapackerror(info[]) - return A, e - end - end -end - -## (SY) hermitian matrices - eigendecomposition, Bunch-Kaufman decomposition, -## solvers (direct and factored) and inverse. -for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in - ((:zsyconv_,:zhesv_,:zhetrf_,:zhetri_,:zhetrs_,:ComplexF64, :Float64), - (:csyconv_,:chesv_,:chetrf_,:chetri_,:chetrs_,:ComplexF32, :Float32)) - @eval begin - # SUBROUTINE ZSYCONV( UPLO, WAY, N, A, LDA, IPIV, WORK, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER UPLO, WAY - # INTEGER INFO, LDA, N - # .. - # .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function syconv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A,ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($syconv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong, Clong), - uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info, 1, 1) - chklapackerror(info[]) - A, work - end - - # SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, LWORK, N, NRHS - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) - function hesv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hesv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), - work, lwork, info, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - B, A, ipiv - end - - # SUBROUTINE ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. 
- # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i in 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hetrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, ipiv, info[] - end - -# SUBROUTINE ZHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) -# * .. Scalar Arguments .. -# CHARACTER UPLO -# INTEGER INFO, LDA, LWORK, N -# * .. -# * .. Array Arguments .. -# INTEGER IPIV( * ) -# COMPLEX*16 A( LDA, * ), WORK( * ) -# function hetri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::Vector{BlasInt}) -# chkstride1(A) -# n = checksquare(A) -# chkuplo(uplo) -# work = Vector{$elty}(undef, 1) -# lwork = BlasInt(-1) -# info = Ref{BlasInt}() -# for i in 1:2 -# ccall((@blasfunc($hetri), libblastrampoline), Cvoid, -# (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, -# Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong), -# &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1) -# chklapackerror(info[]) -# if lwork < 0 -# lwork = BlasInt(real(work[1])) -# work = Vector{$elty}(undef, lwork) -# end -# end -# A -# end - - - # SUBROUTINE ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function hetri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A, ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($hetri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1) - chklapackerror(info[]) - A - end - - # SUBROUTINE ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ) - function hetrs!(uplo::AbstractChar, A::AbstractMatrix{$elty}, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chkstride1(A,B,ipiv) - n = checksquare(A) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($hetrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -for (hesv, hetrf, hetri, hetrs, elty, relty) in - ((:zhesv_rook_,:zhetrf_rook_,:zhetri_rook_,:zhetrs_rook_,:ComplexF64, :Float64), - (:chesv_rook_,:chetrf_rook_,:chetri_rook_,:chetrs_rook_,:ComplexF32, :Float32)) - @eval begin - # SUBROUTINE ZHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, - # * .. Scalar Arguments .. 
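These are the Hermitian counterparts of the symmetric routines above: the complex matrix is factored as `U*D*U'` or `L*D*L'` with conjugate transposes. A usage sketch (assuming `using LinearAlgebra: LAPACK`; values are illustrative):

    A = ComplexF64[2 1im; -1im 3]                  # Hermitian
    b = ComplexF64[1, 0]
    AF, ipiv, info = LAPACK.hetrf!('U', copy(A))   # Hermitian Bunch-Kaufman factorization
    x = LAPACK.hetrs!('U', AF, ipiv, copy(b))      # solve A * x = b with the factors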
- # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, LWORK, N, NRHS - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) - function hesv_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hesv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), - work, lwork, info, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - B, A, ipiv - end - - # SUBROUTINE ZHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function hetrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i in 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hetrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, ipiv, info[] - end - - # SUBROUTINE ZHETRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function hetri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A,ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($hetri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1) - chklapackerror(info[]) - A - end - - # SUBROUTINE ZHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # * .. - # * .. Array Arguments .. 
- # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ) - function hetrs_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chkstride1(A,B,ipiv) - n = checksquare(A) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($hetrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -for (sysv, sytrf, sytri, sytrs, elty, relty) in - ((:zsysv_,:zsytrf_,:zsytri_,:zsytrs_,:ComplexF64, :Float64), - (:csysv_,:csytrf_,:csytri_,:csytrs_,:ComplexF32, :Float32)) - @eval begin - # SUBROUTINE ZSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, - # $ LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, LWORK, N, NRHS - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) - function sysv!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sysv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), - work, lwork, info, 1) - chkargsok(info[]) - chknonsingular(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - B, A, ipiv - end - - # SUBROUTINE ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - ipiv = similar(A, BlasInt, n) - if n == 0 - return A, ipiv, zero(BlasInt) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sytrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, ipiv, info[] - end - -# SUBROUTINE ZSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) -# * .. Scalar Arguments .. -# CHARACTER UPLO -# INTEGER INFO, LDA, LWORK, N -# * .. -# * .. Array Arguments .. 
-# INTEGER IPIV( * ) -# COMPLEX*16 A( LDA, * ), WORK( * ) -# function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::Vector{BlasInt}) -# chkstride1(A) -# n = checksquare(A) -# chkuplo(uplo) -# work = Vector{$elty}(undef, 1) -# lwork = BlasInt(-1) -# info = Ref{BlasInt}() -# for i in 1:2 -# ccall((@blasfunc($sytri), libblastrampoline), Cvoid, -# (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, -# Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong), -# &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1) -# chklapackerror(info[]) -# if lwork < 0 -# lwork = BlasInt(real(work[1])) -# work = Vector{$elty}(undef, lwork) -# end -# end -# A -# end - - # SUBROUTINE ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function sytri!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A, ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($sytri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1) - chklapackerror(info[]) - A - end - - # SUBROUTINE ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ) - function sytrs!(uplo::AbstractChar, A::AbstractMatrix{$elty}, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chkstride1(A,B,ipiv) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($sytrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - end -end - -for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in - ((:zsysv_rook_,:zsytrf_rook_,:zsytri_rook_,:zsytrs_rook_,:zsyconvf_rook_,:ComplexF64, :Float64), - (:csysv_rook_,:csytrf_rook_,:csytri_rook_,:csytrs_rook_,:csyconvf_rook_,:ComplexF32, :Float32)) - @eval begin - # SUBROUTINE ZSYSV_ROOK(UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, - # $ LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, LWORK, N, NRHS - # * .. - # * .. Array Arguments .. 
- # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) - function sysv_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, B) - chkstride1(A,B) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - ipiv = similar(A, BlasInt, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sysv), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), - work, lwork, info, 1) - chkargsok(info[]) - chknonsingular(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - B, A, ipiv - end - - # SUBROUTINE ZSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function sytrf_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - ipiv = similar(A, BlasInt, n) - if n == 0 - return A, ipiv, zero(BlasInt) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($sytrf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, ipiv, info[] - end - - # SUBROUTINE ZSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, N - # * .. - # * .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function sytri_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) - chkstride1(A, ipiv) - n = checksquare(A) - chkuplo(uplo) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($sytri), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1) - chklapackerror(info[]) - A - end - - # SUBROUTINE ZSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LDB, N, NRHS - # * .. - # * .. Array Arguments .. 
- # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), B( LDB, * ) - function sytrs_rook!(uplo::AbstractChar, A::AbstractMatrix{$elty}, - ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat{$elty}) - require_one_based_indexing(A, ipiv, B) - chkstride1(A,B,ipiv) - n = checksquare(A) - chkuplo(uplo) - if n != size(B,1) - throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n")) - end - info = Ref{BlasInt}() - ccall((@blasfunc($sytrs), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1) - chklapackerror(info[]) - B - end - - # SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) - # - # .. Scalar Arguments .. - # CHARACTER UPLO, WAY - # INTEGER INFO, LDA, N - # .. - # .. Array Arguments .. - # INTEGER IPIV( * ) - # COMPLEX*16 A( LDA, * ), E( * ) - function syconvf_rook!(uplo::AbstractChar, way::AbstractChar, - A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, - e::AbstractVector{$elty} = Vector{$elty}(undef, length(ipiv))) - require_one_based_indexing(A, ipiv, e) - chkstride1(A, ipiv, e) - - # extract - n = checksquare(A) - lda = stride(A, 2) - - # check - chkuplo(uplo) - if way != 'C' && way != 'R' - throw(ArgumentError("way must be 'C' or 'R'")) - end - if length(ipiv) != n - throw(ArgumentError("length of pivot vector was $(length(ipiv)) but should have been $n")) - end - if length(e) != n - throw(ArgumentError("length of e vector was $(length(e)) but should have been $n")) - end - - # allocate - info = Ref{BlasInt}() - - ccall((@blasfunc($syconvf), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Clong, Clong), - uplo, way, n, A, - max(1, lda), e, ipiv, info, - 1, 1) - - chklapackerror(info[]) - return A, e - end - end -end - -""" - syconv!(uplo, A, ipiv) -> (A, work) - -Converts a symmetric matrix `A` (which has been factorized into a -triangular matrix) into two matrices `L` and `D`. If `uplo = U`, `A` -is upper triangular. If `uplo = L`, it is lower triangular. `ipiv` is -the pivot vector from the triangular factorization. `A` is overwritten -by `L` and `D`. -""" -syconv!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}) - -""" - sysv!(uplo, A, B) -> (B, A, ipiv) - -Finds the solution to `A * X = B` for symmetric matrix `A`. If `uplo = U`, -the upper half of `A` is stored. If `uplo = L`, the lower half is stored. -`B` is overwritten by the solution `X`. `A` is overwritten by its -Bunch-Kaufman factorization. `ipiv` contains pivoting information about the -factorization. -""" -sysv!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat) - -""" - sytrf!(uplo, A) -> (A, ipiv, info) - -Computes the Bunch-Kaufman factorization of a symmetric matrix `A`. If -`uplo = U`, the upper half of `A` is stored. If `uplo = L`, the lower -half is stored. - -Returns `A`, overwritten by the factorization, a pivot vector `ipiv`, and -the error code `info` which is a non-negative integer. If `info` is positive -the matrix is singular and the diagonal part of the factorization is exactly -zero at position `info`. -""" -sytrf!(uplo::AbstractChar, A::AbstractMatrix) - -""" - sytri!(uplo, A, ipiv) - -Computes the inverse of a symmetric matrix `A` using the results of -`sytrf!`. If `uplo = U`, the upper half of `A` is stored. If `uplo = L`, -the lower half is stored. 
`A` is overwritten by its inverse.
-"""
-sytri!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt})
-
-"""
-    sytrs!(uplo, A, ipiv, B)
-
-Solves the equation `A * X = B` for a symmetric matrix `A` using the
-results of `sytrf!`. If `uplo = U`, the upper half of `A` is stored.
-If `uplo = L`, the lower half is stored. `B` is overwritten by the
-solution `X`.
-"""
-sytrs!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-
-"""
-    hesv!(uplo, A, B) -> (B, A, ipiv)
-
-Finds the solution to `A * X = B` for Hermitian matrix `A`. If `uplo = U`,
-the upper half of `A` is stored. If `uplo = L`, the lower half is stored.
-`B` is overwritten by the solution `X`. `A` is overwritten by its
-Bunch-Kaufman factorization. `ipiv` contains pivoting information about the
-factorization.
-"""
-hesv!(uplo::AbstractChar, A::AbstractMatrix, B::AbstractVecOrMat)
-
-"""
-    hetrf!(uplo, A) -> (A, ipiv, info)
-
-Computes the Bunch-Kaufman factorization of a Hermitian matrix `A`. If
-`uplo = U`, the upper half of `A` is stored. If `uplo = L`, the lower
-half is stored.
-
-Returns `A`, overwritten by the factorization, a pivot vector `ipiv`, and
-the error code `info` which is a non-negative integer. If `info` is positive
-the matrix is singular and the diagonal part of the factorization is exactly
-zero at position `info`.
-"""
-hetrf!(uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    hetri!(uplo, A, ipiv)
-
-Computes the inverse of a Hermitian matrix `A` using the results of
-`hetrf!`. If `uplo = U`, the upper half of `A` is stored. If `uplo = L`,
-the lower half is stored. `A` is overwritten by its inverse.
-"""
-hetri!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt})
-
-"""
-    hetrs!(uplo, A, ipiv, B)
-
-Solves the equation `A * X = B` for a Hermitian matrix `A` using the
-results of `hetrf!`. If `uplo = U`, the upper half of `A` is stored.
-If `uplo = L`, the lower half is stored. `B` is overwritten by the
-solution `X`.
-"""
-hetrs!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
-
-# Symmetric (real) eigensolvers
-for (syev, syevr, syevd, sygvd, elty) in
-    ((:dsyev_,:dsyevr_,:dsyevd_,:dsygvd_,:Float64),
-     (:ssyev_,:ssyevr_,:ssyevd_,:ssygvd_,:Float32))
-    @eval begin
-        #       SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, LDA, LWORK, N
-        # *     .. Array Arguments ..
-        #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
-            chkstride1(A)
-            n = checksquare(A)
-            W = similar(A, $elty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2 # first call returns lwork as work[1]
-                ccall((@blasfunc($syev), libblastrampoline), Cvoid,
-                      (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
-                      jobz, uplo, n, A, max(1,stride(A,2)), W, work, lwork, info, 1, 1)
-                chklapackerror(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                end
-            end
-            jobz == 'V' ? (W, A) : W
-        end
-
-        #       SUBROUTINE DSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
-        #      $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
-        #      $                   IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, RANGE, UPLO
-        #       INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N
-        #       DOUBLE PRECISION   ABSTOL, VL, VU
-        # *     ..
-        # *     .. Array Arguments ..
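Taken together, the docstrings above describe a factor-once, reuse-many workflow; a sketch (assuming `using LinearAlgebra: LAPACK`; values are illustrative):

    A = [3.0 1.0; 1.0 -2.0]                         # symmetric indefinite
    b = [1.0, 2.0]
    AF, ipiv, info = LAPACK.sytrf!('U', copy(A))    # Bunch-Kaufman factorization
    x  = LAPACK.sytrs!('U', AF, ipiv, copy(b))      # solve A * x = b using the factors
    Ai = LAPACK.sytri!('U', copy(AF), ipiv)         # inverse of A (only the 'U' triangle is valid)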
- # INTEGER ISUPPZ( * ), IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) - function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, - vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat) - chkstride1(A) - n = checksquare(A) - chkuplofinite(A, uplo) - if range == 'I' && !(1 <= il <= iu <= n) - throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu = $iu), which must be between 1 and n = $n")) - end - if range == 'V' && vl >= vu - throw(ArgumentError("lower boundary, $vl, must be less than upper boundary, $vu")) - end - lda = stride(A,2) - m = Ref{BlasInt}() - W = similar(A, $elty, n) - ldz = n - if jobz == 'N' - Z = similar(A, $elty, ldz, 0) - elseif jobz == 'V' - Z = similar(A, $elty, ldz, n) - end - isuppz = similar(A, BlasInt, 2*n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($syevr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty}, - Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong, Clong), - jobz, range, uplo, n, - A, max(1,lda), vl, vu, - il, iu, abstol, m, - W, Z, max(1,ldz), isuppz, - work, lwork, iwork, liwork, - info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - liwork = iwork[1] - resize!(iwork, liwork) - end - end - W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)] - end - syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) = - syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0) - - # SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, - # $ IWORK, LIWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBZ, UPLO - # INTEGER INFO, LDA, LIWORK, LWORK, N - # * .. - # * .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) - function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplofinite(A, uplo) - lda = stride(A,2) - m = Ref{BlasInt}() - W = similar(A, $elty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($syevd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), - jobz, uplo, n, A, max(1,lda), - W, work, lwork, iwork, liwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - liwork = iwork[1] - resize!(iwork, liwork) - end - end - jobz == 'V' ? (W, A) : W - end - - # Generalized eigenproblem - # SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, - # $ LWORK, IWORK, LIWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBZ, UPLO - # INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N - # * .. - # * .. Array Arguments .. 
- # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) - function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) - end - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - w = similar(A, $elty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($sygvd), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - itype, jobz, uplo, n, - A, lda, B, ldb, - w, work, lwork, iwork, - liwork, info, 1, 1) - chkargsok(info[]) - if i == 1 - lwork = BlasInt(work[1]) - resize!(work, lwork) - liwork = iwork[1] - resize!(iwork, liwork) - end - end - chkposdef(info[]) - w, A, B - end - end -end -# Hermitian eigensolvers -for (syev, syevr, syevd, sygvd, elty, relty) in - ((:zheev_,:zheevr_,:zheevd_,:zhegvd_,:ComplexF64,:Float64), - (:cheev_,:cheevr_,:cheevd_,:chegvd_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBZ, UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. - # DOUBLE PRECISION RWORK( * ), W( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - chkuplofinite(A, uplo) - n = checksquare(A) - W = similar(A, $relty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, max(1, 3n-2)) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($syev), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, - Clong, Clong), - jobz, uplo, n, A, stride(A,2), W, work, lwork, rwork, info, - 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - jobz == 'V' ? (W, A) : W - end - - # SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, - # $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, - # $ RWORK, LRWORK, IWORK, LIWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBZ, RANGE, UPLO - # INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, - # $ M, N - # DOUBLE PRECISION ABSTOL, VL, VU - # * .. - # * .. Array Arguments .. 
- # INTEGER ISUPPZ( * ), IWORK( * ) - # DOUBLE PRECISION RWORK( * ), W( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) - function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, - vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat) - chkstride1(A) - chkuplofinite(A, uplo) - n = checksquare(A) - if range == 'I' && !(1 <= il <= iu <= n) - throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu=$iu), which must be between 1 and n = $n")) - end - if range == 'V' && vl >= vu - throw(ArgumentError("lower boundary, $vl, must be less than upper boundary, $vu")) - end - lda = max(1,stride(A,2)) - m = Ref{BlasInt}() - W = similar(A, $relty, n) - if jobz == 'N' - ldz = 1 - Z = similar(A, $elty, ldz, 0) - elseif jobz == 'V' - ldz = n - Z = similar(A, $elty, ldz, n) - end - isuppz = similar(A, BlasInt, 2*n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 1) - lrwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1] - ccall((@blasfunc($syevr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty}, - Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{BlasInt}, - Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt}, - Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, - Clong, Clong, Clong), - jobz, range, uplo, n, - A, lda, vl, vu, - il, iu, abstol, m, - W, Z, ldz, isuppz, - work, lwork, rwork, lrwork, - iwork, liwork, info, - 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - lrwork = BlasInt(rwork[1]) - resize!(rwork, lrwork) - liwork = iwork[1] - resize!(iwork, liwork) - end - end - W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)] - end - syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) = - syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0) - - # SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, - # $ LRWORK, IWORK, LIWORK, INFO ) - # * .. Scalar Arguments .. - # CHARACTER JOBZ, UPLO - # INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N - # * .. - # * .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - chkuplofinite(A, uplo) - n = checksquare(A) - lda = max(1, stride(A,2)) - m = Ref{BlasInt}() - W = similar(A, $relty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 1) - lrwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1] - ccall((@blasfunc($syevd), liblapack), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt}, - Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - jobz, uplo, n, A, stride(A,2), - W, work, lwork, rwork, lrwork, - iwork, liwork, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - lrwork = BlasInt(rwork[1]) - resize!(rwork, lrwork) - liwork = iwork[1] - resize!(iwork, liwork) - end - end - jobz == 'V' ? 
(W, A) : W
-        end
-
-        #       SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
-        #      $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
-        # *     .. Scalar Arguments ..
-        #       CHARACTER          JOBZ, UPLO
-        #       INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N
-        # *     ..
-        # *     .. Array Arguments ..
-        #       INTEGER            IWORK( * )
-        #       DOUBLE PRECISION   RWORK( * ), W( * )
-        #       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
-        function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
-            chkstride1(A, B)
-            chkuplofinite(A, uplo)
-            chkuplofinite(B, uplo)
-            n, m = checksquare(A, B)
-            if n != m
-                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
-            end
-            lda = max(1, stride(A, 2))
-            ldb = max(1, stride(B, 2))
-            w = similar(A, $relty, n)
-            work = Vector{$elty}(undef, 1)
-            lwork = BlasInt(-1)
-            iwork = Vector{BlasInt}(undef, 1)
-            liwork = BlasInt(-1)
-            rwork = Vector{$relty}(undef, 1)
-            lrwork = BlasInt(-1)
-            info = Ref{BlasInt}()
-            for i = 1:2 # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
-                ccall((@blasfunc($sygvd), libblastrampoline), Cvoid,
-                    (Ref{BlasInt}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                     Clong, Clong),
-                    itype, jobz, uplo, n,
-                    A, lda, B, ldb,
-                    w, work, lwork, rwork,
-                    lrwork, iwork, liwork, info,
-                    1, 1)
-                chkargsok(info[])
-                if i == 1
-                    lwork = BlasInt(real(work[1]))
-                    resize!(work, lwork)
-                    liwork = iwork[1]
-                    resize!(iwork, liwork)
-                    lrwork = BlasInt(rwork[1])
-                    resize!(rwork, lrwork)
-                end
-            end
-            chkposdef(info[])
-            w, A, B
-        end
-    end
-end
-
-"""
-    syev!(jobz, uplo, A)
-
-Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
-of `A` is used. If `uplo = L`, the lower triangle of `A` is used.
-"""
-syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix)
-
-"""
-    syevr!(jobz, range, uplo, A, vl, vu, il, iu, abstol) -> (W, Z)
-
-Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
-of `A` is used. If `uplo = L`, the lower triangle of `A` is used. If
-`range = A`, all the eigenvalues are found. If `range = V`, the
-eigenvalues in the half-open interval `(vl, vu]` are found.
-If `range = I`, the eigenvalues with indices between `il` and `iu` are
-found. `abstol` can be set as a tolerance for convergence.
-
-The eigenvalues are returned in `W` and the eigenvectors in `Z`.
-"""
-syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix,
-       vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
-
-"""
-    syevd!(jobz, uplo, A)
-
-Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
-(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
-of `A` is used. If `uplo = L`, the lower triangle of `A` is used.
-
-Uses the divide-and-conquer method, instead of the QR iteration used by
-`syev!` or multiple relatively robust representations used by `syevr!`.
-See James W. Demmel et al, SIAM J. Sci. Comput. 30, 3, 1508 (2008) for
-a comparison of the accuracy and performance of different methods.
-""" -syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix) - -""" - sygvd!(itype, jobz, uplo, A, B) -> (w, A, B) - -Finds the generalized eigenvalues (`jobz = N`) or eigenvalues and -eigenvectors (`jobz = V`) of a symmetric matrix `A` and symmetric -positive-definite matrix `B`. If `uplo = U`, the upper triangles -of `A` and `B` are used. If `uplo = L`, the lower triangles of `A` and -`B` are used. If `itype = 1`, the problem to solve is -`A * x = lambda * B * x`. If `itype = 2`, the problem to solve is -`A * B * x = lambda * x`. If `itype = 3`, the problem to solve is -`B * A * x = lambda * x`. -""" -sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) - -## (BD) Bidiagonal matrices - singular value decomposition -for (bdsqr, relty, elty) in - ((:dbdsqr_,:Float64,:Float64), - (:sbdsqr_,:Float32,:Float32), - (:zbdsqr_,:Float64,:ComplexF64), - (:cbdsqr_,:Float32,:ComplexF32)) - @eval begin - function bdsqr!(uplo::AbstractChar, d::AbstractVector{$relty}, e_::AbstractVector{$relty}, - Vt::AbstractMatrix{$elty}, U::AbstractMatrix{$elty}, C::AbstractMatrix{$elty}) - require_one_based_indexing(d, e_, Vt, U, C) - chkstride1(d, e_, Vt, U, C) - # Extract number - n = length(d) - ncvt, nru, ncc = size(Vt, 2), size(U, 1), size(C, 2) - ldvt, ldu, ldc = max(1, stride(Vt,2)), max(1, stride(U, 2)), max(1, stride(C,2)) - # Do checks - chkuplo(uplo) - if length(e_) != n - 1 - throw(DimensionMismatch("off-diagonal has length $(length(e_)) but should have length $(n - 1)")) - end - if ncvt > 0 && ldvt < n - throw(DimensionMismatch("leading dimension of Vt, $ldvt, must be at least $n")) - end - if ldu < nru - throw(DimensionMismatch("leading dimension of U, $ldu, must be at least $nru")) - end - if size(U, 2) != n - throw(DimensionMismatch("U must have $n columns but has $(size(U, 2))")) - end - if ncc > 0 && ldc < n - throw(DimensionMismatch("leading dimension of C, $ldc, must be at least $n")) - end - # Allocate - work = Vector{$relty}(undef, 4n) - info = Ref{BlasInt}() - ccall((@blasfunc($bdsqr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong), - uplo, n, ncvt, nru, - ncc, d, e_, Vt, - ldvt, U, ldu, C, - ldc, work, info, 1) - chklapackerror(info[]) - d, Vt, U, C #singular values in descending order, P**T * VT, U * Q, Q**T * C - end - end -end - -""" - bdsqr!(uplo, d, e_, Vt, U, C) -> (d, Vt, U, C) - -Computes the singular value decomposition of a bidiagonal matrix with -`d` on the diagonal and `e_` on the off-diagonal. If `uplo = U`, `e_` is -the superdiagonal. If `uplo = L`, `e_` is the subdiagonal. Can optionally also -compute the product `Q' * C`. - -Returns the singular values in `d`, and the matrix `C` overwritten with `Q' * C`. -""" -bdsqr!(uplo::AbstractChar, d::AbstractVector, e_::AbstractVector, Vt::AbstractMatrix, U::AbstractMatrix, C::AbstractMatrix) - -#Defined only for real types -for (bdsdc, elty) in - ((:dbdsdc_,:Float64), - (:sbdsdc_,:Float32)) - @eval begin - #* DBDSDC computes the singular value decomposition (SVD) of a real - #* N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, - #* using a divide and conquer method - #* .. Scalar Arguments .. - # CHARACTER COMPQ, UPLO - # INTEGER INFO, LDU, LDVT, N - #* .. - #* .. Array Arguments .. 
- # INTEGER IQ( * ), IWORK( * ) - # DOUBLE PRECISION D( * ), E( * ), Q( * ), U( LDU, * ), - # $ VT( LDVT, * ), WORK( * ) - function bdsdc!(uplo::AbstractChar, compq::AbstractChar, d::AbstractVector{$elty}, e_::AbstractVector{$elty}) - require_one_based_indexing(d, e_) - chkstride1(d, e_) - n, ldiq, ldq, ldu, ldvt = length(d), 1, 1, 1, 1 - chkuplo(uplo) - if compq == 'N' - lwork = 6*n - elseif compq == 'P' - @warn "COMPQ='P' is not tested" - #TODO turn this into an actual LAPACK call - #smlsiz=ilaenv(9, $elty === :Float64 ? 'dbdsqr' : 'sbdsqr', string(uplo, compq), n,n,n,n) - smlsiz=100 #For now, completely overkill - ldq = n*(11+2*smlsiz+8*round(Int,log((n/(smlsiz+1)))/log(2))) - ldiq = n*(3+3*round(Int,log(n/(smlsiz+1))/log(2))) - lwork = 6*n - elseif compq == 'I' - ldvt=ldu=max(1, n) - lwork=3*n^2 + 4*n - else - throw(ArgumentError("COMPQ argument must be 'N', 'P' or 'I', got $(repr(compq))")) - end - u = similar(d, $elty, (ldu, n)) - vt = similar(d, $elty, (ldvt, n)) - q = similar(d, $elty, ldq) - iq = similar(d, BlasInt, ldiq) - work = Vector{$elty}(undef, lwork) - iwork = Vector{BlasInt}(undef, 8n) - info = Ref{BlasInt}() - ccall((@blasfunc($bdsdc), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, - Clong, Clong), - uplo, compq, n, d, e_, - u, ldu, vt, ldvt, - q, iq, work, iwork, info, - 1, 1) - chklapackerror(info[]) - d, e_, u, vt, q, iq - end - end -end - -""" - bdsdc!(uplo, compq, d, e_) -> (d, e, u, vt, q, iq) - -Computes the singular value decomposition of a bidiagonal matrix with `d` on the -diagonal and `e_` on the off-diagonal using a divide and conqueq method. -If `uplo = U`, `e_` is the superdiagonal. If `uplo = L`, `e_` is the subdiagonal. -If `compq = N`, only the singular values are found. If `compq = I`, the singular -values and vectors are found. If `compq = P`, the singular values -and vectors are found in compact form. Only works for real types. - -Returns the singular values in `d`, and if `compq = P`, the compact singular -vectors in `iq`. -""" -bdsdc!(uplo::AbstractChar, compq::AbstractChar, d::AbstractVector, e_::AbstractVector) - -for (gecon, elty) in - ((:dgecon_,:Float64), - (:sgecon_,:Float32)) - @eval begin - # SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, - # $ INFO ) - # * .. Scalar Arguments .. - # CHARACTER NORM - # INTEGER INFO, LDA, N - # DOUBLE PRECISION ANORM, RCOND - # * .. - # * .. Array Arguments .. - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function gecon!(normtype::AbstractChar, A::AbstractMatrix{$elty}, anorm::$elty) - chkstride1(A) - n = checksquare(A) - lda = max(1, stride(A, 2)) - rcond = Ref{$elty}() - work = Vector{$elty}(undef, 4n) - iwork = Vector{BlasInt}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($gecon), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt}, - Ptr{BlasInt}, Clong), - normtype, n, A, lda, anorm, rcond, work, iwork, - info, 1) - chklapackerror(info[]) - rcond[] - end - end -end - -for (gecon, elty, relty) in - ((:zgecon_,:ComplexF64,:Float64), - (:cgecon_,:ComplexF32,:Float32)) - @eval begin - # SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, - # $ INFO ) - # * .. Scalar Arguments .. - # CHARACTER NORM - # INTEGER INFO, LDA, N - # DOUBLE PRECISION ANORM, RCOND - # * .. - # * .. Array Arguments .. 
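For the bidiagonal SVD described above, only the diagonal `d` and off-diagonal `e_` are needed; a sketch (assuming `using LinearAlgebra: LAPACK`; values are illustrative):

    d  = [3.0, 2.0, 1.0]                                             # diagonal of an upper bidiagonal matrix
    e_ = [0.5, 0.25]                                                 # superdiagonal
    s = LAPACK.bdsdc!('U', 'N', copy(d), copy(e_))[1]                # singular values only
    s2, _, u, vt, _, _ = LAPACK.bdsdc!('U', 'I', copy(d), copy(e_))  # singular values and vectors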
- # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), WORK( * ) - function gecon!(normtype::AbstractChar, A::AbstractMatrix{$elty}, anorm::$relty) - chkstride1(A) - n = checksquare(A) - lda = max(1, stride(A, 2)) - rcond = Ref{$relty}() - work = Vector{$elty}(undef, 2n) - rwork = Vector{$relty}(undef, 2n) - info = Ref{BlasInt}() - ccall((@blasfunc($gecon), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{$relty}, Ref{$relty}, Ptr{$elty}, Ptr{$relty}, - Ptr{BlasInt}, Clong), - normtype, n, A, lda, anorm, rcond, work, rwork, - info, 1) - chklapackerror(info[]) - rcond[] - end - end -end - -""" - gecon!(normtype, A, anorm) - -Finds the reciprocal condition number of matrix `A`. If `normtype = I`, -the condition number is found in the infinity norm. If `normtype = O` or -`1`, the condition number is found in the one norm. `A` must be the -result of `getrf!` and `anorm` is the norm of `A` in the relevant norm. -""" -gecon!(normtype::AbstractChar, A::AbstractMatrix, anorm) - -for (gehrd, elty) in - ((:dgehrd_,:Float64), - (:sgehrd_,:Float32), - (:zgehrd_,:ComplexF64), - (:cgehrd_,:ComplexF32)) - @eval begin - - # .. Scalar Arguments .. - # INTEGER IHI, ILO, INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function gehrd!(ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkfinite(A) # balancing routines don't support NaNs and Infs - tau = similar(A, $elty, max(0,n - 1)) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gehrd), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - n, ilo, ihi, A, - max(1, stride(A, 2)), tau, work, lwork, - info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, tau - end - end -end -gehrd!(A::AbstractMatrix) = gehrd!(1, size(A, 1), A) - -""" - gehrd!(ilo, ihi, A) -> (A, tau) - -Converts a matrix `A` to Hessenberg form. If `A` is balanced with `gebal!` -then `ilo` and `ihi` are the outputs of `gebal!`. Otherwise they should be -`ilo = 1` and `ihi = size(A,2)`. `tau` contains the elementary reflectors of -the factorization. -""" -gehrd!(ilo::Integer, ihi::Integer, A::AbstractMatrix) - -for (orghr, elty) in - ((:dorghr_,:Float64), - (:sorghr_,:Float32), - (:zunghr_,:ComplexF64), - (:cunghr_,:ComplexF32)) - @eval begin - # * .. Scalar Arguments .. - # INTEGER IHI, ILO, INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. 
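As the `gecon!` docstring above notes, the estimate is meant to be combined with `getrf!` (wrapped earlier in this file) and the norm of the unfactored matrix; a sketch (assuming `using LinearAlgebra`, which provides `opnorm` and the `LAPACK` module):

    A = [4.0 2.0; 1.0 3.0]
    anorm = opnorm(A, 1)                       # 1-norm of the unfactored matrix
    AF, ipiv, info = LAPACK.getrf!(copy(A))    # LU factorization
    rcond = LAPACK.gecon!('1', AF, anorm)      # reciprocal condition number in the 1-norm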
- # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function orghr!(ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}) - require_one_based_indexing(A, tau) - chkstride1(A, tau) - n = checksquare(A) - if n - length(tau) != 1 - throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)")) - end - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($orghr), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - n, ilo, ihi, A, - max(1, stride(A, 2)), tau, work, lwork, - info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A - end - end -end - -""" - orghr!(ilo, ihi, A, tau) - -Explicitly finds `Q`, the orthogonal/unitary matrix from `gehrd!`. `ilo`, -`ihi`, `A`, and `tau` must correspond to the input/output to `gehrd!`. -""" -orghr!(ilo::Integer, ihi::Integer, A::AbstractMatrix, tau::AbstractVector) - -for (ormhr, elty) in - ((:dormhr_,:Float64), - (:sormhr_,:Float32), - (:zunmhr_,:ComplexF64), - (:cunmhr_,:ComplexF32)) - @eval begin - # .. Scalar Arguments .. - # CHARACTER side, trans - # INTEGER ihi, ilo, info, lda, ldc, lwork, m, n - # .. - # .. Array Arguments .. - # DOUBLE PRECISION a( lda, * ), c( ldc, * ), tau( * ), work( * ) - function ormhr!(side::AbstractChar, trans::AbstractChar, ilo::Integer, ihi::Integer, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) - - require_one_based_indexing(A, tau, C) - chkstride1(A, tau, C) - n = checksquare(A) - mC, nC = size(C, 1), size(C, 2) - - if n - length(tau) != 1 - throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)")) - end - if (side == 'L' && mC != n) || (side == 'R' && nC != n) - throw(DimensionMismatch("A and C matrices are not conformable")) - end - - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormhr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), - side, trans, mC, nC, - ilo, ihi, A, max(1, stride(A, 2)), - tau, C, max(1, stride(C, 2)), work, - lwork, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - end -end - -for (hseqr, elty) in - ((:zhseqr_,:ComplexF64), - (:chseqr_,:ComplexF32)) - @eval begin - # * .. Scalar Arguments .. - # CHARACTER JOB, COMPZ - # INTEGER N, ILO, IHI, LWORK, LDH, LDZ, INFO - # * .. - # * .. Array Arguments .. 
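A sketch of the Hessenberg workflow described by the `gehrd!`/`orghr!` docstrings above (assuming `using LinearAlgebra: LAPACK`; the matrix is illustrative):

    A = [4.0 1.0 2.0; 2.0 3.0 1.0; 1.0 1.0 5.0]
    H, tau = LAPACK.gehrd!(1, 3, copy(A))   # Hessenberg form; reflectors stored below the subdiagonal
    Q = LAPACK.orghr!(1, 3, copy(H), tau)   # assemble the orthogonal Q explicitly
    # triu(H, -1) is the Hessenberg matrix itself, and Q' * A * Q ≈ triu(H, -1)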
- # COMPLEX*16 H( LDH, * ), Z( LDZ, * ), WORK( * ) - function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, - H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty}) - require_one_based_indexing(H, Z) - chkstride1(H) - n = checksquare(H) - checksquare(Z) == n || throw(DimensionMismatch()) - ldh = max(1, stride(H, 2)) - ldz = max(1, stride(Z, 2)) - w = similar(H, $elty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hseqr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - job, compz, n, ilo, ihi, - H, ldh, w, Z, ldz, work, - lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - H, Z, w - end - end -end - -for (hseqr, elty) in - ((:dhseqr_,:Float64), - (:shseqr_,:Float32)) - @eval begin - # * .. Scalar Arguments .. - # CHARACTER JOB, COMPZ - # INTEGER N, ILO, IHI, LWORK, LDH, LDZ, INFO - # * .. - # * .. Array Arguments .. - # COMPLEX*16 H( LDH, * ), Z( LDZ, * ), WORK( * ) - function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, - H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty}) - require_one_based_indexing(H, Z) - chkstride1(H) - n = checksquare(H) - checksquare(Z) == n || throw(DimensionMismatch()) - ldh = max(1, stride(H, 2)) - ldz = max(1, stride(Z, 2)) - wr = similar(H, $elty, n) - wi = similar(H, $elty, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hseqr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}), - job, compz, n, ilo, ihi, - H, ldh, wr, wi, Z, ldz, work, - lwork, info) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - H, Z, complex.(wr, wi) - end - end -end -hseqr!(H::StridedMatrix{T}, Z::StridedMatrix{T}) where {T<:BlasFloat} = hseqr!('S', 'V', 1, size(H, 1), H, Z) -hseqr!(H::StridedMatrix{T}) where {T<:BlasFloat} = hseqr!('S', 'I', 1, size(H, 1), H, similar(H)) - -""" - hseqr!(job, compz, ilo, ihi, H, Z) -> (H, Z, w) - -Computes all eigenvalues and (optionally) the Schur factorization of a matrix -reduced to Hessenberg form. If `H` is balanced with `gebal!` -then `ilo` and `ihi` are the outputs of `gebal!`. Otherwise they should be -`ilo = 1` and `ihi = size(H,2)`. `tau` contains the elementary reflectors of -the factorization. -""" -hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, H::AbstractMatrix, Z::AbstractMatrix) - -for (hetrd, elty) in - ((:dsytrd_,Float64), - (:ssytrd_,Float32), - (:zhetrd_,ComplexF64), - (:chetrd_,ComplexF32)) - relty = real(elty) - @eval begin - - # .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. 
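A sketch for `hseqr!`, using the convenience method defined above that defaults to computing the Schur form of the full matrix (assuming `using LinearAlgebra`, which provides `triu` and the `LAPACK` module):

    A = [4.0 1.0 2.0; 2.0 3.0 1.0; 1.0 1.0 5.0]
    Hfull, _ = LAPACK.gehrd!(1, 3, copy(A))
    H = triu(Hfull, -1)                   # keep only the Hessenberg part
    T, Z, w = LAPACK.hseqr!(H)            # Schur form T, Schur vectors Z, eigenvalues w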
- # DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), WORK( * ) - function hetrd!(uplo::AbstractChar, A::AbstractMatrix{$elty}) - chkstride1(A) - n = checksquare(A) - chkuplo(uplo) - chkfinite(A) # balancing routines don't support NaNs and Infs - tau = similar(A, $elty, max(0,n - 1)) - d = Vector{$relty}(undef, n) - e = Vector{$relty}(undef, max(0,n - 1)) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($hetrd), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{$relty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong), - uplo, n, A, max(1, stride(A, 2)), d, e, tau, work, lwork, info, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, tau, d, e - end - end -end - -""" - hetrd!(uplo, A) -> (A, tau, d, e) - -Converts a Hermitian matrix `A` to real-symmetric tridiagonal Hessenberg form. -If `uplo = U`, the upper half of `A` is stored; if `uplo = L`, the lower half is stored. -`tau` contains the elementary reflectors of the factorization, `d` contains the -diagonal and `e` contains the upper/lower diagonal. -""" -hetrd!(uplo::AbstractChar, A::AbstractMatrix) - -for (orgtr, elty) in - ((:dorgtr_,:Float64), - (:sorgtr_,:Float32), - (:zungtr_,:ComplexF64), - (:cungtr_,:ComplexF32)) - @eval begin - # * .. Scalar Arguments .. - # CHARACTER UPLO - # INTEGER INFO, LDA, LWORK, N - # * .. - # * .. Array Arguments .. - # DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) - function orgtr!(uplo::AbstractChar, A::AbstractMatrix{$elty}, tau::AbstractVector{$elty}) - require_one_based_indexing(A, tau) - chkstride1(A, tau) - n = checksquare(A) - if n - length(tau) != 1 - throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)")) - end - chkuplo(uplo) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($orgtr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong), - uplo, n, A, - max(1, stride(A, 2)), tau, work, lwork, - info, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A - end - end -end - -""" - orgtr!(uplo, A, tau) - -Explicitly finds `Q`, the orthogonal/unitary matrix from `hetrd!`. `uplo`, -`A`, and `tau` must correspond to the input/output to `hetrd!`. -""" -orgtr!(uplo::AbstractChar, A::AbstractMatrix, tau::AbstractVector) - -for (ormtr, elty) in - ((:dormtr_,:Float64), - (:sormtr_,:Float32), - (:zunmtr_,:ComplexF64), - (:cunmtr_,:ComplexF32)) - @eval begin - # .. Scalar Arguments .. - # CHARACTER side, trans, uplo - # INTEGER info, lda, ldc, lwork, m, n - # .. - # .. Array Arguments .. 
- # DOUBLE PRECISION a( lda, * ), c( ldc, * ), tau( * ), work( * ) - function ormtr!(side::AbstractChar, uplo::AbstractChar, trans::AbstractChar, A::AbstractMatrix{$elty}, - tau::AbstractVector{$elty}, C::AbstractVecOrMat{$elty}) - - require_one_based_indexing(A, tau, C) - chkstride1(A, tau, C) - n = checksquare(A) - chkuplo(uplo) - mC, nC = size(C, 1), size(C, 2) - - if n - length(tau) != 1 - throw(DimensionMismatch("tau has length $(length(tau)), needs $(n - 1)")) - end - if (side == 'L' && mC != n) || (side == 'R' && nC != n) - throw(DimensionMismatch("A and C matrices are not conformable")) - end - - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($ormtr), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong), - side, uplo, trans, mC, nC, - A, max(1, stride(A, 2)), - tau, C, max(1, stride(C, 2)), work, - lwork, info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - C - end - end -end - -for (gees, gges, gges3, elty) in - ((:dgees_,:dgges_,:dgges3_,:Float64), - (:sgees_,:sgges_,:sgges3_,:Float32)) - @eval begin - # .. Scalar Arguments .. - # CHARACTER JOBVS, SORT - # INTEGER INFO, LDA, LDVS, LWORK, N, SDIM - # .. - # .. Array Arguments .. - # LOGICAL BWORK( * ) - # DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), - # $ WR( * ) - function gees!(jobvs::AbstractChar, A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - n = checksquare(A) - sdim = Vector{BlasInt}(undef, 1) - wr = similar(A, $elty, n) - wi = similar(A, $elty, n) - vs = similar(A, $elty, jobvs == 'V' ? n : 0, n) - ldvs = max(size(vs, 1), 1) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gees), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong), - jobvs, 'N', C_NULL, n, - A, max(1, stride(A, 2)), sdim, wr, - wi, vs, ldvs, work, - lwork, C_NULL, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, vs, iszero(wi) ? wr : complex.(wr, wi) - end - - # * .. Scalar Arguments .. - # CHARACTER JOBVSL, JOBVSR, SORT - # INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM - # * .. - # * .. Array Arguments .. - # LOGICAL BWORK( * ) - # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), - # $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), - # $ VSR( LDVSR, * ), WORK( * ) - function gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) - end - sdim = BlasInt(0) - alphar = similar(A, $elty, n) - alphai = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvsl = jobvsl == 'V' ? max(1, n) : 1 - vsl = similar(A, $elty, ldvsl, n) - ldvsr = jobvsr == 'V' ? 
max(1, n) : 1 - vsr = similar(A, $elty, ldvsr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gges), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid}, - Ref{BlasInt}, Clong, Clong, Clong), - jobvsl, jobvsr, 'N', C_NULL, - n, A, max(1,stride(A, 2)), B, - max(1,stride(B, 2)), sdim, alphar, alphai, - beta, vsl, ldvsl, vsr, - ldvsr, work, lwork, C_NULL, - info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, B, complex.(alphar, alphai), beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:] - end - - # * .. Scalar Arguments .. - # CHARACTER JOBVSL, JOBVSR, SORT - # INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM - # * .. - # * .. Array Arguments .. - # LOGICAL BWORK( * ) - # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), - # $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), - # $ VSR( LDVSR, * ), WORK( * ) - function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) - end - sdim = BlasInt(0) - alphar = similar(A, $elty, n) - alphai = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvsl = jobvsl == 'V' ? max(1, n) : 1 - vsl = similar(A, $elty, ldvsl, n) - ldvsr = jobvsr == 'V' ? max(1, n) : 1 - vsr = similar(A, $elty, ldvsr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gges3), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid}, - Ref{BlasInt}, Clong, Clong, Clong), - jobvsl, jobvsr, 'N', C_NULL, - n, A, max(1,stride(A, 2)), B, - max(1,stride(B, 2)), sdim, alphar, alphai, - beta, vsl, ldvsl, vsr, - ldvsr, work, lwork, C_NULL, - info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, B, complex.(alphar, alphai), beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:] - end - end -end - -for (gees, gges, gges3, elty, relty) in - ((:zgees_,:zgges_,:zgges3_,:ComplexF64,:Float64), - (:cgees_,:cgges_,:cgges3_,:ComplexF32,:Float32)) - @eval begin - # * .. Scalar Arguments .. - # CHARACTER JOBVS, SORT - # INTEGER INFO, LDA, LDVS, LWORK, N, SDIM - # * .. - # * .. Array Arguments .. - # LOGICAL BWORK( * ) - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) - function gees!(jobvs::AbstractChar, A::AbstractMatrix{$elty}) - require_one_based_indexing(A) - chkstride1(A) - n = checksquare(A) - sort = 'N' - sdim = BlasInt(0) - w = similar(A, $elty, n) - vs = similar(A, $elty, jobvs == 'V' ? 
n : 1, n) - ldvs = max(size(vs, 1), 1) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gees), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong), - jobvs, sort, C_NULL, n, - A, max(1, stride(A, 2)), sdim, w, - vs, ldvs, work, lwork, - rwork, C_NULL, info, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, vs, w - end - - # * .. Scalar Arguments .. - # CHARACTER JOBVSL, JOBVSR, SORT - # INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM - # * .. - # * .. Array Arguments .. - # LOGICAL BWORK( * ) - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), - # $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), - # $ WORK( * ) - function gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) - end - sdim = BlasInt(0) - alpha = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvsl = jobvsl == 'V' ? max(1, n) : 1 - vsl = similar(A, $elty, ldvsl, n) - ldvsr = jobvsr == 'V' ? max(1, n) : 1 - vsr = similar(A, $elty, ldvsr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 8n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gges), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid}, - Ref{BlasInt}, Clong, Clong, Clong), - jobvsl, jobvsr, 'N', C_NULL, - n, A, max(1, stride(A, 2)), B, - max(1, stride(B, 2)), sdim, alpha, beta, - vsl, ldvsl, vsr, ldvsr, - work, lwork, rwork, C_NULL, - info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, B, alpha, beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:] - end - - # * .. Scalar Arguments .. - # CHARACTER JOBVSL, JOBVSR, SORT - # INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM - # * .. - # * .. Array Arguments .. - # LOGICAL BWORK( * ) - # DOUBLE PRECISION RWORK( * ) - # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), - # $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), - # $ WORK( * ) - function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) - chkstride1(A, B) - n, m = checksquare(A, B) - if n != m - throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) - end - sdim = BlasInt(0) - alpha = similar(A, $elty, n) - beta = similar(A, $elty, n) - ldvsl = jobvsl == 'V' ? max(1, n) : 1 - vsl = similar(A, $elty, ldvsl, n) - ldvsr = jobvsr == 'V' ? 
max(1, n) : 1 - vsr = similar(A, $elty, ldvsr, n) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - rwork = Vector{$relty}(undef, 8n) - info = Ref{BlasInt}() - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($gges3), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid}, - Ref{BlasInt}, Clong, Clong, Clong), - jobvsl, jobvsr, 'N', C_NULL, - n, A, max(1, stride(A, 2)), B, - max(1, stride(B, 2)), sdim, alpha, beta, - vsl, ldvsl, vsr, ldvsr, - work, lwork, rwork, C_NULL, - info, 1, 1, 1) - chklapackerror(info[]) - if i == 1 - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - A, B, alpha, beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:] - end - end -end - -""" - gees!(jobvs, A) -> (A, vs, w) - -Computes the eigenvalues (`jobvs = N`) or the eigenvalues and Schur -vectors (`jobvs = V`) of matrix `A`. `A` is overwritten by its Schur form. - -Returns `A`, `vs` containing the Schur vectors, and `w`, containing the -eigenvalues. -""" -gees!(jobvs::AbstractChar, A::AbstractMatrix) - - -""" - gges!(jobvsl, jobvsr, A, B) -> (A, B, alpha, beta, vsl, vsr) - -Computes the generalized eigenvalues, generalized Schur form, left Schur -vectors (`jobsvl = V`), or right Schur vectors (`jobvsr = V`) of `A` and -`B`. - -The generalized eigenvalues are returned in `alpha` and `beta`. The left Schur -vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`. -""" -gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) - -""" - gges3!(jobvsl, jobvsr, A, B) -> (A, B, alpha, beta, vsl, vsr) - -Computes the generalized eigenvalues, generalized Schur form, left Schur -vectors (`jobsvl = V`), or right Schur vectors (`jobvsr = V`) of `A` and -`B` using a blocked algorithm. This function requires LAPACK 3.6.0. - -The generalized eigenvalues are returned in `alpha` and `beta`. The left Schur -vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`. -""" -gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) - -for (trexc, trsen, tgsen, elty) in - ((:dtrexc_, :dtrsen_, :dtgsen_, :Float64), - (:strexc_, :strsen_, :stgsen_, :Float32)) - @eval begin - # * .. Scalar Arguments .. - # CHARACTER COMPQ - # INTEGER IFST, ILST, INFO, LDQ, LDT, N - # * .. - # * .. Array Arguments .. - # DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) - function trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) - chkstride1(T, Q) - n = checksquare(T) - ldt = max(1, stride(T, 2)) - ldq = max(1, stride(Q, 2)) - work = Vector{$elty}(undef, n) - info = Ref{BlasInt}() - ccall((@blasfunc($trexc), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ptr{BlasInt}, Clong), - compq, n, - T, ldt, Q, ldq, - ifst, ilst, - work, info, 1) - chklapackerror(info[]) - T, Q - end - trexc!(ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) = - trexc!('V', ifst, ilst, T, Q) - - # * .. Scalar Arguments .. - # CHARACTER COMPQ, JOB - # INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N - # DOUBLE PRECISION S, SEP - # * .. - # * .. Array Arguments .. 
- # LOGICAL SELECT( * ) - # INTEGER IWORK( * ) - # DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), WR( * ) - function trsen!(job::AbstractChar, compq::AbstractChar, select::AbstractVector{BlasInt}, - T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) - chkstride1(T, Q, select) - n = checksquare(T) - ldt = max(1, stride(T, 2)) - ldq = max(1, stride(Q, 2)) - wr = similar(T, $elty, n) - wi = similar(T, $elty, n) - m = sum(select) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - liwork = BlasInt(-1) - info = Ref{BlasInt}() - select = convert(Array{BlasInt}, select) - s = Ref{$elty}(zero($elty)) - sep = Ref{$elty}(zero($elty)) - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($trsen), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), - job, compq, select, n, - T, ldt, Q, ldq, - wr, wi, m, s, sep, - work, lwork, iwork, liwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 # only estimated optimal lwork, liwork - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - liwork = BlasInt(real(iwork[1])) - resize!(iwork, liwork) - end - end - T, Q, iszero(wi) ? wr : complex.(wr, wi), s[], sep[] - end - trsen!(select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) = - trsen!('N', 'V', select, T, Q) - - # .. Scalar Arguments .. - # LOGICAL WANTQ, WANTZ - # INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, - # $ M, N - # DOUBLE PRECISION PL, PR - # .. - # .. Array Arguments .. - # LOGICAL SELECT( * ) - # INTEGER IWORK( * ) - # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), - # $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), - # $ WORK( * ), Z( LDZ, * ) - # .. 
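Before the generalized `tgsen!` wrapper that follows, a brief usage sketch of the real-arithmetic `trsen!` wrapper defined above (illustrative only, not part of the original file; the 2×2 matrix and the `select` mask are made up, and the higher-level `ordschur` performs essentially this call):

```julia
using LinearAlgebra

A = [0.0 1.0; -2.0 -3.0]                 # eigenvalues -1 and -2
S = schur(A)                             # S.T quasi-triangular, S.Z orthogonal
select = LinearAlgebra.BlasInt[0, 1]     # pull the trailing eigenvalue into the leading cluster
T, Z, vals, s, sep = LinearAlgebra.LAPACK.trsen!('N', 'V', select, copy(S.T), copy(S.Z))
Z * T * Z' ≈ A                           # the orthogonal similarity is preserved
```

With `job = 'N'` no condition numbers are requested, so `s` and `sep` come back as the zeros they were initialized to; pass `'E'`, `'V'`, or `'B'` to have LAPACK fill them in.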
- function tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix{$elty}, T::AbstractMatrix{$elty}, - Q::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty}) - chkstride1(select, S, T, Q, Z) - n, nt, nq, nz = checksquare(S, T, Q, Z) - if n != nt - throw(DimensionMismatch("dimensions of S, ($n,$n), and T, ($nt,$nt), must match")) - end - if n != nq - throw(DimensionMismatch("dimensions of S, ($n,$n), and Q, ($nq,$nq), must match")) - end - if n != nz - throw(DimensionMismatch("dimensions of S, ($n,$n), and Z, ($nz,$nz), must match")) - end - lds = max(1, stride(S, 2)) - ldt = max(1, stride(T, 2)) - ldq = max(1, stride(Q, 2)) - ldz = max(1, stride(Z, 2)) - m = sum(select) - alphai = similar(T, $elty, n) - alphar = similar(T, $elty, n) - beta = similar(T, $elty, n) - lwork = BlasInt(-1) - work = Vector{$elty}(undef, 1) - liwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - info = Ref{BlasInt}() - select = convert(Array{BlasInt}, select) - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($tgsen), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{BlasInt}), - 0, 1, 1, select, - n, S, lds, T, - ldt, alphar, alphai, beta, - Q, ldq, Z, ldz, - m, C_NULL, C_NULL, C_NULL, - work, lwork, iwork, liwork, - info) - chklapackerror(info[]) - if i == 1 # only estimated optimal lwork, liwork - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - liwork = BlasInt(real(iwork[1])) - resize!(iwork, liwork) - end - end - S, T, complex.(alphar, alphai), beta, Q, Z - end - end -end - -for (trexc, trsen, tgsen, elty, relty) in - ((:ztrexc_, :ztrsen_, :ztgsen_, :ComplexF64, :Float64), - (:ctrexc_, :ctrsen_, :ctgsen_, :ComplexF32, :Float32)) - @eval begin - # .. Scalar Arguments .. - # CHARACTER COMPQ - # INTEGER IFST, ILST, INFO, LDQ, LDT, N - # .. - # .. Array Arguments .. - # DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) - function trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) - chkstride1(T, Q) - n = checksquare(T) - ldt = max(1, stride(T, 2)) - ldq = max(1, stride(Q, 2)) - info = Ref{BlasInt}() - ccall((@blasfunc($trexc), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Ref{BlasInt}, - Ptr{BlasInt}, Clong), - compq, n, - T, ldt, Q, ldq, - ifst, ilst, - info, 1) - chklapackerror(info[]) - T, Q - end - trexc!(ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) = - trexc!('V', ifst, ilst, T, Q) - - # .. Scalar Arguments .. - # CHARACTER COMPQ, JOB - # INTEGER INFO, LDQ, LDT, LWORK, M, N - # DOUBLE PRECISION S, SEP - # .. - # .. Array Arguments .. 
- # LOGICAL SELECT( * ) - # COMPLEX Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) - function trsen!(job::AbstractChar, compq::AbstractChar, select::AbstractVector{BlasInt}, - T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) - chkstride1(select, T, Q) - n = checksquare(T) - ldt = max(1, stride(T, 2)) - ldq = max(1, stride(Q, 2)) - w = similar(T, $elty, n) - m = sum(select) - work = Vector{$elty}(undef, 1) - lwork = BlasInt(-1) - info = Ref{BlasInt}() - select = convert(Array{BlasInt}, select) - s = Ref{$relty}(zero($relty)) - sep = Ref{$relty}(zero($relty)) - for i = 1:2 # first call returns lwork as work[1] - ccall((@blasfunc($trsen), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ref{$relty}, - Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), - job, compq, select, n, - T, ldt, Q, ldq, - w, m, s, sep, - work, lwork, - info, 1, 1) - chklapackerror(info[]) - if i == 1 # only estimated optimal lwork, liwork - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - end - end - T, Q, w, s[], sep[] - end - trsen!(select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) = - trsen!('N', 'V', select, T, Q) - - # .. Scalar Arguments .. - # LOGICAL WANTQ, WANTZ - # INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, - # $ M, N - # DOUBLE PRECISION PL, PR - # .. - # .. Array Arguments .. - # LOGICAL SELECT( * ) - # INTEGER IWORK( * ) - # DOUBLE PRECISION DIF( * ) - # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), - # $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) - # .. - function tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix{$elty}, T::AbstractMatrix{$elty}, - Q::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty}) - chkstride1(select, S, T, Q, Z) - n, nt, nq, nz = checksquare(S, T, Q, Z) - if n != nt - throw(DimensionMismatch("dimensions of S, ($n,$n), and T, ($nt,$nt), must match")) - end - if n != nq - throw(DimensionMismatch("dimensions of S, ($n,$n), and Q, ($nq,$nq), must match")) - end - if n != nz - throw(DimensionMismatch("dimensions of S, ($n,$n), and Z, ($nz,$nz), must match")) - end - lds = max(1, stride(S, 2)) - ldt = max(1, stride(T, 2)) - ldq = max(1, stride(Q, 2)) - ldz = max(1, stride(Z, 2)) - m = sum(select) - alpha = similar(T, $elty, n) - beta = similar(T, $elty, n) - lwork = BlasInt(-1) - work = Vector{$elty}(undef, 1) - liwork = BlasInt(-1) - iwork = Vector{BlasInt}(undef, 1) - info = Ref{BlasInt}() - select = convert(Array{BlasInt}, select) - for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] - ccall((@blasfunc($tgsen), libblastrampoline), Cvoid, - (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, - Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ref{BlasInt}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, - Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, - Ptr{BlasInt}), - 0, 1, 1, select, - n, S, lds, T, - ldt, alpha, beta, - Q, ldq, Z, ldz, - m, C_NULL, C_NULL, C_NULL, - work, lwork, iwork, liwork, - info) - chklapackerror(info[]) - if i == 1 # only estimated optimal lwork, liwork - lwork = BlasInt(real(work[1])) - resize!(work, lwork) - liwork = BlasInt(real(iwork[1])) - resize!(iwork, liwork) - end - end - S, T, alpha, beta, Q, Z - end - end -end - -""" - trexc!(compq, ifst, ilst, T, Q) -> (T, Q) - trexc!(ifst, ilst, T, Q) -> (T, Q) - -Reorder the Schur factorization `T` 
of a matrix, such that the diagonal block -of `T` with row index `ifst` is moved to row index `ilst`. If `compq = V`, the Schur -vectors `Q` are reordered. If `compq = N` they are not modified. The 4-arg method -calls the 5-arg method with `compq = V`. -""" -trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix, Q::AbstractMatrix) - -""" - trsen!(job, compq, select, T, Q) -> (T, Q, w, s, sep) - trsen!(select, T, Q) -> (T, Q, w, s, sep) - -Reorder the Schur factorization of a matrix and optionally finds reciprocal -condition numbers. If `job = N`, no condition numbers are found. If `job = E`, -only the condition number for this cluster of eigenvalues is found. If -`job = V`, only the condition number for the invariant subspace is found. -If `job = B` then the condition numbers for the cluster and subspace are -found. If `compq = V` the Schur vectors `Q` are updated. If `compq = N` -the Schur vectors are not modified. `select` determines which -eigenvalues are in the cluster. The 3-arg method calls the 5-arg method -with `job = N` and `compq = V`. - -Returns `T`, `Q`, reordered eigenvalues in `w`, the condition number of the -cluster of eigenvalues `s`, and the condition number of the invariant subspace -`sep`. -""" -trsen!(compq::AbstractChar, job::AbstractChar, select::AbstractVector{BlasInt}, T::AbstractMatrix, Q::AbstractMatrix) - -""" - tgsen!(select, S, T, Q, Z) -> (S, T, alpha, beta, Q, Z) - -Reorders the vectors of a generalized Schur decomposition. `select` specifies -the eigenvalues in each cluster. -""" -tgsen!(select::AbstractVector{BlasInt}, S::AbstractMatrix, T::AbstractMatrix, Q::AbstractMatrix, Z::AbstractMatrix) - -for (fn, elty, relty) in ((:dtrsyl_, :Float64, :Float64), - (:strsyl_, :Float32, :Float32), - (:ztrsyl_, :ComplexF64, :Float64), - (:ctrsyl_, :ComplexF32, :Float32)) - @eval begin - function trsyl!(transa::AbstractChar, transb::AbstractChar, A::AbstractMatrix{$elty}, - B::AbstractMatrix{$elty}, C::AbstractMatrix{$elty}, isgn::Int=1) - require_one_based_indexing(A, B, C) - chkstride1(A, B, C) - m, n = checksquare(A), checksquare(B) - lda = max(1, stride(A, 2)) - ldb = max(1, stride(B, 2)) - m1, n1 = size(C) - if m != m1 || n != n1 - throw(DimensionMismatch("dimensions of A, ($m,$n), and C, ($m1,$n1), must match")) - end - ldc = max(1, stride(C, 2)) - scale = Ref{$relty}() - info = Ref{BlasInt}() - ccall((@blasfunc($fn), libblastrampoline), Cvoid, - (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, - Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{BlasInt}, Clong, Clong), - transa, transb, isgn, m, n, - A, lda, B, ldb, C, ldc, - scale, info, 1, 1) - chklapackerror(info[]) - C, scale[] - end - end -end - -""" - trsyl!(transa, transb, A, B, C, isgn=1) -> (C, scale) - -Solves the Sylvester matrix equation `A * X +/- X * B = scale*C` where `A` and -`B` are both quasi-upper triangular. If `transa = N`, `A` is not modified. -If `transa = T`, `A` is transposed. If `transa = C`, `A` is conjugate -transposed. Similarly for `transb` and `B`. If `isgn = 1`, the equation -`A * X + X * B = scale * C` is solved. If `isgn = -1`, the equation -`A * X - X * B = scale * C` is solved. - -Returns `X` (overwriting `C`) and `scale`. 
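As a quick illustration appended here for clarity (a sketch, not part of the original docstring), with upper-triangular `A` and `B` of the kind a Schur factorization produces, which is how `sylvester` feeds this routine internally:

```julia
using LinearAlgebra

A = [1.0 2.0; 0.0 3.0]                   # upper triangular
B = [4.0 1.0; 0.0 5.0]
C = [1.0 0.0; 0.0 1.0]
X, scale = LinearAlgebra.LAPACK.trsyl!('N', 'N', A, B, copy(C))
X ./= scale                              # scale < 1 only when needed to avoid overflow
A*X + X*B ≈ C                            # true
```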
-""" -trsyl!(transa::AbstractChar, transb::AbstractChar, A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, isgn::Int=1) - -end # module diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl deleted file mode 100644 index b133741611adc..0000000000000 --- a/stdlib/LinearAlgebra/src/lbt.jl +++ /dev/null @@ -1,314 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## This file contains libblastrampoline-specific APIs - -# Keep these in sync with `src/libblastrampoline_internal.h` -struct lbt_library_info_t - libname::Cstring - handle::Ptr{Cvoid} - suffix::Cstring - active_forwards::Ptr{UInt8} - interface::Int32 - complex_retstyle::Int32 - f2c::Int32 - cblas::Int32 -end -const LBT_INTERFACE_LP64 = 32 -const LBT_INTERFACE_ILP64 = 64 -const LBT_INTERFACE_UNKNOWN = -1 -const LBT_INTERFACE_MAP = Dict( - LBT_INTERFACE_LP64 => :lp64, - LBT_INTERFACE_ILP64 => :ilp64, - LBT_INTERFACE_UNKNOWN => :unknown, -) -const LBT_INV_INTERFACE_MAP = Dict(v => k for (k, v) in LBT_INTERFACE_MAP) - -const LBT_F2C_PLAIN = 0 -const LBT_F2C_REQUIRED = 1 -const LBT_F2C_UNKNOWN = -1 -const LBT_F2C_MAP = Dict( - LBT_F2C_PLAIN => :plain, - LBT_F2C_REQUIRED => :required, - LBT_F2C_UNKNOWN => :unknown, -) -const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP) - -const LBT_COMPLEX_RETSTYLE_NORMAL = 0 -const LBT_COMPLEX_RETSTYLE_ARGUMENT = 1 -const LBT_COMPLEX_RETSTYLE_UNKNOWN = -1 -const LBT_COMPLEX_RETSTYLE_MAP = Dict( - LBT_COMPLEX_RETSTYLE_NORMAL => :normal, - LBT_COMPLEX_RETSTYLE_ARGUMENT => :argument, - LBT_COMPLEX_RETSTYLE_UNKNOWN => :unknown, -) -const LBT_INV_COMPLEX_RETSTYLE_MAP = Dict(v => k for (k, v) in LBT_COMPLEX_RETSTYLE_MAP) - -const LBT_CBLAS_CONFORMANT = 0 -const LBT_CBLAS_DIVERGENT = 1 -const LBT_CBLAS_UNKNOWN = -1 -const LBT_CBLAS_MAP = Dict( - LBT_CBLAS_CONFORMANT => :conformant, - LBT_CBLAS_DIVERGENT => :divergent, - LBT_CBLAS_UNKNOWN => :unknown, -) -const LBT_INV_CBLAS_MAP = Dict(v => k for (k, v) in LBT_CBLAS_MAP) - -struct LBTLibraryInfo - libname::String - handle::Ptr{Cvoid} - suffix::String - active_forwards::Vector{UInt8} - interface::Symbol - complex_retstyle::Symbol - f2c::Symbol - cblas::Symbol - - function LBTLibraryInfo(lib_info::lbt_library_info_t, num_exported_symbols::UInt32) - return new( - unsafe_string(lib_info.libname), - lib_info.handle, - unsafe_string(lib_info.suffix), - unsafe_wrap(Vector{UInt8}, lib_info.active_forwards, div(num_exported_symbols,8)+1), - LBT_INTERFACE_MAP[lib_info.interface], - LBT_COMPLEX_RETSTYLE_MAP[lib_info.complex_retstyle], - LBT_F2C_MAP[lib_info.f2c], - LBT_CBLAS_MAP[lib_info.cblas], - ) - end -end - -struct lbt_config_t - loaded_libs::Ptr{Ptr{lbt_library_info_t}} - build_flags::UInt32 - exported_symbols::Ptr{Cstring} - num_exported_symbols::UInt32 -end -const LBT_BUILDFLAGS_DEEPBINDLESS = 0x01 -const LBT_BUILDFLAGS_F2C_CAPABLE = 0x02 -const LBT_BUILDFLAGS_CBLAS_DIVERGENCE = 0x04 -const LBT_BUILDFLAGS_COMPLEX_RETSTYLE = 0x08 -const LBT_BUILDFLAGS_SYMBOL_TRIMMING = 0x10 -const LBT_BUILDFLAGS_MAP = Dict( - LBT_BUILDFLAGS_DEEPBINDLESS => :deepbindless, - LBT_BUILDFLAGS_F2C_CAPABLE => :f2c_capable, - LBT_BUILDFLAGS_CBLAS_DIVERGENCE => :cblas_divergence, - LBT_BUILDFLAGS_COMPLEX_RETSTYLE => :complex_retstyle, - LBT_BUILDFLAGS_SYMBOL_TRIMMING => :symbol_trimming, -) - -struct LBTConfig - loaded_libs::Vector{LBTLibraryInfo} - build_flags::Vector{Symbol} - exported_symbols::Vector{String} - - function LBTConfig(config::lbt_config_t) - # Decode OR'ed flags into a list of names - 
build_flag_names = Symbol[] - for (flag, name) in LBT_BUILDFLAGS_MAP - if config.build_flags & flag != 0x00 - push!(build_flag_names, name) - end - end - - # Load all exported symbol names - exported_symbols = String[] - for sym_idx in 1:config.num_exported_symbols - str_ptr = unsafe_load(config.exported_symbols, sym_idx) - if str_ptr != C_NULL - push!(exported_symbols, unsafe_string(str_ptr)) - else - @error("NULL string in lbt_config.exported_symbols[$(sym_idx)]") - end - end - - # Unpack library info structures - libs = LBTLibraryInfo[] - idx = 1 - lib_ptr = unsafe_load(config.loaded_libs, idx) - while lib_ptr != C_NULL - push!(libs, LBTLibraryInfo(unsafe_load(lib_ptr), config.num_exported_symbols)) - - idx += 1 - lib_ptr = unsafe_load(config.loaded_libs, idx) - end - return new( - libs, - build_flag_names, - exported_symbols, - ) - end -end - -Base.show(io::IO, lbt::LBTLibraryInfo) = print(io, "LBTLibraryInfo(", basename(lbt.libname), ", ", lbt.interface, ")") -function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTLibraryInfo) - summary(io, lbt); println(io) - println(io, "├ Library: ", basename(lbt.libname)) - println(io, "├ Interface: ", lbt.interface) - println(io, "├ Complex return style: ", lbt.complex_retstyle) - println(io, "├ F2C: ", lbt.f2c) - print(io, "└ CBLAS: ", lbt.cblas) -end - -function Base.show(io::IO, lbt::LBTConfig) - if length(lbt.loaded_libs) <= 3 - print(io, "LBTConfig(") - gen = (string("[", uppercase(string(l.interface)), "] ", - basename(l.libname)) for l in lbt.loaded_libs) - print(io, join(gen, ", ")) - print(io, ")") - else - print(io, "LBTConfig(...)") - end -end -function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTConfig) - summary(io, lbt); println(io) - println(io, "Libraries: ") - for (i,l) in enumerate(lbt.loaded_libs) - char = i == length(lbt.loaded_libs) ? "└" : "├" - interface_str = if l.interface === :ilp64 - "ILP64" - elseif l.interface === :lp64 - " LP64" - else - "UNKWN" - end - print(io, char, " [", interface_str,"] ", basename(l.libname)) - i !== length(lbt.loaded_libs) && println() - end -end - -mutable struct ConfigCache - @atomic config::Union{Nothing,LBTConfig} - lock::ReentrantLock -end - -# In the event that users want to call `lbt_get_config()` multiple times (e.g. for -# runtime checks of which BLAS vendor is providing a symbol), let's cache the value -# and clear it only when someone calls something that would cause it to change. 
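For context on why the cache below exists, this is how the configuration is typically consumed from user code (a sketch; it assumes the public `LinearAlgebra.BLAS.get_config()` wrapper, which forwards to `lbt_get_config()` and therefore benefits from the caching described above):

```julia
using LinearAlgebra: BLAS

cfg = BLAS.get_config()                  # an LBTConfig; repeated calls hit the cache
for lib in cfg.loaded_libs
    println(basename(lib.libname), " (", lib.interface, ")")
end
cfg.build_flags                          # build-time LBT capabilities, e.g. :deepbindless
```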
-const _CACHED_CONFIG = ConfigCache(nothing, ReentrantLock()) - -function lbt_get_config() - config = @atomic :acquire _CACHED_CONFIG.config - config === nothing || return config - return lock(_CACHED_CONFIG.lock) do - local config = @atomic :monotonic _CACHED_CONFIG.config - config === nothing || return config - config_ptr = ccall((:lbt_get_config, libblastrampoline), Ptr{lbt_config_t}, ()) - @atomic :release _CACHED_CONFIG.config = LBTConfig(unsafe_load(config_ptr)) - end -end - -function _clear_config_with(f) - lock(_CACHED_CONFIG.lock) do - @atomic :release _CACHED_CONFIG.config = nothing - f() - end -end - -function lbt_get_num_threads() - return ccall((:lbt_get_num_threads, libblastrampoline), Int32, ()) -end - -function lbt_set_num_threads(nthreads) - return ccall((:lbt_set_num_threads, libblastrampoline), Cvoid, (Int32,), nthreads) -end - -function lbt_forward(path::AbstractString; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing) - _clear_config_with() do - return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring), - path, clear ? 1 : 0, verbose ? 1 : 0, something(suffix_hint, C_NULL)) - end -end - -function lbt_set_default_func(addr) - _clear_config_with() do - return ccall((:lbt_set_default_func, libblastrampoline), Cvoid, (Ptr{Cvoid},), addr) - end -end - -function lbt_get_default_func() - return ccall((:lbt_get_default_func, libblastrampoline), Ptr{Cvoid}, ()) -end - -""" - lbt_find_backing_library(symbol_name, interface; config::LBTConfig = lbt_get_config()) - -Return the `LBTLibraryInfo` that represents the backing library for the given symbol -exported from libblastrampoline. This allows us to discover which library will service -a particular BLAS call from Julia code. This method returns `nothing` if either of the -following conditions are met: - - * No loaded library exports the desired symbol (the default function will be called) - * The symbol was set via `lbt_set_forward()`, which does not track library provenance. - -If the given `symbol_name` is not contained within the list of exported symbols, an -`ArgumentError` will be thrown. -""" -function lbt_find_backing_library(symbol_name, interface::Symbol; - config::LBTConfig = lbt_get_config()) - if interface ∉ (:ilp64, :lp64) - throw(ArgumentError("Invalid interface specification: '$(interface)'")) - end - symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols) - if symbol_idx === nothing - throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'")) - end - # Convert to zero-indexed - symbol_idx -= 1 - - forward_byte_offset = div(symbol_idx, 8) - forward_byte_mask = 1 << mod(symbol_idx, 8) - for lib in filter(l -> l.interface == interface, config.loaded_libs) - if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00 - return lib - end - end - - # No backing library was found - return nothing -end - - -## NOTE: Manually setting forwards is referred to as the 'footgun API'. It allows truly -## bizarre and complex setups to be created. If you run into strange errors while using -## it, the first thing you should ask yourself is whether you've set things up properly. 
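In contrast to the manual-forwarding ("footgun") API below, `lbt_find_backing_library` defined above answers the read-only question of which loaded library currently services a symbol. A hedged usage sketch; it assumes the underscore-suffixed symbol name `"dgemm_"` is present in LBT's exported-symbol list and that the build uses the ILP64 interface, as stock 64-bit Julia builds do:

```julia
using LinearAlgebra: BLAS

lib = BLAS.lbt_find_backing_library("dgemm_", :ilp64)
if lib === nothing
    println("dgemm_ falls through to the default function or a manual forward")
else
    println("dgemm_ is forwarded to ", basename(lib.libname))
end
```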
-function lbt_set_forward(symbol_name, addr, interface, - complex_retstyle = LBT_COMPLEX_RETSTYLE_NORMAL, - f2c = LBT_F2C_PLAIN; verbose::Bool = false) - _clear_config_with() do - return ccall( - (:lbt_set_forward, libblastrampoline), - Int32, - (Cstring, Ptr{Cvoid}, Int32, Int32, Int32, Int32), - string(symbol_name), - addr, - Int32(interface), - Int32(complex_retstyle), - Int32(f2c), - verbose ? Int32(1) : Int32(0), - ) - end -end -function lbt_set_forward(symbol_name, addr, interface::Symbol, - complex_retstyle::Symbol = :normal, - f2c::Symbol = :plain; kwargs...) - return lbt_set_forward(symbol_name, addr, - LBT_INV_INTERFACE_MAP[interface], - LBT_INV_COMPLEX_RETSTYLE_MAP[complex_retstyle], - LBT_INV_F2C_MAP[f2c]; - kwargs...) -end - -function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN) - return ccall( - (:lbt_get_forward, libblastrampoline), - Ptr{Cvoid}, - (Cstring, Int32, Int32), - string(symbol_name), - Int32(interface), - Int32(f2c), - ) -end -function lbt_get_forward(symbol_name, interface::Symbol, f2c::Symbol = :plain) - return lbt_get_forward(symbol_name, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c]) -end diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl deleted file mode 100644 index d3d6234961c44..0000000000000 --- a/stdlib/LinearAlgebra/src/ldlt.jl +++ /dev/null @@ -1,224 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - LDLt <: Factorization - -Matrix factorization type of the `LDLt` factorization of a real [`SymTridiagonal`](@ref) -matrix `S` such that `S = L*Diagonal(d)*L'`, where `L` is a [`UnitLowerTriangular`](@ref) -matrix and `d` is a vector. The main use of an `LDLt` factorization `F = ldlt(S)` -is to solve the linear system of equations `Sx = b` with `F\\b`. This is the -return type of [`ldlt`](@ref), the corresponding matrix factorization function. 
- -The individual components of the factorization `F::LDLt` can be accessed via `getproperty`: - -| Component | Description | -|:---------:|:--------------------------------------------| -| `F.L` | `L` (unit lower triangular) part of `LDLt` | -| `F.D` | `D` (diagonal) part of `LDLt` | -| `F.Lt` | `Lt` (unit upper triangular) part of `LDLt` | -| `F.d` | diagonal values of `D` as a `Vector` | - -# Examples -```jldoctest -julia> S = SymTridiagonal([3., 4., 5.], [1., 2.]) -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 3.0 1.0 ⋅ - 1.0 4.0 2.0 - ⋅ 2.0 5.0 - -julia> F = ldlt(S) -LDLt{Float64, SymTridiagonal{Float64, Vector{Float64}}} -L factor: -3×3 UnitLowerTriangular{Float64, SymTridiagonal{Float64, Vector{Float64}}}: - 1.0 ⋅ ⋅ - 0.333333 1.0 ⋅ - 0.0 0.545455 1.0 -D factor: -3×3 Diagonal{Float64, Vector{Float64}}: - 3.0 ⋅ ⋅ - ⋅ 3.66667 ⋅ - ⋅ ⋅ 3.90909 -``` -""" -struct LDLt{T,S<:AbstractMatrix{T}} <: Factorization{T} - data::S - - function LDLt{T,S}(data) where {T,S<:AbstractMatrix{T}} - require_one_based_indexing(data) - new{T,S}(data) - end -end -LDLt(data::AbstractMatrix{T}) where {T} = LDLt{T,typeof(data)}(data) -LDLt{T}(data::AbstractMatrix) where {T} = LDLt(convert(AbstractMatrix{T}, data)::AbstractMatrix{T}) - -size(S::LDLt) = size(S.data) -size(S::LDLt, i::Integer) = size(S.data, i) - -LDLt{T,S}(F::LDLt{T,S}) where {T,S<:AbstractMatrix{T}} = F -LDLt{T,S}(F::LDLt) where {T,S<:AbstractMatrix{T}} = LDLt{T,S}(convert(S, F.data)::S) -LDLt{T}(F::LDLt{T}) where {T} = F -LDLt{T}(F::LDLt) where {T} = LDLt(convert(AbstractMatrix{T}, F.data)::AbstractMatrix{T}) - -Factorization{T}(F::LDLt{T}) where {T} = F -Factorization{T}(F::LDLt) where {T} = LDLt{T}(F) - -function getproperty(F::LDLt{<:Any, <:SymTridiagonal}, d::Symbol) - Fdata = getfield(F, :data) - if d === :d - return Fdata.dv - elseif d === :D - return Diagonal(Fdata.dv) - elseif d === :L - return UnitLowerTriangular(Fdata) - elseif d === :Lt - return UnitUpperTriangular(Fdata) - else - return getfield(F, d) - end -end - -adjoint(F::LDLt{<:Real,<:SymTridiagonal}) = F -adjoint(F::LDLt) = LDLt(copy(adjoint(F.data))) - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LDLt) - summary(io, F); println(io) - println(io, "L factor:") - show(io, mime, F.L) - println(io, "\nD factor:") - show(io, mime, F.D) -end - -# SymTridiagonal -""" - ldlt!(S::SymTridiagonal) -> LDLt - -Same as [`ldlt`](@ref), but saves space by overwriting the input `S`, instead of creating a copy. - -# Examples -```jldoctest -julia> S = SymTridiagonal([3., 4., 5.], [1., 2.]) -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 3.0 1.0 ⋅ - 1.0 4.0 2.0 - ⋅ 2.0 5.0 - -julia> ldltS = ldlt!(S); - -julia> ldltS === S -false - -julia> S -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 3.0 0.333333 ⋅ - 0.333333 3.66667 0.545455 - ⋅ 0.545455 3.90909 -``` -""" -function ldlt!(S::SymTridiagonal{T,V}) where {T,V} - n = size(S,1) - d = S.dv - e = S.ev - @inbounds for i in 1:n-1 - iszero(d[i]) && throw(ZeroPivotException(i)) - e[i] /= d[i] - d[i+1] -= e[i]^2*d[i] - end - return LDLt{T,SymTridiagonal{T,V}}(S) -end - -""" - ldlt(S::SymTridiagonal) -> LDLt - -Compute an `LDLt` (i.e., ``LDL^T``) factorization of the real symmetric tridiagonal matrix `S` such that `S = L*Diagonal(d)*L'` -where `L` is a unit lower triangular matrix and `d` is a vector. The main use of an `LDLt` -factorization `F = ldlt(S)` is to solve the linear system of equations `Sx = b` with `F\\b`. 
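For reference, the recurrence that the in-place loop of `ldlt!` above carries out, overwriting `S.ev` with the subdiagonal of `L` and `S.dv` with the entries of `D`, is

```math
\delta_1 = d_1, \qquad
l_i = \frac{e_i}{\delta_i}, \qquad
\delta_{i+1} = d_{i+1} - l_i^2\,\delta_i, \qquad i = 1, \dots, n-1,
```

where `d` and `e` are the diagonal and off-diagonal of `S`. A zero pivot aborts with a `ZeroPivotException` before the division, and because the factors are stored in place, `getproperty` can later serve `F.L`, `F.D`, and `F.d` directly from the mutated `SymTridiagonal` with no extra storage.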
- -See also [`bunchkaufman`](@ref) for a similar, but pivoted, factorization of arbitrary symmetric or Hermitian matrices. - -# Examples -```jldoctest -julia> S = SymTridiagonal([3., 4., 5.], [1., 2.]) -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 3.0 1.0 ⋅ - 1.0 4.0 2.0 - ⋅ 2.0 5.0 - -julia> ldltS = ldlt(S); - -julia> b = [6., 7., 8.]; - -julia> ldltS \\ b -3-element Vector{Float64}: - 1.7906976744186047 - 0.627906976744186 - 1.3488372093023255 - -julia> S \\ b -3-element Vector{Float64}: - 1.7906976744186047 - 0.627906976744186 - 1.3488372093023255 -``` -""" -function ldlt(M::SymTridiagonal{T}; shift::Number=false) where T - S = typeof((zero(T)+shift)/one(T)) - Mₛ = SymTridiagonal{S}(copymutable_oftype(M.dv, S), copymutable_oftype(M.ev, S)) - if !iszero(shift) - Mₛ.dv .+= shift - end - return ldlt!(Mₛ) -end - -factorize(S::SymTridiagonal) = ldlt(S) - -function ldiv!(S::LDLt{<:Any,<:SymTridiagonal}, B::AbstractVecOrMat) - require_one_based_indexing(B) - n, nrhs = size(B, 1), size(B, 2) - if size(S,1) != n - throw(DimensionMismatch("Matrix has dimensions $(size(S)) but right hand side has first dimension $n")) - end - d = S.data.dv - l = S.data.ev - @inbounds begin - for i = 2:n - li1 = l[i-1] - @simd for j = 1:nrhs - B[i,j] -= li1*B[i-1,j] - end - end - dn = d[n] - @simd for j = 1:nrhs - B[n,j] /= dn - end - for i = n-1:-1:1 - di = d[i] - li = l[i] - @simd for j = 1:nrhs - B[i,j] /= di - B[i,j] -= li*B[i+1,j] - end - end - end - return B -end - -rdiv!(B::AbstractVecOrMat, S::LDLt{<:Any,<:SymTridiagonal}) = - transpose(ldiv!(S, transpose(B))) - -function logabsdet(F::LDLt{<:Any,<:SymTridiagonal}) - it = (F.data[i,i] for i in 1:size(F, 1)) - return sum(log∘abs, it), prod(sign, it) -end - -# Conversion methods -function SymTridiagonal(F::LDLt{<:Any, <:SymTridiagonal}) - e = copy(F.data.ev) - d = copy(F.data.dv) - e .*= d[1:end-1] - d[2:end] += e .* F.data.ev - SymTridiagonal(d, e) -end -AbstractMatrix(F::LDLt) = SymTridiagonal(F) -AbstractArray(F::LDLt) = AbstractMatrix(F) -Matrix(F::LDLt) = Array(AbstractArray(F)) -Array(F::LDLt) = Matrix(F) diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl deleted file mode 100644 index 07d918c4374a5..0000000000000 --- a/stdlib/LinearAlgebra/src/lq.jl +++ /dev/null @@ -1,203 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# LQ Factorizations -""" - LQ <: Factorization - -Matrix factorization type of the `LQ` factorization of a matrix `A`. The `LQ` -decomposition is the [`QR`](@ref) decomposition of `transpose(A)`. This is the return -type of [`lq`](@ref), the corresponding matrix factorization function. - -If `S::LQ` is the factorization object, the lower triangular component can be -obtained via `S.L`, and the orthogonal/unitary component via `S.Q`, such that -`A ≈ S.L*S.Q`. - -Iterating the decomposition produces the components `S.L` and `S.Q`. - -# Examples -```jldoctest -julia> A = [5. 7.; -2. -4.] 
-2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> S = lq(A) -LQ{Float64, Matrix{Float64}, Vector{Float64}} -L factor: -2×2 Matrix{Float64}: - -8.60233 0.0 - 4.41741 -0.697486 -Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} - -julia> S.L * S.Q -2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> l, q = S; # destructuring via iteration - -julia> l == S.L && q == S.Q -true -``` -""" -struct LQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T} - factors::S - τ::C - - function LQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}} - require_one_based_indexing(factors) - new{T,S,C}(factors, τ) - end -end -LQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} = - LQ{T,typeof(factors),typeof(τ)}(factors, τ) -LQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = - LQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(LQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, - LQ{T,S,typeof(τ)}(factors, τ), false) - -# iteration for destructuring into components -Base.iterate(S::LQ) = (S.L, Val(:Q)) -Base.iterate(S::LQ, ::Val{:Q}) = (S.Q, Val(:done)) -Base.iterate(S::LQ, ::Val{:done}) = nothing - -""" - lq!(A) -> LQ - -Compute the [`LQ`](@ref) factorization of `A`, using the input -matrix as a workspace. See also [`lq`](@ref). -""" -lq!(A::StridedMatrix{<:BlasFloat}) = LQ(LAPACK.gelqf!(A)...) - -""" - lq(A) -> S::LQ - -Compute the LQ decomposition of `A`. The decomposition's lower triangular -component can be obtained from the [`LQ`](@ref) object `S` via `S.L`, and the -orthogonal/unitary component via `S.Q`, such that `A ≈ S.L*S.Q`. - -Iterating the decomposition produces the components `S.L` and `S.Q`. - -The LQ decomposition is the QR decomposition of `transpose(A)`, and it is useful -in order to compute the minimum-norm solution `lq(A) \\ b` to an underdetermined -system of equations (`A` has more columns than rows, but has full row rank). - -# Examples -```jldoctest -julia> A = [5. 7.; -2. -4.] 
-2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> S = lq(A) -LQ{Float64, Matrix{Float64}, Vector{Float64}} -L factor: -2×2 Matrix{Float64}: - -8.60233 0.0 - 4.41741 -0.697486 -Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} - -julia> S.L * S.Q -2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> l, q = S; # destructuring via iteration - -julia> l == S.L && q == S.Q -true -``` -""" -lq(A::AbstractMatrix{T}) where {T} = lq!(copy_similar(A, lq_eltype(T))) -lq(x::Number) = lq!(fill(convert(lq_eltype(typeof(x)), x), 1, 1)) - -lq_eltype(::Type{T}) where {T} = typeof(zero(T) / sqrt(abs2(one(T)))) - -copy(A::LQ) = LQ(copy(A.factors), copy(A.τ)) - -LQ{T}(A::LQ) where {T} = LQ(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ)) -Factorization{T}(A::LQ) where {T} = LQ{T}(A) - -AbstractMatrix(A::LQ) = A.L*A.Q -AbstractArray(A::LQ) = AbstractMatrix(A) -Matrix(A::LQ) = Array(AbstractArray(A)) -Array(A::LQ) = Matrix(A) - -transpose(F::LQ{<:Real}) = F' -transpose(::LQ) = - throw(ArgumentError("transpose of LQ decomposition is not supported, consider using adjoint")) - -Base.copy(F::AdjointFactorization{T,<:LQ{T}}) where {T} = - QR{T,typeof(F.parent.factors),typeof(F.parent.τ)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ)) - -function getproperty(F::LQ, d::Symbol) - m, n = size(F) - if d === :L - return tril!(getfield(F, :factors)[1:m, 1:min(m,n)]) - elseif d === :Q - return LQPackedQ(getfield(F, :factors), getfield(F, :τ)) - else - return getfield(F, d) - end -end - -Base.propertynames(F::LQ, private::Bool=false) = - (:L, :Q, (private ? fieldnames(typeof(F)) : ())...) - -# getindex(A::LQPackedQ, i::Integer, j::Integer) = -# lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i] - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ) - summary(io, F); println(io) - println(io, "L factor:") - show(io, mime, F.L) - print(io, "\nQ factor: ") - show(io, mime, F.Q) -end - -size(F::LQ, dim::Integer) = size(getfield(F, :factors), dim) -size(F::LQ) = size(getfield(F, :factors)) - -## Multiplication by LQ -function lmul!(A::LQ, B::AbstractVecOrMat) - lmul!(LowerTriangular(A.L), view(lmul!(A.Q, B), 1:size(A,1), axes(B,2))) - return B -end -function *(A::LQ{TA}, B::AbstractVecOrMat{TB}) where {TA,TB} - TAB = promote_type(TA, TB) - _cut_B(lmul!(convert(Factorization{TAB}, A), copy_similar(B, TAB)), 1:size(A,1)) -end - -# With a real lhs and complex rhs with the same precision, we can reinterpret -# the complex rhs as a real rhs with twice the number of columns -function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal - require_one_based_indexing(B) - X = zeros(T, size(F,2), 2*size(B,2)) - X[1:size(B,1), 1:size(B,2)] .= real.(B) - X[1:size(B,1), size(B,2)+1:size(X,2)] .= imag.(B) - ldiv!(F, X) - return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(X, div(length(X), 2), 2))))), - isa(B, AbstractVector) ? 
(size(F,2),) : (size(F,2), size(B,2))) -end - - -function ldiv!(A::LQ, B::AbstractVecOrMat) - require_one_based_indexing(B) - m, n = size(A) - m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)")) - - ldiv!(LowerTriangular(A.L), view(B, 1:size(A,1), axes(B,2))) - return lmul!(adjoint(A.Q), B) -end - -function ldiv!(Fadj::AdjointFactorization{<:Any,<:LQ}, B::AbstractVecOrMat) - require_one_based_indexing(B) - m, n = size(Fadj) - m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)")) - - F = parent(Fadj) - lmul!(F.Q, B) - ldiv!(UpperTriangular(adjoint(F.L)), view(B, 1:size(F,1), axes(B,2))) - return B -end diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl deleted file mode 100644 index 5d69090f27e44..0000000000000 --- a/stdlib/LinearAlgebra/src/lu.jl +++ /dev/null @@ -1,758 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -#################### -# LU Factorization # -#################### -""" - LU <: Factorization - -Matrix factorization type of the `LU` factorization of a square matrix `A`. This -is the return type of [`lu`](@ref), the corresponding matrix factorization function. - -The individual components of the factorization `F::LU` can be accessed via [`getproperty`](@ref): - -| Component | Description | -|:----------|:-----------------------------------------| -| `F.L` | `L` (unit lower triangular) part of `LU` | -| `F.U` | `U` (upper triangular) part of `LU` | -| `F.p` | (right) permutation `Vector` | -| `F.P` | (right) permutation `Matrix` | - -Iterating the factorization produces the components `F.L`, `F.U`, and `F.p`. - -# Examples -```jldoctest -julia> A = [4 3; 6 3] -2×2 Matrix{Int64}: - 4 3 - 6 3 - -julia> F = lu(A) -LU{Float64, Matrix{Float64}, Vector{Int64}} -L factor: -2×2 Matrix{Float64}: - 1.0 0.0 - 0.666667 1.0 -U factor: -2×2 Matrix{Float64}: - 6.0 3.0 - 0.0 1.0 - -julia> F.L * F.U == A[F.p, :] -true - -julia> l, u, p = lu(A); # destructuring via iteration - -julia> l == F.L && u == F.U && p == F.p -true -``` -""" -struct LU{T,S<:AbstractMatrix{T},P<:AbstractVector{<:Integer}} <: Factorization{T} - factors::S - ipiv::P - info::BlasInt - - function LU{T,S,P}(factors, ipiv, info) where {T, S<:AbstractMatrix{T}, P<:AbstractVector{<:Integer}} - require_one_based_indexing(factors) - new{T,S,P}(factors, ipiv, info) - end -end -LU(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, info::BlasInt) where {T} = - LU{T,typeof(factors),typeof(ipiv)}(factors, ipiv, info) -LU{T}(factors::AbstractMatrix, ipiv::AbstractVector{<:Integer}, info::Integer) where {T} = - LU(convert(AbstractMatrix{T}, factors), ipiv, BlasInt(info)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(LU{T,S}(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, - info::BlasInt) where {T,S}, - LU{T,S,typeof(ipiv)}(factors, ipiv, info), false) - -# iteration for destructuring into components -Base.iterate(S::LU) = (S.L, Val(:U)) -Base.iterate(S::LU, ::Val{:U}) = (S.U, Val(:p)) -Base.iterate(S::LU, ::Val{:p}) = (S.p, Val(:done)) -Base.iterate(S::LU, ::Val{:done}) = nothing - -# LU prefers transpose over adjoint in the real case, override the generic fallback -adjoint(F::LU{<:Real}) = TransposeFactorization(F) -transpose(F::LU{<:Real}) = TransposeFactorization(F) - -# the following method is meant to catch calls to lu!(A::LAPACKArray) without a pivoting stategy -lu!(A::StridedMatrix{<:BlasFloat}; 
check::Bool = true) = lu!(A, RowMaximum(); check=check) -function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat} - lpt = LAPACK.getrf!(A; check) - check && checknonsingular(lpt[3]) - return LU{T,typeof(lpt[1]),typeof(lpt[2])}(lpt[1], lpt[2], lpt[3]) -end -function lu!(A::HermOrSym{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T} - copytri!(A.data, A.uplo, isa(A, Hermitian)) - lu!(A.data, pivot; check = check) -end -# for backward compatibility -# TODO: remove towards Julia v2 -@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{true}; check::Bool = true) lu!(A, RowMaximum(); check=check) -@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{false}; check::Bool = true) lu!(A, NoPivot(); check=check) - -""" - lu!(A, pivot = RowMaximum(); check = true) -> LU - -`lu!` is the same as [`lu`](@ref), but saves space by overwriting the -input `A`, instead of creating a copy. An [`InexactError`](@ref) -exception is thrown if the factorization produces a number not representable by the -element type of `A`, e.g. for integer types. - -# Examples -```jldoctest -julia> A = [4. 3.; 6. 3.] -2×2 Matrix{Float64}: - 4.0 3.0 - 6.0 3.0 - -julia> F = lu!(A) -LU{Float64, Matrix{Float64}, Vector{Int64}} -L factor: -2×2 Matrix{Float64}: - 1.0 0.0 - 0.666667 1.0 -U factor: -2×2 Matrix{Float64}: - 6.0 3.0 - 0.0 1.0 - -julia> iA = [4 3; 6 3] -2×2 Matrix{Int64}: - 4 3 - 6 3 - -julia> lu!(iA) -ERROR: InexactError: Int64(0.6666666666666666) -Stacktrace: -[...] -``` -""" -lu!(A::AbstractMatrix, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(eltype(A)); check::Bool = true) = - generic_lufact!(A, pivot; check = check) -function generic_lufact!(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); - check::Bool = true) where {T} - LAPACK.chkfinite(A) - # Extract values - m, n = size(A) - minmn = min(m,n) - - # Initialize variables - info = 0 - ipiv = Vector{BlasInt}(undef, minmn) - @inbounds begin - for k = 1:minmn - # find index max - kp = k - if pivot === RowMaximum() && k < m - amax = abs(A[k, k]) - for i = k+1:m - absi = abs(A[i,k]) - if absi > amax - kp = i - amax = absi - end - end - elseif pivot === RowNonZero() - for i = k:m - if !iszero(A[i,k]) - kp = i - break - end - end - end - ipiv[k] = kp - if !iszero(A[kp,k]) - if k != kp - # Interchange - for i = 1:n - tmp = A[k,i] - A[k,i] = A[kp,i] - A[kp,i] = tmp - end - end - # Scale first column - Akkinv = inv(A[k,k]) - for i = k+1:m - A[i,k] *= Akkinv - end - elseif info == 0 - info = k - end - # Update the rest - for j = k+1:n - for i = k+1:m - A[i,j] -= A[i,k]*A[k,j] - end - end - end - end - check && checknonsingular(info, pivot) - return LU{T,typeof(A),typeof(ipiv)}(A, ipiv, convert(BlasInt, info)) -end - -function lutype(T::Type) - # In generic_lufact!, the elements of the lower part of the matrix are - # obtained using the division of two matrix elements. Hence their type can - # be different (e.g. the division of two types with the same unit is a type - # without unit). - # The elements of the upper part are obtained by U - U * L - # where U is an upper part element and L is a lower part element. 
- # Therefore, the types LT, UT should be invariant under the map: - # (LT, UT) -> begin - # L = oneunit(UT) / oneunit(UT) - # U = oneunit(UT) - oneunit(UT) * L - # typeof(L), typeof(U) - # end - # The following should handle most cases - UT = typeof(oneunit(T) - oneunit(T) * (oneunit(T) / (oneunit(T) + zero(T)))) - LT = typeof(oneunit(UT) / oneunit(UT)) - S = promote_type(T, LT, UT) -end - -lupivottype(::Type{T}) where {T} = RowMaximum() - -# for all other types we must promote to a type which is stable under division -""" - lu(A, pivot = RowMaximum(); check = true) -> F::LU - -Compute the LU factorization of `A`. - -When `check = true`, an error is thrown if the decomposition fails. -When `check = false`, responsibility for checking the decomposition's -validity (via [`issuccess`](@ref)) lies with the user. - -In most cases, if `A` is a subtype `S` of `AbstractMatrix{T}` with an element -type `T` supporting `+`, `-`, `*` and `/`, the return type is `LU{T,S{T}}`. - -In general, LU factorization involves a permutation of the rows of the matrix -(corresponding to the `F.p` output described below), known as "pivoting" (because it -corresponds to choosing which row contains the "pivot", the diagonal entry of `F.U`). -One of the following pivoting strategies can be selected via the optional `pivot` argument: - -* `RowMaximum()` (default): the standard pivoting strategy; the pivot corresponds - to the element of maximum absolute value among the remaining, to be factorized rows. - This pivoting strategy requires the element type to also support [`abs`](@ref) and - [`<`](@ref). (This is generally the only numerically stable option for floating-point - matrices.) -* `RowNonZero()`: the pivot corresponds to the first non-zero element among the remaining, - to be factorized rows. (This corresponds to the typical choice in hand calculations, and - is also useful for more general algebraic number types that support [`iszero`](@ref) but - not `abs` or `<`.) -* `NoPivot()`: pivoting turned off (may fail if a zero entry is encountered). - -The individual components of the factorization `F` can be accessed via [`getproperty`](@ref): - -| Component | Description | -|:----------|:------------------------------------| -| `F.L` | `L` (lower triangular) part of `LU` | -| `F.U` | `U` (upper triangular) part of `LU` | -| `F.p` | (right) permutation `Vector` | -| `F.P` | (right) permutation `Matrix` | - -Iterating the factorization produces the components `F.L`, `F.U`, and `F.p`. 
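To make the pivoting options above concrete, here is a minimal sketch (not part of the factorization code itself; it assumes a Julia ≥ 1.7 session with `LinearAlgebra` loaded, where `RowMaximum`, `RowNonZero` and `NoPivot` are exported): `RowNonZero()` only needs `iszero` on the element type, so it keeps exact arithmetic for `Rational` matrices, while `NoPivot()` can fail on a zero pivot.

```julia
using LinearAlgebra

A = Rational{Int}[2 1; 3 4]

# RowNonZero() only requires `iszero`, so the factorization stays exact.
F = lu(A, RowNonZero())
@assert F.L * F.U == A[F.p, :]          # exact equality, no round-off

# NoPivot() performs no row interchanges and may hit a zero pivot;
# with check = false the failure is reported via issuccess instead of an error.
@assert !issuccess(lu([0 1; 1 0], NoPivot(); check = false))
```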
- -The relationship between `F` and `A` is - -`F.L*F.U == A[F.p, :]` - -`F` further supports the following functions: - -| Supported function | `LU` | `LU{T,Tridiagonal{T}}` | -|:---------------------------------|:-----|:-----------------------| -| [`/`](@ref) | ✓ | | -| [`\\`](@ref) | ✓ | ✓ | -| [`inv`](@ref) | ✓ | ✓ | -| [`det`](@ref) | ✓ | ✓ | -| [`logdet`](@ref) | ✓ | ✓ | -| [`logabsdet`](@ref) | ✓ | ✓ | -| [`size`](@ref) | ✓ | ✓ | - -# Examples -```jldoctest -julia> A = [4 3; 6 3] -2×2 Matrix{Int64}: - 4 3 - 6 3 - -julia> F = lu(A) -LU{Float64, Matrix{Float64}, Vector{Int64}} -L factor: -2×2 Matrix{Float64}: - 1.0 0.0 - 0.666667 1.0 -U factor: -2×2 Matrix{Float64}: - 6.0 3.0 - 0.0 1.0 - -julia> F.L * F.U == A[F.p, :] -true - -julia> l, u, p = lu(A); # destructuring via iteration - -julia> l == F.L && u == F.U && p == F.p -true -``` -""" -function lu(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T} - lu!(_lucopy(A, lutype(T)), pivot; check = check) -end -# TODO: remove for Julia v2.0 -@deprecate lu(A::AbstractMatrix, ::Val{true}; check::Bool = true) lu(A, RowMaximum(); check=check) -@deprecate lu(A::AbstractMatrix, ::Val{false}; check::Bool = true) lu(A, NoPivot(); check=check) - -_lucopy(A::AbstractMatrix, T) = copy_similar(A, T) -_lucopy(A::HermOrSym, T) = copymutable_oftype(A, T) -_lucopy(A::Tridiagonal, T) = copymutable_oftype(A, T) - -lu(S::LU) = S -function lu(x::Number; check::Bool=true) - info = x == 0 ? one(BlasInt) : zero(BlasInt) - check && checknonsingular(info) - return LU(fill(x, 1, 1), BlasInt[1], info) -end - -function LU{T}(F::LU) where T - M = convert(AbstractMatrix{T}, F.factors) - LU{T,typeof(M),typeof(F.ipiv)}(M, F.ipiv, F.info) -end -LU{T,S,P}(F::LU) where {T,S,P} = LU{T,S,P}(convert(S, F.factors), convert(P, F.ipiv), F.info) -Factorization{T}(F::LU{T}) where {T} = F -Factorization{T}(F::LU) where {T} = LU{T}(F) - -copy(A::LU{T,S,P}) where {T,S,P} = LU{T,S,P}(copy(A.factors), copy(A.ipiv), A.info) - -size(A::LU) = size(getfield(A, :factors)) -size(A::LU, i::Integer) = size(getfield(A, :factors), i) - -function ipiv2perm(v::AbstractVector{T}, maxi::Integer) where T - require_one_based_indexing(v) - p = T[1:maxi;] - @inbounds for i in 1:length(v) - p[i], p[v[i]] = p[v[i]], p[i] - end - return p -end - -function getproperty(F::LU{T}, d::Symbol) where T - m, n = size(F) - if d === :L - L = tril!(getfield(F, :factors)[1:m, 1:min(m,n)]) - for i = 1:min(m,n); L[i,i] = one(T); end - return L - elseif d === :U - return triu!(getfield(F, :factors)[1:min(m,n), 1:n]) - elseif d === :p - return ipiv2perm(getfield(F, :ipiv), m) - elseif d === :P - return Matrix{T}(I, m, m)[:,invperm(F.p)] - else - getfield(F, d) - end -end - -Base.propertynames(F::LU, private::Bool=false) = - (:L, :U, :p, :P, (private ? fieldnames(typeof(F)) : ())...) 
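As an illustration of the packed layout just described (an informal sketch assuming `LinearAlgebra` is loaded, not a verbatim excerpt): `F.factors` holds both triangles, `F.ipiv` holds the row swaps that `ipiv2perm` replays to produce `F.p`, and a solve reduces to a row permutation followed by two triangular solves.

```julia
using LinearAlgebra

A = rand(4, 4); b = rand(4)
F = lu(A)

# Both triangles live in the single packed array F.factors.
@assert F.U ≈ triu(F.factors)
@assert F.L ≈ tril(F.factors, -1) + I        # the unit diagonal of L is implicit

# F.ipiv records successive row swaps; replaying them on 1:n reproduces F.p.
p = collect(1:4)
for (i, piv) in pairs(F.ipiv)
    p[i], p[piv] = p[piv], p[i]
end
@assert p == F.p && F.L * F.U ≈ A[p, :]

# Solving A x = b is then: permute the right-hand side, forward solve, back solve.
x = UpperTriangular(F.factors) \ (UnitLowerTriangular(F.factors) \ b[F.p])
@assert x ≈ A \ b
```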
- -issuccess(F::LU) = F.info == 0 - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LU) - if issuccess(F) - summary(io, F); println(io) - println(io, "L factor:") - show(io, mime, F.L) - println(io, "\nU factor:") - show(io, mime, F.U) - else - print(io, "Failed factorization of type $(typeof(F))") - end -end - -_apply_ipiv_rows!(A::LU, B::AbstractVecOrMat) = _ipiv_rows!(A, 1 : length(A.ipiv), B) -_apply_inverse_ipiv_rows!(A::LU, B::AbstractVecOrMat) = _ipiv_rows!(A, length(A.ipiv) : -1 : 1, B) - -function _ipiv_rows!(A::LU, order::OrdinalRange, B::AbstractVecOrMat) - for i = order - if i != A.ipiv[i] - _swap_rows!(B, i, A.ipiv[i]) - end - end - B -end - -function _swap_rows!(B::AbstractVector, i::Integer, j::Integer) - B[i], B[j] = B[j], B[i] - B -end - -function _swap_rows!(B::AbstractMatrix, i::Integer, j::Integer) - for col = 1 : size(B, 2) - B[i,col], B[j,col] = B[j,col], B[i,col] - end - B -end - -_apply_ipiv_cols!(A::LU, B::AbstractVecOrMat) = _ipiv_cols!(A, 1 : length(A.ipiv), B) -_apply_inverse_ipiv_cols!(A::LU, B::AbstractVecOrMat) = _ipiv_cols!(A, length(A.ipiv) : -1 : 1, B) - -function _ipiv_cols!(A::LU, order::OrdinalRange, B::AbstractVecOrMat) - for i = order - if i != A.ipiv[i] - _swap_cols!(B, i, A.ipiv[i]) - end - end - B -end - -function _swap_cols!(B::AbstractVector, i::Integer, j::Integer) - _swap_rows!(B, i, j) -end - -function _swap_cols!(B::AbstractMatrix, i::Integer, j::Integer) - for row = 1 : size(B, 1) - B[row,i], B[row,j] = B[row,j], B[row,i] - end - B -end - -function rdiv!(A::AbstractVecOrMat, B::LU) - rdiv!(rdiv!(A, UpperTriangular(B.factors)), UnitLowerTriangular(B.factors)) - _apply_inverse_ipiv_cols!(B, A) -end - -ldiv!(A::LU{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.getrs!('N', A.factors, A.ipiv, B) - -function ldiv!(A::LU, B::AbstractVecOrMat) - _apply_ipiv_rows!(A, B) - ldiv!(UpperTriangular(A.factors), ldiv!(UnitLowerTriangular(A.factors), B)) -end - -ldiv!(transA::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - (A = transA.parent; LAPACK.getrs!('T', A.factors, A.ipiv, B)) - -function ldiv!(transA::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) - A = transA.parent - ldiv!(transpose(UnitLowerTriangular(A.factors)), ldiv!(transpose(UpperTriangular(A.factors)), B)) - _apply_inverse_ipiv_rows!(A, B) -end - -ldiv!(adjA::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (A = adjA.parent; LAPACK.getrs!('C', A.factors, A.ipiv, B)) - -function ldiv!(adjA::AdjointFactorization{<:Any,<:LU}, B::AbstractVecOrMat) - A = adjA.parent - ldiv!(adjoint(UnitLowerTriangular(A.factors)), ldiv!(adjoint(UpperTriangular(A.factors)), B)) - _apply_inverse_ipiv_rows!(A, B) -end - -(\)(A::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasComplex} = - LAPACK.getrs!('C', A.parent.factors, A.parent.ipiv, copy(B)) -(\)(A::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::Transpose{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = - LAPACK.getrs!('T', A.parent.factors, A.parent.ipiv, copy(B)) - -function det(F::LU{T}) where T - n = checksquare(F) - issuccess(F) || return zero(T) - P = one(T) - c = 0 - @inbounds for i = 1:n - P *= F.factors[i,i] - if F.ipiv[i] != i - c += 1 - end - end - s = (isodd(c) ? 
-one(T) : one(T)) - return P * s -end - -function logabsdet(F::LU{T}) where T # return log(abs(det)) and sign(det) - n = checksquare(F) - issuccess(F) || return log(zero(real(T))), log(one(T)) - c = 0 - P = one(T) - abs_det = zero(real(T)) - @inbounds for i = 1:n - dg_ii = F.factors[i,i] - P *= sign(dg_ii) - if F.ipiv[i] != i - c += 1 - end - abs_det += log(abs(dg_ii)) - end - s = ifelse(isodd(c), -one(real(T)), one(real(T))) * P - abs_det, s -end - -inv!(A::LU{<:BlasFloat,<:StridedMatrix}) = - LAPACK.getri!(A.factors, A.ipiv) -inv!(A::LU{T,<:StridedMatrix}) where {T} = - ldiv!(A.factors, copy(A), Matrix{T}(I, size(A, 1), size(A, 1))) -inv(A::LU{<:BlasFloat,<:StridedMatrix}) = inv!(copy(A)) - -# Tridiagonal - -# See dgttrf.f -function lu!(A::Tridiagonal{T,V}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T,V} - # Extract values - n = size(A, 1) - - # Initialize variables - info = 0 - ipiv = Vector{BlasInt}(undef, n) - dl = A.dl - d = A.d - du = A.du - if dl === du - throw(ArgumentError("off-diagonals of `A` must not alias")) - end - # Check if Tridiagonal matrix already has du2 for pivoting - has_du2_defined = isdefined(A, :du2) && length(A.du2) == max(0, n-2) - if has_du2_defined - du2 = A.du2::V - else - du2 = similar(d, max(0, n-2))::V - end - fill!(du2, 0) - - @inbounds begin - for i = 1:n - ipiv[i] = i - end - for i = 1:n-2 - # pivot or not? - if pivot === NoPivot() || abs(d[i]) >= abs(dl[i]) - # No interchange - if d[i] != 0 - fact = dl[i]/d[i] - dl[i] = fact - d[i+1] -= fact*du[i] - du2[i] = 0 - end - else - # Interchange - fact = d[i]/dl[i] - d[i] = dl[i] - dl[i] = fact - tmp = du[i] - du[i] = d[i+1] - d[i+1] = tmp - fact*d[i+1] - du2[i] = du[i+1] - du[i+1] = -fact*du[i+1] - ipiv[i] = i+1 - end - end - if n > 1 - i = n-1 - if pivot === NoPivot() || abs(d[i]) >= abs(dl[i]) - if d[i] != 0 - fact = dl[i]/d[i] - dl[i] = fact - d[i+1] -= fact*du[i] - end - else - fact = d[i]/dl[i] - d[i] = dl[i] - dl[i] = fact - tmp = du[i] - du[i] = d[i+1] - d[i+1] = tmp - fact*d[i+1] - ipiv[i] = i+1 - end - end - # check for a zero on the diagonal of U - for i = 1:n - if d[i] == 0 - info = i - break - end - end - end - B = has_du2_defined ? 
A : Tridiagonal{T,V}(dl, d, du, du2) - check && checknonsingular(info, pivot) - return LU{T,Tridiagonal{T,V},typeof(ipiv)}(B, ipiv, convert(BlasInt, info)) -end - -factorize(A::Tridiagonal) = lu(A) - -function getproperty(F::LU{T,Tridiagonal{T,V}}, d::Symbol) where {T,V} - m, n = size(F) - if d === :L - dl = getfield(getfield(F, :factors), :dl) - L = Array(Bidiagonal(fill!(similar(dl, n), one(T)), dl, d)) - for i = 2:n - tmp = L[getfield(F, :ipiv)[i], 1:i - 1] - L[getfield(F, :ipiv)[i], 1:i - 1] = L[i, 1:i - 1] - L[i, 1:i - 1] = tmp - end - return L - elseif d === :U - U = Array(Bidiagonal(getfield(getfield(F, :factors), :d), getfield(getfield(F, :factors), :du), d)) - for i = 1:n - 2 - U[i,i + 2] = getfield(getfield(F, :factors), :du2)[i] - end - return U - elseif d === :p - return ipiv2perm(getfield(F, :ipiv), m) - elseif d === :P - return Matrix{T}(I, m, m)[:,invperm(F.p)] - end - return getfield(F, d) -end - -# See dgtts2.f -function ldiv!(A::LU{T,Tridiagonal{T,V}}, B::AbstractVecOrMat) where {T,V} - require_one_based_indexing(B) - n = size(A,1) - if n != size(B,1) - throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows")) - end - nrhs = size(B,2) - dl = A.factors.dl - d = A.factors.d - du = A.factors.du - du2 = A.factors.du2 - ipiv = A.ipiv - @inbounds begin - for j = 1:nrhs - for i = 1:n-1 - ip = ipiv[i] - tmp = B[i+1-ip+i,j] - dl[i]*B[ip,j] - B[i,j] = B[ip,j] - B[i+1,j] = tmp - end - B[n,j] /= d[n] - if n > 1 - B[n-1,j] = (B[n-1,j] - du[n-1]*B[n,j])/d[n-1] - end - for i = n-2:-1:1 - B[i,j] = (B[i,j] - du[i]*B[i+1,j] - du2[i]*B[i+2,j])/d[i] - end - end - end - return B -end - -function ldiv!(transA::TransposeFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V} - require_one_based_indexing(B) - A = transA.parent - n = size(A,1) - if n != size(B,1) - throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows")) - end - nrhs = size(B,2) - dl = A.factors.dl - d = A.factors.d - du = A.factors.du - du2 = A.factors.du2 - ipiv = A.ipiv - @inbounds begin - for j = 1:nrhs - B[1,j] /= d[1] - if n > 1 - B[2,j] = (B[2,j] - du[1]*B[1,j])/d[2] - end - for i = 3:n - B[i,j] = (B[i,j] - du[i-1]*B[i-1,j] - du2[i-2]*B[i-2,j])/d[i] - end - for i = n-1:-1:1 - if ipiv[i] == i - B[i,j] = B[i,j] - dl[i]*B[i+1,j] - else - tmp = B[i+1,j] - B[i+1,j] = B[i,j] - dl[i]*tmp - B[i,j] = tmp - end - end - end - end - return B -end - -# Ac_ldiv_B!(A::LU{T,Tridiagonal{T}}, B::AbstractVecOrMat) where {T<:Real} = At_ldiv_B!(A,B) -function ldiv!(adjA::AdjointFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V} - require_one_based_indexing(B) - A = adjA.parent - n = size(A,1) - if n != size(B,1) - throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows")) - end - nrhs = size(B,2) - dl = A.factors.dl - d = A.factors.d - du = A.factors.du - du2 = A.factors.du2 - ipiv = A.ipiv - @inbounds begin - for j = 1:nrhs - B[1,j] /= conj(d[1]) - if n > 1 - B[2,j] = (B[2,j] - conj(du[1])*B[1,j])/conj(d[2]) - end - for i = 3:n - B[i,j] = (B[i,j] - conj(du[i-1])*B[i-1,j] - conj(du2[i-2])*B[i-2,j])/conj(d[i]) - end - for i = n-1:-1:1 - if ipiv[i] == i - B[i,j] = B[i,j] - conj(dl[i])*B[i+1,j] - else - tmp = B[i+1,j] - B[i+1,j] = B[i,j] - conj(dl[i])*tmp - B[i,j] = tmp - end - end - end - end - return B -end - -rdiv!(B::AbstractMatrix, A::LU) = transpose(ldiv!(transpose(A), transpose(B))) -rdiv!(B::AbstractMatrix, A::TransposeFactorization{<:Any,<:LU}) = 
transpose(ldiv!(A.parent, transpose(B))) -rdiv!(B::AbstractMatrix, A::AdjointFactorization{<:Any,<:LU}) = adjoint(ldiv!(A.parent, adjoint(B))) - -# Conversions -AbstractMatrix(F::LU) = (F.L * F.U)[invperm(F.p),:] -AbstractArray(F::LU) = AbstractMatrix(F) -Matrix(F::LU) = Array(AbstractArray(F)) -Array(F::LU) = Matrix(F) - -function Tridiagonal(F::LU{T,Tridiagonal{T,V}}) where {T,V} - n = size(F, 1) - - dl = copy(F.factors.dl) - d = copy(F.factors.d) - du = copy(F.factors.du) - du2 = copy(F.factors.du2) - - for i = n - 1:-1:1 - li = dl[i] - dl[i] = li*d[i] - d[i + 1] += li*du[i] - if i < n - 1 - du[i + 1] += li*du2[i] - end - - if F.ipiv[i] != i - tmp = dl[i] - dl[i] = d[i] - d[i] = tmp - - tmp = d[i + 1] - d[i + 1] = du[i] - du[i] = tmp - - if i < n - 1 - tmp = du[i + 1] - du[i + 1] = du2[i] - du2[i] = tmp - end - end - end - return Tridiagonal(dl, d, du) -end -AbstractMatrix(F::LU{T,Tridiagonal{T,V}}) where {T,V} = Tridiagonal(F) -AbstractArray(F::LU{T,Tridiagonal{T,V}}) where {T,V} = AbstractMatrix(F) -Matrix(F::LU{T,Tridiagonal{T,V}}) where {T,V} = Array(AbstractArray(F)) -Array(F::LU{T,Tridiagonal{T,V}}) where {T,V} = Matrix(F) diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl deleted file mode 100644 index e375108f6a831..0000000000000 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ /dev/null @@ -1,1259 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# matmul.jl: Everything to do with dense matrix multiplication - -# Matrix-matrix multiplication - -AdjOrTransStridedMat{T} = Union{Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}} -StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}} -StridedMaybeAdjOrTransVecOrMat{T} = Union{StridedVecOrMat{T}, AdjOrTrans{<:Any, <:StridedVecOrMat{T}}} - -_parent(A) = A -_parent(A::Adjoint) = parent(A) -_parent(A::Transpose) = parent(A) - -matprod(x, y) = x*y + x*y - -# dot products - -dot(x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasReal} = BLAS.dot(x, y) -dot(x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasComplex} = BLAS.dotc(x, y) - -function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasReal,TI<:Integer} - if length(rx) != length(ry) - throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))")) - end - if minimum(rx) < 1 || maximum(rx) > length(x) - throw(BoundsError(x, rx)) - end - if minimum(ry) < 1 || maximum(ry) > length(y) - throw(BoundsError(y, ry)) - end - GC.@preserve x y BLAS.dot(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry)) -end - -function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasComplex,TI<:Integer} - if length(rx) != length(ry) - throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))")) - end - if minimum(rx) < 1 || maximum(rx) > length(x) - throw(BoundsError(x, rx)) - end - if minimum(ry) < 1 || maximum(ry) > length(y) - throw(BoundsError(y, ry)) - end - GC.@preserve x y BLAS.dotc(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry)) -end - -function *(transx::Transpose{<:Any,<:StridedVector{T}}, y::StridedVector{T}) where {T<:BlasComplex} - x = transx.parent - return BLAS.dotu(x, y) -end - -# Matrix-vector multiplication -function 
(*)(A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real} - TS = promote_op(matprod, T, S) - y = isconcretetype(TS) ? convert(AbstractVector{TS}, x) : x - mul!(similar(x, TS, size(A,1)), A, y) -end -function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S} - TS = promote_op(matprod, T, S) - mul!(similar(x, TS, axes(A,1)), A, x) -end - -# these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case): -(*)(a::AbstractVector, tB::TransposeAbsMat) = reshape(a, length(a), 1) * tB -(*)(a::AbstractVector, adjB::AdjointAbsMat) = reshape(a, length(a), 1) * adjB -(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B - -@inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector, - alpha::Number, beta::Number) = - generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, MulAddMul(alpha, beta)) -# BLAS cases -# equal eltypes -@inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T}, - _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} = - gemv!(y, tA, A, x, _add.alpha, _add.beta) -# Real (possibly transposed) matrix times complex vector. -# Multiply the matrix with the real and imaginary parts separately -@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, - _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = - gemv!(y, tA, A, x, _add.alpha, _add.beta) -# Complex matrix times real vector. -# Reinterpret the matrix as a real matrix and do real matvec computation. -# works only in cooperation with BLAS when A is untransposed (tA == 'N') -# but that check is included in gemv! anyway -@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, - _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = - gemv!(y, tA, A, x, _add.alpha, _add.beta) - -# Vector-Matrix multiplication -(*)(x::AdjointAbsVec, A::AbstractMatrix) = (A'*x')' -(*)(x::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A)*transpose(x)) - -# Matrix-matrix multiplication -""" - *(A::AbstractMatrix, B::AbstractMatrix) - -Matrix multiplication. - -# Examples -```jldoctest -julia> [1 1; 0 1] * [1 0; 1 1] -2×2 Matrix{Int64}: - 2 1 - 1 1 -``` -""" -function (*)(A::AbstractMatrix, B::AbstractMatrix) - TS = promote_op(matprod, eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A, 1), size(B, 2))), A, B) -end -# optimization for dispatching to BLAS, e.g. 
*(::Matrix{Float32}, ::Matrix{Float64}) -# but avoiding the case *(::Matrix{<:BlasComplex}, ::Matrix{<:BlasReal}) -# which is better handled by reinterpreting rather than promotion -function (*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::StridedMaybeAdjOrTransMat{<:BlasReal}) - TS = promote_type(eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A, 1), size(B, 2))), - wrapperop(A)(convert(AbstractArray{TS}, _parent(A))), - wrapperop(B)(convert(AbstractArray{TS}, _parent(B)))) -end -function (*)(A::StridedMaybeAdjOrTransMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasComplex}) - TS = promote_type(eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A, 1), size(B, 2))), - wrapperop(A)(convert(AbstractArray{TS}, _parent(A))), - wrapperop(B)(convert(AbstractArray{TS}, _parent(B)))) -end - -# Complex Matrix times real matrix: We use that it is generally faster to reinterpret the -# first matrix as a real matrix and carry out real matrix matrix multiply -function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal}) - TS = promote_type(eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A, 1), size(B, 2))), - convert(AbstractArray{TS}, A), - wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B)))) -end -function (*)(A::AdjOrTransStridedMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal}) - TS = promote_type(eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A, 1), size(B, 2))), - copymutable_oftype(A, TS), # remove AdjOrTrans to use reinterpret trick below - wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B)))) -end -# the following case doesn't seem to benefit from the translation A*B = (B' * A')' -function (*)(A::StridedMatrix{<:BlasReal}, B::StridedMatrix{<:BlasComplex}) - temp = real(B) - R = A * temp - temp .= imag.(B) - I = A * temp - Complex.(R, I) -end -(*)(A::AdjOrTransStridedMat{<:BlasReal}, B::StridedMatrix{<:BlasComplex}) = copy(transpose(transpose(B) * parent(A))) -(*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::AdjOrTransStridedMat{<:BlasComplex}) = copy(wrapperop(B)(parent(B) * transpose(A))) - -""" - muladd(A, y, z) - -Combined multiply-add, `A*y .+ z`, for matrix-matrix or matrix-vector multiplication. -The result is always the same size as `A*y`, but `z` may be smaller, or a scalar. - -!!! compat "Julia 1.6" - These methods require Julia 1.6 or later. 
- -# Examples -```jldoctest -julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; z=[0, 100]; - -julia> muladd(A, B, z) -2×2 Matrix{Float64}: - 3.0 3.0 - 107.0 107.0 -``` -""" -function Base.muladd(A::AbstractMatrix, y::AbstractVecOrMat, z::Union{Number, AbstractArray}) - Ay = A * y - for d in 1:ndims(Ay) - # Same error as Ay .+= z would give, to match StridedMatrix method: - size(z,d) > size(Ay,d) && throw(DimensionMismatch("array could not be broadcast to match destination")) - end - for d in ndims(Ay)+1:ndims(z) - # Similar error to what Ay + z would give, to match (Any,Any,Any) method: - size(z,d) > 1 && throw(DimensionMismatch(string("dimensions must match: z has dims ", - axes(z), ", must have singleton at dim ", d))) - end - Ay .+ z -end - -function Base.muladd(u::AbstractVector, v::AdjOrTransAbsVec, z::Union{Number, AbstractArray}) - if size(z,1) > length(u) || size(z,2) > length(v) - # Same error as (u*v) .+= z: - throw(DimensionMismatch("array could not be broadcast to match destination")) - end - for d in 3:ndims(z) - # Similar error to (u*v) + z: - size(z,d) > 1 && throw(DimensionMismatch(string("dimensions must match: z has dims ", - axes(z), ", must have singleton at dim ", d))) - end - (u .* v) .+ z -end - -Base.muladd(x::AdjointAbsVec, A::AbstractMatrix, z::Union{Number, AbstractVecOrMat}) = - muladd(A', x', z')' -Base.muladd(x::TransposeAbsVec, A::AbstractMatrix, z::Union{Number, AbstractVecOrMat}) = - transpose(muladd(transpose(A), transpose(x), transpose(z))) - -function Base.muladd(A::StridedMaybeAdjOrTransMat{<:Number}, y::AbstractVector{<:Number}, z::Union{Number, AbstractVector}) - T = promote_type(eltype(A), eltype(y), eltype(z)) - C = similar(A, T, axes(A,1)) - C .= z - mul!(C, A, y, true, true) -end - -function Base.muladd(A::StridedMaybeAdjOrTransMat{<:Number}, B::StridedMaybeAdjOrTransMat{<:Number}, z::Union{Number, AbstractVecOrMat}) - T = promote_type(eltype(A), eltype(B), eltype(z)) - C = similar(A, T, axes(A,1), axes(B,2)) - C .= z - mul!(C, A, B, true, true) -end - -""" - mul!(Y, A, B) -> Y - -Calculates the matrix-matrix or matrix-vector product ``AB`` and stores the result in `Y`, -overwriting the existing value of `Y`. Note that `Y` must not be aliased with either `A` or -`B`. - -# Examples -```jldoctest -julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; Y = similar(B); mul!(Y, A, B); - -julia> Y -2×2 Matrix{Float64}: - 3.0 3.0 - 7.0 7.0 -``` - -# Implementation -For custom matrix and vector types, it is recommended to implement -5-argument `mul!` rather than implementing 3-argument `mul!` directly -if possible. -""" -@inline function mul!(C, A, B) - return mul!(C, A, B, true, false) -end - -""" - mul!(C, A, B, α, β) -> C - -Combined inplace matrix-matrix or matrix-vector multiply-add ``A B α + C β``. -The result is stored in `C` by overwriting it. Note that `C` must not be -aliased with either `A` or `B`. - -!!! compat "Julia 1.3" - Five-argument `mul!` requires at least Julia 1.3. - -# Examples -```jldoctest -julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; C=[1.0 2.0; 3.0 4.0]; - -julia> mul!(C, A, B, 100.0, 10.0) === C -true - -julia> C -2×2 Matrix{Float64}: - 310.0 320.0 - 730.0 740.0 -``` -""" -@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) = - generic_matmatmul!( - C, - wrapper_char(A), - wrapper_char(B), - _unwrap(A), - _unwrap(B), - MulAddMul(α, β) - ) - -""" - rmul!(A, B) - -Calculate the matrix-matrix product ``AB``, overwriting `A`, and return the result. 
-Here, `B` must be of special matrix type, like, e.g., [`Diagonal`](@ref), -[`UpperTriangular`](@ref) or [`LowerTriangular`](@ref), or of some orthogonal type, -see [`QR`](@ref). - -# Examples -```jldoctest -julia> A = [0 1; 1 0]; - -julia> B = UpperTriangular([1 2; 0 3]); - -julia> rmul!(A, B); - -julia> A -2×2 Matrix{Int64}: - 0 3 - 1 2 - -julia> A = [1.0 2.0; 3.0 4.0]; - -julia> F = qr([0 1; -1 0]); - -julia> rmul!(A, F.Q) -2×2 Matrix{Float64}: - 2.0 1.0 - 4.0 3.0 -``` -""" -rmul!(A, B) - -""" - lmul!(A, B) - -Calculate the matrix-matrix product ``AB``, overwriting `B`, and return the result. -Here, `A` must be of special matrix type, like, e.g., [`Diagonal`](@ref), -[`UpperTriangular`](@ref) or [`LowerTriangular`](@ref), or of some orthogonal type, -see [`QR`](@ref). - -# Examples -```jldoctest -julia> B = [0 1; 1 0]; - -julia> A = UpperTriangular([1 2; 0 3]); - -julia> lmul!(A, B); - -julia> B -2×2 Matrix{Int64}: - 2 1 - 3 0 - -julia> B = [1.0 2.0; 3.0 4.0]; - -julia> F = qr([0 1; -1 0]); - -julia> lmul!(F.Q, B) -2×2 Matrix{Float64}: - 3.0 4.0 - 1.0 2.0 -``` -""" -lmul!(A, B) - -# THE one big BLAS dispatch -@inline function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, - _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} - if all(in(('N', 'T', 'C')), (tA, tB)) - if tA == 'T' && tB == 'N' && A === B - return syrk_wrapper!(C, 'T', A, _add) - elseif tA == 'N' && tB == 'T' && A === B - return syrk_wrapper!(C, 'N', A, _add) - elseif tA == 'C' && tB == 'N' && A === B - return herk_wrapper!(C, 'C', A, _add) - elseif tA == 'N' && tB == 'C' && A === B - return herk_wrapper!(C, 'N', A, _add) - else - return gemm_wrapper!(C, tA, tB, A, B, _add) - end - end - alpha, beta = promote(_add.alpha, _add.beta, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - if (tA == 'S' || tA == 's') && tB == 'N' - return BLAS.symm!('L', tA == 'S' ? 'U' : 'L', alpha, A, B, beta, C) - elseif (tB == 'S' || tB == 's') && tA == 'N' - return BLAS.symm!('R', tB == 'S' ? 'U' : 'L', alpha, B, A, beta, C) - elseif (tA == 'H' || tA == 'h') && tB == 'N' - return BLAS.hemm!('L', tA == 'H' ? 'U' : 'L', alpha, A, B, beta, C) - elseif (tB == 'H' || tB == 'h') && tA == 'N' - return BLAS.hemm!('R', tB == 'H' ? 'U' : 'L', alpha, B, A, beta, C) - end - end - return _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) -end - -# Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency. -@inline function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, - _add::MulAddMul=MulAddMul()) where {T<:BlasReal} - if all(in(('N', 'T', 'C')), (tA, tB)) - gemm_wrapper!(C, tA, tB, A, B, _add) - else - _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) - end -end - - -# Supporting functions for matrix multiplication - -# copy transposed(adjoint) of upper(lower) side-diagonals. Optionally include diagonal. -@inline function copytri!(A::AbstractMatrix, uplo::AbstractChar, conjugate::Bool=false, diag::Bool=false) - n = checksquare(A) - off = diag ? 0 : 1 - if uplo == 'U' - for i = 1:n, j = (i+off):n - A[j,i] = conjugate ? adjoint(A[i,j]) : transpose(A[i,j]) - end - elseif uplo == 'L' - for i = 1:n, j = (i+off):n - A[i,j] = conjugate ? 
adjoint(A[j,i]) : transpose(A[j,i]) - end - else - throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo")) - end - A -end - -function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{T}, - α::Number=true, β::Number=false) where {T<:BlasFloat} - mA, nA = lapack_size(tA, A) - nA != length(x) && - throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))")) - mA != length(y) && - throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))")) - mA == 0 && return y - nA == 0 && return _rmul_or_fill!(y, β) - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && - stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) && # We only check input's stride here. - if tA in ('N', 'T', 'C') - return BLAS.gemv!(tA, alpha, A, x, beta, y) - elseif tA in ('S', 's') - return BLAS.symv!(tA == 'S' ? 'U' : 'L', alpha, A, x, beta, y) - elseif tA in ('H', 'h') - return BLAS.hemv!(tA == 'H' ? 'U' : 'L', alpha, A, x, beta, y) - end - end - if tA in ('S', 's', 'H', 'h') - # re-wrap again and use plain ('N') matvec mul algorithm, - # because _generic_matvecmul! can't handle the HermOrSym cases specifically - return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) - else - return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) - end -end - -function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, - α::Number = true, β::Number = false) where {T<:BlasReal} - mA, nA = lapack_size(tA, A) - nA != length(x) && - throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))")) - mA != length(y) && - throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))")) - mA == 0 && return y - nA == 0 && return _rmul_or_fill!(y, β) - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && - stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - stride(y, 1) == 1 && tA == 'N' && # reinterpret-based optimization is valid only for contiguous `y` - !iszero(stride(x, 1)) - BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y)) - return y - else - Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA) - return _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β)) - end -end - -function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, - α::Number = true, β::Number = false) where {T<:BlasFloat} - mA, nA = lapack_size(tA, A) - nA != length(x) && - throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))")) - mA != length(y) && - throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))")) - mA == 0 && return y - nA == 0 && return _rmul_or_fill!(y, β) - alpha, beta = promote(α, β, zero(T)) - @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && - stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) && tA in ('N', 'T', 'C') - xfl = reinterpret(reshape, T, x) # Use reshape here. - yfl = reinterpret(reshape, T, y) - BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :]) - BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :]) - return y - elseif tA in ('S', 's', 'H', 'h') - # re-wrap again and use plain ('N') matvec mul algorithm, - # because _generic_matvecmul! 
can't handle the HermOrSym cases specifically - return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) - else - return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) - end -end - -function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T}, - _add = MulAddMul()) where {T<:BlasFloat} - nC = checksquare(C) - if tA == 'T' - (nA, mA) = size(A,1), size(A,2) - tAt = 'N' - else - (mA, nA) = size(A,1), size(A,2) - tAt = 'T' - end - if nC != mA - throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)")) - end - if mA == 0 || nA == 0 || iszero(_add.alpha) - return _rmul_or_fill!(C, _add.beta) - end - if mA == 2 && nA == 2 - return matmul2x2!(C, tA, tAt, A, A, _add) - end - if mA == 3 && nA == 3 - return matmul3x3!(C, tA, tAt, A, A, _add) - end - - # BLAS.syrk! only updates symmetric C - # alternatively, make non-zero β a show-stopper for BLAS.syrk! - if iszero(_add.beta) || issymmetric(C) - alpha, beta = promote(_add.alpha, _add.beta, zero(T)) - if (alpha isa Union{Bool,T} && - beta isa Union{Bool,T} && - stride(A, 1) == stride(C, 1) == 1 && - stride(A, 2) >= size(A, 1) && - stride(C, 2) >= size(C, 1)) - return copytri!(BLAS.syrk!('U', tA, alpha, A, beta, C), 'U') - end - end - return gemm_wrapper!(C, tA, tAt, A, A, _add) -end - -function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}}, - _add = MulAddMul()) where {T<:BlasReal} - nC = checksquare(C) - if tA == 'C' - (nA, mA) = size(A,1), size(A,2) - tAt = 'N' - else - (mA, nA) = size(A,1), size(A,2) - tAt = 'C' - end - if nC != mA - throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)")) - end - if mA == 0 || nA == 0 || iszero(_add.alpha) - return _rmul_or_fill!(C, _add.beta) - end - if mA == 2 && nA == 2 - return matmul2x2!(C, tA, tAt, A, A, _add) - end - if mA == 3 && nA == 3 - return matmul3x3!(C, tA, tAt, A, A, _add) - end - - # Result array does not need to be initialized as long as beta==0 - # C = Matrix{T}(undef, mA, mA) - - if iszero(_add.beta) || issymmetric(C) - alpha, beta = promote(_add.alpha, _add.beta, zero(T)) - if (alpha isa Union{Bool,T} && - beta isa Union{Bool,T} && - stride(A, 1) == stride(C, 1) == 1 && - stride(A, 2) >= size(A, 1) && - stride(C, 2) >= size(C, 1)) - return copytri!(BLAS.herk!('U', tA, alpha, A, beta, C), 'U', true) - end - end - return gemm_wrapper!(C, tA, tAt, A, A, _add) -end - -function gemm_wrapper(tA::AbstractChar, tB::AbstractChar, - A::StridedVecOrMat{T}, - B::StridedVecOrMat{T}) where {T<:BlasFloat} - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - C = similar(B, T, mA, nB) - if all(in(('N', 'T', 'C')), (tA, tB)) - gemm_wrapper!(C, tA, tB, A, B) - else - _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) - end -end - -function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar, - A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, - _add = MulAddMul()) where {T<:BlasFloat} - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - - if nA != mB - throw(DimensionMismatch(lazy"A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)")) - end - - if C === A || B === C - throw(ArgumentError("output matrix must not be aliased with input matrix")) - end - - if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha) - if size(C) != (mA, nB) - throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)")) - end - return _rmul_or_fill!(C, _add.beta) - end 
- - if mA == 2 && nA == 2 && nB == 2 - return matmul2x2!(C, tA, tB, A, B, _add) - end - if mA == 3 && nA == 3 && nB == 3 - return matmul3x3!(C, tA, tB, A, B, _add) - end - - alpha, beta = promote(_add.alpha, _add.beta, zero(T)) - if (alpha isa Union{Bool,T} && - beta isa Union{Bool,T} && - stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 && - stride(A, 2) >= size(A, 1) && - stride(B, 2) >= size(B, 1) && - stride(C, 2) >= size(C, 1)) - return BLAS.gemm!(tA, tB, alpha, A, B, beta, C) - end - _generic_matmatmul!(C, tA, tB, A, B, _add) -end - -function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar, - A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, - _add = MulAddMul()) where {T<:BlasReal} - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - - if nA != mB - throw(DimensionMismatch(lazy"A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)")) - end - - if C === A || B === C - throw(ArgumentError("output matrix must not be aliased with input matrix")) - end - - if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha) - if size(C) != (mA, nB) - throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)")) - end - return _rmul_or_fill!(C, _add.beta) - end - - if mA == 2 && nA == 2 && nB == 2 - return matmul2x2!(C, tA, tB, A, B, _add) - end - if mA == 3 && nA == 3 && nB == 3 - return matmul3x3!(C, tA, tB, A, B, _add) - end - - alpha, beta = promote(_add.alpha, _add.beta, zero(T)) - - # Make-sure reinterpret-based optimization is BLAS-compatible. - if (alpha isa Union{Bool,T} && - beta isa Union{Bool,T} && - stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 && - stride(A, 2) >= size(A, 1) && - stride(B, 2) >= size(B, 1) && - stride(C, 2) >= size(C, 1) && tA == 'N') - BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C)) - return C - end - _generic_matmatmul!(C, tA, tB, A, B, _add) -end - -# blas.jl defines matmul for floats; other integer and mixed precision -# cases are handled here - -lapack_size(t::AbstractChar, M::AbstractVecOrMat) = (size(M, t=='N' ? 1 : 2), size(M, t=='N' ? 2 : 1)) - -function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int}) - if tM == 'N' - copyto!(B, ir_dest, jr_dest, M, ir_src, jr_src) - else - LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, jr_src, ir_src) - tM == 'C' && conj!(@view B[ir_dest, jr_dest]) - end - B -end - -function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int}) - if tM == 'N' - LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, ir_src, jr_src) - else - copyto!(B, ir_dest, jr_dest, M, jr_src, ir_src) - tM == 'C' && conj!(@view B[ir_dest, jr_dest]) - end - B -end - -# TODO: It will be faster for large matrices to convert to float, -# call BLAS, and convert back to required type. - -# NOTE: the generic version is also called as fallback for -# strides != 1 cases - -@inline function generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, - _add::MulAddMul = MulAddMul()) - Anew, ta = tA in ('S', 's', 'H', 'h') ? 
(wrap(A, tA), 'N') : (A, tA) - return _generic_matvecmul!(C, ta, Anew, B, _add) -end - -function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, - _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, A, B) - @assert tA in ('N', 'T', 'C') - mB = length(B) - mA, nA = lapack_size(tA, A) - if mB != nA - throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), vector B has length $mB")) - end - if mA != length(C) - throw(DimensionMismatch(lazy"result C has length $(length(C)), needs length $mA")) - end - - Astride = size(A, 1) - - @inbounds begin - if tA == 'T' # fastest case - if nA == 0 - for k = 1:mA - _modify!(_add, false, C, k) - end - else - for k = 1:mA - aoffs = (k-1)*Astride - s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1]) - for i = 1:nA - s += transpose(A[aoffs+i]) * B[i] - end - _modify!(_add, s, C, k) - end - end - elseif tA == 'C' - if nA == 0 - for k = 1:mA - _modify!(_add, false, C, k) - end - else - for k = 1:mA - aoffs = (k-1)*Astride - s = zero(A[aoffs + 1]*B[1] + A[aoffs + 1]*B[1]) - for i = 1:nA - s += A[aoffs + i]'B[i] - end - _modify!(_add, s, C, k) - end - end - else # tA == 'N' - for i = 1:mA - if !iszero(_add.beta) - C[i] *= _add.beta - elseif mB == 0 - C[i] = false - else - C[i] = zero(A[i]*B[1] + A[i]*B[1]) - end - end - for k = 1:mB - aoffs = (k-1)*Astride - b = _add(B[k]) - for i = 1:mA - C[i] += A[aoffs + i] * b - end - end - end - end # @inbounds - C -end - -function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S}) where {T,S} - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - C = similar(B, promote_op(matprod, T, S), mA, nB) - generic_matmatmul!(C, tA, tB, A, B) -end - -const tilebufsize = 10800 # Approximately 32k/3 - -function generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - mC, nC = size(C) - - if iszero(_add.alpha) - return _rmul_or_fill!(C, _add.beta) - end - if mA == nA == mB == nB == mC == nC == 2 - return matmul2x2!(C, tA, tB, A, B, _add) - end - if mA == nA == mB == nB == mC == nC == 3 - return matmul3x3!(C, tA, tB, A, B, _add) - end - A, tA = tA in ('H', 'h', 'S', 's') ? (wrap(A, tA), 'N') : (A, tA) - B, tB = tB in ('H', 'h', 'S', 's') ? 
(wrap(B, tB), 'N') : (B, tB) - _generic_matmatmul!(C, tA, tB, A, B, _add) -end - -function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}, - _add::MulAddMul) where {T,S,R} - @assert tA in ('N', 'T', 'C') && tB in ('N', 'T', 'C') - require_one_based_indexing(C, A, B) - - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - if mB != nA - throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), matrix B has dimensions ($mB,$nB)")) - end - if size(C,1) != mA || size(C,2) != nB - throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs ($mA,$nB)")) - end - - if iszero(_add.alpha) || isempty(A) || isempty(B) - return _rmul_or_fill!(C, _add.beta) - end - - tile_size = 0 - if isbitstype(R) && isbitstype(T) && isbitstype(S) && (tA == 'N' || tB != 'N') - tile_size = floor(Int, sqrt(tilebufsize / max(sizeof(R), sizeof(S), sizeof(T), 1))) - end - @inbounds begin - if tile_size > 0 - sz = (tile_size, tile_size) - Atile = Array{T}(undef, sz) - Btile = Array{S}(undef, sz) - - z1 = zero(A[1, 1]*B[1, 1] + A[1, 1]*B[1, 1]) - z = convert(promote_type(typeof(z1), R), z1) - - if mA < tile_size && nA < tile_size && nB < tile_size - copy_transpose!(Atile, 1:nA, 1:mA, tA, A, 1:mA, 1:nA) - copyto!(Btile, 1:mB, 1:nB, tB, B, 1:mB, 1:nB) - for j = 1:nB - boff = (j-1)*tile_size - for i = 1:mA - aoff = (i-1)*tile_size - s = z - for k = 1:nA - s += Atile[aoff+k] * Btile[boff+k] - end - _modify!(_add, s, C, (i,j)) - end - end - else - Ctile = Array{R}(undef, sz) - for jb = 1:tile_size:nB - jlim = min(jb+tile_size-1,nB) - jlen = jlim-jb+1 - for ib = 1:tile_size:mA - ilim = min(ib+tile_size-1,mA) - ilen = ilim-ib+1 - fill!(Ctile, z) - for kb = 1:tile_size:nA - klim = min(kb+tile_size-1,mB) - klen = klim-kb+1 - copy_transpose!(Atile, 1:klen, 1:ilen, tA, A, ib:ilim, kb:klim) - copyto!(Btile, 1:klen, 1:jlen, tB, B, kb:klim, jb:jlim) - for j=1:jlen - bcoff = (j-1)*tile_size - for i = 1:ilen - aoff = (i-1)*tile_size - s = z - for k = 1:klen - s += Atile[aoff+k] * Btile[bcoff+k] - end - Ctile[bcoff+i] += s - end - end - end - if isone(_add.alpha) && iszero(_add.beta) - copyto!(C, ib:ilim, jb:jlim, Ctile, 1:ilen, 1:jlen) - else - C[ib:ilim, jb:jlim] .= @views _add.(Ctile[1:ilen, 1:jlen], C[ib:ilim, jb:jlim]) - end - end - end - end - else - # Multiplication for non-plain-data uses the naive algorithm - if tA == 'N' - if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j]) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[i, k]*B[k, j] - end - _modify!(_add, Ctmp, C, (i,j)) - end - elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*transpose(B[j, 1]) + A[i, 1]*transpose(B[j, 1])) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[i, k] * transpose(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - else - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*B[j, 1]' + A[i, 1]*B[j, 1]') - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[i, k]*B[j, k]' - end - _modify!(_add, Ctmp, C, (i,j)) - end - end - elseif tA == 'T' - if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*B[1, j] + transpose(A[1, i])*B[1, j]) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += transpose(A[k, i]) * B[k, j] - end - _modify!(_add, Ctmp, C, (i,j)) - end - elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*transpose(B[j, 1]) + transpose(A[1, i])*transpose(B[j, 1])) - Ctmp = convert(promote_type(R, 
typeof(z2)), z2) - for k = 1:nA - Ctmp += transpose(A[k, i]) * transpose(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - else - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*B[j, 1]' + transpose(A[1, i])*B[j, 1]') - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += transpose(A[k, i]) * adjoint(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - end - else - if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*B[1, j] + A[1, i]'*B[1, j]) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[k, i]'B[k, j] - end - _modify!(_add, Ctmp, C, (i,j)) - end - elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*transpose(B[j, 1]) + A[1, i]'*transpose(B[j, 1])) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += adjoint(A[k, i]) * transpose(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - else - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*B[j, 1]' + A[1, i]'*B[j, 1]') - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[k, i]'B[j, k]' - end - _modify!(_add, Ctmp, C, (i,j)) - end - end - end - end - end # @inbounds - C -end - - -# multiply 2x2 matrices -function matmul2x2(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S} - matmul2x2!(similar(B, promote_op(matprod, T, S), 2, 2), tA, tB, A, B) -end - -function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, - _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, A, B) - if !(size(A) == size(B) == size(C) == (2,2)) - throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) - end - @inbounds begin - if tA == 'N' - A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] - elseif tA == 'T' - # TODO making these lazy could improve perf - A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])) - A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])) - elseif tA == 'C' - # TODO making these lazy could improve perf - A11 = copy(A[1,1]'); A12 = copy(A[2,1]') - A21 = copy(A[1,2]'); A22 = copy(A[2,2]') - elseif tA == 'S' - A11 = symmetric(A[1,1], :U); A12 = A[1,2] - A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U) - elseif tA == 's' - A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])) - A21 = A[2,1]; A22 = symmetric(A[2,2], :L) - elseif tA == 'H' - A11 = hermitian(A[1,1], :U); A12 = A[1,2] - A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U) - else # if tA == 'h' - A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])) - A21 = A[2,1]; A22 = hermitian(A[2,2], :L) - end - if tB == 'N' - B11 = B[1,1]; B12 = B[1,2]; - B21 = B[2,1]; B22 = B[2,2] - elseif tB == 'T' - # TODO making these lazy could improve perf - B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])) - B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])) - elseif tB == 'C' - # TODO making these lazy could improve perf - B11 = copy(B[1,1]'); B12 = copy(B[2,1]') - B21 = copy(B[1,2]'); B22 = copy(B[2,2]') - elseif tB == 'S' - B11 = symmetric(B[1,1], :U); B12 = B[1,2] - B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U) - elseif tB == 's' - B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])) - B21 = B[2,1]; B22 = symmetric(B[2,2], :L) - elseif tB == 'H' - B11 = hermitian(B[1,1], :U); B12 = B[1,2] - B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U) - else # if tB == 'h' - B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])) - B21 = B[2,1]; B22 = hermitian(B[2,2], :L) - end - _modify!(_add, A11*B11 + A12*B21, 
C, (1,1)) - _modify!(_add, A11*B12 + A12*B22, C, (1,2)) - _modify!(_add, A21*B11 + A22*B21, C, (2,1)) - _modify!(_add, A21*B12 + A22*B22, C, (2,2)) - end # inbounds - C -end - -# Multiply 3x3 matrices -function matmul3x3(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S} - matmul3x3!(similar(B, promote_op(matprod, T, S), 3, 3), tA, tB, A, B) -end - -function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, - _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, A, B) - if !(size(A) == size(B) == size(C) == (3,3)) - throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) - end - @inbounds begin - if tA == 'N' - A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] - A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3] - A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] - elseif tA == 'T' - # TODO making these lazy could improve perf - A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) - A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])); A23 = copy(transpose(A[3,2])) - A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = copy(transpose(A[3,3])) - elseif tA == 'C' - # TODO making these lazy could improve perf - A11 = copy(A[1,1]'); A12 = copy(A[2,1]'); A13 = copy(A[3,1]') - A21 = copy(A[1,2]'); A22 = copy(A[2,2]'); A23 = copy(A[3,2]') - A31 = copy(A[1,3]'); A32 = copy(A[2,3]'); A33 = copy(A[3,3]') - elseif tA == 'S' - A11 = symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] - A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3] - A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U) - elseif tA == 's' - A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) - A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2])) - A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L) - elseif tA == 'H' - A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] - A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3] - A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U) - else # if tA == 'h' - A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1])) - A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2])) - A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L) - end - - if tB == 'N' - B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] - B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] - B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] - elseif tB == 'T' - # TODO making these lazy could improve perf - B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) - B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])); B23 = copy(transpose(B[3,2])) - B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = copy(transpose(B[3,3])) - elseif tB == 'C' - # TODO making these lazy could improve perf - B11 = copy(B[1,1]'); B12 = copy(B[2,1]'); B13 = copy(B[3,1]') - B21 = copy(B[1,2]'); B22 = copy(B[2,2]'); B23 = copy(B[3,2]') - B31 = copy(B[1,3]'); B32 = copy(B[2,3]'); B33 = copy(B[3,3]') - elseif tB == 'S' - B11 = symmetric(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] - B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U); B23 = B[2,3] - B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = symmetric(B[3,3], :U) - elseif tB == 's' - B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) - B21 = B[2,1]; B22 = 
symmetric(B[2,2], :L); B23 = copy(transpose(B[3,2])) - B31 = B[3,1]; B32 = B[3,2]; B33 = symmetric(B[3,3], :L) - elseif tB == 'H' - B11 = hermitian(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] - B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U); B23 = B[2,3] - B31 = copy(adjoint(B[1,3])); B32 = copy(adjoint(B[2,3])); B33 = hermitian(B[3,3], :U) - else # if tB == 'h' - B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])); B13 = copy(adjoint(B[3,1])) - B21 = B[2,1]; B22 = hermitian(B[2,2], :L); B23 = copy(adjoint(B[3,2])) - B31 = B[3,1]; B32 = B[3,2]; B33 = hermitian(B[3,3], :L) - end - - _modify!(_add, A11*B11 + A12*B21 + A13*B31, C, (1,1)) - _modify!(_add, A11*B12 + A12*B22 + A13*B32, C, (1,2)) - _modify!(_add, A11*B13 + A12*B23 + A13*B33, C, (1,3)) - - _modify!(_add, A21*B11 + A22*B21 + A23*B31, C, (2,1)) - _modify!(_add, A21*B12 + A22*B22 + A23*B32, C, (2,2)) - _modify!(_add, A21*B13 + A22*B23 + A23*B33, C, (2,3)) - - _modify!(_add, A31*B11 + A32*B21 + A33*B31, C, (3,1)) - _modify!(_add, A31*B12 + A32*B22 + A33*B32, C, (3,2)) - _modify!(_add, A31*B13 + A32*B23 + A33*B33, C, (3,3)) - end # inbounds - C -end - -const RealOrComplex = Union{Real,Complex} - -# Three-argument * -""" - *(A, B::AbstractMatrix, C) - A * B * C * D - -Chained multiplication of 3 or 4 matrices is done in the most efficient sequence, -based on the sizes of the arrays. That is, the number of scalar multiplications needed -for `(A * B) * C` (with 3 dense matrices) is compared to that for `A * (B * C)` -to choose which of these to execute. - -If the last factor is a vector, or the first a transposed vector, then it is efficient -to deal with these first. In particular `x' * B * y` means `(x' * B) * y` -for an ordinary column-major `B::Matrix`. Unlike `dot(x, B, y)`, this -allocates an intermediate array. - -If the first or last factor is a number, this will be fused with the matrix -multiplication, using 5-arg [`mul!`](@ref). - -See also [`muladd`](@ref), [`dot`](@ref). - -!!! compat "Julia 1.7" - These optimisations require at least Julia 1.7. -""" -*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector) = A * (B*x) - -*(tu::AdjOrTransAbsVec, B::AbstractMatrix, v::AbstractVector) = (tu*B) * v -*(tu::AdjOrTransAbsVec, B::AdjOrTransAbsMat, v::AbstractVector) = tu * (B*v) - -*(A::AbstractMatrix, x::AbstractVector, γ::Number) = mat_vec_scalar(A,x,γ) -*(A::AbstractMatrix, B::AbstractMatrix, γ::Number) = mat_mat_scalar(A,B,γ) -*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractVector{<:RealOrComplex}) = - mat_vec_scalar(B,C,α) -*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}) = - mat_mat_scalar(B,C,α) - -*(α::Number, u::AbstractVector, tv::AdjOrTransAbsVec) = broadcast(*, α, u, tv) -*(u::AbstractVector, tv::AdjOrTransAbsVec, γ::Number) = broadcast(*, u, tv, γ) -*(u::AbstractVector, tv::AdjOrTransAbsVec, C::AbstractMatrix) = u * (tv*C) - -*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix) = _tri_matmul(A,B,C) -*(tv::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix) = (tv*B) * C - -function _tri_matmul(A,B,C,δ=nothing) - n,m = size(A) - # m,k == size(B) - k,l = size(C) - costAB_C = n*m*k + n*k*l # multiplications, allocations n*k + n*l - costA_BC = m*k*l + n*m*l # m*l + n*l - if costA_BC < costAB_C - isnothing(δ) ? A * (B*C) : A * mat_mat_scalar(B,C,δ) - else - isnothing(δ) ? (A*B) * C : mat_mat_scalar(A*B, C, δ) - end -end - -# Fast path for two arrays * one scalar is opt-in, via mat_vec_scalar and mat_mat_scalar. 
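To see the cost comparison in action (a toy sketch with arbitrary sizes; the counts are the scalar-multiplication formulas used in `_tri_matmul`, and the chained `A*B*C` syntax needs Julia ≥ 1.7):

```julia
using LinearAlgebra

A = rand(2, 10); B = rand(10, 500); C = rand(500, 500)

# With n,m = size(A) and k,l = size(C):
n, m = size(A); k, l = size(C)
cost_AB_then_C = n*m*k + n*k*l   #   510_000 scalar multiplications
cost_A_then_BC = m*k*l + n*m*l   # 2_510_000 scalar multiplications

# The chained product therefore evaluates (A*B)*C here;
# either grouping gives the same result up to round-off.
@assert A*B*C ≈ (A*B)*C ≈ A*(B*C)
```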
- -mat_vec_scalar(A, x, γ) = A * (x * γ) # fallback -mat_vec_scalar(A::StridedMaybeAdjOrTransMat, x::StridedVector, γ) = _mat_vec_scalar(A, x, γ) -mat_vec_scalar(A::AdjOrTransAbsVec, x::StridedVector, γ) = (A * x) * γ - -function _mat_vec_scalar(A, x, γ) - T = promote_type(eltype(A), eltype(x), typeof(γ)) - C = similar(A, T, axes(A,1)) - mul!(C, A, x, γ, false) -end - -mat_mat_scalar(A, B, γ) = (A*B) * γ # fallback -mat_mat_scalar(A::StridedMaybeAdjOrTransMat, B::StridedMaybeAdjOrTransMat, γ) = - _mat_mat_scalar(A, B, γ) - -function _mat_mat_scalar(A, B, γ) - T = promote_type(eltype(A), eltype(B), typeof(γ)) - C = similar(A, T, axes(A,1), axes(B,2)) - mul!(C, A, B, γ, false) -end - -mat_mat_scalar(A::AdjointAbsVec, B, γ) = (γ' * (A * B)')' # preserving order, adjoint reverses -mat_mat_scalar(A::AdjointAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) = - mat_vec_scalar(B', A', γ')' - -mat_mat_scalar(A::TransposeAbsVec, B, γ) = transpose(γ * transpose(A * B)) -mat_mat_scalar(A::TransposeAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) = - transpose(mat_vec_scalar(transpose(B), transpose(A), γ)) - - -# Four-argument *, by type -*(α::Number, β::Number, C::AbstractMatrix, x::AbstractVector) = (α*β) * C * x -*(α::Number, β::Number, C::AbstractMatrix, D::AbstractMatrix) = (α*β) * C * D -*(α::Number, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = α * B * (C*x) -*(α::Number, vt::AdjOrTransAbsVec, C::AbstractMatrix, x::AbstractVector) = α * (vt*C*x) -*(α::RealOrComplex, vt::AdjOrTransAbsVec{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) = - (α*vt*C) * D # solves an ambiguity - -*(A::AbstractMatrix, x::AbstractVector, γ::Number, δ::Number) = A * x * (γ*δ) -*(A::AbstractMatrix, B::AbstractMatrix, γ::Number, δ::Number) = A * B * (γ*δ) -*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector, δ::Number, ) = A * (B*x*δ) -*(vt::AdjOrTransAbsVec, B::AbstractMatrix, x::AbstractVector, δ::Number) = (vt*B*x) * δ -*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = (vt*B) * C * δ - -*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = A * B * (C*x) -*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) = (vt*B) * C * D -*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = vt * B * (C*x) - -# Four-argument *, by size -*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = _tri_matmul(A,B,C,δ) -*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) = - _tri_matmul(B,C,D,α) -*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) = - _quad_matmul(A,B,C,D) - -function _quad_matmul(A,B,C,D) - c1 = _mul_cost((A,B),(C,D)) - c2 = _mul_cost(((A,B),C),D) - c3 = _mul_cost(A,(B,(C,D))) - c4 = _mul_cost((A,(B,C)),D) - c5 = _mul_cost(A,((B,C),D)) - cmin = min(c1,c2,c3,c4,c5) - if c1 == cmin - (A*B) * (C*D) - elseif c2 == cmin - ((A*B) * C) * D - elseif c3 == cmin - A * (B * (C*D)) - elseif c4 == cmin - (A * (B*C)) * D - else - A * ((B*C) * D) - end -end -@inline _mul_cost(A::AbstractMatrix) = 0 -@inline _mul_cost((A,B)::Tuple) = _mul_cost(A,B) -@inline _mul_cost(A,B) = _mul_cost(A) + _mul_cost(B) + *(_mul_sizes(A)..., last(_mul_sizes(B))) -@inline _mul_sizes(A::AbstractMatrix) = size(A) -@inline _mul_sizes((A,B)::Tuple) = first(_mul_sizes(A)), last(_mul_sizes(B)) diff 
--git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl deleted file mode 100644 index fe40fec78e801..0000000000000 --- a/stdlib/LinearAlgebra/src/qr.jl +++ /dev/null @@ -1,754 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# QR Factorization -""" - QR <: Factorization - -A QR matrix factorization stored in a packed format, typically obtained from -[`qr`](@ref). If ``A`` is an `m`×`n` matrix, then - -```math -A = Q R -``` - -where ``Q`` is an orthogonal/unitary matrix and ``R`` is upper triangular. -The matrix ``Q`` is stored as a sequence of Householder reflectors ``v_i`` -and coefficients ``\\tau_i`` where: - -```math -Q = \\prod_{i=1}^{\\min(m,n)} (I - \\tau_i v_i v_i^T). -``` - -Iterating the decomposition produces the components `Q` and `R`. - -The object has two fields: - -* `factors` is an `m`×`n` matrix. - - - The upper triangular part contains the elements of ``R``, that is `R = - triu(F.factors)` for a `QR` object `F`. - - - The subdiagonal part contains the reflectors ``v_i`` stored in a packed format where - ``v_i`` is the ``i``th column of the matrix `V = I + tril(F.factors, -1)`. - -* `τ` is a vector of length `min(m,n)` containing the coefficients ``\tau_i``. -""" -struct QR{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T} - factors::S - τ::C - - function QR{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}} - require_one_based_indexing(factors) - new{T,S,C}(factors, τ) - end -end -QR(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} = - QR{T,typeof(factors),typeof(τ)}(factors, τ) -QR{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = - QR(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QR{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, - QR{T,S,typeof(τ)}(factors, τ), false) - -# iteration for destructuring into components -Base.iterate(S::QR) = (S.Q, Val(:R)) -Base.iterate(S::QR, ::Val{:R}) = (S.R, Val(:done)) -Base.iterate(S::QR, ::Val{:done}) = nothing - -# Note. For QRCompactWY factorization without pivoting, the WY representation based method introduced in LAPACK 3.4 -""" - QRCompactWY <: Factorization - -A QR matrix factorization stored in a compact blocked format, typically obtained from -[`qr`](@ref). If ``A`` is an `m`×`n` matrix, then - -```math -A = Q R -``` - -where ``Q`` is an orthogonal/unitary matrix and ``R`` is upper triangular. It is similar -to the [`QR`](@ref) format except that the orthogonal/unitary matrix ``Q`` is stored in -*Compact WY* format [^Schreiber1989]. For the block size ``n_b``, it is stored as -a `m`×`n` lower trapezoidal matrix ``V`` and a matrix ``T = (T_1 \\; T_2 \\; ... \\; -T_{b-1} \\; T_b')`` composed of ``b = \\lceil \\min(m,n) / n_b \\rceil`` upper triangular -matrices ``T_j`` of size ``n_b``×``n_b`` (``j = 1, ..., b-1``) and an upper trapezoidal -``n_b``×``\\min(m,n) - (b-1) n_b`` matrix ``T_b'`` (``j=b``) whose upper square part -denoted with ``T_b`` satisfying - -```math -Q = \\prod_{i=1}^{\\min(m,n)} (I - \\tau_i v_i v_i^T) -= \\prod_{j=1}^{b} (I - V_j T_j V_j^T) -``` - -such that ``v_i`` is the ``i``th column of ``V``, ``\\tau_i`` is the ``i``th element -of `[diag(T_1); diag(T_2); …; diag(T_b)]`, and ``(V_1 \\; V_2 \\; ... \\; V_b)`` -is the left `m`×`min(m, n)` block of ``V``. When constructed using [`qr`](@ref), -the block size is given by ``n_b = \\min(m, n, 36)``. 
- -Iterating the decomposition produces the components `Q` and `R`. - -The object has two fields: - -* `factors`, as in the [`QR`](@ref) type, is an `m`×`n` matrix. - - - The upper triangular part contains the elements of ``R``, that is `R = - triu(F.factors)` for a `QR` object `F`. - - - The subdiagonal part contains the reflectors ``v_i`` stored in a packed format such - that `V = I + tril(F.factors, -1)`. - -* `T` is a ``n_b``-by-``\\min(m,n)`` matrix as described above. The subdiagonal elements - for each triangular matrix ``T_j`` are ignored. - -!!! note - - This format should not to be confused with the older *WY* representation - [^Bischof1987]. - - -[^Bischof1987]: C Bischof and C Van Loan, "The WY representation for products of Householder matrices", SIAM J Sci Stat Comput 8 (1987), s2-s13. [doi:10.1137/0908009](https://doi.org/10.1137/0908009) - -[^Schreiber1989]: R Schreiber and C Van Loan, "A storage-efficient WY representation for products of Householder transformations", SIAM J Sci Stat Comput 10 (1989), 53-57. [doi:10.1137/0910005](https://doi.org/10.1137/0910005) -""" -struct QRCompactWY{S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} <: Factorization{S} - factors::M - T::C - - function QRCompactWY{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} - require_one_based_indexing(factors) - new{S,M,C}(factors, T) - end -end -QRCompactWY(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} = - QRCompactWY{S,typeof(factors),typeof(T)}(factors, T) -QRCompactWY{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} = - QRCompactWY(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QRCompactWY{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M}, - QRCompactWY{S,M,typeof(T)}(factors, T), false) - -# iteration for destructuring into components -Base.iterate(S::QRCompactWY) = (S.Q, Val(:R)) -Base.iterate(S::QRCompactWY, ::Val{:R}) = (S.R, Val(:done)) -Base.iterate(S::QRCompactWY, ::Val{:done}) = nothing - -# returns upper triangular views of all non-undef values of `qr(A).T`: -# -# julia> sparse(qr(A).T .== qr(A).T) -# 36×100 SparseMatrixCSC{Bool, Int64} with 1767 stored entries: -# ⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ -# ⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ -# ⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿ -# ⠀⠀⠀⠀⠀⠂⠛⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿ -# ⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⢀⠐⠙⢿⣿⣿⣿⣿ -# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⢀⢙⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠁⠀⡀⠀⠙⢿⣿⣿ -# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⠀⠄⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⡀⠀⠀⢀⠀⠀⠙⢿ -# ⠀⡀⠀⠀⠀⠀⠀⠀⠂⠒⠒⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⢀⠀⠀⠀⡀⠀⠀ -# ⠀⠀⠀⠀⠀⠀⠀⠀⣈⡀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠂⠀⢀⠀ -# -function _triuppers_qr(T) - blocksize, cols = size(T) - return Iterators.map(0:div(cols - 1, blocksize)) do i - n = min(blocksize, cols - i * blocksize) - return UpperTriangular(view(T, 1:n, (1:n) .+ i * blocksize)) - end -end - -function Base.hash(F::QRCompactWY, h::UInt) - return hash(F.factors, foldr(hash, _triuppers_qr(F.T); init=hash(QRCompactWY, h))) -end -function Base.:(==)(A::QRCompactWY, B::QRCompactWY) - return A.factors == B.factors && all(splat(==), zip(_triuppers_qr.((A.T, B.T))...)) -end -function Base.isequal(A::QRCompactWY, B::QRCompactWY) - return isequal(A.factors, B.factors) && all(zip(_triuppers_qr.((A.T, B.T))...)) do (a, b) - isequal(a, b)::Bool - end -end - -""" - QRPivoted <: Factorization - -A QR matrix factorization with column pivoting in a packed format, typically 
obtained from -[`qr`](@ref). If ``A`` is an `m`×`n` matrix, then - -```math -A P = Q R -``` - -where ``P`` is a permutation matrix, ``Q`` is an orthogonal/unitary matrix and ``R`` is -upper triangular. The matrix ``Q`` is stored as a sequence of Householder reflectors: - -```math -Q = \\prod_{i=1}^{\\min(m,n)} (I - \\tau_i v_i v_i^T). -``` - -Iterating the decomposition produces the components `Q`, `R`, and `p`. - -The object has three fields: - -* `factors` is an `m`×`n` matrix. - - - The upper triangular part contains the elements of ``R``, that is `R = - triu(F.factors)` for a `QR` object `F`. - - - The subdiagonal part contains the reflectors ``v_i`` stored in a packed format where - ``v_i`` is the ``i``th column of the matrix `V = I + tril(F.factors, -1)`. - -* `τ` is a vector of length `min(m,n)` containing the coefficients ``\tau_i``. - -* `jpvt` is an integer vector of length `n` corresponding to the permutation ``P``. -""" -struct QRPivoted{T,S<:AbstractMatrix{T},C<:AbstractVector{T},P<:AbstractVector{<:Integer}} <: Factorization{T} - factors::S - τ::C - jpvt::P - - function QRPivoted{T,S,C,P}(factors, τ, jpvt) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T},P<:AbstractVector{<:Integer}} - require_one_based_indexing(factors, τ, jpvt) - new{T,S,C,P}(factors, τ, jpvt) - end -end -QRPivoted(factors::AbstractMatrix{T}, τ::AbstractVector{T}, - jpvt::AbstractVector{<:Integer}) where {T} = - QRPivoted{T,typeof(factors),typeof(τ),typeof(jpvt)}(factors, τ, jpvt) -QRPivoted{T}(factors::AbstractMatrix, τ::AbstractVector, - jpvt::AbstractVector{<:Integer}) where {T} = - QRPivoted(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ), jpvt) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QRPivoted{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}, - jpvt::AbstractVector{<:Integer}) where {T,S}, - QRPivoted{T,S,typeof(τ),typeof(jpvt)}(factors, τ, jpvt), false) - -# iteration for destructuring into components -Base.iterate(S::QRPivoted) = (S.Q, Val(:R)) -Base.iterate(S::QRPivoted, ::Val{:R}) = (S.R, Val(:p)) -Base.iterate(S::QRPivoted, ::Val{:p}) = (S.p, Val(:done)) -Base.iterate(S::QRPivoted, ::Val{:done}) = nothing - -function qrfactUnblocked!(A::AbstractMatrix{T}) where {T} - require_one_based_indexing(A) - m, n = size(A) - τ = zeros(T, min(m,n)) - for k = 1:min(m - 1 + !(T<:Real), n) - x = view(A, k:m, k) - τk = reflector!(x) - τ[k] = τk - reflectorApply!(x, τk, view(A, k:m, k + 1:n)) - end - QR(A, τ) -end - -# Find index for columns with largest two norm -function indmaxcolumn(A::AbstractMatrix) - mm = norm(view(A, :, 1)) - ii = 1 - for i = 2:size(A, 2) - mi = norm(view(A, :, i)) - if abs(mi) > mm - mm = mi - ii = i - end - end - return ii -end - -function qrfactPivotedUnblocked!(A::AbstractMatrix) - m, n = size(A) - piv = Vector(UnitRange{BlasInt}(1,n)) - τ = Vector{eltype(A)}(undef, min(m,n)) - for j = 1:min(m,n) - - # Find column with maximum norm in trailing submatrix - jm = indmaxcolumn(view(A, j:m, j:n)) + j - 1 - - if jm != j - # Flip elements in pivoting vector - tmpp = piv[jm] - piv[jm] = piv[j] - piv[j] = tmpp - - # Update matrix with - for i = 1:m - tmp = A[i,jm] - A[i,jm] = A[i,j] - A[i,j] = tmp - end - end - - # Compute reflector of columns j - x = view(A, j:m, j) - τj = reflector!(x) - τ[j] = τj - - # Update trailing submatrix with reflector - reflectorApply!(x, τj, view(A, j:m, j+1:n)) - end - return QRPivoted{eltype(A), typeof(A), typeof(τ), typeof(piv)}(A, τ, piv) -end - -# LAPACK version -qr!(A::StridedMatrix{<:BlasFloat}, 
::NoPivot; blocksize=36) = - QRCompactWY(LAPACK.geqrt!(A, min(min(size(A)...), blocksize))...) -qr!(A::StridedMatrix{<:BlasFloat}, ::ColumnNorm) = QRPivoted(LAPACK.geqp3!(A)...) - -# Generic fallbacks - -""" - qr!(A, pivot = NoPivot(); blocksize) - -`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`AbstractMatrix`](@ref), -but saves space by overwriting the input `A`, instead of creating a copy. -An [`InexactError`](@ref) exception is thrown if the factorization produces a number not -representable by the element type of `A`, e.g. for integer types. - -!!! compat "Julia 1.4" - The `blocksize` keyword argument requires Julia 1.4 or later. - -# Examples -```jldoctest -julia> a = [1. 2.; 3. 4.] -2×2 Matrix{Float64}: - 1.0 2.0 - 3.0 4.0 - -julia> qr!(a) -LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: 2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} -R factor: -2×2 Matrix{Float64}: - -3.16228 -4.42719 - 0.0 -0.632456 - -julia> a = [1 2; 3 4] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> qr!(a) -ERROR: InexactError: Int64(3.1622776601683795) -Stacktrace: -[...] -``` -""" -qr!(A::AbstractMatrix, ::NoPivot) = qrfactUnblocked!(A) -qr!(A::AbstractMatrix, ::ColumnNorm) = qrfactPivotedUnblocked!(A) -qr!(A::AbstractMatrix) = qr!(A, NoPivot()) -# TODO: Remove in Julia v2.0 -@deprecate qr!(A::AbstractMatrix, ::Val{true}) qr!(A, ColumnNorm()) -@deprecate qr!(A::AbstractMatrix, ::Val{false}) qr!(A, NoPivot()) - -_qreltype(::Type{T}) where T = typeof(zero(T)/sqrt(abs2(one(T)))) - -""" - qr(A, pivot = NoPivot(); blocksize) -> F - -Compute the QR factorization of the matrix `A`: an orthogonal (or unitary if `A` is -complex-valued) matrix `Q`, and an upper triangular matrix `R` such that - -```math -A = Q R -``` - -The returned object `F` stores the factorization in a packed format: - - - if `pivot == ColumnNorm()` then `F` is a [`QRPivoted`](@ref) object, - - - otherwise if the element type of `A` is a BLAS type ([`Float32`](@ref), [`Float64`](@ref), - `ComplexF32` or `ComplexF64`), then `F` is a [`QRCompactWY`](@ref) object, - - - otherwise `F` is a [`QR`](@ref) object. - -The individual components of the decomposition `F` can be retrieved via property accessors: - - - `F.Q`: the orthogonal/unitary matrix `Q` - - `F.R`: the upper triangular matrix `R` - - `F.p`: the permutation vector of the pivot ([`QRPivoted`](@ref) only) - - `F.P`: the permutation matrix of the pivot ([`QRPivoted`](@ref) only) - -Iterating the decomposition produces the components `Q`, `R`, and if extant `p`. - -The following functions are available for the `QR` objects: [`inv`](@ref), [`size`](@ref), -and [`\\`](@ref). When `A` is rectangular, `\\` will return a least squares -solution and if the solution is not unique, the one with smallest norm is returned. When -`A` is not full rank, factorization with (column) pivoting is required to obtain a minimum -norm solution. - -Multiplication with respect to either full/square or non-full/square `Q` is allowed, i.e. both `F.Q*F.R` -and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix with -[`Matrix`](@ref). This operation returns the "thin" Q factor, i.e., if `A` is `m`×`n` with `m>=n`, then -`Matrix(F.Q)` yields an `m`×`n` matrix with orthonormal columns. To retrieve the "full" Q factor, an -`m`×`m` orthogonal matrix, use `F.Q*I` or `collect(F.Q)`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` -orthogonal matrix. 
- -The block size for QR decomposition can be specified by keyword argument -`blocksize :: Integer` when `pivot == NoPivot()` and `A isa StridedMatrix{<:BlasFloat}`. -It is ignored when `blocksize > minimum(size(A))`. See [`QRCompactWY`](@ref). - -!!! compat "Julia 1.4" - The `blocksize` keyword argument requires Julia 1.4 or later. - -# Examples -```jldoctest -julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0] -3×2 Matrix{Float64}: - 3.0 -6.0 - 4.0 -8.0 - 0.0 1.0 - -julia> F = qr(A) -LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: 3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} -R factor: -2×2 Matrix{Float64}: - -5.0 10.0 - 0.0 -1.0 - -julia> F.Q * F.R == A -true -``` - -!!! note - `qr` returns multiple types because LAPACK uses several representations - that minimize the memory storage requirements of products of Householder - elementary reflectors, so that the `Q` and `R` matrices can be stored - compactly rather as two separate dense matrices. -""" -function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T - require_one_based_indexing(A) - AA = copy_similar(A, _qreltype(T)) - return qr!(AA, arg...; kwargs...) -end -# TODO: remove in Julia v2.0 -@deprecate qr(A::AbstractMatrix, ::Val{false}; kwargs...) qr(A, NoPivot(); kwargs...) -@deprecate qr(A::AbstractMatrix, ::Val{true}; kwargs...) qr(A, ColumnNorm(); kwargs...) - -qr(x::Number) = qr(fill(x,1,1)) -function qr(v::AbstractVector) - require_one_based_indexing(v) - qr(reshape(v, (length(v), 1))) -end - -# Conversions -QR{T}(A::QR) where {T} = QR(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ)) -Factorization{T}(A::QR{T}) where {T} = A -Factorization{T}(A::QR) where {T} = QR{T}(A) -QRCompactWY{T}(A::QRCompactWY) where {T} = QRCompactWY(convert(AbstractMatrix{T}, A.factors), convert(AbstractMatrix{T}, A.T)) -Factorization{T}(A::QRCompactWY{T}) where {T} = A -Factorization{T}(A::QRCompactWY) where {T} = QRCompactWY{T}(A) -AbstractMatrix(F::Union{QR,QRCompactWY}) = F.Q * F.R -AbstractArray(F::Union{QR,QRCompactWY}) = AbstractMatrix(F) -Matrix(F::Union{QR,QRCompactWY}) = Array(AbstractArray(F)) -Array(F::Union{QR,QRCompactWY}) = Matrix(F) -QRPivoted{T}(A::QRPivoted) where {T} = QRPivoted(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ), A.jpvt) -Factorization{T}(A::QRPivoted{T}) where {T} = A -Factorization{T}(A::QRPivoted) where {T} = QRPivoted{T}(A) -AbstractMatrix(F::QRPivoted) = (F.Q * F.R)[:,invperm(F.p)] -AbstractArray(F::QRPivoted) = AbstractMatrix(F) -Matrix(F::QRPivoted) = Array(AbstractArray(F)) -Array(F::QRPivoted) = Matrix(F) - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{QR, QRCompactWY, QRPivoted}) - summary(io, F); println(io) - print(io, "Q factor: ") - show(io, mime, F.Q) - println(io, "\nR factor:") - show(io, mime, F.R) - if F isa QRPivoted - println(io, "\npermutation:") - show(io, mime, F.p) - end -end - -function getproperty(F::QR, d::Symbol) - m, n = size(F) - if d === :R - return triu!(getfield(F, :factors)[1:min(m,n), 1:n]) - elseif d === :Q - return QRPackedQ(getfield(F, :factors), F.τ) - else - getfield(F, d) - end -end -function getproperty(F::QRCompactWY, d::Symbol) - m, n = size(F) - if d === :R - return triu!(getfield(F, :factors)[1:min(m,n), 1:n]) - elseif d === :Q - return QRCompactWYQ(getfield(F, :factors), F.T) - else - getfield(F, d) - end -end -Base.propertynames(F::Union{QR,QRCompactWY}, private::Bool=false) = - (:R, :Q, (private ? fieldnames(typeof(F)) : ())...) 
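(Aside, not part of the diff: the packed layout documented above — `R` in the upper triangle of `factors`, the unit lower-trapezoidal reflectors below the diagonal, and the coefficients in `τ` — can be verified directly. The sketch below uses a `BigFloat` matrix so that the generic `QR` type is returned; all names are local to the example.)

```julia
using LinearAlgebra

A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0]
F = qr(big.(A))                                   # non-BLAS eltype, so F isa LinearAlgebra.QR

R = triu(Matrix(F.factors)[1:2, :])               # R lives in the upper triangle of `factors`
V = tril(Matrix(F.factors), -1) + Matrix(I, 3, 2) # reflectors vᵢ with unit diagonal

# Q = ∏ᵢ (I - τᵢ vᵢ vᵢᵀ), as stated in the QR docstring
Q = prod(I - F.τ[i] * V[:, i] * V[:, i]' for i in 1:2)

Q[:, 1:2] * R ≈ A                                 # true (up to rounding)
```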
- -function getproperty(F::QRPivoted{T}, d::Symbol) where T - m, n = size(F) - if d === :R - return triu!(getfield(F, :factors)[1:min(m,n), 1:n]) - elseif d === :Q - return QRPackedQ(getfield(F, :factors), F.τ) - elseif d === :p - return getfield(F, :jpvt) - elseif d === :P - p = F.p - n = length(p) - P = zeros(T, n, n) - for i in 1:n - P[p[i],i] = one(T) - end - return P - else - getfield(F, d) - end -end -Base.propertynames(F::QRPivoted, private::Bool=false) = - (:R, :Q, :p, :P, (private ? fieldnames(typeof(F)) : ())...) - -transpose(F::Union{QR{<:Real},QRPivoted{<:Real},QRCompactWY{<:Real}}) = F' -transpose(::Union{QR,QRPivoted,QRCompactWY}) = - throw(ArgumentError("transpose of QR decomposition is not supported, consider using adjoint")) - -size(F::Union{QR,QRCompactWY,QRPivoted}) = size(getfield(F, :factors)) -size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim) - - -function ldiv!(A::QRCompactWY{T}, b::AbstractVector{T}) where {T} - require_one_based_indexing(b) - m, n = size(A) - ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2))) - return b -end -function ldiv!(A::QRCompactWY{T}, B::AbstractMatrix{T}) where {T} - require_one_based_indexing(B) - m, n = size(A) - ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2))) - return B -end - -# Julia implementation similar to xgelsy -function ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}, rcond::Real) where {T<:BlasFloat} - require_one_based_indexing(B) - m, n = size(A) - - if m > size(B, 1) || n > size(B, 1) - throw(DimensionMismatch("B has leading dimension $(size(B, 1)) but needs at least $(max(m, n))")) - end - - if length(A.factors) == 0 || length(B) == 0 - return B, 0 - end - - @inbounds begin - smin = smax = abs(A.factors[1]) - - if smax == 0 - return fill!(B, 0), 0 - end - - mn = min(m, n) - - # allocate temporary work space - tmp = Vector{T}(undef, 2mn) - wmin = view(tmp, 1:mn) - wmax = view(tmp, mn+1:2mn) - - rnk = 1 - wmin[1] = 1 - wmax[1] = 1 - - while rnk < mn - i = rnk + 1 - - smin, s1, c1 = LAPACK.laic1!(2, view(wmin, 1:rnk), smin, view(A.factors, 1:rnk, i), A.factors[i,i]) - smax, s2, c2 = LAPACK.laic1!(1, view(wmax, 1:rnk), smax, view(A.factors, 1:rnk, i), A.factors[i,i]) - - if smax*rcond > smin - break - end - - for j in 1:rnk - wmin[j] *= s1 - wmax[j] *= s2 - end - wmin[i] = c1 - wmax[i] = c2 - - rnk += 1 - end - - if rnk < n - C, τ = LAPACK.tzrzf!(A.factors[1:rnk, :]) - work = vec(C) - else - C, τ = A.factors, A.τ - work = resize!(tmp, n) - end - - lmul!(adjoint(A.Q), view(B, 1:m, :)) - ldiv!(UpperTriangular(view(C, 1:rnk, 1:rnk)), view(B, 1:rnk, :)) - - if rnk < n - B[rnk+1:n,:] .= zero(T) - LAPACK.ormrz!('L', T <: Complex ? 
'C' : 'T', C, τ, view(B, 1:n, :)) - end - - for j in axes(B, 2) - for i in 1:n - work[A.p[i]] = B[i,j] - end - for i in 1:n - B[i,j] = work[i] - end - end - end - - return B, rnk -end - -ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractVector{T}) where {T<:BlasFloat} = - vec(ldiv!(A, reshape(B, length(B), 1))) -ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}) where {T<:BlasFloat} = - ldiv!(A, B, min(size(A)...)*eps(real(T)))[1] - -function _wide_qr_ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T - m, n = size(A) - minmn = min(m,n) - mB, nB = size(B) - lmul!(adjoint(A.Q), view(B, 1:m, :)) - R = A.R # makes a copy, used as a buffer below - @inbounds begin - if n > m # minimum norm solution - τ = zeros(T,m) - for k = m:-1:1 # Trapezoid to triangular by elementary operation - x = view(R, k, [k; m + 1:n]) - τk = reflector!(x) - τ[k] = conj(τk) - for i = 1:k - 1 - vRi = R[i,k] - for j = m + 1:n - vRi += R[i,j]*x[j - m + 1]' - end - vRi *= τk - R[i,k] -= vRi - for j = m + 1:n - R[i,j] -= vRi*x[j - m + 1] - end - end - end - end - ldiv!(UpperTriangular(view(R, :, 1:minmn)), view(B, 1:minmn, :)) - if n > m # Apply elementary transformation to solution - B[m + 1:mB,1:nB] .= zero(T) - for j = 1:nB - for k = 1:m - vBj = B[k,j]' - for i = m + 1:n - vBj += B[i,j]'*R[k,i]' - end - vBj *= τ[k] - B[k,j] -= vBj' - for i = m + 1:n - B[i,j] -= R[k,i]'*vBj' - end - end - end - end - end - return B -end - - -function ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T - m, n = size(A) - m < n && return _wide_qr_ldiv!(A, B) - - lmul!(adjoint(A.Q), view(B, 1:m, :)) - R = A.factors - ldiv!(UpperTriangular(view(R,1:n,:)), view(B, 1:n, :)) - return B -end -function ldiv!(A::QR, B::AbstractVector) - ldiv!(A, reshape(B, length(B), 1)) - return B -end - -function ldiv!(A::QRPivoted, b::AbstractVector) - ldiv!(QR(A.factors,A.τ), b) - b[1:size(A.factors, 2)] = view(b, 1:size(A.factors, 2))[invperm(A.jpvt)] - b -end -function ldiv!(A::QRPivoted, B::AbstractMatrix) - ldiv!(QR(A.factors, A.τ), B) - B[1:size(A.factors, 2),:] = view(B, 1:size(A.factors, 2), :)[invperm(A.jpvt),:] - B -end - -function _apply_permutation!(F::QRPivoted, B::AbstractVecOrMat) - # Apply permutation but only to the top part of the solution vector since - # it's padded with zeros for underdetermined problems - B[1:length(F.p), :] = B[F.p, :] - return B -end -_apply_permutation!(::Factorization, B::AbstractVecOrMat) = B - -function ldiv!(Fadj::AdjointFactorization{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat) - require_one_based_indexing(B) - m, n = size(Fadj) - - # We don't allow solutions overdetermined systems - if m > n - throw(DimensionMismatch("overdetermined systems are not supported")) - end - if n != size(B, 1) - throw(DimensionMismatch("inputs should have the same number of rows")) - end - F = parent(Fadj) - - B = _apply_permutation!(F, B) - - # For underdetermined system, the triangular solve should only be applied to the top - # part of B that contains the rhs. For square problems, the view corresponds to B itself - ldiv!(LowerTriangular(adjoint(F.R)), view(B, 1:size(F.R, 2), :)) - lmul!(F.Q, B) - - return B -end - -# With a real lhs and complex rhs with the same precision, we can reinterpret the complex -# rhs as a real rhs with twice the number of columns. 
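(Aside, not part of the diff: a simplified illustration of the idea in the comment above. The methods that follow do this in place with `reinterpret`, `reshape`, and a transpose; the sketch instead splits the right-hand side into explicit real and imaginary columns, which is mathematically the same trick.)

```julia
using LinearAlgebra

A = [2.0 1.0; 1.0 3.0]                   # real left-hand side
b = [1.0 + 2.0im, 3.0 - 1.0im]           # complex right-hand side

F = qr(A)

Breal = [real.(b) imag.(b)]              # one real rhs with twice as many columns
Xreal = F \ Breal                        # a single real solve handles both columns
x = Xreal[:, 1] .+ im .* Xreal[:, 2]     # reassemble the complex solution

x ≈ A \ b                                # true (up to rounding)
```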
- -# convenience methods to compute the return size correctly for vectors and matrices -_ret_size(A::Factorization, b::AbstractVector) = (max(size(A, 2), length(b)),) -_ret_size(A::Factorization, B::AbstractMatrix) = (max(size(A, 2), size(B, 1)), size(B, 2)) - -function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{T}}) where T<:BlasReal - require_one_based_indexing(BIn) - m, n = size(A) - m == size(BIn, 1) || throw(DimensionMismatch("left hand side has $m rows, but right hand side has $(size(BIn,1)) rows")) - -# |z1|z3| reinterpret |x1|x2|x3|x4| transpose |x1|y1| reshape |x1|y1|x3|y3| -# |z2|z4| -> |y1|y2|y3|y4| -> |x2|y2| -> |x2|y2|x4|y4| -# |x3|y3| -# |x4|y4| - B = reshape(copy(transpose(reinterpret(T, reshape(BIn, (1, length(BIn)))))), size(BIn, 1), 2*size(BIn, 2)) - - X = _zeros(T, B, n) - X[1:size(B, 1), :] = B - - ldiv!(A, X) - -# |z1|z3| reinterpret |x1|x2|x3|x4| transpose |x1|y1| reshape |x1|y1|x3|y3| -# |z2|z4| <- |y1|y2|y3|y4| <- |x2|y2| <- |x2|y2|x4|y4| -# |x3|y3| -# |x4|y4| - XX = reshape(collect(reinterpret(Complex{T}, copy(transpose(reshape(X, div(length(X), 2), 2))))), _ret_size(A, BIn)) - return _cut_B(XX, 1:n) -end - -##TODO: Add methods for rank(A::QRP{T}) and adjust the (\) method accordingly -## Add rcond methods for Cholesky, LU, QR and QRP types -## Lower priority: Add LQ, QL and RQ factorizations - -# FIXME! Should add balancing option through xgebal diff --git a/stdlib/LinearAlgebra/src/schur.jl b/stdlib/LinearAlgebra/src/schur.jl deleted file mode 100644 index 7257544ff872e..0000000000000 --- a/stdlib/LinearAlgebra/src/schur.jl +++ /dev/null @@ -1,449 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Schur decomposition -""" - Schur <: Factorization - -Matrix factorization type of the Schur factorization of a matrix `A`. This is the -return type of [`schur(_)`](@ref), the corresponding matrix factorization function. - -If `F::Schur` is the factorization object, the (quasi) triangular Schur factor can -be obtained via either `F.Schur` or `F.T` and the orthogonal/unitary Schur vectors -via `F.vectors` or `F.Z` such that `A = F.vectors * F.Schur * F.vectors'`. The -eigenvalues of `A` can be obtained with `F.values`. - -Iterating the decomposition produces the components `F.T`, `F.Z`, and `F.values`. - -# Examples -```jldoctest -julia> A = [5. 7.; -2. -4.] 
-2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> F = schur(A) -Schur{Float64, Matrix{Float64}, Vector{Float64}} -T factor: -2×2 Matrix{Float64}: - 3.0 9.0 - 0.0 -2.0 -Z factor: -2×2 Matrix{Float64}: - 0.961524 0.274721 - -0.274721 0.961524 -eigenvalues: -2-element Vector{Float64}: - 3.0 - -2.0 - -julia> F.vectors * F.Schur * F.vectors' -2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> t, z, vals = F; # destructuring via iteration - -julia> t == F.T && z == F.Z && vals == F.values -true -``` -""" -struct Schur{Ty,S<:AbstractMatrix,C<:AbstractVector} <: Factorization{Ty} - T::S - Z::S - values::C - Schur{Ty,S,C}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, - values::AbstractVector) where {Ty,S,C} = new(T, Z, values) -end -Schur(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, values::AbstractVector) where {Ty} = - Schur{Ty, typeof(T), typeof(values)}(T, Z, values) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(Schur{Ty,S}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, - values::AbstractVector) where {Ty,S}, - Schur{Ty,S,typeof(values)}(T, Z, values)) - -# iteration for destructuring into components -Base.iterate(S::Schur) = (S.T, Val(:Z)) -Base.iterate(S::Schur, ::Val{:Z}) = (S.Z, Val(:values)) -Base.iterate(S::Schur, ::Val{:values}) = (S.values, Val(:done)) -Base.iterate(S::Schur, ::Val{:done}) = nothing - -""" - schur!(A) -> F::Schur - -Same as [`schur`](@ref) but uses the input argument `A` as workspace. - -# Examples -```jldoctest -julia> A = [5. 7.; -2. -4.] -2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> F = schur!(A) -Schur{Float64, Matrix{Float64}, Vector{Float64}} -T factor: -2×2 Matrix{Float64}: - 3.0 9.0 - 0.0 -2.0 -Z factor: -2×2 Matrix{Float64}: - 0.961524 0.274721 - -0.274721 0.961524 -eigenvalues: -2-element Vector{Float64}: - 3.0 - -2.0 - -julia> A -2×2 Matrix{Float64}: - 3.0 9.0 - 0.0 -2.0 -``` -""" -schur!(A::StridedMatrix{<:BlasFloat}) = Schur(LinearAlgebra.LAPACK.gees!('V', A)...) - -schur!(A::UpperHessenberg{T}) where {T<:BlasFloat} = Schur(LinearAlgebra.LAPACK.hseqr!(parent(A))...) - -""" - schur(A) -> F::Schur - -Computes the Schur factorization of the matrix `A`. The (quasi) triangular Schur factor can -be obtained from the `Schur` object `F` with either `F.Schur` or `F.T` and the -orthogonal/unitary Schur vectors can be obtained with `F.vectors` or `F.Z` such that -`A = F.vectors * F.Schur * F.vectors'`. The eigenvalues of `A` can be obtained with `F.values`. - -For real `A`, the Schur factorization is "quasitriangular", which means that it -is upper-triangular except with 2×2 diagonal blocks for any conjugate pair -of complex eigenvalues; this allows the factorization to be purely real even -when there are complex eigenvalues. To obtain the (complex) purely upper-triangular -Schur factorization from a real quasitriangular factorization, you can use -`Schur{Complex}(schur(A))`. - -Iterating the decomposition produces the components `F.T`, `F.Z`, and `F.values`. - -# Examples -```jldoctest -julia> A = [5. 7.; -2. -4.] 
-2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> F = schur(A) -Schur{Float64, Matrix{Float64}, Vector{Float64}} -T factor: -2×2 Matrix{Float64}: - 3.0 9.0 - 0.0 -2.0 -Z factor: -2×2 Matrix{Float64}: - 0.961524 0.274721 - -0.274721 0.961524 -eigenvalues: -2-element Vector{Float64}: - 3.0 - -2.0 - -julia> F.vectors * F.Schur * F.vectors' -2×2 Matrix{Float64}: - 5.0 7.0 - -2.0 -4.0 - -julia> t, z, vals = F; # destructuring via iteration - -julia> t == F.T && z == F.Z && vals == F.values -true -``` -""" -schur(A::AbstractMatrix{T}) where {T} = schur!(copy_similar(A, eigtype(T))) -schur(A::UpperHessenberg{T}) where {T} = schur!(copy_similar(A, eigtype(T))) -function schur(A::RealHermSymComplexHerm) - F = eigen(A; sortby=nothing) - return Schur(typeof(F.vectors)(Diagonal(F.values)), F.vectors, F.values) -end -function schur(A::Union{UnitUpperTriangular{T},UpperTriangular{T}}) where {T} - t = eigtype(T) - Z = copy_similar(A, t) - return Schur(Z, Matrix{t}(I, size(A)), convert(Vector{t}, diag(A))) -end -function schur(A::Union{UnitLowerTriangular{T},LowerTriangular{T}}) where {T} - t = eigtype(T) - # double flip the matrix A - Z = copy_similar(A, t) - reverse!(reshape(Z, :)) - # construct "reverse" identity - n = size(A, 1) - J = zeros(t, n, n) - for i in axes(J, 2) - J[n+1-i, i] = oneunit(t) - end - return Schur(Z, J, convert(Vector{t}, diag(A))) -end -function schur(A::Bidiagonal{T}) where {T} - t = eigtype(T) - if A.uplo == 'U' - return Schur(Matrix{t}(A), Matrix{t}(I, size(A)), Vector{t}(A.dv)) - else # A.uplo == 'L' - # construct "reverse" identity - n = size(A, 1) - J = zeros(t, n, n) - for i in axes(J, 2) - J[n+1-i, i] = oneunit(t) - end - dv = reverse!(Vector{t}(A.dv)) - ev = reverse!(Vector{t}(A.ev)) - return Schur(Matrix{t}(Bidiagonal(dv, ev, 'U')), J, dv) - end -end - -function getproperty(F::Schur, d::Symbol) - if d === :Schur - return getfield(F, :T) - elseif d === :vectors - return getfield(F, :Z) - else - getfield(F, d) - end -end - -Base.propertynames(F::Schur) = - (:Schur, :vectors, fieldnames(typeof(F))...) - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Schur) - summary(io, F); println(io) - println(io, "T factor:") - show(io, mime, F.T) - println(io, "\nZ factor:") - show(io, mime, F.Z) - println(io, "\neigenvalues:") - show(io, mime, F.values) -end - -# convert a (standard-form) quasi-triangular real Schur factorization into a -# triangular complex Schur factorization. -# -# Based on the "triangularize" function from GenericSchur.jl, -# released under the MIT "Expat" license by @RalphAS -function Schur{CT}(S::Schur{<:Real}) where {CT<:Complex} - Tr = S.T - T = CT.(Tr) - Z = CT.(S.Z) - n = size(T,1) - for j=n:-1:2 - if !iszero(Tr[j,j-1]) - # We want a unitary similarity transform from - # ┌ ┐ ┌ ┐ - # │a b│ │w₁ x│ - # │c a│ into │0 w₂│ where bc < 0 (a,b,c real) - # └ ┘ └ ┘ - # If we write it as - # ┌ ┐ - # │u v'│ - # │-v u'│ - # └ ┘ - # and make the Ansatz that u is real (so v is imaginary), - # we arrive at a Givens rotation: - # θ = atan(sqrt(-Tr[j,j-1]/Tr[j-1,j])) - # s,c = sin(θ), cos(θ) - s = sqrt(abs(Tr[j,j-1])) - c = sqrt(abs(Tr[j-1,j])) - r = hypot(s,c) - G = Givens(j-1,j,complex(c/r),im*(-s/r)) - lmul!(G,T) - rmul!(T,G') - rmul!(Z,G') - end - end - return Schur(triu!(T),Z,diag(T)) -end - -Schur{Complex}(S::Schur{<:Complex}) = S -Schur{T}(S::Schur{T}) where {T} = S -Schur{T}(S::Schur) where {T} = Schur(T.(S.T), T.(S.Z), T <: Real && !(eltype(S.values) <: Real) ? 
complex(T).(S.values) : T.(S.values)) - -""" - ordschur!(F::Schur, select::Union{Vector{Bool},BitVector}) -> F::Schur - -Same as [`ordschur`](@ref) but overwrites the factorization `F`. -""" -function ordschur!(schur::Schur, select::Union{Vector{Bool},BitVector}) - _, _, vals = _ordschur!(schur.T, schur.Z, select) - schur.values[:] = vals - return schur -end - -_ordschur(T::StridedMatrix{Ty}, Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} = - _ordschur!(copy(T), copy(Z), select) - -_ordschur!(T::StridedMatrix{Ty}, Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} = - LinearAlgebra.LAPACK.trsen!(convert(Vector{BlasInt}, select), T, Z)[1:3] - -""" - ordschur(F::Schur, select::Union{Vector{Bool},BitVector}) -> F::Schur - -Reorders the Schur factorization `F` of a matrix `A = Z*T*Z'` according to the logical array -`select` returning the reordered factorization `F` object. The selected eigenvalues appear -in the leading diagonal of `F.Schur` and the corresponding leading columns of -`F.vectors` form an orthogonal/unitary basis of the corresponding right invariant -subspace. In the real case, a complex conjugate pair of eigenvalues must be either both -included or both excluded via `select`. -""" -ordschur(schur::Schur, select::Union{Vector{Bool},BitVector}) = - Schur(_ordschur(schur.T, schur.Z, select)...) - -""" - GeneralizedSchur <: Factorization - -Matrix factorization type of the generalized Schur factorization of two matrices -`A` and `B`. This is the return type of [`schur(_, _)`](@ref), the corresponding -matrix factorization function. - -If `F::GeneralizedSchur` is the factorization object, the (quasi) triangular Schur -factors can be obtained via `F.S` and `F.T`, the left unitary/orthogonal Schur -vectors via `F.left` or `F.Q`, and the right unitary/orthogonal Schur vectors can -be obtained with `F.right` or `F.Z` such that `A=F.left*F.S*F.right'` and -`B=F.left*F.T*F.right'`. The generalized eigenvalues of `A` and `B` can be obtained -with `F.α./F.β`. - -Iterating the decomposition produces the components `F.S`, `F.T`, `F.Q`, `F.Z`, -`F.α`, and `F.β`. 
-""" -struct GeneralizedSchur{Ty,M<:AbstractMatrix,A<:AbstractVector,B<:AbstractVector{Ty}} <: Factorization{Ty} - S::M - T::M - α::A - β::B - Q::M - Z::M - function GeneralizedSchur{Ty,M,A,B}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty}, - alpha::AbstractVector, beta::AbstractVector{Ty}, - Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M,A,B} - new{Ty,M,A,B}(S, T, alpha, beta, Q, Z) - end -end -function GeneralizedSchur(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty}, - alpha::AbstractVector, beta::AbstractVector{Ty}, - Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where Ty - GeneralizedSchur{Ty, typeof(S), typeof(alpha), typeof(beta)}(S, T, alpha, beta, Q, Z) -end -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(GeneralizedSchur{Ty,M}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty}, - alpha::AbstractVector, beta::AbstractVector{Ty}, - Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M}, - GeneralizedSchur{Ty,M,typeof(alpha),typeof(beta)}(S, T, alpha, beta, Q, Z)) - -# iteration for destructuring into components -Base.iterate(S::GeneralizedSchur) = (S.S, Val(:T)) -Base.iterate(S::GeneralizedSchur, ::Val{:T}) = (S.T, Val(:Q)) -Base.iterate(S::GeneralizedSchur, ::Val{:Q}) = (S.Q, Val(:Z)) -Base.iterate(S::GeneralizedSchur, ::Val{:Z}) = (S.Z, Val(:α)) -Base.iterate(S::GeneralizedSchur, ::Val{:α}) = (S.α, Val(:β)) -Base.iterate(S::GeneralizedSchur, ::Val{:β}) = (S.β, Val(:done)) -Base.iterate(S::GeneralizedSchur, ::Val{:done}) = nothing - -""" - schur!(A::StridedMatrix, B::StridedMatrix) -> F::GeneralizedSchur - -Same as [`schur`](@ref) but uses the input matrices `A` and `B` as workspace. -""" -function schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} - if LAPACK.version() < v"3.6.0" - GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...) - else - GeneralizedSchur(LinearAlgebra.LAPACK.gges3!('V', 'V', A, B)...) - end -end - -""" - schur(A, B) -> F::GeneralizedSchur - -Computes the Generalized Schur (or QZ) factorization of the matrices `A` and `B`. The -(quasi) triangular Schur factors can be obtained from the `Schur` object `F` with `F.S` -and `F.T`, the left unitary/orthogonal Schur vectors can be obtained with `F.left` or -`F.Q` and the right unitary/orthogonal Schur vectors can be obtained with `F.right` or -`F.Z` such that `A=F.left*F.S*F.right'` and `B=F.left*F.T*F.right'`. The -generalized eigenvalues of `A` and `B` can be obtained with `F.α./F.β`. - -Iterating the decomposition produces the components `F.S`, `F.T`, `F.Q`, `F.Z`, -`F.α`, and `F.β`. -""" -function schur(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} - S = promote_type(eigtype(TA), TB) - return schur!(copy_similar(A, S), copy_similar(B, S)) -end - -""" - ordschur!(F::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) -> F::GeneralizedSchur - -Same as `ordschur` but overwrites the factorization `F`. 
-""" -function ordschur!(gschur::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) - _, _, α, β, _, _ = _ordschur!(gschur.S, gschur.T, gschur.Q, gschur.Z, select) - gschur.α[:] = α - gschur.β[:] = β - return gschur -end - -_ordschur(S::StridedMatrix{Ty}, T::StridedMatrix{Ty}, Q::StridedMatrix{Ty}, - Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} = - _ordschur!(copy(S), copy(T), copy(Q), copy(Z), select) - -_ordschur!(S::StridedMatrix{Ty}, T::StridedMatrix{Ty}, Q::StridedMatrix{Ty}, - Z::StridedMatrix{Ty}, select::Union{Vector{Bool},BitVector}) where {Ty<:BlasFloat} = - LinearAlgebra.LAPACK.tgsen!(convert(Vector{BlasInt}, select), S, T, Q, Z) - -""" - ordschur(F::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) -> F::GeneralizedSchur - -Reorders the Generalized Schur factorization `F` of a matrix pair `(A, B) = (Q*S*Z', Q*T*Z')` -according to the logical array `select` and returns a GeneralizedSchur object `F`. The -selected eigenvalues appear in the leading diagonal of both `F.S` and `F.T`, and the -left and right orthogonal/unitary Schur vectors are also reordered such that -`(A, B) = F.Q*(F.S, F.T)*F.Z'` still holds and the generalized eigenvalues of `A` -and `B` can still be obtained with `F.α./F.β`. -""" -ordschur(gschur::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) = - GeneralizedSchur(_ordschur(gschur.S, gschur.T, gschur.Q, gschur.Z, select)...) - -function getproperty(F::GeneralizedSchur, d::Symbol) - if d === :values - return getfield(F, :α) ./ getfield(F, :β) - elseif d === :alpha - return getfield(F, :α) - elseif d === :beta - return getfield(F, :β) - elseif d === :left - return getfield(F, :Q) - elseif d === :right - return getfield(F, :Z) - else - getfield(F, d) - end -end - -Base.propertynames(F::GeneralizedSchur) = - (:values, :left, :right, fieldnames(typeof(F))...) - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::GeneralizedSchur) - summary(io, F); println(io) - println(io, "S factor:") - show(io, mime, F.S) - println(io, "\nT factor:") - show(io, mime, F.T) - println(io, "\nQ factor:") - show(io, mime, F.Q) - println(io, "\nZ factor:") - show(io, mime, F.Z) - println(io, "\nα:") - show(io, mime, F.α) - println(io, "\nβ:") - show(io, mime, F.β) -end - -# Conversion -AbstractMatrix(F::Schur) = (F.Z * F.T) * F.Z' -AbstractArray(F::Schur) = AbstractMatrix(F) -Matrix(F::Schur) = Array(AbstractArray(F)) -Array(F::Schur) = Matrix(F) - -copy(F::Schur) = Schur(copy(F.T), copy(F.Z), copy(F.values)) -copy(F::GeneralizedSchur) = GeneralizedSchur(copy(F.S), copy(F.T), copy(F.α), copy(F.β), copy(F.Q), copy(F.Z)) diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl deleted file mode 100644 index 1744a2301f48a..0000000000000 --- a/stdlib/LinearAlgebra/src/special.jl +++ /dev/null @@ -1,360 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -# Methods operating on different special matrix types - -# Interconversion between special matrix types - -# conversions from Diagonal to other special matrix types -Bidiagonal(A::Diagonal) = Bidiagonal(A.diag, fill!(similar(A.diag, length(A.diag)-1), 0), :U) -SymTridiagonal(A::Diagonal) = SymTridiagonal(A.diag, fill!(similar(A.diag, length(A.diag)-1), 0)) -Tridiagonal(A::Diagonal) = Tridiagonal(fill!(similar(A.diag, length(A.diag)-1), 0), A.diag, - fill!(similar(A.diag, length(A.diag)-1), 0)) - -# conversions from Bidiagonal to other special matrix types -Diagonal(A::Bidiagonal) = Diagonal(A.dv) -SymTridiagonal(A::Bidiagonal) = - iszero(A.ev) ? SymTridiagonal(A.dv, A.ev) : - throw(ArgumentError("matrix cannot be represented as SymTridiagonal")) -Tridiagonal(A::Bidiagonal) = - Tridiagonal(A.uplo == 'U' ? fill!(similar(A.ev), 0) : A.ev, A.dv, - A.uplo == 'U' ? A.ev : fill!(similar(A.ev), 0)) - -# conversions from SymTridiagonal to other special matrix types -Diagonal(A::SymTridiagonal) = Diagonal(A.dv) - -# These can fail when ev has the same length as dv -# TODO: Revisit when a good solution for #42477 is found -Bidiagonal(A::SymTridiagonal) = - iszero(A.ev) ? Bidiagonal(A.dv, A.ev, :U) : - throw(ArgumentError("matrix cannot be represented as Bidiagonal")) -Tridiagonal(A::SymTridiagonal) = - Tridiagonal(copy(A.ev), A.dv, A.ev) - -# conversions from Tridiagonal to other special matrix types -Diagonal(A::Tridiagonal) = Diagonal(A.d) -Bidiagonal(A::Tridiagonal) = - iszero(A.dl) ? Bidiagonal(A.d, A.du, :U) : - iszero(A.du) ? Bidiagonal(A.d, A.dl, :L) : - throw(ArgumentError("matrix cannot be represented as Bidiagonal")) - -# conversions from AbstractTriangular to special matrix types -Bidiagonal(A::AbstractTriangular) = - isbanded(A, 0, 1) ? Bidiagonal(diag(A, 0), diag(A, 1), :U) : # is upper bidiagonal - isbanded(A, -1, 0) ? Bidiagonal(diag(A, 0), diag(A, -1), :L) : # is lower bidiagonal - throw(ArgumentError("matrix cannot be represented as Bidiagonal")) - -_lucopy(A::Bidiagonal, T) = copymutable_oftype(Tridiagonal(A), T) -_lucopy(A::Diagonal, T) = copymutable_oftype(Tridiagonal(A), T) -function _lucopy(A::SymTridiagonal, T) - du = copy_similar(_evview(A), T) - dl = copy.(transpose.(du)) - d = copy_similar(A.dv, T) - return Tridiagonal(dl, d, du) -end - -const ConvertibleSpecialMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,AbstractTriangular} -const PossibleTriangularMatrix = Union{Diagonal, Bidiagonal, AbstractTriangular} - -convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:Diagonal} = m isa T ? m : - isdiag(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as Diagonal")) -convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:SymTridiagonal} = m isa T ? m : - issymmetric(m) && isbanded(m, -1, 1) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as SymTridiagonal")) -convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:Tridiagonal} = m isa T ? m : - isbanded(m, -1, 1) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as Tridiagonal")) - -convert(::Type{T}, m::Union{LowerTriangular,UnitLowerTriangular}) where {T<:LowerTriangular} = m isa T ? m : T(m)::T -convert(::Type{T}, m::Union{UpperTriangular,UnitUpperTriangular}) where {T<:UpperTriangular} = m isa T ? m : T(m)::T - -convert(::Type{T}, m::PossibleTriangularMatrix) where {T<:LowerTriangular} = m isa T ? m : - istril(m) ? 
T(m)::T : throw(ArgumentError("matrix cannot be represented as LowerTriangular")) -convert(::Type{T}, m::PossibleTriangularMatrix) where {T<:UpperTriangular} = m isa T ? m : - istriu(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as UpperTriangular")) - -# Constructs two method definitions taking into account (assumed) commutativity -# e.g. @commutative f(x::S, y::T) where {S,T} = x+y is the same is defining -# f(x::S, y::T) where {S,T} = x+y -# f(y::T, x::S) where {S,T} = f(x, y) -macro commutative(myexpr) - @assert Base.is_function_def(myexpr) # Make sure it is a function definition - y = copy(myexpr.args[1].args[2:end]) - reverse!(y) - reversed_call = Expr(:(=), Expr(:call,myexpr.args[1].args[1],y...), myexpr.args[1]) - esc(Expr(:block, myexpr, reversed_call)) -end - -for op in (:+, :-) - for (matrixtype, uplo, converttype) in ((:UpperTriangular, 'U', :UpperTriangular), - (:UnitUpperTriangular, 'U', :UpperTriangular), - (:LowerTriangular, 'L', :LowerTriangular), - (:UnitLowerTriangular, 'L', :LowerTriangular)) - @eval begin - function ($op)(A::$matrixtype, B::Bidiagonal) - if B.uplo == $uplo - ($op)(A, convert($converttype, B)) - else - ($op).(A, B) - end - end - - function ($op)(A::Bidiagonal, B::$matrixtype) - if A.uplo == $uplo - ($op)(convert($converttype, A), B) - else - ($op).(A, B) - end - end - end - end -end - -# disambiguation between triangular and banded matrices, banded ones "dominate" -mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix) = _mul!(C, A, B, MulAddMul()) -mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular) = _mul!(C, A, B, MulAddMul()) -mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) = - _mul!(C, A, B, MulAddMul(alpha, beta)) -mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular, alpha::Number, beta::Number) = - _mul!(C, A, B, MulAddMul(alpha, beta)) - -function *(H::UpperHessenberg, B::Bidiagonal) - T = promote_op(matprod, eltype(H), eltype(B)) - A = mul!(similar(H, T, size(H)), H, B) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end -function *(B::Bidiagonal, H::UpperHessenberg) - T = promote_op(matprod, eltype(B), eltype(H)) - A = mul!(similar(H, T, size(H)), B, H) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end - -function /(H::UpperHessenberg, B::Bidiagonal) - T = typeof(oneunit(eltype(H))/oneunit(eltype(B))) - A = _rdiv!(similar(H, T, size(H)), H, B) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end - -function \(B::Bidiagonal, H::UpperHessenberg) - T = typeof(oneunit(eltype(B))\oneunit(eltype(H))) - A = ldiv!(similar(H, T, size(H)), B, H) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end - -# specialized +/- for structured matrices. If these are removed, it falls -# back to broadcasting which has ~2-10x speed regressions. -# For the other structure matrix pairs, broadcasting works well. - -# For structured matrix types with different non-zero diagonals the underlying -# representations must be promoted to the same type. -# For example, in Diagonal + Bidiagonal only the main diagonal is touched so -# the off diagonal could be a different type after the operation resulting in -# an error. 
See issue #28994 - -@commutative function (+)(A::Bidiagonal, B::Diagonal) - newdv = A.dv + B.diag - Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo) -end - -function (-)(A::Bidiagonal, B::Diagonal) - newdv = A.dv - B.diag - Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo) -end - -function (-)(A::Diagonal, B::Bidiagonal) - newdv = A.diag - B.dv - Bidiagonal(newdv, typeof(newdv)(-B.ev), B.uplo) -end - -@commutative function (+)(A::Diagonal, B::SymTridiagonal) - newdv = A.diag + B.dv - SymTridiagonal(A.diag + B.dv, typeof(newdv)(B.ev)) -end - -function (-)(A::Diagonal, B::SymTridiagonal) - newdv = A.diag - B.dv - SymTridiagonal(newdv, typeof(newdv)(-B.ev)) -end - -function (-)(A::SymTridiagonal, B::Diagonal) - newdv = A.dv - B.diag - SymTridiagonal(newdv, typeof(newdv)(A.ev)) -end - -# this set doesn't have the aforementioned problem - -@commutative (+)(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B)) --(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B)) --(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du) - -@commutative function (+)(A::Diagonal, B::Tridiagonal) - newdv = A.diag + B.d - Tridiagonal(typeof(newdv)(B.dl), newdv, typeof(newdv)(B.du)) -end - -function (-)(A::Diagonal, B::Tridiagonal) - newdv = A.diag - B.d - Tridiagonal(typeof(newdv)(-B.dl), newdv, typeof(newdv)(-B.du)) -end - -function (-)(A::Tridiagonal, B::Diagonal) - newdv = A.d - B.diag - Tridiagonal(typeof(newdv)(A.dl), newdv, typeof(newdv)(A.du)) -end - -@commutative function (+)(A::Bidiagonal, B::Tridiagonal) - newdv = A.dv + B.d - Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(B.dl), newdv, A.ev+B.du) : (A.ev+B.dl, newdv, typeof(newdv)(B.du)))...) -end - -function (-)(A::Bidiagonal, B::Tridiagonal) - newdv = A.dv - B.d - Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.dl), newdv, A.ev-B.du) : (A.ev-B.dl, newdv, typeof(newdv)(-B.du)))...) -end - -function (-)(A::Tridiagonal, B::Bidiagonal) - newdv = A.d - B.dv - Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.dl), newdv, A.du-B.ev) : (A.dl-B.ev, newdv, typeof(newdv)(A.du)))...) -end - -@commutative function (+)(A::Bidiagonal, B::SymTridiagonal) - newdv = A.dv + B.dv - Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...) -end - -function (-)(A::Bidiagonal, B::SymTridiagonal) - newdv = A.dv - B.dv - Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...) -end - -function (-)(A::SymTridiagonal, B::Bidiagonal) - newdv = A.dv - B.dv - Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...) 
-end - -@commutative function (+)(A::Tridiagonal, B::UniformScaling) - newd = A.d .+ Ref(B) - Tridiagonal(typeof(newd)(A.dl), newd, typeof(newd)(A.du)) -end - -@commutative function (+)(A::SymTridiagonal, B::UniformScaling) - newdv = A.dv .+ Ref(B) - SymTridiagonal(newdv, typeof(newdv)(A.ev)) -end - -@commutative function (+)(A::Bidiagonal, B::UniformScaling) - newdv = A.dv .+ Ref(B) - Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo) -end - -@commutative function (+)(A::Diagonal, B::UniformScaling) - Diagonal(A.diag .+ Ref(B)) -end - -# StructuredMatrix - UniformScaling = StructuredMatrix + (-UniformScaling) => -# no need to define reversed order -function (-)(A::UniformScaling, B::Tridiagonal) - d = Ref(A) .- B.d - Tridiagonal(convert(typeof(d), -B.dl), d, convert(typeof(d), -B.du)) -end -function (-)(A::UniformScaling, B::SymTridiagonal) - dv = Ref(A) .- B.dv - SymTridiagonal(dv, convert(typeof(dv), -B.ev)) -end -function (-)(A::UniformScaling, B::Bidiagonal) - dv = Ref(A) .- B.dv - Bidiagonal(dv, convert(typeof(dv), -B.ev), B.uplo) -end -function (-)(A::UniformScaling, B::Diagonal) - Diagonal(Ref(A) .- B.diag) -end - -## Diagonal construction from UniformScaling -Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m)) -Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m) - -Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) = - Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z)) - -_diag_or_value(A::Diagonal) = A.diag -_diag_or_value(A::UniformScaling) = A.λ - -# fill[stored]! methods -fillstored!(A::Diagonal, x) = (fill!(A.diag, x); A) -fillstored!(A::Bidiagonal, x) = (fill!(A.dv, x); fill!(A.ev, x); A) -fillstored!(A::Tridiagonal, x) = (fill!(A.dl, x); fill!(A.d, x); fill!(A.du, x); A) -fillstored!(A::SymTridiagonal, x) = (fill!(A.dv, x); fill!(A.ev, x); A) - -_small_enough(A::Union{Diagonal, Bidiagonal}) = size(A, 1) <= 1 -_small_enough(A::Tridiagonal) = size(A, 1) <= 2 -_small_enough(A::SymTridiagonal) = size(A, 1) <= 2 - -function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, x) - xT = convert(eltype(A), x) - (iszero(xT) || _small_enough(A)) && return fillstored!(A, xT) - throw(ArgumentError("array of type $(typeof(A)) and size $(size(A)) can - not be filled with $x, since some of its entries are constrained.")) -end - -one(D::Diagonal) = Diagonal(one.(D.diag)) -one(A::Bidiagonal{T}) where T = Bidiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))), A.uplo) -one(A::Tridiagonal{T}) where T = Tridiagonal(fill!(similar(A.du, typeof(one(T))), zero(one(T))), fill!(similar(A.d, typeof(one(T))), one(T)), fill!(similar(A.dl, typeof(one(T))), zero(one(T)))) -one(A::SymTridiagonal{T}) where T = SymTridiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T)))) -for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular) - @eval one(A::$t) = $t(one(parent(A))) - @eval oneunit(A::$t) = $t(oneunit(parent(A))) -end - -zero(D::Diagonal) = Diagonal(zero.(D.diag)) -oneunit(D::Diagonal) = Diagonal(oneunit.(D.diag)) - -isdiag(A::HermOrSym{<:Any,<:Diagonal}) = isdiag(parent(A)) -dot(x::AbstractVector, A::RealHermSymComplexSym{<:Real,<:Diagonal}, y::AbstractVector) = - dot(x, A.data, y) - -# equals and approx equals methods for structured matrices -# SymTridiagonal == Tridiagonal is already defined in tridiag.jl - -==(A::Diagonal, 
B::Bidiagonal) = iszero(B.ev) && A.diag == B.dv -==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == B.dv -==(B::Bidiagonal, A::Diagonal) = A == B -==(A::Diagonal, B::Tridiagonal) = iszero(B.dl) && iszero(B.du) && A.diag == B.d -==(B::Tridiagonal, A::Diagonal) = A == B - -function ==(A::Bidiagonal, B::Tridiagonal) - if A.uplo == 'U' - return iszero(B.dl) && A.dv == B.d && A.ev == B.du - else - return iszero(B.du) && A.dv == B.d && A.ev == B.dl - end -end -==(B::Tridiagonal, A::Bidiagonal) = A == B - -==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv -==(B::SymTridiagonal, A::Bidiagonal) = A == B - -# concatenation -const _SpecialArrays = Union{Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal} -const _Symmetric_DenseArrays{T,A<:Matrix} = Symmetric{T,A} -const _Hermitian_DenseArrays{T,A<:Matrix} = Hermitian{T,A} -const _Triangular_DenseArrays{T,A<:Matrix} = AbstractTriangular{T,A} -const _Annotated_DenseArrays = Union{_SpecialArrays, _Triangular_DenseArrays, _Symmetric_DenseArrays, _Hermitian_DenseArrays} -const _Annotated_Typed_DenseArrays{T} = Union{_Triangular_DenseArrays{T}, _Symmetric_DenseArrays{T}, _Hermitian_DenseArrays{T}} -const _DenseConcatGroup = Union{Number, Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays} -const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpose{T,Vector{T}}, Matrix{T}, _Annotated_Typed_DenseArrays{T}} - -promote_to_array_type(::Tuple{Vararg{Union{_DenseConcatGroup,UniformScaling}}}) = Matrix - -Base._cat(dims, xs::_DenseConcatGroup...) = Base._cat_t(dims, promote_eltype(xs...), xs...) -vcat(A::_DenseConcatGroup...) = Base.typed_vcat(promote_eltype(A...), A...) -hcat(A::_DenseConcatGroup...) = Base.typed_hcat(promote_eltype(A...), A...) -hvcat(rows::Tuple{Vararg{Int}}, xs::_DenseConcatGroup...) = Base.typed_hvcat(promote_eltype(xs...), rows, xs...) -# For performance, specially handle the case where the matrices/vectors have homogeneous eltype -Base._cat(dims, xs::_TypedDenseConcatGroup{T}...) where {T} = Base._cat_t(dims, T, xs...) -vcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_vcat(T, A...) -hcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hcat(T, A...) -hvcat(rows::Tuple{Vararg{Int}}, xs::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hvcat(T, rows, xs...) - -# factorizations -function cholesky(S::RealHermSymComplexHerm{<:Real,<:SymTridiagonal}, ::NoPivot = NoPivot(); check::Bool = true) - T = choltype(eltype(S)) - B = Bidiagonal{T}(diag(S, 0), diag(S, S.uplo == 'U' ? 1 : -1), sym_uplo(S.uplo)) - cholesky!(Hermitian(B, sym_uplo(S.uplo)), NoPivot(); check = check) -end diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl deleted file mode 100644 index 02e39b199679b..0000000000000 --- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl +++ /dev/null @@ -1,256 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -## Broadcast styles -import Base.Broadcast -using Base.Broadcast: DefaultArrayStyle, Broadcasted, tail - -struct StructuredMatrixStyle{T} <: Broadcast.AbstractArrayStyle{2} end -StructuredMatrixStyle{T}(::Val{2}) where {T} = StructuredMatrixStyle{T}() -StructuredMatrixStyle{T}(::Val{N}) where {T,N} = Broadcast.DefaultArrayStyle{N}() - -const StructuredMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,LowerTriangular,UnitLowerTriangular,UpperTriangular,UnitUpperTriangular} -for ST in Base.uniontypes(StructuredMatrix) - @eval Broadcast.BroadcastStyle(::Type{<:$ST}) = $(StructuredMatrixStyle{ST}()) -end - -# Promotion of broadcasts between structured matrices. This is slightly unusual -# as we define them symmetrically. This allows us to have a fallback to DefaultArrayStyle{2}(). -# Diagonal can cavort with all the other structured matrix types. -# Bidiagonal doesn't know if it's upper or lower, so it becomes Tridiagonal -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Diagonal}) = - StructuredMatrixStyle{Diagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Bidiagonal}) = - StructuredMatrixStyle{Bidiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{SymTridiagonal,Tridiagonal}}) = - StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) = - StructuredMatrixStyle{LowerTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) = - StructuredMatrixStyle{UpperTriangular}() - -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{Diagonal}) = - StructuredMatrixStyle{Bidiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{<:Union{Bidiagonal,SymTridiagonal,Tridiagonal}}) = - StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{SymTridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = - StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{Tridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = - StructuredMatrixStyle{Tridiagonal}() - -Broadcast.BroadcastStyle(::StructuredMatrixStyle{LowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) = - StructuredMatrixStyle{LowerTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{UpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) = - StructuredMatrixStyle{UpperTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitLowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) = - StructuredMatrixStyle{LowerTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitUpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) = - StructuredMatrixStyle{UpperTriangular}() - -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) = - StructuredMatrixStyle{Matrix}() 
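(Aside, not part of the diff: a sketch of what the promotion rules spelled out above imply for broadcast result types in this version of the file, assuming the zero-preservation checks further down take the structured path for `+`, which maps zeros to zeros.)

```julia
using LinearAlgebra

D = Diagonal([1.0, 2.0, 3.0])
B = Bidiagonal([1.0, 2.0, 3.0], [4.0, 5.0], :U)
L = LowerTriangular(ones(3, 3))
U = UpperTriangular(ones(3, 3))

(D .+ D) isa Diagonal       # Diagonal stays Diagonal
(D .+ B) isa Bidiagonal     # Diagonal combines with the other structured types
(B .+ B) isa Tridiagonal    # two Bidiagonals promote to Tridiagonal (rule above)
(L .+ U) isa Matrix         # lower/upper triangular mixes fall back to dense
```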
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) = - StructuredMatrixStyle{Matrix}() - -# Make sure that `StructuredMatrixStyle{Matrix}` doesn't ever end up falling -# through and give back `DefaultArrayStyle{2}` -Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle) = T -Broadcast.BroadcastStyle(::StructuredMatrixStyle, T::StructuredMatrixStyle{Matrix}) = T -Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle{Matrix}) = T - -# All other combinations fall back to the default style -Broadcast.BroadcastStyle(::StructuredMatrixStyle, ::StructuredMatrixStyle) = DefaultArrayStyle{2}() - -# And a definition akin to similar using the structured type: -structured_broadcast_alloc(bc, ::Type{Diagonal}, ::Type{ElType}, n) where {ElType} = - Diagonal(Array{ElType}(undef, n)) -# Bidiagonal is tricky as we need to know if it's upper or lower. The promotion -# system will return Tridiagonal when there's more than one Bidiagonal, but when -# there's only one, we need to make figure out upper or lower -merge_uplos(::Nothing, ::Nothing) = nothing -merge_uplos(a, ::Nothing) = a -merge_uplos(::Nothing, b) = b -merge_uplos(a, b) = a == b ? a : 'T' - -find_uplo(a::Bidiagonal) = a.uplo -find_uplo(a) = nothing -find_uplo(bc::Broadcasted) = mapfoldl(find_uplo, merge_uplos, Broadcast.cat_nested(bc), init=nothing) - -function structured_broadcast_alloc(bc, ::Type{Bidiagonal}, ::Type{ElType}, n) where {ElType} - uplo = n > 0 ? find_uplo(bc) : 'U' - n1 = max(n - 1, 0) - if uplo == 'T' - return Tridiagonal(Array{ElType}(undef, n1), Array{ElType}(undef, n), Array{ElType}(undef, n1)) - end - return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n1), uplo) -end -structured_broadcast_alloc(bc, ::Type{SymTridiagonal}, ::Type{ElType}, n) where {ElType} = - SymTridiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n-1)) -structured_broadcast_alloc(bc, ::Type{Tridiagonal}, ::Type{ElType}, n) where {ElType} = - Tridiagonal(Array{ElType}(undef, n-1),Array{ElType}(undef, n),Array{ElType}(undef, n-1)) -structured_broadcast_alloc(bc, ::Type{LowerTriangular}, ::Type{ElType}, n) where {ElType} = - LowerTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{UpperTriangular}, ::Type{ElType}, n) where {ElType} = - UpperTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) where {ElType} = - UnitLowerTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} = - UnitUpperTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} = - Matrix(Array{ElType}(undef, n, n)) - -# A _very_ limited list of structure-preserving functions known at compile-time. This list is -# derived from the formerly-implemented `broadcast` methods in 0.6. Note that this must -# preserve both zeros and ones (for Unit***erTriangular) and symmetry (for SymTridiagonal) -const TypeFuncs = Union{typeof(round),typeof(trunc),typeof(floor),typeof(ceil)} -isstructurepreserving(bc::Broadcasted) = isstructurepreserving(bc.f, bc.args...) 
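(Editor's aside, not part of the patch.) Because the style promotion can only say "Bidiagonal" without a direction, `find_uplo`/`merge_uplos` above recover the storage direction at allocation time, and two `Bidiagonal` arguments promote to `Tridiagonal`. A sketch with arbitrary inputs:

```julia
using LinearAlgebra

Bu = Bidiagonal([1.0, 2.0, 3.0], [4.0, 5.0], :U)
Bl = Bidiagonal([1.0, 2.0, 3.0], [4.0, 5.0], :L)

(2 .* Bu) isa Bidiagonal                  # one Bidiagonal: its uplo is recovered by find_uplo
(Bu .+ Diagonal(ones(3))) isa Bidiagonal  # mixing in a Diagonal does not disturb the uplo
(Bu .+ Bl) isa Tridiagonal                # two Bidiagonals promote to Tridiagonal
```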
-isstructurepreserving(::Union{typeof(abs),typeof(big)}, ::StructuredMatrix) = true -isstructurepreserving(::TypeFuncs, ::StructuredMatrix) = true -isstructurepreserving(::TypeFuncs, ::Ref{<:Type}, ::StructuredMatrix) = true -function isstructurepreserving(::typeof(Base.literal_pow), ::Ref{typeof(^)}, ::StructuredMatrix, ::Ref{Val{N}}) where N - return N isa Integer && N > 0 -end -isstructurepreserving(f, args...) = false - -""" - iszerodefined(T::Type) - -Return a `Bool` indicating whether `iszero` is well-defined for objects of type -`T`. By default, this function returns `false` unless `T <: Number`. Note that -this function may return `true` even if `zero(::T)` is not defined as long as -`iszero(::T)` has a method that does not requires `zero(::T)`. - -This function is used to determine if mapping the elements of an array with -a specific structure of nonzero elements preserve this structure. -For instance, it is used to determine whether the output of -`tuple.(Diagonal([1, 2]))` is `Diagonal([(1,), (2,)])` or -`[(1,) (0,); (0,) (2,)]`. For this, we need to determine whether `(0,)` is -considered to be zero. `iszero((0,))` falls back to `(0,) == zero((0,))` which -fails as `zero(::Tuple{Int})` is not defined. However, -`iszerodefined(::Tuple{Int})` is `false` hence we falls back to the comparison -`(0,) == 0` which returns `false` and decides that the correct output is -`[(1,) (0,); (0,) (2,)]`. -""" -iszerodefined(::Type) = false -iszerodefined(::Type{<:Number}) = true -iszerodefined(::Type{<:AbstractArray{T}}) where T = iszerodefined(T) - -fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && (iszerodefined(typeof(v)) ? iszero(v) : v == 0)) -# Like sparse matrices, we assume that the zero-preservation property of a broadcasted -# expression is stable. We can test the zero-preservability by applying the function -# in cases where all other arguments are known scalars against a zero from the structured -# matrix. If any non-structured matrix argument is not a known scalar, we give up. -fzero(x::Number) = x -fzero(::Type{T}) where T = T -fzero(r::Ref) = r[] -fzero(t::Tuple{Any}) = t[1] -fzero(S::StructuredMatrix) = zero(eltype(S)) -fzero(x) = missing -function fzero(bc::Broadcast.Broadcasted) - args = map(fzero, bc.args) - return any(ismissing, args) ? missing : bc.f(args...) 
-end - -function Base.similar(bc::Broadcasted{StructuredMatrixStyle{T}}, ::Type{ElType}) where {T,ElType} - inds = axes(bc) - if isstructurepreserving(bc) || (fzeropreserving(bc) && !(T <: Union{SymTridiagonal,UnitLowerTriangular,UnitUpperTriangular})) - return structured_broadcast_alloc(bc, T, ElType, length(inds[1])) - end - return similar(convert(Broadcasted{DefaultArrayStyle{ndims(bc)}}, bc), ElType) -end - -isvalidstructbc(dest, bc::Broadcasted{T}) where {T<:StructuredMatrixStyle} = - Broadcast.combine_styles(dest, bc) === Broadcast.combine_styles(dest) && - (isstructurepreserving(bc) || fzeropreserving(bc)) - -isvalidstructbc(dest::Bidiagonal, bc::Broadcasted{StructuredMatrixStyle{Bidiagonal}}) = - (size(dest, 1) < 2 || find_uplo(bc) == dest.uplo) && - (isstructurepreserving(bc) || fzeropreserving(bc)) - -function copyto!(dest::Diagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) - axs = axes(dest) - axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) - for i in axs[1] - dest.diag[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) - end - return dest -end - -function copyto!(dest::Bidiagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) - axs = axes(dest) - axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) - for i in axs[1] - dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) - end - if dest.uplo == 'U' - for i = 1:size(dest, 1)-1 - dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) - end - else - for i = 1:size(dest, 1)-1 - dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) - end - end - return dest -end - -function copyto!(dest::SymTridiagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) - axs = axes(dest) - axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) - for i in axs[1] - dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) - end - for i = 1:size(dest, 1)-1 - v = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) - v == (@inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)")) - dest.ev[i] = v - end - return dest -end - -function copyto!(dest::Tridiagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) - axs = axes(dest) - axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) - for i in axs[1] - dest.d[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) - end - for i = 1:size(dest, 1)-1 - dest.du[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) - dest.dl[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) - end - return dest -end - -function copyto!(dest::LowerTriangular, bc::Broadcasted{<:StructuredMatrixStyle}) - isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) - axs = axes(dest) - axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) - for j in axs[2] - for i in j:axs[1][end] - @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j)) - end - end - return dest -end - -function copyto!(dest::UpperTriangular, bc::Broadcasted{<:StructuredMatrixStyle}) 
- isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) - axs = axes(dest) - axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) - for j in axs[2] - for i in 1:j - @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j)) - end - end - return dest -end - -# We can also implement `map` and its promotion in terms of broadcast with a stricter dimension check -function map(f, A::StructuredMatrix, Bs::StructuredMatrix...) - sz = size(A) - all(map(B->size(B)==sz, Bs)) || throw(DimensionMismatch("dimensions must match")) - return f.(A, Bs...) -end diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl deleted file mode 100644 index c1b886f616f02..0000000000000 --- a/stdlib/LinearAlgebra/src/svd.jl +++ /dev/null @@ -1,577 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Singular Value Decomposition -""" - SVD <: Factorization - -Matrix factorization type of the singular value decomposition (SVD) of a matrix `A`. -This is the return type of [`svd(_)`](@ref), the corresponding matrix factorization function. - -If `F::SVD` is the factorization object, `U`, `S`, `V` and `Vt` can be obtained -via `F.U`, `F.S`, `F.V` and `F.Vt`, such that `A = U * Diagonal(S) * Vt`. -The singular values in `S` are sorted in descending order. - -Iterating the decomposition produces the components `U`, `S`, and `V`. - -# Examples -```jldoctest -julia> A = [1. 0. 0. 0. 2.; 0. 0. 3. 0. 0.; 0. 0. 0. 0. 0.; 0. 2. 0. 0. 0.] -4×5 Matrix{Float64}: - 1.0 0.0 0.0 0.0 2.0 - 0.0 0.0 3.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.0 - 0.0 2.0 0.0 0.0 0.0 - -julia> F = svd(A) -SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}} -U factor: -4×4 Matrix{Float64}: - 0.0 1.0 0.0 0.0 - 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 0.0 0.0 -1.0 0.0 -singular values: -4-element Vector{Float64}: - 3.0 - 2.23606797749979 - 2.0 - 0.0 -Vt factor: -4×5 Matrix{Float64}: - -0.0 0.0 1.0 -0.0 0.0 - 0.447214 0.0 0.0 0.0 0.894427 - 0.0 -1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 0.0 - -julia> F.U * Diagonal(F.S) * F.Vt -4×5 Matrix{Float64}: - 1.0 0.0 0.0 0.0 2.0 - 0.0 0.0 3.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.0 - 0.0 2.0 0.0 0.0 0.0 - -julia> u, s, v = F; # destructuring via iteration - -julia> u == F.U && s == F.S && v == F.V -true -``` -""" -struct SVD{T,Tr,M<:AbstractArray{T},C<:AbstractVector{Tr}} <: Factorization{T} - U::M - S::C - Vt::M - function SVD{T,Tr,M,C}(U, S, Vt) where {T,Tr,M<:AbstractArray{T},C<:AbstractVector{Tr}} - require_one_based_indexing(U, S, Vt) - new{T,Tr,M,C}(U, S, Vt) - end -end -SVD(U::AbstractArray{T}, S::AbstractVector{Tr}, Vt::AbstractArray{T}) where {T,Tr} = - SVD{T,Tr,typeof(U),typeof(S)}(U, S, Vt) -SVD{T}(U::AbstractArray, S::AbstractVector{Tr}, Vt::AbstractArray) where {T,Tr} = - SVD(convert(AbstractArray{T}, U), - convert(AbstractVector{Tr}, S), - convert(AbstractArray{T}, Vt)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(SVD{T,Tr,M}(U::AbstractArray{T}, S::AbstractVector{Tr}, Vt::AbstractArray{T}) where {T,Tr,M}, - SVD{T,Tr,M,typeof(S)}(U, S, Vt)) - -SVD{T}(F::SVD) where {T} = SVD( - convert(AbstractMatrix{T}, F.U), - convert(AbstractVector{real(T)}, F.S), - convert(AbstractMatrix{T}, F.Vt)) -Factorization{T}(F::SVD) where {T} = SVD{T}(F) - -# iteration for destructuring into components -Base.iterate(S::SVD) = (S.U, Val(:S)) -Base.iterate(S::SVD, ::Val{:S}) = (S.S, Val(:V)) -Base.iterate(S::SVD, ::Val{:V}) = (S.V, Val(:done)) -Base.iterate(S::SVD, ::Val{:done}) = nothing - - -default_svd_alg(A) = 
DivideAndConquer() - - -""" - svd!(A; full::Bool = false, alg::Algorithm = default_svd_alg(A)) -> SVD - -`svd!` is the same as [`svd`](@ref), but saves space by -overwriting the input `A`, instead of creating a copy. See documentation of [`svd`](@ref) for details. -""" -function svd!(A::StridedMatrix{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T<:BlasFloat} - m, n = size(A) - if m == 0 || n == 0 - u, s, vt = (Matrix{T}(I, m, full ? m : n), real(zeros(T,0)), Matrix{T}(I, n, n)) - else - u, s, vt = _svd!(A, full, alg) - end - SVD(u, s, vt) -end -function svd!(A::StridedVector{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T<:BlasFloat} - m = length(A) - normA = norm(A) - if iszero(normA) - return SVD(Matrix{T}(I, m, full ? m : 1), [normA], ones(T, 1, 1)) - elseif !full - normalize!(A) - return SVD(reshape(A, (m, 1)), [normA], ones(T, 1, 1)) - else - u, s, vt = _svd!(reshape(A, (m, 1)), full, alg) - return SVD(u, s, vt) - end -end - -_svd!(A::StridedMatrix{T}, full::Bool, alg::Algorithm) where {T<:BlasFloat} = - throw(ArgumentError("Unsupported value for `alg` keyword.")) -_svd!(A::StridedMatrix{T}, full::Bool, alg::DivideAndConquer) where {T<:BlasFloat} = - LAPACK.gesdd!(full ? 'A' : 'S', A) -function _svd!(A::StridedMatrix{T}, full::Bool, alg::QRIteration) where {T<:BlasFloat} - c = full ? 'A' : 'S' - u, s, vt = LAPACK.gesvd!(c, c, A) -end - - - -""" - svd(A; full::Bool = false, alg::Algorithm = default_svd_alg(A)) -> SVD - -Compute the singular value decomposition (SVD) of `A` and return an `SVD` object. - -`U`, `S`, `V` and `Vt` can be obtained from the factorization `F` with `F.U`, -`F.S`, `F.V` and `F.Vt`, such that `A = U * Diagonal(S) * Vt`. -The algorithm produces `Vt` and hence `Vt` is more efficient to extract than `V`. -The singular values in `S` are sorted in descending order. - -Iterating the decomposition produces the components `U`, `S`, and `V`. - -If `full = false` (default), a "thin" SVD is returned. For an ``M -\\times N`` matrix `A`, in the full factorization `U` is ``M \\times M`` -and `V` is ``N \\times N``, while in the thin factorization `U` is ``M -\\times K`` and `V` is ``N \\times K``, where ``K = \\min(M,N)`` is the -number of singular values. - -If `alg = DivideAndConquer()` a divide-and-conquer algorithm is used to calculate the SVD. -Another (typically slower but more accurate) option is `alg = QRIteration()`. - -!!! compat "Julia 1.3" - The `alg` keyword argument requires Julia 1.3 or later. - -# Examples -```jldoctest -julia> A = rand(4,3); - -julia> F = svd(A); # Store the Factorization Object - -julia> A ≈ F.U * Diagonal(F.S) * F.Vt -true - -julia> U, S, V = F; # destructuring via iteration - -julia> A ≈ U * Diagonal(S) * V' -true - -julia> Uonly, = svd(A); # Store U only - -julia> Uonly == U -true -``` -""" -function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T} - svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg) -end -function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}} - A = svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg) - return SVD{T}(A) -end -function svd(x::Number; full::Bool = false, alg::Algorithm = default_svd_alg(x)) - SVD(x == 0 ? 
fill(one(x), 1, 1) : fill(x/abs(x), 1, 1), [abs(x)], fill(one(x), 1, 1)) -end -function svd(x::Integer; full::Bool = false, alg::Algorithm = default_svd_alg(x)) - svd(float(x), full = full, alg = alg) -end -function svd(A::Adjoint; full::Bool = false, alg::Algorithm = default_svd_alg(A)) - s = svd(A.parent, full = full, alg = alg) - return SVD(s.Vt', s.S, s.U') -end -function svd(A::Transpose; full::Bool = false, alg::Algorithm = default_svd_alg(A)) - s = svd(A.parent, full = full, alg = alg) - return SVD(transpose(s.Vt), s.S, transpose(s.U)) -end - -function getproperty(F::SVD, d::Symbol) - if d === :V - return getfield(F, :Vt)' - else - return getfield(F, d) - end -end - -Base.propertynames(F::SVD, private::Bool=false) = - private ? (:V, fieldnames(typeof(F))...) : (:U, :S, :V, :Vt) - -""" - svdvals!(A) - -Return the singular values of `A`, saving space by overwriting the input. -See also [`svdvals`](@ref) and [`svd`](@ref). -""" -svdvals!(A::StridedMatrix{T}) where {T<:BlasFloat} = isempty(A) ? zeros(real(T), 0) : LAPACK.gesdd!('N', A)[2] -svdvals!(A::StridedVector{T}) where {T<:BlasFloat} = svdvals!(reshape(A, (length(A), 1))) - -""" - svdvals(A) - -Return the singular values of `A` in descending order. - -# Examples -```jldoctest -julia> A = [1. 0. 0. 0. 2.; 0. 0. 3. 0. 0.; 0. 0. 0. 0. 0.; 0. 2. 0. 0. 0.] -4×5 Matrix{Float64}: - 1.0 0.0 0.0 0.0 2.0 - 0.0 0.0 3.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.0 - 0.0 2.0 0.0 0.0 0.0 - -julia> svdvals(A) -4-element Vector{Float64}: - 3.0 - 2.23606797749979 - 2.0 - 0.0 -``` -""" -svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(eigencopy_oftype(A, eigtype(T))) -svdvals(A::AbstractVector{T}) where {T} = [convert(eigtype(T), norm(A))] -svdvals(x::Number) = abs(x) -svdvals(S::SVD{<:Any,T}) where {T} = (S.S)::Vector{T} - -### SVD least squares ### -function ldiv!(A::SVD{T}, B::AbstractVecOrMat) where T - m, n = size(A) - k = searchsortedlast(A.S, eps(real(T))*A.S[1], rev=true) - mul!(view(B, 1:n, :), view(A.Vt, 1:k, :)', view(A.S, 1:k) .\ (view(A.U, :, 1:k)' * _cut_B(B, 1:m))) - return B -end - -function inv(F::SVD{T}) where T - @inbounds for i in eachindex(F.S) - iszero(F.S[i]) && throw(SingularException(i)) - end - k = searchsortedlast(F.S, eps(real(T))*F.S[1], rev=true) - @views (F.S[1:k] .\ F.Vt[1:k, :])' * F.U[:,1:k]' -end - -size(A::SVD, dim::Integer) = dim == 1 ? size(A.U, dim) : size(A.Vt, dim) -size(A::SVD) = (size(A, 1), size(A, 2)) - -function adjoint(F::SVD) - return SVD(F.Vt', F.S, F.U') -end - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::SVD{<:Any,<:Any,<:AbstractArray,<:AbstractVector}) - summary(io, F); println(io) - println(io, "U factor:") - show(io, mime, F.U) - println(io, "\nsingular values:") - show(io, mime, F.S) - println(io, "\nVt factor:") - show(io, mime, F.Vt) -end - -# Generalized svd -""" - GeneralizedSVD <: Factorization - -Matrix factorization type of the generalized singular value decomposition (SVD) -of two matrices `A` and `B`, such that `A = F.U*F.D1*F.R0*F.Q'` and -`B = F.V*F.D2*F.R0*F.Q'`. This is the return type of [`svd(_, _)`](@ref), the -corresponding matrix factorization function. 
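(Editor's aside, not part of the patch.) The `getproperty`, `ldiv!`, and `inv` methods above make an `SVD` object usable directly as a solver. A sketch with an arbitrary square system; the `F \ b` call assumes the generic `\(::Factorization, ::AbstractVecOrMat)` fallback defined elsewhere in LinearAlgebra:

```julia
using LinearAlgebra

A = [2.0 0.0; 1.0 3.0]
b = [4.0, 5.0]
F = svd(A)

F.U * Diagonal(F.S) * F.Vt ≈ A    # reconstruction from the stored factors
F.V ≈ F.Vt'                       # `V` is derived on demand from the stored `Vt`
F \ b ≈ A \ b                     # solve backed by the `ldiv!(::SVD, ...)` method above
inv(F) ≈ inv(A)                   # `inv(::SVD)` above; here A is square and nonsingular
```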
- -For an M-by-N matrix `A` and P-by-N matrix `B`, - -- `U` is a M-by-M orthogonal matrix, -- `V` is a P-by-P orthogonal matrix, -- `Q` is a N-by-N orthogonal matrix, -- `D1` is a M-by-(K+L) diagonal matrix with 1s in the first K entries, -- `D2` is a P-by-(K+L) matrix whose top right L-by-L block is diagonal, -- `R0` is a (K+L)-by-N matrix whose rightmost (K+L)-by-(K+L) block is - nonsingular upper block triangular, - -`K+L` is the effective numerical rank of the matrix `[A; B]`. - -Iterating the decomposition produces the components `U`, `V`, `Q`, `D1`, `D2`, and `R0`. - -The entries of `F.D1` and `F.D2` are related, as explained in the LAPACK -documentation for the -[generalized SVD](http://www.netlib.org/lapack/lug/node36.html) and the -[xGGSVD3](http://www.netlib.org/lapack/explore-html/d6/db3/dggsvd3_8f.html) -routine which is called underneath (in LAPACK 3.6.0 and newer). - -# Examples -```jldoctest -julia> A = [1. 0.; 0. -1.] -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 -1.0 - -julia> B = [0. 1.; 1. 0.] -2×2 Matrix{Float64}: - 0.0 1.0 - 1.0 0.0 - -julia> F = svd(A, B) -GeneralizedSVD{Float64, Matrix{Float64}, Float64, Vector{Float64}} -U factor: -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 1.0 -V factor: -2×2 Matrix{Float64}: - -0.0 -1.0 - 1.0 0.0 -Q factor: -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 1.0 -D1 factor: -2×2 Matrix{Float64}: - 0.707107 0.0 - 0.0 0.707107 -D2 factor: -2×2 Matrix{Float64}: - 0.707107 0.0 - 0.0 0.707107 -R0 factor: -2×2 Matrix{Float64}: - 1.41421 0.0 - 0.0 -1.41421 - -julia> F.U*F.D1*F.R0*F.Q' -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 -1.0 - -julia> F.V*F.D2*F.R0*F.Q' -2×2 Matrix{Float64}: - -0.0 1.0 - 1.0 0.0 -``` -""" -struct GeneralizedSVD{T,S<:AbstractMatrix,Tr,C<:AbstractVector{Tr}} <: Factorization{T} - U::S - V::S - Q::S - a::C - b::C - k::Int - l::Int - R::S - function GeneralizedSVD{T,S,Tr,C}(U, V, Q, a, b, k, l, R) where {T,S<:AbstractMatrix{T},Tr,C<:AbstractVector{Tr}} - new{T,S,Tr,C}(U, V, Q, a, b, k, l, R) - end -end -GeneralizedSVD(U::AbstractMatrix{T}, V::AbstractMatrix{T}, Q::AbstractMatrix{T}, - a::AbstractVector{Tr}, b::AbstractVector{Tr}, k::Int, l::Int, - R::AbstractMatrix{T}) where {T, Tr} = - GeneralizedSVD{T,typeof(U),Tr,typeof(a)}(U, V, Q, a, b, k, l, R) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(GeneralizedSVD{T,S}(U, V, Q, a, b, k, l, R) where {T, S}, - GeneralizedSVD{T,S,real(T),typeof(a)}(U, V, Q, a, b, k, l, R)) - -# iteration for destructuring into components -Base.iterate(S::GeneralizedSVD) = (S.U, Val(:V)) -Base.iterate(S::GeneralizedSVD, ::Val{:V}) = (S.V, Val(:Q)) -Base.iterate(S::GeneralizedSVD, ::Val{:Q}) = (S.Q, Val(:D1)) -Base.iterate(S::GeneralizedSVD, ::Val{:D1}) = (S.D1, Val(:D2)) -Base.iterate(S::GeneralizedSVD, ::Val{:D2}) = (S.D2, Val(:R0)) -Base.iterate(S::GeneralizedSVD, ::Val{:R0}) = (S.R0, Val(:done)) -Base.iterate(S::GeneralizedSVD, ::Val{:done}) = nothing - -""" - svd!(A, B) -> GeneralizedSVD - -`svd!` is the same as [`svd`](@ref), but modifies the arguments -`A` and `B` in-place, instead of making copies. See documentation of [`svd`](@ref) for details. 
-""" -function svd!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat - # xggsvd3 replaced xggsvd in LAPACK 3.6.0 - if LAPACK.version() < v"3.6.0" - U, V, Q, a, b, k, l, R = LAPACK.ggsvd!('U', 'V', 'Q', A, B) - else - U, V, Q, a, b, k, l, R = LAPACK.ggsvd3!('U', 'V', 'Q', A, B) - end - GeneralizedSVD(U, V, Q, a, b, Int(k), Int(l), R) -end -svd(A::AbstractMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} = - svd!(copy_similar(A, T), copy_similar(B, T)) - -""" - - svd(A, B) -> GeneralizedSVD - -Compute the generalized SVD of `A` and `B`, returning a `GeneralizedSVD` factorization -object `F` such that `[A;B] = [F.U * F.D1; F.V * F.D2] * F.R0 * F.Q'` - -- `U` is a M-by-M orthogonal matrix, -- `V` is a P-by-P orthogonal matrix, -- `Q` is a N-by-N orthogonal matrix, -- `D1` is a M-by-(K+L) diagonal matrix with 1s in the first K entries, -- `D2` is a P-by-(K+L) matrix whose top right L-by-L block is diagonal, -- `R0` is a (K+L)-by-N matrix whose rightmost (K+L)-by-(K+L) block is - nonsingular upper block triangular, - -`K+L` is the effective numerical rank of the matrix `[A; B]`. - -Iterating the decomposition produces the components `U`, `V`, `Q`, `D1`, `D2`, and `R0`. - -The generalized SVD is used in applications such as when one wants to compare how much belongs -to `A` vs. how much belongs to `B`, as in human vs yeast genome, or signal vs noise, or between -clusters vs within clusters. (See Edelman and Wang for discussion: https://arxiv.org/abs/1901.00485) - -It decomposes `[A; B]` into `[UC; VS]H`, where `[UC; VS]` is a natural orthogonal basis for the -column space of `[A; B]`, and `H = RQ'` is a natural non-orthogonal basis for the rowspace of `[A;B]`, -where the top rows are most closely attributed to the `A` matrix, and the bottom to the `B` matrix. -The multi-cosine/sine matrices `C` and `S` provide a multi-measure of how much `A` vs how much `B`, -and `U` and `V` provide directions in which these are measured. 
- -# Examples -```jldoctest -julia> A = randn(3,2); B=randn(4,2); - -julia> F = svd(A, B); - -julia> U,V,Q,C,S,R = F; - -julia> H = R*Q'; - -julia> [A; B] ≈ [U*C; V*S]*H -true - -julia> [A; B] ≈ [F.U*F.D1; F.V*F.D2]*F.R0*F.Q' -true - -julia> Uonly, = svd(A,B); - -julia> U == Uonly -true -``` -""" -function svd(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} - S = promote_type(eigtype(TA),TB) - return svd!(copy_similar(A, S), copy_similar(B, S)) -end -# This method can be heavily optimized but it is probably not critical -# and might introduce bugs or inconsistencies relative to the 1x1 matrix -# version -svd(x::Number, y::Number) = svd(fill(x, 1, 1), fill(y, 1, 1)) - -@inline function getproperty(F::GeneralizedSVD{T}, d::Symbol) where T - Fa = getfield(F, :a) - Fb = getfield(F, :b) - Fk = getfield(F, :k) - Fl = getfield(F, :l) - FU = getfield(F, :U) - FV = getfield(F, :V) - FQ = getfield(F, :Q) - FR = getfield(F, :R) - if d === :alpha - return Fa - elseif d === :beta - return Fb - elseif d === :vals || d === :S - return Fa[1:Fk + Fl] ./ Fb[1:Fk + Fl] - elseif d === :D1 - m = size(FU, 1) - if m - Fk - Fl >= 0 - return [Matrix{T}(I, Fk, Fk) zeros(T, Fk, Fl) ; - zeros(T, Fl, Fk) Diagonal(Fa[Fk + 1:Fk + Fl]); - zeros(T, m - Fk - Fl, Fk + Fl) ] - else - return [Matrix{T}(I, m, Fk) [zeros(T, Fk, m - Fk); Diagonal(Fa[Fk + 1:m])] zeros(T, m, Fk + Fl - m)] - end - elseif d === :D2 - m = size(FU, 1) - p = size(FV, 1) - if m - Fk - Fl >= 0 - return [zeros(T, Fl, Fk) Diagonal(Fb[Fk + 1:Fk + Fl]); zeros(T, p - Fl, Fk + Fl)] - else - return [zeros(T, p, Fk) [Diagonal(Fb[Fk + 1:m]); zeros(T, Fk + p - m, m - Fk)] [zeros(T, m - Fk, Fk + Fl - m); Matrix{T}(I, Fk + p - m, Fk + Fl - m)]] - end - elseif d === :R0 - n = size(FQ, 1) - return [zeros(T, Fk + Fl, n - Fk - Fl) FR] - else - getfield(F, d) - end -end - -Base.propertynames(F::GeneralizedSVD) = - (:alpha, :beta, :vals, :S, :D1, :D2, :R0, fieldnames(typeof(F))...) - -function show(io::IO, mime::MIME{Symbol("text/plain")}, F::GeneralizedSVD{<:Any,<:AbstractArray}) - summary(io, F); println(io) - println(io, "U factor:") - show(io, mime, F.U) - println(io, "\nV factor:") - show(io, mime, F.V) - println(io, "\nQ factor:") - show(io, mime, F.Q) - println(io, "\nD1 factor:") - show(io, mime, F.D1) - println(io, "\nD2 factor:") - show(io, mime, F.D2) - println(io, "\nR0 factor:") - show(io, mime, F.R0) -end - -""" - svdvals!(A, B) - -Return the generalized singular values from the generalized singular value -decomposition of `A` and `B`, saving space by overwriting `A` and `B`. -See also [`svd`](@ref) and [`svdvals`](@ref). -""" -function svdvals!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat - # xggsvd3 replaced xggsvd in LAPACK 3.6.0 - if LAPACK.version() < v"3.6.0" - _, _, _, a, b, k, l, _ = LAPACK.ggsvd!('N', 'N', 'N', A, B) - else - _, _, _, a, b, k, l, _ = LAPACK.ggsvd3!('N', 'N', 'N', A, B) - end - a[1:k + l] ./ b[1:k + l] -end - -""" - svdvals(A, B) - -Return the generalized singular values from the generalized singular value -decomposition of `A` and `B`. See also [`svd`](@ref). - -# Examples -```jldoctest -julia> A = [1. 0.; 0. -1.] -2×2 Matrix{Float64}: - 1.0 0.0 - 0.0 -1.0 - -julia> B = [0. 1.; 1. 0.] 
-2×2 Matrix{Float64}: - 0.0 1.0 - 1.0 0.0 - -julia> svdvals(A, B) -2-element Vector{Float64}: - 1.0 - 1.0 -``` -""" -function svdvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} - S = promote_type(eigtype(TA), TB) - return svdvals!(copy_similar(A, S), copy_similar(B, S)) -end -svdvals(x::Number, y::Number) = abs(x/y) - -# Conversion -AbstractMatrix(F::SVD) = (F.U * Diagonal(F.S)) * F.Vt -AbstractArray(F::SVD) = AbstractMatrix(F) -Matrix(F::SVD) = Array(AbstractArray(F)) -Array(F::SVD) = Matrix(F) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl deleted file mode 100644 index fa3464e93230b..0000000000000 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ /dev/null @@ -1,865 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Symmetric and Hermitian matrices -struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} - data::S - uplo::Char - - function Symmetric{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} - require_one_based_indexing(data) - (uplo != 'U' && uplo != 'L') && throw_uplo() - new{T,S}(data, uplo) - end -end -""" - Symmetric(A, uplo=:U) - -Construct a `Symmetric` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`) -triangle of the matrix `A`. - -`Symmetric` views are mainly useful for real-symmetric matrices, for which -specialized algorithms (e.g. for eigenproblems) are enabled for `Symmetric` types. -More generally, see also [`Hermitian(A)`](@ref) for Hermitian matrices `A == A'`, which -is effectively equivalent to `Symmetric` for real matrices but is also useful for -complex matrices. (Whereas complex `Symmetric` matrices are supported but have few -if any specialized algorithms.) - -To compute the symmetric part of a real matrix, or more generally the Hermitian part `(A + A') / 2` of -a real or complex matrix `A`, use [`hermitianpart`](@ref). - -# Examples -```jldoctest -julia> A = [1 2 3; 4 5 6; 7 8 9] -3×3 Matrix{Int64}: - 1 2 3 - 4 5 6 - 7 8 9 - -julia> Supper = Symmetric(A) -3×3 Symmetric{Int64, Matrix{Int64}}: - 1 2 3 - 2 5 6 - 3 6 9 - -julia> Slower = Symmetric(A, :L) -3×3 Symmetric{Int64, Matrix{Int64}}: - 1 4 7 - 4 5 8 - 7 8 9 - -julia> hermitianpart(A) -3×3 Hermitian{Float64, Matrix{Float64}}: - 1.0 3.0 5.0 - 3.0 5.0 7.0 - 5.0 7.0 9.0 -``` - -Note that `Supper` will not be equal to `Slower` unless `A` is itself symmetric (e.g. if -`A == transpose(A)`). -""" -function Symmetric(A::AbstractMatrix, uplo::Symbol=:U) - checksquare(A) - return symmetric_type(typeof(A))(A, char_uplo(uplo)) -end - -""" - symmetric(A, uplo=:U) - -Construct a symmetric view of `A`. If `A` is a matrix, `uplo` controls whether the upper -(if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the -other one. If `A` is a `Number`, it is returned as is. - -If a symmetric view of a matrix is to be constructed of which the elements are neither -matrices nor numbers, an appropriate method of `symmetric` has to be implemented. In that -case, `symmetric_type` has to be implemented, too. -""" -symmetric(A::AbstractMatrix, uplo::Symbol) = Symmetric(A, uplo) -symmetric(A::Number, ::Symbol) = A - -""" - symmetric_type(T::Type) - -The type of the object returned by `symmetric(::T, ::Symbol)`. For matrices, this is an -appropriately typed `Symmetric`, for `Number`s, it is the original type. If `symmetric` is -implemented for a custom type, so should be `symmetric_type`, and vice versa. 
-""" -function symmetric_type(::Type{T}) where {S, T<:AbstractMatrix{S}} - return Symmetric{Union{S, promote_op(transpose, S), symmetric_type(S)}, T} -end -function symmetric_type(::Type{T}) where {S<:Number, T<:AbstractMatrix{S}} - return Symmetric{S, T} -end -function symmetric_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S}} - return Symmetric{AbstractMatrix, T} -end -symmetric_type(::Type{T}) where {T<:Number} = T - -struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} - data::S - uplo::Char - - function Hermitian{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} - require_one_based_indexing(data) - (uplo != 'U' && uplo != 'L') && throw_uplo() - new{T,S}(data, uplo) - end -end -""" - Hermitian(A, uplo=:U) - -Construct a `Hermitian` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`) -triangle of the matrix `A`. - -To compute the Hermitian part of `A`, use [`hermitianpart`](@ref). - -# Examples -```jldoctest -julia> A = [1 2+2im 3-3im; 4 5 6-6im; 7 8+8im 9] -3×3 Matrix{Complex{Int64}}: - 1+0im 2+2im 3-3im - 4+0im 5+0im 6-6im - 7+0im 8+8im 9+0im - -julia> Hupper = Hermitian(A) -3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}: - 1+0im 2+2im 3-3im - 2-2im 5+0im 6-6im - 3+3im 6+6im 9+0im - -julia> Hlower = Hermitian(A, :L) -3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}: - 1+0im 4+0im 7+0im - 4+0im 5+0im 8-8im - 7+0im 8+8im 9+0im - -julia> hermitianpart(A) -3×3 Hermitian{ComplexF64, Matrix{ComplexF64}}: - 1.0+0.0im 3.0+1.0im 5.0-1.5im - 3.0-1.0im 5.0+0.0im 7.0-7.0im - 5.0+1.5im 7.0+7.0im 9.0+0.0im -``` - -Note that `Hupper` will not be equal to `Hlower` unless `A` is itself Hermitian (e.g. if `A == adjoint(A)`). - -All non-real parts of the diagonal will be ignored. - -```julia -Hermitian(fill(complex(1,1), 1, 1)) == fill(1, 1, 1) -``` -""" -function Hermitian(A::AbstractMatrix, uplo::Symbol=:U) - n = checksquare(A) - return hermitian_type(typeof(A))(A, char_uplo(uplo)) -end - -""" - hermitian(A, uplo=:U) - -Construct a hermitian view of `A`. If `A` is a matrix, `uplo` controls whether the upper -(if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the -other one. If `A` is a `Number`, its real part is returned converted back to the input -type. - -If a hermitian view of a matrix is to be constructed of which the elements are neither -matrices nor numbers, an appropriate method of `hermitian` has to be implemented. In that -case, `hermitian_type` has to be implemented, too. -""" -hermitian(A::AbstractMatrix, uplo::Symbol) = Hermitian(A, uplo) -hermitian(A::Number, ::Symbol) = convert(typeof(A), real(A)) - -""" - hermitian_type(T::Type) - -The type of the object returned by `hermitian(::T, ::Symbol)`. For matrices, this is an -appropriately typed `Hermitian`, for `Number`s, it is the original type. If `hermitian` is -implemented for a custom type, so should be `hermitian_type`, and vice versa. 
-""" -function hermitian_type(::Type{T}) where {S, T<:AbstractMatrix{S}} - return Hermitian{Union{S, promote_op(adjoint, S), hermitian_type(S)}, T} -end -function hermitian_type(::Type{T}) where {S<:Number, T<:AbstractMatrix{S}} - return Hermitian{S, T} -end -function hermitian_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S}} - return Hermitian{AbstractMatrix, T} -end -hermitian_type(::Type{T}) where {T<:Number} = T - -_unwrap(A::Hermitian) = parent(A) -_unwrap(A::Symmetric) = parent(A) - -for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric)) - @eval begin - $S(A::$S) = A - function $S(A::$S, uplo::Symbol) - if A.uplo == char_uplo(uplo) - return A - else - throw(ArgumentError("Cannot construct $($S); uplo doesn't match")) - end - end - $S(A::$H) = $S(A, sym_uplo(A.uplo)) - function $S(A::$H, uplo::Symbol) - if A.uplo == char_uplo(uplo) - if $H === Hermitian && !(eltype(A) <: Real) && - any(!isreal, A.data[i] for i in diagind(A.data)) - - throw(ArgumentError("Cannot construct $($S)($($H))); diagonal contains complex values")) - end - return $S(A.data, sym_uplo(A.uplo)) - else - throw(ArgumentError("Cannot construct $($S); uplo doesn't match")) - end - end - end -end - -convert(::Type{T}, m::Union{Symmetric,Hermitian}) where {T<:Symmetric} = m isa T ? m : T(m)::T -convert(::Type{T}, m::Union{Symmetric,Hermitian}) where {T<:Hermitian} = m isa T ? m : T(m)::T - -const HermOrSym{T, S} = Union{Hermitian{T,S}, Symmetric{T,S}} -const RealHermSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}} -const RealHermSymComplexHerm{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Hermitian{Complex{T},S}} -const RealHermSymComplexSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Symmetric{Complex{T},S}} - -size(A::HermOrSym, d) = size(A.data, d) -size(A::HermOrSym) = size(A.data) -@inline function Base.isassigned(A::HermOrSym, i::Int, j::Int) - @boundscheck checkbounds(Bool, A, i, j) || return false - @inbounds if i == j || ((A.uplo == 'U') == (i < j)) - return isassigned(A.data, i, j) - else - return isassigned(A.data, j, i) - end -end - -@inline function getindex(A::Symmetric, i::Integer, j::Integer) - @boundscheck checkbounds(A, i, j) - @inbounds if i == j - return symmetric(A.data[i, j], sym_uplo(A.uplo))::symmetric_type(eltype(A.data)) - elseif (A.uplo == 'U') == (i < j) - return A.data[i, j] - else - return transpose(A.data[j, i]) - end -end -@inline function getindex(A::Hermitian, i::Integer, j::Integer) - @boundscheck checkbounds(A, i, j) - @inbounds if i == j - return hermitian(A.data[i, j], sym_uplo(A.uplo))::hermitian_type(eltype(A.data)) - elseif (A.uplo == 'U') == (i < j) - return A.data[i, j] - else - return adjoint(A.data[j, i]) - end -end - -function setindex!(A::Symmetric, v, i::Integer, j::Integer) - i == j || throw(ArgumentError("Cannot set a non-diagonal index in a symmetric matrix")) - setindex!(A.data, v, i, j) -end - -function setindex!(A::Hermitian, v, i::Integer, j::Integer) - if i != j - throw(ArgumentError("Cannot set a non-diagonal index in a Hermitian matrix")) - elseif !isreal(v) - throw(ArgumentError("Cannot set a diagonal entry in a Hermitian matrix to a nonreal value")) - else - setindex!(A.data, v, i, j) - end -end - -diag(A::Symmetric) = symmetric.(diag(parent(A)), sym_uplo(A.uplo)) -diag(A::Hermitian) = hermitian.(diag(parent(A)), sym_uplo(A.uplo)) - -isdiag(A::HermOrSym) = isdiag(A.uplo == 'U' ? 
UpperTriangular(A.data) : LowerTriangular(A.data)) - -# For A<:Union{Symmetric,Hermitian}, similar(A[, neweltype]) should yield a matrix with the same -# symmetry type, uplo flag, and underlying storage type as A. The following methods cover these cases. -similar(A::Symmetric, ::Type{T}) where {T} = Symmetric(similar(parent(A), T), ifelse(A.uplo == 'U', :U, :L)) -# If the Hermitian constructor's check ascertaining that the wrapped matrix's -# diagonal is strictly real is removed, the following method can be simplified. -function similar(A::Hermitian, ::Type{T}) where T - B = similar(parent(A), T) - for i in 1:size(B, 1) B[i, i] = 0 end - return Hermitian(B, ifelse(A.uplo == 'U', :U, :L)) -end -# On the other hand, similar(A, [neweltype,] shape...) should yield a matrix of the underlying -# storage type of A (not wrapped in a symmetry type). The following method covers these cases. -similar(A::Union{Symmetric,Hermitian}, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims) - -# Conversion -function Matrix(A::Symmetric) - B = copytri!(convert(Matrix, copy(A.data)), A.uplo) - for i = 1:size(A, 1) - B[i,i] = symmetric(A[i,i], sym_uplo(A.uplo))::symmetric_type(eltype(A.data)) - end - return B -end -function Matrix(A::Hermitian) - B = copytri!(convert(Matrix, copy(A.data)), A.uplo, true) - for i = 1:size(A, 1) - B[i,i] = hermitian(A[i,i], sym_uplo(A.uplo))::hermitian_type(eltype(A.data)) - end - return B -end -Array(A::Union{Symmetric,Hermitian}) = convert(Matrix, A) - -parent(A::HermOrSym) = A.data -Symmetric{T,S}(A::Symmetric{T,S}) where {T,S<:AbstractMatrix{T}} = A -Symmetric{T,S}(A::Symmetric) where {T,S<:AbstractMatrix{T}} = Symmetric{T,S}(convert(S,A.data),A.uplo) -AbstractMatrix{T}(A::Symmetric) where {T} = Symmetric(convert(AbstractMatrix{T}, A.data), sym_uplo(A.uplo)) -Hermitian{T,S}(A::Hermitian{T,S}) where {T,S<:AbstractMatrix{T}} = A -Hermitian{T,S}(A::Hermitian) where {T,S<:AbstractMatrix{T}} = Hermitian{T,S}(convert(S,A.data),A.uplo) -AbstractMatrix{T}(A::Hermitian) where {T} = Hermitian(convert(AbstractMatrix{T}, A.data), sym_uplo(A.uplo)) - -copy(A::Symmetric{T,S}) where {T,S} = (B = copy(A.data); Symmetric{T,typeof(B)}(B,A.uplo)) -copy(A::Hermitian{T,S}) where {T,S} = (B = copy(A.data); Hermitian{T,typeof(B)}(B,A.uplo)) - -function copyto!(dest::Symmetric, src::Symmetric) - if src.uplo == dest.uplo - copyto!(dest.data, src.data) - else - transpose!(dest.data, src.data) - end - return dest -end - -function copyto!(dest::Hermitian, src::Hermitian) - if src.uplo == dest.uplo - copyto!(dest.data, src.data) - else - adjoint!(dest.data, src.data) - end - return dest -end - -# fill[stored]! 
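(Editor's aside, not part of the patch.) A sketch of the indexing contract implemented by the `getindex`/`setindex!` methods above, using an arbitrary matrix:

```julia
using LinearAlgebra

S = Symmetric([1 2 3; 4 5 6; 7 8 9])   # wraps the upper triangle by default
S[3, 1] == S[1, 3] == 3                # the lower triangle mirrors the stored one
S[2, 2] = 10                           # diagonal entries may be written in place
# S[2, 1] = 0                          # would throw: off-diagonal writes are rejected
Matrix(S)                              # materialize via the Matrix(::Symmetric) method above
```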
-fill!(A::HermOrSym, x) = fillstored!(A, x) -function fillstored!(A::HermOrSym{T}, x) where T - xT = convert(T, x) - if isa(A, Hermitian) - isreal(xT) || throw(ArgumentError("cannot fill Hermitian matrix with a nonreal value")) - end - if A.uplo == 'U' - fillband!(A.data, xT, 0, size(A,2)-1) - else # A.uplo == 'L' - fillband!(A.data, xT, 1-size(A,1), 0) - end - return A -end - -Base.isreal(A::HermOrSym{<:Real}) = true -function Base.isreal(A::HermOrSym) - n = size(A, 1) - @inbounds if A.uplo == 'U' - for j in 1:n - for i in 1:(j - (A isa Hermitian)) - if !isreal(A.data[i,j]) - return false - end - end - end - else - for j in 1:n - for i in (j + (A isa Hermitian)):n - if !isreal(A.data[i,j]) - return false - end - end - end - end - return true -end - -ishermitian(A::Hermitian) = true -ishermitian(A::Symmetric{<:Real}) = true -ishermitian(A::Symmetric{<:Complex}) = isreal(A) -issymmetric(A::Hermitian{<:Real}) = true -issymmetric(A::Hermitian{<:Complex}) = isreal(A) -issymmetric(A::Symmetric) = true - -adjoint(A::Hermitian) = A -transpose(A::Symmetric) = A -adjoint(A::Symmetric{<:Real}) = A -transpose(A::Hermitian{<:Real}) = A -adjoint(A::Symmetric) = Adjoint(A) -transpose(A::Hermitian) = Transpose(A) - -real(A::Symmetric{<:Real}) = A -real(A::Hermitian{<:Real}) = A -real(A::Symmetric) = Symmetric(real(A.data), sym_uplo(A.uplo)) -real(A::Hermitian) = Hermitian(real(A.data), sym_uplo(A.uplo)) -imag(A::Symmetric) = Symmetric(imag(A.data), sym_uplo(A.uplo)) - -Base.copy(A::Adjoint{<:Any,<:Symmetric}) = - Symmetric(copy(adjoint(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U)) -Base.copy(A::Transpose{<:Any,<:Hermitian}) = - Hermitian(copy(transpose(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U)) - -tr(A::Symmetric) = tr(A.data) # to avoid AbstractMatrix fallback (incl. 
allocations) -tr(A::Hermitian) = real(tr(A.data)) - -Base.conj(A::HermOrSym) = typeof(A)(conj(A.data), A.uplo) -Base.conj!(A::HermOrSym) = typeof(A)(conj!(A.data), A.uplo) - -# tril/triu -function tril(A::Hermitian, k::Integer=0) - if A.uplo == 'U' && k <= 0 - return tril!(copy(A.data'),k) - elseif A.uplo == 'U' && k > 0 - return tril!(copy(A.data'),-1) + tril!(triu(A.data),k) - elseif A.uplo == 'L' && k <= 0 - return tril(A.data,k) - else - return tril(A.data,-1) + tril!(triu!(copy(A.data')),k) - end -end - -function tril(A::Symmetric, k::Integer=0) - if A.uplo == 'U' && k <= 0 - return tril!(copy(transpose(A.data)),k) - elseif A.uplo == 'U' && k > 0 - return tril!(copy(transpose(A.data)),-1) + tril!(triu(A.data),k) - elseif A.uplo == 'L' && k <= 0 - return tril(A.data,k) - else - return tril(A.data,-1) + tril!(triu!(copy(transpose(A.data))),k) - end -end - -function triu(A::Hermitian, k::Integer=0) - if A.uplo == 'U' && k >= 0 - return triu(A.data,k) - elseif A.uplo == 'U' && k < 0 - return triu(A.data,1) + triu!(tril!(copy(A.data')),k) - elseif A.uplo == 'L' && k >= 0 - return triu!(copy(A.data'),k) - else - return triu!(copy(A.data'),1) + triu!(tril(A.data),k) - end -end - -function triu(A::Symmetric, k::Integer=0) - if A.uplo == 'U' && k >= 0 - return triu(A.data,k) - elseif A.uplo == 'U' && k < 0 - return triu(A.data,1) + triu!(tril!(copy(transpose(A.data))),k) - elseif A.uplo == 'L' && k >= 0 - return triu!(copy(transpose(A.data)),k) - else - return triu!(copy(transpose(A.data)),1) + triu!(tril(A.data),k) - end -end - -for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:Hermitian, :adjoint, :real)] - @eval begin - function dot(A::$T, B::$T) - n = size(A, 2) - if n != size(B, 2) - throw(DimensionMismatch("A has dimensions $(size(A)) but B has dimensions $(size(B))")) - end - - dotprod = zero(dot(first(A), first(B))) - @inbounds if A.uplo == 'U' && B.uplo == 'U' - for j in 1:n - for i in 1:(j - 1) - dotprod += 2 * $real(dot(A.data[i, j], B.data[i, j])) - end - dotprod += dot(A[j, j], B[j, j]) - end - elseif A.uplo == 'L' && B.uplo == 'L' - for j in 1:n - dotprod += dot(A[j, j], B[j, j]) - for i in (j + 1):n - dotprod += 2 * $real(dot(A.data[i, j], B.data[i, j])) - end - end - elseif A.uplo == 'U' && B.uplo == 'L' - for j in 1:n - for i in 1:(j - 1) - dotprod += 2 * $real(dot(A.data[i, j], $trans(B.data[j, i]))) - end - dotprod += dot(A[j, j], B[j, j]) - end - else - for j in 1:n - dotprod += dot(A[j, j], B[j, j]) - for i in (j + 1):n - dotprod += 2 * $real(dot(A.data[i, j], $trans(B.data[j, i]))) - end - end - end - return dotprod - end - end -end - -(-)(A::Symmetric) = Symmetric(-A.data, sym_uplo(A.uplo)) -(-)(A::Hermitian) = Hermitian(-A.data, sym_uplo(A.uplo)) - -## Addition/subtraction -for f ∈ (:+, :-), (Wrapper, conjugation) ∈ ((:Hermitian, :adjoint), (:Symmetric, :transpose)) - @eval begin - function $f(A::$Wrapper, B::$Wrapper) - if A.uplo == B.uplo - return $Wrapper($f(parent(A), parent(B)), sym_uplo(A.uplo)) - elseif A.uplo == 'U' - return $Wrapper($f(parent(A), $conjugation(parent(B))), :U) - else - return $Wrapper($f($conjugation(parent(A)), parent(B)), :U) - end - end - end -end - -for f in (:+, :-) - @eval begin - $f(A::Hermitian, B::Symmetric{<:Real}) = $f(A, Hermitian(parent(B), sym_uplo(B.uplo))) - $f(A::Symmetric{<:Real}, B::Hermitian) = $f(Hermitian(parent(A), sym_uplo(A.uplo)), B) - $f(A::SymTridiagonal, B::Symmetric) = Symmetric($f(A, B.data), sym_uplo(B.uplo)) - $f(A::Symmetric, B::SymTridiagonal) = Symmetric($f(A.data, B), sym_uplo(A.uplo)) - 
$f(A::SymTridiagonal{<:Real}, B::Hermitian) = Hermitian($f(A, B.data), sym_uplo(B.uplo)) - $f(A::Hermitian, B::SymTridiagonal{<:Real}) = Hermitian($f(A.data, B), sym_uplo(A.uplo)) - end -end - -*(A::HermOrSym, B::HermOrSym) = A * copyto!(similar(parent(B)), B) - -function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector) - require_one_based_indexing(x, y) - n = length(x) - (n == length(y) == size(A, 1)) || throw(DimensionMismatch()) - data = A.data - r = dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - iszero(n) && return r - if A.uplo == 'U' - @inbounds for j = 1:length(y) - r += dot(x[j], real(data[j,j]), y[j]) - @simd for i = 1:j-1 - Aij = data[i,j] - r += dot(x[i], Aij, y[j]) + dot(x[j], adjoint(Aij), y[i]) - end - end - else # A.uplo == 'L' - @inbounds for j = 1:length(y) - r += dot(x[j], real(data[j,j]), y[j]) - @simd for i = j+1:length(y) - Aij = data[i,j] - r += dot(x[i], Aij, y[j]) + dot(x[j], adjoint(Aij), y[i]) - end - end - end - return r -end - -# Scaling with Number -*(A::Symmetric, x::Number) = Symmetric(A.data*x, sym_uplo(A.uplo)) -*(x::Number, A::Symmetric) = Symmetric(x*A.data, sym_uplo(A.uplo)) -*(A::Hermitian, x::Real) = Hermitian(A.data*x, sym_uplo(A.uplo)) -*(x::Real, A::Hermitian) = Hermitian(x*A.data, sym_uplo(A.uplo)) -/(A::Symmetric, x::Number) = Symmetric(A.data/x, sym_uplo(A.uplo)) -/(A::Hermitian, x::Real) = Hermitian(A.data/x, sym_uplo(A.uplo)) - -factorize(A::HermOrSym) = _factorize(A) -function _factorize(A::HermOrSym{T}; check::Bool=true) where T - TT = typeof(sqrt(oneunit(T))) - if isdiag(A) - return Diagonal(A) - elseif TT <: BlasFloat - return bunchkaufman(A; check=check) - else # fallback - return lu(A; check=check) - end -end - -det(A::RealHermSymComplexHerm) = real(det(_factorize(A; check=false))) -det(A::Symmetric{<:Real}) = det(_factorize(A; check=false)) -det(A::Symmetric) = det(_factorize(A; check=false)) - -\(A::HermOrSym, B::AbstractVector) = \(factorize(A), B) -# Bunch-Kaufman solves can not utilize BLAS-3 for multiple right hand sides -# so using LU is faster for AbstractMatrix right hand side -\(A::HermOrSym, B::AbstractMatrix) = \(isdiag(A) ? Diagonal(A) : lu(A), B) - -function _inv(A::HermOrSym) - n = checksquare(A) - B = inv!(lu(A)) - conjugate = isa(A, Hermitian) - # symmetrize - if A.uplo == 'U' # add to upper triangle - @inbounds for i = 1:n, j = i:n - B[i,j] = conjugate ? (B[i,j] + conj(B[j,i])) / 2 : (B[i,j] + B[j,i]) / 2 - end - else # A.uplo == 'L', add to lower triangle - @inbounds for i = 1:n, j = i:n - B[j,i] = conjugate ? (B[j,i] + conj(B[i,j])) / 2 : (B[j,i] + B[i,j]) / 2 - end - end - B -end -# StridedMatrix restriction seems necessary due to inv! 
call in _inv above -inv(A::Hermitian{<:Any,<:StridedMatrix}) = Hermitian(_inv(A), sym_uplo(A.uplo)) -inv(A::Symmetric{<:Any,<:StridedMatrix}) = Symmetric(_inv(A), sym_uplo(A.uplo)) - -function svd(A::RealHermSymComplexHerm; full::Bool=false) - vals, vecs = eigen(A) - I = sortperm(vals; by=abs, rev=true) - permute!(vals, I) - Base.permutecols!!(vecs, I) # left-singular vectors - V = copy(vecs) # right-singular vectors - # shifting -1 from singular values to right-singular vectors - @inbounds for i = 1:length(vals) - if vals[i] < 0 - vals[i] = -vals[i] - for j = 1:size(V,1); V[j,i] = -V[j,i]; end - end - end - return SVD(vecs, vals, V') -end - -function svdvals!(A::RealHermSymComplexHerm) - vals = eigvals!(A) - for i = 1:length(vals) - vals[i] = abs(vals[i]) - end - return sort!(vals, rev = true) -end - -# Matrix functions -^(A::Symmetric{<:Real}, p::Integer) = sympow(A, p) -^(A::Symmetric{<:Complex}, p::Integer) = sympow(A, p) -function sympow(A::Symmetric, p::Integer) - if p < 0 - return Symmetric(Base.power_by_squaring(inv(A), -p)) - else - return Symmetric(Base.power_by_squaring(A, p)) - end -end -function ^(A::Symmetric{<:Real}, p::Real) - isinteger(p) && return integerpow(A, p) - F = eigen(A) - if all(λ -> λ ≥ 0, F.values) - return Symmetric((F.vectors * Diagonal((F.values).^p)) * F.vectors') - else - return Symmetric((F.vectors * Diagonal((complex(F.values)).^p)) * F.vectors') - end -end -function ^(A::Symmetric{<:Complex}, p::Real) - isinteger(p) && return integerpow(A, p) - return Symmetric(schurpow(A, p)) -end -function ^(A::Hermitian, p::Integer) - if p < 0 - retmat = Base.power_by_squaring(inv(A), -p) - else - retmat = Base.power_by_squaring(A, p) - end - for i = 1:size(A,1) - retmat[i,i] = real(retmat[i,i]) - end - return Hermitian(retmat) -end -function ^(A::Hermitian{T}, p::Real) where T - isinteger(p) && return integerpow(A, p) - F = eigen(A) - if all(λ -> λ ≥ 0, F.values) - retmat = (F.vectors * Diagonal((F.values).^p)) * F.vectors' - if T <: Real - return Hermitian(retmat) - else - for i = 1:size(A,1) - retmat[i,i] = real(retmat[i,i]) - end - return Hermitian(retmat) - end - else - return (F.vectors * Diagonal((complex(F.values).^p))) * F.vectors' - end -end - -for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh) - @eval begin - function ($func)(A::HermOrSym{<:Real}) - F = eigen(A) - return Symmetric((F.vectors * Diagonal(($func).(F.values))) * F.vectors') - end - function ($func)(A::Hermitian{<:Complex}) - n = checksquare(A) - F = eigen(A) - retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors' - for i = 1:n - retmat[i,i] = real(retmat[i,i]) - end - return Hermitian(retmat) - end - end -end - -function cis(A::Union{RealHermSymComplexHerm,SymTridiagonal{<:Real}}) - F = eigen(A) - # The returned matrix is unitary, and is complex-symmetric for real A - return F.vectors .* cis.(F.values') * F.vectors' -end - -for func in (:acos, :asin) - @eval begin - function ($func)(A::HermOrSym{<:Real}) - F = eigen(A) - if all(λ -> -1 ≤ λ ≤ 1, F.values) - retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors' - else - retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors' - end - return Symmetric(retmat) - end - function ($func)(A::Hermitian{<:Complex}) - n = checksquare(A) - F = eigen(A) - if all(λ -> -1 ≤ λ ≤ 1, F.values) - retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors' - for i = 1:n - retmat[i,i] = real(retmat[i,i]) - end - return Hermitian(retmat) - else - return (F.vectors * 
Diagonal(($func).(complex.(F.values)))) * F.vectors' - end - end - end -end - -function acosh(A::HermOrSym{<:Real}) - F = eigen(A) - if all(λ -> λ ≥ 1, F.values) - retmat = (F.vectors * Diagonal(acosh.(F.values))) * F.vectors' - else - retmat = (F.vectors * Diagonal(acosh.(complex.(F.values)))) * F.vectors' - end - return Symmetric(retmat) -end -function acosh(A::Hermitian{<:Complex}) - n = checksquare(A) - F = eigen(A) - if all(λ -> λ ≥ 1, F.values) - retmat = (F.vectors * Diagonal(acosh.(F.values))) * F.vectors' - for i = 1:n - retmat[i,i] = real(retmat[i,i]) - end - return Hermitian(retmat) - else - return (F.vectors * Diagonal(acosh.(complex.(F.values)))) * F.vectors' - end -end - -function sincos(A::HermOrSym{<:Real}) - n = checksquare(A) - F = eigen(A) - S, C = Diagonal(similar(A, (n,))), Diagonal(similar(A, (n,))) - for i in 1:n - S.diag[i], C.diag[i] = sincos(F.values[i]) - end - return Symmetric((F.vectors * S) * F.vectors'), Symmetric((F.vectors * C) * F.vectors') -end -function sincos(A::Hermitian{<:Complex}) - n = checksquare(A) - F = eigen(A) - S, C = Diagonal(similar(A, (n,))), Diagonal(similar(A, (n,))) - for i in 1:n - S.diag[i], C.diag[i] = sincos(F.values[i]) - end - retmatS, retmatC = (F.vectors * S) * F.vectors', (F.vectors * C) * F.vectors' - for i = 1:n - retmatS[i,i] = real(retmatS[i,i]) - retmatC[i,i] = real(retmatC[i,i]) - end - return Hermitian(retmatS), Hermitian(retmatC) -end - - -for func in (:log, :sqrt) - # sqrt has rtol arg to handle matrices that are semidefinite up to roundoff errors - rtolarg = func === :sqrt ? Any[Expr(:kw, :(rtol::Real), :(eps(real(float(one(T))))*size(A,1)))] : Any[] - rtolval = func === :sqrt ? :(-maximum(abs, F.values) * rtol) : 0 - @eval begin - function ($func)(A::HermOrSym{T}; $(rtolarg...)) where {T<:Real} - F = eigen(A) - λ₀ = $rtolval # treat λ ≥ λ₀ as "zero" eigenvalues up to roundoff - if all(λ -> λ ≥ λ₀, F.values) - retmat = (F.vectors * Diagonal(($func).(max.(0, F.values)))) * F.vectors' - else - retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors' - end - return Symmetric(retmat) - end - - function ($func)(A::Hermitian{T}; $(rtolarg...)) where {T<:Complex} - n = checksquare(A) - F = eigen(A) - λ₀ = $rtolval # treat λ ≥ λ₀ as "zero" eigenvalues up to roundoff - if all(λ -> λ ≥ λ₀, F.values) - retmat = (F.vectors * Diagonal(($func).(max.(0, F.values)))) * F.vectors' - for i = 1:n - retmat[i,i] = real(retmat[i,i]) - end - return Hermitian(retmat) - else - retmat = (F.vectors * Diagonal(($func).(complex(F.values)))) * F.vectors' - return retmat - end - end - end -end - -""" - hermitianpart(A, uplo=:U) -> Hermitian - -Return the Hermitian part of the square matrix `A`, defined as `(A + A') / 2`, as a -[`Hermitian`](@ref) matrix. For real matrices `A`, this is also known as the symmetric part -of `A`; it is also sometimes called the "operator real part". The optional argument `uplo` controls the corresponding argument of the -[`Hermitian`](@ref) view. For real matrices, the latter is equivalent to a -[`Symmetric`](@ref) view. - -See also [`hermitianpart!`](@ref) for the corresponding in-place operation. - -!!! compat "Julia 1.10" - This function requires Julia 1.10 or later. -""" -hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart(A), uplo) - -""" - hermitianpart!(A, uplo=:U) -> Hermitian - -Overwrite the square matrix `A` in-place with its Hermitian part `(A + A') / 2`, and return -[`Hermitian(A, uplo)`](@ref). 
For real matrices `A`, this is also known as the symmetric -part of `A`. - -See also [`hermitianpart`](@ref) for the corresponding out-of-place operation. - -!!! compat "Julia 1.10" - This function requires Julia 1.10 or later. -""" -hermitianpart!(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart!(A), uplo) - -_hermitianpart(A::AbstractMatrix) = _hermitianpart!(copy_similar(A, Base.promote_op(/, eltype(A), Int))) -_hermitianpart(a::Number) = real(a) - -function _hermitianpart!(A::AbstractMatrix) - require_one_based_indexing(A) - n = checksquare(A) - @inbounds for j in 1:n - A[j, j] = _hermitianpart(A[j, j]) - for i in 1:j-1 - A[i, j] = val = (A[i, j] + adjoint(A[j, i])) / 2 - A[j, i] = adjoint(val) - end - end - return A -end - -## structured matrix printing ## -function Base.replace_in_print_matrix(A::HermOrSym,i::Integer,j::Integer,s::AbstractString) - ijminmax = minmax(i, j) - inds = A.uplo == 'U' ? ijminmax : reverse(ijminmax) - Base.replace_in_print_matrix(parent(A), inds..., s) -end diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl deleted file mode 100644 index 279577c31d664..0000000000000 --- a/stdlib/LinearAlgebra/src/symmetriceigen.jl +++ /dev/null @@ -1,220 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# preserve HermOrSym wrapper -eigencopy_oftype(A::Hermitian, S) = Hermitian(copy_similar(A, S), sym_uplo(A.uplo)) -eigencopy_oftype(A::Symmetric, S) = Symmetric(copy_similar(A, S), sym_uplo(A.uplo)) - -# Eigensolvers for symmetric and Hermitian matrices -eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) = - Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...) - -function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing) - S = eigtype(eltype(A)) - eigen!(eigencopy_oftype(A, S), sortby=sortby) -end - -eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) = - Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...) - -""" - eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> Eigen - -Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F` -which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the -matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) - -Iterating the decomposition produces the components `F.values` and `F.vectors`. - -The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref). - -The [`UnitRange`](@ref) `irange` specifies indices of the sorted eigenvalues to search for. - -!!! note - If `irange` is not `1:n`, where `n` is the dimension of `A`, then the returned factorization - will be a *truncated* factorization. -""" -function eigen(A::RealHermSymComplexHerm, irange::UnitRange) - S = eigtype(eltype(A)) - eigen!(eigencopy_oftype(A, S), irange) -end - -eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} = - Eigen(LAPACK.syevr!('V', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)...) 
- -""" - eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> Eigen - -Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F` -which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the -matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) - -Iterating the decomposition produces the components `F.values` and `F.vectors`. - -The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref). - -`vl` is the lower bound of the window of eigenvalues to search for, and `vu` is the upper bound. - -!!! note - If [`vl`, `vu`] does not contain all eigenvalues of `A`, then the returned factorization - will be a *truncated* factorization. -""" -function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real) - S = eigtype(eltype(A)) - eigen!(eigencopy_oftype(A, S), vl, vh) -end - -function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) - vals = LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1] - !isnothing(sortby) && sort!(vals, by=sortby) - return vals -end - -function eigvals(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing) - S = eigtype(eltype(A)) - eigvals!(eigencopy_oftype(A, S), sortby=sortby) -end - -""" - eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values - -Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy. -`irange` is a range of eigenvalue *indices* to search for - for instance, the 2nd to 8th eigenvalues. -""" -eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) = - LAPACK.syevr!('N', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)[1] - -""" - eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values - -Return the eigenvalues of `A`. It is possible to calculate only a subset of the -eigenvalues by specifying a [`UnitRange`](@ref) `irange` covering indices of the sorted eigenvalues, -e.g. the 2nd to 8th eigenvalues. - -# Examples -```jldoctest -julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.]) -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 1.0 2.0 ⋅ - 2.0 2.0 3.0 - ⋅ 3.0 1.0 - -julia> eigvals(A, 2:2) -1-element Vector{Float64}: - 0.9999999999999996 - -julia> eigvals(A) -3-element Vector{Float64}: - -2.1400549446402604 - 1.0000000000000002 - 5.140054944640259 -``` -""" -function eigvals(A::RealHermSymComplexHerm, irange::UnitRange) - S = eigtype(eltype(A)) - eigvals!(eigencopy_oftype(A, S), irange) -end - -""" - eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values - -Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy. -`vl` is the lower bound of the interval to search for eigenvalues, and `vu` is the upper bound. -""" -eigvals!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} = - LAPACK.syevr!('N', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)[1] - -""" - eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values - -Return the eigenvalues of `A`. It is possible to calculate only a subset of the eigenvalues -by specifying a pair `vl` and `vu` for the lower and upper boundaries of the eigenvalues. 
- -# Examples -```jldoctest -julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.]) -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 1.0 2.0 ⋅ - 2.0 2.0 3.0 - ⋅ 3.0 1.0 - -julia> eigvals(A, -1, 2) -1-element Vector{Float64}: - 1.0000000000000009 - -julia> eigvals(A) -3-element Vector{Float64}: - -2.1400549446402604 - 1.0000000000000002 - 5.140054944640259 -``` -""" -function eigvals(A::RealHermSymComplexHerm, vl::Real, vh::Real) - S = eigtype(eltype(A)) - eigvals!(eigencopy_oftype(A, S), vl, vh) -end - -eigmax(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, size(A, 1):size(A, 1))[1] -eigmin(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, 1:1)[1] - -function eigen(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} - S = promote_type(eigtype(TA), TB) - return eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) -end - -function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} - vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) - GeneralizedEigen(sorteig!(vals, vecs, sortby)...) -end -function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix} - vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) - GeneralizedEigen(sorteig!(vals, vecs, sortby)...) -end - -function eigen(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) - if ishermitian(A) - eigen!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) - else - eigen!(copy_similar(A, eigtype(eltype(A))), C; sortby) - end -end -function eigen!(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) - # Cholesky decomposition based eigenvalues and eigenvectors - vals, w = eigen!(UtiAUi!(A, C.U)) - vecs = C.U \ w - GeneralizedEigen(sorteig!(vals, vecs, sortby)...) -end - -# Perform U' \ A / U in-place, where U::Union{UpperTriangular,Diagonal} -UtiAUi!(A, U) = _UtiAUi!(A, U) -UtiAUi!(A::Symmetric, U) = Symmetric(_UtiAUi!(copytri!(parent(A), A.uplo), U), sym_uplo(A.uplo)) -UtiAUi!(A::Hermitian, U) = Hermitian(_UtiAUi!(copytri!(parent(A), A.uplo, true), U), sym_uplo(A.uplo)) -_UtiAUi!(A, U) = rdiv!(ldiv!(U', A), U) - -function eigvals(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} - S = promote_type(eigtype(TA), TB) - return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) -end - -function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} - vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1] - isnothing(sortby) || sort!(vals, by=sortby) - return vals -end -function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix} - vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? 
B.data : copy(B.data'))[1] - isnothing(sortby) || sort!(vals, by=sortby) - return vals -end -eigvecs(A::HermOrSym) = eigvecs(eigen(A)) - -function eigvals(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) - if ishermitian(A) - eigvals!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) - else - eigvals!(copy_similar(A, eigtype(eltype(A))), C; sortby) - end -end -function eigvals!(A::AbstractMatrix{T}, C::Cholesky{T, <:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} - # Cholesky decomposition based eigenvalues - return eigvals!(UtiAUi!(A, C.U); sortby) -end diff --git a/stdlib/LinearAlgebra/src/transpose.jl b/stdlib/LinearAlgebra/src/transpose.jl deleted file mode 100644 index 9d70ac3add34b..0000000000000 --- a/stdlib/LinearAlgebra/src/transpose.jl +++ /dev/null @@ -1,212 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -adjoint(a::AbstractArray) = error("adjoint not defined for $(typeof(a)). Consider using `permutedims` for higher-dimensional arrays.") -transpose(a::AbstractArray) = error("transpose not defined for $(typeof(a)). Consider using `permutedims` for higher-dimensional arrays.") - -## Matrix transposition ## - -""" - transpose!(dest,src) - -Transpose array `src` and store the result in the preallocated array `dest`, which should -have a size corresponding to `(size(src,2),size(src,1))`. No in-place transposition is -supported and unexpected results will happen if `src` and `dest` have overlapping memory -regions. - -# Examples -```jldoctest -julia> A = [3+2im 9+2im; 8+7im 4+6im] -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 8+7im 4+6im - -julia> B = zeros(Complex{Int64}, 2, 2) -2×2 Matrix{Complex{Int64}}: - 0+0im 0+0im - 0+0im 0+0im - -julia> transpose!(B, A); - -julia> B -2×2 Matrix{Complex{Int64}}: - 3+2im 8+7im - 9+2im 4+6im - -julia> A -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 8+7im 4+6im -``` -""" -transpose!(B::AbstractMatrix, A::AbstractMatrix) = transpose_f!(transpose, B, A) - -""" - adjoint!(dest,src) - -Conjugate transpose array `src` and store the result in the preallocated array `dest`, which -should have a size corresponding to `(size(src,2),size(src,1))`. No in-place transposition -is supported and unexpected results will happen if `src` and `dest` have overlapping memory -regions. 
- -# Examples -```jldoctest -julia> A = [3+2im 9+2im; 8+7im 4+6im] -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 8+7im 4+6im - -julia> B = zeros(Complex{Int64}, 2, 2) -2×2 Matrix{Complex{Int64}}: - 0+0im 0+0im - 0+0im 0+0im - -julia> adjoint!(B, A); - -julia> B -2×2 Matrix{Complex{Int64}}: - 3-2im 8-7im - 9-2im 4-6im - -julia> A -2×2 Matrix{Complex{Int64}}: - 3+2im 9+2im - 8+7im 4+6im -``` -""" -adjoint!(B::AbstractMatrix, A::AbstractMatrix) = transpose_f!(adjoint, B, A) -function transpose!(B::AbstractVector, A::AbstractMatrix) - axes(B,1) == axes(A,2) && axes(A,1) == 1:1 || throw(DimensionMismatch("transpose")) - copyto!(B, A) -end -function transpose!(B::AbstractMatrix, A::AbstractVector) - axes(B,2) == axes(A,1) && axes(B,1) == 1:1 || throw(DimensionMismatch("transpose")) - copyto!(B, A) -end -function adjoint!(B::AbstractVector, A::AbstractMatrix) - axes(B,1) == axes(A,2) && axes(A,1) == 1:1 || throw(DimensionMismatch("transpose")) - ccopy!(B, A) -end -function adjoint!(B::AbstractMatrix, A::AbstractVector) - axes(B,2) == axes(A,1) && axes(B,1) == 1:1 || throw(DimensionMismatch("transpose")) - ccopy!(B, A) -end - -const transposebaselength=64 -function transpose_f!(f, B::AbstractMatrix, A::AbstractMatrix) - inds = axes(A) - axes(B,1) == inds[2] && axes(B,2) == inds[1] || throw(DimensionMismatch(string(f))) - - m, n = length(inds[1]), length(inds[2]) - if m*n<=4*transposebaselength - @inbounds begin - for j = inds[2] - for i = inds[1] - B[j,i] = f(A[i,j]) - end - end - end - else - transposeblock!(f,B,A,m,n,first(inds[1])-1,first(inds[2])-1) - end - return B -end -function transposeblock!(f, B::AbstractMatrix, A::AbstractMatrix, m::Int, n::Int, offseti::Int, offsetj::Int) - if m*n<=transposebaselength - @inbounds begin - for j = offsetj .+ (1:n) - for i = offseti .+ (1:m) - B[j,i] = f(A[i,j]) - end - end - end - elseif m>n - newm=m>>1 - transposeblock!(f,B,A,newm,n,offseti,offsetj) - transposeblock!(f,B,A,m-newm,n,offseti+newm,offsetj) - else - newn=n>>1 - transposeblock!(f,B,A,m,newn,offseti,offsetj) - transposeblock!(f,B,A,m,n-newn,offseti,offsetj+newn) - end - return B -end - -function ccopy!(B, A) - RB, RA = eachindex(B), eachindex(A) - if RB == RA - for i = RB - B[i] = adjoint(A[i]) - end - else - for (i,j) = zip(RB, RA) - B[i] = adjoint(A[j]) - end - end - return B -end - -""" - copy(A::Transpose) - copy(A::Adjoint) - -Eagerly evaluate the lazy matrix transpose/adjoint. -Note that the transposition is applied recursively to elements. - -This operation is intended for linear algebra usage - for general data manipulation see -[`permutedims`](@ref Base.permutedims), which is non-recursive. 
- -# Examples -```jldoctest -julia> A = [1 2im; -3im 4] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+2im - 0-3im 4+0im - -julia> T = transpose(A) -2×2 transpose(::Matrix{Complex{Int64}}) with eltype Complex{Int64}: - 1+0im 0-3im - 0+2im 4+0im - -julia> copy(T) -2×2 Matrix{Complex{Int64}}: - 1+0im 0-3im - 0+2im 4+0im -``` -""" -copy(::Union{Transpose,Adjoint}) - -Base.copy(A::TransposeAbsMat) = transpose!(similar(A.parent, reverse(axes(A.parent))), A.parent) -Base.copy(A::AdjointAbsMat) = adjoint!(similar(A.parent, reverse(axes(A.parent))), A.parent) - -function copy_transpose!(B::AbstractVecOrMat, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int}, - A::AbstractVecOrMat, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) - if length(ir_dest) != length(jr_src) - throw(ArgumentError(LazyString("source and destination must have same size (got ", - length(jr_src)," and ",length(ir_dest),")"))) - end - if length(jr_dest) != length(ir_src) - throw(ArgumentError(LazyString("source and destination must have same size (got ", - length(ir_src)," and ",length(jr_dest),")"))) - end - @boundscheck checkbounds(B, ir_dest, jr_dest) - @boundscheck checkbounds(A, ir_src, jr_src) - idest = first(ir_dest) - for jsrc in jr_src - jdest = first(jr_dest) - for isrc in ir_src - B[idest,jdest] = A[isrc,jsrc] - jdest += step(jr_dest) - end - idest += step(ir_dest) - end - return B -end - -function copy_similar(A::AdjointAbsMat, ::Type{T}) where {T} - C = similar(A, T, size(A)) - adjoint!(C, parent(A)) -end -function copy_similar(A::TransposeAbsMat, ::Type{T}) where {T} - C = similar(A, T, size(A)) - transpose!(C, parent(A)) -end diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl deleted file mode 100644 index 295a46f1522a5..0000000000000 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ /dev/null @@ -1,2493 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## Triangular - -# could be renamed to Triangular when that name has been fully deprecated -abstract type AbstractTriangular{T,S<:AbstractMatrix} <: AbstractMatrix{T} end - -# First loop through all methods that don't need special care for upper/lower and unit diagonal -for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, - :UnitUpperTriangular) - @eval begin - struct $t{T,S<:AbstractMatrix{T}} <: AbstractTriangular{T,S} - data::S - - function $t{T,S}(data) where {T,S<:AbstractMatrix{T}} - require_one_based_indexing(data) - checksquare(data) - new{T,S}(data) - end - end - $t(A::$t) = A - $t{T}(A::$t{T}) where {T} = A - function $t(A::AbstractMatrix) - return $t{eltype(A), typeof(A)}(A) - end - function $t{T}(A::AbstractMatrix) where T - $t(convert(AbstractMatrix{T}, A)) - end - - function $t{T}(A::$t) where T - Anew = convert(AbstractMatrix{T}, A.data) - $t(Anew) - end - Matrix(A::$t{T}) where {T} = Matrix{T}(A) - - AbstractMatrix{T}(A::$t) where {T} = $t{T}(A) - - size(A::$t, d) = size(A.data, d) - size(A::$t) = size(A.data) - - # For A<:AbstractTriangular, similar(A[, neweltype]) should yield a matrix with the same - # triangular type and underlying storage type as A. The following method covers these cases. - similar(A::$t, ::Type{T}) where {T} = $t(similar(parent(A), T)) - # On the other hand, similar(A, [neweltype,] shape...) should yield a matrix of the underlying - # storage type of A (not wrapped in a triangular type). The following method covers these cases. 
- similar(A::$t, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims) - - copy(A::$t) = $t(copy(A.data)) - - real(A::$t{<:Real}) = A - real(A::$t{<:Complex}) = (B = real(A.data); $t(B)) - end -end - -similar(A::UpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} = - UpperTriangular(similar(parent(parent(A)), T)) -similar(A::UnitUpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} = - UnitUpperTriangular(similar(parent(parent(A)), T)) -similar(A::LowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} = - LowerTriangular(similar(parent(parent(A)), T)) -similar(A::UnitLowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} = - UnitLowerTriangular(similar(parent(parent(A)), T)) - - -""" - LowerTriangular(A::AbstractMatrix) - -Construct a `LowerTriangular` view of the matrix `A`. - -# Examples -```jldoctest -julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0] -3×3 Matrix{Float64}: - 1.0 2.0 3.0 - 4.0 5.0 6.0 - 7.0 8.0 9.0 - -julia> LowerTriangular(A) -3×3 LowerTriangular{Float64, Matrix{Float64}}: - 1.0 ⋅ ⋅ - 4.0 5.0 ⋅ - 7.0 8.0 9.0 -``` -""" -LowerTriangular -""" - UpperTriangular(A::AbstractMatrix) - -Construct an `UpperTriangular` view of the matrix `A`. - -# Examples -```jldoctest -julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0] -3×3 Matrix{Float64}: - 1.0 2.0 3.0 - 4.0 5.0 6.0 - 7.0 8.0 9.0 - -julia> UpperTriangular(A) -3×3 UpperTriangular{Float64, Matrix{Float64}}: - 1.0 2.0 3.0 - ⋅ 5.0 6.0 - ⋅ ⋅ 9.0 -``` -""" -UpperTriangular -""" - UnitLowerTriangular(A::AbstractMatrix) - -Construct a `UnitLowerTriangular` view of the matrix `A`. -Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref) -of `A` on its diagonal. - -# Examples -```jldoctest -julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0] -3×3 Matrix{Float64}: - 1.0 2.0 3.0 - 4.0 5.0 6.0 - 7.0 8.0 9.0 - -julia> UnitLowerTriangular(A) -3×3 UnitLowerTriangular{Float64, Matrix{Float64}}: - 1.0 ⋅ ⋅ - 4.0 1.0 ⋅ - 7.0 8.0 1.0 -``` -""" -UnitLowerTriangular -""" - UnitUpperTriangular(A::AbstractMatrix) - -Construct an `UnitUpperTriangular` view of the matrix `A`. -Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref) -of `A` on its diagonal. 
- -# Examples -```jldoctest -julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0] -3×3 Matrix{Float64}: - 1.0 2.0 3.0 - 4.0 5.0 6.0 - 7.0 8.0 9.0 - -julia> UnitUpperTriangular(A) -3×3 UnitUpperTriangular{Float64, Matrix{Float64}}: - 1.0 2.0 3.0 - ⋅ 1.0 6.0 - ⋅ ⋅ 1.0 -``` -""" -UnitUpperTriangular - -const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}} -const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}} -const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}} - -imag(A::UpperTriangular) = UpperTriangular(imag(A.data)) -imag(A::LowerTriangular) = LowerTriangular(imag(A.data)) -imag(A::UnitLowerTriangular) = LowerTriangular(tril!(imag(A.data),-1)) -imag(A::UnitUpperTriangular) = UpperTriangular(triu!(imag(A.data),1)) - -Array(A::AbstractTriangular) = Matrix(A) -parent(A::UpperOrLowerTriangular) = A.data - -# then handle all methods that requires specific handling of upper/lower and unit diagonal - -function Matrix{T}(A::LowerTriangular) where T - B = Matrix{T}(undef, size(A, 1), size(A, 1)) - copyto!(B, A.data) - tril!(B) - B -end -function Matrix{T}(A::UnitLowerTriangular) where T - B = Matrix{T}(undef, size(A, 1), size(A, 1)) - copyto!(B, A.data) - tril!(B) - for i = 1:size(B,1) - B[i,i] = oneunit(T) - end - B -end -function Matrix{T}(A::UpperTriangular) where T - B = Matrix{T}(undef, size(A, 1), size(A, 1)) - copyto!(B, A.data) - triu!(B) - B -end -function Matrix{T}(A::UnitUpperTriangular) where T - B = Matrix{T}(undef, size(A, 1), size(A, 1)) - copyto!(B, A.data) - triu!(B) - for i = 1:size(B,1) - B[i,i] = oneunit(T) - end - B -end - -function full!(A::LowerTriangular) - B = A.data - tril!(B) - B -end -function full!(A::UnitLowerTriangular) - B = A.data - tril!(B) - for i = 1:size(A,1) - B[i,i] = oneunit(eltype(B)) - end - B -end -function full!(A::UpperTriangular) - B = A.data - triu!(B) - B -end -function full!(A::UnitUpperTriangular) - B = A.data - triu!(B) - for i = 1:size(A,1) - B[i,i] = oneunit(eltype(B)) - end - B -end - -Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) = - i > j ? isassigned(A.data, i, j) : true -Base.isassigned(A::LowerTriangular, i::Int, j::Int) = - i >= j ? isassigned(A.data, i, j) : true -Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) = - i < j ? isassigned(A.data, i, j) : true -Base.isassigned(A::UpperTriangular, i::Int, j::Int) = - i <= j ? isassigned(A.data, i, j) : true - -getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} = - i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) -getindex(A::LowerTriangular, i::Integer, j::Integer) = - i >= j ? A.data[i,j] : zero(A.data[j,i]) -getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} = - i < j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) -getindex(A::UpperTriangular, i::Integer, j::Integer) = - i <= j ? 
A.data[i,j] : zero(A.data[j,i]) - -function setindex!(A::UpperTriangular, x, i::Integer, j::Integer) - if i > j - iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " * - "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)")) - else - A.data[i,j] = x - end - return A -end - -function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer) - if i > j - iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " * - "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)")) - elseif i == j - x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " * - "of a UnitUpperTriangular matrix to a non-unit value ($x)")) - else - A.data[i,j] = x - end - return A -end - -function setindex!(A::LowerTriangular, x, i::Integer, j::Integer) - if i < j - iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " * - "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)")) - else - A.data[i,j] = x - end - return A -end - -function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer) - if i < j - iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " * - "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)")) - elseif i == j - x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " * - "of a UnitLowerTriangular matrix to a non-unit value ($x)")) - else - A.data[i,j] = x - end - return A -end - - -## structured matrix methods ## -function Base.replace_in_print_matrix(A::Union{UpperTriangular,UnitUpperTriangular}, - i::Integer, j::Integer, s::AbstractString) - return i <= j ? s : Base.replace_with_centered_mark(s) -end -function Base.replace_in_print_matrix(A::Union{LowerTriangular,UnitLowerTriangular}, - i::Integer, j::Integer, s::AbstractString) - return i >= j ? 
s : Base.replace_with_centered_mark(s) -end - -function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0) - k >= 0 && return true - return _istril(A, k) -end -function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0) - k <= 0 && return true - return _istriu(A, k) -end -istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k) -istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k) -istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k) -istriu(A::Transpose, k::Integer=0) = istril(A.parent, -k) - -function tril!(A::UpperTriangular{T}, k::Integer=0) where {T} - n = size(A,1) - if k < 0 - fill!(A.data, zero(T)) - return A - elseif k == 0 - for j in 1:n, i in 1:j-1 - A.data[i,j] = zero(T) - end - return A - else - return UpperTriangular(tril!(A.data,k)) - end -end -triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data, k)) - -function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where {T} - n = size(A,1) - if k < 0 - fill!(A.data, zero(T)) - return UpperTriangular(A.data) - elseif k == 0 - fill!(A.data, zero(T)) - for i in diagind(A) - A.data[i] = oneunit(T) - end - return UpperTriangular(A.data) - else - for i in diagind(A) - A.data[i] = oneunit(T) - end - return UpperTriangular(tril!(A.data,k)) - end -end - -function triu!(A::UnitUpperTriangular, k::Integer=0) - for i in diagind(A) - A.data[i] = oneunit(eltype(A)) - end - return triu!(UpperTriangular(A.data), k) -end - -function triu!(A::LowerTriangular{T}, k::Integer=0) where {T} - n = size(A,1) - if k > 0 - fill!(A.data, zero(T)) - return A - elseif k == 0 - for j in 1:n, i in j+1:n - A.data[i,j] = zero(T) - end - return A - else - return LowerTriangular(triu!(A.data, k)) - end -end - -tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data, k)) - -function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T - n = size(A,1) - if k > 0 - fill!(A.data, zero(T)) - return LowerTriangular(A.data) - elseif k == 0 - fill!(A.data, zero(T)) - for i in diagind(A) - A.data[i] = oneunit(T) - end - return LowerTriangular(A.data) - else - for i in diagind(A) - A.data[i] = oneunit(T) - end - return LowerTriangular(triu!(A.data, k)) - end -end - -function tril!(A::UnitLowerTriangular, k::Integer=0) - for i in diagind(A) - A.data[i] = oneunit(eltype(A)) - end - return tril!(LowerTriangular(A.data), k) -end - -adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data)) -adjoint(A::UpperTriangular) = LowerTriangular(adjoint(A.data)) -adjoint(A::UnitLowerTriangular) = UnitUpperTriangular(adjoint(A.data)) -adjoint(A::UnitUpperTriangular) = UnitLowerTriangular(adjoint(A.data)) -transpose(A::LowerTriangular) = UpperTriangular(transpose(A.data)) -transpose(A::UpperTriangular) = LowerTriangular(transpose(A.data)) -transpose(A::UnitLowerTriangular) = UnitUpperTriangular(transpose(A.data)) -transpose(A::UnitUpperTriangular) = UnitLowerTriangular(transpose(A.data)) - -transpose!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L', false, true)) -transpose!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L', false, true)) -transpose!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U', false, true)) -transpose!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U', false, true)) -adjoint!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L' , true, true)) -adjoint!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L' , true, true)) -adjoint!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U' , true, true)) 
-adjoint!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U' , true, true)) - -diag(A::LowerTriangular) = diag(A.data) -diag(A::UnitLowerTriangular) = fill(oneunit(eltype(A)), size(A,1)) -diag(A::UpperTriangular) = diag(A.data) -diag(A::UnitUpperTriangular) = fill(oneunit(eltype(A)), size(A,1)) - -# Unary operations --(A::LowerTriangular) = LowerTriangular(-A.data) --(A::UpperTriangular) = UpperTriangular(-A.data) -function -(A::UnitLowerTriangular) - Anew = -A.data - for i = 1:size(A, 1) - Anew[i, i] = -A[i, i] - end - LowerTriangular(Anew) -end -function -(A::UnitUpperTriangular) - Anew = -A.data - for i = 1:size(A, 1) - Anew[i, i] = -A[i, i] - end - UpperTriangular(Anew) -end - -tr(A::LowerTriangular) = tr(A.data) -tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A)) -tr(A::UpperTriangular) = tr(A.data) -tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A)) - -# copy and scale -function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}} - n = size(B,1) - for j = 1:n - for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j) - @inbounds A[i,j] = B[i,j] - end - end - return A -end -function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}} - n = size(B,1) - for j = 1:n - for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n - @inbounds A[i,j] = B[i,j] - end - end - return A -end - -# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a number. -# be permissive here and require compatibility later in _triscale! -@inline mul!(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular, C::Number, alpha::Number, beta::Number) = - _triscale!(A, B, C, MulAddMul(alpha, beta)) -@inline mul!(A::UpperOrLowerTriangular, B::Number, C::UpperOrLowerTriangular, alpha::Number, beta::Number) = - _triscale!(A, B, C, MulAddMul(alpha, beta)) - -function _triscale!(A::UpperTriangular, B::UpperTriangular, c::Number, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - for i = 1:j - @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) - end - end - return A -end -function _triscale!(A::UpperTriangular, c::Number, B::UpperTriangular, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - for i = 1:j - @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) - end - end - return A -end -function _triscale!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular, c::Number, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - @inbounds _modify!(_add, c, A, (j,j)) - for i = 1:(j - 1) - @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) - end - end - return A -end -function _triscale!(A::UpperOrUnitUpperTriangular, c::Number, B::UnitUpperTriangular, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - @inbounds _modify!(_add, c, A, (j,j)) - for i = 1:(j - 1) - @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) - end - end - return A -end -function _triscale!(A::LowerTriangular, B::LowerTriangular, c::Number, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - for i = j:n - @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) - end - end - return A -end -function _triscale!(A::LowerTriangular, c::Number, B::LowerTriangular, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - for i = j:n - @inbounds _modify!(_add, c * B.data[i,j], 
A.data, (i,j)) - end - end - return A -end -function _triscale!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular, c::Number, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - @inbounds _modify!(_add, c, A, (j,j)) - for i = (j + 1):n - @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) - end - end - return A -end -function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriangular, _add) - n = checksquare(B) - iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - for j = 1:n - @inbounds _modify!(_add, c, A, (j,j)) - for i = (j + 1):n - @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) - end - end - return A -end - -rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul()) -lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul()) - -function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector) - require_one_based_indexing(x, y) - m = size(A, 1) - (length(x) == m == length(y)) || throw(DimensionMismatch()) - if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - end - x₁ = x[1] - r = dot(x₁, A[1,1], y[1]) - @inbounds for j in 2:m - yj = y[j] - if !iszero(yj) - temp = adjoint(A[1,j]) * x₁ - @simd for i in 2:j - temp += adjoint(A[i,j]) * x[i] - end - r += dot(temp, yj) - end - end - return r -end -function dot(x::AbstractVector, A::UnitUpperTriangular, y::AbstractVector) - require_one_based_indexing(x, y) - m = size(A, 1) - (length(x) == m == length(y)) || throw(DimensionMismatch()) - if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - end - x₁ = first(x) - r = dot(x₁, y[1]) - @inbounds for j in 2:m - yj = y[j] - if !iszero(yj) - temp = adjoint(A[1,j]) * x₁ - @simd for i in 2:j-1 - temp += adjoint(A[i,j]) * x[i] - end - r += dot(temp, yj) - r += dot(x[j], yj) - end - end - return r -end -function dot(x::AbstractVector, A::LowerTriangular, y::AbstractVector) - require_one_based_indexing(x, y) - m = size(A, 1) - (length(x) == m == length(y)) || throw(DimensionMismatch()) - if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - end - r = zero(typeof(dot(first(x), first(A), first(y)))) - @inbounds for j in 1:m - yj = y[j] - if !iszero(yj) - temp = adjoint(A[j,j]) * x[j] - @simd for i in j+1:m - temp += adjoint(A[i,j]) * x[i] - end - r += dot(temp, yj) - end - end - return r -end -function dot(x::AbstractVector, A::UnitLowerTriangular, y::AbstractVector) - require_one_based_indexing(x, y) - m = size(A, 1) - (length(x) == m == length(y)) || throw(DimensionMismatch()) - if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - end - r = zero(typeof(dot(first(x), first(y)))) - @inbounds for j in 1:m - yj = y[j] - if !iszero(yj) - temp = x[j] - @simd for i in j+1:m - temp += adjoint(A[i,j]) * x[i] - end - r += dot(temp, yj) - end - end - return r -end - -fillstored!(A::LowerTriangular, x) = (fillband!(A.data, x, 1-size(A,1), 0); A) -fillstored!(A::UnitLowerTriangular, x) = (fillband!(A.data, x, 1-size(A,1), -1); A) -fillstored!(A::UpperTriangular, x) = (fillband!(A.data, x, 0, size(A,2)-1); A) -fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1); A) - -# Binary operations -+(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data + B.data) -+(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data + B.data) -+(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data + triu(B.data, 1) + I) 
-+(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data + tril(B.data, -1) + I) -+(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) + B.data + I) -+(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) + B.data + I) -+(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) + triu(B.data, 1) + 2I) -+(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I) -+(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B) - --(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data - B.data) --(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data - B.data) --(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data - triu(B.data, 1) - I) --(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data - tril(B.data, -1) - I) --(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) - B.data + I) --(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) - B.data + I) --(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) - triu(B.data, 1)) --(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) - tril(B.data, -1)) --(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B) - -###################### -# BlasFloat routines # -###################### - -lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B) -mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = _multrimat!(C, A, B) -mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = _multrimat!(C, A, B) -mul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = _mulmattri!(C, A, B) -mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = _multrimat!(C, A, B) - -for TC in (:AbstractVector, :AbstractMatrix) - @eval @inline function mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number) - if isone(alpha) && iszero(beta) - return mul!(C, A, B) - else - return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta)) - end - end -end -for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix), - (:AbstractMatrix, :AbstractTriangular), - (:AbstractTriangular, :AbstractTriangular) - ) - @eval @inline function mul!(C::AbstractMatrix, A::$TA, B::$TB, alpha::Number, beta::Number) - if isone(alpha) && iszero(beta) - return mul!(C, A, B) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end - end -end - - -# generic fallback for AbstractTriangular matrices outside of the four subtypes provided here -_multrimat!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = - lmul!(A, inplace_adj_or_trans(B)(C, _parent(B))) -_mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = rmul!(copyto!(C, A), B) - -# preserve triangular structure in in-place multiplication -for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular), - (:UpperTriangular, :UpperTriangular, :UnitUpperTriangular), - (:UpperTriangular, :UnitUpperTriangular, :UpperTriangular), - (:UnitUpperTriangular, :UnitUpperTriangular, :UnitUpperTriangular), - (:LowerTriangular, :LowerTriangular, :LowerTriangular), - (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular), - (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular), - 
(:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular)) - @eval function _multrimat!(C::$cty, A::$aty, B::$bty) - _multrimat!(parent(C), A, B) - return C - end -end - -# direct multiplication/division -for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'), - (:UnitLowerTriangular, 'L', 'U'), - (:UpperTriangular, 'U', 'N'), - (:UnitUpperTriangular, 'U', 'U')) - @eval begin - # Vector multiplication - lmul!(A::$t{T,<:StridedMatrix}, b::StridedVector{T}) where {T<:BlasFloat} = - BLAS.trmv!($uploc, 'N', $isunitc, A.data, b) - - # Matrix multiplication - lmul!(A::$t{T,<:StridedMatrix}, B::StridedMatrix{T}) where {T<:BlasFloat} = - BLAS.trmm!('L', $uploc, 'N', $isunitc, one(T), A.data, B) - rmul!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} = - BLAS.trmm!('R', $uploc, 'N', $isunitc, one(T), B.data, A) - - # Left division - ldiv!(A::$t{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.trtrs!($uploc, 'N', $isunitc, A.data, B) - - # Right division - rdiv!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} = - BLAS.trsm!('R', $uploc, 'N', $isunitc, one(T), B.data, A) - - # Matrix inverse - inv!(A::$t{T,S}) where {T<:BlasFloat,S<:StridedMatrix} = - $t{T,S}(LAPACK.trtri!($uploc, $isunitc, A.data)) - - # Error bounds for triangular solve - errorbounds(A::$t{T,<:StridedMatrix}, X::StridedVecOrMat{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.trrfs!($uploc, 'N', $isunitc, A.data, B, X) - - # Condition numbers - function cond(A::$t{<:BlasFloat,<:StridedMatrix}, p::Real=2) - checksquare(A) - if p == 1 - return inv(LAPACK.trcon!('O', $uploc, $isunitc, A.data)) - elseif p == Inf - return inv(LAPACK.trcon!('I', $uploc, $isunitc, A.data)) - else # use fallback - return cond(copyto!(similar(parent(A)), A), p) - end - end - end -end - -# adjoint/transpose multiplication ('uploc' reversed) -for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'), - (:UnitLowerTriangular, 'U', 'U'), - (:UpperTriangular, 'L', 'N'), - (:UnitUpperTriangular, 'L', 'U')) - @eval begin - # Vector multiplication - lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasFloat} = - BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasReal} = - BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasComplex} = - BLAS.trmv!($uploc, 'C', $isunitc, parent(parent(A)), b) - - # Matrix multiplication - lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasFloat} = - BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasComplex} = - BLAS.trmm!('L', $uploc, 'C', $isunitc, one(T), parent(parent(A)), B) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasReal} = - BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B) - - rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} = - BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} = - BLAS.trmm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A) - rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} = - BLAS.trmm!('R', $uploc, 'T', 
$isunitc, one(T), parent(parent(B)), A) - - # Left division - ldiv!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B) - ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B) - ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - LAPACK.trtrs!($uploc, 'C', $isunitc, parent(parent(A)), B) - - # Right division - rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} = - BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} = - BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} = - BLAS.trsm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A) - end -end - -# redirect back to BLAS -for t in (:UpperTriangular, :UnitUpperTriangular, :LowerTriangular, :UnitLowerTriangular) - @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{T,<:StridedMatrix}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = - lmul!(A, copyto!(C, B)) - @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = - lmul!(A, copyto!(C, B)) - @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = - lmul!(A, copyto!(C, B)) - @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} = - rmul!(copyto!(C, A), B) - @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasFloat} = - rmul!(copyto!(C, A), B) - @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} = - rmul!(copyto!(C, A), B) - - @eval ldiv!(C::StridedVecOrMat{T}, A::$t{T,<:StridedMatrix}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = - ldiv!(A, copyto!(C, B)) - @eval ldiv!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = - ldiv!(A, copyto!(C, B)) - @eval ldiv!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = - ldiv!(A, copyto!(C, B)) - @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} = - rdiv!(copyto!(C, A), B) - @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasFloat} = - rdiv!(copyto!(C, A), B) - @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} = - rdiv!(copyto!(C, A), B) -end - -for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular) - @eval function inv(A::$t{T}) where {T} - S = typeof(inv(oneunit(T))) - if S <: BlasFloat || S === T # i.e. A is unitless - $t(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A)))) - else - J = (one(T)*I)(size(A, 1)) - $t(ldiv!(similar(A, S, size(A)), A, J)) - end - end -end - -errorbounds(A::AbstractTriangular{T,<:AbstractMatrix}, X::AbstractVecOrMat{T}, B::AbstractVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} = - error("not implemented yet! 
Please submit a pull request.") -function errorbounds(A::AbstractTriangular{TA,<:AbstractMatrix}, X::AbstractVecOrMat{TX}, B::AbstractVecOrMat{TB}) where {TA<:Number,TX<:Number,TB<:Number} - TAXB = promote_type(TA, TB, TX, Float32) - errorbounds(convert(AbstractMatrix{TAXB}, A), convert(AbstractArray{TAXB}, X), convert(AbstractArray{TAXB}, B)) -end - -# Eigensystems -## Notice that trecv works for quasi-triangular matrices and therefore the lower sub diagonal must be zeroed before calling the subroutine -function eigvecs(A::UpperTriangular{<:BlasFloat,<:StridedMatrix}) - LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data)) -end -function eigvecs(A::UnitUpperTriangular{<:BlasFloat,<:StridedMatrix}) - for i = 1:size(A, 1) - A.data[i,i] = 1 - end - LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data)) -end -function eigvecs(A::LowerTriangular{<:BlasFloat,<:StridedMatrix}) - LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)')) -end -function eigvecs(A::UnitLowerTriangular{<:BlasFloat,<:StridedMatrix}) - for i = 1:size(A, 1) - A.data[i,i] = 1 - end - LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)')) -end - -#################### -# Generic routines # -#################### - -for (t, unitt) in ((UpperTriangular, UnitUpperTriangular), - (LowerTriangular, UnitLowerTriangular)) - @eval begin - (*)(A::$t, x::Number) = $t(A.data*x) - - function (*)(A::$unitt, x::Number) - B = A.data*x - for i = 1:size(A, 1) - B[i,i] = x - end - $t(B) - end - - (*)(x::Number, A::$t) = $t(x*A.data) - - function (*)(x::Number, A::$unitt) - B = x*A.data - for i = 1:size(A, 1) - B[i,i] = x - end - $t(B) - end - - (/)(A::$t, x::Number) = $t(A.data/x) - - function (/)(A::$unitt, x::Number) - B = A.data/x - invx = inv(x) - for i = 1:size(A, 1) - B[i,i] = invx - end - $t(B) - end - - (\)(x::Number, A::$t) = $t(x\A.data) - - function (\)(x::Number, A::$unitt) - B = x\A.data - invx = inv(x) - for i = 1:size(A, 1) - B[i,i] = invx - end - $t(B) - end - - lmul!(A::$t, B::AbstractVecOrMat) = @inline _multrimat!(B, A, B) - lmul!(A::$unitt, B::AbstractVecOrMat) = @inline _multrimat!(B, A, B) - - rmul!(A::AbstractMatrix, B::$t) = @inline _mulmattri!(A, A, B) - rmul!(A::AbstractMatrix, B::$unitt) = @inline _mulmattri!(A, A, B) - end -end - -## Generic triangular multiplication -function _multrimat!(C::AbstractVecOrMat, A::UpperTriangular, B::AbstractVecOrMat) - require_one_based_indexing(C, A, B) - m, n = size(B, 1), size(B, 2) - N = size(A, 1) - if m != N - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != N || nc != n - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)")) - end - @inbounds for j in 1:n - for i in 1:m - Cij = A.data[i,i] * B[i,j] - for k in i + 1:m - Cij += A.data[i,k] * B[k,j] - end - C[i,j] = Cij - end - end - return C -end -function _multrimat!(C::AbstractVecOrMat, A::UnitUpperTriangular, B::AbstractVecOrMat) - require_one_based_indexing(C, A, B) - m, n = size(B, 1), size(B, 2) - N = size(A, 1) - if m != N - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - - mc, nc = size(C, 1), size(C, 2) - if mc != N || nc != n - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)")) - end - @inbounds for j in 1:n - for i in 1:m - Cij = oneunit(eltype(A)) * B[i,j] - for k in i + 1:m - Cij += A.data[i,k] * B[k,j] - end - C[i,j] = Cij - end - end - return C -end -function _multrimat!(C::AbstractVecOrMat, 
A::LowerTriangular, B::AbstractVecOrMat) - require_one_based_indexing(C, A, B) - m, n = size(B, 1), size(B, 2) - N = size(A, 1) - if m != N - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != N || nc != n - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)")) - end - @inbounds for j in 1:n - for i in m:-1:1 - Cij = A.data[i,i] * B[i,j] - for k in 1:i - 1 - Cij += A.data[i,k] * B[k,j] - end - C[i,j] = Cij - end - end - return C -end -function _multrimat!(C::AbstractVecOrMat, A::UnitLowerTriangular, B::AbstractVecOrMat) - require_one_based_indexing(C, A, B) - m, n = size(B, 1), size(B, 2) - N = size(A, 1) - if m != N - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != N || nc != n - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)")) - end - @inbounds for j in 1:n - for i in m:-1:1 - Cij = oneunit(eltype(A)) * B[i,j] - for k in 1:i - 1 - Cij += A.data[i,k] * B[k,j] - end - C[i,j] = Cij - end - end - return C -end - -function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UpperTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A, 1), size(A, 2) - N = size(B, 1) - if n != N - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != m || nc != N - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)")) - end - @inbounds for i in 1:m - for j in n:-1:1 - Cij = A[i,j] * B.data[j,j] - for k in 1:j - 1 - Cij += A[i,k] * B.data[k,j] - end - C[i,j] = Cij - end - end - return C -end -function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UnitUpperTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A, 1), size(A, 2) - N = size(B, 1) - if n != N - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != m || nc != N - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)")) - end - @inbounds for i in 1:m - for j in n:-1:1 - Cij = A[i,j] * oneunit(eltype(B)) - for k in 1:j - 1 - Cij += A[i,k] * B.data[k,j] - end - C[i,j] = Cij - end - end - return C -end -function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::LowerTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A, 1), size(A, 2) - N = size(B, 1) - if n != N - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != m || nc != N - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)")) - end - @inbounds for i in 1:m - for j in 1:n - Cij = A[i,j] * B.data[j,j] - for k in j + 1:n - Cij += A[i,k] * B.data[k,j] - end - C[i,j] = Cij - end - end - return C -end -function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UnitLowerTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A, 1), size(A, 2) - N = size(B, 1) - if n != N - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N")) - end - mc, nc = size(C, 1), size(C, 2) - if mc != m || nc != N - throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)")) - end - @inbounds for i in 1:m - for j in 1:n - Cij = A[i,j] * oneunit(eltype(B)) - for k in j + 1:n - Cij += A[i,k] * B.data[k,j] - end - 
C[i,j] = Cij - end - end - return C -end - -#Generic solver using naive substitution -# manually hoisting b[j] significantly improves performance as of Dec 2015 -# manually eliding bounds checking significantly improves performance as of Dec 2015 -# directly indexing A.data rather than A significantly improves performance as of Dec 2015 -# replacing repeated references to A.data with [Adata = A.data and references to Adata] -# does not significantly impact performance as of Dec 2015 -# replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj] -# does not significantly impact performance as of Dec 2015 -ldiv!(A::AbstractTriangular, b::AbstractVecOrMat) = @inline ldiv!(b, A, b) -function ldiv!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) - require_one_based_indexing(C, A, B) - nA, mA = size(A) - n = size(B, 1) - if nA != n - throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal")) - end - if size(C) != size(B) - throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))")) - end - @inbounds for (c, b) in zip(eachcol(C), eachcol(B)) - ldiv!(c, A, b) - end - C -end -@inline function ldiv!(c::AbstractVector, A::AbstractTriangular, b::AbstractVector) - @boundscheck begin - require_one_based_indexing(c, A, b) - n = size(A, 2) - if !(n == length(b)) - throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - if !(n == length(c)) - throw(DimensionMismatch("length of output c, $(length(c)), does not match length of right hand side b, $(length(b))")) - end - end - return _ldiv!(c, A, b) -end - -_uconvert_copyto!(c, b, oA) = (c .= Ref(oA) .\ b) -_uconvert_copyto!(c::AbstractArray{T}, b::AbstractArray{T}, _) where {T} = copyto!(c, b) - -@inline _ustrip(a) = oneunit(a) \ a -@inline _ustrip(a::Union{AbstractFloat,Integer,Complex,Rational}) = a - -# all of the following _ldiv! 
methods are "unsafe" in that they assume one-based indexing -# and compatible sizes -function _ldiv!(c::AbstractVector, A::UpperTriangular, b::AbstractVector) - n = size(A, 2) - c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A))) - @inbounds for j in n:-1:1 - ajj = A.data[j,j] - iszero(ajj) && throw(SingularException(j)) - cj = c[j] = _ustrip(ajj) \ c[j] - for i in j-1:-1:1 - c[i] -= _ustrip(A.data[i,j]) * cj - end - end - return c -end -function _ldiv!(c::AbstractVector, A::UnitUpperTriangular, b::AbstractVector) - n = size(A, 2) - c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A))) - @inbounds for j in n:-1:1 - cj = c[j] - for i in 1:j-1 - c[i] -= _ustrip(A.data[i,j]) * cj - end - end - return c -end -function _ldiv!(c::AbstractVector, A::LowerTriangular, b::AbstractVector) - n = size(A, 2) - c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A))) - @inbounds for j in 1:n - ajj = A.data[j,j] - iszero(ajj) && throw(SingularException(j)) - cj = c[j] = _ustrip(ajj) \ c[j] - for i in j+1:n - c[i] -= _ustrip(A.data[i,j]) * cj - end - end - return c -end -function _ldiv!(c::AbstractVector, A::UnitLowerTriangular, b::AbstractVector) - n = size(A, 2) - c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A))) - @inbounds for j in 1:n - cj = c[j] - for i in j+1:n - c[i] -= _ustrip(A.data[i,j]) * cj - end - end - return c -end - - -# in the following transpose and conjugate transpose naive substitution variants, -# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015 -function _ldiv!(c::AbstractVector, xA::UpperTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector) - tfun = adj_or_trans(parent(xA)) - A = parent(parent(xA)) - n = size(A, 2) - @inbounds for j in n:-1:1 - ajj = A[j,j] - iszero(ajj) && throw(SingularException(j)) - bj = b[j] - for i in j+1:n - bj -= tfun(A[i,j]) * c[i] - end - c[j] = tfun(ajj) \ bj - end - return c -end -function _ldiv!(c::AbstractVector, xA::UnitUpperTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector) - tfun = adj_or_trans(parent(xA)) - A = parent(parent(xA)) - oA = oneunit(eltype(A)) - n = size(A, 2) - @inbounds for j in n:-1:1 - bj = b[j] - for i in j+1:n - bj -= tfun(A[i,j]) * c[i] - end - c[j] = oA \ bj - end - return c -end -function _ldiv!(c::AbstractVector, xA::LowerTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector) - tfun = adj_or_trans(parent(xA)) - A = parent(parent(xA)) - n = size(A, 2) - @inbounds for j in 1:n - ajj = A[j,j] - iszero(ajj) && throw(SingularException(j)) - bj = b[j] - for i in 1:j-1 - bj -= tfun(A[i,j]) * c[i] - end - c[j] = tfun(ajj) \ bj - end - return c -end -function _ldiv!(c::AbstractVector, xA::UnitLowerTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector) - tfun = adj_or_trans(parent(xA)) - A = parent(parent(xA)) - oA = oneunit(eltype(A)) - n = size(A, 2) - @inbounds for j in 1:n - bj = b[j] - for i in 1:j-1 - bj -= tfun(A[i,j]) * c[i] - end - c[j] = oA \ bj - end - return c -end - -rdiv!(A::AbstractMatrix, B::AbstractTriangular) = @inline _rdiv!(A, A, B) -function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - if size(C) != size(A) - throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))")) - end - @inbounds for i in 1:m - for j in 1:n - Aij = A[i,j] - for k in 1:j - 1 - Aij -= C[i,k]*B.data[k,j] - end - iszero(B.data[j,j]) && throw(SingularException(j)) - 
C[i,j] = Aij / B.data[j,j] - end - end - C -end -function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UnitUpperTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - if size(C) != size(A) - throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))")) - end - @inbounds for i in 1:m - for j in 1:n - Aij = A[i,j] - for k in 1:j - 1 - Aij -= C[i,k]*B.data[k,j] - end - C[i,j] = Aij / oneunit(eltype(B)) - end - end - C -end -function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::LowerTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - if size(C) != size(A) - throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))")) - end - @inbounds for i in 1:m - for j in n:-1:1 - Aij = A[i,j] - for k in j + 1:n - Aij -= C[i,k]*B.data[k,j] - end - iszero(B.data[j,j]) && throw(SingularException(j)) - C[i,j] = Aij / B.data[j,j] - end - end - C -end -function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UnitLowerTriangular) - require_one_based_indexing(C, A, B) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - if size(C) != size(A) - throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))")) - end - @inbounds for i in 1:m - for j in n:-1:1 - Aij = A[i,j] - for k in j + 1:n - Aij -= C[i,k]*B.data[k,j] - end - C[i,j] = Aij / oneunit(eltype(B)) - end - end - C -end - -lmul!(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(lmul!(A, triu!(B.data))) -lmul!(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(lmul!(A, triu!(B.data))) -lmul!(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(lmul!(A, tril!(B.data))) -lmul!(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(lmul!(A, tril!(B.data))) - -ldiv!(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(ldiv!(A, triu!(B.data))) -ldiv!(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(ldiv!(A, triu!(B.data))) -ldiv!(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(ldiv!(A, tril!(B.data))) -ldiv!(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(ldiv!(A, tril!(B.data))) - -rdiv!(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(rdiv!(triu!(A.data), B)) -rdiv!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rdiv!(triu!(A.data), B)) -rdiv!(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(rdiv!(tril!(A.data), B)) -rdiv!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rdiv!(tril!(A.data), B)) - -rmul!(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B)) -rmul!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B)) -rmul!(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B)) -rmul!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B)) - -# Promotion -## Promotion methods in matmul don't apply to triangular multiplication since -## it is inplace. Hence we have to make very similar definitions, but without -## allocation of a result array. 
For multiplication and unit diagonal division -## the element type doesn't have to be stable under division whereas that is -## necessary in the general triangular solve problem. - -_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA<:Integer,TB<:Integer} = - _init_eltype(*, TA, TB) -_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA,TB} = - _init_eltype(op, TA, TB) -## The general promotion methods -function *(A::AbstractTriangular, B::AbstractTriangular) - TAB = _init_eltype(*, eltype(A), eltype(B)) - if TAB <: BlasFloat - lmul!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB)) - else - mul!(similar(B, TAB, size(B)), A, B) - end -end - -for mat in (:AbstractVector, :AbstractMatrix) - ### Multiplication with triangle to the left and hence rhs cannot be transposed. - @eval function *(A::AbstractTriangular, B::$mat) - require_one_based_indexing(B) - TAB = _init_eltype(*, eltype(A), eltype(B)) - if TAB <: BlasFloat - lmul!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB)) - else - mul!(similar(B, TAB, size(B)), A, B) - end - end - ### Left division with triangle to the left hence rhs cannot be transposed. No quotients. - @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat) - require_one_based_indexing(B) - TAB = _inner_type_promotion(\, eltype(A), eltype(B)) - if TAB <: BlasFloat - ldiv!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB)) - else - ldiv!(similar(B, TAB, size(B)), A, B) - end - end - ### Left division with triangle to the left hence rhs cannot be transposed. Quotients. - @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat) - require_one_based_indexing(B) - TAB = _init_eltype(\, eltype(A), eltype(B)) - if TAB <: BlasFloat - ldiv!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB)) - else - ldiv!(similar(B, TAB, size(B)), A, B) - end - end - ### Right division with triangle to the right hence lhs cannot be transposed. No quotients. - @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular}) - require_one_based_indexing(A) - TAB = _inner_type_promotion(/, eltype(A), eltype(B)) - if TAB <: BlasFloat - rdiv!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B)) - else - _rdiv!(similar(A, TAB, size(A)), A, B) - end - end - ### Right division with triangle to the right hence lhs cannot be transposed. Quotients. - @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular}) - require_one_based_indexing(A) - TAB = _init_eltype(/, eltype(A), eltype(B)) - if TAB <: BlasFloat - rdiv!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B)) - else - _rdiv!(similar(A, TAB, size(A)), A, B) - end - end -end -### Multiplication with triangle to the right and hence lhs cannot be transposed. -# Only for AbstractMatrix, hence outside the above loop. -function *(A::AbstractMatrix, B::AbstractTriangular) - require_one_based_indexing(A) - TAB = _init_eltype(*, eltype(A), eltype(B)) - if TAB <: BlasFloat - rmul!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B)) - else - mul!(similar(A, TAB, size(A)), A, B) - end -end -# ambiguity resolution with definitions in matmul.jl -*(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent) -*(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent) - -## Some Triangular-Triangular cases. We might want to write tailored methods -## for these cases, but I'm not sure it is worth it. 
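The promotion rules above are easiest to see with small integer inputs: a general triangular solve promotes to a division-stable element type, while a unit-diagonal solve needs no quotients and keeps the multiplication type:

```julia
using LinearAlgebra

UpperTriangular([2 1; 0 4]) \ [1, 2]       # promotes: Vector{Float64} [0.25, 0.5]
UnitUpperTriangular([1 3; 0 1]) \ [1, 2]   # no quotients needed: Vector{Int} [-5, 2]
```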
-for f in (:*, :\) - @eval begin - ($f)(A::LowerTriangular, B::LowerTriangular) = - LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix)) - ($f)(A::LowerTriangular, B::UnitLowerTriangular) = - LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix)) - ($f)(A::UnitLowerTriangular, B::LowerTriangular) = - LowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix)) - ($f)(A::UnitLowerTriangular, B::UnitLowerTriangular) = - UnitLowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix)) - ($f)(A::UpperTriangular, B::UpperTriangular) = - UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix)) - ($f)(A::UpperTriangular, B::UnitUpperTriangular) = - UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix)) - ($f)(A::UnitUpperTriangular, B::UpperTriangular) = - UpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix)) - ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) = - UnitUpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix)) - end -end -(/)(A::LowerTriangular, B::LowerTriangular) = - LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular)) -(/)(A::LowerTriangular, B::UnitLowerTriangular) = - LowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular)) -(/)(A::UnitLowerTriangular, B::LowerTriangular) = - LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular)) -(/)(A::UnitLowerTriangular, B::UnitLowerTriangular) = - UnitLowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular)) -(/)(A::UpperTriangular, B::UpperTriangular) = - UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular)) -(/)(A::UpperTriangular, B::UnitUpperTriangular) = - UpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular)) -(/)(A::UnitUpperTriangular, B::UpperTriangular) = - UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular)) -(/)(A::UnitUpperTriangular, B::UnitUpperTriangular) = - UnitUpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular)) - -# Complex matrix power for upper triangular factor, see: -# Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix", -# SIAM J. Matrix Anal. & Appl., 32 (3), (2011) 1056–1078. -# Higham and Lin, "An improved Schur-Padé algorithm for fractional powers of -# a matrix and their Fréchet derivatives", SIAM. J. Matrix Anal. & Appl., -# 34(3), (2013) 1341–1360. 
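In effect, combining matching triangles stays triangular, and the result is unit-triangular only when both operands are:

```julia
using LinearAlgebra

L  = LowerTriangular([1.0 0.0; 2.0 3.0])
UL = UnitLowerTriangular([1.0 0.0; 4.0 1.0])
(L * UL)  isa LowerTriangular       # true
(L / UL)  isa LowerTriangular       # true
(UL * UL) isa UnitLowerTriangular   # true
```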
-function powm!(A0::UpperTriangular{<:BlasFloat}, p::Real) - if abs(p) >= 1 - throw(ArgumentError("p must be a real number in (-1,1), got $p")) - end - - normA0 = opnorm(A0, 1) - rmul!(A0, 1/normA0) - - theta = [1.53e-5, 2.25e-3, 1.92e-2, 6.08e-2, 1.25e-1, 2.03e-1, 2.84e-1] - n = checksquare(A0) - - A, m, s = invsquaring(A0, theta) - A = I - A - - # Compute accurate diagonal of I - T - sqrt_diag!(A0, A, s) - for i = 1:n - A[i, i] = -A[i, i] - end - # Compute the Padé approximant - c = 0.5 * (p - m) / (2 * m - 1) - triu!(A) - S = c * A - Stmp = similar(S) - for j = m-1:-1:1 - j4 = 4 * j - c = (-p - j) / (j4 + 2) - for i = 1:n - @inbounds S[i, i] = S[i, i] + 1 - end - copyto!(Stmp, S) - mul!(S, A, c) - ldiv!(Stmp, S.data) - - c = (p - j) / (j4 - 2) - for i = 1:n - @inbounds S[i, i] = S[i, i] + 1 - end - copyto!(Stmp, S) - mul!(S, A, c) - ldiv!(Stmp, S.data) - end - for i = 1:n - S[i, i] = S[i, i] + 1 - end - copyto!(Stmp, S) - mul!(S, A, -p) - ldiv!(Stmp, S.data) - for i = 1:n - @inbounds S[i, i] = S[i, i] + 1 - end - - blockpower!(A0, S, p/(2^s)) - for m = 1:s - mul!(Stmp.data, S, S) - copyto!(S, Stmp) - blockpower!(A0, S, p/(2^(s-m))) - end - rmul!(S, normA0^p) - return S -end -powm(A::LowerTriangular, p::Real) = copy(transpose(powm!(copy(transpose(A)), p::Real))) - -# Complex matrix logarithm for the upper triangular factor, see: -# Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for -# the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169. -# Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix -# logarithm and estimating the condition number", SIAM J. Sci. Comput., -# 35(4), (2013), C394–C410. -# -# Based on the code available at http://eprints.ma.man.ac.uk/1851/02/logm.zip, -# Copyright (c) 2011, Awad H. Al-Mohy and Nicholas J. Higham -# Julia version relicensed with permission from original authors -log(A::UpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A) -log(A::UnitUpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A) -log(A::LowerTriangular) = copy(transpose(log(copy(transpose(A))))) -log(A::UnitLowerTriangular) = copy(transpose(log(copy(transpose(A))))) - -function log_quasitriu(A0::AbstractMatrix{T}) where T<:BlasFloat - # allocate real A if log(A) will be real and complex A otherwise - n = checksquare(A0) - if isreal(A0) && (!istriu(A0) || !any(x -> real(x) < zero(real(T)), diag(A0))) - A = T <: Complex ? real(A0) : copy(A0) - else - A = T <: Complex ? copy(A0) : complex(A0) - end - if A0 isa UnitUpperTriangular - A = UpperTriangular(parent(A)) - @inbounds for i in 1:n - A[i,i] = 1 - end - end - Y0 = _log_quasitriu!(A0, A) - # return complex result for complex input - Y = T <: Complex ? complex(Y0) : Y0 - - if A0 isa UpperTriangular || A0 isa UnitUpperTriangular - return UpperTriangular(Y) - else - return Y - end -end -# type-stable implementation of log_quasitriu -# A is a copy of A0 that is overwritten while computing the result. It has the same eltype -# as the result. 
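As a usage sketch, fractional powers of a triangular matrix should reach this Schur-Padé code through `^` (note that `powm!` itself only accepts exponents in `(-1, 1)`):

```julia
using LinearAlgebra

U = UpperTriangular([4.0 1.0; 0.0 9.0])
U^0.7 * U^0.3 ≈ U   # true, up to roundoff: the fractional powers multiply back to U
```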
-function _log_quasitriu!(A0, A) - # Find Padé degree m and s while replacing A with A^(1/2^s) - m, s = _find_params_log_quasitriu!(A) - - # Compute accurate superdiagonal of A - _pow_superdiag_quasitriu!(A, A0, 0.5^s) - - # Compute accurate block diagonal of A - _sqrt_pow_diag_quasitriu!(A, A0, s) - - # Get the Gauss-Legendre quadrature points and weights - R = zeros(Float64, m, m) - for i = 1:m - 1 - R[i,i+1] = i / sqrt((2 * i)^2 - 1) - R[i+1,i] = R[i,i+1] - end - x,V = eigen(R) - w = Vector{Float64}(undef, m) - for i = 1:m - x[i] = (x[i] + 1) / 2 - w[i] = V[1,i]^2 - end - - # Compute the Padé approximation - t = eltype(A) - n = size(A, 1) - Y = zeros(t, n, n) - B = similar(A) - for k = 1:m - B .= t(x[k]) .* A - @inbounds for i in 1:n - B[i,i] += 1 - end - Y .+= t(w[k]) .* rdiv_quasitriu!(A, B) - end - - # Scale back - lmul!(2.0^s, Y) - - # Compute accurate diagonal and superdiagonal of log(A) - _log_diag_quasitriu!(Y, A0) - - return Y -end - -# Auxiliary functions for matrix logarithm and matrix power - -# Find Padé degree m and s while replacing A with A^(1/2^s) -# Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for -# the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169. -# from Algorithm 4.1 -function _find_params_log_quasitriu!(A) - maxsqrt = 100 - theta = [1.586970738772063e-005, - 2.313807884242979e-003, - 1.938179313533253e-002, - 6.209171588994762e-002, - 1.276404810806775e-001, - 2.060962623452836e-001, - 2.879093714241194e-001] - tmax = size(theta, 1) - n = size(A, 1) - p = 0 - m = 0 - - # Find s0, the smallest s such that the ρ(triu(A)^(1/2^s) - I) ≤ theta[tmax], where ρ(X) - # is the spectral radius of X - d = complex.(@view(A[diagind(A)])) - dm1 = d .- 1 - s = 0 - while norm(dm1, Inf) > theta[tmax] && s < maxsqrt - d .= sqrt.(d) - dm1 .= d .- 1 - s = s + 1 - end - s0 = s - - # Compute repeated roots - for k = 1:min(s, maxsqrt) - _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A) - end - - # these three never needed at the same time, so reuse the same temporary - AmI = AmI4 = AmI5 = A - I - AmI2 = AmI * AmI - AmI3 = AmI2 * AmI - d2 = sqrt(opnorm(AmI2, 1)) - d3 = cbrt(opnorm(AmI3, 1)) - alpha2 = max(d2, d3) - foundm = false - if alpha2 <= theta[2] - m = alpha2 <= theta[1] ? 1 : 2 - foundm = true - end - - while !foundm - more_sqrt = false - mul!(AmI4, AmI2, AmI2) - d4 = opnorm(AmI4, 1)^(1/4) - alpha3 = max(d3, d4) - if alpha3 <= theta[tmax] - local j - for outer j = 3:tmax - if alpha3 <= theta[j] - break - end - end - if j <= 6 - m = j - break - elseif alpha3 / 2 <= theta[5] && p < 2 - more_sqrt = true - p = p + 1 - end - end - - if !more_sqrt - mul!(AmI5, AmI3, AmI2) - d5 = opnorm(AmI5, 1)^(1/5) - alpha4 = max(d4, d5) - eta = min(alpha3, alpha4) - if eta <= theta[tmax] - j = 0 - for outer j = 6:tmax - if eta <= theta[j] - m = j - break - end - end - break - end - end - - if s == maxsqrt - m = tmax - break - end - _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A) - copyto!(AmI, A) - for i in 1:n - @inbounds AmI[i,i] -= 1 - end - mul!(AmI2, AmI, AmI) - mul!(AmI3, AmI2, AmI) - d3 = cbrt(opnorm(AmI3, 1)) - s = s + 1 - end - return m, s -end - -# Compute accurate diagonal of A = A0^s - I -function sqrt_diag!(A0::UpperTriangular, A::UpperTriangular, s) - n = checksquare(A0) - T = eltype(A) - @inbounds for i = 1:n - a = complex(A0[i,i]) - A[i,i] = _sqrt_pow(a, s) - end -end -# Compute accurate block diagonal of A = A0^s - I for upper quasi-triangular A0 produced -# by the Schur decomposition. 
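A round-trip sketch for the triangular logarithm implemented above (the diagonal entries here are positive, so the result stays real):

```julia
using LinearAlgebra

U = UpperTriangular([1.0 0.5; 0.0 2.0])
L = log(U)                    # inverse scaling and squaring on the triangular factor
exp(Matrix(L)) ≈ Matrix(U)    # true
```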
Diagonal is made of 1x1 and 2x2 blocks. -# 2x2 blocks are real with non-negative conjugate pair eigenvalues -function _sqrt_pow_diag_quasitriu!(A, A0, s) - n = checksquare(A0) - t = typeof(sqrt(zero(eltype(A)))) - i = 1 - @inbounds while i < n - if iszero(A0[i+1,i]) # 1x1 block - A[i,i] = _sqrt_pow(t(A0[i,i]), s) - i += 1 - else # real 2x2 block - @views _sqrt_pow_diag_block_2x2!(A[i:i+1,i:i+1], A0[i:i+1,i:i+1], s) - i += 2 - end - end - if i == n # last block is 1x1 - @inbounds A[n,n] = _sqrt_pow(t(A0[n,n]), s) - end - return A -end -# compute a^(1/2^s)-1 -# Al-Mohy, "A more accurate Briggs method for the logarithm", -# Numer. Algorithms, 59, (2012), 393–402. -# Algorithm 2 -function _sqrt_pow(a::Number, s) - T = typeof(sqrt(zero(a))) - s == 0 && return T(a) - 1 - s0 = s - if imag(a) >= 0 && real(a) <= 0 && !iszero(a) # angle(a) ≥ π / 2 - a = sqrt(a) - s0 = s - 1 - end - z0 = a - 1 - a = sqrt(a) - r = 1 + a - for j = 1:s0-1 - a = sqrt(a) - r = r * (1 + a) - end - return z0 / r -end -# compute A0 = A^(1/2^s)-I for 2x2 real matrices A and A0 -# A has non-negative conjugate pair eigenvalues -# "Improved Inverse Scaling and Squaring Algorithms for the Matrix Logarithm" -# SIAM J. Sci. Comput., 34(4), (2012) C153–C169. doi: 10.1137/110852553 -# Algorithm 5.1 -Base.@propagate_inbounds function _sqrt_pow_diag_block_2x2!(A, A0, s) - _sqrt_real_2x2!(A, A0) - if isone(s) - A[1,1] -= 1 - A[2,2] -= 1 - else - # Z = A - I - z11, z21, z12, z22 = A[1,1] - 1, A[2,1], A[1,2], A[2,2] - 1 - # A = sqrt(A) - _sqrt_real_2x2!(A, A) - # P = A + I - p11, p21, p12, p22 = A[1,1] + 1, A[2,1], A[1,2], A[2,2] + 1 - for i in 1:(s - 2) - # A = sqrt(A) - _sqrt_real_2x2!(A, A) - a11, a21, a12, a22 = A[1,1], A[2,1], A[1,2], A[2,2] - # P += P * A - r11 = p11*(1 + a11) + p12*a21 - r22 = p21*a12 + p22*(1 + a22) - p21 = p21*(1 + a11) + p22*a21 - p12 = p11*a12 + p12*(1 + a22) - p11 = r11 - p22 = r22 - end - # A = Z / P - c = inv(p11*p22 - p21*p12) - A[1,1] = (p22*z11 - p21*z12) * c - A[2,1] = (p22*z21 - p21*z22) * c - A[1,2] = (p11*z12 - p12*z11) * c - A[2,2] = (p11*z22 - p12*z21) * c - end - return A -end -# Compute accurate superdiagonal of A = A0^s - I for upper quasi-triangular A0 produced -# by a Schur decomposition. -# Higham and Lin, "A Schur–Padé Algorithm for Fractional Powers of a Matrix" -# SIAM J. Matrix Anal. Appl., 32(3), (2011), 1056–1078. -# Equation 5.6 -# see also blockpower for when A0 is upper triangular -function _pow_superdiag_quasitriu!(A, A0, p) - n = checksquare(A0) - t = eltype(A) - k = 1 - @inbounds while k < n - if !iszero(A[k+1,k]) - k += 2 - continue - end - if !(k == n - 1 || iszero(A[k+2,k+1])) - k += 3 - continue - end - Ak = t(A0[k,k]) - Akp1 = t(A0[k+1,k+1]) - - Akp = Ak^p - Akp1p = Akp1^p - - if Ak == Akp1 - A[k,k+1] = p * A0[k,k+1] * Ak^(p-1) - elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak) - A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak) - else - logAk = log(Ak) - logAkp1 = log(Akp1) - z = (Akp1 - Ak)/(Akp1 + Ak) - if abs(z) > 1 - A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak) - else - w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z))) - dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak); - A[k,k+1] = A0[k,k+1] * dd - end - end - k += 1 - end -end - -# Compute accurate block diagonal and superdiagonal of A = log(A0) for upper -# quasi-triangular A0 produced by the Schur decomposition. 
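The cancellation trick behind `_sqrt_pow` is easiest to see for scalars: it rewrites `a^(1/2^s) - 1` as `(a - 1) / ∏_{j=1}^{s} (a^(1/2^j) + 1)`. The sketch below is simplified (the method above also special-cases arguments near the negative real axis) and uses a hypothetical helper name:

```julia
function briggs_pow_m1(a, s)   # a^(1/2^s) - 1, computed without cancellation
    z0 = a - 1
    r = one(a)
    x = a
    for _ in 1:s
        x = sqrt(x)            # x == a^(1/2^j) after j iterations
        r *= 1 + x
    end
    return z0 / r
end

a, s = 1.0 + 1e-12, 40
a^(1 / 2^s) - 1       # 0.0: the naive form loses everything to rounding
briggs_pow_m1(a, s)   # ≈ 9.1e-25, the accurate value
```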
-function _log_diag_quasitriu!(A, A0) - n = checksquare(A0) - t = eltype(A) - k = 1 - @inbounds while k < n - if iszero(A0[k+1,k]) # 1x1 block - Ak = t(A0[k,k]) - logAk = log(Ak) - A[k,k] = logAk - if k < n - 2 && iszero(A0[k+2,k+1]) - Akp1 = t(A0[k+1,k+1]) - logAkp1 = log(Akp1) - A[k+1,k+1] = logAkp1 - if Ak == Akp1 - A[k,k+1] = A0[k,k+1] / Ak - elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak) - A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak) - else - z = (Akp1 - Ak)/(Akp1 + Ak) - if abs(z) > 1 - A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak) - else - w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z))) - A[k,k+1] = 2 * A0[k,k+1] * w / (Akp1 - Ak) - end - end - k += 2 - else - k += 1 - end - else # real 2x2 block - @views _log_diag_block_2x2!(A[k:k+1,k:k+1], A0[k:k+1,k:k+1]) - k += 2 - end - end - if k == n # last 1x1 block - @inbounds A[n,n] = log(t(A0[n,n])) - end - return A -end -# compute A0 = log(A) for 2x2 real matrices A and A0, where A0 is a diagonal 2x2 block -# produced by real Schur decomposition. -# Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix -# logarithm and estimating the condition number", SIAM J. Sci. Comput., -# 35(4), (2013), C394–C410. -# Eq. 6.1 -Base.@propagate_inbounds function _log_diag_block_2x2!(A, A0) - a, b, c = A0[1,1], A0[1,2], A0[2,1] - # avoid underflow/overflow for large/small b and c - s = sqrt(abs(b)) * sqrt(abs(c)) - θ = atan(s, a) - t = θ / s - au = abs(a) - if au > s - a1 = log1p((s / au)^2) / 2 + log(au) - else - a1 = log1p((au / s)^2) / 2 + log(s) - end - A[1,1] = a1 - A[2,1] = c*t - A[1,2] = b*t - A[2,2] = a1 - return A -end - -# Used only by powm at the moment -# Repeatedly compute the square roots of A so that in the end its -# eigenvalues are close enough to the positive real line -function invsquaring(A0::UpperTriangular, theta) - require_one_based_indexing(theta) - # assumes theta is in ascending order - maxsqrt = 100 - tmax = size(theta, 1) - n = checksquare(A0) - A = complex(copy(A0)) - p = 0 - m = 0 - - # Compute repeated roots - d = complex(diag(A)) - dm1 = d .- 1 - s = 0 - while norm(dm1, Inf) > theta[tmax] && s < maxsqrt - d .= sqrt.(d) - dm1 .= d .- 1 - s = s + 1 - end - s0 = s - for k = 1:min(s, maxsqrt) - A = sqrt(A) - end - - AmI = A - I - d2 = sqrt(opnorm(AmI^2, 1)) - d3 = cbrt(opnorm(AmI^3, 1)) - alpha2 = max(d2, d3) - foundm = false - if alpha2 <= theta[2] - m = alpha2 <= theta[1] ? 
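A numeric check of the closed-form 2x2 block in `_log_diag_block_2x2!` against the generic dense `log`, with `b*c < 0` as in a real Schur block:

```julia
using LinearAlgebra

a, b, c = 1.0, 2.0, -0.5
A = [a b; c a]
s = sqrt(abs(b)) * sqrt(abs(c))   # = 1.0
t = atan(s, a) / s
d = log(hypot(a, s))              # what the guarded log1p expressions above compute
[d b*t; c*t d] ≈ log(A)           # true
```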
1 : 2 - foundm = true - end - - while !foundm - more = false - if s > s0 - d3 = cbrt(opnorm(AmI^3, 1)) - end - d4 = opnorm(AmI^4, 1)^(1/4) - alpha3 = max(d3, d4) - if alpha3 <= theta[tmax] - local j - for outer j = 3:tmax - if alpha3 <= theta[j] - break - elseif alpha3 / 2 <= theta[5] && p < 2 - more = true - p = p + 1 - end - end - if j <= 6 - m = j - foundm = true - break - elseif alpha3 / 2 <= theta[5] && p < 2 - more = true - p = p + 1 - end - end - - if !more - d5 = opnorm(AmI^5, 1)^(1/5) - alpha4 = max(d4, d5) - eta = min(alpha3, alpha4) - if eta <= theta[tmax] - j = 0 - for outer j = 6:tmax - if eta <= theta[j] - m = j - break - end - break - end - end - if s == maxsqrt - m = tmax - break - end - A = sqrt(A) - AmI = A - I - s = s + 1 - end - end - - # Compute accurate superdiagonal of T - p = 1 / 2^s - A = complex(A) - blockpower!(A, A0, p) - return A,m,s -end - -# Compute accurate diagonal and superdiagonal of A = A0^p -function blockpower!(A::UpperTriangular, A0::UpperTriangular, p) - n = checksquare(A0) - @inbounds for k = 1:n-1 - Ak = complex(A0[k,k]) - Akp1 = complex(A0[k+1,k+1]) - - Akp = Ak^p - Akp1p = Akp1^p - - A[k,k] = Akp - A[k+1,k+1] = Akp1p - - if Ak == Akp1 - A[k,k+1] = p * A0[k,k+1] * Ak^(p-1) - elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak) - A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak) - else - logAk = log(Ak) - logAkp1 = log(Akp1) - z = (Akp1 - Ak)/(Akp1 + Ak) - if abs(z) > 1 - A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak) - else - w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z))) - dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak); - A[k,k+1] = A0[k,k+1] * dd - end - end - end -end - -# Unwinding number -unw(x::Real) = 0 -unw(x::Number) = ceil((imag(x) - pi) / (2 * pi)) - -# compute A / B for upper quasi-triangular B, possibly overwriting B -function rdiv_quasitriu!(A, B) - n = checksquare(A) - AG = copy(A) - # use Givens rotations to annihilate 2x2 blocks - @inbounds for k in 1:(n-1) - s = B[k+1,k] - iszero(s) && continue # 1x1 block - G = first(givens(B[k+1,k+1], s, k, k+1)) - rmul!(B, G) - rmul!(AG, G) - end - return rdiv!(AG, UpperTriangular(B)) -end - -# End of auxiliary functions for matrix logarithm and matrix power - -sqrt(A::UpperTriangular) = sqrt_quasitriu(A) -function sqrt(A::UnitUpperTriangular{T}) where T - B = A.data - n = checksquare(B) - t = typeof(sqrt(zero(T))) - R = Matrix{t}(I, n, n) - tt = typeof(oneunit(t)*oneunit(t)) - half = inv(R[1,1]+R[1,1]) # for general, algebraic cases. PR#20214 - @inbounds for j = 1:n - for i = j-1:-1:1 - r::tt = B[i,j] - @simd for k = i+1:j-1 - r -= R[i,k]*R[k,j] - end - iszero(r) || (R[i,j] = half*r) - end - end - return UnitUpperTriangular(R) -end -sqrt(A::LowerTriangular) = copy(transpose(sqrt(copy(transpose(A))))) -sqrt(A::UnitLowerTriangular) = copy(transpose(sqrt(copy(transpose(A))))) - -# Auxiliary functions for matrix square root - -# square root of upper triangular or real upper quasitriangular matrix -function sqrt_quasitriu(A0; blockwidth = eltype(A0) <: Complex ? 
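Usage sketch for the unit-triangular square root defined above: the unit diagonal is kept and only the strictly upper part is solved for:

```julia
using LinearAlgebra

U = UnitUpperTriangular([1.0 4.0; 0.0 1.0])
R = sqrt(U)
R isa UnitUpperTriangular && R * R == U   # true (R[1,2] == 2.0)
```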
512 : 256) - n = checksquare(A0) - T = eltype(A0) - Tr = typeof(sqrt(real(zero(T)))) - Tc = typeof(sqrt(complex(zero(T)))) - if isreal(A0) - is_sqrt_real = true - if istriu(A0) - for i in 1:n - Aii = real(A0[i,i]) - if Aii < zero(Aii) - is_sqrt_real = false - break - end - end - end - if is_sqrt_real - R = zeros(Tr, n, n) - A = real(A0) - else - R = zeros(Tc, n, n) - A = A0 - end - else - A = A0 - R = zeros(Tc, n, n) - end - _sqrt_quasitriu!(R, A; blockwidth=blockwidth, n=n) - Rc = eltype(A0) <: Real ? R : complex(R) - if A0 isa UpperTriangular - return UpperTriangular(Rc) - elseif A0 isa UnitUpperTriangular - return UnitUpperTriangular(Rc) - else - return Rc - end -end - -# in-place recursive sqrt of upper quasi-triangular matrix A from -# Deadman E., Higham N.J., Ralha R. (2013) Blocked Schur Algorithms for Computing the Matrix -# Square Root. Applied Parallel and Scientific Computing. PARA 2012. Lecture Notes in -# Computer Science, vol 7782. https://doi.org/10.1007/978-3-642-36803-5_12 -function _sqrt_quasitriu!(R, A; blockwidth=64, n=checksquare(A)) - if n ≤ blockwidth || !(eltype(R) <: BlasFloat) # base case, perform "point" algorithm - _sqrt_quasitriu_block!(R, A) - else # compute blockwise recursion - split = div(n, 2) - iszero(A[split+1, split]) || (split += 1) # don't split 2x2 diagonal block - r1 = 1:split - r2 = (split + 1):n - n1, n2 = split, n - split - A11, A12, A22 = @views A[r1,r1], A[r1,r2], A[r2,r2] - R11, R12, R22 = @views R[r1,r1], R[r1,r2], R[r2,r2] - # solve diagonal blocks recursively - _sqrt_quasitriu!(R11, A11; blockwidth=blockwidth, n=n1) - _sqrt_quasitriu!(R22, A22; blockwidth=blockwidth, n=n2) - # solve off-diagonal block - R12 .= .- A12 - _sylvester_quasitriu!(R11, R22, R12; blockwidth=blockwidth, nA=n1, nB=n2, raise=false) - end - return R -end - -function _sqrt_quasitriu_block!(R, A) - _sqrt_quasitriu_diag_block!(R, A) - _sqrt_quasitriu_offdiag_block!(R, A) - return R -end - -function _sqrt_quasitriu_diag_block!(R, A) - n = size(R, 1) - ta = eltype(R) <: Complex ? complex(eltype(A)) : eltype(A) - i = 1 - @inbounds while i < n - if iszero(A[i + 1, i]) - R[i, i] = sqrt(ta(A[i, i])) - i += 1 - else - # this branch is never reached when A is complex triangular - @views _sqrt_real_2x2!(R[i:(i + 1), i:(i + 1)], A[i:(i + 1), i:(i + 1)]) - i += 2 - end - end - if i == n - R[n, n] = sqrt(ta(A[n, n])) - end - return R -end - -function _sqrt_quasitriu_offdiag_block!(R, A) - n = size(R, 1) - j = 1 - @inbounds while j ≤ n - jsize_is_2 = j < n && !iszero(A[j + 1, j]) - i = j - 1 - while i > 0 - isize_is_2 = i > 1 && !iszero(A[i, i - 1]) - if isize_is_2 - if jsize_is_2 - _sqrt_quasitriu_offdiag_block_2x2!(R, A, i - 1, j) - else - _sqrt_quasitriu_offdiag_block_2x1!(R, A, i - 1, j) - end - i -= 2 - else - if jsize_is_2 - _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j) - else - _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j) - end - i -= 1 - end - end - j += 2 - !jsize_is_2 - end - return R -end - -# real square root of 2x2 diagonal block of quasi-triangular matrix from real Schur -# decomposition. Eqs 6.8-6.9 and Algorithm 6.5 of -# Higham, 2008, "Functions of Matrices: Theory and Computation", SIAM. 
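What the quasi-triangular route buys is that a real matrix with complex eigenvalues can keep a real square root; for example, the principal root of a 90° rotation is the 45° rotation:

```julia
using LinearAlgebra

A = [0.0 -1.0; 1.0 0.0]
S = sqrt(A)
eltype(S) == Float64 && S ≈ [1.0 -1.0; 1.0 1.0] ./ sqrt(2)   # true
```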
-Base.@propagate_inbounds function _sqrt_real_2x2!(R, A) - # in the real Schur form, A[1, 1] == A[2, 2], and A[2, 1] * A[1, 2] < 0 - θ, a21, a12 = A[1, 1], A[2, 1], A[1, 2] - # avoid overflow/underflow of μ - # for real sqrt, |d| ≤ 2 max(|a12|,|a21|) - μ = sqrt(abs(a12)) * sqrt(abs(a21)) - α = _real_sqrt(θ, μ) - c = 2α - R[1, 1] = α - R[2, 1] = a21 / c - R[1, 2] = a12 / c - R[2, 2] = α - return R -end - -# real part of square root of θ+im*μ -@inline function _real_sqrt(θ, μ) - t = sqrt((abs(θ) + hypot(θ, μ)) / 2) - return θ ≥ 0 ? t : μ / 2t -end - -Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j) - Rii = R[i, i] - Rjj = R[j, j] - iszero(Rii) && iszero(Rjj) && return R - t = eltype(R) - tt = typeof(zero(t)*zero(t)) - r = tt(-A[i, j]) - @simd for k in (i + 1):(j - 1) - r += R[i, k] * R[k, j] - end - iszero(r) && return R - R[i, j] = sylvester(Rii, Rjj, r) - return R -end - -Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j) - jrange = j:(j + 1) - t = eltype(R) - tt = typeof(zero(t)*zero(t)) - r1 = tt(-A[i, j]) - r2 = tt(-A[i, j + 1]) - @simd for k in (i + 1):(j - 1) - rik = R[i, k] - r1 += rik * R[k, j] - r2 += rik * R[k, j + 1] - end - Rjj = @view R[jrange, jrange] - Rij = @view R[i, jrange] - Rij[1] = r1 - Rij[2] = r2 - _sylvester_1x2!(R[i, i], Rjj, Rij) - return R -end - -Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x1!(R, A, i, j) - irange = i:(i + 1) - t = eltype(R) - tt = typeof(zero(t)*zero(t)) - r1 = tt(-A[i, j]) - r2 = tt(-A[i + 1, j]) - @simd for k in (i + 2):(j - 1) - rkj = R[k, j] - r1 += R[i, k] * rkj - r2 += R[i + 1, k] * rkj - end - Rii = @view R[irange, irange] - Rij = @view R[irange, j] - Rij[1] = r1 - Rij[2] = r2 - @views _sylvester_2x1!(Rii, R[j, j], Rij) - return R -end - -Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x2!(R, A, i, j) - irange = i:(i + 1) - jrange = j:(j + 1) - t = eltype(R) - tt = typeof(zero(t)*zero(t)) - for i′ in irange, j′ in jrange - Cij = tt(-A[i′, j′]) - @simd for k in (i + 2):(j - 1) - Cij += R[i′, k] * R[k, j′] - end - R[i′, j′] = Cij - end - Rii = @view R[irange, irange] - Rjj = @view R[jrange, jrange] - Rij = @view R[irange, jrange] - if !iszero(Rij) && !all(isnan, Rij) - _sylvester_2x2!(Rii, Rjj, Rij) - end - return R -end - -# solve Sylvester's equation AX + XB = -C using blockwise recursion until the dimension of -# A and B are no greater than blockwidth, based on Algorithm 1 from -# Jonsson I, Kågström B. Recursive blocked algorithms for solving triangular systems— -# Part I: one-sided and coupled Sylvester-type matrix equations. (2002) ACM Trans Math Softw. -# 28(4), https://doi.org/10.1145/592843.592845. -# specify raise=false to avoid breaking the recursion if a LAPACKException is thrown when -# computing one of the blocks. 
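The 1x1 off-diagonal update above is a scalar Sylvester equation `a*x + x*b + c = 0`, which `sylvester` solves directly as `x = -c / (a + b)`:

```julia
using LinearAlgebra

a, b, c = 2.0, 3.0, 10.0
x = sylvester(a, b, c)   # -2.0
a*x + x*b + c == 0       # true
```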
-function _sylvester_quasitriu!(A, B, C; blockwidth=64, nA=checksquare(A), nB=checksquare(B), raise=true) - if 1 ≤ nA ≤ blockwidth && 1 ≤ nB ≤ blockwidth - _sylvester_quasitriu_base!(A, B, C; raise=raise) - elseif nA ≥ 2nB ≥ 2 - _sylvester_quasitriu_split1!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise) - elseif nB ≥ 2nA ≥ 2 - _sylvester_quasitriu_split2!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise) - else - _sylvester_quasitriu_splitall!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise) - end - return C -end -function _sylvester_quasitriu_base!(A, B, C; raise=true) - try - _, scale = LAPACK.trsyl!('N', 'N', A, B, C) - rmul!(C, -inv(scale)) - catch e - if !(e isa LAPACKException) || raise - throw(e) - end - end - return C -end -function _sylvester_quasitriu_split1!(A, B, C; nA=checksquare(A), kwargs...) - iA = div(nA, 2) - iszero(A[iA + 1, iA]) || (iA += 1) # don't split 2x2 diagonal block - rA1, rA2 = 1:iA, (iA + 1):nA - nA1, nA2 = iA, nA-iA - A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2] - C1, C2 = @views C[rA1,:], C[rA2,:] - _sylvester_quasitriu!(A22, B, C2; nA=nA2, kwargs...) - mul!(C1, A12, C2, true, true) - _sylvester_quasitriu!(A11, B, C1; nA=nA1, kwargs...) - return C -end -function _sylvester_quasitriu_split2!(A, B, C; nB=checksquare(B), kwargs...) - iB = div(nB, 2) - iszero(B[iB + 1, iB]) || (iB += 1) # don't split 2x2 diagonal block - rB1, rB2 = 1:iB, (iB + 1):nB - nB1, nB2 = iB, nB-iB - B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2] - C1, C2 = @views C[:,rB1], C[:,rB2] - _sylvester_quasitriu!(A, B11, C1; nB=nB1, kwargs...) - mul!(C2, C1, B12, true, true) - _sylvester_quasitriu!(A, B22, C2; nB=nB2, kwargs...) - return C -end -function _sylvester_quasitriu_splitall!(A, B, C; nA=checksquare(A), nB=checksquare(B), kwargs...) - iA = div(nA, 2) - iszero(A[iA + 1, iA]) || (iA += 1) # don't split 2x2 diagonal block - iB = div(nB, 2) - iszero(B[iB + 1, iB]) || (iB += 1) # don't split 2x2 diagonal block - rA1, rA2 = 1:iA, (iA + 1):nA - nA1, nA2 = iA, nA-iA - rB1, rB2 = 1:iB, (iB + 1):nB - nB1, nB2 = iB, nB-iB - A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2] - B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2] - C11, C21, C12, C22 = @views C[rA1,rB1], C[rA2,rB1], C[rA1,rB2], C[rA2,rB2] - _sylvester_quasitriu!(A22, B11, C21; nA=nA2, nB=nB1, kwargs...) - mul!(C11, A12, C21, true, true) - _sylvester_quasitriu!(A11, B11, C11; nA=nA1, nB=nB1, kwargs...) - mul!(C22, C21, B12, true, true) - _sylvester_quasitriu!(A22, B22, C22; nA=nA2, nB=nB2, kwargs...) - mul!(C12, A12, C22, true, true) - mul!(C12, C11, B12, true, true) - _sylvester_quasitriu!(A11, B22, C12; nA=nA1, nB=nB2, kwargs...) - return C -end - -# End of auxiliary functions for matrix square root - -# Generic eigensystems -eigvals(A::AbstractTriangular) = diag(A) -function eigvecs(A::AbstractTriangular{T}) where T - TT = promote_type(T, Float32) - if TT <: BlasFloat - return eigvecs(convert(AbstractMatrix{TT}, A)) - else - throw(ArgumentError("eigvecs type $(typeof(A)) not supported. 
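For reference, the user-facing `sylvester` solves the same equation these blocked kernels target, `A*X + X*B + C = 0`:

```julia
using LinearAlgebra

A = [1.0 2.0; 0.0 3.0]
B = [4.0 1.0; 0.0 5.0]
C = [1.0 0.0; 2.0 1.0]
X = sylvester(A, B, C)
A*X + X*B ≈ -C   # true
```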
Please submit a pull request.")) - end -end -det(A::UnitUpperTriangular{T}) where {T} = one(T) -det(A::UnitLowerTriangular{T}) where {T} = one(T) -logdet(A::UnitUpperTriangular{T}) where {T} = zero(T) -logdet(A::UnitLowerTriangular{T}) where {T} = zero(T) -logabsdet(A::UnitUpperTriangular{T}) where {T} = zero(T), one(T) -logabsdet(A::UnitLowerTriangular{T}) where {T} = zero(T), one(T) -det(A::UpperTriangular) = prod(diag(A.data)) -det(A::LowerTriangular) = prod(diag(A.data)) -function logabsdet(A::Union{UpperTriangular{T},LowerTriangular{T}}) where T - sgn = one(T) - abs_det = zero(real(T)) - @inbounds for i in 1:size(A,1) - diag_i = A.data[i,i] - sgn *= sign(diag_i) - abs_det += log(abs(diag_i)) - end - return abs_det, sgn -end - -eigen(A::AbstractTriangular) = Eigen(eigvals(A), eigvecs(A)) - -# Generic singular systems -for func in (:svd, :svd!, :svdvals) - @eval begin - ($func)(A::AbstractTriangular; kwargs...) = ($func)(copyto!(similar(parent(A)), A); kwargs...) - end -end - -factorize(A::AbstractTriangular) = A - -# disambiguation methods: /(Adjoint of AbsVec, <:AbstractTriangular) -/(u::AdjointAbsVec, A::Union{LowerTriangular,UpperTriangular}) = adjoint(adjoint(A) \ u.parent) -/(u::AdjointAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = adjoint(adjoint(A) \ u.parent) -# disambiguation methods: /(Transpose of AbsVec, <:AbstractTriangular) -/(u::TransposeAbsVec, A::Union{LowerTriangular,UpperTriangular}) = transpose(transpose(A) \ u.parent) -/(u::TransposeAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = transpose(transpose(A) \ u.parent) -# disambiguation methods: /(Transpose of AbsVec, Adj/Trans of <:AbstractTriangular) -for (tritype, comptritype) in ((:LowerTriangular, :UpperTriangular), - (:UnitLowerTriangular, :UnitUpperTriangular), - (:UpperTriangular, :LowerTriangular), - (:UnitUpperTriangular, :UnitLowerTriangular)) - @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Adjoint}) = transpose($comptritype(conj(parent(parent(A)))) \ u.parent) - @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Transpose}) = transpose(transpose(A) \ u.parent) -end diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl deleted file mode 100644 index 13f6a1bb70756..0000000000000 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ /dev/null @@ -1,870 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -#### Specialized matrix types #### - -## (complex) symmetric tridiagonal matrices -struct SymTridiagonal{T, V<:AbstractVector{T}} <: AbstractMatrix{T} - dv::V # diagonal - ev::V # superdiagonal - function SymTridiagonal{T, V}(dv, ev) where {T, V<:AbstractVector{T}} - require_one_based_indexing(dv, ev) - if !(length(dv) - 1 <= length(ev) <= length(dv)) - throw(DimensionMismatch("subdiagonal has wrong length. Has length $(length(ev)), but should be either $(length(dv) - 1) or $(length(dv)).")) - end - new{T, V}(dv, ev) - end -end - -""" - SymTridiagonal(dv::V, ev::V) where V <: AbstractVector - -Construct a symmetric tridiagonal matrix from the diagonal (`dv`) and first -sub/super-diagonal (`ev`), respectively. The result is of type `SymTridiagonal` -and provides efficient specialized eigensolvers, but may be converted into a -regular matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). - -For `SymTridiagonal` block matrices, the elements of `dv` are symmetrized. -The argument `ev` is interpreted as the superdiagonal. 
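The triangular `det`/`logabsdet` methods above read everything off the diagonal:

```julia
using LinearAlgebra

L = LowerTriangular([-2.0 0.0; 1.0 3.0])
det(L) == -6.0                 # true: product of the diagonal
la, sgn = logabsdet(L)         # (log(abs(det)), sign)
exp(la) ≈ 6.0 && sgn == -1.0   # true
```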
Blocks from the -subdiagonal are (materialized) transpose of the corresponding superdiagonal blocks. - -# Examples -```jldoctest -julia> dv = [1, 2, 3, 4] -4-element Vector{Int64}: - 1 - 2 - 3 - 4 - -julia> ev = [7, 8, 9] -3-element Vector{Int64}: - 7 - 8 - 9 - -julia> SymTridiagonal(dv, ev) -4×4 SymTridiagonal{Int64, Vector{Int64}}: - 1 7 ⋅ ⋅ - 7 2 8 ⋅ - ⋅ 8 3 9 - ⋅ ⋅ 9 4 - -julia> A = SymTridiagonal(fill([1 2; 3 4], 3), fill([1 2; 3 4], 2)); - -julia> A[1,1] -2×2 Symmetric{Int64, Matrix{Int64}}: - 1 2 - 2 4 - -julia> A[1,2] -2×2 Matrix{Int64}: - 1 2 - 3 4 - -julia> A[2,1] -2×2 Matrix{Int64}: - 1 3 - 2 4 -``` -""" -SymTridiagonal(dv::V, ev::V) where {T,V<:AbstractVector{T}} = SymTridiagonal{T}(dv, ev) -SymTridiagonal{T}(dv::V, ev::V) where {T,V<:AbstractVector{T}} = SymTridiagonal{T,V}(dv, ev) -function SymTridiagonal{T}(dv::AbstractVector, ev::AbstractVector) where {T} - SymTridiagonal(convert(AbstractVector{T}, dv)::AbstractVector{T}, - convert(AbstractVector{T}, ev)::AbstractVector{T}) -end - -""" - SymTridiagonal(A::AbstractMatrix) - -Construct a symmetric tridiagonal matrix from the diagonal and first superdiagonal -of the symmetric matrix `A`. - -# Examples -```jldoctest -julia> A = [1 2 3; 2 4 5; 3 5 6] -3×3 Matrix{Int64}: - 1 2 3 - 2 4 5 - 3 5 6 - -julia> SymTridiagonal(A) -3×3 SymTridiagonal{Int64, Vector{Int64}}: - 1 2 ⋅ - 2 4 5 - ⋅ 5 6 - -julia> B = reshape([[1 2; 2 3], [1 2; 3 4], [1 3; 2 4], [1 2; 2 3]], 2, 2); - -julia> SymTridiagonal(B) -2×2 SymTridiagonal{Matrix{Int64}, Vector{Matrix{Int64}}}: - [1 2; 2 3] [1 3; 2 4] - [1 2; 3 4] [1 2; 2 3] -``` -""" -function SymTridiagonal(A::AbstractMatrix) - if (diag(A, 1) == transpose.(diag(A, -1))) && all(issymmetric.(diag(A, 0))) - SymTridiagonal(diag(A, 0), diag(A, 1)) - else - throw(ArgumentError("matrix is not symmetric; cannot convert to SymTridiagonal")) - end -end - -SymTridiagonal{T,V}(S::SymTridiagonal{T,V}) where {T,V<:AbstractVector{T}} = S -SymTridiagonal{T,V}(S::SymTridiagonal) where {T,V<:AbstractVector{T}} = - SymTridiagonal(convert(V, S.dv)::V, convert(V, S.ev)::V) -SymTridiagonal{T}(S::SymTridiagonal{T}) where {T} = S -SymTridiagonal{T}(S::SymTridiagonal) where {T} = - SymTridiagonal(convert(AbstractVector{T}, S.dv)::AbstractVector{T}, - convert(AbstractVector{T}, S.ev)::AbstractVector{T}) -SymTridiagonal(S::SymTridiagonal) = S - -AbstractMatrix{T}(S::SymTridiagonal) where {T} = - SymTridiagonal(convert(AbstractVector{T}, S.dv)::AbstractVector{T}, - convert(AbstractVector{T}, S.ev)::AbstractVector{T}) -function Matrix{T}(M::SymTridiagonal) where T - n = size(M, 1) - Mf = Matrix{T}(undef, n, n) - n == 0 && return Mf - n > 2 && fill!(Mf, zero(T)) - @inbounds for i = 1:n-1 - Mf[i,i] = symmetric(M.dv[i], :U) - Mf[i+1,i] = transpose(M.ev[i]) - Mf[i,i+1] = M.ev[i] - end - Mf[n,n] = symmetric(M.dv[n], :U) - return Mf -end -Matrix(M::SymTridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M) -Array(M::SymTridiagonal) = Matrix(M) - -size(A::SymTridiagonal) = (length(A.dv), length(A.dv)) -function size(A::SymTridiagonal, d::Integer) - if d < 1 - throw(ArgumentError("dimension must be ≥ 1, got $d")) - elseif d<=2 - return length(A.dv) - else - return 1 - end -end - -similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T), similar(S.ev, T)) -similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(S.dv, T, dims) - -copyto!(dest::SymTridiagonal, src::SymTridiagonal) = - (copyto!(dest.dv, src.dv); copyto!(dest.ev, _evview(src)); dest) - -#Elementary 
operations -for func in (:conj, :copy, :real, :imag) - @eval ($func)(M::SymTridiagonal) = SymTridiagonal(($func)(M.dv), ($func)(M.ev)) -end - -transpose(S::SymTridiagonal) = S -adjoint(S::SymTridiagonal{<:Real}) = S -adjoint(S::SymTridiagonal) = Adjoint(S) -permutedims(S::SymTridiagonal) = S -function permutedims(S::SymTridiagonal, perm) - Base.checkdims_perm(S, S, perm) - NTuple{2}(perm) == (2, 1) ? permutedims(S) : S -end -Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...) - -ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S)) -issymmetric(S::SymTridiagonal) = true - -tr(S::SymTridiagonal) = sum(S.dv) - -function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number - # every branch call similar(..., ::Int) to make sure the - # same vector type is returned independent of n - absn = abs(n) - if absn == 0 - return copyto!(similar(M.dv, length(M.dv)), M.dv) - elseif absn == 1 - return copyto!(similar(M.ev, length(M.dv)-1), _evview(M)) - elseif absn <= size(M,1) - return fill!(similar(M.dv, size(M,1)-absn), zero(T)) - else - throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ", - "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix"))) - end -end -function diag(M::SymTridiagonal, n::Integer=0) - # every branch call similar(..., ::Int) to make sure the - # same vector type is returned independent of n - if n == 0 - return copyto!(similar(M.dv, length(M.dv)), symmetric.(M.dv, :U)) - elseif n == 1 - return copyto!(similar(M.ev, length(M.dv)-1), _evview(M)) - elseif n == -1 - return copyto!(similar(M.ev, length(M.dv)-1), transpose.(_evview(M))) - elseif n <= size(M,1) - throw(ArgumentError("requested diagonal contains undefined zeros of an array type")) - else - throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ", - "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix"))) - end -end - -+(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv+B.dv, _evview(A)+_evview(B)) --(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv-B.dv, _evview(A)-_evview(B)) --(A::SymTridiagonal) = SymTridiagonal(-A.dv, -A.ev) -*(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv*B, A.ev*B) -*(B::Number, A::SymTridiagonal) = SymTridiagonal(B*A.dv, B*A.ev) -/(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv/B, A.ev/B) -\(B::Number, A::SymTridiagonal) = SymTridiagonal(B\A.dv, B\A.ev) -==(A::SymTridiagonal{<:Number}, B::SymTridiagonal{<:Number}) = - (A.dv == B.dv) && (_evview(A) == _evview(B)) -==(A::SymTridiagonal, B::SymTridiagonal) = - size(A) == size(B) && all(i -> A[i,i] == B[i,i], axes(A, 1)) && (_evview(A) == _evview(B)) - -function dot(x::AbstractVector, S::SymTridiagonal, y::AbstractVector) - require_one_based_indexing(x, y) - nx, ny = length(x), length(y) - (nx == size(S, 1) == ny) || throw(DimensionMismatch("dot")) - if nx ≤ 1 - nx == 0 && return dot(zero(eltype(x)), zero(eltype(S)), zero(eltype(y))) - return dot(x[1], S.dv[1], y[1]) - end - dv, ev = S.dv, S.ev - @inbounds begin - x₀ = x[1] - x₊ = x[2] - sub = transpose(ev[1]) - r = dot(adjoint(dv[1])*x₀ + adjoint(sub)*x₊, y[1]) - for j in 2:nx-1 - x₋, x₀, x₊ = x₀, x₊, x[j+1] - sup, sub = transpose(sub), transpose(ev[j]) - r += dot(adjoint(sup)*x₋ + adjoint(dv[j])*x₀ + adjoint(sub)*x₊, y[j]) - end - r += dot(adjoint(transpose(sub))*x₀ + adjoint(dv[nx])*x₊, y[nx]) - end - return r -end - -(\)(T::SymTridiagonal, B::AbstractVecOrMat) = ldlt(T)\B - -# 
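The three-argument `dot` above walks the two bands directly; it agrees with the dense evaluation without ever forming `S * y`:

```julia
using LinearAlgebra

S = SymTridiagonal([1.0, 2.0, 3.0], [0.5, 0.5])
x = [1.0, 2.0, 3.0]
y = [4.0, 5.0, 6.0]
dot(x, S, y) ≈ dot(x, Matrix(S) * y)   # true
```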
division with optional shift for use in shifted-Hessenberg solvers (hessenberg.jl): -ldiv!(A::SymTridiagonal, B::AbstractVecOrMat; shift::Number=false) = ldiv!(ldlt(A, shift=shift), B) -rdiv!(B::AbstractVecOrMat, A::SymTridiagonal; shift::Number=false) = rdiv!(B, ldlt(A, shift=shift)) - -eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}) = Eigen(LAPACK.stegr!('V', A.dv, A.ev)...) -eigen(A::SymTridiagonal{T}) where T = eigen!(copymutable_oftype(A, eigtype(T))) - -eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, irange::UnitRange) = - Eigen(LAPACK.stegr!('V', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)...) -eigen(A::SymTridiagonal{T}, irange::UnitRange) where T = - eigen!(copymutable_oftype(A, eigtype(T)), irange) - -eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, vl::Real, vu::Real) = - Eigen(LAPACK.stegr!('V', 'V', A.dv, A.ev, vl, vu, 0, 0)...) -eigen(A::SymTridiagonal{T}, vl::Real, vu::Real) where T = - eigen!(copymutable_oftype(A, eigtype(T)), vl, vu) - -eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}) = LAPACK.stev!('N', A.dv, A.ev)[1] -eigvals(A::SymTridiagonal{T}) where T = eigvals!(copymutable_oftype(A, eigtype(T))) - -eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, irange::UnitRange) = - LAPACK.stegr!('N', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)[1] -eigvals(A::SymTridiagonal{T}, irange::UnitRange) where T = - eigvals!(copymutable_oftype(A, eigtype(T)), irange) - -eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, vl::Real, vu::Real) = - LAPACK.stegr!('N', 'V', A.dv, A.ev, vl, vu, 0, 0)[1] -eigvals(A::SymTridiagonal{T}, vl::Real, vu::Real) where T = - eigvals!(copymutable_oftype(A, eigtype(T)), vl, vu) - -#Computes largest and smallest eigenvalue -eigmax(A::SymTridiagonal) = eigvals(A, size(A, 1):size(A, 1))[1] -eigmin(A::SymTridiagonal) = eigvals(A, 1:1)[1] - -#Compute selected eigenvectors only corresponding to particular eigenvalues -eigvecs(A::SymTridiagonal) = eigen(A).vectors - -""" - eigvecs(A::SymTridiagonal[, eigvals]) -> Matrix - -Return a matrix `M` whose columns are the eigenvectors of `A`. (The `k`th eigenvector can -be obtained from the slice `M[:, k]`.) - -If the optional vector of eigenvalues `eigvals` is specified, `eigvecs` -returns the specific corresponding eigenvectors. 
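The LAPACK-backed solvers above can also target part of the spectrum, by index range or by value interval; `eigmax`/`eigmin` are thin wrappers over the index-range form:

```julia
using LinearAlgebra

S = SymTridiagonal([2.0, 2.0, 2.0, 2.0], [-1.0, -1.0, -1.0])
eigvals(S, 1:2) ≈ eigvals(S)[1:2]   # true: only the two smallest eigenvalues
eigmin(S) ≈ 2 - 2cos(π / 5)         # true: the 1-D Laplacian spectrum 2 - 2cos(kπ/5)
```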
- -# Examples -```jldoctest -julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.]) -3×3 SymTridiagonal{Float64, Vector{Float64}}: - 1.0 2.0 ⋅ - 2.0 2.0 3.0 - ⋅ 3.0 1.0 - -julia> eigvals(A) -3-element Vector{Float64}: - -2.1400549446402604 - 1.0000000000000002 - 5.140054944640259 - -julia> eigvecs(A) -3×3 Matrix{Float64}: - 0.418304 -0.83205 0.364299 - -0.656749 -7.39009e-16 0.754109 - 0.627457 0.5547 0.546448 - -julia> eigvecs(A, [1.]) -3×1 Matrix{Float64}: - 0.8320502943378438 - 4.263514128092366e-17 - -0.5547001962252291 -``` -""" -eigvecs(A::SymTridiagonal{<:BlasFloat,<:StridedVector}, eigvals::Vector{<:Real}) = LAPACK.stein!(A.dv, A.ev, eigvals) - -function svdvals!(A::SymTridiagonal) - vals = eigvals!(A) - return sort!(map!(abs, vals, vals); rev=true) -end - -# tril and triu - -function istriu(M::SymTridiagonal, k::Integer=0) - if k <= -1 - return true - elseif k == 0 - return iszero(_evview(M)) - else # k >= 1 - return iszero(_evview(M)) && iszero(M.dv) - end -end -istril(M::SymTridiagonal, k::Integer) = istriu(M, -k) -iszero(M::SymTridiagonal) = iszero(_evview(M)) && iszero(M.dv) -isone(M::SymTridiagonal) = iszero(_evview(M)) && all(isone, M.dv) -isdiag(M::SymTridiagonal) = iszero(_evview(M)) - - -function tril!(M::SymTridiagonal{T}, k::Integer=0) where T - n = length(M.dv) - if !(-n - 1 <= k <= n - 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix"))) - elseif k < -1 - fill!(M.ev, zero(T)) - fill!(M.dv, zero(T)) - return Tridiagonal(M.ev,M.dv,copy(M.ev)) - elseif k == -1 - fill!(M.dv, zero(T)) - return Tridiagonal(M.ev,M.dv,zero(M.ev)) - elseif k == 0 - return Tridiagonal(M.ev,M.dv,zero(M.ev)) - elseif k >= 1 - return Tridiagonal(M.ev,M.dv,copy(M.ev)) - end -end - -function triu!(M::SymTridiagonal{T}, k::Integer=0) where T - n = length(M.dv) - if !(-n + 1 <= k <= n + 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix"))) - elseif k > 1 - fill!(M.ev, zero(T)) - fill!(M.dv, zero(T)) - return Tridiagonal(M.ev,M.dv,copy(M.ev)) - elseif k == 1 - fill!(M.dv, zero(T)) - return Tridiagonal(zero(M.ev),M.dv,M.ev) - elseif k == 0 - return Tridiagonal(zero(M.ev),M.dv,M.ev) - elseif k <= -1 - return Tridiagonal(M.ev,M.dv,copy(M.ev)) - end -end - -################### -# Generic methods # -################### - -## structured matrix methods ## -function Base.replace_in_print_matrix(A::SymTridiagonal, i::Integer, j::Integer, s::AbstractString) - i==j-1||i==j||i==j+1 ? s : Base.replace_with_centered_mark(s) -end - -# Implements the determinant using principal minors -# a, b, c are assumed to be the subdiagonal, diagonal, and superdiagonal of -# a tridiagonal matrix. -#Reference: -# R. 
Usmani, "Inversion of a tridiagonal Jacobi matrix", -# Linear Algebra and its Applications 212-213 (1994), pp.413-414 -# doi:10.1016/0024-3795(94)90414-6 -function det_usmani(a::V, b::V, c::V, shift::Number=0) where {T,V<:AbstractVector{T}} - require_one_based_indexing(a, b, c) - n = length(b) - θa = oneunit(T)+zero(shift) - if n == 0 - return θa - end - θb = b[1]+shift - for i in 2:n - θb, θa = (b[i]+shift)*θb - a[i-1]*c[i-1]*θa, θb - end - return θb -end - -# det with optional diagonal shift for use with shifted Hessenberg factorizations -det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift) -logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift)) - -@inline function Base.isassigned(A::SymTridiagonal, i::Int, j::Int) - @boundscheck checkbounds(Bool, A, i, j) || return false - if i == j - return @inbounds isassigned(A.dv, i) - elseif i == j + 1 - return @inbounds isassigned(A.ev, j) - elseif i + 1 == j - return @inbounds isassigned(A.ev, i) - else - return true - end -end - -@inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T - @boundscheck checkbounds(A, i, j) - if i == j - return symmetric((@inbounds A.dv[i]), :U)::symmetric_type(eltype(A.dv)) - elseif i == j + 1 - return copy(transpose(@inbounds A.ev[j])) # materialized for type stability - elseif i + 1 == j - return @inbounds A.ev[i] - else - return zero(T) - end -end - -@inline function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer) - @boundscheck checkbounds(A, i, j) - if i == j - @inbounds A.dv[i] = x - else - throw(ArgumentError("cannot set off-diagonal entry ($i, $j)")) - end - return x -end - -## Tridiagonal matrices ## -struct Tridiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T} - dl::V # sub-diagonal - d::V # diagonal - du::V # sup-diagonal - du2::V # supsup-diagonal for pivoting in LU - function Tridiagonal{T,V}(dl, d, du) where {T,V<:AbstractVector{T}} - require_one_based_indexing(dl, d, du) - n = length(d) - if (length(dl) != n-1 || length(du) != n-1) && !(length(d) == 0 && length(dl) == 0 && length(du) == 0) - throw(ArgumentError(string("cannot construct Tridiagonal from incompatible ", - "lengths of subdiagonal, diagonal and superdiagonal: ", - "($(length(dl)), $(length(d)), $(length(du)))"))) - end - new{T,V}(dl, d, du) - end - # constructor used in lu! - function Tridiagonal{T,V}(dl, d, du, du2) where {T,V<:AbstractVector{T}} - require_one_based_indexing(dl, d, du, du2) - # length checks? - new{T,V}(dl, d, du, du2) - end -end - -""" - Tridiagonal(dl::V, d::V, du::V) where V <: AbstractVector - -Construct a tridiagonal matrix from the first subdiagonal, diagonal, and first superdiagonal, -respectively. The result is of type `Tridiagonal` and provides efficient specialized linear -solvers, but may be converted into a regular matrix with -[`convert(Array, _)`](@ref) (or `Array(_)` for short). -The lengths of `dl` and `du` must be one less than the length of `d`. 
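A quick check of the Usmani recurrence above, `θ_i = b_i*θ_{i-1} - a_{i-1}*c_{i-1}*θ_{i-2}` with `θ_n` the determinant:

```julia
using LinearAlgebra

T = Tridiagonal([1.0, 2.0], [4.0, 5.0, 6.0], [3.0, 1.0])
det(T)                    # 94.0, from the three-term recurrence
det(T) ≈ det(Matrix(T))   # true
```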
- -# Examples -```jldoctest -julia> dl = [1, 2, 3]; - -julia> du = [4, 5, 6]; - -julia> d = [7, 8, 9, 0]; - -julia> Tridiagonal(dl, d, du) -4×4 Tridiagonal{Int64, Vector{Int64}}: - 7 4 ⋅ ⋅ - 1 8 5 ⋅ - ⋅ 2 9 6 - ⋅ ⋅ 3 0 -``` -""" -Tridiagonal(dl::V, d::V, du::V) where {T,V<:AbstractVector{T}} = Tridiagonal{T,V}(dl, d, du) -Tridiagonal(dl::V, d::V, du::V, du2::V) where {T,V<:AbstractVector{T}} = Tridiagonal{T,V}(dl, d, du, du2) -function Tridiagonal{T}(dl::AbstractVector, d::AbstractVector, du::AbstractVector) where {T} - Tridiagonal(map(x->convert(AbstractVector{T}, x), (dl, d, du))...) -end -function Tridiagonal{T,V}(A::Tridiagonal) where {T,V<:AbstractVector{T}} - Tridiagonal{T,V}(A.dl, A.d, A.du) -end - -""" - Tridiagonal(A) - -Construct a tridiagonal matrix from the first sub-diagonal, -diagonal and first super-diagonal of the matrix `A`. - -# Examples -```jldoctest -julia> A = [1 2 3 4; 1 2 3 4; 1 2 3 4; 1 2 3 4] -4×4 Matrix{Int64}: - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - -julia> Tridiagonal(A) -4×4 Tridiagonal{Int64, Vector{Int64}}: - 1 2 ⋅ ⋅ - 1 2 3 ⋅ - ⋅ 2 3 4 - ⋅ ⋅ 3 4 -``` -""" -Tridiagonal(A::AbstractMatrix) = Tridiagonal(diag(A,-1), diag(A,0), diag(A,1)) - -Tridiagonal(A::Tridiagonal) = A -Tridiagonal{T}(A::Tridiagonal{T}) where {T} = A -function Tridiagonal{T}(A::Tridiagonal) where {T} - dl, d, du = map(x->convert(AbstractVector{T}, x)::AbstractVector{T}, - (A.dl, A.d, A.du)) - if isdefined(A, :du2) - Tridiagonal(dl, d, du, convert(AbstractVector{T}, A.du2)::AbstractVector{T}) - else - Tridiagonal(dl, d, du) - end -end - -size(M::Tridiagonal) = (length(M.d), length(M.d)) -function size(M::Tridiagonal, d::Integer) - if d < 1 - throw(ArgumentError("dimension d must be ≥ 1, got $d")) - elseif d <= 2 - return length(M.d) - else - return 1 - end -end - -function Matrix{T}(M::Tridiagonal) where {T} - A = Matrix{T}(undef, size(M)) - n = length(M.d) - n == 0 && return A - n > 2 && fill!(A, zero(T)) - for i in 1:n-1 - A[i,i] = M.d[i] - A[i+1,i] = M.dl[i] - A[i,i+1] = M.du[i] - end - A[n,n] = M.d[n] - A -end -Matrix(M::Tridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M) -Array(M::Tridiagonal) = Matrix(M) - -similar(M::Tridiagonal, ::Type{T}) where {T} = Tridiagonal(similar(M.dl, T), similar(M.d, T), similar(M.du, T)) -similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(M.d, T, dims) - -# Operations on Tridiagonal matrices -copyto!(dest::Tridiagonal, src::Tridiagonal) = (copyto!(dest.dl, src.dl); copyto!(dest.d, src.d); copyto!(dest.du, src.du); dest) - -#Elementary operations -for func in (:conj, :copy, :real, :imag) - @eval function ($func)(M::Tridiagonal) - Tridiagonal(($func)(M.dl), ($func)(M.d), ($func)(M.du)) - end -end - -adjoint(S::Tridiagonal) = Adjoint(S) -transpose(S::Tridiagonal) = Transpose(S) -adjoint(S::Tridiagonal{<:Real}) = Tridiagonal(S.du, S.d, S.dl) -transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl) -permutedims(T::Tridiagonal) = Tridiagonal(T.du, T.d, T.dl) -function permutedims(T::Tridiagonal, perm) - Base.checkdims_perm(T, T, perm) - NTuple{2}(perm) == (2, 1) ? 
permutedims(T) : T -end -Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...)) -Base.copy(tS::Transpose{<:Any,<:Tridiagonal}) = (S = tS.parent; Tridiagonal(map(x -> copy.(transpose.(x)), (S.du, S.d, S.dl))...)) - -ishermitian(S::Tridiagonal) = all(ishermitian, S.d) && all(Iterators.map((x, y) -> x == y', S.du, S.dl)) -issymmetric(S::Tridiagonal) = all(issymmetric, S.d) && all(Iterators.map((x, y) -> x == transpose(y), S.du, S.dl)) - -\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:AbstractVecOrMat}) = copy(A) \ B - -function diag(M::Tridiagonal{T}, n::Integer=0) where T - # every branch call similar(..., ::Int) to make sure the - # same vector type is returned independent of n - if n == 0 - return copyto!(similar(M.d, length(M.d)), M.d) - elseif n == -1 - return copyto!(similar(M.dl, length(M.dl)), M.dl) - elseif n == 1 - return copyto!(similar(M.du, length(M.du)), M.du) - elseif abs(n) <= size(M,1) - return fill!(similar(M.d, size(M,1)-abs(n)), zero(T)) - else - throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ", - "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix"))) - end -end - -@inline function Base.isassigned(A::Tridiagonal, i::Int, j::Int) - @boundscheck checkbounds(A, i, j) - if i == j - return @inbounds isassigned(A.d, i) - elseif i == j + 1 - return @inbounds isassigned(A.dl, j) - elseif i + 1 == j - return @inbounds isassigned(A.du, i) - else - return true - end -end - -@inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T - @boundscheck checkbounds(A, i, j) - if i == j - return @inbounds A.d[i] - elseif i == j + 1 - return @inbounds A.dl[j] - elseif i + 1 == j - return @inbounds A.du[i] - else - return zero(T) - end -end - -@inline function setindex!(A::Tridiagonal, x, i::Integer, j::Integer) - @boundscheck checkbounds(A, i, j) - if i == j - @inbounds A.d[i] = x - elseif i - j == 1 - @inbounds A.dl[j] = x - elseif j - i == 1 - @inbounds A.du[i] = x - elseif !iszero(x) - throw(ArgumentError(string("cannot set entry ($i, $j) off ", - "the tridiagonal band to a nonzero value ($x)"))) - end - return x -end - -## structured matrix methods ## -function Base.replace_in_print_matrix(A::Tridiagonal,i::Integer,j::Integer,s::AbstractString) - i==j-1||i==j||i==j+1 ? 
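Indexing sketch for the band structure: off-band entries are structural zeros that can be read but only overwritten with zero, while the three stored bands accept any value:

```julia
using LinearAlgebra

T = Tridiagonal([1, 2], [3, 4, 5], [6, 7])
T[3, 1]         # 0, a structural zero
T[1, 2] = 8     # fine: the first superdiagonal is stored
# T[3, 1] = 9   # would throw an ArgumentError: off the tridiagonal band
```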
s : Base.replace_with_centered_mark(s) -end - - -#tril and triu - -iszero(M::Tridiagonal) = iszero(M.dl) && iszero(M.d) && iszero(M.du) -isone(M::Tridiagonal) = iszero(M.dl) && all(isone, M.d) && iszero(M.du) -function istriu(M::Tridiagonal, k::Integer=0) - if k <= -1 - return true - elseif k == 0 - return iszero(M.dl) - elseif k == 1 - return iszero(M.dl) && iszero(M.d) - else # k >= 2 - return iszero(M.dl) && iszero(M.d) && iszero(M.du) - end -end -function istril(M::Tridiagonal, k::Integer=0) - if k >= 1 - return true - elseif k == 0 - return iszero(M.du) - elseif k == -1 - return iszero(M.du) && iszero(M.d) - else # k <= -2 - return iszero(M.du) && iszero(M.d) && iszero(M.dl) - end -end -isdiag(M::Tridiagonal) = iszero(M.dl) && iszero(M.du) - -function tril!(M::Tridiagonal{T}, k::Integer=0) where T - n = length(M.d) - if !(-n - 1 <= k <= n - 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix"))) - elseif k < -1 - fill!(M.dl, zero(T)) - fill!(M.d, zero(T)) - fill!(M.du, zero(T)) - elseif k == -1 - fill!(M.d, zero(T)) - fill!(M.du, zero(T)) - elseif k == 0 - fill!(M.du, zero(T)) - end - return M -end - -function triu!(M::Tridiagonal{T}, k::Integer=0) where T - n = length(M.d) - if !(-n + 1 <= k <= n + 1) - throw(ArgumentError(string("the requested diagonal, $k, must be at least ", - "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix"))) - elseif k > 1 - fill!(M.dl, zero(T)) - fill!(M.d, zero(T)) - fill!(M.du, zero(T)) - elseif k == 1 - fill!(M.dl, zero(T)) - fill!(M.d, zero(T)) - elseif k == 0 - fill!(M.dl, zero(T)) - end - return M -end - -tr(M::Tridiagonal) = sum(M.d) - -################### -# Generic methods # -################### - -+(A::Tridiagonal, B::Tridiagonal) = Tridiagonal(A.dl+B.dl, A.d+B.d, A.du+B.du) --(A::Tridiagonal, B::Tridiagonal) = Tridiagonal(A.dl-B.dl, A.d-B.d, A.du-B.du) --(A::Tridiagonal) = Tridiagonal(-A.dl, -A.d, -A.du) -*(A::Tridiagonal, B::Number) = Tridiagonal(A.dl*B, A.d*B, A.du*B) -*(B::Number, A::Tridiagonal) = Tridiagonal(B*A.dl, B*A.d, B*A.du) -/(A::Tridiagonal, B::Number) = Tridiagonal(A.dl/B, A.d/B, A.du/B) -\(B::Number, A::Tridiagonal) = Tridiagonal(B\A.dl, B\A.d, B\A.du) - -==(A::Tridiagonal, B::Tridiagonal) = (A.dl==B.dl) && (A.d==B.d) && (A.du==B.du) -function ==(A::Tridiagonal, B::SymTridiagonal) - iseq = all(Iterators.map((x, y) -> x == transpose(y), A.du, A.dl)) - iseq = iseq && A.du == _evview(B) - iseq && all(Iterators.map((x, y) -> x == symmetric(y, :U), A.d, B.dv)) -end -==(A::SymTridiagonal, B::Tridiagonal) = B == A - -det(A::Tridiagonal) = det_usmani(A.dl, A.d, A.du) - -AbstractMatrix{T}(M::Tridiagonal) where {T} = Tridiagonal{T}(M) -Tridiagonal{T}(M::SymTridiagonal{T}) where {T} = Tridiagonal(M) -function SymTridiagonal{T}(M::Tridiagonal) where T - if issymmetric(M) - return SymTridiagonal{T}(convert(AbstractVector{T},M.d), convert(AbstractVector{T},M.dl)) - else - throw(ArgumentError("Tridiagonal is not symmetric, cannot convert to SymTridiagonal")) - end -end - -Base._sum(A::Tridiagonal, ::Colon) = sum(A.d) + sum(A.dl) + sum(A.du) -function Base._sum(A::SymTridiagonal, ::Colon) - se = sum(_evview(A)) - symmetric(sum(A.dv), :U) + se + transpose(se) -end - -function Base._sum(A::Tridiagonal, dims::Integer) - res = Base.reducedim_initarray(A, dims, zero(eltype(A))) - n = length(A.d) - if n == 0 - return res - elseif n == 1 - res[1] = A.d[1] - return res - end - @inbounds begin - if dims == 1 - res[1] = A.dl[1] + A.d[1] - for i = 2:n-1 - res[i] = A.dl[i] 
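The band predicates above inspect only the stored vectors, so they cost O(n):

```julia
using LinearAlgebra

T = Tridiagonal([0.0, 0.0], [1.0, 2.0, 3.0], [4.0, 5.0])
istriu(T)      # true: the subdiagonal is zero, so T is upper triangular
istriu(T, 1)   # false: the main diagonal is nonzero
isdiag(T)      # false: the superdiagonal is nonzero
```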
+ A.d[i] + A.du[i-1] - end - res[n] = A.d[n] + A.du[n-1] - elseif dims == 2 - res[1] = A.d[1] + A.du[1] - for i = 2:n-1 - res[i] = A.dl[i-1] + A.d[i] + A.du[i] - end - res[n] = A.dl[n-1] + A.d[n] - elseif dims >= 3 - for i = 1:n-1 - res[i,i+1] = A.du[i] - res[i,i] = A.d[i] - res[i+1,i] = A.dl[i] - end - res[n,n] = A.d[n] - end - end - res -end - -function Base._sum(A::SymTridiagonal, dims::Integer) - res = Base.reducedim_initarray(A, dims, zero(eltype(A))) - n = length(A.dv) - if n == 0 - return res - elseif n == 1 - res[1] = A.dv[1] - return res - end - @inbounds begin - if dims == 1 - res[1] = transpose(A.ev[1]) + symmetric(A.dv[1], :U) - for i = 2:n-1 - res[i] = transpose(A.ev[i]) + symmetric(A.dv[i], :U) + A.ev[i-1] - end - res[n] = symmetric(A.dv[n], :U) + A.ev[n-1] - elseif dims == 2 - res[1] = symmetric(A.dv[1], :U) + A.ev[1] - for i = 2:n-1 - res[i] = transpose(A.ev[i-1]) + symmetric(A.dv[i], :U) + A.ev[i] - end - res[n] = transpose(A.ev[n-1]) + symmetric(A.dv[n], :U) - elseif dims >= 3 - for i = 1:n-1 - res[i,i+1] = A.ev[i] - res[i,i] = symmetric(A.dv[i], :U) - res[i+1,i] = transpose(A.ev[i]) - end - res[n,n] = symmetric(A.dv[n], :U) - end - end - res -end - -function dot(x::AbstractVector, A::Tridiagonal, y::AbstractVector) - require_one_based_indexing(x, y) - nx, ny = length(x), length(y) - (nx == size(A, 1) == ny) || throw(DimensionMismatch()) - if nx ≤ 1 - nx == 0 && return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - return dot(x[1], A.d[1], y[1]) - end - @inbounds begin - x₀ = x[1] - x₊ = x[2] - dl, d, du = A.dl, A.d, A.du - r = dot(adjoint(d[1])*x₀ + adjoint(dl[1])*x₊, y[1]) - for j in 2:nx-1 - x₋, x₀, x₊ = x₀, x₊, x[j+1] - r += dot(adjoint(du[j-1])*x₋ + adjoint(d[j])*x₀ + adjoint(dl[j])*x₊, y[j]) - end - r += dot(adjoint(du[nx-1])*x₀ + adjoint(d[nx])*x₊, y[nx]) - end - return r -end - -function cholesky(S::SymTridiagonal, ::NoPivot = NoPivot(); check::Bool = true) - if !ishermitian(S) - check && checkpositivedefinite(-1) - return Cholesky(S, 'U', convert(BlasInt, -1)) - end - T = choltype(eltype(S)) - cholesky!(Hermitian(Bidiagonal{T}(diag(S, 0), diag(S, 1), :U)), NoPivot(); check = check) -end diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl deleted file mode 100644 index 21ae8a1bb913a..0000000000000 --- a/stdlib/LinearAlgebra/src/uniformscaling.jl +++ /dev/null @@ -1,541 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -import Base: copy, adjoint, getindex, show, transpose, one, zero, inv, - hcat, vcat, hvcat, ^ - -""" - UniformScaling{T<:Number} - -Generically sized uniform scaling operator defined as a scalar times -the identity operator, `λ*I`. Although without an explicit `size`, it -acts similarly to a matrix in many cases and includes support for some -indexing. See also [`I`](@ref). - -!!! compat "Julia 1.6" - Indexing using ranges is available as of Julia 1.6. - -# Examples -```jldoctest -julia> J = UniformScaling(2.) -UniformScaling{Float64} -2.0*I - -julia> A = [1. 2.; 3. 4.] -2×2 Matrix{Float64}: - 1.0 2.0 - 3.0 4.0 - -julia> J*A -2×2 Matrix{Float64}: - 2.0 4.0 - 6.0 8.0 - -julia> J[1:2, 1:2] -2×2 Matrix{Float64}: - 2.0 0.0 - 0.0 2.0 -``` -""" -struct UniformScaling{T<:Number} - λ::T -end - -""" - I - -An object of type [`UniformScaling`](@ref), representing an identity matrix of any size. 
- -# Examples -```jldoctest -julia> fill(1, (5,6)) * I == fill(1, (5,6)) -true - -julia> [1 2im 3; 1im 2 3] * I -2×3 Matrix{Complex{Int64}}: - 1+0im 0+2im 3+0im - 0+1im 2+0im 3+0im -``` -""" -const I = UniformScaling(true) - -""" - (I::UniformScaling)(n::Integer) - -Construct a `Diagonal` matrix from a `UniformScaling`. - -!!! compat "Julia 1.2" - This method is available as of Julia 1.2. - -# Examples -```jldoctest -julia> I(3) -3×3 Diagonal{Bool, Vector{Bool}}: - 1 ⋅ ⋅ - ⋅ 1 ⋅ - ⋅ ⋅ 1 - -julia> (0.7*I)(3) -3×3 Diagonal{Float64, Vector{Float64}}: - 0.7 ⋅ ⋅ - ⋅ 0.7 ⋅ - ⋅ ⋅ 0.7 -``` -""" -(I::UniformScaling)(n::Integer) = Diagonal(fill(I.λ, n)) - -eltype(::Type{UniformScaling{T}}) where {T} = T -ndims(J::UniformScaling) = 2 -Base.has_offset_axes(::UniformScaling) = false -getindex(J::UniformScaling, i::Integer,j::Integer) = ifelse(i==j,J.λ,zero(J.λ)) - -getindex(J::UniformScaling, n::Integer, m::AbstractVector{<:Integer}) = getindex(J, m, n) -function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::Integer) where T - v = zeros(T, axes(n)) - @inbounds for (i,ii) in pairs(n) - if ii == m - v[i] = J.λ - end - end - return v -end - -function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::AbstractVector{<:Integer}) where T - A = zeros(T, axes(n)..., axes(m)...) - @inbounds for (j,jj) in pairs(m), (i,ii) in pairs(n) - if ii == jj - A[i,j] = J.λ - end - end - return A -end - -function show(io::IO, ::MIME"text/plain", J::UniformScaling) - s = "$(J.λ)" - if occursin(r"\w+\s*[\+\-]\s*\w+", s) - s = "($s)" - end - print(io, typeof(J), "\n$s*I") -end -copy(J::UniformScaling) = UniformScaling(J.λ) - -Base.convert(::Type{UniformScaling{T}}, J::UniformScaling) where {T} = UniformScaling(convert(T, J.λ))::UniformScaling{T} - -conj(J::UniformScaling) = UniformScaling(conj(J.λ)) -real(J::UniformScaling) = UniformScaling(real(J.λ)) -imag(J::UniformScaling) = UniformScaling(imag(J.λ)) - -transpose(J::UniformScaling) = J -adjoint(J::UniformScaling) = UniformScaling(conj(J.λ)) - -one(::Type{UniformScaling{T}}) where {T} = UniformScaling(one(T)) -one(J::UniformScaling{T}) where {T} = one(UniformScaling{T}) -oneunit(::Type{UniformScaling{T}}) where {T} = UniformScaling(oneunit(T)) -oneunit(J::UniformScaling{T}) where {T} = oneunit(UniformScaling{T}) -zero(::Type{UniformScaling{T}}) where {T} = UniformScaling(zero(T)) -zero(J::UniformScaling{T}) where {T} = zero(UniformScaling{T}) - -isdiag(::UniformScaling) = true -istriu(::UniformScaling) = true -istril(::UniformScaling) = true -issymmetric(::UniformScaling) = true -ishermitian(J::UniformScaling) = isreal(J.λ) -isposdef(J::UniformScaling) = isposdef(J.λ) - -(+)(J::UniformScaling, x::Number) = J.λ + x -(+)(x::Number, J::UniformScaling) = x + J.λ -(-)(J::UniformScaling, x::Number) = J.λ - x -(-)(x::Number, J::UniformScaling) = x - J.λ - -(+)(J::UniformScaling) = UniformScaling(+J.λ) -(+)(J1::UniformScaling, J2::UniformScaling) = UniformScaling(J1.λ+J2.λ) -(+)(B::BitArray{2}, J::UniformScaling) = Array(B) + J -(+)(J::UniformScaling, B::BitArray{2}) = J + Array(B) -(+)(J::UniformScaling, A::AbstractMatrix) = A + J - -(-)(J::UniformScaling) = UniformScaling(-J.λ) -(-)(J1::UniformScaling, J2::UniformScaling) = UniformScaling(J1.λ-J2.λ) -(-)(B::BitArray{2}, J::UniformScaling) = Array(B) - J -(-)(J::UniformScaling, B::BitArray{2}) = J - Array(B) -(-)(A::AbstractMatrix, J::UniformScaling) = A + (-J) - -# matrix functions -for f in ( :exp, :log, - :expm1, :log1p, - :sqrt, :cbrt, - :sin, :cos, :tan, - :asin, :acos, :atan, - :csc, :sec, :cot, 
- :acsc, :asec, :acot, - :sinh, :cosh, :tanh, - :asinh, :acosh, :atanh, - :csch, :sech, :coth, - :acsch, :asech, :acoth ) - @eval Base.$f(J::UniformScaling) = UniformScaling($f(J.λ)) -end - -# Unit{Lower/Upper}Triangular matrices become {Lower/Upper}Triangular under -# addition with a UniformScaling -for (t1, t2) in ((:UnitUpperTriangular, :UpperTriangular), - (:UnitLowerTriangular, :LowerTriangular)) - @eval begin - function (+)(UL::$t1, J::UniformScaling) - ULnew = copymutable_oftype(UL.data, Base.promote_op(+, eltype(UL), typeof(J))) - for i in axes(ULnew, 1) - ULnew[i,i] = one(ULnew[i,i]) + J - end - return ($t2)(ULnew) - end - end -end - -# Adding a complex UniformScaling to the diagonal of a Hermitian -# matrix breaks the hermiticity, if the UniformScaling is non-real. -# However, to preserve type stability, we do not special-case a -# UniformScaling{<:Complex} that happens to be real. -function (+)(A::Hermitian, J::UniformScaling{<:Complex}) - TS = Base.promote_op(+, eltype(A), typeof(J)) - B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true) - for i in diagind(B) - B[i] = A[i] + J - end - return B -end - -function (-)(J::UniformScaling{<:Complex}, A::Hermitian) - TS = Base.promote_op(+, eltype(A), typeof(J)) - B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true) - B .= .-B - for i in diagind(B) - B[i] = J - A[i] - end - return B -end - -function (+)(A::AbstractMatrix, J::UniformScaling) - checksquare(A) - B = copymutable_oftype(A, Base.promote_op(+, eltype(A), typeof(J))) - for i in intersect(axes(A,1), axes(A,2)) - @inbounds B[i,i] += J - end - return B -end - -function (-)(J::UniformScaling, A::AbstractMatrix) - checksquare(A) - B = convert(AbstractMatrix{Base.promote_op(+, eltype(A), typeof(J))}, -A) - for i in intersect(axes(A,1), axes(A,2)) - @inbounds B[i,i] += J - end - return B -end - -inv(J::UniformScaling) = UniformScaling(inv(J.λ)) -opnorm(J::UniformScaling, p::Real=2) = opnorm(J.λ, p) - -pinv(J::UniformScaling) = ifelse(iszero(J.λ), - UniformScaling(zero(inv(J.λ))), # type stability - UniformScaling(inv(J.λ))) - -function det(J::UniformScaling{T}) where T - if isone(J.λ) - one(T) - elseif iszero(J.λ) - zero(T) - else - throw(ArgumentError("Determinant of UniformScaling is only well-defined when λ = 0 or 1.")) - end -end - -function tr(J::UniformScaling{T}) where T - if iszero(J.λ) - zero(T) - else - throw(ArgumentError("Trace of UniformScaling is only well-defined when λ = 0")) - end -end - -*(J1::UniformScaling, J2::UniformScaling) = UniformScaling(J1.λ*J2.λ) -*(B::BitArray{2}, J::UniformScaling) = *(Array(B), J::UniformScaling) -*(J::UniformScaling, B::BitArray{2}) = *(J::UniformScaling, Array(B)) -*(A::AbstractMatrix, J::UniformScaling) = A*J.λ -*(v::AbstractVector, J::UniformScaling) = reshape(v, length(v), 1) * J -*(J::UniformScaling, A::AbstractVecOrMat) = J.λ*A -*(x::Number, J::UniformScaling) = UniformScaling(x*J.λ) -*(J::UniformScaling, x::Number) = UniformScaling(J.λ*x) - -/(J1::UniformScaling, J2::UniformScaling) = J2.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ/J2.λ) -/(J::UniformScaling, A::AbstractMatrix) = - (invA = inv(A); lmul!(J.λ, convert(AbstractMatrix{promote_type(eltype(J),eltype(invA))}, invA))) -/(A::AbstractMatrix, J::UniformScaling) = J.λ == 0 ? throw(SingularException(1)) : A/J.λ -/(v::AbstractVector, J::UniformScaling) = reshape(v, length(v), 1) / J - -/(J::UniformScaling, x::Number) = UniformScaling(J.λ/x) - -\(J1::UniformScaling, J2::UniformScaling) = J1.λ == 0 ? 
throw(SingularException(1)) : UniformScaling(J1.λ\J2.λ) -\(J::UniformScaling, A::AbstractVecOrMat) = J.λ == 0 ? throw(SingularException(1)) : J.λ\A -\(A::AbstractMatrix, J::UniformScaling) = - (invA = inv(A); rmul!(convert(AbstractMatrix{promote_type(eltype(invA),eltype(J))}, invA), J.λ)) -\(F::Factorization, J::UniformScaling) = F \ J(size(F,1)) - -\(x::Number, J::UniformScaling) = UniformScaling(x\J.λ) - -@inline mul!(C::AbstractMatrix, A::AbstractMatrix, J::UniformScaling, alpha::Number, beta::Number) = - mul!(C, A, J.λ, alpha, beta) -@inline mul!(C::AbstractVecOrMat, J::UniformScaling, B::AbstractVecOrMat, alpha::Number, beta::Number) = - mul!(C, J.λ, B, alpha, beta) - -function mul!(out::AbstractMatrix{T}, a::Number, B::UniformScaling, α::Number, β::Number) where {T} - checksquare(out) - if iszero(β) # zero contribution of the out matrix - fill!(out, zero(T)) - elseif !isone(β) - rmul!(out, β) - end - s = convert(T, a*B.λ*α) - if !iszero(s) - @inbounds for i in diagind(out) - out[i] += s - end - end - return out -end -@inline mul!(out::AbstractMatrix, A::UniformScaling, b::Number, α::Number, β::Number)= - mul!(out, A.λ, UniformScaling(b), α, β) -rmul!(A::AbstractMatrix, J::UniformScaling) = rmul!(A, J.λ) -lmul!(J::UniformScaling, B::AbstractVecOrMat) = lmul!(J.λ, B) -rdiv!(A::AbstractMatrix, J::UniformScaling) = rdiv!(A, J.λ) -ldiv!(J::UniformScaling, B::AbstractVecOrMat) = ldiv!(J.λ, B) -ldiv!(Y::AbstractVecOrMat, J::UniformScaling, B::AbstractVecOrMat) = (Y .= J.λ .\ B) - -Broadcast.broadcasted(::typeof(*), x::Number,J::UniformScaling) = UniformScaling(x*J.λ) -Broadcast.broadcasted(::typeof(*), J::UniformScaling,x::Number) = UniformScaling(J.λ*x) - -Broadcast.broadcasted(::typeof(/), J::UniformScaling,x::Number) = UniformScaling(J.λ/x) - -Broadcast.broadcasted(::typeof(\), x::Number,J::UniformScaling) = UniformScaling(x\J.λ) - -(^)(J::UniformScaling, x::Number) = UniformScaling((J.λ)^x) -Base.literal_pow(::typeof(^), J::UniformScaling, x::Val) = UniformScaling(Base.literal_pow(^, J.λ, x)) - -Broadcast.broadcasted(::typeof(^), J::UniformScaling, x::Number) = UniformScaling(J.λ^x) -function Broadcast.broadcasted(::typeof(Base.literal_pow), ::typeof(^), J::UniformScaling, x::Val) - UniformScaling(Base.literal_pow(^, J.λ, x)) -end - -==(J1::UniformScaling,J2::UniformScaling) = (J1.λ == J2.λ) - -## equality comparison with UniformScaling -==(J::UniformScaling, A::AbstractMatrix) = A == J -function ==(A::AbstractMatrix, J::UniformScaling) - require_one_based_indexing(A) - size(A, 1) == size(A, 2) || return false - iszero(J.λ) && return iszero(A) - isone(J.λ) && return isone(A) - return A == J.λ*one(A) -end -function ==(A::StridedMatrix, J::UniformScaling) - size(A, 1) == size(A, 2) || return false - iszero(J.λ) && return iszero(A) - isone(J.λ) && return isone(A) - for j in axes(A, 2), i in axes(A, 1) - ifelse(i == j, A[i, j] == J.λ, iszero(A[i, j])) || return false - end - return true -end - -isequal(A::AbstractMatrix, J::UniformScaling) = false -isequal(J::UniformScaling, A::AbstractMatrix) = false - -function isapprox(J1::UniformScaling{T}, J2::UniformScaling{S}; - atol::Real=0, rtol::Real=Base.rtoldefault(T,S,atol), nans::Bool=false) where {T<:Number,S<:Number} - isapprox(J1.λ, J2.λ, rtol=rtol, atol=atol, nans=nans) -end -function isapprox(J::UniformScaling, A::AbstractMatrix; - atol::Real = 0, - rtol::Real = Base.rtoldefault(promote_leaf_eltypes(A), eltype(J), atol), - nans::Bool = false, norm::Function = norm) - n = checksquare(A) - normJ = norm === opnorm ? 
abs(J.λ) : - norm === LinearAlgebra.norm ? abs(J.λ) * sqrt(n) : - norm(Diagonal(fill(J.λ, n))) - return norm(A - J) <= max(atol, rtol * max(norm(A), normJ)) -end -isapprox(A::AbstractMatrix, J::UniformScaling; kwargs...) = isapprox(J, A; kwargs...) - -""" - copyto!(dest::AbstractMatrix, src::UniformScaling) - -Copies a [`UniformScaling`](@ref) onto a matrix. - -!!! compat "Julia 1.1" - In Julia 1.0 this method only supported a square destination matrix. Julia 1.1. added - support for a rectangular matrix. -""" -function copyto!(A::AbstractMatrix, J::UniformScaling) - require_one_based_indexing(A) - fill!(A, 0) - λ = J.λ - for i = 1:min(size(A,1),size(A,2)) - @inbounds A[i,i] = λ - end - return A -end - -function copyto!(A::Diagonal, J::UniformScaling) - A.diag .= J.λ - return A -end -function copyto!(A::Union{Bidiagonal, SymTridiagonal}, J::UniformScaling) - A.ev .= 0 - A.dv .= J.λ - return A -end -function copyto!(A::Tridiagonal, J::UniformScaling) - A.dl .= 0 - A.du .= 0 - A.d .= J.λ - return A -end - -function cond(J::UniformScaling{T}) where T - onereal = inv(one(real(J.λ))) - return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf) -end - -# promote_to_arrays(n,k, T, A...) promotes any UniformScaling matrices -# in A to matrices of type T and sizes given by n[k:end]. n is an array -# so that the same promotion code can be used for hvcat. We pass the type T -# so that we can re-use this code for sparse-matrix hcat etcetera. -promote_to_arrays_(n::Int, ::Type, a::Number) = a -promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = Matrix(J, n, n) -promote_to_arrays_(n::Int, ::Type, A::AbstractVecOrMat) = A -promote_to_arrays(n,k, ::Type) = () -promote_to_arrays(n,k, ::Type{T}, A) where {T} = (promote_to_arrays_(n[k], T, A),) -promote_to_arrays(n,k, ::Type{T}, A, B) where {T} = - (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B)) -promote_to_arrays(n,k, ::Type{T}, A, B, C) where {T} = - (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays_(n[k+2], T, C)) -promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} = - (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...) -promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix - -_us2number(A) = A -_us2number(J::UniformScaling) = J.λ - -for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols")) - @eval begin - @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...) - # if there's a Number present, J::UniformScaling must be 1x1-dimensional - @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $f(map(_us2number, A)...) - function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A)) - n = -1 - for a in A - if !isa(a, UniformScaling) - require_one_based_indexing(a) - na = size(a,$dim) - n >= 0 && n != na && - throw(DimensionMismatch(string("number of ", $name, - " of each array must match (got ", n, " and ", na, ")"))) - n = na - end - end - n == -1 && throw(ArgumentError($("$f of only UniformScaling objects cannot determine the matrix size"))) - return cat(promote_to_arrays(fill(n, length(A)), 1, array_type, A...)..., dims=Val(3-$dim)) - end - end -end - -hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...) = _hvcat(rows, A...) -hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...) = _hvcat(rows, A...) 
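A minimal sketch (not part of the deleted file) of what the `hcat`/`vcat`/`hvcat` methods above provide: a `UniformScaling` such as `I` can participate in block concatenation, with its size inferred from the neighbouring blocks. This assumes only that `LinearAlgebra` is loaded.

```julia
using LinearAlgebra

A = [1 2; 3 4]
B = [A I; I A]   # each I is inferred to be a 2×2 identity block
# B ==
#  1  2  1  0
#  3  4  0  1
#  1  0  1  2
#  0  1  3  4
```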
-function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A)) - require_one_based_indexing(A...) - nr = length(rows) - sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments")) - n = fill(-1, length(A)) - needcols = false # whether we also need to infer some sizes from the column count - j = 0 - for i = 1:nr # infer UniformScaling sizes from row counts, if possible: - ni = -1 # number of rows in this block-row, -1 indicates unknown - for k = 1:rows[i] - if !isa(A[j+k], UniformScaling) - na = size(A[j+k], 1) - ni >= 0 && ni != na && - throw(DimensionMismatch("mismatch in number of rows")) - ni = na - end - end - if ni >= 0 - for k = 1:rows[i] - n[j+k] = ni - end - else # row consisted only of UniformScaling objects - needcols = true - end - j += rows[i] - end - if needcols # some sizes still unknown, try to infer from column count - nc = -1 - j = 0 - for i = 1:nr - nci = 0 - rows[i] > 0 && n[j+1] == -1 && (j += rows[i]; continue) - for k = 1:rows[i] - nci += isa(A[j+k], UniformScaling) ? n[j+k] : size(A[j+k], 2) - end - nc >= 0 && nc != nci && throw(DimensionMismatch("mismatch in number of columns")) - nc = nci - j += rows[i] - end - nc == -1 && throw(ArgumentError("sizes of UniformScalings could not be inferred")) - j = 0 - for i = 1:nr - if rows[i] > 0 && n[j+1] == -1 # this row consists entirely of UniformScalings - nci, r = divrem(nc, rows[i]) - r != 0 && throw(DimensionMismatch("indivisible UniformScaling sizes")) - for k = 1:rows[i] - n[j+k] = nci - end - end - j += rows[i] - end - end - Amat = promote_to_arrays(n, 1, array_type, A...) - # We have two methods for promote_to_array_type, one returning Matrix and - # another one returning SparseMatrixCSC (in SparseArrays.jl). In the dense - # case, we cannot call hvcat for the promoted UniformScalings because this - # causes a stack overflow. In the sparse case, however, we cannot call - # typed_hvcat because we need a sparse output. - if array_type == Matrix - return typed_hvcat(promote_eltype(Amat...), rows, Amat...) - else - return hvcat(rows, Amat...) - end -end - -## Matrix construction from UniformScaling -function Matrix{T}(s::UniformScaling, dims::Dims{2}) where {T} - A = zeros(T, dims) - v = T(s.λ) - for i in diagind(dims...) 
- @inbounds A[i] = v - end - return A -end -Matrix{T}(s::UniformScaling, m::Integer, n::Integer) where {T} = Matrix{T}(s, Dims((m, n))) -Matrix(s::UniformScaling, m::Integer, n::Integer) = Matrix(s, Dims((m, n))) -Matrix(s::UniformScaling, dims::Dims{2}) = Matrix{eltype(s)}(s, dims) -Array{T}(s::UniformScaling, dims::Dims{2}) where {T} = Matrix{T}(s, dims) -Array{T}(s::UniformScaling, m::Integer, n::Integer) where {T} = Matrix{T}(s, m, n) -Array(s::UniformScaling, m::Integer, n::Integer) = Matrix(s, m, n) -Array(s::UniformScaling, dims::Dims{2}) = Matrix(s, dims) - -dot(A::AbstractMatrix, J::UniformScaling) = dot(tr(A), J.λ) -dot(J::UniformScaling, A::AbstractMatrix) = dot(J.λ, tr(A)) - -dot(x::AbstractVector, J::UniformScaling, y::AbstractVector) = dot(x, J.λ, y) -dot(x::AbstractVector, a::Number, y::AbstractVector) = sum(t -> dot(t[1], a, t[2]), zip(x, y)) -dot(x::AbstractVector, a::Union{Real,Complex}, y::AbstractVector) = a*dot(x, y) - -# muladd -Base.muladd(A::UniformScaling, B::UniformScaling, z::UniformScaling) = - UniformScaling(A.λ * B.λ + z.λ) diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl deleted file mode 100644 index 83a26c6050484..0000000000000 --- a/stdlib/LinearAlgebra/test/abstractq.jl +++ /dev/null @@ -1,100 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestAbstractQ - -using Test -using LinearAlgebra -using LinearAlgebra: AbstractQ, AdjointQ -import LinearAlgebra: lmul!, rmul! -import Base: size, convert - -n = 5 - -@testset "custom AbstractQ type" begin - struct MyQ{T,S<:AbstractQ{T}} <: AbstractQ{T} - Q::S - end - MyQ{T}(Q::AbstractQ) where {T} = (P = convert(AbstractQ{T}, Q); MyQ{T,typeof(P)}(P)) - MyQ(Q::MyQ) = Q - - Base.size(Q::MyQ) = size(Q.Q) - LinearAlgebra.lmul!(Q::MyQ, B::AbstractVecOrMat) = lmul!(Q.Q, B) - LinearAlgebra.lmul!(adjQ::AdjointQ{<:Any,<:MyQ}, B::AbstractVecOrMat) = lmul!(parent(adjQ).Q', B) - LinearAlgebra.rmul!(A::AbstractVecOrMat, Q::MyQ) = rmul!(A, Q.Q) - LinearAlgebra.rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q') - Base.convert(::Type{AbstractQ{T}}, Q::MyQ) where {T} = MyQ{T}(Q.Q) - LinearAlgebra.det(Q::MyQ) = det(Q.Q) - - for T in (Float64, ComplexF64) - A = rand(T, n, n) - F = qr(A) - Q = MyQ(F.Q) - @test ndims(Q) == 2 - T <: Real && @test transpose(Q) == adjoint(Q) - T <: Complex && @test_throws ErrorException transpose(Q) - @test convert(AbstractQ{complex(T)}, Q) isa MyQ{complex(T)} - @test convert(AbstractQ{complex(T)}, Q') isa AdjointQ{<:complex(T),<:MyQ{complex(T)}} - @test Q*I ≈ Q.Q*I rtol=2eps(real(T)) - @test Q'*I ≈ Q.Q'*I rtol=2eps(real(T)) - @test I*Q ≈ Q.Q*I rtol=2eps(real(T)) - @test I*Q' ≈ I*Q.Q' rtol=2eps(real(T)) - @test abs(det(Q)) ≈ 1 - @test logabsdet(Q)[1] ≈ 0 atol=2n*eps(real(T)) - y = rand(T, n) - @test Q * y ≈ Q.Q * y ≈ Q' \ y ≈ ldiv!(Q', copy(y)) ≈ ldiv!(zero(y), Q', y) - @test Q'y ≈ Q.Q' * y ≈ Q \ y ≈ ldiv!(Q, copy(y)) ≈ ldiv!(zero(y), Q, y) - @test y'Q ≈ y'Q.Q ≈ y' / Q' - @test y'Q' ≈ y'Q.Q' ≈ y' / Q - y = Matrix(y') - @test y*Q ≈ y*Q.Q ≈ y / Q' ≈ rdiv!(copy(y), Q') - @test y*Q' ≈ y*Q.Q' ≈ y / Q ≈ rdiv!(copy(y), Q) - Y = rand(T, n, n); X = similar(Y) - for transQ in (identity, adjoint), transY in (identity, adjoint), Y in (Y, Y') - @test mul!(X, transQ(Q), transY(Y)) ≈ transQ(Q) * transY(Y) ≈ transQ(Q.Q) * transY(Y) - @test mul!(X, transY(Y), transQ(Q)) ≈ transY(Y) * transQ(Q) ≈ transY(Y) * transQ(Q.Q) - end - @test convert(Matrix, Q) ≈ Matrix(Q) ≈ Q[:,:] ≈ copyto!(zeros(T, size(Q)), 
Q) ≈ Q.Q*I - @test convert(Matrix, Q') ≈ Matrix(Q') ≈ (Q')[:,:] ≈ copyto!(zeros(T, size(Q)), Q') ≈ Q.Q'*I - @test Q[1,:] == Q.Q[1,:] == view(Q, 1, :) - @test Q[:,1] == Q.Q[:,1] == view(Q, :, 1) - @test Q[1,1] == Q.Q[1,1] - @test Q[:] == Q.Q[:] - @test Q[:,1:3] == Q.Q[:,1:3] == view(Q, :, 1:3) - @test Q[:,1:3] ≈ Matrix(Q)[:,1:3] - @test Q[2:3,2:3] == view(Q, 2:3, 2:3) ≈ Matrix(Q)[2:3,2:3] - @test_throws BoundsError Q[0,1] - @test_throws BoundsError Q[n+1,1] - @test_throws BoundsError Q[1,0] - @test_throws BoundsError Q[1,n+1] - @test_throws BoundsError Q[:,1:n+1] - @test_throws BoundsError Q[:,0:n] - for perm in ((1, 2), (2, 1)) - P = PermutedDimsArray(zeros(T, size(Q)), perm) - @test copyto!(P, Q) ≈ Matrix(Q) - end - x = randn(T) - @test x * Q ≈ (x*I)*Q ≈ x * Q.Q - @test Q * x ≈ Q*(x*I) ≈ Q.Q * x - @test x * Q' ≈ (x*I)* Q' ≈ x * Q.Q' - @test Q' * x ≈ Q'*(x*I) ≈ Q.Q' * x - x = rand(T, 1) - Q = MyQ(qr(rand(T, 1, 1)).Q) - @test x * Q ≈ x * Q.Q - @test x * Q' ≈ x * Q.Q' - @test Q * x ≈ Q.Q * x - @test Q' * x ≈ Q.Q' * x - end - A = rand(Float64, 5, 3) - F = qr(A) - Q = MyQ(F.Q) - Prect = Matrix(F.Q) - Psquare = collect(F.Q) - @test Q == Prect - @test Q == Psquare - @test Q == F.Q*I - @test Q ≈ Prect - @test Q ≈ Psquare - @test Q ≈ F.Q*I -end - -end # module diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl deleted file mode 100644 index 3fff8289242f7..0000000000000 --- a/stdlib/LinearAlgebra/test/addmul.jl +++ /dev/null @@ -1,223 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestAddmul - -using Base: rtoldefault -using Test -using LinearAlgebra -using LinearAlgebra: AbstractTriangular -using Random - -_rand(::Type{T}) where {T <: AbstractFloat} = T(randn()) -_rand(::Type{T}) where {F, T <: Complex{F}} = T(_rand(F), _rand(F)) -_rand(::Type{T}) where {T <: Integer} = - T(rand(max(typemin(T), -10):min(typemax(T), 10))) -_rand(::Type{BigInt}) = BigInt(_rand(Int)) - -function _rand(A::Type{<:Array}, shape) - T = eltype(A) - data = T[_rand(T) for _ in 1:prod(shape)] - return copy(reshape(data, shape)) -end - -constructor_of(::Type{T}) where T = getfield(parentmodule(T), nameof(T)) - -function _rand(A::Type{<: AbstractArray}, shape) - data = _rand(Array{eltype(A)}, shape) - T = constructor_of(A) - if A <: Union{Bidiagonal, Hermitian, Symmetric} - return T(data, rand([:U, :L])) - # Maybe test with both :U and :L? 
- end - return T(data) -end - -_rand(A::Type{<: SymTridiagonal{T}}, shape) where {T} = - SymTridiagonal(_rand(Symmetric{T}, shape)) - -const FloatOrC = Union{AbstractFloat, Complex{<: AbstractFloat}} -const IntegerOrC = Union{Integer, Complex{<: Integer}} -const LTri = Union{LowerTriangular, UnitLowerTriangular, Diagonal} -const UTri = Union{UpperTriangular, UnitUpperTriangular, Diagonal} - -needsquare(::Type{<:Matrix}) = false -needsquare(::Type) = true - -testdata = [] - -sizecandidates = 1:4 -floattypes = [ - Float64, Float32, ComplexF64, ComplexF32, # BlasFloat - BigFloat, -] -inttypes = [ - Int, - BigInt, -] -# `Bool` can be added to `inttypes` but it's hard to handle -# `InexactError` bug that is mentioned in: -# https://github.com/JuliaLang/julia/issues/30094#issuecomment-440175887 -alleltypes = [floattypes; inttypes] -celtypes = [Float64, ComplexF64, BigFloat, Int] - -mattypes = [ - Matrix, - Bidiagonal, - Diagonal, - Hermitian, - LowerTriangular, - SymTridiagonal, - Symmetric, - Tridiagonal, - UnitLowerTriangular, - UnitUpperTriangular, - UpperTriangular, -] - -isnanfillable(::AbstractArray) = false -isnanfillable(::Array{<:AbstractFloat}) = true -isnanfillable(A::AbstractArray{<:AbstractFloat}) = parent(A) isa Array - -""" -Sample `n` elements from `S` on average but make sure at least one -element is sampled. -""" -function sample(S, n::Real) - length(S) <= n && return S - xs = randsubseq(S, n / length(S)) - return length(xs) > 0 ? xs : rand(S, 1) # sample at least one -end - -function inputeltypes(celt, alleltypes = alleltypes) - # Skip if destination type is "too small" - celt <: Bool && return [] - filter(alleltypes) do aelt - celt <: Real && aelt <: Complex && return false - !(celt <: BigFloat) && aelt <: BigFloat && return false - !(celt <: BigInt) && aelt <: BigInt && return false - celt <: IntegerOrC && aelt <: FloatOrC && return false - if celt <: IntegerOrC && !(celt <: BigInt) - typemin(celt) > typemin(aelt) && return false - typemax(celt) < typemax(aelt) && return false - end - return true - end -end -# Note: using `randsubseq` instead of `rand` to avoid repetition. 
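A minimal sketch (not part of the deleted file) of the contract these sampled type combinations exercise: the 5-argument `mul!(C, A, B, α, β)` performs the in-place update `C = α*A*B + β*C`. The dense inputs below are illustrative only.

```julia
using LinearAlgebra

A = [1.0 2.0; 3.0 4.0]
B = [0.5 0.0; 0.0 0.5]
C = ones(2, 2)
α, β = 2.0, 3.0
expected = α * A * B + β * C
mul!(C, A, B, α, β)      # overwrites C in place
@assert C ≈ expected
```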
- -function inputmattypes(cmat, mattypes = mattypes) - # Skip if destination type is "too small" - cmat <: Union{Bidiagonal, Tridiagonal, SymTridiagonal, - UnitLowerTriangular, UnitUpperTriangular, - Hermitian, Symmetric} && return [] - filter(mattypes) do amat - cmat <: Diagonal && (amat <: Diagonal || return false) - cmat <: LowerTriangular && (amat <: LTri || return false) - cmat <: UpperTriangular && (amat <: UTri || return false) - return true - end -end - -n_samples = 1.5 -# n_samples = Inf # to try all combinations -for cmat in mattypes, - amat in sample(inputmattypes(cmat), n_samples), - bmat in sample(inputmattypes(cmat), n_samples), - celt in celtypes, - aelt in sample(inputeltypes(celt), n_samples), - belt in sample(inputeltypes(celt), n_samples) - - push!(testdata, (cmat{celt}, amat{aelt}, bmat{belt})) -end - -@testset "mul!(::$TC, ::$TA, ::$TB, α, β)" for (TC, TA, TB) in testdata - if needsquare(TA) - na1 = na2 = rand(sizecandidates) - else - na1, na2 = rand(sizecandidates, 2) - end - if needsquare(TB) - nb2 = na2 - elseif needsquare(TC) - nb2 = na1 - else - nb2 = rand(sizecandidates) - end - asize = (na1, na2) - bsize = (na2, nb2) - csize = (na1, nb2) - - @testset for α in Any[true, eltype(TC)(1), _rand(eltype(TC))], - β in Any[false, eltype(TC)(0), _rand(eltype(TC))] - - C = _rand(TC, csize) - A = _rand(TA, asize) - B = _rand(TB, bsize) - - # This is similar to how `isapprox` choose `rtol` (when - # `atol=0`) but consider all number types involved: - rtol = max(rtoldefault.(real.(eltype.((C, A, B))))..., - rtoldefault.(real.(typeof.((α, β))))...) - - Cc = copy(C) - Ac = Matrix(A) - Bc = Matrix(B) - returned_mat = mul!(C, A, B, α, β) - @test returned_mat === C - # This test is skipped because it is flakey, but should be fixed and put back (see #49966) - @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol - - y = C[:, 1] - x = B[:, 1] - yc = Vector(y) - xc = Vector(x) - returned_vec = mul!(y, A, x, α, β) - @test returned_vec === y - @test collect(returned_vec) ≈ α * Ac * xc + β * yc rtol=rtol - - if TC <: Matrix - @testset "adjoint and transpose" begin - @testset for fa in [identity, adjoint, transpose], - fb in [identity, adjoint, transpose] - fa === fb === identity && continue - - Af = fa === identity ? A : fa(_rand(TA, reverse(asize))) - Bf = fb === identity ? 
B : fb(_rand(TB, reverse(bsize))) - - Ac = collect(Af) - Bc = collect(Bf) - Cc = collect(C) - - returned_mat = mul!(C, Af, Bf, α, β) - @test returned_mat === C - # This test is skipped because it is flakey, but should be fixed and put back (see #49966) - @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol - end - end - end - - if isnanfillable(C) - @testset "β = 0 ignores C .= NaN" begin - parent(C) .= NaN - Ac = Matrix(A) - Bc = Matrix(B) - returned_mat = mul!(C, A, B, α, zero(eltype(C))) - @test returned_mat === C - # This test is skipped because it is flakey, but should be fixed and put back (see #49966) - @test_skip collect(returned_mat) ≈ α * Ac * Bc rtol=rtol - end - end - - if isnanfillable(A) - @testset "α = 0 ignores A .= NaN" begin - parent(A) .= NaN - Cc = copy(C) - returned_mat = mul!(C, A, B, zero(eltype(A)), β) - @test returned_mat === C - @test collect(returned_mat) ≈ β * Cc rtol=rtol - end - end - end -end - -end # module diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl deleted file mode 100644 index 2362ec7fb28f2..0000000000000 --- a/stdlib/LinearAlgebra/test/adjtrans.jl +++ /dev/null @@ -1,674 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestAdjointTranspose - -using Test, LinearAlgebra - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") - -@testset "Adjoint and Transpose inner constructor basics" begin - intvec, intmat = [1, 2], [1 2; 3 4] - # Adjoint/Transpose eltype must match the type of the Adjoint/Transpose of the input eltype - @test_throws TypeError Adjoint{Float64,Vector{Int}}(intvec)[1,1] - @test_throws TypeError Adjoint{Float64,Matrix{Int}}(intmat)[1,1] - @test_throws TypeError Transpose{Float64,Vector{Int}}(intvec)[1,1] - @test_throws TypeError Transpose{Float64,Matrix{Int}}(intmat)[1,1] - # Adjoint/Transpose wrapped array type must match the input array type - @test_throws TypeError Adjoint{Int,Vector{Float64}}(intvec)[1,1] - @test_throws TypeError Adjoint{Int,Matrix{Float64}}(intmat)[1,1] - @test_throws TypeError Transpose{Int,Vector{Float64}}(intvec)[1,1] - @test_throws TypeError Transpose{Int,Matrix{Float64}}(intmat)[1,1] - # Adjoint/Transpose inner constructor basic functionality, concrete scalar eltype - @test (Adjoint{Int,Vector{Int}}(intvec)::Adjoint{Int,Vector{Int}}).parent === intvec - @test (Adjoint{Int,Matrix{Int}}(intmat)::Adjoint{Int,Matrix{Int}}).parent === intmat - @test (Transpose{Int,Vector{Int}}(intvec)::Transpose{Int,Vector{Int}}).parent === intvec - @test (Transpose{Int,Matrix{Int}}(intmat)::Transpose{Int,Matrix{Int}}).parent === intmat - # Adjoint/Transpose inner constructor basic functionality, abstract scalar eltype - anyvec, anymat = Any[1, 2], Any[1 2; 3 4] - @test (Adjoint{Any,Vector{Any}}(anyvec)::Adjoint{Any,Vector{Any}}).parent === anyvec - @test (Adjoint{Any,Matrix{Any}}(anymat)::Adjoint{Any,Matrix{Any}}).parent === anymat - @test (Transpose{Any,Vector{Any}}(anyvec)::Transpose{Any,Vector{Any}}).parent === anyvec - @test (Transpose{Any,Matrix{Any}}(anymat)::Transpose{Any,Matrix{Any}}).parent === anymat - # Adjoint/Transpose inner constructor basic functionality, concrete array eltype - intvecvec = [[1, 2], [3, 4]] - intmatmat = [[[1 2]] [[3 4]] [[5 6]]; [[7 8]] [[9 10]] [[11 12]]] - @test (X = Adjoint{Adjoint{Int,Vector{Int}},Vector{Vector{Int}}}(intvecvec); - isa(X, Adjoint{Adjoint{Int,Vector{Int}},Vector{Vector{Int}}}) && X.parent === intvecvec) - @test (X = 
Adjoint{Adjoint{Int,Matrix{Int}},Matrix{Matrix{Int}}}(intmatmat); - isa(X, Adjoint{Adjoint{Int,Matrix{Int}},Matrix{Matrix{Int}}}) && X.parent === intmatmat) - @test (X = Transpose{Transpose{Int,Vector{Int}},Vector{Vector{Int}}}(intvecvec); - isa(X, Transpose{Transpose{Int,Vector{Int}},Vector{Vector{Int}}}) && X.parent === intvecvec) - @test (X = Transpose{Transpose{Int,Matrix{Int}},Matrix{Matrix{Int}}}(intmatmat); - isa(X, Transpose{Transpose{Int,Matrix{Int}},Matrix{Matrix{Int}}}) && X.parent === intmatmat) -end - -@testset "Adjoint and Transpose outer constructor basics" begin - intvec, intmat = [1, 2], [1 2; 3 4] - # the wrapped array's eltype strictly determines the Adjoint/Transpose eltype - # so Adjoint{T}/Transpose{T} constructors are somewhat unnecessary and error-prone - # so ascertain that such calls throw whether or not T and the input eltype are compatible - @test_throws MethodError Adjoint{Int}(intvec) - @test_throws MethodError Adjoint{Int}(intmat) - @test_throws MethodError Adjoint{Float64}(intvec) - @test_throws MethodError Adjoint{Float64}(intmat) - @test_throws MethodError Transpose{Int}(intvec) - @test_throws MethodError Transpose{Int}(intmat) - @test_throws MethodError Transpose{Float64}(intvec) - @test_throws MethodError Transpose{Float64}(intmat) - # Adjoint/Transpose outer constructor basic functionality, concrete scalar eltype - @test (Adjoint(intvec)::Adjoint{Int,Vector{Int}}).parent === intvec - @test (Adjoint(intmat)::Adjoint{Int,Matrix{Int}}).parent === intmat - @test (Transpose(intvec)::Transpose{Int,Vector{Int}}).parent === intvec - @test (Transpose(intmat)::Transpose{Int,Matrix{Int}}).parent === intmat - # the tests for the inner constructors exercise abstract scalar and concrete array eltype, forgoing here -end - -@testset "Adjoint and Transpose add additional layers to already-wrapped objects" begin - intvec, intmat = [1, 2], [1 2; 3 4] - @test (A = Adjoint(Adjoint(intvec))::Adjoint{Int,Adjoint{Int,Vector{Int}}}; A.parent.parent === intvec) - @test (A = Adjoint(Adjoint(intmat))::Adjoint{Int,Adjoint{Int,Matrix{Int}}}; A.parent.parent === intmat) - @test (A = Transpose(Transpose(intvec))::Transpose{Int,Transpose{Int,Vector{Int}}}; A.parent.parent === intvec) - @test (A = Transpose(Transpose(intmat))::Transpose{Int,Transpose{Int,Matrix{Int}}}; A.parent.parent === intmat) -end - -@testset "Adjoint and Transpose basic AbstractArray functionality" begin - # vectors and matrices with real scalar eltype, and their adjoints/transposes - intvec, intmat = [1, 2], [1 2 3; 4 5 6] - tintvec, tintmat = [1 2], [1 4; 2 5; 3 6] - @testset "length methods" begin - @test length(Adjoint(intvec)) == length(intvec) - @test length(Adjoint(intmat)) == length(intmat) - @test length(Transpose(intvec)) == length(intvec) - @test length(Transpose(intmat)) == length(intmat) - end - @testset "size methods" begin - @test size(Adjoint(intvec)) == (1, length(intvec)) - @test size(Adjoint(intmat)) == reverse(size(intmat)) - @test size(Transpose(intvec)) == (1, length(intvec)) - @test size(Transpose(intmat)) == reverse(size(intmat)) - end - @testset "indices methods" begin - @test axes(Adjoint(intvec)) == (Base.OneTo(1), Base.OneTo(length(intvec))) - @test axes(Adjoint(intmat)) == reverse(axes(intmat)) - @test axes(Transpose(intvec)) == (Base.OneTo(1), Base.OneTo(length(intvec))) - @test axes(Transpose(intmat)) == reverse(axes(intmat)) - end - @testset "IndexStyle methods" begin - @test IndexStyle(Adjoint(intvec)) == IndexLinear() - @test IndexStyle(Adjoint(intmat)) == IndexCartesian() - @test 
IndexStyle(Transpose(intvec)) == IndexLinear() - @test IndexStyle(Transpose(intmat)) == IndexCartesian() - end - # vectors and matrices with complex scalar eltype, and their adjoints/transposes - complexintvec, complexintmat = [1im, 2im], [1im 2im 3im; 4im 5im 6im] - tcomplexintvec, tcomplexintmat = [1im 2im], [1im 4im; 2im 5im; 3im 6im] - acomplexintvec, acomplexintmat = conj.(tcomplexintvec), conj.(tcomplexintmat) - # vectors and matrices with real-vector and real-matrix eltype, and their adjoints/transposes - intvecvec = [[1, 2], [3, 4]] - tintvecvec = [[[1 2]] [[3 4]]] - intmatmat = [[[1 2]] [[3 4]] [[ 5 6]]; - [[7 8]] [[9 10]] [[11 12]]] - tintmatmat = [[hcat([1, 2])] [hcat([7, 8])]; - [hcat([3, 4])] [hcat([9, 10])]; - [hcat([5, 6])] [hcat([11, 12])]] - # vectors and matrices with complex-vector and complex-matrix eltype, and their adjoints/transposes - complexintvecvec, complexintmatmat = im .* (intvecvec, intmatmat) - tcomplexintvecvec, tcomplexintmatmat = im .* (tintvecvec, tintmatmat) - acomplexintvecvec, acomplexintmatmat = conj.(tcomplexintvecvec), conj.(tcomplexintmatmat) - @testset "getindex methods, elementary" begin - # implicitly test elementary definitions, for arrays with concrete real scalar eltype - @test Adjoint(intvec) == tintvec - @test Adjoint(intmat) == tintmat - @test Transpose(intvec) == tintvec - @test Transpose(intmat) == tintmat - # implicitly test elementary definitions, for arrays with concrete complex scalar eltype - @test Adjoint(complexintvec) == acomplexintvec - @test Adjoint(complexintmat) == acomplexintmat - @test Transpose(complexintvec) == tcomplexintvec - @test Transpose(complexintmat) == tcomplexintmat - # implicitly test elementary definitions, for arrays with concrete real-array eltype - @test Adjoint(intvecvec) == tintvecvec - @test Adjoint(intmatmat) == tintmatmat - @test Transpose(intvecvec) == tintvecvec - @test Transpose(intmatmat) == tintmatmat - # implicitly test elementary definitions, for arrays with concrete complex-array type - @test Adjoint(complexintvecvec) == acomplexintvecvec - @test Adjoint(complexintmatmat) == acomplexintmatmat - @test Transpose(complexintvecvec) == tcomplexintvecvec - @test Transpose(complexintmatmat) == tcomplexintmatmat - end - @testset "getindex(::AdjOrTransVec, ::Colon, ::AbstractArray{Int}) methods that preserve wrapper type" begin - # for arrays with concrete scalar eltype - @test Adjoint(intvec)[:, [1, 2]] == Adjoint(intvec) - @test Transpose(intvec)[:, [1, 2]] == Transpose(intvec) - @test Adjoint(complexintvec)[:, [1, 2]] == Adjoint(complexintvec) - @test Transpose(complexintvec)[:, [1, 2]] == Transpose(complexintvec) - # for arrays with concrete array eltype - @test Adjoint(intvecvec)[:, [1, 2]] == Adjoint(intvecvec) - @test Transpose(intvecvec)[:, [1, 2]] == Transpose(intvecvec) - @test Adjoint(complexintvecvec)[:, [1, 2]] == Adjoint(complexintvecvec) - @test Transpose(complexintvecvec)[:, [1, 2]] == Transpose(complexintvecvec) - end - @testset "getindex(::AdjOrTransVec, ::Colon, ::Colon) methods that preserve wrapper type" begin - # for arrays with concrete scalar eltype - @test Adjoint(intvec)[:, :] == Adjoint(intvec) - @test Transpose(intvec)[:, :] == Transpose(intvec) - @test Adjoint(complexintvec)[:, :] == Adjoint(complexintvec) - @test Transpose(complexintvec)[:, :] == Transpose(complexintvec) - # for arrays with concrete array elype - @test Adjoint(intvecvec)[:, :] == Adjoint(intvecvec) - @test Transpose(intvecvec)[:, :] == Transpose(intvecvec) - @test Adjoint(complexintvecvec)[:, :] == 
Adjoint(complexintvecvec) - @test Transpose(complexintvecvec)[:, :] == Transpose(complexintvecvec) - end - @testset "getindex(::AdjOrTransVec, ::Colon, ::Int) should preserve wrapper type on result entries" begin - # for arrays with concrete scalar eltype - @test Adjoint(intvec)[:, 2] == intvec[2:2] - @test Transpose(intvec)[:, 2] == intvec[2:2] - @test Adjoint(complexintvec)[:, 2] == conj.(complexintvec[2:2]) - @test Transpose(complexintvec)[:, 2] == complexintvec[2:2] - # for arrays with concrete array eltype - @test Adjoint(intvecvec)[:, 2] == Adjoint.(intvecvec[2:2]) - @test Transpose(intvecvec)[:, 2] == Transpose.(intvecvec[2:2]) - @test Adjoint(complexintvecvec)[:, 2] == Adjoint.(complexintvecvec[2:2]) - @test Transpose(complexintvecvec)[:, 2] == Transpose.(complexintvecvec[2:2]) - end - @testset "setindex! methods" begin - # for vectors with real scalar eltype - @test (wv = Adjoint(copy(intvec)); - wv === setindex!(wv, 3, 2) && - wv == setindex!(copy(tintvec), 3, 1, 2) ) - @test (wv = Transpose(copy(intvec)); - wv === setindex!(wv, 4, 2) && - wv == setindex!(copy(tintvec), 4, 1, 2) ) - # for matrices with real scalar eltype - @test (wA = Adjoint(copy(intmat)); - wA === setindex!(wA, 7, 3, 1) && - wA == setindex!(copy(tintmat), 7, 3, 1) ) - @test (wA = Transpose(copy(intmat)); - wA === setindex!(wA, 7, 3, 1) && - wA == setindex!(copy(tintmat), 7, 3, 1) ) - # for vectors with complex scalar eltype - @test (wz = Adjoint(copy(complexintvec)); - wz === setindex!(wz, 3im, 2) && - wz == setindex!(copy(acomplexintvec), 3im, 1, 2) ) - @test (wz = Transpose(copy(complexintvec)); - wz === setindex!(wz, 4im, 2) && - wz == setindex!(copy(tcomplexintvec), 4im, 1, 2) ) - # for matrices with complex scalar eltype - @test (wZ = Adjoint(copy(complexintmat)); - wZ === setindex!(wZ, 7im, 3, 1) && - wZ == setindex!(copy(acomplexintmat), 7im, 3, 1) ) - @test (wZ = Transpose(copy(complexintmat)); - wZ === setindex!(wZ, 7im, 3, 1) && - wZ == setindex!(copy(tcomplexintmat), 7im, 3, 1) ) - # for vectors with concrete real-vector eltype - @test (wv = Adjoint(copy(intvecvec)); - wv === setindex!(wv, Adjoint([5, 6]), 2) && - wv == setindex!(copy(tintvecvec), [5 6], 2)) - @test (wv = Transpose(copy(intvecvec)); - wv === setindex!(wv, Transpose([5, 6]), 2) && - wv == setindex!(copy(tintvecvec), [5 6], 2)) - # for matrices with concrete real-matrix eltype - @test (wA = Adjoint(copy(intmatmat)); - wA === setindex!(wA, Adjoint([13 14]), 3, 1) && - wA == setindex!(copy(tintmatmat), hcat([13, 14]), 3, 1)) - @test (wA = Transpose(copy(intmatmat)); - wA === setindex!(wA, Transpose([13 14]), 3, 1) && - wA == setindex!(copy(tintmatmat), hcat([13, 14]), 3, 1)) - # for vectors with concrete complex-vector eltype - @test (wz = Adjoint(copy(complexintvecvec)); - wz === setindex!(wz, Adjoint([5im, 6im]), 2) && - wz == setindex!(copy(acomplexintvecvec), [-5im -6im], 2)) - @test (wz = Transpose(copy(complexintvecvec)); - wz === setindex!(wz, Transpose([5im, 6im]), 2) && - wz == setindex!(copy(tcomplexintvecvec), [5im 6im], 2)) - # for matrices with concrete complex-matrix eltype - @test (wZ = Adjoint(copy(complexintmatmat)); - wZ === setindex!(wZ, Adjoint([13im 14im]), 3, 1) && - wZ == setindex!(copy(acomplexintmatmat), hcat([-13im, -14im]), 3, 1)) - @test (wZ = Transpose(copy(complexintmatmat)); - wZ === setindex!(wZ, Transpose([13im 14im]), 3, 1) && - wZ == setindex!(copy(tcomplexintmatmat), hcat([13im, 14im]), 3, 1)) - end -end - -@testset "Adjoint and Transpose convert methods that convert underlying storage" begin - intvec, 
intmat = [1, 2], [1 2 3; 4 5 6] - @test convert(Adjoint{Float64,Vector{Float64}}, Adjoint(intvec))::Adjoint{Float64,Vector{Float64}} == Adjoint(intvec) - @test convert(Adjoint{Float64,Matrix{Float64}}, Adjoint(intmat))::Adjoint{Float64,Matrix{Float64}} == Adjoint(intmat) - @test convert(Transpose{Float64,Vector{Float64}}, Transpose(intvec))::Transpose{Float64,Vector{Float64}} == Transpose(intvec) - @test convert(Transpose{Float64,Matrix{Float64}}, Transpose(intmat))::Transpose{Float64,Matrix{Float64}} == Transpose(intmat) -end - -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "Adjoint and Transpose convert methods to AbstractArray" begin - # tests corresponding to #34995 - intvec, intmat = [1, 2], [1 2 3; 4 5 6] - statvec = ImmutableArray(intvec) - statmat = ImmutableArray(intmat) - - @test convert(AbstractArray{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec) - @test convert(AbstractArray{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat) - @test convert(AbstractArray{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec) - @test convert(AbstractArray{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat) - @test convert(AbstractMatrix{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec) - @test convert(AbstractMatrix{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat) - @test convert(AbstractMatrix{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec) - @test convert(AbstractMatrix{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat) -end - -@testset "Adjoint and Transpose similar methods" begin - intvec, intmat = [1, 2], [1 2 3; 4 5 6] - # similar with no additional specifications, vector (rewrapping) semantics - @test size(similar(Adjoint(intvec))::Adjoint{Int,Vector{Int}}) == size(Adjoint(intvec)) - @test size(similar(Transpose(intvec))::Transpose{Int,Vector{Int}}) == size(Transpose(intvec)) - # similar with no additional specifications, matrix (no-rewrapping) semantics - @test size(similar(Adjoint(intmat))::Matrix{Int}) == size(Adjoint(intmat)) - @test size(similar(Transpose(intmat))::Matrix{Int}) == size(Transpose(intmat)) - # similar with element type specification, vector (rewrapping) semantics - @test size(similar(Adjoint(intvec), Float64)::Adjoint{Float64,Vector{Float64}}) == size(Adjoint(intvec)) - @test size(similar(Transpose(intvec), Float64)::Transpose{Float64,Vector{Float64}}) == size(Transpose(intvec)) - # similar with element type specification, matrix (no-rewrapping) semantics - @test size(similar(Adjoint(intmat), Float64)::Matrix{Float64}) == size(Adjoint(intmat)) - @test size(similar(Transpose(intmat), Float64)::Matrix{Float64}) == size(Transpose(intmat)) - # similar with element type and arbitrary dims specifications - shape = (2, 2, 2) - @test size(similar(Adjoint(intvec), Float64, shape)::Array{Float64,3}) == shape - @test size(similar(Adjoint(intmat), Float64, shape)::Array{Float64,3}) == shape - @test size(similar(Transpose(intvec), Float64, shape)::Array{Float64,3}) == shape - @test size(similar(Transpose(intmat), Float64, shape)::Array{Float64,3}) == shape -end - -@testset "Adjoint and Transpose parent methods" begin - intvec, intmat = [1, 2], [1 2 3; 4 5 
6] - @test parent(Adjoint(intvec)) === intvec - @test parent(Adjoint(intmat)) === intmat - @test parent(Transpose(intvec)) === intvec - @test parent(Transpose(intmat)) === intmat -end - -@testset "Adjoint and Transpose vector vec methods" begin - intvec = [1, 2] - @test vec(Adjoint(intvec)) === intvec - @test vec(Transpose(intvec)) === intvec - cvec = [1 + 1im] - @test vec(cvec')[1] == cvec[1]' - mvec = [[1 2; 3 4+5im]]; - @test vec(transpose(mvec))[1] == transpose(mvec[1]) - @test vec(adjoint(mvec))[1] == adjoint(mvec[1]) -end - -@testset "horizontal concatenation of Adjoint/Transpose-wrapped vectors and Numbers" begin - # horizontal concatenation of Adjoint/Transpose-wrapped vectors and Numbers - # should preserve the Adjoint/Transpose-wrapper to preserve semantics downstream - vec, tvec, avec = [1im, 2im], [1im 2im], [-1im -2im] - vecvec = [[1im, 2im], [3im, 4im]] - tvecvec = [[[1im 2im]] [[3im 4im]]] - avecvec = [[[-1im -2im]] [[-3im -4im]]] - # for arrays with concrete scalar eltype - @test hcat(Adjoint(vec), Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == hcat(avec, avec) - @test hcat(Adjoint(vec), 1, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == hcat(avec, 1, avec) - @test hcat(Transpose(vec), Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == hcat(tvec, tvec) - @test hcat(Transpose(vec), 1, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == hcat(tvec, 1, tvec) - # for arrays with concrete array eltype - @test hcat(Adjoint(vecvec), Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == hcat(avecvec, avecvec) - @test hcat(Transpose(vecvec), Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == hcat(tvecvec, tvecvec) -end - -@testset "map/broadcast over Adjoint/Transpose-wrapped vectors and Numbers" begin - # map and broadcast over Adjoint/Transpose-wrapped vectors and Numbers - # should preserve the Adjoint/Transpose-wrapper to preserve semantics downstream - vec, tvec, avec = [1im, 2im], [1im 2im], [-1im -2im] - vecvec = [[1im, 2im], [3im, 4im]] - tvecvec = [[[1im 2im]] [[3im 4im]]] - avecvec = [[[-1im -2im]] [[-3im -4im]]] - # unary map over wrapped vectors with concrete scalar eltype - @test map(-, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == -avec - @test map(-, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == -tvec - # unary map over wrapped vectors with concrete array eltype - @test map(-, Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -avecvec - @test map(-, Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -tvecvec - # binary map over wrapped vectors with concrete scalar eltype - @test map(+, Adjoint(vec), Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec - @test map(+, Transpose(vec), Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec - # binary map over wrapped vectors with concrete array eltype - @test map(+, Adjoint(vecvec), Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == avecvec + avecvec - @test map(+, Transpose(vecvec), Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == tvecvec + tvecvec - # unary broadcast over wrapped vectors with concrete scalar eltype - @test broadcast(-, 
Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == -avec - @test broadcast(-, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == -tvec - # unary broadcast over wrapped vectors with concrete array eltype - @test broadcast(-, Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -avecvec - @test broadcast(-, Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == -tvecvec - # binary broadcast over wrapped vectors with concrete scalar eltype - @test broadcast(+, Adjoint(vec), Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec - @test broadcast(+, Transpose(vec), Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec - # binary broadcast over wrapped vectors with concrete array eltype - @test broadcast(+, Adjoint(vecvec), Adjoint(vecvec))::Adjoint{Adjoint{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == avecvec + avecvec - @test broadcast(+, Transpose(vecvec), Transpose(vecvec))::Transpose{Transpose{Complex{Int},Vector{Complex{Int}}},Vector{Vector{Complex{Int}}}} == tvecvec + tvecvec - # trinary broadcast over wrapped vectors with concrete scalar eltype and numbers - @test broadcast(+, Adjoint(vec), 1, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec .+ 1 - @test broadcast(+, Transpose(vec), 1, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec .+ 1 - @test broadcast(+, Adjoint(vec), 1im, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec .+ 1im - @test broadcast(+, Transpose(vec), 1im, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec .+ 1im -end - -@testset "Adjoint/Transpose-wrapped vector multiplication" begin - realvec, realmat = [1, 2, 3], [1 2 3; 4 5 6; 7 8 9] - complexvec, complexmat = [1im, 2, -3im], [1im 2 3; 4 5 -6im; 7im 8 9] - # Adjoint/Transpose-vector * vector - @test Adjoint(realvec) * realvec == dot(realvec, realvec) - @test Transpose(realvec) * realvec == dot(realvec, realvec) - @test Adjoint(complexvec) * complexvec == dot(complexvec, complexvec) - @test Transpose(complexvec) * complexvec == dot(conj(complexvec), complexvec) - # vector * Adjoint/Transpose-vector - @test realvec * Adjoint(realvec) == broadcast(*, realvec, reshape(realvec, (1, 3))) - @test realvec * Transpose(realvec) == broadcast(*, realvec, reshape(realvec, (1, 3))) - @test complexvec * Adjoint(complexvec) == broadcast(*, complexvec, reshape(conj(complexvec), (1, 3))) - @test complexvec * Transpose(complexvec) == broadcast(*, complexvec, reshape(complexvec, (1, 3))) - # Adjoint/Transpose-vector * matrix - @test (Adjoint(realvec) * realmat)::Adjoint{Int,Vector{Int}} == - reshape(copy(Adjoint(realmat)) * realvec, (1, 3)) - @test (Transpose(realvec) * realmat)::Transpose{Int,Vector{Int}} == - reshape(copy(Transpose(realmat)) * realvec, (1, 3)) - @test (Adjoint(complexvec) * complexmat)::Adjoint{Complex{Int},Vector{Complex{Int}}} == - reshape(conj(copy(Adjoint(complexmat)) * complexvec), (1, 3)) - @test (Transpose(complexvec) * complexmat)::Transpose{Complex{Int},Vector{Complex{Int}}} == - reshape(copy(Transpose(complexmat)) * complexvec, (1, 3)) - # Adjoint/Transpose-vector * Adjoint/Transpose-matrix - @test (Adjoint(realvec) * Adjoint(realmat))::Adjoint{Int,Vector{Int}} == - reshape(realmat * realvec, (1, 3)) - @test (Transpose(realvec) * Transpose(realmat))::Transpose{Int,Vector{Int}} == - reshape(realmat * realvec, (1, 3)) - 
@test (Adjoint(complexvec) * Adjoint(complexmat))::Adjoint{Complex{Int},Vector{Complex{Int}}} == - reshape(conj(complexmat * complexvec), (1, 3)) - @test (Transpose(complexvec) * Transpose(complexmat))::Transpose{Complex{Int},Vector{Complex{Int}}} == - reshape(complexmat * complexvec, (1, 3)) -end - -@testset "Adjoint/Transpose-wrapped vector pseudoinversion" begin - realvec, complexvec = [1, 2, 3, 4], [1im, 2, 3im, 4] - rowrealvec, rowcomplexvec = reshape(realvec, (1, 4)), reshape(complexvec, (1, 4)) - # pinv(Adjoint/Transpose-vector) should match matrix equivalents - # TODO tighten type asserts once pinv yields Transpose/Adjoint - @test pinv(Adjoint(realvec))::Vector{Float64} ≈ pinv(rowrealvec) - @test pinv(Transpose(realvec))::Vector{Float64} ≈ pinv(rowrealvec) - @test pinv(Adjoint(complexvec))::Vector{ComplexF64} ≈ pinv(conj(rowcomplexvec)) - @test pinv(Transpose(complexvec))::Vector{ComplexF64} ≈ pinv(rowcomplexvec) -end - -@testset "Adjoint/Transpose-wrapped vector left-division" begin - realvec, complexvec = [1., 2., 3., 4.,], [1.0im, 2., 3.0im, 4.] - rowrealvec, rowcomplexvec = reshape(realvec, (1, 4)), reshape(complexvec, (1, 4)) - # \(Adjoint/Transpose-vector, Adjoint/Transpose-vector) should mat matrix equivalents - @test Adjoint(realvec)\Adjoint(realvec) ≈ rowrealvec\rowrealvec - @test Transpose(realvec)\Transpose(realvec) ≈ rowrealvec\rowrealvec - @test Adjoint(complexvec)\Adjoint(complexvec) ≈ conj(rowcomplexvec)\conj(rowcomplexvec) - @test Transpose(complexvec)\Transpose(complexvec) ≈ rowcomplexvec\rowcomplexvec -end - -@testset "Adjoint/Transpose-wrapped vector right-division" begin - realvec, realmat = [1, 2, 3], [1 0 0; 0 2 0; 0 0 3] - complexvec, complexmat = [1im, 2, -3im], [2im 0 0; 0 3 0; 0 0 -5im] - rowrealvec, rowcomplexvec = reshape(realvec, (1, 3)), reshape(complexvec, (1, 3)) - # /(Adjoint/Transpose-vector, matrix) - @test (Adjoint(realvec) / realmat)::Adjoint ≈ rowrealvec / realmat - @test (Adjoint(complexvec) / complexmat)::Adjoint ≈ conj(rowcomplexvec) / complexmat - @test (Transpose(realvec) / realmat)::Transpose ≈ rowrealvec / realmat - @test (Transpose(complexvec) / complexmat)::Transpose ≈ rowcomplexvec / complexmat - # /(Adjoint/Transpose-vector, Adjoint matrix) - @test (Adjoint(realvec) / Adjoint(realmat))::Adjoint ≈ rowrealvec / copy(Adjoint(realmat)) - @test (Adjoint(complexvec) / Adjoint(complexmat))::Adjoint ≈ conj(rowcomplexvec) / copy(Adjoint(complexmat)) - @test (Transpose(realvec) / Adjoint(realmat))::Transpose ≈ rowrealvec / copy(Adjoint(realmat)) - @test (Transpose(complexvec) / Adjoint(complexmat))::Transpose ≈ rowcomplexvec / copy(Adjoint(complexmat)) - # /(Adjoint/Transpose-vector, Transpose matrix) - @test (Adjoint(realvec) / Transpose(realmat))::Adjoint ≈ rowrealvec / copy(Transpose(realmat)) - @test (Adjoint(complexvec) / Transpose(complexmat))::Adjoint ≈ conj(rowcomplexvec) / copy(Transpose(complexmat)) - @test (Transpose(realvec) / Transpose(realmat))::Transpose ≈ rowrealvec / copy(Transpose(realmat)) - @test (Transpose(complexvec) / Transpose(complexmat))::Transpose ≈ rowcomplexvec / copy(Transpose(complexmat)) -end - -@testset "norm and opnorm of Adjoint/Transpose-wrapped vectors" begin - # definitions are in base/linalg/generic.jl - realvec, complexvec = [3, -4], [3im, -4im] - # one norm result should be sum(abs.(realvec)) == 7 - # two norm result should be sqrt(sum(abs.(realvec))) == 5 - # inf norm result should be maximum(abs.(realvec)) == 4 - for v in (realvec, complexvec) - @test norm(Adjoint(v)) ≈ 5 - @test norm(Adjoint(v), 1) ≈ 
7 - @test norm(Adjoint(v), Inf) ≈ 4 - @test norm(Transpose(v)) ≈ 5 - @test norm(Transpose(v), 1) ≈ 7 - @test norm(Transpose(v), Inf) ≈ 4 - end - # one opnorm result should be maximum(abs.(realvec)) == 4 - # two opnorm result should be sqrt(sum(abs.(realvec).^2)) == 5 - # inf opnorm result should be sum(abs.(realvec)) == 7 - for v in (realvec, complexvec) - @test opnorm(Adjoint(v)) ≈ 5 - @test opnorm(Adjoint(v), 1) ≈ 4 - @test opnorm(Adjoint(v), Inf) ≈ 7 - @test opnorm(Transpose(v)) ≈ 5 - @test opnorm(Transpose(v), 1) ≈ 4 - @test opnorm(Transpose(v), Inf) ≈ 7 - end -end - -@testset "adjoint and transpose of Numbers" begin - @test adjoint(1) == 1 - @test adjoint(1.0) == 1.0 - @test adjoint(1im) == -1im - @test adjoint(1.0im) == -1.0im - @test transpose(1) == 1 - @test transpose(1.0) == 1.0 - @test transpose(1im) == 1im - @test transpose(1.0im) == 1.0im -end - -@testset "adjoint!(a, b) return a" begin - a = fill(1.0+im, 5) - b = fill(1.0+im, 1, 5) - @test adjoint!(a, b) === a - @test adjoint!(b, a) === b -end - -@testset "aliasing with adjoint and transpose" begin - A = collect(reshape(1:25, 5, 5)) .+ rand.().*im - B = copy(A) - B .= B' - @test B == A' - B = copy(A) - B .= transpose(B) - @test B == transpose(A) - B = copy(A) - B .= B .* B' - @test B == A .* A' -end - -@testset "test show methods for $t of Factorizations" for t in (adjoint, transpose) - A = randn(ComplexF64, 4, 4) - F = lu(A) - Fop = t(F) - @test sprint(show, Fop) == - "$t of "*sprint(show, parent(Fop)) - @test sprint((io, t) -> show(io, MIME"text/plain"(), t), Fop) == - "$t of "*sprint((io, t) -> show(io, MIME"text/plain"(), t), parent(Fop)) -end - -@testset "showarg" begin - io = IOBuffer() - - A = ones(Float64, 3,3) - - B = Adjoint(A) - @test summary(B) == "3×3 adjoint(::Matrix{Float64}) with eltype Float64" - @test Base.showarg(io, B, false) === nothing - @test String(take!(io)) == "adjoint(::Matrix{Float64})" - - B = Transpose(A) - @test summary(B) == "3×3 transpose(::Matrix{Float64}) with eltype Float64" - @test Base.showarg(io, B, false) === nothing - @test String(take!(io)) == "transpose(::Matrix{Float64})" -end - -@testset "strided transposes" begin - for t in (Adjoint, Transpose) - @test strides(t(rand(3))) == (3, 1) - @test strides(t(rand(3,2))) == (3, 1) - @test strides(t(view(rand(3, 2), :))) == (6, 1) - @test strides(t(view(rand(3, 2), :, 1:2))) == (3, 1) - - A = rand(3) - @test pointer(t(A)) === pointer(A) - B = rand(3,1) - @test pointer(t(B)) === pointer(B) - end - @test_throws MethodError strides(Adjoint(rand(3) .+ rand(3).*im)) - @test_throws MethodError strides(Adjoint(rand(3, 2) .+ rand(3, 2).*im)) - @test strides(Transpose(rand(3) .+ rand(3).*im)) == (3, 1) - @test strides(Transpose(rand(3, 2) .+ rand(3, 2).*im)) == (3, 1) - - C = rand(3) .+ rand(3).*im - @test_throws ErrorException pointer(Adjoint(C)) - @test pointer(Transpose(C)) === pointer(C) - D = rand(3,2) .+ rand(3,2).*im - @test_throws ErrorException pointer(Adjoint(D)) - @test pointer(Transpose(D)) === pointer(D) -end - -isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays - -@testset "offset axes" begin - s = Base.Slice(-3:3)' - @test axes(s) === (Base.OneTo(1), Base.IdentityUnitRange(-3:3)) - @test collect(LinearIndices(s)) == reshape(1:7, 1, 7) - @test collect(CartesianIndices(s)) == reshape([CartesianIndex(1,i) for i = -3:3], 1, 7) - @test s[1] == -3 - @test s[7] == 3 - @test s[4] == 0 - @test_throws BoundsError s[0] - @test_throws BoundsError s[8] - @test
s[1,-3] == -3 - @test s[1, 3] == 3 - @test s[1, 0] == 0 - @test_throws BoundsError s[1,-4] - @test_throws BoundsError s[1, 4] -end - -@testset "specialized conj of Adjoint/Transpose" begin - realmat = [1 2; 3 4] - complexmat = ComplexF64[1+im 2; 3 4-im] - nested = [[complexmat] [-complexmat]; [0complexmat] [3complexmat]] - @testset "AdjOrTrans{...,$(typeof(i))}" for i in ( - realmat, vec(realmat), - complexmat, vec(complexmat), - nested, vec(nested), - ) - for (t,type) in ((transpose, Adjoint), (adjoint, Transpose)) - M = t(i) - @test conj(M) isa type - @test conj(M) == conj(collect(M)) - @test conj(conj(M)) === M - end - end - # test if `conj(transpose(::Hermitian))` is a no-op - hermitian = Hermitian([1 2+im; 2-im 3]) - @test conj(transpose(hermitian)) === hermitian -end - -@testset "empty and mismatched lengths" begin - # issue 36678 - @test_throws DimensionMismatch [1, 2]' * [1,2,3] - @test Int[]' * Int[] == 0 - @test transpose(Int[]) * Int[] == 0 -end - -@testset "reductions: $adjtrans" for adjtrans in (transpose, adjoint) - for (reduction, reduction!, op) in ((sum, sum!, +), (prod, prod!, *), (minimum, minimum!, min), (maximum, maximum!, max)) - T = op in (max, min) ? Float64 : ComplexF64 - mat = rand(T, 3,5) - rd1 = zeros(T, 1, 3) - rd2 = zeros(T, 5, 1) - rd3 = zeros(T, 1, 1) - @test reduction(adjtrans(mat)) ≈ reduction(copy(adjtrans(mat))) - @test reduction(adjtrans(mat), dims=1) ≈ reduction(copy(adjtrans(mat)), dims=1) - @test reduction(adjtrans(mat), dims=2) ≈ reduction(copy(adjtrans(mat)), dims=2) - @test reduction(adjtrans(mat), dims=(1,2)) ≈ reduction(copy(adjtrans(mat)), dims=(1,2)) - - @test reduction!(rd1, adjtrans(mat)) ≈ reduction!(rd1, copy(adjtrans(mat))) - @test reduction!(rd2, adjtrans(mat)) ≈ reduction!(rd2, copy(adjtrans(mat))) - @test reduction!(rd3, adjtrans(mat)) ≈ reduction!(rd3, copy(adjtrans(mat))) - - @test reduction(imag, adjtrans(mat)) ≈ reduction(imag, copy(adjtrans(mat))) - @test reduction(imag, adjtrans(mat), dims=1) ≈ reduction(imag, copy(adjtrans(mat)), dims=1) - @test reduction(imag, adjtrans(mat), dims=2) ≈ reduction(imag, copy(adjtrans(mat)), dims=2) - @test reduction(imag, adjtrans(mat), dims=(1,2)) ≈ reduction(imag, copy(adjtrans(mat)), dims=(1,2)) - - @test Base.mapreducedim!(imag, op, rd1, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd1, copy(adjtrans(mat))) - @test Base.mapreducedim!(imag, op, rd2, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd2, copy(adjtrans(mat))) - @test Base.mapreducedim!(imag, op, rd3, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd3, copy(adjtrans(mat))) - - op in (max, min) && continue - mat = [rand(T,2,2) for _ in 1:3, _ in 1:5] - rd1 = fill(zeros(T, 2, 2), 1, 3) - rd2 = fill(zeros(T, 2, 2), 5, 1) - rd3 = fill(zeros(T, 2, 2), 1, 1) - @test reduction(adjtrans(mat)) ≈ reduction(copy(adjtrans(mat))) - @test reduction(adjtrans(mat), dims=1) ≈ reduction(copy(adjtrans(mat)), dims=1) - @test reduction(adjtrans(mat), dims=2) ≈ reduction(copy(adjtrans(mat)), dims=2) - @test reduction(adjtrans(mat), dims=(1,2)) ≈ reduction(copy(adjtrans(mat)), dims=(1,2)) - - @test reduction(imag, adjtrans(mat)) ≈ reduction(imag, copy(adjtrans(mat))) - @test reduction(x -> x[1,2], adjtrans(mat)) ≈ reduction(x -> x[1,2], copy(adjtrans(mat))) - @test reduction(imag, adjtrans(mat), dims=1) ≈ reduction(imag, copy(adjtrans(mat)), dims=1) - @test reduction(x -> x[1,2], adjtrans(mat), dims=1) ≈ reduction(x -> x[1,2], copy(adjtrans(mat)), dims=1) - end - # see #46605 - Ac = [1 2; 3 4]' - @test mapreduce(identity, (x, y) -> 10x+y, copy(Ac)) == 
mapreduce(identity, (x, y) -> 10x+y, Ac) == 1234 - @test extrema([3,7,4]') == (3, 7) - @test mapreduce(x -> [x;;;], +, [1, 2, 3]') == sum(x -> [x;;;], [1, 2, 3]') == [6;;;] - @test mapreduce(string, *, [1 2; 3 4]') == mapreduce(string, *, copy([1 2; 3 4]')) == "1234" -end - -@testset "trace" begin - for T in (Float64, ComplexF64), t in (adjoint, transpose) - A = randn(T, 10, 10) - @test tr(t(A)) == tr(copy(t(A))) == t(tr(A)) - end -end - -@testset "structured printing" begin - D = Diagonal(1:3) - @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D) - @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) - D = Diagonal((1:3)*im) - D2 = Diagonal((1:3)*(-im)) - @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) - @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D2) - - struct OneHotVecOrMat{N} <: AbstractArray{Bool,N} - inds::NTuple{N,Int} - sz::NTuple{N,Int} - end - Base.size(x::OneHotVecOrMat) = x.sz - function Base.getindex(x::OneHotVecOrMat{N}, inds::Vararg{Int,N}) where {N} - checkbounds(x, inds...) - inds == x.inds - end - Base.replace_in_print_matrix(o::OneHotVecOrMat{1}, i::Integer, j::Integer, s::AbstractString) = - o.inds == (i,) ? s : Base.replace_with_centered_mark(s) - Base.replace_in_print_matrix(o::OneHotVecOrMat{2}, i::Integer, j::Integer, s::AbstractString) = - o.inds == (i,j) ? s : Base.replace_with_centered_mark(s) - - o = OneHotVecOrMat((2,), (4,)) - @test sprint(Base.print_matrix, Transpose(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) - @test sprint(Base.print_matrix, Adjoint(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) -end - -end # module TestAdjointTranspose diff --git a/stdlib/LinearAlgebra/test/ambiguous_exec.jl b/stdlib/LinearAlgebra/test/ambiguous_exec.jl deleted file mode 100644 index 7b89c0a457afb..0000000000000 --- a/stdlib/LinearAlgebra/test/ambiguous_exec.jl +++ /dev/null @@ -1,21 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Test, LinearAlgebra -let ambig = detect_ambiguities(LinearAlgebra; recursive=true) - @test isempty(ambig) - ambig = Set{Any}(((m1.sig, m2.sig) for (m1, m2) in ambig)) - expect = [] - good = true - while !isempty(ambig) - sigs = pop!(ambig) - i = findfirst(==(sigs), expect) - if i === nothing - println(stderr, "push!(expect, (", sigs[1], ", ", sigs[2], "))") - good = false - continue - end - deleteat!(expect, i) - end - @test isempty(expect) - @test good -end diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl deleted file mode 100644 index d13009780b975..0000000000000 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ /dev/null @@ -1,825 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestBidiagonal - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasReal, BlasFloat - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") - -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs - -isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) -using .Main.Quaternions - -isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl")) -using .Main.InfiniteArrays - -isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) -using .Main.FillArrays - -include("testutils.jl") # test_approx_eq_modphase - -n = 10 #Size of test matrix -Random.seed!(1) - -@testset for relty in (Int, Float32, Float64, BigFloat), elty in (relty, Complex{relty}) - if relty <: AbstractFloat - dv = convert(Vector{elty}, randn(n)) - ev = convert(Vector{elty}, randn(n-1)) - if (elty <: Complex) - dv += im*convert(Vector{elty}, randn(n)) - ev += im*convert(Vector{elty}, randn(n-1)) - end - elseif relty <: Integer - dv = convert(Vector{elty}, rand(1:10, n)) - ev = convert(Vector{elty}, rand(1:10, n-1)) - if (elty <: Complex) - dv += im*convert(Vector{elty}, rand(1:10, n)) - ev += im*convert(Vector{elty}, rand(1:10, n-1)) - end - end - dv0 = zeros(elty, 0) - ev0 = zeros(elty, 0) - - @testset "Constructors" begin - for (x, y) in ((dv0, ev0), (dv, ev), (GenericArray(dv), GenericArray(ev))) - # from vectors - ubd = Bidiagonal(x, y, :U) - lbd = Bidiagonal(x, y, :L) - @test ubd != lbd || x === dv0 - @test ubd.dv === x - @test lbd.ev === y - @test_throws ArgumentError Bidiagonal(x, y, :R) - @test_throws ArgumentError Bidiagonal(x, y, 'R') - x == dv0 || @test_throws DimensionMismatch Bidiagonal(x, x, :U) - @test_throws MethodError Bidiagonal(x, y) - # from matrix - @test Bidiagonal(ubd, :U) == Bidiagonal(Matrix(ubd), :U) == ubd - @test Bidiagonal(lbd, :L) == Bidiagonal(Matrix(lbd), :L) == lbd - # from its own type - @test typeof(ubd)(ubd) === ubd - @test typeof(lbd)(lbd) === lbd - end - @test eltype(Bidiagonal{elty}([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == elty - @test eltype(Bidiagonal([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == Float32 # promotion test - @test isa(Bidiagonal{elty,Vector{elty}}(GenericArray(dv), ev, :U), Bidiagonal{elty,Vector{elty}}) - @test_throws MethodError Bidiagonal(dv, GenericArray(ev), :U) - @test_throws MethodError Bidiagonal(GenericArray(dv), ev, :U) - BI = Bidiagonal([1,2,3,4], [1,2,3], :U) - @test Bidiagonal(BI) === BI - @test isa(Bidiagonal{elty}(BI), Bidiagonal{elty}) - end - - @testset "getindex, setindex!, size, and similar" begin - ubd = Bidiagonal(dv, ev, :U) - lbd = Bidiagonal(dv, ev, :L) - # bidiagonal getindex / upper & lower - @test_throws BoundsError ubd[n + 1, 1] - @test_throws BoundsError ubd[1, n + 1] - @test ubd[2, 2] == dv[2] - # bidiagonal getindex / upper - @test ubd[2, 3] == ev[2] - @test iszero(ubd[3, 2]) - # bidiagonal getindex / lower - @test lbd[3, 2] == ev[2] - @test iszero(lbd[2, 3]) - # bidiagonal setindex! / upper - cubd = copy(ubd) - @test_throws ArgumentError ubd[2, 1] = 1 - @test_throws ArgumentError ubd[3, 1] = 1 - @test (cubd[2, 1] = 0; cubd == ubd) - @test ((cubd[1, 2] = 10) == 10; cubd[1, 2] == 10) - # bidiagonal setindex! 
/ lower - clbd = copy(lbd) - @test_throws ArgumentError lbd[1, 2] = 1 - @test_throws ArgumentError lbd[1, 3] = 1 - @test (clbd[1, 2] = 0; clbd == lbd) - @test ((clbd[2, 1] = 10) == 10; clbd[2, 1] == 10) - # bidiagonal setindex! / upper & lower - @test_throws BoundsError ubd[n + 1, 1] = 1 - @test_throws BoundsError ubd[1, n + 1] = 1 - @test ((cubd[2, 2] = 10) == 10; cubd[2, 2] == 10) - # bidiagonal size - @test_throws ArgumentError size(ubd, 0) - @test size(ubd, 1) == size(ubd, 2) == n - @test size(ubd, 3) == 1 - # bidiagonal similar - @test isa(similar(ubd), Bidiagonal{elty}) - @test similar(ubd).uplo == ubd.uplo - @test isa(similar(ubd, Int), Bidiagonal{Int}) - @test similar(ubd, Int).uplo == ubd.uplo - @test isa(similar(ubd, (3, 2)), Matrix) - @test isa(similar(ubd, Int, (3, 2)), Matrix{Int}) - - # setindex! when off diagonal is zero bug - Bu = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'U') - Bl = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'L') - @test_throws ArgumentError Bu[5, 4] = 1 - @test_throws ArgumentError Bl[4, 5] = 1 - end - - @testset "show" begin - BD = Bidiagonal(dv, ev, :U) - dstring = sprint(Base.print_matrix,BD.dv') - estring = sprint(Base.print_matrix,BD.ev') - @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n super:$estring" - BD = Bidiagonal(dv,ev,:L) - @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n sub:$estring" - end - - @testset for uplo in (:U, :L) - T = Bidiagonal(dv, ev, uplo) - - @testset "Constructor and basic properties" begin - @test size(T, 1) == size(T, 2) == n - @test size(T) == (n, n) - @test Array(T) == diagm(0 => dv, (uplo === :U ? 1 : -1) => ev) - @test Bidiagonal(Array(T), uplo) == T - @test big.(T) == T - @test Array(abs.(T)) == abs.(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev)) - @test Array(real(T)) == real(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev)) - @test Array(imag(T)) == imag(diagm(0 => dv, (uplo === :U ? 
1 : -1) => ev)) - end - - @testset for func in (conj, transpose, adjoint) - @test func(func(T)) == T - end - - @testset "permutedims(::Bidiagonal)" begin - @test permutedims(permutedims(T)) === T - @test permutedims(T) == transpose.(transpose(T)) - @test permutedims(T, [1, 2]) === T - @test permutedims(T, (2, 1)) == permutedims(T) - end - - @testset "triu and tril" begin - zerosdv = zeros(elty, length(dv)) - zerosev = zeros(elty, length(ev)) - bidiagcopy(dv, ev, uplo) = Bidiagonal(copy(dv), copy(ev), uplo) - - @test istril(Bidiagonal(dv,ev,:L)) - @test istril(Bidiagonal(dv,ev,:L), 1) - @test !istril(Bidiagonal(dv,ev,:L), -1) - @test istril(Bidiagonal(zerosdv,ev,:L), -1) - @test !istril(Bidiagonal(zerosdv,ev,:L), -2) - @test istril(Bidiagonal(zerosdv,zerosev,:L), -2) - @test !istril(Bidiagonal(dv,ev,:U)) - @test istril(Bidiagonal(dv,ev,:U), 1) - @test !istril(Bidiagonal(dv,ev,:U), -1) - @test !istril(Bidiagonal(zerosdv,ev,:U), -1) - @test istril(Bidiagonal(zerosdv,zerosev,:U), -1) - @test tril!(bidiagcopy(dv,ev,:U),-1) == Bidiagonal(zerosdv,zerosev,:U) - @test tril!(bidiagcopy(dv,ev,:L),-1) == Bidiagonal(zerosdv,ev,:L) - @test tril!(bidiagcopy(dv,ev,:U),-2) == Bidiagonal(zerosdv,zerosev,:U) - @test tril!(bidiagcopy(dv,ev,:L),-2) == Bidiagonal(zerosdv,zerosev,:L) - @test tril!(bidiagcopy(dv,ev,:U),1) == Bidiagonal(dv,ev,:U) - @test tril!(bidiagcopy(dv,ev,:L),1) == Bidiagonal(dv,ev,:L) - @test tril!(bidiagcopy(dv,ev,:U)) == Bidiagonal(dv,zerosev,:U) - @test tril!(bidiagcopy(dv,ev,:L)) == Bidiagonal(dv,ev,:L) - @test_throws ArgumentError tril!(bidiagcopy(dv, ev, :U), -n - 2) - @test_throws ArgumentError tril!(bidiagcopy(dv, ev, :U), n) - - @test istriu(Bidiagonal(dv,ev,:U)) - @test istriu(Bidiagonal(dv,ev,:U), -1) - @test !istriu(Bidiagonal(dv,ev,:U), 1) - @test istriu(Bidiagonal(zerosdv,ev,:U), 1) - @test !istriu(Bidiagonal(zerosdv,ev,:U), 2) - @test istriu(Bidiagonal(zerosdv,zerosev,:U), 2) - @test !istriu(Bidiagonal(dv,ev,:L)) - @test istriu(Bidiagonal(dv,ev,:L), -1) - @test !istriu(Bidiagonal(dv,ev,:L), 1) - @test !istriu(Bidiagonal(zerosdv,ev,:L), 1) - @test istriu(Bidiagonal(zerosdv,zerosev,:L), 1) - @test triu!(bidiagcopy(dv,ev,:L),1) == Bidiagonal(zerosdv,zerosev,:L) - @test triu!(bidiagcopy(dv,ev,:U),1) == Bidiagonal(zerosdv,ev,:U) - @test triu!(bidiagcopy(dv,ev,:U),2) == Bidiagonal(zerosdv,zerosev,:U) - @test triu!(bidiagcopy(dv,ev,:L),2) == Bidiagonal(zerosdv,zerosev,:L) - @test triu!(bidiagcopy(dv,ev,:U),-1) == Bidiagonal(dv,ev,:U) - @test triu!(bidiagcopy(dv,ev,:L),-1) == Bidiagonal(dv,ev,:L) - @test triu!(bidiagcopy(dv,ev,:L)) == Bidiagonal(dv,zerosev,:L) - @test triu!(bidiagcopy(dv,ev,:U)) == Bidiagonal(dv,ev,:U) - @test_throws ArgumentError triu!(bidiagcopy(dv, ev, :U), -n) - @test_throws ArgumentError triu!(bidiagcopy(dv, ev, :U), n + 2) - @test !isdiag(Bidiagonal(dv,ev,:U)) - @test !isdiag(Bidiagonal(dv,ev,:L)) - @test isdiag(Bidiagonal(dv,zerosev,:U)) - @test isdiag(Bidiagonal(dv,zerosev,:L)) - end - - @testset "iszero and isone" begin - for uplo in (:U, :L) - BDzero = Bidiagonal(zeros(elty, 10), zeros(elty, 9), uplo) - BDone = Bidiagonal(ones(elty, 10), zeros(elty, 9), uplo) - BDmix = Bidiagonal(zeros(elty, 10), zeros(elty, 9), uplo) - BDmix[end,end] = one(elty) - - @test iszero(BDzero) - @test !isone(BDzero) - @test !iszero(BDone) - @test isone(BDone) - @test !iszero(BDmix) - @test !isone(BDmix) - end - end - - @testset "trace" begin - for uplo in (:U, :L) - B = Bidiagonal(dv, ev, uplo) - if relty <: Integer - @test tr(B) == tr(Matrix(B)) - else - @test tr(B) ≈ 
tr(Matrix(B)) rtol=2eps(relty) - end - end - end - - Tfull = Array(T) - @testset "Linear solves" begin - if relty <: AbstractFloat - c = convert(Matrix{elty}, randn(n,n)) - b = convert(Matrix{elty}, randn(n, 2)) - if (elty <: Complex) - b += im*convert(Matrix{elty}, randn(n, 2)) - end - elseif relty <: Integer - c = convert(Matrix{elty}, rand(1:10, n, n)) - b = convert(Matrix{elty}, rand(1:10, n, 2)) - if (elty <: Complex) - b += im*convert(Matrix{elty}, rand(1:10, n, 2)) - end - end - condT = cond(map(ComplexF64,Tfull)) - promty = typeof((zero(relty)*zero(relty) + zero(relty)*zero(relty))/one(relty)) - if relty != BigFloat - x = transpose(T)\transpose(c) - tx = transpose(Tfull) \ transpose(c) - elty <: AbstractFloat && @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - @test_throws DimensionMismatch transpose(T)\transpose(b) - x = T'\copy(transpose(c)) - tx = Tfull'\copy(transpose(c)) - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - @test_throws DimensionMismatch T'\copy(transpose(b)) - x = T\transpose(c) - tx = Tfull\transpose(c) - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - @test_throws DimensionMismatch T\transpose(b) - end - offsizemat = Matrix{elty}(undef, n+1, 2) - @test_throws DimensionMismatch T \ offsizemat - @test_throws DimensionMismatch transpose(T) \ offsizemat - @test_throws DimensionMismatch T' \ offsizemat - - if elty <: BigFloat - @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :U), rand(elty, n)) - @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :L), rand(elty, n)) - end - let bb = b, cc = c - for atype in ("Array", "SubArray") - if atype == "Array" - b = bb - c = cc - else - b = view(bb, 1:n) - c = view(cc, 1:n, 1:2) - end - end - x = T \ b - tx = Tfull \ b - @test_throws DimensionMismatch ldiv!(T, Vector{elty}(undef, n+1)) - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - x = transpose(T) \ b - tx = transpose(Tfull) \ b - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - x = copy(transpose(b)) / T - tx = copy(transpose(b)) / Tfull - @test_throws DimensionMismatch rdiv!(Matrix{elty}(undef, 1, n+1), T) - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - x = copy(transpose(b)) / transpose(T) - tx = copy(transpose(b)) / transpose(Tfull) - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - @testset "Generic Mat-vec ops" begin - @test T*b ≈ Tfull*b - @test T'*b ≈ Tfull'*b - if relty != BigFloat # not supported by pivoted QR - @test T/b' ≈ Tfull/b' - end - end - end - zdv = Vector{elty}(undef, 0) - zev = Vector{elty}(undef, 0) - zA = Bidiagonal(zdv, zev, :U) - zb = Vector{elty}(undef, 0) - @test ldiv!(zA, zb) === zb - @testset "linear solves with abstract matrices" begin - diag = b[:,1] - D = Diagonal(diag) - x = T \ D - tx = Tfull \ D - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - x = D / T - tx = D / Tfull - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - x = transpose(T) \ D - tx = transpose(Tfull) \ D - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - x = D / transpose(T) - tx = D / transpose(Tfull) - @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) - end - @testset "Specialized multiplication/division" begin - getval(x) = x - 
getval(x::Furlong) = x.val - function _bidiagdivmultest(T, - x, - typemul=T.uplo == 'U' ? UpperTriangular : Matrix, - typediv=T.uplo == 'U' ? UpperTriangular : Matrix, - typediv2=T.uplo == 'U' ? UpperTriangular : Matrix) - TM = Matrix(T) - @test map(getval, (T*x)::typemul) ≈ map(getval, TM*x) - @test map(getval, (x*T)::typemul) ≈ map(getval, x*TM) - @test map(getval, (x\T)::typediv) ≈ map(getval, x\TM) - @test map(getval, (T/x)::typediv) ≈ map(getval, TM/x) - if !isa(x, Number) - @test map(getval, Array((T\x)::typediv2)) ≈ map(getval, Array(TM\x)) - @test map(getval, Array((x/T)::typediv2)) ≈ map(getval, Array(x/TM)) - end - return nothing - end - A = Matrix(T) - for t in (T, Furlong.(T)), (A, dv, ev) in ((A, dv, ev), (Furlong.(A), Furlong.(dv), Furlong.(ev))) - _bidiagdivmultest(t, 5, Bidiagonal, Bidiagonal) - _bidiagdivmultest(t, 5I, Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular) - _bidiagdivmultest(t, Diagonal(dv), Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular) - _bidiagdivmultest(t, UpperTriangular(A)) - _bidiagdivmultest(t, UnitUpperTriangular(A)) - _bidiagdivmultest(t, LowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix) - _bidiagdivmultest(t, UnitLowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix) - _bidiagdivmultest(t, Bidiagonal(dv, ev, :U), Matrix, Matrix, Matrix) - _bidiagdivmultest(t, Bidiagonal(dv, ev, :L), Matrix, Matrix, Matrix) - end - end - end - - if elty <: BlasReal - @testset "$f" for f in (floor, trunc, round, ceil) - @test (f.(Int, T))::Bidiagonal == Bidiagonal(f.(Int, T.dv), f.(Int, T.ev), T.uplo) - @test (f.(T))::Bidiagonal == Bidiagonal(f.(T.dv), f.(T.ev), T.uplo) - end - end - - @testset "diag" begin - @test (@inferred diag(T))::typeof(dv) == dv - @test (@inferred diag(T, uplo === :U ? 1 : -1))::typeof(dv) == ev - @test (@inferred diag(T,2))::typeof(dv) == zeros(elty, n-2) - @test_throws ArgumentError diag(T, -n - 1) - @test_throws ArgumentError diag(T, n + 1) - # test diag with another wrapped vector type - gdv, gev = GenericArray(dv), GenericArray(ev) - G = Bidiagonal(gdv, gev, uplo) - @test (@inferred diag(G))::typeof(gdv) == gdv - @test (@inferred diag(G, uplo === :U ? 1 : -1))::typeof(gdv) == gev - @test (@inferred diag(G,2))::typeof(gdv) == GenericArray(zeros(elty, n-2)) - end - - @testset "Eigensystems" begin - if relty <: AbstractFloat - d1, v1 = eigen(T) - d2, v2 = eigen(map(elty<:Complex ? ComplexF64 : Float64,Tfull), sortby=nothing) - @test (uplo === :U ? d1 : reverse(d1)) ≈ d2 - if elty <: Real - test_approx_eq_modphase(v1, uplo === :U ? 
v2 : v2[:,n:-1:1]) - end - end - end - - @testset "Singular systems" begin - if (elty <: BlasReal) - @test AbstractArray(svd(T)) ≈ AbstractArray(svd!(copy(Tfull))) - @test svdvals(Tfull) ≈ svdvals(T) - u1, d1, v1 = svd(Tfull) - u2, d2, v2 = svd(T) - @test d1 ≈ d2 - if elty <: Real - test_approx_eq_modphase(u1, u2) - test_approx_eq_modphase(copy(v1), copy(v2)) - end - @test 0 ≈ norm(u2*Diagonal(d2)*v2'-Tfull) atol=n*max(n^2*eps(relty),norm(u1*Diagonal(d1)*v1'-Tfull)) - @inferred svdvals(T) - @inferred svd(T) - end - end - - @testset "Binary operations" begin - @test -T == Bidiagonal(-T.dv,-T.ev,T.uplo) - @test convert(elty,-1.0) * T == Bidiagonal(-T.dv,-T.ev,T.uplo) - @test T / convert(elty,-1.0) == Bidiagonal(-T.dv,-T.ev,T.uplo) - @test T * convert(elty,-1.0) == Bidiagonal(-T.dv,-T.ev,T.uplo) - @testset for uplo2 in (:U, :L) - dv = convert(Vector{elty}, relty <: AbstractFloat ? randn(n) : rand(1:10, n)) - ev = convert(Vector{elty}, relty <: AbstractFloat ? randn(n-1) : rand(1:10, n-1)) - T2 = Bidiagonal(dv, ev, uplo2) - Tfull2 = Array(T2) - for op in (+, -, *) - @test Array(op(T, T2)) ≈ op(Tfull, Tfull2) - end - end - # test pass-through of mul! for SymTridiagonal*Bidiagonal - TriSym = SymTridiagonal(T.dv, T.ev) - @test Array(TriSym*T) ≈ Array(TriSym)*Array(T) - # test pass-through of mul! for AbstractTriangular*Bidiagonal - Tri = UpperTriangular(diagm(1 => T.ev)) - Dia = Diagonal(T.dv) - @test Array(Tri*T) ≈ Array(Tri)*Array(T) - # test mul! itself for these types - for AA in (Tri, Dia) - for f in (identity, transpose, adjoint) - C = rand(elty, n, n) - D = copy(C) + 2.0 * Array(f(AA) * T) - mul!(C, f(AA), T, 2.0, 1.0) ≈ D - end - end - # test mul! for BiTrySym * adjoint/transpose AbstractMat - for f in (identity, transpose, adjoint) - C = relty == Int ? 
rand(float(elty), n, n) : rand(elty, n, n) - B = rand(elty, n, n) - D = copy(C) + 2.0 * Array(T*f(B)) - mul!(C, T, f(B), 2.0, 1.0) ≈ D - end - - # Issue #31870 - # Bi/Tri/Sym times Diagonal - Diag = Diagonal(rand(elty, 10)) - BidiagU = Bidiagonal(rand(elty, 10), rand(elty, 9), 'U') - BidiagL = Bidiagonal(rand(elty, 10), rand(elty, 9), 'L') - Tridiag = Tridiagonal(rand(elty, 9), rand(elty, 10), rand(elty, 9)) - SymTri = SymTridiagonal(rand(elty, 10), rand(elty, 9)) - - mats = Any[Diag, BidiagU, BidiagL, Tridiag, SymTri] - for a in mats - for b in mats - @test a*b ≈ Matrix(a)*Matrix(b) - end - end - - @test typeof(BidiagU*Diag) <: Bidiagonal - @test typeof(BidiagL*Diag) <: Bidiagonal - @test typeof(Tridiag*Diag) <: Tridiagonal - @test typeof(SymTri*Diag) <: Tridiagonal - - @test typeof(BidiagU*Diag) <: Bidiagonal - @test typeof(Diag*BidiagL) <: Bidiagonal - @test typeof(Diag*Tridiag) <: Tridiagonal - @test typeof(Diag*SymTri) <: Tridiagonal - end - - @test inv(T)*Tfull ≈ Matrix(I, n, n) - @test factorize(T) === T - end - BD = Bidiagonal(dv, ev, :U) - @test Matrix{ComplexF64}(BD) == BD -end - -# Issue 10742 and similar -let A = Bidiagonal([1,2,3], [0,0], :U) - @test istril(A) - @test isdiag(A) -end - -# test construct from range -@test Bidiagonal(1:3, 1:2, :U) == [1 1 0; 0 2 2; 0 0 3] - -@testset "promote_rule" begin - A = Bidiagonal(fill(1f0,10),fill(1f0,9),:U) - B = rand(Float64,10,10) - C = Tridiagonal(rand(Float64,9),rand(Float64,10),rand(Float64,9)) - @test promote_rule(Matrix{Float64}, Bidiagonal{Float64}) == Matrix{Float64} - @test promote(B,A) == (B, convert(Matrix{Float64}, A)) - @test promote(B,A) isa Tuple{Matrix{Float64}, Matrix{Float64}} - @test promote(C,A) == (C,Tridiagonal(zeros(Float64,9),convert(Vector{Float64},A.dv),convert(Vector{Float64},A.ev))) - @test promote(C,A) isa Tuple{Tridiagonal, Tridiagonal} -end - -using LinearAlgebra: fillstored!, UnitLowerTriangular -@testset "fill! and fillstored!" begin - let # fillstored! - A = Tridiagonal(randn(2), randn(3), randn(2)) - @test fillstored!(A, 3) == Tridiagonal([3, 3], [3, 3, 3], [3, 3]) - B = Bidiagonal(randn(3), randn(2), :U) - @test fillstored!(B, 2) == Bidiagonal([2,2,2], [2,2], :U) - S = SymTridiagonal(randn(3), randn(2)) - @test fillstored!(S, 1) == SymTridiagonal([1,1,1], [1,1]) - Ult = UnitLowerTriangular(randn(3,3)) - @test fillstored!(Ult, 3) == UnitLowerTriangular([1 0 0; 3 1 0; 3 3 1]) - end - let # fill!(exotic, 0) - exotic_arrays = Any[Tridiagonal(randn(3), randn(4), randn(3)), - Bidiagonal(randn(3), randn(2), rand([:U,:L])), - SymTridiagonal(randn(3), randn(2)), - Diagonal(randn(5)), - # LowerTriangular(randn(3,3)), # AbstractTriangular fill! deprecated, see below - # UpperTriangular(randn(3,3)) # AbstractTriangular fill! deprecated, see below - ] - for A in exotic_arrays - @test iszero(fill!(A, 0)) - end - - # Diagonal fill! is no longer deprecated. See #29780 - # AbstractTriangular fill! was defined as fillstored!, - # not matching the general behavior of fill!, and so it has been deprecated. - # In a future dev cycle, this fill! methods should probably be reintroduced - # with behavior matching that of fill! for other structured matrix types. - # In the interim, equivalently test fillstored! 
below - @test iszero(fillstored!(Diagonal(fill(1, 3)), 0)) - @test iszero(fillstored!(LowerTriangular(fill(1, 3, 3)), 0)) - @test iszero(fillstored!(UpperTriangular(fill(1, 3, 3)), 0)) - end - let # fill!(small, x) - val = randn() - b = Bidiagonal(randn(1,1), :U) - st = SymTridiagonal(randn(1,1)) - d = Diagonal(rand(1)) - for x in (b, st, d) - @test Array(fill!(x, val)) == fill!(Array(x), val) - end - b = Bidiagonal(randn(2,2), :U) - st = SymTridiagonal(randn(3), randn(2)) - t = Tridiagonal(randn(3,3)) - d = Diagonal(rand(3)) - for x in (b, t, st, d) - @test_throws ArgumentError fill!(x, val) - @test Array(fill!(x, 0)) == fill!(Array(x), 0) - end - end -end - -@testset "pathological promotion (#24707)" begin - @test promote_type(Matrix{Int}, Bidiagonal{Tuple{S}} where S<:Integer) <: Matrix - @test promote_type(Matrix{Tuple{T}} where T<:Integer, Bidiagonal{Tuple{S}} where S<:Integer) <: Matrix - @test promote_type(Matrix{Tuple{T}} where T<:Integer, Bidiagonal{Int}) <: Matrix - @test promote_type(Tridiagonal{Int}, Bidiagonal{Tuple{S}} where S<:Integer) <: Tridiagonal - @test promote_type(Tridiagonal{Tuple{T}} where T<:Integer, Bidiagonal{Tuple{S}} where S<:Integer) <: Tridiagonal - @test promote_type(Tridiagonal{Tuple{T}} where T<:Integer, Bidiagonal{Int}) <: Tridiagonal -end - -@testset "solve with matrix elements" begin - A = triu(tril(randn(9, 9), 3), -3) - b = randn(9) - Alb = Bidiagonal(Any[tril(A[1:3,1:3]), tril(A[4:6,4:6]), tril(A[7:9,7:9])], - Any[triu(A[4:6,1:3]), triu(A[7:9,4:6])], 'L') - Aub = Bidiagonal(Any[triu(A[1:3,1:3]), triu(A[4:6,4:6]), triu(A[7:9,7:9])], - Any[tril(A[1:3,4:6]), tril(A[4:6,7:9])], 'U') - bb = Any[b[1:3], b[4:6], b[7:9]] - @test vcat((Alb\bb)...) ≈ LowerTriangular(A)\b - @test vcat((Aub\bb)...) ≈ UpperTriangular(A)\b - Alb = Bidiagonal([tril(A[1:3,1:3]), tril(A[4:6,4:6]), tril(A[7:9,7:9])], - [triu(A[4:6,1:3]), triu(A[7:9,4:6])], 'L') - Aub = Bidiagonal([triu(A[1:3,1:3]), triu(A[4:6,4:6]), triu(A[7:9,7:9])], - [tril(A[1:3,4:6]), tril(A[4:6,7:9])], 'U') - d = [randn(3,3) for _ in 1:3] - dl = [randn(3,3) for _ in 1:2] - B = [randn(3,3) for _ in 1:3, _ in 1:3] - for W in (UpperTriangular, LowerTriangular), t in (identity, adjoint, transpose) - @test Matrix(t(Alb) \ W(B)) ≈ t(Alb) \ Matrix(W(B)) - @test Matrix(t(Aub) \ W(B)) ≈ t(Aub) \ Matrix(W(B)) - @test Matrix(W(B) / t(Alb)) ≈ Matrix(W(B)) / t(Alb) - @test Matrix(W(B) / t(Aub)) ≈ Matrix(W(B)) / t(Aub) - end -end - -@testset "sum, mapreduce" begin - Bu = Bidiagonal([1,2,3], [1,2], :U) - Budense = Matrix(Bu) - Bl = Bidiagonal([1,2,3], [1,2], :L) - Bldense = Matrix(Bl) - @test sum(Bu) == 9 - @test sum(Bl) == 9 - @test_throws ArgumentError sum(Bu, dims=0) - @test sum(Bu, dims=1) == sum(Budense, dims=1) - @test sum(Bu, dims=2) == sum(Budense, dims=2) - @test sum(Bu, dims=3) == sum(Budense, dims=3) - @test typeof(sum(Bu, dims=1)) == typeof(sum(Budense, dims=1)) - @test mapreduce(one, min, Bu, dims=1) == mapreduce(one, min, Budense, dims=1) - @test mapreduce(one, min, Bu, dims=2) == mapreduce(one, min, Budense, dims=2) - @test mapreduce(one, min, Bu, dims=3) == mapreduce(one, min, Budense, dims=3) - @test typeof(mapreduce(one, min, Bu, dims=1)) == typeof(mapreduce(one, min, Budense, dims=1)) - @test mapreduce(zero, max, Bu, dims=1) == mapreduce(zero, max, Budense, dims=1) - @test mapreduce(zero, max, Bu, dims=2) == mapreduce(zero, max, Budense, dims=2) - @test mapreduce(zero, max, Bu, dims=3) == mapreduce(zero, max, Budense, dims=3) - @test typeof(mapreduce(zero, max, Bu, dims=1)) == typeof(mapreduce(zero, max, 
Budense, dims=1)) - @test_throws ArgumentError sum(Bl, dims=0) - @test sum(Bl, dims=1) == sum(Bldense, dims=1) - @test sum(Bl, dims=2) == sum(Bldense, dims=2) - @test sum(Bl, dims=3) == sum(Bldense, dims=3) - @test typeof(sum(Bl, dims=1)) == typeof(sum(Bldense, dims=1)) - @test mapreduce(one, min, Bl, dims=1) == mapreduce(one, min, Bldense, dims=1) - @test mapreduce(one, min, Bl, dims=2) == mapreduce(one, min, Bldense, dims=2) - @test mapreduce(one, min, Bl, dims=3) == mapreduce(one, min, Bldense, dims=3) - @test typeof(mapreduce(one, min, Bl, dims=1)) == typeof(mapreduce(one, min, Bldense, dims=1)) - @test mapreduce(zero, max, Bl, dims=1) == mapreduce(zero, max, Bldense, dims=1) - @test mapreduce(zero, max, Bl, dims=2) == mapreduce(zero, max, Bldense, dims=2) - @test mapreduce(zero, max, Bl, dims=3) == mapreduce(zero, max, Bldense, dims=3) - @test typeof(mapreduce(zero, max, Bl, dims=1)) == typeof(mapreduce(zero, max, Bldense, dims=1)) - - Bu = Bidiagonal([2], Int[], :U) - Budense = Matrix(Bu) - Bl = Bidiagonal([2], Int[], :L) - Bldense = Matrix(Bl) - @test sum(Bu) == 2 - @test sum(Bl) == 2 - @test_throws ArgumentError sum(Bu, dims=0) - @test sum(Bu, dims=1) == sum(Budense, dims=1) - @test sum(Bu, dims=2) == sum(Budense, dims=2) - @test sum(Bu, dims=3) == sum(Budense, dims=3) - @test typeof(sum(Bu, dims=1)) == typeof(sum(Budense, dims=1)) -end - -@testset "empty sub-diagonal" begin - # `mul!` must use non-specialized method when sub-diagonal is empty - A = [1 2 3 4]' - @test A * Tridiagonal(ones(1, 1)) == A -end - -@testset "generalized dot" begin - for elty in (Float64, ComplexF64), n in (5, 1) - dv = randn(elty, n) - ev = randn(elty, n-1) - x = randn(elty, n) - y = randn(elty, n) - for uplo in (:U, :L) - B = Bidiagonal(dv, ev, uplo) - @test dot(x, B, y) ≈ dot(B'x, y) ≈ dot(x, B*y) ≈ dot(x, Matrix(B), y) - end - dv = Vector{elty}(undef, 0) - ev = Vector{elty}(undef, 0) - x = Vector{elty}(undef, 0) - y = Vector{elty}(undef, 0) - for uplo in (:U, :L) - B = Bidiagonal(dv, ev, uplo) - @test dot(x, B, y) === zero(elty) - end - end -end - -@testset "multiplication of bidiagonal and triangular matrix" begin - n = 5 - for eltyB in (Int, ComplexF64) - if eltyB == Int - BU = Bidiagonal(rand(1:7, n), rand(1:7, n - 1), :U) - BL = Bidiagonal(rand(1:7, n), rand(1:7, n - 1), :L) - else - BU = Bidiagonal(randn(eltyB, n), randn(eltyB, n - 1), :U) - BL = Bidiagonal(randn(eltyB, n), randn(eltyB, n - 1), :L) - end - for eltyT in (Int, ComplexF64) - for TriT in (LowerTriangular, UnitLowerTriangular, UpperTriangular, UnitUpperTriangular) - if eltyT == Int - T = TriT(rand(1:7, n, n)) - else - T = TriT(randn(eltyT, n, n)) - end - for B in (BU, BL) - MB = Matrix(B) - MT = Matrix(T) - for transB in (identity, adjoint, transpose), transT in (identity, adjoint, transpose) - @test transB(B) * transT(T) ≈ transB(MB) * transT(MT) - @test transT(T) * transB(B) ≈ transT(MT) * transB(MB) - end - end - end - end - end -end - -struct MyNotANumberType - n::Float64 -end -Base.zero(n::MyNotANumberType) = MyNotANumberType(zero(Float64)) -Base.zero(T::Type{MyNotANumberType}) = MyNotANumberType(zero(Float64)) -Base.copy(n::MyNotANumberType) = MyNotANumberType(copy(n.n)) -Base.transpose(n::MyNotANumberType) = n - -@testset "transpose for a non-numeric eltype" begin - @test !(MyNotANumberType(1.0) isa Number) - a = [MyNotANumberType(1.0), MyNotANumberType(2.0), MyNotANumberType(3.0)] - b = [MyNotANumberType(5.0), MyNotANumberType(6.0)] - B = Bidiagonal(a, b, :U) - tB = transpose(B) - @test tB == Bidiagonal(a, b, :L) - @test 
transpose(copy(tB)) == B -end - -@testset "empty bidiagonal matrices" begin - dv0 = zeros(0) - ev0 = zeros(0) - zm = zeros(0, 0) - ubd = Bidiagonal(dv0, ev0, :U) - lbd = Bidiagonal(dv0, ev0, :L) - @test size(ubd) == (0, 0) - @test_throws BoundsError getindex(ubd, 1, 1) - @test_throws BoundsError setindex!(ubd, 0.0, 1, 1) - @test similar(ubd) == ubd - @test similar(lbd, Int) == zeros(Int, 0, 0) - @test ubd == zm - @test lbd == zm - @test ubd == lbd - @test ubd * ubd == ubd - @test lbd + lbd == lbd - @test lbd' == ubd - @test ubd' == lbd - @test triu(ubd, 1) == ubd - @test triu(lbd, 1) == ubd - @test tril(ubd, -1) == ubd - @test tril(lbd, -1) == ubd - @test_throws ArgumentError triu(ubd) - @test_throws ArgumentError tril(ubd) - @test sum(ubd) == 0.0 - @test reduce(+, ubd) == 0.0 - @test reduce(+, ubd, dims=1) == zeros(1, 0) - @test reduce(+, ubd, dims=2) == zeros(0, 1) - @test hcat(ubd, ubd) == zm - @test vcat(ubd, lbd) == zm - @test hcat(lbd, ones(0, 3)) == ones(0, 3) - @test fill!(copy(ubd), 1.0) == ubd - @test map(abs, ubd) == zm - @test lbd .+ 1 == zm - @test lbd + ubd isa Bidiagonal - @test lbd .+ ubd isa Bidiagonal - @test ubd * 5 == ubd - @test ubd .* 3 == ubd -end - -@testset "non-commutative algebra (#39701)" begin - A = Bidiagonal(Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4)), :U) - c = Quaternion(1,2,3,4) - @test A * c ≈ Matrix(A) * c - @test A / c ≈ Matrix(A) / c - @test c * A ≈ c * Matrix(A) - @test c \ A ≈ c \ Matrix(A) -end - -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "Conversion to AbstractArray" begin - # tests corresponding to #34995 - dv = ImmutableArray([1, 2, 3, 4]) - ev = ImmutableArray([7, 8, 9]) - Bu = Bidiagonal(dv, ev, :U) - Bl = Bidiagonal(dv, ev, :L) - - @test convert(AbstractArray{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu - @test convert(AbstractMatrix{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu - @test convert(AbstractArray{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl - @test convert(AbstractMatrix{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl -end - -@testset "block-bidiagonal matrix indexing" begin - dv = [ones(4,3), ones(2,2).*2, ones(2,3).*3, ones(4,4).*4] - evu = [ones(4,2), ones(2,3).*2, ones(2,4).*3] - evl = [ones(2,3), ones(2,2).*2, ones(4,3).*3] - BU = Bidiagonal(dv, evu, :U) - BL = Bidiagonal(dv, evl, :L) - # check that all the matrices along a column have the same number of columns, - # and the matrices along a row have the same number of rows - for j in axes(BU, 2), i in 2:size(BU, 1) - @test size(BU[i,j], 2) == size(BU[1,j], 2) - @test size(BU[i,j], 1) == size(BU[i,1], 1) - if j < i || j > i + 1 - @test iszero(BU[i,j]) - end - end - for j in axes(BL, 2), i in 2:size(BL, 1) - @test size(BL[i,j], 2) == size(BL[1,j], 2) - @test size(BL[i,j], 1) == size(BL[i,1], 1) - if j < i-1 || j > i - @test iszero(BL[i,j]) - end - end - - M = ones(2,2) - for n in 0:1 - dv = fill(M, n) - ev = fill(M, 0) - B = Bidiagonal(dv, ev, :U) - @test B == Matrix{eltype(B)}(B) - end -end - -@testset "copyto! 
with UniformScaling" begin - @testset "Fill" begin - for len in (4, InfiniteArrays.Infinity()) - d = FillArrays.Fill(1, len) - ud = FillArrays.Fill(0, len-1) - B = Bidiagonal(d, ud, :U) - @test copyto!(B, I) === B - end - end - B = Bidiagonal(fill(2, 4), fill(3, 3), :U) - copyto!(B, I) - @test all(isone, diag(B)) - @test all(iszero, diag(B, 1)) -end - -end # module TestBidiagonal diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl deleted file mode 100644 index 4252d9ee7938b..0000000000000 --- a/stdlib/LinearAlgebra/test/blas.jl +++ /dev/null @@ -1,724 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestBLAS - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasReal, BlasComplex -using Libdl: dlsym, dlopen -fabs(x::Real) = abs(x) -fabs(x::Complex) = abs(real(x)) + abs(imag(x)) - -# help function to build packed storage -function pack(A, uplo) - AP = eltype(A)[] - n = size(A, 1) - for j in 1:n, i in (uplo === :L ? (j:n) : (1:j)) - push!(AP, A[i,j]) - end - return AP -end - -@testset "vec_pointer_stride" begin - a = float(rand(1:20,4,4,4)) - @test BLAS.asum(a) == sum(a) # dense case - @test BLAS.asum(view(a,1:2:4,:,:)) == sum(view(a,1:2:4,:,:)) # vector like - @test BLAS.asum(view(a,1:3,2:2,3:3)) == sum(view(a,1:3,2:2,3:3)) - @test BLAS.asum(view(a,1:1,1:3,1:1)) == sum(view(a,1:1,1:3,1:1)) - @test BLAS.asum(view(a,1:1,1:1,1:3)) == sum(view(a,1:1,1:1,1:3)) - @test_throws ArgumentError BLAS.asum(view(a,1:3:4,:,:)) # non-vector like - @test_throws ArgumentError BLAS.asum(view(a,1:2,1:1,1:3)) -end -Random.seed!(100) -## BLAS tests - testing the interface code to BLAS routines -@testset for elty in [Float32, Float64, ComplexF32, ComplexF64] - - @testset "syr2k!" begin - U = randn(elty, 5, 2) - V = randn(elty, 5, 2) - @test tril(LinearAlgebra.BLAS.syr2k('L','N',U,V)) ≈ tril(U*transpose(V) + V*transpose(U)) - @test triu(LinearAlgebra.BLAS.syr2k('U','N',U,V)) ≈ triu(U*transpose(V) + V*transpose(U)) - @test tril(LinearAlgebra.BLAS.syr2k('L','T',U,V)) ≈ tril(transpose(U)*V + transpose(V)*U) - @test triu(LinearAlgebra.BLAS.syr2k('U','T',U,V)) ≈ triu(transpose(U)*V + transpose(V)*U) - end - - if elty in (ComplexF32, ComplexF64) - @testset "her2k!" begin - U = randn(elty, 5, 2) - V = randn(elty, 5, 2) - @test tril(LinearAlgebra.BLAS.her2k('L','N',U,V)) ≈ tril(U*V' + V*U') - @test triu(LinearAlgebra.BLAS.her2k('U','N',U,V)) ≈ triu(U*V' + V*U') - @test tril(LinearAlgebra.BLAS.her2k('L','C',U,V)) ≈ tril(U'*V + V'*U) - @test triu(LinearAlgebra.BLAS.her2k('U','C',U,V)) ≈ triu(U'*V + V'*U) - end - end - - o4 = fill(elty(1), 4) - z4 = zeros(elty, 4) - - I4 = Matrix{elty}(I, 4, 4) - I43 = Matrix{elty}(I, 4, 3) - L4 = tril(fill(elty(1), 4,4)) - U4 = triu(fill(elty(1), 4,4)) - Z4 = zeros(elty, (4,4)) - - elm1 = elty(-1) - el2 = elty(2) - v14 = elty[1:4;] - v41 = elty[4:-1:1;] - - let n = 10 - @testset "dot products" begin - if elty <: Real - x1 = randn(elty, n) - x2 = randn(elty, n) - @test BLAS.dot(x1,x2) ≈ sum(x1.*x2) - @test_throws DimensionMismatch BLAS.dot(x1,rand(elty, n + 1)) - else - z1 = randn(elty, n) - z2 = randn(elty, n) - @test BLAS.dotc(z1,z2) ≈ sum(conj(z1).*z2) - @test BLAS.dotu(z1,z2) ≈ sum(z1.*z2) - @test_throws DimensionMismatch BLAS.dotc(z1,rand(elty, n + 1)) - @test_throws DimensionMismatch BLAS.dotu(z1,rand(elty, n + 1)) - end - end - @testset "iamax" begin - x = randn(elty, n) - @test BLAS.iamax(x) == findmax(fabs, x)[2] - end - @testset "rot!" 
begin - x = randn(elty, n) - y = randn(elty, n) - c = rand(real(elty)) - for sty in unique!([real(elty), elty]) - s = rand(sty) - x2 = copy(x) - y2 = copy(y) - BLAS.rot!(n, x, 1, y, 1, c, s) - @test x ≈ c*x2 + s*y2 - @test y ≈ -conj(s)*x2 + c*y2 - end - end - @testset "axp(b)y" begin - x1 = randn(elty, n) - x2 = randn(elty, n) - α = rand(elty) - β = rand(elty) - for X1 in (x1, view(x1,n:-1:1)), X2 in (x2, view(x2, n:-1:1)) - @test BLAS.axpy!(α,deepcopy(X1),deepcopy(X2)) ≈ α*X1 + X2 - @test BLAS.axpby!(α,deepcopy(X1),β,deepcopy(X2)) ≈ α*X1 + β*X2 - end - for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1) - @test BLAS.axpy!(α,copy(x1),ind1,copy(x2),ind2) ≈ x2 + α*(ind1 == ind2 ? x1 : reverse(x1)) - end - @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1)) - @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1)) - @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n) - @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1)) - @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1)) - end - @testset "nrm2, iamax, and asum for StridedVectors" begin - a = rand(elty,n) - for ind in (2:2:n, n:-2:2) - b = view(a, ind, 1) - @test BLAS.nrm2(b) ≈ sqrt(sum(abs2, b)) - @test BLAS.asum(b) ≈ sum(fabs, b) - @test BLAS.iamax(b) == findmax(fabs, b)[2] * (step(ind) >= 0) - end - end - @testset "scal" begin - α = rand(elty) - a = rand(elty,n) - @test BLAS.scal(n,α,a,1) ≈ α * a - for v in (a, view(a, n:-1:1)) - @test BLAS.scal!(α, deepcopy(v)) ≈ α * v - end - end - - @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)), - y in (rand(elty,n), view(rand(elty,3n), 1:3:3n), view(rand(elty,2n), 2n:-2:2)) - - A = rand(elty,n,n) - α = rand(elty) - - @test BLAS.ger!(α,x,y,copy(A)) ≈ A + α*x*y' - @test_throws DimensionMismatch BLAS.ger!(α,Vector{elty}(undef,n+1),y,copy(A)) - - A = rand(elty,n,n) - A = A + transpose(A) - @test issymmetric(A) - @test triu(BLAS.syr!('U',α,x,copy(A))) ≈ triu(A + α*x*transpose(x)) - @test_throws DimensionMismatch BLAS.syr!('U',α,Vector{elty}(undef,n+1),copy(A)) - - if elty <: Complex - A = rand(elty,n,n) - A = A + A' - α = real(α) - @test triu(BLAS.her!('U',α,x,copy(A))) ≈ triu(A + α*x*x') - @test_throws DimensionMismatch BLAS.her!('U',α,Vector{elty}(undef,n+1),copy(A)) - end - end - @testset "copy" begin - x1 = randn(elty, n) - x2 = randn(elty, n) - for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1) - @test x2 === BLAS.copyto!(x2, ind1, x1, ind2) == (ind1 == ind2 ? x1 : reverse(x1)) - end - @test_throws DimensionMismatch BLAS.copyto!(x2, 1:n, x1, 1:(n - 1)) - @test_throws ArgumentError BLAS.copyto!(x1, 0:div(n, 2), x2, 1:(div(n, 2) + 1)) - @test_throws ArgumentError BLAS.copyto!(x1, 1:(div(n, 2) + 1), x2, 0:div(n, 2)) - end - @testset "trmv and trsv" begin - A = rand(elty,n,n) - x = rand(elty,n) - xerr = Vector{elty}(undef,n+1) - for uplo in ('U', 'L'), diag in ('U','N'), trans in ('N', 'T', 'C') - Wrapper = if uplo == 'U' - diag == 'U' ? UnitUpperTriangular : UpperTriangular - else - diag == 'U' ? UnitLowerTriangular : LowerTriangular - end - fun = trans == 'N' ? identity : trans == 'T' ? 
transpose : adjoint - fullA = collect(fun(Wrapper(A))) - @testset "trmv" begin - @test BLAS.trmv(uplo,trans,diag,A,x) ≈ fullA * x - @test_throws DimensionMismatch BLAS.trmv(uplo,trans,diag,A,xerr) - for xx in (x, view(x, n:-1:1)) - @test BLAS.trmv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA * xx - end - end - @testset "trsv" begin - @test BLAS.trsv(uplo,trans,diag,A,x) ≈ fullA \ x - @test_throws DimensionMismatch BLAS.trsv(uplo,trans,diag,A,xerr) - for xx in (x, view(x, n:-1:1)) - @test BLAS.trsv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA \ xx - end - end - end - end - @testset "symmetric/Hermitian multiplication" begin - x = rand(elty,n) - A = rand(elty,n,n) - y = rand(elty, n) - α = randn(elty) - β = randn(elty) - Aherm = A + A' - Asymm = A + transpose(A) - offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1))) - @testset "symv and hemv" for uplo in ('U', 'L') - @test BLAS.symv(uplo,Asymm,x) ≈ Asymm*x - for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) - @test BLAS.symv!(uplo,α,Asymm,xx,β,deepcopy(yy)) ≈ α * Asymm * xx + β * yy - end - @test_throws DimensionMismatch BLAS.symv!(uplo,α,Asymm,x,β,offsizevec) - @test_throws DimensionMismatch BLAS.symv(uplo,offsizemat,x) - if elty <: BlasComplex - @test BLAS.hemv(uplo,Aherm,x) ≈ Aherm*x - for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) - @test BLAS.hemv!(uplo,α,Aherm,xx,β,deepcopy(yy)) ≈ α * Aherm * xx + β * yy - end - @test_throws DimensionMismatch BLAS.hemv(uplo,offsizemat,x) - @test_throws DimensionMismatch BLAS.hemv!(uplo,one(elty),Aherm,x,one(elty),offsizevec) - end - end - - @testset "symm error throwing" begin - Cnn, Cnm, Cmn = Matrix{elty}.(undef,((n,n), (n,n-1), (n-1,n))) - @test_throws DimensionMismatch BLAS.symm('L','U',Cnm,Cnn) - @test_throws DimensionMismatch BLAS.symm('R','U',Cmn,Cnn) - @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cnn,one(elty),Cmn) - @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cnn,one(elty),Cnm) - @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cmn,one(elty),Cnn) - @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cnm,one(elty),Cmn) - @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cnn,one(elty),Cnm) - @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cmn,one(elty),Cnn) - if elty <: BlasComplex - @test_throws DimensionMismatch BLAS.hemm('L','U',Cnm,Cnn) - @test_throws DimensionMismatch BLAS.hemm('R','U',Cmn,Cnn) - @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cnn,one(elty),Cmn) - @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cnn,one(elty),Cnm) - @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cmn,one(elty),Cnn) - @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cnm,one(elty),Cmn) - @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cnn,one(elty),Cnm) - @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cmn,one(elty),Cnn) - end - end - end - @testset "trmm error throwing" begin - Cnn, Cmn, Cnm = Matrix{elty}.(undef,((n,n), (n+1,n), (n,n+1))) - @test_throws DimensionMismatch BLAS.trmm('L','U','N','N',one(elty),triu(Cnn),Cmn) - @test_throws DimensionMismatch BLAS.trmm('R','U','N','N',one(elty),triu(Cnn),Cnm) - end - - # hpmv! - if elty in (ComplexF32, ComplexF64) - @testset "hpmv!" begin - # Both matrix dimensions n coincide, as we have Hermitian matrices. 
- # Define the inputs and outputs of hpmv!, y = α*A*x+β*y - α = rand(elty) - A = rand(elty, n, n) - x = rand(elty, n) - β = rand(elty) - y = rand(elty, n) - for uplo in (:L, :U) - Cuplo = String(uplo)[1] - AH = Hermitian(A, uplo) - # Create lower/upper triangular packing of AL - AP = pack(AH, uplo) - for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1)) - @test BLAS.hpmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AH*xx + β*yy - end - AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1)) - @test_throws ErrorException BLAS.hpmv!(Cuplo, α, AP′, x, β, y) - AP′ = view(AP, 1:length(AP′) - 1) - @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, y) - @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, view(y,1:n-1)) - end - end - end - - # spmv! - if elty in (Float32, Float64) - @testset "spmv!" begin - # Both matrix dimensions n coincide, as we have symmetric matrices. - # Define the inputs and outputs of spmv!, y = α*A*x+β*y - α = rand(elty) - A = rand(elty, n, n) - x = rand(elty, n) - β = rand(elty) - y = rand(elty, n) - for uplo in (:L, :U) - Cuplo = String(uplo)[1] - AS = Symmetric(A, uplo) - # Create lower/upper triangular packing of AL - AP = pack(AS, uplo) - for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1)) - @test BLAS.spmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AS*xx + β*yy - end - AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1)) - @test_throws ErrorException BLAS.spmv!(Cuplo, α, AP′, x, β, y) - AP′ = view(AP, 1:length(AP′) - 1) - @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, y) - @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, view(y,1:n-1)) - end - end - end - - # spr! - if elty in (Float32, Float64) - @testset "spr! $elty" begin - α = rand(elty) - M = rand(elty, n, n) - AL = Symmetric(M, :L) - AU = Symmetric(M, :U) - for x in (rand(elty, n), view(rand(elty, n), n:-1:1)) - ALP_result_julia_lower = pack(α*x*x' + AL, :L) - ALP_result_blas_lower = pack(AL, :L) - BLAS.spr!('L', α, x, ALP_result_blas_lower) - @test ALP_result_julia_lower ≈ ALP_result_blas_lower - ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10)) - BLAS.spr!('L', α, x, ALP_result_blas_lower) - @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10] - ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1) - BLAS.spr!('L', α, x, ALP_result_blas_lower) - @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower) - - AUP_result_julia_upper = pack(α*x*x' + AU, :U) - AUP_result_blas_upper = pack(AU, :U) - BLAS.spr!('U', α, x, AUP_result_blas_upper) - @test AUP_result_julia_upper ≈ AUP_result_blas_upper - AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10)) - BLAS.spr!('U', α, x, AUP_result_blas_upper) - @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10] - AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1) - BLAS.spr!('U', α, x, AUP_result_blas_upper) - @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper) - end - end - end - - #trsm - A = triu(rand(elty,n,n)) - B = rand(elty,(n,n)) - @test BLAS.trsm('L','U','N','N',one(elty),A,B) ≈ A\B - - #will work for SymTridiagonal,Tridiagonal,Bidiagonal! - @testset "banded matrix mv" begin - @testset "gbmv" begin - TD = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1)) - x = rand(elty, n) - #put TD into the BLAS format! 
- fTD = zeros(elty,3,n) - fTD[1,2:n] = TD.du - fTD[2,:] = TD.d - fTD[3,1:n-1] = TD.dl - @test BLAS.gbmv('N',n,1,1,fTD,x) ≈ TD*x - y = rand(elty, n) - α = randn(elty) - β = randn(elty) - for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) - @test BLAS.gbmv!('N',n,1,1,α,fTD,xx,β,deepcopy(yy)) ≈ α * TD * xx + β * yy - end - end - #will work for SymTridiagonal only! - @testset "sbmv and hbmv" begin - x = rand(elty,n) - if elty <: BlasReal - ST = SymTridiagonal(rand(elty,n),rand(elty,n-1)) - #put TD into the BLAS format! - fST = zeros(elty,2,n) - fST[1,2:n] = ST.ev - fST[2,:] = ST.dv - @test BLAS.sbmv('U',1,fST,x) ≈ ST*x - y = rand(elty, n) - α = randn(elty) - β = randn(elty) - for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) - @test BLAS.sbmv!('U',1,α,fST,xx,β,deepcopy(yy)) ≈ α * ST * xx + β * yy - end - else - dv = rand(real(elty),n) - ev = rand(elty,n-1) - bH = zeros(elty,2,n) - bH[1,2:n] = ev - bH[2,:] = dv - fullH = diagm(0 => dv, -1 => conj(ev), 1 => ev) - @test BLAS.hbmv('U',1,bH,x) ≈ fullH*x - y = rand(elty, n) - α = randn(elty) - β = randn(elty) - for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1)) - @test BLAS.hbmv!('U',1,α,bH,xx,β,deepcopy(yy)) ≈ α * fullH * xx + β * yy - end - end - end - end - end - - @testset "gemv" begin - @test all(BLAS.gemv('N', I4, o4) .== o4) - @test all(BLAS.gemv('T', I4, o4) .== o4) - @test all(BLAS.gemv('N', el2, I4, o4) .== el2 * o4) - @test all(BLAS.gemv('T', el2, I4, o4) .== el2 * o4) - @test_throws DimensionMismatch BLAS.gemv('N',I43,o4) - o4cp = copy(o4) - @test_throws DimensionMismatch BLAS.gemv!('T',one(elty),I43,o4,one(elty),o4cp) - @test_throws DimensionMismatch BLAS.gemv!('C',one(elty),I43,o4,one(elty),o4cp) - @test all(BLAS.gemv!('N', one(elty), I4, o4, elm1, o4cp) .== z4) - @test all(o4cp .== z4) - o4cp[:] = o4 - @test all(BLAS.gemv!('T', one(elty), I4, o4, elm1, o4cp) .== z4) - @test all(o4cp .== z4) - @test all(BLAS.gemv('N', U4, o4) .== v41) - @test all(BLAS.gemv('N', U4, o4) .== v41) - @testset "non-standard strides" begin - A = rand(elty, 3, 4) - x = rand(elty, 5) - for y = (view(ones(elty, 5), 1:2:5), view(ones(elty, 7), 6:-2:2)) - ycopy = copy(y) - @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(x, 1:3:4), elty(3), y) ≈ 2*A[:,2:2:4]*x[1:3:4] + 3*ycopy - ycopy = copy(y) - @test BLAS.gemv!('N', elty(2), view(A, :, 4:-2:2), view(x, 1:3:4), elty(3), y) ≈ 2*A[:,4:-2:2]*x[1:3:4] + 3*ycopy - ycopy = copy(y) - @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(x, 4:-3:1), elty(3), y) ≈ 2*A[:,2:2:4]*x[4:-3:1] + 3*ycopy - ycopy = copy(y) - @test BLAS.gemv!('N', elty(2), view(A, :, 4:-2:2), view(x, 4:-3:1), elty(3), y) ≈ 2*A[:,4:-2:2]*x[4:-3:1] + 3*ycopy - ycopy = copy(y) - @test BLAS.gemv!('N', elty(2), view(A, :, StepRangeLen(1,0,1)), view(x, 1:1), elty(3), y) ≈ 2*A[:,1:1]*x[1:1] + 3*ycopy # stride(A,2) == 0 - end - @test BLAS.gemv!('N', elty(1), zeros(elty, 0, 5), zeros(elty, 5), elty(1), zeros(elty, 0)) == elty[] # empty matrix, stride(A,2) == 0 - @test BLAS.gemv('N', elty(-1), view(A, 2:3, 1:2:3), view(x, 2:-1:1)) ≈ -1*A[2:3,1:2:3]*x[2:-1:1] - @test BLAS.gemv('N', view(A, 2:3, 3:-2:1), view(x, 1:2:3)) ≈ A[2:3,3:-2:1]*x[1:2:3] - for (trans, f) = (('T',transpose), ('C',adjoint)) - for y = (view(ones(elty, 3), 1:2:3), view(ones(elty, 5), 4:-2:2)) - ycopy = copy(y) - @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(x, 1:2:5), elty(3), y) ≈ 2*f(A[:,2:2:4])*x[1:2:5] + 3*ycopy - ycopy = copy(y) - @test BLAS.gemv!(trans, elty(2), view(A, :, 4:-2:2), view(x, 1:2:5), elty(3), y) ≈ 2*f(A[:,4:-2:2])*x[1:2:5] + 
3*ycopy - ycopy = copy(y) - @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(x, 5:-2:1), elty(3), y) ≈ 2*f(A[:,2:2:4])*x[5:-2:1] + 3*ycopy - ycopy = copy(y) - @test BLAS.gemv!(trans, elty(2), view(A, :, 4:-2:2), view(x, 5:-2:1), elty(3), y) ≈ 2*f(A[:,4:-2:2])*x[5:-2:1] + 3*ycopy - end - @test BLAS.gemv!(trans, elty(2), view(A, :, StepRangeLen(1,0,1)), view(x, 1:2:5), elty(3), elty[1]) ≈ 2*f(A[:,1:1])*x[1:2:5] + elty[3] # stride(A,2) == 0 - end - for trans = ('N', 'T', 'C') - @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2:3, 1:2), view(x, 1:2)) # stride(A,1) must be 1 - end - end - end - @testset "gemm" begin - @test all(BLAS.gemm('N', 'N', I4, I4) .== I4) - @test all(BLAS.gemm('N', 'T', I4, I4) .== I4) - @test all(BLAS.gemm('T', 'N', I4, I4) .== I4) - @test all(BLAS.gemm('T', 'T', I4, I4) .== I4) - @test all(BLAS.gemm('N', 'N', el2, I4, I4) .== el2 * I4) - @test all(BLAS.gemm('N', 'T', el2, I4, I4) .== el2 * I4) - @test all(BLAS.gemm('T', 'N', el2, I4, I4) .== el2 * I4) - @test all(LinearAlgebra.BLAS.gemm('T', 'T', el2, I4, I4) .== el2 * I4) - I4cp = copy(I4) - @test all(BLAS.gemm!('N', 'N', one(elty), I4, I4, elm1, I4cp) .== Z4) - @test all(I4cp .== Z4) - I4cp[:] = I4 - @test all(BLAS.gemm!('N', 'T', one(elty), I4, I4, elm1, I4cp) .== Z4) - @test all(I4cp .== Z4) - I4cp[:] = I4 - @test all(BLAS.gemm!('T', 'N', one(elty), I4, I4, elm1, I4cp) .== Z4) - @test all(I4cp .== Z4) - I4cp[:] = I4 - @test all(BLAS.gemm!('T', 'T', one(elty), I4, I4, elm1, I4cp) .== Z4) - @test all(I4cp .== Z4) - @test all(BLAS.gemm('N', 'N', I4, U4) .== U4) - @test all(BLAS.gemm('N', 'T', I4, U4) .== L4) - @test_throws DimensionMismatch BLAS.gemm!('N','N', one(elty), I4, I4, elm1, Matrix{elty}(I, 5, 5)) - @test_throws DimensionMismatch BLAS.gemm!('N','N', one(elty), I43, I4, elm1, I4) - @test_throws DimensionMismatch BLAS.gemm!('T','N', one(elty), I43, I4, elm1, I43) - @test_throws DimensionMismatch BLAS.gemm!('N','T', one(elty), I43, I43, elm1, I43) - @test_throws DimensionMismatch BLAS.gemm!('T','T', one(elty), I43, I43, elm1, Matrix{elty}(I, 3, 4)) - end - @testset "gemm compared to (sy)(he)rk" begin - if eltype(elm1) <: Complex - @test all(triu(BLAS.herk('U', 'N', U4)) .== triu(BLAS.gemm('N', 'T', U4, U4))) - @test all(tril(BLAS.herk('L', 'N', U4)) .== tril(BLAS.gemm('N', 'T', U4, U4))) - @test all(triu(BLAS.herk('U', 'N', L4)) .== triu(BLAS.gemm('N', 'T', L4, L4))) - @test all(tril(BLAS.herk('L', 'N', L4)) .== tril(BLAS.gemm('N', 'T', L4, L4))) - @test all(triu(BLAS.herk('U', 'C', U4)) .== triu(BLAS.gemm('T', 'N', U4, U4))) - @test all(tril(BLAS.herk('L', 'C', U4)) .== tril(BLAS.gemm('T', 'N', U4, U4))) - @test all(triu(BLAS.herk('U', 'C', L4)) .== triu(BLAS.gemm('T', 'N', L4, L4))) - @test all(tril(BLAS.herk('L', 'C', L4)) .== tril(BLAS.gemm('T', 'N', L4, L4))) - ans = similar(L4) - @test all(tril(BLAS.herk('L','C', L4)) .== tril(BLAS.herk!('L', 'C', real(one(elty)), L4, real(zero(elty)), ans))) - @test all(LinearAlgebra.copytri!(ans, 'L') .== LinearAlgebra.BLAS.gemm('T', 'N', L4, L4)) - @test_throws DimensionMismatch BLAS.herk!('L','N',real(one(elty)),Matrix{elty}(I, 5, 5),real(one(elty)), Matrix{elty}(I, 6, 6)) - else - @test all(triu(BLAS.syrk('U', 'N', U4)) .== triu(BLAS.gemm('N', 'T', U4, U4))) - @test all(tril(BLAS.syrk('L', 'N', U4)) .== tril(BLAS.gemm('N', 'T', U4, U4))) - @test all(triu(BLAS.syrk('U', 'N', L4)) .== triu(BLAS.gemm('N', 'T', L4, L4))) - @test all(tril(BLAS.syrk('L', 'N', L4)) .== tril(BLAS.gemm('N', 'T', L4, L4))) - @test all(triu(BLAS.syrk('U', 'T', U4)) .== 
triu(BLAS.gemm('T', 'N', U4, U4))) - @test all(tril(BLAS.syrk('L', 'T', U4)) .== tril(BLAS.gemm('T', 'N', U4, U4))) - @test all(triu(BLAS.syrk('U', 'T', L4)) .== triu(BLAS.gemm('T', 'N', L4, L4))) - @test all(tril(BLAS.syrk('L', 'T', L4)) .== tril(BLAS.gemm('T', 'N', L4, L4))) - ans = similar(L4) - @test all(tril(BLAS.syrk('L','T', L4)) .== tril(BLAS.syrk!('L', 'T', one(elty), L4, zero(elty), ans))) - @test all(LinearAlgebra.copytri!(ans, 'L') .== BLAS.gemm('T', 'N', L4, L4)) - @test_throws DimensionMismatch BLAS.syrk!('L','N',one(elty), Matrix{elty}(I, 5, 5),one(elty), Matrix{elty}(I, 6, 6)) - end - end -end - -@testset "syr for eltype $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty, 5, 5) - @test triu(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('U', one(elty), A[1,:], zeros(elty, 5, 5)) - @test tril(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('L', one(elty), A[1,:], zeros(elty, 5, 5)) - @test triu(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('U', one(elty), view(A, 1, :), zeros(elty, 5, 5)) - @test tril(A[1,:] * transpose(A[1,:])) ≈ BLAS.syr!('L', one(elty), view(A, 1, :), zeros(elty, 5, 5)) -end - -@testset "her for eltype $elty" for elty in (ComplexF32, ComplexF64) - A = rand(elty, 5, 5) - @test triu(A[1,:] * A[1,:]') ≈ BLAS.her!('U', one(real(elty)), A[1,:], zeros(elty, 5, 5)) - @test tril(A[1,:] * A[1,:]') ≈ BLAS.her!('L', one(real(elty)), A[1,:], zeros(elty, 5, 5)) - @test triu(A[1,:] * A[1,:]') ≈ BLAS.her!('U', one(real(elty)), view(A, 1, :), zeros(elty, 5, 5)) - @test tril(A[1,:] * A[1,:]') ≈ BLAS.her!('L', one(real(elty)), view(A, 1, :), zeros(elty, 5, 5)) -end - -struct WrappedArray{T,N} <: AbstractArray{T,N} - A::Array{T,N} -end - -Base.size(A::WrappedArray) = size(A.A) -Base.getindex(A::WrappedArray, i::Int) = A.A[i] -Base.getindex(A::WrappedArray{T, N}, I::Vararg{Int, N}) where {T, N} = A.A[I...] -Base.setindex!(A::WrappedArray, v, i::Int) = setindex!(A.A, v, i) -Base.setindex!(A::WrappedArray{T, N}, v, I::Vararg{Int, N}) where {T, N} = setindex!(A.A, v, I...) 
-Base.unsafe_convert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.unsafe_convert(Ptr{T}, A.A) - -Base.strides(A::WrappedArray) = strides(A.A) -Base.elsize(::Type{WrappedArray{T,N}}) where {T,N} = Base.elsize(Array{T,N}) - -@testset "strided interface adjtrans" begin - x = WrappedArray([1, 2, 3, 4]) - @test stride(x,1) == 1 - @test stride(x,2) == stride(x,3) == 4 - @test strides(x') == strides(transpose(x)) == (4,1) - @test pointer(x') == pointer(transpose(x)) == pointer(x) - @test_throws BoundsError stride(x,0) - - A = WrappedArray([1 2; 3 4; 5 6]) - @test stride(A,1) == 1 - @test stride(A,2) == 3 - @test stride(A,3) == stride(A,4) >= 6 - @test strides(A') == strides(transpose(A)) == (3,1) - @test pointer(A') == pointer(transpose(A)) == pointer(A) - @test_throws BoundsError stride(A,0) - - y = WrappedArray([1+im, 2, 3, 4]) - @test strides(transpose(y)) == (4,1) - @test pointer(transpose(y)) == pointer(y) - @test_throws MethodError strides(y') - @test_throws ErrorException pointer(y') - - B = WrappedArray([1+im 2; 3 4; 5 6]) - @test strides(transpose(B)) == (3,1) - @test pointer(transpose(B)) == pointer(B) - @test_throws MethodError strides(B') - @test_throws ErrorException pointer(B') - - @test_throws MethodError stride(1:5,0) - @test_throws MethodError stride(1:5,1) - @test_throws MethodError stride(1:5,2) - @test_throws MethodError strides(transpose(1:5)) - @test_throws MethodError strides((1:5)') - @test_throws ErrorException pointer(transpose(1:5)) - @test_throws ErrorException pointer((1:5)') -end - -@testset "strided interface blas" begin - for elty in (Float32, Float64, ComplexF32, ComplexF64) - # Level 1 - x = WrappedArray(elty[1, 2, 3, 4]) - y = WrappedArray(elty[5, 6, 7, 8]) - BLAS.blascopy!(2, x, 1, y, 2) - @test y == WrappedArray(elty[1, 6, 2, 8]) - BLAS.scal!(2, elty(2), x, 1) - @test x == WrappedArray(elty[2, 4, 3, 4]) - @test BLAS.nrm2(1, x, 2) == elty(2) - @test BLAS.nrm2(x) == BLAS.nrm2(x.A) - BLAS.asum(x) == elty(13) - BLAS.axpy!(4, elty(2), x, 1, y, 1) - @test y == WrappedArray(elty[5, 14, 8, 16]) - BLAS.axpby!(elty(2), x, elty(3), y) - @test y == WrappedArray(elty[19, 50, 30, 56]) - @test BLAS.iamax(x) == 2 - - M = fill(elty(1.0), 3, 3) - @test BLAS.scal!(elty(2), view(M,:,2)) === view(M,:,2) - @test BLAS.scal!(elty(3), view(M,3,:)) === view(M,3,:) - @test M == elty[1. 2. 1.; 1. 2. 1.; 3. 6. 3.] 
- # Level 2 - A = WrappedArray(elty[1 2; 3 4]) - x = WrappedArray(elty[1, 2]) - y = WrappedArray(elty[3, 4]) - @test BLAS.gemv!('N', elty(2), A, x, elty(1), y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[13, 26]) - @test BLAS.gbmv!('N', 2, 1, 0, elty(2), A, x, elty(1), y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[15, 40]) - @test BLAS.symv!('U', elty(2), A, x, elty(1), y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[25, 60]) - @test BLAS.trmv!('U', 'N', 'N', A, y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[145, 240]) - @test BLAS.trsv!('U', 'N', 'N', A, y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[25,60]) - @test BLAS.ger!(elty(2), x, y, A) isa WrappedArray{elty,2} - @test A == WrappedArray(elty[51 122; 103 244]) - @test BLAS.syr!('L', elty(2), x, A) isa WrappedArray{elty,2} - @test A == WrappedArray(elty[53 122; 107 252]) - # Level 3 - A = WrappedArray(elty[1 2; 3 4]) - B = WrappedArray(elty[5 6; 7 8]) - C = WrappedArray(elty[9 10; 11 12]) - BLAS.gemm!('N', 'N', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2} - @test C == WrappedArray([47 54; 97 112]) - BLAS.symm!('L', 'U', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2} - @test C == WrappedArray([85 98; 173 200]) - BLAS.syrk!('U', 'N', elty(2), A, elty(1), C) isa WrappedArray{elty,2} - @test C == WrappedArray([95 120; 173 250]) - BLAS.syr2k!('U', 'N', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2} - @test C == WrappedArray([163 244; 173 462]) - BLAS.trmm!('L', 'U', 'N', 'N', elty(2), A, B) isa WrappedArray{elty,2} - @test B == WrappedArray([38 44; 56 64]) - BLAS.trsm!('L', 'U', 'N', 'N', elty(2), A, B) isa WrappedArray{elty,2} - @test B == WrappedArray([20 24; 28 32]) - end - for elty in (Float32, Float64) - # Level 1 - x = WrappedArray(elty[1, 2, 3, 4]) - y = WrappedArray(elty[5, 6, 7, 8]) - @test BLAS.dot(2, x, 1, y, 2) == elty(19) - # Level 2 - A = WrappedArray(elty[1 2; 3 4]) - x = WrappedArray(elty[1, 2]) - y = WrappedArray(elty[3, 4]) - BLAS.sbmv!('U', 1, elty(2), A, x, elty(1), y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[17,24]) - end - for elty in (ComplexF32, ComplexF64) - # Level 1 - x = WrappedArray(elty[1+im, 2+2im, 3+3im, 4+im]) - y = WrappedArray(elty[5-im, 6-2im, 7-3im, 8-im]) - @test BLAS.dotc(2, x, 1, y, 2) == elty(12-26im) - @test BLAS.dotu(2, x, 1, y, 2) == elty(26+12im) - # Level 2 - A = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) - x = WrappedArray(elty[1+im, 2+2im]) - y = WrappedArray(elty[5-im, 6-2im]) - @test BLAS.hemv!('U', elty(2), A, x, elty(1), y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[7+17im, 30+14im]) - BLAS.hbmv!('U', 1, elty(2), A, x, elty(1), y) isa WrappedArray{elty,1} - @test y == WrappedArray(elty[13+39im, 54+30im]) - @test BLAS.her!('L', real(elty(2)), x, A) isa WrappedArray{elty,2} - @test A == WrappedArray(elty[5 2+2im; 11+3im 20]) - # Level 3 - A = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) - B = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) - C = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) - @test BLAS.hemm!('L', 'U', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2} - @test C == WrappedArray([3+27im 6+38im; 35+27im 52+36im]) - @test BLAS.herk!('U', 'N', real(elty(2)), A, real(elty(1)), C) isa WrappedArray{elty,2} - @test C == WrappedArray([23 50+38im; 35+27im 152]) - @test BLAS.her2k!('U', 'N', elty(2), A, B, real(elty(1)), C) isa WrappedArray{elty,2} - @test C == WrappedArray([63 138+38im; 35+27im 352]) - end -end - -@testset "get_set_num_threads" begin - default = BLAS.get_num_threads() 
- @test default isa Int - @test default > 0 - BLAS.set_num_threads(1) - @test BLAS.get_num_threads() === 1 - BLAS.set_num_threads(default) - @test BLAS.get_num_threads() === default -end - -@testset "test for 0-strides" for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = randn(elty, 10, 10); - a = view([randn(elty)], 1 .+ 0(1:10)) - b = view([randn(elty)], 1 .+ 0(1:10)) - α, β = randn(elty), randn(elty) - @testset "dot/dotc/dotu" begin - if elty <: Real - @test BLAS.dot(a,b) ≈ sum(a.*b) - else - @test BLAS.dotc(a,b) ≈ sum(conj(a).*b) - @test BLAS.dotu(a,b) ≈ sum(a.*b) - end - end - @testset "axp(b)y!" begin - @test BLAS.axpy!(α,a,copy(b)) ≈ α*a + b - @test BLAS.axpby!(α,a,β,copy(b)) ≈ α*a + β*b - @test_throws "dest" BLAS.axpy!(α,a,b) - @test_throws "dest" BLAS.axpby!(α,a,β,b) - end - @test BLAS.iamax(a) == 0 - @test_throws "dest" BLAS.scal!(b[1], a) - @testset "nrm2/asum" begin # OpenBLAS always return 0.0 - @test_throws "input" BLAS.nrm2(a) - @test_throws "input" BLAS.asum(a) - end - # All level2 reject 0-stride array. - @testset "gemv!" begin - @test_throws "input" BLAS.gemv!('N', true, A, a, false, copy(b)) - @test_throws "dest" BLAS.gemv!('N', true, A, copy(a), false, b) - end -end - -# Make sure we can use `Base.libblas_name`. Avoid causing -# https://github.com/JuliaLang/julia/issues/48427 again. -@testset "libblas_name" begin - dot_sym = dlsym(dlopen(Base.libblas_name), "cblas_ddot" * (Sys.WORD_SIZE == 64 ? "64_" : "")) - @test 23.0 === @ccall $(dot_sym)(2::Int, [2.0, 3.0]::Ref{Cdouble}, 1::Int, [4.0, 5.0]::Ref{Cdouble}, 1::Int)::Cdouble -end - -end # module TestBLAS diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl deleted file mode 100644 index 613e4d09a3cc6..0000000000000 --- a/stdlib/LinearAlgebra/test/bunchkaufman.jl +++ /dev/null @@ -1,199 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestBunchKaufman - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted -using Base: getproperty - -n = 10 - -# Split n into 2 parts for tests needing two matrices -n1 = div(n, 2) -n2 = 2*n1 - -Random.seed!(12343212) - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 -a2real = randn(n,n)/2 -a2img = randn(n,n)/2 -breal = randn(n,2)/2 -bimg = randn(n,2)/2 - -@testset "$eltya argument A" for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) - a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? 
complex.(a2real, a2img) : a2real) - asym = transpose(a) + a # symmetric indefinite - aher = a' + a # Hermitian indefinite - apd = a' * a # Positive-definite - for (a, a2, aher, apd) in ((a, a2, aher, apd), - (view(a, 1:n, 1:n), - view(a2, 1:n, 1:n), - view(aher, 1:n, 1:n), - view(apd , 1:n, 1:n))) - ε = εa = eps(abs(float(one(eltya)))) - - # check that factorize gives a Bunch-Kaufman - @test isa(factorize(asym), LinearAlgebra.BunchKaufman) - @test isa(factorize(aher), LinearAlgebra.BunchKaufman) - @testset "$uplo Bunch-Kaufman factor of indefinite matrix" for uplo in (:L, :U) - bc1 = bunchkaufman(Hermitian(aher, uplo)) - @test LinearAlgebra.issuccess(bc1) - @test logabsdet(bc1)[1] ≈ log(abs(det(bc1))) - if eltya <: Real - @test logabsdet(bc1)[2] == sign(det(bc1)) - else - @test logabsdet(bc1)[2] ≈ sign(det(bc1)) - end - @test inv(bc1)*aher ≈ Matrix(I, n, n) - @testset for rook in (false, true) - @test inv(bunchkaufman(Symmetric(transpose(a) + a, uplo), rook))*(transpose(a) + a) ≈ Matrix(I, n, n) - if eltya <: BlasFloat - # test also bunchkaufman! without explicit type tag - # no bunchkaufman! method for Int ... yet - @test inv(bunchkaufman!(transpose(a) + a, rook))*(transpose(a) + a) ≈ Matrix(I, n, n) - end - @test size(bc1) == size(bc1.LD) - @test size(bc1, 1) == size(bc1.LD, 1) - @test size(bc1, 2) == size(bc1.LD, 2) - if eltya <: BlasReal - @test_throws ArgumentError bunchkaufman(a) - end - # Test extraction of factors - if eltya <: Real - @test getproperty(bc1, uplo)*bc1.D*getproperty(bc1, uplo)' ≈ aher[bc1.p, bc1.p] - @test getproperty(bc1, uplo)*bc1.D*getproperty(bc1, uplo)' ≈ bc1.P*aher*bc1.P' - end - - bc1 = bunchkaufman(Symmetric(asym, uplo)) - @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ asym[bc1.p, bc1.p] - @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ bc1.P*asym*transpose(bc1.P) - @test_throws ErrorException bc1.Z - @test_throws ArgumentError uplo === :L ? bc1.U : bc1.L - end - # test Base.iterate - ref_objs = (bc1.D, uplo === :L ? bc1.L : bc1.U, bc1.p) - for (bki, bkobj) in enumerate(bc1) - @test bkobj == ref_objs[bki] - end - if eltya <: BlasFloat - @test convert(LinearAlgebra.BunchKaufman{eltya}, bc1) === bc1 - @test convert(LinearAlgebra.Factorization{eltya}, bc1) === bc1 - if eltya <: BlasReal - @test convert(LinearAlgebra.Factorization{Float16}, bc1) == convert(LinearAlgebra.BunchKaufman{Float16}, bc1) - elseif eltya <: BlasComplex - @test convert(LinearAlgebra.Factorization{ComplexF16}, bc1) == convert(LinearAlgebra.BunchKaufman{ComplexF16}, bc1) - end - end - @test Base.propertynames(bc1) == (:p, :P, :L, :U, :D) - end - - @testset "$eltyb argument B" for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) - b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? 
complex.(breal, bimg) : breal) - for b in (b, view(b, 1:n, 1:2)) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - - @testset "$uplo Bunch-Kaufman factor of indefinite matrix" for uplo in (:L, :U) - bc1 = bunchkaufman(Hermitian(aher, uplo)) - @test aher*(bc1\b) ≈ b atol=1000ε - end - - @testset "$uplo Bunch-Kaufman factors of a pos-def matrix" for uplo in (:U, :L) - @testset "rook pivoting: $rook" for rook in (false, true) - bc2 = bunchkaufman(Hermitian(apd, uplo), rook) - @test LinearAlgebra.issuccess(bc2) - bks = split(sprint(show, "text/plain", bc2), "\n") - @test bks[1] == summary(bc2) - @test bks[2] == "D factor:" - @test bks[4+n] == "$uplo factor:" - @test bks[6+2n] == "permutation:" - @test logdet(bc2) ≈ log(det(bc2)) - @test logabsdet(bc2)[1] ≈ log(abs(det(bc2))) - @test logabsdet(bc2)[2] == sign(det(bc2)) - @test inv(bc2)*apd ≈ Matrix(I, n, n) - @test apd*(bc2\b) ≈ b rtol=eps(cond(apd)) - @test ishermitian(bc2) - @test !issymmetric(bc2) || eltya <: Real - end - end - end - end - end -end - -@testset "Singular matrices" begin - R = Float64[1 0; 0 0] - C = ComplexF64[1 0; 0 0] - for A in (R, Symmetric(R), C, Hermitian(C)) - @test_throws SingularException bunchkaufman(A) - @test_throws SingularException bunchkaufman!(copy(A)) - @test_throws SingularException bunchkaufman(A; check = true) - @test_throws SingularException bunchkaufman!(copy(A); check = true) - @test !issuccess(bunchkaufman(A; check = false)) - @test !issuccess(bunchkaufman!(copy(A); check = false)) - end - F = bunchkaufman(R; check = false) - @test sprint(show, "text/plain", F) == "Failed factorization of type $(typeof(F))" -end - -@testset "test example due to @timholy in PR 15354" begin - A = rand(6,5); A = complex(A'*A) # to avoid calling the real-lhs-complex-rhs method - F = cholesky(A); - v6 = rand(ComplexF64, 6) - v5 = view(v6, 1:5) - @test F\v5 == F\v6[1:5] -end - -@testset "issue #32080" begin - A = Symmetric([-5 -9 9; -9 4 1; 9 1 2]) - B = bunchkaufman(A, true) - @test B.U * B.D * B.U' ≈ A[B.p, B.p] -end - -@test_throws DomainError logdet(bunchkaufman([-1 -1; -1 1])) -@test logabsdet(bunchkaufman([8 4; 4 2]; check = false))[1] == -Inf - -@testset "0x0 matrix" begin - for ul in (:U, :L) - B = bunchkaufman(Symmetric(ones(0, 0), ul)) - @test isa(B, BunchKaufman) - @test B.D == Tridiagonal([], [], []) - @test B.P == ones(0, 0) - @test B.p == [] - if ul === :U - @test B.U == UnitUpperTriangular(ones(0, 0)) - @test_throws ArgumentError B.L - else - @test B.L == UnitLowerTriangular(ones(0, 0)) - @test_throws ArgumentError B.U - end - end -end - -@testset "adjoint of BunchKaufman" begin - Ar = randn(5, 5) - Ar = Ar + Ar' - Actmp = complex.(randn(5, 5), randn(5, 5)) - Ac1 = Actmp + Actmp' - Ac2 = Actmp + transpose(Actmp) - b = ones(size(Ar, 1)) - - F = bunchkaufman(Ar) - @test F\b == F'\b - - F = bunchkaufman(Ac1) - @test F\b == F'\b - - F = bunchkaufman(Ac2) - @test_throws ArgumentError("adjoint not implemented for complex symmetric matrices") F' -end - -@testset "BunchKaufman for AbstractMatrix" begin - S = SymTridiagonal(fill(2.0, 4), ones(3)) - B = bunchkaufman(S) - @test B.U * B.D * B.U' ≈ S -end - -end # module TestBunchKaufman diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl deleted file mode 100644 index a795eb8d44a03..0000000000000 --- a/stdlib/LinearAlgebra/test/cholesky.jl +++ /dev/null @@ -1,551 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestCholesky - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted, - PosDefException, RankDeficientException, chkfullrank - -function unary_ops_tests(a, ca, tol; n=size(a, 1)) - @test inv(ca)*a ≈ Matrix(I, n, n) - @test a*inv(ca) ≈ Matrix(I, n, n) - @test abs((det(ca) - det(a))/det(ca)) <= tol # Ad hoc, but statistically verified, revisit - @test logdet(ca) ≈ logdet(a) - @test logdet(ca) ≈ log(det(ca)) # logdet is less likely to overflow - logabsdet_ca = logabsdet(ca) - logabsdet_a = logabsdet(a) - @test logabsdet_ca[1] ≈ logabsdet_a[1] - @test logabsdet_ca[2] ≈ logabsdet_a[2] - @test isposdef(ca) - @test_throws ErrorException ca.Z - @test size(ca) == size(a) - @test Array(copy(ca)) ≈ a -end - -function factor_recreation_tests(a_U, a_L) - c_U = cholesky(a_U) - c_L = cholesky(a_L) - cl = c_L.U - ls = c_L.L - @test Array(c_U) ≈ Array(c_L) ≈ a_U - @test ls*ls' ≈ a_U - @test triu(c_U.factors) ≈ c_U.U - @test tril(c_L.factors) ≈ c_L.L - @test istriu(cl) - @test cl'cl ≈ a_U - @test cl'cl ≈ a_L -end - -@testset "core functionality" begin - n = 10 - - # Split n into 2 parts for tests needing two matrices - n1 = div(n, 2) - n2 = 2*n1 - - Random.seed!(12344) - - areal = randn(n,n)/2 - aimg = randn(n,n)/2 - a2real = randn(n,n)/2 - a2img = randn(n,n)/2 - breal = randn(n,2)/2 - bimg = randn(n,2)/2 - - for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) - a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real) - - ε = εa = eps(abs(float(one(eltya)))) - - # Test of symmetric pos. def. strided matrix - apd = a'*a - @inferred cholesky(apd) - capd = factorize(apd) - r = capd.U - κ = cond(apd, 1) #condition number - - unary_ops_tests(apd, capd, ε*κ*n) - if eltya != Int - @test Factorization{eltya}(capd) === capd - if eltya <: Real - @test Array(Factorization{complex(eltya)}(capd)) ≈ Array(factorize(complex(apd))) - @test eltype(Factorization{complex(eltya)}(capd)) == complex(eltya) - end - end - @testset "throw for non-square input" begin - A = rand(eltya, 2, 3) - @test_throws DimensionMismatch cholesky(A) - @test_throws DimensionMismatch cholesky!(A) - end - - #Test error bound on reconstruction of matrix: LAWNS 14, Lemma 2.1 - - #these tests were failing on 64-bit linux when inside the inner loop - #for eltya = ComplexF32 and eltyb = Int. The E[i,j] had NaN32 elements - #but only with Random.seed!(1234321) set before the loops. 
- E = abs.(apd - r'*r) - for i=1:n, j=1:n - @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j])) - end - E = abs.(apd - Matrix(capd)) - for i=1:n, j=1:n - @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j])) - end - @test LinearAlgebra.issuccess(capd) - @inferred(logdet(capd)) - - apos = apd[1,1] - @test all(x -> x ≈ √apos, cholesky(apos).factors) - - # Test cholesky with Symmetric/Hermitian upper/lower - apds = Symmetric(apd) - apdsL = Symmetric(apd, :L) - apdh = Hermitian(apd) - apdhL = Hermitian(apd, :L) - if eltya <: Real - capds = cholesky(apds) - unary_ops_tests(apds, capds, ε*κ*n) - if eltya <: BlasReal - capds = cholesky!(copy(apds)) - unary_ops_tests(apds, capds, ε*κ*n) - end - ulstring = sprint((t, s) -> show(t, "text/plain", s), capds.UL) - @test sprint((t, s) -> show(t, "text/plain", s), capds) == "$(typeof(capds))\nU factor:\n$ulstring" - else - capdh = cholesky(apdh) - unary_ops_tests(apdh, capdh, ε*κ*n) - capdh = cholesky!(copy(apdh)) - unary_ops_tests(apdh, capdh, ε*κ*n) - capdh = cholesky!(copy(apd)) - unary_ops_tests(apd, capdh, ε*κ*n) - ulstring = sprint((t, s) -> show(t, "text/plain", s), capdh.UL) - @test sprint((t, s) -> show(t, "text/plain", s), capdh) == "$(typeof(capdh))\nU factor:\n$ulstring" - end - - # test cholesky of 2x2 Strang matrix - S = SymTridiagonal{eltya}([2, 2], [-1]) - for uplo in (:U, :L) - @test Matrix(@inferred cholesky(Hermitian(S, uplo))) ≈ S - if eltya <: Real - @test Matrix(@inferred cholesky(Symmetric(S, uplo))) ≈ S - end - end - @test Matrix(cholesky(S).U) ≈ [2 -1; 0 sqrt(eltya(3))] / sqrt(eltya(2)) - @test Matrix(cholesky(S)) ≈ S - - # test extraction of factor and re-creating original matrix - if eltya <: Real - factor_recreation_tests(apds, apdsL) - else - factor_recreation_tests(apdh, apdhL) - end - - #pivoted upper Cholesky - if eltya != BigFloat - cpapd = cholesky(apdh, RowMaximum()) - unary_ops_tests(apdh, cpapd, ε*κ*n) - @test rank(cpapd) == n - @test all(diff(diag(real(cpapd.factors))).<=0.) # diagonal should be non-increasing - - @test cpapd.P*cpapd.L*cpapd.U*cpapd.P' ≈ apd - end - - for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) - b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - - for b in (b, view(b, 1:n, 1)) # Array and SubArray - - # Test error bound on linear solver: LAWNS 14, Theorem 2.1 - # This is a surprisingly loose bound - x = capd\b - @test norm(x-apd\b,1)/norm(x,1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(apd*x-b,1)/norm(b,1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - - @test norm(a*(capd\(a'*b)) - b,1)/norm(b,1) <= ε*κ*n # Ad hoc, revisit - - if eltya != BigFloat && eltyb != BigFloat - lapd = cholesky(apdhL) - @test norm(apd * (lapd\b) - b)/norm(b) <= ε*κ*n - @test norm(apd * (lapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n - end - - if eltya != BigFloat && eltyb != BigFloat # Note! 
Need to implement pivoted Cholesky decomposition in julia - - cpapd = cholesky(apdh, RowMaximum()) - @test norm(apd * (cpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit - @test norm(apd * (cpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n - - lpapd = cholesky(apdhL, RowMaximum()) - @test norm(apd * (lpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit - @test norm(apd * (lpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n - end - end - end - - for eltyb in (Float64, ComplexF64) - Breal = convert(Matrix{BigFloat}, randn(n,n)/2) - Bimg = convert(Matrix{BigFloat}, randn(n,n)/2) - B = (eltya <: Complex || eltyb <: Complex) ? complex.(Breal, Bimg) : Breal - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - - for B in (B, view(B, 1:n, 1:n)) # Array and SubArray - - # Test error bound on linear solver: LAWNS 14, Theorem 2.1 - # This is a surprisingly loose bound - BB = copy(B) - ldiv!(capd, BB) - @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - if eltya != BigFloat - cpapd = cholesky(apdh, RowMaximum()) - BB = copy(B) - ldiv!(cpapd, BB) - @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - end - end - end - - @testset "solve with generic Cholesky" begin - Breal = convert(Matrix{BigFloat}, randn(n,n)/2) - Bimg = convert(Matrix{BigFloat}, randn(n,n)/2) - B = eltya <: Complex ? complex.(Breal, Bimg) : Breal - εb = eps(abs(float(one(eltype(B))))) - ε = max(εa,εb) - - for B in (B, view(B, 1:n, 1:n)) # Array and SubArray - - # Test error bound on linear solver: LAWNS 14, Theorem 2.1 - # This is a surprisingly loose bound - cpapd = cholesky(eltya <: Complex ? apdh : apds) - BB = copy(B) - rdiv!(BB, cpapd) - @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - cpapd = cholesky(eltya <: Complex ? apdhL : apdsL) - BB = copy(B) - rdiv!(BB, cpapd) - @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - if eltya != BigFloat - cpapd = cholesky(eltya <: Complex ? apdh : apds, RowMaximum()) - BB = copy(B) - rdiv!(BB, cpapd) - @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - cpapd = cholesky(eltya <: Complex ? apdhL : apdsL, RowMaximum()) - BB = copy(B) - rdiv!(BB, cpapd) - @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ - end - end - end - if eltya <: BlasFloat - @testset "generic cholesky!" 
begin - if eltya <: Complex - A = complex.(randn(5,5), randn(5,5)) - else - A = randn(5,5) - end - A = convert(Matrix{eltya}, A'A) - @test Matrix(cholesky(A).L) ≈ Matrix(invoke(LinearAlgebra._chol!, Tuple{AbstractMatrix, Type{LowerTriangular}}, copy(A), LowerTriangular)[1]) - @test Matrix(cholesky(A).U) ≈ Matrix(invoke(LinearAlgebra._chol!, Tuple{AbstractMatrix, Type{UpperTriangular}}, copy(A), UpperTriangular)[1]) - end - end - end -end - -@testset "behavior for non-positive definite matrices" for T in (Float64, ComplexF64, BigFloat) - A = T[1 2; 2 1] - B = T[1 2; 0 1] - C = T[2 0; 0 0] - # check = (true|false) - for M in (A, Hermitian(A), B, C) - @test_throws PosDefException cholesky(M) - @test_throws PosDefException cholesky!(copy(M)) - @test_throws PosDefException cholesky(M; check = true) - @test_throws PosDefException cholesky!(copy(M); check = true) - @test !LinearAlgebra.issuccess(cholesky(M; check = false)) - @test !LinearAlgebra.issuccess(cholesky!(copy(M); check = false)) - end - if T !== BigFloat # generic pivoted cholesky is not implemented - for M in (A, Hermitian(A), B) - @test_throws RankDeficientException cholesky(M, RowMaximum()) - @test_throws RankDeficientException cholesky!(copy(M), RowMaximum()) - @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true) - @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true) - @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false)) - @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false)) - C = cholesky(M, RowMaximum(); check = false) - @test_throws RankDeficientException chkfullrank(C) - C = cholesky!(copy(M), RowMaximum(); check = false) - @test_throws RankDeficientException chkfullrank(C) - end - end - @test !isposdef(A) - str = sprint((io, x) -> show(io, "text/plain", x), cholesky(A; check = false)) -end - -@testset "Cholesky factor of Matrix with non-commutative elements, here 2x2-matrices" begin - X = Matrix{Float64}[0.1*rand(2,2) for i in 1:3, j = 1:3] - L = Matrix(LinearAlgebra._chol!(X*X', LowerTriangular)[1]) - U = Matrix(LinearAlgebra._chol!(X*X', UpperTriangular)[1]) - XX = Matrix(X*X') - - @test sum(sum(norm, L*L' - XX)) < eps() - @test sum(sum(norm, U'*U - XX)) < eps() -end - -@testset "Non-strided Cholesky solves" begin - B = randn(5, 5) - v = rand(5) - @test cholesky(Diagonal(v)) \ B ≈ Diagonal(v) \ B - @test B / cholesky(Diagonal(v)) ≈ B / Diagonal(v) - @test inv(cholesky(Diagonal(v)))::Diagonal ≈ Diagonal(1 ./ v) -end - -struct WrappedVector{T} <: AbstractVector{T} - data::Vector{T} -end -Base.copy(v::WrappedVector) = WrappedVector(copy(v.data)) -Base.size(v::WrappedVector) = size(v.data) -Base.getindex(v::WrappedVector, i::Integer) = getindex(v.data, i) -Base.setindex!(v::WrappedVector, val, i::Integer) = setindex!(v.data, val, i) - -@testset "cholesky up- and downdates" begin - A = complex.(randn(10,5), randn(10, 5)) - v = complex.(randn(5), randn(5)) - w = WrappedVector(v) - for uplo in (:U, :L) - AcA = A'*A - BcB = AcA + v*v' - BcB = (BcB + BcB')/2 - F = cholesky(Hermitian(AcA, uplo)) - G = cholesky(Hermitian(BcB, uplo)) - @test getproperty(lowrankupdate(F, v), uplo) ≈ getproperty(G, uplo) - @test getproperty(lowrankupdate(F, w), uplo) ≈ getproperty(G, uplo) - @test_throws DimensionMismatch lowrankupdate(F, Vector{eltype(v)}(undef,length(v)+1)) - @test getproperty(lowrankdowndate(G, v), uplo) ≈ getproperty(F, uplo) - @test getproperty(lowrankdowndate(G, w), uplo) ≈ getproperty(F, uplo) - @test_throws DimensionMismatch 
lowrankdowndate(G, Vector{eltype(v)}(undef,length(v)+1)) - end -end - -@testset "issue #13243, unexpected nans in complex cholesky" begin - apd = [5.8525753f0 + 0.0f0im -0.79540455f0 + 0.7066077f0im 0.98274714f0 + 1.3824869f0im 2.619998f0 + 1.8532984f0im -1.8306153f0 - 1.2336911f0im 0.32275113f0 + 0.015575029f0im 2.1968813f0 + 1.0640624f0im 0.27894387f0 + 0.97911835f0im 3.0476584f0 + 0.18548489f0im 0.3842994f0 + 0.7050991f0im - -0.79540455f0 - 0.7066077f0im 8.313246f0 + 0.0f0im -1.8076122f0 - 0.8882447f0im 0.47806996f0 + 0.48494184f0im 0.5096429f0 - 0.5395974f0im -0.7285097f0 - 0.10360408f0im -1.1760061f0 - 2.7146957f0im -0.4271084f0 + 0.042899966f0im -1.7228563f0 + 2.8335886f0im 1.8942566f0 + 0.6389735f0im - 0.98274714f0 - 1.3824869f0im -1.8076122f0 + 0.8882447f0im 9.367975f0 + 0.0f0im -0.1838578f0 + 0.6468568f0im -1.8338387f0 + 0.7064959f0im 0.041852742f0 - 0.6556877f0im 2.5673025f0 + 1.9732997f0im -1.1148382f0 - 0.15693812f0im 2.4704504f0 - 1.0389464f0im 1.0858271f0 - 1.298006f0im - 2.619998f0 - 1.8532984f0im 0.47806996f0 - 0.48494184f0im -0.1838578f0 - 0.6468568f0im 3.1117508f0 + 0.0f0im -1.956626f0 + 0.22825956f0im 0.07081801f0 - 0.31801307f0im 0.3698375f0 - 0.5400855f0im 0.80686307f0 + 1.5315914f0im 1.5649154f0 - 1.6229297f0im -0.112077385f0 + 1.2014246f0im - -1.8306153f0 + 1.2336911f0im 0.5096429f0 + 0.5395974f0im -1.8338387f0 - 0.7064959f0im -1.956626f0 - 0.22825956f0im 3.6439795f0 + 0.0f0im -0.2594722f0 + 0.48786148f0im -0.47636223f0 - 0.27821827f0im -0.61608654f0 - 2.01858f0im -2.7767487f0 + 1.7693765f0im 0.048102796f0 - 0.9741874f0im - 0.32275113f0 - 0.015575029f0im -0.7285097f0 + 0.10360408f0im 0.041852742f0 + 0.6556877f0im 0.07081801f0 + 0.31801307f0im -0.2594722f0 - 0.48786148f0im 3.624376f0 + 0.0f0im -1.6697118f0 + 0.4017511f0im -1.4397877f0 - 0.7550918f0im -0.31456697f0 - 1.0403451f0im -0.31978557f0 + 0.13701046f0im - 2.1968813f0 - 1.0640624f0im -1.1760061f0 + 2.7146957f0im 2.5673025f0 - 1.9732997f0im 0.3698375f0 + 0.5400855f0im -0.47636223f0 + 0.27821827f0im -1.6697118f0 - 0.4017511f0im 6.8273163f0 + 0.0f0im -0.10051322f0 + 0.24303961f0im 1.4415971f0 + 0.29750675f0im 1.221786f0 - 0.85654986f0im - 0.27894387f0 - 0.97911835f0im -0.4271084f0 - 0.042899966f0im -1.1148382f0 + 0.15693812f0im 0.80686307f0 - 1.5315914f0im -0.61608654f0 + 2.01858f0im -1.4397877f0 + 0.7550918f0im -0.10051322f0 - 0.24303961f0im 3.4057708f0 + 0.0f0im -0.5856801f0 - 1.0203559f0im 0.7103452f0 + 0.8422135f0im - 3.0476584f0 - 0.18548489f0im -1.7228563f0 - 2.8335886f0im 2.4704504f0 + 1.0389464f0im 1.5649154f0 + 1.6229297f0im -2.7767487f0 - 1.7693765f0im -0.31456697f0 + 1.0403451f0im 1.4415971f0 - 0.29750675f0im -0.5856801f0 + 1.0203559f0im 7.005772f0 + 0.0f0im -0.9617417f0 - 1.2486815f0im - 0.3842994f0 - 0.7050991f0im 1.8942566f0 - 0.6389735f0im 1.0858271f0 + 1.298006f0im -0.112077385f0 - 1.2014246f0im 0.048102796f0 + 0.9741874f0im -0.31978557f0 - 0.13701046f0im 1.221786f0 + 0.85654986f0im 0.7103452f0 - 0.8422135f0im -0.9617417f0 + 1.2486815f0im 3.4629636f0 + 0.0f0im] - b = [-0.905011814118756 + 0.2847570854574069im -0.7122162951294634 - 0.630289556702497im - -0.7620356655676837 + 0.15533508334193666im 0.39947219167701153 - 0.4576746001199889im - -0.21782716937787788 - 0.9222220085490986im -0.727775859267237 + 0.50638268521728im - -1.0509472322215125 + 0.5022165705328413im -0.7264975746431271 + 0.31670415674097235im - -0.6650468984506477 - 0.5000967284800251im -0.023682508769195098 + 0.18093440285319276im - -0.20604111555491242 + 0.10570814584017311im 0.562377322638969 - 0.2578030745663871im - 
-0.3451346708401685 + 1.076948486041297im 0.9870834574024372 - 0.2825689605519449im - 0.25336108035924787 + 0.975317836492159im 0.0628393808469436 - 0.1253397353973715im - 0.11192755545114 - 0.1603741874112385im 0.8439562576196216 + 1.0850814110398734im - -1.0568488936791578 - 0.06025820467086475im 0.12696236014017806 - 0.09853584666755086im] - cholesky(Hermitian(apd, :L), RowMaximum()) \ b - r = factorize(apd).U - E = abs.(apd - r'*r) - ε = eps(abs(float(one(ComplexF32)))) - n = 10 - for i=1:n, j=1:n - @test E[i,j] <= (n+1)ε/(1-(n+1)ε)*real(sqrt(apd[i,i]*apd[j,j])) - end -end - -@testset "fail for non-BLAS element types" begin - @test_throws ArgumentError cholesky!(Hermitian(rand(Float16, 5,5)), RowMaximum()) -end - -@testset "cholesky Diagonal" begin - # real - d = abs.(randn(3)) .+ 0.1 - D = Diagonal(d) - CD = cholesky(D) - CM = cholesky(Matrix(D)) - @test CD isa Cholesky{Float64} - @test CD.U ≈ Diagonal(.√d) ≈ CM.U - @test D ≈ CD.L * CD.U - @test CD.info == 0 - - F = cholesky(Hermitian(I(3))) - @test F isa Cholesky{Float64,<:Diagonal} - @test Matrix(F) ≈ I(3) - - # real, failing - @test_throws PosDefException cholesky(Diagonal([1.0, -2.0])) - Dnpd = cholesky(Diagonal([1.0, -2.0]); check = false) - @test Dnpd.info == 2 - - # complex - D = complex(D) - CD = cholesky(Hermitian(D)) - CM = cholesky(Matrix(Hermitian(D))) - @test CD isa Cholesky{ComplexF64,<:Diagonal} - @test CD.U ≈ Diagonal(.√d) ≈ CM.U - @test D ≈ CD.L * CD.U - @test CD.info == 0 - - # complex, failing - D[2, 2] = 0.0 + 0im - @test_throws PosDefException cholesky(D) - Dnpd = cholesky(D; check = false) - @test Dnpd.info == 2 - - # InexactError for Int - @test_throws InexactError cholesky!(Diagonal([2, 1])) -end - -@testset "Cholesky for AbstractMatrix" begin - S = SymTridiagonal(fill(2.0, 4), ones(3)) - C = cholesky(S) - @test C.L * C.U ≈ S -end - -@testset "constructor with non-BlasInt arguments" begin - - x = rand(5,5) - chol = cholesky(x'x) - - factors, uplo, info = chol.factors, chol.uplo, chol.info - - @test Cholesky(factors, uplo, Int32(info)) == chol - @test Cholesky(factors, uplo, Int64(info)) == chol - - cholp = cholesky(x'x, RowMaximum()) - - factors, uplo, piv, rank, tol, info = - cholp.factors, cholp.uplo, cholp.piv, cholp.rank, cholp.tol, cholp.info - - @test CholeskyPivoted(factors, uplo, piv, Int32(rank), tol, info) == cholp - @test CholeskyPivoted(factors, uplo, piv, Int64(rank), tol, info) == cholp - - @test CholeskyPivoted(factors, uplo, piv, rank, tol, Int32(info)) == cholp - @test CholeskyPivoted(factors, uplo, piv, rank, tol, Int64(info)) == cholp - -end - -@testset "issue #33704, casting low-rank CholeskyPivoted to Matrix" begin - A = randn(1,8) - B = A'A - C = cholesky(B, RowMaximum(), check=false) - @test B ≈ Matrix(C) -end - -@testset "CholeskyPivoted and Factorization" begin - A = randn(8,8) - B = A'A - C = cholesky(B, RowMaximum(), check=false) - @test CholeskyPivoted{eltype(C)}(C) === C - @test Factorization{eltype(C)}(C) === C - @test Array(CholeskyPivoted{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), RowMaximum(), check=false)) - @test Array(Factorization{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), RowMaximum(), check=false)) - @test eltype(Factorization{complex(eltype(C))}(C)) == complex(eltype(C)) -end - -@testset "REPL printing of CholeskyPivoted" begin - A = randn(8,8) - B = A'A - C = cholesky(B, RowMaximum(), check=false) - cholstring = sprint((t, s) -> show(t, "text/plain", s), C) - rankstring = "$(C.uplo) factor with rank $(rank(C)):" - factorstring = sprint((t, s) -> 
show(t, "text/plain", s), C.uplo == 'U' ? C.U : C.L) - permstring = sprint((t, s) -> show(t, "text/plain", s), C.p) - @test cholstring == "$(summary(C))\n$rankstring\n$factorstring\npermutation:\n$permstring" -end - -@testset "destructuring for Cholesky[Pivoted]" begin - for val in (NoPivot(), RowMaximum()) - A = rand(8, 8) - B = A'A - C = cholesky(B, val, check=false) - l, u = C - @test l == C.L - @test u == C.U - end -end - -@testset "issue #37356, diagonal elements of hermitian generic matrix" begin - B = Hermitian(hcat([one(BigFloat) + im])) - @test Matrix(cholesky(B)) ≈ B - C = Hermitian(hcat([one(BigFloat) + im]), :L) - @test Matrix(cholesky(C)) ≈ C -end - -@testset "constructing a Cholesky factor from a triangular matrix" begin - A = [1.0 2.0; 3.0 4.0] - let - U = UpperTriangular(A) - C = Cholesky(U) - @test C isa Cholesky{Float64} - @test C.U == U - @test C.L == U' - end - let - L = LowerTriangular(A) - C = Cholesky(L) - @test C isa Cholesky{Float64} - @test C.L == L - @test C.U == L' - end -end - -@testset "adjoint of Cholesky" begin - A = randn(5, 5) - A = A'A - F = cholesky(A) - b = ones(size(A, 1)) - @test F\b == F'\b -end - -@testset "Float16" begin - A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.] - B = cholesky(A) - B32 = cholesky(Float32.(A)) - @test B isa Cholesky{Float16, Matrix{Float16}} - @test B.U isa UpperTriangular{Float16, Matrix{Float16}} - @test B.L isa LowerTriangular{Float16, Matrix{Float16}} - @test B.UL isa UpperTriangular{Float16, Matrix{Float16}} - @test B.U ≈ B32.U - @test B.L ≈ B32.L - @test B.UL ≈ B32.UL - @test Matrix(B) ≈ A - B = cholesky(A, RowMaximum()) - B32 = cholesky(Float32.(A), RowMaximum()) - @test B isa CholeskyPivoted{Float16,Matrix{Float16}} - @test B.U isa UpperTriangular{Float16, Matrix{Float16}} - @test B.L isa LowerTriangular{Float16, Matrix{Float16}} - @test B.U ≈ B32.U - @test B.L ≈ B32.L - @test Matrix(B) ≈ A -end - -@testset "det and logdet" begin - A = [4083 3825 5876 2048 4470 5490; - 3825 3575 5520 1920 4200 5140; - 5876 5520 8427 2940 6410 7903; - 2048 1920 2940 1008 2240 2740; - 4470 4200 6410 2240 4875 6015; - 5490 5140 7903 2740 6015 7370] - B = cholesky(A, RowMaximum(), check=false) - @test det(B) == 0.0 - @test det(B) ≈ det(A) atol=eps() - @test logdet(B) == -Inf - @test logabsdet(B)[1] == -Inf - end - -end # module TestCholesky diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl deleted file mode 100644 index efeedf93ebd1f..0000000000000 --- a/stdlib/LinearAlgebra/test/dense.jl +++ /dev/null @@ -1,1232 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestDense - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal - -@testset "Check that non-floats are correctly promoted" begin - @test [1 0 0; 0 1 0]\[1,1] ≈ [1;1;0] -end - -n = 10 - -# Split n into 2 parts for tests needing two matrices -n1 = div(n, 2) -n2 = 2*n1 - -Random.seed!(1234323) - -@testset "Matrix condition number" begin - ainit = rand(n, n) - @testset "for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - ainit = convert(Matrix{elty}, ainit) - for a in (copy(ainit), view(ainit, 1:n, 1:n)) - ainv = inv(a) - @test cond(a, 1) == opnorm(a, 1) *opnorm(ainv, 1) - @test cond(a, Inf) == opnorm(a, Inf)*opnorm(ainv, Inf) - @test cond(a[:, 1:5]) == (\)(extrema(svdvals(a[:, 1:5]))...) 
- @test_throws ArgumentError cond(a,3) - end - end - @testset "Singular matrices" for p in (1, 2, Inf) - @test cond(zeros(Int, 2, 2), p) == Inf - @test cond(zeros(2, 2), p) == Inf - @test cond([0 0; 1 1], p) == Inf - @test cond([0. 0.; 1. 1.], p) == Inf - end - @testset "Issue #33547, condition number of 2x2 matrix" begin - M = [1.0 -2.0 - -2.0 -1.5] - @test cond(M, 1) ≈ 2.227272727272727 - end - @testset "Condition numbers of a non-random matrix" begin - # To ensure that we detect any regressions in the underlying functions - Mars= [11 24 7 20 3 - 4 12 25 8 16 - 17 5 13 21 9 - 10 18 1 14 22 - 23 6 19 2 15] - @test cond(Mars, 1) ≈ 7.1 - @test cond(Mars, 2) ≈ 6.181867355918493 - @test cond(Mars, Inf) ≈ 7.1 - end -end - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 -a2real = randn(n,n)/2 -a2img = randn(n,n)/2 -breal = randn(n,2)/2 -bimg = randn(n,2)/2 - -@testset "For A containing $eltya" for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) - ainit = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - ainit2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real) - ε = εa = eps(abs(float(one(eltya)))) - - apd = ainit'*ainit # symmetric positive-definite - @testset "Positive definiteness" begin - @test !isposdef(ainit) - @test isposdef(apd) - if eltya != Int # cannot perform cholesky! for Matrix{Int} - @test !isposdef!(copy(ainit)) - @test isposdef!(copy(apd)) - end - end - @testset "For b containing $eltyb" for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) - binit = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - for (a, b) in ((copy(ainit), copy(binit)), (view(ainit, 1:n, 1:n), view(binit, 1:n, 1:2))) - @testset "Solve square general system of equations" begin - κ = cond(a,1) - x = a \ b - @test_throws DimensionMismatch b'\b - @test_throws DimensionMismatch b\b' - @test norm(a*x - b, 1)/norm(b) < ε*κ*n*2 # Ad hoc, revisit! 
- @test zeros(eltya,n)\fill(eltya(1),n) ≈ (zeros(eltya,n,1)\fill(eltya(1),n,1))[1,1] - end - - @testset "Test nullspace" begin - a15null = nullspace(a[:,1:n1]') - @test rank([a[:,1:n1] a15null]) == 10 - @test norm(a[:,1:n1]'a15null,Inf) ≈ zero(eltya) atol=300ε - @test norm(a15null'a[:,1:n1],Inf) ≈ zero(eltya) atol=400ε - @test size(nullspace(b), 2) == 0 - @test size(nullspace(b, rtol=0.001), 2) == 0 - @test size(nullspace(b, atol=100*εb), 2) == 0 - @test size(nullspace(b, 100*εb), 2) == 0 - @test nullspace(zeros(eltya,n)) == Matrix(I, 1, 1) - @test nullspace(zeros(eltya,n), 0.1) == Matrix(I, 1, 1) - # test empty cases - @test @inferred(nullspace(zeros(n, 0))) == Matrix(I, 0, 0) - @test @inferred(nullspace(zeros(0, n))) == Matrix(I, n, n) - # test vector cases - @test size(@inferred nullspace(a[:, 1])) == (1, 0) - @test size(@inferred nullspace(zero(a[:, 1]))) == (1, 1) - @test nullspace(zero(a[:, 1]))[1,1] == 1 - # test adjortrans vectors, including empty ones - @test size(@inferred nullspace(a[:, 1]')) == (n, n - 1) - @test @inferred(nullspace(a[1:0, 1]')) == Matrix(I, 0, 0) - @test size(@inferred nullspace(b[1, :]')) == (2, 1) - @test @inferred(nullspace(b[1, 1:0]')) == Matrix(I, 0, 0) - @test size(@inferred nullspace(transpose(a[:, 1]))) == (n, n - 1) - @test size(@inferred nullspace(transpose(b[1, :]))) == (2, 1) - end - end - end # for eltyb - - for (a, a2) in ((copy(ainit), copy(ainit2)), (view(ainit, 1:n, 1:n), view(ainit2, 1:n, 1:n))) - @testset "Test pinv" begin - pinva15 = pinv(a[:,1:n1]) - @test a[:,1:n1]*pinva15*a[:,1:n1] ≈ a[:,1:n1] - @test pinva15*a[:,1:n1]*pinva15 ≈ pinva15 - pinva15 = pinv(a[:,1:n1]') # the Adjoint case - @test a[:,1:n1]'*pinva15*a[:,1:n1]' ≈ a[:,1:n1]' - @test pinva15*a[:,1:n1]'*pinva15 ≈ pinva15 - - @test size(pinv(Matrix{eltya}(undef,0,0))) == (0,0) - end - - @testset "Lyapunov/Sylvester" begin - x = lyap(a, a2) - @test -a2 ≈ a*x + x*a' - y = lyap(a', a2') - @test y ≈ lyap(Array(a'), Array(a2')) - @test -a2' ≈ a'y + y*a - z = lyap(Tridiagonal(a)', Diagonal(a2)) - @test z ≈ lyap(Array(Tridiagonal(a)'), Array(Diagonal(a2))) - @test -Diagonal(a2) ≈ Tridiagonal(a)'*z + z*Tridiagonal(a) - x2 = sylvester(a[1:3, 1:3], a[4:n, 4:n], a2[1:3,4:n]) - @test -a2[1:3, 4:n] ≈ a[1:3, 1:3]*x2 + x2*a[4:n, 4:n] - y2 = sylvester(a[1:3, 1:3]', a[4:n, 4:n]', a2[4:n,1:3]') - @test y2 ≈ sylvester(Array(a[1:3, 1:3]'), Array(a[4:n, 4:n]'), Array(a2[4:n,1:3]')) - @test -a2[4:n, 1:3]' ≈ a[1:3, 1:3]'*y2 + y2*a[4:n, 4:n]' - z2 = sylvester(Tridiagonal(a[1:3, 1:3]), Diagonal(a[4:n, 4:n]), a2[1:3,4:n]) - @test z2 ≈ sylvester(Array(Tridiagonal(a[1:3, 1:3])), Array(Diagonal(a[4:n, 4:n])), Array(a2[1:3,4:n])) - @test -a2[1:3, 4:n] ≈ Tridiagonal(a[1:3, 1:3])*z2 + z2*Diagonal(a[4:n, 4:n]) - end - - @testset "Matrix square root" begin - asq = sqrt(a) - @test asq*asq ≈ a - @test sqrt(transpose(a))*sqrt(transpose(a)) ≈ transpose(a) - @test sqrt(adjoint(a))*sqrt(adjoint(a)) ≈ adjoint(a) - asym = a + a' # symmetric indefinite - asymsq = sqrt(asym) - @test asymsq*asymsq ≈ asym - @test sqrt(transpose(asym))*sqrt(transpose(asym)) ≈ transpose(asym) - @test sqrt(adjoint(asym))*sqrt(adjoint(asym)) ≈ adjoint(asym) - if eltype(a) <: Real # real square root - apos = a * a - @test sqrt(apos)^2 ≈ apos - @test eltype(sqrt(apos)) <: Real - # test that real but Complex input produces Complex output - @test sqrt(complex(apos)) ≈ sqrt(apos) - @test eltype(sqrt(complex(apos))) <: Complex - end - end - - @testset "Powers" begin - if eltya <: AbstractFloat - z = zero(eltya) - t = convert(eltya,2) - r = 
convert(eltya,2.5) - @test a^z ≈ Matrix(I, size(a)) - @test a^t ≈ a^2 - @test Matrix{eltya}(I, n, n)^r ≈ Matrix(I, size(a)) - end - end - end # end for loop over arraytype - - @testset "Factorize" begin - d = rand(eltya,n) - e = rand(eltya,n-1) - e2 = rand(eltya,n-1) - f = rand(eltya,n-2) - A = diagm(0 => d) - @test factorize(A) == Diagonal(d) - A += diagm(-1 => e) - @test factorize(A) == Bidiagonal(d,e,:L) - A += diagm(-2 => f) - @test factorize(A) == LowerTriangular(A) - A = diagm(0 => d, 1 => e) - @test factorize(A) == Bidiagonal(d,e,:U) - if eltya <: Real - A = diagm(0 => d, 1 => e, -1 => e) - @test Matrix(factorize(A)) ≈ Matrix(factorize(SymTridiagonal(d,e))) - A = diagm(0 => d, 1 => e, -1 => e, 2 => f, -2 => f) - @test inv(factorize(A)) ≈ inv(factorize(Symmetric(A))) - end - A = diagm(0 => d, 1 => e, -1 => e2) - @test Matrix(factorize(A)) ≈ Matrix(factorize(Tridiagonal(e2,d,e))) - A = diagm(0 => d, 1 => e, 2 => f) - @test factorize(A) == UpperTriangular(A) - - x = rand(eltya) - @test factorize(x) == x - end -end # for eltya - -@testset "Test diagm for vectors" begin - @test diagm(zeros(50)) == diagm(0 => zeros(50)) - @test diagm(ones(50)) == diagm(0 => ones(50)) - v = randn(500) - @test diagm(v) == diagm(0 => v) - @test diagm(500, 501, v) == diagm(500, 501, 0 => v) -end - -@testset "Non-square diagm" begin - x = [7, 8] - for m=1:4, n=2:4 - if m < 2 || n < 3 - @test_throws DimensionMismatch diagm(m,n, 0 => x, 1 => x) - @test_throws DimensionMismatch diagm(n,m, 0 => x, -1 => x) - else - M = zeros(m,n) - M[1:2,1:3] = [7 7 0; 0 8 8] - @test diagm(m,n, 0 => x, 1 => x) == M - @test diagm(n,m, 0 => x, -1 => x) == M' - end - end -end - -@testset "Test pinv (rtol, atol)" begin - M = [1 0 0; 0 1 0; 0 0 0] - @test pinv(M,atol=1)== zeros(3,3) - @test pinv(M,rtol=0.5)== M -end - -@testset "Test inv of matrix of NaNs" begin - for eltya in (NaN16, NaN32, NaN32) - r = fill(eltya, 2, 2) - @test_throws ArgumentError inv(r) - c = fill(complex(eltya, eltya), 2, 2) - @test_throws ArgumentError inv(c) - end -end - -@testset "test out of bounds triu/tril" begin - local m, n = 5, 7 - ainit = rand(m, n) - for a in (copy(ainit), view(ainit, 1:m, 1:n)) - @test triu(a, -m) == a - @test triu(a, n + 2) == zero(a) - @test tril(a, -m - 2) == zero(a) - @test tril(a, n) == a - end -end - -@testset "triu M > N case bug fix" begin - mat=[1 2; - 3 4; - 5 6; - 7 8] - res=[1 2; - 3 4; - 0 6; - 0 0] - @test triu(mat, -1) == res -end - -@testset "Tests norms" begin - nnorm = 10 - mmat = 10 - nmat = 8 - @testset "For $elty" for elty in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int32, Int64, BigInt) - x = fill(elty(1),10) - @testset "Vector" begin - xs = view(x,1:2:10) - @test norm(x, -Inf) ≈ 1 - @test norm(x, -1) ≈ 1/10 - @test norm(x, 0) ≈ 10 - @test norm(x, 1) ≈ 10 - @test norm(x, 2) ≈ sqrt(10) - @test norm(x, 3) ≈ cbrt(10) - @test norm(x, Inf) ≈ 1 - if elty <: LinearAlgebra.BlasFloat - @test norm(x, 1:4) ≈ 2 - @test_throws BoundsError norm(x,-1:4) - @test_throws BoundsError norm(x,1:11) - end - @test norm(xs, -Inf) ≈ 1 - @test norm(xs, -1) ≈ 1/5 - @test norm(xs, 0) ≈ 5 - @test norm(xs, 1) ≈ 5 - @test norm(xs, 2) ≈ sqrt(5) - @test norm(xs, 3) ≈ cbrt(5) - @test norm(xs, Inf) ≈ 1 - end - - @testset "Issue #12552:" begin - if real(elty) <: AbstractFloat - for p in [-Inf,-1,1,2,3,Inf] - @test isnan(norm(elty[0,NaN],p)) - @test isnan(norm(elty[NaN,0],p)) - end - end - end - - @testset "Number" begin - norm(x[1:1]) === norm(x[1], -Inf) - norm(x[1:1]) === norm(x[1], 0) - norm(x[1:1]) === norm(x[1], 
1) - norm(x[1:1]) === norm(x[1], 2) - norm(x[1:1]) === norm(x[1], Inf) - end - - @testset "Absolute homogeneity, triangle inequality, & vectorized versions" begin - for i = 1:10 - xinit = elty <: Integer ? convert(Vector{elty}, rand(1:10, nnorm)) : - elty <: Complex ? convert(Vector{elty}, complex.(randn(nnorm), randn(nnorm))) : - convert(Vector{elty}, randn(nnorm)) - yinit = elty <: Integer ? convert(Vector{elty}, rand(1:10, nnorm)) : - elty <: Complex ? convert(Vector{elty}, complex.(randn(nnorm), randn(nnorm))) : - convert(Vector{elty}, randn(nnorm)) - α = elty <: Integer ? randn() : - elty <: Complex ? convert(elty, complex(randn(),randn())) : - convert(elty, randn()) - for (x, y) in ((copy(xinit), copy(yinit)), (view(xinit,1:2:nnorm), view(yinit,1:2:nnorm))) - # Absolute homogeneity - @test norm(α*x,-Inf) ≈ abs(α)*norm(x,-Inf) - @test norm(α*x,-1) ≈ abs(α)*norm(x,-1) - @test norm(α*x,1) ≈ abs(α)*norm(x,1) - @test norm(α*x) ≈ abs(α)*norm(x) # two is default - @test norm(α*x,3) ≈ abs(α)*norm(x,3) - @test norm(α*x,Inf) ≈ abs(α)*norm(x,Inf) - - # Triangle inequality - @test norm(x + y,1) <= norm(x,1) + norm(y,1) - @test norm(x + y) <= norm(x) + norm(y) # two is default - @test norm(x + y,3) <= norm(x,3) + norm(y,3) - @test norm(x + y,Inf) <= norm(x,Inf) + norm(y,Inf) - - # Against vectorized versions - @test norm(x,-Inf) ≈ minimum(abs.(x)) - @test norm(x,-1) ≈ inv(sum(1 ./ abs.(x))) - @test norm(x,0) ≈ sum(x .!= 0) - @test norm(x,1) ≈ sum(abs.(x)) - @test norm(x) ≈ sqrt(sum(abs2.(x))) - @test norm(x,3) ≈ cbrt(sum(abs.(x).^3.)) - @test norm(x,Inf) ≈ maximum(abs.(x)) - end - end - end - - @testset "Matrix (Operator) opnorm" begin - A = fill(elty(1),10,10) - As = view(A,1:5,1:5) - @test opnorm(A, 1) ≈ 10 - elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test opnorm(A, 2) ≈ 10 - @test opnorm(A, Inf) ≈ 10 - @test opnorm(As, 1) ≈ 5 - elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test opnorm(As, 2) ≈ 5 - @test opnorm(As, Inf) ≈ 5 - end - - @testset "Absolute homogeneity, triangle inequality, & norm" begin - for i = 1:10 - Ainit = elty <: Integer ? convert(Matrix{elty}, rand(1:10, mmat, nmat)) : - elty <: Complex ? convert(Matrix{elty}, complex.(randn(mmat, nmat), randn(mmat, nmat))) : - convert(Matrix{elty}, randn(mmat, nmat)) - Binit = elty <: Integer ? convert(Matrix{elty}, rand(1:10, mmat, nmat)) : - elty <: Complex ? convert(Matrix{elty}, complex.(randn(mmat, nmat), randn(mmat, nmat))) : - convert(Matrix{elty}, randn(mmat, nmat)) - α = elty <: Integer ? randn() : - elty <: Complex ? convert(elty, complex(randn(),randn())) : - convert(elty, randn()) - for (A, B) in ((copy(Ainit), copy(Binit)), (view(Ainit,1:nmat,1:nmat), view(Binit,1:nmat,1:nmat))) - # Absolute homogeneity - @test norm(α*A,1) ≈ abs(α)*norm(A,1) - elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test norm(α*A) ≈ abs(α)*norm(A) # two is default - @test norm(α*A,Inf) ≈ abs(α)*norm(A,Inf) - - # Triangle inequality - @test norm(A + B,1) <= norm(A,1) + norm(B,1) - elty <: Union{BigFloat,Complex{BigFloat},BigInt} || @test norm(A + B) <= norm(A) + norm(B) # two is default - @test norm(A + B,Inf) <= norm(A,Inf) + norm(B,Inf) - - # norm - for p in (-Inf, Inf, (-2:3)...) - @test norm(A, p) == norm(vec(A), p) - end - end - end - - @testset "issue #10234" begin - if elty <: AbstractFloat || elty <: Complex - z = zeros(elty, 100) - z[1] = -Inf - for p in [-2,-1.5,-1,-0.5,0.5,1,1.5,2,Inf] - @test norm(z, p) == (p < 0 ? 
0 : Inf) - @test norm(elty[Inf],p) == Inf - end - end - end - end - end - - @testset "issue #10234" begin - @test norm(Any[Inf],-2) == norm(Any[Inf],-1) == norm(Any[Inf],1) == norm(Any[Inf],1.5) == norm(Any[Inf],2) == norm(Any[Inf],Inf) == Inf - end - - @testset "overflow/underflow in norms" begin - @test norm(Float64[1e-300, 1], -3)*1e300 ≈ 1 - @test norm(Float64[1e300, 1], 3)*1e-300 ≈ 1 - end -end - -## Issue related tests -@testset "issue #1447" begin - A = [1.0+0.0im 0; 0 1] - B = pinv(A) - for i = 1:4 - @test A[i] ≈ B[i] - end -end - -@testset "issue #2246" begin - A = [1 2 0 0; 0 1 0 0; 0 0 0 0; 0 0 0 0] - Asq = sqrt(A) - @test Asq*Asq ≈ A - A2 = view(A, 1:2, 1:2) - A2sq = sqrt(A2) - @test A2sq*A2sq ≈ A2 - - N = 3 - @test log(det(Matrix(1.0I, N, N))) ≈ logdet(Matrix(1.0I, N, N)) -end - -@testset "issue #2637" begin - a = [1, 2, 3] - b = [4, 5, 6] - @test kron(Matrix(I, 2, 2), Matrix(I, 2, 2)) == Matrix(I, 4, 4) - @test kron(a,b) == [4,5,6,8,10,12,12,15,18] - @test kron(a',b') == [4 5 6 8 10 12 12 15 18] - @test kron(a,b') == [4 5 6; 8 10 12; 12 15 18] - @test kron(a',b) == [4 8 12; 5 10 15; 6 12 18] - @test kron(a, Matrix(1I, 2, 2)) == [1 0; 0 1; 2 0; 0 2; 3 0; 0 3] - @test kron(Matrix(1I, 2, 2), a) == [ 1 0; 2 0; 3 0; 0 1; 0 2; 0 3] - @test kron(Matrix(1I, 2, 2), 2) == Matrix(2I, 2, 2) - @test kron(3, Matrix(1I, 3, 3)) == Matrix(3I, 3, 3) - @test kron(a,2) == [2, 4, 6] - @test kron(b',2) == [8 10 12] -end - -@testset "kron!" begin - a = [1.0, 0.0] - b = [0.0, 1.0] - @test kron!([1.0, 0.0], b, 0.5) == [0.0; 0.5] - @test kron!([1.0, 0.0], 0.5, b) == [0.0; 0.5] - c = Vector{Float64}(undef, 4) - kron!(c, a, b) - @test c == [0.0; 1.0; 0.0; 0.0] - c = Matrix{Float64}(undef, 2, 2) - kron!(c, a, b') - @test c == [0.0 1.0; 0.0 0.0] -end - -@testset "kron adjoint" begin - a = [1+im, 2, 3] - b = [4, 5, 6+7im] - @test kron(a', b') isa Adjoint - @test kron(a', b') == kron(a, b)' - @test kron(transpose(a), b') isa Transpose - @test kron(transpose(a), b') == kron(permutedims(a), collect(b')) - @test kron(transpose(a), transpose(b)) isa Transpose - @test kron(transpose(a), transpose(b)) == transpose(kron(a, b)) -end - -@testset "issue #4796" begin - dim=2 - S=zeros(Complex,dim,dim) - T=zeros(Complex,dim,dim) - fill!(T, 1) - z = 2.5 + 1.5im - S[1] = z - @test S*T == [z z; 0 0] - - # similar issue for Array{Real} - @test Real[1 2] * Real[1.5; 2.0] == Real[5.5] -end - -@testset "Matrix exponential" begin - @testset "Tests for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - A1 = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4]) - eA1 = convert(Matrix{elty}, [147.866622446369 127.781085523181 127.781085523182; - 183.765138646367 183.765138646366 163.679601723179; - 71.797032399996 91.8825693231832 111.968106246371]') - @test exp(A1) ≈ eA1 - @test exp(adjoint(A1)) ≈ adjoint(eA1) - @test exp(transpose(A1)) ≈ transpose(eA1) - for f in (sin, cos, sinh, cosh, tanh, tan) - @test f(adjoint(A1)) ≈ f(copy(adjoint(A1))) - end - - A2 = convert(Matrix{elty}, - [29.87942128909879 0.7815750847907159 -2.289519314033932; - 0.7815750847907159 25.72656945571064 8.680737820540137; - -2.289519314033932 8.680737820540137 34.39400925519054]) - eA2 = convert(Matrix{elty}, - [ 5496313853692458.0 -18231880972009236.0 -30475770808580460.0; - -18231880972009252.0 60605228702221920.0 101291842930249760.0; - -30475770808580480.0 101291842930249728.0 169294411240851968.0]) - @test exp(A2) ≈ eA2 - @test exp(adjoint(A2)) ≈ adjoint(eA2) - @test exp(transpose(A2)) ≈ transpose(eA2) - - A3 = convert(Matrix{elty}, [-131 19 
18;-390 56 54;-387 57 52]) - eA3 = convert(Matrix{elty}, [-1.50964415879218 -5.6325707998812 -4.934938326092; - 0.367879439109187 1.47151775849686 1.10363831732856; - 0.135335281175235 0.406005843524598 0.541341126763207]') - @test exp(A3) ≈ eA3 - @test exp(adjoint(A3)) ≈ adjoint(eA3) - @test exp(transpose(A3)) ≈ transpose(eA3) - - A4 = convert(Matrix{elty}, [0.25 0.25; 0 0]) - eA4 = convert(Matrix{elty}, [1.2840254166877416 0.2840254166877415; 0 1]) - @test exp(A4) ≈ eA4 - @test exp(adjoint(A4)) ≈ adjoint(eA4) - @test exp(transpose(A4)) ≈ transpose(eA4) - - A5 = convert(Matrix{elty}, [0 0.02; 0 0]) - eA5 = convert(Matrix{elty}, [1 0.02; 0 1]) - @test exp(A5) ≈ eA5 - @test exp(adjoint(A5)) ≈ adjoint(eA5) - @test exp(transpose(A5)) ≈ transpose(eA5) - - # Hessenberg - @test hessenberg(A1).H ≈ convert(Matrix{elty}, - [4.000000000000000 -1.414213562373094 -1.414213562373095 - -1.414213562373095 4.999999999999996 -0.000000000000000 - 0 -0.000000000000002 3.000000000000000]) - - # cis always returns a complex matrix - if elty <: Real - eltyim = Complex{elty} - else - eltyim = elty - end - - @test cis(A1) ≈ convert(Matrix{eltyim}, [-0.339938 + 0.000941506im 0.772659 - 0.8469im 0.52745 + 0.566543im; - 0.650054 - 0.140179im -0.0762135 + 0.284213im 0.38633 - 0.42345im ; - 0.650054 - 0.140179im 0.913779 + 0.143093im -0.603663 - 0.28233im ]) rtol=7e-7 - end - - @testset "Additional tests for $elty" for elty in (Float64, ComplexF64) - A4 = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps(); - 1/3 1/4 1/5 1/6; - 1/4 1/5 1/6 1/7; - 1/5 1/6 1/7 1/8]) - @test exp(log(A4)) ≈ A4 - @test exp(log(transpose(A4))) ≈ transpose(A4) - @test exp(log(adjoint(A4))) ≈ adjoint(A4) - - A5 = convert(Matrix{elty}, [1 1 0 1; 0 1 1 0; 0 0 1 1; 1 0 0 1]) - @test exp(log(A5)) ≈ A5 - @test exp(log(transpose(A5))) ≈ transpose(A5) - @test exp(log(adjoint(A5))) ≈ adjoint(A5) - - A6 = convert(Matrix{elty}, [-5 2 0 0 ; 1/2 -7 3 0; 0 1/3 -9 4; 0 0 1/4 -11]) - @test exp(log(A6)) ≈ A6 - @test exp(log(transpose(A6))) ≈ transpose(A6) - @test exp(log(adjoint(A6))) ≈ adjoint(A6) - - A7 = convert(Matrix{elty}, [1 0 0 1e-8; 0 1 0 0; 0 0 1 0; 0 0 0 1]) - @test exp(log(A7)) ≈ A7 - @test exp(log(transpose(A7))) ≈ transpose(A7) - @test exp(log(adjoint(A7))) ≈ adjoint(A7) - end - - @testset "Integer promotion tests" begin - for (elty1, elty2) in ((Int64, Float64), (Complex{Int64}, ComplexF64)) - A4int = convert(Matrix{elty1}, [1 2; 3 4]) - A4float = convert(Matrix{elty2}, A4int) - @test exp(A4int) == exp(A4float) - end - end - - @testset "^ tests" for elty in (Float32, Float64, ComplexF32, ComplexF64, Int32, Int64) - # should all be exact as the lhs functions are simple aliases - @test ℯ^(fill(elty(2), (4,4))) == exp(fill(elty(2), (4,4))) - @test 2^(fill(elty(2), (4,4))) == exp(log(2)*fill(elty(2), (4,4))) - @test 2.0^(fill(elty(2), (4,4))) == exp(log(2.0)*fill(elty(2), (4,4))) - end - - A8 = 100 * [-1+1im 0 0 1e-8; 0 1 0 0; 0 0 1 0; 0 0 0 1] - @test exp(log(A8)) ≈ A8 -end - -@testset "Matrix trigonometry" begin - @testset "Tests for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - A1 = convert(Matrix{elty}, [3 2 0; 1 3 1; 1 1 3]) - A2 = convert(Matrix{elty}, - [3.975884257819758 0.15631501695814318 -0.4579038628067864; - 0.15631501695814318 4.545313891142127 1.7361475641080275; - -0.4579038628067864 1.7361475641080275 6.478801851038108]) - A3 = convert(Matrix{elty}, [0.25 0.25; 0 0]) - A4 = convert(Matrix{elty}, [0 0.02; 0 0]) - - cosA1 = convert(Matrix{elty},[-0.18287716254368605 -0.29517205254584633 0.761711400552759; - 
0.23326967400345625 0.19797853773269333 -0.14758602627292305; - 0.23326967400345636 0.6141253742798355 -0.5637328628200653]) - sinA1 = convert(Matrix{elty}, [0.2865568596627417 -1.107751980582015 -0.13772915374386513; - -0.6227405671629401 0.2176922827908092 -0.5538759902910078; - -0.6227405671629398 -0.6916051440348725 0.3554214365346742]) - @test cos(A1) ≈ cosA1 - @test sin(A1) ≈ sinA1 - - cosA2 = convert(Matrix{elty}, [-0.6331745163802187 0.12878366262380136 -0.17304181968301532; - 0.12878366262380136 -0.5596234510748788 0.5210483146041339; - -0.17304181968301532 0.5210483146041339 0.002263776356015268]) - sinA2 = convert(Matrix{elty},[-0.6677253518411841 -0.32599318928375437 0.020799609079003523; - -0.32599318928375437 -0.04568726058081066 0.5388748740270427; - 0.020799609079003523 0.5388748740270427 0.6385462428126032]) - @test cos(A2) ≈ cosA2 - @test sin(A2) ≈ sinA2 - - cosA3 = convert(Matrix{elty}, [0.9689124217106446 -0.031087578289355197; 0.0 1.0]) - sinA3 = convert(Matrix{elty}, [0.24740395925452285 0.24740395925452285; 0.0 0.0]) - @test cos(A3) ≈ cosA3 - @test sin(A3) ≈ sinA3 - - cosA4 = convert(Matrix{elty}, [1.0 0.0; 0.0 1.0]) - sinA4 = convert(Matrix{elty}, [0.0 0.02; 0.0 0.0]) - @test cos(A4) ≈ cosA4 - @test sin(A4) ≈ sinA4 - - # Identities - for (i, A) in enumerate((A1, A2, A3, A4)) - @test sincos(A) == (sin(A), cos(A)) - @test cos(A)^2 + sin(A)^2 ≈ Matrix(I, size(A)) - @test cos(A) ≈ cos(-A) - @test sin(A) ≈ -sin(-A) - @test tan(A) ≈ sin(A) / cos(A) - - @test cos(A) ≈ real(exp(im*A)) - @test sin(A) ≈ imag(exp(im*A)) - @test cos(A) ≈ real(cis(A)) - @test sin(A) ≈ imag(cis(A)) - @test cis(A) ≈ cos(A) + im * sin(A) - - @test cosh(A) ≈ 0.5 * (exp(A) + exp(-A)) - @test sinh(A) ≈ 0.5 * (exp(A) - exp(-A)) - @test cosh(A) ≈ cosh(-A) - @test sinh(A) ≈ -sinh(-A) - - # Some of the following identities fail for A3, A4 because the matrices are singular - if i in (1, 2) - @test sec(A) ≈ inv(cos(A)) - @test csc(A) ≈ inv(sin(A)) - @test cot(A) ≈ inv(tan(A)) - @test sech(A) ≈ inv(cosh(A)) - @test csch(A) ≈ inv(sinh(A)) - @test coth(A) ≈ inv(tanh(A)) - end - # The following identities fail for A1, A2 due to rounding errors; - # probably needs better algorithm for the general case - if i in (3, 4) - @test cosh(A)^2 - sinh(A)^2 ≈ Matrix(I, size(A)) - @test tanh(A) ≈ sinh(A) / cosh(A) - end - end - end - - @testset "Additional tests for $elty" for elty in (ComplexF32, ComplexF64) - A5 = convert(Matrix{elty}, [1im 2; 0.02+0.5im 3]) - - @test sincos(A5) == (sin(A5), cos(A5)) - - @test cos(A5)^2 + sin(A5)^2 ≈ Matrix(I, size(A5)) - @test cosh(A5)^2 - sinh(A5)^2 ≈ Matrix(I, size(A5)) - @test cos(A5)^2 + sin(A5)^2 ≈ Matrix(I, size(A5)) - @test tan(A5) ≈ sin(A5) / cos(A5) - @test tanh(A5) ≈ sinh(A5) / cosh(A5) - - @test sec(A5) ≈ inv(cos(A5)) - @test csc(A5) ≈ inv(sin(A5)) - @test cot(A5) ≈ inv(tan(A5)) - @test sech(A5) ≈ inv(cosh(A5)) - @test csch(A5) ≈ inv(sinh(A5)) - @test coth(A5) ≈ inv(tanh(A5)) - - @test cos(A5) ≈ 0.5 * (exp(im*A5) + exp(-im*A5)) - @test sin(A5) ≈ -0.5im * (exp(im*A5) - exp(-im*A5)) - @test cos(A5) ≈ 0.5 * (cis(A5) + cis(-A5)) - @test sin(A5) ≈ -0.5im * (cis(A5) - cis(-A5)) - - @test cosh(A5) ≈ 0.5 * (exp(A5) + exp(-A5)) - @test sinh(A5) ≈ 0.5 * (exp(A5) - exp(-A5)) - end - - @testset "Additional tests for $elty" for elty in (Int32, Int64, Complex{Int32}, Complex{Int64}) - A1 = convert(Matrix{elty}, [1 2; 3 4]) - A2 = convert(Matrix{elty}, [1 2; 2 1]) - - cosA1 = convert(Matrix{float(elty)}, [0.855423165077998 -0.11087638101074865; - -0.16631457151612294 0.689108593561875]) - 
cosA2 = convert(Matrix{float(elty)}, [-0.22484509536615283 -0.7651474012342925; - -0.7651474012342925 -0.22484509536615283]) - - @test cos(A1) ≈ cosA1 - @test cos(A2) ≈ cosA2 - - sinA1 = convert(Matrix{float(elty)}, [-0.46558148631373036 -0.14842445991317652; - -0.22263668986976476 -0.6882181761834951]) - sinA2 = convert(Matrix{float(elty)}, [-0.3501754883740146 0.4912954964338818; - 0.4912954964338818 -0.3501754883740146]) - - @test sin(A1) ≈ sinA1 - @test sin(A2) ≈ sinA2 - end - - @testset "Inverse functions for $elty" for elty in (Float32, Float64) - A1 = convert(Matrix{elty}, [0.244637 -0.63578; - 0.22002 0.189026]) - A2 = convert(Matrix{elty}, [1.11656 -0.098672 0.158485; - -0.098672 0.100933 -0.107107; - 0.158485 -0.107107 0.612404]) - - for A in (A1, A2) - @test cos(acos(cos(A))) ≈ cos(A) - @test sin(asin(sin(A))) ≈ sin(A) - @test tan(atan(tan(A))) ≈ tan(A) - @test cosh(acosh(cosh(A))) ≈ cosh(A) - @test sinh(asinh(sinh(A))) ≈ sinh(A) - @test tanh(atanh(tanh(A))) ≈ tanh(A) - @test sec(asec(sec(A))) ≈ sec(A) - @test csc(acsc(csc(A))) ≈ csc(A) - @test cot(acot(cot(A))) ≈ cot(A) - @test sech(asech(sech(A))) ≈ sech(A) - @test csch(acsch(csch(A))) ≈ csch(A) - @test coth(acoth(coth(A))) ≈ coth(A) - end - end - - @testset "Inverse functions for $elty" for elty in (ComplexF32, ComplexF64) - A1 = convert(Matrix{elty}, [ 0.143721-0.0im -0.138386-0.106905im; - -0.138386+0.106905im 0.306224-0.0im]) - A2 = convert(Matrix{elty}, [1im 2; 0.02+0.5im 3]) - A3 = convert(Matrix{elty}, [0.138721-0.266836im 0.0971722-0.13715im 0.205046-0.137136im; - -0.0154974-0.00358254im 0.152163-0.445452im 0.0314575-0.536521im; - -0.387488+0.0294059im -0.0448773+0.114305im 0.230684-0.275894im]) - for A in (A1, A2, A3) - @test cos(acos(cos(A))) ≈ cos(A) - @test sin(asin(sin(A))) ≈ sin(A) - @test tan(atan(tan(A))) ≈ tan(A) - @test cosh(acosh(cosh(A))) ≈ cosh(A) - @test sinh(asinh(sinh(A))) ≈ sinh(A) - @test tanh(atanh(tanh(A))) ≈ tanh(A) - @test sec(asec(sec(A))) ≈ sec(A) - @test csc(acsc(csc(A))) ≈ csc(A) - @test cot(acot(cot(A))) ≈ cot(A) - @test sech(asech(sech(A))) ≈ sech(A) - @test csch(acsch(csch(A))) ≈ csch(A) - @test coth(acoth(coth(A))) ≈ coth(A) - - # Definition of principal values (Aprahamian & Higham, 2016, pp. 4-5) - abstol = sqrt(eps(real(elty))) * norm(acosh(A)) - @test all(z -> (0 < real(z) < π || - abs(real(z)) < abstol && imag(z) >= 0 || - abs(real(z) - π) < abstol && imag(z) <= 0), - eigen(acos(A)).values) - @test all(z -> (-π/2 < real(z) < π/2 || - abs(real(z) + π/2) < abstol && imag(z) >= 0 || - abs(real(z) - π/2) < abstol && imag(z) <= 0), - eigen(asin(A)).values) - @test all(z -> (-π < imag(z) < π && real(z) > 0 || - 0 <= imag(z) < π && abs(real(z)) < abstol || - abs(imag(z) - π) < abstol && real(z) >= 0), - eigen(acosh(A)).values) - @test all(z -> (-π/2 < imag(z) < π/2 || - abs(imag(z) + π/2) < abstol && real(z) <= 0 || - abs(imag(z) - π/2) < abstol && real(z) <= 0), - eigen(asinh(A)).values) - end - end -end - -@testset "issue 5116" begin - A9 = [0 10 0 0; -1 0 0 0; 0 0 0 0; -2 0 0 0] - eA9 = [-0.999786072879326 -0.065407069689389 0.0 0.0 - 0.006540706968939 -0.999786072879326 0.0 0.0 - 0.0 0.0 1.0 0.0 - 0.013081413937878 -3.999572145758650 0.0 1.0] - @test exp(A9) ≈ eA9 - - A10 = [ 0. 0. 0. 0. ; 0. 0. -im 0.; 0. im 0. 0.; 0. 0. 0. 0.] 
- eA10 = [ 1.0+0.0im 0.0+0.0im 0.0+0.0im 0.0+0.0im - 0.0+0.0im 1.543080634815244+0.0im 0.0-1.175201193643801im 0.0+0.0im - 0.0+0.0im 0.0+1.175201193643801im 1.543080634815243+0.0im 0.0+0.0im - 0.0+0.0im 0.0+0.0im 0.0+0.0im 1.0+0.0im] - @test exp(A10) ≈ eA10 -end - -@testset "Additional matrix logarithm tests" for elty in (Float64, ComplexF64) - A11 = convert(Matrix{elty}, [3 2; -5 -3]) - @test exp(log(A11)) ≈ A11 - - A13 = convert(Matrix{elty}, [2 0; 0 2]) - @test typeof(log(A13)) == Array{elty, 2} - - T = elty == Float64 ? Symmetric : Hermitian - @test typeof(log(T(A13))) == T{elty, Array{elty, 2}} - - A1 = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4]) - logA1 = convert(Matrix{elty}, [1.329661349 0.5302876358 -0.06818951543; - 0.2310490602 1.295566591 0.2651438179; - 0.2310490602 0.1969543025 1.363756107]) - @test log(A1) ≈ logA1 - @test exp(log(A1)) ≈ A1 - @test typeof(log(A1)) == Matrix{elty} - - A4 = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps(); - 1/3 1/4 1/5 1/6; - 1/4 1/5 1/6 1/7; - 1/5 1/6 1/7 1/8]) - logA4 = convert(Matrix{elty}, [-1.73297159 1.857349738 0.4462766564 0.2414170219; - 1.857349738 -5.335033737 2.994142974 0.5865285289; - 0.4462766564 2.994142974 -7.351095988 3.318413247; - 0.2414170219 0.5865285289 3.318413247 -5.444632124]) - @test log(A4) ≈ logA4 - @test exp(log(A4)) ≈ A4 - @test typeof(log(A4)) == Matrix{elty} - - # real triu matrix - A5 = convert(Matrix{elty}, [1 2 3; 0 4 5; 0 0 6]) # triu - logA5 = convert(Matrix{elty}, [0.0 0.9241962407465937 0.5563245488984037; - 0.0 1.3862943611198906 1.0136627702704109; - 0.0 0.0 1.791759469228055]) - @test log(A5) ≈ logA5 - @test exp(log(A5)) ≈ A5 - @test typeof(log(A5)) == Matrix{elty} - - # real quasitriangular schur form with 2 2x2 blocks, 2 1x1 blocks, and all positive eigenvalues - A6 = convert(Matrix{elty}, [2 3 2 2 3 1; - 1 3 3 2 3 1; - 3 3 3 1 1 2; - 2 1 2 2 2 2; - 1 1 2 2 3 1; - 2 2 2 2 1 3]) - @test exp(log(A6)) ≈ A6 - @test typeof(log(A6)) == Matrix{elty} - - # real quasitriangular schur form with a negative eigenvalue - A7 = convert(Matrix{elty}, [1 3 3 2 2 2; - 1 2 1 3 1 2; - 3 1 2 3 2 1; - 3 1 2 2 2 1; - 3 1 3 1 2 1; - 1 1 3 1 1 3]) - @test exp(log(A7)) ≈ A7 - @test typeof(log(A7)) == Matrix{complex(elty)} - - if elty <: Complex - A8 = convert(Matrix{elty}, [1 + 1im 1 + 1im 1 - 1im; - 1 + 1im -1 + 1im 1 + 1im; - 1 - 1im 1 + 1im -1 - 1im]) - logA8 = convert( - Matrix{elty}, - [0.9478628953131517 + 1.3725201223387407im -0.2547157147532057 + 0.06352318334299434im 0.8560050197863862 - 1.0471975511965979im; - -0.2547157147532066 + 0.06352318334299467im -0.16285783922644065 + 0.2617993877991496im 0.2547157147532063 + 2.1579182857361894im; - 0.8560050197863851 - 1.0471975511965974im 0.25471571475320665 + 2.1579182857361903im 0.9478628953131519 - 0.8489213467404436im], - ) - @test log(A8) ≈ logA8 - @test exp(log(A8)) ≈ A8 - @test typeof(log(A8)) == Matrix{elty} - end -end - -@testset "matrix logarithm is type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64) - A1 = randn(elty, 4, 4) - @inferred Union{Matrix{elty},Matrix{complex(elty)}} log(A1) -end - -@testset "Additional matrix square root tests" for elty in (Float64, ComplexF64) - A11 = convert(Matrix{elty}, [3 2; -5 -3]) - @test sqrt(A11)^2 ≈ A11 - - A13 = convert(Matrix{elty}, [2 0; 0 2]) - @test typeof(sqrt(A13)) == Array{elty, 2} - - T = elty == Float64 ? 
Symmetric : Hermitian - @test typeof(sqrt(T(A13))) == T{elty, Array{elty, 2}} - - A1 = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4]) - sqrtA1 = convert(Matrix{elty}, [1.971197119306979 0.5113118387140085 -0.03301921523780871; - 0.23914631173809942 1.9546875116880718 0.2556559193570036; - 0.23914631173810008 0.22263670411919556 1.9877067269258815]) - @test sqrt(A1) ≈ sqrtA1 - @test sqrt(A1)^2 ≈ A1 - @test typeof(sqrt(A1)) == Matrix{elty} - - A4 = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps(); - 1/3 1/4 1/5 1/6; - 1/4 1/5 1/6 1/7; - 1/5 1/6 1/7 1/8]) - sqrtA4 = convert( - Matrix{elty}, - [0.590697761556362 0.3055006800405779 0.19525404749300546 0.14007621469988107; - 0.30550068004057784 0.2825388389385975 0.21857572599211642 0.17048692323164674; - 0.19525404749300565 0.21857572599211622 0.21155429252242863 0.18976816626246887; - 0.14007621469988046 0.17048692323164724 0.1897681662624689 0.20075085592778794], - ) - @test sqrt(A4) ≈ sqrtA4 - @test sqrt(A4)^2 ≈ A4 - @test typeof(sqrt(A4)) == Matrix{elty} - - # real triu matrix - A5 = convert(Matrix{elty}, [1 2 3; 0 4 5; 0 0 6]) # triu - sqrtA5 = convert(Matrix{elty}, [1.0 0.6666666666666666 0.6525169217864183; - 0.0 2.0 1.1237243569579454; - 0.0 0.0 2.449489742783178]) - @test sqrt(A5) ≈ sqrtA5 - @test sqrt(A5)^2 ≈ A5 - @test typeof(sqrt(A5)) == Matrix{elty} - - # real quasitriangular schur form with 2 2x2 blocks, 2 1x1 blocks, and all positive eigenvalues - A6 = convert(Matrix{elty}, [2 3 2 2 3 1; - 1 3 3 2 3 1; - 3 3 3 1 1 2; - 2 1 2 2 2 2; - 1 1 2 2 3 1; - 2 2 2 2 1 3]) - @test sqrt(A6)^2 ≈ A6 - @test typeof(sqrt(A6)) == Matrix{elty} - - # real quasitriangular schur form with a negative eigenvalue - A7 = convert(Matrix{elty}, [1 3 3 2 2 2; - 1 2 1 3 1 2; - 3 1 2 3 2 1; - 3 1 2 2 2 1; - 3 1 3 1 2 1; - 1 1 3 1 1 3]) - @test sqrt(A7)^2 ≈ A7 - @test typeof(sqrt(A7)) == Matrix{complex(elty)} - - if elty <: Complex - A8 = convert(Matrix{elty}, [1 + 1im 1 + 1im 1 - 1im; - 1 + 1im -1 + 1im 1 + 1im; - 1 - 1im 1 + 1im -1 - 1im]) - sqrtA8 = convert( - Matrix{elty}, - [1.2559748527474284 + 0.6741878819930323im 0.20910077991005582 + 0.24969165051825476im 0.591784212275146 - 0.6741878819930327im; - 0.2091007799100553 + 0.24969165051825515im 0.3320953202361413 + 0.2915044496279425im 0.33209532023614136 + 1.0568713143581219im; - 0.5917842122751455 - 0.674187881993032im 0.33209532023614147 + 1.0568713143581223im 0.7147787526012315 - 0.6323750828833452im], - ) - @test sqrt(A8) ≈ sqrtA8 - @test sqrt(A8)^2 ≈ A8 - @test typeof(sqrt(A8)) == Matrix{elty} - end -end - -@testset "issue #40141" begin - x = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() -1] - @test sqrt(x)^2 ≈ x - - x2 = [-1 -eps() 0 0; 3eps() -1 0 0; 0 0 -1 -3eps(); 0 0 eps() -1] - @test sqrt(x2)^2 ≈ x2 - - x3 = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() Inf] - @test all(isnan, sqrt(x3)) - - # test overflow/underflow handled - x4 = [0 -1e200; 1e200 0] - @test sqrt(x4)^2 ≈ x4 - - x5 = [0 -1e-200; 1e-200 0] - @test sqrt(x5)^2 ≈ x5 - - x6 = [1.0 1e200; -1e-200 1.0] - @test sqrt(x6)^2 ≈ x6 -end - -@testset "matrix logarithm block diagonal underflow/overflow" begin - x1 = [0 -1e200; 1e200 0] - @test exp(log(x1)) ≈ x1 - - x2 = [0 -1e-200; 1e-200 0] - @test exp(log(x2)) ≈ x2 - - x3 = [1.0 1e200; -1e-200 1.0] - @test exp(log(x3)) ≈ x3 -end - -@testset "issue #7181" begin - A = [ 1 5 9 - 2 6 10 - 3 7 11 - 4 8 12 ] - @test diag(A,-5) == [] - @test diag(A,-4) == [] - @test diag(A,-3) == [4] - @test diag(A,-2) == [3,8] - @test diag(A,-1) == [2,7,12] - @test diag(A, 0) == [1,6,11] - @test 
diag(A, 1) == [5,10] - @test diag(A, 2) == [9] - @test diag(A, 3) == [] - @test diag(A, 4) == [] - - @test diag(zeros(0,0)) == [] - @test diag(zeros(0,0),1) == [] - @test diag(zeros(0,0),-1) == [] - - @test diag(zeros(1,0)) == [] - @test diag(zeros(1,0),-1) == [] - @test diag(zeros(1,0),1) == [] - @test diag(zeros(1,0),-2) == [] - - @test diag(zeros(0,1)) == [] - @test diag(zeros(0,1),1) == [] - @test diag(zeros(0,1),-1) == [] - @test diag(zeros(0,1),2) == [] -end - -@testset "issue #39857" begin - @test lyap(1.0+2.0im, 3.0+4.0im) == -1.5 - 2.0im -end - -@testset "Matrix to real power" for elty in (Float64, ComplexF64) -# Tests proposed at Higham, Deadman: Testing Matrix Function Algorithms Using Identities, March 2014 - #Aa : only positive real eigenvalues - Aa = convert(Matrix{elty}, [5 4 2 1; 0 1 -1 -1; -1 -1 3 0; 1 1 -1 2]) - - #Ab : both positive and negative real eigenvalues - Ab = convert(Matrix{elty}, [1 2 3; 4 7 1; 2 1 4]) - - #Ac : complex eigenvalues - Ac = convert(Matrix{elty}, [5 4 2 1;0 1 -1 -1;-1 -1 3 6;1 1 -1 5]) - - #Ad : defective Matrix - Ad = convert(Matrix{elty}, [3 1; 0 3]) - - #Ah : Hermitian Matrix - Ah = convert(Matrix{elty}, [3 1; 1 3]) - if elty <: LinearAlgebra.BlasComplex - Ah += [0 im; -im 0] - end - - #ADi : Diagonal Matrix - ADi = convert(Matrix{elty}, [3 0; 0 3]) - if elty <: LinearAlgebra.BlasComplex - ADi += [im 0; 0 im] - end - - for A in (Aa, Ab, Ac, Ad, Ah, ADi) - @test A^(1/2) ≈ sqrt(A) - @test A^(-1/2) ≈ inv(sqrt(A)) - @test A^(3/4) ≈ sqrt(A) * sqrt(sqrt(A)) - @test A^(-3/4) ≈ inv(A) * sqrt(sqrt(A)) - @test A^(17/8) ≈ A^2 * sqrt(sqrt(sqrt(A))) - @test A^(-17/8) ≈ inv(A^2 * sqrt(sqrt(sqrt(A)))) - @test (A^0.2)^5 ≈ A - @test (A^(2/3))*(A^(1/3)) ≈ A - @test (A^im)^(-im) ≈ A - end -end - -@testset "diagonal integer matrix to real power" begin - A = Matrix(Diagonal([1, 2, 3])) - @test A^2.3 ≈ float(A)^2.3 -end - -@testset "issue #23366 (Int Matrix to Int power)" begin - @testset "Tests for $elty" for elty in (Int128, Int16, Int32, Int64, Int8, - UInt128, UInt16, UInt32, UInt64, UInt8, - BigInt) - #@info "Testing $elty" - @test elty[1 1;1 0]^-1 == [0 1; 1 -1] - @test elty[1 1;1 0]^-2 == [1 -1; -1 2] - @test (@inferred elty[1 1;1 0]^2) == elty[2 1;1 1] - I_ = elty[1 0;0 1] - @test I_^-1 == I_ - if !(elty<:Unsigned) - @test (@inferred (-I_)^-1) == -I_ - @test (@inferred (-I_)^-2) == I_ - end - # make sure that type promotion for ^(::Matrix{<:Integer}, ::Integer) - # is analogous to type promotion for ^(::Integer, ::Integer) - # e.g. 
[1 1;1 0]^big(10000) should return Matrix{BigInt}, the same - # way as 2^big(10000) returns BigInt - for elty2 = (Int64, BigInt) - TT = Base.promote_op(^, elty, elty2) - @test (@inferred elty[1 1;1 0]^elty2(1))::Matrix{TT} == [1 1;1 0] - end - end -end - -@testset "Least squares solutions" begin - a = [fill(1, 20) 1:20 1:20] - b = reshape(Matrix(1.0I, 8, 5), 20, 2) - @testset "Tests for type $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - a = convert(Matrix{elty}, a) - b = convert(Matrix{elty}, b) - - # Vector rhs - x = a[:,1:2]\b[:,1] - @test ((a[:,1:2]*x-b[:,1])'*(a[:,1:2]*x-b[:,1]))[1] ≈ convert(elty, 2.546616541353384) - - # Matrix rhs - x = a[:,1:2]\b - @test det((a[:,1:2]*x-b)'*(a[:,1:2]*x-b)) ≈ convert(elty, 4.437969924812031) - - # Rank deficient - x = a\b - @test det((a*x-b)'*(a*x-b)) ≈ convert(elty, 4.437969924812031) - - # Underdetermined minimum norm - x = convert(Matrix{elty}, [1 0 0; 0 1 -1]) \ convert(Vector{elty}, [1,1]) - @test x ≈ convert(Vector{elty}, [1, 0.5, -0.5]) - - # symmetric, positive definite - @test inv(convert(Matrix{elty}, [6. 2; 2 1])) ≈ convert(Matrix{elty}, [0.5 -1; -1 3]) - - # symmetric, indefinite - @test inv(convert(Matrix{elty}, [1. 2; 2 1])) ≈ convert(Matrix{elty}, [-1. 2; 2 -1]/3) - end -end - -function test_rdiv_pinv_consistency(a, b) - @test a*(b/b) ≈ (a*b)*pinv(b) ≈ a*(b*pinv(b)) - @test typeof(a*(b/b)) == typeof((a*b)*pinv(b)) == typeof(a*(b*pinv(b))) -end -function test_ldiv_pinv_consistency(a, b) - @test (a\a)*b ≈ (pinv(a)*a)*b ≈ pinv(a)*(a*b) - @test typeof((a\a)*b) == typeof((pinv(a)*a)*b) == typeof(pinv(a)*(a*b)) -end -function test_div_pinv_consistency(a, b) - test_rdiv_pinv_consistency(a, b) - test_ldiv_pinv_consistency(a, b) -end - -@testset "/ and \\ consistency with pinv for vectors" begin - @testset "Tests for type $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - c = rand(elty, 5) - r = (elty <: Complex ? 
adjoint : transpose)(rand(elty, 5)) - cm = rand(elty, 5, 1) - rm = rand(elty, 1, 5) - @testset "dot products" begin - test_div_pinv_consistency(r, c) - test_div_pinv_consistency(rm, c) - test_div_pinv_consistency(r, cm) - test_div_pinv_consistency(rm, cm) - end - @testset "outer products" begin - test_div_pinv_consistency(c, r) - test_div_pinv_consistency(cm, rm) - end - @testset "matrix/vector" begin - m = rand(5, 5) - test_ldiv_pinv_consistency(m, c) - test_rdiv_pinv_consistency(r, m) - end - end -end - -@testset "test ops on Numbers for $elty" for elty in [Float32,Float64,ComplexF32,ComplexF64] - a = rand(elty) - @test isposdef(one(elty)) - @test lyap(one(elty),a) == -a/2 -end - -@testset "strides" begin - a = rand(10) - b = view(a,2:2:10) - @test LinearAlgebra.stride1(a) == 1 - @test LinearAlgebra.stride1(b) == 2 -end - -@testset "inverse of Adjoint" begin - A = randn(n, n) - - @test @inferred(inv(A'))*A' ≈ I - @test @inferred(inv(transpose(A)))*transpose(A) ≈ I - - B = complex.(A, randn(n, n)) - - @test @inferred(inv(B'))*B' ≈ I - @test @inferred(inv(transpose(B)))*transpose(B) ≈ I -end - -@testset "Factorize fallback for Adjoint/Transpose" begin - a = rand(Complex{Int8}, n, n) - @test Array(transpose(factorize(Transpose(a)))) ≈ Array(factorize(a)) - @test transpose(factorize(transpose(a))) == factorize(a) - @test Array(adjoint(factorize(Adjoint(a)))) ≈ Array(factorize(a)) - @test adjoint(factorize(adjoint(a))) == factorize(a) -end - -@testset "Matrix log issue #32313" begin - for A in ([30 20; -50 -30], [10.0im 0; 0 -10.0im], randn(6,6)) - @test exp(log(A)) ≈ A - end -end - -@testset "Matrix log PR #33245" begin - # edge case for divided difference - A1 = triu(ones(3,3),1) + diagm([1.0, -2eps()-1im, -eps()+0.75im]) - @test exp(log(A1)) ≈ A1 - # case where no sqrt is needed (s=0) - A2 = [1.01 0.01 0.01; 0 1.01 0.01; 0 0 1.01] - @test exp(log(A2)) ≈ A2 -end - -@testset "sqrt of empty Matrix of type $T" for T in [Int,Float32,Float64,ComplexF32,ComplexF64] - @test sqrt(Matrix{T}(undef, 0, 0)) == Matrix{T}(undef, 0, 0) - @test_throws DimensionMismatch sqrt(Matrix{T}(undef, 0, 3)) -end - -struct TypeWithoutZero end -Base.zero(::Type{TypeWithoutZero}) = TypeWithZero() -struct TypeWithZero end -Base.promote_rule(::Type{TypeWithoutZero}, ::Type{TypeWithZero}) = TypeWithZero -Base.zero(::Type{<:Union{TypeWithoutZero, TypeWithZero}}) = TypeWithZero() -Base.:+(x::TypeWithZero, ::TypeWithoutZero) = x - -@testset "diagm for type with no zero" begin - @test diagm(0 => [TypeWithoutZero()]) isa Matrix{TypeWithZero} -end - -end # module TestDense diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl deleted file mode 100644 index 2a8248d9ca716..0000000000000 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ /dev/null @@ -1,1183 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestDiagonal - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasFloat, BlasComplex - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs - -isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays - -isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl")) -using .Main.InfiniteArrays - -isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) -using .Main.FillArrays - -const n=12 # Size of matrix problem to test -Random.seed!(1) - -@testset for relty in (Float32, Float64, BigFloat), elty in (relty, Complex{relty}) - dd=convert(Vector{elty}, randn(n)) - vv=convert(Vector{elty}, randn(n)) - UU=convert(Matrix{elty}, randn(n,n)) - if elty <: Complex - dd+=im*convert(Vector{elty}, randn(n)) - vv+=im*convert(Vector{elty}, randn(n)) - UU+=im*convert(Matrix{elty}, randn(n,n)) - end - D = Diagonal(dd) - DM = Matrix(Diagonal(dd)) - - @testset "constructor" begin - for x in (dd, GenericArray(dd)) - @test Diagonal(x)::Diagonal{elty,typeof(x)} == DM - @test Diagonal(x).diag === x - @test Diagonal{elty}(x)::Diagonal{elty,typeof(x)} == DM - @test Diagonal{elty}(x).diag === x - @test Diagonal{elty}(D) === D - end - @test eltype(Diagonal{elty}([1,2,3,4])) == elty - @test isa(Diagonal{elty,Vector{elty}}(GenericArray([1,2,3,4])), Diagonal{elty,Vector{elty}}) - @test isa(Diagonal{elty}(rand(Int,n,n)), Diagonal{elty,Vector{elty}}) - DI = Diagonal([1,2,3,4]) - @test Diagonal(DI) === DI - @test isa(Diagonal{elty}(DI), Diagonal{elty}) - # issue #26178 - @test_throws MethodError convert(Diagonal, [1,2,3,4]) - @test_throws DimensionMismatch convert(Diagonal, [1 2 3 4]) - @test_throws InexactError convert(Diagonal, ones(2,2)) - end - - @testset "Basic properties" begin - @test_throws ArgumentError size(D,0) - @test typeof(convert(Diagonal{ComplexF32},D)) <: Diagonal{ComplexF32} - @test typeof(convert(AbstractMatrix{ComplexF32},D)) <: Diagonal{ComplexF32} - - @test Array(real(D)) == real(DM) - @test Array(abs.(D)) == abs.(DM) - @test Array(imag(D)) == imag(DM) - - @test parent(D) == dd - @test D[1,1] == dd[1] - @test D[1,2] == 0 - - @test issymmetric(D) - @test isdiag(D) - @test isdiag(Diagonal([[1 0; 0 1], [1 0; 0 1]])) - @test !isdiag(Diagonal([[1 0; 0 1], [1 0; 1 1]])) - @test istriu(D) - @test istriu(D, -1) - @test !istriu(D, 1) - @test istriu(Diagonal(zero(diag(D))), 1) - @test istril(D) - @test !istril(D, -1) - @test istril(D, 1) - @test istril(Diagonal(zero(diag(D))), -1) - if elty <: Real - @test ishermitian(D) - end - end - - @testset "diag" begin - @test_throws ArgumentError diag(D, n+1) - @test_throws ArgumentError diag(D, -n-1) - @test (@inferred diag(D))::typeof(dd) == dd - @test (@inferred diag(D, 0))::typeof(dd) == dd - @test (@inferred diag(D, 1))::typeof(dd) == zeros(elty, n-1) - DG = Diagonal(GenericArray(dd)) - @test (@inferred diag(DG))::typeof(GenericArray(dd)) == GenericArray(dd) - @test (@inferred diag(DG, 1))::typeof(GenericArray(dd)) == GenericArray(zeros(elty, n-1)) - end - - - @testset "Simple unary functions" begin - for op in (-,) - @test op(D)==op(DM) - end - - for func in (det, tr) - @test func(D) ≈ func(DM) atol=n^2*eps(relty)*(1+(elty<:Complex)) - end - if relty <: 
BlasFloat - for func in (exp, cis, sinh, cosh, tanh, sech, csch, coth) - @test func(D) ≈ func(DM) atol=n^3*eps(relty) - end - @test log(Diagonal(abs.(D.diag))) ≈ log(abs.(DM)) atol=n^3*eps(relty) - end - if elty <: BlasComplex - for func in (logdet, sqrt, sin, cos, tan, sec, csc, cot, - asin, acos, atan, asec, acsc, acot, - asinh, acosh, atanh, asech, acsch, acoth) - @test func(D) ≈ func(DM) atol=n^2*eps(relty)*2 - end - end - end - - @testset "Two-dimensional Euler formula for Diagonal" begin - @test cis(Diagonal([π, π])) ≈ -I - end - - @testset "Linear solve" begin - for (v, U) in ((vv, UU), (view(vv, 1:n), view(UU, 1:n, 1:2))) - @test D*v ≈ DM*v atol=n*eps(relty)*(1+(elty<:Complex)) - @test D*U ≈ DM*U atol=n^2*eps(relty)*(1+(elty<:Complex)) - - @test transpose(U)*D ≈ transpose(U)*Array(D) - @test U'*D ≈ U'*Array(D) - - if relty != BigFloat - atol_two = 2n^2 * eps(relty) * (1 + (elty <: Complex)) - atol_three = 2n^3 * eps(relty) * (1 + (elty <: Complex)) - @test D\v ≈ DM\v atol=atol_two - @test D\U ≈ DM\U atol=atol_three - @test ldiv!(D, copy(v)) ≈ DM\v atol=atol_two - @test ldiv!(transpose(D), copy(v)) ≈ DM\v atol=atol_two - @test ldiv!(adjoint(conj(D)), copy(v)) ≈ DM\v atol=atol_two - @test ldiv!(D, copy(U)) ≈ DM\U atol=atol_three - @test ldiv!(transpose(D), copy(U)) ≈ DM\U atol=atol_three - @test ldiv!(adjoint(conj(D)), copy(U)) ≈ DM\U atol=atol_three - # this method tests AbstractMatrix/AbstractVec for second arg - Usym_bad = Symmetric(ones(elty, n+1, n+1)) - @test_throws DimensionMismatch ldiv!(D, copy(Usym_bad)) - - @test ldiv!(zero(v), D, copy(v)) ≈ DM\v atol=atol_two - @test ldiv!(zero(v), transpose(D), copy(v)) ≈ DM\v atol=atol_two - @test ldiv!(zero(v), adjoint(conj(D)), copy(v)) ≈ DM\v atol=atol_two - @test ldiv!(zero(U), D, copy(U)) ≈ DM\U atol=atol_three - @test ldiv!(zero(U), transpose(D), copy(U)) ≈ DM\U atol=atol_three - @test ldiv!(zero(U), adjoint(conj(D)), copy(U)) ≈ DM\U atol=atol_three - - Uc = copy(U') - target = rmul!(Uc, Diagonal(inv.(D.diag))) - @test rdiv!(Uc, D) ≈ target atol=atol_three - @test_throws DimensionMismatch rdiv!(Matrix{elty}(I, n-1, n-1), D) - @test_throws SingularException rdiv!(Uc, Diagonal(fill!(similar(D.diag), 0))) - @test rdiv!(Uc, transpose(D)) ≈ target atol=atol_three - @test rdiv!(Uc, adjoint(conj(D))) ≈ target atol=atol_three - @test ldiv!(D, Matrix{eltype(D)}(I, size(D))) ≈ D \ Matrix{eltype(D)}(I, size(D)) atol=atol_three - @test_throws DimensionMismatch ldiv!(D, fill(elty(1), n + 1)) - @test_throws SingularException ldiv!(Diagonal(zeros(relty, n)), copy(v)) - b = rand(elty, n, n) - @test ldiv!(D, copy(b)) ≈ Array(D)\Array(b) - @test_throws SingularException ldiv!(Diagonal(zeros(elty, n)), copy(b)) - b = view(rand(elty, n), Vector(1:n)) - b2 = copy(b) - c = ldiv!(D, b) - d = Array(D)\b2 - @test c ≈ d - @test_throws SingularException ldiv!(Diagonal(zeros(elty, n)), b) - b = rand(elty, n+1, n+1) - @test_throws DimensionMismatch ldiv!(D, copy(b)) - b = view(rand(elty, n+1), Vector(1:n+1)) - @test_throws DimensionMismatch ldiv!(D, b) - end - end - end - d = convert(Vector{elty}, randn(n)) - D2 = Diagonal(d) - DM2= Matrix(Diagonal(d)) - @testset "Binary operations" begin - for op in (+, -, *) - @test Array(op(D, D2)) ≈ op(DM, DM2) - end - @testset "with plain numbers" begin - a = rand() - @test Array(a*D) ≈ a*DM - @test Array(D*a) ≈ DM*a - @test Array(D/a) ≈ DM/a - if elty <: Real - @test Array(abs.(D)^a) ≈ abs.(DM)^a - else - @test Array(D^a) ≈ DM^a - end - @test Diagonal(1:100)^2 == Diagonal((1:100).^2) - p = 3 - @test Diagonal(1:100)^p == 
Diagonal((1:100).^p) - @test Diagonal(1:100)^(-1) == Diagonal(inv.(1:100)) - @test Diagonal(1:100)^2.0 == Diagonal((1:100).^2.0) - @test Diagonal(1:100)^(2.0+0im) == Diagonal((1:100).^(2.0+0im)) - end - - if relty <: BlasFloat - for b in (rand(elty,n,n), rand(elty,n)) - @test lmul!(copy(D), copy(b)) ≈ Array(D)*Array(b) - @test lmul!(transpose(copy(D)), copy(b)) ≈ transpose(Array(D))*Array(b) - @test lmul!(adjoint(copy(D)), copy(b)) ≈ Array(D)'*Array(b) - end - end - - #a few missing mults - bd = Bidiagonal(D2) - @test D*transpose(D2) ≈ Array(D)*transpose(Array(D2)) - @test D2*transpose(D) ≈ Array(D2)*transpose(Array(D)) - @test D2*D' ≈ Array(D2)*Array(D)' - - #division of two Diagonals - @test D/D2 ≈ Diagonal(D.diag./D2.diag) - @test D\D2 ≈ Diagonal(D2.diag./D.diag) - - # QR \ Diagonal - A = rand(elty, n, n) - qrA = qr(A) - @test qrA \ D ≈ A \ D - - # HermOrSym - A = rand(elty, n, n) - Asym = Symmetric(A + transpose(A), :U) - Aherm = Hermitian(A + adjoint(A), :U) - for op in (+, -) - @test op(Asym, D) isa Symmetric - @test Array(op(Asym, D)) ≈ Array(Symmetric(op(Array(Asym), Array(D)))) - @test op(D, Asym) isa Symmetric - @test Array(op(D, Asym)) ≈ Array(Symmetric(op(Array(D), Array(Asym)))) - if !(elty <: Real) - Dr = real(D) - @test op(Aherm, Dr) isa Hermitian - @test Array(op(Aherm, Dr)) ≈ Array(Hermitian(op(Array(Aherm), Array(Dr)))) - @test op(Dr, Aherm) isa Hermitian - @test Array(op(Dr, Aherm)) ≈ Array(Hermitian(op(Array(Dr), Array(Aherm)))) - end - end - @test Array(D*transpose(Asym)) ≈ Array(D) * Array(transpose(Asym)) - @test Array(D*adjoint(Asym)) ≈ Array(D) * Array(adjoint(Asym)) - @test Array(D*transpose(Aherm)) ≈ Array(D) * Array(transpose(Aherm)) - @test Array(D*adjoint(Aherm)) ≈ Array(D) * Array(adjoint(Aherm)) - @test Array(transpose(Asym)*transpose(D)) ≈ Array(transpose(Asym)) * Array(transpose(D)) - @test Array(transpose(D)*transpose(Asym)) ≈ Array(transpose(D)) * Array(transpose(Asym)) - @test Array(adjoint(Aherm)*adjoint(D)) ≈ Array(adjoint(Aherm)) * Array(adjoint(D)) - @test Array(adjoint(D)*adjoint(Aherm)) ≈ Array(adjoint(D)) * Array(adjoint(Aherm)) - - # Performance specialisations for A*_mul_B! 
- vvv = similar(vv) - @test (r = Matrix(D) * vv ; mul!(vvv, D, vv) ≈ r ≈ vvv) - @test (r = Matrix(D)' * vv ; mul!(vvv, adjoint(D), vv) ≈ r ≈ vvv) - @test (r = transpose(Matrix(D)) * vv ; mul!(vvv, transpose(D), vv) ≈ r ≈ vvv) - - UUU = similar(UU) - for transformA in (identity, adjoint, transpose) - for transformD in (identity, adjoint, transpose) - @test mul!(UUU, transformA(UU), transformD(D)) ≈ transformA(UU) * Matrix(transformD(D)) - @test mul!(UUU, transformD(D), transformA(UU)) ≈ Matrix(transformD(D)) * transformA(UU) - end - end - - alpha = elty(randn()) # randn(elty) does not work with BigFloat - beta = elty(randn()) - @test begin - vvv = similar(vv) - vvv .= randn(size(vvv)) # randn!(vvv) does not work with BigFloat - r = alpha * Matrix(D) * vv + beta * vvv - mul!(vvv, D, vv, alpha, beta) ≈ r ≈ vvv - end - @test begin - vvv = similar(vv) - vvv .= randn(size(vvv)) # randn!(vvv) does not work with BigFloat - r = alpha * Matrix(D)' * vv + beta * vvv - mul!(vvv, adjoint(D), vv, alpha, beta) ≈ r ≈ vvv - end - @test begin - vvv = similar(vv) - vvv .= randn(size(vvv)) # randn!(vvv) does not work with BigFloat - r = alpha * transpose(Matrix(D)) * vv + beta * vvv - mul!(vvv, transpose(D), vv, alpha, beta) ≈ r ≈ vvv - end - - @test begin - UUU = similar(UU) - UUU .= randn(size(UUU)) # randn!(UUU) does not work with BigFloat - r = alpha * Matrix(D) * UU + beta * UUU - mul!(UUU, D, UU, alpha, beta) ≈ r ≈ UUU - end - @test begin - UUU = similar(UU) - UUU .= randn(size(UUU)) # randn!(UUU) does not work with BigFloat - r = alpha * Matrix(D)' * UU + beta * UUU - mul!(UUU, adjoint(D), UU, alpha, beta) ≈ r ≈ UUU - end - @test begin - UUU = similar(UU) - UUU .= randn(size(UUU)) # randn!(UUU) does not work with BigFloat - r = alpha * transpose(Matrix(D)) * UU + beta * UUU - mul!(UUU, transpose(D), UU, alpha, beta) ≈ r ≈ UUU - end - - # make sure that mul!(A, {Adj|Trans}(B)) works with B as a Diagonal - VV = Array(D) - DD = copy(D) - r = VV * Matrix(D) - @test Array(rmul!(VV, DD)) ≈ r ≈ Array(D)*Array(D) - DD = copy(D) - r = VV * transpose(Array(D)) - @test Array(rmul!(VV, transpose(DD))) ≈ r - DD = copy(D) - r = VV * Array(D)' - @test Array(rmul!(VV, adjoint(DD))) ≈ r - - # kron - D3 = Diagonal(convert(Vector{elty}, rand(n÷2))) - DM3= Matrix(D3) - @test Matrix(kron(D, D3)) ≈ kron(DM, DM3) - M4 = rand(elty, n÷2, n÷2) - @test kron(D3, M4) ≈ kron(DM3, M4) - @test kron(M4, D3) ≈ kron(M4, DM3) - X = [ones(1,1) for i in 1:2, j in 1:2] - @test kron(I(2), X)[1,3] == zeros(1,1) - X = [ones(2,2) for i in 1:2, j in 1:2] - @test kron(I(2), X)[1,3] == zeros(2,2) - end - @testset "iszero, isone, triu, tril" begin - Dzero = Diagonal(zeros(elty, 10)) - Done = Diagonal(ones(elty, 10)) - Dmix = Diagonal(zeros(elty, 10)) - Dmix[end,end] = one(elty) - @test iszero(Dzero) - @test !isone(Dzero) - @test !iszero(Done) - @test isone(Done) - @test !iszero(Dmix) - @test !isone(Dmix) - @test istriu(D) - @test istril(D) - @test iszero(triu(D,1)) - @test triu(D,0) == D - @test triu(D,-1) == D - @test tril(D,1) == D - @test iszero(tril(D,-1)) - @test tril(D,0) == D - @test_throws ArgumentError tril(D, -n - 2) - @test_throws ArgumentError tril(D, n) - @test_throws ArgumentError triu(D, -n) - @test_throws ArgumentError triu(D, n + 2) - end - - # factorize - @test factorize(D) == D - - @testset "Eigensystem" begin - eigD = eigen(D) - @test Diagonal(eigD.values) == D - @test eigD.vectors == Matrix(I, size(D)) - eigsortD = eigen(D, sortby=LinearAlgebra.eigsortby) - @test eigsortD.values !== D.diag - @test eigsortD.values == 
sort(D.diag, by=LinearAlgebra.eigsortby) - @test Matrix(eigsortD) == D - end - - @testset "ldiv" begin - v = rand(n + 1) - @test_throws DimensionMismatch D\v - v = rand(n) - @test D\v ≈ DM\v - V = rand(n + 1, n) - @test_throws DimensionMismatch D\V - V = rand(n, n) - @test D\V ≈ DM\V - end - - @testset "conj and transpose" begin - @test transpose(D) == D - if elty <: Real - @test transpose(D) === D - @test adjoint(D) === D - elseif elty <: BlasComplex - @test Array(conj(D)) ≈ conj(DM) - @test adjoint(D) == conj(D) - local D2 = copy(D) - local D2adj = adjoint(D2) - D2adj[1,1] = rand(eltype(D2adj)) - @test D2[1,1] == adjoint(D2adj[1,1]) - @test D2adj' === D2 - end - # Translates to Ac/t_mul_B, which is specialized after issue 21286 - @test(D' * vv == conj(D) * vv) - @test(transpose(D) * vv == D * vv) - end - - # logdet and logabsdet - if relty <: Real - lD = Diagonal(convert(Vector{relty}, rand(n))) - lM = Matrix(lD) - @test logdet(lD) ≈ logdet(lM) - d1, s1 = @inferred logabsdet(lD) - d2, s2 = logabsdet(lM) - @test d1 ≈ d2 - @test s1 == s2 - @test logdet(Diagonal(relty[-1,-2])) ≈ log(2) - @test_throws DomainError logdet(Diagonal(relty[-1,-2,-3])) - end - - @testset "similar" begin - @test isa(similar(D), Diagonal{elty}) - @test isa(similar(D, Int), Diagonal{Int}) - @test isa(similar(D, (3,2)), Matrix{elty}) - @test isa(similar(D, Int, (3,2)), Matrix{Int}) - end - - # Issue number 10036 - # make sure issymmetric/ishermitian work for - # non-real diagonal matrices - @testset "issymmetric/hermitian for complex Diagonal" begin - @test issymmetric(D2) - @test ishermitian(D2) - if elty <: Complex - dc = d .+ elty(1im) - D3 = Diagonal(dc) - @test issymmetric(D3) - @test !ishermitian(D3) - end - end - - @testset "svd (#11120/#11247)" begin - U, s, V = svd(D) - @test (U*Diagonal(s))*V' ≈ D - @test svdvals(D) == s - @test svd(D).V == V - end - - @testset "svd/eigen with Diagonal{Furlong}" begin - Du = Furlong.(D) - @test Du isa Diagonal{<:Furlong{1}} - F = svd(Du) - U, s, V = F - @test map(x -> x.val, Matrix(F)) ≈ map(x -> x.val, Du) - @test svdvals(Du) == s - @test U isa AbstractMatrix{<:Furlong{0}} - @test V isa AbstractMatrix{<:Furlong{0}} - @test s isa AbstractVector{<:Furlong{1}} - E = eigen(Du) - vals, vecs = E - @test Matrix(E) == Du - @test vals isa AbstractVector{<:Furlong{1}} - @test vecs isa AbstractMatrix{<:Furlong{0}} - end -end - -@testset "rdiv! (#40887)" begin - @test rdiv!(Matrix(Diagonal([2.0, 3.0])), Diagonal(2:3)) == Diagonal([1.0, 1.0]) - @test rdiv!(fill(3.0, 3, 3), 3.0I(3)) == ones(3,3) -end - -@testset "kron (issue #40595)" begin - # custom array type to test that kron on Diagonal matrices preserves types of the parents if possible - struct KronTestArray{T, N, AT} <: AbstractArray{T, N} - data::AT - end - KronTestArray(data::AbstractArray) = KronTestArray{eltype(data), ndims(data), typeof(data)}(data) - Base.size(A::KronTestArray) = size(A.data) - LinearAlgebra.kron(A::KronTestArray, B::KronTestArray) = KronTestArray(kron(A.data, B.data)) - Base.getindex(K::KronTestArray{<:Any,N}, i::Vararg{Int,N}) where {N} = K.data[i...] 
- - A = KronTestArray([1, 2, 3]); - @test kron(A, A) isa KronTestArray - Ad = Diagonal(A); - @test kron(Ad, Ad).diag isa KronTestArray - @test kron(Ad, Ad).diag == kron([1, 2, 3], [1, 2, 3]) -end - -# Define a vector type that does not support `deleteat!`, to ensure that `kron` handles this -struct SimpleVector{T} <: AbstractVector{T} - vec::Vector{T} -end -SimpleVector(x::SimpleVector) = SimpleVector(Vector(x.vec)) -SimpleVector{T}(::UndefInitializer, n::Integer) where {T} = SimpleVector(Vector{T}(undef, n)) -Base.:(==)(x::SimpleVector, y::SimpleVector) = x == y -Base.axes(x::SimpleVector) = axes(x.vec) -Base.convert(::Type{Vector{T}}, x::SimpleVector) where {T} = convert(Vector{T}, x.vec) -Base.convert(::Type{Vector}, x::SimpleVector{T}) where {T} = convert(Vector{T}, x) -Base.convert(::Type{Array{T}}, x::SimpleVector) where {T} = convert(Vector{T}, x) -Base.convert(::Type{Array}, x::SimpleVector) = convert(Vector, x) -Base.copyto!(x::SimpleVector, y::SimpleVector) = (copyto!(x.vec, y.vec); x) -Base.eltype(::Type{SimpleVector{T}}) where {T} = T -Base.getindex(x::SimpleVector, ind...) = getindex(x.vec, ind...) -Base.kron(x::SimpleVector, y::SimpleVector) = SimpleVector(kron(x.vec, y.vec)) -Base.promote_rule(::Type{<:AbstractVector{T}}, ::Type{SimpleVector{U}}) where {T,U} = Vector{promote_type(T, U)} -Base.promote_rule(::Type{SimpleVector{T}}, ::Type{SimpleVector{U}}) where {T,U} = SimpleVector{promote_type(T, U)} -Base.setindex!(x::SimpleVector, val, ind...) = (setindex!(x.vec, val, ind...), x) -Base.similar(x::SimpleVector, ::Type{T}) where {T} = SimpleVector(similar(x.vec, T)) -Base.similar(x::SimpleVector, ::Type{T}, dims::Dims{1}) where {T} = SimpleVector(similar(x.vec, T, dims)) -Base.size(x::SimpleVector) = size(x.vec) - -@testset "kron (issue #46456)" for repr in Any[identity, SimpleVector] - A = Diagonal(repr(randn(10))) - BL = Bidiagonal(repr(randn(10)), repr(randn(9)), :L) - BU = Bidiagonal(repr(randn(10)), repr(randn(9)), :U) - C = SymTridiagonal(repr(randn(10)), repr(randn(9))) - Cl = SymTridiagonal(repr(randn(10)), repr(randn(10))) - D = Tridiagonal(repr(randn(9)), repr(randn(10)), repr(randn(9))) - @test kron(A, BL)::Bidiagonal == kron(Array(A), Array(BL)) - @test kron(A, BU)::Bidiagonal == kron(Array(A), Array(BU)) - @test kron(A, C)::SymTridiagonal == kron(Array(A), Array(C)) - @test kron(A, Cl)::SymTridiagonal == kron(Array(A), Array(Cl)) - @test kron(A, D)::Tridiagonal == kron(Array(A), Array(D)) -end - -@testset "svdvals and eigvals (#11120/#11247)" begin - D = Diagonal(Matrix{Float64}[randn(3,3), randn(2,2)]) - @test sort([svdvals(D)...;], rev = true) ≈ svdvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]]) - @test sort([eigvals(D)...;], by=LinearAlgebra.eigsortby) ≈ eigvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]]) -end - -@testset "eigvals should return a copy of the diagonal" begin - D = Diagonal([1, 2, 3]) - lam = eigvals(D) - D[3,3] = 4 # should not affect lam - @test lam == [1, 2, 3] -end - -@testset "eigmin (#27847)" begin - for _ in 1:100 - d = randn(rand(1:10)) - D = Diagonal(d) - @test eigmin(D) == minimum(d) - end -end - -@testset "isposdef" begin - @test isposdef(Diagonal(1.0 .+ rand(n))) - @test !isposdef(Diagonal(-1.0 * rand(n))) - @test isposdef(Diagonal(complex(1.0, 0.0) .+ rand(n))) - @test !isposdef(Diagonal(complex(1.0, 1.0) .+ rand(n))) - @test isposdef(Diagonal([[1 0; 0 1], [1 0; 0 1]])) - @test !isposdef(Diagonal([[1 0; 0 1], [1 0; 1 1]])) -end - -@testset "getindex" begin - d = randn(n) - D = Diagonal(d) - # getindex bounds checking - 
@test_throws BoundsError D[0, 0] - @test_throws BoundsError D[-1, -2] - @test_throws BoundsError D[n, n + 1] - @test_throws BoundsError D[n + 1, n] - @test_throws BoundsError D[n + 1, n + 1] - # getindex on and off the diagonal - for i in 1:n, j in 1:n - @test D[i, j] == (i == j ? d[i] : 0) - end -end - -@testset "setindex!" begin - d = randn(n) - D = Diagonal(d) - # setindex! bounds checking - @test_throws BoundsError D[0, 0] = 0 - @test_throws BoundsError D[-1 , -2] = 0 - @test_throws BoundsError D[n, n + 1] = 0 - @test_throws BoundsError D[n + 1, n] = 0 - @test_throws BoundsError D[n + 1, n + 1] = 0 - for i in 1:n, j in 1:n - if i == j - # setindex on! the diagonal - @test ((D[i, j] = i) == i; D[i, j] == i) - else - # setindex! off the diagonal - @test ((D[i, j] = 0) == 0; iszero(D[i, j])) - @test_throws ArgumentError D[i, j] = 1 - end - end -end - -@testset "inverse" begin - for d in Any[randn(n), Int[], [1, 2, 3], [1im, 2im, 3im], [1//1, 2//1, 3//1], [1+1im//1, 2//1, 3im//1]] - D = Diagonal(d) - @test inv(D) ≈ inv(Array(D)) - end - @test_throws SingularException inv(Diagonal(zeros(n))) - @test_throws SingularException inv(Diagonal([0, 1, 2])) - @test_throws SingularException inv(Diagonal([0im, 1im, 2im])) -end - -@testset "pseudoinverse" begin - for d in Any[randn(n), zeros(n), Int[], [0, 2, 0.003], [0im, 1+2im, 0.003im], [0//1, 2//1, 3//100], [0//1, 1//1+2im, 3im//100]] - D = Diagonal(d) - @test pinv(D) ≈ pinv(Array(D)) - @test pinv(D, 1.0e-2) ≈ pinv(Array(D), 1.0e-2) - end -end - -# allow construct from range -@test all(Diagonal(range(1, stop=3, length=3)) .== Diagonal([1.0,2.0,3.0])) - -# Issue 12803 -for t in (Float32, Float64, Int, ComplexF64, Rational{Int}) - @test Diagonal(Matrix{t}[fill(t(1), 2, 2), fill(t(1), 3, 3)])[2,1] == zeros(t, 3, 2) -end - -# Issue 15401 -@test Matrix(1.0I, 5, 5) \ Diagonal(fill(1.,5)) == Matrix(I, 5, 5) - -@testset "Triangular and Diagonal" begin - function _test_matrix(type) - if type == Int - return rand(1:9, 5, 5) - else - return randn(type, 5, 5) - end - end - types = (Float64, Int, ComplexF64) - for ta in types - D = Diagonal(_test_matrix(ta)) - for tb in types - B = _test_matrix(tb) - Tmats = (LowerTriangular(B), UnitLowerTriangular(B), UpperTriangular(B), UnitUpperTriangular(B)) - restypes = (LowerTriangular, LowerTriangular, UpperTriangular, UpperTriangular) - for (T, rtype) in zip(Tmats, restypes) - adjtype = (rtype == LowerTriangular) ? 
UpperTriangular : LowerTriangular - - # Triangular * Diagonal - R = T * D - @test R ≈ Array(T) * Array(D) - @test isa(R, rtype) - - # Diagonal * Triangular - R = D * T - @test R ≈ Array(D) * Array(T) - @test isa(R, rtype) - - # Adjoint of Triangular * Diagonal - R = T' * D - @test R ≈ Array(T)' * Array(D) - @test isa(R, adjtype) - - # Diagonal * Adjoint of Triangular - R = D * T' - @test R ≈ Array(D) * Array(T)' - @test isa(R, adjtype) - - # Transpose of Triangular * Diagonal - R = transpose(T) * D - @test R ≈ transpose(Array(T)) * Array(D) - @test isa(R, adjtype) - - # Diagonal * Transpose of Triangular - R = D * transpose(T) - @test R ≈ Array(D) * transpose(Array(T)) - @test isa(R, adjtype) - end - end - end -end - -let D1 = Diagonal(rand(5)), D2 = Diagonal(rand(5)) - @test LinearAlgebra.rmul!(copy(D1),D2) == D1*D2 - @test LinearAlgebra.lmul!(D1,copy(D2)) == D1*D2 - @test LinearAlgebra.rmul!(copy(D1),transpose(D2)) == D1*transpose(D2) - @test LinearAlgebra.lmul!(transpose(D1),copy(D2)) == transpose(D1)*D2 - @test LinearAlgebra.rmul!(copy(D1),adjoint(D2)) == D1*adjoint(D2) - @test LinearAlgebra.lmul!(adjoint(D1),copy(D2)) == adjoint(D1)*D2 -end - -@testset "multiplication of a Diagonal with a Matrix" begin - A = collect(reshape(1:8, 4, 2)); - B = BigFloat.(A); - DL = Diagonal(collect(axes(A, 1))); - DR = Diagonal(Float16.(collect(axes(A, 2)))); - - @test DL * A == collect(DL) * A - @test A * DR == A * collect(DR) - @test DL * B == collect(DL) * B - @test B * DR == B * collect(DR) - - A = reshape([ones(2,2), ones(2,2)*2, ones(2,2)*3, ones(2,2)*4], 2, 2) - Ac = collect(A) - D = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))]) - Dc = collect(D) - @test A * D == Ac * Dc - @test D * A == Dc * Ac - @test D * D == Dc * Dc - - AS = similar(A) - mul!(AS, A, D, true, false) - @test AS == A * D - - D2 = similar(D) - mul!(D2, D, D) - @test D2 == D * D - - copyto!(D2, D) - lmul!(D, D2) - @test D2 == D * D - copyto!(D2, D) - rmul!(D2, D) - @test D2 == D * D -end - -@testset "multiplication of 2 Diagonal and a Matrix (#46400)" begin - A = randn(10, 10) - D = Diagonal(randn(10)) - D2 = Diagonal(randn(10)) - @test D * A * D2 ≈ D * (A * D2) - @test D * A * D2 ≈ (D * A) * D2 - @test_throws DimensionMismatch Diagonal(ones(9)) * A * D2 - @test_throws DimensionMismatch D * A * Diagonal(ones(9)) -end - -@testset "multiplication of QR Q-factor and Diagonal (#16615 spot test)" begin - D = Diagonal(randn(5)) - Q = qr(randn(5, 5)).Q - @test D * Q' == Array(D) * Q' - Q = qr(randn(5, 5), ColumnNorm()).Q - @test_throws ArgumentError lmul!(Q, D) -end - -@testset "block diagonal matrices" begin - D = Diagonal([[1 2; 3 4], [1 2; 3 4]]) - Dherm = Diagonal([[1 1+im; 1-im 1], [1 1+im; 1-im 1]]) - Dsym = Diagonal([[1 1+im; 1+im 1], [1 1+im; 1+im 1]]) - @test adjoint(D) == Diagonal([[1 3; 2 4], [1 3; 2 4]]) - @test transpose(D) == Diagonal([[1 3; 2 4], [1 3; 2 4]]) - @test adjoint(Dherm) == Dherm - @test transpose(Dherm) == Diagonal([[1 1-im; 1+im 1], [1 1-im; 1+im 1]]) - @test adjoint(Dsym) == Diagonal([[1 1-im; 1-im 1], [1 1-im; 1-im 1]]) - @test transpose(Dsym) == Dsym - - v = [[1, 2], [3, 4]] - @test Dherm' * v == Dherm * v - @test transpose(D) * v == [[7, 10], [15, 22]] - - @test issymmetric(D) == false - @test issymmetric(Dherm) == false - @test issymmetric(Dsym) == true - - @test ishermitian(D) == false - @test ishermitian(Dherm) == true - @test ishermitian(Dsym) == false - - @test exp(D) == Diagonal([exp([1 2; 3 4]), exp([1 2; 3 4])]) - @test cis(D) == Diagonal([cis([1 2; 3 4]), cis([1 2; 3 4])]) - 
@test log(D) == Diagonal([log([1 2; 3 4]), log([1 2; 3 4])]) - @test sqrt(D) == Diagonal([sqrt([1 2; 3 4]), sqrt([1 2; 3 4])]) - - @test tr(D) == 10 - @test det(D) == 4 - - M = [1 2; 3 4] - for n in 0:1 - D = Diagonal(fill(M, n)) - @test D == Matrix{eltype(D)}(D) - end -end - -@testset "linear solve for block diagonal matrices" begin - D = Diagonal([rand(2,2) for _ in 1:5]) - b = [rand(2,2) for _ in 1:5] - B = [rand(2,2) for _ in 1:5, _ in 1:5] - @test ldiv!(D, copy(b)) ≈ Diagonal(inv.(D.diag)) * b - @test ldiv!(D, copy(B)) ≈ Diagonal(inv.(D.diag)) * B - @test rdiv!(copy(B), D) ≈ B * Diagonal(inv.(D.diag)) -end - -@testset "multiplication with Symmetric/Hermitian" begin - for T in (Float64, ComplexF64) - D = Diagonal(randn(T, n)) - A = randn(T, n, n); A = A'A - S = Symmetric(A) - H = Hermitian(A) - for (transform1, transform2) in ((identity, identity), - (identity, adjoint ), (adjoint, identity ), (adjoint, adjoint ), - (identity, transpose), (transpose, identity ), (transpose, transpose) ) - @test *(transform1(D), transform2(S)) ≈ *(transform1(Matrix(D)), transform2(Matrix(S))) - @test *(transform1(D), transform2(H)) ≈ *(transform1(Matrix(D)), transform2(Matrix(H))) - @test *(transform1(S), transform2(D)) ≈ *(transform1(Matrix(S)), transform2(Matrix(D))) - @test *(transform1(S), transform2(H)) ≈ *(transform1(Matrix(S)), transform2(Matrix(H))) - end - end -end - -@testset "multiplication of transposes of Diagonal (#22428)" begin - for T in (Float64, ComplexF64) - D = Diagonal(randn(T, 5, 5)) - B = Diagonal(randn(T, 5, 5)) - DD = Diagonal([randn(T, 2, 2), rand(T, 2, 2)]) - BB = Diagonal([randn(T, 2, 2), rand(T, 2, 2)]) - fullDD = copyto!(Matrix{Matrix{T}}(undef, 2, 2), DD) - fullBB = copyto!(Matrix{Matrix{T}}(undef, 2, 2), BB) - for (transform1, transform2) in ((identity, identity), - (identity, adjoint ), (adjoint, identity ), (adjoint, adjoint ), - (identity, transpose), (transpose, identity ), (transpose, transpose)) - @test *(transform1(D), transform2(B))::typeof(D) ≈ *(transform1(Matrix(D)), transform2(Matrix(B))) atol=2 * eps() - @test *(transform1(DD), transform2(BB))::typeof(DD) == *(transform1(fullDD), transform2(fullBB)) - end - M = randn(T, 5, 5) - MM = [randn(T, 2, 2) for _ in 1:2, _ in 1:2] - for transform in (identity, adjoint, transpose) - @test lmul!(transform(D), copy(M)) ≈ *(transform(Matrix(D)), M) - @test rmul!(copy(M), transform(D)) ≈ *(M, transform(Matrix(D))) - @test lmul!(transform(DD), copy(MM)) ≈ *(transform(fullDD), MM) - @test rmul!(copy(MM), transform(DD)) ≈ *(MM, transform(fullDD)) - end - end -end - -@testset "Diagonal of adjoint/transpose vectors (#23649)" begin - @test Diagonal(adjoint([1, 2, 3])) == Diagonal([1 2 3]) - @test Diagonal(transpose([1, 2, 3])) == Diagonal([1 2 3]) -end - -@testset "Multiplication with adjoint and transpose vectors (#26863)" begin - x = collect(1:2) - xt = transpose(x) - A = reshape([[1 2; 3 4], zeros(Int,2,2), zeros(Int, 2, 2), [5 6; 7 8]], 2, 2) - D = Diagonal(A) - @test x'*D == x'*A == collect(x')*D == collect(x')*A - @test xt*D == xt*A == collect(xt)*D == collect(xt)*A - outadjxD = similar(x'*D); outtrxD = similar(xt*D); - mul!(outadjxD, x', D) - @test outadjxD == x'*D - mul!(outtrxD, xt, D) - @test outtrxD == xt*D - - D1 = Diagonal([[1 2; 3 4]]) - @test D1 * x' == D1 * collect(x') == collect(D1) * collect(x') - @test D1 * xt == D1 * collect(xt) == collect(D1) * collect(xt) - outD1adjx = similar(D1 * x'); outD1trx = similar(D1 * xt); - mul!(outadjxD, D1, x') - @test outadjxD == D1*x' - mul!(outtrxD, D1, xt) - @test outtrxD == 
D1*xt - - y = [x, x] - yt = transpose(y) - @test y'*D*y == (y'*D)*y == (y'*A)*y - @test yt*D*y == (yt*D)*y == (yt*A)*y - outadjyD = similar(y'*D); outtryD = similar(yt*D); - outadjyD2 = similar(collect(y'*D)); outtryD2 = similar(collect(yt*D)); - mul!(outadjyD, y', D) - mul!(outadjyD2, y', D) - @test outadjyD == outadjyD2 == y'*D - mul!(outtryD, yt, D) - mul!(outtryD2, yt, D) - @test outtryD == outtryD2 == yt*D -end - -@testset "Multiplication of single element Diagonal (#36746, #40726)" begin - @test_throws DimensionMismatch Diagonal(randn(1)) * randn(5) - @test_throws DimensionMismatch Diagonal(randn(1)) * Diagonal(randn(3, 3)) - A = [1 0; 0 2] - v = [3, 4] - @test Diagonal(A) * v == A * v - @test Diagonal(A) * Diagonal(A) == A * A - @test_throws DimensionMismatch [1 0;0 1] * Diagonal([2 3]) # Issue #40726 - @test_throws DimensionMismatch lmul!(Diagonal([1]), [1,2,3]) # nearby -end - -@testset "Multiplication of a Diagonal with an OffsetArray" begin - # Offset indices should throw - D = Diagonal(1:4) - A = OffsetArray(rand(4,4), 2, 2) - @test_throws ArgumentError D * A - @test_throws ArgumentError A * D - @test_throws ArgumentError mul!(similar(A, size(A)), A, D) - @test_throws ArgumentError mul!(similar(A, size(A)), D, A) -end - -@testset "Triangular division by Diagonal #27989" begin - K = 5 - for elty in (Float32, Float64, ComplexF32, ComplexF64) - U = UpperTriangular(randn(elty, K, K)) - L = LowerTriangular(randn(elty, K, K)) - D = Diagonal(randn(elty, K)) - @test (U / D)::UpperTriangular{elty} == UpperTriangular(Matrix(U) / Matrix(D)) - @test (L / D)::LowerTriangular{elty} == LowerTriangular(Matrix(L) / Matrix(D)) - @test (D \ U)::UpperTriangular{elty} == UpperTriangular(Matrix(D) \ Matrix(U)) - @test (D \ L)::LowerTriangular{elty} == LowerTriangular(Matrix(D) \ Matrix(L)) - end -end - -@testset "(Sym)Tridiagonal division by Diagonal" begin - for K in (5, 1), elty in (Float64, ComplexF32), overlength in (1, 0) - S = SymTridiagonal(randn(elty, K), randn(elty, K-overlength)) - T = Tridiagonal(randn(elty, K-1), randn(elty, K), randn(elty, K-1)) - D = Diagonal(randn(elty, K)) - D0 = Diagonal(zeros(elty, K)) - @test (D \ S)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(S)) - @test (D \ T)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(T)) - @test (S / D)::Tridiagonal{elty} == Tridiagonal(Matrix(S) / Matrix(D)) - @test (T / D)::Tridiagonal{elty} == Tridiagonal(Matrix(T) / Matrix(D)) - @test_throws SingularException D0 \ S - @test_throws SingularException D0 \ T - @test_throws SingularException S / D0 - @test_throws SingularException T / D0 - end - # 0-length case - S = SymTridiagonal(Float64[], Float64[]) - T = Tridiagonal(Float64[], Float64[], Float64[]) - D = Diagonal(Float64[]) - @test (D \ S)::Tridiagonal{Float64} == T - @test (D \ T)::Tridiagonal{Float64} == T - @test (S / D)::Tridiagonal{Float64} == T - @test (T / D)::Tridiagonal{Float64} == T - # matrix eltype case - K = 5 - for elty in (Float64, ComplexF32), overlength in (1, 0) - S = SymTridiagonal([rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-overlength]) - T = Tridiagonal([rand(elty, 2, 2) for _ in 1:K-1], [rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-1]) - D = Diagonal(randn(elty, K)) - SM = fill(zeros(elty, 2, 2), K, K) - TM = copy(SM) - SM[1,1] = S[1,1]; TM[1,1] = T[1,1] - for j in 2:K - SM[j,j-1] = S[j,j-1]; SM[j,j] = S[j,j]; SM[j-1,j] = S[j-1,j] - TM[j,j-1] = T[j,j-1]; TM[j,j] = T[j,j]; TM[j-1,j] = T[j-1,j] - end - for (M, Mm) in ((S, SM), (T, TM)) - DS = D \ M - @test DS 
isa Tridiagonal - DM = D \ Mm - for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end - DS = M / D - @test DS isa Tridiagonal - DM = Mm / D - for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end - end - end - # eltype promotion case - S = SymTridiagonal(rand(-20:20, K), rand(-20:20, K-1)) - T = Tridiagonal(rand(-20:20, K-1), rand(-20:20, K), rand(-20:20, K-1)) - D = Diagonal(rand(1:20, K)) - @test (D \ S)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(S)) - @test (D \ T)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(T)) - @test (S / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(S) / Matrix(D)) - @test (T / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(T) / Matrix(D)) -end - -@testset "eigenvalue sorting" begin - D = Diagonal([0.4, 0.2, -1.3]) - @test eigvals(D) == eigen(D).values == [0.4, 0.2, -1.3] # not sorted by default - @test eigvals(Matrix(D)) == eigen(Matrix(D)).values == [-1.3, 0.2, 0.4] # sorted even if diagonal special case is detected - E = eigen(D, sortby=abs) # sortby keyword supported for eigen(::Diagonal) - @test E.values == [0.2, 0.4, -1.3] - @test E.vectors == [0 1 0; 1 0 0; 0 0 1] -end - -@testset "sum, mapreduce" begin - D = Diagonal([1,2,3]) - Ddense = Matrix(D) - @test sum(D) == 6 - @test_throws ArgumentError sum(D, dims=0) - @test sum(D, dims=1) == sum(Ddense, dims=1) - @test sum(D, dims=2) == sum(Ddense, dims=2) - @test sum(D, dims=3) == sum(Ddense, dims=3) - @test typeof(sum(D, dims=1)) == typeof(sum(Ddense, dims=1)) - @test mapreduce(one, min, D, dims=1) == mapreduce(one, min, Ddense, dims=1) - @test mapreduce(one, min, D, dims=2) == mapreduce(one, min, Ddense, dims=2) - @test mapreduce(one, min, D, dims=3) == mapreduce(one, min, Ddense, dims=3) - @test typeof(mapreduce(one, min, D, dims=1)) == typeof(mapreduce(one, min, Ddense, dims=1)) - @test mapreduce(zero, max, D, dims=1) == mapreduce(zero, max, Ddense, dims=1) - @test mapreduce(zero, max, D, dims=2) == mapreduce(zero, max, Ddense, dims=2) - @test mapreduce(zero, max, D, dims=3) == mapreduce(zero, max, Ddense, dims=3) - @test typeof(mapreduce(zero, max, D, dims=1)) == typeof(mapreduce(zero, max, Ddense, dims=1)) - - D = Diagonal(Int[]) - Ddense = Matrix(D) - @test sum(D) == 0 - @test_throws ArgumentError sum(D, dims=0) - @test sum(D, dims=1) == sum(Ddense, dims=1) - @test sum(D, dims=2) == sum(Ddense, dims=2) - @test sum(D, dims=3) == sum(Ddense, dims=3) - @test typeof(sum(D, dims=1)) == typeof(sum(Ddense, dims=1)) - - D = Diagonal(Int[2]) - Ddense = Matrix(D) - @test sum(D) == 2 - @test_throws ArgumentError sum(D, dims=0) - @test sum(D, dims=1) == sum(Ddense, dims=1) - @test sum(D, dims=2) == sum(Ddense, dims=2) - @test sum(D, dims=3) == sum(Ddense, dims=3) - @test typeof(sum(D, dims=1)) == typeof(sum(Ddense, dims=1)) -end - -@testset "logabsdet for generic eltype" begin - d = Any[1, -2.0, -3.0] - D = Diagonal(d) - d1, s1 = logabsdet(D) - @test d1 ≈ sum(log ∘ abs, d) - @test s1 == prod(sign, d) -end - -@testset "Empty (#35424) & size checks (#47060)" begin - @test zeros(0)'*Diagonal(zeros(0))*zeros(0) === 0.0 - @test transpose(zeros(0))*Diagonal(zeros(Complex{Int}, 0))*zeros(0) === 0.0 + 0.0im - @test dot(zeros(Int32, 0), Diagonal(zeros(Int, 0)), zeros(Int16, 0)) === 0 - @test_throws DimensionMismatch zeros(2)' * Diagonal(zeros(2)) * zeros(3) - @test_throws DimensionMismatch zeros(3)' * Diagonal(zeros(2)) * zeros(2) - @test_throws DimensionMismatch dot(zeros(2), Diagonal(zeros(2)), zeros(3)) - @test_throws DimensionMismatch dot(zeros(3), Diagonal(zeros(2)), zeros(2)) -end - -@testset 
"Diagonal(undef)" begin - d = Diagonal{Float32}(undef, 2) - @test length(d.diag) == 2 -end - -@testset "permutedims (#39447)" begin - for D in (Diagonal(zeros(5)), Diagonal(zeros(5) .+ 1im), Diagonal([[1,2],[3,4]])) - @test permutedims(D) === permutedims(D,(1,2)) === permutedims(D,(2,1)) === D - @test_throws ArgumentError permutedims(D,(1,3)) - end -end - -@testset "Inner product" begin - A = Diagonal(rand(10) .+ im) - B = Diagonal(rand(10) .+ im) - @test dot(A, B) ≈ dot(Matrix(A), B) - @test dot(A, B) ≈ dot(A, Matrix(B)) - @test dot(A, B) ≈ dot(Matrix(A), Matrix(B)) - @test dot(A, B) ≈ conj(dot(B, A)) -end - -@testset "eltype relaxation(#41015)" begin - A = rand(3,3) - for trans in (identity, adjoint, transpose) - @test ldiv!(trans(I(3)), A) == A - @test rdiv!(A, trans(I(3))) == A - end -end - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "Conversion to AbstractArray" begin - # tests corresponding to #34995 - d = ImmutableArray([1, 2, 3, 4]) - D = Diagonal(d) - - @test convert(AbstractArray{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D - @test convert(AbstractMatrix{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D -end - -@testset "divisions functionality" for elty in (Int, Float64, ComplexF64) - B = Diagonal(rand(elty,5,5)) - x = rand(elty) - @test \(x, B) == /(B, x) -end - -@testset "promotion" begin - for (v1, v2) in (([true], [1]), ([zeros(2,2)], [zeros(Int, 2,2)])) - T = promote_type(eltype(v1), eltype(v2)) - V = promote_type(typeof(v1), typeof(v2)) - d1 = Diagonal(v1) - d2 = Diagonal(v2) - v = [d1, d2] - @test (@inferred eltype(v)) == Diagonal{T, V} - end - # test for a type for which promote_type doesn't lead to a concrete eltype - struct MyArrayWrapper{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} - a :: A - end - Base.size(M::MyArrayWrapper) = size(M.a) - Base.axes(M::MyArrayWrapper) = axes(M.a) - Base.length(M::MyArrayWrapper) = length(M.a) - Base.getindex(M::MyArrayWrapper, i::Int...) = M.a[i...] - Base.setindex!(M::MyArrayWrapper, v, i::Int...) = M.a[i...] 
= v - d1 = Diagonal(MyArrayWrapper(1:3)) - d2 = Diagonal(MyArrayWrapper(1.0:3.0)) - c = [d1, d2] - @test c[1] == d1 - @test c[2] == d2 -end - -@testset "zero and one" begin - D1 = Diagonal(rand(3)) - @test D1 + zero(D1) == D1 - @test D1 * one(D1) == D1 - @test D1 * oneunit(D1) == D1 - @test oneunit(D1) isa typeof(D1) - D2 = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))]) - @test D2 + zero(D2) == D2 - @test D2 * one(D2) == D2 - @test D2 * oneunit(D2) == D2 - @test oneunit(D2) isa typeof(D2) - D3 = Diagonal([D2, D2]); - @test D3 + zero(D3) == D3 - @test D3 * one(D3) == D3 - @test D3 * oneunit(D3) == D3 - @test oneunit(D3) isa typeof(D3) -end - -@testset "AbstractTriangular" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular)) - A = randn(4, 4) - TriA = Tri(A) - UTriA = UTri(A) - D = Diagonal(1.0:4.0) - DM = Matrix(D) - DMF = factorize(DM) - outTri = similar(TriA) - out = similar(A) - # 2 args - for fun in (*, rmul!, rdiv!, /) - @test fun(copy(TriA), D)::Tri == fun(Matrix(TriA), D) - @test fun(copy(UTriA), D)::Tri == fun(Matrix(UTriA), D) - end - for fun in (*, lmul!, ldiv!, \) - @test fun(D, copy(TriA))::Tri == fun(D, Matrix(TriA)) - @test fun(D, copy(UTriA))::Tri == fun(D, Matrix(UTriA)) - end - # 3 args - @test outTri === ldiv!(outTri, D, TriA)::Tri == ldiv!(out, D, Matrix(TriA)) - @test outTri === ldiv!(outTri, D, UTriA)::Tri == ldiv!(out, D, Matrix(UTriA)) - @test outTri === mul!(outTri, D, TriA)::Tri == mul!(out, D, Matrix(TriA)) - @test outTri === mul!(outTri, D, UTriA)::Tri == mul!(out, D, Matrix(UTriA)) - @test outTri === mul!(outTri, TriA, D)::Tri == mul!(out, Matrix(TriA), D) - @test outTri === mul!(outTri, UTriA, D)::Tri == mul!(out, Matrix(UTriA), D) - # 5 args - @test outTri === mul!(outTri, D, TriA, 2, 1)::Tri == mul!(out, D, Matrix(TriA), 2, 1) - @test outTri === mul!(outTri, D, UTriA, 2, 1)::Tri == mul!(out, D, Matrix(UTriA), 2, 1) - @test outTri === mul!(outTri, TriA, D, 2, 1)::Tri == mul!(out, Matrix(TriA), D, 2, 1) - @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1) -end - -struct SMatrix1{T} <: AbstractArray{T,2} - elt::T -end -Base.:(==)(A::SMatrix1, B::SMatrix1) = A.elt == B.elt -Base.zero(::Type{SMatrix1{T}}) where {T} = SMatrix1(zero(T)) -Base.iszero(A::SMatrix1) = iszero(A.elt) -Base.getindex(A::SMatrix1, inds...) = A.elt -Base.size(::SMatrix1) = (1, 1) -@testset "map for Diagonal matrices (#46292)" begin - A = Diagonal([1]) - @test A isa Diagonal{Int,Vector{Int}} - @test 2*A isa Diagonal{Int,Vector{Int}} - @test A.+1 isa Matrix{Int} - # Numeric element types remain diagonal - B = map(SMatrix1, A) - @test B == fill(SMatrix1(1), 1, 1) - @test B isa Diagonal{SMatrix1{Int},Vector{SMatrix1{Int}}} - # Non-numeric element types become dense - C = map(a -> SMatrix1(string(a)), A) - @test C == fill(SMatrix1(string(1)), 1, 1) - @test C isa Matrix{SMatrix1{String}} -end - -@testset "copyto! 
with UniformScaling" begin - @testset "Fill" begin - for len in (4, InfiniteArrays.Infinity()) - d = FillArrays.Fill(1, len) - D = Diagonal(d) - @test copyto!(D, I) === D - end - end - D = Diagonal(fill(2, 2)) - copyto!(D, I) - @test all(isone, diag(D)) -end - -@testset "diagonal triple multiplication (#49005)" begin - n = 10 - @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n))) isa Diagonal - @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n+1)))) - @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n+1), Diagonal(ones(n+1)))) - @test_throws DimensionMismatch (*(Diagonal(ones(n+1)), Diagonal(1:n), Diagonal(ones(n)))) - - # currently falls back to two-term * - @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal -end - -end # module TestDiagonal diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl deleted file mode 100644 index 413a8df0474fa..0000000000000 --- a/stdlib/LinearAlgebra/test/eigen.jl +++ /dev/null @@ -1,246 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestEigen - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted, UtiAUi! - -n = 10 - -# Split n into 2 parts for tests needing two matrices -n1 = div(n, 2) -n2 = 2*n1 - -Random.seed!(12343219) - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) - aa = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - asym = aa' + aa # symmetric indefinite - apd = aa' * aa # symmetric positive-definite - for (a, asym, apd) in ((aa, asym, apd), - (view(aa, 1:n, 1:n), - view(asym, 1:n, 1:n), - view(apd, 1:n, 1:n))) - ε = εa = eps(abs(float(one(eltya)))) - - α = rand(eltya) - β = rand(eltya) - eab = eigen(α,β) - @test eab.values == eigvals(fill(α,1,1),fill(β,1,1)) - @test eab.vectors == eigvecs(fill(α,1,1),fill(β,1,1)) - - @testset "non-symmetric eigen decomposition" begin - d, v = eigen(a) - for i in 1:size(a,2) - @test a*v[:,i] ≈ d[i]*v[:,i] - end - f = eigen(a) - @test det(a) ≈ det(f) - @test inv(a) ≈ inv(f) - @test isposdef(a) == isposdef(f) - @test eigvals(f) === f.values - @test eigvecs(f) === f.vectors - @test Array(f) ≈ a - - for T in (Tridiagonal(a), Hermitian(Tridiagonal(a))) - f = eigen(T) - d, v = f - for i in 1:size(a,2) - @test T*v[:,i] ≈ d[i]*v[:,i] - end - @test det(T) ≈ det(f) - @test inv(T) ≈ inv(f) - end - - num_fact = eigen(one(eltya)) - @test num_fact.values[1] == one(eltya) - h = asym - @test minimum(eigvals(h)) ≈ eigmin(h) - @test maximum(eigvals(h)) ≈ eigmax(h) - @test_throws DomainError eigmin(a - a') - @test_throws DomainError eigmax(a - a') - end - @testset "symmetric generalized eigenproblem" begin - if isa(a, Array) - asym_sg = asym[1:n1, 1:n1] - a_sg = a[:,n1+1:n2] - else - asym_sg = view(asym, 1:n1, 1:n1) - a_sg = view(a, 1:n, n1+1:n2) - end - ASG2 = a_sg'a_sg - f = eigen(asym_sg, ASG2) - @test asym_sg*f.vectors ≈ (ASG2*f.vectors) * Diagonal(f.values) - @test f.values ≈ eigvals(asym_sg, ASG2) - @test prod(f.values) ≈ prod(eigvals(asym_sg/(ASG2))) atol=200ε - @test eigvecs(asym_sg, ASG2) == f.vectors - @test eigvals(f) === f.values - @test eigvecs(f) === f.vectors - @test_throws ErrorException f.Z - - d,v = eigen(asym_sg, ASG2) - @test d == f.values - @test v == f.vectors - - # solver for in-place U' \ A / U (#14896) - if !(eltya <: Integer) - for atyp in (eltya <: Real ? 
(Symmetric, Hermitian) : (Hermitian,)) - for utyp in (UpperTriangular, Diagonal), uplo in (:L, :U) - A = atyp(asym_sg, uplo) - U = utyp(ASG2) - @test UtiAUi!(copy(A), U) ≈ U' \ A / U - end - end - end - - # matrices of different types (#14896) - D = Diagonal(ASG2) - for uplo in (:L, :U) - if eltya <: Real - fs = eigen(Symmetric(asym_sg, uplo), ASG2) - @test fs.values ≈ f.values - @test abs.(fs.vectors) ≈ abs.(f.vectors) # may change sign - gs = eigen(Symmetric(asym_sg, uplo), D) - @test Symmetric(asym_sg, uplo)*gs.vectors ≈ (D*gs.vectors) * Diagonal(gs.values) - end - fh = eigen(Hermitian(asym_sg, uplo), ASG2) - @test fh.values ≈ f.values - @test abs.(fh.vectors) ≈ abs.(f.vectors) # may change sign - gh = eigen(Hermitian(asym_sg, uplo), D) - @test Hermitian(asym_sg, uplo)*gh.vectors ≈ (D*gh.vectors) * Diagonal(gh.values) - gd = eigen(Matrix(Hermitian(ASG2, uplo)), D) - @test Hermitian(ASG2, uplo) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) - gd = eigen(Hermitian(Tridiagonal(ASG2), uplo), D) - @test Hermitian(Tridiagonal(ASG2), uplo) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) - end - gd = eigen(D, D) - @test all(≈(1), gd.values) - @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) - gd = eigen(Matrix(D), D) - @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) - gd = eigen(D, Matrix(D)) - @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) - gd = eigen(Tridiagonal(ASG2), Matrix(D)) - @test Tridiagonal(ASG2) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) - end - @testset "Non-symmetric generalized eigenproblem" begin - if isa(a, Array) - a1_nsg = a[1:n1, 1:n1] - a2_nsg = a[n1+1:n2, n1+1:n2] - else - a1_nsg = view(a, 1:n1, 1:n1) - a2_nsg = view(a, n1+1:n2, n1+1:n2) - end - sortfunc = x -> real(x) + imag(x) - f = eigen(a1_nsg, a2_nsg; sortby = sortfunc) - @test a1_nsg*f.vectors ≈ (a2_nsg*f.vectors) * Diagonal(f.values) - @test f.values ≈ eigvals(a1_nsg, a2_nsg; sortby = sortfunc) - @test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg, sortby = sortfunc)) atol=50000ε - @test eigvecs(a1_nsg, a2_nsg; sortby = sortfunc) == f.vectors - @test_throws ErrorException f.Z - - g = eigen(a1_nsg, Diagonal(1:n1)) - @test a1_nsg*g.vectors ≈ (Diagonal(1:n1)*g.vectors) * Diagonal(g.values) - - d,v = eigen(a1_nsg, a2_nsg; sortby = sortfunc) - @test d == f.values - @test v == f.vectors - end - end -end - -@testset "eigenvalue computations with NaNs" begin - for eltya in (NaN16, NaN32, NaN) - @test_throws(ArgumentError, eigen(fill(eltya, 1, 1))) - @test_throws(ArgumentError, eigen(fill(eltya, 2, 2))) - test_matrix = rand(typeof(eltya),3,3) - test_matrix[1,3] = eltya - @test_throws(ArgumentError, eigen(test_matrix)) - @test_throws(ArgumentError, eigvals(test_matrix)) - @test_throws(ArgumentError, eigvecs(test_matrix)) - @test_throws(ArgumentError, eigen(Symmetric(test_matrix))) - @test_throws(ArgumentError, eigvals(Symmetric(test_matrix))) - @test_throws(ArgumentError, eigvecs(Symmetric(test_matrix))) - @test_throws(ArgumentError, eigen(Hermitian(test_matrix))) - @test_throws(ArgumentError, eigvals(Hermitian(test_matrix))) - @test_throws(ArgumentError, eigvecs(Hermitian(test_matrix))) - @test_throws(ArgumentError, eigen(Hermitian(complex.(test_matrix)))) - @test_throws(ArgumentError, eigvals(Hermitian(complex.(test_matrix)))) - @test_throws(ArgumentError, eigvecs(Hermitian(complex.(test_matrix)))) - @test eigen(Symmetric(test_matrix, :L)) isa Eigen - @test eigen(Hermitian(test_matrix, :L)) isa Eigen - end -end - -# test a matrix larger than 140-by-140 for #14174 -let aa 
= rand(200, 200) - for a in (aa, view(aa, 1:n, 1:n)) - f = eigen(a) - @test a ≈ f.vectors * Diagonal(f.values) / f.vectors - end -end - -@testset "rational promotion: issue #24935" begin - A = [1//2 0//1; 0//1 2//3] - for λ in (eigvals(A), @inferred(eigvals(Symmetric(A)))) - @test λ isa Vector{Float64} - @test λ ≈ [0.5, 2/3] - end -end - -@testset "text/plain (REPL) printing of Eigen and GeneralizedEigen" begin - A, B = randn(5,5), randn(5,5) - e = eigen(A) - ge = eigen(A, B) - valsstring = sprint((t, s) -> show(t, "text/plain", s), e.values) - vecsstring = sprint((t, s) -> show(t, "text/plain", s), e.vectors) - factstring = sprint((t, s) -> show(t, "text/plain", s), e) - @test factstring == "$(summary(e))\nvalues:\n$valsstring\nvectors:\n$vecsstring" -end - -@testset "eigen of an Adjoint" begin - Random.seed!(4) - A = randn(3,3) - @test eigvals(A') == eigvals(copy(A')) - @test eigen(A') == eigen(copy(A')) - @test eigmin(A') == eigmin(copy(A')) - @test eigmax(A') == eigmax(copy(A')) -end - -@testset "equality of eigen factorizations" begin - A = randn(3, 3) - @test eigen(A) == eigen(A) - @test hash(eigen(A)) == hash(eigen(A)) - @test isequal(eigen(A), eigen(A)) -end - -@testset "Float16" begin - A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.] - B = eigen(A) - B32 = eigen(Float32.(A)) - C = Float16[3 -2; 4 -1] - D = eigen(C) - D32 = eigen(Float32.(C)) - F = eigen(complex(C)) - F32 = eigen(complex(Float32.(C))) - @test B isa Eigen{Float16, Float16, Matrix{Float16}, Vector{Float16}} - @test B.values isa Vector{Float16} - @test B.vectors isa Matrix{Float16} - @test B.values ≈ B32.values - @test B.vectors ≈ B32.vectors - @test D isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}} - @test D.values isa Vector{ComplexF16} - @test D.vectors isa Matrix{ComplexF16} - @test D.values ≈ D32.values - @test D.vectors ≈ D32.vectors - @test F isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}} - @test F.values isa Vector{ComplexF16} - @test F.vectors isa Matrix{ComplexF16} - @test F.values ≈ F32.values - @test F.vectors ≈ F32.vectors -end - -end # module TestEigen diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl deleted file mode 100644 index 72233293ff515..0000000000000 --- a/stdlib/LinearAlgebra/test/factorization.jl +++ /dev/null @@ -1,94 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestFactorization -using Test, LinearAlgebra - -@testset "equality for factorizations - $f" for f in Any[ - bunchkaufman, - cholesky, - x -> cholesky(x, RowMaximum()), - eigen, - hessenberg, - lq, - lu, - qr, - x -> qr(x, ColumnNorm()), - svd, - schur, -] - A = randn(3, 3) - A = A * A' # ensure A is pos. def. and symmetric - F, G = f(A), f(A) - - @test F == G - @test isequal(F, G) - @test hash(F) == hash(G) - - f === hessenberg && continue - - # change all arrays in F to have eltype Float32 - F = typeof(F).name.wrapper(Base.mapany(1:nfields(F)) do i - x = getfield(F, i) - return x isa AbstractArray{Float64} ? Float32.(x) : x - end...) - # round all arrays in G to the nearest Float64 representable as Float32 - G = typeof(G).name.wrapper(Base.mapany(1:nfields(G)) do i - x = getfield(G, i) - return x isa AbstractArray{Float64} ? Float64.(Float32.(x)) : x - end...) 
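# Illustrative note on the reconstructions above: F now holds Float32
# factors while G holds Float64 factors whose entries are exactly
# representable in Float32, so the two factorizations should agree
# value-wise, e.g. (hypothetical values, not taken from the loop)
#     Float64(Float32(0.5)) == 0.5   # representable: round-trips losslessly
#     Float64(Float32(0.1)) == 0.1   # false, which is why G is rounded first
# The `broken=` flags on the comparisons below record the factorizations for
# which this cross-precision equality does not yet hold.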
- - @test F == G broken=!(f === eigen || f === qr) - @test isequal(F, G) broken=!(f === eigen || f === qr) - @test hash(F) == hash(G) -end - -@testset "size for factorizations - $f" for f in Any[ - bunchkaufman, - cholesky, - x -> cholesky(x, RowMaximum()), - hessenberg, - lq, - lu, - qr, - x -> qr(x, ColumnNorm()), - svd, -] - A = randn(3, 3) - A = A * A' # ensure A is pos. def. and symmetric - F = f(A) - @test size(F) == size(A) - @test size(F') == size(A') -end - -@testset "size for transpose factorizations - $f" for f in Any[ - bunchkaufman, - cholesky, - x -> cholesky(x, RowMaximum()), - hessenberg, - lq, - lu, - svd, -] - A = randn(3, 3) - A = A * A' # ensure A is pos. def. and symmetric - F = f(A) - @test size(F) == size(A) - @test size(transpose(F)) == size(transpose(A)) -end - -@testset "equality of QRCompactWY" begin - A = rand(100, 100) - F, G = qr(A), qr(A) - - @test F == G - @test isequal(F, G) - @test hash(F) == hash(G) - - G.T[28, 100] = 42 - - @test F != G - @test !isequal(F, G) - @test hash(F) != hash(G) -end - -end diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl deleted file mode 100644 index 33eb50d58836a..0000000000000 --- a/stdlib/LinearAlgebra/test/generic.jl +++ /dev/null @@ -1,628 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestGeneric - -using Test, LinearAlgebra, Random - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") - -isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) -using .Main.Quaternions - -isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays - -isdefined(Main, :DualNumbers) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "DualNumbers.jl")) -using .Main.DualNumbers - -Random.seed!(123) - -n = 5 # should be odd - -@testset for elty in (Int, Rational{BigInt}, Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) - # In the long run, these tests should step through Strang's - # axiomatic definition of determinants. - # If all axioms are satisfied and all the composition rules work, - # all determinants will be correct except for floating point errors. - if elty != Rational{BigInt} - @testset "det(A::Matrix)" begin - # The determinant of the identity matrix should always be 1. - for i = 1:10 - A = Matrix{elty}(I, i, i) - @test det(A) ≈ one(elty) - end - - # The determinant of a Householder reflection matrix should always be -1. - for i = 1:10 - A = Matrix{elty}(I, 10, 10) - A[i, i] = -one(elty) - @test det(A) ≈ -one(elty) - end - - # The determinant of a rotation matrix should always be 1. 
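# Worked step for the claim above: with R = [cos(θ) -sin(θ); sin(θ) cos(θ)],
#     det(R) = cos(θ)^2 - (-sin(θ))*sin(θ) = cos(θ)^2 + sin(θ)^2 = 1.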
- if elty != Int - for theta = convert(Vector{elty}, pi ./ [1:4;]) - R = [cos(theta) -sin(theta); - sin(theta) cos(theta)] - @test convert(elty, det(R)) ≈ one(elty) - end - end - end - end - if elty <: Int - A = rand(-n:n, n, n) + 10I - elseif elty <: Rational - A = Rational{BigInt}[rand(-n:n)/rand(1:n) for i = 1:n, j = 1:n] + 10I - elseif elty <: Real - A = convert(Matrix{elty}, randn(n,n)) + 10I - else - A = convert(Matrix{elty}, complex.(randn(n,n), randn(n,n))) - end - - @testset "logdet and logabsdet" begin - @test logdet(A[1,1]) == log(det(A[1,1])) - @test logdet(A) ≈ log(det(A)) - @test logabsdet(A)[1] ≈ log(abs(det(A))) - @test logabsdet(Matrix{elty}(-I, n, n))[2] == -1 - infinity = convert(float(elty), Inf) - @test logabsdet(zeros(elty, n, n)) == (-infinity, zero(elty)) - if elty <: Real - @test logabsdet(A)[2] == sign(det(A)) - @test_throws DomainError logdet(Matrix{elty}(-I, n, n)) - else - @test logabsdet(A)[2] ≈ sign(det(A)) - end - # logabsdet for Number" - x = A[1, 1] # getting a number of type elty - X = fill(x, 1, 1) - @test logabsdet(x)[1] ≈ logabsdet(X)[1] - @test logabsdet(x)[2] ≈ logabsdet(X)[2] - end - - @testset "det with nonstandard Number type" begin - elty <: Real && @test det(Dual.(triu(A), zero(A))) isa Dual - end -end - -@testset "diag" begin - A = Matrix(1.0I, 4, 4) - @test diag(A) == fill(1, 4) - @test diag(view(A, 1:3, 1:3)) == fill(1, 3) - @test diag(view(A, 1:2, 1:2)) == fill(1, 2) - @test_throws ArgumentError diag(rand(10)) -end - -@testset "generic axpy" begin - x = ['a','b','c','d','e'] - y = ['a','b','c','d','e'] - α, β = 'f', 'g' - @test_throws DimensionMismatch axpy!(α, x, ['g']) - @test_throws DimensionMismatch axpby!(α, x, β, ['g']) - @test_throws BoundsError axpy!(α, x, Vector(-1:5), y, Vector(1:7)) - @test_throws BoundsError axpy!(α, x, Vector(1:7), y, Vector(-1:5)) - @test_throws BoundsError axpy!(α, x, Vector(1:7), y, Vector(1:7)) - @test_throws DimensionMismatch axpy!(α, x, Vector(1:3), y, Vector(1:5)) -end - -@test !issymmetric(fill(1,5,3)) -@test !ishermitian(fill(1,5,3)) -@test (x = fill(1,3); cross(x,x) == zeros(3)) -@test_throws DimensionMismatch cross(fill(1,3), fill(1,4)) -@test_throws DimensionMismatch cross(fill(1,2), fill(1,3)) - -@test tr(Bidiagonal(fill(1,5),fill(0,4),:U)) == 5 - - -@testset "array and subarray" begin - aa = reshape([1.:6;], (2,3)) - for a in (aa, view(aa, 1:2, 1:2)) - am, an = size(a) - @testset "Scaling with rmul! and lmul" begin - @test rmul!(copy(a), 5.) == a*5 - @test lmul!(5., copy(a)) == a*5 - b = randn(2048) - subB = view(b, :, :) - @test rmul!(copy(b), 5.) == b*5 - @test rmul!(copy(subB), 5.) == subB*5 - @test lmul!(Diagonal([1.; 2.]), copy(a)) == a.*[1; 2] - @test lmul!(Diagonal([1; 2]), copy(a)) == a.*[1; 2] - @test rmul!(copy(a), Diagonal(1.:an)) == a.*Vector(1:an)' - @test rmul!(copy(a), Diagonal(1:an)) == a.*Vector(1:an)' - @test_throws DimensionMismatch lmul!(Diagonal(Vector{Float64}(undef,am+1)), a) - @test_throws DimensionMismatch rmul!(a, Diagonal(Vector{Float64}(undef,an+1))) - end - - @testset "Scaling with rdiv! and ldiv!" begin - @test rdiv!(copy(a), 5.) == a/5 - @test ldiv!(5., copy(a)) == a/5 - @test ldiv!(zero(a), 5., copy(a)) == a/5 - end - - @testset "Scaling with 3-argument mul!" begin - @test mul!(similar(a), 5., a) == a*5 - @test mul!(similar(a), a, 5.) 
== a*5 - @test mul!(similar(a), Diagonal([1.; 2.]), a) == a.*[1; 2] - @test mul!(similar(a), Diagonal([1; 2]), a) == a.*[1; 2] - @test_throws DimensionMismatch mul!(similar(a), Diagonal(Vector{Float64}(undef, am+1)), a) - @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 2), a, Diagonal(Vector{Float64}(undef, an+1))) - @test_throws DimensionMismatch mul!(similar(a), a, Diagonal(Vector{Float64}(undef, an+1))) - @test mul!(similar(a), a, Diagonal(1.:an)) == a.*Vector(1:an)' - @test mul!(similar(a), a, Diagonal(1:an)) == a.*Vector(1:an)' - end - - @testset "Scaling with 5-argument mul!" begin - @test mul!(copy(a), 5., a, 10, 100) == a*150 - @test mul!(copy(a), a, 5., 10, 100) == a*150 - @test mul!(vec(copy(a)), 5., a, 10, 100) == vec(a*150) - @test mul!(vec(copy(a)), a, 5., 10, 100) == vec(a*150) - @test_throws DimensionMismatch mul!([vec(copy(a)); 0], 5., a, 10, 100) - @test_throws DimensionMismatch mul!([vec(copy(a)); 0], a, 5., 10, 100) - @test mul!(copy(a), Diagonal([1.; 2.]), a, 10, 100) == 10a.*[1; 2] .+ 100a - @test mul!(copy(a), Diagonal([1; 2]), a, 10, 100) == 10a.*[1; 2] .+ 100a - @test mul!(copy(a), a, Diagonal(1.:an), 10, 100) == 10a.*Vector(1:an)' .+ 100a - @test mul!(copy(a), a, Diagonal(1:an), 10, 100) == 10a.*Vector(1:an)' .+ 100a - end - end -end - -@testset "scale real matrix by complex type" begin - @test_throws InexactError rmul!([1.0], 2.0im) - @test isequal([1.0] * 2.0im, ComplexF64[2.0im]) - @test isequal(2.0im * [1.0], ComplexF64[2.0im]) - @test isequal(Float32[1.0] * 2.0f0im, ComplexF32[2.0im]) - @test isequal(Float32[1.0] * 2.0im, ComplexF64[2.0im]) - @test isequal(Float64[1.0] * 2.0f0im, ComplexF64[2.0im]) - @test isequal(Float32[1.0] * big(2.0)im, Complex{BigFloat}[2.0im]) - @test isequal(Float64[1.0] * big(2.0)im, Complex{BigFloat}[2.0im]) - @test isequal(BigFloat[1.0] * 2.0im, Complex{BigFloat}[2.0im]) - @test isequal(BigFloat[1.0] * 2.0f0im, Complex{BigFloat}[2.0im]) -end -@testset "* and mul! for non-commutative scaling" begin - q = Quaternion(0.44567, 0.755871, 0.882548, 0.423612) - qmat = [Quaternion(0.015007, 0.355067, 0.418645, 0.318373)] - @test lmul!(q, copy(qmat)) != rmul!(copy(qmat), q) - @test q*qmat ≉ qmat*q - @test conj(q*qmat) ≈ conj(qmat)*conj(q) - @test q * (q \ qmat) ≈ qmat ≈ (qmat / q) * q - @test q\qmat ≉ qmat/q - alpha = Quaternion(rand(4)...) 
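# The five-argument calls below rely on the documented contract
#     mul!(C, A, B, α, β)   # C = A*B*α + C*β, in place
# With β = 0 (as below), the result reduces to A*B*α. Quaternion
# multiplication is non-commutative, so the reference value must keep the
# factors in exactly that order (qmat * q * alpha rather than alpha * qmat * q).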
- beta = Quaternion(0, 0, 0, 0) - @test mul!(copy(qmat), qmat, q, alpha, beta) ≈ qmat * q * alpha - @test mul!(copy(qmat), q, qmat, alpha, beta) ≈ q * qmat * alpha -end -@testset "ops on Numbers" begin - @testset for elty in [Float32,Float64,ComplexF32,ComplexF64] - a = rand(elty) - @test tr(a) == a - @test rank(zero(elty)) == 0 - @test rank(one(elty)) == 1 - @test !isfinite(cond(zero(elty))) - @test cond(a) == one(elty) - @test cond(a,1) == one(elty) - @test issymmetric(a) - @test ishermitian(one(elty)) - @test det(a) == a - @test norm(a) == abs(a) - @test norm(a, 0) == 1 - @test norm(0, 0) == 0 - end - - @test !issymmetric(NaN16) - @test !issymmetric(NaN32) - @test !issymmetric(NaN) - @test norm(NaN) === NaN - @test norm(NaN, 0) === NaN -end - -@test rank(zeros(4)) == 0 -@test rank(1:10) == 1 -@test rank(fill(0, 0, 0)) == 0 -@test rank([1.0 0.0; 0.0 0.9],0.95) == 1 -@test rank([1.0 0.0; 0.0 0.9],rtol=0.95) == 1 -@test rank([1.0 0.0; 0.0 0.9],atol=0.95) == 1 -@test rank([1.0 0.0; 0.0 0.9],atol=0.95,rtol=0.95)==1 -@test qr(big.([0 1; 0 0])).R == [0 1; 0 0] - -@test norm([2.4e-322, 4.4e-323]) ≈ 2.47e-322 -@test norm([2.4e-322, 4.4e-323], 3) ≈ 2.4e-322 -@test_throws ArgumentError opnorm(Matrix{Float64}(undef,5,5),5) - -# operator norm for zero-dimensional domain is zero (see #40370) -@testset "opnorm" begin - for m in (0, 1, 2) - @test @inferred(opnorm(fill(1,0,m))) == 0.0 - @test @inferred(opnorm(fill(1,m,0))) == 0.0 - end - for m in (1, 2) - @test @inferred(opnorm(fill(1im,1,m))) ≈ sqrt(m) - @test @inferred(opnorm(fill(1im,m,1))) ≈ sqrt(m) - end - @test @inferred(opnorm(fill(1,2,2))) ≈ 2 -end - -@testset "generic norm for arrays of arrays" begin - x = Vector{Int}[[1,2], [3,4]] - @test @inferred(norm(x)) ≈ sqrt(30) - @test norm(x, 0) == length(x) - @test norm(x, 1) ≈ 5+sqrt(5) - @test norm(x, 3) ≈ cbrt(5^3 +sqrt(5)^3) -end - -@testset "norm of transpose/adjoint equals norm of parent #32739" begin - for t in (transpose, adjoint), elt in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) - # Vector/matrix of scalars - for sz in ((2,), (2, 3)) - A = rand(elt, sz...) - Aᵀ = t(A) - @test norm(Aᵀ) ≈ norm(Matrix(Aᵀ)) - end - - # Vector/matrix of vectors/matrices - for sz_outer in ((2,), (2, 3)), sz_inner in ((3,), (1, 2)) - A = [rand(elt, sz_inner...) for _ in CartesianIndices(sz_outer)] - Aᵀ = t(A) - @test norm(Aᵀ) ≈ norm(Matrix(Matrix.(Aᵀ))) - end - end -end - -@testset "rotate! and reflect!" begin - x = rand(ComplexF64, 10) - y = rand(ComplexF64, 10) - c = rand(Float64) - s = rand(ComplexF64) - - x2 = copy(x) - y2 = copy(y) - rotate!(x, y, c, s) - @test x ≈ c*x2 + s*y2 - @test y ≈ -conj(s)*x2 + c*y2 - @test_throws DimensionMismatch rotate!([x; x], y, c, s) - - x3 = copy(x) - y3 = copy(y) - reflect!(x, y, c, s) - @test x ≈ c*x3 + s*y3 - @test y ≈ conj(s)*x3 - c*y3 - @test_throws DimensionMismatch reflect!([x; x], y, c, s) -end - -@testset "LinearAlgebra.reflectorApply!" begin - for T in (Float64, ComplexF64) - x = rand(T, 6) - τ = rand(T) - A = rand(T, 6) - B = LinearAlgebra.reflectorApply!(x, τ, copy(A)) - C = LinearAlgebra.reflectorApply!(x, τ, reshape(copy(A), (length(A), 1))) - @test B[1] ≈ C[1] ≈ A[1] - conj(τ)*(A[1] + dot(x[2:end], A[2:end])) - @test B[2:end] ≈ C[2:end] ≈ A[2:end] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))*x[2:end] - end -end - -@testset "axp(b)y! 
for element type without commutative multiplication" begin - α = [1 2; 3 4] - β = [5 6; 7 8] - x = fill([ 9 10; 11 12], 3) - y = fill([13 14; 15 16], 3) - axpy = axpy!(α, x, deepcopy(y)) - axpby = axpby!(α, x, β, deepcopy(y)) - @test axpy == x .* [α] .+ y - @test axpy != [α] .* x .+ y - @test axpby == x .* [α] .+ y .* [β] - @test axpby != [α] .* x .+ [β] .* y - axpy = axpy!(zero(α), x, deepcopy(y)) - axpby = axpby!(zero(α), x, one(β), deepcopy(y)) - @test axpy == y - @test axpy == y - @test axpby == y - @test axpby == y -end - -@testset "axpy! for x and y of different dimensions" begin - α = 5 - x = 2:5 - y = fill(1, 2, 4) - rx = [1 4] - ry = [2 8] - @test axpy!(α, x, rx, y, ry) == [1 1 1 1; 11 1 1 26] -end - -@testset "axp(b)y! for non strides input" begin - a = rand(5, 5) - @test axpby!(1, Hermitian(a), 1, zeros(size(a))) == Hermitian(a) - @test axpby!(1, 1.:5, 1, zeros(5)) == 1.:5 - @test axpy!(1, Hermitian(a), zeros(size(a))) == Hermitian(a) - @test axpy!(1, 1.:5, zeros(5)) == 1.:5 -end - -@testset "LinearAlgebra.axp(b)y! for stride-vector like input" begin - for T in (Float32, Float64, ComplexF32, ComplexF64) - a = rand(T, 5, 5) - @test axpby!(1, view(a, :, 1:5), 1, zeros(T, size(a))) == a - @test axpy!(1, view(a, :, 1:5), zeros(T, size(a))) == a - b = view(a, 25:-2:1) - @test axpby!(1, b, 1, zeros(T, size(b))) == b - @test axpy!(1, b, zeros(T, size(b))) == b - end -end - -@testset "norm and normalize!" begin - vr = [3.0, 4.0] - for Tr in (Float32, Float64) - for T in (Tr, Complex{Tr}) - v = convert(Vector{T}, vr) - @test norm(v) == 5.0 - w = normalize(v) - @test norm(w - [0.6, 0.8], Inf) < eps(Tr) - @test norm(w) == 1.0 - @test norm(normalize!(copy(v)) - w, Inf) < eps(Tr) - @test isempty(normalize!(T[])) - end - end -end - -@testset "normalize for multidimensional arrays" begin - - for arr in ( - fill(10.0, ()), # 0 dim - [1.0], # 1 dim - [1.0 2.0 3.0; 4.0 5.0 6.0], # 2-dim - rand(1,2,3), # higher dims - rand(1,2,3,4), - Dual.(randn(2,3), randn(2,3)), - OffsetArray([-1,0], (-2,)) # no index 1 - ) - @test normalize(arr) == normalize!(copy(arr)) - @test size(normalize(arr)) == size(arr) - @test axes(normalize(arr)) == axes(arr) - @test vec(normalize(arr)) == normalize(vec(arr)) - end - - @test typeof(normalize([1 2 3; 4 5 6])) == Array{Float64,2} -end - -@testset "normalize for scalars" begin - @test normalize(8.0) == 1.0 - @test normalize(-3.0) == -1.0 - @test normalize(-3.0, 1) == -1.0 - @test isnan(normalize(0.0)) -end - -@testset "Issue #30466" begin - @test norm([typemin(Int), typemin(Int)], Inf) == -float(typemin(Int)) - @test norm([typemin(Int), typemin(Int)], 1) == -2float(typemin(Int)) -end - -@testset "potential overflow in normalize!" begin - δ = inv(prevfloat(typemax(Float64))) - v = [δ, -δ] - - @test norm(v) === 7.866824069956793e-309 - w = normalize(v) - @test w ≈ [1/√2, -1/√2] - @test norm(w) === 1.0 - @test norm(normalize!(v) - w, Inf) < eps() -end - -@testset "normalize with Infs. Issue 29681." 
begin - @test all(isequal.(normalize([1, -1, Inf]), - [0.0, -0.0, NaN])) - @test all(isequal.(normalize([complex(1), complex(0, -1), complex(Inf, -Inf)]), - [0.0 + 0.0im, 0.0 - 0.0im, NaN + NaN*im])) -end - -@testset "Issue 14657" begin - @test det([true false; false true]) == det(Matrix(1I, 2, 2)) -end - -@test_throws ArgumentError LinearAlgebra.char_uplo(:Z) - -@testset "Issue 17650" begin - @test [0.01311489462160816, Inf] ≈ [0.013114894621608135, Inf] -end - -@testset "Issue 19035" begin - @test LinearAlgebra.promote_leaf_eltypes([1, 2, [3.0, 4.0]]) == Float64 - @test LinearAlgebra.promote_leaf_eltypes([[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]) == ComplexF64 - @test [1, 2, 3] ≈ [1, 2, 3] - @test [[1, 2], [3, 4]] ≈ [[1, 2], [3, 4]] - @test [[1, 2], [3, 4]] ≈ [[1.0-eps(), 2.0+eps()], [3.0+2eps(), 4.0-1e8eps()]] - @test [[1, 2], [3, 4]] ≉ [[1.0-eps(), 2.0+eps()], [3.0+2eps(), 4.0-1e9eps()]] - @test [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]] ≈ [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]] -end - -@testset "Issue 40128" begin - @test det(BigInt[9 1 8 0; 0 0 8 7; 7 6 8 3; 2 9 7 7])::BigInt == -1 - @test det(BigInt[1 big(2)^65+1; 3 4])::BigInt == (4 - 3*(big(2)^65+1)) -end - -# Minimal modulo number type - but not subtyping Number -struct ModInt{n} - k - ModInt{n}(k) where {n} = new(mod(k,n)) - ModInt{n}(k::ModInt{n}) where {n} = k -end -Base.:+(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k + b.k) -Base.:-(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k - b.k) -Base.:*(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k * b.k) -Base.:-(a::ModInt{n}) where {n} = ModInt{n}(-a.k) -Base.inv(a::ModInt{n}) where {n} = ModInt{n}(invmod(a.k, n)) -Base.:/(a::ModInt{n}, b::ModInt{n}) where {n} = a*inv(b) - -Base.isfinite(a::ModInt{n}) where {n} = isfinite(a.k) -Base.zero(::Type{ModInt{n}}) where {n} = ModInt{n}(0) -Base.zero(::ModInt{n}) where {n} = ModInt{n}(0) -Base.one(::Type{ModInt{n}}) where {n} = ModInt{n}(1) -Base.one(::ModInt{n}) where {n} = ModInt{n}(1) -Base.conj(a::ModInt{n}) where {n} = a -LinearAlgebra.lupivottype(::Type{ModInt{n}}) where {n} = RowNonZero() -Base.adjoint(a::ModInt{n}) where {n} = ModInt{n}(conj(a)) -Base.transpose(a::ModInt{n}) where {n} = a # see Issue 20978 -LinearAlgebra.Adjoint(a::ModInt{n}) where {n} = adjoint(a) -LinearAlgebra.Transpose(a::ModInt{n}) where {n} = transpose(a) - -@testset "Issue 22042" begin - A = [ModInt{2}(1) ModInt{2}(0); ModInt{2}(1) ModInt{2}(1)] - b = [ModInt{2}(1), ModInt{2}(0)] - - @test A*(A\b) == b - @test A*(lu(A)\b) == b - @test A*(lu(A, NoPivot())\b) == b - @test A*(lu(A, RowNonZero())\b) == b - @test_throws MethodError lu(A, RowMaximum()) - - # Needed for pivoting: - Base.abs(a::ModInt{n}) where {n} = a - Base.:<(a::ModInt{n}, b::ModInt{n}) where {n} = a.k < b.k - @test A*(lu(A, RowMaximum())\b) == b - - A = [ModInt{2}(0) ModInt{2}(1); ModInt{2}(1) ModInt{2}(1)] - @test A*(A\b) == b - @test A*(lu(A)\b) == b - @test A*(lu(A, RowMaximum())\b) == b - @test A*(lu(A, RowNonZero())\b) == b -end - -@testset "Issue 18742" begin - @test_throws DimensionMismatch ones(4,5)/zeros(3,6) - @test_throws DimensionMismatch ones(4,5)\zeros(3,6) -end -@testset "fallback throws properly for AbstractArrays with dimension > 2" begin - @test_throws ErrorException adjoint(rand(2,2,2,2)) - @test_throws ErrorException transpose(rand(2,2,2,2)) -end - -@testset "generic functions for checking whether matrices have banded structure" begin - using LinearAlgebra: isbanded - pentadiag = [1 2 3; 4 5 6; 7 8 9] - tridiag = [1 2 0; 4 5 6; 0 8 9] - ubidiag = [1 2 0; 0 5 6; 
0 0 9] - lbidiag = [1 0 0; 4 5 0; 0 8 9] - adiag = [1 0 0; 0 5 0; 0 0 9] - @testset "istriu" begin - @test !istriu(pentadiag) - @test istriu(pentadiag, -2) - @test !istriu(tridiag) - @test istriu(tridiag, -1) - @test istriu(ubidiag) - @test !istriu(ubidiag, 1) - @test !istriu(lbidiag) - @test istriu(lbidiag, -1) - @test istriu(adiag) - end - @testset "istril" begin - @test !istril(pentadiag) - @test istril(pentadiag, 2) - @test !istril(tridiag) - @test istril(tridiag, 1) - @test !istril(ubidiag) - @test istril(ubidiag, 1) - @test istril(lbidiag) - @test !istril(lbidiag, -1) - @test istril(adiag) - end - @testset "isbanded" begin - @test isbanded(pentadiag, -2, 2) - @test !isbanded(pentadiag, -1, 2) - @test !isbanded(pentadiag, -2, 1) - @test isbanded(tridiag, -1, 1) - @test !isbanded(tridiag, 0, 1) - @test !isbanded(tridiag, -1, 0) - @test isbanded(ubidiag, 0, 1) - @test !isbanded(ubidiag, 1, 1) - @test !isbanded(ubidiag, 0, 0) - @test isbanded(lbidiag, -1, 0) - @test !isbanded(lbidiag, 0, 0) - @test !isbanded(lbidiag, -1, -1) - @test isbanded(adiag, 0, 0) - @test !isbanded(adiag, -1, -1) - @test !isbanded(adiag, 1, 1) - end - @testset "isdiag" begin - @test !isdiag(tridiag) - @test !isdiag(ubidiag) - @test !isdiag(lbidiag) - @test isdiag(adiag) - end -end - -@testset "missing values" begin - @test ismissing(norm(missing)) - x = [5, 6, missing] - y = [missing, 5, 6] - for p in (-Inf, -1, 1, 2, 3, Inf) - @test ismissing(norm(x, p)) - @test ismissing(norm(y, p)) - end - @test_broken ismissing(norm(x, 0)) -end - -@testset "peakflops" begin - @test LinearAlgebra.peakflops(1024, eltype=Float32, ntrials=2) > 0 -end - -@testset "NaN handling: Issue 28972" begin - @test all(isnan, rmul!([NaN], 0.0)) - @test all(isnan, rmul!(Any[NaN], 0.0)) - @test all(isnan, lmul!(0.0, [NaN])) - @test all(isnan, lmul!(0.0, Any[NaN])) - - @test all(!isnan, rmul!([NaN], false)) - @test all(!isnan, rmul!(Any[NaN], false)) - @test all(!isnan, lmul!(false, [NaN])) - @test all(!isnan, lmul!(false, Any[NaN])) -end - -@testset "adjtrans dot" begin - for t in (transpose, adjoint), T in (ComplexF64, Quaternion{Float64}) - x, y = t(rand(T, 10)), t(rand(T, 10)) - X, Y = copy(x), copy(y) - @test dot(x, y) ≈ dot(X, Y) - x, y = t([rand(T, 2, 2) for _ in 1:5]), t([rand(T, 2, 2) for _ in 1:5]) - X, Y = copy(x), copy(y) - @test dot(x, y) ≈ dot(X, Y) - x, y = t(rand(T, 10, 5)), t(rand(T, 10, 5)) - X, Y = copy(x), copy(y) - @test dot(x, y) ≈ dot(X, Y) - x = t([rand(T, 2, 2) for _ in 1:5, _ in 1:5]) - y = t([rand(T, 2, 2) for _ in 1:5, _ in 1:5]) - X, Y = copy(x), copy(y) - @test dot(x, y) ≈ dot(X, Y) - x, y = t([rand(T, 2, 2) for _ in 1:5]), t([rand(T, 2, 2) for _ in 1:5]) - end -end - -@testset "generalized dot #32739" begin - for elty in (Int, Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) - n = 10 - if elty <: Int - A = rand(-n:n, n, n) - x = rand(-n:n, n) - y = rand(-n:n, n) - elseif elty <: Real - A = convert(Matrix{elty}, randn(n,n)) - x = rand(elty, n) - y = rand(elty, n) - else - A = convert(Matrix{elty}, complex.(randn(n,n), randn(n,n))) - x = rand(elty, n) - y = rand(elty, n) - end - @test dot(x, A, y) ≈ dot(A'x, y) ≈ *(x', A, y) ≈ (x'A)*y - @test dot(x, A', y) ≈ dot(A*x, y) ≈ *(x', A', y) ≈ (x'A')*y - elty <: Real && @test dot(x, transpose(A), y) ≈ dot(x, transpose(A)*y) ≈ *(x', transpose(A), y) ≈ (x'*transpose(A))*y - B = reshape([A], 1, 1) - x = [x] - y = [y] - @test dot(x, B, y) ≈ dot(B'x, y) - @test dot(x, B', y) ≈ dot(B*x, y) - elty <: Real && @test dot(x, transpose(B), y) ≈ dot(x, 
transpose(B)*y) - end -end - -@testset "condskeel #34512" begin - A = rand(3, 3) - @test condskeel(A) ≈ condskeel(A, [8,8,8]) -end - -end # module TestGeneric diff --git a/stdlib/LinearAlgebra/test/givens.jl b/stdlib/LinearAlgebra/test/givens.jl deleted file mode 100644 index a2556b45d1280..0000000000000 --- a/stdlib/LinearAlgebra/test/givens.jl +++ /dev/null @@ -1,115 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestGivens - -using Test, LinearAlgebra, Random -using LinearAlgebra: Givens, Rotation - -# Test givens rotations -@testset "Test Givens for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) - if elty <: Real - raw_A = convert(Matrix{elty}, randn(10,10)) - else - raw_A = convert(Matrix{elty}, complex.(randn(10,10),randn(10,10))) - end - @testset for A in (raw_A, view(raw_A, 1:10, 1:10)) - Ac = copy(A) - R = Rotation(Givens{elty}[]) - T = Rotation(Givens{elty}[]) - for j = 1:8 - for i = j+2:10 - G, _ = givens(A, j+1, i, j) - lmul!(G, A) - rmul!(A, adjoint(G)) - lmul!(G, R) - rmul!(T, G) - - @test lmul!(G, Matrix{elty}(I, 10, 10)) == [G[i,j] for i=1:10,j=1:10] - - @testset "transposes" begin - @test (@inferred G'*G)*Matrix(elty(1)I, 10, 10) ≈ Matrix(I, 10, 10) - @test (G*Matrix(elty(1)I, 10, 10))*G' ≈ Matrix(I, 10, 10) - @test (@inferred copy(R'))*(R*Matrix(elty(1)I, 10, 10)) ≈ Matrix(I, 10, 10) - @test_throws ErrorException transpose(G) - @test_throws ErrorException transpose(R) - end - end - end - @test (R')' === R - # test products of Givens and Rotations - for r in (R, T, *(R.rotations...), *(R.rotations[1], *(R.rotations[2:end]...))) - @test r * A ≈ (A' * r')' ≈ lmul!(r, copy(A)) - @test A * r ≈ (r' * A')' ≈ rmul!(copy(A), r) - @test r' * A ≈ lmul!(r', copy(A)) - @test A * r' ≈ rmul!(copy(A), r') - end - @test_throws ArgumentError givens(A, 3, 3, 2) - @test_throws ArgumentError givens(one(elty),zero(elty),2,2) - G, _ = givens(one(elty),zero(elty),11,12) - @test_throws DimensionMismatch lmul!(G, A) - @test_throws DimensionMismatch rmul!(A, adjoint(G)) - @test abs.(A) ≈ abs.(hessenberg(Ac).H) - @test opnorm(R*Matrix{elty}(I, 10, 10)) ≈ one(elty) - - I10 = Matrix{elty}(I, 10, 10) - G, _ = givens(one(elty),zero(elty),9,10) - @test (G*I10)' * (G*I10) ≈ I10 - K, _ = givens(zero(elty),one(elty),9,10) - @test (K*I10)' * (K*I10) ≈ I10 - end - - @testset "Givens * vectors" begin - for x in (raw_A[:,1], view(raw_A, :, 1)) - G, r = @inferred givens(x[2], x[4], 2, 4) - @test (G*x)[2] ≈ r - @test abs((G*x)[4]) < eps(real(elty)) - - G, r = @inferred givens(x, 2, 4) - @test (G*x)[2] ≈ r - @test abs((G*x)[4]) < eps(real(elty)) - - G, r = givens(x, 4, 2) - @test (G*x)[4] ≈ r - @test abs((G*x)[2]) < eps(real(elty)) - end - d = rand(4) - l = d[1] - g2, l = givens(l, d[2], 1, 2) - g3, l = givens(l, d[3], 1, 3) - g4, l = givens(l, d[4], 1, 4) - @test g2*(g3*d) ≈ g2*g3*d ≈ (g2*g3)*d - @test g2*g3*g4 isa Rotation - end -end - -# 36430 -# dimensional correctness: -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs - -@testset "testing dimensions with Furlongs" begin - @test_throws MethodError givens(Furlong(1.0), Furlong(2.0), 1, 2) -end - -const TNumber = Union{Float64,ComplexF64} -struct MockUnitful{T<:TNumber} <: Number - data::T - MockUnitful(data::T) where T<:TNumber = new{T}(data) -end -import Base: *, /, one, oneunit -*(a::MockUnitful{T}, b::T) where T<:TNumber = 
MockUnitful(a.data * b) -*(a::T, b::MockUnitful{T}) where T<:TNumber = MockUnitful(a * b.data) -*(a::MockUnitful{T}, b::MockUnitful{T}) where T<:TNumber = MockUnitful(a.data * b.data) -/(a::MockUnitful{T}, b::MockUnitful{T}) where T<:TNumber = a.data / b.data -one(::Type{<:MockUnitful{T}}) where T = one(T) -oneunit(::Type{<:MockUnitful{T}}) where T = MockUnitful(one(T)) - -@testset "unitful givens rotation unitful $T " for T in (Float64, ComplexF64) - g, r = givens(MockUnitful(T(3)), MockUnitful(T(4)), 1, 2) - @test g.c ≈ 3/5 - @test g.s ≈ 4/5 - @test r.data ≈ 5.0 -end - -end # module TestGivens diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl deleted file mode 100644 index 61e498211ca7b..0000000000000 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ /dev/null @@ -1,241 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestHessenberg - -using Test, LinearAlgebra, Random - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs - -# for tuple tests below -≅(x,y) = all(p -> p[1] ≈ p[2], zip(x,y)) - -let n = 10 - Random.seed!(1234321) - - Areal = randn(n,n)/2 - Aimg = randn(n,n)/2 - b_ = randn(n) - B_ = randn(n,3) - - # UpperHessenberg methods not covered by the tests below - @testset "UpperHessenberg" begin - A = Areal - H = UpperHessenberg(A) - AH = triu(A,-1) - for k in -2:2 - @test istril(H, k) == istril(AH, k) - @test istriu(H, k) == istriu(AH, k) - @test (k <= -1 ? istriu(H, k) : !istriu(H, k)) - end - @test UpperHessenberg(H) === H - @test parent(H) === A - @test Matrix(H) == Array(H) == H == AH - @test real(H) == real(AH) - @test real(UpperHessenberg{ComplexF64}(A)) == H - @test real(UpperHessenberg{ComplexF64}(H)) == H - sim = similar(H, ComplexF64) - @test sim isa UpperHessenberg{ComplexF64} - @test size(sim) == size(H) - for x in (2,2+3im) - @test x*H == H*x == x*AH - for op in (+,-) - @test op(H,x*I) == op(AH,x*I) == op(op(x*I,H)) - @test op(H,x*I)*x == op(AH,x*I)*x == x*op(H,x*I) - end - end - @test [H[i,j] for i=1:size(H,1), j=1:size(H,2)] == triu(A,-1) - H1 = LinearAlgebra.fillstored!(copy(H), 1) - @test H1 == triu(fill(1, n,n), -1) - @test tril(H1.data,-2) == tril(H.data,-2) - A2, H2 = copy(A), copy(H) - A2[1:4,3]=H2[1:4,3]=1:4 - H2[5,3]=0 - @test H2 == triu(A2,-1) - @test_throws ArgumentError H[5,3]=1 - Hc = UpperHessenberg(Areal + im .* Aimg) - AHc = triu(Areal + im .* Aimg,-1) - @test real(Hc) == real(AHc) - @test imag(Hc) == imag(AHc) - @test Array(copy(adjoint(Hc))) == adjoint(Array(Hc)) - @test Array(copy(transpose(Hc))) == transpose(Array(Hc)) - @test rmul!(copy(Hc), 2.0) == lmul!(2.0, copy(Hc)) - H = UpperHessenberg(Areal) - @test Array(Hc + H) == Array(Hc) + Array(H) - @test Array(Hc - H) == Array(Hc) - Array(H) - @testset "Preserve UpperHessenberg shape (issue #39388)" begin - for H = (UpperHessenberg(Areal), UpperHessenberg(Furlong.(Areal))) - if eltype(H) <: Furlong - A = Furlong.(rand(n,n)) - d = Furlong.(rand(n)) - dl = Furlong.(rand(n-1)) - du = Furlong.(rand(n-1)) - us = Furlong(1)*I - else - A = rand(n,n) - d = rand(n) - dl = rand(n-1) - du = rand(n-1) - us = 1*I - end - @testset "$op" for op = (+,-) - for x = (us, Diagonal(d), Bidiagonal(d,dl,:U), Bidiagonal(d,dl,:L), - Tridiagonal(dl,d,du), SymTridiagonal(d,dl), - UpperTriangular(A), UnitUpperTriangular(A)) - @test op(H,x) == op(Array(H),x) - @test op(x,H) == 
op(x,Array(H)) - @test op(H,x) isa UpperHessenberg - @test op(x,H) isa UpperHessenberg - end - end - end - H = UpperHessenberg(Areal) - A = randn(n,n) - d = randn(n) - dl = randn(n-1) - @testset "Multiplication/division" begin - for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U), - UpperTriangular(A), UnitUpperTriangular(A)) - @test (H*x)::UpperHessenberg ≈ Array(H)*x - @test (x*H)::UpperHessenberg ≈ x*Array(H) - @test H/x ≈ Array(H)/x# broken = eltype(H) <: Furlong && x isa UpperTriangular - @test x\H ≈ x\Array(H)# broken = eltype(H) <: Furlong && x isa UpperTriangular - @test H/x isa UpperHessenberg - @test x\H isa UpperHessenberg - end - x = Bidiagonal(d, dl, :L) - @test H*x == Array(H)*x - @test x*H == x*Array(H) - @test H/x == Array(H)/x - @test x\H == x\Array(H) - end - H = UpperHessenberg(Furlong.(Areal)) - for A in (A, Furlong.(A)) - @testset "Multiplication/division Furlong" begin - for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U), - UpperTriangular(A), UnitUpperTriangular(A)) - @test map(x -> x.val, (H*x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)*x) - @test map(x -> x.val, (x*H)::UpperHessenberg) ≈ map(x -> x.val, x*Array(H)) - @test map(x -> x.val, (H/x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)/x) - @test map(x -> x.val, (x\H)::UpperHessenberg) ≈ map(x -> x.val, x\Array(H)) - end - x = Bidiagonal(d, dl, :L) - @test H*x == Array(H)*x - @test x*H == x*Array(H) - @test H/x == Array(H)/x - @test x\H == x\Array(H) - end - end - end - end - - @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int), herm in (false, true) - A_ = eltya == Int ? - rand(1:7, n, n) : - convert(Matrix{eltya}, eltya <: Complex ? - complex.(Areal, Aimg) : - Areal) - A = herm ? Hermitian(A_ + A_') : A_ - - H = hessenberg(A) - @test Hessenberg(H) === H - eltyh = eltype(H) - @test size(H.Q, 1) == size(A, 1) - @test size(H.Q, 2) == size(A, 2) - @test size(H.Q) == size(A) - @test size(H) == size(A) - @test_throws ErrorException H.Z - @test convert(Array, H) ≈ A - @test (H.Q * H.H) * H.Q' ≈ A ≈ (Matrix(H.Q) * Matrix(H.H)) * Matrix(H.Q)' - @test (H.Q' * A) * H.Q ≈ H.H - #getindex for HessenbergQ - @test H.Q[1,1] ≈ Array(H.Q)[1,1] - @test det(H.Q) ≈ det(Matrix(H.Q)) - @test logabsdet(H.Q)[1] ≈ logabsdet(Matrix(H.Q))[1] atol=2n*eps(float(real(eltya))) - - # REPL show - hessstring = sprint((t, s) -> show(t, "text/plain", s), H) - qstring = sprint((t, s) -> show(t, "text/plain", s), H.Q) - hstring = sprint((t, s) -> show(t, "text/plain", s), H.H) - @test hessstring == "$(summary(H))\nQ factor: $qstring\nH factor:\n$hstring" - - #iterate - q,h = H - @test q == H.Q - @test h == H.H - - @test convert(Array, 2 * H) ≈ 2 * A ≈ convert(Array, H * 2) - @test convert(Array, H + 2I) ≈ A + 2I ≈ convert(Array, 2I + H) - @test convert(Array, H + (2+4im)I) ≈ A + (2+4im)I ≈ convert(Array, (2+4im)I + H) - @test convert(Array, H - 2I) ≈ A - 2I ≈ -convert(Array, 2I - H) - @test convert(Array, -H) == -convert(Array, H) - @test convert(Array, 2*(H + (2+4im)I)) ≈ 2A + (4+8im)I - - b = convert(Vector{eltype(H)}, b_) - B = convert(Matrix{eltype(H)}, B_) - @test H \ b ≈ A \ b ≈ H \ complex(b) - @test H \ B ≈ A \ B ≈ H \ complex(B) - @test (H - I) \ B ≈ (A - I) \ B - @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B - @test b' / H ≈ b' / A ≈ complex.(b') / H - @test B' / H ≈ B' / A ≈ complex(B') / H - @test B' / (H - I) ≈ B' / (A - I) - @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) - @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B - @test B' / (H - (3+4im)I)' ≈ B' / (A - (3+4im)I)' - - for shift in (0,1,3+4im) - @test det(H + 
shift*I) ≈ det(A + shift*I) - @test logabsdet(H + shift*I) ≅ logabsdet(A + shift*I) - end - - HM = Matrix(h) - @test dot(b, h, b) ≈ dot(h'b, b) ≈ dot(b, HM, b) ≈ dot(HM'b, b) - c = b .+ 1 - @test dot(b, h, c) ≈ dot(h'b, c) ≈ dot(b, HM, c) ≈ dot(HM'b, c) - end -end - -@testset "hessenberg(::AbstractMatrix)" begin - n = 10 - A = Tridiagonal(rand(n-1), rand(n), rand(n-1)) - H = hessenberg(A) - @test convert(Array, H) ≈ A -end - -# check logdet on a matrix that has a positive determinant -let A = [0.5 0.1 0.9 0.4; 0.9 0.7 0.5 0.4; 0.3 0.4 0.9 0.0; 0.4 0.0 0.0 0.5] - @test logdet(hessenberg(A)) ≈ logdet(A) ≈ -3.5065578973199822 -end - -@testset "Base.propertynames" begin - F = hessenberg([4. 9. 7.; 4. 4. 1.; 4. 3. 2.]) - @test Base.propertynames(F) == (:Q, :H, :μ) - @test Base.propertynames(F, true) == (:Q, :H, :μ, :τ, :factors, :uplo) -end - -@testset "adjoint of Hessenberg" begin - Ar = randn(5, 5) - Ac = complex.(randn(5, 5), randn(5, 5)) - b = ones(size(Ar, 1)) - - for A in (Ar, Ac) - F = hessenberg(A) - @test A'\b ≈ F'\b - end -end - -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "Conversion to AbstractArray" begin - # tests corresponding to #34995 - A = ImmutableArray([1 2 3; 4 5 6; 7 8 9]) - H = UpperHessenberg(A) - - @test convert(AbstractArray{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H - @test convert(AbstractMatrix{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H -end - -end # module TestHessenberg diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl deleted file mode 100644 index 2c5d92541af93..0000000000000 --- a/stdlib/LinearAlgebra/test/lapack.jl +++ /dev/null @@ -1,732 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestLAPACK - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasInt - -@test_throws ArgumentError LinearAlgebra.LAPACK.chkuplo('Z') -@test_throws ArgumentError LinearAlgebra.LAPACK.chkside('Z') -@test_throws ArgumentError LinearAlgebra.LAPACK.chkdiag('Z') -@test_throws ArgumentError LinearAlgebra.LAPACK.chktrans('Z') - -@testset "syevr" begin - Random.seed!(123) - Ainit = randn(5,5) - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - if elty == ComplexF32 || elty == ComplexF64 - A = complex.(Ainit, Ainit) - else - A = Ainit - end - A = convert(Array{elty, 2}, A) - Asym = A'A - vals, Z = LAPACK.syevr!('V', copy(Asym)) - @test Z*(Diagonal(vals)*Z') ≈ Asym - @test all(vals .> 0.0) - @test LAPACK.syevr!('N', 'V', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[vals .< 1.0] - @test LAPACK.syevr!('N', 'I', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[4:5] - @test vals ≈ LAPACK.syev!('N', 'U', copy(Asym)) - @test vals ≈ LAPACK.syevd!('N', 'U', copy(Asym)) - vals_test, Z_test = LAPACK.syev!('V', 'U', copy(Asym)) - @test vals_test ≈ vals - @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym - vals_test, Z_test = LAPACK.syevd!('V', 'U', copy(Asym)) - @test vals_test ≈ vals - @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym - @test_throws DimensionMismatch LAPACK.sygvd!(1, 'V', 'U', copy(Asym), zeros(elty, 6, 6)) - end -end - -@testset "gglse" begin - let - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = convert(Array{elty, 2}, [1 1 1 1; 1 3 1 1; 1 -1 3 1; 1 1 1 3; 1 1 1 -1]) - c = convert(Array{elty, 1}, [2, 1, 6, 3, 1]) - B = convert(Array{elty, 2}, [1 1 1 -1; 1 -1 1 1; 1 1 -1 1]) - d = convert(Array{elty, 1}, [1, 3, -1]) - @test LAPACK.gglse!(A, c, B, d)[1] ≈ convert(Array{elty}, [0.5, -0.5, 1.5, 0.5]) - end - end -end - -@testset "gebrd, bdsqr, throw for bdsdc" begin - let - n = 10 - @testset for elty in (Float32, Float64) - d, e = convert(Vector{elty}, randn(n)), convert(Vector{elty}, randn(n - 1)) - U, Vt, C = Matrix{elty}(I, n, n), Matrix{elty}(I, n, n), Matrix{elty}(I, n, n) - s, _ = LAPACK.bdsqr!('U', copy(d), copy(e), Vt, U, C) - @test Array(Bidiagonal(d, e, :U)) ≈ U*Diagonal(s)*Vt - - @test_throws ArgumentError LAPACK.bdsqr!('A', d, e, Vt, U, C) - @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, [e; 1], Vt, U, C) - @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, e, Vt[1:end - 1, :], U, C) - @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, e, Vt, U[:,1:end - 1], C) - @test_throws DimensionMismatch LAPACK.bdsqr!('U', d, e, Vt, U, C[1:end - 1, :]) - - @test_throws ArgumentError LAPACK.bdsdc!('U','Z',d,e) - - A = rand(elty,n,n) - B = copy(A) - B, d, e, tauq, taup = LAPACK.gebrd!(B) - U, Vt, C = Matrix{elty}(I, n, n), Matrix{elty}(I, n, n), Matrix{elty}(I, n, n) - s, _ = LAPACK.bdsqr!('U',d,e[1:n-1],Vt, U, C) - @test s ≈ svdvals(A) - end - end -end - -@testset "Issue #7886" begin - let - x, r = LAPACK.gelsy!([0 1; 0 2; 0 3.], [2, 4, 6.]) - @test x ≈ [0,2] - @test r == 1 - end -end - -@testset "geqrt(3)" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - B = copy(A) - C,T = LAPACK.geqrt!(A,zeros(elty,10,10)) - D,S = LAPACK.geqrt3!(A,zeros(elty,10,10)) - @test C ≈ D - end -end - -@testset "gbtrf and gbtrs" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - d = rand(elty,6) - dl = rand(elty,5) - du = rand(elty,5) - dl2 = rand(elty,4) - AB = zeros(elty,6,6) - AB[6,1:4] = dl2 - AB[5,1:5] = dl - AB[4,:] = d - AB[3,2:6] = du 
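# Band storage expected by gbtrf!/gbtrs!: with kl = 2 subdiagonals and
# ku = 1 superdiagonal, AB has 2*kl + ku + 1 = 6 rows and entry A[i,j] is
# stored at AB[kl + ku + 1 + i - j, j]. The main diagonal therefore sits in
# row 4, the superdiagonal in row 3, and the two subdiagonals in rows 5 and
# 6, matching the assignments above; the extra kl rows hold the fill-in
# created during the factorization.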
- AB,ipiv = LAPACK.gbtrf!(2,1,6,AB) - C = rand(elty,6,6) - D = copy(C) - D = LAPACK.gbtrs!('N',2,1,6,AB,ipiv,D) - A = diagm(-2 => dl2, -1 => dl, 0 => d, 1 => du) - @test A\C ≈ D - @test_throws DimensionMismatch LAPACK.gbtrs!('N',2,1,6,AB,ipiv,Matrix{elty}(undef,7,6)) - @test_throws LinearAlgebra.LAPACKException LAPACK.gbtrf!(2,1,6,zeros(elty,6,6)) - end -end - - -@testset "geqp3, geqrt error handling" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - x10, x11 = Vector{elty}.(undef, (10, 11)) - y10, y11 = Vector{LinearAlgebra.BlasInt}.(undef, (10, 11)) - A10x10, A11x10, A10x11, A11x11 = Matrix{elty}.(undef, ((10,10), (11,10), (10,11), (11,11))) - @test_throws DimensionMismatch LAPACK.geqlf!(A10x10, x11) - @test_throws DimensionMismatch LAPACK.gelqf!(A10x10, x11) - @test_throws DimensionMismatch LAPACK.geqp3!(A10x10, y11, x10) - @test_throws DimensionMismatch LAPACK.geqp3!(A10x10, y10, x11) - @test_throws ArgumentError LAPACK.geqrt!(A10x10, A11x10) - @test_throws DimensionMismatch LAPACK.geqrt3!(A10x10, A11x10) - @test_throws DimensionMismatch LAPACK.geqrt3!(A10x11, A11x11) - @test_throws DimensionMismatch LAPACK.geqrf!(A10x10, x11) - @test_throws DimensionMismatch LAPACK.gerqf!(A10x10, x11) - end -end - -@testset "gels, gesv, getrs, getri error handling" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A10x10, B11x11 = Matrix{elty}.(undef, ((10,10), (11,11))) - x10, x11 = Vector{LinearAlgebra.BlasInt}.(undef, (10, 11)) - @test_throws DimensionMismatch LAPACK.gels!('N',A10x10,B11x11) - @test_throws DimensionMismatch LAPACK.gels!('T',A10x10,B11x11) - @test_throws DimensionMismatch LAPACK.gesv!(A10x10,B11x11) - @test_throws DimensionMismatch LAPACK.getrs!('N',A10x10,x10,B11x11) - @test_throws DimensionMismatch LAPACK.getrs!('T',A10x10,x10,B11x11) - @test_throws DimensionMismatch LAPACK.getri!(A10x10,x11) - end -end - -@testset "gelsy, gelsd" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty, 10, 10) - B = rand(elty, 10, 10) - C, j = LAPACK.gelsd!(copy(A),copy(B)) - D, k = LAPACK.gelsy!(copy(A),copy(B)) - @test C ≈ D rtol=4*eps(cond(A)) - @test_throws DimensionMismatch LAPACK.gelsd!(A,rand(elty,12,10)) - @test_throws DimensionMismatch LAPACK.gelsy!(A,rand(elty,12,10)) - end -end - -@testset "gglse errors" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - @test_throws DimensionMismatch LAPACK.gglse!(A,zeros(elty,10),rand(elty,12,11),zeros(elty,12)) - @test_throws DimensionMismatch LAPACK.gglse!(A,zeros(elty,11),rand(elty,10,10),zeros(elty,10)) - @test_throws DimensionMismatch LAPACK.gglse!(A,zeros(elty,10),rand(elty,10,10),zeros(elty,11)) - end -end - -@testset "gesvd, ggsvd" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,5) - U,S,V = svd(A) - lU,lS,lVt = LAPACK.gesvd!('S','S',A) - @test U ≈ lU - @test S ≈ lS - @test V' ≈ lVt - B = rand(elty,10,10) - # xggsvd3 replaced xggsvd in LAPACK 3.6.0 - if LAPACK.version() < v"3.6.0" - @test_throws DimensionMismatch LAPACK.ggsvd!('S','S','S',A,B) - else - @test_throws DimensionMismatch LAPACK.ggsvd3!('S','S','S',A,B) - end - end -end - -@testset "geevx, ggev, ggev3 errors" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - B = rand(elty,10,10) - @test_throws ArgumentError LAPACK.geevx!('M','N','N','N',A) - @test_throws ArgumentError LAPACK.geevx!('N','Z','N','N',A) - @test_throws ArgumentError LAPACK.geevx!('N','N','Z','N',A) - 
@test_throws ArgumentError LAPACK.geevx!('N','N','N','Z',A) - @test_throws ArgumentError LAPACK.ggev!('N','B',A,B) - @test_throws ArgumentError LAPACK.ggev!('B','N',A,B) - @test_throws DimensionMismatch LAPACK.ggev!('N','N',A,zeros(elty,12,12)) - @test_throws ArgumentError LAPACK.ggev3!('N','B',A,B) - @test_throws ArgumentError LAPACK.ggev3!('B','N',A,B) - @test_throws DimensionMismatch LAPACK.ggev3!('N','N',A,zeros(elty,12,12)) - end -end - -@testset "gebal/gebak" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - typescale = log10(eps(real(elty))) / 3 * 2 - A = rand(elty,10,10) * Diagonal(exp10.(range(typescale, stop=-typescale, length=10))) - B = copy(A) - ilo, ihi, scale = LAPACK.gebal!('S',B) - Bvs = eigvecs(B) - Avs = eigvecs(A) - Bvs = LAPACK.gebak!('S','R',ilo,ihi,scale,Bvs) - @test norm(diff(Avs ./ Bvs, dims=1)) < 100 * eps(abs(float(one(elty)))) - end -end - -@testset "gels" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - Random.seed!(913) - A = rand(elty,10,10) - X = rand(elty,10) - B,Y,z = LAPACK.gels!('N',copy(A),copy(X)) - @test A\X ≈ Y - end -end - -@testset "getrf/getri" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - iA = inv(A) - A, ipiv = LAPACK.getrf!(A) - A = LAPACK.getri!(A, ipiv) - @test A ≈ iA - end -end - -@testset "geev" begin - # complex is easier for now - @testset for elty in (ComplexF32, ComplexF64) - A = rand(elty,10,10) - Aw, Avl, Avr = LAPACK.geev!('N','V',copy(A)) - fA = eigen(A, sortby=nothing) - @test fA.values ≈ Aw - @test fA.vectors ≈ Avr - end -end - -@testset "gtsv" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - du = rand(elty,9) - d = rand(elty,10) - dl = rand(elty,9) - b = rand(elty,10) - c = Tridiagonal(dl,d,du) \ b - b = LAPACK.gtsv!(dl,d,du,b) - @test b ≈ c - @test_throws DimensionMismatch LAPACK.gtsv!(zeros(elty,11),d,du,b) - @test_throws DimensionMismatch LAPACK.gtsv!(dl,d,zeros(elty,11),b) - @test_throws DimensionMismatch LAPACK.gtsv!(dl,d,du,zeros(elty,11)) - @test LAPACK.gtsv!(elty[],elty[],elty[],elty[]) == elty[] - end -end - -@testset "gttrs,gttrf errors" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - du = rand(elty,9) - d = rand(elty,10) - dl = rand(elty,9) - b = rand(elty,10) - y10 = Vector{BlasInt}(undef, 10) - x9, x11 = Vector{elty}.(undef, (9, 11)) - @test_throws DimensionMismatch LAPACK.gttrf!(x11, d, du) - @test_throws DimensionMismatch LAPACK.gttrf!(dl, d, x11) - @test_throws DimensionMismatch LAPACK.gttrs!('N', x11, d, du, x9, y10, b) - @test_throws DimensionMismatch LAPACK.gttrs!('N', dl, d, x11, x9, y10, b) - @test_throws DimensionMismatch LAPACK.gttrs!('N', dl, d, du, x9, y10, x11) - A = lu(Tridiagonal(dl,d,du)) - b = rand(elty,10,5) - c = copy(b) - dl,d,du,du2,ipiv = LAPACK.gttrf!(dl,d,du) - c = LAPACK.gttrs!('N',dl,d,du,du2,ipiv,c) - @test A\b ≈ c - end -end - -@testset "orglq and friends errors" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - A,tau = LAPACK.gelqf!(A) - @test_throws DimensionMismatch LAPACK.orglq!(A,tau,11) - @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormlq!('R','N',A,zeros(elty,11),rand(elty,10,10)) - @test_throws DimensionMismatch LAPACK.ormlq!('L','N',A,zeros(elty,11),rand(elty,10,10)) - - B = copy(A) - C = LAPACK.orglq!(B,tau) - @test 
LAPACK.ormlq!('R','N',A,tau, Matrix{elty}(I, 10, 10)) ≈ C - - A = rand(elty,10,10) - A,tau = LAPACK.geqrf!(A) - @test_throws DimensionMismatch LAPACK.orgqr!(A,tau,11) - B = copy(A) - @test LAPACK.orgqr!(B,tau) ≈ LAPACK.ormqr!('R','N',A,tau,Matrix{elty}(I, 10, 10)) - @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormqr!('R','N',A,zeros(elty,11),rand(elty,10,10)) - @test_throws DimensionMismatch LAPACK.ormqr!('L','N',A,zeros(elty,11),rand(elty,10,10)) - - A = rand(elty,10,10) - A,tau = LAPACK.geqlf!(A) - @test_throws DimensionMismatch LAPACK.orgql!(A,tau,11) - B = copy(A) - @test LAPACK.orgql!(B,tau) ≈ LAPACK.ormql!('R','N',A,tau,Matrix{elty}(I, 10, 10)) - @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormql!('R','N',A,zeros(elty,11),rand(elty,10,10)) - @test_throws DimensionMismatch LAPACK.ormql!('L','N',A,zeros(elty,11),rand(elty,10,10)) - - A = rand(elty,10,10) - A,tau = LAPACK.gerqf!(A) - @test_throws DimensionMismatch LAPACK.orgrq!(A,tau,11) - B = copy(A) - @test LAPACK.orgrq!(B,tau) ≈ LAPACK.ormrq!('R','N',A,tau,Matrix{elty}(I, 10, 10)) - @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,tau,rand(elty,11,11)) - @test_throws DimensionMismatch LAPACK.ormrq!('R','N',A,zeros(elty,11),rand(elty,10,10)) - @test_throws DimensionMismatch LAPACK.ormrq!('L','N',A,zeros(elty,11),rand(elty,10,10)) - - A = rand(elty,10,11) - Q = copy(A) - Q,tau = LAPACK.gerqf!(Q) - R = triu(Q[:,2:11]) - LAPACK.orgrq!(Q,tau) - @test Q*Q' ≈ Matrix(I, 10, 10) - @test R*Q ≈ A - @test_throws DimensionMismatch LAPACK.orgrq!(zeros(elty,11,10),zeros(elty,10)) - - C = rand(elty,10,10) - V = rand(elty,10,10) - T = zeros(elty,10,11) - @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C) - @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C) - - C = rand(elty,10,10) - V = rand(elty,11,10) - T = zeros(elty,10,10) - @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C) - @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C) - - # test size(T) = (nb,k) ensures 1 <= nb <= k - T = zeros(elty,10,10) - V = rand(elty,5,10) - @test_throws DimensionMismatch LAPACK.gemqrt!('L','N',V,T,C) - C = rand(elty,10,10) - V = rand(elty,10,10) - T = zeros(elty,11,10) - @test_throws DimensionMismatch LAPACK.gemqrt!('R','N',V,T,C) - - @test_throws DimensionMismatch LAPACK.orghr!(1, 10, C, zeros(elty,11)) - end -end - -@testset "sytri, sytrs, and sytrf" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - A = A + transpose(A) #symmetric! - B = copy(A) - B,ipiv = LAPACK.sytrf!('U',B) - @test triu(inv(A)) ≈ triu(LAPACK.sytri!('U',B,ipiv)) rtol=eps(cond(A)) - @test_throws DimensionMismatch LAPACK.sytrs!('U',B,ipiv,rand(elty,11,5)) - @test LAPACK.sytrf!('U',zeros(elty,0,0)) == (zeros(elty,0,0),zeros(BlasInt,0),zero(BlasInt)) - end - - # Rook-pivoting variants - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty, 10, 10) - A = A + transpose(A) #symmetric! 
- B = copy(A) - B,ipiv = LAPACK.sytrf_rook!('U', B) - @test triu(inv(A)) ≈ triu(LAPACK.sytri_rook!('U', B, ipiv)) rtol=eps(cond(A)) - @test_throws DimensionMismatch LAPACK.sytrs_rook!('U', B, ipiv, rand(elty, 11, 5)) - @test LAPACK.sytrf_rook!('U',zeros(elty, 0, 0)) == (zeros(elty, 0, 0),zeros(BlasInt, 0),zero(BlasInt)) - A = rand(elty, 10, 10) - A = A + transpose(A) #symmetric! - b = rand(elty, 10) - c = A \ b - cnd = cond(A) - b,A = LAPACK.sysv_rook!('U', A, b) - @test b ≈ c rtol=eps(cnd) - @test_throws DimensionMismatch LAPACK.sysv_rook!('U',A,rand(elty,11)) - - # syconvf_rook error handling - # way argument is wrong - @test_throws ArgumentError LAPACK.syconvf_rook!('U', 'U', A, rand(BlasInt, 10)) - # ipiv has wrong length - @test_throws ArgumentError LAPACK.syconvf_rook!('U', 'R', A, rand(BlasInt, 9)) - # e has wrong length - @test_throws ArgumentError LAPACK.syconvf_rook!('U', 'R', A, rand(BlasInt, 10), rand(elty, 9)) - end -end - -@testset "hetrf, hetrs" begin - @testset for elty in (ComplexF32, ComplexF64) - A = rand(elty,10,10) - A = A + A' #hermitian! - B = copy(A) - B,ipiv = LAPACK.hetrf!('U',B) - @test_throws DimensionMismatch LAPACK.hetrs!('U',B,ipiv,rand(elty,11,5)) - @test_throws DimensionMismatch LAPACK.hetrs_rook!('U',B,ipiv,rand(elty,11,5)) - end -end - -@testset "stev, stebz, stein, stegr" begin - @testset for elty in (Float32, Float64) - d = rand(elty,10) - e = rand(elty,9) - @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,11)) - @test_throws DimensionMismatch LAPACK.stebz!('A','B',zero(elty),zero(elty),0,0,-1.,d,rand(elty,10)) - @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,rand(elty,11),zero(elty),zero(elty),0,0) - @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,11),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10)) - @test_throws DimensionMismatch LAPACK.stein!(d,e,zeros(elty,11),zeros(BlasInt,10),zeros(BlasInt,10)) - end -end - -@testset "trtri & trtrs" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - A = triu(A) - B = copy(A) - @test inv(A) ≈ LAPACK.trtri!('U','N',B) - @test_throws DimensionMismatch LAPACK.trtrs!('U','N','N',B,zeros(elty,11,10)) - end -end - -@testset "larfg & larf" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - ## larfg - Random.seed!(0) - x = rand(elty, 5) - v = copy(x) - τ = LinearAlgebra.LAPACK.larfg!(v) - H = (I - τ*v*v') - # for complex input, LAPACK wants a conjugate transpose of H (check clarfg docs) - y = elty <: Complex ? 
H'*x : H*x - # we have rotated a vector - @test norm(y) ≈ norm(x) - # and annihilated almost all of the first column - @test norm(y[2:end], Inf) < 10*eps(real(one(elty))) - - ## larf - C = rand(elty, 5, 5) - C_norm = norm(C, 2) - v = C[1:end, 1] - τ = LinearAlgebra.LAPACK.larfg!(v) - LinearAlgebra.LAPACK.larf!('L', v, conj(τ), C) - # we have applied a unitary transformation - @test norm(C, 2) ≈ C_norm - # and annihilated almost all of the first column - @test norm(C[2:end, 1], Inf) < 10*eps(real(one(elty))) - - # apply left and right - C1 = rand(elty, 5, 5) - C2 = rand(elty, 5, 5) - C = C2*C1 - - v = C1[1:end, 1] - τ = LinearAlgebra.LAPACK.larfg!(v) - LinearAlgebra.LAPACK.larf!('L', v, τ, C1) - LinearAlgebra.LAPACK.larf!('R', v, conj(τ), C2) - @test C ≈ C2*C1 - end -end - -@testset "tgsen, tzrzf, & trsyl" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - Z = zeros(elty,10,10) - @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,zeros(elty,11,11),Z,Z) - @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,Z,zeros(elty,11,11),Z) - @test_throws DimensionMismatch LAPACK.tgsen!(zeros(BlasInt,10),Z,Z,Z,zeros(elty,11,11)) - @test_throws DimensionMismatch LAPACK.trsyl!('N','N',Z,Z,zeros(elty,11,11)) - @test_throws DimensionMismatch LAPACK.tzrzf!(zeros(elty,10,5)) - - A = triu(rand(elty,4,4)) - V = view(A, 1:2, :) - M = Matrix(V) - @test LAPACK.tzrzf!(V) == LAPACK.tzrzf!(M) - end -end - -@testset "sysv" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - Random.seed!(123) - A = rand(elty,10,10) - A = A + transpose(A) #symmetric! - b = rand(elty,10) - c = A \ b - b,A = LAPACK.sysv!('U',A,b) - @test b ≈ c - @test_throws DimensionMismatch LAPACK.sysv!('U',A,rand(elty,11)) - end -end - -@testset "hesv" begin - @testset for elty in (ComplexF32, ComplexF64) - Random.seed!(935) - A = rand(elty,10,10) - A = A + A' #hermitian! - b = rand(elty,10) - c = A \ b - b,A = LAPACK.hesv!('U',A,b) - @test b ≈ c - @test_throws DimensionMismatch LAPACK.hesv!('U',A,rand(elty,11)) - A = rand(elty,10,10) - A = A + A' #hermitian! 
- b = rand(elty,10) - c = A \ b - b,A = LAPACK.hesv_rook!('U',A,b) - @test b ≈ c - @test_throws DimensionMismatch LAPACK.hesv_rook!('U',A,rand(elty,11)) - end -end - -@testset "ptsv" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - dv = fill(elty(1),10) - ev = zeros(elty,9) - rdv = real(dv) - A = SymTridiagonal(dv,ev) - if elty <: Complex - A = Tridiagonal(conj(ev),dv,ev) - end - B = rand(elty,10,10) - C = copy(B) - @test A\B ≈ LAPACK.ptsv!(rdv,ev,C) - @test_throws DimensionMismatch LAPACK.ptsv!(rdv,Vector{elty}(undef,10),C) - @test_throws DimensionMismatch LAPACK.ptsv!(rdv,ev,Matrix{elty}(undef,11,11)) - end -end - -@testset "pttrf and pttrs" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - dv = fill(elty(1),10) - ev = zeros(elty,9) - rdv = real(dv) - A = SymTridiagonal(dv,ev) - if elty <: Complex - A = Tridiagonal(conj(ev),dv,ev) - end - rdv,ev = LAPACK.pttrf!(rdv,ev) - @test_throws DimensionMismatch LAPACK.pttrf!(rdv,dv) - B = rand(elty,10,10) - C = copy(B) - if elty <: Complex - @test A\B ≈ LAPACK.pttrs!('U',rdv,ev,C) - @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,Vector{elty}(undef,10),C) - @test_throws DimensionMismatch LAPACK.pttrs!('U',rdv,ev,Matrix{elty}(undef,11,11)) - else - @test A\B ≈ LAPACK.pttrs!(rdv,ev,C) - @test_throws DimensionMismatch LAPACK.pttrs!(rdv,Vector{elty}(undef,10),C) - @test_throws DimensionMismatch LAPACK.pttrs!(rdv,ev,Matrix{elty}(undef,11,11)) - end - end -end - -@testset "posv and some errors for friends" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - local n = 10 - A = rand(elty,n,n)/100 - A += real(diagm(0 => n*real(rand(elty,n)))) - if elty <: Complex - A = A + A' - else - A = A + transpose(A) - end - B = rand(elty,n,n) - D = copy(A) - C = copy(B) - D,C = LAPACK.posv!('U',D,C) - @test A\B ≈ C - offsizemat = Matrix{elty}(undef, n+1, n+1) - @test_throws DimensionMismatch LAPACK.posv!('U', D, offsizemat) - @test_throws DimensionMismatch LAPACK.potrs!('U', D, offsizemat) - - @test LAPACK.potrs!('U',Matrix{elty}(undef,0,0),elty[]) == elty[] - end -end - -@testset "gesvx" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - B = rand(elty,10,5) - C = copy(A) - D = copy(B) - X, rcond, f, b, r = LAPACK.gesvx!(C,D) - @test X ≈ A\B rtol=inv(rcond)*eps(real(elty)) - end -end - -@testset "gees, gges, gges3 error throwing" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - A = rand(elty,10,10) - B = rand(elty,11,11) - @test_throws DimensionMismatch LAPACK.gges!('V','V',A,B) - @test_throws DimensionMismatch LAPACK.gges3!('V','V',A,B) - end -end - -@testset "trrfs & trevc" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - T = triu(rand(elty,10,10)) - v = eigvecs(T, sortby=nothing)[:,1] - select = zeros(LinearAlgebra.BlasInt,10) - select[1] = 1 - select,Vr = LAPACK.trevc!('R','S',select,copy(T)) - @test Vr ≈ v - select = zeros(LinearAlgebra.BlasInt,10) - select[1] = 1 - select,Vl = LAPACK.trevc!('L','S',select,copy(T)) - select = zeros(LinearAlgebra.BlasInt,10) - select[1] = 1 - select,Vln,Vrn = LAPACK.trevc!('B','S',select,copy(T)) - @test Vrn ≈ v - @test Vln ≈ Vl - @test_throws ArgumentError LAPACK.trevc!('V','S',select,copy(T)) - @test_throws DimensionMismatch LAPACK.trrfs!('U','N','N',T,rand(elty,10,10),rand(elty,10,11)) - end -end - -@testset "laic1" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - @test_throws DimensionMismatch 
LAPACK.laic1!(1,rand(elty,10),real(rand(elty)),rand(elty,11),rand(elty)) - end -end - -@testset "trsen" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - for job in ('N', 'E', 'V', 'B') - for c in ('V', 'N') - A = convert(Matrix{elty}, [7 2 2 1; 1 5 2 0; 0 3 9 4; 1 1 1 4]) - T,Q,d = schur(A) - s, sep = LinearAlgebra.LAPACK.trsen!(job,c,Array{LinearAlgebra.BlasInt}([0,1,0,0]),T,Q)[4:5] - @test d[1] ≈ T[2,2] - @test d[2] ≈ T[1,1] - if c == 'V' - @test Q*T*Q' ≈ A - end - if job == 'N' || job == 'V' - @test iszero(s) - else - @test s ≈ 0.8080423 atol=1e-6 - end - if job == 'N' || job == 'E' - @test iszero(sep) - else - @test sep ≈ 2. atol=3e-1 - end - end - end - end -end - -@testset "trexc" begin - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - for c in ('V', 'N') - A = convert(Matrix{elty}, [7 2 2 1; 1 5 2 0; 0 3 9 4; 1 1 1 4]) - T,Q,d = schur(A) - LinearAlgebra.LAPACK.trexc!(c,LinearAlgebra.BlasInt(1),LinearAlgebra.BlasInt(2),T,Q) - @test d[1] ≈ T[2,2] - @test d[2] ≈ T[1,1] - if c == 'V' - @test Q*T*Q' ≈ A - end - end - end -end - -@testset "Julia vs LAPACK" begin - # Test our own linear algebra functionality against LAPACK - @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) - for nn in (5,10,15) - if elty <: Real - A = convert(Matrix{elty}, randn(10,nn)) - else - A = convert(Matrix{elty}, complex.(randn(10,nn),randn(10,nn))) - end ## LU (only equal for real because LAPACK uses different absolute value when choosing permutations) - if elty <: Real - FJulia = LinearAlgebra.generic_lufact!(copy(A)) - FLAPACK = LinearAlgebra.LAPACK.getrf!(copy(A)) - @test FJulia.factors ≈ FLAPACK[1] - @test FJulia.ipiv ≈ FLAPACK[2] - @test FJulia.info ≈ FLAPACK[3] - end - - ## QR - FJulia = LinearAlgebra.qrfactUnblocked!(copy(A)) - FLAPACK = LinearAlgebra.LAPACK.geqrf!(copy(A)) - @test FJulia.factors ≈ FLAPACK[1] - @test FJulia.τ ≈ FLAPACK[2] - end - end -end - -# Issue 13976 -let A = [NaN 0.0 NaN; 0 0 0; NaN 0 NaN] - @test_throws ArgumentError exp(A) -end - -# Issue 14065 (and 14220) -let A = [NaN NaN; NaN NaN] - @test_throws ArgumentError eigen(A) -end - -# Issue #42762 https://github.com/JuliaLang/julia/issues/42762 -# Tests geqrf! and gerqf! with null column dimensions -a = zeros(2,0), zeros(0) -@test LinearAlgebra.LAPACK.geqrf!(a...) === a -@test LinearAlgebra.LAPACK.gerqf!(a...) === a - -# Issue #49489: https://github.com/JuliaLang/julia/issues/49489 -# Dimension mismatch between A and ipiv causes segfaults -@testset "issue #49489" begin - A = randn(23,23) - b = randn(23) - ipiv = collect(1:20) - @test_throws DimensionMismatch LinearAlgebra.LAPACK.getrs!('N', A, ipiv, b) -end - -end # module TestLAPACK diff --git a/stdlib/LinearAlgebra/test/ldlt.jl b/stdlib/LinearAlgebra/test/ldlt.jl deleted file mode 100644 index 51abf31086091..0000000000000 --- a/stdlib/LinearAlgebra/test/ldlt.jl +++ /dev/null @@ -1,41 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestLDLT - -using Test, LinearAlgebra, Random - -Random.seed!(123) - -@testset "Factorization conversions of LDLT" begin - S = SymTridiagonal(randn(5), randn(4)) - F = ldlt(S) - @test Factorization{eltype(S)}(F) === F - @test Array(Factorization{complex(eltype(S))}(F)) ≈ Array(ldlt(complex(S))) - @test eltype(Factorization{complex(eltype(S))}) == complex(eltype(S)) -end - -@testset "eltype conversions of LDLT" begin - S = SymTridiagonal(randn(5), randn(4)) - F = ldlt(S) - Fc = LDLt{ComplexF32}(F.data) - @test Fc isa LDLt{ComplexF32} - @test Array(Fc) ≈ ComplexF32.(Array(S)) -end - -@testset "Accessing fields of LDLT" begin - S = SymTridiagonal(randn(5), randn(4)) - F = ldlt(S) - @test getproperty(F, :L) == transpose(getproperty(F, :Lt)) - @test getproperty(F, :d) == diag(getproperty(F, :D), 0) -end - -@testset "REPL printing of LDLT" begin - S = SymTridiagonal(randn(5), randn(4)) - F = ldlt(S) - ldltstring = sprint((t, s) -> show(t, "text/plain", s), F) - lstring = sprint((t, s) -> show(t, "text/plain", s), F.L) - dstring = sprint((t, s) -> show(t, "text/plain", s), F.D) - @test ldltstring == "$(summary(F))\nL factor:\n$lstring\nD factor:\n$dstring" -end - -end # module TestLDLT diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl deleted file mode 100644 index 6bdc4efa5d6dd..0000000000000 --- a/stdlib/LinearAlgebra/test/lq.jl +++ /dev/null @@ -1,237 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestLQ - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, rmul!, lmul! - -m = 10 - -Random.seed!(1234321) - -asquare = randn(ComplexF64, m, m) / 2 -awide = randn(ComplexF64, m, m+3) / 2 -bcomplex = randn(ComplexF64, m, 2) / 2 - -# helper functions to unambiguously recover explicit forms of an LQPackedQ -squareQ(Q::LinearAlgebra.LQPackedQ) = (n = size(Q.factors, 2); lmul!(Q, Matrix{eltype(Q)}(I, n, n))) -rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q) - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64), n in (m, size(awide, 2)) - adata = m == n ? asquare : awide - a = convert(Matrix{eltya}, eltya <: Complex ? adata : real(adata)) - ε = εa = eps(abs(float(one(eltya)))) - n1 = n ÷ 2 - - α = rand(eltya) - aα = fill(α,1,1) - @test lq(α).L*lq(α).Q ≈ lq(aα).L*lq(aα).Q - @test abs(lq(α).Q[1,1]) ≈ one(eltya) - - @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) - b = eltyb == Int ? rand(1:5, m, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? bcomplex : real(bcomplex)) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - - tab = promote_type(eltya,eltyb) - - @testset for isview in (false,true) - let a = isview ? view(a, 1:m - 1, 1:n - 1) : a, b = isview ? 
view(b, 1:m - 1) : b, m = m - isview, n = n - isview - lqa = lq(a) - x = lqa\b - l, q = lqa.L, lqa.Q - qra = qr(a, ColumnNorm()) - @testset "Basic ops" begin - @test size(lqa,1) == size(a,1) - @test size(lqa,3) == 1 - @test size(lqa.Q,3) == 1 - @test Base.propertynames(lqa) == (:L, :Q) - ref_obs = (l, q) - for (ii, lq_obj) in enumerate(lqa) - @test ref_obs[ii] == lq_obj - end - @test_throws ErrorException lqa.Z - @test Array(copy(adjoint(lqa))) ≈ a' - @test q*squareQ(q)' ≈ Matrix(I, n, n) - @test l*q ≈ a - @test Array(lqa) ≈ a - @test Array(copy(lqa)) ≈ a - @test LinearAlgebra.Factorization{eltya}(lqa) === lqa - @test Matrix{eltya}(q) isa Matrix{eltya} - # test Array{T}(LQPackedQ{T}) - @test Array{eltya}(q) ≈ Matrix(q) - end - @testset "Binary ops" begin - k = size(a, 2) - T = Tridiagonal(rand(eltya, k-1), rand(eltya, k), rand(eltya, k-1)) - @test lq(T) * T ≈ T * T rtol=3000ε - @test lqa * T ≈ a * T rtol=3000ε - @test a*x ≈ b rtol=3000ε - @test x ≈ qra \ b rtol=3000ε - @test lqa*x ≈ a*x rtol=3000ε - @test (sq = size(q.factors, 2); *(Matrix{eltyb}(I, sq, sq), adjoint(q))*squareQ(q)) ≈ Matrix(I, n, n) rtol=5000ε - if eltya != Int - @test Matrix{eltyb}(I, n, n)*q ≈ Matrix(I, n, n) * convert(LinearAlgebra.AbstractQ{tab}, q) - end - @test q*x ≈ squareQ(q)*x rtol=100ε - @test q'*x ≈ squareQ(q)'*x rtol=100ε - @test a*q ≈ a*squareQ(q) rtol=100ε - @test a*q' ≈ a*squareQ(q)' rtol=100ε - @test q*a'≈ squareQ(q)*a' rtol=100ε - @test q'*a' ≈ squareQ(q)'*a' rtol=100ε - @test_throws DimensionMismatch q*x[1:n1 + 1] - @test_throws DimensionMismatch adjoint(q) * Matrix{eltya}(undef,m+2,m+2) - @test_throws DimensionMismatch Matrix{eltyb}(undef,m+2,m+2)*q - if isa(a, DenseArray) && isa(b, DenseArray) - # use this to test 2nd branch in mult code - pad_a = vcat(I, a) - pad_x = hcat(I, x) - @test pad_a*q ≈ pad_a*squareQ(q) rtol=100ε - @test q'*pad_x ≈ squareQ(q)'*pad_x rtol=100ε - end - end - end - end - - @testset "Matmul with LQ factorizations" begin - lqa = lq(a[:,1:n1]) - l,q = lqa.L, lqa.Q - @test rectangularQ(q)*rectangularQ(q)' ≈ Matrix(I, n1, n1) - @test squareQ(q)'*squareQ(q) ≈ Matrix(I, n1, n1) - @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q) - @test lmul!(adjoint(q), rectangularQ(q)) ≈ Matrix(I, n1, n1) - @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1), adjoint(q)) - @test_throws BoundsError size(q,-1) - end - end -end - -@testset "getindex on LQPackedQ (#23733)" begin - local m, n - function getqs(F::LinearAlgebra.LQ) - implicitQ = F.Q - sq = size(implicitQ.factors, 2) - explicitQ = lmul!(implicitQ, Matrix{eltype(implicitQ)}(I, sq, sq)) - return implicitQ, explicitQ - end - - m, n = 3, 3 # reduced Q 3-by-3, full Q 3-by-3 - implicitQ, explicitQ = getqs(lq(randn(m, n))) - @test implicitQ[1, 1] == explicitQ[1, 1] - @test implicitQ[m, 1] == explicitQ[m, 1] - @test implicitQ[1, n] == explicitQ[1, n] - @test implicitQ[m, n] == explicitQ[m, n] - - m, n = 3, 4 # reduced Q 3-by-4, full Q 4-by-4 - implicitQ, explicitQ = getqs(lq(randn(m, n))) - @test implicitQ[1, 1] == explicitQ[1, 1] - @test implicitQ[m, 1] == explicitQ[m, 1] - @test implicitQ[1, n] == explicitQ[1, n] - @test implicitQ[m, n] == explicitQ[m, n] - @test implicitQ[m+1, 1] == explicitQ[m+1, 1] - @test implicitQ[m+1, n] == explicitQ[m+1, n] - - m, n = 4, 3 # reduced Q 3-by-3, full Q 3-by-3 - implicitQ, explicitQ = getqs(lq(randn(m, n))) - @test implicitQ[1, 1] == explicitQ[1, 1] - @test implicitQ[n, 1] == explicitQ[n, 1] - @test implicitQ[1, n] == explicitQ[1, n] - @test implicitQ[n, n] == explicitQ[n, n] -end - 
-@testset "size on LQPackedQ (#23780)" begin - # size(Q::LQPackedQ) yields the shape of Q's full/square form - for ((mA, nA), nQ) in ( - ((3, 3), 3), # A 3-by-3 => full/square Q 3-by-3 - ((3, 4), 4), # A 3-by-4 => full/square Q 4-by-4 - ((4, 3), 3) )# A 4-by-3 => full/square Q 3-by-3 - @test size(lq(randn(mA, nA)).Q) == (nQ, nQ) - end -end - -@testset "postmultiplication with / right-application of LQPackedQ (#23779)" begin - function getqs(F::LinearAlgebra.LQ) - implicitQ = F.Q - explicitQ = lmul!(implicitQ, Matrix{eltype(implicitQ)}(I, size(implicitQ)...)) - return implicitQ, explicitQ - end - # for any shape m-by-n of LQ-factored matrix, where Q is an LQPackedQ - # A_mul_B*(C, Q) (Ac_mul_B*(C, Q)) operations should work for - # *-by-n (n-by-*) C, which we test below via n-by-n C - for (mA, nA) in ((3, 3), (3, 4), (4, 3)) - implicitQ, explicitQ = getqs(lq(randn(mA, nA))) - C = randn(nA, nA) - @test *(C, implicitQ) ≈ *(C, explicitQ) - @test *(C, adjoint(implicitQ)) ≈ *(C, adjoint(explicitQ)) - @test *(adjoint(C), implicitQ) ≈ *(adjoint(C), explicitQ) - @test *(adjoint(C), adjoint(implicitQ)) ≈ *(adjoint(C), adjoint(explicitQ)) - end - # where the LQ-factored matrix has at least as many rows m as columns n, - # Q's full/square and reduced/rectangular forms have the same shape (n-by-n). hence we expect - # _only_ *-by-n (n-by-*) C to work in A_mul_B*(C, Q) (Ac_mul_B*(C, Q)) ops. - # and hence the n-by-n C tests above suffice. - # - # where the LQ-factored matrix has more columns n than rows m, - # Q's full/square form is n-by-n whereas its reduced/rectangular form is m-by-n. - # hence we need also test *-by-m C with - # A*_mul_B(C, Q) ops, as below via m-by-m C. - mA, nA = 3, 4 - implicitQ, explicitQ = getqs(lq(randn(mA, nA))) - C = randn(mA, mA) - zeroextCright = hcat(C, zeros(eltype(C), mA)) - zeroextCdown = vcat(C, zeros(eltype(C), (1, mA))) - @test *(C, implicitQ) ≈ *(zeroextCright, explicitQ) - @test *(adjoint(C), implicitQ) ≈ *(adjoint(zeroextCdown), explicitQ) - @test_throws DimensionMismatch C * adjoint(implicitQ) - @test_throws DimensionMismatch adjoint(C) * adjoint(implicitQ) -end - -@testset "det(Q::LQPackedQ)" begin - @testset for n in 1:3, m in 1:3 - @testset "real" begin - _, Q = lq(randn(n, m)) - @test det(Q) ≈ det(Q*I) - @test abs(det(Q)) ≈ 1 - end - @testset "complex" begin - _, Q = lq(randn(ComplexF64, n, m)) - @test det(Q) ≈ det(Q*I) - @test abs(det(Q)) ≈ 1 - end - end -end - -@testset "REPL printing" begin - bf = IOBuffer() - show(bf, "text/plain", lq(Matrix(I, 4, 4))) - seekstart(bf) - @test String(take!(bf)) == """ -LinearAlgebra.LQ{Float64, Matrix{Float64}, Vector{Float64}} -L factor: -4×4 Matrix{Float64}: - 1.0 0.0 0.0 0.0 - 0.0 1.0 0.0 0.0 - 0.0 0.0 1.0 0.0 - 0.0 0.0 0.0 1.0 -Q factor: 4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}""" -end - -@testset "adjoint of LQ" begin - n = 5 - - for b in (ones(n), ones(n, 2), ones(Complex{Float64}, n, 2)) - for A in ( - randn(n, n), - # Tall problems become least squares problems similarly to QR - randn(n - 2, n), - complex.(randn(n, n), randn(n, n))) - - F = lq(A) - @test A'\b ≈ F'\b - end - @test_throws DimensionMismatch lq(randn(n, n + 2))'\b - end - -end - -end # module TestLQ diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl deleted file mode 100644 index aa73bee6ddc38..0000000000000 --- a/stdlib/LinearAlgebra/test/lu.jl +++ /dev/null @@ -1,467 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestLU - -using Test, LinearAlgebra, Random -using LinearAlgebra: ldiv!, BlasReal, BlasInt, BlasFloat, rdiv! - -n = 10 - -# Split n into 2 parts for tests needing two matrices -n1 = div(n, 2) -n2 = 2*n1 - -Random.seed!(1234324) - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 -breal = randn(n,2)/2 -bimg = randn(n,2)/2 -creal = randn(n)/2 -cimg = randn(n)/2 -dureal = randn(n-1)/2 -duimg = randn(n-1)/2 -dlreal = randn(n-1)/2 -dlimg = randn(n-1)/2 -dreal = randn(n)/2 -dimg = randn(n)/2 - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) - a = eltya == Int ? rand(1:7, n, n) : - convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - d = if eltya == Int - Tridiagonal(rand(1:7, n-1), rand(1:7, n), rand(1:7, n-1)) - elseif eltya <: Complex - convert(Tridiagonal{eltya}, Tridiagonal( - complex.(dlreal, dlimg), complex.(dreal, dimg), complex.(dureal, duimg))) - else - convert(Tridiagonal{eltya}, Tridiagonal(dlreal, dreal, dureal)) - end - εa = eps(abs(float(one(eltya)))) - - if eltya <: BlasFloat - @testset "LU factorization for Number" begin - num = rand(eltya) - @test (lu(num)...,) == (hcat(one(eltya)), hcat(num), [1]) - @test convert(Array, lu(num)) ≈ eltya[num] - end - @testset "Balancing in eigenvector calculations" begin - A = convert(Matrix{eltya}, [ 3.0 -2.0 -0.9 2*eps(real(one(eltya))); - -2.0 4.0 1.0 -eps(real(one(eltya))); - -eps(real(one(eltya)))/4 eps(real(one(eltya)))/2 -1.0 0; - -0.5 -0.5 0.1 1.0]) - F = eigen(A, permute=false, scale=false) - @test F.vectors*Diagonal(F.values)/F.vectors ≈ A - F = eigen(A) - # @test norm(F.vectors*Diagonal(F.values)/F.vectors - A) > 0.01 - end - end - κ = cond(a,1) - @testset "(Automatic) Square LU decomposition" begin - lua = factorize(a) - @test_throws ErrorException lua.Z - l,u,p = lua.L, lua.U, lua.p - ll,ul,pl = @inferred lu(a) - @test ll * ul ≈ a[pl,:] - @test l*u ≈ a[p,:] - @test (l*u)[invperm(p),:] ≈ a - @test a * inv(lua) ≈ Matrix(I, n, n) - @test copy(lua) == lua - if eltya <: BlasFloat - # test conversion of LU factorization's numerical type - bft = eltya <: Real ? LinearAlgebra.LU{BigFloat} : LinearAlgebra.LU{Complex{BigFloat}} - bflua = convert(bft, lua) - @test bflua.L*bflua.U ≈ big.(a)[p,:] rtol=εa*norm(a) - @test Factorization{eltya}(lua) === lua - # test Factorization with different eltype - if eltya <: BlasReal - @test Array(Factorization{Float16}(lua)) ≈ Array(lu(convert(Matrix{Float16}, a))) - @test eltype(Factorization{Float16}(lua)) == Float16 - end - end - # compact printing - lstring = sprint(show,l) - ustring = sprint(show,u) - end - κd = cond(Array(d),1) - @testset "Tridiagonal LU" begin - lud = @inferred lu(d) - @test LinearAlgebra.issuccess(lud) - @test @inferred(lu(lud)) == lud - @test_throws ErrorException lud.Z - @test lud.L*lud.U ≈ lud.P*Array(d) - @test lud.L*lud.U ≈ Array(d)[lud.p,:] - @test AbstractArray(lud) ≈ d - @test Array(lud) ≈ d - if eltya != Int - dlu = convert.(eltya, [1, 1]) - dia = convert.(eltya, [-2, -2, -2]) - tri = Tridiagonal(dlu, dia, dlu) - @test_throws ArgumentError lu!(tri) - end - end - @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) - b = eltyb == Int ? rand(1:5, n, 2) : - convert(Matrix{eltyb}, eltyb <: Complex ? complex.(breal, bimg) : breal) - c = eltyb == Int ? rand(1:5, n) : - convert(Vector{eltyb}, eltyb <: Complex ? 
complex.(creal, cimg) : creal) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - @testset "(Automatic) Square LU decomposition" begin - lua = factorize(a) - let Bs = copy(b), Cs = copy(c) - for (bb, cc) in ((Bs, Cs), (view(Bs, 1:n, 1), view(Cs, 1:n))) - @test norm(a*(lua\bb) - bb, 1) < ε*κ*n*2 # Two because the right hand side has two columns - @test norm(a'*(lua'\bb) - bb, 1) < ε*κ*n*2 # Two because the right hand side has two columns - @test norm(a'*(lua'\a') - a', 1) < ε*κ*n^2 - @test norm(a*(lua\cc) - cc, 1) < ε*κ*n # cc is a vector - @test norm(a'*(lua'\cc) - cc, 1) < ε*κ*n # cc is a vector - @test AbstractArray(lua) ≈ a - @test norm(transpose(a)*(transpose(lua)\bb) - bb,1) < ε*κ*n*2 # Two because the right hand side has two columns - @test norm(transpose(a)*(transpose(lua)\cc) - cc,1) < ε*κ*n - end - - # Test whether Ax_ldiv_B!(y, LU, x) indeed overwrites y - resultT = typeof(oneunit(eltyb) / oneunit(eltya)) - - b_dest = similar(b, resultT) - c_dest = similar(c, resultT) - - ldiv!(b_dest, lua, b) - ldiv!(c_dest, lua, c) - @test norm(b_dest - lua \ b, 1) < ε*κ*2n - @test norm(c_dest - lua \ c, 1) < ε*κ*n - - ldiv!(b_dest, transpose(lua), b) - ldiv!(c_dest, transpose(lua), c) - @test norm(b_dest - transpose(lua) \ b, 1) < ε*κ*2n - @test norm(c_dest - transpose(lua) \ c, 1) < ε*κ*n - - ldiv!(b_dest, adjoint(lua), b) - ldiv!(c_dest, adjoint(lua), c) - @test norm(b_dest - lua' \ b, 1) < ε*κ*2n - @test norm(c_dest - lua' \ c, 1) < ε*κ*n - - if eltyb != Int && !(eltya <: Complex) || eltya <: Complex && eltyb <: Complex - p = Matrix(b') - q = Matrix(c') - p_dest = copy(p) - q_dest = copy(q) - rdiv!(p_dest, lua) - rdiv!(q_dest, lua) - @test norm(p_dest - p / lua, 1) < ε*κ*2n - @test norm(q_dest - q / lua, 1) < ε*κ*n - end - end - if eltya <: BlasFloat && eltyb <: BlasFloat - e = rand(eltyb,n,n) - @test norm(e/lua - e/a,1) < ε*κ*n^2 - end - end - @testset "Tridiagonal LU" begin - lud = factorize(d) - f = zeros(eltyb, n+1) - @test_throws DimensionMismatch lud\f - @test_throws DimensionMismatch transpose(lud)\f - @test_throws DimensionMismatch lud'\f - @test_throws DimensionMismatch LinearAlgebra.ldiv!(transpose(lud), f) - let Bs = copy(b) - for bb in (Bs, view(Bs, 1:n, 1)) - @test norm(d*(lud\bb) - bb, 1) < ε*κd*n*2 # Two because the right hand side has two columns - if eltya <: Real - @test norm((transpose(lud)\bb) - Array(transpose(d))\bb, 1) < ε*κd*n*2 # Two because the right hand side has two columns - if eltya != Int && eltyb != Int - @test norm(LinearAlgebra.ldiv!(transpose(lud), copy(bb)) - Array(transpose(d))\bb, 1) < ε*κd*n*2 - end - end - if eltya <: Complex - dummy_factor = 2.5 - # TODO: Remove dummy_factor, this test started failing when the RNG stream changed - # so the factor was added. 
- @test norm((lud'\bb) - Array(d')\bb, 1) < ε*κd*n*2*dummy_factor # Two because the right hand side has two columns - end - end - end - if eltya <: BlasFloat && eltyb <: BlasFloat - e = rand(eltyb,n,n) - @test norm(e/lud - e/d,1) < ε*κ*n^2 - @test norm((transpose(lud)\e') - Array(transpose(d))\e',1) < ε*κd*n^2 - #test singular - du = rand(eltya,n-1) - dl = rand(eltya,n-1) - dd = rand(eltya,n) - dd[1] = zero(eltya) - du[1] = zero(eltya) - dl[1] = zero(eltya) - zT = Tridiagonal(dl,dd,du) - @test !LinearAlgebra.issuccess(lu(zT; check = false)) - end - end - @testset "Thin LU" begin - lua = @inferred lu(a[:,1:n1]) - @test lua.L*lua.U ≈ lua.P*a[:,1:n1] - end - @testset "Fat LU" begin - lua = @inferred lu(a[1:n1,:]) - @test lua.L*lua.U ≈ lua.P*a[1:n1,:] - end - end - - @testset "LU of Symmetric/Hermitian" begin - for HS in (Hermitian(a'a), Symmetric(a'a)) - luhs = @inferred lu(HS) - @test luhs.L*luhs.U ≈ luhs.P*Matrix(HS) - end - end - - @testset "Factorization of symtridiagonal dense matrix with zero ldlt-pivot (#38026)" begin - A = [0.0 -1.0 0.0 0.0 - -1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 -1.0 - 0.0 0.0 -1.0 0.0] - F = factorize(A) - @test all((!isnan).(Matrix(F))) - end -end - -@testset "Small tridiagonal matrices" for T in (Float64, ComplexF64) - A = Tridiagonal(T[], T[1], T[]) - @test inv(A) == A -end - -@testset "Singular matrices" for T in (Float64, ComplexF64) - A = T[1 2; 0 0] - @test_throws SingularException lu(A) - @test_throws SingularException lu!(copy(A)) - @test_throws SingularException lu(A; check = true) - @test_throws SingularException lu!(copy(A); check = true) - @test !issuccess(lu(A; check = false)) - @test !issuccess(lu!(copy(A); check = false)) - @test_throws ZeroPivotException lu(A, NoPivot()) - @test_throws ZeroPivotException lu!(copy(A), NoPivot()) - @test_throws ZeroPivotException lu(A, NoPivot(); check = true) - @test_throws ZeroPivotException lu!(copy(A), NoPivot(); check = true) - @test !issuccess(lu(A, NoPivot(); check = false)) - @test !issuccess(lu!(copy(A), NoPivot(); check = false)) - F = lu(A; check = false) - @test sprint((io, x) -> show(io, "text/plain", x), F) == - "Failed factorization of type $(typeof(F))" -end - -@testset "conversion" begin - Random.seed!(4) - a = Tridiagonal(rand(9),rand(10),rand(9)) - fa = Array(a) - falu = lu(fa) - alu = lu(a) - falu = convert(typeof(falu),alu) - @test Array(alu) == fa - @test AbstractArray(alu) == fa -end - -@testset "Rational Matrices" begin - ## Integrate in general tests when more linear algebra is implemented in julia - a = convert(Matrix{Rational{BigInt}}, rand(1:10//1,n,n))/n - b = rand(1:10,n,2) - @inferred lu(a) - lua = factorize(a) - l,u,p = lua.L, lua.U, lua.p - @test l*u ≈ a[p,:] - @test l[invperm(p),:]*u ≈ a - @test a*inv(lua) ≈ Matrix(I, n, n) - let Bs = b - for b in (Bs, view(Bs, 1:n, 1)) - @test a*(lua\b) ≈ b - end - end - @test @inferred(det(a)) ≈ det(Array{Float64}(a)) -end - -@testset "Rational{BigInt} and BigFloat Hilbert Matrix" begin - ## Hilbert Matrix (very ill conditioned) - ## Testing Rational{BigInt} and BigFloat version - nHilbert = 50 - H = Rational{BigInt}[1//(i+j-1) for i = 1:nHilbert,j = 1:nHilbert] - Hinv = Rational{BigInt}[(-1)^(i+j)*(i+j-1)*binomial(nHilbert+i-1,nHilbert-j)* - binomial(nHilbert+j-1,nHilbert-i)*binomial(i+j-2,i-1)^2 - for i = big(1):nHilbert,j=big(1):nHilbert] - @test inv(H) == Hinv - setprecision(2^10) do - @test norm(Array{Float64}(inv(float(H)) - float(Hinv))) < 1e-100 - end -end - -@testset "logdet" begin - @test @inferred(logdet(ComplexF32[1.0f0 0.5f0; 0.5f0 -1.0f0])) 
=== 0.22314355f0 + 3.1415927f0im - @test_throws DomainError logdet([1 1; 1 -1]) -end - -@testset "REPL printing" begin - bf = IOBuffer() - show(bf, "text/plain", lu(Matrix(I, 4, 4))) - seekstart(bf) - @test String(take!(bf)) == """ -LinearAlgebra.LU{Float64, Matrix{Float64}, Vector{$Int}} -L factor: -4×4 Matrix{Float64}: - 1.0 0.0 0.0 0.0 - 0.0 1.0 0.0 0.0 - 0.0 0.0 1.0 0.0 - 0.0 0.0 0.0 1.0 -U factor: -4×4 Matrix{Float64}: - 1.0 0.0 0.0 0.0 - 0.0 1.0 0.0 0.0 - 0.0 0.0 1.0 0.0 - 0.0 0.0 0.0 1.0""" -end - -@testset "propertynames" begin - names = sort!(collect(string.(Base.propertynames(lu(rand(3,3)))))) - @test names == ["L", "P", "U", "p"] - allnames = sort!(collect(string.(Base.propertynames(lu(rand(3,3)), true)))) - @test allnames == ["L", "P", "U", "factors", "info", "ipiv", "p"] -end - -include("trickyarithmetic.jl") - -@testset "lu with type whose sum is another type" begin - A = TrickyArithmetic.A[1 2; 3 4] - ElT = TrickyArithmetic.D{TrickyArithmetic.C,TrickyArithmetic.C} - B = lu(A, NoPivot()) - @test B isa LinearAlgebra.LU{ElT,Matrix{ElT}} -end - -# dimensional correctness: -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs - -@testset "lu factorization with dimension type" begin - n = 4 - A = Matrix(Furlong(1.0) * I, n, n) - F = lu(A).factors - @test Diagonal(F) == Diagonal(A) - # upper triangular part has a unit Furlong{1} - @test all(x -> typeof(x) == Furlong{1, Float64}, F[i,j] for j=1:n for i=1:j) - # lower triangular part is unitless Furlong{0} - @test all(x -> typeof(x) == Furlong{0, Float64}, F[i,j] for j=1:n for i=j+1:n) -end - -@testset "Issue #30917. Determinant of integer matrix" begin - @test det([1 1 0 0 1 0 0 0 - 1 0 1 0 0 1 0 0 - 1 0 0 1 0 0 1 0 - 0 1 1 1 0 0 0 0 - 0 1 0 0 0 0 1 1 - 0 0 1 0 1 0 0 1 - 0 0 0 1 1 1 0 0 - 0 0 0 0 1 1 0 1]) ≈ 6 -end - -@testset "Issue #33177. No ldiv!(LU, Adjoint)" begin - A = [1 0; 1 1] - B = [1 2; 2 8] - F = lu(B) - @test (A / F') * B == A - @test (A' / F') * B == A' - - a = complex.(randn(2), randn(2)) - @test (a' / F') * B ≈ a' - @test (transpose(a) / F') * B ≈ transpose(a) - - A = complex.(randn(2, 2), randn(2, 2)) - @test (A' / F') * B ≈ A' - @test (transpose(A) / F') * B ≈ transpose(A) -end - -@testset "0x0 matrix" begin - A = ones(0, 0) - F = lu(A) - @test F.U == ones(0, 0) - @test F.L == ones(0, 0) - @test F.P == ones(0, 0) - @test F.p == [] -end - -@testset "more rdiv! 
methods" begin - for elty in (Float16, Float64, ComplexF64), transform in (transpose, adjoint) - A = randn(elty, 5, 5) - C = copy(A) - B = randn(elty, 5, 5) - @test rdiv!(transform(A), transform(lu(B))) ≈ transform(C) / transform(B) - end -end - -@testset "transpose(A) / lu(B)' should not overwrite A (#36657)" begin - for elty in (Float16, Float64, ComplexF64) - A = randn(elty, 5, 5) - B = randn(elty, 5, 5) - C = copy(A) - a = randn(elty, 5) - c = copy(a) - @test transpose(A) / lu(B)' ≈ transpose(A) / B' - @test transpose(a) / lu(B)' ≈ transpose(a) / B' - @test A == C - @test a == c - end -end - -@testset "lu on *diagonal matrices" begin - dl = rand(3) - d = rand(4) - Bl = Bidiagonal(d, dl, :L) - Bu = Bidiagonal(d, dl, :U) - Tri = Tridiagonal(dl, d, dl) - Sym = SymTridiagonal(d, dl) - D = Diagonal(d) - b = ones(4) - B = rand(4,4) - for A in (Bl, Bu, Tri, Sym, D), pivot in (NoPivot(), RowMaximum()) - @test A\b ≈ lu(A, pivot)\b - @test B/A ≈ B/lu(A, pivot) - @test B/A ≈ B/Matrix(A) - @test Matrix(lu(A, pivot)) ≈ A - @test @inferred(lu(A)) isa LU - if A isa Union{Bidiagonal, Diagonal, Tridiagonal, SymTridiagonal} - @test lu(A) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}} - @test lu(A, pivot) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}} - @test lu(A, pivot; check = false) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}} - end - end -end - -@testset "can push to vector after 3-arg ldiv! (#43507)" begin - u = rand(3) - A = rand(3,3) - b = rand(3) - ldiv!(u,lu(A),b) - push!(b,4.0) - @test length(b) == 4 -end - -@testset "NaN matrix should throw error" begin - for eltya in (NaN16, NaN32, NaN64, BigFloat(NaN)) - r = fill(eltya, 2, 3) - c = fill(complex(eltya, eltya), 2, 3) - @test_throws ArgumentError lu(r) - @test_throws ArgumentError lu(c) - end -end - -@testset "more generic ldiv! #35419" begin - A = rand(3, 3) - b = rand(3) - @test A * ldiv!(lu(A), Base.ReshapedArray(copy(b)', (3,), ())) ≈ b -end - -@testset "generic lu!" begin - A = rand(3,3); B = deepcopy(A); C = A[2:3,2:3] - Asub1 = @view(A[2:3,2:3]) - F1 = lu!(Asub1) - Asub2 = @view(B[[2,3],[2,3]]) - F2 = lu!(Asub2) - @test Matrix(F1) ≈ Matrix(F2) ≈ C -end - -end # module TestLU diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl deleted file mode 100644 index e6000a4b24e2d..0000000000000 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ /dev/null @@ -1,1010 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestMatmul - -using Base: rtoldefault -using Test, LinearAlgebra, Random -using LinearAlgebra: mul!, Symmetric, Hermitian - -## Test Julia fallbacks to BLAS routines - -mul_wrappers = [ - m -> m, - m -> Symmetric(m, :U), - m -> Symmetric(m, :L), - m -> Hermitian(m, :U), - m -> Hermitian(m, :L), - m -> adjoint(m), - m -> transpose(m)] - -@testset "matrices with zero dimensions" begin - for (dimsA, dimsB, dimsC) in ( - ((0, 5), (5, 3), (0, 3)), - ((3, 5), (5, 0), (3, 0)), - ((3, 0), (0, 4), (3, 4)), - ((0, 5), (5, 0), (0, 0)), - ((0, 0), (0, 4), (0, 4)), - ((3, 0), (0, 0), (3, 0)), - ((0, 0), (0, 0), (0, 0))) - @test Matrix{Float64}(undef, dimsA) * Matrix{Float64}(undef, dimsB) == zeros(dimsC) - end - @test Matrix{Float64}(undef, 5, 0) |> t -> t't == zeros(0, 0) - @test Matrix{Float64}(undef, 5, 0) |> t -> t * t' == zeros(5, 5) - @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t't == zeros(0, 0) - @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t * t' == zeros(5, 5) -end -@testset "2x2 matmul" begin - AA = [1 2; 3 4] - BB = [5 6; 7 8] - AAi = AA + (0.5 * im) .* BB - BBi = BB + (2.5 * im) .* AA[[2, 1], [2, 1]] - for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2)) - @test A * B == [19 22; 43 50] - @test *(transpose(A), B) == [26 30; 38 44] - @test *(A, transpose(B)) == [17 23; 39 53] - @test *(transpose(A), transpose(B)) == [23 31; 34 46] - end - for Ai in (copy(AAi), view(AAi, 1:2, 1:2)), Bi in (copy(BBi), view(BBi, 1:2, 1:2)) - @test Ai * Bi == [-21+53.5im -4.25+51.5im; -12+95.5im 13.75+85.5im] - @test *(adjoint(Ai), Bi) == [68.5-12im 57.5-28im; 88-3im 76.5-25im] - @test *(Ai, adjoint(Bi)) == [64.5+5.5im 43+31.5im; 104-18.5im 80.5+31.5im] - @test *(adjoint(Ai), adjoint(Bi)) == [-28.25-66im 9.75-58im; -26-89im 21-73im] - @test_throws DimensionMismatch [1 2; 0 0; 0 0] * [1 2] - end - for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers - @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) - end - @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 3), AA, BB) -end -@testset "3x3 matmul" begin - AA = [1 2 3; 4 5 6; 7 8 9] .- 5 - BB = [1 0 5; 6 -10 3; 2 -4 -1] - AAi = AA + (0.5 * im) .* BB - BBi = BB + (2.5 * im) .* AA[[2, 1, 3], [2, 3, 1]] - for A in (copy(AA), view(AA, 1:3, 1:3)), B in (copy(BB), view(BB, 1:3, 1:3)) - @test A * B == [-26 38 -27; 1 -4 -6; 28 -46 15] - @test *(adjoint(A), B) == [-6 2 -25; 3 -12 -18; 12 -26 -11] - @test *(A, adjoint(B)) == [-14 0 6; 4 -3 -3; 22 -6 -12] - @test *(adjoint(A), adjoint(B)) == [6 -8 -6; 12 -9 -9; 18 -10 -12] - end - for Ai in (copy(AAi), view(AAi, 1:3, 1:3)), Bi in (copy(BBi), view(BBi, 1:3, 1:3)) - @test Ai * Bi == [-44.75+13im 11.75-25im -38.25+30im; -47.75-16.5im -51.5+51.5im -56+6im; 16.75-4.5im -53.5+52im -15.5im] - @test *(adjoint(Ai), Bi) == [-21+2im -1.75+49im -51.25+19.5im; 25.5+56.5im -7-35.5im 22+35.5im; -3+12im -32.25+43im -34.75-2.5im] - @test *(Ai, adjoint(Bi)) == [-20.25+15.5im -28.75-54.5im 22.25+68.5im; -12.25+13im -15.5+75im -23+27im; 18.25+im 1.5+94.5im -27-54.5im] - @test *(adjoint(Ai), adjoint(Bi)) == [1+2im 20.75+9im -44.75+42im; 19.5+17.5im -54-36.5im 51-14.5im; 13+7.5im 11.25+31.5im -43.25-14.5im] - @test_throws DimensionMismatch [1 2 3; 0 0 0; 0 0 0] * [1 2 3] - end - for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers - @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) - end - @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 4, 4), AA, BB) -end - -# Generic 
AbstractArrays -module MyArray15367 -using Test, Random - -struct MyArray{T,N} <: AbstractArray{T,N} - data::Array{T,N} -end - -Base.size(A::MyArray) = size(A.data) -Base.getindex(A::MyArray, indices...) = A.data[indices...] - -A = MyArray(rand(4, 5)) -b = rand(5) -@test A * b ≈ A.data * b -end - -@testset "Generic integer matrix multiplication" begin - AA = [1 2 3; 4 5 6] .- 3 - BB = [2 -2; 3 -5; -4 7] - for A in (copy(AA), view(AA, 1:2, 1:3)), B in (copy(BB), view(BB, 1:3, 1:2)) - @test A * B == [-7 9; -4 9] - @test *(transpose(A), transpose(B)) == [-6 -11 15; -6 -13 18; -6 -15 21] - end - AA = fill(1, 2, 100) - BB = fill(1, 100, 3) - for A in (copy(AA), view(AA, 1:2, 1:100)), B in (copy(BB), view(BB, 1:100, 1:3)) - @test A * B == [100 100 100; 100 100 100] - end - AA = rand(1:20, 5, 5) .- 10 - BB = rand(1:20, 5, 5) .- 10 - CC = Matrix{Int}(undef, size(AA, 1), size(BB, 2)) - for A in (copy(AA), view(AA, 1:5, 1:5)), B in (copy(BB), view(BB, 1:5, 1:5)), C in (copy(CC), view(CC, 1:5, 1:5)) - @test *(transpose(A), B) == A' * B - @test *(A, transpose(B)) == A * B' - # Preallocated - @test mul!(C, A, B) == A * B - @test mul!(C, transpose(A), B) == A' * B - @test mul!(C, A, transpose(B)) == A * B' - @test mul!(C, transpose(A), transpose(B)) == A' * B' - @test LinearAlgebra.mul!(C, adjoint(A), transpose(B)) == A' * transpose(B) - - # Inplace multiply-add - α = rand(-10:10) - β = rand(-10:10) - rand!(C, -10:10) - βC = β * C - _C0 = copy(C) - C0() = (C .= _C0; C) # reset C but don't change the container type - @test mul!(C0(), A, B, α, β) == α * A * B .+ βC - @test mul!(C0(), transpose(A), B, α, β) == α * A' * B .+ βC - @test mul!(C0(), A, transpose(B), α, β) == α * A * B' .+ βC - @test mul!(C0(), transpose(A), transpose(B), α, β) == α * A' * B' .+ βC - @test mul!(C0(), adjoint(A), transpose(B), α, β) == α * A' * transpose(B) .+ βC - - #test DimensionMismatch for generic_matmatmul - @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(A), transpose(fill(1, 4, 4))) - @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(fill(1, 4, 4)), transpose(B)) - end - vv = [1, 2] - CC = Matrix{Int}(undef, 2, 2) - for v in (copy(vv), view(vv, 1:2)), C in (copy(CC), view(CC, 1:2, 1:2)) - @test @inferred(mul!(C, v, adjoint(v))) == [1 2; 2 4] - - C .= [1 0; 0 1] - @test @inferred(mul!(C, v, adjoint(v), 2, 3)) == [5 4; 4 11] - end -end - -@testset "generic_matvecmul" begin - AA = rand(5, 5) - BB = rand(5) - for A in (copy(AA), view(AA, 1:5, 1:5)), B in (copy(BB), view(BB, 1:5)) - @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(zeros(6), 'N', A, B) - @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(B, 'N', A, zeros(6)) - end - vv = [1, 2, 3] - CC = Matrix{Int}(undef, 3, 3) - for v in (copy(vv), view(vv, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3)) - @test mul!(C, v, transpose(v)) == v * v' - C .= C0 = rand(-10:10, size(C)) - @test mul!(C, v, transpose(v), 2, 3) == 2v * v' .+ 3C0 - end - vvf = map(Float64, vv) - CC = Matrix{Float64}(undef, 3, 3) - for vf in (copy(vvf), view(vvf, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3)) - @test mul!(C, vf, transpose(vf)) == vf * vf' - C .= C0 = rand(eltype(C), size(C)) - @test mul!(C, vf, transpose(vf), 2, 3) ≈ 2vf * vf' .+ 3C0 - end -end - -@testset "generic_matvecmul for vectors of vectors" begin - @testset "matrix of scalars" begin - u = [[1, 2], [3, 4]] - A = [1 2; 3 4] - v = [[0, 0], [0, 0]] - Au = [[7, 10], [15, 22]] - @test A * u == Au - mul!(v, A, u) - @test v == Au - mul!(v, A, u, 2, -1) - @test v == Au - end - - @testset "matrix 
of matrices" begin - u = [[1, 2], [3, 4]] - A = Matrix{Matrix{Int}}(undef, 2, 2) - A[1, 1] = [1 2; 3 4] - A[1, 2] = [5 6; 7 8] - A[2, 1] = [9 10; 11 12] - A[2, 2] = [13 14; 15 16] - v = [[0, 0], [0, 0]] - Au = [[44, 64], [124, 144]] - @test A * u == Au - mul!(v, A, u) - @test v == Au - mul!(v, A, u, 2, -1) - @test v == Au - end -end - -@testset "generic_matmatmul for matrices of vectors" begin - B = Matrix{Vector{Int}}(undef, 2, 2) - B[1, 1] = [1, 2] - B[2, 1] = [3, 4] - B[1, 2] = [5, 6] - B[2, 2] = [7, 8] - A = [1 2; 3 4] - C = Matrix{Vector{Int}}(undef, 2, 2) - AB = Matrix{Vector{Int}}(undef, 2, 2) - AB[1, 1] = [7, 10] - AB[2, 1] = [15, 22] - AB[1, 2] = [19, 22] - AB[2, 2] = [43, 50] - @test A * B == AB - mul!(C, A, B) - @test C == AB - mul!(C, A, B, 2, -1) - @test C == AB - LinearAlgebra._generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(2, -1)) - @test C == AB -end - -@testset "fallbacks & such for BlasFloats" begin - AA = rand(Float64, 6, 6) - BB = rand(Float64, 6, 6) - CC = zeros(Float64, 6, 6) - for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6)) - @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B) - @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B) - @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B - - # Inplace multiply-add - α = rand(Float64) - β = rand(Float64) - rand!(C) - βC = β * C - _C0 = copy(C) - C0() = (C .= _C0; C) # reset C but don't change the container type - @test mul!(C0(), transpose(A), transpose(B), α, β) ≈ α * transpose(A) * transpose(B) .+ βC - @test mul!(C0(), A, adjoint(B), α, β) ≈ α * A * transpose(B) .+ βC - @test mul!(C0(), adjoint(A), B, α, β) ≈ α * transpose(A) * B .+ βC - end -end - -@testset "mixed Blas-non-Blas matmul" begin - AA = rand(-10:10, 6, 6) - BB = rand(Float64, 6, 6) - CC = zeros(Float64, 6, 6) - for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6)) - @test LinearAlgebra.mul!(C, A, B) == A * B - @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B) - @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B) - @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B - end -end - -@testset "matrix algebra with subarrays of floats (stride != 1)" begin - A = reshape(map(Float64, 1:20), 5, 4) - Aref = A[1:2:end, 1:2:end] - Asub = view(A, 1:2:5, 1:2:4) - b = [1.2, -2.5] - @test (Aref * b) == (Asub * b) - @test *(transpose(Asub), Asub) == *(transpose(Aref), Aref) - @test *(Asub, transpose(Asub)) == *(Aref, transpose(Aref)) - Ai = A .+ im - Aref = Ai[1:2:end, 1:2:end] - Asub = view(Ai, 1:2:5, 1:2:4) - @test *(adjoint(Asub), Asub) == *(adjoint(Aref), Aref) - @test *(Asub, adjoint(Asub)) == *(Aref, adjoint(Aref)) -end - -@testset "matrix x matrix with negative stride" begin - M = reshape(map(Float64, 1:77), 7, 11) - N = reshape(map(Float64, 1:63), 9, 7) - U = view(M, 7:-1:1, 11:-2:1) - V = view(N, 7:-1:2, 7:-1:1) - @test U * V ≈ Matrix(U) * Matrix(V) -end - -@testset "dot product of subarrays of vectors (floats, negative stride, issue #37767)" begin - for T in (Float32, Float64, ComplexF32, ComplexF64) - a = Vector{T}(3:2:7) - b = Vector{T}(1:10) - v = view(b, 7:-2:3) - @test dot(a, Vector(v)) ≈ 67.0 - @test dot(a, v) ≈ 67.0 - @test dot(v, a) ≈ 67.0 - @test dot(Vector(v), Vector(v)) ≈ 83.0 - @test dot(v, v) ≈ 83.0 - end -end - -@testset "dot product of stride-vector like input" begin - for T in (Float32, Float64, 
ComplexF32, ComplexF64) - a = randn(T, 10) - b = view(a, 1:10) - c = reshape(b, 5, 2) - d = view(c, :, 1:2) - r = sum(abs2, a) - for x in (a,b,c,d), y in (a,b,c,d) - @test dot(x, y) ≈ r - end - end -end - -@testset "Complex matrix x real MatOrVec etc (issue #29224)" for T in (Float32, Float64) - A0 = randn(complex(T), 10, 10) - B0 = randn(T, 10, 10) - @testset "Combination Mat{$(complex(T))} Mat{$T}" for Bax1 in (1:5, 2:2:10), Bax2 in (1:5, 2:2:10) - B = view(A0, Bax1, Bax2) - tB = transpose(B) - Bd, tBd = copy(B), copy(tB) - for Aax1 in (1:5, 2:2:10, (:)), Aax2 in (1:5, 2:2:10) - A = view(A0, Aax1, Aax2) - AB_correct = copy(A) * Bd - AtB_correct = copy(A) * tBd - @test A*Bd ≈ AB_correct # view times matrix - @test A*B ≈ AB_correct # view times view - @test A*tBd ≈ AtB_correct # view times transposed matrix - @test A*tB ≈ AtB_correct # view times transposed view - end - end - x = randn(T, 10) - y0 = similar(A0, 20) - @testset "Combination Mat{$(complex(T))} Vec{$T}" for Aax1 in (1:5, 2:2:10, (:)), Aax2 in (1:5, 2:2:10) - A = view(A0, Aax1, Aax2) - Ad = copy(A) - for indx in (1:5, 1:2:10, 6:-1:2) - vx = view(x, indx) - dx = x[indx] - Ax_correct = Ad*dx - @test A*vx ≈ A*dx ≈ Ad*vx ≈ Ax_correct # view/matrix times view/vector - for indy in (1:2:2size(A,1), size(A,1):-1:1) - y = view(y0, indy) - @test mul!(y, A, vx) ≈ mul!(y, A, dx) ≈ mul!(y, Ad, vx) ≈ - mul!(y, Ad, dx) ≈ Ax_correct # test for uncontiguous dest - end - end - end -end - -@testset "real matrix x complex vec" begin - _matmulres(M, v) = [mapreduce(*, +, row, v) for row in eachrow(M)] - testmatmul(M, v) = @test M * v ≈ _matmulres(M, v) - - @testset for T in (Float32, Float64), n = (4, 5) - M1 = reshape(Vector{T}(1:n^2), n, n) - M2 = reinterpret(reshape, T, [Tuple(T(i + j) for j in 1:n) for i in 1:n]) - v = convert(Vector{Complex{T}}, (1:n) .+ im .* (4 .+ (1:n))) - - for M in (M1, M2) - M_view_cont = @view M[:, :] - v_view_cont = @view v[:] - for _M in (M, M_view_cont), _v in (v, v_view_cont) - testmatmul(_M, _v) - end - - # construct a view with strides(M, 1) == 1 and strides(M, 2) != 1 - ax_noncont = 1:2:n - n1 = length(ax_noncont) - M_view_noncont = @view M[1:n1, ax_noncont] - v_view_noncont = @view v[ax_noncont] - testmatmul(M_view_noncont, v_view_noncont) - - @testset for op in (transpose, adjoint) - for _M in (M, M_view_cont), _v in (v, v_view_cont) - _M2 = op(_M) - testmatmul(_M2, _v) - end - _M2 = op(M_view_noncont) - testmatmul(_M2, v_view_noncont) - end - end - end -end - -@testset "matrix x vector with negative lda or 0 stride" for T in (Float32, Float64) - for TA in (T, complex(T)), TB in (T, complex(T)) - A = view(randn(TA, 10, 10), 1:10, 10:-1:1) # negative lda - v = view([randn(TB)], 1 .+ 0(1:10)) # 0 stride - Ad, vd = copy(A), copy(v) - @test Ad * vd ≈ A * vd ≈ Ad * v ≈ A * v - end -end - -@testset "issue #15286" begin - A = reshape(map(Float64, 1:20), 5, 4) - C = zeros(8, 8) - sC = view(C, 1:2:8, 1:2:8) - B = reshape(map(Float64, -9:10), 5, 4) - @test mul!(sC, transpose(A), A) == A' * A - @test mul!(sC, transpose(A), B) == A' * B - - Aim = A .- im - C = zeros(ComplexF64, 8, 8) - sC = view(C, 1:2:8, 1:2:8) - B = reshape(map(Float64, -9:10), 5, 4) .+ im - @test mul!(sC, adjoint(Aim), Aim) == Aim' * Aim - @test mul!(sC, adjoint(Aim), B) == Aim' * B -end - -@testset "syrk & herk" begin - AA = reshape(1:1503, 501, 3) .- 750.0 - res = Float64[135228751 9979252 -115270247; 9979252 10481254 10983256; -115270247 10983256 137236759] - for A in (copy(AA), view(AA, 1:501, 1:3)) - @test *(transpose(A), A) == res - @test 
*(adjoint(A), transpose(copy(A'))) == res - end - cutoff = 501 - A = reshape(1:6*cutoff, 2 * cutoff, 3) .- (6 * cutoff) / 2 - Asub = view(A, 1:2:2*cutoff, 1:3) - Aref = A[1:2:2*cutoff, 1:3] - @test *(transpose(Asub), Asub) == *(transpose(Aref), Aref) - Ai = A .- im - Asub = view(Ai, 1:2:2*cutoff, 1:3) - Aref = Ai[1:2:2*cutoff, 1:3] - @test *(adjoint(Asub), Asub) == *(adjoint(Aref), Aref) - - A5x5, A6x5 = Matrix{Float64}.(undef, ((5, 5), (6, 5))) - @test_throws DimensionMismatch LinearAlgebra.syrk_wrapper!(A5x5, 'N', A6x5) - @test_throws DimensionMismatch LinearAlgebra.herk_wrapper!(A5x5, 'N', A6x5) -end - -@testset "matmul for types w/o sizeof (issue #1282)" begin - AA = fill(complex(1, 1), 10, 10) - for A in (copy(AA), view(AA, 1:10, 1:10)) - A2 = A^2 - @test A2[1, 1] == 20im - end -end - -@testset "mul! (scaling)" begin - A5x5, b5, C5x6 = Array{Float64}.(undef, ((5, 5), 5, (5, 6))) - for A in (A5x5, view(A5x5, :, :)), b in (b5, view(b5, :)), C in (C5x6, view(C5x6, :, :)) - @test_throws DimensionMismatch mul!(A, Diagonal(b), C) - end -end - -@testset "muladd" begin - A23 = reshape(1:6, 2, 3) .+ 0 - B34 = reshape(1:12, 3, 4) .+ im - u2 = [10, 20] - v3 = [3, 5, 7] .+ im - w4 = [11, 13, 17, 19im] - - @testset "matrix-matrix" begin - @test muladd(A23, B34, 0) == A23 * B34 - @test muladd(A23, B34, 100) == A23 * B34 .+ 100 - @test muladd(A23, B34, u2) == A23 * B34 .+ u2 - @test muladd(A23, B34, w4') == A23 * B34 .+ w4' - @test_throws DimensionMismatch muladd(B34, A23, 1) - @test muladd(ones(1, 3), ones(3, 4), ones(1, 4)) == fill(4.0, 1, 4) - @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(9, 4)) - - # broadcasting fallback method allows trailing dims - @test muladd(A23, B34, ones(2, 4, 1)) == A23 * B34 + ones(2, 4, 1) - @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(9, 4, 1)) - @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(1, 4, 9)) - # and catches z::Array{T,0} - @test muladd(A23, B34, fill(0)) == A23 * B34 - end - @testset "matrix-vector" begin - @test muladd(A23, v3, 0) == A23 * v3 - @test muladd(A23, v3, 100) == A23 * v3 .+ 100 - @test muladd(A23, v3, u2) == A23 * v3 .+ u2 - @test muladd(A23, v3, im) isa Vector{Complex{Int}} - @test muladd(ones(1, 3), ones(3), ones(1)) == [4] - @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(7)) - - # fallback - @test muladd(A23, v3, ones(2, 1, 1)) == A23 * v3 + ones(2, 1, 1) - @test_throws DimensionMismatch muladd(A23, v3, ones(2, 2)) - @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(7, 1)) - @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(1, 7)) - @test muladd(A23, v3, fill(0)) == A23 * v3 - end - @testset "adjoint-matrix" begin - @test muladd(v3', B34, 0) isa Adjoint - @test muladd(v3', B34, 2im) == v3' * B34 .+ 2im - @test muladd(v3', B34, w4') == v3' * B34 .+ w4' - - # via fallback - @test muladd(v3', B34, ones(1, 4)) == (B34' * v3 + ones(4, 1))' - @test_throws DimensionMismatch muladd(v3', B34, ones(7, 4)) - @test_throws DimensionMismatch muladd(v3', B34, ones(1, 4, 7)) - @test muladd(v3', B34, fill(0)) == v3' * B34 # does not make an Adjoint - end - @testset "vector-adjoint" begin - @test muladd(u2, v3', 0) isa Matrix - @test muladd(u2, v3', 99) == u2 * v3' .+ 99 - @test muladd(u2, v3', A23) == u2 * v3' .+ A23 - - @test muladd(u2, v3', ones(2, 3, 1)) == u2 * v3' + ones(2, 3, 1) - @test_throws DimensionMismatch muladd(u2, v3', ones(2, 3, 4)) - @test_throws DimensionMismatch muladd([1], v3', ones(7, 3)) - @test muladd(u2, v3', fill(0)) == u2 * 
v3' - end - @testset "dot" begin # all use muladd(::Any, ::Any, ::Any) - @test muladd(u2', u2, 0) isa Number - @test muladd(v3', v3, im) == dot(v3, v3) + im - @test muladd(u2', u2, [1]) == [dot(u2, u2) + 1] - @test_throws DimensionMismatch muladd(u2', u2, [1, 1]) == [dot(u2, u2) + 1] - @test muladd(u2', u2, fill(0)) == dot(u2, u2) - end - @testset "arrays of arrays" begin - vofm = [rand(1:9, 2, 2) for _ in 1:3] - Mofm = [rand(1:9, 2, 2) for _ in 1:3, _ in 1:3] - - @test muladd(vofm', vofm, vofm[1]) == vofm' * vofm .+ vofm[1] # inner - @test muladd(vofm, vofm', Mofm) == vofm * vofm' .+ Mofm # outer - @test muladd(vofm', Mofm, vofm') == vofm' * Mofm .+ vofm' # bra-mat - @test muladd(Mofm, Mofm, vofm) == Mofm * Mofm .+ vofm # mat-mat - @test muladd(Mofm, vofm, vofm) == Mofm * vofm .+ vofm # mat-vec - end -end - -@testset "muladd & structured matrices" begin - A33 = reshape(1:9, 3, 3) .+ im - v3 = [3, 5, 7im] - - # no special treatment - @test muladd(Symmetric(A33), Symmetric(A33), 1) == Symmetric(A33) * Symmetric(A33) .+ 1 - @test muladd(Hermitian(A33), Hermitian(A33), v3) == Hermitian(A33) * Hermitian(A33) .+ v3 - @test muladd(adjoint(A33), transpose(A33), A33) == A33' * transpose(A33) .+ A33 - - u1 = muladd(UpperTriangular(A33), UpperTriangular(A33), Diagonal(v3)) - @test u1 isa UpperTriangular - @test u1 == UpperTriangular(A33) * UpperTriangular(A33) + Diagonal(v3) - - # diagonal - @test muladd(Diagonal(v3), Diagonal(A33), Diagonal(v3)).diag == ([1, 5, 9] .+ im .+ 1) .* v3 - - # uniformscaling - @test muladd(Diagonal(v3), I, I).diag == v3 .+ 1 - @test muladd(2 * I, 3 * I, I).λ == 7 - @test muladd(A33, A33', I) == A33 * A33' + I - - # https://github.com/JuliaLang/julia/issues/38426 - @test @evalpoly(A33, 1.0 * I, 1.0 * I) == I + A33 - @test @evalpoly(A33, 1.0 * I, 1.0 * I, 1.0 * I) == I + A33 + A33^2 -end - -# issue #6450 -@test dot(Any[1.0, 2.0], Any[3.5, 4.5]) === 12.5 - -@testset "dot" for elty in (Float32, Float64, ComplexF32, ComplexF64) - x = convert(Vector{elty}, [1.0, 2.0, 3.0]) - y = convert(Vector{elty}, [3.5, 4.5, 5.5]) - @test_throws DimensionMismatch dot(x, 1:2, y, 1:3) - @test_throws BoundsError dot(x, 1:4, y, 1:4) - @test_throws BoundsError dot(x, 1:3, y, 2:4) - @test dot(x, 1:2, y, 1:2) == convert(elty, 12.5) - @test transpose(x) * y == convert(elty, 29.0) - X = convert(Matrix{elty}, [1.0 2.0; 3.0 4.0]) - Y = convert(Matrix{elty}, [1.5 2.5; 3.5 4.5]) - @test dot(X, Y) == convert(elty, 35.0) - Z = Matrix{elty}[reshape(1:4, 2, 2), fill(1, 2, 2)] - @test dot(Z, Z) == convert(elty, 34.0) -end - -dot1(x, y) = invoke(dot, Tuple{Any,Any}, x, y) -dot2(x, y) = invoke(dot, Tuple{AbstractArray,AbstractArray}, x, y) -@testset "generic dot" begin - AA = [1+2im 3+4im; 5+6im 7+8im] - BB = [2+7im 4+1im; 3+8im 6+5im] - for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2)) - @test dot(A, B) == dot(vec(A), vec(B)) == dot1(A, B) == dot2(A, B) == dot(float.(A), float.(B)) - @test dot(Int[], Int[]) == 0 == dot1(Int[], Int[]) == dot2(Int[], Int[]) - @test_throws MethodError dot(Any[], Any[]) - @test_throws MethodError dot1(Any[], Any[]) - @test_throws MethodError dot2(Any[], Any[]) - for n1 = 0:2, n2 = 0:2, d in (dot, dot1, dot2) - if n1 != n2 - @test_throws DimensionMismatch d(1:n1, 1:n2) - else - @test d(1:n1, 1:n2) ≈ norm(1:n1)^2 - end - end - end -end - -@testset "Issue 11978" begin - A = Matrix{Matrix{Float64}}(undef, 2, 2) - A[1, 1] = Matrix(1.0I, 3, 3) - A[2, 2] = Matrix(1.0I, 2, 2) - A[1, 2] = Matrix(1.0I, 3, 2) - A[2, 1] = Matrix(1.0I, 2, 3) - b = 
Vector{Vector{Float64}}(undef, 2) - b[1] = fill(1.0, 3) - b[2] = fill(1.0, 2) - @test A * b == Vector{Float64}[[2, 2, 1], [2, 2]] -end - -@test_throws ArgumentError LinearAlgebra.copytri!(Matrix{Float64}(undef, 10, 10), 'Z') - -@testset "Issue 30055" begin - B = [1+im 2+im 3+im; 4+im 5+im 6+im; 7+im 9+im im] - A = UpperTriangular(B) - @test copy(transpose(A)) == transpose(A) - @test copy(A') == A' - A = LowerTriangular(B) - @test copy(transpose(A)) == transpose(A) - @test copy(A') == A' - B = Matrix{Matrix{Complex{Int}}}(undef, 2, 2) - B[1, 1] = [1+im 2+im; 3+im 4+im] - B[2, 1] = [1+2im 1+3im; 1+3im 1+4im] - B[1, 2] = [7+im 8+2im; 9+3im 4im] - B[2, 2] = [9+im 8+im; 7+im 6+im] - A = UpperTriangular(B) - @test copy(transpose(A)) == transpose(A) - @test copy(A') == A' - A = LowerTriangular(B) - @test copy(transpose(A)) == transpose(A) - @test copy(A') == A' -end - -@testset "gemv! and gemm_wrapper for $elty" for elty in [Float32, Float64, ComplexF64, ComplexF32] - A10x10, x10, x11 = Array{elty}.(undef, ((10, 10), 10, 11)) - @test_throws DimensionMismatch LinearAlgebra.gemv!(x10, 'N', A10x10, x11) - @test_throws DimensionMismatch LinearAlgebra.gemv!(x11, 'N', A10x10, x10) - @test LinearAlgebra.gemv!(elty[], 'N', Matrix{elty}(undef, 0, 0), elty[]) == elty[] - @test LinearAlgebra.gemv!(x10, 'N', Matrix{elty}(undef, 10, 0), elty[]) == zeros(elty, 10) - - I0x0 = Matrix{elty}(I, 0, 0) - I10x10 = Matrix{elty}(I, 10, 10) - I10x11 = Matrix{elty}(I, 10, 11) - @test LinearAlgebra.gemm_wrapper('N', 'N', I10x10, I10x10) == I10x10 - @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10, 'N', 'N', I10x11, I10x10) - @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10, 'N', 'N', I0x0, I0x0) - - A = rand(elty, 3, 3) - @test LinearAlgebra.matmul3x3('T', 'N', A, Matrix{elty}(I, 3, 3)) == transpose(A) -end - -@testset "#13593, #13488" begin - aa = rand(3, 3) - bb = rand(3, 3) - for a in (copy(aa), view(aa, 1:3, 1:3)), b in (copy(bb), view(bb, 1:3, 1:3)) - @test_throws ArgumentError mul!(a, a, b) - @test_throws ArgumentError mul!(a, b, a) - @test_throws ArgumentError mul!(a, a, a) - end -end - -@testset "#35163" begin - # typemax(Int32) * Int32(1) + Int32(1) * Int32(1) should wrap around - # not promote to Int64, convert to Int32 and throw inexacterror - val = mul!(Int32[1], fill(typemax(Int32), 1, 1), Int32[1], Int32(1), Int32(1)) - @test val[1] == typemin(Int32) -end - -# Number types that lack conversion to the destination type -struct RootInt - i::Int -end -import Base: *, adjoint, transpose -import LinearAlgebra: Adjoint, Transpose -(*)(x::RootInt, y::RootInt) = x.i * y.i -adjoint(x::RootInt) = x -transpose(x::RootInt) = x -Adjoint(x::RootInt) = x -Transpose(x::RootInt) = x -# TODO once Adjoint/Transpose constructors call adjoint/transpose recursively -# rather than Adjoint/Transpose, the additional definitions should become unnecessary - -@test Base.promote_op(*, RootInt, RootInt) === Int - -@testset "#14293" begin - a = [RootInt(3)] - C = [0;;] - mul!(C, a, transpose(a)) - @test C[1] == 9 - C = [1;;] - mul!(C, a, transpose(a), 2, 3) - @test C[1] == 21 - a = [RootInt(2), RootInt(10)] - @test a * adjoint(a) == [4 20; 20 100] - A = [RootInt(3) RootInt(5)] - @test A * a == [56] -end - -function test_mul(C, A, B) - mul!(C, A, B) - @test Array(A) * Array(B) ≈ C - @test A * B ≈ C - - # This is similar to how `isapprox` choose `rtol` (when `atol=0`) - # but consider all number types involved: - rtol = max(rtoldefault.(real.(eltype.((C, A, B))))...) 
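# Editor's aside (illustrative sketch, not part of the original diff): the `rtol`
# chosen just above is the loosest default relative tolerance across the element
# types involved; `Base.rtoldefault(T)` is approximately `sqrt(eps(T))` for a
# floating-point type `T`. The five-argument `mul!(C, A, B, α, β)` that `test_mul`
# exercises next computes `C .= α*A*B .+ β*C` in place. A minimal standalone
# demonstration with arbitrary example values:
using LinearAlgebra
A = [1.0 2.0; 3.0 4.0]; B = [5.0 6.0; 7.0 8.0]; C = ones(2, 2)
α, β = 2.0, -1.0
expected = α * (A * B) .+ β * C
mul!(C, A, B, α, β)           # overwrites C with α*A*B + β*C
C ≈ expected                  # true
max(Base.rtoldefault(Float32), Base.rtoldefault(Float64))  # the looser Float32 tolerance wins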
- - rand!(C) - T = promote_type(eltype.((A, B))...) - α = rand(T) - β = rand(T) - βArrayC = β * Array(C) - βC = β * C - mul!(C, A, B, α, β) - @test α * Array(A) * Array(B) .+ βArrayC ≈ C rtol = rtol - @test α * A * B .+ βC ≈ C rtol = rtol -end - -@testset "mul! vs * for special types" begin - eltypes = [Float32, Float64, Int64] - for k in [3, 4, 10] - T = rand(eltypes) - bi1 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L])) - bi2 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L])) - tri1 = Tridiagonal(rand(T, k - 1), rand(T, k), rand(T, k - 1)) - tri2 = Tridiagonal(rand(T, k - 1), rand(T, k), rand(T, k - 1)) - stri1 = SymTridiagonal(rand(T, k), rand(T, k - 1)) - stri2 = SymTridiagonal(rand(T, k), rand(T, k - 1)) - C = rand(T, k, k) - specialmatrices = (bi1, bi2, tri1, tri2, stri1, stri2) - for A in specialmatrices - B = specialmatrices[rand(1:length(specialmatrices))] - test_mul(C, A, B) - end - for S in specialmatrices - l = rand(1:6) - B = randn(k, l) - C = randn(k, l) - test_mul(C, S, B) - A = randn(l, k) - C = randn(l, k) - test_mul(C, A, S) - end - end - for T in eltypes - A = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L])) - B = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L])) - C = randn(2, 2) - test_mul(C, A, B) - B = randn(2, 9) - C = randn(2, 9) - test_mul(C, A, B) - end - let - tri44 = Tridiagonal(randn(3), randn(4), randn(3)) - tri33 = Tridiagonal(randn(2), randn(3), randn(2)) - full43 = randn(4, 3) - full24 = randn(2, 4) - full33 = randn(3, 3) - full44 = randn(4, 4) - @test_throws DimensionMismatch mul!(full43, tri44, tri33) - @test_throws DimensionMismatch mul!(full44, tri44, tri33) - @test_throws DimensionMismatch mul!(full44, tri44, full43) - @test_throws DimensionMismatch mul!(full43, tri33, full43) - @test_throws DimensionMismatch mul!(full43, full43, tri44) - end -end - -# #18218 -module TestPR18218 -using Test -import Base.*, Base.+, Base.zero -struct TypeA - x::Int -end -Base.convert(::Type{TypeA}, x::Int) = TypeA(x) -struct TypeB - x::Int -end -struct TypeC - x::Int -end -Base.convert(::Type{TypeC}, x::Int) = TypeC(x) -zero(c::TypeC) = TypeC(0) -zero(::Type{TypeC}) = TypeC(0) -(*)(x::Int, a::TypeA) = TypeB(x * a.x) -(*)(a::TypeA, x::Int) = TypeB(a.x * x) -(+)(a::Union{TypeB,TypeC}, b::Union{TypeB,TypeC}) = TypeC(a.x + b.x) -A = TypeA[1 2; 3 4] -b = [1, 2] -d = A * b -@test typeof(d) == Vector{TypeC} -@test d == TypeC[5, 11] -end - -@testset "VecOrMat of Vectors" begin - X = rand(ComplexF64, 3, 3) - Xv1 = [X[:, j] for i in 1:1, j in 1:3] - Xv2 = [transpose(X[i, :]) for i in 1:3] - Xv3 = [transpose(X[i, :]) for i in 1:3, j in 1:1] - - XX = X * X - XtX = transpose(X) * X - XcX = X' * X - XXt = X * transpose(X) - XtXt = transpose(XX) - XcXt = X' * transpose(X) - XXc = X * X' - XtXc = transpose(X) * X' - XcXc = X' * X' - - @test (Xv1*Xv2)[1] ≈ XX - @test (Xv1*Xv3)[1] ≈ XX - @test transpose(Xv1) * Xv1 ≈ XtX - @test transpose(Xv2) * Xv2 ≈ XtX - @test (transpose(Xv3)*Xv3)[1] ≈ XtX - @test Xv1' * Xv1 ≈ XcX - @test Xv2' * Xv2 ≈ XcX - @test (Xv3'*Xv3)[1] ≈ XcX - @test (Xv1*transpose(Xv1))[1] ≈ XXt - @test Xv2 * transpose(Xv2) ≈ XXt - @test Xv3 * transpose(Xv3) ≈ XXt - @test transpose(Xv1) * transpose(Xv2) ≈ XtXt - @test transpose(Xv1) * transpose(Xv3) ≈ XtXt - @test Xv1' * transpose(Xv2) ≈ XcXt - @test Xv1' * transpose(Xv3) ≈ XcXt - @test (Xv1*Xv1')[1] ≈ XXc - @test Xv2 * Xv2' ≈ XXc - @test Xv3 * Xv3' ≈ XXc - @test transpose(Xv1) * Xv2' ≈ XtXc - @test transpose(Xv1) * Xv3' ≈ XtXc - @test Xv1' * Xv2' ≈ XcXc - @test Xv1' * Xv3' ≈ XcXc -end - -@testset "method 
ambiguity" begin - # Ambiguity test is run inside a clean process. - # https://github.com/JuliaLang/julia/issues/28804 - script = joinpath(@__DIR__, "ambiguous_exec.jl") - cmd = `$(Base.julia_cmd()) --startup-file=no $script` - @test success(pipeline(cmd; stdout = stdout, stderr = stderr)) -end - -struct A32092 - x::Float64 -end -Base.:+(x::Float64, a::A32092) = x + a.x -Base.:*(x::Float64, a::A32092) = x * a.x -@testset "Issue #32092" begin - @test ones(2, 2) * [A32092(1.0), A32092(2.0)] == fill(3.0, (2,)) -end - -@testset "strong zero" begin - @testset for α in Any[false, 0.0, 0], n in 1:4 - C = ones(n, n) - A = fill!(zeros(n, n), NaN) - B = ones(n, n) - @test mul!(copy(C), A, B, α, 1.0) == C - end -end - -@testset "CartesianIndex handling in _modify!" begin - C = rand(10, 10) - A = rand(10, 10) - @test mul!(view(C, 1:10, 1:10), A, 0.5) == A * 0.5 -end - -@testset "Issue #33214: tiled generic mul!" begin - n = 100 - A = rand(n, n) - B = rand(n, n) - C = zeros(n, n) - mul!(C, A, B, -1 + 0im, 0) - D = -A * B - @test D ≈ C - - # Just in case dispatching on the surface API `mul!` is changed in the future, - # let's test the function where the tiled multiplication is defined. - fill!(C, 0) - LinearAlgebra._generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(-1, 0)) - @test D ≈ C -end - -@testset "size zero types in matrix mult (see issue 39362)" begin - A = [missing missing; missing missing] - v = [missing, missing] - @test (A * v == v) === missing - M = fill(1.0, 2, 2) - a = fill(missing, 2, 1) - @test (a' * M * a == fill(missing, 1, 1)) === missing -end - - -@testset "multiplication of empty matrices without calling zero" begin - r, c = rand(0:9, 2) - A = collect(Number, rand(r, c)) - B = rand(c, 0) - C = A * B - @test size(C) == (r, 0) - @test_throws MethodError zero(eltype(C)) -end - -@testset "Issue #33873: genmatmul! 
with empty operands" begin - @test Matrix{Any}(undef, 0, 2) * Matrix{Any}(undef, 2, 3) == Matrix{Any}(undef, 0, 3) - @test_throws MethodError Matrix{Any}(undef, 2, 0) * Matrix{Any}(undef, 0, 3) - @test Matrix{Int}(undef, 2, 0) * Matrix{Int}(undef, 0, 3) == zeros(Int, 2, 3) -end - -@testset "3-arg *, order by type" begin - x = [1, 2im] - y = [im, 20, 30 + 40im] - z = [-1, 200 + im, -3] - A = [1 2 3im; 4 5 6+im] - B = [-10 -20; -30 -40] - a = 3 + im * round(Int, 10^6 * (pi - 3)) - b = 123 - - @test x' * A * y == (x' * A) * y == x' * (A * y) - @test y' * A' * x == (y' * A') * x == y' * (A' * x) - @test y' * transpose(A) * x == (y' * transpose(A)) * x == y' * (transpose(A) * x) - - @test B * A * y == (B * A) * y == B * (A * y) - - @test a * A * y == (a * A) * y == a * (A * y) - @test A * y * a == (A * y) * a == A * (y * a) - - @test a * B * A == (a * B) * A == a * (B * A) - @test B * A * a == (B * A) * a == B * (A * a) - - @test a * y' * z == (a * y') * z == a * (y' * z) - @test y' * z * a == (y' * z) * a == y' * (z * a) - - @test a * y * z' == (a * y) * z' == a * (y * z') - @test y * z' * a == (y * z') * a == y * (z' * a) - - @test a * x' * A == (a * x') * A == a * (x' * A) - @test x' * A * a == (x' * A) * a == x' * (A * a) - @test a * x' * A isa Adjoint{<:Any,<:Vector} - - @test a * transpose(x) * A == (a * transpose(x)) * A == a * (transpose(x) * A) - @test transpose(x) * A * a == (transpose(x) * A) * a == transpose(x) * (A * a) - @test a * transpose(x) * A isa Transpose{<:Any,<:Vector} - - @test x' * B * A == (x' * B) * A == x' * (B * A) - @test x' * B * A isa Adjoint{<:Any,<:Vector} - - @test y * x' * A == (y * x') * A == y * (x' * A) - y31 = reshape(y, 3, 1) - @test y31 * x' * A == (y31 * x') * A == y31 * (x' * A) - - vm = [rand(1:9, 2, 2) for _ in 1:3] - Mm = [rand(1:9, 2, 2) for _ in 1:3, _ in 1:3] - - @test vm' * Mm * vm == (vm' * Mm) * vm == vm' * (Mm * vm) - @test Mm * Mm' * vm == (Mm * Mm') * vm == Mm * (Mm' * vm) - @test vm' * Mm * Mm == (vm' * Mm) * Mm == vm' * (Mm * Mm) - @test Mm * Mm' * Mm == (Mm * Mm') * Mm == Mm * (Mm' * Mm) -end - -@testset "3-arg *, order by size" begin - M44 = randn(4, 4) - M24 = randn(2, 4) - M42 = randn(4, 2) - @test M44 * M44 * M44 ≈ (M44 * M44) * M44 ≈ M44 * (M44 * M44) - @test M42 * M24 * M44 ≈ (M42 * M24) * M44 ≈ M42 * (M24 * M44) - @test M44 * M42 * M24 ≈ (M44 * M42) * M24 ≈ M44 * (M42 * M24) -end - -@testset "4-arg *, by type" begin - y = [im, 20, 30 + 40im] - z = [-1, 200 + im, -3] - a = 3 + im * round(Int, 10^6 * (pi - 3)) - b = 123 - M = rand(vcat(1:9, im .* [1, 2, 3]), 3, 3) - N = rand(vcat(1:9, im .* [1, 2, 3]), 3, 3) - - @test a * b * M * y == (a * b) * (M * y) - @test a * b * M * N == (a * b) * (M * N) - @test a * M * N * y == (a * M) * (N * y) - @test a * y' * M * z == (a * y') * (M * z) - @test a * y' * M * N == (a * y') * (M * N) - - @test M * y * a * b == (M * y) * (a * b) - @test M * N * a * b == (M * N) * (a * b) - @test M * N * y * a == (a * M) * (N * y) - @test y' * M * z * a == (a * y') * (M * z) - @test y' * M * N * a == (a * y') * (M * N) - - @test M * N * conj(M) * y == (M * N) * (conj(M) * y) - @test y' * M * N * conj(M) == (y' * M) * (N * conj(M)) - @test y' * M * N * z == (y' * M) * (N * z) -end - -@testset "4-arg *, by size" begin - for shift in 1:5 - s1, s2, s3, s4, s5 = circshift(3:7, shift) - a = randn(s1, s2) - b = randn(s2, s3) - c = randn(s3, s4) - d = randn(s4, s5) - - # _quad_matmul - @test *(a, b, c, d) ≈ (a * b) * (c * d) - - # _tri_matmul(A,B,B,δ) - @test *(11.1, b, c, d) ≈ (11.1 * b) * (c * d) - @test *(a, b, c, 
99.9) ≈ (a * b) * (c * 99.9) - end -end - -@testset "Issue #46865: mul!() with non-const alpha, beta" begin - f!(C,A,B,alphas,betas) = mul!(C, A, B, alphas[1], betas[1]) - alphas = [1.0] - betas = [0.5] - for d in [2,3,4] # test native small-matrix cases as well as BLAS - A = rand(d,d) - B = copy(A) - C = copy(A) - f!(C, A, B, alphas, betas) - @test_broken (@allocated f!(C, A, B, alphas, betas)) == 0 - end -end - -end # module TestMatmul diff --git a/stdlib/LinearAlgebra/test/pinv.jl b/stdlib/LinearAlgebra/test/pinv.jl deleted file mode 100644 index c7268865a0505..0000000000000 --- a/stdlib/LinearAlgebra/test/pinv.jl +++ /dev/null @@ -1,186 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestPinv - -using Test, LinearAlgebra, Random - -Random.seed!(12345) - -function hilb(T::Type, n::Integer) - a = Matrix{T}(undef, n, n) - for i=1:n - for j=1:n - a[j,i]=one(T)/(i+j-one(T)) - end - end - return a -end -hilb(n::Integer) = hilb(Float64,n) - -function hilb(T::Type, m::Integer, n::Integer) - a = Matrix{T}(undef, m, n) - for i=1:n - for j=1:m - a[j,i]=one(T)/(i+j-one(T)) - end - end - return a -end -hilb(m::Integer, n::Integer) = hilb(Float64,m,n) - -function onediag(T::Type, m::Integer, n::Integer) - a=zeros(T,m,n) - for i=1:min(n,m) - a[i,i]=one(T)/(float(i)^5) - end - a[1,1] = 0 - a[min(m,n),min(m,n)] = 0 - return a -end -onediag(m::Integer, n::Integer) = onediag(Float64, m::Integer, n::Integer) - -function onediag_sparse(T::Type, n::Integer) - a=zeros(T,n) - for i=1:n - a[i]=one(T)/(float(i)^5) - end - a[1] = 0 - a[n] = 0 - return Diagonal(a) -end -onediag_sparse(n::Integer) = onediag_sparse(Float64, n::Integer) - -function tridiag(T::Type, m::Integer, n::Integer) - a=zeros(T,m,n) - for i=1:min(n,m) - a[i,i]=one(T)/(float(i)^5) - end - for i=1:min(n,m)-1 - a[i+1,i]=2*one(T)/(float(i)^5) - a[1,i+1]=2*one(T)/(float(i)^5) - end - return a -end -tridiag(m::Integer, n::Integer) = tridiag(Float64, m::Integer, n::Integer) - -function test_pinv(a,tol1,tol2) - m,n = size(a) - - apinv = @inferred pinv(a) - @test size(apinv) == (n,m) - @test norm(a*apinv*a-a)/norm(a) ≈ 0 atol=tol1 - @test norm(apinv*a*apinv-apinv)/norm(apinv) ≈ 0 atol=tol1 - b = a*randn(n) - x = apinv*b - @test norm(a*x-b)/norm(b) ≈ 0 atol=tol1 - - apinv = @inferred pinv(a,sqrt(eps(real(one(eltype(a)))))) - @test size(apinv) == (n,m) - @test norm(a*apinv*a-a)/norm(a) ≈ 0 atol=tol2 - @test norm(apinv*a*apinv-apinv)/norm(apinv) ≈ 0 atol=tol2 - b = a*randn(n) - x = apinv*b - @test norm(a*x-b)/norm(b) ≈ 0 atol=tol2 -end - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64) - @testset for (m, n) in [(1000, 100), (100, 100), (100, 1000)] - default_tol = (real(one(eltya))) * max(m,n) * 10 - tol1 = 1e-2 - tol2 = 1e-5 - if real(eltya) == Float32 - tol1 = 1e0 - tol2 = 1e-2 - end - @testset "dense/ill-conditioned matrix" begin - a = hilb(eltya, m, n) - test_pinv(a, tol1, tol2) - end - @testset "dense/diagonal matrix" begin - a = onediag(eltya, m, n) - test_pinv(a, default_tol, default_tol) - end - @testset "dense/tri-diagonal matrix" begin - a = tridiag(eltya, m, n) - test_pinv(a, default_tol, tol2) - end - @testset "Diagonal matrix" begin - a = onediag_sparse(eltya, m) - test_pinv(a, default_tol, default_tol) - end - @testset "Vector" begin - a = rand(eltya, m) - apinv = @inferred pinv(a) - @test pinv(hcat(a)) ≈ apinv - @test isa(apinv, eltya <: Complex ? 
Adjoint{eltya} : Transpose{eltya}) - end - @testset "Adjoint/Transpose vector" begin - a = rand(eltya, m)' - apinv = @inferred pinv(a) - @test pinv(vcat(a)) ≈ apinv - @test apinv isa Vector{eltya} - end - end - - @testset "zero valued numbers/vectors/matrices" begin - a = pinv(zero(eltya)) - @test a ≈ 0.0 - - a = pinv([zero(eltya); zero(eltya)]) - @test a[1] ≈ 0.0 - @test a[2] ≈ 0.0 - - a = pinv([zero(eltya); zero(eltya)]') - @test a[1] ≈ 0.0 - @test a[2] ≈ 0.0 - - a = pinv(Diagonal([zero(eltya); zero(eltya)])) - @test a.diag[1] ≈ 0.0 - @test a.diag[2] ≈ 0.0 - end - - @testset "hermitian matrices" begin - Q = ones(2,2) - C = pinv(Hermitian(Q))/0.25 - @test C ≈ ones(2,2) - end - - @testset "non-square diagonal matrices" begin - A = eltya[1 0 ; 0 1 ; 0 0] - B = pinv(A) - @test A*B*A ≈ A - @test B*A*B ≈ B - - A = eltya[1 0 0 ; 0 1 0] - B = pinv(A) - @test A*B*A ≈ A - @test B*A*B ≈ B - end - - if eltya <: LinearAlgebra.BlasReal - @testset "sub-normal numbers/vectors/matrices" begin - a = pinv(floatmin(eltya)/100) - @test a ≈ 0.0 - # Complex subnormal - a = pinv(floatmin(eltya)/100*(1+1im)) - @test a ≈ 0.0 - - a = pinv([floatmin(eltya); floatmin(eltya)]/100) - @test a[1] ≈ 0.0 - @test a[2] ≈ 0.0 - # Complex subnormal - a = pinv([floatmin(eltya); floatmin(eltya)]/100*(1+1im)) - @test a[1] ≈ 0.0 - @test a[2] ≈ 0.0 - a = pinv(Diagonal([floatmin(eltya); floatmin(eltya)]/100)) - @test a.diag[1] ≈ 0.0 - @test a.diag[2] ≈ 0.0 - # Complex subnormal - a = pinv(Diagonal([floatmin(eltya); floatmin(eltya)]/100*(1+1im))) - @test a.diag[1] ≈ 0.0 - @test a.diag[2] ≈ 0.0 - end - end -end - -end # module TestPinv diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl deleted file mode 100644 index 184971da304f7..0000000000000 --- a/stdlib/LinearAlgebra/test/qr.jl +++ /dev/null @@ -1,507 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestQR - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted, rmul!, lmul! - -n = 10 - -# Split n into 2 parts for tests needing two matrices -n1 = div(n, 2) -n2 = 2*n1 - -Random.seed!(1234325) - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 -a2real = randn(n,n)/2 -a2img = randn(n,n)/2 -breal = randn(n,2)/2 -bimg = randn(n,2)/2 - -# helper functions to unambiguously recover explicit forms of an implicit QR Q -squareQ(Q::LinearAlgebra.AbstractQ) = Q*I -rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q) - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) - raw_a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - raw_a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real) - asym = raw_a' + raw_a # symmetric indefinite - apd = raw_a' * raw_a # symmetric positive-definite - ε = εa = eps(abs(float(one(eltya)))) - - @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) - raw_b = eltyb == Int ? rand(1:5, n, 2) : convert(Matrix{eltyb}, eltyb <: Complex ? 
complex.(breal, bimg) : breal) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa, εb) - tab = promote_type(eltya, eltyb) - - @testset "QR decomposition of a Number" begin - α = rand(eltyb) - aα = fill(α, 1, 1) - @test qr(α).Q * qr(α).R ≈ qr(aα).Q * qr(aα).R - @test abs(qr(α).Q[1,1]) ≈ one(eltyb) - end - - for (a, b) in ((raw_a, raw_b), - (view(raw_a, 1:n-1, 1:n-1), view(raw_b, 1:n-1, 1))) - a_1 = size(a, 1) - @testset "QR decomposition (without pivoting)" begin - qra = @inferred qr(a) - q, r = qra.Q, qra.R - @test_throws ErrorException qra.Z - @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1) - @test q*squareQ(q)' ≈ Matrix(I, a_1, a_1) - @test q'*Matrix(1.0I, a_1, a_1)' ≈ squareQ(q)' - @test squareQ(q)'q ≈ Matrix(I, a_1, a_1) - @test Matrix(1.0I, a_1, a_1)'q' ≈ squareQ(q)' - @test q*r ≈ a - @test a*(qra\b) ≈ b atol=3000ε - @test Array(qra) ≈ a - sq = size(q.factors, 2) - @test *(Matrix{eltyb}(I, sq, sq), adjoint(q)) * squareQ(q) ≈ Matrix(I, sq, sq) atol=5000ε - if eltya != Int - @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab}, q)) - ac = copy(a) - @test qr!(a[:, 1:5])\b == qr!(view(ac, :, 1:5))\b - end - qrstring = sprint((t, s) -> show(t, "text/plain", s), qra) - rstring = sprint((t, s) -> show(t, "text/plain", s), r) - qstring = sprint((t, s) -> show(t, "text/plain", s), q) - @test qrstring == "$(summary(qra))\nQ factor: $qstring\nR factor:\n$rstring" - # iterate - q, r = qra - @test q*r ≈ a - # property names - @test Base.propertynames(qra) == (:R, :Q) - end - @testset "Thin QR decomposition (without pivoting)" begin - qra = @inferred qr(a[:, 1:n1], NoPivot()) - q,r = qra.Q, qra.R - @test_throws ErrorException qra.Z - @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1) - @test q'*rectangularQ(q) ≈ Matrix(I, a_1, n1) - @test q*r ≈ a[:, 1:n1] - @test q*b[1:n1] ≈ rectangularQ(q)*b[1:n1] atol=100ε - @test q*b ≈ squareQ(q)*b atol=100ε - if eltya != Int - @test Array{eltya}(q) ≈ rectangularQ(q) - end - @test_throws DimensionMismatch q*b[1:n1 + 1] - @test_throws DimensionMismatch b[1:n1 + 1]*q' - sq = size(q.factors, 2) - @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε - if eltya != Int - @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q)) - end - # iterate - q, r = qra - @test q*r ≈ a[:, 1:n1] - # property names - @test Base.propertynames(qra) == (:R, :Q) - end - @testset "(Automatic) Fat (pivoted) QR decomposition" begin - @inferred qr(a, ColumnNorm()) - - qrpa = factorize(a[1:n1,:]) - q,r = qrpa.Q, qrpa.R - @test_throws ErrorException qrpa.Z - p = qrpa.p - @test q'*squareQ(q) ≈ Matrix(I, n1, n1) - @test q*squareQ(q)' ≈ Matrix(I, n1, n1) - sq = size(q, 2); - @test (UpperTriangular(Matrix{eltya}(I, sq, sq))*q')*squareQ(q) ≈ Matrix(I, n1, n1) - @test q*r ≈ (isa(qrpa,QRPivoted) ? 
a[1:n1,p] : a[1:n1,:]) - @test q*r[:,invperm(p)] ≈ a[1:n1,:] - @test q*r*transpose(qrpa.P) ≈ a[1:n1,:] - @test a[1:n1,:]*(qrpa\b[1:n1]) ≈ b[1:n1] atol=5000ε - @test Array(qrpa) ≈ a[1:5,:] - if eltya != Int - @test Array{eltya}(q) ≈ Matrix(q) - end - @test_throws DimensionMismatch q*b[1:n1+1] - @test_throws DimensionMismatch b[1:n1+1]*q' - if eltya != Int - @test Matrix{eltyb}(I, n1, n1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q)) - end - # iterate - q, r, p = qrpa - @test q*r[:,invperm(p)] ≈ a[1:n1,:] - # property names - @test Base.propertynames(qrpa) == (:R, :Q, :p, :P) - end - @testset "(Automatic) Thin (pivoted) QR decomposition" begin - qrpa = factorize(a[:,1:n1]) - q,r = qrpa.Q, qrpa.R - @test_throws ErrorException qrpa.Z - p = qrpa.p - @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1) - @test q*squareQ(q)' ≈ Matrix(I, a_1, a_1) - @test q*r ≈ a[:,p] - @test q*r[:,invperm(p)] ≈ a[:,1:n1] - @test Array(qrpa) ≈ a[:,1:5] - if eltya != Int - @test Array{eltya}(q) ≈ Matrix(q) - end - @test_throws DimensionMismatch q*b[1:n1+1] - @test_throws DimensionMismatch b[1:n1+1]*q' - sq = size(q.factors, 2) - @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε - if eltya != Int - @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q)) - end - qrstring = sprint((t, s) -> show(t, "text/plain", s), qrpa) - rstring = sprint((t, s) -> show(t, "text/plain", s), r) - qstring = sprint((t, s) -> show(t, "text/plain", s), q) - pstring = sprint((t, s) -> show(t, "text/plain", s), p) - @test qrstring == "$(summary(qrpa))\nQ factor: $qstring\nR factor:\n$rstring\npermutation:\n$pstring" - # iterate - q, r, p = qrpa - @test q*r[:,invperm(p)] ≈ a[:,1:n1] - # property names - @test Base.propertynames(qrpa) == (:R, :Q, :p, :P) - end - end - if eltya != Int - @testset "Matmul with QR factorizations" begin - a = raw_a - qrpa = factorize(a[:,1:n1]) - q, r = qrpa.Q, qrpa.R - @test rmul!(copy(squareQ(q)'), q) ≈ Matrix(I, n, n) - @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q) - @test rmul!(squareQ(q), adjoint(q)) ≈ Matrix(I, n, n) - @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1), adjoint(q)) - @test_throws ErrorException size(q,-1) - @test_throws DimensionMismatch LinearAlgebra.lmul!(q,zeros(eltya,n1+1)) - @test_throws DimensionMismatch LinearAlgebra.lmul!(adjoint(q), zeros(eltya,n1+1)) - - b = similar(a); rand!(b) - c = similar(a) - d = similar(a[:,1:n1]) - @test mul!(c, q, b) ≈ q*b - @test mul!(d, q, r) ≈ q*r ≈ a[:,qrpa.p] - @test mul!(c, q', b) ≈ q'*b - @test mul!(d, q', a[:,qrpa.p])[1:n1,:] ≈ r - @test all(x -> abs(x) < ε*norm(a), d[n1+1:end,:]) - @test mul!(c, b, q) ≈ b*q - @test mul!(c, b, q') ≈ b*q' - @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b) - - qra = qr(a[:,1:n1], NoPivot()) - q, r = qra.Q, qra.R - @test rmul!(copy(squareQ(q)'), q) ≈ Matrix(I, n, n) - @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q) - @test rmul!(squareQ(q), adjoint(q)) ≈ Matrix(I, n, n) - @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),adjoint(q)) - @test_throws ErrorException size(q,-1) - @test_throws DimensionMismatch q * Matrix{Int8}(I, n+4, n+4) - - @test mul!(c, q, b) ≈ q*b - @test mul!(d, q, r) ≈ a[:,1:n1] - @test mul!(c, q', b) ≈ q'*b - @test mul!(d, q', a[:,1:n1])[1:n1,:] ≈ r - @test all(x -> abs(x) < ε*norm(a), d[n1+1:end,:]) - @test mul!(c, b, q) ≈ b*q - @test mul!(c, b, q') ≈ b*q' - @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b) - - b = 
similar(a[:,1]); rand!(b) - c = similar(a[:,1]) - d = similar(a[:,1]) - @test mul!(c, q, b) ≈ q*b - @test mul!(c, q', b) ≈ q'*b - @test_throws DimensionMismatch mul!(Vector{eltya}(undef, n+1), q, b) - end - end - end -end - -@testset "transpose errors" begin - @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3))) - @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3), NoPivot())) - @test_throws ArgumentError transpose(qr(big.(randn(ComplexF64,3,3)))) -end - -@testset "Issue 7304" begin - A = [-√.5 -√.5; -√.5 √.5] - Q = rectangularQ(qr(A).Q) - @test norm(A-Q) < eps() -end - -@testset "qr on AbstractVector" begin - vr = [3.0, 4.0] - for Tr in (Float32, Float64) - for T in (Tr, Complex{Tr}) - v = convert(Vector{T}, vr) - nv, nm = qr(v) - @test norm(nv*Matrix(I, (2,2)) - [-0.6 -0.8; -0.8 0.6], Inf) < eps(Tr) - @test nm == fill(-5.0, 1, 1) - end - end -end - -@testset "QR on Ints" begin - # not sure what to do about this edge case now that we build decompositions - # for qr(...), so for now just commenting this out - # @test qr(Int[]) == (Int[],1) - - B = rand(7,2) - @test (1:7)\B ≈ Vector(1:7)\B -end - -@testset "Issue 16520" begin - @test_throws DimensionMismatch rand(3,2)\(1:5) -end - -@testset "Issue 22810" begin - A = zeros(1, 2) - B = zeros(1, 1) - @test A \ B == zeros(2, 1) - @test qr(A, ColumnNorm()) \ B == zeros(2, 1) -end - -@testset "Issue 24107" begin - A = rand(200,2) - @test A \ range(0, stop=1, length=200) == A \ Vector(range(0, stop=1, length=200)) -end - -@testset "Issue 24589. Promotion of rational matrices" begin - A = rand(1//1:5//5, 4,3) - @test Matrix(first(qr(A))) == Matrix(first(qr(float(A)))) -end - -@testset "Issue Test Factorization fallbacks for rectangular problems" begin - A = randn(3,2) - Ac = copy(A') - b = randn(3) - b0 = copy(b) - c = randn(2) - B = randn(3,3) - B0 = copy(B) - C = randn(2,3) - @test A \b ≈ ldiv!(c, qr(A ), b) - @test b == b0 - @test A \B ≈ ldiv!(C, qr(A ), B) - @test B == B0 - c0 = copy(c) - C0 = copy(C) - @test Ac\c ≈ ldiv!(b, qr(Ac, ColumnNorm()), c) - @test c0 == c - @test Ac\C ≈ ldiv!(B, qr(Ac, ColumnNorm()), C) - @test C0 == C -end - -@testset "Issue reflector of zero-length vector" begin - a = [2.0] - x = view(a,1:0) - τ = LinearAlgebra.reflector!(view(x,1:0)) - @test τ == 0.0 - - b = reshape([3.0],1,1) - @test isempty(LinearAlgebra.reflectorApply!(x, τ, view(b,1:0,:))) - @test b[1] == 3.0 -end - -@testset "det(Q::Union{QRCompactWYQ, QRPackedQ})" begin - # 40 is the number larger than the default block size 36 of QRCompactWY - @testset for n in [1:3; 40], m in [1:3; 40], pivot in (NoPivot(), ColumnNorm()) - @testset "real" begin - @testset for k in 0:min(n, m, 5) - A = cat(Array(I(k)), randn(n - k, m - k); dims=(1, 2)) - Q, = qr(A, pivot) - @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1))) - @test abs(det(Q)) ≈ 1 - end - end - @testset "complex" begin - @testset for k in 0:min(n, m, 5) - A = cat(Array(I(k)), randn(ComplexF64, n - k, m - k); dims=(1, 2)) - Q, = qr(A, pivot) - @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1))) - @test abs(det(Q)) ≈ 1 - end - end - end -end - -@testset "inv(::AbstractQ)" begin - for T in (Float64, ComplexF64) - Q = qr(randn(T,5,5)).Q - @test inv(Q) === Q' - @test inv(Q)' === inv(Q') === Q - end -end - -@testset "QR factorization of Q" begin - for T in (Float32, Float64, ComplexF32, ComplexF64) - Q1, R1 = qr(randn(T,5,5)) - Q2, R2 = qr(Q1) - @test Matrix(Q1) ≈ Matrix(Q2) - @test R2 ≈ I - end -end - -@testset "Generation of orthogonal matrices" begin - for T in (Float32, Float64) 
- n = 5 - Q, R = qr(randn(T,n,n)) - O = Q * Diagonal(sign.(diag(R))) - @test O' * O ≈ I - end -end - -@testset "Multiplication of Q by special matrices" begin - for T in (Float32, Float64, ComplexF32, ComplexF64) - n = 5 - Q, R = qr(randn(T,n,n)) - Qmat = Matrix(Q) - D = Diagonal(randn(T,n)) - @test Q * D ≈ Qmat * D - @test D * Q ≈ D * Qmat - J = 2*I - @test Q * J ≈ Qmat * J - @test J * Q ≈ J * Qmat - end -end - -@testset "copyto! for Q" begin - for T in (Float32, Float64, ComplexF32, ComplexF64) - n = 5 - Q, R = qr(randn(T,n,n)) - Qmat = Matrix(Q) - dest1 = Matrix{T}(undef, size(Q)) - copyto!(dest1, Q) - @test dest1 ≈ Qmat - dest2 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (1, 2)) - copyto!(dest2, Q) - @test dest2 ≈ Qmat - dest3 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (2, 1)) - copyto!(dest3, Q) - @test dest3 ≈ Qmat - end -end - -@testset "adjoint of QR" begin - n = 5 - B = randn(5, 2) - - @testset "size(b)=$(size(b))" for b in (B[:, 1], B) - @testset "size(A)=$(size(A))" for A in ( - randn(n, n), - # Wide problems become minimum norm (in x) problems similarly to LQ - randn(n + 2, n), - complex.(randn(n, n), randn(n, n))) - - @testset "QRCompactWY" begin - F = qr(A) - x = F'\b - @test x ≈ A'\b - @test length(size(x)) == length(size(b)) - end - - @testset "QR" begin - F = LinearAlgebra.qrfactUnblocked!(copy(A)) - x = F'\b - @test x ≈ A'\b - @test length(size(x)) == length(size(b)) - end - - @testset "QRPivoted" begin - F = LinearAlgebra.qr(A, ColumnNorm()) - x = F'\b - @test x ≈ A'\b - @test length(size(x)) == length(size(b)) - end - end - @test_throws DimensionMismatch("overdetermined systems are not supported") qr(randn(n - 2, n))'\b - @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1))'\b - @test_throws DimensionMismatch("overdetermined systems are not supported") LinearAlgebra.qrfactUnblocked!(randn(n - 2, n))'\b - @test_throws DimensionMismatch("arguments must have the same number of rows") LinearAlgebra.qrfactUnblocked!(randn(n, n + 1))'\b - @test_throws DimensionMismatch("overdetermined systems are not supported") qr(randn(n - 2, n), ColumnNorm())'\b - @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1), ColumnNorm())'\b - end -end - -@testset "issue #38974" begin - A = qr(ones(3, 1)) - B = I(3) - C = B*A.Q' - @test C ≈ A.Q * Matrix(I, 3, 3) - @test A.Q' * B ≈ A.Q * Matrix(I, 3, 3) -end - -@testset "convert between eltypes" begin - a = rand(Float64, 10, 5) - qra = qr(a) - qrwy = LinearAlgebra.QRCompactWY{Float32}(qra.factors, qra.T) - @test Array(qrwy) ≈ Array(qr(Float32.(a))) - @test eltype(qrwy.factors) == eltype(qrwy.T) == Float32 - qra = qr(a, ColumnNorm()) - qrp = QRPivoted{Float32}(qra.factors, qra.τ, qra.jpvt) - @test Array(qrp) ≈ Array(qr(Float32.(a), ColumnNorm())) - @test eltype(qrp.factors) == eltype(qrp.τ) == Float32 - a = rand(Float16, 10, 5) - qra = qr(a) - qrnonblas = QR{ComplexF16}(qra.factors, qra.τ) - @test Array(qrnonblas) ≈ Array(qr(ComplexF16.(a))) - @test eltype(qrnonblas.factors) == eltype(qrnonblas.τ) == ComplexF16 -end - -# We use approximate equals to get MKL.jl tests to pass. 
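# Editor's aside (illustrative sketch, not part of the original diff): the
# "Generation of orthogonal matrices" testset above relies on the standard
# sign-fixing trick: for A = Q*R, re-signing the columns of Q by
# Diagonal(sign.(diag(R))) keeps Q orthogonal while the correspondingly
# re-signed R gets a nonnegative diagonal, so the factorization is unchanged.
# All names below are local to this sketch.
using LinearAlgebra
A = randn(4, 4)
F = qr(A)
S = Diagonal(sign.(diag(F.R)))   # diagonal of ±1 (generically no exact zeros)
O = F.Q * S                      # orthogonal matrix with a fixed sign convention
Rfixed = S * F.R                 # upper triangular with nonnegative diagonal
O' * O ≈ I                       # true, up to roundoff
O * Rfixed ≈ A                   # S*S == I, so the product still reconstructs A
all(>=(0), diag(Rfixed))         # true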
-@testset "optimized getindex for an AbstractQ" begin - for T in [Float64, ComplexF64] - Q = qr(rand(T, 4, 4)) - Q2 = Q.Q - M = Matrix(Q2) - for j in axes(M, 2) - @test Q2[:, j] ≈ M[:, j] - for i in axes(M, 1) - @test Q2[i, :] ≈ M[i, :] - @test Q2[i, j] ≈ M[i, j] - end - end - @test Q2[:] ≈ M[:] - @test Q2[:, :] ≈ M[:, :] - @test Q2[:, :, :] ≈ M[:, :, :] - end - # Check that getindex works if copy returns itself (#44729) - struct MyIdentity{T} <: LinearAlgebra.AbstractQ{T} end - Base.size(::MyIdentity, dim::Integer) = dim in (1,2) ? 2 : 1 - Base.size(::MyIdentity) = (2, 2) - Base.copy(J::MyIdentity) = J - LinearAlgebra.lmul!(::MyIdentity{T}, M::Array{T}) where {T} = M - @test MyIdentity{Float64}()[1,:] == [1.0, 0.0] -end - -@testset "issue #48911" begin - # testcase in the original issue - # test ldiv!(::QRPivoted, ::AbstractVector) - A = Complex{BigFloat}[1+im 1-im] - b = Complex{BigFloat}[3+im] - x = A\b - AF = Complex{Float64}[1+im 1-im] - bf = Complex{Float64}[3+im] - xf = AF\bf - @test x ≈ xf - - # test ldiv!(::QRPivoted, ::AbstractVector) - A = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] - b = Complex{BigFloat}[1+im; 0] - x = A\b - AF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] - bf = Complex{Float64}[1+im; 0] - xf = AF\bf - @test x ≈ xf - - # test ldiv!(::QRPivoted, ::AbstractMatrix) - C = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] - D = Complex{BigFloat}[1+im 1-im; 0 0] - x = C\D - CF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] - DF = Complex{Float64}[1+im 1-im; 0 0] - xf = CF\DF - @test x ≈ xf -end - -end # module TestQR diff --git a/stdlib/LinearAlgebra/test/schur.jl b/stdlib/LinearAlgebra/test/schur.jl deleted file mode 100644 index c9a5d92dbdae8..0000000000000 --- a/stdlib/LinearAlgebra/test/schur.jl +++ /dev/null @@ -1,221 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestSchur - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted - -n = 10 - -# Split n into 2 parts for tests needing two matrices -n1 = div(n, 2) -n2 = 2*n1 - -Random.seed!(1234321) - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) - a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? 
complex.(areal, aimg) : areal) - asym = a' + a # symmetric indefinite - apd = a' * a # symmetric positive-definite - for (a, asym, apd) in ((a, asym, apd), - (view(a, 1:n, 1:n), - view(asym, 1:n, 1:n), - view(apd, 1:n, 1:n))) - ε = εa = eps(abs(float(one(eltya)))) - - d,v = eigen(a) - f = schur(a) - @test f.vectors*f.Schur*f.vectors' ≈ a - @test sort(real(f.values)) ≈ sort(real(d)) - @test sort(imag(f.values)) ≈ sort(imag(d)) - @test istriu(f.Schur) || eltype(a)<:Real - @test convert(Array, f) ≈ a - @test_throws ErrorException f.A - - sch, vecs, vals = schur(UpperTriangular(triu(a))) - @test vecs*sch*vecs' ≈ triu(a) - sch, vecs, vals = schur(UnitUpperTriangular(triu(a))) - @test vecs*sch*vecs' ≈ UnitUpperTriangular(triu(a)) - sch, vecs, vals = schur(LowerTriangular(tril(a))) - @test vecs*sch*vecs' ≈ tril(a) - sch, vecs, vals = schur(UnitLowerTriangular(tril(a))) - @test vecs*sch*vecs' ≈ UnitLowerTriangular(tril(a)) - sch, vecs, vals = schur(Hermitian(asym)) - @test vecs*sch*vecs' ≈ asym - sch, vecs, vals = schur(Symmetric(a + transpose(a))) - @test vecs*sch*vecs' ≈ a + transpose(a) - sch, vecs, vals = schur(Tridiagonal(a + transpose(a))) - @test vecs*sch*vecs' ≈ Tridiagonal(a + transpose(a)) - sch, vecs, vals = schur(Bidiagonal(a, :U)) - @test vecs*sch*vecs' ≈ Bidiagonal(a, :U) - sch, vecs, vals = schur(Bidiagonal(a, :L)) - @test vecs*sch*vecs' ≈ Bidiagonal(a, :L) - - tstring = sprint((t, s) -> show(t, "text/plain", s), f.T) - zstring = sprint((t, s) -> show(t, "text/plain", s), f.Z) - vstring = sprint((t, s) -> show(t, "text/plain", s), f.values) - fstring = sprint((t, s) -> show(t, "text/plain", s), f) - @test fstring == "$(summary(f))\nT factor:\n$tstring\nZ factor:\n$(zstring)\neigenvalues:\n$vstring" - @testset "Reorder Schur" begin - # use asym for real schur to enforce tridiag structure - # avoiding partly selection of conj. eigenvalues - ordschura = eltya <: Complex ? 
a : asym - S = schur(ordschura) - select = bitrand(n) - O = ordschur(S, select) - sum(select) != 0 && @test S.values[findall(select)] ≈ O.values[1:sum(select)] - @test O.vectors*O.Schur*O.vectors' ≈ ordschura - @test_throws ErrorException f.A - Snew = LinearAlgebra.Schur(S.T, S.Z, S.values) - SchurNew = ordschur!(copy(Snew), select) - @test O.vectors ≈ SchurNew.vectors - @test O.Schur ≈ SchurNew.Schur - end - - if isa(a, Array) - a1_sf = a[1:n1, 1:n1] - a2_sf = a[n1+1:n2, n1+1:n2] - else - a1_sf = view(a, 1:n1, 1:n1) - a2_sf = view(a, n1+1:n2, n1+1:n2) - end - @testset "Generalized Schur" begin - f = schur(a1_sf, a2_sf) - @test f.Q*f.S*f.Z' ≈ a1_sf - @test f.Q*f.T*f.Z' ≈ a2_sf - @test istriu(f.S) || eltype(a)<:Real - @test istriu(f.T) || eltype(a)<:Real - @test_throws ErrorException f.A - - sstring = sprint((t, s) -> show(t, "text/plain", s), f.S) - tstring = sprint((t, s) -> show(t, "text/plain", s), f.T) - qstring = sprint((t, s) -> show(t, "text/plain", s), f.Q) - zstring = sprint((t, s) -> show(t, "text/plain", s), f.Z) - αstring = sprint((t, s) -> show(t, "text/plain", s), f.α) - βstring = sprint((t, s) -> show(t, "text/plain", s), f.β) - fstring = sprint((t, s) -> show(t, "text/plain", s), f) - @test fstring == "$(summary(f))\nS factor:\n$sstring\nT factor:\n$(tstring)\nQ factor:\n$(qstring)\nZ factor:\n$(zstring)\nα:\n$αstring\nβ:\n$βstring" - end - @testset "Reorder Generalized Schur" begin - NS = schur(a1_sf, a2_sf) - # Currently just testing with selecting gen eig values < 1 - select = abs2.(NS.values) .< 1 - m = sum(select) - S = ordschur(NS, select) - # Make sure that the new factorization still factors matrix - @test S.Q*S.S*S.Z' ≈ a1_sf - @test S.Q*S.T*S.Z' ≈ a2_sf - # Make sure that we have sorted it correctly - @test NS.values[findall(select)] ≈ S.values[1:m] - - Snew = LinearAlgebra.GeneralizedSchur(NS.S, NS.T, NS.alpha, NS.beta, NS.Q, NS.Z) - SchurNew = ordschur!(copy(Snew), select) - @test S.Q ≈ SchurNew.Q - @test S.S ≈ SchurNew.S - @test S.T ≈ SchurNew.T - @test S.Z ≈ SchurNew.Z - @test S.alpha ≈ SchurNew.alpha - @test S.beta ≈ SchurNew.beta - sS,sT,sQ,sZ = schur(a1_sf,a2_sf) - @test NS.Q ≈ sQ - @test NS.T ≈ sT - @test NS.S ≈ sS - @test NS.Z ≈ sZ - end - end - @testset "0x0 matrix" for A in (zeros(eltya, 0, 0), view(rand(eltya, 2, 2), 1:0, 1:0)) - T, Z, λ = LinearAlgebra.schur(A) - @test T == A - @test Z == A - @test λ == zeros(0) - end - - if eltya <: Real - @testset "quasitriangular to triangular" begin - S = schur(a) - SC = Schur{Complex}(S) - @test eltype(SC) == complex(eltype(S)) - @test istriu(SC.T) - @test SC.Z*SC.Z' ≈ I - @test SC.Z*SC.T*SC.Z' ≈ a - @test sort(SC.values,by=LinearAlgebra.eigsortby) ≈ sort(S.values,by=LinearAlgebra.eigsortby) - @test Schur{Complex}(SC) === SC === Schur{eltype(SC)}(SC) - @test Schur{eltype(S)}(S) === S - if eltype(S) === Float32 - S64 = Schur{Float64}(S) - @test eltype(S64) == Float64 - @test S64.Z == S.Z - @test S64.T == S.T - @test S64.values == S.values - end - end - end - - @testset "0x0 $eltya matrices" begin - A = zeros(eltya, 0, 0) - B = zeros(eltya, 0, 0) - S = LinearAlgebra.schur(A, B) - @test S.S == A - @test S.T == A - @test S.Q == A - @test S.Z == A - @test S.alpha == zeros(0) - @test S.beta == zeros(0) - end -end - -@testset "Generalized Schur convergence" begin - # Check for convergence issues, #40279 - problematic_pencils = [ - ( ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 
0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493963 -6.625127119626611 0.0 0.0 0.0 0.0 0.0 -1.0; -3.312563559813306 3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 3.7796350217469814 0.0 0.0 -3.3125635598133054 0.0 0.0 0.0 -1.0 6.418270043493963 0.0 0.0 -6.625127119626611 0.0 0.0; 0.0 0.0 0.0 3.779635021746982 -3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493964 -6.625127119626612 0.0 -1.0 0.0; 0.0 0.0 0.0 -3.3125635598133054 3.7796350217469814 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626611 6.418270043493963 -1.0 0.0 0.0; 0.0 0.0 -3.312563559813306 0.0 0.0 3.779635021746982 0.0 0.0 0.0 0.0 -6.625127119626612 0.0 -1.0 6.418270043493964 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 -1.0 0.0 0.0 6.418270043493963 -6.625127119626611; 0.0 0.0 0.0 0.0 0.0 0.0 -3.312563559813306 3.779635021746982 -1.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964], - ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 0.0 0.0 3.312563559813306 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.779635021746982 3.3125635598133054 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.312563559813306 -3.7796350217469814 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 0.0 0.0 -3.779635021746982 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982] - ), - ( ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 -1.0 0.0 0.0 0.0 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 -1.0 -2.62 0.0 0.0 0.0 0.0 0.0; 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0; 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0; 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62], - ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0] - ), - ( ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 0.0 0.0 0.0 0.0 -1.0; -0.10323794456968927 0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.33748484079831426 0.0 0.0 -0.10323794456968927 0.0 0.0 0.0 -1.0 -2.5940303184033713 0.0 0.0 -0.20647588913937853 0.0 0.0; 0.0 0.0 0.0 0.3374848407983142 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 -1.0 0.0; 0.0 0.0 0.0 -0.10323794456968927 0.33748484079831426 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0; 0.0 0.0 -0.10323794456968927 0.0 0.0 0.3374848407983142 0.0 0.0 0.0 0.0 -0.20647588913937853 0.0 -1.0 -2.5940303184033713 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 -1.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853; 0.0 0.0 0.0 0.0 0.0 0.0 -0.10323794456968927 0.3374848407983142 -1.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713], - ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.0 0.0 0.10323794456968927 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.3374848407983142 0.10323794456968927 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.33748484079831426 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 0.0 0.0 -0.3374848407983142 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142] - ), - ( ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 1.7391668762048442 -1.309613611600033 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.619227223200066 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0; -1.3096136116000332 1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.6192272232000664 2.150333752409688 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.739166876204844 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 -1.0 2.150333752409688 0.0 0.0 -2.6192272232000664 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 -1.0 0.0 0.0 2.150333752409688 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0; 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 2.150333752409688 -1.0 0.0 0.0 0.0 -2.619227223200066; 0.0 0.0 -1.309613611600033 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 -1.0 2.150333752409688 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.739166876204844 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.6192272232000664 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.309613611600033 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 2.150333752409688 -1.0 0.0; 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 0.0 0.0 -1.0 2.150333752409688 0.0; 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0 -1.0 0.0 0.0 2.150333752409688], - ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 -1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 0.0 0.0 1.309613611600033 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0 1.3096136116000332; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 1.309613611600033 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
1.3096136116000332 -1.7391668762048442 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0 -1.739166876204844] - ), - ( ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007; 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788; -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248], - ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624] - )] - - for (A, B) in problematic_pencils - f = schur(A, B) - @test f.Q*f.S*f.Z' ≈ A - @test f.Q*f.T*f.Z' ≈ B - end -end - -@testset "adjoint and transpose for schur (#40941)" begin - A = rand(3, 3) - B = schur(A', A) - C = B.left*B.S*B.right' - D = schur(transpose(A), A) - E = D.left*D.S*D.right' - @test A' ≈ C ≈ E -end - -@testset "UpperHessenberg schur" begin - A = UpperHessenberg(rand(ComplexF64, 100, 100)) - B = Array(A) - fact1 = schur(A) - fact2 = schur(B) - @test fact1.values ≈ fact2.values - @test fact1.Z * fact1.T * fact1.Z' ≈ B - - A = UpperHessenberg(rand(Int32, 50, 50)) - B = Array(A) - fact1 = schur(A) - fact2 = schur(B) - @test fact1.values ≈ fact2.values - @test fact1.Z * fact1.T * fact1.Z' ≈ B -end - -end # module TestSchur diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl deleted file mode 100644 index eaa297e05d957..0000000000000 --- a/stdlib/LinearAlgebra/test/special.jl +++ /dev/null @@ -1,538 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestSpecial - -using Test, LinearAlgebra, Random -using LinearAlgebra: rmul! - -n= 10 #Size of matrix to test -Random.seed!(1) - -@testset "Interconversion between special matrix types" begin - a = [1.0:n;] - A = Diagonal(a) - @testset for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Tridiagonal, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - @test Matrix(convert(newtype, Diagonal(GenericArray(a)))) == Matrix(A) - end - - @testset for isupper in (true, false) - A = Bidiagonal(a, [1.0:n-1;], ifelse(isupper, :U, :L)) - for newtype in [Bidiagonal, Tridiagonal, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - @test Matrix(newtype(A)) == Matrix(A) - end - @test_throws ArgumentError convert(SymTridiagonal, A) - tritype = isupper ? 
UpperTriangular : LowerTriangular - @test Matrix(tritype(A)) == Matrix(A) - - A = Bidiagonal(a, zeros(n-1), ifelse(isupper, :U, :L)) #morally Diagonal - for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Tridiagonal, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - @test Matrix(newtype(A)) == Matrix(A) - end - @test Matrix(tritype(A)) == Matrix(A) - end - - A = SymTridiagonal(a, [1.0:n-1;]) - for newtype in [Tridiagonal, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - end - for newtype in [Diagonal, Bidiagonal] - @test_throws ArgumentError convert(newtype,A) - end - A = SymTridiagonal(a, zeros(n-1)) - @test Matrix(convert(Bidiagonal,A)) == Matrix(A) - - A = Tridiagonal(zeros(n-1), [1.0:n;], zeros(n-1)) #morally Diagonal - for newtype in [Diagonal, Bidiagonal, SymTridiagonal, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - end - A = Tridiagonal(fill(1., n-1), [1.0:n;], fill(1., n-1)) #not morally Diagonal - for newtype in [SymTridiagonal, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - end - for newtype in [Diagonal, Bidiagonal] - @test_throws ArgumentError convert(newtype,A) - end - A = Tridiagonal(zeros(n-1), [1.0:n;], fill(1., n-1)) #not morally Diagonal - @test Matrix(convert(Bidiagonal, A)) == Matrix(A) - A = UpperTriangular(Tridiagonal(zeros(n-1), [1.0:n;], fill(1., n-1))) - @test Matrix(convert(Bidiagonal, A)) == Matrix(A) - A = Tridiagonal(fill(1., n-1), [1.0:n;], zeros(n-1)) #not morally Diagonal - @test Matrix(convert(Bidiagonal, A)) == Matrix(A) - A = LowerTriangular(Tridiagonal(fill(1., n-1), [1.0:n;], zeros(n-1))) - @test Matrix(convert(Bidiagonal, A)) == Matrix(A) - @test_throws ArgumentError convert(SymTridiagonal,A) - - A = LowerTriangular(Matrix(Diagonal(a))) #morally Diagonal - for newtype in [Diagonal, Bidiagonal, SymTridiagonal, LowerTriangular, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - end - A = UpperTriangular(Matrix(Diagonal(a))) #morally Diagonal - for newtype in [Diagonal, Bidiagonal, SymTridiagonal, UpperTriangular, Matrix] - @test Matrix(convert(newtype, A)) == Matrix(A) - end - A = UpperTriangular(triu(rand(n,n))) - for newtype in [Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal] - @test_throws ArgumentError convert(newtype,A) - end - - - # test operations/constructors (not conversions) permitted in the docs - dl = [1., 1.] - d = [-2., -2., -2.] - T = Tridiagonal(dl, d, -dl) - S = SymTridiagonal(d, dl) - Bu = Bidiagonal(d, dl, :U) - Bl = Bidiagonal(d, dl, :L) - D = Diagonal(d) - M = [-2. 0. 0.; 1. -2. 0.; -1. 1. -2.] - U = UpperTriangular(M) - L = LowerTriangular(Matrix(M')) - - for A in (T, S, Bu, Bl, D, U, L, M) - Adense = Matrix(A) - B = Symmetric(A) - Bdense = Matrix(B) - for (C,Cdense) in ((A,Adense), (B,Bdense)) - @test Diagonal(C) == Diagonal(Cdense) - @test Bidiagonal(C, :U) == Bidiagonal(Cdense, :U) - @test Bidiagonal(C, :L) == Bidiagonal(Cdense, :L) - @test Tridiagonal(C) == Tridiagonal(Cdense) - @test UpperTriangular(C) == UpperTriangular(Cdense) - @test LowerTriangular(C) == LowerTriangular(Cdense) - end - end - - @testset "Matrix constructor for !isa(zero(T), T)" begin - # the following models JuMP.jl's VariableRef and AffExpr, resp. 
- struct TypeWithoutZero end - struct TypeWithZero end - Base.promote_rule(::Type{TypeWithoutZero}, ::Type{TypeWithZero}) = TypeWithZero - Base.convert(::Type{TypeWithZero}, ::TypeWithoutZero) = TypeWithZero() - Base.zero(::Type{<:Union{TypeWithoutZero, TypeWithZero}}) = TypeWithZero() - LinearAlgebra.symmetric(::TypeWithoutZero, ::Symbol) = TypeWithoutZero() - Base.transpose(::TypeWithoutZero) = TypeWithoutZero() - d = fill(TypeWithoutZero(), 3) - du = fill(TypeWithoutZero(), 2) - dl = fill(TypeWithoutZero(), 2) - D = Diagonal(d) - Bu = Bidiagonal(d, du, :U) - Bl = Bidiagonal(d, dl, :L) - Tri = Tridiagonal(dl, d, du) - Sym = SymTridiagonal(d, dl) - for M in (D, Bu, Bl, Tri, Sym) - @test Matrix(M) == zeros(TypeWithZero, 3, 3) - end - end -end - -@testset "Binary ops among special types" begin - a=[1.0:n;] - A=Diagonal(a) - Spectypes = [Diagonal, Bidiagonal, Tridiagonal, Matrix] - for (idx, type1) in enumerate(Spectypes) - for type2 in Spectypes - B = convert(type1,A) - C = convert(type2,A) - @test Matrix(B + C) ≈ Matrix(A + A) - @test Matrix(B - C) ≈ Matrix(A - A) - end - end - B = SymTridiagonal(a, fill(1., n-1)) - for Spectype in [Diagonal, Bidiagonal, Tridiagonal, Matrix] - @test Matrix(B + convert(Spectype,A)) ≈ Matrix(B + A) - @test Matrix(convert(Spectype,A) + B) ≈ Matrix(B + A) - @test Matrix(B - convert(Spectype,A)) ≈ Matrix(B - A) - @test Matrix(convert(Spectype,A) - B) ≈ Matrix(A - B) - end - - C = rand(n,n) - for TriType in [LinearAlgebra.UnitLowerTriangular, LinearAlgebra.UnitUpperTriangular, UpperTriangular, LowerTriangular] - D = TriType(C) - for Spectype in [Diagonal, Bidiagonal, Tridiagonal, Matrix] - @test Matrix(D + convert(Spectype,A)) ≈ Matrix(D + A) - @test Matrix(convert(Spectype,A) + D) ≈ Matrix(A + D) - @test Matrix(D - convert(Spectype,A)) ≈ Matrix(D - A) - @test Matrix(convert(Spectype,A) - D) ≈ Matrix(A - D) - end - end - - UpTri = UpperTriangular(rand(20,20)) - LoTri = LowerTriangular(rand(20,20)) - Diag = Diagonal(rand(20,20)) - Tridiag = Tridiagonal(rand(20, 20)) - UpBi = Bidiagonal(rand(20,20), :U) - LoBi = Bidiagonal(rand(20,20), :L) - Sym = SymTridiagonal(rand(20), rand(19)) - Dense = rand(20, 20) - mats = Any[UpTri, LoTri, Diag, Tridiag, UpBi, LoBi, Sym, Dense] - - for op in (+,-,*) - for A in mats - for B in mats - @test (op)(A, B) ≈ (op)(Matrix(A), Matrix(B)) ≈ Matrix((op)(A, B)) - end - end - end -end - -@testset "+ and - among structured matrices with different container types" begin - diag = 1:5 - offdiag = 1:4 - uniformscalingmats = [UniformScaling(3), UniformScaling(1.0), UniformScaling(3//5), UniformScaling(ComplexF64(1.3, 3.5))] - mats = Any[Diagonal(diag), Bidiagonal(diag, offdiag, 'U'), Bidiagonal(diag, offdiag, 'L'), Tridiagonal(offdiag, diag, offdiag), SymTridiagonal(diag, offdiag)] - for T in [ComplexF64, Int64, Rational{Int64}, Float64] - push!(mats, Diagonal(Vector{T}(diag))) - push!(mats, Bidiagonal(Vector{T}(diag), Vector{T}(offdiag), 'U')) - push!(mats, Bidiagonal(Vector{T}(diag), Vector{T}(offdiag), 'L')) - push!(mats, Tridiagonal(Vector{T}(offdiag), Vector{T}(diag), Vector{T}(offdiag))) - push!(mats, SymTridiagonal(Vector{T}(diag), Vector{T}(offdiag))) - end - - for op in (+,-,*) - for A in mats - for B in mats - @test (op)(A, B) ≈ (op)(Matrix(A), Matrix(B)) ≈ Matrix((op)(A, B)) - end - end - end - for op in (+,-) - for A in mats - for B in uniformscalingmats - @test (op)(A, B) ≈ (op)(Matrix(A), B) ≈ Matrix((op)(A, B)) - @test (op)(B, A) ≈ (op)(B, Matrix(A)) ≈ Matrix((op)(B, A)) - end - end - end - diag = [randn(ComplexF64, 2, 2) for _ 
in 1:3] - odiag = [randn(ComplexF64, 2, 2) for _ in 1:2] - for A in (Diagonal(diag), - Bidiagonal(diag, odiag, :U), - Bidiagonal(diag, odiag, :L), - Tridiagonal(odiag, diag, odiag), - SymTridiagonal(diag, odiag)), B in uniformscalingmats - @test (A + B)::typeof(A) == (B + A)::typeof(A) - @test (A - B)::typeof(A) == ((A + (-B))::typeof(A)) - @test (B - A)::typeof(A) == ((B + (-A))::typeof(A)) - end -end - - -@testset "Triangular Types and QR" begin - for typ in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) - a = rand(n,n) - atri = typ(a) - matri = Matrix(atri) - b = rand(n,n) - for pivot in (ColumnNorm(), NoPivot()) - qrb = qr(b, pivot) - @test atri * qrb.Q ≈ matri * qrb.Q - @test atri * qrb.Q' ≈ matri * qrb.Q' - @test qrb.Q * atri ≈ qrb.Q * matri - @test qrb.Q' * atri ≈ qrb.Q' * matri - end - end -end - -@testset "Multiplication of Qs" begin - for pivot in (ColumnNorm(), NoPivot()), A in (rand(5, 3), rand(5, 5), rand(3, 5)) - Q = qr(A, pivot).Q - m = size(A, 1) - C = Matrix{Float64}(undef, (m, m)) - @test Q*Q ≈ (Q*I) * (Q*I) ≈ mul!(C, Q, Q) - @test size(Q*Q) == (m, m) - @test Q'Q ≈ (Q'*I) * (Q*I) ≈ mul!(C, Q', Q) - @test size(Q'Q) == (m, m) - @test Q*Q' ≈ (Q*I) * (Q'*I) ≈ mul!(C, Q, Q') - @test size(Q*Q') == (m, m) - @test Q'Q' ≈ (Q'*I) * (Q'*I) ≈ mul!(C, Q', Q') - @test size(Q'Q') == (m, m) - end -end - -@testset "concatenations of combinations of special and other matrix types" begin - N = 4 - # Test concatenating pairwise combinations of special matrices - diagmat = Diagonal(1:N) - bidiagmat = Bidiagonal(1:N, 1:(N-1), :U) - tridiagmat = Tridiagonal(1:(N-1), 1:N, 1:(N-1)) - symtridiagmat = SymTridiagonal(1:N, 1:(N-1)) - specialmats = (diagmat, bidiagmat, tridiagmat, symtridiagmat) - for specialmata in specialmats, specialmatb in specialmats - MA = Matrix(specialmata); MB = Matrix(specialmatb) - @test hcat(specialmata, specialmatb) == hcat(MA, MB) - @test vcat(specialmata, specialmatb) == vcat(MA, MB) - @test hvcat((1,1), specialmata, specialmatb) == hvcat((1,1), MA, MB) - @test cat(specialmata, specialmatb; dims=(1,2)) == cat(MA, MB; dims=(1,2)) - end - # Test concatenating pairwise combinations of special matrices with dense matrices or dense vectors - densevec = fill(1., N) - densemat = diagm(0 => densevec) - for specialmat in specialmats - SM = Matrix(specialmat) - # --> Tests applicable only to pairs of matrices - @test vcat(specialmat, densemat) == vcat(SM, densemat) - @test vcat(densemat, specialmat) == vcat(densemat, SM) - # --> Tests applicable also to pairs including vectors - for specialmat in specialmats, othermatorvec in (densemat, densevec) - SM = Matrix(specialmat); OM = Array(othermatorvec) - @test hcat(specialmat, othermatorvec) == hcat(SM, OM) - @test hcat(othermatorvec, specialmat) == hcat(OM, SM) - @test hvcat((2,), specialmat, othermatorvec) == hvcat((2,), SM, OM) - @test hvcat((2,), othermatorvec, specialmat) == hvcat((2,), OM, SM) - @test cat(specialmat, othermatorvec; dims=(1,2)) == cat(SM, OM; dims=(1,2)) - @test cat(othermatorvec, specialmat; dims=(1,2)) == cat(OM, SM; dims=(1,2)) - end - end -end - -@testset "concatenations of annotated types" begin - N = 4 - # The tested annotation types - testfull = Base.get_bool_env("JULIA_TESTFULL", false) - utriannotations = (UpperTriangular, UnitUpperTriangular) - ltriannotations = (LowerTriangular, UnitLowerTriangular) - triannotations = (utriannotations..., ltriannotations...) - symannotations = (Symmetric, Hermitian) - annotations = testfull ? (triannotations..., symannotations...) 
: (LowerTriangular, Symmetric) - # Concatenations involving these types, un/annotated - diagmat = Diagonal(1:N) - bidiagmat = Bidiagonal(1:N, 1:(N-1), :U) - tridiagmat = Tridiagonal(1:(N-1), 1:N, 1:(N-1)) - symtridiagmat = SymTridiagonal(1:N, 1:(N-1)) - specialconcatmats = testfull ? (diagmat, bidiagmat, tridiagmat, symtridiagmat) : (diagmat,) - # Concatenations involving strictly these types, un/annotated - densevec = fill(1., N) - densemat = fill(1., N, N) - # Annotated collections - annodmats = [annot(densemat) for annot in annotations] - annospcmats = [annot(spcmat) for annot in annotations, spcmat in specialconcatmats] - # Test concatenations of pairwise combinations of annotated special matrices - for annospcmata in annospcmats, annospcmatb in annospcmats - AM = Array(annospcmata); BM = Array(annospcmatb) - @test vcat(annospcmata, annospcmatb) == vcat(AM, BM) - @test hcat(annospcmata, annospcmatb) == hcat(AM, BM) - @test hvcat((2,), annospcmata, annospcmatb) == hvcat((2,), AM, BM) - @test cat(annospcmata, annospcmatb; dims=(1,2)) == cat(AM, BM; dims=(1,2)) - end - # Test concatenations of pairwise combinations of annotated special matrices and other matrix/vector types - for annospcmat in annospcmats - AM = Array(annospcmat) - # --> Tests applicable to pairs including only matrices - for othermat in (densemat, annodmats..., specialconcatmats...) - OM = Array(othermat) - @test vcat(annospcmat, othermat) == vcat(AM, OM) - @test vcat(othermat, annospcmat) == vcat(OM, AM) - end - # --> Tests applicable to pairs including other vectors or matrices - for other in (densevec, densemat, annodmats..., specialconcatmats...) - OM = Array(other) - @test hcat(annospcmat, other) == hcat(AM, OM) - @test hcat(other, annospcmat) == hcat(OM, AM) - @test hvcat((2,), annospcmat, other) == hvcat((2,), AM, OM) - @test hvcat((2,), other, annospcmat) == hvcat((2,), OM, AM) - @test cat(annospcmat, other; dims=(1,2)) == cat(AM, OM; dims=(1,2)) - @test cat(other, annospcmat; dims=(1,2)) == cat(OM, AM; dims=(1,2)) - end - end - # Test concatenations strictly involving un/annotated dense matrices/vectors - for densemata in (densemat, annodmats...) - AM = Array(densemata) - # --> Tests applicable to pairs including only matrices - for densematb in (densemat, annodmats...) - BM = Array(densematb) - @test vcat(densemata, densematb) == vcat(AM, BM) - @test vcat(densematb, densemata) == vcat(BM, AM) - end - # --> Tests applicable to pairs including vectors or matrices - for otherdense in (densevec, densemat, annodmats...) 
- OM = Array(otherdense) - @test hcat(densemata, otherdense) == hcat(AM, OM) - @test hcat(otherdense, densemata) == hcat(OM, AM) - @test hvcat((2,), densemata, otherdense) == hvcat((2,), AM, OM) - @test hvcat((2,), otherdense, densemata) == hvcat((2,), OM, AM) - @test cat(densemata, otherdense; dims=(1,2)) == cat(AM, OM; dims=(1,2)) - @test cat(otherdense, densemata; dims=(1,2)) == cat(OM, AM; dims=(1,2)) - end - end -end - -# for testing types with a dimension -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs - -@testset "zero and one for structured matrices" begin - for elty in (Int64, Float64, ComplexF64) - D = Diagonal(rand(elty, 10)) - Bu = Bidiagonal(rand(elty, 10), rand(elty, 9), 'U') - Bl = Bidiagonal(rand(elty, 10), rand(elty, 9), 'L') - T = Tridiagonal(rand(elty, 9),rand(elty, 10), rand(elty, 9)) - S = SymTridiagonal(rand(elty, 10), rand(elty, 9)) - mats = Any[D, Bu, Bl, T, S] - for A in mats - @test iszero(zero(A)) - @test isone(one(A)) - @test zero(A) == zero(Matrix(A)) - @test one(A) == one(Matrix(A)) - end - - @test zero(D) isa Diagonal - @test one(D) isa Diagonal - - @test zero(Bu) isa Bidiagonal - @test one(Bu) isa Bidiagonal - @test zero(Bl) isa Bidiagonal - @test one(Bl) isa Bidiagonal - @test zero(Bu).uplo == one(Bu).uplo == Bu.uplo - @test zero(Bl).uplo == one(Bl).uplo == Bl.uplo - - @test zero(T) isa Tridiagonal - @test one(T) isa Tridiagonal - @test zero(S) isa SymTridiagonal - @test one(S) isa SymTridiagonal - end - - # ranges - D = Diagonal(1:10) - Bu = Bidiagonal(1:10, 1:9, 'U') - Bl = Bidiagonal(1:10, 1:9, 'L') - T = Tridiagonal(1:9, 1:10, 1:9) - S = SymTridiagonal(1:10, 1:9) - mats = [D, Bu, Bl, T, S] - for A in mats - @test iszero(zero(A)) - @test isone(one(A)) - @test zero(A) == zero(Matrix(A)) - @test one(A) == one(Matrix(A)) - end - - @test zero(D) isa Diagonal - @test one(D) isa Diagonal - - @test zero(Bu) isa Bidiagonal - @test one(Bu) isa Bidiagonal - @test zero(Bl) isa Bidiagonal - @test one(Bl) isa Bidiagonal - @test zero(Bu).uplo == one(Bu).uplo == Bu.uplo - @test zero(Bl).uplo == one(Bl).uplo == Bl.uplo - - @test zero(T) isa Tridiagonal - @test one(T) isa Tridiagonal - @test zero(S) isa SymTridiagonal - @test one(S) isa SymTridiagonal - - # eltype with dimensions - D0 = Diagonal{Furlong{0, Int64}}([1, 2, 3, 4]) - Bu0 = Bidiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3], 'U') - Bl0 = Bidiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3], 'L') - T0 = Tridiagonal{Furlong{0, Int64}}([1, 2, 3], [1, 2, 3, 4], [1, 2, 3]) - S0 = SymTridiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3]) - F2 = Furlongs.Furlong{2}(1) - D2 = Diagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2) - Bu2 = Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2, 'U') - Bl2 = Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2, 'L') - T2 = Tridiagonal{Furlong{2, Int64}}([1, 2, 3].*F2, [1, 2, 3, 4].*F2, [1, 2, 3].*F2) - S2 = SymTridiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2) - mats = Any[D0, Bu0, Bl0, T0, S0, D2, Bu2, Bl2, T2, S2] - for A in mats - @test iszero(zero(A)) - @test isone(one(A)) - @test zero(A) == zero(Matrix(A)) - @test one(A) == one(Matrix(A)) - @test eltype(one(A)) == typeof(one(eltype(A))) - end -end - -@testset "== for structured matrices" begin - diag = rand(10) - offdiag = rand(9) - D = Diagonal(rand(10)) - Bup = Bidiagonal(diag, offdiag, 'U') - Blo = Bidiagonal(diag, offdiag, 'L') - 
Bupd = Bidiagonal(diag, zeros(9), 'U') - Blod = Bidiagonal(diag, zeros(9), 'L') - T = Tridiagonal(offdiag, diag, offdiag) - Td = Tridiagonal(zeros(9), diag, zeros(9)) - Tu = Tridiagonal(zeros(9), diag, offdiag) - Tl = Tridiagonal(offdiag, diag, zeros(9)) - S = SymTridiagonal(diag, offdiag) - Sd = SymTridiagonal(diag, zeros(9)) - - mats = [D, Bup, Blo, Bupd, Blod, T, Td, Tu, Tl, S, Sd] - - for a in mats - for b in mats - @test (a == b) == (Matrix(a) == Matrix(b)) == (b == a) == (Matrix(b) == Matrix(a)) - end - end -end - -@testset "BiTriSym*Q' and Q'*BiTriSym" begin - dl = [1, 1, 1] - d = [1, 1, 1, 1] - D = Diagonal(d) - Bi = Bidiagonal(d, dl, :L) - Tri = Tridiagonal(dl, d, dl) - Sym = SymTridiagonal(d, dl) - F = qr(ones(4, 1)) - A = F.Q' - for A in (F.Q, F.Q'), B in (D, Bi, Tri, Sym) - @test B*A ≈ Matrix(B)*A - @test A*B ≈ A*Matrix(B) - end -end - -@testset "Ops on SymTridiagonal ev has the same length as dv" begin - x = rand(3) - y = rand(3) - z = rand(2) - - S = SymTridiagonal(x, y) - T = Tridiagonal(z, x, z) - Bu = Bidiagonal(x, z, :U) - Bl = Bidiagonal(x, z, :L) - - Ms = Matrix(S) - Mt = Matrix(T) - Mbu = Matrix(Bu) - Mbl = Matrix(Bl) - - @test S + T ≈ Ms + Mt - @test T + S ≈ Mt + Ms - @test S + Bu ≈ Ms + Mbu - @test Bu + S ≈ Mbu + Ms - @test S + Bl ≈ Ms + Mbl - @test Bl + S ≈ Mbl + Ms -end - -@testset "Ensure Strided * (Sym)Tridiagonal is Dense" begin - x = rand(3) - y = rand(3) - z = rand(2) - - l = rand(12, 12) - # strided but not a Matrix - v = @view l[1:4:end, 1:4:end] - M_v = Matrix(v) - m = rand(3, 3) - - S = SymTridiagonal(x, y) - T = Tridiagonal(z, x, z) - M_S = Matrix(S) - M_T = Matrix(T) - - @test m * T ≈ m * M_T - @test m * S ≈ m * M_S - @test v * T ≈ M_v * T - @test v * S ≈ M_v * S - - @test m * T isa Matrix - @test m * S isa Matrix - @test v * T isa Matrix - @test v * S isa Matrix -end - -end # module TestSpecial diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl deleted file mode 100644 index 2ca1904b2ff2d..0000000000000 --- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl +++ /dev/null @@ -1,241 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestStructuredBroadcast -using Test, LinearAlgebra - -@testset "broadcast[!] 
over combinations of scalars, structured matrices, and dense vectors/matrices" begin - N = 10 - s = rand() - fV = rand(N) - fA = rand(N, N) - Z = copy(fA) - D = Diagonal(rand(N)) - B = Bidiagonal(rand(N), rand(N - 1), :U) - T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1)) - U = UpperTriangular(rand(N,N)) - L = LowerTriangular(rand(N,N)) - M = Matrix(rand(N,N)) - structuredarrays = (D, B, T, U, L, M) - fstructuredarrays = map(Array, structuredarrays) - for (X, fX) in zip(structuredarrays, fstructuredarrays) - @test (Q = broadcast(sin, X); typeof(Q) == typeof(X) && Q == broadcast(sin, fX)) - @test broadcast!(sin, Z, X) == broadcast(sin, fX) - @test (Q = broadcast(cos, X); Q isa Matrix && Q == broadcast(cos, fX)) - @test broadcast!(cos, Z, X) == broadcast(cos, fX) - @test (Q = broadcast(*, s, X); typeof(Q) == typeof(X) && Q == broadcast(*, s, fX)) - @test broadcast!(*, Z, s, X) == broadcast(*, s, fX) - @test (Q = broadcast(+, fV, fA, X); Q isa Matrix && Q == broadcast(+, fV, fA, fX)) - @test broadcast!(+, Z, fV, fA, X) == broadcast(+, fV, fA, fX) - @test (Q = broadcast(*, s, fV, fA, X); Q isa Matrix && Q == broadcast(*, s, fV, fA, fX)) - @test broadcast!(*, Z, s, fV, fA, X) == broadcast(*, s, fV, fA, fX) - - @test X .* 2.0 == X .* (2.0,) == fX .* 2.0 - @test X .* 2.0 isa typeof(X) - @test X .* (2.0,) isa typeof(X) - @test isequal(X .* Inf, fX .* Inf) - - two = 2 - @test X .^ 2 == X .^ (2,) == fX .^ 2 == X .^ two - @test X .^ 2 isa typeof(X) - @test X .^ (2,) isa typeof(X) - @test X .^ two isa typeof(X) - @test X .^ 0 == fX .^ 0 - @test X .^ -1 == fX .^ -1 - - for (Y, fY) in zip(structuredarrays, fstructuredarrays) - @test broadcast(+, X, Y) == broadcast(+, fX, fY) - @test broadcast!(+, Z, X, Y) == broadcast(+, fX, fY) - @test broadcast(*, X, Y) == broadcast(*, fX, fY) - @test broadcast!(*, Z, X, Y) == broadcast(*, fX, fY) - end - end - diagonals = (D, B, T) - fdiagonals = map(Array, diagonals) - for (X, fX) in zip(diagonals, fdiagonals) - for (Y, fY) in zip(diagonals, fdiagonals) - @test broadcast(+, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(+, fX, fY) - @test broadcast!(+, Z, X, Y) == broadcast(+, fX, fY) - @test broadcast(*, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(*, fX, fY) - @test broadcast!(*, Z, X, Y) == broadcast(*, fX, fY) - end - end -end - -@testset "broadcast! 
where the destination is a structured matrix" begin - N = 5 - A = rand(N, N) - sA = A + copy(A') - D = Diagonal(rand(N)) - Bu = Bidiagonal(rand(N), rand(N - 1), :U) - Bl = Bidiagonal(rand(N), rand(N - 1), :L) - T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1)) - ◣ = LowerTriangular(rand(N,N)) - ◥ = UpperTriangular(rand(N,N)) - M = Matrix(rand(N,N)) - - @test broadcast!(sin, copy(D), D) == Diagonal(sin.(D)) - @test broadcast!(sin, copy(Bu), Bu) == Bidiagonal(sin.(Bu), :U) - @test broadcast!(sin, copy(Bl), Bl) == Bidiagonal(sin.(Bl), :L) - @test broadcast!(sin, copy(T), T) == Tridiagonal(sin.(T)) - @test broadcast!(sin, copy(◣), ◣) == LowerTriangular(sin.(◣)) - @test broadcast!(sin, copy(◥), ◥) == UpperTriangular(sin.(◥)) - @test broadcast!(sin, copy(M), M) == Matrix(sin.(M)) - @test broadcast!(*, copy(D), D, A) == Diagonal(broadcast(*, D, A)) - @test broadcast!(*, copy(Bu), Bu, A) == Bidiagonal(broadcast(*, Bu, A), :U) - @test broadcast!(*, copy(Bl), Bl, A) == Bidiagonal(broadcast(*, Bl, A), :L) - @test broadcast!(*, copy(T), T, A) == Tridiagonal(broadcast(*, T, A)) - @test broadcast!(*, copy(◣), ◣, A) == LowerTriangular(broadcast(*, ◣, A)) - @test broadcast!(*, copy(◥), ◥, A) == UpperTriangular(broadcast(*, ◥, A)) - @test broadcast!(*, copy(M), M, A) == Matrix(broadcast(*, M, A)) - - @test_throws ArgumentError broadcast!(cos, copy(D), D) == Diagonal(sin.(D)) - @test_throws ArgumentError broadcast!(cos, copy(Bu), Bu) == Bidiagonal(sin.(Bu), :U) - @test_throws ArgumentError broadcast!(cos, copy(Bl), Bl) == Bidiagonal(sin.(Bl), :L) - @test_throws ArgumentError broadcast!(cos, copy(T), T) == Tridiagonal(sin.(T)) - @test_throws ArgumentError broadcast!(cos, copy(◣), ◣) == LowerTriangular(sin.(◣)) - @test_throws ArgumentError broadcast!(cos, copy(◥), ◥) == UpperTriangular(sin.(◥)) - @test_throws ArgumentError broadcast!(+, copy(D), D, A) == Diagonal(broadcast(*, D, A)) - @test_throws ArgumentError broadcast!(+, copy(Bu), Bu, A) == Bidiagonal(broadcast(*, Bu, A), :U) - @test_throws ArgumentError broadcast!(+, copy(Bl), Bl, A) == Bidiagonal(broadcast(*, Bl, A), :L) - @test_throws ArgumentError broadcast!(+, copy(T), T, A) == Tridiagonal(broadcast(*, T, A)) - @test_throws ArgumentError broadcast!(+, copy(◣), ◣, A) == LowerTriangular(broadcast(*, ◣, A)) - @test_throws ArgumentError broadcast!(+, copy(◥), ◥, A) == UpperTriangular(broadcast(*, ◥, A)) - @test_throws ArgumentError broadcast!(*, copy(◥), ◣, 2) - @test_throws ArgumentError broadcast!(*, copy(Bu), Bl, 2) -end - -@testset "map[!] 
over combinations of structured matrices" begin - N = 10 - fA = rand(N, N) - Z = copy(fA) - D = Diagonal(rand(N)) - B = Bidiagonal(rand(N), rand(N - 1), :U) - T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1)) - U = UpperTriangular(rand(N,N)) - L = LowerTriangular(rand(N,N)) - M = Matrix(rand(N,N)) - structuredarrays = (M, D, B, T, U, L) - fstructuredarrays = map(Array, structuredarrays) - for (X, fX) in zip(structuredarrays, fstructuredarrays) - @test (Q = map(sin, X); typeof(Q) == typeof(X) && Q == map(sin, fX)) - @test map!(sin, Z, X) == map(sin, fX) - @test (Q = map(cos, X); Q isa Matrix && Q == map(cos, fX)) - @test map!(cos, Z, X) == map(cos, fX) - @test (Q = map(+, fA, X); Q isa Matrix && Q == map(+, fA, fX)) - @test map!(+, Z, fA, X) == map(+, fA, fX) - for (Y, fY) in zip(structuredarrays, fstructuredarrays) - @test map(+, X, Y) == map(+, fX, fY) - @test map!(+, Z, X, Y) == map(+, fX, fY) - @test map(*, X, Y) == map(*, fX, fY) - @test map!(*, Z, X, Y) == map(*, fX, fY) - @test map(+, X, fA, Y) == map(+, fX, fA, fY) - @test map!(+, Z, X, fA, Y) == map(+, fX, fA, fY) - end - end - diagonals = (D, B, T) - fdiagonals = map(Array, diagonals) - for (X, fX) in zip(diagonals, fdiagonals) - for (Y, fY) in zip(diagonals, fdiagonals) - @test map(+, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(+, fX, fY) - @test map!(+, Z, X, Y) == broadcast(+, fX, fY) - @test map(*, X, Y)::Union{Diagonal,Bidiagonal,Tridiagonal} == broadcast(*, fX, fY) - @test map!(*, Z, X, Y) == broadcast(*, fX, fY) - end - end -end - -@testset "Issue #33397" begin - N = 5 - U = UpperTriangular(rand(N, N)) - L = LowerTriangular(rand(N, N)) - UnitU = UnitUpperTriangular(rand(N, N)) - UnitL = UnitLowerTriangular(rand(N, N)) - D = Diagonal(rand(N)) - @test U .+ L .+ D == U + L + D - @test L .+ U .+ D == L + U + D - @test UnitU .+ UnitL .+ D == UnitU + UnitL + D - @test UnitL .+ UnitU .+ D == UnitL + UnitU + D - @test U .+ UnitL .+ D == U + UnitL + D - @test L .+ UnitU .+ D == L + UnitU + D - @test L .+ U .+ L .+ U == L + U + L + U - @test U .+ L .+ U .+ L == U + L + U + L - @test L .+ UnitL .+ UnitU .+ U .+ D == L + UnitL + UnitU + U + D - @test L .+ U .+ D .+ D .+ D .+ D == L + U + D + D + D + D -end -@testset "Broadcast Returned Types" begin - # Issue 35245 - N = 3 - dV = rand(N) - evu = rand(N-1) - evl = rand(N-1) - - Bu = Bidiagonal(dV, evu, :U) - Bl = Bidiagonal(dV, evl, :L) - T = Tridiagonal(evl, dV * 2, evu) - - @test typeof(Bu .+ Bl) <: Tridiagonal - @test typeof(Bl .+ Bu) <: Tridiagonal - @test typeof(Bu .+ Bu) <: Bidiagonal - @test typeof(Bl .+ Bl) <: Bidiagonal - @test Bu .+ Bl == T - @test Bl .+ Bu == T - @test Bu .+ Bu == Bidiagonal(dV * 2, evu * 2, :U) - @test Bl .+ Bl == Bidiagonal(dV * 2, evl * 2, :L) - - - @test typeof(Bu .* Bl) <: Tridiagonal - @test typeof(Bl .* Bu) <: Tridiagonal - @test typeof(Bu .* Bu) <: Bidiagonal - @test typeof(Bl .* Bl) <: Bidiagonal - - @test Bu .* Bl == Tridiagonal(zeros(N-1), dV .* dV, zeros(N-1)) - @test Bl .* Bu == Tridiagonal(zeros(N-1), dV .* dV, zeros(N-1)) - @test Bu .* Bu == Bidiagonal(dV .* dV, evu .* evu, :U) - @test Bl .* Bl == Bidiagonal(dV .* dV, evl .* evl, :L) - - Bu2 = Bu .* 2 - @test typeof(Bu2) <: Bidiagonal && Bu2.uplo == 'U' - Bu2 = 2 .* Bu - @test typeof(Bu2) <: Bidiagonal && Bu2.uplo == 'U' - Bl2 = Bl .* 2 - @test typeof(Bl2) <: Bidiagonal && Bl2.uplo == 'L' - Bu2 = 2 .* Bl - @test typeof(Bl2) <: Bidiagonal && Bl2.uplo == 'L' - - # Example of Nested Broadcasts - tmp = (1 .* 2) .* (Bidiagonal(1:3, 1:2, 'U') .* (3 .* 4)) .* (5 .* Bidiagonal(1:3, 1:2, 
'L')) - @test typeof(tmp) <: Tridiagonal - -end - -struct Zero36193 end -Base.iszero(::Zero36193) = true -LinearAlgebra.iszerodefined(::Type{Zero36193}) = true -@testset "PR #36193" begin - f(::Union{Int, Zero36193}) = Zero36193() - function test(el) - M = [el el - el el] - v = [el, el] - U = UpperTriangular(M) - L = LowerTriangular(M) - D = Diagonal(v) - for (T, A) in [(UpperTriangular, U), (LowerTriangular, L), (Diagonal, D)] - @test identity.(A) isa typeof(A) - @test map(identity, A) isa typeof(A) - @test f.(A) isa T{Zero36193} - @test map(f, A) isa T{Zero36193} - end - end - # This should not need `zero(::Type{Zero36193})` to be defined - test(1) - Base.zero(::Type{Zero36193}) = Zero36193() - # This should not need `==(::Zero36193, ::Int)` to be defined as `iszerodefined` - # returns true. - test(Zero36193()) -end - -# structured broadcast with function returning non-number type -@test tuple.(Diagonal([1, 2])) == [(1,) (0,); (0,) (2,)] - -end diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl deleted file mode 100644 index 7f2aad904a88f..0000000000000 --- a/stdlib/LinearAlgebra/test/svd.jl +++ /dev/null @@ -1,276 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestSVD - -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted - -@testset "Simple svdvals / svd tests" begin - ≊(x,y) = isapprox(x,y,rtol=1e-15) - - m = [2, 0] - @test @inferred(svdvals(m)) ≊ [2] - @test @inferred(svdvals!(float(m))) ≊ [2] - for sf in (@inferred(svd(m)), @inferred(svd!(float(m)))) - @test sf.S ≊ [2] - @test sf.U'sf.U ≊ [1] - @test sf.Vt'sf.Vt ≊ [1] - @test sf.U*Diagonal(sf.S)*sf.Vt' ≊ m - end - F = @inferred svd(m, full=true) - @test size(F.U) == (2, 2) - @test F.S ≊ [2] - @test F.U'F.U ≊ Matrix(I, 2, 2) - @test F.Vt'*F.Vt ≊ [1] - @test @inferred(svdvals(3:4)) ≊ [5] - A = Matrix(1.0I, 2, 2) - Z = svd(Hermitian(A); full=true) - @test Z.S ≈ ones(2) - @test Z.U'Z.U ≈ I(2) - - m1 = [2 0; 0 0] - m2 = [2 -2; 1 1]/sqrt(2) - m2c = Complex.([2 -2; 1 1]/sqrt(2)) - @test @inferred(svdvals(m1)) ≊ [2, 0] - @test @inferred(svdvals(m2)) ≊ [2, 1] - @test @inferred(svdvals(m2c)) ≊ [2, 1] - - sf1 = @inferred svd(m1) - sf2 = @inferred svd(m2) - @test sf1.S ≊ [2, 0] - @test sf2.S ≊ [2, 1] - # U & Vt are unitary - I22 = Matrix(I, 2, 2) - @test sf1.U*sf1.U' ≊ I22 - @test sf1.Vt*sf1.Vt' ≊ I22 - @test sf2.U*sf2.U' ≊ I22 - @test sf2.Vt*sf2.Vt' ≊ I22 - # SVD not uniquely determined, so just test we can reconstruct the - # matrices from the factorization as expected. - @test sf1.U*Diagonal(sf1.S)*sf1.Vt' ≊ m1 - @test sf2.U*Diagonal(sf2.S)*sf2.Vt' ≊ m2 - - @test ldiv!([0., 0.], svd(Matrix(I, 2, 2)), [1., 1.]) ≊ [1., 1.] - @test inv(svd(Matrix(I, 2, 2))) ≈ I - @test inv(svd([1 2; 3 4])) ≈ [-2.0 1.0; 1.5 -0.5] - @test inv(svd([1 0 1; 0 1 0])) ≈ [0.5 0.0; 0.0 1.0; 0.5 0.0] - @test_throws SingularException inv(svd([0 0; 0 0])) - @test inv(svd([1+2im 3+4im; 5+6im 7+8im])) ≈ [-0.5 + 0.4375im 0.25 - 0.1875im; 0.375 - 0.3125im -0.125 + 0.0625im] -end - -n = 10 - -Random.seed!(1234321) - -areal = randn(n,n)/2 -aimg = randn(n,n)/2 - -@testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int) - aa = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? 
complex.(areal, aimg) : areal) - asym = aa' + aa # symmetric indefinite - for a in (aa, view(aa, 1:n, 1:n)) - usv = svd(a) - @testset "singular value decomposition" begin - @test usv.S === svdvals(usv) - @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ a - @test convert(Array, usv) ≈ a - @test usv.Vt' ≈ usv.V - @test_throws ErrorException usv.Z - b = rand(eltya,n) - @test usv\b ≈ a\b - @test Base.propertynames(usv) == (:U, :S, :V, :Vt) - @test size(usv) == size(a) - if eltya <: BlasFloat - svdz = svd!(Matrix{eltya}(undef,0,0)) - @test svdz.U ≈ Matrix{eltya}(I, 0, 0) - @test svdz.S ≈ real(zeros(eltya,0)) - @test svdz.Vt ≈ Matrix{eltya}(I, 0, 0) - end - end - @testset "singular value decomposition of adjoint/transpose" begin - for transform in (adjoint, transpose) - usv = svd(transform(a)) - @test usv.S === svdvals(usv) - @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ transform(a) - @test convert(Array, usv) ≈ transform(a) - @test usv.Vt' ≈ usv.V - @test_throws ErrorException usv.Z - b = rand(eltya,n) - @test usv\b ≈ transform(a)\b - end - end - @testset "Generalized svd" begin - a_svd = a[1:div(n, 2), :] - gsvd = svd(a,a_svd) - @test Base.propertynames(gsvd) == (:alpha, :beta, :vals, :S, :D1, :D2, :R0, :U, :V, :Q, :a, :b, :k, :l, :R) - @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ a - @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ a_svd - @test usv.Vt' ≈ usv.V - @test_throws ErrorException usv.Z - @test_throws ErrorException gsvd.Z - @test gsvd.vals ≈ svdvals(a,a_svd) - α = eltya == Int ? -1 : rand(eltya) - β = svd(α) - @test β.S == [abs(α)] - @test svdvals(α) == abs(α) - u,v,q,d1,d2,r0 = svd(a,a_svd) - @test u ≈ gsvd.U - @test v ≈ gsvd.V - @test d1 ≈ gsvd.D1 - @test d2 ≈ gsvd.D2 - @test q ≈ gsvd.Q - @test gsvd.a.^2 + gsvd.b.^2 ≈ fill(1, length(gsvd.a)) - @test gsvd.alpha.^2 + gsvd.beta.^2 ≈ ones(eltya, length(gsvd.a)) - #testing the other layout for D1 & D2 - b = rand(eltya,n,2*n) - c = rand(eltya,n,2*n) - gsvd = svd(b,c) - @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ b - @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ c - # AbstractMatrix svd - T = Tridiagonal(a) - asvd = svd(T, a) - @test asvd.U*asvd.D1*asvd.R*asvd.Q' ≈ T - @test asvd.V*asvd.D2*asvd.R*asvd.Q' ≈ a - @test all(≈(1), svdvals(T, T)) - end - end - @testset "singular value decomposition of AbstractMatrix" begin - A = Tridiagonal(aa) - F = svd(A) - @test Matrix(F) ≈ A - @test svdvals(A) ≈ F.S - end - @testset "singular value decomposition of Hermitian/real-Symmetric" begin - for T in (eltya <: Real ? 
(Symmetric, Hermitian) : (Hermitian,)) - usv = svd(T(asym)) - @test usv.S === svdvals(usv) - @test usv.U * (Diagonal(usv.S) * usv.Vt) ≈ T(asym) - @test convert(Array, usv) ≈ T(asym) - @test usv.Vt' ≈ usv.V - @test_throws ErrorException usv.Z - b = rand(eltya,n) - @test usv\b ≈ T(asym)\b - end - end - if eltya <: LinearAlgebra.BlasReal - @testset "Number input" begin - x, y = randn(eltya, 2) - @test svd(x) == svd(fill(x, 1, 1)) - @test svdvals(x) == first(svdvals(fill(x, 1, 1))) - @test svd(x, y) == svd(fill(x, 1, 1), fill(y, 1, 1)) - @test svdvals(x, y) ≈ first(svdvals(fill(x, 1, 1), fill(y, 1, 1))) - end - end - if eltya != Int - @testset "isequal, ==, and hash" begin - x, y = rand(eltya), convert(eltya, NaN) - Fx, Fy = svd(x), svd(y) - @test Fx == Fx - @test !(Fy == Fy) - @test isequal(Fy, Fy) - @test hash(Fx) == hash(Fx) - @test hash(Fx, UInt(1)) == hash(Fx, UInt(1)) - @test hash(Fy) == hash(Fy) - @test hash(Fy, UInt(1)) == hash(Fy, UInt(1)) - end - end -end - - - -@testset "SVD Algorithms" begin - ≊(x,y) = isapprox(x,y,rtol=1e-15) - - x = [0.1 0.2; 0.3 0.4] - - for alg in [LinearAlgebra.QRIteration(), LinearAlgebra.DivideAndConquer()] - sx1 = svd(x, alg = alg) - @test sx1.U * Diagonal(sx1.S) * sx1.Vt ≊ x - @test sx1.V * sx1.Vt ≊ I - @test sx1.U * sx1.U' ≊ I - @test all(sx1.S .≥ 0) - - sx2 = svd!(copy(x), alg = alg) - @test sx2.U * Diagonal(sx2.S) * sx2.Vt ≊ x - @test sx2.V * sx2.Vt ≊ I - @test sx2.U * sx2.U' ≊ I - @test all(sx2.S .≥ 0) - end -end - -@testset "REPL printing of SVD" begin - svdd = svd(randn(3, 3)) - svdstring = sprint((t, s) -> show(t, "text/plain", s), svdd) - ustring = sprint((t, s) -> show(t, "text/plain", s), svdd.U) - sstring = sprint((t, s) -> show(t, "text/plain", s), svdd.S) - vtstring = sprint((t, s) -> show(t, "text/plain", s), svdd.Vt) - @test svdstring == "$(summary(svdd))\nU factor:\n$ustring\nsingular values:\n$sstring\nVt factor:\n$vtstring" -end - -@testset "REPL printing of Generalized SVD" begin - a = randn(3, 3) - b = randn(3, 3) - svdd = svd(a, b) - svdstring = sprint((t, s) -> show(t, "text/plain", s), svdd) - ustring = sprint((t, s) -> show(t, "text/plain", s), svdd.U) - qstring = sprint((t, s) -> show(t, "text/plain", s), svdd.Q) - vstring = sprint((t, s) -> show(t, "text/plain", s), svdd.V) - d1string = sprint((t, s) -> show(t, "text/plain", s), svdd.D1) - d2string = sprint((t, s) -> show(t, "text/plain", s), svdd.D2) - r0string = sprint((t, s) -> show(t, "text/plain", s), svdd.R0) - @test svdstring == "$(summary(svdd))\nU factor:\n$ustring\nV factor:\n$vstring\nQ factor:\n$qstring\nD1 factor:\n$d1string\nD2 factor:\n$d2string\nR0 factor:\n$r0string" -end - -@testset "c-tor with varying input eltypes" begin - A = randn(Float64, 10, 10) - U, S, V = svd(A) - Ut = convert.(Float16, U) - Vt = convert.(Float32, V) - svdc = SVD{ComplexF32}(Ut, S, Vt) - @test svdc isa SVD{ComplexF32} - Uc, Sc, Vc = svdc - @test Uc * diagm(0=>Sc) * transpose(V) ≈ complex.(A) rtol=1e-3 -end - -@testset "Issue 40944. 
ldiV!(SVD) should update rhs" begin - F = svd(randn(2, 2)) - b = randn(2) - x = ldiv!(F, b) - @test x === b -end - -@testset "adjoint of SVD" begin - n = 5 - B = randn(5, 2) - - @testset "size(b)=$(size(b))" for b in (B[:, 1], B) - @testset "size(A)=$(size(A))" for A in ( - randn(n, n), - # Wide problems become minimum norm (in x) problems similarly to LQ - randn(n + 2, n), - randn(n - 2, n), - complex.(randn(n, n), randn(n, n))) - - F = svd(A) - x = F'\b - @test x ≈ A'\b - @test length(size(x)) == length(size(b)) - end - end -end - -@testset "Float16" begin - A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.] - B = svd(A) - B32 = svd(Float32.(A)) - @test B isa SVD{Float16, Float16, Matrix{Float16}} - @test B.U isa Matrix{Float16} - @test B.Vt isa Matrix{Float16} - @test B.S isa Vector{Float16} - @test B.U ≈ B32.U - @test B.Vt ≈ B32.Vt - @test B.S ≈ B32.S -end - -end # module TestSVD diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl deleted file mode 100644 index 224b7b31a50df..0000000000000 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ /dev/null @@ -1,887 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestSymmetric - -using Test, LinearAlgebra, Random - -Random.seed!(1010) - -@testset "Pauli σ-matrices: $σ" for σ in map(Hermitian, - Any[ [1 0; 0 1], [0 1; 1 0], [0 -im; im 0], [1 0; 0 -1] ]) - @test ishermitian(σ) -end - -@testset "Two-dimensional Euler formula for Hermitian" begin - @test cis(Hermitian([π 0; 0 π])) ≈ -I -end - -@testset "Hermitian matrix exponential/log" begin - A1 = randn(4,4) + im*randn(4,4) - A2 = A1 + A1' - @test exp(A2) ≈ exp(Hermitian(A2)) - @test cis(A2) ≈ cis(Hermitian(A2)) - @test log(A2) ≈ log(Hermitian(A2)) - A3 = A1 * A1' # posdef - @test exp(A3) ≈ exp(Hermitian(A3)) - @test cis(A3) ≈ cis(Hermitian(A3)) - @test log(A3) ≈ log(Hermitian(A3)) - - A1 = randn(4,4) - A3 = A1 * A1' - A4 = A1 + transpose(A1) - @test exp(A4) ≈ exp(Symmetric(A4)) - @test log(A3) ≈ log(Symmetric(A3)) - @test log(A3) ≈ log(Hermitian(A3)) -end - -@testset "Core functionality" begin - n = 10 - areal = randn(n,n)/2 - aimg = randn(n,n)/2 - @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) - a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal) - asym = transpose(a) + a # symmetric indefinite - aherm = a' + a # Hermitian indefinite - apos = a' * a # Hermitian positive definite - aposs = apos + transpose(apos) # Symmetric positive definite - ε = εa = eps(abs(float(one(eltya)))) - - x = randn(n) - y = randn(n) - b = randn(n,n)/2 - x = eltya == Int ? rand(1:7, n) : convert(Vector{eltya}, eltya <: Complex ? complex.(x, zeros(n)) : x) - y = eltya == Int ? rand(1:7, n) : convert(Vector{eltya}, eltya <: Complex ? complex.(y, zeros(n)) : y) - b = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? 
complex.(b, zeros(n,n)) : b) - @testset "basic ops" begin - @testset "constructor" begin - @test Symmetric(Symmetric(asym, :U)) === Symmetric(asym, :U) - @test Hermitian(Hermitian(aherm, :U)) === Hermitian(aherm, :U) - @test Symmetric(Symmetric(asym, :U), :U) === Symmetric(asym, :U) - @test Hermitian(Hermitian(aherm, :U), :U) === Hermitian(aherm, :U) - @test_throws ArgumentError Symmetric(Symmetric(asym, :U), :L) - @test_throws ArgumentError Hermitian(Hermitian(aherm, :U), :L) - - @test_throws ArgumentError Symmetric(asym, :R) - @test_throws ArgumentError Hermitian(asym, :R) - - @test_throws MethodError Symmetric{eltya,typeof(asym)}(asym, :L) - @test_throws MethodError Hermitian{eltya,typeof(aherm)}(aherm, :L) - - # mixed cases with Hermitian/Symmetric - if eltya <: Real - @test Symmetric(Hermitian(aherm, :U)) === Symmetric(aherm, :U) - @test Hermitian(Symmetric(asym, :U)) === Hermitian(asym, :U) - @test Symmetric(Hermitian(aherm, :U), :U) === Symmetric(aherm, :U) - @test Hermitian(Symmetric(asym, :U), :U) === Hermitian(asym, :U) - @test_throws ArgumentError Symmetric(Hermitian(aherm, :U), :L) - @test_throws ArgumentError Hermitian(Symmetric(aherm, :U), :L) - end - end - @testset "diag" begin - D = Diagonal(x) - DM = Matrix(D) - B = diagm(-1 => x, 1 => x) - for uplo in (:U, :L) - @test diag(Symmetric(D, uplo))::Vector == x - @test diag(Hermitian(D, uplo))::Vector == real(x) - @test isdiag(Symmetric(DM, uplo)) - @test isdiag(Hermitian(DM, uplo)) - @test !isdiag(Symmetric(B, uplo)) - @test !isdiag(Hermitian(B, uplo)) - end - end - @testset "similar" begin - @test isa(similar(Symmetric(asym)), Symmetric{eltya}) - @test isa(similar(Hermitian(aherm)), Hermitian{eltya}) - @test isa(similar(Symmetric(asym), Int), Symmetric{Int}) - @test isa(similar(Hermitian(aherm), Int), Hermitian{Int}) - @test isa(similar(Symmetric(asym), (3,2)), Matrix{eltya}) - @test isa(similar(Hermitian(aherm), (3,2)), Matrix{eltya}) - @test isa(similar(Symmetric(asym), Int, (3,2)), Matrix{Int}) - @test isa(similar(Hermitian(aherm), Int, (3,2)), Matrix{Int}) - end - - @testset "Array/Matrix constructor from Symmetric/Hermitian" begin - @test asym == Matrix(Symmetric(asym)) == Array(Symmetric(asym)) - @test aherm == Matrix(Hermitian(aherm)) == Array(Hermitian(aherm)) - end - - @testset "parent" begin - @test asym === parent(Symmetric(asym)) - @test aherm === parent(Hermitian(aherm)) - end - # Unary minus for Symmetric/Hermitian matrices - @testset "Unary minus for Symmetric/Hermitian matrices" begin - @test (-Symmetric(asym))::typeof(Symmetric(asym)) == -asym - @test (-Hermitian(aherm))::typeof(Hermitian(aherm)) == -aherm - @test (-Symmetric([true true; false false]))::Symmetric{Int,Matrix{Int}} == [-1 -1; -1 0] - @test (-Hermitian([true false; true false]))::Hermitian{Int,Matrix{Int}} == [-1 0; 0 0] - end - - @testset "Addition and subtraction for Symmetric/Hermitian matrices" begin - for f in (+, -) - @test (f(Symmetric(asym), Symmetric(aposs)))::typeof(Symmetric(asym)) == f(asym, aposs) - @test (f(Hermitian(aherm), Hermitian(apos)))::typeof(Hermitian(aherm)) == f(aherm, apos) - @test (f(Symmetric(real(asym)), Hermitian(aherm)))::typeof(Hermitian(aherm)) == f(real(asym), aherm) - @test (f(Hermitian(aherm), Symmetric(real(asym))))::typeof(Hermitian(aherm)) == f(aherm, real(asym)) - @test (f(Symmetric(asym), Hermitian(aherm))) == f(asym, aherm) - @test (f(Hermitian(aherm), Symmetric(asym))) == f(aherm, asym) - end - end - - @testset "getindex and unsafe_getindex" begin - @test aherm[1,1] == Hermitian(aherm)[1,1] - @test 
asym[1,1] == Symmetric(asym)[1,1] - @test Symmetric(asym)[1:2,1:2] == asym[1:2,1:2] - @test Hermitian(aherm)[1:2,1:2] == aherm[1:2,1:2] - end - - @testset "conversion" begin - @test Symmetric(asym) == convert(Symmetric,Symmetric(asym)) - if eltya <: Real - typs = [Float16,Float32,Float64] - for typ in typs - @test Symmetric(convert(Matrix{typ},asym)) == convert(Symmetric{typ,Matrix{typ}},Symmetric(asym)) - end - end - if eltya <: Complex - typs = [ComplexF32,ComplexF64] - for typ in typs - @test Symmetric(convert(Matrix{typ},asym)) == convert(Symmetric{typ,Matrix{typ}},Symmetric(asym)) - @test Hermitian(convert(Matrix{typ},aherm)) == convert(Hermitian{typ,Matrix{typ}},Hermitian(aherm)) - end - end - @test Symmetric{eltya, Matrix{eltya}}(Symmetric(asym, :U)) === Symmetric(asym, :U) - @test Hermitian{eltya, Matrix{eltya}}(Hermitian(aherm, :U)) === Hermitian(aherm, :U) - end - - @testset "issymmetric, ishermitian" begin - @test issymmetric(Symmetric(asym)) - @test ishermitian(Hermitian(aherm)) - if eltya <: Real - @test ishermitian(Symmetric(asym)) - @test issymmetric(Hermitian(asym)) - elseif eltya <: Complex - # test that zero imaginary component is - # handled properly - @test ishermitian(Symmetric(b + b')) - end - end - - @testset "tril/triu" begin - for (op, validks) in ( - (triu, (-n + 1):(n + 1)), - (tril, (-n - 1):(n - 1)) ) - for di in validks - @test op(Symmetric(asym), di) == op(asym, di) - @test op(Hermitian(aherm), di) == op(aherm, di) - @test op(Symmetric(asym, :L), di) == op(asym, di) - @test op(Hermitian(aherm, :L), di) == op(aherm, di) - end - end - end - - @testset "transpose, adjoint" begin - S = Symmetric(asym) - H = Hermitian(aherm) - @test transpose(S) === S == asym - @test adjoint(H) === H == aherm - if eltya <: Real - @test adjoint(S) === S == asym - @test transpose(H) === H == aherm - else - @test adjoint(S) == Symmetric(conj(asym)) - @test transpose(H) == Hermitian(copy(transpose(aherm))) - end - @test copy(adjoint(H)) == copy(aherm) - @test copy(transpose(S)) == copy(asym) - end - - @testset "real, imag" begin - S = Symmetric(asym) - H = Hermitian(aherm) - @test issymmetric(real(S)) - @test ishermitian(real(H)) - if eltya <: Real - @test real(S) === S == asym - @test real(H) === H == aherm - elseif eltya <: Complex - @test issymmetric(imag(S)) - @test !ishermitian(imag(H)) - end - end - - end - - @testset "linalg unary ops" begin - @testset "tr" begin - @test tr(asym) == tr(Symmetric(asym)) - @test tr(aherm) == tr(Hermitian(aherm)) - end - - @testset "isposdef[!]" begin - @test isposdef(Symmetric(asym)) == isposdef(asym) - @test isposdef(Symmetric(aposs)) == isposdef(aposs) == true - @test isposdef(Hermitian(aherm)) == isposdef(aherm) - @test isposdef(Hermitian(apos)) == isposdef(apos) == true - if eltya != Int #chol! 
won't work with Int - @test isposdef!(Symmetric(copy(asym))) == isposdef(asym) - @test isposdef!(Symmetric(copy(aposs))) == isposdef(aposs) == true - @test isposdef!(Hermitian(copy(aherm))) == isposdef(aherm) - @test isposdef!(Hermitian(copy(apos))) == isposdef(apos) == true - end - end - - @testset "$f" for f in (det, logdet, logabsdet) - for uplo in (:U, :L) - @test all(f(apos) .≈ f(Hermitian(apos, uplo))) - @test all(f(aposs) .≈ f(Symmetric(aposs, uplo))) - if f != logdet - @test all(f(aherm) .≈ f(Hermitian(aherm, uplo))) - @test all(f(asym) .≈ f(Symmetric(asym, uplo))) - end - end - end - - @testset "inversion" begin - for uplo in (:U, :L) - @test inv(Symmetric(asym, uplo))::Symmetric ≈ inv(asym) - @test inv(Hermitian(aherm, uplo))::Hermitian ≈ inv(aherm) - @test inv(Symmetric(a, uplo))::Symmetric ≈ inv(Matrix(Symmetric(a, uplo))) - if eltya <: Real - @test inv(Hermitian(a, uplo))::Hermitian ≈ inv(Matrix(Hermitian(a, uplo))) - end - end - if eltya <: LinearAlgebra.BlasComplex - @testset "inverse edge case with complex Hermitian" begin - # Hermitian matrix, where inv(lu(A)) generates non-real diagonal elements - for T in (ComplexF32, ComplexF64) - A = T[0.650488+0.0im 0.826686+0.667447im; 0.826686-0.667447im 1.81707+0.0im] - H = Hermitian(A) - @test inv(H) ≈ inv(A) - @test ishermitian(Matrix(inv(H))) - end - end - end - if eltya <: AbstractFloat - @testset "inv should error with NaNs/Infs" begin - h = Hermitian(fill(eltya(NaN), 2, 2)) - @test_throws ArgumentError inv(h) - s = Symmetric(fill(eltya(NaN), 2, 2)) - @test_throws ArgumentError inv(s) - end - end - end - - # Revisit when implemented in julia - if eltya != BigFloat - @testset "cond" begin - if eltya <: Real #svdvals! has no method for Symmetric{Complex} - @test cond(Symmetric(asym)) ≈ cond(asym) - end - @test cond(Hermitian(aherm)) ≈ cond(aherm) - end - - @testset "symmetric eigendecomposition" begin - if eltya <: Real # the eigenvalues are only real and ordered for Hermitian matrices - d, v = eigen(asym) - @test asym*v[:,1] ≈ d[1]*v[:,1] - @test v*Diagonal(d)*transpose(v) ≈ asym - @test isequal(eigvals(asym[1]), eigvals(asym[1:1,1:1])[1]) - @test abs.(eigen(Symmetric(asym), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2) - @test abs.(eigen(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2) - @test eigvals(Symmetric(asym), 1:2) ≈ d[1:2] - @test eigvals(Symmetric(asym), sortby= x -> -x) ≈ eigvals(eigen(Symmetric(asym), sortby = x -> -x)) - @test eigvals(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2] - # eigen doesn't support Symmetric{Complex} - @test Matrix(eigen(asym)) ≈ asym - @test eigvecs(Symmetric(asym)) ≈ eigvecs(asym) - end - - d, v = eigen(aherm) - @test aherm*v[:,1] ≈ d[1]*v[:,1] - @test v*Diagonal(d)*v' ≈ aherm - @test isequal(eigvals(aherm[1]), eigvals(aherm[1:1,1:1])[1]) - @test abs.(eigen(Hermitian(aherm), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2) - @test abs.(eigen(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2) - @test eigvals(Hermitian(aherm), 1:2) ≈ d[1:2] - @test eigvals(Hermitian(aherm), sortby= x -> -x) ≈ eigvals(eigen(Hermitian(aherm), sortby = x -> -x)) - @test eigvals(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2] - @test Matrix(eigen(aherm)) ≈ aherm - @test eigvecs(Hermitian(aherm)) ≈ eigvecs(aherm) - - # relation to svdvals - if eltya <: Real #svdvals! 
has no method for Symmetric{Complex} - @test sum(sort(abs.(eigvals(Symmetric(asym))))) == sum(sort(svdvals(Symmetric(asym)))) - end - @test sum(sort(abs.(eigvals(Hermitian(aherm))))) == sum(sort(svdvals(Hermitian(aherm)))) - end - - @testset "rank" begin - let A = a[:,1:5]*a[:,1:5]' - # Make sure A is Hermitian even in the presence of rounding error - # xianyi/OpenBLAS#729 - A = (A + A') / 2 - @test rank(A) == rank(Hermitian(A)) - end - end - - @testset "pow" begin - # Integer power - @test (asym)^2 ≈ (Symmetric(asym)^2)::Symmetric - @test (asym)^-2 ≈ (Symmetric(asym)^-2)::Symmetric - @test (aposs)^2 ≈ (Symmetric(aposs)^2)::Symmetric - @test (aherm)^2 ≈ (Hermitian(aherm)^2)::Hermitian - @test (aherm)^-2 ≈ (Hermitian(aherm)^-2)::Hermitian - @test (apos)^2 ≈ (Hermitian(apos)^2)::Hermitian - # integer floating point power - @test (asym)^2.0 ≈ (Symmetric(asym)^2.0)::Symmetric - @test (asym)^-2.0 ≈ (Symmetric(asym)^-2.0)::Symmetric - @test (aposs)^2.0 ≈ (Symmetric(aposs)^2.0)::Symmetric - @test (aherm)^2.0 ≈ (Hermitian(aherm)^2.0)::Hermitian - @test (aherm)^-2.0 ≈ (Hermitian(aherm)^-2.0)::Hermitian - @test (apos)^2.0 ≈ (Hermitian(apos)^2.0)::Hermitian - # non-integer floating point power - @test (asym)^2.5 ≈ (Symmetric(asym)^2.5)::Symmetric - @test (asym)^-2.5 ≈ (Symmetric(asym)^-2.5)::Symmetric - @test (aposs)^2.5 ≈ (Symmetric(aposs)^2.5)::Symmetric - @test (aherm)^2.5 ≈ (Hermitian(aherm)^2.5)#::Hermitian - @test (aherm)^-2.5 ≈ (Hermitian(aherm)^-2.5)#::Hermitian - @test (apos)^2.5 ≈ (Hermitian(apos)^2.5)::Hermitian - end - end - end - - @testset "linalg binary ops" begin - @testset "mat * vec" begin - @test Symmetric(asym)*x+y ≈ asym*x+y - # testing fallbacks for transpose-vector * transpose(SymHerm) - xadj = transpose(x) - @test xadj * transpose(Symmetric(asym)) ≈ xadj * asym - @test x' * Symmetric(asym) ≈ x' * asym - - @test Hermitian(aherm)*x+y ≈ aherm*x+y - # testing fallbacks for adjoint-vector * SymHerm' - xadj = x' - @test x' * Hermitian(aherm) ≈ x' * aherm - @test xadj * Hermitian(aherm)' ≈ xadj * aherm - end - - @testset "mat * mat" begin - C = zeros(eltya,n,n) - @test Hermitian(aherm) * a ≈ aherm * a - @test a * Hermitian(aherm) ≈ a * aherm - # rectangular multiplication - @test [a; a] * Hermitian(aherm) ≈ [a; a] * aherm - @test Hermitian(aherm) * [a a] ≈ aherm * [a a] - @test Hermitian(aherm) * Hermitian(aherm) ≈ aherm*aherm - @test_throws DimensionMismatch Hermitian(aherm) * Vector{eltya}(undef, n+1) - LinearAlgebra.mul!(C,a,Hermitian(aherm)) - @test C ≈ a*aherm - - @test Symmetric(asym) * Symmetric(asym) ≈ asym*asym - @test Symmetric(asym) * a ≈ asym * a - @test a * Symmetric(asym) ≈ a * asym - # rectangular multiplication - @test Symmetric(asym) * [a a] ≈ asym * [a a] - @test [a; a] * Symmetric(asym) ≈ [a; a] * asym - @test_throws DimensionMismatch Symmetric(asym) * Vector{eltya}(undef, n+1) - LinearAlgebra.mul!(C,a,Symmetric(asym)) - @test C ≈ a*asym - - tri_b = UpperTriangular(triu(b)) - @test Array(transpose(Hermitian(aherm)) * tri_b) ≈ transpose(aherm) * Array(tri_b) - @test Array(tri_b * transpose(Hermitian(aherm))) ≈ Array(tri_b) * transpose(aherm) - @test Array(Hermitian(aherm)' * tri_b) ≈ aherm' * Array(tri_b) - @test Array(tri_b * Hermitian(aherm)') ≈ Array(tri_b) * aherm' - - @test Array(transpose(Symmetric(asym)) * tri_b) ≈ transpose(asym) * Array(tri_b) - @test Array(tri_b * transpose(Symmetric(asym))) ≈ Array(tri_b) * transpose(asym) - @test Array(Symmetric(asym)' * tri_b) ≈ asym' * Array(tri_b) - @test Array(tri_b * Symmetric(asym)') ≈ Array(tri_b) * asym' - end - 
@testset "solver" begin - @test Hermitian(aherm)\x ≈ aherm\x - @test Hermitian(aherm)\b ≈ aherm\b - @test Symmetric(asym)\x ≈ asym\x - @test Symmetric(asym)\b ≈ asym\b - @test Hermitian(Diagonal(aherm))\x ≈ Diagonal(aherm)\x - @test Hermitian(Matrix(Diagonal(aherm)))\b ≈ Diagonal(aherm)\b - @test Symmetric(Diagonal(asym))\x ≈ Diagonal(asym)\x - @test Symmetric(Matrix(Diagonal(asym)))\b ≈ Diagonal(asym)\b - end - end - @testset "generalized dot product" begin - for uplo in (:U, :L) - @test dot(x, Hermitian(aherm, uplo), y) ≈ dot(x, Hermitian(aherm, uplo)*y) ≈ dot(x, Matrix(Hermitian(aherm, uplo)), y) - @test dot(x, Hermitian(aherm, uplo), x) ≈ dot(x, Hermitian(aherm, uplo)*x) ≈ dot(x, Matrix(Hermitian(aherm, uplo)), x) - end - @test dot(x, Hermitian(Diagonal(a)), y) ≈ dot(x, Hermitian(Diagonal(a))*y) ≈ dot(x, Matrix(Hermitian(Diagonal(a))), y) - @test dot(x, Hermitian(Diagonal(a)), x) ≈ dot(x, Hermitian(Diagonal(a))*x) ≈ dot(x, Matrix(Hermitian(Diagonal(a))), x) - if eltya <: Real - for uplo in (:U, :L) - @test dot(x, Symmetric(aherm, uplo), y) ≈ dot(x, Symmetric(aherm, uplo)*y) ≈ dot(x, Matrix(Symmetric(aherm, uplo)), y) - @test dot(x, Symmetric(aherm, uplo), x) ≈ dot(x, Symmetric(aherm, uplo)*x) ≈ dot(x, Matrix(Symmetric(aherm, uplo)), x) - end - end - end - - @testset "dot product of symmetric and Hermitian matrices" begin - for mtype in (Symmetric, Hermitian) - symau = mtype(a, :U) - symal = mtype(a, :L) - msymau = Matrix(symau) - msymal = Matrix(symal) - @test_throws DimensionMismatch dot(symau, mtype(zeros(eltya, n-1, n-1))) - for eltyc in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) - creal = randn(n, n)/2 - cimag = randn(n, n)/2 - c = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(creal, cimag) : creal) - symcu = mtype(c, :U) - symcl = mtype(c, :L) - msymcu = Matrix(symcu) - msymcl = Matrix(symcl) - @test dot(symau, symcu) ≈ dot(msymau, msymcu) - @test dot(symau, symcl) ≈ dot(msymau, msymcl) - @test dot(symal, symcu) ≈ dot(msymal, msymcu) - @test dot(symal, symcl) ≈ dot(msymal, msymcl) - end - - # block matrices - blockm = [eltya == Int ? rand(1:7, 3, 3) : convert(Matrix{eltya}, eltya <: Complex ? complex.(randn(3, 3)/2, randn(3, 3)/2) : randn(3, 3)/2) for _ in 1:3, _ in 1:3] - symblockmu = mtype(blockm, :U) - symblockml = mtype(blockm, :L) - msymblockmu = Matrix(symblockmu) - msymblockml = Matrix(symblockml) - @test dot(symblockmu, symblockmu) ≈ dot(msymblockmu, msymblockmu) - @test dot(symblockmu, symblockml) ≈ dot(msymblockmu, msymblockml) - @test dot(symblockml, symblockmu) ≈ dot(msymblockml, msymblockmu) - @test dot(symblockml, symblockml) ≈ dot(msymblockml, msymblockml) - end - end - end -end - -#Issue #7647: test xsyevr, xheevr, xstevr drivers. -@testset "Eigenvalues in interval for $(typeof(Mi7647))" for Mi7647 in - (Symmetric(diagm(0 => 1.0:3.0)), - Hermitian(diagm(0 => 1.0:3.0)), - Hermitian(diagm(0 => complex(1.0:3.0))), - SymTridiagonal([1.0:3.0;], zeros(2))) - @test eigmin(Mi7647) == eigvals(Mi7647, 0.5, 1.5)[1] == 1.0 - @test eigmax(Mi7647) == eigvals(Mi7647, 2.5, 3.5)[1] == 3.0 - @test eigvals(Mi7647) == eigvals(Mi7647, 0.5, 3.5) == [1.0:3.0;] -end - -@testset "Hermitian wrapper ignores imaginary parts on diagonal" begin - A = [1.0+im 2.0; 2.0 0.0] - @test !ishermitian(A) - @test Hermitian(A)[1,1] == 1 -end - -@testset "Issue #7933" begin - A7933 = [1 2; 3 4] - B7933 = copy(A7933) - C7933 = Matrix(Symmetric(A7933)) - @test A7933 == B7933 -end - -@testset "Issues #8057 and #8058. 
f=$f, A=$A" for f in - (eigen, eigvals), - A in (Symmetric([0 1; 1 0]), Hermitian([0 im; -im 0])) - @test_throws ArgumentError f(A, 3, 2) - @test_throws ArgumentError f(A, 1:4) -end - -@testset "Ignore imaginary part of Hermitian diagonal" begin - A = [1.0+im 2.0; 2.0 0.0] - @test !ishermitian(A) - @test diag(Hermitian(A)) == real(diag(A)) -end - -@testset "Issue #17780" begin - a = randn(2,2) - a = a'a - b = complex.(a,a) - c = Symmetric(b) - @test conj(c) == conj(Array(c)) - cc = copy(c) - @test conj!(c) == conj(Array(cc)) - c = Hermitian(b + b') - @test conj(c) == conj(Array(c)) - cc = copy(c) - @test conj!(c) == conj(Array(cc)) -end - -@testset "Issue # 19225" begin - X = [1 -1; -1 1] - for T in (Symmetric, Hermitian) - Y = T(copy(X)) - _Y = similar(Y) - copyto!(_Y, Y) - @test _Y == Y - - W = T(copy(X), :L) - copyto!(W, Y) - @test W.data == Y.data - @test W.uplo != Y.uplo - - W[1,1] = 4 - @test W == T([4 -1; -1 1]) - @test_throws ArgumentError (W[1,2] = 2) - if T == Hermitian - @test_throws ArgumentError (W[2,2] = 3+4im) - end - - @test Y + I == T([2 -1; -1 2]) - @test Y - I == T([0 -1; -1 0]) - @test Y * I == Y - - @test Y .+ 1 == T([2 0; 0 2]) - @test Y .- 1 == T([0 -2; -2 0]) - @test Y * 2 == T([2 -2; -2 2]) - @test Y / 1 == Y - - @test T([true false; false true]) .+ true == T([2 1; 1 2]) - end -end - -@testset "Issue #21981" begin - B = complex(rand(4,4)) - B[4,1] += 1im; - @test ishermitian(Symmetric(B, :U)) - @test issymmetric(Hermitian(B, :U)) - B[4,1] = real(B[4,1]) - B[1,4] += 1im - @test ishermitian(Symmetric(B, :L)) - @test issymmetric(Hermitian(B, :L)) -end - -@testset "$HS solver with $RHS RHS - $T" for HS in (Hermitian, Symmetric), - RHS in (Hermitian, Symmetric, Diagonal, UpperTriangular, LowerTriangular), - T in (Float64, ComplexF64) - D = rand(T, 10, 10); D = D'D - A = HS(D) - B = RHS(D) - @test A\B ≈ Matrix(A)\Matrix(B) -end - -@testset "inversion of Hilbert matrix" begin - for T in (Float64, ComplexF64) - H = T[1/(i + j - 1) for i in 1:8, j in 1:8] - @test norm(inv(Symmetric(H))*(H*fill(1., 8)) .- 1) ≈ 0 atol = 1e-5 - @test norm(inv(Hermitian(H))*(H*fill(1., 8)) .- 1) ≈ 0 atol = 1e-5 - end -end - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "Conversion to AbstractArray" begin - # tests corresponding to #34995 - immutablemat = ImmutableArray([1 2 3; 4 5 6; 7 8 9]) - for SymType in (Symmetric, Hermitian) - S = Float64 - symmat = SymType(immutablemat) - @test convert(AbstractArray{S}, symmat).data isa ImmutableArray{S} - @test convert(AbstractMatrix{S}, symmat).data isa ImmutableArray{S} - @test AbstractArray{S}(symmat).data isa ImmutableArray{S} - @test AbstractMatrix{S}(symmat).data isa ImmutableArray{S} - @test convert(AbstractArray{S}, symmat) == symmat - @test convert(AbstractMatrix{S}, symmat) == symmat - end -end - - -@testset "#24572: eltype(A::HermOrSym) === eltype(parent(A))" begin - A = rand(Float32, 3, 3) - @test_throws TypeError Symmetric{Float64,Matrix{Float32}}(A, 'U') - @test_throws TypeError Hermitian{Float64,Matrix{Float32}}(A, 'U') -end - -@testset "fill[stored]!" begin - for uplo in (:U, :L) - # Hermitian - A = Hermitian(fill(1.0+0im, 2, 2), uplo) - @test fill!(A, 2) == fill(2, 2, 2) - @test A.data == (uplo === :U ? 
[2 2; 1.0+0im 2] : [2 1.0+0im; 2 2]) - @test_throws ArgumentError fill!(A, 2+im) - - # Symmetric - A = Symmetric(fill(1.0+im, 2, 2), uplo) - @test fill!(A, 2) == fill(2, 2, 2) - @test A.data == (uplo === :U ? [2 2; 1.0+im 2] : [2 1.0+im; 2 2]) - end -end - -@testset "#25625 recursive transposition" begin - A = Matrix{Matrix{Int}}(undef, 2, 2) - A[1,1] = [1 2; 2 3] - A[1,2] = [4 5 6; 7 8 9] - A[2,1] = [4 7; 5 8; 6 9] - A[2,2] = [1 2; 3 4] - for uplo in (:U, :L) - S = Symmetric(A, uplo) - @test S[1,1] == A[1,1] - @test S[1,2] == transpose(S[2,1]) == A[1,2] - @test S[2,2] == Symmetric(A[2,2], uplo) - @test S == transpose(S) == Matrix(S) == Matrix(transpose(S)) == transpose(Matrix(S)) - end - - B = Matrix{Matrix{Complex{Int}}}(undef, 2, 2) - B[1,1] = [1 2+im; 2-im 3] - B[1,2] = [4 5+1im 6-2im; 7+3im 8-4im 9+5im] - B[2,1] = [4 7-3im; 5-1im 8+4im; 6+2im 9-5im] - B[2,2] = [1+1im 2+2im; 3-3im 4-2im] - for uplo in (:U, :L) - H = Hermitian(B, uplo) - @test H[1,1] == Hermitian(B[1,1], uplo) - @test H[1,2] == adjoint(H[2,1]) == B[1,2] - @test H[2,1] == adjoint(H[1,2]) == B[2,1] - @test H[2,2] == Hermitian(B[2,2], uplo) - @test H == adjoint(H) == Matrix(H) == Matrix(adjoint(H)) == adjoint(Matrix(H)) - end -end - -@testset "getindex of diagonal element (#25972)" begin - A = rand(ComplexF64, 2, 2) - @test Hermitian(A, :U)[1,1] == Hermitian(A, :L)[1,1] == real(A[1,1]) -end - -@testset "issue #29392: SymOrHerm scaled with Number" begin - R = rand(Float64, 2, 2); C = rand(ComplexF64, 2, 2) - # Symmetric * Real, Real * Symmetric - A = Symmetric(R); x = 2.0 - @test (A * x)::Symmetric == (x * A)::Symmetric - A = Symmetric(C); x = 2.0 - @test (A * x)::Symmetric == (x * A)::Symmetric - # Symmetric * Complex, Complex * Symmetrics - A = Symmetric(R); x = 2.0im - @test (A * x)::Symmetric == (x * A)::Symmetric - A = Symmetric(C); x = 2.0im - @test (A * x)::Symmetric == (x * A)::Symmetric - # Hermitian * Real, Real * Hermitian - A = Hermitian(R); x = 2.0 - @test (A * x)::Hermitian == (x * A)::Hermitian - A = Hermitian(C); x = 2.0 - @test (A * x)::Hermitian == (x * A)::Hermitian - # Hermitian * Complex, Complex * Hermitian - A = Hermitian(R); x = 2.0im - @test (A * x)::Matrix == (x * A)::Matrix - A = Hermitian(C); x = 2.0im - @test (A * x)::Matrix == (x * A)::Matrix - # Symmetric / Real - A = Symmetric(R); x = 2.0 - @test (A / x)::Symmetric == Matrix(A) / x - A = Symmetric(C); x = 2.0 - @test (A / x)::Symmetric == Matrix(A) / x - # Symmetric / Complex - A = Symmetric(R); x = 2.0im - @test (A / x)::Symmetric == Matrix(A) / x - A = Symmetric(C); x = 2.0im - @test (A / x)::Symmetric == Matrix(A) / x - # Hermitian / Real - A = Hermitian(R); x = 2.0 - @test (A / x)::Hermitian == Matrix(A) / x - A = Hermitian(C); x = 2.0 - @test (A / x)::Hermitian == Matrix(A) / x - # Hermitian / Complex - A = Hermitian(R); x = 2.0im - @test (A / x)::Matrix == Matrix(A) / x - A = Hermitian(C); x = 2.0im - @test (A / x)::Matrix == Matrix(A) / x -end - -@testset "issue #30814: Symmetric of Hermitian if diag is not real" begin - A = [1 2; 3 4] * (1 + im) - B = Hermitian(A) - @test_throws ArgumentError Symmetric(B) == Symmetric(Matrix(B)) - A[1,1] = 1; A[2,2] = 4 - @test Symmetric(B) == Symmetric(Matrix(B)) -end - -@testset "issue #32079: det for singular Symmetric matrix" begin - A = ones(Float64, 3, 3) - @test det(Symmetric(A))::Float64 == det(A) == 0.0 - @test det(Hermitian(A))::Float64 == det(A) == 0.0 - A = ones(ComplexF64, 3, 3) - @test det(Symmetric(A))::ComplexF64 == det(A) == 0.0 - @test det(Hermitian(A))::Float64 == det(A) == 0.0 -end 
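For readers skimming the removed tests, here is a minimal standalone sketch (assuming only the stdlib LinearAlgebra and Test packages) of the conversion rule that the issue #30814 tests above exercise: Hermitian ignores the imaginary part of the stored diagonal, while Symmetric must keep the stored data verbatim, so re-wrapping a Hermitian as Symmetric is only accepted once the stored diagonal is real.

using LinearAlgebra, Test

# Hermitian reads the stored triangle and implicitly treats the diagonal
# as real, so H[1,1] evaluates to 1 even though A[1,1] == 1 + im.
A = [1 2; 3 4] * (1 + im)
H = Hermitian(A)
@test H[1,1] == 1

# Symmetric must preserve the stored entries exactly; a non-real stored
# diagonal would contradict the Hermitian view, so the constructor throws.
@test_throws ArgumentError Symmetric(H)

# Once the stored diagonal is made real, the conversion is well defined.
A[1,1] = 1; A[2,2] = 4
@test Symmetric(H) == Symmetric(Matrix(H))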
- -@testset "symmetric()/hermitian() for Numbers" begin - @test LinearAlgebra.symmetric(1, :U) == 1 - @test LinearAlgebra.symmetric_type(Int) == Int - @test LinearAlgebra.hermitian(1, :U) == 1 - @test LinearAlgebra.hermitian_type(Int) == Int -end - -@testset "sqrt(nearly semidefinite)" begin - let A = [0.9999999999999998 4.649058915617843e-16 -1.3149405273715513e-16 9.9959579317056e-17; -8.326672684688674e-16 1.0000000000000004 2.9280733590254494e-16 -2.9993900031619594e-16; 9.43689570931383e-16 -1.339206523454095e-15 1.0000000000000007 -8.550505126287743e-16; -6.245004513516506e-16 -2.0122792321330962e-16 1.183061278035052e-16 1.0000000000000002], - B = [0.09648289218436859 0.023497875751503007 0.0 0.0; 0.023497875751503007 0.045787575150300804 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0], - C = Symmetric(A*B*A'), # semidefinite up to roundoff - Csqrt = sqrt(C) - @test Csqrt isa Symmetric{Float64} - @test Csqrt*Csqrt ≈ C rtol=1e-14 - end - let D = Symmetric(Matrix(Diagonal([1 0; 0 -1e-14]))) - @test sqrt(D) ≈ [1 0; 0 1e-7im] rtol=1e-14 - @test sqrt(D, rtol=1e-13) ≈ [1 0; 0 0] rtol=1e-14 - @test sqrt(D, rtol=1e-13)^2 ≈ D rtol=1e-13 - end -end - -@testset "Multiplications symmetric/hermitian for $T and $S" for T in - (Float16, Float32, Float64, BigFloat), S in (ComplexF16, ComplexF32, ComplexF64) - let A = transpose(Symmetric(rand(S, 3, 3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3)) - @test A * Bv ≈ Matrix(A) * Bv - @test A * Bm ≈ Matrix(A) * Bm - @test Bm * A ≈ Bm * Matrix(A) - end - let A = adjoint(Hermitian(rand(S, 3,3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3)) - @test A * Bv ≈ Matrix(A) * Bv - @test A * Bm ≈ Matrix(A) * Bm - @test Bm * A ≈ Bm * Matrix(A) - end - let Ahrs = transpose(Hermitian(Symmetric(rand(T, 3, 3)))), - Acs = transpose(Symmetric(rand(S, 3, 3))), - Ahcs = transpose(Hermitian(Symmetric(rand(S, 3, 3)))) - - @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs) - @test Ahrs * Acs ≈ Ahrs * Matrix(Acs) - @test Acs * Acs ≈ Matrix(Acs) * Matrix(Acs) - @test Acs * Ahrs ≈ Matrix(Acs) * Ahrs - @test Ahrs * Ahcs ≈ Matrix(Ahrs) * Ahcs - @test Ahcs * Ahrs ≈ Ahcs * Matrix(Ahrs) - end - let Ahrs = adjoint(Hermitian(Symmetric(rand(T, 3, 3)))), - Acs = adjoint(Symmetric(rand(S, 3, 3))), - Ahcs = adjoint(Hermitian(Symmetric(rand(S, 3, 3)))) - - @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs) - @test Ahcs * Ahcs ≈ Matrix(Ahcs) * Matrix(Ahcs) - @test Ahrs * Ahcs ≈ Ahrs * Matrix(Ahcs) - @test Acs * Ahcs ≈ Acs * Matrix(Ahcs) - @test Ahcs * Ahrs ≈ Matrix(Ahcs) * Ahrs - @test Ahcs * Acs ≈ Matrix(Ahcs) * Acs - end -end - -@testset "Addition/subtraction with SymTridiagonal" begin - TR = SymTridiagonal(randn(Float64,5), randn(Float64,4)) - TC = SymTridiagonal(randn(ComplexF64,5), randn(ComplexF64,4)) - SR = Symmetric(randn(Float64,5,5)) - SC = Symmetric(randn(ComplexF64,5,5)) - HR = Hermitian(randn(Float64,5,5)) - HC = Hermitian(randn(ComplexF64,5,5)) - for op = (+,-) - for T = (TR, TC), S = (SR, SC) - @test op(T, S) == op(Array(T), S) - @test op(S, T) == op(S, Array(T)) - @test op(T, S) isa Symmetric - @test op(S, T) isa Symmetric - end - for H = (HR, HC) - for T = (TR, TC) - @test op(T, H) == op(Array(T), H) - @test op(H, T) == op(H, Array(T)) - end - @test op(TR, H) isa Hermitian - @test op(H, TR) isa Hermitian - end - end -end - -@testset "hermitian part" begin - for T in [Float32, Complex{Float32}, Int32, Rational{Int32}, - Complex{Int32}, Complex{Rational{Int32}}] - f, f!, t = hermitianpart, hermitianpart!, T <: Real ? 
transpose : adjoint - X = T[1 2 3; 4 5 6; 7 8 9] - T <: Complex && (X .+= im .* X) - Xc = copy(X) - Y = (X + t(X)) / 2 - U = f(X) - L = f(X, :L) - @test U isa Hermitian - @test L isa Hermitian - @test U.uplo == 'U' - @test L.uplo == 'L' - @test U == L == Y - if T <: AbstractFloat || real(T) <: AbstractFloat - HU = f!(X) - @test HU == Y - @test triu(X) == triu(Y) - HL = f!(Xc, :L) - @test HL == Y - @test tril(Xc) == tril(Y) - end - end - @test_throws DimensionMismatch hermitianpart(ones(1,2)) - for T in (Float64, ComplexF64), uplo in (:U, :L) - A = [randn(T, 2, 2) for _ in 1:2, _ in 1:2] - Aherm = hermitianpart(A, uplo) - @test Aherm == Aherm.data == (A + A')/2 - @test Aherm isa Hermitian - @test Aherm.uplo == LinearAlgebra.char_uplo(uplo) - end -end - -@testset "Structured display" begin - @testset "Diagonal" begin - d = 10:13 - D = Diagonal(d) - for uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) - S = SymHerm(D, uplo) - @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) - end - - d = (10:13) .+ 2im - D = Diagonal(d) - DR = Diagonal(complex.(real.(d))) - for uplo in (:L, :U) - H = Hermitian(D, uplo) - @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, DR) - - S = Symmetric(D, uplo) - @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) - end - end - @testset "Bidiagonal" begin - dv, ev = 1:4, 1:3 - ST = SymTridiagonal(dv, ev) - D = Diagonal(dv) - for B_uplo in (:L, :U) - B = Bidiagonal(dv, ev, B_uplo) - for Sym_uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) - SB = SymHerm(B, Sym_uplo) - teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? ST : D) - @test sprint(Base.print_matrix, SB) == teststr - SB = SymHerm(Transpose(B), Sym_uplo) - teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? D : ST) - @test sprint(Base.print_matrix, SB) == teststr - end - end - end - @testset "Tridiagonal" begin - superd, d, subd = 3:5, 10:13, 1:3 - for uplo in (:U, :L), SymHerm in (Symmetric, Hermitian) - S = SymHerm(Tridiagonal(subd, d, superd), uplo) - ST = SymTridiagonal(d, uplo == :U ? superd : subd) - @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) - end - - superd, d, subd = collect((3:5)*im), collect(Complex{Int}, 10:13), collect((1:3)*im) - for uplo in (:U, :L) - S = Symmetric(Tridiagonal(subd, d, superd), uplo) - ST = SymTridiagonal(d, uplo == :U ? superd : subd) - @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) - - H = Hermitian(Tridiagonal(subd, d, superd), uplo) - T = Tridiagonal(uplo == :L ? subd : conj(superd), d, uplo == :U ? superd : conj(subd)) - @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, T) - end - end -end - -end # module TestSymmetric diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl deleted file mode 100644 index c28c17255c222..0000000000000 --- a/stdlib/LinearAlgebra/test/symmetriceigen.jl +++ /dev/null @@ -1,78 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestSymmetricEigen - -using Test, LinearAlgebra - -@testset "chol-eigen-eigvals" begin - ## Cholesky decomposition based - - # eigenvalue sorting - sf = x->(real(x),imag(x)) - - ## Real valued - A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] - H = (A+A')/2 - B = Float64[2 1 4 3; 0 3 1 3; 3 1 0 0; 0 1 3 1] - BH = (B+B')/2 - # PD matrix - BPD = B*B' - # eigen - C = cholesky(BPD) - e,v = eigen(A, C; sortby=sf) - @test A*v ≈ BPD*v*Diagonal(e) - # eigvals - @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) - - ## Complex valued - A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] - AH = (A+A')/2 - B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] - BH = (B+B')/2 - # PD matrix - BPD = B*B' - # eigen - C = cholesky(BPD) - e,v = eigen(A, C; sortby=sf) - @test A*v ≈ BPD*v*Diagonal(e) - # eigvals - @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) -end - -@testset "issue #49533" begin - ## Real valued - A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] - B = Matrix(Diagonal(Float64[1:4;])) - # eigen - e0,v0 = eigen(A, B) - e1,v1 = eigen(A, Symmetric(B)) - e2,v2 = eigen(Symmetric(A), B) - e3,v3 = eigen(Symmetric(A), Symmetric(B)) - @test e0 ≈ e1 && v0 ≈ v1 - @test e0 ≈ e2 && v0 ≈ v2 - @test e0 ≈ e3 && v0 ≈ v3 - # eigvals - @test eigvals(A, B) ≈ eigvals(A, Symmetric(B)) - @test eigvals(A, B) ≈ eigvals(Symmetric(A), B) - @test eigvals(A, B) ≈ eigvals(Symmetric(A), Symmetric(B)) - - ## Complex valued - A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] - AH = A'A - B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] - BH = B'B - # eigen - sf = x->(real(x),imag(x)) - e1,v1 = eigen(A, Hermitian(BH)) - @test A*v1 ≈ Hermitian(BH)*v1*Diagonal(e1) - e2,v2 = eigen(Hermitian(AH), B) - @test Hermitian(AH)*v2 ≈ B*v2*Diagonal(e2) - e3,v3 = eigen(Hermitian(AH), Hermitian(BH)) - @test Hermitian(AH)*v3 ≈ Hermitian(BH)*v3*Diagonal(e3) - # eigvals - @test eigvals(A, BH; sortby=sf) ≈ eigvals(A, Hermitian(BH); sortby=sf) - @test eigvals(AH, B; sortby=sf) ≈ eigvals(Hermitian(AH), B; sortby=sf) - @test eigvals(AH, BH; sortby=sf) ≈ eigvals(Hermitian(AH), Hermitian(BH); sortby=sf) -end - -end # module TestSymmetricEigen diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups deleted file mode 100644 index 0f2f4f4af8708..0000000000000 --- a/stdlib/LinearAlgebra/test/testgroups +++ /dev/null @@ -1,30 +0,0 @@ -triangular -addmul -bidiag -matmul -dense -symmetric -diagonal -special -qr -cholesky -blas -lu -uniformscaling -structuredbroadcast -hessenberg -svd -eigen -tridiag -lapack -lq -adjtrans -generic -schur -bunchkaufman -givens -pinv -factorization -abstractq -ldlt -symmetriceigen diff --git a/stdlib/LinearAlgebra/test/testutils.jl b/stdlib/LinearAlgebra/test/testutils.jl deleted file mode 100644 index 33eff29765c70..0000000000000 --- a/stdlib/LinearAlgebra/test/testutils.jl +++ /dev/null @@ -1,27 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Test approximate equality of vectors or columns of matrices modulo floating -# point roundoff and phase (sign) differences. 
-# -# This function is designed to test for equality between vectors of floating point -# numbers when the vectors are defined only up to a global phase or sign, such as -# normalized eigenvectors or singular vectors. The global phase is usually -# defined consistently, but may occasionally change due to small differences in -# floating point rounding noise or rounding modes, or through the use of -# different conventions in different algorithms. As a result, most tests checking -# such vectors have to detect and discard such overall phase differences. -# -# Inputs: -# a, b:: StridedVecOrMat to be compared -# err :: Default: m^3*(eps(S)+eps(T)), where m is the number of rows -# -# Raises an error if any columnwise vector norm exceeds err. Otherwise, returns -# nothing. -function test_approx_eq_modphase(a::StridedVecOrMat{S}, b::StridedVecOrMat{T}, - err = length(axes(a,1))^3*(eps(S)+eps(T))) where {S<:Real,T<:Real} - @test axes(a,1) == axes(b,1) && axes(a,2) == axes(b,2) - for i in axes(a,2) - v1, v2 = a[:, i], b[:, i] - @test min(abs(norm(v1-v2)),abs(norm(v1+v2))) ≈ 0.0 atol=err - end -end diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl deleted file mode 100644 index 78fc2d5e0e74c..0000000000000 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ /dev/null @@ -1,869 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module TestTriangular - -debug = false -using Test, LinearAlgebra, Random -using LinearAlgebra: BlasFloat, errorbounds, full!, transpose!, - UnitUpperTriangular, UnitLowerTriangular, - mul!, rdiv!, rmul!, lmul! - -debug && println("Triangular matrices") - -n = 9 -Random.seed!(123) - -debug && println("Test basic type functionality") -@test_throws DimensionMismatch LowerTriangular(randn(5, 4)) -@test LowerTriangular(randn(3, 3)) |> t -> [size(t, i) for i = 1:3] == [size(Matrix(t), i) for i = 1:3] - -# The following test block tries to call all methods in base/linalg/triangular.jl in order for a combination of input element types. Keep the ordering when adding code. -for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int) - # Begin loop for first Triangular matrix - for (t1, uplo1) in ((UpperTriangular, :U), - (UnitUpperTriangular, :U), - (LowerTriangular, :L), - (UnitLowerTriangular, :L)) - - # Construct test matrix - A1 = t1(elty1 == Int ? rand(1:7, n, n) : convert(Matrix{elty1}, (elty1 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo1 === :U ? t : copy(t'))) - @test t1(A1) === A1 - @test t1{elty1}(A1) === A1 - # test the ctor works for AbstractMatrix - symm = Symmetric(rand(Int8, n, n)) - t1s = t1{elty1}(symm) - @test typeof(t1s) == t1{elty1, Symmetric{elty1, Matrix{elty1}}} - t1t = t1{elty1}(t1(rand(Int8, n, n))) - @test typeof(t1t) == t1{elty1, Matrix{elty1}} - - debug && println("elty1: $elty1, A1: $t1") - - # Convert - @test convert(AbstractMatrix{elty1}, A1) == A1 - @test convert(Matrix, A1) == A1 - @test t1{elty1}(convert(AbstractMatrix{elty1}, A1)) == A1 - - # full! - @test full!(copy(A1)) == A1 - - # similar - @test isa(similar(A1), t1) - @test eltype(similar(A1)) == elty1 - @test isa(similar(A1, Int), t1) - @test eltype(similar(A1, Int)) == Int - @test isa(similar(A1, (3,2)), Matrix{elty1}) - @test isa(similar(A1, Int, (3,2)), Matrix{Int}) - - #copyto! 
- simA1 = similar(A1) - copyto!(simA1, A1) - @test simA1 == A1 - - # getindex - let mA1 = Matrix(A1) - # linear indexing - for i in 1:length(A1) - @test A1[i] == mA1[i] - end - # cartesian indexing - for i in 1:size(A1, 1), j in 1:size(A1, 2) - @test A1[i,j] == mA1[i,j] - end - end - @test isa(A1[2:4,1], Vector) - - - # setindex! (and copy) - A1c = copy(A1) - for i = 1:size(A1, 1) - for j = 1:size(A1, 2) - if uplo1 === :U - if i > j - A1c[i,j] = 0 - @test_throws ArgumentError A1c[i,j] = 1 - elseif i == j && t1 == UnitUpperTriangular - A1c[i,j] = 1 - @test_throws ArgumentError A1c[i,j] = 0 - else - A1c[i,j] = 0 - @test A1c[i,j] == 0 - end - else - if i < j - A1c[i,j] = 0 - @test_throws ArgumentError A1c[i,j] = 1 - elseif i == j && t1 == UnitLowerTriangular - A1c[i,j] = 1 - @test_throws ArgumentError A1c[i,j] = 0 - else - A1c[i,j] = 0 - @test A1c[i,j] == 0 - end - end - end - end - - # istril/istriu - if uplo1 === :L - @test istril(A1) - @test !istriu(A1) - @test istriu(A1') - @test istriu(transpose(A1)) - @test !istril(A1') - @test !istril(transpose(A1)) - else - @test istriu(A1) - @test !istril(A1) - @test istril(A1') - @test istril(transpose(A1)) - @test !istriu(A1') - @test !istriu(transpose(A1)) - end - M = copy(parent(A1)) - for trans in (adjoint, transpose), k in -1:1 - triu!(M, k) - @test istril(trans(M), -k) == istril(copy(trans(M)), -k) == true - end - M = copy(parent(A1)) - for trans in (adjoint, transpose), k in 1:-1:-1 - tril!(M, k) - @test istriu(trans(M), -k) == istriu(copy(trans(M)), -k) == true - end - - #tril/triu - if uplo1 === :L - @test tril(A1,0) == A1 - @test tril(A1,-1) == LowerTriangular(tril(Matrix(A1), -1)) - @test tril(A1,1) == t1(tril(tril(Matrix(A1), 1))) - @test tril(A1, -n - 2) == zeros(size(A1)) - @test tril(A1, n) == A1 - @test triu(A1,0) == t1(diagm(0 => diag(A1))) - @test triu(A1,-1) == t1(tril(triu(A1.data,-1))) - @test triu(A1,1) == zeros(size(A1)) # or just @test iszero(triu(A1,1))? - @test triu(A1, -n) == A1 - @test triu(A1, n + 2) == zeros(size(A1)) - else - @test triu(A1,0) == A1 - @test triu(A1,1) == UpperTriangular(triu(Matrix(A1), 1)) - @test triu(A1,-1) == t1(triu(triu(Matrix(A1), -1))) - @test triu(A1, -n) == A1 - @test triu(A1, n + 2) == zeros(size(A1)) - @test tril(A1,0) == t1(diagm(0 => diag(A1))) - @test tril(A1,1) == t1(triu(tril(A1.data,1))) - @test tril(A1,-1) == zeros(size(A1)) # or just @test iszero(tril(A1,-1))? - @test tril(A1, -n - 2) == zeros(size(A1)) - @test tril(A1, n) == A1 - end - - # factorize - @test factorize(A1) == A1 - - # [c]transpose[!] (test views as well, see issue #14317) - let vrange = 1:n-1, viewA1 = t1(view(A1.data, vrange, vrange)) - # transpose - @test copy(transpose(A1)) == transpose(Matrix(A1)) - @test copy(transpose(viewA1)) == transpose(Matrix(viewA1)) - # adjoint - @test copy(A1') == Matrix(A1)' - @test copy(viewA1') == Matrix(viewA1)' - # transpose! - @test transpose!(copy(A1)) == transpose(A1) - @test typeof(transpose!(copy(A1))).name == typeof(transpose(A1)).name - @test transpose!(t1(view(copy(A1).data, vrange, vrange))) == transpose(viewA1) - # adjoint! 
- @test adjoint!(copy(A1)) == adjoint(A1) - @test typeof(adjoint!(copy(A1))).name == typeof(adjoint(A1)).name - @test adjoint!(t1(view(copy(A1).data, vrange, vrange))) == adjoint(viewA1) - end - - # diag - @test diag(A1) == diag(Matrix(A1)) - - # tr - @test tr(A1)::elty1 == tr(Matrix(A1)) - - # real - @test real(A1) == real(Matrix(A1)) - @test imag(A1) == imag(Matrix(A1)) - @test abs.(A1) == abs.(Matrix(A1)) - - # Unary operations - @test -A1 == -Matrix(A1) - - # copy and copyto! (test views as well, see issue #14317) - let vrange = 1:n-1, viewA1 = t1(view(A1.data, vrange, vrange)) - # copy - @test copy(A1) == copy(Matrix(A1)) - @test copy(viewA1) == copy(Matrix(viewA1)) - # copyto! - B = similar(A1) - copyto!(B, A1) - @test B == A1 - B = similar(copy(transpose(A1))) - copyto!(B, copy(transpose(A1))) - @test B == copy(transpose(A1)) - B = similar(viewA1) - copyto!(B, viewA1) - @test B == viewA1 - B = similar(copy(transpose(viewA1))) - copyto!(B, copy(transpose(viewA1))) - @test B == transpose(viewA1) - end - - #exp/log - if elty1 ∈ (Float32,Float64,ComplexF32,ComplexF64) - @test exp(Matrix(log(A1))) ≈ A1 - end - - # scale - if (t1 == UpperTriangular || t1 == LowerTriangular) - unitt = istriu(A1) ? UnitUpperTriangular : UnitLowerTriangular - if elty1 == Int - cr = 2 - else - cr = 0.5 - end - ci = cr * im - if elty1 <: Real - A1tmp = copy(A1) - rmul!(A1tmp, cr) - @test A1tmp == cr*A1 - A1tmp = copy(A1) - lmul!(cr, A1tmp) - @test A1tmp == cr*A1 - A1tmp = copy(A1) - A2tmp = unitt(A1) - mul!(A1tmp, A2tmp, cr) - @test A1tmp == cr * A2tmp - A1tmp = copy(A1) - A2tmp = unitt(A1) - mul!(A1tmp, cr, A2tmp) - @test A1tmp == cr * A2tmp - else - A1tmp = copy(A1) - rmul!(A1tmp, ci) - @test A1tmp == ci*A1 - A1tmp = copy(A1) - lmul!(ci, A1tmp) - @test A1tmp == ci*A1 - A1tmp = copy(A1) - A2tmp = unitt(A1) - mul!(A1tmp, ci, A2tmp) - @test A1tmp == ci * A2tmp - A1tmp = copy(A1) - A2tmp = unitt(A1) - mul!(A1tmp, A2tmp, ci) - @test A1tmp == A2tmp*ci - end - end - - # generalized dot - for eltyb in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) - b1 = convert(Vector{eltyb}, (elty1 <: Complex ? real(A1) : A1)*fill(1., n)) - b2 = convert(Vector{eltyb}, (elty1 <: Complex ? 
real(A1) : A1)*randn(n)) - @test dot(b1, A1, b2) ≈ dot(A1'b1, b2) atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n - end - - # Binary operations - @test A1*0.5 == Matrix(A1)*0.5 - @test 0.5*A1 == 0.5*Matrix(A1) - @test A1/0.5 == Matrix(A1)/0.5 - @test 0.5\A1 == 0.5\Matrix(A1) - - # inversion - @test inv(A1) ≈ inv(lu(Matrix(A1))) - inv(Matrix(A1)) # issue #11298 - @test isa(inv(A1), t1) - # make sure the call to LAPACK works right - if elty1 <: BlasFloat - @test LinearAlgebra.inv!(copy(A1)) ≈ inv(lu(Matrix(A1))) - end - - # Determinant - @test det(A1) ≈ det(lu(Matrix(A1))) atol=sqrt(eps(real(float(one(elty1)))))*n*n - @test logdet(A1) ≈ logdet(lu(Matrix(A1))) atol=sqrt(eps(real(float(one(elty1)))))*n*n - lada, ladb = logabsdet(A1) - flada, fladb = logabsdet(lu(Matrix(A1))) - @test lada ≈ flada atol=sqrt(eps(real(float(one(elty1)))))*n*n - @test ladb ≈ fladb atol=sqrt(eps(real(float(one(elty1)))))*n*n - - # Matrix square root - @test sqrt(A1) |> (t -> (t*t)::typeof(t)) ≈ A1 - - # naivesub errors - @test_throws DimensionMismatch ldiv!(A1, Vector{elty1}(undef, n+1)) - - # eigenproblems - if !(elty1 in (BigFloat, Complex{BigFloat})) # Not handled yet - vals, vecs = eigen(A1) - if (t1 == UpperTriangular || t1 == LowerTriangular) && elty1 != Int # Cannot really handle degenerate eigen space and Int matrices will probably have repeated eigenvalues. - @test vecs*diagm(0 => vals)/vecs ≈ A1 atol=sqrt(eps(float(real(one(vals[1])))))*(opnorm(A1,Inf)*n)^2 - end - end - - # Condition number tests - can be VERY approximate - if elty1 <:BlasFloat - for p in (1.0, Inf) - @test cond(A1,p) ≈ cond(A1,p) atol=(cond(A1,p)+cond(A1,p)) - end - @test cond(A1,2) == cond(Matrix(A1),2) - end - - if !(elty1 in (BigFloat, Complex{BigFloat})) # Not implemented yet - svd(A1) - elty1 <: BlasFloat && svd!(copy(A1)) - svdvals(A1) - end - - @test ((A1*A1)::t1) ≈ Matrix(A1) * Matrix(A1) - @test ((A1/A1)::t1) ≈ Matrix(A1) / Matrix(A1) - @test ((A1\A1)::t1) ≈ Matrix(A1) \ Matrix(A1) - - # Begin loop for second Triangular matrix - for elty2 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int) - for (t2, uplo2) in ((UpperTriangular, :U), - (UnitUpperTriangular, :U), - (LowerTriangular, :L), - (UnitLowerTriangular, :L)) - - debug && println("elty1: $elty1, A1: $t1, elty2: $elty2") - - A2 = t2(elty2 == Int ? rand(1:7, n, n) : convert(Matrix{elty2}, (elty2 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo2 === :U ? 
t : copy(t'))) - - # Convert - if elty1 <: Real && !(elty2 <: Integer) - @test convert(AbstractMatrix{elty2}, A1) == t1(convert(Matrix{elty2}, A1.data)) - elseif elty2 <: Real && !(elty1 <: Integer) - @test_throws InexactError convert(AbstractMatrix{elty2}, A1) == t1(convert(Matrix{elty2}, A1.data)) - end - - # Binary operations - @test A1 + A2 == Matrix(A1) + Matrix(A2) - @test A1 - A2 == Matrix(A1) - Matrix(A2) - - # Triangular-Triangular multiplication and division - @test A1*A2 ≈ Matrix(A1)*Matrix(A2) - @test transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2) - @test transpose(A1)*adjoint(A2) ≈ transpose(Matrix(A1))*adjoint(Matrix(A2)) - @test adjoint(A1)*transpose(A2) ≈ adjoint(Matrix(A1))*transpose(Matrix(A2)) - @test A1'A2 ≈ Matrix(A1)'Matrix(A2) - @test A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2)) - @test A1*A2' ≈ Matrix(A1)*Matrix(A2)' - @test transpose(A1)*transpose(A2) ≈ transpose(Matrix(A1))*transpose(Matrix(A2)) - @test A1'A2' ≈ Matrix(A1)'Matrix(A2)' - @test A1/A2 ≈ Matrix(A1)/Matrix(A2) - @test A1\A2 ≈ Matrix(A1)\Matrix(A2) - if uplo1 === :U && uplo2 === :U - if t1 === UnitUpperTriangular && t2 === UnitUpperTriangular - @test A1*A2 isa UnitUpperTriangular - @test A1/A2 isa UnitUpperTriangular - elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int - @test A1\A2 isa UnitUpperTriangular - elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int - else - @test A1*A2 isa UpperTriangular - @test A1/A2 isa UpperTriangular - elty1 == Int && elty2 == Int && t2 === UnitUpperTriangular && @test eltype(A1/A2) == Int - @test A1\A2 isa UpperTriangular - elty1 == Int && elty2 == Int && t1 === UnitUpperTriangular && @test eltype(A1\A2) == Int - end - elseif uplo1 === :L && uplo2 === :L - if t1 === UnitLowerTriangular && t2 === UnitLowerTriangular - @test A1*A2 isa UnitLowerTriangular - @test A1/A2 isa UnitLowerTriangular - elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int - @test A1\A2 isa UnitLowerTriangular - elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int - else - @test A1*A2 isa LowerTriangular - @test A1/A2 isa LowerTriangular - elty1 == Int && elty2 == Int && t2 === UnitLowerTriangular && @test eltype(A1/A2) == Int - @test A1\A2 isa LowerTriangular - elty1 == Int && elty2 == Int && t1 === UnitLowerTriangular && @test eltype(A1\A2) == Int - end - end - offsizeA = Matrix{Float64}(I, n+1, n+1) - @test_throws DimensionMismatch offsizeA / A2 - @test_throws DimensionMismatch offsizeA / transpose(A2) - @test_throws DimensionMismatch offsizeA / A2' - @test_throws DimensionMismatch offsizeA * A2 - @test_throws DimensionMismatch offsizeA * transpose(A2) - @test_throws DimensionMismatch offsizeA * A2' - @test_throws DimensionMismatch transpose(A2) * offsizeA - @test_throws DimensionMismatch A2' * offsizeA - @test_throws DimensionMismatch A2 * offsizeA - if (uplo1 == uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular) - @test rdiv!(copy(A1), copy(A2))::t1 ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2) - @test ldiv!(copy(A2), copy(A1))::t1 ≈ A2\A1 ≈ Matrix(A2)\Matrix(A1) - end - if (uplo1 != uplo2 && elty1 == elty2 != Int && t2 != UnitLowerTriangular && t2 != UnitUpperTriangular) - @test lmul!(adjoint(copy(A1)), copy(A2)) ≈ A1'*A2 ≈ Matrix(A1)'*Matrix(A2) - @test lmul!(transpose(copy(A1)), copy(A2)) ≈ transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2) - @test ldiv!(adjoint(copy(A1)), copy(A2)) ≈ A1'\A2 ≈ Matrix(A1)'\Matrix(A2) - @test ldiv!(transpose(copy(A1)), copy(A2)) ≈ transpose(A1)\A2 ≈ transpose(Matrix(A1))\Matrix(A2) - end - if 
(uplo1 != uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular) - @test rmul!(copy(A1), adjoint(copy(A2))) ≈ A1*A2' ≈ Matrix(A1)*Matrix(A2)' - @test rmul!(copy(A1), transpose(copy(A2))) ≈ A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2)) - @test rdiv!(copy(A1), adjoint(copy(A2))) ≈ A1/A2' ≈ Matrix(A1)/Matrix(A2)' - @test rdiv!(copy(A1), transpose(copy(A2))) ≈ A1/transpose(A2) ≈ Matrix(A1)/transpose(Matrix(A2)) - end - end - end - - for eltyB in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) - B = convert(Matrix{eltyB}, (elty1 <: Complex ? real(A1) : A1)*fill(1., n, n)) - - debug && println("elty1: $elty1, A1: $t1, B: $eltyB") - - Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) - @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1) - Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) - C = Matrix{promote_type(elty1,eltyB)}(undef, n, n) - mul!(C, Tri, copy(A1)) - @test C ≈ Tri*Matrix(A1) - Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) - mul!(C, copy(A1), Tri) - @test C ≈ Matrix(A1)*Tri - - # Triangular-dense Matrix/vector multiplication - @test A1*B[:,1] ≈ Matrix(A1)*B[:,1] - @test A1*B ≈ Matrix(A1)*B - @test transpose(A1)*B[:,1] ≈ transpose(Matrix(A1))*B[:,1] - @test A1'B[:,1] ≈ Matrix(A1)'B[:,1] - @test transpose(A1)*B ≈ transpose(Matrix(A1))*B - @test A1'B ≈ Matrix(A1)'B - @test A1*transpose(B) ≈ Matrix(A1)*transpose(B) - @test adjoint(A1)*transpose(B) ≈ Matrix(A1)'*transpose(B) - @test transpose(A1)*adjoint(B) ≈ transpose(Matrix(A1))*adjoint(B) - @test A1*B' ≈ Matrix(A1)*B' - @test B*A1 ≈ B*Matrix(A1) - @test transpose(B[:,1])*A1 ≈ transpose(B[:,1])*Matrix(A1) - @test B[:,1]'A1 ≈ B[:,1]'Matrix(A1) - @test transpose(B)*A1 ≈ transpose(B)*Matrix(A1) - @test transpose(B)*adjoint(A1) ≈ transpose(B)*Matrix(A1)' - @test adjoint(B)*transpose(A1) ≈ adjoint(B)*transpose(Matrix(A1)) - @test B'A1 ≈ B'Matrix(A1) - @test B*transpose(A1) ≈ B*transpose(Matrix(A1)) - @test B*A1' ≈ B*Matrix(A1)' - @test transpose(B[:,1])*transpose(A1) ≈ transpose(B[:,1])*transpose(Matrix(A1)) - @test B[:,1]'A1' ≈ B[:,1]'Matrix(A1)' - @test transpose(B)*transpose(A1) ≈ transpose(B)*transpose(Matrix(A1)) - @test B'A1' ≈ B'Matrix(A1)' - - if eltyB == elty1 - @test mul!(similar(B), A1, B) ≈ Matrix(A1)*B - @test mul!(similar(B), A1, adjoint(B)) ≈ Matrix(A1)*B' - @test mul!(similar(B), A1, transpose(B)) ≈ Matrix(A1)*transpose(B) - @test mul!(similar(B), adjoint(A1), adjoint(B)) ≈ Matrix(A1)'*B' - @test mul!(similar(B), transpose(A1), transpose(B)) ≈ transpose(Matrix(A1))*transpose(B) - @test mul!(similar(B), transpose(A1), adjoint(B)) ≈ transpose(Matrix(A1))*B' - @test mul!(similar(B), adjoint(A1), transpose(B)) ≈ Matrix(A1)'*transpose(B) - @test mul!(similar(B), adjoint(A1), B) ≈ Matrix(A1)'*B - @test mul!(similar(B), transpose(A1), B) ≈ transpose(Matrix(A1))*B - # test also vector methods - B1 = vec(B[1,:]) - @test mul!(similar(B1), A1, B1) ≈ Matrix(A1)*B1 - @test mul!(similar(B1), adjoint(A1), B1) ≈ Matrix(A1)'*B1 - @test mul!(similar(B1), transpose(A1), B1) ≈ transpose(Matrix(A1))*B1 - end - #error handling - Ann, Bmm, bm = A1, Matrix{eltyB}(undef, n+1, n+1), Vector{eltyB}(undef, n+1) - @test_throws DimensionMismatch lmul!(Ann, bm) - @test_throws DimensionMismatch rmul!(Bmm, Ann) - @test_throws DimensionMismatch lmul!(transpose(Ann), bm) - @test_throws DimensionMismatch lmul!(adjoint(Ann), bm) - @test_throws DimensionMismatch rmul!(Bmm, adjoint(Ann)) - @test_throws DimensionMismatch rmul!(Bmm, transpose(Ann)) - - # ... 
and division - @test A1\B[:,1] ≈ Matrix(A1)\B[:,1] - @test A1\B ≈ Matrix(A1)\B - @test transpose(A1)\B[:,1] ≈ transpose(Matrix(A1))\B[:,1] - @test A1'\B[:,1] ≈ Matrix(A1)'\B[:,1] - @test transpose(A1)\B ≈ transpose(Matrix(A1))\B - @test A1'\B ≈ Matrix(A1)'\B - @test A1\transpose(B) ≈ Matrix(A1)\transpose(B) - @test A1\B' ≈ Matrix(A1)\B' - @test transpose(A1)\transpose(B) ≈ transpose(Matrix(A1))\transpose(B) - @test A1'\B' ≈ Matrix(A1)'\B' - Ann, bm = A1, Vector{elty1}(undef,n+1) - @test_throws DimensionMismatch Ann\bm - @test_throws DimensionMismatch Ann'\bm - @test_throws DimensionMismatch transpose(Ann)\bm - if t1 == UpperTriangular || t1 == LowerTriangular - if elty1 === eltyB <: BlasFloat - @test_throws LAPACKException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n)) - else - @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n)) - end - end - @test B/A1 ≈ B/Matrix(A1) - @test B/transpose(A1) ≈ B/transpose(Matrix(A1)) - @test B/A1' ≈ B/Matrix(A1)' - @test transpose(B)/A1 ≈ transpose(B)/Matrix(A1) - @test B'/A1 ≈ B'/Matrix(A1) - @test transpose(B)/transpose(A1) ≈ transpose(B)/transpose(Matrix(A1)) - @test B'/A1' ≈ B'/Matrix(A1)' - - # Error bounds - !(elty1 in (BigFloat, Complex{BigFloat})) && !(eltyB in (BigFloat, Complex{BigFloat})) && errorbounds(A1, A1\B, B) - - end - end -end - -# Matrix square root -Atn = UpperTriangular([-1 1 2; 0 -2 2; 0 0 -3]) -Atp = UpperTriangular([1 1 2; 0 2 2; 0 0 3]) -Atu = UnitUpperTriangular([1 1 2; 0 1 2; 0 0 1]) -@test sqrt(Atn) |> t->t*t ≈ Atn -@test sqrt(Atn) isa UpperTriangular -@test typeof(sqrt(Atn)[1,1]) <: Complex -@test sqrt(Atp) |> t->t*t ≈ Atp -@test sqrt(Atp) isa UpperTriangular -@test typeof(sqrt(Atp)[1,1]) <: Real -@test typeof(sqrt(complex(Atp))[1,1]) <: Complex -@test sqrt(Atu) |> t->t*t ≈ Atu -@test sqrt(Atu) isa UnitUpperTriangular -@test typeof(sqrt(Atu)[1,1]) <: Real -@test typeof(sqrt(complex(Atu))[1,1]) <: Complex - -@testset "matrix square root quasi-triangular blockwise" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - A = schur(rand(T, 100, 100)^2).T - @test LinearAlgebra.sqrt_quasitriu(A; blockwidth=16)^2 ≈ A - end - n = 256 - A = rand(ComplexF64, n, n) - U = schur(A).T - Ubig = Complex{BigFloat}.(U) - @test LinearAlgebra.sqrt_quasitriu(U; blockwidth=64) ≈ LinearAlgebra.sqrt_quasitriu(Ubig; blockwidth=64) -end - -@testset "sylvester quasi-triangular blockwise" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64), m in (15, 40), n in (15, 45) - A = schur(rand(T, m, m)).T - B = schur(rand(T, n, n)).T - C = randn(T, m, n) - Ccopy = copy(C) - X = LinearAlgebra._sylvester_quasitriu!(A, B, C; blockwidth=16) - @test X === C - @test A * X + X * B ≈ -Ccopy - - @testset "test raise=false does not break recursion" begin - Az = zero(A) - Bz = zero(B) - C2 = copy(Ccopy) - @test_throws LAPACKException LinearAlgebra._sylvester_quasitriu!(Az, Bz, C2; blockwidth=16) - m == n || @test any(C2 .== Ccopy) # recursion broken - C3 = copy(Ccopy) - X3 = LinearAlgebra._sylvester_quasitriu!(Az, Bz, C3; blockwidth=16, raise=false) - @test !any(X3 .== Ccopy) # recursion not broken - end - end -end - -@testset "check matrix logarithm type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64) - A = UpperTriangular(exp(triu(randn(elty, n, n)))) - @inferred Union{typeof(A),typeof(complex(A))} log(A) - @test exp(Matrix(log(A))) ≈ A - if elty <: Real - @test typeof(log(A)) <: UpperTriangular{elty} - @test typeof(log(complex(A))) <: UpperTriangular{complex(elty)} - @test 
isreal(log(complex(A))) - @test log(complex(A)) ≈ log(A) - end - - Au = UnitUpperTriangular(exp(triu(randn(elty, n, n), 1))) - @inferred Union{typeof(A),typeof(complex(A))} log(Au) - @test exp(Matrix(log(Au))) ≈ Au - if elty <: Real - @test typeof(log(Au)) <: UpperTriangular{elty} - @test typeof(log(complex(Au))) <: UpperTriangular{complex(elty)} - @test isreal(log(complex(Au))) - @test log(complex(Au)) ≈ log(Au) - end -end - -Areal = randn(n, n)/2 -Aimg = randn(n, n)/2 -A2real = randn(n, n)/2 -A2img = randn(n, n)/2 - -for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) - A = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(Areal, Aimg) : Areal) - # a2 = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(a2real, a2img) : a2real) - εa = eps(abs(float(one(eltya)))) - - for eltyb in (Float32, Float64, ComplexF32, ComplexF64) - εb = eps(abs(float(one(eltyb)))) - ε = max(εa,εb) - - debug && println("\ntype of A: ", eltya, " type of b: ", eltyb, "\n") - - debug && println("Solve upper triangular system") - Atri = UpperTriangular(lu(A).U) |> t -> eltya <: Complex && eltyb <: Real ? real(t) : t # Here the triangular matrix can't be too badly conditioned - b = convert(Matrix{eltyb}, Matrix(Atri)*fill(1., n, 2)) - x = Matrix(Atri) \ b - - debug && println("Test error estimates") - if eltya != BigFloat && eltyb != BigFloat - for i = 1:2 - @test norm(x[:,1] .- 1) <= errorbounds(UpperTriangular(A), x, b)[1][i] - end - end - debug && println("Test forward error [JIN 5705] if this is not a BigFloat") - - x = Atri \ b - γ = n*ε/(1 - n*ε) - if eltya != BigFloat - bigA = big.(Atri) - x̂ = fill(1., n, 2) - for i = 1:size(b, 2) - @test norm(x̂[:,i] - x[:,i], Inf)/norm(x̂[:,i], Inf) <= condskeel(bigA, x̂[:,i])*γ/(1 - condskeel(bigA)*γ) - end - end - - debug && println("Test backward error [JIN 5705]") - for i = 1:size(b, 2) - @test norm(abs.(b[:,i] - Atri*x[:,i]), Inf) <= γ * norm(Atri, Inf) * norm(x[:,i], Inf) - end - - debug && println("Solve lower triangular system") - Atri = UpperTriangular(lu(A).U) |> t -> eltya <: Complex && eltyb <: Real ? real(t) : t # Here the triangular matrix can't be too badly conditioned - b = convert(Matrix{eltyb}, Matrix(Atri)*fill(1., n, 2)) - x = Matrix(Atri)\b - - debug && println("Test error estimates") - if eltya != BigFloat && eltyb != BigFloat - for i = 1:2 - @test norm(x[:,1] .- 1) <= errorbounds(UpperTriangular(A), x, b)[1][i] - end - end - - debug && println("Test forward error [JIN 5705] if this is not a BigFloat") - b = (b0 = Atri*fill(1, n, 2); convert(Matrix{eltyb}, eltyb == Int ? trunc.(b0) : b0)) - x = Atri \ b - γ = n*ε/(1 - n*ε) - if eltya != BigFloat - bigA = big.(Atri) - x̂ = fill(1., n, 2) - for i = 1:size(b, 2) - @test norm(x̂[:,i] - x[:,i], Inf)/norm(x̂[:,i], Inf) <= condskeel(bigA, x̂[:,i])*γ/(1 - condskeel(bigA)*γ) - end - end - - debug && println("Test backward error [JIN 5705]") - for i = 1:size(b, 2) - @test norm(abs.(b[:,i] - Atri*x[:,i]), Inf) <= γ * norm(Atri, Inf) * norm(x[:,i], Inf) - end - end -end - -# Issue 10742 and similar -@test istril(UpperTriangular(diagm(0 => [1,2,3,4]))) -@test istriu(LowerTriangular(diagm(0 => [1,2,3,4]))) -@test isdiag(UpperTriangular(diagm(0 => [1,2,3,4]))) -@test isdiag(LowerTriangular(diagm(0 => [1,2,3,4]))) -@test !isdiag(UpperTriangular(rand(4, 4))) -@test !isdiag(LowerTriangular(rand(4, 4))) - -# Test throwing in fallbacks for non BlasFloat/BlasComplex in A_rdiv_Bx! 
-let n = 5 - A = rand(Float16, n, n) - B = rand(Float16, n-1, n-1) - @test_throws DimensionMismatch rdiv!(A, LowerTriangular(B)) - @test_throws DimensionMismatch rdiv!(A, UpperTriangular(B)) - @test_throws DimensionMismatch rdiv!(A, UnitLowerTriangular(B)) - @test_throws DimensionMismatch rdiv!(A, UnitUpperTriangular(B)) - - @test_throws DimensionMismatch rdiv!(A, adjoint(LowerTriangular(B))) - @test_throws DimensionMismatch rdiv!(A, adjoint(UpperTriangular(B))) - @test_throws DimensionMismatch rdiv!(A, adjoint(UnitLowerTriangular(B))) - @test_throws DimensionMismatch rdiv!(A, adjoint(UnitUpperTriangular(B))) - - @test_throws DimensionMismatch rdiv!(A, transpose(LowerTriangular(B))) - @test_throws DimensionMismatch rdiv!(A, transpose(UpperTriangular(B))) - @test_throws DimensionMismatch rdiv!(A, transpose(UnitLowerTriangular(B))) - @test_throws DimensionMismatch rdiv!(A, transpose(UnitUpperTriangular(B))) -end - -@test isdiag(LowerTriangular(UpperTriangular(randn(3,3)))) -@test isdiag(UpperTriangular(LowerTriangular(randn(3,3)))) - -# Issue 16196 -@test UpperTriangular(Matrix(1.0I, 3, 3)) \ view(fill(1., 3), [1,2,3]) == fill(1., 3) - -# dimensional correctness: -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl")) -using .Main.Furlongs -LinearAlgebra.sylvester(a::Furlong,b::Furlong,c::Furlong) = -c / (a + b) - -@testset "dimensional correctness" begin - A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)]) - @test sqrt(A)::UpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1])) - @test inv(A)::UpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1])) - B = UnitUpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)]) - @test sqrt(B)::UnitUpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1])) - @test inv(B)::UnitUpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1])) - b = [Furlong(5), Furlong(8)] - @test (A \ b)::Vector{<:Furlong{0}} == (B \ b)::Vector{<:Furlong{0}} == Furlong{0}.([-27, 8]) - C = LowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)]) - @test sqrt(C)::LowerTriangular == Furlong{1//2}.(LowerTriangular([1 0; 2 1])) - @test inv(C)::LowerTriangular == Furlong{-1}.(LowerTriangular([1 0; -4 1])) - D = UnitLowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)]) - @test sqrt(D)::UnitLowerTriangular == Furlong{1//2}.(UnitLowerTriangular([1 0; 2 1])) - @test inv(D)::UnitLowerTriangular == Furlong{-1}.(UnitLowerTriangular([1 0; -4 1])) - b = [Furlong(5), Furlong(8)] - @test (C \ b)::Vector{<:Furlong{0}} == (D \ b)::Vector{<:Furlong{0}} == Furlong{0}.([5, -12]) -end - -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "AbstractArray constructor should preserve underlying storage type" begin - # tests corresponding to #34995 - local m = 4 - local T, S = Float32, Float64 - immutablemat = ImmutableArray(randn(T,m,m)) - for TriType in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) - trimat = TriType(immutablemat) - @test convert(AbstractArray{S}, trimat).data isa ImmutableArray{S} - @test convert(AbstractMatrix{S}, trimat).data isa ImmutableArray{S} - @test AbstractArray{S}(trimat).data isa ImmutableArray{S} - @test AbstractMatrix{S}(trimat).data isa ImmutableArray{S} - @test convert(AbstractArray{S}, trimat) == trimat - @test convert(AbstractMatrix{S}, 
trimat) == trimat - end -end - -@testset "inplace mul of appropriate types should preserve triagular structure" begin - for elty1 in (Float64, ComplexF32), elty2 in (Float64, ComplexF32) - T = promote_type(elty1, elty2) - M1 = rand(elty1, 5, 5) - M2 = rand(elty2, 5, 5) - A = UpperTriangular(M1) - A2 = UpperTriangular(M2) - Au = UnitUpperTriangular(M1) - Au2 = UnitUpperTriangular(M2) - B = LowerTriangular(M1) - B2 = LowerTriangular(M2) - Bu = UnitLowerTriangular(M1) - Bu2 = UnitLowerTriangular(M2) - - @test mul!(similar(A), A, A)::typeof(A) == A*A - @test mul!(similar(A, T), A, A2) ≈ A*A2 - @test mul!(similar(A, T), A2, A) ≈ A2*A - @test mul!(typeof(similar(A, T))(A), A, A2, 2.0, 3.0) ≈ 2.0*A*A2 + 3.0*A - @test mul!(typeof(similar(A2, T))(A2), A2, A, 2.0, 3.0) ≈ 2.0*A2*A + 3.0*A2 - - @test mul!(similar(A), A, Au)::typeof(A) == A*Au - @test mul!(similar(A), Au, A)::typeof(A) == Au*A - @test mul!(similar(Au), Au, Au)::typeof(Au) == Au*Au - @test mul!(similar(A, T), A, Au2) ≈ A*Au2 - @test mul!(similar(A, T), Au2, A) ≈ Au2*A - @test mul!(similar(Au2), Au2, Au2) == Au2*Au2 - - @test mul!(similar(B), B, B)::typeof(B) == B*B - @test mul!(similar(B, T), B, B2) ≈ B*B2 - @test mul!(similar(B, T), B2, B) ≈ B2*B - @test mul!(typeof(similar(B, T))(B), B, B2, 2.0, 3.0) ≈ 2.0*B*B2 + 3.0*B - @test mul!(typeof(similar(B2, T))(B2), B2, B, 2.0, 3.0) ≈ 2.0*B2*B + 3.0*B2 - - @test mul!(similar(B), B, Bu)::typeof(B) == B*Bu - @test mul!(similar(B), Bu, B)::typeof(B) == Bu*B - @test mul!(similar(Bu), Bu, Bu)::typeof(Bu) == Bu*Bu - @test mul!(similar(B, T), B, Bu2) ≈ B*Bu2 - @test mul!(similar(B, T), Bu2, B) ≈ Bu2*B - end -end - -@testset "special printing of Lower/UpperTriangular" begin - @test occursin(r"3×3 (LinearAlgebra\.)?LowerTriangular{Int64, Matrix{Int64}}:\n 2 ⋅ ⋅\n 2 2 ⋅\n 2 2 2", - sprint(show, MIME"text/plain"(), LowerTriangular(2ones(Int64,3,3)))) - @test occursin(r"3×3 (LinearAlgebra\.)?UnitLowerTriangular{Int64, Matrix{Int64}}:\n 1 ⋅ ⋅\n 2 1 ⋅\n 2 2 1", - sprint(show, MIME"text/plain"(), UnitLowerTriangular(2ones(Int64,3,3)))) - @test occursin(r"3×3 (LinearAlgebra\.)?UpperTriangular{Int64, Matrix{Int64}}:\n 2 2 2\n ⋅ 2 2\n ⋅ ⋅ 2", - sprint(show, MIME"text/plain"(), UpperTriangular(2ones(Int64,3,3)))) - @test occursin(r"3×3 (LinearAlgebra\.)?UnitUpperTriangular{Int64, Matrix{Int64}}:\n 1 2 2\n ⋅ 1 2\n ⋅ ⋅ 1", - sprint(show, MIME"text/plain"(), UnitUpperTriangular(2ones(Int64,3,3)))) -end - -@testset "adjoint/transpose triangular/vector multiplication" begin - for elty in (Float64, ComplexF64), trity in (UpperTriangular, LowerTriangular) - A1 = trity(rand(elty, 1, 1)) - b1 = rand(elty, 1) - A4 = trity(rand(elty, 4, 4)) - b4 = rand(elty, 4) - @test A1 * b1' ≈ Matrix(A1) * b1' - @test_throws DimensionMismatch A4 * b4' - @test A1 * transpose(b1) ≈ Matrix(A1) * transpose(b1) - @test_throws DimensionMismatch A4 * transpose(b4) - @test A1' * b1' ≈ Matrix(A1') * b1' - @test_throws DimensionMismatch A4' * b4' - @test A1' * transpose(b1) ≈ Matrix(A1') * transpose(b1) - @test_throws DimensionMismatch A4' * transpose(b4) - @test transpose(A1) * transpose(b1) ≈ Matrix(transpose(A1)) * transpose(b1) - @test_throws DimensionMismatch transpose(A4) * transpose(b4) - @test transpose(A1) * b1' ≈ Matrix(transpose(A1)) * b1' - @test_throws DimensionMismatch transpose(A4) * b4' - @test b1' * transpose(A1) ≈ b1' * Matrix(transpose(A1)) - @test b4' * transpose(A4) ≈ b4' * Matrix(transpose(A4)) - @test transpose(b1) * A1' ≈ transpose(b1) * Matrix(A1') - @test transpose(b4) * A4' ≈ transpose(b4) * Matrix(A4') - end -end - 
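The deleted testsets above exercise two properties of the triangular wrappers: in-place `mul!` between triangular operands preserves the triangular wrapper of the destination, and products with adjointed/transposed vectors follow the usual shape rules (a triangular matrix times an adjoint vector only works in the 1×1 case). As a minimal sketch of the first property, assuming a current `LinearAlgebra` (the 5×5 random matrices are illustrative and not taken from the diff):

```julia
using LinearAlgebra

A = UpperTriangular(rand(5, 5))
B = UpperTriangular(rand(5, 5))
C = similar(A)             # similar of an UpperTriangular is again UpperTriangular

mul!(C, A, B)              # in-place product; the destination keeps its wrapper
C isa UpperTriangular      # true, mirroring `mul!(similar(A), A, A)::typeof(A)` above
C ≈ A * B                  # agrees with the out-of-place product
```

The same pattern holds for `LowerTriangular` and the unit-diagonal variants, which is what the deleted loop checks across `Float64` and `ComplexF32` element types.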
-@testset "Error condition for powm" begin - A = UpperTriangular(rand(ComplexF64, 10, 10)) - @test_throws ArgumentError LinearAlgebra.powm!(A, 2.2) - A = LowerTriangular(rand(ComplexF64, 10, 10)) - At = copy(transpose(A)) - p = rand() - @test LinearAlgebra.powm(A, p) == transpose(LinearAlgebra.powm!(At, p)) - @test_throws ArgumentError LinearAlgebra.powm(A, 2.2) -end - -# Issue 35058 -let A = [0.9999999999999998 4.649058915617843e-16 -1.3149405273715513e-16 9.9959579317056e-17; -8.326672684688674e-16 1.0000000000000004 2.9280733590254494e-16 -2.9993900031619594e-16; 9.43689570931383e-16 -1.339206523454095e-15 1.0000000000000007 -8.550505126287743e-16; -6.245004513516506e-16 -2.0122792321330962e-16 1.183061278035052e-16 1.0000000000000002], - B = [0.09648289218436859 0.023497875751503007 0.0 0.0; 0.023497875751503007 0.045787575150300804 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0] - @test sqrt(A*B*A')^2 ≈ A*B*A' -end - -@testset "one and oneunit for triangular" begin - m = rand(4,4) - function test_one_oneunit_triangular(a) - b = Matrix(a) - @test (@inferred a^1) == b^1 - @test (@inferred a^-1) == b^-1 - @test one(a) == one(b) - @test one(a)*a == a - @test a*one(a) == a - @test oneunit(a) == oneunit(b) - @test oneunit(a) isa typeof(a) - end - for T in [UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular] - a = T(m) - test_one_oneunit_triangular(a) - end - # more complicated examples - b = UpperTriangular(LowerTriangular(m)) - test_one_oneunit_triangular(b) - c = UpperTriangular(Diagonal(rand(2))) - test_one_oneunit_triangular(c) -end - -@testset "LowerTriangular(Diagonal(...)) and friends (issue #28869)" begin - for elty in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int) - V = elty ≡ Int ? rand(1:10, 5) : elty.(randn(5)) - D = Diagonal(V) - for dty in (UpperTriangular, LowerTriangular) - A = dty(D) - @test A * A' == D * D' - end - end -end - -end # module TestTriangular diff --git a/stdlib/LinearAlgebra/test/trickyarithmetic.jl b/stdlib/LinearAlgebra/test/trickyarithmetic.jl deleted file mode 100644 index ad04ac89c2761..0000000000000 --- a/stdlib/LinearAlgebra/test/trickyarithmetic.jl +++ /dev/null @@ -1,66 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TrickyArithmetic - struct A - x::Int - end - A(a::A) = a - Base.convert(::Type{A}, i::Int) = A(i) - Base.zero(::Union{A, Type{A}}) = A(0) - Base.one(::Union{A, Type{A}}) = A(1) - Base.isfinite(a::A) = isfinite(a.x) - struct B - x::Int - end - struct C - x::Int - end - Base.isfinite(b::B) = isfinite(b.x) - Base.isfinite(c::C) = isfinite(c.x) - C(a::A) = C(a.x) - Base.zero(::Union{C, Type{C}}) = C(0) - Base.one(::Union{C, Type{C}}) = C(1) - - Base.:(*)(x::Int, a::A) = B(x*a.x) - Base.:(*)(a::A, x::Int) = B(a.x*x) - Base.:(*)(a::Union{A,B}, b::Union{A,B}) = B(a.x*b.x) - Base.:(*)(a::Union{A,B,C}, b::Union{A,B,C}) = C(a.x*b.x) - Base.:(+)(a::Union{A,B,C}, b::Union{A,B,C}) = C(a.x+b.x) - Base.:(-)(a::Union{A,B,C}, b::Union{A,B,C}) = C(a.x-b.x) - - struct D{NT, DT} - n::NT - d::DT - end - D{NT, DT}(d::D{NT, DT}) where {NT, DT} = d # called by oneunit - Base.zero(::Union{D{NT, DT}, Type{D{NT, DT}}}) where {NT, DT} = zero(NT) / one(DT) - Base.one(::Union{D{NT, DT}, Type{D{NT, DT}}}) where {NT, DT} = one(NT) / one(DT) - Base.convert(::Type{D{NT, DT}}, a::Union{A, B, C}) where {NT, DT} = NT(a) / one(DT) - #Base.convert(::Type{D{NT, DT}}, a::D) where {NT, DT} = NT(a.n) / DT(a.d) - - Base.:(*)(a::D, b::D) = (a.n*b.n) / (a.d*b.d) - Base.:(*)(a::D, b::Union{A,B,C}) = (a.n * b) / a.d - Base.:(*)(a::Union{A,B,C}, b::D) = b * a - Base.inv(a::Union{A,B,C}) = A(1) / a - Base.inv(a::D) = a.d / a.n - Base.isfinite(a::D) = isfinite(a.n) && isfinite(a.d) - Base.:(/)(a::Union{A,B,C}, b::Union{A,B,C}) = D(a, b) - Base.:(/)(a::D, b::Union{A,B,C}) = a.n / (a.d*b) - Base.:(/)(a::Union{A,B,C,D}, b::D) = a * inv(b) - Base.:(+)(a::Union{A,B,C}, b::D) = (a*b.d+b.n) / b.d - Base.:(+)(a::D, b::Union{A,B,C}) = b + a - Base.:(+)(a::D, b::D) = (a.n*b.d+a.d*b.n) / (a.d*b.d) - Base.:(-)(a::Union{A,B,C}) = typeof(a)(a.x) - Base.:(-)(a::D) = (-a.n) / a.d - Base.:(-)(a::Union{A,B,C,D}, b::Union{A,B,C,D}) = a + (-b) - - Base.promote_rule(::Type{A}, ::Type{B}) = B - Base.promote_rule(::Type{B}, ::Type{A}) = B - Base.promote_rule(::Type{A}, ::Type{C}) = C - Base.promote_rule(::Type{C}, ::Type{A}) = C - Base.promote_rule(::Type{B}, ::Type{C}) = C - Base.promote_rule(::Type{C}, ::Type{B}) = C - Base.promote_rule(::Type{D{NT,DT}}, T::Type{<:Union{A,B,C}}) where {NT,DT} = D{promote_type(NT,T),DT} - Base.promote_rule(T::Type{<:Union{A,B,C}}, ::Type{D{NT,DT}}) where {NT,DT} = D{promote_type(NT,T),DT} - Base.promote_rule(::Type{D{NS,DS}}, ::Type{D{NT,DT}}) where {NS,DS,NT,DT} = D{promote_type(NS,NT),promote_type(DS,DT)} -end diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl deleted file mode 100644 index d4b2dd5e3f269..0000000000000 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ /dev/null @@ -1,802 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestTridiagonal - -using Test, LinearAlgebra, Random - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") - -isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) -using .Main.Quaternions - -isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl")) -using .Main.InfiniteArrays - -isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) -using .Main.FillArrays - -include("testutils.jl") # test_approx_eq_modphase - -#Test equivalence of eigenvectors/singular vectors taking into account possible phase (sign) differences -function test_approx_eq_vecs(a::StridedVecOrMat{S}, b::StridedVecOrMat{T}, error=nothing) where {S<:Real,T<:Real} - n = size(a, 1) - @test n==size(b,1) && size(a,2)==size(b,2) - error===nothing && (error=n^3*(eps(S)+eps(T))) - for i=1:n - ev1, ev2 = a[:,i], b[:,i] - deviation = min(abs(norm(ev1-ev2)),abs(norm(ev1+ev2))) - if !isnan(deviation) - @test deviation ≈ 0.0 atol=error - end - end -end - -@testset for elty in (Float32, Float64, ComplexF32, ComplexF64, Int) - n = 12 #Size of matrix problem to test - Random.seed!(123) - if elty == Int - Random.seed!(61516384) - d = rand(1:100, n) - dl = -rand(0:10, n-1) - du = -rand(0:10, n-1) - v = rand(1:100, n) - B = rand(1:100, n, 2) - a = rand(1:100, n-1) - b = rand(1:100, n) - c = rand(1:100, n-1) - else - d = convert(Vector{elty}, 1 .+ randn(n)) - dl = convert(Vector{elty}, randn(n - 1)) - du = convert(Vector{elty}, randn(n - 1)) - v = convert(Vector{elty}, randn(n)) - B = convert(Matrix{elty}, randn(n, 2)) - a = convert(Vector{elty}, randn(n - 1)) - b = convert(Vector{elty}, randn(n)) - c = convert(Vector{elty}, randn(n - 1)) - if elty <: Complex - a += im*convert(Vector{elty}, randn(n - 1)) - b += im*convert(Vector{elty}, randn(n)) - c += im*convert(Vector{elty}, randn(n - 1)) - end - end - @test_throws DimensionMismatch SymTridiagonal(dl, fill(elty(1), n+1)) - @test_throws ArgumentError SymTridiagonal(rand(n, n)) - @test_throws ArgumentError Tridiagonal(dl, dl, dl) - @test_throws ArgumentError convert(SymTridiagonal{elty}, Tridiagonal(dl, d, du)) - - if elty != Int - @testset "issue #1490" begin - @test det(fill(elty(1),3,3)) ≈ zero(elty) atol=3*eps(real(one(elty))) - @test det(SymTridiagonal(elty[],elty[])) == one(elty) - end - end - - @testset "constructor" begin - for (x, y) in ((d, dl), (GenericArray(d), GenericArray(dl))) - ST = (SymTridiagonal(x, y))::SymTridiagonal{elty, typeof(x)} - @test ST == Matrix(ST) - @test ST.dv === x - @test ST.ev === y - @test typeof(ST)(ST) === ST - TT = (Tridiagonal(y, x, y))::Tridiagonal{elty, typeof(x)} - @test TT == Matrix(TT) - @test TT.dl === y - @test TT.d === x - @test TT.du === y - @test typeof(TT)(TT) === TT - end - ST = SymTridiagonal{elty}([1,2,3,4], [1,2,3]) - @test eltype(ST) == elty - @test SymTridiagonal{elty, Vector{elty}}(ST) === ST - @test SymTridiagonal{Int64, Vector{Int64}}(ST) isa SymTridiagonal{Int64, Vector{Int64}} - TT = Tridiagonal{elty}([1,2,3], [1,2,3,4], [1,2,3]) - @test eltype(TT) == elty - ST = SymTridiagonal{elty,Vector{elty}}(d, GenericArray(dl)) - @test isa(ST, SymTridiagonal{elty,Vector{elty}}) - TT = Tridiagonal{elty,Vector{elty}}(GenericArray(dl), d, GenericArray(dl)) - @test isa(TT, Tridiagonal{elty,Vector{elty}}) - @test_throws MethodError SymTridiagonal(d, GenericArray(dl)) - @test_throws MethodError 
SymTridiagonal(GenericArray(d), dl) - @test_throws MethodError Tridiagonal(GenericArray(dl), d, GenericArray(dl)) - @test_throws MethodError Tridiagonal(dl, GenericArray(d), dl) - @test_throws MethodError SymTridiagonal{elty}(d, GenericArray(dl)) - @test_throws MethodError Tridiagonal{elty}(GenericArray(dl), d,GenericArray(dl)) - STI = SymTridiagonal([1,2,3,4], [1,2,3]) - TTI = Tridiagonal([1,2,3], [1,2,3,4], [1,2,3]) - TTI2 = Tridiagonal([1,2,3], [1,2,3,4], [1,2,3], [1,2]) - @test SymTridiagonal(STI) === STI - @test Tridiagonal(TTI) === TTI - @test Tridiagonal(TTI2) === TTI2 - @test isa(SymTridiagonal{elty}(STI), SymTridiagonal{elty}) - @test isa(Tridiagonal{elty}(TTI), Tridiagonal{elty}) - TTI2y = Tridiagonal{elty}(TTI2) - @test isa(TTI2y, Tridiagonal{elty}) - @test TTI2y.du2 == convert(Vector{elty}, [1,2]) - end - @testset "interconversion of Tridiagonal and SymTridiagonal" begin - @test Tridiagonal(dl, d, dl) == SymTridiagonal(d, dl) - @test SymTridiagonal(d, dl) == Tridiagonal(dl, d, dl) - @test Tridiagonal(dl, d, du) + Tridiagonal(du, d, dl) == SymTridiagonal(2d, dl+du) - @test SymTridiagonal(d, dl) + Tridiagonal(dl, d, du) == Tridiagonal(dl + dl, d+d, dl+du) - @test convert(SymTridiagonal,Tridiagonal(SymTridiagonal(d, dl))) == SymTridiagonal(d, dl) - @test Array(convert(SymTridiagonal{ComplexF32},Tridiagonal(SymTridiagonal(d, dl)))) == convert(Matrix{ComplexF32}, SymTridiagonal(d, dl)) - end - @testset "tril/triu" begin - zerosd = fill!(similar(d), 0) - zerosdl = fill!(similar(dl), 0) - zerosdu = fill!(similar(du), 0) - @test_throws ArgumentError tril!(SymTridiagonal(d, dl), -n - 2) - @test_throws ArgumentError tril!(SymTridiagonal(d, dl), n) - @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), -n - 2) - @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), n) - @test tril(SymTridiagonal(d,dl)) == Tridiagonal(dl,d,zerosdl) - @test tril(SymTridiagonal(d,dl),1) == Tridiagonal(dl,d,dl) - @test tril(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,zerosd,zerosdl) - @test tril(SymTridiagonal(d,dl),-2) == Tridiagonal(zerosdl,zerosd,zerosdl) - @test tril(Tridiagonal(dl,d,du)) == Tridiagonal(dl,d,zerosdu) - @test tril(Tridiagonal(dl,d,du),1) == Tridiagonal(dl,d,du) - @test tril(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,zerosd,zerosdu) - @test tril(Tridiagonal(dl,d,du),-2) == Tridiagonal(zerosdl,zerosd,zerosdu) - - @test_throws ArgumentError triu!(SymTridiagonal(d, dl), -n) - @test_throws ArgumentError triu!(SymTridiagonal(d, dl), n + 2) - @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), -n) - @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), n + 2) - @test triu(SymTridiagonal(d,dl)) == Tridiagonal(zerosdl,d,dl) - @test triu(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,d,dl) - @test triu(SymTridiagonal(d,dl),1) == Tridiagonal(zerosdl,zerosd,dl) - @test triu(SymTridiagonal(d,dl),2) == Tridiagonal(zerosdl,zerosd,zerosdl) - @test triu(Tridiagonal(dl,d,du)) == Tridiagonal(zerosdl,d,du) - @test triu(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,d,du) - @test triu(Tridiagonal(dl,d,du),1) == Tridiagonal(zerosdl,zerosd,du) - @test triu(Tridiagonal(dl,d,du),2) == Tridiagonal(zerosdl,zerosd,zerosdu) - - @test !istril(SymTridiagonal(d,dl)) - @test istril(SymTridiagonal(d,zerosdl)) - @test !istril(SymTridiagonal(d,dl),-2) - @test !istriu(SymTridiagonal(d,dl)) - @test istriu(SymTridiagonal(d,zerosdl)) - @test !istriu(SymTridiagonal(d,dl),2) - @test istriu(Tridiagonal(zerosdl,d,du)) - @test !istriu(Tridiagonal(dl,d,zerosdu)) - @test istriu(Tridiagonal(zerosdl,zerosd,du),1) - @test 
!istriu(Tridiagonal(dl,d,zerosdu),2) - @test istril(Tridiagonal(dl,d,zerosdu)) - @test !istril(Tridiagonal(zerosdl,d,du)) - @test istril(Tridiagonal(dl,zerosd,zerosdu),-1) - @test !istril(Tridiagonal(dl,d,zerosdu),-2) - - @test isdiag(SymTridiagonal(d,zerosdl)) - @test !isdiag(SymTridiagonal(d,dl)) - @test isdiag(Tridiagonal(zerosdl,d,zerosdu)) - @test !isdiag(Tridiagonal(dl,d,zerosdu)) - @test !isdiag(Tridiagonal(zerosdl,d,du)) - @test !isdiag(Tridiagonal(dl,d,du)) - - # Test methods that could fail due to dv and ev having the same length - # see #41089 - - badev = zero(d) - badev[end] = 1 - S = SymTridiagonal(d, badev) - - @test istriu(S, -2) - @test istriu(S, 0) - @test !istriu(S, 2) - - @test isdiag(S) - end - - @testset "iszero and isone" begin - Tzero = Tridiagonal(zeros(elty, 9), zeros(elty, 10), zeros(elty, 9)) - Tone = Tridiagonal(zeros(elty, 9), ones(elty, 10), zeros(elty, 9)) - Tmix = Tridiagonal(zeros(elty, 9), zeros(elty, 10), zeros(elty, 9)) - Tmix[end, end] = one(elty) - - Szero = SymTridiagonal(zeros(elty, 10), zeros(elty, 9)) - Sone = SymTridiagonal(ones(elty, 10), zeros(elty, 9)) - Smix = SymTridiagonal(zeros(elty, 10), zeros(elty, 9)) - Smix[end, end] = one(elty) - - @test iszero(Tzero) - @test !isone(Tzero) - @test !iszero(Tone) - @test isone(Tone) - @test !iszero(Tmix) - @test !isone(Tmix) - - @test iszero(Szero) - @test !isone(Szero) - @test !iszero(Sone) - @test isone(Sone) - @test !iszero(Smix) - @test !isone(Smix) - - badev = zeros(elty, 3) - badev[end] = 1 - - @test isone(SymTridiagonal(ones(elty, 3), badev)) - @test iszero(SymTridiagonal(zeros(elty, 3), badev)) - end - - @testset for mat_type in (Tridiagonal, SymTridiagonal) - A = mat_type == Tridiagonal ? mat_type(dl, d, du) : mat_type(d, dl) - fA = map(elty <: Complex ? ComplexF64 : Float64, Array(A)) - @testset "similar, size, and copyto!" begin - B = similar(A) - @test size(B) == size(A) - copyto!(B, A) - @test B == A - @test isa(similar(A), mat_type{elty}) - @test isa(similar(A, Int), mat_type{Int}) - @test isa(similar(A, (3, 2)), Matrix) - @test isa(similar(A, Int, (3, 2)), Matrix{Int}) - @test size(A, 3) == 1 - @test size(A, 1) == n - @test size(A) == (n, n) - @test_throws ArgumentError size(A, 0) - end - @testset "getindex" begin - @test_throws BoundsError A[n + 1, 1] - @test_throws BoundsError A[1, n + 1] - @test A[1, n] == convert(elty, 0.0) - @test A[1, 1] == d[1] - end - @testset "setindex!" begin - @test_throws BoundsError A[n + 1, 1] = 0 # test bounds check - @test_throws BoundsError A[1, n + 1] = 0 # test bounds check - @test_throws ArgumentError A[1, 3] = 1 # test assignment off the main/sub/super diagonal - if mat_type == Tridiagonal - @test (A[3, 3] = A[3, 3]; A == fA) # test assignment on the main diagonal - @test (A[3, 2] = A[3, 2]; A == fA) # test assignment on the subdiagonal - @test (A[2, 3] = A[2, 3]; A == fA) # test assignment on the superdiagonal - @test ((A[1, 3] = 0) == 0; A == fA) # test zero assignment off the main/sub/super diagonal - else # mat_type is SymTridiagonal - @test ((A[3, 3] = A[3, 3]) == A[3, 3]; A == fA) # test assignment on the main diagonal - @test_throws ArgumentError A[3, 2] = 1 # test assignment on the subdiagonal - @test_throws ArgumentError A[2, 3] = 1 # test assignment on the superdiagonal - end - end - @testset "diag" begin - @test (@inferred diag(A))::typeof(d) == d - @test (@inferred diag(A, 0))::typeof(d) == d - @test (@inferred diag(A, 1))::typeof(d) == (mat_type == Tridiagonal ? 
du : dl) - @test (@inferred diag(A, -1))::typeof(d) == dl - @test (@inferred diag(A, n-1))::typeof(d) == zeros(elty, 1) - @test_throws ArgumentError diag(A, -n - 1) - @test_throws ArgumentError diag(A, n + 1) - GA = mat_type == Tridiagonal ? mat_type(GenericArray.((dl, d, du))...) : mat_type(GenericArray.((d, dl))...) - @test (@inferred diag(GA))::typeof(GenericArray(d)) == GenericArray(d) - @test (@inferred diag(GA, -1))::typeof(GenericArray(d)) == GenericArray(dl) - end - @testset "trace" begin - if real(elty) <: Integer - @test tr(A) == tr(fA) - else - @test tr(A) ≈ tr(fA) rtol=2eps(real(elty)) - end - end - @testset "Idempotent tests" begin - for func in (conj, transpose, adjoint) - @test func(func(A)) == A - end - end - @testset "permutedims(::[Sym]Tridiagonal)" begin - @test permutedims(permutedims(A)) === A - @test permutedims(A) == transpose.(transpose(A)) - @test permutedims(A, [1, 2]) === A - @test permutedims(A, (2, 1)) == permutedims(A) - end - if elty != Int - @testset "Simple unary functions" begin - for func in (det, inv) - @test func(A) ≈ func(fA) atol=n^2*sqrt(eps(real(one(elty)))) - end - end - end - ds = mat_type == Tridiagonal ? (dl, d, du) : (d, dl) - for f in (real, imag) - @test f(A)::mat_type == mat_type(map(f, ds)...) - end - if elty <: Real - for f in (round, trunc, floor, ceil) - fds = [f.(d) for d in ds] - @test f.(A)::mat_type == mat_type(fds...) - @test f.(Int, A)::mat_type == f.(Int, fA) - end - end - fds = [abs.(d) for d in ds] - @test abs.(A)::mat_type == mat_type(fds...) - @testset "Multiplication with strided matrix/vector" begin - @test (x = fill(1.,n); A*x ≈ Array(A)*x) - @test (X = fill(1.,n,2); A*X ≈ Array(A)*X) - end - @testset "Binary operations" begin - B = mat_type == Tridiagonal ? mat_type(a, b, c) : mat_type(b, a) - fB = map(elty <: Complex ? ComplexF64 : Float64, Array(B)) - for op in (+, -, *) - @test Array(op(A, B)) ≈ op(fA, fB) - end - α = rand(elty) - @test Array(α*A) ≈ α*Array(A) - @test Array(A*α) ≈ Array(A)*α - @test Array(A/α) ≈ Array(A)/α - - @testset "Matmul with Triangular types" begin - @test A*LinearAlgebra.UnitUpperTriangular(Matrix(1.0I, n, n)) ≈ fA - @test A*LinearAlgebra.UnitLowerTriangular(Matrix(1.0I, n, n)) ≈ fA - @test A*UpperTriangular(Matrix(1.0I, n, n)) ≈ fA - @test A*LowerTriangular(Matrix(1.0I, n, n)) ≈ fA - end - @testset "mul! errors" begin - Cnn, Cnm, Cmn = Matrix{elty}.(undef, ((n,n), (n,n+1), (n+1,n))) - @test_throws DimensionMismatch LinearAlgebra.mul!(Cnn,A,Cnm) - @test_throws DimensionMismatch LinearAlgebra.mul!(Cnn,A,Cmn) - @test_throws DimensionMismatch LinearAlgebra.mul!(Cnn,B,Cmn) - @test_throws DimensionMismatch LinearAlgebra.mul!(Cmn,B,Cnn) - @test_throws DimensionMismatch LinearAlgebra.mul!(Cnm,B,Cnn) - end - end - @testset "Negation" begin - mA = -A - @test mA isa mat_type - @test -mA == A - end - if mat_type == SymTridiagonal - @testset "Tridiagonal/SymTridiagonal mixing ops" begin - B = convert(Tridiagonal{elty}, A) - @test B == A - @test B + A == A + B - @test B - A == A - B - end - if elty <: LinearAlgebra.BlasReal - @testset "Eigensystems" begin - zero, infinity = convert(elty, 0), convert(elty, Inf) - @testset "stebz! and stein!" begin - w, iblock, isplit = LAPACK.stebz!('V', 'B', -infinity, infinity, 0, 0, zero, b, a) - evecs = LAPACK.stein!(b, a, w) - - (e, v) = eigen(SymTridiagonal(b, a)) - @test e ≈ w - test_approx_eq_vecs(v, evecs) - end - @testset "stein! 
call using iblock and isplit" begin - w, iblock, isplit = LAPACK.stebz!('V', 'B', -infinity, infinity, 0, 0, zero, b, a) - evecs = LAPACK.stein!(b, a, w, iblock, isplit) - test_approx_eq_vecs(v, evecs) - end - @testset "stegr! call with index range" begin - F = eigen(SymTridiagonal(b, a),1:2) - fF = eigen(Symmetric(Array(SymTridiagonal(b, a))),1:2) - test_approx_eq_modphase(F.vectors, fF.vectors) - @test F.values ≈ fF.values - end - @testset "stegr! call with value range" begin - F = eigen(SymTridiagonal(b, a),0.0,1.0) - fF = eigen(Symmetric(Array(SymTridiagonal(b, a))),0.0,1.0) - test_approx_eq_modphase(F.vectors, fF.vectors) - @test F.values ≈ fF.values - end - @testset "eigenvalues/eigenvectors of symmetric tridiagonal" begin - if elty === Float32 || elty === Float64 - DT, VT = @inferred eigen(A) - @inferred eigen(A, 2:4) - @inferred eigen(A, 1.0, 2.0) - D, Vecs = eigen(fA) - @test DT ≈ D - @test abs.(VT'Vecs) ≈ Matrix(elty(1)I, n, n) - test_approx_eq_modphase(eigvecs(A), eigvecs(fA)) - #call to LAPACK.stein here - test_approx_eq_modphase(eigvecs(A,eigvals(A)),eigvecs(A)) - elseif elty != Int - # check that undef is determined accurately even if type inference - # bails out due to the number of try/catch blocks in this code. - @test_throws UndefVarError fA - end - end - end - end - if elty <: Real - Ts = SymTridiagonal(d, dl) - Fs = Array(Ts) - Tldlt = factorize(Ts) - @testset "symmetric tridiagonal" begin - @test_throws DimensionMismatch Tldlt\rand(elty,n+1) - @test size(Tldlt) == size(Ts) - if elty <: AbstractFloat - @test LinearAlgebra.LDLt{elty,SymTridiagonal{elty,Vector{elty}}}(Tldlt) === Tldlt - @test LinearAlgebra.LDLt{elty}(Tldlt) === Tldlt - @test typeof(convert(LinearAlgebra.LDLt{Float32,Matrix{Float32}},Tldlt)) == - LinearAlgebra.LDLt{Float32,Matrix{Float32}} - @test typeof(convert(LinearAlgebra.LDLt{Float32},Tldlt)) == - LinearAlgebra.LDLt{Float32,SymTridiagonal{Float32,Vector{Float32}}} - end - for vv in (copy(v), view(v, 1:n)) - invFsv = Fs\vv - x = Ts\vv - @test x ≈ invFsv - @test Array(Tldlt) ≈ Fs - end - - @testset "similar" begin - @test isa(similar(Ts), SymTridiagonal{elty}) - @test isa(similar(Ts, Int), SymTridiagonal{Int}) - @test isa(similar(Ts, (3, 2)), Matrix) - @test isa(similar(Ts, Int, (3, 2)), Matrix{Int}) - end - - @test first(logabsdet(Tldlt)) ≈ first(logabsdet(Fs)) - @test last(logabsdet(Tldlt)) ≈ last(logabsdet(Fs)) - # just test that the det method exists. 
The numerical value of the - # determinant is unreliable - det(Tldlt) - end - end - else # mat_type is Tridiagonal - @testset "tridiagonal linear algebra" begin - for (BB, vv) in ((copy(B), copy(v)), (view(B, 1:n, 1), view(v, 1:n))) - @test A*vv ≈ fA*vv - invFv = fA\vv - @test A\vv ≈ invFv - # @test Base.solve(T,v) ≈ invFv - # @test Base.solve(T, B) ≈ F\B - Tlu = factorize(A) - x = Tlu\vv - @test x ≈ invFv - end - end - end - @testset "generalized dot" begin - x = fill(convert(elty, 1), n) - y = fill(convert(elty, 1), n) - @test dot(x, A, y) ≈ dot(A'x, y) ≈ dot(x, A*y) - @test dot([1], SymTridiagonal([1], Int[]), [1]) == 1 - @test dot([1], Tridiagonal(Int[], [1], Int[]), [1]) == 1 - @test dot(Int[], SymTridiagonal(Int[], Int[]), Int[]) === 0 - @test dot(Int[], Tridiagonal(Int[], Int[], Int[]), Int[]) === 0 - end - end -end - -@testset "SymTridiagonal/Tridiagonal block matrix" begin - M = [1 2; 2 4] - n = 5 - A = SymTridiagonal(fill(M, n), fill(M, n-1)) - @test @inferred A[1,1] == Symmetric(M) - @test @inferred A[1,2] == M - @test @inferred A[2,1] == transpose(M) - @test @inferred diag(A, 1) == fill(M, n-1) - @test @inferred diag(A, 0) == fill(Symmetric(M), n) - @test @inferred diag(A, -1) == fill(transpose(M), n-1) - @test_throws ArgumentError diag(A, -2) - @test_throws ArgumentError diag(A, 2) - @test_throws ArgumentError diag(A, n+1) - @test_throws ArgumentError diag(A, -n-1) - - A = Tridiagonal(fill(M, n-1), fill(M, n), fill(M, n-1)) - @test @inferred A[1,1] == M - @test @inferred A[1,2] == M - @test @inferred A[2,1] == M - @test @inferred diag(A, 1) == fill(M, n-1) - @test @inferred diag(A, 0) == fill(M, n) - @test @inferred diag(A, -1) == fill(M, n-1) - @test_throws MethodError diag(A, -2) - @test_throws MethodError diag(A, 2) - @test_throws ArgumentError diag(A, n+1) - @test_throws ArgumentError diag(A, -n-1) - - for n in 0:2 - dv, ev = fill(M, n), fill(M, max(n-1,0)) - A = SymTridiagonal(dv, ev) - @test A == Matrix{eltype(A)}(A) - - A = Tridiagonal(ev, dv, ev) - @test A == Matrix{eltype(A)}(A) - end -end - -@testset "Issue 12068" begin - @test SymTridiagonal([1, 2], [0])^3 == [1 0; 0 8] -end - -@testset "convert for SymTridiagonal" begin - STF32 = SymTridiagonal{Float32}(fill(1f0, 5), fill(1f0, 4)) - @test convert(SymTridiagonal{Float64}, STF32)::SymTridiagonal{Float64} == STF32 - @test convert(AbstractMatrix{Float64}, STF32)::SymTridiagonal{Float64} == STF32 -end - -@testset "constructors from matrix" begin - @test SymTridiagonal([1 2 3; 2 5 6; 0 6 9]) == [1 2 0; 2 5 6; 0 6 9] - @test Tridiagonal([1 2 3; 4 5 6; 7 8 9]) == [1 2 0; 4 5 6; 0 8 9] -end - -@testset "constructors with range and other abstract vectors" begin - @test SymTridiagonal(1:3, 1:2) == [1 1 0; 1 2 2; 0 2 3] - @test Tridiagonal(4:5, 1:3, 1:2) == [1 1 0; 4 2 2; 0 5 3] -end - -@testset "Issue #26994 (and the empty case)" begin - T = SymTridiagonal([1.0],[3.0]) - x = ones(1) - @test T*x == ones(1) - @test SymTridiagonal(ones(0), ones(0)) * ones(0, 2) == ones(0, 2) -end - -@testset "Issue 29630" begin - function central_difference_discretization(N; dfunc = x -> 12x^2 - 2N^2, - dufunc = x -> N^2 + 4N*x, - dlfunc = x -> N^2 - 4N*x, - bfunc = x -> 114ℯ^-x * (1 + 3x), - b0 = 0, bf = 57/ℯ, - x0 = 0, xf = 1) - h = 1/N - d, du, dl, b = map(dfunc, (x0+h):h:(xf-h)), map(dufunc, (x0+h):h:(xf-2h)), - map(dlfunc, (x0+2h):h:(xf-h)), map(bfunc, (x0+h):h:(xf-h)) - b[1] -= dlfunc(x0)*b0 # subtract the boundary term - b[end] -= dufunc(xf)*bf # subtract the boundary term - Tridiagonal(dl, d, du), b - end - - A90, b90 = 
central_difference_discretization(90) - - @test A90\b90 ≈ inv(A90)*b90 -end - -@testset "singular values of SymTridiag" begin - @test svdvals(SymTridiagonal([-4,2,3], [0,0])) ≈ [4,3,2] - @test svdvals(SymTridiagonal(collect(0.:10.), zeros(10))) ≈ reverse(0:10) - @test svdvals(SymTridiagonal([1,2,1], [1,1])) ≈ [3,1,0] - # test that dependent methods such as `cond` also work - @test cond(SymTridiagonal([1,2,3], [0,0])) ≈ 3 -end - -@testset "sum, mapreduce" begin - T = Tridiagonal([1,2], [1,2,3], [7,8]) - Tdense = Matrix(T) - S = SymTridiagonal([1,2,3], [1,2]) - Sdense = Matrix(S) - @test sum(T) == 24 - @test sum(S) == 12 - @test_throws ArgumentError sum(T, dims=0) - @test sum(T, dims=1) == sum(Tdense, dims=1) - @test sum(T, dims=2) == sum(Tdense, dims=2) - @test sum(T, dims=3) == sum(Tdense, dims=3) - @test typeof(sum(T, dims=1)) == typeof(sum(Tdense, dims=1)) - @test mapreduce(one, min, T, dims=1) == mapreduce(one, min, Tdense, dims=1) - @test mapreduce(one, min, T, dims=2) == mapreduce(one, min, Tdense, dims=2) - @test mapreduce(one, min, T, dims=3) == mapreduce(one, min, Tdense, dims=3) - @test typeof(mapreduce(one, min, T, dims=1)) == typeof(mapreduce(one, min, Tdense, dims=1)) - @test mapreduce(zero, max, T, dims=1) == mapreduce(zero, max, Tdense, dims=1) - @test mapreduce(zero, max, T, dims=2) == mapreduce(zero, max, Tdense, dims=2) - @test mapreduce(zero, max, T, dims=3) == mapreduce(zero, max, Tdense, dims=3) - @test typeof(mapreduce(zero, max, T, dims=1)) == typeof(mapreduce(zero, max, Tdense, dims=1)) - @test_throws ArgumentError sum(S, dims=0) - @test sum(S, dims=1) == sum(Sdense, dims=1) - @test sum(S, dims=2) == sum(Sdense, dims=2) - @test sum(S, dims=3) == sum(Sdense, dims=3) - @test typeof(sum(S, dims=1)) == typeof(sum(Sdense, dims=1)) - @test mapreduce(one, min, S, dims=1) == mapreduce(one, min, Sdense, dims=1) - @test mapreduce(one, min, S, dims=2) == mapreduce(one, min, Sdense, dims=2) - @test mapreduce(one, min, S, dims=3) == mapreduce(one, min, Sdense, dims=3) - @test typeof(mapreduce(one, min, S, dims=1)) == typeof(mapreduce(one, min, Sdense, dims=1)) - @test mapreduce(zero, max, S, dims=1) == mapreduce(zero, max, Sdense, dims=1) - @test mapreduce(zero, max, S, dims=2) == mapreduce(zero, max, Sdense, dims=2) - @test mapreduce(zero, max, S, dims=3) == mapreduce(zero, max, Sdense, dims=3) - @test typeof(mapreduce(zero, max, S, dims=1)) == typeof(mapreduce(zero, max, Sdense, dims=1)) - - T = Tridiagonal(Int[], Int[], Int[]) - Tdense = Matrix(T) - S = SymTridiagonal(Int[], Int[]) - Sdense = Matrix(S) - @test sum(T) == 0 - @test sum(S) == 0 - @test_throws ArgumentError sum(T, dims=0) - @test sum(T, dims=1) == sum(Tdense, dims=1) - @test sum(T, dims=2) == sum(Tdense, dims=2) - @test sum(T, dims=3) == sum(Tdense, dims=3) - @test typeof(sum(T, dims=1)) == typeof(sum(Tdense, dims=1)) - @test_throws ArgumentError sum(S, dims=0) - @test sum(S, dims=1) == sum(Sdense, dims=1) - @test sum(S, dims=2) == sum(Sdense, dims=2) - @test sum(S, dims=3) == sum(Sdense, dims=3) - @test typeof(sum(S, dims=1)) == typeof(sum(Sdense, dims=1)) - - T = Tridiagonal(Int[], Int[2], Int[]) - Tdense = Matrix(T) - S = SymTridiagonal(Int[2], Int[]) - Sdense = Matrix(S) - @test sum(T) == 2 - @test sum(S) == 2 - @test_throws ArgumentError sum(T, dims=0) - @test sum(T, dims=1) == sum(Tdense, dims=1) - @test sum(T, dims=2) == sum(Tdense, dims=2) - @test sum(T, dims=3) == sum(Tdense, dims=3) - @test typeof(sum(T, dims=1)) == typeof(sum(Tdense, dims=1)) - @test_throws ArgumentError sum(S, dims=0) - @test sum(S, 
dims=1) == sum(Sdense, dims=1) - @test sum(S, dims=2) == sum(Sdense, dims=2) - @test sum(S, dims=3) == sum(Sdense, dims=3) - @test typeof(sum(S, dims=1)) == typeof(sum(Sdense, dims=1)) -end - -@testset "Issue #28994 (sum of Tridigonal and UniformScaling)" begin - dl = [1., 1.] - d = [-2., -2., -2.] - T = Tridiagonal(dl, d, dl) - S = SymTridiagonal(T) - - @test diag(T + 2I) == zero(d) - @test diag(S + 2I) == zero(d) -end - -@testset "convert Tridiagonal to SymTridiagonal error" begin - du = rand(Float64, 4) - d = rand(Float64, 5) - dl = rand(Float64, 4) - T = Tridiagonal(dl, d, du) - @test_throws ArgumentError SymTridiagonal{Float32}(T) -end - -# Issue #38765 -@testset "Eigendecomposition with different lengths" begin - # length(A.ev) can be either length(A.dv) or length(A.dv) - 1 - A = SymTridiagonal(fill(1.0, 3), fill(-1.0, 3)) - F = eigen(A) - A2 = SymTridiagonal(fill(1.0, 3), fill(-1.0, 2)) - F2 = eigen(A2) - test_approx_eq_modphase(F.vectors, F2.vectors) - @test F.values ≈ F2.values ≈ eigvals(A) ≈ eigvals(A2) - @test eigvecs(A) ≈ eigvecs(A2) - @test eigvecs(A, eigvals(A)[1:1]) ≈ eigvecs(A2, eigvals(A2)[1:1]) -end - -@testset "non-commutative algebra (#39701)" begin - for A in (SymTridiagonal(Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4))), - Tridiagonal(Quaternion.(randn(4), randn(4), randn(4), randn(4)), Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4)))) - c = Quaternion(1,2,3,4) - @test A * c ≈ Matrix(A) * c - @test A / c ≈ Matrix(A) / c - @test c * A ≈ c * Matrix(A) - @test c \ A ≈ c \ Matrix(A) - end -end - -@testset "adjoint of LDLt" begin - Sr = SymTridiagonal(randn(5), randn(4)) - Sc = SymTridiagonal(complex.(randn(5)) .+ 1im, complex.(randn(4), randn(4))) - b = ones(size(Sr, 1)) - - F = ldlt(Sr) - @test F\b == F'\b - - F = ldlt(Sc) - @test copy(Sc')\b == F'\b -end - -@testset "symmetric and hermitian tridiagonals" begin - A = [im 0; 0 -im] - @test issymmetric(A) - @test !ishermitian(A) - - # real - A = SymTridiagonal(randn(5), randn(4)) - @test issymmetric(A) - @test ishermitian(A) - - A = Tridiagonal(A.ev, A.dv, A.ev .+ 1) - @test !issymmetric(A) - @test !ishermitian(A) - - # complex - # https://github.com/JuliaLang/julia/pull/41037#discussion_r645524081 - S = SymTridiagonal(randn(5) .+ 0im, randn(5) .+ 0im) - S.ev[end] = im - @test issymmetric(S) - @test ishermitian(S) - - S = SymTridiagonal(randn(5) .+ 1im, randn(4) .+ 1im) - @test issymmetric(S) - @test !ishermitian(S) - - S = Tridiagonal(S.ev, S.dv, adjoint.(S.ev)) - @test !issymmetric(S) - @test !ishermitian(S) - - S = Tridiagonal(S.dl, real.(S.d) .+ 0im, S.du) - @test !issymmetric(S) - @test ishermitian(S) -end - -isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) -using .Main.ImmutableArrays - -@testset "Conversion to AbstractArray" begin - # tests corresponding to #34995 - v1 = ImmutableArray([1, 2]) - v2 = ImmutableArray([3, 4, 5]) - v3 = ImmutableArray([6, 7]) - T = Tridiagonal(v1, v2, v3) - Tsym = SymTridiagonal(v2, v1) - - @test convert(AbstractArray{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T - @test convert(AbstractMatrix{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T - @test convert(AbstractArray{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym - @test convert(AbstractMatrix{Float64}, 
Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym -end - -@testset "dot(x,A,y) for A::Tridiagonal or SymTridiagonal" begin - for elty in (Float32, Float64, ComplexF32, ComplexF64, Int) - x = fill(convert(elty, 1), 0) - T = Tridiagonal(x, x, x) - Tsym = SymTridiagonal(x, x) - @test dot(x, T, x) == 0.0 - @test dot(x, Tsym, x) == 0.0 - end -end - -isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl")) -using .Main.SizedArrays -@testset "non-number eltype" begin - @testset "sum for SymTridiagonal" begin - dv = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10] - ev = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10] - S = SymTridiagonal(dv, ev) - Sdense = Matrix(S) - @test Sdense == collect(S) - @test sum(S) == sum(Sdense) - @test sum(S, dims = 1) == sum(Sdense, dims = 1) - @test sum(S, dims = 2) == sum(Sdense, dims = 2) - end - @testset "issymmetric/ishermitian for Tridiagonal" begin - @test !issymmetric(Tridiagonal([[1 2;3 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]])) - @test !issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;3 4], [1 2;3 4]], [[1 2;3 4]])) - @test issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]])) - - @test ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].-im])) - @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].+im])) - @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+im, [1 2;2 3].+0im], [[1 2;3 4].-im])) - end - @testset "== between Tridiagonal and SymTridiagonal" begin - dv = [SizedArray{(2,2)}([1 2;3 4]) for i in 1:4] - ev = [SizedArray{(2,2)}([3 4;1 2]) for i in 1:4] - S = SymTridiagonal(dv, ev) - Sdense = Matrix(S) - @test S == Tridiagonal(diag(Sdense, -1), diag(Sdense), diag(Sdense, 1)) == S - @test S !== Tridiagonal(diag(Sdense, 1), diag(Sdense), diag(Sdense, 1)) !== S - end -end - -@testset "copyto! with UniformScaling" begin - @testset "Tridiagonal" begin - @testset "Fill" begin - for len in (4, InfiniteArrays.Infinity()) - d = FillArrays.Fill(1, len) - ud = FillArrays.Fill(0, len-1) - T = Tridiagonal(ud, d, ud) - @test copyto!(T, I) === T - end - end - T = Tridiagonal(fill(3, 3), fill(2, 4), fill(3, 3)) - copyto!(T, I) - @test all(isone, diag(T)) - @test all(iszero, diag(T, 1)) - @test all(iszero, diag(T, -1)) - end - @testset "SymTridiagonal" begin - @testset "Fill" begin - for len in (4, InfiniteArrays.Infinity()) - d = FillArrays.Fill(1, len) - ud = FillArrays.Fill(0, len-1) - ST = SymTridiagonal(d, ud) - @test copyto!(ST, I) === ST - end - end - ST = SymTridiagonal(fill(2, 4), fill(3, 3)) - copyto!(ST, I) - @test all(isone, diag(ST)) - @test all(iszero, diag(ST, 1)) - @test all(iszero, diag(ST, -1)) - end -end - -end # module TestTridiagonal diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl deleted file mode 100644 index be1b9887d570f..0000000000000 --- a/stdlib/LinearAlgebra/test/uniformscaling.jl +++ /dev/null @@ -1,564 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module TestUniformscaling - -using Test, LinearAlgebra, Random - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) -using .Main.Quaternions -isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays - -Random.seed!(1234543) - -@testset "basic functions" begin - @test I === I' # transpose - @test ndims(I) == 2 - @test one(UniformScaling{Float32}) == UniformScaling(one(Float32)) - @test zero(UniformScaling{Float32}) == UniformScaling(zero(Float32)) - @test eltype(one(UniformScaling{Float32})) == Float32 - @test zero(UniformScaling(rand(ComplexF64))) == zero(UniformScaling{ComplexF64}) - @test one(UniformScaling(rand(ComplexF64))) == one(UniformScaling{ComplexF64}) - @test eltype(one(UniformScaling(rand(ComplexF64)))) == ComplexF64 - @test -one(UniformScaling(2)) == UniformScaling(-1) - @test opnorm(UniformScaling(1+im)) ≈ sqrt(2) - @test convert(UniformScaling{Float64}, 2I) === 2.0I -end - -@testset "getindex" begin - @test I[1,1] == 1 - @test I[1,2] == 0 - - J = I(15) - for (a, b) in [ - # indexing that returns a Vector - (1:10, 1), - (4, 1:10), - (11, 1:10), - # indexing that returns a Matrix - (1:2, 1:2), - (1:2:3, 1:2:3), - (1:2:8, 2:2:9), - (1:2:8, 9:-4:1), - (9:-4:1, 1:2:8), - (2:3, 1:2), - (2:-1:1, 1:2), - (1:2:9, 5:2:13), - (1, [1,2,5]), - (1, [1,10,5,2]), - (10, [10]), - ([1], 1), - ([15,1,5,2], 6), - ([2], [2]), - ([2,9,8,2,1], [2,8,4,3,1]), - ([8,3,5,3], 2:9), - ] - @test I[a,b] == J[a,b] - ndims(a) == 1 && @test I[OffsetArray(a,-10),b] == J[OffsetArray(a,-10),b] - ndims(b) == 1 && @test I[a,OffsetArray(b,-9)] == J[a,OffsetArray(b,-9)] - ndims(a) == ndims(b) == 1 && @test I[OffsetArray(a,-7),OffsetArray(b,-8)] == J[OffsetArray(a,-7),OffsetArray(b,-8)] - end -end - -@testset "sqrt, exp, log, and trigonometric functions" begin - # convert to a dense matrix with random size - M(J) = (N = rand(1:10); Matrix(J, N, N)) - - # on complex plane - J = UniformScaling(randn(ComplexF64)) - for f in ( exp, log, - sqrt, - sin, cos, tan, - asin, acos, atan, - csc, sec, cot, - acsc, asec, acot, - sinh, cosh, tanh, - asinh, acosh, atanh, - csch, sech, coth, - acsch, asech, acoth ) - @test f(J) ≈ f(M(J)) - end - - # on real axis - for (λ, fs) in ( - # functions defined for x ∈ ℝ - (()->randn(), (exp, - sin, cos, tan, - csc, sec, cot, - atan, acot, - sinh, cosh, tanh, - csch, sech, coth, - asinh, acsch)), - # functions defined for x ≥ 0 - (()->abs(randn()), (log, sqrt)), - # functions defined for -1 ≤ x ≤ 1 - (()->2rand()-1, (asin, acos, atanh)), - # functions defined for x ≤ -1 or x ≥ 1 - (()->1/(2rand()-1), (acsc, asec, acoth)), - # functions defined for 0 ≤ x ≤ 1 - (()->rand(), (asech,)), - # functions defined for x ≥ 1 - (()->1/rand(), (acosh,)) - ) - for f in fs - J = UniformScaling(λ()) - @test f(J) ≈ f(M(J)) - end - end -end - -@testset "conjugation of UniformScaling" begin - @test conj(UniformScaling(1))::UniformScaling{Int} == UniformScaling(1) - @test conj(UniformScaling(1.0))::UniformScaling{Float64} == UniformScaling(1.0) - @test conj(UniformScaling(1+1im))::UniformScaling{Complex{Int}} == UniformScaling(1-1im) - @test conj(UniformScaling(1.0+1.0im))::UniformScaling{ComplexF64} == UniformScaling(1.0-1.0im) -end - -@testset "isdiag, istriu, istril, issymmetric, ishermitian, isposdef, isapprox" begin - @test isdiag(I) - 
@test istriu(I) - @test istril(I) - @test issymmetric(I) - @test issymmetric(UniformScaling(complex(1.0,1.0))) - @test ishermitian(I) - @test !ishermitian(UniformScaling(complex(1.0,1.0))) - @test isposdef(UniformScaling(rand())) - @test !isposdef(UniformScaling(-rand())) - @test !isposdef(UniformScaling(randn(ComplexF64))) - @test !isposdef(UniformScaling(NaN)) - @test isposdef(I) - @test !isposdef(-I) - @test isposdef(UniformScaling(complex(1.0, 0.0))) - @test !isposdef(UniformScaling(complex(1.0, 1.0))) - @test UniformScaling(4.00000000000001) ≈ UniformScaling(4.0) - @test UniformScaling(4.32) ≈ UniformScaling(4.3) rtol=0.1 atol=0.01 - @test UniformScaling(4.32) ≈ 4.3 * [1 0; 0 1] rtol=0.1 atol=0.01 - @test UniformScaling(4.32) ≈ 4.3 * [1 0; 0 1] rtol=0.1 atol=0.01 norm=norm - @test 4.3 * [1 0; 0 1] ≈ UniformScaling(4.32) rtol=0.1 atol=0.01 - @test [4.3201 0.002;0.001 4.32009] ≈ UniformScaling(4.32) rtol=0.1 atol=0. - @test UniformScaling(4.32) ≉ fill(4.3,2,2) rtol=0.1 atol=0.01 - @test UniformScaling(4.32) ≈ 4.32 * [1 0; 0 1] -end - -@testset "arithmetic with Number" begin - α = rand() - @test α + I == α + 1 - @test I + α == α + 1 - @test α - I == α - 1 - @test I - α == 1 - α - @test α .* UniformScaling(1.0) == UniformScaling(1.0) .* α - @test UniformScaling(α)./α == UniformScaling(1.0) - @test α.\UniformScaling(α) == UniformScaling(1.0) - @test α * UniformScaling(1.0) == UniformScaling(1.0) * α - @test UniformScaling(α)/α == UniformScaling(1.0) - @test (2I)^α == (2I).^α == (2^α)I - - β = rand() - @test (α*I)^2 == UniformScaling(α^2) - @test (α*I)^(-2) == UniformScaling(α^(-2)) - @test (α*I)^(.5) == UniformScaling(α^(.5)) - @test (α*I)^β == UniformScaling(α^β) - - @test (α * I) .^ 2 == UniformScaling(α^2) - @test (α * I) .^ β == UniformScaling(α^β) -end - -@testset "unary" begin - @test +I === +1*I - @test -I === -1*I -end - -@testset "tr, det and logdet" begin - for T in (Int, Float64, ComplexF64, Bool) - @test tr(UniformScaling(zero(T))) === zero(T) - end - @test_throws ArgumentError tr(UniformScaling(1)) - @test det(I) === true - @test det(1.0I) === 1.0 - @test det(0I) === 0 - @test det(0.0I) === 0.0 - @test logdet(I) == 0 - @test_throws ArgumentError det(2I) -end - -@test copy(UniformScaling(one(Float64))) == UniformScaling(one(Float64)) -@test sprint(show,MIME"text/plain"(),UniformScaling(one(ComplexF64))) == "LinearAlgebra.UniformScaling{ComplexF64}\n(1.0 + 0.0im)*I" -@test sprint(show,MIME"text/plain"(),UniformScaling(one(Float32))) == "LinearAlgebra.UniformScaling{Float32}\n1.0*I" -@test sprint(show,UniformScaling(one(ComplexF64))) == "LinearAlgebra.UniformScaling{ComplexF64}(1.0 + 0.0im)" -@test sprint(show,UniformScaling(one(Float32))) == "LinearAlgebra.UniformScaling{Float32}(1.0f0)" - -let - λ = complex(randn(),randn()) - J = UniformScaling(λ) - @testset "transpose, conj, inv, pinv, cond" begin - @test ndims(J) == 2 - @test transpose(J) == J - @test J * [1 0; 0 1] == conj(*(adjoint(J), [1 0; 0 1])) # ctranpose (and A(c)_mul_B) - @test I + I === UniformScaling(2) # + - @test inv(I) == I - @test inv(J) == UniformScaling(inv(λ)) - @test pinv(J) == UniformScaling(inv(λ)) - @test @inferred(pinv(0.0I)) == 0.0I - @test @inferred(pinv(0I)) == 0.0I - @test @inferred(pinv(false*I)) == 0.0I - @test @inferred(pinv(0im*I)) == 0im*I - @test cond(I) == 1 - @test cond(J) == (λ ≠ zero(λ) ? 
one(real(λ)) : oftype(real(λ), Inf)) - end - - @testset "real, imag, reim" begin - @test real(J) == UniformScaling(real(λ)) - @test imag(J) == UniformScaling(imag(λ)) - @test reim(J) == (UniformScaling(real(λ)), UniformScaling(imag(λ))) - end - - @testset "copyto!" begin - A = Matrix{Int}(undef, (3,3)) - @test copyto!(A, I) == one(A) - B = Matrix{ComplexF64}(undef, (1,2)) - @test copyto!(B, J) == [λ zero(λ)] - end - - @testset "binary ops with vectors" begin - v = complex.(randn(3), randn(3)) - # As shown in #20423@GitHub, vector acts like x1 matrix when participating in linear algebra - @test v * J ≈ v * λ - @test v' * J ≈ v' * λ - @test J * v ≈ λ * v - @test J * v' ≈ λ * v' - @test v / J ≈ v / λ - @test v' / J ≈ v' / λ - @test J \ v ≈ λ \ v - @test J \ v' ≈ λ \ v' - end - - @testset "binary ops with matrices" begin - B = bitrand(2, 2) - @test B + I == B + Matrix(I, size(B)) - @test I + B == B + Matrix(I, size(B)) - AA = randn(2, 2) - for A in (AA, view(AA, 1:2, 1:2)) - I22 = Matrix(I, size(A)) - @test @inferred(A + I) == A + I22 - @test @inferred(I + A) == A + I22 - @test @inferred(I - I) === UniformScaling(0) - @test @inferred(B - I) == B - I22 - @test @inferred(I - B) == I22 - B - @test @inferred(A - I) == A - I22 - @test @inferred(I - A) == I22 - A - @test @inferred(I*J) === UniformScaling(λ) - @test @inferred(B*J) == B*λ - @test @inferred(J*B) == B*λ - @test @inferred(I*A) !== A # Don't alias - @test @inferred(A*I) !== A # Don't alias - - @test @inferred(A*J) == A*λ - @test @inferred(J*A) == A*λ - @test @inferred(J*fill(1, 3)) == fill(λ, 3) - @test @inferred(λ*J) === UniformScaling(λ*J.λ) - @test @inferred(J*λ) === UniformScaling(λ*J.λ) - @test @inferred(J/I) === J - @test @inferred(I/A) == inv(A) - @test @inferred(A/I) == A - @test @inferred(I/λ) === UniformScaling(1/λ) - @test @inferred(I\J) === J - - if isa(A, Array) - T = LowerTriangular(randn(3,3)) - else - T = LowerTriangular(view(randn(3,3), 1:3, 1:3)) - end - @test @inferred(T + J) == Array(T) + J - @test @inferred(J + T) == J + Array(T) - @test @inferred(T - J) == Array(T) - J - @test @inferred(J - T) == J - Array(T) - @test @inferred(T\I) == inv(T) - - if isa(A, Array) - T = LinearAlgebra.UnitLowerTriangular(randn(3,3)) - else - T = LinearAlgebra.UnitLowerTriangular(view(randn(3,3), 1:3, 1:3)) - end - @test @inferred(T + J) == Array(T) + J - @test @inferred(J + T) == J + Array(T) - @test @inferred(T - J) == Array(T) - J - @test @inferred(J - T) == J - Array(T) - @test @inferred(T\I) == inv(T) - - if isa(A, Array) - T = UpperTriangular(randn(3,3)) - else - T = UpperTriangular(view(randn(3,3), 1:3, 1:3)) - end - @test @inferred(T + J) == Array(T) + J - @test @inferred(J + T) == J + Array(T) - @test @inferred(T - J) == Array(T) - J - @test @inferred(J - T) == J - Array(T) - @test @inferred(T\I) == inv(T) - - if isa(A, Array) - T = LinearAlgebra.UnitUpperTriangular(randn(3,3)) - else - T = LinearAlgebra.UnitUpperTriangular(view(randn(3,3), 1:3, 1:3)) - end - @test @inferred(T + J) == Array(T) + J - @test @inferred(J + T) == J + Array(T) - @test @inferred(T - J) == Array(T) - J - @test @inferred(J - T) == J - Array(T) - @test @inferred(T\I) == inv(T) - - for elty in (Float64, ComplexF64) - if isa(A, Array) - T = Hermitian(randn(elty, 3,3)) - else - T = Hermitian(view(randn(elty, 3,3), 1:3, 1:3)) - end - @test @inferred(T + J) == Array(T) + J - @test @inferred(J + T) == J + Array(T) - @test @inferred(T - J) == Array(T) - J - @test @inferred(J - T) == J - Array(T) - end - - @test @inferred(I\A) == A - @test @inferred(A\I) == 
inv(A) - @test @inferred(λ\I) === UniformScaling(1/λ) - end - end -end - -@testset "hcat and vcat" begin - @test_throws ArgumentError hcat(I) - @test_throws ArgumentError [I I] - @test_throws ArgumentError vcat(I) - @test_throws ArgumentError [I; I] - @test_throws ArgumentError [I I; I] - - A = rand(3,4) - B = rand(3,3) - C = rand(0,3) - D = rand(2,0) - E = rand(1,3) - F = rand(3,1) - α = rand() - @test (hcat(A, 2I))::Matrix == hcat(A, Matrix(2I, 3, 3)) - @test (hcat(E, α))::Matrix == hcat(E, [α]) - @test (hcat(E, α, 2I))::Matrix == hcat(E, [α], fill(2, 1, 1)) - @test (vcat(A, 2I))::Matrix == vcat(A, Matrix(2I, 4, 4)) - @test (vcat(F, α))::Matrix == vcat(F, [α]) - @test (vcat(F, α, 2I))::Matrix == vcat(F, [α], fill(2, 1, 1)) - @test (hcat(C, 2I))::Matrix == C - @test_throws DimensionMismatch hcat(C, α) - @test (vcat(D, 2I))::Matrix == D - @test_throws DimensionMismatch vcat(D, α) - @test (hcat(I, 3I, A, 2I))::Matrix == hcat(Matrix(I, 3, 3), Matrix(3I, 3, 3), A, Matrix(2I, 3, 3)) - @test (vcat(I, 3I, A, 2I))::Matrix == vcat(Matrix(I, 4, 4), Matrix(3I, 4, 4), A, Matrix(2I, 4, 4)) - @test (hvcat((2,1,2), B, 2I, I, 3I, 4I))::Matrix == - hvcat((2,1,2), B, Matrix(2I, 3, 3), Matrix(I, 6, 6), Matrix(3I, 3, 3), Matrix(4I, 3, 3)) - @test hvcat((3,1), C, C, I, 3I)::Matrix == hvcat((2,1), C, C, Matrix(3I, 6,6)) - @test hvcat((2,2,2), I, 2I, 3I, 4I, C, C)::Matrix == - hvcat((2,2,2), Matrix(I, 3, 3), Matrix(2I, 3,3 ), Matrix(3I, 3,3), Matrix(4I, 3,3), C, C) - @test hvcat((2,2,4), C, C, I, 2I, 3I, 4I, 5I, D)::Matrix == - hvcat((2,2,4), C, C, Matrix(I, 3, 3), Matrix(2I,3,3), - Matrix(3I, 2, 2), Matrix(4I, 2, 2), Matrix(5I,2,2), D) - @test (hvcat((2,3,2), B, 2I, C, C, I, 3I, 4I))::Matrix == - hvcat((2,2,2), B, Matrix(2I, 3, 3), C, C, Matrix(3I, 3, 3), Matrix(4I, 3, 3)) - @test hvcat((3,2,1), C, C, I, B ,3I, 2I)::Matrix == - hvcat((2,2,1), C, C, B, Matrix(3I,3,3), Matrix(2I,6,6)) - @test (hvcat((1,2), A, E, α))::Matrix == hvcat((1,2), A, E, [α]) == hvcat((1,2), A, E, α*I) - @test (hvcat((2,2), α, E, F, 3I))::Matrix == hvcat((2,2), [α], E, F, Matrix(3I, 3, 3)) - @test (hvcat((2,2), 3I, F, E, α))::Matrix == hvcat((2,2), Matrix(3I, 3, 3), F, E, [α]) -end - -@testset "Matrix/Array construction from UniformScaling" begin - I2_33 = [2 0 0; 0 2 0; 0 0 2] - I2_34 = [2 0 0 0; 0 2 0 0; 0 0 2 0] - I2_43 = [2 0 0; 0 2 0; 0 0 2; 0 0 0] - for ArrType in (Matrix, Array) - @test ArrType(2I, 3, 3)::Matrix{Int} == I2_33 - @test ArrType(2I, 3, 4)::Matrix{Int} == I2_34 - @test ArrType(2I, 4, 3)::Matrix{Int} == I2_43 - @test ArrType(2.0I, 3, 3)::Matrix{Float64} == I2_33 - @test ArrType{Real}(2I, 3, 3)::Matrix{Real} == I2_33 - @test ArrType{Float64}(2I, 3, 3)::Matrix{Float64} == I2_33 - end -end - -@testset "Diagonal construction from UniformScaling" begin - @test Diagonal(2I, 3)::Diagonal{Int} == Matrix(2I, 3, 3) - @test Diagonal(2.0I, 3)::Diagonal{Float64} == Matrix(2I, 3, 3) - @test Diagonal{Real}(2I, 3)::Diagonal{Real} == Matrix(2I, 3, 3) - @test Diagonal{Float64}(2I, 3)::Diagonal{Float64} == Matrix(2I, 3, 3) -end - -@testset "equality comparison of matrices with UniformScaling" begin - # AbstractMatrix methods - diagI = Diagonal(fill(1, 3)) - rdiagI = view(diagI, 1:2, 1:3) - bidiag = Bidiagonal(fill(2, 3), fill(2, 2), :U) - @test diagI == I == diagI # test isone(I) path / equality - @test 2diagI != I != 2diagI # test isone(I) path / inequality - @test 0diagI == 0I == 0diagI # test iszero(I) path / equality - @test 2diagI != 0I != 2diagI # test iszero(I) path / inequality - @test 2diagI == 2I == 2diagI # test generic path / 
equality - @test 0diagI != 2I != 0diagI # test generic path / inequality on diag - @test bidiag != 2I != bidiag # test generic path / inequality off diag - @test rdiagI != I != rdiagI # test square matrix check - # StridedMatrix specialization - denseI = [1 0 0; 0 1 0; 0 0 1] - rdenseI = [1 0 0 0; 0 1 0 0; 0 0 1 0] - alltwos = fill(2, (3, 3)) - @test denseI == I == denseI # test isone(I) path / equality - @test 2denseI != I != 2denseI # test isone(I) path / inequality - @test 0denseI == 0I == 0denseI # test iszero(I) path / equality - @test 2denseI != 0I != 2denseI # test iszero(I) path / inequality - @test 2denseI == 2I == 2denseI # test generic path / equality - @test 0denseI != 2I != 0denseI # test generic path / inequality on diag - @test alltwos != 2I != alltwos # test generic path / inequality off diag - @test rdenseI != I != rdenseI # test square matrix check - - # isequal - @test !isequal(I, I(3)) - @test !isequal(I(1), I) - @test !isequal([1], I) - @test isequal(I, 1I) - @test !isequal(2I, 3I) -end - -@testset "operations involving I should preserve eltype" begin - @test isa(Int8(1) + I, Int8) - @test isa(Float16(1) + I, Float16) - @test eltype(Int8(1)I) == Int8 - @test eltype(Float16(1)I) == Float16 - @test eltype(fill(Int8(1), 2, 2)I) == Int8 - @test eltype(fill(Float16(1), 2, 2)I) == Float16 - @test eltype(fill(Int8(1), 2, 2) + I) == Int8 - @test eltype(fill(Float16(1), 2, 2) + I) == Float16 -end - -@testset "test that UniformScaling is applied correctly for matrices of matrices" begin - LL = Bidiagonal(fill(0*I, 3), fill(1*I, 2), :L) - @test (I - LL')\[[0], [0], [1]] == (I - LL)'\[[0], [0], [1]] == fill([1], 3) -end - -# Ensure broadcasting of I is an error (could be made to work in the future) -@testset "broadcasting of I (#23197)" begin - @test_throws MethodError I .+ 1 - @test_throws MethodError I .+ [1 1; 1 1] -end - -@testset "in-place mul! and div! 
methods" begin - J = randn()*I - A = randn(4, 3) - C = similar(A) - target_mul = J * A - target_div = A / J - @test mul!(C, J, A) == target_mul - @test mul!(C, A, J) == target_mul - @test lmul!(J, copyto!(C, A)) == target_mul - @test rmul!(copyto!(C, A), J) == target_mul - @test ldiv!(J, copyto!(C, A)) == target_div - @test ldiv!(C, J, A) == target_div - @test rdiv!(copyto!(C, A), J) == target_div - - A = randn(4, 3) - C = randn!(similar(A)) - alpha = randn() - beta = randn() - target = J * A * alpha + C * beta - @test mul!(copy(C), J, A, alpha, beta) ≈ target - @test mul!(copy(C), A, J, alpha, beta) ≈ target - - a = randn() - C = randn(3, 3) - target_5mul = a*alpha*J + beta*C - @test mul!(copy(C), a, J, alpha, beta) ≈ target_5mul - @test mul!(copy(C), J, a, alpha, beta) ≈ target_5mul - target_5mul = beta*C # alpha = 0 - @test mul!(copy(C), a, J, 0, beta) ≈ target_5mul - target_5mul = a*alpha*Matrix(J, 3, 3) # beta = 0 - @test mul!(copy(C), a, J, alpha, 0) ≈ target_5mul - -end - -@testset "Construct Diagonal from UniformScaling" begin - @test size(I(3)) === (3,3) - @test I(3) isa Diagonal - @test I(3) == [1 0 0; 0 1 0; 0 0 1] -end - -@testset "dot" begin - A = randn(3, 3) - λ = randn() - J = UniformScaling(λ) - @test dot(A, J) ≈ dot(J, A) - @test dot(A, J) ≈ tr(A' * J) - - A = rand(ComplexF64, 3, 3) - λ = randn() + im * randn() - J = UniformScaling(λ) - @test dot(A, J) ≈ conj(dot(J, A)) - @test dot(A, J) ≈ tr(A' * J) -end - -@testset "generalized dot" begin - x = rand(-10:10, 3) - y = rand(-10:10, 3) - λ = rand(-10:10) - J = UniformScaling(λ) - @test dot(x, J, y) == λ*dot(x, y) - λ = Quaternion(0.44567, 0.755871, 0.882548, 0.423612) - x, y = Quaternion(rand(4)...), Quaternion(rand(4)...) - @test dot([x], λ*I, [y]) ≈ dot(x, λ, y) ≈ dot(x, λ*y) -end - -@testset "Factorization solutions" begin - J = complex(randn(),randn()) * I - qrp = A -> qr(A, ColumnNorm()) - - # thin matrices - X = randn(3,2) - Z = pinv(X) - for fac in (qr,qrp,svd) - F = fac(X) - @test @inferred(F \ I) ≈ Z - @test @inferred(F \ J) ≈ Z * J - end - - # square matrices - X = randn(3,3) - X = X'X + rand()I # make positive definite for cholesky - Z = pinv(X) - for fac in (bunchkaufman,cholesky,lu,qr,qrp,svd) - F = fac(X) - @test @inferred(F \ I) ≈ Z - @test @inferred(F \ J) ≈ Z * J - end - - # fat matrices - only rank-revealing variants - X = randn(2,3) - Z = pinv(X) - for fac in (qrp,svd) - F = fac(X) - @test @inferred(F \ I) ≈ Z - @test @inferred(F \ J) ≈ Z * J - end -end - -@testset "offset arrays" begin - A = OffsetArray(zeros(4,4), -1:2, 0:3) - @test sum(I + A) ≈ 3.0 - @test sum(A + I) ≈ 3.0 - @test sum(I - A) ≈ 3.0 - @test sum(A - I) ≈ -3.0 -end - -@testset "type promotion when dividing UniformScaling by matrix" begin - A = randn(5,5) - cA = complex(A) - J = (5+2im)*I - @test J/A ≈ J/cA - @test A\J ≈ cA\J -end - -end # module TestUniformscaling diff --git a/stdlib/Logging/Project.toml b/stdlib/Logging/Project.toml index af931e68e07d1..ce69112733d5e 100644 --- a/stdlib/Logging/Project.toml +++ b/stdlib/Logging/Project.toml @@ -1,5 +1,6 @@ name = "Logging" uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md index 9a269ee54571b..a2bfd499e4586 100644 --- a/stdlib/Logging/docs/src/index.md +++ b/stdlib/Logging/docs/src/index.md @@ -1,7 +1,11 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Logging/docs/src/index.md" +``` + # [Logging](@id 
man-logging) The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a -computation as a log of events. Events are created by inserting a logging +computation as a log of events. Events are created by inserting a logging statement into the source code, for example: ```julia @@ -11,17 +15,17 @@ statement into the source code, for example: ``` The system provides several advantages over peppering your source code with -calls to `println()`. First, it allows you to control the visibility and -presentation of messages without editing the source code. For example, in +calls to `println()`. First, it allows you to control the visibility and +presentation of messages without editing the source code. For example, in contrast to the `@warn` above ```julia @debug "The sum of some values $(sum(rand(100)))" ``` -will produce no output by default. Furthermore, it's very cheap to leave debug +will produce no output by default. Furthermore, it's very cheap to leave debug statements like this in the source code because the system avoids evaluating -the message if it would later be ignored. In this case `sum(rand(100))` and +the message if it would later be ignored. In this case `sum(rand(100))` and the associated string processing will never be executed unless debug logging is enabled. @@ -88,7 +92,7 @@ The system also generates some standard information for each event: fairly stable even if the source code of the file changes, as long as the logging statement itself remains the same. * A `group` for the event, which is set to the base name of the file by default, - without extension. This can be used to group messages into categories more + without extension. This can be used to group messages into categories more finely than the log level (for example, all deprecation warnings have group `:depwarn`), or into logical groupings across or within modules. @@ -120,7 +124,7 @@ user configurable code to see the event. All loggers must be subtypes of [`AbstractLogger`](@ref). When an event is triggered, the appropriate logger is found by looking for a -task-local logger with the global logger as fallback. The idea here is that +task-local logger with the global logger as fallback. The idea here is that the application code knows how log events should be processed and exists somewhere at the top of the call stack. So we should look up through the call stack to discover the logger — that is, the logger should be *dynamically @@ -130,11 +134,11 @@ simple global variable. In such a system it's awkward to control logging while composing functionality from multiple modules.) The global logger may be set with [`global_logger`](@ref), and task-local -loggers controlled using [`with_logger`](@ref). Newly spawned tasks inherit +loggers controlled using [`with_logger`](@ref). Newly spawned tasks inherit the logger of the parent task. There are three logger types provided by the library. [`ConsoleLogger`](@ref) -is the default logger you see when starting the REPL. It displays events in a +is the default logger you see when starting the REPL. It displays events in a readable text format and tries to give simple but user friendly control over formatting and filtering. [`NullLogger`](@ref) is a convenient way to drop all messages where necessary; it is the logging equivalent of the [`devnull`](@ref) @@ -150,14 +154,14 @@ When an event occurs, a few steps of early filtering occur to avoid generating messages that will be discarded: 1. 
The message log level is checked against a global minimum level (set via - [`disable_logging`](@ref)). This is a crude but extremely cheap global + [`disable_logging`](@ref)). This is a crude but extremely cheap global setting. 2. The current logger state is looked up and the message level checked against the logger's cached minimum level, as found by calling [`Logging.min_enabled_level`](@ref). This behavior can be overridden via environment variables (more on this later). 3. The [`Logging.shouldlog`](@ref) function is called with the current logger, taking some minimal information (level, module, group, id) which can be computed - statically. Most usefully, `shouldlog` is passed an event `id` which can be + statically. Most usefully, `shouldlog` is passed an event `id` which can be used to discard events early based on a cached predicate. If all these checks pass, the message and key--value pairs are evaluated in full @@ -166,9 +170,9 @@ and passed to the current logger via the [`Logging.handle_message`](@ref) functi event to the screen, save it to a file, etc. Exceptions that occur while generating the log event are captured and logged -by default. This prevents individual broken events from crashing the +by default. This prevents individual broken events from crashing the application, which is helpful when enabling little-used debug events in a -production system. This behavior can be customized per logger type by +production system. This behavior can be customized per logger type by extending [`Logging.catch_exceptions`](@ref). ## Testing log events @@ -180,17 +184,17 @@ pattern match against the log event stream. ## Environment variables -Message filtering can be influenced through the `JULIA_DEBUG` environment +Message filtering can be influenced through the [`JULIA_DEBUG`](@ref JULIA_DEBUG) environment variable, and serves as an easy way to enable debug logging for a file or module. Loading julia with `JULIA_DEBUG=loading` will activate `@debug` log messages in `loading.jl`. For example, in Linux shells: ``` $ JULIA_DEBUG=loading julia -e 'using OhMyREPL' -┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an invalid cache header +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an incompatible cache header └ @ Base loading.jl:1328 [ Info: Recompiling stale cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji for module OhMyREPL -┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an invalid cache header +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an incompatible cache header └ @ Base loading.jl:1328 ... ``` @@ -298,6 +302,8 @@ Logging.Debug Logging.Info Logging.Warn Logging.Error +Logging.BelowMinLevel +Logging.AboveMaxLevel ``` ### [Processing events with AbstractLogger](@id AbstractLogger-interface) diff --git a/stdlib/Logging/src/Logging.jl b/stdlib/Logging/src/Logging.jl index 0743c650326cc..192885f2f94b7 100644 --- a/stdlib/Logging/src/Logging.jl +++ b/stdlib/Logging/src/Logging.jl @@ -12,7 +12,7 @@ module Logging # Doing it this way (rather than with import) makes these symbols accessible to # tab completion. for sym in [ - :LogLevel, :BelowMinLevel, :AboveMaxLevel, + :LogLevel, :AbstractLogger, :NullLogger, :handle_message, :shouldlog, :min_enabled_level, :catch_exceptions, @@ -54,9 +54,24 @@ const Warn = Base.CoreLogging.Warn Alias for [`LogLevel(2000)`](@ref LogLevel). 
""" const Error = Base.CoreLogging.Error +""" + BelowMinLevel + +Alias for [`LogLevel(-1_000_001)`](@ref LogLevel). +""" +const BelowMinLevel = Base.CoreLogging.BelowMinLevel +""" + AboveMaxLevel + +Alias for [`LogLevel(1_000_001)`](@ref LogLevel). +""" +const AboveMaxLevel = Base.CoreLogging.AboveMaxLevel using Base.CoreLogging: - closed_stream + closed_stream, ConsoleLogger, default_metafmt + +# Some packages use `Logging.default_logcolor` +const default_logcolor = Base.CoreLogging.default_logcolor export AbstractLogger, @@ -80,8 +95,6 @@ export Error, AboveMaxLevel -include("ConsoleLogger.jl") - # The following are also part of the public API, but not exported: # # 1. Log levels: @@ -90,8 +103,4 @@ include("ConsoleLogger.jl") # 2. AbstractLogger message related functions: # handle_message, shouldlog, min_enabled_level, catch_exceptions, -function __init__() - global_logger(ConsoleLogger()) -end - end diff --git a/stdlib/Logging/test/runtests.jl b/stdlib/Logging/test/runtests.jl index b6b4813964536..2fedbde557078 100644 --- a/stdlib/Logging/test/runtests.jl +++ b/stdlib/Logging/test/runtests.jl @@ -6,6 +6,10 @@ import Logging: min_enabled_level, shouldlog, handle_message @noinline func1() = backtrace() +# see "custom log macro" testset +CustomLog = LogLevel(-500) +macro customlog(exs...) Base.CoreLogging.logmsg_code((Base.CoreLogging.@_sourceinfo)..., esc(CustomLog), exs...) end + @testset "Logging" begin @testset "Core" begin @@ -48,6 +52,15 @@ end end @test String(take!(buf)) == "" + # Check that the AnnotatedString path works too + with_logger(logger) do + @info Base.AnnotatedString("test") + end + @test String(take!(buf)) == + """ + [ Info: test + """ + @testset "Default metadata formatting" begin @test Logging.default_metafmt(Logging.Debug, Base, :g, :i, expanduser("~/somefile.jl"), 42) == (:blue, "Debug:", "@ Base ~/somefile.jl:42") @@ -272,7 +285,25 @@ end AboveMaxLevel === Logging.AboveMaxLevel end """) - @test m.run() + @test invokelatest(m.run) +end + +@testset "custom log macro" begin + @test_logs (CustomLog, "a") min_level=CustomLog @customlog "a" + + buf = IOBuffer() + io = IOContext(buf, :displaysize=>(30,80), :color=>false) + logger = ConsoleLogger(io, CustomLog) + + with_logger(logger) do + @customlog "a" + end + @test occursin("LogLevel(-500): a", String(take!(buf))) +end + +@testset "Docstrings" begin + undoc = Docs.undocumented_names(Logging) + @test isempty(undoc) end end diff --git a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml index 39f99815832eb..50de38f169ff0 100644 --- a/stdlib/MPFR_jll/Project.toml +++ b/stdlib/MPFR_jll/Project.toml @@ -1,6 +1,6 @@ name = "MPFR_jll" uuid = "3a97d323-0669-5f0c-9066-3539efd106a3" -version = "4.2.0+0" +version = "4.2.1+2" [deps] GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d" diff --git a/stdlib/MPFR_jll/src/MPFR_jll.jl b/stdlib/MPFR_jll/src/MPFR_jll.jl index c184a9801102f..219ab0cad41be 100644 --- a/stdlib/MPFR_jll/src/MPFR_jll.jl +++ b/stdlib/MPFR_jll/src/MPFR_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/MPFR_jll.jl baremodule MPFR_jll using Base, Libdl, GMP_jll -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/MPFR_jll/test/runtests.jl b/stdlib/MPFR_jll/test/runtests.jl index 81b6e06ed7b49..fc931b462fa9c 100644 --- a/stdlib/MPFR_jll/test/runtests.jl +++ b/stdlib/MPFR_jll/test/runtests.jl @@ -4,5 +4,5 @@ using Test, Libdl, MPFR_jll @testset "MPFR_jll" begin vn = 
VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Cstring, ()))) - @test vn == v"4.2.0" + @test vn == v"4.2.1" end diff --git a/stdlib/Makefile b/stdlib/Makefile index e42061d593905..3975f24b7ae3b 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -17,7 +17,7 @@ VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION) DIRS := $(build_datarootdir)/julia/stdlib/$(VERSDIR) $(build_prefix)/manifest/$(VERSDIR) $(foreach dir,$(DIRS),$(eval $(call dir_target,$(dir)))) -JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV MBEDTLS MPFR NGHTTP2 \ +JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV OPENSSL MPFR NGHTTP2 \ BLASTRAMPOLINE OPENBLAS OPENLIBM P7ZIP PCRE LIBSUITESPARSE ZLIB \ LLVMUNWIND CSL UNWIND LLD @@ -39,14 +39,15 @@ install-$$($(1)_JLL_NAME)_jll: get-$$($(1)_JLL_NAME)_jll endef $(foreach jll,$(JLLS),$(eval $(call download-artifacts-toml,$(jll)))) - -STDLIBS = Artifacts Base64 CRC32c Dates Distributed FileWatching \ - Future InteractiveUtils LazyArtifacts Libdl LibGit2 LinearAlgebra Logging \ +STDLIBS = Artifacts Base64 CRC32c Dates FileWatching \ + Future InteractiveUtils Libdl LibGit2 Logging \ Markdown Mmap Printf Profile Random REPL Serialization \ SharedArrays Sockets Test TOML Unicode UUIDs \ $(JLL_NAMES) -STDLIBS_EXT = Pkg Statistics LibCURL DelimitedFiles Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA +STDLIBS_EXT = Pkg Statistics LazyArtifacts LibCURL DelimitedFiles Downloads ArgTools \ + Tar NetworkOptions SuiteSparse SparseArrays StyledStrings SHA Distributed \ + JuliaSyntaxHighlighting LinearAlgebra $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z)))) @@ -54,7 +55,6 @@ ifneq ($(filter $(STDLIBS),$(STDLIBS_EXT)),) $(error ERROR duplicated STDLIBS in list) endif - # Generate symlinks to all stdlibs at usr/share/julia/stdlib/vX.Y/ $(foreach module, $(STDLIBS), $(eval $(call symlink_target,$$(JULIAHOME)/stdlib/$(module),$$(build_datarootdir)/julia/stdlib/$$(VERSDIR),$(module)))) diff --git a/stdlib/Manifest.toml b/stdlib/Manifest.toml new file mode 100644 index 0000000000000..f029a210320cc --- /dev/null +++ b/stdlib/Manifest.toml @@ -0,0 +1,300 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "1cb1aede0b4f0a2f12806233b9f188a63d6acf04" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.CRC32c]] +uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" +version = "1.11.0" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.2.0+0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.DelimitedFiles]] +deps = ["Mmap"] +git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +version = "1.9.1" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +version = "1.11.0" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.Future]] +deps = 
["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" + +[[deps.GMP_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d" +version = "6.3.0+2" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.JuliaSyntaxHighlighting]] +deps = ["StyledStrings"] +uuid = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011" +version = "1.12.0" + +[[deps.LLD_jll]] +deps = ["Artifacts", "Libdl", "Zlib_jll", "libLLVM_jll"] +uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109" +version = "18.1.7+3" + +[[deps.LLVMLibUnwind_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9" +version = "19.1.4+0" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = "1.11.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.11.1+1" + +[[deps.LibGit2]] +deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "OpenSSL_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.9.0+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "OpenSSL_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.3+1" + +[[deps.LibUV_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "183b4373-6708-53ba-ad28-60e28bb38547" +version = "2.0.1+20" + +[[deps.LibUnwind_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3" +version = "1.8.1+2" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.MPFR_jll]] +deps = ["Artifacts", "GMP_jll", "Libdl"] +uuid = "3a97d323-0669-5f0c-9066-3539efd106a3" +version = "4.2.1+1" + +[[deps.Markdown]] +deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2024.11.26" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.3.0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.28+3" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.5+0" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.15+1" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.44.0+1" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.12.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + 
REPLExt = "REPL" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.Profile]] +deps = ["StyledStrings"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +version = "1.11.0" + +[[deps.REPL]] +deps = ["InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +version = "1.11.0" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.12.0" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.8.3+2" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.3.1+2" + +[[deps.dSFMT_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36" +version = "2.2.5+2" + +[[deps.libLLVM_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a" +version = "18.1.7+3" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.12.0+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.64.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.5.0+2" diff --git a/stdlib/Markdown/Project.toml b/stdlib/Markdown/Project.toml index 229e58749d233..a48a3d1f0b345 100644 --- a/stdlib/Markdown/Project.toml +++ b/stdlib/Markdown/Project.toml @@ -1,8 +1,11 @@ name = "Markdown" uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" [deps] Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011" +StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Markdown/docs/src/index.md b/stdlib/Markdown/docs/src/index.md index 
a107929d1e838..926e3921d339d 100644 --- a/stdlib/Markdown/docs/src/index.md +++ b/stdlib/Markdown/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Markdown/docs/src/index.md" +``` + # [Markdown](@id markdown_stdlib) This section describes Julia's markdown syntax, which is enabled by the @@ -75,7 +79,7 @@ the text enclosed in square brackets, `[ ]`, is the name of the link and the tex parentheses, `( )`, is the URL. ``` -A paragraph containing a link to [Julia](http://www.julialang.org). +A paragraph containing a link to [Julia](https://www.julialang.org). ``` It's also possible to add cross-references to other documented functions/methods/variables within @@ -153,8 +157,8 @@ A header line can contain any inline syntax in the same way as a paragraph can. ### Code blocks -Source code can be displayed as a literal block using an indent of four spaces as shown in the -following example. +Source code can be displayed as a literal block using an indent of four spaces or one tab as shown +in the following example. ``` This is a paragraph. @@ -298,7 +302,8 @@ aside from the `:` character that is appended to the footnote label. [^note]: - Named footnote text containing several toplevel elements. + Named footnote text containing several toplevel elements + indented by 4 spaces or one tab. * item one * item two @@ -357,6 +362,7 @@ They can be defined using the following `!!!` syntax: !!! note This is the content of the note. + It is indented by 4 spaces. A tab would work as well. !!! warning "Beware!" @@ -386,6 +392,16 @@ If no title text is specified after the admonition type, then the type name will Admonitions, like most other toplevel elements, can contain other toplevel elements (e.g. lists, images). +## [Markdown String Literals](@id stdlib-markdown-literals) + +The `md""` macro allows you to embed Markdown strings directly into your Julia code. +This macro is designed to simplify the inclusion of Markdown-formatted text within your Julia source files. + +### Usage + +```julia +result = md"This is a **custom** Markdown string with [a link](http://example.com)." +``` ## Markdown Syntax Extensions Julia's markdown supports interpolation in a very similar way to basic string literals, with the @@ -396,3 +412,15 @@ complex features (such as references) without cluttering the basic syntax. In principle, the Markdown parser itself can also be arbitrarily extended by packages, or an entirely custom flavour of Markdown can be used, but this should generally be unnecessary. + + +## [API reference](@id stdlib-markdown-api) + +```@docs +Markdown.MD +Markdown.@md_str +Markdown.@doc_str +Markdown.parse +Markdown.html +Markdown.latex +``` diff --git a/stdlib/Markdown/src/Common/Common.jl b/stdlib/Markdown/src/Common/Common.jl index 3036f2b4b730b..4bd3e5b4af8d6 100644 --- a/stdlib/Markdown/src/Common/Common.jl +++ b/stdlib/Markdown/src/Common/Common.jl @@ -1,5 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +abstract type MarkdownElement end + include("block.jl") include("inline.jl") diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl index bd184b60c40fa..247c894769f15 100644 --- a/stdlib/Markdown/src/Common/block.jl +++ b/stdlib/Markdown/src/Common/block.jl @@ -4,7 +4,7 @@ # Paragraphs # –––––––––– -mutable struct Paragraph +mutable struct Paragraph <: MarkdownElement content end @@ -21,7 +21,7 @@ function paragraph(stream::IO, md::MD) char == '\r' && !eof(stream) && peek(stream, Char) == '\n' && read(stream, Char) if prev_char == '\\' write(buffer, '\n') - elseif blankline(stream) || parse(stream, md, breaking = true) + elseif blankline(stream) || _parse(stream, md, breaking = true) break else write(buffer, ' ') @@ -39,7 +39,7 @@ end # Headers # ––––––– -mutable struct Header{level} +mutable struct Header{level} <: MarkdownElement text end @@ -95,7 +95,7 @@ end # Code # –––– -mutable struct Code +mutable struct Code <: MarkdownElement language::String code::String end @@ -124,7 +124,7 @@ end # Footnote # -------- -mutable struct Footnote +mutable struct Footnote <: MarkdownElement id::String text end @@ -159,7 +159,7 @@ end # Quotes # –––––– -mutable struct BlockQuote +mutable struct BlockQuote <: MarkdownElement content end @@ -188,7 +188,7 @@ end # Admonitions # ----------- -mutable struct Admonition +mutable struct Admonition <: MarkdownElement category::String title::String content::Vector @@ -246,7 +246,7 @@ end # Lists # ––––– -mutable struct List +mutable struct List <: MarkdownElement items::Vector{Any} ordered::Int # `-1` is unordered, `>= 0` is ordered. loose::Bool # TODO: Renderers should use this field @@ -332,7 +332,7 @@ pushitem!(list, buffer) = push!(list.items, parse(String(take!(buffer))).content # HorizontalRule # –––––––––––––– -mutable struct HorizontalRule +mutable struct HorizontalRule <: MarkdownElement end function horizontalrule(stream::IO, block::MD) diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl index fda716a10fae7..a2a4140f80050 100644 --- a/stdlib/Markdown/src/Common/inline.jl +++ b/stdlib/Markdown/src/Common/inline.jl @@ -4,7 +4,7 @@ # Emphasis # –––––––– -mutable struct Italic +mutable struct Italic <: MarkdownElement text end @@ -20,7 +20,7 @@ function underscore_italic(stream::IO, md::MD) return result === nothing ? 
nothing : Italic(parseinline(result, md)) end -mutable struct Bold +mutable struct Bold <: MarkdownElement text end @@ -66,7 +66,7 @@ end # Images & Links # –––––––––––––– -mutable struct Image +mutable struct Image <: MarkdownElement url::String alt::String end @@ -85,7 +85,7 @@ function image(stream::IO, md::MD) end end -mutable struct Link +mutable struct Link <: MarkdownElement text url::String end @@ -156,7 +156,7 @@ end # Punctuation # ––––––––––– -mutable struct LineBreak end +mutable struct LineBreak <: MarkdownElement end @trigger '\\' -> function linebreak(stream::IO, md::MD) diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl index 61807d267511d..676ae4a137779 100644 --- a/stdlib/Markdown/src/GitHub/GitHub.jl +++ b/stdlib/Markdown/src/GitHub/GitHub.jl @@ -44,7 +44,7 @@ function github_paragraph(stream::IO, md::MD) for char in readeach(stream, Char) if char == '\n' eof(stream) && break - if blankline(stream) || parse(stream, md, breaking = true) + if blankline(stream) || _parse(stream, md, breaking = true) break else write(buffer, '\n') diff --git a/stdlib/Markdown/src/GitHub/table.jl b/stdlib/Markdown/src/GitHub/table.jl index 29f956e9a0710..7c174007a75ba 100644 --- a/stdlib/Markdown/src/GitHub/table.jl +++ b/stdlib/Markdown/src/GitHub/table.jl @@ -140,15 +140,15 @@ end function term(io::IO, md::Table, columns) margin_str = " "^margin - cells = mapmap(x -> terminline_string(io, x), md.rows) - padcells!(cells, md.align, len = ansi_length) + cells = mapmap(x -> annotprint(terminline, x), md.rows) + padcells!(cells, md.align, len = textwidth) for i = 1:length(cells) print(io, margin_str) join(io, cells[i], " ") if i == 1 println(io) print(io, margin_str) - join(io, ["–"^ansi_length(cells[i][j]) for j = 1:length(cells[1])], " ") + join(io, ["–"^textwidth(cells[i][j]) for j = 1:length(cells[1])], " ") end i < length(cells) && println(io) end diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl index 781fcbdafddc8..8d79cc93d6171 100644 --- a/stdlib/Markdown/src/Markdown.jl +++ b/stdlib/Markdown/src/Markdown.jl @@ -1,13 +1,20 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license """ -Tools for working with the Markdown file format. Mainly for documentation. + Markdown + +Tools for working with the Markdown markup language for formatted text, used within Julia for documentation. +The `Markdown` module provides the (internal) [`MD`](@ref) type as well as the string +literals `md"..."` and `doc"..."`. """ module Markdown -import Base: show, ==, with_output_color, mapany +import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany using Base64: stringmime +using StyledStrings: StyledStrings, Face, addface!, @styled_str, styled +using JuliaSyntaxHighlighting: highlight, highlight! + # Margin for printing in terminal. 
const margin = 2 @@ -28,7 +35,40 @@ include("render/terminal/render.jl") export @md_str, @doc_str -parse(markdown::AbstractString; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor) +public MD, parse + +const MARKDOWN_FACES = [ + :markdown_header => Face(weight=:bold), + :markdown_h1 => Face(height=1.25, inherit=:markdown_header), + :markdown_h2 => Face(height=1.20, inherit=:markdown_header), + :markdown_h3 => Face(height=1.15, inherit=:markdown_header), + :markdown_h4 => Face(height=1.12, inherit=:markdown_header), + :markdown_h5 => Face(height=1.08, inherit=:markdown_header), + :markdown_h6 => Face(height=1.05, inherit=:markdown_header), + :markdown_admonition => Face(weight=:bold), + :markdown_code => Face(inherit=:code), + :markdown_julia_prompt => Face(inherit=:repl_prompt_julia), + :markdown_footnote => Face(inherit=:bright_yellow), + :markdown_hrule => Face(inherit=:shadow), + :markdown_inlinecode => Face(inherit=:markdown_code), + :markdown_latex => Face(inherit=:magenta), + :markdown_link => Face(underline=:bright_blue), + :markdown_list => Face(foreground=:blue), +] + +__init__() = foreach(addface!, MARKDOWN_FACES) + +parse(markdown::String; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor) + +""" + Markdown.parse(markdown::AbstractString) -> MD + +Parse `markdown` as Julia-flavored Markdown text and return the corresponding `MD` object. + +See also [`@md_str`](@ref). +""" +parse(markdown::AbstractString; flavor = julia) = parse(String(markdown), flavor = flavor) + parse_file(file::AbstractString; flavor = julia) = parse(read(file, String), flavor = flavor) function mdexpr(s, flavor = :julia) @@ -40,6 +80,24 @@ function docexpr(source::LineNumberNode, mod::Module, s, flavor = :julia) :($doc_str($(mdexpr(s, flavor)), $(QuoteNode(source)), $mod)) end +""" + @md_str -> MD + +Parse the given string as Markdown text and return a corresponding [`MD`](@ref) object. + +See also [`Markdown.parse`](@ref Markdown.parse(::AbstractString)). + +# Examples +```jldoctest +julia> s = md"# Hello, world!" + Hello, world! + ≡≡≡≡≡≡≡≡≡≡≡≡≡ + +julia> typeof(s) +Markdown.MD + +``` +""" macro md_str(s, t...) mdexpr(s, t...) end @@ -51,6 +109,25 @@ function doc_str(md, source::LineNumberNode, mod::Module) end doc_str(md::AbstractString, source::LineNumberNode, mod::Module) = doc_str(parse(md), source, mod) +""" + @doc_str -> MD + +Parse the given string as Markdown text, add line and module information and return a +corresponding [`MD`](@ref) object. + +`@doc_str` can be used in conjunction with the [`Base.Docs`](@ref) module. Please also refer to +the manual section on [documentation](@ref man-documentation) for more information. + +# Examples +``` +julia> s = doc"f(x) = 2*x" + f(x) = 2*x + +julia> typeof(s) +Markdown.MD + +``` +""" macro doc_str(s::AbstractString, t...) docexpr(__source__, __module__, s, t...) end @@ -59,4 +136,25 @@ import Base.Docs: catdoc catdoc(md::MD...) = MD(md...) +if Base.generating_output() + # workload to reduce latency + md""" + # H1 + ## H2 + ### H3 + **bold text** + *italicized text* + > blockquote + 1. First item + 2. Second item + 3. Third item + - First item + - Second item + - Third item + `code` + Horizontal Rule + --- + """ +end + end diff --git a/stdlib/Markdown/src/parse/parse.jl b/stdlib/Markdown/src/parse/parse.jl index 452d90d1176e1..dee1e781bfbef 100644 --- a/stdlib/Markdown/src/parse/parse.jl +++ b/stdlib/Markdown/src/parse/parse.jl @@ -1,5 +1,12 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +""" + MD + +`MD` represents a Markdown document. Note that the `MD` constructor should not generally be +used directly, since it constructs the internal data structures. Instead, you can construct +`MD` objects using the exported macros [`@md_str`](@ref) and [`@doc_str`](@ref). +""" mutable struct MD content::Vector{Any} meta::Dict{Symbol, Any} @@ -76,7 +83,7 @@ parseinline(s, md::MD) = parseinline(s, md, config(md)) # Block parsing -function parse(stream::IO, block::MD, config::Config; breaking = false) +function _parse(stream::IO, block::MD, config::Config; breaking = false) skipblank(stream) eof(stream) && return false for parser in (breaking ? config.breaking : [config.breaking; config.regular]) @@ -85,12 +92,17 @@ function parse(stream::IO, block::MD, config::Config; breaking = false) return false end -parse(stream::IO, block::MD; breaking = false) = - parse(stream, block, config(block), breaking = breaking) +_parse(stream::IO, block::MD; breaking = false) = + _parse(stream, block, config(block), breaking = breaking) + +""" + parse(stream::IO) -> MD +Parse the content of `stream` as Julia-flavored Markdown text and return the corresponding `MD` object. +""" function parse(stream::IO; flavor = julia) isa(flavor, Symbol) && (flavor = flavors[flavor]) markdown = MD(flavor) - while parse(stream, markdown, flavor) end + while _parse(stream, markdown, flavor) end return markdown end diff --git a/stdlib/Markdown/src/render/html.jl b/stdlib/Markdown/src/render/html.jl index a48180509400f..829fa6c7bf986 100644 --- a/stdlib/Markdown/src/render/html.jl +++ b/stdlib/Markdown/src/render/html.jl @@ -67,6 +67,9 @@ end function html(io::IO, code::Code) withtag(io, :pre) do + if code.language == "styled" + code = Code("", String(styled(code.code))) + end maybe_lang = !isempty(code.language) ? Any[:class=>"language-$(code.language)"] : [] withtag(io, :code, maybe_lang...) do htmlesc(io, code.code) @@ -134,6 +137,9 @@ function htmlinline(io::IO, content::Vector) end function htmlinline(io::IO, code::Code) + if code.language == "styled" + code = Code("", String(styled(code.code))) + end withtag(io, :code) do htmlesc(io, code.code) end @@ -182,6 +188,21 @@ htmlinline(io::IO, x) = tohtml(io, x) export html +""" + html([io::IO], md) + +Output the contents of the Markdown object `md` in HTML format, either +writing to an (optional) `io` stream or returning a string. + +One can alternatively use `show(io, "text/html", md)` or `repr("text/html", md)`, which +differ in that they wrap the output in a `
<div class="markdown"> ... </div>` element. + +# Examples +```jldoctest +julia> html(md"hello _world_") +"<p>hello <em>world</em></p>
\\n" +``` +""" html(md) = sprint(html, md) function show(io::IO, ::MIME"text/html", md::MD) diff --git a/stdlib/Markdown/src/render/latex.jl b/stdlib/Markdown/src/render/latex.jl index d18a2e760ef3d..fad0508ce0e59 100644 --- a/stdlib/Markdown/src/render/latex.jl +++ b/stdlib/Markdown/src/render/latex.jl @@ -33,6 +33,9 @@ function latex(io::IO, header::Header{l}) where l end function latex(io::IO, code::Code) + if code.language == "styled" + code = Code("", String(styled(code.code))) + end occursin("\\end{verbatim}", code.code) && error("Cannot include \"\\end{verbatim}\" in a latex code block") wrapblock(io, "verbatim") do println(io, code.code) @@ -167,6 +170,20 @@ function latexesc(io, s::AbstractString) end end +""" + latex([io::IO], md) + +Output the contents of the Markdown object `md` in LaTeX format, either +writing to an (optional) `io` stream or returning a string. + +One can alternatively use `show(io, "text/latex", md)` or `repr("text/latex", md)`. + +# Examples +```jldoctest +julia> latex(md"hello _world_") +"hello \\\\emph{world}\\n\\n" +``` +""" latex(md) = sprint(latex, md) latexinline(md) = sprint(latexinline, md) latexesc(s) = sprint(latexesc, s) diff --git a/stdlib/Markdown/src/render/rst.jl b/stdlib/Markdown/src/render/rst.jl index 752916c581a07..e441ee0495da0 100644 --- a/stdlib/Markdown/src/render/rst.jl +++ b/stdlib/Markdown/src/render/rst.jl @@ -23,10 +23,16 @@ end function rst(io::IO, code::Code) if code.language == "jldoctest" println(io, ".. doctest::\n") - elseif code.language != "rst" + elseif code.language in ("", "julia", "julia-repl") println(io, ".. code-block:: julia\n") + elseif code.language == "rst" + elseif code.language == "styled" + code = Code("", String(styled(code.code))) + println(io, "::\n") + else + println(io, "::\n") end - for l in lines(code.code) + for l in eachsplit(code.code, '\n') println(io, " ", l) end end @@ -90,7 +96,7 @@ end function rst(io::IO, l::LaTeX) println(io, ".. math::\n") - for line in lines(l.formula) + for line in eachsplit(l.formula, '\n') println(io, " ", line) end end diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl index a031de4d9ad82..c9dadfb5f3d94 100644 --- a/stdlib/Markdown/src/render/terminal/formatting.jl +++ b/stdlib/Markdown/src/render/terminal/formatting.jl @@ -1,68 +1,82 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Wrapping +const AnnotIO = Union{AnnotatedIOBuffer, IOContext{AnnotatedIOBuffer}} -function ansi_length(s) - replace(s, r"\e\[[0-9]+m" => "") |> textwidth +function annotprint(f::Function, args...) + buf = AnnotatedIOBuffer() + f(buf, args...) + read(seekstart(buf), AnnotatedString) end -words(s) = split(s, " ") -lines(s) = split(s, "\n") +""" + with_output_annotations(f::Function, io::AnnotIO, annots::Pair{Symbol, <:Any}...) -function wrapped_line(io::IO, s::AbstractString, width, i) - ws = words(s) - lines = String[] - for word in ws - word_length = ansi_length(word) - word_length == 0 && continue - if isempty(lines) || i + word_length + 1 > width - i = word_length - if length(lines) > 0 - last_line = lines[end] - maybe_underline = findlast(Base.text_colors[:underline], last_line) - if !isnothing(maybe_underline) - # disable underline style at end of line if not already disabled. 
- maybe_disable_underline = max( - last(something(findlast(Base.disable_text_style[:underline], last_line), -1)), - last(something(findlast(Base.text_colors[:normal], last_line), -1)), - ) +Call `f(io)`, and apply `annots` to the output created by doing so. +""" +function with_output_annotations(f::Function, io::AnnotIO, annots::Pair{Symbol, <:Any}...) + @nospecialize annots + aio = if io isa AnnotatedIOBuffer io else io.io end + start = position(aio) + 1 + f(io) + stop = position(aio) + sortedindex = searchsortedlast(aio.annotations, (region=start:stop,), by=a -> a.region) + for (i, annot) in enumerate(annots) + insert!(aio.annotations, sortedindex + i, (start:stop, annot...)) + end +end - if maybe_disable_underline < 0 || maybe_disable_underline < last(maybe_underline) +""" + wraplines(content::AnnotatedString, width::Integer = 80, column::Integer = 0) - lines[end] = last_line * Base.disable_text_style[:underline] - word = Base.text_colors[:underline] * word - end +Wrap `content` into a vector of lines of at most `width` (according to +`textwidth`), with the first line starting at `column`. +""" +function wraplines(content::Union{Annot, SubString{<:Annot}}, width::Integer = 80, column::Integer = 0) where { Annot <: AnnotatedString} + s, lines = String(content), SubString{Annot}[] + i, lastwrap, slen = firstindex(s), 0, ncodeunits(s) + most_recent_break_opportunity = 1 + while i < slen + if isspace(s[i]) && s[i] != '\n' + most_recent_break_opportunity = i + elseif s[i] == '\n' + push!(lines, content[nextind(s, lastwrap):prevind(s, i)]) + lastwrap = i + column = 0 + elseif column >= width && most_recent_break_opportunity > 1 + if lastwrap == most_recent_break_opportunity + nextbreak = findfirst(isspace, @view s[nextind(s, lastwrap):end]) + if isnothing(nextbreak) + break + else + most_recent_break_opportunity = lastwrap + nextbreak end + i = most_recent_break_opportunity + else + i = nextind(s, most_recent_break_opportunity) end - push!(lines, word) - else - i += word_length + 1 - lines[end] *= " " * word # this could be more efficient + push!(lines, content[nextind(s, lastwrap):prevind(s, most_recent_break_opportunity)]) + lastwrap = most_recent_break_opportunity + column = 0 end + column += textwidth(s[i]) + i = nextind(s, i) end - return i, lines -end - -function wrapped_lines(io::IO, s::AbstractString; width = 80, i = 0) - ls = String[] - for ss in lines(s) - i, line = wrapped_line(io, ss, width, i) - append!(ls, line) + if lastwrap < slen + push!(lines, content[nextind(s, lastwrap):end]) end - return ls + lines end -wrapped_lines(io::IO, f::Function, args...; width = 80, i = 0) = - wrapped_lines(io, sprint(f, args...; context=io), width = width, i = 0) - -function print_wrapped(io::IO, s...; width = 80, pre = "", i = 0) - lines = wrapped_lines(io, s..., width = width, i = i) - isempty(lines) && return 0, 0 - print(io, lines[1]) - for line in lines[2:end] - print(io, '\n', pre, line) +# Print horizontal lines between each docstring if there are multiple docs +function insert_hlines(docs) + if !isa(docs, MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results]) + return docs + end + docs = docs::MD + v = Any[] + for (n, doc) in enumerate(docs.content) + push!(v, doc) + n == length(docs.content) || push!(v, HorizontalRule()) end - length(lines), length(pre) + ansi_length(lines[end]) + return MD(v) end - -print_wrapped(f::Function, io::IO, args...; kws...) = print_wrapped(io, f, args...; kws...) 
diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl index 20b1ef6d041fc..a97d273131536 100644 --- a/stdlib/Markdown/src/render/terminal/render.jl +++ b/stdlib/Markdown/src/render/terminal/render.jl @@ -13,121 +13,158 @@ function term(io::IO, content::Vector, cols) term(io, content[end], cols) end -term(io::IO, md::MD, columns = cols(io)) = term(io, md.content, columns) +function term(io::IO, md::MD, columns = cols(io)) + md = insert_hlines(md) + return term(io, md.content, columns) +end function term(io::IO, md::Paragraph, columns) - print(io, ' '^margin) - print_wrapped(io, width = columns-2margin, pre = ' '^margin) do io - terminline(io, md.content) + lines = wraplines(annotprint(terminline, md.content), columns-2margin) + for (i, line) in enumerate(lines) + print(io, ' '^margin, line) + i < length(lines) && println(io) end end function term(io::IO, md::BlockQuote, columns) - s = sprint(term, md.content, columns - 10; context=io) - lines = split(rstrip(s), '\n') - print(io, ' '^margin, '│', lines[1]) - for i = 2:length(lines) - print(io, '\n', ' '^margin, '│', lines[i]) + content = annotprint(term, md.content, columns - 10) + lines = wraplines(rstrip(content), columns - 10) + for (i, line) in enumerate(lines) + print(io, ' '^margin, '│', line) + i < length(lines) && println(io) end end function term(io::IO, md::Admonition, columns) - col = :default - # If the types below are modified, the page manual/documentation.md must be updated accordingly. - if md.category == "danger" - col = Base.error_color() - elseif md.category == "warning" - col = Base.warn_color() - elseif md.category in ("info", "note") - col = Base.info_color() - elseif md.category == "tip" - col = :green + accent = if md.category == "danger" + :error + elseif md.category in ("warning", "info", "note", "tip") + Symbol(md.category) + elseif md.category == "compat" + :bright_cyan + elseif md.category == "todo" + :magenta + else + :default end - printstyled(io, ' '^margin, "│ "; color=col, bold=true) - printstyled(io, isempty(md.title) ? 
md.category : md.title; color=col, bold=true) - printstyled(io, '\n', ' '^margin, '│', '\n'; color=col, bold=true) - s = sprint(term, md.content, columns - 10; context=io) - lines = split(rstrip(s), '\n') - for i in eachindex(lines) - printstyled(io, ' '^margin, '│'; color=col, bold=true) - print(io, lines[i]) - i < lastindex(lines) && println(io) + title = if isempty(md.title) md.category else md.title end + print(io, ' '^margin, styled"{$accent,markdown_admonition:│ $title}", + '\n', ' '^margin, styled"{$accent,markdown_admonition:│}", '\n') + content = annotprint(term, md.content, columns - 10) + lines = split(rstrip(content), '\n') + for (i, line) in enumerate(lines) + print(io, ' '^margin, styled"{$accent,markdown_admonition:│}", line) + i < length(lines) && println(io) end end function term(io::IO, f::Footnote, columns) print(io, ' '^margin, "│ ") - printstyled(io, "[^$(f.id)]", bold=true) + print(io, styled"{markdown_footnote:[^$(f.id)]}") println(io, '\n', ' '^margin, '│') - s = sprint(term, f.text, columns - 10; context=io) - lines = split(rstrip(s), '\n') - for i in eachindex(lines) - print(io, ' '^margin, '│', lines[i]) - i < lastindex(lines) && println(io) + content = annotprint(term, f.text, columns - 10) + lines = split(rstrip(content), '\n') + for (i, line) in enumerate(lines) + print(io, ' '^margin, '│', line) + i < length(lines) && println(io) end end function term(io::IO, md::List, columns) for (i, point) in enumerate(md.items) - print(io, ' '^2margin, isordered(md) ? "$(i + md.ordered - 1). " : "• ") - print_wrapped(io, width = columns-(4margin+2), pre = ' '^(2margin+3), - i = 2margin+2) do io - term(io, point, columns - 10) - end - i < lastindex(md.items) && print(io, '\n', '\n') - end -end - -function _term_header(io::IO, md, char, columns) - text = terminline_string(io, md.text) - with_output_color(:bold, io) do io - pre = ' '^margin - print(io, pre) - line_no, lastline_width = print_wrapped(io, text, - width=columns - 4margin; pre) - line_width = min(lastline_width, columns) - if line_no > 1 - line_width = max(line_width, div(columns, 3)+length(pre)) + bullet = isordered(md) ? "$(i + md.ordered - 1)." : "• " + print(io, ' '^2margin, styled"{markdown_list:$bullet} ") + content = annotprint(term, point, columns - 10) + lines = split(rstrip(content), '\n') + for (l, line) in enumerate(lines) + l > 1 && print(io, ' '^(2margin+3)) + print(io, lstrip(line)) + l < length(lines) && println(io) end - header_width = max(0, line_width-length(pre)) - char != ' ' && header_width > 0 && print(io, '\n', ' '^(margin), char^header_width) + i < length(md.items) && print(io, '\n'^(1 + md.loose)) end end const _header_underlines = collect("≡=–-⋅ ") # TODO settle on another option with unicode e.g. "≡=≃–∼⋅" ? 
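As an illustrative aside (not part of the patch), the restyled admonition, footnote, and list renderers above can be exercised by sending a parsed document through the internal `Markdown.term` function into a `Base.AnnotatedIOBuffer`; the `markdown_admonition` and related faces are the ones this patch registers.

```julia
using Markdown  # assumes a build with the styled terminal renderer above

doc = Markdown.parse("""
!!! warning "Beware!"
    Something to watch out for.
""")

buf = Base.AnnotatedIOBuffer()
Markdown.term(buf, doc, 60)                        # render at a width of 60 columns
out = read(seekstart(buf), Base.AnnotatedString)   # annotated output with faces attached
print(out)                                         # shows ANSI styling on a color-capable terminal
```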
-function term(io::IO, md::Header{l}, columns) where l +function term(io::AnnotIO, md::Header{l}, columns) where l + face = Symbol("markdown_h$l") underline = _header_underlines[l] - _term_header(io, md, underline, columns) + pre = ' '^margin + local line_width + with_output_annotations(io, :face => face) do io + headline = annotprint(terminline, md.text) + lines = wraplines(headline, columns - 4margin) + for (i, line) in enumerate(lines) + print(io, pre, line) + i < length(lines) && println(io) + end + line_width = if length(lines) == 1 + min(textwidth(lines[end]), columns) + elseif length(lines) > 1 + max(textwidth(lines[end]), div(columns, 3)+length(pre)) + else + 0 + end + end + header_width = max(0, line_width) + if underline != ' ' && header_width > 0 + print(io, '\n', ' '^(margin)) + with_output_annotations(io -> print(io, underline^header_width), io, :face => face) + end end function term(io::IO, md::Code, columns) - with_output_color(:cyan, io) do io - L = lines(md.code) - for i in eachindex(L) - print(io, ' '^margin, L[i]) - i < lastindex(L) && println(io) + code = if md.language == "julia" + highlight(md.code) + elseif md.language == "julia-repl" || Base.startswith(md.language, "jldoctest") + hl = AnnotatedString(md.code) + for (; match) in eachmatch(r"(?:^|\n)julia>", hl) + StyledStrings.face!(match, :markdown_julia_prompt) + afterprompt = match.offset + ncodeunits(match) + 1 + _, exprend = Meta.parse(md.code, afterprompt, raise = false) + highlight!(hl[afterprompt:prevind(md.code, exprend)]) + if (nextspace = findnext(' ', md.code, exprend)) |> !isnothing + nextword = hl[exprend:prevind(hl, nextspace)] + if nextword == "ERROR:" + StyledStrings.face!(nextword, :error) + end + end end + hl + elseif md.language == "styled" + styled(md.code) + else + styled"{markdown_code:$(md.code)}" + end + lines = split(code, '\n') + for (i, line) in enumerate(lines) + print(io, ' '^margin, line) + i < length(lines) && println(io) end end function term(io::IO, tex::LaTeX, columns) - printstyled(io, ' '^margin, tex.formula, color=:magenta) + print(io, ' '^margin, styled"{markdown_latex:$(tex.formula)}") end term(io::IO, br::LineBreak, columns) = nothing # line breaks already printed between subsequent elements function term(io::IO, br::HorizontalRule, columns) - print(io, ' '^margin, '─'^(columns - 2margin)) + print(io, ' '^margin, styled"{markdown_hrule:$('─'^(columns - 2margin))}") +end + +function term(io::IO, md::MarkdownElement, columns) + a = IOContext(AnnotatedIOBuffer(), io) + term(a, md, columns) + print(io, read(seekstart(a.io), AnnotatedString)) end term(io::IO, x, _) = show(io, MIME"text/plain"(), x) # Inline Content -terminline_string(io::IO, md) = sprint(terminline, md; context=io) - terminline(io::IO, content...) 
= terminline(io, collect(content)) function terminline(io::IO, content::Vector) @@ -140,12 +177,12 @@ function terminline(io::IO, md::AbstractString) print(io, replace(md, r"[\s\t\n]+" => ' ')) end -function terminline(io::IO, md::Bold) - with_output_color(terminline, :bold, io, md.text) +function terminline(io::AnnotIO, md::Bold) + with_output_annotations(io -> terminline(io, md.text), io, :face => :bold) end -function terminline(io::IO, md::Italic) - with_output_color(terminline, :underline, io, md.text) +function terminline(io::AnnotIO, md::Italic) + with_output_annotations(io -> terminline(io, md.text), io, :face => :italic) end function terminline(io::IO, md::LineBreak) @@ -156,20 +193,36 @@ function terminline(io::IO, md::Image) terminline(io, "(Image: $(md.alt))") end -terminline(io::IO, f::Footnote) = with_output_color(terminline, :bold, io, "[^$(f.id)]") +function terminline(io::IO, f::Footnote) + print(io, styled"{markdown_footnote:[^$(f.id)]}") +end -function terminline(io::IO, md::Link) - url = !Base.startswith(md.url, "@ref") ? " ($(md.url))" : "" - text = terminline_string(io, md.text) - terminline(io, text, url) +function terminline(io::AnnotIO, md::Link) + annots = if occursin(r"^(https?|file)://", md.url) + (:face => :markdown_link, :link => md.url) + else + (:face => :markdown_link,) + end + with_output_annotations(io -> terminline(io, md.text), io, annots...) end function terminline(io::IO, code::Code) - printstyled(io, code.code, color=:cyan) + body = if code.language == "styled" + styled(code.code) + else + code.code + end + print(io, styled"{markdown_inlinecode:$body}") end function terminline(io::IO, tex::LaTeX) - printstyled(io, tex.formula, color=:magenta) + print(io, styled"{markdown_latex:$(tex.formula)}") +end + +function terminline(io::IO, md::MarkdownElement) + a = IOContext(AnnotatedIOBuffer(), io) + terminline(a, md) + print(io, read(seekstart(a.io), AnnotatedString)) end terminline(io::IO, x) = show(io, MIME"text/plain"(), x) diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl index 19d821a0254d7..35608f75b2426 100644 --- a/stdlib/Markdown/test/runtests.jl +++ b/stdlib/Markdown/test/runtests.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Test, Markdown -import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, plain, term, html, rst, Table, Code, LaTeX, Footnote +using Test, Markdown, StyledStrings +import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, insert_hlines, plain, term, html, rst, Table, Code, LaTeX, Footnote import Base: show # Basics @@ -233,7 +233,7 @@ World""" |> plain == "Hello\n\n---\n\nWorld\n" # multiple whitespace is ignored @test sprint(term, md"a b") == " a b" -@test sprint(term, md"[x](https://julialang.org)") == " x (https://julialang.org)" +@test sprint(term, md"[x](https://julialang.org)") == " x" @test sprint(term, md"[x](@ref)") == " x" @test sprint(term, md"[x](@ref something)") == " x" @test sprint(term, md"![x](https://julialang.org)") == " (Image: x)" @@ -298,6 +298,7 @@ end let doc = md""" 1. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij + 2. 
a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij """ str = sprint(term, doc, 50) @@ -376,8 +377,8 @@ table = md""" # mime output let out = @test sprint(show, "text/plain", book) == - " Title\n ≡≡≡≡≡\n\n Some discussion\n\n │ A quote\n\n Section important\n =================\n\n Some bolded\n\n • list1\n\n • list2" - @test sprint(show, "text/plain", md"#") == " " # edge case of empty header + " Title\n ≡≡≡≡≡\n\n Some discussion\n\n │ A quote\n\n Section important\n =================\n\n Some bolded\n\n • list1\n • list2" + @test sprint(show, "text/plain", md"#") == "" # edge case of empty header @test sprint(show, "text/markdown", book) == """ # Title @@ -1157,7 +1158,7 @@ let buf = IOBuffer() show(buf, "text/markdown", md"*emph*") @test String(take!(buf)) == "*emph*\n" show(IOContext(buf, :color=>true), "text/plain", md"*emph*") - @test String(take!(buf)) == " \e[4memph\e[24m" + @test String(take!(buf)) in (" \e[3memph\e[23m", " \e[4memph\e[24m") end let word = "Markdown" # disable underline when wrapping lines @@ -1166,8 +1167,8 @@ let word = "Markdown" # disable underline when wrapping lines long_italic_text = Markdown.parse('_' * join(fill(word, 10), ' ') * '_') show(ctx, MIME("text/plain"), long_italic_text) lines = split(String(take!(buf)), '\n') - @test endswith(lines[begin], Base.disable_text_style[:underline]) - @test startswith(lines[begin+1], ' '^Markdown.margin * Base.text_colors[:underline]) + @test endswith(lines[begin], r"\e\[2[34]m") + @test startswith(lines[begin+1], Regex(' '^Markdown.margin * "\e\\[[34]m")) end let word = "Markdown" # pre is of size Markdown.margin when wrapping title @@ -1176,7 +1177,9 @@ let word = "Markdown" # pre is of size Markdown.margin when wrapping title long_title = Markdown.parse("# " * join(fill(word, 3))) show(ctx, MIME("text/plain"), long_title) lines = split(String(take!(buf)), '\n') - @test all(startswith(Base.text_colors[:bold] * ' '^Markdown.margin), lines) + @test all(l -> startswith(l, ' '^Markdown.margin * StyledStrings.ANSI_STYLE_CODES.bold_weight) || + startswith(l, StyledStrings.ANSI_STYLE_CODES.bold_weight * ' '^Markdown.margin), + lines) end struct Struct49454 end @@ -1185,7 +1188,7 @@ Base.show(io::IO, ::Struct49454) = let buf = IOBuffer() ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], 10)) - show(stdout, MIME("text/plain"), md""" + show(ctx, MIME("text/plain"), md""" text without $(Struct49454()) underline. """) lines = split(String(take!(buf)), '\n') @@ -1259,8 +1262,9 @@ end s = @md_str """ Misc:\\ - line\\ + break """ - @test sprint(show, MIME("text/plain"), s) == " Misc:\n - line" + @test sprint(show, MIME("text/plain"), s) == " Misc:\n - line\n break" end @testset "pullrequest #41552: a code block has \\end{verbatim}" begin @@ -1293,3 +1297,18 @@ end # see issue #42139 @test md"<一轮红日初升>" |> html == """

<p>&lt;一轮红日初升&gt;</p>

\n""" end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Markdown)) +end + +@testset "Non-Markdown" begin + # https://github.com/JuliaLang/julia/issues/37765 + @test isa(insert_hlines(Text("foo")), Text) + # https://github.com/JuliaLang/julia/issues/37757 + @test insert_hlines(nothing) === nothing +end + +@testset "Lazy Strings" begin + @test Markdown.parse(lazy"foo") == Markdown.parse("foo") +end diff --git a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl deleted file mode 100644 index e46da42a9a638..0000000000000 --- a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl +++ /dev/null @@ -1,62 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -## dummy stub for https://github.com/JuliaBinaryWrappers/MbedTLS_jll.jl - -baremodule MbedTLS_jll -using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false - -const PATH_list = String[] -const LIBPATH_list = String[] - -export libmbedcrypto, libmbedtls, libmbedx509 - -# These get calculated in __init__() -const PATH = Ref("") -const LIBPATH = Ref("") -artifact_dir::String = "" -libmbedcrypto_handle::Ptr{Cvoid} = C_NULL -libmbedcrypto_path::String = "" -libmbedtls_handle::Ptr{Cvoid} = C_NULL -libmbedtls_path::String = "" -libmbedx509_handle::Ptr{Cvoid} = C_NULL -libmbedx509_path::String = "" - -if Sys.iswindows() - const libmbedcrypto = "libmbedcrypto.dll" - const libmbedtls = "libmbedtls.dll" - const libmbedx509 = "libmbedx509.dll" -elseif Sys.isapple() - const libmbedcrypto = "@rpath/libmbedcrypto.7.dylib" - const libmbedtls = "@rpath/libmbedtls.14.dylib" - const libmbedx509 = "@rpath/libmbedx509.1.dylib" -else - const libmbedcrypto = "libmbedcrypto.so.7" - const libmbedtls = "libmbedtls.so.14" - const libmbedx509 = "libmbedx509.so.1" -end - -function __init__() - global libmbedcrypto_handle = dlopen(libmbedcrypto) - global libmbedcrypto_path = dlpath(libmbedcrypto_handle) - global libmbedtls_handle = dlopen(libmbedtls) - global libmbedtls_path = dlpath(libmbedtls_handle) - global libmbedx509_handle = dlopen(libmbedx509) - global libmbedx509_path = dlpath(libmbedx509_handle) - global artifact_dir = dirname(Sys.BINDIR) - LIBPATH[] = dirname(libmbedtls_path) - push!(LIBPATH_list, LIBPATH[]) -end - -# JLLWrappers API compatibility shims. Note that not all of these will really make sense. -# For instance, `find_artifact_dir()` won't actually be the artifact directory, because -# there isn't one. It instead returns the overall Julia prefix. -is_available() = true -find_artifact_dir() = artifact_dir -dev_jll() = error("stdlib JLLs cannot be dev'ed") -best_wrapper = nothing -get_libmbedcrypto_path() =libmbedcrypto_path -get_libmbedtls_path() = libmbedtls_path -get_libmbedx509_path() = libmbedx509_path - -end # module MbedTLS_jll diff --git a/stdlib/MbedTLS_jll/test/runtests.jl b/stdlib/MbedTLS_jll/test/runtests.jl deleted file mode 100644 index 2d82fa564cd18..0000000000000 --- a/stdlib/MbedTLS_jll/test/runtests.jl +++ /dev/null @@ -1,10 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -using Test, Libdl, MbedTLS_jll - -@testset "MbedTLS_jll" begin - vstr = zeros(UInt8, 32) - ccall((:mbedtls_version_get_string, libmbedcrypto), Cvoid, (Ref{UInt8},), vstr) - vn = VersionNumber(unsafe_string(pointer(vstr))) - @test vn == v"2.28.2" -end diff --git a/stdlib/Mmap/Project.toml b/stdlib/Mmap/Project.toml index f3dab686d2eaa..ce4b65ccbb06a 100644 --- a/stdlib/Mmap/Project.toml +++ b/stdlib/Mmap/Project.toml @@ -1,5 +1,6 @@ name = "Mmap" uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Mmap/docs/src/index.md b/stdlib/Mmap/docs/src/index.md index 5c40f11db4a4c..5ec2d5064eaf0 100644 --- a/stdlib/Mmap/docs/src/index.md +++ b/stdlib/Mmap/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Mmap/docs/src/index.md" +``` + # Memory-mapped I/O Low level module for mmap (memory mapping of files). diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl index 629f53e8371ed..7d57bf053940d 100644 --- a/stdlib/Mmap/src/Mmap.jl +++ b/stdlib/Mmap/src/Mmap.jl @@ -86,6 +86,8 @@ grow!(::Anonymous,o::Integer,l::Integer) = return function grow!(io::IO, offset::Integer, len::Integer) pos = position(io) filelen = filesize(io) + # If non-regular file skip trying to grow since we know that will fail the ftruncate syscall + filelen == 0 && !isfile(io) && return if filelen < offset + len failure = ccall(:jl_ftruncate, Cint, (Cint, Int64), fd(io), offset+len) Base.systemerror(:ftruncate, failure != 0) @@ -208,18 +210,38 @@ function mmap(io::IO, mmaplen = (offset - offset_page) + len file_desc = gethandle(io) + szfile = convert(Csize_t, len + offset) + requestedSizeLarger = false + if !(io isa Mmap.Anonymous) + requestedSizeLarger = szfile > filesize(io) + end # platform-specific mmapping @static if Sys.isunix() prot, flags, iswrite = settings(file_desc, shared) - iswrite && grow && grow!(io, offset, len) + if requestedSizeLarger && isfile(io) # add a condition to this line to ensure it only checks files + if iswrite + if grow + grow!(io, offset, len) + else + throw(ArgumentError("requested size $szfile larger than file size $(filesize(io)), but requested not to grow")) + end + else + throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + end + end # mmap the file ptr = ccall(:jl_mmap, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t, Cint, Cint, RawFD, Int64), C_NULL, mmaplen, prot, flags, file_desc, offset_page) systemerror("memory mapping failed", reinterpret(Int, ptr) == -1) else name, readonly, create = settings(io) - szfile = convert(Csize_t, len + offset) - readonly && szfile > filesize(io) && throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + if requestedSizeLarger + if readonly + throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + elseif !grow + throw(ArgumentError("requested size $szfile larger than file size $(filesize(io)), but requested not to grow")) + end + end handle = create ? ccall(:CreateFileMappingW, stdcall, Ptr{Cvoid}, (OS_HANDLE, Ptr{Cvoid}, DWORD, DWORD, DWORD, Cwstring), file_desc, C_NULL, readonly ? 
PAGE_READONLY : PAGE_READWRITE, szfile >> 32, szfile & typemax(UInt32), name) : ccall(:OpenFileMappingW, stdcall, Ptr{Cvoid}, (DWORD, Cint, Cwstring), @@ -231,7 +253,7 @@ function mmap(io::IO, end # os-test # convert mmapped region to Julia Array at `ptr + (offset - offset_page)` since file was mapped at offset_page A = unsafe_wrap(Array, convert(Ptr{T}, UInt(ptr) + UInt(offset - offset_page)), dims) - finalizer(A) do x + finalizer(A.ref.mem) do x @static if Sys.isunix() systemerror("munmap", ccall(:munmap, Cint, (Ptr{Cvoid}, Int), ptr, mmaplen) != 0) else @@ -342,8 +364,9 @@ Forces synchronization between the in-memory version of a memory-mapped `Array` [`BitArray`](@ref) and the on-disk version. """ function sync!(m::Array, flags::Integer=MS_SYNC) - offset = rem(UInt(pointer(m)), PAGESIZE) - ptr = pointer(m) - offset + ptr = pointer(m) + offset = rem(UInt(ptr), PAGESIZE) + ptr = ptr - offset mmaplen = sizeof(m) + offset GC.@preserve m @static if Sys.isunix() systemerror("msync", @@ -404,8 +427,9 @@ Advises the kernel on the intended usage of the memory-mapped `array`, with the `flag` being one of the available `MADV_*` constants. """ function madvise!(m::Array, flag::Integer=MADV_NORMAL) - offset = rem(UInt(pointer(m)), PAGESIZE) - ptr = pointer(m) - offset + ptr = pointer(m) + offset = rem(UInt(ptr), PAGESIZE) + ptr = ptr - offset mmaplen = sizeof(m) + offset GC.@preserve m begin systemerror("madvise", diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl index 0b3cb0b9f1a42..03e4b48d95f7a 100644 --- a/stdlib/Mmap/test/runtests.jl +++ b/stdlib/Mmap/test/runtests.jl @@ -100,9 +100,9 @@ if !(Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le) s = open(file, "r") m = mmap(s) @test_throws ReadOnlyMemoryError m[5] = UInt8('x') # tries to setindex! on read-only array - finalize(m); m=nothing; GC.gc() + finalize(m); m=nothing; end - +GC.gc() write(file, "Hello World\n") s = open(file, "r") @@ -336,6 +336,11 @@ open(file, "r+") do s finalize(A); A = nothing; GC.gc() A = mmap(s, Vector{UInt8}, (10,), 1) Mmap.sync!(A) - finalize(A); A = nothing; GC.gc() + finalize(A); A = nothing; end +GC.gc() rm(file) + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Mmap)) +end diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml index cef860fda4acd..2f9bf67e22a74 100644 --- a/stdlib/MozillaCACerts_jll/Project.toml +++ b/stdlib/MozillaCACerts_jll/Project.toml @@ -1,6 +1,7 @@ name = "MozillaCACerts_jll" uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2023.01.10" +# Keep in sync with `deps/libgit2.version`. 
+version = "2024.12.31" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl index 244c1204563d5..1d5df0236ae9e 100644 --- a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl +++ b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl @@ -4,7 +4,6 @@ baremodule MozillaCACerts_jll using Base -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version index 64d3fab9d7bf4..69b56e03ed89e 100644 --- a/stdlib/NetworkOptions.version +++ b/stdlib/NetworkOptions.version @@ -1,4 +1,4 @@ NETWORKOPTIONS_BRANCH = master -NETWORKOPTIONS_SHA1 = f7bbeb66f05fc651adb12758b650e8630a998fbd +NETWORKOPTIONS_SHA1 = c090626d3feee6d6a5c476346d22d6147c9c6d2d NETWORKOPTIONS_GIT_URL := https://github.com/JuliaLang/NetworkOptions.jl.git NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1 diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml index 529c9945e65f1..07a81d3c1d547 100644 --- a/stdlib/OpenBLAS_jll/Project.toml +++ b/stdlib/OpenBLAS_jll/Project.toml @@ -1,6 +1,6 @@ name = "OpenBLAS_jll" uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+0" +version = "0.3.29+0" [deps] # See note in `src/OpenBLAS_jll.jl` about this dependency. @@ -9,7 +9,7 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.9" +julia = "1.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl index a0c11ab047142..2f151f63f4413 100644 --- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl +++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl @@ -13,7 +13,6 @@ using Base, Libdl, Base.BinaryPlatforms # using CompilerSupportLibraries_jll # Because of this however, we have to manually load the libraries we # _do_ care about, namely libgfortran -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/OpenBLAS_jll/test/runtests.jl b/stdlib/OpenBLAS_jll/test/runtests.jl index 1d944bab8cd67..76242b2e4080e 100644 --- a/stdlib/OpenBLAS_jll/test/runtests.jl +++ b/stdlib/OpenBLAS_jll/test/runtests.jl @@ -13,5 +13,5 @@ else end @testset "OpenBLAS_jll" begin - @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) != nothing + @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) !== nothing end diff --git a/stdlib/OpenLibm_jll/Project.toml b/stdlib/OpenLibm_jll/Project.toml index 7f02fbc81ce1b..431528ee3f400 100644 --- a/stdlib/OpenLibm_jll/Project.toml +++ b/stdlib/OpenLibm_jll/Project.toml @@ -1,6 +1,6 @@ name = "OpenLibm_jll" uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+0" +version = "0.8.5+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl index f2dee45a279cd..297cd25512894 100644 --- a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl +++ b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenLibm_jll.jl baremodule OpenLibm_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 
infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/MbedTLS_jll/Project.toml b/stdlib/OpenSSL_jll/Project.toml similarity index 66% rename from stdlib/MbedTLS_jll/Project.toml rename to stdlib/OpenSSL_jll/Project.toml index 2e8d0d384f88a..0773311e11043 100644 --- a/stdlib/MbedTLS_jll/Project.toml +++ b/stdlib/OpenSSL_jll/Project.toml @@ -1,13 +1,13 @@ -name = "MbedTLS_jll" -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+0" +name = "OpenSSL_jll" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.15+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.8" +julia = "1.6" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/OpenSSL_jll/src/OpenSSL_jll.jl b/stdlib/OpenSSL_jll/src/OpenSSL_jll.jl new file mode 100644 index 0000000000000..bba9a0a299de9 --- /dev/null +++ b/stdlib/OpenSSL_jll/src/OpenSSL_jll.jl @@ -0,0 +1,58 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## dummy stub for https://github.com/JuliaBinaryWrappers/OpenSSL_jll.jl + +baremodule OpenSSL_jll +using Base, Libdl, Base.BinaryPlatforms + +const PATH_list = String[] +const LIBPATH_list = String[] + +export libcrypto, libssl + +# These get calculated in __init__() +const PATH = Ref("") +const LIBPATH = Ref("") +artifact_dir::String = "" +libcrypto_handle::Ptr{Cvoid} = C_NULL +libcrypto_path::String = "" +libssl_handle::Ptr{Cvoid} = C_NULL +libssl_path::String = "" + +if Sys.iswindows() + if arch(HostPlatform()) == "x86_64" + const libcrypto = "libcrypto-3-x64.dll" + const libssl = "libssl-3-x64.dll" + else + const libcrypto = "libcrypto-3.dll" + const libssl = "libssl-3.dll" + end +elseif Sys.isapple() + const libcrypto = "@rpath/libcrypto.3.dylib" + const libssl = "@rpath/libssl.3.dylib" +else + const libcrypto = "libcrypto.so.3" + const libssl = "libssl.so.3" +end + +function __init__() + global libcrypto_handle = dlopen(libcrypto) + global libcrypto_path = dlpath(libcrypto_handle) + global libssl_handle = dlopen(libssl) + global libssl_path = dlpath(libssl_handle) + global artifact_dir = dirname(Sys.BINDIR) + LIBPATH[] = dirname(libssl_path) + push!(LIBPATH_list, LIBPATH[]) +end + +# JLLWrappers API compatibility shims. Note that not all of these will really make sense. +# For instance, `find_artifact_dir()` won't actually be the artifact directory, because +# there isn't one. It instead returns the overall Julia prefix. +is_available() = true +find_artifact_dir() = artifact_dir +dev_jll() = error("stdlib JLLs cannot be dev'ed") +best_wrapper = nothing +get_libcrypto_path() = libcrypto_path +get_libssl_path() = libssl_path + +end # module OpenSSL_jll diff --git a/stdlib/OpenSSL_jll/test/runtests.jl b/stdlib/OpenSSL_jll/test/runtests.jl new file mode 100644 index 0000000000000..35431d04bfcac --- /dev/null +++ b/stdlib/OpenSSL_jll/test/runtests.jl @@ -0,0 +1,10 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test, Libdl, OpenSSL_jll + +@testset "OpenSSL_jll" begin + major = ccall((:OPENSSL_version_major, libcrypto), Cuint, ()) + minor = ccall((:OPENSSL_version_minor, libcrypto), Cuint, ()) + patch = ccall((:OPENSSL_version_patch, libcrypto), Cuint, ()) + @test VersionNumber(major, minor, patch) == v"3.0.15" +end diff --git a/stdlib/PCRE2_jll/Project.toml b/stdlib/PCRE2_jll/Project.toml index d630c04383bfb..24ac196a3b8a9 100644 --- a/stdlib/PCRE2_jll/Project.toml +++ b/stdlib/PCRE2_jll/Project.toml @@ -1,6 +1,6 @@ name = "PCRE2_jll" uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" -version = "10.42.0+0" +version = "10.44.0+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/PCRE2_jll/src/PCRE2_jll.jl b/stdlib/PCRE2_jll/src/PCRE2_jll.jl index e7f685820830b..d825ac74db5a8 100644 --- a/stdlib/PCRE2_jll/src/PCRE2_jll.jl +++ b/stdlib/PCRE2_jll/src/PCRE2_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/PCRE2_jll.jl baremodule PCRE2_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/PCRE2_jll/test/runtests.jl b/stdlib/PCRE2_jll/test/runtests.jl index d593b07af31ce..21df2ec430e0e 100644 --- a/stdlib/PCRE2_jll/test/runtests.jl +++ b/stdlib/PCRE2_jll/test/runtests.jl @@ -6,5 +6,5 @@ using Test, Libdl, PCRE2_jll vstr = zeros(UInt8, 32) @test ccall((:pcre2_config_8, libpcre2_8), Cint, (UInt32, Ref{UInt8}), 11, vstr) > 0 vn = VersionNumber(split(unsafe_string(pointer(vstr)), " ")[1]) - @test vn == v"10.42.0" + @test vn == v"10.44.0" end diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 6551c7e24049f..4240c77105583 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = e8197dd0ed8132d4a7619f3657363c8415249c47 +PKG_SHA1 = bc9fb21b1f2d72038491eff938673fc5fbc99445 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 diff --git a/stdlib/Printf/Project.toml b/stdlib/Printf/Project.toml index 9fa4e3633cae1..019b7e94ef9bd 100644 --- a/stdlib/Printf/Project.toml +++ b/stdlib/Printf/Project.toml @@ -1,5 +1,6 @@ name = "Printf" uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" [deps] Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/stdlib/Printf/docs/src/index.md b/stdlib/Printf/docs/src/index.md index 48e38e2b2ce5b..1c6f98ce22e58 100644 --- a/stdlib/Printf/docs/src/index.md +++ b/stdlib/Printf/docs/src/index.md @@ -1,6 +1,14 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Printf/docs/src/index.md" +``` + # [Printf](@id man-printf) +The `Printf` module provides formatted output functions similar to the C standard library's `printf`. It allows formatted printing to an output stream or to a string. + ```@docs Printf.@printf Printf.@sprintf +Printf.Format +Printf.format ``` diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl index cb336a8d9c18b..fd38b3ebd3573 100644 --- a/stdlib/Printf/src/Printf.jl +++ b/stdlib/Printf/src/Printf.jl @@ -1,11 +1,15 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license - +""" +The `Printf` module provides formatted output functions similar to the C standard library's `printf`. It allows formatted printing to an output stream or to a string. 
+""" module Printf using Base.Ryu export @printf, @sprintf +public format, Format + # format specifier categories const Ints = Union{Val{'d'}, Val{'i'}, Val{'u'}, Val{'x'}, Val{'X'}, Val{'o'}} const Floats = Union{Val{'e'}, Val{'E'}, Val{'f'}, Val{'F'}, Val{'g'}, Val{'G'}, Val{'a'}, Val{'A'}} @@ -237,7 +241,7 @@ function Format(f::AbstractString) !(b in b"diouxXDOUeEfFgGaAcCsSpn") && throw(InvalidFormatStringError("'$(Char(b))' is not a valid type specifier", f, last_percent_pos, pos-1)) type = Val{Char(b)} if type <: Ints && precision > 0 - # note - we should also set zero to false if dynamic precison > 0 + # note - we should also set zero to false if dynamic precision > 0 # this is taken care of in fmt() for Ints zero = false elseif (type <: Strings || type <: Chars) && !parsedprecdigits @@ -293,11 +297,11 @@ end @inline function rmdynamic(spec::Spec{T}, args, argp) where {T} zero, width, precision = spec.zero, spec.width, spec.precision if spec.dynamic_width - width = args[argp] + width = args[argp]::Integer argp += 1 end if spec.dynamic_precision - precision = args[argp] + precision = args[argp]::Integer if zero && T <: Ints && precision > 0 zero = false end @@ -306,12 +310,12 @@ end (Spec{T}(spec.leftalign, spec.plus, spec.space, zero, spec.hash, width, precision, false, false), argp) end -@inline function fmt(buf, pos, args, argp, spec::Spec{T}) where {T} +Base.@constprop :aggressive function fmt(buf, pos, args, argp, spec::Spec{T}) where {T} spec, argp = rmdynamic(spec, args, argp) (fmt(buf, pos, args[argp], spec), argp+1) end -@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars} +function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars} leftalign, width = spec.leftalign, spec.width c = Char(first(arg)) w = textwidth(c) @@ -332,7 +336,7 @@ end end # strings -@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings} +function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings} leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision str = string(arg) slen = textwidth(str)::Int + (hash ? arg isa AbstractString ? 2 : 1 : 0) @@ -379,7 +383,7 @@ toint(x::Rational) = Integer(x) fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} = fmt(buf, pos, arg, floatfmt(spec)) -@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints} +function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints} leftalign, plus, space, zero, hash, width, prec = spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision bs = base(T) @@ -493,7 +497,7 @@ _snprintf(ptr, siz, str, arg) = # seems like a dangerous thing to do. const __BIG_FLOAT_MAX__ = 8192 -@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats} +function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats} leftalign, plus, space, zero, hash, width, prec = spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision x = tofloat(arg) @@ -650,7 +654,7 @@ const __BIG_FLOAT_MAX__ = 8192 else # right aligned n = width - (newpos - pos) - if zero + if zero && isfinite(x) ex = (arg < 0 || (plus | space)) + (T <: Union{Val{'a'}, Val{'A'}} ? 2 : 0) so = pos + ex len = (newpos - pos) - ex @@ -927,7 +931,8 @@ for more details on C `printf` support. """ function format end -function format(io::IO, f::Format, args...) # => Nothing +# Since it will specialize on `f`, which has a Tuple-type often of length(args), we might as well specialize on `args` too. 
+function format(io::IO, f::Format, args::Vararg{Any,N}) where N # => Nothing f.numarguments == length(args) || argmismatch(f.numarguments, length(args)) buf = Base.StringVector(computelen(f.substringranges, f.formats, args)) pos = format(buf, 1, f, args...) @@ -935,7 +940,7 @@ function format(io::IO, f::Format, args...) # => Nothing return end -function format(f::Format, args...) # => String +function format(f::Format, args::Vararg{Any,N}) where N # => String f.numarguments == length(args) || argmismatch(f.numarguments, length(args)) buf = Base.StringVector(computelen(f.substringranges, f.formats, args)) pos = format(buf, 1, f, args...) diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl index 33970f78648e2..abe547c00ed0d 100644 --- a/stdlib/Printf/test/runtests.jl +++ b/stdlib/Printf/test/runtests.jl @@ -116,12 +116,15 @@ end @test (Printf.@sprintf "%+f" Inf) == "+Inf" @test (Printf.@sprintf "% f" Inf) == " Inf" @test (Printf.@sprintf "% #f" Inf) == " Inf" + @test (Printf.@sprintf "%07f" Inf) == " Inf" @test (Printf.@sprintf "%f" -Inf) == "-Inf" @test (Printf.@sprintf "%+f" -Inf) == "-Inf" + @test (Printf.@sprintf "%07f" -Inf) == " -Inf" @test (Printf.@sprintf "%f" NaN) == "NaN" @test (Printf.@sprintf "%+f" NaN) == "+NaN" @test (Printf.@sprintf "% f" NaN) == " NaN" @test (Printf.@sprintf "% #f" NaN) == " NaN" + @test (Printf.@sprintf "%07f" NaN) == " NaN" @test (Printf.@sprintf "%e" big"Inf") == "Inf" @test (Printf.@sprintf "%e" big"NaN") == "NaN" @@ -169,12 +172,15 @@ end @test (Printf.@sprintf "%+e" Inf) == "+Inf" @test (Printf.@sprintf "% e" Inf) == " Inf" @test (Printf.@sprintf "% #e" Inf) == " Inf" + @test (Printf.@sprintf "%07e" Inf) == " Inf" @test (Printf.@sprintf "%e" -Inf) == "-Inf" @test (Printf.@sprintf "%+e" -Inf) == "-Inf" + @test (Printf.@sprintf "%07e" -Inf) == " -Inf" @test (Printf.@sprintf "%e" NaN) == "NaN" @test (Printf.@sprintf "%+e" NaN) == "+NaN" @test (Printf.@sprintf "% e" NaN) == " NaN" @test (Printf.@sprintf "% #e" NaN) == " NaN" + @test (Printf.@sprintf "%07e" NaN) == " NaN" @test (Printf.@sprintf "%e" big"Inf") == "Inf" @test (Printf.@sprintf "%e" big"NaN") == "NaN" @@ -1145,4 +1151,11 @@ end @test_throws Printf.InvalidFormatStringError Printf.Format("%z") end +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Printf)) +end + +# issue #52749 +@test @sprintf("%.160g", 1.38e-23) == "1.380000000000000060010582465734078799297660966782642624395399644741944111814291318296454846858978271484375e-23" + end # @testset "Printf" diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml index 334d475832b6d..6b70f9c7cd19d 100644 --- a/stdlib/Profile/Project.toml +++ b/stdlib/Profile/Project.toml @@ -1,14 +1,19 @@ name = "Profile" uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +version = "1.11.0" [deps] -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" + +[compat] +StyledStrings = "1.11.0" [extras] Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Base64", "Logging", "Serialization", "Test"] +test = ["Base64", "InteractiveUtils", "Logging", "Serialization", "Test"] diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index adb91cebb8c46..0b358e5decfa9 100644 --- a/stdlib/Profile/docs/src/index.md +++ 
b/stdlib/Profile/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Profile/docs/src/index.md" +``` + # [Profiling](@id lib-profiling) ## CPU Profiling @@ -34,7 +38,7 @@ First, a single stack trace at the instant that the signal was thrown is shown, followed by the profile report at the next yield point, which may be at task completion for code without yield points e.g. tight loops. -Optionally set environment variable `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT` to `1` to also automatically collect a +Optionally set environment variable [`JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`](@ref JULIA_PROFILE_PEEK_HEAP_SNAPSHOT) to `1` to also automatically collect a [heap snapshot](@ref Heap-Snapshots). ```julia-repl @@ -106,6 +110,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as ` ```@docs Profile.Allocs.clear +Profile.Allocs.print Profile.Allocs.fetch Profile.Allocs.start Profile.Allocs.stop @@ -129,5 +134,29 @@ Traces and records julia objects on the heap. This only records objects known to garbage collector. Memory allocated by external libraries not managed by the garbage collector will not show up in the snapshot. +To avoid OOMing while recording the snapshot, we added a streaming option to stream out the heap snapshot +into four files, + +```julia-repl +julia> using Profile + +julia> Profile.take_heap_snapshot("snapshot"; streaming=true) +``` + +where "snapshot" is the filepath as the prefix for the generated files. + +Once the snapshot files are generated, they could be assembled offline with the following command: + +```julia-repl +julia> using Profile + +julia> Profile.HeapSnapshot.assemble_snapshot("snapshot", "snapshot.heapsnapshot") +``` + The resulting heap snapshot file can be uploaded to chrome devtools to be viewed. For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots). +An alternative for analyzing Chromium heap snapshots is with the VS Code extension +`ms-vscode.vscode-js-profile-flame`. + +The Firefox heap snapshots are of a different format, and Firefox currently may +*not* be used for viewing the heap snapshots generated by Julia. diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index e45f4dca9607f..9d0b18cb468ca 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -1,5 +1,12 @@ module Allocs +global print # Allocs.print is separate from both Base.print and Profile.print +public @profile, + clear, + print, + fetch + +using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree using Base.StackTraces: StackTrace, StackFrame, lookup using Base: InterpreterIP @@ -33,7 +40,7 @@ end Profile.Allocs.@profile [sample_rate=0.1] expr Profile allocations that happen during `expr`, returning -both the result and and AllocResults struct. +both the result and AllocResults struct. A sample rate of 1.0 will record everything; 0.0 will record nothing. @@ -47,18 +54,17 @@ julia> last(sort(results.allocs, by=x->x.size)) Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576) ``` -The best way to visualize these is currently with the -[PProf.jl](https://github.com/JuliaPerf/PProf.jl) package, -by invoking `PProf.Allocs.pprof`. +See the profiling tutorial in the Julia documentation for more information. + +!!! compat "Julia 1.11" -!!! 
note - The current implementation of the Allocations Profiler does not - capture types for all allocations. Allocations for which the profiler - could not capture the type are represented as having type - `Profile.Allocs.UnknownType`. + Older versions of Julia could not capture types in all cases. In older versions of + Julia, if you see an allocation of type `Profile.Allocs.UnknownType`, it means that + the profiler doesn't know what type of object was allocated. This mainly happened when + the allocation was coming from generated code produced by the compiler. See + [issue #43688](https://github.com/JuliaLang/julia/issues/43688) for more info. - You can read more about the missing types and the plan to improve this, here: - . + Since Julia 1.11, all allocations should have a type reported. !!! compat "Julia 1.8" The allocation profiler was added in Julia 1.8. @@ -138,7 +144,7 @@ end # Without this, the Alloc's stacktrace prints for lines and lines and lines... function Base.show(io::IO, a::Alloc) stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : "" - print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") + Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") end const BacktraceCache = Dict{BTElement,Vector{StackFrame}} @@ -216,4 +222,201 @@ function stacktrace_memoized( return stack end +function warning_empty() + @warn """ + There were no samples collected. + Run your program longer (perhaps by running it multiple times), + or adjust the frequency of samples to record every event with + the `sample_rate=1.0` kwarg.""" +end + + +""" + Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...) + +Prints profiling results to `io` (by default, `stdout`). If you do not +supply a `data` vector, the internal buffer of accumulated backtraces +will be used. + +See `Profile.print` for an explanation of the valid keyword arguments. +""" +print(; kwargs...) = + Profile.print(stdout, fetch(); kwargs...) +print(io::IO; kwargs...) = + Profile.print(io, fetch(); kwargs...) +print(io::IO, data::AllocResults; kwargs...) = + Profile.print(io, data; kwargs...) +Profile.print(data::AllocResults; kwargs...) = + Profile.print(stdout, data; kwargs...) 
+ +function Profile.print(io::IO, + data::AllocResults, + ; + format = :tree, + C = false, + #combine = true, + maxdepth::Int = typemax(Int), + mincount::Int = 0, + noisefloor = 0, + sortedby::Symbol = :filefuncline, + groupby::Union{Symbol,AbstractVector{Symbol}} = :none, + recur::Symbol = :off, + ) + pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur) + Profile.print(io, data, pf, format) + return +end + +function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol) + cols::Int = Base.displaysize(io)[2] + fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized")) + data = data.allocs + if format === :tree + tree(io, data, cols, fmt) + elseif format === :flat + fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off")) + flat(io, data, cols, fmt) + else + throw(ArgumentError("output format $(repr(format)) not recognized")) + end + nothing +end + + +function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T + lilist = StackFrame[] + n = Int[] + m = Int[] + lilist_idx = Dict{T, Int}() + recursive = Set{T}() + totalbytes = 0 + for r in data + first = true + empty!(recursive) + nb = r.size # or 1 for counting + totalbytes += nb + for frame in r.stacktrace + !C && frame.from_c && continue + key = (T === UInt64 ? ip : frame) + idx = get!(lilist_idx, key, length(lilist) + 1) + if idx > length(lilist) + push!(recursive, key) + push!(lilist, frame) + push!(n, nb) + push!(m, 0) + elseif !(key in recursive) + push!(recursive, key) + n[idx] += nb + end + if first + m[idx] += nb + first = false + end + end + end + @assert length(lilist) == length(n) == length(m) == length(lilist_idx) + return (lilist, n, m, totalbytes) +end + +function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat) + fmt.combine || error(ArgumentError("combine=false")) + lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C) + filenamemap = Profile.FileNameMap() + if isempty(lilist) + warning_empty() + return true + end + print_flat(io, lilist, n, m, cols, filenamemap, fmt) + Base.println(io, "Total snapshots: ", length(data)) + Base.println(io, "Total bytes: ", totalbytes) + return false +end + +function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T} + tops = Vector{StackFrameTree{T}}() + build = Dict{T, StackFrameTree{T}}() + for r in all + first = true + nb = r.size # or 1 for counting + root.recur = 0 + root.count += nb + parent = root + for i in reverse(eachindex(r.stacktrace)) + frame = r.stacktrace[i] + key = (T === UInt64 ? ip : frame) + if (recur === :flat && !frame.from_c) || recur === :flatc + # see if this frame already has a parent + this = get!(build, frame, parent) + if this !== parent + # Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree + push!(tops, parent) + parent = this + end + end + !C && frame.from_c && continue + this = get!(StackFrameTree{T}, parent.down, key) + if recur === :off || this.recur == 0 + this.frame = frame + this.up = parent + this.count += nb + this.recur = 1 + else + this.count_recur += 1 + end + parent = this + end + parent.overhead += nb + if recur !== :off + # We mark all visited nodes to so we'll only count those branches + # once for each backtrace. Reset that now for the next backtrace. 
+ empty!(build) + push!(tops, parent) + for top in tops + while top.recur != 0 + top.max_recur < top.recur && (top.max_recur = top.recur) + top.recur = 0 + top = top.up + end + end + empty!(tops) + end + let this = parent + while this !== root + this.flat_count += nb + this = this.up + end + end + end + function cleanup!(node::StackFrameTree) + stack = [node] + while !isempty(stack) + node = pop!(stack) + node.recur = 0 + empty!(node.builder_key) + empty!(node.builder_value) + append!(stack, values(node.down)) + end + nothing + end + cleanup!(root) + return root +end + +function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat) + fmt.combine || error(ArgumentError("combine=false")) + if fmt.combine + root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur) + else + root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur) + end + print_tree(io, root, cols, fmt, false) + if isempty(root.down) + warning_empty() + return true + end + Base.println(io, "Total snapshots: ", length(data)) + Base.println(io, "Total bytes: ", root.count) + return false +end + end diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 71bbfc70ee937..f59b49d8a4a36 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1,19 +1,53 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license """ -Profiling support, main entry point is the [`@profile`](@ref) macro. + Profile + +Profiling support. + +## CPU profiling +- `@profile foo()` to profile a specific call. +- `Profile.print()` to print the report. Paths are clickable links in supported terminals and specialized for JULIA_EDITOR etc. +- `Profile.clear()` to clear the buffer. +- Send a SIGUSR1 (on linux) or SIGINFO (on macOS/BSD) signal to the process to automatically trigger a profile and print. i.e. `kill -s SIGUSR1/SIGINFO 1234`, where 1234 is the pid of the julia process. On macOS & BSD platforms `ctrl-t` can be used directly. + +## Memory profiling +- `Profile.Allocs.@profile [sample_rate=0.1] foo()` to sample allocations within a specific call. A sample rate of 1.0 will record everything; 0.0 will record nothing. +- `Profile.Allocs.print()` to print the report. +- `Profile.Allocs.clear()` to clear the buffer. + +## Heap profiling +- `Profile.take_heap_snapshot()` to record a `.heapsnapshot` record of the heap. +- Set `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT=true` to capture a heap snapshot when signal $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") is sent. """ module Profile +global print +export @profile, @profile_walltime +public clear, + print, + fetch, + retrieve, + add_fake_meta, + flatten, + callers, + init, + take_heap_snapshot, + take_page_profile, + clear_malloc_data, + Allocs + import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame +import Base: AnnotatedString +using StyledStrings: @styled_str const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping) +const slash = Sys.iswindows() ? "\\" : "/" + # deprecated functions: use `getdict` instead lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip)) -export @profile - """ @profile @@ -31,12 +65,34 @@ macro profile(ex) end end +""" + @profile_walltime + +`@profile_walltime ` runs your expression while taking periodic backtraces of a sample of all live tasks (both running and not running). +These are appended to an internal buffer of backtraces. 
+ +It can be configured via `Profile.init`, same as the `Profile.@profile`, and that you can't use `@profile` simultaneously with `@profile_walltime`. + +As mentioned above, since this tool sample not only running tasks, but also sleeping tasks and tasks performing IO, +it can be used to diagnose performance issues such as lock contention, IO bottlenecks, and other issues that are not visible in the CPU profile. +""" +macro profile_walltime(ex) + return quote + try + start_timer(true) + $(esc(ex)) + finally + stop_timer() + end + end +end + # An internal function called to show the report after an information request (SIGINFO or SIGUSR1). function _peek_report() - iob = IOBuffer() + iob = Base.AnnotatedIOBuffer() ioc = IOContext(IOContext(iob, stderr), :displaysize=>displaysize(stderr)) print(ioc, groupby = [:thread, :task]) - Base.print(stderr, String(take!(iob))) + Base.print(stderr, read(seekstart(iob), AnnotatedString)) end # This is a ref so that it can be overridden by other profile info consumers. const peek_report = Ref{Function}(_peek_report) @@ -163,10 +219,13 @@ const META_OFFSET_THREADID = 5 """ print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...) + print(path::String, [cols::Int = 1000], [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...) Prints profiling results to `io` (by default, `stdout`). If you do not supply a `data` vector, the internal buffer of accumulated backtraces -will be used. +will be used. Paths are clickable links in supported terminals and +specialized for [`JULIA_EDITOR`](@ref) with line numbers, or just file +links if no editor is set. The keyword arguments can be any combination of: @@ -201,6 +260,13 @@ The keyword arguments can be any combination of: - `tasks::Union{Int,AbstractVector{Int}}` -- Specify which tasks to include snapshots from in the report. Note that this does not control which tasks samples are collected within. + +!!! compat "Julia 1.8" + The `groupby`, `threads`, and `tasks` keyword arguments were introduced in Julia 1.8. + +!!! note + Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report. + """ function print(io::IO, data::Vector{<:Unsigned} = fetch(), @@ -220,7 +286,7 @@ function print(io::IO, pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur) if groupby === :none - print(io, data, lidict, pf, format, threads, tasks, false) + print_group(io, data, lidict, pf, format, threads, tasks, false) else if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]]) error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]")) @@ -229,7 +295,7 @@ function print(io::IO, end any_nosamples = true if format === :tree - Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line Function\n") Base.print(io, "=========================================================\n") end if groupby == [:task, :thread] @@ -243,8 +309,8 @@ function print(io::IO, nl = length(threadids) > 1 ? 
"\n" : "" printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color()) for threadid in threadids - printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color()) - nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) + printstyled(io, " Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color()) + nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) end @@ -259,10 +325,10 @@ function print(io::IO, any_nosamples = true else nl = length(taskids) > 1 ? "\n" : "" - printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color()) + printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid)))$nl"; bold=true, color=Base.info_color()) for taskid in taskids printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) - nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) end @@ -274,7 +340,7 @@ function print(io::IO, isempty(taskids) && (any_nosamples = true) for taskid in taskids printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) - nosamples = print(io, data, lidict, pf, format, threads, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true) nosamples && (any_nosamples = true) println(io) end @@ -283,8 +349,8 @@ function print(io::IO, threadids = intersect(get_thread_ids(data), threads) isempty(threadids) && (any_nosamples = true) for threadid in threadids - printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color()) - nosamples = print(io, data, lidict, pf, format, threadid, tasks, true) + printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color()) + nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true) nosamples && (any_nosamples = true) println(io) end @@ -294,6 +360,13 @@ function print(io::IO, return end +function print(path::String, cols::Int = 1000, args...; kwargs...) + open(path, "w") do io + ioc = IOContext(io, :displaysize=>(1000,cols)) + print(ioc, args...; kwargs...) + end +end + """ print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...) @@ -306,7 +379,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) = print(stdout, data, lidict; kwargs...) 
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, +function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool = false) cols::Int = Base.displaysize(io)[2] @@ -362,9 +435,10 @@ end function has_meta(data) for i in 6:length(data) - data[i] == 0 || continue # first block end null - data[i - 1] == 0 || continue # second block end null - data[i - META_OFFSET_SLEEPSTATE] in 1:2 || continue + data[i] == 0 || continue # first block end null + data[i - 1] == 0 || continue # second block end null + data[i - META_OFFSET_SLEEPSTATE] in 1:3 || continue # 1 for not sleeping, 2 for sleeping, 3 for task profiler fake state + # See definition in `src/julia_internal.h` data[i - META_OFFSET_CPUCYCLECLOCK] != 0 || continue data[i - META_OFFSET_TASKID] != 0 || continue data[i - META_OFFSET_THREADID] != 0 || continue @@ -464,12 +538,29 @@ function flatten(data::Vector, lidict::LineInfoDict) return (newdata, newdict) end +const SRC_DIR = normpath(joinpath(Sys.BUILD_ROOT_PATH, "src")) +const COMPILER_DIR = "../usr/share/julia/Compiler/" + # Take a file-system path and try to form a concise representation of it # based on the package ecosystem -function short_path(spath::Symbol, filenamecache::Dict{Symbol, String}) +# filenamecache is a dict of spath -> (fullpath or "" if !isfile, modulename, shortpath) +function short_path(spath::Symbol, filenamecache::Dict{Symbol, Tuple{String,String,String}}) return get!(filenamecache, spath) do - path = string(spath) - if isabspath(path) + path = Base.fixup_stdlib_path(string(spath)) + path_norm = normpath(path) + possible_base_path = normpath(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path)) + lib_dir = abspath(Sys.BINDIR, Base.LIBDIR) + if startswith(path_norm, SRC_DIR) + remainder = only(split(path_norm, SRC_DIR, keepempty=false)) + return (isfile(path_norm) ? path_norm : ""), "@juliasrc", remainder + elseif startswith(path_norm, lib_dir) + remainder = only(split(path_norm, lib_dir, keepempty=false)) + return (isfile(path_norm) ? path_norm : ""), "@julialib", remainder + elseif contains(path, COMPILER_DIR) + remainder = split(path, COMPILER_DIR, keepempty=false)[end] + possible_compiler_path = normpath(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "Compiler", remainder)) + return (isfile(possible_compiler_path) ? 
possible_compiler_path : ""), "@Compiler", remainder + elseif isabspath(path) if ispath(path) # try to replace the file-system prefix with a short "@Module" one, # assuming that profile came from the current machine @@ -483,22 +574,23 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String}) project_file = joinpath(root, proj) if Base.isfile_casesensitive(project_file) pkgid = Base.project_file_name_uuid(project_file, "") - isempty(pkgid.name) && return path # bad Project file + isempty(pkgid.name) && return path, "", path # bad Project file # return the joined the module name prefix and path suffix - path = path[nextind(path, sizeof(root)):end] - return string("@", pkgid.name, path) + _short_path = path[nextind(path, sizeof(root)):end] + return path, string("@", pkgid.name), _short_path end end end end - return path - elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path)) + return path, "", path + elseif isfile(possible_base_path) # do the same mechanic for Base (or Core/Compiler) files as above, # but they start from a relative path - return joinpath("@Base", normpath(path)) + return possible_base_path, "@Base", normpath(path) else # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them - return normpath(path) # drop leading "./" + path = normpath(path) + return "", "", path # drop leading "./" end end end @@ -555,9 +647,9 @@ Julia, and examine the resulting `*.mem` files. clear_malloc_data() = ccall(:jl_clear_malloc_data, Cvoid, ()) # C wrappers -function start_timer() +function start_timer(all_tasks::Bool=false) check_init() # if the profile buffer hasn't been initialized, initialize with default size - status = ccall(:jl_profile_start_timer, Cint, ()) + status = ccall(:jl_profile_start_timer, Cint, (Bool,), all_tasks) if status < 0 error(error_codes[status]) end @@ -641,10 +733,10 @@ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) !isempty(data) && has_meta(data) && error("input already has metadata") cpu_clock_cycle = UInt64(99) data_with_meta = similar(data, 0) - for i = 1:length(data) + for i in eachindex(data) val = data[i] if iszero(val) - # (threadid, taskid, cpu_cycle_clock, thread_sleeping) + # META_OFFSET_THREADID, META_OFFSET_TASKID, META_OFFSET_CPUCYCLECLOCK, META_OFFSET_SLEEPSTATE push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0) else push!(data_with_meta, val) @@ -669,12 +761,16 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, startframe = length(data) skip = false nsleeping = 0 + is_task_profile = false for i in startframe:-1:1 (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP ip = data[i] if is_block_end(data, i) # read metadata - thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + thread_sleeping_state = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + if thread_sleeping_state == 2 + is_task_profile = true + end # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK] taskid = data[i - META_OFFSET_TASKID] threadid = data[i - META_OFFSET_THREADID] @@ -682,7 +778,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, skip = true continue end - if thread_sleeping == 1 + if thread_sleeping_state == 1 nsleeping += 1 end skip = false @@ -716,12 +812,14 @@ function parse_flat(::Type{T}, 
data::Vector{UInt64}, lidict::Union{LineInfoDict, end end @assert length(lilist) == length(n) == length(m) == length(lilist_idx) - return (lilist, n, m, totalshots, nsleeping) + return (lilist, n, m, totalshots, nsleeping, is_task_profile) end +const FileNameMap = Dict{Symbol,Tuple{String,String,String}} + function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool) - lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks) + lilist, n, m, totalshots, nsleeping, is_task_profile = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks) if false # optional: drop the "non-interpretable" ones keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) lilist = lilist[keep] @@ -729,7 +827,7 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo m = m[keep] end util_perc = (1 - (nsleeping / totalshots)) * 100 - filenamemap = Dict{Symbol,String}() + filenamemap = FileNameMap() if isempty(lilist) if is_subsection Base.print(io, "Total snapshots: ") @@ -741,19 +839,57 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo return true end is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt) - Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%") + if is_task_profile + Base.print(io, "Total snapshots: ", totalshots, "\n") + else + Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%") + end if is_subsection println(io) print_flat(io, lilist, n, m, cols, filenamemap, fmt) - else + elseif !is_task_profile Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end +# make a terminal-clickable link to the file and linenum. +# Similar to `define_default_editors` in `Base.Filesystem` but for creating URIs not commands +function editor_link(path::String, linenum::Int) + # Note: the editor path can include spaces (if escaped) and flags. 
+ editor = nothing + for var in ["JULIA_EDITOR", "VISUAL", "EDITOR"] + str = get(ENV, var, nothing) + str isa String || continue + editor = str + break + end + path_encoded = Base.Filesystem.encode_uri_component(path) + if editor !== nothing + if editor == "code" + return "vscode://file/$path_encoded:$linenum" + elseif editor == "subl" || editor == "sublime_text" + return "subl://open?url=file://$path_encoded&line=$linenum" + elseif editor == "idea" || occursin("idea", editor) + return "idea://open?file=$path_encoded&line=$linenum" + elseif editor == "pycharm" + return "pycharm://open?file=$path_encoded&line=$linenum" + elseif editor == "atom" + return "atom://core/open/file?filename=$path_encoded&line=$linenum" + elseif editor == "emacsclient" || editor == "emacs" + return "emacs://open?file=$path_encoded&line=$linenum" + elseif editor == "vim" || editor == "nvim" + # Note: Vim/Nvim may not support standard URI schemes without specific plugins + return "vim://open?file=$path_encoded&line=$linenum" + end + end + # fallback to generic URI, but line numbers are not supported by generic URI + return Base.Filesystem.uripath(path) +end + function print_flat(io::IO, lilist::Vector{StackFrame}, n::Vector{Int}, m::Vector{Int}, - cols::Int, filenamemap::Dict{Symbol,String}, + cols::Int, filenamemap::FileNameMap, fmt::ProfileFormat) if fmt.sortedby === :count p = sortperm(n) @@ -765,18 +901,18 @@ function print_flat(io::IO, lilist::Vector{StackFrame}, lilist = lilist[p] n = n[p] m = m[p] - filenames = String[short_path(li.file, filenamemap) for li in lilist] + pkgnames_filenames = Tuple{String,String,String}[short_path(li.file, filenamemap) for li in lilist] funcnames = String[string(li.func) for li in lilist] wcounts = max(6, ndigits(maximum(n))) wself = max(9, ndigits(maximum(m))) maxline = 1 maxfile = 6 maxfunc = 10 - for i in 1:length(lilist) + for i in eachindex(lilist) li = lilist[i] maxline = max(maxline, li.line) - maxfunc = max(maxfunc, length(funcnames[i])) - maxfile = max(maxfile, length(filenames[i])) + maxfunc = max(maxfunc, textwidth(funcnames[i])) + maxfile = max(maxfile, sum(textwidth, pkgnames_filenames[i][2:3]) + 1) end wline = max(5, ndigits(maxline)) ntext = max(20, cols - wcounts - wself - wline - 3) @@ -792,7 +928,7 @@ function print_flat(io::IO, lilist::Vector{StackFrame}, rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function") println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ", rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========") - for i = 1:length(n) + for i in eachindex(n) n[i] < fmt.mincount && continue li = lilist[i] Base.print(io, lpad(string(n[i]), wcounts, " "), " ") @@ -804,16 +940,29 @@ function print_flat(io::IO, lilist::Vector{StackFrame}, Base.print(io, "[any unknown stackframes]") end else - file = filenames[i] + path, pkgname, file = pkgnames_filenames[i] isempty(file) && (file = "[unknown file]") - Base.print(io, rpad(rtruncto(file, wfile), wfile, " "), " ") + pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname) + Base.printstyled(io, pkgname, color=pkgcolor) + file_trunc = ltruncate(file, max(1, wfile)) + wpad = wfile - textwidth(pkgname) + if !isempty(pkgname) && !startswith(file_trunc, slash) + Base.print(io, slash) + wpad -= 1 + end + if isempty(path) + Base.print(io, rpad(file_trunc, wpad, " ")) + else + link = editor_link(path, li.line) + Base.print(io, rpad(styled"{link=$link:$file_trunc}", wpad, " ")) + end Base.print(io, lpad(li.line > 0 ? 
string(li.line) : "?", wline, " "), " ") fname = funcnames[i] if !li.from_c && li.linfo !== nothing fname = sprint(show_spec_linfo, li) end isempty(fname) && (fname = "[unknown function]") - Base.print(io, ltruncto(fname, wfunc)) + Base.print(io, rtruncate(fname, wfunc)) end println(io) end @@ -852,22 +1001,24 @@ function indent(depth::Int) return indent end -function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::Dict{Symbol,String}, showpointer::Bool) +# mimics Stacktraces +const PACKAGE_FIXEDCOLORS = Dict{String, Any}("@Base" => :gray, "@Core" => :gray) + +function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::FileNameMap, showpointer::Bool) nindent = min(cols>>1, level) ndigoverhead = ndigits(maxes.overhead) ndigcounts = ndigits(maxes.count) ndigline = ndigits(maximum(frame.frame.line for frame in frames)) + 6 ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6) widthfile = 2*ntext÷5 # min 12 - widthfunc = 3*ntext÷5 # min 18 - strs = Vector{String}(undef, length(frames)) + strs = Vector{AnnotatedString{String}}(undef, length(frames)) showextra = false if level > nindent nextra = level - nindent nindent -= ndigits(nextra) + 2 showextra = true end - for i = 1:length(frames) + for i in eachindex(frames) frame = frames[i] li = frame.frame stroverhead = lpad(frame.overhead > 0 ? string(frame.overhead) : "", ndigoverhead, " ") @@ -888,7 +1039,7 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma else fname = string(li.func) end - filename = short_path(li.file, filenamemap) + path, pkgname, filename = short_path(li.file, filenamemap) if showpointer fname = string( "0x", @@ -896,16 +1047,26 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma " ", fname) end - strs[i] = string(stroverhead, "╎", base, strcount, " ", - rtruncto(filename, widthfile), - ":", - li.line == -1 ? "?" : string(li.line), - "; ", - ltruncto(fname, widthfunc)) + pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname) + remaining_path = ltruncate(filename, max(1, widthfile - textwidth(pkgname) - 1)) + linenum = li.line == -1 ? "?" : string(li.line) + _slash = (!isempty(pkgname) && !startswith(remaining_path, slash)) ? 
slash : "" + styled_path = styled"{$pkgcolor:$pkgname}$(_slash)$remaining_path:$linenum" + rich_file = if isempty(path) + styled_path + else + link = editor_link(path, li.line) + styled"{link=$link:$styled_path}" + end + strs[i] = Base.annotatedstring(stroverhead, "╎", base, strcount, " ", rich_file, " ", fname) + if frame.overhead > 0 + strs[i] = styled"{bold:$(strs[i])}" + end end else strs[i] = string(stroverhead, "╎", base, strcount, " [unknown stackframe]") end + strs[i] = rtruncate(strs[i], cols) end return strs end @@ -920,12 +1081,16 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI startframe = length(all) skip = false nsleeping = 0 + is_task_profile = false for i in startframe:-1:1 (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP ip = all[i] if is_block_end(all, i) # read metadata - thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + thread_sleeping_state = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0 + if thread_sleeping_state == 2 + is_task_profile = true + end # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK] taskid = all[i - META_OFFSET_TASKID] threadid = all[i - META_OFFSET_THREADID] @@ -934,7 +1099,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI skip = true continue end - if thread_sleeping == 1 + if thread_sleeping_state == 1 nsleeping += 1 end skip = false @@ -1040,7 +1205,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI nothing end cleanup!(root) - return root, nsleeping + return root, nsleeping, is_task_profile end function maxstats(root::StackFrameTree) @@ -1064,10 +1229,10 @@ end # avoid stack overflows. function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T maxes = maxstats(bt) - filenamemap = Dict{Symbol,String}() - worklist = [(bt, 0, 0, "")] + filenamemap = FileNameMap() + worklist = [(bt, 0, 0, AnnotatedString(""))] if !is_subsection - Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line Function\n") Base.print(io, "=========================================================\n") end while !isempty(worklist) @@ -1098,7 +1263,7 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat count = down.count count < fmt.mincount && continue count < noisefloor && continue - str = strs[i] + str = strs[i]::AnnotatedString noisefloor_down = fmt.noisefloor > 0 ? 
floor(Int, fmt.noisefloor * sqrt(count)) : 0 pushfirst!(worklist, (down, level + 1, noisefloor_down, str)) end @@ -1109,9 +1274,9 @@ end function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool) if fmt.combine - root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks) + root, nsleeping, is_task_profile = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks) else - root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks) + root, nsleeping, is_task_profile = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks) end util_perc = (1 - (nsleeping / root.count)) * 100 is_subsection || print_tree(io, root, cols, fmt, is_subsection) @@ -1125,11 +1290,15 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line end return true end - Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") + if is_task_profile + Base.print(io, "Total snapshots: ", root.count, "\n") + else + Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") + end if is_subsection Base.println(io) print_tree(io, root, cols, fmt, is_subsection) - else + elseif !is_task_profile Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n") end return false @@ -1159,24 +1328,7 @@ function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict) return [(v[i], k[i]) for i in p] end -# Utilities -function rtruncto(str::String, w::Int) - if length(str) <= w - return str - else - return string("...", str[prevind(str, end, w-4):end]) - end -end -function ltruncto(str::String, w::Int) - if length(str) <= w - return str - else - return string(str[1:nextind(str, 1, w-4)], "...") - end -end - - -truncto(str::Symbol, w::Int) = truncto(string(str), w) +## Utilities # Order alphabetically (file, function) and then by line number function liperm(lilist::Vector{StackFrame}) @@ -1213,33 +1365,119 @@ end """ - Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) - Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) - Profile.take_heap_snapshot(all_one::Bool=false) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false; + redact_data::Bool=true, streaming::Bool=false) + Profile.take_heap_snapshot(all_one::Bool=false; redact_data:Bool=true, + dir::String=nothing, streaming::Bool=false) Write a snapshot of the heap, in the JSON format expected by the Chrome -Devtools Heap Snapshot viewer (.heapsnapshot extension), to a file -(`\$pid_\$timestamp.heapsnapshot`) in the current directory, or the given -file path, or IO stream. If `all_one` is true, then report the size of -every object as one so they can be easily counted. Otherwise, report the -actual size. +Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file +(`\$pid_\$timestamp.heapsnapshot`) in the current directory by default (or tempdir if +the current directory is unwritable), or in `dir` if given, or the given +full file path, or IO stream. + +If `all_one` is true, then report the size of every object as one so they can be easily +counted. Otherwise, report the actual size. + +If `redact_data` is true (default), then do not emit the contents of any object. 
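As a quick illustration of the calls documented above (the chosen keyword values and output directory below are only examples, not defaults mandated by the API):

```julia
using Profile

# Default call: writes "<pid>_<timestamp>.heapsnapshot" to the current directory
# (or to tempdir() if the current directory is unwritable), with data redacted.
path = Profile.take_heap_snapshot()

# all_one=true counts every object as size one; redact_data=false keeps object
# contents; dir picks the output directory. Values here are purely illustrative.
path = Profile.take_heap_snapshot(true; redact_data=false, dir=tempdir())
```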
+ +If `streaming` is true, we will stream the snapshot data out into four files, using filepath +as the prefix, to avoid having to hold the entire snapshot in memory. This option should be +used for any setting where your memory is constrained. These files can then be reassembled +by calling Profile.HeapSnapshot.assemble_snapshot(), which can +be done offline. + +NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing +the snapshot from the parts requires holding the entire snapshot in memory, so if the +snapshot is large, you can run out of memory while processing it. Streaming allows you to +reconstruct the snapshot offline, after your workload is done running. +If you do attempt to collect a snapshot with streaming=false (the default, for +backwards-compatibility) and your process is killed, note that this will always save the +parts in the same directory as your provided filepath, so you can still reconstruct the +snapshot after the fact, via `assemble_snapshot()`. """ -function take_heap_snapshot(io::IOStream, all_one::Bool=false) - Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) -end -function take_heap_snapshot(filepath::String, all_one::Bool=false) - open(filepath, "w") do io - take_heap_snapshot(io, all_one) +function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false) + if streaming + _stream_heap_snapshot(filepath, all_one, redact_data) + else + # Support the legacy, non-streaming mode, by first streaming the parts, then + # reassembling it after we're done. + prefix = filepath + _stream_heap_snapshot(prefix, all_one, redact_data) + Profile.HeapSnapshot.assemble_snapshot(prefix, filepath) + Profile.HeapSnapshot.cleanup_streamed_files(prefix) end return filepath end -function take_heap_snapshot(all_one::Bool=false) - f = abspath("$(getpid())_$(time_ns()).heapsnapshot") - return take_heap_snapshot(f, all_one) +function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true) + # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, + # then reassembling it after we're done. + dir = tempdir() + prefix = joinpath(dir, "snapshot") + _stream_heap_snapshot(prefix, all_one, redact_data) + Profile.HeapSnapshot.assemble_snapshot(prefix, io) +end +function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool) + # Nodes and edges are binary files + open("$prefix.nodes", "w") do nodes + open("$prefix.edges", "w") do edges + open("$prefix.strings", "w") do strings + # The following file is json data + open("$prefix.metadata.json", "w") do json + Base.@_lock_ios(nodes, + Base.@_lock_ios(edges, + Base.@_lock_ios(strings, + Base.@_lock_ios(json, + ccall(:jl_gc_take_heap_snapshot, + Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar), + nodes.handle, edges.handle, strings.handle, json.handle, + Cchar(all_one), Cchar(redact_data)) + ) + ) + ) + ) + end + end + end + end end +function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) 
where {S <: AbstractString} + fname = "$(getpid())_$(time_ns()).heapsnapshot" + if isnothing(dir) + wd = pwd() + fpath = joinpath(wd, fname) + try + touch(fpath) + rm(fpath; force=true) + catch + @warn "Cannot write to current directory `$(pwd())` so saving heap snapshot to `$(tempdir())`" maxlog=1 _id=Symbol(wd) + fpath = joinpath(tempdir(), fname) + end + else + fpath = joinpath(expanduser(dir), fname) + end + return take_heap_snapshot(fpath, all_one; kwargs...) +end + +""" + Profile.take_page_profile(io::IOStream) + Profile.take_page_profile(filepath::String) +Write a JSON snapshot of the pages from Julia's pool allocator, printing for every pool allocated object, whether it's garbage, or its type. +""" +function take_page_profile(io::IOStream) + Base.@_lock_ios(io, ccall(:jl_gc_take_page_profile, Cvoid, (Ptr{Cvoid},), io.handle)) +end +function take_page_profile(filepath::String) + open(filepath, "w") do io + take_page_profile(io) + end + return filepath +end include("Allocs.jl") +include("heapsnapshot_reassemble.jl") include("precompile.jl") end # module diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..b2d86ee1f27b6 --- /dev/null +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -0,0 +1,257 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module HeapSnapshot + +""" + assemble_snapshot(filepath::AbstractString, out_file::AbstractString) + +Assemble a .heapsnapshot file from the .json files produced by `Profile.take_snapshot`. +""" + +# SoA layout to reduce padding +struct Edges + type::Vector{Int8} # index into `snapshot.meta.edge_types` + name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type + to_pos::Vector{UInt} # index into `snapshot.nodes` +end +function Edges(n::Int) + Edges( + Vector{Int8}(undef, n), + Vector{UInt}(undef, n), + Vector{UInt}(undef, n), + ) +end +Base.length(n::Edges) = length(n.type) + +# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them +struct Nodes + type::Vector{Int8} # index into `snapshot.meta.node_types` + name_idx::Vector{UInt32} # index into `snapshot.strings` + id::Vector{UInt} # unique id, in julia it is the address of the object + self_size::Vector{Int} # size of the object itself, not including the size of its fields + edge_count::Vector{UInt} # number of outgoing edges + edges::Edges # outgoing edges + # This is the main complexity of the .heapsnapshot format, and it's the reason we need + # to read in all the data before writing it out. The edges vector contains all edges, + # but organized by which node they came from. First, it contains all the edges coming + # out of node 0, then all edges leaving node 1, etc. So we need to have visited all + # edges, and assigned them to their corresponding nodes, before we can emit the file. 
+ edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids +end +function Nodes(n::Int, e::Int) + Nodes( + Vector{Int8}(undef, n), + Vector{UInt32}(undef, n), + Vector{UInt}(undef, n), + Vector{Int}(undef, n), + Vector{UInt32}(undef, n), + Edges(e), + [Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors + ) +end +Base.length(n::Nodes) = length(n.type) + +const k_node_number_of_fields = 7 + +# Like Base.dec, but doesn't allocate a string and writes directly to the io object +# We know all of the numbers we're about to write fit into a UInt and are non-negative +let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] + global _write_decimal_number + _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf) + function _write_decimal_number(io, x::Unsigned, digits_buf) + buf = digits_buf + n = ndigits(x) + i = n + @inbounds while i >= 2 + d, r = divrem(x, 0x64) + d100 = _dec_d100[(r % Int)::Int + 1] + buf[i-1] = d100 % UInt8 + buf[i] = (d100 >> 0x8) % UInt8 + x = oftype(x, d) + i -= 2 + end + if i > 0 + @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + end + write(io, @view buf[max(i, 1):n]) + end +end + +function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix) + open(out_file, "w") do io + assemble_snapshot(in_prefix, io) + end +end + +# Manually parse and write the .json files, given that we don't have JSON import/export in +# julia's stdlibs. +function assemble_snapshot(in_prefix, io::IO) + preamble = read(string(in_prefix, ".metadata.json"), String) + pos = last(findfirst("node_count\":", preamble)) + 1 + endpos = findnext(==(','), preamble, pos) - 1 + node_count = parse(Int, String(@view preamble[pos:endpos])) + + pos = last(findnext("edge_count\":", preamble, endpos)) + 1 + endpos = findnext(==(','), preamble, pos) - 1 + edge_count = parse(Int, String(@view preamble[pos:endpos])) + + nodes = Nodes(node_count, edge_count) + + orphans = Set{UInt}() # nodes that have no incoming edges + # Parse nodes with empty edge counts that we need to fill later + open(string(in_prefix, ".nodes"), "r") do nodes_file + for i in 1:length(nodes) + node_type = read(nodes_file, Int8) + node_name_idx = read(nodes_file, UInt) + id = read(nodes_file, UInt) + self_size = read(nodes_file, Int) + @assert read(nodes_file, Int) == 0 # trace_node_id + @assert read(nodes_file, Int8) == 0 # detachedness + + nodes.type[i] = node_type + nodes.name_idx[i] = node_name_idx + nodes.id[i] = id + nodes.self_size[i] = self_size + nodes.edge_count[i] = 0 # edge_count + # populate the orphans set with node index + push!(orphans, i-1) + end + end + + # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets + open(string(in_prefix, ".edges"), "r") do edges_file + for i in 1:length(nodes.edges) + edge_type = read(edges_file, Int8) + edge_name_or_index = read(edges_file, UInt) + from_node = read(edges_file, UInt) + to_node = read(edges_file, UInt) + + nodes.edges.type[i] = edge_type + nodes.edges.name_or_index[i] = edge_name_or_index + nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7 + nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing + push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges + # remove the node from the orphans if it has at least one incoming edge + if to_node in orphans + delete!(orphans, to_node) + end + end + end + + 
_digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + println(io, @view(preamble[1:end-1]), ",") # remove trailing "}" to reopen the object + + println(io, "\"nodes\":[") + for i in 1:length(nodes) + i > 1 && println(io, ",") + _write_decimal_number(io, nodes.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.name_idx[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.id[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.self_size[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edge_count[i], _digits_buf) + print(io, ",0,0") + end + print(io, "],\n") + print(io, "\"edges\":[") + e = 1 + for n in 1:length(nodes) + count = nodes.edge_count[n] + len_edges = length(nodes.edge_idxs[n]) + @assert count == len_edges "For node $n: $count != $len_edges" + for i in nodes.edge_idxs[n] + e > 1 && print(io, ",") + println(io) + _write_decimal_number(io, nodes.edges.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) + if !(nodes.edges.to_pos[i] % k_node_number_of_fields == 0) + @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" + end + e += 1 + end + end + println(io, "],") + + # not used. Required by microsoft/vscode-v8-heap-tools + # This order of these fields is required by chrome dev tools otherwise loading fails + println(io, "\"trace_function_infos\":[],") + println(io, "\"trace_tree\":[],") + println(io, "\"samples\":[],") + println(io, "\"locations\":[],") + + println(io, "\"strings\":[") + open(string(in_prefix, ".strings"), "r") do strings_io + first = true + while !eof(strings_io) + str_size = read(strings_io, UInt) + str_bytes = read(strings_io, str_size) + str = String(str_bytes) + if first + first = false + else + print(io, ",\n") + end + print_str_escape_json(io, str) + end + end + print(io, "]}") + + # remove the uber node from the orphans + if 0 in orphans + delete!(orphans, 0) + end + + @assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))" + + return nothing +end + +""" + cleanup_streamed_files(prefix::AbstractString) + +Remove files streamed during `take_heap_snapshot` in streaming mode. 
+""" +function cleanup_streamed_files(prefix::AbstractString) + rm(string(prefix, ".metadata.json")) + rm(string(prefix, ".nodes")) + rm(string(prefix, ".edges")) + rm(string(prefix, ".strings")) + return nothing +end + +function print_str_escape_json(stream::IO, s::AbstractString) + print(stream, '"') + for c in s + if c == '"' + print(stream, "\\\"") + elseif c == '\\' + print(stream, "\\\\") + elseif c == '\b' + print(stream, "\\b") + elseif c == '\f' + print(stream, "\\f") + elseif c == '\n' + print(stream, "\\n") + elseif c == '\r' + print(stream, "\\r") + elseif c == '\t' + print(stream, "\\t") + elseif '\x00' <= c <= '\x1f' + print(stream, "\\u", lpad(string(UInt16(c), base=16), 4, '0')) + elseif !isvalid(c) + # we have to do this because vscode's viewer doesn't like the replace character + print(stream, "[invalid unicode character]") + else + print(stream, c) + end + end + print(stream, '"') +end + +end diff --git a/stdlib/Profile/src/precompile.jl b/stdlib/Profile/src/precompile.jl index 2d947429861a9..7b33e09941b28 100644 --- a/stdlib/Profile/src/precompile.jl +++ b/stdlib/Profile/src/precompile.jl @@ -1,4 +1,4 @@ -if ccall(:jl_generating_output, Cint, ()) == 1 +if Base.generating_output() precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt}) precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}}) precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt}) diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl index c2ec7d2f6cb54..d4930a2b7f5ed 100644 --- a/stdlib/Profile/test/allocs.jl +++ b/stdlib/Profile/test/allocs.jl @@ -1,6 +1,13 @@ using Test using Profile: Allocs +Allocs.clear() +let iobuf = IOBuffer() + for format in (:tree, :flat) + Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true) + end +end + @testset "alloc profiler doesn't segfault" begin res = Allocs.@profile sample_rate=1.0 begin # test the allocations during compilation @@ -13,6 +20,20 @@ using Profile: Allocs @test first_alloc.size > 0 @test length(first_alloc.stacktrace) > 0 @test length(string(first_alloc.type)) > 0 + + # test printing options + for options in ((format=:tree, C=true), + (format=:tree, maxdepth=2), + (format=:flat, C=true), + (), + (format=:flat, sortedby=:count), + (format=:tree, recur=:flat), + ) + iobuf = IOBuffer() + Allocs.print(iobuf; options...) 
+ str = String(take!(iobuf)) + @test !isempty(str) + end end @testset "alloc profiler works when there are multiple tasks on multiple threads" begin @@ -121,3 +142,39 @@ end @test length(prof.allocs) >= 1 @test length([a for a in prof.allocs if a.type == String]) >= 1 end + +@testset "alloc profiler catches allocs from codegen" begin + @eval begin + struct MyType x::Int; y::Int end + Base.:(+)(n::Number, x::MyType) = n + x.x + x.y + foo(a, x) = a[1] + x + wrapper(a) = foo(a, MyType(0,1)) + end + a = Any[1,2,3] + # warmup + wrapper(a) + + @eval Allocs.@profile sample_rate=1 wrapper($a) + + prof = Allocs.fetch() + Allocs.clear() + + @test length(prof.allocs) >= 1 + @test length([a for a in prof.allocs if a.type == MyType]) >= 1 +end + +@testset "alloc profiler catches allocs from buffer resize" begin + f(a) = for _ in 1:100; push!(a, 1); end + f(Int[]) + resize!(Int[], 1) + a = Int[] + Allocs.clear() + Allocs.@profile sample_rate=1 f(a) + Allocs.@profile sample_rate=1 resize!(a, 1_000_000) # 4MB + prof = Allocs.fetch() + Allocs.clear() + + @test 3 <= length(prof.allocs) <= 10 + @test length([a for a in prof.allocs if a.type === Allocs.BufferType]) == 1 + @test length([a for a in prof.allocs if a.type === Memory{Int}]) >= 2 +end diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..e1d6621647671 --- /dev/null +++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl @@ -0,0 +1,54 @@ +using Test + +@testset "_write_decimal_number" begin + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + io = IOBuffer() + + test_write(d) = begin + Profile.HeapSnapshot._write_decimal_number(io, d, _digits_buf) + s = String(take!(io)) + seekstart(io) + return s + end + @test test_write(0) == "0" + @test test_write(99) == "99" + + @test test_write(UInt8(0)) == "0" + @test test_write(UInt32(0)) == "0" + @test test_write(Int32(0)) == "0" + + @test test_write(UInt8(99)) == "99" + @test test_write(UInt32(99)) == "99" + @test test_write(Int32(99)) == "99" + + # Sample among possible UInts we might print + for x in typemin(UInt8):typemax(UInt8) + @test test_write(x) == string(x) + end + for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt) + @test test_write(x) == string(x) + end +end + +function test_print_str_escape_json(input::AbstractString, expected::AbstractString) + output = IOBuffer() + Profile.HeapSnapshot.print_str_escape_json(output, input) + @test String(take!(output)) == expected +end + +@testset "print_str_escape_json" begin + # Test basic string escaping + test_print_str_escape_json("\"hello\"", "\"\\\"hello\\\"\"") + + # Test escaping of control characters + test_print_str_escape_json("\x01\x02\x03", "\"\\u0001\\u0002\\u0003\"") + + # Test escaping of other special characters + test_print_str_escape_json("\b\f\n\r\t", "\"\\b\\f\\n\\r\\t\"") + + # Test handling of mixed characters + test_print_str_escape_json("abc\ndef\"ghi", "\"abc\\ndef\\\"ghi\"") + + # Test handling of empty string + test_print_str_escape_json("", "\"\"") +end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 2d6df81b1015d..b73a2a618011b 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -25,41 +25,79 @@ end end end -busywait(0, 0) # compile -@profile busywait(1, 20) +@noinline function sleeping_tasks(ch::Channel) + for _ in 1:100 + Threads.@spawn take!(ch) + end + sleep(10) +end -let r = Profile.retrieve() - mktemp() do path, io - serialize(io, r) - close(io) - 
open(path) do io - @test isa(deserialize(io), Tuple{Vector{UInt},Dict{UInt64,Vector{Base.StackTraces.StackFrame}}}) +function test_profile() + let r = Profile.retrieve() + mktemp() do path, io + serialize(io, r) + close(io) + open(path) do io + @test isa(deserialize(io), Tuple{Vector{UInt},Dict{UInt64,Vector{Base.StackTraces.StackFrame}}}) + end end end end -let iobuf = IOBuffer() - Profile.print(iobuf, format=:tree, C=true) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf, format=:tree, maxdepth=2) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf, format=:flat, C=true) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf) - @test !isempty(String(take!(iobuf))) - truncate(iobuf, 0) - Profile.print(iobuf, format=:flat, sortedby=:count) - @test !isempty(String(take!(iobuf))) - Profile.print(iobuf, format=:tree, recur=:flat) +function test_has_task_profiler_sample_in_buffer() + let r = Profile.retrieve() + mktemp() do path, io + serialize(io, r) + close(io) + open(path) do io + all = deserialize(io) + data = all[1] + startframe = length(data) + for i in startframe:-1:1 + (startframe - 1) >= i >= (startframe - (Profile.nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP + if Profile.is_block_end(data, i) + thread_sleeping_state = data[i - Profile.META_OFFSET_SLEEPSTATE] + @test thread_sleeping_state == 0x3 + end + end + end + end + end +end + +busywait(0, 0) # compile + +@profile_walltime busywait(1, 20) +test_profile() + +Profile.clear() + +ch = Channel(1) +@profile_walltime sleeping_tasks(ch) +test_profile() +close(ch) +test_has_task_profiler_sample_in_buffer() + +Profile.clear() + +@profile busywait(1, 20) +test_profile() + +# test printing options +for options in ((format=:tree, C=true), + (format=:tree, maxdepth=2), + (format=:flat, C=true), + (), + (format=:flat, sortedby=:count), + (format=:tree, recur=:flat), + ) + iobuf = IOBuffer() + Profile.print(iobuf; options...) str = String(take!(iobuf)) @test !isempty(str) - truncate(iobuf, 0) + file, _ = mktemp() + Profile.print(file; options...) + @test filesize(file) > 0 end @testset "Profile.print() groupby options" begin @@ -166,6 +204,24 @@ end @test getline(values(fdictc)) == getline(values(fdict0)) + 2 end +import InteractiveUtils + +@testset "Module short names" begin + Profile.clear() + @profile InteractiveUtils.peakflops() + io = IOBuffer() + ioc = IOContext(io, :displaysize=>(1000,1000)) + Profile.print(ioc, C=true) + str = String(take!(io)) + slash = Sys.iswindows() ? 
"\\" : "/" + @test occursin("@Compiler" * slash, str) + @test occursin("@Base" * slash, str) + @test occursin("@InteractiveUtils" * slash, str) + @test occursin("@LinearAlgebra" * slash, str) + @test occursin("@juliasrc" * slash, str) + @test occursin("@julialib" * slash, str) +end + # Profile deadlocking in compilation (debuginfo registration) let cmd = Base.julia_cmd() script = """ @@ -178,12 +234,15 @@ let cmd = Base.julia_cmd() println("done") print(Profile.len_data()) """ - p = open(`$cmd -e $script`) + # use multiple threads here to ensure that profiling works with threading + p = open(`$cmd -t2 -e $script`) t = Timer(120) do t # should be under 10 seconds, so give it 2 minutes then report failure - println("KILLING BY PROFILE TEST WATCHDOG\n") - kill(p, Base.SIGTERM) - sleep(10) + println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n") + kill(p, Base.SIGQUIT) + sleep(30) + kill(p, Base.SIGQUIT) + sleep(30) kill(p, Base.SIGKILL) end s = read(p, String) @@ -200,18 +259,24 @@ if Sys.isbsd() || Sys.islinux() script = """ print(stderr, "started\n") eof(stdin) - close(t) """ - iob = Base.BufferStream() + iob = Base.BufferStream() # make an unbounded buffer, so we can just read after waiting for exit notify_exit = Base.PipeEndpoint() - p = run(pipeline(`$cmd -e $script`, stdin=notify_exit, stderr=iob, stdout=devnull), wait=false) + p = run(`$cmd -e $script`, notify_exit, devnull, iob, wait=false) + eof = @async try # set up a monitor task to set EOF on iob after p exits + wait(p) + finally + closewrite(iob) + end t = Timer(120) do t # should be under 10 seconds, so give it 2 minutes then report failure - println("KILLING BY PROFILE TEST WATCHDOG\n") - kill(p, Base.SIGTERM) - sleep(10) + println("KILLING siginfo/sigusr1 test BY PROFILE TEST WATCHDOG\n") + kill(p, Base.SIGQUIT) + sleep(30) + kill(p, Base.SIGQUIT) + sleep(30) kill(p, Base.SIGKILL) - close(p) + close(notify_exit) end try s = readuntil(iob, "started", keep=true) @@ -230,17 +295,18 @@ if Sys.isbsd() || Sys.islinux() @test occursin("Overhead ╎", s) end close(notify_exit) # notify test finished - s = read(iob, String) # consume test output - wait(p) # wait for test completion + wait(eof) # wait for test completion + s = read(iob, String) # consume test output from buffer close(t) catch close(notify_exit) + wait(eof) # wait for test completion errs = read(iob, String) # consume test output isempty(errs) || println("CHILD STDERR after test failure: ", errs) - wait(p) # wait for test completion close(t) rethrow() end + @test success(p) end end end @@ -280,18 +346,50 @@ end @testset "HeapSnapshot" begin tmpdir = mktempdir() + + # ensure that we can prevent redacting data fname = cd(tmpdir) do - read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String) + read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String) end @test isfile(fname) - open(fname) do fs - @test readline(fs) != "" + sshot = read(fname, String) + @test sshot != "" + @test contains(sshot, "redact_this") + + rm(fname) + + # ensure that string data is redacted by default + fname = cd(tmpdir) do + read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String) end + @test isfile(fname) + + sshot = read(fname, String) + @test sshot != "" + @test !contains(sshot, "redact_this") + rm(fname) rm(tmpdir, force = true, recursive = true) end 
+@testset "PageProfile" begin + fname = "$(getpid())_$(time_ns())" + fpath = joinpath(tempdir(), fname) + Profile.take_page_profile(fpath) + open(fpath) do fs + @test readline(fs) != "" + end + rm(fpath) +end + include("allocs.jl") + +@testset "Docstrings" begin + undoc = Docs.undocumented_names(Profile) + @test_broken isempty(undoc) + @test undoc == [:Allocs] +end +include("heapsnapshot_reassemble.jl") diff --git a/stdlib/Project.toml b/stdlib/Project.toml new file mode 100644 index 0000000000000..92996cf017d0d --- /dev/null +++ b/stdlib/Project.toml @@ -0,0 +1,61 @@ +[deps] +ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" +CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +Future = "9fa8497b-333b-5362-9e8d-4d0656e87820" +GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011" +LLD_jll = "d55e3150-da41-5e91-b323-ecfd1eec6109" +LLVMLibUnwind_jll = "47c5dbc3-30ba-59ef-96a6-123e260183d9" +LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +LibCURL = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +LibCURL_jll = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" +LibGit2_jll = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8" +LibUV_jll = "183b4373-6708-53ba-ad28-60e28bb38547" +LibUnwind_jll = "745a5e78-f969-53e9-954f-d19f2f74f4e3" +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +MPFR_jll = "3a97d323-0669-5f0c-9066-3539efd106a3" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +Mmap = "a63ad114-7e13-5084-954f-fe012c677804" +MozillaCACerts_jll = "14a3606d-f60d-562e-9121-12d972cd8159" +NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363" +OpenLibm_jll = "05823500-19ac-5b8b-9628-191a04bc5112" +OpenSSL_jll = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +PCRE2_jll = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" +Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +SuiteSparse_jll = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a" +dSFMT_jll = "05ff407c-b0c1-5878-9df8-858cc2e60c36" +libLLVM_jll = 
"8f36deef-c2a5-5394-99ed-8e07531fb29a" +libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93" +nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d" +p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/stdlib/REPL/Project.toml b/stdlib/REPL/Project.toml index 4f77157da0146..f60a6a4766093 100644 --- a/stdlib/REPL/Project.toml +++ b/stdlib/REPL/Project.toml @@ -1,15 +1,19 @@ name = "REPL" uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" [deps] InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" +StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Random"] +test = ["Logging", "Test", "Random"] diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md index ce594d55863bc..eabd7e729280e 100644 --- a/stdlib/REPL/docs/src/index.md +++ b/stdlib/REPL/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/REPL/docs/src/index.md" +``` + # The Julia REPL Julia comes with a full-featured interactive command-line REPL (read-eval-print loop) built into @@ -7,8 +11,9 @@ shell modes. The REPL can be started by simply calling `julia` with no arguments on the executable: ```@eval +using REPL io = IOBuffer() -Base.banner(io) +REPL.banner(io) banner = String(take!(io)) import Markdown Markdown.parse("```\n\$ julia\n\n$(banner)\njulia>\n```") @@ -45,14 +50,16 @@ julia> ans In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting text that starts with `julia> ` into the REPL. In that case, only expressions starting with `julia> ` (as -well as the other REPL mode prompts: `shell> `, `help?> `, `pkg>` ) are parsed, but others are +well as the other REPL mode prompts: `shell> `, `help?> `, `pkg> ` ) are parsed, but others are removed. This makes it possible to paste a chunk of text that has been copied from a REPL session without having to scrub away prompts and outputs. This feature is enabled by default but can be disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. If it is enabled, you can try it out by pasting the code block above this paragraph straight into the REPL. This feature does not work on the standard Windows command prompt due to its limitation at detecting when a paste occurs. -Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref). +A non-[`nothing`](@ref) result of executing an expression is displayed by the REPL using the [`show`](@ref) function +with a specific [`IOContext`](@ref) (via [`display`](@ref), which defaults to calling +`show(io, MIME("text/plain"), ans)`, which in turn defaults to `show(io, ans)`). In particular, the `:limit` attribute is set to `true`. Other attributes can receive in certain `show` methods a default value if it's not already set, like `:compact`. 
@@ -61,7 +68,7 @@ It's possible, as an experimental feature, to specify the attributes used by the ```julia-repl julia> rand(2, 2) -2×2 Array{Float64,2}: +2×2 Matrix{Float64}: 0.8833 0.329197 0.719708 0.59114 @@ -71,7 +78,7 @@ julia> show(IOContext(stdout, :compact => false), "text/plain", rand(2, 2)) julia> Base.active_repl.options.iocontext[:compact] = false; julia> rand(2, 2) -2×2 Array{Float64,2}: +2×2 Matrix{Float64}: 0.2083967319174056 0.13330606013126012 0.6244375177790158 0.9777957560761545 ``` @@ -312,7 +319,7 @@ Users should refer to `LineEdit.jl` to discover the available actions on key inp ## Tab completion -In both the Julian and help modes of the REPL, one can enter the first few characters of a function +In the Julian, pkg and help modes of the REPL, one can enter the first few characters of a function or type and then press the tab key to get a list all matches: ```julia-repl @@ -334,6 +341,13 @@ julia> mapfold[TAB] mapfoldl mapfoldr ``` +When a single complete tab-complete result is available at the end of an input line and 2 or more characters +have been typed, a hint of the completion will show in a lighter color. +This can be disabled via `Base.active_repl.options.hint_tab_completes = false`. + +!!! compat "Julia 1.11" + Tab-complete hinting was added in Julia 1.11 + Like other components of the REPL, the search is case-sensitive: ```julia-repl @@ -354,13 +368,13 @@ julia> π julia> e\_1[TAB] = [1,0] julia> e₁ = [1,0] -2-element Array{Int64,1}: +2-element Vector{Int64}: 1 0 julia> e\^1[TAB] = [1 0] julia> e¹ = [1 0] -1×2 Array{Int64,2}: +1×2 Matrix{Int64}: 1 0 julia> \sqrt[TAB]2 # √ is equivalent to the sqrt function @@ -570,8 +584,9 @@ Main It is possible to change this contextual module via the function `REPL.activate(m)` where `m` is a `Module` or by typing the module in the REPL -and pressing the keybinding Alt-m (the cursor must be on the module name). The -active module is shown in the prompt: +and pressing the keybinding Alt-m with the cursor on the module name (Esc-m on MacOS). +Pressing the keybinding on an empty prompt toggles the context between the previously active +non-`Main` module and `Main`. 
The active module is shown in the prompt (unless it is `Main`): ```julia-repl julia> using REPL @@ -591,9 +606,13 @@ julia> Core # using the keybinding to change module (Core) julia> -(Core) julia> Main # going back to Main via keybinding +(Core) julia> # going back to Main via keybinding julia> + +julia> # going back to previously-active Core via keybinding + +(Core) julia> ``` Functions that take an optional module argument often defaults to the REPL diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index ff67e849fcc5a..e15807f645119 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -3,11 +3,10 @@ module LineEdit import ..REPL -using REPL: AbstractREPL, Options +using ..REPL: AbstractREPL, Options using ..Terminals -import ..Terminals: raw!, width, height, cmove, getX, - getY, clear_line, beep +import ..Terminals: raw!, width, height, clear_line, beep import Base: ensureroom, show, AnyDict, position using Base: something @@ -67,6 +66,7 @@ show(io::IO, x::Prompt) = show(io, string("Prompt(\"", prompt_string(x.prompt), mutable struct MIState interface::ModalInterface active_module::Module + previous_active_module::Module current_mode::TextInterface aborted::Bool mode_state::IdDict{TextInterface,ModeState} @@ -76,9 +76,10 @@ mutable struct MIState key_repeats::Int last_action::Symbol current_action::Symbol + async_channel::Channel{Function} end -MIState(i, mod, c, a, m) = MIState(i, mod, c, a, m, String[], 0, Char[], 0, :none, :none) +MIState(i, mod, c, a, m) = MIState(i, mod, mod, c, a, m, String[], 0, Char[], 0, :none, :none, Channel{Function}()) const BufferLike = Union{MIState,ModeState,IOBuffer} const State = Union{MIState,ModeState} @@ -97,6 +98,7 @@ mutable struct PromptState <: ModeState p::Prompt input_buffer::IOBuffer region_active::Symbol # :shift or :mark or :off + hint::Union{String,Nothing} undo_buffers::Vector{IOBuffer} undo_idx::Int ias::InputAreaState @@ -164,7 +166,7 @@ region_active(s::PromptState) = s.region_active region_active(s::ModeState) = :off -input_string(s::PromptState) = String(take!(copy(s.input_buffer))) +input_string(s::PromptState) = String(take!(copy(s.input_buffer)))::String input_string_newlines(s::PromptState) = count(c->(c == '\n'), input_string(s)) function input_string_newlines_aftercursor(s::PromptState) @@ -179,11 +181,22 @@ struct EmptyHistoryProvider <: HistoryProvider end reset_state(::EmptyHistoryProvider) = nothing -complete_line(c::EmptyCompletionProvider, s) = String[], "", true +# Before, completions were always given as strings. But at least for backslash +# completions, it's nice to see what glyphs are available in the completion preview. +# To separate between what's shown in the preview list of possible matches, and what's +# actually completed, we introduce this struct. +struct NamedCompletion + completion::String # what is actually completed, for example "\trianglecdot" + name::String # what is displayed in lists of possible completions, for example "◬ \trianglecdot" +end + +NamedCompletion(completion::String) = NamedCompletion(completion, completion) + +complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = NamedCompletion[], "", true # complete_line can be specialized for only two arguments, when the active module # doesn't matter (e.g. 
Pkg does this) -complete_line(c::CompletionProvider, s, ::Module) = complete_line(c, s) +complete_line(c::CompletionProvider, s, ::Module; hint::Bool=false) = complete_line(c, s; hint) terminal(s::IO) = s terminal(s::PromptState) = s.terminal @@ -306,6 +319,7 @@ end set_action!(s, command::Symbol) = nothing +common_prefix(completions::Vector{NamedCompletion}) = common_prefix(map(x -> x.completion, completions)) function common_prefix(completions::Vector{String}) ret = "" c1 = completions[1] @@ -328,6 +342,8 @@ end # does not restrict column length when multiple columns are used. const MULTICOLUMN_THRESHOLD = 5 +show_completions(s::PromptState, completions::Vector{NamedCompletion}) = show_completions(s, map(x -> x.name, completions)) + # Show available completions function show_completions(s::PromptState, completions::Vector{String}) # skip any lines of input after the cursor @@ -361,7 +377,7 @@ function show_completions(s::PromptState, completions::Vector{String}) end end -# Prompt Completions +# Prompt Completions & Hints function complete_line(s::MIState) set_action!(s, :complete_line) if complete_line(state(s), s.key_repeats, s.active_module) @@ -372,8 +388,72 @@ function complete_line(s::MIState) end end -function complete_line(s::PromptState, repeats::Int, mod::Module) - completions, partial, should_complete = complete_line(s.p.complete, s, mod)::Tuple{Vector{String},String,Bool} +# due to close coupling of the Pkg ReplExt `complete_line` can still return a vector of strings, +# so we convert those in this helper +function complete_line_named(args...; kwargs...)::Tuple{Vector{NamedCompletion},String,Bool} + result = complete_line(args...; kwargs...)::Union{Tuple{Vector{NamedCompletion},String,Bool},Tuple{Vector{String},String,Bool}} + if result isa Tuple{Vector{NamedCompletion},String,Bool} + return result + else + completions, partial, should_complete = result + return map(NamedCompletion, completions), partial, should_complete + end +end + +function check_for_hint(s::MIState) + st = state(s) + if !options(st).hint_tab_completes || !eof(buffer(st)) + # only generate hints if enabled and at the end of the line + # TODO: maybe show hints for insertions at other positions + # Requires making space for them earlier in refresh_multi_line + return clear_hint(st) + end + + named_completions, partial, should_complete = try + complete_line_named(st.p.complete, st, s.active_module; hint = true) + catch + @debug "error completing line for hint" exception=current_exceptions() + return clear_hint(st) + end + completions = map(x -> x.completion, named_completions) + + isempty(completions) && return clear_hint(st) + # Don't complete for single chars, given e.g. `x` completes to `xor` + if length(partial) > 1 && should_complete + singlecompletion = length(completions) == 1 + p = singlecompletion ? completions[1] : common_prefix(completions) + if singlecompletion || p in completions # i.e. complete `@time` even though `@time_imports` etc. exists + # The completion `p` and the input `partial` may not share the same initial + # characters, for instance when completing to subscripts or superscripts. + # So, in general, make sure that the hint starts at the correct position by + # incrementing its starting position by as many characters as the input. 
+ startind = 1 # index of p from which to start providing the hint + maxind = ncodeunits(p) + for _ in partial + startind = nextind(p, startind) + startind > maxind && break + end + if startind ≤ maxind # completion on a complete name returns itself so check that there's something to hint + hint = p[startind:end] + st.hint = hint + return true + end + end + end + return clear_hint(st) +end + +function clear_hint(s::ModeState) + if !isnothing(s.hint) + s.hint = "" # don't set to nothing here. That will be done in `maybe_show_hint` + return true # indicate maybe_show_hint has work to do + else + return false + end +end + +function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=false) + completions, partial, should_complete = complete_line_named(s.p.complete, s, mod; hint) isempty(completions) && return false if !should_complete # should_complete is false for cases where we only want to show @@ -383,7 +463,7 @@ function complete_line(s::PromptState, repeats::Int, mod::Module) # Replace word by completion prev_pos = position(s) push_undo(s) - edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1]) + edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1].completion) else p = common_prefix(completions) if !isempty(p) && p != partial @@ -432,12 +512,32 @@ prompt_string(p::Prompt) = prompt_string(p.prompt) prompt_string(s::AbstractString) = s prompt_string(f::Function) = Base.invokelatest(f) +function maybe_show_hint(s::PromptState) + isa(s.hint, String) || return nothing + # The hint being "" then nothing is used to first clear a previous hint, then skip printing the hint + if isempty(s.hint) + s.hint = nothing + else + Base.printstyled(terminal(s), s.hint, color=:light_black) + cmove_left(terminal(s), textwidth(s.hint)) + s.hint = "" # being "" signals to do one clear line remainder to clear the hint next time the screen is refreshed + end + return nothing +end + function refresh_multi_line(s::PromptState; kw...) if s.refresh_wait !== nothing close(s.refresh_wait) s.refresh_wait = nothing end - refresh_multi_line(terminal(s), s; kw...) + if s.hint isa String + # clear remainder of line which is unknown here if it had a hint before unbeknownst to refresh_multi_line + # the clear line cannot be printed each time because it would break column movement + print(terminal(s), "\e[0K") + end + r = refresh_multi_line(terminal(s), s; kw...) + maybe_show_hint(s) # now maybe write the hint back to the screen + return r end refresh_multi_line(s::ModeState; kw...) = refresh_multi_line(terminal(s), s; kw...) refresh_multi_line(termbuf::TerminalBuffer, s::ModeState; kw...) = refresh_multi_line(termbuf, terminal(s), s; kw...) @@ -676,7 +776,26 @@ function edit_move_right(buf::IOBuffer) end return false end -edit_move_right(s::PromptState) = edit_move_right(s.input_buffer) ? 
refresh_line(s) : false +function edit_move_right(m::MIState) + s = state(m) + buf = s.input_buffer + if edit_move_right(s.input_buffer) + refresh_line(s) + return true + else + completions, partial, should_complete = complete_line(s.p.complete, s, m.active_module) + if should_complete && eof(buf) && length(completions) == 1 && length(partial) > 1 + # Replace word by completion + prev_pos = position(s) + push_undo(s) + edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1]) + refresh_line(state(s)) + return true + else + return false + end + end +end function edit_move_word_right(s::PromptState) if !eof(s.input_buffer) @@ -751,9 +870,9 @@ end # returns the removed portion as a String function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigid_mark::Bool=true) A, B = first(r), last(r) - A >= B && isempty(ins) && return String(ins) + A >= B && isempty(ins) && return ins buf = buffer(s) - pos = position(buf) + pos = position(buf) # n.b. position(), etc, are 0-indexed adjust_pos = true if A <= pos < B seek(buf, A) @@ -762,18 +881,29 @@ function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigi else adjust_pos = false end - if A < buf.mark < B || A == buf.mark == B - # rigid_mark is used only if the mark is strictly "inside" - # the region, or the region is empty and the mark is at the boundary - buf.mark = rigid_mark ? A : A + sizeof(ins) - elseif buf.mark >= B - buf.mark += sizeof(ins) - B + A + mark = buf.mark + if mark != -1 + if A < mark < B || A == mark == B + # rigid_mark is used only if the mark is strictly "inside" + # the region, or the region is empty and the mark is at the boundary + mark = rigid_mark ? A : A + sizeof(ins) + elseif mark >= B + mark += sizeof(ins) - B + A + end + buf.mark = -1 end - ensureroom(buf, B) # handle !buf.reinit from take! - ret = splice!(buf.data, A+1:B, codeunits(String(ins))) # position(), etc, are 0-indexed - buf.size = buf.size + sizeof(ins) - B + A - adjust_pos && seek(buf, position(buf) + sizeof(ins)) - return String(copy(ret)) + # Implement ret = splice!(buf.data, A+1:B, codeunits(ins)) for a stream + pos = position(buf) + seek(buf, A) + ret = read(buf, A >= B ? 0 : B - A) + trail = read(buf) + seek(buf, A) + write(buf, ins) + write(buf, trail) + truncate(buf, position(buf)) + seek(buf, pos + (adjust_pos ? sizeof(ins) : 0)) + buf.mark = mark + return String(ret) end edit_splice!(s::MIState, ins::AbstractString) = edit_splice!(s, region(s), ins) @@ -1417,14 +1547,28 @@ current_word_with_dots(s::MIState) = current_word_with_dots(buffer(s)) function activate_module(s::MIState) word = current_word_with_dots(s); - isempty(word) && return beep(s) - try - mod = Base.Core.eval(Base.active_module(), Base.Meta.parse(word)) - REPL.activate(mod) - edit_clear(s) - catch + empty = isempty(word) + mod = if empty + s.previous_active_module + else + try + Base.Core.eval(Base.active_module(), Base.Meta.parse(word)) + catch + nothing + end + end + if !(mod isa Module) || mod == Base.active_module() beep(s) + return + end + empty && edit_insert(s, ' ') # makes the `edit_clear` below actually update the prompt + if Base.active_module() == Main || mod == Main + # At least one needs to be Main. 
Disallows toggling between two non-Main modules because it's + # otherwise hard to get back to Main + s.previous_active_module = Base.active_module() end + REPL.activate(mod) + edit_clear(s) end history_prev(::EmptyHistoryProvider) = ("", false) @@ -2058,8 +2202,8 @@ setmodifiers!(p::Prompt, m::Modifiers) = setmodifiers!(p.complete, m) setmodifiers!(c) = nothing # Search Mode completions -function complete_line(s::SearchState, repeats, mod::Module) - completions, partial, should_complete = complete_line(s.histprompt.complete, s, mod) +function complete_line(s::SearchState, repeats, mod::Module; hint::Bool=false) + completions, partial, should_complete = complete_line(s.histprompt.complete, s, mod; hint) # For now only allow exact completions in search mode if length(completions) == 1 prev_pos = position(s) @@ -2218,7 +2362,7 @@ keymap_data(state, ::Union{HistoryPrompt, PrefixHistoryPrompt}) = state Base.isempty(s::PromptState) = s.input_buffer.size == 0 -on_enter(s::PromptState) = s.p.on_enter(s) +on_enter(s::MIState) = state(s).p.on_enter(s) move_input_start(s::BufferLike) = (seek(buffer(s), 0); nothing) move_input_end(buf::IOBuffer) = (seekend(buf); nothing) @@ -2424,8 +2568,8 @@ AnyDict( "\e\n" => "\e\r", "^_" => (s::MIState,o...)->edit_undo!(s), "\e_" => (s::MIState,o...)->edit_redo!(s), - # Simply insert it into the buffer by default - "*" => (s::MIState,data,c::StringLike)->(edit_insert(s, c)), + # Show hints at what tab complete would do by default + "*" => (s::MIState,data,c::StringLike)->(edit_insert(s, c); check_for_hint(s) && refresh_line(s)), "^U" => (s::MIState,o...)->edit_kill_line_backwards(s), "^K" => (s::MIState,o...)->edit_kill_line_forwards(s), "^Y" => (s::MIState,o...)->edit_yank(s), @@ -2634,7 +2778,7 @@ end run_interface(::Prompt) = nothing init_state(terminal, prompt::Prompt) = - PromptState(terminal, prompt, IOBuffer(), :off, IOBuffer[], 1, InputAreaState(1, 1), + PromptState(terminal, prompt, IOBuffer(), :off, nothing, IOBuffer[], 1, InputAreaState(1, 1), #=indent(spaces)=# -1, Threads.SpinLock(), 0.0, -Inf, nothing) function init_state(terminal, m::ModalInterface) @@ -2731,44 +2875,61 @@ keymap_data(ms::MIState, m::ModalInterface) = keymap_data(state(ms), mode(ms)) function prompt!(term::TextTerminal, prompt::ModalInterface, s::MIState = init_state(term, prompt)) Base.reseteof(term) + l = Base.ReentrantLock() + t1 = Threads.@spawn :interactive while true + wait(s.async_channel) + status = @lock l begin + fcn = take!(s.async_channel) + fcn(s) + end + status ∈ (:ok, :ignore) || break + end raw!(term, true) enable_bracketed_paste(term) try activate(prompt, s, term, term) old_state = mode(s) - while true - kmap = keymap(s, prompt) - fcn = match_input(kmap, s) - kdata = keymap_data(s, prompt) - s.current_action = :unknown # if the to-be-run action doesn't update this field, - # :unknown will be recorded in the last_action field - local status - # errors in keymaps shouldn't cause the REPL to fail, so wrap in a - # try/catch block - try - status = fcn(s, kdata) - catch e - @error "Error in the keymap" exception=e,catch_backtrace() - # try to cleanup and get `s` back to its original state before returning - transition(s, :reset) - transition(s, old_state) - status = :done - end - status !== :ignore && (s.last_action = s.current_action) - if status === :abort - s.aborted = true - return buffer(s), false, false - elseif status === :done - return buffer(s), true, false - elseif status === :suspend - if Sys.isunix() - return buffer(s), true, true + # spawn this because the 
main repl task is sticky (due to use of @async and _wait2) + # and we want to not block typing when the repl task thread is busy + t2 = Threads.@spawn :interactive while true + eof(term) || peek(term) # wait before locking but don't consume + @lock l begin + kmap = keymap(s, prompt) + fcn = match_input(kmap, s) + kdata = keymap_data(s, prompt) + s.current_action = :unknown # if the to-be-run action doesn't update this field, + # :unknown will be recorded in the last_action field + local status + # errors in keymaps shouldn't cause the REPL to fail, so wrap in a + # try/catch block + try + status = fcn(s, kdata) + catch e + @error "Error in the keymap" exception=e,catch_backtrace() + # try to cleanup and get `s` back to its original state before returning + transition(s, :reset) + transition(s, old_state) + status = :done + end + status !== :ignore && (s.last_action = s.current_action) + if status === :abort + s.aborted = true + return buffer(s), false, false + elseif status === :done + return buffer(s), true, false + elseif status === :suspend + if Sys.isunix() + return buffer(s), true, true + end + else + @assert status ∈ (:ok, :ignore) end - else - @assert status ∈ (:ok, :ignore) end end + return fetch(t2) finally + put!(s.async_channel, Returns(:done)) + wait(t1) raw!(term, false) && disable_bracketed_paste(term) end # unreachable diff --git a/stdlib/REPL/src/Pkg_beforeload.jl b/stdlib/REPL/src/Pkg_beforeload.jl new file mode 100644 index 0000000000000..86b5cd35abd2f --- /dev/null +++ b/stdlib/REPL/src/Pkg_beforeload.jl @@ -0,0 +1,120 @@ +## Pkg stuff needed before Pkg has loaded + +const Pkg_pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg") +load_pkg() = Base.require_stdlib(Pkg_pkgid, "REPLExt") + +## Below here copied/tweaked from Pkg Types.jl so that the dummy Pkg prompt +# can populate the env correctly before Pkg loads + +function safe_realpath(path) + isempty(path) && return path + if ispath(path) + try + return realpath(path) + catch + return path + end + end + a, b = splitdir(path) + return joinpath(safe_realpath(a), b) +end + +function find_project_file(env::Union{Nothing,String}=nothing) + project_file = nothing + if env isa Nothing + project_file = Base.active_project() + project_file === nothing && return nothing # in the Pkg version these are pkgerrors + elseif startswith(env, '@') + project_file = Base.load_path_expand(env) + project_file === nothing && return nothing + elseif env isa String + if isdir(env) + isempty(readdir(env)) || return nothing + project_file = joinpath(env, Base.project_names[end]) + else + project_file = endswith(env, ".toml") ? 
abspath(env) : + abspath(env, Base.project_names[end]) + end + end + @assert project_file isa String && + (isfile(project_file) || !ispath(project_file) || + isdir(project_file) && isempty(readdir(project_file))) + return safe_realpath(project_file) +end + +function find_root_base_project(start_project::String) + project_file = start_project + while true + base_project_file = Base.base_project(project_file) + base_project_file === nothing && return project_file + project_file = base_project_file + end +end + +function relative_project_path(project_file::String, path::String) + # compute path relative the project + # realpath needed to expand symlinks before taking the relative path + return relpath(safe_realpath(abspath(path)), safe_realpath(dirname(project_file))) +end + +function projname(project_file::String) + if isfile(project_file) + name = try + # The `nothing` here means that this TOML parser does not return proper Dates.jl + # objects - but that's OK since we're just checking the name here. + p = Base.TOML.Parser{nothing}() + Base.TOML.reinit!(p, read(project_file, String); filepath=project_file) + proj = Base.TOML.parse(p) + get(proj, "name", nothing) + catch + nothing + end + else + name = nothing + end + if name === nothing + name = basename(dirname(project_file)) + end + for depot in Base.DEPOT_PATH + envdir = joinpath(depot, "environments") + if startswith(safe_realpath(project_file), safe_realpath(envdir)) + return "@" * name + end + end + return name +end + +prev_project_file = nothing +prev_project_timestamp = nothing +prev_prefix = "" + +function Pkg_promptf() + global prev_project_timestamp, prev_prefix, prev_project_file + project_file = find_project_file() + prefix = "" + if project_file !== nothing + if prev_project_file == project_file && prev_project_timestamp == mtime(project_file) + prefix = prev_prefix + else + project_name = projname(project_file) + if project_name !== nothing + root = find_root_base_project(project_file) + rootname = projname(root) + if root !== project_file + path_prefix = "/" * dirname(relative_project_path(root, project_file)) + else + path_prefix = "" + end + if textwidth(rootname) > 30 + rootname = first(rootname, 27) * "..." 
+ end + prefix = "($(rootname)$(path_prefix)) " + prev_prefix = prefix + prev_project_timestamp = mtime(project_file) + prev_project_file = project_file + end + end + end + # Note no handling of Pkg.offline, as the Pkg version does here + return "$(prefix)$(PKG_PROMPT)" +end diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index f8bb442ad6ec4..6c3f4bd4ba73a 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -17,7 +17,85 @@ module REPL Base.Experimental.@optlevel 1 Base.Experimental.@max_methods 1 -using Base.Meta, Sockets +function UndefVarError_hint(io::IO, ex::UndefVarError) + var = ex.var + if var === :or + print(io, "\nSuggestion: Use `||` for short-circuiting boolean OR.") + elseif var === :and + print(io, "\nSuggestion: Use `&&` for short-circuiting boolean AND.") + elseif var === :help + println(io) + # Show friendly help message when user types help or help() and help is undefined + show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help])) + elseif var === :quit + print(io, "\nSuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.") + end + if isdefined(ex, :scope) + scope = ex.scope + if scope isa Module + bpart = Base.lookup_binding_partition(Base.get_world_counter(), GlobalRef(scope, var)) + kind = Base.binding_kind(bpart) + if kind === Base.BINDING_KIND_GLOBAL || kind === Base.BINDING_KIND_CONST || kind == Base.BINDING_KIND_DECLARED + print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.") + elseif kind === Base.BINDING_KIND_FAILED + print(io, "\nHint: It looks like two or more modules export different ", + "bindings with this name, resulting in ambiguity. Try explicitly ", + "importing it from a particular module, or qualifying the name ", + "with the module it should come from.") + elseif kind === Base.BINDING_KIND_GUARD + print(io, "\nSuggestion: check for spelling errors or missing imports.") + else + print(io, "\nSuggestion: this global was defined as `$(bpart.restriction.globalref)` but not assigned a value.") + end + elseif scope === :static_parameter + print(io, "\nSuggestion: run Test.detect_unbound_args to detect method arguments that do not fully constrain a type parameter.") + elseif scope === :local + print(io, "\nSuggestion: check for an assignment to a local variable that shadows a global of the same name.") + end + else + scope = undef + end + if scope !== Base && !_UndefVarError_warnfor(io, Base, var) + warned = false + for m in Base.loaded_modules_order + m === Core && continue + m === Base && continue + m === Main && continue + m === scope && continue + warned |= _UndefVarError_warnfor(io, m, var) + end + warned || + _UndefVarError_warnfor(io, Core, var) || + _UndefVarError_warnfor(io, Main, var) + end + return nothing +end + +function _UndefVarError_warnfor(io::IO, m::Module, var::Symbol) + Base.isbindingresolved(m, var) || return false + (Base.isexported(m, var) || Base.ispublic(m, var)) || return false + active_mod = Base.active_module() + print(io, "\nHint: ") + if isdefined(active_mod, Symbol(m)) + print(io, "a global variable of this name also exists in $m.") + else + if Symbol(m) == var + print(io, "$m is loaded but not imported in the active module $active_mod.") + else + print(io, "a global variable of this name may be made accessible by importing $m in the current active module $active_mod") + end + end + return true +end + +function __init__() + Base.REPL_MODULE_REF[] = REPL + 
Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError) + return nothing +end + +using Base.Meta, Sockets, StyledStrings +using JuliaSyntaxHighlighting import InteractiveUtils export @@ -26,6 +104,8 @@ export LineEditREPL, StreamREPL +public TerminalMenus + import Base: AbstractDisplay, display, @@ -44,7 +124,7 @@ include("options.jl") include("LineEdit.jl") using .LineEdit -import ..LineEdit: +import .LineEdit: CompletionProvider, HistoryProvider, add_history, @@ -56,12 +136,10 @@ import ..LineEdit: history_first, history_last, history_search, - accept_result, setmodifiers!, terminal, MIState, PromptState, - TextInterface, mode_idx include("REPLCompletions.jl") @@ -70,6 +148,8 @@ using .REPLCompletions include("TerminalMenus/TerminalMenus.jl") include("docview.jl") +include("Pkg_beforeload.jl") + @nospecialize # use only declared type signatures answer_color(::AbstractREPL) = "" @@ -123,7 +203,129 @@ end # Temporary alias until Documenter updates const softscope! = softscope -const repl_ast_transforms = Any[softscope] # defaults for new REPL backends +function print_qualified_access_warning(mod::Module, owner::Module, name::Symbol) + @warn string(name, " is defined in ", owner, " and is not public in ", mod) maxlog = 1 _id = string("repl-warning-", mod, "-", owner, "-", name) _line = nothing _file = nothing _module = nothing +end + +function has_ancestor(query::Module, target::Module) + query == target && return true + while true + next = parentmodule(query) + next == target && return true + next == query && return false + query = next + end +end + +retrieve_modules(::Module, ::Any) = (nothing,) +function retrieve_modules(current_module::Module, mod_name::Symbol) + mod = try + getproperty(current_module, mod_name) + catch + return (nothing,) + end + return (mod isa Module ? mod : nothing,) +end +retrieve_modules(current_module::Module, mod_name::QuoteNode) = retrieve_modules(current_module, mod_name.value) +function retrieve_modules(current_module::Module, mod_expr::Expr) + if Meta.isexpr(mod_expr, :., 2) + current_module = retrieve_modules(current_module, mod_expr.args[1])[1] + current_module === nothing && return (nothing,) + return (current_module, retrieve_modules(current_module, mod_expr.args[2])...) + else + return (nothing,) + end +end + +add_locals!(locals, ast::Any) = nothing +function add_locals!(locals, ast::Expr) + for arg in ast.args + add_locals!(locals, arg) + end + return nothing +end +function add_locals!(locals, ast::Symbol) + push!(locals, ast) + return nothing +end + +function collect_names_to_warn!(warnings, locals, current_module::Module, ast) + ast isa Expr || return + + # don't recurse through module definitions + ast.head === :module && return + + if Meta.isexpr(ast, :., 2) + mod_name, name_being_accessed = ast.args + # retrieve the (possibly-nested) module being named here + mods = retrieve_modules(current_module, mod_name) + all(x -> x isa Module, mods) || return + outer_mod = first(mods) + mod = last(mods) + if name_being_accessed isa QuoteNode + name_being_accessed = name_being_accessed.value + end + name_being_accessed isa Symbol || return + owner = try + which(mod, name_being_accessed) + catch + return + end + # if `owner` is a submodule of `mod`, then don't warn. E.g. the name `parse` is present in the module `JSON` + # but is owned by `JSON.Parser`; we don't warn if it is accessed as `JSON.parse`. 
+ has_ancestor(owner, mod) && return + # Don't warn if the name is public in the module we are accessing it + Base.ispublic(mod, name_being_accessed) && return + # Don't warn if accessing names defined in Core from Base if they are present in Base (e.g. `Base.throw`). + mod === Base && Base.ispublic(Core, name_being_accessed) && return + push!(warnings, (; outer_mod, mod, owner, name_being_accessed)) + # no recursion + return + elseif Meta.isexpr(ast, :(=), 2) + lhs, rhs = ast.args + # any symbols we find on the LHS we will count as local. This can potentially be overzealous, + # but we want to avoid false positives (unnecessary warnings) more than false negatives. + add_locals!(locals, lhs) + # we'll recurse into the RHS only + return collect_names_to_warn!(warnings, locals, current_module, rhs) + elseif Meta.isexpr(ast, :function) && length(ast.args) >= 1 + + if Meta.isexpr(ast.args[1], :call, 2) + func_name, func_args = ast.args[1].args + # here we have a function definition and are inspecting it's arguments for local variables. + # we will error on the conservative side by adding all symbols we find (regardless if they are local variables or possibly-global default values) + add_locals!(locals, func_args) + end + # fall through to general recursion + end + + for arg in ast.args + collect_names_to_warn!(warnings, locals, current_module, arg) + end + + return nothing +end + +function collect_qualified_access_warnings(current_mod, ast) + warnings = Set() + locals = Set{Symbol}() + collect_names_to_warn!(warnings, locals, current_mod, ast) + filter!(warnings) do (; outer_mod) + nameof(outer_mod) ∉ locals + end + return warnings +end + +function warn_on_non_owning_accesses(current_mod, ast) + warnings = collect_qualified_access_warnings(current_mod, ast) + for (; outer_mod, mod, owner, name_being_accessed) in warnings + print_qualified_access_warning(mod, owner, name_being_accessed) + end + return ast +end +warn_on_non_owning_accesses(ast) = warn_on_non_owning_accesses(Base.active_module(), ast) + +const repl_ast_transforms = Any[softscope, warn_on_non_owning_accesses] # defaults for new REPL backends # Allows an external package to add hooks into the code loading. # The hook should take a Vector{Symbol} of package names and @@ -131,6 +333,27 @@ const repl_ast_transforms = Any[softscope] # defaults for new REPL backends # to e.g. install packages on demand const install_packages_hooks = Any[] +# N.B.: Any functions starting with __repl_entry cut off backtraces when printing in the REPL. +# We need to do this for both the actual eval and macroexpand, since the latter can cause custom macro +# code to run (and error). 
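For context, a minimal sketch of the situation the qualified-access warning above is meant to catch (module names here are hypothetical; the snippet is illustrative only):

module Inner
    secret() = 1
end
module Outer
    using ..Inner: secret   # a binding for `secret` now exists in Outer, but it is
end                         # owned by Inner and not declared public in Outer
# Evaluating `Outer.secret()` at the REPL goes through `warn_on_non_owning_accesses`,
# which ends up calling roughly `print_qualified_access_warning(Outer, Inner, :secret)`
# and emits a one-time warning that `secret` is defined in Inner and is not public in Outer.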
+__repl_entry_lower_with_loc(mod::Module, @nospecialize(ast), toplevel_file::Ref{Ptr{UInt8}}, toplevel_line::Ref{Cint}) = + ccall(:jl_expand_with_loc, Any, (Any, Any, Ptr{UInt8}, Cint), ast, mod, toplevel_file[], toplevel_line[]) +__repl_entry_eval_expanded_with_loc(mod::Module, @nospecialize(ast), toplevel_file::Ref{Ptr{UInt8}}, toplevel_line::Ref{Cint}) = + ccall(:jl_toplevel_eval_flex, Any, (Any, Any, Cint, Cint, Ptr{Ptr{UInt8}}, Ptr{Cint}), mod, ast, 1, 1, toplevel_file, toplevel_line) + +function toplevel_eval_with_hooks(mod::Module, @nospecialize(ast), toplevel_file=Ref{Ptr{UInt8}}(Base.unsafe_convert(Ptr{UInt8}, :REPL)), toplevel_line=Ref{Cint}(1)) + if !isexpr(ast, :toplevel) + ast = invokelatest(__repl_entry_lower_with_loc, mod, ast, toplevel_file, toplevel_line) + check_for_missing_packages_and_run_hooks(ast) + return invokelatest(__repl_entry_eval_expanded_with_loc, mod, ast, toplevel_file, toplevel_line) + end + local value=nothing + for i = 1:length(ast.args) + value = toplevel_eval_with_hooks(mod, ast.args[i], toplevel_file, toplevel_line) + end + return value +end + function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module) lasterr = nothing Base.sigatomic_begin() @@ -141,13 +364,10 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module) put!(backend.response_channel, Pair{Any, Bool}(lasterr, true)) else backend.in_eval = true - if !isempty(install_packages_hooks) - check_for_missing_packages_and_run_hooks(ast) - end for xf in backend.ast_transforms ast = Base.invokelatest(xf, ast) end - value = Core.eval(mod, ast) + value = toplevel_eval_with_hooks(mod, ast) backend.in_eval = false setglobal!(Base.MainInclude, :ans, value) put!(backend.response_channel, Pair{Any, Bool}(value, false)) @@ -170,33 +390,49 @@ function check_for_missing_packages_and_run_hooks(ast) mods = modules_to_be_loaded(ast) filter!(mod -> isnothing(Base.identify_package(String(mod))), mods) # keep missing modules if !isempty(mods) + isempty(install_packages_hooks) && load_pkg() for f in install_packages_hooks Base.invokelatest(f, mods) && return end end end -function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[]) +function _modules_to_be_loaded!(ast::Expr, mods::Vector{Symbol}) ast.head === :quote && return mods # don't search if it's not going to be run during this eval if ast.head === :using || ast.head === :import for arg in ast.args arg = arg::Expr arg1 = first(arg.args) if arg1 isa Symbol # i.e. `Foo` - if arg1 != :. # don't include local imports + if arg1 != :. # don't include local import `import .Foo` push!(mods, arg1) end else # i.e. `Foo: bar` - push!(mods, first((arg1::Expr).args)) + sym = first((arg1::Expr).args)::Symbol + if sym != :. 
# don't include local import `import .Foo: a` + push!(mods, sym) + end end end end - for arg in ast.args - if isexpr(arg, (:block, :if, :using, :import)) - modules_to_be_loaded(arg, mods) + if ast.head !== :thunk + for arg in ast.args + if isexpr(arg, (:block, :if, :using, :import)) + _modules_to_be_loaded!(arg, mods) + end + end + else + code = ast.args[1] + for arg in code.code + isa(arg, Expr) || continue + _modules_to_be_loaded!(arg, mods) end end - filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules +end + +function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[]) + _modules_to_be_loaded!(ast, mods) + filter!(mod::Symbol -> !in(mod, (:Base, :Main, :Core)), mods) # Exclude special non-package modules return unique(mods) end @@ -248,16 +484,74 @@ function repl_backend_loop(backend::REPLBackend, get_module::Function) return nothing end -struct REPLDisplay{R<:AbstractREPL} <: AbstractDisplay - repl::R +SHOW_MAXIMUM_BYTES::Int = 1_048_576 + +# Limit printing during REPL display +mutable struct LimitIO{IO_t <: IO} <: IO + io::IO_t + maxbytes::Int + n::Int # max bytes to write +end +LimitIO(io::IO, maxbytes) = LimitIO(io, maxbytes, 0) + +struct LimitIOException <: Exception + maxbytes::Int +end + +function Base.showerror(io::IO, e::LimitIOException) + print(io, "$LimitIOException: aborted printing after attempting to print more than $(Base.format_bytes(e.maxbytes)) within a `LimitIO`.") +end + +function Base.write(io::LimitIO, v::UInt8) + io.n > io.maxbytes && throw(LimitIOException(io.maxbytes)) + n_bytes = write(io.io, v) + io.n += n_bytes + return n_bytes end -==(a::REPLDisplay, b::REPLDisplay) = a.repl === b.repl +# Semantically, we only need to override `Base.write`, but we also +# override `unsafe_write` for performance. +function Base.unsafe_write(limiter::LimitIO, p::Ptr{UInt8}, nb::UInt) + # already exceeded? throw + limiter.n > limiter.maxbytes && throw(LimitIOException(limiter.maxbytes)) + remaining = limiter.maxbytes - limiter.n # >= 0 + + # Not enough bytes left; we will print up to the limit, then throw + if remaining < nb + if remaining > 0 + Base.unsafe_write(limiter.io, p, remaining) + end + throw(LimitIOException(limiter.maxbytes)) + end + + # We won't hit the limit so we'll write the full `nb` bytes + bytes_written = Base.unsafe_write(limiter.io, p, nb)::Union{Int,UInt} + limiter.n += bytes_written + return bytes_written +end + +struct REPLDisplay{Repl<:AbstractREPL} <: AbstractDisplay + repl::Repl +end + +function show_limited(io::IO, mime::MIME, x) + try + # We wrap in a LimitIO to limit the amount of printing. + # We unpack `IOContext`s, since we will pass the properties on the outside. + inner = io isa IOContext ? 
io.io : io + wrapped_limiter = IOContext(LimitIO(inner, SHOW_MAXIMUM_BYTES), io) + # `show_repl` to allow the hook with special syntax highlighting + show_repl(wrapped_limiter, mime, x) + catch e + e isa LimitIOException || rethrow() + printstyled(io, """…[printing stopped after displaying $(Base.format_bytes(e.maxbytes)); call `show(stdout, MIME"text/plain"(), ans)` to print without truncation]"""; color=:light_yellow, bold=true) + end +end function display(d::REPLDisplay, mime::MIME"text/plain", x) x = Ref{Any}(x) with_repl_linfo(d.repl) do io - io = IOContext(io, :limit => true, :module => active_module(d)::Module) + io = IOContext(io, :limit => true, :module => Base.active_module(d)::Module) if d.repl isa LineEditREPL mistate = d.repl.mistate mode = LineEdit.mode(mistate) @@ -270,21 +564,41 @@ function display(d::REPLDisplay, mime::MIME"text/plain", x) # this can override the :limit property set initially io = foldl(IOContext, d.repl.options.iocontext, init=io) end - show(io, mime, x[]) + show_limited(io, mime, x[]) println(io) end return nothing end + display(d::REPLDisplay, x) = display(d, MIME("text/plain"), x) +show_repl(io::IO, mime::MIME"text/plain", x) = show(io, mime, x) + +show_repl(io::IO, ::MIME"text/plain", ex::Expr) = + print(io, JuliaSyntaxHighlighting.highlight( + sprint(show, ex, context=IOContext(io, :color => false)))) + function print_response(repl::AbstractREPL, response, show_value::Bool, have_color::Bool) repl.waserror = response[2] with_repl_linfo(repl) do io - io = IOContext(io, :module => active_module(repl)::Module) + io = IOContext(io, :module => Base.active_module(repl)::Module) print_response(io, response, show_value, have_color, specialdisplay(repl)) end return nothing end + +function repl_display_error(errio::IO, @nospecialize errval) + # this will be set to true if types in the stacktrace are truncated + limitflag = Ref(false) + errio = IOContext(errio, :stacktrace_types_limited => limitflag) + Base.invokelatest(Base.display_error, errio, errval) + if limitflag[] + print(errio, "Some type information was truncated. Use `show(err)` to see complete types.") + println(errio) + end + return nothing +end + function print_response(errio::IO, response, show_value::Bool, have_color::Bool, specialdisplay::Union{AbstractDisplay,Nothing}=nothing) Base.sigatomic_begin() val, iserr = response @@ -294,7 +608,7 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool, if iserr val = Base.scrub_repl_backtrace(val) Base.istrivialerror(val) || setglobal!(Base.MainInclude, :err, val) - Base.invokelatest(Base.display_error, errio, val) + repl_display_error(errio, val) else if val !== nothing && show_value try @@ -317,7 +631,7 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool, try excs = Base.scrub_repl_backtrace(current_exceptions()) setglobal!(Base.MainInclude, :err, excs) - Base.invokelatest(Base.display_error, errio, excs) + repl_display_error(errio, excs) catch e # at this point, only print the name of the type as a Symbol to # minimize the possibility of further errors. 
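As a rough usage sketch of the byte-limiting wrapper introduced above (the 5-byte limit is an arbitrary choice for illustration; the REPL itself uses SHOW_MAXIMUM_BYTES):

buf = IOBuffer()
limited = REPL.LimitIO(buf, 5)     # abort output once more than 5 bytes have been written
write(limited, "abc")              # fine: running byte count is now 3
try
    write(limited, "defghi")       # writes the 2 remaining bytes, then throws
catch err
    err isa REPL.LimitIOException  # true; `show_limited` catches exactly this and prints
end                                # the "printing stopped after ..." notice instead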
@@ -368,7 +682,7 @@ function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); ba Core.println(Core.stderr, e) Core.println(Core.stderr, catch_backtrace()) end - get_module = () -> active_module(repl) + get_module = () -> Base.active_module(repl) if backend_on_current_task t = @async run_frontend(repl, backend_ref) errormonitor(t) @@ -452,6 +766,7 @@ mutable struct LineEditREPL <: AbstractREPL answer_color::String shell_color::String help_color::String + pkg_color::String history_file::Bool in_shell::Bool in_help::Bool @@ -464,13 +779,13 @@ mutable struct LineEditREPL <: AbstractREPL interface::ModalInterface backendref::REPLBackendRef frontend_task::Task - function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell,in_help,envcolors) + function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,pkg_color,history_file,in_shell,in_help,envcolors) opts = Options() opts.hascolor = hascolor if !hascolor opts.beep_colors = [""] end - new(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell, + new(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,pkg_color,history_file,in_shell, in_help,envcolors,false,nothing, opts, nothing, Tuple{String,Int}[]) end end @@ -487,6 +802,7 @@ LineEditREPL(t::TextTerminal, hascolor::Bool, envcolors::Bool=false) = hascolor ? Base.answer_color() : "", hascolor ? Base.text_colors[:red] : "", hascolor ? Base.text_colors[:yellow] : "", + hascolor ? Base.text_colors[:blue] : "", false, false, false, envcolors ) @@ -498,13 +814,11 @@ REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers()) mutable struct ShellCompletionProvider <: CompletionProvider end struct LatexCompletions <: CompletionProvider end -function active_module() # this method is also called from Base - isdefined(Base, :active_repl) || return Main - return active_module(Base.active_repl::AbstractREPL) -end -active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module -active_module(::AbstractREPL) = Main -active_module(d::REPLDisplay) = active_module(d.repl) +Base.active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module +Base.active_module(::AbstractREPL) = Main +Base.active_module(d::REPLDisplay) = Base.active_module(d.repl) + +setmodifiers!(c::CompletionProvider, m::LineEdit.Modifiers) = nothing setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = m @@ -514,37 +828,37 @@ setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = Set `mod` as the default contextual module in the REPL, both for evaluating expressions and printing them. 
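For illustration, a brief hypothetical session showing the effect:

    julia> REPL.activate(Base)

    (Base) julia>   # the contextual prompt now shows the active module as a prefix

    (Base) julia> REPL.activate()   # back to Main; pass `interactive_utils=false` to skip
                                    # loading InteractiveUtils into the activated module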
""" -function activate(mod::Module=Main) +function activate(mod::Module=Main; interactive_utils::Bool=true) mistate = (Base.active_repl::LineEditREPL).mistate mistate === nothing && return nothing mistate.active_module = mod - Base.load_InteractiveUtils(mod) + interactive_utils && Base.load_InteractiveUtils(mod) return nothing end beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1]) -function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module) +function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module; hint::Bool=false) partial = beforecursor(s.input_buffer) full = LineEdit.input_string(s) - ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift) + ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift, hint) c.modifiers = LineEdit.Modifiers() - return unique!(map(completion_text, ret)), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end -function complete_line(c::ShellCompletionProvider, s::PromptState) +function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=false) # First parse everything up to the current position partial = beforecursor(s.input_buffer) full = LineEdit.input_string(s) - ret, range, should_complete = shell_completions(full, lastindex(partial)) - return unique!(map(completion_text, ret)), partial[range], should_complete + ret, range, should_complete = shell_completions(full, lastindex(partial), hint) + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end -function complete_line(c::LatexCompletions, s) +function complete_line(c::LatexCompletions, s; hint::Bool=false) partial = beforecursor(LineEdit.buffer(s)) full = LineEdit.input_string(s)::String - ret, range, should_complete = bslash_completions(full, lastindex(partial))[2] - return unique!(map(completion_text, ret)), partial[range], should_complete + ret, range, should_complete = bslash_completions(full, lastindex(partial), hint)[2] + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end with_repl_linfo(f, repl) = f(outstream(repl)) @@ -868,7 +1182,7 @@ end find_hist_file() = get(ENV, "JULIA_HISTORY", !isempty(DEPOT_PATH) ? joinpath(DEPOT_PATH[1], "logs", "repl_history.jl") : - error("DEPOT_PATH is empty and and ENV[\"JULIA_HISTORY\"] not set.")) + error("DEPOT_PATH is empty and ENV[\"JULIA_HISTORY\"] not set.")) backend(r::AbstractREPL) = r.backendref @@ -941,7 +1255,7 @@ enable_promptpaste(v::Bool) = JL_PROMPT_PASTE[] = v function contextual_prompt(repl::LineEditREPL, prompt::Union{String,Function}) function () - mod = active_module(repl) + mod = Base.active_module(repl) prefix = mod == Main ? "" : string('(', mod, ") ") pr = prompt isa String ? prompt : prompt() prefix * pr @@ -955,6 +1269,7 @@ setup_interface( extra_repl_keymap::Any = repl.options.extra_keymap ) = setup_interface(repl, hascolor, extra_repl_keymap) + # This non keyword method can be precompiled which is important function setup_interface( repl::LineEditREPL, @@ -1002,7 +1317,7 @@ function setup_interface( on_enter = return_callback) # Setup help mode - help_mode = Prompt(contextual_prompt(repl, "help?> "), + help_mode = Prompt(contextual_prompt(repl, HELP_PROMPT), prompt_prefix = hascolor ? repl.help_color : "", prompt_suffix = hascolor ? (repl.envcolors ? 
Base.input_color : repl.input_color) : "", @@ -1030,6 +1345,34 @@ function setup_interface( end, sticky = true) + # Set up dummy Pkg mode that will be replaced once Pkg is loaded + # use 6 dots to occupy the same space as the most likely "@v1.xx" env name + dummy_pkg_mode = Prompt(Pkg_promptf, + prompt_prefix = hascolor ? repl.pkg_color : "", + prompt_suffix = hascolor ? + (repl.envcolors ? Base.input_color : repl.input_color) : "", + repl = repl, + complete = LineEdit.EmptyCompletionProvider(), + on_done = respond(line->nothing, repl, julia_prompt), + on_enter = function (s::MIState) + # This is hit when the user tries to execute a command before the real Pkg mode has been + # switched to. Ok to do this even if Pkg is loading on the other task because of the loading lock. + REPLExt = load_pkg() + if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider) + for mode in repl.interface.modes + if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider + # pkg mode + buf = copy(LineEdit.buffer(s)) + transition(s, mode) do + LineEdit.state(s, mode).input_buffer = buf + end + end + end + end + return true + end, + sticky = true) + ################################# Stage II ############################# @@ -1037,7 +1380,8 @@ function setup_interface( # We will have a unified history for all REPL modes hp = REPLHistoryProvider(Dict{Symbol,Prompt}(:julia => julia_prompt, :shell => shell_mode, - :help => help_mode)) + :help => help_mode, + :pkg => dummy_pkg_mode)) if repl.history_file try hist_path = find_hist_file() @@ -1060,6 +1404,7 @@ function setup_interface( julia_prompt.hist = hp shell_mode.hist = hp help_mode.hist = hp + dummy_pkg_mode.hist = hp julia_prompt.on_done = respond(x->Base.parse_input_line(x,filename=repl_filename(repl,hp)), repl, julia_prompt) @@ -1069,8 +1414,8 @@ function setup_interface( shell_prompt_len = length(SHELL_PROMPT) help_prompt_len = length(HELP_PROMPT) - jl_prompt_regex = r"^In \[[0-9]+\]: |^(?:\(.+\) )?julia> " - pkg_prompt_regex = r"^(?:\(.+\) )?pkg> " + jl_prompt_regex = Regex("^In \\[[0-9]+\\]: |^(?:\\(.+\\) )?$JULIA_PROMPT") + pkg_prompt_regex = Regex("^(?:\\(.+\\) )?$PKG_PROMPT") # Canonicalize user keymap input if isa(extra_repl_keymap, Dict) @@ -1086,6 +1431,7 @@ function setup_interface( end else edit_insert(s, ';') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, '?' => function (s::MIState,o...) @@ -1096,6 +1442,44 @@ function setup_interface( end else edit_insert(s, '?') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) + end + end, + ']' => function (s::MIState,o...) 
+ if isempty(s) || position(LineEdit.buffer(s)) == 0 + buf = copy(LineEdit.buffer(s)) + transition(s, dummy_pkg_mode) do + LineEdit.state(s, dummy_pkg_mode).input_buffer = buf + end + # load Pkg on another thread if available so that typing in the dummy Pkg prompt + # isn't blocked, but instruct the main REPL task to do the transition via s.async_channel + t_replswitch = Threads.@spawn begin + REPLExt = load_pkg() + if REPLExt isa Module && isdefined(REPLExt, :PkgCompletionProvider) + put!(s.async_channel, + function (s::MIState) + LineEdit.mode(s) === dummy_pkg_mode || return :ok + for mode in repl.interface.modes + if mode isa LineEdit.Prompt && mode.complete isa REPLExt.PkgCompletionProvider + buf = copy(LineEdit.buffer(s)) + transition(s, mode) do + LineEdit.state(s, mode).input_buffer = buf + end + if !isempty(s) && @invokelatest(LineEdit.check_for_hint(s)) + @invokelatest(LineEdit.refresh_line(s)) + end + break + end + end + return :ok + end + ) + end + end + Base.errormonitor(t_replswitch) + else + edit_insert(s, ']') + LineEdit.check_for_hint(s) && LineEdit.refresh_line(s) end end, @@ -1230,7 +1614,7 @@ function setup_interface( # execute the statement terminal = LineEdit.terminal(s) # This is slightly ugly but ok for now raw!(terminal, false) && disable_bracketed_paste(terminal) - LineEdit.mode(s).on_done(s, LineEdit.buffer(s), true) + @invokelatest LineEdit.mode(s).on_done(s, LineEdit.buffer(s), true) raw!(terminal, true) && enable_bracketed_paste(terminal) LineEdit.push_undo(s) # when the last line is incomplete end @@ -1276,9 +1660,9 @@ function setup_interface( b = Dict{Any,Any}[skeymap, mk, prefix_keymap, LineEdit.history_keymap, LineEdit.default_keymap, LineEdit.escape_defaults] prepend!(b, extra_repl_keymap) - shell_mode.keymap_dict = help_mode.keymap_dict = LineEdit.keymap(b) + shell_mode.keymap_dict = help_mode.keymap_dict = dummy_pkg_mode.keymap_dict = LineEdit.keymap(b) - allprompts = LineEdit.TextInterface[julia_prompt, shell_mode, help_mode, search_prompt, prefix_prompt] + allprompts = LineEdit.TextInterface[julia_prompt, shell_mode, help_mode, dummy_pkg_mode, search_prompt, prefix_prompt] return ModalInterface(allprompts) end @@ -1367,10 +1751,80 @@ ends_with_semicolon(code::AbstractString) = ends_with_semicolon(String(code)) ends_with_semicolon(code::Union{String,SubString{String}}) = contains(_rm_strings_and_comments(code), r";\s*$") +function banner(io::IO = stdout; short = false) + if Base.GIT_VERSION_INFO.tagged_commit + commit_string = Base.TAGGED_RELEASE_BANNER + elseif isempty(Base.GIT_VERSION_INFO.commit) + commit_string = "" + else + days = Int(floor((ccall(:jl_clock_now, Float64, ()) - Base.GIT_VERSION_INFO.fork_master_timestamp) / (60 * 60 * 24))) + days = max(0, days) + unit = days == 1 ? "day" : "days" + distance = Base.GIT_VERSION_INFO.fork_master_distance + commit = Base.GIT_VERSION_INFO.commit_short + + if distance == 0 + commit_string = "Commit $(commit) ($(days) $(unit) old master)" + else + branch = Base.GIT_VERSION_INFO.branch + commit_string = "$(branch)/$(commit) (fork: $(distance) commits, $(days) $(unit))" + end + end + + commit_date = isempty(Base.GIT_VERSION_INFO.date_string) ? 
"" : " ($(split(Base.GIT_VERSION_INFO.date_string)[1]))" + + if get(io, :color, false)::Bool + c = Base.text_colors + tx = c[:normal] # text + jl = c[:normal] # julia + d1 = c[:bold] * c[:blue] # first dot + d2 = c[:bold] * c[:red] # second dot + d3 = c[:bold] * c[:green] # third dot + d4 = c[:bold] * c[:magenta] # fourth dot + + if short + print(io,""" + $(d3)o$(tx) | Version $(VERSION)$(commit_date) + $(d2)o$(tx) $(d4)o$(tx) | $(commit_string) + """) + else + print(io,""" $(d3)_$(tx) + $(d1)_$(tx) $(jl)_$(tx) $(d2)_$(d3)(_)$(d4)_$(tx) | Documentation: https://docs.julialang.org + $(d1)(_)$(jl) | $(d2)(_)$(tx) $(d4)(_)$(tx) | + $(jl)_ _ _| |_ __ _$(tx) | Type \"?\" for help, \"]?\" for Pkg help. + $(jl)| | | | | | |/ _` |$(tx) | + $(jl)| | |_| | | | (_| |$(tx) | Version $(VERSION)$(commit_date) + $(jl)_/ |\\__'_|_|_|\\__'_|$(tx) | $(commit_string) + $(jl)|__/$(tx) | + + """) + end + else + if short + print(io,""" + o | Version $(VERSION)$(commit_date) + o o | $(commit_string) + """) + else + print(io,""" + _ + _ _ _(_)_ | Documentation: https://docs.julialang.org + (_) | (_) (_) | + _ _ _| |_ __ _ | Type \"?\" for help, \"]?\" for Pkg help. + | | | | | | |/ _` | | + | | |_| | | | (_| | | Version $(VERSION)$(commit_date) + _/ |\\__'_|_|_|\\__'_| | $(commit_string) + |__/ | + + """) + end + end +end + function run_frontend(repl::StreamREPL, backend::REPLBackendRef) repl.frontend_task = current_task() have_color = hascolor(repl) - Base.banner(repl.stream) + banner(repl.stream) d = REPLDisplay(repl) dopushdisplay = !in(d,Base.Multimedia.displays) dopushdisplay && pushdisplay(d) @@ -1378,7 +1832,7 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef) if have_color print(repl.stream,repl.prompt_color) end - print(repl.stream, "julia> ") + print(repl.stream, JULIA_PROMPT) if have_color print(repl.stream, input_color(repl)) end @@ -1402,7 +1856,7 @@ module Numbered using ..REPL -__current_ast_transforms() = isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms +__current_ast_transforms() = Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms function repl_eval_counter(hp) return length(hp.history) - hp.start_idx @@ -1418,6 +1872,7 @@ function out_transform(@nospecialize(x), n::Ref{Int}) end function get_usings!(usings, ex) + ex isa Expr || return usings # get all `using` and `import` statements which are at the top level for (i, arg) in enumerate(ex.args) if Base.isexpr(arg, :toplevel) @@ -1463,14 +1918,13 @@ end function __current_ast_transforms(backend) if backend === nothing - isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms + Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms else backend.ast_transforms end end - -function numbered_prompt!(repl::LineEditREPL=Base.active_repl, backend=nothing) +function numbered_prompt!(repl::LineEditREPL=Base.active_repl::LineEditREPL, backend=nothing) n = Ref{Int}(0) set_prompt(repl, n) set_output_prefix(repl, n) @@ -1492,4 +1946,13 @@ end import .Numbered.numbered_prompt! +# this assignment won't survive precompilation, +# but will stick if REPL is baked into a sysimg. +# Needs to occur after this module is finished. 
+Base.REPL_MODULE_REF[] = REPL + +if Base.generating_output() + include("precompile.jl") +end + end # module diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 20d26953eb22b..e8aa1188ec213 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -2,12 +2,18 @@ module REPLCompletions -export completions, shell_completions, bslash_completions, completion_text +export completions, shell_completions, bslash_completions, completion_text, named_completion -using Core: CodeInfo, MethodInstance, CodeInstance, Const -const CC = Core.Compiler +using Core: Const +# We want to insulate the REPLCompletion module from any changes the user may +# make to the compiler, since it runs by default and the system becomes unusable +# if it breaks. +const CC = Base.Compiler using Base.Meta -using Base: propertynames, something +using Base: propertynames, something, IdSet +using Base.Filesystem: _readdirx + +using ..REPL.LineEdit: NamedCompletion abstract type Completion end @@ -19,6 +25,10 @@ struct KeywordCompletion <: Completion keyword::String end +struct KeyvalCompletion <: Completion + keyval::String +end + struct PathCompletion <: Completion path::String end @@ -49,8 +59,10 @@ struct MethodCompletion <: Completion end struct BslashCompletion <: Completion - bslash::String + completion::String # what is actually completed, for example "\trianglecdot" + name::String # what is displayed, for example "◬ \trianglecdot" end +BslashCompletion(completion::String) = BslashCompletion(completion, completion) struct ShellCompletion <: Completion text::String @@ -99,19 +111,28 @@ end _completion_text(c::TextCompletion) = c.text _completion_text(c::KeywordCompletion) = c.keyword +_completion_text(c::KeyvalCompletion) = c.keyval _completion_text(c::PathCompletion) = c.path _completion_text(c::ModuleCompletion) = c.mod _completion_text(c::PackageCompletion) = c.package _completion_text(c::PropertyCompletion) = sprint(Base.show_sym, c.property) _completion_text(c::FieldCompletion) = sprint(Base.show_sym, c.field) _completion_text(c::MethodCompletion) = repr(c.method) -_completion_text(c::BslashCompletion) = c.bslash _completion_text(c::ShellCompletion) = c.text _completion_text(c::DictCompletion) = c.key _completion_text(c::KeywordArgumentCompletion) = c.kwarg*'=' completion_text(c) = _completion_text(c)::String +named_completion(c::BslashCompletion) = NamedCompletion(c.completion, c.name) + +function named_completion(c) + text = completion_text(c)::String + return NamedCompletion(text, text) +end + +named_completion_completion(c) = named_completion(c).completion::String + const Completions = Tuple{Vector{Completion}, UnitRange{Int}, Bool} function completes_global(x, name) @@ -129,112 +150,269 @@ function appendmacro!(syms, macros, needle, endchar) end end -function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString, all::Bool = false, imported::Bool = false) - ssyms = names(mod, all = all, imported = imported) +function append_filtered_mod_names!(ffunc::Function, suggestions::Vector{Completion}, + mod::Module, name::String, complete_internal_only::Bool) + imported = usings = !complete_internal_only + ssyms = names(mod; all=true, imported, usings) filter!(ffunc, ssyms) macros = filter(x -> startswith(String(x), "@" * name), ssyms) + + # don't complete string and command macros when the input matches the internal name like `r_` to `r"` + if !startswith(name, "@") + filter!(macros) do m + s = String(m) + if endswith(s, "_str") || 
endswith(s, "_cmd") + occursin(name, first(s, length(s)-4)) + else + true + end + end + end + syms = String[sprint((io,s)->Base.show_sym(io, s; allow_macroname=true), s) for s in ssyms if completes_global(String(s), name)] appendmacro!(syms, macros, "_str", "\"") appendmacro!(syms, macros, "_cmd", "`") - return [ModuleCompletion(mod, sym) for sym in syms] + for sym in syms + push!(suggestions, ModuleCompletion(mod, sym)) + end + return suggestions end # REPL Symbol Completions -function complete_symbol(@nospecialize(ex), name::String, @nospecialize(ffunc), context_module::Module=Main) - mod = context_module - - lookup_module = true - t = Union{} - val = nothing - if ex !== nothing - res = repl_eval_ex(ex, context_module) +function complete_symbol!(suggestions::Vector{Completion}, + @nospecialize(prefix), name::String, context_module::Module; + complete_modules_only::Bool=false, + shift::Bool=false) + local mod, t, val + complete_internal_only = false + if prefix !== nothing + res = repl_eval_ex(prefix, context_module) res === nothing && return Completion[] if res isa Const val = res.val if isa(val, Module) mod = val - lookup_module = true + if !shift + # when module is explicitly accessed, show internal bindings that are + # defined by the module, unless shift key is pressed + complete_internal_only = true + end else - lookup_module = false t = typeof(val) end else - lookup_module = false t = CC.widenconst(res) end + else + mod = context_module end - suggestions = Completion[] - if lookup_module - # We will exclude the results that the user does not want, as well - # as excluding Main.Main.Main, etc., because that's most likely not what - # the user wants - p = let mod=mod, modname=nameof(mod) - s->(!Base.isdeprecated(mod, s) && s != modname && ffunc(mod, s)::Bool) - end - # Looking for a binding in a module - if mod == context_module - # Also look in modules we got through `using` - mods = ccall(:jl_module_usings, Any, (Any,), context_module)::Vector - for m in mods - append!(suggestions, filtered_mod_names(p, m::Module, name)) + if @isdefined(mod) # lookup names available within the module + let modname = nameof(mod), + is_main = mod===Main + append_filtered_mod_names!(suggestions, mod, name, complete_internal_only) do s::Symbol + if Base.isdeprecated(mod, s) + return false + elseif s === modname + return false # exclude `Main.Main.Main`, etc. 
+ elseif complete_modules_only && !completes_module(mod, s) + return false + elseif is_main && s === :MainInclude + return false + end + return true end - append!(suggestions, filtered_mod_names(p, mod, name, true, true)) - else - append!(suggestions, filtered_mod_names(p, mod, name, true, false)) end - elseif val !== nothing # looking for a property of an instance - for property in propertynames(val, false) - # TODO: support integer arguments (#36872) - if property isa Symbol && startswith(string(property), name) - push!(suggestions, PropertyCompletion(val, property)) + elseif @isdefined(val) # looking for a property of an instance + try + for property in propertynames(val, false) + # TODO: support integer arguments (#36872) + if property isa Symbol && startswith(string(property), name) + push!(suggestions, PropertyCompletion(val, property)) + end end + catch end - else + elseif @isdefined(t) && field_completion_eligible(t) # Looking for a member of a type - if t isa DataType && t != Any - # Check for cases like Type{typeof(+)} - if Base.isType(t) - t = typeof(t.parameters[1]) - end - # Only look for fields if this is a concrete type - if isconcretetype(t) - fields = fieldnames(t) - for field in fields - isa(field, Symbol) || continue # Tuple type has ::Int field name - s = string(field) - if startswith(s, name) - push!(suggestions, FieldCompletion(t, field)) - end - end + add_field_completions!(suggestions, name, t) + end + return suggestions +end + +completes_module(mod::Module, x::Symbol) = + Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getglobal(mod, x), Module) + +function add_field_completions!(suggestions::Vector{Completion}, name::String, @nospecialize(t)) + if isa(t, Union) + add_field_completions!(suggestions, name, t.a) + add_field_completions!(suggestions, name, t.b) + else + @assert isconcretetype(t) + fields = fieldnames(t) + for field in fields + isa(field, Symbol) || continue # Tuple type has ::Int field name + s = string(field) + if startswith(s, name) + push!(suggestions, FieldCompletion(t, field)) end end end - suggestions +end + +const GENERIC_PROPERTYNAMES_METHOD = which(propertynames, (Any,)) + +function field_completion_eligible(@nospecialize t) + if isa(t, Union) + return field_completion_eligible(t.a) && field_completion_eligible(t.b) + end + isconcretetype(t) || return false + # field completion is correct only when `getproperty` fallbacks to `getfield` + match = Base._which(Tuple{typeof(propertynames),t}; raise=false) + match === nothing && return false + return match.method === GENERIC_PROPERTYNAMES_METHOD +end + +function complete_from_list!(suggestions::Vector{Completion}, T::Type, list::Vector{String}, s::String) + r = searchsorted(list, s) + i = first(r) + n = length(list) + while i <= n && startswith(list[i],s) + r = first(r):i + i += 1 + end + for kw in list[r] + push!(suggestions, T(kw)) + end + return suggestions end const sorted_keywords = [ "abstract type", "baremodule", "begin", "break", "catch", "ccall", - "const", "continue", "do", "else", "elseif", "end", "export", "false", + "const", "continue", "do", "else", "elseif", "end", "export", "finally", "for", "function", "global", "if", "import", "let", "local", "macro", "module", "mutable struct", "primitive type", "quote", "return", "struct", - "true", "try", "using", "while"] + "try", "using", "while"] -function complete_keyword(s::Union{String,SubString{String}}) - r = searchsorted(sorted_keywords, s) - i = first(r) - n = length(sorted_keywords) - while i <= n && 
startswith(sorted_keywords[i],s) - r = first(r):i - i += 1 +complete_keyword!(suggestions::Vector{Completion}, s::String) = + complete_from_list!(suggestions, KeywordCompletion, sorted_keywords, s) + +const sorted_keyvals = ["false", "true"] + +complete_keyval!(suggestions::Vector{Completion}, s::String) = + complete_from_list!(suggestions, KeyvalCompletion, sorted_keyvals, s) + +function do_raw_escape(s) + # escape_raw_string with delim='`' and ignoring the rule for the ending \ + return replace(s, r"(\\+)`" => s"\1\\`") +end +function do_shell_escape(s) + return Base.shell_escape_posixly(s) +end +function do_string_escape(s) + return escape_string(s, ('\"','$')) +end + +const PATH_cache_lock = Base.ReentrantLock() +const PATH_cache = Set{String}() +PATH_cache_task::Union{Task,Nothing} = nothing # used for sync in tests +next_cache_update::Float64 = 0.0 +function maybe_spawn_cache_PATH() + global PATH_cache_task, next_cache_update + @lock PATH_cache_lock begin + PATH_cache_task isa Task && !istaskdone(PATH_cache_task) && return + time() < next_cache_update && return + PATH_cache_task = Threads.@spawn begin + REPLCompletions.cache_PATH() + @lock PATH_cache_lock PATH_cache_task = nothing # release memory when done + end + Base.errormonitor(PATH_cache_task) end - Completion[KeywordCompletion(kw) for kw in sorted_keywords[r]] end -function complete_path(path::AbstractString, pos::Int; - use_envpath=false, shell_escape=false, - string_escape=false) +# caches all reachable files in PATH dirs +function cache_PATH() + path = get(ENV, "PATH", nothing) + path isa String || return + + global next_cache_update + + # Calling empty! on PATH_cache would be annoying for async typing hints as completions would temporarily disappear. + # So keep track of what's added this time and at the end remove any that didn't appear this time from the global cache. + this_PATH_cache = Set{String}() + + @debug "caching PATH files" PATH=path + pathdirs = split(path, @static Sys.iswindows() ? ";" : ":") + + next_yield_time = time() + 0.01 + + t = @elapsed for pathdir in pathdirs + actualpath = try + realpath(pathdir) + catch ex + ex isa Base.IOError || rethrow() + # Bash doesn't expect every folder in PATH to exist, so neither shall we + continue + end + + if actualpath != pathdir && in(actualpath, pathdirs) + # Remove paths which (after resolving links) are in the env path twice. + # Many distros eg. point /bin to /usr/bin but have both in the env path. + continue + end + + path_entries = try + _readdirx(pathdir) + catch e + # Bash allows dirs in PATH that can't be read, so we should as well. + if isa(e, Base.IOError) || isa(e, Base.ArgumentError) + continue + else + # We only handle IOError and ArgumentError here + rethrow() + end + end + for entry in path_entries + # In a perfect world, we would filter on whether the file is executable + # here, or even on whether the current user can execute the file in question. + try + if isfile(entry) + @lock PATH_cache_lock push!(PATH_cache, entry.name) + push!(this_PATH_cache, entry.name) + end + catch e + # `isfile()` can throw in rare cases such as when probing a + # symlink that points to a file within a directory we do not + # have read access to. 
+ if isa(e, Base.IOError) + continue + else + rethrow() + end + end + if time() >= next_yield_time + yield() # to avoid blocking typing when -t1 + next_yield_time = time() + 0.01 + end + end + end + + @lock PATH_cache_lock begin + intersect!(PATH_cache, this_PATH_cache) # remove entries from PATH_cache that weren't found this time + next_cache_update = time() + 10 # earliest next update can run is 10s after + end + + @debug "caching PATH files took $t seconds" length(pathdirs) length(PATH_cache) + return PATH_cache +end + +function complete_path(path::AbstractString; + use_envpath=false, + shell_escape=false, + raw_escape=false, + string_escape=false, + contract_user=false) @assert !(shell_escape && string_escape) if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) # if the path is just "~", don't consider the expanded username as a prefix @@ -246,82 +424,71 @@ function complete_path(path::AbstractString, pos::Int; else dir, prefix = splitdir(path) end - local files - try + entries = try if isempty(dir) - files = readdir() + _readdirx() elseif isdir(dir) - files = readdir(dir) + _readdirx(dir) else - return Completion[], 0:-1, false + return Completion[], dir, false end - catch - return Completion[], 0:-1, false + catch ex + ex isa Base.IOError || rethrow() + return Completion[], dir, false end matches = Set{String}() - for file in files - if startswith(file, prefix) - p = joinpath(dir, file) - is_dir = try isdir(p) catch; false end - push!(matches, is_dir ? joinpath(file, "") : file) + for entry in entries + if startswith(entry.name, prefix) + is_dir = try isdir(entry) catch ex; ex isa Base.IOError ? false : rethrow() end + push!(matches, is_dir ? entry.name * "/" : entry.name) end end - if use_envpath && length(dir) == 0 - # Look for files in PATH as well - local pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":") - - for pathdir in pathdirs - local actualpath - try - actualpath = realpath(pathdir) - catch - # Bash doesn't expect every folder in PATH to exist, so neither shall we - continue - end - - if actualpath != pathdir && in(actualpath,pathdirs) - # Remove paths which (after resolving links) are in the env path twice. - # Many distros eg. point /bin to /usr/bin but have both in the env path. - continue + if use_envpath && isempty(dir) + # Look for files in PATH as well. These are cached in `cache_PATH` in an async task to not block typing. + # If we cannot get lock because its still caching just pass over this so that typing isn't laggy. + maybe_spawn_cache_PATH() # only spawns if enough time has passed and the previous caching task has completed + @lock PATH_cache_lock begin + for file in PATH_cache + startswith(file, prefix) && push!(matches, file) end + end + end - local filesinpath - try - filesinpath = readdir(pathdir) - catch e - # Bash allows dirs in PATH that can't be read, so we should as well. - if isa(e, Base.IOError) || isa(e, Base.ArgumentError) - continue - else - # We only handle IOError and ArgumentError here - rethrow() - end - end + matches = ((shell_escape ? do_shell_escape(s) : string_escape ? do_string_escape(s) : s) for s in matches) + matches = ((raw_escape ? do_raw_escape(s) : s) for s in matches) + matches = Completion[PathCompletion(contract_user ? contractuser(s) : s) for s in matches] + return matches, dir, !isempty(matches) +end - for file in filesinpath - # In a perfect world, we would filter on whether the file is executable - # here, or even on whether the current user can execute the file in question. 
- if startswith(file, prefix) && isfile(joinpath(pathdir, file)) - push!(matches, file) - end - end +function complete_path(path::AbstractString, + pos::Int; + use_envpath=false, + shell_escape=false, + string_escape=false, + contract_user=false) + ## TODO: enable this depwarn once Pkg is fixed + #Base.depwarn("complete_path with pos argument is deprecated because the return value [2] is incorrect to use", :complete_path) + paths, dir, success = complete_path(path; use_envpath, shell_escape, string_escape) + if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) + # if the path is just "~", don't consider the expanded username as a prefix + if path == "~" + dir, prefix = homedir(), "" + else + dir, prefix = splitdir(homedir() * path[2:end]) end + else + dir, prefix = splitdir(path) end - - function do_escape(s) - return shell_escape ? replace(s, r"(\s|\\)" => s"\\\0") : - string_escape ? escape_string(s, ('\"','$')) : - s + startpos = pos - lastindex(prefix) + 1 + Sys.iswindows() && map!(paths, paths) do c::PathCompletion + # emulation for unnecessarily complicated return value, since / is a + # perfectly acceptable path character which does not require quoting + # but is required by Pkg's awkward parser handling + return endswith(c.path, "/") ? PathCompletion(chop(c.path) * "\\\\") : c end - - matchList = Completion[PathCompletion(do_escape(s)) for s in matches] - startpos = pos - lastindex(do_escape(prefix)) + 1 - # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`, - # hence we need to add one to get the first index. This is also correct when considering - # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`. - return matchList, startpos:pos, !isempty(matchList) + return paths, startpos:pos, success end function complete_expanduser(path::AbstractString, r) @@ -337,13 +504,11 @@ end # Returns a range that includes the method name in front of the first non # closed start brace from the end of the string. function find_start_brace(s::AbstractString; c_start='(', c_end=')') - braces = 0 r = reverse(s) i = firstindex(r) - in_single_quotes = false - in_double_quotes = false - in_back_ticks = false - in_comment = 0 + braces = in_comment = 0 + in_single_quotes = in_double_quotes = in_back_ticks = false + num_single_quotes_in_string = count('\'', s) while i <= ncodeunits(r) c, i = iterate(r, i) if c == '#' && i <= ncodeunits(r) && iterate(r, i)[1] == '=' @@ -366,7 +531,9 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces += 1 elseif c == c_end braces -= 1 - elseif c == '\'' + elseif c == '\'' && num_single_quotes_in_string % 2 == 0 + # ' can be a transpose too, so check if there are even number of 's in the string + # TODO: This probably needs to be more robust in_single_quotes = true elseif c == '"' in_double_quotes = true @@ -409,55 +576,35 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') return (startind:lastindex(s), method_name_end) end -struct REPLInterpreterCache - dict::IdDict{MethodInstance,CodeInstance} -end -REPLInterpreterCache() = REPLInterpreterCache(IdDict{MethodInstance,CodeInstance}()) -const REPL_INTERPRETER_CACHE = REPLInterpreterCache() - -function get_code_cache() - # XXX Avoid storing analysis results into the cache that persists across precompilation, - # as [sys|pkg]image currently doesn't support serializing externally created `CodeInstance`. 
- # Otherwise, `CodeInstance`s created by `REPLInterpreter`, that are much less optimized - # that those produced by `NativeInterpreter`, will leak into the native code cache, - # potentially causing runtime slowdown. - # (see https://github.com/JuliaLang/julia/issues/48453). - if (@ccall jl_generating_output()::Cint) == 1 - return REPLInterpreterCache() - else - return REPL_INTERPRETER_CACHE - end -end +struct REPLCacheToken end struct REPLInterpreter <: CC.AbstractInterpreter - repl_frame::CC.InferenceResult + limit_aggressive_inference::Bool world::UInt inf_params::CC.InferenceParams opt_params::CC.OptimizationParams inf_cache::Vector{CC.InferenceResult} - code_cache::REPLInterpreterCache - function REPLInterpreter(repl_frame::CC.InferenceResult; + function REPLInterpreter(limit_aggressive_inference::Bool=false; world::UInt = Base.get_world_counter(), - inf_params::CC.InferenceParams = CC.InferenceParams(), + inf_params::CC.InferenceParams = CC.InferenceParams(; + aggressive_constant_propagation=true), opt_params::CC.OptimizationParams = CC.OptimizationParams(), - inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[], - code_cache::REPLInterpreterCache = get_code_cache()) - return new(repl_frame, world, inf_params, opt_params, inf_cache, code_cache) + inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[]) + return new(limit_aggressive_inference, world, inf_params, opt_params, inf_cache) end end CC.InferenceParams(interp::REPLInterpreter) = interp.inf_params CC.OptimizationParams(interp::REPLInterpreter) = interp.opt_params -CC.get_world_counter(interp::REPLInterpreter) = interp.world +CC.get_inference_world(interp::REPLInterpreter) = interp.world CC.get_inference_cache(interp::REPLInterpreter) = interp.inf_cache -CC.code_cache(interp::REPLInterpreter) = CC.WorldView(interp.code_cache, CC.WorldRange(interp.world)) -CC.get(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) -CC.getindex(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) -CC.haskey(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) -CC.setindex!(wvc::CC.WorldView{REPLInterpreterCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi) +CC.cache_owner(::REPLInterpreter) = REPLCacheToken() # REPLInterpreter is only used for type analysis, so it should disable optimization entirely CC.may_optimize(::REPLInterpreter) = false +# REPLInterpreter doesn't need any sources to be cached, so discard them aggressively +CC.transform_result_for_cache(::REPLInterpreter, ::CC.InferenceResult) = nothing + # REPLInterpreter analyzes a top-level frame, so better to not bail out from it CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.InferenceState) = false @@ -466,53 +613,68 @@ CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.Infer # Aggressive binding resolution poses challenges for the inference cache validation # (until https://github.com/JuliaLang/julia/issues/40399 is implemented). # To avoid the cache validation issues, `REPLInterpreter` only allows aggressive binding -# resolution for top-level frame representing REPL input code (`repl_frame`) and for child -# `getproperty` frames that are constant propagated from the `repl_frame`. This works, since -# a.) these frames are never cached, and -# b.) their results are only observed by the non-cached `repl_frame`. 
+# resolution for top-level frame representing REPL input code and for child uncached frames +# that are constant propagated from the top-level frame ("repl-frame"s). This works, even if +# those global bindings are not constant and may be mutated in the future, since: +# a.) "repl-frame"s are never cached, and +# b.) mutable values are never observed by any cached frames. # # `REPLInterpreter` also aggressively concrete evaluate `:inconsistent` calls within -# `repl_frame` to provide reasonable completions for lines like `Ref(Some(42))[].|`. +# "repl-frame" to provide reasonable completions for lines like `Ref(Some(42))[].|`. # Aggressive concrete evaluation allows us to get accurate type information about complex # expressions that otherwise can not be constant folded, in a safe way, i.e. it still # doesn't evaluate effectful expressions like `pop!(xs)`. # Similarly to the aggressive binding resolution, aggressive concrete evaluation doesn't -# present any cache validation issues because `repl_frame` is never cached. +# present any cache validation issues because "repl-frame" is never cached. + +# `REPLInterpreter` is specifically used by `repl_eval_ex`, where all top-level frames are +# `repl_frame` always. However, this assumption wouldn't stand if `REPLInterpreter` were to +# be employed, for instance, by `typeinf_ext_toplevel`. +is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && sv.cache_mode === CC.CACHE_MODE_NULL + +function is_call_graph_uncached(sv::CC.InferenceState) + CC.is_cached(sv) && return false + parent = CC.frame_parent(sv) + parent === nothing && return true + return is_call_graph_uncached(parent::CC.InferenceState) +end -is_repl_frame(interp::REPLInterpreter, sv::CC.InferenceState) = interp.repl_frame === sv.result +isdefined_globalref(g::GlobalRef) = !iszero(ccall(:jl_globalref_boundp, Cint, (Any,), g)) # aggressive global binding resolution within `repl_frame` -function CC.abstract_eval_globalref(interp::REPLInterpreter, g::GlobalRef, +function CC.abstract_eval_globalref(interp::REPLInterpreter, g::GlobalRef, bailed::Bool, sv::CC.InferenceState) - if is_repl_frame(interp, sv) - if CC.isdefined_globalref(g) - return Const(ccall(:jl_get_globalref_value, Any, (Any,), g)) + if (interp.limit_aggressive_inference ? 
is_repl_frame(sv) : is_call_graph_uncached(sv)) + if isdefined_globalref(g) + return Pair{CC.RTEffects, Union{Nothing, Core.BindingPartition}}( + CC.RTEffects(Const(ccall(:jl_get_globalref_value, Any, (Any,), g)), Union{}, CC.EFFECTS_TOTAL), nothing) end - return Union{} + return Pair{CC.RTEffects, Union{Nothing, Core.BindingPartition}}( + CC.RTEffects(Union{}, UndefVarError, CC.EFFECTS_THROWS), nothing) end - return @invoke CC.abstract_eval_globalref(interp::CC.AbstractInterpreter, g::GlobalRef, + return @invoke CC.abstract_eval_globalref(interp::CC.AbstractInterpreter, g::GlobalRef, bailed::Bool, sv::CC.InferenceState) end -function is_repl_frame_getproperty(interp::REPLInterpreter, sv::CC.InferenceState) +function is_repl_frame_getproperty(sv::CC.InferenceState) def = sv.linfo.def def isa Method || return false def.name === :getproperty || return false - sv.cached && return false - return is_repl_frame(interp, sv.parent) + CC.is_cached(sv) && return false + return is_repl_frame(CC.frame_parent(sv)) end # aggressive global binding resolution for `getproperty(::Module, ::Symbol)` calls within `repl_frame` function CC.builtin_tfunction(interp::REPLInterpreter, @nospecialize(f), argtypes::Vector{Any}, sv::CC.InferenceState) - if f === Core.getglobal && is_repl_frame_getproperty(interp, sv) + if f === Core.getglobal && (interp.limit_aggressive_inference ? is_repl_frame_getproperty(sv) : is_call_graph_uncached(sv)) if length(argtypes) == 2 a1, a2 = argtypes if isa(a1, Const) && isa(a2, Const) a1val, a2val = a1.val, a2.val if isa(a1val, Module) && isa(a2val, Symbol) g = GlobalRef(a1val, a2val) - if CC.isdefined_globalref(g) + if isdefined_globalref(g) return Const(ccall(:jl_get_globalref_value, Any, (Any,), g)) end return Union{} @@ -528,28 +690,46 @@ end function CC.concrete_eval_eligible(interp::REPLInterpreter, @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.InferenceState) - if is_repl_frame(interp, sv) + if (interp.limit_aggressive_inference ? 
is_repl_frame(sv) : is_call_graph_uncached(sv)) neweffects = CC.Effects(result.effects; consistent=CC.ALWAYS_TRUE) - result = CC.MethodCallResult(result.rt, result.edgecycle, result.edgelimited, - result.edge, neweffects) + result = CC.MethodCallResult(result.rt, result.exct, neweffects, result.edge, + result.edgecycle, result.edgelimited, result.volatile_inf_result) + end + ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, + result::CC.MethodCallResult, arginfo::CC.ArgInfo, + sv::CC.InferenceState) + if ret === :semi_concrete_eval + # while the base eligibility check probably won't permit semi-concrete evaluation + # for `REPLInterpreter` (given it completely turns off optimization), + # this ensures we don't inadvertently enter irinterp + ret = :none + end + return ret +end + +# allow constant propagation for mutable constants +function CC.const_prop_argument_heuristic(interp::REPLInterpreter, arginfo::CC.ArgInfo, sv::CC.InferenceState) + if !interp.limit_aggressive_inference + any(@nospecialize(a)->isa(a, Const), arginfo.argtypes) && return true # even if mutable end - return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, - result::CC.MethodCallResult, arginfo::CC.ArgInfo, - sv::CC.InferenceState) + return @invoke CC.const_prop_argument_heuristic(interp::CC.AbstractInterpreter, arginfo::CC.ArgInfo, sv::CC.InferenceState) end -function resolve_toplevel_symbols!(mod::Module, src::Core.CodeInfo) - newsrc = copy(src) - @ccall jl_resolve_globals_in_ir( - #=jl_array_t *stmts=# newsrc.code::Any, +function resolve_toplevel_symbols!(src::Core.CodeInfo, mod::Module) + @ccall jl_resolve_definition_effects_in_ir( + #=jl_array_t *stmts=# src.code::Any, #=jl_module_t *m=# mod::Any, #=jl_svec_t *sparam_vals=# Core.svec()::Any, #=int binding_effects=# 0::Int)::Cvoid - return newsrc + return src end # lower `ex` and run type inference on the resulting top-level expression -function repl_eval_ex(@nospecialize(ex), context_module::Module) +function repl_eval_ex(@nospecialize(ex), context_module::Module; limit_aggressive_inference::Bool=false) + if (isexpr(ex, :toplevel) || isexpr(ex, :tuple)) && !isempty(ex.args) + # get the inference result for the last expression + ex = ex.args[end] + end lwr = try Meta.lower(context_module, ex) catch # macro expansion failed, etc. @@ -562,17 +742,13 @@ function repl_eval_ex(@nospecialize(ex), context_module::Module) isexpr(lwr, :thunk) || return nothing # lowered to `Expr(:error, ...)` or similar src = lwr.args[1]::Core.CodeInfo + resolve_toplevel_symbols!(src, context_module) # construct top-level `MethodInstance` - mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()); - mi.specTypes = Tuple{} - - mi.def = context_module - src = resolve_toplevel_symbols!(context_module, src) - @atomic mi.uninferred = src + mi = ccall(:jl_method_instance_for_thunk, Ref{Core.MethodInstance}, (Any, Any), src, context_module) + interp = REPLInterpreter(limit_aggressive_inference) result = CC.InferenceResult(mi) - interp = REPLInterpreter(result) - frame = CC.InferenceState(result, src, #=cache=#:no, interp)::CC.InferenceState + frame = CC.InferenceState(result, src, #=cache=#:no, interp) # NOTE Use the fixed world here to make `REPLInterpreter` robust against # potential invalidations of `Core.Compiler` methods. 
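# Hedged usage sketch of the inference machinery above: `repl_eval_ex` runs
# `REPLInterpreter` on the text to the left of the cursor, which is what lets
# field completion work on expressions like `Ref(Some(42))[].` without running
# side-effectful code. Exercised through the `completions` entry point defined
# later in this file:
using REPL
s = "Ref(Some(42))[]."
cs, range, ok = REPL.REPLCompletions.completions(s, lastindex(s), Main)
# `cs` is expected to include the `value` field of the inferred `Some{Int64}`.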
@@ -616,6 +792,26 @@ function complete_methods(ex_org::Expr, context_module::Module=Main, shift::Bool end MAX_ANY_METHOD_COMPLETIONS::Int = 10 +function recursive_explore_names!(seen::IdSet, callee_module::Module, initial_module::Module, exploredmodules::IdSet{Module}=IdSet{Module}()) + push!(exploredmodules, callee_module) + for name in names(callee_module; all=true, imported=true) + if !Base.isdeprecated(callee_module, name) && !startswith(string(name), '#') && isdefined(initial_module, name) + func = getfield(callee_module, name) + if !isa(func, Module) + funct = Core.Typeof(func) + push!(seen, funct) + elseif isa(func, Module) && func ∉ exploredmodules + recursive_explore_names!(seen, func, initial_module, exploredmodules) + end + end + end +end +function recursive_explore_names(callee_module::Module, initial_module::Module) + seen = IdSet{Any}() + recursive_explore_names!(seen, callee_module, initial_module) + seen +end + function complete_any_methods(ex_org::Expr, callee_module::Module, context_module::Module, moreargs::Bool, shift::Bool) out = Completion[] args_ex, kwargs_ex, kwargs_flag = try @@ -631,32 +827,8 @@ function complete_any_methods(ex_org::Expr, callee_module::Module, context_modul # semicolon for the ".?(" syntax moreargs && push!(args_ex, Vararg{Any}) - seen = Base.IdSet() - for name in names(callee_module; all=true) - if !Base.isdeprecated(callee_module, name) && isdefined(callee_module, name) && !startswith(string(name), '#') - func = getfield(callee_module, name) - if !isa(func, Module) - funct = Core.Typeof(func) - if !in(funct, seen) - push!(seen, funct) - complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false) - end - elseif callee_module === Main && isa(func, Module) - callee_module2 = func - for name in names(callee_module2) - if !Base.isdeprecated(callee_module2, name) && isdefined(callee_module2, name) && !startswith(string(name), '#') - func = getfield(callee_module, name) - if !isa(func, Module) - funct = Core.Typeof(func) - if !in(funct, seen) - push!(seen, funct) - complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false) - end - end - end - end - end - end + for seen_name in recursive_explore_names(callee_module, callee_module) + complete_methods!(out, seen_name, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false) end if !shift @@ -665,7 +837,7 @@ function complete_any_methods(ex_org::Expr, callee_module::Module, context_modul isa(c, TextCompletion) && return false isa(c, MethodCompletion) || return true sig = Base.unwrap_unionall(c.method.sig)::DataType - return !all(T -> T === Any || T === Vararg{Any}, sig.parameters[2:end]) + return !all(@nospecialize(T) -> T === Any || T === Vararg{Any}, sig.parameters[2:end]) end end @@ -760,34 +932,51 @@ const subscript_regex = Regex("^\\\\_[" * join(isdigit(k) || isletter(k) ? "$k" const superscripts = Dict(k[3]=>v[1] for (k,v) in latex_symbols if startswith(k, "\\^") && length(k)==3) const superscript_regex = Regex("^\\\\\\^[" * join(isdigit(k) || isletter(k) ? 
"$k" : "\\$k" for k in keys(superscripts)) * "]+\\z") -# Aux function to detect whether we're right after a -# using or import keyword -function afterusing(string::String, startpos::Int) - (isempty(string) || startpos == 0) && return false - str = string[1:prevind(string,startpos)] - isempty(str) && return false - rstr = reverse(str) - r = findfirst(r"\s(gnisu|tropmi)\b", rstr) - r === nothing && return false - fr = reverseind(str, last(r)) - return occursin(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end]) +# Aux function to detect whether we're right after a using or import keyword +function get_import_mode(s::String) + # allow all of these to start with leading whitespace and macros like @eval and @eval( + # ^\s*(?:@\w+\s*(?:\(\s*)?)? + + # match simple cases like `using |` and `import |` + mod_import_match_simple = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s*$", s) + if mod_import_match_simple !== nothing + if mod_import_match_simple[1] == "using" + return :using_module + else + return :import_module + end + end + # match module import statements like `using Foo|`, `import Foo, Bar|` and `using Foo.Bar, Baz, |` + mod_import_match = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s+([\w\.]+(?:\s*,\s*[\w\.]+)*),?\s*$", s) + if mod_import_match !== nothing + if mod_import_match.captures[1] == "using" + return :using_module + else + return :import_module + end + end + # now match explicit name import statements like `using Foo: |` and `import Foo: bar, baz|` + name_import_match = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s+([\w\.]+)\s*:\s*([\w@!\s,]+)$", s) + if name_import_match !== nothing + if name_import_match[1] == "using" + return :using_name + else + return :import_name + end + end + return nothing end -function close_path_completion(str, startpos, r, paths, pos) - length(paths) == 1 || return false # Only close if there's a single choice... - _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path - path = expanduser(unescape_string(replace(_path, "\\\$"=>"\$", "\\\""=>"\""))) +function close_path_completion(dir, path, str, pos) + path = unescape_string(replace(path, "\\\$"=>"\$")) + path = joinpath(dir, path) # ...except if it's a directory... - try - isdir(path) - catch e - e isa Base.IOError || rethrow() # `path` cannot be determined to be a file - end && return false + Base.isaccessibledir(path) && return false # ...and except if there's already a " at the cursor. return lastindex(str) <= pos || str[nextind(str, pos)] != '"' end -function bslash_completions(string::String, pos::Int) +function bslash_completions(string::String, pos::Int, hint::Bool=false) slashpos = something(findprev(isequal('\\'), string, pos), 0) if (something(findprev(in(bslash_separators), string, pos), 0) < slashpos && !(1 < slashpos && (string[prevind(string, slashpos)]=='\\'))) @@ -809,12 +998,10 @@ function bslash_completions(string::String, pos::Int) end # return possible matches; these cannot be mixed with regular # Julian completions as only latex / emoji symbols contain the leading \ - if startswith(s, "\\:") # emoji - namelist = Iterators.filter(k -> startswith(k, s), keys(emoji_symbols)) - else # latex - namelist = Iterators.filter(k -> startswith(k, s), keys(latex_symbols)) - end - return (true, (Completion[BslashCompletion(name) for name in sort!(collect(namelist))], slashpos:pos, true)) + symbol_dict = startswith(s, "\\:") ? 
emoji_symbols : latex_symbols + namelist = Iterators.filter(k -> startswith(k, s), keys(symbol_dict)) + completions = Completion[BslashCompletion(name, "$(symbol_dict[name]) $name") for name in sort!(collect(namelist))] + return (true, (completions, slashpos:pos, true)) end return (false, (Completion[], 0:-1, false)) end @@ -827,20 +1014,18 @@ function dict_identifier_key(str::String, tag::Symbol, context_module::Module=Ma else str_close = str end - frange, end_of_identifier = find_start_brace(str_close, c_start='[', c_end=']') isempty(frange) && return (nothing, nothing, nothing) - obj = context_module - for name in split(str[frange[1]:end_of_identifier], '.') - Base.isidentifier(name) || return (nothing, nothing, nothing) - sym = Symbol(name) - isdefined(obj, sym) || return (nothing, nothing, nothing) - obj = getfield(obj, sym) - end - (isa(obj, AbstractDict) && length(obj)::Int < 1_000_000) || return (nothing, nothing, nothing) + objstr = str[1:end_of_identifier] + objex = Meta.parse(objstr, raise=false, depwarn=false) + objt = repl_eval_ex(objex, context_module) + isa(objt, Core.Const) || return (nothing, nothing, nothing) + obj = objt.val + isa(obj, AbstractDict) || return (nothing, nothing, nothing) + (Base.haslength(obj) && length(obj)::Int < 1_000_000) || return (nothing, nothing, nothing) begin_of_key = something(findnext(!isspace, str, nextind(str, end_of_identifier) + 1), # +1 for [ lastindex(str)+1) - return (obj::AbstractDict, str[begin_of_key:end], begin_of_key) + return (obj, str[begin_of_key:end], begin_of_key) end # This needs to be a separate non-inlined function, see #19441 @@ -884,7 +1069,8 @@ function identify_possible_method_completion(partial, last_idx) end # Provide completion for keyword arguments in function calls -function complete_keyword_argument(partial, last_idx, context_module) +function complete_keyword_argument(partial::String, last_idx::Int, context_module::Module; + shift::Bool=false) frange, ex, wordrange, = identify_possible_method_completion(partial, last_idx) fail = Completion[], 0:-1, frange ex.head === :call || is_broadcasting_expr(ex) || return fail @@ -893,7 +1079,7 @@ function complete_keyword_argument(partial, last_idx, context_module) kwargs_flag == 2 && return fail # one of the previous kwargs is invalid methods = Completion[] - complete_methods!(methods, funct, Any[Vararg{Any}], kwargs_ex, -1, kwargs_flag == 1) + complete_methods!(methods, funct, Any[Vararg{Any}], kwargs_ex, shift ? -1 : MAX_METHOD_COMPLETIONS, kwargs_flag == 1) # TODO: use args_ex instead of Any[Vararg{Any}] and only provide kwarg completion for # method calls compatible with the current arguments. @@ -918,12 +1104,17 @@ function complete_keyword_argument(partial, last_idx, context_module) end suggestions = Completion[KeywordArgumentCompletion(kwarg) for kwarg in kwargs] - append!(suggestions, complete_symbol(nothing, last_word, Returns(true), context_module)) - return sort!(suggestions, by=completion_text), wordrange + # Only add these if not in kwarg space. i.e. 
not in `foo(; ` + if kwargs_flag == 0 + complete_symbol!(suggestions, #=prefix=#nothing, last_word, context_module; shift) + complete_keyval!(suggestions, last_word) + end + + return sort!(suggestions, by=named_completion_completion), wordrange end -function project_deps_get_completion_candidates(pkgstarts::String, project_file::String) +function get_loading_candidates(pkgstarts::String, project_file::String) loading_candidates = String[] d = Base.parsed_toml(project_file) pkg = get(d, "name", nothing)::Union{String, Nothing} @@ -936,17 +1127,31 @@ function project_deps_get_completion_candidates(pkgstarts::String, project_file: startswith(pkg, pkgstarts) && push!(loading_candidates, pkg) end end - return Completion[PackageCompletion(name) for name in loading_candidates] + return loading_candidates end -function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ffunc::Function), context_module::Module, string::String, name::String, pos::Int, dotpos::Int, startpos::Int, comp_keywords=false) - ex = nothing - comp_keywords && append!(suggestions, complete_keyword(name)) - if dotpos > 1 && string[dotpos] == '.' - s = string[1:dotpos-1] +function complete_loading_candidates!(suggestions::Vector{Completion}, pkgstarts::String, project_file::String) + for name in get_loading_candidates(pkgstarts, project_file) + push!(suggestions, PackageCompletion(name)) + end + return suggestions +end + +function complete_identifiers!(suggestions::Vector{Completion}, + context_module::Module, string::String, name::String, + pos::Int, separatorpos::Int, startpos::Int; + comp_keywords::Bool=false, + complete_modules_only::Bool=false, + shift::Bool=false) + if comp_keywords + complete_keyword!(suggestions, name) + complete_keyval!(suggestions, name) + end + if separatorpos > 1 && (string[separatorpos] == '.' || string[separatorpos] == ':') + s = string[1:prevind(string, separatorpos)] # First see if the whole string up to `pos` is a valid expression. If so, use it. - ex = Meta.parse(s, raise=false, depwarn=false) - if isexpr(ex, :incomplete) + prefix = Meta.parse(s, raise=false, depwarn=false) + if isexpr(prefix, :incomplete) s = string[startpos:pos] # Heuristic to find the start of the expression. TODO: This would be better # done with a proper error-recovering parser. @@ -978,48 +1183,101 @@ function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ff if something(findlast(in(non_identifier_chars), s), 0) < something(findlast(isequal('.'), s), 0) lookup_name, name = rsplit(s, ".", limit=2) name = String(name) - - ex = Meta.parse(lookup_name, raise=false, depwarn=false) + prefix = Meta.parse(lookup_name, raise=false, depwarn=false) + end + isexpr(prefix, :incomplete) && (prefix = nothing) + elseif isexpr(prefix, (:using, :import)) + arglast = prefix.args[end] # focus on completion to the last argument + if isexpr(arglast, :.) + # We come here for cases like: + # - `string`: "using Mod1.Mod2.M" + # - `ex`: :(using Mod1.Mod2) + # - `name`: "M" + # Now we transform `ex` to `:(Mod1.Mod2)` to allow `complete_symbol!` to + # complete for inner modules whose name starts with `M`. + # Note that `complete_modules_only=true` is set within `completions` + prefix = nothing + firstdot = true + for arg = arglast.args + if arg === :. 
+ # override `context_module` if multiple `.` accessors are used + if firstdot + firstdot = false + else + context_module = parentmodule(context_module) + end + elseif arg isa Symbol + if prefix === nothing + prefix = arg + else + prefix = Expr(:., prefix, QuoteNode(arg)) + end + else # invalid expression + prefix = nothing + break + end + end + end + elseif isexpr(prefix, :call) && length(prefix.args) > 1 + isinfix = s[end] != ')' + # A complete call expression that does not finish with ')' is an infix call. + if !isinfix + # Handle infix call argument completion of the form bar + foo(qux). + frange, end_of_identifier = find_start_brace(@view s[1:prevind(s, end)]) + if !isempty(frange) # if find_start_brace fails to find the brace just continue + isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == prefix.args[end] + end + end + if isinfix + prefix = prefix.args[end] + end + elseif isexpr(prefix, :macrocall) && length(prefix.args) > 1 + # allow symbol completions within potentially incomplete macrocalls + if s[end] ≠ '`' && s[end] ≠ ')' + prefix = prefix.args[end] end - isexpr(ex, :incomplete) && (ex = nothing) end + else + prefix = nothing end - append!(suggestions, complete_symbol(ex, name, ffunc, context_module)) - return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true + complete_symbol!(suggestions, prefix, name, context_module; complete_modules_only, shift) + return suggestions end -function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true) +function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true, hint::Bool=false) # First parse everything up to the current position partial = string[1:pos] inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false)) - # ?(x, y)TAB lists methods you can call with these objects - # ?(x, y TAB lists methods that take these objects as the first two arguments - # MyModule.?(x, y)TAB restricts the search to names in MyModule - rexm = match(r"(\w+\.|)\?\((.*)$", partial) - if rexm !== nothing - # Get the module scope - if isempty(rexm.captures[1]) - callee_module = context_module - else - modname = Symbol(rexm.captures[1][1:end-1]) - if isdefined(context_module, modname) - callee_module = getfield(context_module, modname) - if !isa(callee_module, Module) + if !hint # require a tab press for completion of these + # ?(x, y)TAB lists methods you can call with these objects + # ?(x, y TAB lists methods that take these objects as the first two arguments + # MyModule.?(x, y)TAB restricts the search to names in MyModule + rexm = match(r"(\w+\.|)\?\((.*)$", partial) + if rexm !== nothing + # Get the module scope + if isempty(rexm.captures[1]) + callee_module = context_module + else + modname = Symbol(rexm.captures[1][1:end-1]) + if isdefined(context_module, modname) + callee_module = getfield(context_module, modname) + if !isa(callee_module, Module) + callee_module = context_module + end + else callee_module = context_module end - else - callee_module = context_module end - end - moreargs = !endswith(rexm.captures[2], ')') - callstr = "_(" * rexm.captures[2] - if moreargs - callstr *= ')' - end - ex_org = Meta.parse(callstr, raise=false, depwarn=false) - if isa(ex_org, Expr) - return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false + moreargs = !endswith(rexm.captures[2], ')') + callstr = "_(" * rexm.captures[2] + if moreargs + callstr *= ')' + 
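# Hedged illustration of the `?(` completion handled above: at the REPL, typing
# `Base.?(1, 2.0)` and pressing TAB lists callables in `Base` that have a method
# applicable to an `Int` and a `Float64`. The same path can be exercised through
# the internal entry point (shown only as a sketch):
using REPL
s = "Base.?(1, 2.0)"
cs, range, should_complete = REPL.REPLCompletions.completions(s, lastindex(s))
# `cs` is expected to contain `MethodCompletion`s, e.g. methods of `+`.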
end + ex_org = Meta.parse(callstr, raise=false, depwarn=false) + if isa(ex_org, Expr) + return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false + end end end @@ -1031,7 +1289,6 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif length(matches)>0 && return Completion[DictCompletion(identifier, match) for match in sort!(matches)], loc::Int:pos, true end - ffunc = Returns(true) suggestions = Completion[] # Check if this is a var"" string macro that should be completed like @@ -1041,52 +1298,129 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif # its invocation. varrange = findprev("var\"", string, pos) + expanded = nothing + was_expanded = false + if varrange !== nothing ok, ret = bslash_completions(string, pos) ok && return ret startpos = first(varrange) + 4 - dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0) - return complete_identifiers!(Completion[], ffunc, context_module, string, - string[startpos:pos], pos, dotpos, startpos) + separatorpos = something(findprev(isequal('.'), string, first(varrange)-1), 0) + name = string[startpos:pos] + complete_identifiers!(suggestions, context_module, string, name, + pos, separatorpos, startpos; + shift) + return sort!(unique!(named_completion, suggestions), by=named_completion_completion), (separatorpos+1):pos, true elseif inc_tag === :cmd - m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) - startpos = nextind(partial, reverseind(partial, m.offset)) - r = startpos:pos - - # This expansion with "\\ "=>' ' replacement and shell_escape=true - # assumes the path isn't further quoted within the cmd backticks. - expanded = complete_expanduser(replace(string[r], r"\\ " => " "), r) - expanded[3] && return expanded # If user expansion available, return it - - paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos, - shell_escape=true) + # TODO: should this call shell_completions instead of partially reimplementing it? + let m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) # fuzzy shell_parse in reverse + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + scs::String = string[r] + + expanded = complete_expanduser(scs, r) + was_expanded = expanded[3] + if was_expanded + scs = (only(expanded[1])::PathCompletion).path + # If tab press, ispath and user expansion available, return it now + # otherwise see if we can complete the path further before returning with expanded ~ + !hint && ispath(scs) && return expanded::Completions + end - return sort!(paths, by=p->p.path), r, success + path::String = replace(scs, r"(\\+)\g1(\\?)`" => "\1\2`") # fuzzy unescape_raw_string: match an even number of \ before ` and replace with half as many + # This expansion with "\\ "=>' ' replacement and shell_escape=true + # assumes the path isn't further quoted within the cmd backticks. 
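# Both the backtick branch above and the string-literal branch below unescape the
# text the user has typed before touching the filesystem, then re-escape the
# completions they return. For the string-literal case the round trip relies on
# Base helpers that behave like this (a hedged, standalone illustration):
s = "dir with \"quotes\" and \$dollar"
e = escape_string(s, ('"', '$'))   # backslash-escape quotes and dollar signs
unescape_string(e) == s            # expected to be true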
+ path = replace(path, r"\\ " => " ", r"\$" => "\$") # fuzzy shell_parse (reversed by shell_escape_posixly) + paths, dir, success = complete_path(path, shell_escape=true, raw_escape=true) + + if success && !isempty(dir) + let dir = do_raw_escape(do_shell_escape(dir)) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + if endswith(dir, "/") && startswith(scs, dir) + r = (startpos + sizeof(dir)):pos + elseif startswith(scs, dir * "/") + r = nextind(string, startpos + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + p = dir * "/" * c.path + was_expanded && (p = contractuser(p)) + return PathCompletion(p) + end + end + end + end + if isempty(paths) && !hint && was_expanded + # if not able to provide completions, not hinting, and ~ expansion was possible, return ~ expansion + return expanded::Completions + else + return sort!(paths, by=p->p.path), r::UnitRange{Int}, success + end + end elseif inc_tag === :string # Find first non-escaped quote - m = match(r"\"(?!\\)", reverse(partial)) - startpos = nextind(partial, reverseind(partial, m.offset)) - r = startpos:pos + let m = match(r"\"(?!\\)", reverse(partial)) + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + scs::String = string[r] + + expanded = complete_expanduser(scs, r) + was_expanded = expanded[3] + if was_expanded + scs = (only(expanded[1])::PathCompletion).path + # If tab press, ispath and user expansion available, return it now + # otherwise see if we can complete the path further before returning with expanded ~ + !hint && ispath(scs) && return expanded::Completions + end - expanded = complete_expanduser(string[r], r) - expanded[3] && return expanded # If user expansion available, return it + path = try + unescape_string(replace(scs, "\\\$"=>"\$")) + catch ex + ex isa ArgumentError || rethrow() + nothing + end + if !isnothing(path) + paths, dir, success = complete_path(path::String, string_escape=true) + + if length(paths) == 1 + p = (paths[1]::PathCompletion).path + hint && was_expanded && (p = contractuser(p)) + if close_path_completion(dir, p, path, pos) + paths[1] = PathCompletion(p * "\"") + end + end - path_prefix = try - unescape_string(replace(string[r], "\\\$"=>"\$", "\\\""=>"\"")) - catch - nothing - end - if !isnothing(path_prefix) - paths, r, success = complete_path(path_prefix, pos, string_escape=true) + if success && !isempty(dir) + let dir = do_string_escape(dir) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + if endswith(dir, "/") && startswith(scs, dir) + r = (startpos + sizeof(dir)):pos + elseif startswith(scs, dir * "/") && dir != dirname(homedir()) + was_expanded && (dir = contractuser(dir)) + r = nextind(string, startpos + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + p = dir * "/" * c.path + hint && was_expanded && (p = contractuser(p)) + return PathCompletion(p) + end + end + end + end - if close_path_completion(string, startpos, r, paths, pos) - paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + # Fallthrough allowed so that Latex symbols can be completed in strings + if success + return sort!(paths, by=p->p.path), r::UnitRange{Int}, success + elseif !hint && was_expanded + # if not able to provide completions, not hinting, and ~ expansion was possible, return ~ expansion + return expanded::Completions + end end - - # Fallthrough allowed so that Latex symbols can be completed in 
strings - success && return sort!(paths, by=p->p.path), r, success end end + # if path has ~ and we didn't find any paths to complete just return the expanded path + was_expanded && return expanded::Completions ok, ret = bslash_completions(string, pos) ok && return ret @@ -1107,39 +1441,41 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif end # Check whether we can complete a keyword argument in a function call - kwarg_completion, wordrange = complete_keyword_argument(partial, pos, context_module) + kwarg_completion, wordrange = complete_keyword_argument(partial, pos, context_module; shift) isempty(wordrange) || return kwarg_completion, wordrange, !isempty(kwarg_completion) - dotpos = something(findprev(isequal('.'), string, pos), 0) startpos = nextind(string, something(findprev(in(non_identifier_chars), string, pos), 0)) # strip preceding ! operator if (m = match(r"\G\!+", partial, startpos)) isa RegexMatch startpos += length(m.match) end - name = string[max(startpos, dotpos+1):pos] - comp_keywords = !isempty(name) && startpos > dotpos - if afterusing(string, startpos) - # We're right after using or import. Let's look only for packages - # and modules we can reach from here + separatorpos = something(findprev(isequal('.'), string, pos), 0) + namepos = max(startpos, separatorpos+1) + name = string[namepos:pos] + import_mode = get_import_mode(string) + if import_mode === :using_module || import_mode === :import_module + # Given input lines like `using Foo|`, `import Foo, Bar|` and `using Foo.Bar, Baz, |`: + # Let's look only for packages and modules we can reach from here # If there's no dot, we're in toplevel, so we should # also search for packages s = string[startpos:pos] - if dotpos <= startpos + if separatorpos <= startpos for dir in Base.load_path() if basename(dir) in Base.project_names && isfile(dir) - append!(suggestions, project_deps_get_completion_candidates(s, dir)) + complete_loading_candidates!(suggestions, s, dir) end isdir(dir) || continue - for pname in readdir(dir) + for entry in _readdirx(dir) + pname = entry.name if pname[1] != '.' 
&& pname != "METADATA" && pname != "REQUIRE" && startswith(pname, s) # Valid file paths are # .jl # /src/.jl # .jl/src/.jl - if isfile(joinpath(dir, pname)) + if isfile(entry) endswith(pname, ".jl") && push!(suggestions, PackageCompletion(pname[1:prevind(pname, end-2)])) else @@ -1148,7 +1484,7 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif else pname end - if isfile(joinpath(dir, pname, "src", + if isfile(joinpath(entry, "src", "$mod_name.jl")) push!(suggestions, PackageCompletion(mod_name)) end @@ -1157,71 +1493,100 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif end end end - ffunc = (mod,x)->(Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getfield(mod, x), Module)) comp_keywords = false + complete_modules_only = import_mode === :using_module # allow completion for `import Mod.name` (where `name` is not a module) + elseif import_mode === :using_name || import_mode === :import_name + # `using Foo: |` and `import Foo: bar, baz|` + separatorpos = findprev(isequal(':'), string, pos)::Int + comp_keywords = false + complete_modules_only = false + else + comp_keywords = !isempty(name) && startpos > separatorpos + complete_modules_only = false end - startpos == 0 && (pos = -1) - dotpos < startpos && (dotpos = startpos - 1) - return complete_identifiers!(suggestions, ffunc, context_module, string, - name, pos, dotpos, startpos, comp_keywords) + complete_identifiers!(suggestions, context_module, string, name, + pos, separatorpos, startpos; + comp_keywords, complete_modules_only, shift) + return sort!(unique!(named_completion, suggestions), by=named_completion_completion), namepos:pos, true end -function shell_completions(string, pos) +function shell_completions(string, pos, hint::Bool=false) # First parse everything up to the current position scs = string[1:pos] - local args, last_parse - try - args, last_parse = Base.shell_parse(scs, true)::Tuple{Expr,UnitRange{Int}} - catch + args, last_arg_start = try + Base.shell_parse(scs, true)::Tuple{Expr,Int} + catch ex + ex isa ArgumentError || ex isa ErrorException || rethrow() return Completion[], 0:-1, false end ex = args.args[end]::Expr # Now look at the last thing we parsed isempty(ex.args) && return Completion[], 0:-1, false - arg = ex.args[end] - if all(s -> isa(s, AbstractString), ex.args) - arg = arg::AbstractString - # Treat this as a path - - # As Base.shell_parse throws away trailing spaces (unless they are escaped), - # we need to special case here. - # If the last char was a space, but shell_parse ignored it search on "". - ignore_last_word = arg != " " && scs[end] == ' ' - prefix = ignore_last_word ? "" : join(ex.args) + lastarg = ex.args[end] + # As Base.shell_parse throws away trailing spaces (unless they are escaped), + # we need to special case here. + # If the last char was a space, but shell_parse ignored it search on "". 
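# Hedged sketch of where the `using`/`import` candidates above come from: each
# load-path entry is either a project file, whose `name` and `deps` are offered,
# or a directory scanned for `Foo.jl`, `Foo/src/Foo.jl` and `Foo.jl/src/Foo.jl`.
for dir in Base.load_path()
    if basename(dir) in Base.project_names && isfile(dir)
        project = Base.parsed_toml(dir)
        @show get(project, "name", nothing) collect(keys(get(project, "deps", Dict{String,Any}())))
    elseif isdir(dir)
        @show filter(endswith(".jl"), readdir(dir))
    end
end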
+ if isexpr(lastarg, :incomplete) || isexpr(lastarg, :error) + partial = string[last_arg_start:pos] + ret, range = completions(partial, lastindex(partial), Main, true, hint) + range = range .+ (last_arg_start - 1) + return ret, range, true + elseif endswith(scs, ' ') && !endswith(scs, "\\ ") + r = pos+1:pos + paths, dir, success = complete_path("", use_envpath=false, shell_escape=true) + return paths, r, success + elseif all(@nospecialize(arg) -> arg isa AbstractString, ex.args) + # Join these and treat this as a path + path::String = join(ex.args) + r = last_arg_start:pos # Also try looking into the env path if the user wants to complete the first argument - use_envpath = !ignore_last_word && length(args.args) < 2 + use_envpath = length(args.args) < 2 + + expanded = complete_expanduser(path, r) + was_expanded = expanded[3] + if was_expanded + path = (only(expanded[1])::PathCompletion).path + # If tab press, ispath and user expansion available, return it now + # otherwise see if we can complete the path further before returning with expanded ~ + !hint && ispath(path) && return expanded::Completions + end - return complete_path(prefix, pos, use_envpath=use_envpath, shell_escape=true) - elseif isexpr(arg, :incomplete) || isexpr(arg, :error) - partial = scs[last_parse] - ret, range = completions(partial, lastindex(partial)) - range = range .+ (first(last_parse) - 1) - return ret, range, true + paths, dir, success = complete_path(path, use_envpath=use_envpath, shell_escape=true, contract_user=was_expanded) + + if success && !isempty(dir) + let dir = do_shell_escape(dir) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + partial = string[last_arg_start:pos] + if endswith(dir, "/") && startswith(partial, dir) + r = (last_arg_start + sizeof(dir)):pos + elseif startswith(partial, dir * "/") + r = nextind(string, last_arg_start + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end + # if ~ was expanded earlier and the incomplete string isn't a path + # return the path with contracted user to match what the hint shows. Otherwise expand ~ + # i.e. require two tab presses to expand user + if was_expanded && !ispath(path) + map!(paths, paths) do c::PathCompletion + PathCompletion(contractuser(c.path)) + end + end + return paths, r, success end return Completion[], 0:-1, false end -function UndefVarError_hint(io::IO, ex::UndefVarError) - var = ex.var - if var === :or - print(io, "\nsuggestion: Use `||` for short-circuiting boolean OR.") - elseif var === :and - print(io, "\nsuggestion: Use `&&` for short-circuiting boolean AND.") - elseif var === :help - println(io) - # Show friendly help message when user types help or help() and help is undefined - show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help])) - elseif var === :quit - print(io, "\nsuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.") - end -end - function __init__() - Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError) COMPLETION_WORLD[] = Base.get_world_counter() - nothing + return nothing end end # module diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl index a1f94852b38ec..ddcfc111cf962 100644 --- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl +++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl @@ -176,7 +176,7 @@ Returns `selected(m)`. !!! 
compat "Julia 1.6" The `cursor` argument requires Julia 1.6 or later. """ -request(m::AbstractMenu; kwargs...) = request(terminal, m; kwargs...) +request(m::AbstractMenu; kwargs...) = request(default_terminal(), m; kwargs...) function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Union{Int, Base.RefValue{Int}}=1, suppress_output=false) if cursor isa Int @@ -252,7 +252,7 @@ end Shorthand for `println(msg); request(m)`. """ -request(msg::AbstractString, m::AbstractMenu; kwargs...) = request(terminal, msg, m; kwargs...) +request(msg::AbstractString, m::AbstractMenu; kwargs...) = request(default_terminal(), msg, m; kwargs...) function request(term::REPL.Terminals.TTYTerminal, msg::AbstractString, m::AbstractMenu; kwargs...) println(term.out_stream, msg) diff --git a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl index 5c3ecf3808c49..fd660fc0f7824 100644 --- a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl +++ b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl @@ -38,7 +38,7 @@ end """ - MultiSelectMenu(options::Array{String,1}; pagesize::Int=10, selected=[], kwargs...) + MultiSelectMenu(options::Vector{String}; pagesize::Int=10, selected=[], kwargs...) Create a MultiSelectMenu object. Use `request(menu::MultiSelectMenu)` to get user input. It returns a `Set` containing the indices of options that @@ -46,7 +46,7 @@ were selected by the user. # Arguments - - `options::Array{String, 1}`: Options to be displayed + - `options::Vector{String}`: Options to be displayed - `pagesize::Int=10`: The number of options to be displayed at one time, the menu will scroll if length(options) > pagesize - `selected=[]`: pre-selected items. `i ∈ selected` means that `options[i]` is preselected. diff --git a/stdlib/REPL/src/TerminalMenus/Pager.jl b/stdlib/REPL/src/TerminalMenus/Pager.jl index c823a5dedd1ba..091f87801e7a4 100644 --- a/stdlib/REPL/src/TerminalMenus/Pager.jl +++ b/stdlib/REPL/src/TerminalMenus/Pager.jl @@ -39,4 +39,4 @@ function pager(terminal, object) pager = Pager(String(take!(buffer)); pagesize = div(lines, 2)) return request(terminal, pager) end -pager(object) = pager(terminal, object) +pager(object) = pager(default_terminal(), object) diff --git a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl index 32a6373b719d7..8e35e37f7f973 100644 --- a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl +++ b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl @@ -31,9 +31,9 @@ end """ - RadioMenu(options::Array{String,1}; pagesize::Int=10, - keybindings::Vector{Char}=Char[], - kwargs...) + RadioMenu(options::Vector{String}; pagesize::Int=10, + keybindings::Vector{Char}=Char[], + kwargs...) Create a RadioMenu object. Use `request(menu::RadioMenu)` to get user input. `request()` returns an `Int` which is the index of the option selected by the @@ -41,7 +41,7 @@ user. # Arguments - - `options::Array{String, 1}`: Options to be displayed + - `options::Vector{String}`: Options to be displayed - `pagesize::Int=10`: The number of options to be displayed at one time, the menu will scroll if length(options) > pagesize - `keybindings::Vector{Char}=Char[]`: Shortcuts to pick corresponding entry from `options` diff --git a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl index 87869e84d9838..f970cd9a289c2 100644 --- a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl +++ b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl @@ -1,14 +1,19 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -module TerminalMenus +""" + REPL.TerminalMenus -terminal = nothing # The user terminal +A module that contains code for displaying text mode interactive menus. +Key exported symbols include [`REPL.TerminalMenus.RadioMenu`](@ref) and +[`REPL.TerminalMenus.MultiSelectMenu`](@ref). +""" +module TerminalMenus -import REPL +using ..REPL: REPL -function __init__() - global terminal - terminal = REPL.Terminals.TTYTerminal(get(ENV, "TERM", Sys.iswindows() ? "" : "dumb"), stdin, stdout, stderr) +function default_terminal(; in::IO=stdin, out::IO=stdout, err::IO=stderr) + return REPL.Terminals.TTYTerminal( + get(ENV, "TERM", Sys.iswindows() ? "" : "dumb"), in, out, err) end include("util.jl") @@ -25,6 +30,9 @@ export Pager, request +public Config, config, MultiSelectConfig +public pick, cancel, writeline, options, numoptions, selected, header, keypress + # TODO: remove in Julia 2.0 # While not exported, AbstractMenu documented these as an extension interface @deprecate printMenu printmenu diff --git a/stdlib/REPL/src/Terminals.jl b/stdlib/REPL/src/Terminals.jl index dac19406b3fc1..aba6bff73a607 100644 --- a/stdlib/REPL/src/Terminals.jl +++ b/stdlib/REPL/src/Terminals.jl @@ -30,9 +30,7 @@ import Base: displaysize, flush, pipe_reader, - pipe_writer, - read, - readuntil + pipe_writer ## AbstractTerminal: abstract supertype of all terminals ## @@ -99,6 +97,7 @@ abstract type UnixTerminal <: TextTerminal end pipe_reader(t::UnixTerminal) = t.in_stream::IO pipe_writer(t::UnixTerminal) = t.out_stream::IO +@nospecialize mutable struct TerminalBuffer <: UnixTerminal out_stream::IO end @@ -109,6 +108,7 @@ mutable struct TTYTerminal <: UnixTerminal out_stream::IO err_stream::IO end +@specialize const CSI = "\x1b[" @@ -120,23 +120,19 @@ cmove_line_up(t::UnixTerminal, n) = (cmove_up(t, n); cmove_col(t, 1)) cmove_line_down(t::UnixTerminal, n) = (cmove_down(t, n); cmove_col(t, 1)) cmove_col(t::UnixTerminal, n) = (write(t.out_stream, '\r'); n > 1 && cmove_right(t, n-1)) -const is_precompiling = Ref(false) if Sys.iswindows() function raw!(t::TTYTerminal,raw::Bool) - is_precompiling[] && return true - check_open(t.in_stream) if Base.ispty(t.in_stream) run((raw ? 
`stty raw -echo onlcr -ocrnl opost` : `stty sane`), t.in_stream, t.out_stream, t.err_stream) true else - ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) != -1 + ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) == 0 end end else function raw!(t::TTYTerminal, raw::Bool) - check_open(t.in_stream) - ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) != -1 + ccall(:jl_tty_set_mode, Int32, (Ptr{Cvoid},Int32), t.in_stream.handle::Ptr{Cvoid}, raw) == 0 end end diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl index b9797dee910c2..0868d3e80c824 100644 --- a/stdlib/REPL/src/docview.jl +++ b/stdlib/REPL/src/docview.jl @@ -9,9 +9,9 @@ using Base.Docs: catdoc, modules, DocStr, Binding, MultiDoc, keywords, isfield, import Base.Docs: doc, formatdoc, parsedoc, apropos -using Base: with_output_color, mapany +using Base: with_output_color, mapany, isdeprecated, isexported -import REPL +using Base.Filesystem: _readdirx using InteractiveUtils: subtypes @@ -20,20 +20,28 @@ using Unicode: normalize ## Help mode ## # This is split into helpmode and _helpmode to easier unittest _helpmode -helpmode(io::IO, line::AbstractString, mod::Module=Main) = :($REPL.insert_hlines($io, $(REPL._helpmode(io, line, mod)))) +function helpmode(io::IO, line::AbstractString, mod::Module=Main) + internal_accesses = Set{Pair{Module,Symbol}}() + quote + docs = $Markdown.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses))) + $REPL.insert_internal_warning(docs, $internal_accesses) + end +end helpmode(line::AbstractString, mod::Module=Main) = helpmode(stdout, line, mod) +# A hack to make the line entered at the REPL available at trimdocs without +# passing the string through the entire mechanism. const extended_help_on = Ref{Any}(nothing) -function _helpmode(io::IO, line::AbstractString, mod::Module=Main) +function _helpmode(io::IO, line::AbstractString, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) line = strip(line) ternary_operator_help = (line == "?" || line == "?:") if startswith(line, '?') && !ternary_operator_help line = line[2:end] - extended_help_on[] = line + extended_help_on[] = nothing brief = false else - extended_help_on[] = nothing + extended_help_on[] = line brief = true end # interpret anything starting with # or #= as asking for help on comments @@ -47,7 +55,7 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main) x = Meta.parse(line, raise = false, depwarn = false) assym = Symbol(line) expr = - if haskey(keywords, Symbol(line)) || Base.isoperator(assym) || isexpr(x, :error) || + if haskey(keywords, assym) || Base.isoperator(assym) || isexpr(x, :error) || isexpr(x, :invalid) || isexpr(x, :incomplete) # Docs for keywords must be treated separately since trying to parse a single # keyword such as `function` would throw a parse error due to the missing `end`. 
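# Why keywords get the special treatment above: parsing a bare keyword does not
# produce a usable expression, so their help is looked up directly in
# `Base.Docs.keywords`. A quick illustration:
Meta.parse("function", raise=false, depwarn=false)  # an `Expr(:incomplete, ...)`
haskey(Base.Docs.keywords, :function)               # true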
@@ -64,30 +72,17 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main) end # the following must call repl(io, expr) via the @repl macro # so that the resulting expressions are evaluated in the Base.Docs namespace - :($REPL.@repl $io $expr $brief $mod) + :($REPL.@repl $io $expr $brief $mod $internal_accesses) end _helpmode(line::AbstractString, mod::Module=Main) = _helpmode(stdout, line, mod) -# Print vertical lines along each docstring if there are multiple docs -function insert_hlines(io::IO, docs) - if !isa(docs, Markdown.MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results]) - return docs - end - docs = docs::Markdown.MD - v = Any[] - for (n, doc) in enumerate(docs.content) - push!(v, doc) - n == length(docs.content) || push!(v, Markdown.HorizontalRule()) - end - return Markdown.MD(v) -end - function formatdoc(d::DocStr) buffer = IOBuffer() for part in d.text formatdoc(buffer, d, part) end - Markdown.MD(Any[Markdown.parse(seekstart(buffer))]) + md = Markdown.MD(Any[Markdown.parse(seekstart(buffer))]) + assume_julia_code!(md) end @noinline formatdoc(buffer, d, part) = print(buffer, part) @@ -101,6 +96,27 @@ function parsedoc(d::DocStr) d.object end +""" + assume_julia_code!(doc::Markdown.MD) -> doc + +Assume that code blocks with no language specified are Julia code. +""" +function assume_julia_code!(doc::Markdown.MD) + assume_julia_code!(doc.content) + doc +end + +function assume_julia_code!(blocks::Vector) + for (i, block) in enumerate(blocks) + if block isa Markdown.Code && block.language == "" + blocks[i] = Markdown.Code("julia", block.code) + elseif block isa Vector || block isa Markdown.MD + assume_julia_code!(block) + end + end + blocks +end + ## Trimming long help ("# Extended help") struct Message # For direct messages to the terminal @@ -148,14 +164,48 @@ end _trimdocs(md, brief::Bool) = md, false -""" - Docs.doc(binding, sig) -Return all documentation that matches both `binding` and `sig`. +is_tuple(expr) = false +is_tuple(expr::Expr) = expr.head == :tuple + +struct Logged{F} + f::F + mod::Module + collection::Set{Pair{Module,Symbol}} +end +function (la::Logged)(m::Module, s::Symbol) + m !== la.mod && Base.isdefined(m, s) && !Base.ispublic(m, s) && push!(la.collection, m => s) + la.f(m, s) +end +(la::Logged)(args...) = la.f(args...) + +function log_nonpublic_access(expr::Expr, mod::Module, internal_access::Set{Pair{Module,Symbol}}) + if expr.head === :. && length(expr.args) == 2 && !is_tuple(expr.args[2]) + Expr(:call, Logged(getproperty, mod, internal_access), log_nonpublic_access.(expr.args, (mod,), (internal_access,))...) + elseif expr.head === :call && expr.args[1] === Base.Docs.Binding + Expr(:call, Logged(Base.Docs.Binding, mod, internal_access), log_nonpublic_access.(expr.args[2:end], (mod,), (internal_access,))...) + else + Expr(expr.head, log_nonpublic_access.(expr.args, (mod,), (internal_access,))...) 
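# Hedged illustration of `assume_julia_code!` defined above: a fenced code block
# without a language tag in a docstring is re-tagged as Julia so the REPL
# highlights it (the helper is internal; this is only a sketch).
using Markdown, REPL
md = Markdown.parse("```\n1 + 1\n```")
REPL.assume_julia_code!(md)
(md.content[1]::Markdown.Code).language   # expected to be "julia"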
+ end +end +log_nonpublic_access(expr, ::Module, _) = expr + +function insert_internal_warning(md::Markdown.MD, internal_access::Set{Pair{Module,Symbol}}) + if !isempty(internal_access) + items = Any[Any[Markdown.Paragraph(Any[Markdown.Code("", s)])] for s in sort!(["$mod.$sym" for (mod, sym) in internal_access])] + admonition = Markdown.Admonition("warning", "Warning", Any[ + Markdown.Paragraph(Any["The following bindings may be internal; they may change or be removed in future versions:"]), + Markdown.List(items, -1, false)]) + pushfirst!(md.content, admonition) + end + md +end +function insert_internal_warning(other, internal_access::Set{Pair{Module,Symbol}}) + # We don't know how to insert an internal symbol warning into non-markdown + # content, so we don't. + other +end -If `getdoc` returns a non-`nothing` result on the value of the binding, then a -dynamic docstring is returned instead of one based on the binding itself. -""" function doc(binding::Binding, sig::Type = Union{}) if defined(binding) result = getdoc(resolve(binding), sig) @@ -250,7 +300,13 @@ function summarize(binding::Binding, sig) io = IOBuffer() if defined(binding) binding_res = resolve(binding) - !isa(binding_res, Module) && println(io, "No documentation found.\n") + if !isa(binding_res, Module) + if Base.ispublic(binding.mod, binding.var) + println(io, "No documentation found for public symbol.\n") + else + println(io, "No documentation found for private symbol.\n") + end + end summarize(io, binding_res, binding) else println(io, "No documentation found.\n") @@ -331,9 +387,9 @@ function find_readme(m::Module)::Union{String, Nothing} path = dirname(mpath) top_path = pkgdir(m) while true - for file in readdir(path; join=true, sort=true) - isfile(file) && (basename(lowercase(file)) in ["readme.md", "readme"]) || continue - return file + for entry in _readdirx(path; sort=true) + isfile(entry) && (lowercase(entry.name) in ["readme.md", "readme"]) || continue + return entry.path end path == top_path && break # go no further than pkgdir path = dirname(path) # work up through nested modules @@ -342,16 +398,17 @@ function find_readme(m::Module)::Union{String, Nothing} end function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200) readme_path = find_readme(m) + public = Base.ispublic(binding.mod, binding.var) ? 
"public" : "internal" if isnothing(readme_path) - println(io, "No docstring or readme file found for module `$m`.\n") + println(io, "No docstring or readme file found for $public module `$m`.\n") else - println(io, "No docstring found for module `$m`.") + println(io, "No docstring found for $public module `$m`.") end exports = filter!(!=(nameof(m)), names(m)) if isempty(exports) - println(io, "Module does not export any names.") + println(io, "Module does not have any public names.") else - println(io, "# Exported names") + println(io, "# Public names") print(io, " `") join(io, exports, "`, `") println(io, "`\n") @@ -359,7 +416,9 @@ function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200) if !isnothing(readme_path) readme_lines = readlines(readme_path) isempty(readme_lines) && return # don't say we are going to print empty file - println(io, "# Displaying contents of readme found at `$(readme_path)`") + println(io) + println(io, "---") + println(io, "_Package description from `$(basename(readme_path))`:_") for line in first(readme_lines, nlines) println(io, line) end @@ -375,8 +434,31 @@ end # repl search and completions for help +# This type is returned from `accessible` and denotes a binding that is accessible within +# some context. It differs from `Base.Docs.Binding`, which is also used by the REPL, in +# that it doesn't track the defining module for a symbol unless the symbol is public but +# not exported, i.e. it's accessible but requires qualification. Using this type rather +# than `Base.Docs.Binding` simplifies things considerably, partially because REPL searching +# is based on `String`s, which this type stores, but `Base.Docs.Binding` stores a module +# and symbol and does not have any notion of the context from which the binding is accessed. +struct AccessibleBinding + source::Union{String,Nothing} + name::String +end + +function AccessibleBinding(mod::Module, name::Symbol) + m = isexported(mod, name) ? nothing : String(nameof(mod)) + return AccessibleBinding(m, String(name)) +end +AccessibleBinding(name::Symbol) = AccessibleBinding(nothing, String(name)) + +function Base.show(io::IO, b::AccessibleBinding) + b.source === nothing || print(io, b.source, '.') + print(io, b.name) +end quote_spaces(x) = any(isspace, x) ? "'" * x * "'" : x +quote_spaces(x::AccessibleBinding) = AccessibleBinding(x.source, quote_spaces(x.name)) function repl_search(io::IO, s::Union{Symbol,String}, mod::Module) pre = "search:" @@ -393,7 +475,12 @@ function repl_corrections(io::IO, s, mod::Module) quot = any(isspace, s) ? "'" : "" print(io, quot) printstyled(io, s, color=:cyan) - print(io, quot, '\n') + print(io, quot) + if Base.identify_package(s) === nothing + print(io, '\n') + else + print(io, ", but a loadable package with that name exists. 
If you are looking for the package docs load the package first.\n") + end print_correction(io, s, mod) end repl_corrections(s) = repl_corrections(stdout, s) @@ -401,7 +488,7 @@ repl_corrections(s) = repl_corrections(stdout, s) # inverse of latex_symbols Dict, lazily created as needed const symbols_latex = Dict{String,String}() function symbol_latex(s::String) - if isempty(symbols_latex) && isassigned(Base.REPL_MODULE_REF) + if isempty(symbols_latex) for (k,v) in Iterators.flatten((REPLCompletions.latex_symbols, REPLCompletions.emoji_symbols)) symbols_latex[v] = k @@ -472,9 +559,9 @@ end repl_latex(s::String) = repl_latex(stdout, s) macro repl(ex, brief::Bool=false, mod::Module=Main) repl(ex; brief, mod) end -macro repl(io, ex, brief, mod) repl(io, ex; brief, mod) end +macro repl(io, ex, brief, mod, internal_accesses) repl(io, ex; brief, mod, internal_accesses) end -function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main) +function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) str = string(s) quote repl_latex($io, $str) @@ -483,19 +570,19 @@ function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main) # n.b. we call isdefined for the side-effect of resolving the binding, if possible :(repl_corrections($io, $str, $mod)) end) - $(_repl(s, brief)) + $(_repl(s, brief, mod, internal_accesses)) end end isregex(x) = isexpr(x, :macrocall, 3) && x.args[1] === Symbol("@r_str") && !isempty(x.args[3]) -repl(io::IO, ex::Expr; brief::Bool=true, mod::Module=Main) = isregex(ex) ? :(apropos($io, $ex)) : _repl(ex, brief) -repl(io::IO, str::AbstractString; brief::Bool=true, mod::Module=Main) = :(apropos($io, $str)) -repl(io::IO, other; brief::Bool=true, mod::Module=Main) = esc(:(@doc $other)) +repl(io::IO, ex::Expr; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = isregex(ex) ? :(apropos($io, $ex)) : _repl(ex, brief, mod, internal_accesses) +repl(io::IO, str::AbstractString; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = :(apropos($io, $str)) +repl(io::IO, other; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = esc(:(@doc $other)) # TODO: track internal_accesses #repl(io::IO, other) = lookup_doc(other) # TODO repl(x; brief::Bool=true, mod::Module=Main) = repl(stdout, x; brief, mod) -function _repl(x, brief::Bool=true) +function _repl(x, brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) if isexpr(x, :call) x = x::Expr # determine the types of the values @@ -561,6 +648,7 @@ function _repl(x, brief::Bool=true) else docs end + docs = log_nonpublic_access(macroexpand(mod, docs), mod, internal_accesses) :(REPL.trimdocs($docs, $brief)) end @@ -584,8 +672,9 @@ function fielddoc(binding::Binding, field::Symbol) end end end - fields = join(["`$f`" for f in fieldnames(resolve(binding))], ", ", ", and ") - fields = isempty(fields) ? "no fields" : "fields $fields" + fs = fieldnames(resolve(binding)) + fields = isempty(fs) ? "no fields" : (length(fs) == 1 ? 
"field " : "fields ") * + join(("`$f`" for f in fs), ", ", ", and ") Markdown.parse("`$(resolve(binding))` has $fields.") end @@ -618,28 +707,80 @@ function matchinds(needle, haystack; acronym::Bool = false) return is end +matchinds(needle, (; name)::AccessibleBinding; acronym::Bool=false) = + matchinds(needle, name; acronym) + longer(x, y) = length(x) ≥ length(y) ? (x, true) : (y, false) bestmatch(needle, haystack) = longer(matchinds(needle, haystack, acronym = true), matchinds(needle, haystack)) -avgdistance(xs) = - isempty(xs) ? 0 : - (xs[end] - xs[1] - length(xs)+1)/length(xs) +# Optimal string distance: Counts the minimum number of insertions, deletions, +# transpositions or substitutions to go from one string to the other. +function string_distance(a::AbstractString, lena::Integer, b::AbstractString, lenb::Integer) + if lena > lenb + a, b = b, a + lena, lenb = lenb, lena + end + start = 0 + for (i, j) in zip(a, b) + if a == b + start += 1 + else + break + end + end + start == lena && return lenb - start + vzero = collect(1:(lenb - start)) + vone = similar(vzero) + prev_a, prev_b = first(a), first(b) + current = 0 + for (i, ai) in enumerate(a) + i > start || (prev_a = ai; continue) + left = i - start - 1 + current = i - start + transition_next = 0 + for (j, bj) in enumerate(b) + j > start || (prev_b = bj; continue) + # No need to look beyond window of lower right diagonal + above = current + this_transition = transition_next + transition_next = vone[j - start] + vone[j - start] = current = left + left = vzero[j - start] + if ai != bj + # Minimum between substitution, deletion and insertion + current = min(current + 1, above + 1, left + 1) + if i > start + 1 && j > start + 1 && ai == prev_b && prev_a == bj + current = min(current, (this_transition += 1)) + end + end + vzero[j - start] = current + prev_b = bj + end + prev_a = ai + end + current +end -function fuzzyscore(needle, haystack) - score = 0. - is, acro = bestmatch(needle, haystack) - score += (acro ? 
2 : 1)*length(is) # Matched characters - score -= 2(length(needle)-length(is)) # Missing characters - !acro && (score -= avgdistance(is)/10) # Contiguous - !isempty(is) && (score -= sum(is)/length(is)/100) # Closer to beginning - return score +function fuzzyscore(needle::AbstractString, haystack::AbstractString) + lena, lenb = length(needle), length(haystack) + 1 - (string_distance(needle, lena, haystack, lenb) / max(lena, lenb)) end -function fuzzysort(search::String, candidates::Vector{String}) - scores = map(cand -> (fuzzyscore(search, cand), -Float64(levenshtein(search, cand))), candidates) +function fuzzyscore(needle::AbstractString, haystack::AccessibleBinding) + score = fuzzyscore(needle, haystack.name) + haystack.source === nothing && return score + # Apply a "penalty" of half an edit if the comparator binding is public but not + # exported so that exported/local names that exactly match the search query are + # listed first + penalty = 1 / (2 * max(length(needle), length(haystack.name))) + return max(score - penalty, 0) +end + +function fuzzysort(search::String, candidates::Vector{AccessibleBinding}) + scores = map(cand -> fuzzyscore(search, cand), candidates) candidates[sortperm(scores)] |> reverse end @@ -663,12 +804,14 @@ function levenshtein(s1, s2) return d[m+1, n+1] end -function levsort(search::String, candidates::Vector{String}) - scores = map(cand -> (Float64(levenshtein(search, cand)), -fuzzyscore(search, cand)), candidates) +function levsort(search::String, candidates::Vector{AccessibleBinding}) + scores = map(candidates) do cand + (Float64(levenshtein(search, cand.name)), -fuzzyscore(search, cand)) + end candidates = candidates[sortperm(scores)] i = 0 for outer i = 1:length(candidates) - levenshtein(search, candidates[i]) > 3 && break + levenshtein(search, candidates[i].name) > 3 && break end return candidates[1:i] end @@ -686,24 +829,39 @@ function printmatch(io::IO, word, match) end end +function printmatch(io::IO, word, match::AccessibleBinding) + match.source === nothing || print(io, match.source, '.') + printmatch(io, word, match.name) +end + +function matchlength(x::AccessibleBinding) + n = length(x.name) + if x.source !== nothing + n += length(x.source) + 1 # the +1 is for the `.` separator + end + return n +end +matchlength(x) = length(x) + function printmatches(io::IO, word, matches; cols::Int = _displaysize(io)[2]) total = 0 for match in matches - total + length(match) + 1 > cols && break - fuzzyscore(word, match) < 0 && break + ml = matchlength(match) + total + ml + 1 > cols && break + fuzzyscore(word, match) < 0.5 && break print(io, " ") printmatch(io, word, match) - total += length(match) + 1 + total += ml + 1 end end printmatches(args...; cols::Int = _displaysize(stdout)[2]) = printmatches(stdout, args..., cols = cols) -function print_joined_cols(io::IO, ss::Vector{String}, delim = "", last = delim; cols::Int = _displaysize(io)[2]) +function print_joined_cols(io::IO, ss::Vector{AccessibleBinding}, delim = "", last = delim; cols::Int = _displaysize(io)[2]) i = 0 total = 0 for outer i = 1:length(ss) - total += length(ss[i]) + total += matchlength(ss[i]) total + max(i-2,0)*length(delim) + (i>1 ? 
1 : 0)*length(last) > cols && (i-=1; break) end join(io, ss[1:i], delim, last) @@ -725,27 +883,31 @@ print_correction(word, mod::Module) = print_correction(stdout, word, mod) # Completion data - moduleusings(mod) = ccall(:jl_module_usings, Any, (Any,), mod) -filtervalid(names) = filter(x->!occursin(r"#", x), map(string, names)) - -accessible(mod::Module) = - Symbol[filter!(s -> !Base.isdeprecated(mod, s), names(mod, all=true, imported=true)); - map(names, moduleusings(mod))...; - collect(keys(Base.Docs.keywords))] |> unique |> filtervalid +function accessible(mod::Module) + bindings = Set(AccessibleBinding(s) for s in names(mod; all=true, imported=true) + if !isdeprecated(mod, s)) + for used in moduleusings(mod) + union!(bindings, (AccessibleBinding(used, s) for s in names(used) + if !isdeprecated(used, s))) + end + union!(bindings, (AccessibleBinding(k) for k in keys(Base.Docs.keywords))) + filter!(b -> !occursin('#', b.name), bindings) + return collect(bindings) +end function doc_completions(name, mod::Module=Main) res = fuzzysort(name, accessible(mod)) # to insert an entry like `raw""` for `"@raw_str"` in `res` - ms = match.(r"^@(.*?)_str$", res) + ms = map(c -> match(r"^@(.*?)_str$", c.name), res) idxs = findall(!isnothing, ms) # avoid messing up the order while inserting for i in reverse!(idxs) c = only((ms[i]::AbstractMatch).captures) - insert!(res, i, "$(c)\"\"") + insert!(res, i, AccessibleBinding(res[i].source, "$(c)\"\"")) end res end @@ -822,18 +984,6 @@ stripmd(x::Markdown.Footnote) = "$(stripmd(x.id)) $(stripmd(x.text))" stripmd(x::Markdown.Table) = join([join(map(stripmd, r), " ") for r in x.rows], " ") -""" - apropos([io::IO=stdout], pattern::Union{AbstractString,Regex}) - -Search available docstrings for entries containing `pattern`. - -When `pattern` is a string, case is ignored. Results are printed to `io`. 
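A minimal standalone sketch of the scoring scheme introduced above, for orientation: the new `fuzzyscore` normalizes an edit distance into a similarity in [0, 1] and then docks public-but-unexported candidates by half an edit so that exact exported matches sort first. Plain Levenshtein is used here for brevity (the patch itself uses a transposition-aware, optimal-string-alignment distance), and the helper names below are illustrative only.

    # Sketch only: plain Levenshtein distance via a rolling DP row.
    function simple_levenshtein(a::AbstractString, b::AbstractString)
        lb = length(b)
        d = collect(0:lb)                  # previous DP row
        for (i, ca) in enumerate(a)
            prev = d[1]                    # value of d[i-1, j-1]
            d[1] = i
            for (j, cb) in enumerate(b)
                cur = min(d[j + 1] + 1,                 # deletion
                          d[j] + 1,                     # insertion
                          prev + (ca == cb ? 0 : 1))    # substitution
                prev, d[j + 1] = d[j + 1], cur
            end
        end
        return d[end]
    end

    score(needle, hay) = 1 - simple_levenshtein(needle, hay) / max(length(needle), length(hay))

    # Half-edit penalty for candidates that would need module qualification.
    qualified_score(needle, hay) =
        max(score(needle, hay) - 1 / (2 * max(length(needle), length(hay))), 0)

    score("thing", "thing")            # 1.0
    qualified_score("thing", "thing")  # 0.9, so an exported exact match still wins
    score("thing", "thang")            # 0.8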
- -`apropos` can be called from the help mode in the REPL by wrapping the query in double quotes: -``` -help?> "pattern" -``` -""" apropos(string) = apropos(stdout, string) apropos(io::IO, string) = apropos(io, Regex("\\Q$string", "i")) diff --git a/stdlib/REPL/src/emoji_symbols.jl b/stdlib/REPL/src/emoji_symbols.jl index 49a55c97f6564..d6d4a03321d0a 100644 --- a/stdlib/REPL/src/emoji_symbols.jl +++ b/stdlib/REPL/src/emoji_symbols.jl @@ -27,6 +27,7 @@ result = mapfoldr(emoji_data, merge, [ # overwrite the old with names that changed but still keep old ones that were removed "https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json", "https://raw.githubusercontent.com/iamcal/emoji-data/e512953312c012f6bd00e3f2ef6bf152ca3710f8/emoji_pretty.json", + "https://raw.githubusercontent.com/iamcal/emoji-data/a8174c74675355c8c6a9564516b2e961fe7257ef/emoji_pretty.json", ]; init=Dict() ) @@ -132,6 +133,7 @@ const emoji_symbols = Dict( "\\:bath:" => "🛀", "\\:bathtub:" => "🛁", "\\:battery:" => "🔋", + "\\:beans:" => "🫘", "\\:bear:" => "🐻", "\\:bearded_person:" => "🧔", "\\:beaver:" => "🦫", @@ -151,6 +153,7 @@ const emoji_symbols = Dict( "\\:bird:" => "🐦", "\\:birthday:" => "🎂", "\\:bison:" => "🦬", + "\\:biting_lip:" => "🫦", "\\:black_circle:" => "⚫", "\\:black_heart:" => "🖤", "\\:black_joker:" => "🃏", @@ -198,6 +201,7 @@ const emoji_symbols = Dict( "\\:broom:" => "🧹", "\\:brown_heart:" => "🤎", "\\:bubble_tea:" => "🧋", + "\\:bubbles:" => "🫧", "\\:bucket:" => "🪣", "\\:bug:" => "🐛", "\\:bulb:" => "💡", @@ -309,6 +313,7 @@ const emoji_symbols = Dict( "\\:cool:" => "🆒", "\\:cop:" => "👮", "\\:copyright:" => "©", + "\\:coral:" => "🪸", "\\:corn:" => "🌽", "\\:couple:" => "👫", "\\:couple_with_heart:" => "💑", @@ -325,6 +330,7 @@ const emoji_symbols = Dict( "\\:crossed_fingers:" => "🤞", "\\:crossed_flags:" => "🎌", "\\:crown:" => "👑", + "\\:crutch:" => "🩼", "\\:cry:" => "😢", "\\:crying_cat_face:" => "😿", "\\:crystal_ball:" => "🔮", @@ -367,7 +373,9 @@ const emoji_symbols = Dict( "\\:dollar:" => "💵", "\\:dolls:" => "🎎", "\\:dolphin:" => "🐬", + "\\:donkey:" => "🫏", "\\:door:" => "🚪", + "\\:dotted_line_face:" => "🫥", "\\:doughnut:" => "🍩", "\\:dragon:" => "🐉", "\\:dragon_face:" => "🐲", @@ -397,6 +405,7 @@ const emoji_symbols = Dict( "\\:elevator:" => "🛗", "\\:elf:" => "🧝", "\\:email:" => "✉", + "\\:empty_nest:" => "🪹", "\\:end:" => "🔚", "\\:envelope_with_arrow:" => "📩", "\\:euro:" => "💶", @@ -408,12 +417,16 @@ const emoji_symbols = Dict( "\\:expressionless:" => "😑", "\\:eyeglasses:" => "👓", "\\:eyes:" => "👀", + "\\:face_holding_back_tears:" => "🥹", "\\:face_palm:" => "🤦", "\\:face_vomiting:" => "🤮", "\\:face_with_cowboy_hat:" => "🤠", + "\\:face_with_diagonal_mouth:" => "🫤", "\\:face_with_hand_over_mouth:" => "🤭", "\\:face_with_head_bandage:" => "🤕", "\\:face_with_monocle:" => "🧐", + "\\:face_with_open_eyes_and_hand_over_mouth:" => "🫢", + "\\:face_with_peeking_eye:" => "🫣", "\\:face_with_raised_eyebrow:" => "🤨", "\\:face_with_rolling_eyes:" => "🙄", "\\:face_with_symbols_on_mouth:" => "🤬", @@ -452,10 +465,12 @@ const emoji_symbols = Dict( "\\:floppy_disk:" => "💾", "\\:flower_playing_cards:" => "🎴", "\\:flushed:" => "😳", + "\\:flute:" => "🪈", "\\:fly:" => "🪰", "\\:flying_disc:" => "🥏", "\\:flying_saucer:" => "🛸", "\\:foggy:" => "🌁", + "\\:folding_hand_fan:" => "🪭", "\\:fondue:" => "🫕", "\\:foot:" => "🦶", "\\:football:" => "🏈", @@ -482,6 +497,7 @@ const emoji_symbols = Dict( "\\:ghost:" => "👻", "\\:gift:" => "🎁", "\\:gift_heart:" => "💝", + "\\:ginger_root:" => "🫚", 
"\\:giraffe_face:" => "🦒", "\\:girl:" => "👧", "\\:glass_of_milk:" => "🥛", @@ -491,6 +507,7 @@ const emoji_symbols = Dict( "\\:goat:" => "🐐", "\\:goggles:" => "🥽", "\\:golf:" => "⛳", + "\\:goose:" => "🪿", "\\:gorilla:" => "🦍", "\\:grapes:" => "🍇", "\\:green_apple:" => "🍏", @@ -498,6 +515,7 @@ const emoji_symbols = Dict( "\\:green_heart:" => "💚", "\\:green_salad:" => "🥗", "\\:grey_exclamation:" => "❕", + "\\:grey_heart:" => "🩶", "\\:grey_question:" => "❔", "\\:grimacing:" => "😬", "\\:grin:" => "😁", @@ -506,11 +524,14 @@ const emoji_symbols = Dict( "\\:guide_dog:" => "🦮", "\\:guitar:" => "🎸", "\\:gun:" => "🔫", + "\\:hair_pick:" => "🪮", "\\:haircut:" => "💇", "\\:hamburger:" => "🍔", "\\:hammer:" => "🔨", + "\\:hamsa:" => "🪬", "\\:hamster:" => "🐹", "\\:hand:" => "✋", + "\\:hand_with_index_finger_and_thumb_crossed:" => "🫰", "\\:handbag:" => "👜", "\\:handball:" => "🤾", "\\:handshake:" => "🤝", @@ -524,12 +545,14 @@ const emoji_symbols = Dict( "\\:heart_decoration:" => "💟", "\\:heart_eyes:" => "😍", "\\:heart_eyes_cat:" => "😻", + "\\:heart_hands:" => "🫶", "\\:heartbeat:" => "💓", "\\:heartpulse:" => "💗", "\\:hearts:" => "♥", "\\:heavy_check_mark:" => "✔", "\\:heavy_division_sign:" => "➗", "\\:heavy_dollar_sign:" => "💲", + "\\:heavy_equals_sign:" => "🟰", "\\:heavy_minus_sign:" => "➖", "\\:heavy_multiplication_x:" => "✖", "\\:heavy_plus_sign:" => "➕", @@ -559,16 +582,19 @@ const emoji_symbols = Dict( "\\:hugging_face:" => "🤗", "\\:hushed:" => "😯", "\\:hut:" => "🛖", + "\\:hyacinth:" => "🪻", "\\:i_love_you_hand_sign:" => "🤟", "\\:ice_cream:" => "🍨", "\\:ice_cube:" => "🧊", "\\:ice_hockey_stick_and_puck:" => "🏒", "\\:icecream:" => "🍦", "\\:id:" => "🆔", + "\\:identification_card:" => "🪪", "\\:ideograph_advantage:" => "🉐", "\\:imp:" => "👿", "\\:inbox_tray:" => "📥", "\\:incoming_envelope:" => "📨", + "\\:index_pointing_at_the_viewer:" => "🫵", "\\:information_desk_person:" => "💁", "\\:information_source:" => "ℹ", "\\:innocent:" => "😇", @@ -580,7 +606,9 @@ const emoji_symbols = Dict( "\\:japanese_castle:" => "🏯", "\\:japanese_goblin:" => "👺", "\\:japanese_ogre:" => "👹", + "\\:jar:" => "🫙", "\\:jeans:" => "👖", + "\\:jellyfish:" => "🪼", "\\:jigsaw:" => "🧩", "\\:joy:" => "😂", "\\:joy_cat:" => "😹", @@ -589,6 +617,7 @@ const emoji_symbols = Dict( "\\:kangaroo:" => "🦘", "\\:key:" => "🔑", "\\:keycap_ten:" => "🔟", + "\\:khanda:" => "🪯", "\\:kimono:" => "👘", "\\:kiss:" => "💋", "\\:kissing:" => "😗", @@ -631,11 +660,14 @@ const emoji_symbols = Dict( "\\:left_luggage:" => "🛅", "\\:left_right_arrow:" => "↔", "\\:leftwards_arrow_with_hook:" => "↩", + "\\:leftwards_hand:" => "🫲", + "\\:leftwards_pushing_hand:" => "🫷", "\\:leg:" => "🦵", "\\:lemon:" => "🍋", "\\:leo:" => "♌", "\\:leopard:" => "🐆", "\\:libra:" => "♎", + "\\:light_blue_heart:" => "🩵", "\\:light_rail:" => "🚈", "\\:link:" => "🔗", "\\:lion_face:" => "🦁", @@ -650,10 +682,12 @@ const emoji_symbols = Dict( "\\:long_drum:" => "🪘", "\\:loop:" => "➿", "\\:lotion_bottle:" => "🧴", + "\\:lotus:" => "🪷", "\\:loud_sound:" => "🔊", "\\:loudspeaker:" => "📢", "\\:love_hotel:" => "🏩", "\\:love_letter:" => "💌", + "\\:low_battery:" => "🪫", "\\:low_brightness:" => "🔅", "\\:luggage:" => "🧳", "\\:lungs:" => "🫁", @@ -679,6 +713,7 @@ const emoji_symbols = Dict( "\\:mans_shoe:" => "👞", "\\:manual_wheelchair:" => "🦽", "\\:maple_leaf:" => "🍁", + "\\:maracas:" => "🪇", "\\:martial_arts_uniform:" => "🥋", "\\:mask:" => "😷", "\\:massage:" => "💆", @@ -688,6 +723,7 @@ const emoji_symbols = Dict( "\\:mechanical_leg:" => "🦿", "\\:mega:" => "📣", "\\:melon:" => "🍈", + "\\:melting_face:" => "🫠", "\\:memo:" 
=> "📝", "\\:menorah_with_nine_branches:" => "🕎", "\\:mens:" => "🚹", @@ -702,6 +738,7 @@ const emoji_symbols = Dict( "\\:minibus:" => "🚐", "\\:minidisc:" => "💽", "\\:mirror:" => "🪞", + "\\:mirror_ball:" => "🪩", "\\:mobile_phone_off:" => "📴", "\\:money_mouth_face:" => "🤑", "\\:money_with_wings:" => "💸", @@ -711,6 +748,7 @@ const emoji_symbols = Dict( "\\:monorail:" => "🚝", "\\:moon:" => "🌔", "\\:moon_cake:" => "🥮", + "\\:moose:" => "🫎", "\\:mortar_board:" => "🎓", "\\:mosque:" => "🕌", "\\:mosquito:" => "🦟", @@ -739,6 +777,7 @@ const emoji_symbols = Dict( "\\:necktie:" => "👔", "\\:negative_squared_cross_mark:" => "❎", "\\:nerd_face:" => "🤓", + "\\:nest_with_eggs:" => "🪺", "\\:nesting_dolls:" => "🪆", "\\:neutral_face:" => "😐", "\\:new:" => "🆕", @@ -800,7 +839,9 @@ const emoji_symbols = Dict( "\\:page_facing_up:" => "📄", "\\:page_with_curl:" => "📃", "\\:pager:" => "📟", + "\\:palm_down_hand:" => "🫳", "\\:palm_tree:" => "🌴", + "\\:palm_up_hand:" => "🫴", "\\:palms_up_together:" => "🤲", "\\:pancakes:" => "🥞", "\\:panda_face:" => "🐼", @@ -812,6 +853,7 @@ const emoji_symbols = Dict( "\\:partly_sunny:" => "⛅", "\\:partying_face:" => "🥳", "\\:passport_control:" => "🛂", + "\\:pea_pod:" => "🫛", "\\:peach:" => "🍑", "\\:peacock:" => "🦚", "\\:peanuts:" => "🥜", @@ -829,6 +871,7 @@ const emoji_symbols = Dict( "\\:person_in_steamy_room:" => "🧖", "\\:person_in_tuxedo:" => "🤵", "\\:person_with_blond_hair:" => "👱", + "\\:person_with_crown:" => "🫅", "\\:person_with_headscarf:" => "🧕", "\\:person_with_pouting_face:" => "🙎", "\\:petri_dish:" => "🧫", @@ -843,10 +886,12 @@ const emoji_symbols = Dict( "\\:pinched_fingers:" => "🤌", "\\:pinching_hand:" => "🤏", "\\:pineapple:" => "🍍", + "\\:pink_heart:" => "🩷", "\\:pisces:" => "♓", "\\:pizza:" => "🍕", "\\:placard:" => "🪧", "\\:place_of_worship:" => "🛐", + "\\:playground_slide:" => "🛝", "\\:pleading_face:" => "🥺", "\\:plunger:" => "🪠", "\\:point_down:" => "👇", @@ -866,9 +911,12 @@ const emoji_symbols = Dict( "\\:pouch:" => "👝", "\\:poultry_leg:" => "🍗", "\\:pound:" => "💷", + "\\:pouring_liquid:" => "🫗", "\\:pouting_cat:" => "😾", "\\:pray:" => "🙏", "\\:prayer_beads:" => "📿", + "\\:pregnant_man:" => "🫃", + "\\:pregnant_person:" => "🫄", "\\:pregnant_woman:" => "🤰", "\\:pretzel:" => "🥨", "\\:prince:" => "🤴", @@ -914,7 +962,10 @@ const emoji_symbols = Dict( "\\:rice_cracker:" => "🍘", "\\:rice_scene:" => "🎑", "\\:right-facing_fist:" => "🤜", + "\\:rightwards_hand:" => "🫱", + "\\:rightwards_pushing_hand:" => "🫸", "\\:ring:" => "💍", + "\\:ring_buoy:" => "🛟", "\\:ringed_planet:" => "🪐", "\\:robot_face:" => "🤖", "\\:rock:" => "🪨", @@ -937,6 +988,7 @@ const emoji_symbols = Dict( "\\:sagittarius:" => "♐", "\\:sake:" => "🍶", "\\:salt:" => "🧂", + "\\:saluting_face:" => "🫡", "\\:sandal:" => "👡", "\\:sandwich:" => "🥪", "\\:santa:" => "🎅", @@ -964,6 +1016,7 @@ const emoji_symbols = Dict( "\\:seedling:" => "🌱", "\\:selfie:" => "🤳", "\\:sewing_needle:" => "🪡", + "\\:shaking_face:" => "🫨", "\\:shallow_pan_of_food:" => "🥘", "\\:shark:" => "🦈", "\\:shaved_ice:" => "🍧", @@ -1124,6 +1177,7 @@ const emoji_symbols = Dict( "\\:triangular_ruler:" => "📐", "\\:trident:" => "🔱", "\\:triumph:" => "😤", + "\\:troll:" => "🧌", "\\:trolleybus:" => "🚎", "\\:trophy:" => "🏆", "\\:tropical_drink:" => "🍹", @@ -1188,6 +1242,7 @@ const emoji_symbols = Dict( "\\:wedding:" => "💒", "\\:whale2:" => "🐋", "\\:whale:" => "🐳", + "\\:wheel:" => "🛞", "\\:wheelchair:" => "♿", "\\:white_check_mark:" => "✅", "\\:white_circle:" => "⚪", @@ -1202,7 +1257,9 @@ const emoji_symbols = Dict( "\\:wind_chime:" => "🎐", "\\:window:" => "🪟", 
"\\:wine_glass:" => "🍷", + "\\:wing:" => "🪽", "\\:wink:" => "😉", + "\\:wireless:" => "🛜", "\\:wolf:" => "🐺", "\\:woman:" => "👩", "\\:womans_clothes:" => "👚", @@ -1215,6 +1272,7 @@ const emoji_symbols = Dict( "\\:worried:" => "😟", "\\:wrench:" => "🔧", "\\:wrestlers:" => "🤼", + "\\:x-ray:" => "🩻", "\\:x:" => "❌", "\\:yarn:" => "🧶", "\\:yawning_face:" => "🥱", diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl index 3c2be918d6bd2..9f5b7e3e864ed 100644 --- a/stdlib/REPL/src/latex_symbols.jl +++ b/stdlib/REPL/src/latex_symbols.jl @@ -4,7 +4,7 @@ # This is used for tab substitution in the REPL. # The initial symbol listing was generated from the W3C symbol mapping file: -# http://www.w3.org/Math/characters/unicode.xml +# https://www.w3.org/Math/characters/unicode.xml # by the following Julia script: #= import REPL @@ -119,6 +119,46 @@ const latex_symbols = Dict( "\\euler" => "ℯ", "\\ohm" => "Ω", + # Music Symbols + # Music Symbols - Accidentals + "\\flatflat" => "𝄫", + "\\sharpsharp" => "𝄪", + # Music Symbols - Codas + "\\leftrepeatsign" => "𝄆", + "\\rightrepeatsign" => "𝄇", + "\\dalsegno" => "𝄉", + "\\dacapo" => "𝄊", + "\\segno" => "𝄋", + "\\coda" => "𝄌", + # Music Symbols - Clefs + "\\clefg" => "𝄞", + "\\clefg8va" => "𝄟", + "\\clefg8vb" => "𝄠", + "\\clefc" => "𝄡", + "\\cleff" => "𝄢", + "\\cleff8va" => "𝄣", + "\\cleff8vb" => "𝄤", + # Music Symbols - Rests + "\\restmulti" => "𝄺", + "\\restwhole" => "𝄻", + "\\resthalf" => "𝄼", + "\\restquarter" => "𝄽", + "\\rest8th" => "𝄾", + "\\rest16th" => "𝄿", + "\\rest32th" => "𝅀", + "\\rest64th" => "𝅁", + "\\rest128th" => "𝅂", + # Music Symbols - Notes + "\\notedoublewhole" => "𝅜", + "\\notewhole" => "𝅝", + "\\notehalf" => "𝅗𝅥", + "\\notequarter" => "𝅘𝅥", + "\\note8th" => "𝅘𝅥𝅮", + "\\note16th" => "𝅘𝅥𝅯", + "\\note32th" => "𝅘𝅥𝅰", + "\\note64th" => "𝅘𝅥𝅱", + "\\note128th" => "𝅘𝅥𝅲", + # Superscripts "\\^0" => "⁰", "\\^1" => "¹", @@ -207,6 +247,8 @@ const latex_symbols = Dict( "\\_+" => "₊", "\\_-" => "₋", "\\_=" => "₌", + "\\_<" => "˱", + "\\_>" => "˲", "\\_(" => "₍", "\\_)" => "₎", "\\_a" => "ₐ", @@ -432,8 +474,10 @@ const latex_symbols = Dict( "\\pertenthousand" => "‱", "\\prime" => "′", "\\backprime" => "‵", - "\\guilsinglleft" => "‹", + "\\guilsinglleft" => "‹", # note: \guil* quote names follow the LaTeX csquotes package "\\guilsinglright" => "›", + "\\guillemotleft" => "«", + "\\guillemotright" => "»", "\\nolinebreak" => "\u2060", "\\pes" => "₧", "\\dddot" => "⃛", @@ -982,17 +1026,16 @@ const latex_symbols = Dict( "\\droang" => "̚", # left angle above (non-spacing) "\\wideutilde" => "̰", # under tilde accent (multiple characters and non-spacing) "\\not" => "̸", # combining long solidus overlay - "\\upMu" => "Μ", # capital mu, greek - "\\upNu" => "Ν", # capital nu, greek - "\\upOmicron" => "Ο", # capital omicron, greek - "\\upepsilon" => "ε", # rounded small epsilon, greek - "\\upomicron" => "ο", # small omicron, greek - "\\upvarbeta" => "ϐ", # rounded small beta, greek - "\\upoldKoppa" => "Ϙ", # greek letter archaic koppa - "\\upoldkoppa" => "ϙ", # greek small letter archaic koppa - "\\upstigma" => "ϛ", # greek small letter stigma - "\\upkoppa" => "ϟ", # greek small letter koppa - "\\upsampi" => "ϡ", # greek small letter sampi + "\\Mu" => "Μ", # capital mu, greek + "\\Nu" => "Ν", # capital nu, greek + "\\Omicron" => "Ο", # capital omicron, greek + "\\omicron" => "ο", # small omicron, greek + "\\varbeta" => "ϐ", # rounded small beta, greek + "\\oldKoppa" => "Ϙ", # greek letter archaic koppa + "\\oldkoppa" => "ϙ", # greek small 
letter archaic koppa + "\\stigma" => "ϛ", # greek small letter stigma + "\\koppa" => "ϟ", # greek small letter koppa + "\\sampi" => "ϡ", # greek small letter sampi "\\tieconcat" => "⁀", # character tie, z notation sequence concatenation "\\leftharpoonaccent" => "⃐", # combining left harpoon above "\\rightharpoonaccent" => "⃑", # combining right harpoon above @@ -2621,10 +2664,10 @@ const latex_symbols = Dict( "\\4/5" => "⅘", # vulgar fraction four fifths "\\1/6" => "⅙", # vulgar fraction one sixth "\\5/6" => "⅚", # vulgar fraction five sixths - "\\1/8" => "⅛", # vulgar fraction one eigth - "\\3/8" => "⅜", # vulgar fraction three eigths - "\\5/8" => "⅝", # vulgar fraction five eigths - "\\7/8" => "⅞", # vulgar fraction seventh eigths + "\\1/8" => "⅛", # vulgar fraction one eighth + "\\3/8" => "⅜", # vulgar fraction three eighths + "\\5/8" => "⅝", # vulgar fraction five eighths + "\\7/8" => "⅞", # vulgar fraction seventh eighths "\\1/" => "⅟", # fraction numerator one "\\0/3" => "↉", # vulgar fraction zero thirds "\\1/4" => "¼", # vulgar fraction one quarter diff --git a/stdlib/REPL/src/options.jl b/stdlib/REPL/src/options.jl index 3ce0ab6ff00dc..1fb2c654c7df2 100644 --- a/stdlib/REPL/src/options.jl +++ b/stdlib/REPL/src/options.jl @@ -27,6 +27,7 @@ mutable struct Options auto_indent_time_threshold::Float64 # refresh after time delay auto_refresh_time_delay::Float64 + hint_tab_completes::Bool # default IOContext settings at the REPL iocontext::Dict{Symbol,Any} end @@ -47,6 +48,7 @@ Options(; auto_indent_bracketed_paste = false, auto_indent_time_threshold = 0.005, auto_refresh_time_delay = Sys.iswindows() ? 0.05 : 0.0, + hint_tab_completes = true, iocontext = Dict{Symbol,Any}()) = Options(hascolor, extra_keymap, tabwidth, kill_ring_max, region_animation_duration, @@ -55,6 +57,7 @@ Options(; backspace_align, backspace_adjust, confirm_exit, auto_indent, auto_indent_tmp_off, auto_indent_bracketed_paste, auto_indent_time_threshold, auto_refresh_time_delay, + hint_tab_completes, iocontext) # for use by REPLs not having an options field diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl new file mode 100644 index 0000000000000..daa01f626aeab --- /dev/null +++ b/stdlib/REPL/src/precompile.jl @@ -0,0 +1,202 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module Precompile + +import ..REPL + +# Ugly hack for our cache file to not have a dependency edge on the FakePTYs file. 
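As a side note on the `hint_tab_completes` option added to `REPL.Options` above: it can be toggled per session, for example from a `startup.jl`. A rough sketch, assuming the standard `atreplinit` hook and that the active REPL carries an `options` field (as `LineEditREPL` does):

    # Sketch: opt out of the new inline tab-completion hints for this session.
    atreplinit() do repl
        repl.options.hint_tab_completes = false
    end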
+Base._track_dependencies[] = false +try + Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl")) + import .FakePTYs: open_fake_pty +finally + Base._track_dependencies[] = true +end + +function repl_workload() + # these are intentionally triggered + allowed_errors = [ + "BoundsError: attempt to access 0-element Vector{Any} at index [1]", + "MethodError: no method matching f(::$Int, ::$Int)", + "Padding of type", # reinterpret docstring has ERROR examples + ] + function check_errors(out) + str = String(out) + if occursin("ERROR:", str) && !any(occursin(e, str) for e in allowed_errors) + @error "Unexpected error (Review REPL precompilation with debug_output on):\n$str" + exit(1) + end + end + ## Debugging options + # View the code sent to the repl by setting this to `stdout` + debug_output = devnull # or stdout + + CTRL_C = '\x03' + CTRL_D = '\x04' + CTRL_R = '\x12' + UP_ARROW = "\e[A" + DOWN_ARROW = "\e[B" + + # This is notified as soon as the first prompt appears + repl_init_event = Base.Event() + + atreplinit() do repl + # Main is closed so we can't evaluate in it, but atreplinit runs at + # a time that repl.mistate === nothing so REPL.activate fails. So do + # it async and wait for the first prompt to know its ready. + t = @async begin + wait(repl_init_event) + REPL.activate(REPL.Precompile; interactive_utils=false) + end + Base.errormonitor(t) + end + + repl_script = """ + 2+2 + print("") + printstyled("a", "b") + display([1]) + display([1 2; 3 4]) + foo(x) = 1 + @time @eval foo(1) + ; pwd + $CTRL_C + $CTRL_R$CTRL_C# + ? reinterpret + using Ra\t$CTRL_C + \\alpha\t$CTRL_C + \e[200~paste here ;)\e[201~"$CTRL_C + $UP_ARROW$DOWN_ARROW$CTRL_C + 123\b\b\b$CTRL_C + \b\b$CTRL_C + f(x) = x03 + f(1,2) + [][1] + Base.Iterators.minimum + cd("complete_path\t\t$CTRL_C + println("done") + """ + + JULIA_PROMPT = "julia> " + PKG_PROMPT = "pkg> " + SHELL_PROMPT = "shell> " + HELP_PROMPT = "help?> " + + blackhole = Sys.isunix() ? "/dev/null" : "nul" + + withenv("JULIA_HISTORY" => blackhole, + "JULIA_PROJECT" => nothing, # remove from environment + "JULIA_LOAD_PATH" => "@stdlib", + "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":", + "TERM" => "", + "JULIA_FALLBACK_REPL" => "0" # Make sure REPL.jl is turned on + ) do + rawpts, ptm = open_fake_pty() + pts = open(rawpts)::Base.TTY + if Sys.iswindows() + pts.ispty = false + else + # workaround libuv bug where it leaks pts + Base._fd(pts) == rawpts || Base.close_stdio(rawpts) + end + # Prepare a background process to copy output from `ptm` until `pts` is closed + output_copy = Base.BufferStream() + tee = @async try + while !eof(ptm) + l = readavailable(ptm) + write(debug_output, l) + write(output_copy, l) + end + write(debug_output, "\n#### EOF ####\n") + catch ex + if !(ex isa Base.IOError && ex.code == Base.UV_EIO) + rethrow() # ignore EIO on ptm after pts dies + end + finally + close(output_copy) + close(ptm) + end + Base.errormonitor(tee) + orig_stdin = stdin + orig_stdout = stdout + orig_stderr = stderr + repltask = @task try + Base.run_std_repl(REPL, false, :yes, true) + finally + redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull) + redirect_stdout(isopen(orig_stdout) ? 
orig_stdout : devnull) + close(pts) + end + Base.errormonitor(repltask) + try + Base.REPL_MODULE_REF[] = REPL + redirect_stdin(pts) + redirect_stdout(pts) + redirect_stderr(pts) + try + REPL.print_qualified_access_warning(Base.Iterators, Base, :minimum) # trigger the warning while stderr is suppressed + finally + redirect_stderr(isopen(orig_stderr) ? orig_stderr : devnull) + end + schedule(repltask) + # wait for the definitive prompt before start writing to the TTY + check_errors(readuntil(output_copy, JULIA_PROMPT)) + write(debug_output, "\n#### REPL STARTED ####\n") + sleep(0.01) + check_errors(readavailable(output_copy)) + # Input our script + precompile_lines = split(repl_script::String, '\n'; keepempty=false) + curr = 0 + for l in precompile_lines + sleep(0.01) # try to let a bit of output accumulate before reading again + curr += 1 + # consume any other output + bytesavailable(output_copy) > 0 && check_errors(readavailable(output_copy)) + # push our input + write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n") + # If the line ends with a CTRL_C, don't write an extra newline, which would + # cause a second empty prompt. Our code below expects one new prompt per + # input line and can race out of sync with the unexpected second line. + endswith(l, CTRL_C) ? write(ptm, l) : write(ptm, l, "\n") + check_errors(readuntil(output_copy, "\n")) + # wait for the next prompt-like to appear + check_errors(readuntil(output_copy, "\n")) + strbuf = "" + while !eof(output_copy) + strbuf *= String(readavailable(output_copy)) + occursin(JULIA_PROMPT, strbuf) && break + occursin(PKG_PROMPT, strbuf) && break + occursin(SHELL_PROMPT, strbuf) && break + occursin(HELP_PROMPT, strbuf) && break + sleep(0.01) # try to let a bit of output accumulate before reading again + end + notify(repl_init_event) + check_errors(strbuf) + end + write(debug_output, "\n#### COMPLETED - Closing REPL ####\n") + write(ptm, "$CTRL_D") + wait(repltask) + finally + redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull) + redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull) + close(pts) + end + wait(tee) + end + write(debug_output, "\n#### FINISHED ####\n") + nothing +end + +let + if Base.generating_output() && Base.JLOptions().use_pkgimages != 0 + repl_workload() + precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Int}) + precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, String}) + precompile(Tuple{typeof(Base.:(==)), Char, String}) + #for child in copy(Base.newly_inferred) + # precompile((child::Base.CodeInstance).def) + #end + end +end + +end # Precompile diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl index c594958a36670..9455632d9f418 100644 --- a/stdlib/REPL/test/TerminalMenus/runtests.jl +++ b/stdlib/REPL/test/TerminalMenus/runtests.jl @@ -17,9 +17,9 @@ function simulate_input(menu::TerminalMenus.AbstractMenu, keys...; kwargs...) write(new_stdin, "$key") end end - TerminalMenus.terminal.in_stream = new_stdin + terminal = TerminalMenus.default_terminal(; in=new_stdin, out=devnull) - return request(menu; suppress_output=true, kwargs...) + return request(terminal, menu; suppress_output=true, kwargs...) 
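For context on the `simulate_input` change just above: instead of mutating the global `TerminalMenus.terminal`, a menu can be driven over arbitrary streams by constructing a terminal explicitly. A rough sketch of that pattern (the key sequence and menu items are made up):

    using REPL.TerminalMenus

    input = Base.BufferStream()
    write(input, "\r")                # pretend the user pressed Enter on item 1
    term = TerminalMenus.default_terminal(in=input, out=devnull)
    menu = RadioMenu(["yes", "no"])
    choice = request(term, menu; suppress_output=true)   # returns 1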
end include("radio_menu.jl") diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl index 22701ead7883d..02f1dc8238f04 100644 --- a/stdlib/REPL/test/docview.jl +++ b/stdlib/REPL/test/docview.jl @@ -4,21 +4,17 @@ using Test import REPL, REPL.REPLCompletions import Markdown -@testset "symbol completion" begin - @test startswith(let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "α")) - String(take!(buf)) - end, "\"α\" can be typed by \\alpha\n") - - @test startswith(let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "🐨")) - String(take!(buf)) - end, "\"🐨\" can be typed by \\:koala:\n") +function get_help_io(input, mod=Main) + buf = IOBuffer() + eval(REPL.helpmode(buf, input, mod)) + String(take!(buf)) +end +get_help_standard(input) = string(eval(REPL.helpmode(IOBuffer(), input))) - @test startswith(let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "ᵞ₁₂₃¹²³α")) - String(take!(buf)) - end, "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma\\_123\\^123\\alpha\n") +@testset "symbol completion" begin + @test startswith(get_help_io("α"), "\"α\" can be typed by \\alpha\n") + @test startswith(get_help_io("🐨"), "\"🐨\" can be typed by \\:koala:\n") + @test startswith(get_help_io("ᵞ₁₂₃¹²³α"), "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma\\_123\\^123\\alpha\n") # Check that all symbols with several completions have a canonical mapping (#39148) symbols = values(REPLCompletions.latex_symbols) @@ -27,19 +23,14 @@ import Markdown end @testset "quoting in doc search" begin - str = let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "mutable s")) - String(take!(buf)) - end + str = get_help_io("mutable s") @test occursin("'mutable struct'", str) @test occursin("Couldn't find 'mutable s'", str) end -@testset "Non-Markdown" begin - # https://github.com/JuliaLang/julia/issues/37765 - @test isa(REPL.insert_hlines(IOBuffer(), Markdown.Text("foo")), Markdown.Text) - # https://github.com/JuliaLang/julia/issues/37757 - @test REPL.insert_hlines(IOBuffer(), nothing) === nothing +@testset "non-loaded packages in doc search" begin + str = get_help_io("Profile") + @test occursin("Couldn't find Profile, but a loadable package with that name exists.", str) end @testset "Check @var_str also completes to var\"\" in REPL.doc_completions()" begin @@ -47,13 +38,29 @@ end symbols = "@" .* checks .* "_str" results = checks .* "\"\"" for (i,r) in zip(symbols,results) - @test r ∈ REPL.doc_completions(i) + @test r ∈ string.(REPL.doc_completions(i)) end end @testset "fuzzy score" begin # https://github.com/JunoLab/FuzzyCompletions.jl/issues/7 # shouldn't throw when there is a space in a middle of query @test (REPL.matchinds("a ", "a file.txt"); true) + @test isapprox(REPL.fuzzyscore("abcdef", ""), 0.0; atol=0.001) + @test 0.8 < REPL.fuzzyscore( + "supercalifragilisticexpialidocious", + "bupercalifragilisticexpialidocious" + ) < 1.0 + + # Unicode + @test 1.0 > REPL.fuzzyscore("αkδψm", "αkδm") > 0.0 + @test 1.0 > REPL.fuzzyscore("αkδψm", "α") > 0.0 + + exact_match_export = REPL.fuzzyscore("thing", REPL.AccessibleBinding(:thing)) + exact_match_public = REPL.fuzzyscore("thing", REPL.AccessibleBinding("A", "thing")) + inexact_match_export = REPL.fuzzyscore("thing", REPL.AccessibleBinding(:thang)) + inexact_match_public = REPL.fuzzyscore("thing", REPL.AccessibleBinding("A", "thang")) + @test exact_match_export > exact_match_public > inexact_match_export > inexact_match_public + @test exact_match_export ≈ 1.0 end @testset "Unicode doc lookup (#41589)" begin @@ -65,3 +72,94 @@ end b = REPL.Binding(@__MODULE__, :R) 
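The "loadable package" suggestion exercised above boils down to one query against the load path, via `Base.identify_package` (the same call the help-mode change uses). Roughly:

    # Returns a Base.PkgId when a package of that name is on the load path,
    # and `nothing` otherwise; help mode only adds the suggestion in the former case.
    Base.identify_package("Profile")    # PkgId for the stdlib
    Base.identify_package("NoSuchPkg")  # nothing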
@test REPL.summarize(b, Tuple{}) isa Markdown.MD end + +@testset "Struct field help (#51178)" begin + struct StructWithNoFields end + struct StructWithOneField + field1 + end + struct StructWithTwoFields + field1 + field2 + end + struct StructWithThreeFields + field1 + field2 + field3 + end + + @test endswith(get_help_standard("StructWithNoFields.not_a_field"), "StructWithNoFields` has no fields.\n") + @test endswith(get_help_standard("StructWithOneField.not_a_field"), "StructWithOneField` has field `field1`.\n") + @test endswith(get_help_standard("StructWithTwoFields.not_a_field"), "StructWithTwoFields` has fields `field1`, and `field2`.\n") + @test endswith(get_help_standard("StructWithThreeFields.not_a_field"), "StructWithThreeFields` has fields `field1`, `field2`, and `field3`.\n") +end + +module InternalWarningsTests + + module A + public B, B3 + module B + public e + c = 4 + "d is 5" + d = 5 + "e is 6" + e = 6 + end + + module B2 + module C + public e + d = 1 + "e is 2" + e = 2 + end + end + + module B3 end + end + + using Test, REPL + @testset "internal warnings" begin + header = "!!! warning\n The following bindings may be internal; they may change or be removed in future versions:\n\n" + prefix(warnings) = header * join(" * `$(@__MODULE__).$w`\n" for w in warnings) * "\n\n" + docstring(input) = string(eval(REPL.helpmode(IOBuffer(), input, @__MODULE__))) + + @test docstring("A") == "No docstring or readme file found for internal module `$(@__MODULE__).A`.\n\n# Public names\n\n`B`, `B3`\n" + @test docstring("A.B") == "No docstring or readme file found for public module `$(@__MODULE__).A.B`.\n\n# Public names\n\n`e`\n" + @test startswith(docstring("A.B.c"), prefix(["A.B.c"])) + @test startswith(docstring("A.B.d"), prefix(["A.B.d"])) + @test docstring("A.B.e") == "e is 6\n" + @test startswith(docstring("A.B2"), prefix(["A.B2"])) + @test startswith(docstring("A.B2.C"), prefix(["A.B2", "A.B2.C"])) + @test startswith(docstring("A.B2.C.d"), prefix(["A.B2", "A.B2.C", "A.B2.C.d"])) + @test startswith(docstring("A.B2.C.e"), prefix(["A.B2", "A.B2.C"])) + @test docstring("A.B3") == "No docstring or readme file found for public module `$(@__MODULE__).A.B3`.\n\nModule does not have any public names.\n" + end +end + +# Issue #51344, don't print "internal binding" warning for non-existent bindings. 
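The public/internal distinction behind the warnings tested above is plain Base API. A small sketch of the relevant predicates, assuming Julia 1.11's `public` keyword (module and names are made up):

    module DemoVisibility
        export f
        public g
        f() = 1
        g() = 2
        h() = 3
    end

    Base.ispublic(DemoVisibility, :f)    # true:  exported names are public
    Base.ispublic(DemoVisibility, :g)    # true:  explicitly declared public
    Base.ispublic(DemoVisibility, :h)    # false: internal
    Base.isexported(DemoVisibility, :g)  # false: public, but needs qualification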
+@test string(eval(REPL.helpmode("Base.no_such_symbol"))) == "No documentation found.\n\nBinding `Base.no_such_symbol` does not exist.\n" + +module TestSuggestPublic + export dingo + public dango + dingo(x) = x + 1 + dango(x) = x = 2 +end +using .TestSuggestPublic +helplines(s) = map(strip, split(get_help_io(s, @__MODULE__), '\n'; keepempty=false)) +@testset "search lists public names" begin + lines = helplines("dango") + # Ensure that public names that exactly match the search query are listed first + # even if they aren't exported, as long as no exact exported/local match exists + @test startswith(lines[1], "search: TestSuggestPublic.dango dingo") + @test lines[2] == "Couldn't find dango" # 🙈🍡 + @test startswith(lines[3], "Perhaps you meant TestSuggestPublic.dango, dingo") +end +dango() = "🍡" +@testset "search prioritizes exported names" begin + # Prioritize exported/local names if they exactly match + lines = helplines("dango") + @test startswith(lines[1], "search: dango TestSuggestPublic.dango dingo") +end diff --git a/stdlib/REPL/test/precompilation.jl b/stdlib/REPL/test/precompilation.jl new file mode 100644 index 0000000000000..7efcf0b5e8282 --- /dev/null +++ b/stdlib/REPL/test/precompilation.jl @@ -0,0 +1,55 @@ + +## Tests that compilation in the interactive session startup are as expected + +using Test +Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl")) +import .FakePTYs: open_fake_pty + +if !Sys.iswindows() + # TODO: reenable this on Windows. Without it we're not checking that Windows startup has no compilation. + # On Windows CI runners using `open_fake_pty` is causing: + # ---- + # `stty: 'standard input': Inappropriate ioctl for device + # Unhandled Task ERROR: failed process: Process(`stty raw -echo onlcr -ocrnl opost`, ProcessExited(1)) [1] + # ---- + @testset "No interactive startup compilation" begin + f, _ = mktemp() + + # start an interactive session, ensuring `TERM` is unset since it can trigger + # different amounts of precompilation stemming from `base/terminfo.jl` depending + # on the value, making the test here unreliable + cmd = addenv(`$(Base.julia_cmd()[1]) --trace-compile=$f -q --startup-file=no -i`, + Dict("TERM" => "")) + pts, ptm = open_fake_pty() + p = run(cmd, pts, pts, pts; wait=false) + Base.close_stdio(pts) + std = readuntil(ptm, "julia>") + # check for newlines instead of equality with "julia>" because color may be on + occursin("\n", std) && @info "There was output before the julia prompt:\n$std" + sleep(1) # sometimes precompiles output just after prompt appears + tracecompile_out = read(f, String) + close(ptm) # close after reading so we don't get precompiles from error shutdown + + # given this test checks that startup is snappy, it's best to add workloads to + # contrib/generate_precompile.jl rather than increase this number. But if that's not + # possible, it'd be helpful to add a comment with the statement and a reason below + expected_precompiles = 0 + + n_precompiles = count(r"precompile\(", tracecompile_out) + + @test n_precompiles <= expected_precompiles + + if n_precompiles == 0 + @debug "REPL: trace compile output: (none)" + elseif n_precompiles > expected_precompiles + @info "REPL: trace compile output:\n$tracecompile_out" + else + @debug "REPL: trace compile output:\n$tracecompile_out" + end + # inform if lowered + if expected_precompiles > 0 && (n_precompiles < expected_precompiles) + @info "REPL: Actual number of precompiles has dropped below expected." 
n_precompiles expected_precompiles + end + + end +end diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl index 8a6c6a3445e0a..809913502c3d7 100644 --- a/stdlib/REPL/test/repl.jl +++ b/stdlib/REPL/test/repl.jl @@ -3,9 +3,14 @@ using Test using REPL using Random +using Logging import REPL.LineEdit using Markdown +empty!(Base.Experimental._hint_handlers) # unregister error hints so they can be tested separately + +@test Base.REPL_MODULE_REF[] === REPL + const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl")) import .Main.FakePTYs: with_fake_pty @@ -16,7 +21,6 @@ include(joinpath(BASE_TEST_PATH, "testenv.jl")) include("FakeTerminals.jl") import .FakeTerminals.FakeTerminal - function kill_timer(delay) # Give ourselves a generous timer here, just to prevent # this causing e.g. a CI hang when there's something unexpected in the output. @@ -111,7 +115,7 @@ fake_repl() do stdin_write, stdout_read, repl Base.wait(repltask) end -# These are integration tests. If you want to unit test test e.g. completion, or +# These are integration tests. If you want to unit test e.g. completion, or # exact LineEdit behavior, put them in the appropriate test files. # Furthermore since we are emulating an entire terminal, there may be control characters # in the mix. If verification needs to be done, keep it to the bare minimum. Basically @@ -240,8 +244,9 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri @test occursin("shell> ", s) # check for the echo of the prompt @test occursin("'", s) # check for the echo of the input s = readuntil(stdout_read, "\n\n") - @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n [1] ") || - startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n [1] ") + @test(startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n [1] ") || + startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n [1] "), + skip = Sys.iswindows() && Sys.WORD_SIZE == 32) write(stdin_write, "\b") wait(t) end @@ -497,8 +502,9 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))] repl_mode = repl.interface.modes[1] shell_mode = repl.interface.modes[2] help_mode = repl.interface.modes[3] - histp = repl.interface.modes[4] - prefix_mode = repl.interface.modes[5] + pkg_mode = repl.interface.modes[4] + histp = repl.interface.modes[5] + prefix_mode = repl.interface.modes[6] hp = REPL.REPLHistoryProvider(Dict{Symbol,Any}(:julia => repl_mode, :shell => shell_mode, @@ -1164,7 +1170,7 @@ fake_repl() do stdin_write, stdout_read, repl Base.wait(repltask) end -help_result(line, mod::Module=Base) = Core.eval(mod, REPL._helpmode(IOBuffer(), line)) +help_result(line, mod::Module=Base) = Core.eval(mod, REPL._helpmode(IOBuffer(), line, mod)) # Docs.helpmode tests: we test whether the correct expressions are being generated here, # rather than complete integration with Julia's REPL mode system. @@ -1211,9 +1217,9 @@ global some_undef_global @test occursin("does not exist", sprint(show, help_result(".."))) # test that helpmode is sensitive to contextual module @test occursin("No documentation found", sprint(show, help_result("Fix2", Main))) -@test occursin("A type representing a partially-applied version", # exact string may change +@test occursin("Alias for `Fix{2}`. 
See [`Fix`](@ref Base.Fix).", # exact string may change sprint(show, help_result("Base.Fix2", Main))) -@test occursin("A type representing a partially-applied version", # exact string may change +@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change sprint(show, help_result("Fix2", Base))) @@ -1249,6 +1255,7 @@ let emptyH1 = Markdown.parse("# "), end module BriefExtended +public f, f_plain """ f() @@ -1395,6 +1402,126 @@ end Base.wait(backend.backend_task) end +# Mimic of JSON.jl's structure +module JSON54872 + +module Parser +export parse +function parse end +end # Parser + +using .Parser: parse +end # JSON54872 + +# Test the public mechanism +module JSON54872_public +public tryparse +end # JSON54872_public + +@testset "warn_on_non_owning_accesses AST transform" begin + @test REPL.has_ancestor(JSON54872.Parser, JSON54872) + @test !REPL.has_ancestor(JSON54872, JSON54872.Parser) + + # JSON54872.Parser owns `parse` + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + JSON54872.Parser.parse + end) + @test isempty(warnings) + + # A submodule of `JSON54872` owns `parse` + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + JSON54872.parse + end) + @test isempty(warnings) + + # `JSON54872` does not own `tryparse` (nor is it public) + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + JSON54872.tryparse + end) + @test length(warnings) == 1 + @test only(warnings).owner == Base + @test only(warnings).name_being_accessed == :tryparse + + # Same for nested access + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + JSON54872.Parser.tryparse + end) + @test length(warnings) == 1 + @test only(warnings).owner == Base + @test only(warnings).name_being_accessed == :tryparse + + test_logger = TestLogger() + with_logger(test_logger) do + REPL.warn_on_non_owning_accesses(@__MODULE__, :(JSON54872.tryparse)) + REPL.warn_on_non_owning_accesses(@__MODULE__, :(JSON54872.tryparse)) + end + # only 1 logging statement emitted thanks to `maxlog` mechanism + @test length(test_logger.logs) == 1 + record = only(test_logger.logs) + @test record.level == Warn + @test record.message == "tryparse is defined in Base and is not public in $JSON54872" + + # However JSON54872_public has `tryparse` declared public + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + JSON54872_public.tryparse + end) + @test isempty(warnings) + + # Now let us test some tricky cases + # No warning since `JSON54872` is local (LHS of `=`) + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + let JSON54872 = (; tryparse=1) + JSON54872.tryparse + end + end) + @test isempty(warnings) + + # No warning for nested local access either + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + let JSON54872 = (; Parser = (; tryparse=1)) + JSON54872.Parser.tryparse + end + end) + @test isempty(warnings) + + # No warning since `JSON54872` is local (long-form function arg) + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + function f(JSON54872=(; tryparse)) + JSON54872.tryparse + end + end) + @test isempty(warnings) + + # No warning since `JSON54872` is local (short-form function arg) + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + f(JSON54872=(; tryparse)) = JSON54872.tryparse + end) + @test isempty(warnings) + + # No warning since `JSON54872` is local (long-form anonymous function) + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, 
quote + function (JSON54872=(; tryparse)) + JSON54872.tryparse + end + end) + @test isempty(warnings) + + # No warning since `JSON54872` is local (short-form anonymous function) + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + (JSON54872 = (; tryparse)) -> begin + JSON54872.tryparse + end + end) + @test isempty(warnings) + + # false-negative: missing warning + warnings = REPL.collect_qualified_access_warnings(@__MODULE__, quote + let JSON54872 = JSON54872 + JSON54872.tryparse + end + end) + @test_broken !isempty(warnings) +end backend = REPL.REPLBackend() frontend_task = @async begin @@ -1473,8 +1600,42 @@ end @test isempty(mods) mods = REPL.modules_to_be_loaded(Base.parse_input_line("begin using Foo; Core.eval(Main,\"using Foo\") end")) @test mods == [:Foo] + + mods = REPL.modules_to_be_loaded(:(import .Foo: a)) + @test isempty(mods) + mods = REPL.modules_to_be_loaded(:(using .Foo: a)) + @test isempty(mods) + end +end + +# Test that the REPL can find `using` statements inside macro expansions +global packages_requested = Any[] +old_hooks = copy(REPL.install_packages_hooks) +empty!(REPL.install_packages_hooks) +push!(REPL.install_packages_hooks, function(pkgs) + append!(packages_requested, pkgs) +end) + +fake_repl() do stdin_write, stdout_read, repl + repltask = @async begin + REPL.run_repl(repl) end + + # Just consume all the output - we only test that the callback ran + read_resp_task = @async while !eof(stdout_read) + readavailable(stdout_read) + end + + write(stdin_write, "macro usingfoo(); :(using FooNotFound); end\n") + write(stdin_write, "@usingfoo\n") + write(stdin_write, "\x4") + Base.wait(repltask) + close(stdin_write) + close(stdout_read) + Base.wait(read_resp_task) end +@test packages_requested == Any[:FooNotFound] +empty!(REPL.install_packages_hooks); append!(REPL.install_packages_hooks, old_hooks) # err should reprint error if deeper than top-level fake_repl() do stdin_write, stdout_read, repl @@ -1489,13 +1650,13 @@ fake_repl() do stdin_write, stdout_read, repl # generate top-level error write(stdin_write, "foobar\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined" - @test readline(stdout_read) == "" + @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`" + @test readline(stdout_read) == "" skip = Sys.iswindows() && Sys.WORD_SIZE == 32 readuntil(stdout_read, "julia> ", keep=true) # check that top-level error did not change `err` write(stdin_write, "err\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0m" + @test readline(stdout_read) == "\e[0m" skip = Sys.iswindows() && Sys.WORD_SIZE == 32 readuntil(stdout_read, "julia> ", keep=true) # generate deeper error write(stdin_write, "foo() = foobar\n") @@ -1504,13 +1665,13 @@ fake_repl() do stdin_write, stdout_read, repl readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, "foo()\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined" + @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`" readuntil(stdout_read, "julia> ", keep=true) # check that deeper error did set `err` write(stdin_write, "err\n") readline(stdout_read) @test readline(stdout_read) == "\e[0m1-element ExceptionStack:" - @test readline(stdout_read) == "UndefVarError: `foobar` not defined" + @test readline(stdout_read) == "UndefVarError: `foobar` not defined in `Main`" @test readline(stdout_read) == "Stacktrace:" readuntil(stdout_read, 
"\n\n", keep=true) readuntil(stdout_read, "julia> ", keep=true) @@ -1555,8 +1716,9 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))] repl_mode = repl.interface.modes[1] shell_mode = repl.interface.modes[2] help_mode = repl.interface.modes[3] - histp = repl.interface.modes[4] - prefix_mode = repl.interface.modes[5] + pkg_mode = repl.interface.modes[4] + histp = repl.interface.modes[5] + prefix_mode = repl.interface.modes[6] hp = REPL.REPLHistoryProvider(Dict{Symbol,Any}(:julia => repl_mode, :shell => shell_mode, @@ -1652,6 +1814,223 @@ fake_repl() do stdin_write, stdout_read, repl @test !contains(s, "ERROR") @test contains(s, "Test Passed") + # Test for https://github.com/JuliaLang/julia/issues/49319 + s = sendrepl2("# comment", "In [16]") + @test !contains(s, "ERROR") + + write(stdin_write, '\x04') + Base.wait(repltask) +end + +fake_repl() do stdin_write, stdout_read, repl + backend = REPL.REPLBackend() + repltask = @async REPL.run_repl(repl; backend) + write(stdin_write, + "a = UInt8(81):UInt8(160); b = view(a, 1:64); c = reshape(b, (8, 8)); d = reinterpret(reshape, Float64, c); sqrteach(a) = [sqrt(x) for x in a]; sqrteach(d)\n\"ZZZZZ\"\n") + txt = readuntil(stdout_read, "ZZZZZ") write(stdin_write, '\x04') + wait(repltask) + @test contains(txt, "Some type information was truncated. Use `show(err)` to see complete types.") +end + +try # test the functionality of `UndefVarError_hint` against `Base.remove_linenums!` + @assert isempty(Base.Experimental._hint_handlers) + Base.Experimental.register_error_hint(REPL.UndefVarError_hint, UndefVarError) + + # check the requirement to trigger the hint via `UndefVarError_hint` + @test !isdefined(Main, :remove_linenums!) && Base.ispublic(Base, :remove_linenums!) + + fake_repl() do stdin_write, stdout_read, repl + backend = REPL.REPLBackend() + repltask = @async REPL.run_repl(repl; backend) + write(stdin_write, + "remove_linenums!\n\"ZZZZZ\"\n") + txt = readuntil(stdout_read, "ZZZZZ") + write(stdin_write, '\x04') + wait(repltask) + @test occursin("Hint: a global variable of this name also exists in Base.", txt) + end +finally + empty!(Base.Experimental._hint_handlers) +end + +try # test the functionality of `UndefVarError_hint` against import clashes + @assert isempty(Base.Experimental._hint_handlers) + Base.Experimental.register_error_hint(REPL.UndefVarError_hint, UndefVarError) + + @eval module X + + module A + export x + x = 1 + end # A + + module B + export x + x = 2 + end # B + + using .A, .B + + end # X + + expected_message = string("\nHint: It looks like two or more modules export different ", + "bindings with this name, resulting in ambiguity. 
Try explicitly ", + "importing it from a particular module, or qualifying the name ", + "with the module it should come from.") + @test_throws expected_message X.x +finally + empty!(Base.Experimental._hint_handlers) +end + +# Hints for tab completes + +fake_repl() do stdin_write, stdout_read, repl + repltask = @async begin + REPL.run_repl(repl) + end + write(stdin_write, "reada") + s1 = readuntil(stdout_read, "reada") # typed + s2 = readuntil(stdout_read, "vailable") # partial hint + + write(stdin_write, "x") # "readax" doesn't tab complete so no hint + # we can't use readuntil given this doesn't print, so just wait for the hint state to be reset + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + + write(stdin_write, "\b") # only tab complete while typing forward + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + + write(stdin_write, "v") + s3 = readuntil(stdout_read, "ailable") # partial hint + + write(stdin_write, "\t") + s4 = readuntil(stdout_read, "readavailable") # full completion is reprinted + + write(stdin_write, "\x15") + write(stdin_write, "x") # single chars shouldn't hint e.g. `x` shouldn't hint at `xor` + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + + # issue #52376 + write(stdin_write, "\x15") + write(stdin_write, "\\_ailuj") + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + s5 = readuntil(stdout_read, "\\_ailuj") + write(stdin_write, "\t") + s6 = readuntil(stdout_read, "ₐᵢₗᵤⱼ") + + write(stdin_write, "\x15\x04") + Base.wait(repltask) +end +## hints disabled +fake_repl(options=REPL.Options(confirm_exit=false,hascolor=true,hint_tab_completes=false)) do stdin_write, stdout_read, repl + repltask = @async begin + REPL.run_repl(repl) + end + write(stdin_write, "reada") + s1 = readuntil(stdout_read, "reada") # typed + @test LineEdit.state(repl.mistate).hint === nothing + + write(stdin_write, "\x15\x04") Base.wait(repltask) + @test !occursin("vailable", String(readavailable(stdout_read))) +end + +# banner +let io = IOBuffer() + @test REPL.banner(io) === nothing + seek(io, 0) + @test countlines(io) == 9 + take!(io) + @test REPL.banner(io; short=true) === nothing + seek(io, 0) + @test countlines(io) == 2 +end + +@testset "Docstrings" begin + undoc = Docs.undocumented_names(REPL) + @test_broken isempty(undoc) + @test undoc == [:AbstractREPL, :BasicREPL, :LineEditREPL, :StreamREPL] +end + +struct A40735 + str::String +end + +# https://github.com/JuliaLang/julia/issues/40735 +@testset "Long printing" begin + previous = REPL.SHOW_MAXIMUM_BYTES + try + REPL.SHOW_MAXIMUM_BYTES = 1000 + str = string(('a':'z')...)^50 + @test length(str) > 1100 + # For a raw string, we correctly get the standard abbreviated output + output = sprint(REPL.show_limited, MIME"text/plain"(), str; context=:limit => true) + hint = """call `show(stdout, MIME"text/plain"(), ans)` to print without truncation""" + suffix = "[printing stopped after displaying 1000 bytes; $hint]" + @test !endswith(output, suffix) + @test contains(output, "bytes ⋯") + # For a struct without a custom `show` method, we don't hit the abbreviated + # 3-arg show on the inner string, so here we check that the REPL print-limiting + # feature is correctly kicking in. 
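The hint machinery exercised in the `UndefVarError_hint` tests above is the general-purpose `Base.Experimental.register_error_hint` API; the REPL merely registers its own handler. A toy registration, with made-up hint text, looks like:

    Base.Experimental.register_error_hint(UndefVarError) do io, ex
        # UndefVarError handlers receive the output IO and the exception itself.
        if ex.var === :fooba
            print(io, "\nPerhaps you meant `foobar`?")
        end
    end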
+ a = A40735(str) + output = sprint(REPL.show_limited, MIME"text/plain"(), a; context=:limit => true) + @test endswith(output, suffix) + @test length(output) <= 1200 + # We also check some extreme cases + REPL.SHOW_MAXIMUM_BYTES = 1 + output = sprint(REPL.show_limited, MIME"text/plain"(), 1) + @test output == "1" + output = sprint(REPL.show_limited, MIME"text/plain"(), 12) + @test output == "1…[printing stopped after displaying 1 byte; $hint]" + REPL.SHOW_MAXIMUM_BYTES = 0 + output = sprint(REPL.show_limited, MIME"text/plain"(), 1) + @test output == "…[printing stopped after displaying 0 bytes; $hint]" + @test sprint(io -> show(REPL.LimitIO(io, 5), "abc")) == "\"abc\"" + @test_throws REPL.LimitIOException(1) sprint(io -> show(REPL.LimitIO(io, 1), "abc")) + finally + REPL.SHOW_MAXIMUM_BYTES = previous + end +end + +@testset "Dummy Pkg prompt" begin + # do this in an empty depot to test default for new users + withenv("JULIA_DEPOT_PATH" => mktempdir() * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do + prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.Pkg_promptf())"`) + @test prompt == "(@v$(VERSION.major).$(VERSION.minor)) pkg> " + end + + # Issue 55850 + tmp_55850 = mktempdir() + tmp_sym_link = joinpath(tmp_55850, "sym") + symlink(tmp_55850, tmp_sym_link; dir_target=true) + withenv("JULIA_DEPOT_PATH" => tmp_sym_link * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do + prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.projname(REPL.find_project_file()))"`) + @test prompt == "@v$(VERSION.major).$(VERSION.minor)" + end + + get_prompt(proj::String) = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no $(proj) -e "using REPL; print(REPL.Pkg_promptf())"`) + + @test get_prompt("--project=$(pkgdir(REPL))") == "(REPL) pkg> " + + tdir = mkpath(joinpath(mktempdir(), "foo")) + @test get_prompt("--project=$tdir") == "(foo) pkg> " + + proj_file = joinpath(tdir, "Project.toml") + touch(proj_file) # make a bad Project.toml + @test get_prompt("--project=$proj_file") == "(foo) pkg> " + + write(proj_file, "name = \"Bar\"\n") + @test get_prompt("--project=$proj_file") == "(Bar) pkg> " end diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index b2199e10bef55..2c8d48cc232cf 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -4,143 +4,165 @@ using REPL.REPLCompletions using Test using Random using REPL - @testset "Check symbols previously not shown by REPL.doc_completions()" begin + +@testset "Check symbols previously not shown by REPL.doc_completions()" begin symbols = ["?","=","[]","[","]","{}","{","}",";","","'","&&","||","julia","Julia","new","@var_str"] - for i in symbols - @test i ∈ REPL.doc_completions(i, Main) - end + for i in symbols + @test i ∈ string.(REPL.doc_completions(i, Main)) end -let ex = quote - module CompletionFoo - using Random - import Test +end - mutable struct Test_y - yy - end - mutable struct Test_x - xx :: Test_y - end - type_test = Test_x(Test_y(1)) - (::Test_y)() = "", "" - module CompletionFoo2 +let ex = + quote + module CompletionFoo + using Random + import Test - end - const bar = 1 - foo() = bar - macro foobar() - :() - end - macro barfoo(ex) - ex - end - macro error_expanding() - error("cannot expand @error_expanding") - :() - end - macro error_lowering_conditional(a) - if isa(a, Number) - return a + mutable struct Test_y + yy end - throw(AssertionError("Not a Number")) - :() - end - macro 
error_throwing() - return quote - error("@error_throwing throws an error") + mutable struct Test_x + xx :: Test_y end - end - - primitive type NonStruct 8 end - Base.propertynames(::NonStruct) = (:a, :b, :c) - x = reinterpret(NonStruct, 0x00) - - # Support non-Dict AbstractDicts, #19441 - mutable struct CustomDict{K, V} <: AbstractDict{K, V} - mydict::Dict{K, V} - end - - Base.keys(d::CustomDict) = collect(keys(d.mydict)) - Base.length(d::CustomDict) = length(d.mydict) - - test(x::T, y::T) where {T<:Real} = pass - test(x::Real, y::Real) = pass - test(x::AbstractArray{T}, y) where {T<:Real} = pass - test(args...) = pass - - test1(x::Type{Float64}) = pass + type_test = Test_x(Test_y(1)) + (::Test_y)() = "", "" + unicode_αβγ = Test_y(1) - test2(x::AbstractString) = pass - test2(x::Char) = pass - test2(x::Cmd) = pass + Base.:(+)(x::Test_x, y::Test_y) = Test_x(Test_y(x.xx.yy + y.yy)) + module CompletionFoo2 - test3(x::AbstractArray{Int}, y::Int) = pass - test3(x::AbstractArray{Float64}, y::Float64) = pass - - test4(x::AbstractString, y::AbstractString) = pass - test4(x::AbstractString, y::Regex) = pass - - test5(x::Array{Bool,1}) = pass - test5(x::BitArray{1}) = pass - test5(x::Float64) = pass - const a=x->x - test6()=[a, a] - test7() = rand(Bool) ? 1 : 1.0 - test8() = Any[1][1] - test9(x::Char) = pass - test9(x::Char, i::Int) = pass - - test10(a, x::Int...) = pass - test10(a::Integer, b::Integer, c) = pass - test10(a, y::Bool...) = pass - test10(a, d::Integer, z::Signed...) = pass - test10(s::String...) = pass - - test11(a::Integer, b, c) = pass - test11(u, v::Integer, w) = pass - test11(x::Int, y::Int, z) = pass - test11(_, _, s::String) = pass - - test!12() = pass - - kwtest(; x=1, y=2, w...) = pass - kwtest2(a; x=1, y=2, w...) = pass - kwtest3(a::Number; length, len2, foobar, kwargs...) = pass - kwtest3(a::Real; another!kwarg, len2) = pass - kwtest3(a::Integer; namedarg, foobar, slurp...) 
= pass - kwtest4(a::AbstractString; _a1b, x23) = pass - kwtest4(a::String; _a1b, xαβγ) = pass - kwtest4(a::SubString; x23, _something) = pass - kwtest5(a::Int, b, x...; somekwarg, somekotherkwarg) = pass - kwtest5(a::Char, b; xyz) = pass - - const named = (; len2=3) - - array = [1, 1] - varfloat = 0.1 - - const tuple = (1, 2) + end + const bar = 1 + foo() = bar + macro foobar() + :() + end + macro barfoo(ex) + ex + end + macro error_expanding() + error("cannot expand @error_expanding") + :() + end + macro error_lowering_conditional(a) + if isa(a, Number) + return a + end + throw(AssertionError("Not a Number")) + :() + end + macro error_throwing() + return quote + error("@error_throwing throws an error") + end + end - test_y_array=[(@__MODULE__).Test_y(rand()) for i in 1:10] - test_dict = Dict("abc"=>1, "abcd"=>10, :bar=>2, :bar2=>9, Base=>3, - occursin=>4, `ls`=>5, 66=>7, 67=>8, ("q",3)=>11, - "α"=>12, :α=>13) - test_customdict = CustomDict(test_dict) + primitive type NonStruct 8 end + Base.propertynames(::NonStruct) = (:a, :b, :c) + x = reinterpret(NonStruct, 0x00) - macro teststr_str(s) end - macro tϵsτstρ_str(s) end - macro testcmd_cmd(s) end - macro tϵsτcmδ_cmd(s) end + # Support non-Dict AbstractDicts, #19441 + mutable struct CustomDict{K, V} <: AbstractDict{K, V} + mydict::Dict{K, V} + end - var"complicated symbol with spaces" = 5 + Base.keys(d::CustomDict) = collect(keys(d.mydict)) + Base.length(d::CustomDict) = length(d.mydict) - struct WeirdNames end - Base.propertynames(::WeirdNames) = (Symbol("oh no!"), Symbol("oh yes!")) + # Support AbstractDict with unknown length, #55931 + struct NoLengthDict{K,V} <: AbstractDict{K,V} + dict::Dict{K,V} + NoLengthDict{K,V}() where {K,V} = new(Dict{K,V}()) + end + Base.iterate(d::NoLengthDict, s...) = iterate(d.dict, s...) + Base.IteratorSize(::Type{<:NoLengthDict}) = Base.SizeUnknown() + Base.eltype(::Type{NoLengthDict{K,V}}) where {K,V} = Pair{K,V} + Base.setindex!(d::NoLengthDict, v, k) = d.dict[k] = v + + test(x::T, y::T) where {T<:Real} = pass + test(x::Real, y::Real) = pass + test(x::AbstractArray{T}, y) where {T<:Real} = pass + test(args...) = pass + + test1(x::Type{Float64}) = pass + + test2(x::AbstractString) = pass + test2(x::Char) = pass + test2(x::Cmd) = pass + + test3(x::AbstractArray{Int}, y::Int) = pass + test3(x::AbstractArray{Float64}, y::Float64) = pass + + test4(x::AbstractString, y::AbstractString) = pass + test4(x::AbstractString, y::Regex) = pass + + test5(x::Array{Bool,1}) = pass + test5(x::BitArray{1}) = pass + test5(x::Float64) = pass + const a=x->x + test6()=[a, a] + test7() = rand(Bool) ? 1 : 1.0 + test8() = Any[1][1] + test9(x::Char) = pass + test9(x::Char, i::Int) = pass + + test10(a, x::Int...) = pass + test10(a::Integer, b::Integer, c) = pass + test10(a, y::Bool...) = pass + test10(a, d::Integer, z::Signed...) = pass + test10(s::String...) = pass + + test11(a::Integer, b, c) = pass + test11(u, v::Integer, w) = pass + test11(x::Int, y::Int, z) = pass + test11(_, _, s::String) = pass + + test!12() = pass + + kwtest(; x=1, y=2, w...) = pass + kwtest2(a; x=1, y=2, w...) = pass + kwtest3(a::Number; length, len2, foobar, kwargs...) = pass + kwtest3(a::Real; another!kwarg, len2) = pass + kwtest3(a::Integer; namedarg, foobar, slurp...) 
= pass + kwtest4(a::AbstractString; _a1b, x23) = pass + kwtest4(a::String; _a1b, xαβγ) = pass + kwtest4(a::SubString; x23, _something) = pass + kwtest5(a::Int, b, x...; somekwarg, somekotherkwarg) = pass + kwtest5(a::Char, b; xyz) = pass + + const named = (; len2=3) + const fmsoebelkv = (; len2=3) + + array = [1, 1] + varfloat = 0.1 + + const tuple = (1, 2) + + test_y_array=[(@__MODULE__).Test_y(rand()) for i in 1:10] + test_dict = Dict("abc"=>1, "abcd"=>10, :bar=>2, :bar2=>9, Base=>3, + occursin=>4, `ls`=>5, 66=>7, 67=>8, ("q",3)=>11, + "α"=>12, :α=>13) + test_customdict = CustomDict(test_dict) + + macro teststr_str(s) end + macro tϵsτstρ_str(s) end + macro testcmd_cmd(s) end + macro tϵsτcmδ_cmd(s) end + + var"complicated symbol with spaces" = 5 + + struct WeirdNames end + Base.propertynames(::WeirdNames) = (Symbol("oh no!"), Symbol("oh yes!")) + + # https://github.com/JuliaLang/julia/issues/52551#issuecomment-1858543413 + export exported_symbol + exported_symbol(::WeirdNames) = nothing end # module CompletionFoo test_repl_comp_dict = CompletionFoo.test_dict test_repl_comp_customdict = CompletionFoo.test_customdict test_dict_ℂ = Dict(1=>2) + test_dict_no_length = CompletionFoo.NoLengthDict{Int,Int}() end ex.head = :toplevel Core.eval(Main, ex) @@ -148,16 +170,23 @@ end function map_completion_text(completions) c, r, res = completions - return map(completion_text, c), r, res + return map(x -> named_completion(x).completion, c), r, res +end + +function map_named_completion(completions) + c, r, res = completions + return map(named_completion, c), r, res end test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s)))) test_scomplete(s) = map_completion_text(@inferred(shell_completions(s, lastindex(s)))) -test_bslashcomplete(s) = map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2]) -test_complete_context(s, m) = map_completion_text(@inferred(completions(s,lastindex(s), m))) +test_complete_context(s, m=@__MODULE__; shift::Bool=true) = + map_completion_text(@inferred(completions(s,lastindex(s), m, shift))) test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo) test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false))) +test_bslashcomplete(s) = map_named_completion(@inferred(bslash_completions(s, lastindex(s)))[2]) + test_methods_list(@nospecialize(f), tt) = map(x -> string(x.method), Base._methods_by_ftype(Base.signature_type(f, tt), 10, Base.get_world_counter())) @@ -253,6 +282,11 @@ let s = "Main.CompletionFoo.type_test.x" @test s[r] == "x" end +let s = "Main.CompletionFoo.unicode_αβγ.y" + c, r = test_complete(s) + @test "yy" in c +end + let s = "Main.CompletionFoo.bar.no_val_available" c, r = test_complete(s) @test length(c)==0 @@ -313,18 +347,26 @@ end # inexistent completion inside a cmd @test_nocompletion("run(`lol") +# issue 55856: copy(A'). 
errors in the REPL +let + c, r = test_complete("copy(A').") + @test isempty(c) +end + # test latex symbol completions let s = "\\alpha" c, r = test_bslashcomplete(s) - @test c[1] == "α" - @test r == 1:length(s) + @test c[1].completion == "α" + @test c[1].name == "α" + @test r == 1:lastindex(s) @test length(c) == 1 end # test latex symbol completions after unicode #9209 let s = "α\\alpha" c, r = test_bslashcomplete(s) - @test c[1] == "α" + @test c[1].completion == "α" + @test c[1].name == "α" @test r == 3:sizeof(s) @test length(c) == 1 end @@ -332,20 +374,25 @@ end # test emoji symbol completions let s = "\\:koala:" c, r = test_bslashcomplete(s) - @test c[1] == "🐨" + @test c[1].completion == "🐨" + @test c[1].name == "🐨" @test r == 1:sizeof(s) @test length(c) == 1 end let s = "\\:ko" c, r = test_bslashcomplete(s) - @test "\\:koala:" in c + ko = only(filter(c) do namedcompletion + namedcompletion.completion == "\\:koala:" + end) + @test ko.name == "🐨 \\:koala:" end # test emoji symbol completions after unicode #9209 let s = "α\\:koala:" c, r = test_bslashcomplete(s) - @test c[1] == "🐨" + @test c[1].name == "🐨" + @test c[1].completion == "🐨" @test r == 3:sizeof(s) @test length(c) == 1 end @@ -495,7 +542,7 @@ end let s = "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat," c, r, res = test_complete(s) @test !res - @test_broken only(c) == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg})) + @test only(c) == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg})) end let s = "CompletionFoo.test3([1.,2.], 1.," @@ -566,7 +613,7 @@ end let s = "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat)," c, r, res = test_complete(s) @test !res - @test length(c) == 2 + @test length(c) == 1 end # method completions with kwargs @@ -634,7 +681,7 @@ let s = "CompletionFoo.?([1,2,3], 2.0)" c, r, res = test_complete(s) @test !res @test length(c) == 1 - @test occursin("test(x::AbstractArray{T}, y) where T<:Real", c[1]) + @test occursin("test(x::AbstractArray{T}, y) where T<:Real", only(c)) # In particular, this checks that test(args...) 
is not a valid completion # since it is strictly less specific than test(x::AbstractArray{T}, y) end @@ -668,15 +715,15 @@ let s = "CompletionFoo.?(false, \"a\", 3, " c, r, res = test_complete(s) @test !res @test length(c) == 2 - @test occursin("test(args...)", c[1]) - @test occursin("test11(a::Integer, b, c)", c[2]) + @test any(s->occursin("test(args...)", s), c) + @test any(s->occursin("test11(a::Integer, b, c)", s), c) end let s = "CompletionFoo.?(false, \"a\", 3, " c, r, res = test_complete_noshift(s) @test !res @test length(c) == 1 - @test occursin("test11(a::Integer, b, c)", c[1]) + @test occursin("test11(a::Integer, b, c)", only(c)) end let s = "CompletionFoo.?(\"a\", 3, " @@ -699,7 +746,7 @@ let s = "CompletionFoo.?()" c, r, res = test_complete_noshift(s) @test !res @test length(c) == 1 - @test occursin("test10(s::String...)", c[1]) + @test occursin("test10(s::String...)", only(c)) end #= TODO: restrict the number of completions when a semicolon is present in ".?(" syntax @@ -717,7 +764,7 @@ let s = "CompletionFoo.?(3; len2=5, " c, r, res = test_complete_noshift(s) @test !res @test length(c) == 1 - @test occursin("kwtest3(a::Integer; namedarg, foobar, slurp...)", c[1]) + @test occursin("kwtest3(a::Integer; namedarg, foobar, slurp...)", only(c)) # the other two kwtest3 methods should not appear because of specificity end =# @@ -732,6 +779,9 @@ end #TODO: @test_nocompletion("CompletionFoo.?(3; len2=5; ") +# https://github.com/JuliaLang/julia/issues/52551 +@test !isempty(test_complete("?(")) + ################################################################# # Test method completion with varargs @@ -1032,8 +1082,8 @@ let s, c, r # Issue #8047 s = "@show \"/dev/nul\"" c,r = completions(s, 15) - c = map(completion_text, c) - @test "null" in c + c = map(named_completion, c) + @test "null\"" in [_c.completion for _c in c] @test r == 13:15 @test s[r] == "nul" @@ -1057,8 +1107,8 @@ let s, c, r if !isdir(joinpath(s, "tmp")) c,r = test_scomplete(s) @test !("tmp/" in c) - @test r === length(s) + 1:0 - @test s[r] == "" + @test !("$s/tmp/" in c) + @test r === (sizeof(s) + 1):sizeof(s) end s = "cd \$(Iter" @@ -1083,7 +1133,7 @@ let s, c, r touch(file) s = string(tempdir(), "/repl\\ ") c,r = test_scomplete(s) - @test ["repl\\ completions"] == c + @test ["'repl completions'"] == c @test s[r] == "repl\\ " rm(file) end @@ -1111,28 +1161,44 @@ let s, c, r end # Tests detecting of files in the env path (in shell mode) - let path, file - path = tempdir() - unreadable = joinpath(tempdir(), "replcompletion-unreadable") + mktempdir() do path + unreadable = joinpath(path, "replcompletion-unreadable") + file = joinpath(path, "tmp-executable") + touch(file) + chmod(file, 0o755) + mkdir(unreadable) + hidden_file = joinpath(unreadable, "hidden") + touch(hidden_file) - try - file = joinpath(path, "tmp-executable") - touch(file) - chmod(file, 0o755) - mkdir(unreadable) - chmod(unreadable, 0o000) + # Create symlink to a file that is in an unreadable directory + chmod(hidden_file, 0o755) + chmod(unreadable, 0o000) + symlink(hidden_file, joinpath(path, "replcompletions-link")) + try # PATH can also contain folders which we aren't actually allowed to read. withenv("PATH" => string(path, ":", unreadable)) do s = "tmp-execu" + # Files reachable by PATH are cached async when PATH is seen to have been changed by `complete_path` + # so changes are unlikely to appear in the first complete. 
For testing purposes we can wait for + # caching to finish + @lock REPL.REPLCompletions.PATH_cache_lock begin + # force the next cache update to happen immediately + REPL.REPLCompletions.next_cache_update = 0 + end + c,r = test_scomplete(s) + wait(REPL.REPLCompletions.PATH_cache_task::Task) # wait for caching to complete c,r = test_scomplete(s) @test "tmp-executable" in c @test r == 1:9 @test s[r] == "tmp-execu" + + c,r = test_scomplete("replcompletions-link") + @test isempty(c) end finally - rm(file) - rm(unreadable) + # If we don't fix the permissions here, our cleanup fails. + chmod(unreadable, 0o700) end end @@ -1150,6 +1216,12 @@ let s, c, r withenv("PATH" => string(tempdir(), ":", dir)) do s = string("repl-completio") + @lock REPL.REPLCompletions.PATH_cache_lock begin + # force the next cache update to happen immediately + REPL.REPLCompletions.next_cache_update = 0 + end + c,r = test_scomplete(s) + wait(REPL.REPLCompletions.PATH_cache_task::Task) # wait for caching to complete c,r = test_scomplete(s) @test ["repl-completion"] == c @test s[r] == "repl-completio" @@ -1177,8 +1249,8 @@ let current_dir, forbidden catch e e isa Base.IOError && occursin("ELOOP", e.msg) end - c, r = test_complete("\""*escape_string(joinpath(path, "selfsym"))) - @test c == ["selfsymlink"] + c, r = test_complete("\"$(escape_string(path))/selfsym") + @test c == ["selfsymlink\""] end end @@ -1214,20 +1286,20 @@ mktempdir() do path dir_space = replace(space_folder, " " => "\\ ") s = Sys.iswindows() ? "cd $dir_space\\\\space" : "cd $dir_space/space" c, r = test_scomplete(s) - @test s[r] == "space" - @test "space\\ .file" in c + @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space") + @test "'$space_folder'/'space .file'" in c # Also use shell escape rules within cmd backticks s = "`$s" c, r = test_scomplete(s) - @test s[r] == "space" - @test "space\\ .file" in c + @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space") + @test "'$space_folder'/'space .file'" in c # escape string according to Julia escaping rules - julia_esc(str) = escape_string(str, ('\"','$')) + julia_esc(str) = REPL.REPLCompletions.do_string_escape(str) # For normal strings the string should be properly escaped according to # the usual rules for Julia strings. - s = "cd(\"" * julia_esc(joinpath(path, space_folder, "space")) + s = "cd(\"" * julia_esc(joinpath(path, space_folder) * "/space") c, r = test_complete(s) @test s[r] == "space" @test "space .file\"" in c @@ -1236,7 +1308,7 @@ mktempdir() do path # which needs to be escaped in Julia strings (on unix we could do this # test with all sorts of special chars) touch(joinpath(space_folder, "needs_escape\$.file")) - escpath = julia_esc(joinpath(path, space_folder, "needs_escape\$")) + escpath = julia_esc(joinpath(path, space_folder) * "/needs_escape\$") s = "cd(\"$escpath" c, r = test_complete(s) @test s[r] == "needs_escape\\\$" @@ -1273,12 +1345,12 @@ mktempdir() do path # in shell commands the shell path completion cannot complete # paths with these characters c, r, res = test_scomplete(test_dir) - @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") + @test c[1] == "'$test_dir/'" @test res end escdir = julia_esc(test_dir) c, r, res = test_complete("\""*escdir) - @test c[1] == escdir*(Sys.iswindows() ? 
"\\\\" : "/") + @test c[1] == escdir * "/" @test res finally rm(joinpath(path, test_dir), recursive=true) @@ -1289,9 +1361,9 @@ mktempdir() do path end # Test tilde path completion -let (c, r, res) = test_complete("\"~/julia") +let (c, r, res) = test_complete("\"~/ka8w5rsz") if !Sys.iswindows() - @test res && c == String[homedir() * "/julia"] + @test res && c == String[homedir() * "/ka8w5rsz"] else @test !res end @@ -1299,6 +1371,31 @@ let (c, r, res) = test_complete("\"~/julia") c, r, res = test_complete("\"foo~bar") @test !res end +if !Sys.iswindows() + # create a dir and file temporarily in the home directory + path = mkpath(joinpath(homedir(), "Zx6Wa0GkC0")) + touch(joinpath(path, "my_file")) + try + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC") + @test res + @test c == String["Zx6Wa0GkC0/"] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0") + @test res + @test c == String[homedir() * "/Zx6Wa0GkC0"] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_") + @test res + @test c == String["my_file\""] + end + let (c, r, res) = test_complete("\"~/Zx6Wa0GkC0/my_file") + @test res + @test c == String[homedir() * "/Zx6Wa0GkC0/my_file"] + end + finally + rm(path, recursive=true) + end +end # Test the completion returns nothing when the folder do not exist let (c, r) = test_complete("cd(\"folder_do_not_exist_77/file") @@ -1314,27 +1411,43 @@ if Sys.iswindows() cd(path) do s = "cd ..\\\\" c,r = test_scomplete(s) - @test r == length(s)+1:length(s) - @test temp_name * "\\\\" in c + @test r == lastindex(s)-3:lastindex(s) + @test "../$temp_name/" in c + + s = "cd ../" + c,r = test_scomplete(s) + @test r == lastindex(s)+1:lastindex(s) + @test "$temp_name/" in c s = "ls $(file[1:2])" c,r = test_scomplete(s) - @test r == length(s)-1:length(s) + @test r == lastindex(s)-1:lastindex(s) @test file in c s = "cd(\"..\\\\" c,r = test_complete(s) - @test r == length(s)+1:length(s) - @test temp_name * "\\\\" in c + @test r == lastindex(s)-3:lastindex(s) + @test "../$temp_name/" in c + + s = "cd(\"../" + c,r = test_complete(s) + @test r == lastindex(s)+1:lastindex(s) + @test "$temp_name/" in c s = "cd(\"$(file[1:2])" c,r = test_complete(s) - @test r == length(s) - 1:length(s) + @test r == lastindex(s) - 1:lastindex(s) @test (length(c) > 1 && file in c) || (["$file\""] == c) end rm(tmp) end +# issue 51985 +let s = "`\\" + c,r = test_scomplete(s) + @test r == lastindex(s)+1:lastindex(s) +end + # auto completions of true and false... 
issue #14101 let s = "tru" c, r, res = test_complete(s) @@ -1376,7 +1489,7 @@ function test_dict_completion(dict_name) @test c == Any["\"abcd\"]"] s = "$dict_name[\"abcd]" # trailing close bracket c, r = completions(s, lastindex(s) - 1) - c = map(completion_text, c) + c = map(x -> named_completion(x).completion, c) @test c == Any["\"abcd\""] s = "$dict_name[:b" c, r = test_complete(s) @@ -1429,8 +1542,12 @@ test_dict_completion("CompletionFoo.test_customdict") test_dict_completion("test_repl_comp_dict") test_dict_completion("test_repl_comp_customdict") -# Issue #23004: this should not throw: -@test REPLCompletions.dict_identifier_key("test_dict_ℂ[\\", :other) isa Tuple +@testset "dict_identifier_key" begin + # Issue #23004: this should not throw: + @test REPLCompletions.dict_identifier_key("test_dict_ℂ[\\", :other) isa Tuple + # Issue #55931: neither should this: + @test REPLCompletions.dict_identifier_key("test_dict_no_length[", :other) isa NTuple{3,Nothing} +end @testset "completion of string/cmd macros (#22577)" begin c, r, res = test_complete("ra") @@ -1443,28 +1560,38 @@ test_dict_completion("test_repl_comp_customdict") @test "testcmd`" in c c, r, res = test_complete("CompletionFoo.tϵsτc") @test "tϵsτcmδ`" in c + + # Issue #56071: don't complete string and command macros when the input matches the internal name like `r_` to `r"` + c, r, res = test_complete("CompletionFoo.teststr_") + @test isempty(c) + c, r, res = test_complete("CompletionFoo.teststr_s") + @test isempty(c) + c, r, res = test_complete("CompletionFoo.testcmd_") + @test isempty(c) + c, r, res = test_complete("CompletionFoo.testcmd_c") + @test isempty(c) end @testset "Keyword-argument completion" begin c, r = test_complete("CompletionFoo.kwtest3(a;foob") @test c == ["foobar="] c, r = test_complete("CompletionFoo.kwtest3(a; le") - @test "length" ∈ c # provide this kind of completion in case the user wants to splat a variable + @test "length" ∉ c @test "length=" ∈ c @test "len2=" ∈ c @test "len2" ∉ c c, r = test_complete("CompletionFoo.kwtest3.(a;\nlength") - @test "length" ∈ c + @test "length" ∉ c @test "length=" ∈ c c, r = test_complete("CompletionFoo.kwtest3(a, length=4, l") @test "length" ∈ c @test "length=" ∉ c # since it was already used, do not suggest it again @test "len2=" ∈ c c, r = test_complete("CompletionFoo.kwtest3(a; kwargs..., fo") - @test "foreach" ∈ c # provide this kind of completion in case the user wants to splat a variable + @test "foreach" ∉ c @test "foobar=" ∈ c c, r = test_complete("CompletionFoo.kwtest3(a; another!kwarg=0, le") - @test "length" ∈ c + @test "length" ∉ c @test "length=" ∈ c # the first method could be called and `anotherkwarg` slurped @test "len2=" ∈ c c, r = test_complete("CompletionFoo.kwtest3(a; another!") @@ -1478,7 +1605,7 @@ end c, r = test_complete_foo("kwtest3(blabla; unknown=4, namedar") @test c == ["namedarg="] c, r = test_complete_foo("kwtest3(blabla; named") - @test "named" ∈ c + @test "named" ∉ c @test "namedarg=" ∈ c @test "len2" ∉ c c, r = test_complete_foo("kwtest3(blabla; named.") @@ -1486,11 +1613,11 @@ end c, r = test_complete_foo("kwtest3(blabla; named..., another!") @test c == ["another!kwarg="] c, r = test_complete_foo("kwtest3(blabla; named..., len") - @test "length" ∈ c + @test "length" ∉ c @test "length=" ∈ c @test "len2=" ∈ c c, r = test_complete_foo("kwtest3(1+3im; named") - @test "named" ∈ c + @test "named" ∉ c # TODO: @test "namedarg=" ∉ c @test "len2" ∉ c c, r = test_complete_foo("kwtest3(1+3im; named.") @@ -1817,7 +1944,7 @@ function 
Base.getproperty(v::Issue36437, s::Symbol) end let s = "Issue36437(42)." - c, r, res = test_complete_context(s, @__MODULE__) + c, r, res = test_complete_context(s) @test res for n in ("a", "b", "c") @test n in c @@ -1825,16 +1952,47 @@ let s = "Issue36437(42)." end let s = "Some(Issue36437(42)).value." - c, r, res = test_complete_context(s, @__MODULE__) + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end + +some_issue36437 = Some(Issue36437(42)) + +let s = "some_issue36437.value." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end + +# get completions for :toplevel/:tuple expressions +let s = "some_issue36437.value.a, some_issue36437.value." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end +let s = "@show some_issue36437.value.a; some_issue36437.value." + c, r, res = test_complete_context(s) @test res for n in ("a", "b", "c") @test n in c end end +# https://github.com/JuliaLang/julia/issues/51505 +let s = "()." + c, r, res = test_complete_context(s) + @test res +end # aggressive concrete evaluation on mutable allocation in `repl_frame` let s = "Ref(Issue36437(42))[]." - c, r, res = test_complete_context(s, @__MODULE__) + c, r, res = test_complete_context(s) @test res for n in ("a", "b", "c") @test n in c @@ -1842,14 +2000,132 @@ let s = "Ref(Issue36437(42))[]." @test "v" ∉ c end +# concrete evaluation through `getindex`ing dictionary +global_dict = Dict{Symbol, Any}(:r => r"foo") +let s = "global_dict[:r]." + c, r, res = test_complete_context(s) + @test res + for fname in fieldnames(Regex) + @test String(fname) in c + end +end +global_dict_nested = Dict{Symbol, Any}(:g => global_dict) +let s = "global_dict_nested[:g][:r]." + c, r, res = test_complete_context(s) + @test res + for fname in fieldnames(Regex) + @test String(fname) in c + end +end + +# dict completions through nested `getindex`ing +let s = "global_dict_nested[" + c, r, res = test_complete_context(s) + @test res + @test ":g]" in c +end +let s = "global_dict_nested[:g][" + c, r, res = test_complete_context(s) + @test res + @test ":r]" in c +end + const global_xs = [Some(42)] let s = "pop!(global_xs)." - c, r, res = test_complete_context(s, @__MODULE__) + c, r, res = test_complete_context(s) @test res @test "value" in c end @test length(global_xs) == 1 # the completion above shouldn't evaluate `pop!` call +# https://github.com/JuliaLang/julia/issues/51499 +# allow aggressive concrete evaluation for child uncached frames +struct Issue51499CompletionDict + inner::Dict{Symbol,Any} + leaf_func # Function that gets invoked on leaf objects before being returned. 
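+    # Note for readers of the tests below (`tcd1.`, `tcd1.x.`, `tcd1.x.v.`): property
+    # completion is expected to fold `getproperty` through the nested dictionaries
+    # without ever invoking `leaf_func`; its side effect on `issue51499[]` further down
+    # is what would expose an accidental evaluation.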
+ function Issue51499CompletionDict(inner::Dict, leaf_func=identity) + inner = Dict{Symbol,Any}(Symbol(k) => v for (k, v) in inner) + return new(inner, leaf_func) + end +end +function Base.getproperty(tcd::Issue51499CompletionDict, name::Symbol) + prop = getfield(tcd, :inner)[name] + isa(prop, Issue51499CompletionDict) && return prop + return getfield(tcd, :leaf_func)(prop) +end +Base.propertynames(tcd::Issue51499CompletionDict) = keys(getfield(tcd, :inner)) + +const issue51499 = Ref{Any}(nothing) +tcd3 = Issue51499CompletionDict( + Dict(:a => 1.0, :b => 2.0), + function (x) + issue51499[] = x + return sin(x) + end) +tcd2 = Issue51499CompletionDict( + Dict(:v => tcd3, :w => 1.0)) +tcd1 = Issue51499CompletionDict( + Dict(:x => tcd2, :y => 1.0)) +let (c, r, res) = test_complete_context("tcd1.") + @test res + @test "x" in c && "y" in c + @test isnothing(issue51499[]) +end +let (c, r, res) = test_complete_context("tcd1.x.") + @test res + @test "v" in c && "w" in c + @test isnothing(issue51499[]) +end +let (c, r, res) = test_complete_context("tcd1.x.v.") + @test res + @test "a" in c && "b" in c + @test isnothing(issue51499[]) +end +@test tcd1.x.v.a == sin(1.0) +@test issue51499[] == 1.0 + +# aggressive constant propagation for mutable `Const`s +mutable_const_prop = Dict{Symbol,Any}(:key => Any[Some(r"x")]) +getkeyelem(d) = d[:key][1] +let (c, r, res) = test_complete_context("getkeyelem(mutable_const_prop).") + @test res + @test "value" in c +end +let (c, r, res) = test_complete_context("getkeyelem(mutable_const_prop).value.") + @test res + for name in fieldnames(Regex) + @test String(name) in c + end +end + +# JuliaLang/julia/#51548 +# don't return wrong result due to mutable inconsistency +function issue51548(T, a) + # if we fold `xs = getindex(T)` to `xs::Const(Vector{T}())`, then we may wrongly + # constant-fold `isempty(xs)::Const(true)` and return wrong result + xs = T[] + if a isa T + push!(xs, a) + end + return Val(isempty(xs)) +end; +let inferred = REPL.REPLCompletions.repl_eval_ex( + :(issue51548(Any, r"issue51548")), @__MODULE__; limit_aggressive_inference=true) + @test !isnothing(inferred) + RT = Core.Compiler.widenconst(inferred) + @test Val{false} <: RT +end +module TestLimitAggressiveInferenceGetProp +global global_var = 1 +end +function test_limit_aggressive_inference_getprop() + return getproperty(TestLimitAggressiveInferenceGetProp, :global_var) +end +let inferred = REPL.REPLCompletions.repl_eval_ex( + :(test_limit_aggressive_inference_getprop()), @__MODULE__; limit_aggressive_inference=true) + @test inferred == Core.Const(1) +end + # Test completion of var"" identifiers (#49280) let s = "var\"complicated " c, r = test_complete_foo(s) @@ -1873,3 +2149,340 @@ let s = "`abc`.e" # (completions for the fields of `Cmd`) @test c == Any["env", "exec"] end + +# suppress false positive field completions (when `getproperty`/`propertynames` is overloaded) +struct Issue51499_2 + inner::Dict{Symbol,Any} +end +Base.getproperty(issue51499::Issue51499_2, name::Symbol) = getfield(issue51499, :inner)[name] +Base.propertynames(issue51499::Issue51499_2) = keys(getfield(issue51499, :inner)) +const issue51499_2_1 = Issue51499_2(Dict(:a => nothing)) +const issue51499_2_2 = Issue51499_2(Dict(:b => nothing)) +let s = "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)." + c, r, res = test_complete_context(s) + @test "inner" ∉ c +end + +# Test completion for a case when type inference returned `Union` of the same types +union_somes(a, b) = rand() < 0.5 ? Some(a) : Some(b) +let s = "union_somes(1, 1.0)." 
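+    # `union_somes(1, 1.0)` should infer as a `Union` of `Some{Int}` and `Some{Float64}`;
+    # both carry a `value` field, so the completion below is expected to offer it.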
+ c, r, res = test_complete_context(s) + @test res + @test "value" in c +end +union_some_ref(a, b) = rand() < 0.5 ? Some(a) : Ref(b) +let s = "union_some_ref(1, 1.0)." + c, r, res = test_complete_context(s) + @test res + @test "value" in c && "x" in c +end + +Issue49892(x) = x +let s = "Issue49892(fal" + c, r, res = test_complete_context(s) + @test res + for n in ("false", "falses") + @test n in c + end +end + +@testset "public but non-exported symbols only complete qualified (#51331)" begin + c, r, res = test_complete("ispub") + @test res + @test "ispublic" ∉ c + + c, r, res = test_complete("Base.ispub") + @test res + @test "ispublic" ∈ c + + @test Base.ispublic(Base, :ispublic) + # If this last test starts failing, that's okay, just pick a new example symbol: + @test !Base.isexported(Base, :ispublic) +end + +# issue #51194 +for (s, compl) in (("2*CompletionFoo.fmsoe", "fmsoebelkv"), + (":a isa CompletionFoo.test!1", "test!12"), + ("-CompletionFoo.Test_y(3).", "yy"), + ("99 ⨷⁻ᵨ⁷ CompletionFoo.type_test.", "xx"), + ("CompletionFoo.type_test + CompletionFoo.Test_y(2).", "yy"), + ("(CompletionFoo.type_test + CompletionFoo.Test_y(2)).", "xx"), + ("CompletionFoo.type_test + CompletionFoo.unicode_αβγ.", "yy"), + ("(CompletionFoo.type_test + CompletionFoo.unicode_αβγ).", "xx"), + ("foo'CompletionFoo.test!1", "test!12")) + @testset let s=s, compl=compl + c, r = test_complete_noshift(s) + @test length(c) == 1 + @test only(c) == compl + end +end + +# allows symbol completion within incomplete :macrocall +# https://github.com/JuliaLang/julia/issues/51827 +macro issue51827(args...) + length(args) ≥ 2 || error("@issue51827: incomplete arguments") + return args +end +let s = "@issue51827 Base.ac" + c, r, res = test_complete_context(s) + @test res + @test "acquire" in c +end + +let t = REPLCompletions.repl_eval_ex(:(`a b`), @__MODULE__; limit_aggressive_inference=true) + @test t isa Core.Const + @test t.val == `a b` +end + +# issue #51823 +@test "include" in test_complete_context("inc", Main)[1] + +# REPL completions should not try to concrete-evaluate !:noub methods +function very_unsafe_method(i::Int) + xs = Any[] + @inbounds xs[i] +end +let t = REPLCompletions.repl_eval_ex(:(unsafe_method(42)), @__MODULE__) + @test isnothing(t) +end + +# https://github.com/JuliaLang/julia/issues/52099 +const issue52099 = [] +let t = REPLCompletions.repl_eval_ex(:(Base.PersistentDict(issue52099 => 3)), @__MODULE__) + if t isa Core.Const + @test length(t.val) == 1 + end +end + +# test REPLInterpreter effects for `getindex(::Dict, key)` +for (DictT, KeyT) = Any[(Dict{Symbol,Any}, Symbol), + (Dict{Int,Any}, Int), + (Dict{String,Any}, String)] + @testset let DictT=DictT, KeyT=KeyT + effects = Base.infer_effects(getindex, (DictT,KeyT); interp=REPL.REPLCompletions.REPLInterpreter()) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_terminates(effects) + @test Core.Compiler.is_noub(effects) + effects = Base.infer_effects((DictT,KeyT); interp=REPL.REPLCompletions.REPLInterpreter()) do d, key + key in keys(d) + end + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_terminates(effects) + @test Core.Compiler.is_noub(effects) + end +end + +# test invalidation support +replinterp_invalidation_callee(c::Bool=rand(Bool)) = Some(c ? r"foo" : r"bar") +replinterp_invalidation_caller() = replinterp_invalidation_callee().value +@test REPLCompletions.repl_eval_ex(:(replinterp_invalidation_caller()), @__MODULE__) == Regex +replinterp_invalidation_callee(c::Bool=rand(Bool)) = Some(c ? 
"foo" : "bar") +@test REPLCompletions.repl_eval_ex(:(replinterp_invalidation_caller()), @__MODULE__) == String + +# JuliaLang/julia#52922 +let s = "using Base.Th" + c, r, res = test_complete_context(s) + @test res + @test "Threads" in c +end +let s = "using Base." + c, r, res = test_complete_context(s) + @test res + @test "BinaryPlatforms" in c +end +# JuliaLang/julia#53999 +let s = "using Base.Sort, Base.Th" + c, r, res = test_complete_context(s) + @test res + @test "Threads" in c +end +# test cases with the `.` accessor +module Issue52922 +module Inner1 +module Inner12 end +end +module Inner2 end +end +let s = "using .Iss" + c, r, res = test_complete_context(s) + @test res + @test "Issue52922" in c +end +let s = " using .Iss" + c, r, res = test_complete_context(s) + @test res + @test "Issue52922" in c +end +let s = "@time using .Iss" + c, r, res = test_complete_context(s) + @test res + @test "Issue52922" in c +end +let s = " @time using .Iss" + c, r, res = test_complete_context(s) + @test res + @test "Issue52922" in c +end +let s = "@time(using .Iss" + c, r, res = test_complete_context(s) + @test res + @test "Issue52922" in c +end +let s = "using .Issue52922.Inn" + c, r, res = test_complete_context(s) + @test res + @test "Inner1" in c +end +let s = "using .Issue52922.Inner1." + c, r, res = test_complete_context(s) + @test res + @test "Inner12" in c +end +let s = "using .Inner1.Inn" + c, r, res = test_complete_context(s, Issue52922) + @test res + @test "Inner12" in c +end +let s = "using ..Issue52922.Inn" + c, r, res = test_complete_context(s, Issue52922.Inner1) + @test res + @test "Inner2" in c +end +let s = "using ...Issue52922.Inn" + c, r, res = test_complete_context(s, Issue52922.Inner1.Inner12) + @test res + @test "Inner2" in c +end + +struct Issue53126 end +Base.propertynames(::Issue53126) = error("this should not be called") +let s = "Issue53126()." 
+ c, r, res = test_complete_context(s) + @test res + @test isempty(c) +end + +# complete explicitly `using`ed names +baremodule TestExplicitUsing +using Base: @assume_effects +end # baremodule TestExplicitUsing +let s = "@assu" + c, r, res = test_complete_context(s, TestExplicitUsing) + @test res + @test "@assume_effects" in c +end +let s = "TestExplicitUsing.@assu" + c, r, res = test_complete_context(s) + @test res + @test "@assume_effects" in c +end +baremodule TestExplicitUsingNegative end +let s = "@assu" + c, r, res = test_complete_context(s, TestExplicitUsingNegative) + @test res + @test "@assume_effects" ∉ c +end +let s = "TestExplicitUsingNegative.@assu" + c, r, res = test_complete_context(s) + @test res + @test "@assume_effects" ∉ c +end +# should complete implicitly `using`ed names +module TestImplicitUsing end +let s = "@asse" + c, r, res = test_complete_context(s, TestImplicitUsing) + @test res + @test "@assert" in c +end +let s = "TestImplicitUsing.@asse" + c, r, res = test_complete_context(s) + @test res + @test "@assert" in c +end + +# JuliaLang/julia#23374: completion for `import Mod.name` +module Issue23374 +global v23374 = nothing +global w23374 = missing +end +let s = "import .Issue23374.v" + c, r, res = test_complete_context(s) + @test res + @test "v23374" in c +end +let s = "import Base.sin, .Issue23374.v" + c, r, res = test_complete_context(s) + @test res + @test "v23374" in c +end +let s = "using .Issue23374.v" + c, r, res = test_complete_context(s) + @test res + @test isempty(c) +end +# JuliaLang/julia#23374: completion for `using Mod: name` +let s = "using Base: @ass" + c, r, res = test_complete_context(s) + @test res + @test "@assume_effects" in c +end +let s = "using .Issue23374: v" + c, r, res = test_complete_context(s) + @test res + @test "v23374" in c +end +let s = "using .Issue23374: v23374, w" + c, r, res = test_complete_context(s) + @test res + @test "w23374" in c +end +# completes `using ` to `using [list of available modules]` +let s = "using " + c, r, res = test_complete_context(s) + @test res + @test !isempty(c) +end + +baremodule _TestInternalBindingOnly +export binding1, binding2 +global binding1 = global binding2 = nothing +end +baremodule TestInternalBindingOnly +using .._TestInternalBindingOnly +global binding = nothing +export binding +end +for s = ("TestInternalBindingOnly.bind", "using .TestInternalBindingOnly: bind") + # when module is explicitly accessed, completion should show internal names only + let (c, r, res) = test_complete_context(s; shift=false) + @test res + @test "binding" ∈ c + @test "binding1" ∉ c && "binding2" ∉ c + end + # unless completion is forced via shift key + let (c, r, res) = test_complete_context(s, TestInternalBindingOnly) + @test res + @test "binding" ∈ c + @test "binding1" ∈ c && "binding2" ∈ c + end +end +# without explicit module access, completion should show all available names +let (c, r, res) = test_complete_context("bind", TestInternalBindingOnly; shift=false) + @test res + @test "binding" ∈ c + @test "binding1" ∈ c && "binding2" ∈ c +end +let (c, r, res) = test_complete_context("si", Main; shift=false) + @test res + @test "sin" ∈ c +end + +let (c, r, res) = test_complete_context("const xxx = Base.si", Main) + @test res + @test "sin" ∈ c +end + +let (c, r, res) = test_complete_context("global xxx::Number = Base.", Main) + @test res + @test "pi" ∈ c +end diff --git a/stdlib/REPL/test/runtests.jl b/stdlib/REPL/test/runtests.jl index e152677ccf7bb..d3eb6b9964981 100644 --- a/stdlib/REPL/test/runtests.jl +++ 
b/stdlib/REPL/test/runtests.jl @@ -3,6 +3,10 @@ # Make a copy of the original environment original_env = copy(ENV) +module PrecompilationTests + include("precompilation.jl") +end + module REPLTests include("repl.jl") end diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml index f32fc3e2a4f84..5a9cc2dfc4cb7 100644 --- a/stdlib/Random/Project.toml +++ b/stdlib/Random/Project.toml @@ -1,5 +1,6 @@ name = "Random" uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" [deps] SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -9,7 +10,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Future = "9fa8497b-333b-5362-9e8d-4d0656e87820" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [targets] -test = ["Test", "SparseArrays", "LinearAlgebra", "Future", "Statistics"] +test = ["Test", "SparseArrays", "LinearAlgebra", "Future"] diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md index e344e47947440..9ef86bb0d94f8 100644 --- a/stdlib/Random/docs/src/index.md +++ b/stdlib/Random/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Random/docs/src/index.md" +``` + # Random Numbers ```@meta @@ -81,7 +85,7 @@ Random.MersenneTwister Random.RandomDevice ``` -## Hooking into the `Random` API +## [Hooking into the `Random` API](@id rand-api-hook) There are two mostly orthogonal ways to extend `Random` functionalities: 1) generating random values of custom types @@ -90,7 +94,7 @@ There are two mostly orthogonal ways to extend `Random` functionalities: The API for 1) is quite functional, but is relatively recent so it may still have to evolve in subsequent releases of the `Random` module. For example, it's typically sufficient to implement one `rand` method in order to have all other usual methods work automatically. -The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementor, +The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementer, in order to support usual types of generated values. ### Generating random values of custom types @@ -99,7 +103,7 @@ Generating random values for some distributions may involve various trade-offs. The `Random` module defines a customizable framework for obtaining random values that can address these issues. Each invocation of `rand` generates a *sampler* which can be customized with the above trade-offs in mind, by adding methods to `Sampler`, which in turn can dispatch on the random number generator, the object that characterizes the distribution, and a suggestion for the number of repetitions. Currently, for the latter, `Val{1}` (for a single sample) and `Val{Inf}` (for an arbitrary number) are used, with `Random.Repetition` an alias for both. -The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementor should define the method +The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementer should define the method ```julia rand(rng, sampler) @@ -126,8 +130,8 @@ Random.SamplerSimple Decoupling pre-computation from actually generating the values is part of the API, and is also available to the user. 
As an example, assume that `rand(rng, 1:20)` has to be called repeatedly in a loop: the way to take advantage of this decoupling is as follows: ```julia -rng = MersenneTwister() -sp = Random.Sampler(rng, 1:20) # or Random.Sampler(MersenneTwister, 1:20) +rng = Xoshiro() +sp = Random.Sampler(rng, 1:20) # or Random.Sampler(Xoshiro, 1:20) for x in X n = rand(rng, sp) # similar to n = rand(rng, 1:20) # use n @@ -159,8 +163,8 @@ Scalar and array methods for `Die` now work as expected: julia> rand(Die) Die(5) -julia> rand(MersenneTwister(0), Die) -Die(11) +julia> rand(Xoshiro(0), Die) +Die(10) julia> rand(Die, 3) 3-element Vector{Die}: @@ -215,7 +219,7 @@ and that we *always* want to build an alias table, regardless of the number of v Random.eltype(::Type{<:DiscreteDistribution}) = Int function Random.Sampler(::Type{<:AbstractRNG}, distribution::DiscreteDistribution, ::Repetition) - SamplerSimple(disribution, make_alias_table(distribution.probabilities)) + SamplerSimple(distribution, make_alias_table(distribution.probabilities)) end ``` should be defined to return a sampler with pre-computed data, then @@ -346,8 +350,8 @@ DocTestSetup = nothing By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom number sequence when running your program multiple times. However, a minor release of Julia (e.g. -1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in -particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like +1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed. +(Even if the sequence produced by a low-level function like [`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change prohibits many algorithmic improvements. diff --git a/stdlib/Random/src/DSFMT.jl b/stdlib/Random/src/DSFMT.jl index 4c5cb8c522667..25155b4e8575d 100644 --- a/stdlib/Random/src/DSFMT.jl +++ b/stdlib/Random/src/DSFMT.jl @@ -65,7 +65,8 @@ function dsfmt_init_gen_rand(s::DSFMT_state, seed::UInt32) s.val, seed) end -function dsfmt_init_by_array(s::DSFMT_state, seed::Vector{UInt32}) +function dsfmt_init_by_array(s::DSFMT_state, seed::StridedVector{UInt32}) + strides(seed) == (1,) || throw(ArgumentError("seed must have its stride equal to 1")) ccall((:dsfmt_init_by_array,:libdSFMT), Cvoid, (Ptr{Cvoid}, Ptr{UInt32}, Int32), @@ -194,9 +195,11 @@ function dsfmt_jump(s::DSFMT_state, jp::GF2X) work = zeros(Int32, JN32) rwork = reinterpret(UInt64, work) dsfmt = Vector{UInt64}(undef, nval >> 1) - GC.@preserve dsfmt val begin - pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmt) - pval = Base.unsafe_convert(Ptr{Cvoid}, val) + dsfmtref = Base.cconvert(Ptr{Cvoid}, dsfmt) + valref = Base.cconvert(Ptr{Cvoid}, val) + GC.@preserve dsfmtref valref begin + pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmtref) + pval = Base.unsafe_convert(Ptr{Cvoid}, valref) Base.Libc.memcpy(pdsfmt, pval, (nval - 1) * sizeof(Int32)) end dsfmt[end] = UInt64(N*2) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 292ae00d33628..7782de88ba537 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -12,7 +12,7 @@ The entropy is obtained from the operating system. 
""" struct RandomDevice <: AbstractRNG; end RandomDevice(seed::Nothing) = RandomDevice() -seed!(rng::RandomDevice) = rng +seed!(rng::RandomDevice, ::Nothing) = rng rand(rd::RandomDevice, sp::SamplerBoolBitInteger) = Libc.getrandom!(Ref{sp[]}())[] rand(rd::RandomDevice, ::SamplerType{Bool}) = rand(rd, UInt8) % Bool @@ -44,7 +44,7 @@ const MT_CACHE_I = 501 << 4 # number of bytes in the UInt128 cache @assert dsfmt_get_min_array_size() <= MT_CACHE_F mutable struct MersenneTwister <: AbstractRNG - seed::Vector{UInt32} + seed::Any state::DSFMT_state vals::Vector{Float64} ints::Vector{UInt128} @@ -70,7 +70,7 @@ mutable struct MersenneTwister <: AbstractRNG end end -MersenneTwister(seed::Vector{UInt32}, state::DSFMT_state) = +MersenneTwister(seed, state::DSFMT_state) = MersenneTwister(seed, state, Vector{Float64}(undef, MT_CACHE_F), Vector{UInt128}(undef, MT_CACHE_I >> 4), @@ -83,28 +83,26 @@ MersenneTwister(seed::Vector{UInt32}, state::DSFMT_state) = Create a `MersenneTwister` RNG object. Different RNG objects can have their own seeds, which may be useful for generating different streams of random numbers. -The `seed` may be a non-negative integer or a vector of -`UInt32` integers. If no seed is provided, a randomly generated one -is created (using entropy from the system). -See the [`seed!`](@ref) function for reseeding an already existing -`MersenneTwister` object. +The `seed` may be an integer, a string, or a vector of `UInt32` integers. +If no seed is provided, a randomly generated one is created (using entropy from the system). +See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object. +!!! compat "Julia 1.11" + Passing a negative integer seed requires at least Julia 1.11. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> rng = MersenneTwister(123); julia> x1 = rand(rng, 2) 2-element Vector{Float64}: - 0.5908446386657102 - 0.7667970365022592 + 0.37453777969575874 + 0.8735343642013971 -julia> rng = MersenneTwister(1234); - -julia> x2 = rand(rng, 2) +julia> x2 = rand(MersenneTwister(123), 2) 2-element Vector{Float64}: - 0.5908446386657102 - 0.7667970365022592 + 0.37453777969575874 + 0.8735343642013971 julia> x1 == x2 true @@ -115,7 +113,7 @@ MersenneTwister(seed=nothing) = function copy!(dst::MersenneTwister, src::MersenneTwister) - copyto!(resize!(dst.seed, length(src.seed)), src.seed) + dst.seed = src.seed copy!(dst.state, src.state) copyto!(dst.vals, src.vals) copyto!(dst.ints, src.ints) @@ -129,7 +127,7 @@ function copy!(dst::MersenneTwister, src::MersenneTwister) end copy(src::MersenneTwister) = - MersenneTwister(copy(src.seed), copy(src.state), copy(src.vals), copy(src.ints), + MersenneTwister(src.seed, copy(src.state), copy(src.vals), copy(src.ints), src.idxF, src.idxI, src.adv, src.adv_jump, src.adv_vals, src.adv_ints) @@ -144,12 +142,10 @@ hash(r::MersenneTwister, h::UInt) = function show(io::IO, rng::MersenneTwister) # seed - seed = from_seed(rng.seed) - seed_str = seed <= typemax(Int) ? 
string(seed) : "0x" * string(seed, base=16) # DWIM
     if rng.adv_jump == 0 && rng.adv == 0
-        return print(io, MersenneTwister, "(", seed_str, ")")
+        return print(io, MersenneTwister, "(", repr(rng.seed), ")")
     end
-    print(io, MersenneTwister, "(", seed_str, ", (")
+    print(io, MersenneTwister, "(", repr(rng.seed), ", (")
     # state
     adv = Integer[rng.adv_jump, rng.adv]
     if rng.adv_vals != -1 || rng.adv_ints != -1
@@ -277,68 +273,125 @@ end

 ### seeding

-#### make_seed()
+#### random_seed() & hash_seed()

-# make_seed produces values of type Vector{UInt32}, suitable for MersenneTwister seeding
-function make_seed()
+# random_seed tries to produce a random seed of type UInt128 from system entropy
+function random_seed()
     try
-        return rand(RandomDevice(), UInt32, 4)
+        # as MersenneTwister prints its seed when `show`ed, 128 bits is a good compromise for
+        # almost surely always getting distinct seeds, while having them printed reasonably tersely
+        return rand(RandomDevice(), UInt128)
     catch ex
         ex isa IOError || rethrow()
         @warn "Entropy pool not available to seed RNG; using ad-hoc entropy sources."
-        return make_seed(Libc.rand())
+        return Libc.rand()
     end
 end

-function make_seed(n::Integer)
-    n < 0 && throw(DomainError(n, "`n` must be non-negative."))
-    seed = UInt32[]
+function hash_seed(seed::Integer)
+    ctx = SHA.SHA2_256_CTX()
+    neg = signbit(seed)
+    if neg
+        seed = ~seed
+    end
+    @assert seed >= 0
     while true
-        push!(seed, n & 0xffffffff)
-        n >>= 32
-        if n == 0
-            return seed
-        end
+        word = (seed % UInt32) & 0xffffffff
+        seed >>>= 32
+        SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, word))
+        iszero(seed) && break
+    end
+    # make sure the hash of negative numbers is different from the hash of positive numbers
+    neg && SHA.update!(ctx, (0x01,))
+    SHA.digest!(ctx)
+end
+
+function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}})
+    ctx = SHA.SHA2_256_CTX()
+    for xx in seed
+        SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
     end
+    # discriminate from hash_seed(::Integer)
+    SHA.update!(ctx, (0x10,))
+    SHA.digest!(ctx)
+end
+
+function hash_seed(str::AbstractString)
+    ctx = SHA.SHA2_256_CTX()
+    # convert to String such that `codeunits(str)` below is consistent between equal
+    # strings of different types
+    str = String(str)
+    SHA.update!(ctx, codeunits(str))
+    # signature for strings: so far, all hash_seed functions end up hashing a multiple
+    # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many
+    # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding),
+    # and then hash the signature 0x05; in order for strings of different lengths to have
+    # different hashes, padding bytes are set equal to the number of padding bytes
+    pad = 4 - mod(ncodeunits(str), 4)
+    for _=1:pad
+        SHA.update!(ctx, (pad % UInt8,))
+    end
+    SHA.update!(ctx, (0x05,))
+    SHA.digest!(ctx)
 end

-# inverse of make_seed(::Integer)
-from_seed(a::Vector{UInt32})::BigInt = sum(a[i] * big(2)^(32*(i-1)) for i in 1:length(a))
+"""
+    hash_seed(seed) -> AbstractVector{UInt8}
+
+Return a cryptographic hash of `seed` of size 256 bits (32 bytes).
+`seed` can currently be of type
+`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
+but modules can extend this function for types they own.
+
+`hash_seed` is "injective": if `n != m`, then `hash_seed(n) != hash_seed(m)`.
+Moreover, if `n == m`, then `hash_seed(n) == hash_seed(m)`.
+
+This is an internal function subject to change.
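+
+As an example of a consumer, `seed!(r::MersenneTwister, seed)` below simply calls
+`initstate!(r, hash_seed(seed), seed)`.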
+""" +hash_seed #### seed!() -function seed!(r::MersenneTwister, seed::Vector{UInt32}) - copyto!(resize!(r.seed, length(seed)), seed) - dsfmt_init_by_array(r.state, r.seed) +function initstate!(r::MersenneTwister, data::StridedVector, seed) + # we deepcopy `seed` because the caller might mutate it, and it's useful + # to keep it constant inside `MersenneTwister`; but multiple instances + # can share the same seed without any problem (e.g. in `copy`) + r.seed = deepcopy(seed) + dsfmt_init_by_array(r.state, reinterpret(UInt32, data)) reset_caches!(r) r.adv = 0 r.adv_jump = 0 return r end -seed!(r::MersenneTwister) = seed!(r, make_seed()) -seed!(r::MersenneTwister, n::Integer) = seed!(r, make_seed(n)) +# when a seed is not provided, we generate one via `RandomDevice()` in `random_seed()` rather +# than calling directly `initstate!` with `rand(RandomDevice(), UInt32, whatever)` because the +# seed is printed in `show(::MersenneTwister)`, so we need one; the cost of `hash_seed` is a +# small overhead compared to `initstate!`, so this simple solution is fine +seed!(r::MersenneTwister, ::Nothing) = seed!(r, random_seed()) +seed!(r::MersenneTwister, seed) = initstate!(r, hash_seed(seed), seed) ### Global RNG -struct _GLOBAL_RNG <: AbstractRNG - global const GLOBAL_RNG = _GLOBAL_RNG.instance -end +""" + Random.default_rng() -> rng -# GLOBAL_RNG currently uses TaskLocalRNG -typeof_rng(::_GLOBAL_RNG) = TaskLocalRNG +Return the default global random number generator (RNG), which is used by `rand`-related functions when +no explicit RNG is provided. -""" - default_rng() -> rng +When the `Random` module is loaded, the default RNG is _randomly_ seeded, via [`Random.seed!()`](@ref): +this means that each time a new julia session is started, the first call to `rand()` produces a different +result, unless `seed!(seed)` is called first. -Return the default global random number generator (RNG). +It is thread-safe: distinct threads can safely call `rand`-related functions on `default_rng()` concurrently, +e.g. `rand(default_rng())`. !!! note - What the default RNG is is an implementation detail. Across different versions of - Julia, you should not expect the default RNG to be always the same, nor that it will - return the same stream of random numbers for a given seed. + The type of the default RNG is an implementation detail. Across different versions of + Julia, you should not expect the default RNG to always have the same type, nor that it will + produce the same stream of random numbers for a given seed. !!! compat "Julia 1.3" This function was introduced in Julia 1.3. @@ -346,48 +399,36 @@ Return the default global random number generator (RNG). 
@inline default_rng() = TaskLocalRNG() @inline default_rng(tid::Int) = TaskLocalRNG() -copy!(dst::Xoshiro, ::_GLOBAL_RNG) = copy!(dst, default_rng()) -copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src) -copy(::_GLOBAL_RNG) = copy(default_rng()) - -GLOBAL_SEED = 0 -set_global_seed!(seed) = global GLOBAL_SEED = seed - -function seed!(::_GLOBAL_RNG, seed=rand(RandomDevice(), UInt64, 4)) - global GLOBAL_SEED = seed - seed!(default_rng(), seed) -end +# defined only for backward compatibility with pre-v1.3 code when `default_rng()` didn't exist; +# `GLOBAL_RNG` was never really documented, but was appearing in the docstring of `rand` +const GLOBAL_RNG = default_rng() -seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng) # to resolve ambiguity +# In v1.0, the GLOBAL_RNG was storing the seed which was used to initialize it; this seed was used to implement +# the following feature of `@testset`: +# > Before the execution of the body of a `@testset`, there is an implicit +# > call to `Random.seed!(seed)` where `seed` is the current seed of the global RNG. +# But the global RNG is now `TaskLocalRNG()` and doesn't store its seed; in order to not break `@testset`, +# in a call like `seed!(seed)` *without* an explicit RNG, we now store the state of `TaskLocalRNG()` in +# `task_local_storage()` -seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64}}=nothing) = - seed!(GLOBAL_RNG, seed) +# GLOBAL_SEED is used as a fall-back when no tls seed is found +# only `Random.__init__` is allowed to set it +const GLOBAL_SEED = Xoshiro(0, 0, 0, 0, 0) -rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng()) -rand(::_GLOBAL_RNG, sp::SamplerBoolBitInteger) = rand(default_rng(), sp) -for T in (:(SamplerTrivial{UInt52Raw{UInt64}}), - :(SamplerTrivial{UInt2x52Raw{UInt128}}), - :(SamplerTrivial{UInt104Raw{UInt128}}), - :(SamplerTrivial{CloseOpen01_64}), - :(SamplerTrivial{CloseOpen12_64}), - :(SamplerUnion(Int64, UInt64, Int128, UInt128)), - :(SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)), - ) - @eval rand(::_GLOBAL_RNG, x::$T) = rand(default_rng(), x) -end +get_tls_seed() = get!(() -> copy(GLOBAL_SEED), task_local_storage(), + :__RANDOM_GLOBAL_RNG_SEED_uBlmfA8ZS__)::Xoshiro -rand!(::_GLOBAL_RNG, A::AbstractArray{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I) -rand!(::_GLOBAL_RNG, A::Array{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I) -for T in (Float16, Float32) - @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen12{$T}}) = rand!(default_rng(), A, I) - @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen01{$T}}) = rand!(default_rng(), A, I) -end -for T in BitInteger_types - @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I) +# seed the default RNG +function seed!(seed=nothing) + seed!(default_rng(), seed) + copy!(get_tls_seed(), default_rng()) + default_rng() end function __init__() - seed!(GLOBAL_RNG) + # do not call no-arg `seed!()` to not update `task_local_storage()` unnecessarily at startup + seed!(default_rng()) + copy!(GLOBAL_SEED, TaskLocalRNG()) ccall(:jl_gc_init_finalizer_rng_state, Cvoid, ()) end @@ -701,7 +742,7 @@ end function _randjump(r::MersenneTwister, jumppoly::DSFMT.GF2X) adv = r.adv adv_jump = r.adv_jump - s = MersenneTwister(copy(r.seed), DSFMT.dsfmt_jump(r.state, jumppoly)) + s = MersenneTwister(r.seed, DSFMT.dsfmt_jump(r.state, jumppoly)) reset_caches!(s) s.adv = adv s.adv_jump = adv_jump @@ -729,13 +770,13 @@ 
jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps)) # 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown)) # 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{6,Integer}) = advance!(MersenneTwister(seed), advance...) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{4,Integer}) = MersenneTwister(seed, (advance..., 0, 0)) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{2,Integer}) = MersenneTwister(seed, (advance..., 0, 0, 0, 0)) # advances raw state (per fill_array!) of r by n steps (Float64 values) diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 78d4f15e2beac..2d75f49480a7b 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -4,7 +4,7 @@ Random Support for generating random numbers. Provides [`rand`](@ref), [`randn`](@ref), -[`AbstractRNG`](@ref), [`MersenneTwister`](@ref), and [`RandomDevice`](@ref). +[`AbstractRNG`](@ref), [`Xoshiro`](@ref), [`MersenneTwister`](@ref), and [`RandomDevice`](@ref). """ module Random @@ -29,6 +29,8 @@ export rand!, randn!, randcycle, randcycle!, AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro +public seed!, default_rng, Sampler, SamplerType, SamplerTrivial, SamplerSimple + ## general definitions """ @@ -136,11 +138,9 @@ the amount of precomputation, if applicable. *types* and *values*, respectively. [`Random.SamplerSimple`](@ref) can be used to store pre-computed values without defining extra types for only this purpose. """ -Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof_rng(rng), x, r) +Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof(rng), x, r) Sampler(rng::AbstractRNG, ::Type{X}, r::Repetition=Val(Inf)) where {X} = - Sampler(typeof_rng(rng), X, r) - -typeof_rng(rng::AbstractRNG) = typeof(rng) + Sampler(typeof(rng), X, r) # this method is necessary to prevent rand(rng::AbstractRNG, X) from # recursively constructing nested Sampler types. @@ -313,12 +313,31 @@ include("XoshiroSimd.jl") Pick a random element or array of random elements from the set of values specified by `S`; `S` can be -* an indexable collection (for example `1:9` or `('x', "y", :z)`), -* an `AbstractDict` or `AbstractSet` object, +* an indexable collection (for example `1:9` or `('x', "y", :z)`) + +* an `AbstractDict` or `AbstractSet` object + * a string (considered as a collection of characters), or -* a type: the set of values to pick from is then equivalent to `typemin(S):typemax(S)` for - integers (this is not applicable to [`BigInt`](@ref)), to ``[0, 1)`` for floating - point numbers and to ``[0, 1)+i[0, 1)`` for complex floating point numbers; + +* a type from the list below, corresponding to the specified set of values + + + concrete integer types sample from `typemin(S):typemax(S)` (excepting [`BigInt`](@ref) which is not supported) + + + concrete floating point types sample from `[0, 1)` + + + concrete complex types `Complex{T}` if `T` is a sampleable type take their real and imaginary components + independently from the set of values corresponding to `T`, but are not supported if `T` is not sampleable. 
+ + + all `<:AbstractChar` types sample from the set of valid Unicode scalars + + + a user-defined type and set of values; for implementation guidance please see [Hooking into the `Random` API](@ref rand-api-hook) + + + a tuple type of known size and where each parameter of `S` is itself a sampleable type; return a value of type `S`. + Note that tuple types such as `Tuple{Vararg{T}}` (unknown size) and `Tuple{1:2}` (parameterized with a value) are not supported + + + a `Pair` type, e.g. `Pair{X, Y}` such that `rand` is defined for `X` and `Y`, + in which case random pairs are produced. + `S` defaults to [`Float64`](@ref). When only one argument is passed besides the optional `rng` and is a `Tuple`, it is interpreted @@ -330,23 +349,26 @@ See also [`randn`](@ref) for normally distributed numbers, and [`rand!`](@ref) a !!! compat "Julia 1.1" Support for `S` as a tuple requires at least Julia 1.1. +!!! compat "Julia 1.11" + Support for `S` as a `Tuple` type requires at least Julia 1.11. + # Examples ```julia-repl julia> rand(Int, 2) -2-element Array{Int64,1}: +2-element Vector{Int64}: 1339893410598768192 1575814717733606317 julia> using Random -julia> rand(MersenneTwister(0), Dict(1=>2, 3=>4)) -1=>2 +julia> rand(Xoshiro(0), Dict(1=>2, 3=>4)) +3 => 4 julia> rand((2, 3)) 3 julia> rand(Float64, (2, 3)) -2×3 Array{Float64,2}: +2×3 Matrix{Float64}: 0.999717 0.0143835 0.540787 0.696556 0.783855 0.938235 ``` @@ -372,15 +394,13 @@ but without allocating a new array. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> rand!(rng, zeros(5)) +julia> rand!(Xoshiro(123), zeros(5)) 5-element Vector{Float64}: - 0.5908446386657102 - 0.7667970365022592 - 0.5662374165061859 - 0.4600853424625171 - 0.7940257103317943 + 0.521213795535383 + 0.5868067574533484 + 0.8908786980927811 + 0.19090669902576285 + 0.5256623915420473 ``` """ rand! @@ -394,6 +414,8 @@ sequence of numbers if and only if a `seed` is provided. Some RNGs don't accept a seed, like `RandomDevice`. After the call to `seed!`, `rng` is equivalent to a newly created object initialized with the same seed. +The types of accepted seeds depend on the type of `rng`, but in general, +integer seeds should work. If `rng` is not specified, it defaults to seeding the state of the shared task-local generator. @@ -433,6 +455,11 @@ julia> rand(Xoshiro(), Bool) # not reproducible either true ``` """ -seed!(rng::AbstractRNG, ::Nothing) = seed!(rng) +seed!(rng::AbstractRNG) = seed!(rng, nothing) +#= +We have this generic definition instead of the alternative option +`seed!(rng::AbstractRNG, ::Nothing) = seed!(rng)` +because it would lead too easily to ambiguities, e.g. when we define `seed!(::Xoshiro, seed)`. +=# end # module diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl index 3be276ad23754..94c7e1ab24e1d 100644 --- a/stdlib/Random/src/Xoshiro.jl +++ b/stdlib/Random/src/Xoshiro.jl @@ -4,13 +4,13 @@ # Lots of implementation is shared with TaskLocalRNG """ - Xoshiro(seed) + Xoshiro(seed::Union{Integer, AbstractString}) Xoshiro() Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and Sebastiano Vigna in "Scrambled Linear Pseudorandom Number Generators", ACM Trans. Math. Softw., 2021. Reference implementation is available -at http://prng.di.unimi.it +at https://prng.di.unimi.it Apart from the high speed, Xoshiro has a small memory footprint, making it suitable for applications where many different random states need to be held for long time. 
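A minimal usage sketch for the `Xoshiro` docstring above (nothing here goes beyond the documented constructors): an explicitly seeded `Xoshiro` reproduces its stream, `copy` snapshots the full state, and a no-argument `Xoshiro()` is seeded from system entropy.

```julia
using Random

rng = Xoshiro(2024)                    # explicit seed: reproducible
@assert rand(Xoshiro(2024), 5) == rand(copy(rng), 5)

rng2 = Xoshiro()                       # seeded from system entropy
rand(rng2, UInt64)                     # almost surely differs between sessions
```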
@@ -21,6 +21,12 @@ multiple interleaved xoshiro instances). The virtual PRNGs are discarded once the bulk request has been serviced (and should cause no heap allocations). +If no seed is provided, a randomly generated one is created (using entropy from the system). +See the [`seed!`](@ref) function for reseeding an already existing `Xoshiro` object. + +!!! compat "Julia 1.11" + Passing a negative integer seed requires at least Julia 1.11. + # Examples ```jldoctest julia> using Random @@ -48,47 +54,130 @@ mutable struct Xoshiro <: AbstractRNG s1::UInt64 s2::UInt64 s3::UInt64 + s4::UInt64 # internal splitmix state - Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = new(s0, s1, s2, s3) + Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer, s4::Integer) = new(s0, s1, s2, s3, s4) + Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = initstate!(new(), map(UInt64, (s0, s1, s2, s3))) Xoshiro(seed=nothing) = seed!(new(), seed) end -function setstate!(x::Xoshiro, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64) +@inline function setstate!(x::Xoshiro, (s0, s1, s2, s3, s4)) x.s0 = s0 x.s1 = s1 x.s2 = s2 x.s3 = s3 + if s4 !== nothing + x.s4 = s4 + end x end -copy(rng::Xoshiro) = Xoshiro(rng.s0, rng.s1, rng.s2, rng.s3) +@inline getstate(x::Xoshiro) = (x.s0, x.s1, x.s2, x.s3, x.s4) -function copy!(dst::Xoshiro, src::Xoshiro) - dst.s0, dst.s1, dst.s2, dst.s3 = src.s0, src.s1, src.s2, src.s3 - dst -end +rng_native_52(::Xoshiro) = UInt64 -function ==(a::Xoshiro, b::Xoshiro) - a.s0 == b.s0 && a.s1 == b.s1 && a.s2 == b.s2 && a.s3 == b.s3 +# Jump functions from: https://xoshiro.di.unimi.it/xoshiro256plusplus.c + +for (fname, JUMP) in ((:jump_128, (0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c)), + (:jump_192, (0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635))) + local fname! = Symbol(fname, :!) + @eval function $fname!(rng::Xoshiro) + _s0 = 0x0000000000000000 + _s1 = 0x0000000000000000 + _s2 = 0x0000000000000000 + _s3 = 0x0000000000000000 + s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3 + for j in $JUMP + for b in 0x0000000000000000:0x000000000000003f + if (j & 0x0000000000000001 << b) != 0 + _s0 ⊻= s0 + _s1 ⊻= s1 + _s2 ⊻= s2 + _s3 ⊻= s3 + end + t = s1 << 17 + s2 = xor(s2, s0) + s3 = xor(s3, s1) + s1 = xor(s1, s2) + s0 = xor(s0, s3) + s2 = xor(s2, t) + s3 = s3 << 45 | s3 >> 19 + end + end + setstate!(rng, (_s0, _s1, _s2, _s3, nothing)) + end + @eval $fname(rng::Xoshiro) = $fname!(copy(rng)) + + @eval function $fname!(rng::Xoshiro, n::Integer) + n < 0 && throw(DomainError(n, "the number of jumps must be ≥ 0")) + i = zero(n) + while i < n + $fname!(rng) + i += one(n) + end + rng + end + + @eval $fname(rng::Xoshiro, n::Integer) = $fname!(copy(rng), n) end -rng_native_52(::Xoshiro) = UInt64 +for (fname, sz) in ((:jump_128, 128), (:jump_192, 192)) + local fname! = Symbol(fname, :!) + local see_other = Symbol(fname === :jump_128 ? :jump_192 : :jump_128) + local see_other! = Symbol(see_other, :!) 
+ local seq_pow = 256 - sz + @eval begin + """ + $($fname!)(rng::Xoshiro, [n::Integer=1]) -@inline function rand(rng::Xoshiro, ::SamplerType{UInt64}) - s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3 - tmp = s0 + s3 - res = ((tmp << 23) | (tmp >> 41)) + s0 - t = s1 << 17 - s2 = xor(s2, s0) - s3 = xor(s3, s1) - s1 = xor(s1, s2) - s0 = xor(s0, s3) - s2 = xor(s2, t) - s3 = s3 << 45 | s3 >> 19 - rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 - res -end + Jump forward, advancing the state equivalent to `2^$($sz)` calls which consume + 8 bytes (i.e. a full `UInt64`) each. + + If `n > 0` is provided, the state is advanced equivalent to `n * 2^$($sz)` calls; if `n = 0`, + the state remains unchanged. + + This can be used to generate `2^$($seq_pow)` non-overlapping subsequences for parallel computations. + + See also: [`$($fname)`](@ref), [`$($see_other!)`](@ref) + + # Examples + ```julia-repl + julia> $($fname!)($($fname!)(Xoshiro(1))) == $($fname!)(Xoshiro(1), 2) + true + ``` + """ + function $fname! end + end + + @eval begin + """ + $($fname)(rng::Xoshiro, [n::Integer=1]) + + Return a copy of `rng` with the state advanced equivalent to `n * 2^$($sz)` calls which consume + 8 bytes (i.e. a full `UInt64`) each; if `n = 0`, the state of the returned copy will be + identical to `rng`. + This can be used to generate `2^$($seq_pow)` non-overlapping subsequences for parallel computations. + + See also: [`$($fname!)`](@ref), [`$($see_other)`](@ref) + + # Examples + ```julia-repl + julia> x = Xoshiro(1); + + julia> $($fname)($($fname)(x)) == $($fname)(x, 2) + true + + julia> $($fname)(x, 0) == x + true + + julia> $($fname)(x, 0) === x + false + ``` + """ + function $fname end + end +end ## Task local RNG @@ -96,8 +185,8 @@ end TaskLocalRNG The `TaskLocalRNG` has state that is local to its task, not its thread. -It is seeded upon task creation, from the state of its parent task. -Therefore, task creation is an event that changes the parent's RNG state. +It is seeded upon task creation, from the state of its parent task, but without +advancing the state of the parent's RNG. As an upside, the `TaskLocalRNG` is pretty fast, and permits reproducible multithreaded simulations (barring race conditions), independent of scheduler @@ -108,28 +197,71 @@ endianness and possibly word size. Using or seeding the RNG of any other task than the one returned by `current_task()` is undefined behavior: it will work most of the time, and may sometimes fail silently. + +When seeding `TaskLocalRNG()` with [`seed!`](@ref), the passed seed, if any, +may be any integer. + +!!! compat "Julia 1.11" + Seeding `TaskLocalRNG()` with a negative integer seed requires at least Julia 1.11. + +!!! compat "Julia 1.10" + Task creation no longer advances the parent task's RNG state as of Julia 1.10. 
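The compat note above can be checked with a small sketch (assumes Julia ≥ 1.10, where task creation forks the child's RNG state without advancing the parent's observable stream):

```julia
using Random

Random.seed!(TaskLocalRNG(), 1234)
expected = rand(4)

Random.seed!(TaskLocalRNG(), 1234)
wait(Threads.@spawn rand())            # child task draws from its own forked state
@assert rand(4) == expected            # the parent's stream is unaffected
```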
""" struct TaskLocalRNG <: AbstractRNG end TaskLocalRNG(::Nothing) = TaskLocalRNG() -rng_native_52(::TaskLocalRNG) = UInt64 -function setstate!( - x::TaskLocalRNG, - s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64, # xoshiro256 state - s4::UInt64 = 1s0 + 3s1 + 5s2 + 7s3, # internal splitmix state -) +@inline function setstate!(x::TaskLocalRNG, (s0, s1, s2, s3, s4)) t = current_task() t.rngState0 = s0 t.rngState1 = s1 t.rngState2 = s2 t.rngState3 = s3 - t.rngState4 = s4 + if s4 !== nothing + t.rngState4 = s4 + end x end -@inline function rand(::TaskLocalRNG, ::SamplerType{UInt64}) - task = current_task() - s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 +@inline function getstate(::TaskLocalRNG) + t = current_task() + (t.rngState0, t.rngState1, t.rngState2, t.rngState3, t.rngState4) +end + +rng_native_52(::TaskLocalRNG) = UInt64 + + +## Shared implementation between Xoshiro and TaskLocalRNG + +# this variant of setstate! initializes the internal splitmix state, a.k.a. `s4` +@inline function initstate!(x::Union{TaskLocalRNG, Xoshiro}, state) + length(state) == 4 && eltype(state) == UInt64 || + throw(ArgumentError("initstate! expects a list of 4 `UInt64` values")) + s0, s1, s2, s3 = state + setstate!(x, (s0, s1, s2, s3, 1s0 + 3s1 + 5s2 + 7s3)) +end + +copy(rng::Union{TaskLocalRNG, Xoshiro}) = Xoshiro(getstate(rng)...) +copy!(dst::Union{TaskLocalRNG, Xoshiro}, src::Union{TaskLocalRNG, Xoshiro}) = setstate!(dst, getstate(src)) +==(x::Union{TaskLocalRNG, Xoshiro}, y::Union{TaskLocalRNG, Xoshiro}) = getstate(x) == getstate(y) +# use a magic (random) number to scramble `h` so that `hash(x)` is distinct from `hash(getstate(x))` +hash(x::Union{TaskLocalRNG, Xoshiro}, h::UInt) = hash(getstate(x), h + 0x49a62c2dda6fa9be % UInt) + +function seed!(rng::Union{TaskLocalRNG, Xoshiro}, ::Nothing) + # as we get good randomness from RandomDevice, we can skip hashing + rd = RandomDevice() + s0 = rand(rd, UInt64) + s1 = rand(rd, UInt64) + s2 = rand(rd, UInt64) + s3 = rand(rd, UInt64) + initstate!(rng, (s0, s1, s2, s3)) +end + +seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed) = + initstate!(rng, reinterpret(UInt64, hash_seed(seed))) + + +@inline function rand(x::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt64}) + s0, s1, s2, s3 = getstate(x) tmp = s0 + s3 res = ((tmp << 23) | (tmp >> 41)) + s0 t = s1 << 17 @@ -139,28 +271,10 @@ end s0 ⊻= s3 s2 ⊻= t s3 = s3 << 45 | s3 >> 19 - task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 + setstate!(x, (s0, s1, s2, s3, nothing)) res end -# Shared implementation between Xoshiro and TaskLocalRNG -- seeding - -function seed!(rng::Union{TaskLocalRNG,Xoshiro}) - # as we get good randomness from RandomDevice, we can skip hashing - rd = RandomDevice() - setstate!(rng, rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64)) -end - -function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::Union{Vector{UInt32}, Vector{UInt64}}) - c = SHA.SHA2_256_CTX() - SHA.update!(c, reinterpret(UInt8, seed)) - s0, s1, s2, s3 = reinterpret(UInt64, SHA.digest!(c)) - setstate!(rng, s0, s1, s2, s3) -end - -seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, make_seed(seed)) - - @inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128}) first = rand(rng, UInt64) second = rand(rng,UInt64) @@ -176,41 +290,22 @@ end (rand(rng, UInt64) >>> (64 - 8*sizeof(S))) % S end -function copy(rng::TaskLocalRNG) - t = current_task() - Xoshiro(t.rngState0, t.rngState1, t.rngState2, t.rngState3) -end - 
-function copy!(dst::TaskLocalRNG, src::Xoshiro) - t = current_task() - setstate!(dst, src.s0, src.s1, src.s2, src.s3) - return dst -end - -function copy!(dst::Xoshiro, src::TaskLocalRNG) - t = current_task() - setstate!(dst, t.rngState0, t.rngState1, t.rngState2, t.rngState3) - return dst -end - -function ==(a::Xoshiro, b::TaskLocalRNG) - t = current_task() - a.s0 == t.rngState0 && a.s1 == t.rngState1 && a.s2 == t.rngState2 && a.s3 == t.rngState3 -end - -==(a::TaskLocalRNG, b::Xoshiro) = b == a - # for partial words, use upper bits from Xoshiro rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52Raw{UInt64}}) = rand(r, UInt64) >>> 12 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52{UInt64}}) = rand(r, UInt64) >>> 12 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt104{UInt128}}) = rand(r, UInt104Raw()) -rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float16}}) = - Float16(Float32(rand(r, UInt16) >>> 5) * Float32(0x1.0p-11)) - -rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float32}}) = - Float32(rand(r, UInt32) >>> 8) * Float32(0x1.0p-24) - -rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01_64}) = - Float64(rand(r, UInt64) >>> 11) * 0x1.0p-53 +for FT in (Float16, Float32, Float64) + UT = Base.uinttype(FT) + # Helper function: scale an unsigned integer to a floating point number of the same size + # in the interval [0, 1). This is equivalent to, but more easily extensible than + # Float16(i >>> 5) * Float16(0x1.0p-11) + # Float32(i >>> 8) * Float32(0x1.0p-24) + # Float32(i >>> 11) * Float64(0x1.0p-53) + @eval @inline _uint2float(i::$(UT), ::Type{$(FT)}) = + $(FT)(i >>> $(8 * sizeof(FT) - precision(FT))) * $(FT(2) ^ -precision(FT)) + + @eval rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{$(FT)}}) = + _uint2float(rand(r, $(UT)), $(FT)) +end diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl index 1a16baa4bce28..58544714dd9f5 100644 --- a/stdlib/Random/src/XoshiroSimd.jl +++ b/stdlib/Random/src/XoshiroSimd.jl @@ -2,8 +2,8 @@ module XoshiroSimd # Getting the xoroshiro RNG to reliably vectorize is somewhat of a hassle without Simd.jl. -import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView, - SamplerType, SamplerTrivial +import ..Random: rand! +using ..Random: TaskLocalRNG, rand, Xoshiro, CloseOpen01, UnsafeView, SamplerType, SamplerTrivial, getstate, setstate!, _uint2float using Base: BitInteger_types using Base.Libc: memcpy using Core.Intrinsics: llvmcall @@ -30,7 +30,12 @@ simdThreshold(::Type{Bool}) = 640 Tuple{UInt64, Int64}, x, y) -@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64, Float64(x >>> 11) * 0x1.0p-53) +# `_bits2float(x::UInt64, T)` takes `x::UInt64` as input, it splits it in `N` parts where +# `N = sizeof(UInt64) / sizeof(T)` (`N = 1` for `Float64`, `N = 2` for `Float32, etc...), it +# truncates each part to the unsigned type of the same size as `T`, scales all of these +# numbers to a value of type `T` in the range [0,1) with `_uint2float`, and then +# recomposes another `UInt64` using all these parts. 
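Before the `_bits2float` definitions that follow, here is a standalone numeric illustration of the `_uint2float` scaling they rely on (the helper `uint2float32` below is a hypothetical re-derivation, not the patch's function): keep the top `precision(T)` bits of the integer and scale them into `[0, 1)`.

```julia
uint2float32(i::UInt32) = Float32(i >>> 8) * Float32(2)^-24    # 24 = precision(Float32)

@assert uint2float32(0x00000000) == 0.0f0
@assert uint2float32(0xffffffff) == prevfloat(1.0f0)           # never reaches 1.0
```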
+@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64, _uint2float(x, Float64)) @inline function _bits2float(x::UInt64, ::Type{Float32}) #= # this implementation uses more high bits, but is harder to vectorize @@ -40,10 +45,21 @@ simdThreshold(::Type{Bool}) = 640 =# ui = (x>>>32) % UInt32 li = x % UInt32 - u = Float32(ui >>> 8) * Float32(0x1.0p-24) - l = Float32(li >>> 8) * Float32(0x1.0p-24) + u = _uint2float(ui, Float32) + l = _uint2float(ui, Float32) (UInt64(reinterpret(UInt32, u)) << 32) | UInt64(reinterpret(UInt32, l)) end +@inline function _bits2float(x::UInt64, ::Type{Float16}) + i1 = (x>>>48) % UInt16 + i2 = (x>>>32) % UInt16 + i3 = (x>>>16) % UInt16 + i4 = x % UInt16 + f1 = _uint2float(i1, Float16) + f2 = _uint2float(i2, Float16) + f3 = _uint2float(i3, Float16) + f4 = _uint2float(i4, Float16) + return (UInt64(reinterpret(UInt16, f1)) << 48) | (UInt64(reinterpret(UInt16, f2)) << 32) | (UInt64(reinterpret(UInt16, f3)) << 16) | UInt64(reinterpret(UInt16, f4)) +end # required operations. These could be written more concisely with `ntuple`, but the compiler # sometimes refuses to properly vectorize. @@ -118,6 +134,18 @@ for N in [4,8,16] ret <$N x i64> %i """ @eval @inline _bits2float(x::$VT, ::Type{Float32}) = llvmcall($code, $VT, Tuple{$VT}, x) + + code = """ + %as16 = bitcast <$N x i64> %0 to <$(4N) x i16> + %shiftamt = shufflevector <1 x i16> , <1 x i16> undef, <$(4N) x i32> zeroinitializer + %sh = lshr <$(4N) x i16> %as16, %shiftamt + %f = uitofp <$(4N) x i16> %sh to <$(4N) x half> + %scale = shufflevector <1 x half> , <1 x half> undef, <$(4N) x i32> zeroinitializer + %m = fmul <$(4N) x half> %f, %scale + %i = bitcast <$(4N) x half> %m to <$N x i64> + ret <$N x i64> %i + """ + @eval @inline _bits2float(x::$VT, ::Type{Float16}) = llvmcall($code, $VT, Tuple{$VT}, x) end end @@ -137,7 +165,7 @@ end _id(x, T) = x -@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F} +@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float16}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F} if len >= simdThreshold(T) written = xoshiro_bulk_simd(rng, dst, len, T, Val(N), f) len -= written @@ -149,14 +177,9 @@ _id(x, T) = x nothing end -@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F) where {T, F} - if rng isa TaskLocalRNG - task = current_task() - s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 - else - (; s0, s1, s2, s3) = rng::Xoshiro - end - +@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F + ) where {T, F} + s0, s1, s2, s3 = getstate(rng) i = 0 while i+8 <= len res = _plus(_rotl23(_plus(s0,s3)),s0) @@ -183,22 +206,12 @@ end # TODO: This may make the random-stream dependent on system endianness GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) end - if rng isa TaskLocalRNG - task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 - else - rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 - end + setstate!(rng, (s0, s1, s2, s3, nothing)) nothing end @noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, f) - if rng isa TaskLocalRNG - task = current_task() - s0, s1, s2, s3 = 
task.rngState0, task.rngState1, task.rngState2, task.rngState3 - else - (; s0, s1, s2, s3) = rng::Xoshiro - end - + s0, s1, s2, s3 = getstate(rng) i = 0 while i+8 <= len res = _plus(_rotl23(_plus(s0,s3)),s0) @@ -232,11 +245,7 @@ end s2 = _xor(s2, t) s3 = _rotl45(s3) end - if rng isa TaskLocalRNG - task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 - else - rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 - end + setstate!(rng, (s0, s1, s2, s3, nothing)) nothing end @@ -284,13 +293,8 @@ end end -function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float32}, ::SamplerTrivial{CloseOpen01{Float32}}) - GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*4, Float32, xoshiroWidth(), _bits2float) - dst -end - -function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float64}, ::SamplerTrivial{CloseOpen01{Float64}}) - GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*8, Float64, xoshiroWidth(), _bits2float) +function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{T}, ::SamplerTrivial{CloseOpen01{T}}) where {T<:Union{Float16,Float32,Float64}} + GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof(T), T, xoshiroWidth(), _bits2float) dst end diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl index cc9840f678413..b605dff9e5d80 100644 --- a/stdlib/Random/src/generation.jl +++ b/stdlib/Random/src/generation.jl @@ -19,7 +19,7 @@ Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {RNG<:AbstractRNG,T<:AbstractFloat} = Sampler(RNG, CloseOpen01(T), n) -# generic random generation function which can be used by RNG implementors +# generic random generation function which can be used by RNG implementers # it is not defined as a fallback rand method as this could create ambiguities rand(r::AbstractRNG, ::SamplerTrivial{CloseOpen01{Float16}}) = @@ -66,7 +66,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat) limbs[end] |= Limb_high_bit end z.sign = 1 - GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs) + copyto!(z.d, limbs) randbool end @@ -130,7 +130,7 @@ rand(r::AbstractRNG, sp::SamplerTrivial{<:UniformBits{T}}) where {T} = #### BitInteger -# rand_generic methods are intended to help RNG implementors with common operations +# rand_generic methods are intended to help RNG implementers with common operations # we don't call them simply `rand` as this can easily contribute to create # ambiguities with user-side methods (forcing the user to resort to @eval) @@ -167,6 +167,38 @@ function rand(r::AbstractRNG, ::SamplerType{T}) where {T<:AbstractChar} (c < 0xd800) ? 
T(c) : T(c+0x800) end +### random tuples + +function Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {T<:Tuple, RNG<:AbstractRNG} + tail_sp_ = Sampler(RNG, Tuple{Base.tail(fieldtypes(T))...}, n) + SamplerTag{Ref{T}}((Sampler(RNG, fieldtype(T, 1), n), tail_sp_.data...)) + # Ref so that the gentype is `T` in SamplerTag's constructor +end + +function Sampler(::Type{RNG}, ::Type{Tuple{Vararg{T, N}}}, n::Repetition) where {T, N, RNG<:AbstractRNG} + if N > 0 + SamplerTag{Ref{Tuple{Vararg{T, N}}}}((Sampler(RNG, T, n),)) + else + SamplerTag{Ref{Tuple{}}}(()) + end +end + +function rand(rng::AbstractRNG, sp::SamplerTag{Ref{T}}) where T<:Tuple + ntuple(i -> rand(rng, sp.data[min(i, length(sp.data))]), Val{fieldcount(T)}())::T +end + +### random pairs + +function Sampler(::Type{RNG}, ::Type{Pair{A, B}}, n::Repetition) where {RNG<:AbstractRNG, A, B} + sp1 = Sampler(RNG, A, n) + sp2 = A === B ? sp1 : Sampler(RNG, B, n) + SamplerTag{Ref{Pair{A,B}}}(sp1 => sp2) # Ref so that the gentype is Pair{A, B} + # in SamplerTag's constructor +end + +rand(rng::AbstractRNG, sp::SamplerTag{<:Ref{<:Pair}}) = + rand(rng, sp.data.first) => rand(rng, sp.data.second) + ## Generate random integer within a range @@ -262,7 +294,7 @@ rem_knuth(a::T, b::T) where {T<:Unsigned} = b != 0 ? a % b : a # maximum multiple of k <= sup decremented by one, # that is 0xFFFF...FFFF if k = (typemax(T) - typemin(T)) + 1 and sup == typemax(T) - 1 # with intentional underflow -# see http://stackoverflow.com/questions/29182036/integer-arithmetic-add-1-to-uint-max-and-divide-by-n-without-overflow +# see https://stackoverflow.com/questions/29182036/integer-arithmetic-add-1-to-uint-max-and-divide-by-n-without-overflow # sup == 0 means typemax(T) + 1 maxmultiple(k::T, sup::T=zero(T)) where {T<:Unsigned} = @@ -438,6 +470,12 @@ function rand(rng::AbstractRNG, sp::SamplerSimple{<:Dict,<:Sampler}) end end +rand(rng::AbstractRNG, sp::SamplerTrivial{<:Base.KeySet{<:Any,<:Dict}}) = + rand(rng, sp[].dict).first + +rand(rng::AbstractRNG, sp::SamplerTrivial{<:Base.ValueIterator{<:Dict}}) = + rand(rng, sp[].dict).second + ## random values from Set Sampler(::Type{RNG}, t::Set{T}, n::Repetition) where {RNG<:AbstractRNG,T} = diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl index b1e3a4808e026..908776383d45f 100644 --- a/stdlib/Random/src/misc.jl +++ b/stdlib/Random/src/misc.jl @@ -17,16 +17,14 @@ Generate a `BitArray` of random boolean values. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> bitrand(rng, 10) +julia> bitrand(Xoshiro(123), 10) 10-element BitVector: 0 - 0 - 0 + 1 0 1 0 + 1 0 0 1 @@ -55,8 +53,8 @@ number generator, see [Random Numbers](@ref). julia> Random.seed!(3); randstring() "Lxz5hUwn" -julia> randstring(MersenneTwister(3), 'a':'z', 6) -"ocucay" +julia> randstring(Xoshiro(3), 'a':'z', 6) +"iyzcsm" julia> randstring("ACGT") "TGCTCCTC" @@ -141,19 +139,17 @@ Like [`randsubseq`](@ref), but the results are stored in `S` # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - julia> S = Int64[]; -julia> randsubseq!(rng, S, 1:8, 0.3) +julia> randsubseq!(Xoshiro(123), S, 1:8, 0.3) 2-element Vector{Int64}: + 4 7 - 8 julia> S 2-element Vector{Int64}: + 4 7 - 8 ``` """ randsubseq!(S::AbstractArray, A::AbstractArray, p::Real) = randsubseq!(default_rng(), S, A, p) @@ -171,12 +167,10 @@ large.) Technically, this process is known as "Bernoulli sampling" of `A`. 
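A quick sketch of the `Tuple`/`Pair` sampling implemented in `generation.jl` above (requires a Julia version containing this patch, i.e. 1.11+): each component is drawn independently from its own sampler, and unsupported tuple forms are rejected.

```julia
using Random

t = rand(Tuple{Int8, Bool, Char})      # each field sampled independently
p = rand(Pair{Int8, Float64})
v = rand(Tuple{Vararg{UInt8, 3}}, 2)   # arrays of tuples work as usual
@assert t isa Tuple{Int8, Bool, Char}
@assert p isa Pair{Int8, Float64}
@assert v isa Vector{NTuple{3, UInt8}} && length(v) == 2

# Rejected forms (see the corresponding @test_throws checks in the test diff below):
# rand(Tuple{Vararg{Int}})   # ArgumentError: unknown tuple length
# rand(Tuple{1:2})           # TypeError: value parameter instead of a type
```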
# Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> randsubseq(rng, 1:8, 0.3) +julia> randsubseq(Xoshiro(123), 1:8, 0.3) 2-element Vector{Int64}: + 4 7 - 8 ``` """ randsubseq(A::AbstractArray, p::Real) = randsubseq(default_rng(), A, p) @@ -197,42 +191,50 @@ optionally supplying the random-number generator `rng`. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> shuffle!(rng, Vector(1:16)) -16-element Vector{Int64}: - 2 - 15 +julia> shuffle!(Xoshiro(123), Vector(1:10)) +10-element Vector{Int64}: 5 - 14 + 4 + 2 + 3 + 6 + 10 + 8 1 9 - 10 - 6 - 11 - 3 - 16 7 - 4 - 12 - 8 - 13 ``` """ function shuffle!(r::AbstractRNG, a::AbstractArray) + # keep it consistent with `randperm!` and `randcycle!` if possible require_one_based_indexing(a) n = length(a) - n <= 1 && return a # nextpow below won't work with n == 0 @assert n <= Int64(2)^52 - mask = nextpow(2, n) - 1 - for i = n:-1:2 - (mask >> 1) == i && (mask >>= 1) + n == 0 && return a + mask = 3 + @inbounds for i = 2:n j = 1 + rand(r, ltm52(i, mask)) a[i], a[j] = a[j], a[i] + i == 1 + mask && (mask = 2 * mask + 1) end return a end +function shuffle!(r::AbstractRNG, a::AbstractArray{Bool}) + old_count = count(a) + len = length(a) + uncommon_value = 2old_count <= len + fuel = uncommon_value ? old_count : len - old_count + fuel == 0 && return a + a .= !uncommon_value + while fuel > 0 + k = rand(r, eachindex(a)) + fuel -= a[k] != uncommon_value + a[k] = uncommon_value + end + a +end + shuffle!(a::AbstractArray) = shuffle!(default_rng(), a) """ @@ -245,20 +247,18 @@ indices, see [`randperm`](@ref). # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> shuffle(rng, Vector(1:10)) +julia> shuffle(Xoshiro(123), Vector(1:10)) 10-element Vector{Int64}: - 6 - 1 - 10 + 5 + 4 2 3 + 6 + 10 + 8 + 1 9 - 5 7 - 4 - 8 ``` """ shuffle(r::AbstractRNG, a::AbstractArray) = shuffle!(r, copymutable(a)) @@ -285,11 +285,11 @@ To randomly permute an arbitrary vector, see [`shuffle`](@ref) or # Examples ```jldoctest -julia> randperm(MersenneTwister(1234), 4) +julia> randperm(Xoshiro(123), 4) 4-element Vector{Int64}: - 2 1 4 + 2 3 ``` """ @@ -306,15 +306,16 @@ optional `rng` argument specifies a random number generator (see # Examples ```jldoctest -julia> randperm!(MersenneTwister(1234), Vector{Int}(undef, 4)) +julia> randperm!(Xoshiro(123), Vector{Int}(undef, 4)) 4-element Vector{Int64}: - 2 1 4 + 2 3 ``` """ function randperm!(r::AbstractRNG, a::Array{<:Integer}) + # keep it consistent with `shuffle!` and `randcycle!` if possible n = length(a) @assert n <= Int64(2)^52 n == 0 && return a @@ -326,7 +327,7 @@ function randperm!(r::AbstractRNG, a::Array{<:Integer}) a[i] = a[j] end a[j] = i - i == 1+mask && (mask = 2mask + 1) + i == 1 + mask && (mask = 2 * mask + 1) end return a end @@ -343,20 +344,26 @@ Construct a random cyclic permutation of length `n`. The optional `rng` argument specifies a random number generator, see [Random Numbers](@ref). The element type of the result is the same as the type of `n`. +Here, a "cyclic permutation" means that all of the elements lie within +a single cycle. If `n > 0`, there are ``(n-1)!`` possible cyclic permutations, +which are sampled uniformly. If `n == 0`, `randcycle` returns an empty vector. + +[`randcycle!`](@ref) is an in-place variant of this function. + !!! compat "Julia 1.1" - In Julia 1.1 `randcycle` returns a vector `v` with `eltype(v) == typeof(n)` - while in Julia 1.0 `eltype(v) == Int`. 
+ In Julia 1.1 and above, `randcycle` returns a vector `v` with + `eltype(v) == typeof(n)` while in Julia 1.0 `eltype(v) == Int`. # Examples ```jldoctest -julia> randcycle(MersenneTwister(1234), 6) +julia> randcycle(Xoshiro(123), 6) 6-element Vector{Int64}: - 3 5 4 + 2 6 + 3 1 - 2 ``` """ randcycle(r::AbstractRNG, n::T) where {T <: Integer} = randcycle!(r, Vector{T}(undef, n)) @@ -365,33 +372,41 @@ randcycle(n::Integer) = randcycle(default_rng(), n) """ randcycle!([rng=default_rng(),] A::Array{<:Integer}) -Construct in `A` a random cyclic permutation of length `length(A)`. +Construct in `A` a random cyclic permutation of length `n = length(A)`. The optional `rng` argument specifies a random number generator, see [Random Numbers](@ref). +Here, a "cyclic permutation" means that all of the elements lie within a single cycle. +If `A` is nonempty (`n > 0`), there are ``(n-1)!`` possible cyclic permutations, +which are sampled uniformly. If `A` is empty, `randcycle!` leaves it unchanged. + +[`randcycle`](@ref) is a variant of this function that allocates a new vector. + # Examples ```jldoctest -julia> randcycle!(MersenneTwister(1234), Vector{Int}(undef, 6)) +julia> randcycle!(Xoshiro(123), Vector{Int}(undef, 6)) 6-element Vector{Int64}: - 3 5 4 + 2 6 + 3 1 - 2 ``` """ function randcycle!(r::AbstractRNG, a::Array{<:Integer}) + # keep it consistent with `shuffle!` and `randperm!` if possible n = length(a) - n == 0 && return a @assert n <= Int64(2)^52 + n == 0 && return a a[1] = 1 mask = 3 + # Sattolo's algorithm: @inbounds for i = 2:n j = 1 + rand(r, ltm52(i-1, mask)) a[i] = a[j] a[j] = i - i == 1+mask && (mask = 2mask + 1) + i == 1 + mask && (mask = 2 * mask + 1) end return a end diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl index c2738653a0438..267d9db48fee8 100644 --- a/stdlib/Random/src/normal.jl +++ b/stdlib/Random/src/normal.jl @@ -3,7 +3,7 @@ # Normally distributed random numbers using Ziggurat algorithm # The Ziggurat Method for generating random variables - Marsaglia and Tsang -# Paper and reference code: http://www.jstatsoft.org/v05/i08/ +# Paper and reference code: https://www.jstatsoft.org/v05/i08/ # randmtzig (covers also exponential variates) @@ -14,27 +14,51 @@ Generate a normally-distributed random number of type `T` with mean 0 and standard deviation 1. -Optionally generate an array of normally-distributed random numbers. -The `Base` module currently provides an implementation for the types -[`Float16`](@ref), [`Float32`](@ref), and [`Float64`](@ref) (the default), and their -[`Complex`](@ref) counterparts. When the type argument is complex, the values are drawn -from the circularly symmetric complex normal distribution of variance 1 (corresponding to real and imaginary part having independent normal distribution with mean zero and variance `1/2`). +Given the optional `dims` argument(s), generate an array of size `dims` of such numbers. +Julia's standard library supports `randn` for any floating-point type +that implements [`rand`](@ref), e.g. the `Base` types +[`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref) (the default), and [`BigFloat`](@ref), +along with their [`Complex`](@ref) counterparts. + +(When `T` is complex, the values are drawn +from the circularly symmetric complex normal distribution of variance 1, corresponding to real and imaginary parts +having independent normal distribution with mean zero and variance `1/2`). See also [`randn!`](@ref) to act in-place. 
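Returning to the `randcycle`/`randcycle!` docstrings above (in `misc.jl`): the single-cycle property they describe can be verified with a small sketch that follows the permutation from index 1 and checks that it visits every index before returning.

```julia
using Random

p = randcycle(Xoshiro(7), 6)
visited = falses(length(p))
let i = 1
    for _ in 1:length(p)
        visited[i] = true
        i = p[i]                       # follow the cycle
    end
    @assert i == 1                     # back at the start after exactly n steps
end
@assert all(visited)                   # every index was visited exactly once
```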
# Examples + +Generating a single random number (with the default `Float64` type): + +```julia-repl +julia> randn() +-0.942481877315864 +``` + +Generating a matrix of normal random numbers (with the default `Float64` type): + +```julia-repl +julia> randn(2,3) +2×3 Matrix{Float64}: + 1.18786 -0.678616 1.49463 + -0.342792 -0.134299 -1.45005 +``` + +Setting up of the random number generator `rng` with a user-defined seed (for reproducible numbers) +and using it to generate a random `Float32` number or a matrix of `ComplexF32` random numbers: + ```jldoctest julia> using Random -julia> rng = MersenneTwister(1234); +julia> rng = Xoshiro(123); -julia> randn(rng, ComplexF64) -0.6133070881429037 - 0.6376291670853887im +julia> randn(rng, Float32) +-0.6457307f0 julia> randn(rng, ComplexF32, (2, 3)) 2×3 Matrix{ComplexF32}: - -0.349649-0.638457im 0.376756-0.192146im -0.396334-0.0136413im - 0.611224+1.56403im 0.355204-0.365563im 0.0905552+1.31012im + -1.03467-1.14806im 0.693657+0.056538im 0.291442+0.419454im + -0.153912+0.34807im 1.0954-0.948661im -0.543347-0.0538589im ``` """ @inline function randn(rng::AbstractRNG=default_rng()) @@ -72,8 +96,8 @@ end @noinline function randn_unlikely(rng, idx, rabs, x) @inbounds if idx == 0 while true - xx = -ziggurat_nor_inv_r*log(rand(rng)) - yy = -log(rand(rng)) + xx = -ziggurat_nor_inv_r*log1p(-rand(rng)) + yy = -log1p(-rand(rng)) yy+yy > xx*xx && return (rabs >> 8) % Bool ? -ziggurat_nor_r-xx : ziggurat_nor_r+xx end @@ -114,16 +138,16 @@ The `Base` module currently provides an implementation for the types # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> rng = Xoshiro(123); julia> randexp(rng, Float32) -2.4835055f0 +1.1757717f0 julia> randexp(rng, 3, 3) 3×3 Matrix{Float64}: - 1.5167 1.30652 0.344435 - 0.604436 2.78029 0.418516 - 0.695867 0.693292 0.643644 + 1.37766 0.456653 0.236418 + 3.40007 0.229917 0.0684921 + 0.48096 0.577481 0.71835 ``` """ randexp(rng::AbstractRNG=default_rng()) = _randexp(rng, rand(rng, UInt52Raw())) @@ -140,7 +164,7 @@ end @noinline function randexp_unlikely(rng, idx, x) @inbounds if idx == 0 - return ziggurat_exp_r - log(rand(rng)) + return ziggurat_exp_r - log1p(-rand(rng)) elseif (fe[idx] - fe[idx+1])*rand(rng) + fe[idx+1] < exp(-x) return x # return from the triangular area else @@ -162,15 +186,13 @@ Also see the [`rand`](@ref) function. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> randn!(rng, zeros(5)) +julia> randn!(Xoshiro(123), zeros(5)) 5-element Vector{Float64}: - 0.8673472019512456 - -0.9017438158568171 - -0.4944787535042339 - -0.9029142938652416 - 0.8644013132535154 + -0.6457306721039767 + -1.4632513788889214 + -1.6236037455860806 + -0.21766510678354617 + 0.4922456865251828 ``` """ function randn! end @@ -183,15 +205,13 @@ Fill the array `A` with random numbers following the exponential distribution # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> randexp!(rng, zeros(5)) +julia> randexp!(Xoshiro(123), zeros(5)) 5-element Vector{Float64}: - 2.4835053723904896 - 1.516703605376473 - 0.6044364871025417 - 0.6958665886385867 - 1.3065196315496677 + 1.1757716836348473 + 1.758884569451514 + 1.0083623637301151 + 0.3510644315565272 + 0.6348266443720407 ``` """ function randexp! 
end diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl index 3f570d862b743..9b46951f63ff5 100644 --- a/stdlib/Random/test/runtests.jl +++ b/stdlib/Random/test/runtests.jl @@ -10,94 +10,80 @@ using .Main.OffsetArrays using Random using Random.DSFMT -using Random: Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I +using Random: default_rng, Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I +using Random: jump_128, jump_192, jump_128!, jump_192! import Future # randjump -@testset "Issue #6573" begin - Random.seed!(0) - rand() - x = rand(384) - @test findall(x .== rand()) == [] +function test_uniform(xs::AbstractArray{T}) where {T<:AbstractFloat} + # TODO: refine + prec = isempty(xs) ? precision(T) : precision(first(xs)) + proba_nocollision = prod((1.0 - i/2.0^prec for i=1:length(xs)-1), init=1.0) # rough estimate + xsu = Set(xs) + if (1.0 - proba_nocollision) < 2.0^-64 + @test length(xsu) == length(xs) + elseif prec > 52 && length(xs) < 3000 + # if proba of collisions is high enough, allow at most one collision; + # with the constraints on precision and length, more than one collision would happen + # with proba less than 2.0^-62 + @test length(xsu) >= length(xs)-1 + end + @test all(x -> zero(x) <= x < one(x), xs) end -@test rand() != rand() -@test 0.0 <= rand() < 1.0 -@test rand(UInt32) >= 0 -@test -10 <= rand(-10:-5) <= -5 -@test -10 <= rand(-10:5) <= 5 -@test minimum([rand(Int32(1):Int32(7^7)) for i = 1:100000]) > 0 -@test typeof(rand(false:true)) === Bool -@test typeof(rand(Char)) === Char -@test length(randn(4, 5)) == 20 -@test length(randn(ComplexF64, 4, 5)) == 20 -@test length(bitrand(4, 5)) == 20 - -@test rand(MersenneTwister(0)) == 0.8236475079774124 -@test rand(MersenneTwister(42)) == 0.5331830160438613 -# Try a seed larger than 2^32 -@test rand(MersenneTwister(5294967296)) == 0.3498809918210497 - -# Test array filling, Issues #7643, #8360 -@test rand(MersenneTwister(0), 1) == [0.8236475079774124] -let A = zeros(2, 2) - rand!(MersenneTwister(0), A) - @test A == [0.8236475079774124 0.16456579813368521; - 0.9103565379264364 0.17732884646626457] -end -let A = zeros(2, 2) - @test_throws MethodError rand!(MersenneTwister(0), A, 5) - @test rand(MersenneTwister(0), Int64, 1) == [-3433174948434291912] -end -let A = zeros(Int64, 2, 2) - rand!(MersenneTwister(0), A) - @test A == [858542123778948672 5715075217119798169; - 8690327730555225005 8435109092665372532] +function test_uniform(xs::AbstractArray{T}) where {T<:Base.BitInteger} + # TODO: refine + prec = 8*sizeof(T) + proba_nocollision = prod((1.0 - i/2.0^prec for i=1:length(xs)-1), init=1.0) + xsu = Set(xs) + if (1.0 - proba_nocollision) < 2.0^-64 + @test length(xsu) == length(xs) + elseif prec > 52 && length(xs) < 3000 + @test length(xsu) >= length(xs)-1 + end end -# rand from AbstractArray -let mt = MersenneTwister() - @test rand(mt, 0:3:1000) in 0:3:1000 - @test issubset(rand!(mt, Vector{Int}(undef, 100), 0:3:1000), 0:3:1000) - coll = Any[2, UInt128(128), big(619), "string"] - @test rand(mt, coll) in coll - @test issubset(rand(mt, coll, 2, 3), coll) - - # check API with default RNG: - rand(0:3:1000) - rand!(Vector{Int}(undef, 100), 0:3:1000) - rand(coll) - rand(coll, 2, 3) -end -# randn -@test randn(MersenneTwister(42)) == -0.5560268761463861 -let A = zeros(2, 2) - randn!(MersenneTwister(42), A) - @test A == [-0.5560268761463861 0.027155338009193845; - -0.444383357109696 -0.29948409035891055] +@testset "MersenneTwister: do not do update the 
same global state in incompatible ways" begin + # Issue #6573 + mm = MersenneTwister(rand(UInt128)) + rand(mm) + xs = rand(mm, 384) + @test rand(mm) ∉ xs + test_uniform(xs) end -let B = zeros(ComplexF64, 2) - randn!(MersenneTwister(42), B) - @test B == [ComplexF64(-0.5560268761463861,-0.444383357109696), - ComplexF64(0.027155338009193845,-0.29948409035891055)] * 0.7071067811865475244008 -end +@testset "rand from AbstractArray" begin + seed = rand(UInt128) + for rng ∈ ([MersenneTwister(seed)], [Xoshiro(seed)], []) + # issue 8257 + i8257 = 1:1/3:100 + for _ = 1:100 + @test rand(rng... ,i8257) in i8257 + end -for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt, - Float16, Float32, Float64, Rational{Int}) - r = rand(convert(T, 97):convert(T, 122)) - @test typeof(r) == T - @test 97 <= r <= 122 - r = rand(convert(T, 97):convert(T,2):convert(T, 122),2)[1] - @test typeof(r) == T - @test 97 <= r <= 122 - @test mod(r,2)==1 - - if T<:Integer && !(T===BigInt) - x = rand(typemin(T):typemax(T)) - @test isa(x,T) - @test typemin(T) <= x <= typemax(T) + @test rand(rng..., 0:3:1000) in 0:3:1000 + @test issubset(rand!(rng..., Vector{Int}(undef, 100), 0:3:1000), 0:3:1000) + coll = Any[2, UInt128(128), big(619), "string"] + @test rand(rng..., coll) in coll + @test issubset(rand(rng..., coll, 2, 3), coll) + + for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt, + Float16, Float32, Float64, Rational{Int}) + r = rand(rng..., convert(T, 97):convert(T, 122)) + @test typeof(r) == T + @test 97 <= r <= 122 + r = rand(rng..., convert(T, 97):convert(T,2):convert(T, 122),2)[1] + @test typeof(r) == T + @test 97 <= r <= 122 + @test mod(r,2)==1 + + if T<:Integer && !(T===BigInt) + x = rand(rng..., typemin(T):typemax(T)) + @test isa(x,T) + @test typemin(T) <= x <= typemax(T) + end + end end end @@ -227,54 +213,35 @@ for U in (Int64, UInt64) for k in 13 .+ Int64(2).^(1:30)) end -#issue 8257 -let i8257 = 1:1/3:100 - for i = 1:100 - @test rand(i8257) in i8257 - end -end - -# test code paths of rand! 
- -let mt = MersenneTwister(0) - A128 = Vector{UInt128}() +@testset "test code paths of rand!(::MersenneTwister)" begin + mt = MersenneTwister(rand(UInt128)) + A128 = UInt128[] @test length(rand!(mt, A128)) == 0 - for (i,n) in enumerate([1, 3, 5, 6, 10, 11, 30]) + for (i, n) in enumerate([1, 3, 5, 6, 10, 11, 30]) resize!(A128, n) rand!(mt, A128) @test length(A128) == n - @test A128[end] == UInt128[0x15de6b23025813ad129841f537a04e40, - 0xcfa4db38a2c65bc4f18c07dc91125edf, - 0x33bec08136f19b54290982449b3900d5, - 0xde41af3463e74cb830dad4add353ca20, - 0x066d8695ebf85f833427c93416193e1f, - 0x48fab49cc9fcee1c920d6dae629af446, - 0x4b54632b4619f4eca22675166784d229][i] + test_uniform(A128) end - Random.seed!(mt, 0) - Aend = Any[] - Bend = Any[] - for (i,T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32]) + for (i, T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32]) A = Vector{T}(undef, 16) B = Vector{T}(undef, 31) rand!(mt, A) rand!(mt, B) - push!(Aend, A[end]) - push!(Bend, B[end]) + @test length(A) == 16 + @test length(B) == 31 + test_uniform(A) + test_uniform(B) end - @test Aend == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0x68a9f9865393cfd6, - 33687499368208574024854346399216845930, Float16(0.7744), 0.97259974f0] - @test Bend == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, -3433174948434291912, 0xd461716f27c91500, - -85900088726243933988214632401750448432, Float16(0.10645), 0.13879478f0] - Random.seed!(mt, 0) AF64 = Vector{Float64}(undef, Random.dsfmt_get_min_array_size()-1) - @test rand!(mt, AF64)[end] == 0.957735065345398 - @test rand!(mt, AF64)[end] == 0.6492481059865669 + rand!(mt, AF64) + test_uniform(AF64) resize!(AF64, 2*length(mt.vals)) - @test invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}}, - mt, AF64, Random.SamplerTrivial(Random.CloseOpen01()))[end] == 0.1142787906708973 + invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}}, + mt, AF64, Random.SamplerTrivial(Random.CloseOpen01())) + test_uniform(AF64) end # Issue #9037 @@ -297,7 +264,7 @@ let mt = MersenneTwister(0) Random.seed!(mt, 0) rand(mt) # this is to fill mt.vals, cf. #9040 rand!(mt, A) # must not segfault even if Int(pointer(A)) % 16 != 0 - @test A[end-4:end] == [0.3371041633752143, 0.41147647589610803, 0.6063082992397912, 0.9103565379264364, 0.16456579813368521] + test_uniform(A) end end @@ -331,15 +298,23 @@ end # test all rand APIs for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) + realrng = rng == [] ? default_rng() : only(rng) ftypes = [Float16, Float32, Float64, FakeFloat64, BigFloat] cftypes = [ComplexF16, ComplexF32, ComplexF64, ftypes...] - types = [Bool, Char, BigFloat, Base.BitInteger_types..., ftypes...] + types = [Bool, Char, BigFloat, Tuple{Bool, Tuple{Int, Char}}, Pair{Int8, UInt32}, + Base.BitInteger_types..., cftypes...] 
randset = Set(rand(Int, 20)) randdict = Dict(zip(rand(Int,10), rand(Int, 10))) + + randwidetup = Tuple{Bool, Char, Vararg{Tuple{Int, Float64}, 14}} + @inferred rand(rng..., randwidetup) + collections = [BitSet(rand(1:100, 20)) => Int, randset => Int, GenericSet(randset) => Int, randdict => Pair{Int,Int}, + keys(randdict) => Int, + values(randdict) => Int, GenericDict(randdict) => Pair{Int,Int}, 1:100 => Int, rand(Int, 100) => Int, @@ -354,42 +329,55 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) b2 = big(2) u3 = UInt(3) for f in [rand, randn, randexp] - f(rng...) ::Float64 - f(rng..., 5) ::Vector{Float64} - f(rng..., 2, 3) ::Array{Float64, 2} - f(rng..., b2, u3) ::Array{Float64, 2} + f1 = f(rng...) ::Float64 + f2 = f(rng..., 5) ::Vector{Float64} + f3 = f(rng..., 2, 3) ::Array{Float64, 2} + f4 = f(rng..., b2, u3) ::Array{Float64, 2} + @test size(f1) == () + @test size(f2) == (5,) + @test size(f3) == size(f4) == (2, 3) for T in functypes[f] - a0 = f(rng..., T) ::T - a1 = f(rng..., T, 5) ::Vector{T} - a2 = f(rng..., T, 2, 3) ::Array{T, 2} - a3 = f(rng..., T, b2, u3) ::Array{T, 2} - a4 = f(rng..., T, (2, 3)) ::Array{T, 2} - if T <: AbstractFloat && f === rand - for a in [a0, a1..., a2..., a3..., a4...] - @test 0.0 <= a < 1.0 + tts = f == rand ? (T, Sampler(realrng, T, Val(1)), Sampler(realrng, T, Val(Inf))) : (T,) + for tt in tts + a0 = f(rng..., tt) ::T + a1 = f(rng..., tt, 5) ::Vector{T} + a2 = f(rng..., tt, 2, 3) ::Array{T, 2} + a3 = f(rng..., tt, b2, u3) ::Array{T, 2} + a4 = f(rng..., tt, (2, 3)) ::Array{T, 2} + if T <: Number + @test size(a0) == () + end + @test size(a1) == (5,) + @test size(a2) == size(a3) == size(a4) == (2, 3) + if T <: AbstractFloat && f === rand + for a in T[a0, a1..., a2..., a3..., a4...] + @test 0.0 <= a < 1.0 + end end end end end for (C, T) in collections - a0 = rand(rng..., C) ::T - a1 = rand(rng..., C, 5) ::Vector{T} - a2 = rand(rng..., C, 2, 3) ::Array{T, 2} - a3 = rand(rng..., C, (2, 3)) ::Array{T, 2} - a4 = rand(rng..., C, b2, u3) ::Array{T, 2} - a5 = rand!(rng..., Array{T}(undef, 5), C) ::Vector{T} - a6 = rand!(rng..., Array{T}(undef, 2, 3), C) ::Array{T, 2} - a7 = rand!(rng..., GenericArray{T}(undef, 5), C) ::GenericArray{T, 1} - a8 = rand!(rng..., GenericArray{T}(undef, 2, 3), C) ::GenericArray{T, 2} - a9 = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), C) ::OffsetArray{T, 1} - a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), C) ::OffsetArray{T, 2} - @test size(a1) == (5,) - @test size(a2) == size(a3) == (2, 3) - for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10...] - if C isa Type - @test a isa C - else - @test a in C + for cc = (C, Sampler(realrng, C, Val(1)), Sampler(realrng, C, Val(Inf))) + a0 = rand(rng..., cc) ::T + a1 = rand(rng..., cc, 5) ::Vector{T} + a2 = rand(rng..., cc, 2, 3) ::Array{T, 2} + a3 = rand(rng..., cc, (2, 3)) ::Array{T, 2} + a4 = rand(rng..., cc, b2, u3) ::Array{T, 2} + a5 = rand!(rng..., Array{T}(undef, 5), cc) ::Vector{T} + a6 = rand!(rng..., Array{T}(undef, 2, 3), cc) ::Array{T, 2} + a7 = rand!(rng..., GenericArray{T}(undef, 5), cc) ::GenericArray{T, 1} + a8 = rand!(rng..., GenericArray{T}(undef, 2, 3), cc) ::GenericArray{T, 2} + a9 = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), cc) ::OffsetArray{T, 1} + a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), cc) ::OffsetArray{T, 2} + @test size(a1) == (5,) + @test size(a2) == size(a3) == (2, 3) + for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10...] 
+ if C isa Type + @test a isa C + else + @test a in C + end end end end @@ -401,6 +389,7 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) end for f! in [rand!, randn!, randexp!] for T in functypes[f!] + (T <: Tuple || T <: Pair) && continue X = T == Bool ? T[0,1] : T[0,1,2] for A in (Vector{T}(undef, 5), Matrix{T}(undef, 2, 3), @@ -409,7 +398,8 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) OffsetArray(Array{T}(undef, 5), -3), OffsetArray(Array{T}(undef, 2, 3), (4, 5))) local A - f!(rng..., A) ::typeof(A) + A2 = f!(rng..., A) ::typeof(A) + @test A2 === A if f! === rand! f!(rng..., A, X) ::typeof(A) if A isa Array && T !== Char # Char/Integer comparison @@ -421,11 +411,16 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) end end - bitrand(rng..., 5) ::BitArray{1} - bitrand(rng..., 2, 3) ::BitArray{2} - bitrand(rng..., b2, u3) ::BitArray{2} - rand!(rng..., BitVector(undef, 5)) ::BitArray{1} - rand!(rng..., BitMatrix(undef, 2, 3)) ::BitArray{2} + z1 = bitrand(rng..., 5) ::BitArray{1} + @test size(z1) == (5,) + z2 = bitrand(rng..., 2, 3) ::BitArray{2} + @test size(z2) == (2, 3) + z3 = bitrand(rng..., b2, u3) ::BitArray{2} + @test size(z3) == (b2, u3) + z4 = rand!(rng..., BitVector(undef, 5)) ::BitArray{1} + @test size(z4) == (5,) + z5 = rand!(rng..., BitMatrix(undef, 2, 3)) ::BitArray{2} + @test size(z5) == (2, 3) # Test that you cannot call randn or randexp with non-Float types. for r in [randn, randexp] @@ -441,6 +436,10 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) @test_throws MethodError r(rng..., Number, (2,3)) @test_throws MethodError r(rng..., Any, 1) end + + # Test that you cannot call rand with a tuple type of unknown size or with isbits parameters + @test_throws ArgumentError rand(rng..., Tuple{Vararg{Int}}) + @test_throws TypeError rand(rng..., Tuple{1:2}) end function hist(X, n) @@ -451,17 +450,22 @@ function hist(X, n) v end -# test uniform distribution of floats -for rng in [MersenneTwister(), RandomDevice(), Xoshiro()], - T in [Float16, Float32, Float64, BigFloat], +@testset "uniform distribution of floats" begin + for rng in [MersenneTwister(), RandomDevice(), Xoshiro()], + T in [Float16, Float32, Float64, BigFloat], prec in (T == BigFloat ? 
[3, 53, 64, 100, 256, 1000] : [256]) - setprecision(BigFloat, prec) do - # array version - counts = hist(rand(rng, T, 2000), 4) - @test minimum(counts) > 300 # should fail with proba < 1e-26 - # scalar version - counts = hist([rand(rng, T) for i in 1:2000], 4) - @test minimum(counts) > 300 + + setprecision(BigFloat, prec) do + if precision(T) >= precision(Float32) + @test rand(rng, T) != rand(rng, T) + end + # array version + counts = hist(rand(rng, T, 2000), 4) + @test minimum(counts) > 300 # should fail with proba < 1e-26 + # scalar version + counts = hist([rand(rng, T) for i in 1:2000], 4) + @test minimum(counts) > 300 + end end end @@ -523,6 +527,7 @@ end @test shuffle!(mta,Vector(1:10)) == shuffle!(mtb,Vector(1:10)) @test shuffle(mta,Vector(2:11)) == shuffle(mtb,2:11) @test shuffle!(mta, rand(mta, 2, 3)) == shuffle!(mtb, rand(mtb, 2, 3)) + @test shuffle!(mta, rand(mta, Bool, 2, 3)) == shuffle!(mtb, rand(mtb, Bool, 2, 3)) @test shuffle(mta, rand(mta, 2, 3)) == shuffle(mtb, rand(mtb, 2, 3)) @test randperm(mta,10) == randperm(mtb,10) @@ -588,30 +593,41 @@ end end end -# test that the following is not an error (#16925) -guardseed() do - Random.seed!(typemax(UInt)) - Random.seed!(typemax(UInt128)) -end - -# copy, == and hash -let seed = rand(UInt32, 10) - r = MersenneTwister(seed) - @test r == MersenneTwister(seed) # r.vals should be all zeros - @test hash(r) == hash(MersenneTwister(seed)) - s = copy(r) - @test s == r && s !== r - @test hash(s) == hash(r) - skip, len = rand(0:2000, 2) - for j=1:skip - rand(r) - rand(s) +@testset "copy, == and hash" begin + for RNG = (MersenneTwister, Xoshiro) + seed = rand(UInt32, 10) + r = RNG(seed) + t = RNG(seed) + @test r == t + @test hash(r) == hash(t) + s = copy(r) + @test s == r == t && s !== r + @test hash(s) == hash(r) + skip, len = rand(0:2000, 2) + for j=1:skip + rand(r) + @test r != s + @test hash(r) != hash(s) + rand(s) + end + @test rand(r, len) == rand(s, len) + @test s == r + @test hash(s) == hash(r) + h = rand(UInt) + @test hash(s, h) == hash(r, h) + if RNG == Xoshiro + t = copy(TaskLocalRNG()) + @test hash(t) == hash(TaskLocalRNG()) + @test hash(t, h) == hash(TaskLocalRNG(), h) + x = rand() + @test hash(t) != hash(TaskLocalRNG()) + @test rand(t) == x + @test hash(t) == hash(TaskLocalRNG()) + copy!(TaskLocalRNG(), r) + @test hash(TaskLocalRNG()) == hash(r) + @test TaskLocalRNG() == r + end end - @test rand(r, len) == rand(s, len) - @test s == r - @test hash(s) == hash(r) - h = rand(UInt) - @test hash(s, h) == hash(r, h) end # MersenneTwister initialization with invalid values @@ -633,9 +649,7 @@ end let seed = rand(UInt32, 10) r = MersenneTwister(seed) @test r.seed == seed && r.seed !== seed - # RNGs do not share their seed in randjump let r2 = Future.randjump(r, big(10)^20) - @test r.seed !== r2.seed Random.seed!(r2) @test seed == r.seed != r2.seed end @@ -643,17 +657,23 @@ let seed = rand(UInt32, 10) @test r.seed != seed end -# Random.seed!(rng, ...) returns rng (#21248) -guardseed() do - g = Random.default_rng() - m = MersenneTwister(0) - @test Random.seed!() === g - @test Random.seed!(rand(UInt)) === g - @test Random.seed!(rand(UInt32, rand(1:8))) === g - @test Random.seed!(m) === m - @test Random.seed!(m, rand(UInt)) === m - @test Random.seed!(m, rand(UInt32, rand(1:10))) === m - @test Random.seed!(m, rand(1:10)) === m +@testset "Random.seed!(rng, ...) returns rng" begin + # issue #21248 + seed = rand(UInt) + for m = ([MersenneTwister(seed)], [Xoshiro(seed)], []) + m2 = m == [] ? default_rng() : m[1] + @test Random.seed!(m...) 
=== m2 + @test Random.seed!(m..., rand(UInt)) === m2 + @test Random.seed!(m..., rand(UInt32, rand(1:10))) === m2 + @test Random.seed!(m..., rand(1:10)) === m2 + # Try a seed larger than 2^32 + @test Random.seed!(m..., 5294967296) === m2 + + # test that the following is not an error (#16925) + @test Random.seed!(m..., typemax(UInt)) === m2 + @test Random.seed!(m..., typemax(UInt128)) === m2 + @test Random.seed!(m..., "a random seed") === m2 + end end # Issue 20062 - ensure internal functions reserve_1, reserve are type-stable @@ -707,7 +727,7 @@ end end @testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro) - seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...] + seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...] if RNG == Xoshiro push!(seeds, rand(UInt64, rand(1:4))) end @@ -720,37 +740,19 @@ end end @testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin - seeds = Any[0, rand(UInt128), rand(UInt64, 4)] + seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)] for seed=seeds Random.seed!(seed) - @test Random.GLOBAL_SEED === seed + @test Random.get_tls_seed() == default_rng() end - # two separate loops as otherwise we are no sure that the second call (with GLOBAL_RNG) - # actually sets GLOBAL_SEED - for seed=seeds - Random.seed!(Random.GLOBAL_RNG, seed) - @test Random.GLOBAL_SEED === seed - end - - Random.seed!(nothing) - seed1 = Random.GLOBAL_SEED - @test seed1 isa Vector{UInt64} # could change, but must not be nothing - - Random.seed!(Random.GLOBAL_RNG, nothing) - seed2 = Random.GLOBAL_SEED - @test seed2 isa Vector{UInt64} - @test seed2 != seed1 - Random.seed!() - seed3 = Random.GLOBAL_SEED - @test seed3 isa Vector{UInt64} - @test seed3 != seed2 - - Random.seed!(Random.GLOBAL_RNG) - seed4 = Random.GLOBAL_SEED - @test seed4 isa Vector{UInt64} - @test seed4 != seed3 + for ii = 1:8 + iseven(ii) ? 
Random.seed!(nothing) : Random.seed!() + push!(seeds, copy(Random.get_tls_seed())) + @test Random.get_tls_seed() isa Xoshiro # could change, but must not be nothing + end + @test allunique(seeds) end struct RandomStruct23964 end @@ -760,9 +762,20 @@ struct RandomStruct23964 end end @testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice(), Xoshiro()), - T ∈ (Int8, Int16, Int32, UInt32, Int64, Int128, UInt128) - for S in (SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL) - S == SamplerRangeNDL && sizeof(T) > 8 && continue + T ∈ (Bool, Int8, Int16, Int32, UInt32, Int64, Int128, UInt128) + if T === Bool + @test rand(RNG, false:true) ∈ (false, true) + @test rand(RNG, false:false) === false + @test rand(RNG, true:true) === true + @test_throws ArgumentError rand(RNG, true:false) + continue + end + for S in (identity, SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL) + if T === Int32 && RNG isa MersenneTwister + @test minimum([rand(RNG, T(1):T(7^7)) for i = 1:100000]) > 0 + end + + (S == SamplerRangeNDL || S == identity) && sizeof(T) > 8 && continue r = T(1):T(108) @test rand(RNG, S(r)) ∈ r @test rand(RNG, S(typemin(T):typemax(T))) isa T @@ -803,10 +816,18 @@ end end end +@testset "rand(::Type{<:Tuple})" begin + @test_throws ArgumentError rand(Tuple) + @test rand(Tuple{}) == () + @inferred rand(Tuple{Int32,Int64,Float64}) + @inferred rand(NTuple{20,Int}) + @test_throws TypeError rand(Tuple{1:2,3:4}) +end + @testset "GLOBAL_RNG" begin + @test VERSION < v"2" # deprecate this in v2 (GLOBAL_RNG must go) local GLOBAL_RNG = Random.GLOBAL_RNG local LOCAL_RNG = Random.default_rng() - @test VERSION < v"2" # deprecate this in v2 @test Random.seed!(GLOBAL_RNG, nothing) === LOCAL_RNG @test Random.seed!(GLOBAL_RNG, UInt32[0]) === LOCAL_RNG @@ -922,14 +943,29 @@ end @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1)) m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b) - @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)" + @test string(m) == "MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)" rand(m, Int64) - @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))" + @test string(m) == "MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))" @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1)) m = MersenneTwister(0); rand(m, Int64); rand(m) @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))" @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1)) + + # negative seeds + Random.seed!(m, -3) + @test string(m) == "MersenneTwister(-3)" + Random.seed!(m, typemin(Int8)) + @test string(m) == "MersenneTwister(-128)" + + # string seeds + Random.seed!(m, "seed 1") + @test string(m) == "MersenneTwister(\"seed 1\")" + x = rand(m) + @test x == rand(MersenneTwister("seed 1")) + @test string(m) == """MersenneTwister("seed 1", (0, 1002, 0, 1))""" + # test that MersenneTwister's fancy constructors accept string seeds + @test MersenneTwister("seed 1", (0, 1002, 0, 1)) == m end @testset "RandomDevice" begin @@ -1065,3 +1101,153 @@ end end end end + +@testset "TaskLocalRNG: copy and copy! 
handle the splitmix state" begin + seeds = rand(RandomDevice(), UInt64, 5) + for seed in seeds + Random.seed!(seed) + rng1 = copy(TaskLocalRNG()) + x = fetch(@async rand(UInt64)) + rng2 = copy(TaskLocalRNG()) + y = fetch(@async rand(UInt64)) + rng3 = copy(TaskLocalRNG()) + @test x != y + @test rng1 != rng2 + Random.seed!(seed) + @test TaskLocalRNG() == rng1 + @test x == fetch(@async rand(UInt64)) + @test TaskLocalRNG() == rng2 + # this should be a no-op: + copy!(TaskLocalRNG(), copy(TaskLocalRNG())) + @test TaskLocalRNG() == rng2 + @test y == fetch(@async rand(UInt64)) + @test TaskLocalRNG() == rng3 + end +end + +# Xoshiro jumps +@testset "Xoshiro jump, basic" begin + x1 = Xoshiro(1) + x2 = Xoshiro(1) + + @test x1 === jump_128!(jump_128!(x1)) + @test x2 === jump_128!(x2, 2) + @test x1 == x2 + + xo1 = Xoshiro(0xfff0241072ddab67, 0xc53bc12f4c3f0b4e, 0x56d451780b2dd4ba, 0x50a4aa153d208dd8) + @test rand(jump_128(xo1), UInt64) == 0x87c158da8c35824d + @test rand(jump_192(xo1), UInt64) == 0xcaecd5afdd0847d5 + + @test rand(jump_128(xo1, 98765), UInt64) == 0xcbec1d5053142608 + @test rand(jump_192(xo1, 98765), UInt64) == 0x3b97a94c44d66216 + + # Throws where appropriate + @test_throws DomainError jump_128(Xoshiro(1), -1) + @test_throws DomainError jump_128!(Xoshiro(1), -1) + @test_throws DomainError jump_192(Xoshiro(1), -1) + @test_throws DomainError jump_192!(Xoshiro(1), -1) + + # clean copy when non-mut and no state advance + x = Xoshiro(1) + @test jump_128(x, 0) == x + @test jump_128(x, 0) !== x + @test jump_192(x, 0) == x + @test jump_192(x, 0) !== x + + y = Xoshiro(1) + @test jump_128!(x, 0) == y + @test jump_192!(x, 0) == y +end + +@testset "Xoshiro jump_128, various seeds" begin + for seed in (0, 1, 0xa0a3f09d0cecd878, 0x7ff8) + x = Xoshiro(seed) + @test jump_128(jump_128(jump_128(x))) == jump_128(x, 3) + x1 = Xoshiro(seed) + @test jump_128!(jump_128!(jump_128!(x1))) == jump_128(x, 3) + jump_128!(x1, 997) + x2 = jump_128!(Xoshiro(seed), 1000) + for T ∈ (Float64, UInt64, Int, Char, Bool) + @test rand(x1, T, 5) == rand(x2, T, 5) + @test rand(jump_128!(x1), T, 5) == rand(jump_128!(x2), T, 5) + end + end +end + +@testset "Xoshiro jump_192, various seeds" begin + for seed in (0, 1, 0xa0a3f09d0cecd878, 0x7ff8) + x = Xoshiro(seed) + @test jump_192(jump_192(jump_192(x))) == jump_192(x, 3) + x1 = Xoshiro(seed) + @test jump_192!(jump_192!(jump_192!(x1))) == jump_192(x, 3) + jump_192!(x1, 997) + x2 = jump_192!(Xoshiro(seed), 1000) + for T ∈ (Float64, UInt64, Int, Char, Bool) + @test rand(x1, T, 5) == rand(x2, T, 5) + @test rand(jump_192!(x1), T, 5) == rand(jump_192!(x2), T, 5) + end + end +end + +@testset "seed! 
and hash_seed" begin + # Test that: + # 1) if n == m, then hash_seed(n) == hash_seed(m) + # 2) if n != m, then hash_seed(n) != hash_seed(m) + rngs = (Xoshiro(0), TaskLocalRNG(), MersenneTwister(0)) + seeds = Any[] + for T = Base.BitInteger_types + append!(seeds, rand(T, 8)) + push!(seeds, typemin(T), typemin(T) + T(1), typemin(T) + T(2), + typemax(T), typemax(T) - T(1), typemax(T) - T(2)) + T <: Signed && push!(seeds, T(0), T(1), T(2), T(-1), T(-2)) + end + + vseeds = Dict{Vector{UInt8}, BigInt}() + for seed = seeds + bigseed = big(seed) + vseed = Random.hash_seed(bigseed) + # test property 1) above + @test Random.hash_seed(seed) == vseed + # test property 2) above + @test bigseed == get!(vseeds, vseed, bigseed) + # test that the property 1) is actually inherited by `seed!` + for rng = rngs + rng2 = copy(Random.seed!(rng, seed)) + Random.seed!(rng, bigseed) + @test rng == rng2 + end + end + + seed32 = rand(UInt32, rand(1:9)) + hash32 = Random.hash_seed(seed32) + @test Random.hash_seed(map(UInt64, seed32)) == hash32 + @test hash32 ∉ keys(vseeds) + + seed_str = randstring() + seed_gstr = GenericString(seed_str) + @test Random.hash_seed(seed_str) == Random.hash_seed(seed_gstr) + string_seeds = Set{Vector{UInt8}}() + for ch = 'A':'z' + vseed = Random.hash_seed(string(ch)) + @test vseed ∉ keys(vseeds) + @test vseed ∉ string_seeds + push!(string_seeds, vseed) + end +end + +@testset "rand(::Type{<:Pair})" begin + @test rand(Pair{Int, Int}) isa Pair{Int, Int} + @test rand(Pair{Int, Float64}) isa Pair{Int, Float64} + @test rand(Pair{Int, Float64}, 3) isa Array{Pair{Int, Float64}} + + # test that making an array out of a sampler works + # (i.e. that gentype(sp) is correct) + sp = Random.Sampler(AbstractRNG, Pair{Bool, Char}) + xs = rand(sp, 3) + @test xs isa Vector{Pair{Bool, Char}} + @test length(xs) == 3 +end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Random)) +end diff --git a/stdlib/SHA.version b/stdlib/SHA.version index f2242a336c6fe..4b33964a6dcdb 100644 --- a/stdlib/SHA.version +++ b/stdlib/SHA.version @@ -1,4 +1,4 @@ SHA_BRANCH = master -SHA_SHA1 = 2d1f84e6f8417a1a368de48318640d948b023e7a +SHA_SHA1 = 8fa221ddc8f3b418d9929084f1644f4c32c9a27e SHA_GIT_URL := https://github.com/JuliaCrypto/SHA.jl.git SHA_TAR_URL = https://api.github.com/repos/JuliaCrypto/SHA.jl/tarball/$1 diff --git a/stdlib/Serialization/Project.toml b/stdlib/Serialization/Project.toml index 4a2f7874e3124..97e898d731c7d 100644 --- a/stdlib/Serialization/Project.toml +++ b/stdlib/Serialization/Project.toml @@ -1,5 +1,6 @@ name = "Serialization" uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Serialization/docs/src/index.md b/stdlib/Serialization/docs/src/index.md index 9f593a2e807d9..0d00e47ed84ce 100644 --- a/stdlib/Serialization/docs/src/index.md +++ b/stdlib/Serialization/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Serialization/docs/src/index.md" +``` + # Serialization Provides serialization of Julia objects. 
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index 7c1043f33bdfe..bc476181e5b0d 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -7,7 +7,7 @@ Provide serialization of Julia objects via the functions """ module Serialization -import Base: GMP, Bottom, unsafe_convert, uncompressed_ast +import Base: Bottom, unsafe_convert import Core: svec, SimpleVector using Base: unaliascopy, unwrap_unionall, require_one_based_indexing, ntupleany using Core.IR @@ -80,7 +80,7 @@ const TAGS = Any[ const NTAGS = length(TAGS) @assert NTAGS == 255 -const ser_version = 24 # do not make changes without bumping the version #! +const ser_version = 29 # do not make changes without bumping the version #! format_version(::AbstractSerializer) = ser_version format_version(s::Serializer) = s.version @@ -288,6 +288,31 @@ function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Arra serialize_any(s, b) end +serialize(s::AbstractSerializer, m::GenericMemory) = error("GenericMemory{:atomic} currently cannot be serialized") +function serialize(s::AbstractSerializer, m::Memory) + serialize_cycle_header(s, m) && return + serialize(s, length(m)) + elty = eltype(m) + if isbitstype(elty) + serialize_array_data(s.io, m) + else + sizehint!(s.table, div(length(m),4)) # prepare for lots of pointers + @inbounds for i in eachindex(m) + if isassigned(m, i) + serialize(s, m[i]) + else + writetag(s.io, UNDEFREF_TAG) + end + end + end +end + +function serialize(s::AbstractSerializer, x::GenericMemoryRef) + serialize_type(s, typeof(x)) + serialize(s, getfield(x, :mem)) + serialize(s, Base.memoryrefoffset(x)) +end + function serialize(s::AbstractSerializer, ss::String) len = sizeof(ss) if len > 7 @@ -422,7 +447,7 @@ function serialize(s::AbstractSerializer, meth::Method) serialize(s, meth.constprop) serialize(s, meth.purity) if isdefined(meth, :source) - serialize(s, Base._uncompressed_ast(meth, meth.source)) + serialize(s, Base._uncompressed_ast(meth)) else serialize(s, nothing) end @@ -445,11 +470,6 @@ end function serialize(s::AbstractSerializer, linfo::Core.MethodInstance) serialize_cycle(s, linfo) && return writetag(s.io, METHODINSTANCE_TAG) - if isdefined(linfo, :uninferred) - serialize(s, linfo.uninferred) - else - writetag(s.io, UNDEFREF_TAG) - end serialize(s, nothing) # for backwards compat serialize(s, linfo.sparam_vals) serialize(s, Any) # for backwards compat @@ -511,7 +531,7 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName) serialize(s, primary.super) serialize(s, primary.parameters) serialize(s, primary.types) - serialize(s, isdefined(primary, :instance)) + serialize(s, Base.issingletontype(primary)) serialize(s, t.flags & 0x1 == 0x1) # .abstract serialize(s, t.flags & 0x2 == 0x2) # .mutable serialize(s, Int32(length(primary.types) - t.n_uninitialized)) @@ -654,6 +674,11 @@ end serialize(s::AbstractSerializer, @nospecialize(x)) = serialize_any(s, x) +function serialize(s::AbstractSerializer, x::Core.AddrSpace) + serialize_type(s, typeof(x)) + write(s.io, Core.bitcast(UInt8, x)) +end + function serialize_any(s::AbstractSerializer, @nospecialize(x)) tag = sertag(x) if tag > 0 @@ -1028,7 +1053,8 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) isva = deserialize(s)::Bool is_for_opaque_closure = false nospecializeinfer = false - constprop = purity = 0x00 + constprop = 0x00 + purity = 0x0000 template_or_is_opaque = deserialize(s) if isa(template_or_is_opaque, Bool) 
is_for_opaque_closure = template_or_is_opaque @@ -1038,8 +1064,10 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) if format_version(s) >= 14 constprop = deserialize(s)::UInt8 end - if format_version(s) >= 17 - purity = deserialize(s)::UInt8 + if format_version(s) >= 26 + purity = deserialize(s)::UInt16 + elseif format_version(s) >= 17 + purity = UInt16(deserialize(s)::UInt8) end template = deserialize(s) else @@ -1052,6 +1080,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) end if makenew meth.module = mod + meth.debuginfo = NullDebugInfo meth.name = name meth.file = file meth.line = line @@ -1064,7 +1093,13 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) meth.purity = purity if template !== nothing # TODO: compress template - meth.source = template::CodeInfo + template = template::CodeInfo + if format_version(s) < 29 + template.nargs = nargs + template.isva = isva + end + meth.source = template + meth.debuginfo = template.debuginfo if !@isdefined(slot_syms) slot_syms = ccall(:jl_compress_argnames, Ref{String}, (Any,), meth.source.slotnames) end @@ -1078,7 +1113,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) end if !is_for_opaque_closure mt = ccall(:jl_method_table_for, Any, (Any,), sig) - if mt !== nothing && nothing === ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), mt, sig, typemax(UInt)) + if mt !== nothing && nothing === ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), mt, sig, Base.get_world_counter()) ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, meth, C_NULL) end end @@ -1090,9 +1125,13 @@ end function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance}) linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, (Ptr{Cvoid},), C_NULL) deserialize_cycle(s, linfo) - tag = Int32(read(s.io, UInt8)::UInt8) - if tag != UNDEFREF_TAG - setfield!(linfo, :uninferred, handle_deserialize(s, tag)::CodeInfo, :monotonic) + if format_version(s) < 28 + tag = Int32(read(s.io, UInt8)::UInt8) + if tag != UNDEFREF_TAG + code = handle_deserialize(s, tag)::CodeInfo + ci = ccall(:jl_new_codeinst_for_uninferred, Ref{CodeInstance}, (Any, Any), linfo, code) + @atomic linfo.cache = ci + end end tag = Int32(read(s.io, UInt8)::UInt8) if tag != UNDEFREF_TAG @@ -1118,6 +1157,7 @@ function deserialize(s::AbstractSerializer, ::Type{Core.LineInfoNode}) return Core.LineInfoNode(mod, method, deserialize(s)::Symbol, Int32(deserialize(s)::Union{Int32, Int}), Int32(deserialize(s)::Union{Int32, Int})) end + function deserialize(s::AbstractSerializer, ::Type{PhiNode}) edges = deserialize(s) if edges isa Vector{Any} @@ -1132,6 +1172,7 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) deserialize_cycle(s, ci) code = deserialize(s)::Vector{Any} ci.code = code + ci.debuginfo = NullDebugInfo # allow older-style IR with return and gotoifnot Exprs for i in 1:length(code) stmt = code[i] @@ -1144,30 +1185,47 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) end end end - ci.codelocs = deserialize(s)::Vector{Int32} + _x = deserialize(s) + have_debuginfo = _x isa Core.DebugInfo + if have_debuginfo + ci.debuginfo = _x + else + codelocs = _x::Vector{Int32} + # TODO: convert codelocs to debuginfo format? 
+ end _x = deserialize(s) if _x isa Array || _x isa Int pre_12 = false - ci.ssavaluetypes = _x else pre_12 = true # < v1.2 ci.method_for_inference_limit_heuristics = _x - ci.ssavaluetypes = deserialize(s) - ci.linetable = deserialize(s) + _x = deserialize(s) + end + ci.ssavaluetypes = _x + if pre_12 + linetable = deserialize(s) + # TODO: convert linetable to debuginfo format? end ssaflags = deserialize(s) if length(ssaflags) ≠ length(code) # make sure the length of `ssaflags` matches that of `code` # so that the latest inference doesn't throw on IRs serialized from old versions - ssaflags = UInt8[0x00 for _ in 1:length(code)] + ssaflags = UInt32[0x00 for _ in 1:length(code)] + elseif eltype(ssaflags) != UInt32 + ssaflags = map(UInt32, ssaflags) end ci.ssaflags = ssaflags if pre_12 ci.slotflags = deserialize(s) else - ci.method_for_inference_limit_heuristics = deserialize(s) - ci.linetable = deserialize(s) + if format_version(s) <= 26 + ci.method_for_inference_limit_heuristics = deserialize(s) + end + if !have_debuginfo # pre v1.11 format + linetable = deserialize(s) + # TODO: convert linetable to debuginfo format? + end end ci.slotnames = deserialize(s) if !pre_12 @@ -1176,16 +1234,22 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) ci.rettype = deserialize(s) ci.parent = deserialize(s) world_or_edges = deserialize(s) - pre_13 = isa(world_or_edges, Integer) + pre_13 = isa(world_or_edges, Union{UInt, Int}) if pre_13 - ci.min_world = world_or_edges + ci.min_world = reinterpret(UInt, world_or_edges) + ci.max_world = reinterpret(UInt, deserialize(s)) else ci.edges = world_or_edges - ci.min_world = reinterpret(UInt, deserialize(s)) - ci.max_world = reinterpret(UInt, deserialize(s)) + ci.min_world = deserialize(s)::UInt + ci.max_world = deserialize(s)::UInt end + if format_version(s) >= 26 + ci.method_for_inference_limit_heuristics = deserialize(s) + end + end + if format_version(s) <= 26 + deserialize(s)::Bool # inferred end - ci.inferred = deserialize(s) if format_version(s) < 22 inlining_cost = deserialize(s) if isa(inlining_cost, Bool) @@ -1194,6 +1258,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) ci.inlining_cost = inlining_cost end end + if format_version(s) >= 29 + ci.nargs = deserialize(s) + end ci.propagate_inbounds = deserialize(s) if format_version(s) < 23 deserialize(s) # `pure` field has been removed @@ -1204,21 +1271,29 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) if format_version(s) >= 24 ci.nospecializeinfer = deserialize(s)::Bool end + if format_version(s) >= 29 + ci.isva = deserialize(s)::Bool + end if format_version(s) >= 21 ci.inlining = deserialize(s)::UInt8 end if format_version(s) >= 14 ci.constprop = deserialize(s)::UInt8 end - if format_version(s) >= 17 + if format_version(s) >= 26 + ci.purity = deserialize(s)::UInt16 + elseif format_version(s) >= 17 ci.purity = deserialize(s)::UInt8 end if format_version(s) >= 22 ci.inlining_cost = deserialize(s)::UInt16 end + ci.debuginfo = NullDebugInfo return ci end +import Core: NullDebugInfo + if Int === Int64 const OtherInt = Int32 else @@ -1274,7 +1349,7 @@ function deserialize_array(s::AbstractSerializer) return A end -function deserialize_fillarray!(A::Array{T}, s::AbstractSerializer) where {T} +function deserialize_fillarray!(A::Union{Array{T},Memory{T}}, s::AbstractSerializer) where {T} for i = eachindex(A) tag = Int32(read(s.io, UInt8)::UInt8) if tag != UNDEFREF_TAG @@ -1284,6 +1359,48 @@ function deserialize_fillarray!(A::Array{T}, s::AbstractSerializer) where {T} 
return A end +function deserialize(s::AbstractSerializer, X::Type{Memory{T}} where T) + slot = pop!(s.pending_refs) # e.g. deserialize_cycle + n = deserialize(s)::Int + elty = eltype(X) + if isbitstype(elty) + A = X(undef, n) + if X === Memory{Bool} + i = 1 + while i <= n + b = read(s.io, UInt8)::UInt8 + v = (b >> 7) != 0 + count = b & 0x7f + nxt = i + count + while i < nxt + A[i] = v + i += 1 + end + end + else + A = read!(s.io, A)::X + end + s.table[slot] = A + return A + end + A = X(undef, n) + s.table[slot] = A + sizehint!(s.table, s.counter + div(n, 4)) + deserialize_fillarray!(A, s) + return A +end + +function deserialize(s::AbstractSerializer, X::Type{MemoryRef{T}} where T) + x = Core.memoryref(deserialize(s))::X + i = deserialize(s)::Int + i == 2 || (x = Core.memoryref(x, i, true)) + return x::X +end + +function deserialize(s::AbstractSerializer, X::Type{Core.AddrSpace{M}} where M) + Core.bitcast(X, read(s.io, UInt8)) +end + function deserialize_expr(s::AbstractSerializer, len) e = Expr(:temp) resolve_ref_immediately(s, e) @@ -1339,7 +1456,7 @@ function deserialize_typename(s::AbstractSerializer, number) tn.max_methods = maxm if has_instance ty = ty::DataType - if !Base.issingletontype(ty) + if !isdefined(ty, :instance) singleton = ccall(:jl_new_struct, Any, (Any, Any...), ty) # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), ty, Base.fieldindex(DataType, :instance)-1, singleton) @@ -1369,16 +1486,9 @@ function deserialize_typename(s::AbstractSerializer, number) tag = Int32(read(s.io, UInt8)::UInt8) if tag != UNDEFREF_TAG kws = handle_deserialize(s, tag) - if makenew - if kws isa Vector{Method} - for def in kws - kwmt = typeof(Core.kwcall).name.mt - ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, def, C_NULL) - end - else - # old object format -- try to forward from old to new - @eval Core.kwcall(kwargs::NamedTuple, f::$ty, args...) = $kws(kwargs, f, args...) - end + if makenew && !(kws isa Vector{Method}) + # old object format -- try to forward from old to new + @eval Core.kwcall(kwargs::NamedTuple, f::$ty, args...) = $kws(kwargs, f, args...) 
end end elseif makenew @@ -1460,11 +1570,11 @@ function deserialize(s::AbstractSerializer, ::Type{Task}) t.storage = deserialize(s) state = deserialize(s) if state === :runnable - t._state = Base.task_state_runnable + @atomic :release t._state = Base.task_state_runnable elseif state === :done - t._state = Base.task_state_done + @atomic :release t._state = Base.task_state_done elseif state === :failed - t._state = Base.task_state_failed + @atomic :release t._state = Base.task_state_failed else @assert false end diff --git a/stdlib/Serialization/test/runtests.jl b/stdlib/Serialization/test/runtests.jl index 46749d4375538..4d9b439e639d7 100644 --- a/stdlib/Serialization/test/runtests.jl +++ b/stdlib/Serialization/test/runtests.jl @@ -577,7 +577,7 @@ let io = IOBuffer() serialize(io, f) seekstart(io) f2 = deserialize(io) - @test f2(1) === 1f0 + @test invokelatest(f2, 1) === 1f0 end # using a filename; #30151 @@ -595,7 +595,7 @@ let f_data f_data = "N0pMBwAAAAA0MxMAAAAAAAAAAAEFIyM1IzYiAAAAABBYH04BBE1haW6bRCIAAAAAIgAAAABNTEy+AQIjNRUAI78jAQAAAAAAAAAfTgEETWFpbkQBAiM1AQdSRVBMWzJdvxBTH04BBE1haW6bRAMAAAAzLAAARkYiAAAAAE7BTBsVRsEWA1YkH04BBE1haW5EAQEqwCXAFgNWJB9OAQRNYWluRJ0ovyXBFgFVKMAVAAbBAQAAAAEAAAABAAAATsEVRr80EAEMTGluZUluZm9Ob2RlH04BBE1haW6bRB9OAQRNYWluRAECIzUBB1JFUExbMl2/vhW+FcEAAAAVRsGifX5MTExMTsEp" end f = deserialize(IOBuffer(base64decode(f_data))) - @test f(10,3) == 23 + @test invokelatest(f, 10,3) == 23 end # issue #33466, IdDict @@ -655,3 +655,9 @@ end @test l2 == l1 @test l2.parts === () end + +@testset "Docstrings" begin + undoc = Docs.undocumented_names(Serialization) + @test_broken isempty(undoc) + @test undoc == [:AbstractSerializer, :Serializer] +end diff --git a/stdlib/SharedArrays/Project.toml b/stdlib/SharedArrays/Project.toml index 588785347c73d..46e5332f8d89d 100644 --- a/stdlib/SharedArrays/Project.toml +++ b/stdlib/SharedArrays/Project.toml @@ -1,5 +1,6 @@ name = "SharedArrays" uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +version = "1.11.0" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" diff --git a/stdlib/SharedArrays/docs/src/index.md b/stdlib/SharedArrays/docs/src/index.md index 67ceabf42115a..91ef63bf18aed 100644 --- a/stdlib/SharedArrays/docs/src/index.md +++ b/stdlib/SharedArrays/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/SharedArrays/docs/src/index.md" +``` + # Shared Arrays `SharedArray` represents an array, which is shared across multiple processes, on a single machine. diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl index f9f701c61fcea..93ce396277af7 100644 --- a/stdlib/SharedArrays/src/SharedArrays.jl +++ b/stdlib/SharedArrays/src/SharedArrays.jl @@ -8,7 +8,7 @@ module SharedArrays using Mmap, Distributed, Random import Base: length, size, elsize, ndims, IndexStyle, reshape, convert, deepcopy_internal, - show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, unsafe_convert + show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, cconvert import Random using Serialization using Serialization: serialize_cycle_header, serialize_type, writetag, UNDEFREF_TAG, serialize, deserialize @@ -358,8 +358,8 @@ for each worker process. """ localindices(S::SharedArray) = S.pidx > 0 ? 
range_1dim(S, S.pidx) : 1:0 -unsafe_convert(::Type{Ptr{T}}, S::SharedArray{T}) where {T} = unsafe_convert(Ptr{T}, sdata(S)) -unsafe_convert(::Type{Ptr{T}}, S::SharedArray ) where {T} = unsafe_convert(Ptr{T}, sdata(S)) +cconvert(::Type{Ptr{T}}, S::SharedArray{T}) where {T} = cconvert(Ptr{T}, sdata(S)) +cconvert(::Type{Ptr{T}}, S::SharedArray ) where {T} = cconvert(Ptr{T}, sdata(S)) function SharedArray(A::Array) S = SharedArray{eltype(A),ndims(A)}(size(A)) diff --git a/stdlib/SharedArrays/test/runtests.jl b/stdlib/SharedArrays/test/runtests.jl index 7f1bbb6891ce0..84dffafb3d92a 100644 --- a/stdlib/SharedArrays/test/runtests.jl +++ b/stdlib/SharedArrays/test/runtests.jl @@ -324,3 +324,7 @@ end @test SharedMatrix([0.1 0.2; 0.3 0.4]) == [0.1 0.2; 0.3 0.4] @test_throws MethodError SharedVector(rand(4,4)) @test_throws MethodError SharedMatrix(rand(4)) + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(SharedArrays)) +end diff --git a/stdlib/Sockets/Project.toml b/stdlib/Sockets/Project.toml index 5afb89b29f126..6a395465722f2 100644 --- a/stdlib/Sockets/Project.toml +++ b/stdlib/Sockets/Project.toml @@ -1,5 +1,6 @@ name = "Sockets" uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Sockets/docs/src/index.md b/stdlib/Sockets/docs/src/index.md index c294461151d7d..feb1744179261 100644 --- a/stdlib/Sockets/docs/src/index.md +++ b/stdlib/Sockets/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Sockets/docs/src/index.md" +``` + # Sockets ```@docs diff --git a/stdlib/Sockets/src/IPAddr.jl b/stdlib/Sockets/src/IPAddr.jl index 04710e400fe87..d3834a8b8bf73 100644 --- a/stdlib/Sockets/src/IPAddr.jl +++ b/stdlib/Sockets/src/IPAddr.jl @@ -31,7 +31,7 @@ end """ IPv4(host::Integer) -> IPv4 -Return an IPv4 object from ip address `host` formatted as an [`Integer`](@ref). +Return an IPv4 object from IP address `host` formatted as an [`Integer`](@ref). # Examples ```jldoctest @@ -49,7 +49,17 @@ function IPv4(host::Integer) end end -# constructor: ("1.2.3.4") +""" + IPv4(str::AbstractString) -> IPv4 + +Parse an IPv4 address string into an `IPv4` object. + +# Examples +```jldoctest +julia> IPv4("127.0.0.1") +ip"127.0.0.1" +``` +""" IPv4(str::AbstractString) = parse(IPv4, str) show(io::IO,ip::IPv4) = print(io,"ip\"",ip,"\"") @@ -84,7 +94,7 @@ end """ IPv6(host::Integer) -> IPv6 -Return an IPv6 object from ip address `host` formatted as an [`Integer`](@ref). +Return an IPv6 object from IP address `host` formatted as an [`Integer`](@ref). # Examples ```jldoctest @@ -104,6 +114,17 @@ function IPv6(host::Integer) end end +""" + IPv6(str::AbstractString) -> IPv6 + +Parse an IPv6 address string into an `IPv6` object. 
+ +# Examples +```jldoctest +julia> IPv6("::1") +ip"::1" +``` +""" IPv6(str::AbstractString) = parse(IPv6, str) # Suppress leading '0's and "0x" @@ -119,7 +140,7 @@ end show(io::IO, ip::IPv6) = print(io,"ip\"",ip,"\"") # RFC 5952 compliant show function -# http://tools.ietf.org/html/rfc5952 +# https://tools.ietf.org/html/rfc5952 function print(io::IO,ip::IPv6) i = 8 m = 0 diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl index 33767c2153211..f9e0f2f88dd78 100644 --- a/stdlib/Sockets/src/Sockets.jl +++ b/stdlib/Sockets/src/Sockets.jl @@ -31,7 +31,7 @@ export IPv4, IPv6 -import Base: isless, show, print, parse, bind, convert, isreadable, iswritable, alloc_buf_hook, _uv_hook_close +import Base: isless, show, print, parse, bind, alloc_buf_hook, _uv_hook_close using Base: LibuvStream, LibuvServer, PipeEndpoint, @handle_as, uv_error, associate_julia_struct, uvfinalize, notify_error, uv_req_data, uv_req_set_data, preserve_handle, unpreserve_handle, _UVError, IOError, @@ -107,6 +107,8 @@ if OS_HANDLE != RawFD TCPSocket(fd::RawFD) = TCPSocket(Libc._get_osfhandle(fd)) end +Base.fd(sock::TCPSocket) = Base._fd(sock) + mutable struct TCPServer <: LibuvServer handle::Ptr{Cvoid} @@ -139,6 +141,8 @@ function TCPServer(; delay=true) return tcp end +Base.fd(server::TCPServer) = Base._fd(server) + """ accept(server[, client]) @@ -199,6 +203,8 @@ end show(io::IO, stream::UDPSocket) = print(io, typeof(stream), "(", uv_status_string(stream), ")") +Base.fd(sock::UDPSocket) = Base._fd(sock) + function _uv_hook_close(sock::UDPSocket) lock(sock.cond) try @@ -450,7 +456,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg) finally Base.sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) if uv_req_data(uvw) != C_NULL # uvw is still alive, # so make sure we won't get spurious notifications later @@ -567,7 +573,11 @@ end """ nagle(socket::Union{TCPServer, TCPSocket}, enable::Bool) -Enables or disables Nagle's algorithm on a given TCP server or socket. +Nagle's algorithm batches multiple small TCP packets into larger +ones. This can improve throughput but worsen latency. Nagle's algorithm +is enabled by default. This function sets whether Nagle's algorithm is +active on a given TCP server or socket. The opposite option is called +`TCP_NODELAY` in other languages. !!! compat "Julia 1.3" This function requires Julia 1.3 or later. diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl index dda9dac308f38..f5599b8623a0b 100644 --- a/stdlib/Sockets/src/addrinfo.jl +++ b/stdlib/Sockets/src/addrinfo.jl @@ -55,10 +55,10 @@ end Gets all of the IP addresses of the `host`. Uses the operating system's underlying `getaddrinfo` implementation, which may do a DNS lookup. 
-# Example +# Examples ```julia-repl julia> getalladdrinfo("google.com") -2-element Array{IPAddr,1}: +2-element Vector{IPAddr}: ip"172.217.6.174" ip"2607:f8b0:4000:804::200e" ``` @@ -90,7 +90,7 @@ function getalladdrinfo(host::String) finally Base.sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later @@ -122,10 +122,20 @@ end getalladdrinfo(host::AbstractString) = getalladdrinfo(String(host)) """ - getaddrinfo(host::AbstractString, IPAddr=IPv4) -> IPAddr + getaddrinfo(host::AbstractString, IPAddr) -> IPAddr Gets the first IP address of the `host` of the specified `IPAddr` type. -Uses the operating system's underlying getaddrinfo implementation, which may do a DNS lookup. +Uses the operating system's underlying getaddrinfo implementation, which may do +a DNS lookup. + +# Examples +```julia-repl +julia> getaddrinfo("localhost", IPv6) +ip"::1" + +julia> getaddrinfo("localhost", IPv4) +ip"127.0.0.1" +``` """ function getaddrinfo(host::String, T::Type{<:IPAddr}) addrs = getalladdrinfo(host) @@ -137,6 +147,14 @@ function getaddrinfo(host::String, T::Type{<:IPAddr}) throw(DNSError(host, UV_EAI_NONAME)) end getaddrinfo(host::AbstractString, T::Type{<:IPAddr}) = getaddrinfo(String(host), T) + +""" + getaddrinfo(host::AbstractString) -> IPAddr + +Gets the first available IP address of `host`, which may be either an `IPv4` or +`IPv6` address. Uses the operating system's underlying getaddrinfo +implementation, which may do a DNS lookup. +""" function getaddrinfo(host::AbstractString) addrs = getalladdrinfo(String(host)) if !isempty(addrs) @@ -205,7 +223,7 @@ function getnameinfo(address::Union{IPv4, IPv6}) finally Base.sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later @@ -264,16 +282,14 @@ See also [`getipaddrs`](@ref). """ function getipaddr(addr_type::Type{T}) where T<:IPAddr addrs = getipaddrs(addr_type) + isempty(addrs) && error("No networking interface available") - if length(addrs) == 0 - error("No networking interface available") - end - - # Prefer the first IPv4 address + # When `addr_type` is `IPAddr`, `addrs` contain IP addresses of all types + # In that case, we prefer to return the first IPv4 i = something(findfirst(ip -> ip isa IPv4, addrs), 1) return addrs[i] end -getipaddr() = getipaddr(IPv4) +getipaddr() = getipaddr(IPAddr) """ @@ -291,7 +307,7 @@ The `loopback` keyword argument dictates whether loopback addresses (e.g. `ip"12 # Examples ```julia-repl julia> getipaddrs() -5-element Array{IPAddr,1}: +5-element Vector{IPAddr}: ip"198.51.100.17" ip"203.0.113.2" ip"2001:db8:8:4:445e:5fff:fe5d:5500" @@ -299,7 +315,7 @@ julia> getipaddrs() ip"fe80::445e:5fff:fe5d:5500" julia> getipaddrs(IPv6) -3-element Array{IPv6,1}: +3-element Vector{IPv6}: ip"2001:db8:8:4:445e:5fff:fe5d:5500" ip"2001:db8:8:4:c164:402e:7e3c:3668" ip"fe80::445e:5fff:fe5d:5500" @@ -344,7 +360,7 @@ are not guaranteed to be unique beyond their network segment, therefore routers do not forward them. Link-local addresses are from the address blocks `169.254.0.0/16` or `fe80::/10`. 
-# Example +# Examples ```julia filter(!islinklocaladdr, getipaddrs()) ``` diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl index 02a994460afbf..26f95d4ce1819 100644 --- a/stdlib/Sockets/test/runtests.jl +++ b/stdlib/Sockets/test/runtests.jl @@ -223,7 +223,8 @@ end end @test getnameinfo(ip"192.0.2.1") == "192.0.2.1" @test getnameinfo(ip"198.51.100.1") == "198.51.100.1" - @test getnameinfo(ip"203.0.113.1") == "203.0.113.1" + # Temporarily broken due to a DNS issue. See https://github.com/JuliaLang/julia/issues/55008 + @test_skip getnameinfo(ip"203.0.113.1") == "203.0.113.1" @test getnameinfo(ip"0.1.1.1") == "0.1.1.1" @test getnameinfo(ip"::ffff:0.1.1.1") == "::ffff:0.1.1.1" @test getnameinfo(ip"::ffff:192.0.2.1") == "::ffff:192.0.2.1" @@ -452,6 +453,8 @@ end catch e if isa(e, Base.IOError) && Base.uverrorname(e.code) == "EPERM" @warn "UDP IPv4 broadcast test skipped (permission denied upon send, restrictive firewall?)" + elseif Sys.isapple() && isa(e, Base.IOError) && Base.uverrorname(e.code) == "EHOSTUNREACH" + @warn "UDP IPv4 broadcast test skipped (local network access not granted?)" else rethrow() end @@ -602,6 +605,31 @@ end end end +@testset "fd() methods" begin + function valid_fd(x) + if Sys.iswindows() + return x isa Base.OS_HANDLE + elseif !Sys.iswindows() + value = Base.cconvert(Cint, x) + + # 2048 is a bit arbitrary, it depends on the process not having too many + # file descriptors open. But select() has a limit of 1024 and people + # don't seem to hit it too often so let's hope twice that is safe. + return value > 0 && value < 2048 + end + end + + sock = TCPSocket(; delay=false) + @test valid_fd(fd(sock)) + + sock = UDPSocket() + bind(sock, Sockets.localhost, 0) + @test valid_fd(fd(sock)) + + server = listen(Sockets.localhost, 0) + @test valid_fd(fd(server)) +end + @testset "TCPServer constructor" begin s = Sockets.TCPServer(; delay=false) if ccall(:jl_has_so_reuseport, Int32, ()) == 1 @@ -611,11 +639,26 @@ end @testset "getipaddrs" begin @test getipaddr() in getipaddrs() - try - getipaddr(IPv6) in getipaddrs(IPv6) - catch - if !isempty(getipaddrs(IPv6)) - @test "getipaddr(IPv6) errored when it shouldn't have!" 
+ + has_ipv4 = !isempty(getipaddrs(IPv4)) + if has_ipv4 + @test getipaddr(IPv4) in getipaddrs(IPv4) + else + @test_throws "No networking interface available" getipaddr(IPv4) + end + + has_ipv6 = !isempty(getipaddrs(IPv6)) + if has_ipv6 + @test getipaddr(IPv6) in getipaddrs(IPv6) + else + @test_throws "No networking interface available" getipaddr(IPv6) + end + + @testset "getipaddr() prefers IPv4 over IPv6" begin + if has_ipv4 + @test getipaddr() isa IPv4 + else + @test getipaddr() isa IPv6 end end @@ -682,3 +725,7 @@ end close(sockets_watchdog_timer) + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Sockets)) +end diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version index d4a548daef5d7..3f6ab5b878069 100644 --- a/stdlib/SparseArrays.version +++ b/stdlib/SparseArrays.version @@ -1,4 +1,4 @@ SPARSEARRAYS_BRANCH = main -SPARSEARRAYS_SHA1 = 8affe9e499379616e33fc60a24bb31500e8423d7 +SPARSEARRAYS_SHA1 = 5f527215c188ee99247cdce31ba8ce9e11f35055 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1 diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version index 27197b12be54c..3df70d30ba7e6 100644 --- a/stdlib/Statistics.version +++ b/stdlib/Statistics.version @@ -1,4 +1,4 @@ STATISTICS_BRANCH = master -STATISTICS_SHA1 = a3feba2bb63f06b7f40024185e9fa5f6385e2510 +STATISTICS_SHA1 = d49c2bf4f81e1efb4980a35fe39c815ef8396297 STATISTICS_GIT_URL := https://github.com/JuliaStats/Statistics.jl.git STATISTICS_TAR_URL = https://api.github.com/repos/JuliaStats/Statistics.jl/tarball/$1 diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version new file mode 100644 index 0000000000000..c72f7a8399725 --- /dev/null +++ b/stdlib/StyledStrings.version @@ -0,0 +1,4 @@ +STYLEDSTRINGS_BRANCH = main +STYLEDSTRINGS_SHA1 = 8985a37ac054c37d084a03ad2837208244824877 +STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git +STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1 diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml index d1fb2c25fa68b..3a1dc50a103fb 100644 --- a/stdlib/SuiteSparse_jll/Project.toml +++ b/stdlib/SuiteSparse_jll/Project.toml @@ -1,15 +1,14 @@ name = "SuiteSparse_jll" uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.10.1+6" +version = "7.8.3+2" [deps] libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.7" +julia = "1.12" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl index a347a91721bad..9e03033c4e3fa 100644 --- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl +++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/SuiteSparse_jll.jl baremodule SuiteSparse_jll using Base, Libdl, libblastrampoline_jll -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] @@ -54,57 +53,62 @@ if Sys.iswindows() const libsuitesparseconfig = "libsuitesparseconfig.dll" const libumfpack = "libumfpack.dll" elseif Sys.isapple() - const libamd = "@rpath/libamd.2.dylib" - const libbtf = "@rpath/libbtf.1.dylib" - const libcamd = 
"@rpath/libcamd.2.dylib" - const libccolamd = "@rpath/libccolamd.2.dylib" - const libcholmod = "@rpath/libcholmod.3.dylib" - const libcolamd = "@rpath/libcolamd.2.dylib" - const libklu = "@rpath/libklu.1.dylib" - const libldl = "@rpath/libldl.2.dylib" - const librbio = "@rpath/librbio.2.dylib" - const libspqr = "@rpath/libspqr.2.dylib" - const libsuitesparseconfig = "@rpath/libsuitesparseconfig.5.dylib" - const libumfpack = "@rpath/libumfpack.5.dylib" + const libamd = "@rpath/libamd.3.dylib" + const libbtf = "@rpath/libbtf.2.dylib" + const libcamd = "@rpath/libcamd.3.dylib" + const libccolamd = "@rpath/libccolamd.3.dylib" + const libcholmod = "@rpath/libcholmod.5.dylib" + const libcolamd = "@rpath/libcolamd.3.dylib" + const libklu = "@rpath/libklu.2.dylib" + const libldl = "@rpath/libldl.3.dylib" + const librbio = "@rpath/librbio.4.dylib" + const libspqr = "@rpath/libspqr.4.dylib" + const libsuitesparseconfig = "@rpath/libsuitesparseconfig.7.dylib" + const libumfpack = "@rpath/libumfpack.6.dylib" else - const libamd = "libamd.so.2" - const libbtf = "libbtf.so.1" - const libcamd = "libcamd.so.2" - const libccolamd = "libccolamd.so.2" - const libcholmod = "libcholmod.so.3" - const libcolamd = "libcolamd.so.2" - const libklu = "libklu.so.1" - const libldl = "libldl.so.2" - const librbio = "librbio.so.2" - const libspqr = "libspqr.so.2" - const libsuitesparseconfig = "libsuitesparseconfig.so.5" - const libumfpack = "libumfpack.so.5" + const libamd = "libamd.so.3" + const libbtf = "libbtf.so.2" + const libcamd = "libcamd.so.3" + const libccolamd = "libccolamd.so.3" + const libcholmod = "libcholmod.so.5" + const libcolamd = "libcolamd.so.3" + const libklu = "libklu.so.2" + const libldl = "libldl.so.3" + const librbio = "librbio.so.4" + const libspqr = "libspqr.so.4" + const libsuitesparseconfig = "libsuitesparseconfig.so.7" + const libumfpack = "libumfpack.so.6" end function __init__() + # BSD-3-Clause + global libamd_handle = dlopen(libamd) + global libamd_path = dlpath(libamd_handle) + global libcamd_handle = dlopen(libcamd) + global libcamd_path = dlpath(libcamd_handle) + global libccolamd_handle = dlopen(libccolamd) + global libccolamd_path = dlpath(libccolamd_handle) + global libcolamd_handle = dlopen(libcolamd) + global libcolamd_path = dlpath(libcolamd_handle) + global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) + global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) + + # LGPL-2.1+ + global libbtf_handle = dlopen(libbtf) + global libbtf_path = dlpath(libbtf_handle) + global libklu_handle = dlopen(libklu) + global libklu_path = dlpath(libklu_handle) + global libldl_handle = dlopen(libldl) + global libldl_path = dlpath(libldl_handle) + + # GPL-2.0+ if Base.USE_GPL_LIBS - global libamd_handle = dlopen(libamd) - global libamd_path = dlpath(libamd_handle) - global libbtf_handle = dlopen(libbtf) - global libbtf_path = dlpath(libbtf_handle) - global libcamd_handle = dlopen(libcamd) - global libcamd_path = dlpath(libcamd_handle) - global libccolamd_handle = dlopen(libccolamd) - global libccolamd_path = dlpath(libccolamd_handle) global libcholmod_handle = dlopen(libcholmod) global libcholmod_path = dlpath(libcholmod_handle) - global libcolamd_handle = dlopen(libcolamd) - global libcolamd_path = dlpath(libcolamd_handle) - global libklu_handle = dlopen(libklu) - global libklu_path = dlpath(libklu_handle) - global libldl_handle = dlopen(libldl) - global libldl_path = dlpath(libldl_handle) global librbio_handle = dlopen(librbio) global librbio_path = 
dlpath(librbio_handle) global libspqr_handle = dlopen(libspqr) global libspqr_path = dlpath(libspqr_handle) - global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) - global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) global libumfpack_handle = dlopen(libumfpack) global libumfpack_path = dlpath(libumfpack_handle) end diff --git a/stdlib/SuiteSparse_jll/test/runtests.jl b/stdlib/SuiteSparse_jll/test/runtests.jl index ca356951f99e2..922da55fa1881 100644 --- a/stdlib/SuiteSparse_jll/test/runtests.jl +++ b/stdlib/SuiteSparse_jll/test/runtests.jl @@ -2,6 +2,10 @@ using Test, SuiteSparse_jll +# SuiteSparse only uses SUITESPARSE_MAIN_VERSION and SUITESPARSE_SUB_VERSION to compute its version +# The SUITESPARSE_SUBSUB_VERSION is not used +# TODO before release: update to 7020 or above when upstreamed. +# This should be safe and unnecessary since we specify exact version of the BB JLL. @testset "SuiteSparse_jll" begin - @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) == 5010 + @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) > 7000 end diff --git a/stdlib/TOML/Project.toml b/stdlib/TOML/Project.toml index 17fc8be19ec8e..ceb4acf8bbc65 100644 --- a/stdlib/TOML/Project.toml +++ b/stdlib/TOML/Project.toml @@ -6,12 +6,13 @@ version = "1.0.3" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" [compat] +Dates = "1.11.0" julia = "1.6" [extras] Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" [targets] diff --git a/stdlib/TOML/docs/src/index.md b/stdlib/TOML/docs/src/index.md index 36e8ec6248108..c6fe514ea39f4 100644 --- a/stdlib/TOML/docs/src/index.md +++ b/stdlib/TOML/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/TOML/docs/src/index.md" +``` + # TOML TOML.jl is a Julia standard library for parsing and writing [TOML @@ -36,7 +40,7 @@ none:1:16 error: failed to parse value ``` There are other versions of the parse functions ([`TOML.tryparse`](@ref) -and [`TOML.tryparsefile`]) that instead of throwing exceptions on parser error +and [`TOML.tryparsefile`](@ref)) that instead of throwing exceptions on parser error returns a [`TOML.ParserError`](@ref) with information: ```jldoctest diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl index a2ea1869b4079..b37a5ca83c251 100644 --- a/stdlib/TOML/src/TOML.jl +++ b/stdlib/TOML/src/TOML.jl @@ -1,7 +1,14 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +""" +TOML.jl is a Julia standard library for parsing and writing TOML v1.0 files. +This module provides functions to parse TOML strings and files into Julia data structures +and to serialize Julia data structures to TOML format. +""" module TOML +using Dates + module Internals # The parser is defined in Base using Base.TOML: Parser, parse, tryparse, ParserError, isvalid_barekey_char, reinit! @@ -18,7 +25,7 @@ module Internals end # https://github.com/JuliaLang/julia/issues/36605 -readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file") +_readstring(f::AbstractString) = isfile(f) ? 
read(f, String) : error(repr(f), ": No such file") """ Parser() @@ -29,7 +36,14 @@ explicitly create a `Parser` but instead one directly use use will however reuse some internal data structures which can be beneficial for performance if a larger number of small files are parsed. """ -const Parser = Internals.Parser +struct Parser + _p::Internals.Parser{Dates} +end + +# Dates-enabled constructors +Parser() = Parser(Internals.Parser{Dates}()) +Parser(io::IO) = Parser(Internals.Parser{Dates}(io)) +Parser(str::String; filepath=nothing) = Parser(Internals.Parser{Dates}(str; filepath)) """ parsefile(f::AbstractString) @@ -41,9 +55,9 @@ Parse file `f` and return the resulting table (dictionary). Throw a See also [`TOML.tryparsefile`](@ref). """ parsefile(f::AbstractString) = - Internals.parse(Parser(readstring(f); filepath=abspath(f))) + Internals.parse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f))) parsefile(p::Parser, f::AbstractString) = - Internals.parse(Internals.reinit!(p, readstring(f); filepath=abspath(f))) + Internals.parse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f))) """ tryparsefile(f::AbstractString) @@ -55,9 +69,9 @@ Parse file `f` and return the resulting table (dictionary). Return a See also [`TOML.parsefile`](@ref). """ tryparsefile(f::AbstractString) = - Internals.tryparse(Parser(readstring(f); filepath=abspath(f))) + Internals.tryparse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f))) tryparsefile(p::Parser, f::AbstractString) = - Internals.tryparse(Internals.reinit!(p, readstring(f); filepath=abspath(f))) + Internals.tryparse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f))) """ parse(x::Union{AbstractString, IO}) @@ -68,10 +82,11 @@ Throw a [`ParserError`](@ref) upon failure. See also [`TOML.tryparse`](@ref). """ +parse(p::Parser) = Internals.parse(p._p) parse(str::AbstractString) = - Internals.parse(Parser(String(str))) + Internals.parse(Internals.Parser{Dates}(String(str))) parse(p::Parser, str::AbstractString) = - Internals.parse(Internals.reinit!(p, String(str))) + Internals.parse(Internals.reinit!(p._p, String(str))) parse(io::IO) = parse(read(io, String)) parse(p::Parser, io::IO) = parse(p, read(io, String)) @@ -84,10 +99,11 @@ Return a [`ParserError`](@ref) upon failure. See also [`TOML.parse`](@ref). """ +tryparse(p::Parser) = Internals.tryparse(p._p) tryparse(str::AbstractString) = - Internals.tryparse(Parser(String(str))) + Internals.tryparse(Internals.Parser{Dates}(String(str))) tryparse(p::Parser, str::AbstractString) = - Internals.tryparse(Internals.reinit!(p, String(str))) + Internals.tryparse(Internals.reinit!(p._p, String(str))) tryparse(io::IO) = tryparse(read(io, String)) tryparse(p::Parser, io::IO) = tryparse(p, read(io, String)) @@ -105,10 +121,11 @@ const ParserError = Internals.ParserError """ - print([to_toml::Function], io::IO [=stdout], data::AbstractDict; sorted=false, by=identity) + print([to_toml::Function], io::IO [=stdout], data::AbstractDict; sorted=false, by=identity, inline_tables::IdSet{<:AbstractDict}) Write `data` as TOML syntax to the stream `io`. If the keyword argument `sorted` is set to `true`, -sort tables according to the function given by the keyword argument `by`. +sort tables according to the function given by the keyword argument `by`. If the keyword argument +`inline_tables` is given, it should be a set of tables that should be printed "inline". 
The following data types are supported: `AbstractDict`, `AbstractVector`, `AbstractString`, `Integer`, `AbstractFloat`, `Bool`, `Dates.DateTime`, `Dates.Time`, `Dates.Date`. Note that the integers and floats @@ -118,4 +135,17 @@ supported type. """ const print = Internals.Printer.print +public Parser, parsefile, tryparsefile, parse, tryparse, ParserError, print + +# These methods are private Base interfaces, but we do our best to support them over +# the TOML stdlib types anyway to minimize downstream breakage. +Base.TOMLCache(p::Parser) = Base.TOMLCache(p._p, Dict{String, Base.CachedTOMLDict}()) +Base.TOMLCache(p::Parser, d::Base.CachedTOMLDict) = Base.TOMLCache(p._p, d) +Base.TOMLCache(p::Parser, d::Dict{String, Dict{String, Any}}) = Base.TOMLCache(p._p, d) + +Internals.reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing) = + Internals.reinit!(p._p, str; filepath) +Internals.parse(p::Parser) = Internals.parse(p._p) +Internals.tryparse(p::Parser) = Internals.tryparse(p._p) + end diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl index 1fa9f97405504..63f65b017d393 100644 --- a/stdlib/TOML/src/print.jl +++ b/stdlib/TOML/src/print.jl @@ -34,7 +34,8 @@ function print_toml_escaped(io::IO, s::AbstractString) end const MbyFunc = Union{Function, Nothing} -const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString} +const TOMLValue = Union{AbstractVector, AbstractDict, Bool, Integer, AbstractFloat, AbstractString, + Dates.DateTime, Dates.Time, Dates.Date, Base.TOML.DateTime, Base.TOML.Time, Base.TOML.Date} ######## @@ -74,21 +75,24 @@ end ########## # Fallback -function printvalue(f::MbyFunc, io::IO, value) +function printvalue(f::MbyFunc, io::IO, value, sorted::Bool) toml_value = to_toml_value(f, value) @invokelatest printvalue(f, io, toml_value) end -function printvalue(f::MbyFunc, io::IO, value::AbstractVector) +function printvalue(f::MbyFunc, io::IO, value::AbstractVector, sorted::Bool) Base.print(io, "[") for (i, x) in enumerate(value) i != 1 && Base.print(io, ", ") - printvalue(f, io, x) + printvalue(f, io, x, sorted) end Base.print(io, "]") end -function printvalue(f::MbyFunc, io::IO, value::TOMLValue) +function printvalue(f::MbyFunc, io::IO, value::TOMLValue, sorted::Bool) + value isa Base.TOML.DateTime && (value = Dates.DateTime(value)) + value isa Base.TOML.Time && (value = Dates.Time(value)) + value isa Base.TOML.Date && (value = Dates.Date(value)) value isa Dates.DateTime ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z")) : value isa Dates.Time ? Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) : value isa Dates.Date ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) : @@ -97,10 +101,11 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue) value isa AbstractFloat ? Base.print(io, isnan(value) ? "nan" : isinf(value) ? string(value > 0 ? "+" : "-", "inf") : Float64(value)) : # TOML specifies IEEE 754 binary64 for float - value isa AbstractString ? (Base.print(io, "\""); + value isa AbstractString ? (qmark = Base.contains(value, "\n") ? "\"\"\"" : "\""; + Base.print(io, qmark); print_toml_escaped(io, value); - Base.print(io, "\"")) : - value isa AbstractDict ? print_inline_table(f, io, value) : + Base.print(io, qmark)) : + value isa AbstractDict ? 
print_inline_table(f, io, value, sorted) : error("internal error in TOML printing, unhandled value") end @@ -112,13 +117,18 @@ function print_integer(io::IO, value::Integer) return end -function print_inline_table(f::MbyFunc, io::IO, value::AbstractDict) +function print_inline_table(f::MbyFunc, io::IO, value::AbstractDict, sorted::Bool) + vkeys = collect(keys(value)) + if sorted + sort!(vkeys) + end Base.print(io, "{") - for (i, (k,v)) in enumerate(value) + for (i, k) in enumerate(vkeys) + v = value[k] i != 1 && Base.print(io, ", ") printkey(io, [String(k)]) Base.print(io, " = ") - printvalue(f, io, v) + printvalue(f, io, v, sorted) end Base.print(io, "}") end @@ -141,11 +151,18 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict, indent::Int = 0, first_block::Bool = true, sorted::Bool = false, + inline_tables::IdSet, by::Function = identity, ) + + if a in inline_tables + @invokelatest print_inline_table(f, io, a) + return + end + akeys = keys(a) if sorted - akeys = sort!(collect(akeys); by=by) + akeys = sort!(collect(akeys); by) end # First print non-tabular entries @@ -154,12 +171,14 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict, if !isa(value, TOMLValue) value = to_toml_value(f, value) end - is_tabular(value) && continue + if is_tabular(value) && !(value in inline_tables) + continue + end Base.print(io, ' '^4max(0,indent-1)) printkey(io, [String(key)]) Base.print(io, " = ") # print separator - printvalue(f, io, value) + printvalue(f, io, value, sorted) Base.print(io, "\n") # new line? first_block = false end @@ -169,10 +188,10 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict, if !isa(value, TOMLValue) value = to_toml_value(f, value) end - if is_table(value) + if is_table(value) && !(value in inline_tables) push!(ks, String(key)) _values = @invokelatest values(value) - header = isempty(value) || !all(is_tabular(v) for v in _values)::Bool + header = isempty(value) || !all(is_tabular(v) for v in _values)::Bool || any(v in inline_tables for v in _values)::Bool if header # print table first_block || println(io) @@ -183,7 +202,7 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict, Base.print(io,"]\n") end # Use runtime dispatch here since the type of value seems not to be enforced other than as AbstractDict - @invokelatest print_table(f, io, value, ks; indent = indent + header, first_block = header, sorted=sorted, by=by) + @invokelatest print_table(f, io, value, ks; indent = indent + header, first_block = header, sorted, by, inline_tables) pop!(ks) elseif @invokelatest(is_array_of_tables(value)) # print array of tables @@ -197,7 +216,7 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict, Base.print(io,"]]\n") # TODO, nicer error here !isa(v, AbstractDict) && error("array should contain only tables") - @invokelatest print_table(f, io, v, ks; indent = indent + 1, sorted=sorted, by=by) + @invokelatest print_table(f, io, v, ks; indent = indent + 1, sorted, by, inline_tables) end pop!(ks) end @@ -209,7 +228,7 @@ end # API # ####### -print(f::MbyFunc, io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = print_table(f, io, a; sorted=sorted, by=by) -print(f::MbyFunc, a::AbstractDict; sorted::Bool=false, by=identity) = print(f, stdout, a; sorted=sorted, by=by) -print(io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = print_table(nothing, io, a; sorted=sorted, by=by) -print(a::AbstractDict; sorted::Bool=false, by=identity) = print(nothing, stdout, a; sorted=sorted, by=by) +print(f::MbyFunc, io::IO, a::AbstractDict; sorted::Bool=false, 
by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) = print_table(f, io, a; sorted, by, inline_tables) +print(f::MbyFunc, a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) = print(f, stdout, a; sorted, by, inline_tables) +print(io::IO, a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) = print_table(nothing, io, a; sorted, by, inline_tables) +print( a::AbstractDict; sorted::Bool=false, by=identity, inline_tables::IdSet{<:AbstractDict}=IdSet{Dict{String}}()) = print(nothing, stdout, a; sorted, by, inline_tables) diff --git a/stdlib/TOML/test/parse.jl b/stdlib/TOML/test/parse.jl index 12f68acbdb5bf..30400344f67cf 100644 --- a/stdlib/TOML/test/parse.jl +++ b/stdlib/TOML/test/parse.jl @@ -14,6 +14,7 @@ using TOML: ParserError TOML.parse(IOBuffer(str)) == TOML.parse(p, str) == TOML.parse(p, SubString(str)) == TOML.parse(p, IOBuffer(str)) == dict + @test TOML.parse("a\t=1") == dict @test_throws ParserError TOML.parse(invalid_str) @test_throws ParserError TOML.parse(SubString(invalid_str)) @test_throws ParserError TOML.parse(IOBuffer(invalid_str)) diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl index 765b6feb491a5..8fba1b1c1df10 100644 --- a/stdlib/TOML/test/print.jl +++ b/stdlib/TOML/test/print.jl @@ -140,3 +140,68 @@ d = "hello" a = 2 b = 9.9 """ + + +inline_dict = Dict("a" => [1,2], "b" => Dict("a" => "b"), "c" => "foo") +d = Dict( + "x" => "y", + "y" => inline_dict, + "z" => [1,2,3], +) +inline_tables = IdSet{Dict}() +push!(inline_tables, inline_dict) +@test toml_str(d; sorted=true, inline_tables) == +""" +x = "y" +y = {a = [1, 2], b = {a = "b"}, c = "foo"} +z = [1, 2, 3] +""" + + +d = Dict("deps" => Dict( + "LocalPkg" => "fcf55292-0d03-4e8a-9e0b-701580031fc3", + "Example" => "7876af07-990d-54b4-ab0e-23690620f79a"), + "sources" => Dict( + "LocalPkg" => Dict("path" => "LocalPkg"), + "Example" => Dict("url" => "https://github.com/JuliaLang/Example.jl"))) + +inline_tables = IdSet{Dict}() +push!(inline_tables, d["sources"]["LocalPkg"]) +push!(inline_tables, d["sources"]["Example"]) + +@test toml_str(d; sorted=true, inline_tables) == +""" +[deps] +Example = "7876af07-990d-54b4-ab0e-23690620f79a" +LocalPkg = "fcf55292-0d03-4e8a-9e0b-701580031fc3" + +[sources] +Example = {url = "https://github.com/JuliaLang/Example.jl"} +LocalPkg = {path = "LocalPkg"} +""" + +inline_tables = IdSet{Dict}() +push!(inline_tables, d["sources"]["LocalPkg"]) +s = """ +[deps] +Example = "7876af07-990d-54b4-ab0e-23690620f79a" +LocalPkg = "fcf55292-0d03-4e8a-9e0b-701580031fc3" + +[sources] +LocalPkg = {path = "LocalPkg"} + + [sources.Example] + url = "https://github.com/JuliaLang/Example.jl" +""" +@test toml_str(d; sorted=true, inline_tables) == s +@test roundtrip(s) + +# multiline strings (#55083) +s = """ +a = \"\"\"lorem ipsum + + + +alpha\"\"\" +""" +@test roundtrip(s) diff --git a/stdlib/TOML/test/runtests.jl b/stdlib/TOML/test/runtests.jl index 7376fab914636..47c762d054711 100644 --- a/stdlib/TOML/test/runtests.jl +++ b/stdlib/TOML/test/runtests.jl @@ -25,3 +25,7 @@ include("print.jl") include("parse.jl") @inferred TOML.parse("foo = 3") + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(TOML)) +end diff --git a/stdlib/TOML/test/toml_test.jl b/stdlib/TOML/test/toml_test.jl index f4670058223a1..cdd8950677c75 100644 --- a/stdlib/TOML/test/toml_test.jl +++ b/stdlib/TOML/test/toml_test.jl @@ -36,7 +36,7 @@ end function check_valid(f) jsn = try 
jsn2data(@eval include($f * ".jl")) - # Some files cannot be reprsented with julias DateTime (timezones) + # Some files cannot be represented with julias DateTime (timezones) catch return false end @@ -72,11 +72,7 @@ for (root, dirs, files) in walkdir(valid_test_folder) rel = replace(rel, '\\' => '/') end v = check_valid(splitext(file)[1]) - if rel in failures - @test_broken v - else - @test v - end + @test v broken=rel in failures end end end @@ -145,11 +141,7 @@ for (root, dirs, files) in walkdir(invalid_test_folder) rel = replace(rel, '\\' => '/') end v = check_invalid(file) - if rel in failures - @test_broken v - else - @test v - end + @test v broken=rel in failures end end end diff --git a/stdlib/TOML/test/utils/utils.jl b/stdlib/TOML/test/utils/utils.jl index c484a61cee25a..b01acf04a72fe 100644 --- a/stdlib/TOML/test/utils/utils.jl +++ b/stdlib/TOML/test/utils/utils.jl @@ -33,7 +33,7 @@ end function get_data() tmp = mktempdir() path = joinpath(tmp, basename(url)) - Downloads.download(url, path) + retry(Downloads.download, delays=fill(10,5))(url, path) Tar.extract(`$(exe7z()) x $path -so`, joinpath(tmp, "testfiles")) return joinpath(tmp, "testfiles", "toml-test-julia-$version", "testfiles") end diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl index be2ed3acce5b5..53be1b04708b3 100644 --- a/stdlib/TOML/test/values.jl +++ b/stdlib/TOML/test/values.jl @@ -4,16 +4,31 @@ using Test using TOML using TOML: Internals +# Construct an explicit Parser to test the "cached" version of parsing +const test_parser = TOML.Parser() + function testval(s, v) f = "foo = $s" + # First, test with the standard entrypoint parsed = TOML.parse(f)["foo"] return isequal(v, parsed) && typeof(v) == typeof(parsed) + (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false + # Next, test with the "cached" (explicit Parser) entrypoint + parsed = TOML.parse(test_parser, f)["foo"] + (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false + return true end function failval(s, v) f = "foo = $s" + # First, test with the standard entrypoint err = TOML.tryparse(f); return err isa TOML.Internals.ParserError && err.type == v + (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false + # Next, test with the "cached" (explicit Parser) entrypoint + err = TOML.tryparse(test_parser, f); + (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false + return true end @testset "Numbers" begin @@ -157,6 +172,6 @@ end @testset "Array" begin @test testval("[1,2,3]", Int64[1,2,3]) @test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0]) - @test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)]) + @test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)]) @test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"]) end diff --git a/stdlib/Tar.version b/stdlib/Tar.version index 44e829b5fea54..2403cd1c7c635 100644 --- a/stdlib/Tar.version +++ b/stdlib/Tar.version @@ -1,4 +1,4 @@ TAR_BRANCH = master -TAR_SHA1 = ff55460f4d329949661a33e6c8168ce6d890676c +TAR_SHA1 = 1114260f5c7a7b59441acadca2411fa227bb8a3b TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1 diff --git a/stdlib/Test/Project.toml b/stdlib/Test/Project.toml index ee1ae15fd7154..f04b4f976196f 100644 --- a/stdlib/Test/Project.toml +++ b/stdlib/Test/Project.toml @@ -1,5 +1,6 @@ name = "Test" uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" [deps] InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" diff 
--git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md index 1c9a55480d2c9..c1fe9e8e20c63 100644 --- a/stdlib/Test/docs/src/index.md +++ b/stdlib/Test/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Test/docs/src/index.md" +``` + # Unit Testing ```@meta @@ -69,6 +73,8 @@ Error During Test Test threw an exception of type MethodError Expression: foo(:cat) == 1 MethodError: no method matching length(::Symbol) + The function `length` exists, but no method is defined for this combination of argument types. + Closest candidates are: length(::SimpleVector) at essentials.jl:256 length(::Base.MethodList) at reflection.jl:521 @@ -316,8 +322,12 @@ function finish(ts::CustomTestSet) # just record if we're not the top-level parent if get_testset_depth() > 0 record(get_testset(), ts) + return ts end - ts + + # so the results are printed if we are at the top level + Test.print_test_results(ts) + return ts end ``` @@ -332,6 +342,45 @@ And using that testset looks like: end ``` +In order to use a custom testset and have the recorded results printed as part of any outer default testset, +also define `Test.get_test_counts`. This might look like so: + +```julia +using Test: AbstractTestSet, Pass, Fail, Error, Broken, get_test_counts, TestCounts, format_duration + +function Test.get_test_counts(ts::CustomTestSet) + passes, fails, errors, broken = 0, 0, 0, 0 + # cumulative results + c_passes, c_fails, c_errors, c_broken = 0, 0, 0, 0 + + for t in ts.results + # count up results + isa(t, Pass) && (passes += 1) + isa(t, Fail) && (fails += 1) + isa(t, Error) && (errors += 1) + isa(t, Broken) && (broken += 1) + # handle children + if isa(t, AbstractTestSet) + tc = get_test_counts(t)::TestCounts + c_passes += tc.passes + tc.cumulative_passes + c_fails += tc.fails + tc.cumulative_fails + c_errors += tc.errors + tc.cumulative_errors + c_broken += tc.broken + tc.cumulative_broken + end + end + # get a duration, if we have one + duration = format_duration(ts) + return TestCounts(true, passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration) +end +``` + +```@docs +Test.TestCounts +Test.get_test_counts +Test.format_duration +Test.print_test_results +``` + ## Test utilities ```@docs @@ -368,6 +417,8 @@ Add the following to `src/Example.jl`: ```julia module Example +export greet, simple_add, type_multiply + function greet() "Hello world!" end @@ -441,7 +492,7 @@ end #### Writing Tests for `greeting_tests.jl` -Using our knowledge of `Test.jl`, here are some example tests we could add to `math_tests.jl`: +Using our knowledge of `Test.jl`, here are some example tests we could add to `greeting_tests.jl`: ```julia @testset "Testset 3" begin @@ -489,3 +540,15 @@ Using `Test.jl`, more complicated tests can be added for packages but this shoul ```@meta DocTestSetup = nothing ``` + +### Code Coverage + +Code coverage tracking during tests can be enabled using the `pkg> test --coverage` flag (or at a lower level using the +[`--code-coverage`](@ref command-line-interface) julia arg). This is on by default in the +[julia-runtest](https://github.com/julia-actions/julia-runtest) GitHub action. + +To evaluate coverage either manually inspect the `.cov` files that are generated beside the source files locally, +or in CI use the [julia-processcoverage](https://github.com/julia-actions/julia-processcoverage) GitHub action. + +!!! compat "Julia 1.11" + Since Julia 1.11, coverage is not collected during the package precompilation phase. 
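To complement the coverage note above, a hypothetical programmatic equivalent of `pkg> test --coverage` (assuming the `coverage` keyword of `Pkg.test` and an installed package named `Example`):

```julia
using Pkg

# Run the package's test suite with coverage tracking enabled;
# `.cov` files are written next to the source files that were executed.
Pkg.test("Example"; coverage=true)
```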
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 622c696b383a0..79fdb89399d42 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -155,14 +155,16 @@ struct Fail <: Result context::Union{Nothing, String} source::LineNumberNode message_only::Bool - function Fail(test_type::Symbol, orig_expr, data, value, context, source::LineNumberNode, message_only::Bool) + backtrace::Union{Nothing, String} + function Fail(test_type::Symbol, orig_expr, data, value, context, source::LineNumberNode, message_only::Bool, backtrace=nothing) return new(test_type, string(orig_expr), data === nothing ? nothing : string(data), string(isa(data, Type) ? typeof(value) : value), context, source, - message_only) + message_only, + backtrace) end end @@ -184,6 +186,11 @@ function Base.show(io::IO, t::Fail) else print(io, "\n Expected: ", data) print(io, "\n Thrown: ", value) + print(io, "\n") + if t.backtrace !== nothing + # Capture error message and indent to match + join(io, (" " * line for line in split(t.backtrace, "\n")), "\n") + end end elseif t.test_type === :test_throws_nothing # An exception was expected, but no exception was thrown @@ -768,7 +775,7 @@ macro test_throws(extype, ex) if $(esc(extype)) != InterruptException && _e isa InterruptException rethrow() end - Threw(_e, nothing, $(QuoteNode(__source__))) + Threw(_e, Base.current_exceptions(), $(QuoteNode(__source__))) end end return :(do_test_throws($result, $orig_ex, $(esc(extype)))) @@ -795,6 +802,9 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype) if from_macroexpand && extype == LoadError && exc isa Exception Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws) true + elseif extype == ErrorException && isa(exc, FieldError) + Base.depwarn(lazy"ErrorException should no longer be used to test field access; FieldError should be used instead!", :do_test_throws) + true else isa(exc, extype) end @@ -802,7 +812,11 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype) if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc) extype = extype.error # deprecated end - if isa(exc, typeof(extype)) + # Support `UndefVarError(:x)` meaning `UndefVarError(:x, scope)` for any `scope`. + # Retains the behaviour from pre-v1.11 when `UndefVarError` didn't have `scope`. 
+ if isa(extype, UndefVarError) && !isdefined(extype, :scope) + success = exc isa UndefVarError && exc.var == extype.var + else isa(exc, typeof(extype)) success = true for fld in 1:nfields(extype) if !isequal(getfield(extype, fld), getfield(exc, fld)) @@ -825,7 +839,22 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype) if success testres = Pass(:test_throws, orig_expr, extype, exc, result.source, message_only) else - testres = Fail(:test_throws_wrong, orig_expr, extype, exc, nothing, result.source, message_only) + if result.backtrace !== nothing + bt = scrub_exc_stack(result.backtrace, nothing, extract_file(result.source)) + bt_str = try # try the latest world for this, since we might have eval'd new code for show + Base.invokelatest(sprint, Base.show_exception_stack, bt; context=stdout) + catch ex + "#=ERROR showing exception stack=# " * + try + sprint(Base.showerror, ex, catch_backtrace(); context=stdout) + catch + "of type " * string(typeof(ex)) + end + end + else + bt_str = nothing + end + testres = Fail(:test_throws_wrong, orig_expr, extype, exc, nothing, result.source, message_only, bt_str) end else testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, nothing, result.source, false) @@ -858,21 +887,7 @@ Note: Warnings generated by `@warn` cannot be tested with this macro. Use [`@test_logs`](@ref) instead. """ macro test_warn(msg, expr) - quote - let fname = tempname() - try - ret = open(fname, "w") do f - redirect_stderr(f) do - $(esc(expr)) - end - end - @test contains_warn(read(fname, String), $(esc(msg))) - ret - finally - rm(fname, force=true) - end - end - end + test_warn_expr(expr, msg) end """ @@ -885,28 +900,35 @@ Note: The absence of warnings generated by `@warn` cannot be tested with this macro. Use [`@test_logs`](@ref) instead. """ macro test_nowarn(expr) - quote - # Duplicate some code from `@test_warn` to allow printing the content of - # `stderr` again to `stderr` here while suppressing it for `@test_warn`. - # If that shouldn't be used, it would be possible to just use - # @test_warn isempty $(esc(expr)) - # here. - let fname = tempname() - try - ret = open(fname, "w") do f - redirect_stderr(f) do - $(esc(expr)) - end - end - stderr_content = read(fname, String) - print(stderr, stderr_content) # this is helpful for debugging - @test isempty(stderr_content) - ret + # allow printing the content of `stderr` again to `stderr` here while suppressing it + # for `@test_warn`. If that shouldn't be used, this could just be `test_warn_expr(expr, #=msg=#isempty)` + test_warn_expr(expr, function (s) + print(stderr, s) # this is helpful for debugging + isempty(s) + end) +end + +function test_warn_expr(@nospecialize(expr), @nospecialize(msg)) + return :(let fname = tempname() + try + f = open(fname, "w") + stdold = stderr + redirect_stderr(f) + ret = try + # We deliberately don't use the thunk versions of open/redirect + # to ensure that adding the macro does not change the toplevel-ness + # of the resulting expression. 
+ $(esc(expr)) finally - rm(fname, force=true) + redirect_stderr(stdold) + close(f) end + @test contains_warn(read(fname, String), $(esc(msg))) + ret + finally + rm(fname, force=true) end - end + end) end #----------------------------------------------------------------------- @@ -1049,8 +1071,9 @@ mutable struct DefaultTestSet <: AbstractTestSet time_end::Union{Float64,Nothing} failfast::Bool file::Union{String,Nothing} + rng::Union{Nothing,AbstractRNG} end -function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing, source = nothing) +function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing, source = nothing, rng = nothing) if isnothing(failfast) # pass failfast state into child testsets parent_ts = get_testset() @@ -1060,7 +1083,7 @@ function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming: failfast = false end end - return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast, extract_file(source)) + return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast, extract_file(source), rng) end extract_file(source::LineNumberNode) = extract_file(source.file) extract_file(file::Symbol) = string(file) @@ -1092,6 +1115,27 @@ function record(ts::DefaultTestSet, t::Union{Fail, Error}; print_result::Bool=TE return t end +""" + print_verbose(::AbstractTestSet) -> Bool + +Whether printing involving this `AbstractTestSet` should be verbose or not. + +Defaults to `false`. +""" +function print_verbose end + +""" + results(::AbstractTestSet) + +Return an iterator of results aggregated by this `AbstractTestSet`, if any were recorded. + +Defaults to the empty tuple. +""" +function results end + +print_verbose(ts::DefaultTestSet) = ts.verbose +results(ts::DefaultTestSet) = ts.results + # When a DefaultTestSet finishes, it records itself to its parent # testset, if there is one. This allows for recursive printing of # the results at the end of the tests @@ -1099,26 +1143,42 @@ record(ts::DefaultTestSet, t::AbstractTestSet) = push!(ts.results, t) @specialize -function print_test_errors(ts::DefaultTestSet) - for t in ts.results +""" + print_test_errors(::AbstractTestSet) + +Prints the errors that were recorded by this `AbstractTestSet` after it +was `finish`ed. +""" +function print_test_errors(ts::AbstractTestSet) + for t in results(ts) if isa(t, Error) || isa(t, Fail) println("Error in testset $(ts.description):") show(t) println() - elseif isa(t, DefaultTestSet) + elseif isa(t, AbstractTestSet) print_test_errors(t) end end end -function print_test_results(ts::DefaultTestSet, depth_pad=0) +""" + print_test_results(ts::AbstractTestSet, depth_pad=0) + +Print the results of an `AbstractTestSet` as a formatted table. + +`depth_pad` refers to how much padding should be added in front of all output. + +Called inside of `Test.finish`, if the `finish`ed testset is the topmost +testset. 
+""" +function print_test_results(ts::AbstractTestSet, depth_pad=0) # Calculate the overall number for each type so each of # the test result types are aligned - passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts) - total_pass = passes + c_passes - total_fail = fails + c_fails - total_error = errors + c_errors - total_broken = broken + c_broken + tc = get_test_counts(ts) + total_pass = tc.passes + tc.cumulative_passes + total_fail = tc.fails + tc.cumulative_fails + total_error = tc.errors + tc.cumulative_errors + total_broken = tc.broken + tc.cumulative_broken dig_pass = total_pass > 0 ? ndigits(total_pass) : 0 dig_fail = total_fail > 0 ? ndigits(total_fail) : 0 dig_error = total_error > 0 ? ndigits(total_error) : 0 @@ -1131,14 +1191,13 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0) fail_width = dig_fail > 0 ? max(length("Fail"), dig_fail) : 0 error_width = dig_error > 0 ? max(length("Error"), dig_error) : 0 broken_width = dig_broken > 0 ? max(length("Broken"), dig_broken) : 0 - total_width = dig_total > 0 ? max(length("Total"), dig_total) : 0 - duration_width = max(length("Time"), length(duration)) + total_width = max(textwidth("Total"), dig_total) + duration_width = max(textwidth("Time"), textwidth(tc.duration)) # Calculate the alignment of the test result counts by # recursively walking the tree of test sets - align = max(get_alignment(ts, 0), length("Test Summary:")) + align = max(get_alignment(ts, depth_pad), textwidth("Test Summary:")) # Print the outer test set header once - pad = total == 0 ? "" : " " - printstyled(rpad("Test Summary:", align, " "), " |", pad; bold=true) + printstyled(rpad("Test Summary:", align, " "), " |", " "; bold=true) if pass_width > 0 printstyled(lpad("Pass", pass_width, " "), " "; bold=true, color=:green) end @@ -1151,15 +1210,23 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0) if broken_width > 0 printstyled(lpad("Broken", broken_width, " "), " "; bold=true, color=Base.warn_color()) end - if total_width > 0 + if total_width > 0 || total == 0 printstyled(lpad("Total", total_width, " "), " "; bold=true, color=Base.info_color()) end - if ts.showtiming + timing = isdefined(ts, :showtiming) ? 
ts.showtiming : false + if timing printstyled(lpad("Time", duration_width, " "); bold=true) end println() # Recursively print a summary at every level - print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming) + print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, timing) + # Print the RNG of the outer testset if there are failures + if total != total_pass + total_broken + rng = get_rng(ts) + if !isnothing(rng) + println("RNG of the outermost testset: ", rng) + end + end end @@ -1177,11 +1244,11 @@ function finish(ts::DefaultTestSet; print_results::Bool=TESTSET_PRINT_ENABLE[]) record(parent_ts, ts) return ts end - passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts) - total_pass = passes + c_passes - total_fail = fails + c_fails - total_error = errors + c_errors - total_broken = broken + c_broken + tc = get_test_counts(ts) + total_pass = tc.passes + tc.cumulative_passes + total_fail = tc.fails + tc.cumulative_fails + total_error = tc.errors + tc.cumulative_errors + total_broken = tc.broken + tc.cumulative_broken total = total_pass + total_fail + total_error + total_broken if print_results @@ -1231,100 +1298,183 @@ function filter_errors(ts::DefaultTestSet) efs end -# Recursive function that counts the number of test results of each -# type directly in the testset, and totals across the child testsets +""" + Test.get_rng(ts::AbstractTestSet) -> Union{Nothing,AbstractRNG} + +Return the global random number generator (RNG) associated to the input testset `ts`. +If no RNG is associated to it, return `nothing`. +""" +get_rng(::AbstractTestSet) = nothing +get_rng(ts::DefaultTestSet) = ts.rng +""" + Test.set_rng!(ts::AbstractTestSet, rng::AbstractRNG) -> AbstractRNG + +Set the global random number generator (RNG) associated to the input testset `ts` to `rng`. +If no RNG is associated to it, do nothing. +In any case, always return the input `rng`. +""" +set_rng!(::AbstractTestSet, rng::AbstractRNG) = rng +set_rng!(ts::DefaultTestSet, rng::AbstractRNG) = ts.rng = rng + +""" + TestCounts + +Holds the state for recursively gathering the results of a test set for display purposes. + +Fields: + + * `customized`: Whether the function `get_test_counts` was customized for the `AbstractTestSet` + this counts object is for. If a custom method was defined, always pass `true` + to the constructor. + * `passes`: The number of passing `@test` invocations. + * `fails`: The number of failing `@test` invocations. + * `errors`: The number of erroring `@test` invocations. + * `broken`: The number of broken `@test` invocations. + * `passes`: The cumulative number of passing `@test` invocations. + * `fails`: The cumulative number of failing `@test` invocations. + * `errors`: The cumulative number of erroring `@test` invocations. + * `broken`: The cumulative number of broken `@test` invocations. + * `duration`: The total duration the `AbstractTestSet` in question ran for, as a formatted `String`. +""" +struct TestCounts + customized::Bool + passes::Int + fails::Int + errors::Int + broken::Int + cumulative_passes::Int + cumulative_fails::Int + cumulative_errors::Int + cumulative_broken::Int + duration::String +end + +"""" + get_test_counts(::AbstractTestSet) -> TestCounts + +Recursive function that counts the number of test results of each +type directly in the testset, and totals across the child testsets. 
+ +Custom `AbstractTestSet` should implement this function to get their totals +counted & displayed with `DefaultTestSet` as well. + +If this is not implemented for a custom `TestSet`, the printing falls back to +reporting `x` for failures and `?s` for the duration. +""" +function get_test_counts end + +get_test_counts(ts::AbstractTestSet) = TestCounts(false, 0,0,0,0,0,0,0,0, format_duration(ts)) + function get_test_counts(ts::DefaultTestSet) passes, fails, errors, broken = ts.n_passed, 0, 0, 0 + # cumulative results c_passes, c_fails, c_errors, c_broken = 0, 0, 0, 0 for t in ts.results isa(t, Fail) && (fails += 1) isa(t, Error) && (errors += 1) isa(t, Broken) && (broken += 1) - if isa(t, DefaultTestSet) - np, nf, ne, nb, ncp, ncf, nce , ncb, duration = get_test_counts(t) - c_passes += np + ncp - c_fails += nf + ncf - c_errors += ne + nce - c_broken += nb + ncb + if isa(t, AbstractTestSet) + tc = get_test_counts(t)::TestCounts + c_passes += tc.passes + tc.cumulative_passes + c_fails += tc.fails + tc.cumulative_fails + c_errors += tc.errors + tc.cumulative_errors + c_broken += tc.broken + tc.cumulative_broken end end + duration = format_duration(ts) ts.anynonpass = (fails + errors + c_fails + c_errors > 0) + return TestCounts(true, passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration) +end + +""" + format_duration(::AbstractTestSet) + +Return a formatted string for printing the duration the testset ran for. + +If not defined, falls back to `"?s"`. +""" +format_duration(::AbstractTestSet) = "?s" + +function format_duration(ts::DefaultTestSet) (; time_start, time_end) = ts - duration = if isnothing(time_end) - "" + isnothing(time_end) && return "" + + dur_s = time_end - time_start + if dur_s < 60 + string(round(dur_s, digits = 1), "s") else - dur_s = time_end - time_start - if dur_s < 60 - string(round(dur_s, digits = 1), "s") - else - m, s = divrem(dur_s, 60) - s = lpad(string(round(s, digits = 1)), 4, "0") - string(round(Int, m), "m", s, "s") - end + m, s = divrem(dur_s, 60) + s = lpad(string(round(s, digits = 1)), 4, "0") + string(round(Int, m), "m", s, "s") end - return passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration end +print_verbose(::AbstractTestSet) = false +results(::AbstractTestSet) = () + # Recursive function that prints out the results at each level of # the tree of test sets -function print_counts(ts::DefaultTestSet, depth, align, +function print_counts(ts::AbstractTestSet, depth, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, showtiming) # Count results by each type at this level, and recursively # through any child test sets - passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts) - subtotal = passes + fails + errors + broken + c_passes + c_fails + c_errors + c_broken + tc = get_test_counts(ts) + fallbackstr = tc.customized ? 
" " : "x" + subtotal = tc.passes + tc.fails + tc.errors + tc.broken + + tc.cumulative_passes + tc.cumulative_fails + tc.cumulative_errors + tc.cumulative_broken # Print test set header, with an alignment that ensures all # the test results appear above each other print(rpad(string(" "^depth, ts.description), align, " "), " | ") - np = passes + c_passes - if np > 0 - printstyled(lpad(string(np), pass_width, " "), " ", color=:green) + n_passes = tc.passes + tc.cumulative_passes + if n_passes > 0 + printstyled(lpad(string(n_passes), pass_width, " "), " ", color=:green) elseif pass_width > 0 # No passes at this level, but some at another level - print(lpad(" ", pass_width), " ") + printstyled(lpad(fallbackstr, pass_width, " "), " ", color=:green) end - nf = fails + c_fails - if nf > 0 - printstyled(lpad(string(nf), fail_width, " "), " ", color=Base.error_color()) + n_fails = tc.fails + tc.cumulative_fails + if n_fails > 0 + printstyled(lpad(string(n_fails), fail_width, " "), " ", color=Base.error_color()) elseif fail_width > 0 # No fails at this level, but some at another level - print(lpad(" ", fail_width), " ") + printstyled(lpad(fallbackstr, fail_width, " "), " ", color=Base.error_color()) end - ne = errors + c_errors - if ne > 0 - printstyled(lpad(string(ne), error_width, " "), " ", color=Base.error_color()) + n_errors = tc.errors + tc.cumulative_errors + if n_errors > 0 + printstyled(lpad(string(n_errors), error_width, " "), " ", color=Base.error_color()) elseif error_width > 0 # No errors at this level, but some at another level - print(lpad(" ", error_width), " ") + printstyled(lpad(fallbackstr, error_width, " "), " ", color=Base.error_color()) end - nb = broken + c_broken - if nb > 0 - printstyled(lpad(string(nb), broken_width, " "), " ", color=Base.warn_color()) + n_broken = tc.broken + tc.cumulative_broken + if n_broken > 0 + printstyled(lpad(string(n_broken), broken_width, " "), " ", color=Base.warn_color()) elseif broken_width > 0 # None broken at this level, but some at another level - print(lpad(" ", broken_width), " ") + printstyled(lpad(fallbackstr, broken_width, " "), " ", color=Base.warn_color()) end - if np == 0 && nf == 0 && ne == 0 && nb == 0 - printstyled(lpad("None", total_width, " "), " ", color=Base.info_color()) + if n_passes == 0 && n_fails == 0 && n_errors == 0 && n_broken == 0 + total_str = tc.customized ? string(subtotal) : "?" + printstyled(lpad(total_str, total_width, " "), " ", color=Base.info_color()) else printstyled(lpad(string(subtotal), total_width, " "), " ", color=Base.info_color()) end if showtiming - printstyled(lpad(string(duration), duration_width, " ")) + printstyled(lpad(tc.duration, duration_width, " ")) end println() # Only print results at lower levels if we had failures or if the user - # wants. - if (np + nb != subtotal) || (ts.verbose) - for t in ts.results - if isa(t, DefaultTestSet) + # wants. Requires the given `AbstractTestSet` to have a vector of results + if ((n_passes + n_broken != subtotal) || print_verbose(ts)) + for t in results(ts) + if isa(t, AbstractTestSet) print_counts(t, depth + 1, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming) end @@ -1370,14 +1520,17 @@ along with a summary of the test results. Any custom testset type (subtype of `AbstractTestSet`) can be given and it will also be used for any nested `@testset` invocations. The given options are only applied to the test set where they are given. 
The default test set type -accepts three boolean options: -- `verbose`: if `true`, the result summary of the nested testsets is shown even +accepts the following options: +- `verbose::Bool`: if `true`, the result summary of the nested testsets is shown even when they all pass (the default is `false`). -- `showtiming`: if `true`, the duration of each displayed testset is shown +- `showtiming::Bool`: if `true`, the duration of each displayed testset is shown (the default is `true`). -- `failfast`: if `true`, any test failure or error will cause the testset and any +- `failfast::Bool`: if `true`, any test failure or error will cause the testset and any child testsets to return immediately (the default is `false`). This can also be set globally via the env var `JULIA_TEST_FAILFAST`. +- `rng::Random.AbstractRNG`: use the given random number generator (RNG) as the global one + for the testset. `rng` must be `copy!`-able. This option can be useful to locally + reproduce stochastic test failures which only depend on the state of the global RNG. !!! compat "Julia 1.8" `@testset test_func()` requires at least Julia 1.8. @@ -1385,6 +1538,9 @@ accepts three boolean options: !!! compat "Julia 1.9" `failfast` requires at least Julia 1.9. +!!! compat "Julia 1.12" + The `rng` option requires at least Julia 1.12. + The description string accepts interpolation from the loop indices. If no description is provided, one is constructed based on the variables. If a function call is provided, its name will be used. @@ -1397,13 +1553,19 @@ method, which by default will return a list of the testset objects used in each iteration. Before the execution of the body of a `@testset`, there is an implicit -call to `Random.seed!(seed)` where `seed` is the current seed of the global RNG. +call to `copy!(Random.default_rng(), rng)` where `rng` is the RNG of the current task, or +the value of the RNG passed via the `rng` option. Moreover, after the execution of the body, the state of the global RNG is restored to what it was before the `@testset`. This is meant to ease reproducibility in case of failure, and to allow seamless re-arrangements of `@testset`s regardless of their side-effect on the global RNG state. +!!! note "RNG of nested testsets" + Unless changed with the `rng` option, the same RNG is set at the beginning of all + nested testsets. The RNG printed to screen when a testset has failures is the global RNG of + the outermost testset even if inner testsets have different RNGs manually set by the user. + ## Examples ```jldoctest; filter = r"trigonometric identities | 4 4 [0-9\\.]+s" julia> @testset "trigonometric identities" begin @@ -1437,7 +1599,14 @@ parent test set (with the context object appended to any failing tests.) `@testset let` requires at least Julia 1.9. !!! compat "Julia 1.10" - Multiple `let` assignements are supported since Julia 1.10. + Multiple `let` assignments are supported since Julia 1.10. + +# Special implicit world age increment for `@testset begin` + +World age inside `@testset begin` increments implicitly after every statement. +This matches the behavior of ordinary toplevel code, but not that of ordinary +`begin/end` blocks, i.e. with respect to world age, `@testset begin` behaves +as if the body of the `begin/end` block was written at toplevel. 
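As a minimal sketch of the new `rng` option documented above (requires Julia 1.12 per the compat note; the seed is arbitrary):

```julia
using Test, Random

# Pin the testset's global RNG so a stochastic failure can be replayed later;
# `Xoshiro` is `copy!`-able, as the option requires.
@testset "reproducible randomness" rng = Xoshiro(1234) begin
    x = rand(100)
    @test all(0 .<= x .< 1)
end
```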
## Examples ```jldoctest @@ -1533,6 +1702,21 @@ function testset_context(args, ex, source) return esc(ex) end +function insert_toplevel_latestworld(@nospecialize(tests)) + isa(tests, Expr) || return tests + (tests.head !== :block) && return tests + ret = Expr(:block) + for arg in tests.args + push!(ret.args, arg) + if isa(arg, LineNumberNode) || + (isa(arg, Expr) && arg.head in (:latestworld, :var"latestworld-if-toplevel")) + continue + end + push!(ret.args, Expr(:var"latestworld-if-toplevel")) + end + return ret +end + """ Generate the code for a `@testset` with a function call or `begin`/`end` argument """ @@ -1551,6 +1735,8 @@ function testset_beginend_call(args, tests, source) testsettype = :(get_testset_depth() == 0 ? DefaultTestSet : typeof(get_testset())) end + tests = insert_toplevel_latestworld(tests) + # Generate a block of code that initializes a new testset, adds # it to the task local storage, evaluates the test(s), before # finally removing the testset and giving it a chance to take @@ -1567,12 +1753,13 @@ function testset_beginend_call(args, tests, source) # we reproduce the logic of guardseed, but this function # cannot be used as it changes slightly the semantic of @testset, # by wrapping the body in a function - local RNG = default_rng() - local oldrng = copy(RNG) - local oldseed = Random.GLOBAL_SEED + local default_rng_orig = copy(default_rng()) + local tls_seed_orig = copy(Random.get_tls_seed()) + local tls_seed = isnothing(get_rng(ts)) ? set_rng!(ts, tls_seed_orig) : get_rng(ts) try - # RNG is re-seeded with its own seed to ease reproduce a failed test - Random.seed!(Random.GLOBAL_SEED) + # default RNG is reset to its state from last `seed!()` to ease reproduce a failed test + copy!(Random.default_rng(), tls_seed) + copy!(Random.get_tls_seed(), Random.default_rng()) let $(esc(tests)) end @@ -1587,8 +1774,8 @@ function testset_beginend_call(args, tests, source) record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source)))) end finally - copy!(RNG, oldrng) - Random.set_global_seed!(oldseed) + copy!(default_rng(), default_rng_orig) + copy!(Random.get_tls_seed(), tls_seed_orig) pop_testset() ret = finish(ts) end @@ -1653,13 +1840,10 @@ function testset_forloop(args, testloop, source) finish_errored = true push!(arr, finish(ts)) finish_errored = false - - # it's 1000 times faster to copy from tmprng rather than calling Random.seed! - copy!(RNG, tmprng) - + copy!(default_rng(), tls_seed) end ts = if ($testsettype === $DefaultTestSet) && $(isa(source, LineNumberNode)) - $(testsettype)($desc; source=$(QuoteNode(source.file)), $options...) + $(testsettype)($desc; source=$(QuoteNode(source.file)), $options..., rng=tls_seed) else $(testsettype)($desc; $options...) end @@ -1681,12 +1865,12 @@ function testset_forloop(args, testloop, source) local arr = Vector{Any}() local first_iteration = true local ts + local rng_option = get($(options), :rng, nothing) local finish_errored = false - local RNG = default_rng() - local oldrng = copy(RNG) - local oldseed = Random.GLOBAL_SEED - Random.seed!(Random.GLOBAL_SEED) - local tmprng = copy(RNG) + local default_rng_orig = copy(default_rng()) + local tls_seed_orig = copy(Random.get_tls_seed()) + local tls_seed = isnothing(rng_option) ? 
copy(Random.get_tls_seed()) : rng_option + copy!(Random.default_rng(), tls_seed) try let $(Expr(:for, Expr(:block, [esc(v) for v in loopvars]...), blk)) @@ -1697,8 +1881,8 @@ function testset_forloop(args, testloop, source) pop_testset() push!(arr, finish(ts)) end - copy!(RNG, oldrng) - Random.set_global_seed!(oldseed) + copy!(default_rng(), default_rng_orig) + copy!(Random.get_tls_seed(), tls_seed_orig) end arr end @@ -1715,10 +1899,21 @@ function parse_testset_args(args) options = :(Dict{Symbol, Any}()) for arg in args # a standalone symbol is assumed to be the test set we should use - if isa(arg, Symbol) + # the same is true for a symbol that's not exported from a module + if isa(arg, Symbol) || Base.isexpr(arg, :.) + if testsettype !== nothing + msg = """Multiple testset types provided to @testset. \ + This is deprecated and may error in the future.""" + Base.depwarn(msg, :testset_multiple_testset_types; force=true) + end testsettype = esc(arg) # a string is the description elseif isa(arg, AbstractString) || (isa(arg, Expr) && arg.head === :string) + if desc !== nothing + msg = """Multiple descriptions provided to @testset. \ + This is deprecated and may error in the future.""" + Base.depwarn(msg, :testset_multiple_descriptions; force=true) + end desc = esc(arg) # an assignment is an option elseif isa(arg, Expr) && arg.head === :(=) @@ -1781,7 +1976,7 @@ function get_testset_depth() return length(testsets) end -_args_and_call(args...; kwargs...) = (args[1:end-1], kwargs, args[end](args[1:end-1]...; kwargs...)) +_args_and_call((args..., f)...; kwargs...) = (args, kwargs, f(args...; kwargs...)) _materialize_broadcasted(f, args...) = Broadcast.materialize(Broadcast.broadcasted(f, args...)) """ @@ -1798,7 +1993,7 @@ matches the inferred type modulo `AllowedType`, or when the return type is a sub `AllowedType`. This is useful when testing type stability of functions returning a small union such as `Union{Nothing, T}` or `Union{Missing, T}`. -```jldoctest; setup = :(using InteractiveUtils), filter = r"begin\\n(.|\\n)*end" +```jldoctest; setup = :(using InteractiveUtils; using Base: >), filter = r"begin\\n(.|\\n)*end" julia> f(a) = a > 1 ? 1 : 1.0 f (generic function with 1 method) @@ -1812,8 +2007,9 @@ Arguments #self#::Core.Const(f) a::Int64 Body::UNION{FLOAT64, INT64} -1 ─ %1 = (a > 1)::Bool -└── goto #3 if not %1 +1 ─ %1 = :>::Core.Const(>) +│ %2 = (%1)(a, 1)::Bool +└── goto #3 if not %2 2 ─ return 1 3 ─ return 1.0 @@ -1863,25 +2059,24 @@ function _inferred(ex, mod, allow = :(Union{})) quote let allow = $(esc(allow)) allow isa Type || throw(ArgumentError("@inferred requires a type as second argument")) - $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args) + $(if any(@nospecialize(a)->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args) # Has keywords args = gensym() kwargs = gensym() quote $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1]))) - inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...)))) + inftype = $(gen_call_with_extracted_types(mod, Base.infer_return_type, :($(ex.args[1])($(args)...; $(kwargs)...)))) end else # No keywords quote args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),) result = $(esc(ex.args[1]))(args...) 
- inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...)) + inftype = Base.infer_return_type($(esc(ex.args[1])), Base.typesof(args...)) end end) - @assert length(inftypes) == 1 rettype = result isa Type ? Type{result} : typeof(result) - rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])") + rettype <: allow || rettype == typesplit(inftype, allow) || error("return type $rettype does not match inferred return type $inftype") result end end @@ -1956,7 +2151,7 @@ function detect_ambiguities(mods::Module...; while !isempty(work) mod = pop!(work) for n in names(mod, all = true) - Base.isdeprecated(mod, n) && continue + (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue if !isdefined(mod, n) if is_in_mods(mod, recursive, mods) if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds @@ -2027,7 +2222,7 @@ function detect_unbound_args(mods...; while !isempty(work) mod = pop!(work) for n in names(mod, all = true) - Base.isdeprecated(mod, n) && continue + (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue if !isdefined(mod, n) if is_in_mods(mod, recursive, mods) if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds @@ -2108,6 +2303,8 @@ for G in (GenericSet, GenericDict) end Base.get(s::GenericDict, x, y) = get(s.s, x, y) +Base.pop!(s::GenericDict, k) = pop!(s.s, k) +Base.setindex!(s::GenericDict, v, k) = setindex!(s.s, v, k) """ The `GenericArray` can be used to test generic array APIs that program to diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl index 4e444874d0fb8..b224d79e47cd9 100644 --- a/stdlib/Test/src/logging.jl +++ b/stdlib/Test/src/logging.jl @@ -2,6 +2,7 @@ using Logging: Logging, AbstractLogger, LogLevel, Info, with_logger import Base: occursin +using Base: @lock #------------------------------------------------------------------------------- """ @@ -35,11 +36,15 @@ struct Ignored ; end #------------------------------------------------------------------------------- # Logger with extra test-related state mutable struct TestLogger <: AbstractLogger - logs::Vector{LogRecord} + lock::ReentrantLock + logs::Vector{LogRecord} # Guarded by lock. min_level::LogLevel catch_exceptions::Bool - shouldlog_args - message_limits::Dict{Any,Int} + # Note: shouldlog_args only maintains the info for the most recent log message, which + # may not be meaningful in a multithreaded program. See: + # https://github.com/JuliaLang/julia/pull/54497#discussion_r1603691606 + shouldlog_args # Guarded by lock. + message_limits::Dict{Any,Int} # Guarded by lock. respect_maxlog::Bool end @@ -55,7 +60,7 @@ most `n` times. See also: [`LogRecord`](@ref). 
-## Example +## Examples ```jldoctest julia> using Test, Logging @@ -80,15 +85,17 @@ Test Passed ``` """ TestLogger(; min_level=Info, catch_exceptions=false, respect_maxlog=true) = - TestLogger(LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog) + TestLogger(ReentrantLock(), LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog) Logging.min_enabled_level(logger::TestLogger) = logger.min_level function Logging.shouldlog(logger::TestLogger, level, _module, group, id) - if get(logger.message_limits, id, 1) > 0 - logger.shouldlog_args = (level, _module, group, id) - true - else - false + @lock logger.lock begin + if get(logger.message_limits, id, 1) > 0 + logger.shouldlog_args = (level, _module, group, id) + return true + else + return false + end end end @@ -98,12 +105,17 @@ function Logging.handle_message(logger::TestLogger, level, msg, _module, if logger.respect_maxlog maxlog = get(kwargs, :maxlog, nothing) if maxlog isa Core.BuiltinInts - remaining = get!(logger.message_limits, id, Int(maxlog)::Int) - logger.message_limits[id] = remaining - 1 - remaining > 0 || return + @lock logger.lock begin + remaining = get!(logger.message_limits, id, Int(maxlog)::Int) + logger.message_limits[id] = remaining - 1 + remaining > 0 || return + end end end - push!(logger.logs, LogRecord(level, msg, _module, group, id, file, line, kwargs)) + r = LogRecord(level, msg, _module, group, id, file, line, kwargs) + @lock logger.lock begin + push!(logger.logs, r) + end end # Catch exceptions for the test logger only if specified @@ -112,7 +124,9 @@ Logging.catch_exceptions(logger::TestLogger) = logger.catch_exceptions function collect_test_logs(f; kwargs...) logger = TestLogger(; kwargs...) value = with_logger(f, logger) - logger.logs, value + @lock logger.lock begin + return copy(logger.logs), value + end end @@ -149,7 +163,7 @@ function record(ts::DefaultTestSet, t::LogTestFailure) if TESTSET_PRINT_ENABLE[] printstyled(ts.description, ": ", color=:white) print(t) - Base.show_backtrace(stdout, scrub_backtrace(backtrace())) + Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source))) println() end # Hack: convert to `Fail` so that test summarization works correctly diff --git a/stdlib/Test/src/precompile.jl b/stdlib/Test/src/precompile.jl index 2cb2fb7f3f0c6..04907f8425440 100644 --- a/stdlib/Test/src/precompile.jl +++ b/stdlib/Test/src/precompile.jl @@ -1,9 +1,15 @@ -redirect_stdout(devnull) do - @testset "example" begin - @test 1 == 1 - @test_throws ErrorException error() - @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2" - @test_broken 1 == 2 - @test 1 ≈ 1.0000000000000001 +if Base.generating_output() +let + function example_payload() + @testset "example" begin + @test 1 == 1 + @test_throws ErrorException error() + @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2" + @test_broken 1 == 2 + @test 1 ≈ 1.0000000000000001 + end end + + redirect_stdout(example_payload, devnull) +end end diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl index 0388e2107e098..995d2c983437c 100644 --- a/stdlib/Test/test/runtests.jl +++ b/stdlib/Test/test/runtests.jl @@ -77,7 +77,7 @@ end @test 1234 === @test_nowarn(1234) @test 5678 === @test_warn("WARNING: foo", begin println(stderr, "WARNING: foo"); 5678; end) let a - @test_throws UndefVarError(:a) a + @test_throws UndefVarError(:a, :local) a @test_nowarn a = 1 @test a === 1 end @@ -162,7 +162,7 @@ let fails = @testset NoThrowTestSet 
begin @test_throws "A test" error("a test") @test_throws r"sqrt\([Cc]omplx" sqrt(-1) @test_throws str->occursin("a T", str) error("a test") - @test_throws ["BoundsError", "aquire", "1-element", "at index [2]"] [1][2] + @test_throws ["BoundsError", "acquire", "1-element", "at index [2]"] [1][2] end for fail in fails @test fail isa Test.Fail @@ -294,7 +294,7 @@ let fails = @testset NoThrowTestSet begin end let str = sprint(show, fails[26]) - @test occursin("Expected: [\"BoundsError\", \"aquire\", \"1-element\", \"at index [2]\"]", str) + @test occursin("Expected: [\"BoundsError\", \"acquire\", \"1-element\", \"at index [2]\"]", str) @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str) end @@ -468,11 +468,11 @@ end end @testset "ts results" begin @test isa(ts, Test.DefaultTestSet) - passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(ts) - total_pass = passes + c_passes - total_fail = fails + c_fails - total_error = errors + c_errors - total_broken = broken + c_broken + tc = Test.get_test_counts(ts) + total_pass = tc.passes + tc.cumulative_passes + total_fail = tc.fails + tc.cumulative_fails + total_error = tc.errors + tc.cumulative_errors + total_broken = tc.broken + tc.cumulative_broken @test total_pass == 24 @test total_fail == 6 @test total_error == 6 @@ -659,15 +659,15 @@ end @test tss.foo == 3 # test @inferred -uninferrable_function(i) = (1, "1")[i] -uninferrable_small_union(i) = (1, nothing)[i] -@test_throws ErrorException @inferred(uninferrable_function(1)) +uninferable_function(i) = (1, "1")[i] +uninferable_small_union(i) = (1, nothing)[i] +@test_throws ErrorException @inferred(uninferable_function(1)) @test @inferred(identity(1)) == 1 -@test @inferred(Nothing, uninferrable_small_union(1)) === 1 -@test @inferred(Nothing, uninferrable_small_union(2)) === nothing -@test_throws ErrorException @inferred(Missing, uninferrable_small_union(1)) -@test_throws ErrorException @inferred(Missing, uninferrable_small_union(2)) -@test_throws ArgumentError @inferred(nothing, uninferrable_small_union(1)) +@test @inferred(Nothing, uninferable_small_union(1)) === 1 +@test @inferred(Nothing, uninferable_small_union(2)) === nothing +@test_throws ErrorException @inferred(Missing, uninferable_small_union(1)) +@test_throws ErrorException @inferred(Missing, uninferable_small_union(2)) +@test_throws ArgumentError @inferred(nothing, uninferable_small_union(1)) # Ensure @inferred only evaluates the arguments once inferred_test_global = 0 @@ -692,12 +692,12 @@ end # Issue #17105 # @inferred with kwargs -inferrable_kwtest(x; y=1) = 2x -uninferrable_kwtest(x; y=1) = 2x+y -@test (@inferred inferrable_kwtest(1)) == 2 -@test (@inferred inferrable_kwtest(1; y=1)) == 2 -@test (@inferred uninferrable_kwtest(1)) == 3 -@test (@inferred uninferrable_kwtest(1; y=2)) == 4 +inferable_kwtest(x; y=1) = 2x +uninferable_kwtest(x; y=1) = 2x+y +@test (@inferred inferable_kwtest(1)) == 2 +@test (@inferred inferable_kwtest(1; y=1)) == 2 +@test (@inferred uninferable_kwtest(1)) == 3 +@test (@inferred uninferable_kwtest(1; y=2)) == 4 @test_throws ErrorException @testset "$(error())" for i in 1:10 end @@ -1032,6 +1032,7 @@ end # i.e. 
it behaves as if it was wrapped in a `guardseed(GLOBAL_SEED)` block seed = rand(UInt128) Random.seed!(seed) + seeded_state = copy(Random.default_rng()) a = rand() @testset begin # global RNG must re-seeded at the beginning of @testset @@ -1043,31 +1044,82 @@ end # the @testset's above must have no consequence for rand() below b = rand() Random.seed!(seed) + @test Random.default_rng() == seeded_state @test a == rand() @test b == rand() # Even when seed!() is called within a testset A, subsequent testsets # should start with the same "global RNG state" as what A started with, # such that the test `refvalue == rand(Int)` below succeeds. - # Currently, this means that Random.GLOBAL_SEED has to be restored, + # Currently, this means that `Random.get_tls_seed()` has to be restored, # in addition to the state of Random.default_rng(). - GLOBAL_SEED_orig = Random.GLOBAL_SEED + tls_seed_orig = copy(Random.get_tls_seed()) local refvalue - @testset "GLOBAL_SEED is also preserved (setup)" begin - @test GLOBAL_SEED_orig == Random.GLOBAL_SEED + @testset "TLS seed is also preserved (setup)" begin + @test tls_seed_orig == Random.get_tls_seed() refvalue = rand(Int) Random.seed!() - @test GLOBAL_SEED_orig != Random.GLOBAL_SEED + @test tls_seed_orig != Random.get_tls_seed() end - @test GLOBAL_SEED_orig == Random.GLOBAL_SEED - @testset "GLOBAL_SEED is also preserved (forloop)" for _=1:3 + @test tls_seed_orig == Random.get_tls_seed() + @testset "TLS seed is also preserved (forloop)" for _=1:3 @test refvalue == rand(Int) Random.seed!() end - @test GLOBAL_SEED_orig == Random.GLOBAL_SEED - @testset "GLOBAL_SEED is also preserved (beginend)" begin + @test tls_seed_orig == Random.get_tls_seed() + @testset "TLS seed is also preserved (beginend)" begin @test refvalue == rand(Int) end + + # @testset below is not compatible with e.g. v1.9, but it still fails there (at "main task") + # when deleting lines using get_tls_seed() or GLOBAL_SEED + @testset "TLS seed and concurrency" begin + # Even with multi-tasking, the TLS seed must stay consistent: the default_rng() state + # is reset to the "global seed" at the beginning, and the "global seed" is reset to what + # it was at the end of the testset; make sure that distinct tasks don't see the mutation + # of this "global seed" (iow, it's task-local) + seed = rand(UInt128) + Random.seed!(seed) + seeded_state = copy(Random.default_rng()) + a = rand() + + ch = Channel{Nothing}() + @sync begin + @async begin + @testset "task 1" begin + # tick 1 + # this task didn't call seed! 
explicitly (yet), so its TaskLocalRNG() should have been + # reset to `Random.GLOBAL_SEED` at the beginning of `@testset` + @test Random.GLOBAL_SEED == Random.default_rng() + Random.seed!() + put!(ch, nothing) # tick 1 -> tick 2 + take!(ch) # tick 3 + end + put!(ch, nothing) # tick 3 -> tick 4 + end + @async begin + take!(ch) # tick 2 + # @testset below will record the current TLS "seed" and reset default_rng() to + # this value; + # it must not be affected by the fact that "task 1" called `seed!()` first + @test Random.get_tls_seed() == Random.GLOBAL_SEED + + @testset "task 2" begin + @test Random.GLOBAL_SEED == Random.default_rng() + Random.seed!() + put!(ch, nothing) # tick 2 -> tick 3 + take!(ch) # tick 4 + end + # when `@testset` of task 2 finishes, which is after `@testset` from task 1, + # it resets `get_tls_seed()` to what it was before starting: + @test Random.get_tls_seed() == Random.GLOBAL_SEED + end + end + @testset "main task" begin + @test Random.default_rng() == seeded_state + @test a == rand() + end + end end @testset "InterruptExceptions #21043" begin @@ -1141,7 +1193,7 @@ h25835(;x=1,y=1) = x isa Int ? x*y : (rand(Bool) ? 1.0 : 1) @test @inferred(f25835(x=nothing)) == () @test @inferred(f25835(x=1)) == (1,) - # A global argument should make this uninferrable + # A global argument should make this uninferable global y25835 = 1 @test f25835(x=y25835) == (1,) @test_throws ErrorException @inferred((()->f25835(x=y25835))()) == (1,) @@ -1482,6 +1534,22 @@ end @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_2 end +# Issue 54807 +struct FEexc + a::Nothing + b::Nothing +end + +@testset "FieldError Shim tests and Softdeprecation of @test_throws ErrorException" begin + feexc = FEexc(nothing, nothing) + # This is redundant regular test for FieldError + @test_throws FieldError feexc.c + # This should raise ErrorException + @test_throws ErrorException feexc.a = 1 + # This is test for FieldError shim and deprecation + @test_deprecated @test_throws ErrorException feexc.c +end + # Issue 25483 mutable struct PassInformationTestSet <: Test.AbstractTestSet results::Vector @@ -1530,3 +1598,175 @@ let end end end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Test)) +end + +module CustomTestSetModule + using Test + struct CustomTestSet <: Test.AbstractTestSet + description::String + end + Test.record(::CustomTestSet, result) = result + Test.finish(cts::CustomTestSet) = cts +end + +@testset "Unexported custom TestSet" begin + using .CustomTestSetModule + let res = @testset CustomTestSetModule.CustomTestSet begin + @test true + end + @test res isa CustomTestSetModule.CustomTestSet + end +end + +struct CustomPrintingTestSet <: AbstractTestSet + description::String + passes::Int + errors::Int + fails::Int + broken::Int +end + +function Test.finish(cpts::CustomPrintingTestSet) + if Test.get_testset_depth() != 0 + push!(Test.get_current_testset(), cpts) + # printing is handled by the parent + return cpts + end + + Test.print_testset_results(cpts) + cpts +end + +@testset "Custom testsets participate in printing" begin + mktemp() do f, _ + write(f, + """ + using Test + + mutable struct CustomPrintingTestSet <: Test.AbstractTestSet + description::String + passes::Int + fails::Int + errors::Int + broken::Int + end + CustomPrintingTestSet(desc::String) = CustomPrintingTestSet(desc, 0,0,0,0) + + Test.record(cpts::CustomPrintingTestSet, ::Test.Pass) = cpts.passes += 1 + Test.record(cpts::CustomPrintingTestSet, ::Test.Error) = 
cpts.errors += 1 + Test.record(cpts::CustomPrintingTestSet, ::Test.Fail) = cpts.fails += 1 + Test.record(cpts::CustomPrintingTestSet, ::Test.Broken) = cpts.broken += 1 + Test.get_test_counts(ts::CustomPrintingTestSet) = Test.TestCounts( + true, + ts.passes, + ts.fails, + ts.errors, + ts.broken, + 0, + 0, + 0, + 0, + Test.format_duration(ts)) + + function Test.finish(cpts::CustomPrintingTestSet) + if Test.get_testset_depth() != 0 + Test.record(Test.get_testset(), cpts) + # printing is handled by the parent + return cpts + end + + Test.print_test_results(cpts) + cpts + end + + struct NonRecordingTestSet <: Test.AbstractTestSet + description::String + end + Test.record(nrts::NonRecordingTestSet, ::Test.Result) = nrts + Test.finish(nrts::NonRecordingTestSet) = Test.record(Test.get_testset(), nrts) + + @testset "outer" begin + @testset "a" begin + @test true + end + @testset CustomPrintingTestSet "custom" begin + @test false + @test true + @test_broken false + @test error() + end + @testset NonRecordingTestSet "no-record" begin + @test false + @test true + @test_broken false + @test error() + end + @testset "b" begin + @test true + end + end + """) + + # this tests both the `TestCounts` parts as well as the fallback `x`s + expected = r""" + Test Summary: \| Pass Fail Error Broken Total Time + outer \| 3 1 1 1 6 \s*\d*.\ds + a \| 1 1 \s*\d*.\ds + custom \| 1 1 1 1 4 \s*\?s + no-record \| x x x x \? \s*\?s + b \| 1 1 \s*\d*.\ds + RNG of the outermost testset: .* + """ + + cmd = `$(Base.julia_cmd()) --startup-file=no --color=no $f` + result = read(pipeline(ignorestatus(cmd), stderr=devnull), String) + @test occursin(expected, result) + end + +end + +@testset "Deprecated multiple arguments" begin + msg1 = """Multiple descriptions provided to @testset. \ + This is deprecated and may error in the future.""" + @test_deprecated msg1 @macroexpand @testset "name1" "name2" begin end + msg2 = """Multiple testset types provided to @testset. \ + This is deprecated and may error in the future.""" + @test_deprecated msg2 @macroexpand @testset DefaultTestSet DefaultTestSet begin end +end + +# Issue #54082 +module M54082 end +@testset "@test_throws UndefVarError(:var)" begin + # Single-arg `UndefVarError` should match all `UndefVarError` for the + # same variable name, regardless of scope, to keep pre-v1.11 behaviour. + f54082() = var + @test_throws UndefVarError(:var) f54082() + # But if scope is set, then it has to match. 
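(Editor's aside, not part of the patch: the test just below exercises the scoped form of `UndefVarError`. A minimal sketch of what the two-argument form matches on — the `scope` field name is my reading of the Julia 1.11+ `Base.UndefVarError` definition, and `M54082`/`var` are the names from the surrounding testset.)

```julia
err = try
    M54082.var           # undefined binding accessed through an explicit module
catch e
    e
end
err isa UndefVarError    # true
err.var === :var         # true
err.scope === M54082     # true — what `UndefVarError(:var, M54082)` below compares against
```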
+ @test_throws UndefVarError(:var, M54082) M54082.var + let result = @testset NoThrowTestSet begin + # Wrong module scope + @test_throws UndefVarError(:var, Main) M54082.var + end + @test only(result) isa Test.Fail + end +end + +@testset "Set RNG of testset" begin + rng1 = Xoshiro(0x2e026445595ed28e, 0x07bb81ac4c54926d, 0x83d7d70843e8bad6, 0xdbef927d150af80b, 0xdbf91ddf2534f850) + rng2 = Xoshiro(0xc380f460355639ee, 0xb39bc754b7d63bbf, 0x1551dbcfb5ed5668, 0x71ab5a18fec21a25, 0x649d0c1be1ca5436) + rng3 = Xoshiro(0xee97f5b53f7cdc49, 0x480ac387b0527d3d, 0x614b416502a9e0f5, 0x5250cb36e4a4ceb1, 0xed6615c59e475fa0) + + @testset rng=rng1 begin + @test rand() == rand(rng1) + end + + @testset rng=rng2 "Outer" begin + @test rand() == rand(rng2) + @testset rng=rng3 "Inner: $(i)" for i in 1:10 + @test rand() == rand(rng3) + end + end +end diff --git a/stdlib/UUIDs/Project.toml b/stdlib/UUIDs/Project.toml index 11dbcda5c4944..4eb31dc9572c0 100644 --- a/stdlib/UUIDs/Project.toml +++ b/stdlib/UUIDs/Project.toml @@ -1,5 +1,6 @@ name = "UUIDs" uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/stdlib/UUIDs/docs/src/index.md b/stdlib/UUIDs/docs/src/index.md index 1e6c950dd8999..c9529a4a38170 100644 --- a/stdlib/UUIDs/docs/src/index.md +++ b/stdlib/UUIDs/docs/src/index.md @@ -1,3 +1,7 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/UUIDs/docs/src/index.md" +``` + # UUIDs ```@docs diff --git a/stdlib/UUIDs/src/UUIDs.jl b/stdlib/UUIDs/src/UUIDs.jl index 41d5319fec24d..e3f5f812ef6e2 100644 --- a/stdlib/UUIDs/src/UUIDs.jl +++ b/stdlib/UUIDs/src/UUIDs.jl @@ -10,7 +10,7 @@ using Random import SHA -export UUID, uuid1, uuid4, uuid5, uuid_version +export UUID, uuid1, uuid4, uuid5, uuid7, uuid_version import Base: UUID @@ -39,20 +39,22 @@ const namespace_x500 = UUID(0x6ba7b8149dad11d180b400c04fd430c8) # 6ba7b814-9dad- uuid1([rng::AbstractRNG]) -> UUID Generates a version 1 (time-based) universally unique identifier (UUID), as specified -by RFC 4122. Note that the Node ID is randomly generated (does not identify the host) +by [RFC 4122](https://www.ietf.org/rfc/rfc4122). Note that the Node ID is randomly generated (does not identify the host) according to section 4.5 of the RFC. -The default rng used by `uuid1` is not `GLOBAL_RNG` and every invocation of `uuid1()` without +The default rng used by `uuid1` is not `Random.default_rng()` and every invocation of `uuid1()` without an argument should be expected to return a unique identifier. Importantly, the outputs of `uuid1` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.6), `uuid1` uses `Random.RandomDevice` as the default rng. However, this is an implementation detail that may change in the future. !!! compat "Julia 1.6" - The output of `uuid1` does not depend on `GLOBAL_RNG` as of Julia 1.6. + The output of `uuid1` does not depend on `Random.default_rng()` as of Julia 1.6. 
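(Editor's aside: the `uuid1` hunk just below moves the timestamp computation up front and passes it to the new `_build_uuid1` helper. A short sketch of that arithmetic — the epoch-offset constant is the one quoted in the patch, the variable names are mine.)

```julia
# RFC 4122 version-1 timestamps count 100 ns intervals since 1582-10-15,
# while time() returns seconds since the Unix epoch (1970-01-01).
unix_seconds = time()
hundred_ns_ticks = round(UInt64, unix_seconds * 1e7)    # seconds -> 100 ns units
uuid_timestamp = hundred_ns_ticks + 0x01b21dd213814000  # shift to the UUID epoch
```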
# Examples ```jldoctest; filter = r"[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}" +julia> using Random + julia> rng = MersenneTwister(1234); julia> uuid1(rng) @@ -60,6 +62,13 @@ UUID("cfc395e8-590f-11e8-1f13-43a2532b2fa8") ``` """ function uuid1(rng::AbstractRNG=Random.RandomDevice()) + # 0x01b21dd213814000 is the number of 100 nanosecond intervals + # between the UUID epoch and Unix epoch + timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000 + _build_uuid1(rng, timestamp) +end + +function _build_uuid1(rng::AbstractRNG, timestamp::UInt64) u = rand(rng, UInt128) # mask off clock sequence and node @@ -68,9 +77,6 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice()) # set the unicast/multicast bit and version u |= 0x00000000000010000000010000000000 - # 0x01b21dd213814000 is the number of 100 nanosecond intervals - # between the UUID epoch and Unix epoch - timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000 ts_low = timestamp & typemax(UInt32) ts_mid = (timestamp >> 32) & typemax(UInt16) ts_hi = (timestamp >> 48) & 0x0fff @@ -79,30 +85,32 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice()) u |= UInt128(ts_mid) << 80 u |= UInt128(ts_hi) << 64 - UUID(u) + return UUID(u) end """ uuid4([rng::AbstractRNG]) -> UUID Generates a version 4 (random or pseudo-random) universally unique identifier (UUID), -as specified by RFC 4122. +as specified by [RFC 4122](https://www.ietf.org/rfc/rfc4122). -The default rng used by `uuid4` is not `GLOBAL_RNG` and every invocation of `uuid4()` without +The default rng used by `uuid4` is not `Random.default_rng()` and every invocation of `uuid4()` without an argument should be expected to return a unique identifier. Importantly, the outputs of `uuid4` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.6), `uuid4` uses `Random.RandomDevice` as the default rng. However, this is an implementation detail that may change in the future. !!! compat "Julia 1.6" - The output of `uuid4` does not depend on `GLOBAL_RNG` as of Julia 1.6. + The output of `uuid4` does not depend on `Random.default_rng()` as of Julia 1.6. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> using Random + +julia> rng = Xoshiro(123); julia> uuid4(rng) -UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8") +UUID("856e446e-0c6a-472a-9638-f7b8557cd282") ``` """ function uuid4(rng::AbstractRNG=Random.RandomDevice()) @@ -123,13 +131,15 @@ as specified by RFC 4122. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> using Random + +julia> rng = Xoshiro(123); julia> u4 = uuid4(rng) -UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8") +UUID("856e446e-0c6a-472a-9638-f7b8557cd282") julia> u5 = uuid5(u4, "julia") -UUID("086cc5bb-2461-57d8-8068-0aed7f5b5cd1") +UUID("2df91e3f-da06-5362-a6fe-03772f2e14c9") ``` """ function uuid5(ns::UUID, name::String) @@ -151,4 +161,47 @@ function uuid5(ns::UUID, name::String) return UUID(v) end +""" + uuid7([rng::AbstractRNG]) -> UUID + +Generates a version 7 (random or pseudo-random) universally unique identifier (UUID), +as specified by [RFC 9652](https://www.rfc-editor.org/rfc/rfc9562). + +The default rng used by `uuid7` is not `Random.default_rng()` and every invocation of `uuid7()` without +an argument should be expected to return a unique identifier. Importantly, the outputs of +`uuid7` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.12), +`uuid7` uses `Random.RandomDevice` as the default rng. However, this is an implementation +detail that may change in the future. + +!!! 
compat "Julia 1.12" + `uuid7()` is available as of Julia 1.12. + +# Examples +```jldoctest; filter = r"[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}" +julia> using Random + +julia> rng = Xoshiro(123); + +julia> uuid7(rng) +UUID("019026ca-e086-772a-9638-f7b8557cd282") +``` +""" +function uuid7(rng::AbstractRNG=Random.RandomDevice()) + # current time in ms, rounded to an Integer + timestamp = round(UInt128, time() * 1e3) + _build_uuid7(rng, timestamp) +end + +function _build_uuid7(rng::AbstractRNG, timestamp::UInt128) + bytes = rand(rng, UInt128) + # make space for the timestamp + bytes &= 0x0000000000000fff3fffffffffffffff + # version & variant + bytes |= 0x00000000000070008000000000000000 + + bytes |= timestamp << UInt128(80) + + return UUID(bytes) +end + end diff --git a/stdlib/UUIDs/test/runtests.jl b/stdlib/UUIDs/test/runtests.jl index 5085fa33e8573..c6da441076ea8 100644 --- a/stdlib/UUIDs/test/runtests.jl +++ b/stdlib/UUIDs/test/runtests.jl @@ -1,23 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license using Test, UUIDs, Random - -u1 = uuid1() -u4 = uuid4() -u5 = uuid5(u1, "julia") -@test uuid_version(u1) == 1 -@test uuid_version(u4) == 4 -@test uuid_version(u5) == 5 -@test u1 == UUID(string(u1)) == UUID(GenericString(string(u1))) -@test u4 == UUID(string(u4)) == UUID(GenericString(string(u4))) -@test u5 == UUID(string(u5)) == UUID(GenericString(string(u5))) -@test u1 == UUID(UInt128(u1)) -@test u4 == UUID(UInt128(u4)) -@test u5 == UUID(UInt128(u5)) -@test uuid4(MersenneTwister(0)) == uuid4(MersenneTwister(0)) -@test_throws ArgumentError UUID("550e8400e29b-41d4-a716-446655440000") -@test_throws ArgumentError UUID("550e8400e29b-41d4-a716-44665544000098") -@test_throws ArgumentError UUID("z50e8400-e29b-41d4-a716-446655440000") +using UUIDs: _build_uuid1, _build_uuid7 # results similar to Python builtin uuid # To reproduce the sequence @@ -37,11 +21,6 @@ const following_uuids = [ UUID("d8cc6298-75d5-57e0-996c-279259ab365c"), ] -for (idx, init_uuid) in enumerate(following_uuids[1:end-1]) - next_id = uuid5(init_uuid, "julia") - @test next_id == following_uuids[idx+1] -end - # Python-generated UUID following each of the standard namespaces const standard_namespace_uuids = [ (UUIDs.namespace_dns, UUID("00ca23ad-40ef-500c-a910-157de3950d07")), @@ -50,26 +29,98 @@ const standard_namespace_uuids = [ (UUIDs.namespace_x500, UUID("993c6684-82e7-5cdb-bd46-9bff0362e6a9")), ] -for (init_uuid, next_uuid) in standard_namespace_uuids - result = uuid5(init_uuid, "julia") - @test next_uuid == result -end - -# Issue 35860 -Random.seed!(Random.GLOBAL_RNG, 10) +@testset "UUIDs" begin u1 = uuid1() u4 = uuid4() -Random.seed!(Random.GLOBAL_RNG, 10) -@test u1 != uuid1() -@test u4 != uuid4() - -@test_throws ArgumentError UUID("22b4a8a1ae548-4eeb-9270-60426d66a48e") -@test_throws ArgumentError UUID("22b4a8a1-e548a4eeb-9270-60426d66a48e") -@test_throws ArgumentError UUID("22b4a8a1-e548-4eeba9270-60426d66a48e") -@test_throws ArgumentError UUID("22b4a8a1-e548-4eeb-9270a60426d66a48e") -str = "22b4a8a1-e548-4eeb-9270-60426d66a48e" -@test UUID(uppercase(str)) == UUID(str) - -for r in rand(UInt128, 10^3) - @test UUID(r) == UUID(string(UUID(r))) +u5 = uuid5(u1, "julia") +u7 = uuid7() + +@testset "Extraction of version numbers" begin + @test uuid_version(u1) == 1 + @test uuid_version(u4) == 4 + @test uuid_version(u5) == 5 + @test uuid_version(u7) == 7 +end + +@testset "Parsing from string" begin + @test u1 == UUID(string(u1)) == UUID(GenericString(string(u1))) + @test u4 == UUID(string(u4)) == 
UUID(GenericString(string(u4))) + @test u5 == UUID(string(u5)) == UUID(GenericString(string(u5))) + @test u7 == UUID(string(u7)) == UUID(GenericString(string(u7))) +end + +@testset "UInt128 conversion" begin + @test u1 == UUID(UInt128(u1)) + @test u4 == UUID(UInt128(u4)) + @test u5 == UUID(UInt128(u5)) + @test u7 == UUID(UInt128(u7)) +end + +@testset "Passing an RNG" begin + rng = Xoshiro(0) + @test uuid1(rng) isa UUID + @test uuid4(rng) isa UUID + @test uuid7(rng) isa UUID +end + +@testset "uuid1, uuid4 & uuid7 RNG stability" begin + @test uuid4(Xoshiro(0)) == uuid4(Xoshiro(0)) + + time_uuid1 = rand(UInt64) + time_uuid7 = rand(UInt128) + + # we need to go through the internal function to test RNG stability + @test _build_uuid1(Xoshiro(0), time_uuid1) == _build_uuid1(Xoshiro(0), time_uuid1) + @test _build_uuid7(Xoshiro(0), time_uuid7) == _build_uuid7(Xoshiro(0), time_uuid7) +end + +@testset "Rejection of invalid UUID strings" begin + @test_throws ArgumentError UUID("550e8400e29b-41d4-a716-446655440000") + @test_throws ArgumentError UUID("550e8400e29b-41d4-a716-44665544000098") + @test_throws ArgumentError UUID("z50e8400-e29b-41d4-a716-446655440000") + @test_throws ArgumentError UUID("22b4a8a1ae548-4eeb-9270-60426d66a48e") + @test_throws ArgumentError UUID("22b4a8a1-e548a4eeb-9270-60426d66a48e") + @test_throws ArgumentError UUID("22b4a8a1-e548-4eeba9270-60426d66a48e") + @test_throws ArgumentError UUID("22b4a8a1-e548-4eeb-9270a60426d66a48e") +end + +@testset "UUID sequence" begin + for (idx, init_uuid) in enumerate(following_uuids[1:end-1]) + next_id = uuid5(init_uuid, "julia") + @test next_id == following_uuids[idx+1] + end +end + +@testset "Standard namespace UUIDs" begin + for (init_uuid, next_uuid) in standard_namespace_uuids + result = uuid5(init_uuid, "julia") + @test next_uuid == result + end +end + +@testset "Use of Random.RandomDevice (#35860)" begin + Random.seed!(Random.default_rng(), 10) + u1 = uuid1() + u4 = uuid4() + u7 = uuid7() + Random.seed!(Random.default_rng(), 10) + @test u1 != uuid1() + @test u4 != uuid4() + @test u7 != uuid7() +end + +@testset "case invariance" begin + str = "22b4a8a1-e548-4eeb-9270-60426d66a48e" + @test UUID(uppercase(str)) == UUID(str) +end + +@testset "Equality of string parsing & direct UInt128 passing" begin + for r in rand(UInt128, 10^3) + @test UUID(r) == UUID(string(UUID(r))) + end +end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(UUIDs)) +end end diff --git a/stdlib/Unicode/Project.toml b/stdlib/Unicode/Project.toml index 5e3040ce9e3db..781da423c63e8 100644 --- a/stdlib/Unicode/Project.toml +++ b/stdlib/Unicode/Project.toml @@ -1,9 +1,10 @@ name = "Unicode" uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [targets] -test = ["Test"] +test = ["Test", "Random"] diff --git a/stdlib/Unicode/docs/src/index.md b/stdlib/Unicode/docs/src/index.md index 2771c8a9f01cc..fdf07685a4492 100644 --- a/stdlib/Unicode/docs/src/index.md +++ b/stdlib/Unicode/docs/src/index.md @@ -1,6 +1,14 @@ +```@meta +EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Unicode/docs/src/index.md" +``` + # Unicode +The `Unicode` module provides essential functionality for managing Unicode characters and strings. +It includes validation, category determination, normalization, case transformation, and grapheme segmentation, enabling effective Unicode data handling. 
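(Editor's aside: the module summary added above names the main entry points; a minimal usage sketch, separate from the patch — the expected results reflect my understanding of the documented behaviour.)

```julia
using Unicode

Unicode.normalize("noe\u0308l", :NFC)                  # "noël", with a precomposed ë
Unicode.isequal_normalized("no\u00EBl", "noe\u0308l")  # true — canonically equivalent spellings
length(collect(Unicode.graphemes("noël")))             # 4 grapheme clusters
```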
+ ```@docs +Unicode Unicode.julia_chartransform Unicode.isassigned Unicode.isequal_normalized diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index e0ae78bd911a7..b9822d0073c73 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -1,5 +1,9 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license - +""" +The `Unicode` module provides essential functionality for managing Unicode characters and strings. +It includes validation, category determination, normalization, case transformation, and grapheme segmentation, +enabling effective Unicode data handling. +""" module Unicode export graphemes, isequal_normalized @@ -87,7 +91,7 @@ options (which all default to `false` except for `compose`) are specified: * `stable=true`: enforce Unicode versioning stability (never introduce characters missing from earlier Unicode versions) You can also use the `chartransform` keyword (which defaults to `identity`) to pass an arbitrary -*function* mapping `Integer` codepoints to codepoints, which is is called on each +*function* mapping `Integer` codepoints to codepoints, which is called on each character in `s` as it is processed, in order to perform arbitrary additional normalizations. For example, by passing `chartransform=Unicode.julia_chartransform`, you can apply a few Julia-specific character normalizations that are performed by Julia when parsing identifiers (in addition to @@ -208,12 +212,19 @@ end using Base.Unicode: utf8proc_error, UTF8PROC_DECOMPOSE, UTF8PROC_CASEFOLD, UTF8PROC_STRIPMARK -function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, options::Integer) - ret = @ccall utf8proc_decompose_char(codepoint::UInt32, dest::Ptr{UInt32}, length(dest)::Int, options::Cint, C_NULL::Ptr{Cint})::Int +function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, offset::Integer, options::Integer) + ret = GC.@preserve dest @ccall utf8proc_decompose_char(codepoint::UInt32, pointer(dest, 1+offset)::Ptr{UInt32}, (length(dest)-offset)::Int, options::Cint, C_NULL::Ptr{Cint})::Int ret < 0 && utf8proc_error(ret) return ret end +# would be good to have higher-level accessor functions in utf8proc. alternatively, +# we could mirror the whole utf8proc_property_t struct in Julia, but that is annoying +# because of the bitfields. +combining_class(uc::Integer) = + 0x000301 ≤ uc ≤ 0x10ffff ? unsafe_load(ccall(:utf8proc_get_property, Ptr{UInt16}, (UInt32,), uc), 2) : 0x0000 +combining_class(c::AbstractChar) = ismalformed(c) ? 0x0000 : combining_class(UInt32(c)) + """ isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false, chartransform=identity) @@ -225,6 +236,9 @@ As with [`Unicode.normalize`](@ref), you can also pass an arbitrary function via the `chartransform` keyword (mapping `Integer` codepoints to codepoints) to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref). +!!! compat "Julia 1.8" + The `isequal_normalized` function was added in Julia 1.8. + # Examples For example, the string `"noël"` can be constructed in two canonically equivalent ways @@ -251,29 +265,78 @@ julia> isequal_normalized(s1, "NOËL", casefold=true) true ``` """ -function isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity) - function decompose_next_char!(c, state, d, options, s) - n = _decompose_char!(c, d, options) - if n > length(d) # may be possible in future Unicode versions? 
- n = _decompose_char!(c, resize!(d, n), options) +isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity) = + _isequal_normalized!(s1, s2, Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4), chartransform; casefold, stripmark) + +# like isequal_normalized, but takes pre-allocated codepoint buffers as arguments, and chartransform is a positional argument +function _isequal_normalized!(s1::AbstractString, s2::AbstractString, + d1::Vector{UInt32}, d2::Vector{UInt32}, chartransform::F=identity; + casefold::Bool=false, stripmark::Bool=false) where {F} + function decompose_next_chars!(state, d, options, s) + local n + offset = 0 + @inbounds while true + # read a char and decompose it to d + c = chartransform(UInt32(state[1])) + state = iterate(s, state[2]) + if c < 0x80 # fast path for common ASCII case + n = 1 + offset + n > length(d) && resize!(d, 2n) + d[n] = casefold ? (0x41 ≤ c ≤ 0x5A ? c+0x20 : c) : c + break # ASCII characters are all zero combining class + else + while true + n = _decompose_char!(c, d, offset, options) + offset + if n > length(d) + resize!(d, 2n) + continue + end + break + end + end + + # decomposed chars must be sorted in ascending order of combining class, + # which means we need to keep fetching chars until we get to non-combining + (iszero(combining_class(d[n])) || isnothing(state)) && break # non-combining + offset = n + end + + # sort by combining class + if n < 32 # almost always true + for j1 = 2:n # insertion sort + cc = combining_class(d[j1]) + iszero(cc) && continue # don't re-order non-combiners + for j2 = j1:-1:2 + combining_class(d[j2-1]) ≤ cc && break + d[j2-1], d[j2] = d[j2], d[j2-1] + end + end + else # avoid n^2 complexity in crazy large-n case + j = 1 + @views while j < n + j₀ = j + something(findnext(iszero ∘ combining_class, d[j+1:n], 1), n+1-j) + sort!(d[j:j₀-1], by=combining_class) + j = j₀ + end end - return 1, n, iterate(s, state) + + # split return statement to help type inference: + return state === nothing ? 
(1, n, nothing) : (1, n, state) end options = UTF8PROC_DECOMPOSE casefold && (options |= UTF8PROC_CASEFOLD) stripmark && (options |= UTF8PROC_STRIPMARK) i1,i2 = iterate(s1),iterate(s2) - d1,d2 = Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4) # codepoint buffers n1 = n2 = 0 # lengths of codepoint buffers j1 = j2 = 1 # indices in d1, d2 while true if j1 > n1 i1 === nothing && return i2 === nothing && j2 > n2 - j1, n1, i1 = decompose_next_char!(chartransform(UInt32(i1[1])), i1[2], d1, options, s1) + j1, n1, i1 = decompose_next_chars!(i1, d1, options, s1) end if j2 > n2 i2 === nothing && return false - j2, n2, i2 = decompose_next_char!(chartransform(UInt32(i2[1])), i2[2], d2, options, s2) + j2, n2, i2 = decompose_next_chars!(i2, d2, options, s2) end d1[j1] == d2[j2] || return false j1 += 1; j2 += 1 diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl index 5c5a75b33e363..7fa57508cffbf 100644 --- a/stdlib/Unicode/test/runtests.jl +++ b/stdlib/Unicode/test/runtests.jl @@ -3,6 +3,9 @@ using Test using Unicode using Unicode: normalize, isassigned, julia_chartransform +import Random + +Random.seed!(12345) @testset "string normalization" begin # normalize (Unicode normalization etc.): @@ -27,14 +30,14 @@ using Unicode: normalize, isassigned, julia_chartransform @test normalize("\u0072\u0307\u0323", :NFC) == "\u1E5B\u0307" #26917 # julia_chartransform identifier normalization - @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212", chartransform=julia_chartransform) == - "julia\u03B5\u03BC\u22C5\u22C5\u002D" + @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212\u210F", chartransform=julia_chartransform) == + "julia\u03B5\u03BC\u22C5\u22C5\u002D\u0127" @test julia_chartransform('\u00B5') === '\u03BC' end @testset "unicode sa#15" begin #Tests from Unicode SA#15, "Unicode normalization forms" - #http://www.unicode.org/reports/tr15/ + #https://www.unicode.org/reports/tr15/ @testset "canonical equivalence" begin let ==(a::Array{Char},b::Array{Char}) = normalize(string(a...), :NFC)==normalize(string(b...), :NFC) @@ -455,6 +458,9 @@ end @test !Base.Unicode.isvalid(Char, overlong_char) end +# the obvious, but suboptimal, algorithm: +isequal_normalized_naive(s1, s2; kws...) = normalize(s1; kws...) == normalize(s2; kws...) 
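(Editor's aside: the rewrite above buffers a full run of combining marks and sorts them by combining class before comparing. A small hedged example of why that reordering matters — the codepoints are my choice, not taken from the patch.)

```julia
using Unicode

# U+0323 (combining dot below, class 220) and U+0307 (combining dot above, class 230)
# attach at different positions on the base character, so the two orders below are
# canonically equivalent and must compare equal.
s1 = "d\u0323\u0307"
s2 = "d\u0307\u0323"

Unicode.isequal_normalized(s1, s2)              # true with the streaming comparison above
Unicode.normalize(s1) == Unicode.normalize(s2)  # true — normalization reorders the marks
```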
+ @testset "Unicode equivalence" begin @test isequal_normalized("no\u00EBl", "noe\u0308l") @test !isequal_normalized("no\u00EBl", "noe\u0308l ") @@ -466,4 +472,69 @@ end @test isequal_normalized("no\u00EBl", "noel", stripmark=true) @test isequal_normalized("no\u00EBl", "NOEL", stripmark=true, casefold=true) @test isequal_normalized("\u00B5\u0302m", "\u03BC\u0302m", chartransform=julia_chartransform) + + # issue #52408 + @testset "Sorting combining characters" begin + for str in ("\u5bc\u5b0", "j\u5ae\u5bf\u5b2\u5b4") # julia#52408 examples + @test isequal_normalized(str, normalize(str)) + end + + # first codepoint in every possible Unicode combining class + let cc_chars = UInt32[0x00000334, 0x00016ff0, 0x0000093c, 0x00003099, 0x0000094d, 0x000005b0, 0x000005b1, 0x000005b2, 0x000005b3, 0x000005b4, 0x000005b5, 0x000005b6, 0x000005b7, 0x000005b8, 0x000005b9, 0x000005bb, 0x000005bc, 0x000005bd, 0x000005bf, 0x000005c1, 0x000005c2, 0x0000fb1e, 0x0000064b, 0x0000064c, 0x0000064d, 0x00000618, 0x00000619, 0x0000061a, 0x00000651, 0x00000652, 0x00000670, 0x00000711, 0x00000c55, 0x00000c56, 0x00000e38, 0x00000e48, 0x00000eb8, 0x00000ec8, 0x00000f71, 0x00000f72, 0x00000f74, 0x00000321, 0x00001dce, 0x0000031b, 0x00001dfa, 0x00000316, 0x0000059a, 0x0000302e, 0x0001d16d, 0x000005ae, 0x00000301, 0x00000315, 0x0000035c, 0x0000035d, 0x00000345], + vowels = ['a', 'e', 'i', 'o', 'u', 'å', 'é', 'î', 'ö', 'ü'], Vowels = [vowels; uppercase.(vowels)] + function randcc(n, n_cc) # random string with lots of combining chars + buf = IOBuffer() + for _ = 1:n + print.(buf, rand(Vowels, rand(1:5))) + print.(buf, Char.(rand(cc_chars, rand(0:n_cc)))) + end + return String(take!(buf)) + end + for _ = 1:100 + s = randcc(10,10) + ns = normalize(s) + cs = normalize(s, casefold=true) + @test isequal_normalized(s, s) + if !isequal_normalized(s, ns) + @show s + end + @test isequal_normalized(s, ns) + @test isequal_normalized(cs, ns) == isequal_normalized_naive(cs, ns) + @test isequal_normalized(cs, ns, casefold=true) == + isequal_normalized_naive(cs, ns, casefold=true) + end + for _ = 1:3 + s = randcc(5,1000) # exercise sort!-based fallback + @test isequal_normalized(s, normalize(s)) + end + function randcc2(n, n_cc) # 2 strings with equivalent reordered combiners + buf1 = IOBuffer() + buf2 = IOBuffer() + p = n_cc / length(cc_chars) + for _ = 1:n + a = join(rand(Vowels, rand(1:5))) + print(buf1, a) + print(buf2, a) + + # chars from distinct combining classes + # are canonically equivalent when re-ordered + c = Random.randsubseq(cc_chars, p) + print.(buf1, Char.(Random.shuffle!(c))) + print.(buf2, Char.(Random.shuffle!(c))) + end + return String(take!(buf1)), String(take!(buf2)) + end + for _ = 1:100 + s1, s2 = randcc2(10,10) + @test isequal_normalized(s1, s2) + end + end + + # combining characters in the same class are inequivalent if re-ordered: + @test !isequal_normalized("x\u0334\u0335", "x\u0335\u0334") + end +end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Unicode)) end diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml index 575863062d8bb..40acd335c2327 100644 --- a/stdlib/Zlib_jll/Project.toml +++ b/stdlib/Zlib_jll/Project.toml @@ -1,6 +1,6 @@ name = "Zlib_jll" uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.13+0" +version = "1.3.1+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/Zlib_jll/src/Zlib_jll.jl b/stdlib/Zlib_jll/src/Zlib_jll.jl index ea381b8b0683c..fb043c7143789 100644 --- a/stdlib/Zlib_jll/src/Zlib_jll.jl +++ 
b/stdlib/Zlib_jll/src/Zlib_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/Zlib_jll.jl baremodule Zlib_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/Zlib_jll/test/runtests.jl b/stdlib/Zlib_jll/test/runtests.jl index f04f9c70a7054..81eb742a172fe 100644 --- a/stdlib/Zlib_jll/test/runtests.jl +++ b/stdlib/Zlib_jll/test/runtests.jl @@ -3,5 +3,5 @@ using Test, Zlib_jll @testset "Zlib_jll" begin - @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.13" + @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.3.1" end diff --git a/stdlib/dSFMT_jll/Project.toml b/stdlib/dSFMT_jll/Project.toml index 4e3e80f918f0b..30209421a9994 100644 --- a/stdlib/dSFMT_jll/Project.toml +++ b/stdlib/dSFMT_jll/Project.toml @@ -1,6 +1,6 @@ name = "dSFMT_jll" uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36" -version = "2.2.4+1" +version = "2.2.5+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/dSFMT_jll/src/dSFMT_jll.jl b/stdlib/dSFMT_jll/src/dSFMT_jll.jl index 35ada23778a94..b84bf0d8204ae 100644 --- a/stdlib/dSFMT_jll/src/dSFMT_jll.jl +++ b/stdlib/dSFMT_jll/src/dSFMT_jll.jl @@ -4,7 +4,6 @@ baremodule dSFMT_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml index 87519e5a824b0..13669ec173678 100644 --- a/stdlib/libLLVM_jll/Project.toml +++ b/stdlib/libLLVM_jll/Project.toml @@ -1,13 +1,13 @@ name = "libLLVM_jll" uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a" -version = "15.0.7+5" +version = "18.1.7+3" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.8" +julia = "1.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl index 3140dc3989a72..be2acb34faa65 100644 --- a/stdlib/libLLVM_jll/src/libLLVM_jll.jl +++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl @@ -4,7 +4,6 @@ baremodule libLLVM_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/libLLVM_jll/test/runtests.jl b/stdlib/libLLVM_jll/test/runtests.jl index ea678108ae012..e04076f4145a5 100644 --- a/stdlib/libLLVM_jll/test/runtests.jl +++ b/stdlib/libLLVM_jll/test/runtests.jl @@ -3,5 +3,6 @@ using Test, Libdl, libLLVM_jll @testset "libLLVM_jll" begin - @test dlsym(libLLVM_jll.libLLVM_handle, :LLVMInitializeTarget; throw_error=false) !== nothing + # Try to find a symbol from the C API of libLLVM as a simple sanity check. 
+ @test dlsym(libLLVM_jll.libLLVM_handle, :LLVMContextCreate; throw_error=false) !== nothing end diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml index 4699baa7dad23..d1dde4c6074a7 100644 --- a/stdlib/libblastrampoline_jll/Project.toml +++ b/stdlib/libblastrampoline_jll/Project.toml @@ -1,13 +1,13 @@ name = "libblastrampoline_jll" uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.8.0+0" +version = "5.12.0+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.10" +julia = "1.12" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl index 49e7932a6b701..bbdad252be14a 100644 --- a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl +++ b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl @@ -4,7 +4,6 @@ baremodule libblastrampoline_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml index b8a9394c50e37..030f4a8a0b9d1 100644 --- a/stdlib/nghttp2_jll/Project.toml +++ b/stdlib/nghttp2_jll/Project.toml @@ -1,13 +1,13 @@ name = "nghttp2_jll" uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.52.0+0" +version = "1.64.0+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.6" +julia = "1.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/nghttp2_jll/src/nghttp2_jll.jl b/stdlib/nghttp2_jll/src/nghttp2_jll.jl index 76e8d3582c402..5057299614aa5 100644 --- a/stdlib/nghttp2_jll/src/nghttp2_jll.jl +++ b/stdlib/nghttp2_jll/src/nghttp2_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/nghttp2_jll.jl baremodule nghttp2_jll using Base, Libdl -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/nghttp2_jll/test/runtests.jl b/stdlib/nghttp2_jll/test/runtests.jl index 2f9af6d6a3338..13e7adaad9df6 100644 --- a/stdlib/nghttp2_jll/test/runtests.jl +++ b/stdlib/nghttp2_jll/test/runtests.jl @@ -11,5 +11,5 @@ end @testset "nghttp2_jll" begin info = unsafe_load(ccall((:nghttp2_version,libnghttp2), Ptr{nghttp2_info}, (Cint,), 0)) - @test VersionNumber(unsafe_string(info.version_str)) == v"1.52.0" + @test VersionNumber(unsafe_string(info.version_str)) == v"1.64.0" end diff --git a/stdlib/p7zip_jll/Project.toml b/stdlib/p7zip_jll/Project.toml index 4c9bf62ad7ec1..214c5b19a8a4b 100644 --- a/stdlib/p7zip_jll/Project.toml +++ b/stdlib/p7zip_jll/Project.toml @@ -1,6 +1,6 @@ name = "p7zip_jll" uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" +version = "17.5.0+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/p7zip_jll/src/p7zip_jll.jl b/stdlib/p7zip_jll/src/p7zip_jll.jl index 01f26de936e78..a2a90a2450ea6 100644 --- a/stdlib/p7zip_jll/src/p7zip_jll.jl +++ b/stdlib/p7zip_jll/src/p7zip_jll.jl @@ -3,7 +3,6 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/p7zip_jll.jl baremodule p7zip_jll using Base -Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] const LIBPATH_list = String[] diff --git a/stdlib/stdlib.mk b/stdlib/stdlib.mk new file mode 
100644 index 0000000000000..006b7a276a3b3 --- /dev/null +++ b/stdlib/stdlib.mk @@ -0,0 +1,30 @@ +STDLIBS_WITHIN_SYSIMG := \ + Artifacts FileWatching Libdl SHA libblastrampoline_jll OpenBLAS_jll Random \ + LinearAlgebra Sockets + +INDEPENDENT_STDLIBS := \ + ArgTools Base64 CRC32c Dates DelimitedFiles Distributed Downloads Future \ + InteractiveUtils JuliaSyntaxHighlighting LazyArtifacts LibGit2 LibCURL Logging \ + Markdown Mmap NetworkOptions Profile Printf Pkg REPL Serialization SharedArrays \ + SparseArrays Statistics StyledStrings SuiteSparse_jll Tar Test TOML Unicode UUIDs \ + dSFMT_jll GMP_jll libLLVM_jll LLD_jll LLVMLibUnwind_jll LibUnwind_jll LibUV_jll \ + LibCURL_jll LibSSH2_jll LibGit2_jll nghttp2_jll MozillaCACerts_jll \ + MPFR_jll OpenLibm_jll OpenSSL_jll PCRE2_jll p7zip_jll Zlib_jll + +STDLIBS := $(STDLIBS_WITHIN_SYSIMG) $(INDEPENDENT_STDLIBS) +VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION) + +SYSIMG_STDLIBS_SRCS = +INDEPENDENT_STDLIBS_SRCS = +define STDLIB_srcs +$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \ +$$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1) $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/Project.toml + +ifneq ($(filter $(1),$(STDLIBS_WITHIN_SYSIMG)),) + SYSIMG_STDLIBS_SRCS += $$($1_SRCS) +else + INDEPENDENT_STDLIBS_SRCS += $$($1_SRCS) +endif +endef + +$(foreach stdlib,$(STDLIBS),$(eval $(call STDLIB_srcs,$(stdlib)))) diff --git a/sysimage.mk b/sysimage.mk index 993ee9a990058..571e2da003346 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -2,9 +2,11 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) BUILDDIR := . JULIAHOME := $(SRCDIR) include $(JULIAHOME)/Make.inc +include $(JULIAHOME)/stdlib/stdlib.mk default: sysimg-$(JULIA_BUILD_MODE) # contains either "debug" or "release" all: sysimg-release sysimg-debug +basecompiler-ji: $(build_private_libdir)/basecompiler.ji sysimg-ji: $(build_private_libdir)/sys.ji sysimg-bc: $(build_private_libdir)/sys-bc.a sysimg-release: $(build_private_libdir)/sys.$(SHLIB_EXT) @@ -16,15 +18,19 @@ $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%-o.a @$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ \ $(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) \ $(if $(findstring -debug,$(notdir $@)),-ljulia-internal-debug -ljulia-debug,-ljulia-internal -ljulia) \ - $$([ $(OS) = WINNT ] && echo '' -lssp)) + $$([ $(OS) = WINNT ] && echo '' $(LIBM) -lssp --disable-auto-import --disable-runtime-pseudo-reloc)) @$(INSTALL_NAME_CMD)$(notdir $@) $@ @$(DSYMUTIL) $@ COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \ + base/Base_compiler.jl \ base/boot.jl \ base/docs/core.jl \ base/abstractarray.jl \ base/abstractdict.jl \ + base/abstractset.jl \ + base/iddict.jl \ + base/idset.jl \ base/array.jl \ base/bitarray.jl \ base/bitset.jl \ @@ -37,6 +43,7 @@ COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \ base/int.jl \ base/indices.jl \ base/iterators.jl \ + base/invalidation.jl \ base/namedtuple.jl \ base/number.jl \ base/operators.jl \ @@ -45,29 +52,29 @@ COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \ base/pointer.jl \ base/promotion.jl \ base/range.jl \ - base/reflection.jl \ + base/runtime_internals.jl \ base/traits.jl \ base/refvalue.jl \ base/tuple.jl) -COMPILER_SRCS += $(shell find $(JULIAHOME)/base/compiler -name \*.jl) +COMPILER_SRCS += $(shell find $(JULIAHOME)/Compiler/src -name \*.jl) # sort these to remove duplicates BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl -and -not -name sysimg.jl) \ 
$(shell find $(BUILDROOT)/base -name \*.jl -and -not -name sysimg.jl)) -STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(shell find $(build_datarootdir)/julia/stdlib/$(VERSDIR)/*/src -name \*.jl) \ - $(wildcard $(build_prefix)/manifest/$(VERSDIR)/*) +STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIBS_SRCS) RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make sure this always has a trailing slash +RELDATADIR := $(call rel_path,$(JULIAHOME)/base,$(build_datarootdir))/ # <-- make sure this always has a trailing slash -$(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS) +$(build_private_libdir)/basecompiler.ji: $(COMPILER_SRCS) @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ - $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \ - --startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl) + JULIA_NUM_THREADS=1 $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \ + --startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O1 Base_compiler.jl --buildroot $(RELBUILDROOT) --dataroot $(RELDATADIR)) @mv $@.tmp $@ -$(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS) +$(build_private_libdir)/sys.ji: $(build_private_libdir)/basecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS) @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ - $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ - --startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \ + JULIA_NUM_THREADS=1 $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O1 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ + --startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl --buildroot $(RELBUILDROOT) --dataroot $(RELDATADIR); then \ echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. 
***'; \ false; \ fi ) diff --git a/test/.gitignore b/test/.gitignore index a1af9ae3d44bf..20bf199b87c74 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -2,3 +2,6 @@ /ccalltest /ccalltest.s /libccalltest.* +/relocatedepot +/RelocationTestPkg2/src/foo.txt +/RelocationTestPkg*/Manifest.toml diff --git a/test/Makefile b/test/Makefile index 88dbe5b2b4ed6..9b151cd213274 100644 --- a/test/Makefile +++ b/test/Makefile @@ -7,14 +7,14 @@ STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR) # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE export JULIA_DEPOT_PATH := $(build_prefix)/share/julia -export JULIA_LOAD_PATH := @stdlib +export JULIA_LOAD_PATH := @$(PATHSEP)@stdlib unexport JULIA_PROJECT := unexport JULIA_BINDIR := -TESTGROUPS = unicode strings compiler +TESTGROUPS = unicode strings compiler Compiler TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ - $(filter-out runtests testdefs, \ + $(filter-out runtests testdefs relocatedepot, \ $(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \ $(foreach group,$(TESTGROUPS), \ $(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/$(group)/*.jl))) @@ -24,6 +24,8 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)" +TRIMMING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(JULIAHOME)/usr/bin" "CC=$(CC)" + default: $(TESTS): @@ -34,17 +36,48 @@ $(addprefix revise-, $(TESTS)): revise-% : @cd $(SRCDIR) && \ $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) +relocatedepot: + @rm -rf $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + @mkdir $(SRCDIR)/relocatedepot + @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg3 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg4 $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + +revise-relocatedepot: revise-% : + @rm -rf $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + @mkdir $(SRCDIR)/relocatedepot + @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg3 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg4 $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,RELOCATEDEPOT="" $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + embedding: @$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS) gcext: @$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS) +trimming: + @$(MAKE) -C $(SRCDIR)/$@ check $(TRIMMING_ARGS) + clangsa: @$(MAKE) -C $(SRCDIR)/$@ clean: @$(MAKE) -C embedding $@ $(EMBEDDING_ARGS) @$(MAKE) -C gcext $@ $(GCEXT_ARGS) + @$(MAKE) -C llvmpasses $@ + @$(MAKE) -C trimming $@ $(TRIMMING_ARGS) -.PHONY: $(TESTS) $(addprefix revise-, 
$(TESTS)) embedding gcext clangsa clean +.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext trimming clangsa clean diff --git a/test/RelocationTestPkg1/Project.toml b/test/RelocationTestPkg1/Project.toml new file mode 100644 index 0000000000000..4b5b67c3aef2d --- /dev/null +++ b/test/RelocationTestPkg1/Project.toml @@ -0,0 +1,3 @@ +name = "RelocationTestPkg1" +uuid = "854e1adb-5a97-46bf-a391-1cfe05ac726d" +version = "0.1.0" diff --git a/test/RelocationTestPkg1/src/RelocationTestPkg1.jl b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl new file mode 100644 index 0000000000000..a86543a61b3f8 --- /dev/null +++ b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl @@ -0,0 +1,5 @@ +module RelocationTestPkg1 + +greet() = print("Hello World!") + +end # module RelocationTestPkg1 diff --git a/test/RelocationTestPkg1/src/foo.txt b/test/RelocationTestPkg1/src/foo.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/RelocationTestPkg2/Project.toml b/test/RelocationTestPkg2/Project.toml new file mode 100644 index 0000000000000..b909269a0894c --- /dev/null +++ b/test/RelocationTestPkg2/Project.toml @@ -0,0 +1,3 @@ +name = "RelocationTestPkg2" +uuid = "8d933983-b090-4b0b-a37e-c34793f459d1" +version = "0.1.0" diff --git a/test/RelocationTestPkg2/src/RelocationTestPkg2.jl b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl new file mode 100644 index 0000000000000..4b1fd2708a727 --- /dev/null +++ b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl @@ -0,0 +1,7 @@ +module RelocationTestPkg2 + +include_dependency("foo.txt", track_content=false) +include_dependency("foodir", track_content=false) +greet() = print("Hello World!") + +end # module RelocationTestPkg2 diff --git a/test/RelocationTestPkg2/src/foo.txt b/test/RelocationTestPkg2/src/foo.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/RelocationTestPkg3/Project.toml b/test/RelocationTestPkg3/Project.toml new file mode 100644 index 0000000000000..61882cb5cda65 --- /dev/null +++ b/test/RelocationTestPkg3/Project.toml @@ -0,0 +1,3 @@ +name = "RelocationTestPkg3" +uuid = "1ba4f954-9da9-4cd2-9ca7-6250235df52c" +version = "0.1.0" diff --git a/test/RelocationTestPkg3/src/RelocationTestPkg3.jl b/test/RelocationTestPkg3/src/RelocationTestPkg3.jl new file mode 100644 index 0000000000000..6ed8e1e560a99 --- /dev/null +++ b/test/RelocationTestPkg3/src/RelocationTestPkg3.jl @@ -0,0 +1,7 @@ +module RelocationTestPkg3 + +include_dependency("bar.txt", track_content=true) +include_dependency("bardir", track_content=true) +greet() = print("Hello World!") + +end # module RelocationTestPkg3 diff --git a/test/RelocationTestPkg3/src/bar.txt b/test/RelocationTestPkg3/src/bar.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/RelocationTestPkg4/Project.toml b/test/RelocationTestPkg4/Project.toml new file mode 100644 index 0000000000000..8334a684f064e --- /dev/null +++ b/test/RelocationTestPkg4/Project.toml @@ -0,0 +1,6 @@ +name = "RelocationTestPkg4" +uuid = "d423d817-d7e9-49ac-b245-9d9d6db0b429" +version = "0.1.0" + +[deps] +RelocationTestPkg1 = "854e1adb-5a97-46bf-a391-1cfe05ac726d" diff --git a/test/RelocationTestPkg4/src/RelocationTestPkg4.jl b/test/RelocationTestPkg4/src/RelocationTestPkg4.jl new file mode 100644 index 0000000000000..d24a51d19a918 --- /dev/null +++ b/test/RelocationTestPkg4/src/RelocationTestPkg4.jl @@ -0,0 +1,5 @@ +module RelocationTestPkg4 + +greet() = print("Hello World!") + +end # module RelocationTestPkg4 diff --git 
a/test/abstractarray.jl b/test/abstractarray.jl index 912e0d5883d12..d1f30eacafacc 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -2,6 +2,20 @@ using Random, LinearAlgebra +include(joinpath(@__DIR__,"../Compiler/test/irutils.jl")) + +isdefined(Main, :InfiniteArrays) || @eval Main include("testhelpers/InfiniteArrays.jl") +using .Main.InfiniteArrays + +isdefined(Main, :StructArrays) || @eval Main include("testhelpers/StructArrays.jl") +using .Main.StructArrays + +isdefined(Main, :FillArrays) || @eval Main include("testhelpers/FillArrays.jl") +using .Main.FillArrays + +isdefined(Main, :SizedArrays) || @eval Main include("testhelpers/SizedArrays.jl") +using .Main.SizedArrays + A = rand(5,4,3) @testset "Bounds checking" begin @test checkbounds(Bool, A, 1, 1, 1) == true @@ -56,6 +70,20 @@ end @test checkbounds(Bool, A, CartesianIndex((5,)), CartesianIndex((4,)), CartesianIndex((4,))) == false end +@testset "Infinite axes" begin + r = OneToInf() + @testset "CartesianIndices" begin + C = CartesianIndices(size(r)) + ax = to_indices(r, (C,))[1] + @test ax === r + end + @testset "LinearIndices" begin + L = LinearIndices(size(r)) + ax = to_indices(r, (L,))[1] + @test ax === L + end +end + @testset "vector indices" begin @test checkbounds(Bool, A, 1:5, 1:4, 1:3) == true @test checkbounds(Bool, A, 0:5, 1:4, 1:3) == false @@ -67,6 +95,7 @@ end @test checkbounds(Bool, A, 1:60) == true @test checkbounds(Bool, A, 1:61) == false @test checkbounds(Bool, A, 2, 2, 2, 1:1) == true # extra indices + @test checkbounds(Bool, A, 2, 2, 2, 10:9) == true @test checkbounds(Bool, A, 2, 2, 2, 1:2) == false @test checkbounds(Bool, A, 1:5, 1:4) == false @test checkbounds(Bool, A, 1:5, 1:12) == false @@ -87,6 +116,7 @@ end @test checkbounds(Bool, A, trues(5), trues(13)) == false @test checkbounds(Bool, A, trues(6), trues(12)) == false @test checkbounds(Bool, A, trues(5, 4, 3)) == true + @test checkbounds(Bool, A, trues(5, 4, 3, 1)) == true # issue 45867 @test checkbounds(Bool, A, trues(5, 4, 2)) == false @test checkbounds(Bool, A, trues(5, 12)) == false @test checkbounds(Bool, A, trues(1, 5), trues(1, 4, 1), trues(1, 1, 3)) == false @@ -94,7 +124,9 @@ end @test checkbounds(Bool, A, trues(1, 5), trues(1, 5, 1), trues(1, 1, 3)) == false @test checkbounds(Bool, A, trues(1, 5), :, 2) == false @test checkbounds(Bool, A, trues(5, 4), trues(3)) == true - @test checkbounds(Bool, A, trues(4, 4), trues(3)) == true + @test checkbounds(Bool, A, trues(5), trues(4, 3, 1)) == true + @test checkbounds(Bool, A, trues(5, 4), trues(3, 2)) == false + @test checkbounds(Bool, A, trues(4, 4), trues(3)) == false @test checkbounds(Bool, A, trues(5, 4), trues(2)) == false @test checkbounds(Bool, A, trues(6, 4), trues(3)) == false @test checkbounds(Bool, A, trues(5, 4), trues(4)) == false @@ -117,6 +149,10 @@ end @test checkbounds(Bool, A, [CartesianIndex((6, 4))], 3) == false @test checkbounds(Bool, A, [CartesianIndex((5, 5))], 3) == false @test checkbounds(Bool, A, [CartesianIndex((5, 4))], 4) == false + @test checkbounds(Bool, A, 5, [CartesianIndex((4, 3, 1))]) == true + @test checkbounds(Bool, A, 5, [CartesianIndex((4, 3, 2))]) == false + @test_throws ArgumentError checkbounds(Bool, A, [CartesianIndex((4, 3)), CartesianIndex((4,))]) + @test_throws ArgumentError checkbounds(Bool, A, [CartesianIndex((1,)), 1]) end @testset "index conversion" begin @@ -299,6 +335,22 @@ end R = LinearIndices((Base.IdentityUnitRange(0:1), 0:1)) @test axes(R) == (Base.IdentityUnitRange(0:1), Base.OneTo(2)) end + + @testset "show" begin + A = 
zeros(2,3) + for B in (A, view(A, Base.IdentityUnitRange(2:4))) + l = LinearIndices(B) + s = sprint(show, l) + @test s == "LinearIndices($(axes(B)))" + end + end +end + +@testset "copy for LinearIndices/CartesianIndices" begin + C = CartesianIndices((1:2, 1:4)) + @test copy(C) === C + L = LinearIndices((1:2, 1:4)) + @test copy(L) === L end # token type on which to dispatch testing methods in order to avoid potential @@ -470,6 +522,13 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where mask = bitrand(shape) @testset "test logical indexing" begin + let + masks1 = (mask,) + @test only(@inferred(to_indices(A, masks1))) isa Base.LogicalIndex{Int} + if IndexStyle(B) isa IndexCartesian + @test only(@inferred(to_indices(B, masks1))) === Base.LogicalIndex(mask) + end + end @test B[mask] == A[mask] == B[findall(mask)] == A[findall(mask)] == LinearIndices(mask)[findall(mask)] @test B[vec(mask)] == A[vec(mask)] == LinearIndices(mask)[findall(mask)] mask1 = bitrand(size(A, 1)) @@ -479,10 +538,15 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where @test B[mask1, 1, trailing2] == A[mask1, 1, trailing2] == LinearIndices(mask)[findall(mask1)] if ndims(B) > 1 + slice = ntuple(Returns(:), ndims(B)-1) maskfront = bitrand(shape[1:end-1]) - Bslice = B[ntuple(i->(:), ndims(B)-1)..., 1] - @test B[maskfront,1] == Bslice[maskfront] + Bslicefront = B[slice..., 1] + @test B[maskfront, 1] == Bslicefront[maskfront] @test size(B[maskfront, 1:1]) == (sum(maskfront), 1) + maskend = bitrand(shape[2:end]) + Bsliceend = B[1, slice...] + @test B[1 ,maskend] == Bsliceend[maskend] + @test size(B[1:1, maskend]) == (1, sum(maskend)) end end end @@ -505,12 +569,24 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T @test firstindex(B, 1) == firstindex(A, 1) == first(axes(B, 1)) @test firstindex(B, 2) == firstindex(A, 2) == first(axes(B, 2)) - # isassigned(a::AbstractArray, i::Int...) + @test !isassigned(B) + # isassigned(a::AbstractArray, i::Integer...) j = rand(1:length(B)) @test isassigned(B, j) if T == T24Linear @test !isassigned(B, length(B) + 1) end + # isassigned(a::AbstractArray, i::CartesianIndex) + @test isassigned(B, first(CartesianIndices(B))) + ind = last(CartesianIndices(B)) + @test !isassigned(B, ind + oneunit(ind)) + # isassigned(a::AbstractArray, i::Union{Integer,CartesianIndex}...) + @test isassigned(B, Int16.(first.(axes(B)))..., CartesianIndex(1,1)) + # Bool isn't a valid index + @test_throws ArgumentError isassigned(B, Bool.(first.(axes(B)))..., CartesianIndex(1,1)) + @test_throws ArgumentError isassigned(B, Bool.(first.(axes(B)))...) + @test_throws ArgumentError isassigned(B, true) + @test_throws ArgumentError isassigned(B, false) # reshape(a::AbstractArray, dims::Dims) @test_throws DimensionMismatch reshape(B, (0, 1)) @@ -683,8 +759,8 @@ function test_cat(::Type{TestAbstractArray}) @test hcat() == Any[] @test vcat(1, 1.0, 3, 3.0) == [1.0, 1.0, 3.0, 3.0] @test hcat(1, 1.0, 3, 3.0) == [1.0 1.0 3.0 3.0] - @test_throws ArgumentError hcat(B1, B2) - @test_throws ArgumentError vcat(C1, C2) + @test_throws DimensionMismatch hcat(B1, B2) + @test_throws DimensionMismatch vcat(C1, C2) @test vcat(B) == B @test hcat(B) == B @@ -713,9 +789,9 @@ function test_cat(::Type{TestAbstractArray}) end @test_throws ArgumentError hvcat(7, 1:20...) 
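(Editor's aside: the `test_cat` hunks above and below change the expected exception for size-mismatched concatenation from `ArgumentError` to `DimensionMismatch`. A quick illustration of the new behaviour — the shapes are invented for the example.)

```julia
using Test

@test_throws DimensionMismatch hcat(rand(2, 2), rand(3, 2))  # differing row counts
@test_throws DimensionMismatch vcat(rand(2, 2), rand(2, 3))  # differing column counts
```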
- @test_throws ArgumentError hvcat((2), C1, C3) - @test_throws ArgumentError hvcat((1), C1, C2) - @test_throws ArgumentError hvcat((1), C2, C3) + @test_throws DimensionMismatch hvcat((2), C1, C3) + @test_throws DimensionMismatch hvcat((1), C1, C2) + @test_throws DimensionMismatch hvcat((1), C2, C3) tup = tuple(rand(1:10, i)...) @test hvcat(tup) == [] @@ -724,8 +800,8 @@ function test_cat(::Type{TestAbstractArray}) @test_throws ArgumentError hvcat((2, 2), 1, 2, 3, 4, 5) @test_throws ArgumentError Base.typed_hvcat(Int, (2, 2), 1, 2, 3, 4, 5) # check for # of columns mismatch b/w rows - @test_throws ArgumentError hvcat((3, 2), 1, 2, 3, 4, 5, 6) - @test_throws ArgumentError Base.typed_hvcat(Int, (3, 2), 1, 2, 3, 4, 5, 6) + @test_throws DimensionMismatch hvcat((3, 2), 1, 2, 3, 4, 5, 6) + @test_throws DimensionMismatch Base.typed_hvcat(Int, (3, 2), 1, 2, 3, 4, 5, 6) # 18395 @test isa(Any["a" 5; 2//3 1.0][2,1], Rational{Int}) @@ -782,9 +858,8 @@ Base.getindex(A::TSlowNIndexes{T,2}, i::Int, j::Int) where {T} = A.data[i,j] @test isa(map(Set, Array[[1,2],[3,4]]), Vector{Set{Int}}) end -@testset "mapping over scalars and empty arguments:" begin +@testset "mapping over scalars" begin @test map(sin, 1) === sin(1) - @test map(()->1234) === 1234 end function test_UInt_indexing(::Type{TestAbstractArray}) @@ -833,7 +908,7 @@ test_ind2sub(TestAbstractArray) include("generic_map_tests.jl") generic_map_tests(map, map!) -@test_throws ArgumentError map!(-, [1]) +@test map!(-, [1]) == [-1] test_UInt_indexing(TestAbstractArray) test_13315(TestAbstractArray) @@ -982,6 +1057,16 @@ end @test isempty(v) @test isempty(v2::Vector{Int}) @test isempty(v3::Vector{Float64}) + + S = StructArrays.StructArray{Complex{Int}}((v, v)) + for T in (Complex{Int}, ComplexF64) + S0 = empty(S, T) + @test S0 isa StructArrays.StructArray{T} + @test length(S0) == 0 + end + S0 = empty(S, String) + @test S0 isa Vector{String} + @test length(S0) == 0 end @testset "CartesianIndices" begin @@ -1062,6 +1147,7 @@ end @testset "IndexStyle for various types" begin @test Base.IndexStyle(UpperTriangular) == IndexCartesian() # subtype of AbstractArray, not of Array @test Base.IndexStyle(Vector) == IndexLinear() + @test Base.IndexStyle(Memory) == IndexLinear() @test Base.IndexStyle(UnitRange) == IndexLinear() @test Base.IndexStyle(UpperTriangular(rand(3, 3)), [1; 2; 3]) == IndexCartesian() @test Base.IndexStyle(UpperTriangular(rand(3, 3)), rand(3, 3), [1; 2; 3]) == IndexCartesian() @@ -1091,23 +1177,23 @@ end @testset "sizeof" begin let arrUInt8 = zeros(UInt8, 10) @test sizeof(arrUInt8) == 10 - @test Core.sizeof(arrUInt8) == 10 + @test Core.sizeof(arrUInt8) == 3 * sizeof(Int) end let arrUInt32 = zeros(UInt32, 10) @test sizeof(arrUInt32) == 40 - @test Core.sizeof(arrUInt32) == 40 + @test Core.sizeof(arrUInt32) == 3 * sizeof(Int) end let arrFloat64 = zeros(Float64, 10, 10) @test sizeof(arrFloat64) == 800 - @test Core.sizeof(arrFloat64) == 800 + @test Core.sizeof(arrFloat64) == 4 * sizeof(Int) end # Test union arrays (Issue #23321) let arrUnion = Union{Int64, Cvoid}[rand(Bool) ? 
k : nothing for k = 1:10] @test sizeof(arrUnion) == 80 - @test Core.sizeof(arrUnion) == 80 + @test Core.sizeof(arrUnion) == 3 * sizeof(Int) end # Test non-power of 2 types (Issue #35884) @@ -1121,7 +1207,7 @@ end let arrayOfUInt48 = [a, b, c] f35884(x) = sizeof(x) @test f35884(arrayOfUInt48) == 24 - @test Core.sizeof(arrayOfUInt48) == 24 + @test Core.sizeof(arrayOfUInt48) == 3 * sizeof(Int) end end @@ -1147,7 +1233,7 @@ function Base.getindex(S::Strider{<:Any,N}, I::Vararg{Int,N}) where {N} end Base.strides(S::Strider) = S.strides Base.elsize(::Type{<:Strider{T}}) where {T} = Base.elsize(Vector{T}) -Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S.offset) +Base.cconvert(::Type{Ptr{T}}, S::Strider{T}) where {T} = memoryref(S.data.ref, S.offset) @testset "Simple 3d strided views and permutes" for sz in ((5, 3, 2), (7, 11, 13)) A = collect(reshape(1:prod(sz), sz)) @@ -1206,6 +1292,9 @@ Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S end end end + # constant propagation in the PermutedDimsArray constructor + X = @inferred (A -> PermutedDimsArray(A, (2,3,1)))(A) + @test @inferred((X -> PermutedDimsArray(X, (3,1,2)))(X)) == A end @testset "simple 2d strided views, permutes, transposes" for sz in ((5, 3), (7, 11)) @@ -1325,6 +1414,8 @@ end Base.push!(tpa::TestPushArray{T}, a::T) where T = push!(tpa.data, a) Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a) +push_slightly_abstract_namedtuple(v::Vector{@NamedTuple{x::Int,y::Any}}, x::Int, @nospecialize(y)) = push!(v, (; x, y)) + @testset "push! and pushfirst!" begin a_orig = [1] tpa = TestPushArray{Int, 2}(a_orig) @@ -1334,6 +1425,58 @@ Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a) tpa = TestPushArray{Int, 2}(a_orig) pushfirst!(tpa, 6, 5, 4, 3, 2) @test tpa.data == reverse(collect(1:6)) + + let src = code_typed1(push_slightly_abstract_namedtuple, (Vector{@NamedTuple{x::Int,y::Any}},Int,Any)) + # After optimization, all `push!` and `convert` calls should have been inlined + @test all((x)->!iscall((src, push!))(x) && !iscall((src, convert))(x), src.code) + end +end + +mutable struct SimpleArray{T} <: AbstractVector{T} + els::Vector{T} +end +Base.size(sa::SimpleArray) = size(sa.els) +Base.getindex(sa::SimpleArray, idx...) = getindex(sa.els, idx...) +Base.setindex!(sa::SimpleArray, v, idx...) = setindex!(sa.els, v, idx...) +Base.resize!(sa::SimpleArray, n) = resize!(sa.els, n) +Base.copy(sa::SimpleArray) = SimpleArray(copy(sa.els)) + +isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") +using .Main.OffsetArrays + +@testset "Failing `$f` should not grow the array $a" for + f in (push!, append!, pushfirst!, prepend!), + a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)) + for args in ((1,), (1,2), ([1], [2]), [1]) + orig = copy(a) + @test_throws Exception f(a, args...) + @test a == orig + end +end + +@testset "Check push!($a, $args...)" for + a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)), + args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo")) + orig = copy(a) + push!(a, args...) 
+ @test length(a) == length(orig) + length(args) + @test a[axes(orig,1)] == orig + @test all(a[end-length(args)+1:end] .== args) +end + +@testset "Check append!($a, $args)" for + a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)), + args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo")) + orig = copy(a) + append!(a, args) + @test length(a) == length(orig) + length(args) + @test a[axes(orig,1)] == orig + @test all(a[end-length(args)+1:end] .== args) +end + +@testset "Check sizehint!($a)" for + a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)) + @test sizehint!(a, 10) === a end @testset "splatting into hvcat" begin @@ -1344,7 +1487,7 @@ end @test Int[t...; 3 4] == [1 2; 3 4] @test Int[0 t...; t... 0] == [0 1 2; 1 2 0] - @test_throws ArgumentError Int[t...; 3 4 5] + @test_throws DimensionMismatch Int[t...; 3 4 5] end @testset "issue #39896, modified getindex " begin @@ -1398,15 +1541,15 @@ using Base: typed_hvncat @test [1;;] == fill(1, (1,1)) for v in (1, fill(1), fill(1,1,1), fill(1, 1, 1, 1)) - @test_throws ArgumentError [v; v;; v] - @test_throws ArgumentError [v; v;; v; v; v] - @test_throws ArgumentError [v; v; v;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v; v;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v; v] + @test_throws DimensionMismatch [v; v;; v] + @test_throws DimensionMismatch [v; v;; v; v; v] + @test_throws DimensionMismatch [v; v; v;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v;; v; v;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v; v;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v;; v; v; v] # ensure a wrong shape with the right number of elements doesn't pass through - @test_throws ArgumentError [v; v;; v; v;;; v; v; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v; v; v] @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2)) @test [v; v;; v; v;;;] == fill(1, 2, 2, 1) @@ -1474,7 +1617,7 @@ using Base: typed_hvncat end # reject shapes that don't nest evenly between levels (e.g. 
1 + 2 does not fit into 2) - @test_throws ArgumentError hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4]) + @test_throws DimensionMismatch hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4]) # zero-length arrays are handled appropriately @test [zeros(Int, 1, 2, 0) ;;; 1 3] == [1 3;;;] @@ -1489,18 +1632,18 @@ using Base: typed_hvncat for v1 ∈ (zeros(Int, 0, 0), zeros(Int, 0, 0, 0, 0), zeros(Int, 0, 0, 0, 0, 0, 0, 0)) for v2 ∈ (1, [1]) for v3 ∈ (2, [2]) - @test_throws ArgumentError [v1 ;;; v2] - @test_throws ArgumentError [v1 ;;; v2 v3] - @test_throws ArgumentError [v1 v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 ;;; v2] + @test_throws DimensionMismatch [v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 v1 ;;; v2 v3] end end end v1 = zeros(Int, 0, 0, 0) for v2 ∈ (1, [1]) for v3 ∈ (2, [2]) - @test_throws ArgumentError [v1 ;;; v2 v3] - @test_throws ArgumentError [v1 ;;; v2] - @test_throws ArgumentError [v1 v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 ;;; v2] + @test_throws DimensionMismatch [v1 v1 ;;; v2 v3] end end @@ -1568,8 +1711,8 @@ using Base: typed_hvncat @test Array{Int, 3}(undef, 0, 0, 0) == typed_hvncat(Int, 3) isa Array{Int, 3} # Issue 43933 - semicolon precedence mistake should produce an error - @test_throws ArgumentError [[1 1]; 2 ;; 3 ; [3 4]] - @test_throws ArgumentError [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]] + @test_throws DimensionMismatch [[1 1]; 2 ;; 3 ; [3 4]] + @test_throws DimensionMismatch [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]] @test [[1 2; 3 4] [5; 6]; [7 8] 9;;;] == [1 2 5; 3 4 6; 7 8 9;;;] @@ -1684,6 +1827,9 @@ end @test_throws ArgumentError stack([1:3, 4:6]; dims=3) @test_throws ArgumentError stack(abs2, 1:3; dims=2) + @test stack(["hello", "world"]) isa Matrix{Char} + @test_throws DimensionMismatch stack(["hello", "world!"]) # had a bug in error printing + # Empty @test_throws ArgumentError stack(()) @test_throws ArgumentError stack([]) @@ -1739,50 +1885,71 @@ end end module IRUtils - include("compiler/irutils.jl") + include(joinpath(@__DIR__,"../Compiler/test/irutils.jl")) +end + +function check_pointer_strides(A::AbstractArray) + # Make sure stride(A, i) is equivalent with strides(A)[i] (if 1 <= i <= ndims(A)) + dims = ntuple(identity, ndims(A)) + map(i -> stride(A, i), dims) == @inferred(strides(A)) || return false + # Test pointer via value check. + first(A) === Base.unsafe_load(pointer(A)) || return false + # Test strides via value check. + for i in eachindex(IndexLinear(), A) + A[i] === Base.unsafe_load(pointer(A, i)) || return false + end + return true end -@testset "strides for ReshapedArray" begin - function check_strides(A::AbstractArray) - # Make sure stride(A, i) is equivalent with strides(A)[i] (if 1 <= i <= ndims(A)) - dims = ntuple(identity, ndims(A)) - map(i -> stride(A, i), dims) == @inferred(strides(A)) || return false - # Test strides via value check. 
- for i in eachindex(IndexLinear(), A) - A[i] === Base.unsafe_load(pointer(A, i)) || return false +@testset "colonful `reshape`, #54245" begin + @test reshape([], (0, :)) isa Matrix + @test_throws DimensionMismatch reshape([7], (0, :)) + let b = prevpow(2, typemax(Int)) + @test iszero(b*b) + @test_throws ArgumentError reshape([7], (b, :, b)) + @test reshape([], (b, :, b)) isa Array{<:Any, 3} + end + for iterator ∈ (7:6, 7:7, 7:8) + for it ∈ (iterator, map(BigInt, iterator)) + @test reshape(it, (:, Int(length(it)))) isa AbstractMatrix + @test reshape(it, (Int(length(it)), :)) isa AbstractMatrix + @test reshape(it, (1, :)) isa AbstractMatrix + @test reshape(it, (:, 1)) isa AbstractMatrix end - return true end +end + +@testset "strides for ReshapedArray" begin # Type-based contiguous Check a = vec(reinterpret(reshape, Int16, reshape(view(reinterpret(Int32, randn(10)), 2:11), 5, :))) f(a) = only(strides(a)); @test IRUtils.fully_eliminated(f, Base.typesof(a)) && f(a) == 1 # General contiguous check a = view(rand(10,10), 1:10, 1:10) - @test check_strides(vec(a)) + @test check_pointer_strides(vec(a)) b = view(parent(a), 1:9, 1:10) @test_throws "Input is not strided." strides(vec(b)) # StridedVector parent for n in 1:3 a = view(collect(1:60n), 1:n:60n) - @test check_strides(reshape(a, 3, 4, 5)) - @test check_strides(reshape(a, 5, 6, 2)) + @test check_pointer_strides(reshape(a, 3, 4, 5)) + @test check_pointer_strides(reshape(a, 5, 6, 2)) b = view(parent(a), 60n:-n:1) - @test check_strides(reshape(b, 3, 4, 5)) - @test check_strides(reshape(b, 5, 6, 2)) + @test check_pointer_strides(reshape(b, 3, 4, 5)) + @test check_pointer_strides(reshape(b, 5, 6, 2)) end # StridedVector like parent a = randn(10, 10, 10) b = view(a, 1:10, 1:1, 5:5) - @test check_strides(reshape(b, 2, 5)) + @test check_pointer_strides(reshape(b, 2, 5)) # Other StridedArray parent a = view(randn(10,10), 1:9, 1:10) - @test check_strides(reshape(a,3,3,2,5)) - @test check_strides(reshape(a,3,3,5,2)) - @test check_strides(reshape(a,9,5,2)) - @test check_strides(reshape(a,3,3,10)) - @test check_strides(reshape(a,1,3,1,3,1,5,1,2)) - @test check_strides(reshape(a,3,3,5,1,1,2,1,1)) + @test check_pointer_strides(reshape(a,3,3,2,5)) + @test check_pointer_strides(reshape(a,3,3,5,2)) + @test check_pointer_strides(reshape(a,9,5,2)) + @test check_pointer_strides(reshape(a,3,3,10)) + @test check_pointer_strides(reshape(a,1,3,1,3,1,5,1,2)) + @test check_pointer_strides(reshape(a,3,3,5,1,1,2,1,1)) @test_throws "Input is not strided." strides(reshape(a,3,6,5)) @test_throws "Input is not strided." strides(reshape(a,3,2,3,5)) @test_throws "Input is not strided." strides(reshape(a,3,5,3,2)) @@ -1795,7 +1962,14 @@ end @test @inferred(strides(a)) == (1, 1, 1) # Dense parent (but not StridedArray) A = reinterpret(Int8, reinterpret(reshape, Int16, rand(Int8, 2, 3, 3))) - @test check_strides(reshape(A, 3, 2, 3)) + @test check_pointer_strides(reshape(A, 3, 2, 3)) +end + +@testset "pointer for SubArray with none-dense parent." 
begin + a = view(Matrix(reshape(0x01:0xc8, 20, :)), 1:2:20, :) + b = reshape(a, 20, :) + @test check_pointer_strides(view(b, 2:11, 1:5)) + @test check_pointer_strides(view(b, reshape(2:11, 2, :), 1:5)) end @testset "stride for 0 dims array #44087" begin @@ -1825,13 +1999,17 @@ end @testset "type-based offset axes check" begin a = randn(ComplexF64, 10) + b = randn(ComplexF64, 4, 4, 4, 4) ta = reinterpret(Float64, a) tb = reinterpret(Float64, view(a, 1:2:10)) tc = reinterpret(Float64, reshape(view(a, 1:3:10), 2, 2, 1)) + td = view(b, :, :, 1, 1) # Issue #44040 @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(ta, tc)) @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(tc, tc)) @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(ta, tc, tb)) + # Issue #49332 + @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(td, td, td)) # Ranges && CartesianIndices @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(1:10, Base.OneTo(10), 1.0:2.0, LinRange(1.0, 2.0, 2), 1:2:10, CartesianIndices((1:2:10, 1:2:10)))) # Remind us to call `any` in `Base.has_offset_axes` once our compiler is ready. @@ -1844,3 +2022,208 @@ end # type stable [x;;] (https://github.com/JuliaLang/julia/issues/45952) f45952(x) = [x;;] @inferred f45952(1.0) + +@testset "isassigned with a Bool index" begin + A = zeros(2,2) + @test_throws "invalid index: true of type Bool" isassigned(A, 1, true) + @test_throws "invalid index: true of type Bool" isassigned(A, true) +end + +@testset "repeat for FillArrays" begin + f = FillArrays.Fill(3, (4,)) + @test repeat(f, 2) === FillArrays.Fill(3, (8,)) + @test repeat(f, 2, 3) === FillArrays.Fill(3, (8, 3)) + @test repeat(f, inner=(1,2), outer=(3,1)) === repeat(f, 3, 2) === FillArrays.Fill(3, (12,2)) + f = FillArrays.Fill(3, (4, 2)) + @test repeat(f, 2, 3) === FillArrays.Fill(3, (8, 6)) + @test repeat(f, 2, 3, 4) === FillArrays.Fill(3, (8, 6, 4)) + @test repeat(f, inner=(1,2), outer=(3,1)) === FillArrays.Fill(3, (12, 4)) +end + +@testset "zero" begin + @test zero([1 2; 3 4]) isa Matrix{Int} + @test zero([1 2; 3 4]) == [0 0; 0 0] + + @test zero([1.0]) isa Vector{Float64} + @test zero([1.0]) == [0.0] + + @test zero([[2,2], [3,3,3]]) isa Vector{Vector{Int}} + @test zero([[2,2], [3,3,3]]) == [[0,0], [0, 0, 0]] + + + @test zero(Union{Float64, Missing}[missing]) == [0.0] + struct CustomNumber <: Number + val::Float64 + end + Base.zero(::Type{CustomNumber}) = CustomNumber(0.0) + @test zero([CustomNumber(5.0)]) == [CustomNumber(0.0)] + @test zero(Union{CustomNumber, Missing}[missing]) == [CustomNumber(0.0)] + @test zero(Vector{Union{CustomNumber, Missing}}(undef, 1)) == [CustomNumber(0.0)] +end + +@testset "`_prechecked_iterate` optimization" begin + function test_prechecked_iterate(iter) + Js = Base._prechecked_iterate(iter) + for I in iter + J, s = Js::NTuple{2,Any} + @test J === I + Js = Base._prechecked_iterate(iter, s) + end + end + test_prechecked_iterate(1:10) + test_prechecked_iterate(Base.OneTo(10)) + test_prechecked_iterate(CartesianIndices((3, 3))) + test_prechecked_iterate(CartesianIndices(())) + test_prechecked_iterate(LinearIndices((3, 3))) + test_prechecked_iterate(LinearIndices(())) + test_prechecked_iterate(Base.SCartesianIndices2{3}(1:3)) +end + +@testset "IndexStyles in copyto!" 
begin + A = rand(3,2) + B = zeros(size(A)) + colons = ntuple(_->:, ndims(B)) + # Ensure that the AbstractArray methods are hit + # by using views instead of Arrays + @testset "IndexLinear - IndexLinear" begin + B .= 0 + copyto!(view(B, colons...), A) + @test B == A + end + @testset "IndexLinear - IndexCartesian" begin + B .= 0 + copyto!(view(B, colons...), view(A, axes(A)...)) + @test B == A + end + @testset "IndexCartesian - IndexLinear" begin + B .= 0 + copyto!(view(B, axes(B)...), A) + @test B == A + end + @testset "IndexCartesian - IndexCartesian" begin + B .= 0 + copyto!(view(B, axes(B)...), view(A, axes(A)...)) + @test B == A + end +end + +@testset "reshape for offset arrays" begin + p = Base.IdentityUnitRange(3:4) + r = reshape(p, :, 1) + @test r[eachindex(r)] == UnitRange(p) + @test collect(r) == r + + struct ZeroBasedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} + a :: A + function ZeroBasedArray(a::AbstractArray) + Base.require_one_based_indexing(a) + new{eltype(a), ndims(a), typeof(a)}(a) + end + end + Base.parent(z::ZeroBasedArray) = z.a + Base.size(z::ZeroBasedArray) = size(parent(z)) + Base.axes(z::ZeroBasedArray) = map(x -> Base.IdentityUnitRange(0:x - 1), size(parent(z))) + Base.getindex(z::ZeroBasedArray{<:Any, N}, i::Vararg{Int,N}) where {N} = parent(z)[map(x -> x + 1, i)...] + Base.setindex!(z::ZeroBasedArray{<:Any, N}, val, i::Vararg{Int,N}) where {N} = parent(z)[map(x -> x + 1, i)...] = val + + z = ZeroBasedArray(collect(1:4)) + r2 = reshape(z, :, 1) + @test r2[CartesianIndices(r2)] == r2[LinearIndices(r2)] + r2[firstindex(r2)] = 34 + @test z[0] == 34 + r2[eachindex(r2)] = r2 .* 2 + for (i, j) in zip(eachindex(r2), eachindex(z)) + @test r2[i] == z[j] + end +end + +@testset "zero for arbitrary axes" begin + r = SizedArrays.SOneTo(2) + s = Base.OneTo(2) + _to_oneto(x::Integer) = Base.OneTo(2) + _to_oneto(x::Union{Base.OneTo, SizedArrays.SOneTo}) = x + for (f, v) in ((zeros, 0), (ones, 1), ((x...)->fill(3,x...),3)) + for ax in ((r,r), (s, r), (2, r)) + A = f(ax...) + @test axes(A) == map(_to_oneto, ax) + if all(x -> x isa SizedArrays.SOneTo, ax) + @test A isa SizedArrays.SizedArray && parent(A) isa Array + else + @test A isa Array + end + @test all(==(v), A) + end + end +end + +@testset "one" begin + @test one([1 2; 3 4]) == [1 0; 0 1] + @test one([1 2; 3 4]) isa Matrix{Int} + + struct Mat <: AbstractMatrix{Int} + p::Matrix{Int} + end + Base.size(m::Mat) = size(m.p) + Base.IndexStyle(::Type{<:Mat}) = IndexLinear() + Base.getindex(m::Mat, i::Int) = m.p[i] + Base.setindex!(m::Mat, v, i::Int) = m.p[i] = v + Base.similar(::Mat, ::Type{Int}, size::NTuple{2,Int}) = Mat(Matrix{Int}(undef, size)) + + @test one(Mat([1 2; 3 4])) == Mat([1 0; 0 1]) + @test one(Mat([1 2; 3 4])) isa Mat +end + +@testset "copyto! 
with non-AbstractArray src" begin + A = zeros(4) + x = (i for i in axes(A,1)) + copyto!(A, 1, x, 1, length(A)) + @test A == axes(A,1) + A .= 0 + copyto!(A, 1, x, 1, 2) + @test A[1:2] == first(x,2) + @test iszero(A[3:end]) + A .= 0 + copyto!(A, 1, x, 1) + @test A == axes(A,1) +end + +@testset "reshape with Integer sizes" begin + @test reshape(1:4, big(2), big(2)) == reshape(1:4, 2, 2) + a = [1 2 3; 4 5 6] + reshaped_arrays = ( + reshape(a, 3, 2), + reshape(a, (3, 2)), + reshape(a, big(3), big(2)), + reshape(a, (big(3), big(2))), + reshape(a, :, big(2)), + reshape(a, (:, big(2))), + reshape(a, big(3), :), + reshape(a, (big(3), :)), + ) + @test allequal(reshaped_arrays) + for b ∈ reshaped_arrays + @test b isa Matrix{Int} + @test b.ref === a.ref + end +end +@testset "AbstractArrayMath" begin + @testset "IsReal" begin + A = [1, 2, 3, 4] + @test isreal(A) == true + B = [1.1, 2.2, 3.3, 4.4] + @test isreal(B) == true + C = [1, 2.2, 3] + @test isreal(C) == true + D = Real[] + @test isreal(D) == true + E = [1 + 1im, 2 - 2im] + @test isreal(E) == false + struct MyReal <: Real + value::Float64 + end + F = [MyReal(1.0), MyReal(2.0)] + @test isreal(F) == true + G = ["a", "b", "c"] + @test_throws MethodError isreal(G) + end +end diff --git a/test/ambiguous.jl b/test/ambiguous.jl index 5056fc626e84a..43ec1aab0557d 100644 --- a/test/ambiguous.jl +++ b/test/ambiguous.jl @@ -75,7 +75,7 @@ let io = IOBuffer() cf = @eval @cfunction(ambig, Int, (UInt8, Int)) # test for a crash (doesn't throw an error) @test_throws(MethodError(ambig, (UInt8(1), Int(2)), get_world_counter()), ccall(cf, Int, (UInt8, Int), 1, 2)) - @test_throws(ErrorException("no unique matching method found for the specified argument types"), + @test_throws("Calling invoke(f, t, args...) would throw:\nMethodError: no method matching ambig", which(ambig, (UInt8, Int))) @test length(code_typed(ambig, (UInt8, Int))) == 0 end @@ -97,10 +97,7 @@ ambig(x::Union{Char, Int16}) = 's' # Automatic detection of ambiguities -const allowed_undefineds = Set([ - GlobalRef(Base, :active_repl), - GlobalRef(Base, :active_repl_backend), -]) +const allowed_undefineds = Set([]) let Distributed = get(Base.loaded_modules, Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"), @@ -165,6 +162,22 @@ end ambs = detect_ambiguities(Ambig48312) @test length(ambs) == 4 +module UnboundAmbig55868 + module B + struct C end + export C + Base.@deprecate_binding D C + end + using .B + export C, D +end +@test !Base.isbindingresolved(UnboundAmbig55868, :C) +@test !Base.isbindingresolved(UnboundAmbig55868, :D) +@test isempty(detect_unbound_args(UnboundAmbig55868)) +@test isempty(detect_ambiguities(UnboundAmbig55868)) +@test !Base.isbindingresolved(UnboundAmbig55868, :C) +@test !Base.isbindingresolved(UnboundAmbig55868, :D) + # Test that Core and Base are free of ambiguities # not using isempty so this prints more information when it fails @testset "detect_ambiguities" begin @@ -179,8 +192,7 @@ ambs = detect_ambiguities(Ambig48312) # some ambiguities involving Union{} type parameters may be expected, but not required let ambig = Set(detect_ambiguities(Core; recursive=true, ambiguous_bottom=true)) - @test !isempty(ambig) - @test length(ambig) < 30 + @test isempty(ambig) end STDLIB_DIR = Sys.STDLIB @@ -288,6 +300,30 @@ for f in (Ambig8.f, Ambig8.g) @test f(Int8(0)) == 4 @test_throws MethodError f(0) @test_throws MethodError f(pi) + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, 10, 
Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 2 + @test ambig[] == 1 + end + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 2 + @test ambig[] == 1 + end + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, 10, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 3 + @test ambig[] == 1 + end + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 3 + @test ambig[] == 1 + end end module Ambig9 @@ -308,23 +344,18 @@ end @test length(detect_unbound_args(M25341; recursive=true)) == 1 # Test that Core and Base are free of UndefVarErrors -# not using isempty so this prints more information when it fails @testset "detect_unbound_args in Base and Core" begin # TODO: review this list and remove everything between test_broken and test let need_to_handle_undef_sparam = Set{Method}(detect_unbound_args(Core; recursive=true)) - pop!(need_to_handle_undef_sparam, which(Core.Compiler.eltype, Tuple{Type{Tuple{Any}}})) - @test_broken need_to_handle_undef_sparam == Set() - pop!(need_to_handle_undef_sparam, which(Core.Compiler._cat, Tuple{Any, AbstractArray})) - pop!(need_to_handle_undef_sparam, first(methods(Core.Compiler.same_names))) - @test need_to_handle_undef_sparam == Set() + @test isempty(need_to_handle_undef_sparam) end let need_to_handle_undef_sparam = Set{Method}(detect_unbound_args(Base; recursive=true, allowed_undefineds)) pop!(need_to_handle_undef_sparam, which(Base._totuple, (Type{Tuple{Vararg{E}}} where E, Any, Any))) pop!(need_to_handle_undef_sparam, which(Base.eltype, Tuple{Type{Tuple{Any}}})) pop!(need_to_handle_undef_sparam, first(methods(Base.same_names))) - @test_broken need_to_handle_undef_sparam == Set() + @test_broken isempty(need_to_handle_undef_sparam) pop!(need_to_handle_undef_sparam, which(Base._cat, Tuple{Any, AbstractArray})) pop!(need_to_handle_undef_sparam, which(Base.byteenv, (Union{AbstractArray{Pair{T,V}, 1}, Tuple{Vararg{Pair{T,V}}}} where {T<:AbstractString,V},))) pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{AbstractArray{Union{Missing, T},N} where {T, N}})) @@ -333,7 +364,7 @@ end pop!(need_to_handle_undef_sparam, which(Base.zero, Tuple{Type{Union{Missing, T}} where T})) pop!(need_to_handle_undef_sparam, which(Base.one, Tuple{Type{Union{Missing, T}} where T})) pop!(need_to_handle_undef_sparam, which(Base.oneunit, Tuple{Type{Union{Missing, T}} where T})) - @test need_to_handle_undef_sparam == Set() + @test isempty(need_to_handle_undef_sparam) end end @@ -427,4 +458,20 @@ cc46601(::Type{T}, x::Int) where {T<:AbstractString} = 7 @test length(methods(cc46601, Tuple{Type{<:Integer}, Integer})) == 2 @test length(Base.methods_including_ambiguous(cc46601, Tuple{Type{<:Integer}, Integer})) == 7 +# Issue #55231 +struct U55231{P} end +struct V55231{P} end +U55231(::V55231) = nothing +(::Type{T})(::V55231) where {T<:U55231} = nothing +@test length(methods(U55231)) == 2 +U55231(a, b) = nothing +@test length(methods(U55231)) == 3 +struct 
S55231{P} end +struct T55231{P} end +(::Type{T})(::T55231) where {T<:S55231} = nothing +S55231(::T55231) = nothing +@test length(methods(S55231)) == 2 +S55231(a, b) = nothing +@test length(methods(S55231)) == 3 + nothing diff --git a/test/arrayops.jl b/test/arrayops.jl index 770cec3705038..655e14675bfb4 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -115,7 +115,10 @@ end @test convert(Array{Int,1}, r) == [2,3,4] @test_throws MethodError convert(Array{Int,2}, r) @test convert(Array{Int}, r) == [2,3,4] - @test Base.unsafe_convert(Ptr{Int}, r) == Base.unsafe_convert(Ptr{Int}, s) + let rc = Base.cconvert(Ptr{Int}, r), rs = Base.cconvert(Ptr{Int}, s) + @test rc == rs + @test Base.unsafe_convert(Ptr{Int}, rc) == Base.unsafe_convert(Ptr{Int}, rs) + end @test isa(r, StridedArray) # issue #22411 end @testset "linearslow" begin @@ -131,6 +134,7 @@ end @test convert(Array{Int,1}, r) == [2,3,5] @test_throws MethodError convert(Array{Int,2}, r) @test convert(Array{Int}, r) == [2,3,5] + # @test_throws ErrorException Base.cconvert(Ptr{Int}, r) broken=true @test_throws ErrorException Base.unsafe_convert(Ptr{Int}, r) r[2] = -1 @test a[3] == -1 @@ -303,6 +307,41 @@ end @test_throws ArgumentError dropdims(a, dims=3) @test_throws ArgumentError dropdims(a, dims=4) @test_throws ArgumentError dropdims(a, dims=6) + @testset "insertdims" begin + a = rand(8, 7) + @test @inferred(insertdims(a, dims=1)) == @inferred(insertdims(a, dims=(1,))) == reshape(a, (1, 8, 7)) + @test @inferred(insertdims(a, dims=3)) == @inferred(insertdims(a, dims=(3,))) == reshape(a, (8, 7, 1)) + @test @inferred(insertdims(a, dims=(1, 3))) == reshape(a, (1, 8, 1, 7)) + @test @inferred(insertdims(a, dims=(1, 2, 3))) == reshape(a, (1, 1, 1, 8, 7)) + @test @inferred(insertdims(a, dims=(1, 4))) == reshape(a, (1, 8, 7, 1)) + @test @inferred(insertdims(a, dims=(1, 3, 5))) == reshape(a, (1, 8, 1, 7, 1)) + @test @inferred(insertdims(a, dims=(1, 2, 4, 6))) == reshape(a, (1, 1, 8, 1, 7, 1)) + @test @inferred(insertdims(a, dims=(1, 3, 4, 6))) == reshape(a, (1, 8, 1, 1, 7, 1)) + @test @inferred(insertdims(a, dims=(1, 4, 6, 3))) == reshape(a, (1, 8, 1, 1, 7, 1)) + @test @inferred(insertdims(a, dims=(1, 3, 5, 6))) == reshape(a, (1, 8, 1, 7, 1, 1)) + @test_throws ArgumentError insertdims(a, dims=(1, 1, 2, 3)) + @test_throws ArgumentError insertdims(a, dims=(1, 2, 2, 3)) + @test_throws ArgumentError insertdims(a, dims=(1, 2, 3, 3)) + @test_throws UndefKeywordError insertdims(a) + @test_throws ArgumentError insertdims(a, dims=0) + @test_throws ArgumentError insertdims(a, dims=(1, 2, 1)) + @test_throws ArgumentError insertdims(a, dims=4) + @test_throws ArgumentError insertdims(a, dims=6) + A = reshape(1:6, 2, 3) + @test_throws ArgumentError insertdims(A, dims=(2, 2)) + D = insertdims(A, dims=()) + @test size(D) == size(A) + @test D == A + E = ones(2, 3, 4) + F = insertdims(E, dims=(2, 4, 6)) + @test size(F) == (2, 1, 3, 1, 4, 1) + # insertdims and dropdims are inverses + b = rand(1,1,1,5,1,1,7) + for dims in [1, (1,), 2, (2,), 3, (3,), (1,3), (1,2,3), (1,2), (1,3,5), (1,2,5,6), (1,3,5,6), (1,3,5,6), (1,6,5,3)] + @test dropdims(insertdims(a; dims); dims) == a + @test insertdims(dropdims(b; dims); dims) == b + end + end sz = (5,8,7) A = reshape(1:prod(sz),sz...) 
@@ -463,6 +502,11 @@ end @test vc == [v[1:(i-1)]; 5; v[i:end]] end @test_throws BoundsError insert!(v, 5, 5) + + # test that data is copied when there is plenty of room to do so + v = empty!(collect(1:100)) + pushfirst!(v, 1) + @test length(v.ref.mem) == 100 end @testset "popat!(::Vector, i, [default])" begin @@ -558,32 +602,32 @@ end @test findall(!, m) == [k for (k,v) in pairs(m) if !v] @test findfirst(!iszero, a) == 2 @test findfirst(a.==0) == 1 - @test findfirst(a.==5) == nothing + @test findfirst(a.==5) === nothing @test findfirst(Dict(1=>false, 2=>true)) == 2 - @test findfirst(Dict(1=>false)) == nothing + @test findfirst(Dict(1=>false)) === nothing @test findfirst(isequal(3), [1,2,4,1,2,3,4]) == 6 @test findfirst(!isequal(1), [1,2,4,1,2,3,4]) == 2 @test findfirst(isodd, [2,4,6,3,9,2,0]) == 4 - @test findfirst(isodd, [2,4,6,2,0]) == nothing + @test findfirst(isodd, [2,4,6,2,0]) === nothing @test findnext(!iszero,a,4) == 4 @test findnext(!iszero,a,5) == 6 @test findnext(!iszero,a,1) == 2 @test findnext(isequal(1),a,4) == 6 - @test findnext(isequal(5),a,4) == nothing + @test findnext(isequal(5),a,4) === nothing @test findlast(!iszero, a) == 8 @test findlast(a.==0) == 5 - @test findlast(a.==5) == nothing - @test findlast(false) == nothing # test non-AbstractArray findlast + @test findlast(a.==5) === nothing + @test findlast(false) === nothing # test non-AbstractArray findlast @test findlast(isequal(3), [1,2,4,1,2,3,4]) == 6 @test findlast(isodd, [2,4,6,3,9,2,0]) == 5 - @test findlast(isodd, [2,4,6,2,0]) == nothing + @test findlast(isodd, [2,4,6,2,0]) === nothing @test findprev(!iszero,a,4) == 4 @test findprev(!iszero,a,5) == 4 - @test findprev(!iszero,a,1) == nothing + @test findprev(!iszero,a,1) === nothing @test findprev(isequal(1),a,4) == 2 @test findprev(isequal(1),a,8) == 6 @test findprev(isodd, [2,4,5,3,9,2,0], 7) == 5 - @test findprev(isodd, [2,4,5,3,9,2,0], 2) == nothing + @test findprev(isodd, [2,4,5,3,9,2,0], 2) === nothing @test findfirst(isequal(0x00), [0x01, 0x00]) == 2 @test findlast(isequal(0x00), [0x01, 0x00]) == 2 @test findnext(isequal(0x00), [0x00, 0x01, 0x00], 2) == 3 @@ -604,6 +648,15 @@ end @testset "issue 43078" begin @test_throws TypeError findall([1]) end + + @testset "issue #46425" begin + counter = 0 + function pred46425(x) + counter += 1 + counter < 4 && x + end + @test findall(pred46425, [false, false, true, true]) == [3] + end end @testset "find with Matrix" begin A = [1 2 0; 3 4 0] @@ -728,6 +781,9 @@ end v = [1,2,3] @test permutedims(v) == [1 2 3] + zd = fill(0) + @test permutedims(zd, ()) == zd + x = PermutedDimsArray([1 2; 3 4], (2, 1)) @test size(x) == (2, 2) @test copy(x) == [1 3; 2 4] @@ -781,6 +837,14 @@ end oa = OffsetVector(copy(a), -1) @test circshift!(oa, 1) === oa @test oa == circshift(OffsetVector(a, -1), 1) + + # 1d circshift! 
(#53554) + a = [] + @test circshift!(a, 1) === a + @test circshift!(a, 1) == [] + a = [1:5;] + @test circshift!(a, 10) === a + @test circshift!(a, 10) == 1:5 end @testset "circcopy" begin @@ -1239,6 +1303,10 @@ end @test @inferred(mapslices(hcat, [1 2; 3 4], dims=1)) == [1 2; 3 4] # previously an error, now allowed @test mapslices(identity, [1 2; 3 4], dims=(2,2)) == [1 2; 3 4] # previously an error @test_broken @inferred(mapslices(identity, [1 2; 3 4], dims=(2,2))) == [1 2; 3 4] + + # type inference in mapslices + a_ = @inferred (a -> mapslices(identity, reshape(a, size(a)..., 1, 1), dims=(3,4)))(a) + @test a_ == reshape(a, size(a)..., 1, 1) end @testset "single multidimensional index" begin @@ -1347,6 +1415,14 @@ end end @testset "lexicographic comparison" begin + @testset "zero-dimensional" begin + vals = (0, 0.0, 1, 1.0) + for l ∈ vals + for r ∈ vals + @test cmp(fill(l), fill(r)) == cmp(l, r) + end + end + end @test cmp([1.0], [1]) == 0 @test cmp([1], [1.0]) == 0 @test cmp([1, 1], [1, 1]) == 0 @@ -1433,6 +1509,15 @@ end @test sortslices(B, dims=(1,3)) == B end +@testset "sortslices inference (#52019)" begin + x = rand(3, 2) + @inferred sortslices(x, dims=1) + @inferred sortslices(x, dims=(2,)) + x = rand(1, 2, 3) + @inferred sortslices(x, dims=(1,2)) + @inferred sortslices(x, dims=3, by=sum) +end + @testset "fill" begin @test fill!(Float64[1.0], -0.0)[1] === -0.0 A = fill(1.,3,3) @@ -1773,6 +1858,32 @@ end # offset array @test append!([1,2], OffsetArray([9,8], (-3,))) == [1,2,9,8] @test prepend!([1,2], OffsetArray([9,8], (-3,))) == [9,8,1,2] + + # Error recovery + A = [1, 2] + @test_throws MethodError append!(A, [1, 2, "hi"]) + @test A == [1, 2, 1, 2] + + oA = OffsetVector(A, 0:3) + @test_throws InexactError append!(oA, [1, 2, 3.01]) + @test oA == OffsetVector([1, 2, 1, 2, 1, 2], 0:5) + + @test_throws InexactError append!(A, (x for x in [1, 2, 3.1])) + @test A == [1, 2, 1, 2, 1, 2, 1, 2] + + @test_throws InexactError append!(A, (x for x in [1, 2, 3.1] if isfinite(x))) + @test A == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2] + + @test_throws MethodError prepend!(A, [1, 2, "hi"]) + @test A == [2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] + + A = [1, 2] + @test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1])) + @test A == [2, 1, 1, 2] + + A = [1, 2] + @test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1] if isfinite(x))) + @test A == [2, 1, 1, 2] end let A = [1,2] @@ -2013,6 +2124,7 @@ end I1 = CartesianIndex((2,3,0)) I2 = CartesianIndex((-1,5,2)) + @test +I1 == I1 @test -I1 == CartesianIndex((-2,-3,0)) @test I1 + I2 == CartesianIndex((1,8,2)) @test I2 + I1 == CartesianIndex((1,8,2)) @@ -2115,6 +2227,8 @@ R = CartesianIndices((3,0)) @test @inferred(eachindex(Base.IndexLinear(), a, b)) == 1:4 @test @inferred(eachindex(a, b)) == CartesianIndices((2,2)) @test @inferred(eachindex(a, a)) == 1:4 + @test @inferred(eachindex(a, a, a)) == 1:4 + @test @inferred(eachindex(a, a, b)) == CartesianIndices((2,2)) @test_throws DimensionMismatch eachindex(a, rand(3,3)) @test_throws DimensionMismatch eachindex(b, rand(3,3)) end @@ -2559,26 +2673,32 @@ end end @testset "sign, conj[!], ~" begin - local A, B, C - A = [-10,0,3] - B = [-10.0,0.0,3.0] - C = [1,im,0] - - @test sign.(A) == [-1,0,1] - @test sign.(B) == [-1,0,1] - @test typeof(sign.(A)) == Vector{Int} - @test typeof(sign.(B)) == Vector{Float64} - - @test conj(A) == A - @test conj!(copy(A)) == A - @test conj(B) == A - @test conj(C) == [1,-im,0] - @test typeof(conj(A)) == Vector{Int} - @test typeof(conj(B)) == Vector{Float64} - @test typeof(conj(C)) == 
Vector{Complex{Int}} - - @test .~A == [9,-1,-4] - @test typeof(.~A) == Vector{Int} + let A, B, C, D, E # Suppress :latestworld to get good inference for the allocations test + A = [-10,0,3] + B = [-10.0,0.0,3.0] + C = [1,im,0] + + @test sign.(A) == [-1,0,1] + @test sign.(B) == [-1,0,1] + @test typeof(sign.(A)) == Vector{Int} + @test typeof(sign.(B)) == Vector{Float64} + + @test conj(A) == A + @test conj!(copy(A)) == A + @test conj(B) == A + @test conj(C) == [1,-im,0] + @test typeof(conj(A)) == Vector{Int} + @test typeof(conj(B)) == Vector{Float64} + @test typeof(conj(C)) == Vector{Complex{Int}} + D = [C copy(C); copy(C) copy(C)] + @test conj(D) == conj!(copy(D)) + E = [D, copy(D)] + @test conj(E) == conj!(copy(E)) + @test (@allocations conj!(E)) == 0 + + @test .~A == [9,-1,-4] + @test typeof(.~A) == Vector{Int} + end end # @inbounds is expression-like, returning its value; #15558 @@ -2715,7 +2835,7 @@ end end @testset "accumulate, accumulate!" begin - @test accumulate(+, [1,2,3]) == [1, 3, 6] + @test accumulate(+, [1, 2, 3]) == [1, 3, 6] @test accumulate(min, [1 2; 3 4], dims=1) == [1 2; 1 2] @test accumulate(max, [1 2; 3 0], dims=2) == [1 2; 3 3] @test accumulate(+, Bool[]) == Int[] @@ -2732,12 +2852,15 @@ end @test accumulate(min, [1 0; 0 1], dims=1) == [1 0; 0 0] @test accumulate(min, [1 0; 0 1], dims=2) == [1 0; 0 0] + @test accumulate(+, [1, 2, 3], dims=1, init=1) == [2, 4, 7] + @test accumulate(*, [1, 4, 2], dims=1, init=2) == [2, 8, 16] + @test accumulate(min, [3 2 1; 3 2 1], dims=2) == [3 2 1; 3 2 1] @test accumulate(min, [3 2 1; 3 2 1], dims=2, init=2) == [2 2 1; 2 2 1] @test isa(accumulate(+, Int[]), Vector{Int}) @test isa(accumulate(+, Int[]; init=1.), Vector{Float64}) - @test accumulate(+, [1,2]; init=1) == [2, 4] + @test accumulate(+, [1, 2]; init=1) == [2, 4] arr = randn(4) @test accumulate(*, arr; init=1) ≈ accumulate(*, arr) @@ -2781,7 +2904,7 @@ end # asymmetric operation op(x,y) = 2x+y - @test accumulate(op, [10,20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110] + @test accumulate(op, [10, 20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110] @test accumulate(op, [10 20 30], dims=2) == [10 op(10, 20) op(op(10, 20), 30)] == [10 40 110] #25506 @@ -2790,6 +2913,33 @@ end @inferred accumulate(*, String[]) @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=1) == ["a" "b"; "ac" "bd"] @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=2) == ["a" "ab"; "c" "cd"] + + # #53438 + v = [(1, 2), (3, 4)] + @test_throws MethodError accumulate(+, v) + @test_throws MethodError cumsum(v) + @test_throws MethodError cumprod(v) + @test_throws MethodError accumulate(+, v; init=(0, 0)) + @test_throws MethodError accumulate(+, v; dims=1, init=(0, 0)) + + # Some checks to ensure we're identifying the widest needed eltype + # as identified in PR 53461 + @testset "Base._accumulate_promote_op" begin + # A somewhat contrived example where each call to `foo` + # will return a different type + foo(x::Bool, y::Int)::Int = x + y + foo(x::Int, y::Int)::Float64 = x + y + foo(x::Float64, y::Int)::ComplexF64 = x + y * im + foo(x::ComplexF64, y::Int)::String = string(x, "+", y) + + v = collect(1:5) + @test Base._accumulate_promote_op(foo, v; init=true) === Base._accumulate_promote_op(foo, v) == Union{Float64, String, ComplexF64} + @test Base._accumulate_promote_op(/, v) === Base._accumulate_promote_op(/, v; init=0) == Float64 + @test Base._accumulate_promote_op(+, v) === Base._accumulate_promote_op(+, v; init=0) === Int + @test Base._accumulate_promote_op(+, v; init=0.0) === Float64 + @test 
Base._accumulate_promote_op(+, Union{Int, Missing}[v...]) === Union{Int, Missing} + @test Base._accumulate_promote_op(+, Union{Int, Nothing}[v...]) === Union{Int, Nothing} + end end struct F21666{T <: Base.ArithmeticStyle} @@ -3117,3 +3267,37 @@ end @test c + zero(c) == c end end + +@testset "Wrapping Memory into Arrays" begin + mem = Memory{Int}(undef, 10) .= 1 + memref = memoryref(mem) + @test_throws DimensionMismatch Base.wrap(Array, mem, (10, 10)) + @test Base.wrap(Array, mem, (5,)) == ones(Int, 5) + @test Base.wrap(Array, mem, 2) == ones(Int, 2) + @test Base.wrap(Array, memref, 10) == ones(Int, 10) + @test Base.wrap(Array, memref, (2,2,2)) == ones(Int,2,2,2) + @test Base.wrap(Array, mem, (5, 2)) == ones(Int, 5, 2) + + memref2 = memoryref(mem, 3) + @test Base.wrap(Array, memref2, (5,)) == ones(Int, 5) + @test Base.wrap(Array, memref2, 2) == ones(Int, 2) + @test Base.wrap(Array, memref2, (2,2,2)) == ones(Int,2,2,2) + @test Base.wrap(Array, memref2, (3, 2)) == ones(Int, 3, 2) + @test_throws DimensionMismatch Base.wrap(Array, memref2, 9) + @test_throws DimensionMismatch Base.wrap(Array, memref2, 10) +end + +@testset "Memory size" begin + len = 5 + mem = Memory{Int}(undef, len) + @test size(mem, 1) == len + @test size(mem, 0x1) == len + @test size(mem, 2) == 1 + @test size(mem, 0x2) == 1 +end + +@testset "MemoryRef" begin + mem = Memory{Float32}(undef, 3) + ref = memoryref(mem, 2) + @test parent(ref) === mem +end diff --git a/test/atexit.jl b/test/atexit.jl index 64b56e32466df..08a8e0c4b46a2 100644 --- a/test/atexit.jl +++ b/test/atexit.jl @@ -214,12 +214,13 @@ using Test # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # 3. attempting to register a hook after all hooks have finished (disallowed) """ - const atexit_has_finished = Threads.Atomic{Bool}(false) + const atexit_has_finished = Threads.Atomic{Int}(0) atexit() do Threads.@spawn begin # Block until the atexit hooks have all finished. We use a manual "spin # lock" because task switch is disallowed inside the finalizer, below. - while !atexit_has_finished[] end + atexit_has_finished[] = 1 + while atexit_has_finished[] == 1; GC.safepoint(); end try # By the time this runs, all the atexit hooks will be done. # So this will throw. @@ -231,15 +232,16 @@ using Test exit(22) end end + while atexit_has_finished[] == 0; GC.safepoint(); end end # Finalizers run after the atexit hooks, so this blocks exit until the spawned # task above gets a chance to run. x = [] finalizer(x) do x # Allow the spawned task to finish - atexit_has_finished[] = true + atexit_has_finished[] = 2 # Then spin forever to prevent exit. 
- while atexit_has_finished[] end + while atexit_has_finished[] == 2; GC.safepoint(); end end exit(0) """ => 22, diff --git a/test/atomics.jl b/test/atomics.jl index dd50fb96be49f..adfe4c87138cd 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -44,6 +44,13 @@ copy(r::Union{Refxy,ARefxy}) = typeof(r)(r.x, r.y) function add(x::T, y)::T where {T}; x + y; end swap(x, y) = y +struct UndefComplex{T} + re::T + im::T + UndefComplex{T}() where {T} = new{T}() +end +Base.convert(T::Type{<:UndefComplex}, S) = T() + let T1 = Refxy{NTuple{3,UInt8}}, T2 = ARefxy{NTuple{3,UInt8}} @test sizeof(T1) == 6 @@ -60,10 +67,13 @@ end # check that very large types are getting locks let (x, y) = (Complex{Int128}(10, 30), Complex{Int128}(20, 40)) - ar = ARefxy(x, y) r = Refxy(x, y) + ar = ARefxy(x, y) + mr = AtomicMemory{Pair{typeof(x),typeof(y)}}(undef, 20) @test 64 == sizeof(r) < sizeof(ar) - @test sizeof(r) == sizeof(ar) - Int(fieldoffset(typeof(ar), 1)) + @test sizeof(ar) == sizeof(r) + Int(fieldoffset(typeof(ar), 1)) + @test_broken Base.elsize(mr) == sizeof(ar) + @test sizeof(mr) == length(mr) * (sizeof(r) + 16) end struct PadIntA <: Number # internal padding @@ -81,12 +91,15 @@ primitive type Int24 <: Signed 24 end # integral padding Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x) Base.Int(x::PadIntB) = x.a + (Int(x.b) << 8) + (Int(x.c) << 16) Base.:(+)(x::PadIntA, b::Int) = PadIntA(x.b + b) +Base.:(==)(x::PadIntA, b::Int) = x == PadIntA(b) Base.:(+)(x::PadIntB, b::Int) = PadIntB(Int(x) + b) Base.:(+)(x::Int24, b::Int) = Core.Intrinsics.add_int(x, Int24(b)) Base.show(io::IO, x::PadIntA) = print(io, "PadIntA(", x.b, ")") Base.show(io::IO, x::PadIntB) = print(io, "PadIntB(", Int(x), ")") Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int, x), ")") +## Fields + @noinline function _test_field_operators(r) r = r[] TT = fieldtype(typeof(r), :x) @@ -116,6 +129,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20)) test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing)) test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20)) test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20)) +test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20)) test_field_operators(ARefxy{PadIntA}(123_10, 123_20)) test_field_operators(ARefxy{PadIntB}(123_10, 123_20)) #FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20)) @@ -264,6 +278,26 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20)) @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true)) @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y)) @test replacefield!(r, :x, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true)) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :u, :not_atomic) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be written non-atomically") setfieldonce!(r, :x, x) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be written non-atomically") setfieldonce!(r, :x, y, :not_atomic, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :unordered, :not_atomic) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :monotonic, :not_atomic) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") 
setfieldonce!(r, :x, x, :acquire, :not_atomic) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :release, :not_atomic) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :acquire_release, :not_atomic) + @test_throws ConcurrencyViolationError("setfieldonce!: atomic field cannot be accessed non-atomically") setfieldonce!(r, :x, x, :sequentially_consistent, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :u) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :unordered) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :monotonic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :acquire) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :acquire_release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setfieldonce!(r, :x, x, :not_atomic, :sequentially_consistent) + @test setfieldonce!(r, :x, y, :sequentially_consistent, :sequentially_consistent) === false + @test setfieldonce!(r, :x, y, :sequentially_consistent, :sequentially_consistent) === false + @test setfieldonce!(r, :x, x, :sequentially_consistent) === false nothing end @noinline function test_field_orderings(r, x, y) @@ -284,15 +318,11 @@ test_field_orderings(ARefxy{Any}(true, false), true, false) test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing) test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1) test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40)) +test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40)) +test_field_orderings(Complex{Rational{Integer}}(10, 30), Complex{Rational{Integer}}(20, 40)) test_field_orderings(10.0, 20.0) test_field_orderings(NaN, Inf) -struct UndefComplex{T} - re::T - im::T - UndefComplex{T}() where {T} = new{T}() -end -Base.convert(T::Type{<:UndefComplex}, S) = T() @noinline function _test_field_undef(r) r = r[] TT = fieldtype(typeof(r), :x) @@ -318,6 +348,29 @@ test_field_undef(ARefxy{Union{Nothing,Integer}}) test_field_undef(ARefxy{UndefComplex{Any}}) test_field_undef(ARefxy{UndefComplex{UndefComplex{Any}}}) +@noinline function _test_once_undef(r) + r = r[] + TT = fieldtype(typeof(r), :x) + x = convert(TT, 12345_10) + @test_throws UndefRefError getfield(r, :x) + @test setfieldonce!(r, :x, x, :sequentially_consistent) === true + @test getfield(r, :x, :sequentially_consistent) === x + @test setfieldonce!(r, :x, convert(TT, 12345_20), :sequentially_consistent) === false + nothing +end + +@noinline function test_once_undef(TT) + _test_once_undef(Ref(TT())) + _test_once_undef(Ref{Any}(TT())) + nothing +end + +test_once_undef(ARefxy{BigInt}) +test_once_undef(ARefxy{Any}) +test_once_undef(ARefxy{Union{Nothing,Integer}}) +test_once_undef(ARefxy{UndefComplex{Any}}) +test_once_undef(ARefxy{UndefComplex{UndefComplex{Any}}}) + @test_throws ErrorException @macroexpand @atomic foo() @test_throws ErrorException @macroexpand @atomic foo += bar @test_throws ErrorException @macroexpand @atomic foo += bar @@ -374,6 +427,99 @@ let a = ARefxy(1, -1) 
@test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg end +function _test_atomic_get_set_swap_modify(T, x, y, z) + @testset "atomic get,set,swap,modify" begin + mem = AtomicMemory{T}(undef, 2) + @test_throws CanonicalIndexError mem[1] = 3 + + @test Base.setindex_atomic!(mem, Base.default_access_order(mem), x, 1) == x + @test mem[1] == x + @test Base.setindex_atomic!(mem, Base.default_access_order(mem), y, 2) == y + @test mem[2] == y + + idx = UInt32(2) + + @test (@atomic mem[1]) == x + @test (@atomic mem[idx]) == y + + (old, new) = (mem[idx], z) + # old and new are intentionally of different types to test inner conversion + @test (@atomic mem[idx] = new) == new + @test mem[idx] == new + @atomic mem[idx] = old + + @test (@atomicswap mem[idx] = new) == old + @test mem[idx] == new + @atomic mem[idx] = old + + try + old + new + @test (@atomic mem[idx] += new) == old + new + @test mem[idx] == old + new + @atomic mem[idx] = old + catch err + if !(err isa MethodError) + rethrow(err) + end + end + end +end + +function _test_atomic_setonce_replace(T, initial, desired) + @testset "atomic setonce,replace" begin + mem = AtomicMemory{T}(undef, 2) + if isassigned(mem, 2) + @test (@atomiconce mem[2] = initial) == false + @atomic mem[2] = initial + else + @test (@atomiconce mem[2] = initial) == true + @test mem[2] == initial + @test (@atomiconce mem[2] = desired) == false + @test mem[2] == initial + @test !isassigned(mem, 1) + end + + idx = UInt(2) + + expected = @atomic mem[idx] + @test (@atomicreplace mem[idx] expected => desired) == (old=expected, success=true) + @test mem[idx] == desired + + @atomic mem[idx] = expected + @test (@atomicreplace mem[idx] desired => desired) == (old=expected, success=false) + @test mem[idx] == expected + + @atomic mem[idx] = expected + @test (@atomicreplace mem[idx] Pair(expected, desired)) == (old=expected, success=true) + @test mem[idx] == desired + + @atomic mem[idx] = expected + @test (@atomicreplace mem[idx] Pair(desired, desired)) == (old=initial, success=false) + @test mem[idx] == expected + end +end +@testset "@atomic with AtomicMemory" begin + + _test_atomic_get_set_swap_modify(Float64, rand(), rand(), 10) + _test_atomic_get_set_swap_modify(PadIntA, 123_1, 123_2, 10) + _test_atomic_get_set_swap_modify(Union{Nothing,Int}, 123_1, nothing, 10) + _test_atomic_get_set_swap_modify(Union{Nothing,Int}, 123_1, 234_5, 10) + _test_atomic_get_set_swap_modify(Vector{BigInt}, BigInt[1, 2, 3], BigInt[1, 2], [2, 4]) + + _test_atomic_setonce_replace(Float64, rand(), 42) + _test_atomic_setonce_replace(PadIntA, 123_1, 123_2) + _test_atomic_setonce_replace(Union{Nothing,Int}, 123_1, nothing) + _test_atomic_setonce_replace(Vector{BigInt}, BigInt[1, 2], [3, 4]) + _test_atomic_setonce_replace(String, "abc", "cab") +end + +let a = ARefxy{Union{Nothing,Integer}}() + @test_throws ConcurrencyViolationError @atomiconce :not_atomic a.x = 2 + @test true === @atomiconce a.x = 1 + @test 1 === @atomic a.x + @test false === @atomiconce a.x = 2 +end + # atomic getfield with boundcheck # via codegen getx(a, boundcheck) = getfield(a, :x, :sequentially_consistent, boundcheck) @@ -384,3 +530,562 @@ ans = getfield(ARefxy{Any}(42, 42), :x, :sequentially_consistent, true) @test ans == 42 ans = getfield(ARefxy{Any}(42, 42), :x, :sequentially_consistent, false) @test ans == 42 + + +## Globals + +# the optimizer is terrible at handling PhiC nodes, so this must be a function +# generator with a custom inlining here of r, instead of being able to assume +# the inlining pass can 
inline a constant value correctly +function gen_test_global_operators(@nospecialize r) + M = @__MODULE__ + return quote + TT = Core.get_binding_type($M, $r) + T = typeof(getglobal($M, $r)) + @test getglobal($M, $r, :sequentially_consistent) === T(123_10) + @test setglobal!($M, $r, T(123_1), :sequentially_consistent) === T(123_1) + @test getglobal($M, $r, :sequentially_consistent) === T(123_1) + @test replaceglobal!($M, $r, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), false)) + @test replaceglobal!($M, $r, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), true)) + @test getglobal($M, $r, :sequentially_consistent) === T(123_30) + @test replaceglobal!($M, $r, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_30), false)) + @test getglobal($M, $r, :sequentially_consistent) === T(123_30) + @test modifyglobal!($M, $r, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_30), T(123_31)) + @test modifyglobal!($M, $r, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_31), T(123_32)) + @test getglobal($M, $r, :sequentially_consistent) === T(123_32) + @test swapglobal!($M, $r, T(123_1), :sequentially_consistent) === T(123_32) + @test getglobal($M, $r, :sequentially_consistent) === T(123_1) + end +end +@noinline function test_global_operators(T::Type) + r = Symbol("g1_$T") + @eval global $r::$T = 123_10 + invokelatest(@eval(() -> $(gen_test_global_operators(QuoteNode(r))))) + r = Symbol("g2_$T") + @eval global $r::$T = 123_10 + invokelatest(@eval(r -> $(gen_test_global_operators(:r))), r) + nothing +end +test_global_operators(Int) +test_global_operators(Any) +test_global_operators(Union{Nothing,Int}) +test_global_operators(Complex{Int32}) +test_global_operators(Complex{Int128}) +test_global_operators(Complex{Real}) +test_global_operators(PadIntA) +test_global_operators(PadIntB) +#FIXME: test_global_operators(Int24) +test_global_operators(Float64) + +function gen_test_global_orderings(@nospecialize r) + M = @__MODULE__ + return quote + @nospecialize x y + TT = Core.get_binding_type($M, $r) + + @test getglobal($M, $r) === x + @test_throws ConcurrencyViolationError("invalid atomic ordering") getglobal($M, $r, :u) + @test_throws ConcurrencyViolationError("getglobal: module binding cannot be read non-atomically") getglobal($M, $r, :not_atomic) + @test getglobal($M, $r, :unordered) === x + @test getglobal($M, $r, :monotonic) === x + @test getglobal($M, $r, :acquire) === x + @test_throws ConcurrencyViolationError("invalid atomic ordering") getglobal($M, $r, :release) === x + @test_throws ConcurrencyViolationError("invalid atomic ordering") getglobal($M, $r, :acquire_release) === x + @test getglobal($M, $r, :sequentially_consistent) === x + @test isdefined($M, $r) + @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined($M, $r, :u) + @test_throws ConcurrencyViolationError("isdefined: module binding cannot be accessed non-atomically") isdefined($M, $r, :not_atomic) + @test isdefined($M, $r, :unordered) + @test isdefined($M, $r, :monotonic) + @test isdefined($M, $r, :acquire) + @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined($M, $r, :release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined($M, $r, :acquire_release) + @test isdefined($M, $r, :sequentially_consistent) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobal!($M, $r, y, :u) + 
@test_throws ConcurrencyViolationError("setglobal!: module binding cannot be written non-atomically") setglobal!($M, $r, y, :not_atomic) + @test getglobal($M, $r) === x + @test setglobal!($M, $r, y) === y + @test setglobal!($M, $r, y, :unordered) === y + @test setglobal!($M, $r, y, :monotonic) === y + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobal!($M, $r, y, :acquire) === y + @test setglobal!($M, $r, y, :release) === y + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobal!($M, $r, y, :acquire_release) === y + @test setglobal!($M, $r, y, :sequentially_consistent) === y + @test getglobal($M, $r) === y + + @test_throws ConcurrencyViolationError("invalid atomic ordering") swapglobal!($M, $r, x, :u) + @test_throws ConcurrencyViolationError("swapglobal!: module binding cannot be written non-atomically") swapglobal!($M, $r, x, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") swapglobal!($M, $r, x, :unordered) === y + @test swapglobal!($M, $r, x, :monotonic) === y + @test swapglobal!($M, $r, x, :acquire) === x + @test swapglobal!($M, $r, x, :release) === x + @test swapglobal!($M, $r, x, :acquire_release) === x + @test swapglobal!($M, $r, x, :sequentially_consistent) === x + @test swapglobal!($M, $r, x) === x + + @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyglobal!($M, $r, swap, x, :u) + @test_throws ConcurrencyViolationError("modifyglobal!: module binding cannot be written non-atomically") modifyglobal!($M, $r, swap, x, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyglobal!($M, $r, swap, x, :unordered) + @test modifyglobal!($M, $r, swap, x, :monotonic) === Pair{TT,TT}(x, x) + @test modifyglobal!($M, $r, swap, x, :acquire) === Pair{TT,TT}(x, x) + @test modifyglobal!($M, $r, swap, x, :release) === Pair{TT,TT}(x, x) + @test modifyglobal!($M, $r, swap, x, :acquire_release) === Pair{TT,TT}(x, x) + @test modifyglobal!($M, $r, swap, x, :sequentially_consistent) === Pair{TT,TT}(x, x) + @test modifyglobal!($M, $r, swap, x) === Pair{TT,TT}(x, x) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :u, :not_atomic) + @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be written non-atomically") replaceglobal!($M, $r, y, x, :not_atomic, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :unordered, :not_atomic) + @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :monotonic, :not_atomic) + @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :acquire, :not_atomic) + @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :release, :not_atomic) + @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :acquire_release, :not_atomic) + @test_throws ConcurrencyViolationError("replaceglobal!: module binding cannot be accessed non-atomically") replaceglobal!($M, $r, x, x, :sequentially_consistent, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :u) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, 
x, :not_atomic, :unordered) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :monotonic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :acquire) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :acquire_release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") replaceglobal!($M, $r, x, x, :not_atomic, :sequentially_consistent) + @test replaceglobal!($M, $r, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true)) + @test replaceglobal!($M, $r, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y)) + @test replaceglobal!($M, $r, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true)) + @test replaceglobal!($M, $r, x, x) === ReplaceType{TT}((x, true)) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :u, :not_atomic) + @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be written non-atomically") setglobalonce!($M, $r, y, :not_atomic, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :unordered, :not_atomic) + @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :monotonic, :not_atomic) + @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :acquire, :not_atomic) + @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :release, :not_atomic) + @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :acquire_release, :not_atomic) + @test_throws ConcurrencyViolationError("setglobalonce!: module binding cannot be accessed non-atomically") setglobalonce!($M, $r, x, :sequentially_consistent, :not_atomic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :u) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :unordered) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :monotonic) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :acquire) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :acquire_release) + @test_throws ConcurrencyViolationError("invalid atomic ordering") setglobalonce!($M, $r, x, :not_atomic, :sequentially_consistent) + @test setglobalonce!($M, $r, x) === false + @test setglobalonce!($M, $r, y, :sequentially_consistent, :sequentially_consistent) === false + @test setglobalonce!($M, $r, y, :sequentially_consistent, :sequentially_consistent) === false + @test setglobalonce!($M, $r, x, :sequentially_consistent) === false + end +end +@noinline function test_global_orderings(T::Type, x, y) + @nospecialize + r = Symbol("h1_$T") + @eval global $r::$T = 
$(QuoteNode(x)) + invokelatest(@eval((x, y) -> $(gen_test_global_orderings(QuoteNode(r)))), x, y) + r = Symbol("h2_$T") + @eval global $r::$T = $(QuoteNode(x)) + invokelatest(@eval((r, x, y) -> $(gen_test_global_orderings(:r))), r, x, y) + nothing +end +test_global_orderings(Int, 10, 20) +test_global_orderings(Bool, true, false) +test_global_orderings(String, "hi", "bye") +test_global_orderings(Symbol, :hi, :bye) +test_global_orderings(Nothing, nothing, nothing) +test_global_orderings(Any, 123_10, 123_20) +test_global_orderings(Any, true, false) +test_global_orderings(Union{Nothing,Missing}, nothing, missing) +test_global_orderings(Union{Nothing,Int}, nothing, 123_1) +test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40)) +test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40)) +test_global_orderings(Float64, 10.0, 20.0) +test_global_orderings(Float64, NaN, Inf) + +function gen_test_global_undef(@nospecialize r) + M = @__MODULE__ + return quote + TT = Core.get_binding_type($M, $r) + x = convert(TT, 12345_10) + @test_throws UndefVarError getglobal($M, $r) + @test_throws UndefVarError getglobal($M, $r, :sequentially_consistent) + @test_throws UndefVarError modifyglobal!($M, $r, add, 1, :sequentially_consistent) + @test_throws (TT === Any ? UndefVarError : Union{TypeError,ErrorException}) replaceglobal!($M, $r, 1, 1.0, :sequentially_consistent) # TODO: should this be TypeError or ErrorException + @test_throws UndefVarError replaceglobal!($M, $r, 1, x, :sequentially_consistent) + @test_throws UndefVarError getglobal($M, $r, :sequentially_consistent) + @test_throws UndefVarError swapglobal!($M, $r, x, :sequentially_consistent) + @test getglobal($M, $r, :sequentially_consistent) === x === getglobal($M, $r) + end +end +@noinline function test_global_undef(T) + r = Symbol("u1_$T") + @eval global $r::$T + invokelatest(@eval(() -> $(gen_test_global_undef(QuoteNode(r))))) + r = Symbol("u2_$T") + @eval global $r::$T + invokelatest(@eval(r -> $(gen_test_global_undef(:r))), r) + nothing +end +test_global_undef(BigInt) +test_global_undef(Any) +test_global_undef(Union{Nothing,Integer}) +test_global_undef(UndefComplex{Any}) +test_global_undef(UndefComplex{UndefComplex{Any}}) +test_global_undef(Int) + +function gen_test_globalonce(@nospecialize r) + M = @__MODULE__ + return quote + TT = Core.get_binding_type($M, $r) + x = convert(TT, 12345_10) + @test_throws UndefVarError getglobal($M, $r) + @test setglobalonce!($M, $r, x, :sequentially_consistent) === true + @test getglobal($M, $r, :sequentially_consistent) === x + @test setglobalonce!($M, $r, convert(TT, 12345_20), :sequentially_consistent) === false + end +end +@noinline function test_globalonce(T) + r = Symbol("o1_$T") + @eval global $r::$T + invokelatest(@eval(() -> $(gen_test_globalonce(QuoteNode(r))))) + r = Symbol("o2_$T") + @eval global $r::$T + invokelatest(@eval(r -> $(gen_test_globalonce(:r))), r) + nothing +end +test_globalonce(BigInt) +test_globalonce(Any) +test_globalonce(Union{Nothing,Integer}) +test_globalonce(UndefComplex{Any}) +test_globalonce(UndefComplex{UndefComplex{Any}}) +test_globalonce(Int) + +# test macroexpansions +global x::Int +let a = @__MODULE__ + @test_throws ConcurrencyViolationError @atomiconce :not_atomic a.x = 2 + @test true === @atomiconce a.x = 1 + @test 1 === @atomic a.x + @test false === @atomiconce a.x = 2 +end +let a = @__MODULE__ + @test 1 === @atomic a.x + @test 2 === @atomic :sequentially_consistent a.x = 2 + @test 3 === @atomic :monotonic a.x = 3 + 
local four = 4 + @test 4 === @atomic :monotonic a.x = four + @test 3 === @atomic :monotonic a.x = four - 1 + @test_throws ConcurrencyViolationError @atomic :not_atomic a.x = 2 + @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + @test_throws ConcurrencyViolationError @atomic :not_atomic a.x += 1 + + @test 3 === @atomic :monotonic a.x + @test 5 === @atomic a.x += 2 + @test 4 === @atomic :monotonic a.x -= 1 + @test 12 === @atomic :monotonic a.x *= 3 + + @test 12 === @atomic a.x + @test (12 => 13) === @atomic a.x + 1 + @test (13 => 15) === @atomic :monotonic a.x + 2 + @test (15 => 19) === @atomic a.x max 19 + @test (19 => 20) === @atomic :monotonic a.x max 20 + @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + 1 + @test_throws ConcurrencyViolationError @atomic :not_atomic a.x max 30 + + @test 20 === @atomic a.x + @test 20 === @atomicswap a.x = 1 + @test 1 === @atomicswap :monotonic a.x = 2 + @test_throws ConcurrencyViolationError @atomicswap :not_atomic a.x = 1 + + @test 2 === @atomic a.x + @test ReplaceType{Int}((2, true)) === @atomicreplace a.x 2 => 1 + @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic a.x 2 => 1 + @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic :monotonic a.x 2 => 1 + @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x 1 => 2 + @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x 1 => 2 + + @test 1 === @atomic a.x + xchg = 1 => 2 + @test ReplaceType{Int}((1, true)) === @atomicreplace a.x xchg + @test ReplaceType{Int}((2, false)) === @atomicreplace :monotonic a.x xchg + @test ReplaceType{Int}((2, false)) === @atomicreplace :acquire_release :monotonic a.x xchg + @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x xchg + @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg +end + +## Memory + +using InteractiveUtils +using Core: memoryrefget, memoryrefset!, memoryrefswap!, memoryrefreplace!, memoryrefmodify!, memoryrefsetonce!, memoryref_isassigned + +@noinline function _test_memory_operators(r) + r = r[] + TT = eltype(r) + T = typeof(r[]) + @test memoryrefget(r, :sequentially_consistent, true) === T(123_10) + @test memoryrefset!(r, T(123_1), :sequentially_consistent, true) === T(123_1) + @test memoryrefget(r, :sequentially_consistent, true) === T(123_1) + @test memoryrefreplace!(r, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((T(123_1), false)) + @test memoryrefreplace!(r, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((T(123_1), true)) + @test memoryrefget(r, :sequentially_consistent, true) === T(123_30) + @test memoryrefreplace!(r, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((T(123_30), false)) + @test memoryrefget(r, :sequentially_consistent, true) === T(123_30) + @test memoryrefmodify!(r, add, 1, :sequentially_consistent, true) === Pair{TT,TT}(T(123_30), T(123_31)) + @test memoryrefmodify!(r, add, 1, :sequentially_consistent, true) === Pair{TT,TT}(T(123_31), T(123_32)) + @test memoryrefget(r, :sequentially_consistent, true) === T(123_32) + @test memoryrefswap!(r, T(123_1), :sequentially_consistent, true) === T(123_32) + @test memoryrefget(r, :sequentially_consistent, true) === T(123_1) + nothing +end +@noinline function test_memory_operators(T::Type) + x = convert(T, 123_10) + r = GenericMemoryRef(AtomicMemory{T}(undef, 1)) + memoryrefset!(r, x, :unordered, 
true) # @atomic r[] = x + _test_memory_operators(Ref(r)) + r = GenericMemoryRef(AtomicMemory{T}(undef, 1)) + memoryrefset!(r, x, :unordered, true) # @atomic r[] = x + _test_memory_operators(Ref{Any}(r)) + nothing +end +test_memory_operators(Int) +test_memory_operators(Any) +test_memory_operators(Union{Nothing,Int}) +test_memory_operators(Complex{Int32}) +test_memory_operators(Complex{Int128}) +test_memory_operators(Complex{Real}) +test_memory_operators(PadIntA) +test_memory_operators(PadIntB) +#FIXME: test_memory_operators(Int24) +test_memory_operators(Float64) + +@noinline function _test_memory_orderings(xr, yr, x, y) + @nospecialize x y + xr = xr[] + yr = yr[] + TT = eltype(yr) + @test TT == eltype(xr) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(xr, :u, true) + @test_throws ConcurrencyViolationError("memoryrefget: atomic memory cannot be accessed non-atomically") memoryrefget(xr, :not_atomic, true) + @test memoryrefget(xr, :unordered, true) === x + @test memoryrefget(xr, :monotonic, true) === x + @test memoryrefget(xr, :acquire, true) === x + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(xr, :release, true) === x + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(xr, :acquire_release, true) === x + @test memoryrefget(xr, :sequentially_consistent, true) === x + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(xr, :u, true) + @test_throws ConcurrencyViolationError("memoryref_isassigned: atomic memory cannot be accessed non-atomically") memoryref_isassigned(xr, :not_atomic, true) + @test memoryref_isassigned(xr, :unordered, true) + @test memoryref_isassigned(xr, :monotonic, true) + @test memoryref_isassigned(xr, :acquire, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(xr, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(xr, :acquire_release, true) + @test memoryref_isassigned(xr, :sequentially_consistent, true) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(yr, :u, true) + @test memoryrefget(yr, :not_atomic, true) === y + @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :unordered, true) + @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :monotonic, true) + @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :acquire, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(yr, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefget(yr, :acquire_release, true) + @test_throws ConcurrencyViolationError("memoryrefget: non-atomic memory cannot be accessed atomically") memoryrefget(yr, :sequentially_consistent, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(yr, :u, true) + @test memoryref_isassigned(yr, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :unordered, true) + @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :monotonic, true) + @test_throws ConcurrencyViolationError("memoryref_isassigned: 
non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :acquire, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(yr, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryref_isassigned(yr, :acquire_release, true) + @test_throws ConcurrencyViolationError("memoryref_isassigned: non-atomic memory cannot be accessed atomically") memoryref_isassigned(yr, :sequentially_consistent, true) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(xr, y, :u, true) + @test_throws ConcurrencyViolationError("memoryrefset!: atomic memory cannot be written non-atomically") memoryrefset!(xr, y, :not_atomic, true) + @test memoryrefget(xr, :unordered, true) === x + @test memoryrefset!(xr, y, :unordered, true) === y + @test memoryrefset!(xr, y, :monotonic, true) === y + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(xr, y, :acquire, true) === y + @test memoryrefset!(xr, y, :release, true) === y + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(xr, y, :acquire_release, true) === y + @test memoryrefset!(xr, y, :sequentially_consistent, true) === y + @test memoryrefget(xr, :unordered, true) === y + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(yr, x, :u, true) + @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :unordered, true) + @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :monotonic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(yr, x, :acquire, true) + @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefset!(yr, x, :acquire_release, true) + @test_throws ConcurrencyViolationError("memoryrefset!: non-atomic memory cannot be written atomically") memoryrefset!(yr, x, :sequentially_consistent, true) + @test memoryrefget(yr, :not_atomic, true) === y + @test memoryrefset!(yr, x, :not_atomic, true) === x + @test memoryrefset!(yr, x, :not_atomic, true) === x + @test memoryrefget(yr, :not_atomic, true) === x + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(yr, y, :u, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(yr, y, :unordered, true) + @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :monotonic, true) + @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :acquire, true) + @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :release, true) + @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :acquire_release, true) + @test_throws ConcurrencyViolationError("memoryrefswap!: non-atomic memory cannot be written atomically") memoryrefswap!(yr, y, :sequentially_consistent, true) + @test memoryrefswap!(yr, y, :not_atomic, true) === x + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(yr, swap, y, :u, true) + @test_throws 
ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(yr, swap, y, :unordered, true) + @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :monotonic, true) + @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :acquire, true) + @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :release, true) + @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :acquire_release, true) + @test_throws ConcurrencyViolationError("memoryrefmodify!: non-atomic memory cannot be written atomically") memoryrefmodify!(yr, swap, y, :sequentially_consistent, true) + @test memoryrefmodify!(yr, swap, x, :not_atomic, true) === Pair{TT,TT}(y, x) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :u, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :unordered, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :monotonic, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :acquire, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :release, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :acquire_release, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: non-atomic memory cannot be written atomically") memoryrefreplace!(yr, y, y, :sequentially_consistent, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :u, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :unordered, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :monotonic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :acquire, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :acquire_release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(yr, y, y, :not_atomic, :sequentially_consistent, true) + @test memoryrefreplace!(yr, x, y, :not_atomic, :not_atomic, true) === ReplaceType{TT}((x, true)) + @test memoryrefreplace!(yr, x, y, :not_atomic, :not_atomic, true) === ReplaceType{TT}((y, x === y)) + @test memoryrefreplace!(yr, y, y, :not_atomic, :not_atomic, true) === ReplaceType{TT}((y, true)) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(xr, x, :u, true) + @test_throws ConcurrencyViolationError("memoryrefswap!: atomic memory cannot be written non-atomically") memoryrefswap!(xr, x, :not_atomic, true) + @test_throws 
ConcurrencyViolationError("invalid atomic ordering") memoryrefswap!(xr, x, :unordered, true) === y + @test memoryrefswap!(xr, x, :monotonic, true) === y + @test memoryrefswap!(xr, x, :acquire, true) === x + @test memoryrefswap!(xr, x, :release, true) === x + @test memoryrefswap!(xr, x, :acquire_release, true) === x + @test memoryrefswap!(xr, x, :sequentially_consistent, true) === x + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(xr, swap, x, :u, true) + @test_throws ConcurrencyViolationError("memoryrefmodify!: atomic memory cannot be written non-atomically") memoryrefmodify!(xr, swap, x, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefmodify!(xr, swap, x, :unordered, true) + @test memoryrefmodify!(xr, swap, x, :monotonic, true) === Pair{TT,TT}(x, x) + @test memoryrefmodify!(xr, swap, x, :acquire, true) === Pair{TT,TT}(x, x) + @test memoryrefmodify!(xr, swap, x, :release, true) === Pair{TT,TT}(x, x) + @test memoryrefmodify!(xr, swap, x, :acquire_release, true) === Pair{TT,TT}(x, x) + @test memoryrefmodify!(xr, swap, x, :sequentially_consistent, true) === Pair{TT,TT}(x, x) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :u, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be written non-atomically") memoryrefreplace!(xr, y, x, :not_atomic, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :unordered, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :monotonic, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :acquire, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :release, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :acquire_release, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefreplace!: atomic memory cannot be accessed non-atomically") memoryrefreplace!(xr, x, x, :sequentially_consistent, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :u, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :unordered, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :monotonic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :acquire, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :acquire_release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefreplace!(xr, x, x, :not_atomic, :sequentially_consistent, true) + @test memoryrefreplace!(xr, x, y, :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((x, true)) + @test memoryrefreplace!(xr, x, y, :sequentially_consistent, 
:sequentially_consistent, true) === ReplaceType{TT}((y, x === y)) + @test memoryrefreplace!(xr, y, x, :sequentially_consistent, :sequentially_consistent, true) === ReplaceType{TT}((y, true)) + + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :u, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be written non-atomically") memoryrefsetonce!(xr, y, :not_atomic, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :unordered, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :monotonic, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :acquire, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :release, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :acquire_release, :not_atomic, true) + @test_throws ConcurrencyViolationError("memoryrefsetonce!: atomic memory cannot be accessed non-atomically") memoryrefsetonce!(xr, x, :sequentially_consistent, :not_atomic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :u, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :unordered, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :monotonic, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :acquire, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :acquire_release, true) + @test_throws ConcurrencyViolationError("invalid atomic ordering") memoryrefsetonce!(xr, x, :not_atomic, :sequentially_consistent, true) + @test memoryrefsetonce!(xr, y, :sequentially_consistent, :sequentially_consistent, true) === false + @test memoryrefsetonce!(xr, y, :sequentially_consistent, :sequentially_consistent, true) === false + @test memoryrefsetonce!(xr, x, :sequentially_consistent, :sequentially_consistent, true) === false + nothing +end +@noinline function test_memory_orderings(T::Type, x, y) + @nospecialize + xr = GenericMemoryRef(AtomicMemory{T}(undef, 1)) + memoryrefset!(xr, x, :unordered, true) # @atomic xr[] = x + yr = GenericMemoryRef(Memory{T}(undef, 1)) + yr[] = y + _test_memory_orderings(Ref(xr), Ref(yr), x, y) + xr = GenericMemoryRef(AtomicMemory{T}(undef, 1)) + memoryrefset!(xr, x, :unordered, true) # @atomic xr[] = x + yr = GenericMemoryRef(Memory{T}(undef, 1)) + yr[] = y + _test_memory_orderings(Ref{Any}(xr), Ref{Any}(yr), x, y) + nothing +end +@noinline test_memory_orderings(x, y) = (@nospecialize; test_memory_orderings(typeof(x), x, y)) +test_memory_orderings(10, 20) +test_memory_orderings(true, false) +test_memory_orderings("hi", "bye") +test_memory_orderings(:hi, :bye) +test_memory_orderings(nothing, nothing) +test_memory_orderings(Any, 123_10, 123_20) +test_memory_orderings(Any, true, false) 
+test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
+test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
+test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
+test_memory_orderings(10.0, 20.0)
+test_memory_orderings(NaN, Inf)
+
+@noinline function _test_memory_undef(r)
+    r = r[]
+    TT = eltype(r)
+    x = convert(TT, 12345_10)
+    @test_throws UndefRefError memoryrefget(r, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefmodify!(r, add, 1, :sequentially_consistent, true)
+    @test_throws (TT === Any ? UndefRefError : TypeError) memoryrefreplace!(r, 1, 1.0, :sequentially_consistent, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefreplace!(r, 1, x, :sequentially_consistent, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefget(r, :sequentially_consistent, true)
+    @test_throws UndefRefError memoryrefswap!(r, x, :sequentially_consistent, true)
+    @test memoryrefget(r, :sequentially_consistent, true) === x
+    nothing
+end
+@noinline function test_memory_undef(T)
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_memory_undef(Ref(r))
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_memory_undef(Ref{Any}(r))
+    nothing
+end
+test_memory_undef(BigInt)
+test_memory_undef(Any)
+test_memory_undef(Union{Nothing,Integer})
+test_memory_undef(UndefComplex{Any})
+test_memory_undef(UndefComplex{UndefComplex{Any}})
+
+@noinline function _test_once_undef(r)
+    r = r[]
+    TT = eltype(r)
+    x = convert(TT, 12345_10)
+    @test_throws UndefRefError memoryrefget(r, :sequentially_consistent, true)
+    @test memoryrefsetonce!(r, x, :sequentially_consistent, :sequentially_consistent, true) === true
+    @test memoryrefget(r, :sequentially_consistent, true) === x
+    @test memoryrefsetonce!(r, convert(TT, 12345_20), :sequentially_consistent, :sequentially_consistent, true) === false
+    nothing
+end
+@noinline function test_once_undef(T)
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_once_undef(Ref(r))
+    r = GenericMemoryRef(AtomicMemory{T}(undef, 1))
+    _test_once_undef(Ref{Any}(r))
+    nothing
+end
+test_once_undef(BigInt)
+test_once_undef(Any)
+test_once_undef(Union{Nothing,Integer})
+test_once_undef(UndefComplex{Any})
+test_once_undef(UndefComplex{UndefComplex{Any}})
diff --git a/test/backtrace.jl b/test/backtrace.jl
index 50a50100488c4..ee04a46b17304 100644
--- a/test/backtrace.jl
+++ b/test/backtrace.jl
@@ -202,6 +202,15 @@ let trace = try end @test trace[1].func === Symbol("top-level scope") end
+let trace = try
+        eval(Expr(:toplevel, LineNumberNode(3, :a_filename), Expr(:error, 1)))
+    catch
+        stacktrace(catch_backtrace())
+    end
+    @test trace[1].func === Symbol("top-level scope")
+    @test trace[1].file === :a_filename
+    @test trace[1].line == 3
+end
 let trace = try include_string(@__MODULE__, """
@@ -228,7 +237,7 @@ let trace = try end @test trace[1].func === Symbol("top-level scope") @test trace[1].file === :a_filename
-    @test trace[1].line == 3
+    @test trace[1].line in (2, 3)
 end
 # issue #45171
@@ -253,10 +262,14 @@ let code = """ if ip isa Base.InterpreterIP && ip.code isa Core.MethodInstance] num_fs = sum(meth_names .== :f29695) num_gs = sum(meth_names .== :g29695)
-    print(num_fs, ' ', num_gs)
+    if num_fs != 1000 || num_gs != 1000
+        Base.show_backtrace(stderr, bt)
+        error("Expected 1000 frames each, got \$num_fs, \$num_gs")
+    end
+    exit()
    """
-    @test read(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`, String) == "1000 1000"
+    @test success(pipeline(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`; stderr))
 end
 # Test that modules make it into InterpreterIP for top-level code
diff --git a/test/bitarray.jl b/test/bitarray.jl
index 5d0bff62ab6e1..fd5c1421a256f 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -1,7 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 using Base: findprevnot, findnextnot
-using Random, LinearAlgebra, Test
+using Random, Test, LinearAlgebra # Ideally, these tests should not depend on LinearAlgebra
+
+isdefined(Main, :SizedArrays) || @eval Main include("testhelpers/SizedArrays.jl")
+using .Main.SizedArrays
 tc(r1::NTuple{N,Any}, r2::NTuple{N,Any}) where {N} = all(x->tc(x...), [zip(r1,r2)...]) tc(r1::BitArray{N}, r2::Union{BitArray{N},Array{Bool,N}}) where {N} = true
@@ -12,7 +15,6 @@ tc(r1,r2) = false bitcheck(b::BitArray) = Test._check_bitarray_consistency(b) bitcheck(x) = true
-bcast_setindex!(b, x, I...) = (b[I...] .= x; b)
 function check_bitop_call(ret_type, func, args...; kwargs...) r2 = func(map(x->(isa(x, BitArray) ? Array(x) : x), args)...; kwargs...)
@@ -31,6 +33,9 @@ macro check_bit_operation(ex) Expr(:call, :check_bitop_call, nothing, map(esc, ex.args)...) end
+bcast_setindex!(b, x, I...) = (b[I...] .= x; b)
+
+
 let t0 = time_ns() global timesofar function timesofar(str)
@@ -82,6 +87,25 @@ allsizes = [((), BitArray{0}), ((v1,), BitVector), @test !isassigned(b, length(b) + 1) end
+@testset "trues and falses with custom axes" begin
+    for ax in ((SizedArrays.SOneTo(2),), (SizedArrays.SOneTo(2), Base.OneTo(2)))
+        t = trues(ax)
+        if all(x -> x isa SizedArrays.SOneTo, ax)
+            @test t isa SizedArrays.SizedArray && parent(t) isa BitArray
+        else
+            @test t isa BitArray
+        end
+        @test all(t)
+
+        f = falses(ax)
+        if all(x -> x isa SizedArrays.SOneTo, ax)
+            @test f isa SizedArrays.SizedArray && parent(f) isa BitArray
+        else
+            @test f isa BitArray
+        end
+        @test !any(f)
+    end
+end
 @testset "Conversions for size $sz" for (sz, T) in allsizes b1 = rand!(falses(sz...))
@@ -1335,11 +1359,11 @@ timesofar("find") @test findprev(b1, 777) == findprevnot(b2, 777) == findprev(!, b2, 777) == 777 @test findprev(b1, 776) == findprevnot(b2, 776) == findprev(!, b2, 776) == 77 @test findprev(b1, 77) == findprevnot(b2, 77) == findprev(!, b2, 77) == 77
-    @test findprev(b1, 76) == findprevnot(b2, 76) == findprev(!, b2, 76) == nothing
-    @test findprev(b1, -1) == findprevnot(b2, -1) == findprev(!, b2, -1) == nothing
-    @test findprev(identity, b1, -1) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findprev(Returns(true), b1, -1) == nothing
+    @test findprev(b1, 76) == findprevnot(b2, 76) == findprev(!, b2, 76) === nothing
+    @test findprev(b1, -1) == findprevnot(b2, -1) == findprev(!, b2, -1) === nothing
+    @test findprev(identity, b1, -1) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findprev(Returns(true), b1, -1) === nothing
 @test_throws BoundsError findnext(b1, -1) @test_throws BoundsError findnextnot(b2, -1) @test_throws BoundsError findnext(!, b2, -1)
@@ -1350,28 +1374,28 @@ timesofar("find") @test findnext(b1, 77) == findnextnot(b2, 77) == findnext(!, b2, 77) == 77 @test findnext(b1, 78) == findnextnot(b2, 78) == findnext(!, b2, 78) == 777 @test findnext(b1, 777) == findnextnot(b2, 777) == findnext(!, b2, 777) == 777
-    @test findnext(b1, 778) == findnextnot(b2, 778) == findnext(!, b2, 778) == nothing
-    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) == nothing
-    @test 
findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) == nothing + @test findnext(b1, 778) == findnextnot(b2, 778) == findnext(!, b2, 778) === nothing + @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) === nothing + @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) === nothing @test findlast(b1) == Base.findlastnot(b2) == 777 @test findfirst(b1) == Base.findfirstnot(b2) == 77 b0 = BitVector() - @test findprev(Returns(true), b0, -1) == nothing + @test findprev(Returns(true), b0, -1) === nothing @test_throws BoundsError findprev(Returns(true), b0, 1) @test_throws BoundsError findnext(Returns(true), b0, -1) - @test findnext(Returns(true), b0, 1) == nothing + @test findnext(Returns(true), b0, 1) === nothing b1 = falses(10) @test findprev(Returns(true), b1, 5) == 5 @test findnext(Returns(true), b1, 5) == 5 - @test findprev(Returns(true), b1, -1) == nothing - @test findnext(Returns(true), b1, 11) == nothing - @test findprev(Returns(false), b1, 5) == nothing - @test findnext(Returns(false), b1, 5) == nothing - @test findprev(Returns(false), b1, -1) == nothing - @test findnext(Returns(false), b1, 11) == nothing + @test findprev(Returns(true), b1, -1) === nothing + @test findnext(Returns(true), b1, 11) === nothing + @test findprev(Returns(false), b1, 5) === nothing + @test findnext(Returns(false), b1, 5) === nothing + @test findprev(Returns(false), b1, -1) === nothing + @test findnext(Returns(false), b1, 11) === nothing @test_throws BoundsError findprev(Returns(true), b1, 11) @test_throws BoundsError findnext(Returns(true), b1, -1) @@ -1393,7 +1417,7 @@ timesofar("find") for l = [1, 63, 64, 65, 127, 128, 129] f = falses(l) t = trues(l) - @test findprev(f, l) == findprevnot(t, l) == nothing + @test findprev(f, l) == findprevnot(t, l) === nothing @test findprev(t, l) == findprevnot(f, l) == l b1 = falses(l) b1[end] = true @@ -1539,6 +1563,21 @@ timesofar("reductions") end end end + @testset "Issue #50780, map! bitarray map! where dest aliases source" begin + a = BitVector([1,0]) + b = map(!, a) + map!(!, a, a) # a .= !.a + @test a == b == BitVector([0,1]) + + a = BitVector([1,0]) + c = map(|, a, b) + map!(|, a, a, b) + @test c == a == BitVector([1, 1]) + + a = BitVector([1,0]) + map!(|, b, a, b) + @test c == b == BitVector([1, 1]) + end end ## Filter ## @@ -1604,69 +1643,6 @@ end timesofar("cat") -@testset "Linear algebra" begin - b1 = bitrand(v1) - b2 = bitrand(v1) - @check_bit_operation dot(b1, b2) Int - - b1 = bitrand(n1, n2) - @test_throws ArgumentError tril(b1, -n1 - 2) - @test_throws ArgumentError tril(b1, n2) - @test_throws ArgumentError triu(b1, -n1) - @test_throws ArgumentError triu(b1, n2 + 2) - for k in (-n1 - 1):(n2 - 1) - @check_bit_operation tril(b1, k) BitMatrix - end - for k in (-n1 + 1):(n2 + 1) - @check_bit_operation triu(b1, k) BitMatrix - end - - for sz = [(n1,n1), (n1,n2), (n2,n1)], (f,isf) = [(tril,istril), (triu,istriu)] - b1 = bitrand(sz...) 
- @check_bit_operation isf(b1) Bool - b1 = f(bitrand(sz...)) - @check_bit_operation isf(b1) Bool - end - - b1 = bitrand(n1,n1) - b1 .|= copy(b1') - @check_bit_operation issymmetric(b1) Bool - @check_bit_operation ishermitian(b1) Bool - - b1 = bitrand(n1) - b2 = bitrand(n2) - @check_bit_operation kron(b1, b2) BitVector - - b1 = bitrand(s1, s2) - b2 = bitrand(s3, s4) - @check_bit_operation kron(b1, b2) BitMatrix - - b1 = bitrand(v1) - @check_bit_operation diff(b1) Vector{Int} - - b1 = bitrand(n1, n2) - @check_bit_operation diff(b1, dims=1) Matrix{Int} - @check_bit_operation diff(b1, dims=2) Matrix{Int} - - b1 = bitrand(n1, n1) - @test ((svdb1, svdb1A) = (svd(b1), svd(Array(b1))); - svdb1.U == svdb1A.U && svdb1.S == svdb1A.S && svdb1.V == svdb1A.V) - @test ((qrb1, qrb1A) = (qr(b1), qr(Array(b1))); - Matrix(qrb1.Q) == Matrix(qrb1A.Q) && qrb1.R == qrb1A.R) - - b1 = bitrand(v1) - @check_bit_operation diagm(0 => b1) BitMatrix - - b1 = bitrand(v1) - b2 = bitrand(v1) - @check_bit_operation diagm(-1 => b1, 1 => b2) BitMatrix - - b1 = bitrand(n1, n1) - @check_bit_operation diag(b1) -end - -timesofar("linalg") - @testset "findmax, findmin" begin b1 = trues(0) @test_throws ArgumentError findmax(b1) diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl index f2eb2ea630893..85df1d64017b4 100644 --- a/test/boundscheck_exec.jl +++ b/test/boundscheck_exec.jl @@ -252,10 +252,9 @@ end # Boundschecking removal of indices with different type, see #40281 getindex_40281(v, a, b, c) = @inbounds getindex(v, a, b, c) -typed_40281 = sprint((io, args...) -> code_warntype(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int}) +llvm_40281 = sprint((io, args...) -> code_llvm(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int}) if bc_opt == bc_default || bc_opt == bc_off - @test occursin("arrayref(false", typed_40281) - @test !occursin("arrayref(true", typed_40281) + @test !occursin("call void @ijl_bounds_error_ints", llvm_40281) end # Given this is a sub-processed test file, not using @testsets avoids @@ -298,4 +297,54 @@ end typeintersect(Int, Integer) end |> only === Type{Int} +if bc_opt == bc_default +@testset "Array/Memory escape analysis" begin + function no_allocate(T::Type{<:Union{Memory, Vector}}) + v = T(undef, 2) + v[1] = 2 + v[2] = 3 + return v[1] + v[2] + end + function test_alloc(::Type{T}; broken=false) where T + @test (@allocated no_allocate(T)) == 0 broken=broken + end + @testset "$T" for T in [Memory, Vector] + @testset "$ET" for ET in [Int, Float32, Union{Int, Float64}] + no_allocate(T{ET}) #compile + # allocations aren't removed for Union eltypes which they theoretically could be eventually + test_alloc(T{ET}, broken=(ET==Union{Int, Float64})) + end + end + function f() # this was causing a bug on an in progress version of #55913. 
+ m = Memory{Float64}(undef, 4) + m .= 1.0 + s = 0.0 + for x ∈ m + s += x + end + s + end + @test f() === 4.0 + function confuse_alias_analysis() + mem0 = Memory{Int}(undef, 1) + mem1 = Memory{Int}(undef, 1) + @inbounds mem0[1] = 3 + for width in 1:2 + @inbounds mem1[1] = mem0[1] + mem0 = mem1 + end + mem0[1] + end + @test confuse_alias_analysis() == 3 + @test (@allocated confuse_alias_analysis()) == 0 + function no_alias_prove(n) + m1 = Memory{Int}(undef,n) + m2 = Memory{Int}(undef,n) + m1 === m2 + end + no_alias_prove(1) + @test_broken (@allocated no_alias_prove(5)) == 0 +end +end + end diff --git a/test/broadcast.jl b/test/broadcast.jl index 87858dd0f08fc..0f5bdf7a40bb1 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -49,9 +49,9 @@ ci(x) = CartesianIndex(x) @test @inferred(newindex(ci((2,2)), (true, false), (-1,-1))) == ci((2,-1)) @test @inferred(newindex(ci((2,2)), (false, true), (-1,-1))) == ci((-1,2)) @test @inferred(newindex(ci((2,2)), (false, false), (-1,-1))) == ci((-1,-1)) -@test @inferred(newindex(ci((2,2)), (true,), (-1,-1))) == ci((2,)) -@test @inferred(newindex(ci((2,2)), (true,), (-1,))) == ci((2,)) -@test @inferred(newindex(ci((2,2)), (false,), (-1,))) == ci((-1,)) +@test @inferred(newindex(ci((2,2)), (true,), (-1,-1))) == 2 +@test @inferred(newindex(ci((2,2)), (true,), (-1,))) == 2 +@test @inferred(newindex(ci((2,2)), (false,), (-1,))) == -1 @test @inferred(newindex(ci((2,2)), (), ())) == ci(()) end @@ -592,6 +592,16 @@ end end end +@testset "convert behavior of logical broadcast" begin + a = mod.(1:4, 2) + @test !isa(a, BitArray) + for T in (Array{Bool}, BitArray) + la = T(a) + la .= mod.(0:3, 2) + @test la == [false; true; false; true] + end +end + # Test that broadcast treats type arguments as scalars, i.e. containertype yields Any, # even for subtypes of abstract array. (https://github.com/JuliaStats/DataArrays.jl/issues/229) @testset "treat type arguments as scalars, DataArrays issue 229" begin @@ -774,19 +784,32 @@ let X = zeros(2, 3) end # issue #27988: inference of Broadcast.flatten -using .Broadcast: Broadcasted +using .Broadcast: Broadcasted, cat_nested let bc = Broadcasted(+, (Broadcasted(*, (1, 2)), Broadcasted(*, (Broadcasted(*, (3, 4)), 5)))) - @test @inferred(Broadcast.cat_nested(bc)) == (1,2,3,4,5) + @test @inferred(cat_nested(bc)) == (1,2,3,4,5) @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 62 bc = Broadcasted(+, (Broadcasted(*, (1, Broadcasted(/, (2.0, 2.5)))), Broadcasted(*, (Broadcasted(*, (3, 4)), 5)))) - @test @inferred(Broadcast.cat_nested(bc)) == (1,2.0,2.5,3,4,5) + @test @inferred(cat_nested(bc)) == (1,2.0,2.5,3,4,5) @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 60.8 + # 1 .* 1 .- 1 .* 1 .^2 .+ 1 .* 1 .+ 1 .^ 3 + bc = Broadcasted(+, (Broadcasted(+, (Broadcasted(-, (Broadcasted(*, (1, 1)), Broadcasted(*, (1, Broadcasted(Base.literal_pow, (Ref(^), 1, Ref(Val(2)))))))), Broadcasted(*, (1, 1)))), Broadcasted(Base.literal_pow, (Base.RefValue{typeof(^)}(^), 1, Base.RefValue{Val{3}}(Val{3}()))))) + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 2 + # @. 1 + 1 * (1 + 1 + 1 + 1) + bc = Broadcasted(+, (1, Broadcasted(*, (1, Broadcasted(+, (1, 1, 1, 1)))))) + @test @inferred(cat_nested(bc)) == (1, 1, 1, 1, 1, 1) # `cat_nested` failed to infer this + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc) + # @. 
1 + (1 + 1) + 1 + (1 + 1) + 1 + (1 + 1) + 1 + bc = Broadcasted(+, (1, Broadcasted(+, (1, 1)), 1, Broadcasted(+, (1, 1)), 1, Broadcasted(+, (1, 1)), 1)) + @test @inferred(cat_nested(bc)) == (1, 1, 1, 1, 1, 1, 1, 1, 1, 1) + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc) + bc = Broadcasted(Float32, (Broadcasted(+, (1, 1)),)) + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc) end let - bc = Broadcasted(+, (Broadcasted(*, ([1, 2, 3], 4)), 5)) - @test isbits(Broadcast.flatten(bc).f) + bc = Broadcasted(+, (Broadcasted(*, ([1, 2, 3], 4)), 5)) + @test isbits(Broadcast.flatten(bc).f) end # Issue #26127: multiple splats in a fused dot-expression @@ -853,6 +876,31 @@ let @test copy(bc) == [v for v in bc] == collect(bc) @test eltype(copy(bc)) == eltype([v for v in bc]) == eltype(collect(bc)) @test ndims(copy(bc)) == ndims([v for v in bc]) == ndims(collect(bc)) == ndims(bc) + + struct MyFill{T,N} <: AbstractArray{T,N} + val :: T + sz :: NTuple{N,Int} + end + Base.size(M::MyFill) = M.sz + function Base.getindex(M::MyFill{<:Any,N}, i::Vararg{Int, N}) where {N} + checkbounds(M, i...) + M.val + end + Base.IndexStyle(::Type{<:Base.Broadcast.Broadcasted{<:Any,<:Any,<:Any,<:Tuple{MyFill}}}) = IndexLinear() + bc = Broadcast.instantiate(Broadcast.broadcasted(+, MyFill(2, (3,3)))) + @test IndexStyle(bc) == IndexLinear() + @test eachindex(bc) === Base.OneTo(9) + @test bc[2] == bc[CartesianIndex(2,1)] + + for bc in Any[ + Broadcast.broadcasted(+, collect(reshape(1:9, 3, 3)), 1:3), # IndexCartesian + Broadcast.broadcasted(+, [1,2], 2), # IndexLinear + ] + bci = Broadcast.instantiate(bc) + for (Ilin, Icart) in zip(eachindex(IndexLinear(), bc), eachindex(IndexCartesian(), bc)) + @test bc[Ilin] == bc[Icart] + end + end end # issue 43847: collect preserves shape of broadcasted @@ -882,6 +930,8 @@ let @test @inferred(Base.IteratorSize(Broadcast.broadcasted(+, (1,2,3), a1, zeros(3,3,3)))) === Base.HasShape{3}() + @test @inferred(Base.IteratorSize(Base.broadcasted(randn))) === Base.HasShape{0}() + # inference on nested bc = Base.broadcasted(+, AD1(randn(3)), AD1(randn(3))) bc_nest = Base.broadcasted(+, bc , bc) @@ -956,6 +1006,10 @@ end @test sum(bc, dims=1, init=0) == [5] bc = Broadcast.instantiate(Broadcast.broadcasted(*, ['a','b'], 'c')) @test prod(bc, dims=1, init="") == ["acbc"] + + a = rand(-10:10,32,4); b = rand(-10:10,32,4) + bc = Broadcast.instantiate(Broadcast.broadcasted(+,a,b)) + @test sum(bc; dims = 1, init = 0.0) == sum(collect(bc); dims = 1, init = 0.0) end # treat Pair as scalar: @@ -1129,7 +1183,45 @@ end @test CartesianIndex(1,2) .+ [CartesianIndex(3,4), CartesianIndex(5,6)] == [CartesianIndex(4, 6), CartesianIndex(6, 8)] end +struct MyBroadcastStyleWithField <: Broadcast.BroadcastStyle + i::Int +end +# asymmetry intended +Base.BroadcastStyle(a::MyBroadcastStyleWithField, b::MyBroadcastStyleWithField) = a + +@testset "issue #50937: styles that have fields" begin + @test Broadcast.result_style(MyBroadcastStyleWithField(1), MyBroadcastStyleWithField(1)) == + MyBroadcastStyleWithField(1) + @test_throws ErrorException Broadcast.result_style(MyBroadcastStyleWithField(1), + MyBroadcastStyleWithField(2)) + dest = [0, 0] + dest .= Broadcast.Broadcasted(MyBroadcastStyleWithField(1), +, (1:2, 2:3)) + @test dest == [3, 5] +end + # test that `Broadcast` definition is defined as total and eligible for concrete evaluation import Base.Broadcast: BroadcastStyle, DefaultArrayStyle @test Base.infer_effects(BroadcastStyle, 
(DefaultArrayStyle{1},DefaultArrayStyle{2},)) |> Core.Compiler.is_foldable + +f51129(v, x) = (1 .- (v ./ x) .^ 2) +@test @inferred(f51129([13.0], 6.5)) == [-3.0] + +@testset "Docstrings" begin + undoc = Docs.undocumented_names(Broadcast) + @test_broken isempty(undoc) + @test undoc == [:dotview] +end + +@testset "broadcast for `AbstractArray` without `CartesianIndex` support" begin + struct BVec52775 <: AbstractVector{Int} + a::Vector{Int} + end + Base.size(a::BVec52775) = size(a.a) + Base.getindex(a::BVec52775, i::Real) = a.a[i] + Base.getindex(a::BVec52775, i) = error("unsupported index!") + a = BVec52775([1,2,3]) + bc = Base.broadcasted(identity, a) + @test bc[1] == bc[CartesianIndex(1)] == bc[1, CartesianIndex()] + @test a .+ [1 2] == a.a .+ [1 2] +end diff --git a/test/buildkitetestjson.jl b/test/buildkitetestjson.jl new file mode 100644 index 0000000000000..0d51cd3b18f8f --- /dev/null +++ b/test/buildkitetestjson.jl @@ -0,0 +1,167 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Convert test(set) results to a Buildkite-compatible JSON representation. +# Based on . + +module BuildkiteTestJSON + +using Test +using Dates + +export write_testset_json_files + +# Bootleg JSON writer + +""" + json_repr(io::IO, value; kwargs...) -> Nothing + +Obtain a JSON representation of `value`, and print it to `io`. + +This may not be the best, most feature-complete, or fastest implementation. +However, it works for its intended purpose. +""" +function json_repr end + +function json_repr(io::IO, val::String; indent::Int=0) + print(io, '"') + escape_string(io, val, ('"',)) + print(io, '"') +end +json_repr(io::IO, val::Integer; indent::Int=0) = print(io, val) +json_repr(io::IO, val::Float64; indent::Int=0) = print(io, val) +function json_repr(io::IO, val::AbstractVector; indent::Int=0) + print(io, '[') + for i in eachindex(val) + print(io, '\n', ' '^(indent + 2)) + json_repr(io, val[i]; indent=indent+2) + i == lastindex(val) || print(io, ',') + end + print(io, '\n', ' '^indent, ']') +end +function json_repr(io::IO, val::Dict; indent::Int=0) + print(io, '{') + for (i, (k, v)) in enumerate(pairs(val)) + print(io, '\n', ' '^(indent + 2)) + json_repr(io, string(k)) + print(io, ": ") + json_repr(io, v; indent=indent+2) + i === length(val) || print(io, ',') + end + print(io, '\n', ' '^indent, '}') +end +json_repr(io::IO, val::Any; indent::Int=0) = json_repr(io, string(val)) + +# Test result processing + +function result_dict(testset::Test.DefaultTestSet, prefix::String="") + Dict{String, Any}( + "id" => Base.UUID(rand(UInt128)), + "scope" => join((prefix, testset.description), '/'), + "history" => if !isnothing(testset.time_end) + Dict{String, Any}( + "start_at" => testset.time_start, + "end_at" => testset.time_end, + "duration" => testset.time_end - testset.time_start) + else + Dict{String, Any}("start_at" => testset.time_start, "duration" => 0.0) + end) +end + +function result_dict(result::Test.Result) + file, line = if !hasproperty(result, :source) || isnothing(result.source) + "unknown", 0 + else + something(result.source.file, "unknown"), result.source.line + end + status = if result isa Test.Pass && result.test_type === :skipped + "skipped" + elseif result isa Test.Pass + "passed" + elseif result isa Test.Fail || result isa Test.Error + "failed" + else + "unknown" + end + data = Dict{String, Any}( + "name" => "$(result.test_type): $(result.orig_expr)", + "location" => string(file, ':', line), + "file_name" => file, + "result" => status) + add_failure_info!(data, 
result)
+end
+
+function add_failure_info!(data::Dict{String, Any}, result::Test.Result)
+    if result isa Test.Fail
+        data["failure_reason"] = if result.test_type === :test && !isnothing(result.data)
+            "Evaluated: $(result.data)"
+        elseif result.test_type === :test_throws_nothing
+            "No exception thrown"
+        elseif result.test_type === :test_throws_wrong
+            "Wrong exception type thrown"
+        else
+            "unknown"
+        end
+    elseif result isa Test.Error
+        data["failure_reason"] = if result.test_type === :test_error
+            if occursin("\nStacktrace:\n", result.backtrace)
+                err, trace = split(result.backtrace, "\nStacktrace:\n", limit=2)
+                data["failure_expanded"] =
+                    [Dict{String,Any}("expanded" => split(err, '\n'),
+                                      "backtrace" => split(trace, '\n'))]
+            end
+            "Exception (unexpectedly) thrown during test"
+        elseif result.test_type === :test_nonbool
+            "Expected the expression to evaluate to a Bool, not a $(typeof(result.data))"
+        elseif result.test_type === :test_unbroken
+            "Expected this test to be broken, but it passed"
+        else
+            "unknown"
+        end
+    end
+    data
+end
+
+function collect_results!(results::Vector{Dict{String, Any}}, testset::Test.DefaultTestSet, prefix::String="")
+    common_data = result_dict(testset, prefix)
+    result_offset = length(results) + 1
+    result_counts = Dict{Tuple{String, String}, Int}()
+    for (i, result) in enumerate(testset.results)
+        if result isa Test.Result
+            rdata = result_dict(result)
+            rid = (rdata["location"], rdata["result"])
+            if haskey(result_counts, rid)
+                result_counts[rid] += 1
+            else
+                result_counts[rid] = 1
+                push!(results, merge(common_data, rdata))
+            end
+        elseif result isa Test.DefaultTestSet
+            collect_results!(results, result, common_data["scope"])
+        end
+    end
+    # Modify names to hold `result_counts`
+    for i in result_offset:length(results)
+        result = results[i]
+        rid = (result["location"], result["result"])
+        if get(result_counts, rid, 0) > 1
+            result["name"] = replace(result["name"], r"^([^:]*):" =>
+                SubstitutionString("\\1 (x$(result_counts[rid])):"))
+        end
+    end
+    results
+end
+
+function write_testset_json_files(dir::String, testset::Test.DefaultTestSet)
+    data = Dict{String, Any}[]
+    collect_results!(data, testset)
+    files = String[]
+    # Buildkite is limited to 5000 results per file https://buildkite.com/docs/test-analytics/importing-json
+    for (i, chunk) in enumerate(Iterators.partition(data, 5000))
+        res_file = joinpath(dir, "results_$i.json")
+        open(io -> json_repr(io, chunk), res_file, "w")
+        push!(files, res_file)
+    end
+    return files
+end
+
+end
diff --git a/test/cartesian.jl b/test/cartesian.jl
index ed33f2c1035f7..7064b54ebbb8d 100644
--- a/test/cartesian.jl
+++ b/test/cartesian.jl
@@ -1,12 +1,20 @@
 # This file is a part of Julia. 
License is MIT: https://julialang.org/license -@test Base.Cartesian.exprresolve(:(1 + 3)) == 4 + ex = Base.Cartesian.exprresolve(:(if 5 > 4; :x; else :y; end)) @test ex.args[2] == QuoteNode(:x) @test Base.Cartesian.lreplace!("val_col", Base.Cartesian.LReplace{String}(:col, "col", 1)) == "val_1" @test Base.setindex(CartesianIndex(1,5,4),3,2) == CartesianIndex(1, 3, 4) - +@testset "Expression Resolve" begin + @test Base.Cartesian.exprresolve(:(1 + 3)) == 4 + ex1 = Expr(:ref, [1, 2, 3], 2) + result1 = Base.Cartesian.exprresolve(ex1) + @test result1 == 2 + ex2 = Expr(:ref, [1, 2, 3], "non-real-index") + result2 = Base.Cartesian.exprresolve(ex2) + @test result2 == ex2 +end @testset "CartesianIndices constructions" begin @testset "AbstractUnitRange" begin for oinds in [ @@ -296,8 +304,7 @@ end R = CartesianIndex(1, 1):CartesianIndex(2, 3):CartesianIndex(4, 5) @test R.indices == (1:2:3, 1:3:4) i = CartesianIndex(4, 1) - i_next = CartesianIndex(1, 4) - @test !(i in R) && iterate(R, i) == (i_next, i_next) + @test !(i in R) for R in [ CartesianIndices((1:-1:-1, 1:2:5)), @@ -393,19 +400,20 @@ end @testset "CartesianIndices overflow" begin @testset "incremental steps" begin + # n.b. typemax is an odd number I = CartesianIndices((1:typemax(Int),)) i = last(I) @test iterate(I, i) === nothing I = CartesianIndices((1:2:typemax(Int), )) - i = CartesianIndex(typemax(Int)-1) + i = CartesianIndex(typemax(Int)) @test iterate(I, i) === nothing I = CartesianIndices((1:(typemax(Int)-1),)) - i = CartesianIndex(typemax(Int)) + i = CartesianIndex(typemax(Int)-1) @test iterate(I, i) === nothing - I = CartesianIndices((1:2:typemax(Int)-1, )) + I = CartesianIndices((2:2:typemax(Int)-1, )) i = CartesianIndex(typemax(Int)-1) @test iterate(I, i) === nothing @@ -413,7 +421,7 @@ end i = last(I) @test iterate(I, i) === nothing - I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int))) + I = CartesianIndices((2:2:typemax(Int), 2:2:typemax(Int))) i = CartesianIndex(typemax(Int)-1, typemax(Int)-1) @test iterate(I, i) === nothing @@ -421,9 +429,9 @@ end i = CartesianIndex(typemax(Int), 1) @test iterate(I, i) === (CartesianIndex(1, 2), CartesianIndex(1,2)) - I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int))) + I = CartesianIndices((2:2:typemax(Int), 2:2:typemax(Int))) i = CartesianIndex(typemax(Int)-1, 1) - @test iterate(I, i) === (CartesianIndex(1, 3), CartesianIndex(1, 3)) + @test iterate(I, i) === (CartesianIndex(2, 3), CartesianIndex(2, 3)) I = CartesianIndices((typemin(Int):(typemin(Int)+3),)) i = last(I) @@ -493,15 +501,6 @@ end end @test length(I) == length(indices) @test vec(collect(I)) == indices - - # test invalid state - I = CartesianIndices((2:4, 3:5)) - @test iterate(I, CartesianIndex(typemax(Int), 3))[1] == CartesianIndex(2,4) - @test iterate(I, CartesianIndex(typemax(Int), 4))[1] == CartesianIndex(2,5) - @test iterate(I, CartesianIndex(typemax(Int), 5)) === nothing - - @test iterate(I, CartesianIndex(3, typemax(Int)))[1] == CartesianIndex(4,typemax(Int)) - @test iterate(I, CartesianIndex(4, typemax(Int))) === nothing end @testset "CartesianIndices operations" begin @@ -542,3 +541,35 @@ end inds2 = (1, CI(1, 2), 1, CI(1, 2), 1, CI(1, 2), 1) @test (@inferred CI(inds2)) == CI(1, 1, 2, 1, 1, 2, 1, 1, 2, 1) end + +@testset "@ncallkw" begin + f(x...; a, b = 1, c = 2, d = 3) = +(x..., a, b, c, d) + x_1, x_2 = (-1, -2) + kw = (a = 0, c = 0, d = 0) + @test x_1 + x_2 + 1 + 4 == Base.Cartesian.@ncallkw 2 f kw 4 x + b = 0 + kw = (c = 0, d = 0) + @test x_1 + x_2 + 4 == Base.Cartesian.@ncallkw 2 f (; a = 0, b, kw...) 
4 x +end + +@testset "if with and without else branch" begin + t1 = Base.Cartesian.@ntuple 3 i -> i == 1 ? 1 : 0 + t2 = Base.Cartesian.@ntuple 3 i -> begin + m = 0 + if i == 1 + m = 1 + end + m + end + @test t1 == t2 + t3 = Base.Cartesian.@ntuple 3 i -> begin + m = 0 + if i == 1 + m = 1 + elseif i == 2 + m = 2 + end + m + end + @test t3 == (1, 2, 0) +end diff --git a/test/ccall.jl b/test/ccall.jl index 7e166ddbd9041..b10504de21abc 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -1477,7 +1477,7 @@ end # issue #20835 @test_throws(ErrorException("could not evaluate ccall argument type (it might depend on a local variable)"), eval(:(f20835(x) = ccall(:fn, Cvoid, (Ptr{typeof(x)},), x)))) -@test_throws(UndefVarError(:Something_not_defined_20835), +@test_throws(UndefVarError(:Something_not_defined_20835, @__MODULE__), eval(:(f20835(x) = ccall(:fn, Something_not_defined_20835, (Ptr{typeof(x)},), x)))) @test isempty(methods(f20835)) @@ -1838,7 +1838,7 @@ ccall_lazy_lib_name(x) = ccall((:testUcharX, compute_lib_name()), Int32, (UInt8, @test ccall_lazy_lib_name(0) == 0 @test ccall_lazy_lib_name(3) == 1 ccall_with_undefined_lib() = ccall((:time, xx_nOt_DeFiNeD_xx), Cint, (Ptr{Cvoid},), C_NULL) -@test_throws UndefVarError(:xx_nOt_DeFiNeD_xx) ccall_with_undefined_lib() +@test_throws UndefVarError(:xx_nOt_DeFiNeD_xx, @__MODULE__) ccall_with_undefined_lib() @testset "transcode for UInt8 and UInt16" begin a = [UInt8(1), UInt8(2), UInt8(3)] @@ -1915,3 +1915,54 @@ end ctest_total_const() = Val{ctest_total(1 + 2im)}() Core.Compiler.return_type(ctest_total_const, Tuple{}) == Val{2 + 0im} end + +const libfrobozz = "" + +function somefunction_not_found() + ccall((:somefunction, libfrobozz), Cvoid, ()) +end + +function somefunction_not_found_libc() + ccall(:test,Int,()) +end + +@testset "library not found" begin + if Sys.islinux() + @test_throws "could not load symbol \"somefunction\"" somefunction_not_found() + else + @test_throws "could not load library \"\"" somefunction_not_found() + end + @test_throws "could not load symbol \"test\"" somefunction_not_found_libc() +end + +# issue #52025 +@test Base.unsafe_convert(Ptr{Ptr{Cchar}}, Base.cconvert(Ptr{Ptr{Cchar}}, map(pointer, ["ab"]))) isa Ptr{Ptr{Cchar}} +#issue #54725 +for A in (reinterpret(UInt, [0]), reshape([0, 0], 1, 2)) + @test pointer(A) == Base.unsafe_convert(Ptr{Cvoid}, A) == Base.unsafe_convert(Ptr{Int}, A) +end +# Cglobal with non-static symbols doesn't error +function cglobal_non_static1() + sym = (:global_var, libccalltest) + cglobal(sym) +end +global the_sym = (:global_var, libccalltest) +cglobal_non_static2() = cglobal(the_sym) + +@test isa(cglobal_non_static1(), Ptr) +@test isa(cglobal_non_static2(), Ptr) + +@generated function generated_world_counter() + return :($(Base.get_world_counter())) +end +function world_counter() + return Base.get_world_counter() +end +let llvm = sprint(code_llvm, world_counter, ()) + # check that we got a reasonable value for the world age + @test (world_counter() != 0) && (world_counter() != -1) + # no call to the runtime should be left over + @test !occursin("call i64", llvm) + # the world age should be -1 in generated functions (or other pure contexts) + @test (generated_world_counter() == reinterpret(UInt, -1)) +end diff --git a/test/channel_threadpool.jl b/test/channel_threadpool.jl new file mode 100644 index 0000000000000..4509604087fa8 --- /dev/null +++ b/test/channel_threadpool.jl @@ -0,0 +1,14 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test +using Base.Threads + +@testset "Task threadpools" begin + c = Channel{Symbol}() do c; put!(c, threadpool(current_task())); end + @test take!(c) === threadpool(current_task()) + c = Channel{Symbol}(spawn = true) do c; put!(c, threadpool(current_task())); end + @test take!(c) === :default + c = Channel{Symbol}(threadpool = :interactive) do c; put!(c, threadpool(current_task())); end + @test take!(c) === :interactive + @test_throws ArgumentError Channel{Symbol}(threadpool = :foo) do c; put!(c, :foo); end +end diff --git a/test/channels.jl b/test/channels.jl index dbda5cf069081..4acf6c94da1b6 100644 --- a/test/channels.jl +++ b/test/channels.jl @@ -12,6 +12,9 @@ using Base: n_avail end @test wait(a) == "success" @test fetch(t) == "finished" + + # Test printing + @test repr(a) == "Condition()" end @testset "wait first behavior of wait on Condition" begin @@ -40,6 +43,8 @@ end c = Channel() @test eltype(c) == Any @test c.sz_max == 0 + @test isempty(c) == true # Nothing in it + @test isfull(c) == true # But no more room c = Channel(1) @test eltype(c) == Any @@ -49,6 +54,11 @@ end @test isready(c) == false @test eltype(Channel(1.0)) == Any + c = Channel(1) + @test isfull(c) == false + put!(c, 1) + @test isfull(c) == true + c = Channel{Int}(1) @test eltype(c) == Int @test_throws MethodError put!(c, "Hello") @@ -107,6 +117,11 @@ end @test taskref[].sticky == false @test collect(c) == [0] end +let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no channel_threadpool.jl` + new_env = copy(ENV) + new_env["JULIA_NUM_THREADS"] = "1,1" + run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)) +end @testset "multiple concurrent put!/take! on a channel for different sizes" begin function testcpt(sz) @@ -370,7 +385,7 @@ end """error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")""", output)) # test for invalid state in Workqueue during yield t = @async nothing - t._state = 66 + @atomic t._state = 66 newstderr = redirect_stderr() try errstream = @async read(newstderr[1], String) @@ -452,8 +467,8 @@ end Sys.iswindows() && Base.process_events() # schedule event (windows?) 
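+        # note on the updated expectations below: once the condition is closed, the tick
+        # counter reads 3 and stays there; the checks after the explicit yield and after the
+        # sleep confirm that no further events arrive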
close(async) # and close @test !isopen(async) - @test tc[] == 2 - @test tc[] == 2 + @test tc[] == 3 + @test tc[] == 3 yield() # consume event & then close @test tc[] == 3 sleep(0.1) # no further events @@ -474,7 +489,7 @@ end close(async) @test !isopen(async) Base.process_events() # and close - @test tc[] == 0 + @test tc[] == 1 yield() # consume event & then close @test tc[] == 1 sleep(0.1) # no further events @@ -484,12 +499,35 @@ end end end +struct CustomError <: Exception end + @testset "check_channel_state" begin c = Channel(1) close(c) @test !isopen(c) - c.excp == nothing # to trigger the branch + c.excp === nothing # to trigger the branch @test_throws InvalidStateException Base.check_channel_state(c) + + # Issue 52974 - closed channels with exceptions + # must be thrown on iteration, if channel is empty + c = Channel(2) + put!(c, 5) + close(c, CustomError()) + @test take!(c) == 5 + @test_throws CustomError iterate(c) + + c = Channel(Inf) + put!(c, 1) + close(c) + @test take!(c) == 1 + @test_throws InvalidStateException take!(c) + @test_throws InvalidStateException put!(c, 5) + + c = Channel(3) + put!(c, 1) + close(c) + @test first(iterate(c)) == 1 + @test isnothing(iterate(c)) end # PR #36641 @@ -535,8 +573,11 @@ end # make sure 1-shot timers work let a = [] Timer(t -> push!(a, 1), 0.01, interval = 0) - sleep(0.2) - @test a == [1] + @test timedwait(() -> a == [1], 10) === :ok +end +let a = [] + Timer(t -> push!(a, 1), 0.01, interval = 0, spawn = true) + @test timedwait(() -> a == [1], 10) === :ok end # make sure that we don't accidentally create a one-shot timer @@ -554,7 +595,7 @@ end e = @elapsed for i = 1:5 wait(t) end - @test 1.5 > e >= 0.4 + @test e >= 0.4 @test a[] == 0 nothing end @@ -626,3 +667,11 @@ end @test n_avail(c) == 0 end end + +@testset "Task properties" begin + f() = rand(2,2) + t = Task(f) + message = "Querying a Task's `scope` field is disallowed.\nThe private `Core.current_scope()` function is better, though still an implementation detail." + @test_throws ErrorException(message) t.scope + @test t.state == :runnable +end diff --git a/test/char.jl b/test/char.jl index 1639c62ec819d..5523125529031 100644 --- a/test/char.jl +++ b/test/char.jl @@ -1,7 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license @testset "basic properties" begin - @test typemax(Char) == reinterpret(Char, typemax(UInt32)) @test typemin(Char) == Char(0) @test typemax(Char) == reinterpret(Char, 0xffffffff) @@ -122,7 +121,7 @@ end #iterate(c::Char) for x in testarrays @test iterate(x)[1] == x - @test iterate(x, iterate(x)[2]) == nothing + @test iterate(x, iterate(x)[2]) === nothing end #isless(x::Char, y::Integer) = isless(UInt32(x), y) @@ -214,6 +213,35 @@ end end end +# issue #50532 +@testset "invalid read(io, Char)" begin + # byte values with different numbers of leading bits + B = UInt8[ + 0x3f, 0x4d, 0x52, 0x63, 0x81, 0x83, 0x89, 0xb6, + 0xc0, 0xc8, 0xd3, 0xe3, 0xea, 0xeb, 0xf0, 0xf2, + 0xf4, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, + ] + f = tempname() + for b1 in B, b2 in B, t = 0:3 + bytes = [b1, b2] + append!(bytes, rand(B, t)) + s = String(bytes) + write(f, s) + @test s == read(f, String) + chars = collect(s) + ios = [IOBuffer(s), open(f), Base.Filesystem.open(f, 0)] + for io in ios + chars′ = Char[] + while !eof(io) + push!(chars′, read(io, Char)) + end + @test chars == chars′ + close(io) + end + end + rm(f) +end + @testset "overlong codes" begin function test_overlong(c::Char, n::Integer, rep::String) if isvalid(c) @@ -260,6 +288,10 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c) @test string(ASCIIChar('x')) == "x" @test length(ASCIIChar('x')) == 1 @test !isempty(ASCIIChar('x')) + @test ndims(ASCIIChar('x')) == 0 + @test ndims(ASCIIChar) == 0 + @test firstindex(ASCIIChar('x')) == 1 + @test lastindex(ASCIIChar('x')) == 1 @test eltype(ASCIIChar) == ASCIIChar @test_throws MethodError write(IOBuffer(), ASCIIChar('x')) @test_throws MethodError read(IOBuffer('x'), ASCIIChar) @@ -332,3 +364,31 @@ end @test Base.IteratorSize(Char) == Base.HasShape{0}() @test convert(ASCIIChar, 1) == Char(1) end + +@testset "foldable functions" begin + v = @inferred (() -> Val(isuppercase('C')))() + @test v isa Val{true} + v = @inferred (() -> Val(islowercase('C')))() + @test v isa Val{false} + + v = @inferred (() -> Val(isletter('C')))() + @test v isa Val{true} + v = @inferred (() -> Val(isnumeric('C')))() + @test v isa Val{false} + + struct MyChar <: AbstractChar + x :: Char + end + Base.codepoint(m::MyChar) = codepoint(m.x) + MyChar(x::UInt32) = MyChar(Char(x)) + + v = @inferred (() -> Val(isuppercase(MyChar('C'))))() + @test v isa Val{true} + v = @inferred (() -> Val(islowercase(MyChar('C'))))() + @test v isa Val{false} + + v = @inferred (() -> Val(isletter(MyChar('C'))))() + @test v isa Val{true} + v = @inferred (() -> Val(isnumeric(MyChar('C'))))() + @test v isa Val{false} +end diff --git a/test/checked.jl b/test/checked.jl index bacda3db75dec..4031918a38730 100644 --- a/test/checked.jl +++ b/test/checked.jl @@ -3,7 +3,7 @@ # Checked integer arithmetic import Base: checked_abs, checked_neg, checked_add, checked_sub, checked_mul, - checked_div, checked_rem, checked_fld, checked_mod, checked_cld, + checked_div, checked_rem, checked_fld, checked_mod, checked_cld, checked_pow, add_with_overflow, sub_with_overflow, mul_with_overflow # checked operations @@ -166,6 +166,19 @@ import Base: checked_abs, checked_neg, checked_add, checked_sub, checked_mul, @test checked_cld(typemin(T), T(1)) === typemin(T) @test_throws DivideError checked_cld(typemin(T), T(0)) @test_throws DivideError checked_cld(typemin(T), T(-1)) + + @test checked_pow(T(1), T(0)) === T(1) + @test checked_pow(typemax(T), T(0)) === T(1) + @test checked_pow(typemin(T), T(0)) === T(1) + @test checked_pow(T(1), T(1)) 
=== T(1) + @test checked_pow(T(1), typemax(T)) === T(1) + @test checked_pow(T(2), T(2)) === T(4) + @test_throws OverflowError checked_pow(T(2), typemax(T)) + @test_throws OverflowError checked_pow(T(-2), typemax(T)) + @test_throws OverflowError checked_pow(typemax(T), T(2)) + @test_throws OverflowError checked_pow(typemin(T), T(2)) + @test_throws DomainError checked_pow(T(2), -T(1)) + @test_throws DomainError checked_pow(-T(2), -T(1)) end @testset for T in (UInt8, UInt16, UInt32, UInt64, UInt128) @@ -296,6 +309,10 @@ end @test checked_cld(true, true) === true @test checked_cld(false, true) === false @test_throws DivideError checked_cld(true, false) + + @test checked_pow(true, 1) === true + @test checked_pow(true, 1000000) === true + @test checked_pow(false, 1000000) === false end @testset "BigInt" begin @test checked_abs(BigInt(-1)) == BigInt(1) @@ -310,6 +327,12 @@ end @test checked_fld(BigInt(10), BigInt(3)) == BigInt(3) @test checked_mod(BigInt(9), BigInt(4)) == BigInt(1) @test checked_cld(BigInt(10), BigInt(3)) == BigInt(4) + + @test checked_pow(BigInt(2), 2) == BigInt(4) + @test checked_pow(BigInt(2), 100) == BigInt(1267650600228229401496703205376) + + # Perf test: Make sure BigInts allocs don't scale with the power: + @test @allocations(checked_pow(BigInt(2), 2)) ≈ @allocations(checked_pow(BigInt(2), 10000)) rtol=0.9 end @testset "Additional tests" begin @@ -358,3 +381,7 @@ end @test checked_mul(1, 2, 3, 4, 5, 6, 7) === 5040 @test checked_mul(1, 2, 3, 4, 5, 6, 7, 8) === 40320 end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(Base.Checked)) +end diff --git a/test/choosetests.jl b/test/choosetests.jl index 18af88ea191e9..ed441131f061f 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -19,16 +19,17 @@ const TESTNAMES = [ "mpfr", "broadcast", "complex", "floatapprox", "stdlib", "reflection", "regex", "float16", "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi", - "euler", "show", "client", + "euler", "show", "client", "terminfo", "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu", "some", "meta", "stacktraces", "docs", "gc", "misc", "threads", "stress", "binaryplatforms", "atexit", "enums", "cmdlineargs", "int", "interpreter", - "checked", "bitset", "floatfuncs", "precompile", + "checked", "bitset", "floatfuncs", "precompile", "relocatedepot", "boundscheck", "error", "ambiguous", "cartesian", "osutils", "channels", "iostream", "secretbuffer", "specificity", "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", + "scopedvalues", "compileall", "rebinding" ] const INTERNET_REQUIRED_LIST = [ @@ -44,6 +45,23 @@ const INTERNET_REQUIRED_LIST = [ const NETWORK_REQUIRED_LIST = vcat(INTERNET_REQUIRED_LIST, ["Sockets"]) +function test_path(test) + t = split(test, '/') + if t[1] in STDLIBS + pkgdir = abspath(Base.find_package(String(t[1])), "..", "..") + if length(t) == 2 + return joinpath(pkgdir, "test", t[2]) + else + return joinpath(pkgdir, "test", "runtests") + end + elseif t[1] == "Compiler" + testpath = length(t) >= 2 ? t[2:end] : ("runtests",) + return joinpath(@__DIR__, "..", t[1], "test", testpath...) + else + return joinpath(@__DIR__, test) + end +end + """ `(; tests, net_on, exit_on_error, seed) = choosetests(choices)` selects a set of tests to be run. 
`choices` should be a vector of test names; if empty or set to @@ -150,17 +168,10 @@ function choosetests(choices = []) filtertests!(tests, "unicode", ["unicode/utf8"]) filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util", - "strings/io", "strings/types"]) + "strings/io", "strings/types", "strings/annotated"]) # do subarray before sparse but after linalg filtertests!(tests, "subarray") - filtertests!(tests, "compiler", [ - "compiler/datastructures", "compiler/inference", "compiler/effects", - "compiler/validation", "compiler/ssair", "compiler/irpasses", - "compiler/codegen", "compiler/inline", "compiler/contextual", - "compiler/invalidation", "compiler/AbstractInterpreter", - "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"]) - filtertests!(tests, "compiler/EscapeAnalysis", [ - "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"]) + filtertests!(tests, "compiler", ["Compiler"]) filtertests!(tests, "stdlib", STDLIBS) filtertests!(tests, "internet_required", INTERNET_REQUIRED_LIST) # do ambiguous first to avoid failing if ambiguities are introduced by other tests @@ -207,8 +218,8 @@ function choosetests(choices = []) new_tests = String[] for test in tests - if test in STDLIBS - testfile = joinpath(STDLIB_DIR, test, "test", "testgroups") + if test in STDLIBS || test == "Compiler" + testfile = test_path("$test/testgroups") if isfile(testfile) testgroups = readlines(testfile) length(testgroups) == 0 && error("no testgroups defined for $test") @@ -218,7 +229,7 @@ function choosetests(choices = []) end end end - filter!(x -> (x != "stdlib" && !(x in STDLIBS)) , tests) + filter!(x -> (x != "stdlib" && !(x in STDLIBS) && x != "Compiler") , tests) append!(tests, new_tests) requested_all || explicit_pkg || filter!(x -> x != "Pkg", tests) diff --git a/test/clangsa/GCPushPop.cpp b/test/clangsa/GCPushPop.cpp index a62c1501bf323..6736d3e181118 100644 --- a/test/clangsa/GCPushPop.cpp +++ b/test/clangsa/GCPushPop.cpp @@ -1,6 +1,6 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s +// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s #include "julia.h" #include @@ -18,7 +18,7 @@ void missingPop2() { } // expected-warning{{Non-popped GC frame present at end of function}} // expected-note@-1{{Non-popped GC frame present at end of function}} -void superflousPop() { +void superfluousPop() { JL_GC_POP(); // expected-warning{{JL_GC_POP without corresponding push}} } // expected-note@-1{{JL_GC_POP without corresponding push}} diff --git a/test/clangsa/ImplicitAtomicsTest.c b/test/clangsa/ImplicitAtomicsTest.c index 87154347d9757..cfac3f38e679a 100644 --- a/test/clangsa/ImplicitAtomicsTest.c +++ b/test/clangsa/ImplicitAtomicsTest.c @@ -1,7 +1,7 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license -// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s -// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s +// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s +// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s #include "julia_atomics.h" diff --git a/test/clangsa/Makefile b/test/clangsa/Makefile index 3bebd45c9a5a6..609809884fce1 100644 --- a/test/clangsa/Makefile +++ b/test/clangsa/Makefile @@ -13,7 +13,7 @@ TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.c) $(wildcard $(SRCDIR)/ PATH=$(build_bindir):$(build_depsbindir):$$PATH \ LD_LIBRARY_PATH="${build_libdir}:$$LD_LIBRARY_PATH" \ CLANGSA_FLAGS="${CLANGSA_FLAGS}" \ - CLANGSACXX_FLAGS="${CLANGSACXX_FLAGS}" \ + CLANGSA_CXXFLAGS="${CLANGSA_CXXFLAGS}" \ CPPFLAGS_FLAGS="${CPPFLAGS_FLAGS}" \ CFLAGS_FLAGS="${CFLAGS_FLAGS}" \ CXXFLAGS_FLAGS="${CXXFLAGS_FLAGS}" \ diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c index 0ff5e633622ce..0a0d5369eba44 100644 --- a/test/clangsa/MissingRoots.c +++ b/test/clangsa/MissingRoots.c @@ -1,6 +1,6 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license -// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s +// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CLANGSA_CXXFLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s #include "julia.h" #include "julia_internal.h" @@ -328,7 +328,7 @@ void scopes() { jl_module_t *propagation(jl_module_t *m JL_PROPAGATES_ROOT); void module_member(jl_module_t *m) { - for(int i=(int)m->usings.len-1; i >= 0; --i) { + for(int i=(int)m->usings.len-1; i >= 0; i -= 3) { jl_module_t *imp = propagation(m); jl_gc_safepoint(); look_at_value((jl_value_t*)imp); @@ -415,7 +415,7 @@ void stack_rooted(jl_value_t *lb JL_MAYBE_UNROOTED, jl_value_t *ub JL_MAYBE_UNRO JL_DLLEXPORT jl_value_t *jl_totally_used_function(int i) { jl_value_t *v = jl_box_int32(i); // expected-note{{Started tracking value here}} - jl_safepoint(); // expected-note{{Value may have been GCed here}} + jl_gc_safepoint(); // expected-note{{Value may have been GCed here}} return v; // expected-warning{{Return value may have been GCed}} // expected-note@-1{{Return value may have been GCed}} } diff --git a/test/client.jl b/test/client.jl index 0649ab3241d62..61fe7d5093474 100644 --- a/test/client.jl +++ b/test/client.jl @@ -12,14 +12,14 @@ nested_error_pattern = r""" ERROR: DivideError: integer division error Stacktrace:.* - caused by: UndefVarError: `__not_a_binding__` not defined + caused by: UndefVarError: `__not_a_binding__` not defined in `Main` Stacktrace:.* """s @testset "display_error" begin # Display of errors which cause more than one entry on the exception stack excs = try - eval(nested_error_expr) + Core.eval(Main, nested_error_expr) catch Base.current_exceptions() end @@ -31,7 +31,7 @@ nested_error_pattern = r""" DivideError: integer division error Stacktrace:.* - caused by: UndefVarError: `__not_a_binding__` not defined + caused by: UndefVarError: `__not_a_binding__` not defined in `Main` Stacktrace:.* """s, sprint(show, excs)) end @@ -52,3 +52,8 @@ end ERROR: ErrorException """s, err_str) end + +@testset "defining `ans` and `err`" begin + @test eval(:(ans = 1)) == 1 + @test eval(:(err = 1)) == 1 +end diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 917031b57fe5f..74f953250cd37 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -4,8 +4,8 @@ import Libdl # helper function for passing input to stdin # and returning the stdout result -function writereadpipeline(input, exename) - p = open(exename, "w+") +function writereadpipeline(input, exename; stderr=nothing) + p = open(pipeline(exename; stderr), "w+") @async begin write(p.in, input) close(p.in) @@ -62,11 +62,25 @@ end @testset "julia_cmd" begin julia_basic = Base.julia_cmd() + function get_julia_cmd(arg) + io = Base.BufferStream() + cmd = `$julia_basic $arg -e 'print(repr(Base.julia_cmd()))'` + try + run(pipeline(cmd, stdout=io, stderr=io)) + catch + @error "cmd failed" cmd read(io, String) + rethrow() + end + closewrite(io) + return read(io, String) + end + opts = Base.JLOptions() - get_julia_cmd(arg) = 
strip(read(`$julia_basic $arg -e 'print(repr(Base.julia_cmd()))'`, String), ['`']) for (arg, default) in ( - ("-C$(unsafe_string(opts.cpu_target))", false), + # Use a Cmd to handle space nicely when + # interpolating inside another Cmd. + (`-C $(unsafe_string(opts.cpu_target))`, false), ("-J$(unsafe_string(opts.image_file))", false), @@ -123,31 +137,45 @@ end ("--pkgimages=no", false), ) @testset "$arg" begin + str = arg isa Cmd ? join(arg.exec, ' ') : arg if default - @test !occursin(arg, get_julia_cmd(arg)) + @test !occursin(str, get_julia_cmd(arg)) else - @test occursin(arg, get_julia_cmd(arg)) + @test occursin(str, get_julia_cmd(arg)) end end end + + # Test empty `cpu_target` gives a helpful error message, issue #52209. + io = IOBuffer() + p = run(pipeline(`$(Base.julia_cmd(; cpu_target="")) --startup-file=no -e ''`; stderr=io); wait=false) + wait(p) + @test p.exitcode == 1 + @test occursin("empty CPU name", String(take!(io))) end let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # tests for handling of ENV errors - let v = writereadpipeline( + let + io = IOBuffer() + v = writereadpipeline( "println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", setenv(`$exename -i -E '@assert isempty(LOAD_PATH); push!(LOAD_PATH, "@stdlib"); @isdefined InteractiveUtils'`, "JULIA_LOAD_PATH" => "", "JULIA_DEPOT_PATH" => ";:", - "HOME" => homedir())) - @test v == ("false\nREPL: InteractiveUtilstrue\n", true) + "HOME" => homedir()); + stderr=io) + # @which is undefined + @test_broken v == ("false\nREPL: InteractiveUtilstrue\n", true) + stderr = String(take!(io)) + @test_broken isempty(stderr) end let v = writereadpipeline("println(\"REPL: \", InteractiveUtils)", setenv(`$exename -i -e 'const InteractiveUtils = 3'`, "JULIA_LOAD_PATH" => ";;;:::", "JULIA_DEPOT_PATH" => ";;;:::", "HOME" => homedir())) - # TODO: ideally, `@which`, etc. would still work, but Julia can't handle `using $InterativeUtils` + # TODO: ideally, `@which`, etc. would still work, but Julia can't handle `using $InteractiveUtils` @test v == ("REPL: 3\n", true) end @testset let v = readchomperrors(`$exename -i -e ' @@ -159,7 +187,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # make sure this is a non-fatal error and the REPL still loads @test v[1] @test isempty(v[2]) - @test startswith(v[3], "┌ Warning: Failed to import InteractiveUtils into module Main\n") + # Can't load REPL if it's outside the sysimg if we break the load path. 
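+    # When the load path is broken like this, the REPL provider cannot be found, so the old
+    # "Failed to import InteractiveUtils" warning asserted below no longer appears; the
+    # fallback warning quoted in the following comment is emitted instead, hence the @test_broken.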
+ # Need to rewrite this test nicer + # ┌ Warning: REPL provider not available: using basic fallback + # └ @ Base client.jl:459 + @test_broken startswith(v[3], "┌ Warning: Failed to import InteractiveUtils into module Main\n") end real_threads = string(ccall(:jl_cpu_threads, Int32, ())) for nc in ("0", "-2", "x", "2x", " ", "") @@ -225,17 +257,24 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test expanded == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@foo", "HOME" => homedir())) end + # handling of `@temp` in --project and JULIA_PROJECT + @test tempdir() == readchomp(`$exename --project=@temp -e 'println(Base.active_project())'`)[1:lastindex(tempdir())] + @test tempdir() == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@temp", "HOME" => homedir()))[1:lastindex(tempdir())] + # --quiet, --banner - let t(q,b) = "Base.JLOptions().quiet == $q && Base.JLOptions().banner == $b" - @test success(`$exename -e $(t(0, -1))`) - @test success(`$exename -q -e $(t(1, 0))`) - @test success(`$exename --quiet -e $(t(1, 0))`) - @test success(`$exename --banner=no -e $(t(0, 0))`) - @test success(`$exename --banner=yes -e $(t(0, 1))`) - @test success(`$exename -q --banner=no -e $(t(1, 0))`) - @test success(`$exename -q --banner=yes -e $(t(1, 1))`) - @test success(`$exename --banner=no -q -e $(t(1, 0))`) - @test success(`$exename --banner=yes -q -e $(t(1, 1))`) + let p = "print((Base.JLOptions().quiet, Base.JLOptions().banner))" + @test read(`$exename -e $p`, String) == "(0, -1)" + @test read(`$exename -q -e $p`, String) == "(1, 0)" + @test read(`$exename --quiet -e $p`, String) == "(1, 0)" + @test read(`$exename --banner=no -e $p`, String) == "(0, 0)" + @test read(`$exename --banner=yes -e $p`, String) == "(0, 1)" + @test read(`$exename --banner=short -e $p`, String) == "(0, 2)" + @test read(`$exename -q --banner=no -e $p`, String) == "(1, 0)" + @test read(`$exename -q --banner=yes -e $p`, String) == "(1, 1)" + @test read(`$exename -q --banner=short -e $p`, String) == "(1, 2)" + @test read(`$exename --banner=no -q -e $p`, String) == "(1, 0)" + @test read(`$exename --banner=yes -q -e $p`, String) == "(1, 1)" + @test read(`$exename --banner=short -q -e $p`, String) == "(1, 2)" end # --home @@ -300,43 +339,37 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test errors_not_signals(`$exename -C invalidtarget`) @test errors_not_signals(`$exename --cpu-target=invalidtarget`) - if Sys.iswindows() - # -t, --threads - code = "print(Threads.threadpoolsize())" - cpu_threads = ccall(:jl_effective_threads, Int32, ()) - @test string(cpu_threads) == - read(`$exename --threads auto -e $code`, String) == - read(`$exename --threads=auto -e $code`, String) == - read(`$exename -tauto -e $code`, String) == - read(`$exename -t auto -e $code`, String) - for nt in (nothing, "1") - withenv("JULIA_NUM_THREADS" => nt) do - @test read(`$exename --threads=2 -e $code`, String) == - read(`$exename -t 2 -e $code`, String) == "2" - end - end - # We want to test oversubscription, but on manycore machines, this can - # actually exhaust limited PID spaces - cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) - if Sys.WORD_SIZE == 32 - cpu_threads = min(cpu_threads, 50) - end - @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) - withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do - @test read(`$exename -e $code`, String) == string(cpu_threads) + # -t, --threads + code = 
"print(Threads.threadpoolsize())" + cpu_threads = ccall(:jl_effective_threads, Int32, ()) + @test string(cpu_threads) == + read(`$exename --threads auto -e $code`, String) == + read(`$exename --threads=auto -e $code`, String) == + read(`$exename -tauto -e $code`, String) == + read(`$exename -t auto -e $code`, String) + for nt in (nothing, "1") + withenv("JULIA_NUM_THREADS" => nt) do + @test read(`$exename --threads=2 -e $code`, String) == + read(`$exename -t 2 -e $code`, String) == "2" end - @test errors_not_signals(`$exename -t 0`) - @test errors_not_signals(`$exename -t -1`) + end + # We want to test oversubscription, but on manycore machines, this can + # actually exhaust limited PID spaces + cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) + if Sys.WORD_SIZE == 32 + cpu_threads = min(cpu_threads, 50) + end + @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) + withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do + @test read(`$exename -e $code`, String) == string(cpu_threads) + end + @test errors_not_signals(`$exename -t 0`) + @test errors_not_signals(`$exename -t -1`) - # Combining --threads and --procs: --threads does propagate - withenv("JULIA_NUM_THREADS" => nothing) do - code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" - @test read(`$exename -p2 -t2 -e $code`, String) == "6" - end - else - @test_skip "Command line tests with -t are flakey on non-Windows OS" - # Known issue: https://github.com/JuliaLang/julia/issues/49154 - # These tests should be fixed and reenabled on all operating systems. + # Combining --threads and --procs: --threads does propagate + withenv("JULIA_NUM_THREADS" => nothing) do + code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" + @test read(`$exename -p2 -t2 -e $code`, String) == "6" end # Combining --threads and invalid -C should yield a decent error @@ -353,7 +386,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # --gcthreads code = "print(Threads.ngcthreads())" cpu_threads = ccall(:jl_effective_threads, Int32, ()) - @test (cpu_threads == 1 ? 
"1" : string(div(cpu_threads, 2))) == + @test string(cpu_threads) == read(`$exename --threads auto -e $code`, String) == read(`$exename --threads=auto -e $code`, String) == read(`$exename -tauto -e $code`, String) == @@ -394,9 +427,30 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test readchomp(`$exename -E "isinteractive()" -i`) == "true" # --color - @test readchomp(`$exename --color=yes -E "Base.have_color"`) == "true" - @test readchomp(`$exename --color=no -E "Base.have_color"`) == "false" - @test errors_not_signals(`$exename --color=false`) + function color_cmd(; flag, no_color=nothing, force_color=nothing) + cmd = `$exename --color=$flag -E "Base.have_color"` + return addenv(cmd, "NO_COLOR" => no_color, "FORCE_COLOR" => force_color) + end + + @test readchomp(color_cmd(flag="auto")) == "nothing" + @test readchomp(color_cmd(flag="no")) == "false" + @test readchomp(color_cmd(flag="yes")) == "true" + @test errors_not_signals(color_cmd(flag="false")) + @test errors_not_signals(color_cmd(flag="true")) + + @test readchomp(color_cmd(flag="auto", no_color="")) == "nothing" + @test readchomp(color_cmd(flag="auto", no_color="1")) == "false" + @test readchomp(color_cmd(flag="no", no_color="1")) == "false" + @test readchomp(color_cmd(flag="yes", no_color="1")) == "true" + + @test readchomp(color_cmd(flag="auto", force_color="")) == "nothing" + @test readchomp(color_cmd(flag="auto", force_color="1")) == "true" + @test readchomp(color_cmd(flag="no", force_color="1")) == "false" + @test readchomp(color_cmd(flag="yes", force_color="1")) == "true" + + @test readchomp(color_cmd(flag="auto", no_color="1", force_color="1")) == "true" + @test readchomp(color_cmd(flag="no", no_color="1", force_color="1")) == "false" + @test readchomp(color_cmd(flag="yes", no_color="1", force_color="1")) == "true" # --history-file @test readchomp(`$exename -E "Bool(Base.JLOptions().historyfile)" @@ -409,9 +463,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` mktempdir() do dir helperdir = joinpath(@__DIR__, "testhelpers") inputfile = joinpath(helperdir, "coverage_file.jl") - expected = replace(read(joinpath(helperdir, "coverage_file.info.bad"), String), - "" => realpath(inputfile)) - expected_good = replace(read(joinpath(helperdir, "coverage_file.info"), String), + expected = replace(read(joinpath(helperdir, "coverage_file.info"), String), "" => realpath(inputfile)) covfile = replace(joinpath(dir, "coverage.info"), "%" => "%%") @test !isfile(covfile) @@ -429,21 +481,18 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` got = read(covfile, String) rm(covfile) @test occursin(expected, got) || (expected, got) - @test_broken occursin(expected_good, got) @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile --code-coverage=$covfile --code-coverage=user`) == "1" @test isfile(covfile) got = read(covfile, String) rm(covfile) @test occursin(expected, got) || (expected, got) - @test_broken occursin(expected_good, got) @test readchomp(`$exename -E "Base.JLOptions().code_coverage" -L $inputfile --code-coverage=$covfile --code-coverage=all`) == "2" @test isfile(covfile) got = read(covfile, String) rm(covfile) @test occursin(expected, got) || (expected, got) - @test_broken occursin(expected_good, got) # Ask for coverage in specific file tfile = realpath(inputfile) @@ -453,7 +502,6 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` got = read(covfile, String) rm(covfile) @test occursin(expected, got) || (expected, got) - @test_broken 
occursin(expected_good, got) # Ask for coverage in directory tdir = dirname(realpath(inputfile)) @@ -463,16 +511,111 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` got = read(covfile, String) rm(covfile) @test occursin(expected, got) || (expected, got) - @test_broken occursin(expected_good, got) + + # Ask for coverage in current directory + tdir = dirname(realpath(inputfile)) + cd(tdir) do + # there may be atrailing separator here so use rstrip + @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, rstrip(unsafe_string(Base.JLOptions().tracked_path), '/'))" -L $inputfile + --code-coverage=$covfile --code-coverage=@`) == "(3, $(repr(tdir)))" + end + @test isfile(covfile) + got = read(covfile, String) + rm(covfile) + @test occursin(expected, got) || (expected, got) + + # Ask for coverage in relative directory + tdir = dirname(realpath(inputfile)) + cd(dirname(tdir)) do + @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile + --code-coverage=$covfile --code-coverage=@testhelpers`) == "(3, $(repr(tdir)))" + end + @test isfile(covfile) + got = read(covfile, String) + rm(covfile) + @test occursin(expected, got) || (expected, got) + + # Ask for coverage in relative directory with dot-dot notation + tdir = dirname(realpath(inputfile)) + cd(tdir) do + @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile + --code-coverage=$covfile --code-coverage=@../testhelpers`) == "(3, $(repr(tdir)))" + end + @test isfile(covfile) + got = read(covfile, String) + rm(covfile) + @test occursin(expected, got) || (expected, got) # Ask for coverage in a different directory tdir = mktempdir() # a dir that contains no code @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile - --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(tdir)))" + --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(realpath(tdir))))" @test isfile(covfile) got = read(covfile, String) @test isempty(got) rm(covfile) + + function coverage_info_for(src::String) + mktemp(dir) do srcfile, io + write(io, src); close(io) + outfile = tempname(dir, cleanup=false)*".info" + run(`$exename --code-coverage=$outfile $srcfile`) + result = read(outfile, String) + rm(outfile, force=true) + result + end + end + @test contains(coverage_info_for(""" + function cov_bug(x, p) + if p > 2 + print("") # runs + end + if Base.compilerbarrier(:const, false) + println("Does not run") + end + end + function do_test() + cov_bug(5, 3) + end + do_test() + """), """ + DA:2,1 + DA:3,1 + DA:5,1 + DA:6,0 + DA:9,1 + DA:10,1 + LH:5 + LF:6 + """) + @test contains(coverage_info_for(""" + function cov_bug() + if Base.compilerbarrier(:const, true) + if Base.compilerbarrier(:const, true) + if Base.compilerbarrier(:const, false) + println("Does not run") + end + else + print("Does not run either") + end + else + print("") + end + return nothing + end + cov_bug() + """), """ + DA:1,1 + DA:2,1 + DA:3,1 + DA:4,1 + DA:5,0 + DA:8,0 + DA:11,0 + DA:13,1 + LH:5 + LF:8 + """) end # --track-allocation @@ -505,9 +648,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test popfirst!(got) == " 32 Base.invokelatest(g, x)" end if Sys.WORD_SIZE == 64 - @test popfirst!(got) == " 48 []" - else @test popfirst!(got) == " 32 []" + else + @test popfirst!(got) == " 16 []" end @test popfirst!(got) == " - end" @test popfirst!(got) 
== " - f(1.23)" @@ -528,30 +671,34 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2" # --print-before/--print-after with pass names is broken on Windows due to no-gnu-unique issues if !Sys.iswindows() - withenv("JULIA_LLVM_ARGS" => "--print-before=FinalLowerGC") do + withenv("JULIA_LLVM_ARGS" => "--print-before=BeforeOptimization") do let code = readchomperrors(`$exename -g0 -E "@eval Int64(1)+Int64(1)"`) @test code[1] code = code[3] @test occursin("llvm.module.flags", code) @test !occursin("llvm.dbg.cu", code) @test !occursin("int.jl", code) - @test !occursin("\"Int64\"", code) + @test !occursin("name: \"Int64\"", code) end let code = readchomperrors(`$exename -g1 -E "@eval Int64(1)+Int64(1)"`) @test code[1] code = code[3] @test occursin("llvm.module.flags", code) @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test !occursin("\"Int64\"", code) + # TODO: consider moving test to llvmpasses as this fails on some platforms + # without clear reason + @test_skip occursin("int.jl", code) + @test !occursin("name: \"Int64\"", code) end let code = readchomperrors(`$exename -g2 -E "@eval Int64(1)+Int64(1)"`) @test code[1] code = code[3] @test occursin("llvm.module.flags", code) @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test occursin("\"Int64\"", code) + # TODO: consider moving test to llvmpasses as this fails on some platforms + # without clear reason + @test_skip occursin("int.jl", code) + @test occursin("name: \"Int64\"", code) end end end @@ -636,10 +783,72 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_DEFAULT end + let JL_OPTIONS_TASK_METRICS_OFF = 0, JL_OPTIONS_TASK_METRICS_ON = 1 + @test parse(Int,readchomp(`$exename -E + "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_OFF + @test parse(Int, readchomp(`$exename --task-metrics=yes -E + "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_ON + @test !parse(Bool, readchomp(`$exename -E "current_task().metrics_enabled"`)) + @test parse(Bool, readchomp(`$exename --task-metrics=yes -E "current_task().metrics_enabled"`)) + end + # --worker takes default / custom as argument (default/custom arguments # tested in test/parallel.jl) @test errors_not_signals(`$exename --worker=true`) + # --trace-compile + let + io = IOBuffer() + v = writereadpipeline( + "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)", + `$exename --trace-compile=stderr -i`, + stderr=io) + _stderr = String(take!(io)) + @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr) + end + + # --trace-compile-timing + let + io = IOBuffer() + v = writereadpipeline( + "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)", + `$exename --trace-compile=stderr --trace-compile-timing -i`, + stderr=io) + _stderr = String(take!(io)) + @test occursin(" ms =# precompile(Tuple{typeof(Main.foo), Int", _stderr) + end + + # Base.@trace_compile (local version of the 2 above args) + let + io = IOBuffer() + v = writereadpipeline( + """ + f(x::Int) = 1 + applyf(container) = f(container[1]) + Base.@trace_compile @eval applyf([100]) + Base.@trace_compile @eval applyf(Any[100]) + f(::Bool) = 2 + Base.@trace_compile @eval applyf([true]) + Base.@trace_compile @eval applyf(Any[true]) + """, + `$exename -i`, + stderr=io) + _stderr = String(take!(io)) + @test length(findall(r"precompile\(", _stderr)) == 5 + @test 
length(findall(r" # recompile", _stderr)) == 1 + end + + # --trace-dispatch + let + io = IOBuffer() + v = writereadpipeline( + "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)", + `$exename --trace-dispatch=stderr -i`, + stderr=io) + _stderr = String(take!(io)) + @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr) + end + # test passing arguments mktempdir() do dir testfile, io = mktemp(dir) @@ -971,6 +1180,61 @@ end @test lines[3] == "foo" @test lines[4] == "bar" end -#heap-size-hint -@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000" +end + +@testset "heap size hint" begin + #heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.) + @test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)" + + mem = ccall(:uv_get_total_memory, UInt64, ()) + cmem = ccall(:uv_get_constrained_memory, UInt64, ()) + if cmem > 0 && cmem < mem + mem = cmem + end + maxmem = parse(UInt64, readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=25% -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`)) + hint = max(mem÷4, 251*1024*1024) - 250*1024*1024 + MAX32HEAP = 1536 * 1024 * 1024 + if Int === Int32 && hint > MAX32HEAP + hint = MAX32HEAP + end + @test abs(Float64(maxmem) - hint)/maxmem < 0.05 + + @test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=10M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$(1*1024*1024)" +end + +## `Main.main` entrypoint + +# Basic usage +@test readchomp(`$(Base.julia_cmd()) -e '(@main)(args) = println("hello")'`) == "hello" + +# Test ARGS with -e +@test readchomp(`$(Base.julia_cmd()) -e '(@main)(args) = println(args)' a b`) == repr(["a", "b"]) + +# Test import from module +@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(args) = println("hello"); end; using .Hello'`) == "hello" +@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(args) = println("hello"); end; import .Hello'`) == "" + +# test --bug-report=rr +if Sys.islinux() && Sys.ARCH in (:i686, :x86_64) # rr is only available on these platforms + mktempdir() do temp_trace_dir + @test success(pipeline(setenv(`$(Base.julia_cmd()) --bug-report=rr-local -e 'exit()'`, + "JULIA_RR_RECORD_ARGS" => "-n --nested=ignore", + "_RR_TRACE_DIR" => temp_trace_dir); #=stderr, stdout=#)) + end +end + +@testset "--heap-size-hint" begin + exename = `$(Base.julia_cmd())` + @test errors_not_signals(`$exename --heap-size-hint -e "exit(0)"`) + @testset "--heap-size-hint=$str" for str in ["asdf","","0","1.2vb","b","GB","2.5GB̂","1.2gb2","42gigabytes","5gig","2GiB","NaNt"] + @test errors_not_signals(`$exename --heap-size-hint=$str -e "exit(0)"`) + end + k = 1024 + m = 1024k + g = 1024m + t = 1024g + @testset "--heap-size-hint=$str" for (str, val) in [("1", 1), ("1e7", 1e7), ("2.5e7", 2.5e7), ("1MB", 1m), ("2.5g", 2.5g), ("1e4kB", 1e4k), + ("1e100", typemax(UInt64)), ("1e500g", typemax(UInt64)), ("1e-12t", 1), ("500000000b", 500000000)] + @test parse(UInt64,read(`$exename --heap-size-hint=$str -E "Base.JLOptions().heap_size_hint"`, String)) == val + end end diff --git a/test/combinatorics.jl b/test/combinatorics.jl index f8fe4e0bd0829..527bd86963a6f 100644 --- a/test/combinatorics.jl +++ b/test/combinatorics.jl @@ -2,6 +2,9 @@ using Random: randcycle +isdefined(Main, :ImmutableArrays) || @eval Main 
include("testhelpers/ImmutableArrays.jl") +using .Main.ImmutableArrays + @testset "binomial" begin @test binomial(5,-1) == 0 @test binomial(5,10) == 0 @@ -67,20 +70,19 @@ end @test isperm(T) == true @test isperm(K) == false end + + # issue #47847 + p = ImmutableArrays.ImmutableArray([2,3,1]) + @test invperm(p) == invperm([2,3,1]) end @testset "factorial" begin - @test factorial(7) == 5040 - @test factorial(Int8(7)) == 5040 - @test factorial(UInt8(7)) == 5040 - @test factorial(Int16(7)) == 5040 - @test factorial(UInt16(7)) == 5040 - @test factorial(Int32(7)) == 5040 - @test factorial(UInt32(7)) == 5040 - @test factorial(Int64(7)) == 5040 - @test factorial(UInt64(7)) == 5040 - @test factorial(Int128(7)) == 5040 - @test factorial(UInt128(7)) == 5040 + for T = Base.uniontypes(Union{Base.Checked.SignedInt,Base.Checked.UnsignedInt}) + @testset let T = T + @test factorial(T(7)) == 5040 + @test Core.Compiler.is_foldable(Base.infer_effects(factorial, (T,))) + end + end @test factorial(0) == 1 @test_throws DomainError factorial(-1) @test factorial(Int64(20)) == 2432902008176640000 @@ -122,3 +124,24 @@ end end end end + +@testset "permute!" begin + #simple array + @test permute!([1,2,3,4,5],[3,2,1,5,4]) == [3,2,1,5,4] + #empty array + @test permute!([],[]) == [] + #single-element array + @test permute!([5],[1]) == [5] + #repeated elements in array + @test permute!([1,2,2,3,3,3],[2,1,3,5,4,6]) == [2,1,2,3,3,3] + #permutation vector contains zero + @test_throws BoundsError permute!([1,2,3],[0,1,2]) + #permutation vector contains negative indices + @test_throws BoundsError permute!([1,2,3],[2,-1,1]) + #permutation vector contains indices larger than array size + @test_throws BoundsError permute!([1,2,3],[2,4,1]) + #permutation vector is empty + @test_throws DimensionMismatch permute!([1,2,3],[]) + #array is empty + @test_throws BoundsError permute!([],[2,1]) +end diff --git a/test/compileall.jl b/test/compileall.jl new file mode 100644 index 0000000000000..beec0d6df49ab --- /dev/null +++ b/test/compileall.jl @@ -0,0 +1,11 @@ +# This test builds a full system image, so it can take a little while. +# We make it a separate test target here, so that it can run in parallel +# with the rest of the tests. + +mktempdir() do dir + @test success(pipeline(`$(Base.julia_cmd()) --compile=all --strip-ir --output-o $(dir)/sys.o.a -e 'exit()'`, stderr=stderr)) skip=(Sys.WORD_SIZE == 32) + if isfile(joinpath(dir, "sys.o.a")) + Base.Linking.link_image(joinpath(dir, "sys.o.a"), joinpath(dir, "sys.so")) + @test success(`$(Base.julia_cmd()) -J $(dir)/sys.so -e 'Base.scrub_repl_backtrace(nothing); exit()'`) + end +end diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl deleted file mode 100644 index 9db0a8903593d..0000000000000 --- a/test/compiler/AbstractInterpreter.jl +++ /dev/null @@ -1,355 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -using Test -const CC = Core.Compiler - -include("irutils.jl") -include("newinterp.jl") - -# OverlayMethodTable -# ================== - -import Base.Experimental: @MethodTable, @overlay - -@newinterp MTOverlayInterp -@MethodTable(OverlayedMT) -CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_world_counter(interp), OverlayedMT) - -function CC.add_remark!(interp::MTOverlayInterp, ::CC.InferenceState, remark) - if interp.meta !== nothing - # Core.println(remark) - push!(interp.meta, remark) - end - return nothing -end - -strangesin(x) = sin(x) -@overlay OverlayedMT strangesin(x::Float64) = iszero(x) ? nothing : cos(x) - -# inference should use the overlayed method table -@test Base.return_types((Float64,); interp=MTOverlayInterp()) do x - strangesin(x) -end |> only === Union{Float64,Nothing} -@test Base.return_types((Any,); interp=MTOverlayInterp()) do x - @invoke strangesin(x::Float64) -end |> only === Union{Float64,Nothing} - -# effect analysis should figure out that the overlayed method is used -@test Base.infer_effects((Float64,); interp=MTOverlayInterp()) do x - strangesin(x) -end |> !Core.Compiler.is_nonoverlayed -@test Base.infer_effects((Any,); interp=MTOverlayInterp()) do x - @invoke strangesin(x::Float64) -end |> !Core.Compiler.is_nonoverlayed - -# account for overlay possibility in unanalyzed matching method -callstrange(::Float64) = strangesin(x) -callstrange(::Nothing) = Core.compilerbarrier(:type, nothing) # trigger inference bail out -callstrange_entry(x) = callstrange(x) # needs to be defined here because of world age -let interp = MTOverlayInterp(Set{Any}()) - matches = Core.Compiler.findall(Tuple{typeof(callstrange),Any}, Core.Compiler.method_table(interp)).matches - @test Core.Compiler.length(matches) == 2 - if Core.Compiler.getindex(matches, 1).method == which(callstrange, (Nothing,)) - @test Base.infer_effects(callstrange_entry, (Any,); interp) |> !Core.Compiler.is_nonoverlayed - @test "Call inference reached maximally imprecise information. Bailing on." in interp.meta - else - @warn "`nonoverlayed` test for inference bailing out is skipped since the method match sort order is changed." - end -end - -# but it should never apply for the native compilation -@test Base.infer_effects((Float64,)) do x - strangesin(x) -end |> Core.Compiler.is_nonoverlayed -@test Base.infer_effects((Any,)) do x - @invoke strangesin(x::Float64) -end |> Core.Compiler.is_nonoverlayed - -# fallback to the internal method table -@test Base.return_types((Int,); interp=MTOverlayInterp()) do x - cos(x) -end |> only === Float64 -@test Base.return_types((Any,); interp=MTOverlayInterp()) do x - @invoke cos(x::Float64) -end |> only === Float64 - -# not fully covered overlay method match -overlay_match(::Any) = nothing -@overlay OverlayedMT overlay_match(::Int) = missing -@test Base.return_types((Any,); interp=MTOverlayInterp()) do x - overlay_match(x) -end |> only === Union{Nothing,Missing} - -# partial concrete evaluation -@test Base.return_types(; interp=MTOverlayInterp()) do - isbitstype(Int) ? nothing : missing -end |> only === Nothing -Base.@assume_effects :terminates_globally function issue41694(x) - res = 1 - 1 < x < 20 || throw("bad") - while x > 1 - res *= x - x -= 1 - end - return res -end -@test Base.return_types(; interp=MTOverlayInterp()) do - issue41694(3) == 6 ? 
nothing : missing -end |> only === Nothing - -# disable partial concrete evaluation when tainted by any overlayed call -Base.@assume_effects :total totalcall(f, args...) = f(args...) -@test Base.return_types(; interp=MTOverlayInterp()) do - if totalcall(strangesin, 1.0) == cos(1.0) - return nothing - else - return missing - end -end |> only === Nothing - -# GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror` -# https://github.com/JuliaLang/julia/issues/48097 -@newinterp Issue48097Interp -@MethodTable Issue48097MT -CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_world_counter(interp), Issue48097MT) -CC.InferenceParams(::Issue48097Interp) = CC.InferenceParams(; unoptimize_throw_blocks=false) -@overlay Issue48097MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return -issue48097(; kwargs...) = return 42 -@test fully_eliminated(; interp=Issue48097Interp(), retval=42) do - issue48097(; a=1f0, b=1.0) -end - -# AbstractLattice -# =============== - -using Core: SlotNumber, Argument -using Core.Compiler: slot_id, tmerge_fast_path -import .CC: - AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, - widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice, - widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn - -@newinterp TaintInterpreter -struct TaintLattice{PL<:AbstractLattice} <: CC.AbstractLattice - parent::PL -end -CC.widenlattice(𝕃::TaintLattice) = 𝕃.parent -CC.is_valid_lattice_norec(::TaintLattice, @nospecialize(elm)) = isa(elm, Taint) - -struct InterTaintLattice{PL<:AbstractLattice} <: CC.AbstractLattice - parent::PL -end -CC.widenlattice(𝕃::InterTaintLattice) = 𝕃.parent -CC.is_valid_lattice_norec(::InterTaintLattice, @nospecialize(elm)) = isa(elm, InterTaint) - -const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}} - -CC.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance)) -CC.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance)) -CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance) - -struct Taint - typ - slots::BitSet - function Taint(@nospecialize(typ), slots::BitSet) - if typ isa Taint - slots = typ.slots ∪ slots - typ = typ.typ - end - return new(typ, slots) - end -end -Taint(@nospecialize(typ), id::Int) = Taint(typ, push!(BitSet(), id)) -function Base.:(==)(a::Taint, b::Taint) - return a.typ == b.typ && a.slots == b.slots -end - -struct InterTaint - typ - slots::BitSet - function InterTaint(@nospecialize(typ), slots::BitSet) - if typ isa InterTaint - slots = typ.slots ∪ slots - typ = typ.typ - end - return new(typ, slots) - end -end -InterTaint(@nospecialize(typ), id::Int) = InterTaint(typ, push!(BitSet(), id)) -function Base.:(==)(a::InterTaint, b::InterTaint) - return a.typ == b.typ && a.slots == b.slots -end - -const AnyTaint = Union{Taint, InterTaint} - -function CC.tmeet(𝕃::AnyTaintLattice, @nospecialize(v), @nospecialize(t::Type)) - T = isa(𝕃, TaintLattice) ? Taint : InterTaint - if isa(v, T) - v = v.typ - end - return tmeet(widenlattice(𝕃), v, t) -end -function CC.tmerge(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb)) - r = tmerge_fast_path(𝕃, typea, typeb) - r !== nothing && return r - # type-lattice for Taint - T = isa(𝕃, TaintLattice) ? 
Taint : InterTaint - if isa(typea, T) - if isa(typeb, T) - return T( - tmerge(widenlattice(𝕃), typea.typ, typeb.typ), - typea.slots ∪ typeb.slots) - else - typea = typea.typ - end - elseif isa(typeb, T) - typeb = typeb.typ - end - return tmerge(widenlattice(𝕃), typea, typeb) -end -function CC.:⊑(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb)) - T = isa(𝕃, TaintLattice) ? Taint : InterTaint - if isa(typea, T) - if isa(typeb, T) - typea.slots ⊆ typeb.slots || return false - return ⊑(widenlattice(𝕃), typea.typ, typeb.typ) - end - typea = typea.typ - elseif isa(typeb, T) - return false - end - return ⊑(widenlattice(𝕃), typea, typeb) -end -CC.widenconst(taint::AnyTaint) = widenconst(taint.typ) - -function CC.abstract_eval_special_value(interp::TaintInterpreter, - @nospecialize(e), vtypes::CC.VarTable, sv::CC.InferenceState) - ret = @invoke CC.abstract_eval_special_value(interp::CC.AbstractInterpreter, - e::Any, vtypes::CC.VarTable, sv::CC.InferenceState) - if isa(e, SlotNumber) || isa(e, Argument) - return Taint(ret, slot_id(e)) - end - return ret -end - -function CC.widenreturn(𝕃::InferenceLattice{<:InterTaintLattice}, @nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::CC.VarTable) - if isa(rt, Taint) - return InterTaint(rt.typ, BitSet((id for id in rt.slots if id ≤ nargs))) - end - return CC.widenreturn(widenlattice(𝕃), rt, bestguess, nargs, slottypes, changes) -end - -@test CC.tmerge(typeinf_lattice(TaintInterpreter()), Taint(Int, 1), Taint(Int, 2)) == Taint(Int, BitSet(1:2)) - -# code_typed(ifelse, (Bool, Int, Int); interp=TaintInterpreter()) - -# External lattice without `Conditional` - -import .CC: - AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, - typeinf_lattice, ipo_lattice, optimizer_lattice - -@newinterp NonconditionalInterpreter -CC.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) -CC.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) -CC.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice()) - -@test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x - c = isa(x, Int) || isa(x, Float64) - if c - return x - else - return nothing - end -end |> only === Any - -# CallInfo × inlining -# =================== - -@newinterp NoinlineInterpreter -noinline_modules(interp::NoinlineInterpreter) = interp.meta::Set{Module} - -import .CC: CallInfo - -struct NoinlineCallInfo <: CallInfo - info::CallInfo # wrapped call -end -CC.nsplit_impl(info::NoinlineCallInfo) = CC.nsplit(info.info) -CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx) -CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx) - -function CC.abstract_call(interp::NoinlineInterpreter, - arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) - ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter, - arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) - if sv.mod in noinline_modules(interp) - return CC.CallMeta(ret.rt, ret.effects, NoinlineCallInfo(ret.info)) - end - return ret -end -function CC.inlining_policy(interp::NoinlineInterpreter, - @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt8, mi::MethodInstance, - argtypes::Vector{Any}) - if isa(info, NoinlineCallInfo) - return nothing - end - return @invoke CC.inlining_policy(interp::CC.AbstractInterpreter, - src::Any, info::CallInfo, 
stmt_flag::UInt8, mi::MethodInstance, - argtypes::Vector{Any}) -end - -@inline function inlined_usually(x, y, z) - return x * y + z -end - -# check if the inlining algorithm works as expected -let src = code_typed1((Float64,Float64,Float64)) do x, y, z - inlined_usually(x, y, z) - end - @test count(isinvoke(:inlined_usually), src.code) == 0 - @test count(iscall((src, inlined_usually)), src.code) == 0 -end -let NoinlineModule = Module() - interp = NoinlineInterpreter(Set((NoinlineModule,))) - - # this anonymous function's context is Main -- it should be inlined as usual - let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z - inlined_usually(x, y, z) - end - @test count(isinvoke(:inlined_usually), src.code) == 0 - @test count(iscall((src, inlined_usually)), src.code) == 0 - end - - # it should work for cached results - method = only(methods(inlined_usually, (Float64,Float64,Float64,))) - mi = CC.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec()) - @test haskey(interp.code_cache.dict, mi) - let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z - inlined_usually(x, y, z) - end - @test count(isinvoke(:inlined_usually), src.code) == 0 - @test count(iscall((src, inlined_usually)), src.code) == 0 - end - - # now the context module is `NoinlineModule` -- it should not be inlined - let src = @eval NoinlineModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z - $inlined_usually(x, y, z) - end - @test count(isinvoke(:inlined_usually), src.code) == 1 - @test count(iscall((src, inlined_usually)), src.code) == 0 - end - - # the context module is totally irrelevant -- it should be inlined as usual - OtherModule = Module() - let src = @eval OtherModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z - $inlined_usually(x, y, z) - end - @test count(isinvoke(:inlined_usually), src.code) == 0 - @test count(iscall((src, inlined_usually)), src.code) == 0 - end -end - -# Make sure that Core.Compiler has enough NamedTuple infrastructure -# to properly give error messages for basic kwargs... -Core.eval(Core.Compiler, quote f(;a=1) = a end) -@test_throws MethodError Core.Compiler.f(;b=2) diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl deleted file mode 100644 index bb3273b3e707a..0000000000000 --- a/test/compiler/EscapeAnalysis/EAUtils.jl +++ /dev/null @@ -1,366 +0,0 @@ -module EAUtils - -export code_escapes, @code_escapes, __clear_cache! - -const CC = Core.Compiler -const EA = CC.EscapeAnalysis - -# entries -# ------- - -import Base: unwrap_unionall, rewrap_unionall -import InteractiveUtils: gen_call_with_extracted_types_and_kwargs - -""" - @code_escapes [options...] f(args...) - -Evaluates the arguments to the function call, determines its types, and then calls -[`code_escapes`](@ref) on the resulting expression. -As with `@code_typed` and its family, any of `code_escapes` keyword arguments can be given -as the optional arguments like `@code_escapes optimize=false myfunc(myargs...)`. -""" -macro code_escapes(ex0...) - return gen_call_with_extracted_types_and_kwargs(__module__, :code_escapes, ex0) -end - -""" - code_escapes(f, argtypes=Tuple{}; [debuginfo::Symbol = :none], [optimize::Bool = true]) -> result::EscapeResult - -Runs the escape analysis on optimized IR of a generic function call with the given type signature. 
- -# Keyword Arguments - -- `optimize::Bool = true`: - if `true` returns escape information of post-inlining IR (used for local optimization), - otherwise returns escape information of pre-inlining IR (used for interprocedural escape information generation) -- `debuginfo::Symbol = :none`: - controls the amount of code metadata present in the output, possible options are `:none` or `:source`. -""" -function code_escapes(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); - world::UInt = get_world_counter(), - interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world), - debuginfo::Symbol = :none, - optimize::Bool = true) - tt = Base.signature_type(f, types) - interp = EscapeAnalyzer(interp, tt, optimize) - results = Base.code_typed_by_type(tt; optimize=true, world, interp) - isone(length(results)) || throw(ArgumentError("`code_escapes` only supports single analysis result")) - return EscapeResult(interp.ir, interp.state, interp.linfo, debuginfo === :source) -end - -# in order to run a whole analysis from ground zero (e.g. for benchmarking, etc.) -__clear_cache!() = empty!(GLOBAL_CODE_CACHE) - -# AbstractInterpreter -# ------------------- - -# imports -import .CC: - AbstractInterpreter, NativeInterpreter, WorldView, WorldRange, - InferenceParams, OptimizationParams, get_world_counter, get_inference_cache, code_cache -# usings -import Core: - CodeInstance, MethodInstance, CodeInfo -import .CC: - InferenceResult, OptimizationState, IRCode, copy as cccopy, - @timeit, convert_to_ircode, slot2reg, compact!, ssa_inlining_pass!, sroa_pass!, - adce_pass!, JLOptions, verify_ir, verify_linetable -import .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState, is_ipo_profitable - -# when working outside of Core.Compiler, -# cache entire escape state for later inspection and debugging -struct EscapeCache - cache::ArgEscapeCache - state::EscapeState # preserved just for debugging purpose - ir::IRCode # preserved just for debugging purpose -end - -mutable struct EscapeAnalyzer{State} <: AbstractInterpreter - native::NativeInterpreter - cache::IdDict{InferenceResult,EscapeCache} - entry_tt - optimize::Bool - ir::IRCode - state::State - linfo::MethodInstance - EscapeAnalyzer(native::NativeInterpreter, @nospecialize(tt), optimize::Bool) = - new{EscapeState}(native, IdDict{InferenceResult,EscapeCache}(), tt, optimize) -end - -CC.InferenceParams(interp::EscapeAnalyzer) = InferenceParams(interp.native) -CC.OptimizationParams(interp::EscapeAnalyzer) = OptimizationParams(interp.native) -CC.get_world_counter(interp::EscapeAnalyzer) = get_world_counter(interp.native) - -CC.get_inference_cache(interp::EscapeAnalyzer) = get_inference_cache(interp.native) - -const GLOBAL_CODE_CACHE = IdDict{MethodInstance,CodeInstance}() - -function CC.code_cache(interp::EscapeAnalyzer) - worlds = WorldRange(get_world_counter(interp)) - return WorldView(GlobalCache(), worlds) -end - -struct GlobalCache end - -CC.haskey(wvc::WorldView{GlobalCache}, mi::MethodInstance) = haskey(GLOBAL_CODE_CACHE, mi) - -CC.get(wvc::WorldView{GlobalCache}, mi::MethodInstance, default) = get(GLOBAL_CODE_CACHE, mi, default) - -CC.getindex(wvc::WorldView{GlobalCache}, mi::MethodInstance) = getindex(GLOBAL_CODE_CACHE, mi) - -function CC.setindex!(wvc::WorldView{GlobalCache}, ci::CodeInstance, mi::MethodInstance) - GLOBAL_CODE_CACHE[mi] = ci - add_callback!(mi) # register the callback on invalidation - return nothing -end - -function add_callback!(linfo) - if !isdefined(linfo, :callbacks) - linfo.callbacks = 
Any[invalidate_cache!] - else - if !any(@nospecialize(cb)->cb===invalidate_cache!, linfo.callbacks) - push!(linfo.callbacks, invalidate_cache!) - end - end - return nothing -end - -function invalidate_cache!(replaced, max_world, depth = 0) - delete!(GLOBAL_CODE_CACHE, replaced) - - if isdefined(replaced, :backedges) - for mi in replaced.backedges - mi = mi::MethodInstance - if !haskey(GLOBAL_CODE_CACHE, mi) - continue # otherwise fall into infinite loop - end - invalidate_cache!(mi, max_world, depth+1) - end - end - return nothing -end - -function CC.optimize(interp::EscapeAnalyzer, - opt::OptimizationState, caller::InferenceResult) - ir = run_passes_with_ea(interp, opt.src, opt, caller) - return CC.finish(interp, opt, ir, caller) -end - -function CC.cache_result!(interp::EscapeAnalyzer, caller::InferenceResult) - if haskey(interp.cache, caller) - GLOBAL_ESCAPE_CACHE[caller.linfo] = interp.cache[caller] - end - return @invoke CC.cache_result!(interp::AbstractInterpreter, caller::InferenceResult) -end - -const GLOBAL_ESCAPE_CACHE = IdDict{MethodInstance,EscapeCache}() - -""" - cache_escapes!(caller::InferenceResult, estate::EscapeState, cacheir::IRCode) - -Transforms escape information of call arguments of `caller`, -and then caches it into a global cache for later interprocedural propagation. -""" -function cache_escapes!(interp::EscapeAnalyzer, - caller::InferenceResult, estate::EscapeState, cacheir::IRCode) - cache = ArgEscapeCache(estate) - ecache = EscapeCache(cache, estate, cacheir) - interp.cache[caller] = ecache - return cache -end - -function get_escape_cache(interp::EscapeAnalyzer) - return function (linfo::Union{InferenceResult,MethodInstance}) - if isa(linfo, InferenceResult) - ecache = get(interp.cache, linfo, nothing) - else - ecache = get(GLOBAL_ESCAPE_CACHE, linfo, nothing) - end - return ecache !== nothing ? ecache.cache : nothing - end -end - -function run_passes_with_ea(interp::EscapeAnalyzer, ci::CodeInfo, sv::OptimizationState, - caller::InferenceResult) - @timeit "convert" ir = convert_to_ircode(ci, sv) - @timeit "slot2reg" ir = slot2reg(ir, ci, sv) - # TODO: Domsorting can produce an updated domtree - no need to recompute here - @timeit "compact 1" ir = compact!(ir) - nargs = let def = sv.linfo.def; isa(def, Method) ? 
Int(def.nargs) : 0; end - local state - if is_ipo_profitable(ir, nargs) || caller.linfo.specTypes === interp.entry_tt - try - @timeit "[IPO EA]" begin - state = analyze_escapes(ir, nargs, false, get_escape_cache(interp)) - cache_escapes!(interp, caller, state, cccopy(ir)) - end - catch err - @error "error happened within [IPO EA], inspect `Main.ir` and `Main.nargs`" - @eval Main (ir = $ir; nargs = $nargs) - rethrow(err) - end - end - if caller.linfo.specTypes === interp.entry_tt && !interp.optimize - # return back the result - interp.ir = cccopy(ir) - interp.state = state - interp.linfo = sv.linfo - end - @timeit "Inlining" ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds) - # @timeit "verify 2" verify_ir(ir) - @timeit "compact 2" ir = compact!(ir) - if caller.linfo.specTypes === interp.entry_tt && interp.optimize - try - @timeit "[Local EA]" state = analyze_escapes(ir, nargs, true, get_escape_cache(interp)) - catch err - @error "error happened within [Local EA], inspect `Main.ir` and `Main.nargs`" - @eval Main (ir = $ir; nargs = $nargs) - rethrow(err) - end - # return back the result - interp.ir = cccopy(ir) - interp.state = state - interp.linfo = sv.linfo - end - @timeit "SROA" ir = sroa_pass!(ir) - @timeit "ADCE" ir = adce_pass!(ir) - @timeit "compact 3" ir = compact!(ir) - if JLOptions().debug_level == 2 - @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) - end - return ir -end - -# printing -# -------- - -import Core: Argument, SSAValue -import .CC: widenconst, singleton_type - -Base.getindex(estate::EscapeState, @nospecialize(x)) = CC.getindex(estate, x) - -function get_name_color(x::EscapeInfo, symbol::Bool = false) - getname(x) = string(nameof(x)) - if x === EA.⊥ - name, color = (getname(EA.NotAnalyzed), "◌"), :plain - elseif EA.has_no_escape(EA.ignore_argescape(x)) - if EA.has_arg_escape(x) - name, color = (getname(EA.ArgEscape), "✓"), :cyan - else - name, color = (getname(EA.NoEscape), "✓"), :green - end - elseif EA.has_all_escape(x) - name, color = (getname(EA.AllEscape), "X"), :red - elseif EA.has_return_escape(x) - name = (getname(EA.ReturnEscape), "↑") - color = EA.has_thrown_escape(x) ? :yellow : :blue - else - name = (nothing, "*") - color = EA.has_thrown_escape(x) ? :yellow : :bold - end - name = symbol ? last(name) : first(name) - if name !== nothing && !isa(x.AliasInfo, Bool) - name = string(name, "′") - end - return name, color -end - -# pcs = sprint(show, collect(x.EscapeSites); context=:limit=>true) -function Base.show(io::IO, x::EscapeInfo) - name, color = get_name_color(x) - if isnothing(name) - @invoke show(io::IO, x::Any) - else - printstyled(io, name; color) - end -end -function Base.show(io::IO, ::MIME"application/prs.juno.inline", x::EscapeInfo) - name, color = get_name_color(x) - if isnothing(name) - return x # use fancy tree-view - else - printstyled(io, name; color) - end -end - -struct EscapeResult - ir::IRCode - state::EscapeState - linfo::Union{Nothing,MethodInstance} - source::Bool - function EscapeResult(ir::IRCode, state::EscapeState, - linfo::Union{Nothing,MethodInstance} = nothing, - source::Bool=false) - return new(ir, state, linfo, source) - end -end -Base.show(io::IO, result::EscapeResult) = print_with_info(io, result) -@eval Base.iterate(res::EscapeResult, state=1) = - return state > $(fieldcount(EscapeResult)) ? 
nothing : (getfield(res, state), state+1) - -Base.show(io::IO, cached::EscapeCache) = show(io, EscapeResult(cached.ir, cached.state, nothing)) - -# adapted from https://github.com/JuliaDebug/LoweredCodeUtils.jl/blob/4612349432447e868cf9285f647108f43bd0a11c/src/codeedges.jl#L881-L897 -function print_with_info(io::IO, (; ir, state, linfo, source)::EscapeResult) - # print escape information on SSA values - function preprint(io::IO) - ft = ir.argtypes[1] - f = singleton_type(ft) - if f === nothing - f = widenconst(ft) - end - print(io, f, '(') - for i in 1:state.nargs - arg = state[Argument(i)] - i == 1 && continue - c, color = get_name_color(arg, true) - printstyled(io, c, ' ', '_', i, "::", ir.argtypes[i]; color) - i ≠ state.nargs && print(io, ", ") - end - print(io, ')') - if !isnothing(linfo) - def = linfo.def - printstyled(io, " in ", (isa(def, Module) ? (def,) : (def.module, " at ", def.file, ':', def.line))...; color=:bold) - end - println(io) - end - - # print escape information on SSA values - # nd = ndigits(length(ssavalues)) - function preprint(io::IO, idx::Int) - c, color = get_name_color(state[SSAValue(idx)], true) - # printstyled(io, lpad(idx, nd), ' ', c, ' '; color) - printstyled(io, rpad(c, 2), ' '; color) - end - - print_with_info(preprint, (args...)->nothing, io, ir, source) -end - -function print_with_info(preprint, postprint, io::IO, ir::IRCode, source::Bool) - io = IOContext(io, :displaysize=>displaysize(io)) - used = Base.IRShow.stmts_used(io, ir) - if source - line_info_preprinter = function (io::IO, indent::String, idx::Int) - r = Base.IRShow.inline_linfo_printer(ir)(io, indent, idx) - idx ≠ 0 && preprint(io, idx) - return r - end - else - line_info_preprinter = Base.IRShow.lineinfo_disabled - end - line_info_postprinter = Base.IRShow.default_expr_type_printer - preprint(io) - bb_idx_prev = bb_idx = 1 - for idx = 1:length(ir.stmts) - preprint(io, idx) - bb_idx = Base.IRShow.show_ir_stmt(io, ir, idx, line_info_preprinter, line_info_postprinter, used, ir.cfg, bb_idx) - postprint(io, idx, bb_idx != bb_idx_prev) - bb_idx_prev = bb_idx - end - max_bb_idx_size = ndigits(length(ir.cfg.blocks)) - line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0) - postprint(io) - return nothing -end - -end # module EAUtils diff --git a/test/compiler/EscapeAnalysis/interprocedural.jl b/test/compiler/EscapeAnalysis/interprocedural.jl deleted file mode 100644 index 756e5489ed637..0000000000000 --- a/test/compiler/EscapeAnalysis/interprocedural.jl +++ /dev/null @@ -1,262 +0,0 @@ -# IPO EA Test -# =========== -# EA works on pre-inlining IR - -include(normpath(@__DIR__, "setup.jl")) - -# callsites -# --------- - -noescape(a) = nothing -noescape(a, b) = nothing -function global_escape!(x) - GR[] = x - return nothing -end -union_escape!(x) = global_escape!(x) -union_escape!(x::SafeRef) = nothing -union_escape!(x::SafeRefs) = nothing -Base.@constprop :aggressive function conditional_escape!(cnd, x) - cnd && global_escape!(x) - return nothing -end - -# MethodMatchInfo -- global cache -let result = code_escapes((SafeRef{String},); optimize=false) do x - return noescape(x) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - identity(x) - return nothing - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - return identity(x) - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test 
has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - return Ref(x) - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - r = Ref{SafeRef{String}}() - r[] = x - return r - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - global_escape!(x) - end - @test has_all_escape(result.state[Argument(2)]) -end -# UnionSplitInfo -let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s - x = c ? s : SafeRef(s) - union_escape!(x) - end - @test has_all_escape(result.state[Argument(3)]) # s -end -let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s - x = c ? SafeRef(s) : SafeRefs(s, s) - union_escape!(x) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -# ConstCallInfo -- local cache -let result = code_escapes((SafeRef{String},); optimize=false) do x - return conditional_escape!(false, x) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -# InvokeCallInfo -let result = code_escapes((SafeRef{String},); optimize=false) do x - return @invoke noescape(x::Any) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - return @invoke conditional_escape!(false::Any, x::Any) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end - -# MethodError -# ----------- -# accounts for ThrownEscape via potential MethodError - -# no method error -identity_if_string(x::SafeRef) = nothing -let result = code_escapes((SafeRef{String},); optimize=false) do x - identity_if_string(x) - end - i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_thrown_escape(result.state[Argument(2)], i) - @test !has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x - identity_if_string(x) - end - i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], i) - @test !has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - try - identity_if_string(x) - catch err - global GV = err - end - return nothing - end - @test !has_all_escape(result.state[Argument(2)]) -end -let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x - try - identity_if_string(x) - catch err - global GV = err - end - return nothing - end - @test has_all_escape(result.state[Argument(2)]) -end -# method ambiguity error -ambig_error_test(a::SafeRef, b) = nothing -ambig_error_test(a, b::SafeRef) = nothing -ambig_error_test(a, b) = nothing -let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y - ambig_error_test(x, y) - end - i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], i) # x - @test has_thrown_escape(result.state[Argument(3)], i) # y - @test !has_return_escape(result.state[Argument(2)], r) # x - 
@test !has_return_escape(result.state[Argument(3)], r) # y -end -let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y - try - ambig_error_test(x, y) - catch err - global GV = err - end - end - @test has_all_escape(result.state[Argument(2)]) # x - @test has_all_escape(result.state[Argument(3)]) # y -end - -# Local EA integration -# -------------------- - -# propagate escapes imposed on call arguments - -# FIXME handle _apply_iterate -# FIXME currently we can't prove the effect-freeness of `getfield(RefValue{String}, :x)` -# because of this check https://github.com/JuliaLang/julia/blob/94b9d66b10e8e3ebdb268e4be5f7e1f43079ad4e/base/compiler/tfuncs.jl#L745 -# and thus it leads to the following two broken tests - -@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing) -let result = code_escapes() do - broadcast_noescape1(Ref("Hi")) - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)]) - @test_broken !has_thrown_escape(result.state[SSAValue(i)]) -end -@noinline broadcast_noescape2(b) = broadcast(identity, b) -let result = code_escapes() do - broadcast_noescape2(Ref("Hi")) - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)]) - @test_broken !has_thrown_escape(result.state[SSAValue(i)]) -end -@noinline allescape_argument(a) = (global GV = a) # obvious escape -let result = code_escapes() do - allescape_argument(Ref("Hi")) - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) -end -# if we can't determine the matching method statically, we should be conservative -let result = code_escapes((Ref{Any},)) do a - may_exist(a) - end - @test has_all_escape(result.state[Argument(2)]) -end -let result = code_escapes((Ref{Any},)) do a - Base.@invokelatest broadcast_noescape1(a) - end - @test has_all_escape(result.state[Argument(2)]) -end - -# handling of simple union-split (just exploit the inliner's effort) -@noinline unionsplit_noescape(a) = string(nothing) -@noinline unionsplit_noescape(a::Int) = a + 10 -let result = code_escapes((Union{Int,Nothing},)) do x - s = SafeRef{Union{Int,Nothing}}(x) - unionsplit_noescape(s[]) - return nothing - end - inds = findall(isnew, result.ir.stmts.inst) # find allocation statement - @assert !isempty(inds) - for i in inds - @test has_no_escape(result.state[SSAValue(i)]) - end -end - -@noinline function unused_argument(a) - println("prevent inlining") - return Base.inferencebarrier(nothing) -end -let result = code_escapes() do - a = Ref("foo") # shouldn't be "return escape" - b = unused_argument(a) - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - - result = code_escapes() do - a = Ref("foo") # still should be "return escape" - b = unused_argument(a) - return a - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) -end - -# should propagate escape information imposed on return value to the aliased call argument -@noinline returnescape_argument(a) = (println("prevent inlining"); a) -let result = code_escapes() do - obj = Ref("foo") # should be "return escape" - ret = returnescape_argument(obj) - return ret # alias of `obj` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test 
has_return_escape(result.state[SSAValue(i)], r) -end -@noinline noreturnescape_argument(a) = (println("prevent inlining"); identity("hi")) -let result = code_escapes() do - obj = Ref("foo") # better to not be "return escape" - ret = noreturnescape_argument(obj) - return ret # must not alias to `obj` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) -end diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/local.jl deleted file mode 100644 index 27e4fdeee28c6..0000000000000 --- a/test/compiler/EscapeAnalysis/local.jl +++ /dev/null @@ -1,2205 +0,0 @@ -# Local EA Test -# ============= -# EA works on post-inlining IR - -include(normpath(@__DIR__, "setup.jl")) - -@testset "basics" begin - let # arg return - result = code_escapes((Any,)) do a # return to caller - return nothing - end - @test has_arg_escape(result.state[Argument(2)]) - # return - result = code_escapes((Any,)) do a - return a - end - i = only(findall(isreturn, result.ir.stmts.inst)) - @test has_arg_escape(result.state[Argument(1)]) # self - @test !has_return_escape(result.state[Argument(1)], i) # self - @test has_arg_escape(result.state[Argument(2)]) # a - @test has_return_escape(result.state[Argument(2)], i) # a - end - let # global store - result = code_escapes((Any,)) do a - global GV = a - nothing - end - @test has_all_escape(result.state[Argument(2)]) - end - let # global load - result = code_escapes() do - global GV - return GV - end - i = only(findall(has_return_escape, map(i->result.state[SSAValue(i)], 1:length(result.ir.stmts)))) - @test has_all_escape(result.state[SSAValue(i)]) - end - let # global store / load (https://github.com/aviatesk/EscapeAnalysis.jl/issues/56) - result = code_escapes((Any,)) do s - global GV - GV = s - return GV - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - end - let # :gc_preserve_begin / :gc_preserve_end - result = code_escapes((String,)) do s - m = SafeRef(s) - GC.@preserve m begin - return nothing - end - end - i = findfirst(isT(SafeRef{String}), result.ir.stmts.type) # find allocation statement - @test !isnothing(i) - @test has_no_escape(result.state[SSAValue(i)]) - end - let # :isdefined - result = code_escapes((String, Bool, )) do a, b - if b - s = Ref(a) - end - return @isdefined(s) - end - i = findfirst(isT(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement - @test isnothing(i) || has_no_escape(result.state[SSAValue(i)]) - end - let # ϕ-node - result = code_escapes((Bool,Any,Any)) do cond, a, b - c = cond ? 
a : b # ϕ(a, b) - return c - end - @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.inst) - i = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(3)], i) # a - @test has_return_escape(result.state[Argument(4)], i) # b - end - let # π-node - result = code_escapes((Any,)) do a - if isa(a, Regex) # a::π(Regex) - return a - end - return nothing - end - @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.inst) - @test any(findall(isreturn, result.ir.stmts.inst)) do i - has_return_escape(result.state[Argument(2)], i) - end - end - let # φᶜ-node / ϒ-node - result = code_escapes((Any,String)) do a, b - local x::String - try - x = a - catch err - x = b - end - return x - end - @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.inst) - @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.inst) - i = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], i) - @test has_return_escape(result.state[Argument(3)], i) - end - let # branching - result = code_escapes((Any,Bool,)) do a, c - if c - return nothing # a doesn't escape in this branch - else - return a # a escapes to a caller - end - end - @test has_return_escape(result.state[Argument(2)]) - end - let # loop - result = code_escapes((Int,)) do n - c = SafeRef{Bool}(false) - while n > 0 - rand(Bool) && return c - end - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)]) - end - let # try/catch - result = code_escapes((Any,)) do a - try - nothing - catch err - return a # return escape - end - end - @test has_return_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do a - try - nothing - finally - return a # return escape - end - end - @test has_return_escape(result.state[Argument(2)]) - end - let # :foreigncall - result = code_escapes((Any,)) do x - ccall(:some_ccall, Any, (Any,), x) - end - @test has_all_escape(result.state[Argument(2)]) - end -end - -let # simple allocation - result = code_escapes((Bool,)) do c - mm = SafeRef{Bool}(c) # just allocated, never escapes - return mm[] ? nothing : 1 - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(i)]) -end - -@testset "builtins" begin - let # throw - r = code_escapes((Any,)) do a - throw(a) - end - @test has_thrown_escape(r.state[Argument(2)]) - end - - let # implicit throws - r = code_escapes((Any,)) do a - getfield(a, :may_not_field) - end - @test has_thrown_escape(r.state[Argument(2)]) - - r = code_escapes((Any,)) do a - sizeof(a) - end - @test has_thrown_escape(r.state[Argument(2)]) - end - - let # :=== - result = code_escapes((Bool, SafeRef{String})) do cond, s - m = cond ? 
s : nothing - c = m === nothing - return c - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) - end - - let # sizeof - result = code_escapes((Vector{Any},)) do xs - sizeof(xs) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) - end - - let # ifelse - result = code_escapes((Bool,)) do c - r = ifelse(c, Ref("yes"), Ref("no")) - return r - end - inds = findall(isnew, result.ir.stmts.inst) - @assert !isempty(inds) - for i in inds - @test has_return_escape(result.state[SSAValue(i)]) - end - end - let # ifelse (with constant condition) - result = code_escapes() do - r = ifelse(true, Ref("yes"), Ref(nothing)) - return r - end - for i in 1:length(result.ir.stmts) - if isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{String})(result.ir.stmts.type[i]) - @test has_return_escape(result.state[SSAValue(i)]) - elseif isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{Nothing})(result.ir.stmts.type[i]) - @test has_no_escape(result.state[SSAValue(i)]) - end - end - end - - let # typeassert - result = code_escapes((Any,)) do x - y = x::String - return y - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test !has_all_escape(result.state[Argument(2)]) - end - - let # isdefined - result = code_escapes((Any,)) do x - isdefined(x, :foo) ? x : throw("undefined") - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test !has_all_escape(result.state[Argument(2)]) - - result = code_escapes((Module,)) do m - isdefined(m, 10) # throws - end - @test has_thrown_escape(result.state[Argument(2)]) - end -end - -@testset "flow-sensitivity" begin - # ReturnEscape - let result = code_escapes((Bool,)) do cond - r = Ref("foo") - if cond - return cond - end - return r - end - i = only(findall(isnew, result.ir.stmts.inst)) - rts = findall(isreturn, result.ir.stmts.inst) - @assert length(rts) == 2 - @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 1 - end - let result = code_escapes((Bool,)) do cond - r = Ref("foo") - cnt = 0 - while rand(Bool) - cnt += 1 - rand(Bool) && return r - end - rand(Bool) && return r - return cnt - end - i = only(findall(isnew, result.ir.stmts.inst)) - rts = findall(isreturn, result.ir.stmts.inst) # return statement - @assert length(rts) == 3 - @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 2 - end -end - -@testset "escape through exceptions" begin - M = @eval Module() begin - unsafeget(x) = isassigned(x) ? 
x[] : throw(x) - @noinline function escape_rethrow!() - try - rethrow() - catch err - GR[] = err - end - end - @noinline function escape_current_exceptions!() - excs = Base.current_exceptions() - GR[] = excs - end - const GR = Ref{Any}() - @__MODULE__ - end - - let # simple: return escape - result = @eval M $code_escapes() do - r = Ref{String}() - local ret - try - s = unsafeget(r) - ret = sizeof(s) - catch err - ret = err - end - return ret - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)]) - end - - let # simple: global escape - result = @eval M $code_escapes() do - r = Ref{String}() - local ret # prevent DCE - try - s = unsafeget(r) - ret = sizeof(s) - catch err - global GV = err - end - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - - let # account for possible escapes via nested throws - result = @eval M $code_escapes() do - r = Ref{String}() - try - try - unsafeget(r) - catch err1 - throw(err1) - end - catch err2 - GR[] = err2 - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - let # account for possible escapes via `rethrow` - result = @eval M $code_escapes() do - r = Ref{String}() - try - try - unsafeget(r) - catch err1 - rethrow(err1) - end - catch err2 - GR[] = err2 - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - let # account for possible escapes via `rethrow` - result = @eval M $code_escapes() do - try - r = Ref{String}() - unsafeget(r) - catch - escape_rethrow!() - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - let # account for possible escapes via `rethrow` - result = @eval M $code_escapes() do - local t - try - r = Ref{String}() - t = unsafeget(r) - catch err - t = typeof(err) - escape_rethrow!() - end - return t - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - let # account for possible escapes via `Base.current_exceptions` - result = @eval M $code_escapes() do - try - r = Ref{String}() - unsafeget(r) - catch - GR[] = Base.current_exceptions() - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - let # account for possible escapes via `Base.current_exceptions` - result = @eval M $code_escapes() do - try - r = Ref{String}() - unsafeget(r) - catch - escape_current_exceptions!() - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - end - - let # contextual: escape information imposed on `err` shouldn't propagate to `r2`, but only to `r1` - result = @eval M $code_escapes() do - r1 = Ref{String}() - r2 = Ref{String}() - local ret - try - s1 = unsafeget(r1) - ret = sizeof(s1) - catch err - global GV = err - end - s2 = unsafeget(r2) - return s2, r2 - end - is = findall(isnew, result.ir.stmts.inst) - @test length(is) == 2 - i1, i2 = is - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i1)]) - @test !has_all_escape(result.state[SSAValue(i2)]) - @test has_return_escape(result.state[SSAValue(i2)], r) - end - - # XXX test cases below are currently broken because of the technical reason described in `escape_exception!` - - let # limited propagation: exception is caught within a frame => doesn't escape to a caller - result 
= @eval M $code_escapes() do - r = Ref{String}() - local ret - try - s = unsafeget(r) - ret = sizeof(s) - catch - ret = nothing - end - return ret - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)], r) - end - let # sequential: escape information imposed on `err1` and `err2 should propagate separately - result = @eval M $code_escapes() do - r1 = Ref{String}() - r2 = Ref{String}() - local ret - try - s1 = unsafeget(r1) - ret = sizeof(s1) - catch err1 - global GV = err1 - end - try - s2 = unsafeget(r2) - ret = sizeof(s2) - catch err2 - ret = err2 - end - return ret - end - is = findall(isnew, result.ir.stmts.inst) - @test length(is) == 2 - i1, i2 = is - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i1)]) - @test has_return_escape(result.state[SSAValue(i2)], r) - @test_broken !has_all_escape(result.state[SSAValue(i2)]) - end - let # nested: escape information imposed on `inner` shouldn't propagate to `s` - result = @eval M $code_escapes() do - r = Ref{String}() - local ret - try - s = unsafeget(r) - try - ret = sizeof(s) - catch inner - return inner - end - catch outer - ret = nothing - end - return ret - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)]) - end - let # merge: escape information imposed on `err1` and `err2 should be merged - result = @eval M $code_escapes() do - r = Ref{String}() - local ret - try - s = unsafeget(r) - ret = sizeof(s) - catch err1 - return err1 - end - try - s = unsafeget(r) - ret = sizeof(s) - catch err2 - return err2 - end - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - rs = findall(isreturn, result.ir.stmts.inst) - @test_broken !has_all_escape(result.state[SSAValue(i)]) - for r in rs - @test has_return_escape(result.state[SSAValue(i)], r) - end - end - let # no exception handling: should keep propagating the escape - result = @eval M $code_escapes() do - r = Ref{String}() - local ret - try - s = unsafeget(r) - ret = sizeof(s) - finally - if !@isdefined(ret) - ret = 42 - end - end - return ret - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)], r) - end -end - -@testset "field analysis / alias analysis" begin - # escaped allocations - # ------------------- - - # escaped object should escape its fields as well - let result = code_escapes((Any,)) do a - global GV = SafeRef{Any}(a) - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - @test has_all_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do a - global GV = (a,) - nothing - end - i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - @test has_all_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do a - o0 = SafeRef{Any}(a) - global GV = SafeRef(o0) - nothing - end - is = findall(isnew, result.ir.stmts.inst) - @test length(is) == 2 - i0, i1 = is - @test has_all_escape(result.state[SSAValue(i0)]) - @test has_all_escape(result.state[SSAValue(i1)]) - @test has_all_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do a - t0 = (a,) - global GV = (t0,) - nothing - end - inds = findall(iscall((result.ir, tuple)), result.ir.stmts.inst) - @assert 
length(inds) == 2 - for i in inds; @test has_all_escape(result.state[SSAValue(i)]); end - @test has_all_escape(result.state[Argument(2)]) - end - # global escape through `setfield!` - let result = code_escapes((Any,)) do a - r = SafeRef{Any}(:init) - global GV = r - r[] = a - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - @test has_all_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,Any)) do a, b - r = SafeRef{Any}(a) - global GV = r - r[] = b - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) - @test has_all_escape(result.state[Argument(2)]) # a - @test has_all_escape(result.state[Argument(3)]) # b - end - let result = @eval EATModule() begin - const Rx = SafeRef{String}("Rx") - $code_escapes((String,)) do s - Rx[] = s - Core.sizeof(Rx[]) - end - end - @test has_all_escape(result.state[Argument(2)]) - end - let result = @eval EATModule() begin - const Rx = SafeRef{String}("Rx") - $code_escapes((String,)) do s - setfield!(Rx, :x, s) - Core.sizeof(Rx[]) - end - end - @test has_all_escape(result.state[Argument(2)]) - end - let M = EATModule() - @eval M module ___xxx___ - import ..SafeRef - const Rx = SafeRef("Rx") - end - result = @eval M begin - $code_escapes((String,)) do s - rx = getfield(___xxx___, :Rx) - rx[] = s - nothing - end - end - @test has_all_escape(result.state[Argument(2)]) - end - - # field escape - # ------------ - - # field escape should propagate to :new arguments - let result = code_escapes((String,)) do a - o = SafeRef(a) - f = o[] - return f - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - let result = code_escapes((String,)) do a - t = SafeRef((a,)) - f = t[][1] - return f - end - i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - result.state[SSAValue(i)].AliasInfo - end - let result = code_escapes((String, String)) do a, b - obj = SafeRefs(a, b) - fld1 = obj[1] - fld2 = obj[2] - return (fld1, fld2) - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test has_return_escape(result.state[Argument(3)], r) # b - @test is_load_forwardable(result.state[SSAValue(i)]) - end - - # field escape should propagate to `setfield!` argument - let result = code_escapes((String,)) do a - o = SafeRef("foo") - o[] = a - f = o[] - return f - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - # propagate escape information imposed on return value of `setfield!` call - let result = code_escapes((String,)) do a - obj = SafeRef("foo") - return (obj[] = a) - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - - # nested allocations - let result = code_escapes((String,)) do a - o1 = SafeRef(a) - o2 = SafeRef(o1) - return o2[] - end - 
r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - for i in 1:length(result.ir.stmts) - if isnew(result.ir.stmts.inst[i]) && isT(SafeRef{String})(result.ir.stmts.type[i]) - @test has_return_escape(result.state[SSAValue(i)], r) - elseif isnew(result.ir.stmts.inst[i]) && isT(SafeRef{SafeRef{String}})(result.ir.stmts.type[i]) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - end - let result = code_escapes((String,)) do a - o1 = (a,) - o2 = (o1,) - return o2[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - for i in 1:length(result.ir.stmts) - if isnew(result.ir.stmts.inst[i]) && isT(Tuple{String})(result.ir.stmts.type[i]) - @test has_return_escape(result.state[SSAValue(i)], r) - elseif isnew(result.ir.stmts.inst[i]) && isT(Tuple{Tuple{String}})(result.ir.stmts.type[i]) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - end - let result = code_escapes((String,)) do a - o1 = SafeRef(a) - o2 = SafeRef(o1) - o1′ = o2[] - a′ = o1′[] - return a′ - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - for i in findall(isnew, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - let result = code_escapes() do - o1 = SafeRef("foo") - o2 = SafeRef(o1) - return o2 - end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(isnew, result.ir.stmts.inst) - @test has_return_escape(result.state[SSAValue(i)], r) - end - end - let result = code_escapes() do - o1 = SafeRef("foo") - o2′ = SafeRef(nothing) - o2 = SafeRef{SafeRef}(o2′) - o2[] = o1 - return o2 - end - r = only(findall(isreturn, result.ir.stmts.inst)) - findall(1:length(result.ir.stmts)) do i - if isnew(result.ir.stmts[i][:inst]) - t = result.ir.stmts[i][:type] - return t === SafeRef{String} || # o1 - t === SafeRef{SafeRef} # o2 - end - return false - end |> x->foreach(x) do i - @test has_return_escape(result.state[SSAValue(i)], r) - end - end - let result = code_escapes((String,)) do x - broadcast(identity, Ref(x)) - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - - # ϕ-node allocations - let result = code_escapes((Bool,Any,Any)) do cond, x, y - if cond - ϕ = SafeRef{Any}(x) - else - ϕ = SafeRef{Any}(y) - end - return ϕ[] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(3)], r) # x - @test has_return_escape(result.state[Argument(4)], r) # y - i = only(findall(isϕ, result.ir.stmts.inst)) - @test is_load_forwardable(result.state[SSAValue(i)]) - for i in findall(isnew, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - let result = code_escapes((Bool,Any,Any)) do cond, x, y - if cond - ϕ2 = ϕ1 = SafeRef{Any}(x) - else - ϕ2 = ϕ1 = SafeRef{Any}(y) - end - return ϕ1[], ϕ2[] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(3)], r) # x - @test has_return_escape(result.state[Argument(4)], r) # y - for i in findall(isϕ, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - for i in findall(isnew, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - # when ϕ-node merges values with different 
types - let result = code_escapes((Bool,String,String,String)) do cond, x, y, z - local out - if cond - ϕ = SafeRef(x) - out = ϕ[] - else - ϕ = SafeRefs(z, y) - end - return @isdefined(out) ? out : throw(ϕ) - end - r = only(findall(isreturn, result.ir.stmts.inst)) - t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst)) - ϕ = only(findall(isT(Union{SafeRef{String},SafeRefs{String,String}}), result.ir.stmts.type)) - @test has_return_escape(result.state[Argument(3)], r) # x - @test !has_return_escape(result.state[Argument(4)], r) # y - @test has_return_escape(result.state[Argument(5)], r) # z - @test has_thrown_escape(result.state[SSAValue(ϕ)], t) - end - - # alias analysis - # -------------- - - # alias via getfield & Expr(:new) - let result = code_escapes((String,)) do s - r = SafeRef(s) - return r[] - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test isaliased(Argument(2), val, result.state) - @test !isaliased(Argument(2), SSAValue(i), result.state) - end - let result = code_escapes((String,)) do s - r1 = SafeRef(s) - r2 = SafeRef(r1) - return r2[] - end - i1, i2 = findall(isnew, result.ir.stmts.inst) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test !isaliased(SSAValue(i1), SSAValue(i2), result.state) - @test isaliased(SSAValue(i1), val, result.state) - @test !isaliased(SSAValue(i2), val, result.state) - end - let result = code_escapes((String,)) do s - r1 = SafeRef(s) - r2 = SafeRef(r1) - return r2[][] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test isaliased(Argument(2), val, result.state) - for i in findall(isnew, result.ir.stmts.inst) - @test !isaliased(SSAValue(i), val, result.state) - end - end - let result = @eval EATModule() begin - const Rx = SafeRef("Rx") - $code_escapes((String,)) do s - r = SafeRef(Rx) - rx = r[] # rx aliased to Rx - rx[] = s - nothing - end - end - i = findfirst(isnew, result.ir.stmts.inst) - @test has_all_escape(result.state[Argument(2)]) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - # alias via getfield & setfield! 
- let result = code_escapes((String,)) do s - r = Ref{String}() - r[] = s - return r[] - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test isaliased(Argument(2), val, result.state) - @test !isaliased(Argument(2), SSAValue(i), result.state) - end - let result = code_escapes((String,)) do s - r1 = Ref(s) - r2 = Ref{Base.RefValue{String}}() - r2[] = r1 - return r2[] - end - i1, i2 = findall(isnew, result.ir.stmts.inst) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test !isaliased(SSAValue(i1), SSAValue(i2), result.state) - @test isaliased(SSAValue(i1), val, result.state) - @test !isaliased(SSAValue(i2), val, result.state) - end - let result = code_escapes((String,)) do s - r1 = Ref{String}() - r2 = Ref{Base.RefValue{String}}() - r2[] = r1 - r1[] = s - return r2[][] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test isaliased(Argument(2), val, result.state) - for i in findall(isnew, result.ir.stmts.inst) - @test !isaliased(SSAValue(i), val, result.state) - end - result = code_escapes((String,)) do s - r1 = Ref{String}() - r2 = Ref{Base.RefValue{String}}() - r1[] = s - r2[] = r1 - return r2[][] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test isaliased(Argument(2), val, result.state) - for i in findall(isnew, result.ir.stmts.inst) - @test !isaliased(SSAValue(i), val, result.state) - end - end - let result = @eval EATModule() begin - const Rx = SafeRef("Rx") - $code_escapes((SafeRef{String}, String,)) do _rx, s - r = SafeRef(_rx) - r[] = Rx - rx = r[] # rx aliased to Rx - rx[] = s - nothing - end - end - i = findfirst(isnew, result.ir.stmts.inst) - @test has_all_escape(result.state[Argument(3)]) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - # alias via typeassert - let result = code_escapes((Any,)) do a - r = a::String - return r - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test has_return_escape(result.state[Argument(2)], r) # a - @test isaliased(Argument(2), val, result.state) # a <-> r - end - let result = code_escapes((Any,)) do a - global GV - (g::SafeRef{Any})[] = a - nothing - end - @test has_all_escape(result.state[Argument(2)]) - end - # alias via ifelse - let result = code_escapes((Bool,Any,Any)) do c, a, b - r = ifelse(c, a, b) - return r - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test has_return_escape(result.state[Argument(3)], r) # a - @test has_return_escape(result.state[Argument(4)], r) # b - @test !isaliased(Argument(2), val, result.state) # c r - @test isaliased(Argument(3), val, result.state) # a <-> r - @test isaliased(Argument(4), val, result.state) # b <-> r - end - let result = @eval EATModule() begin - const Lx, Rx = SafeRef("Lx"), SafeRef("Rx") - $code_escapes((Bool,String,)) do c, a - r = ifelse(c, Lx, Rx) - r[] = a - nothing - end - end - @test has_all_escape(result.state[Argument(3)]) # a - end - # alias via ϕ-node - let result = code_escapes((Bool,String)) do cond, x - if cond - ϕ2 = ϕ1 = SafeRef("foo") - else - ϕ2 = ϕ1 = SafeRef("bar") - end - ϕ2[] = x - return ϕ1[] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = 
(result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test has_return_escape(result.state[Argument(3)], r) # x - @test isaliased(Argument(3), val, result.state) # x - for i in findall(isϕ, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - for i in findall(isnew, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - let result = code_escapes((Bool,Bool,String)) do cond1, cond2, x - if cond1 - ϕ2 = ϕ1 = SafeRef("foo") - else - ϕ2 = ϕ1 = SafeRef("bar") - end - cond2 && (ϕ2[] = x) - return ϕ1[] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test has_return_escape(result.state[Argument(4)], r) # x - @test isaliased(Argument(4), val, result.state) # x - for i in findall(isϕ, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - for i in findall(isnew, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - end - # alias via π-node - let result = code_escapes((Any,)) do x - if isa(x, String) - return x - end - throw("error!") - end - r = only(findall(isreturn, result.ir.stmts.inst)) - rval = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue - @test has_return_escape(result.state[Argument(2)], r) # x - @test isaliased(Argument(2), rval, result.state) - end - let result = code_escapes((String,)) do x - global GV - l = g - if isa(l, SafeRef{String}) - l[] = x - end - nothing - end - @test has_all_escape(result.state[Argument(2)]) # x - end - # circular reference - let result = code_escapes() do - x = Ref{Any}() - x[] = x - return x[] - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - end - let result = @eval Module() begin - const Rx = Ref{Any}() - Rx[] = Rx - $code_escapes() do - r = Rx[]::Base.RefValue{Any} - return r[] - end - end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(iscall((result.ir, getfield)), result.ir.stmts.inst) - @test has_return_escape(result.state[SSAValue(i)], r) - end - end - let result = @eval Module() begin - @noinline function genr() - r = Ref{Any}() - r[] = r - return r - end - $code_escapes() do - x = genr() - return x[] - end - end - i = only(findall(isinvoke(:genr), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - end - - # dynamic semantics - # ----------------- - - # conservatively handle untyped objects - let result = @eval code_escapes((Any,Any,)) do T, x - obj = $(Expr(:new, :T, :x)) - end - t = only(findall(isnew, result.ir.stmts.inst)) - @test #=T=# has_thrown_escape(result.state[Argument(2)], t) # T - @test #=x=# has_thrown_escape(result.state[Argument(3)], t) # x - end - let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z - obj = $(Expr(:new, :T, :x, :y)) - return getfield(obj, :x) - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test #=x=# has_return_escape(result.state[Argument(3)], r) - @test #=y=# has_return_escape(result.state[Argument(4)], r) - @test #=z=# !has_return_escape(result.state[Argument(5)], r) - end - let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z - obj = $(Expr(:new, :T, :x)) - setfield!(obj, :x, y) - return getfield(obj, :x) - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, 
result.ir.stmts.inst)) - @test #=x=# has_return_escape(result.state[Argument(3)], r) - @test #=y=# has_return_escape(result.state[Argument(4)], r) - @test #=z=# !has_return_escape(result.state[Argument(5)], r) - end - - # conservatively handle unknown field: - # all fields should be escaped, but the allocation itself doesn't need to be escaped - let result = code_escapes((String, Symbol)) do a, fld - obj = SafeRef(a) - return getfield(obj, fld) - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test !is_load_forwardable(result.state[SSAValue(i)]) # obj - end - let result = code_escapes((String, String, Symbol)) do a, b, fld - obj = SafeRefs(a, b) - return getfield(obj, fld) # should escape both `a` and `b` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test has_return_escape(result.state[Argument(3)], r) # b - @test !is_load_forwardable(result.state[SSAValue(i)]) # obj - end - let result = code_escapes((String, String, Int)) do a, b, idx - obj = SafeRefs(a, b) - return obj[idx] # should escape both `a` and `b` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test has_return_escape(result.state[Argument(3)], r) # b - @test !is_load_forwardable(result.state[SSAValue(i)]) # obj - end - let result = code_escapes((String, String, Symbol)) do a, b, fld - obj = SafeRefs("a", "b") - setfield!(obj, fld, a) - return obj[2] # should escape `a` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test !has_return_escape(result.state[Argument(3)], r) # b - @test !is_load_forwardable(result.state[SSAValue(i)]) # obj - end - let result = code_escapes((String, Symbol)) do a, fld - obj = SafeRefs("a", "b") - setfield!(obj, fld, a) - return obj[1] # this should escape `a` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test !is_load_forwardable(result.state[SSAValue(i)]) # obj - end - let result = code_escapes((String, String, Int)) do a, b, idx - obj = SafeRefs("a", "b") - obj[idx] = a - return obj[2] # should escape `a` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # a - @test !has_return_escape(result.state[Argument(3)], r) # b - @test !is_load_forwardable(result.state[SSAValue(i)]) # obj - end - - # interprocedural - # --------------- - - let result = @eval EATModule() begin - @noinline getx(obj) = obj[] - $code_escapes((String,)) do a - obj = SafeRef(a) - fld = getx(obj) - return fld - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) - # NOTE we can't scalar replace `obj`, but still we may want to stack allocate it - @test_broken is_load_forwardable(result.state[SSAValue(i)]) - end - - # TODO interprocedural alias analysis - let result = code_escapes((SafeRef{String},)) do s - s[] = "bar" - global GV = s[] - nothing - end - @test_broken !has_all_escape(result.state[Argument(2)]) - 
end - - # aliasing between arguments - let result = @eval EATModule() begin - @noinline setxy!(x, y) = x[] = y - $code_escapes((String,)) do y - x = SafeRef("init") - setxy!(x, y) - return x - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - @test has_return_escape(result.state[Argument(2)], r) # y - end - let result = @eval EATModule() begin - @noinline setxy!(x, y) = x[] = y - $code_escapes((String,)) do y - x1 = SafeRef("init") - x2 = SafeRef(y) - setxy!(x1, x2[]) - return x1 - end - end - i1, i2 = findall(isnew, result.ir.stmts.inst) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i1)], r) - @test !has_return_escape(result.state[SSAValue(i2)], r) - @test has_return_escape(result.state[Argument(2)], r) # y - end - let result = @eval EATModule() begin - @noinline mysetindex!(x, a) = x[1] = a - const Ax = Vector{Any}(undef, 1) - $code_escapes((String,)) do s - mysetindex!(Ax, s) - end - end - @test has_all_escape(result.state[Argument(2)]) # s - end - - # TODO flow-sensitivity? - # ---------------------- - - let result = code_escapes((Any,Any)) do a, b - r = SafeRef{Any}(a) - r[] = b - return r[] - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[Argument(2)], r) # a - @test has_return_escape(result.state[Argument(3)], r) # b - @test is_load_forwardable(result.state[SSAValue(i)]) - end - let result = code_escapes((Any,Any)) do a, b - r = SafeRef{Any}(:init) - r[] = a - r[] = b - return r[] - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[Argument(2)], r) # a - @test has_return_escape(result.state[Argument(3)], r) # b - @test is_load_forwardable(result.state[SSAValue(i)]) - end - let result = code_escapes((Any,Any,Bool)) do a, b, cond - r = SafeRef{Any}(:init) - if cond - r[] = a - return r[] - else - r[] = b - return nothing - end - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test is_load_forwardable(result.state[SSAValue(i)]) - r = only(findall(result.ir.stmts.inst) do @nospecialize x - isreturn(x) && isa(x.val, Core.SSAValue) - end) - @test has_return_escape(result.state[Argument(2)], r) # a - @test_broken !has_return_escape(result.state[Argument(3)], r) # b - end - - # handle conflicting field information correctly - let result = code_escapes((Bool,String,String,)) do cnd, baz, qux - if cnd - o = SafeRef("foo") - else - o = SafeRefs("bar", baz) - r = getfield(o, 2) - end - if cnd - o = o::SafeRef - setfield!(o, 1, qux) - r = getfield(o, 1) - end - r - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(3)], r) # baz - @test has_return_escape(result.state[Argument(4)], r) # qux - for new in findall(isnew, result.ir.stmts.inst) - @test is_load_forwardable(result.state[SSAValue(new)]) - end - end - let result = code_escapes((Bool,String,String,)) do cnd, baz, qux - if cnd - o = SafeRefs("foo", "bar") - r = setfield!(o, 2, baz) - else - o = SafeRef(qux) - end - if !cnd - o = o::SafeRef - r = getfield(o, 1) - end - r - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(3)], r) # baz - @test has_return_escape(result.state[Argument(4)], r) # qux - end - - # foreigncall should disable field analysis - let 
result = code_escapes((Any,Nothing,Int,UInt)) do t, mt, lim, world - ambig = false - min = Ref{UInt}(typemin(UInt)) - max = Ref{UInt}(typemax(UInt)) - has_ambig = Ref{Int32}(0) - mt = ccall(:jl_matching_methods, Any, - (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ref{Int32}), - t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool} - return mt, has_ambig[] - end - for i in findall(isnew, result.ir.stmts.inst) - @test !is_load_forwardable(result.state[SSAValue(i)]) - end - end -end - -# demonstrate the power of our field / alias analysis with a realistic end to end example -abstract type AbstractPoint{T} end -mutable struct MPoint{T} <: AbstractPoint{T} - x::T - y::T -end -add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y) -function compute(T, ax, ay, bx, by) - a = T(ax, ay) - b = T(bx, by) - for i in 0:(100000000-1) - c = add(a, b) # replaceable - a = add(c, b) # replaceable - end - a.x, a.y -end -let result = @code_escapes compute(MPoint, 1+.5im, 2+.5im, 2+.25im, 4+.75im) - for i in findall(1:length(result.ir.stmts)) do idx - inst = EscapeAnalysis.getinst(result.ir, idx) - stmt = inst[:inst] - return (isnew(stmt) || isϕ(stmt)) && inst[:type] <: MPoint - end - @test is_load_forwardable(result.state[SSAValue(i)]) - end -end -function compute(a, b) - for i in 0:(100000000-1) - c = add(a, b) # replaceable - a = add(c, b) # unreplaceable (aliased to the call argument `a`) - end - a.x, a.y -end -let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) - idxs = findall(1:length(result.ir.stmts)) do idx - inst = EscapeAnalysis.getinst(result.ir, idx) - stmt = inst[:inst] - return isnew(stmt) && inst[:type] <: MPoint - end - @assert length(idxs) == 2 - @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1 -end -function compute!(a, b) - for i in 0:(100000000-1) - c = add(a, b) # replaceable - a′ = add(c, b) # replaceable - a.x = a′.x - a.y = a′.y - end -end -let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) - for i in findall(1:length(result.ir.stmts)) do idx - inst = EscapeAnalysis.getinst(result.ir, idx) - stmt = inst[:inst] - return isnew(stmt) && inst[:type] <: MPoint - end - @test is_load_forwardable(result.state[SSAValue(i)]) - end -end - -@testset "array primitives" begin - inbounds = Base.JLOptions().check_bounds == 0 - - # arrayref - let result = code_escapes((Vector{String},Int)) do xs, i - s = Base.arrayref(true, xs, i) - return s - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test has_thrown_escape(result.state[Argument(2)]) # xs - @test !has_return_escape(result.state[Argument(3)], r) # i - end - let result = code_escapes((Vector{String},Int)) do xs, i - s = Base.arrayref(false, xs, i) - return s - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test !has_thrown_escape(result.state[Argument(2)]) # xs - @test !has_return_escape(result.state[Argument(3)], r) # i - end - inbounds && let result = code_escapes((Vector{String},Int)) do xs, i - s = @inbounds xs[i] - return s - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test !has_thrown_escape(result.state[Argument(2)]) # xs - @test !has_return_escape(result.state[Argument(3)], r) # i - end - let result = code_escapes((Vector{String},Bool)) do xs, i - c = Base.arrayref(true, xs, i) # TypeError will 
happen here - return c - end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - end - let result = code_escapes((String,Int)) do xs, i - c = Base.arrayref(true, xs, i) # TypeError will happen here - return c - end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - end - let result = code_escapes((AbstractVector{String},Int)) do xs, i - c = Base.arrayref(true, xs, i) # TypeError may happen here - return c - end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - end - let result = code_escapes((Vector{String},Any)) do xs, i - c = Base.arrayref(true, xs, i) # TypeError may happen here - return c - end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - end - - # arrayset - let result = code_escapes((Vector{String},String,Int,)) do xs, x, i - Base.arrayset(true, xs, x, i) - return xs - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test has_thrown_escape(result.state[Argument(2)]) # xs - @test has_return_escape(result.state[Argument(3)], r) # x - end - let result = code_escapes((Vector{String},String,Int,)) do xs, x, i - Base.arrayset(false, xs, x, i) - return xs - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test !has_thrown_escape(result.state[Argument(2)]) # xs - @test has_return_escape(result.state[Argument(3)], r) # x - end - inbounds && let result = code_escapes((Vector{String},String,Int,)) do xs, x, i - @inbounds xs[i] = x - return xs - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test !has_thrown_escape(result.state[Argument(2)]) # xs - @test has_return_escape(result.state[Argument(3)], r) # x - end - let result = code_escapes((String,String,String,)) do s, t, u - xs = Vector{String}(undef, 3) - Base.arrayset(true, xs, s, 1) - Base.arrayset(true, xs, t, 2) - Base.arrayset(true, xs, u, 3) - return xs - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - for i in 2:result.state.nargs - @test has_return_escape(result.state[Argument(i)], r) - end - end - let result = code_escapes((Vector{String},String,Bool,)) do xs, x, i - Base.arrayset(true, xs, x, i) # TypeError will happen here - return xs - end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - @test has_thrown_escape(result.state[Argument(3)], t) # x - end - let result = code_escapes((String,String,Int,)) do xs, x, i - Base.arrayset(true, xs, x, i) # TypeError will happen here - return xs - end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs::String - @test has_thrown_escape(result.state[Argument(3)], t) # x::String - end - let result = code_escapes((AbstractVector{String},String,Int,)) do xs, x, i - Base.arrayset(true, xs, x, i) # TypeError may happen here - return xs - end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) - @test 
has_thrown_escape(result.state[Argument(2)], t) # xs - @test has_thrown_escape(result.state[Argument(3)], t) # x - end - let result = code_escapes((Vector{String},AbstractString,Int,)) do xs, x, i - Base.arrayset(true, xs, x, i) # TypeError may happen here - return xs - end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - @test has_thrown_escape(result.state[Argument(3)], t) # x - end - - # arrayref and arrayset - let result = code_escapes() do - a = Vector{Vector{Any}}(undef, 1) - b = Any[] - a[1] = b - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - ai = only(findall(result.ir.stmts.inst) do @nospecialize x - isarrayalloc(x) && x.args[2] === Vector{Vector{Any}} - end) - bi = only(findall(result.ir.stmts.inst) do @nospecialize x - isarrayalloc(x) && x.args[2] === Vector{Any} - end) - @test !has_return_escape(result.state[SSAValue(ai)], r) - @test has_return_escape(result.state[SSAValue(bi)], r) - end - let result = code_escapes() do - a = Vector{Vector{Any}}(undef, 1) - b = Any[] - a[1] = b - return a - end - r = only(findall(isreturn, result.ir.stmts.inst)) - ai = only(findall(result.ir.stmts.inst) do @nospecialize x - isarrayalloc(x) && x.args[2] === Vector{Vector{Any}} - end) - bi = only(findall(result.ir.stmts.inst) do @nospecialize x - isarrayalloc(x) && x.args[2] === Vector{Any} - end) - @test has_return_escape(result.state[SSAValue(ai)], r) - @test has_return_escape(result.state[SSAValue(bi)], r) - end - let result = code_escapes((Vector{Any},String,Int,Int)) do xs, s, i, j - x = SafeRef(s) - xs[i] = x - xs[j] # potential error - end - i = only(findall(isnew, result.ir.stmts.inst)) - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(3)], t) # s - @test has_thrown_escape(result.state[SSAValue(i)], t) # x - end - - # arraysize - let result = code_escapes((Vector{Any},)) do xs - Core.arraysize(xs, 1) - end - t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst)) - @test !has_thrown_escape(result.state[Argument(2)], t) - end - let result = code_escapes((Vector{Any},Int,)) do xs, dim - Core.arraysize(xs, dim) - end - t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst)) - @test !has_thrown_escape(result.state[Argument(2)], t) - end - let result = code_escapes((Any,)) do xs - Core.arraysize(xs, 1) - end - t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) - end - - # arraylen - let result = code_escapes((Vector{Any},)) do xs - Base.arraylen(xs) - end - t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst)) - @test !has_thrown_escape(result.state[Argument(2)], t) # xs - end - let result = code_escapes((String,)) do xs - Base.arraylen(xs) - end - t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - end - let result = code_escapes((Vector{Any},)) do xs - Base.arraylen(xs, 1) - end - t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], t) # xs - end - - # array resizing - # without BoundsErrors - let result = code_escapes((Vector{Any},String)) do xs, x - @ccall jl_array_grow_beg(xs::Any, 2::UInt)::Cvoid - xs[1] = x - xs - end - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test 
!has_thrown_escape(result.state[Argument(2)], t) # xs - @test !has_thrown_escape(result.state[Argument(3)], t) # x - end - let result = code_escapes((Vector{Any},String)) do xs, x - @ccall jl_array_grow_end(xs::Any, 2::UInt)::Cvoid - xs[1] = x - xs - end - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test !has_thrown_escape(result.state[Argument(2)], t) # xs - @test !has_thrown_escape(result.state[Argument(3)], t) # x - end - # with possible BoundsErrors - let result = code_escapes((String,)) do x - xs = Any[1,2,3] - xs[3] = x - @ccall jl_array_del_beg(xs::Any, 2::UInt)::Cvoid # can potentially throw - xs - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[SSAValue(i)], t) # xs - @test has_thrown_escape(result.state[Argument(2)], t) # x - end - let result = code_escapes((String,)) do x - xs = Any[1,2,3] - xs[1] = x - @ccall jl_array_del_end(xs::Any, 2::UInt)::Cvoid # can potentially throw - xs - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[SSAValue(i)], t) # xs - @test has_thrown_escape(result.state[Argument(2)], t) # x - end - let result = code_escapes((String,)) do x - xs = Any[x] - @ccall jl_array_grow_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[SSAValue(i)], t) # xs - @test has_thrown_escape(result.state[Argument(2)], t) # x - end - let result = code_escapes((String,)) do x - xs = Any[x] - @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[SSAValue(i)], t) # xs - @test has_thrown_escape(result.state[Argument(2)], t) # x - end - inbounds && let result = code_escapes((String,)) do x - xs = @inbounds Any[x] - @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[SSAValue(i)], t) # xs - @test has_thrown_escape(result.state[Argument(2)], t) # x - end - - # array copy - let result = code_escapes((Vector{Any},)) do xs - return copy(xs) - end - i = only(findall(isarraycopy, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - @test_broken !has_return_escape(result.state[Argument(2)], r) - end - let result = code_escapes((String,)) do s - xs = String[s] - xs′ = copy(xs) - return xs′[1] - end - i1 = only(findall(isarrayalloc, result.ir.stmts.inst)) - i2 = only(findall(isarraycopy, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i1)]) - @test !has_return_escape(result.state[SSAValue(i2)]) - @test has_return_escape(result.state[Argument(2)], r) # s - end - let result = code_escapes((Vector{Any},)) do xs - xs′ = copy(xs) - return xs′[1] # may potentially throw BoundsError, should escape `xs` conservatively (i.e. 
escape its elements) - end - i = only(findall(isarraycopy, result.ir.stmts.inst)) - ref = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - ret = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_thrown_escape(result.state[SSAValue(i)], ref) - @test_broken !has_return_escape(result.state[SSAValue(i)], ret) - @test has_thrown_escape(result.state[Argument(2)], ref) - @test has_return_escape(result.state[Argument(2)], ret) - end - let result = code_escapes((String,)) do s - xs = Vector{String}(undef, 1) - xs[1] = s - xs′ = copy(xs) - length(xs′) > 2 && throw(xs′) - return xs′ - end - i1 = only(findall(isarrayalloc, result.ir.stmts.inst)) - i2 = only(findall(isarraycopy, result.ir.stmts.inst)) - t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_thrown_escape(result.state[SSAValue(i1)], t) - @test_broken !has_return_escape(result.state[SSAValue(i1)], r) - @test has_thrown_escape(result.state[SSAValue(i2)], t) - @test has_return_escape(result.state[SSAValue(i2)], r) - @test has_thrown_escape(result.state[Argument(2)], t) - @test has_return_escape(result.state[Argument(2)], r) - end - - # isassigned - let result = code_escapes((Vector{Any},Int)) do xs, i - return isassigned(xs, i) - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[Argument(2)], r) - @test !has_thrown_escape(result.state[Argument(2)]) - end - - # indexing analysis - # ----------------- - - # safe case - let result = code_escapes((String,String)) do s, t - a = Vector{Any}(undef, 2) - a[1] = s - a[2] = t - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - @test has_return_escape(result.state[Argument(2)], r) # s - @test !has_return_escape(result.state[Argument(3)], r) # t - end - let result = code_escapes((String,String)) do s, t - a = Matrix{Any}(undef, 1, 2) - a[1, 1] = s - a[1, 2] = t - return a[1, 1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - @test has_return_escape(result.state[Argument(2)], r) # s - @test !has_return_escape(result.state[Argument(3)], r) # t - end - let result = code_escapes((Bool,String,String,String)) do c, s, t, u - a = Vector{Any}(undef, 2) - if c - a[1] = s - a[2] = u - else - a[1] = t - a[2] = u - end - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test is_load_forwardable(result.state[SSAValue(i)]) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test has_return_escape(result.state[Argument(3)], r) # s - @test has_return_escape(result.state[Argument(4)], r) # t - @test !has_return_escape(result.state[Argument(5)], r) # u - end - let result = code_escapes((Bool,String,String,String)) do c, s, t, u - a = Any[nothing, nothing] # TODO how to deal with loop indexing? 
- if c - a[1] = s - a[2] = u - else - a[1] = t - a[2] = u - end - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test_broken is_load_forwardable(result.state[SSAValue(i)]) - @test has_return_escape(result.state[Argument(3)], r) # s - @test has_return_escape(result.state[Argument(4)], r) # t - @test_broken !has_return_escape(result.state[Argument(5)], r) # u - end - let result = code_escapes((String,)) do s - a = Vector{Vector{Any}}(undef, 1) - b = Any[s] - a[1] = b - return a[1][1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - is = findall(isarrayalloc, result.ir.stmts.inst) - @assert length(is) == 2 - ia, ib = is - @test !has_return_escape(result.state[SSAValue(ia)], r) - @test is_load_forwardable(result.state[SSAValue(ia)]) - @test !has_return_escape(result.state[SSAValue(ib)], r) - @test_broken is_load_forwardable(result.state[SSAValue(ib)]) - @test has_return_escape(result.state[Argument(2)], r) # s - end - let result = code_escapes((Bool,String,String,Regex,Regex,)) do c, s1, s2, t1, t2 - if c - a = Vector{String}(undef, 2) - a[1] = s1 - a[2] = s2 - else - a = Vector{Regex}(undef, 2) - a[1] = t1 - a[2] = t2 - end - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(isarrayalloc, result.ir.stmts.inst) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test is_load_forwardable(result.state[SSAValue(i)]) - end - @test has_return_escape(result.state[Argument(3)], r) # s1 - @test !has_return_escape(result.state[Argument(4)], r) # s2 - @test has_return_escape(result.state[Argument(5)], r) # t1 - @test !has_return_escape(result.state[Argument(6)], r) # t2 - end - let result = code_escapes((String,String,Int)) do s, t, i - a = Any[s] - push!(a, t) - return a[2] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test_broken is_load_forwardable(result.state[SSAValue(i)]) - @test_broken !has_return_escape(result.state[Argument(2)], r) # s - @test has_return_escape(result.state[Argument(3)], r) # t - end - # unsafe cases - let result = code_escapes((String,String,Int)) do s, t, i - a = Vector{Any}(undef, 2) - a[1] = s - a[2] = t - return a[i] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test !is_load_forwardable(result.state[SSAValue(i)]) - @test has_return_escape(result.state[Argument(2)], r) # s - @test has_return_escape(result.state[Argument(3)], r) # t - end - let result = code_escapes((String,String,Int)) do s, t, i - a = Vector{Any}(undef, 2) - a[1] = s - a[i] = t - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - @test !is_load_forwardable(result.state[SSAValue(i)]) - @test has_return_escape(result.state[Argument(2)], r) # s - @test has_return_escape(result.state[Argument(3)], r) # t - end - let result = code_escapes((String,String,Int,Int,Int)) do s, t, i, j, k - a = Vector{Any}(undef, 2) - a[3] = s # BoundsError - a[1] = t - return a[1] - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - 
@test !is_load_forwardable(result.state[SSAValue(i)]) - end - let result = @eval Module() begin - @noinline some_resize!(a) = pushfirst!(a, nothing) - $code_escapes((String,String,Int)) do s, t, i - a = Vector{Any}(undef, 2) - a[1] = s - some_resize!(a) - return a[2] - end - end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)], r) - @test !is_load_forwardable(result.state[SSAValue(i)]) - end - - # circular reference - let result = code_escapes() do - xs = Vector{Any}(undef, 1) - xs[1] = xs - return xs[1] - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - end - let result = @eval Module() begin - const Ax = Vector{Any}(undef, 1) - Ax[1] = Ax - $code_escapes() do - xs = Ax[1]::Vector{Any} - return xs[1] - end - end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(iscall((result.ir, Core.arrayref)), result.ir.stmts.inst) - @test has_return_escape(result.state[SSAValue(i)], r) - end - end - let result = @eval Module() begin - @noinline function genxs() - xs = Vector{Any}(undef, 1) - xs[1] = xs - return xs - end - $code_escapes() do - xs = genxs() - return xs[1] - end - end - i = only(findall(isinvoke(:genxs), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - end -end - -# demonstrate array primitive support with a realistic end to end example -let result = code_escapes((Int,String,)) do n,s - xs = String[] - for i in 1:n - push!(xs, s) - end - xs - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - @test !has_thrown_escape(result.state[SSAValue(i)]) - @test has_return_escape(result.state[Argument(3)], r) # s - @test !has_thrown_escape(result.state[Argument(3)]) # s -end -let result = code_escapes((Int,String,)) do n,s - xs = String[] - for i in 1:n - pushfirst!(xs, s) - end - xs - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) # xs - @test !has_thrown_escape(result.state[SSAValue(i)]) # xs - @test has_return_escape(result.state[Argument(3)], r) # s - @test !has_thrown_escape(result.state[Argument(3)]) # s -end -let result = code_escapes((String,String,String)) do s, t, u - xs = String[] - resize!(xs, 3) - xs[1] = s - xs[1] = t - xs[1] = u - xs - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) - @test has_thrown_escape(result.state[SSAValue(i)]) # xs - @test has_return_escape(result.state[Argument(2)], r) # s - @test has_return_escape(result.state[Argument(3)], r) # t - @test has_return_escape(result.state[Argument(4)], r) # u -end - -@static if isdefined(Core, :ImmutableArray) - -import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw - -@testset "ImmutableArray" begin - # arrayfreeze - let result = code_escapes((Vector{Any},)) do xs - arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Vector,)) do xs - arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do xs 
- arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((ImmutableArray{Any,1},)) do xs - arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes() do - xs = Any[] - arrayfreeze(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(1)]) - end - - # mutating_arrayfreeze - let result = code_escapes((Vector{Any},)) do xs - mutating_arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Vector,)) do xs - mutating_arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do xs - mutating_arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((ImmutableArray{Any,1},)) do xs - mutating_arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes() do - xs = Any[] - mutating_arrayfreeze(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(1)]) - end - - # arraythaw - let result = code_escapes((ImmutableArray{Any,1},)) do xs - arraythaw(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((ImmutableArray,)) do xs - arraythaw(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do xs - arraythaw(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Vector{Any},)) do xs - arraythaw(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes() do - xs = ImmutableArray(Any[]) - arraythaw(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(1)]) - end -end - -# demonstrate some arrayfreeze optimizations -# !has_return_escape(ary) means ary is eligible for arrayfreeze to mutating_arrayfreeze optimization -let result = code_escapes((Int,)) do n - xs = collect(1:n) - ImmutableArray(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)]) -end -let result = code_escapes((Vector{Float64},)) do xs - ys = sin.(xs) - ImmutableArray(ys) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)]) -end -let result = code_escapes((Vector{Pair{Int,String}},)) do xs - n = maximum(first, xs) - ys = Vector{String}(undef, n) - for (i, s) in xs - ys[i] = s - end - ImmutableArray(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)]) -end - -end # @static if isdefined(Core, :ImmutableArray) - -# demonstrate a simple type level analysis can sometimes improve the analysis accuracy -# by compensating the lack of yet unimplemented analyses -@testset "special-casing bitstype" begin - let result = code_escapes((Nothing,)) do a - global GV = a - end - @test !(has_all_escape(result.state[Argument(2)])) - end - - let result = code_escapes((Int,)) do a - o = SafeRef(a) - f = o[] - return f - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - end - - # an escaped tuple stmt will not propagate to its Int argument (since `Int` is of bitstype) - let result = code_escapes((Int,Any,)) 
do a, b - t = tuple(a, b) - return t - end - i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[Argument(2)], r) - @test has_return_escape(result.state[Argument(3)], r) - end -end - -# # TODO implement a finalizer elision pass -# mutable struct WithFinalizer -# v -# function WithFinalizer(v) -# x = new(v) -# f(t) = @async println("Finalizing $t.") -# return finalizer(x, x) -# end -# end -# make_m(v = 10) = MyMutable(v) -# function simple(cond) -# m = make_m() -# if cond -# # println(m.v) -# return nothing # <= insert `finalize` call here -# end -# return m -# end diff --git a/test/compiler/EscapeAnalysis/setup.jl b/test/compiler/EscapeAnalysis/setup.jl deleted file mode 100644 index 18221e5afc524..0000000000000 --- a/test/compiler/EscapeAnalysis/setup.jl +++ /dev/null @@ -1,59 +0,0 @@ -include(normpath(@__DIR__, "..", "irutils.jl")) -include(normpath(@__DIR__, "EAUtils.jl")) -using Test, Core.Compiler.EscapeAnalysis, .EAUtils -import Core: Argument, SSAValue, ReturnNode -const EA = Core.Compiler.EscapeAnalysis -import .EA: ignore_argescape - -isT(T) = (@nospecialize x) -> x === T -isreturn(@nospecialize x) = isa(x, Core.ReturnNode) && isdefined(x, :val) -isthrow(@nospecialize x) = Meta.isexpr(x, :call) && Core.Compiler.is_throw_call(x) -isϕ(@nospecialize x) = isa(x, Core.PhiNode) -function with_normalized_name(@nospecialize(f), @nospecialize(x)) - if Meta.isexpr(x, :foreigncall) - name = x.args[1] - nn = EA.normalize(name) - return isa(nn, Symbol) && f(nn) - end - return false -end -isarrayalloc(@nospecialize x) = with_normalized_name(nn->!isnothing(Core.Compiler.alloc_array_ndims(nn)), x) -isarrayresize(@nospecialize x) = with_normalized_name(nn->!isnothing(EA.array_resize_info(nn)), x) -isarraycopy(@nospecialize x) = with_normalized_name(nn->EA.is_array_copy(nn), x) - -""" - is_load_forwardable(x::EscapeInfo) -> Bool - -Queries if `x` is elibigle for store-to-load forwarding optimization. -""" -function is_load_forwardable(x::EA.EscapeInfo) - AliasInfo = x.AliasInfo - # NOTE technically we also need to check `!has_thrown_escape(x)` here as well, - # but we can also do equivalent check during forwarding - return isa(AliasInfo, EA.IndexableFields) || isa(AliasInfo, EA.IndexableElements) -end - -let setup_ex = quote - mutable struct SafeRef{T} - x::T - end - Base.getindex(s::SafeRef) = getfield(s, 1) - Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x) - - mutable struct SafeRefs{S,T} - x1::S - x2::T - end - Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx) - Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x) - - global GV::Any - const global GR = Ref{Any}() - end - global function EATModule(setup_ex = setup_ex) - M = Module() - Core.eval(M, setup_ex) - return M - end - Core.eval(@__MODULE__, setup_ex) -end diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl deleted file mode 100644 index bbcf7b0dfb959..0000000000000 --- a/test/compiler/contextual.jl +++ /dev/null @@ -1,222 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Cassette -# ======== - -module MiniCassette - # A minimal demonstration of the cassette mechanism. Doesn't support all the - # fancy features, but sufficient to exercise this code path in the compiler. 
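# A minimal standalone sketch of the calling convention that this cassette demonstration
# exercises: a context value plus an `overdub(ctx, f, args...)` entry point. The names
# `ToyCtx` and `toy_overdub` are illustrative assumptions only, and unlike MiniCassette
# itself this sketch performs no rewriting of the callee's lowered code, so nested calls
# are not intercepted.
struct ToyCtx end
# Builtins and intrinsics are executed directly, mirroring the special case below.
toy_overdub(::ToyCtx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...) = f(args...)
toy_overdub(::ToyCtx, f, args...) = f(args...)
@assert toy_overdub(ToyCtx(), gcd, 10, 20) === gcd(10, 20)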
- - using Core.Compiler: retrieve_code_info, CodeInfo, - MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, SlotNumber, quoted, - signature_type, anymap - using Base: _methods_by_ftype - using Base.Meta: isexpr - using Test - - export Ctx, overdub - - struct Ctx; end - - # A no-op cassette-like transform - function transform_expr(expr, map_slot_number, map_ssa_value, sparams::Core.SimpleVector) - @nospecialize expr - transform(@nospecialize expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams) - if isexpr(expr, :call) - return Expr(:call, overdub, SlotNumber(2), anymap(transform, expr.args)...) - elseif isa(expr, GotoIfNot) - return GotoIfNot(transform(expr.cond), map_ssa_value(SSAValue(expr.dest)).id) - elseif isexpr(expr, :static_parameter) - return quoted(sparams[expr.args[1]]) - elseif isa(expr, ReturnNode) - return ReturnNode(transform(expr.val)) - elseif isa(expr, Expr) - return Expr(expr.head, anymap(transform, expr.args)...) - elseif isa(expr, GotoNode) - return GotoNode(map_ssa_value(SSAValue(expr.label)).id) - elseif isa(expr, SlotNumber) - return map_slot_number(expr.id) - elseif isa(expr, SSAValue) - return map_ssa_value(expr) - else - return expr - end - end - - function transform!(ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector) - code = ci.code - ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+1:end]...] - ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+1:end]...] - # Insert one SSAValue for every argument statement - prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs]) - prepend!(ci.codelocs, fill(0, nargs)) - prepend!(ci.ssaflags, fill(0x00, nargs)) - ci.ssavaluetypes += nargs - function map_slot_number(slot::Int) - if slot == 1 - # self in the original function is now `f` - return SlotNumber(3) - elseif 2 <= slot <= nargs + 1 - # Arguments get inserted as ssa values at the top of the function - return SSAValue(slot - 1) - else - # The first non-argument slot will be 5 - return SlotNumber(slot - (nargs + 1) + 4) - end - end - map_ssa_value(ssa::SSAValue) = SSAValue(ssa.id + nargs) - for i = (nargs+1:length(code)) - code[i] = transform_expr(code[i], map_slot_number, map_ssa_value, sparams) - end - end - - function overdub_generator(world::UInt, source, self, c, f, args) - @nospecialize - if !Base.issingletontype(f) - # (c, f, args..) -> f(args...) - code_info = :(return f(args...)) - return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :c, :f, :args), Core.svec())(world, source, code_info) - end - - tt = Tuple{f, args...} - match = Base._which(tt; world) - mi = Core.Compiler.specialize_method(match) - # Unsupported in this mini-cassette - @assert !mi.def.isva - code_info = retrieve_code_info(mi, world) - @assert isa(code_info, CodeInfo) - code_info = copy(code_info) - @assert code_info.edges === nothing - code_info.edges = MethodInstance[mi] - transform!(code_info, length(args), match.sparams) - # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[]) - # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[]) - return code_info - end - - @inline function overdub(c::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...) - f(args...) - end - - @eval function overdub(c::Ctx, f, args...) 
- $(Expr(:meta, :generated_only)) - $(Expr(:meta, :generated, overdub_generator)) - end -end - -using .MiniCassette - -# Test #265 for Cassette -f() = 1 -@test overdub(Ctx(), f) === 1 -f() = 2 -@test overdub(Ctx(), f) === 2 - -# Test that MiniCassette is at least somewhat capable by overdubbing gcd -@test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20) - -@generated bar(::Val{align}) where {align} = :(42) -foo(i) = i+bar(Val(1)) - -@test @inferred(overdub(Ctx(), foo, 1)) == 43 - -# overlay method tables -# ===================== - -module OverlayModule - -using Base.Experimental: @MethodTable, @overlay - -@MethodTable(mt) - -@overlay mt function sin(x::Float64) - 1 -end - -# short function def -@overlay mt cos(x::Float64) = 2 - -# parametric function def -@overlay mt tan(x::T) where {T} = 3 - -end # module OverlayModule - -methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter()) -@test only(methods).method.module === Base.Math - -methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter()) -@test only(methods).method.module === OverlayModule - -methods = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter()) -@test isempty(methods) - -# precompilation - -load_path = mktempdir() -depot_path = mktempdir() -try - pushfirst!(LOAD_PATH, load_path) - pushfirst!(DEPOT_PATH, depot_path) - - write(joinpath(load_path, "Foo.jl"), - """ - module Foo - Base.Experimental.@MethodTable(mt) - Base.Experimental.@overlay mt sin(x::Int) = 1 - end - """) - - # precompiling Foo serializes the overlay method through the `mt` binding in the module - Foo = Base.require(Main, :Foo) - @test length(Foo.mt) == 1 - - write(joinpath(load_path, "Bar.jl"), - """ - module Bar - Base.Experimental.@MethodTable(mt) - end - """) - - write(joinpath(load_path, "Baz.jl"), - """ - module Baz - using Bar - Base.Experimental.@overlay Bar.mt sin(x::Int) = 1 - end - """) - - # when referring an method table in another module, - # the overlay method needs to be discovered explicitly - Bar = Base.require(Main, :Bar) - @test length(Bar.mt) == 0 - Baz = Base.require(Main, :Baz) - @test length(Bar.mt) == 1 -finally - filter!((≠)(load_path), LOAD_PATH) - filter!((≠)(depot_path), DEPOT_PATH) - rm(load_path, recursive=true, force=true) - try - rm(depot_path, force=true, recursive=true) - catch err - @show err - end -end - -# Test that writing a bad cassette-style pass gives the expected error (#49715) -function generator49715(world, source, self, f, tt) - tt = tt.parameters[1] - sig = Tuple{f, tt.parameters...} - mi = Base._which(sig; world) - - error("oh no") - - stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec()) - stub(world, source, :(nothing)) -end - -@eval function doit49715(f, tt) - $(Expr(:meta, :generated, generator49715)) - $(Expr(:meta, :generated_only)) -end - -@test_throws "oh no" doit49715(sin, Tuple{Int}) diff --git a/test/compiler/datastructures.jl b/test/compiler/datastructures.jl deleted file mode 100644 index 8dbaee61503d0..0000000000000 --- a/test/compiler/datastructures.jl +++ /dev/null @@ -1,100 +0,0 @@ -using Test - -@testset "CachedMethodTable" begin - # cache result should be separated per `limit` and `sig` - # https://github.com/JuliaLang/julia/pull/46799 - interp = Core.Compiler.NativeInterpreter() - table = Core.Compiler.method_table(interp) - sig = Tuple{typeof(*), Any, Any} - result1 = Core.Compiler.findall(sig, table; limit=-1) - result2 = 
Core.Compiler.findall(sig, table; limit=Core.Compiler.InferenceParams().max_methods) - @test result1 !== nothing && !Core.Compiler.isempty(result1.matches) - @test result2 === nothing -end - -@testset "BitSetBoundedMinPrioritySet" begin - bsbmp = Core.Compiler.BitSetBoundedMinPrioritySet(5) - Core.Compiler.push!(bsbmp, 2) - Core.Compiler.push!(bsbmp, 2) - @test Core.Compiler.popfirst!(bsbmp) == 2 - Core.Compiler.push!(bsbmp, 1) - @test Core.Compiler.popfirst!(bsbmp) == 1 - @test Core.Compiler.isempty(bsbmp) -end - -@testset "basic heap functionality" begin - v = [2,3,1] - @test Core.Compiler.heapify!(v, Core.Compiler.Forward) === v - @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 1 - @test Core.Compiler.heappush!(v, 4, Core.Compiler.Forward) === v - @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 2 - @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 3 - @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 4 -end - -@testset "randomized heap correctness tests" begin - order = Core.Compiler.By(x -> -x[2]) - for i in 1:6 - heap = Tuple{Int, Int}[(rand(1:i), rand(1:i)) for _ in 1:2i] - mock = copy(heap) - @test Core.Compiler.heapify!(heap, order) === heap - sort!(mock, by=last) - - for _ in 1:6i - if rand() < .5 && !isempty(heap) - # The first entries may differ because heaps are not stable - @test last(Core.Compiler.heappop!(heap, order)) === last(pop!(mock)) - else - new = (rand(1:i), rand(1:i)) - Core.Compiler.heappush!(heap, new, order) - push!(mock, new) - sort!(mock, by=last) - end - end - end -end - -@testset "searchsorted" begin - @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Core.Compiler.UnitRange(1, 0) - @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Core.Compiler.UnitRange(1, 2) - @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Core.Compiler.UnitRange(3, 4) - @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Core.Compiler.UnitRange(7, 6) - @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Core.Compiler.UnitRange(5, 4) - - @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 0) === Core.Compiler.UnitRange(1, 0) - @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 1) === Core.Compiler.UnitRange(1, 1) - @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 2) === Core.Compiler.UnitRange(2, 2) - @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 4) === Core.Compiler.UnitRange(4, 3) - - @test Core.Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Core.Compiler.UnitRange(1, 4) - @test Core.Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Core.Compiler.UnitRange(5, 10) - @test Core.Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 6) - @test Core.Compiler.searchsorted(fill(1, 15), 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 10) - - for (rg,I) in Any[(Core.Compiler.UnitRange(49, 57), 47:59), - (Core.Compiler.StepRange(1, 2, 17), -1:19)] - rg_r = Core.Compiler.reverse(rg) - rgv, rgv_r = Core.Compiler.collect(rg), Core.Compiler.collect(rg_r) - for i = I - @test Core.Compiler.searchsorted(rg,i) === Core.Compiler.searchsorted(rgv,i) - @test Core.Compiler.searchsorted(rg_r,i,rev=true) === Core.Compiler.searchsorted(rgv_r,i,rev=true) - end - end -end - -@testset "basic sort" begin - v = [3,1,2] - @test v == [3,1,2] - @test Core.Compiler.sort!(v) === v == [1,2,3] - @test Core.Compiler.sort!(v, by = x -> -x) === v == [3,2,1] - @test Core.Compiler.sort!(v, by = x 
-> -x, < = >) === v == [1,2,3] -end - -@testset "randomized sorting tests" begin - for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n] - v = rand(-1:k, n) - for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >] - @test sort(v; by, lt) == Core.Compiler.sort!(copy(v); by, < = lt) - end - end -end diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl deleted file mode 100644 index a4b21da523a8e..0000000000000 --- a/test/compiler/effects.jl +++ /dev/null @@ -1,1000 +0,0 @@ -using Test -include("irutils.jl") - -# Test that the Core._apply_iterate bail path taints effects -function f_apply_bail(f) - f(()...) - return nothing -end -@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(f_apply_bail)) -@test !fully_eliminated((Function,)) do f - f_apply_bail(f) - nothing -end - -# Test that effect modeling for return_type doesn't incorrectly pick -# up the effects of the function being analyzed -f_throws() = error() -@noinline function return_type_unused(x) - Core.Compiler.return_type(f_throws, Tuple{}) - return x+1 -end -@test Core.Compiler.is_removable_if_unused(Base.infer_effects(return_type_unused, (Int,))) -@test fully_eliminated((Int,)) do x - return_type_unused(x) - return nothing -end - -# Test that ambiguous calls don't accidentally get nothrow effect -ambig_effects_test(a::Int, b) = 1 -ambig_effects_test(a, b::Int) = 1 -ambig_effects_test(a, b) = 1 -@test !Core.Compiler.is_nothrow(Base.infer_effects(ambig_effects_test, (Int, Any))) -global ambig_unknown_type_global::Any = 1 -@noinline function conditionally_call_ambig(b::Bool, a) - if b - ambig_effects_test(a, ambig_unknown_type_global) - end - return 0 -end -@test !fully_eliminated((Bool,)) do b - conditionally_call_ambig(b, 1) - return nothing -end - -# Test that a missing methtable identification gets tainted -# appropriately -struct FCallback; f::Union{Nothing, Function}; end -f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end -@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(f_invoke_callback, (FCallback,))) -@test !fully_eliminated((FCallback,)) do fc - f_invoke_callback(fc) - return nothing -end - -# @assume_effects override -const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z')) -Base.@assume_effects :foldable concrete_eval( - f, args...; kwargs...) = f(args...; kwargs...) -@test fully_eliminated() do - concrete_eval(getindex, ___CONST_DICT___, :a) -end - -# :removable override -Base.@assume_effects :removable removable_call( - f, args...; kwargs...) = f(args...; kwargs...) 
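# A small usage sketch of the effects-query API these tests build on; the choice of `+`
# on `Int`s is an assumed example, not taken from the deleted file. `Base.infer_effects(f,
# argtypes)` returns the inferred effect bits for a call signature, and predicates such as
# `Core.Compiler.is_foldable` and `Core.Compiler.is_nothrow` query them; the
# `@assume_effects` annotations above assert stronger effects than inference can prove,
# which is what lets the surrounding tests fully eliminate those calls.
let effects = Base.infer_effects(+, (Int, Int))
    @assert Core.Compiler.is_foldable(effects)
    @assert Core.Compiler.is_nothrow(effects)
end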
-@test fully_eliminated() do - @noinline removable_call(getindex, ___CONST_DICT___, :a) - nothing -end - -# terminates_globally override -# https://github.com/JuliaLang/julia/issues/41694 -Base.@assume_effects :terminates_globally function issue41694(x) - res = 1 - 1 < x < 20 || throw("bad") - while x > 1 - res *= x - x -= 1 - end - return res -end -@test Core.Compiler.is_foldable(Base.infer_effects(issue41694, (Int,))) -@test fully_eliminated() do - issue41694(2) -end - -Base.@assume_effects :terminates_globally function recur_termination1(x) - x == 1 && return 1 - 1 < x < 20 || throw("bad") - return x * recur_termination1(x-1) -end -@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,))) -@test fully_eliminated() do - recur_termination1(12) -end - -Base.@assume_effects :terminates_globally function recur_termination21(x) - x == 1 && return 1 - 1 < x < 20 || throw("bad") - return recur_termination22(x) -end -recur_termination22(x) = x * recur_termination21(x-1) -@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,))) -@test Core.Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,))) -@test fully_eliminated() do - recur_termination21(12) + recur_termination22(12) -end - -# anonymous function support for `@assume_effects` -@test fully_eliminated() do - map((2,3,4)) do x - # this :terminates_locally allows this anonymous function to be constant-folded - Base.@assume_effects :terminates_locally - res = 1 - 1 < x < 20 || error("bad pow") - while x > 1 - res *= x - x -= 1 - end - return res - end -end - -# control flow backedge should taint `terminates` -@test Base.infer_effects((Int,)) do n - for i = 1:n; end -end |> !Core.Compiler.is_terminates - -# interprocedural-recursion should taint `terminates` **appropriately** -function sumrecur(a, x) - isempty(a) && return x - return sumrecur(Base.tail(a), x + first(a)) -end -@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int},Int)) |> Core.Compiler.is_terminates -@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int,Vararg{Int}},Int)) |> !Core.Compiler.is_terminates - -# https://github.com/JuliaLang/julia/issues/45781 -@test Base.infer_effects((Float32,)) do a - out1 = promote_type(Irrational{:π}, Bool) - out2 = sin(a) - out1, out2 -end |> Core.Compiler.is_terminates - -# refine :consistent-cy effect inference using the return type information -@test Base.infer_effects((Any,)) do x - taint = Ref{Any}(x) # taints :consistent-cy, but will be adjusted - throw(taint) -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - if x < 0 - taint = Ref(x) # taints :consistent-cy, but will be adjusted - throw(DomainError(x, taint)) - end - return nothing -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - if x < 0 - taint = Ref(x) # taints :consistent-cy, but will be adjusted - throw(DomainError(x, taint)) - end - return x == 0 ? nothing : x # should `Union` of isbitstype objects nicely -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Symbol,Any)) do s, x - if s === :throw - taint = Ref{Any}(":throw option given") # taints :consistent-cy, but will be adjusted - throw(taint) - end - return s # should handle `Symbol` nicely -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - return Ref(x) -end |> !Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - return x < 0 ? 
Ref(x) : nothing -end |> !Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - if x < 0 - throw(DomainError(x, lazy"$x is negative")) - end - return nothing -end |> Core.Compiler.is_foldable - -# :the_exception expression should taint :consistent-cy -global inconsistent_var::Int = 42 -function throw_inconsistent() # this is still :consistent - throw(inconsistent_var) -end -function catch_inconsistent() - try - throw_inconsistent() - catch err - err - end -end -@test !Core.Compiler.is_consistent(Base.infer_effects(catch_inconsistent)) -cache_inconsistent() = catch_inconsistent() -function compare_inconsistent() - a = cache_inconsistent() - global inconsistent_var = 0 - b = cache_inconsistent() - global inconsistent_var = 42 - return a === b -end -@test !compare_inconsistent() -# return type information shouldn't be able to refine it also -function catch_inconsistent(x::T) where T - v = x - try - throw_inconsistent() - catch err - v = err::T - end - return v -end -@test !Core.Compiler.is_consistent(Base.infer_effects(catch_inconsistent, (Int,))) -cache_inconsistent(x) = catch_inconsistent(x) -function compare_inconsistent(x::T) where T - x = one(T) - a = cache_inconsistent(x) - global inconsistent_var = 0 - b = cache_inconsistent(x) - global inconsistent_var = 42 - return a === b -end -@test !compare_inconsistent(3) - -# Effect modeling for Core.compilerbarrier -@test Base.infer_effects(Base.inferencebarrier, Tuple{Any}) |> Core.Compiler.is_removable_if_unused - -# allocation/access of uninitialized fields should taint the :consistent-cy -struct Maybe{T} - x::T - Maybe{T}() where T = new{T}() - Maybe{T}(x) where T = new{T}(x) - Maybe(x::T) where T = new{T}(x) -end -Base.getindex(x::Maybe) = x.x - -struct SyntacticallyDefined{T} - x::T -end - -import Core.Compiler: Const, getfield_notundefined -for T = (Base.RefValue, Maybe) # both mutable and immutable - for name = (Const(1), Const(:x)) - @test getfield_notundefined(T{String}, name) - @test getfield_notundefined(T{Integer}, name) - @test getfield_notundefined(T{Union{String,Integer}}, name) - @test getfield_notundefined(Union{T{String},T{Integer}}, name) - @test !getfield_notundefined(T{Int}, name) - @test !getfield_notundefined(T{<:Integer}, name) - @test !getfield_notundefined(T{Union{Int32,Int64}}, name) - @test !getfield_notundefined(T, name) - end - # throw doesn't account for undefined behavior - for name = (Const(0), Const(2), Const(1.0), Const(:y), Const("x"), - Float64, String, Nothing) - @test getfield_notundefined(T{String}, name) - @test getfield_notundefined(T{Int}, name) - @test getfield_notundefined(T{Integer}, name) - @test getfield_notundefined(T{<:Integer}, name) - @test getfield_notundefined(T{Union{Int32,Int64}}, name) - @test getfield_notundefined(T, name) - end - # should not be too conservative when field isn't known very well but object information is accurate - @test getfield_notundefined(T{String}, Int) - @test getfield_notundefined(T{String}, Symbol) - @test getfield_notundefined(T{Integer}, Int) - @test getfield_notundefined(T{Integer}, Symbol) - @test !getfield_notundefined(T{Int}, Int) - @test !getfield_notundefined(T{Int}, Symbol) - @test !getfield_notundefined(T{<:Integer}, Int) - @test !getfield_notundefined(T{<:Integer}, Symbol) -end -# should be conservative when object information isn't accurate -@test !getfield_notundefined(Any, Const(1)) -@test !getfield_notundefined(Any, Const(:x)) -# tuples and namedtuples should be okay if not given accurate information -for TupleType = 
Any[Tuple{Int,Int,Int}, Tuple{Int,Vararg{Int}}, Tuple{Any}, Tuple, - NamedTuple{(:a, :b), Tuple{Int,Int}}, NamedTuple{(:x,),Tuple{Any}}, NamedTuple], - FieldType = Any[Int, Symbol, Any] - @test getfield_notundefined(TupleType, FieldType) -end -# skip analysis on fields that are known to be defined syntactically -@test Core.Compiler.getfield_notundefined(SyntacticallyDefined{Float64}, Symbol) -@test Core.Compiler.getfield_notundefined(Const(Main), Const(:var)) -@test Core.Compiler.getfield_notundefined(Const(Main), Const(42)) -# high-level tests for `getfield_notundefined` -@test Base.infer_effects() do - Maybe{Int}() -end |> !Core.Compiler.is_consistent -@test Base.infer_effects() do - Maybe{Int}()[] -end |> !Core.Compiler.is_consistent -@test !fully_eliminated() do - Maybe{Int}()[] -end -@test Base.infer_effects() do - Maybe{String}() -end |> Core.Compiler.is_consistent -@test Base.infer_effects() do - Maybe{String}()[] -end |> Core.Compiler.is_consistent -let f() = Maybe{String}()[] - @test Base.return_types() do - f() # this call should be concrete evaluated - end |> only === Union{} -end -@test Base.infer_effects() do - Ref{Int}() -end |> !Core.Compiler.is_consistent -@test Base.infer_effects() do - Ref{Int}()[] -end |> !Core.Compiler.is_consistent -@test !fully_eliminated() do - Ref{Int}()[] -end -@test Base.infer_effects() do - Ref{String}()[] -end |> Core.Compiler.is_consistent -let f() = Ref{String}()[] - @test Base.return_types() do - f() # this call should be concrete evaluated - end |> only === Union{} -end -@test Base.infer_effects((SyntacticallyDefined{Float64}, Symbol)) do w, s - getfield(w, s) -end |> Core.Compiler.is_foldable - -# effects propagation for `Core.invoke` calls -# https://github.com/JuliaLang/julia/issues/44763 -global x44763::Int = 0 -increase_x44763!(n) = (global x44763; x44763 += n) -invoke44763(x) = @invoke increase_x44763!(x) -@test Base.return_types() do - invoke44763(42) -end |> only === Int -@test x44763 == 0 - -# `@inbounds`/`@boundscheck` expression should taint :consistent-cy correctly -# https://github.com/JuliaLang/julia/issues/48099 -function A1_inbounds() - r = 0 - @inbounds begin - @boundscheck r += 1 - end - return r -end -@test !Core.Compiler.is_consistent(Base.infer_effects(A1_inbounds)) - -# Test that purity doesn't try to accidentally run unreachable code due to -# boundscheck elimination -function f_boundscheck_elim(n) - # Inbounds here assumes that this is only ever called with `n==0`, but of - # course the compiler has no way of knowing that, so it must not attempt - # to run the `@inbounds getfield(sin, 1)` that `ntuple` generates. - ntuple(x->(@inbounds ()[x]), n) -end -@test_broken !Core.Compiler.is_consistent(Base.infer_effects(f_boundscheck_elim, (Int,))) -@test Tuple{} <: only(Base.return_types(f_boundscheck_elim, (Int,))) - -# Test that purity modeling doesn't accidentally introduce new world age issues -f_redefine_me(x) = x+1 -f_call_redefine() = f_redefine_me(0) -f_mk_opaque() = Base.Experimental.@opaque ()->Base.inferencebarrier(f_call_redefine)() -const op_capture_world = f_mk_opaque() -f_redefine_me(x) = x+2 -@test op_capture_world() == 1 -@test f_mk_opaque()() == 2 - -# backedge insertion for Any-typed, effect-free frame -const CONST_DICT = let d = Dict() - for c in 'A':'z' - push!(d, c => Int(c)) - end - d -end -Base.@assume_effects :foldable getcharid(c) = CONST_DICT[c] -@noinline callf(f, args...) = f(args...) 
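The deleted tests around this point exercise constant folding through a `:foldable` lookup into a constant table. As a self-contained reference for that pattern, here is a minimal hedged sketch; the table and helper names (`DEMO_TABLE`, `demo_lookup`) are illustrative and not taken from the patch.

# Constant table plus a lookup annotated :foldable, so calls with constant
# arguments may be concrete-evaluated during inference (hedged sketch).
const DEMO_TABLE = Dict(:a => 1, :b => 2)
Base.@assume_effects :foldable demo_lookup(k::Symbol) = DEMO_TABLE[k]

# With a literal key, inference can fold the whole call to a constant result.
only(Base.return_types(() -> demo_lookup(:a))) === Int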
-function entry_to_be_invalidated(c) - return callf(getcharid, c) -end -@test Base.infer_effects((Char,)) do x - entry_to_be_invalidated(x) -end |> Core.Compiler.is_foldable -@test fully_eliminated(; retval=97) do - entry_to_be_invalidated('a') -end -getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation -@test Base.infer_effects((Char,)) do x - entry_to_be_invalidated(x) -end |> !Core.Compiler.is_foldable -@test !fully_eliminated() do - entry_to_be_invalidated('a') -end - -@test !Core.Compiler.builtin_nothrow(Core.Compiler.fallback_lattice, Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any) - -# Nothrow for assignment to globals -global glob_assign_int::Int = 0 -f_glob_assign_int() = global glob_assign_int += 1 -let effects = Base.infer_effects(f_glob_assign_int, ()) - @test !Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_nothrow(effects) -end -# Nothrow for setglobal! -global SETGLOBAL!_NOTHROW::Int = 0 -let effects = Base.infer_effects() do - setglobal!(@__MODULE__, :SETGLOBAL!_NOTHROW, 42) - end - @test Core.Compiler.is_nothrow(effects) -end - -# we should taint `nothrow` if the binding doesn't exist and isn't fixed yet, -# as the cached effects can be easily wrong otherwise -# since the inference currently doesn't track "world-age" of global variables -@eval global_assignment_undefinedyet() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET)) = 42 -setglobal!_nothrow_undefinedyet() = setglobal!(@__MODULE__, :UNDEFINEDYET, 42) -let effects = Base.infer_effects() do - global_assignment_undefinedyet() - end - @test !Core.Compiler.is_nothrow(effects) -end -let effects = Base.infer_effects() do - setglobal!_nothrow_undefinedyet() - end - @test !Core.Compiler.is_nothrow(effects) -end -global UNDEFINEDYET::String = "0" -let effects = Base.infer_effects() do - global_assignment_undefinedyet() - end - @test !Core.Compiler.is_nothrow(effects) -end -let effects = Base.infer_effects() do - setglobal!_nothrow_undefinedyet() - end - @test !Core.Compiler.is_nothrow(effects) -end -@test_throws ErrorException setglobal!_nothrow_undefinedyet() - -# Nothrow for setfield! -mutable struct SetfieldNothrow - x::Int -end -f_setfield_nothrow() = SetfieldNothrow(0).x = 1 -let effects = Base.infer_effects(f_setfield_nothrow, ()) - # Technically effect free even though we use the heap, since the - # object doesn't escape, but the compiler doesn't know that. 
- #@test Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_nothrow(effects) -end - -# even if 2-arg `getfield` may throw, it should be still `:consistent` -@test Core.Compiler.is_consistent(Base.infer_effects(getfield, (NTuple{5, Float64}, Int))) - -# SimpleVector allocation is consistent -@test Core.Compiler.is_consistent(Base.infer_effects(Core.svec)) -@test Base.infer_effects() do - Core.svec(nothing, 1, "foo") -end |> Core.Compiler.is_consistent - -# fastmath operations are inconsistent -@test !Core.Compiler.is_consistent(Base.infer_effects((a,b)->@fastmath(a+b), (Float64,Float64))) - -# issue 46122: @assume_effects for @ccall -@test Base.infer_effects((Vector{Int},)) do a - Base.@assume_effects :effect_free @ccall jl_array_ptr(a::Any)::Ptr{Int} -end |> Core.Compiler.is_effect_free - -# `getfield_effects` handles access to union object nicely -let 𝕃 = Core.Compiler.fallback_lattice - @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{String}, Core.Const(:value)]), String)) - @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{Symbol}, Core.Const(:value)]), Symbol)) - @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Union{Some{Symbol},Some{String}}, Core.Const(:value)]), Union{Symbol,String})) -end -@test Base.infer_effects((Bool,)) do c - obj = c ? Some{String}("foo") : Some{Symbol}(:bar) - return getfield(obj, :value) -end |> Core.Compiler.is_consistent - -# getfield is nothrow when bounds checking is turned off -@test Base.infer_effects((Tuple{Int,Int},Int)) do t, i - getfield(t, i, false) -end |> Core.Compiler.is_nothrow -@test Base.infer_effects((Tuple{Int,Int},Symbol)) do t, i - getfield(t, i, false) -end |> Core.Compiler.is_nothrow -@test Base.infer_effects((Tuple{Int,Int},String)) do t, i - getfield(t, i, false) # invalid name type -end |> !Core.Compiler.is_nothrow - -@test Core.Compiler.is_consistent(Base.infer_effects(setindex!, (Base.RefValue{Int}, Int))) - -# :inaccessiblememonly effect -const global constant_global::Int = 42 -const global ConstantType = Ref -global nonconstant_global::Int = 42 -const global constant_mutable_global = Ref(0) -const global constant_global_nonisbits = Some(:foo) -@test Base.infer_effects() do - constant_global -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - ConstantType -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - ConstantType{Any}() -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - constant_global_nonisbits -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - getglobal(@__MODULE__, :constant_global) -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - nonconstant_global -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - getglobal(@__MODULE__, :nonconstant_global) -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects((Symbol,)) do name - getglobal(@__MODULE__, name) -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects((Int,)) do v - global nonconstant_global = v -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects((Int,)) do v - setglobal!(@__MODULE__, :nonconstant_global, v) -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects((Int,)) do v - 
constant_mutable_global[] = v -end |> !Core.Compiler.is_inaccessiblememonly -module ConsistentModule -const global constant_global::Int = 42 -const global ConstantType = Ref -end # module -@test Base.infer_effects() do - ConsistentModule.constant_global -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - ConsistentModule.ConstantType -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - ConsistentModule.ConstantType{Any}() -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - getglobal(@__MODULE__, :ConsistentModule).constant_global -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - getglobal(@__MODULE__, :ConsistentModule).ConstantType -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do - getglobal(@__MODULE__, :ConsistentModule).ConstantType{Any}() -end |> Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects((Module,)) do M - M.constant_global -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects((Module,)) do M - M.ConstantType -end |> !Core.Compiler.is_inaccessiblememonly -@test Base.infer_effects() do M - M.ConstantType{Any}() -end |> !Core.Compiler.is_inaccessiblememonly - -# the `:inaccessiblememonly` helper effect allows us to prove `:consistent`-cy of frames -# including `getfield` / `isdefined` accessing to local mutable object - -mutable struct SafeRef{T} - x::T -end -Base.getindex(x::SafeRef) = x.x; -Base.setindex!(x::SafeRef, v) = x.x = v; -Base.isassigned(x::SafeRef) = true; - -function mutable_consistent(s) - SafeRef(s)[] -end -@test Core.Compiler.is_inaccessiblememonly(Base.infer_effects(mutable_consistent, (Symbol,))) -@test fully_eliminated(; retval=:foo) do - mutable_consistent(:foo) -end - -function nested_mutable_consistent(s) - SafeRef(SafeRef(SafeRef(SafeRef(SafeRef(s)))))[][][][][] -end -@test Core.Compiler.is_inaccessiblememonly(Base.infer_effects(nested_mutable_consistent, (Symbol,))) -@test fully_eliminated(; retval=:foo) do - nested_mutable_consistent(:foo) -end - -const consistent_global = Some(:foo) -@test Base.infer_effects() do - consistent_global.value -end |> Core.Compiler.is_consistent -const inconsistent_global = SafeRef(:foo) -@test Base.infer_effects() do - inconsistent_global[] -end |> !Core.Compiler.is_consistent -const inconsistent_condition_ref = Ref{Bool}(false) -@test Base.infer_effects() do - if inconsistent_condition_ref[] - return 0 - else - return 1 - end -end |> !Core.Compiler.is_consistent - -# should handle va-method properly -callgetfield1(xs...) 
= getfield(getfield(xs, 1), 1) -@test !Core.Compiler.is_inaccessiblememonly(Base.infer_effects(callgetfield1, (Base.RefValue{Symbol},))) -const GLOBAL_XS = Ref(:julia) -global_getfield() = callgetfield1(GLOBAL_XS) -@test let - Base.Experimental.@force_compile - global_getfield() -end === :julia -GLOBAL_XS[] = :julia2 -@test let - Base.Experimental.@force_compile - global_getfield() -end === :julia2 - -# the `:inaccessiblememonly` helper effect allows us to prove `:effect_free`-ness of frames -# including `setfield!` modifying local mutable object - -const global_ref = Ref{Any}() -global const global_bit::Int = 42 -makeref() = Ref{Any}() -setref!(ref, @nospecialize v) = ref[] = v - -@noinline function removable_if_unused1() - x = makeref() - setref!(x, 42) - x -end -@noinline function removable_if_unused2() - x = makeref() - setref!(x, global_bit) - x -end -for f = Any[removable_if_unused1, removable_if_unused2] - effects = Base.infer_effects(f) - @test Core.Compiler.is_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_removable_if_unused(effects) - @test @eval fully_eliminated() do - $f() - nothing - end -end -@noinline function removable_if_unused3(v) - x = makeref() - setref!(x, v) - x -end -let effects = Base.infer_effects(removable_if_unused3, (Int,)) - @test Core.Compiler.is_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_removable_if_unused(effects) -end -@test fully_eliminated((Int,)) do v - removable_if_unused3(v) - nothing -end - -@noinline function unremovable_if_unused1!(x) - setref!(x, 42) -end -@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (typeof(global_ref),))) -@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (Any,))) - -@noinline function unremovable_if_unused2!() - setref!(global_ref, 42) -end -@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused2!)) - -@noinline function unremovable_if_unused3!() - getfield(@__MODULE__, :global_ref)[] = nothing -end -@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused3!)) - -# array ops -# ========= - -# allocation -# ---------- - -# low-level constructor -@noinline construct_array(@nospecialize(T), args...) = Array{T}(undef, args...) -# should eliminate safe but dead allocations -let good_dims = @static Int === Int64 ? (1:10) : (1:8) - Ns = @static Int === Int64 ? 
(1:10) : (1:8) - for dim = good_dims, N = Ns - dims = ntuple(i->dim, N) - @test @eval Base.infer_effects() do - $construct_array(Int, $(dims...)) - end |> Core.Compiler.is_removable_if_unused - @test @eval fully_eliminated() do - $construct_array(Int, $(dims...)) - nothing - end - end -end -# should analyze throwness correctly -let bad_dims = [-1, typemax(Int)] - for dim in bad_dims, N in 1:10 - dims = ntuple(i->dim, N) - @test @eval Base.infer_effects() do - $construct_array(Int, $(dims...)) - end |> !Core.Compiler.is_removable_if_unused - @test @eval !fully_eliminated() do - $construct_array(Int, $(dims...)) - nothing - end - @test_throws "invalid Array" @eval $construct_array(Int, $(dims...)) - end -end - -# high-level interfaces -# getindex -for safesig = Any[ - (Type{Int},) - (Type{Int}, Int) - (Type{Int}, Int, Int) - (Type{Number},) - (Type{Number}, Number) - (Type{Number}, Int) - (Type{Any},) - (Type{Any}, Any,) - (Type{Any}, Any, Any) - ] - let effects = Base.infer_effects(getindex, safesig) - @test Core.Compiler.is_consistent_if_notreturned(effects) - @test Core.Compiler.is_removable_if_unused(effects) - end -end -for unsafesig = Any[ - (Type{Int}, String) - (Type{Int}, Any) - (Type{Number}, AbstractString) - (Type{Number}, Any) - ] - let effects = Base.infer_effects(getindex, unsafesig) - @test !Core.Compiler.is_nothrow(effects) - end -end -# vect -for safesig = Any[ - () - (Int,) - (Int, Int) - ] - let effects = Base.infer_effects(Base.vect, safesig) - @test Core.Compiler.is_consistent_if_notreturned(effects) - @test Core.Compiler.is_removable_if_unused(effects) - end -end - -# arrayref -# -------- - -for tt = Any[(Bool,Vector{Any},Int), - (Bool,Matrix{Any},Int,Int)] - @testset let effects = Base.infer_effects(Base.arrayref, tt) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free(effects) - @test !Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) - end -end - -# arrayset -# -------- - -for tt = Any[(Bool,Vector{Any},Any,Int), - (Bool,Matrix{Any},Any,Int,Int)] - @testset let effects = Base.infer_effects(Base.arrayset, tt) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) - @test !Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) - end -end -# nothrow for arrayset -@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i - Base.arrayset(true, a, v, i) -end |> !Core.Compiler.is_nothrow -@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i - a[i] = v # may throw -end |> !Core.Compiler.is_nothrow -# when bounds checking is turned off, it should be safe -@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i - Base.arrayset(false, a, v, i) -end |> Core.Compiler.is_nothrow -@test Base.infer_effects((Vector{Number},Number,Int)) do a, v, i - Base.arrayset(false, a, v, i) -end |> Core.Compiler.is_nothrow - -# arraysize -# --------- - -let effects = Base.infer_effects(Base.arraysize, (Array,Int)) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free(effects) - @test !Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) -end -# Test that arraysize has proper effect modeling -@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},)) - -# arraylen -# -------- - -let effects = Base.infer_effects(Base.arraylen, (Vector{Any},)) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test 
Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) -end - -# resize -# ------ - -for op = Any[ - Base._growbeg!, - Base._growend!, - Base._deletebeg!, - Base._deleteend!, - ] - let effects = Base.infer_effects(op, (Vector, Int)) - @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) - @test Core.Compiler.is_terminates(effects) - @test !Core.Compiler.is_nothrow(effects) - end -end - -# end to end -# ---------- - -function simple_vec_ops(T, op!, op, xs...) - a = T[] - op!(a, xs...) - return op(a) -end -for T = Any[Int,Any], op! = Any[push!,pushfirst!], op = Any[length,size], - xs = Any[(Int,), (Int,Int,)] - let effects = Base.infer_effects(simple_vec_ops, (Type{T},typeof(op!),typeof(op),xs...)) - @test Core.Compiler.is_foldable(effects) - end -end - -# Test that builtin_effects handles vararg correctly -@test !Core.Compiler.is_nothrow(Core.Compiler.builtin_effects(Core.Compiler.fallback_lattice, Core.isdefined, - Core.Compiler.ArgInfo(nothing, Any[Core.Compiler.Const(Core.isdefined), String, Vararg{Any}]), Bool)) - -# Test that :new can be eliminated even if an sparam is unknown -struct SparamUnused{T} - x - SparamUnused(x::T) where {T} = new{T}(x) -end -mksparamunused(x) = (SparamUnused(x); nothing) -let src = code_typed1(mksparamunused, (Any,)) - @test count(isnew, src.code) == 0 -end - -struct WrapperOneField{T} - x::T -end - -# Effects for getfield of type instance -@test Base.infer_effects(Tuple{Nothing}) do x - WrapperOneField{typeof(x)}.instance -end |> Core.Compiler.is_foldable_nothrow -@test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s - getfield(w, s) -end |> Core.Compiler.is_foldable -@test Core.Compiler.getfield_notundefined(WrapperOneField{Float64}, Symbol) -@test Base.infer_effects(Tuple{WrapperOneField{Symbol}, Symbol}) do w, s - getfield(w, s) -end |> Core.Compiler.is_foldable - -# Flow-sensitive consistenct for _typevar -@test Base.infer_effects() do - return WrapperOneField == (WrapperOneField{T} where T) -end |> Core.Compiler.is_foldable_nothrow - -# Test that dead `@inbounds` does not taint consistency -# https://github.com/JuliaLang/julia/issues/48243 -@test Base.infer_effects(Tuple{Int64}) do i - false && @inbounds (1,2,3)[i] - return 1 -end |> Core.Compiler.is_foldable_nothrow - -@test Base.infer_effects(Tuple{Int64}) do i - @inbounds (1,2,3)[i] -end |> !Core.Compiler.is_consistent - -@test Base.infer_effects(Tuple{Tuple{Int64}}) do x - @inbounds x[1] -end |> Core.Compiler.is_foldable_nothrow - -# Test that :new of non-concrete, but otherwise known type -# does not taint consistency. -@eval struct ImmutRef{T} - x::T - ImmutRef(x) = $(Expr(:new, :(ImmutRef{typeof(x)}), :x)) -end -@test Core.Compiler.is_foldable(Base.infer_effects(ImmutRef, Tuple{Any})) - -@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(typejoin, ())) - -# nothrow-ness of subtyping operations -# https://github.com/JuliaLang/julia/pull/48566 -@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A<:B, (Any,Any))) -@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A>:B, (Any,Any))) - -# GotoIfNot should properly mark itself as throwing when given a non-Bool -# https://github.com/JuliaLang/julia/pull/48583 -gotoifnot_throw_check_48583(x) = x ? 
x : 0 -@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Missing,))) -@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Any,))) -@test Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Bool,))) - -# unknown :static_parameter should taint :nothrow -# https://github.com/JuliaLang/julia/issues/46771 -unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = (T; nothing) -unknown_sparam_nothrow1(x::Ref{T}) where T = (T; nothing) -unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing) -@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{Int},))) -@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{<:Integer},))) -@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type,))) -@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Nothing,))) -@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Union{Type{Int},Nothing},))) -@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Any,))) -@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow1, (Ref,))) -@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) - -# purely abstract recursion should not taint :terminates -# https://github.com/JuliaLang/julia/issues/48983 -abstractly_recursive1() = abstractly_recursive2() -abstractly_recursive2() = (Core.Compiler._return_type(abstractly_recursive1, Tuple{}); 1) -abstractly_recursive3() = abstractly_recursive2() -@test Core.Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ())) -actually_recursive1(x) = actually_recursive2(x) -actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1) -actually_recursive3(x) = actually_recursive2(x) -@test !Core.Compiler.is_terminates(Base.infer_effects(actually_recursive3, (Int,))) - -# `isdefined` effects -struct MaybeSome{T} - value::T - MaybeSome(x::T) where T = new{T}(x) - MaybeSome{T}(x::T) where T = new{T}(x) - MaybeSome{T}() where T = new{T}() -end -const undefined_ref = Ref{String}() -const defined_ref = Ref{String}("julia") -const undefined_some = MaybeSome{String}() -const defined_some = MaybeSome{String}("julia") -let effects = Base.infer_effects() do - isdefined(undefined_ref, :x) - end - @test !Core.Compiler.is_consistent(effects) - @test Core.Compiler.is_nothrow(effects) -end -let effects = Base.infer_effects() do - isdefined(defined_ref, :x) - end - @test Core.Compiler.is_consistent(effects) - @test Core.Compiler.is_nothrow(effects) -end -let effects = Base.infer_effects() do - isdefined(undefined_some, :value) - end - @test Core.Compiler.is_consistent(effects) - @test Core.Compiler.is_nothrow(effects) -end -let effects = Base.infer_effects() do - isdefined(defined_some, :value) - end - @test Core.Compiler.is_consistent(effects) - @test Core.Compiler.is_nothrow(effects) -end -# high-level interface test -isassigned_effects(s) = isassigned(Ref(s)) -@test Core.Compiler.is_consistent(Base.infer_effects(isassigned_effects, (Symbol,))) -@test fully_eliminated(; retval=true) do - isassigned_effects(:foo) -end - -# Effects of Base.hasfield (#50198) -hf50198(s) = hasfield(typeof((;x=1, y=2)), s) -f50198() = (hf50198(Ref(:x)[]); nothing) -@test fully_eliminated(f50198) - -# Effects properly applied to flags by irinterp (#50311) -f50311(x, s) = Symbol(s) -g50311(x) = Val{f50311((1.0, x), "foo")}() -@test fully_eliminated(g50311, Tuple{Float64}) diff --git 
a/test/compiler/interpreter_exec.jl b/test/compiler/interpreter_exec.jl deleted file mode 100644 index a310a2740131d..0000000000000 --- a/test/compiler/interpreter_exec.jl +++ /dev/null @@ -1,122 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# tests that interpreter matches codegen -using Test -using Core: GotoIfNot, ReturnNode - -# test that interpreter correctly handles PhiNodes (#29262) -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::Core.CodeInfo - src.code = Any[ - # block 1 - QuoteNode(:a), - QuoteNode(:b), - GlobalRef(@__MODULE__, :test29262), - GotoIfNot(Core.SSAValue(3), 6), - # block 2 - Core.PhiNode(Int32[4], Any[Core.SSAValue(1)]), - Core.PhiNode(Int32[4, 5], Any[Core.SSAValue(2), Core.SSAValue(5)]), - ReturnNode(Core.SSAValue(6)), - ] - nstmts = length(src.code) - src.ssavaluetypes = Any[ Any for _ = 1:nstmts ] - src.codelocs = fill(Int32(1), nstmts) - src.inferred = true - Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src)) - global test29262 = true - @test :a === @eval $m - global test29262 = false - @test :b === @eval $m -end - -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::Core.CodeInfo - src.code = Any[ - # block 1 - QuoteNode(:a), - QuoteNode(:b), - QuoteNode(:c), - GlobalRef(@__MODULE__, :test29262), - # block 2 - Core.PhiNode(Int32[4, 16], Any[false, true]), # false, true - Core.PhiNode(Int32[4, 16], Any[Core.SSAValue(1), Core.SSAValue(2)]), # :a, :b - Core.PhiNode(Int32[4, 16], Any[Core.SSAValue(3), Core.SSAValue(6)]), # :c, :a - Core.PhiNode(Int32[16], Any[Core.SSAValue(7)]), # NULL, :c - # block 3 - Core.PhiNode(Int32[], Any[]), # NULL, NULL - Core.PhiNode(Int32[17, 8], Any[true, Core.SSAValue(4)]), # test29262, test29262, [true] - Core.PhiNode(Int32[17], Vector{Any}(undef, 1)), # NULL, NULL - Core.PhiNode(Int32[8], Vector{Any}(undef, 1)), # NULL, NULL - Core.PhiNode(Int32[], Any[]), # NULL, NULL - Core.PhiNode(Int32[17, 8], Any[Core.SSAValue(2), Core.SSAValue(8)]), # NULL, :c, [:b] - Core.PhiNode(Int32[], Any[]), # NULL, NULL - GotoIfNot(Core.SSAValue(5), 5), - # block 4 - GotoIfNot(Core.SSAValue(10), 9), - # block 5 - Expr(:call, GlobalRef(Core, :tuple), Core.SSAValue(6), Core.SSAValue(7), Core.SSAValue(8), Core.SSAValue(14)), - ReturnNode(Core.SSAValue(18)), - ] - nstmts = length(src.code) - src.ssavaluetypes = Any[ Any for _ = 1:nstmts ] - src.codelocs = fill(Int32(1), nstmts) - src.inferred = true - Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src)) - global test29262 = true - @test (:b, :a, :c, :c) === @eval $m - global test29262 = false - @test (:b, :a, :c, :b) === @eval $m -end - -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::Core.CodeInfo - src.code = Any[ - # block 1 - QuoteNode(:a), - QuoteNode(:b), - GlobalRef(@__MODULE__, :test29262), - # block 2 - Expr(:enter, 11), - # block 3 - Core.UpsilonNode(), - Core.UpsilonNode(), - Core.UpsilonNode(Core.SSAValue(2)), - GotoIfNot(Core.SSAValue(3), 10), - # block 4 - Core.UpsilonNode(Core.SSAValue(1)), - # block 5 - Expr(:throw_undef_if_not, :expected, false), - # block 6 - Core.PhiCNode(Any[Core.SSAValue(5), Core.SSAValue(7), Core.SSAValue(9)]), # NULL, :a, :b - Core.PhiCNode(Any[Core.SSAValue(6)]), # NULL - Expr(:leave, 1), - # block 7 - ReturnNode(Core.SSAValue(11)), - ] - nstmts = length(src.code) - src.ssavaluetypes = Any[ Any for _ = 1:nstmts ] - src.codelocs = fill(Int32(1), nstmts) - src.inferred = true - Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src)) - 
global test29262 = true - @test :a === @eval $m - global test29262 = false - @test :b === @eval $m -end - -# https://github.com/JuliaLang/julia/issues/47065 -# `Core.Compiler.sort!` should be able to handle a big list -let n = 1000 - ex = :(return 1) - for _ in 1:n - ex = :(rand() < .1 && $(ex)) - end - @eval global function f_1000_blocks() - $ex - return 0 - end -end -@test f_1000_blocks() == 0 diff --git a/test/compiler/invalidation.jl b/test/compiler/invalidation.jl deleted file mode 100644 index 20ab2483aa378..0000000000000 --- a/test/compiler/invalidation.jl +++ /dev/null @@ -1,258 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# setup -# ----- - -include("irutils.jl") - -using Test -const CC = Core.Compiler -import Core: MethodInstance, CodeInstance -import .CC: WorldRange, WorldView - -struct InvalidationTesterCache - dict::IdDict{MethodInstance,CodeInstance} -end -InvalidationTesterCache() = InvalidationTesterCache(IdDict{MethodInstance,CodeInstance}()) - -const INVALIDATION_TESTER_CACHE = InvalidationTesterCache() - -struct InvalidationTester <: CC.AbstractInterpreter - callback! - world::UInt - inf_params::CC.InferenceParams - opt_params::CC.OptimizationParams - inf_cache::Vector{CC.InferenceResult} - code_cache::InvalidationTesterCache - function InvalidationTester(callback! = nothing; - world::UInt = Base.get_world_counter(), - inf_params::CC.InferenceParams = CC.InferenceParams(), - opt_params::CC.OptimizationParams = CC.OptimizationParams(), - inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[], - code_cache::InvalidationTesterCache = INVALIDATION_TESTER_CACHE) - if callback! === nothing - callback! = function (replaced::MethodInstance) - # Core.println(replaced) # debug - delete!(code_cache.dict, replaced) - end - end - return new(callback!, world, inf_params, opt_params, inf_cache, code_cache) - end -end - -struct InvalidationTesterCacheView - interp::InvalidationTester - dict::IdDict{MethodInstance,CodeInstance} -end - -CC.InferenceParams(interp::InvalidationTester) = interp.inf_params -CC.OptimizationParams(interp::InvalidationTester) = interp.opt_params -CC.get_world_counter(interp::InvalidationTester) = interp.world -CC.get_inference_cache(interp::InvalidationTester) = interp.inf_cache -CC.code_cache(interp::InvalidationTester) = WorldView(InvalidationTesterCacheView(interp, interp.code_cache.dict), WorldRange(interp.world)) -CC.get(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) -CC.getindex(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) -CC.haskey(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) -function CC.setindex!(wvc::WorldView{InvalidationTesterCacheView}, ci::CodeInstance, mi::MethodInstance) - add_callback!(wvc.cache.interp.callback!, mi) - setindex!(wvc.cache.dict, ci, mi) -end - -function add_callback!(@nospecialize(callback!), mi::MethodInstance) - callback = function (replaced::MethodInstance, max_world, - seen::Base.IdSet{MethodInstance} = Base.IdSet{MethodInstance}()) - push!(seen, replaced) - callback!(replaced) - if isdefined(replaced, :backedges) - for item in replaced.backedges - isa(item, MethodInstance) || continue # might be `Type` object representing an `invoke` signature - mi = item - mi in seen && continue # otherwise fail into an infinite loop - var"#self#"(mi, max_world, seen) - end - end - return nothing - end - - if !isdefined(mi, 
:callbacks) - mi.callbacks = Any[callback] - else - callbacks = mi.callbacks::Vector{Any} - if !any(@nospecialize(cb)->cb===callback, callbacks) - push!(callbacks, callback) - end - end - return nothing -end - - -# basic functionality test -# ------------------------ - -basic_callee(x) = x -basic_caller(x) = basic_callee(x) - -# run inference and check that cache exist -@test Base.return_types((Float64,); interp=InvalidationTester()) do x - basic_caller(x) -end |> only === Float64 -@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :basic_callee -end -@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :basic_caller -end - -# this redefinition below should invalidate the cache -basic_callee(x) = x, x -@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :basic_callee -end -@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :basic_caller -end - -# re-run inference and check the result is updated (and new cache exists) -@test Base.return_types((Float64,); interp=InvalidationTester()) do x - basic_caller(x) -end |> only === Tuple{Float64,Float64} -@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :basic_callee -end -@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :basic_caller -end - -# backedge optimization -# --------------------- - -const GLOBAL_BUFFER = IOBuffer() - -# test backedge optimization when the callee's type and effects information are maximized -begin take!(GLOBAL_BUFFER) - - pr48932_callee(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x)) - pr48932_caller(x) = pr48932_callee(Base.inferencebarrier(x)) - - # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top - let rt = only(Base.return_types(pr48932_callee, (Any,))) - @test rt === Any - effects = Base.infer_effects(pr48932_callee, (Any,)) - @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects() - end - - # run inference on both `pr48932_caller` and `pr48932_callee` - let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x - @inline pr48932_caller(x) - end |> only - @test rt === Any - @test any(iscall((src, pr48932_callee)), src.code) - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_callee - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_caller - end - @test 42 == pr48932_caller(42) - @test "42" == String(take!(GLOBAL_BUFFER)) - - # test that we didn't add the backedge from `pr48932_callee` to `pr48932_caller`: - # this redefinition below should invalidate the cache of `pr48932_callee` but not that of `pr48932_caller` - pr48932_callee(x) = (print(GLOBAL_BUFFER, x); nothing) - @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_callee - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_caller - end - @test isnothing(pr48932_caller(42)) - @test "42" == String(take!(GLOBAL_BUFFER)) -end - -# we can avoid adding backedge even if the callee's return type is not the top -# when the return value is not used within the caller -begin take!(GLOBAL_BUFFER) - - pr48932_callee_inferrable(x) = (print(GLOBAL_BUFFER, x); nothing) - pr48932_caller_unuse(x) = (pr48932_callee_inferrable(Base.inferencebarrier(x)); nothing) - - # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top - let rt = only(Base.return_types(pr48932_callee_inferrable, (Any,))) - 
@test rt === Nothing - effects = Base.infer_effects(pr48932_callee_inferrable, (Any,)) - @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects() - end - - # run inference on both `pr48932_caller` and `pr48932_callee`: - # we don't need to add backedge to `pr48932_callee` from `pr48932_caller` - # since the inference result of `pr48932_callee` is maximized and it's not inlined - let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x - @inline pr48932_caller_unuse(x) - end |> only - @test rt === Nothing - @test any(iscall((src, pr48932_callee_inferrable)), src.code) - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_callee_inferrable - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_caller_unuse - end - @test isnothing(pr48932_caller_unuse(42)) - @test "42" == String(take!(GLOBAL_BUFFER)) - - # test that we didn't add the backedge from `pr48932_callee_inferrable` to `pr48932_caller_unuse`: - # this redefinition below should invalidate the cache of `pr48932_callee_inferrable` but not that of `pr48932_caller_unuse` - pr48932_callee_inferrable(x) = (print(GLOBAL_BUFFER, "foo"); x) - @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_callee_inferrable - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_caller_unuse - end - @test isnothing(pr48932_caller_unuse(42)) - @test "foo" == String(take!(GLOBAL_BUFFER)) -end - -# we need to add backedge when the callee is inlined -begin take!(GLOBAL_BUFFER) - - @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x)) - pr48932_caller_inlined(x) = pr48932_callee_inlined(Base.inferencebarrier(x)) - - # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top - let rt = only(Base.return_types(pr48932_callee_inlined, (Any,))) - @test rt === Any - effects = Base.infer_effects(pr48932_callee_inlined, (Any,)) - @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects() - end - - # run inference on `pr48932_caller_inlined` and `pr48932_callee_inlined` - let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x - @inline pr48932_caller_inlined(x) - end |> only - @test rt === Any - @test any(isinvoke(:pr48932_callee_inlined), src.code) - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_callee_inlined - end - @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_caller_inlined - end - @test 42 == pr48932_caller_inlined(42) - @test "42" == String(take!(GLOBAL_BUFFER)) - - # test that we added the backedge from `pr48932_callee_inlined` to `pr48932_caller_inlined`: - # this redefinition below should invalidate the cache of `pr48932_callee_inlined` but not that of `pr48932_caller_inlined` - @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); nothing) - @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_callee_inlined - end - @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) - mi.def.name === :pr48932_caller_inlined - end - @test isnothing(pr48932_caller_inlined(42)) - @test "42" == String(take!(GLOBAL_BUFFER)) -end diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl deleted file mode 100644 index 00de9b2472de4..0000000000000 --- a/test/compiler/irutils.jl +++ /dev/null @@ -1,57 +0,0 @@ -using Core: CodeInfo, ReturnNode, MethodInstance 
-using Core.Compiler: IRCode, IncrementalCompact, singleton_type, VarState -using Base.Meta: isexpr -using InteractiveUtils: gen_call_with_extracted_types_and_kwargs - -argextype(@nospecialize args...) = Core.Compiler.argextype(args..., VarState[]) -code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo -macro code_typed1(ex0...) - return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0) -end -get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code -macro get_code(ex0...) - return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0) -end - -# check if `x` is a statement with a given `head` -isnew(@nospecialize x) = isexpr(x, :new) -issplatnew(@nospecialize x) = isexpr(x, :splatnew) -isreturn(@nospecialize x) = isa(x, ReturnNode) - -# check if `x` is a dynamic call of a given function -iscall(y) = @nospecialize(x) -> iscall(y, x) -function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:Union{CodeInfo,IRCode,IncrementalCompact} - return iscall(x) do @nospecialize x - singleton_type(argextype(x, src)) === f - end -end -function iscall(pred::Base.Callable, @nospecialize(x)) - if isexpr(x, :(=)) - x = x.args[2] - end - return isexpr(x, :call) && pred(x.args[1]) -end - -# check if `x` is a statically-resolved call of a function whose name is `sym` -isinvoke(y) = @nospecialize(x) -> isinvoke(y, x) -isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x) -isinvoke(pred::Function, @nospecialize(x)) = isexpr(x, :invoke) && pred(x.args[1]::MethodInstance) - -function fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) - code = code_typed1(args...; kwargs...).code - if retval !== (@__FILE__) - length(code) == 1 || return false - code1 = code[1] - isreturn(code1) || return false - val = code1.val - if val isa QuoteNode - val = val.value - end - return val == retval - else - return length(code) == 1 && isreturn(code[1]) - end -end -macro fully_eliminated(ex0...) - return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0) -end diff --git a/test/compiler/newinterp.jl b/test/compiler/newinterp.jl deleted file mode 100644 index 56a68f2a09545..0000000000000 --- a/test/compiler/newinterp.jl +++ /dev/null @@ -1,45 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - @newinterp NewInterpreter - -Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated -from the native code cache, satisfying the minimum interface requirements. 
-""" -macro newinterp(InterpName) - InterpCacheName = esc(Symbol(string(InterpName, "Cache"))) - InterpName = esc(InterpName) - C = Core - CC = Core.Compiler - quote - struct $InterpCacheName - dict::IdDict{$C.MethodInstance,$C.CodeInstance} - end - $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}()) - struct $InterpName <: $CC.AbstractInterpreter - meta # additional information - world::UInt - inf_params::$CC.InferenceParams - opt_params::$CC.OptimizationParams - inf_cache::Vector{$CC.InferenceResult} - code_cache::$InterpCacheName - function $InterpName(meta = nothing; - world::UInt = Base.get_world_counter(), - inf_params::$CC.InferenceParams = $CC.InferenceParams(), - opt_params::$CC.OptimizationParams = $CC.OptimizationParams(), - inf_cache::Vector{$CC.InferenceResult} = $CC.InferenceResult[], - code_cache::$InterpCacheName = $InterpCacheName()) - return new(meta, world, inf_params, opt_params, inf_cache, code_cache) - end - end - $CC.InferenceParams(interp::$InterpName) = interp.inf_params - $CC.OptimizationParams(interp::$InterpName) = interp.opt_params - $CC.get_world_counter(interp::$InterpName) = interp.world - $CC.get_inference_cache(interp::$InterpName) = interp.inf_cache - $CC.code_cache(interp::$InterpName) = $CC.WorldView(interp.code_cache, $CC.WorldRange(interp.world)) - $CC.get(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance, default) = get(wvc.cache.dict, mi, default) - $CC.getindex(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = getindex(wvc.cache.dict, mi) - $CC.haskey(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = haskey(wvc.cache.dict, mi) - $CC.setindex!(wvc::$CC.WorldView{$InterpCacheName}, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(wvc.cache.dict, ci, mi) - end -end diff --git a/test/complex.jl b/test/complex.jl index 2b87655f1ebe0..63304652ee7d8 100644 --- a/test/complex.jl +++ b/test/complex.jl @@ -383,6 +383,7 @@ import Base.Math.@horner @test isequal(log1p(complex(-2, 1e-10)), log(1 + complex(-2, 1e-10))) @test isequal(log1p(complex(1, Inf)), complex(Inf, pi/2)) @test isequal(log1p(complex(1, -Inf)), complex(Inf, -pi/2)) + @test isequal(log1p(complex(1e-200, 5e-175)), complex(1e-200, 5e-175)) for z in (1e-10+1e-9im, 1e-10-1e-9im, -1e-10+1e-9im, -1e-10-1e-9im) @test log1p(z) ≈ @horner(z, 0, 1, -0.5, 1/3, -0.25, 0.2) @@ -1214,3 +1215,9 @@ end @test !iseven(7+0im) && isodd(7+0im) @test !iseven(6+1im) && !isodd(7+1im) end + +@testset "issue #55266" begin + for T in (Float16, Float32, Float64) + @test isapprox(atanh(1+im*floatmin(T)), Complex{T}(atanh(1+im*big(floatmin(T))))) + end +end diff --git a/test/copy.jl b/test/copy.jl index 633beee5f2af3..f5cc57c86feaa 100644 --- a/test/copy.jl +++ b/test/copy.jl @@ -49,6 +49,15 @@ chnlprod(x) = Channel(c->for i in x; put!(c,i); end) @test_throws Union{BoundsError, ArgumentError} copyto!(dest, 1, src(), 2, 2) end + + v = rand(Float32, 4) + a = Memory{Float32}(v) + b = similar(a) + copyto!(b, a) + @test a == b + + c = Memory{Float32}(undef, 3) + @test_throws BoundsError copyto!(c, a) end @testset "with CartesianIndices" begin @@ -189,7 +198,7 @@ end bar = Bar19921(foo, Dict(foo => 3)) bar2 = deepcopy(bar) @test bar2.foo ∈ keys(bar2.fooDict) - @test bar2.fooDict[bar2.foo] != nothing + @test bar2.fooDict[bar2.foo] !== nothing end let d = IdDict(rand(2) => rand(2) for i = 1:100) @@ -213,11 +222,13 @@ end @testset "copying CodeInfo" begin _testfunc() = nothing ci,_ = code_typed(_testfunc, ())[1] - ci.edges = [_testfunc] + if isdefined(ci, 
:edges) + ci.edges = [_testfunc] - ci2 = copy(ci) - # Test that edges are not shared - @test ci2.edges !== ci.edges + ci2 = copy(ci) + # Test that edges are not shared + @test ci2.edges !== ci.edges + end end @testset "issue #34025" begin @@ -242,10 +253,35 @@ end @test copyto!(s, String[]) == [1, 2] # No error end +@testset "circular reference arrays" begin + # issue 56775 + p = Any[nothing] + p[1] = p + p2 = deepcopy(p) + @test p2 === p2[1] + @test p2 !== p +end + @testset "deepcopy_internal arrays" begin @test (@inferred Base.deepcopy_internal(zeros(), IdDict())) == zeros() end +@testset "deepcopy_internal inference" begin + @inferred Base.deepcopy_internal(1, IdDict()) + @inferred Base.deepcopy_internal(1.0, IdDict()) + @inferred Base.deepcopy_internal(big(1), IdDict()) + @inferred Base.deepcopy_internal(big(1.0), IdDict()) + @inferred Base.deepcopy_internal('a', IdDict()) + @inferred Base.deepcopy_internal("abc", IdDict()) + @inferred Base.deepcopy_internal([1,2,3], IdDict()) + + # structs without custom deepcopy_internal method + struct Immutable2; x::Int; end + mutable struct Mutable2; x::Int; end + @inferred Base.deepcopy_internal(Immutable2(1), IdDict()) + @inferred Base.deepcopy_internal(Mutable2(1), IdDict()) +end + @testset "`copyto!`'s unaliasing" begin a = view([1:3;], :) @test copyto!(a, 2, a, 1, 2) == [1;1:2;] @@ -255,6 +291,8 @@ end @testset "`deepcopy` a `GenericCondition`" begin a = Base.GenericCondition(ReentrantLock()) + # Test printing + @test repr(a) == "Base.GenericCondition(ReentrantLock())" @test !islocked(a.lock) lock(a.lock) @test islocked(a.lock) @@ -267,4 +305,6 @@ end @test a.lock !== b.lock @test islocked(a.lock) @test !islocked(b.lock) + @inferred deepcopy(a) + @inferred deepcopy(a.lock) end diff --git a/test/core.jl b/test/core.jl index f0439afeed23c..4bbb2ca368019 100644 --- a/test/core.jl +++ b/test/core.jl @@ -14,15 +14,18 @@ include("testenv.jl") # sanity tests that our built-in types are marked correctly for const fields for (T, c) in ( (Core.CodeInfo, []), - (Core.CodeInstance, [:def, :rettype, :rettype_const, :ipo_purity_bits, :argescapes]), + (Core.CodeInstance, [:def, :owner, :rettype, :exctype, :rettype_const, :analysis_results]), (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :is_for_opaque_closure, :constprop=#]), (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals=#]), (Core.MethodTable, [:module]), - (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :min_world, :max_world, :func, :isleafsig, :issimplesig, :va]), + (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :func, :isleafsig, :issimplesig, :va]), (Core.TypeMapLevel, []), - (Core.TypeName, [:name, :module, :names, :atomicfields, :constfields, :wrapper, :mt, :hash, :n_uninitialized, :flags]), + (Core.TypeName, [:name, :module, :names, :wrapper, :mt, :hash, :n_uninitialized, :flags]), (DataType, [:name, :super, :parameters, :instance, :hash]), (TypeVar, [:name, :ub, :lb]), + (Core.Memory, [:length, :ptr]), + (Core.GenericMemoryRef, [:mem, :ptr_or_offset]), + (Task, [:metrics_enabled]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if isconst(T, i))) == Set(c) end @@ -30,14 +33,17 @@ end # sanity tests that our built-in types are marked correctly for atomic fields for (T, c) in ( (Core.CodeInfo, []), - (Core.CodeInstance, [:next, :inferred, :purity_bits, :invoke, :specptr, :precompile]), - (Core.Method, []), - (Core.MethodInstance, [:uninferred, :cache, :precompiled]), + 
(Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :edges, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :specsigflags, :precompile]), + (Core.Method, [:primary_world, :deleted_world]), + (Core.MethodInstance, [:cache, :flags]), (Core.MethodTable, [:defs, :leafcache, :cache, :max_args]), - (Core.TypeMapEntry, [:next]), + (Core.TypeMapEntry, [:next, :min_world, :max_world]), (Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]), (Core.TypeName, [:cache, :linearcache]), (DataType, [:types, :layout]), + (Core.Memory, []), + (Core.GenericMemoryRef, []), + (Task, [:_state, :running_time_ns, :finished_at, :first_enqueued_at, :last_started_running_at]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c) end @@ -107,6 +113,21 @@ let abcd = ABCDconst(1, 2, 3, 4) abcd.d = nothing) @test (1, 2, "not constant", 4) === (abcd.a, abcd.b, abcd.c, abcd.d) end +# Issue #52686 +struct A52686{T} end +struct B52686{T, S} + a::A52686{<:T} +end +function func52686() + @eval begin + struct A52686{T} end + struct B52686{T, S} + a::A52686{<:T} + end + end + return true +end +@test func52686() # test `===` handling null pointer in struct #44712 struct N44712 @@ -220,8 +241,8 @@ k11840(::Type{Union{Tuple{Int32}, Tuple{Int64}}}) = '2' # issue #20511 f20511(x::DataType) = 0 f20511(x) = 1 -Type{Integer} # cache this -@test f20511(Union{Integer,T} where T <: Unsigned) == 1 +Type{AbstractSet} # cache this +@test f20511(Union{AbstractSet,Set{T}} where T) == 1 # join @test typejoin(Int8,Int16) === Signed @@ -374,8 +395,8 @@ let ft = Base.datatype_fieldtypes @test ft(elT2.body)[1].parameters[1] === elT2 @test Base.isconcretetype(ft(elT2.body)[1]) end -#struct S22624{A,B,C} <: Ref{S22624{Int64,A}}; end -@test_broken @isdefined S22624 +struct S22624{A,B,C} <: Ref{S22624{Int,A}}; end +@test sizeof(S22624) == sizeof(S22624{Int,Int,Int}) == 0 # issue #42297 mutable struct Node42297{T, V} @@ -414,6 +435,18 @@ mutable struct FooFoo{A,B} y::FooFoo{A} end @test FooFoo{Int} <: FooFoo{Int,AbstractString}.types[1] +# make sure this self-referential struct doesn't crash type layout +struct SelfTyA{V} + a::Base.RefValue{V} +end +struct SelfTyB{T} + a::T + b::SelfTyA{SelfTyB{T}} +end +let T = Base.RefValue{SelfTyB{Int}} + @test sizeof(T) === sizeof(Int) + @test sizeof(T.types[1]) === 2 * sizeof(Int) +end let x = (2,3) @test +(x...) 
== 5 @@ -536,7 +569,7 @@ function i18408() return (x -> i) end let f = i18408() - @test_throws UndefVarError(:i) f(0) + @test_throws UndefVarError(:i, :local) f(0) end # issue #23558 @@ -596,7 +629,7 @@ begin global f7234_cnt += -10000 end end -@test_throws UndefVarError(:glob_x2) f7234_a() +@test_throws UndefVarError(:glob_x2, :local) f7234_a() @test f7234_cnt == 1 begin global glob_x2 = 24 @@ -606,7 +639,7 @@ begin global f7234_cnt += -10000 end end -@test_throws UndefVarError(:glob_x2) f7234_b() +@test_throws UndefVarError(:glob_x2, :local) f7234_b() @test f7234_cnt == 2 # globals can accessed if declared for i = 1:2 @@ -721,11 +754,11 @@ function f21900() global f21900_cnt += -1000 nothing end -@test_throws UndefVarError(:x_global_undefined_error) f21900() +@test_throws UndefVarError(:x_global_undefined_error, @__MODULE__) f21900() @test f21900_cnt == 1 # use @eval so this runs as a toplevel scope block -@test_throws UndefVarError(:foo21900) @eval begin +@test_throws UndefVarError(:foo21900, @__MODULE__) @eval begin for i21900 = 1:10 local bar21900 for j21900 = 1:10 @@ -738,7 +771,7 @@ end @test !@isdefined(foo21900) @test !@isdefined(bar21900) bar21900 = 0 -@test_throws UndefVarError(:foo21900) @eval begin +@test_throws UndefVarError(:foo21900, @__MODULE__) @eval begin for i21900 = 1:10 global bar21900 for j21900 = 1:10 @@ -764,6 +797,34 @@ end @test foo21900 == 10 @test bar21900 == 11 +let f = g -> x -> g(x) + @test f(Int)(1.0) === 1 + @test @inferred(f(Int)) isa Function + @test fieldtype(typeof(f(Int)), 1) === Type{Int} + @test @inferred(f(Rational{Int})) isa Function + @test fieldtype(typeof(f(Rational{Int})), 1) === Type{Rational{Int}} + @test_broken @inferred(f(Rational)) isa Function + @test fieldtype(typeof(f(Rational)), 1) === Type{Rational} + @test_broken @inferred(f(Rational{Core.TypeVar(:T)})) isa Function + @test fieldtype(typeof(f(Rational{Core.TypeVar(:T)})), 1) === DataType +end +let f() = (T = Rational{Core.TypeVar(:T)}; () -> T) + @test f() isa Function + @test Base.infer_return_type(f()) == DataType + @test fieldtype(typeof(f()), 1) === DataType + t = f()() + @test t isa DataType + @test t.name.wrapper == Rational + @test length(t.parameters) == 1 + @test t.parameters[1] isa Core.TypeVar +end +function issue23618(a::AbstractVector) + T = eltype(a) + b = Vector{T}() + return [Set{T}() for x in a] +end +@test Base.infer_return_type(issue23618, (Vector{Int},)) == Vector{Set{Int}} + # ? syntax @test (true ? 1 : false ? 2 : 3) == 1 @@ -1149,6 +1210,10 @@ let A = [1] @test x == 1 end +# Make sure that `Module` is not resolved to `Core.Module` during sysimg generation +# so that users can define their own binding named `Module` in Main. 
+@test success(`$(Base.julia_cmd()) -e '@assert !Base.isbindingresolved(Main, :Module)'`) + # Module() constructor @test names(Module(:anonymous), all = true, imported = true) == [:anonymous] @test names(Module(:anonymous, false), all = true, imported = true) == [:anonymous] @@ -1429,6 +1494,9 @@ let @test unsafe_load(p2) == 101 unsafe_store!(p2, 909, 3) @test a2 == [101,102,909] + # test for issue 51954 + @test pointer(a.ref.mem)===pointer(a) + @test pointer(a.ref.mem,2)===pointer(a,2) end @test unsafe_pointer_to_objref(ccall(:jl_call1, Ptr{Cvoid}, (Any,Any), @@ -1886,9 +1954,9 @@ end # issue #4526 f4526(x) = isa(x.a, Nothing) -@test_throws ErrorException f4526(1) -@test_throws ErrorException f4526(im) -@test_throws ErrorException f4526(1+2im) +@test_throws FieldError f4526(1) +@test_throws FieldError f4526(im) +@test_throws FieldError f4526(1+2im) # issue #4528 function f4528(A, B) @@ -1897,7 +1965,7 @@ function f4528(A, B) end end @test f4528(false, Int32(12)) === nothing -@test_throws ArgumentError f4528(true, Int32(12)) +@test_throws ErrorException f4528(true, Int32(12)) # issue #4518 f4518(x, y::Union{Int32,Int64}) = 0 @@ -2554,7 +2622,7 @@ end # issue #8338 let ex = Expr(:(=), :(f8338(x;y=4)), :(x*y)) eval(ex) - @test f8338(2) == 8 + @test invokelatest(f8338, 2) == 8 end # call overloading (#2403) @@ -2582,7 +2650,7 @@ struct D14919 <: Function; end for f in (:Any, :Function, :(Core.Builtin), :(Union{Nothing, Type}), :(Union{typeof(+), Type}), :(Union{typeof(+), typeof(-)}), :(Base.Callable)) @test_throws ErrorException("Method dispatch is unimplemented currently for this method signature") @eval (::$f)() = 1 end -for f in (:(Core.arrayref), :((::typeof(Core.arrayref))), :((::Core.IntrinsicFunction))) +for f in (:(Core.getfield), :((::typeof(Core.getfield))), :((::Core.IntrinsicFunction))) @test_throws ErrorException("cannot add methods to a builtin function") @eval $f() = 1 end @@ -3961,6 +4029,14 @@ end end @test f13432b(true) == true @test f13432b(false) == false +@noinline function f13432c(x) + offset = x ? 
Base.Bottom : 1 + # Barrier for inference, so the optimizer cannot optimize this, + # but codegen can still see this is a constant + return ===(offset, Base.inferencebarrier(Base.Bottom)) +end +@test f13432c(true) == true +@test f13432c(false) == false #13433, read!(::IO, a::Vector{UInt8}) should return a mutable struct IO13433 <: IO end @@ -4110,8 +4186,30 @@ end let z1 = Z14477() @test isa(z1, Z14477) @test isa(z1.fld, Z14477) + @test isdefined(z1, :fld) + @test !isdefined(z1.fld, :fld) +end +struct Z14477B + fld::Union{Nothing,Z14477B} + Z14477B() = new(new(nothing)) +end +let z1 = Z14477B() + @test isa(z1, Z14477B) + @test isa(z1.fld, Z14477B) + @test isa(z1.fld.fld, Nothing) +end +struct Z14477C{T} + fld::Z14477C{Int8} + Z14477C() = new{Int16}(new{Int8}()) +end +let z1 = Z14477C() + @test isa(z1, Z14477C) + @test isa(z1.fld, Z14477C) + @test isdefined(z1, :fld) + @test !isdefined(z1.fld, :fld) end + # issue #8846, generic macros macro m8846(a, b=0) a, b @@ -4135,7 +4233,7 @@ let foo(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = 1 end let foo(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = T @test foo(1, nothing) === Int - @test_throws UndefVarError(:T) foo(nothing, nothing) + @test_throws UndefVarError(:T, :static_parameter) foo(nothing, nothing) end module TestMacroGlobalFunction @@ -4189,14 +4287,14 @@ foo9677(x::Array) = invoke(foo9677, Tuple{AbstractArray}, x) # issue #6846 f6846() = (please6846; 2) -@test_throws UndefVarError(:please6846) f6846() +@test_throws UndefVarError(:please6846, @__MODULE__) f6846() module M6846 macro f() return esc(:(please6846; 2)) end end -@test_throws UndefVarError(:please6846) @M6846.f() +@test_throws UndefVarError(:please6846, @__MODULE__) @M6846.f() # issue #14758 @test isa(@eval(f14758(; $([]...)) = ()), Function) @@ -4238,13 +4336,13 @@ end abstract type abstest_14825 end mutable struct t1_14825{A <: abstest_14825, B} - x::A - y::B + x::A + y::B end mutable struct t2_14825{C, B} <: abstest_14825 - x::C - y::t1_14825{t2_14825{C, B}, B} + x::C + y::t1_14825{t2_14825{C, B}, B} end @test t2_14825{Int,Int}.types[2] <: t1_14825 @@ -4300,6 +4398,7 @@ function f15180(x::T) where T end @test map(f15180(1), [1,2]) == [(Int,1),(Int,1)] +using Base: _growbeg!, _deletebeg!, _growend!, _deleteend! struct ValueWrapper vpadding::NTuple{2,VecElement{UInt}} value @@ -4308,43 +4407,44 @@ end Base.convert(::Type{ValueWrapper}, x) = ValueWrapper(x) for T in (Any, ValueWrapper) let ary = Vector{T}(undef, 10) - check_undef_and_fill(ary, rng) = for i in rng - @test !isassigned(ary, i) + check_undef_and_fill(ary, rng) = all(i -> begin + isassigned(ary, i) && return false ary[i] = (Float64(i), i) # some non-cached content - @test isassigned(ary, i) - end + isassigned(ary, i) || return false + return true + end, rng) # Check if the memory is initially zerod and fill it with value # to check if these values are not reused later. 
- check_undef_and_fill(ary, 1:10) + @test check_undef_and_fill(ary, 1:10) # Check if the memory grown at the end are zerod - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 11:20) + _growend!(ary, 10) + @test check_undef_and_fill(ary, 11:20) # Make sure the content of the memory deleted at the end are not reused - ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 5) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 5) - check_undef_and_fill(ary, 16:20) + _deleteend!(ary, 5) + _growend!(ary, 5) + @test check_undef_and_fill(ary, 16:20) # Now check grow/del_end ary = Vector{T}(undef, 1010) - check_undef_and_fill(ary, 1:1010) + @test check_undef_and_fill(ary, 1:1010) # This del_beg should move the buffer - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 1000) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 1000) - check_undef_and_fill(ary, 1:1000) + _deletebeg!(ary, 1000) + _growbeg!(ary, 1000) + @test check_undef_and_fill(ary, 1:1000) ary = Vector{T}(undef, 1010) - check_undef_and_fill(ary, 1:1010) + @test check_undef_and_fill(ary, 1:1010) # This del_beg should not move the buffer - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1:10) + _deletebeg!(ary, 10) + _growbeg!(ary, 10) + @test check_undef_and_fill(ary, 1:10) ary = Vector{T}(undef, 1010) - check_undef_and_fill(ary, 1:1010) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1011:1020) - ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 10) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1:10) + @test check_undef_and_fill(ary, 1:1010) + _growend!(ary, 10) + @test check_undef_and_fill(ary, 1011:1020) + _deleteend!(ary, 10) + _growbeg!(ary, 10) + @test check_undef_and_fill(ary, 1:10) # Make sure newly malloc'd buffers are filled with 0 # test this for a few different sizes since we need to make sure @@ -4357,33 +4457,60 @@ for T in (Any, ValueWrapper) GC.gc() GC.gc() GC.gc() - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 4) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, n) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) - check_undef_and_fill(ary, 1:(2n + 4)) + _growbeg!(ary, 4) + _deletebeg!(ary, 4) + _growend!(ary, n) + _growbeg!(ary, 4) + @test check_undef_and_fill(ary, 1:(2n + 4)) end ary = Vector{T}(undef, 100) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10000) + _growend!(ary, 10000) ary[:] = 1:length(ary) - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10000) + _deletebeg!(ary, 10000) # grow on the back until a buffer reallocation happens cur_ptr = pointer(ary) while cur_ptr == pointer(ary) len = length(ary) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - for i in (len + 1):(len + 10) - @test !isassigned(ary, i) - end + _growend!(ary, 10) + result = @test all(i -> !isassigned(ary, i), (len + 1):(len + 10)) + result isa Test.Pass || break end - ary = Vector{T}(undef, 100) - ary[:] = 1:length(ary) - ccall(:jl_array_grow_at, Cvoid, (Any, Csize_t, Csize_t), ary, 50, 10) - for i in 51:60 - @test !isassigned(ary, i) - end + # growat when copy into start of same buffer + ary = Vector{T}(undef, 10) + ary[:] = 1:10 + pushfirst!(ary, 0) + Base._growat!(ary, 3, 5) + @test all(i -> !isassigned(ary, i), 3:7) + @test all(i -> isassigned(ary, i), 8:length(ary)) + @test all(i -> isassigned(ary, 
i), 1:2) + + # growat when copy into end of same buffer + ary = Vector{T}(undef, 10) + ary[:] = 1:10 + push!(ary, 11) + Base._growat!(ary, 6, 10) + @test all(i -> !isassigned(ary, i), 6:15) + @test all(i -> isassigned(ary, i), 16:length(ary)) + @test all(i -> isassigned(ary, i), 1:5) + + # growat when copy to new buffer + ary = Vector{T}(undef, 10) + ary[:] = 1:10 + Base._growat!(ary, 6, 10) + @test all(i -> !isassigned(ary, i), 6:15) + @test all(i -> isassigned(ary, i), 16:length(ary)) + @test all(i -> isassigned(ary, i), 1:5) + end +end + +#test grow_end ccall directly since it's used in the C source +for ET in [Nothing, Int, Union{Int, Nothing}, Any] + for n in [0, 1, 10] + arr = Vector{ET}(undef, n) + ccall(:jl_array_grow_end, Cvoid, (Any, UInt), arr, 1) + @test length(arr) == n+1 end end @@ -4469,8 +4596,13 @@ end # Make sure arrayset can handle `Array{T}` (where `T` is a type and not a # `TypeVar`) without crashing let - function arrayset_unknown_dim(::Type{T}, n) where T - Base.arrayset(true, reshape(Vector{T}(undef, 1), fill(1, n)...), 2, 1) + @noinline function arrayset_unknown_dim(::Type{T}, n) where T + a = Vector{T}(undef, 1) + fill!(a, 0) + a = reshape(a, fill(1, n)...)::Array{T} + @test a[1] === 0 + Core.memoryrefset!(a.ref, 2, :not_atomic, true) + @test a[1] === 2 end arrayset_unknown_dim(Any, 1) arrayset_unknown_dim(Any, 2) @@ -4480,88 +4612,6 @@ let arrayset_unknown_dim(Int, 3) end -module TestSharedArrayResize -using Test -# Attempting to change the shape of a shared array should unshare it and -# not modify the original data -function test_shared_array_resize(::Type{T}) where T - len = 100 - a = Vector{T}(undef, len) - function test_unshare(f) - a′ = reshape(reshape(a, (len ÷ 2, 2)), len) - a[:] = 1:length(a) - # The operation should fail on the owner shared array - # and has no side effect. - @test_throws ErrorException f(a) - @test a == [1:len;] - @test a′ == [1:len;] - @test pointer(a) == pointer(a′) - # The operation should pass on the non-owner shared array - # and should unshare the arrays with no effect on the original one. 
- f(a′) - @test a == [1:len;] - @test pointer(a) != pointer(a′) - end - - test_unshare(a->ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->deleteat!(a, 10)) - test_unshare(a->deleteat!(a, 90)) - test_unshare(a->ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->insert!(a, 10, 10)) - test_unshare(a->insert!(a, 90, 90)) -end -test_shared_array_resize(Int) -test_shared_array_resize(Any) -end - -module TestArrayNUL -using Test -function check_nul(a::Vector{UInt8}) - b = ccall(:jl_array_cconvert_cstring, - Ref{Vector{UInt8}}, (Vector{UInt8},), a) - @test unsafe_load(pointer(b), length(b) + 1) == 0x0 - return b === a -end - -a = UInt8[] -b = "aaa" -c = [0x2, 0x1, 0x3] - -@test check_nul(a) -@test check_nul(unsafe_wrap(Vector{UInt8},b)) -@test check_nul(c) -d = [0x2, 0x1, 0x3] -@test check_nul(d) -push!(d, 0x3) -@test check_nul(d) -push!(d, 0x3) -@test check_nul(d) -ccall(:jl_array_del_end, Cvoid, (Any, UInt), d, 2) -@test check_nul(d) -ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1) -@test check_nul(d) -ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1) -@test check_nul(d) -ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 10) -@test check_nul(d) -ccall(:jl_array_del_beg, Cvoid, (Any, UInt), d, 8) -@test check_nul(d) -ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8) -@test check_nul(d) -ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8) -@test check_nul(d) -f = unsafe_wrap(Array, pointer(d), length(d)) -@test !check_nul(f) -f = unsafe_wrap(Array, ccall(:malloc, Ptr{UInt8}, (Csize_t,), 10), 10, own = true) -@test !check_nul(f) -end - # Copy of `#undef` copyto!(Vector{Any}(undef, 10), Vector{Any}(undef, 10)) function test_copy_alias(::Type{T}) where T @@ -4956,7 +5006,7 @@ function trigger14878() w.ext[:14878] = B14878(junk) # global junk not defined! return w end -@test_throws UndefVarError(:junk) trigger14878() +@test_throws UndefVarError(:junk, @__MODULE__) trigger14878() # issue #1090 function f1090(x)::Int @@ -5196,9 +5246,9 @@ let x = 1 @noinline g18444(a) = (x += 1; a[]) f18444_1(a) = invoke(sin, Tuple{Int}, g18444(a)) f18444_2(a) = invoke(sin, Tuple{Integer}, g18444(a)) - @test_throws ErrorException("invoke: argument type error") f18444_1(Ref{Any}(1.0)) + @test_throws "TypeError: in invoke: argument type error, expected" f18444_1(Ref{Any}(1.0)) @test x == 2 - @test_throws ErrorException("invoke: argument type error") f18444_2(Ref{Any}(1.0)) + @test_throws "TypeError: in invoke: argument type error, expected" f18444_2(Ref{Any}(1.0)) @test x == 3 @test f18444_1(Ref{Any}(1)) === sin(1) @test x == 4 @@ -5274,9 +5324,9 @@ GC.enable(true) @test isa(which(bad_tvars, ()), Method) @test bad_tvars() === 1 @test_warn "declares type variable T but does not use it" @eval bad_tvars2() where {T} = T -@test_throws UndefVarError(:T) bad_tvars2() +@test_throws UndefVarError(:T, :static_parameter) bad_tvars2() missing_tvar(::T...) 
where {T} = T -@test_throws UndefVarError(:T) missing_tvar() +@test_throws UndefVarError(:T, :static_parameter) missing_tvar() @test missing_tvar(1) === Int @test missing_tvar(1, 2, 3) === Int @test_throws MethodError missing_tvar(1, 2, "3") @@ -5403,6 +5453,21 @@ function g37690() end @test g37690().x === 0 +# issue #48889 +function f48889() + let j=0, f, i + while j < 3 + i = j + 1 + if j == 0 + f = ()->i + end + j += 1 + end + f + end +end +@test f48889()() == 3 + function _assigns_and_captures_arg(a) a = a return ()->a @@ -5584,6 +5649,26 @@ end x::Array{T} where T<:Integer end +# issue #54757, type redefinitions with recursive reference in supertype +struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N} + x::A + y::Union{A,T54757{A,N}} + z::T54757{A} +end + +struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N} + x::A + y::Union{A,T54757{A,N}} + z::T54757{A} +end + +@test_throws ErrorException struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A}},Vararg{Y,N}} where {X,Y<:T54757}, N} + x::A + y::Union{A,T54757{A,N}} + z::T54757{A} +end + + let a = Vector{Core.TypeofBottom}(undef, 2) @test a[1] == Union{} @test a == [Union{}, Union{}] @@ -5877,7 +5962,7 @@ function f_unused_undefined_sp(::T...) where T T return 0 end -@test_throws UndefVarError(:T) f_unused_undefined_sp() +@test_throws UndefVarError(:T, :static_parameter) f_unused_undefined_sp() # note: the constant `5` here should be > DataType.ninitialized. # This tests that there's no crash due to accessing Type.body.layout. @@ -6019,10 +6104,10 @@ const unboxedunions = [Union{Int8, Nothing}, @test Base.isbitsunion(unboxedunions[2]) @test Base.isbitsunion(unboxedunions[3]) -@test Base.bitsunionsize(unboxedunions[1]) == 1 -@test Base.bitsunionsize(unboxedunions[2]) == 2 -@test Base.bitsunionsize(unboxedunions[3]) == 16 -@test Base.bitsunionsize(unboxedunions[4]) == 8 +@test Base.aligned_sizeof(unboxedunions[1]) == 1 +@test Base.aligned_sizeof(unboxedunions[2]) == 2 +@test Base.aligned_sizeof(unboxedunions[3]) == 16 +@test Base.aligned_sizeof(unboxedunions[4]) == 8 @test sizeof(unboxedunions[1]) == 1 @test sizeof(unboxedunions[2]) == 2 @@ -6230,6 +6315,16 @@ let @test_throws ArgumentError unsafe_wrap(Array, convert(Ptr{Union{Int, Nothing}}, pointer(A5)), 6) end +# More unsafe_wrap +let + a = [1, 2, 3] + GC.@preserve a begin + m = unsafe_wrap(Memory{Int}, pointer(a), (3,)) + @test m == a + @test m isa Memory{Int} + end +end + # copyto! A23567 = Vector{Union{Float64, Nothing}}(undef, 5) B23567 = collect(Union{Float64, Nothing}, 1.0:3.0) @@ -6330,7 +6425,7 @@ for U in unboxedunions resize!(A, len) @test length(A) === len @test A[1] === initvalue2(F2) - @test typeof(A[end]) === F + @test typeof(A[end]) === F2 # deleteat! F = Base.uniontypes(U)[2] @@ -6418,304 +6513,291 @@ for U in unboxedunions end end -@testset "jl_array_grow_at_end" begin +@testset "array _growatend!" 
begin # start w/ array, set & check elements, grow it, check that elements stayed correct, set & check elements A = Vector{Union{Missing, UInt8}}(undef, 2) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing -# grow_at_end 2 resize!(A, 5) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +# The rest of the values are unspecified +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) +@test isequal(A, [0x01, missing, 0x03, missing, 0x05]) # grow_at_end 1 Base._growat!(A, 4, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 - -Base.arrayset(true, A, missing, 1) -Base.arrayset(true, A, 0x02, 2) -Base.arrayset(true, A, missing, 3) -Base.arrayset(true, A, 0x04, 4) -Base.arrayset(true, A, missing, 5) -Base.arrayset(true, A, 0x06, 6) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x04 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x06 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +#A[4] is unspecified +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 + +setindex!(A, missing, 1) +setindex!(A, 0x02, 2) +setindex!(A, missing, 3) +setindex!(A, 0x04, 4) +setindex!(A, missing, 5) +setindex!(A, 0x06, 6) +@test isequal(A, [missing, 0x2, missing, 0x4, missing, 0x6]) # grow_at_end 5 Base._growat!(A, 4, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x04 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x06 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x02 +@test getindex(A, 3) === missing +#A[4] is unspecified +@test getindex(A, 5) === 0x04 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x06 # grow_at_end 6 resize!(A, 8) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x04 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x06 -@test Base.arrayref(true, A, 8) === missing +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x02 +@test getindex(A, 3) === missing +# A[4] still unspecified +@test getindex(A, 5) === 0x04 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 
0x06
+# A[8] is unspecified but test that it exists
+@test getindex(A, 8) isa Any
 # grow_at_end 4
 resize!(A, 1048576)
 resize!(A, 1048577)
-@test Base.arrayref(true, A, 1) === missing
-@test Base.arrayref(true, A, 2) === 0x02
-@test Base.arrayref(true, A, 3) === missing
-@test Base.arrayref(true, A, 4) === missing
-@test Base.arrayref(true, A, 5) === 0x04
-@test Base.arrayref(true, A, 6) === missing
-@test Base.arrayref(true, A, 7) === 0x06
-@test Base.arrayref(true, A, 8) === missing
+@test getindex(A, 1) === missing
+@test getindex(A, 2) === 0x02
+@test getindex(A, 3) === missing
+# A[4] is still unspecified
+@test getindex(A, 5) === 0x04
+@test getindex(A, 6) === missing
+@test getindex(A, 7) === 0x06
+@test getindex(A, 8) === missing
+# 9:1048577 are unspecified
 foreach(9:1048577) do i
-    @test Base.arrayref(true, A, i) === missing
-end
-foreach(9:1048577) do i
-    Base.arrayset(true, A, i % UInt8, i)
-    @test Base.arrayref(true, A, i) === i % UInt8
+    setindex!(A, i % UInt8, i)
+    @test getindex(A, i) === i % UInt8
 end
 # grow_at_end 3
 A = Vector{Union{Missing, UInt8}}(undef, 1048577)
 foreach(1:1048577) do i
-    @test Base.arrayref(true, A, i) === missing
-    Base.arrayset(true, A, i % UInt8, i)
-    @test Base.arrayref(true, A, i) === i % UInt8
+    @test getindex(A, i) === missing
+    setindex!(A, i % UInt8, i)
+    @test getindex(A, i) === i % UInt8
 end
 Base._growat!(A, 1048576, 1)
 @test length(A) == 1048578
 foreach(1:1048575) do i
-    @test Base.arrayref(true, A, i) === i % UInt8
     @test A[i] === i % UInt8
 end
-@test Base.arrayref(true, A, 1048576) === missing
-@test Base.arrayref(true, A, 1048577) === 1048576 % UInt8
-@test Base.arrayref(true, A, 1048578) === 1048577 % UInt8
+@test getindex(A, 1048576) === missing
+@test getindex(A, 1048577) === 1048576 % UInt8
+@test getindex(A, 1048578) === 1048577 % UInt8
 end # @testset
-@testset "jl_array_grow_at_beg" begin
+@testset "array _growatbeg!" 
begin # grow_at_beg 4 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x01 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x03 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x01 +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x03 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 # grow_at_beg 2 Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x01 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x03 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x01 +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x03 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x05 # grow_at_beg 1 Base._growat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x01 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x03 -@test Base.arrayref(true, A, 7) === missing -@test Base.arrayref(true, A, 8) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x01 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x03 +@test getindex(A, 7) === missing +@test getindex(A, 8) === 0x05 # grow_at_beg 9 Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x01 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x03 -@test Base.arrayref(true, A, 8) === missing -@test Base.arrayref(true, A, 9) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x01 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x03 +@test getindex(A, 8) === missing +@test getindex(A, 9) === 0x05 # grow_at_beg 8 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 2, 1) Base._growat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x03 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) 
=== missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x03 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x05 # grow_at_beg 5 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 4, 1) Base._growat!(A, 4, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +@test getindex(A, 4) === missing +@test getindex(A, 5) === missing +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x05 # grow_at_beg 6 Base._growat!(A, 2, 3) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x03 -@test Base.arrayref(true, A, 7) === missing -@test Base.arrayref(true, A, 8) === missing -@test Base.arrayref(true, A, 9) === missing -@test Base.arrayref(true, A, 10) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === missing +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x03 +@test getindex(A, 7) === missing +@test getindex(A, 8) === missing +@test getindex(A, 9) === missing +@test getindex(A, 10) === 0x05 # grow_at_beg 3 A = Vector{Union{Missing, UInt8}}(undef, 1048577) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x03 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x03 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 foreach(7:length(A)) do i - @test Base.arrayref(true, A, i) === missing - Base.arrayset(true, A, i % UInt8, i) - @test Base.arrayref(true, A, i) === i % UInt8 + @test getindex(A, i) === missing + setindex!(A, i % UInt8, i) + @test getindex(A, i) === i % UInt8 end end # @testset -@testset "jl_array_del_at_beg" begin +@testset "array _deleteatbeg!" 
begin A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._deleteat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === 0x03 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === 0x03 +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x05 Base._deleteat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === 0x03 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x05 +@test getindex(A, 1) === 0x03 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x05 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x01 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x03 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x01 +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x03 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 Base._deleteat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x05 Base._deleteat!(A, 1, 2) -@test Base.arrayref(true, A, 1) === 0x03 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x05 +@test getindex(A, 1) === 0x03 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x05 Base._deleteat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x05 end # @testset -@testset "jl_array_del_at_end" begin +@testset "array _deleteatend!" 
begin A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._deleteat!(A, 5, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +@test getindex(A, 4) === missing Base._deleteat!(A, 3, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing end # @testset @@ -6737,23 +6819,23 @@ end # jl_array_shrink let A=Vector{Union{UInt8, Missing}}(undef, 1048577) - Base.arrayset(true, A, 0x01, 1) - Base.arrayset(true, A, missing, 2) - Base.arrayset(true, A, 0x03, 3) - Base.arrayset(true, A, missing, 4) - Base.arrayset(true, A, 0x05, 5) + setindex!(A, 0x01, 1) + setindex!(A, missing, 2) + setindex!(A, 0x03, 3) + setindex!(A, missing, 4) + setindex!(A, 0x05, 5) deleteat!(A, 6:1048577) - @test Base.arrayref(true, A, 1) === 0x01 - @test Base.arrayref(true, A, 2) === missing - @test Base.arrayref(true, A, 3) === 0x03 - @test Base.arrayref(true, A, 4) === missing - @test Base.arrayref(true, A, 5) === 0x05 + @test getindex(A, 1) === 0x01 + @test getindex(A, 2) === missing + @test getindex(A, 3) === 0x03 + @test getindex(A, 4) === missing + @test getindex(A, 5) === 0x05 sizehint!(A, 5) - @test Base.arrayref(true, A, 1) === 0x01 - @test Base.arrayref(true, A, 2) === missing - @test Base.arrayref(true, A, 3) === 0x03 - @test Base.arrayref(true, A, 4) === missing - @test Base.arrayref(true, A, 5) === 0x05 + @test getindex(A, 1) === 0x01 + @test getindex(A, 2) === missing + @test getindex(A, 3) === 0x03 + @test getindex(A, 4) === missing + @test getindex(A, 5) === 0x05 end # copyto!/vcat w/ internal padding @@ -6771,14 +6853,14 @@ primitive type TypeWith24Bits 24 end TypeWith24Bits(x::UInt32) = Core.Intrinsics.trunc_int(TypeWith24Bits, x) let x = TypeWith24Bits(0x112233), y = TypeWith24Bits(0x445566), z = TypeWith24Bits(0x778899) a = [x, x] - Core.arrayset(true, a, y, 2) + Core.memoryrefset!(Core.memoryrefnew(a.ref, 2, true), y, :not_atomic, true) @test a == [x, y] a[2] = z @test a == [x, z] @test pointer(a, 2) - pointer(a, 1) == 4 b = [(x, x), (x, x)] - Core.arrayset(true, b, (x, y), 2) + Core.memoryrefset!(Core.memoryrefnew(b.ref, 2, true), (x, y), :not_atomic, true) @test b == [(x, x), (x, y)] b[2] = (y, z) @test b == [(x, x), (y, z)] @@ -6880,7 +6962,7 @@ end # issue #21004 const PTuple_21004{N,T} = NTuple{N,VecElement{T}} @test_throws ArgumentError("too few elements for tuple type $PTuple_21004") PTuple_21004(1) -@test_throws UndefVarError(:T) PTuple_21004_2{N,T} = NTuple{N, VecElement{T}}(1) +@test_throws UndefVarError(:T, :static_parameter) PTuple_21004_2{N,T} = NTuple{N, VecElement{T}}(1) #issue #22792 foo_22792(::Type{<:Union{Int8,Int,UInt}}) = 1; @@ -7007,7 +7089,7 @@ translate27368(::Type{Val{name}}) where {name} = # issue #27456 @inline foo27456() = try baz_nonexistent27456(); catch; nothing; end bar27456() = foo27456() -@test bar27456() == nothing +@test bar27456() === nothing # issue #27365 mutable struct foo27365 @@ -7178,7 
+7260,7 @@ end c28399 = 42 @test g28399(0)() == 42 @test g28399(1)() == 42 -@test_throws UndefVarError(:__undef_28399__) f28399() +@test_throws UndefVarError(:__undef_28399__, @__MODULE__) f28399() # issue #28445 mutable struct foo28445 @@ -7205,6 +7287,20 @@ end @test_throws ArgumentError Array{Int, 2}(undef, -10, 0) @test_throws ArgumentError Array{Int, 2}(undef, -1, -1) +# issue #54244 +# test that zero sized array doesn't throw even with large axes +bignum = Int==Int64 ? 2^32 : 2^16 +Array{Int}(undef, 0, bignum, bignum) +Array{Int}(undef, bignum, bignum, 0) +Array{Int}(undef, bignum, bignum, 0, bignum, bignum) +# but also test that it does throw if the axes multiply to a multiple of typemax(UInt) +@test_throws ArgumentError Array{Int}(undef, bignum, bignum) +@test_throws ArgumentError Array{Int}(undef, 1, bignum, bignum) +# also test that we always throw erros for negative dims even if other dims are 0 or the product is positive +@test_throws ArgumentError Array{Int}(undef, 0, -4, -4) +@test_throws ArgumentError Array{Int}(undef, -4, 1, 0) +@test_throws ArgumentError Array{Int}(undef, -4, -4, 1) + # issue #28812 @test Tuple{Vararg{Array{T} where T,3}} === Tuple{Array,Array,Array} @@ -7348,6 +7444,7 @@ end @test isa(Core.eval(@__MODULE__, :(Bar31062(()))), Bar31062) @test precompile(identity, (Foo31062,)) +using Core: SSAValue ftype_eval = Ref(0) FieldTypeA = String FieldTypeE = UInt32 @@ -7371,21 +7468,41 @@ let fc = FieldConvert(1.0, [2.0], 0x3, 0x4, 0x5) end @test ftype_eval[] == 1 let code = code_lowered(FieldConvert)[1].code - @test code[1] == Expr(:call, GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA), Expr(:static_parameter, 1)) - @test code[2] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 1) - @test code[7] == Expr(:(=), Core.SlotNumber(10), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(2), Core.SlotNumber(10))) - @test code[8] == Core.SlotNumber(10) - @test code[9] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 2) - @test code[14] == Expr(:(=), Core.SlotNumber(9), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(9), Core.SlotNumber(9))) - @test code[15] == Core.SlotNumber(9) - @test code[16] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 4) - @test code[21] == Expr(:(=), Core.SlotNumber(8), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(16), Core.SlotNumber(8))) - @test code[22] == Core.SlotNumber(8) - @test code[23] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 5) - @test code[28] == Expr(:(=), Core.SlotNumber(7), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(23), Core.SlotNumber(7))) - @test code[29] == Core.SlotNumber(7) - @test code[30] == Expr(:new, Core.SSAValue(1), Core.SSAValue(8), Core.SSAValue(15), Core.SlotNumber(4), Core.SSAValue(22), Core.SSAValue(29)) - @test code[31] == Core.ReturnNode(Core.SSAValue(30)) + calls = Vector{Pair{SSAValue, Expr}}(undef, 0) + for i = 1:length(code) + expr = code[i] + if Meta.isexpr(expr, :call) || (Meta.isexpr(expr, :(=)) && Meta.isexpr(expr.args[2], :call)) + push!(calls, SSAValue(i)=>expr) + end + end + + function is_globalref(arg, gr) + while isa(arg, SSAValue) + arg = code[arg.id] + end + arg == gr + end + + # calls[1] + @test all(is_globalref.(calls[1][2].args[1:3], (GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA)))) + + # calls[2] + @test all(is_globalref.(calls[2][2].args[1:1], (GlobalRef(Core, :fieldtype),))) + @test 
all(calls[2][2].args[2:3] .== (calls[1][1], 1)) + + # calls[3] - isa + + # calls[4] + let calle = calls[4][2] + @test Meta.isexpr(calle, :(=)) + call = calle.args[2] + @test is_globalref(call.args[1], GlobalRef(Base, :convert)) + @test call.args[2] == calls[2][1] + end + + # calls[5] + @test all(is_globalref.(calls[5][2].args[1:1], (GlobalRef(Core, :fieldtype),))) + @test all(calls[5][2].args[2:3] .== (calls[1][1], 2)) end # Issue #32820 @@ -7452,6 +7569,13 @@ struct A43411{S, T} end @test isbitstype(A43411{(:a,), Tuple{Int}}) +# issue #55189 +struct A55189{N} + children::NTuple{N,A55189{N}} +end +@test fieldtype(A55189{2}, 1) === Tuple{A55189{2}, A55189{2}} +@assert !isbitstype(A55189{2}) + # issue #44614 struct T44614_1{T} m::T @@ -7497,16 +7621,11 @@ function f34482() Base.not_int("ABC") 1 end -function g34482() - Core.Intrinsics.arraylen(1) - 1 -end function h34482() Core.Intrinsics.bitcast(1, 1) 1 end @test_throws ErrorException f34482() -@test_throws TypeError g34482() @test_throws TypeError h34482() struct NFANode34126 @@ -7527,7 +7646,7 @@ end # issue #31696 foo31696(x::Int8, y::Int8) = 1 foo31696(x::T, y::T) where {T <: Int8} = 2 -@test length(methods(foo31696)) == 1 +@test length(methods(foo31696)) == 2 let T1 = Tuple{Int8}, T2 = Tuple{T} where T<:Int8, a = T1[(1,)], b = T2[(1,)] b .= a @test b[1] == (1,) @@ -7544,6 +7663,19 @@ end struct T36104 # check that redefining it works, issue #21816 v::Vector{T36104} end +struct S36104{K,V} + v::S36104{K,V} + S36104{K,V}() where {K,V} = new() + S36104{K,V}(x::S36104) where {K,V} = new(x) +end +@test !isdefined(Base.unwrap_unionall(Base.ImmutableDict).name, :partial) +@test !isdefined(S36104.body.body.name, :partial) +@test hasfield(typeof(S36104.body.body.name), :partial) +struct S36104{K,V} # check that redefining it works + v::S36104{K,V} + S36104{K,V}() where {K,V} = new() + S36104{K,V}(x::S36104) where {K,V} = new(x) +end # with a gensymmed unionall struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S @@ -7562,7 +7694,7 @@ end end @test fieldtypes(M36104.T36104) == (Vector{M36104.T36104},) @test_throws ErrorException("expected") @eval(struct X36104; x::error("expected"); end) -@test @isdefined(X36104) +@test !@isdefined(X36104) struct X36104; x::Int; end @test fieldtypes(X36104) == (Int,) primitive type P36104 8 end @@ -7707,13 +7839,17 @@ struct ContainsPointerNopadding{T} end @test !Base.datatype_haspadding(PointerNopadding{Symbol}) +@test Base.datatype_isbitsegal(PointerNopadding{Int}) @test !Base.datatype_haspadding(PointerNopadding{Int}) +@test Base.datatype_isbitsegal(PointerNopadding{Int}) # Sanity check to make sure the meaning of haspadding didn't change. 
-@test Base.datatype_haspadding(PointerNopadding{Any}) +@test !Base.datatype_haspadding(PointerNopadding{Any}) +@test !Base.datatype_isbitsegal(PointerNopadding{Any}) @test !Base.datatype_haspadding(Tuple{PointerNopadding{Symbol}}) @test !Base.datatype_haspadding(Tuple{PointerNopadding{Int}}) @test !Base.datatype_haspadding(ContainsPointerNopadding{Symbol}) -@test Base.datatype_haspadding(ContainsPointerNopadding{Int}) +@test !Base.datatype_haspadding(ContainsPointerNopadding{Int}) +@test !Base.datatype_isbitsegal(ContainsPointerNopadding{Int}) # Test the codegen optimized version as well as the unoptimized version of `jl_egal` @noinline unopt_jl_egal(@nospecialize(a), @nospecialize(b)) = @@ -7966,14 +8102,14 @@ code_typed(f47476, (Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) code_typed(f47476, (Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) code_typed(f47476, (Int, Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) @test f47476(1, 2, 3, 4, 5, 6, (7, 8)) === 2 -@test_throws UndefVarError(:N) f47476(1, 2, 3, 4, 5, 6, 7) +@test_throws UndefVarError(:N, :static_parameter) f47476(1, 2, 3, 4, 5, 6, 7) vect47476(::Type{T}) where {T} = T @test vect47476(Type{Type{Type{Int32}}}) === Type{Type{Type{Int32}}} @test vect47476(Type{Type{Type{Int64}}}) === Type{Type{Type{Int64}}} g47476(::Union{Nothing,Int,Val{T}}...) where {T} = T -@test_throws UndefVarError(:T) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5) +@test_throws UndefVarError(:T, :static_parameter) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5) @test g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5, Val(6)) === 6 let spec = only(methods(g47476)).specializations::Core.SimpleVector @test !isempty(spec) @@ -8000,12 +8136,11 @@ for T in (Int, String, Symbol, Module) @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T},))) @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T,T},))) @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T,T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Ref{T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Ref{T}},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Vector{T}},))) end -@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Ref{Int},))) -@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Ref{Int}},))) -# objectid for datatypes is inconsistant for types that have unbound type parameters. 
-@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (DataType,)))
-@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Vector{Int}},)))
+@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (DataType,)))
 # donotdelete should not taint consistency of the containing function
 f_donotdete(x) = (Core.Compiler.donotdelete(x); 1)
@@ -8040,3 +8175,226 @@ bar50293(@nospecialize(u)) = (Base.issingletontype(u.a), baz50293(u.a))
 let u = Union{Type{Union{}}, Type{Any}}, ab = bar50293(u)
     @test ab[1] == ab[2] == false
 end
+
+# `SimpleVector`-operations should be concrete-eval eligible
+@test Core.Compiler.is_foldable(Base.infer_effects(length, (Core.SimpleVector,)))
+@test Core.Compiler.is_foldable(Base.infer_effects(getindex, (Core.SimpleVector,Int)))
+
+# Test the lowering of a nothrow globalref
+module WellKnownGlobal
+    global well_known = 1
+end
+macro insert_global()
+    Expr(:call, GlobalRef(Base, :println), GlobalRef(WellKnownGlobal, :well_known))
+end
+check_globalref_lowering() = @insert_global
+let src = code_lowered(check_globalref_lowering)[1]
+    @test length(src.code) == 4
+end
+
+# Test correctness of widen_diagonal
+let widen_diagonal(x::UnionAll) = Base.rewrap_unionall(Base.widen_diagonal(Base.unwrap_unionall(x), x), x)
+    @test Tuple{Int,Float64} <: widen_diagonal(NTuple)
+    @test Tuple{Int,Float64} <: widen_diagonal(Tuple{T,T} where {T})
+    @test Tuple{Real,Int,Float64} <: widen_diagonal(Tuple{S,Vararg{T}} where {S, T<:S})
+    @test Tuple{Int,Int,Float64,Float64} <: widen_diagonal(Tuple{S,S,Vararg{T}} where {S, T<:S})
+    @test Union{Tuple{T}, Tuple{T,Int}} where {T} === widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T})
+    @test Tuple === widen_diagonal(Union{Tuple{Vararg{S}}, Tuple{Vararg{T}}} where {S, T})
+    @test Tuple{Vararg{Val{<:Set}}} == widen_diagonal(Tuple{Vararg{T}} where T<:Val{<:Set})
+end
+
+# Test try/catch/else ordering
+function test_try_catch_else()
+    local x
+    try
+        x = 1
+    catch
+        rethrow()
+    else
+        return x
+    end
+end
+@test test_try_catch_else() == 1
+
+# #52433
+@test_throws ErrorException Core.Intrinsics.pointerref(Ptr{Vector{Int64}}(C_NULL), 1, 0)
+
+# #53034 (Union normalization for typevar elimination)
+@test Tuple{Int,Any} <: Tuple{Union{Int,T},T} where {T>:Int}
+@test Tuple{Int,Any} <: Tuple{Union{Int,T},T} where {T>:Integer}
+# #53034 (Union normalization for Type elimination)
+@test Int isa Type{Union{Int,T2} where {T2<:T1}} where {T1}
+@test Int isa Type{Union{Int,T1}} where {T1}
+@test Int isa Union{UnionAll, Type{Union{Int,T2} where {T2<:T1}}} where {T1}
+@test Int isa Union{Union, Type{Union{Int,T1}}} where {T1}
+@test_broken Int isa Union{UnionAll, Type{Union{Int,T2} where {T2<:T1}} where {T1}}
+@test_broken Int isa Union{Union, Type{Union{Int,T1}} where {T1}}
+
+let M = @__MODULE__
+    Core.eval(M, :(global a_typed_global))
+    @test Core.eval(M, :(global a_typed_global::$(Tuple{Union{Integer,Nothing}}))) === nothing
+    @test Core.get_binding_type(M, :a_typed_global) === Tuple{Union{Integer,Nothing}}
+    @test Core.eval(M, :(global a_typed_global::$(Tuple{Union{Integer,Nothing}}))) === nothing
+    @test Core.eval(M, :(global a_typed_global::$(Union{Tuple{Integer},Tuple{Nothing}}))) === nothing
+    @test_throws(ErrorException("cannot set type for global $(nameof(M)).a_typed_global. 
It already has a value or is already set to a different type."), + Core.eval(M, :(global a_typed_global::$(Union{Nothing,Tuple{Union{Integer,Nothing}}})))) + @test Core.eval(M, :(global a_typed_global)) === nothing + @test Core.get_binding_type(M, :a_typed_global) == Tuple{Union{Integer,Nothing}} +end + +@test Base.unsafe_convert(Ptr{Int}, [1]) !== C_NULL + +# Test that new macros are allowed to be defined inside Expr(:toplevel) returned by macros +macro macroception() + Expr(:toplevel, :(macro foo() 1 end), :(@foo)) +end + +@test (@macroception()) === 1 + +# overlay method tables +# ===================== + +module OverlayModule + +using Base.Experimental: @MethodTable, @overlay + +@MethodTable mt +# long function def +@overlay mt function sin(x::Float64) + 1 +end +# short function def +@overlay mt cos(x::Float64) = 2 +# parametric function def +@overlay mt tan(x::T) where {T} = 3 + +end # module OverlayModule + +let ms = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter()) + @test only(ms).method.module === Base.Math +end +let ms = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter()) + @test only(ms).method.module === OverlayModule +end +let ms = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter()) + @test isempty(ms) +end + +# precompilation +let load_path = mktempdir() + depot_path = mktempdir() + try + pushfirst!(LOAD_PATH, load_path) + pushfirst!(DEPOT_PATH, depot_path) + + write(joinpath(load_path, "Foo.jl"), + """ + module Foo + Base.Experimental.@MethodTable(mt) + Base.Experimental.@overlay mt sin(x::Int) = 1 + end + """) + + # precompiling Foo serializes the overlay method through the `mt` binding in the module + Foo = Base.require(Main, :Foo) + @test length(Foo.mt) == 1 + + write(joinpath(load_path, "Bar.jl"), + """ + module Bar + Base.Experimental.@MethodTable(mt) + end + """) + + write(joinpath(load_path, "Baz.jl"), + """ + module Baz + using Bar + Base.Experimental.@overlay Bar.mt sin(x::Int) = 1 + end + """) + + # when referring an method table in another module, + # the overlay method needs to be discovered explicitly + Bar = Base.require(Main, :Bar) + @test length(Bar.mt) == 0 + Baz = Base.require(Main, :Baz) + @test length(Bar.mt) == 1 + finally + filter!((≠)(load_path), LOAD_PATH) + filter!((≠)(depot_path), DEPOT_PATH) + rm(load_path, recursive=true, force=true) + try + rm(depot_path, force=true, recursive=true) + catch err + @show err + end + end +end + +# merging va tuple unions +@test Tuple === Union{Tuple{},Tuple{Any,Vararg}} +@test Tuple{Any,Vararg} === Union{Tuple{Any},Tuple{Any,Any,Vararg}} +@test Core.Compiler.return_type(Base.front, Tuple{Tuple{Int,Vararg{Int}}}) === Tuple{Vararg{Int}} +@test Tuple{Vararg{Int}} === Union{Tuple{Int}, Tuple{}, Tuple{Int, Int, Vararg{Int}}} +@test (Tuple{Vararg{T}} where T) === (Union{Tuple{T, T, Vararg{T}}, Tuple{}, Tuple{T}} where T) +@test_broken (Tuple{Vararg{T}} where T) === Union{Tuple{T, T, Vararg{T}} where T, Tuple{}, Tuple{T} where T} + +@test sizeof(Pair{Union{typeof(Union{}),Nothing}, Union{Type{Union{}},Nothing}}(Union{}, Union{})) == 2 + +# Make sure that Core.Compiler has enough NamedTuple infrastructure +# to properly give error messages for basic kwargs... 
+Core.eval(Core.Compiler, quote issue50174(;a=1) = a end) +@test_throws MethodError Core.Compiler.issue50174(;b=2) + +let s = mktemp() do path, io + xxx = 42 + redirect_stdout(io) do + Base.@assume_effects :nothrow @show xxx + end + flush(io) + read(path, String) + end + @test strip(s) == "xxx = 42" +end + +# `module` has an implicit world-age increment +let foo = eval(Expr(:toplevel, :(module BarModuleInc; struct FooModuleInc; end; end), :(BarModuleInc.FooModuleInc()))) + @Core.latestworld + @test foo == BarModuleInc.FooModuleInc() +end + +let + eval(:(module BarModuleInc2; module BazModuleInc; struct FooModuleInc; end; end; const foo = BazModuleInc.FooModuleInc(); end)) + @Core.latestworld + @test BarModuleInc2.foo == BarModuleInc2.BazModuleInc.FooModuleInc() +end + +# `toplevel` has implicit world age increment between expansion and evaluation +macro define_call(sym) + Core.eval(__module__, :($sym() = 1)) + :($sym()) +end +@test eval(Expr(:toplevel, :(@define_call(f_macro_defined1)))) == 1 +@test @define_call(f_macro_defined2) == 1 + +# `invoke` of `Method` +let m = which(+, (Int, Int)) + @eval f56692(i) = invoke(+, $m, i, 4) + global g56692() = f56692(5) == 9 ? "true" : false +end +@test @inferred(f56692(3)) == 7 +@test @inferred(g56692()) == "true" + +# `invoke` of `CodeInstance` +f_invalidate_me() = return 1 +f_invoke_me() = return f_invalidate_me() +@test f_invoke_me() == 1 +const f_invoke_me_ci = Base.specialize_method(Base._which(Tuple{typeof(f_invoke_me)})).cache +f_call_me() = invoke(f_invoke_me, f_invoke_me_ci) +@test invoke(f_invoke_me, f_invoke_me_ci) == 1 +@test f_call_me() == 1 +@test_throws TypeError invoke(f_invoke_me, f_invoke_me_ci, 1) +f_invalidate_me() = 2 +@test_throws ErrorException invoke(f_invoke_me, f_invoke_me_ci) +@test_throws ErrorException f_call_me() diff --git a/test/corelogging.jl b/test/corelogging.jl index 9626f48e4b407..b8cd3716cad2e 100644 --- a/test/corelogging.jl +++ b/test/corelogging.jl @@ -103,12 +103,12 @@ end logmsg = (function() @info msg x=y end, function() @info msg x=y z=1+1 end)[i] @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg() - @test_throws UndefVarError(:msg) collect_test_logs(logmsg) - @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg) + @test_throws UndefVarError(:msg, :local) collect_test_logs(logmsg) + @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg, :local) msg = "the msg" @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg() - @test_throws UndefVarError(:y) collect_test_logs(logmsg) - @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y) + @test_throws UndefVarError(:y, :local) collect_test_logs(logmsg) + @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y, :local) y = "the y" @test_logs (Info,"the msg") logmsg() @test only(collect_test_logs(logmsg)[1]).kwargs[:x] === "the y" @@ -140,9 +140,9 @@ end end @test length(logger.logs) == 1 record = logger.logs[1] - @test record._module == nothing - @test record.file == nothing - @test record.line == nothing + @test record._module === nothing + @test record.file === nothing + @test record.line === nothing end # PR #28209 diff --git 
a/test/deprecation_exec.jl b/test/deprecation_exec.jl index 61ffcc2a59ac6..8209b0e920a18 100644 --- a/test/deprecation_exec.jl +++ b/test/deprecation_exec.jl @@ -68,6 +68,7 @@ begin # @deprecate ex = :(module M22845; import ..DeprecationTests: bar; bar(x::Number) = x + 3; end) @test_warn "importing deprecated binding" eval(ex) + @Core.latestworld @test @test_nowarn(DeprecationTests.bar(4)) == 7 @test @test_warn "`f1` is deprecated, use `f` instead." f1() diff --git a/test/dict.jl b/test/dict.jl index 6a47c3c6eea8b..83d35ae18bb85 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -8,7 +8,7 @@ using Random @test isequal(p,10=>20) @test iterate(p)[1] == 10 @test iterate(p, iterate(p)[2])[1] == 20 - @test iterate(p, iterate(p, iterate(p)[2])[2]) == nothing + @test iterate(p, iterate(p, iterate(p)[2])[2]) === nothing @test firstindex(p) == 1 @test lastindex(p) == length(p) == 2 @test Base.indexed_iterate(p, 1, nothing) == (10,2) @@ -162,6 +162,30 @@ end # issue #39117 @test Dict(t[1]=>t[2] for t in zip((1,"2"), (2,"2"))) == Dict{Any,Any}(1=>2, "2"=>"2") + + @testset "issue #33147" begin + expected = try; Base._throw_dict_kv_error(); catch e; e; end + @test_throws expected Dict(i for i in 1:2) + @test_throws expected Dict(nothing for i in 1:2) + @test_throws expected Dict(() for i in 1:2) + @test_throws expected Dict((i, i, i) for i in 1:2) + @test_throws expected Dict(nothing) + @test_throws expected Dict((1,)) + @test_throws expected Dict(1:2) + @test_throws expected Dict(((),)) + @test_throws expected IdDict(((),)) + @test_throws expected WeakKeyDict(((),)) + @test_throws expected IdDict(nothing) + @test_throws expected WeakKeyDict(nothing) + @test Dict(1:0) isa Dict + @test Dict(()) isa Dict + try + Dict(i => error("$i") for i in 1:3) + catch ex + @test ex isa ErrorException + @test length(Base.current_exceptions()) == 1 + end + end end @testset "empty tuple ctor" begin @@ -639,13 +663,13 @@ end @test d == IdDict(1=>1, 2=>2, 3=>3) @test eltype(d) == Pair{Int,Int} @test_throws KeyError d[:a] - @test_throws ArgumentError d[:a] = 1 + @test_throws TypeError d[:a] = 1 @test_throws MethodError d[1] = :a # copy constructor d = IdDict(Pair(1,1), Pair(2,2), Pair(3,3)) @test collect(values(IdDict{Int,Float64}(d))) == collect(values(d)) - @test_throws ArgumentError IdDict{Float64,Int}(d) + @test_throws TypeError IdDict{Float64,Int}(d) # misc constructors @test typeof(IdDict(1=>1, :a=>2)) == IdDict{Any,Int} @@ -659,9 +683,9 @@ end @inferred setindex!(d, -1, 10) @test d[10] == -1 @test 1 == @inferred d[1] - @test get(d, -111, nothing) == nothing + @test get(d, -111, nothing) === nothing @test 1 == @inferred get(d, 1, 1) - @test pop!(d, -111, nothing) == nothing + @test pop!(d, -111, nothing) === nothing @test 1 == @inferred pop!(d, 1) # get! and delete! 
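# ---------------------------------------------------------------------------
# [Editor's aside — illustrative sketch, not part of the patch itself]
# Several hunks in these test files replace `== nothing` with `=== nothing`.
# This follows the usual Julia guidance: `===` is identity comparison and
# cannot be extended, so it is the reliable way to check for a sentinel value,
# whereas `==` may dispatch to user-defined methods or propagate `missing`.
# A minimal, self-contained illustration (plain Julia, standard library only):
using Test
@test get(Dict(1 => 2), 3, nothing) === nothing  # identity check for the sentinel
@test ismissing(missing == nothing)              # `==` propagates `missing`
@test (missing === nothing) === false            # `===` always returns a Bool
# ---------------------------------------------------------------------------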
@@ -672,7 +696,7 @@ end @test_throws MethodError get!(d, "b", "b") @test delete!(d, "a") === d @test !haskey(d, "a") - @test_throws ArgumentError get!(IdDict{Symbol,Any}(), 2, "b") + @test_throws TypeError get!(IdDict{Symbol,Any}(), 2, "b") @test get!(IdDict{Int,Int}(), 1, 2.0) === 2 @test get!(()->2.0, IdDict{Int,Int}(), 1) === 2 @@ -763,6 +787,13 @@ end [v for (k, v) in d] == [d[x[1]] for (i, x) in enumerate(d)] end +@testset "consistency of dict iteration order (issue #56841)" begin + dict = Dict(randn() => randn() for _ = 1:100) + @test all(zip(dict, keys(dict), values(dict), pairs(dict))) do (d, k, v, p) + d == p && first(d) == first(p) == k && last(d) == last(p) == v + end +end + @testset "generators, similar" begin d = Dict(:a=>"a") # TODO: restore when 0.7 deprecation is removed @@ -1025,7 +1056,7 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep # issue #26939 d26939 = WeakKeyDict() - (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939) + (@noinline d -> d[big"1" + 1] = 1)(d26939) GC.gc() # primarily to make sure this doesn't segfault @test count(d26939) == 0 @test length(d26939.ht) == 1 @@ -1084,6 +1115,119 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep GC.@preserve A B C D nothing end +import Base.PersistentDict +@testset "PersistentDict" begin + @testset "HAMT HashState" begin + key = :key + h = Base.HAMT.HashState(key) + h1 = Base.HAMT.HashState(key, objectid(key), 0, 0) + h2 = Base.HAMT.HashState(h, key) # reconstruct + @test h.hash == h1.hash + @test h.hash == h2.hash + + hs = Base.HAMT.next(h1) + @test hs.depth == 1 + recompute_depth = (Base.HAMT.MAX_SHIFT ÷ Base.HAMT.BITS_PER_LEVEL) + 1 + for i in 2:recompute_depth + hs = Base.HAMT.next(hs) + @test hs.depth == i + end + @test hs.depth == recompute_depth + @test hs.shift == 0 + hsr = Base.HAMT.HashState(hs, key) + @test hs.hash == hsr.hash + @test hs.depth == hsr.depth + @test hs.shift == hsr.shift + + @test Core.Compiler.is_removable_if_unused(Base.infer_effects(Base.HAMT.init_hamt, (Type{Vector{Any}},Type{Int},Vector{Any},Int))) + @test Core.Compiler.is_removable_if_unused(Base.infer_effects(Base.HAMT.HAMT{Vector{Any},Int}, (Pair{Vector{Any},Int},))) + end + @testset "basics" begin + dict = PersistentDict{Int, Int}() + @test_throws KeyError dict[1] + @test length(dict) == 0 + @test isempty(dict) + + dict = PersistentDict{Int, Int}(1=>2.0) + @test dict[1] == 2 + + dict = PersistentDict(1=>2) + @test dict[1] == 2 + + dict = PersistentDict(dict, 1=>3.0) + @test dict[1] == 3 + + dict = PersistentDict(dict, 1, 1) + @test dict[1] == 1 + @test get(dict, 2, 1) == 1 + @test get(()->1, dict, 2) == 1 + + @test (1 => 1) ∈ dict + @test (1 => 2) ∉ dict + @test (2 => 1) ∉ dict + + @test haskey(dict, 1) + @test !haskey(dict, 2) + + dict2 = PersistentDict{Int, Int}(dict, 1=>2) + @test dict[1] == 1 + @test dict2[1] == 2 + + dict3 = Base.delete(dict2, 1) + @test_throws KeyError dict3[1] + @test dict3 == Base.delete(dict3, 1) + @test dict3.trie != Base.delete(dict3, 1).trie + + dict = PersistentDict(dict, 1, 3) + @test dict[1] == 3 + @test dict2[1] == 2 + + @test length(dict) == 1 + @test length(dict2) == 1 + + dict = PersistentDict(1=>2, 2=>3, 4=>1) + @test eltype(dict) == Pair{Int, Int} + @test dict[1] == 2 + @test dict[2] == 3 + @test dict[4] == 1 + end + + @testset "objectid" begin + c = [0] + dict = PersistentDict{Any, Int}(c => 1, [1] => 2) + @test dict[c] == 1 + c[1] = 1 + @test dict[c] == 1 + + c[1] = 0 + dict = PersistentDict{Any, Int}((c,) => 1, ([1],) => 2) + @test 
dict[(c,)] == 1 + + c[1] = 1 + @test dict[(c,)] == 1 + end + + @testset "stress" begin + N = 2^14 + dict = PersistentDict{Int, Int}() + for i in 1:N + dict = PersistentDict(dict, i, i) + end + @test length(dict) == N + length(collect(dict)) == N + values = sort!(collect(dict)) + @test values[1] == (1=>1) + @test values[end] == (N=>N) + + dict = Base.delete(dict, 16384) + @test !haskey(dict, 16384) + for i in 1:N + dict = Base.delete(dict, i) + end + @test isempty(dict) + end +end + @testset "issue #19995, hash of dicts" begin @test hash(Dict(Dict(1=>2) => 3, Dict(4=>5) => 6)) != hash(Dict(Dict(4=>5) => 3, Dict(1=>2) => 6)) a = Dict(Dict(3 => 4, 2 => 3) => 2, Dict(1 => 2, 5 => 6) => 1) @@ -1362,12 +1506,68 @@ end filter!(x -> x.first < 10, d) sizehint!(d, 10) @test length(d.slots) < 100 + sizehint!(d, 1000) + sizehint!(d, 1; shrink = false) + @test length(d.slots) >= 1000 + sizehint!(d, 1; shrink = true) + @test length(d.slots) < 1000 end # getindex is :effect_free and :terminates but not :consistent for T in (Int, Float64, String, Symbol) - @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T))) - @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T))) - @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T))) - @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @testset let T=T + @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T))) + end +end + +struct BadHash + i::Int +end +Base.hash(::BadHash, ::UInt)=UInt(1) +@testset "maxprobe reset #51595" begin + d = Dict(BadHash(i)=>nothing for i in 1:20) + empty!(d) + sizehint!(d, 0) + @test d.maxprobe < length(d.keys) + d[BadHash(1)]=nothing + @test !(BadHash(2) in keys(d)) + d = Dict(BadHash(i)=>nothing for i in 1:20) + for _ in 1:20 + pop!(d) + end + sizehint!(d, 0) + @test d.maxprobe < length(d.keys) + d[BadHash(1)]=nothing + @test !(BadHash(2) in keys(d)) +end + +# Issue #52066 +let d = Dict() + d[1] = 'a' + d[1.0] = 'b' + @test only(d) === Pair{Any,Any}(1.0, 'b') +end + +@testset "UnionAll `keytype` and `valtype` (issue #53115)" begin + K = Int8 + V = Int16 + dicts = ( + AbstractDict, IdDict, Dict, WeakKeyDict, Base.ImmutableDict, + Base.PersistentDict, Iterators.Pairs + ) + + @testset "D: $D" for D ∈ dicts + @test_throws MethodError keytype(D) + @test_throws MethodError keytype(D{<:Any,V}) + @test keytype(D{K }) == K + @test keytype(D{K, V}) == K + + @test_throws MethodError valtype(D) + @test valtype(D{<:Any,V}) == V + @test_throws MethodError valtype(D{K }) + @test valtype(D{K, V}) == V + end end diff --git a/test/docs.jl b/test/docs.jl index 7f6ece4e76ab4..8cfdbba3f2d97 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -2,6 +2,10 @@ import Base.Docs: meta, @var, DocStr, parsedoc +# check that @doc can work before REPL is loaded +@test !startswith(read(`$(Base.julia_cmd()) -E '@doc sin'`, String), "nothing") +@test !startswith(read(`$(Base.julia_cmd()) -E '@doc @time'`, String), "nothing") + using Markdown using REPL @@ -69,6 +73,37 @@ $$latex literal$$ """ function break_me_docs end + +# `hasdoc` returns `true` on a name with a docstring. +@test Docs.hasdoc(Base, :map) +# `hasdoc` returns `false` on a name without a docstring. 
+@test !isdefined(Base, :_this_name_doesnt_exist_) && !Docs.hasdoc(Base, :_this_name_doesnt_exist_) +@test isdefined(Base, :_typed_vcat) && !Docs.hasdoc(Base, :_typed_vcat) + +"This module has names without documentation." +module _ModuleWithUndocumentedNames +export f +public ⨳, @foo +f() = 1 +g() = 2 +⨳(a,b) = a * b +macro foo(); nothing; end +⊕(a,b) = a + b +end + +"This module has some documentation." +module _ModuleWithSomeDocumentedNames +export f +"f() is 1." +f() = 1 +g() = 2 +end + +@test Docs.undocumented_names(_ModuleWithUndocumentedNames) == [Symbol("@foo"), :f, :⨳] +@test isempty(Docs.undocumented_names(_ModuleWithSomeDocumentedNames)) +@test Docs.undocumented_names(_ModuleWithSomeDocumentedNames; private=true) == [:g] + + # issue #11548 module ModuleMacroDoc @@ -89,7 +124,7 @@ module NoDocStrings end # General tests for docstrings. const LINE_NUMBER = @__LINE__() + 1 -"DocsTest" +"DocsTest, evaluating $(K)" # test that module docstring is evaluated within module module DocsTest using Markdown @@ -234,7 +269,7 @@ fnospecialize(@nospecialize(x::AbstractArray)) = 2 end let md = meta(DocsTest)[@var(DocsTest)] - @test docstrings_equal(md.docs[Union{}], doc"DocsTest") + @test docstrings_equal(md.docs[Union{}], doc"DocsTest, evaluating K") # Check that plain docstrings store a module reference. # https://github.com/JuliaLang/julia/pull/13017#issuecomment-138618663 @test md.docs[Union{}].data[:module] == DocsTest @@ -540,8 +575,8 @@ end let T = meta(DocVars)[@var(DocVars.T)], S = meta(DocVars)[@var(DocVars.S)], - Tname = Markdown.parse("```\n$(curmod_prefix)DocVars.T\n```"), - Sname = Markdown.parse("```\n$(curmod_prefix)DocVars.S\n```") + Tname = Markdown.parse("```julia\n$(curmod_prefix)DocVars.T\n```"), + Sname = Markdown.parse("```julia\n$(curmod_prefix)DocVars.S\n```") # Splicing the expression directly doesn't work @test docstrings_equal(T.docs[Union{}], doc""" @@ -623,6 +658,7 @@ end let d = @doc(I15424.LazyHelp) @test repr("text/plain", d) == "LazyHelp\nLazyHelp(text)\n" + # (no internal warning is inserted for non-markdown content) end # Issue #13385. @@ -650,7 +686,7 @@ end let d = (@doc :@m2_11993), macro_doc = Markdown.parse("`$(curmod_prefix == "Main." ? "" : curmod_prefix)@m2_11993` is a macro.") @test docstring_startswith(d, doc""" - No documentation found. + No documentation found for private symbol. $macro_doc""") end @@ -849,9 +885,9 @@ undocumented(x,y) = 3 end # module doc_str = Markdown.parse(""" -No docstring or readme file found for module `$(curmod_prefix)Undocumented`. +No docstring or readme file found for internal module `$(curmod_prefix)Undocumented`. -# Exported names +# Public names `A`, `B`, `C`, `at0`, `pt2` """) @@ -865,7 +901,7 @@ Binding `$(curmod_prefix)Undocumented.bindingdoesnotexist` does not exist. @test docstrings_equal(@doc(Undocumented.bindingdoesnotexist), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary ``` @@ -881,7 +917,7 @@ $(curmod_prefix)Undocumented.C @test docstrings_equal(@doc(Undocumented.A), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary ``` @@ -901,7 +937,7 @@ $(curmod_prefix)Undocumented.B <: $(curmod_prefix)Undocumented.A <: Any @test docstrings_equal(@doc(Undocumented.B), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. 
# Summary ``` @@ -916,7 +952,7 @@ $(curmod_prefix)Undocumented.C <: $(curmod_prefix)Undocumented.A <: Any @test docstrings_equal(@doc(Undocumented.C), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary ``` @@ -938,7 +974,7 @@ $(curmod_prefix)Undocumented.D <: $(curmod_prefix)Undocumented.B <: $(curmod_pre @test docstrings_equal(@doc(Undocumented.D), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary @@ -958,7 +994,7 @@ $(curmod_prefix)Undocumented.st4{T<:Number, N} @test docstrings_equal(@doc(Undocumented.at0), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -981,7 +1017,7 @@ $(curmod_prefix)Undocumented.at1{T>:Integer, N} <: $(curmod_prefix)Undocumented. @test docstrings_equal(@doc(Undocumented.at1), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1000,7 +1036,7 @@ $(curmod_prefix)Undocumented.st4{Int64, N} @test docstrings_equal(@doc(Undocumented.at_), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary @@ -1017,7 +1053,7 @@ $(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer} <: $(curmod_prefix)Un @test docstrings_equal(@doc(Undocumented.pt2), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1027,7 +1063,7 @@ struct $(curmod_prefix)Undocumented.st3{T<:Integer, N} # Fields ``` -a :: Tuple{Vararg{T<:Integer, N}} +a :: NTuple{N, T<:Integer} b :: Array{Int64, N} c :: Int64 ``` @@ -1040,7 +1076,7 @@ $(curmod_prefix)Undocumented.st3{T<:Integer, N} <: $(curmod_prefix)Undocumented. @test docstrings_equal(@doc(Undocumented.st3), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1051,7 +1087,7 @@ struct $(curmod_prefix)Undocumented.st4{T, N} # Fields ``` a :: T -b :: Tuple{Vararg{T, N}} +b :: NTuple{N, T} ``` # Supertype Hierarchy @@ -1062,7 +1098,7 @@ $(curmod_prefix)Undocumented.st4{T, N} <: $(curmod_prefix)Undocumented.at0{T, N} @test docstrings_equal(@doc(Undocumented.st4), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1083,7 +1119,7 @@ $(curmod_prefix)Undocumented.st5{T>:Int64, N} <: $(curmod_prefix)Undocumented.at @test docstrings_equal(@doc(Undocumented.st5), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1104,7 +1140,7 @@ $(curmod_prefix)Undocumented.mt6{T<:Integer, N} <: $(curmod_prefix)Undocumented. @test docstrings_equal(@doc(Undocumented.mt6), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1118,7 +1154,7 @@ No documentation found. @test docstrings_equal(@doc(Undocumented.ut7), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1134,7 +1170,7 @@ No documentation found. @test docstrings_equal(@doc(Undocumented.ut8), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. 
# Summary @@ -1153,7 +1189,7 @@ let d = @doc(Undocumented.f) io = IOBuffer() show(io, MIME"text/markdown"(), d) @test startswith(String(take!(io)),""" - No documentation found. + No documentation found for private symbol. `$(curmod_prefix)Undocumented.f` is a `Function`. """) @@ -1163,7 +1199,7 @@ let d = @doc(Undocumented.undocumented) io = IOBuffer() show(io, MIME"text/markdown"(), d) @test startswith(String(take!(io)), """ - No documentation found. + No documentation found for private symbol. `$(curmod_prefix)Undocumented.undocumented` is a `Function`. """) @@ -1309,30 +1345,30 @@ end let dt1 = striptrimdocs(_repl(:(dynamic_test(1.0)))) @test dt1 isa Expr @test dt1.args[1] isa Expr - @test dt1.args[1].head === :macrocall - @test dt1.args[1].args[1] === Symbol("@doc") - @test dt1.args[1].args[3] == :(dynamic_test(::typeof(1.0))) + @test dt1.args[1].head === :call + @test dt1.args[1].args[1] === Base.Docs.doc + @test dt1.args[1].args[3] == :(Union{Tuple{typeof(1.0)}}) end let dt2 = striptrimdocs(_repl(:(dynamic_test(::String)))) @test dt2 isa Expr @test dt2.args[1] isa Expr - @test dt2.args[1].head === :macrocall - @test dt2.args[1].args[1] === Symbol("@doc") - @test dt2.args[1].args[3] == :(dynamic_test(::String)) + @test dt2.args[1].head === :call + @test dt2.args[1].args[1] === Base.Docs.doc + @test dt2.args[1].args[3] == :(Union{Tuple{String}}) end let dt3 = striptrimdocs(_repl(:(dynamic_test(a)))) @test dt3 isa Expr @test dt3.args[1] isa Expr - @test dt3.args[1].head === :macrocall - @test dt3.args[1].args[1] === Symbol("@doc") - @test dt3.args[1].args[3].args[2].head === :(::) # can't test equality due to line numbers + @test dt3.args[1].head === :call + @test dt3.args[1].args[1] === Base.Docs.doc + @test dt3.args[1].args[3].args[2].head === :curly # can't test equality due to line numbers end let dt4 = striptrimdocs(_repl(:(dynamic_test(1.0,u=2.0)))) @test dt4 isa Expr @test dt4.args[1] isa Expr - @test dt4.args[1].head === :macrocall - @test dt4.args[1].args[1] === Symbol("@doc") - @test dt4.args[1].args[3] == :(dynamic_test(::typeof(1.0); u::typeof(2.0)=2.0)) + @test dt4.args[1].head === :call + @test dt4.args[1].args[1] === Base.Docs.doc + @test dt4.args[1].args[3] == :(Union{Tuple{typeof(1.0)}}) end # Equality testing @@ -1433,7 +1469,7 @@ end end struct t_docs_abc end -@test "t_docs_abc" in accessible(@__MODULE__) +@test "t_docs_abc" in string.(accessible(@__MODULE__)) # Call overloading issues #20087 and #44889 """ @@ -1482,7 +1518,7 @@ struct B_20087 end # issue #27832 _last_atdoc = Core.atdoc -Core.atdoc!(Core.Compiler.CoreDocs.docm) # test bootstrap doc system +Core.atdoc!(Base.CoreDocs.docm) # test bootstrap doc system """ """ @@ -1516,3 +1552,29 @@ struct S41727 end @test S41727(1) isa S41727 @test string(@repl S41727.x) == "x is 4\n" + +"ensure we can document ccallable functions" +Base.@ccallable c51586_short()::Int = 2 +"ensure we can document ccallable functions" +Base.@ccallable c51586_long()::Int = 3 + +@test docstrings_equal(@doc(c51586_short()), doc"ensure we can document ccallable functions") +@test docstrings_equal(@doc(c51586_long()), doc"ensure we can document ccallable functions") + +@testset "Docs docstrings" begin + undoc = Docs.undocumented_names(Docs) + @test_broken isempty(undoc) + @test undoc == [Symbol("@var")] +end + +# Docing the macroception macro +macro docmacroception() + Expr(:toplevel, macroexpand(__module__, :(@Base.__doc__ macro docmacrofoo() 1 end); recursive=false), :(@docmacrofoo)) +end + +""" +This docmacroception has a docstring +""" 
+@docmacroception() + +@test Docs.hasdoc(@__MODULE__, :var"@docmacrofoo") diff --git a/test/embedding/embedding-test.jl b/test/embedding/embedding-test.jl index c10cc6a16fee8..34ef9a796ba56 100644 --- a/test/embedding/embedding-test.jl +++ b/test/embedding/embedding-test.jl @@ -21,6 +21,7 @@ end close(err.in) out_task = @async readlines(out) @test readline(err) == "MethodError: no method matching this_function_has_no_methods()" + @test readline(err) == "The function `this_function_has_no_methods` exists, but no method is defined for this combination of argument types." @test success(p) lines = fetch(out_task) @test length(lines) == 11 diff --git a/test/embedding/embedding.c b/test/embedding/embedding.c index 1294d4cdafb45..746c59fc8ce1f 100644 --- a/test/embedding/embedding.c +++ b/test/embedding/embedding.c @@ -86,17 +86,17 @@ int main() // (aka, is gc-rooted until) the program reaches the corresponding JL_GC_POP() JL_GC_PUSH1(&x); - double* xData = jl_array_data(x); + double* xData = jl_array_data(x, double); size_t i; - for (i = 0; i < jl_array_len(x); i++) + for (i = 0; i < jl_array_nrows(x); i++) xData[i] = i; jl_function_t *func = jl_get_function(jl_base_module, "reverse!"); jl_call1(func, (jl_value_t*) x); printf("x = ["); - for (i = 0; i < jl_array_len(x); i++) + for (i = 0; i < jl_array_nrows(x); i++) printf("%e ", xData[i]); printf("]\n"); fflush(stdout); @@ -192,6 +192,12 @@ int main() checked_eval_string("f28825()"); } + { + // jl_typeof works (#50714) + jl_value_t *v = checked_eval_string("sqrt(2.0)"); + jl_value_t *t = jl_typeof(v); + } + JL_TRY { jl_error("exception thrown"); } diff --git a/test/env.jl b/test/env.jl index de5cf92d9edb9..7f6962cf675aa 100644 --- a/test/env.jl +++ b/test/env.jl @@ -52,6 +52,11 @@ end @test get!(ENV, key, "default") == "default" @test haskey(ENV, key) @test ENV[key] == "default" + + key = randstring(25) + @test !haskey(ENV, key) + @test get!(ENV, key, 0) == 0 + @test ENV[key] == "0" end @testset "#17956" begin @test length(ENV) > 1 @@ -128,7 +133,9 @@ end for _v in (v, uppercasefirst(v), uppercase(v)) ENV["testing_gbe"] = _v @test Base.get_bool_env("testing_gbe", false) == true + @test Base.get_bool_env(() -> false, "testing_gbe") == true @test Base.get_bool_env("testing_gbe", true) == true + @test Base.get_bool_env(() -> true, "testing_gbe") == true end end end @@ -137,26 +144,34 @@ end for _v in (v, uppercasefirst(v), uppercase(v)) ENV["testing_gbe"] = _v @test Base.get_bool_env("testing_gbe", true) == false + @test Base.get_bool_env(() -> true, "testing_gbe") == false @test Base.get_bool_env("testing_gbe", false) == false + @test Base.get_bool_env(() -> false, "testing_gbe") == false end end end @testset "empty" begin ENV["testing_gbe"] = "" @test Base.get_bool_env("testing_gbe", true) == true + @test Base.get_bool_env(() -> true, "testing_gbe") == true @test Base.get_bool_env("testing_gbe", false) == false + @test Base.get_bool_env(() -> false, "testing_gbe") == false end @testset "undefined" begin delete!(ENV, "testing_gbe") @test !haskey(ENV, "testing_gbe") @test Base.get_bool_env("testing_gbe", true) == true + @test Base.get_bool_env(() -> true, "testing_gbe") == true @test Base.get_bool_env("testing_gbe", false) == false + @test Base.get_bool_env(() -> false, "testing_gbe") == false end @testset "unrecognized" begin for v in ("truw", "falls") ENV["testing_gbe"] = v @test Base.get_bool_env("testing_gbe", true) === nothing + @test_throws ArgumentError Base.get_bool_env("testing_gbe", true, throw=true) @test 
Base.get_bool_env("testing_gbe", false) === nothing + @test_throws ArgumentError Base.get_bool_env("testing_gbe", false, throw=true) end end @@ -168,7 +183,7 @@ end end # Restore the original environment -for k in keys(ENV) +for k in collect(keys(ENV)) if !haskey(original_env, k) delete!(ENV, k) end diff --git a/test/error.jl b/test/error.jl index e9cdfa100bc81..f76a7809b08a9 100644 --- a/test/error.jl +++ b/test/error.jl @@ -93,10 +93,14 @@ end @testset "MethodError for methods without line numbers" begin try eval(Expr(:function, :(f44319()), 0)) - f44319(1) + @invokelatest f44319() catch e s = sprint(showerror, e) - @test s == "MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE))\n\nClosest candidates are:\n f44319()\n @ $curmod_str none:0\n" + @test s == """MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE)) + The function `f44319` exists, but no method is defined for this combination of argument types. + + Closest candidates are:\n f44319()\n @ $curmod_str none:0 + """ end end @@ -107,8 +111,8 @@ end for name in names(mod, all=true) isdefined(mod, name) || continue value = getfield(mod, name) - if value isa Module + value === Main && continue test_exceptions(value, visited) elseif value isa Type str = string(value) @@ -123,3 +127,36 @@ end visited = test_exceptions(Base) test_exceptions(Core, visited) end + +# inference quality test for `error` +@test Base.infer_return_type(error, (Any,)) === Union{} +@test Base.infer_return_type(xs->error(xs...), (Vector{Any},)) === Union{} +module Issue54029 +export raise54029 +Base.Experimental.@max_methods 1 +raise54029(x) = error(x) +end +using .Issue54029 +@test Base.infer_return_type(raise54029, (Any,)) === Union{} +@test Base.infer_return_type(xs->raise54029(xs...), (Vector{Any},)) === Union{} + +@testset "CompositeException" begin + ce = CompositeException() + @test isempty(ce) + @test length(ce) == 0 + @test eltype(ce) == Any + str = sprint(showerror, ce) + @test str == "CompositeException()\n" + push!(ce, ErrorException("something sad has happened")) + @test !isempty(ce) + @test length(ce) == 1 + pushfirst!(ce, ErrorException("something sad has happened even earlier")) + @test length(ce) == 2 + # test iterate + for ex in ce + @test ex isa ErrorException + end + push!(ce, ErrorException("something sad has happened yet again")) + str = sprint(showerror, ce) + @test str == "something sad has happened even earlier\n\n...and 2 more exceptions.\n" +end diff --git a/test/errorshow.jl b/test/errorshow.jl index 28ae3fd32365a..f83bbe31b7cc4 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -5,6 +5,13 @@ using Random, LinearAlgebra # For curmod_* include("testenv.jl") +# re-register only the error hints that are being tested here ( +Base.Experimental.register_error_hint(Base.noncallable_number_hint_handler, MethodError) +Base.Experimental.register_error_hint(Base.string_concatenation_hint_handler, MethodError) +Base.Experimental.register_error_hint(Base.methods_on_iterable, MethodError) +Base.Experimental.register_error_hint(Base.nonsetable_type_hint_handler, MethodError) +Base.Experimental.register_error_hint(Base.fielderror_listfields_hint_handler, FieldError) +Base.Experimental.register_error_hint(Base.fielderror_dict_hint_handler, FieldError) @testset "SystemError" begin err = try; systemerror("reason", Cint(0)); false; catch ex; ex; end::SystemError @@ -74,8 +81,12 @@ Base.show_method_candidates(buf, Base.MethodError(method_c1,(1, "", ""))) Base.show_method_candidates(buf, Base.MethodError(method_c1,(1., "", ""))) @test 
occursin("\n\nClosest candidates are:\n method_c1(::Float64, ::AbstractString...)$cmod$cfile$c1line\n", String(take!(buf))) -# Have no matches so should return empty +# Have no matches, but still print up to 3 Base.show_method_candidates(buf, Base.MethodError(method_c1,(1, 1, 1))) +@test occursin("\n\nClosest candidates are:\n method_c1(!Matched::Float64, !Matched::AbstractString...)$cmod$cfile$c1line\n", String(take!(buf))) + +function nomethodsfunc end +Base.show_method_candidates(buf, Base.MethodError(nomethodsfunc,(1, 1, 1))) @test isempty(String(take!(buf))) # matches the implicit constructor -> convert method @@ -204,6 +215,7 @@ Base.show_method_candidates(buf, try bad_vararg_decl("hello", 3) catch e e end) @test occursin("bad_vararg_decl(!Matched::$Int, ::Any...)", String(take!(buf))) macro except_str(expr, err_type) + source_info = __source__ return quote let err = nothing try @@ -211,7 +223,9 @@ macro except_str(expr, err_type) catch err end err === nothing && error("expected failure, but no exception thrown") - @test typeof(err) === $(esc(err_type)) + @testset let expr=$(repr(expr)) + $(Expr(:macrocall, Symbol("@test"), source_info, :(typeof(err) === $(esc(err_type))))) + end buf = IOBuffer() showerror(buf, err) String(take!(buf)) @@ -220,6 +234,7 @@ macro except_str(expr, err_type) end macro except_strbt(expr, err_type) + source_info = __source__ errmsg = "expected failure, but no exception thrown for $expr" return quote let err = nothing @@ -228,7 +243,9 @@ macro except_strbt(expr, err_type) catch err end err === nothing && error($errmsg) - @test typeof(err) === $(esc(err_type)) + @testset let expr=$(repr(expr)) + $(Expr(:macrocall, Symbol("@test"), source_info, :(typeof(err) === $(esc(err_type))))) + end buf = IOBuffer() showerror(buf, err, catch_backtrace()) String(take!(buf)) @@ -237,6 +254,7 @@ macro except_strbt(expr, err_type) end macro except_stackframe(expr, err_type) + source_info = __source__ return quote let err = nothing local st @@ -246,7 +264,9 @@ macro except_stackframe(expr, err_type) st = stacktrace(catch_backtrace()) end err === nothing && error("expected failure, but no exception thrown") - @test typeof(err) === $(esc(err_type)) + @testset let expr=$(repr(expr)) + $(Expr(:macrocall, Symbol("@test"), source_info, :(typeof(err) === $(esc(err_type))))) + end sprint(show, st[1]) end end @@ -350,7 +370,7 @@ let undefvar err_str = @except_str Vector{Any}(undef, 1)[1] UndefRefError @test err_str == "UndefRefError: access to undefined reference" err_str = @except_str undefvar UndefVarError - @test err_str == "UndefVarError: `undefvar` not defined" + @test err_str == "UndefVarError: `undefvar` not defined in local scope" err_str = @except_str read(IOBuffer(), UInt8) EOFError @test err_str == "EOFError: read end of file" err_str = @except_str Dict()[:doesnotexist] KeyError @@ -460,7 +480,7 @@ let err_str, @test startswith(sprint(show, which(StructWithUnionAllMethodDefs{<:Integer}, (Any,))), "($(curmod_prefix)StructWithUnionAllMethodDefs{T} where T<:Integer)(x)") @test repr("text/plain", FunctionLike()) == "(::$(curmod_prefix)FunctionLike) (generic function with 1 method)" - @test repr("text/plain", Core.arraysize) == "arraysize (built-in function)" + @test repr("text/plain", Core.getfield) == "getfield (built-in function)" err_str = @except_stackframe String() ErrorException @test err_str == "String() at $sn:$(method_defs_lineno + 0)" @@ -501,7 +521,7 @@ let @test (@macroexpand @fastmath + ) == :(Base.FastMath.add_fast) @test (@macroexpand @fastmath min(1) ) == 
:(Base.FastMath.min_fast(1)) let err = try; @macroexpand @doc "" f() = @x; catch ex; ex; end - @test err == UndefVarError(Symbol("@x")) + @test err == UndefVarError(Symbol("@x"), @__MODULE__) end @test (@macroexpand @seven_dollar $bar) == 7 x = 2 @@ -534,6 +554,14 @@ end @test (@macroexpand1 @nest2b 42) == _macroexpand1(:(@nest2b 42)) end +module TwoargMacroExpand +macro modulecontext(); return __module__; end +end +@test (@__MODULE__) == @macroexpand TwoargMacroExpand.@modulecontext +@test TwoargMacroExpand == @macroexpand TwoargMacroExpand @modulecontext +@test (@__MODULE__) == @macroexpand1 TwoargMacroExpand.@modulecontext +@test TwoargMacroExpand == @macroexpand1 TwoargMacroExpand @modulecontext + foo_9965(x::Float64; w=false) = x foo_9965(x::Int) = 2x @@ -549,17 +577,37 @@ foo_9965(x::Int) = 2x @test occursin("got unsupported keyword argument \"w\"", String(take!(io))) end +@testset "MethodError with long types (#50803)" begin + a = view(reinterpret(reshape, UInt8, PermutedDimsArray(rand(5, 7), (2, 1))), 2:3, 2:4, 1:4) # a mildly-complex type + function f50803 end + ex50803 = try + f50803(a, a, a, a, a, a) + catch e + e + end::MethodError + tlf = Ref(false) + str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 120), :stacktrace_types_limited=>tlf)) + @test tlf[] + @test occursin("::SubArray{…}", str) + tlf[] = false + str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 10000), :stacktrace_types_limited=>tlf)) + @test !tlf[] + str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 120))) + @test !occursin("::SubArray{…}", str) +end + # Issue #20556 import REPL module EnclosingModule abstract type AbstractTypeNoConstructors end end let - method_error = MethodError(EnclosingModule.AbstractTypeNoConstructors, ()) + method_error = MethodError(EnclosingModule.AbstractTypeNoConstructors, (), Base.get_world_counter()) # Test that it shows a special message when no constructors have been defined by the user. - @test sprint(showerror, method_error) == - "MethodError: no constructors have been defined for $(EnclosingModule.AbstractTypeNoConstructors)" + @test startswith(sprint(showerror, method_error), + """MethodError: no constructors have been defined for $(EnclosingModule.AbstractTypeNoConstructors) + The type `$(EnclosingModule.AbstractTypeNoConstructors)` exists, but no method is defined for this combination of argument types when trying to construct it.""") # Does it go back to previous behaviour when there *is* at least # one constructor defined? 
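The hint handlers re-registered at the top of these errorshow.jl changes plug into `Base.Experimental.register_error_hint`, which appends extra text to the `showerror` output of a given exception type. As a minimal sketch of the mechanism these tests exercise (the functions `only_int` and `any_number` below are illustrative placeholders, not part of the test suite), a `MethodError` hint can be installed like so:

only_int(x::Int) = 1
any_number(x::Number) = 2

Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs
    # `exc.f` is the function that failed to dispatch; `argtypes` holds the
    # types of the positional arguments the caller supplied.
    if exc.f === only_int
        print(io, "\nDid you mean to call `any_number` instead?")
    end
end

With such a hint installed, `only_int(1.0)` still throws a MethodError, but the printed message ends with the extra suggestion, which is the behavior the `@except_str` checks in this file assert on.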
@@ -618,6 +666,24 @@ end @test startswith(str, "MethodError: no method matching f21006(::Tuple{})") @test !occursin("The applicable method may be too new", str) end + + str = sprint(Base.showerror, MethodError(+, (1.0, 2.0))) + @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64)") + @test occursin("This error has been manually thrown, explicitly", str) + + str = sprint(Base.showerror, MethodError(+, (1.0, 2.0), Base.get_world_counter())) + @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64)") + @test occursin("This error has been manually thrown, explicitly", str) + + str = sprint(Base.showerror, MethodError(Core.kwcall, ((; a=3.0), +, 1.0, 2.0))) + @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64; a::Float64)") + @test occursin("This error has been manually thrown, explicitly", str) + + str = sprint(Base.showerror, MethodError(Core.kwcall, ((; a=3.0), +, 1.0, 2.0), Base.get_world_counter())) + @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64; a::Float64)") + @test occursin("This method does not support all of the given keyword arguments", str) + + @test_throws "MethodError: no method matching kwcall()" Core.kwcall() end # Issue #50200 @@ -626,8 +692,9 @@ using Base.Experimental: @opaque test_no_error(f) = @test f() === nothing function test_worldage_error(f) ex = try; f(); error("Should not have been reached") catch ex; ex; end - @test occursin("The applicable method may be too new", sprint(Base.showerror, ex)) - @test !occursin("!Matched::", sprint(Base.showerror, ex)) + strex = sprint(Base.showerror, ex) + @test occursin("The applicable method may be too new", strex) + @test !occursin("!Matched::", sprint(Base.showerror, strex)) end global callback50200 @@ -681,8 +748,7 @@ end pop!(Base.Experimental._hint_handlers[DomainError]) # order is undefined, don't copy this struct ANumber <: Number end -let err_str - err_str = @except_str ANumber()(3 + 4) MethodError +let err_str = @except_str ANumber()(3 + 4) MethodError @test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str) @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1 # issue 40478 @@ -690,6 +756,25 @@ let err_str @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1 end +let a = [1 2; 3 4]; + err_str = @except_str (a[1][2] = 5) MethodError + @test occursin("\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ", err_str) + @test occursin("a[1, 2]", err_str) + @test occursin("rather than a[1][2]", err_str) +end + +let d = Dict + err_str = @except_str (d[1] = 5) MethodError + @test occursin("\nYou attempted to index the type Dict, rather than an instance of the type. 
Make sure you create the type using its constructor: ", err_str) + @test occursin("d = Dict([...])", err_str) + @test occursin(" rather than d = Dict", err_str) +end + +let s = Some("foo") + err_str = @except_str (s[] = "bar") MethodError + @test !occursin("You attempted to index the type String", err_str) +end + # Execute backtrace once before checking formatting, see #38858 backtrace() @@ -701,12 +786,27 @@ backtrace() io = IOBuffer() Base.show_backtrace(io, bt) output = split(String(take!(io)), '\n') + length(output) >= 8 || println(output) # for better errors when this fails @test lstrip(output[3])[1:3] == "[1]" @test occursin("g28442", output[3]) @test lstrip(output[5])[1:3] == "[2]" @test occursin("f28442", output[5]) - @test occursin("the last 2 lines are repeated 5000 more times", output[7]) - @test lstrip(output[8])[1:7] == "[10003]" + is_windows_32_bit = Sys.iswindows() && (Sys.WORD_SIZE == 32) + if is_windows_32_bit + # These tests are currently broken (intermittently/non-determistically) on 32-bit Windows. + # https://github.com/JuliaLang/julia/issues/55900 + # Instead of skipping them entirely, we skip one, and we loosen the other. + + # Broken test: @test occursin("the above 2 lines are repeated 5000 more times", output[7]) + @test occursin("the above 2 lines are repeated ", output[7]) + @test occursin(" more times", output[7]) + + # Broken test: @test lstrip(output[8])[1:7] == "[10003]" + @test_broken false + else + @test occursin("the above 2 lines are repeated 5000 more times", output[7]) + @test lstrip(output[8])[1:7] == "[10003]" + end end @testset "Line number correction" begin @@ -734,6 +834,47 @@ end @test_throws ArgumentError("invalid index: \"foo\" of type String") [1]["foo"] @test_throws ArgumentError("invalid index: nothing of type Nothing") [1][nothing] +# issue #53618, pr #55165 +@testset "FieldErrorHints" begin + struct FieldFoo + a::Float32 + b::Int + end + Base.propertynames(foo::FieldFoo) = (:a, :x, :y) + + s = FieldFoo(1, 2) + + test = @test_throws FieldError s.c + + ex = test.value::FieldError + + # Check error message first + errorMsg = sprint(Base.showerror, ex) + @test occursin("FieldError: type FieldFoo has no field `c`", errorMsg) + @test occursin("available fields: `a`, `b`", errorMsg) + @test occursin("Available properties: `x`, `y`", errorMsg) + + d = Dict(s => 1) + + for fld in fieldnames(Dict) + ex = try + getfield(d, fld) + catch e + print(e) + end + @test !(ex isa Type) || ex <: FieldError + end + test = @test_throws FieldError d.c + + ex = test.value::FieldError + + errorMsg = sprint(Base.showerror, ex) + @test occursin("FieldError: type Dict has no field `c`", errorMsg) + # Check hint message + hintExpected = "Did you mean to access dict values using key: `:c` ? 
Consider using indexing syntax dict[:c]\n" + @test occursin(hintExpected, errorMsg) +end + # test showing MethodError with type argument struct NoMethodsDefinedHere; end let buf = IOBuffer() @@ -840,7 +981,7 @@ if (Sys.isapple() || Sys.islinux()) && Sys.ARCH === :x86_64 catch_backtrace() end bt_str = sprint(Base.show_backtrace, bt) - @test occursin(r"the last 2 lines are repeated \d+ more times", bt_str) + @test occursin(r"the above 2 lines are repeated \d+ more times", bt_str) end end end @@ -967,6 +1108,40 @@ let err_str @test occursin("String concatenation is performed with *", err_str) end +# https://github.com/JuliaLang/julia/issues/55745 +let err_str + err_str = @except_str +() MethodError + @test !occursin("String concatenation is performed with *", err_str) +end + +struct MissingLength; end +struct MissingSize; end +Base.IteratorSize(::Type{MissingSize}) = Base.HasShape{2}() +Base.iterate(::MissingLength) = nothing +Base.iterate(::MissingSize) = nothing + +let err_str + expected = "Finding the minimum of an iterable is performed with `minimum`." + err_str = @except_str min([1,2,3]) MethodError + @test occursin(expected, err_str) + err_str = @except_str min((i for i in 1:3)) MethodError + @test occursin(expected, err_str) + expected = "Finding the maximum of an iterable is performed with `maximum`." + err_str = @except_str max([1,2,3]) MethodError + @test occursin(expected, err_str) + + expected = "You may need to implement the `length` method or define `IteratorSize` for this type to be `SizeUnknown`." + err_str = @except_str length(MissingLength()) MethodError + @test occursin(expected, err_str) + err_str = @except_str collect(MissingLength()) MethodError + @test occursin(expected, err_str) + expected = "You may need to implement the `length` and `size` methods for `IteratorSize` `HasShape`." 
+ err_str = @except_str size(MissingSize()) MethodError + @test occursin(expected, err_str) + err_str = @except_str collect(MissingSize()) MethodError + @test occursin(expected, err_str) +end + @testset "unused argument names" begin g(::Int) = backtrace() bt = g(1) @@ -1059,3 +1234,28 @@ let e = @test_throws MethodError convert(TypeCompareError{Float64,1}, TypeCompar @test occursin("TypeCompareError{Float64,1}", str) @test !occursin("TypeCompareError{Float64{},2}", str) # No {...} for types without params end + +@testset "InexactError for Inf16 should print '16' (#51087)" begin + @test sprint(showerror, InexactError(:UInt128, UInt128, Inf16)) == "InexactError: UInt128(Inf16)" + + for IntType in [Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128] + IntStr = string(IntType) + for InfVal in Any[Inf, Inf16, Inf32, Inf64] + InfStr = repr(InfVal) + e = @test_throws InexactError IntType(InfVal) + str = sprint(Base.showerror, e.value) + @test occursin("InexactError: $IntStr($InfStr)", str) + end + end +end + +# error message hint from PR #22647 +@test_throws "Many shells" cd("~") +@test occursin("Many shells", sprint(showerror, Base.IOError("~", Base.UV_ENOENT))) + +# issue #47559" +@test_throws("MethodError: no method matching invoke Returns(::Any, ::Val{N}) where N", + invoke(Returns, Tuple{Any,Val{N}} where N, 1, Val(1))) + +f33793(x::Float32, y::Float32) = 1 +@test_throws "\nClosest candidates are:\n f33793(!Matched::Float32, !Matched::Float32)\n" f33793(Float64(0.0), Float64(0.0)) diff --git a/test/euler.jl b/test/euler.jl index 9af79a44cc0d3..c8d0e9a734fd2 100644 --- a/test/euler.jl +++ b/test/euler.jl @@ -2,7 +2,7 @@ ## Project Euler # -# problems: http://projecteuler.net/problems +# problems: https://projecteuler.net/problems # solutions: https://code.google.com/p/projecteuler-solutions/wiki/ProjectEulerSolutions #1: 233168 diff --git a/test/exceptions.jl b/test/exceptions.jl index eb0bbaec35090..1e52c7a2fe2c3 100644 --- a/test/exceptions.jl +++ b/test/exceptions.jl @@ -241,6 +241,18 @@ end end end)() @test length(Base.current_exceptions()) == 0 + + (()-> begin + while true + try + error("foo") + finally + break + end + end + @test length(Base.current_exceptions()) == 0 + end)() + @test length(Base.current_exceptions()) == 0 end @testset "Deep exception stacks" begin diff --git a/test/fastmath.jl b/test/fastmath.jl index 21f3ebc1e603f..efca5b85c6642 100644 --- a/test/fastmath.jl +++ b/test/fastmath.jl @@ -1,7 +1,30 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +using InteractiveUtils: code_llvm # fast math +@testset "check fast present in LLVM" begin + for T in (Float16, Float32, Float64, ComplexF32, ComplexF64) + f(x) = @fastmath x + x + x + llvm = sprint(code_llvm, f, (T,)) + @test occursin("fast", llvm) + + g(x) = @fastmath x * x * x + llvm = sprint(code_llvm, g, (T,)) + @test occursin("fast", llvm) + end + + for T in (Float16, Float32, Float64) + f(x, y, z) = @fastmath min(x, y, z) + llvm = sprint(code_llvm, f, (T,T,T)) + @test occursin("fast", llvm) + + g(x, y, z) = @fastmath max(x, y, z) + llvm = sprint(code_llvm, g, (T,T,T)) + @test occursin("fast", llvm) + end +end + @testset "check expansions" begin @test macroexpand(Main, :(@fastmath 1+2)) == :(Base.FastMath.add_fast(1,2)) @test macroexpand(Main, :(@fastmath +)) == :(Base.FastMath.add_fast) @@ -256,6 +279,28 @@ end @testset "literal powers" begin @test @fastmath(2^-2) == @fastmath(2.0^-2) == 0.25 + # Issue #53817 + # Note that exponent -2^63 fails testing because of issue #53881 + # Therefore we test with -(2^63-1). For Int == Int32 there is an analogue restriction. + # See also PR #53860. + if Int == Int64 + @test @fastmath(2^-9223372036854775807) === 0.0 + @test_throws DomainError @fastmath(2^-9223372036854775809) + @test @fastmath(1^-9223372036854775807) isa Float64 + @test @fastmath(1^-9223372036854775809) isa Int + elseif Int == Int32 + @test @fastmath(2^-2147483647) === 0.0 + @test_throws DomainError @fastmath(2^-2147483649) + @test @fastmath(1^-2147483647) isa Float64 + @test @fastmath(1^-2147483649) isa Int + end + @test_throws MethodError @fastmath(^(2)) +end +# issue #53857 +@testset "fast_pow" begin + n = Int64(2)^52 + @test @fastmath (1 + 1 / n) ^ n ≈ ℯ + @test @fastmath (1 + 1 / n) ^ 4503599627370496 ≈ ℯ end @testset "sincos fall-backs" begin @@ -293,3 +338,14 @@ end @test x == [1, 1] @test i == 1 end + +@testset "@fastmath-related crash (#49907)" begin + x = @fastmath maximum(Float16[1,2,3]; init = Float16(0)) + @test x == Float16(3) +end + +@testset "Test promotion of >=3 arg fastmath" begin + # Bug caught in https://github.com/JuliaLang/julia/pull/54513#discussion_r1620553369 + x = @fastmath 1. + 1. 
+ 1f0 + @test x == 3.0 +end diff --git a/test/file.jl b/test/file.jl index 1d2ac4c6f9132..6425155c82965 100644 --- a/test/file.jl +++ b/test/file.jl @@ -31,6 +31,8 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER symlink(subdir, dirlink) @test stat(dirlink) == stat(subdir) @test readdir(dirlink) == readdir(subdir) + @test map(o->o.names, Base.Filesystem._readdirx(dirlink)) == map(o->o.names, Base.Filesystem._readdirx(subdir)) + @test realpath.(Base.Filesystem._readdirx(dirlink)) == realpath.(Base.Filesystem._readdirx(subdir)) # relative link relsubdirlink = joinpath(subdir, "rel_subdirlink") @@ -38,6 +40,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER symlink(reldir, relsubdirlink) @test stat(relsubdirlink) == stat(subdir2) @test readdir(relsubdirlink) == readdir(subdir2) + @test Base.Filesystem._readdirx(relsubdirlink) == Base.Filesystem._readdirx(subdir2) # creation of symlink to directory that does not yet exist new_dir = joinpath(subdir, "new_dir") @@ -56,6 +59,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER mkdir(new_dir) touch(foo_file) @test readdir(new_dir) == readdir(nedlink) + @test realpath.(Base.Filesystem._readdirx(new_dir)) == realpath.(Base.Filesystem._readdirx(nedlink)) rm(foo_file) rm(new_dir) @@ -124,6 +128,9 @@ end end @test_throws ArgumentError tempname(randstring()) end +@testset "tempname with suffix" begin + @test !isfile(tempname(suffix = "_foo.txt")) +end child_eval(code::String) = eval(Meta.parse(readchomp(`$(Base.julia_cmd()) -E $code`))) @@ -355,7 +362,7 @@ chmod(file, filemode(file) | 0o222) @test filesize(file) == 0 # issue #26685 -@test !isfile("http://google.com") +@test !isfile("https://google.com") if Sys.iswindows() permissions = 0o444 @@ -435,8 +442,7 @@ end for pth in ("afile", joinpath("afile", "not_file"), SubString(joinpath(dir, "afile")), - Base.RawFD(-1), - -1) + Base.RawFD(-1)) test_stat_error(stat, pth) test_stat_error(lstat, pth) end @@ -453,6 +459,11 @@ end end end +# Issue #51710 and PR #54855 +@test_throws MethodError stat(7) +@test_throws MethodError ispath(false) +@test_throws MethodError ispath(1) + # On windows the filesize of a folder is the accumulation of all the contained # files and is thus zero in this case. if Sys.iswindows() @@ -637,9 +648,11 @@ end MAX_PATH = (Sys.iswindows() ? 
260 - length(PATH_PREFIX) : 255) - 9 for i = 0:9 local tmp = joinpath(PATH_PREFIX, "x"^MAX_PATH * "123456789"[1:i]) - @test withenv(var => tmp) do - tempdir() - end == tmp + no_error_logging() do + @test withenv(var => tmp) do + tempdir() + end == tmp + end end end @@ -815,6 +828,303 @@ mktempdir() do tmpdir rm(b_tmpdir) end +@testset "rename" begin + # some of the windows specific behavior may be fixed in new versions of julia + mktempdir() do dir + # see if can make symlinks + local can_symlink = try + symlink("foo", joinpath(dir, "link")) + rm(joinpath(dir, "link")) + true + catch + false + end + local f1 = joinpath(dir, "file1") + local f2 = joinpath(dir, "file2") + local d1 = joinpath(dir, "dir1") + local d2 = joinpath(dir, "dir2") + local subd1f1 = joinpath(d1, "file1") + local subd1f2 = joinpath(d1, "file2") + local subd2f1 = joinpath(d2, "file1") + local subd2f2 = joinpath(d2, "file2") + local h1 = joinpath(dir, "hlink1") + local h2 = joinpath(dir, "hlink2") + local s1 = joinpath(dir, "slink1") + local s2 = joinpath(dir, "slink2") + @testset "renaming to non existing newpath in same directory" begin + # file, make sure isexecutable is copied + for mode in (0o644, 0o755) + write(f1, b"data") + chmod(f1, mode) + Base.rename(f1, f2) + @test !isfile(f1) + @test isfile(f2) + @test read(f2) == b"data" + if mode == 0o644 + @test !isexecutable(f2) + else + @test isexecutable(f2) + end + rm(f2) + end + # empty directory + mkdir(d1) + Base.rename(d1, d2) + @test !isdir(d1) + @test isdir(d2) + @test isempty(readdir(d2)) + rm(d2) + # non empty directory + mkdir(d1) + write(subd1f1, b"data") + chmod(subd1f1, 0o644) + write(subd1f2, b"exe") + chmod(subd1f2, 0o755) + Base.rename(d1, d2) + @test !isdir(d1) + @test isdir(d2) + @test read(subd2f1) == b"data" + @test read(subd2f2) == b"exe" + @test !isexecutable(subd2f1) + @test isexecutable(subd2f2) + rm(d2; recursive=true) + # hardlink + write(f1, b"data") + hardlink(f1, h1) + Base.rename(h1, h2) + @test isfile(f1) + @test !isfile(h1) + @test isfile(h2) + @test read(h2) == b"data" + write(h2, b"data2") + @test read(f1) == b"data2" + rm(h2) + rm(f1) + # symlink + if can_symlink + symlink("foo", s1) + Base.rename(s1, s2) + @test !islink(s1) + @test islink(s2) + @test readlink(s2) == "foo" + rm(s2) + end + end + @test isempty(readdir(dir)) # make sure everything got cleaned up + + # Get the error code from failed rename, or nothing if it worked + function rename_errorcodes(oldpath, newpath) + try + Base.rename(oldpath, newpath) + nothing + catch e + e.code + end + end + @testset "errors" begin + # invalid paths + @test_throws ArgumentError Base.rename(f1*"\0", "") + @test Base.UV_ENOENT == rename_errorcodes("", "") + write(f1, b"data") + @test Base.UV_ENOENT == rename_errorcodes(f1, "") + @test read(f1) == b"data" + @test Base.UV_ENOENT == rename_errorcodes("", f1) + @test read(f1) == b"data" + @test Base.UV_ENOENT == rename_errorcodes(f2, f1) + @test read(f1) == b"data" + @test Base.UV_ENOENT == rename_errorcodes(f1, subd1f1) + @test read(f1) == b"data" + rm(f1) + # attempt to make a directory a subdirectory of itself + mkdir(d1) + if Sys.iswindows() + @test rename_errorcodes(d1, joinpath(d1, "subdir")) ∈ (Base.UV_EINVAL, Base.UV_EBUSY) + else + @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(d1, "subdir")) + end + rm(d1) + # rename to child of a file + mkdir(d1) + write(f2, "foo") + if Sys.iswindows() + @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(f2, "subdir")) + else + @test Base.UV_ENOTDIR == rename_errorcodes(d1, joinpath(f2, 
"subdir")) + end + # replace a file with a directory + if !Sys.iswindows() + @test Base.UV_ENOTDIR == rename_errorcodes(d1, f2) + else + # this should work on windows + Base.rename(d1, f2) + @test isdir(f2) + @test !ispath(d1) + end + rm(f2; force=true) + rm(d1; force=true) + # symlink loop + if can_symlink + symlink(s1, s2) + symlink(s2, s1) + @test Base.UV_ELOOP == rename_errorcodes(joinpath(s1, "foo"), f2) + write(f2, b"data") + @test Base.UV_ELOOP == rename_errorcodes(f2, joinpath(s1, "foo")) + rm(s1) + rm(s2) + rm(f2) + end + # newpath is a nonempty directory + mkdir(d1) + mkdir(d2) + write(subd2f1, b"data") + write(f1, b"otherdata") + if Sys.iswindows() + @test Base.UV_EACCES == rename_errorcodes(f1, d1) + @test Base.UV_EACCES == rename_errorcodes(f1, d2) + @test Base.UV_EACCES == rename_errorcodes(d1, d2) + @test Base.UV_EACCES == rename_errorcodes(subd2f1, d2) + else + @test Base.UV_EISDIR == rename_errorcodes(f1, d1) + @test Base.UV_EISDIR == rename_errorcodes(f1, d2) + @test rename_errorcodes(d1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST) + @test rename_errorcodes(subd2f1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST, Base.UV_EISDIR) + end + rm(f1) + rm(d1) + rm(d2; recursive=true) + end + @test isempty(readdir(dir)) # make sure everything got cleaned up + + @testset "replacing existing file" begin + write(f2, b"olddata") + chmod(f2, 0o755) + write(f1, b"newdata") + chmod(f1, 0o644) + @test isexecutable(f2) + @test !isexecutable(f1) + Base.rename(f1, f2) + @test !ispath(f1) + @test read(f2) == b"newdata" + @test !isexecutable(f2) + rm(f2) + end + + @testset "replacing file with itself" begin + write(f1, b"data") + Base.rename(f1, f1) + @test read(f1) == b"data" + hardlink(f1, h1) + Base.rename(f1, h1) + if Sys.iswindows() + # On Windows f1 gets deleted + @test !ispath(f1) + else + @test read(f1) == b"data" + end + @test read(h1) == b"data" + rm(h1) + rm(f1; force=true) + end + + @testset "replacing existing file in different directories" begin + mkdir(d1) + mkdir(d2) + write(subd2f2, b"olddata") + chmod(subd2f2, 0o755) + write(subd1f1, b"newdata") + chmod(subd1f1, 0o644) + @test isexecutable(subd2f2) + @test !isexecutable(subd1f1) + Base.rename(subd1f1, subd2f2) + @test !ispath(subd1f1) + @test read(subd2f2) == b"newdata" + @test !isexecutable(subd2f2) + @test isdir(d1) + @test isdir(d2) + rm(d1; recursive=true) + rm(d2; recursive=true) + end + + @testset "rename with open files" begin + # both open + write(f2, b"olddata") + write(f1, b"newdata") + open(f1) do handle1 + open(f2) do handle2 + if Sys.iswindows() + # currently this doesn't work on windows + @test Base.UV_EBUSY == rename_errorcodes(f1, f2) + else + Base.rename(f1, f2) + @test !ispath(f1) + @test read(f2) == b"newdata" + end + # rename doesn't break already opened files + @test read(handle1) == b"newdata" + @test read(handle2) == b"olddata" + end + end + rm(f1; force=true) + rm(f2; force=true) + + # oldpath open + write(f2, b"olddata") + write(f1, b"newdata") + open(f1) do handle1 + if Sys.iswindows() + # currently this doesn't work on windows + @test Base.UV_EBUSY == rename_errorcodes(f1, f2) + else + Base.rename(f1, f2) + @test !ispath(f1) + @test read(f2) == b"newdata" + end + # rename doesn't break already opened files + @test read(handle1) == b"newdata" + end + rm(f1; force=true) + rm(f2; force=true) + + # newpath open + write(f2, b"olddata") + write(f1, b"newdata") + open(f2) do handle2 + if Sys.iswindows() + # currently this doesn't work on windows + @test Base.UV_EACCES == rename_errorcodes(f1, f2) + else + 
Base.rename(f1, f2) + @test !ispath(f1) + @test read(f2) == b"newdata" + end + # rename doesn't break already opened files + @test read(handle2) == b"olddata" + end + rm(f1; force=true) + rm(f2; force=true) + end + + @testset "replacing empty directory with directory" begin + mkdir(d1) + mkdir(d2) + write(subd1f1, b"data") + if Sys.iswindows() + # currently this doesn't work on windows + @test Base.UV_EACCES == rename_errorcodes(d1, d2) + rm(d1; recursive=true) + rm(d2) + else + Base.rename(d1, d2) + @test isdir(d2) + @test read(subd2f1) == b"data" + @test !ispath(d1) + rm(d2; recursive=true) + end + end + @test isempty(readdir(dir)) # make sure everything got cleaned up + end +end + # issue #10506 #10434 ## Tests for directories and links to directories if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER @@ -1023,7 +1333,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=false) @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=true) # mv - @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true) + @test_throws Base._UVError("rename($(repr(nonexisting_src)), $(repr(dst)))", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true) end end @@ -1423,6 +1733,13 @@ cd(dirwalk) do @test dirs == [] @test files == ["foo"] end + + # pwd() as default directory + for ((r1, d1, f1), (r2, d2, f2)) in zip(walkdir(), walkdir(pwd())) + @test r1 == r2 + @test d1 == d2 + @test f1 == f2 + end end rm(dirwalk, recursive=true) @@ -1436,6 +1753,10 @@ rm(dirwalk, recursive=true) touch(randstring()) end @test issorted(readdir()) + @test issorted(Base.Filesystem._readdirx()) + @test map(o->o.name, Base.Filesystem._readdirx()) == readdir() + @test map(o->o.path, Base.Filesystem._readdirx()) == readdir(join=true) + @test count(isfile, readdir(join=true)) == count(isfile, Base.Filesystem._readdirx()) end end end @@ -1460,7 +1781,7 @@ rm(dir) ################## -# Return values of mkpath, mkdir, cp, mv and touch +# Return values of mkpath, mkdir, cp, mv, rename and touch #################### mktempdir() do dir name1 = joinpath(dir, "apples") @@ -1477,8 +1798,11 @@ mktempdir() do dir @test cp(name2, name1) == name1 @test isfile(name1) @test isfile(name2) + @test Base.rename(name1, name2) == name2 + @test !ispath(name1) + @test isfile(name2) namedir = joinpath(dir, "chalk") - namepath = joinpath(dir, "chalk","cheese","fresh") + namepath = joinpath(dir, "chalk", "cheese", "fresh") @test !ispath(namedir) @test mkdir(namedir) == namedir @test isdir(namedir) @@ -1487,7 +1811,12 @@ mktempdir() do dir @test isdir(namepath) @test mkpath(namepath) == namepath @test isdir(namepath) + # issue 54826 + namepath_dirpath = joinpath(dir, "x", "y", "z", "") + @test mkpath(namepath_dirpath) == namepath_dirpath end +@test mkpath("") == "" +@test mkpath("/") == "/" # issue #30588 @test realpath(".") == realpath(pwd()) @@ -1591,6 +1920,26 @@ end end end +@testset "pwd tests" begin + mktempdir() do dir + cd(dir) do + withenv("OLDPWD" => nothing) do + io = IOBuffer() + Base.repl_cmd(@cmd("cd"), io) + Base.repl_cmd(@cmd("cd -"), io) + @test realpath(pwd()) == realpath(dir) + if !Sys.iswindows() + # Delete the working directory and check we can cd out of it + # Cannot delete 
the working directory on Windows + rm(dir) + @test_throws Base._UVError("pwd()", Base.UV_ENOENT) pwd() + Base.repl_cmd(@cmd("cd \\~"), io) + end + end + end + end +end + @testset "readdir tests" begin ≛(a, b) = sort(a) == sort(b) mktempdir() do dir @@ -1636,23 +1985,66 @@ end end end -@testset "chmod/isexecutable" begin +if Sys.isunix() + @testset "mkfifo" begin + mktempdir() do dir + path = Libc.mkfifo(joinpath(dir, "fifo")) + @sync begin + @async write(path, "hello") + cat_exec = `$(Base.julia_cmd()) --startup-file=no -e "write(stdout, read(ARGS[1]))"` + @test read(`$cat_exec $path`, String) == "hello" + end + + existing_file = joinpath(dir, "existing") + write(existing_file, "") + @test_throws SystemError Libc.mkfifo(existing_file) + end + end +else + @test_throws( + "mkfifo: Operation not supported", + Libc.mkfifo(joinpath(pwd(), "dummy_path")), + ) +end + +@testset "chmod/isexecutable/isreadable/iswritable" begin mktempdir() do dir - mkdir(joinpath(dir, "subdir")) + subdir = joinpath(dir, "subdir") fpath = joinpath(dir, "subdir", "foo") - # Test that we can actually set the executable bit on all platforms. + @test !ispath(subdir) + mkdir(subdir) + @test ispath(subdir) + + @test !ispath(fpath) touch(fpath) + @test ispath(fpath) + + # Test that we can actually set the executable/readable/writeable bit on all platforms. chmod(fpath, 0o644) @test !Sys.isexecutable(fpath) + @test Sys.isreadable(fpath) + @test Sys.iswritable(fpath) skip=Sys.iswindows() chmod(fpath, 0o755) @test Sys.isexecutable(fpath) + @test Sys.isreadable(fpath) + @test Sys.iswritable(fpath) skip=Sys.iswindows() + chmod(fpath, 0o444) + @test !Sys.isexecutable(fpath) + @test Sys.isreadable(fpath) + @test !Sys.iswritable(fpath) + chmod(fpath, 0o244) + @test !Sys.isexecutable(fpath) + @test !Sys.isreadable(fpath) skip=Sys.iswindows() + @test Sys.iswritable(fpath) skip=Sys.iswindows() # Ensure that, on Windows, where inheritance is default, # chmod still behaves as we expect. if Sys.iswindows() - chmod(joinpath(dir, "subdir"), 0o666) - @test Sys.isexecutable(fpath) + chmod(subdir, 0o666) + @test !Sys.isexecutable(fpath) + @test Sys.isreadable(fpath) + @test_skip Sys.iswritable(fpath) end # Reset permissions to all at the end, so it can be deleted properly. @@ -1673,6 +2065,38 @@ if Sys.iswindows() end end +# Unusually for structs, we test this explicitly because the fields of StatStruct +# is part of its documentation, and therefore cannot change. 
+@testset "StatStruct has promised fields" begin + f, io = mktemp() + s = stat(f) + @test s isa Base.StatStruct + + @test s.desc isa Union{String, Base.OS_HANDLE} + @test s.size isa Int64 + @test s.device isa UInt + @test s.inode isa UInt + @test s.mode isa UInt + @test s.nlink isa Int + @test s.uid isa UInt + @test s.gid isa UInt + @test s.rdev isa UInt + @test s.blksize isa Int64 + @test s.blocks isa Int64 + @test s.mtime isa Float64 + @test s.ctime isa Float64 + + @test s === stat((f,)) + @test s === lstat((f,)) + @test s === stat(".", f) + @test s === lstat(".", f) +end + +mutable struct URI50890; f::String; end +Base.joinpath(x::URI50890) = URI50890(x.f) +@test_throws "stat not implemented" stat(URI50890(".")) +@test_throws "lstat not implemented" lstat(URI50890(".")) + @testset "StatStruct show's extended details" begin f, io = mktemp() s = stat(f) @@ -1716,6 +2140,16 @@ end @test !isnothing(Base.Filesystem.getusername(s.uid)) @test !isnothing(Base.Filesystem.getgroupname(s.gid)) end + s = Base.Filesystem.StatStruct() + stat_show_str = sprint(show, s) + stat_show_str_multi = sprint(show, MIME("text/plain"), s) + @test startswith(stat_show_str, "StatStruct(\"\" ENOENT: ") && endswith(stat_show_str, ")") + @test startswith(stat_show_str_multi, "StatStruct for \"\"\n ENOENT: ") && !endswith(stat_show_str_multi, r"\s") + s = Base.Filesystem.StatStruct("my/test", Ptr{UInt8}(0), Int32(Base.UV_ENOTDIR)) + stat_show_str = sprint(show, s) + stat_show_str_multi = sprint(show, MIME("text/plain"), s) + @test startswith(stat_show_str, "StatStruct(\"my/test\" ENOTDIR: ") && endswith(stat_show_str, ")") + @test startswith(stat_show_str_multi, "StatStruct for \"my/test\"\n ENOTDIR: ") && !endswith(stat_show_str_multi, r"\s") end @testset "diskstat() works" begin diff --git a/test/filesystem.jl b/test/filesystem.jl index 79beea9f66ac1..036a3dda30cca 100644 --- a/test/filesystem.jl +++ b/test/filesystem.jl @@ -2,41 +2,51 @@ mktempdir() do dir - # Create test file - filename = joinpath(dir, "file.txt") - text = "123456" - write(filename, text) - - # test filesystem truncate (shorten) - file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) - Base.Filesystem.truncate(file, 2) - text = text[1:2] - @test length(read(file)) == 2 - close(file) - - # test filesystem truncate (lengthen) - file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) - Base.Filesystem.truncate(file, 20) - @test length(read(file)) == 20 - close(file) - - # test filesystem futime - file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) - Base.Filesystem.futime(file, 1.0, 2.0) - @test Base.Filesystem.stat(file).mtime == 2.0 - close(file) - - # test filesystem readbytes! - file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) - res = ones(UInt8, 80) - Base.Filesystem.readbytes!(file, res) - @test res == UInt8[text..., (i > 20 for i in (length(text) + 1):length(res))...] 
- close(file) + # Create test file + filename = joinpath(dir, "file.txt") + text = "123456" + write(filename, text) + + # test filesystem truncate (shorten) + file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) + Base.Filesystem.truncate(file, 2) + text = text[1:2] + @test length(read(file)) == 2 + close(file) + + # test filesystem truncate (lengthen) + file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) + Base.Filesystem.truncate(file, 20) + @test length(read(file)) == 20 + close(file) + + # test filesystem futime + file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) + Base.Filesystem.futime(file, 1.0, 2.0) + @test Base.Filesystem.stat(file).mtime == 2.0 + close(file) + + # test filesystem readbytes! + file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR) + res = ones(UInt8, 80) + Base.Filesystem.readbytes!(file, res) + @test res == UInt8[text..., (i > 20 for i in (length(text) + 1):length(res))...] + close(file) end import Base.Filesystem: S_IRUSR, S_IRGRP, S_IROTH @testset "types of permission mask constants" begin - @test S_IRUSR & ~S_IRGRP == S_IRUSR - @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH) + @test S_IRUSR & ~S_IRGRP == S_IRUSR + @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH) +end + +@testset "Base.Filesystem docstrings" begin + undoc = Docs.undocumented_names(Base.Filesystem) + @test_broken isempty(undoc) + @test undoc == [:File, :Filesystem, :cptree, :futime, :sendfile, :unlink] +end + +@testset "write return type" begin + @test Base.return_types(write, (Base.Filesystem.File, UInt8)) == [Int] end diff --git a/test/float16.jl b/test/float16.jl index 75f9b55b6d51c..4ff7cc663d07b 100644 --- a/test/float16.jl +++ b/test/float16.jl @@ -79,7 +79,8 @@ end @test unsafe_trunc(Int16, Float16(3)) === Int16(3) @test unsafe_trunc(UInt128, Float16(3)) === UInt128(3) @test unsafe_trunc(Int128, Float16(3)) === Int128(3) - @test unsafe_trunc(Int16, NaN16) === Int16(0) #18771 + # `unsafe_trunc` of `NaN` can be any value, see #56582 + @test unsafe_trunc(Int16, NaN16) isa Int16 # #18771 end @testset "fma and muladd" begin @test fma(Float16(0.1),Float16(0.9),Float16(0.5)) ≈ fma(0.1,0.9,0.5) @@ -203,6 +204,11 @@ const minsubf16_32 = Float32(minsubf16) # issues #33076 @test Float16(1f5) == Inf16 +# issue #52394 +@test Float16(10^8 // (10^9 + 1)) == convert(Float16, 10^8 // (10^9 + 1)) == Float16(0.1) +@test Float16((typemax(UInt128)-0x01) // typemax(UInt128)) == Float16(1.0) +@test Float32((typemax(UInt128)-0x01) // typemax(UInt128)) == Float32(1.0) + @testset "conversion to Float16 from" begin for T in (Float32, Float64, BigFloat) @testset "conversion from $T" begin diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl index 7e9d8021ac5df..d5d697634bcfa 100644 --- a/test/floatfuncs.jl +++ b/test/floatfuncs.jl @@ -139,9 +139,10 @@ end end @testset "literal pow matches runtime pow matches optimized pow" begin - two = 2 - @test 1.0000000105367122^2 == 1.0000000105367122^two - @test 1.0041504f0^2 == 1.0041504f0^two + let two = 2 + @test 1.0000000105367122^2 == 1.0000000105367122^two + @test 1.0041504f0^2 == 1.0041504f0^two + end function g2(start, two, N) x = start @@ -192,11 +193,13 @@ end finv(x) = f(x, -1) f2(x) = f(x, 2) f3(x) = f(x, 3) - x = 1.0000000105367122 - @test x^2 == f(x, 2) == f2(x) == x*x == Float64(big(x)*big(x)) - @test x^3 == f(x, 3) == f3(x) == x*x*x == Float64(big(x)*big(x)*big(x)) - x = 1.000000007393669 - @test x^-1 == f(x, -1) == finv(x) == 1/x == inv(x) == Float64(1/big(x)) == Float64(inv(big(x))) + let 
x = 1.0000000105367122 + @test x^2 == f(x, 2) == f2(x) == x*x == Float64(big(x)*big(x)) + @test x^3 == f(x, 3) == f3(x) == x*x*x == Float64(big(x)*big(x)*big(x)) + end + let x = 1.000000007393669 + @test x^-1 == f(x, -1) == finv(x) == 1/x == inv(x) == Float64(1/big(x)) == Float64(inv(big(x))) + end end @testset "curried approximation" begin @@ -209,3 +212,107 @@ end struct CustomNumber <: Number end @test !isnan(CustomNumber()) end + +@testset "isapprox and integer overflow" begin + for T in (Int8, Int16, Int32) + T === Int && continue + @test !isapprox(typemin(T), T(0)) + @test !isapprox(typemin(T), unsigned(T)(0)) + @test !isapprox(typemin(T), 0) + @test !isapprox(typemin(T), T(0), atol=0.99) + @test !isapprox(typemin(T), unsigned(T)(0), atol=0.99) + @test !isapprox(typemin(T), 0, atol=0.99) + @test_broken !isapprox(typemin(T), T(0), atol=1) + @test_broken !isapprox(typemin(T), unsigned(T)(0), atol=1) + @test !isapprox(typemin(T), 0, atol=1) + + @test !isapprox(typemin(T)+T(10), T(10)) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10)) + @test !isapprox(typemin(T)+T(10), 10) + @test !isapprox(typemin(T)+T(10), T(10), atol=0.99) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=0.99) + @test !isapprox(typemin(T)+T(10), 10, atol=0.99) + @test_broken !isapprox(typemin(T)+T(10), T(10), atol=1) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=1) + @test !isapprox(typemin(T)+T(10), 10, atol=1) + + @test isapprox(typemin(T), 0.0, rtol=1) + end + for T in (Int, Int64, Int128) + @test !isapprox(typemin(T), T(0)) + @test !isapprox(typemin(T), unsigned(T)(0)) + @test !isapprox(typemin(T), T(0), atol=0.99) + @test !isapprox(typemin(T), unsigned(T)(0), atol=0.99) + @test_broken !isapprox(typemin(T), T(0), atol=1) + @test_broken !isapprox(typemin(T), unsigned(T)(0), atol=1) + + @test !isapprox(typemin(T)+T(10), T(10)) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10)) + @test !isapprox(typemin(T)+T(10), T(10), atol=0.99) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=0.99) + @test_broken !isapprox(typemin(T)+T(10), T(10), atol=1) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=1) + + @test isapprox(typemin(T), 0.0, rtol=1) + end +end + +@testset "isapprox and unsigned integers" begin + for T in Base.BitUnsigned_types + # Test also combinations of different integer types + W = widen(T) + # The order of the operands for difference between unsigned integers is + # very important, test both combinations. 
+ @test isapprox(T(42), T(42); rtol=T(0), atol=0.5) + @test isapprox(T(42), W(42); rtol=T(0), atol=0.5) + @test !isapprox(T(0), T(1); rtol=T(0), atol=0.5) + @test !isapprox(T(1), T(0); rtol=T(0), atol=0.5) + @test isapprox(T(1), T(3); atol=T(2)) + @test isapprox(T(4), T(2); atol=T(2)) + @test isapprox(T(1), W(3); atol=T(2)) + @test isapprox(T(4), W(2); atol=T(2)) + @test isapprox(T(5), T(7); atol=typemax(T)) + @test isapprox(T(8), T(6); atol=typemax(T)) + @test isapprox(T(1), T(2); rtol=1) + @test isapprox(T(6), T(3); rtol=1) + @test isapprox(T(1), W(2); rtol=1) + @test isapprox(T(6), W(3); rtol=1) + @test !isapprox(typemin(T), typemax(T)) + @test !isapprox(typemax(T), typemin(T)) + @test !isapprox(typemin(T), typemax(T); atol=typemax(T)-T(1)) + @test !isapprox(typemax(T), typemin(T); atol=typemax(T)-T(1)) + @test isapprox(typemin(T), typemax(T); atol=typemax(T)) + @test isapprox(typemax(T), typemin(T); atol=typemax(T)) + end +end + +@testset "Conversion from floating point to unsigned integer near extremes (#51063)" begin + @test_throws InexactError UInt32(4.2949673f9) + @test_throws InexactError UInt64(1.8446744f19) + @test_throws InexactError UInt64(1.8446744073709552e19) + @test_throws InexactError UInt128(3.402823669209385e38) +end + +@testset "Conversion from floating point to integer near extremes (exhaustive)" begin + for Ti in Base.BitInteger_types, Tf in (Float16, Float32, Float64), x in (typemin(Ti), typemax(Ti)) + y = Tf(x) + for i in -3:3 + z = nextfloat(y, i) + + result = isfinite(z) ? round(BigInt, z) : error + result = result !== error && typemin(Ti) <= result <= typemax(Ti) ? result : error + + if result === error + @test_throws InexactError round(Ti, z) + @test_throws InexactError Ti(z) + else + @test result == round(Ti, z) + if isinteger(z) + @test result == Ti(z) + else + @test_throws InexactError Ti(z) + end + end + end + end +end diff --git a/test/functional.jl b/test/functional.jl index 19355d13ff335..84c4098308ebd 100644 --- a/test/functional.jl +++ b/test/functional.jl @@ -52,9 +52,6 @@ end # foreach let a = [] - foreach(()->push!(a,0)) - @test a == [0] - a = [] foreach(x->push!(a,x), [1,5,10]) @test a == [1,5,10] a = [] @@ -145,6 +142,13 @@ let gen = (i for i in 1:3); @test @inferred(findall(x -> false, gen))::Vector{Int} == Int[] @test @inferred(findall(x -> x < 0, gen))::Vector{Int} == Int[] end +let d = Dict() + d[7]=2 + d[3]=6 + @test @inferred(sort(findall(x -> true, d)))::Vector{Int} == [3, 7] + @test @inferred(sort(findall(x -> false, d)))::Vector{Any} == [] + @test @inferred(sort(findall(x -> x < 0, d)))::Vector{Any} == [] +end # inference on vararg generator of a type (see #22907 comments) let f(x) = collect(Base.Generator(=>, x, x)) @@ -231,3 +235,129 @@ end let (:)(a,b) = (i for i in Base.:(:)(1,10) if i%2==0) @test Int8[ i for i = 1:2 ] == [2,4,6,8,10] end + +@testset "Basic tests of Fix1, Fix2, and Fix" begin + function test_fix1(Fix1=Base.Fix1) + increment = Fix1(+, 1) + @test increment(5) == 6 + @test increment(-1) == 0 + @test increment(0) == 1 + @test map(increment, [1, 2, 3]) == [2, 3, 4] + + concat_with_hello = Fix1(*, "Hello ") + @test concat_with_hello("World!") == "Hello World!" 
+ # Make sure inference is good: + @inferred concat_with_hello("World!") + + one_divided_by = Fix1(/, 1) + @test one_divided_by(10) == 1/10.0 + @test one_divided_by(-5) == 1/-5.0 + + return nothing + end + + function test_fix2(Fix2=Base.Fix2) + return_second = Fix2((x, y) -> y, 999) + @test return_second(10) == 999 + @inferred return_second(10) + @test return_second(-5) == 999 + + divide_by_two = Fix2(/, 2) + @test map(divide_by_two, (2, 4, 6)) == (1.0, 2.0, 3.0) + @inferred map(divide_by_two, (2, 4, 6)) + + concat_with_world = Fix2(*, " World!") + @test concat_with_world("Hello") == "Hello World!" + @inferred concat_with_world("Hello World!") + + return nothing + end + + # Test with normal Base.Fix1 and Base.Fix2 + test_fix1() + test_fix2() + + # Now, repeat the Fix1 and Fix2 tests, but + # with a Fix lambda function used in their place + test_fix1((op, arg) -> Base.Fix{1}(op, arg)) + test_fix2((op, arg) -> Base.Fix{2}(op, arg)) + + # Now, we do more complex tests of Fix: + let Fix=Base.Fix + @testset "Argument Fixation" begin + let f = (x, y, z) -> x + y * z + fixed_f1 = Fix{1}(f, 10) + @test fixed_f1(2, 3) == 10 + 2 * 3 + + fixed_f2 = Fix{2}(f, 5) + @test fixed_f2(1, 4) == 1 + 5 * 4 + + fixed_f3 = Fix{3}(f, 3) + @test fixed_f3(1, 2) == 1 + 2 * 3 + end + end + @testset "Helpful errors" begin + let g = (x, y) -> x - y + # Test minimum N + fixed_g1 = Fix{1}(g, 100) + @test fixed_g1(40) == 100 - 40 + + # Test maximum N + fixed_g2 = Fix{2}(g, 100) + @test fixed_g2(150) == 150 - 100 + + # One over + fixed_g3 = Fix{3}(g, 100) + @test_throws ArgumentError("expected at least 2 arguments to `Fix{3}`, but got 1") fixed_g3(1) + end + end + @testset "Type Stability and Inference" begin + let h = (x, y) -> x / y + fixed_h = Fix{2}(h, 2.0) + @test @inferred(fixed_h(4.0)) == 2.0 + end + end + @testset "Interaction with varargs" begin + vararg_f = (x, y, z...) 
-> x + 10 * y + sum(z; init=zero(x)) + fixed_vararg_f = Fix{2}(vararg_f, 6) + + # Can call with variable number of arguments: + @test fixed_vararg_f(1, 2, 3, 4) == 1 + 10 * 6 + sum((2, 3, 4)) + @inferred fixed_vararg_f(1, 2, 3, 4) + @test fixed_vararg_f(5) == 5 + 10 * 6 + @inferred fixed_vararg_f(5) + end + @testset "Errors should propagate normally" begin + error_f = (x, y) -> sin(x * y) + fixed_error_f = Fix{2}(error_f, Inf) + @test_throws DomainError fixed_error_f(10) + end + @testset "Chaining Fix together" begin + f1 = Fix{1}(*, "1") + f2 = Fix{1}(f1, "2") + f3 = Fix{1}(f2, "3") + @test f3() == "123" + + g1 = Fix{2}(*, "1") + g2 = Fix{2}(g1, "2") + g3 = Fix{2}(g2, "3") + @test g3("") == "123" + end + @testset "Zero arguments" begin + f = Fix{1}(x -> x, 'a') + @test f() == 'a' + end + @testset "Dummy-proofing" begin + @test_throws ArgumentError("expected `N` in `Fix{N}` to be integer greater than 0, but got 0") Fix{0}(>, 1) + @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `0.5::Float64`") Fix{0.5}(>, 1) + @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `1::UInt64`") Fix{UInt64(1)}(>, 1) + end + @testset "Specialize to structs not in `Base`" begin + struct MyStruct + x::Int + end + f = Fix{1}(MyStruct, 1) + @test f isa Fix{1,Type{MyStruct},Int} + end + end +end diff --git a/test/gc.jl b/test/gc.jl index e085c1d8658e5..c532f17f04eb5 100644 --- a/test/gc.jl +++ b/test/gc.jl @@ -5,16 +5,57 @@ using Test function run_gctest(file) let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file` @testset for test_nthreads in (1, 2, 4) - @testset for concurrent_sweep in (0, 1) - new_env = copy(ENV) - new_env["JULIA_NUM_THREADS"] = string(test_nthreads) - new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)" - @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + @testset for test_nithreads in (0, 1) + @testset for concurrent_sweep in (0, 1) + new_env = copy(ENV) + new_env["JULIA_NUM_THREADS"] = "$test_nthreads,$test_nithreads" + new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)" + @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + end end end end end +function run_nonzero_page_utilization_test() + GC.gc() + page_utilization = Base.gc_page_utilization_data() + # at least one of the pools should have nonzero page_utilization + @test any(page_utilization .> 0) +end + +function run_pg_size_test() + page_size = @ccall jl_get_pg_size()::UInt64 + # supported page sizes: 4KB and 16KB + @test page_size == (1 << 12) || page_size == (1 << 14) +end + +function issue_54275_alloc_string() + String(UInt8['a' for i in 1:10000000]) +end + +function issue_54275_test() + GC.gc(true) + baseline = Base.gc_live_bytes() + live_bytes_has_grown_too_much = false + for _ in 1:10 + issue_54275_alloc_string() + GC.gc(true) + if Base.gc_live_bytes() - baseline > 1_000_000 + live_bytes_has_grown_too_much = true + break + end + end + @test !live_bytes_has_grown_too_much +end + +function full_sweep_reasons_test() + GC.gc() + reasons = Base.full_sweep_reasons() + @test reasons[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1 + @test keys(reasons) == Set(Base.FULL_SWEEP_REASONS) +end + # !!! note: # Since we run our tests on 32bit OS as well we confine ourselves # to parameters that allocate about 512MB of objects. 
Max RSS is lower @@ -25,3 +66,34 @@ end run_gctest("gc/objarray.jl") run_gctest("gc/chunks.jl") end + +@testset "GC page metrics" begin + run_nonzero_page_utilization_test() + run_pg_size_test() +end + +@testset "issue-54275" begin + issue_54275_test() +end + +@testset "Base.GC docstrings" begin + @test isempty(Docs.undocumented_names(GC)) +end + +@testset "Full GC reasons" begin + full_sweep_reasons_test() +end + +#testset doesn't work here because this needs to run in top level +#Check that we ensure objects in toplevel exprs are rooted +global dims54422 = [] # allocate the Binding +GC.gc(); GC.gc(); # force the binding to be old +GC.enable(false); # prevent new objects from being old +@eval begin + Base.Experimental.@force_compile # use the compiler + dims54422 = $([]) + nothing +end +GC.enable(true); GC.gc(false) # incremental collection +@test typeof(dims54422) == Vector{Any} +@test isempty(dims54422) diff --git a/test/gc/linkedlist.jl b/test/gc/linkedlist.jl index 669e5f8ec21d9..3eb1480417e50 100644 --- a/test/gc/linkedlist.jl +++ b/test/gc/linkedlist.jl @@ -1,11 +1,11 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license mutable struct ListNode - key::Int64 - next::ListNode - ListNode() = new() - ListNode(x)= new(x) - ListNode(x,y) = new(x,y); + key::Int64 + next::ListNode + ListNode() = new() + ListNode(x)= new(x) + ListNode(x,y) = new(x,y); end function list(N=16*1024^2) diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c index 90b5ee82d80b5..d5bf91ec8c9ab 100644 --- a/test/gcext/gcext.c +++ b/test/gcext/gcext.c @@ -32,7 +32,7 @@ static inline int lt_ptr(void *a, void *b) return (uintptr_t)a < (uintptr_t)b; } -/* align pointer to full word if mis-aligned */ +/* align pointer to full word if misaligned */ static inline void *align_ptr(void *p) { uintptr_t u = (uintptr_t)p; diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl index b155370dd6465..7f19d60fe31fb 100644 --- a/test/generic_map_tests.jl +++ b/test/generic_map_tests.jl @@ -43,7 +43,7 @@ function generic_map_tests(mapf, inplace_mapf=nothing) @test mapf(f, Int[], Int[], Complex{Int}[]) == Union{}[] # In-place map - if inplace_mapf != nothing + if inplace_mapf !== nothing A = Float64[1:10...] inplace_mapf(x -> x*x, A, A) @test A == map(x -> x*x, Float64[1:10...]) diff --git a/test/gmp.jl b/test/gmp.jl index 8f6be13c38054..0812775672969 100644 --- a/test/gmp.jl +++ b/test/gmp.jl @@ -11,6 +11,11 @@ ee = typemax(Int64) @test BigInt <: Signed @test big(1) isa Signed + if sizeof(Culong) >= 8 + @test_throws OutOfMemoryError big(96608869069402268615522366320733234710)^16374500563449903721 + @test_throws OutOfMemoryError 555555555555555555555555555555555555555555555555555^55555555555555555 + end + let x = big(1) @test signed(x) === x @test convert(Signed, x) === x @@ -215,6 +220,8 @@ end end @testset "combinatorics" begin @test factorial(BigInt(40)) == parse(BigInt,"815915283247897734345611269596115894272000000000") + @test_throws DomainError factorial(BigInt(-1)) + @test_throws DomainError factorial(BigInt(rand(-999:-2))) @test binomial(BigInt(1), -1) == BigInt(0) @test binomial(BigInt(1), 2) == BigInt(0) @test binomial(BigInt(-53), 42) == parse(BigInt,"959509335087854414441273718") @@ -438,11 +445,94 @@ end @test string(big(0), base = rand(2:62), pad = 0) == "" end +@testset "Base.GMP.MPZ.export!" 
begin + + function Base_GMP_MPZ_import!(x::BigInt, n::AbstractVector{T}; order::Integer=-1, nails::Integer=0, endian::Integer=0) where {T<:Base.BitInteger} + ccall((:__gmpz_import, Base.GMP.MPZ.libgmp), + Cvoid, + (Base.GMP.MPZ.mpz_t, Csize_t, Cint, Csize_t, Cint, Csize_t, Ptr{Cvoid}), + x, length(n), order, sizeof(T), endian, nails, n) + return x + end + # test import + bytes_to_import_from = Vector{UInt8}([1, 0]) + int_to_import_to = BigInt() + Base_GMP_MPZ_import!(int_to_import_to, bytes_to_import_from, order=0) + @test int_to_import_to == BigInt(256) + + # test export + int_to_export_from = BigInt(256) + bytes_to_export_to = Vector{UInt8}(undef, 2) + Base.GMP.MPZ.export!(bytes_to_export_to, int_to_export_from, order=0) + @test all(bytes_to_export_to .== bytes_to_import_from) + + # test both composed import(export) is identity + int_to_export_from = BigInt(256) + bytes_to_export_to = Vector{UInt8}(undef, 2) + Base.GMP.MPZ.export!(bytes_to_export_to, int_to_export_from, order=0) + int_to_import_to = BigInt() + Base_GMP_MPZ_import!(int_to_import_to, bytes_to_export_to, order=0) + @test int_to_export_from == int_to_import_to + + # test both composed export(import) is identity + bytes_to_import_from = Vector{UInt8}([1, 0]) + int_to_import_to = BigInt() + Base_GMP_MPZ_import!(int_to_import_to, bytes_to_import_from, order=0) + bytes_to_export_to = Vector{UInt8}(undef, 2) + Base.GMP.MPZ.export!(bytes_to_export_to, int_to_export_from, order=0) + @test all(bytes_to_export_to .== bytes_to_import_from) +end + @test isqrt(big(4)) == 2 @test isqrt(big(5)) == 2 -@test big(5)^true == big(5) -@test big(5)^false == one(BigInt) + +@testset "Exponentiation operator" begin + @test big(5)^true == big(5) + @test big(5)^false == one(BigInt) + testvals = Int8[-128:-126; -3:3; 125:127] + @testset "BigInt and Int8 are consistent: $i^$j" for i in testvals, j in testvals + int8_res = try + i^j + catch e + e + end + if int8_res isa Int8 + @test (big(i)^big(j)) % Int8 === int8_res + else + # Test both have exception of the same type + @test_throws typeof(int8_res) big(i)^big(j) + end + end +end + +@testset "modular invert" begin + # test invert is correct and does not mutate + a = BigInt(3) + b = BigInt(7) + i = BigInt(5) + @test Base.GMP.MPZ.invert(a, b) == i + @test a == BigInt(3) + @test b == BigInt(7) + + # test in place invert does mutate first argument + a = BigInt(3) + b = BigInt(7) + i = BigInt(5) + i_inplace = BigInt(3) + Base.GMP.MPZ.invert!(i_inplace, b) + @test i_inplace == i + + # test in place invert does mutate only first argument + a = BigInt(3) + b = BigInt(7) + i = BigInt(5) + i_inplace = BigInt(0) + Base.GMP.MPZ.invert!(i_inplace, a, b) + @test i_inplace == i + @test a == BigInt(3) + @test b == BigInt(7) +end @testset "math ops returning BigFloat" begin # operations that when applied to Int64 give Float64, should give BigFloat diff --git a/test/hashing.jl b/test/hashing.jl index 1c7c37d00f93b..173a31d10a6a9 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -using Random, LinearAlgebra, SparseArrays +using Random, LinearAlgebra isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") using .Main.OffsetArrays @@ -92,9 +92,8 @@ vals = Any[ Dict(x => x for x in 1:10), Dict(7=>7,9=>9,4=>4,10=>10,2=>2,3=>3,8=>8,5=>5,6=>6,1=>1), [], [1], [2], [1, 1], [1, 2], [1, 3], [2, 2], [1, 2, 2], [1, 3, 3], - zeros(2, 2), spzeros(2, 2), Matrix(1.0I, 2, 2), sparse(1.0I, 2, 2), - sparse(fill(1., 2, 2)), fill(1., 2, 2), sparse([0 0; 1 0]), [0 0; 1 0], - [-0. 0; -0. 0.], SparseMatrixCSC(2, 2, [1, 3, 3], [1, 2], [-0., -0.]), + zeros(2, 2), Matrix(1.0I, 2, 2), fill(1., 2, 2), + [-0. 0; -0. 0.], # issue #16364 1:4, 1:1:4, 1:-1:0, 1.0:4.0, 1.0:1.0:4.0, range(1, stop=4, length=4), # issue #35597, when `LinearIndices` does not begin at 1 @@ -141,13 +140,6 @@ vals = Any[ [5 1; 0 0], [1 1; 0 1], [0 2; 3 0], [0 2; 4 6], [4 0; 0 1], [0 0 0; 0 0 0], [1 0 0; 0 0 1], [0 0 2; 3 0 0], [0 0 7; 6 1 2], [4 0 0; 3 0 1], [0 2 4; 6 0 0], - # various stored zeros patterns - sparse([1], [1], [0]), sparse([1], [1], [-0.0]), - sparse([1, 2], [1, 1], [-0.0, 0.0]), sparse([1, 2], [1, 1], [0.0, -0.0]), - sparse([1, 2], [1, 1], [-0.0, 0.0], 3, 1), sparse([1, 2], [1, 1], [0.0, -0.0], 3, 1), - sparse([1, 3], [1, 1], [-0.0, 0.0], 3, 1), sparse([1, 3], [1, 1], [0.0, -0.0], 3, 1), - sparse([1, 2, 3], [1, 1, 1], [-1, 0, 1], 3, 1), sparse([1, 2, 3], [1, 1, 1], [-1.0, -0.0, 1.0], 3, 1), - sparse([1, 3], [1, 1], [-1, 0], 3, 1), sparse([1, 2], [1, 1], [-1, 0], 3, 1) ] for a in vals @@ -155,7 +147,6 @@ for a in vals @test hash(convert(Array{Any}, a)) == hash(b) @test hash(convert(Array{supertype(eltype(a))}, a)) == hash(b) @test hash(convert(Array{Float64}, a)) == hash(b) - @test hash(sparse(a)) == hash(b) if !any(x -> isequal(x, -0.0), a) @test hash(convert(Array{Int}, a)) == hash(b) if all(x -> typemin(Int8) <= x <= typemax(Int8), a) @@ -169,20 +160,6 @@ end @test hash(Any[Int8(127), Int8(-128), 129, 130]) == hash([127, -128, 129, 130]) != hash([127, 128, 129, 130]) -# Test hashing sparse matrix with type which does not support - -struct CustomHashReal - x::Float64 -end -Base.hash(x::CustomHashReal, h::UInt) = hash(x.x, h) -Base.:(==)(x::CustomHashReal, y::Number) = x.x == y -Base.:(==)(x::Number, y::CustomHashReal) = x == y.x -Base.zero(::Type{CustomHashReal}) = CustomHashReal(0.0) -Base.zero(x::CustomHashReal) = zero(CustomHashReal) - -let a = sparse([CustomHashReal(0), CustomHashReal(3), CustomHashReal(3)]) - @test hash(a) == hash(Array(a)) -end - vals = Any[ 0.0:0.1:0.3, 0.3:-0.1:0.0, 0:-1:1, 0.0:-1.0:1.0, 0.0:1.1:10.0, -4:10, @@ -310,3 +287,20 @@ struct AUnionParam{T<:Union{Nothing,Float32,Float64}} end @test Type{AUnionParam{<:Union{Nothing,Float32,Float64}}} === Type{AUnionParam} @test Type{AUnionParam.body}.hash == 0 @test Type{Base.Broadcast.Broadcasted}.hash != 0 + + +@testset "issue 50628" begin + # test hashing of rationals that equal floats are equal to the float hash + @test hash(5//2) == hash(big(5)//2) == hash(2.5) + # test hashing of rational that are integers hash to the integer + @test hash(Int64(5)^25) == hash(big(5)^25) == hash(Int64(5)^25//1) == hash(big(5)^25//1) + # test integer/rational that don't fit in Float64 don't hash as Float64 + @test hash(Int64(5)^25) != hash(5.0^25) + @test hash((Int64(5)//2)^25) == hash(big(5//2)^25) + # test integer/rational that don't fit in Float64 don't hash as Float64 + @test hash((Int64(5)//2)^25) != hash(2.5^25) + # test hashing of rational with odd denominator + @test 
hash(5//3) == hash(big(5)//3) +end + +@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(hash, Tuple{Type{Int}, UInt})) diff --git a/test/interpreter.jl b/test/interpreter.jl index 0fea42e0aecdb..012a0f7fe7859 100644 --- a/test/interpreter.jl +++ b/test/interpreter.jl @@ -30,3 +30,11 @@ let p = Pipe(), wait(proc) close(p) end + +# Test generated function behavior in interpreter +@test success(pipeline(`$(Base.julia_cmd()) --compile=min -E 'include("staged.jl")'`; stderr)) + +# Test contextual execution mechanism in interpreter (#54360) +let compiler_contextual_test = escape_string(joinpath(@__DIR__,"../Compiler/test/contextual.jl")) + @test success(pipeline(`$(Base.julia_cmd()) --compile=min -E "include(\"$compiler_contextual_test\")"`; stderr)) +end diff --git a/test/intfuncs.jl b/test/intfuncs.jl index ceaac235a3da9..38f29344d2f30 100644 --- a/test/intfuncs.jl +++ b/test/intfuncs.jl @@ -4,40 +4,44 @@ using Random is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args...)) +⟷(a::T, b::T) where T <: Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128} = a === b +⟷(a::T, b::T) where T <: BigInt = a == b + @testset "gcd/lcm" begin # All Integer data types take different code paths -- test all - # TODO: Test gcd and lcm for BigInt. - for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128) - @test gcd(T(3)) === T(3) - @test gcd(T(3), T(5)) === T(1) - @test gcd(T(3), T(15)) === T(3) - @test gcd(T(0), T(15)) === T(15) - @test gcd(T(15), T(0)) === T(15) + for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt) + @test gcd(T(3)) ⟷ T(3) + @test gcd(T(3), T(5)) ⟷ T(1) + @test gcd(T(3), T(15)) ⟷ T(3) + @test gcd(T(0), T(15)) ⟷ T(15) + @test gcd(T(15), T(0)) ⟷ T(15) if T <: Signed - @test gcd(T(-12)) === T(12) - @test gcd(T(0), T(-15)) === T(15) - @test gcd(T(-15), T(0)) === T(15) - @test gcd(T(3), T(-15)) === T(3) - @test gcd(T(-3), T(-15)) === T(3) + @test gcd(T(-12)) ⟷ T(12) + @test gcd(T(0), T(-15)) ⟷ T(15) + @test gcd(T(-15), T(0)) ⟷ T(15) + @test gcd(T(3), T(-15)) ⟷ T(3) + @test gcd(T(-3), T(-15)) ⟷ T(3) end - @test gcd(T(0), T(0)) === T(0) + @test gcd(T(0), T(0)) ⟷ T(0) - @test gcd(T(2), T(4), T(6)) === T(2) + @test gcd(T(2), T(4), T(6)) ⟷ T(2) if T <: Signed - @test gcd(T(2), T(4), T(-6)) === T(2) - @test gcd(T(2), T(-4), T(-6)) === T(2) - @test gcd(T(-2), T(4), T(-6)) === T(2) - @test gcd(T(-2), T(-4), T(-6)) === T(2) + @test gcd(T(2), T(4), T(-6)) ⟷ T(2) + @test gcd(T(2), T(-4), T(-6)) ⟷ T(2) + @test gcd(T(-2), T(4), T(-6)) ⟷ T(2) + @test gcd(T(-2), T(-4), T(-6)) ⟷ T(2) end - @test gcd(typemax(T), T(1)) === T(1) - @test gcd(T(1), typemax(T)) === T(1) - @test gcd(typemax(T), T(0)) === typemax(T) - @test gcd(T(0), typemax(T)) === typemax(T) - @test gcd(typemax(T), typemax(T)) === typemax(T) - @test gcd(typemax(T), typemax(T)-T(1)) === T(1) # gcd(n, n-1) = 1. n and n-1 are always coprime. + if T != BigInt + @test gcd(typemax(T), T(1)) === T(1) + @test gcd(T(1), typemax(T)) === T(1) + @test gcd(typemax(T), T(0)) === typemax(T) + @test gcd(T(0), typemax(T)) === typemax(T) + @test gcd(typemax(T), typemax(T)) === typemax(T) + @test gcd(typemax(T), typemax(T)-T(1)) === T(1) # gcd(n, n-1) = 1. n and n-1 are always coprime. + end - if T <: Signed + if T <: Signed && T != BigInt @test gcd(-typemax(T), T(1)) === T(1) @test gcd(T(1), -typemax(T)) === T(1) @test gcd(-typemax(T), T(0)) === typemax(T) @@ -52,7 +56,7 @@ is_effect_free(args...) 
= Core.Compiler.is_effect_free(Base.infer_effects(args.. @test_throws OverflowError gcd(typemin(T), typemin(T)) @test_throws OverflowError gcd(typemin(T), T(0)) @test_throws OverflowError gcd(T(0), typemin(T)) - else + elseif T != BigInt # For Unsigned Integer types, -typemax(T) == 1. @test gcd(-typemax(T), T(1)) === T(1) @test gcd(T(1), -typemax(T)) === T(1) @@ -71,83 +75,86 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args.. @test gcd(T(0), typemin(T)) === T(0) end - @test lcm(T(0)) === T(0) - @test lcm(T(2)) === T(2) - @test lcm(T(2), T(3)) === T(6) - @test lcm(T(3), T(2)) === T(6) - @test lcm(T(4), T(6)) === T(12) - @test lcm(T(6), T(4)) === T(12) - @test lcm(T(3), T(0)) === T(0) - @test lcm(T(0), T(3)) === T(0) - @test lcm(T(0), T(0)) === T(0) + @test lcm(T(0)) ⟷ T(0) + @test lcm(T(2)) ⟷ T(2) + @test lcm(T(2), T(3)) ⟷ T(6) + @test lcm(T(3), T(2)) ⟷ T(6) + @test lcm(T(4), T(6)) ⟷ T(12) + @test lcm(T(6), T(4)) ⟷ T(12) + @test lcm(T(3), T(0)) ⟷ T(0) + @test lcm(T(0), T(3)) ⟷ T(0) + @test lcm(T(0), T(0)) ⟷ T(0) if T <: Signed - @test lcm(T(-12)) === T(12) - @test lcm(T(0), T(-4)) === T(0) - @test lcm(T(-4), T(0)) === T(0) - @test lcm(T(4), T(-6)) === T(12) - @test lcm(T(-4), T(-6)) === T(12) + @test lcm(T(-12)) ⟷ T(12) + @test lcm(T(0), T(-4)) ⟷ T(0) + @test lcm(T(-4), T(0)) ⟷ T(0) + @test lcm(T(4), T(-6)) ⟷ T(12) + @test lcm(T(-4), T(-6)) ⟷ T(12) end - @test lcm(T(2), T(4), T(6)) === T(12) - @test lcm(T(2), T(4), T(0)) === T(0) + @test lcm(T(2), T(4), T(6)) ⟷ T(12) + @test lcm(T(2), T(4), T(0)) ⟷ T(0) if T <: Signed - @test lcm(T(2), T(4), T(-6)) === T(12) - @test lcm(T(2), T(-4), T(-6)) === T(12) - @test lcm(T(-2), T(-4), T(-6)) === T(12) - @test lcm(T(-2), T(0), T(-6)) === T(0) + @test lcm(T(2), T(4), T(-6)) ⟷ T(12) + @test lcm(T(2), T(-4), T(-6)) ⟷ T(12) + @test lcm(T(-2), T(-4), T(-6)) ⟷ T(12) + @test lcm(T(-2), T(0), T(-6)) ⟷ T(0) end - @test lcm(typemax(T), T(1)) === typemax(T) - @test lcm(T(1), typemax(T)) === typemax(T) - @test lcm(typemax(T), T(0)) === T(0) - @test lcm(T(0), typemax(T)) === T(0) - @test lcm(typemax(T), typemax(T)) === typemax(T) - @test_throws OverflowError lcm(typemax(T), typemax(T)-T(1)) # lcm(n, n-1) = n*(n-1). Since n and n-1 are always coprime. - @test_throws OverflowError lcm(typemax(T), T(2)) - - let x = isqrt(typemax(T))+T(1) # smallest number x such that x^2 > typemax(T) - @test lcm(x, x) === x - @test_throws OverflowError lcm(x, x+T(1)) # lcm(n, n+1) = n*(n+1). Since n and n+1 are always coprime. - end - - if T <: Signed - @test lcm(-typemax(T), T(1)) === typemax(T) - @test lcm(T(1), -typemax(T)) === typemax(T) - @test lcm(-typemax(T), T(0)) === T(0) - @test lcm(T(0), -typemax(T)) === T(0) - @test lcm(-typemax(T), -typemax(T)) === typemax(T) - @test lcm(typemax(T), -typemax(T)) === typemax(T) - @test lcm(-typemax(T), typemax(T)) === typemax(T) - - @test_throws OverflowError lcm(typemin(T), T(1)) - @test_throws OverflowError lcm(T(1), typemin(T)) - @test lcm(typemin(T), T(0)) === T(0) - @test lcm(T(0), typemin(T)) === T(0) - @test_throws OverflowError lcm(typemin(T), typemin(T)+T(1)) # lcm(n, n+1) = n*(n+1). - @test_throws OverflowError lcm(typemin(T), typemin(T)) - else - # For Unsigned Integer types, -typemax(T) == 1. 
- @test lcm(-typemax(T), T(1)) === T(1) - @test lcm(T(1), -typemax(T)) === T(1) - @test lcm(-typemax(T), T(0)) === T(0) - @test lcm(T(0), -typemax(T)) === T(0) - @test lcm(-typemax(T), -typemax(T)) === T(1) - @test lcm(-typemax(T), typemax(T)) === typemax(T) - @test lcm(typemax(T), -typemax(T)) === typemax(T) + if T != BigInt + @test lcm(typemax(T), T(1)) === typemax(T) + @test lcm(T(1), typemax(T)) === typemax(T) + @test lcm(typemax(T), T(0)) === T(0) + @test lcm(T(0), typemax(T)) === T(0) + @test lcm(typemax(T), typemax(T)) === typemax(T) + @test_throws OverflowError lcm(typemax(T), typemax(T)-T(1)) # lcm(n, n-1) = n*(n-1). Since n and n-1 are always coprime. + @test_throws OverflowError lcm(typemax(T), T(2)) + + let x = isqrt(typemax(T))+T(1) # smallest number x such that x^2 > typemax(T) + @test lcm(x, x) === x + @test_throws OverflowError lcm(x, x+T(1)) # lcm(n, n+1) = n*(n+1). Since n and n+1 are always coprime. + end - # For Unsigned Integer types, typemin(T) == 0. - @test lcm(typemin(T), T(1)) === lcm(T(0), T(1)) === T(0) - @test lcm(T(1), typemin(T)) === T(0) - @test lcm(typemin(T), T(0)) === T(0) - @test lcm(T(0), typemin(T)) === T(0) - @test lcm(typemin(T), typemin(T)) === T(0) - @test lcm(typemin(T), typemin(T)+T(1)) === T(0) + if T <: Signed + @test lcm(-typemax(T), T(1)) === typemax(T) + @test lcm(T(1), -typemax(T)) === typemax(T) + @test lcm(-typemax(T), T(0)) === T(0) + @test lcm(T(0), -typemax(T)) === T(0) + @test lcm(-typemax(T), -typemax(T)) === typemax(T) + @test lcm(typemax(T), -typemax(T)) === typemax(T) + @test lcm(-typemax(T), typemax(T)) === typemax(T) + + @test_throws OverflowError lcm(typemin(T), T(1)) + @test_throws OverflowError lcm(T(1), typemin(T)) + @test lcm(typemin(T), T(0)) === T(0) + @test lcm(T(0), typemin(T)) === T(0) + @test_throws OverflowError lcm(typemin(T), typemin(T)+T(1)) # lcm(n, n+1) = n*(n+1). + @test_throws OverflowError lcm(typemin(T), typemin(T)) + else + # For Unsigned Integer types, -typemax(T) == 1. + @test lcm(-typemax(T), T(1)) === T(1) + @test lcm(T(1), -typemax(T)) === T(1) + @test lcm(-typemax(T), T(0)) === T(0) + @test lcm(T(0), -typemax(T)) === T(0) + @test lcm(-typemax(T), -typemax(T)) === T(1) + @test lcm(-typemax(T), typemax(T)) === typemax(T) + @test lcm(typemax(T), -typemax(T)) === typemax(T) + + # For Unsigned Integer types, typemin(T) == 0. + @test lcm(typemin(T), T(1)) === lcm(T(0), T(1)) === T(0) + @test lcm(T(1), typemin(T)) === T(0) + @test lcm(typemin(T), T(0)) === T(0) + @test lcm(T(0), typemin(T)) === T(0) + @test lcm(typemin(T), typemin(T)) === T(0) + @test lcm(typemin(T), typemin(T)+T(1)) === T(0) + end end end @test lcm(0x5, 3) == 15 @test gcd(0xf, 20) == 5 @test gcd(UInt32(6), Int8(-50)) == 2 @test gcd(typemax(UInt), -16) == 1 + @test gcd(typemax(UInt), BigInt(1236189723689716298376189726398761298361892)) == 1 @testset "effects" begin @test is_effect_free(gcd, Tuple{Int,Int}) @@ -156,45 +163,55 @@ is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args.. end @testset "gcd/lcm for arrays" begin - # TODO: Test gcd and lcm for BigInt arrays. 
- for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128) - @test gcd(T[]) === T(0) - @test gcd(T[3, 5]) === T(1) - @test gcd(T[3, 15]) === T(3) - @test gcd(T[0, 15]) === T(15) + for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt) + @test gcd(T[]) ⟷ T(0) + @test gcd(T[3, 5]) ⟷ T(1) + @test gcd(T[3, 15]) ⟷ T(3) + @test gcd(T[0, 15]) ⟷ T(15) if T <: Signed - @test gcd(T[-12]) === T(12) - @test gcd(T[3,-15]) === T(3) - @test gcd(T[-3,-15]) === T(3) + @test gcd(T[-12]) ⟷ T(12) + @test gcd(T[3,-15]) ⟷ T(3) + @test gcd(T[-3,-15]) ⟷ T(3) end - @test gcd(T[0, 0]) === T(0) + @test gcd(T[0, 0]) ⟷ T(0) - @test gcd(T[2, 4, 6]) === T(2) - @test gcd(T[2, 4, 3, 5]) === T(1) + @test gcd(T[2, 4, 6]) ⟷ T(2) + @test gcd(T[2, 4, 3, 5]) ⟷ T(1) - @test lcm(T[]) === T(1) - @test lcm(T[2, 3]) === T(6) - @test lcm(T[4, 6]) === T(12) - @test lcm(T[3, 0]) === T(0) - @test lcm(T[0, 0]) === T(0) + @test lcm(T[]) ⟷ T(1) + @test lcm(T[2, 3]) ⟷ T(6) + @test lcm(T[4, 6]) ⟷ T(12) + @test lcm(T[3, 0]) ⟷ T(0) + @test lcm(T[0, 0]) ⟷ T(0) if T <: Signed - @test lcm(T[-2]) === T(2) - @test lcm(T[4, -6]) === T(12) - @test lcm(T[-4, -6]) === T(12) + @test lcm(T[-2]) ⟷ T(2) + @test lcm(T[4, -6]) ⟷ T(12) + @test lcm(T[-4, -6]) ⟷ T(12) end - @test lcm(T[2, 4, 6]) === T(12) + @test lcm(T[2, 4, 6]) ⟷ T(12) end + + # Issue #55379 + @test lcm([1//2; 1//2]) === lcm([1//2, 1//2]) === lcm(1//2, 1//2) === 1//2 + @test gcd(Int[]) === 0 + @test lcm(Int[]) === 1 + @test gcd(Rational{Int}[]) === 0//1 + @test_throws ArgumentError("lcm has no identity for Rational{$Int}") lcm(Rational{Int}[]) end +⟷(a::Tuple{T, T, T}, b::Tuple{T, T, T}) where T <: Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128} = a === b +⟷(a::Tuple{T, T, T}, b::Tuple{T, T, T}) where T <: BigInt = a == b @testset "gcdx" begin - # TODO: Test gcdx for BigInt. 
- for T in (Int8, Int16, Int32, Int64, Int128) - @test gcdx(T(5), T(12)) === (T(1), T(5), T(-2)) - @test gcdx(T(5), T(-12)) === (T(1), T(5), T(2)) - @test gcdx(T(-5), T(12)) === (T(1), T(-5), T(-2)) - @test gcdx(T(-5), T(-12)) === (T(1), T(-5), T(2)) - @test gcdx(T(-25), T(-4)) === (T(1), T(-1), T(6)) + for T in (Int8, Int16, Int32, Int64, Int128, BigInt) + @test gcdx(T(5), T(12)) ⟷ (T(1), T(5), T(-2)) + @test gcdx(T(5), T(-12)) ⟷ (T(1), T(5), T(2)) + @test gcdx(T(-5), T(12)) ⟷ (T(1), T(-5), T(-2)) + @test gcdx(T(-5), T(-12)) ⟷ (T(1), T(-5), T(2)) + @test gcdx(T(-25), T(-4)) ⟷ (T(1), T(-1), T(6)) + @test gcdx(T(0), T(0)) ⟷ (T(0), T(0), T(0)) + @test gcdx(T(8), T(0)) ⟷ (T(8), T(1), T(0)) + @test gcdx(T(0), T(-8)) ⟷ (T(8), T(0), T(-1)) end x, y = Int8(-12), UInt(100) d, u, v = gcdx(x, y) @@ -221,7 +238,7 @@ end @test_throws MethodError gcdx(MyOtherRational(2//3), MyOtherRational(3//4)) end -@testset "invmod" begin +@testset "invmod(n, m)" begin @test invmod(6, 31) === 26 @test invmod(-1, 3) === 2 @test invmod(1, -3) === -2 @@ -256,6 +273,37 @@ end end end +@testset "invmod(n)" begin + for T in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128) + if sizeof(T) ≤ 2 + # test full domain for small types + for a = typemin(T)+true:T(2):typemax(T) + b = invmod(a) + @test a * b == 1 + end + else + # test random sample for large types + for _ = 1:2^12 + a = rand(T) | true + b = invmod(a) + @test a * b == 1 + end + end + end +end + +@testset "invmod(n, T)" begin + for S in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128), + T in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128) + for _ = 1:2^8 + a = rand(S) | true + b = invmod(a, T) + @test (a * b) % T == 1 + @test (a % T) * b == 1 + end + end +end + @testset "powermod" begin @test powermod(2, 3, 5) == 3 @test powermod(2, 3, -5) == -2 @@ -552,6 +600,10 @@ end x>=0 && @test binomial(x,x-T(2)) == div(x*(x-1), 2) end @test @inferred(binomial(one(T),one(T))) isa T + + # Arguments of different Integer types do not lead to computation of + # generalized binomial coefficient (issue #54296) + @test @inferred(binomial(Int64(5), T(2))) === Int64(10) end for x in ((false,false), (false,true), (true,false), (true,true)) @test binomial(x...) == (x != (false,true)) @@ -571,3 +623,20 @@ end @test Base.infer_effects(gcdx, (Int,Int)) |> Core.Compiler.is_foldable @test Base.infer_effects(invmod, (Int,Int)) |> Core.Compiler.is_foldable @test Base.infer_effects(binomial, (Int,Int)) |> Core.Compiler.is_foldable + +@testset "literal power" begin + @testset for T in Base.uniontypes(Base.HWReal) + ns = (T(0), T(1), T(5)) + if T <: AbstractFloat + ns = (ns..., T(3.14), T(-2.71)) + end + for n in ns + @test n ^ 0 === T(1) + @test n ^ 1 === n + @test n ^ 2 === n * n + @test n ^ 3 === n * n * n + @test n ^ -1 ≈ inv(n) + @test n ^ -2 ≈ inv(n) * inv(n) + end + end +end diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 3c49afe2c4d7e..7a63cd1c0a62e 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -180,28 +180,12 @@ end @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3) end -if Sys.ARCH == :aarch64 || Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le - # On AArch64 we are following the `_Float16` ABI. Buthe these functions expect `Int16`. - # TODO: SHould we have `Chalf == Int16` and `Cfloat16 == Float16`? 
- extendhfsf2(x::Float16) = ccall("extern __extendhfsf2", llvmcall, Float32, (UInt16,), reinterpret(UInt16, x)) - gnu_h2f_ieee(x::Float16) = ccall("extern __gnu_h2f_ieee", llvmcall, Float32, (UInt16,), reinterpret(UInt16, x)) - truncsfhf2(x::Float32) = reinterpret(Float16, ccall("extern __truncsfhf2", llvmcall, UInt16, (Float32,), x)) - gnu_f2h_ieee(x::Float32) = reinterpret(Float16, ccall("extern __gnu_f2h_ieee", llvmcall, UInt16, (Float32,), x)) - truncdfhf2(x::Float64) = reinterpret(Float16, ccall("extern __truncdfhf2", llvmcall, UInt16, (Float64,), x)) -else - extendhfsf2(x::Float16) = ccall("extern __extendhfsf2", llvmcall, Float32, (Float16,), x) - gnu_h2f_ieee(x::Float16) = ccall("extern __gnu_h2f_ieee", llvmcall, Float32, (Float16,), x) - truncsfhf2(x::Float32) = ccall("extern __truncsfhf2", llvmcall, Float16, (Float32,), x) - gnu_f2h_ieee(x::Float32) = ccall("extern __gnu_f2h_ieee", llvmcall, Float16, (Float32,), x) - truncdfhf2(x::Float64) = ccall("extern __truncdfhf2", llvmcall, Float16, (Float64,), x) -end - @testset "Float16 intrinsics (crt)" begin - @test extendhfsf2(Float16(3.3)) == 3.3007812f0 + gnu_h2f_ieee(x::Float16) = ccall("julia__gnu_h2f_ieee", Float32, (Float16,), x) + gnu_f2h_ieee(x::Float32) = ccall("julia__gnu_f2h_ieee", Float16, (Float32,), x) + @test gnu_h2f_ieee(Float16(3.3)) == 3.3007812f0 - @test truncsfhf2(3.3f0) == Float16(3.3) @test gnu_f2h_ieee(3.3f0) == Float16(3.3) - @test truncdfhf2(3.3) == Float16(3.3) end using Base.Experimental: @force_compile @@ -213,8 +197,8 @@ for order in (:not_atomic, :monotonic, :acquire, :release, :acquire_release, :se @test (order -> Core.Intrinsics.atomic_fence(order))(order) === nothing @test Base.invokelatest(@eval () -> Core.Intrinsics.atomic_fence($(QuoteNode(order)))) === nothing end -@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) == nothing -@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) == nothing +@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) === nothing +@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) === nothing primitive type Int256 <: Signed 256 end Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i) @@ -236,7 +220,7 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(10), S(3), :sequentially_consistent, :sequentially_consistent) end @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] - if sizeof(r) > 8 + if sizeof(r) > 2*sizeof(Int) @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") unsafe_load(p, :sequentially_consistent) @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") unsafe_store!(p, T(1), :sequentially_consistent) @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") unsafe_swap!(p, T(100), :sequentially_consistent) @@ -361,3 +345,16 @@ Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")") @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] end end)() + +@testset "issue #54548" begin + @inline passthrough(ptr::Core.LLVMPtr{T,A}) where {T,A} = Base.llvmcall((""" + define ptr addrspace(1) @entry(ptr addrspace(1) %0) #0 { + entry: + ret ptr addrspace(1) %0 + } + + attributes #0 = { alwaysinline }""", "entry"), + Core.LLVMPtr{T,A}, Tuple{Core.LLVMPtr{T,A}}, ptr) + f(gws) = passthrough(Core.bitcast(Core.LLVMPtr{UInt32,1}, gws)) + f(C_NULL) 
+end diff --git a/test/iobuffer.jl b/test/iobuffer.jl index ec77903b4a5b8..a9d58f4b7871e 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -120,6 +120,7 @@ end Base.compact(io) @test position(io) == 0 @test ioslength(io) == 0 + Base._resize!(io,0) Base.ensureroom(io,50) @test position(io) == 0 @test ioslength(io) == 0 @@ -195,6 +196,31 @@ end @test position(skip(io, -3)) == 0 end +@testset "issue #53908" begin + @testset "offset $first" for first in (false, true) + b = collect(0x01:0x05) + sizehint!(b, 100; first) # make offset non zero + io = IOBuffer(b) + @test position(skip(io, 4)) == 4 + @test position(skip(io, typemax(Int))) == 5 + @test position(skip(io, typemax(Int128))) == 5 + @test position(skip(io, typemax(Int32))) == 5 + @test position(skip(io, typemin(Int))) == 0 + @test position(skip(io, typemin(Int128))) == 0 + @test position(skip(io, typemin(Int32))) == 0 + @test position(skip(io, 4)) == 4 + @test position(skip(io, -2)) == 2 + @test position(skip(io, -2)) == 0 + @test position(seek(io, -2)) == 0 + @test position(seek(io, typemax(Int))) == 5 + @test position(seek(io, typemax(Int128))) == 5 + @test position(seek(io, typemax(Int32))) == 5 + @test position(seek(io, typemin(Int))) == 0 + @test position(seek(io, typemin(Int128))) == 0 + @test position(seek(io, typemin(Int32))) == 0 + end +end + @testset "pr #11554" begin io = IOBuffer(SubString("***αhelloworldω***", 4, 16)) io2 = IOBuffer(Vector{UInt8}(b"goodnightmoon"), read=true, write=true) @@ -251,6 +277,7 @@ end c = zeros(UInt8,8) @test bytesavailable(bstream) == 8 @test !eof(bstream) + @test Base.reseteof(bstream) === nothing # TODO: Actually test intended effect read!(bstream,c) @test c == a[3:10] @test closewrite(bstream) === nothing @@ -324,7 +351,7 @@ end a = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) mark(a) # mark at position 0 write(a, "Hello!") - @test Base.compact(a) == nothing # because pointer > mark + @test Base.compact(a) === nothing # because pointer > mark close(a) b = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) write(b, "Hello!") @@ -357,3 +384,24 @@ end seek(io,0) @test Base.read_sub(io,v,1,1) == [1,0] end + +@testset "with offset" begin + b = pushfirst!([0x02], 0x01) + @test take!(IOBuffer(b)) == [0x01, 0x02] +end + +@testset "#54636 reading from non-dense vectors" begin + data = 0x00:0xFF + io = IOBuffer(data) + @test read(io) == data + + data = @view(collect(0x00:0x0f)[begin:2:end]) + io = IOBuffer(data) + @test read(io) == data +end + +@testset "Writing Char to full buffer" begin + io = IOBuffer(;maxsize=1) + write(io, 'a') + @test write(io, 'a') == 0 +end diff --git a/test/iostream.jl b/test/iostream.jl index bc4751fb1fca7..13d01e61bbf8c 100644 --- a/test/iostream.jl +++ b/test/iostream.jl @@ -119,6 +119,24 @@ end end end +@testset "read!/write(::IO, A::StridedArray)" begin + s1 = reshape(view(rand(UInt8, 16), 1:16), 2, 2, 2, 2) + s2 = view(s1, 1:2, 1:2, 1:2, 1:2) + s3 = view(s1, 1:2, 1:2, 1, 1:2) + mktemp() do path, io + b = Vector{UInt8}(undef, 17) + for s::StridedArray in (s3, s1, s2) + @test write(io, s) == length(s) + seek(io, 0) + @test readbytes!(io, b) == length(s) + seek(io, 0) + @test view(b, 1:length(s)) == vec(s) + @test read!(io, fill!(deepcopy(s), 0)) == s + seek(io, 0) + end + end +end + @test Base.open_flags(read=false, write=true, append=false) == (read=false, write=true, create=true, truncate=true, append=false) @testset "issue #30978" begin @@ -172,3 +190,7 @@ end @test all(T -> T <: Union{UInt, Int}, 
Base.return_types(unsafe_write, (IO, Ptr{UInt8}, UInt))) @test all(T -> T === Bool, Base.return_types(eof, (IO,))) end + +@testset "fd" begin + @test open(fd, tempname(), "w") isa RawFD +end diff --git a/test/iterators.jl b/test/iterators.jl index 59588bdac9684..06f08cff4f6ad 100644 --- a/test/iterators.jl +++ b/test/iterators.jl @@ -5,34 +5,38 @@ using Random using Base: IdentityUnitRange using Dates: Date, Day -@test Base.IteratorSize(Any) isa Base.SizeUnknown +@test (@inferred Base.IteratorSize(Any)) isa Base.SizeUnknown # zip and filter iterators # issue #4718 @test collect(Iterators.filter(x->x[1], zip([true, false, true, false],"abcd"))) == [(true,'a'),(true,'c')] +# issue #45085 +@test_throws ArgumentError Iterators.reverse(zip("abc", "abcd")) +@test_throws ArgumentError Iterators.reverse(zip("abc", Iterators.cycle("ab"))) + let z = zip(1:2) - @test size(z) == (2,) + @test (@inferred size(z)) == (2,) @test collect(z) == [(1,), (2,)] # Issue #13979 - @test eltype(z) == Tuple{Int} + @test (@inferred eltype(z)) == Tuple{Int} end for z in (zip(1:2, 3:4), zip(1:2, 3:5)) @test collect(z) == [(1,3), (2,4)] - @test eltype(z) == Tuple{Int,Int} - @test size(z) == (2,) - @test axes(z) == (Base.OneTo(2),) - @test length(z) == 2 + @test (@inferred eltype(z)) == Tuple{Int,Int} + @test (@inferred size(z)) == (2,) + @test (@inferred axes(z)) == (Base.OneTo(2),) + @test (@inferred length(z)) == 2 end let z = zip(1:2, Iterators.countfrom(3)) @test collect(z) == [(1,3), (2,4)] - @test eltype(z) == Tuple{Int,Int} + @test (@inferred eltype(z)) == Tuple{Int,Int} @test_throws MethodError size(z) # by convention, the zip of a finite and # an infinite iterator has only `length` @test_throws MethodError axes(z) - @test length(z) == 2 + @test (@inferred length(z)) == 2 end let z = zip([i*j for i in 1:3, j in -1:2:1], 1:6) @@ -42,29 +46,29 @@ let z = zip([i*j for i in 1:3, j in -1:2:1], 1:6) (1, 4) (2, 5) (3, 6) ] - @test eltype(z) == Tuple{Int,Int} + @test (@inferred eltype(z)) == Tuple{Int,Int} @test_throws DimensionMismatch size(z) @test_throws DimensionMismatch axes(z) - @test length(z) == 6 + @test (@inferred length(z)) == 6 end let z = zip([i*j for i in 1:3, j in -1:2:1], [i*j for i in 1:3, j in -1:2:1]) @test collect(z) == [(-1, -1) (1, 1) (-2, -2) (2, 2) (-3, -3) (3, 3)] - @test eltype(z) == Tuple{Int,Int} - @test size(z) == (3, 2) - @test axes(z) == (Base.OneTo(3), Base.OneTo(2)) - @test length(z) == 6 + @test (@inferred eltype(z)) == Tuple{Int,Int} + @test (@inferred size(z)) == (3, 2) + @test (@inferred axes(z)) == (Base.OneTo(3), Base.OneTo(2)) + @test (@inferred length(z)) == 6 end let z = zip(1:2, 3:4, 5:6) - @test size(z) == (2,) + @test (@inferred size(z)) == (2,) @test collect(z) == [(1,3,5), (2,4,6)] - @test eltype(z) == Tuple{Int,Int,Int} + @test (@inferred eltype(z)) == Tuple{Int,Int,Int} end -@test eltype(Iterators.filter(isodd, 1:5)) == Int +@test (@inferred eltype(Iterators.filter(isodd, 1:5))) == Int # typed `collect` @test collect(Float64, Iterators.filter(isodd, [1,2,3,4]))[1] === 1.0 @@ -98,10 +102,10 @@ let zeb = IOBuffer("1\n2\n3\n4\n5\n"), @test res == [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')] end -@test length(zip(cycle(1:3), 1:7)) == 7 -@test length(zip(cycle(1:3), 1:7, cycle(1:3))) == 7 -@test length(zip(1:3,product(1:7,cycle(1:3)))) == 3 -@test length(zip(1:3,product(1:7,cycle(1:3)),8)) == 1 +@test (@inferred length(zip(cycle(1:3), 1:7))) == 7 +@test (@inferred length(zip(cycle(1:3), 1:7, cycle(1:3)))) == 7 +@test (@inferred length(zip(1:3,product(1:7,cycle(1:3))))) 
== 3 +@test (@inferred length(zip(1:3,product(1:7,cycle(1:3)),8))) == 1 @test_throws ArgumentError length(zip()) # length of zip of empty tuple # map @@ -150,7 +154,7 @@ end # take # ---- let t = take(0:2:8, 10), i = 0 - @test length(collect(t)) == 5 == length(t) + @test length(collect(t)) == 5 == @inferred length(t) for j = t @test j == i*2 @@ -167,11 +171,11 @@ let i = 0 @test i == 10 end -@test isempty(take(0:2:8, 0)) +@test @inferred isempty(take(0:2:8, 0)) @test_throws ArgumentError take(0:2:8, -1) -@test length(take(1:3,typemax(Int))) == 3 -@test length(take(countfrom(1),3)) == 3 -@test length(take(1:6,3)) == 3 +@test (@inferred length(take(1:3,typemax(Int)))) == 3 +@test (@inferred length(take(countfrom(1),3))) == 3 +@test (@inferred length(take(1:6,3))) == 3 # drop # ---- @@ -183,15 +187,15 @@ let i = 0 @test i == 4 end -@test isempty(drop(0:2:10, 100)) -@test isempty(collect(drop(0:2:10, 100))) +@test @inferred isempty(drop(0:2:10, 100)) +@test @inferred isempty(collect(drop(0:2:10, 100))) @test_throws ArgumentError drop(0:2:8, -1) -@test length(drop(1:3,typemax(Int))) == 0 -@test length(drop(UInt(1):2, 3)) == 0 -@test length(drop(StepRangeLen(1, 1, UInt(2)), 3)) == 0 -@test Base.IteratorSize(drop(countfrom(1),3)) == Base.IsInfinite() +@test (@inferred length(drop(1:3,typemax(Int)))) == 0 +@test (@inferred length(drop(UInt(1):2, 3))) == 0 +@test (@inferred length(drop(StepRangeLen(1, 1, UInt(2)), 3))) == 0 +@test (@inferred Base.IteratorSize(drop(countfrom(1),3))) == Base.IsInfinite() @test_throws MethodError length(drop(countfrom(1), 3)) -@test Base.IteratorSize(Iterators.drop(Iterators.filter(i -> i>0, 1:10), 2)) == Base.SizeUnknown() +@test (@inferred Base.IteratorSize(Iterators.drop(Iterators.filter(i -> i>0, 1:10), 2))) == Base.SizeUnknown() let x = Iterators.drop(Iterators.Stateful("abc"), 2) @test !Base.isdone(x, nothing) @@ -208,7 +212,7 @@ for xs in Any["abc", [1, 2, 3]] @test drop(drop(xs, 1), 1) === drop(xs, 2) @test take(drop(xs, 1), 1) === drop(take(xs, 2), 1) @test take(drop(xs, 3), 0) === drop(take(xs, 2), 3) - @test isempty(drop(drop(xs, 2), 2)) + @test @inferred isempty(drop(drop(xs, 2), 2)) @test drop(take(drop(xs, 1), 2), 1) === take(drop(xs, 2), 1) @test take(drop(take(xs, 3), 1), 1) === take(drop(xs, 1), 1) end @@ -222,7 +226,7 @@ end @test collect(takewhile(Returns(true),5:10)) == 5:10 @test collect(takewhile(isodd,[1,1,2,3])) == [1,1] @test collect(takewhile(<(2), takewhile(<(3), [1,1,2,3]))) == [1,1] - @test Base.IteratorEltype(typeof(takewhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown + @test (@inferred Base.IteratorEltype(typeof(takewhile(<(4),Iterators.map(identity, 1:10))))) isa Base.EltypeUnknown end # dropwhile @@ -230,12 +234,12 @@ end @testset begin @test collect(dropwhile(<(4), 1:10)) == 4:10 @test collect(dropwhile(<(4), 1:10)) isa Vector{Int} - @test isempty(dropwhile(<(4), [])) + @test @inferred isempty(dropwhile(<(4), [])) @test collect(dropwhile(Returns(false),1:3)) == 1:3 - @test isempty(dropwhile(Returns(true), 1:3)) + @test @inferred isempty(dropwhile(Returns(true), 1:3)) @test collect(dropwhile(isodd,[1,1,2,3])) == [2,3] @test collect(dropwhile(iseven,dropwhile(isodd,[1,1,2,3]))) == [3] - @test Base.IteratorEltype(typeof(dropwhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown + @test (@inferred Base.IteratorEltype(typeof(dropwhile(<(4),Iterators.map(identity, 1:10))))) isa Base.EltypeUnknown end # cycle @@ -250,6 +254,22 @@ let i = 0 @test !Base.isdone(cycle(0:3), 1) end +@testset "cycle(iter, n)" begin + 
@test collect(cycle(0:3, 2)) == [0, 1, 2, 3, 0, 1, 2, 3] + @test collect(cycle(Iterators.filter(iseven, 1:4), 2)) == [2, 4, 2, 4] + @test collect(take(cycle(countfrom(11), 3), 4)) == 11:14 + + @test (@inferred isempty(cycle(1:0))) == (@inferred isempty(cycle(1:0, 3))) == true + @test @inferred isempty(cycle(1:5, 0)) + @test @inferred isempty(cycle(Iterators.filter(iseven, 1:4), 0)) + + @test (@inferred eltype(cycle(0:3, 2))) === Int + @test (@inferred Base.IteratorEltype(cycle(0:3, 2))) == Base.HasEltype() + + Base.haslength(cycle(0:3, 2)) == false # but not sure we should test these + (@inferred Base.IteratorSize(cycle(0:3, 2))) == Base.SizeUnknown() +end + # repeated # -------- let i = 0 @@ -266,13 +286,13 @@ let i = 0 i <= 10 || break end end -@test eltype(repeated(0)) == Int -@test eltype(repeated(0, 5)) == Int -@test Base.IteratorSize(repeated(0)) == Base.IsInfinite() -@test Base.IteratorSize(repeated(0, 5)) == Base.HasLength() -@test Base.IteratorEltype(repeated(0)) == Base.HasEltype() -@test Base.IteratorEltype(repeated(0, 5)) == Base.HasEltype() -@test Base.IteratorSize(zip(repeated(0), repeated(0))) == Base.IsInfinite() +@test (@inferred eltype(repeated(0))) == Int +@test (@inferred eltype(repeated(0, 5))) == Int +@test (@inferred Base.IteratorSize(repeated(0))) == Base.IsInfinite() +@test (@inferred Base.IteratorSize(repeated(0, 5))) == Base.HasLength() +@test (@inferred Base.IteratorEltype(repeated(0))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(repeated(0, 5))) == Base.HasEltype() +@test (@inferred Base.IteratorSize(zip(repeated(0), repeated(0)))) == Base.IsInfinite() # product # ------- @@ -284,8 +304,8 @@ for itr in [product(1:0), product(1:0, 1:1, 1:2), product(1:1, 1:0, 1:2), product(1:1, 1:2 ,1:0)] - @test isempty(itr) - @test isempty(collect(itr)) + @test @inferred isempty(itr) + @test @inferred isempty(collect(itr)) end # collect a product - first iterators runs faster @@ -305,10 +325,10 @@ end let (a, b) = (1:3, [4 6; 5 7]) p = product(a, b) - @test size(p) == (3, 2, 2) - @test length(p) == 12 - @test ndims(p) == 3 - @test eltype(p) == NTuple{2, Int} + @test (@inferred size(p)) == (3, 2, 2) + @test (@inferred length(p)) == 12 + @test (@inferred ndims(p)) == 3 + @test (@inferred eltype(p)) == NTuple{2, Int} cp = collect(p) for i = 1:3 @test cp[i, :, :] == [(i, 4) (i, 6); @@ -336,28 +356,28 @@ let a = 1:2, c = Int32(1):Int32(0) # length - @test length(product()) == 1 - @test length(product(a)) == 2 - @test length(product(a, b)) == 20 - @test length(product(a, b, c)) == 0 + @test (@inferred length(product())) == 1 + @test (@inferred length(product(a))) == 2 + @test (@inferred length(product(a, b))) == 20 + @test (@inferred length(product(a, b, c))) == 0 # size - @test size(product()) == tuple() - @test size(product(a)) == (2,) - @test size(product(a, b)) == (2, 10) - @test size(product(a, b, c)) == (2, 10, 0) + @test (@inferred size(product())) == tuple() + @test (@inferred size(product(a))) == (2,) + @test (@inferred size(product(a, b))) == (2, 10) + @test (@inferred size(product(a, b, c))) == (2, 10, 0) # eltype - @test eltype(product()) == Tuple{} - @test eltype(product(a)) == Tuple{Int} - @test eltype(product(a, b)) == Tuple{Int, Float64} - @test eltype(product(a, b, c)) == Tuple{Int, Float64, Int32} + @test (@inferred eltype(product())) == Tuple{} + @test (@inferred eltype(product(a))) == Tuple{Int} + @test (@inferred eltype(product(a, b))) == Tuple{Int, Float64} + @test (@inferred eltype(product(a, b, c))) == Tuple{Int, Float64, Int32} # ndims - @test 
ndims(product()) == 0 - @test ndims(product(a)) == 1 - @test ndims(product(a, b)) == 2 - @test ndims(product(a, b, c)) == 3 + @test (@inferred ndims(product())) == 0 + @test (@inferred ndims(product(a))) == 1 + @test (@inferred ndims(product(a, b))) == 2 + @test (@inferred ndims(product(a, b, c))) == 3 end # with multidimensional inputs @@ -377,7 +397,7 @@ let a = randn(4, 4), (4, 4, 3, 3, 3, 2, 2, 2, 2)] for (method, fun) in zip([size, ndims, length], [x->x, length, prod]) for i in 1:length(args) - @test method(product(args[i]...)) == method(collect(product(args[i]...))) == fun(sizes[i]) + @test (@inferred method(product(args[i]...))) == method(collect(product(args[i]...))) == fun(sizes[i]) end end end @@ -393,7 +413,7 @@ let iters = (1:2, for method in [size, length, ndims, eltype] for i = 1:length(iters) args = (iters[i],) - @test method(product(args...)) == method(collect(product(args...))) + @test (@inferred method(product(args...))) == method(collect(product(args...))) for j = 1:length(iters) args = iters[i], iters[j] @test method(product(args...)) == method(collect(product(args...))) @@ -435,51 +455,51 @@ end # IteratorSize trait business let f1 = Iterators.filter(i->i>0, 1:10) - @test Base.IteratorSize(product(f1)) == Base.SizeUnknown() - @test Base.IteratorSize(product(1:2, f1)) == Base.SizeUnknown() - @test Base.IteratorSize(product(f1, 1:2)) == Base.SizeUnknown() - @test Base.IteratorSize(product(f1, f1)) == Base.SizeUnknown() - @test Base.IteratorSize(product(f1, countfrom(1))) == Base.IsInfinite() - @test Base.IteratorSize(product(countfrom(1), f1)) == Base.IsInfinite() -end -@test Base.IteratorSize(product(1:2, countfrom(1))) == Base.IsInfinite() -@test Base.IteratorSize(product(countfrom(2), countfrom(1))) == Base.IsInfinite() -@test Base.IteratorSize(product(countfrom(1), 1:2)) == Base.IsInfinite() -@test Base.IteratorSize(product(1:2)) == Base.HasShape{1}() -@test Base.IteratorSize(product(1:2, 1:2)) == Base.HasShape{2}() -@test Base.IteratorSize(product(take(1:2, 1), take(1:2, 1))) == Base.HasShape{2}() -@test Base.IteratorSize(product(take(1:2, 2))) == Base.HasShape{1}() -@test Base.IteratorSize(product([1 2; 3 4])) == Base.HasShape{2}() -@test Base.IteratorSize(product((1,2,3,4), (5, 6, 7, 8))) == Base.HasShape{2}() # product of ::HasLength and ::HasLength -@test Base.IteratorSize(product(1:2, 3:5, 5:6)) == Base.HasShape{3}() # product of 3 iterators -@test Base.IteratorSize(product([1 2; 3 4], 1:4)) == Base.HasShape{3}() # product of ::HasShape{2} with ::HasShape{1} -@test Base.IteratorSize(product([1 2; 3 4], (1,2))) == Base.HasShape{3}() # product of ::HasShape{2} with ::HasLength + @test (@inferred Base.IteratorSize(product(f1))) == Base.SizeUnknown() + @test (@inferred Base.IteratorSize(product(1:2, f1))) == Base.SizeUnknown() + @test (@inferred Base.IteratorSize(product(f1, 1:2))) == Base.SizeUnknown() + @test (@inferred Base.IteratorSize(product(f1, f1))) == Base.SizeUnknown() + @test (@inferred Base.IteratorSize(product(f1, countfrom(1)))) == Base.IsInfinite() + @test (@inferred Base.IteratorSize(product(countfrom(1), f1))) == Base.IsInfinite() +end +@test (@inferred Base.IteratorSize(product(1:2, countfrom(1)))) == Base.IsInfinite() +@test (@inferred Base.IteratorSize(product(countfrom(2), countfrom(1)))) == Base.IsInfinite() +@test (@inferred Base.IteratorSize(product(countfrom(1), 1:2))) == Base.IsInfinite() +@test (@inferred Base.IteratorSize(product(1:2))) == Base.HasShape{1}() +@test (@inferred Base.IteratorSize(product(1:2, 1:2))) == Base.HasShape{2}() 
+@test (@inferred Base.IteratorSize(product(take(1:2, 1), take(1:2, 1)))) == Base.HasShape{2}() +@test (@inferred Base.IteratorSize(product(take(1:2, 2)))) == Base.HasShape{1}() +@test (@inferred Base.IteratorSize(product([1 2; 3 4]))) == Base.HasShape{2}() +@test (@inferred Base.IteratorSize(product((1,2,3,4), (5, 6, 7, 8)))) == Base.HasShape{2}() # product of ::HasLength and ::HasLength +@test (@inferred Base.IteratorSize(product(1:2, 3:5, 5:6))) == Base.HasShape{3}() # product of 3 iterators +@test (@inferred Base.IteratorSize(product([1 2; 3 4], 1:4))) == Base.HasShape{3}() # product of ::HasShape{2} with ::HasShape{1} +@test (@inferred Base.IteratorSize(product([1 2; 3 4], (1,2)))) == Base.HasShape{3}() # product of ::HasShape{2} with ::HasLength # IteratorEltype trait business let f1 = Iterators.filter(i->i>0, 1:10) - @test Base.IteratorEltype(product(f1)) == Base.HasEltype() # FIXME? eltype(f1) is Any - @test Base.IteratorEltype(product(1:2, f1)) == Base.HasEltype() # FIXME? eltype(f1) is Any - @test Base.IteratorEltype(product(f1, 1:2)) == Base.HasEltype() # FIXME? eltype(f1) is Any - @test Base.IteratorEltype(product(f1, f1)) == Base.HasEltype() # FIXME? eltype(f1) is Any - @test Base.IteratorEltype(product(f1, countfrom(1))) == Base.HasEltype() # FIXME? eltype(f1) is Any - @test Base.IteratorEltype(product(countfrom(1), f1)) == Base.HasEltype() # FIXME? eltype(f1) is Any -end -@test Base.IteratorEltype(product(1:2, countfrom(1))) == Base.HasEltype() -@test Base.IteratorEltype(product(countfrom(1), 1:2)) == Base.HasEltype() -@test Base.IteratorEltype(product(1:2)) == Base.HasEltype() -@test Base.IteratorEltype(product(1:2, 1:2)) == Base.HasEltype() -@test Base.IteratorEltype(product(take(1:2, 1), take(1:2, 1))) == Base.HasEltype() -@test Base.IteratorEltype(product(take(1:2, 2))) == Base.HasEltype() -@test Base.IteratorEltype(product([1 2; 3 4])) == Base.HasEltype() -@test Base.IteratorEltype(product()) == Base.HasEltype() + @test (@inferred Base.IteratorEltype(product(f1))) == Base.HasEltype() # FIXME? eltype(f1) is Any + @test (@inferred Base.IteratorEltype(product(1:2, f1))) == Base.HasEltype() # FIXME? eltype(f1) is Any + @test (@inferred Base.IteratorEltype(product(f1, 1:2))) == Base.HasEltype() # FIXME? eltype(f1) is Any + @test (@inferred Base.IteratorEltype(product(f1, f1))) == Base.HasEltype() # FIXME? eltype(f1) is Any + @test (@inferred Base.IteratorEltype(product(f1, countfrom(1)))) == Base.HasEltype() # FIXME? eltype(f1) is Any + @test (@inferred Base.IteratorEltype(product(countfrom(1), f1))) == Base.HasEltype() # FIXME? 
eltype(f1) is Any +end +@test (@inferred Base.IteratorEltype(product(1:2, countfrom(1)))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product(countfrom(1), 1:2))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product(1:2))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product(1:2, 1:2))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product(take(1:2, 1), take(1:2, 1)))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product(take(1:2, 2)))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product([1 2; 3 4]))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(product())) == Base.HasEltype() @test collect(product(1:2,3:4)) == [(1,3) (1,4); (2,3) (2,4)] -@test isempty(collect(product(1:0,1:2))) -@test length(product(1:2,1:10,4:6)) == 60 -@test Base.IteratorSize(product(1:2, countfrom(1))) == Base.IsInfinite() +@test @inferred isempty(collect(product(1:0,1:2))) +@test (@inferred length(product(1:2,1:10,4:6))) == 60 +@test (@inferred Base.IteratorSize(product(1:2, countfrom(1)))) == Base.IsInfinite() @test Base.iterate(product()) == ((), true) -@test Base.iterate(product(), 1) == nothing +@test Base.iterate(product(), 1) === nothing # intersection @test intersect(product(1:3, 4:6), product(2:4, 3:5)) == Iterators.ProductIterator((2:3, 4:5)) @@ -492,21 +512,31 @@ end @test collect(flatten(Any[flatten(Any[1:2, 4:5]), flatten(Any[6:7, 8:9])])) == Any[1,2,4,5,6,7,8,9] @test collect(flatten(Any[flatten(Any[1:2, 6:5]), flatten(Any[6:7, 8:9])])) == Any[1,2,6,7,8,9] @test collect(flatten(Any[2:1])) == Any[] -@test eltype(flatten(UnitRange{Int8}[1:2, 3:4])) == Int8 -@test length(flatten(zip(1:3, 4:6))) == 6 -@test length(flatten(1:6)) == 6 +@test (@inferred eltype(flatten(UnitRange{Int8}[1:2, 3:4]))) == Int8 +@test (@inferred eltype(flatten(([1, 2], [3.0, 4.0])))) == Real +@test (@inferred eltype(flatten((a = [1, 2], b = Int8[3, 4])))) == Signed +@test (@inferred eltype(flatten((Int[], Nothing[], Int[])))) == Union{Int, Nothing} +@test (@inferred eltype(flatten((String[],)))) == String +@test (@inferred eltype(flatten((Int[], UInt[], Int8[],)))) == Integer +@test (@inferred eltype(flatten((; a = Int[], b = Nothing[], c = Int[])))) == Union{Int, Nothing} +@test (@inferred eltype(flatten((; a = String[],)))) == String +@test (@inferred eltype(flatten((; a = Int[], b = UInt[], c = Int8[],)))) == Integer +@test (@inferred eltype(flatten(()))) == Union{} +@test (@inferred eltype(flatten((;)))) == Union{} +@test (@inferred length(flatten(zip(1:3, 4:6)))) == 6 +@test (@inferred length(flatten(1:6))) == 6 @test collect(flatten(Any[])) == Any[] @test collect(flatten(())) == Union{}[] @test_throws ArgumentError length(flatten(NTuple[(1,), ()])) # #16680 @test_throws ArgumentError length(flatten([[1], [1]])) @testset "IteratorSize trait for flatten" begin - @test Base.IteratorSize(Base.Flatten((i for i=1:2) for j=1:1)) == Base.SizeUnknown() - @test Base.IteratorSize(Base.Flatten((1,2))) == Base.HasLength() - @test Base.IteratorSize(Base.Flatten(1:2:4)) == Base.HasLength() + @test (@inferred Base.IteratorSize(Base.Flatten((i for i=1:2) for j=1:1))) == Base.SizeUnknown() + @test (@inferred Base.IteratorSize(Base.Flatten((1,2)))) == Base.HasLength() + @test (@inferred Base.IteratorSize(Base.Flatten(1:2:4))) == Base.HasLength() end -@test Base.IteratorEltype(Base.Flatten((i for i=1:2) for j=1:1)) == Base.EltypeUnknown() +@test (@inferred Base.IteratorEltype(Base.Flatten((i for i=1:2) for j=1:1))) == Base.EltypeUnknown() # see #29112, 
#29464, #29548 @test Base.return_types(Base.IteratorEltype, Tuple{Array}) == [Base.HasEltype] @@ -626,21 +656,21 @@ end @test_throws ArgumentError partition(1:10, -1) @test_throws ArgumentError partition(1:0, 0) @test_throws ArgumentError partition(1:0, -1) - @test isempty(partition(1:0, 1)) - @test isempty(partition(CartesianIndices((0,1)), 1)) + @test @inferred isempty(partition(1:0, 1)) + @test @inferred isempty(partition(CartesianIndices((0,1)), 1)) end @testset "exact partition eltypes" for a in (Base.OneTo(24), 1:24, 1:1:24, LinRange(1,10,24), .1:.1:2.4, Vector(1:24), CartesianIndices((4, 6)), Dict((1:24) .=> (1:24))) P = partition(a, 2) - @test eltype(P) === typeof(first(P)) - @test Iterators.IteratorEltype(P) == Iterators.HasEltype() + @test (@inferred eltype(P)) === typeof(first(P)) + @test (@inferred Iterators.IteratorEltype(P)) == Iterators.HasEltype() if a isa AbstractArray P = partition(vec(a), 2) - @test eltype(P) === typeof(first(P)) + @test (@inferred eltype(P)) === typeof(first(P)) P = partition(reshape(a, 6, 4), 2) - @test eltype(P) === typeof(first(P)) + @test (@inferred eltype(P)) === typeof(first(P)) P = partition(reshape(a, 2, 3, 4), 2) - @test eltype(P) === typeof(first(P)) + @test (@inferred eltype(P)) === typeof(first(P)) end end @@ -661,19 +691,19 @@ let s = "Monkey 🙈🙊🙊" @test tf(1) == "M|o|n|k|e|y| |🙈|🙊|🙊" end -@test Base.IteratorEltype(partition([1,2,3,4], 2)) == Base.HasEltype() -@test Base.IteratorEltype(partition((2x for x in 1:3), 2)) == Base.EltypeUnknown() +@test (@inferred Base.IteratorEltype(partition([1,2,3,4], 2))) == Base.HasEltype() +@test (@inferred Base.IteratorEltype(partition((2x for x in 1:3), 2))) == Base.EltypeUnknown() # take and friends with arbitrary integers (#19214) for T in (UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int128, BigInt) - @test length(take(1:6, T(3))) == 3 - @test length(drop(1:6, T(3))) == 3 - @test length(repeated(1, T(5))) == 5 + @test (@inferred length(take(1:6, T(3)))) == 3 + @test (@inferred length(drop(1:6, T(3)))) == 3 + @test (@inferred length(repeated(1, T(5)))) == 5 @test collect(partition(1:5, T(5)))[1] == 1:5 end @testset "collect finite iterators issue #12009" begin - @test eltype(collect(enumerate(Iterators.Filter(x -> x>0, randn(10))))) == Tuple{Int, Float64} + @test (@inferred eltype(collect(enumerate(Iterators.Filter(x -> x>0, randn(10)))))) == Tuple{Int, Float64} end @testset "product iterator infinite loop" begin @@ -682,8 +712,8 @@ end @testset "filter empty iterable #16704" begin arr = filter(Returns(true), 1:0) - @test length(arr) == 0 - @test eltype(arr) == Int + @test (@inferred length(arr)) == 0 + @test (@inferred eltype(arr)) == Int end @testset "Pairs type" begin @@ -697,19 +727,19 @@ end ) d = pairs(A) @test d === pairs(d) - @test isempty(d) == isempty(A) - @test length(d) == length(A) + @test (@inferred isempty(d)) == isempty(A) + @test (@inferred length(d)) == length(A) @test keys(d) == keys(A) @test values(d) == A - @test Base.IteratorSize(d) == Base.IteratorSize(A) - @test Base.IteratorEltype(d) == Base.HasEltype() - @test Base.IteratorSize(pairs([1 2;3 4])) isa Base.HasShape{2} - @test isempty(d) || haskey(d, first(keys(d))) + @test (@inferred Base.IteratorSize(d)) == Base.IteratorSize(A) + @test (@inferred Base.IteratorEltype(d)) == Base.HasEltype() + @test (@inferred Base.IteratorSize(pairs([1 2;3 4]))) isa Base.HasShape{2} + @test (@inferred isempty(d)) || haskey(d, first(keys(d))) @test collect(v for (k, v) in d) == collect(A) if A isa NamedTuple K = Symbol V = isempty(d) ? 
Union{} : Float64 - @test isempty(d) || haskey(d, :a) + @test (@inferred isempty(d)) || haskey(d, :a) @test !haskey(d, :abc) @test !haskey(d, 1) @test get(A, :key) do; 99; end == 99 @@ -729,7 +759,7 @@ end end @test keytype(d) == K @test valtype(d) == V - @test eltype(d) == Pair{K, V} + @test (@inferred eltype(d)) == Pair{K, V} end let io = IOBuffer() @@ -776,7 +806,7 @@ end @testset "Iterators.Stateful" begin let a = @inferred(Iterators.Stateful("abcdef")) - @test !isempty(a) + @test !(@inferred isempty(a)) @test popfirst!(a) == 'a' @test collect(Iterators.take(a, 3)) == ['b','c','d'] @test collect(a) == ['e', 'f'] @@ -787,70 +817,72 @@ end @test peek(a) == 3 @test sum(a) == 7 end - @test eltype(Iterators.Stateful("a")) == Char + @test (@inferred eltype(Iterators.Stateful("a"))) == Char # Interaction of zip/Stateful let a = Iterators.Stateful("a"), b = "" - @test isempty(collect(zip(a,b))) - @test !isempty(a) - @test isempty(collect(zip(b,a))) - @test !isempty(a) + @test @inferred isempty(collect(zip(a,b))) + @test !(@inferred isempty(a)) + @test @inferred isempty(collect(zip(b,a))) + @test !(@inferred isempty(a)) end let a = Iterators.Stateful("a"), b = "", c = Iterators.Stateful("c") - @test isempty(collect(zip(a,b,c))) - @test !isempty(a) - @test !isempty(c) - @test isempty(collect(zip(a,c,b))) - @test !isempty(a) - @test !isempty(c) - @test isempty(collect(zip(b,a,c))) - @test !isempty(a) - @test !isempty(c) - @test isempty(collect(zip(b,c,a))) - @test !isempty(a) - @test !isempty(c) - @test isempty(collect(zip(c,a,b))) - @test !isempty(a) - @test !isempty(c) - @test isempty(collect(zip(c,b,a))) - @test !isempty(a) - @test !isempty(c) + @test @inferred isempty(collect(zip(a,b,c))) + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) + @test @inferred isempty(collect(zip(a,c,b))) + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) + @test @inferred isempty(collect(zip(b,a,c))) + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) + @test @inferred isempty(collect(zip(b,c,a))) + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) + @test @inferred isempty(collect(zip(c,a,b))) + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) + @test @inferred isempty(collect(zip(c,b,a))) + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc") - @test length(collect(zip(a,b,c))) == 1 - @test !isempty(a) - @test !isempty(c) + @test (@inferred length(collect(zip(a,b,c)))) == 1 + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc") - @test length(collect(zip(a,c,b))) == 1 - @test !isempty(a) - @test !isempty(c) + @test (@inferred length(collect(zip(a,c,b)))) == 1 + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc") - @test length(collect(zip(b,a,c))) == 1 - @test !isempty(a) - @test !isempty(c) + @test (@inferred length(collect(zip(b,a,c)))) == 1 + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc") - @test length(collect(zip(b,c,a))) == 1 - @test !isempty(a) - @test !isempty(c) + @test (@inferred length(collect(zip(b,c,a)))) == 1 + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc") - @test length(collect(zip(c,a,b))) == 1 - @test 
!isempty(a) - @test !isempty(c) + @test (@inferred length(collect(zip(c,a,b)))) == 1 + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let a = Iterators.Stateful("aa"), b = "b", c = Iterators.Stateful("cc") - @test length(collect(zip(c,b,a))) == 1 - @test !isempty(a) - @test !isempty(c) + @test (@inferred length(collect(zip(c,b,a)))) == 1 + @test !(@inferred isempty(a)) + @test !(@inferred isempty(c)) end let z = zip(Iterators.Stateful("ab"), Iterators.Stateful("b"), Iterators.Stateful("c")) v, s = iterate(z) @test Base.isdone(z, s) end - # Stateful wrapping mutable iterators of known length (#43245) - @test length(Iterators.Stateful(Iterators.Stateful(1:5))) == 5 + # Stateful does not define length + let s = Iterators.Stateful(Iterators.Stateful(1:5)) + @test_throws MethodError length(s) + end end @testset "pair for Svec" begin @@ -862,6 +894,10 @@ end @testset "inference for large zip #26765" begin x = zip(1:2, ["a", "b"], (1.0, 2.0), Base.OneTo(2), Iterators.repeated("a"), 1.0:0.2:2.0, (1 for i in 1:2), Iterators.Stateful(["a", "b", "c"]), (1.0 for i in 1:2, j in 1:3)) + @test (@inferred Base.IteratorSize(x)) isa Base.SizeUnknown + x = zip(1:2, ["a", "b"], (1.0, 2.0), Base.OneTo(2), Iterators.repeated("a"), 1.0:0.2:2.0, + (1 for i in 1:2), Iterators.cycle(Iterators.Stateful(["a", "b", "c"])), (1.0 for i in 1:2, j in 1:3)) + @test (@inferred Base.IteratorSize(x)) isa Base.HasLength @test @inferred(length(x)) == 2 z = Iterators.filter(x -> x[1] >= 1, x) @test @inferred(eltype(z)) <: Tuple{Int,String,Float64,Int,String,Float64,Any,String,Any} @@ -870,23 +906,23 @@ end end @testset "Stateful fix #30643" begin - @test Base.IteratorSize(1:10) isa Base.HasShape + @test (@inferred Base.IteratorSize(1:10)) isa Base.HasShape{1} a = Iterators.Stateful(1:10) - @test Base.IteratorSize(a) isa Base.HasLength - @test length(a) == 10 + @test (@inferred Base.IteratorSize(a)) isa Base.SizeUnknown + @test !Base.isdone(a) @test length(collect(a)) == 10 - @test length(a) == 0 + @test Base.isdone(a) b = Iterators.Stateful(Iterators.take(1:10,3)) - @test Base.IteratorSize(b) isa Base.HasLength - @test length(b) == 3 + @test (@inferred Base.IteratorSize(b)) isa Base.SizeUnknown + @test !Base.isdone(b) @test length(collect(b)) == 3 - @test length(b) == 0 + @test Base.isdone(b) c = Iterators.Stateful(Iterators.countfrom(1)) - @test Base.IteratorSize(c) isa Base.IsInfinite - @test length(Iterators.take(c,3)) == 3 + @test (@inferred Base.IteratorSize(c)) isa Base.IsInfinite + @test !Base.isdone(Iterators.take(c,3)) @test length(collect(Iterators.take(c,3))) == 3 d = Iterators.Stateful(Iterators.filter(isodd,1:10)) - @test Base.IteratorSize(d) isa Base.SizeUnknown + @test (@inferred Base.IteratorSize(d)) isa Base.SizeUnknown @test length(collect(Iterators.take(d,3))) == 3 @test length(collect(d)) == 2 @test length(collect(d)) == 0 @@ -928,7 +964,7 @@ end end @testset "flatten empty tuple" begin - @test isempty(collect(Iterators.flatten(()))) + @test @inferred isempty(collect(Iterators.flatten(()))) end @testset "Iterators.accumulate" begin @@ -940,10 +976,10 @@ end @test collect(Iterators.accumulate(+, (x for x in [true])))::Vector{Int} == [1] @test collect(Iterators.accumulate(+, (x for x in [true, true, false])))::Vector{Int} == [1, 2, 2] @test collect(Iterators.accumulate(+, (x for x in [true]), init=10.0))::Vector{Float64} == [11.0] - @test length(Iterators.accumulate(+, [10,20,30])) == 3 - @test size(Iterators.accumulate(max, rand(2,3))) == (2,3) - @test 
Base.IteratorSize(Iterators.accumulate(max, rand(2,3))) === Base.IteratorSize(rand(2,3)) - @test Base.IteratorEltype(Iterators.accumulate(*, ())) isa Base.EltypeUnknown + @test (@inferred length(Iterators.accumulate(+, [10,20,30]))) == 3 + @test (@inferred size(Iterators.accumulate(max, rand(2,3)))) == (2,3) + @test (@inferred Base.IteratorSize(Iterators.accumulate(max, rand(2,3)))) === Base.IteratorSize(rand(2,3)) + @test (@inferred Base.IteratorEltype(Iterators.accumulate(*, ()))) isa Base.EltypeUnknown end @testset "Base.accumulate" begin @@ -952,21 +988,14 @@ end @test accumulate(+, (x^2 for x in 1:3); init=100) == [101, 105, 114] end - -@testset "Iterators.tail_if_any" begin - @test Iterators.tail_if_any(()) == () - @test Iterators.tail_if_any((1, 2)) == (2,) - @test Iterators.tail_if_any((1,)) == () -end - @testset "IteratorSize trait for zip" begin - @test Base.IteratorSize(zip()) == Base.IsInfinite() # for zip of empty tuple - @test Base.IteratorSize(zip((1,2,3), repeated(0))) == Base.HasLength() # for zip of ::HasLength and ::IsInfinite - @test Base.IteratorSize(zip( 1:5, repeated(0) )) == Base.HasLength() # for zip of ::HasShape and ::IsInfinite - @test Base.IteratorSize(zip(repeated(0), (1,2,3))) == Base.HasLength() # for zip of ::IsInfinite and ::HasLength - @test Base.IteratorSize(zip(repeated(0), 1:5 )) == Base.HasLength() # for zip of ::IsInfinite and ::HasShape - @test Base.IteratorSize(zip((1,2,3), 1:5) ) == Base.HasLength() # for zip of ::HasLength and ::HasShape - @test Base.IteratorSize(zip(1:5, (1,2,3)) ) == Base.HasLength() # for zip of ::HasShape and ::HasLength + @test (@inferred Base.IteratorSize(zip())) == Base.IsInfinite() # for zip of empty tuple + @test (@inferred Base.IteratorSize(zip((1,2,3), repeated(0)))) == Base.HasLength() # for zip of ::HasLength and ::IsInfinite + @test (@inferred Base.IteratorSize(zip( 1:5, repeated(0) ))) == Base.HasLength() # for zip of ::HasShape and ::IsInfinite + @test (@inferred Base.IteratorSize(zip(repeated(0), (1,2,3)))) == Base.HasLength() # for zip of ::IsInfinite and ::HasLength + @test (@inferred Base.IteratorSize(zip(repeated(0), 1:5 ))) == Base.HasLength() # for zip of ::IsInfinite and ::HasShape + @test (@inferred Base.IteratorSize(zip((1,2,3), 1:5) )) == Base.HasLength() # for zip of ::HasLength and ::HasShape + @test (@inferred Base.IteratorSize(zip(1:5, (1,2,3)) )) == Base.HasLength() # for zip of ::HasShape and ::HasLength end @testset "proper partition for non-1-indexed vector" begin @@ -974,7 +1003,7 @@ end end @testset "Iterators.peel" begin - @test Iterators.peel([]) == nothing + @test Iterators.peel([]) === nothing @test Iterators.peel(1:10)[1] == 1 @test Iterators.peel(1:10)[2] |> collect == 2:10 @test Iterators.peel(x^2 for x in 2:4)[1] == 4 @@ -989,7 +1018,7 @@ end @testset "isempty and isdone for Generators" begin itr = eachline(IOBuffer("foo\n")) gen = (x for x in itr) - @test !isempty(gen) + @test !(@inferred isempty(gen)) @test !Base.isdone(gen) @test collect(gen) == ["foo"] end @@ -1001,3 +1030,58 @@ end end @test v == () end + +@testset "collect partition substring" begin + @test collect(Iterators.partition(lstrip("01111", '0'), 2)) == ["11", "11"] +end + +@testset "IterableStringPairs" begin + for s in ["", "a", "abcde", "γ", "∋γa"] + for T in (String, SubString, GenericString) + sT = T(s) + p = pairs(sT) + @test collect(p) == [k=>v for (k,v) in zip(keys(sT), sT)] + rv = Iterators.reverse(p) + @test collect(rv) == reverse([k=>v for (k,v) in zip(keys(sT), sT)]) + rrv = Iterators.reverse(rv) + @test 
collect(rrv) == collect(p) + end + end +end + +let itr = (i for i in 1:9) # Base.eltype == Any + @test first(Iterators.partition(itr, 3)) isa Vector{Any} + @test collect(zip(repeat([Iterators.Stateful(itr)], 3)...)) == [(1, 2, 3), (4, 5, 6), (7, 8, 9)] +end + +@testset "map/reduce/mapreduce without an iterator argument" begin + maps = map(Returns, (nothing, 3, 3:2, 3:3, (), (3,))) + mappers1 = (Iterators.map, map, foreach, reduce, foldl, foldr) + mappers2 = (mapreduce, mapfoldl, mapfoldr) + + @testset "map/reduce" begin + @testset "r: $r" for r ∈ mappers1 + @testset "f: $f" for f ∈ maps + @test_throws MethodError r(f) + @test !applicable(r, f) + @test !hasmethod(r, Tuple{typeof(f)}) + end + end + end + + @testset "mapreduce" begin + @testset "mr: $mr" for mr ∈ mappers2 + @testset "f: $f" for f ∈ maps + @testset "g: $g" for g ∈ maps + @test_throws MethodError mr(f, g) + @test !applicable(mr, f, g) + @test !hasmethod(mr, Tuple{typeof(f),typeof(g)}) + end + end + end + end +end + +@testset "Iterators docstrings" begin + @test isempty(Docs.undocumented_names(Iterators)) +end diff --git a/test/keywordargs.jl b/test/keywordargs.jl index 0aed0544b7e2e..43013ab1d721e 100644 --- a/test/keywordargs.jl +++ b/test/keywordargs.jl @@ -394,3 +394,9 @@ let m = first(methods(Core.kwcall, (NamedTuple,typeof(kwf1),Vararg))) @test Core.kwcall(1) == "hi 1" @test which(Core.kwcall, (Int,)).name === :kwcall end + +# issue #50518 +function f50518(xs...=["a", "b", "c"]...; debug=false) + return xs[1] +end +@test f50518() == f50518(;debug=false) == "a" diff --git a/test/llvmcall.jl b/test/llvmcall.jl index f7f6b44b29e62..ddf66ca680d45 100644 --- a/test/llvmcall.jl +++ b/test/llvmcall.jl @@ -70,13 +70,13 @@ end ret i32 %3""", Int32, Tuple{Int32, Int32}, Int32(1), Int32(2))) # llvmcall must be compiled to be called -# Test whether declarations work properly +#Since LLVM 18, LLVM does a best effort to automatically include the intrinsics function undeclared_ceil(x::Float64) llvmcall("""%2 = call double @llvm.ceil.f64(double %0) ret double %2""", Float64, Tuple{Float64}, x) end -@test_throws ErrorException undeclared_ceil(4.2) -@test_throws ErrorException undeclared_ceil(4.2) +@test undeclared_ceil(4.2) == 5.0 +@test undeclared_ceil(4.2) == 5.0 function declared_floor(x::Float64) llvmcall( @@ -147,13 +147,17 @@ module ObjLoadTest using Base: llvmcall, @ccallable using Test didcall = false + """ jl_the_callback() + + Sets the global didcall when it did the call + """ @ccallable Cvoid function jl_the_callback() global didcall didcall = true nothing end @test_throws(ErrorException("@ccallable was already defined for this method name"), - @eval @ccallable Cvoid jl_the_callback(not_the_method::Int) = "other") + @eval @ccallable String jl_the_callback(not_the_method::Int) = "other") # Make sure everything up until here gets compiled @test jl_the_callback() === nothing @test jl_the_callback(1) == "other" diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl index 07b27fc407433..e3e89bb916f2d 100644 --- a/test/llvmcall2.jl +++ b/test/llvmcall2.jl @@ -73,3 +73,12 @@ end jl_str = unsafe_string(str) @test length(jl_str) > 4 end + + +# boolean structs +const NT4I = NTuple{4, VecElement{Int}} +const NT4B = NTuple{4, VecElement{Bool}} +f_nt4b(x, y) = ccall("llvm.sadd.with.overflow", llvmcall, Pair{NT4B, NT4B}, (NT4B, NT4B), x, y) +f_nt4i(x, y) = ccall("llvm.sadd.with.overflow", llvmcall, Pair{NT4I, NT4B}, (NT4I, NT4I), x, y) +@test f_nt4b((false, true, false, true), (false, false, true, true)) === (NT4B((false, true, true, false)) => 
NT4B((false, false, false, true))) +@test f_nt4i((typemin(Int), 0, typemax(Int), typemax(Int)), (-1, typemax(Int),-1, 1)) === (NT4I((typemax(Int), typemax(Int), typemax(Int)-1, typemin(Int))) => NT4B((true, false, false, true))) diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile index 7318d1b67da02..d9fdfa190f3cf 100644 --- a/test/llvmpasses/Makefile +++ b/test/llvmpasses/Makefile @@ -30,4 +30,7 @@ update-help: $(JULIAHOME)/deps/srccache/llvm/llvm/utils/update_test_checks.py \ --help -.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all . +clean: + rm -rf .lit_test_times.txt Output + +.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all clean update-help . diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll new file mode 100644 index 0000000000000..e19093f46f815 --- /dev/null +++ b/test/llvmpasses/alloc-opt-bits.ll @@ -0,0 +1,37 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s + + +@tag = external addrspace(10) global {} + +@glob = external addrspace(10) global {} + +; Test that the gc_preserve intrinsics are deleted directly. + +; CHECK-LABEL: @ptr_and_bits +; CHECK-NOT: alloca +; CHECK: call noalias ptr addrspace(10) @julia.gc_alloc_obj + +define void @ptr_and_bits(ptr %fptr, i1 %b, i1 %b2, i32 %idx) { + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 16, ptr addrspace(10) @tag) + + %g0 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 1 + store ptr addrspace(10) @glob, ptr addrspace(10) %g0 + + %g1 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 0 + store i64 7, ptr addrspace(10) %g1 + + %res = load ptr addrspace(10), ptr addrspace(10) %g0 + %res2 = load i64, ptr addrspace(10) %g1 + ret void +} + +declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) + +declare ptr @julia.ptls_states() + +declare ptr @julia.get_pgcstack() diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll index b33f2cdac7dd4..b96c9385e38eb 100644 --- a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll +++ b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -19,15 +15,10 @@ declare {}* @julia.pointer_from_objref({} addrspace(11)*) ; Test that non-0 addrspace allocas are properly emitted and handled ; CHECK-LABEL: @non_zero_addrspace -; CHECK: %1 = alloca i32, align 8, addrspace(5) - -; TYPED: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)* -; TYPED: %3 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)* -; TYPED: %var1 = addrspacecast {} addrspace(5)* %3 to {} addrspace(10)* -; TYPED: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2) -; OPAQUE: %var1 = addrspacecast ptr addrspace(5) %1 to ptr addrspace(10) -; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %1) +; OPAQUE: %var1 = alloca i32, align 8, addrspace(5) +; OPAQUE: %1 = addrspacecast ptr addrspace(5) %var1 to ptr +; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %var1) ; CHECK: ret void define void @non_zero_addrspace() { diff --git a/test/llvmpasses/alloc-opt-gcframe.ll b/test/llvmpasses/alloc-opt-gcframe.ll index a04d6566cec0a..f53a4d5c01df7 100644 --- a/test/llvmpasses/alloc-opt-gcframe.ll +++ b/test/llvmpasses/alloc-opt-gcframe.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -13,18 +9,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-LABEL: @return_obj ; CHECK-NOT: @julia.gc_alloc_obj -; TYPED: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 -; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; TYPED-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16) -; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 - ; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, ; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16) +; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}}) ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define {} addrspace(10)* @return_obj() { @@ -39,8 +27,7 @@ define {} addrspace(10)* @return_obj() { ; CHECK-LABEL: @return_load ; CHECK: alloca i64 ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK-NOT: @jl_gc_pool_alloc -; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; CHECK-NOT: @jl_gc_small_alloc ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr ; CHECK-NOT: @tag ; CHECK-NOT: @llvm.lifetime.end @@ -59,11 +46,9 @@ define i64 @return_load(i64 %i) { ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @ccall_obj -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK: @ijl_gc_pool_alloc -; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; CHECK: @ijl_gc_small_alloc ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_obj(i8* %fptr) { 
%pgcstack = call {}*** @julia.get_pgcstack() @@ -78,17 +63,12 @@ define void @ccall_obj(i8* %fptr) { ; CHECK-LABEL: @ccall_ptr ; CHECK: alloca i64 -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK-NOT: @jl_gc_pool_alloc -; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -; TYPED: %f = bitcast i8* %fptr to void (i8*)* - +; CHECK-NOT: @jl_gc_small_alloc ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr ; OPAQUE: %f = bitcast ptr %fptr to ptr ; Currently the GC frame lowering pass strips away all operand bundles -; TYPED-NEXT: call void %f(i8* ; OPAQUE-NEXT: call void %f(ptr ; CHECK-NEXT: ret void define void @ccall_ptr(i8* %fptr) { @@ -106,11 +86,9 @@ define void @ccall_ptr(i8* %fptr) { ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @ccall_unknown_bundle -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK: @ijl_gc_pool_alloc -; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; CHECK: @ijl_gc_small_alloc ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_unknown_bundle(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() @@ -128,13 +106,10 @@ define void @ccall_unknown_bundle(i8* %fptr) { ; CHECK-LABEL: @lifetime_branches ; CHECK: alloca i64 -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK: L1: ; CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, -; TYPED: %f = bitcast i8* %fptr to void (i8*)* -; TYPED-NEXT: call void %f(i8* ; OPAQUE: %f = bitcast ptr %fptr to ptr ; OPAQUE-NEXT: call void %f(ptr @@ -142,10 +117,8 @@ define void @ccall_unknown_bundle(i8* %fptr) { ; CHECK-NEXT: br i1 %b2, label %L2, label %L3 ; CHECK: L2: -; TYPED-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* ; OPAQUE-NEXT: %f2 = bitcast ptr %fptr to ptr ; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -; TYPED-NEXT: call void %f2({}* null) ; OPAQUE-NEXT: call void %f2(ptr null) ; CHECK: L3: @@ -176,10 +149,9 @@ L3: ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @object_field -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: @jl_gc_small_alloc ; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 define void @object_field({} addrspace(10)* %field) { %pgcstack = call {}*** @julia.get_pgcstack() @@ -195,11 +167,9 @@ define void @object_field({} addrspace(10)* %field) { ; CHECK-LABEL: @memcpy_opt ; CHECK: alloca [16 x i8], align 16 -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK-NOT: @jl_gc_pool_alloc -; TYPED: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NOT: @jl_gc_small_alloc ; OPAQUE: call void @llvm.memcpy.p0.p0.i64 define void @memcpy_opt(i8* %v22) { top: @@ -215,10 +185,9 @@ top: ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @preserve_opt -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr @julia.get_pgcstack() ; CHECK-NOT: @julia.gc_alloc_obj -; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: @jl_gc_small_alloc ; CHECK-NOT: @llvm.lifetime.end ; CHECK: @external_function define void @preserve_opt(i8* %v22) { @@ -238,7 +207,6 @@ top: ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @preserve_branches -; TYPED: call {}*** @julia.get_pgcstack() ; OPAQUE: call ptr 
@julia.get_pgcstack() ; CHECK: L1: ; CHECK-NEXT: @external_function() @@ -270,10 +238,7 @@ L3: } ; CHECK-LABEL: }{{$}} -; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, -; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, - -; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc(ptr, +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_small_alloc(ptr, ; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc(ptr, declare void @external_function() declare {}*** @julia.get_pgcstack() diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index b7e0647263caa..665687e86835d 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -1,17 +1,12 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} ; Test that the gc_preserve intrinsics are deleted directly. ; CHECK-LABEL: @preserve_branches -; TYPED: call {}*** @julia.ptls_states() ; OPAQUE: call ptr @julia.ptls_states() ; CHECK: L1: ; CHECK-NOT: @llvm.julia.gc_preserve_begin @@ -23,32 +18,30 @@ ; CHECK-NEXT: br label %L3 ; CHECK: L3: -define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* +define void @preserve_branches(ptr %fptr, i1 %b, i1 %b2) { + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr br i1 %b, label %L1, label %L3 -L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) - %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %v) +L1: ; preds = %0 + %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag) + %tok = call token (...) 
@llvm.julia.gc_preserve_begin(ptr addrspace(10) nonnull %v) call void @external_function() br i1 %b2, label %L2, label %L3 -L2: +L2: ; preds = %L1 call void @external_function() br label %L3 -L3: +L3: ; preds = %L2, %L1, %0 ret void } ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @preserve_branches2 -; TYPED: call {}*** @julia.ptls_states() ; OPAQUE: call ptr @julia.ptls_states() ; CHECK: L1: -; TYPED-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 ; OPAQUE-NEXT: @llvm.julia.gc_preserve_begin{{.*}}ptr addrspace(10) %v2 ; CHECK-NEXT: @external_function() ; CHECK-NEXT: br i1 %b2, label %L2, label %L3 @@ -58,24 +51,24 @@ L3: ; CHECK-NEXT: br label %L3 ; CHECK: L3: -define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %v2 = call {} addrspace(10)* @external_function2() +define void @preserve_branches2(ptr %fptr, i1 %b, i1 %b2) { + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %v2 = call ptr addrspace(10) @external_function2() br i1 %b, label %L1, label %L3 -L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) - %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* nonnull %v2) +L1: ; preds = %0 + %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag) + %tok = call token (...) @llvm.julia.gc_preserve_begin(ptr addrspace(10) %v, ptr addrspace(10) nonnull %v2) call void @external_function() br i1 %b2, label %L2, label %L3 -L2: +L2: ; preds = %L1 call void @external_function() br label %L3 -L3: +L3: ; preds = %L2, %L1, %0 ret void } ; CHECK-LABEL: }{{$}} @@ -83,82 +76,141 @@ L3: ; CHECK-LABEL: @legal_int_types ; CHECK: alloca [12 x i8] ; CHECK-NOT: alloca i96 +; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %var1, ; CHECK: ret void define void @legal_int_types() { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 12, {} addrspace(10)* @tag) - %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* - %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 12, ptr addrspace(10) @tag) + %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11) + %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2) ret void } ; CHECK-LABEL: }{{$}} - declare void @external_function() -declare {} addrspace(10)* @external_function2() -declare {}*** @julia.ptls_states() -declare {}*** @julia.get_pgcstack() -declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) -declare {}* @julia.pointer_from_objref({} addrspace(11)*) -declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +declare ptr addrspace(10) @external_function2() + +declare ptr @julia.ptls_states() + +declare ptr @julia.get_pgcstack() + +declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) + +declare 
ptr @julia.pointer_from_objref(ptr addrspace(11)) + declare token @llvm.julia.gc_preserve_begin(...) + declare void @llvm.julia.gc_preserve_end(token) ; CHECK-LABEL: @memref_collision -; TYPED: call {}*** @julia.ptls_states() ; OPAQUE: call ptr @julia.ptls_states() -; TYPED-NOT: store {} ; OPAQUE-NOT: store ptr ; CHECK: store i -; TYPED-NOT: store {} ; OPAQUE-NOT: store ptr ; CHECK: L1: -; TYPED: load {} ; OPAQUE: load ptr ; CHECK: L2: ; CHECK: load i define void @memref_collision(i64 %x) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) - %v_p = bitcast {} addrspace(10)* %v to i64 addrspace(10)* - store i64 %x, i64 addrspace(10)* %v_p - br i1 0, label %L1, label %L2 - -L1: - %v1 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* - %v1_x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %v1 + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag) + %v_p = bitcast ptr addrspace(10) %v to ptr addrspace(10) + store i64 %x, ptr addrspace(10) %v_p, align 4 + br i1 false, label %L1, label %L2 + +L1: ; preds = %0 + %v1 = bitcast ptr addrspace(10) %v to ptr addrspace(10) + %v1_x = load ptr addrspace(10), ptr addrspace(10) %v1, align 8 ret void -L2: - %v2 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* - %v2_x = load i64, i64 addrspace(10)* %v2 +L2: ; preds = %0 + %v2 = bitcast ptr addrspace(10) %v to ptr addrspace(10) + %v2_x = load i64, ptr addrspace(10) %v2, align 4 ret void } + ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @lifetime_no_preserve_end ; CHECK: alloca ; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin ; CHECK: call void @llvm.lifetime.start +; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %v, ; CHECK-NOT: call void @llvm.lifetime.end -define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret({}) %0) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) - %token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v) - %v_derived = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* - %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %v_derived) - %ptr_raw = bitcast {}* %ptr to i8* - call void @external_function() ; safepoint - %ret_raw = bitcast {}* %0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %ret_raw, i8 * align 8 %ptr_raw, i64 0, i1 false) - %ret_raw2 = bitcast {}* %0 to i8* +define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret({}) %0) { + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 8, ptr addrspace(10) @tag) + %token = call token (...) 
@llvm.julia.gc_preserve_begin(ptr addrspace(10) %v) + %v_derived = addrspacecast ptr addrspace(10) %v to ptr addrspace(11) + %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %v_derived) + %ptr_raw = bitcast ptr %ptr to ptr + call void @external_function() + %ret_raw = bitcast ptr %0 to ptr + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %ret_raw, ptr align 8 %ptr_raw, i64 0, i1 false) + %ret_raw2 = bitcast ptr %0 to ptr ret void } ; CHECK-LABEL: }{{$}} + + +; CHECK-LABEL: @initializers +; CHECK: alloca [1 x i8] +; CHECK-DAG: alloca [2 x i8] +; CHECK-DAG: alloca [3 x i8] +; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 1 %var1, +; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 4 %var7, +; CHECK: ret void +define void @initializers() { + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1 + %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11) + %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2) + %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2 + %var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11) + %var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5) + %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3 + %var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11) + %var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8) + ret void +} +; CHECK-LABEL: }{{$}} + +; Test that the pass handles dead basic blocks with references to the allocation +; CHECK-LABEL: @nopreds +; CHECK: alloca i8, i64 0, align 1 +; CHECK: call void @llvm.lifetime.start +define swiftcc { ptr addrspace(10), i8 } @nopreds() { +top: + %0 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr null, i64 0, ptr addrspace(10) null) + %1 = addrspacecast ptr addrspace(10) %0 to ptr addrspace(11) + br label %common.ret + +common.ret: ; preds = %union_move9, %top + ret { ptr addrspace(10), i8 } zeroinitializer + +union_move9: ; No predecessors! 
+ call void @llvm.memcpy.p0.p11.i64(ptr null, ptr addrspace(11) %1, i64 0, i1 false) + br label %common.ret +} +; CHECK-LABEL: }{{$}} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0 +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 + +attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { allockind("alloc") } +attributes #2 = { allockind("alloc,uninitialized") } +attributes #3 = { allockind("alloc,zeroed") } diff --git a/test/llvmpasses/alloc-opt-pipeline.jl b/test/llvmpasses/alloc-opt-pipeline.jl index 9437913e4054b..e84348ec4a8c6 100644 --- a/test/llvmpasses/alloc-opt-pipeline.jl +++ b/test/llvmpasses/alloc-opt-pipeline.jl @@ -17,7 +17,7 @@ end # CHECK-LABEL: @julia_haszerolayout # CHECK: top: -# CHECK-NOT: @jl_gc_pool_alloc +# CHECK-NOT: @jl_gc_small_alloc # CHECK: extractelement # CHECK: ret i8 emit(haszerolayout, NTuple{32,VecElement{UInt8}}) diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll index d3868548a00d7..d435ab1490cfc 100644 --- a/test/llvmpasses/alloc-opt-unsized.ll +++ b/test/llvmpasses/alloc-opt-unsized.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE source_filename = "text" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" @@ -31,15 +27,6 @@ entry: ret void } -; TYPED: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 -; TYPED: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* -; TYPED: %i18 = bitcast i8* %[[i1]] to {}* -; TYPED: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** -; TYPED: %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i -; TYPED: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 -; TYPED: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* -; TYPED: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 - ; OPAQUE: %[[i0:.+]] = alloca ptr addrspace(10), i64 1000, align 16 ; OPAQUE: %i23 = getelementptr inbounds ptr addrspace(10), ptr %i18, i64 %iv.i ; OPAQUE: store ptr addrspace(10) %arg, ptr %i23, align 8 diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll index 
eea3d1b288204..073597fbcdc66 100644 --- a/test/llvmpasses/cpu-features.ll +++ b/test/llvmpasses/cpu-features.ll @@ -1,10 +1,8 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" declare i1 @julia.cpu.have_fma.f64() declare double @with_fma(double %0, double %1, double %2) diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl index 7338d1c3ccc5a..3c4c1d491ec28 100644 --- a/test/llvmpasses/fastmath.jl +++ b/test/llvmpasses/fastmath.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll +# RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s ## Notes: @@ -14,21 +14,5 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) import Base.FastMath -# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}}) +# CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32") emit(FastMath.sqrt_fast, Float32) - - -# Float16 operations should be performed as Float32, unless @fastmath is specified -# TODO: this is not true for platforms that natively support Float16 - -foo(x::T,y::T) where T = x-y == zero(T) -# LOWER: fsub half %0, %1 -# FINAL: %2 = fpext half %0 to float -# FINAL: %3 = fpext half %1 to float -# FINAL: fsub half %2, %3 -emit(foo, Float16, Float16) - -@fastmath foo(x::T,y::T) where T = x-y == zero(T) -# LOWER: fsub fast half %0, %1 -# FINAL: fsub fast half %0, %1 -emit(foo, Float16, Float16) diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll index d3cdea7454972..db80188fc206d 100644 --- a/test/llvmpasses/final-lower-gc-addrspaces.ll +++ b/test/llvmpasses/final-lower-gc-addrspaces.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -27,11 +23,8 @@ attributes #0 = { allocsize(1) } define void @gc_frame_addrspace(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_addrspace -; TYPED: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5) ; OPAQUE: %0 = alloca ptr addrspace(10), i32 4, align 16, addrspace(5) -; TYPED: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)** ; OPAQUE: %gcframe = addrspacecast ptr addrspace(5) %0 to ptr -; TYPED: %1 = bitcast {} addrspace(10)** %gcframe to i8* %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) %pgcstack = call {}*** @julia.get_pgcstack() call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index 5bbaa2f4d81ea..f8e123fdc6aea 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} @@ -18,28 +14,17 @@ declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32) declare void @julia.push_gc_frame({} addrspace(10)**, i32) declare {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)**, i32) declare void @julia.pop_gc_frame({} addrspace(10)**) -declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #0 +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64, i64) #0 attributes #0 = { allocsize(1) } define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; TYPED: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() ; OPAQUE: [[GCFRAME_SLOT:%.*]] = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; TYPED-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 -; TYPED-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* -; TYPED-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 -; TYPED-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; TYPED-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** -; TYPED-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 -; TYPED-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 -; TYPED-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** -; TYPED-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 ; OPAQUE-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 0 ; OPAQUE-DAG: store i64 8, ptr [[GCFRAME_SIZE_PTR]], align 8, !tbaa !0 @@ -49,22 +34,15 @@ top: ; OPAQUE-NEXT: store ptr %gcframe, ptr [[GCFRAME_SLOT]], align 8 call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) -; TYPED: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 ; OPAQUE: %frame_slot_1 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 3 %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8 %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b) -; TYPED: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 ; OPAQUE: 
%frame_slot_2 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8 -; TYPED: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) ; OPAQUE: call void @boxed_simple(ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed) call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; TYPED-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; TYPED-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 -; TYPED-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** -; TYPED-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 ; OPAQUE-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 ; OPAQUE-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load ptr addrspace(10), ptr [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 @@ -80,9 +58,8 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* -; TYPED: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc -; OPAQUE: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc - %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8) +; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc + %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8, i64 12341234) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0 @@ -95,10 +72,8 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* -; CHECK: %0 = add i64 %size, 8 -; TYPED: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null) -; OPAQUE: %v = call noalias nonnull dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %0, ptr null) - %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size) +; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %size, i64 12341234) + %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size, i64 12341234) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0 diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll index ab1425ec12fa5..d1dfb6aca11dd 100644 --- a/test/llvmpasses/float16.ll +++ b/test/llvmpasses/float16.ll @@ -1,13 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s 
| FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s - -define half @demotehalf_test(half %a, half %b) #0 { +define half @demote_half_test(half %a, half %b) #0 { top: -; CHECK-LABEL: @demotehalf_test( +; CHECK-LABEL: @demote_half_test( ; CHECK-NEXT: top: ; CHECK-NEXT: %0 = fpext half %a to float ; CHECK-NEXT: %1 = fpext half %b to float @@ -103,5 +99,131 @@ top: ret half %13 } -attributes #0 = { "target-features"="-avx512fp16" } -attributes #1 = { "target-features"="+avx512fp16" } +define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) #2 { +top: +; CHECK-LABEL: @demote_bfloat_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %0 = fpext bfloat %a to float +; CHECK-NEXT: %1 = fpext bfloat %b to float +; CHECK-NEXT: %2 = fadd float %0, %1 +; CHECK-NEXT: %3 = fptrunc float %2 to bfloat +; CHECK-NEXT: %4 = fpext bfloat %3 to float +; CHECK-NEXT: %5 = fpext bfloat %b to float +; CHECK-NEXT: %6 = fadd float %4, %5 +; CHECK-NEXT: %7 = fptrunc float %6 to bfloat +; CHECK-NEXT: %8 = fpext bfloat %7 to float +; CHECK-NEXT: %9 = fpext bfloat %b to float +; CHECK-NEXT: %10 = fadd float %8, %9 +; CHECK-NEXT: %11 = fptrunc float %10 to bfloat +; CHECK-NEXT: %12 = fpext bfloat %11 to float +; CHECK-NEXT: %13 = fpext bfloat %b to float +; CHECK-NEXT: %14 = fmul float %12, %13 +; CHECK-NEXT: %15 = fptrunc float %14 to bfloat +; CHECK-NEXT: %16 = fpext bfloat %15 to float +; CHECK-NEXT: %17 = fpext bfloat %b to float +; CHECK-NEXT: %18 = fdiv float %16, %17 +; CHECK-NEXT: %19 = fptrunc float %18 to bfloat +; CHECK-NEXT: %20 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 +; CHECK-NEXT: %21 = insertelement <2 x bfloat> %20, bfloat %b, i32 1 +; CHECK-NEXT: %22 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 +; CHECK-NEXT: %23 = insertelement <2 x bfloat> %22, bfloat %b, i32 1 +; CHECK-NEXT: %24 = fpext <2 x bfloat> %21 to <2 x float> +; CHECK-NEXT: %25 = fpext <2 x bfloat> %23 to <2 x float> +; CHECK-NEXT: %26 = fadd <2 x float> %24, %25 +; CHECK-NEXT: %27 = fptrunc <2 x float> %26 to <2 x bfloat> +; CHECK-NEXT: %28 = extractelement <2 x bfloat> %27, i32 0 +; CHECK-NEXT: %29 = extractelement <2 x bfloat> %27, i32 1 +; CHECK-NEXT: %30 = fpext bfloat %28 to float +; CHECK-NEXT: %31 = fpext bfloat %29 to float +; CHECK-NEXT: %32 = fadd float %30, %31 +; CHECK-NEXT: %33 = fptrunc float %32 to bfloat +; CHECK-NEXT: %34 = fpext bfloat %33 to float +; CHECK-NEXT: %35 = fpext bfloat %19 to float +; CHECK-NEXT: %36 = fadd float %34, %35 +; CHECK-NEXT: %37 = fptrunc float %36 to bfloat +; CHECK-NEXT: ret bfloat %37 +; + %0 = fadd bfloat %a, %b + %1 = fadd bfloat %0, %b + %2 = fadd bfloat %1, %b + %3 = fmul bfloat %2, %b + %4 = fdiv bfloat %3, %b + %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 + %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1 + %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 + %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1 + %9 = fadd <2 x bfloat> %6, %8 + %10 = extractelement <2 x bfloat> %9, i32 0 + %11 = extractelement <2 x bfloat> %9, i32 1 + %12 = fadd bfloat %10, %11 + %13 = fadd bfloat %12, %4 + ret bfloat %13 +} + +define bfloat @native_bfloat_test(bfloat %a, bfloat %b) #3 { +top: +; CHECK-LABEL: @native_bfloat_test( +; CHECK-NEXT: top: +; CHECK-NEXT: 
%0 = fadd bfloat %a, %b +; CHECK-NEXT: %1 = fadd bfloat %0, %b +; CHECK-NEXT: %2 = fadd bfloat %1, %b +; CHECK-NEXT: %3 = fmul bfloat %2, %b +; CHECK-NEXT: %4 = fdiv bfloat %3, %b +; CHECK-NEXT: %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 +; CHECK-NEXT: %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1 +; CHECK-NEXT: %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 +; CHECK-NEXT: %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1 +; CHECK-NEXT: %9 = fadd <2 x bfloat> %6, %8 +; CHECK-NEXT: %10 = extractelement <2 x bfloat> %9, i32 0 +; CHECK-NEXT: %11 = extractelement <2 x bfloat> %9, i32 1 +; CHECK-NEXT: %12 = fadd bfloat %10, %11 +; CHECK-NEXT: %13 = fadd bfloat %12, %4 +; CHECK-NEXT: ret bfloat %13 +; + %0 = fadd bfloat %a, %b + %1 = fadd bfloat %0, %b + %2 = fadd bfloat %1, %b + %3 = fmul bfloat %2, %b + %4 = fdiv bfloat %3, %b + %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 + %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1 + %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 + %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1 + %9 = fadd <2 x bfloat> %6, %8 + %10 = extractelement <2 x bfloat> %9, i32 0 + %11 = extractelement <2 x bfloat> %9, i32 1 + %12 = fadd bfloat %10, %11 + %13 = fadd bfloat %12, %4 + ret bfloat %13 +} + +define i1 @fast_half_test(half %0, half %1) #0 { +top: +; CHECK-LABEL: @fast_half_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %2 = fsub fast half %0, %1 +; CHECK-NEXT: %3 = fcmp fast oeq half %2, 0xH0000 +; CHECK-NEXT: ret i1 %3 +; + %2 = fsub fast half %0, %1 + %3 = fcmp fast oeq half %2, 0xH0000 + ret i1 %3 +} + +define i1 @fast_bfloat_test(bfloat %0, bfloat %1) #2 { +top: +; CHECK-LABEL: @fast_bfloat_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %2 = fsub fast bfloat %0, %1 +; CHECK-NEXT: %3 = fcmp fast oeq bfloat %2, 0xR0000 +; CHECK-NEXT: ret i1 %3 +; + %2 = fsub fast bfloat %0, %1 + %3 = fcmp fast oeq bfloat %2, 0xR0000 + ret i1 %3 +} + +attributes #0 = { "julia.hasfp16"="false" } +attributes #1 = { "julia.hasfp16"="true" } +attributes #2 = { "julia.hasbf16"="false" } +attributes #3 = { "julia.hasbf16"="true" } diff --git a/test/llvmpasses/gc-invariant-verifier.ll b/test/llvmpasses/gc-invariant-verifier.ll new file mode 100644 index 0000000000000..652fabc742aad --- /dev/null +++ b/test/llvmpasses/gc-invariant-verifier.ll @@ -0,0 +1,13 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(GCInvariantVerifier)' -S %s | FileCheck %s + +; CHECK-LABEL: @vectorized_addrspacecast +define ptr addrspace(10) @vectorized_addrspacecast() { +top: + ret ptr addrspace(10) null + +vector.ph: + %0 = addrspacecast <4 x ptr addrspace(10)> zeroinitializer to <4 x ptr addrspace(11)> + unreachable +} diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll index 7d29a9e3b1f9e..9f9282cd3c870 100644 --- a/test/llvmpasses/gcroots.ll +++ b/test/llvmpasses/gcroots.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -19,13 +15,9 @@ top: ; CHECK-LABEL: @simple %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 -; TYPED: call {} addrspace(10)* @jl_box_int64 ; OPAQUE: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] ; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] @@ -33,9 +25,6 @@ top: ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; TYPED-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] -; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] ; OPAQUE-NOT: getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] ; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]] @@ -51,7 +40,6 @@ define void @leftover_alloca({} addrspace(10)* %a) { ; If this pass encounters an alloca, it'll just sink it into the gcframe, ; relying on mem2reg to catch simple cases such as this earlier ; CHECK-LABEL: @leftover_alloca -; TYPED: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe ; OPAQUE: %var = getelementptr inbounds ptr addrspace(10), ptr %gcframe %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -70,12 +58,8 @@ define void @simple_union() { ; CHECK-LABEL: @simple_union %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; TYPED: %a = call { {} addrspace(10)*, i8 } @union_ret() ; OPAQUE: %a = call { ptr addrspace(10), i8 } @union_ret() %a = call { {} addrspace(10)*, i8 } @union_ret() -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; TYPED-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 -; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] ; OPAQUE-NEXT: [[EXTRACT:%.*]] = 
extractvalue { ptr addrspace(10), i8 } %a, 0 @@ -101,7 +85,6 @@ define void @select_simple(i64 %a, i64 %b) { define void @phi_simple(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_simple -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -115,8 +98,6 @@ blabel: br label %common common: %phi = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] -; TYPED: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] ; OPAQUE: [[GEP:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 ; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP]] @@ -128,7 +109,6 @@ declare void @one_arg_decayed(i64 addrspace(12)*) define void @select_lift(i64 %a, i64 %b) { ; CHECK-LABEL: @select_lift -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -137,7 +117,6 @@ define void @select_lift(i64 %a, i64 %b) { %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* %cmp = icmp eq i64 %a, %b -; TYPED: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed ; OPAQUE: %gclift = select i1 %cmp, ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed call void @one_arg_decayed(i64 addrspace(12)* %selectb) @@ -147,7 +126,6 @@ define void @select_lift(i64 %a, i64 %b) { define void @phi_lift(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_lift -; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] ; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -177,7 +155,6 @@ top: br i1 %cmp, label %alabel, label %blabel alabel: %u = call { {} addrspace(10)*, i8 } @union_ret() -; TYPED: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 ; OPAQUE: %aboxed = extractvalue { ptr addrspace(10), i8 } %u, 0 %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* @@ -188,7 +165,6 @@ blabel: %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* br label %common common: -; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] ; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ] %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ] call void @one_arg_decayed(i64 addrspace(12)* %phi) @@ -198,7 +174,6 @@ common: define void @live_if_live_out(i64 %a, i64 %b) { ; CHECK-LABEL: @live_if_live_out top: -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -217,12 +192,10 @@ succ: ; safepoint define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { ; CHECK-LABEL: @ret_use -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} 
addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: store {} addrspace(10)* %aboxed ; OPAQUE: store ptr addrspace(10) %aboxed %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ret {} addrspace(10)* %aboxed @@ -230,16 +203,11 @@ define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { define {{} addrspace(10)*, i8} @ret_use_struct() { ; CHECK-LABEL: @ret_use_struct -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; TYPED: %aunion = call { {} addrspace(10)*, i8 } @union_ret() ; OPAQUE: %aunion = call { ptr addrspace(10), i8 } @union_ret() %aunion = call { {} addrspace(10)*, i8 } @union_ret() -; TYPED-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; TYPED-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 -; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] ; OPAQUE-DAG: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] ; OPAQUE-DAG: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %aunion, 0 @@ -273,12 +241,10 @@ top: define void @global_ref() { ; CHECK-LABEL: @global_ref -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1) -; TYPED: store {} addrspace(10)* %loaded, {} addrspace(10)** ; OPAQUE: store ptr addrspace(10) %loaded, ptr call void @one_arg_boxed({} addrspace(10)* %loaded) ret void @@ -286,13 +252,11 @@ define void @global_ref() { define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) { ; CHECK-LABEL: @no_redundant_rerooting -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: store {} addrspace(10)* %aboxed ; OPAQUE: store ptr addrspace(10) %aboxed ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() @@ -313,13 +277,11 @@ declare void @llvm.memcpy.p064.p10i8.i64(i64*, i8 addrspace(10)*, i64, i32, i1) define void @memcpy_use(i64 %a, i64 *%aptr) { ; CHECK-LABEL: @memcpy_use -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: store {} addrspace(10)* %aboxed ; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %acast = bitcast {} addrspace(10)* %aboxed to i8 addrspace(10)* @@ -332,23 +294,19 @@ declare void @llvm.julia.gc_preserve_end(token) define void @gc_preserve(i64 %a) { ; CHECK-LABEL: @gc_preserve -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: store {} addrspace(10)* %aboxed ; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %tok = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %aboxed) %aboxed2 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: store {} addrspace(10)* %aboxed2 ; OPAQUE: store ptr addrspace(10) %aboxed2 call void @jl_safepoint() call void @llvm.julia.gc_preserve_end(token %tok) %aboxed3 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: store {} addrspace(10)* %aboxed3 ; OPAQUE: store ptr addrspace(10) %aboxed3 call void @jl_safepoint() call void @one_arg_boxed({} addrspace(10)* %aboxed2) @@ -358,24 +316,11 @@ top: define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapture nonnull readonly dereferenceable(16)) { ; CHECK-LABEL: @gc_preserve_vec -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 6 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 6 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8 -; TYPED-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; TYPED-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; TYPED-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; TYPED-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; TYPED-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 -; TYPED-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 -; TYPED-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 -; TYPED-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 -; TYPED-DAG: store {} addrspace(10)* [[V11]] -; TYPED-DAG: store {} addrspace(10)* [[V12]] -; TYPED-DAG: store {} addrspace(10)* [[V21]] -; TYPED-DAG: store {} addrspace(10)* [[V22]] ; OPAQUE-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 ; OPAQUE-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 @@ -428,7 +373,6 @@ declare {} addrspace(10) *@alloc() define {} addrspace(10)* @vec_loadobj() { ; CHECK-LABEL: @vec_loadobj -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() @@ -443,7 +387,6 @@ define {} addrspace(10)* @vec_loadobj() { define {} addrspace(10)* @vec_gep() { ; CHECK-LABEL: @vec_gep -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() @@ -459,7 +402,6 @@ define {} addrspace(10)* @vec_gep() { declare i1 @check_property({} addrspace(10)* %val) define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) { ; CHECK-LABEL: @loopyness -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -473,8 +415,6 @@ header: a: ; This needs a store ; CHECK-LABEL: a: -; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] ; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] ; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP1]] @@ -483,8 +423,6 @@ a: latch: ; This as well in case we went the other path -; TYPED: [[GEP2:%.*]] = getelementptr inbounds {} 
addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] ; OPAQUE: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] ; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP2]] @@ -498,7 +436,6 @@ exit: define {} addrspace(10)* @phi_union(i1 %cond) { ; CHECK-LABEL: @phi_union -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -524,7 +461,6 @@ join: define {} addrspace(10)* @select_union(i1 %cond) { ; CHECK-LABEL: @select_union -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -541,7 +477,6 @@ top: define i8 @simple_arrayptr() { ; CHECK-LABEL: @simple_arrayptr -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -559,7 +494,6 @@ top: define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecstoreload -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -574,7 +508,6 @@ top: define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecphi -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -601,7 +534,6 @@ common: define i8 @phi_arrayptr(i1 %cond) { ; CHECK-LABEL: @phi_arrayptr -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -641,7 +573,6 @@ common: define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -659,14 +590,12 @@ top: define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect_lift -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} ; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() @@ -679,14 +608,12 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecvecselect_lift -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; TYPED: %gclift = select i1 %{{[0-9]+}}, {} 
addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} ; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() @@ -699,7 +626,6 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { ; CHECK-LABEL: @vecscalarselect_lift -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -707,7 +633,6 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed ; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() @@ -720,7 +645,6 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { define void @scalarvecselect_lift(i1 %cond, i64 %a) { ; CHECK-LABEL: @scalarvecselect_lift -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -728,7 +652,6 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) { %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed ; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() @@ -741,7 +664,6 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) { define i8 @select_arrayptr(i1 %cond) { ; CHECK-LABEL: @select_arrayptr -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -769,11 +691,8 @@ top: define i8 @vector_arrayptrs() { ; CHECK-LABEL: @vector_arrayptrs -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 ; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] @@ -795,12 +714,8 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8 (<2 x i8 ad define i8 @masked_arrayptrs() { ; CHECK-LABEL: @masked_arrayptrs -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 -; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, 
i32 2 -; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.load.v2p13.p11(ptr addrspace(11) %arrayptrptr, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 @@ -823,12 +738,8 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8 (<2 x i8 define i8 @gather_arrayptrs() { ; CHECK-LABEL: @gather_arrayptrs -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 -; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 @@ -850,12 +761,8 @@ top: define i8 @gather_arrayptrs_alltrue() { ; CHECK-LABEL: @gather_arrayptrs -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 -; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] ; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 @@ -877,11 +784,8 @@ top: define i8 @lost_select_decayed(i1 %arg1) { ; CHECK-LABEL: @lost_select_decayed -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 -; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; TYPED: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] ; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 ; OPAQUE: store ptr addrspace(10) [[SOMETHING:%.*]], ptr [[GEP0]] diff --git a/test/llvmpasses/image-codegen.jl b/test/llvmpasses/image-codegen.jl index 8132dc4faa22a..2e52245b7d3b9 100644 --- a/test/llvmpasses/image-codegen.jl +++ b/test/llvmpasses/image-codegen.jl @@ -13,7 +13,8 @@ # CHECK-NOT: internal global # CHECK-NOT: private global # CHECK: jl_global -# CHECK-SAME: = global +# COM: we emit both declarations and definitions, so we may see either style in the IR +# CHECK-SAME: = {{(external )?}}global # CHECK: julia_f_ # CHECK-NOT: internal global # CHECK-NOT: private global diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll index 464a96f1413d9..76ce19af96e94 100644 --- a/test/llvmpasses/julia-licm-fail.ll +++ b/test/llvmpasses/julia-licm-fail.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE ; COM: This file contains functions that should not trigger allocations to be hoisted out of loops @@ -25,10 +21,8 @@ preheader: br label %loop ; CHECK: loop: loop: -; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) ; OPAQUE-NEXT: %ignore = call ptr addrspace(10) @escape(ptr addrspace(10) %alloc) %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) br i1 %ret, label %return, label %loop @@ -51,13 +45,10 @@ preheader: br label %loop ; CHECK: loop: loop: -; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* ; OPAQUE-NEXT: %cast = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* -; TYPED-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) ; OPAQUE-NEXT: %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %cast) %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) br i1 %ret, label %return, label %loop @@ -82,7 +73,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 declare void @ijl_gc_queue_root({} addrspace(10)*) #3 ; Function Attrs: allocsize(1) -declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1 +declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1 ; Function Attrs: allocsize(1) declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1 diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll index e1684c7577578..4f25a99f7e615 100644 --- a/test/llvmpasses/julia-licm-memoryssa.ll +++ b/test/llvmpasses/julia-licm-memoryssa.ll @@ -1,8 +1,6 @@ ; COM: NewPM-only test, tests that memoryssa is preserved correctly -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null 
%s 2>&1 | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -116,8 +114,6 @@ top: preheader: ; CHECK-NEXT: [[ALLOC:[0-9]+]] = MemoryDef([[PGCSTACK]]) -; TYPED-NEXT: %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) -; TYPED-NEXT: %[[BCAST:.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* ; OPAQUE-NEXT: %alloc = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %current_task, i64 0, ptr addrspace(10) @tag) diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll index 941b2d072a1cc..37a547c9861b7 100644 --- a/test/llvmpasses/julia-licm-missed.ll +++ b/test/llvmpasses/julia-licm-missed.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE ; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops ; COM: i.e. 
they are missed optimizations @@ -29,16 +25,12 @@ preheader: br label %loop ; CHECK: loop: loop: -; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* ; OPAQUE-NEXT: %derived = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* -; TYPED-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* ; OPAQUE-NEXT: %ptr = bitcast ptr addrspace(11) %derived to ptr addrspace(11) %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* -; TYPED-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 ; OPAQUE-NEXT: store ptr addrspace(10) %obj, ptr addrspace(11) %ptr, align 8 store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 br i1 %ret, label %return, label %loop @@ -63,13 +55,11 @@ preheader: br label %loop ; CHECK: loop: loop: -; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) br label %other ; CHECK: other: other: -; TYPED-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ] ; OPAQUE-NEXT: %phi = phi ptr addrspace(10) [ %alloc, %loop ] %phi = phi {} addrspace(10)* [ %alloc, %loop ] br i1 %ret, label %return, label %loop @@ -96,7 +86,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 declare void @ijl_gc_queue_root({} addrspace(10)*) #3 ; Function Attrs: allocsize(1) -declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1 +declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1 ; Function Attrs: allocsize(1) declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1 diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll index 8bedc5db75d96..732b62788f13c 100644 --- a/test/llvmpasses/julia-licm.ll +++ b/test/llvmpasses/julia-licm.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -100,9 +96,6 @@ L3: ; preds = %L3.loopexit, %top L4: ; preds = %top %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12 %current_task1 = bitcast {}*** %current_task112 to {}** - ; TYPED: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) - ; TYPED-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)* - ; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false) ; OPAQUE: %3 = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task1, i64 8, ptr addrspace(10) @tag) ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %3, i8 0, i64 8, i1 false) @@ -112,8 +105,6 @@ L4: ; preds = %top L22: ; preds = %L4, %L22 %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] - ; TYPED: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ] - ; TYPED-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* ; OPAQUE: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] ; OPAQUE-NEXT %4 = bitcast ptr addrspace(10) %3 to ptr addrspace(10) @@ -135,9 +126,6 @@ top: br label %preheader ; CHECK: preheader: preheader: -; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* -; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false) ; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %alloc, i8 0, i64 8, i1 false) @@ -164,7 +152,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 declare void @ijl_gc_queue_root({} addrspace(10)*) #3 ; Function Attrs: allocsize(1) -declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1 +declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1 ; Function Attrs: allocsize(1) declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1 diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll new file mode 100644 index 0000000000000..b99fb4f57db20 --- /dev/null +++ b/test/llvmpasses/julia-simdloop-memoryssa.ll @@ -0,0 +1,53 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK 
+ +; CHECK-LABEL: MemorySSA for function: simd_test +; CHECK-LABEL: @simd_test( +define void @simd_test(double *%a, double *%b) { +; CHECK: top: +top: + br label %loop +; CHECK: loop: +loop: +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({top,liveOnEntry},{loop,[[MSSA_USE:[0-9]+]]}) + %i = phi i64 [0, %top], [%nexti, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %bptr = getelementptr double, double *%b, i64 %i +; CHECK: MemoryUse([[MPHI]]) +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, double *%aptr +; CHECK: MemoryUse([[MPHI]]) + %bval = load double, double *%aptr + %cval = fadd double %aval, %bval +; CHECK: [[MSSA_USE]] = MemoryDef([[MPHI]]) + store double %cval, double *%bptr + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret void +} + +; CHECK-LABEL: MemorySSA for function: simd_test_sub2 +; CHECK-LABEL: @simd_test_sub2( +define double @simd_test_sub2(double *%a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i +; CHECK: MemoryUse(liveOnEntry) + %aval = load double, double *%aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} \ No newline at end of file diff --git a/test/llvmpasses/julia-simdloop.ll b/test/llvmpasses/julia-simdloop.ll new file mode 100644 index 0000000000000..9a23a2826da70 --- /dev/null +++ b/test/llvmpasses/julia-simdloop.ll @@ -0,0 +1,133 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s + +; CHECK-LABEL: @simd_test( +define void @simd_test(ptr %a, ptr %b) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %aptr = getelementptr double, ptr %a, i64 %i + %bptr = getelementptr double, ptr %b, i64 %i +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, ptr %aptr + %bval = load double, ptr %aptr + %cval = fadd double %aval, %bval + store double %cval, ptr %bptr + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret void +} + +; CHECK-LABEL: @simd_test_sub( +define double @simd_test_sub(ptr %a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, ptr %a, i64 %i +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, ptr %aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret double %nextv +} + +; CHECK-LABEL: @simd_test_sub2( +define double @simd_test_sub2(ptr %a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, ptr %a, i64 %i + %aval = load double, ptr %aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +; CHECK-LABEL: @simd_test_sub4( +define double @simd_test_sub4(ptr %a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %aval = load double, double *%aptr + %nextv2 = fmul double %aval, %aval + ; CHECK: fmul contract double %aval, %aval + %nextv = fsub double %v, %nextv2 +; CHECK: fsub reassoc contract double %v, %nextv2 + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +; Tests if we correctly pass through other metadata +; CHECK-LABEL: @disabled( +define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %N + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 48 +; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 + +for.end: ; preds = %for.body + %1 = load i32, i32* %a, align 4 + ret i32 %1 +} + +; Check that we don't add contract to non loop things +; CHECK-LABEL: @dont_add_no_loop( +define double @dont_add_no_loop(ptr nocapture noundef nonnull readonly align 8 dereferenceable(72) %"a::Tuple", ptr nocapture noundef nonnull readonly align 8 dereferenceable(24) %"b::Tuple") #0 { 
+top: + %"a::Tuple[9]_ptr" = getelementptr inbounds i8, ptr %"a::Tuple", i64 64 + %"b::Tuple[3]_ptr" = getelementptr inbounds i8, ptr %"b::Tuple", i64 16 + %"a::Tuple[6]_ptr" = getelementptr inbounds i8, ptr %"a::Tuple", i64 40 + %"b::Tuple[2]_ptr" = getelementptr inbounds i8, ptr %"b::Tuple", i64 8 + %"a::Tuple[3]_ptr" = getelementptr inbounds i8, ptr %"a::Tuple", i64 16 + %"a::Tuple[3]_ptr.unbox" = load double, ptr %"a::Tuple[3]_ptr", align 8 + %"b::Tuple.unbox" = load double, ptr %"b::Tuple", align 8 + %0 = fmul double %"a::Tuple[3]_ptr.unbox", %"b::Tuple.unbox" +; CHECK: fmul double % + %"a::Tuple[6]_ptr.unbox" = load double, ptr %"a::Tuple[6]_ptr", align 8 + %"b::Tuple[2]_ptr.unbox" = load double, ptr %"b::Tuple[2]_ptr", align 8 + %1 = fmul contract double %"a::Tuple[6]_ptr.unbox", %"b::Tuple[2]_ptr.unbox" + %2 = fadd contract double %0, %1 + %"a::Tuple[9]_ptr.unbox" = load double, ptr %"a::Tuple[9]_ptr", align 8 + %"b::Tuple[3]_ptr.unbox" = load double, ptr %"b::Tuple[3]_ptr", align 8 + %3 = fmul contract double %"a::Tuple[9]_ptr.unbox", %"b::Tuple[3]_ptr.unbox" + %4 = fadd contract double %2, %3 + ret double %4 +} + + +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} +!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3} +!3 = !{!"llvm.loop.vectorize.disable", i1 0} diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll index 9849f432fb9a7..9c041664a9682 100644 --- a/test/llvmpasses/late-lower-gc-addrspaces.ll +++ b/test/llvmpasses/late-lower-gc-addrspaces.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -22,39 +18,29 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() -; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) -; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; CHECK: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 -; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) -; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 +; CHECK-NEXT: 
call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] -; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] +; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] -; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) -; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] +; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) -; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) +; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -64,25 +50,15 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 - -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; CHECK: %current_task = 
getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: ret {} addrspace(10)* %v -; OPAQUE-NEXT: ret ptr addrspace(10) %v +; CHECK-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -97,34 +73,21 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 - -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) +; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 -; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 
8, !range !7 +; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 -; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 +; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 -; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 +; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index 36e581993c176..d294847db8f9d 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s -check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s @tag = external addrspace(10) global {}, align 16 @@ -19,39 +15,29 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() -; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) -; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; CHECK: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 -; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) -; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 +; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] -; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] +; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* 
@jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] -; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) -; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] +; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) -; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) +; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -61,25 +47,15 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 - -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: ret {} addrspace(10)* %v -; 
OPAQUE-NEXT: ret ptr addrspace(10) %v +; CHECK-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -94,34 +70,21 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 - -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) +; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 -; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 +; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 -; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 +; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 -; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 +; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, 
align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void @@ -193,26 +156,19 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { %l0 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e0 %e1 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 1 %l1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e1 - %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) + %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) ret void } ; CHECK-LABEL: @decayar -; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; TYPED: %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) -; TYPED: store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8 -; TYPED: %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) -; TYPED: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8 -; TYPED: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) -; TYPED: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) - -; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) -; OPAQUE: %1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) -; OPAQUE: store ptr addrspace(10) %l0, ptr %1, align 8 -; OPAQUE: %2 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) -; OPAQUE: store ptr addrspace(10) %l1, ptr %2, align 8 -; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) -; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe) + +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; CHECK: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) +; CHECK: store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8 +; CHECK: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) +; CHECK: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8 +; CHECK: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) +; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) !0 = !{i64 0, i64 23} !1 = !{!1} diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index 3e0df7a8885a7..294c657196142 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -1,11 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" - -# RUN: julia --startup-file=no %s %t -# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,TYPED - -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" +# RUN: export JULIA_LLVM_ARGS="" # RUN: julia --startup-file=no %s %t # RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,OPAQUE @@ -17,7 +12,7 @@ struct Foo y::Int32 end -@generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x)) +@generated foo(x) = :(ccall("extern foo", llvmcall, $x, ($x,), x)) bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2) # CHECK: define @@ -48,7 +43,7 @@ emit(foo, Float16) # CHECK: ret [2 x half] # CHECK-NOT: define # CHECK: } -emit(foo, NTuple{2, Float16}) +emit(foo, NTuple{2,Float16}) # COM: Make sure that we don't miss a function by accident (helps localize errors) # CHECK-NOT: { @@ -67,7 +62,7 @@ emit(foo, NTuple{2, Float16}) # CHECK: ret <2 x half> # CHECK-NOT: define # CHECK: } -emit(foo, NTuple{2, VecElement{Float16}}) +emit(foo, NTuple{2,VecElement{Float16}}) # COM: Make sure that we don't miss a function by accident (helps localize errors) # CHECK-NOT: { @@ -89,7 +84,7 @@ emit(foo, NTuple{2, VecElement{Float16}}) # OPAQUE: ret ptr addrspace(3) # CHECK-NOT: define # CHECK: } -emit(foo, Core.LLVMPtr{Float32, 3}) +emit(foo, Core.LLVMPtr{Float32,3}) # COM: Make sure that we don't miss a function by accident (helps localize errors) # CHECK-NOT: { @@ -127,7 +122,7 @@ emit(foo, Foo) # CHECK: ret <2 x half> # CHECK-NOT: define # CHECK: } -emit(bar, NTuple{2, Float16}) +emit(bar, NTuple{2,Float16}) # COM: Make sure that we don't miss a function by accident (helps localize errors) # CHECK-NOT: { diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index b9b388c73d0c5..759ff09499deb 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -2,8 +2,7 @@ # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -# RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER -# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER +# RUN: cat %t/module.ll | opt --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL @@ -27,12 +26,11 @@ function simdf(X) acc = zero(eltype(X)) @simd for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] -# LOWER-NOT: llvm.mem.parallel_loop_access -# LOWER: fadd reassoc contract double -# LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] -# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> + # CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] + # LOWER-NOT: llvm.mem.parallel_loop_access + # LOWER: fadd reassoc contract double + # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] + # FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end acc end @@ -43,11 +41,10 @@ function simdf2(X) acc = zero(eltype(X)) @simd ivdep for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] -# LOWER: llvm.mem.parallel_loop_access -# LOWER-NOT: call void 
@julia.loopinfo_marker() -# LOWER: fadd reassoc contract double -# LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] + # CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] + # LOWER: llvm.mem.parallel_loop_access + # LOWER: fadd reassoc contract double + # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] end acc end @@ -61,14 +58,13 @@ end for i in 1:N iteration(i) $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration -# FINAL: br + # CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] + # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration + # FINAL: br end end @@ -83,24 +79,23 @@ end for i in 1:10 for j in J 1 <= j <= I && continue - @show (i,j) + @show (i, j) iteration(i) end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration + # CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] + # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end end @@ -109,21 +104,21 @@ end for i in 1:10 for j in J 1 <= j <= I && continue - @show (i,j) + @show (i, j) iteration(i) -# FINAL: call {{(swiftcc )?}}void @j_iteration -# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration + # FINAL: call {{(swiftcc )?}}void @j_iteration + # FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),))) end end ## Check all the MD nodes -# CHECK: [[LOOPINFO]] = !{!"julia.simdloop"} -# CHECK: [[LOOPINFO2]] = !{!"julia.simdloop", !"julia.ivdep"} -# CHECK: [[LOOPINFO3]] = !{[[LOOPUNROLL:![0-9]+]]} +# CHECK: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} +# CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} +# CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # CHECK: [[LOOPUNROLL]] = 
!{!"llvm.loop.unroll.count", i64 3} -# CHECK: [[LOOPINFO4]] = !{[[LOOPUNROLL2:![0-9]+]]} +# CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} # CHECK: [[LOOPUNROLL2]] = !{!"llvm.loop.unroll.full"} # LOWER: [[LOOPID]] = distinct !{[[LOOPID]]} # LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]} diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll index 744bf09082646..ce3bdc6401b91 100644 --- a/test/llvmpasses/lower-handlers-addrspaces.ll +++ b/test/llvmpasses/lower-handlers-addrspaces.ll @@ -1,17 +1,13 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" attributes #1 = { returns_twice } -declare i32 @julia.except_enter() #1 -declare void @ijl_pop_handler(i32) +declare {i32, i8*} @julia.except_enter({}*) #1 +declare void @ijl_pop_handler({}*, i32) declare i8**** @julia.ptls_states() declare i8**** @julia.get_pgcstack() @@ -21,7 +17,8 @@ top: ; CHECK: call void @llvm.lifetime.start ; CHECK: call void @ijl_enter_handler ; CHECK: setjmp - %r = call i32 @julia.except_enter() + %rb = call {i32, i8*} @julia.except_enter({}* null) + %r = extractvalue {i32, i8*} %rb, 0 %cmp = icmp eq i32 %r, 0 br i1 %cmp, label %try, label %catch try: @@ -29,7 +26,7 @@ try: catch: br label %after after: - call void @ijl_pop_handler(i32 1) + call void @ijl_pop_handler({}* null, i32 1) ; CHECK: llvm.lifetime.end ret void } diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll index 2f5dea6cf0892..7f0648a1a8bf5 100644 --- a/test/llvmpasses/lower-handlers.ll +++ b/test/llvmpasses/lower-handlers.ll @@ -1,14 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s attributes #1 = { returns_twice } -declare i32 @julia.except_enter() #1 -declare void @ijl_pop_handler(i32) +declare {i32, i8*} @julia.except_enter({}*) #1 +declare void @ijl_pop_handler({}*, i32) declare i8**** @julia.ptls_states() declare i8**** @julia.get_pgcstack() @@ -18,15 +14,18 @@ top: ; CHECK: call void @llvm.lifetime.start ; CHECK: call void @ijl_enter_handler ; CHECK: setjmp - %r = call i32 @julia.except_enter() + %rb = call {i32, i8*} @julia.except_enter({}* null) + %r = extractvalue {i32, i8*} %rb, 0 + %b = extractvalue {i32, i8*} %rb, 1 %cmp = icmp eq i32 %r, 0 br i1 %cmp, label %try, label %catch try: + %lcssa = phi {i32, i8*} [ %rb, %top ] br label %after catch: br label %after after: - call void @ijl_pop_handler(i32 1) + call void @ijl_pop_handler({}* null, i32 1) ; CHECK: llvm.lifetime.end ret void } diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll deleted file mode 100644 index afeb068317844..0000000000000 --- a/test/llvmpasses/muladd.ll +++ /dev/null @@ -1,66 +0,0 @@ -; This file is a part of Julia. License is MIT: https://julialang.org/license - -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s - - -; CHECK-LABEL: @fast_muladd1 -define double @fast_muladd1(double %a, double %b, double %c) { -top: -; CHECK: {{contract|fmuladd}} - %v1 = fmul double %a, %b - %v2 = fadd fast double %v1, %c -; CHECK: ret double - ret double %v2 -} - -; CHECK-LABEL: @fast_mulsub1 -define double @fast_mulsub1(double %a, double %b, double %c) { -top: -; CHECK: {{contract|fmuladd}} - %v1 = fmul double %a, %b - %v2 = fsub fast double %v1, %c -; CHECK: ret double - ret double %v2 -} - -; CHECK-LABEL: @fast_mulsub_vec1 -define <2 x double> @fast_mulsub_vec1(<2 x double> %a, <2 x double> %b, <2 x double> %c) { -top: -; CHECK: {{contract|fmuladd}} - %v1 = fmul <2 x double> %a, %b - %v2 = fsub fast <2 x double> %c, %v1 -; CHECK: ret <2 x double> - ret <2 x double> %v2 -} - -; COM: Should not mark fmul as contract when multiple uses of fmul exist -; CHECK-LABEL: @slow_muladd1 -define double @slow_muladd1(double %a, double %b, double %c) { -top: -; CHECK: %v1 = fmul double %a, %b - %v1 = fmul double %a, %b -; CHECK: %v2 = fadd fast double %v1, %c - %v2 = fadd fast double %v1, %c -; CHECK: %v3 = fadd fast double %v1, %b - %v3 = fadd fast double %v1, %b -; CHECK: %v4 = fadd fast double %v3, %v2 - %v4 = fadd fast double %v3, %v2 -; CHECK: 
ret double %v4 - ret double %v4 -} - -; COM: Should not mark fadd->fadd fast as contract -; CHECK-LABEL: @slow_addadd1 -define double @slow_addadd1(double %a, double %b, double %c) { -top: -; CHECK: %v1 = fadd double %a, %b - %v1 = fadd double %a, %b -; CHECK: %v2 = fadd fast double %v1, %c - %v2 = fadd fast double %v1, %c -; CHECK: ret double %v2 - ret double %v2 -} diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll index 0109010f4c1a1..849cf57c78aa3 100644 --- a/test/llvmpasses/multiversioning-annotate-only.ll +++ b/test/llvmpasses/multiversioning-annotate-only.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s ; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning ; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll index e37eefdc362f7..00f0db0aa1e91 100644 --- a/test/llvmpasses/multiversioning-clone-only.ll +++ b/test/llvmpasses/multiversioning-clone-only.ll @@ -1,38 +1,27 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE +; CHECK: @jl_gvar_base = hidden constant i64 0 +; CHECK: @jl_gvar_offsets = hidden constant [0 x i32] zeroinitializer ; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer ; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer -; TYPED: @subtarget_cloned_gv = hidden global i64* null ; OPAQUE: @subtarget_cloned_gv = hidden global ptr null -; TYPED: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null ; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null -; CHECK: @jl_fvar_offsets = hidden constant [2 x i32] [i32 1, i32 0] -; CHECK: @jl_gvar_base = hidden constant i64 0 -; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer +; CHECK: @jl_fvar_count = hidden constant i64 1 +; OPAQUE: @jl_fvar_ptrs = hidden global [1 x ptr] [ptr @subtarget_cloned] ; CHECK: @jl_clone_slots = hidden constant [5 x i32] -; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_gvar_base +; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_clone_slots ; CHECK: @jl_clone_idxs = hidden constant [13 x i32] ; COM: TODO actually check the clone idxs maybe? 
-; CHECK: @jl_clone_offsets = hidden constant [4 x i32] -; CHECK-SAME: sub -; CHECK-SAME: @subtarget_cloned.1 -; CHECK-SAME: @subtarget_cloned -; CHECK-SAME: sub -; CHECK-SAME: @subtarget_cloned.2 -; CHECK-SAME: @subtarget_cloned -; CHECK-SAME: sub - -@jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 16 -@jl_gvars = global [0 x i64*] zeroinitializer, align 16 -@jl_fvar_idxs = hidden constant [1 x i32] [i32 0], align 16 -@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16 -@subtarget_cloned_gv = hidden global i64* bitcast (i32 (i32)* @subtarget_cloned to i64*), align 16 +; OPAQUE: @jl_clone_ptrs = hidden constant [4 x ptr] [ptr @subtarget_cloned.1, ptr @subtarget_cloned.2, ptr @subtarget_cloned, ptr @subtarget_cloned] + +@jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 8 +@jl_gvar_base = hidden constant i64 zeroinitializer, align 8 +@jl_gvar_offsets = hidden constant [0 x i32] zeroinitializer, align 8 +@jl_fvar_idxs = hidden constant [1 x i32] [i32 0], align 8 +@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8 +@subtarget_cloned_gv = hidden global i64* bitcast (i32 (i32)* @subtarget_cloned to i64*), align 8 @subtarget_cloned_aliased = alias i32 (i32), i32 (i32)* @subtarget_cloned diff --git a/test/llvmpasses/multiversioning-x86.ll b/test/llvmpasses/multiversioning-x86.ll new file mode 100644 index 0000000000000..ff4a8abba5252 --- /dev/null +++ b/test/llvmpasses/multiversioning-x86.ll @@ -0,0 +1,123 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE + + +; COM: This test checks that multiversioning actually happens from start to finish +; COM: We need the fvars for a proper test + + + +; OPAQUE: @jl_gvar_ptrs = global [0 x ptr] zeroinitializer, align 8 +; CHECK: @jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 8 +; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8 +; OPAQUE: @simd_test.reloc_slot = hidden global ptr null +; OPAQUE: @jl_fvar_ptrs = hidden global [5 x ptr] [ptr @boring, ptr @fastmath_test, ptr @loop_test, ptr @simd_test, ptr @simd_test_call] +; OPAQUE: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.reloc_slot to i64), i64 ptrtoint (ptr @jl_clone_slots to i64)) to i32)] +; CHECK: @jl_clone_idxs = hidden constant [10 x i32] [i32 -2147483647, i32 3, i32 -2147483647, i32 3, i32 4, i32 1, i32 1, i32 2, i32 -2147483645, i32 4] +; OPAQUE: @jl_clone_ptrs = hidden constant [9 x ptr] [ptr @boring.1, ptr @fastmath_test.1, ptr @loop_test.1, ptr @simd_test.1, ptr @simd_test_call.1, ptr @fastmath_test.2, ptr @loop_test.2, ptr @simd_test.2, ptr @simd_test_call.2] + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" + +@jl_fvars = global [5 x i64*] [i64* bitcast (i32 (i32)* @boring to i64*), + i64* bitcast (float (float, float)* @fastmath_test to i64*), + i64* bitcast (i32 (i32)* @loop_test to i64*), + i64* bitcast (i32 (<4 x i32>)* @simd_test to i64*), + i64* bitcast (i32 (<4 x i32>)* @simd_test_call to i64*) + ], align 8 +@jl_gvar_ptrs = global [0 x i64*] zeroinitializer, align 8 +@jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 
4], align 8 +@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 8 + +declare i1 @julia.cpu.have_fma.f32() + +; CHECK: @boring{{.*}}#[[BORING_BASE:[0-9]+]] +define noundef i32 @boring(i32 noundef %0) { + ret i32 %0 +} + +; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]] +; CHECK: %3 = sitofp i1 false to float +define noundef float @fastmath_test(float noundef %0, float noundef %1) { + %3 = call i1 @julia.cpu.have_fma.f32() + %4 = sitofp i1 %3 to float + %5 = fadd fast float %0, %4 + ret float %5 +} + +; CHECK: @loop_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]] +define noundef i32 @loop_test(i32 noundef %0) { + %2 = icmp sgt i32 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %5, %1 + %4 = phi i32 [ 0, %1 ], [ %9, %5 ] + ret i32 %4 + +5: ; preds = %1, %5 + %6 = phi i32 [ %10, %5 ], [ 0, %1 ] + %7 = phi i32 [ %9, %5 ], [ 0, %1 ] + %8 = lshr i32 %6, 1 + %9 = add nuw nsw i32 %8, %7 + %10 = add nuw nsw i32 %6, 1 + %11 = icmp eq i32 %10, %0 + br i1 %11, label %3, label %5;, !llvm.loop - +} + +; CHECK: @simd_test{{.*}}#[[SIMD_BASE_RELOC:[0-9]+]] +define noundef i32 @simd_test(<4 x i32> noundef %0) { + %2 = extractelement <4 x i32> %0, i64 0 + ret i32 %2 +} + +; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_BASE:[0-9]+]] +define noundef i32 @simd_test_call(<4 x i32> noundef %0) { + %2 = call noundef i32 @simd_test(<4 x i32> noundef %0) + ret i32 %2 +} + +; CHECK: @boring{{.*}}#[[BORING_CLONE:[0-9]+]] + +; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]] +; CHECK: %3 = sitofp i1 false to float + +; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]] +; CHECK: %3 = sitofp i1 true to float + +; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]] + +; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]] + +; CHECK: @simd_test{{.*}}#[[SIMD_CLONE1:[0-9]+]] + +; CHECK: @simd_test{{.*}}#[[SIMD_CLONE2:[0-9]+]] + +; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]] +; OPAQUE: %2 = load ptr, ptr @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12 +; CHECK: %3 = call noundef i32 %2(<4 x i32> noundef %0) + +; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]] +; CHECK: %2 = call noundef i32 @simd_test.2(<4 x i32> noundef %0) + +; CHECK-DAG: attributes #[[BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[NOT_BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="x86-64" 
"target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[SIMD_BASE_RELOC]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[BORING_CLONE]] = { "julia.mv.clone"="1" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[NOT_BORING_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[NOT_BORING_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="haswell" 
"target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[SIMD_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[SIMD_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="haswell" "target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } + + +!llvm.module.flags = !{!0, !2} + + +!0 = !{i32 1, !"julia.mv.enable", i32 1} +!1 = !{!1} +!2 = !{i32 1, !"julia.mv.specs", !3} +!3 = !{!4, !5, !6} +!4 = !{!"x86-64", !"+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 0} +!5 = !{!"sandybridge", 
!"+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 2} +!6 = !{!"haswell", !"+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512cd,-sha,-avx512bw,-avx512vl,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 1, i32 284} diff --git a/test/llvmpasses/names.jl b/test/llvmpasses/names.jl new file mode 100644 index 0000000000000..1ab2204044804 --- /dev/null +++ b/test/llvmpasses/names.jl @@ -0,0 +1,183 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll +# RUN: cat %t/module.ll | FileCheck %s + +## Notes: +# This script uses the `emit` function (defined llvmpasses.jl) to emit either +# optimized or unoptimized LLVM IR. Each function is emitted individually and +# `llvm-link` is used to create a single module that can be passed to opt. +# The order in which files are emitted and linked is important since `lit` will +# process the test cases in order. + +include(joinpath("..", "testhelpers", "llvmpasses.jl")) + +# COM: check basic parameter names +function f1(a, b, c, d) + return a + b + c + d +end + +# COM: check basic parameter names + varargs +function f2(a, b, c, d, e...) 
+ return a + b + c + d + sum(e) +end + +mutable struct D + i::Int64 +end +struct C + d::D +end +struct B + c::C +end +struct A + b::B +end + +# COM: check getfield/setfield names +function f5(a) + a.b.c.d.i = 0 + return a.b.c.d +end + +struct H end +struct G + h::Ref{H} +end +struct F + g::Ref{G} +end +struct E + f::Ref{F} +end + +# COM: check gc lowering names +function f6(e) + return e.f[].g[].h[] +end + +# COM: check getfield for Tuples +function f7(a) + return a[2] +end + +# COM: check write barrier names and struct names +mutable struct Barrier + b +end + +# COM: check write barrier names +function f8(b,y) + b.b = y + return b +end + +struct Named + x::Int +end + +function fmemory(nel) + return Memory{Int64}(undef,nel) +end +# CHECK-LABEL: define {{(swiftcc )?}}double @julia_f1 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" + +# CHECK: fadd double +# CHECK-DAG: %"a::Float64" +# CHECK-DAG: %"b::Float64" +# CHECK-DAG: fadd double +# CHECK-DAG: %"c::Float64" +# CHECK-DAG: fadd double +# CHECK-DAG: %"d::Float64" +# CHECK: ret double +# CHECK: } + +# CHECK-LABEL: define nonnull ptr @jfptr_f1 +# CHECK-SAME: %"function::Core.Function" +# CHECK-SAME: %"args::Any[]" +# CHECK-SAME: %"nargs::UInt32" +# CHECK: %"+Core.Float64 +# CHECK: ret ptr +# CHECK: } +emit(f1, Float64, Float64, Float64, Float64) + +# CHECK: define {{(swiftcc )?}}double @julia_f2 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" +# CHECK-SAME: double %"e[1]::Float64" +emit(f2, Float64, Float64, Float64, Float64, Float64) + +# CHECK: define {{(swiftcc )?}}double @julia_f2 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" +# CHECK-SAME: double %"e[1]::Float64" +# CHECK-SAME: double %"e[2]::Float64" +emit(f2, Float64, Float64, Float64, Float64, Float64, Float64) + + +# CHECK: define {{(swiftcc )?}}double @julia_f2 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" +# CHECK-SAME: double %"e[1]::Float64" +# CHECK-SAME: double %"e[2]::Float64" +# CHECK-SAME: double %"e[3]::Float64" +emit(f2, Float64, Float64, Float64, Float64, Float64, Float64, Float64) + +# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f5 +# CHECK-SAME: %"a::A" +# CHECK: %"a::A.d +# COM: this text check relies on our LLVM code emission being relatively poor, which is not always the case +emit(f5, A) + +# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f6 +# CHECK-SAME: %"e::E" +# CHECK: %jlcallframe +# CHECK: %gcframe +# CHECK: %frame.prev +# CHECK: %task.gcstack +# CHECK: %ptls_field +# CHECK: %ptls_load +# CHECK: %safepoint +# CHECK: %"e::E.f" +# CHECK: %"e::E.f.tag_addr" +# CHECK: %"e::E.f.tag" +# CHECK: @"+Main.Base.RefValue +# CHECK: %gc_slot_addr_0 +# CHECK: @"jl_sym#g +# CHECK: @"jl_sym#h +emit(f6, E) + + +# CHECK: define {{(swiftcc )?}}i64 @julia_f7 +# CHECK-SAME: %"a::Tuple" +# CHECK: %"a::Tuple[2]_ptr.unbox +emit(f7, Tuple{Int,Int}) + +# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f8 +# CHECK-SAME: %"y::Int64" +# CHECK: %parent_bits +# CHECK: %parent_old_marked +# CHECK: %child_bit +# CHECK: %child_not_marked +emit(f8, Barrier, Int) + +# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_Barrier +# CHECK-SAME: %"b::Named" +# CHECK: %"new::Barrier" +# CHECK: %"box::Named" 
+emit(Barrier, Named) + +# CHECK: define {{(swiftcc )?}}nonnull ptr @julia_fmemory +# CHECK-SAME: %"nel::Int64" +# CHECK: %"Memory{Int64}[]" +emit(fmemory, Int64) diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll index 434ffbb26c95f..b8aec5ee2fa71 100644 --- a/test/llvmpasses/parsing.ll +++ b/test/llvmpasses/parsing.ll @@ -1,6 +1,9 @@ ; COM: NewPM-only test, tests for ability to parse Julia passes -; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,FinalLowerGC,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia" -S %s -o /dev/null +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia" -S %s -o /dev/null +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia" -S %s -o /dev/null define void @test() { ret void diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl index e48a5f7df111f..5dab675f2b547 100644 --- a/test/llvmpasses/pipeline-o0.jl +++ b/test/llvmpasses/pipeline-o0.jl @@ -1,14 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" - -# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s -# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s -# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s -# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s - -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" - # RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s @@ -19,7 +10,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) # CHECK-LABEL: @julia_simple # CHECK-NOT: julia.get_pgcstack # CHECK-NOT: julia.gc_alloc_obj -# CHECK: ijl_gc_pool_alloc +# CHECK: ijl_gc_small_alloc # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes function simple() Ref(0) diff --git a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl index 86ab9125f2f27..999e63e2725c4 100644 --- a/test/llvmpasses/pipeline-o2-allocs.jl +++ b/test/llvmpasses/pipeline-o2-allocs.jl @@ -1,12 +1,5 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" - -# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s -# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s - -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" - # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s @@ -60,8 +53,7 @@ end # CHECK-NOT: julia.gc_preserve_end function nopreserve() ref = Ref(0) - GC.@preserve ref begin - end + GC.@preserve ref begin end end # COM: this cordons off the attributes/function declarations from the actual diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl index 83a4450522c79..584e8855f0f8c 100644 --- a/test/llvmpasses/pipeline-o2-broadcast.jl +++ b/test/llvmpasses/pipeline-o2-broadcast.jl @@ -1,12 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" - -# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s -# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s - -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" - # RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl index 9fd42562f96aa..ceb2fe3bf65d6 100644 --- a/test/llvmpasses/pipeline-o2.jl +++ b/test/llvmpasses/pipeline-o2.jl @@ -1,20 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" - -# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL -# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL - -# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF -# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF - -# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO -# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO - -# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" - -# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL -# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL +# RUNx: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL +# RUNx: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL # RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF # RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll index 0c0d81420d9fe..9c27885c5ca45 100644 --- a/test/llvmpasses/pipeline-prints.ll +++ b/test/llvmpasses/pipeline-prints.ll @@ -1,46 +1,25 @@ ; COM: This is a newpm-only test, no legacypm command ; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | 
FileCheck %s --check-prefixes=BEFORELICM -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION - -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION 
-; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -force-vector-width=2 -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext 
-passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION ; ModuleID = 'f' source_filename = "f" @@ -319,12 +298,12 @@ attributes #2 = { inaccessiblemem_or_argmemonly } ; COM: Loop simplification makes the exit condition obvious ; AFTERLOOPSIMPLIFICATION: L35.lr.ph: -; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw +; AFTERLOOPSIMPLIFICATION: add nuw nsw ; COM: Scalar optimization removes the previous add from the preheader -; AFTERSCALAROPTIMIZATION: L35.preheader: +; AFTERSCALAROPTIMIZATION: L35.lr.ph: ; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw -; AFTERSCALAROPTIMIZATION-NEXT: br label %L35 +; AFTERSCALAROPTIMIZATION: br label %L35 ; COM: Vectorization does stuff ; AFTERVECTORIZATION: vector.body @@ -332,4 +311,4 @@ attributes #2 = { inaccessiblemem_or_argmemonly } ; COM: Intrinsics are lowered and cleaned up by the time optimization is finished ; AFTEROPTIMIZATION-NOT: call void @julia.safepoint -; AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint \ No newline at end of file +; AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll index ac491000ba1e5..996b995a58556 100644 --- a/test/llvmpasses/propagate-addrspace-non-zero.ll +++ b/test/llvmpasses/propagate-addrspace-non-zero.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll index ffed83ddb615a..033fbd6f0386e 100644 --- a/test/llvmpasses/propagate-addrspace.ll +++ b/test/llvmpasses/propagate-addrspace.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s define i64 @simple() { ; CHECK-LABEL: @simple diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll index 4637fc4b45071..da32758c1dc5b 100644 --- a/test/llvmpasses/refinements.ll +++ b/test/llvmpasses/refinements.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare {}*** @julia.ptls_states() @@ -30,14 +26,12 @@ define void @argument_refinement({} addrspace(10)* %a) { ; Check that we reuse the gc slot from the box define void @heap_refinement1(i64 %a) { ; CHECK-LABEL: @heap_refinement1 -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; TYPED: store {} addrspace(10)* %aboxed ; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* @@ -49,14 +43,12 @@ define void @heap_refinement1(i64 %a) { ; Check that we don't root the allocated value here, just the derived value define void @heap_refinement2(i64 %a) { ; CHECK-LABEL: @heap_refinement2 -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; TYPED: store {} addrspace(10)* %loaded1 ; OPAQUE: store ptr addrspace(10) %loaded1 call void @jl_safepoint() %casted2 = bitcast {} 
addrspace(10)* %loaded1 to i64 addrspace(10)* @@ -67,20 +59,14 @@ define void @heap_refinement2(i64 %a) { ; Check that the way we compute rooting is compatible with refinements define void @issue22770() { ; CHECK-LABEL: @issue22770 -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %y = call {} addrspace(10)* @allocate_some_value() %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)* %x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; TYPED: store {} addrspace(10)* %y, ; OPAQUE: store ptr addrspace(10) %y, %a = call {} addrspace(10)* @allocate_some_value() -; TYPED: store {} addrspace(10)* %a -; TYPED: call void @one_arg_boxed({} addrspace(10)* %x) -; TYPED: call void @one_arg_boxed({} addrspace(10)* %a) -; TYPED: call void @one_arg_boxed({} addrspace(10)* %y) ; OPAQUE: store ptr addrspace(10) %a ; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %x) @@ -89,10 +75,8 @@ define void @issue22770() { call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %a) call void @one_arg_boxed({} addrspace(10)* %y) -; TYPED: store {} addrspace(10)* %x ; OPAQUE: store ptr addrspace(10) %x %c = call {} addrspace(10)* @allocate_some_value() -; TYPED: store {} addrspace(10)* %c ; OPAQUE: store ptr addrspace(10) %c call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %c) @@ -123,7 +107,6 @@ L3: define void @dont_refine_loop({} addrspace(10)* %x) { ; CHECK-LABEL: @dont_refine_loop -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -167,7 +150,6 @@ L2: define void @refine_loop_indirect({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -193,7 +175,6 @@ L2: define void @refine_loop_indirect2({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect2 -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 ; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll index b2d14ae49c8e7..fbd84de85a4a3 100644 --- a/test/llvmpasses/remove-addrspaces.ll +++ b/test/llvmpasses/remove-addrspaces.ll @@ -1,11 +1,12 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; COM: check that package image fptrs work +@pjlsys_BoundsError_32 = internal global {} addrspace(10)* ({}***, {} addrspace(10)*, [1 x i64] addrspace(11)*)* null +; CHECK: @pjlsys_BoundsError_32 = internal global +; OPAQUE-SAME: ptr null define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) { ; CHECK-LABEL: @getindex @@ -37,7 +38,6 @@ top: define nonnull {} addrspace(10)* @constexpr(i64) { ; CHECK-LABEL: @constexpr top: -; TYPED: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) ; OPAQUE: call ptr inttoptr (i64 139806640486784 to ptr)(ptr inttoptr (i64 139806425039920 to ptr), i64 1) %1 = call {} addrspace(10)* inttoptr (i64 139806640486784 to {} addrspace(10)* ({} addrspace(10)*, i64)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 139806425039920 to {}*) to {} addrspace(10)*), i64 1) ; CHECK-NOT: addrspacecast @@ -67,22 +67,16 @@ top: %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1 ; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix. ; CHECK: getelementptr %list -; TYPED-SAME: %list* %a ; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; TYPED-SAME: %list* %a ; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; TYPED-SAME: %list* %b ; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; TYPED-SAME: %list* %b ; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; TYPED-SAME: %list* %c ; OPAQUE-SAME: ptr %c ; CHECK: getelementptr %list -; TYPED-SAME: %list* %c ; OPAQUE-SAME: ptr %c store i64 111, i64* %a.car store i64 222, i64* %b.car @@ -112,7 +106,6 @@ exit: ; COM: check that address spaces in byval types are processed correctly define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) { -; TYPED: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) ; OPAQUE: define void @byval_type(ptr byval([1 x ptr]) %0) ret void } diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll index 699d89f7257d0..eb1c6444129c3 100644 --- a/test/llvmpasses/returnstwicegc.ll +++ b/test/llvmpasses/returnstwicegc.ll @@ -1,10 +1,6 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=TYPED - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=OPAQUE -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -17,8 +13,12 @@ declare void @one_arg_boxed({} addrspace(10)*) define void @try_catch(i64 %a, i64 %b) { ; Because of the returns_twice function, we need to keep aboxed live everywhere -; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 -; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 +; CHECK: %gcframe = alloca ptr addrspace(10), i32 4 +; CHECK: store ptr addrspace(10) %aboxed, ptr [[slot_0:%.*]], +; CHECK-NOT: store {{.*}} ptr [[slot_0]] +; CHECK: store ptr addrspace(10) %bboxed, ptr {{%.*}} +; CHECK-NOT: store {{.*}} ptr [[slot_0]] + top: %sigframe = alloca [208 x i8], align 16 %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0 diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl index dc6752e76d595..173058df12fb1 100644 --- a/test/llvmpasses/safepoint_stress.jl +++ b/test/llvmpasses/safepoint_stress.jl @@ -1,7 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s +# RUN: julia --startup-file=no %s | opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S - | FileCheck %s println(""" @@ -15,7 +14,7 @@ define void @stress(i64 %a, i64 %b) { %ptls = call {}*** @julia.ptls_states() """) -# CHECK: %gcframe = alloca {} addrspace(10)*, i32 10002 +# CHECK: %gcframe = alloca ptr addrspace(10), i32 10002 for i = 1:10000 println("\t%arg$i = call {} addrspace(10)* @alloc()") end diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll deleted file mode 100644 index 929fbeea2c3f5..0000000000000 --- a/test/llvmpasses/simdloop.ll +++ /dev/null @@ -1,101 +0,0 @@ -; This file is a part of Julia. 
License is MIT: https://julialang.org/license - -; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s - -; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s - -declare void @julia.loopinfo_marker() - -; CHECK-LABEL: @simd_test( -define void @simd_test(double *%a, double *%b) { -top: - br label %loop -loop: - %i = phi i64 [0, %top], [%nexti, %loop] - %aptr = getelementptr double, double *%a, i64 %i - %bptr = getelementptr double, double *%b, i64 %i -; CHECK: llvm.mem.parallel_loop_access - %aval = load double, double *%aptr - %bval = load double, double *%aptr - %cval = fadd double %aval, %bval - store double %cval, double *%bptr - %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 - %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop -loopdone: - ret void -} - -; CHECK-LABEL: @simd_test_sub( -define double @simd_test_sub(double *%a) { -top: - br label %loop -loop: - %i = phi i64 [0, %top], [%nexti, %loop] - %v = phi double [0.000000e+00, %top], [%nextv, %loop] - %aptr = getelementptr double, double *%a, i64 %i -; CHECK: llvm.mem.parallel_loop_access - %aval = load double, double *%aptr - %nextv = fsub double %v, %aval -; CHECK: fsub reassoc contract double %v, %aval - %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 - %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop -loopdone: - ret double %nextv -} - -; CHECK-LABEL: @simd_test_sub2( -define double @simd_test_sub2(double *%a) { -top: - br label %loop -loop: - %i = phi i64 [0, %top], [%nexti, %loop] - %v = phi double [0.000000e+00, %top], [%nextv, %loop] - %aptr = getelementptr double, double *%a, i64 %i - %aval = load double, double *%aptr - %nextv = fsub double %v, %aval -; CHECK: fsub reassoc contract double %v, %aval - %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !2 - %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop -loopdone: - ret double %nextv -} - -; Tests if we correctly pass through other metadata -; CHECK-LABEL: @disabled( -define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %add = add nsw i32 %0, %N - %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - store i32 %add, i32* %arrayidx2, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !4 - %exitcond = icmp eq i64 %indvars.iv.next, 48 -; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - %1 = load i32, i32* %a, align 4 - ret i32 %1 -} - -!1 = !{} -!2 = !{!"julia.simdloop"} -!3 = !{!"julia.simdloop", !"julia.ivdep"} -!4 = !{!"julia.simdloop", !"julia.ivdep", !5} -!5 = !{!"llvm.loop.vectorize.disable", i1 0} -; CHECK: [[LOOP]] = distinct !{[[LOOP]], 
[[LOOP_DISABLE:![0-9]+]]} -; CHECK-NEXT: [[LOOP_DISABLE]] = !{!"llvm.loop.vectorize.disable", i1 false} diff --git a/test/loading.jl b/test/loading.jl index 394c13c5f2962..09f96e1f43578 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1,10 +1,10 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -original_depot_path = copy(Base.DEPOT_PATH) - using Test # Tests for @__LINE__ inside and outside of macros +# NOTE: the __LINE__ numbers for these first couple tests are significant, so +# adding any lines here will make those tests fail @test (@__LINE__) == 8 macro macro_caller_lineno() @@ -33,6 +33,9 @@ end @test @nested_LINE_expansion() == ((@__LINE__() - 4, @__LINE__() - 12), @__LINE__()) @test @nested_LINE_expansion2() == ((@__LINE__() - 5, @__LINE__() - 9), @__LINE__()) +original_depot_path = copy(Base.DEPOT_PATH) +include("precompile_utils.jl") + loaded_files = String[] push!(Base.include_callbacks, (mod::Module, fn::String) -> push!(loaded_files, fn)) include("test_sourcepath.jl") @@ -60,7 +63,7 @@ let exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no --co @test !endswith(s_dir, Base.Filesystem.path_separator) end -@test Base.in_sysimage(Base.PkgId(Base.UUID("cf7118a7-6976-5b1a-9a39-7adc72f591a4"), "UUIDs")) +@test Base.in_sysimage(Base.PkgId(Base.UUID("8f399da3-3557-5675-b5ff-fb832c97cbdb"), "Libdl")) @test Base.in_sysimage(Base.PkgId(Base.UUID("3a7fdc7e-7467-41b4-9f64-ea033d046d5b"), "NotAPackage")) == false ## Unit tests for safe file operations ## @@ -167,7 +170,7 @@ end @test root.uuid == root_uuid @test this.uuid == this_uuid - @test that == nothing + @test that === nothing write(project_file, """ name = "Root" @@ -180,8 +183,8 @@ end that = Base.identify_package("That") @test root.uuid == proj_uuid - @test this == nothing - @test that == nothing + @test this === nothing + @test that === nothing finally copy!(LOAD_PATH, old_load_path) end @@ -213,8 +216,8 @@ end that = Base.identify_package("That") @test root.uuid == root_uuid - @test this == nothing - @test that == nothing + @test this === nothing + @test that === nothing @test Base.get_uuid_name(project_file, this_uuid) == "This" finally @@ -273,8 +276,8 @@ end @test joinpath(@__DIR__, normpath(path)) == locate_package(pkg) @test Base.compilecache_path(pkg, UInt64(0)) == Base.compilecache_path(pkg, UInt64(0)) end - @test identify_package("Baz") == nothing - @test identify_package("Qux") == nothing + @test identify_package("Baz") === nothing + @test identify_package("Qux") === nothing @testset "equivalent package names" begin classes = [ ["Foo"], @@ -721,16 +724,17 @@ end @testset "expansion of JULIA_DEPOT_PATH" begin s = Sys.iswindows() ? 
';' : ':' tmp = "/this/does/not/exist" - DEFAULT = Base.append_default_depot_path!(String[]) + default = joinpath(homedir(), ".julia") + bundled = Base.append_bundled_depot_path!(String[]) cases = Dict{Any,Vector{String}}( - nothing => DEFAULT, + nothing => [default; bundled], "" => [], - "$s" => DEFAULT, - "$tmp$s" => [tmp; DEFAULT], - "$s$tmp" => [DEFAULT; tmp], + "$s" => [default; bundled], + "$tmp$s" => [tmp; bundled], + "$s$tmp" => [default; bundled; tmp], ) for (env, result) in pairs(cases) - script = "DEPOT_PATH == $(repr(result)) || error()" + script = "DEPOT_PATH == $(repr(result)) || error(\"actual depot \" * join(DEPOT_PATH,':') * \" does not match expected depot \" * join($(repr(result)), ':'))" cmd = `$(Base.julia_cmd()) --startup-file=no -e $script` cmd = addenv(cmd, "JULIA_DEPOT_PATH" => env) cmd = pipeline(cmd; stdout, stderr) @@ -792,11 +796,24 @@ import .Foo28190.Libdl; import Libdl end end +@testset "`::AbstractString` constraint on the path argument to `include`" begin + for m ∈ (NotPkgModule, evalfile("testhelpers/just_module.jl")) + let i = m.include + @test !applicable(i, (nothing,)) + @test !applicable(i, (identity, nothing,)) + @test !hasmethod(i, Tuple{Nothing}) + @test !hasmethod(i, Tuple{Function,Nothing}) + end + end +end + @testset "`Base.project_names` and friends" begin # Some functions in Pkg assumes that these tuples have the same length n = length(Base.project_names) - @test length(Base.manifest_names) == n @test length(Base.preferences_names) == n + + # there are two manifest names per project name + @test length(Base.manifest_names) == 2n end @testset "Manifest formats" begin @@ -825,20 +842,31 @@ end end end -@testset "error message loading pkg bad module name" begin +@testset "Manifest name preferential loading" begin mktempdir() do tmp - old_loadpath = copy(LOAD_PATH) - try - push!(LOAD_PATH, tmp) - write(joinpath(tmp, "BadCase.jl"), "module badcase end") - @test_logs (:warn, r"The call to compilecache failed.*") match_mode=:any begin - @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \ - check for typos in package module name") (@eval using BadCase) - end - finally - copy!(LOAD_PATH, old_loadpath) + proj = joinpath(tmp, "Project.toml") + touch(proj) + for man_name in ( + "Manifest.toml", + "JuliaManifest.toml", + "Manifest-v$(VERSION.major).$(VERSION.minor).toml", + "JuliaManifest-v$(VERSION.major).$(VERSION.minor).toml" + ) + touch(joinpath(tmp, man_name)) + man = basename(Base.project_file_manifest_path(proj)) + @test man == man_name end end + mktempdir() do tmp + # check that another version isn't preferred + proj = joinpath(tmp, "Project.toml") + touch(proj) + touch(joinpath(tmp, "Manifest-v1.5.toml")) + @test Base.project_file_manifest_path(proj) === nothing + touch(joinpath(tmp, "Manifest.toml")) + man = basename(Base.project_file_manifest_path(proj)) + @test man == "Manifest.toml" + end end @testset "Preferences loading" begin @@ -1004,6 +1032,16 @@ end end @testset "Extensions" begin + test_ext = """ + function test_ext(parent::Module, ext::Symbol) + _ext = Base.get_extension(parent, ext) + _ext isa Module || error("expected extension \$ext to be loaded") + _pkgdir = pkgdir(_ext) + _pkgdir == pkgdir(parent) != nothing || error("unexpected extension \$ext pkgdir path: \$_pkgdir") + _pkgversion = pkgversion(_ext) + _pkgversion == pkgversion(parent) || error("unexpected extension \$ext version: \$_pkgversion") + end + """ depot_path = mktempdir() try proj = joinpath(@__DIR__, "project", "Extensions", 
"HasDepWithExtensions.jl") @@ -1014,6 +1052,7 @@ end cmd = """ $load_distr begin + $ew $test_ext $ew push!(empty!(DEPOT_PATH), $(repr(depot_path))) using HasExtensions $ew using HasExtensions @@ -1021,13 +1060,16 @@ end $ew HasExtensions.ext_loaded && error("ext_loaded set") using HasDepWithExtensions $ew using HasDepWithExtensions + $ew test_ext(HasExtensions, :Extension) $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set") $ew HasExtensions.ext_loaded || error("ext_loaded not set") $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set") $ew HasDepWithExtensions.do_something() || error("do_something errored") using ExtDep2 $ew using ExtDep2 - $ew HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set") + using ExtDep3 + $ew using ExtDep3 + $ew HasExtensions.ext_dep_loaded || error("ext_dep_loaded not set") end """ return `$(Base.julia_cmd()) $compile --startup-file=no -e $cmd` @@ -1070,11 +1112,14 @@ end test_ext_proj = """ begin + $test_ext using HasExtensions using ExtDep - Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load") + test_ext(HasExtensions, :Extension) using ExtDep2 - Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load") + test_ext(HasExtensions, :ExtensionFolder) + using ExtDep3 + test_ext(HasExtensions, :ExtensionDep) end """ for compile in (`--compiled-modules=no`, ``) @@ -1083,6 +1128,132 @@ end cmd_proj_ext = addenv(cmd_proj_ext, "JULIA_LOAD_PATH" => join([joinpath(proj, "HasExtensions.jl"), joinpath(proj, "EnvWithDeps")], sep)) run(cmd_proj_ext) end + + # Extensions in implicit environments + old_load_path = copy(LOAD_PATH) + try + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ImplicitEnv")) + pkgid_B = Base.PkgId(Base.uuid5(Base.identify_package("A").uuid, "BExt"), "BExt") + @test Base.identify_package(pkgid_B, "B") isa Base.PkgId + finally + copy!(LOAD_PATH, old_load_path) + end + + # Extension with cycles in dependencies + code = """ + using CyclicExtensions + Base.get_extension(CyclicExtensions, :ExtA) isa Module || error("expected extension to load") + Base.get_extension(CyclicExtensions, :ExtB) isa Module || error("expected extension to load") + CyclicExtensions.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "CyclicExtensions") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, "JULIA_LOAD_PATH" => proj) + @test occursin("Hello Cycles!", String(read(cmd))) + + # Extension-to-extension dependencies + + mktempdir() do depot # Parallel pre-compilation + code = """ + Base.disable_parallel_precompile = false + using ExtToExtDependency + Base.get_extension(ExtToExtDependency, :ExtA) isa Module || error("expected extension to load") + Base.get_extension(ExtToExtDependency, :ExtAB) isa Module || error("expected extension to load") + ExtToExtDependency.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "ExtToExtDependency") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, + "JULIA_LOAD_PATH" => proj, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + ) + @test occursin("Hello ext-to-ext!", String(read(cmd))) + end + mktempdir() do depot # Serial pre-compilation + code = """ + Base.disable_parallel_precompile = true + using ExtToExtDependency + Base.get_extension(ExtToExtDependency, :ExtA) isa Module || error("expected extension to load") + 
Base.get_extension(ExtToExtDependency, :ExtAB) isa Module || error("expected extension to load") + ExtToExtDependency.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "ExtToExtDependency") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, + "JULIA_LOAD_PATH" => proj, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + ) + @test occursin("Hello ext-to-ext!", String(read(cmd))) + end + + mktempdir() do depot # Parallel pre-compilation + code = """ + Base.disable_parallel_precompile = false + using CrossPackageExtToExtDependency + Base.get_extension(CrossPackageExtToExtDependency.CyclicExtensions, :ExtA) isa Module || error("expected extension to load") + Base.get_extension(CrossPackageExtToExtDependency, :ExtAB) isa Module || error("expected extension to load") + CrossPackageExtToExtDependency.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "CrossPackageExtToExtDependency") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, + "JULIA_LOAD_PATH" => proj, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + ) + @test occursin("Hello x-package ext-to-ext!", String(read(cmd))) + end + mktempdir() do depot # Serial pre-compilation + code = """ + Base.disable_parallel_precompile = true + using CrossPackageExtToExtDependency + Base.get_extension(CrossPackageExtToExtDependency.CyclicExtensions, :ExtA) isa Module || error("expected extension to load") + Base.get_extension(CrossPackageExtToExtDependency, :ExtAB) isa Module || error("expected extension to load") + CrossPackageExtToExtDependency.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "CrossPackageExtToExtDependency") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, + "JULIA_LOAD_PATH" => proj, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + ) + @test occursin("Hello x-package ext-to-ext!", String(read(cmd))) + end + + # Extensions for "parent" dependencies + # (i.e. an `ExtAB` where A depends on / loads B, but B provides the extension) + + mktempdir() do depot # Parallel pre-compilation + code = """ + Base.disable_parallel_precompile = false + using Parent + Base.get_extension(getfield(Parent, :DepWithParentExt), :ParentExt) isa Module || error("expected extension to load") + Parent.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "Parent.jl") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, + "JULIA_LOAD_PATH" => proj, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + ) + @test occursin("Hello parent!", String(read(cmd))) + end + mktempdir() do depot # Serial pre-compilation + code = """ + Base.disable_parallel_precompile = true + using Parent + Base.get_extension(getfield(Parent, :DepWithParentExt), :ParentExt) isa Module || error("expected extension to load") + Parent.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "Parent.jl") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, + "JULIA_LOAD_PATH" => proj, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + ) + @test occursin("Hello parent!", String(read(cmd))) + end + finally try rm(depot_path, force=true, recursive=true) @@ -1092,7 +1263,7 @@ end end end -pkgimage(val) = val == 1 ? `--pkgimage=yes` : `--pkgimage=no` +pkgimage(val) = val == 1 ? `--pkgimages=yes` : `--pkgimages=no` opt_level(val) = `-O$val` debug_level(val) = `-g$val` inline(val) = val == 1 ? 
`--inline=yes` : `--inline=no` @@ -1137,49 +1308,425 @@ end @test cf.check_bounds == 3 @test cf.inline @test cf.opt_level == 3 - - io = PipeBuffer() - show(io, cf) - @test read(io, String) == "use_pkgimages = true, debug_level = 3, check_bounds = 3, inline = true, opt_level = 3" + @test repr(cf) == "CacheFlags(; use_pkgimages=true, debug_level=3, check_bounds=3, inline=true, opt_level=3)" end empty!(Base.DEPOT_PATH) append!(Base.DEPOT_PATH, original_depot_path) +module loaded_pkgid1 end +module loaded_pkgid2 end +module loaded_pkgid3 end +module loaded_pkgid4 end + @testset "loading deadlock detector" begin pkid1 = Base.PkgId("pkgid1") pkid2 = Base.PkgId("pkgid2") pkid3 = Base.PkgId("pkgid3") pkid4 = Base.PkgId("pkgid4") + build_id = UInt128(0) e = Base.Event() - @test nothing === @lock Base.require_lock Base.start_loading(pkid4) # module pkgid4 - @test nothing === @lock Base.require_lock Base.start_loading(pkid1) # module pkgid1 + @test nothing === @lock Base.require_lock Base.start_loading(pkid4, build_id, false) # module pkgid4 + @test nothing === @lock Base.require_lock Base.start_loading(pkid1, build_id, false) # module pkgid1 t1 = @async begin - @test nothing === @lock Base.require_lock Base.start_loading(pkid2) # @async module pkgid2; using pkgid1; end + @test nothing === @lock Base.require_lock Base.start_loading(pkid2, build_id, false) # @async module pkgid2; using pkgid1; end notify(e) - @test "loaded_pkgid1" == @lock Base.require_lock Base.start_loading(pkid1) - @lock Base.require_lock Base.end_loading(pkid2, "loaded_pkgid2") + @test loaded_pkgid1 == @lock Base.require_lock Base.start_loading(pkid1, build_id, false) + @lock Base.require_lock Base.end_loading(pkid2, loaded_pkgid2) end wait(e) reset(e) t2 = @async begin - @test nothing === @lock Base.require_lock Base.start_loading(pkid3) # @async module pkgid3; using pkgid2; end + @test nothing === @lock Base.require_lock Base.start_loading(pkid3, build_id, false) # @async module pkgid3; using pkgid2; end notify(e) - @test "loaded_pkgid2" == @lock Base.require_lock Base.start_loading(pkid2) - @lock Base.require_lock Base.end_loading(pkid3, "loaded_pkgid3") + @test loaded_pkgid2 == @lock Base.require_lock Base.start_loading(pkid2, build_id, false) + @lock Base.require_lock Base.end_loading(pkid3, loaded_pkgid3) end wait(e) reset(e) @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid3 -> pkgid2 -> pkgid1 -> pkgid3 && pkgid4"), - @lock Base.require_lock Base.start_loading(pkid3)).value # try using pkgid3 + @lock Base.require_lock Base.start_loading(pkid3, build_id, false)).value # try using pkgid3 @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid4 -> pkgid4 && pkgid1"), - @lock Base.require_lock Base.start_loading(pkid4)).value # try using pkgid4 - @lock Base.require_lock Base.end_loading(pkid1, "loaded_pkgid1") # end - @lock Base.require_lock Base.end_loading(pkid4, "loaded_pkgid4") # end + @lock Base.require_lock Base.start_loading(pkid4, build_id, false)).value # try using pkgid4 + @lock Base.require_lock Base.end_loading(pkid1, loaded_pkgid1) # end + @lock Base.require_lock Base.end_loading(pkid4, loaded_pkgid4) # end wait(t2) wait(t1) end @testset "Upgradable stdlibs" begin @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using DelimitedFiles'`) + @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using Statistics'`) +end + +@testset "relocatable upgrades #51989" begin + mktempdir() do depot + # realpath is needed because Pkg is used for one of the precompile paths 
below, and Pkg calls realpath on the + # project path so the cache file slug will be different if the tempdir is given as a symlink + # (which it often is on MacOS) which would break the test. + project_path = joinpath(realpath(depot), "project") + mkpath(project_path) + + # Create fake `Foo.jl` package with two files: + foo_path = joinpath(depot, "dev", "Foo51989") + mkpath(joinpath(foo_path, "src")) + open(joinpath(foo_path, "src", "Foo51989.jl"); write=true) do io + println(io, """ + module Foo51989 + include("internal.jl") + end + """) + end + open(joinpath(foo_path, "src", "internal.jl"); write=true) do io + println(io, "const a = \"asd\"") + end + open(joinpath(foo_path, "Project.toml"); write=true) do io + println(io, """ + name = "Foo51989" + uuid = "00000000-0000-0000-0000-000000000001" + version = "1.0.0" + """) + end + + # In our depot, `dev` and then `precompile` this `Foo` package. + @test success(addenv( + `$(Base.julia_cmd()) --project=$project_path --startup-file=no -e 'import Pkg; Pkg.develop("Foo51989"); Pkg.precompile(); exit(0)'`, + "JULIA_DEPOT_PATH" => depot)) + + # Get the size of the generated `.ji` file so that we can ensure that it gets altered + foo_compiled_path = joinpath(depot, "compiled", "v$(VERSION.major).$(VERSION.minor)", "Foo51989") + cache_path = joinpath(foo_compiled_path, only(filter(endswith(".ji"), readdir(foo_compiled_path)))) + cache_size = filesize(cache_path) + + # Next, remove the dependence on `internal.jl` and delete it: + rm(joinpath(foo_path, "src", "internal.jl")) + open(joinpath(foo_path, "src", "Foo51989.jl"); write=true) do io + truncate(io, 0) + println(io, """ + module Foo51989 + end + """) + end + + # Try to load `Foo`; this should trigger recompilation, not an error! + @test success(addenv( + `$(Base.julia_cmd()) --project=$project_path --startup-file=no -e 'using Foo51989; exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + + # Ensure that there is still only one `.ji` file (it got replaced + # and the file size changed). + @test length(filter(endswith(".ji"), readdir(foo_compiled_path))) == 1 + @test filesize(cache_path) != cache_size + end +end + +@testset "Fallback for stdlib deps if manifest deps aren't found" begin + mktempdir() do depot + # This manifest has a LibGit2 entry that is missing LibGit2_jll, which should be + # handled by falling back to the stdlib Project.toml for dependency truth. 
+ badmanifest_test_dir = joinpath(@__DIR__, "project", "deps", "BadStdlibDeps.jl") + @test success(addenv( + `$(Base.julia_cmd()) --project=$badmanifest_test_dir --startup-file=no -e 'using LibGit2'`, + "JULIA_DEPOT_PATH" => depot * Base.Filesystem.pathsep(), + )) + end +end + +@testset "code coverage disabled during precompilation" begin + mktempdir() do depot + cov_test_dir = joinpath(@__DIR__, "project", "deps", "CovTest.jl") + cov_cache_dir = joinpath(depot, "compiled", "v$(VERSION.major).$(VERSION.minor)", "CovTest") + function rm_cov_files() + for cov_file in filter(endswith(".cov"), readdir(joinpath(cov_test_dir, "src"), join=true)) + rm(cov_file) + end + @test !cov_exists() + end + cov_exists() = !isempty(filter(endswith(".cov"), readdir(joinpath(cov_test_dir, "src")))) + + rm_cov_files() # clear out any coverage files first + @test !cov_exists() + + cd(cov_test_dir) do + # In our depot, precompile CovTest.jl with coverage on + @test success(addenv( + `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + @test !isempty(filter(!endswith(".ji"), readdir(cov_cache_dir))) # check that object cache file(s) exists + @test !cov_exists() + rm_cov_files() + + # same again but call foo(), which is in the pkgimage, and should generate coverage + @test success(addenv( + `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; foo(); exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + @test cov_exists() + rm_cov_files() + + # same again but call bar(), which is NOT in the pkgimage, and should generate coverage + @test success(addenv( + `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; bar(); exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + @test cov_exists() + rm_cov_files() + end + end +end + +@testset "command-line flags" begin + mktempdir() do depot_path mktempdir() do dir + # generate a Parent.jl and Child.jl package, with Parent depending on Child + open(joinpath(dir, "Child.jl"), "w") do io + println(io, """ + module Child + end""") + end + open(joinpath(dir, "Parent.jl"), "w") do io + println(io, """ + module Parent + using Child + end""") + end + + # helper function to load a package and return the output + function load_package(name, args=``) + code = "using $name" + cmd = addenv(`$(Base.julia_cmd()) -e $code $args`, + "JULIA_LOAD_PATH" => dir, + "JULIA_DEPOT_PATH" => depot_path, + "JULIA_DEBUG" => "loading") + + out = Base.PipeEndpoint() + log = @async read(out, String) + try + proc = run(pipeline(cmd, stdout=out, stderr=out)) + @test success(proc) + catch + @show fetch(log) + rethrow() + end + return fetch(log) + end + + log = load_package("Parent", `--compiled-modules=no --pkgimages=no`) + @test !occursin(r"Generating (cache|object cache) file", log) + @test !occursin(r"Loading (cache|object cache) file", log) + + + ## tests for `--compiled-modules`, which generates cache files + + log = load_package("Child", `--compiled-modules=yes --pkgimages=no`) + @test occursin(r"Generating cache file for Child", log) + @test occursin(r"Loading cache file .+ for Child", log) + + # with `--compiled-modules=existing` we should only precompile Child + log = load_package("Parent", `--compiled-modules=existing --pkgimages=no`) + @test !occursin(r"Generating cache file for Child", log) + @test occursin(r"Loading cache file .+ for Child", log) + @test !occursin(r"Generating cache file for Parent", log) + @test 
!occursin(r"Loading cache file .+ for Parent", log) + + # the default is `--compiled-modules=yes`, which should now precompile Parent + log = load_package("Parent", `--pkgimages=no`) + @test !occursin(r"Generating cache file for Child", log) + @test occursin(r"Loading cache file .+ for Child", log) + @test occursin(r"Generating cache file for Parent", log) + @test occursin(r"Loading cache file .+ for Parent", log) + + + ## tests for `--pkgimages`, which generates object cache files + + log = load_package("Child", `--compiled-modules=yes --pkgimages=yes`) + @test occursin(r"Generating object cache file for Child", log) + @test occursin(r"Loading object cache file .+ for Child", log) + + # with `--pkgimages=existing` we should only generate code for Child + log = load_package("Parent", `--compiled-modules=yes --pkgimages=existing`) + @test !occursin(r"Generating object cache file for Child", log) + @test occursin(r"Loading object cache file .+ for Child", log) + @test !occursin(r"Generating object cache file for Parent", log) + @test !occursin(r"Loading object cache file .+ for Parent", log) + + # the default is `--pkgimages=yes`, which should now generate code for Parent + log = load_package("Parent") + @test !occursin(r"Generating object cache file for Child", log) + @test occursin(r"Loading object cache file .+ for Child", log) + @test occursin(r"Generating object cache file for Parent", log) + @test occursin(r"Loading object cache file .+ for Parent", log) + end end +end + +@testset "including non-existent file throws proper error #52462" begin + mktempdir() do depot + project_path = joinpath(depot, "project") + mkpath(project_path) + + # Create a `Foo.jl` package + foo_path = joinpath(depot, "dev", "Foo52462") + mkpath(joinpath(foo_path, "src")) + open(joinpath(foo_path, "src", "Foo52462.jl"); write=true) do io + println(io, """ + module Foo52462 + include("non-existent.jl") + end + """) + end + open(joinpath(foo_path, "Project.toml"); write=true) do io + println(io, """ + name = "Foo52462" + uuid = "00000000-0000-0000-0000-000000000001" + version = "1.0.0" + """) + end + + file = joinpath(depot, "dev", "non-existent.jl") + @test try + include(file); false + catch e + @test e isa SystemError + @test e.prefix == "opening file $(repr(file))" + true + end + touch(file) + @test include_dependency(file) === nothing + chmod(file, 0x000) + + # same for include_dependency: #52063 + dir = mktempdir() do dir + @test include_dependency(dir) === nothing + dir + end + @test try + include_dependency(dir); false + catch e + @test e isa SystemError + @test e.prefix == "opening file or folder $(repr(dir))" + true + end + end +end + +@testset "-m" begin + rot13proj = joinpath(@__DIR__, "project", "Rot13") + @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13 --project nowhere ABJURER`) == "--cebwrpg abjurer NOWHERE " + @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13.Rot26 --project nowhere ABJURER`) == "--project nowhere ABJURER " +end + +@testset "workspace loading" begin + old_load_path = copy(LOAD_PATH) + try + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject")) + @test Base.get_preferences()["value"] == 1 + @test Base.get_preferences()["x"] == 1 + + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "sub")) + id = Base.identify_package("Devved") + @test isfile(Base.locate_package(id)) + @test Base.identify_package("Devved2") === nothing + id3 = 
Base.identify_package("MyPkg") + @test isfile(Base.locate_package(id3)) + + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "PackageThatIsSub")) + id_pkg = Base.identify_package("PackageThatIsSub") + @test Base.identify_package(id_pkg, "Devved") === nothing + id_dev2 = Base.identify_package(id_pkg, "Devved2") + @test isfile(Base.locate_package(id_dev2)) + id_mypkg = Base.identify_package("MyPkg") + @test isfile(Base.locate_package(id_mypkg)) + id_dev = Base.identify_package(id_mypkg, "Devved") + @test isfile(Base.locate_package(id_dev)) + @test Base.get_preferences()["value"] == 2 + @test Base.get_preferences()["x"] == 1 + @test Base.get_preferences()["y"] == 2 + + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "PackageThatIsSub", "test")) + id_pkg = Base.identify_package("PackageThatIsSub") + @test isfile(Base.locate_package(id_pkg)) + @test Base.identify_package(id_pkg, "Devved") === nothing + id_dev2 = Base.identify_package(id_pkg, "Devved2") + @test isfile(Base.locate_package(id_dev2)) + id_mypkg = Base.identify_package("MyPkg") + @test isfile(Base.locate_package(id_mypkg)) + id_dev = Base.identify_package(id_mypkg, "Devved") + @test isfile(Base.locate_package(id_dev)) + @test Base.get_preferences()["value"] == 3 + @test Base.get_preferences()["x"] == 1 + @test Base.get_preferences()["y"] == 2 + @test Base.get_preferences()["z"] == 3 + + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "SubProject", "test")) + id_mypkg = Base.identify_package("MyPkg") + id_dev = Base.identify_package(id_mypkg, "Devved") + @test isfile(Base.locate_package(id_dev)) + @test Base.identify_package("Devved2") === nothing + + finally + copy!(LOAD_PATH, old_load_path) + end +end + +@testset "project path handling" begin + old_load_path = copy(LOAD_PATH) + try + push!(LOAD_PATH, joinpath(@__DIR__, "project", "ProjectPath")) + id_project = Base.identify_package("ProjectPath") + Base.locate_package(id_project) + @test Base.locate_package(id_project) == joinpath(@__DIR__, "project", "ProjectPath", "CustomPath.jl") + + id_dep = Base.identify_package("ProjectPathDep") + @test Base.locate_package(id_dep) == joinpath(@__DIR__, "project", "ProjectPath", "ProjectPathDep", "CustomPath.jl") + finally + copy!(LOAD_PATH, old_load_path) + end +end + +@testset "extension path computation name collision" begin + old_load_path = copy(LOAD_PATH) + try + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_A")) + push!(LOAD_PATH, joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_B")) + ext_B = Base.PkgId(Base.uuid5(Base.identify_package("ExtNameCollision_B").uuid, "REPLExt"), "REPLExt") + @test Base.locate_package(ext_B) == joinpath(@__DIR__, "project", "Extensions", "ExtNameCollision_B", "ext", "REPLExt.jl") + finally + copy!(LOAD_PATH, old_load_path) + end +end + +@testset "require_stdlib loading duplication" begin + depot_path = mktempdir() + oldBase64 = nothing + try + push!(empty!(DEPOT_PATH), depot_path) + Base64_key = Base.PkgId(Base.UUID("2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"), "Base64") + oldBase64 = Base.unreference_module(Base64_key) + cc = Base.compilecache(Base64_key) + @test Base.isprecompiled(Base64_key, cachepaths=String[cc[1]]) + empty!(DEPOT_PATH) + Base.require_stdlib(Base64_key) + push!(DEPOT_PATH, depot_path) + append!(DEPOT_PATH, original_depot_path) + oldloaded = @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) + 
Base.require(Base64_key) + @test @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) == oldloaded + Base.unreference_module(Base64_key) + empty!(DEPOT_PATH) + push!(DEPOT_PATH, depot_path) + Base.require(Base64_key) + @test @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) == oldloaded + 1 + Base.unreference_module(Base64_key) + finally + oldBase64 === nothing || Base.register_root_module(oldBase64) + copy!(DEPOT_PATH, original_depot_path) + rm(depot_path, force=true, recursive=true) + end end diff --git a/test/math.jl b/test/math.jl index 19d9f7893a496..7070fe63ba931 100644 --- a/test/math.jl +++ b/test/math.jl @@ -23,29 +23,45 @@ has_fma = Dict( ) @testset "clamp" begin - @test clamp(0, 1, 3) == 1 - @test clamp(1, 1, 3) == 1 - @test clamp(2, 1, 3) == 2 - @test clamp(3, 1, 3) == 3 - @test clamp(4, 1, 3) == 3 - - @test clamp(0.0, 1, 3) == 1.0 - @test clamp(1.0, 1, 3) == 1.0 - @test clamp(2.0, 1, 3) == 2.0 - @test clamp(3.0, 1, 3) == 3.0 - @test clamp(4.0, 1, 3) == 3.0 - - @test clamp.([0, 1, 2, 3, 4], 1.0, 3.0) == [1.0, 1.0, 2.0, 3.0, 3.0] - @test clamp.([0 1; 2 3], 1.0, 3.0) == [1.0 1.0; 2.0 3.0] - - @test clamp(-200, Int8) === typemin(Int8) - @test clamp(100, Int8) === Int8(100) - @test clamp(200, Int8) === typemax(Int8) - - begin - x = [0.0, 1.0, 2.0, 3.0, 4.0] - clamp!(x, 1, 3) - @test x == [1.0, 1.0, 2.0, 3.0, 3.0] + let + @test clamp(0, 1, 3) == 1 + @test clamp(1, 1, 3) == 1 + @test clamp(2, 1, 3) == 2 + @test clamp(3, 1, 3) == 3 + @test clamp(4, 1, 3) == 3 + + @test clamp(0.0, 1, 3) == 1.0 + @test clamp(1.0, 1, 3) == 1.0 + @test clamp(2.0, 1, 3) == 2.0 + @test clamp(3.0, 1, 3) == 3.0 + @test clamp(4.0, 1, 3) == 3.0 + + @test clamp.([0, 1, 2, 3, 4], 1.0, 3.0) == [1.0, 1.0, 2.0, 3.0, 3.0] + @test clamp.([0 1; 2 3], 1.0, 3.0) == [1.0 1.0; 2.0 3.0] + + @test clamp(-200, Int8) === typemin(Int8) + @test clamp(100, Int8) === Int8(100) + @test clamp(200, Int8) === typemax(Int8) + + begin + x = [0.0, 1.0, 2.0, 3.0, 4.0] + clamp!(x, 1, 3) + @test x == [1.0, 1.0, 2.0, 3.0, 3.0] + end + + @test clamp(typemax(UInt64), Int64) === typemax(Int64) + @test clamp(typemin(Int), UInt64) === typemin(UInt64) + @test clamp(Int16(-1), UInt16) === UInt16(0) + @test clamp(-1, 2, UInt(0)) === UInt(2) + @test clamp(typemax(UInt16), Int16) === Int16(32767) + + # clamp should not allocate a BigInt for typemax(Int16) + x = big(2) ^ 100 + @test (@allocated clamp(x, Int16)) == 0 + + x = clamp(2.0, BigInt) + @test x isa BigInt + @test x == big(2) end end @@ -188,6 +204,7 @@ end @test exp10(x) ≈ exp10(big(x)) @test exp2(x) ≈ exp2(big(x)) @test expm1(x) ≈ expm1(big(x)) + @test expm1(T(-1.1)) ≈ expm1(big(T(-1.1))) @test hypot(x,y) ≈ hypot(big(x),big(y)) @test hypot(x,x,y) ≈ hypot(hypot(big(x),big(x)),big(y)) @test hypot(x,x,y,y) ≈ hypot(hypot(big(x),big(x)),hypot(big(y),big(y))) @@ -365,6 +382,10 @@ end end end +@testset "https://github.com/JuliaLang/julia/issues/56782" begin + @test isnan(exp(reinterpret(Float64, 0x7ffbb14880000000))) +end + @testset "test abstractarray trig functions" begin TAA = rand(2,2) TAA = (TAA + TAA')/2. 
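The rewritten `clamp` testset near the top of this file's hunks exercises saturating conversion to a target integer type via `clamp(x, T)`. A minimal standalone sketch of that behavior, using only values that appear in the tests and assuming a Julia build that includes the clamp methods this patch tests (`saturate_to` is an illustrative name, not part of the patch):

# Illustrative only: a thin wrapper around Base.clamp(x, T); values mirror the tests above.
saturate_to(::Type{T}, x) where {T<:Integer} = clamp(x, T)

@assert saturate_to(Int8, -200) === typemin(Int8)        # saturates at -128
@assert saturate_to(Int8, 100)  === Int8(100)            # in range: plain conversion
@assert saturate_to(Int8, 200)  === typemax(Int8)        # saturates at 127
@assert saturate_to(UInt16, Int16(-1)) === UInt16(0)     # negative input, unsigned target
@assert saturate_to(Int64, typemax(UInt64)) === typemax(Int64)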
@@ -870,14 +891,14 @@ end end @testset "isapprox" begin - # #22742: updated isapprox semantics - @test !isapprox(1.0, 1.0+1e-12, atol=1e-14) - @test isapprox(1.0, 1.0+0.5*sqrt(eps(1.0))) - @test !isapprox(1.0, 1.0+1.5*sqrt(eps(1.0)), atol=sqrt(eps(1.0))) + # #22742: updated isapprox semantics + @test !isapprox(1.0, 1.0+1e-12, atol=1e-14) + @test isapprox(1.0, 1.0+0.5*sqrt(eps(1.0))) + @test !isapprox(1.0, 1.0+1.5*sqrt(eps(1.0)), atol=sqrt(eps(1.0))) - # #13132: Use of `norm` kwarg for scalar arguments - @test isapprox(1, 1+1.0e-12, norm=abs) - @test !isapprox(1, 1+1.0e-12, norm=x->1) + # #13132: Use of `norm` kwarg for scalar arguments + @test isapprox(1, 1+1.0e-12, norm=abs) + @test !isapprox(1, 1+1.0e-12, norm=x->1) end # test AbstractFloat fallback pr22716 @@ -1097,7 +1118,7 @@ pcnfloat(x) = prevfloat(x), x, nextfloat(x) import Base.Math: COSH_SMALL_X, H_SMALL_X, H_MEDIUM_X, H_LARGE_X @testset "sinh" begin - for T in (Float32, Float64) + for T in (Float16, Float32, Float64) @test sinh(zero(T)) === zero(T) @test sinh(-zero(T)) === -zero(T) @test sinh(nextfloat(zero(T))) === nextfloat(zero(T)) @@ -1105,15 +1126,17 @@ import Base.Math: COSH_SMALL_X, H_SMALL_X, H_MEDIUM_X, H_LARGE_X @test sinh(T(1000)) === T(Inf) @test sinh(-T(1000)) === -T(Inf) @test isnan_type(T, sinh(T(NaN))) - for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)])) - @test sinh(x) ≈ sinh(big(x)) rtol=eps(T) - @test sinh(-x) ≈ sinh(big(-x)) rtol=eps(T) + if T ∈ (Float32, Float64) + for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)])) + @test sinh(x) ≈ sinh(big(x)) rtol=eps(T) + @test sinh(-x) ≈ sinh(big(-x)) rtol=eps(T) + end end end end @testset "cosh" begin - for T in (Float32, Float64) + for T in (Float16, Float32, Float64) @test cosh(zero(T)) === one(T) @test cosh(-zero(T)) === one(T) @test cosh(nextfloat(zero(T))) === one(T) @@ -1121,15 +1144,17 @@ end @test cosh(T(1000)) === T(Inf) @test cosh(-T(1000)) === T(Inf) @test isnan_type(T, cosh(T(NaN))) - for x in Iterators.flatten(pcnfloat.([COSH_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)])) - @test cosh(x) ≈ cosh(big(x)) rtol=eps(T) - @test cosh(-x) ≈ cosh(big(-x)) rtol=eps(T) + if T ∈ (Float32, Float64) + for x in Iterators.flatten(pcnfloat.([COSH_SMALL_X(T), H_MEDIUM_X(T), H_LARGE_X(T)])) + @test cosh(x) ≈ cosh(big(x)) rtol=eps(T) + @test cosh(-x) ≈ cosh(big(-x)) rtol=eps(T) + end end end end @testset "tanh" begin - for T in (Float32, Float64) + for T in (Float16, Float32, Float64) @test tanh(zero(T)) === zero(T) @test tanh(-zero(T)) === -zero(T) @test tanh(nextfloat(zero(T))) === nextfloat(zero(T)) @@ -1137,9 +1162,11 @@ end @test tanh(T(1000)) === one(T) @test tanh(-T(1000)) === -one(T) @test isnan_type(T, tanh(T(NaN))) - for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)])) - @test tanh(x) ≈ tanh(big(x)) rtol=eps(T) - @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T) + if T ∈ (Float32, Float64) + for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)])) + @test tanh(x) ≈ tanh(big(x)) rtol=eps(T) + @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T) + end end end @test tanh(18.0) ≈ tanh(big(18.0)) rtol=eps(Float64) @@ -1147,7 +1174,7 @@ end end @testset "asinh" begin - for T in (Float32, Float64) + for T in (Float16, Float32, Float64) @test asinh(zero(T)) === zero(T) @test asinh(-zero(T)) === -zero(T) @test asinh(nextfloat(zero(T))) === nextfloat(zero(T)) @@ -1161,7 +1188,7 @@ end end @testset "acosh" begin - for T in (Float32, Float64) + for T in (Float16, Float32, Float64) @test_throws 
DomainError acosh(T(0.1)) @test acosh(one(T)) === zero(T) @test isnan_type(T, acosh(T(NaN))) @@ -1172,7 +1199,7 @@ end end @testset "atanh" begin - for T in (Float32, Float64) + for T in (Float16, Float32, Float64) @test_throws DomainError atanh(T(1.1)) @test atanh(zero(T)) === zero(T) @test atanh(-zero(T)) === -zero(T) @@ -1354,6 +1381,16 @@ end # hypot on Complex returns Real @test (@inferred hypot(3, 4im)) === 5.0 @test (@inferred hypot(3, 4im, 12)) === 13.0 + @testset "promotion, issue #53505" begin + @testset "Int,$T" for T in (Float16, Float32, Float64, BigFloat) + for args in ((3, 4), (3, 4, 12)) + for i in eachindex(args) + targs = ntuple(j -> (j == i) ? T(args[j]) : args[j], length(args)) + @test (@inferred hypot(targs...)) isa float(eltype(promote(targs...))) + end + end + end + end end struct BadFloatWrapper <: AbstractFloat @@ -1448,6 +1485,47 @@ end # two cases where we have observed > 1 ULP in the past @test 0.0013653274095082324^-97.60372292227069 == 4.088393948750035e279 @test 8.758520413376658e-5^70.55863059215994 == 5.052076767078296e-287 + + # issue #53881 + c53881 = 2.2844135865398217e222 # check correctness within 2 ULPs + @test prevfloat(1.0) ^ -Int64(2)^62 ≈ c53881 atol=2eps(c53881) + @test 2.0 ^ typemin(Int) == 0.0 + @test (-1.0) ^ typemin(Int) == 1.0 + Z = Int64(2) + E = prevfloat(1.0) + @test E ^ (-Z^54) ≈ 7.38905609893065 + @test E ^ (-Z^62) ≈ 2.2844135865231613e222 + @test E ^ (-Z^63) == Inf + @test abs(E ^ (Z^62-1) * E ^ (-Z^62+1) - 1) <= eps(1.0) + n, x = -1065564664, 0.9999997040311492 + @test abs(x^n - Float64(big(x)^n)) / eps(x^n) == 0 # ULPs + @test E ^ (big(2)^100 + 1) == 0 + @test E ^ 6705320061009595392 == nextfloat(0.0) + n = Int64(1024 / log2(E)) + @test E^n == Inf + @test E^float(n) == Inf + + # #55633 + struct Issue55633_1 <: Number end + struct Issue55633_3 <: Number end + struct Issue55633_9 <: Number end + Base.one(::Issue55633_3) = Issue55633_1() + Base.:(*)(::Issue55633_3, ::Issue55633_3) = Issue55633_9() + Base.promote_rule(::Type{Issue55633_1}, ::Type{Issue55633_3}) = Int + Base.promote_rule(::Type{Issue55633_3}, ::Type{Issue55633_9}) = Int + Base.promote_rule(::Type{Issue55633_1}, ::Type{Issue55633_9}) = Int + Base.promote_rule(::Type{Issue55633_1}, ::Type{Int}) = Int + Base.promote_rule(::Type{Issue55633_3}, ::Type{Int}) = Int + Base.promote_rule(::Type{Issue55633_9}, ::Type{Int}) = Int + Base.convert(::Type{Int}, ::Issue55633_1) = 1 + Base.convert(::Type{Int}, ::Issue55633_3) = 3 + Base.convert(::Type{Int}, ::Issue55633_9) = 9 + for x ∈ (im, pi, Issue55633_3()) + p = promote(one(x), x, x*x) + for y ∈ 0:2 + @test all((t -> ===(t...)), zip(x^y, p[y + 1])) + end + end end # Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding. @@ -1536,25 +1614,77 @@ function f44336() @inline hypot(as...) 
end @testset "Issue #44336" begin - f44336() - @test (@allocated f44336()) == 0 + let + f44336() + @test (@allocated f44336()) == 0 + end end -# test constant-foldability -for fn in (:sin, :cos, :tan, :log, :log2, :log10, :log1p, :exponent, :sqrt, :cbrt, :fourthroot, - :asin, :atan, :acos, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh, - :exp, :exp2, :exp10, :expm1 - ) - for T in (Float16, Float32, Float64) - f = getfield(@__MODULE__, fn) - eff = Base.infer_effects(f, (T,)) - @test Core.Compiler.is_foldable(eff) +@testset "constant-foldability of core math functions" begin + for T = Any[Float16, Float32, Float64] + @testset let T = T + for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot, + asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1] + @testset let f = f, + rt = Base.infer_return_type(f, (T,)), + effects = Base.infer_effects(f, (T,)) + @test rt != Union{} + @test Core.Compiler.is_foldable(effects) + end + end + @testset let effects = Base.infer_effects(^, (T,Int)) + @test Core.Compiler.is_foldable(effects) + end + @testset let effects = Base.infer_effects(^, (T,T)) + @test Core.Compiler.is_foldable(effects) + end + end end -end -for T in (Float16, Float32, Float64) - for f in (exp, exp2, exp10) - @test Core.Compiler.is_removable_if_unused(Base.infer_effects(f, (T,))) +end; +@testset "removability of core math functions" begin + for T = Any[Float16, Float32, Float64] + @testset let T = T + for f = Any[exp, exp2, exp10, expm1] + @testset let f = f + @test Core.Compiler.is_removable_if_unused(Base.infer_effects(f, (T,))) + end + end + end + end +end; +@testset "exception type inference of core math functions" begin + MathErrorT = Union{DomainError, InexactError} + for T = (Float16, Float32, Float64) + @testset let T = T + for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot, + asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1] + @testset let f = f + @test Base.infer_exception_type(f, (T,)) <: MathErrorT + end + end + @test Base.infer_exception_type(^, (T,Int)) <: MathErrorT + @test Base.infer_exception_type(^, (T,T)) <: MathErrorT + end + end +end; +@test Base.infer_return_type((Int,)) do x + local r = nothing + try + r = sin(x) + catch err + if err isa DomainError + r = 0.0 + end end - @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,Int))) - @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,T))) + return r +end === Float64 + +@testset "BigInt Rationals with special funcs" begin + @test sinpi(big(1//1)) == big(0.0) + @test tanpi(big(1//1)) == big(0.0) + @test cospi(big(1//1)) == big(-1.0) +end + +@testset "Docstrings" begin + @test isempty(Docs.undocumented_names(MathConstants)) end diff --git a/test/meta.jl b/test/meta.jl index 36a8acbfe08dd..e9e344bba2e22 100644 --- a/test/meta.jl +++ b/test/meta.jl @@ -43,77 +43,70 @@ end @test foundfunc(h_inlined(), :g_inlined) @test foundfunc(h_noinlined(), :g_noinlined) -using Base: pushmeta!, popmeta! +using Base: popmeta! 
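The meta.jl changes below rename `@attach` to `@attach_meta`, switch to the `Base.pushmeta!(ex, Expr(:test, val))` calling form, and wrap the lowered-code checks in `let` blocks. A minimal standalone round trip of that attach-then-pop pattern (illustrative only, not part of the patch; `sketch_dummy` is a made-up name):

using Base: popmeta!

ex = :(function sketch_dummy() false end)
Base.pushmeta!(ex, Expr(:test, 42))   # the calling form used in this patch
eval(ex)                              # define sketch_dummy with the :test meta attached
let ast = only(code_lowered(sketch_dummy, Tuple{}))
    body = Expr(:block)
    body.args = ast.code
    @assert popmeta!(body, :test) == (true, [42])     # meta found, payload returned
    @assert popmeta!(body, :nonexistent) == (false, [])
end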
-macro attach(val, ex) - esc(_attach(val, ex)) +macro attach_meta(val, ex) + esc(_attach_meta(val, ex)) end +_attach_meta(val, ex) = Base.pushmeta!(ex, Expr(:test, val)) -_attach(val, ex) = pushmeta!(ex, :test, val) - -@attach 42 function dummy() +@attach_meta 42 function dummy() false end - -asts = code_lowered(dummy, Tuple{}) -@test length(asts) == 1 -ast = asts[1] - -body = Expr(:block) -body.args = ast.code - -@test popmeta!(body, :test) == (true, [42]) -@test popmeta!(body, :nonexistent) == (false, []) +let ast = only(code_lowered(dummy, Tuple{})) + body = Expr(:block) + body.args = ast.code + @test popmeta!(body, :test) == (true, [42]) + @test popmeta!(body, :nonexistent) == (false, []) +end # Simple popmeta!() tests -ex1 = quote - $(Expr(:meta, :foo)) - x*x+1 +let ex1 = quote + $(Expr(:meta, :foo)) + x*x+1 + end + @test popmeta!(ex1, :foo)[1] + @test !popmeta!(ex1, :foo)[1] + @test !popmeta!(ex1, :bar)[1] + @test !(popmeta!(:(x*x+1), :foo)[1]) end -@test popmeta!(ex1, :foo)[1] -@test !popmeta!(ex1, :foo)[1] -@test !popmeta!(ex1, :bar)[1] -@test !(popmeta!(:(x*x+1), :foo)[1]) # Find and pop meta information from general ast locations -multi_meta = quote - $(Expr(:meta, :foo1)) - y = x - $(Expr(:meta, :foo2, :foo3)) - begin - $(Expr(:meta, :foo4, Expr(:foo5, 1, 2))) +let multi_meta = quote + $(Expr(:meta, :foo1)) + y = x + $(Expr(:meta, :foo2, :foo3)) + begin + $(Expr(:meta, :foo4, Expr(:foo5, 1, 2))) + end + x*x+1 end - x*x+1 -end -@test popmeta!(deepcopy(multi_meta), :foo1) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo2) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo3) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo4) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo5) == (true, [1,2]) -@test popmeta!(deepcopy(multi_meta), :bar) == (false, []) - -# Test that popmeta!() removes meta blocks entirely when they become empty. -for m in [:foo1, :foo2, :foo3, :foo4, :foo5] - @test popmeta!(multi_meta, m)[1] + @test popmeta!(deepcopy(multi_meta), :foo1) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo2) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo3) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo4) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo5) == (true, [1,2]) + @test popmeta!(deepcopy(multi_meta), :bar) == (false, []) + + # Test that popmeta!() removes meta blocks entirely when they become empty. + ast = :(dummy() = $multi_meta) + for m in [:foo1, :foo2, :foo3, :foo4, :foo5] + @test popmeta!(multi_meta, m)[1] + end + @test Base.findmeta(ast)[1] == 0 end -@test Base.findmeta(multi_meta.args)[1] == 0 # Test that pushmeta! 
can push across other macros, # in the case multiple pushmeta!-based macros are combined - -@attach 40 @attach 41 @attach 42 dummy_multi() = return nothing - -asts = code_lowered(dummy_multi, Tuple{}) -@test length(asts) == 1 -ast = asts[1] - -body = Expr(:block) -body.args = ast.code - -@test popmeta!(body, :test) == (true, [40]) -@test popmeta!(body, :test) == (true, [41]) -@test popmeta!(body, :test) == (true, [42]) -@test popmeta!(body, :nonexistent) == (false, []) +@attach_meta 40 @attach_meta 41 @attach_meta 42 dummy_multi() = return nothing +let ast = only(code_lowered(dummy_multi, Tuple{})) + body = Expr(:block) + body.args = ast.code + @test popmeta!(body, :test) == (true, [40]) + @test popmeta!(body, :test) == (true, [41]) + @test popmeta!(body, :test) == (true, [42]) + @test popmeta!(body, :nonexistent) == (false, []) +end # tests to fully cover functions in base/meta.jl using Base.Meta @@ -254,14 +247,14 @@ end f(::T) where {T} = T ci = code_lowered(f, Tuple{Int})[1] @test Meta.partially_inline!(ci.code, [], Tuple{typeof(f),Int}, Any[Int], 0, 0, :propagate) == - Any[Core.ReturnNode(QuoteNode(Int))] + Any[QuoteNode(Int), Core.ReturnNode(Core.SSAValue(1))] g(::Val{x}) where {x} = x ? 1 : 0 ci = code_lowered(g, Tuple{Val{true}})[1] -@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[1] == - Core.GotoIfNot(QuoteNode(true), 3) -@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[1] == - Core.GotoIfNot(QuoteNode(true), 5) +@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[2] == + Core.GotoIfNot(Core.SSAValue(1), 4) +@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[2] == + Core.GotoIfNot(Core.SSAValue(3), 6) @testset "inlining with isdefined" begin isdefined_slot(x) = @isdefined(x) @@ -281,6 +274,15 @@ ci = code_lowered(g, Tuple{Val{true}})[1] @eval isdefined_globalref(x) = $(Expr(:isdefined, GlobalRef(Base, :foo))) ci = code_lowered(isdefined_globalref, Tuple{Int})[1] @test Meta.partially_inline!(copy(ci.code), Any[isdefined_globalref, 1], Tuple{typeof(isdefined_globalref), Int}, - [], 0, 0, :propagate)[1] == Expr(:isdefined, GlobalRef(Base, :foo)) + [], 0, 0, :propagate)[1] == Expr(:call, GlobalRef(Core, :isdefinedglobal), Base, QuoteNode(:foo)) + + withunreachable(s::String) = sin(s) + ci = code_lowered(withunreachable, Tuple{String})[1] + ci.code[end] = Core.ReturnNode() + @test Meta.partially_inline!(copy(ci.code), Any[withunreachable, "foo"], Tuple{typeof(withunreachable), String}, + [], 0, 0, :propagate)[end] == Core.ReturnNode() +end +@testset "Base.Meta docstrings" begin + @test isempty(Docs.undocumented_names(Meta)) end diff --git a/test/misc.jl b/test/misc.jl index 79b684badf1e0..7070fd49f5f36 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -129,6 +129,46 @@ let l = ReentrantLock() @test_throws ErrorException unlock(l) end +# Lockable{T, L<:AbstractLock} +using Base: Lockable +let + @test Base.isexported(Base, :Lockable) + lockable = Lockable(Dict("foo" => "hello"), ReentrantLock()) + # note field access is non-public + @test lockable.value["foo"] == "hello" + @test @lock(lockable, lockable[]["foo"]) == "hello" + lock(lockable) do d + @test d["foo"] == "hello" + end + lock(lockable) do d + d["foo"] = "goodbye" + end + @test lockable.value["foo"] == "goodbye" + @lock lockable begin + @test lockable[]["foo"] == "goodbye" + end + l = trylock(lockable) + try + @test l + finally + 
unlock(lockable) + end + # Test 1-arg constructor + lockable2 = Lockable(Dict("foo" => "hello")) + @test lockable2.lock isa ReentrantLock + @test @lock(lockable2, lockable2[]["foo"]) == "hello" +end + +@testset "`show` for ReentrantLock" begin + l = ReentrantLock() + @test repr(l) == "ReentrantLock()" + @test repr("text/plain", l) == "ReentrantLock() (unlocked)" + @lock l begin + @test startswith(repr("text/plain", l), "ReentrantLock() (locked by current Task (") + end + @test repr("text/plain", l) == "ReentrantLock() (unlocked)" +end + for l in (Threads.SpinLock(), ReentrantLock()) @test get_finalizers_inhibited() == 0 @test lock(get_finalizers_inhibited, l) == 1 @@ -221,12 +261,14 @@ let c = Ref(0), @test c[] == 100 end -@test_throws ErrorException("deadlock detected: cannot wait on current task") wait(current_task()) +@test_throws ConcurrencyViolationError("deadlock detected: cannot wait on current task") wait(current_task()) + +@test_throws ConcurrencyViolationError("Cannot yield to currently running task!") yield(current_task()) # issue #41347 let t = @async 1 wait(t) - @test_throws ErrorException yield(t) + @test_throws ConcurrencyViolationError yield(t) end let t = @async error(42) @@ -265,6 +307,9 @@ let stats = @timed sin(1) @test stats.value == sin(1) @test isa(stats.time, Real) && stats.time >= 0 + @test isa(stats.compile_time, Real) && stats.compile_time >= 0 + @test isa(stats.recompile_time, Real) && stats.recompile_time >= 0 + @test stats.compile_time <= stats.time # The return type of gcstats was changed in Julia 1.4 (# 34147) # Test that the 1.0 API still works @@ -286,25 +331,43 @@ v11801, t11801 = @timed sin(1) @test names(@__MODULE__, all = true) == names_before_timing redirect_stdout(devnull) do # suppress time prints + # Accepted @time argument formats @test @time true @test @time "message" true +@test @time 1 true let msg = "message" @test @time msg true end let foo() = "message" @test @time foo() true end +let foo() = 1 + @test @time foo() true +end # Accepted @timev argument formats @test @timev true @test @timev "message" true +@test @timev 1 true let msg = "message" @test @timev msg true end let foo() = "message" @test @timev foo() true end +let foo() = 1 + @test @timev foo() true +end + +# this is internal, but used for easy testing +@test sprint(Base.time_print, 1e9) == " 1.000000 seconds" +@test sprint(Base.time_print, 1e9, 111, 0, 222) == " 1.000000 seconds (222 allocations: 111 bytes)" +@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222) == " 1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time)" +@test sprint(Base.time_print, 1e9, 111, 0, 222, 333) == " 1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts)" +@test sprint(Base.time_print, 1e9, 0, 0, 0, 333) == " 1.000000 seconds (333 lock conflicts)" +@test sprint(Base.time_print, 1e9, 111, 0, 222, 333, 0.25e9) == " 1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts, 25.00% compilation time)" +@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222, 333, 0.25e9, 0.175e9) == " 1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time, 333 lock conflicts, 25.00% compilation time: 70% of which was recompilation)" # @showtime @test @showtime true @@ -497,7 +560,7 @@ struct ambigconvert; end # inject a problematic `convert` method to ensure it st Base.convert(::Any, v::ambigconvert) = v import Base.summarysize -@test summarysize(Core) > (summarysize(Core.Compiler) + Base.summarysize(Core.Intrinsics)) > Core.sizeof(Core) +@test summarysize(Core) > Base.summarysize(Core.Intrinsics) > 
Core.sizeof(Core) @test summarysize(Base) > 100_000 * sizeof(Ptr) let R = Ref{Any}(nothing), depth = 10^6 @@ -538,6 +601,24 @@ end # issue #44780 @test summarysize(BigInt(2)^1000) > summarysize(BigInt(2)) +# issue #53061 +mutable struct S53061 + x::Union{Float64, Tuple{Float64, Float64}} + y::Union{Float64, Tuple{Float64, Float64}} +end +let s = S53061[S53061(rand(), (rand(),rand())) for _ in 1:10^4] + @test allequal(summarysize(s) for i in 1:10) +end +struct Z53061 + x::S53061 + y::Int64 +end +let z = Z53061[Z53061(S53061(rand(), (rand(),rand())), 0) for _ in 1:10^4] + @test allequal(summarysize(z) for i in 1:10) + # broken on i868 linux. issue #54895 + @test abs(summarysize(z) - 640000)/640000 <= 0.01 broken = Sys.WORD_SIZE == 32 && Sys.islinux() +end + ## test conversion from UTF-8 to UTF-16 (for Windows APIs) # empty arrays @@ -1058,7 +1139,7 @@ Base.setindex!(xs::InvokeXs2, @nospecialize(v::Any), idx::Int) = xs.xs[idx] = v @test @invoke(f2(1::Real)) === Integer end - # when argment's type annotation is omitted, it should be specified as `Core.Typeof(x)` + # when argument's type annotation is omitted, it should be specified as `Core.Typeof(x)` let f(_) = Any f(x::Integer) = Integer @test f(1) === Integer @@ -1130,10 +1211,7 @@ include("testenv.jl") let flags = Cmd(filter(a->!occursin("depwarn", a), collect(test_exeflags))) local cmd = `$test_exename $flags --depwarn=yes deprecation_exec.jl` - - if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) - error("Deprecation test failed, cmd : $cmd") - end + run(cmd, devnull) end # PR #23664, make sure names don't get added to the default `Main` workspace @@ -1284,10 +1362,56 @@ end end end +module KwdefWithEsc + const Int1 = Int + const val1 = 42 + macro define_struct() + quote + @kwdef struct $(esc(:Struct)) + a + b = val1 + c::Int1 + d::Int1 = val1 + + $(esc(quote + e + f = val2 + g::Int2 + h::Int2 = val2 + end)) + + $(esc(:(i = val2))) + $(esc(:(j::Int2))) + $(esc(:(k::Int2 = val2))) + + l::$(esc(:Int2)) + m::$(esc(:Int2)) = val1 + + n = $(esc(:val2)) + o::Int1 = $(esc(:val2)) + + $(esc(:p)) + $(esc(:q)) = val1 + $(esc(:s))::Int1 + $(esc(:t))::Int1 = val1 + end + end + end +end + +module KwdefWithEsc_TestModule + using ..KwdefWithEsc + const Int2 = Int + const val2 = 42 + KwdefWithEsc.@define_struct() +end +@test isdefined(KwdefWithEsc_TestModule, :Struct) + @testset "exports of modules" begin - for (_, mod) in Base.loaded_modules + @testset "$mod" for (_, mod) in Base.loaded_modules mod === Main && continue # Main exports everything - for v in names(mod) + @testset "$v" for v in names(mod) + isdefined(mod, v) || @error "missing $v in $mod" @test isdefined(mod, v) end end @@ -1300,6 +1424,11 @@ end @test sort([a, b]) == [b, a] end +@testset "UUID display" begin + a = Base.UUID("dbd321ed-e87e-4f33-9511-65b7d01cdd55") + @test repr(a) == "$(Base.UUID)(\"dbd321ed-e87e-4f33-9511-65b7d01cdd55\")" +end + @testset "Libc.rand" begin low, high = extrema(Libc.rand(Float64) for i=1:10^4) # these fail with probability 2^(-10^4) ≈ 5e-3011 @@ -1335,8 +1464,10 @@ end open(tmppath, "w") do tmpio redirect_stderr(tmpio) do GC.enable_logging(true) + @test GC.logging_enabled() GC.gc() GC.enable_logging(false) + @test !GC.logging_enabled() end end @test occursin("GC: pause", read(tmppath, String)) @@ -1353,9 +1484,9 @@ end end # Test that read fault on a prot-none region does not incorrectly give -# ReadOnlyMemoryEror, but rather crashes the program +# ReadOnlyMemoryError, but rather crashes the program const MAP_ANONYMOUS_PRIVATE = Sys.isbsd() ? 
0x1002 : 0x22 -let script = :( +let script = """ let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t, Cint, Cint, Cint, Int), C_NULL, 16*1024, 0, $MAP_ANONYMOUS_PRIVATE, -1, 0)) @@ -1365,27 +1496,62 @@ let script = :( println(e) end end - ) + """ cmd = if Sys.isunix() # Set the maximum core dump size to 0 to keep this expected crash from # producing a (and potentially overwriting an existing) core dump file - `sh -c "ulimit -c 0; $(Base.shell_escape(Base.julia_cmd())) -e '$script'"` + `sh -c "ulimit -c 0; $(Base.shell_escape(Base.julia_cmd())) -e $(Base.shell_escape(script))"` + else + `$(Base.julia_cmd()) -e $script` + end + p = run(ignorestatus(cmd), devnull, stdout, devnull) + if p.termsignal == 0 + Sys.isunix() ? @test(p.exitcode ∈ (128+7, 128+10, 128+11)) : @test(p.exitcode != 0) # expect SIGBUS (7 on BSDs or 10 on Linux) or SIGSEGV (11) else - `$(Base.julia_cmd()) -e '$script'` + @test(p.termsignal ∈ (7, 10, 11)) end - @test !success(cmd) end # issue #41656 -@test success(`$(Base.julia_cmd()) -e 'isempty(x) = true'`) +run(`$(Base.julia_cmd()) -e 'isempty(x) = true'`) @testset "Base/timing.jl" begin @test Base.jit_total_bytes() >= 0 - # sanity check `@allocations` returns what we expect in some very simple cases - @test (@allocations "a") == 0 - @test (@allocations "a" * "b") == 0 # constant propagation - @test (@allocations "a" * Base.inferencebarrier("b")) == 1 + # sanity check `@allocations` returns what we expect in some very simple cases. + # These are inside functions because `@allocations` uses `Experimental.@force_compile` + # so can be affected by other code in the same scope. + @test (() -> @allocations "a")() == 0 + @test (() -> @allocations "a" * "b")() == 0 # constant propagation + @test (() -> @allocations "a" * Base.inferencebarrier("b"))() == 1 + + _lock_conflicts, _nthreads = eval(Meta.parse(read(`$(Base.julia_cmd()) -tauto -E ' + _lock_conflicts = @lock_conflicts begin + l = ReentrantLock() + Threads.@threads for i in 1:Threads.nthreads() + lock(l) do + sleep(1) + end + end + end + _lock_conflicts,Threads.nthreads() + '`, String))) + @test _lock_conflicts > 0 skip=(_nthreads < 2) # can only test if the worker can multithread +end + +#TODO: merge with `@testset "Base/timing.jl"` once https://github.com/JuliaLang/julia/issues/52948 is resolved +@testset "Base/timing.jl2" begin + # Test the output of `format_bytes()` + inputs = [(factor * (Int64(1000)^e),binary) for binary in (false,true), factor in (1,2), e in 0:6][:] + expected_output = ["1 byte", "1 byte", "2 bytes", "2 bytes", "1000 bytes", "1000 bytes", "2.000 kB", "1.953 KiB", + "1000.000 kB", "976.562 KiB", "2.000 MB", "1.907 MiB", "1000.000 MB", "953.674 MiB", + "2.000 GB", "1.863 GiB", "1000.000 GB", "931.323 GiB", "2.000 TB", "1.819 TiB", + "1000.000 TB", "909.495 TiB", "2.000 PB", "1.776 PiB", "1000.000 PB", "888.178 PiB", + "2000.000 PB", "1776.357 PiB"] + + for ((n, binary), expected) in zip(inputs, expected_output) + @test Base.format_bytes(n; binary) == expected + end end @testset "in_finalizer" begin @@ -1402,3 +1568,59 @@ end GC.gc(true); yield() @test in_fin[] end + +@testset "Base docstrings" begin + undoc = Docs.undocumented_names(Base) + @test_broken isempty(undoc) + @test undoc == [:BufferStream, :CanonicalIndexError, :CapturedException, :Filesystem, :IOServer, :InvalidStateException, :Order, :PipeEndpoint, :ScopedValues, :Sort, :TTY] +end + +@testset "Base.Libc docstrings" begin + @test isempty(Docs.undocumented_names(Libc)) +end + +@testset "Silenced missed transformations" begin 
+ # Ensure the WarnMissedTransformationsPass is not on by default + src = """ + @noinline iteration(i) = (@show(i); return nothing) + @eval function loop_unroll_full_fail(N) + for i in 1:N + iteration(i) + \$(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"), 1))) + end + end + loop_unroll_full_fail(3) + """ + out_err = mktemp() do _, f + run(`$(Base.julia_cmd()) -e "$src"`, devnull, devnull, f) + seekstart(f) + read(f, String) + end + @test !occursin("loop not unrolled", out_err) +end + +let errs = IOBuffer() + run(`$(Base.julia_cmd()) -e ' + using Test + @test isdefined(DataType.name.mt, :backedges) + Base.Experimental.disable_new_worlds() + @test_throws "disable_new_worlds" @eval f() = 1 + @test !isdefined(DataType.name.mt, :backedges) + @test_throws "disable_new_worlds" Base.delete_method(which(+, (Int, Int))) + @test 1+1 == 2 + using Dates + '`, devnull, stdout, errs) + @test occursin("disable_new_worlds", String(take!(errs))) +end + +@testset "`@constprop`, `@assume_effects` handling of an unknown setting" begin + for x ∈ ("constprop", "assume_effects") + try + eval(Meta.parse("Base.@$x :unknown f() = 3")) + error("unexpectedly reached") + catch e + e::LoadError + @test e.error isa ArgumentError + end + end +end diff --git a/test/missing.jl b/test/missing.jl index 36155eb32fe49..f588b2dabe904 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -596,7 +596,7 @@ end @test @coalesce(missing) === missing @test @coalesce(1, error("failed")) === 1 - @test_throws ErrorException @coalesce(missing, error("failed")) + @test_throws ErrorException("failed") @coalesce(missing, error("failed")) end mutable struct Obj; x; end @@ -615,8 +615,7 @@ mutable struct Obj; x; end end @testset "showerror missing function" begin - me = try missing(1) catch e e end - @test sprint(showerror, me) == "MethodError: objects of type Missing are not callable" + @test_throws "MethodError: objects of type Missing are not callable" missing(1) end @testset "sort and sortperm with $(eltype(X))" for (X, P, RP) in @@ -651,3 +650,29 @@ for func in (round, ceil, floor, trunc) @test Core.Compiler.is_foldable(Base.infer_effects(func, (Type{Int},Union{Int,Missing}))) end end + +@testset "Custom Missing type" begin + struct NewMissing end + Base.ismissing(::NewMissing) = true + Base.coalesce(x::NewMissing, y...) = coalesce(y...) + Base.isless(::NewMissing, ::NewMissing) = false + Base.isless(::NewMissing, ::Any) = false + Base.isless(::Any, ::NewMissing) = true + Base.isequal(::NewMissing, ::Missing) = true + Base.isequal(::Missing, ::NewMissing) = true + arr = [missing 1 2 3 missing 10 11 12 missing] + newarr = Union{Int, NewMissing}[ismissing(v) ? 
NewMissing() : v for v in arr] + + @test all(skipmissing(arr) .== skipmissing(newarr)) + @test all(eachindex(skipmissing(arr)) .== eachindex(skipmissing(newarr))) + @test all(keys(skipmissing(arr)) .== keys(skipmissing(newarr))) + @test_broken sum(skipmissing(arr)) == sum(skipmissing(newarr)) + @test filter(>(10), skipmissing(arr)) == filter(>(10), skipmissing(newarr)) + @test isequal(sort(vec(arr)), sort(vec(newarr))) + + @test_throws MissingException skipmissing(newarr)[findfirst(ismissing, newarr)] + @test coalesce(NewMissing(), 1) == coalesce(NewMissing(), NewMissing(), 1) == 1 + @test coalesce(NewMissing()) === coalesce(NewMissing(), NewMissing()) === missing + @test @coalesce(NewMissing(), 1) == @coalesce(NewMissing(), NewMissing(), 1) == 1 + @test @coalesce(NewMissing()) === @coalesce(NewMissing(), NewMissing()) === missing +end diff --git a/test/mod2pi.jl b/test/mod2pi.jl index 5b0cb906bcef2..0eeac6f1e3ce4 100644 --- a/test/mod2pi.jl +++ b/test/mod2pi.jl @@ -26,7 +26,7 @@ # 3.14159265359, -3.14159265359 # pi/16*k +/- 0.00001 for k in [-20:20] # to cover all quadrants # numerators of continuous fraction approximations to pi -# see http://oeis.org/A002485 +# see https://oeis.org/A002485 # (reason: for max cancellation, we want x = k*pi + eps for small eps, so x/k ≈ pi) testCases = [ diff --git a/test/mpfr.jl b/test/mpfr.jl index 1a0a0041bf94e..c212bdfc92821 100644 --- a/test/mpfr.jl +++ b/test/mpfr.jl @@ -1039,3 +1039,61 @@ end end end end + +@testset "issue #50642" begin + setprecision(BigFloat, 500) do + bf = big"1.4901162082026128889687591176485489397376143775948511e-07" + @test Float16(bf) == Float16(2.0e-7) + end +end + +# PR #54284 +import Base.MPFR: clear_flags, had_underflow, had_overflow, had_divbyzero, + had_nan, had_inexact_exception, had_range_exception + +function all_flags_54284() + ( + had_underflow(), + had_overflow(), + had_divbyzero(), + had_nan(), + had_inexact_exception(), + had_range_exception(), + ) +end +@testset "MPFR flags" begin + let x, a = floatmin(BigFloat), b = floatmax(BigFloat), c = zero(BigFloat) + clear_flags() + @test !any(all_flags_54284()) + + x = a - a # normal + @test all_flags_54284() == (false, false, false, false, false, false) + x = 1 / c # had_divbyzero + @test all_flags_54284() == (false, false, true, false, false, false) + clear_flags() + x = nextfloat(a) - a # underflow + @test all_flags_54284() == (true, false, false, false, true, false) + clear_flags() + x = 1 / a # overflow + @test all_flags_54284() == (false, true, false, false, true, false) + clear_flags() + x = c / c # nan + @test all_flags_54284() == (false, false, false, true, false, false) + clear_flags() + x = prevfloat(BigFloat(1.0)) * 100 # inexact + @test all_flags_54284() == (false, false, false, false, true, false) + clear_flags() + try convert(Int, b); catch; end # range exception + @test all_flags_54284() == (false, false, false, false, false, true) + clear_flags() + end +end + +@testset "BigFloatData truncation OOB read" begin + @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128) + v = Base.MPFR.BigFloatData{T}(fill(typemax(T), 1 + Base.MPFR.offset_p_limbs)) + @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80) + @test Base.MPFR.truncated(UInt128, v, bit_count) isa Any + end + end +end diff --git a/test/namedtuple.jl b/test/namedtuple.jl index eb3846c8cbffd..b8dba5c06422e 100644 --- a/test/namedtuple.jl +++ b/test/namedtuple.jl @@ -1,5 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +using Base: delete + @test_throws TypeError NamedTuple{1,Tuple{}} @test_throws TypeError NamedTuple{(),1} @test_throws TypeError NamedTuple{(:a,1),Tuple{Int}} @@ -28,13 +30,13 @@ @test (x=4, y=5, z=6)[()] == NamedTuple() @test (x=4, y=5, z=6)[:] == (x=4, y=5, z=6) @test NamedTuple()[()] == NamedTuple() -@test_throws ErrorException (x=4, y=5, z=6).a +@test_throws FieldError (x=4, y=5, z=6).a @test_throws BoundsError (a=2,)[0] @test_throws BoundsError (a=2,)[2] -@test_throws ErrorException (x=4, y=5, z=6)[(:a,)] -@test_throws ErrorException (x=4, y=5, z=6)[(:x, :a)] -@test_throws ErrorException (x=4, y=5, z=6)[[:a]] -@test_throws ErrorException (x=4, y=5, z=6)[[:x, :a]] +@test_throws FieldError (x=4, y=5, z=6)[(:a,)] +@test_throws FieldError (x=4, y=5, z=6)[(:x, :a)] +@test_throws FieldError (x=4, y=5, z=6)[[:a]] +@test_throws FieldError (x=4, y=5, z=6)[[:x, :a]] @test_throws ErrorException (x=4, y=5, z=6)[(:x, :x)] @test length(NamedTuple()) == 0 @@ -94,6 +96,9 @@ end conv_res = @test_throws MethodError convert(NamedTuple{(:a,),Tuple{I}} where I<:AbstractString, (;a=1)) @test conv_res.value.f === convert && conv_res.value.args === (AbstractString, 1) + + conv6 = convert(NamedTuple{(:a,),Tuple{NamedTuple{(:b,), Tuple{Int}}}}, ((1,),)) + @test conv6 === (a = (b = 1,),) end @test NamedTuple{(:a,:c)}((b=1,z=2,c=3,aa=4,a=5)) === (a=5, c=3) @@ -134,6 +139,14 @@ end @test map(string, (x=1, y=2)) == (x="1", y="2") @test map(round, (x=UInt, y=Int), (x=3.1, y=2//3)) == (x=UInt(3), y=1) +@testset "filter" begin + @test filter(isodd, (a=1,b=2,c=3)) === (a=1, c=3) + @test filter(i -> true, (;)) === (;) + longnt = NamedTuple{ntuple(i -> Symbol(:a, i), 20)}(ntuple(identity, 20)) + @test filter(iseven, longnt) === NamedTuple{ntuple(i -> Symbol(:a, 2i), 10)}(ntuple(i -> 2i, 10)) + @test filter(x -> x<2, (longnt..., z=1.5)) === (a1=1, z=1.5) +end + @test merge((a=1, b=2), (a=10,)) == (a=10, b=2) @test merge((a=1, b=2), (a=10, z=20)) == (a=10, b=2, z=20) @test merge((a=1, b=2), (z=20,)) == (a=1, b=2, z=20) @@ -244,7 +257,7 @@ function abstr_nt_22194_2() a = NamedTuple[(a=1,), (b=2,)] return a[1].b end -@test_throws ErrorException abstr_nt_22194_2() +@test_throws FieldError abstr_nt_22194_2() @test Base.return_types(abstr_nt_22194_2, ()) == Any[Any] mutable struct HasAbstractNamedTuples @@ -271,6 +284,11 @@ end abstr_nt_22194_3() @test Base.return_types(abstr_nt_22194_3, ()) == Any[Any] +@test delete((a=1,), :a) == NamedTuple() +@test delete((a=1, b=2), :a) == (b=2,) +@test delete((a=1, b=2, c=3), :b) == (a=1, c=3) +@test delete((a=1, b=2, c=3), :z) == (a=1, b=2, c=3) + @test Base.structdiff((a=1, b=2), (b=3,)) == (a=1,) @test Base.structdiff((a=1, b=2, z=20), (b=3,)) == (a=1, z=20) @test Base.structdiff((a=1, b=2, z=20), (b=3, q=20, z=1)) == (a=1,) @@ -382,10 +400,22 @@ end # Test effect/inference for merge/diff of unknown NamedTuples for f in (Base.merge, Base.structdiff) - let eff = Base.infer_effects(f, Tuple{NamedTuple, NamedTuple}) - @test Core.Compiler.is_foldable(eff) && eff.nonoverlayed + @testset let f = f + # test the effects of the fallback path + fallback_func(a::NamedTuple, b::NamedTuple) = @invoke f(a::NamedTuple, b::NamedTuple) + @testset let eff = Base.infer_effects(fallback_func) + @test Core.Compiler.is_foldable(eff) + @test Core.Compiler.is_nonoverlayed(eff) + end + @test only(Base.return_types(fallback_func)) == NamedTuple + # test if `max_methods = 4` setting works as expected + general_func(a::NamedTuple, b::NamedTuple) = f(a, b) + @testset 
let eff = Base.infer_effects(general_func) + @test Core.Compiler.is_foldable(eff) + @test Core.Compiler.is_nonoverlayed(eff) + end + @test only(Base.return_types(general_func)) == NamedTuple end - @test Core.Compiler.return_type(f, Tuple{NamedTuple, NamedTuple}) == NamedTuple end @test Core.Compiler.is_foldable(Base.infer_effects(pairs, Tuple{NamedTuple})) @@ -394,3 +424,38 @@ let a = Base.NamedTuple{(:a, :b), Tuple{Any, Any}}((1, 2)), b = Base.NamedTuple{ @test typeof(Base.merge(a, b)) == Base.NamedTuple{(:a, :b), Tuple{Any, Float64}} @test typeof(Base.structdiff(a, b)) == Base.NamedTuple{(:a,), Tuple{Any}} end + +function mergewith51009(combine, a::NamedTuple{an}, b::NamedTuple{bn}) where {an, bn} + names = Base.merge_names(an, bn) + NamedTuple{names}(ntuple(Val{nfields(names)}()) do i + n = getfield(names, i) + if Base.sym_in(n, an) + if Base.sym_in(n, bn) + combine(getfield(a, n), getfield(b, n)) + else + getfield(a, n) + end + else + getfield(b, n) + end + end) +end +let c = (a=1, b=2), + d = (b=3, c=(d=1,)) + @test @inferred(mergewith51009((x,y)->y, c, d)) === (a = 1, b = 3, c = (d = 1,)) +end + +@test_throws ErrorException NamedTuple{(), Union{}} +for NT in (NamedTuple{(:a, :b), Union{}}, NamedTuple{(:a, :b), T} where T<:Union{}) + @test fieldtype(NT, 1) == Union{} + @test fieldtype(NT, :b) == Union{} + @test_throws FieldError fieldtype(NT, :c) + @test_throws BoundsError fieldtype(NT, 0) + @test_throws BoundsError fieldtype(NT, 3) + @test Base.return_types((Type{NT},)) do NT; fieldtype(NT, :a); end == Any[Type{Union{}}] + @test fieldtype(NamedTuple{<:Any, Union{}}, 1) == Union{} +end +let NT = NamedTuple{<:Any, Union{}} + @test fieldtype(NT, 100) == Union{} + @test only(Base.return_types((Type{NT},)) do NT; fieldtype(NT, 100); end) >: Type{Union{}} +end diff --git a/test/numbers.jl b/test/numbers.jl index d7fd6531b157d..dc4f2cb613d77 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -262,7 +262,7 @@ end # GMP allocation overflow should not cause crash if Base.GMP.ALLOC_OVERFLOW_FUNCTION[] && sizeof(Int) > 4 - @test_throws OutOfMemoryError BigInt(2)^(typemax(Culong)) + @test_throws OutOfMemoryError BigInt(2)^(typemax(Culong)) end # exponentiating with a negative base @@ -679,6 +679,9 @@ end @test copysign(big(-1), 0x02) == 1 @test copysign(big(-1.0), 0x02) == 1.0 @test copysign(-1//2, 0x01) == 1//2 + + # Verify overflow is checked with rational + @test_throws OverflowError copysign(typemin(Int)//1, 1) end @testset "isnan/isinf/isfinite" begin @@ -1111,10 +1114,30 @@ end end @testset "Irrational zero and one" begin - @test one(pi) === true - @test zero(pi) === false - @test one(typeof(pi)) === true - @test zero(typeof(pi)) === false + for i in (π, ℯ, γ, catalan) + @test one(i) === true + @test zero(i) === false + @test one(typeof(i)) === true + @test zero(typeof(i)) === false + end +end + +@testset "Irrational iszero, isfinite, isinteger, and isone" begin + for i in (π, ℯ, γ, catalan) + @test !iszero(i) + @test !isone(i) + @test !isinteger(i) + @test isfinite(i) + end +end + +@testset "Irrational promote_type" begin + for T in (Float16, Float32, Float64) + for i in (π, ℯ, γ, catalan) + @test T(2.0) * i ≈ T(2.0) * T(i) + @test T(2.0) * i isa T + end + end end @testset "Irrationals compared with Irrationals" begin @@ -1135,6 +1158,8 @@ end end @testset "Irrationals compared with Rationals and Floats" begin + @test pi != Float64(pi) + @test Float64(pi) != pi @test Float64(pi,RoundDown) < pi @test Float64(pi,RoundUp) > pi @test !(Float64(pi,RoundDown) > pi) @@ -1153,6 +1178,7 @@ end 
@test nextfloat(big(pi)) > pi @test !(prevfloat(big(pi)) > pi) @test !(nextfloat(big(pi)) < pi) + @test big(typeof(pi)) == BigFloat @test 2646693125139304345//842468587426513207 < pi @test !(2646693125139304345//842468587426513207 > pi) @@ -1168,6 +1194,17 @@ Base.@irrational i46051 4863.185427757 1548big(pi) # issue #46051 @test sprint(show, "text/plain", i46051) == "i46051 = 4863.185427757..." end + +@testset "Irrational round, float, ceil" begin + using .MathConstants + @test round(π) === 3.0 + @test round(Int, ℯ) === 3 + @test floor(ℯ) === 2.0 + @test floor(Int, φ) === 1 + @test ceil(γ) === 1.0 + @test ceil(Int, catalan) === 1 +end + @testset "issue #6365" begin for T in (Float32, Float64) for i = 9007199254740992:9007199254740996 @@ -2216,11 +2253,11 @@ end @test round(Int16, -32768.1) === Int16(-32768) end # issue #7508 -@test_throws ArgumentError reinterpret(Int, 0x01) +@test_throws ErrorException reinterpret(Int, 0x01) @testset "issue #12832" begin @test_throws ArgumentError reinterpret(Float64, Complex{Int64}(1)) - @test_throws ArgumentError reinterpret(Int32, false) + @test_throws ErrorException reinterpret(Int32, false) end # issue #41 ndigf(n) = Float64(log(Float32(n))) @@ -2675,7 +2712,7 @@ end @test divrem(a,-(a-20), RoundDown) == (div(a,-(a-20), RoundDown), rem(a,-(a-20), RoundDown)) end -@testset "rem2pi $T" for T in (Float16, Float32, Float64, BigFloat) +@testset "rem2pi $T" for T in (Float16, Float32, Float64, BigFloat, Int8, Int16, Int32, Int64, Int128) @test rem2pi(T(1), RoundToZero) == 1 @test rem2pi(T(1), RoundNearest) == 1 @test rem2pi(T(1), RoundDown) == 1 @@ -2762,6 +2799,20 @@ Base.literal_pow(::typeof(^), ::PR20530, ::Val{p}) where {p} = 2 @test [2,4,8].^-2 == [0.25, 0.0625, 0.015625] @test [2, 4, 8].^-2 .* 4 == [1.0, 0.25, 0.0625] # nested literal_pow @test ℯ^-2 == exp(-2) ≈ inv(ℯ^2) ≈ (ℯ^-1)^2 ≈ sqrt(ℯ^-4) + + if Int === Int32 + p = 2147483647 + @test x^p == 1 + @test x^2147483647 == 2 + @test (@fastmath x^p) == 1 + @test (@fastmath x^2147483647) == 2 + elseif Int === Int64 + p = 9223372036854775807 + @test x^p == 1 + @test x^9223372036854775807 == 2 + @test (@fastmath x^p) == 1 + @test (@fastmath x^9223372036854775807) == 2 + end end module M20889 # do we get the expected behavior without importing Base.^? 
using Test @@ -2886,10 +2937,19 @@ end @test log(π,ComplexF32(2)) isa ComplexF32 end +@testset "irrational promotion shouldn't recurse without bound, issue #51001" begin + for s ∈ (:π, :ℯ) + T = Irrational{s} + @test promote_type(Complex{T}, T) <: Complex + @test promote_type(T, Complex{T}) <: Complex + end +end + @testset "printing non finite floats" begin let float_types = Set() allsubtypes!(Base, AbstractFloat, float_types) allsubtypes!(Core, AbstractFloat, float_types) + filter!(!isequal(Core.BFloat16), float_types) # defined externally @test !isempty(float_types) for T in float_types @@ -3112,3 +3172,49 @@ end end end + +@testset "FP(inf) == inf" begin + # Iterate through all pairs of FP types + fp_types = (Float16, Float32, Float64, BigFloat) + for F ∈ fp_types, G ∈ fp_types, f ∈ (typemin, typemax) + i = f(F) + @test i == G(i) + end +end + +@testset "small int FP conversion" begin + fp_types = (Float16, Float32, Float64, BigFloat) + m = Int(maxintfloat(Float16)) + for F ∈ fp_types, G ∈ fp_types, n ∈ (-m):m + @test n == G(F(n)) == F(G(n)) + end +end + +@testset "`precision`" begin + Fs = (Float16, Float32, Float64, BigFloat) + + @testset "type vs instance" begin + @testset "F: $F" for F ∈ Fs + @test precision(F) == precision(one(F)) + @test precision(F, base = 2) == precision(one(F), base = 2) + @test precision(F, base = 3) == precision(one(F), base = 3) + end + end + + @testset "`precision` of `Union` shouldn't recur infinitely, #52909" begin + @testset "i: $i" for i ∈ eachindex(Fs) + @testset "j: $j" for j ∈ (i + 1):lastindex(Fs) + S = Fs[i] + T = Fs[j] + @test_throws MethodError precision(Union{S,T}) + @test_throws MethodError precision(Union{S,T}, base = 3) + end + end + end +end + +@testset "irrational special values" begin + for v ∈ (π, ℯ, γ, catalan, φ) + @test v === typemin(v) === typemax(v) + end +end diff --git a/test/offsetarray.jl b/test/offsetarray.jl index c447c6d420f2a..8e2ee33c49ed6 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -383,6 +383,18 @@ v2 = copy(v) @test v2[end-1] == 2 @test v2[end] == 1 +# push!(v::AbstractVector, x...) +v2 = copy(v) +@test @invoke(push!(v2::AbstractVector, 3)) === v2 +@test v2[axes(v,1)] == v +@test v2[end] == 3 +@test v2[begin] == v[begin] == v[-2] +v2 = copy(v) +@test @invoke(push!(v2::AbstractVector, 5, 6)) == v2 +@test v2[axes(v,1)] == v +@test v2[end-1] == 5 +@test v2[end] == 6 + # append! from array v2 = copy(v) @test append!(v2, [2, 1]) === v2 @@ -399,11 +411,29 @@ v2 = copy(v) @test v2[axes(v, 1)] == v @test v2[lastindex(v)+1:end] == [2, 1] +# append!(::AbstractVector, ...) +# append! from array +v2 = copy(v) +@test @invoke(append!(v2::AbstractVector, [2, 1]::Any)) === v2 +@test v2[axes(v, 1)] == v +@test v2[lastindex(v)+1:end] == [2, 1] +# append! from HasLength iterator +v2 = copy(v) +@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1])::Any)) === v2 +@test v2[axes(v, 1)] == v +@test v2[lastindex(v)+1:end] == [2, 1] +# append! 
from SizeUnknown iterator +v2 = copy(v) +@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1] if true)::Any)) === v2 +@test v2[axes(v, 1)] == v +@test v2[lastindex(v)+1:end] == [2, 1] + # other functions v = OffsetArray(v0, (-3,)) @test lastindex(v) == 1 @test v ≈ v -@test axes(v') === (Base.OneTo(1), OffsetArrays.IdOffsetRange(Base.OneTo(4), -3)) +@test (@inferred axes(v')[1]) === OffsetArrays.IdOffsetRange(Base.OneTo(1)) +@test (@inferred axes(v')[2]) === OffsetArrays.IdOffsetRange(Base.OneTo(4), -3) @test parent(v) == collect(v) rv = reverse(v) @test axes(rv) == axes(v) @@ -627,15 +657,15 @@ end B = OffsetArray(reshape(1:24, 4, 3, 2), -5, 6, -7) for R in (fill(0, -4:-1), fill(0, -4:-1, 7:7), fill(0, -4:-1, 7:7, -6:-6)) @test @inferred(maximum!(R, B)) == reshape(maximum(B, dims=(2,3)), axes(R)) == reshape(21:24, axes(R)) - @test @allocated(maximum!(R, B)) <= 800 + @test @allocated(maximum!(R, B)) <= 400 @test @inferred(minimum!(R, B)) == reshape(minimum(B, dims=(2,3)), axes(R)) == reshape(1:4, axes(R)) - @test @allocated(minimum!(R, B)) <= 800 + @test @allocated(minimum!(R, B)) <= 400 end for R in (fill(0, -4:-4, 7:9), fill(0, -4:-4, 7:9, -6:-6)) @test @inferred(maximum!(R, B)) == reshape(maximum(B, dims=(1,3)), axes(R)) == reshape(16:4:24, axes(R)) - @test @allocated(maximum!(R, B)) <= 800 + @test @allocated(maximum!(R, B)) <= 400 @test @inferred(minimum!(R, B)) == reshape(minimum(B, dims=(1,3)), axes(R)) == reshape(1:4:9, axes(R)) - @test @allocated(minimum!(R, B)) <= 800 + @test @allocated(minimum!(R, B)) <= 400 end @test_throws DimensionMismatch maximum!(fill(0, -4:-1, 7:7, -6:-6, 1:1), B) @test_throws DimensionMismatch minimum!(fill(0, -4:-1, 7:7, -6:-6, 1:1), B) @@ -863,3 +893,35 @@ end # this is fixed in #40038, so the evaluation of its CartesianIndices should work @test CartesianIndices(A) == CartesianIndices(B) end + +@testset "overflowing show" begin + A = OffsetArray(repeat([1], 1), typemax(Int)-1) + b = IOBuffer(maxsize=10) + show(b, A) + @test String(take!(b)) == "[1]" + show(b, (A, A)) + @test String(take!(b)) == "([1], [1])" +end + +@testset "indexing views (#53249)" begin + v = view([1,2,3,4], :) + @test v[Base.IdentityUnitRange(2:3)] == OffsetArray(2:3, 2:3) +end + +@testset "mapreduce with OffsetRanges" begin + r = 5:100 + a = OffsetArray(r, 2) + b = sum(a, dims=1) + @test b[begin] == sum(r) +end + +@testset "reshape" begin + A0 = [1 3; 2 4] + A = reshape(A0, 2:3, 4:5) + @test axes(A) == Base.IdentityUnitRange.((2:3, 4:5)) + + B = reshape(A0, -10:-9, 9:10) + @test isa(B, OffsetArray{Int,2}) + @test parent(B) == A0 + @test axes(B) == Base.IdentityUnitRange.((-10:-9, 9:10)) +end diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl index e6490f5e9d345..6c988b068a668 100644 --- a/test/opaque_closure.jl +++ b/test/opaque_closure.jl @@ -10,7 +10,7 @@ const lno = LineNumberNode(1, :none) let ci = @code_lowered const_int() @eval function oc_trivial() - $(Expr(:new_opaque_closure, Tuple{}, Any, Any, + $(Expr(:new_opaque_closure, Tuple{}, Any, Any, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci))) end end @@ -19,7 +19,7 @@ end let ci = @code_lowered const_int() @eval function oc_simple_inf() - $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, + $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci))) end end @@ -33,7 +33,7 @@ end (a::OcClos2Int)() = getfield(a, 1) + getfield(a, 2) let ci = @code_lowered OcClos2Int(1, 2)(); @eval function oc_trivial_clos() - $(Expr(:new_opaque_closure, 
Tuple{}, Int, Int, + $(Expr(:new_opaque_closure, Tuple{}, Int, Int, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci), 1, 2)) end @@ -42,7 +42,7 @@ end let ci = @code_lowered OcClos2Int(1, 2)(); @eval function oc_self_call_clos() - $(Expr(:new_opaque_closure, Tuple{}, Int, Int, + $(Expr(:new_opaque_closure, Tuple{}, Int, Int, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci), 1, 2))() end @@ -59,7 +59,7 @@ end (a::OcClos1Any)() = getfield(a, 1) let ci = @code_lowered OcClos1Any(1)() @eval function oc_pass_clos(x) - $(Expr(:new_opaque_closure, Tuple{}, Any, Any, + $(Expr(:new_opaque_closure, Tuple{}, Any, Any, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci), :x)) end @@ -69,7 +69,7 @@ end let ci = @code_lowered OcClos1Any(1)() @eval function oc_infer_pass_clos(x) - $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, + $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci), :x)) end @@ -81,7 +81,7 @@ end let ci = @code_lowered identity(1) @eval function oc_infer_pass_id() - $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any, + $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any, true, Expr(:opaque_closure_method, nothing, 1, false, lno, ci))) end end @@ -103,7 +103,7 @@ end let ci = @code_lowered OcOpt([1 2])() @eval function oc_opt_ndims(A) - $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, + $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci), :A)) end @@ -151,26 +151,33 @@ end # module test_world_age function maybe_vararg(isva::Bool) T = isva ? Vararg{Int} : Int - @opaque Tuple{T} (x...)->x + @opaque Tuple{T}->_ (x...)->x end @test maybe_vararg(false)(1) == (1,) @test_throws MethodError maybe_vararg(false)(1,2,3) @test maybe_vararg(true)(1) == (1,) @test maybe_vararg(true)(1,2,3) == (1,2,3) -@test (@opaque Tuple{Int, Int} (a, b, x...)->x)(1,2) === () -@test (@opaque Tuple{Int, Int} (a, x...)->x)(1,2) === (2,) -@test (@opaque Tuple{Int, Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4) +@test (@opaque Tuple{Int, Int}->_ (a, b, x...)->x)(1,2) === () +@test (@opaque Tuple{Int, Int}->Tuple{} (a, b, x...)->x)(1,2) === () +@test (@opaque _->Tuple{Vararg{Int}} (a, b, x...)->x)(1,2) === () +@test (@opaque Tuple{Int, Int}->_ (a, x...)->x)(1,2) === (2,) +@test (@opaque Tuple{Int, Int}->Tuple{Int} (a, x...)->x)(1,2) === (2,) +@test (@opaque _->Tuple{Vararg{Int}} (a, x...)->x)(1,2) === (2,) +@test (@opaque Tuple{Int, Vararg{Int}}->_ (a, x...)->x)(1,2,3,4) === (2,3,4) +@test (@opaque Tuple{Int, Vararg{Int}}->Tuple{Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4) @test (@opaque (a::Int, x::Int...)->x)(1,2,3) === (2,3) +@test (@opaque _->Tuple{Vararg{Int}} (a::Int, x::Int...)->x)(1,2,3) === (2,3) +@test (@opaque _->_ (a::Int, x::Int...)->x)(1,2,3) === (2,3) -@test_throws ErrorException (@opaque Tuple{Vararg{Int}} x->x) -@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}} x->x) -@test_throws ErrorException (@opaque Tuple{Int, Int} x->x) -@test_throws ErrorException (@opaque Tuple{Any} (x,y)->x) -@test_throws ErrorException (@opaque Tuple{Vararg{Int}} (x,y...)->x) -@test_throws ErrorException (@opaque Tuple{Int} (x,y,z...)->x) +@test_throws ErrorException (@opaque Tuple{Vararg{Int}}->_ x->x) +@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}}->_ x->x) +@test_throws ErrorException (@opaque Tuple{Int, Int}->_ x->x) +@test_throws ErrorException (@opaque Tuple{Any}->_ (x,y)->x) +@test_throws ErrorException (@opaque Tuple{Vararg{Int}}->_ 
(x,y...)->x) +@test_throws ErrorException (@opaque Tuple{Int}->_ (x,y,z...)->x) # cannot specify types both on arguments and separately -@test_throws ErrorException @eval @opaque Tuple{Any} (x::Int)->x +@test_throws ErrorException @eval @opaque Tuple{Any}->_ (x::Int)->x # Vargarg in complied mode mk_va_opaque() = @opaque (x...)->x @@ -178,19 +185,19 @@ mk_va_opaque() = @opaque (x...)->x @test mk_va_opaque()(1,2) == (1,2) # OpaqueClosure show method -@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)::Any->◌" +@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)->◌::Any" # Opaque closure in CodeInfo returned from generated functions let ci = @code_lowered const_int() global function mk_ocg(world::UInt, source, args...) @nospecialize - cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any, + cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1] cig.slotnames = Symbol[Symbol("#self#")] cig.slottypes = Any[Any] cig.slotflags = UInt8[0x00] - @assert cig.min_world == UInt(1) - @assert cig.max_world == typemax(UInt) + cig.nargs = 1 + cig.isva = false return cig end end @@ -241,13 +248,22 @@ let foo::Int = 42 end let oc = @opaque a->sin(a) - @test length(code_typed(oc, (Int,))) == 1 + let opt = code_typed(oc, (Int,)) + @test length(opt) == 1 + @test opt[1][2] === Float64 + end + let unopt = code_typed(oc, (Int,); optimize=false) + @test length(unopt) == 1 + end end # constructing an opaque closure from IRCode let src = first(only(code_typed(+, (Int, Int)))) - ir = Core.Compiler.inflate_ir(src) - @test OpaqueClosure(src)(40, 2) == 42 + ir = Core.Compiler.inflate_ir(src, Core.Compiler.VarState[], src.slottypes) + ir.argtypes[1] = Tuple{} + @test ir.debuginfo.def === nothing + ir.debuginfo.def = Symbol(@__FILE__) + @test OpaqueClosure(src; sig=Tuple{Int, Int}, rettype=Int, nargs=2)(40, 2) == 42 oc = OpaqueClosure(ir) @test oc(40, 2) == 42 @test isa(oc, OpaqueClosure{Tuple{Int,Int}, Int}) @@ -255,9 +271,12 @@ let src = first(only(code_typed(+, (Int, Int)))) @test OpaqueClosure(ir)(40, 2) == 42 # the `OpaqueClosure(::IRCode)` constructor should be non-destructive end let ir = first(only(Base.code_ircode(sin, (Int,)))) + ir.argtypes[1] = Tuple{} @test OpaqueClosure(ir)(42) == sin(42) @test OpaqueClosure(ir)(42) == sin(42) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive + @test length(code_typed(OpaqueClosure(ir))) == 1 ir = first(only(Base.code_ircode(sin, (Float64,)))) + ir.argtypes[1] = Tuple{} @test OpaqueClosure(ir)(42.) == sin(42.) @test OpaqueClosure(ir)(42.) == sin(42.) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive end @@ -266,11 +285,14 @@ end let src = code_typed((Int,Int)) do x, y... 
return (x, y) end |> only |> first - let oc = OpaqueClosure(src) + src.slottypes[1] = Tuple{} + let oc = OpaqueClosure(src; rettype=Tuple{Int, Tuple{Int}}, sig=Tuple{Int, Int}, nargs=2, isva=true) @test oc(1,2) === (1,(2,)) @test_throws MethodError oc(1,2,3) end - ir = Core.Compiler.inflate_ir(src) + ir = Core.Compiler.inflate_ir(src, Core.Compiler.VarState[], src.slottypes) + @test ir.debuginfo.def === nothing + ir.debuginfo.def = Symbol(@__FILE__) let oc = OpaqueClosure(ir; isva=true) @test oc(1,2) === (1,(2,)) @test_throws MethodError oc(1,2,3) @@ -283,7 +305,7 @@ eval_oc_spec(oc) = oc() for f in (const_int, const_int_barrier) ci = code_lowered(f, Tuple{})[1] for compiled in (true, false) - oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64, + oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64, true, Expr(:opaque_closure_method, nothing, 0, false, lno, ci)) oc_mismatch = let ci = code_lowered(f, Tuple{})[1] if compiled @@ -297,3 +319,74 @@ for f in (const_int, const_int_barrier) @test_throws TypeError eval_oc_spec(oc_mismatch) end end + + +# Attempting to construct an opaque closure backtrace after the oc is GC'ed +f_oc_throws() = error("oops") +@noinline function make_oc_and_collect_bt() + did_gc = Ref{Bool}(false) + bt = let ir = first(only(Base.code_ircode(f_oc_throws, ()))) + ir.argtypes[1] = Tuple + sentinel = Ref{Any}(nothing) + oc = OpaqueClosure(ir, sentinel) + finalizer(sentinel) do x + did_gc[] = true + end + try + oc() + @test false + catch e + bt = catch_backtrace() + @test isa(e, ErrorException) + bt + end + end + return bt, did_gc +end +let (bt, did_gc) = make_oc_and_collect_bt() + GC.gc(true); GC.gc(true); GC.gc(true); + @test did_gc[] + @test any(stacktrace(bt)) do frame + li = frame.linfo + isa(li, Core.CodeInstance) && (li = li.def) + isa(li, Core.ABIOverride) && (li = li.def) + isa(li, Core.MethodInstance) || return false + isa(li.def, Method) || return false + return li.def.is_for_opaque_closure + end +end + +# Opaque closure with mismatch struct argtype +const op_arg_restrict2 = @opaque (x::Tuple{Int64}, y::Base.RefValue{Int64})->x+y +ccall_op_arg_restrict2_bad_args() = op_arg_restrict2((1.,), 2) + +@test_throws TypeError ccall_op_arg_restrict2_bad_args() + +# code_llvm for opaque closures +let ir = Base.code_ircode((Int,Int)) do x, y + @noinline x * y + end |> only |> first + ir.argtypes[1] = Tuple{} + oc = Core.OpaqueClosure(ir) + io = IOBuffer() + code_llvm(io, oc, Tuple{Int,Int}) + @test occursin("j_*_", String(take!(io))) + code_llvm(io, oc, (Int,Int)) + @test occursin("j_*_", String(take!(io))) +end + +foopaque() = Base.Experimental.@opaque(@noinline x::Int->println(x))(1) + +code_llvm(devnull,foopaque,()) #shouldn't crash + +let ir = first(only(Base.code_ircode(sin, (Int,)))) + ir.argtypes[1] = Tuple{} + oc = Core.OpaqueClosure(ir) + @test (Base.show_method(IOBuffer(), oc.source::Method); true) +end + +let ir = first(only(Base.code_ircode(sin, (Int,)))) + ir.argtypes[1] = Tuple{} + oc = Core.OpaqueClosure(ir; do_compile=false) + @test oc(1) == sin(1) +end diff --git a/test/operators.jl b/test/operators.jl index 46cf6c7526299..2e22238c3e9d9 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -2,7 +2,7 @@ using Random: randstring -include("compiler/irutils.jl") +include(joinpath(@__DIR__,"../Compiler/test/irutils.jl")) @testset "ifelse" begin @test ifelse(true, 1, 2) == 1 @@ -154,6 +154,13 @@ Base.convert(::Type{T19714}, ::Int) = T19714() Base.promote_rule(::Type{T19714}, ::Type{Int}) = T19714 @test T19714()/1 === 1/T19714() === 
T19714() +@testset "operators with zero argument" begin + @test_throws(MethodError, +()) + @test_throws(MethodError, *()) + @test isempty(methods(+, ())) + @test isempty(methods(*, ())) +end + # pr #17155 and #33568 @testset "function composition" begin @test (uppercase∘(x->string(x,base=16)))(239487) == "3A77F" @@ -188,7 +195,7 @@ Base.promote_rule(::Type{T19714}, ::Type{Int}) = T19714 @test repr(uppercase ∘ first) == "uppercase ∘ first" @test sprint(show, "text/plain", uppercase ∘ first) == "uppercase ∘ first" - # test keyword ags in composition + # test keyword args in composition function kwf(a;b,c); a + b + c; end @test (abs2 ∘ kwf)(1,b=2,c=3) == 36 @@ -321,9 +328,21 @@ end @test lt5(4) && !lt5(5) end +@testset "in tuples" begin + @test ∈(5, (1,5,10,11)) + @test ∉(0, (1,5,10,11)) + @test ∈(5, (1,"hi","hey",5.0)) + @test ∉(0, (1,"hi","hey",5.0)) + @test ∈(5, (5,)) + @test ∉(0, (5,)) + @test ∉(5, ()) +end + @testset "ni" begin @test ∋([1,5,10,11], 5) @test !∋([1,10,11], 5) + @test ∋((1,5,10,11), 5) + @test ∌((1,10,11), 5) @test ∋(5)([5,1]) @test !∋(42)([0,1,100]) @test ∌(0)(1:10) @@ -359,3 +378,35 @@ end Base.:(<)(::B46327, ::B46327) = false @test B46327() <= B46327() end + +@testset "inference for `x in itr::Tuple`" begin + # concrete evaluation + @test Core.Compiler.is_foldable(Base.infer_effects(in, (Int,Tuple{Int,Int,Int}))) + @test Core.Compiler.is_foldable(Base.infer_effects(in, (Char,Tuple{Char,Char,Char}))) + for i = (1,2,3) + @testset let i = i + @test @eval Base.return_types() do + Val($i in (1,2,3)) + end |> only == Val{true} + end + end + @test Base.infer_return_type() do + Val(4 in (1,2,3)) + end == Val{false} + @test Base.infer_return_type() do + Val('1' in ('1','2','3')) + end == Val{true} + + # constant propagation + @test Base.infer_return_type((Int,Int)) do x, y + Val(1 in (x,2,y)) + end >: Val{true} + @test Base.infer_return_type((Int,Int)) do x, y + Val(2 in (x,2,y)) + end == Val{true} + + # should use the loop implementation given large tuples to avoid inference blowup + let t = ntuple(x->'A', 10000); + @test Base.infer_return_type(in, (Char,typeof(t))) == Bool + end +end diff --git a/test/ordering.jl b/test/ordering.jl index 547d8d8dd0e8b..3b5385b99be68 100644 --- a/test/ordering.jl +++ b/test/ordering.jl @@ -2,21 +2,24 @@ using Test -import Base.Order: Forward, Reverse +import Base.Order: Forward, Reverse, ord, Lt, By, ReverseOrdering # every argument can flip the integer order by passing the right value. Here, # we enumerate a few of these combinations and check that all these flips # compound so that in total we either have an increasing or decreasing sort. for (s1, rev) in enumerate([true, false]) - for (s2, lt) in enumerate([>, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0]) + for (s2, lt) in enumerate([(a, b)->isless(b, a), isless, >, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0]) for (s3, by) in enumerate([-, +]) for (s4, order) in enumerate([Reverse, Forward]) - if iseven(s1 + s2 + s3 + s4) - target = [1, 2, 3] - else - target = [3, 2, 1] - end + is_fwd = iseven(s1 + s2 + s3 + s4) + target = is_fwd ? 
(1:3) : (3:-1:1) + # arrays, integer and float ranges sometimes have different code paths @test target == sort([2, 3, 1], rev=rev, lt=lt, by=by, order=order) + + @test target == sort(1:3, rev=rev, lt=lt, by=by, order=order) + @test target == sort(3:-1:1, rev=rev, lt=lt, by=by, order=order) + @test float(target) == sort(1.0:3, rev=rev, lt=lt, by=by, order=order) + @test float(target) == sort(3.0:-1:1, rev=rev, lt=lt, by=by, order=order) end end end @@ -40,3 +43,17 @@ struct SomeOtherOrder <: Base.Order.Ordering end @test reverse(Forward) === Reverse @test reverse(Reverse) === Forward + +@test ord(isless, identity, false, Forward) === Forward +@test ord(isless, identity, true, Forward) === Reverse +@test ord(<, identity, false, Forward) === Lt(<) +@test ord(isless, abs, false, Forward) === By(abs) +@test ord(<, abs, false, Forward) === By(abs, Lt(<)) +@test ord(<, abs, true, Forward) === ReverseOrdering(By(abs, Lt(<))) +@test ord(<, abs, true, Reverse) === By(abs, Lt(<)) + +@testset "Base.Order docstrings" begin + undoc = Docs.undocumented_names(Base.Order) + @test_broken isempty(undoc) + @test undoc == [:DirectOrdering, :ForwardOrdering, :Order, :ordtype] +end diff --git a/test/parse.jl b/test/parse.jl index 69092b2c4188d..e2b94a45cc446 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -296,6 +296,8 @@ end @test_throws ArgumentError parse(Complex{T}, bad) end @test_throws ArgumentError parse(Complex{Int}, "3 + 4.2im") + @test_throws ArgumentError parse(ComplexF64, "3 β+ 4im") + @test_throws ArgumentError parse(ComplexF64, "3 + 4αm") end @testset "parse and tryparse type inference" begin diff --git a/test/path.jl b/test/path.jl index 2f4f2d0983a58..2515d765d8ca9 100644 --- a/test/path.jl +++ b/test/path.jl @@ -311,6 +311,26 @@ test_relpath() end + @testset "uripath" begin + host = if Sys.iswindows() + "" + elseif ispath("/proc/sys/fs/binfmt_misc/WSLInterop") + distro = get(ENV, "WSL_DISTRO_NAME", "") # See + "wsl%24/$distro" # See and + else + gethostname() + end + sysdrive, uridrive = if Sys.iswindows() "C:\\", "C:/" else "/", "" end + @test Base.Filesystem.uripath("$(sysdrive)some$(sep)file.txt") == "file://$host/$(uridrive)some/file.txt" + @test Base.Filesystem.uripath("$(sysdrive)another$(sep)$(sep)folder$(sep)file.md") == "file://$host/$(uridrive)another/folder/file.md" + @test Base.Filesystem.uripath("$(sysdrive)some file with ^odd% chars") == "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars" + @test Base.Filesystem.uripath("$(sysdrive)weird chars like @#&()[]{}") == "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D" + @test Base.Filesystem.uripath("$sysdrive") == "file://$host/$uridrive" + @test Base.Filesystem.uripath(".") == Base.Filesystem.uripath(pwd()) + @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)Δεδομένα") == "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1" + @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)🧮🐛🔨") == "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8" + end + if Sys.iswindows() @testset "issue #23646" begin @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b" diff --git a/test/precompile.jl b/test/precompile.jl index 62d862c384040..78a96250600a4 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1,12 +1,13 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -original_depot_path = copy(Base.DEPOT_PATH) -original_load_path = copy(Base.LOAD_PATH) - -using Test, Distributed, Random +using Test, Distributed, Random, Logging using REPL # doc lookup function +include("precompile_utils.jl") + Foo_module = :Foo4b3a94a1a081a8cb +foo_incl_dep = :foo4b3a94a1a081a8cb +bar_incl_dep = :bar4b3a94a1a081a8cb Foo2_module = :F2oo4b3a94a1a081a8cb FooBase_module = :FooBase4b3a94a1a081a8cb @eval module ConflictingBindings @@ -16,35 +17,9 @@ FooBase_module = :FooBase4b3a94a1a081a8cb end using .ConflictingBindings -function precompile_test_harness(@nospecialize(f), testset::String) - @testset "$testset" begin - precompile_test_harness(f, true) - end -end -function precompile_test_harness(@nospecialize(f), separate::Bool) - load_path = mktempdir() - load_cache_path = separate ? mktempdir() : load_path - try - pushfirst!(LOAD_PATH, load_path) - pushfirst!(DEPOT_PATH, load_cache_path) - f(load_path) - finally - try - rm(load_path, force=true, recursive=true) - catch err - @show err - end - if separate - try - rm(load_cache_path, force=true, recursive=true) - catch err - @show err - end - end - filter!((≠)(load_path), LOAD_PATH) - separate && filter!((≠)(load_cache_path), DEPOT_PATH) - end - nothing +@testset "object_build_id" begin + @test Base.object_build_id([1]) === nothing + @test Base.object_build_id(Base) == Base.module_build_id(Base) end # method root provenance @@ -102,6 +77,8 @@ precompile_test_harness(false) do dir Foo_file = joinpath(dir, "$Foo_module.jl") Foo2_file = joinpath(dir, "$Foo2_module.jl") FooBase_file = joinpath(dir, "$FooBase_module.jl") + foo_file = joinpath(dir, "$foo_incl_dep.jl") + bar_file = joinpath(dir, "$bar_incl_dep.jl") write(FooBase_file, """ @@ -115,6 +92,19 @@ precompile_test_harness(false) do dir d = den(a) return h end + abstract type AbstractAlgebraMap{A} end + struct GAPGroupHomomorphism{A, B} <: AbstractAlgebraMap{GAPGroupHomomorphism{B, A}} end + + global process_state_calls::Int = 0 + const process_state = Base.OncePerProcess{typeof(getpid())}() do + @assert (global process_state_calls += 1) == 1 + return getpid() + end + const mypid = process_state() + @assert process_state_calls === 1 + process_state_calls = 0 + @assert process_state() === process_state() + @assert process_state_calls === 0 end """) write(Foo2_file, @@ -130,10 +120,11 @@ precompile_test_harness(false) do dir write(Foo_file, """ module $Foo_module - import $FooBase_module, $FooBase_module.typeA + import $FooBase_module, $FooBase_module.typeA, $FooBase_module.GAPGroupHomomorphism import $Foo2_module: $Foo2_module, override, overridenc import $FooBase_module.hash import Test + public foo, Bar module Inner import $FooBase_module.hash using ..$Foo_module @@ -147,10 +138,11 @@ precompile_test_harness(false) do dir # test that docs get reconnected @doc "foo function" foo(x) = x + 1 - include_dependency("foo.jl") - include_dependency("foo.jl") + include_dependency("$foo_incl_dep.jl") + include_dependency("$foo_incl_dep.jl") module Bar - include_dependency("bar.jl") + public bar + include_dependency("$bar_incl_dep.jl") end @doc "Bar module" Bar # this needs to define the META dictionary via eval @eval Bar @doc "bar function" bar(x) = x + 2 @@ -211,6 +203,8 @@ precompile_test_harness(false) do dir Base.convert(::Type{Some{Value18343}}, ::Value18343{Some}) = 2 Base.convert(::Type{Ref}, ::Value18343{T}) where {T} = 3 + const GAPType1 = GAPGroupHomomorphism{Nothing, Nothing} + const GAPType2 = GAPGroupHomomorphism{1, 2} # 
issue #28297 mutable struct Result @@ -239,11 +233,14 @@ precompile_test_harness(false) do dir gnc() = overridenc(1.0) Test.@test 1 < gnc() < 5 # compile this - const abigfloat_f() = big"12.34" + abigfloat_f() = big"12.34" const abigfloat_x = big"43.21" - const abigint_f() = big"123" + abigint_f() = big"123" const abigint_x = big"124" + # issue #51111 + abigfloat_to_f32() = Float32(big"1.5") + # issue #31488 _v31488 = Base.StringVector(2) resize!(_v31488, 0) @@ -266,8 +263,33 @@ precompile_test_harness(false) do dir # check that Tasks work from serialized state ch1 = Channel(x -> nothing) ch2 = Channel(x -> (push!(x, 2); nothing), Inf) + + # check that Memory aliasing is respected + a_vec_int = Int[] + push!(a_vec_int, 1, 2) + a_mat_int = reshape(a_vec_int, (1, 2)) + + a_vec_any = Any[] + push!(a_vec_any, 1, 2) + a_mat_any = reshape(a_vec_any, (1, 2)) + + a_vec_union = Union{Int,Nothing}[] + push!(a_vec_union, 1, 2) + a_mat_union = reshape(a_vec_union, (1, 2)) + + a_vec_inline = Pair{Int,Any}[] + push!(a_vec_inline, 1=>2, 3=>4) + a_mat_inline = reshape(a_vec_inline, (1, 2)) + + oid_vec_int = objectid(a_vec_int) + oid_mat_int = objectid(a_mat_int) + + using $FooBase_module: process_state, mypid as FooBase_pid, process_state_calls + const mypid = process_state() end """) + # Issue #52063 + touch(foo_file); touch(bar_file) # Issue #12623 @test __precompile__(false) === nothing @@ -299,6 +321,9 @@ precompile_test_harness(false) do dir @test Foo.abigint_f()::BigInt == big"123" @test Foo.abigint_x::BigInt + 1 == big"125" + # Issue #51111 + @test Foo.abigfloat_to_f32() == 1.5f0 + @test Foo.x28297.result === missing @test Foo.d29936a === Dict @@ -322,6 +347,44 @@ precompile_test_harness(false) do dir @test isready(Foo.ch2) @test take!(Foo.ch2) === 2 @test !isready(Foo.ch2) + + @test Foo.process_state_calls === 0 + @test Foo.process_state() === getpid() + @test Foo.mypid !== getpid() + @test Foo.FooBase_pid !== getpid() + @test Foo.mypid !== Foo.FooBase_pid + @test Foo.process_state_calls === 1 + end + + let + @test Foo.a_vec_int == Int[1, 2] + @test Foo.a_mat_int == Int[1 2] + Foo.a_mat_int[1, 2] = 3 + @test Foo.a_vec_int[2] === 3 + + @test Foo.a_vec_any == Int[1, 2] + @test Foo.a_mat_any == Int[1 2] + Foo.a_mat_any[1, 2] = 3 + @test Foo.a_vec_any[2] === 3 + + @test Foo.a_vec_union == Union{Int,Nothing}[1, 2] + @test Foo.a_mat_union == Union{Int,Nothing}[1 2] + Foo.a_mat_union[1, 2] = 3 + @test Foo.a_vec_union[2] === 3 + Foo.a_mat_union[1, 2] = nothing + @test Foo.a_vec_union[2] === nothing + + @test Foo.a_vec_inline == Pair{Int,Any}[1=>2, 3=>4] + @test Foo.a_mat_inline == Pair{Int,Any}[1=>2 3=>4] + Foo.a_mat_inline[1, 2] = 5=>6 + @test Foo.a_vec_inline[2] === Pair{Int,Any}(5, 6) + + @test objectid(Foo.a_vec_int) === Foo.oid_vec_int + @test objectid(Foo.a_mat_int) === Foo.oid_mat_int + @test Foo.oid_vec_int !== Foo.oid_mat_int + @test Base.object_build_id(Foo.a_vec_int) == Base.object_build_id(Foo.a_mat_int) + @test Base.object_build_id(Foo) == Base.module_build_id(Foo) + @test Base.object_build_id(Foo.a_vec_int) == Base.module_build_id(Foo) end @eval begin function ccallable_test() @@ -347,11 +410,11 @@ precompile_test_harness(false) do dir else ocachefile = nothing end - # use _require_from_serialized to ensure that the test fails if - # the module doesn't reload from the image: + # use _require_from_serialized to ensure that the test fails if + # the module doesn't reload from the image: @test_warn "@ccallable was already defined for this method name" begin @test_logs (:warn, "Replacing module 
`$Foo_module`") begin - m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) + m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile, Foo_file) @test isa(m, Module) end end @@ -373,10 +436,10 @@ precompile_test_harness(false) do dir @test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n" - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) - discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) + discard_module = mod_fl_mt -> mod_fl_mt.filename @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] - @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] + @test map(x -> x.filename, deps) == [ Foo_file, joinpath("@depot", foo_file), joinpath("@depot", bar_file) ] @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)), Base.PkgId(Foo) => Base.PkgId(Foo2), Base.PkgId(Foo) => Base.PkgId(Test), @@ -385,30 +448,39 @@ precompile_test_harness(false) do dir @test !isempty(srctxt) && srctxt == read(Foo_file, String) @test_throws ErrorException Base.read_dependency_src(cachefile, "/tmp/nonexistent.txt") # dependencies declared with `include_dependency` should not be stored - @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl")) + @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, foo_file)) modules, deps1 = Base.cache_dependencies(cachefile) - @test Dict(modules) == merge( + modules_ok = merge( Dict(let m = Base.PkgId(s) m => Base.module_build_id(Base.root_module(m)) end for s in [ "Base", "Core", "Main", - string(Foo2_module), string(FooBase_module) ]), + string(Foo2_module), string(FooBase_module),]), # plus modules included in the system image Dict(let m = Base.root_module(Base, s) Base.PkgId(m) => Base.module_build_id(m) - end for s in - [:ArgTools, :Artifacts, :Base64, :CRC32c, :Dates, - :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll, - :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra, - :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf, - :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :Sockets, - :TOML, :Tar, :Test, :UUIDs, :Unicode, - :nghttp2_jll] - ), + end for s in [Symbol(x.name) for x in Base._sysimage_modules if !(x.name in ["Base", "Core", "Main"])]), + # plus test module, + Dict(Base.PkgId(Base.root_module(Base, :Test)) => Base.module_build_id(Base.root_module(Base, :Test))), + # plus dependencies of test module + Dict(Base.PkgId(Base.root_module(Base, :InteractiveUtils)) => Base.module_build_id(Base.root_module(Base, :InteractiveUtils))), + Dict(Base.PkgId(Base.root_module(Base, :Logging)) => Base.module_build_id(Base.root_module(Base, :Logging))), + Dict(Base.PkgId(Base.root_module(Base, :Random)) => Base.module_build_id(Base.root_module(Base, :Random))), + Dict(Base.PkgId(Base.root_module(Base, :Serialization)) => Base.module_build_id(Base.root_module(Base, :Serialization))), + # and their dependencies + Dict(Base.PkgId(Base.root_module(Base, :SHA)) => Base.module_build_id(Base.root_module(Base, :SHA))), + Dict(Base.PkgId(Base.root_module(Base, :Markdown)) => Base.module_build_id(Base.root_module(Base, :Markdown))), + Dict(Base.PkgId(Base.root_module(Base, :JuliaSyntaxHighlighting)) => 
Base.module_build_id(Base.root_module(Base, :JuliaSyntaxHighlighting))), + Dict(Base.PkgId(Base.root_module(Base, :StyledStrings)) => Base.module_build_id(Base.root_module(Base, :StyledStrings))), + + # and their dependencies + Dict(Base.PkgId(Base.root_module(Base, :Base64)) => Base.module_build_id(Base.root_module(Base, :Base64))), ) + @test Dict(modules) == modules_ok + @test discard_module.(deps) == deps1 - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile; srcfiles_only=true) + modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test map(x -> x.filename, deps) == [Foo_file] @test current_task()(0x01, 0x4000, 0x30031234) == 2 @@ -471,7 +543,7 @@ precompile_test_harness(false) do dir """) Nest = Base.require(Main, Nest_module) cachefile = joinpath(cachedir, "$Nest_module.ji") - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test last(deps).modpath == ["NestInner"] UsesB_module = :UsesB4b3a94a1a081a8cb @@ -493,7 +565,7 @@ precompile_test_harness(false) do dir """) UsesB = Base.require(Main, UsesB_module) cachefile = joinpath(cachedir, "$UsesB_module.ji") - modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) id1, id2 = only(requires) @test Base.pkgorigins[id1].cachepath == cachefile @test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji") @@ -501,15 +573,54 @@ precompile_test_harness(false) do dir Baz_file = joinpath(dir, "Baz.jl") write(Baz_file, """ - true && __precompile__(false) + haskey(Base.loaded_modules, Base.PkgId("UseBaz")) || __precompile__(false) module Baz baz() = 1 end """) @test Base.compilecache(Base.PkgId("Baz")) == Base.PrecompilableError() # due to __precompile__(false) + + OverwriteMethodError_file = joinpath(dir, "OverwriteMethodError.jl") + write(OverwriteMethodError_file, + """ + module OverwriteMethodError + Base.:(+)(x::Bool, y::Bool) = false + end + """) + + @test (@test_warn "overwritten in module OverwriteMethodError" Base.compilecache(Base.PkgId("OverwriteMethodError"))) == Base.PrecompilableError() # due to piracy + + UseBaz_file = joinpath(dir, "UseBaz.jl") + write(UseBaz_file, + """ + module UseBaz + biz() = 1 + @assert haskey(Base.loaded_modules, Base.PkgId("UseBaz")) + @assert !haskey(Base.loaded_modules, Base.PkgId("Baz")) + using Baz + @assert haskey(Base.loaded_modules, Base.PkgId("Baz")) + buz() = 2 + const generating = ccall(:jl_generating_output, Cint, ()) + const incremental = Base.JLOptions().incremental + end + """) + + @test Base.compilecache(Base.PkgId("UseBaz")) == Base.PrecompilableError() # due to __precompile__(false) + @eval using UseBaz + @test haskey(Base.loaded_modules, Base.PkgId("UseBaz")) + @test haskey(Base.loaded_modules, Base.PkgId("Baz")) + @test Base.invokelatest(UseBaz.biz) === 1 + @test Base.invokelatest(UseBaz.buz) === 2 + @test UseBaz.generating == 0 + @test UseBaz.incremental == 0 @eval using Baz - @test Base.invokelatest(Baz.baz) == 1 + @test Base.invokelatest(Baz.baz) === 1 + @test Baz === UseBaz.Baz + + # should not throw if the cachefile does not exist + @test !isfile("DoesNotExist.ji") + @test Base.stale_cachefile("", "DoesNotExist.ji") === true # Issue #12720 FooBar1_file = joinpath(dir, "FooBar1.jl") @@ -527,17 +638,17 @@ precompile_test_harness(false) do dir end """) - 
cachefile, _ = Base.compilecache(Base.PkgId("FooBar")) + cachefile, _ = @test_logs (:debug, r"Precompiling FooBar") min_level=Logging.Debug match_mode=:any Base.compilecache(Base.PkgId("FooBar")) empty_prefs_hash = Base.get_preferences_hash(nothing, String[]) @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash) @test isfile(joinpath(cachedir, "FooBar.ji")) - Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String} : Tuple{<:Vector, Nothing} + Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String, UInt128} : Tuple{<:Vector, Nothing, UInt128} @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @test !isdefined(Main, :FooBar) @test !isdefined(Main, :FooBar1) relFooBar_file = joinpath(dir, "subfolder", "..", "FooBar.jl") - @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String} : Bool) # `..` is not a symlink on Windows + @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tuple{<:Vector, String, UInt128} : Bool) # `..` is not a symlink on Windows mkdir(joinpath(dir, "subfolder")) @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @@ -545,12 +656,12 @@ precompile_test_harness(false) do dir fb_uuid = Base.module_build_id(FooBar) sleep(2); touch(FooBar_file) insert!(DEPOT_PATH, 1, dir2) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @eval using FooBar1 @test !isfile(joinpath(cachedir2, "FooBar.ji")) @test !isfile(joinpath(cachedir, "FooBar1.ji")) @test isfile(joinpath(cachedir2, "FooBar1.ji")) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Tsc @test fb_uuid == Base.module_build_id(FooBar) fb_uuid1 = Base.module_build_id(FooBar1) @@ -570,7 +681,7 @@ precompile_test_harness(false) do dir error("break me") end """) - @test_warn r"LoadError: break me\nStacktrace:\n \[1\] [\e01m\[]*error" try + @test_warn r"LoadError: break me\nStacktrace:\n[ ]*\[1\] [\e01m\[]*error" try Base.require(Main, :FooBar2) error("the \"break me\" test failed") catch exc @@ -583,7 +694,11 @@ precompile_test_harness(false) do dir FooBar3_inc = joinpath(dir, "FooBar3_inc.jl") write(FooBar3_inc, "x=1\n") for code in ["Core.eval(Base, :(x=1))", "Base.include(Base, \"FooBar3_inc.jl\")"] - write(FooBar3_file, code) + write(FooBar3_file, """ + module FooBar3 + $code + end + """) @test_warn "Evaluation into the closed module `Base` breaks incremental compilation" try Base.require(Main, :FooBar3) catch exc @@ -635,10 +750,9 @@ precompile_test_harness("code caching") do dir struct X end struct X2 end @noinline function f(d) - @noinline - d[X()] = nothing + @noinline d[X()] = nothing end - @noinline fpush(dest) = push!(dest, X()) + @noinline fpush(dest) = @noinline push!(dest, X()) function callboth() f(Dict{X,Any}()) fpush(X[]) @@ -682,24 +796,12 @@ precompile_test_harness("code caching") do dir end end @test hasspec - # Test that compilation adds to method roots with appropriate provenance - m = which(setindex!, (Dict{M.X,Any}, Any, M.X)) - @test M.X ∈ m.roots - # Check that roots added outside of incremental builds get attributed to a moduleid of 0 - Base.invokelatest() do - 
Dict{M.X2,Any}()[M.X2()] = nothing - end - @test M.X2 ∈ m.roots - groups = group_roots(m) - @test M.X ∈ groups[Mid] # attributed to M - @test M.X2 ∈ groups[0] # activate module is not known - @test !isempty(groups[Bid]) # Check that internal methods and their roots are accounted appropriately minternal = which(M.getelsize, (Vector,)) mi = minternal.specializations::Core.MethodInstance @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}} ci = mi.cache - @test ci.relocatability == 1 + @test (codeunits(ci.inferred::String)[end]) === 0x01 @test ci.inferred !== nothing # ...and that we can add "untracked" roots & non-relocatable CodeInstances to them too Base.invokelatest() do @@ -708,20 +810,21 @@ precompile_test_harness("code caching") do dir mispecs = minternal.specializations::Core.SimpleVector @test mispecs[1] === mi mi = mispecs[2]::Core.MethodInstance + mi.specTypes == Tuple{typeof(M.getelsize),Vector{M.X2}} ci = mi.cache - @test ci.relocatability == 0 + @test (codeunits(ci.inferred::String)[end]) == 0x00 # PkgA loads PkgB, and both add roots to the same `push!` method (both before and after loading B) Cache_module2 = :Cachea1544c83560f0c99 write(joinpath(dir, "$Cache_module2.jl"), """ module $Cache_module2 struct Y end - @noinline f(dest) = push!(dest, Y()) + @noinline f(dest) = @noinline push!(dest, Y()) callf() = f(Y[]) callf() using $(Cache_module) struct Z end - @noinline g(dest) = push!(dest, Z()) + @noinline g(dest) = @noinline push!(dest, Z()) callg() = g(Z[]) callg() end @@ -742,10 +845,10 @@ precompile_test_harness("code caching") do dir end mT = which(push!, (Vector{T} where T, Any)) groups = group_roots(mT) - @test M2.Y ∈ groups[M2id] - @test M2.Z ∈ groups[M2id] - @test M.X ∈ groups[Mid] - @test M.X ∉ groups[M2id] + @test Memory{M2.Y} ∈ groups[M2id] + @test Memory{M2.Z} ∈ groups[M2id] + @test Memory{M.X} ∈ groups[Mid] + @test Memory{M.X} ∉ groups[M2id] # backedges of external MethodInstances # Root gets used by RootA and RootB, and both consumers end up inferring the same MethodInstance from Root # Do both callers get listed as backedges? @@ -799,7 +902,7 @@ precompile_test_harness("code caching") do dir # external callers mods = Module[] for be in mi.backedges - push!(mods, be.def.module) + push!(mods, ((be.def::Core.MethodInstance).def::Method).module) # XXX end @test MA ∈ mods @test MB ∈ mods @@ -808,7 +911,7 @@ precompile_test_harness("code caching") do dir # internal callers meths = Method[] for be in mi.backedges - push!(meths, be.def) + push!(meths, (be.def::Method).def) # XXX end @test which(M.g1, ()) ∈ meths @test which(M.g2, ()) ∈ meths @@ -901,9 +1004,9 @@ precompile_test_harness("code caching") do dir MA = getfield(@__MODULE__, StaleA) Base.eval(MA, :(nbits(::UInt8) = 8)) @eval using $StaleC - invalidations = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1) + invalidations = Base.StaticData.debug_method_invalidation(true) @eval using $StaleB - ccall(:jl_debug_method_invalidation, Any, (Cint,), 0) + Base.StaticData.debug_method_invalidation(false) MB = getfield(@__MODULE__, StaleB) MC = getfield(@__MODULE__, StaleC) world = Base.get_world_counter() @@ -938,15 +1041,14 @@ precompile_test_harness("code caching") do dir @test mi.specTypes.parameters[end] === Integer ? 
!hv : hv end - setglobal!(Main, :inval, invalidations) idxs = findall(==("verify_methods"), invalidations) idxsbits = filter(idxs) do i mi = invalidations[i-1] - mi.def == m + mi.def.def === m end idx = only(idxsbits) tagbad = invalidations[idx+1] - @test isa(tagbad, Int32) + @test isa(tagbad, Core.CodeInstance) j = findfirst(==(tagbad), invalidations) @test invalidations[j-1] == "insert_backedges_callee" @test isa(invalidations[j-2], Type) @@ -954,7 +1056,7 @@ precompile_test_harness("code caching") do dir m = only(methods(MB.useA2)) mi = only(Base.specializations(m)) @test !hasvalid(mi, world) - @test mi ∈ invalidations + @test any(x -> x isa Core.CodeInstance && x.def === mi, invalidations) m = only(methods(MB.map_nbits)) @test !hasvalid(m.specializations::Core.MethodInstance, world+1) # insert_backedges invalidations also trigger their backedges @@ -1031,22 +1133,22 @@ precompile_test_harness("invoke") do dir call_getlast(x) = getlast(x) - # force precompilation + # force precompilation, force call so that inlining heuristics don't affect the result begin Base.Experimental.@force_compile - callf(3) - callg(3) - callh(3) - callq(3) - callqi(3) - callfnc(3) - callgnc(3) - callhnc(3) - callqnc(3) - callqnci(3) - internal(3) - internalnc(3) - call_getlast([1,2,3]) + @noinline callf(3) + @noinline callg(3) + @noinline callh(3) + @noinline callq(3) + @noinline callqi(3) + @noinline callfnc(3) + @noinline callgnc(3) + @noinline callhnc(3) + @noinline callqnc(3) + @noinline callqnci(3) + @noinline internal(3) + @noinline internalnc(3) + @noinline call_getlast([1,2,3]) end # Now that we've precompiled, invalidate with a new method that overrides the `invoke` dispatch @@ -1064,12 +1166,7 @@ precompile_test_harness("invoke") do dir @eval using $CallerModule M = getfield(@__MODULE__, CallerModule) - function get_method_for_type(func, @nospecialize(T)) # return the method func(::T) - for m in methods(func) - m.sig.parameters[end] === T && return m - end - error("no ::Real method found for $func") - end + get_method_for_type(func, @nospecialize(T)) = which(func, (T,)) # return the method func(::T) function nvalid(mi::Core.MethodInstance) isdefined(mi, :cache) || return 0 ci = mi.cache @@ -1084,9 +1181,15 @@ precompile_test_harness("invoke") do dir for func in (M.f, M.g, M.internal, M.fnc, M.gnc, M.internalnc) m = get_method_for_type(func, Real) mi = m.specializations::Core.MethodInstance - @test length(mi.backedges) == 2 + @test length(mi.backedges) == 2 || length(mi.backedges) == 4 # internalnc might have a constprop edge @test mi.backedges[1] === Tuple{typeof(func), Real} - @test isa(mi.backedges[2], Core.MethodInstance) + @test isa(mi.backedges[2], Core.CodeInstance) + if length(mi.backedges) == 4 + @test mi.backedges[3] === Tuple{typeof(func), Real} + @test isa(mi.backedges[4], Core.CodeInstance) + @test mi.backedges[2] !== mi.backedges[4] + @test mi.backedges[2].def === mi.backedges[4].def + end @test mi.cache.max_world == typemax(mi.cache.max_world) end for func in (M.q, M.qnc) @@ -1094,18 +1197,18 @@ precompile_test_harness("invoke") do dir mi = m.specializations::Core.MethodInstance @test length(mi.backedges) == 2 @test mi.backedges[1] === Tuple{typeof(func), Integer} - @test isa(mi.backedges[2], Core.MethodInstance) + @test isa(mi.backedges[2], Core.CodeInstance) @test mi.cache.max_world == typemax(mi.cache.max_world) end m = get_method_for_type(M.h, Real) - @test isempty(Base.specializations(m)) + @test nvalid(m.specializations::Core.MethodInstance) == 0 m = get_method_for_type(M.hnc, 
Real) - @test isempty(Base.specializations(m)) + @test nvalid(m.specializations::Core.MethodInstance) == 0 m = only(methods(M.callq)) - @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0 + @test nvalid(m.specializations::Core.MethodInstance) == 0 m = only(methods(M.callqnc)) - @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0 + @test nvalid(m.specializations::Core.MethodInstance) == 1 m = only(methods(M.callqi)) @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqi), Int} m = only(methods(M.callqnci)) @@ -1248,6 +1351,25 @@ precompile_test_harness("package_callbacks") do dir finally pop!(Base.package_callbacks) end + Test5_module = :Teste4095a85 + write(joinpath(dir, "$(Test5_module).jl"), + """ + module $(Test5_module) + end + """) + Base.compilecache(Base.PkgId("$(Test5_module)")) + cnt = 0 + push!(Base.package_callbacks, _->(cnt += 1)) + try + @eval using $(Symbol(Test5_module)) + @eval using $(Symbol(Test5_module)) + @eval using $(Symbol(Test5_module)) + @eval using $(Symbol(Test5_module)) + @eval using $(Symbol(Test5_module)) + @test cnt == 1 + finally + pop!(Base.package_callbacks) + end end # Issue #19960 @@ -1416,6 +1538,7 @@ precompile_test_harness("Issue #26028") do load_path module Foo26028 module Bar26028 x = 0 + y = 0 end function __init__() include(joinpath(@__DIR__, "Baz26028.jl")) @@ -1425,7 +1548,10 @@ precompile_test_harness("Issue #26028") do load_path write(joinpath(load_path, "Baz26028.jl"), """ module Baz26028 - import Foo26028.Bar26028.x + using Test + @test_throws(ConcurrencyViolationError("deadlock detected in loading Foo26028 -> Foo26028"), + @eval import Foo26028.Bar26028.x) + import ..Foo26028.Bar26028.y end """) Base.compilecache(Base.PkgId("Foo26028")) @@ -1587,14 +1713,15 @@ precompile_test_harness("Issue #46558") do load_path @test (@eval $Foo.foo(1)) == 2 end +# TODO: Decide if we need to keep supporting this. 
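# ---- Editor's illustrative sketch (not part of the patch) ----
# The TODO above refers to packages that serialize a hand-constructed Core.CodeInstance
# during precompilation, which is what the "issue #46296" harness just below exercises.
# For contrast, the ordinary supported way to force a specialization into a package's
# cache file is a plain `precompile` request, as the NoBackEdges and GenNoSpec test
# packages elsewhere in this patch also do. The module and function names here are
# hypothetical.
module PrecompileSketch
add2(x::Int, y::Int) = x + y
# Ask for the add2(::Int, ::Int) specialization to be compiled and stored in this
# package's cache file; `precompile` returns false if the request cannot be satisfied.
precompile(add2, (Int, Int)) || @warn "precompile request for add2 failed"
end
# ---- end of sketch ----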
precompile_test_harness("issue #46296") do load_path write(joinpath(load_path, "CodeInstancePrecompile.jl"), """ module CodeInstancePrecompile mi = first(Base.specializations(first(methods(identity)))) - ci = Core.CodeInstance(mi, Any, nothing, nothing, zero(Int32), typemin(UInt), - typemax(UInt), zero(UInt32), zero(UInt32), nothing, 0x00) + ci = Core.CodeInstance(mi, nothing, Any, Any, nothing, nothing, zero(Int32), typemin(UInt), + typemax(UInt), zero(UInt32), nothing, Core.DebugInfo(mi), Core.svec()) __init__() = @assert ci isa Core.CodeInstance @@ -1604,6 +1731,12 @@ precompile_test_harness("issue #46296") do load_path (@eval (using CodeInstancePrecompile)) end +@testset "Precompile external abstract interpreter" begin + dir = @__DIR__ + @test success(pipeline(Cmd(`$(Base.julia_cmd()) precompile_absint1.jl`; dir); stdout, stderr)) + @test success(pipeline(Cmd(`$(Base.julia_cmd()) precompile_absint2.jl`; dir); stdout, stderr)) +end + precompile_test_harness("Recursive types") do load_path write(joinpath(load_path, "RecursiveTypeDef.jl"), """ @@ -1669,7 +1802,7 @@ precompile_test_harness("PkgCacheInspector") do load_path try # isvalid_cache_header returns checksum id or zero Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile.")) - depmodnames = Base.parse_cache_header(io)[3] + depmodnames = Base.parse_cache_header(io, cachefile)[3] Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile.")) finally close(io) @@ -1687,16 +1820,18 @@ precompile_test_harness("PkgCacheInspector") do load_path end if ocachefile !== nothing - sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring), ocachefile, depmods, true, "PCI") + sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring, Cint), + ocachefile, depmods, #=completeinfo=#true, "PCI", false) else - sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), cachefile, depmods, true, "PCI") + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), + cachefile, depmods, #=completeinfo=#true, "PCI") end - modules, init_order, external_methods, new_specializations, new_method_roots, external_targets, edges = sv - m = only(external_methods) + modules, init_order, edges, new_ext_cis, external_methods, new_method_roots, cache_sizes = sv + m = only(external_methods).func::Method @test m.name == :repl_cmd && m.nargs < 2 - @test any(new_specializations) do ci - mi = ci.def + @test new_ext_cis === nothing || any(new_ext_cis) do ci + mi = ci.def::Core.MethodInstance mi.specTypes == Tuple{typeof(Base.repl_cmd), Int, String} end end @@ -1771,7 +1906,276 @@ precompile_test_harness("Issue #48391") do load_path @test_throws ErrorException isless(x, x) end -empty!(Base.DEPOT_PATH) -append!(Base.DEPOT_PATH, original_depot_path) -empty!(Base.LOAD_PATH) -append!(Base.LOAD_PATH, original_load_path) +precompile_test_harness("Generator nospecialize") do load_path + write(joinpath(load_path, "GenNoSpec.jl"), + """ + module GenNoSpec + @generated function f(x...) 
+ :((\$(Base.Meta.quot(x)),)) + end + @assert precompile(Tuple{typeof(which(f, (Any,Any)).generator.gen), Any, Any}) + end + """) + ji, ofile = Base.compilecache(Base.PkgId("GenNoSpec")) + @eval using GenNoSpec +end + +precompile_test_harness("Issue #50538") do load_path + write(joinpath(load_path, "I50538.jl"), + """ + module I50538 + const newglobal = try + Base.newglobal = false + catch ex + ex isa ErrorException || rethrow() + ex + end + const newtype = try + Core.eval(Base, :(global newglobal::Any)) + catch ex + ex isa ErrorException || rethrow() + ex + end + global undefglobal::Any + end + """) + ji, ofile = Base.compilecache(Base.PkgId("I50538")) + @eval using I50538 + @test I50538.newglobal.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent." + @test I50538.newtype.msg == "Evaluation into the closed module `Base` breaks incremental compilation because the side effects will not be permanent. This is likely due to some other module mutating `Base` with `eval` during precompilation - don't do this." + @test_throws(ErrorException("cannot set type for global I50538.undefglobal. It already has a value or is already set to a different type."), + Core.eval(I50538, :(global undefglobal::Int))) + Core.eval(I50538, :(global undefglobal::Any)) + @test Core.get_binding_type(I50538, :undefglobal) === Any + @test !isdefined(I50538, :undefglobal) +end + +precompile_test_harness("Test flags") do load_path + write(joinpath(load_path, "TestFlags.jl"), + """ + module TestFlags + end + """) + + current_flags = Base.CacheFlags() + modified_flags = Base.CacheFlags( + current_flags.use_pkgimages, + current_flags.debug_level, + 2, + current_flags.inline, + 3 + ) + ji, ofile = Base.compilecache(Base.PkgId("TestFlags"); flags=`--check-bounds=no -O3`) + open(ji, "r") do io + Base.isvalid_cache_header(io) + _, _, _, _, _, _, _, flags = Base.parse_cache_header(io, ji) + cacheflags = Base.CacheFlags(flags) + @test cacheflags.check_bounds == 2 + @test cacheflags.opt_level == 3 + end + id = Base.identify_package("TestFlags") + @test Base.isprecompiled(id, ;flags=modified_flags) + @test !Base.isprecompiled(id, ;flags=current_flags) +end + +if Base.get_bool_env("CI", false) && (Sys.ARCH === :x86_64 || Sys.ARCH === :aarch64) + @testset "Multiversioning" begin # This test isn't the most robust because it relies on being in CI, + pkg = Base.identify_package("Test") # but we need better target reflection to make a better one. + cachefiles = Base.find_all_in_cache_path(pkg) + pkgpath = Base.locate_package(pkg) + idx = findfirst(cachefiles) do cf + Base.stale_cachefile(pkgpath, cf) !== true + end + targets = Base.parse_image_targets(Base.parse_cache_header(cachefiles[idx])[7]) + @test length(targets) > 1 + end +end + +precompile_test_harness("No backedge precompile") do load_path + # Test that the system doesn't accidentally forget to revalidate a method without backedges + write(joinpath(load_path, "NoBackEdges.jl"), + """ + module NoBackEdges + using Core.Intrinsics: add_int + f(a::Int, b::Int) = add_int(a, b) + precompile(f, (Int, Int)) + end + """) + ji, ofile = Base.compilecache(Base.PkgId("NoBackEdges")) + @eval using NoBackEdges + @test first(methods(NoBackEdges.f)).specializations.cache.max_world === typemax(UInt) +end + +# Test precompilation of generated functions that return opaque closures +# (with constprop marker set to false). 
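# ---- Editor's illustrative sketch (not part of the patch) ----
# The `ArgTypes->ReturnType` annotation for `Base.Experimental.@opaque` that
# test/opaque_closure.jl exercises earlier in this patch can also be written directly.
# The closure name below is hypothetical; the annotated `Tuple{Int, Int}` constrains the
# arguments, and `Int` declares the return type up front instead of relying on inference.
using Base.Experimental: @opaque
oc_add = @opaque Tuple{Int, Int}->Int (a, b)->a+b
@assert oc_add(1, 2) === 3
@assert oc_add isa Core.OpaqueClosure
# ---- end of sketch ----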
+precompile_test_harness("Generated Opaque") do load_path + write(joinpath(load_path, "GeneratedOpaque.jl"), + """ + module GeneratedOpaque + using Base.Experimental: @opaque + using InteractiveUtils + const_int_barrier() = Base.inferencebarrier(1)::typeof(1) + const lno = LineNumberNode(1, :none) + + const ci = @code_lowered const_int_barrier() + @generated function oc_re_generated_no_partial() + Expr(:new_opaque_closure, Tuple{}, Any, Any, false, + Expr(:opaque_closure_method, nothing, 0, false, lno, ci)) + end + @assert oc_re_generated_no_partial()() === 1 + @generated function oc_re_generated_no_partial_macro() + AT = nothing + RT = nothing + allow_partial = false # makes this legal to generate during pre-compile + return Expr(:opaque_closure, AT, RT, RT, allow_partial, :(()->const_int_barrier())) + end + @assert oc_re_generated_no_partial_macro()() === 1 + end + """) + Base.compilecache(Base.PkgId("GeneratedOpaque")) + @eval using GeneratedOpaque + let oc = invokelatest(GeneratedOpaque.oc_re_generated_no_partial) + @test oc.source.specializations.cache.max_world === typemax(UInt) + @test oc() === 1 + end +end + +precompile_test_harness("Issue #52063") do load_path + fname = joinpath(load_path, "i_do_not_exist.jl") + @test try + include_dependency(fname); false + catch e + @test e isa SystemError + @test e.prefix == "opening file or folder $(repr(fname))" + true + end + touch(fname) + @test include_dependency(fname) === nothing + chmod(fname, 0x000) + @test try + include_dependency(fname); false + catch e + @test e isa SystemError + @test e.prefix == "opening file or folder $(repr(fname))" + true + end broken=Sys.iswindows() + dir = mktempdir() do dir + @test include_dependency(dir) === nothing + chmod(dir, 0x000) + @test try + include_dependency(dir); false + catch e + @test e isa SystemError + @test e.prefix == "opening file or folder $(repr(dir))" + true + end broken=Sys.iswindows() + dir + end + @test try + include_dependency(dir); false + catch e + @test e isa SystemError + @test e.prefix == "opening file or folder $(repr(dir))" + true + end +end + +precompile_test_harness("Binding Unique") do load_path + write(joinpath(load_path, "UniqueBinding1.jl"), + """ + module UniqueBinding1 + export x + global x = 1 + end + """) + write(joinpath(load_path, "UniqueBinding2.jl"), + """ + module UniqueBinding2 + using UniqueBinding1 + const thebinding = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding1, :x, true) + const thebinding2 = ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), @__MODULE__, :thebinding, true) + end + """) + + @eval using UniqueBinding1 + @eval using UniqueBinding2 + + @test UniqueBinding2.thebinding === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding1, :x, true) + @test UniqueBinding2.thebinding2 === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding2, :thebinding, true) +end + +precompile_test_harness("Detecting importing outside of a package module") do load_path + io = IOBuffer() + write(joinpath(load_path, "ImportBeforeMod.jl"), + """ + import Printf + module ImportBeforeMod + end #module + """) + @test_throws r"Failed to precompile ImportBeforeMod" Base.compilecache(Base.identify_package("ImportBeforeMod"), io, io) + @test occursin( + "`using/import Printf` outside of a Module detected. 
Importing a package outside of a module is not allowed during package precompilation.", + String(take!(io))) + + + write(joinpath(load_path, "HarmlessComments.jl"), + """ + # import Printf + #= + import Printf + =# + module HarmlessComments + end #module + # import Printf + #= + import Printf + =# + """) + Base.compilecache(Base.identify_package("HarmlessComments")) + + + write(joinpath(load_path, "ImportAfterMod.jl"), """ + module ImportAfterMod + end #module + import Printf + """) + @test_throws r"Failed to precompile ImportAfterMod" Base.compilecache(Base.identify_package("ImportAfterMod"), io, io) + @test occursin( + "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.", + String(take!(io))) +end + +precompile_test_harness("No package module") do load_path + io = IOBuffer() + write(joinpath(load_path, "NoModule.jl"), + """ + 1 + """) + @test_throws r"Failed to precompile NoModule" Base.compilecache(Base.identify_package("NoModule"), io, io) + @test occursin( + "NoModule [top-level] did not define the expected module `NoModule`, check for typos in package module name", + String(take!(io))) + + + write(joinpath(load_path, "WrongModuleName.jl"), + """ + module DifferentName + x = 1 + end #module + """) + @test_throws r"Failed to precompile WrongModuleName" Base.compilecache(Base.identify_package("WrongModuleName"), io, io) + @test occursin( + "WrongModuleName [top-level] did not define the expected module `WrongModuleName`, check for typos in package module name", + String(take!(io))) + + + write(joinpath(load_path, "NoModuleWithImport.jl"), """ + import Printf + """) + @test_throws r"Failed to precompile NoModuleWithImport" Base.compilecache(Base.identify_package("NoModuleWithImport"), io, io) + @test occursin( + "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.", + String(take!(io))) +end + +finish_precompile_test!() diff --git a/test/precompile_absint1.jl b/test/precompile_absint1.jl new file mode 100644 index 0000000000000..98078ebf41098 --- /dev/null +++ b/test/precompile_absint1.jl @@ -0,0 +1,87 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test +import Base.Compiler: Compiler + +include("precompile_utils.jl") + +precompile_test_harness() do load_path + write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule + basic_callee(x) = x + basic_caller(x) = basic_callee(x) + end) |> string) + + newinterp_path = abspath(joinpath(@__DIR__,"../Compiler/test/newinterp.jl")) + write(joinpath(load_path, "TestAbsIntPrecompile1.jl"), :(module TestAbsIntPrecompile1 + import SimpleModule: basic_caller, basic_callee + + module Custom + import Base.Compiler: Compiler + include($newinterp_path) + @newinterp PrecompileInterpreter + end + + Base.return_types((Float64,)) do x + basic_caller(x) + end + Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x + basic_caller(x) + end + Base.return_types((Vector{Float64},)) do x + sum(x) + end + Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x + sum(x) + end + end) |> string) + Base.compilecache(Base.PkgId("TestAbsIntPrecompile1")) + + @eval let + using TestAbsIntPrecompile1 + cache_owner = Compiler.cache_owner( + TestAbsIntPrecompile1.Custom.PrecompileInterpreter()) + let m = only(methods(TestAbsIntPrecompile1.basic_callee)) + mi = only(Base.specializations(m)) + ci = mi.cache + @test_broken isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) + @test_skip begin + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === cache_owner + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) + end + end + let m = only(methods(sum, (Vector{Float64},))) + found = false + for mi in Base.specializations(m) + if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}} + ci = mi.cache + @test_broken isdefined(ci, :next) + @test_broken ci.owner === cache_owner + @test_skip begin + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) + ci = ci.next + end + @test !isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile1) == + Base.object_build_id(ci) + found = true + break + end + end + @test found + end + end +end + +finish_precompile_test!() diff --git a/test/precompile_absint2.jl b/test/precompile_absint2.jl new file mode 100644 index 0000000000000..4aa84e0992f7c --- /dev/null +++ b/test/precompile_absint2.jl @@ -0,0 +1,107 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test + +include("precompile_utils.jl") + +precompile_test_harness() do load_path + write(joinpath(load_path, "SimpleModule.jl"), :(module SimpleModule + basic_callee(x) = x + basic_caller(x) = basic_callee(x) + end) |> string) + + newinterp_path = abspath(joinpath(@__DIR__,"../Compiler/test/newinterp.jl")) + write(joinpath(load_path, "TestAbsIntPrecompile2.jl"), :(module TestAbsIntPrecompile2 + import SimpleModule: basic_caller, basic_callee + + module Custom + import Base.Compiler: Compiler + include($newinterp_path) + @newinterp PrecompileInterpreter + struct CustomData + inferred + CustomData(@nospecialize inferred) = new(inferred) + end + function Compiler.transform_result_for_cache(interp::PrecompileInterpreter, result::Compiler.InferenceResult) + inferred_result = @invoke Compiler.transform_result_for_cache( + interp::Compiler.AbstractInterpreter, result::Compiler.InferenceResult) + return CustomData(inferred_result) + end + function Compiler.src_inlining_policy(interp::PrecompileInterpreter, @nospecialize(src), + @nospecialize(info::Compiler.CallInfo), stmt_flag::UInt32) + if src isa CustomData + src = src.inferred + end + return @invoke Compiler.src_inlining_policy(interp::Compiler.AbstractInterpreter, src::Any, + info::Compiler.CallInfo, stmt_flag::UInt32) + end + Compiler.retrieve_ir_for_inlining(cached_result::Core.CodeInstance, src::CustomData) = + Compiler.retrieve_ir_for_inlining(cached_result, src.inferred) + Compiler.retrieve_ir_for_inlining(mi::Core.MethodInstance, src::CustomData, preserve_local_sources::Bool) = + Compiler.retrieve_ir_for_inlining(mi, src.inferred, preserve_local_sources) + end + + Base.return_types((Float64,)) do x + basic_caller(x) + end + Base.return_types((Float64,); interp=Custom.PrecompileInterpreter()) do x + basic_caller(x) + end + Base.return_types((Vector{Float64},)) do x + sum(x) + end + Base.return_types((Vector{Float64},); interp=Custom.PrecompileInterpreter()) do x + sum(x) + end + end) |> string) + Base.compilecache(Base.PkgId("TestAbsIntPrecompile2")) + + @eval let + using TestAbsIntPrecompile2 + cache_owner = Core.Compiler.cache_owner( + TestAbsIntPrecompile2.Custom.PrecompileInterpreter()) + let m = only(methods(TestAbsIntPrecompile2.basic_callee)) + mi = only(Base.specializations(m)) + ci = mi.cache + @test_broken isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) + @test_skip begin + ci = ci.next + @test !isdefined(ci, :next) + @test ci.owner === cache_owner + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) + end + end + let m = only(methods(sum, (Vector{Float64},))) + found = false + for mi = Base.specializations(m) + if mi isa Core.MethodInstance && mi.specTypes == Tuple{typeof(sum),Vector{Float64}} + ci = mi.cache + @test_broken isdefined(ci, :next) + @test_broken ci.owner === cache_owner + @test_skip begin + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) + ci = ci.next + end + @test !isdefined(ci, :next) + @test ci.owner === nothing + @test ci.max_world == typemax(UInt) + @test Base.module_build_id(TestAbsIntPrecompile2) == + Base.object_build_id(ci) + found = true + break + end + end + @test found + end + end +end + +finish_precompile_test!() diff --git a/test/precompile_utils.jl b/test/precompile_utils.jl new file mode 
100644 index 0000000000000..55eba353f2ada --- /dev/null +++ b/test/precompile_utils.jl @@ -0,0 +1,41 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +function precompile_test_harness(@nospecialize(f), testset::String) + @testset "$testset" precompile_test_harness(f, true) +end +function precompile_test_harness(@nospecialize(f), separate::Bool=true) + load_path = mktempdir() + load_cache_path = separate ? mktempdir() : load_path + try + pushfirst!(LOAD_PATH, load_path) + pushfirst!(DEPOT_PATH, load_cache_path) + f(load_path) + finally + try + rm(load_path, force=true, recursive=true) + catch err + @show err + end + if separate + try + rm(load_cache_path, force=true, recursive=true) + catch err + @show err + end + end + filter!((≠)(load_path), LOAD_PATH) + separate && filter!((≠)(load_cache_path), DEPOT_PATH) + end + return nothing +end + +let original_depot_path = copy(Base.DEPOT_PATH) + original_load_path = copy(Base.LOAD_PATH) + + global function finish_precompile_test!() + empty!(Base.DEPOT_PATH) + append!(Base.DEPOT_PATH, original_depot_path) + empty!(Base.LOAD_PATH) + append!(Base.LOAD_PATH, original_load_path) + end +end diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/Manifest.toml b/test/project/Extensions/CrossPackageExtToExtDependency/Manifest.toml new file mode 100644 index 0000000000000..5497fdb7091bb --- /dev/null +++ b/test/project/Extensions/CrossPackageExtToExtDependency/Manifest.toml @@ -0,0 +1,32 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.1" +manifest_format = "2.0" +project_hash = "dc35c2cf8c6b82fb5b9624c9713c2df34ca30499" + +[[deps.CyclicExtensions]] +deps = ["ExtDep"] +path = "../CyclicExtensions" +uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c" +version = "0.1.0" +weakdeps = ["SomePackage"] + + [deps.CyclicExtensions.extensions] + ExtA = ["SomePackage"] + ExtB = ["SomePackage"] + +[[deps.ExtDep]] +deps = ["SomeOtherPackage", "SomePackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/Project.toml b/test/project/Extensions/CrossPackageExtToExtDependency/Project.toml new file mode 100644 index 0000000000000..76ffb7bd1c882 --- /dev/null +++ b/test/project/Extensions/CrossPackageExtToExtDependency/Project.toml @@ -0,0 +1,12 @@ +name = "CrossPackageExtToExtDependency" +uuid = "30f07f2e-c47e-40db-93a2-cbc4d1b301cc" +version = "0.1.0" + +[deps] +CyclicExtensions = "17d4f0df-b55c-4714-ac4b-55fa23f7355c" + +[weakdeps] +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" + +[extensions] +ExtAB = ["CyclicExtensions", "SomePackage"] diff --git a/test/project/Extensions/CrossPackageExtToExtDependency/ext/ExtAB.jl b/test/project/Extensions/CrossPackageExtToExtDependency/ext/ExtAB.jl new file mode 100644 index 0000000000000..1ded9f2df5097 --- /dev/null +++ b/test/project/Extensions/CrossPackageExtToExtDependency/ext/ExtAB.jl @@ -0,0 +1,12 @@ +module ExtAB + +using CrossPackageExtToExtDependency +using SomePackage +using CyclicExtensions + +const ExtA = Base.get_extension(CyclicExtensions, :ExtA) +if !(ExtA isa Module) + error("expected extension to load") +end + +end diff --git 
a/test/project/Extensions/CrossPackageExtToExtDependency/src/CrossPackageExtToExtDependency.jl b/test/project/Extensions/CrossPackageExtToExtDependency/src/CrossPackageExtToExtDependency.jl new file mode 100644 index 0000000000000..28b229e2d61bf --- /dev/null +++ b/test/project/Extensions/CrossPackageExtToExtDependency/src/CrossPackageExtToExtDependency.jl @@ -0,0 +1,7 @@ +module CrossPackageExtToExtDependency + +using CyclicExtensions + +greet() = print("Hello x-package ext-to-ext!") + +end # module CrossPackageExtToExtDependency diff --git a/test/project/Extensions/CyclicExtensions/Manifest.toml b/test/project/Extensions/CyclicExtensions/Manifest.toml new file mode 100644 index 0000000000000..0f280293c07b6 --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/Manifest.toml @@ -0,0 +1,26 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.4" +manifest_format = "2.0" +project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36" + +[[deps.ExtDep]] +deps = ["SomePackage", "SomeOtherPackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.ExtDep2]] +path = "../ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" + +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/CyclicExtensions/Project.toml b/test/project/Extensions/CyclicExtensions/Project.toml new file mode 100644 index 0000000000000..08d539dcc40ae --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/Project.toml @@ -0,0 +1,13 @@ +name = "CyclicExtensions" +uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c" +version = "0.1.0" + +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" + +[weakdeps] +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" + +[extensions] +ExtA = ["SomePackage"] +ExtB = ["SomePackage"] diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtA.jl b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl new file mode 100644 index 0000000000000..fa0c0961633cb --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl @@ -0,0 +1,6 @@ +module ExtA + +using CyclicExtensions +using SomePackage + +end diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtB.jl b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl new file mode 100644 index 0000000000000..8f6da556d39b8 --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl @@ -0,0 +1,6 @@ +module ExtB + +using CyclicExtensions +using SomePackage + +end diff --git a/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl new file mode 100644 index 0000000000000..f1c2ec2077562 --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl @@ -0,0 +1,7 @@ +module CyclicExtensions + +using ExtDep + +greet() = print("Hello Cycles!") + +end # module CyclicExtensions diff --git a/test/project/Extensions/DepWithParentExt.jl/Project.toml b/test/project/Extensions/DepWithParentExt.jl/Project.toml new file mode 100644 index 0000000000000..bc487252ced4e --- /dev/null +++ b/test/project/Extensions/DepWithParentExt.jl/Project.toml @@ -0,0 +1,9 @@ +name = "DepWithParentExt" +uuid = "8a35c396-5ffc-40d2-b7ec-e8ed2248da32" +version = "0.1.0" + +[weakdeps] +Parent = "58cecb9c-f68a-426e-b92a-89d456ae7acc" 
+ +[extensions] +ParentExt = "Parent" diff --git a/test/project/Extensions/DepWithParentExt.jl/ext/ParentExt.jl b/test/project/Extensions/DepWithParentExt.jl/ext/ParentExt.jl new file mode 100644 index 0000000000000..56176d2f5921d --- /dev/null +++ b/test/project/Extensions/DepWithParentExt.jl/ext/ParentExt.jl @@ -0,0 +1,6 @@ +module ParentExt + +using Parent +using DepWithParentExt + +end diff --git a/test/project/Extensions/DepWithParentExt.jl/src/DepWithParentExt.jl b/test/project/Extensions/DepWithParentExt.jl/src/DepWithParentExt.jl new file mode 100644 index 0000000000000..3d4ebc4ebf8a0 --- /dev/null +++ b/test/project/Extensions/DepWithParentExt.jl/src/DepWithParentExt.jl @@ -0,0 +1,5 @@ +module DepWithParentExt + +greet() = print("Hello dep w/ ext for parent dep!") + +end # module DepWithParentExt diff --git a/test/project/Extensions/EnvWithDeps/Manifest.toml b/test/project/Extensions/EnvWithDeps/Manifest.toml index 85ff259f0a4d5..554a317b370eb 100644 --- a/test/project/Extensions/EnvWithDeps/Manifest.toml +++ b/test/project/Extensions/EnvWithDeps/Manifest.toml @@ -5,7 +5,7 @@ manifest_format = "2.0" project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36" [[deps.ExtDep]] -deps = ["SomePackage"] +deps = ["SomePackage", "SomeOtherPackage"] path = "../ExtDep.jl" uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" version = "0.1.0" @@ -15,6 +15,11 @@ path = "../ExtDep2" uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" version = "0.1.0" +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" + [[deps.SomePackage]] path = "../SomePackage" uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml index 8ac961fa1a9a9..ca2be57c61596 100644 --- a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml +++ b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml @@ -1,11 +1,11 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.9.0-beta4" +julia_version = "1.12.0-DEV" manifest_format = "2.0" -project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef" +project_hash = "a4c480cfa7da9610333d5c42623bf746bd286c5f" [[deps.ExtDep]] -deps = ["SomePackage"] +deps = ["SomePackage", "SomeOtherPackage"] path = "../ExtDep.jl" uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" version = "0.1.0" @@ -18,10 +18,17 @@ version = "0.1.0" [deps.HasExtensions.extensions] Extension = "ExtDep" ExtensionFolder = ["ExtDep", "ExtDep2"] + LinearAlgebraExt = "LinearAlgebra" [deps.HasExtensions.weakdeps] ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" + LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" [[deps.SomePackage]] path = "../SomePackage" diff --git a/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml index 66781a5701363..9f8c717041b6e 100644 --- a/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml +++ b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml @@ -5,7 +5,7 @@ manifest_format = "2.0" project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef" [[deps.ExtDep]] -deps = ["SomePackage"] +deps = ["SomePackage", "SomeOtherPackage"] path = "../ExtDep.jl" uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" version = "0.1.0" @@ -19,6 
+19,11 @@ weakdeps = ["ExtDep"] [deps.HasExtensions.extensions] Extension2 = "ExtDep" +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" + [[deps.SomePackage]] path = "../SomePackage" uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/ExtDep.jl/Project.toml b/test/project/Extensions/ExtDep.jl/Project.toml index d246934b7f958..1ece7bf11f95a 100644 --- a/test/project/Extensions/ExtDep.jl/Project.toml +++ b/test/project/Extensions/ExtDep.jl/Project.toml @@ -4,3 +4,4 @@ version = "0.1.0" [deps] SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" +SomeOtherPackage = "178f68a2-4498-45ee-a775-452b36359b63" diff --git a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl index 1c0022d879f51..2d3c6b7f28827 100644 --- a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl +++ b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl @@ -2,6 +2,7 @@ module ExtDep # loading this package makes the check for loading extensions trigger # which tests #47921 +using SomeOtherPackage using SomePackage struct ExtDepStruct end diff --git a/test/project/Extensions/ExtDep3.jl/Project.toml b/test/project/Extensions/ExtDep3.jl/Project.toml new file mode 100644 index 0000000000000..690b2f1cffff4 --- /dev/null +++ b/test/project/Extensions/ExtDep3.jl/Project.toml @@ -0,0 +1,4 @@ +name = "ExtDep3" +uuid = "a5541f1e-a556-4fdc-af15-097880d743a1" +version = "0.1.0" +authors = ["Kristoffer "] diff --git a/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl b/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl new file mode 100644 index 0000000000000..96a0b472d06c5 --- /dev/null +++ b/test/project/Extensions/ExtDep3.jl/src/ExtDep3.jl @@ -0,0 +1,5 @@ +module ExtDep3 + +greet() = print("Hello World!") + +end # module ExtDep3 diff --git a/test/project/Extensions/ExtNameCollision_A/Project.toml b/test/project/Extensions/ExtNameCollision_A/Project.toml new file mode 100644 index 0000000000000..f4cc37786f508 --- /dev/null +++ b/test/project/Extensions/ExtNameCollision_A/Project.toml @@ -0,0 +1,9 @@ +name = "ExtNameCollision_A" +uuid = "9f48de98-8f56-4937-aa32-2a5530882eaa" +version = "0.1.0" + +[weakdeps] +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[extensions] +REPLExt = "REPL" diff --git a/test/project/Extensions/ExtNameCollision_A/ext/REPLExt.jl b/test/project/Extensions/ExtNameCollision_A/ext/REPLExt.jl new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl b/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl new file mode 100644 index 0000000000000..2f47a862dd9c5 --- /dev/null +++ b/test/project/Extensions/ExtNameCollision_A/src/ExtNameCollision_A.jl @@ -0,0 +1,5 @@ +module ExtNameCollision_A + +greet() = print("Hello World!") + +end # module ExtNameCollision_A diff --git a/test/project/Extensions/ExtNameCollision_B/Project.toml b/test/project/Extensions/ExtNameCollision_B/Project.toml new file mode 100644 index 0000000000000..ac52d64a82a7c --- /dev/null +++ b/test/project/Extensions/ExtNameCollision_B/Project.toml @@ -0,0 +1,9 @@ +name = "ExtNameCollision_B" +uuid = "597d654f-44d8-4443-9b1e-1f2f4b45906f" +version = "0.1.0" + +[weakdeps] +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[extensions] +REPLExt = "REPL" diff --git a/test/project/Extensions/ExtNameCollision_B/ext/REPLExt.jl b/test/project/Extensions/ExtNameCollision_B/ext/REPLExt.jl new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl b/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl new file mode 100644 index 0000000000000..e7665982a79b3 --- /dev/null +++ b/test/project/Extensions/ExtNameCollision_B/src/ExtNameCollision_B.jl @@ -0,0 +1,5 @@ +module ExtNameCollision_B + +greet() = print("Hello World!") + +end # module ExtNameCollision_B diff --git a/test/project/Extensions/ExtToExtDependency/Manifest.toml b/test/project/Extensions/ExtToExtDependency/Manifest.toml new file mode 100644 index 0000000000000..41546213cdd41 --- /dev/null +++ b/test/project/Extensions/ExtToExtDependency/Manifest.toml @@ -0,0 +1,21 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.1" +manifest_format = "2.0" +project_hash = "90b427e837c654fabb1434527ea698dabad46d29" + +[[deps.ExtDep]] +deps = ["SomeOtherPackage", "SomePackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/ExtToExtDependency/Project.toml b/test/project/Extensions/ExtToExtDependency/Project.toml new file mode 100644 index 0000000000000..980db74c04dc4 --- /dev/null +++ b/test/project/Extensions/ExtToExtDependency/Project.toml @@ -0,0 +1,14 @@ +name = "ExtToExtDependency" +uuid = "594ddb71-72fb-4cfe-9471-775d48a5b70b" +version = "0.1.0" + +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" + +[weakdeps] +SomeOtherPackage = "178f68a2-4498-45ee-a775-452b36359b63" +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" + +[extensions] +ExtA = ["SomePackage"] +ExtAB = ["SomePackage", "SomeOtherPackage"] diff --git a/test/project/Extensions/ExtToExtDependency/ext/ExtA.jl b/test/project/Extensions/ExtToExtDependency/ext/ExtA.jl new file mode 100644 index 0000000000000..71ed09795157c --- /dev/null +++ b/test/project/Extensions/ExtToExtDependency/ext/ExtA.jl @@ -0,0 +1,6 @@ +module ExtA + +using ExtToExtDependency +using SomePackage + +end diff --git a/test/project/Extensions/ExtToExtDependency/ext/ExtAB.jl b/test/project/Extensions/ExtToExtDependency/ext/ExtAB.jl new file mode 100644 index 0000000000000..a5b2c43cafd58 --- /dev/null +++ b/test/project/Extensions/ExtToExtDependency/ext/ExtAB.jl @@ -0,0 +1,12 @@ +module ExtAB + +using ExtToExtDependency +using SomePackage +using SomeOtherPackage + +const ExtA = Base.get_extension(ExtToExtDependency, :ExtA) +if !(ExtA isa Module) + error("expected extension to load") +end + +end diff --git a/test/project/Extensions/ExtToExtDependency/src/ExtToExtDependency.jl b/test/project/Extensions/ExtToExtDependency/src/ExtToExtDependency.jl new file mode 100644 index 0000000000000..ec2bf58f18641 --- /dev/null +++ b/test/project/Extensions/ExtToExtDependency/src/ExtToExtDependency.jl @@ -0,0 +1,7 @@ +module ExtToExtDependency + +using ExtDep + +greet() = print("Hello ext-to-ext!") + +end # module ExtToExtDependency diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml index 52542fc822094..98510dcb27733 100644 --- a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml +++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml @@ -1,11 +1,11 @@ # This file is 
machine-generated - editing it directly is not advised -julia_version = "1.10.0-DEV" +julia_version = "1.12.0-DEV" manifest_format = "2.0" -project_hash = "d523b3401f72a1ed34b7b43749fd2655c6b78542" +project_hash = "4e196b07f2ee7adc48ac9d528d42b3cf3737c7a0" [[deps.ExtDep]] -deps = ["SomePackage"] +deps = ["SomePackage", "SomeOtherPackage"] path = "../ExtDep.jl" uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" version = "0.1.0" @@ -15,7 +15,13 @@ path = "../ExtDep2" uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" version = "0.1.0" +[[deps.ExtDep3]] +path = "../ExtDep3.jl" +uuid = "a5541f1e-a556-4fdc-af15-097880d743a1" +version = "0.1.0" + [[deps.HasExtensions]] +deps = ["ExtDep3"] path = "../HasExtensions.jl" uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" version = "0.1.0" @@ -23,8 +29,14 @@ weakdeps = ["ExtDep", "ExtDep2"] [deps.HasExtensions.extensions] Extension = "ExtDep" + ExtensionDep = "ExtDep3" ExtensionFolder = ["ExtDep", "ExtDep2"] +[[deps.SomeOtherPackage]] +path = "../SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +version = "0.1.0" + [[deps.SomePackage]] path = "../SomePackage" uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml index 8f308a9fbee72..aa4956caada74 100644 --- a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml +++ b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml @@ -5,4 +5,5 @@ version = "0.1.0" [deps] ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1" HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" diff --git a/test/project/Extensions/HasExtensions.jl/Manifest.toml b/test/project/Extensions/HasExtensions.jl/Manifest.toml index 55f7958701a75..429c6598fc4f4 100644 --- a/test/project/Extensions/HasExtensions.jl/Manifest.toml +++ b/test/project/Extensions/HasExtensions.jl/Manifest.toml @@ -1,7 +1,10 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.0-DEV" +julia_version = "1.12.0-DEV" manifest_format = "2.0" -project_hash = "c87947f1f1f070eea848950c304d668a112dec3d" +project_hash = "c0bb526b75939a74a6195ee4819e598918a22ad7" -[deps] +[[deps.ExtDep3]] +path = "../ExtDep3.jl" +uuid = "a5541f1e-a556-4fdc-af15-097880d743a1" +version = "0.1.0" diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml index 72577de36d65d..a02f5662d602d 100644 --- a/test/project/Extensions/HasExtensions.jl/Project.toml +++ b/test/project/Extensions/HasExtensions.jl/Project.toml @@ -2,10 +2,14 @@ name = "HasExtensions" uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" version = "0.1.0" +[deps] +ExtDep3 = "a5541f1e-a556-4fdc-af15-097880d743a1" + [weakdeps] ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" [extensions] Extension = "ExtDep" +ExtensionDep = "ExtDep3" ExtensionFolder = ["ExtDep", "ExtDep2"] diff --git a/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl b/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl new file mode 100644 index 0000000000000..e2710d4d89bbb --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/ext/ExtensionDep.jl @@ -0,0 +1,9 @@ +module ExtensionDep + +using HasExtensions, ExtDep3 + +function __init__() + HasExtensions.ext_dep_loaded = true +end + +end diff --git 
a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl index dbfaeec4f8812..9d9785f87f790 100644 --- a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl +++ b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl @@ -6,5 +6,6 @@ foo(::HasExtensionsStruct) = 1 ext_loaded = false ext_folder_loaded = false +ext_dep_loaded = false end # module diff --git a/test/project/Extensions/ImplicitEnv/A/Project.toml b/test/project/Extensions/ImplicitEnv/A/Project.toml new file mode 100644 index 0000000000000..043272d4bd015 --- /dev/null +++ b/test/project/Extensions/ImplicitEnv/A/Project.toml @@ -0,0 +1,9 @@ +name = "A" +uuid = "299a509a-2181-4868-8714-15151945d902" +version = "0.1.0" + +[weakdeps] +B = "c2c18cb0-3543-497c-ac2a-523c527589e5" + +[extensions] +BExt = "B" diff --git a/test/project/Extensions/ImplicitEnv/A/ext/BExt.jl b/test/project/Extensions/ImplicitEnv/A/ext/BExt.jl new file mode 100644 index 0000000000000..70be6435bcbe8 --- /dev/null +++ b/test/project/Extensions/ImplicitEnv/A/ext/BExt.jl @@ -0,0 +1,3 @@ +module BExt + +end diff --git a/test/project/Extensions/ImplicitEnv/A/src/A.jl b/test/project/Extensions/ImplicitEnv/A/src/A.jl new file mode 100644 index 0000000000000..ab16fa1de96af --- /dev/null +++ b/test/project/Extensions/ImplicitEnv/A/src/A.jl @@ -0,0 +1,5 @@ +module A + +greet() = print("Hello World!") + +end # module A diff --git a/test/project/Extensions/ImplicitEnv/B/Project.toml b/test/project/Extensions/ImplicitEnv/B/Project.toml new file mode 100644 index 0000000000000..d919c27be0467 --- /dev/null +++ b/test/project/Extensions/ImplicitEnv/B/Project.toml @@ -0,0 +1,3 @@ +name = "B" +uuid = "c2c18cb0-3543-497c-ac2a-523c527589e5" +version = "0.1.0" diff --git a/test/project/Extensions/ImplicitEnv/B/src/B.jl b/test/project/Extensions/ImplicitEnv/B/src/B.jl new file mode 100644 index 0000000000000..79b5a1204765f --- /dev/null +++ b/test/project/Extensions/ImplicitEnv/B/src/B.jl @@ -0,0 +1,5 @@ +module B + +greet() = print("Hello World!") + +end # module B diff --git a/test/project/Extensions/Parent.jl/Manifest.toml b/test/project/Extensions/Parent.jl/Manifest.toml new file mode 100644 index 0000000000000..eb0c323ac36f5 --- /dev/null +++ b/test/project/Extensions/Parent.jl/Manifest.toml @@ -0,0 +1,20 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "b6ac643184d62cc94427c9aa665ff1fb63d66038" + +[[deps.DepWithParentExt]] +path = "../DepWithParentExt.jl" +uuid = "8a35c396-5ffc-40d2-b7ec-e8ed2248da32" +version = "0.1.0" +weakdeps = ["Parent"] + + [deps.DepWithParentExt.extensions] + ParentExt = "Parent" + +[[deps.Parent]] +deps = ["DepWithParentExt"] +path = "." 
+uuid = "58cecb9c-f68a-426e-b92a-89d456ae7acc" +version = "0.1.0" diff --git a/test/project/Extensions/Parent.jl/Project.toml b/test/project/Extensions/Parent.jl/Project.toml new file mode 100644 index 0000000000000..d62594cf15d3f --- /dev/null +++ b/test/project/Extensions/Parent.jl/Project.toml @@ -0,0 +1,7 @@ +name = "Parent" +uuid = "58cecb9c-f68a-426e-b92a-89d456ae7acc" +version = "0.1.0" +authors = ["Cody Tapscott "] + +[deps] +DepWithParentExt = "8a35c396-5ffc-40d2-b7ec-e8ed2248da32" diff --git a/test/project/Extensions/Parent.jl/src/Parent.jl b/test/project/Extensions/Parent.jl/src/Parent.jl new file mode 100644 index 0000000000000..471f4b13ecca3 --- /dev/null +++ b/test/project/Extensions/Parent.jl/src/Parent.jl @@ -0,0 +1,7 @@ +module Parent + +using DepWithParentExt + +greet() = print("Hello parent!") + +end # module Parent diff --git a/test/project/Extensions/SomeOtherPackage/Project.toml b/test/project/Extensions/SomeOtherPackage/Project.toml new file mode 100644 index 0000000000000..6e7eee40c7be2 --- /dev/null +++ b/test/project/Extensions/SomeOtherPackage/Project.toml @@ -0,0 +1,4 @@ +name = "SomeOtherPackage" +uuid = "178f68a2-4498-45ee-a775-452b36359b63" +authors = ["Cody Tapscott "] +version = "0.1.0" diff --git a/test/project/Extensions/SomeOtherPackage/src/SomeOtherPackage.jl b/test/project/Extensions/SomeOtherPackage/src/SomeOtherPackage.jl new file mode 100644 index 0000000000000..ba23eb3914561 --- /dev/null +++ b/test/project/Extensions/SomeOtherPackage/src/SomeOtherPackage.jl @@ -0,0 +1,5 @@ +module SomeOtherPackage + +greet() = print("Hello World!") + +end # module SomeOtherPackage diff --git a/test/project/ProjectPath/CustomPath.jl b/test/project/ProjectPath/CustomPath.jl new file mode 100644 index 0000000000000..8fe764fa066dc --- /dev/null +++ b/test/project/ProjectPath/CustomPath.jl @@ -0,0 +1,5 @@ +module ProjectPath + +greet() = print("Hello World!") + +end # module ProjectPath diff --git a/test/project/ProjectPath/Manifest.toml b/test/project/ProjectPath/Manifest.toml new file mode 100644 index 0000000000000..123e7f575062a --- /dev/null +++ b/test/project/ProjectPath/Manifest.toml @@ -0,0 +1,18 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "51ade905d618e4aa369bc869841376219cc36cb1" + +[[deps.ProjectPath]] +deps = ["ProjectPathDep"] +path = "." 
+entryfile = "CustomPath.jl" +uuid = "32833bde-7fc1-4d28-8365-9d01e1bcbc1b" +version = "0.1.0" + +[[deps.ProjectPathDep]] +path = "ProjectPathDep" +entryfile = "CustomPath.jl" +uuid = "f18633fc-8799-43ff-aa06-99ed830dc572" +version = "0.1.0" diff --git a/test/project/ProjectPath/Project.toml b/test/project/ProjectPath/Project.toml new file mode 100644 index 0000000000000..a434f78e9c211 --- /dev/null +++ b/test/project/ProjectPath/Project.toml @@ -0,0 +1,7 @@ +name = "ProjectPath" +uuid = "32833bde-7fc1-4d28-8365-9d01e1bcbc1b" +entryfile = "CustomPath.jl" +version = "0.1.0" + +[deps] +ProjectPathDep = "f18633fc-8799-43ff-aa06-99ed830dc572" diff --git a/test/project/ProjectPath/ProjectPathDep/CustomPath.jl b/test/project/ProjectPath/ProjectPathDep/CustomPath.jl new file mode 100644 index 0000000000000..adbe508f0c7f9 --- /dev/null +++ b/test/project/ProjectPath/ProjectPathDep/CustomPath.jl @@ -0,0 +1,5 @@ +module ProjectPathDep + +greet() = print("Hello World!") + +end # module ProjectPathDep diff --git a/test/project/ProjectPath/ProjectPathDep/Project.toml b/test/project/ProjectPath/ProjectPathDep/Project.toml new file mode 100644 index 0000000000000..c69e54e8c9390 --- /dev/null +++ b/test/project/ProjectPath/ProjectPathDep/Project.toml @@ -0,0 +1,4 @@ +name = "ProjectPathDep" +uuid = "f18633fc-8799-43ff-aa06-99ed830dc572" +version = "0.1.0" +entryfile = "CustomPath.jl" diff --git a/test/project/Rot13/Project.toml b/test/project/Rot13/Project.toml new file mode 100644 index 0000000000000..eb03cb84d588e --- /dev/null +++ b/test/project/Rot13/Project.toml @@ -0,0 +1,3 @@ +name = "Rot13" +uuid = "43ef800a-eac4-47f4-949b-25107b932e8f" +version = "0.1.0" diff --git a/test/project/Rot13/src/Rot13.jl b/test/project/Rot13/src/Rot13.jl new file mode 100644 index 0000000000000..66f077812d878 --- /dev/null +++ b/test/project/Rot13/src/Rot13.jl @@ -0,0 +1,28 @@ +module Rot13 + +function rot13(c::Char) + shft = islowercase(c) ? 'a' : 'A' + isletter(c) ? 
c = shft + (c - shft + 13) % 26 : c +end + +rot13(str::AbstractString) = map(rot13, str) + +function (@main)(args) + foreach(arg -> print(rot13(arg), " "), args) + return 0 +end + +module Rot26 # LOL + +import ..rot13 + +rot26(str::AbstractString) = map(rot13 ∘ rot13, str) + +function (@main)(args) + foreach(arg -> print(rot26(arg), " "), args) + return 0 +end + +end + +end # module Rot13 diff --git a/test/project/SubProject/Devved/Project.toml b/test/project/SubProject/Devved/Project.toml new file mode 100644 index 0000000000000..63088a132cb77 --- /dev/null +++ b/test/project/SubProject/Devved/Project.toml @@ -0,0 +1,3 @@ +name = "Devved" +uuid = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599" +version = "0.1.0" diff --git a/test/project/SubProject/Devved/src/Devved.jl b/test/project/SubProject/Devved/src/Devved.jl new file mode 100644 index 0000000000000..f3eb267409ece --- /dev/null +++ b/test/project/SubProject/Devved/src/Devved.jl @@ -0,0 +1,5 @@ +module Devved + +greet() = print("Hello World!") + +end # module Devved diff --git a/test/project/SubProject/Devved2/Project.toml b/test/project/SubProject/Devved2/Project.toml new file mode 100644 index 0000000000000..c761630566116 --- /dev/null +++ b/test/project/SubProject/Devved2/Project.toml @@ -0,0 +1,3 @@ +name = "Devved2" +uuid = "08f74b90-50f5-462f-80b9-a72b1258a17b" +version = "0.1.0" diff --git a/test/project/SubProject/Devved2/src/Devved2.jl b/test/project/SubProject/Devved2/src/Devved2.jl new file mode 100644 index 0000000000000..9bd5df2793671 --- /dev/null +++ b/test/project/SubProject/Devved2/src/Devved2.jl @@ -0,0 +1,5 @@ +module Devved2 + +greet() = print("Hello World!") + +end # module Devved2 diff --git a/test/project/SubProject/Manifest.toml b/test/project/SubProject/Manifest.toml new file mode 100644 index 0000000000000..5d791a74652d4 --- /dev/null +++ b/test/project/SubProject/Manifest.toml @@ -0,0 +1,68 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "620b9377bc807ff657e6618c8ccc24887eb40285" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.Devved]] +path = "Devved" +uuid = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599" +version = "0.1.0" + +[[deps.Devved2]] +path = "Devved2" +uuid = "08f74b90-50f5-462f-80b9-a72b1258a17b" +version = "0.1.0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.Logging]] +deps = ["StyledStrings"] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MyPkg]] +deps = ["Devved", "Devved2"] +path = "." 
+uuid = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee" +version = "0.0.0" + +[[deps.PackageThatIsSub]] +deps = ["Devved2", "MyPkg"] +path = "PackageThatIsSub" +uuid = "1efb588c-9412-4e40-90a4-710420bd84aa" +version = "0.1.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" diff --git a/test/project/SubProject/PackageThatIsSub/Project.toml b/test/project/SubProject/PackageThatIsSub/Project.toml new file mode 100644 index 0000000000000..e41dd998c5a1c --- /dev/null +++ b/test/project/SubProject/PackageThatIsSub/Project.toml @@ -0,0 +1,14 @@ +name = "PackageThatIsSub" +uuid = "1efb588c-9412-4e40-90a4-710420bd84aa" +version = "0.1.0" + +[workspace] +projects = ["test"] + +[deps] +Devved2 = "08f74b90-50f5-462f-80b9-a72b1258a17b" +MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee" + +[preferences] +value = 2 +y = 2 diff --git a/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl b/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl new file mode 100644 index 0000000000000..7f9ea94ccb156 --- /dev/null +++ b/test/project/SubProject/PackageThatIsSub/src/PackageThatIsSub.jl @@ -0,0 +1,5 @@ +module PackageThatIsSub + +greet() = print("Hello World!") + +end # module PackageThatIsSub diff --git a/test/project/SubProject/PackageThatIsSub/test/Project.toml b/test/project/SubProject/PackageThatIsSub/test/Project.toml new file mode 100644 index 0000000000000..dc8186e2b735e --- /dev/null +++ b/test/project/SubProject/PackageThatIsSub/test/Project.toml @@ -0,0 +1,8 @@ +[deps] +MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee" +PackageThatIsSub = "1efb588c-9412-4e40-90a4-710420bd84aa" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[preferences] +value = 3 +z = 3 diff --git a/test/project/SubProject/Project.toml b/test/project/SubProject/Project.toml new file mode 100644 index 0000000000000..dcb84d865ac85 --- /dev/null +++ b/test/project/SubProject/Project.toml @@ -0,0 +1,13 @@ +name = "MyPkg" +uuid = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee" + +[workspace] +projects = ["sub", "PackageThatIsSub", "test"] + +[deps] +Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599" +Devved2 = "08f74b90-50f5-462f-80b9-a72b1258a17b" + +[preferences] +value = 1 +x = 1 diff --git a/test/project/SubProject/src/MyPkg.jl b/test/project/SubProject/src/MyPkg.jl new file mode 100644 index 0000000000000..6d84954645d55 --- /dev/null +++ b/test/project/SubProject/src/MyPkg.jl @@ -0,0 +1,3 @@ +module MyPkg + +end diff --git a/test/project/SubProject/sub/Project.toml b/test/project/SubProject/sub/Project.toml new file mode 100644 index 0000000000000..50aa238e91d57 --- /dev/null +++ b/test/project/SubProject/sub/Project.toml @@ -0,0 +1,3 @@ +[deps] +Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599" +MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee" diff --git a/test/project/SubProject/test/Project.toml b/test/project/SubProject/test/Project.toml new file mode 100644 index 0000000000000..b64312e4b1ee2 --- /dev/null +++ b/test/project/SubProject/test/Project.toml @@ -0,0 +1,4 @@ +[deps] +MyPkg = "0cafdeb2-d7a2-40d0-8d22-4411fcc2c4ee" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 
+Devved = "cbce3a6e-7a3d-4e84-8e6d-b87208df7599" diff --git a/test/project/deps/BadStdlibDeps/Manifest.toml b/test/project/deps/BadStdlibDeps/Manifest.toml new file mode 100644 index 0000000000000..32aaa0b83dc0a --- /dev/null +++ b/test/project/deps/BadStdlibDeps/Manifest.toml @@ -0,0 +1,51 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "dc9d33b0ee13d9466bdb75b8d375808a534a79ec" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +# This is intentionally missing LibGit2_jll for testing purposes +[[deps.LibGit2]] +deps = ["NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.8.0+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+1" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" diff --git a/test/project/deps/BadStdlibDeps/Project.toml b/test/project/deps/BadStdlibDeps/Project.toml new file mode 100644 index 0000000000000..223889185ea15 --- /dev/null +++ b/test/project/deps/BadStdlibDeps/Project.toml @@ -0,0 +1,2 @@ +[deps] +LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" diff --git a/test/project/deps/CovTest.jl/Project.toml b/test/project/deps/CovTest.jl/Project.toml new file mode 100644 index 0000000000000..97fb2c7d9cfce --- /dev/null +++ b/test/project/deps/CovTest.jl/Project.toml @@ -0,0 +1,3 @@ +name = "CovTest" +uuid = "f1f4390d-b815-473a-b5dd-5af6e1d717cb" +version = "0.1.0" diff --git a/test/project/deps/CovTest.jl/src/CovTest.jl b/test/project/deps/CovTest.jl/src/CovTest.jl new file mode 100644 index 0000000000000..bd172fc3a00f4 --- /dev/null +++ b/test/project/deps/CovTest.jl/src/CovTest.jl @@ -0,0 +1,26 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +module CovTest + +function foo() + x = 1 + y = 2 + z = x * y + return z +end + +function bar() + x = 1 + y = 2 + z = x * y + return z +end + +if Base.generating_output() + # precompile foo but not bar + foo() +end + +export foo, bar + +end #module diff --git a/test/ranges.jl b/test/ranges.jl index b263e6d4d530d..89134be897ddd 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -3,6 +3,9 @@ using Base.Checked: checked_length using InteractiveUtils: code_llvm +isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") +using .Main.OffsetArrays + @testset "range construction" begin @test_throws ArgumentError range(start=1, step=1, stop=2, length=10) @test_throws ArgumentError range(start=1, step=1, stop=10, length=11) @@ -62,6 +65,10 @@ using InteractiveUtils: code_llvm unitrangeerrstr = "promotion of types Char and Char failed to change any arguments" @test_throws unitrangeerrstr UnitRange('a', 'b') + + @test step(false:true) === true # PR 56405 + @test eltype((false:true) + (Int8(0):Int8(1))) === Int8 + @test eltype((false:true:true) + (Int8(0):Int8(1))) === Int8 end using Dates, Random @@ -232,9 +239,23 @@ end @test cmp_sn2(Tw(xw/yw), astuple(x/y)..., slopbits) end end + @testset "high precision of varying types" begin + x = Float32(π) + y = Float64(Base.MathConstants.γ) + @test Base.mul12(x, y)[1] ≈ Base.mul12(Float64(π), y)[1] rtol=1e-6 + @test Base.mul12(x, y)[2] ≈ Base.mul12(Float64(π), y)[2] atol=1e-15 + @test Base.div12(x, y)[1] ≈ Base.div12(Float64(π), y)[1] rtol=1e-6 + @test Base.div12(x, y)[2] ≈ Base.div12(Float64(π), y)[2] atol=1e-15 + xtp = Base.TwicePrecision{Float32}(π) + ytp = Base.TwicePrecision{Float64}(Base.MathConstants.γ) + @test Float32(xtp + ytp) ≈ Float32(Base.TwicePrecision{Float64}(π) + ytp) + end x1 = Base.TwicePrecision{Float64}(1) x0 = Base.TwicePrecision{Float64}(0) + @test eltype(x1) == Float64 + @test eltype(typeof(x1)) == Float64 + @test zero(typeof(x1)) === x0 xinf = Base.TwicePrecision{Float64}(Inf) @test Float64(x1+x0) == 1 @test Float64(x1+0) == 1 @@ -275,15 +296,10 @@ end rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T))) - rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo) - # For this test the `BigFloat` mantissa needs to be just a bit # larger than the `Float64` mantissa setprecision(BigFloat, 70) do n = 10 - @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n - rand_twiceprecision_is_ok(T) - end @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat)) end @@ -297,6 +313,13 @@ end twiceprecision_is_normalized(Base.TwicePrecision{Float64}(rand_twiceprecision(Float32))) end end + + @testset "displaying a complex range (#52713)" begin + r = 1.0*(1:5) .+ im + @test startswith(repr(r), repr(first(r))) + @test endswith(repr(r), repr(last(r))) + @test occursin(repr(step(r)), repr(r)) + end end @testset "ranges" begin @test size(10:1:0) == (0,) @@ -413,17 +436,57 @@ end @testset "findfirst" begin @test findfirst(==(1), Base.IdentityUnitRange(-1:1)) == 1 @test findfirst(isequal(3), Base.OneTo(10)) == 3 - @test findfirst(==(0), Base.OneTo(10)) == nothing - @test findfirst(==(11), Base.OneTo(10)) == nothing + @test findfirst(==(0), Base.OneTo(10)) === nothing + @test findfirst(==(11), Base.OneTo(10)) === nothing + @test @inferred((r -> Val(findfirst(iszero, r)))(Base.OneTo(10))) == Val(nothing) + @test 
findfirst(isone, Base.OneTo(10)) === 1 + @test findfirst(isone, Base.OneTo(0)) === nothing @test findfirst(==(4), Int16(3):Int16(7)) === Int(2) - @test findfirst(==(2), Int16(3):Int16(7)) == nothing - @test findfirst(isequal(8), 3:7) == nothing + @test findfirst(==(2), Int16(3):Int16(7)) === nothing + @test findfirst(isequal(8), 3:7) === nothing + @test findfirst(==(0), UnitRange(-0.5, 0.5)) === nothing + @test findfirst(==(2), big(1):big(2)) === 2 @test findfirst(isequal(7), 1:2:10) == 4 + @test findfirst(iszero, -5:5) == 6 + @test findfirst(iszero, 2:5) === nothing + @test findfirst(iszero, 6:5) === nothing + @test findfirst(isone, -5:5) == 7 + @test findfirst(isone, 2:5) === nothing + @test findfirst(isone, 6:5) === nothing @test findfirst(==(7), 1:2:10) == 4 - @test findfirst(==(10), 1:2:10) == nothing - @test findfirst(==(11), 1:2:10) == nothing + @test findfirst(==(10), 1:2:10) === nothing + @test findfirst(==(11), 1:2:10) === nothing @test findfirst(==(-7), 1:-1:-10) == 9 - @test findfirst(==(2),1:-1:2) == nothing + @test findfirst(==(2),1:-1:2) === nothing + @test findfirst(iszero, 5:-2:-5) === nothing + @test findfirst(iszero, 6:-2:-6) == 4 + @test findfirst(==(Int128(2)), Int128(1):Int128(1):Int128(4)) === 2 + end + @testset "findlast" begin + @test findlast(==(1), Base.IdentityUnitRange(-1:1)) == 1 + @test findlast(isequal(3), Base.OneTo(10)) == 3 + @test findlast(==(0), Base.OneTo(10)) === nothing + @test findlast(==(11), Base.OneTo(10)) === nothing + @test @inferred((() -> Val(findlast(iszero, Base.OneTo(10))))()) == Val(nothing) + @test findlast(isone, Base.OneTo(10)) == 1 + @test findlast(isone, Base.OneTo(0)) === nothing + @test findlast(==(4), Int16(3):Int16(7)) === Int(2) + @test findlast(==(2), Int16(3):Int16(7)) === nothing + @test findlast(isequal(8), 3:7) === nothing + @test findlast(==(0), UnitRange(-0.5, 0.5)) === nothing + @test findlast(==(2), big(1):big(2)) === 2 + @test findlast(isequal(7), 1:2:10) == 4 + @test findlast(iszero, -5:5) == 6 + @test findlast(iszero, 2:5) === nothing + @test findlast(iszero, 6:5) === nothing + @test findlast(==(7), 1:2:10) == 4 + @test findlast(==(10), 1:2:10) === nothing + @test findlast(==(11), 1:2:10) === nothing + @test findlast(==(-7), 1:-1:-10) == 9 + @test findlast(==(2),1:-1:2) === nothing + @test findlast(iszero, 5:-2:-5) === nothing + @test findlast(iszero, 6:-2:-6) == 4 + @test findlast(==(Int128(2)), Int128(1):Int128(1):Int128(4)) === 2 end @testset "reverse" begin @test reverse(reverse(1:10)) == 1:10 @@ -545,6 +608,13 @@ end @test sort(1:10, rev=true) == 10:-1:1 @test sort(-3:3, by=abs) == [0,-1,1,-2,2,-3,3] @test partialsort(1:10, 4) == 4 + + @testset "offset ranges" begin + x = OffsetArrays.IdOffsetRange(values=4:13, indices=4:13) + @test sort(x) === x === sort!(x) + @test sortperm(x) == eachindex(x) + @test issorted(x[sortperm(x)]) + end end @testset "in" begin @test 0 in UInt(0):100:typemax(UInt) @@ -631,11 +701,30 @@ end end end @testset "indexing range with empty range (#4309)" begin - @test (3:6)[5:4] === 7:6 + @test (@inferred (3:6)[5:4]) === 7:6 @test_throws BoundsError (3:6)[5:5] @test_throws BoundsError (3:6)[5] - @test (0:2:10)[7:6] === 12:2:10 + @test (@inferred (0:2:10)[7:6]) === 12:2:11 @test_throws BoundsError (0:2:10)[7:7] + + for start in [true], stop in [true, false] + @test (@inferred (start:stop)[1:0]) === true:false + end + @test (@inferred (true:false)[true:false]) == true:false + + @testset "issue #40760" begin + empty_range = 1:0 + r = range(false, length = 0) + @test r isa UnitRange && first(r) 
== 0 && last(r) == -1 + r = (true:true)[empty_range] + @test r isa UnitRange && first(r) == true && last(r) == false + @testset for r in Any[true:true, true:true:true, 1:2, 1:1:2] + @test (@inferred r[1:0]) isa AbstractRange + @test r[1:0] == empty_range + @test (@inferred r[1:1:0]) isa AbstractRange + @test r[1:1:0] == empty_range + end + end end # indexing with negative ranges (#8351) for a=AbstractRange[3:6, 0:2:10], b=AbstractRange[0:1, 2:-1:0] @@ -1007,6 +1096,7 @@ end end a = prevfloat(a) end + @test (1:2:3)[StepRangeLen{Bool}(true,-1,2)] == [1] end # issue #20380 @@ -1286,6 +1376,8 @@ end @test sprint(show, UnitRange(1, 2)) == "1:2" @test sprint(show, StepRange(1, 2, 5)) == "1:2:5" + + @test sprint(show, LinRange{Float32}(1.5, 2.5, 10)) == "LinRange{Float32}(1.5, 2.5, 10)" end @testset "Issue 11049, and related" begin @@ -1491,6 +1583,9 @@ end @test size(r) == (3,) @test step(r) == 1 @test first(r) == 1 + @test first(r,2) === Base.OneTo(2) + @test first(r,20) === r + @test_throws ArgumentError first(r,-20) @test last(r) == 3 @test minimum(r) == 1 @test maximum(r) == 3 @@ -1522,6 +1617,9 @@ end @test findall(in(2:(length(r) - 1)), r) === 2:(length(r) - 1) @test findall(in(r), 2:(length(r) - 1)) === 1:(length(r) - 2) end + let r = Base.OneTo(Int8(4)) + @test first(r,4) === r + end @test convert(Base.OneTo, 1:2) === Base.OneTo{Int}(2) @test_throws ArgumentError("first element must be 1, got 2") convert(Base.OneTo, 2:3) @test_throws ArgumentError("step must be 1, got 2") convert(Base.OneTo, 1:2:5) @@ -1802,6 +1900,7 @@ Base.div(x::Displacement, y::Displacement) = Displacement(div(x.val, y.val)) # required for collect (summing lengths); alternatively, should length return Int by default? Base.promote_rule(::Type{Displacement}, ::Type{Int}) = Int Base.convert(::Type{Int}, x::Displacement) = x.val +Base.Int(x::Displacement) = x.val # Unsigned complement, for testing checked_length struct UPosition <: Unsigned @@ -1959,8 +2058,10 @@ end end @testset "allocation of TwicePrecision call" begin - @test @allocated(0:286.493442:360) == 0 - @test @allocated(0:286:360) == 0 + let + @test @allocated(0:286.493442:360) == 0 + @test @allocated(0:286:360) == 0 + end end @testset "range with start and stop" begin @@ -2302,6 +2403,7 @@ end @test_throws BoundsError r[true:true:false] @test_throws BoundsError r[true:true:true] end + @testset "Non-Int64 endpoints that are identical (#39798)" begin for T in DataType[Float16,Float32,Float64,Bool,Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128], r in [ LinRange(1, 1, 10), StepRangeLen(7, 0, 5) ] @@ -2328,13 +2430,46 @@ end @test 0.2 * (-2:2:2) == [-0.4, 0, 0.4] end -@testset "Indexing OneTo with IdentityUnitRange" begin - for endpt in Any[10, big(10), UInt(10)] - r = Base.OneTo(endpt) - inds = Base.IdentityUnitRange(3:5) - rs = r[inds] - @test rs === inds - @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)] +@testset "IdentityUnitRange indexing" begin + @testset "Indexing into an IdentityUnitRange" begin + @testset for r in Any[-1:20, Base.OneTo(20)] + ri = Base.IdentityUnitRange(r) + @test_throws "invalid index" ri[true] + @testset for s in Any[Base.OneTo(6), Base.OneTo{BigInt}(6), 3:6, big(3):big(6), 3:2:7] + @test mapreduce(==, &, ri[s], ri[s[begin]]:step(s):ri[s[end]]) + @test axes(ri[s]) == axes(s) + @test eltype(ri[s]) == eltype(ri) + end + end + @testset "Bool indices" begin + r = 1:1 + @test Base.IdentityUnitRange(r)[true:true] == r[true:true] + @test Base.IdentityUnitRange(r)[true:true:true] == r[true:true:true] + @test_throws 
BoundsError Base.IdentityUnitRange(1:2)[true:true] + @test_throws BoundsError Base.IdentityUnitRange(1:2)[true:true:true] + end + end + @testset "Indexing with IdentityUnitRange" begin + @testset "OneTo" begin + @testset for endpt in Any[10, big(12), UInt(11)] + r = Base.OneTo(endpt) + inds = Base.IdentityUnitRange(3:5) + rs = r[inds] + @test rs == inds + @test axes(rs) == axes(inds) + @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)] + end + end + @testset "IdentityUnitRange" begin + @testset for r in Any[Base.IdentityUnitRange(1:4), Base.IdentityUnitRange(Base.OneTo(4)), Base.Slice(1:4), Base.Slice(Base.OneTo(4))] + @testset for s in Any[Base.IdentityUnitRange(3:3), Base.IdentityUnitRange(Base.OneTo(2)), Base.Slice(3:3), Base.Slice(Base.OneTo(2))] + rs = r[s] + @test rs == s + @test axes(rs) == axes(s) + end + @test_throws BoundsError r[Base.IdentityUnitRange(first(r):last(r) + 1)] + end + end end end @@ -2490,3 +2625,164 @@ function check_ranges(rx, ry) end @test Core.Compiler.is_foldable(Base.infer_effects(check_ranges, (UnitRange{Int},UnitRange{Int}))) # TODO JET.@test_opt check_ranges(1:2, 3:4) + +@testset "checkbounds overflow (#26623)" begin + # the reported issue: + @test_throws BoundsError (1:3:4)[typemax(Int)÷3*2+3] + + # a case that using mul_with_overflow & add_with_overflow might get wrong: + @test (-10:2:typemax(Int))[typemax(Int)÷2+2] == typemax(Int)-9 +end + +@testset "collect with specialized vcat" begin + struct OneToThree <: AbstractUnitRange{Int} end + Base.size(r::OneToThree) = (3,) + Base.first(r::OneToThree) = 1 + Base.length(r::OneToThree) = 3 + Base.last(r::OneToThree) = 3 + function Base.getindex(r::OneToThree, i::Int) + checkbounds(r, i) + i + end + Base.vcat(r::OneToThree) = r + r = OneToThree() + a = Array(r) + @test a isa Vector{Int} + @test a == r + @test collect(r) isa Vector{Int} + @test collect(r) == r +end + +@testset "isassigned" begin + for (r, val) in ((1:3, 3), (1:big(2)^65, big(2)^65)) + @test isassigned(r, lastindex(r)) + # test that the indexing actually succeeds + @test r[end] == val + @test_throws ArgumentError isassigned(r, true) + end + +end + +@testset "unsigned index #44895" begin + x = range(-1,1,length=11) + @test x[UInt(1)] == -1.0 + a = StepRangeLen(1,2,3,2) + @test a[UInt(1)] == -1 +end + +@testset "StepRangeLen of CartesianIndex-es" begin + CIstart = CartesianIndex(2,3) + CIstep = CartesianIndex(1,1) + r = StepRangeLen(CIstart, CIstep, 4) + @test length(r) == 4 + @test first(r) == CIstart + @test step(r) == CIstep + @test last(r) == CartesianIndex(5,6) + @test r[2] == CartesianIndex(3,4) + + @test repr(r) == "StepRangeLen($CIstart, $CIstep, 4)" + + r = StepRangeLen(CartesianIndex(), CartesianIndex(), 3) + @test all(==(CartesianIndex()), r) + @test length(r) == 3 + @test repr(r) == "StepRangeLen(CartesianIndex(), CartesianIndex(), 3)" + + errmsg = ("deliberately unsupported for CartesianIndex", "StepRangeLen") + @test_throws errmsg range(CartesianIndex(1), step=CartesianIndex(1), length=3) +end + +@testset "logrange" begin + # basic idea + @test logrange(2, 16, 4) ≈ [2, 4, 8, 16] + @test logrange(1/8, 8.0, 7) ≈ [0.125, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0] + @test logrange(1000, 1, 4) ≈ [1000, 100, 10, 1] + @test logrange(1, 10^9, 19)[1:2:end] ≈ 10 .^ (0:9) + + # endpoints + @test logrange(0.1f0, 100, 33)[1] === 0.1f0 + @test logrange(0.789, 123_456, 135_790)[[begin, end]] == [0.789, 123_456] + @test logrange(nextfloat(0f0), floatmax(Float32), typemax(Int))[end] === floatmax(Float32) + @test logrange(nextfloat(Float16(0)), 
floatmax(Float16), 66_000)[end] === floatmax(Float16) + @test first(logrange(pi, 2pi, 3000)) === logrange(pi, 2pi, 3000)[1] === Float64(pi) + if Int == Int64 + @test logrange(0.1, 1000, 2^54)[end] === 1000.0 + end + + # empty, only, constant + @test first(logrange(1, 2, 0)) === 1.0 + @test last(logrange(1, 2, 0)) === 2.0 + @test collect(logrange(1, 2, 0)) == Float64[] + @test only(logrange(2pi, 2pi, 1)) === logrange(2pi, 2pi, 1)[1] === 2pi + @test logrange(1, 1, 3) == fill(1.0, 3) + + # subnormal Float64 + x = logrange(1e-320, 1e-300, 21) .* 1e300 + @test x ≈ logrange(1e-20, 1, 21) rtol=1e-6 + + # types + @test eltype(logrange(1, 10, 3)) == Float64 + @test eltype(logrange(1, 10, Int32(3))) == Float64 + @test eltype(logrange(1, 10f0, 3)) == Float32 + @test eltype(logrange(1f0, 10, 3)) == Float32 + @test eltype(logrange(1, big(10), 3)) == BigFloat + @test logrange(big"0.3", big(pi), 50)[1] == big"0.3" + @test logrange(big"0.3", big(pi), 50)[end] == big(pi) + + # more constructors + @test logrange(1,2,length=3) === Base.LogRange(1,2,3) == Base.LogRange{Float64}(1,2,3) + @test logrange(1f0, 2f0, length=3) == Base.LogRange{Float32}(1,2,3) + + # errors + @test_throws UndefKeywordError logrange(1, 10) # no default length + @test_throws ArgumentError logrange(1, 10, -1) # negative length + @test_throws ArgumentError logrange(1, 10, 1) # endpoints must not differ + @test_throws DomainError logrange(1, -1, 3) # needs complex numbers + @test_throws DomainError logrange(-1, -2, 3) # not supported, for now + @test_throws MethodError logrange(1, 2+3im, length=4) # not supported, for now + @test_throws ArgumentError logrange(1, 10, 2)[true] # bad index + @test_throws BoundsError logrange(1, 10, 2)[3] + @test_throws ArgumentError Base.LogRange{Int}(1,4,5) # no integer ranges + @test_throws MethodError Base.LogRange(1,4, length=5) # type does not take keyword + # (not sure if these should ideally be DomainError or ArgumentError) + @test_throws DomainError logrange(1, Inf, 3) + @test_throws DomainError logrange(0, 2, 3) + @test_throws DomainError logrange(1, NaN, 3) + @test_throws DomainError logrange(NaN, 2, 3) + + # printing + @test repr(Base.LogRange(1,2,3)) == "LogRange{Float64}(1.0, 2.0, 3)" # like 2-arg show + @test repr("text/plain", Base.LogRange(1,2,3)) == "3-element Base.LogRange{Float64, Base.TwicePrecision{Float64}}:\n 1.0, 1.41421, 2.0" + @test repr("text/plain", Base.LogRange(1,2,0)) == "LogRange{Float64}(1.0, 2.0, 0)" # empty case +end + +@testset "_log_twice64_unchecked" begin + # it roughly works + @test big(Base._log_twice64_unchecked(exp(1))) ≈ 1.0 + @test big(Base._log_twice64_unchecked(exp(123))) ≈ 123.0 + + # it gets high accuracy + @test abs(big(log(4.0)) - log(big(4.0))) < 1e-16 + @test abs(big(Base._log_twice64_unchecked(4.0)) - log(big(4.0))) < 1e-30 + + # it handles subnormals + @test abs(big(Base._log_twice64_unchecked(1e-310)) - log(big(1e-310))) < 1e-20 + + # it accepts negative, NaN, etc without complaint: + @test Base._log_twice64_unchecked(-0.0).lo isa Float64 + @test Base._log_twice64_unchecked(-1.23).lo isa Float64 + @test Base._log_twice64_unchecked(NaN).lo isa Float64 + @test Base._log_twice64_unchecked(Inf).lo isa Float64 +end + +@testset "OneTo promotion" begin + struct MyUnitRange{T} <: AbstractUnitRange{T} + range::UnitRange{T} + end + Base.first(r::MyUnitRange) = first(r.range) + Base.last(r::MyUnitRange) = last(r.range) + Base.size(r::MyUnitRange) = size(r.range) + Base.length(r::MyUnitRange) = length(r.range) + Base.getindex(r::MyUnitRange, i::Int) = 
getindex(r.range, i) + @test promote(MyUnitRange(2:3), Base.OneTo(3)) == (2:3, 1:3) + @test promote(MyUnitRange(UnitRange(3.0, 4.0)), Base.OneTo(3)) == (3.0:4.0, 1.0:3.0) +end diff --git a/test/rational.jl b/test/rational.jl index 4b29618bd15e0..90b5414a6fe89 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -28,8 +28,8 @@ using Test @test (1//typemax(Int)) / (1//typemax(Int)) == 1 @test_throws OverflowError (1//2)^63 @test inv((1+typemin(Int))//typemax(Int)) == -1 - @test_throws ArgumentError inv(typemin(Int)//typemax(Int)) - @test_throws ArgumentError Rational(0x1, typemin(Int32)) + @test_throws OverflowError inv(typemin(Int)//typemax(Int)) + @test_throws OverflowError Rational(0x1, typemin(Int32)) @test @inferred(rationalize(Int, 3.0, 0.0)) === 3//1 @test @inferred(rationalize(Int, 3.0, 0)) === 3//1 @@ -43,15 +43,63 @@ using Test # issue 26823 @test_throws InexactError rationalize(Int, NaN) # issue 32569 - @test_throws ArgumentError 1 // typemin(Int) + @test_throws OverflowError 1 // typemin(Int) @test_throws ArgumentError 0 // 0 @test -2 // typemin(Int) == -1 // (typemin(Int) >> 1) @test 2 // typemin(Int) == 1 // (typemin(Int) >> 1) + # issue 32443 + @test Int8(-128)//Int8(1) == -128 + @test_throws OverflowError Int8(-128)//Int8(-1) + @test_throws OverflowError Int8(-1)//Int8(-128) + @test Int8(-128)//Int8(-2) == 64 + # issue 51731 + @test Rational{Int8}(-128) / Rational{Int8}(-128) === Rational{Int8}(1) + # issue 51731 + @test Rational{Int8}(-128) / Rational{Int8}(0) === Rational{Int8}(-1, 0) + @test Rational{Int8}(0) / Rational{Int8}(-128) === Rational{Int8}(0, 1) @test_throws InexactError Rational(UInt(1), typemin(Int32)) @test iszero(Rational{Int}(UInt(0), 1)) @test Rational{BigInt}(UInt(1), Int(-1)) == -1 - @test_broken Rational{Int64}(UInt(1), typemin(Int32)) == Int64(1) // Int64(typemin(Int32)) + @test Rational{Int64}(UInt(1), typemin(Int32)) == Int64(1) // Int64(typemin(Int32)) + + @testset "Rational{T} constructor with concrete T" begin + test_types = [Bool, Int8, Int64, Int128, UInt8, UInt64, UInt128, BigInt] + test_values = Any[ + Any[zero(T) for T in test_types]; + Any[one(T) for T in test_types]; + big(-1); + collect(Iterators.flatten( + (T(j) for T in (Int8, Int64, Int128)) for j in [-3:-1; -128:-126;] + )); + collect(Iterators.flatten( + (T(j) for T in (Int8, Int64, Int128, UInt8, UInt64, UInt128)) for j in [2:3; 126:127;] + )); + Any[typemax(T) for T in (Int64, Int128, UInt8, UInt64, UInt128)]; + Any[typemax(T)-one(T) for T in (Int64, Int128, UInt8, UInt64, UInt128)]; + Any[typemin(T) for T in (Int64, Int128)]; + Any[typemin(T)+one(T) for T in (Int64, Int128)]; + ] + for x in test_values, y in test_values + local big_r = iszero(x) && iszero(y) ? 
nothing : big(x) // big(y) + for T in test_types + if iszero(x) && iszero(y) + @test_throws Exception Rational{T}(x, y) + elseif Base.hastypemax(T) + local T_range = typemin(T):typemax(T) + if numerator(big_r) ∈ T_range && denominator(big_r) ∈ T_range + @test big_r == Rational{T}(x, y) + @test Rational{T} == typeof(Rational{T}(x, y)) + else + @test_throws Exception Rational{T}(x, y) + end + else + @test big_r == Rational{T}(x, y) + @test Rational{T} == typeof(Rational{T}(x, y)) + end + end + end + end for a = -5:5, b = -5:5 if a == b == 0; continue; end @@ -537,6 +585,7 @@ end 100798//32085 103993//33102 312689//99532 ] + @test rationalize(pi) === rationalize(BigFloat(pi)) end @testset "issue #12536" begin @@ -606,7 +655,7 @@ end @test gcdx(T(1)//T(1), T(1)//T(0)) === (T(1)//T(0), T(0), T(1)) @test gcdx(T(1)//T(0), T(1)//T(0)) === (T(1)//T(0), T(1), T(1)) @test gcdx(T(1)//T(0), T(0)//T(1)) === (T(1)//T(0), T(1), T(0)) - @test gcdx(T(0)//T(1), T(0)//T(1)) === (T(0)//T(1), T(1), T(0)) + @test gcdx(T(0)//T(1), T(0)//T(1)) === (T(0)//T(1), T(0), T(0)) if T <: Signed @test gcdx(T(-1)//T(0), T(1)//T(2)) === (T(1)//T(0), T(1), T(0)) @@ -653,6 +702,22 @@ end end end +@testset "gcdx for 1 and 3+ arguments" begin + # one-argument + @test gcdx(7) == (7, 1) + @test gcdx(-7) == (7, -1) + @test gcdx(1//4) == (1//4, 1) + + # 3+ arguments + @test gcdx(2//3) == gcdx(2//3) == (2//3, 1) + @test gcdx(15, 12, 20) == (1, 7, -7, -1) + @test gcdx(60//4, 60//5, 60//3) == (1//1, 7, -7, -1) + abcd = (105, 1638, 2145, 3185) + d, uvwp... = gcdx(abcd...) + @test d == sum(abcd .* uvwp) # u*a + v*b + w*c + p*d == gcd(a, b, c, d) + @test (@inferred gcdx(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) isa NTuple{11, Int} +end + @testset "Binary operations with Integer" begin @test 1//2 - 1 == -1//2 @test -1//2 + 1 == 1//2 @@ -715,6 +780,19 @@ end @testset "Rational{T} with non-concrete T (issue #41222)" begin @test @inferred(Rational{Integer}(2,3)) isa Rational{Integer} + @test @inferred(Rational{Unsigned}(2,3)) isa Rational{Unsigned} + @test @inferred(Rational{Signed}(2,3)) isa Rational{Signed} + @test_throws InexactError Rational{Unsigned}(-1,1) + @test_throws InexactError Rational{Unsigned}(-1) + @test Rational{Unsigned}(Int8(-128), Int8(-128)) === Rational{Unsigned}(0x01, 0x01) + @test Rational{Unsigned}(Int8(-128), Int8(-1)) === Rational{Unsigned}(0x80, 0x01) + @test Rational{Unsigned}(Int8(0), Int8(-128)) === Rational{Unsigned}(0x00, 0x01) + # Numerator and denominator should have the same type. 
+ @test Rational{Integer}(0x02) === Rational{Integer}(0x02, 0x01) + @test Rational{Integer}(Int16(3)) === Rational{Integer}(Int16(3), Int16(1)) + @test Rational{Integer}(0x01,-1) === Rational{Integer}(-1, 1) + @test Rational{Integer}(-1, 0x01) === Rational{Integer}(-1, 1) + @test_throws InexactError Rational{Integer}(Int8(-1), UInt8(1)) end @testset "issue #41489" begin @@ -739,3 +817,20 @@ end @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) end + +@testset "complex numerator, denominator" begin + z = complex(3*3, 2*3*5) + @test z === numerator(z) === numerator(z // 2) === numerator(z // 5) + @test complex(3, 2*5) === numerator(z // 3) + @test isone(denominator(z)) + @test 2 === denominator(z // 2) + @test 1 === denominator(z // 3) + @test 5 === denominator(z // 5) + for den ∈ 1:10 + q = z // den + @test q === (numerator(q)//denominator(q)) + end + @testset "do not overflow silently" begin + @test_throws OverflowError numerator(Int8(1)//Int8(31) + Int8(8)im//Int8(3)) + end +end diff --git a/test/read.jl b/test/read.jl index b8060a023333f..99903d92d270f 100644 --- a/test/read.jl +++ b/test/read.jl @@ -145,6 +145,7 @@ for (name, f) in l verbose && println("$name readuntil...") for (t, s, m, kept) in [ + ("a", "", "", ""), ("a", "ab", "a", "a"), ("b", "ab", "b", "b"), ("α", "αγ", "α", "α"), @@ -152,20 +153,27 @@ for (name, f) in l ("bc", "abc", "bc", "bc"), ("αβ", "αβγ", "αβ", "αβ"), ("aaabc", "ab", "aa", "aaab"), + ("aaabc", "b", "aaa", "aaab"), ("aaabc", "ac", "aaabc", "aaabc"), ("aaabc", "aab", "a", "aaab"), ("aaabc", "aac", "aaabc", "aaabc"), ("αααβγ", "αβ", "αα", "αααβ"), + ("αααβγ", "β", "ααα", "αααβ"), ("αααβγ", "ααβ", "α", "αααβ"), ("αααβγ", "αγ", "αααβγ", "αααβγ"), ("barbarbarians", "barbarian", "bar", "barbarbarian"), ("abcaabcaabcxl", "abcaabcx", "abca", "abcaabcaabcx"), ("abbaabbaabbabbaax", "abbaabbabbaax", "abba", "abbaabbaabbabbaax"), ("abbaabbabbaabbaabbabbaax", "abbaabbabbaax", "abbaabbabba", "abbaabbabbaabbaabbabbaax"), + ('a'^500 * 'x' * "bbbb", "x", 'a'^500, 'a'^500 * 'x'), ] local t, s, m, kept @test readuntil(io(t), s) == m @test readuntil(io(t), s, keep=true) == kept + if isone(length(s)) + @test readuntil(io(t), first(s)) == m + @test readuntil(io(t), first(s), keep=true) == kept + end @test readuntil(io(t), SubString(s, firstindex(s))) == m @test readuntil(io(t), SubString(s, firstindex(s)), keep=true) == kept @test readuntil(io(t), GenericString(s)) == m @@ -174,6 +182,18 @@ for (name, f) in l @test readuntil(io(t), unsafe_wrap(Vector{UInt8},s), keep=true) == unsafe_wrap(Vector{UInt8},kept) @test readuntil(io(t), collect(s)::Vector{Char}) == Vector{Char}(m) @test readuntil(io(t), collect(s)::Vector{Char}, keep=true) == Vector{Char}(kept) + + buf = IOBuffer() + @test String(take!(copyuntil(buf, io(t), s))) == m + @test String(take!(copyuntil(buf, io(t), s, keep=true))) == kept + file = tempname() + for (k,m) in ((false, m), (true, kept)) + open(file, "w") do f + @test f == copyuntil(f, io(t), s, keep=k) + end + @test read(file, String) == m + end + rm(file) end cleanup() @@ -248,13 +268,27 @@ for (name, f) in l n2 = readbytes!(s2, a2) @test n1 == n2 @test length(a1) == length(a2) - @test a1[1:n1] == a2[1:n2] + let l = min(l, n) + @test a1[1:l] == a2[1:l] + end @test n <= length(text) || eof(s1) @test n <= length(text) || eof(s2) cleanup() end + # Test growing output array + let x = UInt8[], + io = io() + n = readbytes!(io, x) + @test n == 0 + @test isempty(x) + n = readbytes!(io, x, 
typemax(Int)) + @test n == length(x) + @test x == codeunits(text) + cleanup() + end + verbose && println("$name read!...") l = length(text) for n = [1, 2, l-2, l-1, l] @@ -281,8 +315,45 @@ for (name, f) in l cleanup() verbose && println("$name readline...") - @test readline(io(), keep=true) == readline(IOBuffer(text), keep=true) - @test readline(io(), keep=true) == readline(filename, keep=true) + file = tempname() + for lineending in ("\n", "\r\n", "") + kept = "foo bar" * lineending + t = isempty(lineending) ? "foo bar" : kept * "baz\n" + write(file, t) + @test readline(io(t)) == readline(file) == "foo bar" + @test readline(io(t), keep=true) == readline(file, keep=true) == kept + + @test String(take!(copyline(IOBuffer(), file))) == "foo bar" + @test String(take!(copyline(IOBuffer(), file, keep=true))) == kept + + cleanup() + + buf = IOBuffer() + @test buf === copyline(buf, io(t)) + @test String(take!(buf)) == "foo bar" + @test String(take!(copyline(buf, file, keep=true))) == kept + for keep in (true, false) + open(file, "w") do f + @test f === copyline(f, io(t), keep=keep) + end + @test read(file, String) == (keep ? kept : "foo bar") + end + + cleanup() + + write(file, lineending) + @test readline(IOBuffer(lineending)) == "" + @test readline(IOBuffer(lineending), keep=true) == lineending + @test String(take!(copyline(IOBuffer(), IOBuffer(lineending)))) == "" + @test String(take!(copyline(IOBuffer(), IOBuffer(lineending), keep=true))) == lineending + @test readline(file) == "" + @test readline(file, keep=true) == lineending + @test String(take!(copyline(IOBuffer(), file))) == "" + @test String(take!(copyline(IOBuffer(), file, keep=true))) == lineending + + cleanup() + end + rm(file) verbose && println("$name readlines...") @test readlines(io(), keep=true) == readlines(IOBuffer(text), keep=true) @@ -420,12 +491,6 @@ let s = "qwerty" @test read(IOBuffer(s)) == codeunits(s) @test read(IOBuffer(s), 10) == codeunits(s) @test read(IOBuffer(s), 1) == codeunits(s)[1:1] - - # Test growing output array - x = UInt8[] - n = readbytes!(IOBuffer(s), x, 10) - @test x == codeunits(s) - @test n == length(x) end @@ -624,6 +689,21 @@ end @test isempty(itr) # now it is empty end +@testset "readuntil/copyuntil fallbacks" begin + # test fallback for generic delim::T + buf = IOBuffer() + fib = [1,1,2,3,5,8,13,21] + write(buf, fib) + @test readuntil(seekstart(buf), 21) == fib[1:end-1] + @test readuntil(buf, 21) == Int[] + @test readuntil(seekstart(buf), 21; keep=true) == fib + out = IOBuffer() + @test copyuntil(out, seekstart(buf), 21) === out + @test reinterpret(Int, take!(out)) == fib[1:end-1] + @test copyuntil(out, seekstart(buf), 21; keep=true) === out + @test reinterpret(Int, take!(out)) == fib +end + # more tests for reverse(eachline) @testset "reverse(eachline)" begin lines = vcat(repr.(1:4), ' '^50000 .* repr.(5:10), repr.(11:10^5)) @@ -652,3 +732,21 @@ end @test isempty(r) && isempty(collect(r)) end end + +@testset "Ref API" begin + io = PipeBuffer() + @test write(io, Ref{Any}(0xabcd_1234)) === 4 + @test read(io, UInt32) === 0xabcd_1234 + @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Cvoid}}, io, C_NULL) + @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Int}}, io, Ptr{Int}(0)) + @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Any}}, io, Ptr{Any}(0)) + @test_throws ErrorException("read! 
cannot copy into a Ptr") read!(io, C_NULL) + @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, Ptr{Int}(0)) + @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, Ptr{Any}(0)) + @test eof(io) + @test write(io, C_NULL) === sizeof(Int) + @test write(io, Ptr{Int}(4)) === sizeof(Int) + @test write(io, Ptr{Any}(5)) === sizeof(Int) + @test read!(io, Int[1, 2, 3]) == [0, 4, 5] + @test eof(io) +end diff --git a/test/rebinding.jl b/test/rebinding.jl new file mode 100644 index 0000000000000..10da27ce3ad8f --- /dev/null +++ b/test/rebinding.jl @@ -0,0 +1,50 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module Rebinding + using Test + + make_foo() = Foo(1) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD + struct Foo + x::Int + end + const defined_world_age = Base.tls_world_age() + x = Foo(1) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_CONST + @test !contains(repr(x), "@world") + Base.delete_binding(@__MODULE__, :Foo) + + @test Base.binding_kind(@__MODULE__, :Foo) == Base.BINDING_KIND_GUARD + @test contains(repr(x), "@world") + + struct Foo + x::Int + end + @test Foo != typeof(x) + + # This tests that the compiler uses the correct world, but does not test + # invalidation. + @test typeof(Base.invoke_in_world(defined_world_age, make_foo)) == typeof(x) + @test typeof(make_foo()) == Foo + + # Tests for @world syntax + @test Base.@world(Foo, defined_world_age) == typeof(x) + nameof(@__MODULE__) === :Rebinding && @test Base.@world(Rebinding.Foo, defined_world_age) == typeof(x) + @test Base.@world((@__MODULE__).Foo, defined_world_age) == typeof(x) + + # Test invalidation (const -> undefined) + const delete_me = 1 + f_return_delete_me() = delete_me + @test f_return_delete_me() == 1 + Base.delete_binding(@__MODULE__, :delete_me) + @test_throws UndefVarError f_return_delete_me() + + ## + via indirect access + const delete_me = 2 + f_return_delete_me_indirect() = getglobal(@__MODULE__, :delete_me) + @test f_return_delete_me_indirect() == 2 + Base.delete_binding(@__MODULE__, :delete_me) + @test_throws UndefVarError f_return_delete_me_indirect() +end diff --git a/test/reduce.jl b/test/reduce.jl index 4c05b179edcff..f5140c8a34bd9 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -53,8 +53,8 @@ end @test reduce(max, [8 6 7 5 3 0 9]) == 9 @test reduce(+, 1:5; init=1000) == (1000 + 1 + 2 + 3 + 4 + 5) @test reduce(+, 1) == 1 -@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, ()) -@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, Union{}[]) +@test_throws "reducing over an empty collection is not allowed" reduce(*, ()) +@test_throws "reducing over an empty collection is not allowed" reduce(*, Union{}[]) # mapreduce @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3) @@ -91,8 +91,7 @@ end @test mapreduce(abs2, *, Float64[]) === 1.0 @test mapreduce(abs2, max, Float64[]) === 0.0 @test mapreduce(abs, max, Float64[]) === 0.0 -@test_throws ["reducing over an empty collection is not allowed", - "consider supplying `init`"] mapreduce(abs2, &, Float64[]) +@test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, &, Float64[]) @test_throws str -> !occursin("Closest candidates are", str) mapreduce(abs2, &, Float64[]) @test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, |, Float64[]) @@ -144,9 +143,8 @@ fz = float(z) @test sum(z) === 136 @test 
sum(fz) === 136.0 -@test_throws "reducing with add_sum over an empty collection of element type Union{} is not allowed" sum(Union{}[]) -@test_throws ["reducing over an empty collection is not allowed", - "consider supplying `init`"] sum(sin, Int[]) +@test_throws "reducing over an empty collection is not allowed" sum(Union{}[]) +@test_throws "reducing over an empty collection is not allowed" sum(sin, Int[]) @test sum(sin, 3) == sin(3.0) @test sum(sin, [3]) == sin(3.0) a = sum(sin, z) @@ -439,39 +437,39 @@ end # any & all -@test @inferred any([]) == false -@test @inferred any(Bool[]) == false -@test @inferred any([true]) == true -@test @inferred any([false, false]) == false -@test @inferred any([false, true]) == true -@test @inferred any([true, false]) == true -@test @inferred any([true, true]) == true -@test @inferred any([true, true, true]) == true -@test @inferred any([true, false, true]) == true -@test @inferred any([false, false, false]) == false - -@test @inferred all([]) == true -@test @inferred all(Bool[]) == true -@test @inferred all([true]) == true -@test @inferred all([false, false]) == false -@test @inferred all([false, true]) == false -@test @inferred all([true, false]) == false -@test @inferred all([true, true]) == true -@test @inferred all([true, true, true]) == true -@test @inferred all([true, false, true]) == false -@test @inferred all([false, false, false]) == false - -@test @inferred any(x->x>0, []) == false -@test @inferred any(x->x>0, Int[]) == false -@test @inferred any(x->x>0, [-3]) == false -@test @inferred any(x->x>0, [4]) == true -@test @inferred any(x->x>0, [-3, 4, 5]) == true - -@test @inferred all(x->x>0, []) == true -@test @inferred all(x->x>0, Int[]) == true -@test @inferred all(x->x>0, [-3]) == false -@test @inferred all(x->x>0, [4]) == true -@test @inferred all(x->x>0, [-3, 4, 5]) == false +@test @inferred(Union{Missing,Bool}, any([])) == false +@test @inferred(any(Bool[])) == false +@test @inferred(any([true])) == true +@test @inferred(any([false, false])) == false +@test @inferred(any([false, true])) == true +@test @inferred(any([true, false])) == true +@test @inferred(any([true, true])) == true +@test @inferred(any([true, true, true])) == true +@test @inferred(any([true, false, true])) == true +@test @inferred(any([false, false, false])) == false + +@test @inferred(Union{Missing,Bool}, all([])) == true +@test @inferred(all(Bool[])) == true +@test @inferred(all([true])) == true +@test @inferred(all([false, false])) == false +@test @inferred(all([false, true])) == false +@test @inferred(all([true, false])) == false +@test @inferred(all([true, true])) == true +@test @inferred(all([true, true, true])) == true +@test @inferred(all([true, false, true])) == false +@test @inferred(all([false, false, false])) == false + +@test @inferred(Union{Missing,Bool}, any(x->x>0, [])) == false +@test @inferred(any(x->x>0, Int[])) == false +@test @inferred(any(x->x>0, [-3])) == false +@test @inferred(any(x->x>0, [4])) == true +@test @inferred(any(x->x>0, [-3, 4, 5])) == true + +@test @inferred(Union{Missing,Bool}, all(x->x>0, [])) == true +@test @inferred(all(x->x>0, Int[])) == true +@test @inferred(all(x->x>0, [-3])) == false +@test @inferred(all(x->x>0, [4])) == true +@test @inferred(all(x->x>0, [-3, 4, 5])) == false @test reduce((a, b) -> a .| b, fill(trues(5), 24)) == trues(5) @test reduce((a, b) -> a .| b, fill(falses(5), 24)) == falses(5) @@ -705,3 +703,32 @@ let a = NamedTuple(Symbol(:x,i) => i for i in 1:33), b = (a...,) @test fold_alloc(a) == fold_alloc(b) == 0 end + 
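Illustrative sketch (not part of the patch) of the two-argument `@inferred` form the rewritten any/all tests above rely on: with element type `Any`, inference cannot rule out `missing`, so `any`/`all` infer `Union{Missing,Bool}` rather than `Bool`, and the extra argument lets `@inferred` accept that widening while still checking the runtime value.

using Test

v = Any[]                             # eltype Any: inference cannot exclude `missing`
@inferred Union{Missing,Bool} any(v)  # inferred Union{Missing,Bool} fits the stated allowance; value is false
@inferred any(Bool[])                 # Bool[] infers Bool exactly, so the plain one-argument form still passes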
+@testset "concrete eval `[any|all](f, itr::Tuple)`" begin + intf = in((1,2,3)); Intf = typeof(intf) + symf = in((:one,:two,:three)); Symf = typeof(symf) + @test Core.Compiler.is_foldable(Base.infer_effects(intf, (Int,))) + @test Core.Compiler.is_foldable(Base.infer_effects(symf, (Symbol,))) + @test Core.Compiler.is_foldable(Base.infer_effects(all, (Intf,Tuple{Int,Int,Int}))) + @test Core.Compiler.is_foldable(Base.infer_effects(all, (Symf,Tuple{Symbol,Symbol,Symbol}))) + @test Core.Compiler.is_foldable(Base.infer_effects(any, (Intf,Tuple{Int,Int,Int}))) + @test Core.Compiler.is_foldable(Base.infer_effects(any, (Symf,Tuple{Symbol,Symbol,Symbol}))) + @test Base.return_types() do + Val(all(in((1,2,3)), (1,2,3))) + end |> only == Val{true} + @test Base.return_types() do + Val(all(in((1,2,3)), (1,2,3,4))) + end |> only == Val{false} + @test Base.return_types() do + Val(any(in((1,2,3)), (4,5,3))) + end |> only == Val{true} + @test Base.return_types() do + Val(any(in((1,2,3)), (4,5,6))) + end |> only == Val{false} + @test Base.return_types() do + Val(all(in((:one,:two,:three)),(:three,:four))) + end |> only == Val{false} + @test Base.return_types() do + Val(any(in((:one,:two,:three)),(:four,:three))) + end |> only == Val{true} +end diff --git a/test/reducedim.jl b/test/reducedim.jl index daa0a3fbe1f92..6a6f20214058c 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -124,6 +124,18 @@ fill!(r, -6.3) fill!(r, -1.1) @test sum!(abs2, r, Breduc, init=false) ≈ safe_sumabs2(Breduc, 1) .- 1.1 +# issue #35199 +function issue35199_test(sizes, dims) + M = rand(Float64, sizes) + ax = axes(M) + n1 = @allocations Base.reduced_indices(ax, dims) + return @test n1 == 0 +end +for dims in (1, 2, (1,), (2,), (1,2)) + sizes = (64, 3) + issue35199_test(sizes, dims) +end + # Small arrays with init=false let A = reshape(1:15, 3, 5) R = fill(1, 3) @@ -564,8 +576,8 @@ end @testset "type of sum(::Array{$T}" for T in [UInt8, Int8, Int32, Int64, BigInt] result = sum(T[1 2 3; 4 5 6; 7 8 9], dims=2) @test result == hcat([6, 15, 24]) - @test eltype(result) === (T <: Base.SmallSigned ? Int : - T <: Base.SmallUnsigned ? UInt : + @test eltype(result) === (T <: Base.BitSignedSmall ? Int : + T <: Base.BitUnsignedSmall ? UInt : T) end @@ -575,6 +587,30 @@ end @test B[argmin(B, dims=[2, 3])] == @inferred(minimum(B, dims=[2, 3])) end +@testset "careful with @inbounds" begin + Base.@propagate_inbounds f(x) = x == 2 ? 
x[-10000] : x + Base.@propagate_inbounds op(x,y) = x[-10000] + y[-10000] + for (arr, dims) in (([1,1,2], 1), ([1 1 2], 2), ([ones(Int,256);2], 1)) + @test_throws BoundsError mapreduce(f, +, arr) + @test_throws BoundsError mapreduce(f, +, arr; dims) + @test_throws BoundsError mapreduce(f, +, arr; dims, init=0) + @test_throws BoundsError mapreduce(identity, op, arr) + try + #=@test_throws BoundsError=# mapreduce(identity, op, arr; dims) + catch ex + @test_broken ex isa BoundsError + end + @test_throws BoundsError mapreduce(identity, op, arr; dims, init=0) + + @test_throws BoundsError findmin(f, arr) + @test_throws BoundsError findmin(f, arr; dims) + + @test_throws BoundsError mapreduce(f, max, arr) + @test_throws BoundsError mapreduce(f, max, arr; dims) + @test_throws BoundsError mapreduce(f, max, arr; dims, init=0) + end +end + @testset "in-place reductions with mismatched dimensionalities" begin B = reshape(1:24, 4, 3, 2) for R in (fill(0, 4), fill(0, 4, 1), fill(0, 4, 1, 1)) @@ -608,7 +644,7 @@ end end @testset "NaN/missing test for extrema with dims #43599" begin for sz = (3, 10, 100) - for T in (Int, Float64, BigFloat) + for T in (Int, Float64, BigFloat, BigInt) Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz)) Aₘ[rand(1:sz*sz, sz)] .= missing unordered_test_for_extrema(Aₘ) @@ -622,9 +658,16 @@ end end end end -@test_broken minimum([missing;BigInt(1)], dims = 1) -@test_broken maximum([missing;BigInt(1)], dims = 1) -@test_broken extrema([missing;BigInt(1)], dims = 1) + +@testset "minimum/maximum over dims with missing (#35308)" begin + for T in (Int, Float64, BigInt, BigFloat) + x = Union{T, Missing}[1 missing; 2 missing] + @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :)) + @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :)) + @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1)) + @test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1)) + end +end # issue #26709 @testset "dimensional reduce with custom non-bitstype types" begin diff --git a/test/reflection.jl b/test/reflection.jl index c13e7d88d8cfd..9aa8fe512cd7c 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -2,7 +2,7 @@ using Test -include("compiler/irutils.jl") +include(joinpath(@__DIR__,"../Compiler/test/irutils.jl")) # code_native / code_llvm (issue #8239) # It's hard to really test these, but just running them should be @@ -125,11 +125,18 @@ not_const = 1 # For curmod_* include("testenv.jl") +module TestMod36529 + x36529 = 0 + y36529 = 1 + export y36529 +end + module TestMod7648 using Test import Base.convert import ..curmod_name, ..curmod -export a9475, foo9475, c7648, foo7648, foo7648_nomethods, Foo7648 +using ..TestMod36529: x36529 # doesn't import TestMod36529 or y36529, even though it's exported +export a9475, c7648, f9475, foo7648, foo7648_nomethods, Foo7648 const c7648 = 8 d7648 = 9 @@ -142,10 +149,11 @@ module TestModSub9475 using Test using ..TestMod7648 import ..curmod_name - export a9475, foo9475 + export a9475, f9475, f54609 a9475 = 5 b9475 = 7 - foo9475(x) = x + f9475(x) = x + f54609(x) = x let @test Base.binding_module(@__MODULE__, :a9475) == @__MODULE__ @test Base.binding_module(@__MODULE__, :c7648) == TestMod7648 @@ -169,18 +177,104 @@ let @test Base.binding_module(TestMod7648, :d7648) == TestMod7648 @test Base.binding_module(TestMod7648, :a9475) == TestMod7648.TestModSub9475 @test Base.binding_module(TestMod7648.TestModSub9475, :b9475) == TestMod7648.TestModSub9475 - @test Set(names(TestMod7648))==Set([:TestMod7648, :a9475, :foo9475, :c7648, 
:foo7648, :foo7648_nomethods, :Foo7648]) - @test Set(names(TestMod7648, all = true)) == Set([:TestMod7648, :TestModSub9475, :a9475, :foo9475, :c7648, :d7648, :f7648, - :foo7648, Symbol("#foo7648"), :foo7648_nomethods, Symbol("#foo7648_nomethods"), - :Foo7648, :eval, Symbol("#eval"), :include, Symbol("#include")]) - @test Set(names(TestMod7648, all = true, imported = true)) == Set([:TestMod7648, :TestModSub9475, :a9475, :foo9475, :c7648, :d7648, :f7648, - :foo7648, Symbol("#foo7648"), :foo7648_nomethods, Symbol("#foo7648_nomethods"), - :Foo7648, :eval, Symbol("#eval"), :include, Symbol("#include"), - :convert, :curmod_name, :curmod]) + defaultset = Set(Symbol[:Foo7648, :TestMod7648, :a9475, :c7648, :f9475, :foo7648, :foo7648_nomethods]) + allset = defaultset ∪ Set(Symbol[ + Symbol("#foo7648"), Symbol("#foo7648_nomethods"), + :TestModSub9475, :d7648, :eval, :f7648, :include]) + imported = Set(Symbol[:convert, :curmod_name, :curmod]) + usings_from_Test = Set(Symbol[ + Symbol("@inferred"), Symbol("@test"), Symbol("@test_broken"), Symbol("@test_deprecated"), + Symbol("@test_logs"), Symbol("@test_nowarn"), Symbol("@test_skip"), Symbol("@test_throws"), + Symbol("@test_warn"), Symbol("@testset"), :GenericArray, :GenericDict, :GenericOrder, + :GenericSet, :GenericString, :LogRecord, :Test, :TestLogger, :TestSetException, + :detect_ambiguities, :detect_unbound_args]) + usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module itself + usings = Set(Symbol[:x36529, :TestModSub9475, :f54609]) ∪ usings_from_Test ∪ usings_from_Base + @test Set(names(TestMod7648)) == defaultset + @test Set(names(TestMod7648, all=true)) == allset + @test Set(names(TestMod7648, all=true, imported=true)) == allset ∪ imported + @test Set(names(TestMod7648, usings=true)) == defaultset ∪ usings + @test Set(names(TestMod7648, all=true, usings=true)) == allset ∪ usings @test isconst(TestMod7648, :c7648) @test !isconst(TestMod7648, :d7648) end +# tests for `names(...; usings=true)` + +baremodule Test54609Simple +module Inner +export exported +global exported::Int = 1 +global unexported::Int = 0 +end +using Base: @assume_effects +using .Inner +end +let usings = names(Test54609Simple; usings=true) + @test Symbol("@assume_effects") ∈ usings + @test :Base ∉ usings + @test :exported ∈ usings + @test :unexported ∉ usings +end # baremodule Test54609Simple + +baremodule _Test54609Complex +export exported_new +using Base: @deprecate_binding +global exported_new = nothing +@deprecate_binding exported_old exported_new +end # baremodule _Test54609Complex +baremodule Test54609Complex +using .._Test54609Complex +end # baremodule Test54609Complex +let usings = names(Test54609Complex; usings=true) + @test :exported_new ∈ usings + @test :exported_old ∉ usings + @test :_Test54609Complex ∈ usings # should include the `using`ed module itself + usings_all = names(Test54609Complex; usings=true, all=true) + @test :exported_new ∈ usings_all + @test :exported_old ∈ usings_all # deprecated names should be included with `all=true` +end + +module TestMod54609 +module M1 + const m1_x = 1 + export m1_x +end +module M2 + const m2_x = 1 + export m2_x +end +module A + module B + f(x) = 1 + secret = 1 + module Inner2 end + end + module C + x = 1 + y = 2 + export y + end + using .B: f + using .C + using ..M1 + import ..M2 +end +end # module TestMod54609 +let defaultset = Set((:A,)) + imported = Set((:M2,)) + usings_from_Base = delete!(Set(names(Module(); usings=true)), :anonymous) # the name of the anonymous module 
itself + usings = Set((:A, :f, :C, :y, :M1, :m1_x)) ∪ usings_from_Base + allset = Set((:A, :B, :C, :eval, :include)) + @test Set(names(TestMod54609.A)) == defaultset + @test Set(names(TestMod54609.A, imported=true)) == defaultset ∪ imported + @test Set(names(TestMod54609.A, usings=true)) == defaultset ∪ usings + @test Set(names(TestMod54609.A, all=true)) == allset + @test Set(names(TestMod54609.A, all=true, usings=true)) == allset ∪ usings + @test Set(names(TestMod54609.A, imported=true, usings=true)) == defaultset ∪ imported ∪ usings + @test Set(names(TestMod54609.A, all=true, imported=true, usings=true)) == allset ∪ imported ∪ usings +end + let using .TestMod7648 @test Base.binding_module(@__MODULE__, :a9475) == TestMod7648.TestModSub9475 @@ -189,10 +283,10 @@ let @test parentmodule(foo7648, (Any,)) == TestMod7648 @test parentmodule(foo7648) == TestMod7648 @test parentmodule(foo7648_nomethods) == TestMod7648 - @test parentmodule(foo9475, (Any,)) == TestMod7648.TestModSub9475 - @test parentmodule(foo9475) == TestMod7648.TestModSub9475 + @test parentmodule(f9475, (Any,)) == TestMod7648.TestModSub9475 + @test parentmodule(f9475) == TestMod7648.TestModSub9475 @test parentmodule(Foo7648) == TestMod7648 - @test parentmodule(first(methods(foo9475))) == TestMod7648.TestModSub9475 + @test parentmodule(first(methods(f9475))) == TestMod7648.TestModSub9475 @test parentmodule(first(methods(foo7648))) == TestMod7648 @test nameof(Foo7648) === :Foo7648 @test basename(functionloc(foo7648, (Any,))[1]) == "reflection.jl" @@ -211,15 +305,21 @@ include("testenv.jl") # for curmod_str import Base.isexported global this_is_not_defined export this_is_not_defined +public this_is_public @test_throws ErrorException("\"this_is_not_defined\" is not defined in module Main") which(Main, :this_is_not_defined) @test_throws ErrorException("\"this_is_not_exported\" is not defined in module Main") which(Main, :this_is_not_exported) @test isexported(@__MODULE__, :this_is_not_defined) @test !isexported(@__MODULE__, :this_is_not_exported) +@test !isexported(@__MODULE__, :this_is_public) const a_value = 1 @test which(@__MODULE__, :a_value) === @__MODULE__ @test_throws ErrorException("\"a_value\" is not defined in module Main") which(Main, :a_value) @test which(Main, :Core) === Main @test !isexported(@__MODULE__, :a_value) +@test !Base.ispublic(@__MODULE__, :a_value) +@test Base.ispublic(@__MODULE__, :this_is_not_defined) +@test Base.ispublic(@__MODULE__, :this_is_public) +@test !Base.ispublic(@__MODULE__, :this_is_not_exported) end # PR 13825 @@ -468,7 +568,7 @@ fLargeTable() = 4 fLargeTable(::Union, ::Union) = "a" @test fLargeTable(Union{Int, Missing}, Union{Int, Missing}) == "a" fLargeTable(::Union, ::Union) = "b" -@test length(methods(fLargeTable)) == 205 +@test length(methods(fLargeTable)) == 206 @test fLargeTable(Union{Int, Missing}, Union{Int, Missing}) == "b" # issue #15280 @@ -586,7 +686,7 @@ let @test @inferred wrapperT(ReflectionExample{T, Int64} where T) == ReflectionExample @test @inferred wrapperT(ReflectionExample) == ReflectionExample @test @inferred wrapperT(Union{ReflectionExample{Union{},1},ReflectionExample{Float64,1}}) == ReflectionExample - @test_throws(ErrorException("typename does not apply to unions whose components have different typenames"), + @test_throws(Core.TypeNameError(Union{Int, Float64}), Base.typename(Union{Int, Float64})) end @@ -608,11 +708,16 @@ end sizeof(Real)) @test sizeof(Union{ComplexF32,ComplexF64}) == 16 @test sizeof(Union{Int8,UInt8}) == 1 -@test_throws ErrorException 
sizeof(AbstractArray) +@test sizeof(MemoryRef{Int}) == 2 * sizeof(Int) +@test sizeof(GenericMemoryRef{:atomic,Int,Core.CPU}) == 2 * sizeof(Int) +@test sizeof(Array{Int,0}) == 2 * sizeof(Int) +@test sizeof(Array{Int,1}) == 3 * sizeof(Int) +@test sizeof(Array{Int,2}) == 4 * sizeof(Int) +@test sizeof(Array{Int,20}) == 22 * sizeof(Int) @test_throws ErrorException sizeof(Tuple) @test_throws ErrorException sizeof(Tuple{Any,Any}) @test_throws ErrorException sizeof(String) -@test_throws ErrorException sizeof(Vector{Int}) +@test_throws ErrorException sizeof(Memory{false,Int}) @test_throws ErrorException sizeof(Symbol) @test_throws ErrorException sizeof(Core.SimpleVector) @test_throws ErrorException sizeof(Union{}) @@ -917,7 +1022,7 @@ end @test nameof(Any) === :Any @test nameof(:) === :Colon @test nameof(Core.Intrinsics.mul_int) === :mul_int -@test nameof(Core.Intrinsics.arraylen) === :arraylen +@test nameof(Core.Intrinsics.cglobal) === :cglobal module TestMod33403 f(x) = 1 @@ -993,9 +1098,18 @@ end @test Base.default_tt(m.f4) == Tuple end +@testset "lookup mi" begin + @test 1+1 == 2 + mi1 = Base.method_instance(+, (Int, Int)) + @test mi1.def.name == :+ + # Note `jl_method_lookup` doesn't returns CNull if not found + mi2 = @ccall jl_method_lookup(Any[+, 1, 1]::Ptr{Any}, 3::Csize_t, Base.get_world_counter()::Csize_t)::Ref{Core.MethodInstance} + @test mi1 == mi2 +end + Base.@assume_effects :terminates_locally function issue41694(x::Int) res = 1 - 1 < x < 20 || throw("bad") + 0 ≤ x < 20 || error("bad fact") while x > 1 res *= x x -= 1 @@ -1009,7 +1123,22 @@ ambig_effects_test(a::Int, b) = 1 ambig_effects_test(a, b::Int) = 1 ambig_effects_test(a, b) = 1 -@testset "infer_effects" begin +@testset "Base.infer_return_type[s]" begin + # generic function case + @test only(Base.return_types(issue41694, (Int,))) == Base.infer_return_type(issue41694, (Int,)) == Int + # case when it's not fully covered + @test only(Base.return_types(issue41694, (Integer,))) == Base.infer_return_type(issue41694, (Integer,)) == Int + # MethodError case + @test isempty(Base.return_types(issue41694, (Float64,))) + @test Base.infer_return_type(issue41694, (Float64,)) == Union{} + # builtin case + @test only(Base.return_types(typeof, (Any,))) == Base.infer_return_type(typeof, (Any,)) == DataType + @test only(Base.return_types(===, (Any,Any))) == Base.infer_return_type(===, (Any,Any)) == Bool + @test only(Base.return_types(setfield!, ())) == Base.infer_return_type(setfield!, ()) == Union{} + @test only(Base.return_types(Core.Intrinsics.mul_int, ())) == Base.infer_return_type(Core.Intrinsics.mul_int, ()) == Union{} +end + +@testset "Base.infer_effects" begin # generic functions @test Base.infer_effects(issue41694, (Int,)) |> Core.Compiler.is_terminates @test Base.infer_effects((Int,)) do x @@ -1033,7 +1162,34 @@ ambig_effects_test(a, b) = 1 @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_foldable_nothrow @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_foldable_nothrow @test (Base.infer_effects(setfield!, ()); true) # `builtin_effects` shouldn't throw on empty `argtypes` - @test (Base.infer_effects(Core.Intrinsics.arraylen, ()); true) # `intrinsic_effects` shouldn't throw on empty `argtypes` + @test (Base.infer_effects(Core.Intrinsics.mul_int, ()); true) # `intrinsic_effects` shouldn't throw on empty `argtypes` +end + +@testset "Base.infer_exception_type[s]" begin + # generic functions + @test Base.infer_exception_type(issue41694, (Int,)) == only(Base.infer_exception_types(issue41694, (Int,))) == ErrorException 
+ @test Base.infer_exception_type((Int,)) do x + issue41694(x) + end == Base.infer_exception_types((Int,)) do x + issue41694(x) + end |> only == ErrorException + @test Base.infer_exception_type(issue41694) == only(Base.infer_exception_types(issue41694)) == ErrorException # use `default_tt` + let excts = Base.infer_exception_types(maybe_effectful, (Any,)) + @test any(==(Any), excts) + @test any(==(Union{}), excts) + end + @test Base.infer_exception_type(maybe_effectful, (Any,)) == Any + # `infer_exception_type` should account for MethodError + @test Base.infer_exception_type(issue41694, (Float64,)) == MethodError # definitive dispatch error + @test Base.infer_exception_type(issue41694, (Integer,)) == Union{MethodError,ErrorException} # possible dispatch error + @test Base.infer_exception_type(f_no_methods) == MethodError # no possible matching methods + @test Base.infer_exception_type(ambig_effects_test, (Int,Int)) == MethodError # ambiguity error + @test Base.infer_exception_type(ambig_effects_test, (Int,Any)) == MethodError # ambiguity error + # builtins + @test Base.infer_exception_type(typeof, (Any,)) === only(Base.infer_exception_types(typeof, (Any,))) === Union{} + @test Base.infer_exception_type(===, (Any,Any)) === only(Base.infer_exception_types(===, (Any,Any))) === Union{} + @test (Base.infer_exception_type(setfield!, ()); Base.infer_exception_types(setfield!, ()); true) # `infer_exception_type[s]` shouldn't throw on empty `argtypes` + @test (Base.infer_exception_type(Core.Intrinsics.mul_int, ()); Base.infer_exception_types(Core.Intrinsics.mul_int, ()); true) # `infer_exception_type[s]` shouldn't throw on empty `argtypes` end @test Base._methods_by_ftype(Tuple{}, -1, Base.get_world_counter()) == Any[] @@ -1057,3 +1213,88 @@ end @test !Base.ismutationfree(Vector{UInt64}) @test Base.ismutationfree(Type{Union{}}) + +module TestNames + +public publicized +export exported + +publicized() = 1 +exported() = 1 +private() = 1 + +end + +@test names(TestNames) == [:TestNames, :exported, :publicized] + +# reflections for generated function with abstract input types + +# :generated_only function should return failed results if given abstract input types +@generated function generated_only_simple(x) + if x <: Integer + return :(x ^ 2) + else + return :(x) + end +end +@test only(Base.return_types(generated_only_simple, (Real,))) == + Base.infer_return_type(generated_only_simple, (Real,)) == + Core.Compiler.return_type(generated_only_simple, Tuple{Real}) == Any +let (src, rt) = only(code_typed(generated_only_simple, (Real,))) + @test src isa Method + @test rt == Any +end + +# optionally generated function should return fallback results if given abstract input types +function sub2ind_gen_impl(dims::Type{NTuple{N,Int}}, I...) where N + ex = :(I[$N] - 1) + for i = (N - 1):-1:1 + ex = :(I[$i] - 1 + dims[$i] * $ex) + end + return :($ex + 1) +end; +function sub2ind_gen_fallback(dims::NTuple{N,Int}, I) where N + ind = I[N] - 1 + for i = (N - 1):-1:1 + ind = I[i] - 1 + dims[i]*ind + end + return ind + 1 +end; +function sub2ind_gen(dims::NTuple{N,Int}, I::Integer...) where N + length(I) == N || error("partial indexing is unsupported") + if @generated + return sub2ind_gen_impl(dims, I...) 
+ else + return sub2ind_gen_fallback(dims, I) + end +end; +@test only(Base.return_types(sub2ind_gen, (NTuple,Int,Int,))) == Int +let (src, rt) = only(code_typed(sub2ind_gen, (NTuple,Int,Int,); optimize=false)) + @test src isa CodeInfo + @test rt == Int + @test any(iscall((src,sub2ind_gen_fallback)), src.code) + @test any(iscall((src,error)), src.code) +end + +# marking a symbol as public should not "unexport" it +# https://github.com/JuliaLang/julia/issues/52812 +module Mod52812 +using Test +export a, b +@test_throws ErrorException eval(Expr(:public, :a)) +public c +@test_throws ErrorException eval(Expr(:export, :c)) +export b +public c +end + +@test Base.isexported(Mod52812, :a) +@test Base.isexported(Mod52812, :b) +@test Base.ispublic(Mod52812, :a) +@test Base.ispublic(Mod52812, :b) +@test Base.ispublic(Mod52812, :c) && !Base.isexported(Mod52812, :c) + +@test Base.infer_return_type(code_lowered, (Any,)) == Vector{Core.CodeInfo} +@test Base.infer_return_type(code_lowered, (Any,Any)) == Vector{Core.CodeInfo} + +@test methods(Union{}) == Any[m.method for m in Base._methods_by_ftype(Tuple{Core.TypeofBottom, Vararg}, 1, Base.get_world_counter())] # issue #55187 diff --git a/test/regex.jl b/test/regex.jl index e5f1428527512..51802125a3467 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -101,15 +101,34 @@ @test haskey(m, 3) @test !haskey(m, 44) @test (m[1], m[2], m[3]) == ("x", "y", "z") + @test Tuple(m) == ("x", "y", "z") + @test NamedTuple(m) == (var"1"="x", var"2"="y", var"3"="z") + @test Dict(m) == Dict([1=>"x", 2=>"y", 3=>"z"]) @test sprint(show, m) == "RegexMatch(\"xyz\", 1=\"x\", 2=\"y\", 3=\"z\")" end # Named subpatterns + let m = match(r"(?.)(?.)(?.)", "xyz") + @test haskey(m, :a) + @test haskey(m, "b") + @test !haskey(m, "foo") + @test (m[:a], m[:c], m["b"]) == ("x", "y", "z") + @test Tuple(m) == ("x", "y", "z") + @test NamedTuple(m) == (a="x", c="y", b="z") + @test Dict(m) == Dict(["a"=>"x", "c"=>"y", "b"=>"z"]) + @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", c=\"y\", b=\"z\")" + @test keys(m) == ["a", "c", "b"] + end + + # Named and unnamed subpatterns let m = match(r"(?.)(.)(?.)", "xyz") @test haskey(m, :a) @test haskey(m, "b") @test !haskey(m, "foo") @test (m[:a], m[2], m["b"]) == ("x", "y", "z") + @test Tuple(m) == ("x", "y", "z") + @test NamedTuple(m) == (a="x", var"2"="y", b="z") + @test Dict(m) == Dict(["a"=>"x", 2=>"y", "b"=>"z"]) @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")" @test keys(m) == ["a", 2, "b"] end @@ -194,7 +213,7 @@ r = r"" * raw"a\Eb|c" @test match(r, raw"a\Eb|c").match == raw"a\Eb|c" - @test match(r, raw"c") == nothing + @test match(r, raw"c") === nothing # error for really incompatible options @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS) diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl index 501e9f4a9b57f..e6381329e4ec6 100644 --- a/test/reinterpretarray.jl +++ b/test/reinterpretarray.jl @@ -6,20 +6,28 @@ using .Main.OffsetArrays isdefined(Main, :TSlow) || @eval Main include("testhelpers/arrayindexingtypes.jl") using .Main: TSlow, WrapperArray +tslow(a::AbstractArray) = TSlow(a) +wrapper(a::AbstractArray) = WrapperArray(a) +fcviews(a::AbstractArray) = view(a, ntuple(Returns(:),ndims(a)-1)..., axes(a)[end]) +fcviews(a::AbstractArray{<:Any, 0}) = view(a) +tslow(t::Tuple) = map(tslow, t) +wrapper(t::Tuple) = map(wrapper, t) +fcviews(t::Tuple) = map(fcviews, t) + +test_many_wrappers(testf, A, wrappers) = foreach(w -> testf(w(A)), wrappers) 
+test_many_wrappers(testf, A) = test_many_wrappers(testf, A, (identity, tslow, wrapper, fcviews)) + A = Int64[1, 2, 3, 4] -As = TSlow(A) Ars = Int64[1 3; 2 4] -Arss = TSlow(Ars) B = Complex{Int64}[5+6im, 7+8im, 9+10im] -Bs = TSlow(B) Av = [Int32[1,2], Int32[3,4]] -for Ar in (Ars, Arss) +test_many_wrappers(Ars, (identity, tslow)) do Ar @test @inferred(ndims(reinterpret(reshape, Complex{Int64}, Ar))) == 1 @test @inferred(axes(reinterpret(reshape, Complex{Int64}, Ar))) === (Base.OneTo(2),) @test @inferred(size(reinterpret(reshape, Complex{Int64}, Ar))) == (2,) end -for _B in (B, Bs) +test_many_wrappers(B, (identity, tslow)) do _B @test @inferred(ndims(reinterpret(reshape, Int64, _B))) == 2 @test @inferred(axes(reinterpret(reshape, Int64, _B))) === (Base.OneTo(2), Base.OneTo(3)) @test @inferred(size(reinterpret(reshape, Int64, _B))) == (2, 3) @@ -32,9 +40,8 @@ end @test_throws ArgumentError("cannot reinterpret `Vector{Int32}` as `Int32`, type `Vector{Int32}` is not a bits type") reinterpret(Int32, Av) @test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int64` array to `Int32` which is of a different size") reinterpret(Int32, reshape([Int64(0)])) @test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int32` array to `Int64` which is of a different size") reinterpret(Int64, reshape([Int32(0)])) -@test_throws ArgumentError("""cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`. - The resulting array would have non-integral first dimension. - """) reinterpret(Tuple{Int,Int}, [1,2,3,4,5]) +@test_throws ArgumentError("cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`."* + " The resulting array would have a non-integral first dimension.") reinterpret(Tuple{Int,Int}, [1,2,3,4,5]) @test_throws ArgumentError("`reinterpret(reshape, Complex{Int64}, a)` where `eltype(a)` is Int64 requires that `axes(a, 1)` (got Base.OneTo(4)) be equal to 1:2 (from the ratio of element sizes)") reinterpret(reshape, Complex{Int64}, A) @test_throws ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got 24) and `sizeof(eltype(a))` (got 16) be an integer multiple of the other") reinterpret(reshape, NTuple{3, Int64}, B) @@ -42,24 +49,25 @@ end @test_throws ArgumentError("cannot reinterpret a zero-dimensional `UInt8` array to `UInt16` which is of a larger size") reinterpret(reshape, UInt16, reshape([0x01])) # getindex -for _A in (A, As) +test_many_wrappers(A) do _A @test reinterpret(Complex{Int64}, _A) == [1 + 2im, 3 + 4im] @test reinterpret(Float64, _A) == reinterpret.(Float64, A) @test reinterpret(reshape, Float64, _A) == reinterpret.(Float64, A) end -for Ar in (Ars, Arss) +test_many_wrappers(Ars) do Ar @test reinterpret(reshape, Complex{Int64}, Ar) == [1 + 2im, 3 + 4im] @test reinterpret(reshape, Float64, Ar) == reinterpret.(Float64, Ars) end -for _B in (B, Bs) +test_many_wrappers(B) do _B @test reinterpret(NTuple{3, Int64}, _B) == [(5,6,7),(8,9,10)] @test reinterpret(reshape, Int64, _B) == [5 7 9; 6 8 10] end # setindex -for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs)) - let Ac = copy(_A), Arsc = copy(Ar), Bc = copy(_B) +test_many_wrappers((A, Ars, B)) do (A, Ars, B) + _A, Ar, _B = deepcopy(A), deepcopy(Ars), deepcopy(B) + let Ac = deepcopy(_A), Arsc = deepcopy(Ar), Bc = deepcopy(_B) reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im @test Ac == [1, 2, -1, -2] reinterpret(Complex{Int64}, Arsc)[2] = -1 - 2im @@ -94,50 +102,67 @@ for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs)) end end A3 = 
collect(reshape(1:18, 2, 3, 3)) -A3r = reinterpret(reshape, Complex{Int}, A3) -@test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im -A3r[2,3] = -8-15im -@test A3[1,2,3] == -8 -@test A3[2,2,3] == -15 -A3r[4] = 100+200im -@test A3[1,1,2] == 100 -@test A3[2,1,2] == 200 -A3r[CartesianIndex(1,2)] = 300+400im -@test A3[1,1,2] == 300 -@test A3[2,1,2] == 400 +test_many_wrappers(A3) do A3_ + A3 = deepcopy(A3_) + A3r = reinterpret(reshape, Complex{Int}, A3) + @test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im + A3r[2,3] = -8-15im + @test A3[1,2,3] == -8 + @test A3[2,2,3] == -15 + A3r[4] = 100+200im + @test A3[1,1,2] == 100 + @test A3[2,1,2] == 200 + A3r[CartesianIndex(1,2)] = 300+400im + @test A3[1,1,2] == 300 + @test A3[2,1,2] == 400 +end # same-size reinterpret where one of the types is non-primitive -let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(Float32, a) - @test ra[1] == reinterpret(Float32, 0x04030201) - @test setindex!(ra, 2.0) === ra - @test reinterpret(Float32, a)[1] == 2.0 +let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)] + test_many_wrappers(a, (identity, wrapper, fcviews)) do a_ + a = deepcopy(a_) + ra = reinterpret(Float32, a) + @test ra[1] == reinterpret(Float32, 0x04030201) + @test setindex!(ra, 2.0) === ra + @test reinterpret(Float32, a)[1] == 2.0 + end end -let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(reshape, Float32, a) - @test ra[1] == reinterpret(Float32, 0x04030201) - @test setindex!(ra, 2.0) === ra - @test reinterpret(reshape, Float32, a)[1] == 2.0 +let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)] + test_many_wrappers(a, (identity, wrapper, fcviews)) do a_ + a = deepcopy(a_) + ra = reinterpret(reshape, Float32, a) + @test ra[1] == reinterpret(Float32, 0x04030201) + @test setindex!(ra, 2.0) === ra + @test reinterpret(reshape, Float32, a)[1] == 2.0 + end end # Pass-through indexing B = Complex{Int64}[5+6im, 7+8im, 9+10im] -Br = reinterpret(reshape, Int64, B) -W = WrapperArray(Br) -for (b, w) in zip(5:10, W) - @test b == w -end -for (i, j) in zip(eachindex(W), 11:16) - W[i] = j +test_many_wrappers(B) do B_ + B = deepcopy(B_) + Br = reinterpret(reshape, Int64, B) + W = WrapperArray(Br) + for (b, w) in zip(5:10, W) + @test b == w + end + for (i, j) in zip(eachindex(W), 11:16) + W[i] = j + end + @test B[1] === Complex{Int64}(11+12im) + @test B[2] === Complex{Int64}(13+14im) + @test B[3] === Complex{Int64}(15+16im) end -@test B[1] === Complex{Int64}(11+12im) -@test B[2] === Complex{Int64}(13+14im) -@test B[3] === Complex{Int64}(15+16im) z3 = (0x00, 0x00, 0x00) Az = [z3 z3; z3 z3] -Azr = reinterpret(reshape, UInt8, Az) -W = WrapperArray(Azr) -copyto!(W, fill(0x01, 3, 2, 2)) -@test all(isequal((0x01, 0x01, 0x01)), Az) -@test eachindex(W, W) == eachindex(W) +test_many_wrappers(Az, (identity, wrapper)) do Az_ + Az = deepcopy(Az_) + Azr = reinterpret(reshape, UInt8, Az) + W = WrapperArray(Azr) + copyto!(W, fill(0x01, 3, 2, 2)) + @test all(isequal((0x01, 0x01, 0x01)), Az) + @test eachindex(W, W) == eachindex(W) +end # ensure that reinterpret arrays aren't erroneously classified as strided let A = reshape(1:20, 5, 4) @@ -169,7 +194,7 @@ function check_strides(A::AbstractArray) end @testset "strides for NonReshapedReinterpretArray" begin - A = Array{Int32}(reshape(1:88, 11, 8)) + A = WrapperArray(Array{Int32}(reshape(1:88, 11, 8))) for viewax2 in (1:8, 1:2:6, 7:-1:1, 5:-2:1, 2:3:8, 7:-6:1, 3:5:11) # dim1 is contiguous for T in (Int16, Float32) @@ -203,7 +228,7 @@ end end @testset "strides for ReshapedReinterpretArray" begin - A 
= Array{Int32}(reshape(1:192, 3, 8, 8)) + A = WrapperArray(Array{Int32}(reshape(1:192, 3, 8, 8))) for viewax1 in (1:8, 1:2:8, 8:-1:1, 8:-2:1), viewax2 in (1:2, 4:-1:1) for T in (Int16, Float32) @test check_strides(reinterpret(reshape, T, view(A, 1:2, viewax1, viewax2))) @@ -240,7 +265,8 @@ end end # IndexStyle -let a = fill(1.0, 5, 3) +test_many_wrappers(fill(1.0, 5, 3), (identity, wrapper)) do a_ + a = deepcopy(a_) r = reinterpret(Int64, a) @test @inferred(IndexStyle(r)) == IndexLinear() fill!(r, 2) @@ -293,14 +319,13 @@ let a = fill(1.0, 5, 3) @test setindex!(r, -5, goodinds...) === r @test r[goodinds...] == -5 end - - ar = [(1,2), (3,4)] +end +let ar = [(1,2), (3,4)] arr = reinterpret(reshape, Int, ar) @test @inferred(IndexStyle(arr)) == Base.IndexSCartesian2{2}() @test @inferred(eachindex(arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2)) @test @inferred(eachindex(arr, arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2)) end - # Error on reinterprets that would expose padding struct S1 a::Int8 @@ -314,11 +339,14 @@ end A1 = S1[S1(0, 0)] A2 = S2[S2(0, 0)] -@test reinterpret(S1, A2)[1] == S1(0, 0) -@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2)) -@test_throws Base.PaddingError reinterpret(S2, A1)[1] -reinterpret(S2, A1)[1] = S2(1, 2) -@test A1[1] == S1(1, 2) +test_many_wrappers((A1, A2), (identity, wrapper)) do (A1_, A2_) + A1, A2 = deepcopy(A1_), deepcopy(A2_) + @test reinterpret(S1, A2)[1] == S1(0, 0) + @test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2)) + @test_throws Base.PaddingError reinterpret(S2, A1)[1] + reinterpret(S2, A1)[1] = S2(1, 2) + @test A1[1] == S1(1, 2) +end # Unconventional axes let a = [0.1 0.2; 0.3 0.4], at = reshape([(i,i+1) for i = 1:2:8], 2, 2) @@ -371,50 +399,59 @@ end # Test 0-dimensional Arrays A = zeros(UInt32) -B = reinterpret(Int32, A) -Brs = reinterpret(reshape,Int32, A) -C = reinterpret(Tuple{UInt32}, A) # non-primitive type -Crs = reinterpret(reshape, Tuple{UInt32}, A) # non-primitive type -@test size(B) == size(Brs) == size(C) == size(Crs) == () -@test axes(B) == axes(Brs) == axes(C) == axes(Crs) == () -@test setindex!(B, Int32(5)) === B -@test B[] === Int32(5) -@test Brs[] === Int32(5) -@test C[] === (UInt32(5),) -@test Crs[] === (UInt32(5),) -@test A[] === UInt32(5) -@test setindex!(Brs, Int32(12)) === Brs -@test A[] === UInt32(12) -@test setindex!(C, (UInt32(7),)) === C -@test A[] === UInt32(7) -@test setindex!(Crs, (UInt32(3),)) === Crs -@test A[] === UInt32(3) - - -a = [(1.0,2.0)] -af = @inferred(reinterpret(reshape, Float64, a)) -anew = @inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af))) -@test anew[1] == a[1] -@test ndims(anew) == 0 +test_many_wrappers(A, (identity, wrapper)) do A_ + A = deepcopy(A_) + B = reinterpret(Int32, A) + Brs = reinterpret(reshape,Int32, A) + C = reinterpret(Tuple{UInt32}, A) # non-primitive type + Crs = reinterpret(reshape, Tuple{UInt32}, A) # non-primitive type + @test size(B) == size(Brs) == size(C) == size(Crs) == () + @test axes(B) == axes(Brs) == axes(C) == axes(Crs) == () + @test setindex!(B, Int32(5)) === B + @test B[] === Int32(5) + @test Brs[] === Int32(5) + @test C[] === (UInt32(5),) + @test Crs[] === (UInt32(5),) + @test A[] === UInt32(5) + @test setindex!(Brs, Int32(12)) === Brs + @test A[] === UInt32(12) + @test setindex!(C, (UInt32(7),)) === C + @test A[] === UInt32(7) + @test setindex!(Crs, (UInt32(3),)) === Crs + @test A[] === UInt32(3) +end + +test_many_wrappers([(1.0,2.0)], (identity, wrapper)) do a + af = @inferred(reinterpret(reshape, Float64, a)) + anew = 
@inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af))) + @test anew[1] == a[1] + @test ndims(anew) == 0 +end # re-reinterpret a0 = reshape([0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04], 4, 2) -a = reinterpret(reshape, NTuple{4,UInt8}, a0) -@test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)] -@test reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04] -@test reinterpret(reshape, UInt8, a) === a0 +test_many_wrappers(a0, (identity, wrapper)) do a0 + a = reinterpret(reshape, NTuple{4,UInt8}, a0) + @test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)] + @test reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04] + @test reinterpret(reshape, UInt8, a) === a0 +end # reductions a = [(1,2,3), (4,5,6)] -ars = reinterpret(reshape, Int, a) -@test sum(ars) == 21 -@test sum(ars; dims=1) == [6 15] -@test sum(ars; dims=2) == reshape([5,7,9], (3, 1)) -@test sum(ars; dims=(1,2)) == reshape([21], (1, 1)) +test_many_wrappers(a, (identity, wrapper)) do a + ars = reinterpret(reshape, Int, a) + @test sum(ars) == 21 + @test sum(ars; dims=1) == [6 15] + @test sum(ars; dims=2) == reshape([5,7,9], (3, 1)) + @test sum(ars; dims=(1,2)) == reshape([21], (1, 1)) +end # also test large sizes for the pairwise algorithm a = [(k,k+1,k+2) for k = 1:3:4000] -ars = reinterpret(reshape, Int, a) -@test sum(ars) == 8010003 +test_many_wrappers(a, (identity, wrapper)) do a + ars = reinterpret(reshape, Int, a) + @test sum(ars) == 8010003 +end @testset "similar(::ReinterpretArray)" begin a = reinterpret(NTuple{2,Float64}, TSlow(rand(Float64, 4, 4))) @@ -514,6 +551,21 @@ end @test_throws MethodError x[2,4] = nothing end +@testset "pointer for StridedArray" begin + a = rand(Float64, 251) + v = view(a, UInt(2):UInt(251)); + A = reshape(v, 25, 10); + @test A isa StridedArray && pointer(A) === pointer(a, 2) + Av = view(A, 1:20, 1:2) + @test Av isa StridedArray && pointer(Av) === pointer(a, 2) + @test Av * Av' isa Array +end + +@testset "effect of StridedReinterpretArray's getindex" begin + eff = Base.infer_effects(getindex, Base.typesof(reinterpret(Int8, Int[1]), 1)) + @test Core.Compiler.is_effect_free(eff) +end + # reinterpret of arbitrary bitstypes @testset "Reinterpret arbitrary bitstypes" begin struct Bytes15 @@ -535,3 +587,23 @@ end @test_throws ArgumentError reinterpret(Tuple{Int32, Int64}, (Int16(1), Int64(4))) end + +let R = reinterpret(Float32, ComplexF32[1.0f0+2.0f0*im, 4.0f0+3.0f0*im]) + @test !isassigned(R, 0) + @test isassigned(R, 1) + @test isassigned(R, 4) + @test isassigned(R, Int8(2), Int16(1), Int32(1), Int64(1)) + @test !isassigned(R, 1, 2) + @test !isassigned(R, 5) + @test Array(R)::Vector{Float32} == [1.0f0, 2.0f0, 4.0f0, 3.0f0] +end + +let R = reinterpret(reshape, Float32, ComplexF32[1.0f0+2.0f0*im, 4.0f0+3.0f0*im]) + @test !isassigned(R, 0) + @test isassigned(R, 1) + @test isassigned(R, 4) + @test isassigned(R, Int8(2), Int16(2), Int32(1), Int64(1)) + @test !isassigned(R, 1, 1, 2) + @test !isassigned(R, 5) + @test Array(R)::Matrix{Float32} == [1.0f0 4.0f0; 2.0f0 3.0f0] +end diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl new file mode 100644 index 0000000000000..2ef6dec90dbc1 --- /dev/null +++ b/test/relocatedepot.jl @@ -0,0 +1,320 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test + + +include("testenv.jl") + + +function test_harness(@nospecialize(fn); empty_load_path=true, empty_depot_path=true) + load_path = copy(LOAD_PATH) + depot_path = copy(DEPOT_PATH) + try + empty_load_path && empty!(LOAD_PATH) + empty_depot_path && empty!(DEPOT_PATH) + fn() + finally + copy!(LOAD_PATH, load_path) + copy!(DEPOT_PATH, depot_path) + end +end + +# We test relocation with these dummy pkgs: +# - RelocationTestPkg1 - pkg with no include_dependency +# - RelocationTestPkg2 - pkg with include_dependency tracked by `mtime` +# - RelocationTestPkg3 - pkg with include_dependency tracked by content +# - RelocationTestPkg4 - pkg with no dependencies; will be compiled such that the pkgimage is +# not relocatable, but no repeated recompilation happens upon loading + +if !test_relocated_depot + + @testset "edge cases when inserting @depot tag in path" begin + + # insert @depot only once for first match + test_harness() do + mktempdir() do dir + pushfirst!(DEPOT_PATH, dir) + if Sys.iswindows() + # dirs start with a drive letter instead of a path separator + path = dir*Base.Filesystem.pathsep()*dir + @test Base.replace_depot_path(path) == "@depot"*Base.Filesystem.pathsep()*dir + else + path = dir*dir + @test Base.replace_depot_path(path) == "@depot"*dir + end + end + + # 55340 + empty!(DEPOT_PATH) + mktempdir() do dir + jlrc = joinpath(dir, "julia-rc2") + jl = joinpath(dir, "julia") + mkdir(jl) + push!(DEPOT_PATH, jl) + @test Base.replace_depot_path(jl) == "@depot" + @test Base.replace_depot_path(string(jl,Base.Filesystem.pathsep())) == + string("@depot",Base.Filesystem.pathsep()) + @test Base.replace_depot_path(jlrc) != "@depot-rc2" + @test Base.replace_depot_path(jlrc) == jlrc + end + end + + # deal with and without trailing path separators + test_harness() do + mktempdir() do dir + pushfirst!(DEPOT_PATH, dir) + path = joinpath(dir, "foo") + if isdirpath(DEPOT_PATH[1]) + DEPOT_PATH[1] = dirname(DEPOT_PATH[1]) # strip trailing pathsep + end + tag = string("@depot", Base.Filesystem.pathsep()) + @test startswith(Base.replace_depot_path(path), tag) + DEPOT_PATH[1] = string(DEPOT_PATH[1], Base.Filesystem.pathsep()) + @test startswith(Base.replace_depot_path(path), tag) + popfirst!(DEPOT_PATH) + @test !startswith(Base.replace_depot_path(path), tag) + end + end + + end + + @testset "restore path from @depot tag" begin + + tmp = tempdir() + + path = joinpath("@depot", "foo", "bar") + tmppath = joinpath(tmp, "foo", "bar") + @test Base.restore_depot_path(path, tmp) == tmppath + + path = joinpath("no@depot", "foo", "bar") + @test Base.restore_depot_path(path, tmp) == path + + path = joinpath("@depot", "foo", "bar\n", "@depot", "foo") + tmppath = joinpath(tmp, "foo", "bar\n", "@depot", "foo") + @test Base.restore_depot_path(path, tmp) == tmppath + + end + + @testset "precompile RelocationTestPkg1" begin + pkgname = "RelocationTestPkg1" + test_harness(empty_depot_path=false) do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) # make src files available for relocation + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + @test Base.isprecompiled(pkg) == false + @test Base.isrelocatable(pkg) == false # because not precompiled + Base.require(pkg) + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + @test Base.isrelocatable(pkg) == true + end + end + + @testset "precompile RelocationTestPkg2" begin + pkgname = "RelocationTestPkg2" + test_harness(empty_depot_path=false) 
do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) # make src files available for relocation + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + rm(joinpath(@__DIR__, pkgname, "src", "foodir"), force=true, recursive=true) + @test Base.isprecompiled(pkg) == false + @test Base.isrelocatable(pkg) == false # because not precompiled + touch(joinpath(@__DIR__, pkgname, "src", "foo.txt")) + mkdir(joinpath(@__DIR__, pkgname, "src", "foodir")) + Base.require(pkg) + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + @test Base.isrelocatable(pkg) == false # because tracked by mtime + end + end + + @testset "precompile RelocationTestPkg3" begin + pkgname = "RelocationTestPkg3" + test_harness(empty_depot_path=false) do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) # make src files available for relocation + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + rm(joinpath(@__DIR__, pkgname, "src", "bardir"), force=true, recursive=true) + @test Base.isprecompiled(pkg) == false + @test Base.isrelocatable(pkg) == false # because not precompiled + touch(joinpath(@__DIR__, pkgname, "src", "bar.txt")) + mkdir(joinpath(@__DIR__, pkgname, "src", "bardir")) + Base.require(pkg) + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + @test Base.isrelocatable(pkg) == true + end + end + + @testset "precompile RelocationTestPkg4" begin + # test for #52346 and https://github.com/JuliaLang/julia/issues/53859#issuecomment-2027352004 + # If a pkgimage is not relocatable, no repeated precompilation should occur. + pkgname = "RelocationTestPkg4" + test_harness(empty_depot_path=false) do + push!(LOAD_PATH, @__DIR__) + # skip this dir to make the pkgimage not relocatable + filter!(DEPOT_PATH) do depot + !startswith(@__DIR__, depot) + end + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + @test Base.isprecompiled(pkg) == false + @test Base.isrelocatable(pkg) == false # because not precompiled + Base.require(pkg) + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + @test Base.isrelocatable(pkg) == false + end + end + + @testset "#52161" begin + # Take the src files from two pkgs Example1 and Example2, + # which are each located in depot1 and depot2, respectively, and + # add them as include_dependency()s to a new pkg Foo, which will be precompiled into depot3. + # After loading the include_dependency()s of Foo should refer to depot1 depot2 each. 
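# Editorial sketch (not part of the patch): the #52161 test below, like the rest of
# this file, relies on the internal `@depot` substitution that makes cache files
# relocatable. Assuming a typical single-depot layout, the round-trip exercised by
# `Base.replace_depot_path` / `Base.restore_depot_path` looks roughly like this:
let depot = first(DEPOT_PATH)
    src = joinpath(depot, "packages", "Example", "src", "Example.jl")
    tagged = Base.replace_depot_path(src)              # as stored in the cache header
    restored = Base.restore_depot_path(tagged, depot)  # as resolved by the loading depot
    @assert startswith(tagged, "@depot")
    @assert restored == src
end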
+ test_harness() do + mktempdir() do depot1 + # precompile Example in depot1 + example1_root = joinpath(depot1, "Example1") + mkpath(joinpath(example1_root, "src")) + open(joinpath(example1_root, "src", "Example1.jl"); write=true) do io + println(io, """ + module Example1 + greet() = println("Hello from Example1!") + end + """) + end + open(joinpath(example1_root, "Project.toml"); write=true) do io + println(io, """ + name = "Example1" + uuid = "00000000-0000-0000-0000-000000000001" + version = "1.0.0" + """) + end + pushfirst!(LOAD_PATH, depot1); pushfirst!(DEPOT_PATH, depot1) + pkg = Base.identify_package("Example1"); Base.require(pkg) + mktempdir() do depot2 + # precompile Example in depot2 + example2_root = joinpath(depot2, "Example2") + mkpath(joinpath(example2_root, "src")) + open(joinpath(example2_root, "src", "Example2.jl"); write=true) do io + println(io, """ + module Example2 + greet() = println("Hello from Example2!") + end + """) + end + open(joinpath(example2_root, "Project.toml"); write=true) do io + println(io, """ + name = "Example2" + uuid = "00000000-0000-0000-0000-000000000002" + version = "1.0.0" + """) + end + pushfirst!(LOAD_PATH, depot2); pushfirst!(DEPOT_PATH, depot2) + pkg = Base.identify_package("Example2"); Base.require(pkg) + mktempdir() do depot3 + # precompile Foo in depot3 + open(joinpath(depot3, "Module52161.jl"), write=true) do io + println(io, """ + module Module52161 + using Example1 + using Example2 + srcfile1 = joinpath(pkgdir(Example1), "src", "Example1.jl") + srcfile2 = joinpath(pkgdir(Example2), "src", "Example2.jl") + include_dependency(srcfile1) + include_dependency(srcfile2) + end + """) + end + pushfirst!(LOAD_PATH, depot3); pushfirst!(DEPOT_PATH, depot3) + pkg = Base.identify_package("Module52161"); Base.compilecache(pkg) + cachefile = joinpath(depot3, "compiled", + "v$(VERSION.major).$(VERSION.minor)", "Module52161.ji") + _, (deps, _, _), _... 
= Base.parse_cache_header(cachefile) + @test map(x -> x.filename, deps) == + [ joinpath(depot3, "Module52161.jl"), + joinpath(depot1, "Example1", "src", "Example1.jl"), + joinpath(depot2, "Example2", "src", "Example2.jl") ] + end + end + end + end + end + + +else + + @testset "load stdlib from test/relocatedepot" begin + test_harness() do + push!(LOAD_PATH, "@stdlib") + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) + # stdlib should be already precompiled + pkg = Base.identify_package("DelimitedFiles") + @test Base.isprecompiled(pkg) == true + @test Base.isrelocatable(pkg) == true + end + end + + @testset "load RelocationTestPkg1 from test/relocatedepot" begin + pkgname = "RelocationTestPkg1" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == true + @test Base.isrelocatable(pkg) == true + end + end + + @testset "load RelocationTestPkg2 from test/relocatedepot" begin + pkgname = "RelocationTestPkg2" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == false # moving depot changes mtime of include_dependency + @test Base.isrelocatable(pkg) == false # because not precompiled + Base.require(pkg) + @test Base.isprecompiled(pkg) == true + @test Base.isrelocatable(pkg) == false # because tracked by mtime + touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg2", "src", "foodir", "foofoo")) + @test Base.isprecompiled(pkg) == false + @test Base.isrelocatable(pkg) == false # because tracked by mtime + end + end + + @testset "load RelocationTestPkg3 from test/relocatedepot" begin + pkgname = "RelocationTestPkg3" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == true + @test Base.isrelocatable(pkg) == true + touch(joinpath(@__DIR__, "relocatedepot", "RelocationTestPkg3", "src", "bardir", "barbar")) + @test Base.isprecompiled(pkg) == false + @test Base.isrelocatable(pkg) == false # because not precompiled + end + end + + @testset "load RelocationTestPkg4 from test/relocatedepot" begin + pkgname = "RelocationTestPkg4" + test_harness() do + push!(LOAD_PATH, @__DIR__, "relocatedepot") + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) # required to find src files + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot", "julia")) # contains cache file + pkg = Base.identify_package(pkgname) + # precompiled but not relocatable + @test Base.isprecompiled(pkg) == true + @test Base.isrelocatable(pkg) == false + end + end + +end diff --git a/test/rounding.jl b/test/rounding.jl index 508a68032e083..6fad6f62e8dfe 100644 --- a/test/rounding.jl +++ b/test/rounding.jl @@ -57,7 +57,7 @@ end @test pu - pd == eps(pz) end - for T in [Float32,Float64] + for T in [Float16,Float32,Float64] for v in [sqrt(big(2.0)),-big(1.0)/big(3.0),nextfloat(big(1.0)), 
prevfloat(big(1.0)),nextfloat(big(0.0)),prevfloat(big(0.0)), pi,ℯ,eulergamma,catalan,golden, @@ -351,3 +351,147 @@ end Base.Rounding.setrounding_raw(T, Base.Rounding.to_fenv(old)) end end + +@testset "rounding floats with specified return type #50778" begin + @test round(Float64, 1.2) === 1.0 + @test round(Float32, 1e60) === Inf32 + x = floatmax(Float32)-1.0 + @test round(Float32, x) == x +end + +@testset "rounding complex numbers (#42060, #47128)" begin + # 42060 + @test ceil(Complex(4.6, 2.2)) === Complex(5.0, 3.0) + @test floor(Complex(4.6, 2.2)) === Complex(4.0, 2.0) + @test trunc(Complex(4.6, 2.2)) === Complex(4.0, 2.0) + @test round(Complex(4.6, 2.2)) === Complex(5.0, 2.0) + @test ceil(Complex(-4.6, -2.2)) === Complex(-4.0, -2.0) + @test floor(Complex(-4.6, -2.2)) === Complex(-5.0, -3.0) + @test trunc(Complex(-4.6, -2.2)) === Complex(-4.0, -2.0) + @test round(Complex(-4.6, -2.2)) === Complex(-5.0, -2.0) + + # 47128 + @test round(Complex{Int}, Complex(4.6, 2.2)) === Complex(5, 2) + @test ceil(Complex{Int}, Complex(4.6, 2.2)) === Complex(5, 3) +end + +@testset "rounding to custom integers" begin + struct Int50812 <: Integer + x::Int + end + @test round(Int50812, 1.2) === Int50812(1) + @test round(Int50812, π) === Int50812(3) + @test ceil(Int50812, π) === Int50812(4) +end + +const MPFRRM = Base.MPFR.MPFRRoundingMode + +function mpfr_to_ieee(::Type{Float32}, x::BigFloat, r::MPFRRM) + ccall((:mpfr_get_flt, Base.MPFR.libmpfr), Float32, (Ref{BigFloat}, MPFRRM), x, r) +end +function mpfr_to_ieee(::Type{Float64}, x::BigFloat, r::MPFRRM) + ccall((:mpfr_get_d, Base.MPFR.libmpfr), Float64, (Ref{BigFloat}, MPFRRM), x, r) +end + +function mpfr_to_ieee(::Type{G}, x::BigFloat, r::RoundingMode) where {G} + mpfr_to_ieee(G, x, convert(MPFRRM, r)) +end + +const mpfr_rounding_modes = map( + Base.Fix1(convert, MPFRRM), + (RoundNearest, RoundToZero, RoundFromZero, RoundDown, RoundUp) +) + +sample_float(::Type{T}, e::Integer) where {T<:AbstractFloat} = ldexp(rand(T) + true, e)::T + +function float_samples(::Type{T}, exponents, n::Int) where {T<:AbstractFloat} + ret = T[] + for e ∈ exponents, i ∈ 1:n + push!(ret, sample_float(T, e), -sample_float(T, e)) + end + ret +end + +# a reasonable range of values for testing behavior between 1:200 +const fib200 = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 200] + +@testset "IEEEFloat(::BigFloat) against MPFR" begin + for pr ∈ fib200 + setprecision(BigFloat, pr) do + exp = exponent(floatmax(Float64)) + 10 + bf_samples = float_samples(BigFloat, (-exp):exp, 20) # about 82680 random values + for mpfr_rm ∈ mpfr_rounding_modes, bf ∈ bf_samples, F ∈ (Float32, Float64) + @test ( + mpfr_to_ieee(F, bf, mpfr_rm) === + F(bf, mpfr_rm) === F(bf, convert(RoundingMode, mpfr_rm)) + ) + end + end + end +end + +const native_rounding_modes = ( + RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp, + RoundToZero, RoundFromZero, RoundUp, RoundDown +) + +# Checks that each rounding mode is faithful. 
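# Editorial sketch (not part of the patch): "faithful" here means the conversion may
# return either of the two adjacent representable values, i.e. it always agrees with
# the RoundDown or the RoundUp result, which is what the testset below asserts for
# random BigFloat samples. A standalone illustration with a non-representable value:
let x = big"0.1"
    lo = Float64(x, RoundDown)
    hi = Float64(x, RoundUp)
    @assert hi == nextfloat(lo) && lo < x < hi
    @assert Float64(x, RoundNearest) in (lo, hi)
end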
+@testset "IEEEFloat(::BigFloat) faithful rounding" begin + for pr ∈ fib200 + setprecision(BigFloat, pr) do + exp = 500 + bf_samples = float_samples(BigFloat, (-exp):exp, 20) # about 40040 random values + for rm ∈ (mpfr_rounding_modes..., Base.MPFR.MPFRRoundFaithful, + native_rounding_modes...), + bf ∈ bf_samples, + F ∈ (Float16, Float32, Float64) + f = F(bf, rm) + @test (f === F(bf, RoundDown)) | (f === F(bf, RoundUp)) + end + end + end +end + +@testset "round(Int, -Inf16) should throw (#51113)" begin + @test_throws InexactError round(Int32, -Inf16) + @test_throws InexactError round(Int64, -Inf16) + @test_throws InexactError round(Int128, -Inf16) + # More comprehensive testing is present in test/floatfuncs.jl +end + +@testset "floor(<:AbstractFloat, large_number) (#52355)" begin + @test floor(Float32, 0xffff_ffff) == prevfloat(2f0^32) <= 0xffff_ffff + @test trunc(Float16, typemax(UInt128)) == floatmax(Float16) + @test round(Float16, typemax(UInt128)) == Inf16 + for i in [-BigInt(floatmax(Float64)), -BigInt(floatmax(Float64))*100, BigInt(floatmax(Float64)), BigInt(floatmax(Float64))*100] + f = ceil(Float64, i) + @test f >= i + @test isinteger(f) || isinf(f) + @test prevfloat(f) < i + end +end + +@testset "π to `BigFloat` with `setrounding`" begin + function irrational_to_big_float(c::AbstractIrrational) + BigFloat(c) + end + + function irrational_to_big_float_with_rounding_mode(c::AbstractIrrational, rm::RoundingMode) + f = () -> irrational_to_big_float(c) + setrounding(f, BigFloat, rm) + end + + function irrational_to_big_float_with_rounding_mode_and_precision(c::AbstractIrrational, rm::RoundingMode, prec::Int) + f = () -> irrational_to_big_float_with_rounding_mode(c, rm) + setprecision(f, BigFloat, prec) + end + + for c ∈ (π, MathConstants.γ, MathConstants.catalan) + for p ∈ 1:40 + @test ( + irrational_to_big_float_with_rounding_mode_and_precision(c, RoundDown, p) < c < + irrational_to_big_float_with_rounding_mode_and_precision(c, RoundUp, p) + ) + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 1264acae985b0..fd0326d48ee6c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,12 +3,17 @@ using Test using Distributed using Dates -import REPL +if !Sys.iswindows() && isa(stdin, Base.TTY) + import REPL +end using Printf: @sprintf using Base: Experimental include("choosetests.jl") include("testenv.jl") +include("buildkitetestjson.jl") + +using .BuildkiteTestJSON (; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS) tests = unique(tests) @@ -41,19 +46,6 @@ else end limited_worker_rss = max_worker_rss != typemax(Csize_t) -function test_path(test) - t = split(test, '/') - if t[1] in STDLIBS - if length(t) == 2 - return joinpath(STDLIB_DIR, t[1], "test", t[2]) - else - return joinpath(STDLIB_DIR, t[1], "test", "runtests") - end - else - return joinpath(@__DIR__, test) - end -end - # Check all test files exist isfiles = isfile.(test_path.(tests) .* ".jl") if !all(isfiles) @@ -84,12 +76,14 @@ move_to_node1("stress") # since it starts a lot of workers and can easily exceed the maximum memory limited_worker_rss && move_to_node1("Distributed") -# Shuffle LinearAlgebra tests to the front, because they take a while, so we might +# Move LinearAlgebra and Pkg tests to the front, because they take a while, so we might # as well get them all started early. 
-linalg_test_ids = findall(x->occursin("LinearAlgebra", x), tests) -linalg_tests = tests[linalg_test_ids] -deleteat!(tests, linalg_test_ids) -prepend!(tests, linalg_tests) +for prependme in ["LinearAlgebra", "Pkg"] + prependme_test_ids = findall(x->occursin(prependme, x), tests) + prependme_tests = tests[prependme_test_ids] + deleteat!(tests, prependme_test_ids) + prepend!(tests, prependme_tests) +end import LinearAlgebra cd(@__DIR__) do @@ -124,6 +118,7 @@ cd(@__DIR__) do println(""" Running parallel tests with: + getpid() = $(getpid()) nworkers() = $(nworkers()) nthreads() = $(Threads.threadpoolsize()) Sys.CPU_THREADS = $(Sys.CPU_THREADS) @@ -421,14 +416,20 @@ cd(@__DIR__) do Test.record(o_ts, fake) Test.pop_testset() end + + if Base.get_bool_env("CI", false) + @info "Writing test result data to $(@__DIR__)" + write_testset_json_files(@__DIR__, o_ts) + end + Test.TESTSET_PRINT_ENABLE[] = true println() # o_ts.verbose = true # set to true to show all timings when successful Test.print_test_results(o_ts, 1) if !o_ts.anynonpass - println(" \033[32;1mSUCCESS\033[0m") + printstyled(" SUCCESS\n"; bold=true, color=:green) else - println(" \033[31;1mFAILURE\033[0m\n") + printstyled(" FAILURE\n\n"; bold=true, color=:red) skipped > 0 && println("$skipped test", skipped > 1 ? "s were" : " was", " skipped due to failure.") println("The global RNG seed was 0x$(string(seed, base = 16)).\n") diff --git a/test/ryu.jl b/test/ryu.jl index 0b10bd7e49ba5..4acd2fd08df50 100644 --- a/test/ryu.jl +++ b/test/ryu.jl @@ -558,6 +558,11 @@ end # Float16 @test Ryu.writefixed(1.25e+5, 1, false, false, false, UInt8('.'), true) == "125000" @test Ryu.writefixed(1.25e+5, 2, false, false, false, UInt8('.'), true) == "125000" end + + @test Ryu.writefixed(100.0-eps(100.0), 0, false, false, true, UInt8('.'), false) == "100." + @test Ryu.writefixed(-100.0+eps(-100.0), 0, false, false, true, UInt8('.'), false) == "-100." + @test Ryu.writefixed(100.0-eps(100.0), 1, false, false, true, UInt8('.'), false) == "100.0" + @test Ryu.writefixed(-100.0+eps(-100.0), 1, false, false, true, UInt8('.'), false) == "-100.0" end # fixed @testset "Ryu.writeexp" begin diff --git a/test/scopedvalues.jl b/test/scopedvalues.jl new file mode 100644 index 0000000000000..174bc690ac0a2 --- /dev/null +++ b/test/scopedvalues.jl @@ -0,0 +1,184 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Base.ScopedValues + +include(joinpath(@__DIR__,"../Compiler/test/irutils.jl")) + +@testset "errors" begin + @test ScopedValue{Float64}(1)[] == 1.0 + @test_throws InexactError ScopedValue{Int}(1.5) + let val = ScopedValue(1) + @test_throws MethodError val[] = 2 + with() do + @test_throws MethodError val[] = 2 + end + end + let val = ScopedValue{String}() + @test_throws KeyError val[] + end + let val = ScopedValue{Int}() + @test_throws KeyError val[] + end + @test_throws MethodError ScopedValue() +end + +const sval = ScopedValue(1) +@testset "inheritance" begin + @test sval[] == 1 + with() do + @test sval[] == 1 + with() do + @test sval[] == 1 + end + with(sval => 2) do + @test sval[] == 2 + end + @test sval[] == 1 + end + @test sval[] == 1 +end + +const sval_float = ScopedValue(1.0) + +@testset "multiple scoped values" begin + with(sval => 2, sval_float => 2.0) do + @test sval[] == 2 + @test sval_float[] == 2.0 + end + with(sval => 2, sval => 3) do + @test sval[] == 3 + end +end + +emptyf() = nothing + +@testset "conversion" begin + with(emptyf, sval_float=>2) + @test_throws MethodError with(emptyf, sval_float=>"hello") + a = ScopedValue(1) + with(a => 2.0) do + @test a[] == 2 + @test a[] isa Int + end + a = ScopedValue(1.0) + with(a => 2) do + @test a[] == 2.0 + @test a[] isa Float64 + end +end + +import Base.Threads: @spawn +@testset "tasks" begin + @test fetch(@spawn begin + sval[] + end) == 1 + with(sval => 2) do + @test fetch(@spawn begin + sval[] + end) == 2 + end +end + +@testset "show" begin + @test sprint(show, ScopedValue{Int}()) == "Base.ScopedValues.ScopedValue{$Int}(undefined)" + @test sprint(show, sval) == "Base.ScopedValues.ScopedValue{$Int}(1)" + @test sprint(show, Core.current_scope()) == "nothing" + with(sval => 2.0) do + @test sprint(show, sval) == "Base.ScopedValues.ScopedValue{$Int}(2)" + objid = sprint(show, Base.objectid(sval)) + @test sprint(show, Core.current_scope()) == "Base.ScopedValues.Scope(Base.ScopedValues.ScopedValue{$Int}@$objid => 2)" + end +end + +const depth = ScopedValue(0) +function nth_with(f, n) + if n <= 0 + f() + else + with(depth => n) do + nth_with(f, n-1) + end + end +end + + +@testset "nested with" begin + @testset for depth in 1:16 + nth_with(depth) do + @test sval_float[] == 1.0 + end + with(sval_float=>2.0) do + nth_with(depth) do + @test sval_float[] == 2.0 + end + end + nth_with(depth) do + with(sval_float=>2.0) do + @test sval_float[] == 2.0 + end + end + end + with(sval_float=>2.0) do + nth_with(15) do + @test sval_float[] == 2.0 + with(sval_float => 3.0) do + @test sval_float[] == 3.0 + end + end + end +end + +@testset "macro" begin + @with sval=>2 sval_float=>2.0 begin + @test sval[] == 2 + @test sval_float[] == 2.0 + end + # Doesn't do much... + @with begin + @test sval[] == 1 + @test sval_float[] == 1.0 + end + @with sval=>2 sval_float=>2.0 begin + @with begin + @test sval[] == 2 + @test sval_float[] == 2.0 + end + end +end + +@testset "isassigned" begin + sv = ScopedValue(1) + @test isassigned(sv) + sv = ScopedValue{Int}() + @test !isassigned(sv) + with(sv => 2) do + @test isassigned(sv) + end +end + +# Test that the `@with` macro doesn't introduce unnecessary PhiC nodes +# (which can be hard for the optimizer to remove). 
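# Editorial sketch (not part of the patch): `with` and `@with` establish the same
# dynamic scope; `with` takes a closure, while `@with` runs the body directly in the
# enclosing function, which is why the test below checks that the macro leaves no
# PhiC nodes behind. `sketch_sv` is a hypothetical value used only here; the file
# already has `using Base.ScopedValues` in effect.
const sketch_sv = ScopedValue(0)
with(sketch_sv => 1) do
    @assert sketch_sv[] == 1
end
@with sketch_sv => 1 begin
    @assert sketch_sv[] == 1
end
@assert sketch_sv[] == 0  # outside any scope the default value is visible again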
+function with_macro_slot_cross() + a = 1 + @with sval=>1 begin + a = sval_float[] + end + return a +end + +let code = code_typed(with_macro_slot_cross)[1][1].code + @test !any(x->isa(x, Core.PhiCNode), code) +end + +# inline constant scoped values +const inlineable_const_sv = ScopedValue(1) +@test fully_eliminated(; retval=(inlineable_const_sv => 1)) do + inlineable_const_sv => 1 +end + +# Handle nothrow scope bodies correctly (#56609) +@eval function nothrow_scope() + $(Expr(:tryfinally, :(), nothing, 1)) + @test Core.current_scope() === nothing +end +nothrow_scope() diff --git a/test/secretbuffer.jl b/test/secretbuffer.jl index 976c757deea57..703552570745c 100644 --- a/test/secretbuffer.jl +++ b/test/secretbuffer.jl @@ -1,7 +1,11 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl")) +using .Main.ChallengePrompts: challenge_prompt + using Base: SecretBuffer, SecretBuffer!, shred!, isshredded -using Test +using Test, Random @testset "SecretBuffer" begin @testset "original unmodified" begin @@ -129,4 +133,70 @@ using Test @test_throws ArgumentError Base.unsafe_SecretBuffer!(null_ptr) @test_throws ArgumentError Base.unsafe_SecretBuffer!(null_ptr, 0) end + + @testset "copiers" begin + s1 = SecretBuffer() + write(s1, "hello world") + seekstart(s1) + + s2 = copy(s1) + write(s2, 'c') + seekstart(s2) + + @test read(s1) == codeunits("hello world") + @test read(s2) == codeunits("cello world") + + shred!(s1) + @test isshredded(s1) + @test !isshredded(s2) + shred!(s2) + + # Copying into a bigger destination + s3 = SecretBuffer() + s4 = SecretBuffer() + write(s3, "original") + seekstart(s3) + write(s4, randstring(1234)) + s4data = s4.data + copy!(s4, s3) + @test s3.data == s4.data + @test read(s3) == read(s4) == codeunits("original") + @test all(iszero, s4data) + shred!(s3); shred!(s4) + + # Copying into a smaller destination + s5 = SecretBuffer() + s6 = SecretBuffer("sekrit") + str = randstring(321) + write(s5, str) + seekstart(s5) + copy!(s6, s5) + @test read(s5) == read(s6) == codeunits(str) + shred!(s5); shred!(s6) + end + + if !Sys.iswindows() + @testset "getpass" begin + v1, s1 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I"); (read(s), Base.shred!(s))), ["LPAwVZM8D4I: " => "too many secrets\n"]) + s2 = SecretBuffer("too many secrets") + @test s1 isa SecretBuffer + @test isshredded(s1) + @test v1 == read(s2) == codeunits("too many secrets") + shred!(s1); shred!(s2) + + v3, s3 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=false); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> " => "frperg\n"]) + s4 = SecretBuffer("frperg") + @test s3 isa SecretBuffer + @test isshredded(s3) + @test v3 == read(s4) == codeunits("frperg") + shred!(s3); shred!(s4) + + v5, s5 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=true); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> : " => "frperg\n"]) + s6 = SecretBuffer("frperg") + @test s5 isa SecretBuffer + @test isshredded(s5) + @test v5 == read(s6) == codeunits("frperg") + shred!(s5); shred!(s6) + end + end end diff --git a/test/sets.jl b/test/sets.jl index 65444153c90d9..4d52cb243620c 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -124,7 +124,40 @@ end @test isempty(s) @test_throws ArgumentError pop!(s) @test length(Set(['x',120])) == 2 + + # Test that pop! 
returns the element in the set, not the query + s = Set{Any}(Any[0x01, UInt(2), 3, 4.0]) + @test pop!(s, 1) === 0x01 + @test pop!(s, 2) === UInt(2) + @test pop!(s, 3) === 3 + @test pop!(s, 4) === 4.0 + @test_throws KeyError pop!(s, 5) +end + +@testset "in!" begin + s = Set() + @test !(in!(0x01, s)) + @test !(in!(Int32(2), s)) + @test in!(1, s) + @test in!(2.0, s) + (a, b, c...) = sort!(collect(s)) + @test a === 0x01 + @test b === Int32(2) + @test isempty(c) + + # in! will convert to the right type automatically + s = Set{Int32}() + @test !(in!(1, s)) + @test only(s) === Int32(1) + @test_throws Exception in!("hello", s) + + # Other set types + s = BitSet() + @test !(in!(13, s)) + @test in!(UInt16(13), s) + @test only(s) === 13 end + @testset "copy" begin data_in = (1,2,9,8,4) s = Set(data_in) @@ -164,6 +197,19 @@ end sizehint!(s2, 10) @test s2 == GenericSet(s) end + +@testset "shrinking" begin # Similar test as for the underlying Dict + d = Set(i for i = 1:1000) + filter!(x -> x < 10, d) + sizehint!(d, 10) + @test length(d.dict.slots) < 100 + sizehint!(d, 1000) + sizehint!(d, 1; shrink = false) + @test length(d.dict.slots) >= 1000 + sizehint!(d, 1; shrink = true) + @test length(d.dict.slots) < 1000 +end + @testset "rehash!" begin # Use a pointer type to have defined behavior for uninitialized # array element @@ -364,7 +410,9 @@ end @test issubset(intersect(l,r), r) @test issubset(l, union(l,r)) @test issubset(r, union(l,r)) + @test issubset(union(l,r))(r) @test isdisjoint(l,l) == isempty(l) + @test isdisjoint(l)(l) == isempty(l) @test isdisjoint(l,r) == isempty(intersect(l,r)) if S === Vector @test sort(union(intersect(l,r),symdiff(l,r))) == sort(union(l,r)) @@ -381,6 +429,15 @@ end @test ⊋(S([1,2]), S([1])) @test !⊋(S([1]), S([1])) @test ⊉(S([1]), S([2])) + + @test ⊆(S([1,2]))(S([1])) + @test ⊊(S([1,2]))(S([1])) + @test !⊊(S([1]))(S([1])) + @test ⊈(S([2]))(S([1])) + @test ⊇(S([1]))(S([1,2])) + @test ⊋(S([1]))(S([1,2])) + @test !⊋(S([1]))(S([1])) + @test ⊉(S([2]))(S([1])) end let s1 = S([1,2,3,4]) @test s1 !== symdiff(s1) == s1 @@ -548,6 +605,9 @@ end @test !allunique([1,1,2]) @test !allunique([:a,:b,:c,:a]) @test allunique(unique(randn(100))) # longer than 32 + @test allunique(collect(1:100)) # sorted/unique && longer than 32 + @test allunique(collect(100:-1:1)) # sorted/unique && longer than 32 + @test !allunique(fill(1,100)) # sorted/repeating && longer than 32 @test allunique(collect('A':'z')) # 58-element Vector{Char} @test !allunique(repeat(1:99, 1, 2)) @test !allunique(vcat(pi, randn(1998), pi)) # longer than 1000 @@ -582,21 +642,39 @@ end @test !allunique((1,2,3,4,3)) @test allunique((0.0, -0.0)) @test !allunique((NaN, NaN)) + # Known length 1, need not evaluate: + @test allunique(error(x) for x in [1]) + # @test_opt allunique(Int[]) +end + +@testset "allunique(f, xs)" begin + @test allunique(sin, 1:3) + @test !allunique(sin, [1,2,3,1]) + @test allunique(sin, (1, 2, pi, im)) # eltype Any + @test allunique(abs2, 1:100) + @test !allunique(abs, -10:10) + @test allunique(abs2, Vector{Any}(1:100)) + # These cases don't call the function at all: + @test allunique(error, []) + @test allunique(error, [1]) end @testset "allequal" begin + # sets & dictionaries @test allequal(Set()) @test allequal(Set(1)) @test !allequal(Set([1, 2])) @test allequal(Dict()) @test allequal(Dict(:a => 1)) @test !allequal(Dict(:a => 1, :b => 2)) + # vectors @test allequal([]) @test allequal([1]) @test allequal([1, 1]) @test !allequal([1, 1, 2]) @test allequal([:a, :a]) @test !allequal([:a, :b]) + # ranges @test 
!allequal(1:2) @test allequal(1:1) @test !allequal(4.0:0.3:7.0) @@ -610,6 +688,26 @@ end @test allequal(LinRange(1, 1, 1)) @test allequal(LinRange(1, 1, 2)) @test !allequal(LinRange(1, 2, 2)) + # Known length 1, need not evaluate: + @test allequal(error(x) for x in [1]) + # Empty, but !haslength: + @test allequal(error(x) for x in 1:3 if false) +end + +@testset "allequal(f, xs)" begin + @test allequal(abs2, [3, -3]) + @test allequal(x -> 1, rand(3)) + @test !allequal(x -> rand(), [1,1,1]) + # tuples + @test allequal(abs2, (3, -3)) + @test allequal(x -> 1, Tuple(rand(3))) + @test !allequal(x -> rand(), (1,1,1)) + # These cases don't call the function at all: + @test allequal(error, []) + @test allequal(error, ()) + @test allequal(error, (x for x in 1:3 if false)) + @test allequal(error, [1]) + @test allequal(error, (1,)) end @testset "filter(f, ::$S)" for S = (Set, BitSet) @@ -810,6 +908,28 @@ end @test replace((NaN, 1.0), NaN=>0.0) === (0.0, 1.0) @test replace([1, missing], missing=>0) == [1, 0] @test replace((1, missing), missing=>0) === (1, 0) + + # test that MethodError is thrown for pairs + @test_throws MethodError replace(identity, 1=>2) + @test_throws MethodError replace(identity, 1=>2, 3=>4) + @test_throws MethodError replace!(identity, 1=>2) + @test_throws MethodError replace!(identity, 1=>2, 3=>4) + + # test replace and friends for AbstractDicts + d1 = GenericDict(Dict(1=>2, 3=>4)) + d2 = replace(d1, (1=>2) => (1=>"a")) + @test d2 == Dict(1=>"a", 3=>4) + @test d2 isa Dict{Int, Any} + @test d1 === replace!(d1, (1=>2) => (1=>-2)) + @test d1 == Dict(1=>-2, 3=>4) + + dd = Dict(1=>2, 3=>1, 5=>1, 7=>1) + for d1 in (dd, GenericDict(dd)) + @test replace(d1, (1=>2) => (1=>"a"), count=0) == d1 + d2 = replace(kv->(kv[2] == 1 ? kv[1]=>2 : kv), d1, count=2) + @test count(==(2), values(d2)) == 3 + @test count(==(1), values(d2)) == 1 + end end @testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin @@ -817,8 +937,8 @@ end b = [2, 3, 1, 3] ua = unique(a) ub = unique(b) - for TA in (Tuple, identity, Set, BitSet, Base.IdSet{Int}), - TB in (Tuple, identity, Set, BitSet, Base.IdSet{Int}), + for TA in (Tuple, identity, Set, BitSet, IdSet{Int}), + TB in (Tuple, identity, Set, BitSet, IdSet{Int}), uA = false:true, uB = false:true A = TA(uA ? 
ua : a) @@ -837,7 +957,9 @@ end @test !(B ⊉ A) @test !issetequal(A, B) @test !issetequal(B, A) - for T = (Tuple, identity, Set, BitSet, Base.IdSet{Int}) + @test !issetequal(B)(A) + @test !issetequal(A)(B) + for T = (Tuple, identity, Set, BitSet, IdSet{Int}) @test issetequal(A, T(A)) @test issetequal(B, T(B)) end @@ -898,7 +1020,7 @@ end c = [3] d = [4] e = [5] - A = Base.IdSet{Vector{Int}}([a, b, c, d]) + A = IdSet{Vector{Int}}([a, b, c, d]) @test !isempty(A) B = copy(A) @test A ⊆ B @@ -915,6 +1037,8 @@ end @test !isempty(A) A = empty!(A) @test isempty(A) + @test isnothing(sizehint!(A, 10)) + @test Base.copymutable(A) == copy(A) end @testset "⊊, ⊋" begin @@ -931,4 +1055,6 @@ end end set = TestSet{Any}() @test sizehint!(set, 1) === set + @test sizehint!(set, 1; shrink = true) === set + @test sizehint!(set, 1; shrink = false) === set end diff --git a/test/show.jl b/test/show.jl index 25c5a49372054..75f04c1e02096 100644 --- a/test/show.jl +++ b/test/show.jl @@ -326,7 +326,7 @@ end # line meta if d < 0 # line meta - error(\"dimension size must be nonnegative (got \$d)\") + error(\"dimension size must be non-negative (got \$d)\") end # line meta n *= d @@ -523,6 +523,13 @@ end # Hidden macro names @test sprint(show, Expr(:macrocall, Symbol("@#"), nothing, :a)) == ":(@var\"#\" a)" +# Test that public expressions are rendered nicely +# though they are hard to create with quotes because public is not a context dependant keyword +@test sprint(show, Expr(:public, Symbol("@foo"))) == ":(public @foo)" +@test sprint(show, Expr(:public, :f,:o,:o)) == ":(public f, o, o)" +s = sprint(show, :(module A; public x; end)) +@test match(r"^:\(module A\n #= .* =#\n #= .* =#\n public x\n end\)$", s) !== nothing + # PR #38418 module M1 var"#foo#"() = 2 end @test occursin("M1.var\"#foo#\"", sprint(show, M1.var"#foo#", context = :module=>@__MODULE__)) @@ -696,7 +703,7 @@ let oldout = stdout, olderr = stderr redirect_stderr(olderr) close(wrout) close(wrerr) - @test fetch(out) == "Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123\"C\"\n" + @test fetch(out) == "primitive type Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123\"C\"\n" @test fetch(err) == "TESTA\nTESTB\nΑ1Β2\"A\"\n" finally redirect_stdout(oldout) @@ -748,6 +755,69 @@ end @test startswith(sprint(show, typeof(x->x), context = :module=>@__MODULE__), "var\"") +# PR 53719 +module M53719 + f = x -> x + 1 + function foo(x) + function bar(y) + function baz(z) + return x + y + z + end + return baz + end + return bar + end + function foo2(x) + function bar2(y) + return z -> x + y + z + end + return bar2 + end + lambda1 = (x)->begin + function foo(y) + return x + y + end + return foo + end + lambda2 = (x)->begin + y -> x + y + end +end + +@testset "PR 53719 function names" begin + # M53719.f should be printed as var"#[0-9]+" + @test occursin(r"var\"#[0-9]+", sprint(show, M53719.f, context = :module=>M53719)) + # M53719.foo(1) should be printed as var"#bar" + @test occursin(r"var\"#bar", sprint(show, M53719.foo(1), context = :module=>M53719)) + # M53719.foo(1)(2) should be printed as var"#baz" + @test occursin(r"var\"#baz", sprint(show, M53719.foo(1)(2), context = :module=>M53719)) + # M53719.foo2(1) should be printed as var"#bar2" + @test occursin(r"var\"#bar2", sprint(show, M53719.foo2(1), context = :module=>M53719)) + # M53719.foo2(1)(2) should be printed as var"#foo2##[0-9]+" + @test occursin(r"var\"#foo2##[0-9]+", sprint(show, M53719.foo2(1)(2), context = :module=>M53719)) + # M53719.lambda1(1) should be printed as var"#foo" + @test occursin(r"var\"#foo", 
sprint(show, M53719.lambda1(1), context = :module=>M53719)) + # M53719.lambda2(1) should be printed as var"#[0-9]+" + @test occursin(r"var\"#[0-9]+", sprint(show, M53719.lambda2(1), context = :module=>M53719)) +end + +@testset "PR 53719 function types" begin + # typeof(M53719.f) should be printed as var"#[0-9]+#[0-9]+" + @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.f), context = :module=>M53719)) + #typeof(M53719.foo(1)) should be printed as var"#bar#foo##[0-9]+" + @test occursin(r"var\"#bar#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)), context = :module=>M53719)) + #typeof(M53719.foo(1)(2)) should be printed as var"#baz#foo##[0-9]+" + @test occursin(r"var\"#baz#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)(2)), context = :module=>M53719)) + #typeof(M53719.foo2(1)) should be printed as var"#bar2#foo2##[0-9]+" + @test occursin(r"var\"#bar2#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)), context = :module=>M53719)) + #typeof(M53719.foo2(1)(2)) should be printed as var"#foo2##[0-9]+#foo2##[0-9]+" + @test occursin(r"var\"#foo2##[0-9]+#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)(2)), context = :module=>M53719)) + #typeof(M53719.lambda1(1)) should be printed as var"#foo#[0-9]+" + @test occursin(r"var\"#foo#[0-9]+", sprint(show, typeof(M53719.lambda1(1)), context = :module=>M53719)) + #typeof(M53719.lambda2(1)) should be printed as var"#[0-9]+#[0-9]+" + @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.lambda2(1)), context = :module=>M53719)) +end + #test methodshow.jl functions @test Base.inbase(Base) @test !Base.inbase(LinearAlgebra) @@ -858,19 +928,19 @@ end # string show with elision @testset "string show with elision" begin @testset "elision logic" begin - strs = ["A", "∀", "∀A", "A∀", "😃"] + strs = ["A", "∀", "∀A", "A∀", "😃", "x̂"] for limit = 0:100, len = 0:100, str in strs str = str^len str = str[1:nextind(str, 0, len)] out = sprint() do io show(io, MIME"text/plain"(), str; limit) end - lower = length("\"\" ⋯ $(ncodeunits(str)) bytes ⋯ \"\"") + lower = textwidth("\"\" ⋯ $(ncodeunits(str)) bytes ⋯ \"\"") limit = max(limit, lower) - if length(str) + 2 ≤ limit + if textwidth(str) + 2 ≤ limit+1 && !contains(out, '⋯') @test eval(Meta.parse(out)) == str else - @test limit-!isascii(str) <= length(out) <= limit + @test limit-2 <= textwidth(out) <= limit re = r"(\"[^\"]*\") ⋯ (\d+) bytes ⋯ (\"[^\"]*\")" m = match(re, out) head = eval(Meta.parse(m.captures[1])) @@ -886,11 +956,11 @@ end @testset "default elision limit" begin r = replstr("x"^1000) - @test length(r) == 7*80 - @test r == repr("x"^271) * " ⋯ 459 bytes ⋯ " * repr("x"^270) + @test length(r) == 7*80-1 + @test r == repr("x"^270) * " ⋯ 460 bytes ⋯ " * repr("x"^270) r = replstr(["x"^1000]) @test length(r) < 120 - @test r == "1-element Vector{String}:\n " * repr("x"^31) * " ⋯ 939 bytes ⋯ " * repr("x"^30) + @test r == "1-element Vector{String}:\n " * repr("x"^30) * " ⋯ 940 bytes ⋯ " * repr("x"^30) end end @@ -1205,6 +1275,7 @@ let x = [], y = [], z = Base.ImmutableDict(x => y) push!(y, x) push!(y, z) @test replstr(x) == "1-element Vector{Any}:\n Any[Any[#= circular reference @-2 =#], Base.ImmutableDict{Vector{Any}, Vector{Any}}([#= circular reference @-3 =#] => [#= circular reference @-2 =#])]" + @test replstr(x, :color => true) == "1-element Vector{Any}:\n Any[Any[\e[33m#= circular reference @-2 =#\e[39m], Base.ImmutableDict{Vector{Any}, Vector{Any}}([\e[33m#= circular reference @-3 =#\e[39m] => [\e[33m#= circular reference @-2 =#\e[39m])]" @test repr(z) == "Base.ImmutableDict{Vector{Any}, 
Vector{Any}}([Any[Any[#= circular reference @-2 =#], Base.ImmutableDict{Vector{Any}, Vector{Any}}(#= circular reference @-3 =#)]] => [Any[Any[#= circular reference @-2 =#]], Base.ImmutableDict{Vector{Any}, Vector{Any}}(#= circular reference @-2 =#)])" @test sprint(dump, x) == """ Array{Any}((1,)) @@ -1243,70 +1314,73 @@ end @testset "PR 17117: print_array" begin s = IOBuffer(Vector{UInt8}(), read=true, write=true) Base.print_array(s, [1, 2, 3]) - @test String(resize!(s.data, s.size)) == " 1\n 2\n 3" + @test String(take!(s)) == " 1\n 2\n 3" close(s) s2 = IOBuffer(Vector{UInt8}(), read=true, write=true) z = zeros(0,0,0,0,0,0,0,0) Base.print_array(s2, z) - @test String(resize!(s2.data, s2.size)) == "" + @test String(take!(s2)) == "" close(s2) end -let repr = sprint(dump, :(x = 1)) - @test repr == "Expr\n head: Symbol =\n args: Array{Any}((2,))\n 1: Symbol x\n 2: $Int 1\n" -end -let repr = sprint(dump, Pair{String,Int64}) - @test repr == "Pair{String, Int64} <: Any\n first::String\n second::Int64\n" -end -let repr = sprint(dump, Tuple) - @test repr == "Tuple <: Any\n" -end -let repr = sprint(dump, Int64) - @test repr == "Int64 <: Signed\n" -end -let repr = sprint(dump, Any) - @test length(repr) == 4 - @test occursin(r"^Any\n", repr) - @test endswith(repr, '\n') -end -let repr = sprint(dump, Integer) - @test occursin("Integer <: Real", repr) - @test !occursin("Any", repr) -end -let repr = sprint(dump, Union{Integer, Float32}) - @test repr == "Union{Integer, Float32}\n" || repr == "Union{Float32, Integer}\n" -end module M30442 struct T end end -let repr = sprint(show, Union{String, M30442.T}) - @test repr == "Union{$(curmod_prefix)M30442.T, String}" || - repr == "Union{String, $(curmod_prefix)M30442.T}" -end -let repr = sprint(dump, Ptr{UInt8}(UInt(1))) - @test repr == "Ptr{UInt8} @$(Base.repr(UInt(1)))\n" -end -let repr = sprint(dump, Core.svec()) - @test repr == "empty SimpleVector\n" -end -let repr = sprint(dump, sin) - @test repr == "sin (function of type typeof(sin))\n" -end -let repr = sprint(dump, Test) - @test repr == "Module Test\n" -end -let repr = sprint(dump, nothing) - @test repr == "Nothing nothing\n" -end -let a = Vector{Any}(undef, 10000) - a[2] = "elemA" - a[4] = "elemB" - a[11] = "elemC" - repr = sprint(dump, a; context=(:limit => true), sizehint=0) - @test repr == "Array{Any}((10000,))\n 1: #undef\n 2: String \"elemA\"\n 3: #undef\n 4: String \"elemB\"\n 5: #undef\n ...\n 9996: #undef\n 9997: #undef\n 9998: #undef\n 9999: #undef\n 10000: #undef\n" -end -@test occursin("NamedTuple", sprint(dump, NamedTuple)) +@testset "Dump types" begin + let repr = sprint(dump, :(x = 1)) + @test repr == "Expr\n head: Symbol =\n args: Array{Any}((2,))\n 1: Symbol x\n 2: $Int 1\n" + end + let repr = sprint(dump, Pair{String,Int64}) + @test repr == "struct Pair{String, Int64} <: Any\n first::String\n second::Int64\n" + end + let repr = sprint(dump, Tuple) + @test repr == "Tuple <: Any\n" + end + let repr = sprint(dump, Int64) + @test repr == "primitive type Int64 <: Signed\n" + end + let repr = sprint(dump, Any) + @test repr == "abstract type Any\n" + end + let repr = sprint(dump, Integer) + @test occursin("abstract type Integer <: Real", repr) + @test !occursin("Any", repr) + end + let repr = sprint(dump, Union{Integer, Float32}) + @test repr == "Union{Integer, Float32}\n" || repr == "Union{Float32, Integer}\n" + end + let repr = sprint(show, Union{String, M30442.T}) + @test repr == "Union{$(curmod_prefix)M30442.T, String}" || + repr == "Union{String, $(curmod_prefix)M30442.T}" + end + let repr 
= sprint(dump, Ptr{UInt8}(UInt(1))) + @test repr == "Ptr{UInt8}($(Base.repr(UInt(1))))\n" + end + let repr = sprint(dump, Core.svec()) + @test repr == "empty SimpleVector\n" + end + let repr = sprint(dump, sin) + @test repr == "sin (function of type typeof(sin))\n" + end + let repr = sprint(dump, Test) + @test repr == "Module Test\n" + end + let repr = sprint(dump, nothing) + @test repr == "Nothing nothing\n" + end + let a = Vector{Any}(undef, 10000) + a[2] = "elemA" + a[4] = "elemB" + a[11] = "elemC" + repr = sprint(dump, a; context=(:limit => true), sizehint=0) + @test repr == "Array{Any}((10000,))\n 1: #undef\n 2: String \"elemA\"\n 3: #undef\n 4: String \"elemB\"\n 5: #undef\n ...\n 9996: #undef\n 9997: #undef\n 9998: #undef\n 9999: #undef\n 10000: #undef\n" + end + @test occursin("NamedTuple", sprint(dump, NamedTuple)) + + # issue 36495, dumping a partial NamedTupled shouldn't error + @test occursin("NamedTuple", sprint(dump, NamedTuple{(:foo,:bar)})) +end # issue #17338 @test repr(Core.svec(1, 2)) == "svec(1, 2)" @@ -1361,6 +1435,9 @@ test_repr("(:).a") @test repr(Tuple{Float32, Float32, Float32}) == "Tuple{Float32, Float32, Float32}" @test repr(Tuple{String, Int64, Int64, Int64}) == "Tuple{String, Int64, Int64, Int64}" @test repr(Tuple{String, Int64, Int64, Int64, Int64}) == "Tuple{String, Vararg{Int64, 4}}" +@test repr(NTuple) == "NTuple{N, T} where {N, T}" +@test repr(Tuple{NTuple{N}, Vararg{NTuple{N}, 4}} where N) == "NTuple{5, NTuple{N, T} where T} where N" +@test repr(Tuple{Float64, NTuple{N}, Vararg{NTuple{N}, 4}} where N) == "Tuple{Float64, Vararg{NTuple{N, T} where T, 5}} where N" # Test printing of NamedTuples using the macro syntax @test repr(@NamedTuple{kw::Int64}) == "@NamedTuple{kw::Int64}" @@ -1368,22 +1445,26 @@ test_repr("(:).a") @test repr(@NamedTuple{kw::@NamedTuple{kw2::Int64}}) == "@NamedTuple{kw::@NamedTuple{kw2::Int64}}" @test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}" @test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}" +@test repr(@NamedTuple{var"#"::Int64}) == "@NamedTuple{var\"#\"::Int64}" # Test general printing of `Base.Pairs` (it should not use the `@Kwargs` macro syntax) @test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Tuple{Symbol}, @NamedTuple{init::$Int}}" @testset "issue #42931" begin - @test repr(NTuple{4, :A}) == "NTuple{4, :A}" + @test repr(NTuple{4, :A}) == "Tuple{:A, :A, :A, :A}" @test repr(NTuple{3, :A}) == "Tuple{:A, :A, :A}" @test repr(NTuple{2, :A}) == "Tuple{:A, :A}" @test repr(NTuple{1, :A}) == "Tuple{:A}" @test repr(NTuple{0, :A}) == "Tuple{}" @test repr(Tuple{:A, :A, :A, :B}) == "Tuple{:A, :A, :A, :B}" - @test repr(Tuple{:A, :A, :A, :A}) == "NTuple{4, :A}" + @test repr(Tuple{:A, :A, :A, :A}) == "Tuple{:A, :A, :A, :A}" @test repr(Tuple{:A, :A, :A}) == "Tuple{:A, :A, :A}" @test repr(Tuple{:A}) == "Tuple{:A}" @test repr(Tuple{}) == "Tuple{}" + + @test repr(Tuple{Vararg{N, 10}} where N) == "NTuple{10, N} where N" + @test repr(Tuple{Vararg{10, N}} where N) == "Tuple{Vararg{10, N}} where N" end # Test that REPL/mime display of invalid UTF-8 data doesn't throw an exception: @@ -1446,12 +1527,20 @@ end @test static_shown(:+) == ":+" @test static_shown(://) == "://" @test static_shown(://=) == "://=" -@test static_shown(Symbol("")) == "Symbol(\"\")" -@test static_shown(Symbol("a/b")) == "Symbol(\"a/b\")" -@test static_shown(Symbol("a-b")) == "Symbol(\"a-b\")" +@test static_shown(Symbol("")) == ":var\"\"" +@test static_shown(Symbol("a/b")) == ":var\"a/b\"" +@test 
static_shown(Symbol("a-b")) == ":var\"a-b\"" @test static_shown(UnionAll) == "UnionAll" - @test static_shown(QuoteNode(:x)) == ":(:x)" +@test static_shown(:!) == ":!" +@test static_shown("\"") == "\"\\\"\"" +@test static_shown("\$") == "\"\\\$\"" +@test static_shown("\\") == "\"\\\\\"" +@test static_shown("a\x80b") == "\"a\\x80b\"" +@test static_shown("a\x80\$\\b") == "\"a\\x80\\\$\\\\b\"" +@test static_shown(GlobalRef(Main, :var"a#b")) == "Main.var\"a#b\"" +@test static_shown(GlobalRef(Main, :+)) == "Main.:(+)" +@test static_shown((a = 3, ! = 4, var"a b" = 5)) == "(a=3, (!)=4, var\"a b\"=5)" # PR #38049 @test static_shown(sum) == "Base.sum" @@ -1772,6 +1861,9 @@ end B = @view ones(2)[r] Base.showarg(io, B, false) @test String(take!(io)) == "view(::Vector{Float64}, $(repr(r)))" + + Base.showarg(io, reshape(UnitRange{Int64}(1,1)), false) + @test String(take!(io)) == "reshape(::UnitRange{Int64})" end @testset "Methods" begin @@ -1868,6 +1960,7 @@ end @test showstr(Pair{Integer,Integer}(1, 2), :typeinfo => Pair{Integer,Integer}) == "1 => 2" @test showstr([Pair{Integer,Integer}(1, 2)]) == "Pair{Integer, Integer}[1 => 2]" + @test showstr([(a=1,)]) == "[(a = 1,)]" @test showstr(Dict{Integer,Integer}(1 => 2)) == "Dict{Integer, Integer}(1 => 2)" @test showstr(Dict(true=>false)) == "Dict{Bool, Bool}(1 => 0)" @test showstr(Dict((1 => 2) => (3 => 4))) == "Dict((1 => 2) => (3 => 4))" @@ -1956,12 +2049,12 @@ end end @testset "Intrinsic printing" begin - @test sprint(show, Core.Intrinsics.arraylen) == "Core.Intrinsics.arraylen" - @test repr(Core.Intrinsics.arraylen) == "Core.Intrinsics.arraylen" + @test sprint(show, Core.Intrinsics.cglobal) == "Core.Intrinsics.cglobal" + @test repr(Core.Intrinsics.cglobal) == "Core.Intrinsics.cglobal" let io = IOBuffer() - show(io, MIME"text/plain"(), Core.Intrinsics.arraylen) + show(io, MIME"text/plain"(), Core.Intrinsics.cglobal) str = String(take!(io)) - @test occursin("arraylen", str) + @test occursin("cglobal", str) @test occursin("(intrinsic function", str) end @test string(Core.Intrinsics.add_int) == "add_int" @@ -2036,6 +2129,7 @@ eval(Meta._parse_string("""function my_fun28173(x) r = 1 s = try r = 2 + Base.inferencebarrier(false) && error() "BYE" catch r = 3 @@ -2049,7 +2143,7 @@ eval(Meta._parse_string("""function my_fun28173(x) end""", "a"^80, 1, 1, :statement)[1]) # use parse to control the line numbers let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1] ir = Core.Compiler.inflate_ir(src) - fill!(src.codelocs, 0) # IRCode printing is only capable of printing partial line info + src.debuginfo = Core.DebugInfo(src.debuginfo.def) # IRCode printing defaults to incomplete line info printing, so turn it off completely for CodeInfo too let source_slotnames = String["my_fun28173", "x"], repr_ir = split(repr(ir, context = :SOURCE_SLOTNAMES=>source_slotnames), '\n'), repr_ir = "CodeInfo(\n" * join((l[4:end] for l in repr_ir), "\n") * ")" # remove line numbers @@ -2059,8 +2153,8 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1] @test all(isspace, pop!(lines1)) Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(1), Val{1}), false) Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(2), Val{2}), true) - Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false) - Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true) + Core.Compiler.insert_node!(ir, length(ir.stmts.stmt), 
Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false) + Core.Compiler.insert_node!(ir, length(ir.stmts.stmt), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true) lines2 = split(repr(ir), '\n') @test all(isspace, pop!(lines2)) @test popfirst!(lines2) == "2 1 ── $(QuoteNode(1))" @@ -2072,9 +2166,9 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1] end @test popfirst!(lines2) == " │ $(QuoteNode(2))" @test pop!(lines2) == " └─── \$(QuoteNode(4))" - @test pop!(lines1) == "17 └─── return %18" - @test pop!(lines2) == " │ return %18" - @test pop!(lines2) == "17 │ \$(QuoteNode(3))" + @test pop!(lines1) == "18 └─── return %21" + @test pop!(lines2) == " │ return %21" + @test pop!(lines2) == "18 │ \$(QuoteNode(3))" @test lines1 == lines2 # verbose linetable @@ -2096,7 +2190,7 @@ end # with as unnamed "!" BB. let src = code_typed(gcd, (Int, Int), debuginfo=:source)[1][1] ir = Core.Compiler.inflate_ir(src) - push!(ir.stmts.inst, Core.Compiler.ReturnNode()) + push!(ir.stmts.stmt, Core.Compiler.ReturnNode()) lines = split(sprint(show, ir), '\n') @test all(isspace, pop!(lines)) @test pop!(lines) == " !!! ── unreachable::#UNDEF" @@ -2153,6 +2247,20 @@ replstrcolor(x) = sprint((io, x) -> show(IOContext(io, :limit => true, :color => @test_repr "Bool[1, 0]" end +@testset "Unions with Bool (#39590)" begin + @test repr([missing, false]) == "Union{Missing, Bool}[missing, 0]" + @test_repr "Union{Bool, Nothing}[1, 0, nothing]" +end + +# issue #26847 +@test_repr "Union{Missing, Float32}[1.0]" + +# intersection of #45396 and #48822 +@test_repr "Union{Missing, Rational{Int64}}[missing, 1//2, 2]" + +# Don't go too far with #48822 +@test_repr "Union{String, Bool}[true]" + # issue #30505 @test repr(Union{Tuple{Char}, Tuple{Char, Char}}[('a','b')]) == "Union{Tuple{Char}, Tuple{Char, Char}}[('a', 'b')]" @@ -2458,9 +2566,9 @@ end # replace an instruction add_stmt = ir.stmts[1] - inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:inst].args[1], add_stmt[:inst].args[2], 999), Int) + inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:stmt].args[1], add_stmt[:stmt].args[2], 999), Int) node = Core.Compiler.insert_node!(ir, 1, inst) - Core.Compiler.setindex!(add_stmt, node, :inst) + Core.Compiler.setindex!(add_stmt, node, :stmt) # the new node should be colored green (as it's uncompacted IR), # and its uses shouldn't be colored at all (since they're just plain valid references) @@ -2471,7 +2579,7 @@ end @test contains(str, "%1 = %6") # if we insert an invalid node, it should be colored appropriately - Core.Compiler.setindex!(add_stmt, Core.Compiler.SSAValue(node.id+1), :inst) + Core.Compiler.setindex!(add_stmt, Core.Compiler.SSAValue(node.id+1), :stmt) str = sprint(; context=:color=>true) do io show(io, ir) end @@ -2630,3 +2738,60 @@ end ir = Core.Compiler.complete(compact) verify_display(ir) end + +let buf = IOBuffer() + Base.show_tuple_as_call(buf, Symbol(""), Tuple{Function,Any}) + @test String(take!(buf)) == "(::Function)(::Any)" +end + +module Issue49382 + abstract type Type49382 end +end +using .Issue49382 +(::Type{Issue49382.Type49382})() = 1 +@test sprint(show, methods(Issue49382.Type49382)) isa String + +# Showing of bad SlotNumber in Expr(:toplevel) +let lowered = Meta.lower(Main, Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(x = 1)), :(y = 1)))) + ci = lowered.args[1] + @assert isa(ci, Core.CodeInfo) + @test !isempty(ci.slotnames) + @assert ci.code[1].head === :toplevel + ci.code[1].args[1] = :($(Core.SlotNumber(1)) = 1) + # Check that this gets printed as `_1 = 1` 
not `y = 1` + @test contains(sprint(show, ci), "_1 = 1") +end + +# Pointers should be reprable +@test is_juliarepr(pointer([1])) +@test is_juliarepr(Ptr{Vector{Complex{Float16}}}(UInt(0xdeadbeef))) + +# Toplevel MethodInstance with undef :uninferred +let topmi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()); + topmi.specTypes = Tuple{} + topmi.def = Main + @test contains(repr(topmi), "Toplevel MethodInstance") +end + +@testset "show() no trailing whitespace" begin + do_expr1 = :(foo() do; bar(); end) + @test !contains(sprint(show, do_expr1), " \n") +end + +struct NoLengthDict{K,V} <: AbstractDict{K,V} + dict::Dict{K,V} + NoLengthDict{K,V}() where {K,V} = new(Dict{K,V}()) +end +Base.iterate(d::NoLengthDict, s...) = iterate(d.dict, s...) +Base.IteratorSize(::Type{<:NoLengthDict}) = Base.SizeUnknown() +Base.eltype(::Type{NoLengthDict{K,V}}) where {K,V} = Pair{K,V} +Base.setindex!(d::NoLengthDict, v, k) = d.dict[k] = v + +# Issue 55931 +@testset "show AbstractDict with unknown length" begin + x = NoLengthDict{Int,Int}() + x[1] = 2 + str = sprint(io->show(io, MIME("text/plain"), x)) + @test contains(str, "NoLengthDict") + @test contains(str, "1 => 2") +end diff --git a/test/smallarrayshrink.jl b/test/smallarrayshrink.jl index a1a7df5aee5a5..680a882e432d4 100644 --- a/test/smallarrayshrink.jl +++ b/test/smallarrayshrink.jl @@ -1,45 +1,20 @@ @testset "shrink small array" begin - x = [1, 2, 3, 4] - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - sizehint!(x, 10000) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 10000 - sizehint!(x, 4) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - - x = [1, 2, 3, 4] - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - sizehint!(x, 1000000) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 1000000 - sizehint!(x, 4) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 + function check_array(x, size, capacity) + @test x[1] == 1 + @test x[2] == 2 + @test x[3] == 3 + @test x[4] == 4 + @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == size + @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == capacity + end + for hint_size = [10000, 1000000] + x = [1, 2, 3, 4] + check_array(x, 4, 4) + sizehint!(x, hint_size) + check_array(x, 4, hint_size) + sizehint!(x, 4; shrink = false) + check_array(x, 4, hint_size) + sizehint!(x, 4) + check_array(x, 4, 4) + end end diff --git a/test/some.jl b/test/some.jl index e49fc586a3a6e..89f699d8306c3 100644 --- a/test/some.jl +++ b/test/some.jl @@ -44,11 +44,37 @@ ## == and isequal nothing -@test Some(1) != nothing -@test Some(nothing) != nothing +@test Some(1) !== nothing +@test Some(nothing) !== nothing @test !isequal(Some(1), nothing) @test !isequal(Some(nothing), nothing) +# Some with 
something else is false +@test !=(Some(nothing), nothing) +@test !=(nothing, Some(nothing)) + +# Two `Some`s forward to their wrapped things +@test ==(Some([0x1]), Some([1])) + +# propagate wrapped missings +@test !=(Some(1), Some(missing)) isa Missing +@test !=(Some(missing), Some(1)) isa Missing +@test ==(Some(missing), Some(missing)) isa Missing + +# Make sure to still propagate non-wrapped Missing +@test ==(Some(1), missing) isa Missing +@test ==(missing, Some(1)) isa Missing + +@test isequal(Some([0x1]), Some([1])) +@test !isequal(missing, Some(missing)) +@test !isequal(Some(missing), missing) +@test isequal(Some(missing), Some(missing)) + +# hashing implications +@test hash(Some(0x1)) != hash(0x1) +@test hash(Some(0x1)) == hash(Some(1)) +@test hash((Some(1),)) != hash((1, Some)) + @testset "something" begin @test_throws ArgumentError something() @test something(1) === 1 diff --git a/test/sorting.jl b/test/sorting.jl index 147a70a5db7d9..e16b30de5bfc8 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -9,6 +9,12 @@ using Test isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") using .Main.OffsetArrays +@testset "Base.Sort docstrings" begin + undoc = Docs.undocumented_names(Base.Sort) + @test_broken isempty(undoc) + @test undoc == [:Algorithm, :SMALL_THRESHOLD, :Sort] +end + @testset "Order" begin @test Forward == ForwardOrdering() @test ReverseOrdering(Forward) == ReverseOrdering() == Reverse @@ -94,12 +100,14 @@ function tuple_sort_test(x) @test 0 == @allocated sort(x) end @testset "sort(::NTuple)" begin + @test sort(()) == () @test sort((9,8,3,3,6,2,0,8)) == (0,2,3,3,6,8,8,9) @test sort((9,8,3,3,6,2,0,8), by=x->x÷3) == (2,0,3,3,8,6,8,9) for i in 1:40 - tuple_sort_test(tuple(rand(i)...)) + tuple_sort_test(rand(NTuple{i, Float64})) end - @test_throws ArgumentError sort((1,2,3.0)) + @test_throws MethodError sort((1,2,3.0)) + @test Base.infer_return_type(sort, Tuple{Tuple{Vararg{Int}}}) == Tuple{Vararg{Int}} end @testset "partialsort" begin @@ -544,23 +552,6 @@ end @test isequal(a, [8,6,7,NaN,5,3,0,9]) end -@testset "sort!(iterable)" begin - gen = (x % 7 + 0.1x for x in 1:50) - @test sort(gen) == sort!(collect(gen)) - gen = (x % 7 + 0.1y for x in 1:10, y in 1:5) - @test sort(gen; dims=1) == sort!(collect(gen); dims=1) - @test sort(gen; dims=2) == sort!(collect(gen); dims=2) - - @test_throws ArgumentError("dimension out of range") sort(gen; dims=3) - - @test_throws UndefKeywordError(:dims) sort(gen) - @test_throws UndefKeywordError(:dims) sort(collect(gen)) - @test_throws UndefKeywordError(:dims) sort!(collect(gen)) - - @test_throws ArgumentError sort("string") - @test_throws ArgumentError("1 cannot be sorted") sort(1) -end - @testset "sort!(::AbstractVector{<:Integer}) with short int range" begin a = view([9:-1:0;], :)::SubArray sort!(a) @@ -606,6 +597,26 @@ end @test searchsortedfirst(o, 1.5) == 0 @test searchsortedlast(o, 0) == firstindex(o) - 1 @test searchsortedlast(o, 1.5) == -1 + + # Issue #56457 + o2 = OffsetArray([2,2,3], typemax(Int)-3); + @test searchsorted(o2, 2) == firstindex(o2):firstindex(o2)+1 + + struct IdentityVector <: AbstractVector{Int} + lo::Int + hi::Int + end + function Base.getindex(s::IdentityVector, i::Int) + s.lo <= i <= s.hi || throw(BoundsError(s, i)) + i + end + Base.axes(s::IdentityVector) = (s.lo:s.hi,) + Base.size(s::IdentityVector) = length.(axes(s)) + + o3 = IdentityVector(typemin(Int), typemin(Int)+5) + @test searchsortedfirst(o3, typemin(Int)+2) === typemin(Int)+2 + @test searchsortedlast(o3, typemin(Int)+2) === 
typemin(Int)+2 + @test searchsorted(o3, typemin(Int)+2) === typemin(Int)+2:typemin(Int)+2 end function adaptive_sort_test(v; trusted=InsertionSort, kw...) @@ -735,6 +746,7 @@ end safe_algs = [InsertionSort, MergeSort, Base.Sort.ScratchQuickSort(), Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] n = 1000 + Random.seed!(0x3588d23f15e74060); v = rand(1:5, n); s = sort(v); @@ -752,8 +764,9 @@ end for alg in safe_algs @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end - @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] - @test partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415] + # Broken by the introduction of BracketedSort in #52006 which is unstable + # @test_broken partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] (sometimes passes due to RNG) + @test_broken partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415] # lt can be very poorly behaved and sort will still permute its input in some way. for alg in safe_algs @@ -822,9 +835,9 @@ end let requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort, Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort, - typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes), - typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big), - typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big.next)} + typeof(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS.next.next.next.big.next.yes), + typeof(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS.next.next.next.big.next.yes.big), + typeof(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS.next.next.next.big.next.yes.big.next)} function test_alg(kw, alg, float=true) for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)] @@ -864,15 +877,18 @@ end end end - test_alg_rec(Base.DEFAULT_STABLE) + test_alg_rec(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS) end end @testset "show(::Algorithm)" begin - @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE - lines = split(string(Base.DEFAULT_STABLE), '\n') + @test eval(Meta.parse(string(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS))) === Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS + lines = split(string(Base.Sort._DEFAULT_ALGORITHMS_FOR_VECTORS), '\n') @test 10 < maximum(length, lines) < 100 @test 1 < length(lines) < 30 + + @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE + @test string(Base.DEFAULT_STABLE) == "Base.Sort.DefaultStable()" end @testset "Extensibility" begin @@ -913,6 +929,20 @@ end end @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9] @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) + + # Tuple extensions (custom alg) + @test_throws MethodError sort((1,2,3), alg=MyFirstAlg()) + Base.Sort._sort(v::NTuple, ::MyFirstAlg, o::Base.Order.Ordering, kw) = (17,2,9) + @test sort((1,2,3), alg=MyFirstAlg()) == (17,2,9) + + struct TupleFoo + x::Int + end + + # Tuple extensions (custom type) + @test_throws MethodError sort(TupleFoo.((3,1,2))) + Base.Sort._sort(v::NTuple{N, TupleFoo}, ::Base.Sort.DefaultStable, o::Base.Order.Ordering, kw) where N = v + @test sort(TupleFoo.((3,1,2))) === TupleFoo.((3,1,2)) end @testset "sort!(v, lo, hi, alg, order)" begin @@ -971,9 +1001,10 @@ end end @testset "ScratchQuickSort allocations on non-concrete eltype" begin - v = Vector{Union{Nothing, Bool}}(rand(Bool, 10000)) - @test 4 == @allocations sort(v) - @test 4 == @allocations sort(v; alg=Base.Sort.ScratchQuickSort()) + let v = Vector{Union{Nothing, Bool}}(rand(Bool, 10000)) + @test 10 > @allocations 
sort(v) + @test 10 > @allocations sort(v; alg=Base.Sort.ScratchQuickSort()) + end # it would be nice if these numbers were lower (1 or 2), but these # test that we don't have O(n) allocations due to type instability end @@ -981,15 +1012,15 @@ end function test_allocs() v = rand(10) i = randperm(length(v)) - @test 1 == @allocations sort(v) + @test 2 >= @allocations sort(v) @test 0 == @allocations sortperm!(i, v) @test 0 == @allocations sort!(i) @test 0 == @allocations sortperm!(i, v, rev=true) - @test 1 == @allocations sortperm(v, rev=true) - @test 1 == @allocations sortperm(v, rev=false) + @test 2 >= @allocations sortperm(v, rev=true) + @test 2 >= @allocations sortperm(v, rev=false) @test 0 == @allocations sortperm!(i, v, order=Base.Reverse) - @test 1 == @allocations sortperm(v) - @test 1 == @allocations sortperm(i, by=sqrt) + @test 2 >= @allocations sortperm(v) + @test 2 >= @allocations sortperm(i, by=sqrt) @test 0 == @allocations sort!(v, lt=(a, b) -> hash(a) < hash(b)) sort!(Int[], rev=false) # compile @test 0 == @allocations sort!(i, rev=false) @@ -1065,6 +1096,32 @@ end @test issorted(sort!(rand(100), Base.Sort.InitialOptimizations(DispatchLoopTestAlg()), Base.Order.Forward)) end +# Pathologize 0 is a noop, pathologize 3 is fully pathological +function pathologize!(x, level) + Base.require_one_based_indexing(x) + k2 = Int(cbrt(length(x))^2) + seed = hash(length(x), Int === Int64 ? 0x85eb830e0216012d : 0xae6c4e15) + for a in 1:level + seed = hash(a, seed) + x[mod.(hash.(1:k2, seed), range.(1:k2,lastindex(x)))] .= a + end + x +end + +@testset "partialsort tests added for BracketedSort #52006" begin + for x in [pathologize!.(Ref(rand(Int, 1000)), 0:3); pathologize!.(Ref(rand(1000)), 0:3); [pathologize!(rand(Int, 1_000_000), 3)]] + @test partialsort(x, 1) == minimum(x) + @test partialsort(x, lastindex(x)) == maximum(x) + sx = sort(x) + for i in [1, 2, 4, 10, 11, 425, 500, 845, 991, 997, 999, 1000] + @test partialsort(x, i) == sx[i] + end + for i in [1:1, 1:2, 1:5, 1:8, 1:9, 1:11, 1:108, 135:812, 220:586, 363:368, 450:574, 458:597, 469:638, 487:488, 500:501, 584:594, 1000:1000] + @test partialsort(x, i) == sx[i] + end + end +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, @@ -1225,6 +1282,16 @@ end @test searchsorted(v, 0.1, rev=true) === 4:3 end end + + @testset "ranges issue #44102, PR #50365" begin + # range sorting test for different Ordering parameter combinations + @test searchsorted(-1000.0:1:1000, -0.0) === 1001:1000 + @test searchsorted(-1000.0:1:1000, -0.0; lt=<) === 1001:1001 + @test searchsorted(-1000.0:1:1000, -0.0; lt=<, by=x->x) === 1001:1001 + @test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, by=-) === 1001:1001 + @test searchsorted(reverse(-1000.0:1:1000), -0.0, rev=true) === 1002:1001 + @test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, rev=true) === 1001:1001 + end end # The "searchsorted" testset is at the end of the file because it is slow. 
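# A minimal standalone sketch of the ordering semantics exercised by the
# "ranges issue #44102, PR #50365" searchsorted tests above: the default order
# uses `isless`, which places -0.0 strictly before 0.0, so -0.0 matches nothing
# in -1000.0:1:1000 and only the empty insertion range just before the 0.0 at
# index 1001 comes back; with `lt=<`, -0.0 and 0.0 compare equal, so the
# singleton range containing that 0.0 is returned instead.
@assert isless(-0.0, 0.0) && !(-0.0 < 0.0)                      # isless separates the zeros, < does not
@assert searchsorted(-1000.0:1:1000, -0.0) === 1001:1000        # empty: insertion point only
@assert searchsorted(-1000.0:1:1000, -0.0; lt=<) === 1001:1001  # hit: the 0.0 at index 1001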
diff --git a/test/spawn.jl b/test/spawn.jl index 0241c65573886..c1802ba1f74da 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -22,8 +22,8 @@ lscmd = `ls` havebb = false function _tryonce_download_from_cache(desired_url::AbstractString) - cache_url = "https://cache.julialang.org/foo/$(desired_url)" - cache_output_filename = joinpath(mktempdir(), "myfile") + cache_url = "https://cache.julialang.org/$(desired_url)" + cache_output_filename = joinpath(mktempdir(), "busybox") cache_response = Downloads.request( cache_url; output = cache_output_filename, @@ -573,7 +573,7 @@ end @test Cmd(`foo`, env=["A=true"]).env == ["A=true"] @test Cmd(`foo`, env=("A"=>true,)).env == ["A=true"] @test Cmd(`foo`, env=["A"=>true]).env == ["A=true"] -@test Cmd(`foo`, env=nothing).env == nothing +@test Cmd(`foo`, env=nothing).env === nothing # test for interpolation of Cmd let c = setenv(`x`, "A"=>true) @@ -660,9 +660,21 @@ let p = run(`$sleepcmd 100`, wait=false) kill(p) end -# Second argument of shell_parse +# Second return of shell_parse let s = " \$abc " - @test s[Base.shell_parse(s)[2]] == "abc" + @test Base.shell_parse(s)[2] === findfirst('a', s) + s = "abc def" + @test Base.shell_parse(s)[2] === findfirst('d', s) + s = "abc 'de'f\"\"g" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc \$x'de'f\"\"g" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc def\$x'g'" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc def\$x " + @test Base.shell_parse(s)[2] === findfirst('x', s) + s = "abc \$(d)ef\$(x " + @test Base.shell_parse(s)[2] === findfirst('x', s) - 1 end # Logging macros should not output to finalized streams (#26687) @@ -795,8 +807,9 @@ let text = "input-test-text" out = Base.BufferStream() proc = run(catcmd, IOBuffer(text), out, wait=false) @test proc.out === out - @test read(out, String) == text @test success(proc) + closewrite(out) + @test read(out, String) == text out = PipeBuffer() proc = run(catcmd, IOBuffer(SubString(text)), out) @@ -1003,5 +1016,26 @@ end args = ["ab ^` c", " \" ", "\"", ascii95, ascii95, "\"\\\"\\", "", "|", "&&", ";"]; @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(args...)) == "\"ab ^` c\" \" \\\" \" \"\\\"\" \" !\\\"#\$%^&'^(^)*+,-./0123456789:;^<=^>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^^_`abcdefghijklmnopqrstuvwxyz{^|}~\" \" ^!\\\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\" \"\\\"\\\\\\\"\\\\\" \"\" ^| ^&^& ;" +end + +# effects for Cmd construction +for f in (() -> `a b c`, () -> `a a$("bb")a $("c")`) + effects = Base.infer_effects(f) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_terminates(effects) + @test Core.Compiler.is_noub(effects) + @test !Core.Compiler.is_consistent(effects) +end +let effects = Base.infer_effects(x -> `a $x`, (Any,)) + @test !Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_terminates(effects) + @test !Core.Compiler.is_noub(effects) + @test !Core.Compiler.is_consistent(effects) +end +# Test that Cmd accepts various AbstractStrings +@testset "AbstractStrings" begin + args = split("-l /tmp") + @assert eltype(args) != String + @test Cmd(["ls", args...]) == `ls -l /tmp` end diff --git a/test/specificity.jl b/test/specificity.jl index 9b605444bad42..13688036c2047 100644 --- a/test/specificity.jl +++ b/test/specificity.jl @@ -1,9 +1,9 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license function args_morespecific(a, b) - sp = (ccall(:jl_type_morespecific, Cint, (Any,Any), a, b) != 0) + sp = Base.morespecific(a, b) if sp # make sure morespecific(a,b) implies !morespecific(b,a) - @test ccall(:jl_type_morespecific, Cint, (Any,Any), b, a) == 0 + @test !Base.morespecific(b, a) end return sp end @@ -316,3 +316,14 @@ end @test args_morespecific(Tuple{typeof(Union{}), Any}, Tuple{Any, Type{Union{}}}) @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any}, Tuple{Type{Union{}}, Any, Type{Union{}}}) @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any, Type{Union{}}}, Tuple{Type{Union{}}, Any, Type{Union{}}, Type{Union{}}}) + +# requires assertions enabled +let root = NTuple + N = root.var + T = root.body.var + x1 = root.body.body + x2 = Dict{T,Tuple{N}} + A = UnionAll(N, UnionAll(T, Tuple{Union{x1, x2}})) + B = Tuple{Union{UnionAll(N, UnionAll(T, x1)), UnionAll(N, UnionAll(T, x2))}} + @ccall jl_type_morespecific_no_subtype(A::Any, B::Any)::Cint +end diff --git a/test/stacktraces.jl b/test/stacktraces.jl index 590abb90c590f..ca553c2a2e801 100644 --- a/test/stacktraces.jl +++ b/test/stacktraces.jl @@ -91,9 +91,11 @@ trace = (try; f(3); catch; stacktrace(catch_backtrace()); end)[1:3] can_inline = Bool(Base.JLOptions().can_inline) for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false)) @test frame.func === typeof(func).name.mt.name - @test frame.linfo.def.module === which(func, (Any,)).module - @test frame.linfo.def === which(func, (Any,)) - @test frame.linfo.specTypes === Tuple{typeof(func), Int} + # broken until #50082 can be addressed + mi = isa(frame.linfo, Core.CodeInstance) ? frame.linfo.def : frame.linfo + @test mi.def.module === which(func, (Any,)).module broken=inlined + @test mi.def === which(func, (Any,)) broken=inlined + @test mi.specTypes === Tuple{typeof(func), Int} broken=inlined # line @test frame.file === Symbol(@__FILE__) @test !frame.from_c @@ -101,13 +103,8 @@ for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false end end -let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo, - li = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()), - sf - - setfield!(li, :uninferred, src, :monotonic) - li.specTypes = Tuple{} - li.def = @__MODULE__ +let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo + li = ccall(:jl_method_instance_for_thunk, Ref{Core.MethodInstance}, (Any, Any), src, @__MODULE__) sf = StackFrame(:a, :b, 3, li, false, false, 0) repr = string(sf) @test repr == "Toplevel MethodInstance thunk at b:3" @@ -159,6 +156,22 @@ end @test bt[1].line == topline+4 end +# Accidental incorrect phi block computation in interpreter +global global_false_bool = false +let bt, topline = @__LINE__ + try + let + global read_write_global_bt_test, global_false_bool + if global_false_bool + end + (read_write_global_bt_test, (read_write_global_bt_test=2;)) + end + catch + bt = stacktrace(catch_backtrace()) + end + @test bt[1].line == topline+6 +end + # issue #28990 let bt try @@ -234,8 +247,7 @@ struct F49231{a,b,c,d,e,f,g} end catch e stacktrace(catch_backtrace()) end - str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,105))) - @test endswith(str, "to see complete types.") + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :stacktrace_types_limited => Ref(false), :color=>true, :displaysize=>(50,105))) @test contains(str, "[5] 
\e[0m\e[1mcollect_to!\e[22m\e[0m\e[1m(\e[22m\e[90mdest\e[39m::\e[0mVector\e[90m{…}\e[39m, \e[90mitr\e[39m::\e[0mBase.Generator\e[90m{…}\e[39m, \e[90moffs\e[39m::\e[0m$Int, \e[90mst\e[39m::\e[0mTuple\e[90m{…}\e[39m\e[0m\e[1m)\e[22m\n\e[90m") st = try @@ -243,6 +255,10 @@ struct F49231{a,b,c,d,e,f,g} end catch e stacktrace(catch_backtrace()) end - str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,132))) + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :stacktrace_types_limited => Ref(false), :color=>true, :displaysize=>(50,132))) @test contains(str, "[2] \e[0m\e[1m(::$F49231{Vector, Val{…}, Vector{…}, NTuple{…}, $Int, $Int, $Int})\e[22m\e[0m\e[1m(\e[22m\e[90ma\e[39m::\e[0m$Int, \e[90mb\e[39m::\e[0m$Int, \e[90mc\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n\e[90m") end + +@testset "Base.StackTraces docstrings" begin + @test isempty(Docs.undocumented_names(StackTraces)) +end diff --git a/test/staged.jl b/test/staged.jl index df351d8d47b96..6cb99950a7bb2 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -1,5 +1,8 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# N.B.: This file is also run from interpreter.jl, so needs to be standalone-executable +using Test + using Random using InteractiveUtils: code_llvm, code_native @@ -200,7 +203,7 @@ let gf_err2 @test_throws Expected gf_err2(code_typed) @test_throws Expected gf_err2(code_llvm) @test_throws Expected gf_err2(code_native) - @test gf_err_ref[] == 88 + @test gf_err_ref[] < 1000 end # issue #15043 @@ -267,12 +270,12 @@ end # PR #23168 -function f23168(a, x) +@eval function f23168(a, x) push!(a, 1) if @generated - :(y = x + x) + :(y = $(+)(x, x)) else - y = 2x + y = $(*)(2, x) end push!(a, y) if @generated @@ -287,9 +290,9 @@ end let a = Any[] @test f23168(a, 3) == (6, Int) @test a == [1, 6, 3] - @test occursin(" + ", string(code_lowered(f23168, (Vector{Any},Int)))) - @test occursin("2 * ", string(Base.uncompressed_ir(first(methods(f23168))))) - @test occursin("2 * ", string(code_lowered(f23168, (Vector{Any},Int), generated=false))) + @test occursin("(+)(", string(code_lowered(f23168, (Vector{Any},Int)))) + @test occursin("(*)(2", string(Base.uncompressed_ir(first(methods(f23168))))) + @test occursin("(*)(2", string(code_lowered(f23168, (Vector{Any},Int), generated=false))) @test occursin("Base.add_int", string(code_typed(f23168, (Vector{Any},Int)))) end @@ -308,6 +311,8 @@ end @generated function f33243() :(global x33243 = 2) end +@test_throws ErrorException f33243() +global x33243 @test f33243() === 2 @test x33243 === 2 @@ -335,12 +340,112 @@ let world = Base.get_world_counter() match = Base._which(Tuple{typeof(sin), Int}; world) mi = Core.Compiler.specialize_method(match) lwr = Core.Compiler.retrieve_code_info(mi, world) - @test all(lin->lin.method === :sin, lwr.linetable) + nstmts = length(lwr.code) + di = Core.DebugInfo(Core.Compiler.DebugInfoStream(mi, lwr.debuginfo, nstmts), nstmts) + lwr.debuginfo = di @eval function sin_generated(a) $(Expr(:meta, :generated, Returns(lwr))) $(Expr(:meta, :generated_only)) end src = only(code_lowered(sin_generated, (Int,))) - @test all(lin->lin.method === :sin, src.linetable) + @test src.debuginfo === di @test sin_generated(42) == sin(42) end + +# Allow passing unreachable insts in generated codeinfo +let + dummy() = return + dummy_m = which(dummy, Tuple{}) + + src = Base.uncompressed_ir(dummy_m) + src.code = Any[ + # block 1 + Core.ReturnNode(nothing), + # block 2 + Core.ReturnNode(), + ] + nstmts = length(src.code) + 
nslots = 1 + src.ssavaluetypes = nstmts + src.debuginfo = Core.DebugInfo(:f_unreachable_generated) + src.ssaflags = fill(Int32(0), nstmts) + src.slotflags = fill(0, nslots) + src.slottypes = Any[Any] + + @eval function f_unreachable() + $(Expr(:meta, :generated, Returns(src))) + $(Expr(:meta, :generated_only)) + end + + ir, _ = Base.code_ircode(f_unreachable, ()) |> only + @test length(ir.cfg.blocks) == 1 +end + +function generate_lambda_ex(world::UInt, source::LineNumberNode, + argnames, spnames, @nospecialize body) + stub = Core.GeneratedFunctionStub(identity, Core.svec(argnames...), Core.svec(spnames...)) + return stub(world, source, body) +end + +# Test that `Core.CachedGenerator` works as expected +struct Generator54916 <: Core.CachedGenerator end +function (::Generator54916)(world::UInt, source::LineNumberNode, args...) + return generate_lambda_ex(world, source, + (:doit54916, :func, :arg), (), :(func(arg))) +end +@eval function doit54916(func, arg) + $(Expr(:meta, :generated, Generator54916())) + $(Expr(:meta, :generated_only)) +end +@test doit54916(sin, 1) == sin(1) +let mi = only(methods(doit54916)).specializations + ci = mi.cache::Core.CodeInstance + found = false + while true + if ci.owner === :uninferred && ci.inferred isa Core.CodeInfo + found = true + break + end + isdefined(ci, :next) || break + ci = ci.next + end + @test found +end + +# Test that writing a bad cassette-style pass gives the expected error (#49715) +function generator49715(world, source, self, f, tt) + tt = tt.parameters[1] + sig = Tuple{f, tt.parameters...} + mi = Base._which(sig; world) + error("oh no") + return generate_lambda_ex(world, source, + (:doit49715, :f, :tt), (), nothing) +end +@eval function doit49715(f, tt) + $(Expr(:meta, :generated, generator49715)) + $(Expr(:meta, :generated_only)) +end +@test_throws "oh no" doit49715(sin, Tuple{Int}) + +# Test that the CodeInfo returned from generated function need not match the generator. +function overdubbee54341(a, b) + a + b +end +const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1] +function overdub_generator54341(world::UInt, source::LineNumberNode, selftype, fargtypes) + if length(fargtypes) != 2 + return generate_lambda_ex(world, source, + (:overdub54341, :args), (), :(error("Wrong number of arguments"))) + else + return copy(overdubee_codeinfo54341) + end +end +@eval function overdub54341(args...) + $(Expr(:meta, :generated, overdub_generator54341)) + $(Expr(:meta, :generated_only)) +end +@test overdub54341(1, 2) == 3 +# check if the inlining pass handles `nargs`/`isva` correctly +@test first(only(code_typed((Int,Int)) do x, y; @inline overdub54341(x, y); end)) isa Core.CodeInfo +@test first(only(code_typed((Int,)) do x; @inline overdub54341(x, 1); end)) isa Core.CodeInfo +@test_throws "Wrong number of arguments" overdub54341(1, 2, 3) diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl new file mode 100644 index 0000000000000..dbb81cb48acbc --- /dev/null +++ b/test/strings/annotated.jl @@ -0,0 +1,257 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +@testset "AnnotatedString" begin + str = Base.AnnotatedString("some string") + @test str == Base.AnnotatedString(str.string, Base.RegionAnnotation[]) + @test length(str) == 11 + @test ncodeunits(str) == 11 + @test codeunits(str) == codeunits("some string") + @test codeunit(str) == UInt8 + @test codeunit(str, 1) == codeunit("some string", 1) + @test firstindex(str) == firstindex("some string") + @test convert(Base.AnnotatedString, str) === str + @test eltype(str) == Base.AnnotatedChar{eltype(str.string)} + @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[]) + @test str[1:4] isa SubString{typeof(str)} + @test str[1:4] == Base.AnnotatedString("some") + big_byte_str = Base.AnnotatedString("आख") + @test_throws StringIndexError big_byte_str[5] + @test "a" * str == Base.AnnotatedString("asome string") + @test str * "a" == Base.AnnotatedString("some stringa") + @test str * str == Base.AnnotatedString("some stringsome string") + @test cmp(str, "some stringy thingy") == -1 + @test cmp("some stringy thingy", str) == 1 + @test str[3:4] == SubString("me") + @test SubString("me") == str[3:4] + Base.annotate!(str, 1:4, :thing, 0x01) + Base.annotate!(str, 6:11, :other, 0x02) + Base.annotate!(str, 1:11, :all, 0x03) + # :thing :other + # ┌┸─┐ ┌──┸─┐ + # "some string" + # └───┰─────┘ + # :all + @test str[3:4] == SubString(str, 3, 4) + @test str[3:4] != SubString("me") + @test SubString("me") != str[3:4] + @test Base.AnnotatedString(str[3:4]) == SubString(str, 3, 4) + @test repeat(SubString(str, 3, 4), 2) == repeat(Base.AnnotatedString(str[3:4]), 2) + @test reverse(SubString(str, 3, 4)) == reverse(Base.AnnotatedString(str[3:4])) + @test Base.AnnotatedString(str[3:4]) == + Base.AnnotatedString("me", [(1:2, :thing, 0x01), (1:2, :all, 0x03)]) + @test Base.AnnotatedString(str[3:6]) == + Base.AnnotatedString("me s", [(1:2, :thing, 0x01), (4:4, :other, 0x02), (1:4, :all, 0x03)]) + @test str == Base.AnnotatedString("some string", [(1:4, :thing, 0x01), (6:11, :other, 0x02), (1:11, :all, 0x03)]) + @test str != Base.AnnotatedString("some string") + @test str != Base.AnnotatedString("some string", [(1:1, :thing, 0x01), (1:11, :all, 0x03), (6:6, :other, 0x02)]) + @test str != Base.AnnotatedString("some string", [(1:4, :thing, 0x11), (1:11, :all, 0x13), (6:11, :other, 0x12)]) + @test str != Base.AnnotatedString("some thingg", [(1:4, :thing, 0x01), (1:11, :all, 0x03), (6:11, :other, 0x02)]) + @test Base.AnnotatedString([Base.AnnotatedChar('a', [(:a, 1)]), Base.AnnotatedChar('b', [(:b, 2)])]) == + Base.AnnotatedString("ab", [(1:1, :a, 1), (2:2, :b, 2)]) + let allstrings = + ['a', Base.AnnotatedChar('a'), Base.AnnotatedChar('a', [(:aaa, 0x04)]), + "a string", Base.AnnotatedString("a string"), + Base.AnnotatedString("a string", [(1:2, :hmm, '%')]), + SubString(Base.AnnotatedString("a string", [(1:2, :hmm, '%')]), 1:1)] + for str1 in repeat(allstrings, 2) + for str2 in repeat(allstrings, 2) + @test String(str1 * str2) == + String(string(str1, str2)) == + String(string(str1)) * String(string(str2)) + @test Base.annotatedstring(str1 * str2) == + Base.annotatedstring(str1, str2) == + Base.annotatedstring(str1) * Base.annotatedstring(str2) + end + end + end + # @test collect(Base.eachstyle(str)) == + # [("some", [:thing, 0x01, :all, 0x03]), + # (" string", [:all, 0x03, :other, 0x02])] + @test chopprefix(sprint(show, str), "Base.") == + "AnnotatedString{String}(\"some string\", [(1:4, :thing, 0x01), (6:11, :other, 0x02), (1:11, :all, 0x03)])" + @test 
eval(Meta.parse(repr(str))) == str + @test sprint(show, MIME("text/plain"), str) == "\"some string\"" + + a = Base.AnnotatedString("hello", [(1:5, :label, 1)]) + @test first(a) == Base.AnnotatedChar('h', [(:label, 1)]) +end + +@testset "AnnotatedChar" begin + chr = Base.AnnotatedChar('c') + @test Base.AnnotatedChar(UInt32('c')) == chr + @test convert(Base.AnnotatedChar, chr) === chr + @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[]) + @test uppercase(chr) == Base.AnnotatedChar('C') + @test titlecase(chr) == Base.AnnotatedChar('C') + @test lowercase(Base.AnnotatedChar('C')) == chr + str = Base.AnnotatedString("hmm", [(1:1, :attr, "h0h0"), + (1:2, :attr, "h0m1"), + (2:3, :attr, "m1m2")]) + @test str[1] == Base.AnnotatedChar('h', [(:attr, "h0h0")]) + @test str[2] == Base.AnnotatedChar('m', [(:attr, "h0m1"), (:attr, "m1m2")]) + @test str[3] == Base.AnnotatedChar('m', [(:attr, "m1m2")]) +end + +@testset "Styling preservation" begin + str = Base.AnnotatedString("some string", [(1:4, :thing, 0x01), (1:11, :all, 0x03), (6:11, :other, 0x02)]) + @test match(r".e", str).match == str[3:4] + @test match(r"(.e)", str).captures == [str[3:4]] + let m0 = match(r"(.)e", str) + m1 = first(eachmatch(r"(.)e", str)) + for f in fieldnames(RegexMatch) + @test getfield(m0, f) == getfield(m1, f) + end + end + @test lpad(str, 12) == + Base.AnnotatedString(" some string", [(2:5, :thing, 0x01), + (2:12, :all, 0x03), + (7:12, :other, 0x02)]) + @test rpad(str, 12) == + Base.AnnotatedString("some string ", [(1:4, :thing, 0x01), + (1:11, :all, 0x03), + (6:11, :other, 0x02)]) + str1 = Base.AnnotatedString("test", [(1:4, :label, 5)]) + str2 = Base.AnnotatedString("case", [(2:3, :label, "oomph")]) + @test join([str1, str1], ' ') == + Base.AnnotatedString("test test", + [(1:4, :label, 5), + (6:9, :label, 5)]) + @test join([str1, str1], Base.AnnotatedString(" ", [(1:1, :label, 2)])) == + Base.AnnotatedString("test test", + [(1:4, :label, 5), + (5:5, :label, 2), + (6:9, :label, 5)]) + @test join((String(str1), str1), ' ') == + Base.AnnotatedString("test test", [(6:9, :label, 5)]) + @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label, 5)]) + @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label, "oomph"), + (6:7, :label, "oomph")]) + @test repeat(str1[1], 3) == Base.AnnotatedString("ttt", [(1:3, :label, 5)]) + @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label, 5)]) + @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label, "oomph")]) +end + +@testset "Unicode" begin + for words in (["ᲃase", "cɦɒnɡeȿ", "can", "CHⱯNGE", "Сodeunıts"], + ["Сodeunıts", "ᲃase", "cɦɒnɡeȿ", "can", "CHⱯNGE"]) + ann_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i, i)]) + for (i, w) in enumerate(words)] + ann_str = join(ann_words, '-') + for transform in (lowercase, uppercase, titlecase) + t_words = map(transform, words) + ann_t_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i, i)]) + for (i, w) in enumerate(t_words)] + ann_t_str = join(ann_t_words, '-') + t_ann_str = transform(ann_str) + @test String(ann_t_str) == String(t_ann_str) + @test Base.annotations(ann_t_str) == Base.annotations(t_ann_str) + end + for transform in (uppercasefirst, lowercasefirst) + t_words = vcat(transform(first(words)), words[2:end]) + ann_t_words = [Base.AnnotatedString(w, [(1:ncodeunits(w), :i, i)]) + for (i, w) in enumerate(t_words)] + ann_t_str = join(ann_t_words, '-') + t_ann_str = transform(ann_str) + @test String(ann_t_str) == String(t_ann_str) + @test Base.annotations(ann_t_str) == 
Base.annotations(t_ann_str) + end + end +end + +@testset "AnnotatedIOBuffer" begin + aio = Base.AnnotatedIOBuffer() + vec2ann(v::Vector{<:Tuple}) = collect(Base.RegionAnnotation, v) + # Append-only writing + @test write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)])) == 5 + @test write(aio, ' ') == 1 + @test write(aio, Base.AnnotatedString("world", [(1:5, :tag, 2)])) == 5 + @test Base.annotations(aio) == vec2ann([(1:5, :tag, 1), (7:11, :tag, 2)]) + # Check `annotate!`, including region sorting + @test truncate(aio, 0).io.size == 0 + @test write(aio, "hello world") == ncodeunits("hello world") + @test Base.annotate!(aio, 1:5, :tag, 1) === aio + @test Base.annotate!(aio, 7:11, :tag, 2) === aio + @test Base.annotations(aio) == vec2ann([(1:5, :tag, 1), (7:11, :tag, 2)]) + # Reading + @test read(seekstart(deepcopy(aio.io)), String) == "hello world" + @test read(seekstart(deepcopy(aio)), String) == "hello world" + @test read(seek(aio, 0), Base.AnnotatedString) == Base.AnnotatedString("hello world", [(1:5, :tag, 1), (7:11, :tag, 2)]) + @test read(seek(aio, 1), Base.AnnotatedString) == Base.AnnotatedString("ello world", [(1:4, :tag, 1), (6:10, :tag, 2)]) + @test read(seek(aio, 4), Base.AnnotatedString) == Base.AnnotatedString("o world", [(1:1, :tag, 1), (3:7, :tag, 2)]) + @test read(seek(aio, 5), Base.AnnotatedString) == Base.AnnotatedString(" world", [(2:6, :tag, 2)]) + @test read(seekend(aio), Base.AnnotatedString) == Base.AnnotatedString("") + @test read(seekstart(truncate(deepcopy(aio), 5)), Base.AnnotatedString) == Base.AnnotatedString("hello", [(1:5, :tag, 1)]) + @test read(seekstart(truncate(deepcopy(aio), 6)), Base.AnnotatedString) == Base.AnnotatedString("hello ", [(1:5, :tag, 1)]) + @test read(seekstart(truncate(deepcopy(aio), 7)), Base.AnnotatedString) == Base.AnnotatedString("hello w", [(1:5, :tag, 1), (7:7, :tag, 2)]) + @test read(seek(aio, 0), Base.AnnotatedChar) == Base.AnnotatedChar('h', [(:tag, 1)]) + @test read(seek(aio, 5), Base.AnnotatedChar) == Base.AnnotatedChar(' ', []) + @test read(seek(aio, 6), Base.AnnotatedChar) == Base.AnnotatedChar('w', [(:tag, 2)]) + # Check method compatibility with IOBuffer + @test position(aio) == 7 + @test seek(aio, 4) === aio + @test skip(aio, 2) === aio + @test Base.annotations(copy(aio)) == Base.annotations(aio) + @test take!(copy(aio).io) == take!(copy(aio.io)) + # Writing into the middle of the buffer + @test write(seek(aio, 6), "alice") == 5 # Replace 'world' with 'alice' + @test read(seekstart(aio), String) == "hello alice" + @test Base.annotations(aio) == vec2ann([(1:5, :tag, 1), (7:11, :tag, 2)]) # Should be unchanged + @test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey, 'o')])) == 5 + @test read(seekstart(aio), String) == "hey-o alice" + @test Base.annotations(aio) == vec2ann([(7:11, :tag, 2), (1:5, :hey, 'o')]) # First annotation should have been entirely replaced + @test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey, 'a')])) == 3 # a[lic, bbi]e ('alice', 'abbie') + @test read(seekstart(aio), String) == "hey-o abbie" + @test Base.annotations(aio) == vec2ann([(7:7, :tag, 2), (11:11, :tag, 2), (1:5, :hey, 'o'), (8:10, :hey, 'a')]) + @test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly + @test read(seekstart(aio), String) == "aby-o abbie" + @test Base.annotations(aio) == vec2ann([(7:7, :tag, 2), (11:11, :tag, 2), (3:5, :hey, 'o'), (8:10, :hey, 'a')]) + @test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2 + @test read(seekstart(aio), 
String) == "abyss abbie" + @test Base.annotations(aio) == vec2ann([(7:7, :tag, 2), (11:11, :tag, 2), (3:3, :hey, 'o'), (8:10, :hey, 'a')]) + # Writing one buffer to another + newaio = Base.AnnotatedIOBuffer() + @test write(newaio, seekstart(aio)) == 11 + @test read(seekstart(newaio), String) == "abyss abbie" + @test Base.annotations(newaio) == Base.annotations(aio) + @test write(seek(newaio, 5), seek(aio, 5)) == 6 + @test sort(Base.annotations(newaio)) == sort(Base.annotations(aio)) + @test write(newaio, seek(aio, 5)) == 6 + @test read(seekstart(newaio), String) == "abyss abbie abbie" + @test sort(Base.annotations(newaio)) == + sort(vcat(Base.annotations(aio), vec2ann([(13:13, :tag, 2), (14:16, :hey, 'a'), (17:17, :tag, 2)]))) + # The `_insert_annotations!` cautious-merging optimisation + aio = Base.AnnotatedIOBuffer() + @test write(aio, Base.AnnotatedChar('a', [(:a, 1), (:b, 2)])) == 1 + @test Base.annotations(aio) == vec2ann([(1:1, :a, 1), (1:1, :b, 2)]) + @test write(aio, Base.AnnotatedChar('b', [(:a, 1), (:b, 2)])) == 1 + @test Base.annotations(aio) == vec2ann([(1:2, :a, 1), (1:2, :b, 2)]) + let aio2 = copy(aio) # A different start makes merging too risky to do. + @test write(aio2, Base.AnnotatedChar('c', [(:a, 0), (:b, 2)])) == 1 + @test Base.annotations(aio2) == vec2ann([(1:2, :a, 1), (1:2, :b, 2), (3:3, :a, 0), (3:3, :b, 2)]) + end + let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though. + @test write(aio2, Base.AnnotatedChar('c', [(:b, 2)])) == 1 + @test Base.annotations(aio2) == vec2ann([(1:2, :a, 1), (1:3, :b, 2)]) + end + let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over. + @test write(aio2, Base.AnnotatedChar('c', [(:b, 2), (:c, 3), (:d, 4)])) == 1 + @test Base.annotations(aio2) == vec2ann([(1:2, :a, 1), (1:3, :b, 2), (3:3, :c, 3), (3:3, :d, 4)]) + end + let aio2 = Base.AnnotatedIOBuffer() + @test write(aio2, Base.AnnotatedChar('a', [(:b, 1)])) == 1 + @test write(aio2, Base.AnnotatedChar('b', [(:a, 1), (:b, 1)])) == 1 + @test read(seekstart(aio2), Base.AnnotatedString) == + Base.AnnotatedString("ab", [(1:1, :b, 1), (2:2, :a, 1), (2:2, :b, 1)]) + end + # Working through an IOContext + aio = Base.AnnotatedIOBuffer() + wrapio = IOContext(aio) + @test write(wrapio, Base.AnnotatedString("hey", [(1:3, :x, 1)])) == 3 + @test write(wrapio, Base.AnnotatedChar('a', [(:y, 2)])) == 1 + @test read(seekstart(aio), Base.AnnotatedString) == + Base.AnnotatedString("heya", [(1:3, :x, 1), (4:4, :y, 2)]) + # show-ing an AnnotatedIOBuffer + aio = Base.AnnotatedIOBuffer() + write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)])) + @test sprint(show, aio) == "Base.AnnotatedIOBuffer(5 bytes, 1 annotation)" +end diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 13f2f5197187a..f90ce8c697ed8 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -203,6 +203,12 @@ end @test (@views (x[3], x[1:2], x[[1,4]])) isa Tuple{Char, SubString, String} @test (@views (x[3], x[1:2], x[[1,4]])) == ('c', "ab", "ad") end + + @testset ":noshift constructor" begin + @test SubString("", 0, 0, Val(:noshift)) == "" + @test SubString("abcd", 0, 1, Val(:noshift)) == "a" + @test SubString("abcd", 0, 4, Val(:noshift)) == "abcd" + end end @@ -337,9 +343,7 @@ end @test_throws StringIndexError get(utf8_str, 2, 'X') end -#= -# issue #7764 -let +@testset "issue #7764" begin srep = repeat("Σβ",2) s="Σβ" ss=SubString(s,1,lastindex(s)) @@ -352,16 +356,15 @@ let @test iterate(srep, 7) == ('β',9) @test srep[7] == 'β' - 
@test_throws BoundsError srep[8] + @test_throws StringIndexError srep[8] end -=# # This caused JuliaLang/JSON.jl#82 @test first('\x00':'\x7f') === '\x00' @test last('\x00':'\x7f') === '\x7f' -# make sure substrings do not accept code unit if it is not start of codepoint -let s = "x\u0302" +@testset "make sure substrings do not accept code unit if it is not start of codepoint" begin + s = "x\u0302" @test s[1:2] == s @test_throws BoundsError s[0:3] @test_throws BoundsError s[1:4] @@ -427,6 +430,8 @@ end @test Symbol(gstr) === Symbol("12") + @test eltype(gstr) == Char + @test firstindex(gstr) == 1 @test sizeof(gstr) == 2 @test ncodeunits(gstr) == 2 @test length(gstr) == 2 @@ -1070,8 +1075,8 @@ let s = "∀x∃y", u = codeunits(s) @test Base.elsize(u) == Base.elsize(typeof(u)) == 1 end -# issue #24388 -let v = unsafe_wrap(Vector{UInt8}, "abc") +@testset "issue #24388" begin + v = unsafe_wrap(Vector{UInt8}, "abc") s = String(v) @test_throws BoundsError v[1] push!(v, UInt8('x')) @@ -1087,6 +1092,17 @@ let v = [0x40,0x41,0x42] @test String(view(v, 2:3)) == "AB" end +@testset "issue #54369" begin + v = Base.StringMemory(3) + v .= [0x41,0x42,0x43] + s = String(v) + @test s == "ABC" + @test v == [0x41,0x42,0x43] + v[1] = 0x43 + @test s == "ABC" + @test v == [0x43,0x42,0x43] +end + # make sure length for identical String and AbstractString return the same value, PR #25533 let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100))*"∀εa∀aε", String(rand(rng, UInt8, 200))] @@ -1099,8 +1115,8 @@ let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100 end end -# conversion of SubString to the same type, issue #25525 -let x = SubString("ab", 1, 1) +@testset "conversion of SubString to the same type, issue #25525" begin + x = SubString("ab", 1, 1) y = convert(SubString{String}, x) @test y === x chop("ab") === chop.(["ab"])[1] @@ -1153,6 +1169,9 @@ end apple_uint8 = Vector{UInt8}("Apple") @test apple_uint8 == [0x41, 0x70, 0x70, 0x6c, 0x65] + apple_uint8 = Array{UInt8}("Apple") + @test apple_uint8 == [0x41, 0x70, 0x70, 0x6c, 0x65] + Base.String(::tstStringType) = "Test" abstract_apple = tstStringType(apple_uint8) @test hash(abstract_apple, UInt(1)) == hash("Test", UInt(1)) @@ -1164,7 +1183,7 @@ end code_units = Base.CodeUnits("abc") @test Base.IndexStyle(Base.CodeUnits) == IndexLinear() @test Base.elsize(code_units) == sizeof(UInt8) - @test Base.unsafe_convert(Ptr{Int8}, code_units) == Base.unsafe_convert(Ptr{Int8}, code_units.s) + @test Base.unsafe_convert(Ptr{Int8}, Base.cconvert(Ptr{UInt8}, code_units)) == Base.unsafe_convert(Ptr{Int8}, Base.cconvert(Ptr{Int8}, code_units.s)) end @testset "LazyString" begin @@ -1178,6 +1197,7 @@ end @test codeunit(l) == UInt8 @test codeunit(l,2) == 0x2b @test isvalid(l, 1) + @test lastindex(l) == lastindex("1+2") @test Base.infer_effects((Any,)) do a throw(lazy"a is $a") end |> Core.Compiler.is_foldable @@ -1229,6 +1249,8 @@ end @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts) end @test_throws ArgumentError Symbol("a\0a") + + @test Base._string_n_override == Base.encode_effects_override(Base.compute_assumed_settings((:total, :(!:consistent)))) end @testset "Ensure UTF-8 DFA can never leave invalid state" begin @@ -1382,3 +1404,39 @@ end end end end + +@testset "transcode" begin + # string starting with an ASCII character + str_1 = "zβγ" + # string starting with a 2 byte UTF-8 character + str_2 = "αβγ" + # string starting with a 3 byte UTF-8 character + str_3 = "आख" + # string starting with a 4 byte UTF-8 character + str_4 = "𒃵𒃰" + 
@testset for str in (str_1, str_2, str_3, str_4) + @test transcode(String, str) === str + @test transcode(String, transcode(UInt16, str)) == str + @test transcode(String, transcode(UInt16, transcode(UInt8, str))) == str + @test transcode(String, transcode(Int32, transcode(UInt8, str))) == str + @test transcode(String, transcode(UInt32, transcode(UInt8, str))) == str + @test transcode(String, transcode(UInt8, transcode(UInt16, str))) == str + end +end + +if Sys.iswindows() + @testset "cwstring" begin + # empty string + str_0 = "" + # string with embedded NUL character + str_1 = "Au\000B" + # string with terminating NUL character + str_2 = "Wordu\000" + # "Regular" string with UTF-8 characters of differing byte counts + str_3 = "aܣ𒀀" + @test Base.cwstring(str_0) == UInt16[0x0000] + @test_throws ArgumentError Base.cwstring(str_1) + @test_throws ArgumentError Base.cwstring(str_2) + @test Base.cwstring(str_3) == UInt16[0x0061, 0x0723, 0xd808, 0xdc00, 0x0000] + end +end diff --git a/test/strings/io.jl b/test/strings/io.jl index aed1f800d4d49..209844580b3cd 100644 --- a/test/strings/io.jl +++ b/test/strings/io.jl @@ -156,6 +156,20 @@ @test "aaa \\g \\n" == unescape_string(str, ['g', 'n']) end @test Base.escape_raw_string(raw"\"\\\"\\-\\") == "\\\"\\\\\\\"\\\\-\\\\" + @test Base.escape_raw_string(raw"`\`\\-\\") == "\`\\\`\\\\-\\\\" + @test Base.escape_raw_string(raw"\"\\\"\\-\\", '`') == "\"\\\"\\\\-\\\\" + @test Base.escape_raw_string(raw"`\`\\-\\", '`') == "\\\`\\\\\\\`\\\\-\\\\" + @test Base.escape_raw_string(raw"some`string") == "some`string" + @test Base.escape_raw_string(raw"some\"string", '`') == "some\"string" + @test Base.escape_raw_string(raw"some`string\\") == "some`string\\\\" + @test Base.escape_raw_string(raw"some\"string\\", '`') == "some\"string\\\\" + @test Base.escape_raw_string(raw"some\"string") == "some\\\"string" + @test Base.escape_raw_string(raw"some`string", '`') == "some\\`string" + + # ascii and fullhex flags: + @test escape_string("\u00e4\u00f6\u00fc") == "\u00e4\u00f6\u00fc" + @test escape_string("\u00e4\u00f6\u00fc", ascii=true) == "\\ue4\\uf6\\ufc" + @test escape_string("\u00e4\u00f6\u00fc", ascii=true, fullhex=true) == "\\u00e4\\u00f6\\u00fc" end @testset "join()" begin @test join([]) == join([],",") == "" @@ -321,3 +335,17 @@ end # test empty args @test string() == "" end + +module StringsIOStringReturnTypesTestModule + struct S end + Base.joinpath(::S) = S() +end + +@testset "`string` return types" begin + @test all(T -> T <: AbstractString, Base.return_types(string)) +end + +@testset "type stable `join` (#55389)" begin + itr = ("foo" for _ in 1:100) + @test Base.return_types(join, (typeof(itr),))[] == String +end diff --git a/test/strings/search.jl b/test/strings/search.jl index d328168bfa466..c43327fe2971b 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -4,389 +4,409 @@ astr = "Hello, world.\n" u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" -# I think these should give error on 4 also, and "" is not treated -# consistently with SubString("",1,1), nor with Char[] -for ind in (0, 5) - @test_throws BoundsError findnext(SubString("",1,1), "foo", ind) - @test_throws BoundsError findprev(SubString("",1,1), "foo", ind) -end +@testset "BoundsError for findnext/findprev" begin + # I think these should give error on 4 also, and "" is not treated + # consistently with SubString("",1,1), nor with Char[] + for ind in (0, 5) + @test_throws BoundsError findnext(SubString("",1,1), "foo", ind) + @test_throws BoundsError findprev(SubString("",1,1), 
"foo", ind) + end -# Note: the commented out test will be enabled after fixes to make -# sure that findnext/findprev are consistent -# no matter what type of AbstractString the second argument is -@test_throws BoundsError findnext(isequal('a'), "foo", 0) -@test_throws BoundsError findnext(in(Char[]), "foo", 5) -# @test_throws BoundsError findprev(in(Char[]), "foo", 0) -@test_throws BoundsError findprev(in(Char[]), "foo", 5) + # Note: the commented out test will be enabled after fixes to make + # sure that findnext/findprev are consistent + # no matter what type of AbstractString the second argument is + @test_throws BoundsError findnext(isequal('a'), "foo", 0) + @test_throws BoundsError findnext(in(Char[]), "foo", 5) + # @test_throws BoundsError findprev(in(Char[]), "foo", 0) + @test_throws BoundsError findprev(in(Char[]), "foo", 5) -# @test_throws ErrorException in("foobar","bar") -@test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0) + # @test_throws ErrorException in("foobar","bar") + @test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0) +end -# ascii forward search -for str in [astr, GenericString(astr)] +@testset "ascii forward search $(typeof(str))" for str in [astr, GenericString(astr)] @test_throws BoundsError findnext(isequal('z'), str, 0) @test_throws BoundsError findnext(isequal('∀'), str, 0) - @test findfirst(isequal('x'), str) == nothing - @test findfirst(isequal('\0'), str) == nothing - @test findfirst(isequal('\u80'), str) == nothing - @test findfirst(isequal('∀'), str) == nothing + @test findfirst(isequal('x'), str) === nothing + @test findfirst(isequal('\0'), str) === nothing + @test findfirst(isequal('\u80'), str) === nothing + @test findfirst(isequal('∀'), str) === nothing @test findfirst(isequal('H'), str) == 1 @test findfirst(isequal('l'), str) == 3 @test findnext(isequal('l'), str, 4) == 4 @test findnext(isequal('l'), str, 5) == 11 - @test findnext(isequal('l'), str, 12) == nothing + @test findnext(isequal('l'), str, 12) === nothing @test findfirst(isequal(','), str) == 6 - @test findnext(isequal(','), str, 7) == nothing + @test findnext(isequal(','), str, 7) === nothing @test findfirst(isequal('\n'), str) == 14 - @test findnext(isequal('\n'), str, 15) == nothing + @test findnext(isequal('\n'), str, 15) === nothing @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1) @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1) -end -for str in [astr, GenericString(astr)] @test_throws BoundsError findnext('z', str, 0) @test_throws BoundsError findnext('∀', str, 0) - @test findfirst('x', str) == nothing - @test findfirst('\0', str) == nothing - @test findfirst('\u80', str) == nothing - @test findfirst('∀', str) == nothing + @test findfirst('x', str) === nothing + @test findfirst('\0', str) === nothing + @test findfirst('\u80', str) === nothing + @test findfirst('∀', str) === nothing @test findfirst('H', str) == 1 @test findfirst('l', str) == 3 @test findfirst('e', str) == 2 - @test findfirst('u', str) == nothing + @test findfirst('u', str) === nothing @test findnext('l', str, 4) == 4 @test findnext('l', str, 5) == 11 - @test findnext('l', str, 12) == nothing + @test findnext('l', str, 12) === nothing @test findfirst(',', str) == 6 - @test findnext(',', str, 7) == nothing + @test findnext(',', str, 7) === nothing @test findfirst('\n', str) == 14 - @test findnext('\n', str, 15) == nothing + @test findnext('\n', str, 15) === nothing @test_throws BoundsError findnext('ε', str, nextind(str,lastindex(str))+1) 
@test_throws BoundsError findnext('a', str, nextind(str,lastindex(str))+1) end -# ascii backward search -for str in [astr] - @test findlast(isequal('x'), str) == nothing - @test findlast(isequal('\0'), str) == nothing - @test findlast(isequal('\u80'), str) == nothing - @test findlast(isequal('∀'), str) == nothing +@testset "ascii backward search" begin + str = astr + @test findlast(isequal('x'), str) === nothing + @test findlast(isequal('\0'), str) === nothing + @test findlast(isequal('\u80'), str) === nothing + @test findlast(isequal('∀'), str) === nothing @test findlast(isequal('H'), str) == 1 - @test findprev(isequal('H'), str, 0) == nothing + @test findprev(isequal('H'), str, 0) === nothing @test findlast(isequal('l'), str) == 11 @test findprev(isequal('l'), str, 5) == 4 @test findprev(isequal('l'), str, 4) == 4 @test findprev(isequal('l'), str, 3) == 3 - @test findprev(isequal('l'), str, 2) == nothing + @test findprev(isequal('l'), str, 2) === nothing @test findlast(isequal(','), str) == 6 - @test findprev(isequal(','), str, 5) == nothing + @test findprev(isequal(','), str, 5) === nothing @test findlast(isequal('\n'), str) == 14 -end -for str in [astr] - @test findlast('x', str) == nothing - @test findlast('\0', str) == nothing - @test findlast('\u80', str) == nothing - @test findlast('∀', str) == nothing + @test findlast('x', str) === nothing + @test findlast('\0', str) === nothing + @test findlast('\u80', str) === nothing + @test findlast('∀', str) === nothing @test findlast('H', str) == 1 - @test findprev('H', str, 0) == nothing + @test findprev('H', str, 0) === nothing @test findlast('l', str) == 11 @test findprev('l', str, 5) == 4 @test findprev('l', str, 4) == 4 @test findprev('l', str, 3) == 3 - @test findprev('l', str, 2) == nothing + @test findprev('l', str, 2) === nothing @test findlast(',', str) == 6 - @test findprev(',', str, 5) == nothing - @test findlast(str, "") == nothing - @test findlast(str^2, str) == nothing + @test findprev(',', str, 5) === nothing + @test findlast(str, "") === nothing + @test findlast(str^2, str) === nothing @test findlast('\n', str) == 14 end -# utf-8 forward search -for str in (u8str, GenericString(u8str)) +@testset "utf-8 forward search $(typeof(str))" for str in (u8str, GenericString(u8str)) @test_throws BoundsError findnext(isequal('z'), str, 0) @test_throws BoundsError findnext(isequal('∀'), str, 0) - @test findfirst(isequal('z'), str) == nothing - @test findfirst(isequal('\0'), str) == nothing - @test findfirst(isequal('\u80'), str) == nothing - @test findfirst(isequal('∄'), str) == nothing + @test findfirst(isequal('z'), str) === nothing + @test findfirst(isequal('\0'), str) === nothing + @test findfirst(isequal('\u80'), str) === nothing + @test findfirst(isequal('∄'), str) === nothing @test findfirst(isequal('∀'), str) == 1 @test_throws StringIndexError findnext(isequal('∀'), str, 2) - @test findnext(isequal('∀'), str, 4) == nothing + @test findnext(isequal('∀'), str, 4) === nothing @test findfirst(isequal('∃'), str) == 13 @test_throws StringIndexError findnext(isequal('∃'), str, 15) - @test findnext(isequal('∃'), str, 16) == nothing + @test findnext(isequal('∃'), str, 16) === nothing @test findfirst(isequal('x'), str) == 26 @test findnext(isequal('x'), str, 27) == 43 - @test findnext(isequal('x'), str, 44) == nothing + @test findnext(isequal('x'), str, 44) === nothing @test findfirst(isequal('δ'), str) == 17 @test_throws StringIndexError findnext(isequal('δ'), str, 18) @test findnext(isequal('δ'), str, nextind(str,17)) == 33 - @test 
findnext(isequal('δ'), str, nextind(str,33)) == nothing + @test findnext(isequal('δ'), str, nextind(str,33)) === nothing @test findfirst(isequal('ε'), str) == 5 @test findnext(isequal('ε'), str, nextind(str,5)) == 54 - @test findnext(isequal('ε'), str, nextind(str,54)) == nothing - @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) == nothing - @test findnext(isequal('a'), str, nextind(str,lastindex(str))) == nothing + @test findnext(isequal('ε'), str, nextind(str,54)) === nothing + @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) === nothing + @test findnext(isequal('a'), str, nextind(str,lastindex(str))) === nothing @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1) @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1) end -# utf-8 backward search -for str in [u8str] - @test findlast(isequal('z'), str) == nothing - @test findlast(isequal('\0'), str) == nothing - @test findlast(isequal('\u80'), str) == nothing - @test findlast(isequal('∄'), str) == nothing +@testset "utf-8 backward search" begin + str = u8str + @test findlast(isequal('z'), str) === nothing + @test findlast(isequal('\0'), str) === nothing + @test findlast(isequal('\u80'), str) === nothing + @test findlast(isequal('∄'), str) === nothing @test findlast(isequal('∀'), str) == 1 - @test findprev(isequal('∀'), str, 0) == nothing + @test findprev(isequal('∀'), str, 0) === nothing @test findlast(isequal('∃'), str) == 13 @test findprev(isequal('∃'), str, 14) == 13 @test findprev(isequal('∃'), str, 13) == 13 - @test findprev(isequal('∃'), str, 12) == nothing + @test findprev(isequal('∃'), str, 12) === nothing @test findlast(isequal('x'), str) == 43 @test findprev(isequal('x'), str, 42) == 26 - @test findprev(isequal('x'), str, 25) == nothing + @test findprev(isequal('x'), str, 25) === nothing @test findlast(isequal('δ'), str) == 33 @test findprev(isequal('δ'), str, 32) == 17 - @test findprev(isequal('δ'), str, 16) == nothing + @test findprev(isequal('δ'), str, 16) === nothing @test findlast(isequal('ε'), str) == 54 @test findprev(isequal('ε'), str, 53) == 5 - @test findprev(isequal('ε'), str, 4) == nothing + @test findprev(isequal('ε'), str, 4) === nothing +end + +@testset "string forward search with a single-char string" begin + @test findfirst("x", astr) === nothing + @test findfirst("H", astr) == 1:1 + @test findnext("H", astr, 2) === nothing + @test findfirst("l", astr) == 3:3 + @test findnext("l", astr, 4) == 4:4 + @test findnext("l", astr, 5) == 11:11 + @test findnext("l", astr, 12) === nothing + @test findfirst("\n", astr) == 14:14 + @test findnext("\n", astr, 15) === nothing + + @test findfirst("z", u8str) === nothing + @test findfirst("∄", u8str) === nothing + @test findfirst("∀", u8str) == 1:1 + @test findnext("∀", u8str, 4) === nothing + @test findfirst("∃", u8str) == 13:13 + @test findnext("∃", u8str, 16) === nothing + @test findfirst("x", u8str) == 26:26 + @test findnext("x", u8str, 27) == 43:43 + @test findnext("x", u8str, 44) === nothing + @test findfirst("ε", u8str) == 5:5 + @test findnext("ε", u8str, 7) == 54:54 + @test findnext("ε", u8str, 56) === nothing end -# string forward search with a single-char string -@test findfirst("x", astr) == nothing -@test findfirst("H", astr) == 1:1 -@test findnext("H", astr, 2) == nothing -@test findfirst("l", astr) == 3:3 -@test findnext("l", astr, 4) == 4:4 -@test findnext("l", astr, 5) == 11:11 -@test findnext("l", astr, 12) == nothing -@test findfirst("\n", astr) == 14:14 -@test 
findnext("\n", astr, 15) == nothing - -@test findfirst("z", u8str) == nothing -@test findfirst("∄", u8str) == nothing -@test findfirst("∀", u8str) == 1:1 -@test findnext("∀", u8str, 4) == nothing -@test findfirst("∃", u8str) == 13:13 -@test findnext("∃", u8str, 16) == nothing -@test findfirst("x", u8str) == 26:26 -@test findnext("x", u8str, 27) == 43:43 -@test findnext("x", u8str, 44) == nothing -@test findfirst("ε", u8str) == 5:5 -@test findnext("ε", u8str, 7) == 54:54 -@test findnext("ε", u8str, 56) == nothing - -# strifindprev backward search with a single-char string -@test findlast("x", astr) == nothing -@test findlast("H", astr) == 1:1 -@test findprev("H", astr, 2) == 1:1 -@test findprev("H", astr, 0) == nothing -@test findlast("l", astr) == 11:11 -@test findprev("l", astr, 10) == 4:4 -@test findprev("l", astr, 4) == 4:4 -@test findprev("l", astr, 3) == 3:3 -@test findprev("l", astr, 2) == nothing -@test findlast("\n", astr) == 14:14 -@test findprev("\n", astr, 13) == nothing - -@test findlast("z", u8str) == nothing -@test findlast("∄", u8str) == nothing -@test findlast("∀", u8str) == 1:1 -@test findprev("∀", u8str, 0) == nothing -#TODO: setting the limit in the middle of a wide char -# makes findnext fail but findprev succeed. -# Should findprev fail as well? -#@test findprev("∀", u8str, 2) == nothing # gives 1:3 -@test findlast("∃", u8str) == 13:13 -@test findprev("∃", u8str, 12) == nothing -@test findlast("x", u8str) == 43:43 -@test findprev("x", u8str, 42) == 26:26 -@test findprev("x", u8str, 25) == nothing -@test findlast("ε", u8str) == 54:54 -@test findprev("ε", u8str, 53) == 5:5 -@test findprev("ε", u8str, 4) == nothing - -# string forward search with a single-char regex -@test findfirst(r"x", astr) == nothing -@test findfirst(r"H", astr) == 1:1 -@test findnext(r"H", astr, 2) == nothing -@test findfirst(r"l", astr) == 3:3 -@test findnext(r"l", astr, 4) == 4:4 -@test findnext(r"l", astr, 5) == 11:11 -@test findnext(r"l", astr, 12) == nothing -@test findfirst(r"\n", astr) == 14:14 -@test findnext(r"\n", astr, 15) == nothing -@test findfirst(r"z", u8str) == nothing -@test findfirst(r"∄", u8str) == nothing -@test findfirst(r"∀", u8str) == 1:1 -@test findnext(r"∀", u8str, 4) == nothing -@test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str) -@test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4) -@test findfirst(r"∃", u8str) == 13:13 -@test findnext(r"∃", u8str, 16) == nothing -@test findfirst(r"x", u8str) == 26:26 -@test findnext(r"x", u8str, 27) == 43:43 -@test findnext(r"x", u8str, 44) == nothing -@test findfirst(r"ε", u8str) == 5:5 -@test findnext(r"ε", u8str, 7) == 54:54 -@test findnext(r"ε", u8str, 56) == nothing -for i = 1:lastindex(astr) - @test findnext(r"."s, astr, i) == i:i +@testset "findprev backward search with a single-char string" begin + @test findlast("x", astr) === nothing + @test findlast("H", astr) == 1:1 + @test findprev("H", astr, 2) == 1:1 + @test findprev("H", astr, 0) === nothing + @test findlast("l", astr) == 11:11 + @test findprev("l", astr, 10) == 4:4 + @test findprev("l", astr, 4) == 4:4 + @test findprev("l", astr, 3) == 3:3 + @test findprev("l", astr, 2) === nothing + @test findlast("\n", astr) == 14:14 + @test findprev("\n", astr, 13) === nothing + + @test findlast("z", u8str) === nothing + @test findlast("∄", u8str) === nothing + @test findlast("∀", u8str) == 1:1 + @test findprev("∀", u8str, 0) === nothing + #TODO: setting the limit in the middle of a wide char + # makes findnext fail but findprev succeed. 
+ # Should findprev fail as well? + #@test findprev("∀", u8str, 2) === nothing # gives 1:3 + @test findlast("∃", u8str) == 13:13 + @test findprev("∃", u8str, 12) === nothing + @test findlast("x", u8str) == 43:43 + @test findprev("x", u8str, 42) == 26:26 + @test findprev("x", u8str, 25) === nothing + @test findlast("ε", u8str) == 54:54 + @test findprev("ε", u8str, 53) == 5:5 + @test findprev("ε", u8str, 4) === nothing end -for i = 1:lastindex(u8str) - if isvalid(u8str,i) - @test findnext(r"."s, u8str, i) == i:i + +@testset "string forward search with a single-char regex" begin + @test findfirst(r"x", astr) === nothing + @test findfirst(r"H", astr) == 1:1 + @test findnext(r"H", astr, 2) === nothing + @test findfirst(r"l", astr) == 3:3 + @test findnext(r"l", astr, 4) == 4:4 + @test findnext(r"l", astr, 5) == 11:11 + @test findnext(r"l", astr, 12) === nothing + @test findfirst(r"\n", astr) == 14:14 + @test findnext(r"\n", astr, 15) === nothing + @test findfirst(r"z", u8str) === nothing + @test findfirst(r"∄", u8str) === nothing + @test findfirst(r"∀", u8str) == 1:1 + @test findnext(r"∀", u8str, 4) === nothing + @test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str) + @test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4) + @test findfirst(r"∃", u8str) == 13:13 + @test findnext(r"∃", u8str, 16) === nothing + @test findfirst(r"x", u8str) == 26:26 + @test findnext(r"x", u8str, 27) == 43:43 + @test findnext(r"x", u8str, 44) === nothing + @test findfirst(r"ε", u8str) == 5:5 + @test findnext(r"ε", u8str, 7) == 54:54 + @test findnext(r"ε", u8str, 56) === nothing + for i = 1:lastindex(astr) + @test findnext(r"."s, astr, i) == i:i + end + for i = 1:lastindex(u8str) + if isvalid(u8str,i) + @test findnext(r"."s, u8str, i) == i:i + end end end -# string forward search with a zero-char string -for i = 1:lastindex(astr) - @test findnext("", astr, i) == i:i-1 +@testset "string forward search with a zero-char string" begin + for i = 1:lastindex(astr) + @test findnext("", astr, i) == i:i-1 + end + for i = 1:lastindex(u8str) + @test findnext("", u8str, i) == i:i-1 + end + @test findfirst("", "") === 1:0 end -for i = 1:lastindex(u8str) - @test findnext("", u8str, i) == i:i-1 + +@testset "string backward search with a zero-char string" begin + for i = 1:lastindex(astr) + @test findprev("", astr, i) == i:i-1 + end + for i = 1:lastindex(u8str) + @test findprev("", u8str, i) == i:i-1 + end + @test findlast("", "") === 1:0 end -@test findfirst("", "") === 1:0 -# string backward search with a zero-char string -for i = 1:lastindex(astr) - @test findprev("", astr, i) == i:i-1 +@testset "string forward search with a zero-char regex" begin + for i = 1:lastindex(astr) + @test findnext(r"", astr, i) == i:i-1 + end + for i = 1:lastindex(u8str) + # TODO: should regex search fast-forward invalid indices? 
+ if isvalid(u8str,i) + @test findnext(r"", u8str, i) == i:i-1 + end + end end -for i = 1:lastindex(u8str) - @test findprev("", u8str, i) == i:i-1 + +# See the comments in #54579 +@testset "Search for invalid chars" begin + @test findfirst(==('\xff'), "abc\xffde") == 4 + @test findprev(isequal('\xa6'), "abc\xa69", 5) == 4 + @test isnothing(findfirst(==('\xff'), "abcdeæd")) + + @test isnothing(findnext(==('\xa6'), "æ", 1)) + @test isnothing(findprev(==('\xa6'), "æa", 2)) end -@test findlast("", "") === 1:0 -# string forward search with a zero-char regex -for i = 1:lastindex(astr) - @test findnext(r"", astr, i) == i:i-1 +@testset "string forward search with a two-char string literal" begin + @test findfirst("xx", "foo,bar,baz") === nothing + @test findfirst("fo", "foo,bar,baz") == 1:2 + @test findnext("fo", "foo,bar,baz", 3) === nothing + @test findfirst("oo", "foo,bar,baz") == 2:3 + @test findnext("oo", "foo,bar,baz", 4) === nothing + @test findfirst("o,", "foo,bar,baz") == 3:4 + @test findnext("o,", "foo,bar,baz", 5) === nothing + @test findfirst(",b", "foo,bar,baz") == 4:5 + @test findnext(",b", "foo,bar,baz", 6) == 8:9 + @test findnext(",b", "foo,bar,baz", 10) === nothing + @test findfirst("az", "foo,bar,baz") == 10:11 + @test findnext("az", "foo,bar,baz", 12) === nothing end -for i = 1:lastindex(u8str) - # TODO: should regex search fast-forward invalid indices? - if isvalid(u8str,i) - @test findnext(r"", u8str, i) == i:i-1 - end + +@testset "issue #9365" begin + # string forward search with a two-char UTF-8 (2 byte) string literal + @test findfirst("éé", "ééé") == 1:3 + @test findnext("éé", "ééé", 1) == 1:3 + # string forward search with a two-char UTF-8 (3 byte) string literal + @test findfirst("€€", "€€€") == 1:4 + @test findnext("€€", "€€€", 1) == 1:4 + # string forward search with a two-char UTF-8 (4 byte) string literal + @test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5 + @test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5 + + # string forward search with a two-char UTF-8 (2 byte) string literal + @test findfirst("éé", "éé") == 1:3 + @test findnext("éé", "éé", 1) == 1:3 + # string forward search with a two-char UTF-8 (3 byte) string literal + @test findfirst("€€", "€€") == 1:4 + @test findnext("€€", "€€", 1) == 1:4 + # string forward search with a two-char UTF-8 (4 byte) string literal + @test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 + @test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 + + # string backward search with a two-char UTF-8 (2 byte) string literal + @test findlast("éé", "ééé") == 3:5 + @test findprev("éé", "ééé", lastindex("ééé")) == 3:5 + # string backward search with a two-char UTF-8 (3 byte) string literal + @test findlast("€€", "€€€") == 4:7 + @test findprev("€€", "€€€", lastindex("€€€")) == 4:7 + # string backward search with a two-char UTF-8 (4 byte) string literal + @test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9 + @test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9 + + # string backward search with a two-char UTF-8 (2 byte) string literal + @test findlast("éé", "éé") == 1:3 # should really be 1:4! + @test findprev("éé", "éé", lastindex("ééé")) == 1:3 + # string backward search with a two-char UTF-8 (3 byte) string literal + @test findlast("€€", "€€") == 1:4 # should really be 1:6! 
+ @test findprev("€€", "€€", lastindex("€€€")) == 1:4 + # string backward search with a two-char UTF-8 (4 byte) string literal + @test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 # should really be 1:8! + @test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5 +end + +@testset "string backward search with a two-char string literal" begin + @test findlast("xx", "foo,bar,baz") === nothing + @test findlast("fo", "foo,bar,baz") == 1:2 + @test findprev("fo", "foo,bar,baz", 1) === nothing + @test findlast("oo", "foo,bar,baz") == 2:3 + @test findprev("oo", "foo,bar,baz", 2) === nothing + @test findlast("o,", "foo,bar,baz") == 3:4 + @test findprev("o,", "foo,bar,baz", 1) === nothing + @test findlast(",b", "foo,bar,baz") == 8:9 + @test findprev(",b", "foo,bar,baz", 6) == 4:5 + @test findprev(",b", "foo,bar,baz", 3) === nothing + @test findlast("az", "foo,bar,baz") == 10:11 + @test findprev("az", "foo,bar,baz", 10) === nothing +end + +@testset "string search with a two-char regex" begin + @test findfirst(r"xx", "foo,bar,baz") === nothing + @test findfirst(r"fo", "foo,bar,baz") == 1:2 + @test findnext(r"fo", "foo,bar,baz", 3) === nothing + @test findfirst(r"oo", "foo,bar,baz") == 2:3 + @test findnext(r"oo", "foo,bar,baz", 4) === nothing + @test findfirst(r"o,", "foo,bar,baz") == 3:4 + @test findnext(r"o,", "foo,bar,baz", 5) === nothing + @test findfirst(r",b", "foo,bar,baz") == 4:5 + @test findnext(r",b", "foo,bar,baz", 6) == 8:9 + @test findnext(r",b", "foo,bar,baz", 10) === nothing + @test findfirst(r"az", "foo,bar,baz") == 10:11 + @test findnext(r"az", "foo,bar,baz", 12) === nothing +end + +@testset "occursin/contains" begin + # occursin with a String and Char needle + @test occursin("o", "foo") + @test occursin('o', "foo") + # occursin in curried form + @test occursin("foo")("o") + @test occursin("foo")('o') + + # contains + @test contains("foo", "o") + @test contains("foo", 'o') + # contains in curried form + @test contains("o")("foo") + @test contains('o')("foo") + + @test_throws ErrorException "ab" ∈ "abc" +end + +@testset "issue #15723" begin + @test findfirst(isequal('('), "⨳(") == 4 + @test findnext(isequal('('), "(⨳(", 2) == 5 + @test findlast(isequal('('), "(⨳(") == 5 + @test findprev(isequal('('), "(⨳(", 2) == 1 + + @test @inferred findall(isequal('a'), "éa") == [3] + @test @inferred findall(isequal('€'), "€€") == [1, 4] + @test @inferred isempty(findall(isequal('é'), "")) +end + + +@testset "issue #18109" begin + s_18109 = "fooα🐨βcd3" + @test findlast(isequal('o'), s_18109) == 3 + @test findfirst(isequal('d'), s_18109) == 13 end -# string forward search with a two-char string literal -@test findfirst("xx", "foo,bar,baz") == nothing -@test findfirst("fo", "foo,bar,baz") == 1:2 -@test findnext("fo", "foo,bar,baz", 3) == nothing -@test findfirst("oo", "foo,bar,baz") == 2:3 -@test findnext("oo", "foo,bar,baz", 4) == nothing -@test findfirst("o,", "foo,bar,baz") == 3:4 -@test findnext("o,", "foo,bar,baz", 5) == nothing -@test findfirst(",b", "foo,bar,baz") == 4:5 -@test findnext(",b", "foo,bar,baz", 6) == 8:9 -@test findnext(",b", "foo,bar,baz", 10) == nothing -@test findfirst("az", "foo,bar,baz") == 10:11 -@test findnext("az", "foo,bar,baz", 12) == nothing - -# issue #9365 -# string forward search with a two-char UTF-8 (2 byte) string literal -@test findfirst("éé", "ééé") == 1:3 -@test findnext("éé", "ééé", 1) == 1:3 -# string forward search with a two-char UTF-8 (3 byte) string literal -@test findfirst("€€", "€€€") == 1:4 -@test findnext("€€", 
"€€€", 1) == 1:4 -# string forward search with a two-char UTF-8 (4 byte) string literal -@test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5 -@test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5 - -# string forward search with a two-char UTF-8 (2 byte) string literal -@test findfirst("éé", "éé") == 1:3 -@test findnext("éé", "éé", 1) == 1:3 -# string forward search with a two-char UTF-8 (3 byte) string literal -@test findfirst("€€", "€€") == 1:4 -@test findnext("€€", "€€", 1) == 1:4 -# string forward search with a two-char UTF-8 (4 byte) string literal -@test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 -@test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 - -# string backward search with a two-char UTF-8 (2 byte) string literal -@test findlast("éé", "ééé") == 3:5 -@test findprev("éé", "ééé", lastindex("ééé")) == 3:5 -# string backward search with a two-char UTF-8 (3 byte) string literal -@test findlast("€€", "€€€") == 4:7 -@test findprev("€€", "€€€", lastindex("€€€")) == 4:7 -# string backward search with a two-char UTF-8 (4 byte) string literal -@test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9 -@test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9 - -# string backward search with a two-char UTF-8 (2 byte) string literal -@test findlast("éé", "éé") == 1:3 # should really be 1:4! -@test findprev("éé", "éé", lastindex("ééé")) == 1:3 -# string backward search with a two-char UTF-8 (3 byte) string literal -@test findlast("€€", "€€") == 1:4 # should really be 1:6! -@test findprev("€€", "€€", lastindex("€€€")) == 1:4 -# string backward search with a two-char UTF-8 (4 byte) string literal -@test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 # should really be 1:8! 
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5 - -# string backward search with a two-char string literal -@test findlast("xx", "foo,bar,baz") == nothing -@test findlast("fo", "foo,bar,baz") == 1:2 -@test findprev("fo", "foo,bar,baz", 1) == nothing -@test findlast("oo", "foo,bar,baz") == 2:3 -@test findprev("oo", "foo,bar,baz", 2) == nothing -@test findlast("o,", "foo,bar,baz") == 3:4 -@test findprev("o,", "foo,bar,baz", 1) == nothing -@test findlast(",b", "foo,bar,baz") == 8:9 -@test findprev(",b", "foo,bar,baz", 6) == 4:5 -@test findprev(",b", "foo,bar,baz", 3) == nothing -@test findlast("az", "foo,bar,baz") == 10:11 -@test findprev("az", "foo,bar,baz", 10) == nothing - -# string search with a two-char regex -@test findfirst(r"xx", "foo,bar,baz") == nothing -@test findfirst(r"fo", "foo,bar,baz") == 1:2 -@test findnext(r"fo", "foo,bar,baz", 3) == nothing -@test findfirst(r"oo", "foo,bar,baz") == 2:3 -@test findnext(r"oo", "foo,bar,baz", 4) == nothing -@test findfirst(r"o,", "foo,bar,baz") == 3:4 -@test findnext(r"o,", "foo,bar,baz", 5) == nothing -@test findfirst(r",b", "foo,bar,baz") == 4:5 -@test findnext(r",b", "foo,bar,baz", 6) == 8:9 -@test findnext(r",b", "foo,bar,baz", 10) == nothing -@test findfirst(r"az", "foo,bar,baz") == 10:11 -@test findnext(r"az", "foo,bar,baz", 12) == nothing - -# occursin with a String and Char needle -@test occursin("o", "foo") -@test occursin('o', "foo") -# occursin in curried form -@test occursin("foo")("o") -@test occursin("foo")('o') - -# contains -@test contains("foo", "o") -@test contains("foo", 'o') -# contains in curried form -@test contains("o")("foo") -@test contains('o')("foo") - -@test_throws ErrorException "ab" ∈ "abc" - -# issue #15723 -@test findfirst(isequal('('), "⨳(") == 4 -@test findnext(isequal('('), "(⨳(", 2) == 5 -@test findlast(isequal('('), "(⨳(") == 5 -@test findprev(isequal('('), "(⨳(", 2) == 1 - -@test @inferred findall(isequal('a'), "éa") == [3] -@test @inferred findall(isequal('€'), "€€") == [1, 4] -@test @inferred isempty(findall(isequal('é'), "")) - -# issue #18109 -s_18109 = "fooα🐨βcd3" -@test findlast(isequal('o'), s_18109) == 3 -@test findfirst(isequal('d'), s_18109) == 13 - -# findall (issue #31788) -@testset "findall" begin +@testset "findall (issue #31788)" begin @test findall("fooo", "foo") == UnitRange{Int}[] @test findall("ing", "Spinning laughing dancing") == [6:8, 15:17, 23:25] @test all(findall("", "foo") .=== [1:0, 2:1, 3:2, 4:3]) # use === to compare empty ranges @@ -395,13 +415,29 @@ s_18109 = "fooα🐨βcd3" @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6] end +@testset "Findall char in string" begin + @test findall(==('w'), "wabcwewwawk") == [1, 5, 7, 8, 10] + @test isempty(findall(isequal("w"), "abcde!,")) + @test findall(==('读'), "联国读大会一九四二月十读日第号决通过并颁布读") == [7, 34, 64] + + # Empty string + @test isempty(findall(isequal('K'), "")) + @test isempty(findall(isequal('α'), "")) + + # Finds an invalid char ONLY if it's at a char boundary in the string, + # i.e. iterating the string would emit the given char. 
+ @test findall(==('\xfe'), "abκæøc\xfeα\xfeβå!") == [10, 13] + @test isempty(findall(==('\xaf'), "abκæ读α\xe8\xaf\xfeβå!")) + @test isempty(findall(==('\xc3'), ";æ")) +end + # issue 37280 @testset "UInt8, Int8 vector" begin for T in [Int8, UInt8], VT in [Int8, UInt8] A = T[0x40, 0x52, 0x00, 0x52, 0x00] for A in (A, @view(A[1:end]), codeunits(String(copyto!(Vector{UInt8}(undef,5), A)))) - @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) == nothing + @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) === nothing @test findfirst(VT[0x52], A) === 2:2 @test findfirst(==(VT(0x52)), A) === 2 @test findlast(VT[0x30], A) === findlast(==(VT(0x30)), A) === nothing @@ -429,6 +465,45 @@ end @test_throws BoundsError findprev(pattern, A, -3) end end + + @test findall([0x01, 0x02], [0x03, 0x01, 0x02, 0x01, 0x02, 0x06]) == [2:3, 4:5] + @test isempty(findall([0x04, 0x05], [0x03, 0x04, 0x06])) +end + +# Issue 54578 +@testset "No conflation of Int8 and UInt8" begin + # Work for mixed types if the values are the same + @test findfirst(==(Int8(1)), [0x01]) == 1 + @test findnext(iszero, Int8[0, -2, 0, -3], 2) == 3 + @test findfirst(Int8[1,4], UInt8[0, 2, 4, 1, 8, 1, 4, 2]) == 6:7 + @test findprev(UInt8[5, 6], Int8[1, 9, 2, 5, 6, 3], 6) == 4:5 + + # Returns nothing for the same methods if the values are different, + # even if the bitpatterns are the same + @test isnothing(findfirst(==(Int8(-1)), [0xff])) + @test isnothing(findnext(isequal(0xff), Int8[-1, -2, -1], 2)) + @test isnothing(findfirst(UInt8[0xff, 0xfe], Int8[0, -1, -2, 1, 8, 1, 4, 2])) + @test isnothing(findprev(UInt8[0xff, 0xfe], Int8[1, 9, 2, -1, -2, 3], 6)) +end + +@testset "DenseArray with offsets" begin + isdefined(Main, :OffsetDenseArrays) || @eval Main include("../testhelpers/OffsetDenseArrays.jl") + OffsetDenseArrays = Main.OffsetDenseArrays + + A = OffsetDenseArrays.OffsetDenseArray(collect(0x61:0x69), 100) + @test findfirst(==(0x61), A) == 101 + @test findlast(==(0x61), A) == 101 + @test findfirst(==(0x00), A) === nothing + + @test findfirst([0x62, 0x63, 0x64], A) == 102:104 + @test findlast([0x63, 0x64], A) == 103:104 + @test findall([0x62, 0x63], A) == [102:103] + + @test findfirst(iszero, A) === nothing + A = OffsetDenseArrays.OffsetDenseArray([0x01, 0x02, 0x00, 0x03], -100) + @test findfirst(iszero, A) == -97 + @test findnext(==(0x02), A, -99) == -98 + @test findnext(==(0x02), A, -97) === nothing end # issue 32568 diff --git a/test/strings/types.jl b/test/strings/types.jl index 771be253b1ec9..c09652c3a608d 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -2,196 +2,211 @@ ## SubString and Cstring tests ## -## SubString tests ## -u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" -u8str2 = u8str^2 -len_u8str = length(u8str) -slen_u8str = length(u8str) -len_u8str2 = length(u8str2) -slen_u8str2 = length(u8str2) - -@test len_u8str2 == 2 * len_u8str -@test slen_u8str2 == 2 * slen_u8str - -u8str2plain = String(u8str2) - -for i1 = 1:length(u8str2) - if !isvalid(u8str2, i1); continue; end - for i2 = i1:length(u8str2) - if !isvalid(u8str2, i2); continue; end - @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) - @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) - @test u8str2[i1:i2] == u8str2plain[i1:i2] +@testset "SubString" begin + u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" + u8str2 = u8str^2 + len_u8str = length(u8str) + slen_u8str = length(u8str) + len_u8str2 = length(u8str2) + slen_u8str2 = length(u8str2) + + @test len_u8str2 == 2 * len_u8str + @test slen_u8str2 == 2 * 
slen_u8str + + u8str2plain = String(u8str2) + @test !isascii(u8str2) + @test cmp(u8str2, u8str^3) == -1 + @test cmp(u8str2, u8str2) == 0 + @test cmp(u8str^3, u8str2) == 1 + @test codeunit(u8str2) == codeunit(u8str2plain) + + @test convert(Union{String, SubString{String}}, u8str2) === u8str2 + @test convert(Union{String, SubString{String}}, u8str2plain) === u8str2plain + + for i1 = 1:ncodeunits(u8str2) + if !isvalid(u8str2, i1); continue; end + for i2 = i1:ncodeunits(u8str2) + if !isvalid(u8str2, i2); continue; end + @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) + @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) + @test u8str2[i1:i2] == u8str2plain[i1:i2] + end end -end -# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes -# gives the same result as `getindex` (except that it is a view not a copy) -for idx in 0:1 - @test SubString("∀", 1, idx) == "∀"[1:idx] -end + # tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes + # gives the same result as `getindex` (except that it is a view not a copy) + for idx in 0:1 + @test SubString("∀", 1, idx) == "∀"[1:idx] + end -# Substring provided with invalid end index throws BoundsError -@test_throws StringIndexError SubString("∀", 1, 2) -@test_throws StringIndexError SubString("∀", 1, 3) -@test_throws BoundsError SubString("∀", 1, 4) - -# Substring provided with invalid start index throws BoundsError -@test SubString("∀∀", 1:1) == "∀" -@test SubString("∀∀", 1:4) == "∀∀" -@test SubString("∀∀", 4:4) == "∀" -@test_throws StringIndexError SubString("∀∀", 1:2) -@test_throws StringIndexError SubString("∀∀", 1:5) -@test_throws StringIndexError SubString("∀∀", 2:4) -@test_throws BoundsError SubString("∀∀", 0:1) -@test_throws BoundsError SubString("∀∀", 0:4) -@test_throws BoundsError SubString("∀∀", 1:7) -@test_throws BoundsError SubString("∀∀", 4:7) - -# tests for SubString of more than one multibyte `Char` string -# we are consistent with `getindex` for `String` -for idx in [0, 1, 4] - @test SubString("∀∀", 1, idx) == "∀∀"[1:idx] - @test SubString("∀∀", 4, idx) == "∀∀"[4:idx] -end + @testset "invalid end index" begin + # Substring provided with invalid end index throws BoundsError + @test_throws StringIndexError SubString("∀", 1, 2) + @test_throws StringIndexError SubString("∀", 1, 3) + @test_throws BoundsError SubString("∀", 1, 4) + end -# index beyond lastindex("∀∀") -for idx in [2:3; 5:6] - @test_throws StringIndexError SubString("∀∀", 1, idx) -end -for idx in 7:8 - @test_throws BoundsError SubString("∀∀", 1, idx) -end + @testset "invalid start index" begin + # Substring provided with invalid start index throws BoundsError + @test SubString("∀∀", 1:1) == "∀" + @test SubString("∀∀", 1:4) == "∀∀" + @test SubString("∀∀", 4:4) == "∀" + @test_throws StringIndexError SubString("∀∀", 1:2) + @test_throws StringIndexError SubString("∀∀", 1:5) + @test_throws StringIndexError SubString("∀∀", 2:4) + @test_throws BoundsError SubString("∀∀", 0:1) + @test_throws BoundsError SubString("∀∀", 0:4) + @test_throws BoundsError SubString("∀∀", 1:7) + @test_throws BoundsError SubString("∀∀", 4:7) + end -let str="tempus fugit" #length(str)==12 - ss=SubString(str,1,lastindex(str)) #match source string - @test length(ss)==length(str) + # tests for SubString of more than one multibyte `Char` string + # we are consistent with `getindex` for `String` + for idx in [0, 1, 4] + @test SubString("∀∀", 1, idx) == "∀∀"[1:idx] + @test SubString("∀∀", 4, idx) == "∀∀"[4:idx] + end - 
ss=SubString(str,1:lastindex(str)) - @test length(ss)==length(str) + @testset "index beyond lastindex(\"∀∀\")" begin + for idx in [2:3; 5:6] + @test_throws StringIndexError SubString("∀∀", 1, idx) + end + for idx in 7:8 + @test_throws BoundsError SubString("∀∀", 1, idx) + end + end - ss=SubString(str,1,0) #empty SubString - @test length(ss)==0 + let str="tempus fugit" #length(str)==12 + ss=SubString(str,1,lastindex(str)) #match source string + @test length(ss)==length(str) - ss=SubString(str,1:0) - @test length(ss)==0 + ss=SubString(str,1:lastindex(str)) + @test length(ss)==length(str) - @test_throws BoundsError SubString(str, 14, 20) #start indexing beyond source string length - @test_throws BoundsError SubString(str, 10, 16) #end indexing beyond source string length + ss=SubString(str,1,0) #empty SubString + @test length(ss)==0 - @test_throws BoundsError SubString("", 1, 4) #empty source string - @test_throws BoundsError SubString("", 1, 1) #empty source string, identical start and end index - @test_throws BoundsError SubString("", 10, 12) - @test SubString("", 12, 10) == "" -end + ss=SubString(str,1:0) + @test length(ss)==0 -@test SubString("foobar", big(1), big(3)) == "foo" - -let str = "aa\u2200\u2222bb" - u = SubString(str, 3, 6) - @test length(u) == 2 - b = IOBuffer() - write(b, u) - @test String(take!(b)) == "\u2200\u2222" - - @test_throws StringIndexError SubString(str, 4, 5) - @test_throws BoundsError iterate(u, 0) - @test_throws BoundsError iterate(u, 8) - @test_throws BoundsError getindex(u, 0) - @test_throws BoundsError getindex(u, 7) - @test_throws BoundsError getindex(u, 0:1) - @test_throws BoundsError getindex(u, 7:7) - @test reverseind(u, 1) == 4 - @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String} - @test Base.cconvert(Ptr{Int8}, u) == u -end + @test_throws BoundsError SubString(str, 14, 20) #start indexing beyond source string length + @test_throws BoundsError SubString(str, 10, 16) #end indexing beyond source string length -let str = "føøbar" - @test_throws BoundsError SubString(str, 10, 10) - u = SubString(str, 4, 3) - @test length(u) == 0 - b = IOBuffer() - write(b, u) - @test String(take!(b)) == "" -end + @test_throws BoundsError SubString("", 1, 4) #empty source string + @test_throws BoundsError SubString("", 1, 1) #empty source string, identical start and end index + @test_throws BoundsError SubString("", 10, 12) + @test SubString("", 12, 10) == "" + end -# search and SubString (issue #5679) -let str = "Hello, world!" - u = SubString(str, 1, 5) - @test findlast("World", u) == nothing - @test findlast(isequal('z'), u) == nothing - @test findlast("ll", u) == 3:4 -end + @test SubString("foobar", big(1), big(3)) == "foo" + + let str = "aa\u2200\u2222bb" + u = SubString(str, 3, 6) + @test length(u) == 2 + b = IOBuffer() + write(b, u) + @test String(take!(b)) == "\u2200\u2222" + + @test_throws StringIndexError SubString(str, 4, 5) + @test_throws BoundsError iterate(u, 0) + @test_throws BoundsError iterate(u, 8) + @test_throws BoundsError getindex(u, 0) + @test_throws BoundsError getindex(u, 7) + @test_throws BoundsError getindex(u, 0:1) + @test_throws BoundsError getindex(u, 7:7) + @test reverseind(u, 1) == 4 + @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String} + @test Base.cconvert(Ptr{Int8}, u) == u + end -# SubString created from SubString -let str = "Hello, world!" 
- u = SubString(str, 2, 5) - for idx in 1:4 - @test SubString(u, 2, idx) == u[2:idx] - @test SubString(u, 2:idx) == u[2:idx] + let str = "føøbar" + @test_throws BoundsError SubString(str, 10, 10) + u = SubString(str, 4, 3) + @test length(u) == 0 + b = IOBuffer() + write(b, u) + @test String(take!(b)) == "" end - @test_throws BoundsError SubString(u, 1, 10) - @test_throws BoundsError SubString(u, 1:10) - @test_throws BoundsError SubString(u, 20:30) - @test SubString(u, 20:15) == "" - @test_throws BoundsError SubString(u, -1:10) - @test SubString(u, -1, -10) == "" - @test SubString(SubString("123", 1, 2), -10, -20) == "" -end -# sizeof -@test sizeof(SubString("abc\u2222def",4,4)) == 3 - -# issue #3710 -@test prevind(SubString("{var}",2,4),4) == 3 - -# issue #4183 -@test split(SubString("x", 2, 0), "y") == [""] - -# issue #6772 -@test parse(Float64, SubString("10",1,1)) === 1.0 -@test parse(Float64, SubString("1 0",1,1)) === 1.0 -@test parse(Float32, SubString("10",1,1)) === 1.0f0 - -# issue #5870 -@test !occursin(Regex("aa"), SubString("",1,0)) -@test occursin(Regex(""), SubString("",1,0)) - -# isvalid, length, prevind, nextind for SubString{String} -let s = "lorem ipsum", sdict = Dict( - SubString(s, 1, 11) => "lorem ipsum", - SubString(s, 1, 6) => "lorem ", - SubString(s, 1, 0) => "", - SubString(s, 2, 4) => "ore", - SubString(s, 2, 11) => "orem ipsum", - SubString(s, 15, 14) => "", -) - for (ss, s) in sdict - @test ncodeunits(ss) == ncodeunits(s) - for i in -2:13 - @test isvalid(ss, i) == isvalid(s, i) - end - for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss) - @test length(ss, i, j) == length(s, i, j) + @testset "search and SubString (issue #5679)" begin + str = "Hello, world!" + u = SubString(str, 1, 5) + @test findlast("World", u) === nothing + @test findlast(isequal('z'), u) === nothing + @test findlast("ll", u) == 3:4 + end + + @testset "SubString created from SubString" begin + str = "Hello, world!" 
+ u = SubString(str, 2, 5) + for idx in 1:4 + @test SubString(u, 2, idx) == u[2:idx] + @test SubString(u, 2:idx) == u[2:idx] end + @test_throws BoundsError SubString(u, 1, 10) + @test_throws BoundsError SubString(u, 1:10) + @test_throws BoundsError SubString(u, 20:30) + @test SubString(u, 20:15) == "" + @test_throws BoundsError SubString(u, -1:10) + @test SubString(u, -1, -10) == "" + @test SubString(SubString("123", 1, 2), -10, -20) == "" + end + + # sizeof + @test sizeof(SubString("abc\u2222def",4,4)) == 3 + + # issue #3710 + @test prevind(SubString("{var}",2,4),4) == 3 + + # issue #4183 + @test split(SubString("x", 2, 0), "y") == [""] + + @testset "issue #6772" begin + @test parse(Float64, SubString("10",1,1)) === 1.0 + @test parse(Float64, SubString("1 0",1,1)) === 1.0 + @test parse(Float32, SubString("10",1,1)) === 1.0f0 end - for (ss, s) in sdict - @test length(ss) == length(s) - for i in 0:ncodeunits(ss), j = 0:length(ss)+1 - @test prevind(ss, i+1, j) == prevind(s, i+1, j) - @test nextind(ss, i, j) == nextind(s, i, j) + + @testset "issue #5870" begin + @test !occursin(Regex("aa"), SubString("",1,0)) + @test occursin(Regex(""), SubString("",1,0)) + end + @testset" isvalid, length, prevind, nextind for SubString{String}" begin + s = "lorem ipsum" + sdict = Dict( + SubString(s, 1, 11) => "lorem ipsum", + SubString(s, 1, 6) => "lorem ", + SubString(s, 1, 0) => "", + SubString(s, 2, 4) => "ore", + SubString(s, 2, 11) => "orem ipsum", + SubString(s, 15, 14) => "", + ) + for (ss, s) in sdict + @test ncodeunits(ss) == ncodeunits(s) + for i in -2:13 + @test isvalid(ss, i) == isvalid(s, i) + end + for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss) + @test length(ss, i, j) == length(s, i, j) + end + end + for (ss, s) in sdict + @test length(ss) == length(s) + for i in 0:ncodeunits(ss), j = 0:length(ss)+1 + @test prevind(ss, i+1, j) == prevind(s, i+1, j) + @test nextind(ss, i, j) == nextind(s, i, j) + end + @test_throws BoundsError prevind(s, 0) + @test_throws BoundsError prevind(ss, 0) + @test_throws BoundsError nextind(s, ncodeunits(ss)+1) + @test_throws BoundsError nextind(ss, ncodeunits(ss)+1) end - @test_throws BoundsError prevind(s, 0) - @test_throws BoundsError prevind(ss, 0) - @test_throws BoundsError nextind(s, ncodeunits(ss)+1) - @test_throws BoundsError nextind(ss, ncodeunits(ss)+1) end -end -# proper nextind/prevind/thisind for SubString{String} -let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀", + rng = MersenneTwister(1) + strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀", String(rand(rng, UInt8, 50))] - for s in strs + @testset "proper nextind/prevind/thisind for SubString{String}: $(repr(s))" for s in strs a = 0 while a <= ncodeunits(s) a = nextind(s, a) @@ -223,111 +238,115 @@ let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*" end end end -end -# for isvalid(SubString{String}) -let s = "Σx + βz - 2" - for i in -1:ncodeunits(s)+2 - if checkbounds(Bool, s, i) - if isvalid(s, i) - ss = SubString(s, 1, i) - for j = 1:ncodeunits(ss) - @test isvalid(ss, j) == isvalid(s, j) + # for isvalid(SubString{String}) + let s = "Σx + βz - 2" + for i in -1:ncodeunits(s)+2 + if checkbounds(Bool, s, i) + if isvalid(s, i) + ss = SubString(s, 1, i) + for j = 1:ncodeunits(ss) + @test isvalid(ss, j) == isvalid(s, j) + end + else + @test_throws StringIndexError SubString(s, 1, i) end + elseif i > 0 + @test_throws BoundsError SubString(s, 1, i) else - @test_throws StringIndexError SubString(s, 1, i) + @test SubString(s, 1, i) == "" end - elseif i > 0 - 
@test_throws BoundsError SubString(s, 1, i) - else - @test SubString(s, 1, i) == "" end end -end -let ss = SubString("hello", 1, 5) - @test length(ss, 1, 0) == 0 - @test_throws BoundsError length(ss, 1, -1) - @test_throws BoundsError length(ss, 1, 6) - @test_throws BoundsError length(ss, 1, 10) - @test_throws BoundsError prevind(ss, 0, 1) - @test prevind(ss, 1, 1) == 0 - @test prevind(ss, 6, 1) == 5 - @test_throws BoundsError prevind(ss, 7, 1) - @test_throws BoundsError nextind(ss, -1, 1) - @test nextind(ss, 0, 1) == 1 - @test nextind(ss, 5, 1) == 6 - @test_throws BoundsError nextind(ss, 6, 1) -end + let ss = SubString("hello", 1, 5) + @test length(ss, 1, 0) == 0 + @test_throws BoundsError length(ss, 1, -1) + @test_throws BoundsError length(ss, 1, 6) + @test_throws BoundsError length(ss, 1, 10) + @test_throws BoundsError prevind(ss, 0, 1) + @test prevind(ss, 1, 1) == 0 + @test prevind(ss, 6, 1) == 5 + @test_throws BoundsError prevind(ss, 7, 1) + @test_throws BoundsError nextind(ss, -1, 1) + @test nextind(ss, 0, 1) == 1 + @test nextind(ss, 5, 1) == 6 + @test_throws BoundsError nextind(ss, 6, 1) + end -# length(SubString{String}) performance specialization -let s = "|η(α)-ϕ(κ)| < ε" - @test length(SubString(s, 1, 0)) == length(s[1:0]) - @test length(SubString(s, 4, 4)) == length(s[4:4]) - @test length(SubString(s, 1, 7)) == length(s[1:7]) - @test length(SubString(s, 4, 11)) == length(s[4:11]) -end + # length(SubString{String}) performance specialization + let s = "|η(α)-ϕ(κ)| < ε" + @test length(SubString(s, 1, 0)) == length(s[1:0]) + @test length(SubString(s, 4, 4)) == length(s[4:4]) + @test length(SubString(s, 1, 7)) == length(s[1:7]) + @test length(SubString(s, 4, 11)) == length(s[4:11]) + end -@testset "reverseind" for T in (String, SubString, GenericString) - for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1") - for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4") - for c in ('X', 'δ', '\U0001d6a5') - s = convert(T, string(prefix, c, suffix)) - r = reverse(s) - ri = findfirst(isequal(c), r) - @test c == s[reverseind(s, ri)] == r[ri] - s = convert(T, string(prefix, prefix, c, suffix, suffix)) - pre = convert(T, prefix) - sb = SubString(s, nextind(pre, lastindex(pre)), - lastindex(convert(T, string(prefix, prefix, c, suffix)))) - r = reverse(sb) - ri = findfirst(isequal(c), r) - @test c == sb[reverseind(sb, ri)] == r[ri] + @testset "reverseind" for T in (String, SubString, GenericString) + for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1") + for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4") + for c in ('X', 'δ', '\U0001d6a5') + s = convert(T, string(prefix, c, suffix)) + r = reverse(s) + ri = findfirst(isequal(c), r) + @test c == s[reverseind(s, ri)] == r[ri] + s = convert(T, string(prefix, prefix, c, suffix, suffix)) + pre = convert(T, prefix) + sb = SubString(s, nextind(pre, lastindex(pre)), + lastindex(convert(T, string(prefix, prefix, c, suffix)))) + r = reverse(sb) + ri = findfirst(isequal(c), r) + @test c == sb[reverseind(sb, ri)] == r[ri] + end end end end -end -@testset "reverseind of empty strings" begin - for s in ("", - SubString("", 1, 0), - SubString("ab", 1, 0), - SubString("ab", 2, 1), - SubString("ab", 3, 2), - GenericString("")) - @test reverseind(s, 0) == 1 - @test reverseind(s, 1) == 0 + @testset "reverseind of empty strings" begin + for s in ("", + SubString("", 1, 
0), + SubString("ab", 1, 0), + SubString("ab", 2, 1), + SubString("ab", 3, 2), + GenericString("")) + @test reverseind(s, 0) == 1 + @test reverseind(s, 1) == 0 + end end end -## Cstring tests ## - -# issue #13974: comparison against pointers -let - str = String("foobar") - ptr = pointer(str) - cstring = Cstring(ptr) - @test ptr == cstring - @test cstring == ptr - - # convenient NULL string creation from Ptr{Cvoid} - nullstr = Cstring(C_NULL) - - # Comparisons against NULL strings - @test ptr != nullstr - @test nullstr != ptr +@testset "Cstring" begin + @testset "issue #13974: comparison against pointers" begin + str = String("foobar") + ptr = pointer(str) + cstring = Cstring(ptr) + @test ptr == cstring + @test cstring == ptr + + # convenient NULL string creation from Ptr{Cvoid} + nullstr = Cstring(C_NULL) + + # Comparisons against NULL strings + @test ptr != nullstr + @test nullstr != ptr + + # Short-hand comparison against C_NULL + @test nullstr == C_NULL + @test C_NULL == nullstr + @test cstring != C_NULL + @test C_NULL != cstring + end - # Short-hand comparison against C_NULL - @test nullstr == C_NULL - @test C_NULL == nullstr - @test cstring != C_NULL - @test C_NULL != cstring + @testset "issue #31381: eltype(Cstring) != Cchar" begin + s = Cstring(C_NULL) + @test eltype(Cstring) == Cchar + @test eltype(s) == Cchar + @test pointer(s) isa Ptr{Cchar} + end end -# issue #31381: eltype(Cstring) != Cchar -let - s = Cstring(C_NULL) - @test eltype(Cstring) == Cchar - @test eltype(s) == Cchar - @test pointer(s) isa Ptr{Cchar} +@testset "Codeunits" begin + s = "I'm a string!" + @test codeunit(s) == UInt8 + @test codeunit(s, Int8(1)) == codeunit(s, 1) end diff --git a/test/strings/util.jl b/test/strings/util.jl index 5218310c5c1c7..bb87881bbaa1d 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -2,6 +2,20 @@ SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3) +@testset "textwidth" begin + for (c, w) in [('x', 1), ('α', 1), ('🍕', 2), ('\0', 0), ('\u0302', 0), ('\xc0', 1)] + @test textwidth(c) == w + @test textwidth(c^3) == w*3 + @test w == @invoke textwidth(c::AbstractChar) + end + for i in 0x00:0x7f # test all ASCII chars (which have fast path) + w = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i)) + c = Char(i) + @test textwidth(c) == w + @test w == @invoke textwidth(c::AbstractChar) + end +end + @testset "padding (lpad and rpad)" begin @test lpad("foo", 2) == "foo" @test rpad("foo", 2) == "foo" @@ -51,6 +65,59 @@ SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3) @test rpad("⟨k|H₁|k̃⟩", 12) |> textwidth == 12 @test lpad("⟨k|H₁|k⟩", 12) |> textwidth == 12 @test rpad("⟨k|H₁|k⟩", 12) |> textwidth == 12 + for pad in (rpad, lpad), p in ('\0', "\0", "\0\0", "\u302") + if ncodeunits(p) == 1 + @test_throws r".*has zero textwidth.*maybe you want.*bytes.*" pad("foo", 10, p) + else + @test_throws r".*has zero textwidth$" pad("foo", 10, p) + end + end +end + +@testset "string truncation (ltruncate, rtruncate, ctruncate)" begin + @test ltruncate("foo", 4) == "foo" + @test ltruncate("foo", 3) == "foo" + @test ltruncate("foo", 2) == "…o" + @test ltruncate("🍕🍕 I love 🍕", 10) == "…I love 🍕" # handle wide emojis + @test ltruncate("🍕🍕 I love 🍕", 10, "[…]") == "[…]love 🍕" + # when the replacement string is longer than the trunc + # trust that the user wants the replacement string rather than erroring + @test ltruncate("abc", 2, "xxxxxx") == "xxxxxx" + + @inferred ltruncate("xxx", 4) + @inferred ltruncate("xxx", 2) + @inferred 
ltruncate(@view("xxxxxxx"[1:4]), 4) + @inferred ltruncate(@view("xxxxxxx"[1:4]), 2) + + @test rtruncate("foo", 4) == "foo" + @test rtruncate("foo", 3) == "foo" + @test rtruncate("foo", 2) == "f…" + @test rtruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 I lo…" + @test rtruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕 I […]" + @test rtruncate("abc", 2, "xxxxxx") == "xxxxxx" + + @inferred rtruncate("xxx", 4) + @inferred rtruncate("xxx", 2) + @inferred rtruncate(@view("xxxxxxx"[1:4]), 4) + @inferred rtruncate(@view("xxxxxxx"[1:4]), 2) + + @test ctruncate("foo", 4) == "foo" + @test ctruncate("foo", 3) == "foo" + @test ctruncate("foo", 2) == "f…" + @test ctruncate("foo", 2; prefer_left=true) == "f…" + @test ctruncate("foo", 2; prefer_left=false) == "…o" + @test ctruncate("foobar", 6) == "foobar" + @test ctruncate("foobar", 5) == "fo…ar" + @test ctruncate("foobar", 4) == "fo…r" + @test ctruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 …e 🍕" + @test ctruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕[…] 🍕" + @test ctruncate("abc", 2, "xxxxxx") == "xxxxxx" + @test ctruncate("🍕🍕🍕🍕🍕🍕xxxxxxxxxxx", 9) == "🍕🍕…xxxx" + + @inferred ctruncate("xxxxx", 5) + @inferred ctruncate("xxxxx", 3) + @inferred ctruncate(@view("xxxxxxx"[1:5]), 5) + @inferred ctruncate(@view("xxxxxxx"[1:5]), 3) end # string manipulation @@ -89,6 +156,10 @@ end @test rstrip(isnumeric, "abc0123") == "abc" @test lstrip("ello", ['e','o']) == "llo" @test rstrip("ello", ['e','o']) == "ell" + + @test_throws ArgumentError strip("", "") + @test_throws ArgumentError lstrip("", "") + @test_throws ArgumentError rstrip("", "") end @testset "partition" begin @@ -208,6 +279,28 @@ end @test split("α β γ", "β") == rsplit("α β γ", "β") == ["α "," γ"] end +@testset "eachrsplit" begin + @test collect(eachrsplit("", 'a')) == [""] + @test collect(eachrsplit("", isspace; limit=3)) == [""] + @test collect(eachrsplit("b c d"; limit=2)) == ["d", "b c "] + @test collect(eachrsplit("a.b.c", '.'; limit=1)) == ["a.b.c"] + @test collect(eachrsplit("a..b..c", '.')) == ["c", "", "b", "", "a"] + @test collect(eachrsplit("ax b c")) == ["c", "b", "ax"] + @test collect(eachrsplit(" a 12 4 v ", isnumeric)) == [" v ", " ", "", " a "] + @test collect(eachrsplit("ba", 'a')) == ["", "b"] + @test collect(eachrsplit(" ")) == [] + @test collect(eachrsplit("aaaa", 'a'; keepempty=false)) == [] + @test collect(eachrsplit("aaaa", 'a'; limit=2)) == ["", "aaa"] + @test collect(eachrsplit("abcdef", ['b', 'e'])) == ["f", "cd", "a"] + @test collect(eachrsplit("abc", isletter)) == ["", "", "", ""] + + # This behaviour is quite surprising, but is consistent with split + # See issue 45916 + @test collect(eachrsplit("a b"; limit=2)) == ["b", "a "] # only one trailing space + @test collect(eachrsplit("a "; limit=1)) == ["a "] + @test collect(eachrsplit(" a b c d"; limit=3)) == ["d", "c", " a b "] +end + @testset "replace" begin @test replace("\u2202", '*' => '\0') == "\u2202" @@ -333,6 +426,28 @@ end # Issue 36953 @test replace("abc", "" => "_", count=1) == "_abc" + # tests for io::IO API (in addition to internals exercised above): + let buf = IOBuffer() + replace(buf, "aaa", 'a' => 'z', count=0) + replace(buf, "aaa", 'a' => 'z', count=1) + replace(buf, "bbb", 'a' => 'z') + replace(buf, "aaa", 'a' => 'z') + @test String(take!(buf)) == "aaazaabbbzzz" + end + let tempfile = tempname() + try + open(tempfile, "w") do f + replace(f, "aaa", 'a' => 'z', count=0) + replace(f, "aaa", 'a' => 'z', count=1) + replace(f, "bbb", 'a' => 'z') + replace(f, "aaa", 'a' => 'z') + print(f, "\n") + end + @test read(tempfile, String) == "aaazaabbbzzz\n" + 
finally + rm(tempfile, force=true) + end + end end @testset "replace many" begin diff --git a/test/subarray.jl b/test/subarray.jl index e22c1394cbfc2..a462224e7643a 100644 --- a/test/subarray.jl +++ b/test/subarray.jl @@ -275,9 +275,6 @@ end # with the exception of Int-slicing oindex = (:, 6, 3:7, reshape([12]), [8,4,6,12,5,7], [3:7 1:5 2:6 4:8 5:9], reshape(2:11, 2, 5)) -_ndims(::AbstractArray{T,N}) where {T,N} = N -_ndims(x) = 1 - if testfull let B = copy(reshape(1:13^3, 13, 13, 13)) @testset "full tests: ($o1,$o2,$o3)" for o3 in oindex, o2 in oindex, o1 in oindex @@ -342,6 +339,7 @@ end A = copy(reshape(1:120, 3, 5, 8)) sA = view(A, 2:2, 1:5, :) @test @inferred(strides(sA)) == (1, 3, 15) + @test IndexStyle(sA) == IndexStyle(typeof(sA)) == IndexCartesian() @test parent(sA) == A @test parentindices(sA) == (2:2, 1:5, Base.Slice(1:8)) @test size(sA) == (1, 5, 8) @@ -468,6 +466,113 @@ end @test sA[[1 2 4 4; 6 1 1 4]] == [34 35 38 38; 50 34 34 38] end +@testset "fast linear indexing with AbstractUnitRange or Colon indices" begin + @testset "getindex" begin + @testset "1D" begin + for a1 in Any[1:5, [1:5;]] + b1 = @view a1[:]; # FastContiguousSubArray + c1 = @view a1[eachindex(a1)]; # FastContiguousSubArray + d1 = @view a1[begin:1:end]; # FastSubArray + + ax1 = eachindex(a1); + @test b1[ax1] == c1[ax1] == d1[ax1] == a1[ax1] + @test b1[:] == c1[:] == d1[:] == a1[:] + + # some arbitrary indices + inds1 = 2:4 + c1 = @view a1[inds1] + @test c1[axes(c1,1)] == c1[:] == a1[inds1] + + inds12 = Base.IdentityUnitRange(Base.OneTo(4)) + c1 = @view a1[inds12] + @test c1[axes(c1,1)] == c1[:] == a1[inds12] + + inds2 = 3:2:5 + d1 = @view a1[inds2] + @test d1[axes(d1,1)] == d1[:] == a1[inds2] + end + end + + @testset "2D" begin + a2_ = reshape(1:25, 5, 5) + for a2 in Any[a2_, collect(a2_)] + b2 = @view a2[:, :]; # 2D FastContiguousSubArray + b22 = @view a2[:]; # 1D FastContiguousSubArray + c2 = @view a2[eachindex(a2)]; # 1D FastContiguousSubArray + d2 = @view a2[begin:1:end]; # 1D FastSubArray + + ax2 = eachindex(a2); + @test b2[ax2] == b22[ax2] == c2[ax2] == d2[ax2] == a2[ax2] + @test b2[:] == b22[:] == c2[:] == d2[:] == a2[:] + + # some arbitrary indices + inds1 = 2:4 + c2 = @view a2[inds1] + @test c2[axes(c2,1)] == c2[:] == a2[inds1] + + inds12 = Base.IdentityUnitRange(Base.OneTo(4)) + c2 = @view a2[inds12] + @test c2[axes(c2,1)] == c2[:] == a2[inds12] + + inds2 = 2:2:4 + d2 = @view a2[inds2]; + @test d2[axes(d2,1)] == d2[:] == a2[inds2] + end + end + end + @testset "setindex!" 
begin + @testset "1D" begin + a1 = rand(10); + a12 = copy(a1); + b1 = @view a1[:]; # 1D FastContiguousSubArray + c1 = @view a1[eachindex(a1)]; # 1D FastContiguousSubArray + d1 = @view a1[begin:1:end]; # 1D FastSubArray + + ax1 = eachindex(a1); + @test (b1[ax1] = a12; b1) == (c1[ax1] = a12; c1) == (d1[ax1] = a12; d1) == (a1[ax1] = a12; a1) + @test (b1[:] = a12; b1) == (c1[:] = a12; c1) == (d1[:] = a12; d1) == (a1[:] = a12; a1) + + # some arbitrary indices + ind1 = 2:4 + c1 = a12[ind1] + @test (c1[axes(c1,1)] = a12[ind1]; c1) == (c1[:] = a12[ind1]; c1) == a12[ind1] + + inds1 = Base.IdentityUnitRange(Base.OneTo(4)) + c1 = @view a1[inds1] + @test (c1[eachindex(c1)] = @view(a12[inds1]); c1) == @view(a12[inds1]) + + ind2 = 2:2:8 + d1 = a12[ind2] + @test (d1[axes(d1,1)] = a12[ind2]; d1) == (d1[:] = a12[ind2]; d1) == a12[ind2] + end + + @testset "2D" begin + a2 = rand(10, 10); + a22 = copy(a2); + a2v = vec(a22); + b2 = @view a2[:, :]; # 2D FastContiguousSubArray + c2 = @view a2[eachindex(a2)]; # 1D FastContiguousSubArray + d2 = @view a2[begin:1:end]; # 1D FastSubArray + + @test (b2[eachindex(b2)] = a2v; vec(b2)) == (c2[eachindex(c2)] = a2v; c2) == a2v + @test (d2[eachindex(d2)] = a2v; d2) == a2v + + # some arbitrary indices + inds1 = 3:9 + c2 = @view a2[inds1] + @test (c2[eachindex(c2)] = @view(a22[inds1]); c2) == @view(a22[inds1]) + + inds1 = Base.IdentityUnitRange(Base.OneTo(4)) + c2 = @view a2[inds1] + @test (c2[eachindex(c2)] = @view(a22[inds1]); c2) == @view(a22[inds1]) + + inds2 = 3:3:9 + d2 = @view a2[inds2] + @test (d2[eachindex(d2)] = @view(a22[inds2]); d2) == @view(a22[inds2]) + end + end +end + @testset "issue #11871" begin a = fill(1., (2,2)) b = view(a, 1:2, 1:2) @@ -533,6 +638,44 @@ end @test foo == [X, X] end + # Test as an assignment's left hand side + let x = [1,2,3,4] + @test Meta.@lower(@view(x[1]) = 1).head == :error + @test Meta.@lower(@view(x[1]) += 1).head == :error + @test Meta.@lower(@view(x[end]) = 1).head == :error + @test Meta.@lower(@view(x[end]) += 1).head == :error + @test Meta.@lower(@view(f(x)[end]) = 1).head == :error + @test Meta.@lower(@view(f(x)[end]) += 1).head == :error + @test (@view(x[1]) .+= 1) == fill(2) + @test x == [2,2,3,4] + @test (@view(reshape(x,2,2)[1,1]) .+= 10) == fill(12) + @test x == [12,2,3,4] + @test (@view(x[end]) .+= 1) == fill(5) + @test x == [12,2,3,5] + @test (@view(reshape(x,2,2)[end]) .+= 10) == fill(15) + @test x == [12,2,3,15] + @test (@view(reshape(x,2,2)[[begin],[begin,end]])::AbstractMatrix{Int} .+= [2]) == [14 5] + @test x == [14,2,5,15] + + x = [1,2,3,4] + @test Meta.@lower(@views(x[[1]]) = 1).head == :error + @test Meta.@lower(@views(x[[1]]) += 1).head == :error + @test Meta.@lower(@views(x[[end]]) = 1).head == :error + @test Meta.@lower(@views(x[[end]]) += 1).head == :error + @test Meta.@lower(@views(f(x)[end]) = 1).head == :error + @test Meta.@lower(@views(f(x)[end]) += 1).head == :error + @test (@views(x[[1]]) .+= 1) == [2] + @test x == [2,2,3,4] + @test (@views(reshape(x,2,2)[[1],1]) .+= 10) == [12] + @test x == [12,2,3,4] + @test (@views(x[[end]]) .+= 1) == [5] + @test x == [12,2,3,5] + @test (@views(reshape(x,2,2)[[end]]) .+= 10) == [15] + @test x == [12,2,3,15] + @test (@views(reshape(x,2,2)[[begin],[begin,end]])::AbstractMatrix{Int} .+= [2]) == [14 5] + @test x == [14,2,5,15] + end + # test @views macro @views let f!(x) = x[begin:end-1] .+= x[begin+1:end].^2 x = [1,2,3,4] @@ -559,6 +702,12 @@ end @test x == [5,8,12,9] && i == [4,3] @. x[3:end] = 0 # make sure @. 
works with end expressions in @views @test x == [5,8,0,0] + x[begin:end] .+= 1 + @test x == [6,9,1,1] + x[[begin,2,end]] .-= [1,2,3] + @test x == [5,7,1,-2] + @. x[[begin,2,end]] .+= [1,2,3] + @test x == [6,9,1,1] end @views @test isa(X[1:3], SubArray) @test X[begin:end] == @views X[begin:end] @@ -663,8 +812,40 @@ end @testset "unaliascopy trimming; Issue #26263" begin A = rand(5,5,5,5) V = view(A, 2:5, :, 2:5, 1:2:5) - @test @inferred(Base.unaliascopy(V)) == V == A[2:5, :, 2:5, 1:2:5] - @test @inferred(sum(Base.unaliascopy(V))) ≈ sum(V) ≈ sum(A[2:5, :, 2:5, 1:2:5]) + V′ = @inferred(Base.unaliascopy(V)) + @test size(V′.parent) == size(V) + @test V′::typeof(V) == V == A[2:5, :, 2:5, 1:2:5] + @test @inferred(sum(V′)) ≈ sum(V) ≈ sum(A[2:5, :, 2:5, 1:2:5]) + V = view(A, Base.IdentityUnitRange(2:4), :, Base.StepRangeLen(1,1,3), 1:2:5) + V′ = @inferred(Base.unaliascopy(V)) + @test size(V.parent) != size(V′.parent) + @test V′ == V && V′ isa typeof(V) + i1 = collect(CartesianIndices((2:5))) + i2 = [CartesianIndex(), CartesianIndex()] + i3 = collect(CartesianIndices((2:5, 1:2:5))) + V = view(A, i1, 1:5, i2, i3) + @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i2, i3] + V = view(A, i1, 1:5, i3, i2) + @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i3, i2] + + @testset "custom ranges" begin + struct MyStepRange{T} <: OrdinalRange{T,T} + r::StepRange{T,T} + end + + for f in (:first, :last, :step, :length, :size) + @eval Base.$f(r::MyStepRange) = $f(r.r) + end + Base.getindex(r::MyStepRange, i::Int) = r.r[i] + + a = rand(6) + V = view(a, MyStepRange(2:2:4)) + @test @inferred(Base.unaliascopy(V))::typeof(V) == V + + # empty range + V = view(a, MyStepRange(2:2:1)) + @test @inferred(Base.unaliascopy(V))::typeof(V) == V + end end @testset "issue #27632" begin @@ -762,9 +943,9 @@ end @testset "issue #41221: view(::Vector, :, 1)" begin v = randn(3) - @test view(v,:,1) == v - @test parent(view(v,:,1)) === v - @test parent(view(v,2:3,1,1)) === v + @test @inferred(view(v,:,1)) == v + @test parent(@inferred(view(v,:,1))) === v + @test parent(@inferred(view(v,2:3,1,1))) === v @test_throws BoundsError view(v,:,2) @test_throws BoundsError view(v,:,1,2) @@ -772,3 +953,148 @@ end @test view(m, 1:2, 3, 1, 1) == m[1:2, 3] @test parent(view(m, 1:2, 3, 1, 1)) === m end + +@testset "issue #53209: avoid invalid elimination of singleton indices" begin + A = randn(4,5) + @test A[CartesianIndices(()), :, 3] == @inferred(view(A, CartesianIndices(()), :, 3)) + @test parent(@inferred(view(A, :, 3, 1, CartesianIndices(()), 1))) === A + @test_throws BoundsError view(A, :, 3, 2, CartesianIndices(()), 1) +end + +@testset "replace_in_print_matrix" begin + struct MyIdentity <: AbstractMatrix{Bool} + n :: Int + end + Base.size(M::MyIdentity) = (M.n, M.n) + function Base.getindex(M::MyIdentity, i::Int, j::Int) + checkbounds(M, i, j) + i == j + end + function Base.replace_in_print_matrix(M::MyIdentity, i::Integer, j::Integer, s::AbstractString) + i == j ? s : Base.replace_with_centered_mark(s) + end + V = view(MyIdentity(3), 1:2, 1:3) + @test sprint(show, "text/plain", V) == "$(summary(V)):\n 1 ⋅ ⋅\n ⋅ 1 ⋅" + + struct OneElVec <: AbstractVector{Bool} + n :: Int + ind :: Int + end + Base.size(M::OneElVec) = (M.n,) + function Base.getindex(M::OneElVec, i::Int) + checkbounds(M, i) + i == M.ind + end + function Base.replace_in_print_matrix(M::OneElVec, i::Integer, j::Integer, s::AbstractString) + i == M.ind ? 
s : Base.replace_with_centered_mark(s) + end + V = view(OneElVec(6, 2), 1:5) + @test sprint(show, "text/plain", V) == "$(summary(V)):\n ⋅\n 1\n ⋅\n ⋅\n ⋅" + + V = view(1:2, [CartesianIndex(2)]) + @test sprint(show, "text/plain", V) == "$(summary(V)):\n 2" +end + +@testset "Base.first_index for offset indices" begin + a = Vector(1:10) + b = view(a, Base.IdentityUnitRange(4:7)) + @test first(b) == a[Base.first_index(b)] +end + +@testset "StepRangeLen of CartesianIndex-es" begin + v = view(1:2, StepRangeLen(CartesianIndex(1,1), CartesianIndex(1,1), 0)) + @test isempty(v) + r = StepRangeLen(CartesianIndex(1), CartesianIndex(1), 1) + v = view(1:2, r) + @test v == view(1:2, collect(r)) +end + +# https://github.com/JuliaLang/julia/pull/53064 +# `@view(A[idx]) = xxx` should raise syntax error always +@test try + Core.eval(@__MODULE__, :(@view(A[idx]) = 2)) + false +catch err + err isa ErrorException && startswith(err.msg, "syntax:") +end +module Issue53064 +import Base: view +end +@test try + Core.eval(Issue53064, :(@view(A[idx]) = 2)) + false +catch err + err isa ErrorException && startswith(err.msg, "syntax:") +end + +@testset "isassigned" begin + a = Vector{BigFloat}(undef, 5) + a[2] = 0 + for v in (view(a, 2:3), # FastContiguousSubArray + view(a, 2:2:4), # FastSubArray + view(a, [2:2:4;]), # SlowSubArray + ) + @test !isassigned(v, 0) # out-of-bounds + @test isassigned(v, 1) # inbounds and assigned + @test !isassigned(v, 2) # inbounds but not assigned + @test !isassigned(v, 4) # out-of-bounds + end + + a = Array{BigFloat}(undef,3,3,3) + a[1,1,1] = 0 + for v in (view(a, :, 1:3, 1), # FastContiguousSubArray + view(a, 1, :, 1:2), # FastSubArray + ) + @test !isassigned(v, 0, 0) # out-of-bounds + @test isassigned(v, 1, 1) # inbounds and assigned + @test !isassigned(v, 1, 2) # inbounds but not assigned + @test !isassigned(v, 3, 3) # out-of-bounds + end +end + +@testset "aliasing checks with shared indices" begin + indices = [1,3] + a = rand(3) + av = @view a[indices] + b = rand(3) + bv = @view b[indices] + @test !Base.mightalias(av, bv) + @test Base.mightalias(a, av) + @test Base.mightalias(b, bv) + @test Base.mightalias(indices, av) + @test Base.mightalias(indices, bv) + @test Base.mightalias(view(indices, :), av) + @test Base.mightalias(view(indices, :), bv) +end + +@testset "aliasing checks with disjoint arrays" begin + A = rand(3,4,5) + @test Base.mightalias(view(A, :, :, 1), view(A, :, :, 1)) + @test !Base.mightalias(view(A, :, :, 1), view(A, :, :, 2)) + + B = reinterpret(UInt64, A) + @test Base.mightalias(view(B, :, :, 1), view(A, :, :, 1)) + @test !Base.mightalias(view(B, :, :, 1), view(A, :, :, 2)) + + C = reinterpret(UInt32, A) + @test Base.mightalias(view(C, :, :, 1), view(A, :, :, 1)) + @test Base.mightalias(view(C, :, :, 1), view(A, :, :, 2)) # This is overly conservative + @test Base.mightalias(@view(C[begin:2:end, :, 1]), view(A, :, :, 1)) + @test Base.mightalias(@view(C[begin:2:end, :, 1]), view(A, :, :, 2)) # This is overly conservative +end + +@testset "aliasing check with reshaped subarrays" begin + C = rand(2,1) + V1 = @view C[1, :] + V2 = @view C[2, :] + + @test !Base.mightalias(V1, V2) + @test !Base.mightalias(V1, permutedims(V2)) + @test !Base.mightalias(permutedims(V1), V2) + @test !Base.mightalias(permutedims(V1), permutedims(V2)) + + @test Base.mightalias(V1, V1) + @test Base.mightalias(V1, permutedims(V1)) + @test Base.mightalias(permutedims(V1), V1) + @test Base.mightalias(permutedims(V1), permutedims(V1)) +end diff --git a/test/subtype.jl b/test/subtype.jl index 
de11689e9e7c4..ba7f86bb86a14 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -146,6 +146,14 @@ function test_diagonal() @test isequal_type(Ref{Tuple{T, T} where Int<:T<:Int}, Ref{Tuple{S, S}} where Int<:S<:Int) + # issue #53021 + @test Tuple{X, X} where {X<:Union{}} <: Tuple{X, X, Vararg{Any}} where {Int<:X<:Int} + @test Tuple{Integer, X, Vararg{X}} where {X<:Int} <: Tuple{Any, Vararg{X}} where {X>:Int} + @test Tuple{Any, X, Vararg{X}} where {X<:Int} <: Tuple{Vararg{X}} where X>:Integer + @test Tuple{Integer, Integer, Any, Vararg{Any}} <: Tuple{Vararg{X}} where X>:Integer + # issue #53019 + @test Tuple{T,T} where {T<:Int} <: Tuple{T,T} where {T>:Int} + let A = Tuple{Int,Int8,Vector{Integer}}, B = Tuple{T,T,Vector{T}} where T>:Integer, C = Tuple{T,T,Vector{Union{Integer,T}}} where T @@ -699,16 +707,17 @@ macro testintersect(a, b, result) a = esc(a) b = esc(b) result = esc(result) - Base.remove_linenums!(quote + # use a manual macrocall expression since Test will examine this __source__ value + return quote # test real intersect - @test $cmp(_type_intersect($a, $b), $result) - @test $cmp(_type_intersect($b, $a), $result) + $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($a, $b), $result)))) + $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($b, $a), $result)))) # test simplified intersect if !($result === Union{}) - @test typeintersect($a, $b) != Union{} - @test typeintersect($b, $a) != Union{} + $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($a, $b) != Union{}))) + $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($b, $a) != Union{}))) end - end) + end end abstract type IT4805_2{N, T} end @@ -1260,14 +1269,7 @@ let a = Tuple{Tuple{T2,4},T6} where T2 where T6, end let a = Tuple{T3,Int64,Tuple{T3}} where T3, b = Tuple{S3,S3,S4} where S4 where S3 - I1 = typeintersect(a, b) - I2 = typeintersect(b, a) - @test I1 <: I2 - @test I2 <: I1 - @test_broken I1 <: a - @test I2 <: a - @test I1 <: b - @test I2 <: b + @testintersect(a, b, Tuple{Int64, Int64, Tuple{Int64}}) end let a = Tuple{T1,Val{T2},T2} where T2 where T1, b = Tuple{Float64,S1,S2} where S2 where S1 @@ -1436,6 +1438,9 @@ struct A23764_2{T, N, S} <: AbstractArray{Union{Ref{T}, S}, N}; end @test Tuple{A23764_2{T, 1, Nothing} where T} <: Tuple{AbstractArray{T,N}} where {T,N} @test Tuple{A23764_2{T, 1, Nothing} where T} <: Tuple{AbstractArray{T,N} where {T,N}} +# issue #50716 +@test !<:(Ref{Vector{Tuple{K}} where K}, Ref{<:Vector{K}} where K) + # issue #26131 @test !(Vector{Vector{Number}} <: Vector{Union{Vector{Number}, Vector{S}}} where S<:Integer) @@ -1517,7 +1522,7 @@ f26453(x::T,y::T) where {S,T>:S} = 0 @test f26453(1,2) == 0 @test f26453(1,"") == 0 g26453(x::T,y::T) where {S,T>:S} = T -@test_throws UndefVarError(:T) g26453(1,1) +@test_throws UndefVarError(:T, :static_parameter) g26453(1,1) @test issub_strict((Tuple{T,T} where T), (Tuple{T,T} where {S,T>:S})) # issue #27632 @@ -1803,7 +1808,7 @@ let (_, E) = intersection_env(Tuple{Tuple{Vararg{Int}}}, Tuple{Tuple{Vararg{Int, @test !isa(E[1], Type) end -# this is is a timing test, so it would fail on debug builds +# this is a timing test, so it would fail on debug builds #let T = Type{Tuple{(Union{Int, Nothing} for i = 1:23)..., Union{String, Nothing}}}, # S = Type{T} where T<:Tuple{E, Vararg{E}} where E # @test @elapsed (@test T != S) < 5 @@ -2207,13 +2212,19 @@ let A = Tuple{NTuple{N, Int}, NTuple{N, Int}} where N, Bs = (Tuple{Tuple{Int, Vararg{Any}}, Tuple{Int, Int, Vararg{Any}}}, Tuple{Tuple{Int, Vararg{Any,N1}}, 
Tuple{Int, Int, Vararg{Any,N2}}} where {N1,N2}, Tuple{Tuple{Int, Vararg{Any,N}} where {N}, Tuple{Int, Int, Vararg{Any,N}} where {N}}) - Cerr = Tuple{Tuple{Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + C = Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs + @testintersect(A, B, C) + end + A = Tuple{NTuple{N, Int}, Tuple{Int, Vararg{Int, N}}} where N + C = Tuple{Tuple{Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} for B in Bs - C = typeintersect(A, B) - @test C == typeintersect(B, A) != Union{} - @test C != Cerr - # TODO: The ideal result is Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} - @test_broken C != Tuple{Tuple{Int, Vararg{Int}}, Tuple{Int, Int, Vararg{Int}}} + @testintersect(A, B, C) + end + A = Tuple{Tuple{Int, Vararg{Int, N}}, NTuple{N, Int}} where N + C = Tuple{Tuple{Int, Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs + @testintersect(A, B, C) end end @@ -2226,9 +2237,8 @@ let A = Pair{NTuple{N, Int}, NTuple{N, Int}} where N, Bs = (Pair{<:Tuple{Int, Vararg{Int}}, <:Tuple{Int, Int, Vararg{Int}}}, Pair{Tuple{Int, Vararg{Int,N1}}, Tuple{Int, Int, Vararg{Int,N2}}} where {N1,N2}, Pair{<:Tuple{Int, Vararg{Int,N}} where {N}, <:Tuple{Int, Int, Vararg{Int,N}} where {N}}) - Cs = (Bs[2], Bs[2], Bs[3]) - for (B, C) in zip(Bs, Cs) - # TODO: The ideal result is Pair{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + C = Pair{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs @testintersect(A, B, C) end end @@ -2258,31 +2268,46 @@ let S = Tuple{Integer, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}} @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Any,Vararg{Any,N}}}, Union{}) end +function equal_envs(env1, env2) + length(env1) == length(env2) || return false + for i = 1:length(env1) + a = env1[i] + b = env2[i] + if a isa TypeVar + if !(b isa TypeVar && a.name == b.name && a.lb == b.lb && a.ub == b.ub) + return false + end + elseif !(a == b) + return false + end + end + return true +end + # issue #43064 let - env_tuple(@nospecialize(x), @nospecialize(y)) = (intersection_env(x, y)[2]...,) - all_var(x::UnionAll) = (x.var, all_var(x.body)...) 
- all_var(x::DataType) = () + env_tuple(@nospecialize(x), @nospecialize(y)) = intersection_env(x, y)[2] TT0 = Tuple{Type{T},Union{Real,Missing,Nothing}} where {T} TT1 = Union{Type{Int8},Type{Int16}} @test env_tuple(Tuple{TT1,Missing}, TT0) === env_tuple(Tuple{TT1,Nothing}, TT0) === - env_tuple(Tuple{TT1,Int}, TT0) === all_var(TT0) + env_tuple(Tuple{TT1,Int}, TT0) === + Core.svec(TT0.var) TT0 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T1,T2} TT1 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T2,T1} TT2 = Tuple{Union{Int,Int8},Union{Int,Int8},Int} TT3 = Tuple{Int,Union{Int,Int8},Int} - @test env_tuple(TT2, TT0) === all_var(TT0) - @test env_tuple(TT2, TT1) === all_var(TT1) - @test env_tuple(TT3, TT0) === Base.setindex(all_var(TT0), Int, 1) - @test env_tuple(TT3, TT1) === Base.setindex(all_var(TT1), Int, 2) + @test equal_envs(env_tuple(TT2, TT0), Core.svec(TypeVar(:T1, Union{Int, Int8}), TypeVar(:T2, Union{Int, Int8}))) + @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), TypeVar(:T1, Union{Int, Int8}))) + @test equal_envs(env_tuple(TT3, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8}))) + @test equal_envs(env_tuple(TT3, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int)) TT0 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T1,T2} TT1 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T2,T1} TT2 = Tuple{Int,Union{Int,Int8},Int,Int} - @test env_tuple(TT2, TT0) === Base.setindex(all_var(TT0), Int, 1) - @test env_tuple(TT2, TT1) === Base.setindex(all_var(TT1), Int, 2) + @test equal_envs(env_tuple(TT2, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8}))) + @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int)) end #issue #46735 @@ -2327,9 +2352,10 @@ T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{Abstr #issue 36185 let S = Tuple{Type{T},Array{Union{T,Missing},N}} where {T,N}, T = Tuple{Type{T},Array{Union{T,Nothing},N}} where {T,N} - @testintersect(S, T, !Union{}) - @test_broken typeintersect(S, T) != S - @test_broken typeintersect(T, S) != T + I = typeintersect(S, T) + @test I == typeintersect(T, S) != Union{} + @test_broken I <: S + @test_broken I <: T end #issue 46736 @@ -2370,12 +2396,41 @@ let S = Tuple{T2, V2} where {T2, N2, V2<:(Array{S2, N2} where {S2 <: T2})}, @testintersect(S, T, !Union{}) end -# A simple case which has a small local union. -# make sure the env is not widened too much when we intersect(Int8, Int8). -struct T48006{A1,A2,A3} end -@testintersect(Tuple{T48006{Float64, Int, S1}, Int} where {F1<:Real, S1<:Union{Int8, Val{F1}}}, - Tuple{T48006{F2, I, S2}, I} where {F2<:Real, I<:Int, S2<:Union{Int8, Val{F2}}}, - Tuple{T48006{Float64, Int, S1}, Int} where S1<:Union{Val{Float64}, Int8}) +let S = Dict{Int, S1} where {F1, S1<:Union{Int8, Val{F1}}}, + T = Dict{F2, S2} where {F2, S2<:Union{Int8, Val{F2}}} + @test_broken typeintersect(S, T) == Dict{Int, S} where S<:Union{Val{Int}, Int8} + @test typeintersect(T, S) == Dict{Int, S} where S<:Union{Val{Int}, Int8} +end + +# Ensure inner `intersect_all` never under-estimates. 
+let S = Tuple{F1, Dict{Int, S1}} where {F1, S1<:Union{Int8, Val{F1}}}, + T = Tuple{Any, Dict{F2, S2}} where {F2, S2<:Union{Int8, Val{F2}}} + @test Tuple{Nothing, Dict{Int, Int8}} <: S + @test Tuple{Nothing, Dict{Int, Int8}} <: T + @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(S, T) + @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(T, S) +end + +let S = Tuple{F1, Val{S1}} where {F1, S1<:Dict{F1}} + T = Tuple{Any, Val{S2}} where {F2, S2<:Union{map(T->Dict{T}, Base.BitInteger_types)...}} + ST = typeintersect(S, T) + TS = typeintersect(S, T) + for U in Base.BitInteger_types + @test Tuple{U, Val{Dict{U,Nothing}}} <: S + @test Tuple{U, Val{Dict{U,Nothing}}} <: T + @test Tuple{U, Val{Dict{U,Nothing}}} <: ST + @test Tuple{U, Val{Dict{U,Nothing}}} <: TS + end +end + +#issue 55206 +struct T55206{A,B<:Complex{A},C<:Union{Dict{Nothing},Dict{A}}} end +@testintersect(T55206, T55206{<:Any,<:Any,<:Dict{Nothing}}, T55206{A,<:Complex{A},<:Dict{Nothing}} where {A}) +@testintersect( + Tuple{Dict{Int8, Int16}, Val{S1}} where {F1, S1<:AbstractSet{F1}}, + Tuple{Dict{T1, T2}, Val{S2}} where {T1, T2, S2<:Union{Set{T1},Set{T2}}}, + Tuple{Dict{Int8, Int16}, Val{S1}} where {S1<:Union{Set{Int8},Set{Int16}}} +) f48167(::Type{Val{L2}}, ::Type{Union{Val{L1}, Set{R}}}) where {L1, R, L2<:L1} = 1 f48167(::Type{Val{L1}}, ::Type{Union{Val{L2}, Set{R}}}) where {L1, R, L2<:L1} = 2 @@ -2437,7 +2492,7 @@ abstract type P47654{A} end @test_broken typeintersect(Type{Tuple{Array{T,1} where T}}, UnionAll) != Union{} #issue 33137 - @test_broken (Tuple{Q,Int} where Q<:Int) <: Tuple{T,T} where T + @test (Tuple{Q,Int} where Q<:Int) <: Tuple{T,T} where T # issue 24333 @test (Type{Union{Ref,Cvoid}} <: Type{Union{T,Cvoid}} where T) @@ -2544,7 +2599,7 @@ end let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} where T<:Val{A} where A where S<:Val, S = Tuple{Type{T}, T, Val{T}} where T<:(Val{S} where S<:Val) # optimal = Union{}? 
- @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val) + @test typeintersect(T, S) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {S<:Val, T<:Val} @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)} end @@ -2552,8 +2607,142 @@ end @test !<:(Type{Vector{Union{Base.BitInteger, Base.IEEEFloat, StridedArray, Missing, Nothing, Val{T}}}} where {T}, Type{Array{T}} where {T}) #issue 50195 -T50195{S} = Pair{S,Set{S}} let a = Tuple{Type{X} where X<:Union{Nothing, Val{X1} where {X4, X1<:(Pair{X2, Val{X2}} where X2<:Val{X4})}}}, b = Tuple{Type{Y} where Y<:(Val{Y1} where {Y4<:Src, Y1<:(Pair{Y2, Val{Y2}} where Y2<:Union{Val{Y4}, Y4})})} where Src @test typeintersect(a, b) <: Any end + +#issue 50195 +let a = Tuple{Union{Nothing, Type{Pair{T1}} where T1}} + b = Tuple{Type{X2} where X2<:(Pair{T2, Y2} where {Src, Z2<:Src, Y2<:Union{Val{Z2}, Z2}})} where T2 + @test !Base.has_free_typevars(typeintersect(a, b)) +end + +#issue 53366 +let Y = Tuple{Val{T}, Val{Val{T}}} where T + A = Val{Val{T}} where T + T = TypeVar(:T, UnionAll(A.var, Val{A.var})) + B = UnionAll(T, Val{T}) + X = Tuple{A, B} + @testintersect(X, Y, !Union{}) +end + +#issue 53621 (requires assertions enabled) +abstract type A53621{T, R, C, U} <: AbstractSet{Union{C, U}} end +struct T53621{T, R<:Real, C, U} <: A53621{T, R, C, U} end +let + U = TypeVar(:U) + C = TypeVar(:C) + T = TypeVar(:T) + R = TypeVar(:R) + CC = TypeVar(:CC, Union{C, U}) + UU = TypeVar(:UU, Union{C, U}) + S1 = UnionAll(T, UnionAll(R, Type{UnionAll(C, UnionAll(U, T53621{T, R, C, U}))})) + S2 = UnionAll(C, UnionAll(U, UnionAll(CC, UnionAll(UU, UnionAll(T, UnionAll(R, T53621{T, R, CC, UU})))))) + S = Tuple{S1, S2} + T = Tuple{Type{T53621{T, R}}, AbstractSet{T}} where {T, R} + @testintersect(S, T, !Union{}) +end + +#issue 53371 +struct T53371{A,B,C,D,E} end +S53371{A} = Union{Int, <:A} +R53371{A} = Val{V} where V<:(T53371{B,C,D,E,F} where {B<:Val{A}, C<:S53371{B}, D<:S53371{B}, E<:S53371{B}, F<:S53371{B}}) +let S = Type{T53371{A, B, C, D, E}} where {A, B<:R53371{A}, C<:R53371{A}, D<:R53371{A}, E<:R53371{A}}, + T = Type{T53371{A, B, C, D, E} where {A, B<:R53371{A}, C<:R53371{A}, D<:R53371{A}, E<:R53371{A}}} + @test !(S <: T) +end + +#issue 54356 +let S = Tuple{Val{Val{Union{Val{A2}, A2}}}, Val{Val{Union{Val{A2}, Val{A4}, A4}}}} where {A2, A4<:Union{Val{A2}, A2}}, + T = Tuple{Vararg{Val{V}}} where {V} + @testintersect(S, T, !Union{}) +end + +#issue 54356 +abstract type A54356{T<:Real} end +struct B54356{T} <: A54356{T} end +struct C54356{S,T<:Union{S,Complex{S}}} end +struct D54356{S<:Real,T} end +let S = Tuple{Val, Val{T}} where {T}, R = Tuple{Val{Val{T}}, Val{T}} where {T}, + SS = Tuple{Val, Val{T}, Val{T}} where {T}, RR = Tuple{Val{Val{T}}, Val{T}, Val{T}} where {T} + # parameters check for self + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Complex{B}}}, S{1}, R{1}) + # parameters check for supertype (B54356 -> A54356) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, B54356{B}}}, S{1}, R{1}) + # ensure an unused TypeVar skips the `UnionAll` wrapping + @testintersect(Tuple{Val{A}, A} where {B, A<:(Union{Val{B}, D54356{B,C}} where {C})}, S{1}, R{1}) + # invariant parameter should not get narrowed + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Val{Union{Int,Complex{B}}}}}, S{1}, R{1}) + # a bit value cannot be a `Union` element + 
@testintersect(Tuple{Val{A}, A, Val{B}} where {B, A<:Union{B, Val{B}}}, SS{1}, RR{1}) + @testintersect(Tuple{Val{A}, A, Val{B}} where {B, A<:Union{B, Complex{B}}}, SS{1}, Union{}) + # `check_datatype_parameters` should ignore bad `Union` elements in constraint's ub + T = Tuple{Val{Union{Val{Nothing}, Val{C54356{V,V}}}}, Val{Nothing}} where {Nothing<:V<:Nothing} + @test T <: S{Nothing} + @test T <: Tuple{Val{A}, A} where {B, C, A<:Union{Val{B}, Val{C54356{B,C}}}} + @test T <: typeintersect(Tuple{Val{A}, A} where {B, C, A<:Union{Val{B}, Val{C54356{B,C}}}}, S{Nothing}) + # extra check for Vararg + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NTuple{B,Any}}}, S{-1}, R{-1}) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Tuple{Any,Vararg{Any,B}}}}, S{-1}, R{-1}) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, Tuple{Vararg{Int,Union{Int,Complex{B}}}}}}, S{1}, R{1}) + # extra check for NamedTuple + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int}}}}, S{1}, R{1}) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int}}}}, S{(1,)}, R{(1,)}) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{(:a),B}}}, S{NTuple{2,Int}}, R{NTuple{2,Int}}) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, NamedTuple{B,Tuple{Int,Int}}}}, S{(:a,:a)}, R{(:a,:a)}) + # extra check for GenericMemory/GenericMemoryRef + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, GenericMemory{B}}}, S{1}, R{1}) + @testintersect(Tuple{Val{A}, A} where {B, A<:Union{Val{B}, GenericMemory{:not_atomic,Int,B}}}, S{1}, R{1}) +end + +#issue 54516 +let S = Tuple{Val{<:T}, Union{Int,T}} where {T}, + T = Tuple{Union{Int,T}, Val{<:T}} where {T} + @testintersect(S, T, !Union{}) + @test !Base.has_free_typevars(typeintersect(S, T)) +end + +#issue 55230 +let T1 = NTuple{12, Union{Val{1}, Val{2}, Val{3}, Val{4}, Val{5}, Val{6}}} + T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any} + @test T1 <: T2 + T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Val} + @test T1 <: T2 + T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Real} + @test !(T1 <: T2) + T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{Val,Real}} + @test T1 <: T2 + T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{String,Real}} + @test !(T1 <: T2) + T2 = Tuple{<:Union{Val,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any} + @test T1 <: T2 + T2 = Tuple{<:Union{String,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any} + @test !(T1 <: T2) + @test Tuple{Union{Val{1},Val{2}}} <: Tuple{S} where {T, S<:Val{T}} +end + +#issue 56040 +let S = Dict{V,V} where {V}, + T = Dict{Ref{Union{Set{A2}, Set{A3}, A3}}, Ref{Union{Set{A3}, Set{A2}, Set{A1}, Set{A4}, A4}}} where {A1, A2<:Set{A1}, A3<:Union{Set{A1}, Set{A2}}, A4<:Union{Set{A2}, Set{A1}, Set{A3}}}, + A = Dict{Ref{Set{Union{}}}, Ref{Set{Union{}}}} + @testintersect(S, T, !Union{}) + @test A <: typeintersect(S, T) + @test A <: typeintersect(T, S) +end + +#issue 56606 +let + A = Tuple{Val{1}} + B = Tuple{Val} + for _ in 1:30 + A = Tuple{Val{A}} + B = Tuple{Val{<:B}} + end + @test A <: B +end +@testintersect( + Val{Tuple{Int,S,T}} where {S<:Any,T<:Vector{Vector{Int}}}, + Val{Tuple{T,R,S}} where {T,R<:Vector{T},S<:Vector{R}}, + Val{Tuple{Int, Vector{Int}, T}} where T<:Vector{Vector{Int}}, +) diff --git 
a/test/syntax.jl b/test/syntax.jl index 4d1b167693adb..9fd0204821eab 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -476,7 +476,7 @@ let err = try catch e e end - @test err.line == 7 + @test err.line in (5, 7) end # PR #17393 @@ -501,6 +501,10 @@ let m_error, error_out, filename = Base.source_path() m_error = try @eval foo(types::NTuple{N}, values::Vararg{Any,N}, c) where {N} = nothing; catch e; e; end error_out = sprint(showerror, m_error) @test startswith(error_out, "ArgumentError: Vararg on non-final argument") + + m_error = try @eval method_c6(a::Vararg{:A}) = 1; catch e; e; end + error_out = sprint(showerror, m_error) + @test startswith(error_out, "ArgumentError: invalid type for argument a in method definition for method_c6 at $filename:") end # issue #7272 @@ -549,7 +553,14 @@ for (str, tag) in Dict("" => :none, "\"" => :string, "#=" => :comment, "'" => :c end # meta nodes for optional positional arguments -let src = Meta.lower(Main, :(@inline f(p::Int=2) = 3)).args[1].code[end-1].args[3] +let code = Meta.lower(Main, :(@inline f(p::Int=2) = 3)).args[1].code + local src + for i = length(code):-1:1 + if Meta.isexpr(code[i], :method) + src = code[i].args[3] + break + end + end @test Core.Compiler.is_declared_inline(src) end @@ -574,6 +585,7 @@ let thismodule = @__MODULE__, @test isa(ex, Expr) @test !isdefined(M16096, :foo16096) local_foo16096 = Core.eval(@__MODULE__, ex) + Core.@latestworld @test local_foo16096(2.0) == 1 @test !@isdefined foo16096 @test !@isdefined it @@ -709,36 +721,10 @@ m1_exprs = get_expr_list(Meta.lower(@__MODULE__, quote @m1 end)) let low3 = Meta.lower(@__MODULE__, quote @m3 end) m3_exprs = get_expr_list(low3) ci = low3.args[1]::Core.CodeInfo - @test ci.codelocs == [4, 2] + #@test ci.codelocs in ([4, 4, 0], [4, 0]) @test is_return_ssavalue(m3_exprs[end]) end -function f1(a) - b = a + 100 - b -end - -@generated function f2(a) - quote - b = a + 100 - b - end -end - -f1_ci = code_typed(f1, (Int,), debuginfo=:source)[1][1] -f2_ci = code_typed(f2, (Int,), debuginfo=:source)[1][1] - -f1_exprs = get_expr_list(f1_ci) -f2_exprs = get_expr_list(f2_ci) - -if Base.JLOptions().can_inline != 0 - @test length(f1_ci.linetable) == 3 - @test length(f2_ci.linetable) >= 3 -else - @test length(f1_ci.linetable) == 2 - @test length(f2_ci.linetable) >= 3 -end - # Check that string and command literals are parsed to the appropriate macros @test :(x"s") == :(@x_str "s") @test :(x"s"flag) == :(@x_str "s" "flag") @@ -1702,7 +1688,7 @@ end @test Meta.parse("(a...)") == Expr(Symbol("..."), :a) # #19324 -@test_throws UndefVarError(:x) eval(:(module M19324 +@test_throws UndefVarError(:x, :local) eval(:(module M19324 x=1 for i=1:10 x += i @@ -1779,6 +1765,43 @@ end @test B28593.var.name === :S @test C28593.var.name === :S +# issue #51899 +macro struct_macro_51899() + quote + mutable struct Struct51899 + const const_field + const const_field_with_type::Int + $(esc(Expr(:const, :(escaped_const_field::MyType)))) + @atomic atomic_field + @atomic atomic_field_with_type::Int + end + end +end + +let ex = @macroexpand @struct_macro_51899() + const_field, const_field_with_type, escaped_const_field, + atomic_field, atomic_field_with_type = filter(x -> isa(x, Expr), ex.args[end].args[end].args) + @test Meta.isexpr(const_field, :const) + @test const_field.args[1] === :const_field + + @test Meta.isexpr(const_field_with_type, :const) + @test Meta.isexpr(const_field_with_type.args[1], :(::)) + @test const_field_with_type.args[1].args[1] === :const_field_with_type + @test 
const_field_with_type.args[1].args[2] == GlobalRef(@__MODULE__, :Int) + + @test Meta.isexpr(escaped_const_field, :const) + @test Meta.isexpr(const_field_with_type.args[1], :(::)) + @test escaped_const_field.args[1].args[1] === :escaped_const_field + @test escaped_const_field.args[1].args[2] === :MyType + + @test Meta.isexpr(atomic_field, :atomic) + @test atomic_field.args[1] === :atomic_field + + @test Meta.isexpr(atomic_field_with_type, :atomic) + @test atomic_field_with_type.args[1].args[1] === :atomic_field_with_type + @test atomic_field_with_type.args[1].args[2] == GlobalRef(@__MODULE__, :Int) +end + # issue #25955 macro noeffect25955(e) return e @@ -1882,7 +1905,7 @@ function capture_with_conditional_label() return y->x end let f = capture_with_conditional_label() # should not throw - @test_throws UndefVarError(:x) f(0) + @test_throws UndefVarError(:x, :local) f(0) end # `_` should not create a global (or local) @@ -2174,6 +2197,16 @@ end end @test z28789 == 42 +const warn28789 = "Assignment to `s28789` in soft scope is ambiguous because a global variable by the same name exists: "* + "`s28789` will be treated as a new local. Disambiguate by using `local s28789` to suppress this warning or "* + "`global s28789` to assign to the existing global variable." +@test_logs (:warn, warn28789) @test_throws UndefVarError @eval begin + s28789 = 0 + for i = 1:10 + s28789 += i + end +end + # issue #38650, `struct` should always be a hard scope f38650() = 0 @eval begin @@ -2470,7 +2503,14 @@ end function ncalls_in_lowered(ex, fname) lowered_exprs = Meta.lower(Main, ex).args[1].code return count(lowered_exprs) do ex - Meta.isexpr(ex, :call) && ex.args[1] == fname + if Meta.isexpr(ex, :call) + arg = ex.args[1] + if isa(arg, Core.SSAValue) + arg = lowered_exprs[arg.id] + end + return arg == fname + end + return false end end @@ -2510,19 +2550,19 @@ end 3 4 5] @test Meta.parse("for x in 1:10 g(x) end") == - Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end") + Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end") @test Meta.parse("(f->f(1))() do x x+1 end") == - Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end") + Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end") @test Meta.parse("while i < 10 i += 1 end") == - Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end") + Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end") @test Meta.parse("begin x=1 end") == Meta.parse("begin#==#x=1#==#end") @test Meta.parse("if x0 y+1 else z end") == - Meta.parse("if#==#x0#==#y+1#==#else#==#z#==#end") + Meta.parse("if#==#x0#==#y+1#==#else#==#z#==#end") @test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end") @test Meta.parse("a ? 
b : c") == Meta.parse("a#==#?#==#b#==#:#==#c") @test_parseerror("f#==#(x)=x", "space before \"(\" not allowed in \"f (\" at none:1") @test Meta.parse("try f() catch e g() finally h() end") == - Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end") + Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end") @test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b") # issue #37540 @@ -2613,9 +2653,9 @@ end @test_throws ErrorException("invalid method definition in Mod3: function Mod3.f must be explicitly imported to be extended") Core.eval(Mod3, :(f(x::Int) = x)) @test !isdefined(Mod3, :always_undef) # resolve this binding now in Mod3 @test_throws ErrorException("invalid method definition in Mod3: exported function Mod.always_undef does not exist") Core.eval(Mod3, :(always_undef(x::Int) = x)) -@test_throws ErrorException("cannot assign a value to imported variable Mod.always_undef from module Mod3") Core.eval(Mod3, :(const always_undef = 3)) -@test_throws ErrorException("cannot assign a value to imported variable Mod3.f") Core.eval(Mod3, :(const f = 3)) -@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it already has a value") Core.eval(Mod, :(const maybe_undef = 3)) +@test_throws ErrorException("cannot declare Mod3.always_undef constant; it was already declared as an import") Core.eval(Mod3, :(const always_undef = 3)) +@test_throws ErrorException("cannot declare Mod3.f constant; it was already declared as an import") Core.eval(Mod3, :(const f = 3)) +@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it was already declared global") Core.eval(Mod, :(const maybe_undef = 3)) z = 42 import .z as also_z @@ -2799,7 +2839,7 @@ end @test a == 5 @test b == 6 - @test_throws ErrorException (; a, b) = (x=1,) + @test_throws FieldError (; a, b) = (x=1,) @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x end), :error) @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x, y end), :error) @@ -2808,7 +2848,7 @@ end f((; a, b)) = a, b @test f((b=3, a=4)) == (4, 3) @test f((b=3, c=2, a=4)) == (4, 3) - @test_throws ErrorException f((;)) + @test_throws FieldError f((;)) # with type annotation let num, den, a, b @@ -3070,6 +3110,7 @@ end ex = Expr(:block) ex.args = fill!(Vector{Any}(undef, 700000), 1) f = eval(Expr(:function, :(), ex)) + @Core.latestworld @test f() == 1 ex = Expr(:vcat) ex.args = fill!(Vector{Any}(undef, 600000), 1) @@ -3183,6 +3224,22 @@ end end @test err == 5 + 6 @test x == 1 + + x = 0 + try + catch + else + x = 1 + end + @test x == 1 + + try + catch + else + tryelse_in_local_scope = true + end + + @test !@isdefined(tryelse_in_local_scope) end @test_parseerror """ @@ -3507,3 +3564,481 @@ let x = 1 => 2 @test_throws ErrorException @eval a => b = 2 @test_throws "function Base.=> must be explicitly imported to be extended" @eval a => b = 2 end + +# Splatting in non-final default value (Ref #50518) +for expr in (quote + function g1(a=(1,2)..., b...=3) + b + end +end,quote + function g2(a=(1,2)..., b=3, c=4) + (b, c) + end +end,quote + function g3(a=(1,2)..., b=3, c...=4) + (b, c) + end +end) + let exc = try eval(expr); catch exc; exc end + @test isa(exc, ErrorException) + @test startswith(exc.msg, "syntax: invalid \"...\" in non-final positional argument default value") + end +end + +# Test that bad lowering does not segfault (ref #50518) +@test_throws ErrorException("syntax: Attempted to use slot marked unused") @eval function funused50518(::Float64) + $(Symbol("#unused#")) +end + +@testset "public keyword" begin + p(str) = 
Base.remove_linenums!(Meta.parse(str)) + # tests ported from JuliaSyntax.jl + @test p("function f(public)\n public + 3\nend") == Expr(:function, Expr(:call, :f, :public), Expr(:block, Expr(:call, :+, :public, 3))) + @test p("public A, B") == Expr(:public, :A, :B) + @test p("if true \n public *= 4 \n end") == Expr(:if, true, Expr(:block, Expr(:*=, :public, 4))) + @test p("module Mod\n public A, B \n end") == Expr(:module, true, :Mod, Expr(:block, Expr(:public, :A, :B))) + @test p("module Mod2\n a = 3; b = 6; public a, b\n end") == Expr(:module, true, :Mod2, Expr(:block, Expr(:(=), :a, 3), Expr(:(=), :b, 6), Expr(:public, :a, :b))) + @test p("a = 3; b = 6; public a, b") == Expr(:toplevel, Expr(:(=), :a, 3), Expr(:(=), :b, 6), Expr(:public, :a, :b)) + @test_throws Meta.ParseError p("begin \n public A, B \n end") + @test_throws Meta.ParseError p("if true \n public A, B \n end") + @test_throws Meta.ParseError p("public export=true foo, bar") + @test_throws Meta.ParseError p("public experimental=true foo, bar") + @test p("public(x::String) = false") == Expr(:(=), Expr(:call, :public, Expr(:(::), :x, :String)), Expr(:block, false)) + @test p("module M; export @a; end") == Expr(:module, true, :M, Expr(:block, Expr(:export, :var"@a"))) + @test p("module M; public @a; end") == Expr(:module, true, :M, Expr(:block, Expr(:public, :var"@a"))) + @test p("module M; export ⤈; end") == Expr(:module, true, :M, Expr(:block, Expr(:export, :⤈))) + @test p("module M; public ⤈; end") == Expr(:module, true, :M, Expr(:block, Expr(:public, :⤈))) + @test p("public = 4") == Expr(:(=), :public, 4) + @test p("public[7] = 5") == Expr(:(=), Expr(:ref, :public, 7), 5) + @test p("public() = 6") == Expr(:(=), Expr(:call, :public), Expr(:block, 6)) +end + +@testset "removing argument sideeffects" begin + # Allow let blocks in broadcasted LHSes, but only evaluate them once: + execs = 0 + array = [1] + let x = array; execs += 1; x; end .+= 2 + @test array == [3] + @test execs == 1 + let; execs += 1; array; end .= 4 + @test array == [4] + @test execs == 2 + let x = array; execs += 1; x; end::Vector{Int} .+= 2 + @test array == [6] + @test execs == 3 + let; execs += 1; array; end::Vector{Int} .= 7 + @test array == [7] + @test execs == 4 +end + +# Allow GlobalRefs in macro definition +module MyMacroModule + macro mymacro end +end +macro MyMacroModule.mymacro() + 1 +end +@eval macro $(GlobalRef(MyMacroModule, :mymacro))(x) + 2 +end +@test (@MyMacroModule.mymacro) == 1 +@test (@MyMacroModule.mymacro(a)) == 2 + +# Issue #53673 - missing macro hygiene for for/generator +baremodule MacroHygieneFor + import ..Base + using Base: esc, Expr, + + macro for1() + :(let a=(for i=10; end; 1); a; end) + end + macro for2() + :(let b=(for j=11, k=12; end; 2); b; end) + end + macro for3() + :(let c=($(Expr(:for, esc(Expr(:block, :(j=11), :(k=12))), :())); 3); c; end) + end + macro for4() + :(begin; local j; let a=(for outer j=10; end; 4); j+a; end; end) + end +end +let nnames = length(names(MacroHygieneFor; all=true)) + @test (@MacroHygieneFor.for1) == 1 + @test (@MacroHygieneFor.for2) == 2 + @test (@MacroHygieneFor.for3) == 3 + @test (@MacroHygieneFor.for4) == 14 + @test length(names(MacroHygieneFor; all=true)) == nnames +end + +baremodule MacroHygieneGenerator + using ..Base: Any, ! 
+ my!(x) = !x + macro gen1() + :(let a=Any[x for x in 1]; a; end) + end + macro gen2() + :(let a=Bool[x for x in (true, false) if my!(x)]; a; end) + end + macro gen3() + :(let a=Bool[x for x in (true, false), y in (true, false) if my!(x) && my!(y)]; a; end) + end +end +let nnames = length(names(MacroHygieneGenerator; all=true)) + @test (MacroHygieneGenerator.@gen1) == Any[x for x in 1] + @test (MacroHygieneGenerator.@gen2) == Bool[false] + @test (MacroHygieneGenerator.@gen3) == Bool[false] + @test length(names(MacroHygieneGenerator; all=true)) == nnames +end + +# Issue #53729 - Lowering recursion into Expr(:toplevel) +@test eval(Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(f53729(x) = x)), :(x=1)))) == 1 +@test f53729(2) == 2 + +# Issue #54701 - Macro hygiene of argument destructuring +macro makef54701() + quote + call(f) = f((1, 2)) + function $(esc(:f54701))() + call() do (a54701, b54701) + return a54701+b54701 + end + end + end +end +@makef54701 +@test f54701() == 3 +@test !@isdefined(a54701) +@test !@isdefined(b54701) + +# Issue #54607 - binding creation in foreign modules should not be permitted +module Foreign54607 + # Syntactic, not dynamic + try_to_create_binding1() = (Foreign54607.foo = 2) + # GlobalRef is allowed for same-module assignment and declares the binding + @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo2)) = 2) + function global_create_binding() + global bar + bar = 3 + end + baz = 4 + begin; + @Base.Experimental.force_compile + compiled_assign = 5 + end + @eval $(GlobalRef(Foreign54607, :gr_assign)) = 6 +end +@test_throws ErrorException (Foreign54607.foo = 1) +@test_throws ErrorException Foreign54607.try_to_create_binding1() +Foreign54607.try_to_create_binding2() +function assign_in_foreign_module() + (Foreign54607.foo = 1) + nothing +end +@test !Core.Compiler.is_nothrow(Base.infer_effects(assign_in_foreign_module)) +@test_throws ErrorException begin + @Base.Experimental.force_compile + (Foreign54607.foo = 1) +end +@test_throws ErrorException @eval (GlobalRef(Foreign54607, :gr_assign2)) = 7 +Foreign54607.global_create_binding() +@test isdefined(Foreign54607, :bar) +@test isdefined(Foreign54607, :baz) +@test isdefined(Foreign54607, :compiled_assign) +@test isdefined(Foreign54607, :gr_assign) +@test isdefined(Foreign54607, :foo2) +Foreign54607.bar = 8 +@test Foreign54607.bar == 8 +begin + @Base.Experimental.force_compile + Foreign54607.bar = 9 +end +@test Foreign54607.bar == 9 + +# Issue #54805 - export mislowering +module Export54805 +let + local b54805=1 + export b54805 +end +b54805 = 2 +end +using .Export54805 +@test b54805 == 2 + +# F{T} = ... has special syntax semantics, not found anywhere else in the language +# that make `F` `const` iff an assignment to `F` is global in the relevant scope. +# We implicitly test this elsewhere, but there's some tricky interactions with +# explicit declarations that we test here. 
+module ImplicitCurlies + using ..Test + let + ImplicitCurly1{T} = Ref{T} + end + @test !@isdefined(ImplicitCurly1) + let + global ImplicitCurly2 + ImplicitCurly2{T} = Ref{T} + end + @test @isdefined(ImplicitCurly2) && isconst(@__MODULE__, :ImplicitCurly2) + begin + ImplicitCurly3{T} = Ref{T} + end + @test @isdefined(ImplicitCurly3) && isconst(@__MODULE__, :ImplicitCurly3) + begin + local ImplicitCurly4 + ImplicitCurly4{T} = Ref{T} + end + @test !@isdefined(ImplicitCurly4) + @test_throws "syntax: `global const` declaration not allowed inside function" Core.eval(@__MODULE__, :(function implicit5() + global ImplicitCurly5 + ImplicitCurly5{T} = Ref{T} + end)) + @test !@isdefined(ImplicitCurly5) + function implicit6() + ImplicitCurly6{T} = Ref{T} + return ImplicitCurly6 + end + @test !@isdefined(ImplicitCurly6) + # Check return value of assignment expr + @test isa((const ImplicitCurly7{T} = Ref{T}), UnionAll) + @test isa(begin; ImplicitCurly8{T} = Ref{T}; end, UnionAll) +end + +# `const` does not distribute over assignments +const aconstassign = bconstassign = 2 +@test isconst(@__MODULE__, :aconstassign) +@test !isconst(@__MODULE__, :bconstassign) +@test aconstassign == bconstassign + +const afunc_constassign() = bfunc_constassign() = 2 +@test afunc_constassign()() == 2 +@test !@isdefined(bfunc_constassign) + +# `const` RHS is regular toplevel scope (not `let`) +const arhs_toplevel = begin + athis_should_be_a_global = 1 + 2 +end +@test isconst(@__MODULE__, :arhs_toplevel) +@test !isconst(@__MODULE__, :athis_should_be_a_global) +@test arhs_toplevel == 2 +@test athis_should_be_a_global == 1 + +# `const` is permitted before function assignment for legacy reasons +const fconst_assign() = 1 +const (gconst_assign(), hconst_assign()) = (2, 3) +@test (fconst_assign(), gconst_assign(), hconst_assign()) == (1, 2, 3) +@test isconst(@__MODULE__, :fconst_assign) +@test isconst(@__MODULE__, :gconst_assign) +@test isconst(@__MODULE__, :hconst_assign) + +# `const` assignment to `_` drops the assignment effect, +# and the conversion, but not the rhs. 
+struct CantConvert; end +Base.convert(::Type{CantConvert}, x) = error() +@test (const _::CantConvert = 1) == 1 +@test !isconst(@__MODULE__, :_) +@test_throws ErrorException("expected") (const _ = error("expected")) + +# Issue #54787 +const (destruct_const54787...,) = (1,2,3) +@test destruct_const54787 == (1,2,3) +@test isconst(@__MODULE__, :destruct_const54787) +const a54787, b54787, c54787 = destruct_const54787 +@test (a54787, b54787, c54787) == (1,2,3) +@test isconst(@__MODULE__, :a54787) +@test isconst(@__MODULE__, :b54787) +@test isconst(@__MODULE__, :c54787) + +# Same number of statements on lhs and rhs, but non-atom +const c54787_1,c54787_2 = 1,(2*1) +@test isconst(@__MODULE__, :c54787_1) +@test isconst(@__MODULE__, :c54787_2) +@test c54787_1 == 1 +@test c54787_2 == 2 + +# Methods can be added to any singleton not just generic functions +struct SingletonMaker; end +const no_really_this_is_a_function_i_promise = Val{SingletonMaker()}() +no_really_this_is_a_function_i_promise(a) = 2 + a +@test Val{SingletonMaker()}()(2) == 4 + +# Test that lowering doesn't accidentally put a `Module` in the Method name slot +let src = @Meta.lower let capture=1 + global foo_lower_block + foo_lower_block() = capture +end + code = src.args[1].code + for i = length(code):-1:1 + expr = code[i] + Meta.isexpr(expr, :method) || continue + @test isa(expr.args[1], Union{GlobalRef, Symbol}) + end +end + +let src = Meta.@lower let + try + try + return 1 + catch + end + finally + nothing + end +end + code = src.args[1].code + for stmt in code + if Meta.isexpr(stmt, :leave) && length(stmt.args) > 1 + # Expr(:leave, ...) should list the arguments to pop from + # inner-most scope to outer-most + @test issorted(Int[ + (arg::Core.SSAValue).id + for arg in stmt.args + ]; rev=true) + end + end +end + +# Test that globals can be `using`'d even if they are not yet defined +module UndefGlobal54954 + global theglobal54954::Int +end +using .UndefGlobal54954: theglobal54954 +@test Core.get_binding_type(@__MODULE__, :theglobal54954) === Int + +# Extended isdefined +module ExtendedIsDefined + using Test + module Import + export x2, x3 + x2 = 2 + x3 = 3 + x4 = 4 + end + const x1 = 1 + using .Import + import .Import.x4 + @test x2 == 2 # Resolve the binding + @eval begin + @test Core.isdefinedglobal(@__MODULE__, :x1) + @test Core.isdefinedglobal(@__MODULE__, :x2) + @test Core.isdefinedglobal(@__MODULE__, :x3) + @test Core.isdefinedglobal(@__MODULE__, :x4) + + @test Core.isdefinedglobal(@__MODULE__, :x1, false) + @test !Core.isdefinedglobal(@__MODULE__, :x2, false) + @test !Core.isdefinedglobal(@__MODULE__, :x3, false) + @test !Core.isdefinedglobal(@__MODULE__, :x4, false) + end + + @eval begin + @Base.Experimental.force_compile + @test Core.isdefinedglobal(@__MODULE__, :x1) + @test Core.isdefinedglobal(@__MODULE__, :x2) + @test Core.isdefinedglobal(@__MODULE__, :x3) + @test Core.isdefinedglobal(@__MODULE__, :x4) + + @test Core.isdefinedglobal(@__MODULE__, :x1, false) + @test !Core.isdefinedglobal(@__MODULE__, :x2, false) + @test !Core.isdefinedglobal(@__MODULE__, :x3, false) + @test !Core.isdefinedglobal(@__MODULE__, :x4, false) + end +end + +# Test importing the same module twice using two different paths +module FooDualImport +end +module BarDualImport +import ..FooDualImport +import ..FooDualImport.FooDualImport +end + +# Test trying to define a constant and then importing the same constant +const ImportConstant = 1 +module ImportConstantTestModule + using Test + const ImportConstant = 1 + import ..ImportConstant + @test 
ImportConstant == 1 + @test isconst(@__MODULE__, :ImportConstant) +end + +# Test trying to define a constant and then trying to assign to the same value +module AssignConstValueTest + const x = 1 + x = 1 +end +@test isconst(AssignConstValueTest, :x) + +# Module Replacement +module ReplacementContainer + module ReplaceMe + const x = 1 + end + const Old = ReplaceMe + module ReplaceMe + const x = 2 + end +end +@test ReplacementContainer.Old !== ReplacementContainer.ReplaceMe +@test ReplacementContainer.ReplaceMe.x === 2 + +# Setglobal of previously declared global +module DeclareSetglobal + using Test + @test_throws ErrorException setglobal!(@__MODULE__, :DeclareMe, 1) + global DeclareMe + setglobal!(@__MODULE__, :DeclareMe, 1) + @test DeclareMe === 1 +end + +# Binding type of const (N.B.: This may change in the future) +module ConstBindingType + using Test + const x = 1 + @test Core.get_binding_type(@__MODULE__, :x) === Any +end + +# Explicit import may resolve using failed +module UsingFailedExplicit + using Test + module A; export x; x = 1; end + module B; export x; x = 2; end + using .A, .B + @test_throws UndefVarError x + using .A: x as x + @test x === 1 +end + +# issue #45494 +begin + local b::Tuple{<:Any} = (0,) + function f45494() + b = b + b + end +end +@test f45494() === (0,) + +@test_throws "\"esc(...)\" used outside of macro expansion" eval(esc(:(const x=1))) + +# Inner function declaration world age +function create_inner_f_no_methods() + function inner_f end +end +@test isa(create_inner_f_no_methods(), Function) +@test length(methods(create_inner_f_no_methods())) == 0 + +function create_inner_f_one_method() + inner_f() = 1 +end +@test isa(create_inner_f_no_methods(), Function) +@test length(methods(create_inner_f_no_methods())) == 0 +@test Base.invoke_in_world(first(methods(create_inner_f_one_method)).primary_world, create_inner_f_one_method()) == 1 + +# Issue 56711 - Scope of signature hoisting +function fs56711() + f(lhs::Integer) = 1 + f(lhs::Integer, rhs::(local x_should_not_be_defined=Integer; x_should_not_be_defined)) = 2 + return f +end +@test !@isdefined(x_should_not_be_defined) diff --git a/test/sysinfo.jl b/test/sysinfo.jl index 3a16dc73b4f6a..f02d8ffe36091 100644 --- a/test/sysinfo.jl +++ b/test/sysinfo.jl @@ -10,6 +10,11 @@ Base.Sys.loadavg() @test Base.libllvm_path() isa Symbol @test contains(String(Base.libllvm_path()), "LLVM") +@test length(ccall(:jl_get_cpu_name, String, ())) != 0 +@test length(ccall(:jl_get_cpu_features, String, ())) >= 0 +foo_fma() = Core.Intrinsics.have_fma(Int64) +@test ccall(:jl_cpu_has_fma, Bool, (Cint,), 64) == foo_fma() + if Sys.isunix() mktempdir() do tempdir firstdir = joinpath(tempdir, "first") @@ -38,3 +43,31 @@ if Sys.isunix() end end end + +@testset "username()" begin + if Sys.isunix() + passwd = Libc.getpwuid(Libc.getuid()) + @test Sys.username() == passwd.username + elseif Sys.iswindows() + @test Sys.username() == ENV["USERNAME"] + else + @test !isempty(Sys.username()) + end +end + +@testset "Base.Sys docstrings" begin + @test isempty(Docs.undocumented_names(Sys)) +end + +@testset "show" begin + example_cpus = [Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x000000000d913b08, 0x0000000000000000, 0x0000000005f4243c, 0x00000000352a550a, 0x0000000000000000) + Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x000000000d9040c2, 0x0000000000000000, 0x0000000005d4768c, 0x00000000356b3d22, 0x0000000000000000) + Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x00000000026784da, 0x0000000000000000, 0x0000000000fda30e, 0x0000000046a731ea, 0x0000000000000000) + 
Base.Sys.CPUinfo("Apple M1 Pro", 2400, 0x00000000017726c0, 0x0000000000000000, 0x00000000009491de, 0x0000000048134f1e, 0x0000000000000000)] + + Sys.SC_CLK_TCK, save_SC_CLK_TCK = 100, Sys.SC_CLK_TCK # use platform-independent tick units + @test repr(example_cpus[1]) == "Base.Sys.CPUinfo(\"Apple M1 Pro\", 2400, 0x000000000d913b08, 0x0000000000000000, 0x0000000005f4243c, 0x00000000352a550a, 0x0000000000000000)" + @test repr("text/plain", example_cpus[1]) == "Apple M1 Pro: \n speed user nice sys idle irq\n 2400 MHz 2276216 s 0 s 998861 s 8919667 s 0 s" + @test sprint(Sys.cpu_summary, example_cpus) == "Apple M1 Pro: \n speed user nice sys idle irq\n#1 2400 MHz 2276216 s 0 s 998861 s 8919667 s 0 s\n#2 2400 MHz 2275576 s 0 s 978101 s 8962204 s 0 s\n#3 2400 MHz 403386 s 0 s 166224 s 11853624 s 0 s\n#4 2400 MHz 245859 s 0 s 97367 s 12092250 s 0 s\n" + Sys.SC_CLK_TCK = save_SC_CLK_TCK +end diff --git a/test/terminfo.jl b/test/terminfo.jl new file mode 100644 index 0000000000000..07aa21704fef5 --- /dev/null +++ b/test/terminfo.jl @@ -0,0 +1,931 @@ +let + dumb_terminfo = UInt8[ + 0x1a, 0x01, 0x18, 0x00, 0x02, 0x00, 0x01, 0x00, 0x82, 0x00, 0x08, 0x00, + 0x64, 0x75, 0x6d, 0x62, 0x7c, 0x38, 0x30, 0x2d, 0x63, 0x6f, 0x6c, 0x75, + 0x6d, 0x6e, 0x20, 0x64, 0x75, 0x6d, 0x62, 0x20, 0x74, 0x74, 0x79, 0x00, + 0x00, 0x01, 0x50, 0x00, 0xff, 0xff, 0x00, 0x00, 0x02, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x04, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0x00, + 0x07, 0x00, 0x0d, 0x00, 0x0a, 0x00, 0x0a, 0x00] + + dumb_capabilities = Dict{Symbol, Union{Bool, Int, String}}( + :am => true, + :auto_right_margin => true, + :bw => false, + :auto_left_margin => false, + :bel => "\a", + :bell => "\a", + :cr => "\r", + :carriage_return => "\r", + :cols => 80, + :columns => 80, + :cud1 => "\n", + :cursor_down => "\n", + :ind => "\n", + :scroll_forward => "\n") + + xterm_terminfo = UInt8[ + 0x1a, 0x01, 0x30, 0x00, 0x26, 0x00, 0x0f, 0x00, 0x9d, 0x01, 0xe6, 0x05, + 0x78, 0x74, 0x65, 0x72, 0x6d, 0x7c, 0x78, 0x74, 0x65, 0x72, 0x6d, 0x20, + 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x61, 
0x6c, 0x20, 0x65, 0x6d, 0x75, + 0x6c, 0x61, 0x74, 0x6f, 0x72, 0x20, 0x28, 0x58, 0x20, 0x57, 0x69, 0x6e, + 0x64, 0x6f, 0x77, 0x20, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x29, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x50, 0x00, 0x08, 0x00, 0x18, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x08, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x08, 0x00, 0x19, 0x00, 0x1e, 0x00, 0x26, 0x00, 0x2a, 0x00, + 0x2e, 0x00, 0xff, 0xff, 0x39, 0x00, 0x4a, 0x00, 0x4c, 0x00, 0x50, 0x00, + 0x57, 0x00, 0xff, 0xff, 0x59, 0x00, 0x66, 0x00, 0xff, 0xff, 0x6a, 0x00, + 0x6e, 0x00, 0x78, 0x00, 0x7c, 0x00, 0xff, 0xff, 0xff, 0xff, 0x80, 0x00, + 0x84, 0x00, 0x89, 0x00, 0x8e, 0x00, 0xff, 0xff, 0xa0, 0x00, 0xa5, 0x00, + 0xaa, 0x00, 0xff, 0xff, 0xaf, 0x00, 0xb4, 0x00, 0xb9, 0x00, 0xbe, 0x00, + 0xc7, 0x00, 0xcb, 0x00, 0xd2, 0x00, 0xff, 0xff, 0xe4, 0x00, 0xe9, 0x00, + 0xef, 0x00, 0xf5, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x19, 0x01, 0xff, 0xff, 0x1d, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x01, 0xff, 0xff, 0x24, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x01, 0x2c, 0x01, + 0x32, 0x01, 0x36, 0x01, 0x3a, 0x01, 0x3e, 0x01, 0x44, 0x01, 0x4a, 0x01, + 0x50, 0x01, 0x56, 0x01, 0x5c, 0x01, 0x60, 0x01, 0xff, 0xff, 0x65, 0x01, + 0xff, 0xff, 0x69, 0x01, 0x6e, 0x01, 0x73, 0x01, 0x77, 0x01, 0x7e, 0x01, + 0xff, 0xff, 0x85, 0x01, 0x89, 0x01, 0x91, 0x01, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x99, 0x01, 0xa2, 0x01, 0xff, 0xff, + 0xff, 0xff, 0xab, 0x01, 0xb4, 0x01, 0xbd, 0x01, 0xc6, 0x01, 0xcf, 0x01, + 0xd8, 0x01, 0xe1, 0x01, 0xea, 0x01, 0xf3, 0x01, 0xfc, 0x01, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x05, 0x02, 0x09, 0x02, 0x0e, 0x02, 0x13, 0x02, + 0x27, 0x02, 0x2a, 0x02, 0xff, 0xff, 0xff, 0xff, 0x3c, 0x02, 0x3f, 0x02, + 0x4a, 0x02, 0x4d, 0x02, 0x4f, 0x02, 0x52, 0x02, 0xaf, 0x02, 0xff, 0xff, + 0xb2, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x02, + 0xb8, 0x02, 0xbc, 0x02, 0xc0, 0x02, 0xc4, 0x02, 0xff, 0xff, 0xff, 0xff, + 0xc8, 0x02, 0xff, 0xff, 0xfd, 0x02, 0xff, 0xff, 0xff, 0xff, 0x01, 0x03, + 0x07, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0d, 0x03, 0x11, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x15, 0x03, 0xff, 0xff, 0xff, 0xff, + 0x1c, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x23, 0x03, + 0x2a, 0x03, 0x31, 0x03, 0xff, 0xff, 0xff, 0xff, 0x38, 0x03, 0xff, 0xff, + 0x3f, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x46, 0x03, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x4d, 0x03, 0x53, 0x03, + 0x59, 0x03, 0x60, 0x03, 0x67, 0x03, 0x6e, 0x03, 0x75, 0x03, 0x7d, 0x03, + 0x85, 0x03, 0x8d, 0x03, 0x95, 0x03, 0x9d, 0x03, 0xa5, 0x03, 0xad, 0x03, + 0xb5, 0x03, 0xbc, 0x03, 0xc3, 0x03, 0xca, 0x03, 0xd1, 0x03, 0xd9, 0x03, + 0xe1, 0x03, 0xe9, 0x03, 0xf1, 0x03, 0xf9, 
0x03, 0x01, 0x04, 0x09, 0x04, + 0x11, 0x04, 0x18, 0x04, 0x1f, 0x04, 0x26, 0x04, 0x2d, 0x04, 0x35, 0x04, + 0x3d, 0x04, 0x45, 0x04, 0x4d, 0x04, 0x55, 0x04, 0x5d, 0x04, 0x65, 0x04, + 0x6d, 0x04, 0x74, 0x04, 0x7b, 0x04, 0x82, 0x04, 0x89, 0x04, 0x91, 0x04, + 0x99, 0x04, 0xa1, 0x04, 0xa9, 0x04, 0xb1, 0x04, 0xb9, 0x04, 0xc1, 0x04, + 0xc9, 0x04, 0xd0, 0x04, 0xd7, 0x04, 0xde, 0x04, 0xe3, 0x04, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x04, 0xf5, 0x04, 0xfa, 0x04, + 0x0d, 0x05, 0x11, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x1a, 0x05, 0x60, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa6, 0x05, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xab, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb1, 0x05, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb5, 0x05, 0xbf, 0x05, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc9, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xe0, 0x05, 0xe3, 0x05, 0x1b, 0x5b, 0x5a, 0x00, 0x07, 0x00, + 0x0d, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, 0x64, 0x3b, + 0x25, 0x70, 0x32, 0x25, 0x64, 0x72, 0x00, 0x1b, 0x5b, 0x33, 0x67, 0x00, + 0x1b, 0x5b, 0x48, 0x1b, 0x5b, 0x32, 0x4a, 0x00, 0x1b, 0x5b, 0x4b, 0x00, + 0x1b, 0x5b, 0x4a, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, + 0x64, 0x47, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, 0x64, + 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x48, 0x00, 0x0a, 0x00, 0x1b, 0x5b, + 0x48, 0x00, 0x1b, 0x5b, 0x3f, 0x32, 0x35, 0x6c, 0x00, 0x08, 0x00, 0x1b, + 0x5b, 0x3f, 0x31, 0x32, 0x6c, 0x1b, 0x5b, 0x3f, 0x32, 0x35, 0x68, 0x00, + 0x1b, 0x5b, 0x43, 0x00, 0x1b, 0x5b, 0x41, 0x00, 0x1b, 0x5b, 0x3f, 0x31, + 0x32, 0x3b, 0x32, 0x35, 0x68, 0x00, 0x1b, 0x5b, 0x50, 0x00, 0x1b, 0x5b, + 0x4d, 0x00, 0x1b, 0x28, 0x30, 0x00, 0x1b, 0x5b, 0x35, 0x6d, 0x00, 0x1b, + 0x5b, 0x31, 0x6d, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x34, 0x39, 0x68, + 0x1b, 0x5b, 0x32, 0x32, 0x3b, 0x30, 0x3b, 0x30, 0x74, 0x00, 0x1b, 0x5b, + 0x32, 0x6d, 0x00, 0x1b, 0x5b, 0x34, 0x68, 0x00, 0x1b, 0x5b, 0x38, 0x6d, + 0x00, 0x1b, 0x5b, 0x37, 0x6d, 0x00, 0x1b, 0x5b, 0x37, 0x6d, 0x00, 0x1b, + 0x5b, 0x34, 0x6d, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x58, + 0x00, 0x1b, 0x28, 0x42, 0x00, 0x1b, 0x28, 0x42, 0x1b, 0x5b, 0x6d, 0x00, + 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x34, 0x39, 0x6c, 0x1b, 0x5b, 0x32, 0x33, + 0x3b, 0x30, 0x3b, 0x30, 0x74, 0x00, 0x1b, 
0x5b, 0x34, 0x6c, 0x00, 0x1b, + 0x5b, 0x32, 0x37, 0x6d, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x6d, 0x00, 0x1b, + 0x5b, 0x3f, 0x35, 0x68, 0x24, 0x3c, 0x31, 0x30, 0x30, 0x2f, 0x3e, 0x1b, + 0x5b, 0x3f, 0x35, 0x6c, 0x00, 0x1b, 0x5b, 0x21, 0x70, 0x1b, 0x5b, 0x3f, + 0x33, 0x3b, 0x34, 0x6c, 0x1b, 0x5b, 0x34, 0x6c, 0x1b, 0x3e, 0x00, 0x1b, + 0x5b, 0x4c, 0x00, 0x08, 0x00, 0x1b, 0x5b, 0x33, 0x7e, 0x00, 0x1b, 0x4f, + 0x42, 0x00, 0x1b, 0x4f, 0x50, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x7e, 0x00, + 0x1b, 0x4f, 0x51, 0x00, 0x1b, 0x4f, 0x52, 0x00, 0x1b, 0x4f, 0x53, 0x00, + 0x1b, 0x5b, 0x31, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x7e, 0x00, + 0x1b, 0x5b, 0x31, 0x38, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x7e, 0x00, + 0x1b, 0x5b, 0x32, 0x30, 0x7e, 0x00, 0x1b, 0x4f, 0x48, 0x00, 0x1b, 0x5b, + 0x32, 0x7e, 0x00, 0x1b, 0x4f, 0x44, 0x00, 0x1b, 0x5b, 0x36, 0x7e, 0x00, + 0x1b, 0x5b, 0x35, 0x7e, 0x00, 0x1b, 0x4f, 0x43, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x32, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x41, 0x00, 0x1b, + 0x4f, 0x41, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x6c, 0x1b, 0x3e, 0x00, 0x1b, + 0x5b, 0x3f, 0x31, 0x68, 0x1b, 0x3d, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, + 0x33, 0x34, 0x6c, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x33, 0x34, 0x68, + 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x50, 0x00, 0x1b, 0x5b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x4d, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x42, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x40, + 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x53, 0x00, 0x1b, 0x5b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x4c, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x44, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x43, + 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x54, 0x00, 0x1b, 0x5b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x41, 0x00, 0x1b, 0x5b, 0x69, 0x00, 0x1b, + 0x5b, 0x34, 0x69, 0x00, 0x1b, 0x5b, 0x35, 0x69, 0x00, 0x25, 0x70, 0x31, + 0x25, 0x63, 0x1b, 0x5b, 0x25, 0x70, 0x32, 0x25, 0x7b, 0x31, 0x7d, 0x25, + 0x2d, 0x25, 0x64, 0x62, 0x00, 0x1b, 0x63, 0x00, 0x1b, 0x5b, 0x21, 0x70, + 0x1b, 0x5b, 0x3f, 0x33, 0x3b, 0x34, 0x6c, 0x1b, 0x5b, 0x34, 0x6c, 0x1b, + 0x3e, 0x00, 0x1b, 0x38, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x64, 0x00, 0x1b, 0x37, 0x00, 0x0a, 0x00, 0x1b, 0x4d, 0x00, + 0x25, 0x3f, 0x25, 0x70, 0x39, 0x25, 0x74, 0x1b, 0x28, 0x30, 0x25, 0x65, + 0x1b, 0x28, 0x42, 0x25, 0x3b, 0x1b, 0x5b, 0x30, 0x25, 0x3f, 0x25, 0x70, + 0x36, 0x25, 0x74, 0x3b, 0x31, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x35, + 0x25, 0x74, 0x3b, 0x32, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x32, 0x25, + 0x74, 0x3b, 0x34, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x70, + 0x33, 0x25, 0x7c, 0x25, 0x74, 0x3b, 0x37, 0x25, 0x3b, 0x25, 0x3f, 0x25, + 0x70, 0x34, 0x25, 0x74, 0x3b, 0x35, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, + 0x37, 0x25, 0x74, 0x3b, 0x38, 0x25, 0x3b, 0x6d, 0x00, 0x1b, 0x48, 0x00, + 0x09, 0x00, 0x1b, 0x4f, 0x77, 0x00, 0x1b, 0x4f, 0x79, 0x00, 0x1b, 0x4f, + 0x75, 0x00, 0x1b, 0x4f, 0x71, 0x00, 0x1b, 0x4f, 0x73, 0x00, 0x60, 0x60, + 0x61, 0x61, 0x66, 0x66, 0x67, 0x67, 0x69, 0x69, 0x6a, 0x6a, 0x6b, 0x6b, + 0x6c, 0x6c, 0x6d, 0x6d, 0x6e, 0x6e, 0x6f, 0x6f, 0x70, 0x70, 0x71, 0x71, + 0x72, 0x72, 0x73, 0x73, 0x74, 0x74, 0x75, 0x75, 0x76, 0x76, 0x77, 0x77, + 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b, 0x7c, 0x7c, 0x7d, 0x7d, + 0x7e, 0x7e, 0x00, 0x1b, 0x5b, 0x5a, 0x00, 0x1b, 0x5b, 0x3f, 0x37, 0x68, + 0x00, 0x1b, 0x5b, 0x3f, 0x37, 0x6c, 0x00, 0x1b, 0x4f, 0x46, 0x00, 0x1b, + 0x4f, 0x4d, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x32, 0x46, 0x00, 0x1b, 0x5b, 
0x31, 0x3b, 0x32, 0x48, 0x00, + 0x1b, 0x5b, 0x32, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, + 0x44, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x35, + 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x43, 0x00, 0x1b, + 0x5b, 0x32, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x32, 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x51, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x32, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, 0x32, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x37, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, + 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, 0x32, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x30, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, + 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, 0x32, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x34, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, + 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x51, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x35, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x53, 0x00, 0x1b, + 0x5b, 0x31, 0x35, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x3b, + 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, 0x35, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x39, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x30, 0x3b, + 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, 0x35, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x33, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x3b, + 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x50, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x36, 0x51, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x52, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x3b, 0x36, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x38, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x30, 0x3b, 0x36, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x31, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x3b, 0x36, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x33, 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x51, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x33, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, 0x33, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x37, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, + 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, 0x33, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x30, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, + 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, 0x33, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x34, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, + 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, 0x51, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x34, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x4b, 0x00, 0x1b, 0x5b, 0x3f, + 0x36, 0x39, 0x6c, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x64, 0x3b, 0x25, + 0x64, 0x52, 0x00, 0x1b, 0x5b, 0x36, 0x6e, 0x00, 0x1b, 0x5b, 0x3f, 0x25, + 0x5b, 0x3b, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x5d, 0x63, 0x00, 0x1b, 0x5b, 0x63, 0x00, 0x1b, 0x5b, 0x33, 0x39, 0x3b, + 0x34, 0x39, 0x6d, 0x00, 0x1b, 0x5b, 0x33, 0x25, 0x3f, 0x25, 0x70, 0x31, + 0x25, 0x7b, 0x31, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x34, 0x25, 0x65, 0x25, + 0x70, 0x31, 0x25, 0x7b, 0x33, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x36, 0x25, + 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x34, 0x7d, 0x25, 0x3d, 0x25, 0x74, + 0x31, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x36, 0x7d, 0x25, 0x3d, + 0x25, 0x74, 0x33, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x64, 0x25, 0x3b, + 0x6d, 0x00, 0x1b, 0x5b, 0x34, 0x25, 0x3f, 
0x25, 0x70, 0x31, 0x25, 0x7b, + 0x31, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x34, 0x25, 0x65, 0x25, 0x70, 0x31, + 0x25, 0x7b, 0x33, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x36, 0x25, 0x65, 0x25, + 0x70, 0x31, 0x25, 0x7b, 0x34, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x31, 0x25, + 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x36, 0x7d, 0x25, 0x3d, 0x25, 0x74, + 0x33, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x64, 0x25, 0x3b, 0x6d, 0x00, + 0x1b, 0x5b, 0x33, 0x6d, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x6d, 0x00, 0x1b, + 0x5b, 0x3c, 0x00, 0x1b, 0x5b, 0x33, 0x25, 0x70, 0x31, 0x25, 0x64, 0x6d, + 0x00, 0x1b, 0x5b, 0x34, 0x25, 0x70, 0x31, 0x25, 0x64, 0x6d, 0x00, 0x1b, + 0x5b, 0x3f, 0x36, 0x39, 0x68, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x73, 0x00, 0x1b, 0x6c, + 0x00, 0x1b, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x96, 0x00, + 0xac, 0x03, 0x01, 0x01, 0x00, 0x00, 0x07, 0x00, 0x13, 0x00, 0x18, 0x00, + 0x2a, 0x00, 0x30, 0x00, 0x3a, 0x00, 0x5a, 0x00, 0x61, 0x00, 0x68, 0x00, + 0x6f, 0x00, 0x76, 0x00, 0x7d, 0x00, 0x84, 0x00, 0x8b, 0x00, 0x92, 0x00, + 0x99, 0x00, 0xa0, 0x00, 0xa7, 0x00, 0xae, 0x00, 0xb5, 0x00, 0xbc, 0x00, + 0xc3, 0x00, 0xca, 0x00, 0xd1, 0x00, 0xd8, 0x00, 0xdf, 0x00, 0xe6, 0x00, + 0xed, 0x00, 0xf4, 0x00, 0xfb, 0x00, 0x02, 0x01, 0x09, 0x01, 0x10, 0x01, + 0x17, 0x01, 0x1e, 0x01, 0x25, 0x01, 0x2c, 0x01, 0x33, 0x01, 0x3a, 0x01, + 0x41, 0x01, 0x48, 0x01, 0x4f, 0x01, 0x56, 0x01, 0x5d, 0x01, 0x64, 0x01, + 0x6b, 0x01, 0x72, 0x01, 0x79, 0x01, 0x80, 0x01, 0x87, 0x01, 0x8e, 0x01, + 0x95, 0x01, 0x9c, 0x01, 0xa3, 0x01, 0xaa, 0x01, 0xb1, 0x01, 0xb8, 0x01, + 0xbf, 0x01, 0xc6, 0x01, 0xca, 0x01, 0xce, 0x01, 0xd2, 0x01, 0xd6, 0x01, + 0xda, 0x01, 0xde, 0x01, 0xe2, 0x01, 0xe6, 0x01, 0xea, 0x01, 0xee, 0x01, + 0xf2, 0x01, 0xf6, 0x01, 0xfc, 0x01, 0x01, 0x02, 0x00, 0x00, 0x03, 0x00, + 0x06, 0x00, 0x09, 0x00, 0x0c, 0x00, 0x0f, 0x00, 0x12, 0x00, 0x15, 0x00, + 0x18, 0x00, 0x1b, 0x00, 0x20, 0x00, 0x25, 0x00, 0x2a, 0x00, 0x2f, 0x00, + 0x34, 0x00, 0x38, 0x00, 0x3d, 0x00, 0x42, 0x00, 0x47, 0x00, 0x4c, 0x00, + 0x51, 0x00, 0x57, 0x00, 0x5d, 0x00, 0x63, 0x00, 0x69, 0x00, 0x6f, 0x00, + 0x75, 0x00, 0x7b, 0x00, 0x81, 0x00, 0x87, 0x00, 0x8d, 0x00, 0x92, 0x00, + 0x97, 0x00, 0x9c, 0x00, 0xa1, 0x00, 0xa6, 0x00, 0xac, 0x00, 0xb2, 0x00, + 0xb8, 0x00, 0xbe, 0x00, 0xc4, 0x00, 0xca, 0x00, 0xd0, 0x00, 0xd6, 0x00, + 0xdc, 0x00, 0xe2, 0x00, 0xe8, 0x00, 0xee, 0x00, 0xf4, 0x00, 0xfa, 0x00, + 0x00, 0x01, 0x06, 0x01, 0x0c, 0x01, 0x12, 0x01, 0x18, 0x01, 0x1e, 0x01, + 0x22, 0x01, 0x27, 0x01, 0x2c, 0x01, 0x31, 0x01, 0x36, 0x01, 0x3b, 0x01, + 0x3f, 0x01, 0x43, 0x01, 0x47, 0x01, 0x4b, 0x01, 0x4f, 0x01, 0x55, 0x01, + 0x5b, 0x01, 0x61, 0x01, 0x67, 0x01, 0x6d, 0x01, 0x73, 0x01, 0x79, 0x01, + 0x7e, 0x01, 0x83, 0x01, 0x1b, 0x5d, 0x31, 0x31, 0x32, 0x07, 0x00, 0x1b, + 0x5d, 0x31, 0x32, 0x3b, 0x25, 0x70, 0x31, 0x25, 0x73, 0x07, 0x00, 0x1b, + 0x5b, 0x33, 0x4a, 0x00, 0x1b, 0x5d, 0x35, 0x32, 0x3b, 0x25, 0x70, 0x31, + 0x25, 0x73, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x73, 0x07, 0x00, 0x1b, 0x5b, + 0x32, 0x20, 0x71, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x20, + 0x71, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x30, 0x36, 0x3b, 0x31, 0x30, + 0x30, 0x30, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x31, 0x7d, 0x25, + 0x3d, 0x25, 0x74, 0x68, 0x25, 0x65, 0x6c, 0x25, 0x3b, 0x00, 0x1b, 0x5b, + 0x33, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x34, 0x7e, 0x00, + 0x1b, 0x5b, 0x33, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x36, + 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x32, 0x42, 0x00, 0x1b, 0x5b, 0x31, 
0x3b, 0x33, 0x42, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x34, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x42, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x37, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x46, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x34, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x46, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, + 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x48, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x34, 0x48, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x48, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x36, 0x48, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x48, + 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, + 0x34, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, + 0x32, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x37, 0x7e, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, + 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x44, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x36, 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x44, 0x00, 0x1b, + 0x5b, 0x36, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x34, 0x7e, + 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, + 0x35, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x34, 0x7e, 0x00, + 0x1b, 0x5b, 0x35, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x36, + 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x33, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, 0x43, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x35, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x43, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x32, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x41, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x34, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x41, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, + 0x41, 0x00, 0x1b, 0x4f, 0x78, 0x00, 0x1b, 0x4f, 0x74, 0x00, 0x1b, 0x4f, + 0x76, 0x00, 0x1b, 0x4f, 0x72, 0x00, 0x1b, 0x4f, 0x45, 0x00, 0x1b, 0x4f, + 0x6b, 0x00, 0x1b, 0x4f, 0x6c, 0x00, 0x1b, 0x4f, 0x6f, 0x00, 0x1b, 0x4f, + 0x6e, 0x00, 0x1b, 0x4f, 0x6a, 0x00, 0x1b, 0x4f, 0x6d, 0x00, 0x1b, 0x4f, + 0x70, 0x00, 0x1b, 0x5b, 0x32, 0x39, 0x6d, 0x00, 0x1b, 0x5b, 0x39, 0x6d, + 0x00, 0x1b, 0x5b, 0x3c, 0x25, 0x69, 0x25, 0x70, 0x33, 0x25, 0x64, 0x3b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x3b, + 0x25, 0x3f, 0x25, 0x70, 0x34, 0x25, 0x74, 0x4d, 0x25, 0x65, 0x6d, 0x25, + 0x3b, 0x00, 0x41, 0x58, 0x00, 0x58, 0x54, 0x00, 0x43, 0x72, 0x00, 0x43, + 0x73, 0x00, 0x45, 0x33, 0x00, 0x4d, 0x73, 0x00, 0x53, 0x65, 0x00, 0x53, + 0x73, 0x00, 0x58, 0x4d, 0x00, 0x6b, 0x44, 0x43, 0x33, 0x00, 0x6b, 0x44, + 0x43, 0x34, 0x00, 0x6b, 0x44, 0x43, 0x35, 0x00, 0x6b, 0x44, 0x43, 0x36, + 0x00, 0x6b, 0x44, 0x43, 0x37, 0x00, 0x6b, 0x44, 0x4e, 0x00, 0x6b, 0x44, + 0x4e, 0x33, 0x00, 0x6b, 0x44, 0x4e, 0x34, 0x00, 0x6b, 0x44, 0x4e, 0x35, + 0x00, 0x6b, 0x44, 0x4e, 0x36, 0x00, 0x6b, 0x44, 0x4e, 0x37, 0x00, 0x6b, + 0x45, 0x4e, 0x44, 0x33, 0x00, 0x6b, 0x45, 0x4e, 0x44, 0x34, 0x00, 0x6b, + 0x45, 0x4e, 0x44, 0x35, 0x00, 0x6b, 0x45, 0x4e, 0x44, 0x36, 0x00, 0x6b, + 0x45, 0x4e, 0x44, 0x37, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x33, 0x00, 0x6b, + 0x48, 0x4f, 0x4d, 0x34, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x35, 0x00, 0x6b, + 0x48, 0x4f, 0x4d, 0x36, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x37, 0x00, 0x6b, + 0x49, 0x43, 0x33, 0x00, 0x6b, 0x49, 0x43, 0x34, 0x00, 0x6b, 0x49, 0x43, + 0x35, 0x00, 0x6b, 0x49, 0x43, 0x36, 0x00, 
0x6b, 0x49, 0x43, 0x37, 0x00, + 0x6b, 0x4c, 0x46, 0x54, 0x33, 0x00, 0x6b, 0x4c, 0x46, 0x54, 0x34, 0x00, + 0x6b, 0x4c, 0x46, 0x54, 0x35, 0x00, 0x6b, 0x4c, 0x46, 0x54, 0x36, 0x00, + 0x6b, 0x4c, 0x46, 0x54, 0x37, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x33, 0x00, + 0x6b, 0x4e, 0x58, 0x54, 0x34, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x35, 0x00, + 0x6b, 0x4e, 0x58, 0x54, 0x36, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x37, 0x00, + 0x6b, 0x50, 0x52, 0x56, 0x33, 0x00, 0x6b, 0x50, 0x52, 0x56, 0x34, 0x00, + 0x6b, 0x50, 0x52, 0x56, 0x35, 0x00, 0x6b, 0x50, 0x52, 0x56, 0x36, 0x00, + 0x6b, 0x50, 0x52, 0x56, 0x37, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x33, 0x00, + 0x6b, 0x52, 0x49, 0x54, 0x34, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x35, 0x00, + 0x6b, 0x52, 0x49, 0x54, 0x36, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x37, 0x00, + 0x6b, 0x55, 0x50, 0x00, 0x6b, 0x55, 0x50, 0x33, 0x00, 0x6b, 0x55, 0x50, + 0x34, 0x00, 0x6b, 0x55, 0x50, 0x35, 0x00, 0x6b, 0x55, 0x50, 0x36, 0x00, + 0x6b, 0x55, 0x50, 0x37, 0x00, 0x6b, 0x61, 0x32, 0x00, 0x6b, 0x62, 0x31, + 0x00, 0x6b, 0x62, 0x33, 0x00, 0x6b, 0x63, 0x32, 0x00, 0x6b, 0x70, 0x35, + 0x00, 0x6b, 0x70, 0x41, 0x44, 0x44, 0x00, 0x6b, 0x70, 0x43, 0x4d, 0x41, + 0x00, 0x6b, 0x70, 0x44, 0x49, 0x56, 0x00, 0x6b, 0x70, 0x44, 0x4f, 0x54, + 0x00, 0x6b, 0x70, 0x4d, 0x55, 0x4c, 0x00, 0x6b, 0x70, 0x53, 0x55, 0x42, + 0x00, 0x6b, 0x70, 0x5a, 0x52, 0x4f, 0x00, 0x72, 0x6d, 0x78, 0x78, 0x00, + 0x73, 0x6d, 0x78, 0x78, 0x00, 0x78, 0x6d, 0x00] + + xterm_extensions = + [:AX, :E3, :XM, :XT, :enter_strikeout_mode, :exit_strikeout_mode, :ka2, + :kb1, :kb3, :kc2, :key_alt_control_delete_character, + :key_alt_control_down_cursor, :key_alt_control_end, + :key_alt_control_home, :key_alt_control_insert_character, + :key_alt_control_left_cursor, :key_alt_control_next, + :key_alt_control_previous, :key_alt_control_right_cursor, + :key_alt_control_up_cursor, :key_alt_delete_character, + :key_alt_down_cursor, :key_alt_end, :key_alt_home, + :key_alt_insert_character, :key_alt_left_cursor, :key_alt_next, + :key_alt_previous, :key_alt_right_cursor, :key_alt_up_cursor, + :key_control_delete_character, :key_control_down_cursor, + :key_control_end, :key_control_home, :key_control_insert_character, + :key_control_left_cursor, :key_control_next, :key_control_previous, + :key_control_right_cursor, :key_control_up_cursor, + :key_shift_alt_delete_character, :key_shift_alt_down_cursor, + :key_shift_alt_end, :key_shift_alt_home, + :key_shift_alt_insert_character, :key_shift_alt_left_cursor, + :key_shift_alt_next, :key_shift_alt_previous, + :key_shift_alt_right_cursor, :key_shift_alt_up_cursor, + :key_shift_control_delete_character, :key_shift_control_down_cursor, + :key_shift_control_end, :key_shift_control_home, + :key_shift_control_insert_character, :key_shift_control_left_cursor, + :key_shift_control_next, :key_shift_control_previous, + :key_shift_control_right_cursor, :key_shift_control_up_cursor, + :key_shift_down_cursor, :key_shift_up_cursor, :kp5, :kpADD, :kpCMA, + :kpDIV, :kpDOT, :kpMUL, :kpSUB, :kpZRO, :reset_cursor_color, + :reset_cursor_style, :set_cursor_color, :set_cursor_style, + :set_host_clipboard, :xm] + + xterm_capabilities = Dict{Symbol, Union{Bool, Int, String}}( + :AX => true, + :Cr => "\e]112\a", + :Cs => "\e]12;%p1%s\a", + :E3 => "\e[3J", + :Ms => "\e]52;%p1%s;%p2%s\a", + :OTbs => true, + :Se => "\e[2 q", + :Ss => "\e[%p1%d q", + :XM => "\e[?1006;1000%?%p1%{1}%=%th%el%;", + :XT => true, + :acs_chars => "``aaffggiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz{{||}}~~", + :acsc => "``aaffggiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz{{||}}~~", + :am => true, + 
:auto_left_margin => false, + :auto_right_margin => true, + :back_color_erase => true, + :back_tab => "\e[Z", + :backspaces_with_bs => true, + :bce => true, + :bel => "\a", + :bell => "\a", + :blink => "\e[5m", + :bold => "\e[1m", + :bw => false, + :can_change => false, + :carriage_return => "\r", + :cbt => "\e[Z", + :ccc => false, + :ceol_standout_glitch => false, + :change_scroll_region => "\e[%i%p1%d;%p2%dr", + :chts => false, + :civis => "\e[?25l", + :clear => "\e[H\e[2J", + :clear_all_tabs => "\e[3g", + :clear_margins => "\e[?69l", + :clear_screen => "\e[H\e[2J", + :clr_bol => "\e[1K", + :clr_eol => "\e[K", + :clr_eos => "\e[J", + :cnorm => "\e[?12l\e[?25h", + :col_addr_glitch => false, + :colors => 8, + :cols => 80, + :column_address => "\e[%i%p1%dG", + :columns => 80, + :cpi_changes_res => false, + :cpix => false, + :cr => "\r", + :cr_cancels_micro_mode => false, + :crxm => false, + :csr => "\e[%i%p1%d;%p2%dr", + :cub => "\e[%p1%dD", + :cub1 => "\b", + :cud => "\e[%p1%dB", + :cud1 => "\n", + :cuf => "\e[%p1%dC", + :cuf1 => "\e[C", + :cup => "\e[%i%p1%d;%p2%dH", + :cursor_address => "\e[%i%p1%d;%p2%dH", + :cursor_down => "\n", + :cursor_home => "\e[H", + :cursor_invisible => "\e[?25l", + :cursor_left => "\b", + :cursor_normal => "\e[?12l\e[?25h", + :cursor_right => "\e[C", + :cursor_up => "\e[A", + :cursor_visible => "\e[?12;25h", + :cuu => "\e[%p1%dA", + :cuu1 => "\e[A", + :cvvis => "\e[?12;25h", + :da => false, + :daisy => false, + :db => false, + :dch => "\e[%p1%dP", + :dch1 => "\e[P", + :delete_character => "\e[P", + :delete_line => "\e[M", + :dest_tabs_magic_smso => false, + :dim => "\e[2m", + :dl => "\e[%p1%dM", + :dl1 => "\e[M", + :eat_newline_glitch => true, + :ech => "\e[%p1%dX", + :ed => "\e[J", + :el => "\e[K", + :el1 => "\e[1K", + :enter_alt_charset_mode => "\e(0", + :enter_am_mode => "\e[?7h", + :enter_blink_mode => "\e[5m", + :enter_bold_mode => "\e[1m", + :enter_ca_mode => "\e[?1049h\e[22;0;0t", + :enter_dim_mode => "\e[2m", + :enter_insert_mode => "\e[4h", + :enter_italics_mode => "\e[3m", + :enter_reverse_mode => "\e[7m", + :enter_secure_mode => "\e[8m", + :enter_standout_mode => "\e[7m", + :enter_underline_mode => "\e[4m", + :eo => false, + :erase_chars => "\e[%p1%dX", + :erase_overstrike => false, + :eslok => false, + :exit_alt_charset_mode => "\e(B", + :exit_am_mode => "\e[?7l", + :exit_attribute_mode => "\e(B\e[m", + :exit_ca_mode => "\e[?1049l\e[23;0;0t", + :exit_insert_mode => "\e[4l", + :exit_italics_mode => "\e[23m", + :exit_standout_mode => "\e[27m", + :exit_underline_mode => "\e[24m", + :flash => "\e[?5h\$<100/>\e[?5l", + :flash_screen => "\e[?5h\$<100/>\e[?5l", + :generic_type => false, + :gn => false, + :hard_copy => false, + :hard_cursor => false, + :has_meta_key => true, + :has_print_wheel => false, + :has_status_line => false, + :hc => false, + :hls => false, + :home => "\e[H", + :hpa => "\e[%i%p1%dG", + :hs => false, + :ht => "\t", + :hts => "\eH", + :hue_lightness_saturation => false, + :hz => false, + :ich => "\e[%p1%d@", + :il => "\e[%p1%dL", + :il1 => "\e[L", + :in => false, + :ind => "\n", + :indn => "\e[%p1%dS", + :init_2string => "\e[!p\e[?3;4l\e[4l\e>", + :init_tabs => 8, + :insert_line => "\e[L", + :insert_null_glitch => false, + :invis => "\e[8m", + :is2 => "\e[!p\e[?3;4l\e[4l\e>", + :it => 8, + :kDC => "\e[3;2~", + :kDC3 => "\e[3;3~", + :kDC4 => "\e[3;4~", + :kDC5 => "\e[3;5~", + :kDC6 => "\e[3;6~", + :kDC7 => "\e[3;7~", + :kDN => "\e[1;2B", + :kDN3 => "\e[1;3B", + :kDN4 => "\e[1;4B", + :kDN5 => "\e[1;5B", + :kDN6 => "\e[1;6B", + :kDN7 => 
"\e[1;7B", + :kEND => "\e[1;2F", + :kEND3 => "\e[1;3F", + :kEND4 => "\e[1;4F", + :kEND5 => "\e[1;5F", + :kEND6 => "\e[1;6F", + :kEND7 => "\e[1;7F", + :kHOM => "\e[1;2H", + :kHOM3 => "\e[1;3H", + :kHOM4 => "\e[1;4H", + :kHOM5 => "\e[1;5H", + :kHOM6 => "\e[1;6H", + :kHOM7 => "\e[1;7H", + :kIC => "\e[2;2~", + :kIC3 => "\e[2;3~", + :kIC4 => "\e[2;4~", + :kIC5 => "\e[2;5~", + :kIC6 => "\e[2;6~", + :kIC7 => "\e[2;7~", + :kLFT => "\e[1;2D", + :kLFT3 => "\e[1;3D", + :kLFT4 => "\e[1;4D", + :kLFT5 => "\e[1;5D", + :kLFT6 => "\e[1;6D", + :kLFT7 => "\e[1;7D", + :kNXT => "\e[6;2~", + :kNXT3 => "\e[6;3~", + :kNXT4 => "\e[6;4~", + :kNXT5 => "\e[6;5~", + :kNXT6 => "\e[6;6~", + :kNXT7 => "\e[6;7~", + :kPRV => "\e[5;2~", + :kPRV3 => "\e[5;3~", + :kPRV4 => "\e[5;4~", + :kPRV5 => "\e[5;5~", + :kPRV6 => "\e[5;6~", + :kPRV7 => "\e[5;7~", + :kRIT => "\e[1;2C", + :kRIT3 => "\e[1;3C", + :kRIT4 => "\e[1;4C", + :kRIT5 => "\e[1;5C", + :kRIT6 => "\e[1;6C", + :kRIT7 => "\e[1;7C", + :kUP => "\e[1;2A", + :kUP3 => "\e[1;3A", + :kUP4 => "\e[1;4A", + :kUP5 => "\e[1;5A", + :kUP6 => "\e[1;6A", + :kUP7 => "\e[1;7A", + :ka1 => "\eOw", + :ka2 => "\eOx", + :ka3 => "\eOy", + :kb1 => "\eOt", + :kb2 => "\eOu", + :kb3 => "\eOv", + :kbs => "\b", + :kc1 => "\eOq", + :kc2 => "\eOr", + :kc3 => "\eOs", + :kcbt => "\e[Z", + :kcub1 => "\eOD", + :kcud1 => "\eOB", + :kcuf1 => "\eOC", + :kcuu1 => "\eOA", + :kdch1 => "\e[3~", + :kend => "\eOF", + :kent => "\eOM", + :key_a1 => "\eOw", + :key_a3 => "\eOy", + :key_b2 => "\eOu", + :key_backspace => "\b", + :key_btab => "\e[Z", + :key_c1 => "\eOq", + :key_c3 => "\eOs", + :key_dc => "\e[3~", + :key_down => "\eOB", + :key_end => "\eOF", + :key_enter => "\eOM", + :key_f1 => "\eOP", + :key_f10 => "\e[21~", + :key_f11 => "\e[23~", + :key_f12 => "\e[24~", + :key_f13 => "\e[1;2P", + :key_f14 => "\e[1;2Q", + :key_f15 => "\e[1;2R", + :key_f16 => "\e[1;2S", + :key_f17 => "\e[15;2~", + :key_f18 => "\e[17;2~", + :key_f19 => "\e[18;2~", + :key_f2 => "\eOQ", + :key_f20 => "\e[19;2~", + :key_f21 => "\e[20;2~", + :key_f22 => "\e[21;2~", + :key_f23 => "\e[23;2~", + :key_f24 => "\e[24;2~", + :key_f25 => "\e[1;5P", + :key_f26 => "\e[1;5Q", + :key_f27 => "\e[1;5R", + :key_f28 => "\e[1;5S", + :key_f29 => "\e[15;5~", + :key_f3 => "\eOR", + :key_f30 => "\e[17;5~", + :key_f31 => "\e[18;5~", + :key_f32 => "\e[19;5~", + :key_f33 => "\e[20;5~", + :key_f34 => "\e[21;5~", + :key_f35 => "\e[23;5~", + :key_f36 => "\e[24;5~", + :key_f37 => "\e[1;6P", + :key_f38 => "\e[1;6Q", + :key_f39 => "\e[1;6R", + :key_f4 => "\eOS", + :key_f40 => "\e[1;6S", + :key_f41 => "\e[15;6~", + :key_f42 => "\e[17;6~", + :key_f43 => "\e[18;6~", + :key_f44 => "\e[19;6~", + :key_f45 => "\e[20;6~", + :key_f46 => "\e[21;6~", + :key_f47 => "\e[23;6~", + :key_f48 => "\e[24;6~", + :key_f49 => "\e[1;3P", + :key_f5 => "\e[15~", + :key_f50 => "\e[1;3Q", + :key_f51 => "\e[1;3R", + :key_f52 => "\e[1;3S", + :key_f53 => "\e[15;3~", + :key_f54 => "\e[17;3~", + :key_f55 => "\e[18;3~", + :key_f56 => "\e[19;3~", + :key_f57 => "\e[20;3~", + :key_f58 => "\e[21;3~", + :key_f59 => "\e[23;3~", + :key_f6 => "\e[17~", + :key_f60 => "\e[24;3~", + :key_f61 => "\e[1;4P", + :key_f62 => "\e[1;4Q", + :key_f63 => "\e[1;4R", + :key_f7 => "\e[18~", + :key_f8 => "\e[19~", + :key_f9 => "\e[20~", + :key_home => "\eOH", + :key_ic => "\e[2~", + :key_left => "\eOD", + :key_mouse => "\e[<", + :key_npage => "\e[6~", + :key_ppage => "\e[5~", + :key_right => "\eOC", + :key_sdc => "\e[3;2~", + :key_send => "\e[1;2F", + :key_sf => "\e[1;2B", + :key_shome => "\e[1;2H", + :key_sic => "\e[2;2~", + 
:key_sleft => "\e[1;2D", + :key_snext => "\e[6;2~", + :key_sprevious => "\e[5;2~", + :key_sr => "\e[1;2A", + :key_sright => "\e[1;2C", + :key_up => "\eOA", + :keypad_local => "\e[?1l\e>", + :keypad_xmit => "\e[?1h\e=", + :kf1 => "\eOP", + :kf10 => "\e[21~", + :kf11 => "\e[23~", + :kf12 => "\e[24~", + :kf13 => "\e[1;2P", + :kf14 => "\e[1;2Q", + :kf15 => "\e[1;2R", + :kf16 => "\e[1;2S", + :kf17 => "\e[15;2~", + :kf18 => "\e[17;2~", + :kf19 => "\e[18;2~", + :kf2 => "\eOQ", + :kf20 => "\e[19;2~", + :kf21 => "\e[20;2~", + :kf22 => "\e[21;2~", + :kf23 => "\e[23;2~", + :kf24 => "\e[24;2~", + :kf25 => "\e[1;5P", + :kf26 => "\e[1;5Q", + :kf27 => "\e[1;5R", + :kf28 => "\e[1;5S", + :kf29 => "\e[15;5~", + :kf3 => "\eOR", + :kf30 => "\e[17;5~", + :kf31 => "\e[18;5~", + :kf32 => "\e[19;5~", + :kf33 => "\e[20;5~", + :kf34 => "\e[21;5~", + :kf35 => "\e[23;5~", + :kf36 => "\e[24;5~", + :kf37 => "\e[1;6P", + :kf38 => "\e[1;6Q", + :kf39 => "\e[1;6R", + :kf4 => "\eOS", + :kf40 => "\e[1;6S", + :kf41 => "\e[15;6~", + :kf42 => "\e[17;6~", + :kf43 => "\e[18;6~", + :kf44 => "\e[19;6~", + :kf45 => "\e[20;6~", + :kf46 => "\e[21;6~", + :kf47 => "\e[23;6~", + :kf48 => "\e[24;6~", + :kf49 => "\e[1;3P", + :kf5 => "\e[15~", + :kf50 => "\e[1;3Q", + :kf51 => "\e[1;3R", + :kf52 => "\e[1;3S", + :kf53 => "\e[15;3~", + :kf54 => "\e[17;3~", + :kf55 => "\e[18;3~", + :kf56 => "\e[19;3~", + :kf57 => "\e[20;3~", + :kf58 => "\e[21;3~", + :kf59 => "\e[23;3~", + :kf6 => "\e[17~", + :kf60 => "\e[24;3~", + :kf61 => "\e[1;4P", + :kf62 => "\e[1;4Q", + :kf63 => "\e[1;4R", + :kf7 => "\e[18~", + :kf8 => "\e[19~", + :kf9 => "\e[20~", + :khome => "\eOH", + :kich1 => "\e[2~", + :kind => "\e[1;2B", + :km => true, + :kmous => "\e[<", + :knp => "\e[6~", + :kp5 => "\eOE", + :kpADD => "\eOk", + :kpCMA => "\eOl", + :kpDIV => "\eOo", + :kpDOT => "\eOn", + :kpMUL => "\eOj", + :kpSUB => "\eOm", + :kpZRO => "\eOp", + :kpp => "\e[5~", + :kri => "\e[1;2A", + :lines => 24, + :lpi_changes_res => false, + :lpix => false, + :max_colors => 8, + :max_pairs => 64, + :mc0 => "\e[i", + :mc4 => "\e[4i", + :mc5 => "\e[5i", + :mc5i => true, + :meml => "\el", + :memory_above => false, + :memory_below => false, + :memory_lock => "\el", + :memory_unlock => "\em", + :memu => "\em", + :meta_off => "\e[?1034l", + :meta_on => "\e[?1034h", + :mgc => "\e[?69l", + :mir => true, + :move_insert_mode => true, + :move_standout_mode => true, + :msgr => true, + :ndscr => false, + :needs_xon_xoff => false, + :no_esc_ctlc => false, + :no_pad_char => true, + :non_dest_scroll_region => false, + :non_rev_rmcup => false, + :npc => true, + :nrrmc => false, + :nxon => false, + :op => "\e[39;49m", + :orig_pair => "\e[39;49m", + :os => false, + :over_strike => false, + :pairs => 64, + :parm_dch => "\e[%p1%dP", + :parm_delete_line => "\e[%p1%dM", + :parm_down_cursor => "\e[%p1%dB", + :parm_ich => "\e[%p1%d@", + :parm_index => "\e[%p1%dS", + :parm_insert_line => "\e[%p1%dL", + :parm_left_cursor => "\e[%p1%dD", + :parm_right_cursor => "\e[%p1%dC", + :parm_rindex => "\e[%p1%dT", + :parm_up_cursor => "\e[%p1%dA", + :print_screen => "\e[i", + :prtr_off => "\e[4i", + :prtr_on => "\e[5i", + :prtr_silent => true, + :rc => "\e8", + :rep => "%p1%c\e[%p2%{1}%-%db", + :repeat_char => "%p1%c\e[%p2%{1}%-%db", + :reset_1string => "\ec", + :reset_2string => "\e[!p\e[?3;4l\e[4l\e>", + :restore_cursor => "\e8", + :rev => "\e[7m", + :ri => "\eM", + :rin => "\e[%p1%dT", + :ritm => "\e[23m", + :rmacs => "\e(B", + :rmam => "\e[?7l", + :rmcup => "\e[?1049l\e[23;0;0t", + :rmir => "\e[4l", + :rmkx => "\e[?1l\e>", + 
:rmm => "\e[?1034l", + :rmso => "\e[27m", + :rmul => "\e[24m", + :rmxx => "\e[29m", + :row_addr_glitch => false, + :row_address => "\e[%i%p1%dd", + :rs1 => "\ec", + :rs2 => "\e[!p\e[?3;4l\e[4l\e>", + :sam => false, + :save_cursor => "\e7", + :sc => "\e7", + :scroll_forward => "\n", + :scroll_reverse => "\eM", + :semi_auto_right_margin => false, + :set_a_background => "\e[4%p1%dm", + :set_a_foreground => "\e[3%p1%dm", + :set_attributes => "%?%p9%t\e(0%e\e(B%;\e[0%?%p6%t;1%;%?%p5%t;2%;%?%p2%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m", + :set_background => "\e[4%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :set_foreground => "\e[3%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :set_lr_margin => "\e[?69h\e[%i%p1%d;%p2%ds", + :set_tab => "\eH", + :setab => "\e[4%p1%dm", + :setaf => "\e[3%p1%dm", + :setb => "\e[4%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :setf => "\e[3%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :sgr => "%?%p9%t\e(0%e\e(B%;\e[0%?%p6%t;1%;%?%p5%t;2%;%?%p2%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m", + :sgr0 => "\e(B\e[m", + :sitm => "\e[3m", + :smacs => "\e(0", + :smam => "\e[?7h", + :smcup => "\e[?1049h\e[22;0;0t", + :smglr => "\e[?69h\e[%i%p1%d;%p2%ds", + :smir => "\e[4h", + :smkx => "\e[?1h\e=", + :smm => "\e[?1034h", + :smso => "\e[7m", + :smul => "\e[4m", + :smxx => "\e[9m", + :status_line_esc_ok => false, + :tab => "\t", + :tbc => "\e[3g", + :tilde_glitch => false, + :transparent_underline => false, + :u6 => "\e[%i%d;%dR", + :u7 => "\e[6n", + :u8 => "\e[?%[;0123456789]c", + :u9 => "\e[c", + :ul => false, + :user6 => "\e[%i%d;%dR", + :user7 => "\e[6n", + :user8 => "\e[?%[;0123456789]c", + :user9 => "\e[c", + :vpa => "\e[%i%p1%dd", + :xenl => true, + :xhp => false, + :xhpa => false, + :xm => "\e[<%i%p3%d;%p1%d;%p2%d;%?%p4%tM%em%;", + :xon => false, + :xon_xoff => false, + :xsb => false, + :xt => false, + :xvpa => false) + +@testset "terminfo" begin + dumb = Base.TermInfo(read(IOBuffer(dumb_terminfo), Base.TermInfoRaw)) + @test dumb.names == ["dumb", "80-column dumb tty"] + @test length(dumb.flags) == 2 + @test length(dumb.numbers) == 1 + @test length(dumb.strings) == 4 + @test isnothing(dumb.extensions) + for (key, value) in dumb_capabilities + @test dumb[key] == value + end + + xterm = Base.TermInfo(read(IOBuffer(xterm_terminfo), Base.TermInfoRaw)) + @test xterm.names == ["xterm", "xterm terminal emulator (X Window System)"] + @test length(xterm.flags) == 40 + @test length(xterm.numbers) == 15 + @test length(xterm.strings) == 253 + @test sort(xterm.extensions |> collect) == sort(xterm_extensions) + for (key, value) in xterm_capabilities + @test xterm[key] == value + end +end + +end diff --git a/test/testdefs.jl b/test/testdefs.jl index 4aac988cda7fb..eb0bf570b11fd 100644 --- a/test/testdefs.jl +++ b/test/testdefs.jl @@ -5,6 +5,9 @@ using Test, Random function runtests(name, path, isolate=true; seed=nothing) old_print_setting = Test.TESTSET_PRINT_ENABLE[] Test.TESTSET_PRINT_ENABLE[] = false + # remove all hint_handlers, so that errorshow tests are not changed by which packages have been loaded on this worker already + # packages that call register_error_hint should also call this again, and then re-add any hooks they want to test + empty!(Base.Experimental._hint_handlers) try if isolate # Simple enough to type and random enough so that no one will hard @@ -20,11 +23,12 @@ function runtests(name, path, isolate=true; seed=nothing) end res_and_time_data = @timed @testset 
"$name" begin # Random.seed!(nothing) will fail - seed != nothing && Random.seed!(seed) + seed !== nothing && Random.seed!(seed) original_depot_path = copy(Base.DEPOT_PATH) original_load_path = copy(Base.LOAD_PATH) original_env = copy(ENV) + original_project = Base.active_project() Base.include(m, "$path.jl") @@ -51,25 +55,38 @@ function runtests(name, path, isolate=true; seed=nothing) error(msg) end if copy(ENV) != original_env - msg = "The `$(name)` test set mutated ENV and did not restore the original values" - @error( - msg, - testset_name = name, - testset_path = path, - ) throw_error_str = get(ENV, "JULIA_TEST_CHECK_MUTATED_ENV", "true") throw_error_b = parse(Bool, throw_error_str) if throw_error_b + msg = "The `$(name)` test set mutated ENV and did not restore the original values" + @error( + msg, + testset_name = name, + testset_path = path, + ) error(msg) end end + if Base.active_project() != original_project + msg = "The `$(name)` test set changed the active project and did not restore the original value" + @error( + msg, + original_project, + Base.active_project(), + testset_name = name, + testset_path = path, + ) + error(msg) + end end rss = Sys.maxrss() #res_and_time_data[1] is the testset ts = res_and_time_data[1] - passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(ts) + tc = Test.get_test_counts(ts) # simplify our stored data to just contain the counts - res_and_time_data = (TestSetException(passes+c_passes, fails+c_fails, errors+c_errors, broken+c_broken, Test.filter_errors(ts)), + res_and_time_data = (TestSetException(tc.passes+tc.cumulative_passes, tc.fails+tc.cumulative_fails, + tc.errors+tc.cumulative_errors, tc.broken+tc.cumulative_broken, + Test.filter_errors(ts)), res_and_time_data[2], res_and_time_data[3], res_and_time_data[4], diff --git a/test/testenv.jl b/test/testenv.jl index 41706dd24e75e..3ef1126e0e927 100644 --- a/test/testenv.jl +++ b/test/testenv.jl @@ -35,8 +35,14 @@ if !@isdefined(testenv_defined) const rr_exename = `` end + const test_relocated_depot = haskey(ENV, "RELOCATEDEPOT") + function addprocs_with_testenv(X; rr_allowed=true, kwargs...) exename = rr_allowed ? `$rr_exename $test_exename` : test_exename + if X isa Integer + heap_size=round(Int,(Sys.total_memory()/(1024^2)/(X+1))) + push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M") + end addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...) end diff --git a/test/testhelpers/ChallengePrompts.jl b/test/testhelpers/ChallengePrompts.jl new file mode 100644 index 0000000000000..10dd1553afbbd --- /dev/null +++ b/test/testhelpers/ChallengePrompts.jl @@ -0,0 +1,123 @@ +module ChallengePrompts + +include("FakePTYs.jl") +using .FakePTYs: with_fake_pty +using Serialization: serialize, deserialize + +const timeout = 60 + +""" + challenge_prompt(code::Expr, challenges; pkgs=[]) + +Execute the passed code in a separate process, looking for +the passed prompts and responding as defined in the pairs of +(prompt, response) in the collection of challenges. + +Optionally `import` the given `pkgs`. + +Returns the value of the last expression. +""" +function challenge_prompt(code::Expr, challenges; pkgs=[]) + input_code = tempname() + open(input_code, "w") do fp + serialize(fp, code) + end + output_file = tempname() + torun = """ + $(isempty(pkgs) ? 
"" : string("import ", join(pkgs, ", "))) + using Serialization + result = open($(repr(input_code))) do fp + eval(deserialize(fp)) + end + open($(repr(output_file)), "w") do fp + serialize(fp, result) + end""" + cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun` + try + challenge_prompt(cmd, challenges) + return open(output_file, "r") do fp + deserialize(fp) + end + finally + isfile(output_file) && rm(output_file) + isfile(input_code) && rm(input_code) + end + return nothing +end + +function challenge_prompt(cmd::Cmd, challenges) + function format_output(output) + str = read(seekstart(output), String) + isempty(str) && return "" + return "Process output found:\n\"\"\"\n$str\n\"\"\"" + end + out = IOBuffer() + with_fake_pty() do pts, ptm + p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default + Base.close_stdio(pts) + + # Kill the process if it takes too long. Typically occurs when process is waiting + # for input. + timer = Channel{Symbol}(1) + watcher = @async begin + waited = 0 + while waited < timeout && process_running(p) + sleep(1) + waited += 1 + end + + if process_running(p) + kill(p) + put!(timer, :timeout) + elseif success(p) + put!(timer, :success) + else + put!(timer, :failure) + end + + # SIGKILL stubborn processes + if process_running(p) + sleep(3) + process_running(p) && kill(p, Base.SIGKILL) + end + wait(p) + end + + wroteall = false + try + for (challenge, response) in challenges + write(out, readuntil(ptm, challenge, keep=true)) + if !isopen(ptm) + error("Could not locate challenge: \"$challenge\". ", + format_output(out)) + end + write(ptm, response) + end + wroteall = true + + # Capture output from process until `pts` is closed + write(out, ptm) + catch ex + if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO) + # ignore EIO from `ptm` after `pts` dies + error("Process failed possibly waiting for a response. ", + format_output(out)) + end + end + + status = fetch(timer) + close(ptm) + if status !== :success + if status === :timeout + error("Process timed out possibly waiting for a response. ", + format_output(out)) + else + error("Failed process. ", format_output(out), "\n", p) + end + end + wait(watcher) + end + nothing +end + +end diff --git a/test/testhelpers/DualNumbers.jl b/test/testhelpers/DualNumbers.jl index 9f62e3bf0d429..5c481aef47f76 100644 --- a/test/testhelpers/DualNumbers.jl +++ b/test/testhelpers/DualNumbers.jl @@ -41,6 +41,6 @@ Base.sqrt(x::Dual) = Dual(sqrt(x.val), x.eps/(2sqrt(x.val))) Base.isless(x::Dual, y::Dual) = x.val < y.val Base.isless(x::Real, y::Dual) = x < y.val Base.isinf(x::Dual) = isinf(x.val) & isfinite(x.eps) -Base.real(x::Dual) = x # since we curently only consider Dual{<:Real} +Base.real(x::Dual) = x # since we currently only consider Dual{<:Real} end # module diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl index c592699440ee0..56ce6dc7d3a49 100644 --- a/test/testhelpers/FakePTYs.jl +++ b/test/testhelpers/FakePTYs.jl @@ -1,5 +1,4 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license - module FakePTYs if Sys.iswindows() @@ -24,10 +23,7 @@ function open_fake_pty() close(pts) pts = fds # convert pts handle to a TTY - #fds = pts.handle - #pts.status = Base.StatusClosed - #pts.handle = C_NULL - #pts = Base.TTY(fds, Base.StatusOpen) + #pts = open(fds)::Base.TTY else O_RDWR = Base.Filesystem.JL_O_RDWR O_NOCTTY = Base.Filesystem.JL_O_NOCTTY @@ -44,8 +40,9 @@ function open_fake_pty() pts = RawFD(fds) # pts = fdio(fds, true) - # pts = Base.Filesystem.File(RawFD(fds)) - # pts = Base.TTY(RawFD(fds); readable = false) + # pts = Base.Filesystem.File(pts) + # pts = Base.TTY(pts) + # pts = Base.open(pts) ptm = Base.TTY(RawFD(fdm)) end return pts, ptm diff --git a/test/testhelpers/FillArrays.jl b/test/testhelpers/FillArrays.jl index 1f36a77bf8c12..d3b8d74da7148 100644 --- a/test/testhelpers/FillArrays.jl +++ b/test/testhelpers/FillArrays.jl @@ -9,6 +9,10 @@ Fill(v, size::Vararg{Integer}) = Fill(v, size) Base.size(F::Fill) = F.size +Base.copy(F::Fill) = F + +Base.AbstractArray{T,N}(F::Fill{<:Any,N}) where {T,N} = Fill(T(F.value), F.size) + @inline getindex_value(F::Fill) = F.value @inline function Base.getindex(F::Fill{<:Any,N}, i::Vararg{Int,N}) where {N} @@ -27,7 +31,36 @@ end F end +Base.zero(F::Fill) = Fill(zero(F.value), size(F)) + Base.show(io::IO, F::Fill) = print(io, "Fill($(F.value), $(F.size))") Base.show(io::IO, ::MIME"text/plain", F::Fill) = show(io, F) +_first_or_one(t::Tuple) = t[1] +_first_or_one(t::Tuple{}) = 1 + +_match_size(sz::Tuple{}, inner::Tuple{}, outer::Tuple{}) = () +function _match_size(sz::Tuple, inner::Tuple, outer::Tuple) + t1 = (_first_or_one(sz), _first_or_one(inner), _first_or_one(outer)) + t2 = _match_size(sz[2:end], inner[2:end], outer[2:end]) + (t1, t2...) +end + +function _repeat_size(sz::Tuple, inner::Tuple, outer::Tuple) + t = _match_size(sz, inner, outer) + map(*, getindex.(t, 1), getindex.(t, 2), getindex.(t, 3)) +end + +function Base.repeat(A::Fill; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A))) + Base.require_one_based_indexing(A) + length(inner) >= ndims(A) || + throw(ArgumentError("number of inner repetitions $(length(inner)) cannot be "* + "less than number of dimensions of input array $(ndims(A))")) + length(outer) >= ndims(A) || + throw(ArgumentError("number of outer repetitions $(length(outer)) cannot be "* + "less than number of dimensions of input array $(ndims(A))")) + sz = _repeat_size(size(A), Tuple(inner), Tuple(outer)) + Fill(getindex_value(A), sz) +end + end diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl index f63b5460c7c16..3ddf42bf1a82c 100644 --- a/test/testhelpers/Furlongs.jl +++ b/test/testhelpers/Furlongs.jl @@ -99,5 +99,12 @@ for op in (:rem, :mod) end end Base.sqrt(x::Furlong) = _div(sqrt(x.val), x, Val(2)) +Base.muladd(x::Furlong, y::Furlong, z::Furlong) = x*y + z +Base.muladd(x::Furlong, y::Number, z::Number) = x*y + z +Base.muladd(x::Furlong, y::Furlong, z::Number) = x*y + z +Base.muladd(x::Number, y::Furlong, z::Number) = x*y + z +Base.muladd(x::Number, y::Number, z::Furlong) = x*y + z +Base.muladd(x::Number, y::Furlong, z::Furlong) = x*y + z +Base.muladd(x::Furlong, y::Number, z::Furlong) = x*y + z end diff --git a/test/testhelpers/ImmutableArrays.jl b/test/testhelpers/ImmutableArrays.jl index df2a78387e07b..8f2d23be3a7a7 100644 --- a/test/testhelpers/ImmutableArrays.jl +++ b/test/testhelpers/ImmutableArrays.jl @@ -25,4 +25,7 @@ Base.getindex(A::ImmutableArray, i...) = getindex(A.data, i...) 
AbstractArray{T}(A::ImmutableArray) where {T} = ImmutableArray(AbstractArray{T}(A.data)) AbstractArray{T,N}(A::ImmutableArray{S,N}) where {S,T,N} = ImmutableArray(AbstractArray{T,N}(A.data)) +Base.copy(A::ImmutableArray) = ImmutableArray(copy(A.data)) +Base.zero(A::ImmutableArray) = ImmutableArray(zero(A.data)) + end diff --git a/test/testhelpers/InfiniteArrays.jl b/test/testhelpers/InfiniteArrays.jl index 14b2e56daf1c6..cec3c94aaa296 100644 --- a/test/testhelpers/InfiniteArrays.jl +++ b/test/testhelpers/InfiniteArrays.jl @@ -48,5 +48,6 @@ Base.length(r::OneToInf) = Infinity() Base.last(r::OneToInf) = Infinity() Base.unitrange(r::OneToInf) = r Base.oneto(::Infinity) = OneToInf() +Base.unchecked_oneto(::Infinity) = OneToInf() end diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl index 705bd07b2878c..5acaa88064245 100644 --- a/test/testhelpers/OffsetArrays.jl +++ b/test/testhelpers/OffsetArrays.jl @@ -5,7 +5,7 @@ # This test file is designed to exercise support for generic indexing, # even though offset arrays aren't implemented in Base. -# OffsetArrays v1.11.2 +# OffsetArrays v1.15.0 # No compat patch and docstrings module OffsetArrays @@ -73,10 +73,15 @@ end IdOffsetRange(r::IdOffsetRange) = r # Constructor to make `show` round-trippable +# try to preserve typeof(values) if the indices are known to be 1-based +_subtractindexoffset(values, indices::Union{Base.OneTo, IdentityUnitRange{<:Base.OneTo}}, offset) = values +_subtractindexoffset(values, indices, offset) = _subtractoffset(values, offset) function IdOffsetRange(; values::AbstractUnitRange{<:Integer}, indices::AbstractUnitRange{<:Integer}) length(values) == length(indices) || throw(ArgumentError("values and indices must have the same length")) + values_nooffset = no_offset_view(values) offset = first(indices) - 1 - return IdOffsetRange(values .- offset, offset) + values_minus_offset = _subtractindexoffset(values_nooffset, indices, offset) + return IdOffsetRange(values_minus_offset, offset) end # Conversions to an AbstractUnitRange{Int} (and to an OrdinalRange{Int,Int} on Julia v"1.6") are necessary @@ -110,12 +115,19 @@ offset_coerce(::Type{I}, r::AbstractUnitRange) where I<:AbstractUnitRange = @inline Base.unsafe_indices(r::IdOffsetRange) = (Base.axes1(r),) @inline Base.length(r::IdOffsetRange) = length(r.parent) @inline Base.isempty(r::IdOffsetRange) = isempty(r.parent) +#= We specialize on reduced_indices to work around cases where the parent axis type doesn't +support reduced_index, but the axes do support reduced_indices +The difference is that reduced_index expects the axis type to remain unchanged, +which may not always be possible, eg. for statically sized axes +See https://github.com/JuliaArrays/OffsetArrays.jl/issues/204 +=# +function Base.reduced_indices(inds::Tuple{IdOffsetRange, Vararg{IdOffsetRange}}, d::Int) + parents_reduced = Base.reduced_indices(map(parent, inds), d) + ntuple(i -> IdOffsetRange(parents_reduced[i], inds[i].offset), Val(length(inds))) +end Base.reduced_index(i::IdOffsetRange) = typeof(i)(first(i):first(i)) # Workaround for #92 on Julia < 1.4 Base.reduced_index(i::IdentityUnitRange{<:IdOffsetRange}) = typeof(i)(first(i):first(i)) -for f in [:firstindex, :lastindex] - @eval @inline Base.$f(r::IdOffsetRange) = $f(r.parent) + r.offset -end for f in [:first, :last] # coerce the type to deal with values that get promoted on addition (eg. 
Bool) @eval @inline Base.$f(r::IdOffsetRange) = eltype(r)($f(r.parent) + r.offset) @@ -186,17 +198,20 @@ for R in [:IIUR, :IdOffsetRange] end # offset-preserve broadcasting -Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(-), r::IdOffsetRange{T}, x::Integer) where T = - IdOffsetRange{T}(r.parent .- x, r.offset) -Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), r::IdOffsetRange{T}, x::Integer) where T = - IdOffsetRange{T}(r.parent .+ x, r.offset) -Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::IdOffsetRange{T}) where T = - IdOffsetRange{T}(x .+ r.parent, r.offset) +Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(-), r::IdOffsetRange, x::Integer) = + IdOffsetRange(r.parent .- x, r.offset) +Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), r::IdOffsetRange, x::Integer) = + IdOffsetRange(r.parent .+ x, r.offset) +Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::IdOffsetRange) = + IdOffsetRange(x .+ r.parent, r.offset) +Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(big), r::IdOffsetRange) = + IdOffsetRange(big.(r.parent), r.offset) Base.show(io::IO, r::IdOffsetRange) = print(io, IdOffsetRange, "(values=",first(r), ':', last(r),", indices=",first(eachindex(r)),':',last(eachindex(r)), ")") # Optimizations @inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset) +Base._firstslice(i::IdOffsetRange) = IdOffsetRange(Base._firstslice(i.parent), i.offset) ######################################################################################################## # origin.jl @@ -308,12 +323,12 @@ _popreshape(A::AbstractArray, ax, inds) = A # Technically we know the length of CartesianIndices but we need to convert it first, so here we # don't put it in OffsetAxisKnownLength. -const OffsetAxisKnownLength = Union{Integer,AbstractUnitRange} -const OffsetAxis = Union{OffsetAxisKnownLength,Colon} -const ArrayInitializer = Union{UndefInitializer,Missing,Nothing} +const OffsetAxisKnownLength = Union{Integer, AbstractUnitRange} +const OffsetAxis = Union{OffsetAxisKnownLength, Colon} +const ArrayInitializer = Union{UndefInitializer, Missing, Nothing} ## OffsetArray -struct OffsetArray{T,N,AA<:AbstractArray} <: AbstractArray{T,N} +struct OffsetArray{T,N,AA<:AbstractArray{T,N}} <: AbstractArray{T,N} parent::AA offsets::NTuple{N,Int} @inline function OffsetArray{T, N, AA}(parent::AA, offsets::NTuple{N, Int}; checkoverflow = true) where {T, N, AA<:AbstractArray{T,N}} @@ -481,6 +496,10 @@ Base.parent(A::OffsetArray) = A.parent # Base.Broadcast.BroadcastStyle(::Type{<:OffsetArray{<:Any, <:Any, AA}}) where AA = Base.Broadcast.BroadcastStyle(AA) @inline Base.size(A::OffsetArray) = size(parent(A)) +# specializing length isn't necessary, as length(A) = prod(size(A)), +# but specializing length enables constant-propagation for statically sized arrays +# see https://github.com/JuliaArrays/OffsetArrays.jl/pull/304 +@inline Base.length(A::OffsetArray) = length(parent(A)) @inline Base.axes(A::OffsetArray) = map(IdOffsetRange, axes(parent(A)), A.offsets) @inline Base.axes(A::OffsetArray, d) = d <= ndims(A) ? 
IdOffsetRange(axes(parent(A), d), A.offsets[d]) : IdOffsetRange(axes(parent(A), d)) @@ -527,8 +546,10 @@ _similar_axes_or_length(A, T, ax::I, ::I) where {I} = similar(A, T, map(_indexle _similar_axes_or_length(AT, ax::I, ::I) where {I} = similar(AT, map(_indexlength, ax)) # reshape accepts a single colon -Base.reshape(A::AbstractArray, inds::OffsetAxis...) = reshape(A, inds) -function Base.reshape(A::AbstractArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) +# this method is limited to AbstractUnitRange{<:Integer} to avoid method overwritten errors if Base defines the same, +# see https://github.com/JuliaLang/julia/pull/56850 +Base.reshape(A::AbstractArray, inds::Union{Integer, Colon, AbstractUnitRange{<:Integer}}...) = reshape(A, inds) +function Base.reshape(A::AbstractArray, inds::Tuple{Vararg{OffsetAxis}}) AR = reshape(no_offset_view(A), map(_indexlength, inds)) O = OffsetArray(AR, map(_offset, axes(AR), inds)) return _popreshape(O, axes(AR), _filterreshapeinds(inds)) @@ -552,36 +573,14 @@ _reshape2(A, inds) = reshape(A, inds) _reshape2(A::OffsetArray, inds) = reshape(parent(A), inds) _reshape_nov(A, inds) = _reshape(no_offset_view(A), inds) -Base.reshape(A::OffsetArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) = - OffsetArray(_reshape(parent(A), inds), map(_toaxis, inds)) # And for non-offset axes, we can just return a reshape of the parent directly -Base.reshape(A::OffsetArray, inds::Tuple{Union{Integer,Base.OneTo},Vararg{Union{Integer,Base.OneTo}}}) = _reshape_nov(A, inds) +Base.reshape(A::OffsetArray, inds::Tuple{Integer,Vararg{Integer}}) = _reshape_nov(A, inds) Base.reshape(A::OffsetArray, inds::Dims) = _reshape_nov(A, inds) -Base.reshape(A::OffsetVector, ::Colon) = A -Base.reshape(A::OffsetVector, ::Tuple{Colon}) = A -Base.reshape(A::OffsetArray, ::Colon) = reshape(A, (Colon(),)) -Base.reshape(A::OffsetArray, inds::Union{Int,Colon}...) 
= reshape(A, inds) -Base.reshape(A::OffsetArray, inds::Tuple{Vararg{Union{Int,Colon}}}) = _reshape_nov(A, inds) -# The following two additional methods for Colon are added to resolve method ambiguities to -# Base: https://github.com/JuliaLang/julia/pull/45387#issuecomment-1132859663 -Base.reshape(A::OffsetArray, inds::Colon) = _reshape_nov(A, inds) -Base.reshape(A::OffsetArray, inds::Tuple{Colon}) = _reshape_nov(A, inds) # permutedims in Base does not preserve axes, and can not be fixed in a non-breaking way # This is a stopgap solution Base.permutedims(v::OffsetVector) = reshape(v, (1, axes(v, 1))) -Base.fill(v, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} = - fill!(similar(Array{typeof(v)}, inds), v) -Base.zeros(::Type{T}, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {T, N} = - fill!(similar(Array{T}, inds), zero(T)) -Base.ones(::Type{T}, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {T, N} = - fill!(similar(Array{T}, inds), one(T)) -Base.trues(inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} = - fill!(similar(BitArray, inds), true) -Base.falses(inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} = - fill!(similar(BitArray, inds), false) - Base.zero(A::OffsetArray) = parent_call(zero, A) Base.fill!(A::OffsetArray, x) = parent_call(Ap -> fill!(Ap, x), A) @@ -641,7 +640,7 @@ Base.copy(A::OffsetArray) = parent_call(copy, A) Base.strides(A::OffsetArray) = strides(parent(A)) Base.elsize(::Type{OffsetArray{T,N,A}}) where {T,N,A} = Base.elsize(A) -@inline Base.unsafe_convert(::Type{Ptr{T}}, A::OffsetArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, parent(A)) +Base.cconvert(P::Type{Ptr{T}}, A::OffsetArray{T}) where {T} = Base.cconvert(P, parent(A)) # For fast broadcasting: ref https://discourse.julialang.org/t/why-is-there-a-performance-hit-on-broadcasting-with-offsetarrays/32194 Base.dataids(A::OffsetArray) = Base.dataids(parent(A)) @@ -741,15 +740,6 @@ if eltype(IIUR) === Int Base.map(::Type{T}, r::IdentityUnitRange) where {T<:Real} = _indexedby(map(T, UnitRange(r)), axes(r)) end -# mapreduce is faster with an IdOffsetRange than with an OffsetUnitRange -# We therefore convert OffsetUnitRanges to IdOffsetRanges with the same values and axes -function Base.mapreduce(f, op, A1::OffsetUnitRange{<:Integer}, As::OffsetUnitRange{<:Integer}...; kw...) - As = (A1, As...) - ofs = map(A -> first(axes(A,1)) - 1, As) - AIds = map((A, of) -> IdOffsetRange(_subtractoffset(parent(A), of), of), As, ofs) - mapreduce(f, op, AIds...; kw...) -end - # Optimize certain reductions that treat an OffsetVector as a list for f in [:minimum, :maximum, :extrema, :sum] @eval Base.$f(r::OffsetRange) = $f(parent(r)) @@ -771,7 +761,8 @@ Base.append!(A::OffsetVector, items) = (append!(A.parent, items); A) Base.empty!(A::OffsetVector) = (empty!(A.parent); A) # These functions keep the summary compact -function Base.inds2string(inds::Tuple{Vararg{Union{IdOffsetRange, IdentityUnitRange{<:IdOffsetRange}}}}) +const OffsetIndices = Union{IdOffsetRange, IdentityUnitRange{<:IdOffsetRange}} +function Base.inds2string(inds::Tuple{OffsetIndices, Vararg{OffsetIndices}}) Base.inds2string(map(UnitRange, inds)) end Base.showindices(io::IO, ind1::IdOffsetRange, inds::IdOffsetRange...) = Base.showindices(io, map(UnitRange, (ind1, inds...))...) 
@@ -795,7 +786,33 @@ function Base.replace_in_print_matrix(A::OffsetArray{<:Any,1}, i::Integer, j::In Base.replace_in_print_matrix(parent(A), ip, j, s) end +# Actual unsafe_wrap implementation +@inline function _unsafe_wrap(pointer::Ptr{T}, inds::NTuple{N, OffsetAxisKnownLength}; own = false, kw...) where {T,N} + _checkindices(N, inds, "indices") + AA = Base.unsafe_wrap(Array, pointer, map(_indexlength, inds); own=own) + OffsetArray{T, N, typeof(AA)}(AA, map(_indexoffset, inds); kw...) +end +const OffsetArrayUnion{T,N} = Union{Type{OffsetArray}, Type{OffsetArray{T}}, Type{OffsetArray{T,N}}, Type{OffsetArray{T1, N} where T1}} where {T,N} + +@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::NTuple{N, OffsetAxisKnownLength}; kw...) where {T,N} + _unsafe_wrap(pointer, inds; kw...) +end +# Avoid ambiguity +@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::NTuple{N, <:Integer}; kw...) where {T,N} + _unsafe_wrap(pointer, inds; kw...) +end +@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::Vararg{OffsetAxisKnownLength,N}; kw...) where {T,N} + _unsafe_wrap(pointer, inds; kw...) +end +# Avoid ambiguity +@inline function Base.unsafe_wrap(::OffsetArrayUnion{T,N}, pointer::Ptr{T}, inds::Vararg{Integer,N}; kw...) where {T,N} + _unsafe_wrap(pointer, inds; kw...) +end + no_offset_view(A::OffsetArray) = no_offset_view(parent(A)) +no_offset_view(a::Base.Slice{<:Base.OneTo}) = a +no_offset_view(a::Base.Slice) = Base.Slice(UnitRange(a)) +no_offset_view(S::SubArray) = view(parent(S), map(no_offset_view, parentindices(S))...) no_offset_view(a::Array) = a no_offset_view(i::Number) = i no_offset_view(A::AbstractArray) = _no_offset_view(axes(A), A) @@ -811,9 +828,12 @@ _no_offset_view(::Any, A::AbstractUnitRange) = UnitRange(A) # These two helpers are deliberately not exported; their meaning can be very different in # other scenarios and will be very likely to cause name conflicts if exported. ##### + +_halfroundInt(v, r::RoundingMode) = div(v, 2, r) + function center(A::AbstractArray, r::RoundingMode=RoundDown) map(axes(A)) do inds - round(Int, (length(inds)-1)/2, r) + first(inds) + _halfroundInt(length(inds)-1, r) + first(inds) end end @@ -821,17 +841,6 @@ centered(A::AbstractArray, cp::Dims=center(A)) = OffsetArray(A, .-cp) centered(A::AbstractArray, i::CartesianIndex) = centered(A, Tuple(i)) -# we may pass the searchsorted* functions to the parent, and wrap the offset -for f in [:searchsortedfirst, :searchsortedlast, :searchsorted] - _safe_f = Symbol("_safe_" * String(f)) - @eval function $_safe_f(v::OffsetArray, x, ilo, ihi, o::Base.Ordering) - offset = firstindex(v) - firstindex(parent(v)) - $f(parent(v), x, ilo - offset, ihi - offset, o) .+ offset - end - @eval Base.$f(v::OffsetVector, x, ilo::T, ihi::T, o::Base.Ordering) where T<:Integer = - $_safe_f(v, x, ilo, ihi, o) -end - ## # Deprecations ## diff --git a/test/testhelpers/OffsetDenseArrays.jl b/test/testhelpers/OffsetDenseArrays.jl new file mode 100644 index 0000000000000..44a1b8d627800 --- /dev/null +++ b/test/testhelpers/OffsetDenseArrays.jl @@ -0,0 +1,31 @@ +""" + module OffsetDenseArrays + +A minimal implementation of an offset array which is also <: DenseArray. 
+""" +module OffsetDenseArrays + +struct OffsetDenseArray{A <: DenseVector, T} <: DenseVector{T} + x::A + offset::Int +end +OffsetDenseArray(x::AbstractVector{T}, i::Integer) where {T} = OffsetDenseArray{typeof(x), T}(x, Int(i)) + +Base.size(x::OffsetDenseArray) = size(x.x) +Base.pointer(x::OffsetDenseArray) = pointer(x.x) + +function Base.getindex(x::OffsetDenseArray, i::Integer) + @boundscheck checkbounds(x.x, i - x.offset) + x.x[i - x.offset] +end + +function Base.setindex(x::OffsetDenseArray, v, i::Integer) + @boundscheck checkbounds(x.x, i - x.offset) + x.x[i - x.offset] = v +end + +IndexStyle(::Type{<:OffsetDenseArray}) = Base.IndexLinear() +Base.axes(x::OffsetDenseArray) = (x.offset + 1 : x.offset + length(x.x),) +Base.keys(x::OffsetDenseArray) = only(axes(x)) + +end # module diff --git a/test/testhelpers/Quaternions.jl b/test/testhelpers/Quaternions.jl index 1eddad322ec40..b1a414266bb34 100644 --- a/test/testhelpers/Quaternions.jl +++ b/test/testhelpers/Quaternions.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license module Quaternions +using LinearAlgebra using Random export Quaternion @@ -13,16 +14,23 @@ struct Quaternion{T<:Real} <: Number v2::T v3::T end +Quaternion{T}(s::Real) where {T<:Real} = Quaternion{T}(T(s), zero(T), zero(T), zero(T)) Quaternion(s::Real, v1::Real, v2::Real, v3::Real) = Quaternion(promote(s, v1, v2, v3)...) Base.convert(::Type{Quaternion{T}}, s::Real) where {T <: Real} = Quaternion{T}(convert(T, s), zero(T), zero(T), zero(T)) +Base.promote_rule(::Type{Quaternion{T}}, ::Type{S}) where {T <: Real, S <: Real} = + Quaternion{promote_type(T, S)} Base.abs2(q::Quaternion) = q.s*q.s + q.v1*q.v1 + q.v2*q.v2 + q.v3*q.v3 Base.float(z::Quaternion{T}) where T = Quaternion(float(z.s), float(z.v1), float(z.v2), float(z.v3)) Base.abs(q::Quaternion) = sqrt(abs2(q)) Base.real(::Type{Quaternion{T}}) where {T} = T +Base.real(q::Quaternion) = q.s Base.conj(q::Quaternion) = Quaternion(q.s, -q.v1, -q.v2, -q.v3) Base.isfinite(q::Quaternion) = isfinite(q.s) & isfinite(q.v1) & isfinite(q.v2) & isfinite(q.v3) +Base.isreal(q::Quaternion) = iszero(q.v1) & iszero(q.v2) & iszero(q.v3) Base.zero(::Type{Quaternion{T}}) where T = Quaternion{T}(zero(T), zero(T), zero(T), zero(T)) +# avoid defining sqrt(::Quaternion) +LinearAlgebra.choltype(::AbstractArray{Quaternion{T}}) where T = Quaternion{promote_type(T, Float32)} Base.:(+)(ql::Quaternion, qr::Quaternion) = Quaternion(ql.s + qr.s, ql.v1 + qr.v1, ql.v2 + qr.v2, ql.v3 + qr.v3) @@ -33,9 +41,14 @@ Base.:(*)(q::Quaternion, w::Quaternion) = Quaternion(q.s*w.s - q.v1*w.v1 - q.v2* q.s*w.v2 - q.v1*w.v3 + q.v2*w.s + q.v3*w.v1, q.s*w.v3 + q.v1*w.v2 - q.v2*w.v1 + q.v3*w.s) Base.:(*)(q::Quaternion, r::Real) = Quaternion(q.s*r, q.v1*r, q.v2*r, q.v3*r) -Base.:(*)(q::Quaternion, b::Bool) = b * q # remove method ambiguity +Base.:(*)(q::Quaternion, r::Bool) = Quaternion(q.s*r, q.v1*r, q.v2*r, q.v3*r) # remove method ambiguity +Base.:(*)(r::Real, q::Quaternion) = q * r +Base.:(*)(r::Bool, q::Quaternion) = q * r # remove method ambiguity Base.:(/)(q::Quaternion, w::Quaternion) = q * conj(w) * (1.0 / abs2(w)) Base.:(\)(q::Quaternion, w::Quaternion) = conj(q) * w * (1.0 / abs2(q)) +Base.:(/)(q::Quaternion, r::Real) = Quaternion(q.s / r, q.v1 / r, q.v2 / r, q.v3 / r) +Base.:(==)(q::Quaternion, w::Quaternion) = + (q.s == w.s) & (q.v1 == w.v1) & (q.v2 == w.v2) & (q.v3 == w.v3) # adapted from https://github.com/JuliaGeometry/Quaternions.jl/pull/42 function Base.rand(rng::AbstractRNG, 
::Random.SamplerType{Quaternion{T}}) where {T<:Real} diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl index dfcc5b79f1387..e52e965a64859 100644 --- a/test/testhelpers/SizedArrays.jl +++ b/test/testhelpers/SizedArrays.jl @@ -9,8 +9,23 @@ module SizedArrays import Base: +, *, == +using LinearAlgebra +import LinearAlgebra: mul! + export SizedArray +struct SOneTo{N} <: AbstractUnitRange{Int} end +SOneTo(N) = SOneTo{N}() +Base.length(::SOneTo{N}) where {N} = N +Base.size(r::SOneTo) = (length(r),) +Base.axes(r::SOneTo) = (r,) +Base.first(::SOneTo) = 1 +Base.last(r::SOneTo) = length(r) +Base.show(io::IO, r::SOneTo) = print(io, "SOneTo(", length(r), ")") + +Broadcast.axistype(a::Base.OneTo, s::SOneTo) = s +Broadcast.axistype(s::SOneTo, a::Base.OneTo) = s + struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N} data::A function SizedArray{SZ}(data::AbstractArray{T,N}) where {SZ,T,N} @@ -21,20 +36,70 @@ struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N} SZ == size(data) || throw(ArgumentError("size mismatch!")) new{SZ,T,N,A}(A(data)) end + function SizedArray{SZ,T,N}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}} + SizedArray{SZ,T,N,A}(data) + end + function SizedArray{SZ,T}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}} + SizedArray{SZ,T,N,A}(data) + end end -Base.convert(::Type{SizedArray{SZ,T,N,A}}, data::AbstractArray) where {SZ,T,N,A} = SizedArray{SZ,T,N,A}(data) +SizedMatrix{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,2,A} +SizedVector{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,1,A} +Base.convert(::Type{S}, data::AbstractArray) where {S<:SizedArray} = data isa S ? data : S(data) # Minimal AbstractArray interface Base.size(a::SizedArray) = size(typeof(a)) Base.size(::Type{<:SizedArray{SZ}}) where {SZ} = SZ +Base.axes(a::SizedArray) = map(SOneTo, size(a)) Base.getindex(A::SizedArray, i...) = getindex(A.data, i...) +Base.setindex!(A::SizedArray, v, i...) = setindex!(A.data, v, i...) Base.zero(::Type{T}) where T <: SizedArray = SizedArray{size(T)}(zeros(eltype(T), size(T))) +Base.parent(S::SizedArray) = S.data +(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = SizedArray{SZ}(S1.data + S2.data) ==(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = S1.data == S2.data -function *(S1::SizedArray, S2::SizedArray) + +homogenize_shape(t::Tuple) = (_homogenize_shape(first(t)), homogenize_shape(Base.tail(t))...) 
+homogenize_shape(::Tuple{}) = () +_homogenize_shape(x::Integer) = x +_homogenize_shape(x::AbstractUnitRange) = length(x) +const Dims = Union{Integer, Base.OneTo, SOneTo} +function Base.similar(::Type{A}, shape::Tuple{Dims, Vararg{Dims}}) where {A<:AbstractArray} + similar(A, homogenize_shape(shape)) +end +function Base.similar(::Type{A}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {A<:AbstractArray} + R = similar(A, length.(shape)) + SizedArray{length.(shape)}(R) +end +function Base.similar(x::SizedArray, ::Type{T}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {T} + sz = map(length, shape) + SizedArray{sz}(similar(parent(x), T, sz)) +end + +const SizedMatrixLike = Union{SizedMatrix, Transpose{<:Any, <:SizedMatrix}, Adjoint{<:Any, <:SizedMatrix}} + +_data(S::SizedArray) = S.data +_data(T::Transpose{<:Any, <:SizedArray}) = transpose(_data(parent(T))) +_data(T::Adjoint{<:Any, <:SizedArray}) = adjoint(_data(parent(T))) + +function *(S1::SizedMatrixLike, S2::SizedMatrixLike) 0 < ndims(S1) < 3 && 0 < ndims(S2) < 3 && size(S1, 2) == size(S2, 1) || throw(ArgumentError("size mismatch!")) - data = S1.data * S2.data + data = _data(S1) * _data(S2) SZ = ndims(data) == 1 ? (size(S1, 1), ) : (size(S1, 1), size(S2, 2)) SizedArray{SZ}(data) end + +# deliberately wide method definitions to test for method ambiguties in LinearAlgebra +*(S1::SizedMatrixLike, M::AbstractMatrix) = _data(S1) * M +mul!(dest::AbstractMatrix, S1::SizedMatrix, M::AbstractMatrix, α::Number, β::Number) = + mul!(dest, _data(S1), M, α, β) +mul!(dest::AbstractMatrix, M::AbstractMatrix, S2::SizedMatrix, α::Number, β::Number) = + mul!(dest, M, _data(S2), α, β) +mul!(dest::AbstractMatrix, S1::SizedMatrix, S2::SizedMatrix, α::Number, β::Number) = + mul!(dest, _data(S1), _data(S2), α, β) +mul!(dest::AbstractVector, M::AbstractMatrix, v::SizedVector, α::Number, β::Number) = + mul!(dest, M, _data(v), α, β) + +LinearAlgebra.zeroslike(::Type{S}, ax::Tuple{SizedArrays.SOneTo, Vararg{SizedArrays.SOneTo}}) where {S<:SizedArray} = + zeros(eltype(S), ax) + end diff --git a/test/testhelpers/StructArrays.jl b/test/testhelpers/StructArrays.jl new file mode 100644 index 0000000000000..f03b07f4e60ad --- /dev/null +++ b/test/testhelpers/StructArrays.jl @@ -0,0 +1,39 @@ +module StructArrays + +struct StructArray{T,N,C <: Tuple{Vararg{AbstractArray{<:Any,N}}}} <: AbstractArray{T,N} + components :: C + + function StructArray{T,N,C}(components::C) where {T,N,C} + fieldcount(T) == length(components) || throw(ArgumentError("number of components incompatible with eltype")) + allequal(axes.(components)) || throw(ArgumentError("component arrays must have the same axes")) + new{T,N,C}(components) + end +end + +function StructArray{T}(components::Tuple{Vararg{AbstractArray{<:Any,N}}}) where {T,N} + StructArray{T,N,typeof(components)}(components) +end + +Base.size(S::StructArray) = size(S.components[1]) +Base.axes(S::StructArray) = axes(S.components[1]) +function Base.getindex(S::StructArray{T,N}, inds::Vararg{Int,N}) where {T,N} + vals = map(x -> x[inds...], S.components) + T(vals...) +end +function Base.setindex!(S::StructArray{T,N}, val, inds::Vararg{Int,N}) where {T,N} + vals = getfield.(Ref(convert(T, val)), fieldnames(T)) + for (A,v) in zip(S.components, vals) + A[inds...] 
= v + end + S +end + +isnonemptystructtype(::Type{T}) where {T} = isstructtype(T) && fieldcount(T) != 0 + +function Base.similar(S::StructArray, ::Type{T}, dims::Tuple{Int, Vararg{Int}}) where {T} + isnonemptystructtype(T) || return similar(S.components[1], T, dims) + arrs = similar.(S.components, fieldtypes(T), Ref(dims)) + StructArray{T}(arrs) +end + +end diff --git a/test/testhelpers/arrayindexingtypes.jl b/test/testhelpers/arrayindexingtypes.jl index 0e956b5216c94..95c1f18e00903 100644 --- a/test/testhelpers/arrayindexingtypes.jl +++ b/test/testhelpers/arrayindexingtypes.jl @@ -66,3 +66,6 @@ Base.axes(A::WrapperArray) = axes(A.parent) Base.getindex(A::WrapperArray, i::Int...) = A.parent[i...] Base.setindex!(A::WrapperArray, v, i::Int...) = A.parent[i...] = v Base.similar(A::WrapperArray, ::Type{T}, dims::Dims) where T = similar(A.parent, T, dims) +Base.cconvert(::Type{Ptr{T}}, A::WrapperArray{T}) where {T} = Base.cconvert(Ptr{T}, A.parent) +Base.strides(A::WrapperArray) = strides(A.parent) +Base.elsize(::Type{WrapperArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = Base.elsize(A) diff --git a/test/testhelpers/coverage_file.info b/test/testhelpers/coverage_file.info index c83e75dee8060..b03b0e07e6977 100644 --- a/test/testhelpers/coverage_file.info +++ b/test/testhelpers/coverage_file.info @@ -10,9 +10,10 @@ DA:11,1 DA:12,1 DA:14,0 DA:17,1 -DA:19,2 +DA:18,1 +DA:19,1 DA:20,1 DA:22,1 -LH:12 -LF:14 +LH:13 +LF:15 end_of_record diff --git a/test/testhelpers/coverage_file.info.bad b/test/testhelpers/coverage_file.info.bad deleted file mode 100644 index 311f6379381ee..0000000000000 --- a/test/testhelpers/coverage_file.info.bad +++ /dev/null @@ -1,20 +0,0 @@ -SF: -DA:3,1 -DA:4,1 -DA:5,0 -DA:7,1 -DA:8,1 -DA:9,3 -DA:10,5 -DA:11,1 -DA:12,1 -DA:14,0 -DA:17,1 -DA:18,0 -DA:19,2 -DA:20,1 -DA:22,1 -DA:1234,0 -LH:12 -LF:16 -end_of_record diff --git a/test/testhelpers/coverage_file.info.bad2 b/test/testhelpers/coverage_file.info.bad2 deleted file mode 100644 index a766597be4c17..0000000000000 --- a/test/testhelpers/coverage_file.info.bad2 +++ /dev/null @@ -1,20 +0,0 @@ -SF: -DA:3,1 -DA:4,1 -DA:5,0 -DA:7,1 -DA:8,1 -DA:9,3 -DA:10,5 -DA:11,0 -DA:12,1 -DA:14,0 -DA:17,1 -DA:18,0 -DA:19,0 -DA:20,0 -DA:22,1 -DA:1234,0 -LH:9 -LF:16 -end_of_record diff --git a/test/testhelpers/coverage_file.jl b/test/testhelpers/coverage_file.jl index e8e0355952d80..577cc6bb5d2ca 100644 --- a/test/testhelpers/coverage_file.jl +++ b/test/testhelpers/coverage_file.jl @@ -24,6 +24,6 @@ end success = code_coverage_test() == [1, 2, 3] && short_form_func_coverage_test(2) == 4 -exit(success ? 0 : 1) +exit(success ? 
0 : 1) # end of file diff --git a/test/testhelpers/just_module.jl b/test/testhelpers/just_module.jl new file mode 100644 index 0000000000000..71bd87e660eae --- /dev/null +++ b/test/testhelpers/just_module.jl @@ -0,0 +1 @@ +@__MODULE__ diff --git a/test/threads.jl b/test/threads.jl index 8189311739e31..179279dbab4e6 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -16,7 +16,8 @@ let lk = ReentrantLock() t2 = @async (notify(c2); trylock(lk)) wait(c1) wait(c2) - @test t1.queue === lk.cond_wait.waitq + # wait for the task to park in the queue (it may be spinning) + @test timedwait(() -> t1.queue === lk.cond_wait.waitq, 1.0) == :ok @test t2.queue !== lk.cond_wait.waitq @test istaskdone(t2) @test !fetch(t2) @@ -288,18 +289,16 @@ close(proc.in) proc = run(cmd; wait = false) done = Threads.Atomic{Bool}(false) timeout = false - timer = Timer(100) do _ + timer = Timer(200) do _ timeout = true - for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL] - for _ in 1:1000 + for sig in (Base.SIGQUIT, Base.SIGKILL) + for _ in 1:3 kill(proc, sig) + sleep(1) if done[] - if sig != Base.SIGTERM - @warn "Terminating `$script` required signal $sig" - end + @warn "Terminating `$script` required signal $sig" return end - sleep(0.001) end end end @@ -309,16 +308,11 @@ close(proc.in) done[] = true close(timer) end - if ( !success(proc) ) || ( timeout ) + if !success(proc) || timeout @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout end - if Sys.iswindows() || Sys.isapple() - # Known failure: https://github.com/JuliaLang/julia/issues/43124 - @test_skip success(proc) - else - @test success(proc) - @test !timeout - end + @test success(proc) + @test !timeout end end @@ -327,3 +321,234 @@ end @test_throws ArgumentError @macroexpand(@threads 1) # arg isn't an Expr @test_throws ArgumentError @macroexpand(@threads if true 1 end) # arg doesn't start with for end + +@testset "rand_ptls underflow" begin + @test Base.Partr.cong(UInt32(0)) == 0 +end + +@testset "num_stack_mappings metric" begin + @test @ccall(jl_get_num_stack_mappings()::Cint) >= 1 + # There must be at least two: one for the root test task and one for the async task: + @test fetch(@async(@ccall(jl_get_num_stack_mappings()::Cint))) >= 2 +end + +@testset "Base.Threads docstrings" begin + @test isempty(Docs.undocumented_names(Threads)) +end + +@testset "wait failed task" begin + @testset "wait without throw keyword" begin + t = Threads.@spawn error("Error") + @test_throws TaskFailedException wait(t) + end + + @testset "wait with throw=false" begin + t = Threads.@spawn error("Error") + wait(t; throw=false) + @test istaskfailed(t) + end +end + +@testset "jl_*affinity" begin + cpumasksize = @ccall uv_cpumask_size()::Cint + if cpumasksize > 0 # otherwise affinities are not supported on the platform (UV_ENOTSUP) + jl_getaffinity = (tid, mask, cpumasksize) -> ccall(:jl_getaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize) + jl_setaffinity = (tid, mask, cpumasksize) -> ccall(:jl_setaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize) + mask = zeros(Cchar, cpumasksize) + @test jl_getaffinity(0, mask, cpumasksize) == 0 + @test !all(iszero, mask) + @test jl_setaffinity(0, mask, cpumasksize) == 0 + end +end + +@testset "io_thread" begin + function io_thread_test() + # This test creates a thread that does IO and then blocks the main julia thread + # This test hangs if you don't spawn an IO thread. 
+ # It hanging or not is technically a race but I haven't seen julia win that race yet. + cmd = """ + Base.Experimental.make_io_thread() + function callback()::Cvoid + println("Running a command") + run(`echo 42`) + return + end + function call_on_thread(callback::Ptr{Nothing}) + tid = UInt[0] + threadwork = @cfunction function(arg::Ptr{Cvoid}) + current_task().donenotify = Base.ThreadSynchronizer() + Base.errormonitor(current_task()) + println("Calling Julia from thread") + ccall(arg, Cvoid, ()) + nothing + end Cvoid (Ptr{Cvoid},) + err = @ccall uv_thread_create(tid::Ptr{UInt}, threadwork::Ptr{Cvoid}, callback::Ptr{Cvoid})::Cint + err == 0 || Base.uv_error("uv_thread_create", err) + gc_state = @ccall jl_gc_safe_enter()::Int8 + err = @ccall uv_thread_join(tid::Ptr{UInt})::Cint + @ccall jl_gc_safe_leave(gc_state::Int8)::Cvoid + err == 0 || Base.uv_error("uv_thread_join", err) + return + end + function main() + callback_ptr = @cfunction(callback, Cvoid, ()) + call_on_thread(callback_ptr) + println("Done") + end + main() + + """ + proc = run(pipeline(`$(Base.julia_cmd()) -e $cmd`), wait=false) + t = Timer(60) do t; kill(proc); end; + @test success(proc) + close(t) + return true + end + @test io_thread_test() +end + +# Make sure default number of BLAS threads respects CPU affinity: issue #55572. +@testset "LinearAlgebra number of default threads" begin + if AFFINITY_SUPPORTED + allowed_cpus = findall(uv_thread_getaffinity()) + cmd = addenv(`$(Base.julia_cmd()) --startup-file=no -E 'using LinearAlgebra; BLAS.get_num_threads()'`, + # Remove all variables which could affect the default number of threads + "OPENBLAS_NUM_THREADS"=>nothing, + "GOTO_NUM_THREADS"=>nothing, + "OMP_NUM_THREADS"=>nothing) + for n in 1:min(length(allowed_cpus), 8) # Cap to 8 to avoid too many tests on large systems + @test readchomp(setcpuaffinity(cmd, allowed_cpus[1:n])) == string(max(1, n ÷ 2)) + end + end +end + +let once = OncePerProcess(() -> return [nothing]) + @test typeof(once) <: OncePerProcess{Vector{Nothing}} + x = once() + @test x === once() + @atomic once.state = 0xff + @test_throws ErrorException("invalid state for OncePerProcess") once() + @test_throws ErrorException("OncePerProcess initializer failed previously") once() + @atomic once.state = 0x01 + @test x === once() +end +let once = OncePerProcess{Int}(() -> error("expected")) + @test_throws ErrorException("expected") once() + @test_throws ErrorException("OncePerProcess initializer failed previously") once() +end + +let e = Base.Event(true), + started = Channel{Int16}(Inf), + finish = Channel{Nothing}(Inf), + exiting = Channel{Nothing}(Inf), + starttest2 = Event(), + once = OncePerThread() do + push!(started, threadid()) + take!(finish) + return [nothing] + end + alls = OncePerThread() do + return [nothing] + end + @test typeof(once) <: OncePerThread{Vector{Nothing}} + push!(finish, nothing) + @test_throws ArgumentError once[0] + x = once() + @test_throws ArgumentError once[0] + @test x === once() === fetch(@async once()) === once[threadid()] + @test take!(started) == threadid() + @test isempty(started) + tids = zeros(UInt, 50) + newthreads = zeros(Int16, length(tids)) + onces = Vector{Vector{Nothing}}(undef, length(tids)) + allonces = Vector{Vector{Vector{Nothing}}}(undef, length(tids)) + # allocate closure memory to last until all threads are started + cls = [function cl() + GC.gc(false) # stress test the GC-safepoint mechanics of jl_adopt_thread + try + newthreads[i] = threadid() + local y = once() + onces[i] = y + @test x !== y === once() === 
once[threadid()] + wait(starttest2) + allonces[i] = Vector{Nothing}[alls[tid] for tid in newthreads] + catch ex + close(started, ErrorException("failed")) + close(finish, ErrorException("failed")) + @lock stderr Base.display_error(current_exceptions()) + end + push!(exiting, nothing) + GC.gc(false) # stress test the GC-safepoint mechanics of jl_delete_thread + nothing + end + for i = 1:length(tids)] + GC.@preserve cls begin # this memory must survive until each corresponding thread exits (waitallthreads / uv_thread_join) + Base.preserve_handle(cls) + for i = 1:length(tids) + function threadcallclosure(tid::Ref{UInt}, cl::Ref{F}) where {F} # create sparam so we can reference the type of cl in the ccall type + threadwork = @cfunction cl -> cl() Cvoid (Ref{F},) # create a cfunction that specializes on cl as an argument and calls it + err = @ccall uv_thread_create(tid::Ptr{UInt}, threadwork::Ptr{Cvoid}, cl::Ref{F})::Cint # call that on a thread + err == 0 || Base.uv_error("uv_thread_create", err) + nothing + end + threadcallclosure(Ref(tids, i), Ref(cls, i)) + end + @noinline function waitallthreads(tids, cls) + for i = 1:length(tids) + tid = Ref(tids, i) + tidp = Base.unsafe_convert(Ptr{UInt}, tid)::Ptr{UInt} + gc_state = @ccall jl_gc_safe_enter()::Int8 + GC.@preserve tid err = @ccall uv_thread_join(tidp::Ptr{UInt})::Cint + @ccall jl_gc_safe_leave(gc_state::Int8)::Cvoid + err == 0 || Base.uv_error("uv_thread_join", err) + end + Base.unpreserve_handle(cls) + end + try + # let them finish in batches of 10 + for i = 1:length(tids) ÷ 10 + for i = 1:10 + newid = take!(started) + @test newid != threadid() + end + for i = 1:10 + push!(finish, nothing) + end + end + @test isempty(started) + # now run the second part of the test where they all try to access the other threads elements + notify(starttest2) + finally + for _ = 1:length(tids) + # run IO loop until all threads are close to exiting + take!(exiting) + end + waitallthreads(tids, cls) + end + end + @test isempty(started) + @test isempty(finish) + @test length(IdSet{eltype(onces)}(onces)) == length(onces) # make sure every object is unique + allexpected = Vector{Nothing}[alls[tid] for tid in newthreads] + @test length(IdSet{eltype(allexpected)}(allexpected)) == length(allexpected) # make sure every object is unique + @test all(i -> allonces[i] !== allexpected && all(j -> allonces[i][j] === allexpected[j], eachindex(allexpected)), eachindex(allonces)) # make sure every thread saw the same elements + @test_throws ArgumentError once[Threads.maxthreadid() + 1] + @test_throws ArgumentError once[-1] + +end +let once = OncePerThread{Int}(() -> error("expected")) + @test_throws ErrorException("expected") once() + @test_throws ErrorException("OncePerThread initializer failed previously") once() +end + +let once = OncePerTask(() -> return [nothing]) + @test typeof(once) <: OncePerTask{Vector{Nothing}} + x = once() + @test x === once() !== fetch(@async once()) + delete!(task_local_storage(), once) + @test x !== once() === once() +end +let once = OncePerTask{Int}(() -> error("expected")) + @test_throws ErrorException("expected") once() + @test_throws ErrorException("expected") once() +end diff --git a/test/threads_exec.jl b/test/threads_exec.jl index 9c7c524febeff..d77cf06905f44 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -3,6 +3,7 @@ using Test using Base.Threads using Base.Threads: SpinLock, threadpoolsize +using LinearAlgebra: peakflops # for cfunction_closure include("testenv.jl") @@ -803,6 +804,84 @@ function 
_atthreads_dynamic_with_error(a) end @test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(threadpoolsize())) +#### +# :greedy +#### + +function _atthreads_greedy_schedule(n) + inc = Threads.Atomic{Int}(0) + flags = zeros(Int, n) + Threads.@threads :greedy for i = 1:n + Threads.atomic_add!(inc, 1) + flags[i] = 1 + end + return inc[], flags +end +@test _atthreads_greedy_schedule(threadpoolsize()) == (threadpoolsize(), ones(threadpoolsize())) +@test _atthreads_greedy_schedule(1) == (1, ones(1)) +@test _atthreads_greedy_schedule(10) == (10, ones(10)) +@test _atthreads_greedy_schedule(threadpoolsize() * 2) == (threadpoolsize() * 2, ones(threadpoolsize() * 2)) + +# nested greedy schedule +function _atthreads_greedy_greedy_schedule() + inc = Threads.Atomic{Int}(0) + Threads.@threads :greedy for _ = 1:threadpoolsize() + Threads.@threads :greedy for _ = 1:threadpoolsize() + Threads.atomic_add!(inc, 1) + end + end + return inc[] +end +@test _atthreads_greedy_greedy_schedule() == threadpoolsize() * threadpoolsize() + +function _atthreads_greedy_dynamic_schedule() + inc = Threads.Atomic{Int}(0) + Threads.@threads :greedy for _ = 1:threadpoolsize() + Threads.@threads :dynamic for _ = 1:threadpoolsize() + Threads.atomic_add!(inc, 1) + end + end + return inc[] +end +@test _atthreads_greedy_dynamic_schedule() == threadpoolsize() * threadpoolsize() + +function _atthreads_dynamic_greedy_schedule() + inc = Threads.Atomic{Int}(0) + Threads.@threads :dynamic for _ = 1:threadpoolsize() + Threads.@threads :greedy for _ = 1:threadpoolsize() + Threads.atomic_add!(inc, 1) + end + end + return inc[] +end +@test _atthreads_dynamic_greedy_schedule() == threadpoolsize() * threadpoolsize() + +function _atthreads_static_greedy_schedule() + ids = zeros(Int, threadpoolsize()) + inc = Threads.Atomic{Int}(0) + Threads.@threads :static for i = 1:threadpoolsize() + ids[i] = Threads.threadid() + Threads.@threads :greedy for _ = 1:threadpoolsize() + Threads.atomic_add!(inc, 1) + end + end + return ids, inc[] +end +@test _atthreads_static_greedy_schedule() == (1:threadpoolsize(), threadpoolsize() * threadpoolsize()) + +# errors inside @threads :greedy +function _atthreads_greedy_with_error(a) + Threads.@threads :greedy for i in eachindex(a) + error("user error in the loop body") + end + a +end +@test_throws "user error in the loop body" _atthreads_greedy_with_error(zeros(threadpoolsize())) + +#### +# multi-argument loop +#### + try @macroexpand @threads(for i = 1:10, j = 1:10; end) catch ex @@ -1044,23 +1123,25 @@ end # issue #41546, thread-safe package loading @testset "package loading" begin - ch = Channel{Bool}(threadpoolsize()) + ntasks = max(threadpoolsize(), 4) + ch = Channel{Bool}(ntasks) barrier = Base.Event() old_act_proj = Base.ACTIVE_PROJECT[] try pushfirst!(LOAD_PATH, "@") Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg") @sync begin - for _ in 1:threadpoolsize() + for _ in 1:ntasks Threads.@spawn begin put!(ch, true) wait(barrier) @eval using TestPkg end end - for _ in 1:threadpoolsize() + for _ in 1:ntasks take!(ch) end + close(ch) notify(barrier) end @test Base.root_module(@__MODULE__, :TestPkg) isa Module @@ -1090,4 +1171,369 @@ end end end +# Thread safety of threadcall +function threadcall_threads() + Threads.@threads for i = 1:8 + ptr = @threadcall(:jl_malloc, Ptr{Cint}, (Csize_t,), sizeof(Cint)) + @test ptr != C_NULL + unsafe_store!(ptr, 3) + @test unsafe_load(ptr) == 3 + ptr = @threadcall(:jl_realloc, Ptr{Cint}, (Ptr{Cint}, Csize_t,), ptr, 2 * sizeof(Cint)) + @test ptr != C_NULL + 
unsafe_store!(ptr, 4, 2) + @test unsafe_load(ptr, 1) == 3 + @test unsafe_load(ptr, 2) == 4 + @threadcall(:jl_free, Cvoid, (Ptr{Cint},), ptr) + end +end +@testset "threadcall + threads" begin + threadcall_threads() #Shouldn't crash! +end + +@testset "Wait multiple tasks" begin + convert_tasks(t, x) = x + convert_tasks(::Set{Task}, x::Vector{Task}) = Set{Task}(x) + convert_tasks(::Tuple{Task}, x::Vector{Task}) = tuple(x...) + + function create_tasks() + tasks = Task[] + event = Threads.Event() + push!(tasks, + Threads.@spawn begin + sleep(0.01) + end) + push!(tasks, + Threads.@spawn begin + sleep(0.02) + end) + push!(tasks, + Threads.@spawn begin + wait(event) + end) + return tasks, event + end + + function teardown(tasks, event) + notify(event) + waitall(resize!(tasks, 3), throw=true) + end + + for tasks_type in (Vector{Task}, Set{Task}, Tuple{Task}) + @testset "waitany" begin + @testset "throw=false" begin + tasks, event = create_tasks() + wait(tasks[1]) + wait(tasks[2]) + done, pending = waitany(convert_tasks(tasks_type, tasks); throw=false) + @test length(done) == 2 + @test tasks[1] ∈ done + @test tasks[2] ∈ done + @test length(pending) == 1 + @test tasks[3] ∈ pending + teardown(tasks, event) + end + + @testset "throw=true" begin + tasks, event = create_tasks() + push!(tasks, Threads.@spawn error("Error")) + wait(tasks[end]; throw=false) + + @test_throws CompositeException begin + waitany(convert_tasks(tasks_type, tasks); throw=true) + end + + teardown(tasks, event) + end + end + + @testset "waitall" begin + @testset "All tasks succeed" begin + tasks, event = create_tasks() + + wait(tasks[1]) + wait(tasks[2]) + waiter = Threads.@spawn waitall(convert_tasks(tasks_type, tasks)) + @test !istaskdone(waiter) + + notify(event) + done, pending = fetch(waiter) + @test length(done) == 3 + @test tasks[1] ∈ done + @test tasks[2] ∈ done + @test tasks[3] ∈ done + @test length(pending) == 0 + end + + @testset "failfast=true, throw=false" begin + tasks, event = create_tasks() + push!(tasks, Threads.@spawn error("Error")) + + wait(tasks[1]) + wait(tasks[2]) + waiter = Threads.@spawn waitall(convert_tasks(tasks_type, tasks); failfast=true, throw=false) + + done, pending = fetch(waiter) + @test length(done) == 3 + @test tasks[1] ∈ done + @test tasks[2] ∈ done + @test tasks[4] ∈ done + @test length(pending) == 1 + @test tasks[3] ∈ pending + + teardown(tasks, event) + end + + @testset "failfast=false, throw=true" begin + tasks, event = create_tasks() + push!(tasks, Threads.@spawn error("Error")) + + notify(event) + + @test_throws CompositeException begin + waitall(convert_tasks(tasks_type, tasks); failfast=false, throw=true) + end + + @test all(istaskdone.(tasks)) + + teardown(tasks, event) + end + + @testset "failfast=true, throw=true" begin + tasks, event = create_tasks() + push!(tasks, Threads.@spawn error("Error")) + + @test_throws CompositeException begin + waitall(convert_tasks(tasks_type, tasks); failfast=true, throw=true) + end + + @test !istaskdone(tasks[3]) + + teardown(tasks, event) + end + end + end +end + +@testset "Base.Experimental.task_metrics" begin + t = Task(() -> nothing) + @test_throws "const field" t.metrics_enabled = true + is_task_metrics_enabled() = fetch(Threads.@spawn current_task().metrics_enabled) + @test !is_task_metrics_enabled() + try + @testset "once" begin + Base.Experimental.task_metrics(true) + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) + @test !is_task_metrics_enabled() + end + @testset "multiple" begin + Base.Experimental.task_metrics(true) 
# 1 + Base.Experimental.task_metrics(true) # 2 + Base.Experimental.task_metrics(true) # 3 + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) # 2 + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) # 1 + @test is_task_metrics_enabled() + @sync for i in 1:5 # 0 (not negative) + Threads.@spawn Base.Experimental.task_metrics(false) + end + @test !is_task_metrics_enabled() + Base.Experimental.task_metrics(true) # 1 + @test is_task_metrics_enabled() + end + finally + while is_task_metrics_enabled() + Base.Experimental.task_metrics(false) + end + end +end + +@testset "task time counters" begin + @testset "enabled" begin + try + Base.Experimental.task_metrics(true) + start_time = time_ns() + t = Threads.@spawn peakflops() + wait(t) + end_time = time_ns() + wall_time_delta = end_time - start_time + @test t.metrics_enabled + @test Base.Experimental.task_running_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t) + @test wall_time_delta > Base.Experimental.task_wall_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end + @testset "disabled" begin + t = Threads.@spawn peakflops() + wait(t) + @test !t.metrics_enabled + @test isnothing(Base.Experimental.task_running_time_ns(t)) + @test isnothing(Base.Experimental.task_wall_time_ns(t)) + end + @testset "task not run" begin + t1 = Task(() -> nothing) + @test !t1.metrics_enabled + @test isnothing(Base.Experimental.task_running_time_ns(t1)) + @test isnothing(Base.Experimental.task_wall_time_ns(t1)) + try + Base.Experimental.task_metrics(true) + t2 = Task(() -> nothing) + @test t2.metrics_enabled + @test Base.Experimental.task_running_time_ns(t2) == 0 + @test Base.Experimental.task_wall_time_ns(t2) == 0 + finally + Base.Experimental.task_metrics(false) + end + end + @testset "task failure" begin + try + Base.Experimental.task_metrics(true) + t = Threads.@spawn error("this task failed") + @test_throws "this task failed" wait(t) + @test Base.Experimental.task_running_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end + @testset "direct yield(t)" begin + try + Base.Experimental.task_metrics(true) + start = time_ns() + t_outer = Threads.@spawn begin + t_inner = Task(() -> peakflops()) + t_inner.sticky = false + # directly yield to `t_inner` rather calling `schedule(t_inner)` + yield(t_inner) + wait(t_inner) + @test Base.Experimental.task_running_time_ns(t_inner) > 0 + @test Base.Experimental.task_wall_time_ns(t_inner) > 0 + @test Base.Experimental.task_wall_time_ns(t_inner) >= Base.Experimental.task_running_time_ns(t_inner) + end + wait(t_outer) + delta = time_ns() - start + @test Base.Experimental.task_running_time_ns(t_outer) > 0 + @test Base.Experimental.task_wall_time_ns(t_outer) > 0 + @test Base.Experimental.task_wall_time_ns(t_outer) >= Base.Experimental.task_running_time_ns(t_outer) + @test Base.Experimental.task_wall_time_ns(t_outer) < delta + finally + Base.Experimental.task_metrics(false) + end + end + @testset "bad schedule" begin + try + Base.Experimental.task_metrics(true) + t1 = Task((x) -> 1) + schedule(t1) # MethodError + yield() + @assert istaskfailed(t1) + @test Base.Experimental.task_running_time_ns(t1) > 0 + @test Base.Experimental.task_wall_time_ns(t1) > 0 + foo(a, b) = a + b + t2 = Task(() -> (peakflops(); 
foo(wait()))) + schedule(t2) + yield() + @assert istaskstarted(t1) && !istaskdone(t2) + schedule(t2, 1) + yield() + @assert istaskfailed(t2) + @test Base.Experimental.task_running_time_ns(t2) > 0 + @test Base.Experimental.task_wall_time_ns(t2) > 0 + finally + Base.Experimental.task_metrics(false) + end + end + @testset "continuously update until task done" begin + try + Base.Experimental.task_metrics(true) + last_running_time = Ref(typemax(Int)) + last_wall_time = Ref(typemax(Int)) + t = Threads.@spawn begin + running_time = Base.Experimental.task_running_time_ns() + wall_time = Base.Experimental.task_wall_time_ns() + for _ in 1:5 + x = time_ns() + while time_ns() < x + 100 + end + new_running_time = Base.Experimental.task_running_time_ns() + new_wall_time = Base.Experimental.task_wall_time_ns() + @test new_running_time > running_time + @test new_wall_time > wall_time + running_time = new_running_time + wall_time = new_wall_time + end + last_running_time[] = running_time + last_wall_time[] = wall_time + end + wait(t) + final_running_time = Base.Experimental.task_running_time_ns(t) + final_wall_time = Base.Experimental.task_wall_time_ns(t) + @test last_running_time[] < final_running_time + @test last_wall_time[] < final_wall_time + # ensure many more tasks are run to make sure the counters are + # not being updated after a task is done e.g. only when a new task is found + @sync for _ in 1:Threads.nthreads() + Threads.@spawn rand() + end + @test final_running_time == Base.Experimental.task_running_time_ns(t) + @test final_wall_time == Base.Experimental.task_wall_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end +end + +@testset "task time counters: lots of spawns" begin + using Dates + try + Base.Experimental.task_metrics(true) + # create more tasks than we have threads. + # - all tasks must have: cpu time <= wall time + # - some tasks must have: cpu time < wall time + # - summing across all tasks we must have: total cpu time <= available cpu time + n_tasks = 2 * Threads.nthreads(:default) + cpu_times = Vector{UInt64}(undef, n_tasks) + wall_times = Vector{UInt64}(undef, n_tasks) + start_time = time_ns() + @sync begin + for i in 1:n_tasks + start_time_i = time_ns() + task_i = Threads.@spawn peakflops() + Threads.@spawn begin + wait(task_i) + end_time_i = time_ns() + wall_time_delta_i = end_time_i - start_time_i + cpu_times[$i] = cpu_time_i = Base.Experimental.task_running_time_ns(task_i) + wall_times[$i] = wall_time_i = Base.Experimental.task_wall_time_ns(task_i) + # task should have recorded some cpu-time and some wall-time + @test cpu_time_i > 0 + @test wall_time_i > 0 + # task cpu-time cannot be greater than its wall-time + @test wall_time_i >= cpu_time_i + # task wall-time must be less than our manually measured wall-time + # between calling `@spawn` and returning from `wait`. + @test wall_time_delta_i > wall_time_i + end + end + end + end_time = time_ns() + wall_time_delta = (end_time - start_time) + available_cpu_time = wall_time_delta * Threads.nthreads(:default) + summed_cpu_time = sum(cpu_times) + # total CPU time from all tasks can't exceed what was actually available. + @test available_cpu_time > summed_cpu_time + # some tasks must have cpu-time less than their wall-time, because we had more tasks + # than threads. 
+ summed_wall_time = sum(wall_times) + @test summed_wall_time > summed_cpu_time + finally + Base.Experimental.task_metrics(false) + end +end + end # main testset diff --git a/test/trimming/Makefile b/test/trimming/Makefile new file mode 100644 index 0000000000000..d2da21eb71a88 --- /dev/null +++ b/test/trimming/Makefile @@ -0,0 +1,55 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# This Makefile template requires the following variables to be set +# in the environment or on the command-line: +# JULIA: path to julia[.exe] executable +# BIN: binary build directory + +ifndef JULIA + $(error "Please pass JULIA=[path of target julia binary], or set as environment variable!") +endif +ifndef BIN + $(error "Please pass BIN=[path of build directory], or set as environment variable!") +endif + +#============================================================================= +# location of test source +SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) +JULIAHOME := $(abspath $(SRCDIR)/../..) +BUILDSCRIPT := $(BIN)/../share/julia/juliac-buildscript.jl +include $(JULIAHOME)/Make.inc + +# get the executable suffix, if any +EXE := $(suffix $(abspath $(JULIA))) + +# get compiler and linker flags. (see: `contrib/julia-config.jl`) +JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' -- +CPPFLAGS_ADD := +CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags) +LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) -ljulia-internal + +#============================================================================= + +release: hello$(EXE) + +hello.o: $(SRCDIR)/hello.jl $(BUILDSCRIPT) + $(JULIA) -t 1 -J $(BIN)/../lib/julia/sys.so --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --experimental --trim $(BUILDSCRIPT) $(SRCDIR)/hello.jl --output-exe true + +init.o: $(SRCDIR)/init.c + $(CC) -c -o $@ $< $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) + +hello$(EXE): hello.o init.o + $(CC) -o $@ $(WHOLE_ARCHIVE) hello.o $(NO_WHOLE_ARCHIVE) init.o $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) + +check: hello$(EXE) + $(JULIA) --depwarn=error $(SRCDIR)/../runtests.jl $(SRCDIR)/trimming + +clean: + -rm -f hello$(EXE) init.o hello.o + +.PHONY: release clean check + +# Makefile debugging trick: +# call print-VARIABLE to see the runtime value of any variable +print-%: + @echo '$*=$($*)' diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl new file mode 100644 index 0000000000000..307bf820f325b --- /dev/null +++ b/test/trimming/hello.jl @@ -0,0 +1,6 @@ +module MyApp +Base.@ccallable function main()::Cint + println(Core.stdout, "Hello, world!") + return 0 +end +end diff --git a/test/trimming/init.c b/test/trimming/init.c new file mode 100644 index 0000000000000..ea1b02f8e5c8f --- /dev/null +++ b/test/trimming/init.c @@ -0,0 +1,9 @@ +#include + +__attribute__((constructor)) void static_init(void) +{ + if (jl_is_initialized()) + return; + julia_init(JL_IMAGE_IN_MEMORY); + jl_exception_clear(); +} diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl new file mode 100644 index 0000000000000..dfacae7f8e531 --- /dev/null +++ b/test/trimming/trimming.jl @@ -0,0 +1,7 @@ +using Test + +exe_path = joinpath(@__DIR__, "hello"*splitext(Base.julia_exename())[2]) + +@test readchomp(`$exe_path`) == "Hello, world!" 
+ +@test filesize(exe_path) < filesize(unsafe_string(Base.JLOptions().image_file))/10 diff --git a/test/tuple.jl b/test/tuple.jl index 71770b6a553c2..13af5ac992434 100644 --- a/test/tuple.jl +++ b/test/tuple.jl @@ -209,10 +209,12 @@ end @test eachindex((2,5,"foo")) === Base.OneTo(3) @test eachindex((2,5,"foo"), (1,2,5,7)) === Base.OneTo(4) + + @test Core.Compiler.is_nothrow(Base.infer_effects(iterate, (Tuple{Int,Int,Int}, Int))) end -@testset "element type" begin +@testset "element/value/key types" begin @test eltype((1,2,3)) === Int @test eltype((1.0,2.0,3.0)) <: AbstractFloat @test eltype((true, false)) === Bool @@ -227,6 +229,11 @@ end typejoin(Int, Float64, Bool) @test eltype(Tuple{Int, Missing}) === Union{Missing, Int} @test eltype(Tuple{Int, Nothing}) === Union{Nothing, Int} + + @test valtype((1,2,3)) === eltype((1,2,3)) + @test valtype(Tuple{Int, Missing}) === eltype(Tuple{Int, Missing}) + @test keytype((1,2,3)) === Int + @test keytype(Tuple{Int, Missing}) === Int end @testset "map with Nothing and Missing" begin @@ -525,6 +532,13 @@ end for n = 0:15 @test ntuple(identity, Val(n)) == ntuple(identity, n) end + + @test Base.infer_return_type(ntuple, Tuple{typeof(identity), Val}) == Tuple{Vararg{Int}} + + # issue #55790 + for n in 1:32 + @test typeof(ntuple(identity, UInt64(n))) == NTuple{n, Int} + end end struct A_15703{N} @@ -647,6 +661,8 @@ end f() = Base.setindex((1:1, 2:2, 3:3), 9, 1) @test @inferred(f()) == (9, 2:2, 3:3) + + @test Base.return_types(Base.setindex, Tuple{Tuple,Nothing,Int}) == [Tuple] end @testset "inferable range indexing with constant values" begin @@ -763,6 +779,12 @@ g42457(a, b) = Base.isequal(a, b) ? 1 : 2.0 # issue #46049: setindex(::Tuple) regression @inferred Base.setindex((1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16), 42, 1) +# issue #50562 +f50562(r) = in(:i_backward, r[]) +r50562 = Ref((:b_back, :foofakgka, :i_backw)) +f50562(r50562) +@test @allocated(f50562(r50562)) == 0 + # issue #47326 function fun1_47326(args...) head..., tail = args @@ -796,3 +818,35 @@ namedtup = (;a=1, b=2, c=3) @test_throws ErrorException("Tuple field type cannot be Union{}") Tuple{Vararg{Union{},1}} @test Tuple{} <: Tuple{Vararg{Union{},N}} where N @test !(Tuple{} >: Tuple{Vararg{Union{},N}} where N) + +@test Val{Tuple{T,T,T} where T} === Val{Tuple{Vararg{T,3}} where T} +@test Val{Tuple{Vararg{T,4}} where T} === Val{Tuple{T,T,T,T} where T} +@test Val{Tuple{Int64, Vararg{Int32,N}} where N} === Val{Tuple{Int64, Vararg{Int32}}} +@test Val{Tuple{Int32, Vararg{Int64}}} === Val{Tuple{Int32, Vararg{Int64,N}} where N} + +@testset "from Pair, issue #52636" begin + pair = (1 => "2") + @test (1, "2") == @inferred Tuple(pair) + @test (1, "2") == @inferred Tuple{Int,String}(pair) +end + +@testset "circshift" begin + t1 = (1, 2, 3, 4, 5) + t2 = (1, 'a', -7.0, 3) + t3 = ('a', 'b', 'c', 'd') + @test @inferred(Base.circshift(t1, 2)) == (4, 5, 1, 2, 3) + # The return type of mixed tuples with runtime shift cannot be inferred. 
+ @test Base.circshift(t2, 3) == ('a', -7.0, 3, 1) + @test @inferred(Base.circshift(t3, 7)) == ('b', 'c', 'd', 'a') + @test @inferred(Base.circshift(t3, -1)) == ('b', 'c', 'd', 'a') + @test_throws MethodError circshift(t1, 'a') + @test Base.infer_return_type(circshift, Tuple{Tuple,Integer}) <: Tuple + @test Base.infer_return_type(circshift, Tuple{Tuple{Vararg{Any,10}},Integer}) <: Tuple{Vararg{Any,10}} + for len ∈ 0:5 + v = 1:len + t = Tuple(v) + for shift ∈ -6:6 + @test circshift(v, shift) == collect(circshift(t, shift)) + end + end +end diff --git a/test/vecelement.jl b/test/vecelement.jl index 6638f06f4f358..b89eb097ee560 100644 --- a/test/vecelement.jl +++ b/test/vecelement.jl @@ -1,5 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license - +using InteractiveUtils make_value(::Type{T}, i::Integer) where {T<:Integer} = 3*i%T make_value(::Type{T},i::Integer) where {T<:AbstractFloat} = T(3*i) @@ -120,3 +120,9 @@ for T in (Float64, Float32, Int64, Int32) @test b == result end end +@testset "vecelement overalignment" begin + io = IOBuffer() + code_llvm(io,getindex, (Array{NTuple{5, VecElement{Float64}}, 1}, Int64), optimize=false) + ir = String(take!(io)) + @test match(r"align 64", ir) === nothing +end diff --git a/test/version.jl b/test/version.jl index 3723bb0f788e2..242b32c47cbdc 100644 --- a/test/version.jl +++ b/test/version.jl @@ -219,11 +219,14 @@ for major=0:3, minor=0:3, patch=0:3 end end -# banner -import Base.banner -io = IOBuffer() -@test banner(io) === nothing -@test length(String(take!(io))) > 50 +# VersionNumber has the promised fields +let v = v"4.2.1-1.x+a.9" + @test v.major isa Integer + @test v.minor isa Integer + @test v.patch isa Integer + @test v.prerelease isa Tuple{Vararg{Union{Integer, AbstractString}}} + @test v.build isa Tuple{Vararg{Union{Integer, AbstractString}}} +end # julia_version.h version test @test VERSION.major == ccall(:jl_ver_major, Cint, ()) diff --git a/test/worlds.jl b/test/worlds.jl index b5a8f1c5449ac..268a6664571fb 100644 --- a/test/worlds.jl +++ b/test/worlds.jl @@ -2,8 +2,7 @@ # tests for accurate updating of method tables -using Base: get_world_counter -tls_world_age() = ccall(:jl_get_tls_world_age, UInt, ()) +using Base: get_world_counter, tls_world_age @test typemax(UInt) > get_world_counter() == tls_world_age() > 0 # test simple method replacement @@ -258,15 +257,13 @@ end # avoid adding this to Base function equal(ci1::Core.CodeInfo, ci2::Core.CodeInfo) return ci1.code == ci2.code && - ci1.codelocs == ci2.codelocs && + ci1.debuginfo == ci2.debuginfo && ci1.ssavaluetypes == ci2.ssavaluetypes && ci1.ssaflags == ci2.ssaflags && ci1.method_for_inference_limit_heuristics == ci2.method_for_inference_limit_heuristics && - ci1.linetable == ci2.linetable && ci1.slotnames == ci2.slotnames && ci1.slotflags == ci2.slotflags && - ci1.slottypes == ci2.slottypes && - ci1.rettype == ci2.rettype + ci1.slottypes == ci2.slottypes end equal(p1::Pair, p2::Pair) = p1.second == p2.second && equal(p1.first, p2.first) @@ -419,3 +416,87 @@ ccall(:jl_debug_method_invalidation, Any, (Cint,), 0) which(mc48954, (AbstractFloat, Int)), "jl_method_table_insert" ] + +# issue #50091 -- missing invoke edge affecting nospecialized dispatch +module ExceptionUnwrapping +@nospecialize +unwrap_exception(@nospecialize(e)) = e +unwrap_exception(e::Base.TaskFailedException) = e.task.exception +@noinline function _summarize_task_exceptions(io::IO, exc, prefix = nothing) + _summarize_exception((;prefix,), io, exc) + nothing +end +@noinline function 
_summarize_exception(kws, io::IO, e::TaskFailedException) + _summarize_task_exceptions(io, e.task, kws.prefix) +end +# This is the overload that prints the actual exception that occurred. +result = Bool[] +@noinline function _summarize_exception(kws, io::IO, @nospecialize(exc)) + global result + push!(result, unwrap_exception(exc) === exc) + if unwrap_exception(exc) !== exc # something uninferrable + return _summarize_exception(kws, io, unwrap_exception(exc)) + end +end +struct X; x; end +end +let e = ExceptionUnwrapping.X(nothing) + @test ExceptionUnwrapping.unwrap_exception(e) === e + ExceptionUnwrapping._summarize_task_exceptions(devnull, e) + @test ExceptionUnwrapping.result == [true] + empty!(ExceptionUnwrapping.result) +end +ExceptionUnwrapping.unwrap_exception(e::ExceptionUnwrapping.X) = e.x +let e = ExceptionUnwrapping.X(nothing) + @test !(ExceptionUnwrapping.unwrap_exception(e) === e) + ExceptionUnwrapping._summarize_task_exceptions(devnull, e) + @test ExceptionUnwrapping.result == [false, true] + empty!(ExceptionUnwrapping.result) +end + +fshadow() = 1 +gshadow() = fshadow() +@test fshadow() === 1 +@test gshadow() === 1 +fshadow_m1 = which(fshadow, ()) +fshadow() = 2 +fshadow() = 3 +@test fshadow() === 3 +@test gshadow() === 3 +fshadow_m3 = which(fshadow, ()) +Base.delete_method(fshadow_m1) +@test fshadow() === 3 +@test gshadow() === 3 +Base.delete_method(fshadow_m3) +fshadow_m2 = which(fshadow, ()) +@test fshadow() === 2 +@test gshadow() === 2 +Base.delete_method(fshadow_m2) +@test_throws MethodError(fshadow, (), Base.tls_world_age()) gshadow() +@test Base.morespecific(fshadow_m3, fshadow_m2) +@test Base.morespecific(fshadow_m2, fshadow_m1) +@test Base.morespecific(fshadow_m3, fshadow_m1) +@test !Base.morespecific(fshadow_m2, fshadow_m3) + +# Generated functions without edges must have min_world = 1. +# N.B.: If changing this, move this test to precompile and make sure +# that the specialization survives revalidation. +function generated_no_edges_gen(world, args...) + src = ccall(:jl_new_code_info_uninit, Ref{Core.CodeInfo}, ()) + src.code = Any[Core.ReturnNode(nothing)] + src.slotnames = Symbol[:self] + src.slotflags = UInt8[0x00] + src.ssaflags = UInt32[0x00] + src.ssavaluetypes = 1 + src.nargs = 1 + src.min_world = first(Base._methods(generated_no_edges, Tuple{}, -1, world)).method.primary_world + + return src +end + +@eval function generated_no_edges() + $(Expr(:meta, :generated, generated_no_edges_gen)) + $(Expr(:meta, :generated_only)) +end + +@test_throws ErrorException("Generated function result with `edges == nothing` and `max_world == typemax(UInt)` must have `min_world == 1`") generated_no_edges() diff --git a/typos.toml b/typos.toml new file mode 100644 index 0000000000000..b9a9311946bc4 --- /dev/null +++ b/typos.toml @@ -0,0 +1,2 @@ +[default] +extend-ignore-words-re = ["^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?$"]
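A note on the `extend-ignore-words-re` entry in typos.toml above: the anchored pattern matches any token made of zero to four ASCII letters, so very short words, which are the most common source of spell-check false positives, are excluded, while longer misspellings are still reported. Below is a minimal Julia sketch of that matching behavior; it is illustrative only, and the helper name `ignored_by_config` is not part of the repository.

# Reproduce the intent of the extend-ignore-words-re pattern: tokens of at
# most four ASCII letters (including the empty string) match the anchored
# regex and are therefore skipped; longer words are still checked.
const IGNORE_PATTERN = r"^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?$"

ignored_by_config(word::AbstractString) = occursin(IGNORE_PATTERN, word)

@assert ignored_by_config("teh")        # 3 letters: too short to flag
@assert ignored_by_config("fals")       # 4 letters: too short to flag
@assert !ignored_by_config("recieve")   # 7 letters: still reported
@assert !ignored_by_config("ambigous")  # 8 letters: still reported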